feat(editor): add --catalog-pluck cross-format entry lookup

New utility extracts a single entry by id from any
registered catalog format without dumping the whole file.
Useful when a catalog has hundreds of entries and you
only want to inspect one — e.g. "show me WBOS encounter
102" or "what's in WHRT bind 204".

Auto-detects format from the file's 4-byte magic, looks
up the registered --info-* flag in the format table,
spawns that handler as a subprocess with --json, then
filters the entries[] array to just the matching id. The
primary-key field is auto-discovered: prefers the first
*Id field that ISN'T a known foreign-key reference (mapId,
areaId, spellId, npcId, factionId, etc. — 31 known
external-ref names filtered out). Falls back to the first
remaining *Id, then to the first integer-valued field.

Without the foreign-key filter, alphabetical key
iteration in nlohmann::json picks the wrong field — for
WHRT entries with both areaId and bindId, naive code
would identify by areaId and miss obvious lookups.
Caught during smoke-test and fixed before commit.

Output formats: terminal table (default) or --json.
Accepts file path with or without the .wXXX extension.
CLI flag count 1111 -> 1112.
This commit is contained in:
Kelsi 2026-05-10 00:37:53 -07:00
parent 8c0cab27be
commit 16454c57c4
6 changed files with 363 additions and 0 deletions

View file

@ -1537,6 +1537,7 @@ add_executable(wowee_editor
tools/editor/cli_group_compositions_catalog.cpp
tools/editor/cli_hearth_binds_catalog.cpp
tools/editor/cli_server_broadcasts_catalog.cpp
tools/editor/cli_catalog_pluck.cpp
tools/editor/cli_quest_objective.cpp
tools/editor/cli_quest_reward.cpp
tools/editor/cli_clone.cpp

View file

@ -137,6 +137,7 @@ const char* const kArgRequired[] = {
"--info-magic", "--summary-dir", "--rename-by-magic",
"--bulk-rename-by-magic", "--touch-tree", "--tree-summary-md",
"--catalog-grep", "--diff-headers", "--audit-tree",
"--catalog-pluck",
"--magic-fix", "--bulk-validate",
"--bulk-export-json", "--bulk-import-json",
"--diff-tree", "--orphan-jsons", "--list-by-magic",

View file

@ -0,0 +1,346 @@
#include "cli_catalog_pluck.hpp"
#include "cli_arg_parse.hpp"
#include "cli_format_table.hpp"
#include <nlohmann/json.hpp>
#include <array>
#include <cstdint>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <fstream>
#include <string>
#include <vector>
namespace wowee {
namespace editor {
namespace cli {
namespace {
// POSIX-shell quoting, mirroring the helper in cli_bulk_validate:
// the whole argument is wrapped in single quotes, and every
// embedded single quote is emitted as '"'"' (close the quote,
// add a double-quoted ', reopen the quote).
std::string shellQuote(const std::string& s) {
    std::string quoted;
    quoted.reserve(s.size() + 2);
    quoted += '\'';
    std::size_t from = 0;
    for (;;) {
        // Copy the run of ordinary characters up to the next quote.
        const std::size_t hit = s.find('\'', from);
        if (hit == std::string::npos) {
            quoted.append(s, from, std::string::npos);
            break;
        }
        quoted.append(s, from, hit - from);
        quoted += "'\"'\"'";
        from = hit + 1;
    }
    quoted += '\'';
    return quoted;
}
// Reads the first four bytes of `path` into `magic`.
// Returns true only when the file opened and all four bytes
// were read; on failure `magic` may hold a partial read.
bool peekMagic(const std::string& path, char magic[4]) {
    std::ifstream in(path, std::ios::binary);
    if (in.fail()) {
        return false;
    }
    in.read(magic, 4);
    return !in.fail() && in.gcount() == 4;
}
// Returns `base` with a trailing `extension` removed, so the
// per-format --info-wXXX subprocess receives the bare base path
// it expects. A null or empty extension, or a path that does not
// end with the extension, is returned unchanged.
std::string normalizePathToBase(std::string base,
                                const char* extension) {
    if (extension == nullptr || extension[0] == '\0') {
        return base;
    }
    const std::size_t suffixLen = std::strlen(extension);
    if (suffixLen > base.size()) {
        return base;
    }
    const std::size_t cut = base.size() - suffixLen;
    if (std::memcmp(base.data() + cut, extension, suffixLen) == 0) {
        base.erase(cut);
    }
    return base;
}
// Runs `cmd` through popen and returns everything the child
// wrote to stdout, byte-for-byte (no trimming).
//
// outRc receives:
//   127          if popen itself failed,
//   -1           if pclose failed,
//   exit status  if the child exited normally,
//   128 + signal if the child was terminated by a signal (shell
//                convention), so a crashed child is never
//                mistaken for a successful rc of 0.
// On platforms without the wait-status macros the raw pclose
// value is stored, and any nonzero value means failure.
std::string runAndCapture(const std::string& cmd, int& outRc) {
    std::string buf;
    FILE* pipe = popen(cmd.c_str(), "r");
    if (!pipe) {
        outRc = 127;
        return buf;
    }
    // fread with an explicit length keeps embedded NUL bytes that
    // fgets + C-string append would silently truncate.
    char chunk[4096];
    std::size_t got = 0;
    while ((got = std::fread(chunk, 1, sizeof(chunk), pipe)) > 0) {
        buf.append(chunk, got);
    }
    const int rc = pclose(pipe);
#if defined(WIFEXITED) && defined(WEXITSTATUS) && defined(WTERMSIG)
    if (rc == -1) {
        outRc = rc;
    } else if (WIFEXITED(rc)) {
        outRc = WEXITSTATUS(rc);
    } else {
        // Not a normal exit: WEXITSTATUS would read as 0 here.
        outRc = 128 + WTERMSIG(rc);
    }
#else
    outRc = rc;
#endif
    return buf;
}
// Returns true when `k` is one of the field names that, by
// convention, point into OTHER catalogs rather than identifying
// the entry itself. nlohmann::json's default object storage is a
// std::map, so keys come back alphabetically; without this
// filter a "first *Id field" search would pick a foreign key
// that sorts ahead of the entry's own id (e.g. WHRT areaId
// before bindId). The comparison is exact and case-sensitive.
bool isExternalRefField(const std::string& k) {
    static const std::array<const char*, 31> kForeignKeyNames = {{
        "mapId", "areaId", "zoneId", "subAreaId",
        "spellId", "itemId", "npcId", "creatureId",
        "objectId", "gameObjectId",
        "factionId", "factionTemplateId",
        "difficultyId", "instanceId",
        "raceId", "classId", "classMask", "raceMask",
        "skillLineId", "questId", "talentId",
        "achievementId", "criteriaId", "lootId",
        "soundId", "movieId", "displayId", "modelId",
        "iconId", "textureId", "auraId",
    }};
    std::size_t idx = 0;
    while (idx < kForeignKeyNames.size() && k != kForeignKeyNames[idx]) {
        ++idx;
    }
    return idx < kForeignKeyNames.size();
}
// Extracts the primary-key VALUE of one catalog entry.
// Returns {false, 0} when `entry` is not an object or holds no
// integer field. Candidates are ranked, and within each rank the
// first key in iteration order (alphabetical for the default
// nlohmann::json object) wins:
//   1. integer field named *Id that is not a known foreign key,
//   2. any integer field named *Id,
//   3. any integer field.
// One walk records the earliest candidate of each rank and stops
// as soon as a rank-1 field is found.
std::pair<bool, uint64_t>
findEntryPrimaryKey(const nlohmann::json& entry) {
    if (!entry.is_object()) {
        return {false, 0};
    }
    const nlohmann::json* ownId = nullptr;
    const nlohmann::json* anyId = nullptr;
    const nlohmann::json* anyInteger = nullptr;
    for (auto field = entry.begin(); field != entry.end(); ++field) {
        const nlohmann::json& value = field.value();
        if (!value.is_number_integer()) {
            continue;
        }
        if (anyInteger == nullptr) {
            anyInteger = &value;
        }
        const std::string& name = field.key();
        const bool endsInId =
            name.size() >= 2 &&
            name.compare(name.size() - 2, 2, "Id") == 0;
        if (!endsInId) {
            continue;
        }
        if (anyId == nullptr) {
            anyId = &value;
        }
        if (!isExternalRefField(name)) {
            ownId = &value;
            break;
        }
    }
    const nlohmann::json* chosen = ownId;
    if (chosen == nullptr) chosen = anyId;
    if (chosen == nullptr) chosen = anyInteger;
    if (chosen == nullptr) {
        return {false, 0};
    }
    return {true, chosen->get<uint64_t>()};
}
// Reports the NAME of the field that primary-key detection
// selects for `entry`, so the operator can see which field was
// matched (compId, bindId, broadcastId, ...) without memorizing
// per-format conventions. Ranking, with the first key in
// iteration order winning inside each rank:
//   1. integer field named *Id that is not a known foreign key,
//   2. any integer field named *Id,
//   3. any integer field.
// Returns an empty string for non-objects and for objects with
// no integer field.
std::string findEntryPrimaryKeyName(const nlohmann::json& entry) {
    if (!entry.is_object()) {
        return {};
    }
    const std::string* ownId = nullptr;
    const std::string* anyId = nullptr;
    const std::string* anyInteger = nullptr;
    for (auto field = entry.begin(); field != entry.end(); ++field) {
        if (!field.value().is_number_integer()) {
            continue;
        }
        const std::string& name = field.key();
        if (anyInteger == nullptr) {
            anyInteger = &name;
        }
        const bool endsInId =
            name.size() >= 2 &&
            name.compare(name.size() - 2, 2, "Id") == 0;
        if (!endsInId) {
            continue;
        }
        if (anyId == nullptr) {
            anyId = &name;
        }
        if (!isExternalRefField(name)) {
            ownId = &name;
            break;
        }
    }
    if (ownId != nullptr) return *ownId;
    if (anyId != nullptr) return *anyId;
    if (anyInteger != nullptr) return *anyInteger;
    return {};
}
int handlePluck(int& i, int argc, char** argv) {
if (i + 2 >= argc) {
std::fprintf(stderr,
"catalog-pluck: usage: --catalog-pluck "
"<wXXX-file> <id> [--json]\n");
return 1;
}
std::string fileArg = argv[++i];
std::string idArg = argv[++i];
bool jsonOut = consumeJsonFlag(i, argc, argv);
// Parse search id as unsigned integer.
uint64_t searchId = 0;
try {
searchId = std::stoull(idArg);
} catch (...) {
std::fprintf(stderr,
"catalog-pluck: <id> must be a numeric literal "
"(got '%s')\n", idArg.c_str());
return 1;
}
// Read the magic. If file lookup fails directly, try
// again after appending the format-table extension
// matched by the leading 4 bytes of any sibling file.
std::string filePath = fileArg;
char magic[4]{};
if (!peekMagic(filePath, magic)) {
// Try common extensions: scan the format table
// and attempt each ".wXXX" suffix.
for (const FormatMagicEntry* row = formatTableBegin();
row != formatTableEnd(); ++row) {
std::string with = fileArg + row->extension;
if (peekMagic(with, magic)) {
filePath = with;
break;
}
}
}
if (magic[0] == 0) {
std::fprintf(stderr,
"catalog-pluck: cannot read magic from '%s' "
"(file not found?)\n", fileArg.c_str());
return 1;
}
const FormatMagicEntry* fmt = findFormatByMagic(magic);
if (!fmt) {
std::fprintf(stderr,
"catalog-pluck: unknown magic '%c%c%c%c' in '%s'\n",
magic[0], magic[1], magic[2], magic[3],
filePath.c_str());
return 1;
}
if (!fmt->infoFlag) {
std::fprintf(stderr,
"catalog-pluck: format '%c%c%c%c' has no "
"--info-* flag in the format table — pluck "
"is only supported for catalogs with an "
"--info-* surface\n",
magic[0], magic[1], magic[2], magic[3]);
return 1;
}
// Build the subprocess invocation: the same binary
// (argv[0]) with the per-format inspect flag and JSON
// output. Strip the extension so the inspect handler
// sees the bare base path it expects.
std::string base = normalizePathToBase(filePath, fmt->extension);
std::string cmd = shellQuote(argv[0]) + " " +
fmt->infoFlag + " " +
shellQuote(base) + " --json 2>/dev/null";
int rc = 0;
std::string stdoutBuf = runAndCapture(cmd, rc);
if (rc != 0 || stdoutBuf.empty()) {
std::fprintf(stderr,
"catalog-pluck: inspect subprocess for '%s' "
"failed (rc=%d)\n", filePath.c_str(), rc);
return 1;
}
nlohmann::json doc;
try {
doc = nlohmann::json::parse(stdoutBuf);
} catch (const std::exception& ex) {
std::fprintf(stderr,
"catalog-pluck: failed to parse inspect output "
"as JSON: %s\n", ex.what());
return 1;
}
if (!doc.contains("entries") || !doc["entries"].is_array()) {
std::fprintf(stderr,
"catalog-pluck: inspect output has no "
"'entries' array\n");
return 1;
}
// Locate the entry whose primary-key field matches.
const nlohmann::json* match = nullptr;
std::string keyName;
for (const auto& entry : doc["entries"]) {
auto [ok, key] = findEntryPrimaryKey(entry);
if (ok && key == searchId) {
match = &entry;
keyName = findEntryPrimaryKeyName(entry);
break;
}
}
if (!match) {
std::fprintf(stderr,
"catalog-pluck: no entry with id %llu in '%s' "
"(searched %zu entries)\n",
static_cast<unsigned long long>(searchId),
filePath.c_str(),
doc["entries"].size());
return 1;
}
if (jsonOut) {
nlohmann::json out;
out["file"] = filePath;
out["magic"] = std::string(magic, 4);
out["primaryKey"] = keyName;
out["entry"] = *match;
std::printf("%s\n", out.dump(2).c_str());
return 0;
}
// Pretty terminal output.
std::printf("catalog-pluck: %s\n", filePath.c_str());
std::printf(" magic : '%c%c%c%c'\n",
magic[0], magic[1], magic[2], magic[3]);
std::printf(" primaryKey : %s = %llu\n",
keyName.c_str(),
static_cast<unsigned long long>(searchId));
std::printf(" entry:\n");
for (auto it = match->begin(); it != match->end(); ++it) {
const std::string& k = it.key();
const auto& v = it.value();
std::string vs;
if (v.is_string()) {
vs = v.get<std::string>();
} else if (v.is_number_integer()) {
vs = std::to_string(v.get<long long>());
} else if (v.is_number_float()) {
char buf[32];
std::snprintf(buf, sizeof(buf), "%g",
v.get<double>());
vs = buf;
} else if (v.is_boolean()) {
vs = v.get<bool>() ? "true" : "false";
} else {
vs = v.dump();
}
std::printf(" %-22s : %s\n", k.c_str(), vs.c_str());
}
return 0;
}
} // namespace
// Dispatch-table entry point for --catalog-pluck.
// Returns false when argv[i] is some other flag, leaving outRc
// untouched. Otherwise runs the pluck command, stores its exit
// code in outRc and returns true.
//
// Arity is deliberately NOT checked here: handlePluck validates
// it and prints the usage line. Guarding on `i + 2 < argc` at
// this level made that diagnostic unreachable, so an invocation
// with a missing <id> fell through without being claimed.
bool handleCatalogPluck(int& i, int argc, char** argv, int& outRc) {
    if (std::strcmp(argv[i], "--catalog-pluck") != 0) {
        return false;
    }
    outRc = handlePluck(i, argc, argv);
    return true;
}
} // namespace cli
} // namespace editor
} // namespace wowee

View file

@ -0,0 +1,11 @@
#pragma once
namespace wowee {
namespace editor {
namespace cli {
// Dispatch hook for `--catalog-pluck <wXXX-file> <id> [--json]`.
// Inspects argv[i]; returns true if this handler claimed the
// argument (outRc then holds the command's exit code, 0 on
// success and 1 on failure, and i is advanced past the consumed
// arguments), or false if the argument was not claimed.
bool handleCatalogPluck(int& i, int argc, char** argv, int& outRc);
} // namespace cli
} // namespace editor
} // namespace wowee

View file

@ -143,6 +143,7 @@
#include "cli_group_compositions_catalog.hpp"
#include "cli_hearth_binds_catalog.hpp"
#include "cli_server_broadcasts_catalog.hpp"
#include "cli_catalog_pluck.hpp"
#include "cli_quest_objective.hpp"
#include "cli_quest_reward.hpp"
#include "cli_clone.hpp"
@ -327,6 +328,7 @@ constexpr DispatchFn kDispatchTable[] = {
handleGroupCompositionsCatalog,
handleHearthBindsCatalog,
handleServerBroadcastsCatalog,
handleCatalogPluck,
handleQuestObjective,
handleQuestReward,
handleClone,

View file

@ -2139,6 +2139,8 @@ void printUsage(const char* argv0) {
std::printf(" Export binary .wscb to a human-editable JSON sidecar (defaults to <base>.wscb.json; emits both channelKind/factionFilter ints AND name strings)\n");
std::printf(" --import-wscb-json <json-path> [out-base]\n");
std::printf(" Import a .wscb.json sidecar back into binary .wscb (channelKind int OR \"login\"/\"system\"/\"raidwarning\"/\"motd\"/\"helptip\"; factionFilter int OR \"alliance\"/\"horde\"/\"both\")\n");
std::printf(" --catalog-pluck <wXXX-file> <id> [--json]\n");
std::printf(" Extract one entry by id from any registered catalog format. Auto-detects magic, dispatches to the per-format --info-* handler internally, then prints just the matching entry. Primary-key field is auto-detected (first *Id field, or first numeric)\n");
std::printf(" --gen-weather-temperate <wow-base> [zoneName]\n");
std::printf(" Emit .wow weather schedule: clear-dominant + occasional rain + fog (forest / grassland)\n");
std::printf(" --gen-weather-arctic <wow-base> [zoneName]\n");