From 471ddfef07ecac30741c3af6075c1e2809b5eb0b Mon Sep 17 00:00:00 2001
From: Kelsi
Date: Sun, 10 May 2026 01:09:09 -0700
Subject: [PATCH] feat(editor): add --catalog-find directory-wide id search
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

New utility complements --catalog-pluck (single-file id lookup) by
walking a directory tree recursively and searching every catalog for
entries whose primary key matches the supplied id. Reports each hit as
[WXXX] file:fieldName=id "name" so the operator can locate where any
given id lives across a 100+ format project. Useful when chasing
cross-references like "id 631 is referenced by WGRP.mapId — where is
it actually defined?"

Optional --magic filter narrows the search to one format family.
Necessary because primary-key id ranges overlap across formats (id=200
might be both a WCMG group and a WGRP composition); without the filter
the operator gets all collisions, which is itself useful for spotting
unintentional id reuse.

Auto-detects per-file format magic, skips files with unknown magic and
files whose format has no --info-* surface (asset formats like .wom
that aren't catalog-shaped).

Re-uses the same primary-key auto-discovery + external-ref filter as
--catalog-pluck. Both utilities should grow into a shared helper header
once a third utility needs the same lookup logic — for now a noted
duplication.

CLI flag count 1133 -> 1134.
---
 CMakeLists.txt                    |   1 +
 tools/editor/cli_arg_required.cpp |   2 +-
 tools/editor/cli_catalog_find.cpp | 363 ++++++++++++++++++++++++++++++
 tools/editor/cli_catalog_find.hpp |  11 +
 tools/editor/cli_dispatch.cpp     |   2 +
 tools/editor/cli_help.cpp         |   2 +
 6 files changed, 380 insertions(+), 1 deletion(-)
 create mode 100644 tools/editor/cli_catalog_find.cpp
 create mode 100644 tools/editor/cli_catalog_find.hpp

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 6edaf0b8..2ddc870a 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -1544,6 +1544,7 @@ add_executable(wowee_editor
     tools/editor/cli_realm_list_catalog.cpp
     tools/editor/cli_emotes_catalog.cpp
     tools/editor/cli_catalog_pluck.cpp
+    tools/editor/cli_catalog_find.cpp
     tools/editor/cli_quest_objective.cpp
     tools/editor/cli_quest_reward.cpp
     tools/editor/cli_clone.cpp
diff --git a/tools/editor/cli_arg_required.cpp b/tools/editor/cli_arg_required.cpp
index 7355be74..8029ad1d 100644
--- a/tools/editor/cli_arg_required.cpp
+++ b/tools/editor/cli_arg_required.cpp
@@ -137,7 +137,7 @@ const char* const kArgRequired[] = {
     "--info-magic", "--summary-dir", "--rename-by-magic",
     "--bulk-rename-by-magic", "--touch-tree", "--tree-summary-md",
     "--catalog-grep", "--diff-headers", "--audit-tree",
-    "--catalog-pluck",
+    "--catalog-pluck", "--catalog-find",
     "--magic-fix", "--bulk-validate",
     "--bulk-export-json", "--bulk-import-json",
     "--diff-tree", "--orphan-jsons", "--list-by-magic",
diff --git a/tools/editor/cli_catalog_find.cpp b/tools/editor/cli_catalog_find.cpp
new file mode 100644
index 00000000..42a98ad7
--- /dev/null
+++ b/tools/editor/cli_catalog_find.cpp
@@ -0,0 +1,363 @@
+#include "cli_catalog_find.hpp"
+#include "cli_arg_parse.hpp"
+#include "cli_format_table.hpp"
+
+#include <nlohmann/json.hpp>
+
+#include <cstdint>
+#include <cstdio>
+#include <cstring>
+#include <filesystem>
+#include <fstream>
+#include <string>
+#include <utility>
+#include <vector>
+
+namespace wowee {
+namespace editor {
+namespace cli {
+
+namespace {
+
+namespace fs = std::filesystem;
+
+// Wraps s in single quotes for a POSIX shell command line.
+// An embedded ' is emitted as '"'"' (close the quote, add
+// a double-quoted ', reopen the quote).
+std::string shellQuote(const std::string& s) {
+    std::string out;
+    out.reserve(s.size() + 2);
+    out.push_back('\'');
+    for (char c : s) {
+        if (c == '\'') out += "'\"'\"'";
+        else out.push_back(c);
+    }
+    out.push_back('\'');
+    return out;
+}
+
+// Reads the first 4 bytes of the file into magic. Returns
+// false when the file cannot be opened or holds fewer
+// than 4 bytes.
+bool peekMagic(const fs::path& path, char magic[4]) {
+    std::ifstream is(path, std::ios::binary);
+    if (!is) return false;
+    if (!is.read(magic, 4) || is.gcount() != 4) return false;
+    return true;
+}
+
+// Same external-ref filter as cli_catalog_pluck. Kept in
+// sync — when a new format adds a foreign-key suffix that
+// the old filter misses, both files must be updated.
+// Future cleanup: share via cli_catalog_pluck.hpp once
+// either utility needs a third common helper.
+bool isExternalRefField(const std::string& k) {
+    static const char* kExternals[] = {
+        "mapId", "areaId", "zoneId", "subAreaId",
+        "spellId", "itemId", "npcId", "creatureId",
+        "objectId", "gameObjectId",
+        "factionId", "factionTemplateId",
+        "difficultyId", "instanceId",
+        "raceId", "classId", "classMask", "raceMask",
+        "skillLineId", "questId", "talentId",
+        "achievementId", "criteriaId", "lootId",
+        "soundId", "movieId", "displayId", "modelId",
+        "iconId", "textureId", "auraId",
+        "animationId", "particleId", "ribbonId",
+        "vehicleId", "seatId", "currencyId",
+        "trainerId", "vendorId", "mailTemplateId",
+    };
+    for (const char* ref : kExternals) {
+        if (k == ref) return true;
+    }
+    return false;
+}
+
+// Locates the field used as an entry's primary key, in
+// the object's iteration order. Preference: (1) first
+// integer "*Id" field that is not a known external
+// reference, (2) first integer "*Id" field of any kind,
+// (3) first integer field. Returns entry.end() when entry
+// is not an object or has no integer field. A single
+// lookup backs both accessors below, so the reported
+// field name always belongs to the value compared.
+nlohmann::json::const_iterator
+findPrimaryKeyField(const nlohmann::json& entry) {
+    if (!entry.is_object()) return entry.end();
+    const auto endsWithId = [](const std::string& k) {
+        return k.size() >= 2 &&
+               k.compare(k.size() - 2, 2, "Id") == 0;
+    };
+    for (auto it = entry.begin(); it != entry.end(); ++it) {
+        if (it.value().is_number_integer() &&
+            endsWithId(it.key()) &&
+            !isExternalRefField(it.key())) return it;
+    }
+    for (auto it = entry.begin(); it != entry.end(); ++it) {
+        if (it.value().is_number_integer() &&
+            endsWithId(it.key())) return it;
+    }
+    for (auto it = entry.begin(); it != entry.end(); ++it) {
+        if (it.value().is_number_integer()) return it;
+    }
+    return entry.end();
+}
+
+// Returns {true, value} for the entry's primary key, or
+// {false, 0} when no candidate field exists.
+std::pair<bool, uint64_t>
+findEntryPrimaryKey(const nlohmann::json& entry) {
+    const auto it = findPrimaryKeyField(entry);
+    if (it == entry.end()) return {false, 0};
+    return {true, it.value().get<uint64_t>()};
+}
+
+// Returns the name of the entry's primary-key field, or an
+// empty string when no candidate field exists.
+std::string findEntryPrimaryKeyName(const nlohmann::json& entry) {
+    const auto it = findPrimaryKeyField(entry);
+    if (it == entry.end()) return {};
+    return it.key();
+}
+
+// Runs cmd through popen() and returns everything it
+// wrote to stdout. outRc is 127 when the pipe could not
+// be opened; otherwise the pclose() result, decoded with
+// WEXITSTATUS where that macro is available.
+std::string runAndCapture(const std::string& cmd, int& outRc) {
+    std::string buf;
+    FILE* pipe = popen(cmd.c_str(), "r");
+    if (!pipe) {
+        outRc = 127;
+        return buf;
+    }
+    char chunk[4096];
+    while (std::fgets(chunk, sizeof(chunk), pipe) != nullptr) {
+        buf += chunk;
+    }
+    int rc = pclose(pipe);
+#ifdef WEXITSTATUS
+    outRc = (rc != -1) ? WEXITSTATUS(rc) : rc;
+#else
+    outRc = rc;
+#endif
+    return buf;
+}
+
+// One matching catalog entry plus the file it came from.
+struct Hit {
+    fs::path path;
+    std::string magic; // 4-char as string
+    std::string primaryKeyField;
+    std::string entryName;
+    nlohmann::json entry;
+};
+
+// Implements --catalog-find <dir> <id> [--magic <WXXX>]
+// [--json]. Expects argv[i] to be the flag itself and
+// advances i past every argument it consumes. Returns 0
+// when at least one entry matched, 1 on a usage or
+// argument error or when nothing matched.
+int handleFind(int& i, int argc, char** argv) {
+    if (i + 2 >= argc) {
+        std::fprintf(stderr,
+            "catalog-find: usage: --catalog-find "
+            "<dir> <id> [--magic <WXXX>] [--json]\n");
+        return 1;
+    }
+    std::string dir = argv[++i];
+    std::string idArg = argv[++i];
+    bool jsonOut = consumeJsonFlag(i, argc, argv);
+    // Optional --magic <WXXX> filter to limit search to
+    // one format. Useful when an id is a primary key in
+    // multiple format families and you only want hits from
+    // one (e.g. id 100 matches both WGRP comp 100 and
+    // WSCB broadcast 100 — --magic WGRP narrows it).
+    std::string magicFilter;
+    while (i + 1 < argc && std::strcmp(argv[i + 1], "--magic") == 0 &&
+           i + 2 < argc) {
+        ++i;
+        magicFilter = argv[++i];
+    }
+    // The usage text lists --json after --magic, so look
+    // for it again once the --magic pair is consumed.
+    if (!jsonOut) jsonOut = consumeJsonFlag(i, argc, argv);
+    // File magics are exactly 4 bytes and may end in
+    // spaces (e.g. "WOM "). Pad short filters so that
+    // --magic WOM matches, and reject over-long ones
+    // rather than silently matching nothing.
+    if (magicFilter.size() > 4) {
+        std::fprintf(stderr,
+            "catalog-find: --magic must be at most 4 "
+            "characters (got '%s')\n", magicFilter.c_str());
+        return 1;
+    }
+    if (!magicFilter.empty()) magicFilter.resize(4, ' ');
+
+    if (!fs::exists(dir) || !fs::is_directory(dir)) {
+        std::fprintf(stderr,
+            "catalog-find: not a directory: %s\n", dir.c_str());
+        return 1;
+    }
+
+    // Accept plain decimal digits only: std::stoull by
+    // itself would also take "12abc" (as 12) and "-1"
+    // (wrapped to 2^64-1).
+    uint64_t searchId = 0;
+    bool idOk = !idArg.empty() &&
+        idArg.find_first_not_of("0123456789") == std::string::npos;
+    if (idOk) {
+        try {
+            searchId = std::stoull(idArg);
+        } catch (...) {
+            idOk = false; // value does not fit in 64 bits
+        }
+    }
+    if (!idOk) {
+        std::fprintf(stderr,
+            "catalog-find: <id> must be a numeric literal "
+            "(got '%s')\n", idArg.c_str());
+        return 1;
+    }
+
+    std::vector<Hit> hits;
+    size_t scanned = 0;
+    size_t skippedNoFlag = 0;
+    size_t skippedUnknownMagic = 0;
+
+    for (const auto& dirent :
+         fs::recursive_directory_iterator(dir)) {
+        if (!dirent.is_regular_file()) continue;
+        char magic[4]{};
+        if (!peekMagic(dirent.path(), magic)) continue;
+        const FormatMagicEntry* fmt = findFormatByMagic(magic);
+        if (!fmt) {
+            ++skippedUnknownMagic;
+            continue;
+        }
+        // magicFilter is already padded to 4 bytes, so a
+        // byte compare also matches space-padded table
+        // magics (e.g. "WOM ").
+        if (!magicFilter.empty() &&
+            std::memcmp(magicFilter.data(), magic, 4) != 0) continue;
+        if (!fmt->infoFlag) {
+            ++skippedNoFlag;
+            continue;
+        }
+        ++scanned;
+        // Strip extension to get the base path the
+        // per-format inspect handler expects.
+        std::string base = dirent.path().string();
+        if (fmt->extension && *fmt->extension) {
+            size_t extLen = std::strlen(fmt->extension);
+            if (base.size() >= extLen &&
+                base.compare(base.size() - extLen, extLen,
+                             fmt->extension) == 0) {
+                base.resize(base.size() - extLen);
+            }
+        }
+        // Re-run this executable with the format's own
+        // --info-* flag and read its --json output.
+        std::string cmd = shellQuote(argv[0]) + " " +
+                          fmt->infoFlag + " " +
+                          shellQuote(base) + " --json 2>/dev/null";
+        int rc = 0;
+        std::string out = runAndCapture(cmd, rc);
+        if (rc != 0 || out.empty()) continue;
+        nlohmann::json doc;
+        try {
+            doc = nlohmann::json::parse(out);
+        } catch (...) {
+            continue; // output was not valid JSON — skip file
+        }
+        if (!doc.contains("entries") ||
+            !doc["entries"].is_array()) continue;
+        for (const auto& entry : doc["entries"]) {
+            auto [ok, key] = findEntryPrimaryKey(entry);
+            if (!ok || key != searchId) continue;
+            Hit h;
+            h.path = dirent.path();
+            h.magic = std::string(magic, 4);
+            h.primaryKeyField = findEntryPrimaryKeyName(entry);
+            if (entry.is_object() && entry.contains("name") &&
+                entry["name"].is_string()) {
+                h.entryName = entry["name"].get<std::string>();
+            }
+            h.entry = entry;
+            hits.push_back(std::move(h));
+        }
+    }
+
+    if (jsonOut) {
+        nlohmann::json out;
+        out["directory"] = dir;
+        out["searchId"] = searchId;
+        if (!magicFilter.empty()) out["magicFilter"] = magicFilter;
+        out["scanned"] = scanned;
+        out["hits"] = nlohmann::json::array();
+        for (const auto& h : hits) {
+            out["hits"].push_back({
+                {"file", h.path.string()},
+                {"magic", h.magic},
+                {"primaryKey", h.primaryKeyField},
+                {"name", h.entryName},
+                {"entry", h.entry},
+            });
+        }
+        std::printf("%s\n", out.dump(2).c_str());
+        return hits.empty() ? 1 : 0;
+    }
+
+    std::printf("catalog-find: searched %zu catalog files "
+                "in '%s' for id=%llu",
+                scanned, dir.c_str(),
+                static_cast<unsigned long long>(searchId));
+    if (!magicFilter.empty()) {
+        std::printf(" (magic=%s)", magicFilter.c_str());
+    }
+    std::printf("\n");
+    if (skippedNoFlag > 0) {
+        std::printf(" (skipped %zu files: format has no "
+                    "--info-* surface)\n", skippedNoFlag);
+    }
+    if (skippedUnknownMagic > 0) {
+        std::printf(" (skipped %zu files: unknown magic)\n",
+                    skippedUnknownMagic);
+    }
+    if (hits.empty()) {
+        std::printf(" no hits — id %llu is not a primary "
+                    "key in any catalog under this tree\n",
+                    static_cast<unsigned long long>(searchId));
+        return 1;
+    }
+    std::printf(" hits (%zu):\n", hits.size());
+    for (const auto& h : hits) {
+        std::printf(" [%s] %s:%s=%llu",
+                    h.magic.c_str(), h.path.string().c_str(),
+                    h.primaryKeyField.c_str(),
+                    static_cast<unsigned long long>(searchId));
+        if (!h.entryName.empty()) {
+            std::printf(" \"%s\"", h.entryName.c_str());
+        }
+        std::printf("\n");
+    }
+    return 0;
+}
+
+} // namespace
+
+// Dispatch entry point. Claims argv[i] only when it is
+// --catalog-find followed by at least two more arguments;
+// outRc then carries handleFind's exit code.
+bool handleCatalogFind(int& i, int argc, char** argv, int& outRc) {
+    if (std::strcmp(argv[i], "--catalog-find") == 0 &&
+        i + 2 < argc) {
+        outRc = handleFind(i, argc, argv);
+        return true;
+    }
+    return false;
+}
+
+} // namespace cli
+} // namespace editor
+} // namespace wowee
diff --git a/tools/editor/cli_catalog_find.hpp b/tools/editor/cli_catalog_find.hpp
new file mode 100644
index 00000000..67483201
--- /dev/null
+++ b/tools/editor/cli_catalog_find.hpp
@@ -0,0 +1,11 @@
+#pragma once
+
+namespace wowee {
+namespace editor {
+namespace cli {
+
+bool handleCatalogFind(int& i, int argc, char** argv, int& outRc);
+
+} // namespace cli
+} // namespace editor
+} // namespace wowee
diff --git a/tools/editor/cli_dispatch.cpp b/tools/editor/cli_dispatch.cpp
index b73483c1..296a9167 100644
--- a/tools/editor/cli_dispatch.cpp
+++ b/tools/editor/cli_dispatch.cpp
@@ -147,6 +147,7 @@
 #include "cli_realm_list_catalog.hpp"
 #include "cli_emotes_catalog.hpp"
 #include "cli_catalog_pluck.hpp"
+#include "cli_catalog_find.hpp"
 #include "cli_quest_objective.hpp"
 #include "cli_quest_reward.hpp"
 #include "cli_clone.hpp"
@@ -335,6 +336,7 @@ constexpr DispatchFn kDispatchTable[] = {
     handleRealmListCatalog,
     handleEmotesCatalog,
     handleCatalogPluck,
+    handleCatalogFind,
     handleQuestObjective,
     handleQuestReward,
     handleClone,
diff --git a/tools/editor/cli_help.cpp b/tools/editor/cli_help.cpp
index f50084ab..398e5a62 100644
--- a/tools/editor/cli_help.cpp
+++ b/tools/editor/cli_help.cpp
@@ -2183,6 +2183,8 @@ void printUsage(const char* argv0) {
     std::printf(" Import a .wemo.json sidecar back into binary .wemo (emoteKind int OR \"social\"/\"combat\"/\"roleplay\"/\"system\"; sex int OR \"both\"/\"male\"/\"female\"; ttsHint int OR \"talk\"/\"whisper\"/\"yell\"/\"silent\")\n");
     std::printf(" --catalog-pluck [--json]\n");
     std::printf(" Extract one entry by id from any registered catalog format. Auto-detects magic, dispatches to the per-format --info-* handler internally, then prints just the matching entry. Primary-key field is auto-detected (first *Id field, or first numeric)\n");
+    std::printf(" --catalog-find <dir> <id> [--magic <WXXX>] [--json]\n");
+    std::printf(" Search every catalog file under <dir> for an entry with the given id (recursive walk). Prints all hits as [WXXX] file:fieldName=id name. Use --magic to limit search to one format family when the same id is a primary key in multiple\n");
     std::printf(" --gen-weather-temperate [zoneName]\n");
     std::printf(" Emit .wow weather schedule: clear-dominant + occasional rain + fog (forest / grassland)\n");
     std::printf(" --gen-weather-arctic [zoneName]\n");