diff --git a/CMakeLists.txt b/CMakeLists.txt index 111cac32..1530fc34 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1484,6 +1484,7 @@ add_executable(wowee_editor tools/editor/cli_diff_tree.cpp tools/editor/cli_orphan_jsons.cpp tools/editor/cli_list_by_magic.cpp + tools/editor/cli_catalog_stats.cpp tools/editor/cli_macros_catalog.cpp tools/editor/cli_char_features_catalog.cpp tools/editor/cli_pvp_catalog.cpp diff --git a/tools/editor/cli_arg_required.cpp b/tools/editor/cli_arg_required.cpp index fd8bf851..b7242ebc 100644 --- a/tools/editor/cli_arg_required.cpp +++ b/tools/editor/cli_arg_required.cpp @@ -140,6 +140,7 @@ const char* const kArgRequired[] = { "--magic-fix", "--bulk-validate", "--bulk-export-json", "--bulk-import-json", "--diff-tree", "--orphan-jsons", "--list-by-magic", + "--catalog-stats", "--gen-animations", "--gen-animations-combat", "--gen-animations-movement", "--info-wani", "--validate-wani", "--export-wani-json", "--import-wani-json", diff --git a/tools/editor/cli_catalog_stats.cpp b/tools/editor/cli_catalog_stats.cpp new file mode 100644 index 00000000..37a40cea --- /dev/null +++ b/tools/editor/cli_catalog_stats.cpp @@ -0,0 +1,182 @@ +#include "cli_catalog_stats.hpp" +#include "cli_arg_parse.hpp" +#include "cli_format_table.hpp" + +#include + +#include +#include +#include +#include +#include +#include +#include + +namespace wowee { +namespace editor { +namespace cli { + +namespace { + +namespace fs = std::filesystem; + +// Probe a catalog file's header and read the leading +// uint32 of the first entry as a sample ID. Nothing past +// that field is read, so large files are never scanned +// in full. 
+struct StatsProbe {
+    // Decoded header fields plus size figures derived from them.
+    // Multi-byte integers are copied from the file as raw bytes, so
+    // they are interpreted in host byte order.
+    bool headerOk = false;          // true once every header field was read
+    char magic[4] = {0, 0, 0, 0};   // raw 4-byte tag; not NUL-terminated
+    uint32_t version = 0;
+    std::string catalogName;
+    uint32_t entryCount = 0;
+    uintmax_t totalBytes = 0;       // 0 when the file size could not be queried
+
+    // Header byte layout:
+    // magic(4) + version(4) + nameLen(4) + name(nameLen) + entryCount(4)
+    uintmax_t headerBytes = 0;
+    uintmax_t entrySectionBytes = 0;  // totalBytes - headerBytes, or 0
+    double averageEntryBytes = 0.0;   // entrySectionBytes / entryCount
+    uint32_t catalogNameBytes = 0;
+
+    // Leading uint32 of the first entry, when it could be read.
+    // probe() pushes at most one element: it only reads the field
+    // located directly after the header and never advances to later
+    // entries, so this is a sample, not an enumeration.
+    std::vector<uint32_t> firstEntryIds;
+};
+
+// Reads the header of `path` into `out` and derives the size figures.
+//
+// Returns false when the file cannot be opened, when any header field
+// is truncated, or when the name length exceeds the 1 MiB sanity cap;
+// `out` may be partially filled in that case. Returns true, with
+// out.headerOk set, once the whole header has been read.
+bool probe(const fs::path& path, StatsProbe& out) {
+    std::error_code ec;
+    out.totalBytes = fs::file_size(path, ec);
+    // On error file_size() returns uintmax_t(-1); normalise to 0 so the
+    // size arithmetic below stays meaningful.
+    if (ec) out.totalBytes = 0;
+    std::ifstream is(path, std::ios::binary);
+    if (!is) return false;
+    if (!is.read(out.magic, 4) || is.gcount() != 4) return false;
+    if (!is.read(reinterpret_cast<char*>(&out.version), 4)) return false;
+    uint32_t nameLen = 0;
+    if (!is.read(reinterpret_cast<char*>(&nameLen), 4)) return false;
+    // Reject absurd lengths before allocating the name buffer.
+    if (nameLen > (1u << 20)) return false;
+    out.catalogName.resize(nameLen);
+    if (nameLen > 0) {
+        is.read(out.catalogName.data(), nameLen);
+        if (is.gcount() != static_cast<std::streamsize>(nameLen)) {
+            out.catalogName.clear();
+            return false;
+        }
+    }
+    out.catalogNameBytes = nameLen;
+    if (!is.read(reinterpret_cast<char*>(&out.entryCount), 4))
+        return false;
+    out.headerOk = true;
+    out.headerBytes = 4 + 4 + 4 + nameLen + 4;  // magic+ver+nameLen+name+count
+    if (out.totalBytes >= out.headerBytes) {
+        out.entrySectionBytes = out.totalBytes - out.headerBytes;
+    }
+    if (out.entryCount > 0) {
+        out.averageEntryBytes =
+            static_cast<double>(out.entrySectionBytes) /
+            static_cast<double>(out.entryCount);
+    }
+    // Read just the first entry's leading uint32 as a
+    // reliable sample. We can't advance to subsequent
+    // entries without knowing the per-format size (each
+    // entry has variable-length name+description strings),
+    // so multi-sampling produces garbage for most formats.
+    // The first id is always at exactly headerBytes — that
+    // one we can trust.
+    if (out.entryCount > 0 && out.entrySectionBytes >= 4) {
+        is.seekg(static_cast<std::streamoff>(out.headerBytes),
+                 std::ios::beg);
+        uint32_t id = 0;
+        if (is.read(reinterpret_cast<char*>(&id), 4) &&
+            is.gcount() == 4) {
+            out.firstEntryIds.push_back(id);
+        }
+    }
+    return true;
+}
+
+// Implements `--catalog-stats`: probes the file named by the next
+// argument and prints its statistics as text or, when the JSON flag is
+// present, as a JSON object.
+//
+// `i` is advanced past the path here; consumeJsonFlag() is defined
+// elsewhere and presumably advances it past a trailing `--json`.
+// Returns 0 on success, 1 when the file is missing or its header
+// cannot be read.
+int handleStats(int& i, int argc, char** argv) {
+    std::string path = argv[++i];
+    bool jsonOut = consumeJsonFlag(i, argc, argv);
+    // Non-throwing overload: an OS-level failure (e.g. permission
+    // denied on a parent directory) is reported as "not found" instead
+    // of escaping as std::filesystem::filesystem_error.
+    std::error_code existsEc;
+    if (!fs::exists(path, existsEc)) {
+        std::fprintf(stderr,
+            "catalog-stats: file not found: %s\n", path.c_str());
+        return 1;
+    }
+    StatsProbe p;
+    if (!probe(path, p) || !p.headerOk) {
+        std::fprintf(stderr,
+            "catalog-stats: failed to read header from %s\n",
+            path.c_str());
+        return 1;
+    }
+    const FormatMagicEntry* fmt = findFormatByMagic(p.magic);
+    if (jsonOut) {
+        nlohmann::json j;
+        j["path"] = path;
+        char ms[5] = {p.magic[0], p.magic[1], p.magic[2],
+                      p.magic[3], 0};
+        j["magic"] = ms;
+        if (fmt) {
+            j["format"] = fmt->extension;
+            j["category"] = fmt->category;
+            j["description"] = fmt->description;
+        } else {
+            j["format"] = nullptr;
+        }
+        j["version"] = p.version;
+        j["catalogName"] = p.catalogName;
+        j["entryCount"] = p.entryCount;
+        j["totalBytes"] = p.totalBytes;
+        j["headerBytes"] = p.headerBytes;
+        j["entrySectionBytes"] = p.entrySectionBytes;
+        j["catalogNameBytes"] = p.catalogNameBytes;
+        j["averageEntryBytes"] = p.averageEntryBytes;
+        // 0 is emitted when no entry ID could be sampled.
+        j["firstEntryId"] = p.firstEntryIds.empty()
+            ? 0u : p.firstEntryIds.front();
+        // magic and catalogName are raw file bytes and path is a raw
+        // argv string, so none is guaranteed to be valid UTF-8. The
+        // default strict handler makes dump() throw on such input;
+        // substitute U+FFFD so the report is still produced.
+        std::printf("%s\n",
+            j.dump(2, ' ', false,
+                   nlohmann::json::error_handler_t::replace).c_str());
+        return 0;
+    }
+    char ms[5] = {p.magic[0], p.magic[1], p.magic[2], p.magic[3], 0};
+    std::printf("catalog-stats: %s\n", path.c_str());
+    std::printf(" magic : '%s'%s\n", ms,
+                fmt ? "" : " (unknown — not in format table)");
+    if (fmt) {
+        std::printf(" format : %s (%s, %s)\n",
+                    fmt->description, fmt->extension, fmt->category);
+    }
+    std::printf(" version : %u\n", p.version);
+    std::printf(" catalogName : %s\n", p.catalogName.c_str());
+    std::printf(" entryCount : %u\n", p.entryCount);
+    std::printf("\n");
+    std::printf(" totalBytes : %llu\n",
+                static_cast<unsigned long long>(p.totalBytes));
+    std::printf(" headerBytes : %llu (magic + version + nameLen + name + entryCount)\n",
+                static_cast<unsigned long long>(p.headerBytes));
+    std::printf(" entrySectionBytes: %llu\n",
+                static_cast<unsigned long long>(p.entrySectionBytes));
+    std::printf(" catalogNameBytes : %u\n", p.catalogNameBytes);
+    if (p.entryCount > 0) {
+        std::printf(" avgEntryBytes : %.1f\n", p.averageEntryBytes);
+    }
+    if (!p.firstEntryIds.empty()) {
+        std::printf("\n firstEntryId : %u\n",
+                    p.firstEntryIds.front());
+    }
+    return 0;
+}
+
+} // namespace
+
+// Dispatch hook for `--catalog-stats`. When argv[i] is that flag and at
+// least one more argument follows, runs the command, stores its exit
+// code in outRc and returns true. Otherwise returns false and leaves
+// `i` and `outRc` untouched so the next handler can try.
+bool handleCatalogStats(int& i, int argc, char** argv, int& outRc) {
+    if (std::strcmp(argv[i], "--catalog-stats") == 0 && i + 1 < argc) {
+        outRc = handleStats(i, argc, argv);
+        return true;
+    }
+    return false;
+}
+
+} // namespace cli
+} // namespace editor
+} // namespace wowee
diff --git a/tools/editor/cli_catalog_stats.hpp b/tools/editor/cli_catalog_stats.hpp
new file mode 100644
index 00000000..66b67709
--- /dev/null
+++ b/tools/editor/cli_catalog_stats.hpp
@@ -0,0 +1,11 @@
+#pragma once
+
+namespace wowee {
+namespace editor {
+namespace cli {
+
+bool handleCatalogStats(int& i, int argc, char** argv, int& outRc);
+
+} // namespace cli
+} // namespace editor
+} // namespace wowee
diff --git a/tools/editor/cli_dispatch.cpp b/tools/editor/cli_dispatch.cpp
index a4d7d9a3..eb562e34 100644
--- a/tools/editor/cli_dispatch.cpp
+++ b/tools/editor/cli_dispatch.cpp
@@ -101,6 +101,7 @@
 #include "cli_diff_tree.hpp"
 #include "cli_orphan_jsons.hpp"
 #include "cli_list_by_magic.hpp"
+#include "cli_catalog_stats.hpp"
 #include "cli_macros_catalog.hpp"
 #include "cli_char_features_catalog.hpp"
 #include
"cli_pvp_catalog.hpp" @@ -273,6 +274,7 @@ constexpr DispatchFn kDispatchTable[] = { handleDiffTree, handleOrphanJsons, handleListByMagic, + handleCatalogStats, handleMacrosCatalog, handleCharFeaturesCatalog, handlePVPCatalog, diff --git a/tools/editor/cli_help.cpp b/tools/editor/cli_help.cpp index cf76e401..5ae819b2 100644 --- a/tools/editor/cli_help.cpp +++ b/tools/editor/cli_help.cpp @@ -1381,6 +1381,8 @@ void printUsage(const char* argv0) { std::printf(" Find .wXXX.json sidecars whose binary .wXXX is missing. Useful after deleting/moving binaries — orphan JSONs accumulate noise and may shadow re-imports. Exit 1 if any orphans found\n"); std::printf(" --list-by-magic [--json]\n"); std::printf(" List every file in a directory tree matching a 4-char magic (e.g. WSPL). Reports per-file size + entry count + catalog name + relative path. Exit 1 if no matches\n"); + std::printf(" --catalog-stats [--json]\n"); + std::printf(" Single-file deep stats — header bytes vs entry-section bytes, average entry size, sampled entry IDs. Useful for sizing analysis (which catalogs are biggest, where do the bytes go)\n"); std::printf(" --gen-animations [name]\n"); std::printf(" Emit .wani starter: 5 essential animations (Stand / Walk / Run / Death / AttackUnarmed) with fallback chains\n"); std::printf(" --gen-animations-combat [name]\n");