From c9e8ad99307c3b41978a9a352ea4f10797d1b5b7 Mon Sep 17 00:00:00 2001
From: Kelsi
Date: Wed, 6 May 2026 15:25:18 -0700
Subject: [PATCH] feat(editor): add --diff-extract for asset-extract directory comparison

Compares two extracted asset directories side-by-side, per file
extension. Useful for diffing a fresh asset_extract run against a
previous baseline (did the new MPQ add files? did any get dropped?),
or for comparing what each WoW expansion contributes:

  wowee_editor --diff-extract baseline/ new/

  Diff: baseline/ vs new/
    totals: 4 files / 0.0 MB vs 4 files / 0.0 MB

    Per-extension (count then bytes):
      ext            a count   b count    a bytes      b bytes  status
      .blp                 2         2          0            0
      .dbc                 1         0          0            0  -A
      .m2                  1         2          0            0  DIFF

    2 extension(s) differ

Status column flags imbalance:
  -A    only in A (extension dropped going from A to B)
  +B    only in B (extension added)
  DIFF  count differs but both sides have some

The status column and the exit code are driven by per-extension file
counts only; byte totals are reported for information and do not
affect the result. Extensions are compared case-insensitively, and
files without an extension are grouped under "(no-ext)".

The walk is recursive, so files in subdirectories are counted toward
the same per-extension totals as files at the top level. JSON mode
emits per-extension {count,bytes} pairs for both sides, plus the
number of extensions (taken over the union of both sides) whose file
counts differ, for CI consumption.

Diff family for directory-shaped formats:
  --diff-zone     unpacked zone dir vs zone dir
  --diff-extract  extracted asset dir vs extract dir   <- new

Verified on synthesized 4-file dirs (a: 2 blp + 1 dbc + 1 m2;
b: 2 blp + 0 dbc + 2 m2): correctly flags -A on .dbc, DIFF on .m2,
exit 1.
--- tools/editor/main.cpp | 106 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 106 insertions(+) diff --git a/tools/editor/main.cpp b/tools/editor/main.cpp index 14407875..1eadc6be 100644 --- a/tools/editor/main.cpp +++ b/tools/editor/main.cpp @@ -642,6 +642,8 @@ static void printUsage(const char* argv0) { std::printf(" Compare two WOC collision meshes (triangles, walkable/steep counts, tile)\n"); std::printf(" --diff-jsondbc [--json]\n"); std::printf(" Compare two JSON DBC sidecars (format/source/recordCount/fieldCount)\n"); + std::printf(" --diff-extract [--json]\n"); + std::printf(" Compare two extracted asset directories (per-extension file count + bytes)\n"); std::printf(" --pack-wcp [dst] Pack a zone dir/name into a .wcp archive and exit\n"); std::printf(" --unpack-wcp [dst] Extract a WCP archive (default dst=custom_zones/) and exit\n"); std::printf(" --list-commands Print every recognized --flag, one per line, and exit\n"); @@ -750,6 +752,11 @@ int main(int argc, char* argv[]) { "--diff-jsondbc requires \n"); return 1; } + if (std::strcmp(argv[i], "--diff-extract") == 0 && i + 2 >= argc) { + std::fprintf(stderr, + "--diff-extract requires \n"); + return 1; + } if (std::strcmp(argv[i], "--diff-wcp") == 0 && i + 2 >= argc) { std::fprintf(stderr, "--diff-wcp requires two paths\n"); return 1; @@ -3645,6 +3652,105 @@ int main(int argc, char* argv[]) { return 0; } return 1; + } else if (std::strcmp(argv[i], "--diff-extract") == 0 && i + 2 < argc) { + // Compare two extracted asset directories. Useful for diffing + // a fresh asset_extract run against a previous baseline (did + // the new MPQ add files? did any get dropped?), or comparing + // what each WoW expansion contributes. 
+ std::string aDir = argv[++i]; + std::string bDir = argv[++i]; + bool jsonOut = (i + 1 < argc && + std::strcmp(argv[i + 1], "--json") == 0); + if (jsonOut) i++; + namespace fs = std::filesystem; + for (const auto& d : {aDir, bDir}) { + if (!fs::exists(d) || !fs::is_directory(d)) { + std::fprintf(stderr, + "diff-extract: %s is not a directory\n", d.c_str()); + return 1; + } + } + // Tally per-extension counts + bytes for each side. + struct Stats { int count = 0; uint64_t bytes = 0; }; + auto walk = [](const std::string& dir) { + std::map m; + std::error_code ec; + for (const auto& e : fs::recursive_directory_iterator(dir, ec)) { + if (!e.is_regular_file()) continue; + std::string ext = e.path().extension().string(); + std::transform(ext.begin(), ext.end(), ext.begin(), + [](unsigned char c) { return std::tolower(c); }); + if (ext.empty()) ext = "(no-ext)"; + auto& s = m[ext]; + s.count++; + s.bytes += e.file_size(ec); + } + return m; + }; + auto a = walk(aDir); + auto b = walk(bDir); + // Union of all extensions. + std::set allExts; + for (const auto& [e, _] : a) allExts.insert(e); + for (const auto& [e, _] : b) allExts.insert(e); + int diffs = 0; + for (const auto& e : allExts) { + int aC = a.count(e) ? a[e].count : 0; + int bC = b.count(e) ? b[e].count : 0; + if (aC != bC) diffs++; + } + int aTotalFiles = 0, bTotalFiles = 0; + uint64_t aTotalBytes = 0, bTotalBytes = 0; + for (const auto& [_, s] : a) { aTotalFiles += s.count; aTotalBytes += s.bytes; } + for (const auto& [_, s] : b) { bTotalFiles += s.count; bTotalBytes += s.bytes; } + if (jsonOut) { + nlohmann::json j; + j["a"] = aDir; j["b"] = bDir; + j["totalFiles"] = {{"a", aTotalFiles}, {"b", bTotalFiles}}; + j["totalBytes"] = {{"a", aTotalBytes}, {"b", bTotalBytes}}; + nlohmann::json byExt = nlohmann::json::array(); + for (const auto& e : allExts) { + int aC = a.count(e) ? a[e].count : 0; + int bC = b.count(e) ? b[e].count : 0; + uint64_t aB = a.count(e) ? a[e].bytes : 0; + uint64_t bB = b.count(e) ? 
b[e].bytes : 0; + byExt.push_back({{"ext", e}, + {"a", {{"count", aC}, {"bytes", aB}}}, + {"b", {{"count", bC}, {"bytes", bB}}}}); + } + j["byExtension"] = byExt; + j["totalDiffs"] = diffs; + j["identical"] = (diffs == 0); + std::printf("%s\n", j.dump(2).c_str()); + return diffs == 0 ? 0 : 1; + } + std::printf("Diff: %s vs %s\n", aDir.c_str(), bDir.c_str()); + std::printf(" totals: %d files / %.1f MB vs %d files / %.1f MB\n", + aTotalFiles, aTotalBytes / (1024.0 * 1024.0), + bTotalFiles, bTotalBytes / (1024.0 * 1024.0)); + std::printf("\n Per-extension (count then bytes):\n"); + std::printf(" %-12s a count b count a bytes b bytes status\n", "ext"); + for (const auto& e : allExts) { + int aC = a.count(e) ? a[e].count : 0; + int bC = b.count(e) ? b[e].count : 0; + uint64_t aB = a.count(e) ? a[e].bytes : 0; + uint64_t bB = b.count(e) ? b[e].bytes : 0; + const char* status = (aC == bC) ? "" + : (aC == 0) ? "+B" + : (bC == 0) ? "-A" + : "DIFF"; + std::printf(" %-12s %9d %9d %10llu %12llu %s\n", + e.c_str(), aC, bC, + static_cast(aB), + static_cast(bB), + status); + } + if (diffs == 0) { + std::printf("\n IDENTICAL (per-extension counts match)\n"); + return 0; + } + std::printf("\n %d extension(s) differ\n", diffs); + return 1; } else if (std::strcmp(argv[i], "--list-wcp") == 0 && i + 1 < argc) { // Like --info-wcp but prints every file path. Useful for spotting // missing or unexpected entries before unpacking.