feat(editor): add --diff-extract for asset-extract directory comparison

Compares two extracted asset directories side-by-side per file
extension. Useful for diffing a fresh asset_extract run against
a previous baseline (did the new MPQ add files? did any get
dropped?), or comparing what each WoW expansion contributes:

  wowee_editor --diff-extract baseline/ new/

  Diff: baseline/ vs new/
    totals: 4 files / 0.0 MB    vs    4 files / 0.0 MB

    Per-extension (count then bytes):
    ext            a count   b count    a bytes      b bytes  status
    .blp                 2         2           0            0
    .dbc                 1         0           0            0  -A
    .m2                  1         2           0            0  DIFF

    2 extension(s) differ

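Status column flags per-extension file-count imbalance (blank means
the counts match; byte totals are shown but do not affect status):
  -A   only in A (extension absent from B, i.e. dropped)
  +B   only in B (extension added)
  DIFF present on both sides, but the file counts differ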

The walk is recursive, so files in subdirectories are folded into
the same per-extension totals. JSON mode emits per-extension
{count,bytes} pairs for both sides plus the number of differing
extensions (totalDiffs) for CI consumption.

Diff family for directory-shaped formats:
  --diff-zone     unpacked zone dir vs zone dir
  --diff-extract  extracted asset dir vs extract dir  <- new

Verified on synthesized 4-file dirs (a: 2 blp + 1 dbc + 1 m2;
b: 2 blp + 0 dbc + 2 m2): correctly flags -A on .dbc, DIFF on
.m2, exit 1.
This commit is contained in:
Kelsi 2026-05-06 15:25:18 -07:00
parent 0f275cfee7
commit c9e8ad9930

View file

@ -642,6 +642,8 @@ static void printUsage(const char* argv0) {
std::printf(" Compare two WOC collision meshes (triangles, walkable/steep counts, tile)\n");
std::printf(" --diff-jsondbc <a> <b> [--json]\n");
std::printf(" Compare two JSON DBC sidecars (format/source/recordCount/fieldCount)\n");
std::printf(" --diff-extract <a> <b> [--json]\n");
std::printf(" Compare two extracted asset directories (per-extension file count + bytes)\n");
std::printf(" --pack-wcp <zone> [dst] Pack a zone dir/name into a .wcp archive and exit\n");
std::printf(" --unpack-wcp <wcp> [dst] Extract a WCP archive (default dst=custom_zones/) and exit\n");
std::printf(" --list-commands Print every recognized --flag, one per line, and exit\n");
@ -750,6 +752,11 @@ int main(int argc, char* argv[]) {
"--diff-jsondbc requires <a.json> <b.json>\n");
return 1;
}
if (std::strcmp(argv[i], "--diff-extract") == 0 && i + 2 >= argc) {
std::fprintf(stderr,
"--diff-extract requires <dirA> <dirB>\n");
return 1;
}
if (std::strcmp(argv[i], "--diff-wcp") == 0 && i + 2 >= argc) {
std::fprintf(stderr, "--diff-wcp requires two paths\n");
return 1;
@ -3645,6 +3652,105 @@ int main(int argc, char* argv[]) {
return 0;
}
return 1;
} else if (std::strcmp(argv[i], "--diff-extract") == 0 && i + 2 < argc) {
// Compare two extracted asset directories. Useful for diffing
// a fresh asset_extract run against a previous baseline (did
// the new MPQ add files? did any get dropped?), or comparing
// what each WoW expansion contributes.
std::string aDir = argv[++i];
std::string bDir = argv[++i];
bool jsonOut = (i + 1 < argc &&
std::strcmp(argv[i + 1], "--json") == 0);
if (jsonOut) i++;
namespace fs = std::filesystem;
for (const auto& d : {aDir, bDir}) {
if (!fs::exists(d) || !fs::is_directory(d)) {
std::fprintf(stderr,
"diff-extract: %s is not a directory\n", d.c_str());
return 1;
}
}
// Tally per-extension counts + bytes for each side.
// Per-extension tally for one side of the comparison: number of regular
// files seen and the sum of their sizes in bytes.
struct Stats { int count = 0; uint64_t bytes = 0; };
// Recursively walks `dir` and returns a map from lower-cased file extension
// (including the leading dot, or "(no-ext)" when the file has none) to the
// Stats accumulated over every regular file found.
//
// All filesystem calls use the non-throwing error_code overloads so that a
// single unreadable entry cannot abort the whole tool:
//   - directories that deny access are skipped,
//   - a file whose size cannot be read is still counted but adds 0 bytes
//     (file_size reports failure as uintmax_t(-1), which must never be
//     summed into the total),
//   - any other traversal error stops the walk, keeps what was tallied so
//     far, and prints a warning to stderr.
auto walk = [](const std::string& dir) {
    std::map<std::string, Stats> m;
    std::error_code ec;
    std::filesystem::recursive_directory_iterator it(
        dir, std::filesystem::directory_options::skip_permission_denied, ec);
    const std::filesystem::recursive_directory_iterator end;
    // increment(ec) reports errors through ec instead of throwing like ++it.
    for (; !ec && it != end; it.increment(ec)) {
        std::error_code entryEc;
        // Returns false on error as well, so unreadable entries are skipped.
        if (!it->is_regular_file(entryEc)) continue;
        std::string ext = it->path().extension().string();
        // Case-fold so ".BLP" and ".blp" land in the same bucket.
        std::transform(ext.begin(), ext.end(), ext.begin(),
                       [](unsigned char c) {
                           return static_cast<char>(std::tolower(c));
                       });
        if (ext.empty()) ext = "(no-ext)";
        auto& s = m[ext];
        s.count++;
        const auto size = it->file_size(entryEc);
        if (!entryEc) s.bytes += size;
    }
    if (ec) {
        std::fprintf(stderr,
                     "diff-extract: walk of %s stopped early: %s\n",
                     dir.c_str(), ec.message().c_str());
    }
    return m;
};
auto a = walk(aDir);
auto b = walk(bDir);
// Union of all extensions.
std::set<std::string> allExts;
for (const auto& [e, _] : a) allExts.insert(e);
for (const auto& [e, _] : b) allExts.insert(e);
int diffs = 0;
for (const auto& e : allExts) {
int aC = a.count(e) ? a[e].count : 0;
int bC = b.count(e) ? b[e].count : 0;
if (aC != bC) diffs++;
}
int aTotalFiles = 0, bTotalFiles = 0;
uint64_t aTotalBytes = 0, bTotalBytes = 0;
for (const auto& [_, s] : a) { aTotalFiles += s.count; aTotalBytes += s.bytes; }
for (const auto& [_, s] : b) { bTotalFiles += s.count; bTotalBytes += s.bytes; }
if (jsonOut) {
nlohmann::json j;
j["a"] = aDir; j["b"] = bDir;
j["totalFiles"] = {{"a", aTotalFiles}, {"b", bTotalFiles}};
j["totalBytes"] = {{"a", aTotalBytes}, {"b", bTotalBytes}};
nlohmann::json byExt = nlohmann::json::array();
for (const auto& e : allExts) {
int aC = a.count(e) ? a[e].count : 0;
int bC = b.count(e) ? b[e].count : 0;
uint64_t aB = a.count(e) ? a[e].bytes : 0;
uint64_t bB = b.count(e) ? b[e].bytes : 0;
byExt.push_back({{"ext", e},
{"a", {{"count", aC}, {"bytes", aB}}},
{"b", {{"count", bC}, {"bytes", bB}}}});
}
j["byExtension"] = byExt;
j["totalDiffs"] = diffs;
j["identical"] = (diffs == 0);
std::printf("%s\n", j.dump(2).c_str());
return diffs == 0 ? 0 : 1;
}
std::printf("Diff: %s vs %s\n", aDir.c_str(), bDir.c_str());
std::printf(" totals: %d files / %.1f MB vs %d files / %.1f MB\n",
aTotalFiles, aTotalBytes / (1024.0 * 1024.0),
bTotalFiles, bTotalBytes / (1024.0 * 1024.0));
std::printf("\n Per-extension (count then bytes):\n");
std::printf(" %-12s a count b count a bytes b bytes status\n", "ext");
for (const auto& e : allExts) {
int aC = a.count(e) ? a[e].count : 0;
int bC = b.count(e) ? b[e].count : 0;
uint64_t aB = a.count(e) ? a[e].bytes : 0;
uint64_t bB = b.count(e) ? b[e].bytes : 0;
const char* status = (aC == bC) ? ""
: (aC == 0) ? "+B"
: (bC == 0) ? "-A"
: "DIFF";
std::printf(" %-12s %9d %9d %10llu %12llu %s\n",
e.c_str(), aC, bC,
static_cast<unsigned long long>(aB),
static_cast<unsigned long long>(bB),
status);
}
if (diffs == 0) {
std::printf("\n IDENTICAL (per-extension counts match)\n");
return 0;
}
std::printf("\n %d extension(s) differ\n", diffs);
return 1;
} else if (std::strcmp(argv[i], "--list-wcp") == 0 && i + 1 < argc) {
// Like --info-wcp but prints every file path. Useful for spotting
// missing or unexpected entries before unpacking.