feat(editor): add --diff-tree to compare two directories of .w* catalogs

Walks both trees, classifies each file by its 4-byte magic, and
buckets the differences into five categories:
  - only-in-A         file present in A, missing from B (removed)
  - only-in-B         file present in B, missing from A (added)
  - magic-changed     same path but the format swapped
                      (e.g. somebody renamed a .wsrg to .wmat)
  - size-changed      same magic, different byte size
                      (content was edited)
  - identical         same magic, same size

Exit 1 if any category but identical is non-zero, so it composes
into shell pipelines and CI. JSON sidecar via --json.

Useful for project-version comparison: did anything actually
change between two snapshots? --diff-tree answers in one pass.
Pairs naturally with the existing --diff-headers (which goes
deeper on a single file pair) and the --bulk-* / --audit-tree
family of cross-tree utilities.

Files whose magic isn't recognized by the format table are
silently skipped so unrelated junk in the tree (build artifacts,
temp files) doesn't pollute the diff. Identity check is
magic+size only — true byte-equality would need a hash and the
heuristic is good enough for the typical use case.

This is the 15th cross-format utility:
  --list-formats / --info-magic / --summary-dir / --rename-by-magic
  --bulk-rename-by-magic / --touch-tree / --tree-summary-md
  --catalog-grep / --diff-headers / --audit-tree / --magic-fix
  --bulk-validate / --bulk-export-json / --bulk-import-json
  --diff-tree

CLI flag count 988 -> 989.
This commit is contained in:
Kelsi 2026-05-09 22:46:52 -07:00
parent 6b2bfb0f5a
commit a97bc676db
6 changed files with 252 additions and 0 deletions

View file

@ -1475,6 +1475,7 @@ add_executable(wowee_editor
tools/editor/cli_magic_fix.cpp
tools/editor/cli_bulk_validate.cpp
tools/editor/cli_bulk_json.cpp
tools/editor/cli_diff_tree.cpp
tools/editor/cli_macros_catalog.cpp
tools/editor/cli_char_features_catalog.cpp
tools/editor/cli_pvp_catalog.cpp

View file

@ -139,6 +139,7 @@ const char* const kArgRequired[] = {
"--catalog-grep", "--diff-headers", "--audit-tree",
"--magic-fix", "--bulk-validate",
"--bulk-export-json", "--bulk-import-json",
"--diff-tree",
"--gen-animations", "--gen-animations-combat", "--gen-animations-movement",
"--info-wani", "--validate-wani",
"--export-wani-json", "--import-wani-json",

View file

@ -0,0 +1,235 @@
#include "cli_diff_tree.hpp"
#include "cli_arg_parse.hpp"
#include "cli_format_table.hpp"
#include <nlohmann/json.hpp>
#include <cstdint>
#include <cstdio>
#include <cstring>
#include <filesystem>
#include <fstream>
#include <map>
#include <string>
#include <vector>
namespace wowee {
namespace editor {
namespace cli {
namespace {
namespace fs = std::filesystem;
// Per-file record produced while indexing one directory tree.
struct FileInfo {
    // First four bytes of the file, as filled in by peekMagic().
    char magic[4]{};
    // True once `magic` has been matched to a format-table entry.
    bool magicOk{false};
    // File size in bytes, as reported by the directory entry.
    std::uintmax_t size{0};
    // Matching format-table entry; stays null when the magic is unknown.
    const FormatMagicEntry* fmt{nullptr};
};
// Read the first four bytes of `path` into `magic`.
// Returns true only when the file could be opened and all four bytes were
// read; on a short read `magic` may have been partially written.
bool peekMagic(const fs::path& path, char magic[4]) {
    std::ifstream in(path, std::ios::binary);
    if (in.fail()) {
        return false;
    }
    in.read(magic, 4);
    const bool readFailed = in.fail();
    const bool gotAllFour = (in.gcount() == 4);
    return !readFailed && gotAllFour;
}
// Walk `root` recursively and build relativePath -> FileInfo for
// every Wowee-recognized file. Files whose magic isn't in the
// format table are skipped (so unrelated junk in the tree doesn't
// pollute the diff).
//
// The walk never throws for filesystem errors: the iterator is
// advanced with increment(ec) rather than operator++ (which a
// range-for would use and which throws), and every per-entry query
// uses its error_code overload. An entry that cannot be inspected is
// skipped. If the traversal itself fails, the walk stops, a warning
// is written to stderr, and whatever was indexed so far is returned.
std::map<std::string, FileInfo> indexTree(const fs::path& root) {
    std::map<std::string, FileInfo> out;
    std::error_code walkEc;
    fs::recursive_directory_iterator it(root, walkEc);
    const fs::recursive_directory_iterator end;
    for (; !walkEc && it != end; it.increment(walkEc)) {
        const fs::directory_entry& entry = *it;
        std::error_code entryEc;
        if (!entry.is_regular_file(entryEc) || entryEc) continue;
        FileInfo fi;
        fi.size = entry.file_size(entryEc);
        if (entryEc) continue;
        if (peekMagic(entry.path(), fi.magic)) {
            fi.fmt = findFormatByMagic(fi.magic);
            fi.magicOk = (fi.fmt != nullptr);
        }
        if (!fi.magicOk) continue;  // skip non-Wowee
        const fs::path rel = fs::relative(entry.path(), root, entryEc);
        if (entryEc) continue;
        out.insert_or_assign(rel.string(), fi);
    }
    if (walkEc) {
        // A truncated index would otherwise show up only as bogus
        // only-in-A / only-in-B rows, so say why the walk ended.
        std::fprintf(stderr,
                     "diff-tree: walk of %s stopped early: %s\n",
                     root.string().c_str(), walkEc.message().c_str());
    }
    return out;
}
// Classification of one relative path after comparing tree A with tree B.
// "Identical" means equal magic and equal byte size only; it is a first-cut
// heuristic, since true byte-equality would need a content hash that this
// tool does not compute.
enum class ChangeKind {
    OnlyInA = 0,       // present in A, missing from B
    OnlyInB = 1,       // present in B, missing from A
    MagicChanged = 2,  // present in both, but the magic differs
    SizeChanged = 3,   // same magic, different byte size
    Identical = 4,     // same magic, same byte size
};
// One line of the diff report: a relative path plus what was seen on each
// side. Fields for a side where the file is absent keep their defaults
// (null format, size 0).
struct DiffRow {
    // Path relative to the tree root; the key shared by both indexes.
    std::string path;
    // Defaulted so a freshly constructed row never holds an indeterminate
    // enum value; callers overwrite it once the comparison is done.
    ChangeKind kind = ChangeKind::Identical;
    // Format-table entries for the A-side and B-side file.
    const FormatMagicEntry* fmtA = nullptr;
    const FormatMagicEntry* fmtB = nullptr;
    // Byte sizes of the A-side and B-side file.
    uintmax_t sizeA = 0;
    uintmax_t sizeB = 0;
};
// Map a ChangeKind to the label used in reports and in the JSON "kind"
// field. A value outside the enumerators yields "?".
const char* changeKindLabel(ChangeKind k) {
    const char* label = "?";
    switch (k) {
        case ChangeKind::OnlyInA:
            label = "only-in-A";
            break;
        case ChangeKind::OnlyInB:
            label = "only-in-B";
            break;
        case ChangeKind::MagicChanged:
            label = "magic-changed";
            break;
        case ChangeKind::SizeChanged:
            label = "size-changed";
            break;
        case ChangeKind::Identical:
            label = "identical";
            break;
    }
    return label;
}
int handleDiff(int& i, int argc, char** argv) {
std::string dirA = argv[++i];
std::string dirB = argv[++i];
bool jsonOut = consumeJsonFlag(i, argc, argv);
if (!fs::exists(dirA) || !fs::is_directory(dirA)) {
std::fprintf(stderr,
"diff-tree: not a directory: %s\n", dirA.c_str());
return 1;
}
if (!fs::exists(dirB) || !fs::is_directory(dirB)) {
std::fprintf(stderr,
"diff-tree: not a directory: %s\n", dirB.c_str());
return 1;
}
auto idxA = indexTree(dirA);
auto idxB = indexTree(dirB);
std::vector<DiffRow> rows;
// Walk A's keys: each is either OnlyInA or present in
// both (which becomes MagicChanged / SizeChanged /
// Identical depending on the comparison).
for (const auto& [path, fa] : idxA) {
auto it = idxB.find(path);
if (it == idxB.end()) {
DiffRow r;
r.path = path;
r.kind = ChangeKind::OnlyInA;
r.fmtA = fa.fmt;
r.sizeA = fa.size;
rows.push_back(std::move(r));
continue;
}
const FileInfo& fb = it->second;
DiffRow r;
r.path = path;
r.fmtA = fa.fmt;
r.fmtB = fb.fmt;
r.sizeA = fa.size;
r.sizeB = fb.size;
if (fa.fmt != fb.fmt) r.kind = ChangeKind::MagicChanged;
else if (fa.size != fb.size) r.kind = ChangeKind::SizeChanged;
else r.kind = ChangeKind::Identical;
rows.push_back(std::move(r));
}
// Now walk B's keys looking for OnlyInB.
for (const auto& [path, fb] : idxB) {
if (idxA.find(path) != idxA.end()) continue;
DiffRow r;
r.path = path;
r.kind = ChangeKind::OnlyInB;
r.fmtB = fb.fmt;
r.sizeB = fb.size;
rows.push_back(std::move(r));
}
size_t onlyA = 0, onlyB = 0, magicCh = 0, sizeCh = 0, identical = 0;
for (const auto& r : rows) {
switch (r.kind) {
case ChangeKind::OnlyInA: ++onlyA; break;
case ChangeKind::OnlyInB: ++onlyB; break;
case ChangeKind::MagicChanged: ++magicCh; break;
case ChangeKind::SizeChanged: ++sizeCh; break;
case ChangeKind::Identical: ++identical; break;
}
}
bool anyDiff = (onlyA + onlyB + magicCh + sizeCh) > 0;
if (jsonOut) {
nlohmann::json j;
j["dirA"] = dirA;
j["dirB"] = dirB;
j["countA"] = idxA.size();
j["countB"] = idxB.size();
j["onlyInA"] = onlyA;
j["onlyInB"] = onlyB;
j["magicChanged"] = magicCh;
j["sizeChanged"] = sizeCh;
j["identical"] = identical;
j["allIdentical"] = !anyDiff;
nlohmann::json arr = nlohmann::json::array();
for (const auto& r : rows) {
if (r.kind == ChangeKind::Identical) continue;
nlohmann::json je;
je["path"] = r.path;
je["kind"] = changeKindLabel(r.kind);
if (r.fmtA) je["formatA"] = r.fmtA->extension;
if (r.fmtB) je["formatB"] = r.fmtB->extension;
je["sizeA"] = r.sizeA;
je["sizeB"] = r.sizeB;
arr.push_back(je);
}
j["differences"] = arr;
std::printf("%s\n", j.dump(2).c_str());
return anyDiff ? 1 : 0;
}
std::printf("diff-tree: %s vs %s\n", dirA.c_str(), dirB.c_str());
std::printf(" files in A : %zu\n", idxA.size());
std::printf(" files in B : %zu\n", idxB.size());
std::printf(" only-in-A : %zu\n", onlyA);
std::printf(" only-in-B : %zu\n", onlyB);
std::printf(" magic-changed : %zu\n", magicCh);
std::printf(" size-changed : %zu\n", sizeCh);
std::printf(" identical : %zu\n", identical);
if (!anyDiff) {
std::printf(" trees are identical at the magic+size level\n");
return 0;
}
auto printGroup = [&](ChangeKind k, const char* heading,
bool showSizes) {
size_t n = 0;
for (const auto& r : rows) if (r.kind == k) ++n;
if (n == 0) return;
std::printf("\n %s (%zu):\n", heading, n);
for (const auto& r : rows) {
if (r.kind != k) continue;
if (showSizes) {
std::printf(" %s [%llu B -> %llu B]\n",
r.path.c_str(),
static_cast<unsigned long long>(r.sizeA),
static_cast<unsigned long long>(r.sizeB));
} else {
std::printf(" %s\n", r.path.c_str());
}
}
};
printGroup(ChangeKind::OnlyInA, "Removed from B (present in A only)", false);
printGroup(ChangeKind::OnlyInB, "Added to B (not in A)", false);
printGroup(ChangeKind::MagicChanged,
"Magic changed between A and B (format swapped!)", true);
printGroup(ChangeKind::SizeChanged,
"Same magic but byte size changed (content edited)", true);
return 1;
}
} // namespace
// Dispatch hook for --diff-tree. Claims the argument only when argv[i] is
// exactly "--diff-tree" and at least two more arguments follow; in that case
// the command's exit code is stored in outRc and true is returned. Otherwise
// nothing is touched and false is returned.
bool handleDiffTree(int& i, int argc, char** argv, int& outRc) {
    const bool flagMatches = (std::strcmp(argv[i], "--diff-tree") == 0);
    if (!flagMatches || !(i + 2 < argc)) {
        return false;
    }
    outRc = handleDiff(i, argc, argv);
    return true;
}
} // namespace cli
} // namespace editor
} // namespace wowee

View file

@ -0,0 +1,11 @@
#pragma once
namespace wowee {
namespace editor {
namespace cli {
/// Dispatch hook for `--diff-tree <dirA> <dirB> [--json]`.
///
/// @param i      Index of the argument being examined; advanced past the
///               consumed operands when the flag is handled.
/// @param argc   Argument count.
/// @param argv   Argument vector.
/// @param outRc  Receives the command's exit code when the flag is handled
///               (0 = trees identical at the magic+size level, 1 otherwise).
/// @return true when argv[i] is `--diff-tree` with at least two following
///         arguments and the command was run; false otherwise, in which
///         case `i` and `outRc` are left untouched.
bool handleDiffTree(int& i, int argc, char** argv, int& outRc);
} // namespace cli
} // namespace editor
} // namespace wowee

View file

@ -98,6 +98,7 @@
#include "cli_magic_fix.hpp"
#include "cli_bulk_validate.hpp"
#include "cli_bulk_json.hpp"
#include "cli_diff_tree.hpp"
#include "cli_macros_catalog.hpp"
#include "cli_char_features_catalog.hpp"
#include "cli_pvp_catalog.hpp"
@ -261,6 +262,7 @@ constexpr DispatchFn kDispatchTable[] = {
handleMagicFix,
handleBulkValidate,
handleBulkJson,
handleDiffTree,
handleMacrosCatalog,
handleCharFeaturesCatalog,
handlePVPCatalog,

View file

@ -1375,6 +1375,8 @@ void printUsage(const char* argv0) {
std::printf(" Recursively export every recognized .w* file to its JSON sidecar via the per-format --export-X-json flag. Useful for git-friendly diffs of binary catalogs. Exit 1 if any failure\n");
std::printf(" --bulk-import-json <dir> [--json]\n");
std::printf(" Recursively import every .wXXX.json sidecar back to its binary .w* form via the per-format --import-X-json flag. Inverse of --bulk-export-json. Exit 1 if any failure\n");
std::printf(" --diff-tree <dirA> <dirB> [--json]\n");
std::printf(" Compare two directory trees of .w* catalogs at the magic+size level. Reports only-in-A / only-in-B / magic-changed / size-changed / identical counts and lists changed paths. Exit 1 if any difference\n");
std::printf(" --gen-animations <wani-base> [name]\n");
std::printf(" Emit .wani starter: 5 essential animations (Stand / Walk / Run / Death / AttackUnarmed) with fallback chains\n");
std::printf(" --gen-animations-combat <wani-base> [name]\n");