From a97bc676dbbbecd78d1251f6c91c37eabedb4050 Mon Sep 17 00:00:00 2001 From: Kelsi Date: Sat, 9 May 2026 22:46:52 -0700 Subject: [PATCH] feat(editor): add --diff-tree to compare two directories of .w* catalogs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Walks both trees in turn, classifies each file by its 4-byte magic, and buckets differences into five categories: - only-in-A: file present in A, missing from B (removed) - only-in-B: file present in B, missing from A (added) - magic-changed: same path but the format swapped (e.g. a .wsrg file whose contents were overwritten with .wmat data) - size-changed: same magic, different byte size (content was edited) - identical: same magic, same size Exit 1 if any category but identical is non-zero, so it composes into shell pipelines and CI. JSON report on stdout via --json. Useful for project-version comparison: did anything actually change between two snapshots? --diff-tree answers in one pass. Pairs naturally with the existing --diff-headers (which goes deeper on a single file pair) and the --bulk-* / --audit-tree family of cross-tree utilities. Files whose magic isn't recognized by the format table are silently skipped so unrelated junk in the tree (build artifacts, temp files) doesn't pollute the diff. Identity check is magic+size only — true byte-equality would need a hash and the heuristic is good enough for the typical use case. This is the 15th cross-format utility: --list-formats / --info-magic / --summary-dir / --rename-by-magic / --bulk-rename-by-magic / --touch-tree / --tree-summary-md / --catalog-grep / --diff-headers / --audit-tree / --magic-fix / --bulk-validate / --bulk-export-json / --bulk-import-json / --diff-tree. CLI flag count 988 -> 989.
--- CMakeLists.txt | 1 + tools/editor/cli_arg_required.cpp | 1 + tools/editor/cli_diff_tree.cpp | 235 ++++++++++++++++++++++++++++++ tools/editor/cli_diff_tree.hpp | 11 ++ tools/editor/cli_dispatch.cpp | 2 + tools/editor/cli_help.cpp | 2 + 6 files changed, 252 insertions(+) create mode 100644 tools/editor/cli_diff_tree.cpp create mode 100644 tools/editor/cli_diff_tree.hpp diff --git a/CMakeLists.txt b/CMakeLists.txt index b39995fc..de90952a 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1475,6 +1475,7 @@ add_executable(wowee_editor tools/editor/cli_magic_fix.cpp tools/editor/cli_bulk_validate.cpp tools/editor/cli_bulk_json.cpp + tools/editor/cli_diff_tree.cpp tools/editor/cli_macros_catalog.cpp tools/editor/cli_char_features_catalog.cpp tools/editor/cli_pvp_catalog.cpp diff --git a/tools/editor/cli_arg_required.cpp b/tools/editor/cli_arg_required.cpp index 9ef58d9a..4da5cb6f 100644 --- a/tools/editor/cli_arg_required.cpp +++ b/tools/editor/cli_arg_required.cpp @@ -139,6 +139,7 @@ const char* const kArgRequired[] = { "--catalog-grep", "--diff-headers", "--audit-tree", "--magic-fix", "--bulk-validate", "--bulk-export-json", "--bulk-import-json", + "--diff-tree", "--gen-animations", "--gen-animations-combat", "--gen-animations-movement", "--info-wani", "--validate-wani", "--export-wani-json", "--import-wani-json", diff --git a/tools/editor/cli_diff_tree.cpp b/tools/editor/cli_diff_tree.cpp new file mode 100644 index 00000000..852efac6 --- /dev/null +++ b/tools/editor/cli_diff_tree.cpp @@ -0,0 +1,235 @@ +#include "cli_diff_tree.hpp" +#include "cli_arg_parse.hpp" +#include "cli_format_table.hpp" + +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +namespace wowee { +namespace editor { +namespace cli { + +namespace { + +namespace fs = std::filesystem; + +struct FileInfo { + char magic[4] = {0, 0, 0, 0}; + bool magicOk = false; + uintmax_t size = 0; + const FormatMagicEntry* fmt = nullptr; +}; + +bool peekMagic(const 
fs::path& path, char magic[4]) { + std::ifstream is(path, std::ios::binary); + if (!is) return false; + if (!is.read(magic, 4) || is.gcount() != 4) return false; + return true; +} + +// Walk a directory and build relativePath -> FileInfo for +// every Wowee-recognized file. Files whose magic isn't in +// the format table are skipped (so unrelated junk in the +// tree doesn't pollute the diff). +std::map indexTree(const fs::path& root) { + std::map out; + std::error_code ec; + for (const auto& entry : fs::recursive_directory_iterator(root, ec)) { + if (ec) break; + if (!entry.is_regular_file()) continue; + FileInfo fi; + fi.size = entry.file_size(ec); + if (ec) { ec.clear(); continue; } + if (peekMagic(entry.path(), fi.magic)) { + fi.fmt = findFormatByMagic(fi.magic); + fi.magicOk = (fi.fmt != nullptr); + } + if (!fi.magicOk) continue; // skip non-Wowee + std::string rel = fs::relative(entry.path(), root, ec).string(); + if (ec) { ec.clear(); continue; } + out[rel] = fi; + } + return out; +} + +enum class ChangeKind { + OnlyInA, // file present in A, missing from B + OnlyInB, // file present in B, missing from A + MagicChanged, // present in both but different magic + SizeChanged, // same magic, different size + Identical, // same magic, same size (good enough as a + // first-cut heuristic — true byte-equal + // takes a hash that we don't bother with) +}; + +struct DiffRow { + std::string path; + ChangeKind kind; + const FormatMagicEntry* fmtA = nullptr; + const FormatMagicEntry* fmtB = nullptr; + uintmax_t sizeA = 0; + uintmax_t sizeB = 0; +}; + +const char* changeKindLabel(ChangeKind k) { + switch (k) { + case ChangeKind::OnlyInA: return "only-in-A"; + case ChangeKind::OnlyInB: return "only-in-B"; + case ChangeKind::MagicChanged: return "magic-changed"; + case ChangeKind::SizeChanged: return "size-changed"; + case ChangeKind::Identical: return "identical"; + } + return "?"; +} + +int handleDiff(int& i, int argc, char** argv) { + std::string dirA = argv[++i]; + 
std::string dirB = argv[++i]; + bool jsonOut = consumeJsonFlag(i, argc, argv); + if (!fs::exists(dirA) || !fs::is_directory(dirA)) { + std::fprintf(stderr, + "diff-tree: not a directory: %s\n", dirA.c_str()); + return 1; + } + if (!fs::exists(dirB) || !fs::is_directory(dirB)) { + std::fprintf(stderr, + "diff-tree: not a directory: %s\n", dirB.c_str()); + return 1; + } + auto idxA = indexTree(dirA); + auto idxB = indexTree(dirB); + std::vector rows; + // Walk A's keys: each is either OnlyInA or present in + // both (which becomes MagicChanged / SizeChanged / + // Identical depending on the comparison). + for (const auto& [path, fa] : idxA) { + auto it = idxB.find(path); + if (it == idxB.end()) { + DiffRow r; + r.path = path; + r.kind = ChangeKind::OnlyInA; + r.fmtA = fa.fmt; + r.sizeA = fa.size; + rows.push_back(std::move(r)); + continue; + } + const FileInfo& fb = it->second; + DiffRow r; + r.path = path; + r.fmtA = fa.fmt; + r.fmtB = fb.fmt; + r.sizeA = fa.size; + r.sizeB = fb.size; + if (fa.fmt != fb.fmt) r.kind = ChangeKind::MagicChanged; + else if (fa.size != fb.size) r.kind = ChangeKind::SizeChanged; + else r.kind = ChangeKind::Identical; + rows.push_back(std::move(r)); + } + // Now walk B's keys looking for OnlyInB. 
+ for (const auto& [path, fb] : idxB) { + if (idxA.find(path) != idxA.end()) continue; + DiffRow r; + r.path = path; + r.kind = ChangeKind::OnlyInB; + r.fmtB = fb.fmt; + r.sizeB = fb.size; + rows.push_back(std::move(r)); + } + size_t onlyA = 0, onlyB = 0, magicCh = 0, sizeCh = 0, identical = 0; + for (const auto& r : rows) { + switch (r.kind) { + case ChangeKind::OnlyInA: ++onlyA; break; + case ChangeKind::OnlyInB: ++onlyB; break; + case ChangeKind::MagicChanged: ++magicCh; break; + case ChangeKind::SizeChanged: ++sizeCh; break; + case ChangeKind::Identical: ++identical; break; + } + } + bool anyDiff = (onlyA + onlyB + magicCh + sizeCh) > 0; + if (jsonOut) { + nlohmann::json j; + j["dirA"] = dirA; + j["dirB"] = dirB; + j["countA"] = idxA.size(); + j["countB"] = idxB.size(); + j["onlyInA"] = onlyA; + j["onlyInB"] = onlyB; + j["magicChanged"] = magicCh; + j["sizeChanged"] = sizeCh; + j["identical"] = identical; + j["allIdentical"] = !anyDiff; + nlohmann::json arr = nlohmann::json::array(); + for (const auto& r : rows) { + if (r.kind == ChangeKind::Identical) continue; + nlohmann::json je; + je["path"] = r.path; + je["kind"] = changeKindLabel(r.kind); + if (r.fmtA) je["formatA"] = r.fmtA->extension; + if (r.fmtB) je["formatB"] = r.fmtB->extension; + je["sizeA"] = r.sizeA; + je["sizeB"] = r.sizeB; + arr.push_back(je); + } + j["differences"] = arr; + std::printf("%s\n", j.dump(2).c_str()); + return anyDiff ? 
1 : 0; + } + std::printf("diff-tree: %s vs %s\n", dirA.c_str(), dirB.c_str()); + std::printf(" files in A : %zu\n", idxA.size()); + std::printf(" files in B : %zu\n", idxB.size()); + std::printf(" only-in-A : %zu\n", onlyA); + std::printf(" only-in-B : %zu\n", onlyB); + std::printf(" magic-changed : %zu\n", magicCh); + std::printf(" size-changed : %zu\n", sizeCh); + std::printf(" identical : %zu\n", identical); + if (!anyDiff) { + std::printf(" trees are identical at the magic+size level\n"); + return 0; + } + auto printGroup = [&](ChangeKind k, const char* heading, + bool showSizes) { + size_t n = 0; + for (const auto& r : rows) if (r.kind == k) ++n; + if (n == 0) return; + std::printf("\n %s (%zu):\n", heading, n); + for (const auto& r : rows) { + if (r.kind != k) continue; + if (showSizes) { + std::printf(" %s [%llu B -> %llu B]\n", + r.path.c_str(), + static_cast(r.sizeA), + static_cast(r.sizeB)); + } else { + std::printf(" %s\n", r.path.c_str()); + } + } + }; + printGroup(ChangeKind::OnlyInA, "Removed from B (present in A only)", false); + printGroup(ChangeKind::OnlyInB, "Added to B (not in A)", false); + printGroup(ChangeKind::MagicChanged, + "Magic changed between A and B (format swapped!)", true); + printGroup(ChangeKind::SizeChanged, + "Same magic but byte size changed (content edited)", true); + return 1; +} + +} // namespace + +bool handleDiffTree(int& i, int argc, char** argv, int& outRc) { + if (std::strcmp(argv[i], "--diff-tree") == 0 && i + 2 < argc) { + outRc = handleDiff(i, argc, argv); return true; + } + return false; +} + +} // namespace cli +} // namespace editor +} // namespace wowee diff --git a/tools/editor/cli_diff_tree.hpp b/tools/editor/cli_diff_tree.hpp new file mode 100644 index 00000000..b03928cd --- /dev/null +++ b/tools/editor/cli_diff_tree.hpp @@ -0,0 +1,11 @@ +#pragma once + +namespace wowee { +namespace editor { +namespace cli { + +bool handleDiffTree(int& i, int argc, char** argv, int& outRc); + +} // namespace cli +} // namespace 
editor +} // namespace wowee diff --git a/tools/editor/cli_dispatch.cpp b/tools/editor/cli_dispatch.cpp index bdd0ef7f..62a1164b 100644 --- a/tools/editor/cli_dispatch.cpp +++ b/tools/editor/cli_dispatch.cpp @@ -98,6 +98,7 @@ #include "cli_magic_fix.hpp" #include "cli_bulk_validate.hpp" #include "cli_bulk_json.hpp" +#include "cli_diff_tree.hpp" #include "cli_macros_catalog.hpp" #include "cli_char_features_catalog.hpp" #include "cli_pvp_catalog.hpp" @@ -261,6 +262,7 @@ constexpr DispatchFn kDispatchTable[] = { handleMagicFix, handleBulkValidate, handleBulkJson, + handleDiffTree, handleMacrosCatalog, handleCharFeaturesCatalog, handlePVPCatalog, diff --git a/tools/editor/cli_help.cpp b/tools/editor/cli_help.cpp index 5c9f2128..5b8aa69d 100644 --- a/tools/editor/cli_help.cpp +++ b/tools/editor/cli_help.cpp @@ -1375,6 +1375,8 @@ void printUsage(const char* argv0) { std::printf(" Recursively export every recognized .w* file to its JSON sidecar via the per-format --export-X-json flag. Useful for git-friendly diffs of binary catalogs. Exit 1 if any failure\n"); std::printf(" --bulk-import-json [--json]\n"); std::printf(" Recursively import every .wXXX.json sidecar back to its binary .w* form via the per-format --import-X-json flag. Inverse of --bulk-export-json. Exit 1 if any failure\n"); + std::printf(" --diff-tree [--json]\n"); + std::printf(" Compare two directory trees of .w* catalogs at the magic+size level. Reports only-in-A / only-in-B / magic-changed / size-changed / identical counts and lists changed paths. Exit 1 if any difference\n"); std::printf(" --gen-animations [name]\n"); std::printf(" Emit .wani starter: 5 essential animations (Stand / Walk / Run / Death / AttackUnarmed) with fallback chains\n"); std::printf(" --gen-animations-combat [name]\n");