From 824b6ebf535920aac9958364af0ce9a7f127d8a9 Mon Sep 17 00:00:00 2001 From: Kelsi Date: Sat, 9 May 2026 21:45:54 -0700 Subject: [PATCH] feat(editor): add --audit-tree to flag corrupted/misnamed Wowee files MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Walks a directory recursively and groups problems by category: too-small (file under 16 bytes — can't hold a header), unknown-magic (.w* file whose magic is not in the format table), ext-mismatch (extension says one format but the magic says another — usually from a renamed file), magic-no-ext (file with recognized Wowee magic but no .w* extension), and header-trunc (magic matches but the rest of the header is truncated). Returns exit 1 if any issue is found, so it composes into shell pipelines and CI checks. JSON sidecar via --json. Catches the kinds of breakage that --summary-dir silently rolls into the "unrecognized" bucket — a renamed .wsrg file masquerading as .wsct shows up cleanly here but would otherwise be invisible. Like every cross-format utility this reuses cli_format_table.cpp, so new formats are audited automatically. CLI flag count 906 -> 907. 
--- CMakeLists.txt | 1 + tools/editor/cli_arg_required.cpp | 2 +- tools/editor/cli_audit_tree.cpp | 280 ++++++++++++++++++++++++++++++ tools/editor/cli_audit_tree.hpp | 11 ++ tools/editor/cli_dispatch.cpp | 2 + tools/editor/cli_help.cpp | 2 + 6 files changed, 297 insertions(+), 1 deletion(-) create mode 100644 tools/editor/cli_audit_tree.cpp create mode 100644 tools/editor/cli_audit_tree.hpp diff --git a/CMakeLists.txt b/CMakeLists.txt index c31c3367..03b691fe 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1460,6 +1460,7 @@ add_executable(wowee_editor tools/editor/cli_lfg_catalog.cpp tools/editor/cli_catalog_grep.cpp tools/editor/cli_diff_headers.cpp + tools/editor/cli_audit_tree.cpp tools/editor/cli_macros_catalog.cpp tools/editor/cli_char_features_catalog.cpp tools/editor/cli_pvp_catalog.cpp diff --git a/tools/editor/cli_arg_required.cpp b/tools/editor/cli_arg_required.cpp index 45c0f964..f80726e8 100644 --- a/tools/editor/cli_arg_required.cpp +++ b/tools/editor/cli_arg_required.cpp @@ -136,7 +136,7 @@ const char* const kArgRequired[] = { "--export-wliq-json", "--import-wliq-json", "--info-magic", "--summary-dir", "--rename-by-magic", "--bulk-rename-by-magic", "--touch-tree", "--tree-summary-md", - "--catalog-grep", "--diff-headers", + "--catalog-grep", "--diff-headers", "--audit-tree", "--gen-animations", "--gen-animations-combat", "--gen-animations-movement", "--info-wani", "--validate-wani", "--export-wani-json", "--import-wani-json", diff --git a/tools/editor/cli_audit_tree.cpp b/tools/editor/cli_audit_tree.cpp new file mode 100644 index 00000000..45dcdfa3 --- /dev/null +++ b/tools/editor/cli_audit_tree.cpp @@ -0,0 +1,280 @@ +#include "cli_audit_tree.hpp" +#include "cli_arg_parse.hpp" +#include "cli_format_table.hpp" + +#include + +#include +#include +#include +#include +#include +#include +#include + +namespace wowee { +namespace editor { +namespace cli { + +namespace { + +namespace fs = std::filesystem; + +// Issue categories surfaced by the audit. 
+// Order matters only for deterministic output — a single file may
+// belong to at most one category (the worst-fit wins).
+enum class IssueKind {
+    TooSmall,            // under 16 bytes, so there is no room for a header
+    UnknownMagic,        // leading magic is absent from the format table
+    ExtensionMismatch,   // extension names one format, magic names another
+    MagicWithoutExt,     // known magic, but the file lacks a .w* extension
+    HeaderTruncated,     // magic is fine, header ends before entryCount
+};
+
+// One finding produced by the audit walk.
+struct Issue {
+    fs::path path;                                   // file the finding refers to
+    IssueKind kind;                                  // category used for grouping
+    std::string detail;                              // free-form text shown to the user
+    const FormatMagicEntry* expectedFmt = nullptr;   // format implied by the extension, if any
+    const FormatMagicEntry* actualFmt = nullptr;     // format implied by the magic, if any
+};
+
+// Short, stable label for an issue category; "?" for a value
+// outside the enum.
+const char* issueKindLabel(IssueKind k) {
+    const char* label = "?";
+    switch (k) {
+        case IssueKind::TooSmall:          label = "too-small";     break;
+        case IssueKind::UnknownMagic:      label = "unknown-magic"; break;
+        case IssueKind::ExtensionMismatch: label = "ext-mismatch";  break;
+        case IssueKind::MagicWithoutExt:   label = "magic-no-ext";  break;
+        case IssueKind::HeaderTruncated:   label = "header-trunc";  break;
+    }
+    return label;
+}
+
+// True when the path's extension is a dot followed by 'w' or 'W'
+// (".wsrg", ".WSCT", ...). Only the first letter is inspected, so
+// any extension beginning with w qualifies.
+bool extensionLooksLikeWowee(const fs::path& p) {
+    const std::string suffix = p.extension().string();
+    const bool hasDotAndLetter = suffix.size() >= 2 && suffix[0] == '.';
+    if (!hasDotAndLetter) return false;
+    const char lead = suffix[1];
+    return lead == 'w' || lead == 'W';
+}
+
+// Looks up the format-table entry whose extension equals `ext`,
+// ignoring ASCII case (the comparison folds 'A'-'Z' only), so a
+// renamed "FOO.WSRG" still matches ".wsrg". Returns nullptr when
+// `ext` is empty or no entry matches.
+const FormatMagicEntry* findFormatByExtension(const std::string& ext) {
+    if (ext.empty()) return nullptr;
+    const auto lowerAscii = [](char ch) -> char {
+        if (ch >= 'A' && ch <= 'Z') ch += 32;
+        return ch;
+    };
+    const char* have = ext.c_str();
+    for (const FormatMagicEntry* fmt = formatTableBegin();
+         fmt != formatTableEnd(); ++fmt) {
+        const char* want = fmt->extension;
+        size_t idx = 0;
+        // Advance while both strings still have characters and
+        // those characters agree after case folding.
+        while (want[idx] != 0 && have[idx] != 0 &&
+               lowerAscii(want[idx]) == lowerAscii(have[idx])) {
+            ++idx;
+        }
+        // A match requires both strings to end at the same place.
+        if (want[idx] == 0 && have[idx] == 0) return fmt;
+    }
+    return nullptr;
+}
+
+// Read the leading 16+nameLen bytes and report whether the
+// header parses cleanly. Fills the magic and header fields on success.
+struct PeekResult {
+    bool readMagic = false;      // the 4 magic bytes were read
+    bool readHeader = false;     // magic+version+nameLen+name+entryCount all present
+    char magic[4] = {0, 0, 0, 0};
+    uint32_t version = 0;
+    uint32_t nameLen = 0;
+    uint32_t entryCount = 0;
+    uintmax_t fileSize = 0;      // 0 when the size could not be queried
+};
+
+// Opens `path` and reads, in order: 4 magic bytes, a 4-byte version,
+// a 4-byte nameLen, skips nameLen bytes, then a 4-byte entryCount.
+// The raw bytes are copied straight into the uint32_t fields, so the
+// values are interpreted in host byte order.
+//
+// Never throws on I/O failure: the result simply records how far the
+// read got (readMagic / readHeader). A nameLen above 1 MiB is treated
+// as "header not readable" without attempting the seek.
+PeekResult peekFile(const fs::path& path) {
+    PeekResult r;
+    std::error_code ec;
+    r.fileSize = fs::file_size(path, ec);
+    if (ec) r.fileSize = 0;
+    std::ifstream is(path, std::ios::binary);
+    if (!is) return r;
+    if (!is.read(r.magic, 4) || is.gcount() != 4) return r;
+    r.readMagic = true;
+    if (!is.read(reinterpret_cast<char*>(&r.version), 4)) return r;
+    if (!is.read(reinterpret_cast<char*>(&r.nameLen), 4)) return r;
+    // Reject implausible name lengths up front — these usually
+    // indicate the file is not actually a Wowee catalog.
+    if (r.nameLen > (1u << 20)) return r;
+    // Skip the name itself; a failed seek means the header is unusable.
+    if (!is.seekg(r.nameLen, std::ios::cur)) return r;
+    if (!is.read(reinterpret_cast<char*>(&r.entryCount), 4)) return r;
+    r.readHeader = true;
+    return r;
+}
+
+// Implements `--audit-tree <dir> [--json]`.
+//
+// On entry argv[i] is the flag itself; the directory is taken from
+// argv[++i] (the caller guarantees it exists). consumeJsonFlag is
+// defined elsewhere — presumably it consumes a trailing `--json`
+// and advances i; confirm against cli_arg_parse.
+//
+// Walks the directory recursively and files each regular file into
+// at most one IssueKind, checked in this order: too-small,
+// unknown-magic, magic-no-ext, ext-mismatch, header-trunc. Files with
+// neither a .w* extension nor a recognized magic are ignored.
+//
+// All filesystem calls use the non-throwing error_code overloads so
+// that one unreadable subdirectory or a file deleted mid-walk cannot
+// terminate the audit with an uncaught filesystem_error.
+//
+// Returns 0 when no issue was found and the walk completed, 1
+// otherwise (including when `dir` is not a directory).
+int handleAudit(int& i, int argc, char** argv) {
+    std::string dir = argv[++i];
+    bool jsonOut = consumeJsonFlag(i, argc, argv);
+    std::error_code ec;
+    // is_directory() already reports false for a missing path.
+    if (!fs::is_directory(dir, ec)) {
+        std::fprintf(stderr,
+            "audit-tree: not a directory: %s\n", dir.c_str());
+        return 1;
+    }
+    std::vector<Issue> issues;
+    uint64_t totalFiles = 0;
+    uint64_t cleanFiles = 0;
+    // Unreadable subdirectories are skipped instead of aborting the
+    // walk; any other iteration error ends the loop with `ec` set.
+    fs::recursive_directory_iterator it(
+        dir, fs::directory_options::skip_permission_denied, ec);
+    const fs::recursive_directory_iterator end;
+    for (; !ec && it != end; it.increment(ec)) {
+        const fs::directory_entry& entry = *it;
+        std::error_code typeEc;
+        if (!entry.is_regular_file(typeEc)) continue;
+        ++totalFiles;
+        const fs::path& path = entry.path();
+        std::string ext = path.extension().string();
+        const FormatMagicEntry* extFmt = findFormatByExtension(ext);
+        bool extLooksWowee = extensionLooksLikeWowee(path);
+        // For files that don't look Wowee-related at all,
+        // skip them silently — only audit candidates that
+        // either have a wowee-shaped extension or actually
+        // start with a known magic.
+        PeekResult pr = peekFile(path);
+        if (pr.fileSize < 16) {
+            // Anything under 16 bytes can't even hold the
+            // 4-byte magic + 4-byte version + 4-byte
+            // nameLen + 4-byte entryCount minimum. Only
+            // flag if the file has a wowee extension —
+            // sub-16-byte unrelated files are noise.
+            if (extLooksWowee) {
+                Issue iss;
+                iss.path = path;
+                iss.kind = IssueKind::TooSmall;
+                iss.detail = std::to_string(pr.fileSize) +
+                             " bytes — header needs at least 16";
+                iss.expectedFmt = extFmt;
+                issues.push_back(std::move(iss));
+            }
+            continue;
+        }
+        const FormatMagicEntry* magicFmt = nullptr;
+        if (pr.readMagic) magicFmt = findFormatByMagic(pr.magic);
+        if (!magicFmt && !extLooksWowee) continue;  // not ours
+        if (!magicFmt && extLooksWowee) {
+            Issue iss;
+            iss.path = path;
+            iss.kind = IssueKind::UnknownMagic;
+            char ms[5] = {pr.magic[0], pr.magic[1],
+                          pr.magic[2], pr.magic[3], 0};
+            // Filter non-printable characters from the
+            // displayed magic to keep terminal output safe.
+            for (char& c : ms) {
+                if (c != 0 && (c < 0x20 || c >= 0x7F)) c = '?';
+            }
+            iss.detail = std::string("magic '") + ms +
+                         "' not in format table";
+            iss.expectedFmt = extFmt;
+            issues.push_back(std::move(iss));
+            continue;
+        }
+        if (magicFmt && !extLooksWowee) {
+            Issue iss;
+            iss.path = path;
+            iss.kind = IssueKind::MagicWithoutExt;
+            iss.detail = std::string("magic '") + magicFmt->magic[0] +
+                         magicFmt->magic[1] + magicFmt->magic[2] +
+                         magicFmt->magic[3] + "' detected but file " +
+                         "has no .w* extension";
+            iss.actualFmt = magicFmt;
+            issues.push_back(std::move(iss));
+            continue;
+        }
+        // A .w* extension that is not in the table (extFmt == nullptr)
+        // is not treated as a mismatch; only two known formats that
+        // disagree are.
+        if (magicFmt && extFmt && magicFmt != extFmt) {
+            Issue iss;
+            iss.path = path;
+            iss.kind = IssueKind::ExtensionMismatch;
+            iss.detail = std::string("extension ") + extFmt->extension +
+                         " says " + extFmt->category +
+                         " but magic says " + magicFmt->category +
+                         " (" + magicFmt->extension + ")";
+            iss.expectedFmt = extFmt;
+            iss.actualFmt = magicFmt;
+            issues.push_back(std::move(iss));
+            continue;
+        }
+        if (magicFmt && !pr.readHeader) {
+            // Reaching here means the magic matched but peekFile
+            // could not read through entryCount: the header was
+            // truncated, or nameLen was implausibly large.
+            // The file is corrupt.
+            Issue iss;
+            iss.path = path;
+            iss.kind = IssueKind::HeaderTruncated;
+            iss.detail = "header parses past magic but is "
+                         "truncated before entryCount";
+            iss.actualFmt = magicFmt;
+            issues.push_back(std::move(iss));
+            continue;
+        }
+        ++cleanFiles;
+    }
+    // An aborted walk means part of the tree was never inspected, so
+    // the audit must not report success even with zero issues.
+    bool walkFailed = false;
+    if (ec) {
+        walkFailed = true;
+        std::fprintf(stderr,
+            "audit-tree: directory walk stopped early: %s\n",
+            ec.message().c_str());
+    }
+    // Path relative to `dir` for display; falls back to the path as
+    // iterated when the relative form cannot be computed.
+    auto displayPath = [&](const fs::path& p) {
+        std::error_code relEc;
+        fs::path rel = fs::relative(p, dir, relEc);
+        return relEc ? p.string() : rel.string();
+    };
+    bool ok = issues.empty() && !walkFailed;
+    if (jsonOut) {
+        nlohmann::json j;
+        j["dir"] = dir;
+        j["totalFiles"] = totalFiles;
+        j["cleanFiles"] = cleanFiles;
+        j["issueCount"] = issues.size();
+        j["ok"] = ok;
+        nlohmann::json arr = nlohmann::json::array();
+        for (const auto& iss : issues) {
+            arr.push_back({
+                {"path", displayPath(iss.path)},
+                {"kind", issueKindLabel(iss.kind)},
+                {"detail", iss.detail},
+            });
+        }
+        j["issues"] = arr;
+        std::printf("%s\n", j.dump(2).c_str());
+        return ok ? 0 : 1;
+    }
+    std::printf("audit-tree: %s\n", dir.c_str());
+    std::printf(" total files : %llu\n",
+                static_cast<unsigned long long>(totalFiles));
+    std::printf(" clean wowee : %llu\n",
+                static_cast<unsigned long long>(cleanFiles));
+    std::printf(" issues found : %zu\n", issues.size());
+    if (ok) {
+        std::printf(" OK — no extension/magic mismatches, no truncated headers\n");
+        return 0;
+    }
+    // Group by issue kind for readable output.
+    auto printGroup = [&](IssueKind k, const char* heading) {
+        size_t n = 0;
+        for (const auto& iss : issues) if (iss.kind == k) ++n;
+        if (n == 0) return;
+        std::printf("\n %s (%zu):\n", heading, n);
+        for (const auto& iss : issues) {
+            if (iss.kind != k) continue;
+            std::printf(" %s\n", displayPath(iss.path).c_str());
+            std::printf(" %s\n", iss.detail.c_str());
+        }
+    };
+    printGroup(IssueKind::TooSmall,
+               "Files too small to contain a header");
+    printGroup(IssueKind::UnknownMagic,
+               "Files with .w* extension but unrecognized magic");
+    printGroup(IssueKind::ExtensionMismatch,
+               "Extension/magic mismatch (renamed files?)");
+    printGroup(IssueKind::MagicWithoutExt,
+               "Wowee magic detected but no .w* extension");
+    printGroup(IssueKind::HeaderTruncated,
+               "Truncated headers (corrupted files)");
+    return 1;
+}
+
+} // namespace
+
+// Dispatch entry for `--audit-tree`. Claims the argument only when
+// argv[i] is exactly "--audit-tree" and a following argument exists;
+// in that case stores the handler's exit code in outRc and returns
+// true. Returns false otherwise, leaving i and outRc untouched.
+bool handleAuditTree(int& i, int argc, char** argv, int& outRc) {
+    if (std::strcmp(argv[i], "--audit-tree") == 0 && i + 1 < argc) {
+        outRc = handleAudit(i, argc, argv); return true;
+    }
+    return false;
+}
+
+} // namespace cli
+} // namespace editor
+} // namespace wowee
diff --git a/tools/editor/cli_audit_tree.hpp b/tools/editor/cli_audit_tree.hpp
new file mode 100644
index 00000000..b5ed9eb5
--- /dev/null
+++ b/tools/editor/cli_audit_tree.hpp
@@ -0,0 +1,11 @@
+#pragma once
+
+namespace wowee {
+namespace editor {
+namespace cli {
+
+bool handleAuditTree(int& i, int argc, char** argv, int& outRc);
+
+} // namespace cli
+} // namespace editor
+} // namespace wowee
diff --git a/tools/editor/cli_dispatch.cpp b/tools/editor/cli_dispatch.cpp
index 111fe44b..62cde8dc 100644
--- a/tools/editor/cli_dispatch.cpp
+++ b/tools/editor/cli_dispatch.cpp
@@ -94,6 +94,7 @@
 #include "cli_lfg_catalog.hpp"
 #include "cli_catalog_grep.hpp"
 #include "cli_diff_headers.hpp"
+#include "cli_audit_tree.hpp"
 #include "cli_macros_catalog.hpp"
 #include "cli_char_features_catalog.hpp"
 #include "cli_pvp_catalog.hpp"
@@ -242,6 +243,7 @@ constexpr
DispatchFn kDispatchTable[] = { handleLFGCatalog, handleCatalogGrep, handleDiffHeaders, + handleAuditTree, handleMacrosCatalog, handleCharFeaturesCatalog, handlePVPCatalog, diff --git a/tools/editor/cli_help.cpp b/tools/editor/cli_help.cpp index 7f86351a..3ec9a501 100644 --- a/tools/editor/cli_help.cpp +++ b/tools/editor/cli_help.cpp @@ -1365,6 +1365,8 @@ void printUsage(const char* argv0) { std::printf(" Recursively search catalog NAMES (the internal name field) across .w* files in . Case-insensitive by default. Exit 1 if no match\n"); std::printf(" --diff-headers [--json]\n"); std::printf(" Compare two .w* files at the standard catalog header level (magic / version / name / entry count / file size). Exit 1 if any field differs\n"); + std::printf(" --audit-tree [--json]\n"); + std::printf(" Walk directory recursively and flag corrupted/misnamed Wowee files: too-small, unknown-magic, ext/magic mismatch, magic-without-ext, truncated headers. Exit 1 on any issue\n"); std::printf(" --gen-animations [name]\n"); std::printf(" Emit .wani starter: 5 essential animations (Stand / Walk / Run / Death / AttackUnarmed) with fallback chains\n"); std::printf(" --gen-animations-combat [name]\n");