mirror of
https://github.com/Kelsidavis/WoWee.git
synced 2026-05-10 19:13:52 +00:00
281 lines
10 KiB
C++
281 lines
10 KiB
C++
|
|
#include "cli_audit_tree.hpp"
|
||
|
|
#include "cli_arg_parse.hpp"
|
||
|
|
#include "cli_format_table.hpp"
|
||
|
|
|
||
|
|
#include <nlohmann/json.hpp>
|
||
|
|
|
||
|
|
#include <cstdint>
|
||
|
|
#include <cstdio>
|
||
|
|
#include <cstring>
|
||
|
|
#include <filesystem>
|
||
|
|
#include <fstream>
|
||
|
|
#include <string>
|
||
|
|
#include <vector>
|
||
|
|
|
||
|
|
namespace wowee {
|
||
|
|
namespace editor {
|
||
|
|
namespace cli {
|
||
|
|
|
||
|
|
namespace {
|
||
|
|
|
||
|
|
namespace fs = std::filesystem;
|
||
|
|
|
||
|
|
// Kinds of problem the audit can report for a single file.
// Every flagged file is recorded under exactly one kind. The
// enumerator order fixes the underlying integer values, so keep
// it stable.
enum class IssueKind {
    // File is shorter than 16 bytes and cannot hold a header.
    TooSmall,
    // Leading magic bytes are not present in the format table.
    UnknownMagic,
    // Extension maps to one format, magic maps to another.
    ExtensionMismatch,
    // Magic is recognized but the file lacks a .w* extension.
    MagicWithoutExt,
    // Magic is recognized but the header ends before entryCount.
    HeaderTruncated,
};
|
||
|
|
|
||
|
|
struct Issue {
|
||
|
|
fs::path path;
|
||
|
|
IssueKind kind;
|
||
|
|
std::string detail; // human-readable extra info
|
||
|
|
const FormatMagicEntry* expectedFmt = nullptr;
|
||
|
|
const FormatMagicEntry* actualFmt = nullptr;
|
||
|
|
};
|
||
|
|
|
||
|
|
// Maps an issue kind to the short label used in reports.
// Values outside the enumeration yield "?".
const char* issueKindLabel(IssueKind k) {
    const char* label = "?";
    switch (k) {
        case IssueKind::TooSmall:
            label = "too-small";
            break;
        case IssueKind::UnknownMagic:
            label = "unknown-magic";
            break;
        case IssueKind::ExtensionMismatch:
            label = "ext-mismatch";
            break;
        case IssueKind::MagicWithoutExt:
            label = "magic-no-ext";
            break;
        case IssueKind::HeaderTruncated:
            label = "header-trunc";
            break;
    }
    return label;
}
|
||
|
|
|
||
|
|
// Returns true when the path's extension is a dot followed by
// 'w' or 'W' (for example ".wsrg" or ".WSRG"). Only the first
// letter after the dot is inspected.
bool extensionLooksLikeWowee(const fs::path& p) {
    const std::string suffix = p.extension().string();
    const bool hasDotAndLetter = suffix.size() >= 2 && suffix.front() == '.';
    if (!hasDotAndLetter) {
        return false;
    }
    const char lead = suffix[1];
    return lead == 'w' || lead == 'W';
}
|
||
|
|
|
||
|
|
// Looks up the format-table entry whose extension equals `ext`,
// ignoring ASCII case (the caller may pass "FOO.WSRG"-style
// upper-case extensions). Returns nullptr when `ext` is empty or
// no entry matches.
const FormatMagicEntry* findFormatByExtension(const std::string& ext) {
    if (ext.empty()) {
        return nullptr;
    }

    // ASCII-only lowercase fold; other bytes pass through unchanged.
    const auto lowerAscii = [](char c) -> char {
        return (c >= 'A' && c <= 'Z') ? static_cast<char>(c + 32) : c;
    };

    for (const FormatMagicEntry* entry = formatTableBegin();
         entry != formatTableEnd(); ++entry) {
        const char* tableExt = entry->extension;
        const char* fileExt = ext.c_str();

        // Advance while both strings have characters and they agree.
        while (*tableExt != '\0' && *fileExt != '\0' &&
               lowerAscii(*tableExt) == lowerAscii(*fileExt)) {
            ++tableExt;
            ++fileExt;
        }

        // A match requires both strings to be exhausted together.
        if (*tableExt == '\0' && *fileExt == '\0') {
            return entry;
        }
    }
    return nullptr;
}
|
||
|
|
|
||
|
|
// Result of probing the start of a file. The header layout read
// by peekFile is: 4-byte magic, 4-byte version, 4-byte nameLen,
// nameLen bytes of name (skipped, not stored), 4-byte entryCount.
struct PeekResult {
    // True once all four magic bytes were read.
    bool readMagic{false};
    // True once every header field up to entryCount was read.
    bool readHeader{false};
    // Raw magic bytes; not NUL-terminated.
    char magic[4]{};
    uint32_t version{0};
    uint32_t nameLen{0};
    uint32_t entryCount{0};
    // Size reported by the filesystem, or 0 when it could not be queried.
    uintmax_t fileSize{0};
};

// Probes `path` and reports how much of the header could be read.
// Integer fields are copied from the file as raw bytes. Any failure
// returns the partially filled result: check readMagic / readHeader
// to see how far the probe got.
PeekResult peekFile(const fs::path& path) {
    PeekResult result;

    std::error_code sizeErr;
    const auto bytesOnDisk = fs::file_size(path, sizeErr);
    result.fileSize = sizeErr ? 0 : bytesOnDisk;

    std::ifstream in(path, std::ios::binary);
    if (!in) {
        return result;
    }

    // Reads one 4-byte field; false when the stream ran dry or failed.
    const auto readU32 = [&in](uint32_t& out) -> bool {
        return static_cast<bool>(in.read(reinterpret_cast<char*>(&out), 4));
    };

    in.read(result.magic, 4);
    if (in.fail() || in.gcount() != 4) {
        return result;
    }
    result.readMagic = true;

    if (!readU32(result.version) || !readU32(result.nameLen)) {
        return result;
    }

    // A name longer than 1 MiB is treated as implausible; such a
    // file is most likely not a Wowee catalog at all.
    constexpr uint32_t kMaxNameLen = 1u << 20;
    if (result.nameLen > kMaxNameLen) {
        return result;
    }

    // Skip over the name. A failed seek is picked up by the read below.
    in.seekg(result.nameLen, std::ios::cur);
    result.readHeader = readU32(result.entryCount);
    return result;
}
|
||
|
|
|
||
|
|
int handleAudit(int& i, int argc, char** argv) {
|
||
|
|
std::string dir = argv[++i];
|
||
|
|
bool jsonOut = consumeJsonFlag(i, argc, argv);
|
||
|
|
if (!fs::exists(dir) || !fs::is_directory(dir)) {
|
||
|
|
std::fprintf(stderr,
|
||
|
|
"audit-tree: not a directory: %s\n", dir.c_str());
|
||
|
|
return 1;
|
||
|
|
}
|
||
|
|
std::vector<Issue> issues;
|
||
|
|
uint64_t totalFiles = 0;
|
||
|
|
uint64_t cleanFiles = 0;
|
||
|
|
for (const auto& entry : fs::recursive_directory_iterator(dir)) {
|
||
|
|
if (!entry.is_regular_file()) continue;
|
||
|
|
++totalFiles;
|
||
|
|
const fs::path& path = entry.path();
|
||
|
|
std::string ext = path.extension().string();
|
||
|
|
const FormatMagicEntry* extFmt = findFormatByExtension(ext);
|
||
|
|
bool extLooksWowee = extensionLooksLikeWowee(path);
|
||
|
|
// For files that don't look Wowee-related at all,
|
||
|
|
// skip them silently — only audit candidates that
|
||
|
|
// either have a wowee-shaped extension or actually
|
||
|
|
// start with a known magic.
|
||
|
|
PeekResult pr = peekFile(path);
|
||
|
|
if (pr.fileSize < 16) {
|
||
|
|
// Anything under 16 bytes can't even hold the
|
||
|
|
// 4-byte magic + 4-byte version + 4-byte
|
||
|
|
// nameLen + 4-byte entryCount minimum. Only
|
||
|
|
// flag if the file has a wowee extension —
|
||
|
|
// sub-16-byte unrelated files are noise.
|
||
|
|
if (extLooksWowee) {
|
||
|
|
Issue iss;
|
||
|
|
iss.path = path;
|
||
|
|
iss.kind = IssueKind::TooSmall;
|
||
|
|
iss.detail = std::to_string(pr.fileSize) +
|
||
|
|
" bytes — header needs at least 16";
|
||
|
|
iss.expectedFmt = extFmt;
|
||
|
|
issues.push_back(std::move(iss));
|
||
|
|
}
|
||
|
|
continue;
|
||
|
|
}
|
||
|
|
const FormatMagicEntry* magicFmt = nullptr;
|
||
|
|
if (pr.readMagic) magicFmt = findFormatByMagic(pr.magic);
|
||
|
|
if (!magicFmt && !extLooksWowee) continue; // not ours
|
||
|
|
if (!magicFmt && extLooksWowee) {
|
||
|
|
Issue iss;
|
||
|
|
iss.path = path;
|
||
|
|
iss.kind = IssueKind::UnknownMagic;
|
||
|
|
char ms[5] = {pr.magic[0], pr.magic[1],
|
||
|
|
pr.magic[2], pr.magic[3], 0};
|
||
|
|
// Filter non-printable characters from the
|
||
|
|
// displayed magic to keep terminal output safe.
|
||
|
|
for (char& c : ms) {
|
||
|
|
if (c != 0 && (c < 0x20 || c >= 0x7F)) c = '?';
|
||
|
|
}
|
||
|
|
iss.detail = std::string("magic '") + ms +
|
||
|
|
"' not in format table";
|
||
|
|
iss.expectedFmt = extFmt;
|
||
|
|
issues.push_back(std::move(iss));
|
||
|
|
continue;
|
||
|
|
}
|
||
|
|
if (magicFmt && !extLooksWowee) {
|
||
|
|
Issue iss;
|
||
|
|
iss.path = path;
|
||
|
|
iss.kind = IssueKind::MagicWithoutExt;
|
||
|
|
iss.detail = std::string("magic '") + magicFmt->magic[0] +
|
||
|
|
magicFmt->magic[1] + magicFmt->magic[2] +
|
||
|
|
magicFmt->magic[3] + "' detected but file " +
|
||
|
|
"has no .w* extension";
|
||
|
|
iss.actualFmt = magicFmt;
|
||
|
|
issues.push_back(std::move(iss));
|
||
|
|
continue;
|
||
|
|
}
|
||
|
|
if (magicFmt && extFmt && magicFmt != extFmt) {
|
||
|
|
Issue iss;
|
||
|
|
iss.path = path;
|
||
|
|
iss.kind = IssueKind::ExtensionMismatch;
|
||
|
|
iss.detail = std::string("extension ") + extFmt->extension +
|
||
|
|
" says " + extFmt->category +
|
||
|
|
" but magic says " + magicFmt->category +
|
||
|
|
" (" + magicFmt->extension + ")";
|
||
|
|
iss.expectedFmt = extFmt;
|
||
|
|
iss.actualFmt = magicFmt;
|
||
|
|
issues.push_back(std::move(iss));
|
||
|
|
continue;
|
||
|
|
}
|
||
|
|
if (magicFmt && !pr.readHeader) {
|
||
|
|
// Reaching here means the magic byte matched but
|
||
|
|
// the header was truncated mid-string or before
|
||
|
|
// entryCount. The file is corrupt.
|
||
|
|
Issue iss;
|
||
|
|
iss.path = path;
|
||
|
|
iss.kind = IssueKind::HeaderTruncated;
|
||
|
|
iss.detail = "header parses past magic but is "
|
||
|
|
"truncated before entryCount";
|
||
|
|
iss.actualFmt = magicFmt;
|
||
|
|
issues.push_back(std::move(iss));
|
||
|
|
continue;
|
||
|
|
}
|
||
|
|
++cleanFiles;
|
||
|
|
}
|
||
|
|
bool ok = issues.empty();
|
||
|
|
if (jsonOut) {
|
||
|
|
nlohmann::json j;
|
||
|
|
j["dir"] = dir;
|
||
|
|
j["totalFiles"] = totalFiles;
|
||
|
|
j["cleanFiles"] = cleanFiles;
|
||
|
|
j["issueCount"] = issues.size();
|
||
|
|
j["ok"] = ok;
|
||
|
|
nlohmann::json arr = nlohmann::json::array();
|
||
|
|
for (const auto& iss : issues) {
|
||
|
|
arr.push_back({
|
||
|
|
{"path", fs::relative(iss.path, dir).string()},
|
||
|
|
{"kind", issueKindLabel(iss.kind)},
|
||
|
|
{"detail", iss.detail},
|
||
|
|
});
|
||
|
|
}
|
||
|
|
j["issues"] = arr;
|
||
|
|
std::printf("%s\n", j.dump(2).c_str());
|
||
|
|
return ok ? 0 : 1;
|
||
|
|
}
|
||
|
|
std::printf("audit-tree: %s\n", dir.c_str());
|
||
|
|
std::printf(" total files : %llu\n",
|
||
|
|
static_cast<unsigned long long>(totalFiles));
|
||
|
|
std::printf(" clean wowee : %llu\n",
|
||
|
|
static_cast<unsigned long long>(cleanFiles));
|
||
|
|
std::printf(" issues found : %zu\n", issues.size());
|
||
|
|
if (ok) {
|
||
|
|
std::printf(" OK — no extension/magic mismatches, no truncated headers\n");
|
||
|
|
return 0;
|
||
|
|
}
|
||
|
|
// Group by issue kind for readable output.
|
||
|
|
auto printGroup = [&](IssueKind k, const char* heading) {
|
||
|
|
size_t n = 0;
|
||
|
|
for (const auto& iss : issues) if (iss.kind == k) ++n;
|
||
|
|
if (n == 0) return;
|
||
|
|
std::printf("\n %s (%zu):\n", heading, n);
|
||
|
|
for (const auto& iss : issues) {
|
||
|
|
if (iss.kind != k) continue;
|
||
|
|
std::printf(" %s\n",
|
||
|
|
fs::relative(iss.path, dir).string().c_str());
|
||
|
|
std::printf(" %s\n", iss.detail.c_str());
|
||
|
|
}
|
||
|
|
};
|
||
|
|
printGroup(IssueKind::TooSmall,
|
||
|
|
"Files too small to contain a header");
|
||
|
|
printGroup(IssueKind::UnknownMagic,
|
||
|
|
"Files with .w* extension but unrecognized magic");
|
||
|
|
printGroup(IssueKind::ExtensionMismatch,
|
||
|
|
"Extension/magic mismatch (renamed files?)");
|
||
|
|
printGroup(IssueKind::MagicWithoutExt,
|
||
|
|
"Wowee magic detected but no .w* extension");
|
||
|
|
printGroup(IssueKind::HeaderTruncated,
|
||
|
|
"Truncated headers (corrupted files)");
|
||
|
|
return 1;
|
||
|
|
}
|
||
|
|
|
||
|
|
} // namespace
|
||
|
|
|
||
|
|
bool handleAuditTree(int& i, int argc, char** argv, int& outRc) {
|
||
|
|
if (std::strcmp(argv[i], "--audit-tree") == 0 && i + 1 < argc) {
|
||
|
|
outRc = handleAudit(i, argc, argv); return true;
|
||
|
|
}
|
||
|
|
return false;
|
||
|
|
}
|
||
|
|
|
||
|
|
} // namespace cli
|
||
|
|
} // namespace editor
|
||
|
|
} // namespace wowee
|