From 339256a794058af7bfd61284eb8488a8b2ea87be Mon Sep 17 00:00:00 2001
From: Kelsi
Date: Wed, 6 May 2026 18:33:29 -0700
Subject: [PATCH] feat(editor): add --info-extract-budget for sortable extract-dir byte breakdown
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Companion to --info-pack-budget (which operates on .wcp archives).
Per-extension byte breakdown of an extract dir, sorted largest-first.
Answers 'where did my 31 GB extract go?' with a flat sortable table:

  wowee_editor --info-extract-budget /home/k/Desktop/wowee/Data

  Extract budget: /home/k/Desktop/wowee/Data
    total: 284613 file(s), 31482.11 MB

    ext           count       bytes       MB  share
    .adt          11213 11985924414  11430.7  36.3%
    .wav          39396  8107038542   7731.5  24.6%
    .blp         133742  4990640480   4759.4  15.1%
    .m2           48466  2568180656   2449.2   7.8%
    .wmo          16526  2286454107   2180.5   6.9%
    .mp3           1222  1976864519   1885.3   6.0%
    ...

Caps to top 30 extensions with the rest rolled into '(other)' so big
extracts (this one has 30+ format types) don't drown the output.

Pairs with --info-extract-tree (hierarchical view) and --info-extract
(sidecar coverage) — three lenses on an extract directory: structure,
formats, byte costs.

Verified on a real 31GB Data/ extract: ADT files dominate at 36%
(11GB), with WAV audio second at 25% (8GB).
--- tools/editor/main.cpp | 91 ++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 90 insertions(+), 1 deletion(-) diff --git a/tools/editor/main.cpp b/tools/editor/main.cpp index 71382475..a2b6211c 100644 --- a/tools/editor/main.cpp +++ b/tools/editor/main.cpp @@ -701,6 +701,8 @@ static void printUsage(const char* argv0) { std::printf(" Walk extracted asset tree and report open-format coverage and exit\n"); std::printf(" --info-extract-tree \n"); std::printf(" Hierarchical view of an extracted asset tree grouped by top-level dir + format\n"); + std::printf(" --info-extract-budget [--json]\n"); + std::printf(" Per-extension byte breakdown of an extract dir (sized largest-first)\n"); std::printf(" --info-png [--json]\n"); std::printf(" Print PNG header (width, height, channels, bit depth) and exit\n"); std::printf(" --info-blp [--json]\n"); @@ -807,7 +809,8 @@ int main(int argc, char* argv[]) { "--info-bones", "--list-zone-textures", "--info-wob", "--info-woc", "--info-wot", "--info-creatures", "--info-objects", "--info-quests", - "--info-extract", "--info-extract-tree", "--list-missing-sidecars", + "--info-extract", "--info-extract-tree", "--info-extract-budget", + "--list-missing-sidecars", "--info-png", "--info-jsondbc", "--info-blp", "--info-pack-budget", "--info-m2", "--info-wmo", "--info-adt", "--info-zone", "--info-wcp", "--list-wcp", @@ -1913,6 +1916,92 @@ int main(int argc, char* argv[]) { } } return 0; + } else if (std::strcmp(argv[i], "--info-extract-budget") == 0 && i + 1 < argc) { + // Per-extension byte breakdown of an extract dir, sorted + // largest-first. Companion to --info-pack-budget (which + // operates on .wcp archives) — this answers 'where did my + // 31 GB extract go?' with a flat sortable table. 
+ std::string dataDir = argv[++i]; + bool jsonOut = (i + 1 < argc && + std::strcmp(argv[i + 1], "--json") == 0); + if (jsonOut) i++; + namespace fs = std::filesystem; + if (!fs::exists(dataDir) || !fs::is_directory(dataDir)) { + std::fprintf(stderr, + "info-extract-budget: %s is not a directory\n", + dataDir.c_str()); + return 1; + } + std::map> byExt; + uint64_t totalBytes = 0; + int totalFiles = 0; + std::error_code ec; + for (const auto& entry : fs::recursive_directory_iterator(dataDir, ec)) { + if (!entry.is_regular_file()) continue; + std::string ext = entry.path().extension().string(); + std::transform(ext.begin(), ext.end(), ext.begin(), + [](unsigned char c) { return std::tolower(c); }); + if (ext.empty()) ext = "(no-ext)"; + uint64_t sz = entry.file_size(ec); + if (ec) continue; + byExt[ext].first++; + byExt[ext].second += sz; + totalBytes += sz; + totalFiles++; + } + std::vector>> sorted( + byExt.begin(), byExt.end()); + std::sort(sorted.begin(), sorted.end(), + [](const auto& a, const auto& b) { + return a.second.second > b.second.second; + }); + if (jsonOut) { + nlohmann::json j; + j["dir"] = dataDir; + j["totalFiles"] = totalFiles; + j["totalBytes"] = totalBytes; + nlohmann::json arr = nlohmann::json::array(); + for (const auto& [ext, cb] : sorted) { + arr.push_back({{"ext", ext}, + {"count", cb.first}, + {"bytes", cb.second}}); + } + j["byExtension"] = arr; + std::printf("%s\n", j.dump(2).c_str()); + return 0; + } + std::printf("Extract budget: %s\n", dataDir.c_str()); + std::printf(" total: %d file(s), %.2f MB\n", + totalFiles, totalBytes / (1024.0 * 1024.0)); + std::printf("\n ext count bytes MB share\n"); + // Cap to top 30 to keep output manageable on huge extracts; + // suppressed entries roll into 'other'. + const size_t kTopN = 30; + uint64_t otherBytes = 0; + int otherCount = 0; + for (size_t k = 0; k < sorted.size(); ++k) { + if (k < kTopN) { + const auto& [ext, cb] = sorted[k]; + double pct = totalBytes > 0 + ? 
100.0 * cb.second / totalBytes : 0.0; + std::printf(" %-12s %6d %11llu %8.1f %5.1f%%\n", + ext.c_str(), cb.first, + static_cast(cb.second), + cb.second / (1024.0 * 1024.0), pct); + } else { + otherBytes += sorted[k].second.second; + otherCount += sorted[k].second.first; + } + } + if (otherCount > 0) { + double pct = totalBytes > 0 ? 100.0 * otherBytes / totalBytes : 0.0; + std::printf(" %-12s %6d %11llu %8.1f %5.1f%% (%zu more extensions)\n", + "(other)", otherCount, + static_cast(otherBytes), + otherBytes / (1024.0 * 1024.0), pct, + sorted.size() - kTopN); + } + return 0; } else if (std::strcmp(argv[i], "--list-missing-sidecars") == 0 && i + 1 < argc) { // Actionable counterpart to --info-extract: emit one line per // proprietary file lacking its open-format sidecar. Pipe into