From b3fa8cf5f3110f25c050f43fc232f54500f3bf9f Mon Sep 17 00:00:00 2001 From: k Date: Sat, 4 Apr 2026 00:22:07 -0700 Subject: [PATCH] fix: warden mmap on macOS, add external listfile support to asset extractor MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Drop PROT_EXEC from warden module mmap when using Unicorn emulation (not needed — module image is copied into emulator address space). Use MAP_JIT on macOS for the native fallback path. Add --listfile option to asset_extract and SFileAddListFileEntries support for resolving unnamed MPQ hash table entries from external listfiles. --- src/game/warden_module.cpp | 18 ++++++- tools/asset_extract/extractor.cpp | 87 +++++++++++++++++++++++++------ tools/asset_extract/extractor.hpp | 1 + tools/asset_extract/main.cpp | 21 ++++++++ 4 files changed, 110 insertions(+), 17 deletions(-) diff --git a/src/game/warden_module.cpp b/src/game/warden_module.cpp index 36be3f58..f3ea6723 100644 --- a/src/game/warden_module.cpp +++ b/src/game/warden_module.cpp @@ -535,11 +535,25 @@ bool WardenModule::parseExecutableFormat(const std::vector& exeData) { return false; } #else + // When using Unicorn emulation the module image is copied into the + // emulator's address space, so we only need read/write access here. + // Native execution paths (non-Unicorn) need PROT_EXEC; on macOS this + // requires MAP_JIT due to hardened-runtime restrictions. + #ifdef HAVE_UNICORN + int mmapProt = PROT_READ | PROT_WRITE; + int mmapFlags = MAP_PRIVATE | MAP_ANONYMOUS; + #elif defined(__APPLE__) + int mmapProt = PROT_READ | PROT_WRITE | PROT_EXEC; + int mmapFlags = MAP_PRIVATE | MAP_ANONYMOUS | MAP_JIT; + #else + int mmapProt = PROT_READ | PROT_WRITE | PROT_EXEC; + int mmapFlags = MAP_PRIVATE | MAP_ANONYMOUS; + #endif moduleMemory_ = mmap( nullptr, finalCodeSize, - PROT_READ | PROT_WRITE | PROT_EXEC, - MAP_PRIVATE | MAP_ANONYMOUS, + mmapProt, + mmapFlags, -1, 0 ); diff --git a/tools/asset_extract/extractor.cpp b/tools/asset_extract/extractor.cpp index 1df2d510..3c61bef3 100644 --- a/tools/asset_extract/extractor.cpp +++ b/tools/asset_extract/extractor.cpp @@ -537,10 +537,56 @@ static std::vector discoverArchives(const std::string& mpqDir, return result; } +// Read a text file into a vector of lines (for external listfile loading) +static std::vector readLines(const std::string& path) { + std::vector lines; + std::ifstream f(path); + if (!f) return lines; + std::string line; + while (std::getline(f, line)) { + // Trim trailing \r + if (!line.empty() && line.back() == '\r') line.pop_back(); + if (!line.empty()) lines.push_back(std::move(line)); + } + return lines; +} + +// Extract the (listfile) from an MPQ archive into a set of filenames +static void extractInternalListfile(HANDLE hMpq, std::set& out) { + HANDLE hFile = nullptr; + if (!SFileOpenFileEx(hMpq, "(listfile)", 0, &hFile)) return; + + DWORD size = SFileGetFileSize(hFile, nullptr); + if (size == SFILE_INVALID_SIZE || size == 0) { + SFileCloseFile(hFile); + return; + } + + std::vector buf(size); + DWORD bytesRead = 0; + if (!SFileReadFile(hFile, buf.data(), size, &bytesRead, nullptr)) { + SFileCloseFile(hFile); + return; + } + SFileCloseFile(hFile); + + // Parse newline/CR-delimited entries + std::string entry; + for (DWORD i = 0; i < bytesRead; ++i) { + if (buf[i] == '\n' || buf[i] == '\r') { + if (!entry.empty()) { + out.insert(std::move(entry)); + entry.clear(); + } + } else { + entry += buf[i]; + } + } + if (!entry.empty()) out.insert(std::move(entry)); +} + bool Extractor::enumerateFiles(const Options& opts, std::vector& outFiles) { - // Open all archives, enumerate files from highest priority to lowest. - // Use a set to deduplicate (highest-priority version wins). auto archives = discoverArchives(opts.mpqDir, opts.expansion, opts.locale); if (archives.empty()) { std::cerr << "No MPQ archives found in: " << opts.mpqDir << "\n"; @@ -549,12 +595,20 @@ bool Extractor::enumerateFiles(const Options& opts, std::cout << "Found " << archives.size() << " MPQ archives\n"; + // Load external listfile into memory once (avoids repeated file I/O) + std::vector externalEntries; + std::vector externalPtrs; + if (!opts.listFile.empty()) { + externalEntries = readLines(opts.listFile); + externalPtrs.reserve(externalEntries.size()); + for (const auto& e : externalEntries) externalPtrs.push_back(e.c_str()); + std::cout << " Loaded external listfile: " << externalEntries.size() << " entries\n"; + } + const auto wantedDbcs = buildWantedDbcSet(opts); + std::set seenNormalized; // Enumerate from highest priority first so first-seen files win - std::set seenNormalized; - std::vector> fileList; // (original name, archive path) - for (auto it = archives.rbegin(); it != archives.rend(); ++it) { HANDLE hMpq = nullptr; if (!SFileOpenArchive(it->path.c_str(), 0, 0, &hMpq)) { @@ -562,6 +616,14 @@ bool Extractor::enumerateFiles(const Options& opts, continue; } + // Inject external listfile entries into archive's in-memory name table. + // SFileAddListFileEntries is fast — it only hashes the names against the + // archive's hash table, no file I/O involved. + if (!externalPtrs.empty()) { + SFileAddListFileEntries(hMpq, externalPtrs.data(), + static_cast(externalPtrs.size())); + } + if (opts.verbose) { std::cout << " Scanning: " << it->path << " (priority " << it->priority << ")\n"; } @@ -571,28 +633,20 @@ bool Extractor::enumerateFiles(const Options& opts, if (hFind) { do { std::string fileName = findData.cFileName; - // Skip internal listfile/attributes if (fileName == "(listfile)" || fileName == "(attributes)" || fileName == "(signature)" || fileName == "(patch_metadata)") { continue; } - if (shouldSkipFile(opts, fileName)) { - continue; - } + if (shouldSkipFile(opts, fileName)) continue; - // Verify file actually exists in this archive's hash table - // (listfiles can reference files from other archives) - if (!SFileHasFile(hMpq, fileName.c_str())) { - continue; - } + if (!SFileHasFile(hMpq, fileName.c_str())) continue; std::string norm = normalizeWowPath(fileName); if (opts.onlyUsedDbcs && !wantedDbcs.empty() && !wantedDbcs.contains(norm)) { continue; } if (seenNormalized.insert(norm).second) { - // First time seeing this file — this is the highest-priority version outFiles.push_back(fileName); } } while (SFileFindNextFile(hFind, &findData)); @@ -674,6 +728,9 @@ bool Extractor::run(const Options& opts) { for (const auto& ad : archives) { HANDLE h = nullptr; if (SFileOpenArchive(ad.path.c_str(), 0, 0, &h)) { + if (!opts.listFile.empty()) { + SFileAddListFile(h, opts.listFile.c_str()); + } sharedHandles.push_back({h, ad.priority, ad.path}); } else { std::cerr << " Failed to open archive: " << ad.path << "\n"; diff --git a/tools/asset_extract/extractor.hpp b/tools/asset_extract/extractor.hpp index e9aa646d..48588273 100644 --- a/tools/asset_extract/extractor.hpp +++ b/tools/asset_extract/extractor.hpp @@ -26,6 +26,7 @@ public: bool onlyUsedDbcs = false; // Extract only the DBC files wowee uses (implies DBFilesClient/*.dbc filter) std::string dbcCsvOutputDir; // When set, write CSVs into this directory instead of outputDir/expansions//db std::string referenceManifest; // If set, only extract files NOT in this manifest (delta extraction) + std::string listFile; // External listfile for MPQ enumeration (resolves unnamed hash entries) }; struct Stats { diff --git a/tools/asset_extract/main.cpp b/tools/asset_extract/main.cpp index 6d9c27f5..0add3e99 100644 --- a/tools/asset_extract/main.cpp +++ b/tools/asset_extract/main.cpp @@ -20,6 +20,7 @@ static void printUsage(const char* prog) { << " --skip-dbc Do not extract DBFilesClient/*.dbc (visual assets only)\n" << " --dbc-csv Convert selected DBFilesClient/*.dbc to CSV under\n" << " /expansions//db/*.csv (for committing)\n" + << " --listfile External listfile for MPQ file enumeration (auto-detected)\n" << " --reference-manifest \n" << " Only extract files NOT in this manifest (delta extraction)\n" << " --dbc-csv-out Write CSV DBCs into (overrides default output path)\n" @@ -53,6 +54,8 @@ int main(int argc, char** argv) { opts.generateDbcCsv = true; } else if (std::strcmp(argv[i], "--dbc-csv-out") == 0 && i + 1 < argc) { opts.dbcCsvOutputDir = argv[++i]; + } else if (std::strcmp(argv[i], "--listfile") == 0 && i + 1 < argc) { + opts.listFile = argv[++i]; } else if (std::strcmp(argv[i], "--reference-manifest") == 0 && i + 1 < argc) { opts.referenceManifest = argv[++i]; } else if (std::strcmp(argv[i], "--verify") == 0) { @@ -99,6 +102,24 @@ int main(int argc, char** argv) { } opts.locale = locale; + // Auto-detect external listfile if not specified + if (opts.listFile.empty()) { + // Look next to the binary, then in the source tree + namespace fs = std::filesystem; + std::string binDir = fs::path(argv[0]).parent_path().string(); + for (const auto& candidate : { + binDir + "/listfile.txt", + binDir + "/../../../tools/asset_extract/listfile.txt", + opts.mpqDir + "/listfile.txt", + }) { + if (fs::exists(candidate)) { + opts.listFile = candidate; + std::cout << "Auto-detected listfile: " << candidate << "\n"; + break; + } + } + } + std::cout << "=== Wowee Asset Extractor ===\n"; std::cout << "MPQ directory: " << opts.mpqDir << "\n"; std::cout << "Output: " << opts.outputDir << "\n";