fix: warden mmap on macOS, add external listfile support to asset extractor

Drop PROT_EXEC from warden module mmap when using Unicorn emulation
(not needed — module image is copied into emulator address space). Use
MAP_JIT on macOS for the native fallback path.

Add a --listfile option to asset_extract and use SFileAddListFileEntries
to resolve unnamed MPQ hash table entries from an external
listfile.
This commit is contained in:
k 2026-04-04 00:22:07 -07:00
parent 84108c44f5
commit b3fa8cf5f3
4 changed files with 110 additions and 17 deletions

View file

@ -535,11 +535,25 @@ bool WardenModule::parseExecutableFormat(const std::vector<uint8_t>& exeData) {
return false;
}
#else
// When using Unicorn emulation the module image is copied into the
// emulator's address space, so we only need read/write access here.
// Native execution paths (non-Unicorn) need PROT_EXEC; on macOS this
// requires MAP_JIT due to hardened-runtime restrictions.
#ifdef HAVE_UNICORN
int mmapProt = PROT_READ | PROT_WRITE;
int mmapFlags = MAP_PRIVATE | MAP_ANONYMOUS;
#elif defined(__APPLE__)
int mmapProt = PROT_READ | PROT_WRITE | PROT_EXEC;
int mmapFlags = MAP_PRIVATE | MAP_ANONYMOUS | MAP_JIT;
#else
int mmapProt = PROT_READ | PROT_WRITE | PROT_EXEC;
int mmapFlags = MAP_PRIVATE | MAP_ANONYMOUS;
#endif
moduleMemory_ = mmap(
nullptr,
finalCodeSize,
PROT_READ | PROT_WRITE | PROT_EXEC,
MAP_PRIVATE | MAP_ANONYMOUS,
mmapProt,
mmapFlags,
-1,
0
);

View file

@ -537,10 +537,56 @@ static std::vector<ArchiveDesc> discoverArchives(const std::string& mpqDir,
return result;
}
// Read a text file into a vector of non-empty lines (used to load an
// external listfile).
//
// - Returns an empty vector when the file cannot be opened.
// - A single trailing '\r' is removed from each line, so CRLF files work
//   on platforms whose text streams do not translate line endings.
// - A leading UTF-8 byte-order mark (EF BB BF) is stripped; otherwise it
//   would stay attached to the first entry and that name would never match.
// - Lines that are empty after trimming are skipped.
static std::vector<std::string> readLines(const std::string& path) {
    std::vector<std::string> lines;
    std::ifstream f(path);
    if (!f) return lines;

    bool firstLine = true;
    std::string line;
    while (std::getline(f, line)) {
        if (firstLine) {
            firstLine = false;
            // compare() clamps the length, so short/empty lines are safe here.
            if (line.compare(0, 3, "\xEF\xBB\xBF") == 0) line.erase(0, 3);
        }
        // Trim trailing \r
        if (!line.empty() && line.back() == '\r') line.pop_back();
        // std::getline overwrites `line` on the next iteration, so moving
        // out of it here is fine.
        if (!line.empty()) lines.push_back(std::move(line));
    }
    return lines;
}
// Collect the names stored in an archive's internal "(listfile)".
//
// Opens "(listfile)" inside hMpq, reads it fully into memory and inserts
// every non-empty run of characters delimited by '\n' or '\r' into `out`.
// `out` is left untouched when the file cannot be opened, reports an
// invalid or zero size, or the read call fails.
static void extractInternalListfile(HANDLE hMpq, std::set<std::string>& out) {
    HANDLE listHandle = nullptr;
    if (!SFileOpenFileEx(hMpq, "(listfile)", 0, &listHandle)) {
        return;
    }

    // Pull the whole file into `contents`; the handle is closed exactly once
    // below regardless of which step failed.
    std::vector<char> contents;
    DWORD got = 0;
    bool readOk = false;
    const DWORD total = SFileGetFileSize(listHandle, nullptr);
    if (total != SFILE_INVALID_SIZE && total != 0) {
        contents.resize(total);
        if (SFileReadFile(listHandle, contents.data(), total, &got, nullptr)) {
            readOk = true;
        }
    }
    SFileCloseFile(listHandle);
    if (!readOk) {
        return;
    }

    // Split on line-break characters, only looking at the bytes actually read.
    std::string current;
    for (DWORD pos = 0; pos < got; ++pos) {
        const char ch = contents[pos];
        const bool isBreak = (ch == '\n' || ch == '\r');
        if (!isBreak) {
            current += ch;
            continue;
        }
        if (current.empty()) {
            continue;  // consecutive delimiters (e.g. "\r\n") yield no entry
        }
        out.insert(std::move(current));
        current.clear();
    }

    // Final entry when the data does not end with a line break.
    if (!current.empty()) {
        out.insert(std::move(current));
    }
}
bool Extractor::enumerateFiles(const Options& opts,
std::vector<std::string>& outFiles) {
// Open all archives, enumerate files from highest priority to lowest.
// Use a set to deduplicate (highest-priority version wins).
auto archives = discoverArchives(opts.mpqDir, opts.expansion, opts.locale);
if (archives.empty()) {
std::cerr << "No MPQ archives found in: " << opts.mpqDir << "\n";
@ -549,12 +595,20 @@ bool Extractor::enumerateFiles(const Options& opts,
std::cout << "Found " << archives.size() << " MPQ archives\n";
// Load external listfile into memory once (avoids repeated file I/O)
std::vector<std::string> externalEntries;
std::vector<const char*> externalPtrs;
if (!opts.listFile.empty()) {
externalEntries = readLines(opts.listFile);
externalPtrs.reserve(externalEntries.size());
for (const auto& e : externalEntries) externalPtrs.push_back(e.c_str());
std::cout << " Loaded external listfile: " << externalEntries.size() << " entries\n";
}
const auto wantedDbcs = buildWantedDbcSet(opts);
std::set<std::string> seenNormalized;
// Enumerate from highest priority first so first-seen files win
std::set<std::string> seenNormalized;
std::vector<std::pair<std::string, std::string>> fileList; // (original name, archive path)
for (auto it = archives.rbegin(); it != archives.rend(); ++it) {
HANDLE hMpq = nullptr;
if (!SFileOpenArchive(it->path.c_str(), 0, 0, &hMpq)) {
@ -562,6 +616,14 @@ bool Extractor::enumerateFiles(const Options& opts,
continue;
}
// Inject external listfile entries into archive's in-memory name table.
// SFileAddListFileEntries is fast — it only hashes the names against the
// archive's hash table, no file I/O involved.
if (!externalPtrs.empty()) {
SFileAddListFileEntries(hMpq, externalPtrs.data(),
static_cast<DWORD>(externalPtrs.size()));
}
if (opts.verbose) {
std::cout << " Scanning: " << it->path << " (priority " << it->priority << ")\n";
}
@ -571,28 +633,20 @@ bool Extractor::enumerateFiles(const Options& opts,
if (hFind) {
do {
std::string fileName = findData.cFileName;
// Skip internal listfile/attributes
if (fileName == "(listfile)" || fileName == "(attributes)" ||
fileName == "(signature)" || fileName == "(patch_metadata)") {
continue;
}
if (shouldSkipFile(opts, fileName)) {
continue;
}
if (shouldSkipFile(opts, fileName)) continue;
// Verify file actually exists in this archive's hash table
// (listfiles can reference files from other archives)
if (!SFileHasFile(hMpq, fileName.c_str())) {
continue;
}
if (!SFileHasFile(hMpq, fileName.c_str())) continue;
std::string norm = normalizeWowPath(fileName);
if (opts.onlyUsedDbcs && !wantedDbcs.empty() && !wantedDbcs.contains(norm)) {
continue;
}
if (seenNormalized.insert(norm).second) {
// First time seeing this file — this is the highest-priority version
outFiles.push_back(fileName);
}
} while (SFileFindNextFile(hFind, &findData));
@ -674,6 +728,9 @@ bool Extractor::run(const Options& opts) {
for (const auto& ad : archives) {
HANDLE h = nullptr;
if (SFileOpenArchive(ad.path.c_str(), 0, 0, &h)) {
if (!opts.listFile.empty()) {
SFileAddListFile(h, opts.listFile.c_str());
}
sharedHandles.push_back({h, ad.priority, ad.path});
} else {
std::cerr << " Failed to open archive: " << ad.path << "\n";

View file

@ -26,6 +26,7 @@ public:
bool onlyUsedDbcs = false; // Extract only the DBC files wowee uses (implies DBFilesClient/*.dbc filter)
std::string dbcCsvOutputDir; // When set, write CSVs into this directory instead of outputDir/expansions/<exp>/db
std::string referenceManifest; // If set, only extract files NOT in this manifest (delta extraction)
std::string listFile; // External listfile for MPQ enumeration (resolves unnamed hash entries)
};
struct Stats {

View file

@ -20,6 +20,7 @@ static void printUsage(const char* prog) {
<< " --skip-dbc Do not extract DBFilesClient/*.dbc (visual assets only)\n"
<< " --dbc-csv Convert selected DBFilesClient/*.dbc to CSV under\n"
<< " <output>/expansions/<expansion>/db/*.csv (for committing)\n"
<< " --listfile <path> External listfile for MPQ file enumeration (auto-detected)\n"
<< " --reference-manifest <path>\n"
<< " Only extract files NOT in this manifest (delta extraction)\n"
<< " --dbc-csv-out <dir> Write CSV DBCs into <dir> (overrides default output path)\n"
@ -53,6 +54,8 @@ int main(int argc, char** argv) {
opts.generateDbcCsv = true;
} else if (std::strcmp(argv[i], "--dbc-csv-out") == 0 && i + 1 < argc) {
opts.dbcCsvOutputDir = argv[++i];
} else if (std::strcmp(argv[i], "--listfile") == 0 && i + 1 < argc) {
opts.listFile = argv[++i];
} else if (std::strcmp(argv[i], "--reference-manifest") == 0 && i + 1 < argc) {
opts.referenceManifest = argv[++i];
} else if (std::strcmp(argv[i], "--verify") == 0) {
@ -99,6 +102,24 @@ int main(int argc, char** argv) {
}
opts.locale = locale;
// Auto-detect external listfile if not specified
if (opts.listFile.empty()) {
// Look next to the binary, then in the source tree
namespace fs = std::filesystem;
std::string binDir = fs::path(argv[0]).parent_path().string();
for (const auto& candidate : {
binDir + "/listfile.txt",
binDir + "/../../../tools/asset_extract/listfile.txt",
opts.mpqDir + "/listfile.txt",
}) {
if (fs::exists(candidate)) {
opts.listFile = candidate;
std::cout << "Auto-detected listfile: " << candidate << "\n";
break;
}
}
}
std::cout << "=== Wowee Asset Extractor ===\n";
std::cout << "MPQ directory: " << opts.mpqDir << "\n";
std::cout << "Output: " << opts.outputDir << "\n";