Merge pull request #47 from sschepens/patch-2

refactor asset extractor
This commit is contained in:
Kelsi Rae Davis 2026-04-05 01:10:28 -07:00 committed by GitHub
commit 50fdfd2e22
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
4 changed files with 147 additions and 353 deletions

24
.gitignore vendored
View file

@ -91,27 +91,9 @@ saves/
wowee_[0-9][0-9][0-9][0-9]
# Extracted assets (run ./extract_assets.sh or .\extract_assets.ps1 to generate)
Data/db/
Data/character/
Data/creature/
Data/terrain/
Data/world/
Data/interface/
Data/item/
Data/sound/
Data/spell/
Data/environment/
Data/misc/
Data/enUS/
Data/Character/
Data/Creature/
Data/World/
Data/manifest.json
Data/expansions/*/manifest.json
Data/expansions/*/assets/
Data/expansions/*/overlay/
Data/expansions/*/db/*.csv
Data/hd/
Data/*
!Data/opcodes
ingest/
# Asset pipeline state and texture packs

View file

@ -14,7 +14,6 @@
#include <iostream>
#include <mutex>
#include <set>
#include <sstream>
#include <thread>
#include <unordered_map>
#include <unordered_set>
@ -32,12 +31,6 @@ namespace tools {
namespace fs = std::filesystem;
using wowee::pipeline::DBCFile;
// Archive descriptor for priority-based loading.
// Descriptors are sorted by ascending `priority`, so the highest-priority
// archive ends up last in the list.
struct ArchiveDesc {
    std::string path;   // Path handed to the MPQ open call
    int priority = 0;   // Sort key; initialized so a default-constructed descriptor is well-defined
};
static std::string toLowerStr(std::string s) {
std::transform(s.begin(), s.end(), s.begin(),
[](unsigned char c) { return static_cast<char>(std::tolower(c)); });
@ -394,145 +387,148 @@ std::string Extractor::detectExpansion(const std::string& mpqDir) {
return "";
}
std::string Extractor::detectLocale(const std::string& mpqDir) {
for (const auto& loc : kKnownLocales) {
if (fs::is_directory(mpqDir + "/" + loc))
return loc;
// Look up an immediate subdirectory of `parentDir` whose name matches
// `directoryName` once both are lowercased with toLowerStr().
// Returns the name exactly as it is spelled on disk, or an empty string when
// `parentDir` is not an existing directory or no subdirectory matches.
static std::string findCaseInsensitiveDirectory(const std::string& parentDir,
                                                const std::string& directoryName) {
    const bool parentUsable = fs::exists(parentDir) && fs::is_directory(parentDir);
    if (!parentUsable) {
        return "";
    }

    const std::string wanted = toLowerStr(directoryName);
    for (const auto& child : fs::directory_iterator(parentDir)) {
        if (child.is_directory()) {
            std::string onDiskName = child.path().filename().string();
            if (toLowerStr(onDiskName) == wanted) {
                return onDiskName;
            }
        }
    }
    return "";
}
// Detect which known client locale has a directory directly under `mpqDir`.
//
// Directory names are compared case-insensitively against kKnownLocales.
// Candidates are tried in kKnownLocales order, so when several locale
// directories exist the result is deterministic; the order in which
// fs::directory_iterator yields entries is unspecified and must not decide
// the outcome.
//
// Returns the directory name as spelled on disk, or an empty string when
// `mpqDir` is not an existing directory or contains no known locale directory.
std::string Extractor::detectLocale(const std::string& mpqDir) {
    if (!fs::exists(mpqDir) || !fs::is_directory(mpqDir)) return "";

    // Index subdirectories once: lowercased name -> on-disk name.
    std::unordered_map<std::string, std::string> dirsByLowerName;
    for (const auto& entry : fs::directory_iterator(mpqDir)) {
        if (!entry.is_directory()) continue;
        std::string name = entry.path().filename().string();
        dirsByLowerName.emplace(toLowerStr(name), name);
    }

    // Walk the known locales in their declared order; the first one present wins.
    for (const auto& loc : kKnownLocales) {
        auto it = dirsByLowerName.find(toLowerStr(loc));
        if (it != dirsByLowerName.end()) {
            return it->second;
        }
    }
    return "";
}
// Index the .mpq files located directly inside `dir`.
// Keys are the lowercased file names; values keep the on-disk spelling, so a
// lookup by lowercase name recovers the real file name. Returns an empty map
// when `dir` is not an existing directory.
static std::unordered_map<std::string, std::string> buildCaseMap(const std::string& dir) {
    std::unordered_map<std::string, std::string> archivesByLowerName;
    const bool dirUsable = fs::exists(dir) && fs::is_directory(dir);
    if (!dirUsable) return archivesByLowerName;

    for (const auto& item : fs::directory_iterator(dir)) {
        if (!item.is_regular_file()) continue;

        const std::string onDiskName = item.path().filename().string();
        // Skip names starting with "._" (presumably macOS AppleDouble sidecar
        // files - confirm with the author).
        if (onDiskName.compare(0, 2, "._") == 0) continue;

        // Only archives count; the extension check ignores case.
        if (toLowerStr(item.path().extension().string()) != ".mpq") continue;

        archivesByLowerName[toLowerStr(onDiskName)] = onDiskName;
    }
    return archivesByLowerName;
}
// Discover archive files with expansion-specific and locale-aware loading
static std::vector<ArchiveDesc> discoverArchives(const std::string& mpqDir,
static std::vector<std::string> discoverArchives(const std::string& mpqDir,
const std::string& expansion,
const std::string& locale) {
std::vector<ArchiveDesc> result;
std::vector<std::string> result;
auto tryAdd = [&](const std::string& name, int prio) {
std::string fullPath = mpqDir + "/" + name;
if (fs::exists(fullPath)) {
result.push_back({fullPath, prio});
auto caseMap = buildCaseMap(mpqDir);
std::string lowerLocale = toLowerStr(locale);
if (!locale.empty()) {
std::string actualLocaleDir = findCaseInsensitiveDirectory(mpqDir, locale);
if (actualLocaleDir.empty()) {
actualLocaleDir = locale;
}
};
if (expansion == "classic" || expansion == "turtle") {
// Vanilla-era base archives (also used by Turtle WoW clients)
tryAdd("base.MPQ", 90);
tryAdd("base.mpq", 90);
tryAdd("backup.MPQ", 95);
tryAdd("backup.mpq", 95);
tryAdd("dbc.MPQ", 100);
tryAdd("dbc.mpq", 100);
tryAdd("fonts.MPQ", 100);
tryAdd("fonts.mpq", 100);
tryAdd("interface.MPQ", 100);
tryAdd("interface.mpq", 100);
tryAdd("misc.MPQ", 100);
tryAdd("misc.mpq", 100);
tryAdd("model.MPQ", 100);
tryAdd("model.mpq", 100);
tryAdd("sound.MPQ", 100);
tryAdd("sound.mpq", 100);
tryAdd("speech.MPQ", 100);
tryAdd("speech.mpq", 100);
tryAdd("terrain.MPQ", 100);
tryAdd("terrain.mpq", 100);
tryAdd("texture.MPQ", 100);
tryAdd("texture.mpq", 100);
tryAdd("wmo.MPQ", 100);
tryAdd("wmo.mpq", 100);
// Patches
tryAdd("patch.MPQ", 150);
tryAdd("patch.mpq", 150);
for (int i = 1; i <= 9; ++i) {
tryAdd("patch-" + std::to_string(i) + ".MPQ", 160 + (i * 10));
tryAdd("patch-" + std::to_string(i) + ".mpq", 160 + (i * 10));
}
// Turtle WoW uses letter patch MPQs (patch-a.mpq ... patch-z.mpq).
for (char c = 'a'; c <= 'z'; ++c) {
tryAdd(std::string("patch-") + c + ".mpq", 800 + (c - 'a'));
tryAdd(std::string("Patch-") + static_cast<char>(std::toupper(c)) + ".mpq", 900 + (c - 'a'));
}
// Locale
if (!locale.empty()) {
tryAdd(locale + "/base-" + locale + ".MPQ", 230);
tryAdd(locale + "/speech-" + locale + ".MPQ", 240);
tryAdd(locale + "/locale-" + locale + ".MPQ", 250);
tryAdd(locale + "/patch-" + locale + ".MPQ", 450);
}
} else if (expansion == "tbc") {
// TBC 2.4.x base archives
tryAdd("common.MPQ", 100);
tryAdd("common-2.MPQ", 100);
tryAdd("expansion.MPQ", 100);
// Patches
tryAdd("patch.MPQ", 150);
tryAdd("patch-2.MPQ", 200);
tryAdd("patch-3.MPQ", 300);
tryAdd("patch-4.MPQ", 400);
tryAdd("patch-5.MPQ", 500);
// Letter patches
for (char c = 'a'; c <= 'z'; ++c) {
tryAdd(std::string("patch-") + c + ".mpq", 800 + (c - 'a'));
tryAdd(std::string("Patch-") + static_cast<char>(std::toupper(c)) + ".mpq", 900 + (c - 'a'));
}
// Locale
if (!locale.empty()) {
tryAdd(locale + "/backup-" + locale + ".MPQ", 225);
tryAdd(locale + "/base-" + locale + ".MPQ", 230);
tryAdd(locale + "/speech-" + locale + ".MPQ", 240);
tryAdd(locale + "/expansion-speech-" + locale + ".MPQ", 245);
tryAdd(locale + "/expansion-locale-" + locale + ".MPQ", 246);
tryAdd(locale + "/locale-" + locale + ".MPQ", 250);
tryAdd(locale + "/patch-" + locale + ".MPQ", 450);
tryAdd(locale + "/patch-" + locale + "-2.MPQ", 460);
tryAdd(locale + "/patch-" + locale + "-3.MPQ", 470);
}
} else {
// WotLK 3.3.5a (default)
tryAdd("common.MPQ", 100);
tryAdd("common-2.MPQ", 100);
tryAdd("expansion.MPQ", 100);
tryAdd("lichking.MPQ", 100);
// Patches
tryAdd("patch.MPQ", 150);
tryAdd("patch-2.MPQ", 200);
tryAdd("patch-3.MPQ", 300);
tryAdd("patch-4.MPQ", 400);
tryAdd("patch-5.MPQ", 500);
// Letter patches
for (char c = 'a'; c <= 'z'; ++c) {
tryAdd(std::string("patch-") + c + ".mpq", 800 + (c - 'a'));
tryAdd(std::string("Patch-") + static_cast<char>(std::toupper(c)) + ".mpq", 900 + (c - 'a'));
}
// Locale
if (!locale.empty()) {
tryAdd(locale + "/backup-" + locale + ".MPQ", 225);
tryAdd(locale + "/base-" + locale + ".MPQ", 230);
tryAdd(locale + "/speech-" + locale + ".MPQ", 240);
tryAdd(locale + "/expansion-speech-" + locale + ".MPQ", 245);
tryAdd(locale + "/expansion-locale-" + locale + ".MPQ", 246);
tryAdd(locale + "/lichking-speech-" + locale + ".MPQ", 248);
tryAdd(locale + "/lichking-locale-" + locale + ".MPQ", 249);
tryAdd(locale + "/locale-" + locale + ".MPQ", 250);
tryAdd(locale + "/patch-" + locale + ".MPQ", 450);
tryAdd(locale + "/patch-" + locale + "-2.MPQ", 460);
tryAdd(locale + "/patch-" + locale + "-3.MPQ", 470);
fs::path localeDirPath = fs::path(mpqDir) / actualLocaleDir;
std::string localeDir = localeDirPath.string();
auto localeMap = buildCaseMap(localeDir);
for (auto& [name, realName] : localeMap) {
fs::path fullPath = fs::path(actualLocaleDir) / realName;
caseMap[lowerLocale + "/" + name] = fullPath.string();
}
}
// Sort by priority so highest-priority archives are last
// (we'll iterate highest-prio first when extracting)
std::sort(result.begin(), result.end(),
[](const ArchiveDesc& a, const ArchiveDesc& b) { return a.priority < b.priority; });
std::vector<std::string> baseSequence;
std::vector<std::string> localeSequence;
if (expansion == "classic" || expansion == "turtle") {
baseSequence = {
"base.mpq", "backup.mpq", "dbc.mpq", "fonts.mpq",
"interface.mpq", "misc.mpq", "model.mpq", "sound.mpq",
"speech.mpq", "terrain.mpq", "texture.mpq", "wmo.mpq"
};
} else if (expansion == "tbc") {
baseSequence = { "common.mpq", "expansion.mpq" };
if (!locale.empty()) {
localeSequence = {
lowerLocale + "/backup-" + lowerLocale + ".mpq",
lowerLocale + "/base-" + lowerLocale + ".mpq",
lowerLocale + "/locale-" + lowerLocale + ".mpq",
lowerLocale + "/speech-" + lowerLocale + ".mpq",
lowerLocale + "/expansion-locale-" + lowerLocale + ".mpq",
lowerLocale + "/expansion-speech-" + lowerLocale + ".mpq",
};
}
} else {
baseSequence = { "common.mpq", "common-2.mpq", "expansion.mpq", "lichking.mpq" };
if (!locale.empty()) {
localeSequence = {
lowerLocale + "/backup-" + lowerLocale + ".mpq",
lowerLocale + "/base-" + lowerLocale + ".mpq",
lowerLocale + "/locale-" + lowerLocale + ".mpq",
lowerLocale + "/speech-" + lowerLocale + ".mpq",
lowerLocale + "/expansion-locale-" + lowerLocale + ".mpq",
lowerLocale + "/expansion-speech-" + lowerLocale + ".mpq",
lowerLocale + "/lichking-locale-" + lowerLocale + ".mpq",
lowerLocale + "/lichking-speech-" + lowerLocale + ".mpq",
};
}
}
std::vector<std::string> sequence;
for (const auto& name : baseSequence) {
sequence.push_back(name);
}
for (const auto& name : localeSequence) {
sequence.push_back(name);
}
// Interleave patches: base patch then locale patch for each tier
std::vector<std::string> patchSuffixes = {""};
for (int i = 2; i <= 9; ++i) {
patchSuffixes.push_back(std::string("-") + std::to_string(i));
}
for (char c = 'a'; c <= 'z'; ++c) {
patchSuffixes.push_back(std::string("-") + c);
}
for (const auto& suffix : patchSuffixes) {
sequence.push_back("patch" + suffix + ".mpq");
if (!locale.empty()) {
sequence.push_back(lowerLocale + "/patch-" + lowerLocale + suffix + ".mpq");
}
}
auto addIfPresent = [&](const std::string& expected) {
auto it = caseMap.find(toLowerStr(expected));
if (it != caseMap.end()) {
fs::path fullPath = fs::path(mpqDir) / it->second;
result.push_back(fullPath.string());
}
};
for (const auto& entry : sequence) {
addIfPresent(entry);
}
return result;
}
@ -592,8 +588,8 @@ bool Extractor::enumerateFiles(const Options& opts,
// Enumerate from highest priority first so first-seen files win
for (auto it = archives.rbegin(); it != archives.rend(); ++it) {
HANDLE hMpq = nullptr;
if (!SFileOpenArchive(it->path.c_str(), 0, 0, &hMpq)) {
std::cerr << " Failed to open: " << it->path << "\n";
if (!SFileOpenArchive(it->c_str(), 0, 0, &hMpq)) {
std::cerr << " Failed to open: " << *it << "\n";
continue;
}
@ -605,7 +601,7 @@ bool Extractor::enumerateFiles(const Options& opts,
}
if (opts.verbose) {
std::cout << " Scanning: " << it->path << " (priority " << it->priority << ")\n";
std::cout << " Scanning: " << *it << "\n";
}
SFILE_FIND_DATA findData;
@ -701,19 +697,18 @@ bool Extractor::run(const Options& opts) {
// thread-safe even with separate handles, so we serialize all MPQ reads.
struct SharedArchive {
HANDLE handle;
int priority;
std::string path;
};
std::vector<SharedArchive> sharedHandles;
for (const auto& ad : archives) {
for (const auto& path : archives) {
HANDLE h = nullptr;
if (SFileOpenArchive(ad.path.c_str(), 0, 0, &h)) {
if (SFileOpenArchive(path.c_str(), 0, 0, &h)) {
if (!opts.listFile.empty()) {
SFileAddListFile(h, opts.listFile.c_str());
}
sharedHandles.push_back({h, ad.priority, ad.path});
sharedHandles.push_back({h, path});
} else {
std::cerr << " Failed to open archive: " << ad.path << "\n";
std::cerr << " Failed to open archive: " << path << "\n";
}
}
if (sharedHandles.empty()) {
@ -740,7 +735,7 @@ bool Extractor::run(const Options& opts) {
// Map to new filesystem path
std::string mappedPath = PathMapper::mapPath(wowPath);
std::string fullOutputPath = effectiveOutputDir + "/" + mappedPath;
fs::path fullOutputPath = fs::path(effectiveOutputDir) / mappedPath;
// Read file data from MPQ under lock
std::vector<uint8_t> data;
@ -869,7 +864,7 @@ bool Extractor::run(const Options& opts) {
}
// Merge with existing manifest so partial extractions don't nuke prior entries
std::string manifestPath = effectiveOutputDir + "/manifest.json";
fs::path manifestPath = fs::path(effectiveOutputDir) / "manifest.json";
if (fs::exists(manifestPath)) {
auto existing = loadManifestEntries(manifestPath);
if (!existing.empty()) {
@ -906,7 +901,7 @@ bool Extractor::run(const Options& opts) {
std::cout << "Verifying extracted files...\n";
uint64_t verified = 0, verifyFailed = 0;
for (const auto& entry : manifestEntries) {
std::string fsPath = effectiveOutputDir + "/" + entry.filesystemPath;
fs::path fsPath = fs::path(effectiveOutputDir) / entry.filesystemPath;
std::ifstream f(fsPath, std::ios::binary | std::ios::ate);
if (!f.is_open()) {
std::cerr << " MISSING: " << fsPath << "\n";

View file

@ -18,189 +18,10 @@ std::string PathMapper::toForwardSlash(const std::string& str) {
return result;
}
// Report whether `str` begins with `prefix`, comparing characters after
// std::tolower so letter case is ignored. An empty prefix always matches.
bool PathMapper::startsWithCI(const std::string& str, const std::string& prefix) {
    const size_t prefixLen = prefix.size();
    if (prefixLen > str.size()) return false;

    size_t pos = 0;
    while (pos < prefixLen) {
        // Cast through unsigned char: std::tolower is undefined for negative values.
        const int lhs = std::tolower(static_cast<unsigned char>(str[pos]));
        const int rhs = std::tolower(static_cast<unsigned char>(prefix[pos]));
        if (lhs != rhs) return false;
        ++pos;
    }
    return true;
}
// Return the part of `path` that follows its first `prefixLen` characters,
// or an empty string when the prefix covers the whole path (or more).
std::string PathMapper::extractAfterPrefix(const std::string& path, size_t prefixLen) {
    return (prefixLen < path.size()) ? path.substr(prefixLen) : std::string();
}
std::string PathMapper::mapPath(const std::string& wowPath) {
    // WoW archives hold mixed-case variants of the same path; on a
    // case-sensitive filesystem those would become duplicate directories,
    // so the whole mapped path is lowercased before it is returned.
    const std::string categorized = mapPathImpl(wowPath);
    return toLower(categorized);
}
// Translate a WoW virtual path (backslash-separated) into a category-based
// relative path (forward-slash separated).
//
// Prefixes are matched case-insensitively and tested most-specific first
// within each family (e.g. "Interface\Icons\" before "Interface\"). The
// number of characters stripped is always taken from the matched prefix
// itself, so a prefix and its strip length cannot drift apart and leave a
// stray separator in the result.
//
// Paths that match no known prefix are placed under "misc/".
std::string PathMapper::mapPathImpl(const std::string& wowPath) {
    std::string rest;

    // When wowPath starts with `prefix`, store what follows in `rest`.
    auto stripPrefix = [&](const std::string& prefix) {
        if (!startsWithCI(wowPath, prefix)) return false;
        rest = extractAfterPrefix(wowPath, prefix.size());
        return true;
    };

    // Switch to forward slashes and lowercase only the first path component;
    // anything after the first slash keeps its case.
    auto lowerFirstComponent = [](const std::string& tail) {
        std::string fwd = toForwardSlash(tail);
        auto slash = fwd.find('/');
        if (slash == std::string::npos) return fwd;
        return toLower(fwd.substr(0, slash)) + "/" + fwd.substr(slash + 1);
    };

    // DBFilesClient\ → db/
    if (stripPrefix("DBFilesClient\\")) return "db/" + toForwardSlash(rest);

    // Character\{Race}\{Gender}\ → character/{race}/{gender}/
    if (stripPrefix("Character\\")) return "character/" + toForwardSlash(toLower(rest));

    // Creature\{Name}\ → creature/{name}/
    if (stripPrefix("Creature\\")) return "creature/" + lowerFirstComponent(rest);

    // Item\ObjectComponents\ → item/objectcomponents/
    if (stripPrefix("Item\\ObjectComponents\\")) return "item/objectcomponents/" + toForwardSlash(rest);

    // Item\TextureComponents\ → item/texturecomponents/
    if (stripPrefix("Item\\TextureComponents\\")) return "item/texturecomponents/" + toForwardSlash(rest);

    // Interface\Icons\ → interface/icons/
    if (stripPrefix("Interface\\Icons\\")) return "interface/icons/" + toForwardSlash(rest);

    // Interface\GossipFrame\ → interface/gossip/
    if (stripPrefix("Interface\\GossipFrame\\")) return "interface/gossip/" + toForwardSlash(rest);

    // Interface\{rest} → interface/{rest}
    if (stripPrefix("Interface\\")) return "interface/" + toForwardSlash(rest);

    // Textures\Minimap\ → terrain/minimap/
    if (stripPrefix("Textures\\Minimap\\")) return "terrain/minimap/" + toForwardSlash(rest);

    // Textures\BakedNpcTextures\ → creature/baked/
    if (stripPrefix("Textures\\BakedNpcTextures\\")) return "creature/baked/" + toForwardSlash(rest);

    // Textures\{rest} → terrain/textures/{rest}
    if (stripPrefix("Textures\\")) return "terrain/textures/" + toForwardSlash(rest);

    // World\Maps\{Map}\ → terrain/maps/{map}/
    if (stripPrefix("World\\Maps\\")) return "terrain/maps/" + lowerFirstComponent(rest);

    // World\wmo\ → world/wmo/ (preserve subpath)
    if (stripPrefix("World\\wmo\\")) return "world/wmo/" + toForwardSlash(rest);

    // World\Doodads\ → world/doodads/
    if (stripPrefix("World\\Doodads\\")) return "world/doodads/" + toForwardSlash(rest);

    // World\{rest} → world/{rest}
    if (stripPrefix("World\\")) return "world/" + toForwardSlash(rest);

    // Environments\ → environment/
    if (stripPrefix("Environments\\")) return "environment/" + toForwardSlash(rest);

    // Sound\Ambience\ → sound/ambient/
    if (stripPrefix("Sound\\Ambience\\")) return "sound/ambient/" + toForwardSlash(rest);

    // Sound\Character\ → sound/character/
    if (stripPrefix("Sound\\Character\\")) return "sound/character/" + toForwardSlash(rest);

    // Sound\Doodad\ → sound/doodad/
    if (stripPrefix("Sound\\Doodad\\")) return "sound/doodad/" + toForwardSlash(rest);

    // Sound\Creature\ → sound/creature/
    if (stripPrefix("Sound\\Creature\\")) return "sound/creature/" + toForwardSlash(rest);

    // Sound\Spells\ → sound/spell/
    if (stripPrefix("Sound\\Spells\\")) return "sound/spell/" + toForwardSlash(rest);

    // Sound\Music\ → sound/music/
    if (stripPrefix("Sound\\Music\\")) return "sound/music/" + toForwardSlash(rest);

    // Sound\{rest} → sound/{rest}
    if (stripPrefix("Sound\\")) return "sound/" + toForwardSlash(rest);

    // Spells\ → spell/
    if (stripPrefix("Spells\\")) return "spell/" + toForwardSlash(rest);

    // Everything else → misc/{original_path}
    return "misc/" + toForwardSlash(wowPath);
}
} // namespace tools

View file

@ -6,7 +6,7 @@ namespace wowee {
namespace tools {
/**
* Maps WoW virtual paths to reorganized filesystem categories.
* Maps WoW virtual paths to organized filesystem categories.
*
* Input: WoW virtual path (e.g., "Creature\\Bear\\BearSkin.blp")
* Output: Category-based relative path (e.g., "creature/bear/bearskin.blp")
@ -14,19 +14,15 @@ namespace tools {
class PathMapper {
public:
/**
* Map a WoW virtual path to an organized filesystem path.
* @param wowPath Original WoW virtual path (backslash-separated)
* @return Organized relative path (forward-slash separated, fully lowercased)
*/
static std::string mapPath(const std::string& wowPath);
private:
// Builds the mapped path for mapPath().
static std::string mapPathImpl(const std::string& wowPath);
// Helpers for prefix matching (case-insensitive)
static bool startsWithCI(const std::string& str, const std::string& prefix);
// String helpers used while building the mapped path.
static std::string toLower(const std::string& str);
static std::string toForwardSlash(const std::string& str);
// Returns the portion of `path` after its first `prefixLen` characters.
static std::string extractAfterPrefix(const std::string& path, size_t prefixLen);
};
} // namespace tools