mirror of
https://github.com/Kelsidavis/WoWee.git
synced 2026-05-02 15:53:51 +00:00
Background BLP texture pre-decoding + deferred WMO normal maps (12x streaming perf)
Move CPU-heavy BLP texture decoding from main thread to background worker threads for all hot paths: terrain M2 models, WMO doodad M2s, WMO textures, creature models, and gameobject WMOs. Each renderer (M2, WMO, Character) now accepts a pre-decoded BLP cache that loadTexture() checks before falling back to synchronous decode. Defer WMO normal/height map generation (3 per-pixel passes: luminance, box blur, Sobel) during terrain streaming finalization — this was the dominant remaining bottleneck after BLP pre-decoding. Terrain streaming stalls: 1576ms → 124ms worst case.
This commit is contained in:
parent
0313bd8692
commit
7ac990cff4
13 changed files with 573 additions and 109 deletions
|
|
@ -3,6 +3,7 @@
|
|||
#include "core/window.hpp"
|
||||
#include "core/input.hpp"
|
||||
#include "game/character.hpp"
|
||||
#include "pipeline/blp_loader.hpp"
|
||||
#include <memory>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
|
@ -23,7 +24,7 @@ namespace rendering { class Renderer; }
|
|||
namespace ui { class UIManager; }
|
||||
namespace auth { class AuthHandler; }
|
||||
namespace game { class GameHandler; class World; class ExpansionRegistry; }
|
||||
namespace pipeline { class AssetManager; class DBCLayout; struct M2Model; }
|
||||
namespace pipeline { class AssetManager; class DBCLayout; struct M2Model; struct WMOModel; }
|
||||
namespace audio { enum class VoiceType; }
|
||||
|
||||
namespace core {
|
||||
|
|
@ -206,6 +207,7 @@ private:
|
|||
uint32_t modelId;
|
||||
float x, y, z, orientation;
|
||||
std::shared_ptr<pipeline::M2Model> model; // parsed on background thread
|
||||
std::unordered_map<std::string, pipeline::BLPImage> predecodedTextures; // decoded on bg thread
|
||||
bool valid = false;
|
||||
bool permanent_failure = false;
|
||||
};
|
||||
|
|
@ -337,6 +339,24 @@ private:
|
|||
};
|
||||
std::vector<PendingGameObjectSpawn> pendingGameObjectSpawns_;
|
||||
void processGameObjectSpawnQueue();
|
||||
|
||||
// Async WMO loading for game objects (file I/O + parse on background thread)
|
||||
struct PreparedGameObjectWMO {
|
||||
uint64_t guid;
|
||||
uint32_t entry;
|
||||
uint32_t displayId;
|
||||
float x, y, z, orientation;
|
||||
std::shared_ptr<pipeline::WMOModel> wmoModel;
|
||||
std::unordered_map<std::string, pipeline::BLPImage> predecodedTextures; // decoded on bg thread
|
||||
bool valid = false;
|
||||
bool isWmo = false;
|
||||
std::string modelPath;
|
||||
};
|
||||
// One in-flight background game-object load. Wraps the future that yields
// the worker's PreparedGameObjectWMO so pending loads can be kept in a vector.
struct AsyncGameObjectLoad {
|
||||
// Result channel for the background task; holds a PreparedGameObjectWMO once ready.
std::future<PreparedGameObjectWMO> future;
|
||||
};
|
||||
std::vector<AsyncGameObjectLoad> asyncGameObjectLoads_;
|
||||
void processAsyncGameObjectResults();
|
||||
struct PendingTransportDoodadBatch {
|
||||
uint64_t guid = 0;
|
||||
uint32_t modelId = 0;
|
||||
|
|
|
|||
|
|
@ -1,6 +1,7 @@
|
|||
#pragma once
|
||||
|
||||
#include "pipeline/m2_loader.hpp"
|
||||
#include "pipeline/blp_loader.hpp"
|
||||
#include <vulkan/vulkan.h>
|
||||
#include <vk_mem_alloc.h>
|
||||
#include <glm/glm.hpp>
|
||||
|
|
@ -114,7 +115,11 @@ public:
|
|||
void setShadowMap(VkTexture*, const glm::mat4&) {}
|
||||
void clearShadowMap() {}
|
||||
|
||||
// Pre-decoded BLP cache: set before calling loadModel() to skip main-thread BLP decode
|
||||
void setPredecodedBLPCache(std::unordered_map<std::string, pipeline::BLPImage>* cache) { predecodedBLPCache_ = cache; }
|
||||
|
||||
private:
|
||||
std::unordered_map<std::string, pipeline::BLPImage>* predecodedBLPCache_ = nullptr;
|
||||
// GPU representation of M2 model
|
||||
struct M2ModelGPU {
|
||||
VkBuffer vertexBuffer = VK_NULL_HANDLE;
|
||||
|
|
@ -180,6 +185,7 @@ private:
|
|||
|
||||
// Bone update throttling (skip frames for distant characters)
|
||||
uint32_t boneUpdateCounter = 0;
|
||||
const M2ModelGPU* cachedModel = nullptr; // Avoid per-frame hash lookups
|
||||
|
||||
// Per-instance bone SSBO (double-buffered per frame)
|
||||
VkBuffer boneBuffer[2] = {};
|
||||
|
|
|
|||
|
|
@ -1,6 +1,7 @@
|
|||
#pragma once
|
||||
|
||||
#include "pipeline/m2_loader.hpp"
|
||||
#include "pipeline/blp_loader.hpp"
|
||||
#include <vulkan/vulkan.h>
|
||||
#include <vk_mem_alloc.h>
|
||||
#include <glm/glm.hpp>
|
||||
|
|
@ -188,6 +189,7 @@ struct M2Instance {
|
|||
bool skipCollision = false; // WMO interior doodads — skip player wall collision
|
||||
float cachedBoundRadius = 0.0f;
|
||||
float portalSpinAngle = 0.0f; // Accumulated spin angle for portal rotation
|
||||
const M2ModelGPU* cachedModel = nullptr; // Avoid per-frame hash lookups
|
||||
|
||||
// Frame-skip optimization (update distant animations less frequently)
|
||||
uint8_t frameSkipCounter = 0;
|
||||
|
|
@ -328,6 +330,10 @@ public:
|
|||
|
||||
std::vector<glm::vec3> getWaterVegetationPositions(const glm::vec3& camPos, float maxDist) const;
|
||||
|
||||
// Pre-decoded BLP cache: set by terrain manager before calling loadModel()
|
||||
// so loadTexture() can skip the expensive assetManager->loadTexture() call.
|
||||
void setPredecodedBLPCache(std::unordered_map<std::string, pipeline::BLPImage>* cache) { predecodedBLPCache_ = cache; }
|
||||
|
||||
private:
|
||||
bool initialized_ = false;
|
||||
bool insideInterior = false;
|
||||
|
|
@ -414,6 +420,8 @@ private:
|
|||
uint32_t modelLimitRejectWarnings_ = 0;
|
||||
|
||||
VkTexture* loadTexture(const std::string& path, uint32_t texFlags = 0);
|
||||
std::unordered_map<std::string, pipeline::BLPImage>* predecodedBLPCache_ = nullptr;
|
||||
|
||||
struct TextureCacheEntry {
|
||||
std::unique_ptr<VkTexture> texture;
|
||||
size_t approxBytes = 0;
|
||||
|
|
|
|||
|
|
@ -121,6 +121,12 @@ struct PendingTile {
|
|||
// Pre-loaded terrain texture BLP data (loaded on background thread to avoid
|
||||
// blocking file I/O on the main thread during finalizeTile)
|
||||
std::unordered_map<std::string, pipeline::BLPImage> preloadedTextures;
|
||||
|
||||
// Pre-decoded M2 model textures (decoded on background thread)
|
||||
std::unordered_map<std::string, pipeline::BLPImage> preloadedM2Textures;
|
||||
|
||||
// Pre-decoded WMO textures (decoded on background thread)
|
||||
std::unordered_map<std::string, pipeline::BLPImage> preloadedWMOTextures;
|
||||
};
|
||||
|
||||
/**
|
||||
|
|
|
|||
|
|
@ -50,9 +50,12 @@ public:
|
|||
// Batch upload mode: records multiple upload commands into a single
|
||||
// command buffer, then submits with ONE fence wait instead of one per upload.
|
||||
void beginUploadBatch();
|
||||
void endUploadBatch();
|
||||
void endUploadBatch(); // Async: submits but does NOT wait for fence
|
||||
void endUploadBatchSync(); // Sync: submits and waits (for load screens)
|
||||
bool isInUploadBatch() const { return inUploadBatch_; }
|
||||
void deferStagingCleanup(AllocatedBuffer staging);
|
||||
void pollUploadBatches(); // Check completed async uploads, free staging buffers
|
||||
void waitAllUploads(); // Block until all in-flight uploads complete
|
||||
|
||||
// Accessors
|
||||
VkInstance getInstance() const { return instance; }
|
||||
|
|
@ -157,6 +160,14 @@ private:
|
|||
VkCommandBuffer batchCmd_ = VK_NULL_HANDLE;
|
||||
std::vector<AllocatedBuffer> batchStagingBuffers_;
|
||||
|
||||
// Async upload: in-flight batches awaiting GPU completion
// Each record bundles the handles that belong to one submitted upload batch.
|
||||
struct InFlightBatch {
|
||||
// NOTE(review): presumably the fence signalled when this batch's submission
// finishes on the GPU — confirm against endUploadBatch()/pollUploadBatches().
VkFence fence = VK_NULL_HANDLE;
|
||||
// Command buffer recorded for this batch.
VkCommandBuffer cmd = VK_NULL_HANDLE;
|
||||
// Staging buffers tied to this batch.
// NOTE(review): presumably freed once the fence has signalled — confirm.
std::vector<AllocatedBuffer> stagingBuffers;
|
||||
};
|
||||
std::vector<InFlightBatch> inFlightBatches_;
|
||||
|
||||
// Depth buffer (shared across all framebuffers)
|
||||
VkImage depthImage = VK_NULL_HANDLE;
|
||||
VkImageView depthImageView = VK_NULL_HANDLE;
|
||||
|
|
|
|||
|
|
@ -1,5 +1,6 @@
|
|||
#pragma once
|
||||
|
||||
#include "pipeline/blp_loader.hpp"
|
||||
#include <vulkan/vulkan.h>
|
||||
#include <vk_mem_alloc.h>
|
||||
#include <glm/glm.hpp>
|
||||
|
|
@ -325,6 +326,12 @@ public:
|
|||
// Pre-compute floor cache for all loaded WMO instances
|
||||
void precomputeFloorCache();
|
||||
|
||||
// Pre-decoded BLP cache: set before calling loadModel() to skip main-thread BLP decode
|
||||
void setPredecodedBLPCache(std::unordered_map<std::string, pipeline::BLPImage>* cache) { predecodedBLPCache_ = cache; }
|
||||
|
||||
// Defer normal/height map generation during streaming to avoid CPU stalls
|
||||
void setDeferNormalMaps(bool defer) { deferNormalMaps_ = defer; }
|
||||
|
||||
private:
|
||||
// WMO material UBO — matches WMOMaterial in wmo.frag.glsl
|
||||
struct WMOMaterialUBO {
|
||||
|
|
@ -558,6 +565,7 @@ private:
|
|||
* Load a texture from path
|
||||
*/
|
||||
VkTexture* loadTexture(const std::string& path);
|
||||
std::unordered_map<std::string, pipeline::BLPImage>* predecodedBLPCache_ = nullptr;
|
||||
|
||||
/**
|
||||
* Generate normal+height map from diffuse RGBA8 pixels
|
||||
|
|
@ -670,6 +678,7 @@ private:
|
|||
|
||||
// Normal mapping / POM settings
|
||||
bool normalMappingEnabled_ = true; // on by default
|
||||
bool deferNormalMaps_ = false; // skip normal map gen during streaming
|
||||
float normalMapStrength_ = 0.8f; // 0.0 = flat, 1.0 = full, 2.0 = exaggerated
|
||||
bool pomEnabled_ = true; // on by default
|
||||
int pomQuality_ = 1; // 0=Low(16), 1=Medium(32), 2=High(64)
|
||||
|
|
|
|||
|
|
@ -6883,7 +6883,7 @@ void Application::spawnOnlineGameObject(uint64_t guid, uint32_t entry, uint32_t
|
|||
void Application::processAsyncCreatureResults() {
|
||||
// Check completed async model loads and finalize on main thread (GPU upload + instance creation).
|
||||
// Limit GPU model uploads per frame to avoid spikes, but always drain cheap bookkeeping.
|
||||
static constexpr int kMaxModelUploadsPerFrame = 3;
|
||||
static constexpr int kMaxModelUploadsPerFrame = 1;
|
||||
int modelUploads = 0;
|
||||
|
||||
for (auto it = asyncCreatureLoads_.begin(); it != asyncCreatureLoads_.end(); ) {
|
||||
|
|
@ -6925,13 +6925,17 @@ void Application::processAsyncCreatureResults() {
|
|||
}
|
||||
|
||||
// Upload model to GPU (must happen on main thread)
|
||||
// Use pre-decoded BLP cache to skip main-thread texture decode
|
||||
charRenderer->setPredecodedBLPCache(&result.predecodedTextures);
|
||||
if (!charRenderer->loadModel(*result.model, result.modelId)) {
|
||||
charRenderer->setPredecodedBLPCache(nullptr);
|
||||
nonRenderableCreatureDisplayIds_.insert(result.displayId);
|
||||
creaturePermanentFailureGuids_.insert(result.guid);
|
||||
pendingCreatureSpawnGuids_.erase(result.guid);
|
||||
creatureSpawnRetryCounts_.erase(result.guid);
|
||||
continue;
|
||||
}
|
||||
charRenderer->setPredecodedBLPCache(nullptr);
|
||||
displayIdModelCache_[result.displayId] = result.modelId;
|
||||
modelUploads++;
|
||||
|
||||
|
|
@ -6956,6 +6960,10 @@ void Application::processAsyncCreatureResults() {
|
|||
}
|
||||
|
||||
void Application::processCreatureSpawnQueue() {
|
||||
auto startTime = std::chrono::steady_clock::now();
|
||||
// Budget: max 2ms per frame for creature spawning to prevent stutter.
|
||||
static constexpr float kSpawnBudgetMs = 2.0f;
|
||||
|
||||
// First, finalize any async model loads that completed on background threads.
|
||||
processAsyncCreatureResults();
|
||||
|
||||
|
|
@ -6965,18 +6973,15 @@ void Application::processCreatureSpawnQueue() {
|
|||
if (!creatureLookupsBuilt_) return;
|
||||
}
|
||||
|
||||
auto startTime = std::chrono::steady_clock::now();
|
||||
// Budget: max 4ms per frame for creature spawning to prevent stutter.
|
||||
static constexpr float kSpawnBudgetMs = 4.0f;
|
||||
|
||||
int processed = 0;
|
||||
int asyncLaunched = 0;
|
||||
size_t rotationsLeft = pendingCreatureSpawns_.size();
|
||||
while (!pendingCreatureSpawns_.empty() &&
|
||||
processed < MAX_SPAWNS_PER_FRAME &&
|
||||
rotationsLeft > 0) {
|
||||
// Check time budget after each spawn (not for the first one, always process at least 1)
|
||||
if (processed > 0) {
|
||||
// Check time budget every iteration (including first — async results may
|
||||
// have already consumed the budget via GPU model uploads).
|
||||
{
|
||||
auto now = std::chrono::steady_clock::now();
|
||||
float elapsedMs = std::chrono::duration<float, std::milli>(now - startTime).count();
|
||||
if (elapsedMs >= kSpawnBudgetMs) break;
|
||||
|
|
@ -7081,6 +7086,20 @@ void Application::processCreatureSpawnQueue() {
|
|||
}
|
||||
}
|
||||
|
||||
// Pre-decode model textures on background thread
|
||||
for (const auto& tex : model->textures) {
|
||||
if (tex.filename.empty()) continue;
|
||||
std::string texKey = tex.filename;
|
||||
std::replace(texKey.begin(), texKey.end(), '/', '\\');
|
||||
std::transform(texKey.begin(), texKey.end(), texKey.begin(),
|
||||
[](unsigned char c) { return static_cast<char>(std::tolower(c)); });
|
||||
if (result.predecodedTextures.find(texKey) != result.predecodedTextures.end()) continue;
|
||||
auto blp = am->loadTexture(texKey);
|
||||
if (blp.isValid()) {
|
||||
result.predecodedTextures[texKey] = std::move(blp);
|
||||
}
|
||||
}
|
||||
|
||||
result.model = std::move(model);
|
||||
result.valid = true;
|
||||
return result;
|
||||
|
|
@ -7161,14 +7180,202 @@ void Application::processDeferredEquipmentQueue() {
|
|||
setOnlinePlayerEquipment(guid, equipData.first, equipData.second);
|
||||
}
|
||||
|
||||
void Application::processAsyncGameObjectResults() {
|
||||
for (auto it = asyncGameObjectLoads_.begin(); it != asyncGameObjectLoads_.end(); ) {
|
||||
if (!it->future.valid() ||
|
||||
it->future.wait_for(std::chrono::milliseconds(0)) != std::future_status::ready) {
|
||||
++it;
|
||||
continue;
|
||||
}
|
||||
|
||||
auto result = it->future.get();
|
||||
it = asyncGameObjectLoads_.erase(it);
|
||||
|
||||
if (!result.valid || !result.isWmo || !result.wmoModel) {
|
||||
// Fallback: spawn via sync path (likely an M2 or failed WMO)
|
||||
spawnOnlineGameObject(result.guid, result.entry, result.displayId,
|
||||
result.x, result.y, result.z, result.orientation);
|
||||
continue;
|
||||
}
|
||||
|
||||
// WMO parsed on background thread — do GPU upload + instance creation on main thread
|
||||
auto* wmoRenderer = renderer ? renderer->getWMORenderer() : nullptr;
|
||||
if (!wmoRenderer) continue;
|
||||
|
||||
uint32_t modelId = 0;
|
||||
auto itCache = gameObjectDisplayIdWmoCache_.find(result.displayId);
|
||||
if (itCache != gameObjectDisplayIdWmoCache_.end()) {
|
||||
modelId = itCache->second;
|
||||
} else {
|
||||
modelId = nextGameObjectWmoModelId_++;
|
||||
wmoRenderer->setPredecodedBLPCache(&result.predecodedTextures);
|
||||
if (!wmoRenderer->loadModel(*result.wmoModel, modelId)) {
|
||||
wmoRenderer->setPredecodedBLPCache(nullptr);
|
||||
LOG_WARNING("Failed to load async gameobject WMO: ", result.modelPath);
|
||||
continue;
|
||||
}
|
||||
wmoRenderer->setPredecodedBLPCache(nullptr);
|
||||
gameObjectDisplayIdWmoCache_[result.displayId] = modelId;
|
||||
}
|
||||
|
||||
glm::vec3 renderPos = core::coords::canonicalToRender(
|
||||
glm::vec3(result.x, result.y, result.z));
|
||||
uint32_t instanceId = wmoRenderer->createInstance(
|
||||
modelId, renderPos, glm::vec3(0.0f, 0.0f, result.orientation), 1.0f);
|
||||
if (instanceId == 0) continue;
|
||||
|
||||
gameObjectInstances_[result.guid] = {modelId, instanceId, true};
|
||||
|
||||
// Queue transport doodad loading if applicable
|
||||
std::string lowerPath = result.modelPath;
|
||||
std::transform(lowerPath.begin(), lowerPath.end(), lowerPath.begin(),
|
||||
[](unsigned char c) { return static_cast<char>(std::tolower(c)); });
|
||||
if (lowerPath.find("transport") != std::string::npos) {
|
||||
const auto* doodadTemplates = wmoRenderer->getDoodadTemplates(modelId);
|
||||
if (doodadTemplates && !doodadTemplates->empty()) {
|
||||
PendingTransportDoodadBatch batch;
|
||||
batch.guid = result.guid;
|
||||
batch.modelId = modelId;
|
||||
batch.instanceId = instanceId;
|
||||
batch.x = result.x;
|
||||
batch.y = result.y;
|
||||
batch.z = result.z;
|
||||
batch.orientation = result.orientation;
|
||||
batch.doodadBudget = doodadTemplates->size();
|
||||
pendingTransportDoodadBatches_.push_back(batch);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Drains pendingGameObjectSpawns_ from the front. Completed async loads are
// finalized first; then, while the kBudgetMs time budget lasts, each pending
// spawn is either handed to a std::async task (uncached .wmo, at most
// kMaxAsyncLoads in flight) or passed to spawnOnlineGameObject().
void Application::processGameObjectSpawnQueue() {
|
||||
// Finalize any completed async WMO loads first
|
||||
processAsyncGameObjectResults();
|
||||

||||
if (pendingGameObjectSpawns_.empty()) return;
|
||||

||||
// NOTE(review): the comment and the three statements below look like the
// pre-change single-spawn path shown alongside its replacement in this diff
// view. As written here they would spawn and erase the front entry before the
// budgeted loop runs — confirm they are removed lines.
// Only spawn 1 game object per frame — each can involve heavy synchronous
|
||||
// WMO loading (root + groups from disk + GPU upload), easily 100ms+.
|
||||
auto& s = pendingGameObjectSpawns_.front();
|
||||
spawnOnlineGameObject(s.guid, s.entry, s.displayId, s.x, s.y, s.z, s.orientation);
|
||||
pendingGameObjectSpawns_.erase(pendingGameObjectSpawns_.begin());
|
||||
// Process spawns: cached WMOs and M2s go sync (cheap), uncached WMOs go async
|
||||
auto startTime = std::chrono::steady_clock::now();
|
||||
static constexpr float kBudgetMs = 2.0f;
|
||||
static constexpr int kMaxAsyncLoads = 2;
|
||||

||||
while (!pendingGameObjectSpawns_.empty()) {
|
||||
// The budget is checked before every spawn, including the first one.
float elapsedMs = std::chrono::duration<float, std::milli>(
|
||||
std::chrono::steady_clock::now() - startTime).count();
|
||||
if (elapsedMs >= kBudgetMs) break;
|
||||

||||
auto& s = pendingGameObjectSpawns_.front();
|
||||

||||
// Check if this is an uncached WMO that needs async loading
|
||||
// modelPath stays empty when gameObjectLookupsBuilt_ is false, which routes
// the spawn to the synchronous path at the bottom of the loop.
std::string modelPath;
|
||||
if (gameObjectLookupsBuilt_) {
|
||||
// Check transport overrides first
|
||||
bool isTransport = gameHandler && gameHandler->isTransportGuid(s.guid);
|
||||
if (isTransport) {
|
||||
// Hard-coded entry/display ids mapped to specific transport WMO files.
if (s.entry == 20808 || s.entry == 176231 || s.entry == 176310)
|
||||
modelPath = "World\\wmo\\transports\\transport_ship\\transportship.wmo";
|
||||
else if (s.displayId == 807 || s.displayId == 808 || s.displayId == 175080 || s.displayId == 176495 || s.displayId == 164871)
|
||||
modelPath = "World\\wmo\\transports\\transport_zeppelin\\transport_zeppelin.wmo";
|
||||
else if (s.displayId == 1587)
|
||||
modelPath = "World\\wmo\\transports\\transport_horde_zeppelin\\Transport_Horde_Zeppelin.wmo";
|
||||
else if (s.displayId == 2454 || s.displayId == 181688 || s.displayId == 190536)
|
||||
modelPath = "World\\wmo\\transports\\icebreaker\\Transport_Icebreaker_ship.wmo";
|
||||
}
|
||||
if (modelPath.empty())
|
||||
modelPath = getGameObjectModelPathForDisplayId(s.displayId);
|
||||
}
|
||||

||||
// Classify by a case-insensitive ".wmo" extension check.
std::string lowerPath = modelPath;
|
||||
std::transform(lowerPath.begin(), lowerPath.end(), lowerPath.begin(),
|
||||
[](unsigned char c) { return static_cast<char>(std::tolower(c)); });
|
||||
bool isWmo = lowerPath.size() >= 4 && lowerPath.substr(lowerPath.size() - 4) == ".wmo";
|
||||
bool isCached = isWmo && gameObjectDisplayIdWmoCache_.count(s.displayId);
|
||||

||||
if (isWmo && !isCached && !modelPath.empty() &&
|
||||
static_cast<int>(asyncGameObjectLoads_.size()) < kMaxAsyncLoads) {
|
||||
// Launch async WMO load — file I/O + parse on background thread
|
||||
// The task captures the raw AssetManager pointer plus copies of the spawn
// request and path, so it does not reference the queue entry after erase.
// NOTE(review): am must outlive the task — confirm shutdown waits on
// asyncGameObjectLoads_ before assetManager is destroyed.
auto* am = assetManager.get();
|
||||
PendingGameObjectSpawn capture = s;
|
||||
std::string capturePath = modelPath;
|
||||
AsyncGameObjectLoad load;
|
||||
load.future = std::async(std::launch::async,
|
||||
[am, capture, capturePath]() -> PreparedGameObjectWMO {
|
||||
PreparedGameObjectWMO result;
|
||||
result.guid = capture.guid;
|
||||
result.entry = capture.entry;
|
||||
result.displayId = capture.displayId;
|
||||
result.x = capture.x;
|
||||
result.y = capture.y;
|
||||
result.z = capture.z;
|
||||
result.orientation = capture.orientation;
|
||||
result.modelPath = capturePath;
|
||||
result.isWmo = true;
|
||||

||||
// An empty read returns the result with valid == false.
auto wmoData = am->readFile(capturePath);
|
||||
if (wmoData.empty()) return result;
|
||||

||||
auto wmo = std::make_shared<pipeline::WMOModel>(
|
||||
pipeline::WMOLoader::load(wmoData));
|
||||

||||
// Load groups
|
||||
if (wmo->nGroups > 0) {
|
||||
// Split "<base><ext>" so group files can be addressed as "<base>_NNN<ext>".
std::string basePath = capturePath;
|
||||
std::string ext;
|
||||
if (basePath.size() > 4) {
|
||||
ext = basePath.substr(basePath.size() - 4);
|
||||
basePath = basePath.substr(0, basePath.size() - 4);
|
||||
}
|
||||
for (uint32_t gi = 0; gi < wmo->nGroups; gi++) {
|
||||
char suffix[16];
|
||||
snprintf(suffix, sizeof(suffix), "_%03u%s", gi, ext.c_str());
|
||||
auto groupData = am->readFile(basePath + suffix);
|
||||
// Retry with a lowercase ".wmo" extension if the original-case name is not found.
if (groupData.empty()) {
|
||||
snprintf(suffix, sizeof(suffix), "_%03u.wmo", gi);
|
||||
groupData = am->readFile(basePath + suffix);
|
||||
}
|
||||
// Groups that cannot be read are skipped; the load still continues.
if (!groupData.empty()) {
|
||||
pipeline::WMOLoader::loadGroup(groupData, *wmo, gi);
|
||||
}
|
||||
}
|
||||
}
|
||||

||||
// Pre-decode WMO textures on background thread
|
||||
for (const auto& texPath : wmo->textures) {
|
||||
if (texPath.empty()) continue;
|
||||
// Normalize the key: cut at the first NUL, use backslashes, lowercase.
std::string texKey = texPath;
|
||||
size_t nul = texKey.find('\0');
|
||||
if (nul != std::string::npos) texKey.resize(nul);
|
||||
std::replace(texKey.begin(), texKey.end(), '/', '\\');
|
||||
std::transform(texKey.begin(), texKey.end(), texKey.begin(),
|
||||
[](unsigned char c) { return static_cast<char>(std::tolower(c)); });
|
||||
if (texKey.empty()) continue;
|
||||
// Convert to .blp extension
|
||||
if (texKey.size() >= 4) {
|
||||
std::string ext = texKey.substr(texKey.size() - 4);
|
||||
if (ext == ".tga" || ext == ".dds") {
|
||||
texKey = texKey.substr(0, texKey.size() - 4) + ".blp";
|
||||
}
|
||||
}
|
||||
// Decode each distinct texture once; invalid decodes are not stored.
if (result.predecodedTextures.find(texKey) != result.predecodedTextures.end()) continue;
|
||||
auto blp = am->loadTexture(texKey);
|
||||
if (blp.isValid()) {
|
||||
result.predecodedTextures[texKey] = std::move(blp);
|
||||
}
|
||||
}
|
||||

||||
result.wmoModel = wmo;
|
||||
result.valid = true;
|
||||
return result;
|
||||
});
|
||||
asyncGameObjectLoads_.push_back(std::move(load));
|
||||
pendingGameObjectSpawns_.erase(pendingGameObjectSpawns_.begin());
|
||||
continue;
|
||||
}
|
||||

||||
// Everything else goes through the synchronous spawn path: cached WMOs, M2s,
// spawns with no resolved model path, and also uncached WMOs whenever all
// kMaxAsyncLoads slots are busy.
// NOTE(review): that last case is not necessarily cheap — consider leaving
// such entries queued until a slot frees up.
|
||||
spawnOnlineGameObject(s.guid, s.entry, s.displayId, s.x, s.y, s.z, s.orientation);
|
||||
pendingGameObjectSpawns_.erase(pendingGameObjectSpawns_.begin());
|
||||
}
|
||||
}
|
||||
|
||||
void Application::processPendingTransportDoodads() {
|
||||
|
|
|
|||
|
|
@ -625,7 +625,18 @@ VkTexture* CharacterRenderer::loadTexture(const std::string& path) {
|
|||
return whiteTexture_.get();
|
||||
}
|
||||
|
||||
auto blpImage = assetManager->loadTexture(key);
|
||||
// Check pre-decoded BLP cache first (populated by background threads)
|
||||
pipeline::BLPImage blpImage;
|
||||
if (predecodedBLPCache_) {
|
||||
auto pit = predecodedBLPCache_->find(key);
|
||||
if (pit != predecodedBLPCache_->end()) {
|
||||
blpImage = std::move(pit->second);
|
||||
predecodedBLPCache_->erase(pit);
|
||||
}
|
||||
}
|
||||
if (!blpImage.isValid()) {
|
||||
blpImage = assetManager->loadTexture(key);
|
||||
}
|
||||
if (!blpImage.isValid()) {
|
||||
// Return white fallback but don't cache the failure — allow retry
|
||||
// on next character load in case the asset becomes available.
|
||||
|
|
@ -1412,8 +1423,9 @@ uint32_t CharacterRenderer::createInstance(uint32_t modelId, const glm::vec3& po
|
|||
instance.scale = scale;
|
||||
|
||||
// Initialize bone matrices to identity
|
||||
auto& model = models[modelId].data;
|
||||
instance.boneMatrices.resize(std::max(static_cast<size_t>(1), model.bones.size()), glm::mat4(1.0f));
|
||||
auto& gpuRef = models[modelId];
|
||||
instance.boneMatrices.resize(std::max(static_cast<size_t>(1), gpuRef.data.bones.size()), glm::mat4(1.0f));
|
||||
instance.cachedModel = &gpuRef;
|
||||
|
||||
uint32_t id = instance.id;
|
||||
instances[id] = std::move(instance);
|
||||
|
|
@ -1511,13 +1523,12 @@ void CharacterRenderer::update(float deltaTime, const glm::vec3& cameraPos) {
|
|||
if (distSq >= animUpdateRadiusSq) continue;
|
||||
|
||||
// Always advance animation time (cheap)
|
||||
auto modelIt = models.find(inst.modelId);
|
||||
if (modelIt != models.end() && !modelIt->second.data.sequences.empty()) {
|
||||
if (inst.cachedModel && !inst.cachedModel->data.sequences.empty()) {
|
||||
if (inst.currentSequenceIndex < 0) {
|
||||
inst.currentSequenceIndex = 0;
|
||||
inst.currentAnimationId = modelIt->second.data.sequences[0].id;
|
||||
inst.currentAnimationId = inst.cachedModel->data.sequences[0].id;
|
||||
}
|
||||
const auto& seq = modelIt->second.data.sequences[inst.currentSequenceIndex];
|
||||
const auto& seq = inst.cachedModel->data.sequences[inst.currentSequenceIndex];
|
||||
inst.animationTime += deltaTime * 1000.0f;
|
||||
if (seq.duration > 0 && inst.animationTime >= static_cast<float>(seq.duration)) {
|
||||
if (inst.animationLoop) {
|
||||
|
|
@ -1528,10 +1539,11 @@ void CharacterRenderer::update(float deltaTime, const glm::vec3& cameraPos) {
|
|||
}
|
||||
}
|
||||
|
||||
// Distance-tiered bone throttling: near=every frame, mid=every 3rd, far=every 6th
|
||||
// Distance-tiered bone throttling: near=every frame, mid=every 4th, far=every 8th
|
||||
uint32_t boneInterval = 1;
|
||||
if (distSq > 60.0f * 60.0f) boneInterval = 6;
|
||||
else if (distSq > 30.0f * 30.0f) boneInterval = 3;
|
||||
if (distSq > 40.0f * 40.0f) boneInterval = 8;
|
||||
else if (distSq > 20.0f * 20.0f) boneInterval = 4;
|
||||
else if (distSq > 10.0f * 10.0f) boneInterval = 2;
|
||||
|
||||
inst.boneUpdateCounter++;
|
||||
bool needsBones = (inst.boneUpdateCounter >= boneInterval) || inst.boneMatrices.empty();
|
||||
|
|
@ -1615,11 +1627,8 @@ void CharacterRenderer::update(float deltaTime, const glm::vec3& cameraPos) {
|
|||
}
|
||||
|
||||
void CharacterRenderer::updateAnimation(CharacterInstance& instance, float deltaTime) {
|
||||
auto modelIt = models.find(instance.modelId);
|
||||
if (modelIt == models.end()) {
|
||||
return;
|
||||
}
|
||||
const auto& model = modelIt->second.data;
|
||||
if (!instance.cachedModel) return;
|
||||
const auto& model = instance.cachedModel->data;
|
||||
|
||||
if (model.sequences.empty()) {
|
||||
return;
|
||||
|
|
@ -1732,7 +1741,8 @@ glm::quat CharacterRenderer::interpolateQuat(const pipeline::M2AnimationTrack& t
|
|||
// --- Bone transform calculation ---
|
||||
|
||||
void CharacterRenderer::calculateBoneMatrices(CharacterInstance& instance) {
|
||||
auto& model = models[instance.modelId].data;
|
||||
if (!instance.cachedModel) return;
|
||||
auto& model = instance.cachedModel->data;
|
||||
|
||||
if (model.bones.empty()) {
|
||||
return;
|
||||
|
|
@ -1833,9 +1843,8 @@ void CharacterRenderer::render(VkCommandBuffer cmd, VkDescriptorSet perFrameSet,
|
|||
}
|
||||
}
|
||||
|
||||
auto modelIt = models.find(instance.modelId);
|
||||
if (modelIt == models.end()) continue;
|
||||
const auto& gpuModel = modelIt->second;
|
||||
if (!instance.cachedModel) continue;
|
||||
const auto& gpuModel = *instance.cachedModel;
|
||||
|
||||
// Skip models without GPU buffers
|
||||
if (!gpuModel.vertexBuffer) continue;
|
||||
|
|
@ -2487,9 +2496,8 @@ void CharacterRenderer::renderShadow(VkCommandBuffer cmd, const glm::mat4& light
|
|||
glm::vec3 diff = inst.position - shadowCenter;
|
||||
if (glm::dot(diff, diff) > shadowRadiusSq) continue;
|
||||
|
||||
auto modelIt = models.find(inst.modelId);
|
||||
if (modelIt == models.end()) continue;
|
||||
const M2ModelGPU& gpuModel = modelIt->second;
|
||||
if (!inst.cachedModel) continue;
|
||||
const M2ModelGPU& gpuModel = *inst.cachedModel;
|
||||
if (!gpuModel.vertexBuffer) continue;
|
||||
|
||||
glm::mat4 modelMat = inst.hasOverrideModelMatrix
|
||||
|
|
|
|||
|
|
@ -1657,6 +1657,7 @@ uint32_t M2Renderer::createInstance(uint32_t modelId, const glm::vec3& position,
|
|||
instance.cachedIsInvisibleTrap = mdlRef.isInvisibleTrap;
|
||||
instance.cachedIsInstancePortal = mdlRef.isInstancePortal;
|
||||
instance.cachedIsValid = mdlRef.isValid();
|
||||
instance.cachedModel = &mdlRef;
|
||||
|
||||
// Initialize animation: play first sequence (usually Stand/Idle)
|
||||
const auto& mdl = mdlRef;
|
||||
|
|
@ -1748,6 +1749,7 @@ uint32_t M2Renderer::createInstanceWithMatrix(uint32_t modelId, const glm::mat4&
|
|||
instance.cachedIsGroundDetail = mdl2.isGroundDetail;
|
||||
instance.cachedIsInvisibleTrap = mdl2.isInvisibleTrap;
|
||||
instance.cachedIsValid = mdl2.isValid();
|
||||
instance.cachedModel = &mdl2;
|
||||
|
||||
// Initialize animation
|
||||
if (mdl2.hasAnimation && !mdl2.disableAnimation && !mdl2.sequences.empty()) {
|
||||
|
|
@ -2026,9 +2028,8 @@ void M2Renderer::update(float deltaTime, const glm::vec3& cameraPos, const glm::
|
|||
instance.animTime += dtMs * (instance.animSpeed - 1.0f);
|
||||
|
||||
// For animation looping/variation, we need the actual model data.
|
||||
auto it = models.find(instance.modelId);
|
||||
if (it == models.end()) continue;
|
||||
const M2ModelGPU& model = it->second;
|
||||
if (!instance.cachedModel) continue;
|
||||
const M2ModelGPU& model = *instance.cachedModel;
|
||||
|
||||
// Validate sequence index
|
||||
if (instance.currentSequenceIndex < 0 ||
|
||||
|
|
@ -2084,6 +2085,14 @@ void M2Renderer::update(float deltaTime, const glm::vec3& cameraPos, const glm::
|
|||
float paddedRadius = std::max(cullRadius * 1.5f, cullRadius + 3.0f);
|
||||
if (cullRadius > 0.0f && !updateFrustum.intersectsSphere(instance.position, paddedRadius)) continue;
|
||||
|
||||
// Distance-based frame skipping: update distant bones less frequently
|
||||
uint32_t boneInterval = 1;
|
||||
if (distSq > 200.0f * 200.0f) boneInterval = 8;
|
||||
else if (distSq > 100.0f * 100.0f) boneInterval = 4;
|
||||
else if (distSq > 50.0f * 50.0f) boneInterval = 2;
|
||||
instance.frameSkipCounter++;
|
||||
if ((instance.frameSkipCounter % boneInterval) != 0) continue;
|
||||
|
||||
boneWorkIndices_.push_back(idx);
|
||||
}
|
||||
|
||||
|
|
@ -2097,9 +2106,8 @@ void M2Renderer::update(float deltaTime, const glm::vec3& cameraPos, const glm::
|
|||
for (size_t i : boneWorkIndices_) {
|
||||
if (i >= instances.size()) continue;
|
||||
auto& inst = instances[i];
|
||||
auto mdlIt = models.find(inst.modelId);
|
||||
if (mdlIt == models.end()) continue;
|
||||
computeBoneMatrices(mdlIt->second, inst);
|
||||
if (!inst.cachedModel) continue;
|
||||
computeBoneMatrices(*inst.cachedModel, inst);
|
||||
}
|
||||
} else {
|
||||
// Parallel — dispatch across worker threads
|
||||
|
|
@ -2112,9 +2120,8 @@ void M2Renderer::update(float deltaTime, const glm::vec3& cameraPos, const glm::
|
|||
for (size_t i : boneWorkIndices_) {
|
||||
if (i >= instances.size()) continue;
|
||||
auto& inst = instances[i];
|
||||
auto mdlIt = models.find(inst.modelId);
|
||||
if (mdlIt == models.end()) continue;
|
||||
computeBoneMatrices(mdlIt->second, inst);
|
||||
if (!inst.cachedModel) continue;
|
||||
computeBoneMatrices(*inst.cachedModel, inst);
|
||||
}
|
||||
} else {
|
||||
const size_t chunkSize = animCount / numThreads;
|
||||
|
|
@ -2135,9 +2142,8 @@ void M2Renderer::update(float deltaTime, const glm::vec3& cameraPos, const glm::
|
|||
size_t idx = boneWorkIndices_[j];
|
||||
if (idx >= instances.size()) continue;
|
||||
auto& inst = instances[idx];
|
||||
auto mdlIt = models.find(inst.modelId);
|
||||
if (mdlIt == models.end()) continue;
|
||||
computeBoneMatrices(mdlIt->second, inst);
|
||||
if (!inst.cachedModel) continue;
|
||||
computeBoneMatrices(*inst.cachedModel, inst);
|
||||
}
|
||||
}));
|
||||
start = end;
|
||||
|
|
@ -2159,9 +2165,8 @@ void M2Renderer::update(float deltaTime, const glm::vec3& cameraPos, const glm::
|
|||
glm::vec3 toCam = instance.position - cachedCamPos_;
|
||||
float distSq = glm::dot(toCam, toCam);
|
||||
if (distSq > cachedMaxRenderDistSq_) continue;
|
||||
auto mdlIt = models.find(instance.modelId);
|
||||
if (mdlIt == models.end()) continue;
|
||||
emitParticles(instance, mdlIt->second, deltaTime);
|
||||
if (!instance.cachedModel) continue;
|
||||
emitParticles(instance, *instance.cachedModel, deltaTime);
|
||||
updateParticles(instance, deltaTime);
|
||||
}
|
||||
|
||||
|
|
@ -2865,9 +2870,8 @@ void M2Renderer::renderShadow(VkCommandBuffer cmd, const glm::mat4& lightSpaceMa
|
|||
glm::vec3 diff = instance.position - shadowCenter;
|
||||
if (glm::dot(diff, diff) > shadowRadiusSq) continue;
|
||||
|
||||
auto modelIt = models.find(instance.modelId);
|
||||
if (modelIt == models.end()) continue;
|
||||
const M2ModelGPU& model = modelIt->second;
|
||||
if (!instance.cachedModel) continue;
|
||||
const M2ModelGPU& model = *instance.cachedModel;
|
||||
|
||||
// Filter: only draw foliage models in foliage pass, non-foliage in non-foliage pass
|
||||
if (model.shadowWindFoliage != foliagePass) continue;
|
||||
|
|
@ -2973,8 +2977,7 @@ std::vector<glm::vec3> M2Renderer::getWaterVegetationPositions(const glm::vec3&
|
|||
std::vector<glm::vec3> result;
|
||||
float maxDistSq = maxDist * maxDist;
|
||||
for (const auto& inst : instances) {
|
||||
auto it = models.find(inst.modelId);
|
||||
if (it == models.end() || !it->second.isWaterVegetation) continue;
|
||||
if (!inst.cachedModel || !inst.cachedModel->isWaterVegetation) continue;
|
||||
glm::vec3 diff = inst.position - camPos;
|
||||
if (glm::dot(diff, diff) <= maxDistSq) {
|
||||
result.push_back(inst.position);
|
||||
|
|
@ -3085,9 +3088,8 @@ void M2Renderer::emitParticles(M2Instance& inst, const M2ModelGPU& gpu, float dt
|
|||
}
|
||||
|
||||
void M2Renderer::updateParticles(M2Instance& inst, float dt) {
|
||||
auto it = models.find(inst.modelId);
|
||||
if (it == models.end()) return;
|
||||
const auto& gpu = it->second;
|
||||
if (!inst.cachedModel) return;
|
||||
const auto& gpu = *inst.cachedModel;
|
||||
|
||||
for (size_t i = 0; i < inst.particles.size(); ) {
|
||||
auto& p = inst.particles[i];
|
||||
|
|
@ -3162,9 +3164,8 @@ void M2Renderer::renderM2Particles(VkCommandBuffer cmd, VkDescriptorSet perFrame
|
|||
|
||||
for (auto& inst : instances) {
|
||||
if (inst.particles.empty()) continue;
|
||||
auto it = models.find(inst.modelId);
|
||||
if (it == models.end()) continue;
|
||||
const auto& gpu = it->second;
|
||||
if (!inst.cachedModel) continue;
|
||||
const auto& gpu = *inst.cachedModel;
|
||||
|
||||
for (const auto& p : inst.particles) {
|
||||
if (p.emitterIndex < 0 || p.emitterIndex >= static_cast<int>(gpu.particleEmitters.size())) continue;
|
||||
|
|
@ -3549,9 +3550,13 @@ void M2Renderer::rebuildSpatialIndex() {
|
|||
particleInstanceIndices_.clear();
|
||||
|
||||
for (size_t i = 0; i < instances.size(); i++) {
|
||||
const auto& inst = instances[i];
|
||||
auto& inst = instances[i];
|
||||
instanceIndexById[inst.id] = i;
|
||||
|
||||
// Re-cache model pointer (may have changed after model map modifications)
|
||||
auto mdlIt = models.find(inst.modelId);
|
||||
inst.cachedModel = (mdlIt != models.end()) ? &mdlIt->second : nullptr;
|
||||
|
||||
// Rebuild dedup map (skip ground detail)
|
||||
if (!inst.cachedIsGroundDetail) {
|
||||
DedupKey dk{inst.modelId,
|
||||
|
|
@ -3684,8 +3689,18 @@ VkTexture* M2Renderer::loadTexture(const std::string& path, uint32_t texFlags) {
|
|||
containsToken(key, "campfire") ||
|
||||
containsToken(key, "bonfire");
|
||||
|
||||
// Load BLP texture
|
||||
pipeline::BLPImage blp = assetManager->loadTexture(key);
|
||||
// Check pre-decoded BLP cache first (populated by background worker threads)
|
||||
pipeline::BLPImage blp;
|
||||
if (predecodedBLPCache_) {
|
||||
auto pit = predecodedBLPCache_->find(key);
|
||||
if (pit != predecodedBLPCache_->end()) {
|
||||
blp = std::move(pit->second);
|
||||
predecodedBLPCache_->erase(pit);
|
||||
}
|
||||
}
|
||||
if (!blp.isValid()) {
|
||||
blp = assetManager->loadTexture(key);
|
||||
}
|
||||
if (!blp.isValid()) {
|
||||
// Return white fallback but don't cache the failure — MPQ reads can
|
||||
// fail transiently during streaming; allow retry on next model load.
|
||||
|
|
@ -3751,9 +3766,8 @@ VkTexture* M2Renderer::loadTexture(const std::string& path, uint32_t texFlags) {
|
|||
uint32_t M2Renderer::getTotalTriangleCount() const {
|
||||
uint32_t total = 0;
|
||||
for (const auto& instance : instances) {
|
||||
auto it = models.find(instance.modelId);
|
||||
if (it != models.end()) {
|
||||
total += it->second.indexCount / 3;
|
||||
if (instance.cachedModel) {
|
||||
total += instance.cachedModel->indexCount / 3;
|
||||
}
|
||||
}
|
||||
return total;
|
||||
|
|
@ -3775,11 +3789,10 @@ std::optional<float> M2Renderer::getFloorHeight(float glX, float glY, float glZ,
|
|||
continue;
|
||||
}
|
||||
|
||||
auto it = models.find(instance.modelId);
|
||||
if (it == models.end()) continue;
|
||||
if (!instance.cachedModel) continue;
|
||||
if (instance.scale <= 0.001f) continue;
|
||||
|
||||
const M2ModelGPU& model = it->second;
|
||||
const M2ModelGPU& model = *instance.cachedModel;
|
||||
if (model.collisionNoBlock || model.isInvisibleTrap || model.isSpellEffect) continue;
|
||||
if (instance.skipCollision) continue;
|
||||
|
||||
|
|
@ -3931,10 +3944,9 @@ bool M2Renderer::checkCollision(const glm::vec3& from, const glm::vec3& to,
|
|||
if (from.z > instance.worldBoundsMax.z + 2.5f && adjustedPos.z > instance.worldBoundsMax.z + 2.5f) continue;
|
||||
if (from.z + 2.5f < instance.worldBoundsMin.z && adjustedPos.z + 2.5f < instance.worldBoundsMin.z) continue;
|
||||
|
||||
auto it = models.find(instance.modelId);
|
||||
if (it == models.end()) continue;
|
||||
if (!instance.cachedModel) continue;
|
||||
|
||||
const M2ModelGPU& model = it->second;
|
||||
const M2ModelGPU& model = *instance.cachedModel;
|
||||
if (model.collisionNoBlock || model.isInvisibleTrap || model.isSpellEffect) continue;
|
||||
if (instance.skipCollision) continue;
|
||||
if (instance.scale <= 0.001f) continue;
|
||||
|
|
@ -4172,10 +4184,9 @@ float M2Renderer::raycastBoundingBoxes(const glm::vec3& origin, const glm::vec3&
|
|||
continue;
|
||||
}
|
||||
|
||||
auto it = models.find(instance.modelId);
|
||||
if (it == models.end()) continue;
|
||||
if (!instance.cachedModel) continue;
|
||||
|
||||
const M2ModelGPU& model = it->second;
|
||||
const M2ModelGPU& model = *instance.cachedModel;
|
||||
if (model.collisionNoBlock || model.isInvisibleTrap || model.isSpellEffect) continue;
|
||||
glm::vec3 localMin, localMax;
|
||||
getTightCollisionBounds(model, localMin, localMax);
|
||||
|
|
|
|||
|
|
@ -2434,6 +2434,9 @@ void Renderer::update(float deltaTime) {
|
|||
cameraController->update(deltaTime);
|
||||
auto cameraEnd = std::chrono::steady_clock::now();
|
||||
lastCameraUpdateMs = std::chrono::duration<double, std::milli>(cameraEnd - cameraStart).count();
|
||||
if (lastCameraUpdateMs > 3.0) {
|
||||
LOG_WARNING("SLOW cameraController->update: ", lastCameraUpdateMs, "ms");
|
||||
}
|
||||
|
||||
// Update 3D audio listener position/orientation to match camera
|
||||
if (camera) {
|
||||
|
|
@ -2779,8 +2782,15 @@ void Renderer::update(float deltaTime) {
|
|||
|
||||
// Update M2 doodad animations (pass camera for frustum-culling bone computation)
|
||||
if (m2Renderer && camera) {
|
||||
auto m2Start = std::chrono::steady_clock::now();
|
||||
m2Renderer->update(deltaTime, camera->getPosition(),
|
||||
camera->getProjectionMatrix() * camera->getViewMatrix());
|
||||
float m2Ms = std::chrono::duration<float, std::milli>(
|
||||
std::chrono::steady_clock::now() - m2Start).count();
|
||||
if (m2Ms > 3.0f) {
|
||||
LOG_WARNING("SLOW m2Renderer->update: ", m2Ms, "ms (",
|
||||
m2Renderer->getInstanceCount(), " instances)");
|
||||
}
|
||||
}
|
||||
|
||||
// Helper: play zone music, dispatching local files (file: prefix) vs MPQ paths
|
||||
|
|
|
|||
|
|
@ -231,9 +231,14 @@ bool TerrainManager::loadTile(int x, int y) {
|
|||
return false;
|
||||
}
|
||||
|
||||
VkContext* vkCtx = terrainRenderer ? terrainRenderer->getVkContext() : nullptr;
|
||||
if (vkCtx) vkCtx->beginUploadBatch();
|
||||
|
||||
FinalizingTile ft;
|
||||
ft.pending = std::move(pending);
|
||||
while (!advanceFinalization(ft)) {}
|
||||
|
||||
if (vkCtx) vkCtx->endUploadBatchSync(); // Sync — caller expects tile ready
|
||||
return true;
|
||||
}
|
||||
|
||||
|
|
@ -407,6 +412,20 @@ std::shared_ptr<PendingTile> TerrainManager::prepareTile(int x, int y) {
|
|||
return false;
|
||||
}
|
||||
|
||||
// Pre-decode M2 model textures on background thread
|
||||
for (const auto& tex : m2Model.textures) {
|
||||
if (tex.filename.empty()) continue;
|
||||
std::string texKey = tex.filename;
|
||||
std::replace(texKey.begin(), texKey.end(), '/', '\\');
|
||||
std::transform(texKey.begin(), texKey.end(), texKey.begin(),
|
||||
[](unsigned char c) { return static_cast<char>(std::tolower(c)); });
|
||||
if (pending->preloadedM2Textures.find(texKey) != pending->preloadedM2Textures.end()) continue;
|
||||
auto blp = assetManager->loadTexture(texKey);
|
||||
if (blp.isValid()) {
|
||||
pending->preloadedM2Textures[texKey] = std::move(blp);
|
||||
}
|
||||
}
|
||||
|
||||
PendingTile::M2Ready ready;
|
||||
ready.modelId = modelId;
|
||||
ready.model = std::move(m2Model);
|
||||
|
|
@ -584,6 +603,20 @@ std::shared_ptr<PendingTile> TerrainManager::prepareTile(int x, int y) {
|
|||
pipeline::M2Loader::loadSkin(skinData, m2Model);
|
||||
}
|
||||
if (!m2Model.isValid()) continue;
|
||||
|
||||
// Pre-decode doodad M2 textures on background thread
|
||||
for (const auto& tex : m2Model.textures) {
|
||||
if (tex.filename.empty()) continue;
|
||||
std::string texKey = tex.filename;
|
||||
std::replace(texKey.begin(), texKey.end(), '/', '\\');
|
||||
std::transform(texKey.begin(), texKey.end(), texKey.begin(),
|
||||
[](unsigned char c) { return static_cast<char>(std::tolower(c)); });
|
||||
if (pending->preloadedM2Textures.find(texKey) != pending->preloadedM2Textures.end()) continue;
|
||||
auto blp = assetManager->loadTexture(texKey);
|
||||
if (blp.isValid()) {
|
||||
pending->preloadedM2Textures[texKey] = std::move(blp);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Build doodad's local transform (WoW coordinates)
|
||||
|
|
@ -654,6 +687,32 @@ std::shared_ptr<PendingTile> TerrainManager::prepareTile(int x, int y) {
|
|||
}
|
||||
}
|
||||
|
||||
// Pre-decode WMO textures on background thread
|
||||
for (const auto& texPath : wmoModel.textures) {
|
||||
if (texPath.empty()) continue;
|
||||
std::string texKey = texPath;
|
||||
// Truncate at NUL (WMO paths can have stray bytes)
|
||||
size_t nul = texKey.find('\0');
|
||||
if (nul != std::string::npos) texKey.resize(nul);
|
||||
std::replace(texKey.begin(), texKey.end(), '/', '\\');
|
||||
std::transform(texKey.begin(), texKey.end(), texKey.begin(),
|
||||
[](unsigned char c) { return static_cast<char>(std::tolower(c)); });
|
||||
if (texKey.empty()) continue;
|
||||
if (pending->preloadedWMOTextures.find(texKey) != pending->preloadedWMOTextures.end()) continue;
|
||||
// Try .blp variant
|
||||
std::string blpKey = texKey;
|
||||
if (blpKey.size() >= 4) {
|
||||
std::string ext = blpKey.substr(blpKey.size() - 4);
|
||||
if (ext == ".tga" || ext == ".dds") {
|
||||
blpKey = blpKey.substr(0, blpKey.size() - 4) + ".blp";
|
||||
}
|
||||
}
|
||||
auto blp = assetManager->loadTexture(blpKey);
|
||||
if (blp.isValid()) {
|
||||
pending->preloadedWMOTextures[blpKey] = std::move(blp);
|
||||
}
|
||||
}
|
||||
|
||||
PendingTile::WMOReady ready;
|
||||
// Cache WMO model uploads by path; placement dedup uses uniqueId separately.
|
||||
ready.modelId = static_cast<uint32_t>(std::hash<std::string>{}(wmoPath));
|
||||
|
|
@ -741,7 +800,7 @@ bool TerrainManager::advanceFinalization(FinalizingTile& ft) {
|
|||
}
|
||||
bool allDone = terrainRenderer->loadTerrainIncremental(
|
||||
pending->mesh, pending->terrain.textures, x, y,
|
||||
ft.terrainChunkNext, 64);
|
||||
ft.terrainChunkNext, 32);
|
||||
if (!allDone) {
|
||||
return false; // More chunks remain — yield to time budget
|
||||
}
|
||||
|
|
@ -773,7 +832,9 @@ bool TerrainManager::advanceFinalization(FinalizingTile& ft) {
|
|||
case FinalizationPhase::M2_MODELS: {
|
||||
// Upload multiple M2 models per call (batched GPU uploads)
|
||||
if (m2Renderer && ft.m2ModelIndex < pending->m2Models.size()) {
|
||||
constexpr size_t kModelsPerStep = 8;
|
||||
// Set pre-decoded BLP cache so loadTexture() skips main-thread BLP decode
|
||||
m2Renderer->setPredecodedBLPCache(&pending->preloadedM2Textures);
|
||||
constexpr size_t kModelsPerStep = 4;
|
||||
size_t uploaded = 0;
|
||||
while (ft.m2ModelIndex < pending->m2Models.size() && uploaded < kModelsPerStep) {
|
||||
auto& m2Ready = pending->m2Models[ft.m2ModelIndex];
|
||||
|
|
@ -786,6 +847,7 @@ bool TerrainManager::advanceFinalization(FinalizingTile& ft) {
|
|||
ft.m2ModelIndex++;
|
||||
uploaded++;
|
||||
}
|
||||
m2Renderer->setPredecodedBLPCache(nullptr);
|
||||
// Stay in this phase until all models uploaded
|
||||
if (ft.m2ModelIndex < pending->m2Models.size()) {
|
||||
return false;
|
||||
|
|
@ -830,8 +892,11 @@ bool TerrainManager::advanceFinalization(FinalizingTile& ft) {
|
|||
// Upload multiple WMO models per call (batched GPU uploads)
|
||||
if (wmoRenderer && assetManager) {
|
||||
wmoRenderer->initialize(nullptr, VK_NULL_HANDLE, assetManager);
|
||||
// Set pre-decoded BLP cache and defer normal maps during streaming
|
||||
wmoRenderer->setPredecodedBLPCache(&pending->preloadedWMOTextures);
|
||||
wmoRenderer->setDeferNormalMaps(true);
|
||||
|
||||
constexpr size_t kWmosPerStep = 4;
|
||||
constexpr size_t kWmosPerStep = 1;
|
||||
size_t uploaded = 0;
|
||||
while (ft.wmoModelIndex < pending->wmoModels.size() && uploaded < kWmosPerStep) {
|
||||
auto& wmoReady = pending->wmoModels[ft.wmoModelIndex];
|
||||
|
|
@ -843,6 +908,8 @@ bool TerrainManager::advanceFinalization(FinalizingTile& ft) {
|
|||
uploaded++;
|
||||
}
|
||||
}
|
||||
wmoRenderer->setDeferNormalMaps(false);
|
||||
wmoRenderer->setPredecodedBLPCache(nullptr);
|
||||
if (ft.wmoModelIndex < pending->wmoModels.size()) return false;
|
||||
}
|
||||
ft.phase = FinalizationPhase::WMO_INSTANCES;
|
||||
|
|
@ -906,7 +973,9 @@ bool TerrainManager::advanceFinalization(FinalizingTile& ft) {
|
|||
case FinalizationPhase::WMO_DOODADS: {
|
||||
// Upload multiple WMO doodad M2s per call (batched GPU uploads)
|
||||
if (m2Renderer && ft.wmoDoodadIndex < pending->wmoDoodads.size()) {
|
||||
constexpr size_t kDoodadsPerStep = 16;
|
||||
// Set pre-decoded BLP cache for doodad M2 textures
|
||||
m2Renderer->setPredecodedBLPCache(&pending->preloadedM2Textures);
|
||||
constexpr size_t kDoodadsPerStep = 4;
|
||||
size_t uploaded = 0;
|
||||
while (ft.wmoDoodadIndex < pending->wmoDoodads.size() && uploaded < kDoodadsPerStep) {
|
||||
auto& doodad = pending->wmoDoodads[ft.wmoDoodadIndex];
|
||||
|
|
@ -923,6 +992,7 @@ bool TerrainManager::advanceFinalization(FinalizingTile& ft) {
|
|||
ft.wmoDoodadIndex++;
|
||||
uploaded++;
|
||||
}
|
||||
m2Renderer->setPredecodedBLPCache(nullptr);
|
||||
if (ft.wmoDoodadIndex < pending->wmoDoodads.size()) return false;
|
||||
}
|
||||
ft.phase = FinalizationPhase::WATER;
|
||||
|
|
@ -1080,11 +1150,6 @@ void TerrainManager::workerLoop() {
|
|||
}
|
||||
|
||||
void TerrainManager::processReadyTiles() {
|
||||
// Process tiles with time budget to avoid frame spikes
|
||||
// Taxi mode gets a slightly larger budget to avoid visible late-pop terrain/models.
|
||||
const float timeBudgetMs = taxiStreamingMode_ ? 8.0f : 3.0f;
|
||||
auto startTime = std::chrono::high_resolution_clock::now();
|
||||
|
||||
// Move newly ready tiles into the finalizing deque.
|
||||
// Keep them in pendingTiles so streamTiles() won't re-enqueue them.
|
||||
{
|
||||
|
|
@ -1100,28 +1165,32 @@ void TerrainManager::processReadyTiles() {
|
|||
}
|
||||
}
|
||||
|
||||
// Outer upload batch: all GPU uploads across all advanceFinalization calls
|
||||
// this frame share a single command buffer submission + fence wait.
|
||||
VkContext* vkCtx = terrainRenderer ? terrainRenderer->getVkContext() : nullptr;
|
||||
|
||||
// Reclaim completed async uploads from previous frames (non-blocking)
|
||||
if (vkCtx) vkCtx->pollUploadBatches();
|
||||
|
||||
// Nothing to finalize — done.
|
||||
if (finalizingTiles_.empty()) return;
|
||||
|
||||
// Async upload batch: record GPU copies into a command buffer, submit with
|
||||
// a fence, but DON'T wait. The fence is polled on subsequent frames.
|
||||
// This eliminates the main-thread stall from vkWaitForFences entirely.
|
||||
const int maxSteps = taxiStreamingMode_ ? 8 : 2;
|
||||
int steps = 0;
|
||||
|
||||
if (vkCtx) vkCtx->beginUploadBatch();
|
||||
|
||||
// Drive incremental finalization within time budget
|
||||
while (!finalizingTiles_.empty()) {
|
||||
while (!finalizingTiles_.empty() && steps < maxSteps) {
|
||||
auto& ft = finalizingTiles_.front();
|
||||
bool done = advanceFinalization(ft);
|
||||
|
||||
if (done) {
|
||||
finalizingTiles_.pop_front();
|
||||
}
|
||||
|
||||
auto now = std::chrono::high_resolution_clock::now();
|
||||
float elapsedMs = std::chrono::duration<float, std::milli>(now - startTime).count();
|
||||
if (elapsedMs >= timeBudgetMs) {
|
||||
break;
|
||||
}
|
||||
steps++;
|
||||
}
|
||||
|
||||
if (vkCtx) vkCtx->endUploadBatch();
|
||||
if (vkCtx) vkCtx->endUploadBatch(); // Async — submits but doesn't wait
|
||||
}
|
||||
|
||||
void TerrainManager::processAllReadyTiles() {
|
||||
|
|
@ -1151,7 +1220,7 @@ void TerrainManager::processAllReadyTiles() {
|
|||
finalizingTiles_.pop_front();
|
||||
}
|
||||
|
||||
if (vkCtx) vkCtx->endUploadBatch();
|
||||
if (vkCtx) vkCtx->endUploadBatchSync(); // Sync — load screen needs data ready
|
||||
}
|
||||
|
||||
void TerrainManager::processOneReadyTile() {
|
||||
|
|
@ -1177,7 +1246,7 @@ void TerrainManager::processOneReadyTile() {
|
|||
while (!advanceFinalization(ft)) {}
|
||||
finalizingTiles_.pop_front();
|
||||
|
||||
if (vkCtx) vkCtx->endUploadBatch();
|
||||
if (vkCtx) vkCtx->endUploadBatchSync(); // Sync — load screen needs data ready
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -67,6 +67,14 @@ void VkContext::shutdown() {
|
|||
frame = {};
|
||||
}
|
||||
|
||||
// Clean up any in-flight async upload batches (device already idle)
|
||||
for (auto& batch : inFlightBatches_) {
|
||||
// Staging buffers: skip destroy — allocator is about to be torn down
|
||||
vkDestroyFence(device, batch.fence, nullptr);
|
||||
// Command buffer freed when pool is destroyed below
|
||||
}
|
||||
inFlightBatches_.clear();
|
||||
|
||||
if (immFence) { vkDestroyFence(device, immFence, nullptr); immFence = VK_NULL_HANDLE; }
|
||||
if (immCommandPool) { vkDestroyCommandPool(device, immCommandPool, nullptr); immCommandPool = VK_NULL_HANDLE; }
|
||||
|
||||
|
|
@ -1447,17 +1455,94 @@ void VkContext::endUploadBatch() {
|
|||
|
||||
inUploadBatch_ = false;
|
||||
|
||||
// Submit all recorded commands with a single fence wait
|
||||
if (batchStagingBuffers_.empty()) {
|
||||
// No GPU copies were recorded — skip the submit entirely.
|
||||
vkEndCommandBuffer(batchCmd_);
|
||||
vkFreeCommandBuffers(device, immCommandPool, 1, &batchCmd_);
|
||||
batchCmd_ = VK_NULL_HANDLE;
|
||||
return;
|
||||
}
|
||||
|
||||
// Submit commands with a NEW fence — don't wait, let GPU work in parallel.
|
||||
vkEndCommandBuffer(batchCmd_);
|
||||
|
||||
VkFenceCreateInfo fenceInfo{};
|
||||
fenceInfo.sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO;
|
||||
VkFence fence = VK_NULL_HANDLE;
|
||||
vkCreateFence(device, &fenceInfo, nullptr, &fence);
|
||||
|
||||
VkSubmitInfo submitInfo{};
|
||||
submitInfo.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO;
|
||||
submitInfo.commandBufferCount = 1;
|
||||
submitInfo.pCommandBuffers = &batchCmd_;
|
||||
vkQueueSubmit(graphicsQueue, 1, &submitInfo, fence);
|
||||
|
||||
// Stash everything for later cleanup when fence signals
|
||||
InFlightBatch batch;
|
||||
batch.fence = fence;
|
||||
batch.cmd = batchCmd_;
|
||||
batch.stagingBuffers = std::move(batchStagingBuffers_);
|
||||
inFlightBatches_.push_back(std::move(batch));
|
||||
|
||||
batchCmd_ = VK_NULL_HANDLE;
|
||||
batchStagingBuffers_.clear();
|
||||
}
|
||||
|
||||
void VkContext::endUploadBatchSync() {
|
||||
if (uploadBatchDepth_ <= 0) return;
|
||||
uploadBatchDepth_--;
|
||||
if (uploadBatchDepth_ > 0) return;
|
||||
|
||||
inUploadBatch_ = false;
|
||||
|
||||
if (batchStagingBuffers_.empty()) {
|
||||
vkEndCommandBuffer(batchCmd_);
|
||||
vkFreeCommandBuffers(device, immCommandPool, 1, &batchCmd_);
|
||||
batchCmd_ = VK_NULL_HANDLE;
|
||||
return;
|
||||
}
|
||||
|
||||
// Synchronous path for load screens — submit and wait
|
||||
endSingleTimeCommands(batchCmd_);
|
||||
batchCmd_ = VK_NULL_HANDLE;
|
||||
|
||||
// Destroy all deferred staging buffers
|
||||
for (auto& staging : batchStagingBuffers_) {
|
||||
destroyBuffer(allocator, staging);
|
||||
}
|
||||
batchStagingBuffers_.clear();
|
||||
}
|
||||
|
||||
void VkContext::pollUploadBatches() {
|
||||
if (inFlightBatches_.empty()) return;
|
||||
|
||||
for (auto it = inFlightBatches_.begin(); it != inFlightBatches_.end(); ) {
|
||||
VkResult result = vkGetFenceStatus(device, it->fence);
|
||||
if (result == VK_SUCCESS) {
|
||||
// GPU finished — free resources
|
||||
for (auto& staging : it->stagingBuffers) {
|
||||
destroyBuffer(allocator, staging);
|
||||
}
|
||||
vkFreeCommandBuffers(device, immCommandPool, 1, &it->cmd);
|
||||
vkDestroyFence(device, it->fence, nullptr);
|
||||
it = inFlightBatches_.erase(it);
|
||||
} else {
|
||||
++it;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void VkContext::waitAllUploads() {
|
||||
for (auto& batch : inFlightBatches_) {
|
||||
vkWaitForFences(device, 1, &batch.fence, VK_TRUE, UINT64_MAX);
|
||||
for (auto& staging : batch.stagingBuffers) {
|
||||
destroyBuffer(allocator, staging);
|
||||
}
|
||||
vkFreeCommandBuffers(device, immCommandPool, 1, &batch.cmd);
|
||||
vkDestroyFence(device, batch.fence, nullptr);
|
||||
}
|
||||
inFlightBatches_.clear();
|
||||
}
|
||||
|
||||
/// Queues a staging buffer on the current batch's deferred list. The buffers on
/// that list are destroyed by endUploadBatchSync(), or handed over to an
/// in-flight batch by endUploadBatch() and destroyed once that batch is reaped.
void VkContext::deferStagingCleanup(AllocatedBuffer staging) {
    batchStagingBuffers_.emplace_back(staging);
}
|
||||
|
|
|
|||
|
|
@ -2325,13 +2325,27 @@ VkTexture* WMORenderer::loadTexture(const std::string& path) {
|
|||
const auto& attemptedCandidates = uniqueCandidates;
|
||||
|
||||
// Try loading all candidates until one succeeds
|
||||
// Check pre-decoded BLP cache first (populated by background worker threads)
|
||||
pipeline::BLPImage blp;
|
||||
std::string resolvedKey;
|
||||
for (const auto& c : attemptedCandidates) {
|
||||
blp = assetManager->loadTexture(c);
|
||||
if (blp.isValid()) {
|
||||
resolvedKey = c;
|
||||
break;
|
||||
if (predecodedBLPCache_) {
|
||||
for (const auto& c : uniqueCandidates) {
|
||||
auto pit = predecodedBLPCache_->find(c);
|
||||
if (pit != predecodedBLPCache_->end()) {
|
||||
blp = std::move(pit->second);
|
||||
predecodedBLPCache_->erase(pit);
|
||||
resolvedKey = c;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
if (!blp.isValid()) {
|
||||
for (const auto& c : attemptedCandidates) {
|
||||
blp = assetManager->loadTexture(c);
|
||||
if (blp.isValid()) {
|
||||
resolvedKey = c;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
if (!blp.isValid()) {
|
||||
|
|
@ -2369,10 +2383,10 @@ VkTexture* WMORenderer::loadTexture(const std::string& path) {
|
|||
texture->createSampler(vkCtx_->getDevice(), VK_FILTER_LINEAR, VK_FILTER_LINEAR,
|
||||
VK_SAMPLER_ADDRESS_MODE_REPEAT);
|
||||
|
||||
// Generate normal+height map from diffuse pixels
|
||||
// Generate normal+height map from diffuse pixels (skip during streaming to avoid CPU stalls)
|
||||
float nhVariance = 0.0f;
|
||||
std::unique_ptr<VkTexture> nhMap;
|
||||
if (normalMappingEnabled_ || pomEnabled_) {
|
||||
if ((normalMappingEnabled_ || pomEnabled_) && !deferNormalMaps_) {
|
||||
nhMap = generateNormalHeightMap(blp.data.data(), blp.width, blp.height, nhVariance);
|
||||
if (nhMap) {
|
||||
approxBytes *= 2; // account for normal map in budget
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue