Background BLP texture pre-decoding + deferred WMO normal maps (12x streaming perf)

Move CPU-heavy BLP texture decoding from main thread to background worker
threads for all hot paths: terrain M2 models, WMO doodad M2s, WMO textures,
creature models, and gameobject WMOs. Each renderer (M2, WMO, Character) now
accepts a pre-decoded BLP cache that loadTexture() checks before falling back
to synchronous decode.

Defer WMO normal/height map generation (3 per-pixel passes: luminance, box
blur, Sobel) during terrain streaming finalization — this was the dominant
remaining bottleneck after BLP pre-decoding.

Terrain streaming stalls: 1576ms → 124ms worst case.
This commit is contained in:
Kelsi 2026-03-07 15:46:56 -08:00
parent 0313bd8692
commit 7ac990cff4
13 changed files with 573 additions and 109 deletions

View file

@ -625,7 +625,18 @@ VkTexture* CharacterRenderer::loadTexture(const std::string& path) {
return whiteTexture_.get();
}
auto blpImage = assetManager->loadTexture(key);
// Check pre-decoded BLP cache first (populated by background threads)
pipeline::BLPImage blpImage;
if (predecodedBLPCache_) {
auto pit = predecodedBLPCache_->find(key);
if (pit != predecodedBLPCache_->end()) {
blpImage = std::move(pit->second);
predecodedBLPCache_->erase(pit);
}
}
if (!blpImage.isValid()) {
blpImage = assetManager->loadTexture(key);
}
if (!blpImage.isValid()) {
// Return white fallback but don't cache the failure — allow retry
// on next character load in case the asset becomes available.
@ -1412,8 +1423,9 @@ uint32_t CharacterRenderer::createInstance(uint32_t modelId, const glm::vec3& po
instance.scale = scale;
// Initialize bone matrices to identity
auto& model = models[modelId].data;
instance.boneMatrices.resize(std::max(static_cast<size_t>(1), model.bones.size()), glm::mat4(1.0f));
auto& gpuRef = models[modelId];
instance.boneMatrices.resize(std::max(static_cast<size_t>(1), gpuRef.data.bones.size()), glm::mat4(1.0f));
instance.cachedModel = &gpuRef;
uint32_t id = instance.id;
instances[id] = std::move(instance);
@ -1511,13 +1523,12 @@ void CharacterRenderer::update(float deltaTime, const glm::vec3& cameraPos) {
if (distSq >= animUpdateRadiusSq) continue;
// Always advance animation time (cheap)
auto modelIt = models.find(inst.modelId);
if (modelIt != models.end() && !modelIt->second.data.sequences.empty()) {
if (inst.cachedModel && !inst.cachedModel->data.sequences.empty()) {
if (inst.currentSequenceIndex < 0) {
inst.currentSequenceIndex = 0;
inst.currentAnimationId = modelIt->second.data.sequences[0].id;
inst.currentAnimationId = inst.cachedModel->data.sequences[0].id;
}
const auto& seq = modelIt->second.data.sequences[inst.currentSequenceIndex];
const auto& seq = inst.cachedModel->data.sequences[inst.currentSequenceIndex];
inst.animationTime += deltaTime * 1000.0f;
if (seq.duration > 0 && inst.animationTime >= static_cast<float>(seq.duration)) {
if (inst.animationLoop) {
@ -1528,10 +1539,11 @@ void CharacterRenderer::update(float deltaTime, const glm::vec3& cameraPos) {
}
}
// Distance-tiered bone throttling: near=every frame, mid=every 3rd, far=every 6th
// Distance-tiered bone throttling: near=every frame, mid=every 4th, far=every 8th
uint32_t boneInterval = 1;
if (distSq > 60.0f * 60.0f) boneInterval = 6;
else if (distSq > 30.0f * 30.0f) boneInterval = 3;
if (distSq > 40.0f * 40.0f) boneInterval = 8;
else if (distSq > 20.0f * 20.0f) boneInterval = 4;
else if (distSq > 10.0f * 10.0f) boneInterval = 2;
inst.boneUpdateCounter++;
bool needsBones = (inst.boneUpdateCounter >= boneInterval) || inst.boneMatrices.empty();
@ -1615,11 +1627,8 @@ void CharacterRenderer::update(float deltaTime, const glm::vec3& cameraPos) {
}
void CharacterRenderer::updateAnimation(CharacterInstance& instance, float deltaTime) {
auto modelIt = models.find(instance.modelId);
if (modelIt == models.end()) {
return;
}
const auto& model = modelIt->second.data;
if (!instance.cachedModel) return;
const auto& model = instance.cachedModel->data;
if (model.sequences.empty()) {
return;
@ -1732,7 +1741,8 @@ glm::quat CharacterRenderer::interpolateQuat(const pipeline::M2AnimationTrack& t
// --- Bone transform calculation ---
void CharacterRenderer::calculateBoneMatrices(CharacterInstance& instance) {
auto& model = models[instance.modelId].data;
if (!instance.cachedModel) return;
auto& model = instance.cachedModel->data;
if (model.bones.empty()) {
return;
@ -1833,9 +1843,8 @@ void CharacterRenderer::render(VkCommandBuffer cmd, VkDescriptorSet perFrameSet,
}
}
auto modelIt = models.find(instance.modelId);
if (modelIt == models.end()) continue;
const auto& gpuModel = modelIt->second;
if (!instance.cachedModel) continue;
const auto& gpuModel = *instance.cachedModel;
// Skip models without GPU buffers
if (!gpuModel.vertexBuffer) continue;
@ -2487,9 +2496,8 @@ void CharacterRenderer::renderShadow(VkCommandBuffer cmd, const glm::mat4& light
glm::vec3 diff = inst.position - shadowCenter;
if (glm::dot(diff, diff) > shadowRadiusSq) continue;
auto modelIt = models.find(inst.modelId);
if (modelIt == models.end()) continue;
const M2ModelGPU& gpuModel = modelIt->second;
if (!inst.cachedModel) continue;
const M2ModelGPU& gpuModel = *inst.cachedModel;
if (!gpuModel.vertexBuffer) continue;
glm::mat4 modelMat = inst.hasOverrideModelMatrix