From 24f2ec75ec126c5598a545046f6a711ec68e765c Mon Sep 17 00:00:00 2001
From: Kelsi
Date: Sat, 7 Mar 2026 17:16:38 -0800
Subject: [PATCH] Defer normal map generation to reduce GPU model upload stalls by ~50%

Each loadTexture call was generating a normal/height map inline (3
full-image passes: luminance + blur + Sobel). For models with 15-20
textures this added 30-40ms to the 70ms model upload. Now deferred to a
per-frame budget (2/frame in-game, 10/frame during load screen). Models
render without POM until their normal maps are ready.

Queued requests whose texture has been evicted, or whose cache entry is
no longer flagged normalMapPending (texture evicted and reloaded while
its first request was still queued), are dropped so a normal map is
never generated or byte-accounted twice for the same cache entry.
---
Review notes (this section is ignored by git am):
- Hunk line counts and the diffstat reflect the reviewed code below.
- The blob ids on the "index" lines predate the review changes.

 include/rendering/character_renderer.hpp | 15 +++++++
 src/core/application.cpp                 | 13 +++++--
 src/rendering/character_renderer.cpp     | 55 ++++++++++++++++++++----
 3 files changed, 74 insertions(+), 9 deletions(-)

diff --git a/include/rendering/character_renderer.hpp b/include/rendering/character_renderer.hpp
index c7cae0d7..83cb3e7f 100644
--- a/include/rendering/character_renderer.hpp
+++ b/include/rendering/character_renderer.hpp
@@ -12,6 +12,7 @@
 #include
 #include
 #include
+#include <deque>
 
 namespace wowee {
 namespace pipeline { class AssetManager; }
@@ -278,6 +279,7 @@ private:
         uint64_t lastUse = 0;
         bool hasAlpha = false;
         bool colorKeyBlack = false;
+        bool normalMapPending = false;  // queued in pendingNormalMaps_, map not generated yet
     };
     std::unordered_map textureCache;
     std::unordered_map textureHasAlphaByPtr_;
@@ -302,6 +304,19 @@ private:
     std::unique_ptr generateNormalHeightMap(
         const uint8_t* pixels, uint32_t width, uint32_t height, float& outVariance);
 
+    // Deferred normal map generation — avoids stalling loadModel
+    struct PendingNormalMap {
+        std::string cacheKey;
+        std::vector<uint8_t> pixels;  // RGBA pixel data
+        uint32_t width = 0;
+        uint32_t height = 0;
+    };
+    std::deque<PendingNormalMap> pendingNormalMaps_;
+public:
+    // Generates at most `budget` queued normal maps per call.
+    void processPendingNormalMaps(int budget = 2);
+private:
+
     // Normal mapping / POM settings
     bool normalMappingEnabled_ = true;
     float normalMapStrength_ = 0.8f;
diff --git a/src/core/application.cpp b/src/core/application.cpp
index b003af53..1a239d8a 100644
--- a/src/core/application.cpp
+++ b/src/core/application.cpp
@@ -922,14 +922,20 @@ void Application::update(float deltaTime) {
                 auto t3 = std::chrono::steady_clock::now();
                 processDeferredEquipmentQueue();
                 auto t4 = std::chrono::steady_clock::now();
+                // Process deferred normal maps (2 per frame to spread CPU cost)
+                if (auto* cr = renderer ? renderer->getCharacterRenderer() : nullptr) {
+                    cr->processPendingNormalMaps(2);
+                }
+                auto t5 = std::chrono::steady_clock::now();
                 float pMs = std::chrono::duration(t1 - t0).count();
                 float cMs = std::chrono::duration(t2 - t1).count();
                 float nMs = std::chrono::duration(t3 - t2).count();
                 float eMs = std::chrono::duration(t4 - t3).count();
-                float total = pMs + cMs + nMs + eMs;
+                float nmMs = std::chrono::duration(t5 - t4).count();
+                float total = pMs + cMs + nMs + eMs + nmMs;
                 if (total > 4.0f) {
                     LOG_WARNING("spawn/equip breakdown: player=", pMs, "ms creature=", cMs,
-                                "ms npcComposite=", nMs, "ms equip=", eMs, "ms");
+                                "ms npcComposite=", nMs, "ms equip=", eMs, "ms normalMaps=", nmMs, "ms");
                 }
             });
             // Self-heal missing creature visuals: if a nearby UNIT exists in
@@ -4250,6 +4256,9 @@ void Application::loadOnlineWorldTerrain(uint32_t mapId, float x, float y, float
             processCreatureSpawnQueue();
             processAsyncNpcCompositeResults();
             processDeferredEquipmentQueue();
+            if (auto* cr = renderer ? renderer->getCharacterRenderer() : nullptr) {
+                cr->processPendingNormalMaps(10); // higher budget during load screen
+            }
 
             // Process ALL pending game object spawns (no 1-per-frame cap during load screen).
             while (!pendingGameObjectSpawns_.empty()) {
diff --git a/src/rendering/character_renderer.cpp b/src/rendering/character_renderer.cpp
index 2031a7b4..baaaf3e6 100644
--- a/src/rendering/character_renderer.cpp
+++ b/src/rendering/character_renderer.cpp
@@ -687,13 +687,16 @@ VkTexture* CharacterRenderer::loadTexture(const std::string& path) {
     e.hasAlpha = hasAlpha;
     e.colorKeyBlack = colorKeyBlackHint;
 
-    // Generate normal/height map from diffuse texture
-    float nhVariance = 0.0f;
-    auto nhMap = generateNormalHeightMap(blpImage.data.data(), blpImage.width, blpImage.height, nhVariance);
-    if (nhMap) {
-        e.heightMapVariance = nhVariance;
-        e.approxBytes += approxTextureBytesWithMips(blpImage.width, blpImage.height);
-        e.normalHeightMap = std::move(nhMap);
+    // Defer normal/height map generation to avoid stalling loadModel.
+    // Normal maps are generated in processPendingNormalMaps() at a per-frame budget.
+    if (blpImage.width >= 32 && blpImage.height >= 32) {
+        PendingNormalMap pending;
+        pending.cacheKey = key;
+        pending.pixels.assign(blpImage.data.begin(), blpImage.data.end());
+        pending.width = blpImage.width;
+        pending.height = blpImage.height;
+        pendingNormalMaps_.push_back(std::move(pending));
+        e.normalMapPending = true;
     }
 
     textureCacheBytes_ += e.approxBytes;
@@ -705,6 +708,44 @@ VkTexture* CharacterRenderer::loadTexture(const std::string& path) {
     return texPtr;
 }
 
+// Drains up to `budget` queued normal/height map requests (see loadTexture).
+// Requests whose texture is gone from textureCache, or whose entry is no
+// longer flagged normalMapPending, are dropped without consuming budget.
+void CharacterRenderer::processPendingNormalMaps(int budget) {
+    if (pendingNormalMaps_.empty() || !vkCtx_) return;
+
+    int processed = 0;
+    while (!pendingNormalMaps_.empty() && processed < budget) {
+        auto pending = std::move(pendingNormalMaps_.front());
+        pendingNormalMaps_.pop_front();
+
+        auto it = textureCache.find(pending.cacheKey);
+        if (it == textureCache.end()) continue; // texture was evicted
+
+        // A texture evicted and reloaded while its first request was still
+        // queued has two requests for one cache entry. Only the first may
+        // run; a second pass would add the map's bytes to approxBytes and
+        // textureCacheBytes_ again.
+        if (!it->second.normalMapPending) continue;
+
+        float nhVariance = 0.0f;
+        vkCtx_->beginUploadBatch();
+        auto nhMap = generateNormalHeightMap(pending.pixels.data(),
+                                              pending.width, pending.height, nhVariance);
+        vkCtx_->endUploadBatch();
+
+        if (nhMap) {
+            const auto nhBytes = approxTextureBytesWithMips(pending.width, pending.height);
+            it->second.heightMapVariance = nhVariance;
+            it->second.approxBytes += nhBytes;
+            textureCacheBytes_ += nhBytes;
+            it->second.normalHeightMap = std::move(nhMap);
+        }
+        it->second.normalMapPending = false;
+        processed++;
+    }
+}
+
 // Alpha-blend overlay onto composite at (dstX, dstY)
 static void blitOverlay(std::vector& composite, int compW, int compH,
                          const pipeline::BLPImage& overlay, int dstX, int dstY) {