Defer normal map generation to reduce GPU model upload stalls by ~50%
Some checks are pending
Build / Build (arm64) (push) Waiting to run
Build / Build (x86-64) (push) Waiting to run
Build / Build (macOS arm64) (push) Waiting to run
Build / Build (windows-arm64) (push) Waiting to run
Build / Build (windows-x86-64) (push) Waiting to run
Security / CodeQL (C/C++) (push) Waiting to run
Security / Semgrep (push) Waiting to run
Security / Sanitizer Build (ASan/UBSan) (push) Waiting to run

Each loadTexture call was generating a normal/height map inline (3 full-image
passes: luminance + blur + Sobel). For models with 15-20 textures this added
30-40ms to the 70ms model upload. Generation is now deferred and processed
under a per-frame budget (2/frame in-game, 10/frame during the load screen).
Models render without POM (parallax occlusion mapping) until their normal
maps are ready.
This commit is contained in:
Kelsi 2026-03-07 17:16:38 -08:00
parent faca22ac5f
commit 24f2ec75ec
3 changed files with 62 additions and 9 deletions

View file

@ -12,6 +12,7 @@
#include <string>
#include <utility>
#include <future>
#include <deque>
namespace wowee {
namespace pipeline { class AssetManager; }
@ -278,6 +279,7 @@ private:
uint64_t lastUse = 0;
bool hasAlpha = false;
bool colorKeyBlack = false;
bool normalMapPending = false; // deferred normal map generation
};
std::unordered_map<std::string, TextureCacheEntry> textureCache;
std::unordered_map<VkTexture*, bool> textureHasAlphaByPtr_;
@ -302,6 +304,17 @@ private:
std::unique_ptr<VkTexture> generateNormalHeightMap(
const uint8_t* pixels, uint32_t width, uint32_t height, float& outVariance);
// Deferred normal map generation — avoids stalling loadModel.
// One queued request: a copy of a texture's pixels plus the cache key of the
// entry that should receive the generated normal/height map.
struct PendingNormalMap {
    std::string cacheKey;         // key looked up in textureCache when the request is processed
    std::vector<uint8_t> pixels;  // RGBA pixel data
    // Zero-initialized so a default-constructed request never carries
    // indeterminate dimensions (matches the in-class initializers used by
    // TextureCacheEntry).
    uint32_t width = 0;
    uint32_t height = 0;
};
std::deque<PendingNormalMap> pendingNormalMaps_;
public:
void processPendingNormalMaps(int budget = 2);
private:
// Normal mapping / POM settings
bool normalMappingEnabled_ = true;
float normalMapStrength_ = 0.8f;

View file

@ -922,14 +922,20 @@ void Application::update(float deltaTime) {
auto t3 = std::chrono::steady_clock::now();
processDeferredEquipmentQueue();
auto t4 = std::chrono::steady_clock::now();
// Process deferred normal maps (2 per frame to spread CPU cost)
if (auto* cr = renderer ? renderer->getCharacterRenderer() : nullptr) {
cr->processPendingNormalMaps(2);
}
auto t5 = std::chrono::steady_clock::now();
float pMs = std::chrono::duration<float, std::milli>(t1 - t0).count();
float cMs = std::chrono::duration<float, std::milli>(t2 - t1).count();
float nMs = std::chrono::duration<float, std::milli>(t3 - t2).count();
float eMs = std::chrono::duration<float, std::milli>(t4 - t3).count();
float total = pMs + cMs + nMs + eMs;
float nmMs = std::chrono::duration<float, std::milli>(t5 - t4).count();
float total = pMs + cMs + nMs + eMs + nmMs;
if (total > 4.0f) {
LOG_WARNING("spawn/equip breakdown: player=", pMs, "ms creature=", cMs,
"ms npcComposite=", nMs, "ms equip=", eMs, "ms");
"ms npcComposite=", nMs, "ms equip=", eMs, "ms normalMaps=", nmMs, "ms");
}
});
// Self-heal missing creature visuals: if a nearby UNIT exists in
@ -4250,6 +4256,9 @@ void Application::loadOnlineWorldTerrain(uint32_t mapId, float x, float y, float
processCreatureSpawnQueue();
processAsyncNpcCompositeResults();
processDeferredEquipmentQueue();
if (auto* cr = renderer ? renderer->getCharacterRenderer() : nullptr) {
cr->processPendingNormalMaps(10); // higher budget during load screen
}
// Process ALL pending game object spawns (no 1-per-frame cap during load screen).
while (!pendingGameObjectSpawns_.empty()) {

View file

@ -687,13 +687,16 @@ VkTexture* CharacterRenderer::loadTexture(const std::string& path) {
e.hasAlpha = hasAlpha;
e.colorKeyBlack = colorKeyBlackHint;
// Generate normal/height map from diffuse texture
float nhVariance = 0.0f;
auto nhMap = generateNormalHeightMap(blpImage.data.data(), blpImage.width, blpImage.height, nhVariance);
if (nhMap) {
e.heightMapVariance = nhVariance;
e.approxBytes += approxTextureBytesWithMips(blpImage.width, blpImage.height);
e.normalHeightMap = std::move(nhMap);
// Defer normal/height map generation to avoid stalling loadModel.
// Normal maps are generated in processPendingNormalMaps() at a per-frame budget.
if (blpImage.width >= 32 && blpImage.height >= 32) {
PendingNormalMap pending;
pending.cacheKey = key;
pending.pixels.assign(blpImage.data.begin(), blpImage.data.end());
pending.width = blpImage.width;
pending.height = blpImage.height;
pendingNormalMaps_.push_back(std::move(pending));
e.normalMapPending = true;
}
textureCacheBytes_ += e.approxBytes;
@ -705,6 +708,34 @@ VkTexture* CharacterRenderer::loadTexture(const std::string& path) {
return texPtr;
}
/// Generates queued normal/height maps, at most `budget` per call.
///
/// Requests are taken from the front of pendingNormalMaps_ in FIFO order.
/// A request is dropped without counting against the budget when its cache
/// key is no longer present in textureCache, or when the matching entry is
/// no longer marked normalMapPending (a stale duplicate request).
/// Returns immediately when the queue is empty or vkCtx_ is null.
///
/// @param budget Maximum number of maps to generate in this call; a value
///               <= 0 leaves the queue untouched.
void CharacterRenderer::processPendingNormalMaps(int budget) {
    if (pendingNormalMaps_.empty() || !vkCtx_) return;

    int processed = 0;
    while (!pendingNormalMaps_.empty() && processed < budget) {
        auto pending = std::move(pendingNormalMaps_.front());
        pendingNormalMaps_.pop_front();

        auto it = textureCache.find(pending.cacheKey);
        if (it == textureCache.end()) continue; // texture was evicted

        auto& entry = it->second;
        // The same key can be queued more than once (e.g. the entry was
        // evicted and loaded again before its first request ran). Once one
        // request has been handled the flag is cleared; processing another
        // would regenerate the map and add its size to the byte counters a
        // second time.
        if (!entry.normalMapPending) continue;

        float nhVariance = 0.0f;
        // Generation is bracketed by an upload batch on vkCtx_.
        vkCtx_->beginUploadBatch();
        auto nhMap = generateNormalHeightMap(pending.pixels.data(),
                                             pending.width, pending.height, nhVariance);
        vkCtx_->endUploadBatch();

        if (nhMap) {
            // Same estimate is charged to the entry and to the cache-wide total.
            const auto nhBytes = approxTextureBytesWithMips(pending.width, pending.height);
            entry.heightMapVariance = nhVariance;
            entry.approxBytes += nhBytes;
            textureCacheBytes_ += nhBytes;
            entry.normalHeightMap = std::move(nhMap);
        }
        // Cleared even when generation produced no map, so the request is not retried.
        entry.normalMapPending = false;
        ++processed;
    }
}
// Alpha-blend overlay onto composite at (dstX, dstY)
static void blitOverlay(std::vector<uint8_t>& composite, int compW, int compH,
const pipeline::BLPImage& overlay, int dstX, int dstY) {