mirror of
https://github.com/Kelsidavis/WoWee.git
synced 2026-04-17 17:43:52 +00:00
Defer normal map generation to reduce GPU model upload stalls by ~50%
Some checks are pending
Build / Build (arm64) (push) Waiting to run
Build / Build (x86-64) (push) Waiting to run
Build / Build (macOS arm64) (push) Waiting to run
Build / Build (windows-arm64) (push) Waiting to run
Build / Build (windows-x86-64) (push) Waiting to run
Security / CodeQL (C/C++) (push) Waiting to run
Security / Semgrep (push) Waiting to run
Security / Sanitizer Build (ASan/UBSan) (push) Waiting to run
Some checks are pending
Build / Build (arm64) (push) Waiting to run
Build / Build (x86-64) (push) Waiting to run
Build / Build (macOS arm64) (push) Waiting to run
Build / Build (windows-arm64) (push) Waiting to run
Build / Build (windows-x86-64) (push) Waiting to run
Security / CodeQL (C/C++) (push) Waiting to run
Security / Semgrep (push) Waiting to run
Security / Sanitizer Build (ASan/UBSan) (push) Waiting to run
Each loadTexture call was generating a normal/height map inline (3 full-image passes: luminance + blur + Sobel). For models with 15-20 textures this added 30-40ms to the 70ms model upload. Now deferred to a per-frame budget (2/frame in-game, 10/frame during load screen). Models render without POM until their normal maps are ready.
This commit is contained in:
parent
faca22ac5f
commit
24f2ec75ec
3 changed files with 62 additions and 9 deletions
|
|
@ -12,6 +12,7 @@
|
||||||
#include <string>
|
#include <string>
|
||||||
#include <utility>
|
#include <utility>
|
||||||
#include <future>
|
#include <future>
|
||||||
|
#include <deque>
|
||||||
|
|
||||||
namespace wowee {
|
namespace wowee {
|
||||||
namespace pipeline { class AssetManager; }
|
namespace pipeline { class AssetManager; }
|
||||||
|
|
@ -278,6 +279,7 @@ private:
|
||||||
uint64_t lastUse = 0;
|
uint64_t lastUse = 0;
|
||||||
bool hasAlpha = false;
|
bool hasAlpha = false;
|
||||||
bool colorKeyBlack = false;
|
bool colorKeyBlack = false;
|
||||||
|
bool normalMapPending = false; // deferred normal map generation
|
||||||
};
|
};
|
||||||
std::unordered_map<std::string, TextureCacheEntry> textureCache;
|
std::unordered_map<std::string, TextureCacheEntry> textureCache;
|
||||||
std::unordered_map<VkTexture*, bool> textureHasAlphaByPtr_;
|
std::unordered_map<VkTexture*, bool> textureHasAlphaByPtr_;
|
||||||
|
|
@ -302,6 +304,17 @@ private:
|
||||||
std::unique_ptr<VkTexture> generateNormalHeightMap(
|
std::unique_ptr<VkTexture> generateNormalHeightMap(
|
||||||
const uint8_t* pixels, uint32_t width, uint32_t height, float& outVariance);
|
const uint8_t* pixels, uint32_t width, uint32_t height, float& outVariance);
|
||||||
|
|
||||||
|
// Deferred normal map generation — avoids stalling loadModel
|
||||||
|
// One queued normal/height-map generation request. loadTexture() fills it with
// a copy of the decoded texture pixels; processPendingNormalMaps() consumes it.
struct PendingNormalMap {
    std::string cacheKey;         // key of the owning entry in textureCache
    std::vector<uint8_t> pixels;  // RGBA pixel data
    // Zero-initialized so a default-constructed request never carries
    // indeterminate dimensions into generateNormalHeightMap().
    uint32_t width = 0;
    uint32_t height = 0;
};
|
||||||
|
std::deque<PendingNormalMap> pendingNormalMaps_;
|
||||||
|
public:
|
||||||
|
// Generates normal/height maps for textures queued by loadTexture(), producing
// at most `budget` maps per call. Queued entries whose texture is no longer in
// textureCache are dropped without counting against the budget.
// Callers pass 2 per frame in-game and 10 per iteration during the load screen.
void processPendingNormalMaps(int budget = 2);
|
||||||
|
private:
|
||||||
|
|
||||||
// Normal mapping / POM settings
|
// Normal mapping / POM settings
|
||||||
bool normalMappingEnabled_ = true;
|
bool normalMappingEnabled_ = true;
|
||||||
float normalMapStrength_ = 0.8f;
|
float normalMapStrength_ = 0.8f;
|
||||||
|
|
|
||||||
|
|
@ -922,14 +922,20 @@ void Application::update(float deltaTime) {
|
||||||
auto t3 = std::chrono::steady_clock::now();
|
auto t3 = std::chrono::steady_clock::now();
|
||||||
processDeferredEquipmentQueue();
|
processDeferredEquipmentQueue();
|
||||||
auto t4 = std::chrono::steady_clock::now();
|
auto t4 = std::chrono::steady_clock::now();
|
||||||
|
// Process deferred normal maps (2 per frame to spread CPU cost)
|
||||||
|
if (auto* cr = renderer ? renderer->getCharacterRenderer() : nullptr) {
|
||||||
|
cr->processPendingNormalMaps(2);
|
||||||
|
}
|
||||||
|
auto t5 = std::chrono::steady_clock::now();
|
||||||
float pMs = std::chrono::duration<float, std::milli>(t1 - t0).count();
|
float pMs = std::chrono::duration<float, std::milli>(t1 - t0).count();
|
||||||
float cMs = std::chrono::duration<float, std::milli>(t2 - t1).count();
|
float cMs = std::chrono::duration<float, std::milli>(t2 - t1).count();
|
||||||
float nMs = std::chrono::duration<float, std::milli>(t3 - t2).count();
|
float nMs = std::chrono::duration<float, std::milli>(t3 - t2).count();
|
||||||
float eMs = std::chrono::duration<float, std::milli>(t4 - t3).count();
|
float eMs = std::chrono::duration<float, std::milli>(t4 - t3).count();
|
||||||
float total = pMs + cMs + nMs + eMs;
|
float nmMs = std::chrono::duration<float, std::milli>(t5 - t4).count();
|
||||||
|
float total = pMs + cMs + nMs + eMs + nmMs;
|
||||||
if (total > 4.0f) {
|
if (total > 4.0f) {
|
||||||
LOG_WARNING("spawn/equip breakdown: player=", pMs, "ms creature=", cMs,
|
LOG_WARNING("spawn/equip breakdown: player=", pMs, "ms creature=", cMs,
|
||||||
"ms npcComposite=", nMs, "ms equip=", eMs, "ms");
|
"ms npcComposite=", nMs, "ms equip=", eMs, "ms normalMaps=", nmMs, "ms");
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
// Self-heal missing creature visuals: if a nearby UNIT exists in
|
// Self-heal missing creature visuals: if a nearby UNIT exists in
|
||||||
|
|
@ -4250,6 +4256,9 @@ void Application::loadOnlineWorldTerrain(uint32_t mapId, float x, float y, float
|
||||||
processCreatureSpawnQueue();
|
processCreatureSpawnQueue();
|
||||||
processAsyncNpcCompositeResults();
|
processAsyncNpcCompositeResults();
|
||||||
processDeferredEquipmentQueue();
|
processDeferredEquipmentQueue();
|
||||||
|
if (auto* cr = renderer ? renderer->getCharacterRenderer() : nullptr) {
|
||||||
|
cr->processPendingNormalMaps(10); // higher budget during load screen
|
||||||
|
}
|
||||||
|
|
||||||
// Process ALL pending game object spawns (no 1-per-frame cap during load screen).
|
// Process ALL pending game object spawns (no 1-per-frame cap during load screen).
|
||||||
while (!pendingGameObjectSpawns_.empty()) {
|
while (!pendingGameObjectSpawns_.empty()) {
|
||||||
|
|
|
||||||
|
|
@ -687,13 +687,16 @@ VkTexture* CharacterRenderer::loadTexture(const std::string& path) {
|
||||||
e.hasAlpha = hasAlpha;
|
e.hasAlpha = hasAlpha;
|
||||||
e.colorKeyBlack = colorKeyBlackHint;
|
e.colorKeyBlack = colorKeyBlackHint;
|
||||||
|
|
||||||
// Generate normal/height map from diffuse texture
|
// Defer normal/height map generation to avoid stalling loadModel.
|
||||||
float nhVariance = 0.0f;
|
// Normal maps are generated in processPendingNormalMaps() at a per-frame budget.
|
||||||
auto nhMap = generateNormalHeightMap(blpImage.data.data(), blpImage.width, blpImage.height, nhVariance);
|
if (blpImage.width >= 32 && blpImage.height >= 32) {
|
||||||
if (nhMap) {
|
PendingNormalMap pending;
|
||||||
e.heightMapVariance = nhVariance;
|
pending.cacheKey = key;
|
||||||
e.approxBytes += approxTextureBytesWithMips(blpImage.width, blpImage.height);
|
pending.pixels.assign(blpImage.data.begin(), blpImage.data.end());
|
||||||
e.normalHeightMap = std::move(nhMap);
|
pending.width = blpImage.width;
|
||||||
|
pending.height = blpImage.height;
|
||||||
|
pendingNormalMaps_.push_back(std::move(pending));
|
||||||
|
e.normalMapPending = true;
|
||||||
}
|
}
|
||||||
|
|
||||||
textureCacheBytes_ += e.approxBytes;
|
textureCacheBytes_ += e.approxBytes;
|
||||||
|
|
@ -705,6 +708,34 @@ VkTexture* CharacterRenderer::loadTexture(const std::string& path) {
|
||||||
return texPtr;
|
return texPtr;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void CharacterRenderer::processPendingNormalMaps(int budget) {
|
||||||
|
if (pendingNormalMaps_.empty() || !vkCtx_) return;
|
||||||
|
|
||||||
|
int processed = 0;
|
||||||
|
while (!pendingNormalMaps_.empty() && processed < budget) {
|
||||||
|
auto pending = std::move(pendingNormalMaps_.front());
|
||||||
|
pendingNormalMaps_.pop_front();
|
||||||
|
|
||||||
|
auto it = textureCache.find(pending.cacheKey);
|
||||||
|
if (it == textureCache.end()) continue; // texture was evicted
|
||||||
|
|
||||||
|
float nhVariance = 0.0f;
|
||||||
|
vkCtx_->beginUploadBatch();
|
||||||
|
auto nhMap = generateNormalHeightMap(pending.pixels.data(),
|
||||||
|
pending.width, pending.height, nhVariance);
|
||||||
|
vkCtx_->endUploadBatch();
|
||||||
|
|
||||||
|
if (nhMap) {
|
||||||
|
it->second.heightMapVariance = nhVariance;
|
||||||
|
it->second.approxBytes += approxTextureBytesWithMips(pending.width, pending.height);
|
||||||
|
textureCacheBytes_ += approxTextureBytesWithMips(pending.width, pending.height);
|
||||||
|
it->second.normalHeightMap = std::move(nhMap);
|
||||||
|
}
|
||||||
|
it->second.normalMapPending = false;
|
||||||
|
processed++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// Alpha-blend overlay onto composite at (dstX, dstY)
|
// Alpha-blend overlay onto composite at (dstX, dstY)
|
||||||
static void blitOverlay(std::vector<uint8_t>& composite, int compW, int compH,
|
static void blitOverlay(std::vector<uint8_t>& composite, int compW, int compH,
|
||||||
const pipeline::BLPImage& overlay, int dstX, int dstY) {
|
const pipeline::BLPImage& overlay, int dstX, int dstY) {
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue