mirror of
https://github.com/Kelsidavis/WoWee.git
synced 2026-03-25 08:30:13 +00:00
Background normal map generation, queue-draining load screen warmup
- Normal map CPU work (luminance→blur→Sobel) moved to background threads, main thread only does GPU upload (~1-2ms vs 15-22ms per texture) - Load screen warmup now waits until ALL spawn/equipment/gameobject queues are drained before transitioning (prevents naked character, NPC pop-in) - Exit condition: min 2s + 5 consecutive empty iterations, hard cap 15s - Equipment queue processes 8 items per warmup iteration instead of 1 - Added LoadingScreen::renderOverlay() for future world-behind-loading use
This commit is contained in:
parent
63efac9fa6
commit
02cf0e4df3
5 changed files with 218 additions and 60 deletions
|
|
@ -332,6 +332,11 @@ void CharacterRenderer::shutdown() {
|
|||
LOG_INFO("CharacterRenderer::shutdown instances=", instances.size(),
|
||||
" models=", models.size(), " override=", (void*)renderPassOverride_);
|
||||
|
||||
// Wait for any in-flight background normal map generation threads
|
||||
while (pendingNormalMapCount_.load(std::memory_order_relaxed) > 0) {
|
||||
std::this_thread::sleep_for(std::chrono::milliseconds(1));
|
||||
}
|
||||
|
||||
vkDeviceWaitIdle(vkCtx_->getDevice());
|
||||
VkDevice device = vkCtx_->getDevice();
|
||||
VmaAllocator alloc = vkCtx_->getAllocator();
|
||||
|
|
@ -413,6 +418,16 @@ void CharacterRenderer::clear() {
|
|||
LOG_INFO("CharacterRenderer::clear instances=", instances.size(),
|
||||
" models=", models.size());
|
||||
|
||||
// Wait for any in-flight background normal map generation threads
|
||||
while (pendingNormalMapCount_.load(std::memory_order_relaxed) > 0) {
|
||||
std::this_thread::sleep_for(std::chrono::milliseconds(1));
|
||||
}
|
||||
// Discard any completed results that haven't been uploaded
|
||||
{
|
||||
std::lock_guard<std::mutex> lock(normalMapResultsMutex_);
|
||||
completedNormalMaps_.clear();
|
||||
}
|
||||
|
||||
vkDeviceWaitIdle(vkCtx_->getDevice());
|
||||
VkDevice device = vkCtx_->getDevice();
|
||||
|
||||
|
|
@ -509,7 +524,32 @@ std::unique_ptr<VkTexture> CharacterRenderer::generateNormalHeightMap(
|
|||
const uint8_t* pixels, uint32_t width, uint32_t height, float& outVariance) {
|
||||
if (!vkCtx_ || width == 0 || height == 0) return nullptr;
|
||||
|
||||
// Use the CPU-only static method, then upload to GPU
|
||||
std::vector<uint8_t> dummy(width * height * 4);
|
||||
std::memcpy(dummy.data(), pixels, dummy.size());
|
||||
auto result = generateNormalHeightMapCPU("", std::move(dummy), width, height);
|
||||
outVariance = result.variance;
|
||||
|
||||
auto tex = std::make_unique<VkTexture>();
|
||||
if (!tex->upload(*vkCtx_, result.pixels.data(), width, height, VK_FORMAT_R8G8B8A8_UNORM, true)) {
|
||||
return nullptr;
|
||||
}
|
||||
tex->createSampler(vkCtx_->getDevice(), VK_FILTER_LINEAR, VK_FILTER_LINEAR,
|
||||
VK_SAMPLER_ADDRESS_MODE_REPEAT);
|
||||
return tex;
|
||||
}
|
||||
|
||||
// Static, thread-safe CPU-only normal map generation (no GPU access)
|
||||
CharacterRenderer::NormalMapResult CharacterRenderer::generateNormalHeightMapCPU(
|
||||
std::string cacheKey, std::vector<uint8_t> srcPixels, uint32_t width, uint32_t height) {
|
||||
NormalMapResult result;
|
||||
result.cacheKey = std::move(cacheKey);
|
||||
result.width = width;
|
||||
result.height = height;
|
||||
result.variance = 0.0f;
|
||||
|
||||
const uint32_t totalPixels = width * height;
|
||||
const uint8_t* pixels = srcPixels.data();
|
||||
|
||||
// Step 1: Compute height from luminance
|
||||
std::vector<float> heightMap(totalPixels);
|
||||
|
|
@ -524,7 +564,7 @@ std::unique_ptr<VkTexture> CharacterRenderer::generateNormalHeightMap(
|
|||
sumH2 += h * h;
|
||||
}
|
||||
double mean = sumH / totalPixels;
|
||||
outVariance = static_cast<float>(sumH2 / totalPixels - mean * mean);
|
||||
result.variance = static_cast<float>(sumH2 / totalPixels - mean * mean);
|
||||
|
||||
// Step 1.5: Box blur the height map to reduce noise from diffuse textures
|
||||
auto wrapSample = [&](const std::vector<float>& map, int x, int y) -> float {
|
||||
|
|
@ -545,11 +585,9 @@ std::unique_ptr<VkTexture> CharacterRenderer::generateNormalHeightMap(
|
|||
}
|
||||
}
|
||||
|
||||
// Step 2: Sobel 3x3 → normal map (crisp detail from original, blurred for POM alpha)
|
||||
// Higher strength than WMO (2.0) because character/weapon textures are hand-painted
|
||||
// with baked-in lighting that produces low-contrast gradients in the Sobel filter.
|
||||
// Step 2: Sobel 3x3 → normal map
|
||||
const float strength = 5.0f;
|
||||
std::vector<uint8_t> output(totalPixels * 4);
|
||||
result.pixels.resize(totalPixels * 4);
|
||||
|
||||
auto sampleH = [&](int x, int y) -> float {
|
||||
x = ((x % (int)width) + (int)width) % (int)width;
|
||||
|
|
@ -573,20 +611,14 @@ std::unique_ptr<VkTexture> CharacterRenderer::generateNormalHeightMap(
|
|||
if (len > 0.0f) { nx /= len; ny /= len; nz /= len; }
|
||||
|
||||
uint32_t idx = (y * width + x) * 4;
|
||||
output[idx + 0] = static_cast<uint8_t>(std::clamp((nx * 0.5f + 0.5f) * 255.0f, 0.0f, 255.0f));
|
||||
output[idx + 1] = static_cast<uint8_t>(std::clamp((ny * 0.5f + 0.5f) * 255.0f, 0.0f, 255.0f));
|
||||
output[idx + 2] = static_cast<uint8_t>(std::clamp((nz * 0.5f + 0.5f) * 255.0f, 0.0f, 255.0f));
|
||||
output[idx + 3] = static_cast<uint8_t>(std::clamp(blurredHeight[y * width + x] * 255.0f, 0.0f, 255.0f));
|
||||
result.pixels[idx + 0] = static_cast<uint8_t>(std::clamp((nx * 0.5f + 0.5f) * 255.0f, 0.0f, 255.0f));
|
||||
result.pixels[idx + 1] = static_cast<uint8_t>(std::clamp((ny * 0.5f + 0.5f) * 255.0f, 0.0f, 255.0f));
|
||||
result.pixels[idx + 2] = static_cast<uint8_t>(std::clamp((nz * 0.5f + 0.5f) * 255.0f, 0.0f, 255.0f));
|
||||
result.pixels[idx + 3] = static_cast<uint8_t>(std::clamp(blurredHeight[y * width + x] * 255.0f, 0.0f, 255.0f));
|
||||
}
|
||||
}
|
||||
|
||||
auto tex = std::make_unique<VkTexture>();
|
||||
if (!tex->upload(*vkCtx_, output.data(), width, height, VK_FORMAT_R8G8B8A8_UNORM, true)) {
|
||||
return nullptr;
|
||||
}
|
||||
tex->createSampler(vkCtx_->getDevice(), VK_FILTER_LINEAR, VK_FILTER_LINEAR,
|
||||
VK_SAMPLER_ADDRESS_MODE_REPEAT);
|
||||
return tex;
|
||||
return result;
|
||||
}
|
||||
|
||||
VkTexture* CharacterRenderer::loadTexture(const std::string& path) {
|
||||
|
|
@ -687,15 +719,22 @@ VkTexture* CharacterRenderer::loadTexture(const std::string& path) {
|
|||
e.hasAlpha = hasAlpha;
|
||||
e.colorKeyBlack = colorKeyBlackHint;
|
||||
|
||||
// Defer normal/height map generation to avoid stalling loadModel.
|
||||
// Normal maps are generated in processPendingNormalMaps() at a per-frame budget.
|
||||
// Launch normal map generation on background thread — CPU work is pure compute,
|
||||
// only the GPU upload (in processPendingNormalMaps) needs the main thread (~1-2ms).
|
||||
if (blpImage.width >= 32 && blpImage.height >= 32) {
|
||||
PendingNormalMap pending;
|
||||
pending.cacheKey = key;
|
||||
pending.pixels.assign(blpImage.data.begin(), blpImage.data.end());
|
||||
pending.width = blpImage.width;
|
||||
pending.height = blpImage.height;
|
||||
pendingNormalMaps_.push_back(std::move(pending));
|
||||
uint32_t w = blpImage.width, h = blpImage.height;
|
||||
std::string ck = key;
|
||||
std::vector<uint8_t> px(blpImage.data.begin(), blpImage.data.end());
|
||||
pendingNormalMapCount_.fetch_add(1, std::memory_order_relaxed);
|
||||
auto* self = this;
|
||||
std::thread([self, ck = std::move(ck), px = std::move(px), w, h]() mutable {
|
||||
auto result = generateNormalHeightMapCPU(std::move(ck), std::move(px), w, h);
|
||||
{
|
||||
std::lock_guard<std::mutex> lock(self->normalMapResultsMutex_);
|
||||
self->completedNormalMaps_.push_back(std::move(result));
|
||||
}
|
||||
self->pendingNormalMapCount_.fetch_sub(1, std::memory_order_relaxed);
|
||||
}).detach();
|
||||
e.normalMapPending = true;
|
||||
}
|
||||
|
||||
|
|
@ -709,30 +748,39 @@ VkTexture* CharacterRenderer::loadTexture(const std::string& path) {
|
|||
}
|
||||
|
||||
void CharacterRenderer::processPendingNormalMaps(int budget) {
|
||||
if (pendingNormalMaps_.empty() || !vkCtx_) return;
|
||||
if (!vkCtx_) return;
|
||||
|
||||
int processed = 0;
|
||||
while (!pendingNormalMaps_.empty() && processed < budget) {
|
||||
auto pending = std::move(pendingNormalMaps_.front());
|
||||
pendingNormalMaps_.pop_front();
|
||||
// Collect completed results from background threads
|
||||
std::deque<NormalMapResult> ready;
|
||||
{
|
||||
std::lock_guard<std::mutex> lock(normalMapResultsMutex_);
|
||||
if (completedNormalMaps_.empty()) return;
|
||||
int count = std::min(budget, static_cast<int>(completedNormalMaps_.size()));
|
||||
for (int i = 0; i < count; i++) {
|
||||
ready.push_back(std::move(completedNormalMaps_.front()));
|
||||
completedNormalMaps_.pop_front();
|
||||
}
|
||||
}
|
||||
|
||||
auto it = textureCache.find(pending.cacheKey);
|
||||
// GPU upload only (~1-2ms each) — CPU work already done on background thread
|
||||
for (auto& result : ready) {
|
||||
auto it = textureCache.find(result.cacheKey);
|
||||
if (it == textureCache.end()) continue; // texture was evicted
|
||||
|
||||
float nhVariance = 0.0f;
|
||||
vkCtx_->beginUploadBatch();
|
||||
auto nhMap = generateNormalHeightMap(pending.pixels.data(),
|
||||
pending.width, pending.height, nhVariance);
|
||||
vkCtx_->endUploadBatch();
|
||||
|
||||
if (nhMap) {
|
||||
it->second.heightMapVariance = nhVariance;
|
||||
it->second.approxBytes += approxTextureBytesWithMips(pending.width, pending.height);
|
||||
textureCacheBytes_ += approxTextureBytesWithMips(pending.width, pending.height);
|
||||
it->second.normalHeightMap = std::move(nhMap);
|
||||
auto tex = std::make_unique<VkTexture>();
|
||||
bool ok = tex->upload(*vkCtx_, result.pixels.data(), result.width, result.height,
|
||||
VK_FORMAT_R8G8B8A8_UNORM, true);
|
||||
if (ok) {
|
||||
tex->createSampler(vkCtx_->getDevice(), VK_FILTER_LINEAR, VK_FILTER_LINEAR,
|
||||
VK_SAMPLER_ADDRESS_MODE_REPEAT);
|
||||
it->second.heightMapVariance = result.variance;
|
||||
it->second.approxBytes += approxTextureBytesWithMips(result.width, result.height);
|
||||
textureCacheBytes_ += approxTextureBytesWithMips(result.width, result.height);
|
||||
it->second.normalHeightMap = std::move(tex);
|
||||
}
|
||||
vkCtx_->endUploadBatch();
|
||||
it->second.normalMapPending = false;
|
||||
processed++;
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -240,6 +240,66 @@ bool LoadingScreen::loadImage(const std::string& path) {
|
|||
return true;
|
||||
}
|
||||
|
||||
void LoadingScreen::renderOverlay() {
|
||||
// Draw loading screen content as ImGui overlay within an existing ImGui frame.
|
||||
// Caller is responsible for ImGui NewFrame/Render and Vulkan frame management.
|
||||
ImGuiIO& io = ImGui::GetIO();
|
||||
float screenW = io.DisplaySize.x;
|
||||
float screenH = io.DisplaySize.y;
|
||||
|
||||
ImGui::SetNextWindowPos(ImVec2(0, 0));
|
||||
ImGui::SetNextWindowSize(ImVec2(screenW, screenH));
|
||||
ImGui::Begin("##LoadingScreenOverlay", nullptr,
|
||||
ImGuiWindowFlags_NoTitleBar | ImGuiWindowFlags_NoResize |
|
||||
ImGuiWindowFlags_NoMove | ImGuiWindowFlags_NoScrollbar |
|
||||
ImGuiWindowFlags_NoInputs | ImGuiWindowFlags_NoBackground |
|
||||
ImGuiWindowFlags_NoBringToFrontOnFocus);
|
||||
|
||||
if (bgDescriptorSet) {
|
||||
ImGui::GetWindowDrawList()->AddImage(
|
||||
reinterpret_cast<ImTextureID>(bgDescriptorSet),
|
||||
ImVec2(0, 0), ImVec2(screenW, screenH));
|
||||
}
|
||||
|
||||
// Progress bar
|
||||
{
|
||||
const float barWidthFrac = 0.6f;
|
||||
const float barHeight = 6.0f;
|
||||
const float barY = screenH * 0.06f;
|
||||
float barX = screenW * (0.5f - barWidthFrac * 0.5f);
|
||||
float barW = screenW * barWidthFrac;
|
||||
ImDrawList* drawList = ImGui::GetWindowDrawList();
|
||||
drawList->AddRectFilled(ImVec2(barX, barY), ImVec2(barX + barW, barY + barHeight),
|
||||
IM_COL32(25, 25, 25, 200), 2.0f);
|
||||
if (loadProgress > 0.001f) {
|
||||
drawList->AddRectFilled(ImVec2(barX, barY), ImVec2(barX + barW * loadProgress, barY + barHeight),
|
||||
IM_COL32(199, 156, 33, 255), 2.0f);
|
||||
}
|
||||
drawList->AddRect(ImVec2(barX - 1, barY - 1), ImVec2(barX + barW + 1, barY + barHeight + 1),
|
||||
IM_COL32(140, 110, 25, 255), 2.0f);
|
||||
}
|
||||
|
||||
// Percentage text
|
||||
{
|
||||
char pctBuf[32];
|
||||
snprintf(pctBuf, sizeof(pctBuf), "%d%%", static_cast<int>(loadProgress * 100.0f));
|
||||
float textY = screenH * 0.06f - 20.0f;
|
||||
ImVec2 pctSize = ImGui::CalcTextSize(pctBuf);
|
||||
ImGui::SetCursorPos(ImVec2((screenW - pctSize.x) * 0.5f, textY));
|
||||
ImGui::TextColored(ImVec4(0.0f, 0.0f, 0.0f, 1.0f), "%s", pctBuf);
|
||||
}
|
||||
|
||||
// Status text
|
||||
{
|
||||
float statusY = screenH * 0.06f + 14.0f;
|
||||
ImVec2 statusSize = ImGui::CalcTextSize(statusText.c_str());
|
||||
ImGui::SetCursorPos(ImVec2((screenW - statusSize.x) * 0.5f, statusY));
|
||||
ImGui::TextColored(ImVec4(0.0f, 0.0f, 0.0f, 1.0f), "%s", statusText.c_str());
|
||||
}
|
||||
|
||||
ImGui::End();
|
||||
}
|
||||
|
||||
void LoadingScreen::render() {
|
||||
// If a frame is already in progress (e.g. called from a UI callback),
|
||||
// end it before starting our own
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue