From 63efac9fa66b956bc2ac4ab7cbc0fd81244f8ca0 Mon Sep 17 00:00:00 2001 From: Kelsi Date: Sat, 7 Mar 2026 17:31:47 -0800 Subject: [PATCH 01/13] Unlimited creature model uploads during load screen, remove duplicate code Loading screen now calls processCreatureSpawnQueue(unlimited=true) which removes the 1-upload-per-frame cap and 2ms time budget, allowing all pending creature models to upload to GPU in bulk. Also increases concurrent async background loads from 4 to 16 during load screen. Replaces 40-line inline duplicate of processAsyncCreatureResults with the shared function. --- include/core/application.hpp | 4 +-- src/core/application.cpp | 68 +++++++----------------------------- 2 files changed, 14 insertions(+), 58 deletions(-) diff --git a/include/core/application.hpp b/include/core/application.hpp index 84b89f32..165d11bb 100644 --- a/include/core/application.hpp +++ b/include/core/application.hpp @@ -215,7 +215,7 @@ private: std::future future; }; std::vector asyncCreatureLoads_; - void processAsyncCreatureResults(); + void processAsyncCreatureResults(bool unlimited = false); static constexpr int MAX_ASYNC_CREATURE_LOADS = 4; // concurrent background loads std::unordered_set deadCreatureGuids_; // GUIDs that should spawn in corpse/death pose std::unordered_map displayIdModelCache_; // displayId → modelId (model caching) @@ -373,7 +373,7 @@ private: std::unordered_set pendingPlayerSpawnGuids_; void processPlayerSpawnQueue(); std::unordered_set creaturePermanentFailureGuids_; - void processCreatureSpawnQueue(); + void processCreatureSpawnQueue(bool unlimited = false); struct PendingGameObjectSpawn { uint64_t guid; diff --git a/src/core/application.cpp b/src/core/application.cpp index 1a239d8a..23b2c15c 100644 --- a/src/core/application.cpp +++ b/src/core/application.cpp @@ -4207,53 +4207,8 @@ void Application::loadOnlineWorldTerrain(uint32_t mapId, float x, float y, float processPlayerSpawnQueue(); // During load screen warmup: lift per-frame budgets so GPU 
uploads - // happen in bulk while the loading screen is still visible. - // Process ALL async creature model uploads (no 3-per-frame cap). - { - for (auto it = asyncCreatureLoads_.begin(); it != asyncCreatureLoads_.end(); ) { - if (!it->future.valid() || - it->future.wait_for(std::chrono::milliseconds(0)) != std::future_status::ready) { - ++it; - continue; - } - auto result = it->future.get(); - it = asyncCreatureLoads_.erase(it); - if (result.permanent_failure) { - nonRenderableCreatureDisplayIds_.insert(result.displayId); - creaturePermanentFailureGuids_.insert(result.guid); - pendingCreatureSpawnGuids_.erase(result.guid); - creatureSpawnRetryCounts_.erase(result.guid); - continue; - } - if (!result.valid || !result.model) { - pendingCreatureSpawnGuids_.erase(result.guid); - creatureSpawnRetryCounts_.erase(result.guid); - continue; - } - auto* charRenderer = renderer ? renderer->getCharacterRenderer() : nullptr; - if (!charRenderer) { pendingCreatureSpawnGuids_.erase(result.guid); continue; } - if (!charRenderer->loadModel(*result.model, result.modelId)) { - nonRenderableCreatureDisplayIds_.insert(result.displayId); - creaturePermanentFailureGuids_.insert(result.guid); - pendingCreatureSpawnGuids_.erase(result.guid); - creatureSpawnRetryCounts_.erase(result.guid); - continue; - } - displayIdModelCache_[result.displayId] = result.modelId; - pendingCreatureSpawnGuids_.erase(result.guid); - creatureSpawnRetryCounts_.erase(result.guid); - if (!creatureInstances_.count(result.guid) && - !creaturePermanentFailureGuids_.count(result.guid)) { - PendingCreatureSpawn s{}; - s.guid = result.guid; s.displayId = result.displayId; - s.x = result.x; s.y = result.y; s.z = result.z; - s.orientation = result.orientation; - pendingCreatureSpawns_.push_back(s); - pendingCreatureSpawnGuids_.insert(result.guid); - } - } - } - processCreatureSpawnQueue(); + // and spawns happen in bulk while the loading screen is still visible. 
+ processCreatureSpawnQueue(true); // unlimited: no model upload cap, no time budget processAsyncNpcCompositeResults(); processDeferredEquipmentQueue(); if (auto* cr = renderer ? renderer->getCharacterRenderer() : nullptr) { @@ -6804,9 +6759,10 @@ void Application::spawnOnlineGameObject(uint64_t guid, uint32_t entry, uint32_t " displayId=", displayId, " at (", x, ", ", y, ", ", z, ")"); } -void Application::processAsyncCreatureResults() { +void Application::processAsyncCreatureResults(bool unlimited) { // Check completed async model loads and finalize on main thread (GPU upload + instance creation). // Limit GPU model uploads per frame to avoid spikes, but always drain cheap bookkeeping. + // In unlimited mode (load screen), process all pending uploads without cap. static constexpr int kMaxModelUploadsPerFrame = 1; int modelUploads = 0; @@ -6819,9 +6775,7 @@ void Application::processAsyncCreatureResults() { // Peek: if this result needs a NEW model upload (not cached) and we've hit // the upload budget, defer to next frame without consuming the future. - if (modelUploads >= kMaxModelUploadsPerFrame) { - // Check if this displayId already has a cached model (cheap spawn, no GPU upload). - // We can't peek the displayId without getting the future, so just break. + if (!unlimited && modelUploads >= kMaxModelUploadsPerFrame) { break; } @@ -6967,13 +6921,14 @@ void Application::processAsyncNpcCompositeResults() { } } -void Application::processCreatureSpawnQueue() { +void Application::processCreatureSpawnQueue(bool unlimited) { auto startTime = std::chrono::steady_clock::now(); // Budget: max 2ms per frame for creature spawning to prevent stutter. + // In unlimited mode (load screen), process everything without budget cap. static constexpr float kSpawnBudgetMs = 2.0f; // First, finalize any async model loads that completed on background threads. 
- processAsyncCreatureResults(); + processAsyncCreatureResults(unlimited); { auto now = std::chrono::steady_clock::now(); float asyncMs = std::chrono::duration(now - startTime).count(); @@ -6992,11 +6947,11 @@ void Application::processCreatureSpawnQueue() { int asyncLaunched = 0; size_t rotationsLeft = pendingCreatureSpawns_.size(); while (!pendingCreatureSpawns_.empty() && - processed < MAX_SPAWNS_PER_FRAME && + (unlimited || processed < MAX_SPAWNS_PER_FRAME) && rotationsLeft > 0) { // Check time budget every iteration (including first — async results may // have already consumed the budget via GPU model uploads). - { + if (!unlimited) { auto now = std::chrono::steady_clock::now(); float elapsedMs = std::chrono::duration(now - startTime).count(); if (elapsedMs >= kSpawnBudgetMs) break; @@ -7017,7 +6972,8 @@ void Application::processCreatureSpawnQueue() { // For new models: launch async load on background thread instead of blocking. if (needsNewModel) { - if (static_cast(asyncCreatureLoads_.size()) + asyncLaunched >= MAX_ASYNC_CREATURE_LOADS) { + const int maxAsync = unlimited ? 
(MAX_ASYNC_CREATURE_LOADS * 4) : MAX_ASYNC_CREATURE_LOADS; + if (static_cast(asyncCreatureLoads_.size()) + asyncLaunched >= maxAsync) { // Too many in-flight — defer to next frame pendingCreatureSpawns_.push_back(s); rotationsLeft--; From 02cf0e4df381e33b2ac04c32a3095219effa4235 Mon Sep 17 00:00:00 2001 From: Kelsi Date: Sat, 7 Mar 2026 18:40:24 -0800 Subject: [PATCH 02/13] Background normal map generation, queue-draining load screen warmup MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Normal map CPU work (luminance→blur→Sobel) moved to background threads, main thread only does GPU upload (~1-2ms vs 15-22ms per texture) - Load screen warmup now waits until ALL spawn/equipment/gameobject queues are drained before transitioning (prevents naked character, NPC pop-in) - Exit condition: min 2s + 5 consecutive empty iterations, hard cap 15s - Equipment queue processes 8 items per warmup iteration instead of 1 - Added LoadingScreen::renderOverlay() for future world-behind-loading use --- include/rendering/character_renderer.hpp | 20 +++- include/rendering/loading_screen.hpp | 4 + src/core/application.cpp | 64 ++++++++--- src/rendering/character_renderer.cpp | 130 ++++++++++++++++------- src/rendering/loading_screen.cpp | 60 +++++++++++ 5 files changed, 218 insertions(+), 60 deletions(-) diff --git a/include/rendering/character_renderer.hpp b/include/rendering/character_renderer.hpp index 83cb3e7f..c4676008 100644 --- a/include/rendering/character_renderer.hpp +++ b/include/rendering/character_renderer.hpp @@ -13,6 +13,8 @@ #include #include #include +#include +#include namespace wowee { namespace pipeline { class AssetManager; } @@ -304,15 +306,23 @@ private: std::unique_ptr generateNormalHeightMap( const uint8_t* pixels, uint32_t width, uint32_t height, float& outVariance); - // Deferred normal map generation — avoids stalling loadModel - struct PendingNormalMap { + // Background normal map generation — CPU work on thread 
pool, GPU upload on main thread + struct NormalMapResult { std::string cacheKey; - std::vector pixels; // RGBA pixel data + std::vector pixels; // RGBA normal map output uint32_t width, height; + float variance; }; - std::deque pendingNormalMaps_; + // Completed results ready for GPU upload (populated by background threads) + std::mutex normalMapResultsMutex_; + std::deque completedNormalMaps_; + std::atomic pendingNormalMapCount_{0}; // in-flight background tasks + + // Pure CPU normal map generation (thread-safe, no GPU access) + static NormalMapResult generateNormalHeightMapCPU( + std::string cacheKey, std::vector pixels, uint32_t width, uint32_t height); public: - void processPendingNormalMaps(int budget = 2); + void processPendingNormalMaps(int budget = 4); private: // Normal mapping / POM settings diff --git a/include/rendering/loading_screen.hpp b/include/rendering/loading_screen.hpp index 5f119676..afd134b9 100644 --- a/include/rendering/loading_screen.hpp +++ b/include/rendering/loading_screen.hpp @@ -24,6 +24,10 @@ public: // Render the loading screen with progress bar and status text (pure ImGui) void render(); + // Draw loading screen as ImGui overlay (call within an existing ImGui frame). + // Used during warmup to overlay loading screen on top of the rendered world. 
+ void renderOverlay(); + void setProgress(float progress) { loadProgress = progress; } void setStatus(const std::string& status) { statusText = status; } diff --git a/src/core/application.cpp b/src/core/application.cpp index 23b2c15c..300bffc7 100644 --- a/src/core/application.cpp +++ b/src/core/application.cpp @@ -49,9 +49,9 @@ #include // GL/glew.h removed — Vulkan migration Phase 1 #include +#include #include #include -#include #include #include #include @@ -922,9 +922,9 @@ void Application::update(float deltaTime) { auto t3 = std::chrono::steady_clock::now(); processDeferredEquipmentQueue(); auto t4 = std::chrono::steady_clock::now(); - // Process deferred normal maps (2 per frame to spread CPU cost) + // Upload completed normal maps from background threads (~1-2ms each GPU upload) if (auto* cr = renderer ? renderer->getCharacterRenderer() : nullptr) { - cr->processPendingNormalMaps(2); + cr->processPendingNormalMaps(4); } auto t5 = std::chrono::steady_clock::now(); float pMs = std::chrono::duration(t1 - t0).count(); @@ -4167,11 +4167,17 @@ void Application::loadOnlineWorldTerrain(uint32_t mapId, float x, float y, float }); } - // Hide first-login hitch by draining initial world packets/spawn queues before - // dropping the loading screen. Keep this bounded so we don't stall indefinitely. + // Keep the loading screen visible until all spawn/equipment/gameobject queues + // are fully drained. This ensures the player sees a fully populated world + // (character clothed, NPCs placed, game objects loaded) when the screen drops. 
{ - const float kWarmupMaxSeconds = 2.5f; + const float kMinWarmupSeconds = 2.0f; // minimum time to drain network packets + const float kMaxWarmupSeconds = 15.0f; // hard cap to avoid infinite stall const auto warmupStart = std::chrono::high_resolution_clock::now(); + // Track consecutive idle iterations (all queues empty) to detect convergence + int idleIterations = 0; + const int kIdleThreshold = 5; // require 5 consecutive empty loops (~80ms) + while (true) { SDL_Event event; while (SDL_PollEvent(&event)) { @@ -4185,7 +4191,6 @@ void Application::loadOnlineWorldTerrain(uint32_t mapId, float x, float y, float int w = event.window.data1; int h = event.window.data2; window->setSize(w, h); - // Vulkan viewport set in command buffer if (renderer && renderer->getCamera()) { renderer->getCamera()->setAspectRatio(static_cast(w) / h); } @@ -4208,14 +4213,17 @@ void Application::loadOnlineWorldTerrain(uint32_t mapId, float x, float y, float // During load screen warmup: lift per-frame budgets so GPU uploads // and spawns happen in bulk while the loading screen is still visible. - processCreatureSpawnQueue(true); // unlimited: no model upload cap, no time budget + processCreatureSpawnQueue(true); processAsyncNpcCompositeResults(); - processDeferredEquipmentQueue(); + // Process equipment queue more aggressively during warmup (multiple per iteration) + for (int i = 0; i < 8 && (!deferredEquipmentQueue_.empty() || !asyncEquipmentLoads_.empty()); i++) { + processDeferredEquipmentQueue(); + } if (auto* cr = renderer ? renderer->getCharacterRenderer() : nullptr) { - cr->processPendingNormalMaps(10); // higher budget during load screen + cr->processPendingNormalMaps(INT_MAX); } - // Process ALL pending game object spawns (no 1-per-frame cap during load screen). + // Process ALL pending game object spawns. 
while (!pendingGameObjectSpawns_.empty()) { auto& s = pendingGameObjectSpawns_.front(); spawnOnlineGameObject(s.guid, s.entry, s.displayId, s.x, s.y, s.z, s.orientation); @@ -4226,14 +4234,42 @@ void Application::loadOnlineWorldTerrain(uint32_t mapId, float x, float y, float processPendingMount(); updateQuestMarkers(); + // Update renderer (terrain streaming, animations) + if (renderer) { + renderer->update(1.0f / 60.0f); + } + const auto now = std::chrono::high_resolution_clock::now(); const float elapsed = std::chrono::duration(now - warmupStart).count(); - const float t = std::clamp(elapsed / kWarmupMaxSeconds, 0.0f, 1.0f); - showProgress("Finalizing world sync...", 0.97f + t * 0.025f); - if (elapsed >= kWarmupMaxSeconds) { + // Check if all queues are drained + bool queuesEmpty = + pendingCreatureSpawns_.empty() && + asyncCreatureLoads_.empty() && + asyncNpcCompositeLoads_.empty() && + deferredEquipmentQueue_.empty() && + asyncEquipmentLoads_.empty() && + pendingGameObjectSpawns_.empty() && + asyncGameObjectLoads_.empty() && + pendingPlayerSpawns_.empty(); + + if (queuesEmpty) { + idleIterations++; + } else { + idleIterations = 0; + } + + // Exit when: (min time passed AND queues drained for several iterations) OR hard cap + bool readyToExit = (elapsed >= kMinWarmupSeconds && idleIterations >= kIdleThreshold); + if (readyToExit || elapsed >= kMaxWarmupSeconds) { + if (elapsed >= kMaxWarmupSeconds) { + LOG_WARNING("Warmup hit hard cap (", kMaxWarmupSeconds, "s), entering world with pending work"); + } break; } + + const float t = std::clamp(elapsed / kMaxWarmupSeconds, 0.0f, 1.0f); + showProgress("Finalizing world sync...", 0.97f + t * 0.025f); SDL_Delay(16); } } diff --git a/src/rendering/character_renderer.cpp b/src/rendering/character_renderer.cpp index baaaf3e6..9607f755 100644 --- a/src/rendering/character_renderer.cpp +++ b/src/rendering/character_renderer.cpp @@ -332,6 +332,11 @@ void CharacterRenderer::shutdown() { LOG_INFO("CharacterRenderer::shutdown 
instances=", instances.size(), " models=", models.size(), " override=", (void*)renderPassOverride_); + // Wait for any in-flight background normal map generation threads + while (pendingNormalMapCount_.load(std::memory_order_relaxed) > 0) { + std::this_thread::sleep_for(std::chrono::milliseconds(1)); + } + vkDeviceWaitIdle(vkCtx_->getDevice()); VkDevice device = vkCtx_->getDevice(); VmaAllocator alloc = vkCtx_->getAllocator(); @@ -413,6 +418,16 @@ void CharacterRenderer::clear() { LOG_INFO("CharacterRenderer::clear instances=", instances.size(), " models=", models.size()); + // Wait for any in-flight background normal map generation threads + while (pendingNormalMapCount_.load(std::memory_order_relaxed) > 0) { + std::this_thread::sleep_for(std::chrono::milliseconds(1)); + } + // Discard any completed results that haven't been uploaded + { + std::lock_guard lock(normalMapResultsMutex_); + completedNormalMaps_.clear(); + } + vkDeviceWaitIdle(vkCtx_->getDevice()); VkDevice device = vkCtx_->getDevice(); @@ -509,7 +524,32 @@ std::unique_ptr CharacterRenderer::generateNormalHeightMap( const uint8_t* pixels, uint32_t width, uint32_t height, float& outVariance) { if (!vkCtx_ || width == 0 || height == 0) return nullptr; + // Use the CPU-only static method, then upload to GPU + std::vector dummy(width * height * 4); + std::memcpy(dummy.data(), pixels, dummy.size()); + auto result = generateNormalHeightMapCPU("", std::move(dummy), width, height); + outVariance = result.variance; + + auto tex = std::make_unique(); + if (!tex->upload(*vkCtx_, result.pixels.data(), width, height, VK_FORMAT_R8G8B8A8_UNORM, true)) { + return nullptr; + } + tex->createSampler(vkCtx_->getDevice(), VK_FILTER_LINEAR, VK_FILTER_LINEAR, + VK_SAMPLER_ADDRESS_MODE_REPEAT); + return tex; +} + +// Static, thread-safe CPU-only normal map generation (no GPU access) +CharacterRenderer::NormalMapResult CharacterRenderer::generateNormalHeightMapCPU( + std::string cacheKey, std::vector srcPixels, uint32_t width, 
uint32_t height) { + NormalMapResult result; + result.cacheKey = std::move(cacheKey); + result.width = width; + result.height = height; + result.variance = 0.0f; + const uint32_t totalPixels = width * height; + const uint8_t* pixels = srcPixels.data(); // Step 1: Compute height from luminance std::vector heightMap(totalPixels); @@ -524,7 +564,7 @@ std::unique_ptr CharacterRenderer::generateNormalHeightMap( sumH2 += h * h; } double mean = sumH / totalPixels; - outVariance = static_cast(sumH2 / totalPixels - mean * mean); + result.variance = static_cast(sumH2 / totalPixels - mean * mean); // Step 1.5: Box blur the height map to reduce noise from diffuse textures auto wrapSample = [&](const std::vector& map, int x, int y) -> float { @@ -545,11 +585,9 @@ std::unique_ptr CharacterRenderer::generateNormalHeightMap( } } - // Step 2: Sobel 3x3 → normal map (crisp detail from original, blurred for POM alpha) - // Higher strength than WMO (2.0) because character/weapon textures are hand-painted - // with baked-in lighting that produces low-contrast gradients in the Sobel filter. 
+ // Step 2: Sobel 3x3 → normal map const float strength = 5.0f; - std::vector output(totalPixels * 4); + result.pixels.resize(totalPixels * 4); auto sampleH = [&](int x, int y) -> float { x = ((x % (int)width) + (int)width) % (int)width; @@ -573,20 +611,14 @@ std::unique_ptr CharacterRenderer::generateNormalHeightMap( if (len > 0.0f) { nx /= len; ny /= len; nz /= len; } uint32_t idx = (y * width + x) * 4; - output[idx + 0] = static_cast(std::clamp((nx * 0.5f + 0.5f) * 255.0f, 0.0f, 255.0f)); - output[idx + 1] = static_cast(std::clamp((ny * 0.5f + 0.5f) * 255.0f, 0.0f, 255.0f)); - output[idx + 2] = static_cast(std::clamp((nz * 0.5f + 0.5f) * 255.0f, 0.0f, 255.0f)); - output[idx + 3] = static_cast(std::clamp(blurredHeight[y * width + x] * 255.0f, 0.0f, 255.0f)); + result.pixels[idx + 0] = static_cast(std::clamp((nx * 0.5f + 0.5f) * 255.0f, 0.0f, 255.0f)); + result.pixels[idx + 1] = static_cast(std::clamp((ny * 0.5f + 0.5f) * 255.0f, 0.0f, 255.0f)); + result.pixels[idx + 2] = static_cast(std::clamp((nz * 0.5f + 0.5f) * 255.0f, 0.0f, 255.0f)); + result.pixels[idx + 3] = static_cast(std::clamp(blurredHeight[y * width + x] * 255.0f, 0.0f, 255.0f)); } } - auto tex = std::make_unique(); - if (!tex->upload(*vkCtx_, output.data(), width, height, VK_FORMAT_R8G8B8A8_UNORM, true)) { - return nullptr; - } - tex->createSampler(vkCtx_->getDevice(), VK_FILTER_LINEAR, VK_FILTER_LINEAR, - VK_SAMPLER_ADDRESS_MODE_REPEAT); - return tex; + return result; } VkTexture* CharacterRenderer::loadTexture(const std::string& path) { @@ -687,15 +719,22 @@ VkTexture* CharacterRenderer::loadTexture(const std::string& path) { e.hasAlpha = hasAlpha; e.colorKeyBlack = colorKeyBlackHint; - // Defer normal/height map generation to avoid stalling loadModel. - // Normal maps are generated in processPendingNormalMaps() at a per-frame budget. 
+ // Launch normal map generation on background thread — CPU work is pure compute, + // only the GPU upload (in processPendingNormalMaps) needs the main thread (~1-2ms). if (blpImage.width >= 32 && blpImage.height >= 32) { - PendingNormalMap pending; - pending.cacheKey = key; - pending.pixels.assign(blpImage.data.begin(), blpImage.data.end()); - pending.width = blpImage.width; - pending.height = blpImage.height; - pendingNormalMaps_.push_back(std::move(pending)); + uint32_t w = blpImage.width, h = blpImage.height; + std::string ck = key; + std::vector px(blpImage.data.begin(), blpImage.data.end()); + pendingNormalMapCount_.fetch_add(1, std::memory_order_relaxed); + auto* self = this; + std::thread([self, ck = std::move(ck), px = std::move(px), w, h]() mutable { + auto result = generateNormalHeightMapCPU(std::move(ck), std::move(px), w, h); + { + std::lock_guard lock(self->normalMapResultsMutex_); + self->completedNormalMaps_.push_back(std::move(result)); + } + self->pendingNormalMapCount_.fetch_sub(1, std::memory_order_relaxed); + }).detach(); e.normalMapPending = true; } @@ -709,30 +748,39 @@ VkTexture* CharacterRenderer::loadTexture(const std::string& path) { } void CharacterRenderer::processPendingNormalMaps(int budget) { - if (pendingNormalMaps_.empty() || !vkCtx_) return; + if (!vkCtx_) return; - int processed = 0; - while (!pendingNormalMaps_.empty() && processed < budget) { - auto pending = std::move(pendingNormalMaps_.front()); - pendingNormalMaps_.pop_front(); + // Collect completed results from background threads + std::deque ready; + { + std::lock_guard lock(normalMapResultsMutex_); + if (completedNormalMaps_.empty()) return; + int count = std::min(budget, static_cast(completedNormalMaps_.size())); + for (int i = 0; i < count; i++) { + ready.push_back(std::move(completedNormalMaps_.front())); + completedNormalMaps_.pop_front(); + } + } - auto it = textureCache.find(pending.cacheKey); + // GPU upload only (~1-2ms each) — CPU work already done on 
background thread + for (auto& result : ready) { + auto it = textureCache.find(result.cacheKey); if (it == textureCache.end()) continue; // texture was evicted - float nhVariance = 0.0f; vkCtx_->beginUploadBatch(); - auto nhMap = generateNormalHeightMap(pending.pixels.data(), - pending.width, pending.height, nhVariance); - vkCtx_->endUploadBatch(); - - if (nhMap) { - it->second.heightMapVariance = nhVariance; - it->second.approxBytes += approxTextureBytesWithMips(pending.width, pending.height); - textureCacheBytes_ += approxTextureBytesWithMips(pending.width, pending.height); - it->second.normalHeightMap = std::move(nhMap); + auto tex = std::make_unique(); + bool ok = tex->upload(*vkCtx_, result.pixels.data(), result.width, result.height, + VK_FORMAT_R8G8B8A8_UNORM, true); + if (ok) { + tex->createSampler(vkCtx_->getDevice(), VK_FILTER_LINEAR, VK_FILTER_LINEAR, + VK_SAMPLER_ADDRESS_MODE_REPEAT); + it->second.heightMapVariance = result.variance; + it->second.approxBytes += approxTextureBytesWithMips(result.width, result.height); + textureCacheBytes_ += approxTextureBytesWithMips(result.width, result.height); + it->second.normalHeightMap = std::move(tex); } + vkCtx_->endUploadBatch(); it->second.normalMapPending = false; - processed++; } } diff --git a/src/rendering/loading_screen.cpp b/src/rendering/loading_screen.cpp index 34ad1aa6..a2e83a2b 100644 --- a/src/rendering/loading_screen.cpp +++ b/src/rendering/loading_screen.cpp @@ -240,6 +240,66 @@ bool LoadingScreen::loadImage(const std::string& path) { return true; } +void LoadingScreen::renderOverlay() { + // Draw loading screen content as ImGui overlay within an existing ImGui frame. + // Caller is responsible for ImGui NewFrame/Render and Vulkan frame management. 
+ ImGuiIO& io = ImGui::GetIO(); + float screenW = io.DisplaySize.x; + float screenH = io.DisplaySize.y; + + ImGui::SetNextWindowPos(ImVec2(0, 0)); + ImGui::SetNextWindowSize(ImVec2(screenW, screenH)); + ImGui::Begin("##LoadingScreenOverlay", nullptr, + ImGuiWindowFlags_NoTitleBar | ImGuiWindowFlags_NoResize | + ImGuiWindowFlags_NoMove | ImGuiWindowFlags_NoScrollbar | + ImGuiWindowFlags_NoInputs | ImGuiWindowFlags_NoBackground | + ImGuiWindowFlags_NoBringToFrontOnFocus); + + if (bgDescriptorSet) { + ImGui::GetWindowDrawList()->AddImage( + reinterpret_cast(bgDescriptorSet), + ImVec2(0, 0), ImVec2(screenW, screenH)); + } + + // Progress bar + { + const float barWidthFrac = 0.6f; + const float barHeight = 6.0f; + const float barY = screenH * 0.06f; + float barX = screenW * (0.5f - barWidthFrac * 0.5f); + float barW = screenW * barWidthFrac; + ImDrawList* drawList = ImGui::GetWindowDrawList(); + drawList->AddRectFilled(ImVec2(barX, barY), ImVec2(barX + barW, barY + barHeight), + IM_COL32(25, 25, 25, 200), 2.0f); + if (loadProgress > 0.001f) { + drawList->AddRectFilled(ImVec2(barX, barY), ImVec2(barX + barW * loadProgress, barY + barHeight), + IM_COL32(199, 156, 33, 255), 2.0f); + } + drawList->AddRect(ImVec2(barX - 1, barY - 1), ImVec2(barX + barW + 1, barY + barHeight + 1), + IM_COL32(140, 110, 25, 255), 2.0f); + } + + // Percentage text + { + char pctBuf[32]; + snprintf(pctBuf, sizeof(pctBuf), "%d%%", static_cast(loadProgress * 100.0f)); + float textY = screenH * 0.06f - 20.0f; + ImVec2 pctSize = ImGui::CalcTextSize(pctBuf); + ImGui::SetCursorPos(ImVec2((screenW - pctSize.x) * 0.5f, textY)); + ImGui::TextColored(ImVec4(0.0f, 0.0f, 0.0f, 1.0f), "%s", pctBuf); + } + + // Status text + { + float statusY = screenH * 0.06f + 14.0f; + ImVec2 statusSize = ImGui::CalcTextSize(statusText.c_str()); + ImGui::SetCursorPos(ImVec2((screenW - statusSize.x) * 0.5f, statusY)); + ImGui::TextColored(ImVec4(0.0f, 0.0f, 0.0f, 1.0f), "%s", statusText.c_str()); + } + + ImGui::End(); +} + 
void LoadingScreen::render() { // If a frame is already in progress (e.g. called from a UI callback), // end it before starting our own From 16c6c2b6a0e673caab2f7308ebe812225a6add25 Mon Sep 17 00:00:00 2001 From: Kelsi Date: Sat, 7 Mar 2026 18:43:13 -0800 Subject: [PATCH 03/13] Raise diagnostic log thresholds to reduce log noise MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit SLOW update stages: 3ms → 50ms, renderer update: 5ms → 50ms, loadModel/processAsync/spawnCreature: 3ms → 100ms, terrain/camera: 3-5ms → 50ms. Remove per-frame spawn breakdown. --- src/core/application.cpp | 29 ++++++----------------------- src/rendering/renderer.cpp | 4 ++-- 2 files changed, 8 insertions(+), 25 deletions(-) diff --git a/src/core/application.cpp b/src/core/application.cpp index 300bffc7..c1907a15 100644 --- a/src/core/application.cpp +++ b/src/core/application.cpp @@ -868,7 +868,7 @@ void Application::update(float deltaTime) { } auto stageEnd = std::chrono::steady_clock::now(); float stageMs = std::chrono::duration(stageEnd - stageStart).count(); - if (stageMs > 3.0f) { + if (stageMs > 50.0f) { LOG_WARNING("SLOW update stage '", stageName, "': ", stageMs, "ms"); } }; @@ -913,30 +913,13 @@ void Application::update(float deltaTime) { inGameStep = "spawn/equipment queues"; updateCheckpoint = "in_game: spawn/equipment queues"; runInGameStage("spawn/equipment queues", [&] { - auto t0 = std::chrono::steady_clock::now(); processPlayerSpawnQueue(); - auto t1 = std::chrono::steady_clock::now(); processCreatureSpawnQueue(); - auto t2 = std::chrono::steady_clock::now(); processAsyncNpcCompositeResults(); - auto t3 = std::chrono::steady_clock::now(); processDeferredEquipmentQueue(); - auto t4 = std::chrono::steady_clock::now(); - // Upload completed normal maps from background threads (~1-2ms each GPU upload) if (auto* cr = renderer ? 
renderer->getCharacterRenderer() : nullptr) { cr->processPendingNormalMaps(4); } - auto t5 = std::chrono::steady_clock::now(); - float pMs = std::chrono::duration(t1 - t0).count(); - float cMs = std::chrono::duration(t2 - t1).count(); - float nMs = std::chrono::duration(t3 - t2).count(); - float eMs = std::chrono::duration(t4 - t3).count(); - float nmMs = std::chrono::duration(t5 - t4).count(); - float total = pMs + cMs + nMs + eMs + nmMs; - if (total > 4.0f) { - LOG_WARNING("spawn/equip breakdown: player=", pMs, "ms creature=", cMs, - "ms npcComposite=", nMs, "ms equip=", eMs, "ms normalMaps=", nmMs, "ms"); - } }); // Self-heal missing creature visuals: if a nearby UNIT exists in // entity state but has no render instance, queue a spawn retry. @@ -1514,7 +1497,7 @@ void Application::update(float deltaTime) { } float ruMs = std::chrono::duration( std::chrono::steady_clock::now() - rendererUpdateStart).count(); - if (ruMs > 5.0f) { + if (ruMs > 50.0f) { LOG_WARNING("SLOW update stage 'renderer->update': ", ruMs, "ms"); } } @@ -5145,7 +5128,7 @@ void Application::spawnOnlineCreature(uint64_t guid, uint32_t displayId, float x { auto texEnd = std::chrono::steady_clock::now(); float texMs = std::chrono::duration(texEnd - texStart).count(); - if (texMs > 3.0f) { + if (texMs > 50.0f) { LOG_WARNING("spawnCreature texture setup took ", texMs, "ms displayId=", displayId, " hasPreDec=", hasPreDec, " extra=", dispData.extraDisplayId); } @@ -6854,7 +6837,7 @@ void Application::processAsyncCreatureResults(bool unlimited) { { auto uploadEnd = std::chrono::steady_clock::now(); float uploadMs = std::chrono::duration(uploadEnd - uploadStart).count(); - if (uploadMs > 3.0f) { + if (uploadMs > 100.0f) { LOG_WARNING("charRenderer->loadModel took ", uploadMs, "ms displayId=", result.displayId, " preDecoded=", result.predecodedTextures.size()); } @@ -6968,7 +6951,7 @@ void Application::processCreatureSpawnQueue(bool unlimited) { { auto now = std::chrono::steady_clock::now(); float 
asyncMs = std::chrono::duration(now - startTime).count(); - if (asyncMs > 3.0f) { + if (asyncMs > 100.0f) { LOG_WARNING("processAsyncCreatureResults took ", asyncMs, "ms"); } } @@ -7265,7 +7248,7 @@ void Application::processCreatureSpawnQueue(bool unlimited) { spawnOnlineCreature(s.guid, s.displayId, s.x, s.y, s.z, s.orientation); auto spawnEnd = std::chrono::steady_clock::now(); float spawnMs = std::chrono::duration(spawnEnd - spawnStart).count(); - if (spawnMs > 3.0f) { + if (spawnMs > 100.0f) { LOG_WARNING("spawnOnlineCreature took ", spawnMs, "ms displayId=", s.displayId); } } diff --git a/src/rendering/renderer.cpp b/src/rendering/renderer.cpp index 55ba1370..d487e05e 100644 --- a/src/rendering/renderer.cpp +++ b/src/rendering/renderer.cpp @@ -2434,7 +2434,7 @@ void Renderer::update(float deltaTime) { cameraController->update(deltaTime); auto cameraEnd = std::chrono::steady_clock::now(); lastCameraUpdateMs = std::chrono::duration(cameraEnd - cameraStart).count(); - if (lastCameraUpdateMs > 3.0) { + if (lastCameraUpdateMs > 50.0) { LOG_WARNING("SLOW cameraController->update: ", lastCameraUpdateMs, "ms"); } @@ -2534,7 +2534,7 @@ void Renderer::update(float deltaTime) { terrainManager->update(*camera, deltaTime); float terrMs = std::chrono::duration( std::chrono::steady_clock::now() - terrStart).count(); - if (terrMs > 5.0f) { + if (terrMs > 50.0f) { LOG_WARNING("SLOW terrainManager->update: ", terrMs, "ms"); } } From a4966e486f3426181709742505dd3a6ad8780006 Mon Sep 17 00:00:00 2001 From: Kelsi Date: Sat, 7 Mar 2026 22:03:28 -0800 Subject: [PATCH 04/13] Fix WMO wall collision, normal mapping, POM backfill, and M2/WMO rendering performance - Fix MOPY flag check (0x08 not 0x01) for proper wall collision detection - Cap MAX_PUSH to PLAYER_RADIUS to prevent gradual clip-through - Fix WMO doodad quaternion component ordering (X/Y swap) - Linear normal map strength blend in shader for smooth slider control - Enable shadow sampling for interior WMO groups (covered 
outdoor areas) - Backfill deferred normal/height maps after streaming with descriptor rebind - M2: prepareRender only iterates animated instances, bone dirty flag - M2: remove worker thread VMA allocation, skip unready bone instances - WMO: persistent visibility vectors, sequential culling - Add FSR EASU/RCAS shaders --- assets/shaders/fsr_easu.frag.glsl | 102 +++ assets/shaders/fsr_easu.frag.spv | Bin 0 -> 10292 bytes assets/shaders/fsr_rcas.frag.glsl | 43 + assets/shaders/fsr_rcas.frag.spv | Bin 0 -> 3720 bytes assets/shaders/wmo.frag.glsl | 14 +- assets/shaders/wmo.frag.spv | Bin 21120 -> 12456 bytes include/core/application.hpp | 5 + include/game/game_handler.hpp | 3 + include/rendering/character_renderer.hpp | 2 + include/rendering/m2_renderer.hpp | 4 + include/rendering/renderer.hpp | 73 +- include/rendering/vk_context.hpp | 5 + include/rendering/wmo_renderer.hpp | 7 + include/ui/game_screen.hpp | 4 + src/core/application.cpp | 60 +- src/core/window.cpp | 11 +- src/game/game_handler.cpp | 9 + src/rendering/character_renderer.cpp | 55 ++ src/rendering/m2_renderer.cpp | 107 ++- src/rendering/performance_hud.cpp | 14 + src/rendering/renderer.cpp | 1033 +++++++++++++++++----- src/rendering/terrain_manager.cpp | 2 + src/rendering/vk_context.cpp | 32 +- src/rendering/wmo_renderer.cpp | 189 ++-- src/ui/game_screen.cpp | 45 + 25 files changed, 1467 insertions(+), 352 deletions(-) create mode 100644 assets/shaders/fsr_easu.frag.glsl create mode 100644 assets/shaders/fsr_easu.frag.spv create mode 100644 assets/shaders/fsr_rcas.frag.glsl create mode 100644 assets/shaders/fsr_rcas.frag.spv diff --git a/assets/shaders/fsr_easu.frag.glsl b/assets/shaders/fsr_easu.frag.glsl new file mode 100644 index 00000000..20e5ed32 --- /dev/null +++ b/assets/shaders/fsr_easu.frag.glsl @@ -0,0 +1,102 @@ +#version 450 +// FSR 1.0 EASU (Edge Adaptive Spatial Upsampling) — Fragment Shader +// Based on AMD FidelityFX Super Resolution 1.0 +// Implements edge-adaptive bilinear upsampling with 
directional filtering + +layout(set = 0, binding = 0) uniform sampler2D uInput; + +layout(push_constant) uniform FSRConstants { + vec4 con0; // inputSize.xy, 1/inputSize.xy + vec4 con1; // inputSize.xy / outputSize.xy, 0.5 * inputSize.xy / outputSize.xy + vec4 con2; // outputSize.xy, 1/outputSize.xy + vec4 con3; // sharpness, 0, 0, 0 +} fsr; + +layout(location = 0) in vec2 TexCoord; +layout(location = 0) out vec4 outColor; + +// Fetch a texel with offset (in input pixels) +vec3 fsrFetch(vec2 p, vec2 off) { + return textureLod(uInput, (p + off + 0.5) * fsr.con0.zw, 0.0).rgb; +} + +void main() { + // Undo the vertex shader Y flip (postprocess.vert flips for Vulkan overlay, + // but we need standard UV coords for texture sampling) + vec2 tc = vec2(TexCoord.x, 1.0 - TexCoord.y); + + // Map output pixel to input space + vec2 pp = tc * fsr.con2.xy; // output pixel position + vec2 ip = pp * fsr.con1.xy - 0.5; // input pixel position (centered) + vec2 fp = floor(ip); + vec2 ff = ip - fp; + + // 12-tap filter: 4x3 grid around the pixel + // b c + // e f g h + // i j k l + // n o + vec3 b = fsrFetch(fp, vec2( 0, -1)); + vec3 c = fsrFetch(fp, vec2( 1, -1)); + vec3 e = fsrFetch(fp, vec2(-1, 0)); + vec3 f = fsrFetch(fp, vec2( 0, 0)); + vec3 g = fsrFetch(fp, vec2( 1, 0)); + vec3 h = fsrFetch(fp, vec2( 2, 0)); + vec3 i = fsrFetch(fp, vec2(-1, 1)); + vec3 j = fsrFetch(fp, vec2( 0, 1)); + vec3 k = fsrFetch(fp, vec2( 1, 1)); + vec3 l = fsrFetch(fp, vec2( 2, 1)); + vec3 n = fsrFetch(fp, vec2( 0, 2)); + vec3 o = fsrFetch(fp, vec2( 1, 2)); + + // Luma (use green channel as good perceptual approximation) + float bL = b.g, cL = c.g, eL = e.g, fL = f.g; + float gL = g.g, hL = h.g, iL = i.g, jL = j.g; + float kL = k.g, lL = l.g, nL = n.g, oL = o.g; + + // Directional edge detection + // Compute gradients in 4 directions (N-S, E-W, NE-SW, NW-SE) + float dc = cL - jL; + float db = bL - kL; + float de = eL - hL; + float di = iL - lL; + + // Length of the edge in each direction + float lenH = 
abs(eL - fL) + abs(fL - gL) + abs(iL - jL) + abs(jL - kL); + float lenV = abs(bL - fL) + abs(fL - jL) + abs(cL - gL) + abs(gL - kL); + + // Determine dominant edge direction + float dirH = lenV / (lenH + lenV + 1e-7); + float dirV = lenH / (lenH + lenV + 1e-7); + + // Bilinear weights + float w1 = (1.0 - ff.x) * (1.0 - ff.y); + float w2 = ff.x * (1.0 - ff.y); + float w3 = (1.0 - ff.x) * ff.y; + float w4 = ff.x * ff.y; + + // Edge-aware sharpening: boost weights along edges + float sharpness = fsr.con3.x; + float edgeStr = max(abs(lenH - lenV) / (lenH + lenV + 1e-7), 0.0); + float sharp = mix(0.0, sharpness, edgeStr); + + // Sharpen bilinear by pulling toward nearest texel + float maxW = max(max(w1, w2), max(w3, w4)); + w1 = mix(w1, float(w1 == maxW), sharp * 0.25); + w2 = mix(w2, float(w2 == maxW), sharp * 0.25); + w3 = mix(w3, float(w3 == maxW), sharp * 0.25); + w4 = mix(w4, float(w4 == maxW), sharp * 0.25); + + // Normalize + float wSum = w1 + w2 + w3 + w4; + w1 /= wSum; w2 /= wSum; w3 /= wSum; w4 /= wSum; + + // Final color: weighted blend of the 4 nearest texels with edge awareness + vec3 color = f * w1 + g * w2 + j * w3 + k * w4; + + // Optional: blend in some of the surrounding texels for anti-aliasing + float aa = 0.125 * edgeStr; + color = mix(color, (b + c + e + h + i + l + n + o) / 8.0, aa * 0.15); + + outColor = vec4(clamp(color, 0.0, 1.0), 1.0); +} diff --git a/assets/shaders/fsr_easu.frag.spv b/assets/shaders/fsr_easu.frag.spv new file mode 100644 index 0000000000000000000000000000000000000000..5ddc2ea8f71d374f23c73a50e87cc4a107ade85c GIT binary patch literal 10292 zcmZ9R37nNx8OA@jcK}%waY0-MaLWY|+);2u7*rGl758D71qOzhkr@^d6%iE?ao-h1 zQIxVQ&9W>^D=RBoZ7<9Aeb-8}|Nrv6cJ6n7_vJj#^FHT2=R4o|mOGSskL;Uey|Mw> zplnO8tn%40>y473^{KS=Q)f;c)!oxP>WCvpYuG+3ReXAO$oghwT4P~jTZf55h>au% z<5On

HHp_9A*Y>z~z6ZK$6%t$ymHhNiBrhVGtA+glpi+d5hrn%fq&H1)JCY0=j& z@wIg{wJ&Z?4K+B@acdZ95M#7EZj_#huj-GBPYE4BKH+6PY)*&?40qt;ep?R!mV^YmkLmR8+ zeDBnB)5NXYJk!B*2#;?j zZCB%>#)UaQyNZ{xS#XA~c+acKm9q<~cqzLG&Z((*8>({Utg(uhDm9r-#cQ8*4s)w` zDQknPIo}0UxpL;b|KGQib;9T6-bGcpa>gmF@-Bf}+q%oDa^-A!6)$Bg;PZ3u6;-)% zwz7(svQ=>BY2DRTxpH=G6)$CL;O)70ZB?$Et*hdtY(3nI$htRI<;vNuRlJnl4tFi| z-dU9^XLnceQg$DF&Q!gnYy-F{#~Z;dIo2s_@ zX!j=k$i1Hj-wBt#;qlg|pH@wuF$}f? ztsajj{RH2G-X5bq$JjA9sg29IYv|`-xV45>ST?~q><(5d6MKU^ezi2LyC<4+GjCnF z_inXa;I2z8;k+D2z7OHY{$5A4VM!L;^uZLBYM{nf00C~Ym_$NutB6}GMa zVPL%;OVnq3^I16|{A}@xNACQNBRn_3>+-zwnFe+)&N=Fx38voGLpWFFW)L2OdT8f@ z9XHi3&bi+3HG}1;=6%<`YB6RPn9qsP?-Gbw+HWb`@$4JE%X3evU7dSU?RqrprP@t- zU-hVWNA6K`eeR`o4bt^~8-B;u*T%+rj^u>6xApG}b}jYq%&OPH;}|D_S=aHwuc`3t zJ%0B#UKjcM;rHG%Y`k?J0y~EL%6%jEI#oN0^W^z;Z~IYu3#Eouuh-Z?MC>`a?-(hc zP~cMv{IrC-XQn6IdNT{$_l~Hq-y3Cc{l0so{DOq*_uV7SHxzhd!u2;L-1;pAJ~!d| z+Y+vSL4kWCkMZ?)CS3ob0{2}c^)E@de&0tTFJGQ;>#s<-`6~*1Wy1BZO1S>j1%7S9 z^{+{|{mVYz^XN9!{u-hu5)+ z$LCjgjs&Y;O&$9jMR*TapN{4jM-$dEH=etn_c}Fem_L?KbDUb*afHwG;|M+8Q;#Rq zr_sYcC*+#C9}XwGCLeD+*qVAA&wHs_jCUefEygj}N)oR{}>wV2l_V6}m)r2bP0$MJq(&OTFU)uYc;uziA`4%QQ6 zo&naQZhxN>YT-W%tlyl^2)Wqj!wlkB=Hm169AXfm$DE$&wCYjod~nqA`61Wmv*TPM zYF!AnmL79@X3?rgt=ZtH)c}_3YoxuHh+1>N*3x56&qcKAQL7akwS4Z#_06SiCZg6n zu(kA<)6+z&9<|!RQOjqNT;D?4`9##}09#9sIXynp)T356IBNO4lI!cC_5CVpEe2al zk2yV^wCYjoQgGB-2A1nvPP>$dT9<*XrN^9}CA8{M>q>Ccx&kcMx03d9B5GX)ww9ii zgq{_&>QU<&aMW52mg~Eg_G%((T?e+79&>tD(W*zS8^BR(Em*E^9qsi*)VdLDEj{M+ ztf5tpTDO3s*3DqKzFTQ;BBIu9U~B0yr)NE_depiL9JTHQ%k|w&dj}DiRd(%ESLK*y|(pKLS_RzmZlR z{zt)H1F8QpxVrvLwDRyj4tAfX{wLt-`Zv?c!~cG;dpY%g0Isfo3#~l-9|XH!Q~!tH z>iVCgm52YsVE1I|{|H=N|5LQ`@IM1~-=+SK!qxRZO)C%o$G~fd)cCJX z*!`3GKM7aY{~WD6{GS55M^gW%;p+OIr(M>5B?dj#~9D&7r`$O>h|ACs}}yx zg56`mKL_@hqyOi@derUz60KVJzW{a*1^*)0a~S=<1lFT&|Cedi!vAHk=P>wJz^-BR z|0-CIy8U0FRSW;u!0Gs3hr36k|2M#T)P2tOq5US&izpN3qt>^;_6`1Ru=_V^y$05! 
z9<{y$j#}og;<|Q>?-GtYI=HKLR_3`KbS6u=Rrf z1e~7B*Wr59qyA69QU7&t)c+aSG0eN}uhObT?Vp3wee(-=y6(S(>rs!feg%%P%tx(X zgVS~Y4Ln_!-@^5%N3Gw1qn7!Y_wT`uV?OHt0i0eVe}t#!^-pj;>QVpC;HYmt>i-4o z80Mq?U%}Q3{x@*C?th2tQIGon07rfEQU9M{$1v}@zd@@Owci4(1^*Y=a})gEc|Pj> z2d+mww0FSjZxd_s{rX?9waq#1o3v_C*U6^i^eXV)aOV|cl;C>QLo0*T({XFy);8z3 z??Tn;^5M`~nVEd{&w!c@^qQ+j};@EqmsfTBu+>;)6Et-0aGaPI`b;t2{ zuv#6fzc1~6#9qvQByD=m_s?_o83C5ZemD@E?#=hX-HWj%2f_8I$MttGm~Z77U_NRc z0#0ik3Qza-D7YT=tMWZ_7+5{s*Q4Q%VQwJ5?%Z34gZX9$66W`#^?3ZPZ!YFA7VI$w zKLV_0AioT)b0nB=WqqunUyrr?JAwJ=do0-pg-=QkdhQeE zmxA@EN1tWjwxrLcaE~kcEC=gR_dHnVGO)GGMV%Gk6-k}T;T~Vqxxzf59(Aq+TgzP3 zSqWa1)VT`oaYmh0U_I(l=W4LE%(*T-v}&;q*MbiqV!vM}Ce-z>rd5kMtN~ji`1RoF z`J6odweVTAG5!s3J?hbC9r&Web0b`zdU)1@I}^`MaDD3Gxf#49@!SH}ryibL!D|xF zZE$_+;kg~WKJnZE*QXwyJLQSzF1SAR={!|D_Itp0C*j^yzZXrvdF%NMx)1Ey#=1WM zR=b~wdK7blVkHZ9s}#~6WZonQx5I%T4v=4-S+*kdrK Z-}5#t-vrDDGjLj4+bSgv?|CQFh$HUGeAm ztNh|QKF{Q>W1s1BDs}Iz>gwu#yC)rsw=GF&QMx~^NPnhNod_IKR7fxw5!n^ z-?ew&9y3;^j#i>MtJ0FxiPt7e)tZ4vu)El5^3+)X)Irsh{$^5VT9yWf#s-In2ZxT1 zm8Yi08qJFnm9dFxtui)VovxId)!B-9OS8OctvoR^o+osXk+0j8y3&2D(X7l@CPu55 zD1p+=K2kb4&_6m*uQi&bTC)M~CZg^r*K4hPh33A{o;BtgEAZ3hlGETFX#>98!ky_M z{LJy%)J)S@S8MO5D)R&N`t&%VR(>zugK_R|yoJOE@N)$oz%TUMk%qu!{+Kfit`+zs zxKZG@!E*(E7hGxK&h!Czvg!=yceeVREX^0ZE1gGYZ_VjagO1JVN_BkWTxoi$R%t|U z2In{xd$Olq@3*ecGzaPv^=V)_h8AbLO@R@bXREdv8>F^*-itwc z^(?nyj^Li1L+-uW(qaqTb%U>^igkLa*?|{t0CnSgu#L?39?FgNz?b6HG~>(g#=TQp z_-k)!n)%D|-Y@lo1@~^Kt%t9~_h5^#odsV7b~xWXn`=GSTZeZ&`!&8EZ!hYb$lQoG zZoPa2-^|Ph37F^nO+}KL{mDI3hw+E;&UFpra?e!FeqHM^%;A0>$M;AW#^u{F^R2f7 zZ!PCV{2B5`qw|4E=yJzh?&UtSe-`&Q2uX+AJ#&>{yWD%DSw{hQZ9`_CB+;^OF z-*V2&ZTx&2ueWjEaLjkVzT=$VE_gquL9TWIPIzil6< z?b#Z0p7(Dv=G~J!&v)uwJIz|weH1fCeVDJ~o$tZaHHWqRrq=gc^$#e4Q_q+ zvIGAFW>4EO=Xlqi#MG}8{Ha14VU9iS#JvCN*9z@vu(gdv@6Ui;-&pj%3#_g=^4@#l z#-sN=;JkPL(d4~92XF8Fc{KItbuZXn)uY#aU~3tRUiX7t&sg;O0$5#h+4|GGZwwR0an)>d9VI<7?1n< zCfGZk-`BU`>fV2UTjY^H0`A4~{1b3>^Zo6SNB$_-Z=2_zf~%W<5-*SZcfg)!o_`vy zZoa==^2mP=?4I)c_u=a1``aduJMtme9)o`bwuj}s#BTh@nCIpH9CM9F?ikqhgP#Fg 
zKjxOeG1uHPS?rT6<{IYOpTD)nqUJa_@2>*S`}-8`8vdS`Ydq$j1?T;pgXjHK;W5`- zs7uNj= z^Zdg?{~68m5A(a3{|n}K+lg7r`R1M|zR|B>a~z@l299~5{a$E`@guzBW%xfZ=bLZb PwcW#Ays>{-`2hAGj+gx9 literal 0 HcmV?d00001 diff --git a/assets/shaders/wmo.frag.glsl b/assets/shaders/wmo.frag.glsl index c04e1a93..a4bae057 100644 --- a/assets/shaders/wmo.frag.glsl +++ b/assets/shaders/wmo.frag.glsl @@ -149,21 +149,21 @@ void main() { vec3 norm = vertexNormal; if (enableNormalMap != 0 && lodFactor < 0.99 && normalMapStrength > 0.001) { vec3 mapNormal = texture(uNormalHeightMap, finalUV).rgb * 2.0 - 1.0; - // Scale XY by strength to control effect intensity - mapNormal.xy *= normalMapStrength; mapNormal = normalize(mapNormal); vec3 worldNormal = normalize(TBN * mapNormal); if (!gl_FrontFacing) worldNormal = -worldNormal; - // Blend: strength + LOD both contribute to fade toward vertex normal - float blendFactor = max(lodFactor, 1.0 - normalMapStrength); - norm = normalize(mix(worldNormal, vertexNormal, blendFactor)); + // Linear blend: strength controls how much normal map detail shows, + // LOD fades out at distance. Both multiply for smooth falloff. 
+ float blend = clamp(normalMapStrength, 0.0, 1.0) * (1.0 - lodFactor); + norm = normalize(mix(vertexNormal, worldNormal, blend)); } vec3 result; - // Sample shadow map — skip for interior WMO groups (no sun indoors) + // Sample shadow map for all WMO groups (interior groups with 0x2000 flag + // include covered outdoor areas like archways/streets that should receive shadows) float shadow = 1.0; - if (shadowParams.x > 0.5 && isInterior == 0) { + if (shadowParams.x > 0.5) { vec3 ldir = normalize(-lightDir.xyz); float normalOffset = SHADOW_TEXEL * 2.0 * (1.0 - abs(dot(norm, ldir))); vec3 biasedPos = FragPos + norm * normalOffset; diff --git a/assets/shaders/wmo.frag.spv b/assets/shaders/wmo.frag.spv index 2453f0ff763aa7053f9ce3d164e1646cb97d2d03..524dbd1ea425698858608248b93668dbc5ee5e21 100644 GIT binary patch literal 12456 zcmaKy3ACQm6^4I8Lc|b65>_Mq`P_ za*gF1XD-#Kp6-n$VH$9qt9I4C8~G6ETe9E%*jWTUYiU00&N{tVn03(~(2g1NDMW9i0EXnh8BG);~W6=&c0*+;)+ zYMj~0b*pjqkz1+8EnT=(Yh35T^{R2^Y2P(#oa4#$t8vQ~ZeWe;TDbLV+;W8*Qsd0k zcoujp^|1!d&H9{1+aGcM(bul&kABClu08!-3+tQyu8TQzN46*aU=2Lh!@5Q{AD=pB zEjnu0(a)~+e(LsXMbc01yZy|q-cQ|rZAkjb{kLC7UY+3ddmM>h*7n63m$hA3YOZcPB~S1g z_*)}u*N^zt#z(Fvx_jRKj<4?dQg^Hk(8bB82YO%heu(k4tM^C#%@ci4v9C&$!Nt!w zTOy4{+X&;dfcmNO*iXF`aa{8pN9!2oGqUL3JsH<{6Of+sU)nr*PDJne>h&X|d%;~j ztM^vXPeyMv+FJT>6MAONy)MT5K3zUGbNeZ}c^bpq)IIldj%AF~5o@jg8MNBuW1j|g zk42vic1`r3L#w}ICih#wjYey$YyL1uo3*Gv(OvTe81ysVBgIM1eAOQfA`Iut7pbL(kK3VR{G;D#+w8tRI@hf z-l1|ssG#d|B)W5sB+d+Yo-vunW$51Pu`fjD{%G!rx6u2ZHMy#r^L(f3-_{!c#pqYh zn$^5tzDA!kX;Ra_GeyGQ)R(FB-c5a3i`ZywN#0%2^^tcSGtNrr&po?yvtJ+dMx$-( z*oRhn>*(m4pwD>Xi{?B=SN)%D<~as^`0x{|dTtBmuUSuZ-$8Q8>og8xeP0csYTnDI z!>7jaJp;};M3U1q4|AjS@#xdhM_lyws2>u`vQ}nV$NdV%br}WgIQv0*_rFl|{n6!K zEc$^(zl>h4(;?{kDB&i6^;5cWX7rl~cHgXD`smY*TUgtqV%r!z0hy@4nUi~LH9Gb{ zx|KcG6Kt$)i%mc8yVW7R>*f}H4Y1so%ycbUpT*eZ*F@x;zxd~6tiE95{}VoT+b@`h zxwKF`?bb|R`;JEQy)yt@eUs9fljn0FSpHUg%yAvCoOix?tq1loFKz20a^@u-&79|8 z9}4zfoege9jBo9R!D&0ItjES+Iq%$Xn-tD_cQ^c8=S{)-YMWZP&A_g;Hs_x_Y@NUT 
zc4YqgSQGutG3&Gi*t+%Wh@5p68{0Y`05^Rm=f5&*B^q&BB&mTs6bKxd}?W4_oF^Sg4e6{@ukuzWMICyJxgcvb@-(^Q5 z?i1f}=HE_?W5Dvea^Bp-Q^0bHkB#?2*5^0~-+|MKssHg{`^aa_P5|>)t(o?j#rG7j zIcawu-!M;~ufY$&{S4hX*zQgr_i!87e&%3&xv6ka%%H=b+D(Vo=J%%Y&j$NgCv9gU za*iQ(eat~!K6lQ!VB>FqB%b$f_SkvY-3NYKW{;f@Cw~rkd$unC%S{7&&&gegZd~_( zc5PG0%{k3L*B*T)*#61sQm}o86<_V6bqDgRP5_`OF8~Pn)^>JK#aEzXA3p_jcNc(dE2Ha|RZG<@sx_=OZZAUYq;) zQCj!0ZSLc(%YE$Leja(X*xV0euo*+!pT1)ojjazp+Fq&I+-I%Wyf?MIR%~~XyL-AF zUAw|M-0jc5H3&`#V^E0x@$Be1fi@w(S4U!SYJZ%s;@M8SgOXXzu#uK3Y`le(QOM zcfr|vA~6?(^-&V@Yj9%j2X+kotnW9pu7PdN#8}4F?@YwLuOs^A{b?KD?eVR?Y2fT< zoYAb4-*Z#pyoWpsc?V2`lh1d?bg*2$GtLJ4RPQ73S;#LCa}?)JI2U{llC?e`EN4FE zY8?C6x30^Zdpl$7gs=UK>u*whllw(rb9Wym_Ze{V$^BxmTynnz>|^fQW+HOtAbl_rc9z`Ft17 z1_f5X4^}9=omU>sTdKH*j&JS?`mNCSu4*a|9ium`k7MZ53z2gSadKM|Y|iH981b>q z7;Axz;kf$S-!b~q`Z$I@eGoav5N96!!H$u6#K$&c3;-L$arL*qW2{4)G4vUT$T@~M zbzcwcTJqPN+xjT-nOpkVX52wwPbB(?V*m3haV^y#|^0|+CfaSL& zPVS@C&>crx*0VQQ-Y4AZg)@h&=NfSOYI8mJE$^PS(4C8RdB<{Ht)q3--UeqatzYU@ zUkmG_?%G=?+w^hWoqK+Rx_9((U9*P=fnC4ph;g#_ZL`LQVRP?$p7ghWz7MvC^F8MG z!6US;k#pDqUR!?e?FhCe{vI@zG4*lGd>7be%*ka;zfgbGs1i+_YzI7ZqRUrro*4c5$(-4yWG?#Cgv_p9ywP zPD7lBcfloa^0{*^1sg*?^|~Bvz5Ms#)aweceCl;2SWfXty<(pQ|9$%`V_bvXF{aTs zV_XX-pBUGH2oW3`Z!nn z=$Bl71FoOR+tB3{pX8b|c{{wEYpAbhGH32iu;c0D{W6i(^JVKD?0!3j+&qWg3;KKZ z>UWWMQhblb*E`EQ&VI(J->crQ@jVK=_wO!<{fv_}x*Po6b8`=zeAeh*u$aj%?I0W45B^nvj@SBKO4UOK6?mWTfW;CfaQ#nzK?;^xBgrCad>U% z+YYv`eEz%Y6JTR%%RBS8U~}}}9khD}eRrl-zk_qU@ksW_lW_8>)l*8TNhj0-Su_M=yfq@;-VQ?Bn}L z+e?U?dr_Rb>ou_Xc+a_Bxx3x~dv|Hi+P+zQp&Qy$_qV~;PkZjNcZ;ugnf7-oTgU&8 z@LsWbhTj6~yAVnJ-v?X&g-GiE0i1m5{~=gT@o^5qynjAI(I@l#7@T=(i{Gch z{=Ylf-z$UVBZ;*NTw*x~W9vJ$ygz$_tzY()_ew7~`?v?Sr`~IVt-tovd#&PYy|vft z-M84Rw|;$))NXCCwVQ@5d#XR2d}=oUET{OGQ@wTr;q`HxtkXJRd25q7t_QXb+LHG$ zuKm`$ypn?SyY={*bsGwLtr6~X3Z@zgq<39f9+4J1@yyx6=&pr3fote$TxDEH1 zP!t;!n--fF3&s`YwNB6#!1LDD{m>=FoLZ`s>fYpmB^=6nA3pC^ji(&sjWs-n`j!j$gcVU|{jk z@OiD~;#N;zbMdmC!RFH8o|R4gHmdpc^et_TEUPF=A{7}$Gd_rc{D-3K`gamE!BX-8IyJBvx+ 
zl|9XKXY~v&Tu|COiydiO8KC4kid|@j%FNp_rl7AXZR3jlY0s;$F_vL4w(GF4x#~El z0E8J=yi_~RoviJd;Nj+~W@}cnZ>VS35bzyE5AC95{llk}dS@|A+vr=~YPP3b`Z#uP zW0k|9cb4OyK6l>S#&C16r_mz%_>$AUqA^4o!y|*NU3iweXtV}aG!{08j6c4L;a(W& zYxT6bb_{o0&(KMIhVCEKccU`4Ydkd2TsqQf3~J3JGDc_F@ARHN)*DE_31z?W;AUTA zNvnBE|6p&U#WIcN<(_QY=Fgj3_G#y^5qO}#cfry|i&?CXwQ-xDODW3T#a0>3Zxir} zX3z2!!;G+~L0$Tm+Bx}e3hqk=3x)@qeanYel=gN#Hv{(!&1tM`wAb!6=w7bTYTDkO zzR~($irzQUJDP-RvKFq@IIlT4t2tm`_wbc)OGgG7Yj{r0gl$}L6}I_^eXpfuedl#| z4>es2$Gm}dS+h0VIBiksb2EAs3r3%fid)um<*4P%TO(!79RGIgBki)En9@MecU*B7 zt$WY09-$o=Y_2r0`89BxUoyMM6RChhkW+_`1ob{r3|!RF9Ni%A$~ zJbHIyS+;V?Z3Xw=_uKel8@QFtL2i$BvV_IAqdeEbcn^8TEpIJ8VX(h%nA@YLZ+Y8q zvg7phHCkn@I*Pr~hkJUfI&>EMqR(#*o-o*GbMAjRFV6#8_pzMxz+nH`o`3zEuE8YFeYrgG#y;tDpa~$@z?}v7L@0Io$Hfka|=T|jv@e$yK_*rY89Sb>l z$fxu>8hvJI>nxusGkbHUMP z>TK*NpHVExa_8t?8D8&Koy{G^Dmc&i-YUmekI~QnFTJx^jm{<-t&wprgBuw=5!-Wp zFPvuqVau_vL+@=2WUG~&&&YF4xMGcUFB}J1U-dYG=_nqATmP)+C>};{@%(JZ9$!3$ z?pTf1=?qDb@9kg86R|ya-xC&b@1o%j84LMpbs{?TinOx9qIZ#1vfO(H=B1Hk6D=U#dB~=ScaK{-rRl7 zA*$;fI=G>iw;HPGoVMsyyoGkxXGggDymv!iKkv@sfJ&b^*xzvD8^?FD{*lpg7qf}u zyTh&ssT3#TQ@$L4_dvYDu{}!8GbNmT^z$4D#}FmwGcBBb^xLw=c?QXCTjM-?n zKIiIob?xcz`?2*+f1e}fuoZG5@$)RO9_AC>e7x$MwP@F{-Orfye(Lt?LefueY(I0W z_fxmuY9#&S*7s}YH4aX{JCOKgZSSjbS=)zdT-Nq4H7;vwaS|_UyJd~b+D?XZeez6n zJ$$C@PE4=pQ`%~&`)slP=BeFp6m@e{_j^U%OWiZucr>H>6pdWkXHb~>cwyV)L~BcC_}FSKq$U^_ha_4wX$m<7wXyKkb?KcIdmI zPewK<+l~?&S6+YH-I3?GqVI#Q|F&eaFKyx;4K9k+(~LVEG+C8PyLuO5{_&p&=FXX6 z{tH3vm_>0^rF#a-#eXSyhnwy{BKk7)O&<90QPFudE3Ue7(y`IY2U@Xu`mTI^dUiOM zx>$?kVvO?<*G~TnDx18y*j|LV*P~wzc5mo^4Xv>pGr4~P zToheh?!7OAv{{S#72UmeD+c|HcU$EoXTIvUSDbrk9oTruXFB++h} z*yEV!Q_-)ubL5!h@Ah6_W+IneMzYSd~x&IO+ zd&xQcsqJ6RbvN{9(4CLDKU-nfHsejh*uV9-(b`N$-{GcHk8tkZW1j6^@js)~@0ecJ z3QVZcTC~tT`{nkhqORKjx^s>s&RTe`nVH8`=su5Pe;A$fW^~Uyg1+0O=ahAGo_{R+ zukMP!MY!t9D@V_x4bZ#0yGQ*uYuiWlElPdTsQx_kqUf5QythQxN8WYJINPJ&d(WKF zetV)9#p>f?pIYi&Gol}fe#Mbz}59Yr66Mo{YcxgEig$ z57%_BjTBux|Ieu&(=E z7+rtA3+uYyg>~KU!n*D^VRZ9-u%;XDp)tDOgmr(v3G2GwgwgH))EM1w!Pp(oZ@{|l 
zH(*`&`!Blj{Qj%!e*dXY<9_hW*qZg(n)X=k9`6Vgdxx2yU`~t9-IcwVskuzuU z3~F-?_7{TpN3I5UA;!192f%4FAI}!`*j@ydyOgo@JqRr4yFq*o2G@NKL6`G=VLttw z^Pyn-YO|l*VPNOKrt&!)ET?a3BWE2mA6xso_D6z!2g%wW1?Hz*du_(>Y?M!*W5r1N zOb7E*_R(e^&rkXEnE`gKwRO>|$96o}F`}OUwtw{5VEdd})$c^GeZ0b*RB_VbUR-h9 zTBF>_aK`tF-<*n*4mY>r>U~dvGZ(M;&8s-+aPuq9zVoYl>Qp%MHc#^k{}Qk{nS*17 zUjTMI;~P(YChbDRSoYJ$dd{Mi&;DNocK@G7YYpB1r-S9Q|6dB0%l>~E*vtK|?F>ZD z{Vz85HP~McHujmc#&$oy0xW+EKHgi4!E$FI?z_aXeUej2pI3q%$Gxb(Vlk6ZVB?SidzOYhBoitZdxz%*VaVj%wKG5>$d_a>s9GJ6=xlngVmjDi?$c>UiTf@ z{_BX}2bQ15SC#iqKUhxjvT@z@*ZwL9-=Qxirurb*KJvMLhQR!k_m6gS@%=$Qd5?h2 zFL76b^?6$r_gt_(^7bF5mDAsI;yiH9%T-|Y#Ci=_fBU_fR$ZHGdjZ(B&~`qp`ZU(i zn6CxzL~A_zY1eNutv1&_YhauGxd!YWa_>2g`|~0=`Rvb&!E)K3uLFCzzS=HD-iWxq zV&nK7dO7&AN;k*XgU#o3>a-614Pbrb&GiymIdgRk;~6)$H;u9B<5=s6^=7dB^<7P? zo;kh+>>T|aM&@`0oP6f^RMzR>7D5Exo6%5 zmRpNBx5Tl{J@{_0<2Wb%9m9L+N?I@T(C0mfoOy_!EjB2zoa4K%-v{0Xu@>*8RZlJ6 z54IM$k6iB$z{#f;9|X%~PdKL!p}Y4or>ntoOOTuc*MRM-&D=ji>t*iRK8(njyV#sO z$F2pN(?@Aj!;gXGbB=u+EO!=S4HL&UHT(qFajb>@j$t03qV+NleLjiEnTOah-LIbp z`#TeL_v>fSCnMRfp9Sk9pEKukV0p#M#xv(F%+0m90Yabakvvas1h2#v$$9j7c>T4z zcGuC$rA}V}kFC=e;pDUSH-YUpuR0fQ2Fod4Hs>(=P{dYPL(a_O@T*gm(_`fQ7CAE>gAK62@^9XR7S zNB7tEaPqg4pZEF>V7Xnv-oKN;Uf#djCL(g)zv9H+5j-~bPH^(KRajOiF7-$p+vKqi*m2BFf5$M7{b-YiKKmka z<{?fV`-9D+3+L41h3N9B#{pou)FW|hlgEp|j$>~6JBE22MC)Ze^f?fbGY@g{I2b%O zkEw9-smCE;xzr4^8PziZZ(9A<*8Mb4Guz;cS0F;cf#aAWIs zJe+*i^8~PCB;VO!edKeVoCtOtuW%<-oa5!Jd@-E9+KglVC)3JLtDb*z!2VtH3|eja z_}(J#{gk@fCdav8eezy&3cAlE-)lU>;x`Xoo9|biPxHZ;%l9kSz&QG3Ozn=DcM4l; z=ey0R*nGFyy^8O<%}e0q^PRTNc*0Vki_&jQOO_g8|w%w5}Jq=^_? 
zoZK7WdhScm z?B!gv^&qPdV~aDFK5#FRb?67nDPEb2_o(;#tKj9lNA(>*oWnV^+7fpVT(8j(x}4&b zxcVD+7~UA359%Xe`^cw`E5Y?To{O%ZeCjA?9i6{(b# z*cXDeWlooZozpo;YX3So`ON7uu$M@f0C*X~vUvm8<*t#ayPl3ng zdL5j6a{V+|PN|QRTt5S^kFnQOd-Ahj$JZ9W&w=HwU;M5ITVrjm`661m%=rdzFOpnt z1j{M)TylgU|qmHtXv&pq4vv+*?a)beKV z*fZlxaPrCj7O!Uxu%r8MmU_M?T;Aw}I_96VYz&H_^)HJ?1N5bGaIy`g_bB z@Y=E;)`9JtceFdf`p75NSHb@)*4N;*#qaAiKff6>#$E8*GRECt`^x*x>sjy(uw!e> zJJvVB&U3%Y?pfozmh-(GoA%WHTVQiMtr~Y7e&2?ZPwnpk%PC&Q7N++1qPRbj_jkaK zlXuYXg5@IlUE6(d#`1Sfgp#>* zgXOaK{s49l$>+KCDA+hYx3vEe$usFO@FR#mYpT8aC-B3FHpl!Otz7E*IM}+@--rGJ zC!f0h6)dNCIleIOLw`fjCpG*#SRZSs&2j%sE1w!Z3ASd5@eij>0{sL@Y9|gJHd-;vBsn8lV{NcbbaLQ-$5&vHQX5N8eUb! zSjUi?z{zI~HwDWnUgn1T=op*B>*L=Q<_y^i?B5JVa)xXTm$})`zb(|)-29s+?U~y) zVDpW>Em)t_Z9A}gGJALjbUFVnF?mb`%SV#OB)H_^IL6jDHPW7OcLKW)qVJ6E{_{TZ zdAkd`KJwYeyMm4HKGtqr|3*tbaVCT7XTcP7eX`$n2kRrB{k8|#xb8RY_5Jqz%I1F4 zZ%@Q={aY|?soP#)x$J=#fZYQZ(NCXs#)p&79@qygr+AqIB^ccU`@-vE?Cib$!1As~ z=Da`HHBIea2$s_~wU>+ki@^079f)49(Lw0?$frgJgHt2z^%@;g*{qR%{*9{R`uB0# zQlmq`a;ec_;ITD298NwpIsz=Gc#W;mGOs( z>#xnTK;683u1*J!eXe%F$>-mfW`O0MAz^=)cO2Nu=c=}u$O(wgRdH%G3+(-ubL)6? 
z=j^%Vd?ujiV_rV*wfjzKd}A3iu}%U%REzau^u(GCJ`vF;v9#A?8B;yaf7{e&F1Gp^ zc?!Du64IM_105_P@2f1kV2YYd5wz`nSs3{>?0oK>V9!&w77v z=jDHMa5#ScUAh1N0kMDQxdmcv{O`K*x6HeMCsg`m+9_l7{VUyZ^>x3C_1hh>E_=`# z*Y+Q*>+@;%LcFxUfL6P$b@TnpGsYMSlMhH z_eHb^B3_Pr5UqV}Gw#7)V`SW^V7ZK|?jAFieZ}_Oi1rY~dS>kp1zXenuJWsHe~OPo;ErAXRo_2GoMqy&L`ht^TBfSkpIv}y?&qix6xkyw{YgN4XyX%cC-tS2{rwt zHQlv5v(g>g^%ZU@Z0co&nbPzRLGyU^#sqM?JnT2irgTD=Izj4`-q4 zBOjl|VBQ216?2aJWrQ^-Cy4O#?;4roToNpxks9aT%HHrVEcJ*FGZFi`dDvm zskb%S3Gvzi@hr5?6Dzza?W77@-yIRxWM|r(kv-t7i9W+MeKL5}7=85^{nDCl|FvWE zD{H#BT~*WdzqY1pzhR7ib4}O(mYO~pd{<4k|06YB|3}B@kB`xxs_FK(@b!9(uk@TZ zXX9c`yO6BkIbivGcecQ;Uk9SiHMh?4>C+3A%irYngXQ`V-z8o}>*e!8+W;cx`iZj- z2f?nldqb>Uzl<{icAV_3bHV1eAEM3iRwCx;+G}^N7tp>Ic@1(tVm!~I3lVw!-5ct$ ztpUqjR-IiJgXJzlyeI5`3F7$sBHFbXYc;L5#CjcAF3*h1z;dx)4tA{AUk{ekKA!ds zi1W$3-w1Zx0}x~BuTNh!uQ!49F&Fo=dd7M)cr}u--U613{R*&sVt*@GF1fr7?7f%Y zF}xjY&E=EdJHW}$_{PvL`MndYPx4dG`0oPO$A34vT+ZzGfE`cX{%dLF#NMB~BF5T; z_PvPvJJ0?1Rh+z+Ic8q(2epFoUpEv=kmn%k!k<0gme z!20;R6ZhAr5xLCoGZiPFKA#2KCqAD8%PFqA<6MteQ}0K|G>(3mm-ggzBRDyEFMS@l z6v@8(Ld9jgFQUsSsrgM{<7jh?8))S+#+SeoE8ESq>e_Cl{R(n>rJMU5h(7rn+dIMf z+=1lX>1$y5aY*+6*TL>bZRT(bt(^ES+Pe|oN4`Ppd1mW7@V98cjd*FlhgQ3-Ia-r@ z5o27>e%hDzyA}5xTF+ATZz9IBuQ+}01KT(H{b1|+*(%oe!1~DRGoJSQi1$$RAAp@t z{zm18V14ATt>XL$Y%Fb_^*^TdGCyq(Aadp>cCN`+-m$FzPY~BH`cJ{GhjrHeGek~5 zYpYF8f7kQpNPRtjfv!*1^Ou#6eAe?JuzYcr@YfXTygu*W)I2L_mK5Voqi8?p3(mRwvOgw-TsK^ zBX7J%DlYSR4Ba)3{wJ_GXMg`0tdG3?AElMk-@5+=sn`9l=#G)P|E=3xmao_S z33ThOEx9}iuIKU(bp7Px^G~qrk^27&te?F3K29qa|EIyuA^J08^k>oSlllJ}tdD%g z_zzfqDU#>Xb6{g@bKIwBf-sl$@`fV1CNyzVVEq zpW|uJo_HI9d+^OOd1G{b%J=X-T7NI2k9~bF)Sg^70Z+sy`ljf1PVCLV_DQargXK!8 Wo&{T=^Hb((JY(qRyPY=Q`~MGgd{}q@ diff --git a/include/core/application.hpp b/include/core/application.hpp index 165d11bb..4d10acc7 100644 --- a/include/core/application.hpp +++ b/include/core/application.hpp @@ -236,6 +236,11 @@ private: std::optional pendingWorldEntry_; // Deferred world entry during loading float taxiLandingClampTimer_ = 0.0f; float 
worldEntryMovementGraceTimer_ = 0.0f; + + // Hearth teleport: freeze player until terrain loads at destination + bool hearthTeleportPending_ = false; + glm::vec3 hearthTeleportPos_{0.0f}; // render coords + float hearthTeleportTimer_ = 0.0f; // timeout safety float facingSendCooldown_ = 0.0f; // Rate-limits MSG_MOVE_SET_FACING float lastSentCanonicalYaw_ = 1000.0f; // Sentinel — triggers first send float taxiStreamCooldown_ = 0.0f; diff --git a/include/game/game_handler.hpp b/include/game/game_handler.hpp index 8a3ee441..3af2f59a 100644 --- a/include/game/game_handler.hpp +++ b/include/game/game_handler.hpp @@ -565,6 +565,8 @@ public: void unstuck(); void setUnstuckGyCallback(UnstuckCallback cb) { unstuckGyCallback_ = std::move(cb); } void unstuckGy(); + void setUnstuckHearthCallback(UnstuckCallback cb) { unstuckHearthCallback_ = std::move(cb); } + void unstuckHearth(); using BindPointCallback = std::function; void setBindPointCallback(BindPointCallback cb) { bindPointCallback_ = std::move(cb); } @@ -1445,6 +1447,7 @@ private: WorldEntryCallback worldEntryCallback_; UnstuckCallback unstuckCallback_; UnstuckCallback unstuckGyCallback_; + UnstuckCallback unstuckHearthCallback_; BindPointCallback bindPointCallback_; CreatureSpawnCallback creatureSpawnCallback_; CreatureDespawnCallback creatureDespawnCallback_; diff --git a/include/rendering/character_renderer.hpp b/include/rendering/character_renderer.hpp index c4676008..7a01c0d7 100644 --- a/include/rendering/character_renderer.hpp +++ b/include/rendering/character_renderer.hpp @@ -66,6 +66,8 @@ public: void update(float deltaTime, const glm::vec3& cameraPos = glm::vec3(0.0f)); + /** Pre-allocate GPU resources (bone SSBOs, descriptors) on main thread before parallel render. 
*/ + void prepareRender(uint32_t frameIndex); void render(VkCommandBuffer cmd, VkDescriptorSet perFrameSet, const Camera& camera); void recreatePipelines(); bool initializeShadow(VkRenderPass shadowRenderPass); diff --git a/include/rendering/m2_renderer.hpp b/include/rendering/m2_renderer.hpp index 1c35e34b..4b26214f 100644 --- a/include/rendering/m2_renderer.hpp +++ b/include/rendering/m2_renderer.hpp @@ -122,6 +122,7 @@ struct M2ModelGPU { bool isKoboldFlame = false; // Model name matches kobold+(candle/torch/mine) (precomputed) bool isLavaModel = false; // Model name contains lava/molten/magma (UV scroll fallback) bool hasTextureAnimation = false; // True if any batch has UV animation + uint8_t availableLODs = 0; // Bitmask: bit N set if any batch has submeshLevel==N // Particle emitter data (kept from M2Model) std::vector particleEmitters; @@ -193,6 +194,7 @@ struct M2Instance { // Frame-skip optimization (update distant animations less frequently) uint8_t frameSkipCounter = 0; + bool bonesDirty = false; // Set when bones recomputed, cleared after upload // Per-instance bone SSBO (double-buffered) ::VkBuffer boneBuffer[2] = {}; @@ -265,6 +267,8 @@ public: /** * Render all visible instances (Vulkan) */ + /** Pre-allocate GPU resources (bone SSBOs, descriptors) on main thread before parallel render. 
*/ + void prepareRender(uint32_t frameIndex, const Camera& camera); void render(VkCommandBuffer cmd, VkDescriptorSet perFrameSet, const Camera& camera); /** diff --git a/include/rendering/renderer.hpp b/include/rendering/renderer.hpp index ab14021c..c7582eea 100644 --- a/include/rendering/renderer.hpp +++ b/include/rendering/renderer.hpp @@ -4,10 +4,12 @@ #include #include #include +#include #include #include #include #include "rendering/vk_frame_data.hpp" +#include "rendering/vk_utils.hpp" #include "rendering/sky_system.hpp" namespace wowee { @@ -259,6 +261,14 @@ public: float getShadowDistance() const { return shadowDistance_; } void setMsaaSamples(VkSampleCountFlagBits samples); + // FSR 1.0 (FidelityFX Super Resolution) upscaling + void setFSREnabled(bool enabled); + bool isFSREnabled() const { return fsr_.enabled; } + void setFSRQuality(float scaleFactor); // 0.50=Perf, 0.59=Balanced, 0.67=Quality, 0.77=UltraQuality + void setFSRSharpness(float sharpness); // 0.0 - 2.0 + float getFSRScaleFactor() const { return fsr_.scaleFactor; } + float getFSRSharpness() const { return fsr_.sharpness; } + void setWaterRefractionEnabled(bool enabled); bool isWaterRefractionEnabled() const; @@ -312,7 +322,7 @@ private: VmaAllocation selCircleIdxAlloc = VK_NULL_HANDLE; int selCircleVertCount = 0; void initSelectionCircle(); - void renderSelectionCircle(const glm::mat4& view, const glm::mat4& projection); + void renderSelectionCircle(const glm::mat4& view, const glm::mat4& projection, VkCommandBuffer overrideCmd = VK_NULL_HANDLE); glm::vec3 selCirclePos{0.0f}; glm::vec3 selCircleColor{1.0f, 0.0f, 0.0f}; float selCircleRadius = 1.5f; @@ -322,7 +332,36 @@ private: VkPipeline overlayPipeline = VK_NULL_HANDLE; VkPipelineLayout overlayPipelineLayout = VK_NULL_HANDLE; void initOverlayPipeline(); - void renderOverlay(const glm::vec4& color); + void renderOverlay(const glm::vec4& color, VkCommandBuffer overrideCmd = VK_NULL_HANDLE); + + // FSR 1.0 upscaling state + struct FSRState { + 
bool enabled = false; + bool needsRecreate = false; + float scaleFactor = 0.77f; // Ultra Quality default + float sharpness = 0.5f; + uint32_t internalWidth = 0; + uint32_t internalHeight = 0; + + // Off-screen scene target (reduced resolution) + AllocatedImage sceneColor{}; // 1x color (non-MSAA render target / MSAA resolve target) + AllocatedImage sceneDepth{}; // Depth (matches current MSAA sample count) + AllocatedImage sceneMsaaColor{}; // MSAA color target (only when MSAA > 1x) + AllocatedImage sceneDepthResolve{}; // Depth resolve (only when MSAA + depth resolve) + VkFramebuffer sceneFramebuffer = VK_NULL_HANDLE; + VkSampler sceneSampler = VK_NULL_HANDLE; + + // Upscale pipeline + VkPipeline pipeline = VK_NULL_HANDLE; + VkPipelineLayout pipelineLayout = VK_NULL_HANDLE; + VkDescriptorSetLayout descSetLayout = VK_NULL_HANDLE; + VkDescriptorPool descPool = VK_NULL_HANDLE; + VkDescriptorSet descSet = VK_NULL_HANDLE; + }; + FSRState fsr_; + bool initFSRResources(); + void destroyFSRResources(); + void renderFSRUpscale(); // Footstep event tracking (animation-driven) uint32_t footstepLastAnimationId = 0; @@ -411,6 +450,36 @@ private: void setupWater1xPass(); void renderReflectionPass(); + // ── Multithreaded secondary command buffer recording ── + // Indices into secondaryCmds_ arrays + static constexpr uint32_t SEC_SKY = 0; // sky (main thread) + static constexpr uint32_t SEC_TERRAIN = 1; // terrain (worker 0) + static constexpr uint32_t SEC_WMO = 2; // WMO (worker 1) + static constexpr uint32_t SEC_CHARS = 3; // selection circle + characters (main thread) + static constexpr uint32_t SEC_M2 = 4; // M2 + particles + glow (worker 2) + static constexpr uint32_t SEC_POST = 5; // water + weather + effects (main thread) + static constexpr uint32_t SEC_IMGUI = 6; // ImGui (main thread, non-FSR only) + static constexpr uint32_t NUM_SECONDARIES = 7; + static constexpr uint32_t NUM_WORKERS = 3; // terrain, WMO, M2 + + // Per-worker command pools (thread-safe: one pool per 
thread) + VkCommandPool workerCmdPools_[NUM_WORKERS] = {}; + // Main-thread command pool for its secondary buffers + VkCommandPool mainSecondaryCmdPool_ = VK_NULL_HANDLE; + // Pre-allocated secondary command buffers [secondaryIndex][frameInFlight] + VkCommandBuffer secondaryCmds_[NUM_SECONDARIES][MAX_FRAMES] = {}; + + bool parallelRecordingEnabled_ = false; // set true after pools/buffers created + bool createSecondaryCommandResources(); + void destroySecondaryCommandResources(); + VkCommandBuffer beginSecondary(uint32_t secondaryIndex); + void setSecondaryViewportScissor(VkCommandBuffer cmd); + + // Cached render pass state for secondary buffer inheritance + VkRenderPass activeRenderPass_ = VK_NULL_HANDLE; + VkFramebuffer activeFramebuffer_ = VK_NULL_HANDLE; + VkExtent2D activeRenderExtent_ = {0, 0}; + // Active character previews for off-screen rendering std::vector activePreviews_; diff --git a/include/rendering/vk_context.hpp b/include/rendering/vk_context.hpp index 907e21bf..154a4f98 100644 --- a/include/rendering/vk_context.hpp +++ b/include/rendering/vk_context.hpp @@ -84,6 +84,10 @@ public: bool isSwapchainDirty() const { return swapchainDirty; } void markSwapchainDirty() { swapchainDirty = true; } + // VSync (present mode) + bool isVsyncEnabled() const { return vsync_; } + void setVsync(bool enabled) { vsync_ = enabled; } + bool isDeviceLost() const { return deviceLost_; } // MSAA @@ -145,6 +149,7 @@ private: std::vector swapchainFramebuffers; bool swapchainDirty = false; bool deviceLost_ = false; + bool vsync_ = true; // Per-frame resources FrameData frames[MAX_FRAMES_IN_FLIGHT]; diff --git a/include/rendering/wmo_renderer.hpp b/include/rendering/wmo_renderer.hpp index f0d3b36f..b8be9485 100644 --- a/include/rendering/wmo_renderer.hpp +++ b/include/rendering/wmo_renderer.hpp @@ -148,6 +148,8 @@ public: * @param perFrameSet Per-frame descriptor set (set 0) * @param camera Camera for frustum culling */ + /** Pre-update mutable state (frame ID, material 
UBOs) on main thread before parallel render. */ + void prepareRender(); void render(VkCommandBuffer cmd, VkDescriptorSet perFrameSet, const Camera& camera); /** @@ -332,6 +334,9 @@ public: // Defer normal/height map generation during streaming to avoid CPU stalls void setDeferNormalMaps(bool defer) { deferNormalMaps_ = defer; } + // Generate normal/height maps for cached textures that were loaded while deferred + void backfillNormalMaps(); + private: // WMO material UBO — matches WMOMaterial in wmo.frag.glsl struct WMOMaterialUBO { @@ -720,6 +725,8 @@ private: uint32_t distanceCulled = 0; }; std::vector> cullFutures_; + std::vector visibleInstances_; // reused per frame + std::vector drawLists_; // reused per frame // Collision query profiling (per frame). mutable double queryTimeMs = 0.0; diff --git a/include/ui/game_screen.hpp b/include/ui/game_screen.hpp index 7e428523..bf7558cd 100644 --- a/include/ui/game_screen.hpp +++ b/include/ui/game_screen.hpp @@ -116,6 +116,10 @@ private: float pendingNormalMapStrength = 0.8f; // 0.0-2.0 bool pendingPOM = true; // on by default int pendingPOMQuality = 1; // 0=Low(16), 1=Medium(32), 2=High(64) + bool pendingFSR = false; + int pendingFSRQuality = 0; // 0=UltraQuality, 1=Quality, 2=Balanced, 3=Performance + float pendingFSRSharpness = 0.5f; + bool fsrSettingsApplied_ = false; // UI element transparency (0.0 = fully transparent, 1.0 = fully opaque) float uiOpacity_ = 0.65f; diff --git a/src/core/application.cpp b/src/core/application.cpp index c1907a15..f9ac557c 100644 --- a/src/core/application.cpp +++ b/src/core/application.cpp @@ -1015,14 +1015,33 @@ void Application::update(float deltaTime) { if (renderer && renderer->getCameraController()) renderer->getCameraController()->clearMovementInputs(); } + // Hearth teleport: keep player frozen until terrain loads at destination + if (hearthTeleportPending_ && renderer && renderer->getTerrainManager()) { + hearthTeleportTimer_ -= deltaTime; + auto terrainH = 
renderer->getTerrainManager()->getHeightAt( + hearthTeleportPos_.x, hearthTeleportPos_.y); + if (terrainH || hearthTeleportTimer_ <= 0.0f) { + // Terrain loaded (or timeout) — snap to floor and release + if (terrainH) { + hearthTeleportPos_.z = *terrainH + 0.5f; + renderer->getCameraController()->teleportTo(hearthTeleportPos_); + } + renderer->getCameraController()->setExternalFollow(false); + worldEntryMovementGraceTimer_ = 1.0f; + hearthTeleportPending_ = false; + LOG_INFO("Unstuck hearth: terrain loaded, player released", + terrainH ? "" : " (timeout)"); + } + } if (renderer && renderer->getCameraController()) { const bool externallyDrivenMotion = onTaxi || onWMOTransport || chargeActive_; // Keep physics frozen (externalFollow) during landing clamp when terrain // hasn't loaded yet — prevents gravity from pulling player through void. + bool hearthFreeze = hearthTeleportPending_; bool landingClampActive = !onTaxi && taxiLandingClampTimer_ > 0.0f && worldEntryMovementGraceTimer_ <= 0.0f && !gameHandler->isMounted(); - renderer->getCameraController()->setExternalFollow(externallyDrivenMotion || landingClampActive); + renderer->getCameraController()->setExternalFollow(externallyDrivenMotion || landingClampActive || hearthFreeze); renderer->getCameraController()->setExternalMoving(externallyDrivenMotion); if (externallyDrivenMotion) { // Drop any stale local movement toggles while server drives taxi motion. @@ -1877,9 +1896,43 @@ void Application::setupUICallbacks() { LOG_INFO("Unstuck: high fallback snap"); }); + // /unstuckhearth — teleport to hearthstone bind point (server-synced). + // Freezes player until terrain loads at destination to prevent falling through world. 
+ gameHandler->setUnstuckHearthCallback([this, clearStuckMovement, forceServerTeleportCommand]() { + if (!renderer || !renderer->getCameraController() || !gameHandler) return; + + uint32_t bindMap = 0; + glm::vec3 bindPos(0.0f); + if (!gameHandler->getHomeBind(bindMap, bindPos)) { + LOG_WARNING("Unstuck hearth: no bind point available"); + return; + } + + worldEntryMovementGraceTimer_ = 10.0f; // long grace — terrain load check will clear it + taxiLandingClampTimer_ = 0.0f; + lastTaxiFlight_ = false; + clearStuckMovement(); + + auto* cc = renderer->getCameraController(); + glm::vec3 renderPos = core::coords::canonicalToRender(bindPos); + renderPos.z += 2.0f; + + // Freeze player in place (no gravity/movement) until terrain loads + cc->teleportTo(renderPos); + cc->setExternalFollow(true); + forceServerTeleportCommand(renderPos); + clearStuckMovement(); + + // Set pending state — update loop will unfreeze once terrain is loaded + hearthTeleportPending_ = true; + hearthTeleportPos_ = renderPos; + hearthTeleportTimer_ = 15.0f; // 15s safety timeout + LOG_INFO("Unstuck hearth: teleporting to bind point, waiting for terrain..."); + }); + // Auto-unstuck: falling for > 5 seconds = void fall, teleport to map entry if (renderer->getCameraController()) { - renderer->getCameraController()->setAutoUnstuckCallback([this]() { + renderer->getCameraController()->setAutoUnstuckCallback([this, forceServerTeleportCommand]() { if (!renderer || !renderer->getCameraController()) return; auto* cc = renderer->getCameraController(); @@ -1887,7 +1940,8 @@ void Application::setupUICallbacks() { glm::vec3 spawnPos = cc->getDefaultPosition(); spawnPos.z += 5.0f; cc->teleportTo(spawnPos); - LOG_INFO("Auto-unstuck: teleported to map entry point"); + forceServerTeleportCommand(spawnPos); + LOG_INFO("Auto-unstuck: teleported to map entry point (server synced)"); }); } diff --git a/src/core/window.cpp b/src/core/window.cpp index eed83c97..9f74a81c 100644 --- a/src/core/window.cpp +++ 
b/src/core/window.cpp @@ -84,6 +84,7 @@ bool Window::initialize() { // Initialize Vulkan context vkContext = std::make_unique(); + vkContext->setVsync(vsync); if (!vkContext->initialize(window)) { LOG_ERROR("Failed to initialize Vulkan context"); return false; @@ -158,11 +159,13 @@ void Window::setFullscreen(bool enable) { } } -void Window::setVsync([[maybe_unused]] bool enable) { - // VSync in Vulkan is controlled by present mode (set at swapchain creation) - // For now, store the preference — applied on next swapchain recreation +void Window::setVsync(bool enable) { vsync = enable; - LOG_INFO("VSync preference set to ", enable ? "on" : "off", " (applied on swapchain recreation)"); + if (vkContext) { + vkContext->setVsync(enable); + vkContext->markSwapchainDirty(); + } + LOG_INFO("VSync ", enable ? "enabled" : "disabled"); } void Window::applyResolution(int w, int h) { diff --git a/src/game/game_handler.cpp b/src/game/game_handler.cpp index 9a7aed97..3cd05d3c 100644 --- a/src/game/game_handler.cpp +++ b/src/game/game_handler.cpp @@ -11435,6 +11435,15 @@ void GameHandler::unstuckGy() { } } +void GameHandler::unstuckHearth() { + if (unstuckHearthCallback_) { + unstuckHearthCallback_(); + addSystemChatMessage("Unstuck: teleported to hearthstone location."); + } else { + addSystemChatMessage("No hearthstone bind point set."); + } +} + void GameHandler::handleLootResponse(network::Packet& packet) { if (!LootResponseParser::parse(packet, currentLoot)) return; lootWindowOpen = true; diff --git a/src/rendering/character_renderer.cpp b/src/rendering/character_renderer.cpp index 9607f755..f69ae75c 100644 --- a/src/rendering/character_renderer.cpp +++ b/src/rendering/character_renderer.cpp @@ -1924,6 +1924,61 @@ glm::mat4 CharacterRenderer::getBoneTransform(const pipeline::M2Bone& bone, floa // --- Rendering --- +void CharacterRenderer::prepareRender(uint32_t frameIndex) { + if (instances.empty() || !opaquePipeline_) return; + + // Pre-allocate bone SSBOs + descriptor sets 
on main thread (pool ops not thread-safe) + for (auto& [id, instance] : instances) { + int numBones = std::min(static_cast(instance.boneMatrices.size()), MAX_BONES); + if (numBones <= 0) continue; + + if (!instance.boneBuffer[frameIndex]) { + VkBufferCreateInfo bci{VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO}; + bci.size = MAX_BONES * sizeof(glm::mat4); + bci.usage = VK_BUFFER_USAGE_STORAGE_BUFFER_BIT; + VmaAllocationCreateInfo aci{}; + aci.usage = VMA_MEMORY_USAGE_CPU_TO_GPU; + aci.flags = VMA_ALLOCATION_CREATE_MAPPED_BIT; + VmaAllocationInfo allocInfo{}; + vmaCreateBuffer(vkCtx_->getAllocator(), &bci, &aci, + &instance.boneBuffer[frameIndex], &instance.boneAlloc[frameIndex], &allocInfo); + instance.boneMapped[frameIndex] = allocInfo.pMappedData; + + VkDescriptorSetAllocateInfo ai{VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO}; + ai.descriptorPool = boneDescPool_; + ai.descriptorSetCount = 1; + ai.pSetLayouts = &boneSetLayout_; + VkResult dsRes = vkAllocateDescriptorSets(vkCtx_->getDevice(), &ai, &instance.boneSet[frameIndex]); + if (dsRes != VK_SUCCESS) { + LOG_ERROR("CharacterRenderer::prepareRender: bone descriptor alloc failed (instance=", + id, ", frame=", frameIndex, ", vk=", static_cast(dsRes), ")"); + if (instance.boneBuffer[frameIndex]) { + vmaDestroyBuffer(vkCtx_->getAllocator(), + instance.boneBuffer[frameIndex], instance.boneAlloc[frameIndex]); + instance.boneBuffer[frameIndex] = VK_NULL_HANDLE; + instance.boneAlloc[frameIndex] = VK_NULL_HANDLE; + instance.boneMapped[frameIndex] = nullptr; + } + continue; + } + + if (instance.boneSet[frameIndex]) { + VkDescriptorBufferInfo bufInfo{}; + bufInfo.buffer = instance.boneBuffer[frameIndex]; + bufInfo.offset = 0; + bufInfo.range = bci.size; + VkWriteDescriptorSet write{VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET}; + write.dstSet = instance.boneSet[frameIndex]; + write.dstBinding = 0; + write.descriptorCount = 1; + write.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER; + write.pBufferInfo = &bufInfo; + 
vkUpdateDescriptorSets(vkCtx_->getDevice(), 1, &write, 0, nullptr); + } + } + } +} + void CharacterRenderer::render(VkCommandBuffer cmd, VkDescriptorSet perFrameSet, [[maybe_unused]] const Camera& camera) { if (instances.empty() || !opaquePipeline_) { return; diff --git a/src/rendering/m2_renderer.cpp b/src/rendering/m2_renderer.cpp index d455e494..3a097217 100644 --- a/src/rendering/m2_renderer.cpp +++ b/src/rendering/m2_renderer.cpp @@ -1602,6 +1602,12 @@ bool M2Renderer::loadModel(const pipeline::M2Model& model, uint32_t modelId) { } } + // Pre-compute available LOD levels to avoid per-instance batch iteration + gpuModel.availableLODs = 0; + for (const auto& b : gpuModel.batches) { + if (b.submeshLevel < 8) gpuModel.availableLODs |= (1u << b.submeshLevel); + } + models[modelId] = std::move(gpuModel); LOG_DEBUG("Loaded M2 model: ", model.name, " (", models[modelId].vertexCount, " vertices, ", @@ -1911,6 +1917,7 @@ static void computeBoneMatrices(const M2ModelGPU& model, M2Instance& instance) { instance.boneMatrices[i] = local; } } + instance.bonesDirty = true; } void M2Renderer::update(float deltaTime, const glm::vec3& cameraPos, const glm::mat4& viewProjection) { @@ -2172,6 +2179,48 @@ void M2Renderer::update(float deltaTime, const glm::vec3& cameraPos, const glm:: } +void M2Renderer::prepareRender(uint32_t frameIndex, const Camera& camera) { + if (!initialized_ || instances.empty()) return; + (void)camera; // reserved for future frustum-based culling + + // Pre-allocate bone SSBOs + descriptor sets on main thread (pool ops not thread-safe). + // Only iterate animated instances — static doodads don't need bone buffers. 
+ for (size_t idx : animatedInstanceIndices_) { + if (idx >= instances.size()) continue; + auto& instance = instances[idx]; + + if (instance.boneMatrices.empty()) continue; + + if (!instance.boneBuffer[frameIndex]) { + VkBufferCreateInfo bci{VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO}; + bci.size = 128 * sizeof(glm::mat4); + bci.usage = VK_BUFFER_USAGE_STORAGE_BUFFER_BIT; + VmaAllocationCreateInfo aci{}; + aci.usage = VMA_MEMORY_USAGE_CPU_TO_GPU; + aci.flags = VMA_ALLOCATION_CREATE_MAPPED_BIT; + VmaAllocationInfo allocInfo{}; + vmaCreateBuffer(vkCtx_->getAllocator(), &bci, &aci, + &instance.boneBuffer[frameIndex], &instance.boneAlloc[frameIndex], &allocInfo); + instance.boneMapped[frameIndex] = allocInfo.pMappedData; + + instance.boneSet[frameIndex] = allocateBoneSet(); + if (instance.boneSet[frameIndex]) { + VkDescriptorBufferInfo bufInfo{}; + bufInfo.buffer = instance.boneBuffer[frameIndex]; + bufInfo.offset = 0; + bufInfo.range = bci.size; + VkWriteDescriptorSet write{VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET}; + write.dstSet = instance.boneSet[frameIndex]; + write.dstBinding = 0; + write.descriptorCount = 1; + write.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER; + write.pBufferInfo = &bufInfo; + vkUpdateDescriptorSets(vkCtx_->getDevice(), 1, &write, 0, nullptr); + } + } + } +} + void M2Renderer::render(VkCommandBuffer cmd, VkDescriptorSet perFrameSet, const Camera& camera) { if (instances.empty() || !opaquePipeline_) { return; @@ -2254,8 +2303,8 @@ void M2Renderer::render(VkCommandBuffer cmd, VkDescriptorSet perFrameSet, const } // Sort by modelId to minimize vertex/index buffer rebinds - std::stable_sort(sortedVisible_.begin(), sortedVisible_.end(), - [](const VisibleEntry& a, const VisibleEntry& b) { return a.modelId < b.modelId; }); + std::sort(sortedVisible_.begin(), sortedVisible_.end(), + [](const VisibleEntry& a, const VisibleEntry& b) { return a.modelId < b.modelId; }); uint32_t currentModelId = UINT32_MAX; const M2ModelGPU* currentModel = nullptr; @@ 
-2330,44 +2379,22 @@ void M2Renderer::render(VkCommandBuffer cmd, VkDescriptorSet perFrameSet, const } } - // Upload bone matrices to SSBO if model has skeletal animation - bool useBones = model.hasAnimation && !model.disableAnimation && !instance.boneMatrices.empty(); + // Upload bone matrices to SSBO if model has skeletal animation. + // Bone buffers are pre-allocated by prepareRender() on the main thread. + // If not yet allocated (race/timing), skip this instance entirely to avoid + // a bind-pose flash — it will render correctly next frame. + bool needsBones = model.hasAnimation && !model.disableAnimation && !instance.boneMatrices.empty(); + if (needsBones && (!instance.boneBuffer[frameIndex] || !instance.boneSet[frameIndex])) { + continue; + } + bool useBones = needsBones; if (useBones) { - // Lazy-allocate bone SSBO on first use - if (!instance.boneBuffer[frameIndex]) { - VkBufferCreateInfo bci{VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO}; - bci.size = 128 * sizeof(glm::mat4); // max 128 bones - bci.usage = VK_BUFFER_USAGE_STORAGE_BUFFER_BIT; - VmaAllocationCreateInfo aci{}; - aci.usage = VMA_MEMORY_USAGE_CPU_TO_GPU; - aci.flags = VMA_ALLOCATION_CREATE_MAPPED_BIT; - VmaAllocationInfo allocInfo{}; - vmaCreateBuffer(vkCtx_->getAllocator(), &bci, &aci, - &instance.boneBuffer[frameIndex], &instance.boneAlloc[frameIndex], &allocInfo); - instance.boneMapped[frameIndex] = allocInfo.pMappedData; - - // Allocate descriptor set for bone SSBO - instance.boneSet[frameIndex] = allocateBoneSet(); - if (instance.boneSet[frameIndex]) { - VkDescriptorBufferInfo bufInfo{}; - bufInfo.buffer = instance.boneBuffer[frameIndex]; - bufInfo.offset = 0; - bufInfo.range = bci.size; - VkWriteDescriptorSet write{VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET}; - write.dstSet = instance.boneSet[frameIndex]; - write.dstBinding = 0; - write.descriptorCount = 1; - write.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER; - write.pBufferInfo = &bufInfo; - vkUpdateDescriptorSets(vkCtx_->getDevice(), 1, 
&write, 0, nullptr); - } - } - - // Upload bone matrices - if (instance.boneMapped[frameIndex]) { + // Upload bone matrices only when recomputed (skip frame-skipped instances) + if (instance.bonesDirty && instance.boneMapped[frameIndex]) { int numBones = std::min(static_cast(instance.boneMatrices.size()), 128); memcpy(instance.boneMapped[frameIndex], instance.boneMatrices.data(), numBones * sizeof(glm::mat4)); + instance.bonesDirty = false; } // Bind bone descriptor set (set 2) @@ -2384,12 +2411,8 @@ void M2Renderer::render(VkCommandBuffer cmd, VkDescriptorSet perFrameSet, const else if (entry.distSq > 40.0f * 40.0f) desiredLOD = 1; uint16_t targetLOD = desiredLOD; - if (desiredLOD > 0) { - bool hasDesiredLOD = false; - for (const auto& b : model.batches) { - if (b.submeshLevel == desiredLOD) { hasDesiredLOD = true; break; } - } - if (!hasDesiredLOD) targetLOD = 0; + if (desiredLOD > 0 && !(model.availableLODs & (1u << desiredLOD))) { + targetLOD = 0; } const bool foliageLikeModel = model.isFoliageLike; diff --git a/src/rendering/performance_hud.cpp b/src/rendering/performance_hud.cpp index d939f4f9..86dc2f21 100644 --- a/src/rendering/performance_hud.cpp +++ b/src/rendering/performance_hud.cpp @@ -1,5 +1,6 @@ #include "rendering/performance_hud.hpp" #include "rendering/renderer.hpp" +#include "rendering/vk_context.hpp" #include "rendering/terrain_renderer.hpp" #include "rendering/terrain_manager.hpp" #include "rendering/water_renderer.hpp" @@ -187,6 +188,19 @@ void PerformanceHUD::render(const Renderer* renderer, const Camera* camera) { 0, nullptr, 0.0f, 33.33f, ImVec2(200, 40)); } + // FSR info + if (renderer->isFSREnabled()) { + ImGui::TextColored(ImVec4(0.4f, 1.0f, 0.4f, 1.0f), "FSR 1.0: ON"); + auto* ctx = renderer->getVkContext(); + if (ctx) { + auto ext = ctx->getSwapchainExtent(); + float sf = renderer->getFSRScaleFactor(); + uint32_t iw = static_cast(ext.width * sf) & ~1u; + uint32_t ih = static_cast(ext.height * sf) & ~1u; + ImGui::Text(" %ux%u -> %ux%u 
(%.0f%%)", iw, ih, ext.width, ext.height, sf * 100.0f); + } + } + ImGui::Spacing(); } diff --git a/src/rendering/renderer.cpp b/src/rendering/renderer.cpp index d487e05e..9f3d65e7 100644 --- a/src/rendering/renderer.cpp +++ b/src/rendering/renderer.cpp @@ -721,11 +721,18 @@ bool Renderer::initialize(core::Window* win) { // TODO Phase 6: Vulkan underwater overlay, post-process, and shadow map // GL versions stubbed during migration + // Create secondary command buffer resources for multithreaded rendering + if (!createSecondaryCommandResources()) { + LOG_WARNING("Failed to create secondary command buffers — falling back to single-threaded rendering"); + } + LOG_INFO("Renderer initialized"); return true; } void Renderer::shutdown() { + destroySecondaryCommandResources(); + LOG_WARNING("Renderer::shutdown - terrainManager stopWorkers..."); if (terrainManager) { terrainManager->stopWorkers(); @@ -828,6 +835,7 @@ void Renderer::shutdown() { if (overlayPipelineLayout) { vkDestroyPipelineLayout(device, overlayPipelineLayout, nullptr); overlayPipelineLayout = VK_NULL_HANDLE; } } + destroyFSRResources(); destroyPerFrameResources(); zoneManager.reset(); @@ -901,12 +909,7 @@ void Renderer::applyMsaaChange() { if (terrainRenderer) terrainRenderer->recreatePipelines(); if (waterRenderer) { waterRenderer->recreatePipelines(); - if (vkCtx->getMsaaSamples() != VK_SAMPLE_COUNT_1_BIT) { - waterRenderer->destroyWater1xResources(); - setupWater1xPass(); - } else { - waterRenderer->destroyWater1xResources(); - } + waterRenderer->destroyWater1xResources(); // no longer used } if (wmoRenderer) wmoRenderer->recreatePipelines(); if (m2Renderer) m2Renderer->recreatePipelines(); @@ -928,10 +931,11 @@ void Renderer::applyMsaaChange() { if (minimap) minimap->recreatePipelines(); - // Selection circle + overlay use lazy init, just destroy them + // Selection circle + overlay + FSR use lazy init, just destroy them VkDevice device = vkCtx->getDevice(); if (selCirclePipeline) { 
vkDestroyPipeline(device, selCirclePipeline, nullptr); selCirclePipeline = VK_NULL_HANDLE; } if (overlayPipeline) { vkDestroyPipeline(device, overlayPipeline, nullptr); overlayPipeline = VK_NULL_HANDLE; } + if (fsr_.sceneFramebuffer) destroyFSRResources(); // Will be lazily recreated in beginFrame() // Reinitialize ImGui Vulkan backend with new MSAA sample count ImGui_ImplVulkan_Shutdown(); @@ -961,17 +965,30 @@ void Renderer::beginFrame() { applyMsaaChange(); } + // FSR resource management (safe: between frames, no command buffer in flight) + if (fsr_.needsRecreate && fsr_.sceneFramebuffer) { + destroyFSRResources(); + fsr_.needsRecreate = false; + if (!fsr_.enabled) LOG_INFO("FSR: disabled"); + } + if (fsr_.enabled && !fsr_.sceneFramebuffer) { + if (!initFSRResources()) { + LOG_ERROR("FSR: initialization failed, disabling"); + fsr_.enabled = false; + } + } + // Handle swapchain recreation if needed if (vkCtx->isSwapchainDirty()) { vkCtx->recreateSwapchain(window->getWidth(), window->getHeight()); // Rebuild water resources that reference swapchain extent/views if (waterRenderer) { waterRenderer->recreatePipelines(); - if (waterRenderer->hasWater1xPass() - && vkCtx->getMsaaSamples() != VK_SAMPLE_COUNT_1_BIT) { - waterRenderer->destroyWater1xResources(); - setupWater1xPass(); - } + } + // Recreate FSR resources for new swapchain dimensions + if (fsr_.enabled) { + destroyFSRResources(); + initFSRResources(); } } @@ -1018,47 +1035,131 @@ void Renderer::beginFrame() { renderReflectionPass(); } // !skipPrePasses - // --- Begin main render pass (clear color + depth) --- + // --- Begin render pass --- + // If FSR is enabled, render scene to off-screen target at reduced resolution. + // Otherwise, render directly to swapchain. 
VkRenderPassBeginInfo rpInfo{}; rpInfo.sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO; rpInfo.renderPass = vkCtx->getImGuiRenderPass(); - rpInfo.framebuffer = vkCtx->getSwapchainFramebuffers()[currentImageIndex]; - rpInfo.renderArea.offset = {0, 0}; - rpInfo.renderArea.extent = vkCtx->getSwapchainExtent(); - // MSAA render pass has 3 attachments (color, depth, resolve), non-MSAA has 2 - VkClearValue clearValues[3]{}; + VkExtent2D renderExtent; + if (fsr_.enabled && fsr_.sceneFramebuffer) { + rpInfo.framebuffer = fsr_.sceneFramebuffer; + renderExtent = { fsr_.internalWidth, fsr_.internalHeight }; + } else { + rpInfo.framebuffer = vkCtx->getSwapchainFramebuffers()[currentImageIndex]; + renderExtent = vkCtx->getSwapchainExtent(); + } + + rpInfo.renderArea.offset = {0, 0}; + rpInfo.renderArea.extent = renderExtent; + + // Clear values must match attachment count: 2 (no MSAA), 3 (MSAA), or 4 (MSAA+depth resolve) + VkClearValue clearValues[4]{}; clearValues[0].color = {{0.0f, 0.0f, 0.0f, 1.0f}}; clearValues[1].depthStencil = {1.0f, 0}; - clearValues[2].color = {{0.0f, 0.0f, 0.0f, 1.0f}}; // resolve (DONT_CARE, but count must match) + clearValues[2].color = {{0.0f, 0.0f, 0.0f, 1.0f}}; + clearValues[3].depthStencil = {1.0f, 0}; bool msaaOn = (vkCtx->getMsaaSamples() > VK_SAMPLE_COUNT_1_BIT); - rpInfo.clearValueCount = msaaOn ? 3 : 2; + if (msaaOn) { + bool depthRes = (vkCtx->getDepthResolveImageView() != VK_NULL_HANDLE); + rpInfo.clearValueCount = depthRes ? 
4 : 3; + } else { + rpInfo.clearValueCount = 2; + } rpInfo.pClearValues = clearValues; - vkCmdBeginRenderPass(currentCmd, &rpInfo, VK_SUBPASS_CONTENTS_INLINE); + // Cache render pass state for secondary command buffer inheritance + activeRenderPass_ = rpInfo.renderPass; + activeFramebuffer_ = rpInfo.framebuffer; + activeRenderExtent_ = renderExtent; - // Set dynamic viewport and scissor - VkExtent2D extent = vkCtx->getSwapchainExtent(); - VkViewport viewport{}; - viewport.x = 0.0f; - viewport.y = 0.0f; - viewport.width = static_cast(extent.width); - viewport.height = static_cast(extent.height); - viewport.minDepth = 0.0f; - viewport.maxDepth = 1.0f; - vkCmdSetViewport(currentCmd, 0, 1, &viewport); + VkSubpassContents subpassMode = parallelRecordingEnabled_ + ? VK_SUBPASS_CONTENTS_SECONDARY_COMMAND_BUFFERS + : VK_SUBPASS_CONTENTS_INLINE; + vkCmdBeginRenderPass(currentCmd, &rpInfo, subpassMode); - VkRect2D scissor{}; - scissor.offset = {0, 0}; - scissor.extent = extent; - vkCmdSetScissor(currentCmd, 0, 1, &scissor); + if (!parallelRecordingEnabled_) { + // Fallback: set dynamic viewport and scissor on primary (inline mode) + VkViewport viewport{}; + viewport.width = static_cast(renderExtent.width); + viewport.height = static_cast(renderExtent.height); + viewport.maxDepth = 1.0f; + vkCmdSetViewport(currentCmd, 0, 1, &viewport); + + VkRect2D scissor{}; + scissor.extent = renderExtent; + vkCmdSetScissor(currentCmd, 0, 1, &scissor); + } } void Renderer::endFrame() { if (!vkCtx || currentCmd == VK_NULL_HANDLE) return; - // ImGui always renders in the main pass (its pipeline matches the main render pass) - ImGui_ImplVulkan_RenderDrawData(ImGui::GetDrawData(), currentCmd); + if (fsr_.enabled && fsr_.sceneFramebuffer) { + // End the off-screen scene render pass + vkCmdEndRenderPass(currentCmd); + + // Transition scene color (1x resolve/color target): PRESENT_SRC_KHR → SHADER_READ_ONLY + // The render pass finalLayout puts the resolve/color attachment in PRESENT_SRC_KHR + 
transitionImageLayout(currentCmd, fsr_.sceneColor.image, + VK_IMAGE_LAYOUT_PRESENT_SRC_KHR, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, + VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, + VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT); + + // Begin swapchain render pass at full resolution + VkRenderPassBeginInfo rpInfo{}; + rpInfo.sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO; + rpInfo.renderPass = vkCtx->getImGuiRenderPass(); + rpInfo.framebuffer = vkCtx->getSwapchainFramebuffers()[currentImageIndex]; + rpInfo.renderArea.offset = {0, 0}; + rpInfo.renderArea.extent = vkCtx->getSwapchainExtent(); + + // Clear values must match the render pass attachment count + bool msaaOn = (vkCtx->getMsaaSamples() > VK_SAMPLE_COUNT_1_BIT); + VkClearValue clearValues[4]{}; + clearValues[0].color = {{0.0f, 0.0f, 0.0f, 1.0f}}; + clearValues[1].depthStencil = {1.0f, 0}; + clearValues[2].color = {{0.0f, 0.0f, 0.0f, 1.0f}}; + clearValues[3].depthStencil = {1.0f, 0}; + if (msaaOn) { + bool depthRes = (vkCtx->getDepthResolveImageView() != VK_NULL_HANDLE); + rpInfo.clearValueCount = depthRes ? 4 : 3; + } else { + rpInfo.clearValueCount = 2; + } + rpInfo.pClearValues = clearValues; + + vkCmdBeginRenderPass(currentCmd, &rpInfo, VK_SUBPASS_CONTENTS_INLINE); + + // Set full-resolution viewport and scissor + VkExtent2D ext = vkCtx->getSwapchainExtent(); + VkViewport vp{}; + vp.width = static_cast(ext.width); + vp.height = static_cast(ext.height); + vp.maxDepth = 1.0f; + vkCmdSetViewport(currentCmd, 0, 1, &vp); + VkRect2D sc{}; + sc.extent = ext; + vkCmdSetScissor(currentCmd, 0, 1, &sc); + + // Draw FSR upscale fullscreen quad + renderFSRUpscale(); + } + + // ImGui rendering — must respect subpass contents mode + if (!fsr_.enabled && parallelRecordingEnabled_) { + // Scene pass was begun with VK_SUBPASS_CONTENTS_SECONDARY_COMMAND_BUFFERS, + // so ImGui must be recorded into a secondary command buffer. 
+ VkCommandBuffer imguiCmd = beginSecondary(SEC_IMGUI); + setSecondaryViewportScissor(imguiCmd); + ImGui_ImplVulkan_RenderDrawData(ImGui::GetDrawData(), imguiCmd); + vkEndCommandBuffer(imguiCmd); + vkCmdExecuteCommands(currentCmd, 1, &imguiCmd); + } else { + // FSR swapchain pass uses INLINE mode; non-parallel also uses INLINE. + ImGui_ImplVulkan_RenderDrawData(ImGui::GetDrawData(), currentCmd); + } vkCmdEndRenderPass(currentCmd); @@ -1076,16 +1177,7 @@ void Renderer::endFrame() { frame); } - // Render water in separate 1x pass after MSAA resolve + scene capture - bool waterDeferred = waterRenderer && waterRenderer->hasSurfaces() && waterRenderer->hasWater1xPass() - && vkCtx->getMsaaSamples() != VK_SAMPLE_COUNT_1_BIT; - if (waterDeferred && camera) { - VkExtent2D ext = vkCtx->getSwapchainExtent(); - if (waterRenderer->beginWater1xPass(currentCmd, currentImageIndex, ext)) { - waterRenderer->render(currentCmd, perFrameDescSets[frame], *camera, globalTime, true, frame); - waterRenderer->endWater1xPass(currentCmd); - } - } + // Water now renders in the main pass (renderWorld), no separate 1x pass needed. // Submit and present vkCtx->endFrame(currentCmd, currentImageIndex); @@ -3097,10 +3189,11 @@ void Renderer::clearSelectionCircle() { selCircleVisible = false; } -void Renderer::renderSelectionCircle(const glm::mat4& view, const glm::mat4& projection) { +void Renderer::renderSelectionCircle(const glm::mat4& view, const glm::mat4& projection, VkCommandBuffer overrideCmd) { if (!selCircleVisible) return; initSelectionCircle(); - if (selCirclePipeline == VK_NULL_HANDLE || currentCmd == VK_NULL_HANDLE) return; + VkCommandBuffer cmd = (overrideCmd != VK_NULL_HANDLE) ? overrideCmd : currentCmd; + if (selCirclePipeline == VK_NULL_HANDLE || cmd == VK_NULL_HANDLE) return; // Keep circle anchored near target foot Z. Accept nearby floor probes only, // so distant upper/lower WMO planes don't yank the ring away from feet. 
@@ -3132,19 +3225,19 @@ void Renderer::renderSelectionCircle(const glm::mat4& view, const glm::mat4& pro glm::mat4 mvp = projection * view * model; glm::vec4 color4(selCircleColor, 1.0f); - vkCmdBindPipeline(currentCmd, VK_PIPELINE_BIND_POINT_GRAPHICS, selCirclePipeline); + vkCmdBindPipeline(cmd, VK_PIPELINE_BIND_POINT_GRAPHICS, selCirclePipeline); VkDeviceSize offset = 0; - vkCmdBindVertexBuffers(currentCmd, 0, 1, &selCircleVertBuf, &offset); - vkCmdBindIndexBuffer(currentCmd, selCircleIdxBuf, 0, VK_INDEX_TYPE_UINT16); + vkCmdBindVertexBuffers(cmd, 0, 1, &selCircleVertBuf, &offset); + vkCmdBindIndexBuffer(cmd, selCircleIdxBuf, 0, VK_INDEX_TYPE_UINT16); // Push mvp (64 bytes) at offset 0 - vkCmdPushConstants(currentCmd, selCirclePipelineLayout, + vkCmdPushConstants(cmd, selCirclePipelineLayout, VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_FRAGMENT_BIT, 0, 64, &mvp[0][0]); // Push color (16 bytes) at offset 64 - vkCmdPushConstants(currentCmd, selCirclePipelineLayout, + vkCmdPushConstants(cmd, selCirclePipelineLayout, VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_FRAGMENT_BIT, 64, 16, &color4[0]); - vkCmdDrawIndexed(currentCmd, static_cast(selCircleVertCount), 1, 0, 0, 0); + vkCmdDrawIndexed(cmd, static_cast(selCircleVertCount), 1, 0, 0, 0); } // ────────────────────────────────────────────────────────────── @@ -3194,15 +3287,305 @@ void Renderer::initOverlayPipeline() { if (overlayPipeline) LOG_INFO("Renderer: overlay pipeline initialized"); } -void Renderer::renderOverlay(const glm::vec4& color) { +void Renderer::renderOverlay(const glm::vec4& color, VkCommandBuffer overrideCmd) { if (!overlayPipeline) initOverlayPipeline(); - if (!overlayPipeline || currentCmd == VK_NULL_HANDLE) return; - vkCmdBindPipeline(currentCmd, VK_PIPELINE_BIND_POINT_GRAPHICS, overlayPipeline); - vkCmdPushConstants(currentCmd, overlayPipelineLayout, + VkCommandBuffer cmd = (overrideCmd != VK_NULL_HANDLE) ? 
overrideCmd : currentCmd; + if (!overlayPipeline || cmd == VK_NULL_HANDLE) return; + vkCmdBindPipeline(cmd, VK_PIPELINE_BIND_POINT_GRAPHICS, overlayPipeline); + vkCmdPushConstants(cmd, overlayPipelineLayout, VK_SHADER_STAGE_FRAGMENT_BIT, 0, 16, &color[0]); - vkCmdDraw(currentCmd, 3, 1, 0, 0); // fullscreen triangle + vkCmdDraw(cmd, 3, 1, 0, 0); // fullscreen triangle } +// ========================= FSR 1.0 Upscaling ========================= + +bool Renderer::initFSRResources() { + if (!vkCtx) return false; + + VkDevice device = vkCtx->getDevice(); + VmaAllocator alloc = vkCtx->getAllocator(); + VkExtent2D swapExtent = vkCtx->getSwapchainExtent(); + VkSampleCountFlagBits msaa = vkCtx->getMsaaSamples(); + bool useMsaa = (msaa > VK_SAMPLE_COUNT_1_BIT); + bool useDepthResolve = (vkCtx->getDepthResolveImageView() != VK_NULL_HANDLE); + + fsr_.internalWidth = static_cast(swapExtent.width * fsr_.scaleFactor); + fsr_.internalHeight = static_cast(swapExtent.height * fsr_.scaleFactor); + fsr_.internalWidth = (fsr_.internalWidth + 1) & ~1u; + fsr_.internalHeight = (fsr_.internalHeight + 1) & ~1u; + + LOG_INFO("FSR: initializing at ", fsr_.internalWidth, "x", fsr_.internalHeight, + " -> ", swapExtent.width, "x", swapExtent.height, + " (scale=", fsr_.scaleFactor, ", MSAA=", static_cast(msaa), "x)"); + + VkFormat colorFmt = vkCtx->getSwapchainFormat(); + VkFormat depthFmt = vkCtx->getDepthFormat(); + + // sceneColor: always 1x, always sampled — this is what FSR reads + // Non-MSAA: direct render target. MSAA: resolve target. 
+ fsr_.sceneColor = createImage(device, alloc, fsr_.internalWidth, fsr_.internalHeight, + colorFmt, VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_SAMPLED_BIT); + if (!fsr_.sceneColor.image) { + LOG_ERROR("FSR: failed to create scene color image"); + return false; + } + + // sceneDepth: matches current MSAA sample count + fsr_.sceneDepth = createImage(device, alloc, fsr_.internalWidth, fsr_.internalHeight, + depthFmt, VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT, msaa); + if (!fsr_.sceneDepth.image) { + LOG_ERROR("FSR: failed to create scene depth image"); + destroyFSRResources(); + return false; + } + + if (useMsaa) { + // sceneMsaaColor: multisampled color target + fsr_.sceneMsaaColor = createImage(device, alloc, fsr_.internalWidth, fsr_.internalHeight, + colorFmt, VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT, msaa); + if (!fsr_.sceneMsaaColor.image) { + LOG_ERROR("FSR: failed to create MSAA color image"); + destroyFSRResources(); + return false; + } + + if (useDepthResolve) { + fsr_.sceneDepthResolve = createImage(device, alloc, fsr_.internalWidth, fsr_.internalHeight, + depthFmt, VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT); + if (!fsr_.sceneDepthResolve.image) { + LOG_ERROR("FSR: failed to create depth resolve image"); + destroyFSRResources(); + return false; + } + } + } + + // Build framebuffer matching the main render pass attachment layout: + // Non-MSAA: [color, depth] + // MSAA (no depth res): [msaaColor, depth, resolve] + // MSAA (depth res): [msaaColor, depth, resolve, depthResolve] + VkImageView fbAttachments[4]{}; + uint32_t fbCount; + if (useMsaa) { + fbAttachments[0] = fsr_.sceneMsaaColor.imageView; + fbAttachments[1] = fsr_.sceneDepth.imageView; + fbAttachments[2] = fsr_.sceneColor.imageView; // resolve target + fbCount = 3; + if (useDepthResolve) { + fbAttachments[3] = fsr_.sceneDepthResolve.imageView; + fbCount = 4; + } + } else { + fbAttachments[0] = fsr_.sceneColor.imageView; + fbAttachments[1] = fsr_.sceneDepth.imageView; + fbCount = 2; + } + + 
VkFramebufferCreateInfo fbInfo{}; + fbInfo.sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO; + fbInfo.renderPass = vkCtx->getImGuiRenderPass(); + fbInfo.attachmentCount = fbCount; + fbInfo.pAttachments = fbAttachments; + fbInfo.width = fsr_.internalWidth; + fbInfo.height = fsr_.internalHeight; + fbInfo.layers = 1; + + if (vkCreateFramebuffer(device, &fbInfo, nullptr, &fsr_.sceneFramebuffer) != VK_SUCCESS) { + LOG_ERROR("FSR: failed to create scene framebuffer"); + destroyFSRResources(); + return false; + } + + // Sampler for the resolved scene color + VkSamplerCreateInfo samplerInfo{}; + samplerInfo.sType = VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO; + samplerInfo.minFilter = VK_FILTER_LINEAR; + samplerInfo.magFilter = VK_FILTER_LINEAR; + samplerInfo.addressModeU = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE; + samplerInfo.addressModeV = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE; + samplerInfo.addressModeW = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE; + samplerInfo.mipmapMode = VK_SAMPLER_MIPMAP_MODE_LINEAR; + if (vkCreateSampler(device, &samplerInfo, nullptr, &fsr_.sceneSampler) != VK_SUCCESS) { + LOG_ERROR("FSR: failed to create sampler"); + destroyFSRResources(); + return false; + } + + // Descriptor set layout: binding 0 = combined image sampler + VkDescriptorSetLayoutBinding binding{}; + binding.binding = 0; + binding.descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER; + binding.descriptorCount = 1; + binding.stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT; + + VkDescriptorSetLayoutCreateInfo layoutInfo{}; + layoutInfo.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO; + layoutInfo.bindingCount = 1; + layoutInfo.pBindings = &binding; + vkCreateDescriptorSetLayout(device, &layoutInfo, nullptr, &fsr_.descSetLayout); + + VkDescriptorPoolSize poolSize{}; + poolSize.type = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER; + poolSize.descriptorCount = 1; + VkDescriptorPoolCreateInfo poolInfo{}; + poolInfo.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO; + poolInfo.maxSets = 
1; + poolInfo.poolSizeCount = 1; + poolInfo.pPoolSizes = &poolSize; + vkCreateDescriptorPool(device, &poolInfo, nullptr, &fsr_.descPool); + + VkDescriptorSetAllocateInfo dsAllocInfo{}; + dsAllocInfo.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO; + dsAllocInfo.descriptorPool = fsr_.descPool; + dsAllocInfo.descriptorSetCount = 1; + dsAllocInfo.pSetLayouts = &fsr_.descSetLayout; + vkAllocateDescriptorSets(device, &dsAllocInfo, &fsr_.descSet); + + // Always bind the 1x sceneColor (FSR reads the resolved image) + VkDescriptorImageInfo imgInfo{}; + imgInfo.sampler = fsr_.sceneSampler; + imgInfo.imageView = fsr_.sceneColor.imageView; + imgInfo.imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; + + VkWriteDescriptorSet write{}; + write.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET; + write.dstSet = fsr_.descSet; + write.dstBinding = 0; + write.descriptorCount = 1; + write.descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER; + write.pImageInfo = &imgInfo; + vkUpdateDescriptorSets(device, 1, &write, 0, nullptr); + + // Pipeline layout + VkPushConstantRange pc{}; + pc.stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT; + pc.offset = 0; + pc.size = 64; + VkPipelineLayoutCreateInfo plCI{}; + plCI.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO; + plCI.setLayoutCount = 1; + plCI.pSetLayouts = &fsr_.descSetLayout; + plCI.pushConstantRangeCount = 1; + plCI.pPushConstantRanges = &pc; + vkCreatePipelineLayout(device, &plCI, nullptr, &fsr_.pipelineLayout); + + // Load shaders + VkShaderModule vertMod, fragMod; + if (!vertMod.loadFromFile(device, "assets/shaders/postprocess.vert.spv") || + !fragMod.loadFromFile(device, "assets/shaders/fsr_easu.frag.spv")) { + LOG_ERROR("FSR: failed to load shaders"); + destroyFSRResources(); + return false; + } + + // FSR upscale pipeline renders into the swapchain pass at full resolution + // Must match swapchain pass MSAA setting + fsr_.pipeline = PipelineBuilder() + .setShaders(vertMod.stageInfo(VK_SHADER_STAGE_VERTEX_BIT), + 
fragMod.stageInfo(VK_SHADER_STAGE_FRAGMENT_BIT)) + .setVertexInput({}, {}) + .setTopology(VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST) + .setRasterization(VK_POLYGON_MODE_FILL, VK_CULL_MODE_NONE) + .setNoDepthTest() + .setColorBlendAttachment(PipelineBuilder::blendDisabled()) + .setMultisample(msaa) + .setLayout(fsr_.pipelineLayout) + .setRenderPass(vkCtx->getImGuiRenderPass()) + .setDynamicStates({VK_DYNAMIC_STATE_VIEWPORT, VK_DYNAMIC_STATE_SCISSOR}) + .build(device); + + vertMod.destroy(); + fragMod.destroy(); + + if (!fsr_.pipeline) { + LOG_ERROR("FSR: failed to create upscale pipeline"); + destroyFSRResources(); + return false; + } + + LOG_INFO("FSR: initialized successfully"); + return true; +} + +void Renderer::destroyFSRResources() { + if (!vkCtx) return; + VkDevice device = vkCtx->getDevice(); + VmaAllocator alloc = vkCtx->getAllocator(); + + vkDeviceWaitIdle(device); + + if (fsr_.pipeline) { vkDestroyPipeline(device, fsr_.pipeline, nullptr); fsr_.pipeline = VK_NULL_HANDLE; } + if (fsr_.pipelineLayout) { vkDestroyPipelineLayout(device, fsr_.pipelineLayout, nullptr); fsr_.pipelineLayout = VK_NULL_HANDLE; } + if (fsr_.descPool) { vkDestroyDescriptorPool(device, fsr_.descPool, nullptr); fsr_.descPool = VK_NULL_HANDLE; fsr_.descSet = VK_NULL_HANDLE; } + if (fsr_.descSetLayout) { vkDestroyDescriptorSetLayout(device, fsr_.descSetLayout, nullptr); fsr_.descSetLayout = VK_NULL_HANDLE; } + if (fsr_.sceneFramebuffer) { vkDestroyFramebuffer(device, fsr_.sceneFramebuffer, nullptr); fsr_.sceneFramebuffer = VK_NULL_HANDLE; } + if (fsr_.sceneSampler) { vkDestroySampler(device, fsr_.sceneSampler, nullptr); fsr_.sceneSampler = VK_NULL_HANDLE; } + destroyImage(device, alloc, fsr_.sceneDepthResolve); + destroyImage(device, alloc, fsr_.sceneMsaaColor); + destroyImage(device, alloc, fsr_.sceneDepth); + destroyImage(device, alloc, fsr_.sceneColor); + + fsr_.internalWidth = 0; + fsr_.internalHeight = 0; +} + +void Renderer::renderFSRUpscale() { + if (!fsr_.pipeline || currentCmd == 
VK_NULL_HANDLE) return; + + VkExtent2D outExtent = vkCtx->getSwapchainExtent(); + float inW = static_cast(fsr_.internalWidth); + float inH = static_cast(fsr_.internalHeight); + float outW = static_cast(outExtent.width); + float outH = static_cast(outExtent.height); + + // FSR push constants + struct { + glm::vec4 con0; // inputSize.xy, 1/inputSize.xy + glm::vec4 con1; // inputSize.xy / outputSize.xy, 0.5 * inputSize.xy / outputSize.xy + glm::vec4 con2; // outputSize.xy, 1/outputSize.xy + glm::vec4 con3; // sharpness, 0, 0, 0 + } fsrConst; + + fsrConst.con0 = glm::vec4(inW, inH, 1.0f / inW, 1.0f / inH); + fsrConst.con1 = glm::vec4(inW / outW, inH / outH, 0.5f * inW / outW, 0.5f * inH / outH); + fsrConst.con2 = glm::vec4(outW, outH, 1.0f / outW, 1.0f / outH); + fsrConst.con3 = glm::vec4(fsr_.sharpness, 0.0f, 0.0f, 0.0f); + + vkCmdBindPipeline(currentCmd, VK_PIPELINE_BIND_POINT_GRAPHICS, fsr_.pipeline); + vkCmdBindDescriptorSets(currentCmd, VK_PIPELINE_BIND_POINT_GRAPHICS, + fsr_.pipelineLayout, 0, 1, &fsr_.descSet, 0, nullptr); + vkCmdPushConstants(currentCmd, fsr_.pipelineLayout, + VK_SHADER_STAGE_FRAGMENT_BIT, 0, 64, &fsrConst); + vkCmdDraw(currentCmd, 3, 1, 0, 0); +} + +void Renderer::setFSREnabled(bool enabled) { + if (fsr_.enabled == enabled) return; + fsr_.enabled = enabled; + + if (!enabled) { + // Defer destruction to next beginFrame() — can't destroy mid-render + fsr_.needsRecreate = true; + } + // Resources created/destroyed lazily in beginFrame() +} + +void Renderer::setFSRQuality(float scaleFactor) { + scaleFactor = glm::clamp(scaleFactor, 0.5f, 1.0f); + if (fsr_.scaleFactor == scaleFactor) return; + fsr_.scaleFactor = scaleFactor; + // Don't destroy/recreate mid-frame — mark for lazy recreation in next beginFrame() + if (fsr_.enabled && fsr_.sceneFramebuffer) { + fsr_.needsRecreate = true; + } +} + +void Renderer::setFSRSharpness(float sharpness) { + fsr_.sharpness = glm::clamp(sharpness, 0.0f, 2.0f); +} + +// ========================= End FSR 
========================= + void Renderer::renderWorld(game::World* world, game::GameHandler* gameHandler) { (void)world; @@ -3233,153 +3616,283 @@ void Renderer::renderWorld(game::World* world, game::GameHandler* gameHandler) { // Get time of day for sky-related rendering float timeOfDay = (skySystem && skySystem->getSkybox()) ? skySystem->getSkybox()->getTimeOfDay() : 12.0f; - // Render sky system (unified coordinator for skybox, stars, celestial, clouds, lens flare) - if (skySystem && camera && !skipSky) { - rendering::SkyParams skyParams; - skyParams.timeOfDay = timeOfDay; - skyParams.gameTime = gameHandler ? gameHandler->getGameTime() : -1.0f; - - if (lightingManager) { - const auto& lighting = lightingManager->getLightingParams(); - skyParams.directionalDir = lighting.directionalDir; - skyParams.sunColor = lighting.diffuseColor; - skyParams.skyTopColor = lighting.skyTopColor; - skyParams.skyMiddleColor = lighting.skyMiddleColor; - skyParams.skyBand1Color = lighting.skyBand1Color; - skyParams.skyBand2Color = lighting.skyBand2Color; - skyParams.cloudDensity = lighting.cloudDensity; - skyParams.fogDensity = lighting.fogDensity; - skyParams.horizonGlow = lighting.horizonGlow; - } - - // Weather attenuation for lens flare - if (gameHandler) { - skyParams.weatherIntensity = gameHandler->getWeatherIntensity(); - } - - skyParams.skyboxModelId = 0; - skyParams.skyboxHasStars = false; - - skySystem->render(currentCmd, perFrameSet, *camera, skyParams); - } - - // Terrain (opaque pass) - if (terrainRenderer && camera && terrainEnabled && !skipTerrain) { - auto terrainStart = std::chrono::steady_clock::now(); - terrainRenderer->render(currentCmd, perFrameSet, *camera); - lastTerrainRenderMs = std::chrono::duration( - std::chrono::steady_clock::now() - terrainStart).count(); - } - - // WMO buildings (opaque, drawn before characters so selection circle sits on top) - if (wmoRenderer && camera && !skipWMO) { - auto wmoStart = std::chrono::steady_clock::now(); - 
wmoRenderer->render(currentCmd, perFrameSet, *camera); - lastWMORenderMs = std::chrono::duration( - std::chrono::steady_clock::now() - wmoStart).count(); - } - - // Selection circle (drawn after WMO, before characters) - renderSelectionCircle(view, projection); - - // Characters (after selection circle so units draw over the ring) - if (characterRenderer && camera && !skipChars) { - characterRenderer->render(currentCmd, perFrameSet, *camera); - } - - // M2 doodads, creatures, glow sprites, particles - if (m2Renderer && camera && !skipM2) { - if (cameraController) { + // ── Multithreaded secondary command buffer recording ── + // Terrain, WMO, and M2 record on worker threads while main thread handles + // sky, characters, water, and effects. prepareRender() on main thread first + // to handle thread-unsafe GPU allocations (descriptor pools, bone SSBOs). + if (parallelRecordingEnabled_) { + // --- Pre-compute state + GPU allocations on main thread (not thread-safe) --- + if (m2Renderer && cameraController) { m2Renderer->setInsideInterior(cameraController->isInsideWMO()); m2Renderer->setOnTaxi(cameraController->isOnTaxi()); } - auto m2Start = std::chrono::steady_clock::now(); - m2Renderer->render(currentCmd, perFrameSet, *camera); - m2Renderer->renderSmokeParticles(currentCmd, perFrameSet); - m2Renderer->renderM2Particles(currentCmd, perFrameSet); - lastM2RenderMs = std::chrono::duration( - std::chrono::steady_clock::now() - m2Start).count(); - } + if (wmoRenderer) wmoRenderer->prepareRender(); + if (m2Renderer && camera) m2Renderer->prepareRender(frameIdx, *camera); + if (characterRenderer) characterRenderer->prepareRender(frameIdx); - // Water (transparent, after all opaques) - // When MSAA is on and 1x pass is available, water renders after main pass ends - bool waterDeferred = waterRenderer && waterRenderer->hasWater1xPass() - && vkCtx->getMsaaSamples() != VK_SAMPLE_COUNT_1_BIT; - if (waterRenderer && camera && !waterDeferred) { - waterRenderer->render(currentCmd, 
perFrameSet, *camera, globalTime, false, vkCtx->getCurrentFrame()); - } + // --- Dispatch worker threads (terrain + WMO + M2) --- + std::future terrainFuture, wmoFuture, m2Future; - // Weather particles - if (weather && camera) { - weather->render(currentCmd, perFrameSet); - } - - // Swim effects (ripples, bubbles) - if (swimEffects && camera) { - swimEffects->render(currentCmd, perFrameSet); - } - - // Mount dust - if (mountDust && camera) { - mountDust->render(currentCmd, perFrameSet); - } - - // Charge effect - if (chargeEffect && camera) { - chargeEffect->render(currentCmd, perFrameSet); - } - - // Quest markers (billboards above NPCs) - if (questMarkerRenderer && camera) { - questMarkerRenderer->render(currentCmd, perFrameSet, *camera); - } - - // Underwater blue fog overlay — only for terrain water, not WMO water. - if (overlayPipeline && waterRenderer && camera) { - glm::vec3 camPos = camera->getPosition(); - auto waterH = waterRenderer->getNearestWaterHeightAt(camPos.x, camPos.y, camPos.z); - constexpr float MIN_SUBMERSION_OVERLAY = 1.5f; - if (waterH && camPos.z < (*waterH - MIN_SUBMERSION_OVERLAY) - && !waterRenderer->isWmoWaterAt(camPos.x, camPos.y)) { - float depth = *waterH - camPos.z - MIN_SUBMERSION_OVERLAY; - - // Check for canal (liquid type 5, 13, 17) — denser/darker fog - bool canal = false; - if (auto lt = waterRenderer->getWaterTypeAt(camPos.x, camPos.y)) - canal = (*lt == 5 || *lt == 13 || *lt == 17); - - // Fog opacity increases with depth: thin at surface, thick deep down - float fogStrength = 1.0f - std::exp(-depth * (canal ? 0.25f : 0.12f)); - fogStrength = glm::clamp(fogStrength, 0.0f, 0.75f); - - glm::vec4 tint = canal - ? 
glm::vec4(0.01f, 0.04f, 0.10f, fogStrength) - : glm::vec4(0.03f, 0.09f, 0.18f, fogStrength); - renderOverlay(tint); + if (terrainRenderer && camera && terrainEnabled && !skipTerrain) { + terrainFuture = std::async(std::launch::async, [&]() -> double { + auto t0 = std::chrono::steady_clock::now(); + VkCommandBuffer cmd = beginSecondary(SEC_TERRAIN); + setSecondaryViewportScissor(cmd); + terrainRenderer->render(cmd, perFrameSet, *camera); + vkEndCommandBuffer(cmd); + return std::chrono::duration( + std::chrono::steady_clock::now() - t0).count(); + }); } + + if (wmoRenderer && camera && !skipWMO) { + wmoFuture = std::async(std::launch::async, [&]() -> double { + auto t0 = std::chrono::steady_clock::now(); + VkCommandBuffer cmd = beginSecondary(SEC_WMO); + setSecondaryViewportScissor(cmd); + wmoRenderer->render(cmd, perFrameSet, *camera); + vkEndCommandBuffer(cmd); + return std::chrono::duration( + std::chrono::steady_clock::now() - t0).count(); + }); + } + + if (m2Renderer && camera && !skipM2) { + m2Future = std::async(std::launch::async, [&]() -> double { + auto t0 = std::chrono::steady_clock::now(); + VkCommandBuffer cmd = beginSecondary(SEC_M2); + setSecondaryViewportScissor(cmd); + m2Renderer->render(cmd, perFrameSet, *camera); + m2Renderer->renderSmokeParticles(cmd, perFrameSet); + m2Renderer->renderM2Particles(cmd, perFrameSet); + vkEndCommandBuffer(cmd); + return std::chrono::duration( + std::chrono::steady_clock::now() - t0).count(); + }); + } + + // --- Main thread: record sky (SEC_SKY) --- + { + VkCommandBuffer cmd = beginSecondary(SEC_SKY); + setSecondaryViewportScissor(cmd); + if (skySystem && camera && !skipSky) { + rendering::SkyParams skyParams; + skyParams.timeOfDay = timeOfDay; + skyParams.gameTime = gameHandler ? 
gameHandler->getGameTime() : -1.0f; + if (lightingManager) { + const auto& lighting = lightingManager->getLightingParams(); + skyParams.directionalDir = lighting.directionalDir; + skyParams.sunColor = lighting.diffuseColor; + skyParams.skyTopColor = lighting.skyTopColor; + skyParams.skyMiddleColor = lighting.skyMiddleColor; + skyParams.skyBand1Color = lighting.skyBand1Color; + skyParams.skyBand2Color = lighting.skyBand2Color; + skyParams.cloudDensity = lighting.cloudDensity; + skyParams.fogDensity = lighting.fogDensity; + skyParams.horizonGlow = lighting.horizonGlow; + } + if (gameHandler) skyParams.weatherIntensity = gameHandler->getWeatherIntensity(); + skyParams.skyboxModelId = 0; + skyParams.skyboxHasStars = false; + skySystem->render(cmd, perFrameSet, *camera, skyParams); + } + vkEndCommandBuffer(cmd); + } + + // --- Main thread: record characters + selection circle (SEC_CHARS) --- + { + VkCommandBuffer cmd = beginSecondary(SEC_CHARS); + setSecondaryViewportScissor(cmd); + renderSelectionCircle(view, projection, cmd); + if (characterRenderer && camera && !skipChars) { + characterRenderer->render(cmd, perFrameSet, *camera); + } + vkEndCommandBuffer(cmd); + } + + // --- Wait for workers --- + if (terrainFuture.valid()) lastTerrainRenderMs = terrainFuture.get(); + if (wmoFuture.valid()) lastWMORenderMs = wmoFuture.get(); + if (m2Future.valid()) lastM2RenderMs = m2Future.get(); + + // --- Main thread: record post-opaque (SEC_POST) --- + { + VkCommandBuffer cmd = beginSecondary(SEC_POST); + setSecondaryViewportScissor(cmd); + if (waterRenderer && camera) + waterRenderer->render(cmd, perFrameSet, *camera, globalTime, false, frameIdx); + if (weather && camera) weather->render(cmd, perFrameSet); + if (swimEffects && camera) swimEffects->render(cmd, perFrameSet); + if (mountDust && camera) mountDust->render(cmd, perFrameSet); + if (chargeEffect && camera) chargeEffect->render(cmd, perFrameSet); + if (questMarkerRenderer && camera) questMarkerRenderer->render(cmd, 
perFrameSet, *camera); + + // Underwater overlay + minimap + if (overlayPipeline && waterRenderer && camera) { + glm::vec3 camPos = camera->getPosition(); + auto waterH = waterRenderer->getNearestWaterHeightAt(camPos.x, camPos.y, camPos.z); + constexpr float MIN_SUBMERSION_OVERLAY = 1.5f; + if (waterH && camPos.z < (*waterH - MIN_SUBMERSION_OVERLAY) + && !waterRenderer->isWmoWaterAt(camPos.x, camPos.y)) { + float depth = *waterH - camPos.z - MIN_SUBMERSION_OVERLAY; + bool canal = false; + if (auto lt = waterRenderer->getWaterTypeAt(camPos.x, camPos.y)) + canal = (*lt == 5 || *lt == 13 || *lt == 17); + float fogStrength = 1.0f - std::exp(-depth * (canal ? 0.25f : 0.12f)); + fogStrength = glm::clamp(fogStrength, 0.0f, 0.75f); + glm::vec4 tint = canal + ? glm::vec4(0.01f, 0.04f, 0.10f, fogStrength) + : glm::vec4(0.03f, 0.09f, 0.18f, fogStrength); + renderOverlay(tint, cmd); + } + } + if (minimap && minimap->isEnabled() && camera && window) { + glm::vec3 minimapCenter = camera->getPosition(); + if (cameraController && cameraController->isThirdPerson()) + minimapCenter = characterPosition; + float minimapPlayerOrientation = 0.0f; + bool hasMinimapPlayerOrientation = false; + if (cameraController) { + float facingRad = glm::radians(characterYaw); + glm::vec3 facingFwd(std::cos(facingRad), std::sin(facingRad), 0.0f); + minimapPlayerOrientation = std::atan2(-facingFwd.x, facingFwd.y); + hasMinimapPlayerOrientation = true; + } else if (gameHandler) { + minimapPlayerOrientation = gameHandler->getMovementInfo().orientation; + hasMinimapPlayerOrientation = true; + } + minimap->render(cmd, *camera, minimapCenter, + window->getWidth(), window->getHeight(), + minimapPlayerOrientation, hasMinimapPlayerOrientation); + } + vkEndCommandBuffer(cmd); + } + + // --- Execute all secondary buffers in correct draw order --- + VkCommandBuffer validCmds[6]; + uint32_t numCmds = 0; + validCmds[numCmds++] = secondaryCmds_[SEC_SKY][frameIdx]; + if (terrainRenderer && camera && terrainEnabled && 
!skipTerrain) + validCmds[numCmds++] = secondaryCmds_[SEC_TERRAIN][frameIdx]; + if (wmoRenderer && camera && !skipWMO) + validCmds[numCmds++] = secondaryCmds_[SEC_WMO][frameIdx]; + validCmds[numCmds++] = secondaryCmds_[SEC_CHARS][frameIdx]; + if (m2Renderer && camera && !skipM2) + validCmds[numCmds++] = secondaryCmds_[SEC_M2][frameIdx]; + validCmds[numCmds++] = secondaryCmds_[SEC_POST][frameIdx]; + + vkCmdExecuteCommands(currentCmd, numCmds, validCmds); + + } else { + // ── Fallback: single-threaded inline recording (original path) ── + + if (skySystem && camera && !skipSky) { + rendering::SkyParams skyParams; + skyParams.timeOfDay = timeOfDay; + skyParams.gameTime = gameHandler ? gameHandler->getGameTime() : -1.0f; + if (lightingManager) { + const auto& lighting = lightingManager->getLightingParams(); + skyParams.directionalDir = lighting.directionalDir; + skyParams.sunColor = lighting.diffuseColor; + skyParams.skyTopColor = lighting.skyTopColor; + skyParams.skyMiddleColor = lighting.skyMiddleColor; + skyParams.skyBand1Color = lighting.skyBand1Color; + skyParams.skyBand2Color = lighting.skyBand2Color; + skyParams.cloudDensity = lighting.cloudDensity; + skyParams.fogDensity = lighting.fogDensity; + skyParams.horizonGlow = lighting.horizonGlow; + } + if (gameHandler) skyParams.weatherIntensity = gameHandler->getWeatherIntensity(); + skyParams.skyboxModelId = 0; + skyParams.skyboxHasStars = false; + skySystem->render(currentCmd, perFrameSet, *camera, skyParams); + } + + if (terrainRenderer && camera && terrainEnabled && !skipTerrain) { + auto terrainStart = std::chrono::steady_clock::now(); + terrainRenderer->render(currentCmd, perFrameSet, *camera); + lastTerrainRenderMs = std::chrono::duration( + std::chrono::steady_clock::now() - terrainStart).count(); + } + + if (wmoRenderer && camera && !skipWMO) { + wmoRenderer->prepareRender(); + auto wmoStart = std::chrono::steady_clock::now(); + wmoRenderer->render(currentCmd, perFrameSet, *camera); + lastWMORenderMs = 
std::chrono::duration( + std::chrono::steady_clock::now() - wmoStart).count(); + } + + renderSelectionCircle(view, projection); + + if (characterRenderer && camera && !skipChars) { + characterRenderer->prepareRender(frameIdx); + characterRenderer->render(currentCmd, perFrameSet, *camera); + } + + if (m2Renderer && camera && !skipM2) { + if (cameraController) { + m2Renderer->setInsideInterior(cameraController->isInsideWMO()); + m2Renderer->setOnTaxi(cameraController->isOnTaxi()); + } + m2Renderer->prepareRender(frameIdx, *camera); + auto m2Start = std::chrono::steady_clock::now(); + m2Renderer->render(currentCmd, perFrameSet, *camera); + m2Renderer->renderSmokeParticles(currentCmd, perFrameSet); + m2Renderer->renderM2Particles(currentCmd, perFrameSet); + lastM2RenderMs = std::chrono::duration( + std::chrono::steady_clock::now() - m2Start).count(); + } + + if (waterRenderer && camera) + waterRenderer->render(currentCmd, perFrameSet, *camera, globalTime, false, frameIdx); + if (weather && camera) weather->render(currentCmd, perFrameSet); + if (swimEffects && camera) swimEffects->render(currentCmd, perFrameSet); + if (mountDust && camera) mountDust->render(currentCmd, perFrameSet); + if (chargeEffect && camera) chargeEffect->render(currentCmd, perFrameSet); + if (questMarkerRenderer && camera) questMarkerRenderer->render(currentCmd, perFrameSet, *camera); } - // Minimap overlay - if (minimap && minimap->isEnabled() && camera && window) { - glm::vec3 minimapCenter = camera->getPosition(); - if (cameraController && cameraController->isThirdPerson()) - minimapCenter = characterPosition; - float minimapPlayerOrientation = 0.0f; - bool hasMinimapPlayerOrientation = false; - if (cameraController) { - // Use the same yaw that drives character model rendering so minimap - // orientation cannot drift by a different axis/sign convention. 
- float facingRad = glm::radians(characterYaw); - glm::vec3 facingFwd(std::cos(facingRad), std::sin(facingRad), 0.0f); - minimapPlayerOrientation = std::atan2(-facingFwd.x, facingFwd.y); - hasMinimapPlayerOrientation = true; - } else if (gameHandler) { - minimapPlayerOrientation = gameHandler->getMovementInfo().orientation; - hasMinimapPlayerOrientation = true; + // Underwater overlay and minimap — in the fallback path these run inline; + // in the parallel path they were already recorded into SEC_POST above. + if (!parallelRecordingEnabled_) { + if (overlayPipeline && waterRenderer && camera) { + glm::vec3 camPos = camera->getPosition(); + auto waterH = waterRenderer->getNearestWaterHeightAt(camPos.x, camPos.y, camPos.z); + constexpr float MIN_SUBMERSION_OVERLAY = 1.5f; + if (waterH && camPos.z < (*waterH - MIN_SUBMERSION_OVERLAY) + && !waterRenderer->isWmoWaterAt(camPos.x, camPos.y)) { + float depth = *waterH - camPos.z - MIN_SUBMERSION_OVERLAY; + bool canal = false; + if (auto lt = waterRenderer->getWaterTypeAt(camPos.x, camPos.y)) + canal = (*lt == 5 || *lt == 13 || *lt == 17); + float fogStrength = 1.0f - std::exp(-depth * (canal ? 0.25f : 0.12f)); + fogStrength = glm::clamp(fogStrength, 0.0f, 0.75f); + glm::vec4 tint = canal + ? 
glm::vec4(0.01f, 0.04f, 0.10f, fogStrength) + : glm::vec4(0.03f, 0.09f, 0.18f, fogStrength); + renderOverlay(tint); + } + } + if (minimap && minimap->isEnabled() && camera && window) { + glm::vec3 minimapCenter = camera->getPosition(); + if (cameraController && cameraController->isThirdPerson()) + minimapCenter = characterPosition; + float minimapPlayerOrientation = 0.0f; + bool hasMinimapPlayerOrientation = false; + if (cameraController) { + float facingRad = glm::radians(characterYaw); + glm::vec3 facingFwd(std::cos(facingRad), std::sin(facingRad), 0.0f); + minimapPlayerOrientation = std::atan2(-facingFwd.x, facingFwd.y); + hasMinimapPlayerOrientation = true; + } else if (gameHandler) { + minimapPlayerOrientation = gameHandler->getMovementInfo().orientation; + hasMinimapPlayerOrientation = true; + } + minimap->render(currentCmd, *camera, minimapCenter, + window->getWidth(), window->getHeight(), + minimapPlayerOrientation, hasMinimapPlayerOrientation); } - minimap->render(currentCmd, *camera, minimapCenter, - window->getWidth(), window->getHeight(), - minimapPlayerOrientation, hasMinimapPlayerOrientation); } auto renderEnd = std::chrono::steady_clock::now(); @@ -3413,8 +3926,6 @@ bool Renderer::initializeRenderers(pipeline::AssetManager* assetManager, const s if (!waterRenderer->initialize(vkCtx, perFrameSetLayout)) { LOG_ERROR("Failed to initialize water renderer"); waterRenderer.reset(); - } else if (vkCtx->getMsaaSamples() != VK_SAMPLE_COUNT_1_BIT) { - setupWater1xPass(); } } @@ -3868,6 +4379,128 @@ void Renderer::setupWater1xPass() { vkCtx->getSwapchainImageViews(), depthView, vkCtx->getSwapchainExtent()); } +// ========================= Multithreaded Secondary Command Buffers ========================= + +bool Renderer::createSecondaryCommandResources() { + if (!vkCtx) return false; + VkDevice device = vkCtx->getDevice(); + uint32_t queueFamily = vkCtx->getGraphicsQueueFamily(); + + VkCommandPoolCreateInfo poolCI{}; + poolCI.sType = 
VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO; + poolCI.flags = VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT; + poolCI.queueFamilyIndex = queueFamily; + + // Create worker command pools (one per worker thread) + for (uint32_t w = 0; w < NUM_WORKERS; ++w) { + if (vkCreateCommandPool(device, &poolCI, nullptr, &workerCmdPools_[w]) != VK_SUCCESS) { + LOG_ERROR("Failed to create worker command pool ", w); + return false; + } + } + + // Create main-thread secondary command pool + if (vkCreateCommandPool(device, &poolCI, nullptr, &mainSecondaryCmdPool_) != VK_SUCCESS) { + LOG_ERROR("Failed to create main secondary command pool"); + return false; + } + + // Allocate secondary command buffers + VkCommandBufferAllocateInfo allocInfo{}; + allocInfo.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO; + allocInfo.level = VK_COMMAND_BUFFER_LEVEL_SECONDARY; + allocInfo.commandBufferCount = 1; + + // Worker secondaries: SEC_TERRAIN=1, SEC_WMO=2, SEC_M2=4 → worker pools 0,1,2 + const uint32_t workerSecondaries[] = { SEC_TERRAIN, SEC_WMO, SEC_M2 }; + for (uint32_t w = 0; w < NUM_WORKERS; ++w) { + allocInfo.commandPool = workerCmdPools_[w]; + for (uint32_t f = 0; f < MAX_FRAMES; ++f) { + if (vkAllocateCommandBuffers(device, &allocInfo, &secondaryCmds_[workerSecondaries[w]][f]) != VK_SUCCESS) { + LOG_ERROR("Failed to allocate worker secondary buffer w=", w, " f=", f); + return false; + } + } + } + + // Main-thread secondaries: SEC_SKY=0, SEC_CHARS=3, SEC_POST=5, SEC_IMGUI=6 + const uint32_t mainSecondaries[] = { SEC_SKY, SEC_CHARS, SEC_POST, SEC_IMGUI }; + for (uint32_t idx : mainSecondaries) { + allocInfo.commandPool = mainSecondaryCmdPool_; + for (uint32_t f = 0; f < MAX_FRAMES; ++f) { + if (vkAllocateCommandBuffers(device, &allocInfo, &secondaryCmds_[idx][f]) != VK_SUCCESS) { + LOG_ERROR("Failed to allocate main secondary buffer idx=", idx, " f=", f); + return false; + } + } + } + + parallelRecordingEnabled_ = true; + LOG_INFO("Multithreaded rendering: ", NUM_WORKERS, " worker 
threads, ", + NUM_SECONDARIES, " secondary buffers [ENABLED]"); + return true; +} + +void Renderer::destroySecondaryCommandResources() { + if (!vkCtx) return; + VkDevice device = vkCtx->getDevice(); + vkDeviceWaitIdle(device); + + // Secondary buffers are freed when their pool is destroyed + for (uint32_t w = 0; w < NUM_WORKERS; ++w) { + if (workerCmdPools_[w]) { + vkDestroyCommandPool(device, workerCmdPools_[w], nullptr); + workerCmdPools_[w] = VK_NULL_HANDLE; + } + } + if (mainSecondaryCmdPool_) { + vkDestroyCommandPool(device, mainSecondaryCmdPool_, nullptr); + mainSecondaryCmdPool_ = VK_NULL_HANDLE; + } + + for (auto& arr : secondaryCmds_) + for (auto& cmd : arr) + cmd = VK_NULL_HANDLE; + + parallelRecordingEnabled_ = false; +} + +VkCommandBuffer Renderer::beginSecondary(uint32_t secondaryIndex) { + uint32_t frame = vkCtx->getCurrentFrame(); + VkCommandBuffer cmd = secondaryCmds_[secondaryIndex][frame]; + + VkCommandBufferInheritanceInfo inheritInfo{}; + inheritInfo.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_INHERITANCE_INFO; + inheritInfo.renderPass = activeRenderPass_; + inheritInfo.subpass = 0; + inheritInfo.framebuffer = activeFramebuffer_; + + VkCommandBufferBeginInfo beginInfo{}; + beginInfo.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO; + beginInfo.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT + | VK_COMMAND_BUFFER_USAGE_RENDER_PASS_CONTINUE_BIT; + beginInfo.pInheritanceInfo = &inheritInfo; + + VkResult result = vkBeginCommandBuffer(cmd, &beginInfo); + if (result != VK_SUCCESS) { + LOG_ERROR("vkBeginCommandBuffer failed for secondary ", secondaryIndex, + " frame ", frame, " result=", static_cast(result)); + } + return cmd; +} + +void Renderer::setSecondaryViewportScissor(VkCommandBuffer cmd) { + VkViewport vp{}; + vp.width = static_cast(activeRenderExtent_.width); + vp.height = static_cast(activeRenderExtent_.height); + vp.maxDepth = 1.0f; + vkCmdSetViewport(cmd, 0, 1, &vp); + + VkRect2D sc{}; + sc.extent = activeRenderExtent_; + 
vkCmdSetScissor(cmd, 0, 1, &sc); +} + void Renderer::renderReflectionPass() { if (!waterRenderer || !camera || !waterRenderer->hasReflectionPass() || !waterRenderer->hasSurfaces()) return; if (currentCmd == VK_NULL_HANDLE || !reflPerFrameUBOMapped) return; diff --git a/src/rendering/terrain_manager.cpp b/src/rendering/terrain_manager.cpp index 97527c8c..f15541ea 100644 --- a/src/rendering/terrain_manager.cpp +++ b/src/rendering/terrain_manager.cpp @@ -911,6 +911,8 @@ bool TerrainManager::advanceFinalization(FinalizingTile& ft) { wmoRenderer->setDeferNormalMaps(false); wmoRenderer->setPredecodedBLPCache(nullptr); if (ft.wmoModelIndex < pending->wmoModels.size()) return false; + // All WMO models loaded — backfill normal/height maps that were skipped during streaming + wmoRenderer->backfillNormalMaps(); } ft.phase = FinalizationPhase::WMO_INSTANCES; return false; diff --git a/src/rendering/vk_context.cpp b/src/rendering/vk_context.cpp index 79e7eac3..dc4144fa 100644 --- a/src/rendering/vk_context.cpp +++ b/src/rendering/vk_context.cpp @@ -252,14 +252,22 @@ bool VkContext::createAllocator() { bool VkContext::createSwapchain(int width, int height) { vkb::SwapchainBuilder swapchainBuilder{physicalDevice, device, surface}; - auto swapRet = swapchainBuilder + auto& builder = swapchainBuilder .set_desired_format({VK_FORMAT_B8G8R8A8_UNORM, VK_COLOR_SPACE_SRGB_NONLINEAR_KHR}) - .set_desired_present_mode(VK_PRESENT_MODE_FIFO_KHR) // VSync .set_desired_extent(static_cast(width), static_cast(height)) .set_image_usage_flags(VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT) .set_desired_min_image_count(2) - .set_old_swapchain(swapchain) // For recreation - .build(); + .set_old_swapchain(swapchain); + + if (vsync_) { + builder.set_desired_present_mode(VK_PRESENT_MODE_FIFO_KHR); + } else { + builder.set_desired_present_mode(VK_PRESENT_MODE_IMMEDIATE_KHR); + builder.add_fallback_present_mode(VK_PRESENT_MODE_MAILBOX_KHR); + 
builder.add_fallback_present_mode(VK_PRESENT_MODE_FIFO_RELAXED_KHR); + } + + auto swapRet = builder.build(); if (!swapRet) { LOG_ERROR("Failed to create Vulkan swapchain: ", swapRet.error().message()); @@ -1026,14 +1034,22 @@ bool VkContext::recreateSwapchain(int width, int height) { VkSwapchainKHR oldSwapchain = swapchain; vkb::SwapchainBuilder swapchainBuilder{physicalDevice, device, surface}; - auto swapRet = swapchainBuilder + auto& builder = swapchainBuilder .set_desired_format({VK_FORMAT_B8G8R8A8_UNORM, VK_COLOR_SPACE_SRGB_NONLINEAR_KHR}) - .set_desired_present_mode(VK_PRESENT_MODE_FIFO_KHR) .set_desired_extent(static_cast(width), static_cast(height)) .set_image_usage_flags(VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT) .set_desired_min_image_count(2) - .set_old_swapchain(oldSwapchain) - .build(); + .set_old_swapchain(oldSwapchain); + + if (vsync_) { + builder.set_desired_present_mode(VK_PRESENT_MODE_FIFO_KHR); + } else { + builder.set_desired_present_mode(VK_PRESENT_MODE_IMMEDIATE_KHR); + builder.add_fallback_present_mode(VK_PRESENT_MODE_MAILBOX_KHR); + builder.add_fallback_present_mode(VK_PRESENT_MODE_FIFO_RELAXED_KHR); + } + + auto swapRet = builder.build(); if (oldSwapchain) { vkDestroySwapchainKHR(device, oldSwapchain, nullptr); diff --git a/src/rendering/wmo_renderer.cpp b/src/rendering/wmo_renderer.cpp index 5dec0e3e..2e5afcc3 100644 --- a/src/rendering/wmo_renderer.cpp +++ b/src/rendering/wmo_renderer.cpp @@ -787,8 +787,8 @@ bool WMORenderer::loadModel(const pipeline::WMOModel& model, uint32_t id) { } // Build doodad's local transform (WoW coordinates) - // WMO doodads use quaternion rotation (X/Y swapped for correct orientation) - glm::quat fixedRotation(doodad.rotation.w, doodad.rotation.y, doodad.rotation.x, doodad.rotation.z); + // WMO doodads use quaternion rotation + glm::quat fixedRotation(doodad.rotation.w, doodad.rotation.x, doodad.rotation.y, doodad.rotation.z); glm::mat4 localTransform(1.0f); localTransform = 
glm::translate(localTransform, doodad.position); @@ -1318,15 +1318,10 @@ void WMORenderer::gatherCandidates(const glm::vec3& queryMin, const glm::vec3& q } } -void WMORenderer::render(VkCommandBuffer cmd, VkDescriptorSet perFrameSet, const Camera& camera) { +void WMORenderer::prepareRender() { ++currentFrameId; - if (!opaquePipeline_ || instances.empty()) { - lastDrawCalls = 0; - return; - } - - // Update material UBOs if settings changed + // Update material UBOs if settings changed (mapped memory writes — main thread only) if (materialSettingsDirty_) { materialSettingsDirty_ = false; static const int pomSampleTable[] = { 16, 32, 64 }; @@ -1335,7 +1330,6 @@ void WMORenderer::render(VkCommandBuffer cmd, VkDescriptorSet perFrameSet, const for (auto& group : model.groups) { for (auto& mb : group.mergedBatches) { if (!mb.materialUBO) continue; - // Read existing UBO data, update normal/POM fields VmaAllocationInfo allocInfo{}; vmaGetAllocationInfo(vkCtx_->getAllocator(), mb.materialUBOAlloc, &allocInfo); if (allocInfo.pMappedData) { @@ -1351,6 +1345,13 @@ void WMORenderer::render(VkCommandBuffer cmd, VkDescriptorSet perFrameSet, const } } } +} + +void WMORenderer::render(VkCommandBuffer cmd, VkDescriptorSet perFrameSet, const Camera& camera) { + if (!opaquePipeline_ || instances.empty()) { + lastDrawCalls = 0; + return; + } lastDrawCalls = 0; @@ -1362,43 +1363,45 @@ void WMORenderer::render(VkCommandBuffer cmd, VkDescriptorSet perFrameSet, const lastPortalCulledGroups = 0; lastDistanceCulledGroups = 0; - // ── Phase 1: Parallel visibility culling ────────────────────────── - std::vector visibleInstances; - visibleInstances.reserve(instances.size()); + // ── Phase 1: Visibility culling ────────────────────────── + visibleInstances_.clear(); for (size_t i = 0; i < instances.size(); ++i) { - const auto& instance = instances[i]; - if (loadedModels.find(instance.modelId) == loadedModels.end()) - continue; - visibleInstances.push_back(i); + if 
(loadedModels.count(instances[i].modelId)) + visibleInstances_.push_back(i); } glm::vec3 camPos = camera.getPosition(); bool doPortalCull = portalCulling; - bool doFrustumCull = false; // Temporarily disabled: can over-cull world WMOs bool doDistanceCull = distanceCulling; - auto cullInstance = [&](size_t instIdx) -> InstanceDrawList { - if (instIdx >= instances.size()) return InstanceDrawList{}; + auto cullInstance = [&](size_t instIdx, InstanceDrawList& result) { + if (instIdx >= instances.size()) return; const auto& instance = instances[instIdx]; auto mdlIt = loadedModels.find(instance.modelId); - if (mdlIt == loadedModels.end()) return InstanceDrawList{}; + if (mdlIt == loadedModels.end()) return; const ModelData& model = mdlIt->second; - InstanceDrawList result; result.instanceIndex = instIdx; + result.visibleGroups.clear(); + result.portalCulled = 0; + result.distanceCulled = 0; - // Portal-based visibility - std::unordered_set portalVisibleGroups; + // Portal-based visibility — use a flat sorted vector instead of unordered_set + std::vector portalVisibleGroups; bool usePortalCulling = doPortalCull && !model.portals.empty() && !model.portalRefs.empty(); if (usePortalCulling) { + std::unordered_set pvgSet; glm::vec4 localCamPos = instance.invModelMatrix * glm::vec4(camPos, 1.0f); getVisibleGroupsViaPortals(model, glm::vec3(localCamPos), frustum, - instance.modelMatrix, portalVisibleGroups); + instance.modelMatrix, pvgSet); + portalVisibleGroups.assign(pvgSet.begin(), pvgSet.end()); + std::sort(portalVisibleGroups.begin(), portalVisibleGroups.end()); } for (size_t gi = 0; gi < model.groups.size(); ++gi) { if (usePortalCulling && - portalVisibleGroups.find(static_cast(gi)) == portalVisibleGroups.end()) { + !std::binary_search(portalVisibleGroups.begin(), portalVisibleGroups.end(), + static_cast(gi))) { result.portalCulled++; continue; } @@ -1414,62 +1417,18 @@ void WMORenderer::render(VkCommandBuffer cmd, VkDescriptorSet perFrameSet, const continue; } } - - if 
(doFrustumCull && !frustum.intersectsAABB(gMin, gMax)) - continue; } result.visibleGroups.push_back(static_cast(gi)); } - return result; }; - // Dispatch culling — parallel when enough instances, sequential otherwise. - std::vector drawLists; - drawLists.reserve(visibleInstances.size()); + // Resize drawLists to match (reuses previous capacity) + drawLists_.resize(visibleInstances_.size()); - static const size_t minParallelCullInstances = std::max( - 4, envSizeOrDefault("WOWEE_WMO_CULL_MT_MIN", 128)); - if (visibleInstances.size() >= minParallelCullInstances && numCullThreads_ > 1) { - static const size_t minCullWorkPerThread = std::max( - 16, envSizeOrDefault("WOWEE_WMO_CULL_WORK_PER_THREAD", 64)); - const size_t maxUsefulThreads = std::max( - 1, (visibleInstances.size() + minCullWorkPerThread - 1) / minCullWorkPerThread); - const size_t numThreads = std::min(static_cast(numCullThreads_), maxUsefulThreads); - if (numThreads <= 1) { - for (size_t idx : visibleInstances) { - drawLists.push_back(cullInstance(idx)); - } - } else { - const size_t chunkSize = visibleInstances.size() / numThreads; - const size_t remainder = visibleInstances.size() % numThreads; - - drawLists.resize(visibleInstances.size()); - - cullFutures_.clear(); - if (cullFutures_.capacity() < numThreads) { - cullFutures_.reserve(numThreads); - } - - size_t start = 0; - for (size_t t = 0; t < numThreads; ++t) { - const size_t end = start + chunkSize + (t < remainder ? 
1 : 0); - cullFutures_.push_back(std::async(std::launch::async, - [&, start, end]() { - for (size_t j = start; j < end; ++j) { - drawLists[j] = cullInstance(visibleInstances[j]); - } - })); - start = end; - } - - for (auto& f : cullFutures_) { - f.get(); - } - } - } else { - for (size_t idx : visibleInstances) - drawLists.push_back(cullInstance(idx)); + // Sequential culling (parallel dispatch overhead > savings for typical instance counts) + for (size_t j = 0; j < visibleInstances_.size(); ++j) { + cullInstance(visibleInstances_[j], drawLists_[j]); } // ── Phase 2: Vulkan draw ──────────────────────────────── @@ -1484,7 +1443,7 @@ void WMORenderer::render(VkCommandBuffer cmd, VkDescriptorSet perFrameSet, const // Track which pipeline is currently bound: 0=opaque, 1=transparent, 2=glass int currentPipelineKind = 0; - for (const auto& dl : drawLists) { + for (const auto& dl : drawLists_) { if (dl.instanceIndex >= instances.size()) continue; const auto& instance = instances[dl.instanceIndex]; auto modelIt = loadedModels.find(instance.modelId); @@ -2412,6 +2371,69 @@ VkTexture* WMORenderer::loadTexture(const std::string& path) { return rawPtr; } +void WMORenderer::backfillNormalMaps() { + if (!normalMappingEnabled_ && !pomEnabled_) return; + + if (!assetManager) return; + + int generated = 0; + for (auto& [key, entry] : textureCache) { + if (entry.normalHeightMap) continue; // already has one + if (!entry.texture) continue; + + // Re-load the BLP from MPQ to get pixel data for normal map generation + pipeline::BLPImage blp = assetManager->loadTexture(key); + if (!blp.isValid() || blp.width == 0 || blp.height == 0) continue; + + float variance = 0.0f; + auto nhMap = generateNormalHeightMap(blp.data.data(), blp.width, blp.height, variance); + if (nhMap) { + entry.normalHeightMap = std::move(nhMap); + entry.heightMapVariance = variance; + generated++; + } + } + + if (generated > 0) { + VkDevice device = vkCtx_->getDevice(); + int rebound = 0; + // Update merged batches: 
assign normal map pointer and rebind descriptor set + for (auto& [modelId, model] : loadedModels) { + for (auto& group : model.groups) { + for (auto& mb : group.mergedBatches) { + if (mb.normalHeightMap) continue; // already set + if (!mb.texture) continue; + // Find this texture in the cache + for (const auto& [cacheKey, cacheEntry] : textureCache) { + if (cacheEntry.texture.get() == mb.texture) { + if (cacheEntry.normalHeightMap) { + mb.normalHeightMap = cacheEntry.normalHeightMap.get(); + mb.heightMapVariance = cacheEntry.heightMapVariance; + // Rebind descriptor set binding 2 to the real normal/height map + if (mb.materialSet) { + VkDescriptorImageInfo nhImgInfo = mb.normalHeightMap->descriptorInfo(); + VkWriteDescriptorSet write{}; + write.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET; + write.dstSet = mb.materialSet; + write.dstBinding = 2; + write.descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER; + write.descriptorCount = 1; + write.pImageInfo = &nhImgInfo; + vkUpdateDescriptorSets(device, 1, &write, 0, nullptr); + rebound++; + } + } + break; + } + } + } + } + } + materialSettingsDirty_ = true; + LOG_INFO("Backfilled ", generated, " normal/height maps (", rebound, " descriptor sets rebound) for deferred WMO textures"); + } +} + // Ray-AABB intersection (slab method) // Returns true if the ray intersects the axis-aligned bounding box static bool rayIntersectsAABB(const glm::vec3& origin, const glm::vec3& dir, @@ -3145,18 +3167,13 @@ bool WMORenderer::checkWallCollision(const glm::vec3& from, const glm::vec3& to, if (triHeight < 1.0f && tb.maxZ <= localFeetZ + 1.2f) continue; // Use MOPY flags to filter wall collision. - // Collidable triangles (flag 0x01) block the player — including - // invisible collision walls (0x01 without 0x20) used in tunnels. - // Skip detail/decorative geometry (0x04) and render-only surfaces. + // Collide with triangles that have the collision flag (0x08) or no flags at all. 
+ // Skip detail/decorative (0x04) and render-only (0x20 without 0x08) surfaces. uint32_t triIdx = triStart / 3; if (!group.triMopyFlags.empty() && triIdx < group.triMopyFlags.size()) { uint8_t mopy = group.triMopyFlags[triIdx]; if (mopy != 0) { - bool collidable = (mopy & 0x01) != 0; - bool detail = (mopy & 0x04) != 0; - if (!collidable || detail) { - continue; - } + if ((mopy & 0x04) || !(mopy & 0x08)) continue; } } @@ -3217,8 +3234,8 @@ bool WMORenderer::checkWallCollision(const glm::vec3& from, const glm::vec3& to, if (absNz >= 0.35f) continue; const float SKIN = 0.005f; // small separation so we don't re-collide immediately - // Stronger push when inside WMO for more responsive indoor collision - const float MAX_PUSH = insideWMO ? 0.35f : 0.15f; + // Push must cover full penetration to prevent gradual clip-through + const float MAX_PUSH = PLAYER_RADIUS; float penetration = (PLAYER_RADIUS - horizDist); float pushDist = glm::clamp(penetration + SKIN, 0.0f, MAX_PUSH); glm::vec2 pushDir2; diff --git a/src/ui/game_screen.cpp b/src/ui/game_screen.cpp index 3f1c0eb9..19db13e9 100644 --- a/src/ui/game_screen.cpp +++ b/src/ui/game_screen.cpp @@ -317,6 +317,20 @@ void GameScreen::render(game::GameHandler& gameHandler) { } } + // Apply saved FSR setting once when renderer is available + if (!fsrSettingsApplied_ && pendingFSR) { + auto* renderer = core::Application::getInstance().getRenderer(); + if (renderer) { + static const float fsrScales[] = { 0.77f, 0.67f, 0.59f, 0.50f }; + renderer->setFSRQuality(fsrScales[pendingFSRQuality]); + renderer->setFSRSharpness(pendingFSRSharpness); + renderer->setFSREnabled(true); + fsrSettingsApplied_ = true; + } + } else { + fsrSettingsApplied_ = true; + } + // Apply auto-loot setting to GameHandler every frame (cheap bool sync) gameHandler.setAutoLoot(pendingAutoLoot); @@ -2687,6 +2701,12 @@ void GameScreen::sendChatMessage(game::GameHandler& gameHandler) { chatInputBuffer[0] = '\0'; return; } + // /unstuckhearth command — teleport to 
hearthstone bind point + if (cmdLower == "unstuckhearth") { + gameHandler.unstuckHearth(); + chatInputBuffer[0] = '\0'; + return; + } // /transport board — board test transport if (cmdLower == "transport board") { @@ -6270,6 +6290,25 @@ void GameScreen::renderSettingsWindow() { saveSettings(); } } + // FSR 1.0 Upscaling + { + if (ImGui::Checkbox("FSR Upscaling (Experimental)", &pendingFSR)) { + if (renderer) renderer->setFSREnabled(pendingFSR); + saveSettings(); + } + if (pendingFSR) { + const char* fsrQualityLabels[] = { "Ultra Quality (77%)", "Quality (67%)", "Balanced (59%)", "Performance (50%)" }; + static const float fsrScaleFactors[] = { 0.77f, 0.67f, 0.59f, 0.50f }; + if (ImGui::Combo("FSR Quality", &pendingFSRQuality, fsrQualityLabels, 4)) { + if (renderer) renderer->setFSRQuality(fsrScaleFactors[pendingFSRQuality]); + saveSettings(); + } + if (ImGui::SliderFloat("FSR Sharpness", &pendingFSRSharpness, 0.0f, 2.0f, "%.1f")) { + if (renderer) renderer->setFSRSharpness(pendingFSRSharpness); + saveSettings(); + } + } + } if (ImGui::SliderInt("Ground Clutter Density", &pendingGroundClutterDensity, 0, 150, "%d%%")) { if (renderer) { if (auto* tm = renderer->getTerrainManager()) { @@ -7384,6 +7423,9 @@ void GameScreen::saveSettings() { out << "normal_map_strength=" << pendingNormalMapStrength << "\n"; out << "pom=" << (pendingPOM ? 1 : 0) << "\n"; out << "pom_quality=" << pendingPOMQuality << "\n"; + out << "fsr=" << (pendingFSR ? 
1 : 0) << "\n"; + out << "fsr_quality=" << pendingFSRQuality << "\n"; + out << "fsr_sharpness=" << pendingFSRSharpness << "\n"; // Controls out << "mouse_sensitivity=" << pendingMouseSensitivity << "\n"; @@ -7470,6 +7512,9 @@ void GameScreen::loadSettings() { else if (key == "normal_map_strength") pendingNormalMapStrength = std::clamp(std::stof(val), 0.0f, 2.0f); else if (key == "pom") pendingPOM = (std::stoi(val) != 0); else if (key == "pom_quality") pendingPOMQuality = std::clamp(std::stoi(val), 0, 2); + else if (key == "fsr") pendingFSR = (std::stoi(val) != 0); + else if (key == "fsr_quality") pendingFSRQuality = std::clamp(std::stoi(val), 0, 3); + else if (key == "fsr_sharpness") pendingFSRSharpness = std::clamp(std::stof(val), 0.0f, 2.0f); // Controls else if (key == "mouse_sensitivity") pendingMouseSensitivity = std::clamp(std::stof(val), 0.05f, 1.0f); else if (key == "invert_mouse") pendingInvertMouse = (std::stoi(val) != 0); From 4cb03c38fe6d68dd92a2a5f6c0878279228b42a1 Mon Sep 17 00:00:00 2001 From: Kelsi Date: Sat, 7 Mar 2026 22:29:06 -0800 Subject: [PATCH 05/13] Parallel animation updates, thread-safe collision, M2 pop-in fix, shadow stabilization - Overlap M2 and character animation updates via std::async (~2-5ms saved) - Thread-local collision scratch buffers for concurrent floor queries - Parallel terrain/WMO/M2 floor queries in camera controller - Seed new M2 instance bones from existing siblings to eliminate pop-in flash - Fix shadow flicker: snap center along stable light axes instead of in view space - Increase shadow distance default to 300 units (slider max 500) --- include/rendering/m2_renderer.hpp | 4 +- include/rendering/renderer.hpp | 4 +- include/rendering/wmo_renderer.hpp | 4 +- include/ui/game_screen.hpp | 2 +- src/rendering/camera_controller.cpp | 45 ++++++++++++++---- src/rendering/m2_renderer.cpp | 72 ++++++++++++++++++++++------- src/rendering/renderer.cpp | 65 ++++++++++++++------------ src/rendering/wmo_renderer.cpp | 45 
++++++++++-------- src/ui/game_screen.cpp | 6 +-- 9 files changed, 160 insertions(+), 87 deletions(-) diff --git a/include/rendering/m2_renderer.hpp b/include/rendering/m2_renderer.hpp index 4b26214f..75a92565 100644 --- a/include/rendering/m2_renderer.hpp +++ b/include/rendering/m2_renderer.hpp @@ -475,9 +475,7 @@ private: static constexpr float SPATIAL_CELL_SIZE = 64.0f; std::unordered_map, GridCellHash> spatialGrid; std::unordered_map instanceIndexById; - mutable std::vector candidateScratch; - mutable std::unordered_set candidateIdScratch; - mutable std::vector collisionTriScratch_; + // Collision scratch buffers are thread_local (see m2_renderer.cpp) for thread-safety. // Collision query profiling (per frame). mutable double queryTimeMs = 0.0; diff --git a/include/rendering/renderer.hpp b/include/rendering/renderer.hpp index c7582eea..cbb9c7e1 100644 --- a/include/rendering/renderer.hpp +++ b/include/rendering/renderer.hpp @@ -246,7 +246,7 @@ private: glm::vec3 shadowCenter = glm::vec3(0.0f); bool shadowCenterInitialized = false; bool shadowsEnabled = true; - float shadowDistance_ = 72.0f; // Shadow frustum half-extent (default: 72 units) + float shadowDistance_ = 300.0f; // Shadow frustum half-extent (default: 300 units) uint32_t shadowFrameCounter_ = 0; @@ -257,7 +257,7 @@ public: void setShadowsEnabled(bool enabled) { shadowsEnabled = enabled; } bool areShadowsEnabled() const { return shadowsEnabled; } - void setShadowDistance(float dist) { shadowDistance_ = glm::clamp(dist, 40.0f, 200.0f); } + void setShadowDistance(float dist) { shadowDistance_ = glm::clamp(dist, 40.0f, 500.0f); } float getShadowDistance() const { return shadowDistance_; } void setMsaaSamples(VkSampleCountFlagBits samples); diff --git a/include/rendering/wmo_renderer.hpp b/include/rendering/wmo_renderer.hpp index b8be9485..4546d41c 100644 --- a/include/rendering/wmo_renderer.hpp +++ b/include/rendering/wmo_renderer.hpp @@ -711,9 +711,7 @@ private: static constexpr float SPATIAL_CELL_SIZE 
= 64.0f; std::unordered_map, GridCellHash> spatialGrid; std::unordered_map instanceIndexById; - mutable std::vector candidateScratch; - mutable std::vector triScratch_; // Scratch for collision grid queries - mutable std::unordered_set candidateIdScratch; + // Collision scratch buffers are thread_local (see wmo_renderer.cpp) for thread-safety. // Parallel visibility culling uint32_t numCullThreads_ = 1; diff --git a/include/ui/game_screen.hpp b/include/ui/game_screen.hpp index bf7558cd..3bb99628 100644 --- a/include/ui/game_screen.hpp +++ b/include/ui/game_screen.hpp @@ -87,7 +87,7 @@ private: bool pendingVsync = false; int pendingResIndex = 0; bool pendingShadows = true; - float pendingShadowDistance = 72.0f; + float pendingShadowDistance = 300.0f; bool pendingWaterRefraction = false; int pendingMasterVolume = 100; int pendingMusicVolume = 30; diff --git a/src/rendering/camera_controller.cpp b/src/rendering/camera_controller.cpp index 4103cc9f..891d53ba 100644 --- a/src/rendering/camera_controller.cpp +++ b/src/rendering/camera_controller.cpp @@ -1,5 +1,6 @@ #include "rendering/camera_controller.hpp" #include +#include #include #include "rendering/terrain_manager.hpp" #include "rendering/wmo_renderer.hpp" @@ -808,25 +809,53 @@ void CameraController::update(float deltaTime) { if (useCached) { groundH = cachedFloorHeight_; } else { - // Full collision check + // Full collision check — run terrain/WMO/M2 queries in parallel std::optional terrainH; std::optional wmoH; std::optional m2H; - if (terrainManager) { - terrainH = terrainManager->getHeightAt(targetPos.x, targetPos.y); - } // When airborne, anchor probe to last ground level so the // ceiling doesn't rise with the jump and catch roof geometry. float wmoBaseZ = grounded ? std::max(targetPos.z, lastGroundZ) : lastGroundZ; float wmoProbeZ = wmoBaseZ + stepUpBudget + 0.5f; float wmoNormalZ = 1.0f; + + // Launch WMO + M2 floor queries asynchronously while terrain runs on this thread. 
+ // Collision scratch buffers are thread_local so concurrent calls are safe. + using FloorResult = std::pair, float>; + std::future wmoFuture; + std::future m2Future; + bool wmoAsync = false, m2Async = false; + float px = targetPos.x, py = targetPos.y; if (wmoRenderer) { - wmoH = wmoRenderer->getFloorHeight(targetPos.x, targetPos.y, wmoProbeZ, &wmoNormalZ); + wmoAsync = true; + wmoFuture = std::async(std::launch::async, + [this, px, py, wmoProbeZ]() -> FloorResult { + float nz = 1.0f; + auto h = wmoRenderer->getFloorHeight(px, py, wmoProbeZ, &nz); + return {h, nz}; + }); } if (m2Renderer && !externalFollow_) { - float m2NormalZ = 1.0f; - m2H = m2Renderer->getFloorHeight(targetPos.x, targetPos.y, wmoProbeZ, &m2NormalZ); - if (m2H && m2NormalZ < MIN_WALKABLE_NORMAL_M2) { + m2Async = true; + m2Future = std::async(std::launch::async, + [this, px, py, wmoProbeZ]() -> FloorResult { + float nz = 1.0f; + auto h = m2Renderer->getFloorHeight(px, py, wmoProbeZ, &nz); + return {h, nz}; + }); + } + if (terrainManager) { + terrainH = terrainManager->getHeightAt(targetPos.x, targetPos.y); + } + if (wmoAsync) { + auto [h, nz] = wmoFuture.get(); + wmoH = h; + wmoNormalZ = nz; + } + if (m2Async) { + auto [h, nz] = m2Future.get(); + m2H = h; + if (m2H && nz < MIN_WALKABLE_NORMAL_M2) { m2H = std::nullopt; } } diff --git a/src/rendering/m2_renderer.cpp b/src/rendering/m2_renderer.cpp index 3a097217..0ca3f940 100644 --- a/src/rendering/m2_renderer.cpp +++ b/src/rendering/m2_renderer.cpp @@ -282,6 +282,14 @@ glm::vec3 closestPointOnTriangle(const glm::vec3& p, } // namespace +// Thread-local scratch buffers for collision queries (allows concurrent getFloorHeight calls) +static thread_local std::vector tl_m2_candidateScratch; +static thread_local std::unordered_set tl_m2_candidateIdScratch; +static thread_local std::vector tl_m2_collisionTriScratch; + +// Forward declaration (defined after animation helpers) +static void computeBoneMatrices(const M2ModelGPU& model, M2Instance& instance); 
+ void M2Instance::updateModelMatrix() { modelMatrix = glm::mat4(1.0f); modelMatrix = glm::translate(modelMatrix, position); @@ -1673,6 +1681,21 @@ uint32_t M2Renderer::createInstance(uint32_t modelId, const glm::vec3& position, instance.animDuration = static_cast(mdl.sequences[0].duration); instance.animTime = static_cast(rand() % std::max(1u, mdl.sequences[0].duration)); instance.variationTimer = 3000.0f + static_cast(rand() % 8000); + + // Seed bone matrices from an existing instance of the same model so the + // new instance renders immediately instead of being invisible until the + // next update() computes bones (prevents pop-in flash). + for (const auto& existing : instances) { + if (existing.modelId == modelId && !existing.boneMatrices.empty()) { + instance.boneMatrices = existing.boneMatrices; + instance.bonesDirty = true; + break; + } + } + // If no sibling exists yet, compute bones immediately + if (instance.boneMatrices.empty()) { + computeBoneMatrices(mdlRef, instance); + } } // Register in dedup map before pushing (uses original position, not ground-adjusted) @@ -1764,6 +1787,18 @@ uint32_t M2Renderer::createInstanceWithMatrix(uint32_t modelId, const glm::mat4& instance.animDuration = static_cast(mdl2.sequences[0].duration); instance.animTime = static_cast(rand() % std::max(1u, mdl2.sequences[0].duration)); instance.variationTimer = 3000.0f + static_cast(rand() % 8000); + + // Seed bone matrices from an existing sibling so the instance renders immediately + for (const auto& existing : instances) { + if (existing.modelId == modelId && !existing.boneMatrices.empty()) { + instance.boneMatrices = existing.boneMatrices; + instance.bonesDirty = true; + break; + } + } + if (instance.boneMatrices.empty()) { + computeBoneMatrices(mdl2, instance); + } } else { instance.animTime = static_cast(rand()) / RAND_MAX * 10000.0f; } @@ -2380,12 +2415,15 @@ void M2Renderer::render(VkCommandBuffer cmd, VkDescriptorSet perFrameSet, const } // Upload bone matrices to SSBO 
if model has skeletal animation. - // Bone buffers are pre-allocated by prepareRender() on the main thread. - // If not yet allocated (race/timing), skip this instance entirely to avoid - // a bind-pose flash — it will render correctly next frame. - bool needsBones = model.hasAnimation && !model.disableAnimation && !instance.boneMatrices.empty(); + // Skip animated instances entirely until bones are computed + buffers allocated + // to prevent bind-pose/T-pose flash on first appearance. + bool modelNeedsAnimation = model.hasAnimation && !model.disableAnimation; + if (modelNeedsAnimation && instance.boneMatrices.empty()) { + continue; // Bones not yet computed — skip to avoid bind-pose flash + } + bool needsBones = modelNeedsAnimation && !instance.boneMatrices.empty(); if (needsBones && (!instance.boneBuffer[frameIndex] || !instance.boneSet[frameIndex])) { - continue; + continue; // Bone buffers not yet allocated — skip to avoid bind-pose flash } bool useBones = needsBones; if (useBones) { @@ -3620,7 +3658,7 @@ void M2Renderer::rebuildSpatialIndex() { void M2Renderer::gatherCandidates(const glm::vec3& queryMin, const glm::vec3& queryMax, std::vector& outIndices) const { outIndices.clear(); - candidateIdScratch.clear(); + tl_m2_candidateIdScratch.clear(); GridCell minCell = toCell(queryMin); GridCell maxCell = toCell(queryMax); @@ -3630,7 +3668,7 @@ void M2Renderer::gatherCandidates(const glm::vec3& queryMin, const glm::vec3& qu auto it = spatialGrid.find(GridCell{x, y, z}); if (it == spatialGrid.end()) continue; for (uint32_t id : it->second) { - if (!candidateIdScratch.insert(id).second) continue; + if (!tl_m2_candidateIdScratch.insert(id).second) continue; auto idxIt = instanceIndexById.find(id); if (idxIt != instanceIndexById.end()) { outIndices.push_back(idxIt->second); @@ -3803,9 +3841,9 @@ std::optional M2Renderer::getFloorHeight(float glX, float glY, float glZ, glm::vec3 queryMin(glX - 2.0f, glY - 2.0f, glZ - 6.0f); glm::vec3 queryMax(glX + 2.0f, glY + 2.0f, 
glZ + 8.0f); - gatherCandidates(queryMin, queryMax, candidateScratch); + gatherCandidates(queryMin, queryMax, tl_m2_candidateScratch); - for (size_t idx : candidateScratch) { + for (size_t idx : tl_m2_candidateScratch) { const auto& instance = instances[idx]; if (collisionFocusEnabled && pointAABBDistanceSq(collisionFocusPos, instance.worldBoundsMin, instance.worldBoundsMax) > collisionFocusRadiusSq) { @@ -3827,14 +3865,14 @@ std::optional M2Renderer::getFloorHeight(float glX, float glY, float glZ, model.collision.getFloorTrisInRange( localPos.x - 1.0f, localPos.y - 1.0f, localPos.x + 1.0f, localPos.y + 1.0f, - collisionTriScratch_); + tl_m2_collisionTriScratch); glm::vec3 rayOrigin(localPos.x, localPos.y, localPos.z + 5.0f); glm::vec3 rayDir(0.0f, 0.0f, -1.0f); float bestHitZ = -std::numeric_limits::max(); bool hitAny = false; - for (uint32_t ti : collisionTriScratch_) { + for (uint32_t ti : tl_m2_collisionTriScratch) { if (ti >= model.collision.triCount) continue; if (model.collision.triBounds[ti].maxZ < localPos.z - 10.0f || model.collision.triBounds[ti].minZ > localPos.z + 5.0f) continue; @@ -3949,10 +3987,10 @@ bool M2Renderer::checkCollision(const glm::vec3& from, const glm::vec3& to, glm::vec3 queryMin = glm::min(from, to) - glm::vec3(7.0f, 7.0f, 5.0f); glm::vec3 queryMax = glm::max(from, to) + glm::vec3(7.0f, 7.0f, 5.0f); - gatherCandidates(queryMin, queryMax, candidateScratch); + gatherCandidates(queryMin, queryMax, tl_m2_candidateScratch); // Check against all M2 instances in local space (rotation-aware). 
- for (size_t idx : candidateScratch) { + for (size_t idx : tl_m2_candidateScratch) { const auto& instance = instances[idx]; if (collisionFocusEnabled && pointAABBDistanceSq(collisionFocusPos, instance.worldBoundsMin, instance.worldBoundsMax) > collisionFocusRadiusSq) { @@ -3985,14 +4023,14 @@ bool M2Renderer::checkCollision(const glm::vec3& from, const glm::vec3& to, std::min(localFrom.y, localPos.y) - localRadius - 1.0f, std::max(localFrom.x, localPos.x) + localRadius + 1.0f, std::max(localFrom.y, localPos.y) + localRadius + 1.0f, - collisionTriScratch_); + tl_m2_collisionTriScratch); constexpr float PLAYER_HEIGHT = 2.0f; constexpr float MAX_TOTAL_PUSH = 0.02f; // Cap total push per instance bool pushed = false; float totalPushX = 0.0f, totalPushY = 0.0f; - for (uint32_t ti : collisionTriScratch_) { + for (uint32_t ti : tl_m2_collisionTriScratch) { if (ti >= model.collision.triCount) continue; if (localPos.z + PLAYER_HEIGHT < model.collision.triBounds[ti].minZ || localPos.z > model.collision.triBounds[ti].maxZ) continue; @@ -4190,9 +4228,9 @@ float M2Renderer::raycastBoundingBoxes(const glm::vec3& origin, const glm::vec3& glm::vec3 rayEnd = origin + direction * maxDistance; glm::vec3 queryMin = glm::min(origin, rayEnd) - glm::vec3(1.0f); glm::vec3 queryMax = glm::max(origin, rayEnd) + glm::vec3(1.0f); - gatherCandidates(queryMin, queryMax, candidateScratch); + gatherCandidates(queryMin, queryMax, tl_m2_candidateScratch); - for (size_t idx : candidateScratch) { + for (size_t idx : tl_m2_candidateScratch) { const auto& instance = instances[idx]; if (collisionFocusEnabled && pointAABBDistanceSq(collisionFocusPos, instance.worldBoundsMin, instance.worldBoundsMax) > collisionFocusRadiusSq) { diff --git a/src/rendering/renderer.cpp b/src/rendering/renderer.cpp index 9f3d65e7..4e2b66f5 100644 --- a/src/rendering/renderer.cpp +++ b/src/rendering/renderer.cpp @@ -70,6 +70,7 @@ #include #include #include +#include namespace wowee { namespace rendering { @@ -2678,16 
+2679,23 @@ void Renderer::update(float deltaTime) { } - // Update character animations + // Launch M2 doodad animation on background thread (overlaps with character animation + audio) + std::future m2AnimFuture; + bool m2AnimLaunched = false; + if (m2Renderer && camera) { + float m2DeltaTime = deltaTime; + glm::vec3 m2CamPos = camera->getPosition(); + glm::mat4 m2ViewProj = camera->getProjectionMatrix() * camera->getViewMatrix(); + m2AnimFuture = std::async(std::launch::async, + [this, m2DeltaTime, m2CamPos, m2ViewProj]() { + m2Renderer->update(m2DeltaTime, m2CamPos, m2ViewProj); + }); + m2AnimLaunched = true; + } + + // Update character animations (runs in parallel with M2 animation above) if (characterRenderer && camera) { - auto charAnimStart = std::chrono::steady_clock::now(); characterRenderer->update(deltaTime, camera->getPosition()); - float charAnimMs = std::chrono::duration( - std::chrono::steady_clock::now() - charAnimStart).count(); - if (charAnimMs > 5.0f) { - LOG_WARNING("SLOW characterRenderer->update: ", charAnimMs, "ms (", - characterRenderer->getInstanceCount(), " instances)"); - } } // Update AudioEngine (cleanup finished sounds, etc.) 
@@ -2872,17 +2880,9 @@ void Renderer::update(float deltaTime) { ambientSoundManager->update(deltaTime, camPos, isIndoor, isSwimming, isBlacksmith); } - // Update M2 doodad animations (pass camera for frustum-culling bone computation) - if (m2Renderer && camera) { - auto m2Start = std::chrono::steady_clock::now(); - m2Renderer->update(deltaTime, camera->getPosition(), - camera->getProjectionMatrix() * camera->getViewMatrix()); - float m2Ms = std::chrono::duration( - std::chrono::steady_clock::now() - m2Start).count(); - if (m2Ms > 3.0f) { - LOG_WARNING("SLOW m2Renderer->update: ", m2Ms, "ms (", - m2Renderer->getInstanceCount(), " instances)"); - } + // Wait for M2 doodad animation to finish (was launched earlier in parallel with character anim) + if (m2AnimLaunched) { + m2AnimFuture.get(); } // Helper: play zone music, dispatching local files (file: prefix) vs MPQ paths @@ -4338,27 +4338,32 @@ glm::mat4 Renderer::computeLightSpaceMatrix() { shadowCenter = desiredCenter; glm::vec3 center = shadowCenter; - // Snap to shadow texel grid to keep projection stable while moving. + // Snap shadow frustum to texel grid so the projection is perfectly stable + // while moving. We compute the light's right/up axes from the sun direction + // (these are constant per frame regardless of center) and snap center along + // them before building the view matrix. 
float halfExtent = kShadowHalfExtent; float texelWorld = (2.0f * halfExtent) / static_cast<float>(SHADOW_MAP_SIZE); - // Build light view to get stable axes + // Stable light-space axes (independent of center position) glm::vec3 up(0.0f, 0.0f, 1.0f); - // If sunDir is nearly parallel to up, pick a different up vector if (std::abs(glm::dot(sunDir, up)) > 0.99f) { up = glm::vec3(0.0f, 1.0f, 0.0f); } - glm::mat4 lightView = glm::lookAt(center - sunDir * kShadowLightDistance, center, up); + glm::vec3 lightRight = glm::normalize(glm::cross(sunDir, up)); + glm::vec3 lightUp = glm::normalize(glm::cross(lightRight, sunDir)); - // Stable texel snapping in light space removes movement shimmer. - glm::vec4 centerLS = lightView * glm::vec4(center, 1.0f); - centerLS.x = std::round(centerLS.x / texelWorld) * texelWorld; - centerLS.y = std::round(centerLS.y / texelWorld) * texelWorld; - glm::vec4 snappedCenter = glm::inverse(lightView) * centerLS; - center = glm::vec3(snappedCenter); + // Snap center along light's right and up axes to align with texel grid. + // This eliminates sub-texel shifts that cause shadow shimmer.
+ float dotR = glm::dot(center, lightRight); + float dotU = glm::dot(center, lightUp); + dotR = std::floor(dotR / texelWorld) * texelWorld; + dotU = std::floor(dotU / texelWorld) * texelWorld; + float dotD = glm::dot(center, sunDir); // depth axis unchanged + center = lightRight * dotR + lightUp * dotU + sunDir * dotD; shadowCenter = center; - lightView = glm::lookAt(center - sunDir * kShadowLightDistance, center, up); + glm::mat4 lightView = glm::lookAt(center - sunDir * kShadowLightDistance, center, up); glm::mat4 lightProj = glm::ortho(-halfExtent, halfExtent, -halfExtent, halfExtent, kShadowNearPlane, kShadowFarPlane); lightProj[1][1] *= -1.0f; // Vulkan Y-flip for shadow pass diff --git a/src/rendering/wmo_renderer.cpp b/src/rendering/wmo_renderer.cpp index 2e5afcc3..51d8c2a2 100644 --- a/src/rendering/wmo_renderer.cpp +++ b/src/rendering/wmo_renderer.cpp @@ -48,6 +48,11 @@ size_t envSizeOrDefault(const char* name, size_t defValue) { } } // namespace +// Thread-local scratch buffers for collision queries (allows concurrent getFloorHeight/checkWallCollision calls) +static thread_local std::vector tl_candidateScratch; +static thread_local std::vector tl_triScratch; +static thread_local std::unordered_set tl_candidateIdScratch; + static void transformAABB(const glm::mat4& modelMatrix, const glm::vec3& localMin, const glm::vec3& localMax, @@ -1288,7 +1293,7 @@ void WMORenderer::rebuildSpatialIndex() { void WMORenderer::gatherCandidates(const glm::vec3& queryMin, const glm::vec3& queryMax, std::vector& outIndices) const { outIndices.clear(); - candidateIdScratch.clear(); + tl_candidateIdScratch.clear(); GridCell minCell = toCell(queryMin); GridCell maxCell = toCell(queryMax); @@ -1298,7 +1303,7 @@ void WMORenderer::gatherCandidates(const glm::vec3& queryMin, const glm::vec3& q auto it = spatialGrid.find(GridCell{x, y, z}); if (it == spatialGrid.end()) continue; for (uint32_t id : it->second) { - if (!candidateIdScratch.insert(id).second) continue; + if 
(!tl_candidateIdScratch.insert(id).second) continue; auto idxIt = instanceIndexById.find(id); if (idxIt != instanceIndexById.end()) { outIndices.push_back(idxIt->second); @@ -2830,9 +2835,9 @@ std::optional WMORenderer::getFloorHeight(float glX, float glY, float glZ group.getTrianglesInRange( localOrigin.x - 1.0f, localOrigin.y - 1.0f, localOrigin.x + 1.0f, localOrigin.y + 1.0f, - triScratch_); + tl_triScratch); - for (uint32_t triStart : triScratch_) { + for (uint32_t triStart : tl_triScratch) { const glm::vec3& v0 = verts[indices[triStart]]; const glm::vec3& v1 = verts[indices[triStart + 1]]; const glm::vec3& v2 = verts[indices[triStart + 2]]; @@ -2906,9 +2911,9 @@ std::optional WMORenderer::getFloorHeight(float glX, float glY, float glZ // early-returned because overlapping WMO instances need full coverage). glm::vec3 queryMin(glX - 2.0f, glY - 2.0f, glZ - 8.0f); glm::vec3 queryMax(glX + 2.0f, glY + 2.0f, glZ + 10.0f); - gatherCandidates(queryMin, queryMax, candidateScratch); + gatherCandidates(queryMin, queryMax, tl_candidateScratch); - for (size_t idx : candidateScratch) { + for (size_t idx : tl_candidateScratch) { const auto& instance = instances[idx]; if (collisionFocusEnabled && pointAABBDistanceSq(collisionFocusPos, instance.worldBoundsMin, instance.worldBoundsMax) > collisionFocusRadiusSq) { @@ -3081,9 +3086,9 @@ bool WMORenderer::checkWallCollision(const glm::vec3& from, const glm::vec3& to, glm::vec3 queryMin = glm::min(from, to) - glm::vec3(8.0f, 8.0f, 5.0f); glm::vec3 queryMax = glm::max(from, to) + glm::vec3(8.0f, 8.0f, 5.0f); - gatherCandidates(queryMin, queryMax, candidateScratch); + gatherCandidates(queryMin, queryMax, tl_candidateScratch); - for (size_t idx : candidateScratch) { + for (size_t idx : tl_candidateScratch) { const auto& instance = instances[idx]; if (collisionFocusEnabled && pointAABBDistanceSq(collisionFocusPos, instance.worldBoundsMin, instance.worldBoundsMax) > collisionFocusRadiusSq) { @@ -3149,9 +3154,9 @@ bool 
WMORenderer::checkWallCollision(const glm::vec3& from, const glm::vec3& to, float rangeMinY = std::min(localFrom.y, localTo.y) - PLAYER_RADIUS - 1.5f; float rangeMaxX = std::max(localFrom.x, localTo.x) + PLAYER_RADIUS + 1.5f; float rangeMaxY = std::max(localFrom.y, localTo.y) + PLAYER_RADIUS + 1.5f; - group.getTrianglesInRange(rangeMinX, rangeMinY, rangeMaxX, rangeMaxY, triScratch_); + group.getTrianglesInRange(rangeMinX, rangeMinY, rangeMaxX, rangeMaxY, tl_triScratch); - for (uint32_t triStart : triScratch_) { + for (uint32_t triStart : tl_triScratch) { // Use pre-computed Z bounds for fast vertical reject const auto& tb = group.triBounds[triStart / 3]; @@ -3319,9 +3324,9 @@ void WMORenderer::updateActiveGroup(float glX, float glY, float glZ) { glm::vec3 queryMin(glX - 0.5f, glY - 0.5f, glZ - 0.5f); glm::vec3 queryMax(glX + 0.5f, glY + 0.5f, glZ + 0.5f); - gatherCandidates(queryMin, queryMax, candidateScratch); + gatherCandidates(queryMin, queryMax, tl_candidateScratch); - for (size_t idx : candidateScratch) { + for (size_t idx : tl_candidateScratch) { const auto& instance = instances[idx]; if (glX < instance.worldBoundsMin.x || glX > instance.worldBoundsMax.x || glY < instance.worldBoundsMin.y || glY > instance.worldBoundsMax.y || @@ -3365,9 +3370,9 @@ bool WMORenderer::isInsideWMO(float glX, float glY, float glZ, uint32_t* outMode QueryTimer timer(&queryTimeMs, &queryCallCount); glm::vec3 queryMin(glX - 0.5f, glY - 0.5f, glZ - 0.5f); glm::vec3 queryMax(glX + 0.5f, glY + 0.5f, glZ + 0.5f); - gatherCandidates(queryMin, queryMax, candidateScratch); + gatherCandidates(queryMin, queryMax, tl_candidateScratch); - for (size_t idx : candidateScratch) { + for (size_t idx : tl_candidateScratch) { const auto& instance = instances[idx]; if (collisionFocusEnabled && pointAABBDistanceSq(collisionFocusPos, instance.worldBoundsMin, instance.worldBoundsMax) > collisionFocusRadiusSq) { @@ -3414,9 +3419,9 @@ bool WMORenderer::isInsideWMO(float glX, float glY, float glZ, uint32_t* 
outMode bool WMORenderer::isInsideInteriorWMO(float glX, float glY, float glZ) const { glm::vec3 queryMin(glX - 0.5f, glY - 0.5f, glZ - 0.5f); glm::vec3 queryMax(glX + 0.5f, glY + 0.5f, glZ + 0.5f); - gatherCandidates(queryMin, queryMax, candidateScratch); + gatherCandidates(queryMin, queryMax, tl_candidateScratch); - for (size_t idx : candidateScratch) { + for (size_t idx : tl_candidateScratch) { const auto& instance = instances[idx]; if (collisionFocusEnabled && pointAABBDistanceSq(collisionFocusPos, instance.worldBoundsMin, instance.worldBoundsMax) > collisionFocusRadiusSq) { @@ -3470,9 +3475,9 @@ float WMORenderer::raycastBoundingBoxes(const glm::vec3& origin, const glm::vec3 glm::vec3 rayEnd = origin + direction * maxDistance; glm::vec3 queryMin = glm::min(origin, rayEnd) - glm::vec3(1.0f); glm::vec3 queryMax = glm::max(origin, rayEnd) + glm::vec3(1.0f); - gatherCandidates(queryMin, queryMax, candidateScratch); + gatherCandidates(queryMin, queryMax, tl_candidateScratch); - for (size_t idx : candidateScratch) { + for (size_t idx : tl_candidateScratch) { const auto& instance = instances[idx]; if (collisionFocusEnabled && pointAABBDistanceSq(collisionFocusPos, instance.worldBoundsMin, instance.worldBoundsMax) > collisionFocusRadiusSq) { @@ -3526,9 +3531,9 @@ float WMORenderer::raycastBoundingBoxes(const glm::vec3& origin, const glm::vec3 float rMinY = std::min(localOrigin.y, localEnd.y) - 1.0f; float rMaxX = std::max(localOrigin.x, localEnd.x) + 1.0f; float rMaxY = std::max(localOrigin.y, localEnd.y) + 1.0f; - group.getWallTrianglesInRange(rMinX, rMinY, rMaxX, rMaxY, triScratch_); + group.getWallTrianglesInRange(rMinX, rMinY, rMaxX, rMaxY, tl_triScratch); - for (uint32_t triStart : triScratch_) { + for (uint32_t triStart : tl_triScratch) { const glm::vec3& v0 = verts[indices[triStart]]; const glm::vec3& v1 = verts[indices[triStart + 1]]; const glm::vec3& v2 = verts[indices[triStart + 2]]; diff --git a/src/ui/game_screen.cpp b/src/ui/game_screen.cpp index 
19db13e9..8b79cd4c 100644 --- a/src/ui/game_screen.cpp +++ b/src/ui/game_screen.cpp @@ -6270,7 +6270,7 @@ void GameScreen::renderSettingsWindow() { if (pendingShadows) { ImGui::SameLine(); ImGui::SetNextItemWidth(150.0f); - if (ImGui::SliderFloat("Distance##shadow", &pendingShadowDistance, 40.0f, 200.0f, "%.0f")) { + if (ImGui::SliderFloat("Distance##shadow", &pendingShadowDistance, 40.0f, 500.0f, "%.0f")) { if (renderer) renderer->setShadowDistance(pendingShadowDistance); saveSettings(); } @@ -6387,7 +6387,7 @@ void GameScreen::renderSettingsWindow() { pendingFullscreen = kDefaultFullscreen; pendingVsync = kDefaultVsync; pendingShadows = kDefaultShadows; - pendingShadowDistance = 72.0f; + pendingShadowDistance = 300.0f; pendingGroundClutterDensity = kDefaultGroundClutterDensity; pendingAntiAliasing = 0; pendingNormalMapping = true; @@ -7505,7 +7505,7 @@ void GameScreen::loadSettings() { else if (key == "auto_loot") pendingAutoLoot = (std::stoi(val) != 0); else if (key == "ground_clutter_density") pendingGroundClutterDensity = std::clamp(std::stoi(val), 0, 150); else if (key == "shadows") pendingShadows = (std::stoi(val) != 0); - else if (key == "shadow_distance") pendingShadowDistance = std::clamp(std::stof(val), 40.0f, 200.0f); + else if (key == "shadow_distance") pendingShadowDistance = std::clamp(std::stof(val), 40.0f, 500.0f); else if (key == "water_refraction") pendingWaterRefraction = (std::stoi(val) != 0); else if (key == "antialiasing") pendingAntiAliasing = std::clamp(std::stoi(val), 0, 3); else if (key == "normal_mapping") pendingNormalMapping = (std::stoi(val) != 0); From c13dbf2198acd3770a0d7590e0ea08d4ddf9da7b Mon Sep 17 00:00:00 2001 From: Kelsi Date: Sat, 7 Mar 2026 22:35:18 -0800 Subject: [PATCH 06/13] Proactive tile streaming, faster finalization, tree trunk collision MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Re-check for unloaded tiles when workers are idle (no tile boundary needed) - Increase M2 
upload budget 4→16 and WMO 1→4 per frame when not under pressure - Lower tree collision threshold from 40 to 6 units so large trees block movement --- src/rendering/m2_renderer.cpp | 7 +++---- src/rendering/terrain_manager.cpp | 33 +++++++++++++++++++++++++++---- 2 files changed, 32 insertions(+), 8 deletions(-) diff --git a/src/rendering/m2_renderer.cpp b/src/rendering/m2_renderer.cpp index 0ca3f940..cbe26302 100644 --- a/src/rendering/m2_renderer.cpp +++ b/src/rendering/m2_renderer.cpp @@ -1036,10 +1036,9 @@ bool M2Renderer::loadModel(const pipeline::M2Model& model, uint32_t modelId) { (lowerName.find("trunk") != std::string::npos) || (lowerName.find("stump") != std::string::npos) || (lowerName.find("log") != std::string::npos); - // Only large trees (canopy > 20 model units wide) get trunk collision. - // Small/mid trees are walkthrough to avoid getting stuck between them. - // Only large trees get trunk collision; all smaller trees are walkthrough. - bool treeWithTrunk = treeLike && !hardTreePart && !foliageName && horiz > 40.0f; + // Trees with visible trunks get collision. Threshold: canopy wider than 6 + // model units AND taller than 4 units (filters out small bushes/saplings). 
+ bool treeWithTrunk = treeLike && !hardTreePart && !foliageName && horiz > 6.0f && vert > 4.0f; bool softTree = treeLike && !hardTreePart && !treeWithTrunk; bool forceSolidCurb = gpuModel.collisionSteppedLowPlatform || knownStormwindPlanter || likelyCurbName || gpuModel.collisionPlanter; bool narrowVerticalName = diff --git a/src/rendering/terrain_manager.cpp b/src/rendering/terrain_manager.cpp index f15541ea..89a77d29 100644 --- a/src/rendering/terrain_manager.cpp +++ b/src/rendering/terrain_manager.cpp @@ -199,13 +199,25 @@ void TerrainManager::update(const Camera& camera, float deltaTime) { currentTile = newTile; } - // Stream tiles if we've moved significantly or initial load + // Stream tiles when player crosses a tile boundary if (newTile.x != lastStreamTile.x || newTile.y != lastStreamTile.y) { LOG_DEBUG("Streaming: cam=(", camPos.x, ",", camPos.y, ",", camPos.z, ") tile=[", newTile.x, ",", newTile.y, "] loaded=", loadedTiles.size()); streamTiles(); lastStreamTile = newTile; + } else { + // Proactive loading: when workers are idle, re-check for unloaded tiles + // within range. This catches tiles that weren't queued on the initial + // streamTiles pass (e.g. cache eviction, late-arriving ADT availability). + bool workersIdle; + { + std::lock_guard lock(queueMutex); + workersIdle = loadQueue.empty(); + } + if (workersIdle) { + streamTiles(); + } } } @@ -830,11 +842,19 @@ bool TerrainManager::advanceFinalization(FinalizingTile& ft) { } case FinalizationPhase::M2_MODELS: { - // Upload multiple M2 models per call (batched GPU uploads) + // Upload multiple M2 models per call (batched GPU uploads). + // When no more tiles are queued for background parsing, increase the + // per-frame budget so idle workers don't waste time waiting for the + // main thread to trickle-upload models. 
if (m2Renderer && ft.m2ModelIndex < pending->m2Models.size()) { // Set pre-decoded BLP cache so loadTexture() skips main-thread BLP decode m2Renderer->setPredecodedBLPCache(&pending->preloadedM2Textures); - constexpr size_t kModelsPerStep = 4; + bool workersIdle; + { + std::lock_guard lk(queueMutex); + workersIdle = loadQueue.empty() && readyQueue.empty(); + } + const size_t kModelsPerStep = workersIdle ? 16 : 4; size_t uploaded = 0; while (ft.m2ModelIndex < pending->m2Models.size() && uploaded < kModelsPerStep) { auto& m2Ready = pending->m2Models[ft.m2ModelIndex]; @@ -896,7 +916,12 @@ bool TerrainManager::advanceFinalization(FinalizingTile& ft) { wmoRenderer->setPredecodedBLPCache(&pending->preloadedWMOTextures); wmoRenderer->setDeferNormalMaps(true); - constexpr size_t kWmosPerStep = 1; + bool wmoWorkersIdle; + { + std::lock_guard lk(queueMutex); + wmoWorkersIdle = loadQueue.empty() && readyQueue.empty(); + } + const size_t kWmosPerStep = wmoWorkersIdle ? 4 : 1; size_t uploaded = 0; while (ft.wmoModelIndex < pending->wmoModels.size() && uploaded < kWmosPerStep) { auto& wmoReady = pending->wmoModels[ft.wmoModelIndex]; From 6cf08fbaa6678eba411c067a0e69b2976111f6c3 Mon Sep 17 00:00:00 2001 From: Kelsi Date: Sat, 7 Mar 2026 22:40:07 -0800 Subject: [PATCH 07/13] Throttle proactive tile streaming to reduce post-load hitching Add 2-second cooldown timer before re-checking for unloaded tiles when workers are idle, preventing excessive streamTiles() calls that caused frame hitches right after world load. 
--- include/rendering/terrain_manager.hpp | 1 + src/rendering/terrain_manager.cpp | 24 ++++++++++++++---------- 2 files changed, 15 insertions(+), 10 deletions(-) diff --git a/include/rendering/terrain_manager.hpp b/include/rendering/terrain_manager.hpp index 6f732721..2a746d3e 100644 --- a/include/rendering/terrain_manager.hpp +++ b/include/rendering/terrain_manager.hpp @@ -348,6 +348,7 @@ private: int unloadRadius = 7; // Unload tiles beyond this radius float updateInterval = 0.033f; // Check streaming every 33ms (~30 fps) float timeSinceLastUpdate = 0.0f; + float proactiveStreamTimer_ = 0.0f; bool taxiStreamingMode_ = false; // Tile size constants (WoW ADT specifications) diff --git a/src/rendering/terrain_manager.cpp b/src/rendering/terrain_manager.cpp index 89a77d29..6578c71a 100644 --- a/src/rendering/terrain_manager.cpp +++ b/src/rendering/terrain_manager.cpp @@ -207,16 +207,20 @@ void TerrainManager::update(const Camera& camera, float deltaTime) { streamTiles(); lastStreamTile = newTile; } else { - // Proactive loading: when workers are idle, re-check for unloaded tiles - // within range. This catches tiles that weren't queued on the initial - // streamTiles pass (e.g. cache eviction, late-arriving ADT availability). - bool workersIdle; - { - std::lock_guard lock(queueMutex); - workersIdle = loadQueue.empty(); - } - if (workersIdle) { - streamTiles(); + // Proactive loading: when workers are idle, periodically re-check for + // unloaded tiles within range. Throttled to avoid hitching right after + // world load when many tiles finalize simultaneously. 
+ proactiveStreamTimer_ += deltaTime; + if (proactiveStreamTimer_ >= 2.0f) { + proactiveStreamTimer_ = 0.0f; + bool workersIdle; + { + std::lock_guard lock(queueMutex); + workersIdle = loadQueue.empty(); + } + if (workersIdle) { + streamTiles(); + } } } } From ac3c90dd75c748d6482b6031bfcbd65d1e4e58c1 Mon Sep 17 00:00:00 2001 From: Kelsi Date: Sat, 7 Mar 2026 22:47:07 -0800 Subject: [PATCH 08/13] Fix M2 animated instance flashing (deer/bird/critter pop-in) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Root cause: bonesDirty was a single bool shared across both double-buffered frame indices. When bones were copied to frame 0's SSBO and bonesDirty cleared, frame 1's newly-allocated SSBO would contain garbage/zeros and never get populated — causing animated M2 instances to flash invisible on alternating frames. Fix: Make bonesDirty per-frame-index (bool[2]) so each buffer independently tracks whether it needs bone data uploaded. When bones are recomputed, both indices are marked dirty. When uploaded during render, only the current frame index is cleared. New buffer allocations in prepareRender force their frame index dirty. 
--- include/rendering/m2_renderer.hpp | 2 +- src/rendering/m2_renderer.cpp | 18 ++++++++++++------ 2 files changed, 13 insertions(+), 7 deletions(-) diff --git a/include/rendering/m2_renderer.hpp b/include/rendering/m2_renderer.hpp index 75a92565..ee7d6ebf 100644 --- a/include/rendering/m2_renderer.hpp +++ b/include/rendering/m2_renderer.hpp @@ -194,7 +194,7 @@ struct M2Instance { // Frame-skip optimization (update distant animations less frequently) uint8_t frameSkipCounter = 0; - bool bonesDirty = false; // Set when bones recomputed, cleared after upload + bool bonesDirty[2] = {false, false}; // Per-frame-index: set when bones recomputed, cleared after upload // Per-instance bone SSBO (double-buffered) ::VkBuffer boneBuffer[2] = {}; diff --git a/src/rendering/m2_renderer.cpp b/src/rendering/m2_renderer.cpp index cbe26302..eed9a025 100644 --- a/src/rendering/m2_renderer.cpp +++ b/src/rendering/m2_renderer.cpp @@ -1687,7 +1687,7 @@ uint32_t M2Renderer::createInstance(uint32_t modelId, const glm::vec3& position, for (const auto& existing : instances) { if (existing.modelId == modelId && !existing.boneMatrices.empty()) { instance.boneMatrices = existing.boneMatrices; - instance.bonesDirty = true; + instance.bonesDirty[0] = instance.bonesDirty[1] = true; break; } } @@ -1791,7 +1791,7 @@ uint32_t M2Renderer::createInstanceWithMatrix(uint32_t modelId, const glm::mat4& for (const auto& existing : instances) { if (existing.modelId == modelId && !existing.boneMatrices.empty()) { instance.boneMatrices = existing.boneMatrices; - instance.bonesDirty = true; + instance.bonesDirty[0] = instance.bonesDirty[1] = true; break; } } @@ -1951,7 +1951,7 @@ static void computeBoneMatrices(const M2ModelGPU& model, M2Instance& instance) { instance.boneMatrices[i] = local; } } - instance.bonesDirty = true; + instance.bonesDirty[0] = instance.bonesDirty[1] = true; } void M2Renderer::update(float deltaTime, const glm::vec3& cameraPos, const glm::mat4& viewProjection) { @@ -2237,6 +2237,11 @@ 
void M2Renderer::prepareRender(uint32_t frameIndex, const Camera& camera) { &instance.boneBuffer[frameIndex], &instance.boneAlloc[frameIndex], &allocInfo); instance.boneMapped[frameIndex] = allocInfo.pMappedData; + // Force dirty so current boneMatrices get copied into this + // newly-allocated buffer during render (prevents garbage/zero + // bones when the other frame index already cleared bonesDirty). + instance.bonesDirty[frameIndex] = true; + instance.boneSet[frameIndex] = allocateBoneSet(); if (instance.boneSet[frameIndex]) { VkDescriptorBufferInfo bufInfo{}; @@ -2426,12 +2431,13 @@ void M2Renderer::render(VkCommandBuffer cmd, VkDescriptorSet perFrameSet, const } bool useBones = needsBones; if (useBones) { - // Upload bone matrices only when recomputed (skip frame-skipped instances) - if (instance.bonesDirty && instance.boneMapped[frameIndex]) { + // Upload bone matrices only when recomputed (per-frame-index tracking + // ensures both double-buffered SSBOs get the latest bone data) + if (instance.bonesDirty[frameIndex] && instance.boneMapped[frameIndex]) { int numBones = std::min(static_cast<int>(instance.boneMatrices.size()), 128); memcpy(instance.boneMapped[frameIndex], instance.boneMatrices.data(), numBones * sizeof(glm::mat4)); - instance.bonesDirty = false; + instance.bonesDirty[frameIndex] = false; } // Bind bone descriptor set (set 2) From 7f573fc06ba9667f65723a73b316015dc956d0f7 Mon Sep 17 00:00:00 2001 From: Kelsi Date: Sat, 7 Mar 2026 22:51:59 -0800 Subject: [PATCH 09/13] Reduce tile finalization aggressiveness to prevent spawn hitching MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Reduce max finalization steps per frame: 2→1 (normal), 8→4 (taxi) - Reduce terrain chunk upload batch: 32→16 chunks per step - Reduce idle M2 model upload budget: 16→6 per step - Reduce idle WMO model upload budget: 4→2 per step Tiles still stream in quickly but spread GPU upload work across more frames, eliminating the frame spikes
right after spawning. --- src/rendering/terrain_manager.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/rendering/terrain_manager.cpp b/src/rendering/terrain_manager.cpp index 6578c71a..e186ed96 100644 --- a/src/rendering/terrain_manager.cpp +++ b/src/rendering/terrain_manager.cpp @@ -816,7 +816,7 @@ bool TerrainManager::advanceFinalization(FinalizingTile& ft) { } bool allDone = terrainRenderer->loadTerrainIncremental( pending->mesh, pending->terrain.textures, x, y, - ft.terrainChunkNext, 32); + ft.terrainChunkNext, 16); if (!allDone) { return false; // More chunks remain — yield to time budget } @@ -858,7 +858,7 @@ bool TerrainManager::advanceFinalization(FinalizingTile& ft) { std::lock_guard lk(queueMutex); workersIdle = loadQueue.empty() && readyQueue.empty(); } - const size_t kModelsPerStep = workersIdle ? 16 : 4; + const size_t kModelsPerStep = workersIdle ? 6 : 4; size_t uploaded = 0; while (ft.m2ModelIndex < pending->m2Models.size() && uploaded < kModelsPerStep) { auto& m2Ready = pending->m2Models[ft.m2ModelIndex]; @@ -925,7 +925,7 @@ bool TerrainManager::advanceFinalization(FinalizingTile& ft) { std::lock_guard lk(queueMutex); wmoWorkersIdle = loadQueue.empty() && readyQueue.empty(); } - const size_t kWmosPerStep = wmoWorkersIdle ? 4 : 1; + const size_t kWmosPerStep = wmoWorkersIdle ? 2 : 1; size_t uploaded = 0; while (ft.wmoModelIndex < pending->wmoModels.size() && uploaded < kWmosPerStep) { auto& wmoReady = pending->wmoModels[ft.wmoModelIndex]; @@ -1207,7 +1207,7 @@ void TerrainManager::processReadyTiles() { // Async upload batch: record GPU copies into a command buffer, submit with // a fence, but DON'T wait. The fence is polled on subsequent frames. // This eliminates the main-thread stall from vkWaitForFences entirely. - const int maxSteps = taxiStreamingMode_ ? 8 : 2; + const int maxSteps = taxiStreamingMode_ ? 
4 : 1; int steps = 0; if (vkCtx) vkCtx->beginUploadBatch(); From f681a8b3611811cde3e9e111edbc311a6e97fde0 Mon Sep 17 00:00:00 2001 From: Kelsi Date: Sat, 7 Mar 2026 22:55:02 -0800 Subject: [PATCH 10/13] Further reduce tile streaming aggressiveness MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Load radius: 4→3 (normal), 6→5 (taxi) - Terrain chunks per step: 16→8 - M2 models per step: 6→2 (removed idle boost) - WMO models per step: 2→1 (removed idle boost) - WMO doodads per step: 4→2 - All budgets now constant (no idle-vs-busy branching) --- include/rendering/terrain_manager.hpp | 2 +- src/core/application.cpp | 4 ++-- src/rendering/terrain_manager.cpp | 8 ++++---- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/include/rendering/terrain_manager.hpp b/include/rendering/terrain_manager.hpp index 2a746d3e..58138ea4 100644 --- a/include/rendering/terrain_manager.hpp +++ b/include/rendering/terrain_manager.hpp @@ -344,7 +344,7 @@ private: // Streaming parameters bool streamingEnabled = true; - int loadRadius = 4; // Load tiles within this radius (9x9 grid = 81 tiles) + int loadRadius = 3; // Load tiles within this radius (7x7 circular ~29 tiles) int unloadRadius = 7; // Unload tiles beyond this radius float updateInterval = 0.033f; // Check streaming every 33ms (~30 fps) float timeSinceLastUpdate = 0.0f; diff --git a/src/core/application.cpp b/src/core/application.cpp index f9ac557c..65220f79 100644 --- a/src/core/application.cpp +++ b/src/core/application.cpp @@ -1108,7 +1108,7 @@ void Application::update(float deltaTime) { // Taxi flights move fast (32 u/s) — load further ahead so terrain is ready // before the camera arrives. Keep updates frequent to spot new tiles early. renderer->getTerrainManager()->setUpdateInterval(onTaxi ? 0.033f : 0.033f); - renderer->getTerrainManager()->setLoadRadius(onTaxi ? 6 : 4); + renderer->getTerrainManager()->setLoadRadius(onTaxi ? 
5 : 3); renderer->getTerrainManager()->setUnloadRadius(onTaxi ? 9 : 7); renderer->getTerrainManager()->setTaxiStreamingMode(onTaxi); } @@ -4041,7 +4041,7 @@ void Application::loadOnlineWorldTerrain(uint32_t mapId, float x, float y, float // Use a small radius for the initial load (just immediate tiles), // then restore the full radius after entering the game. // This matches WoW's behavior: load quickly, stream the rest in-game. - const int savedLoadRadius = 4; + const int savedLoadRadius = 3; terrainMgr->setLoadRadius(1); terrainMgr->setUnloadRadius(7); diff --git a/src/rendering/terrain_manager.cpp b/src/rendering/terrain_manager.cpp index e186ed96..9f0dc95b 100644 --- a/src/rendering/terrain_manager.cpp +++ b/src/rendering/terrain_manager.cpp @@ -816,7 +816,7 @@ bool TerrainManager::advanceFinalization(FinalizingTile& ft) { } bool allDone = terrainRenderer->loadTerrainIncremental( pending->mesh, pending->terrain.textures, x, y, - ft.terrainChunkNext, 16); + ft.terrainChunkNext, 8); if (!allDone) { return false; // More chunks remain — yield to time budget } @@ -858,7 +858,7 @@ bool TerrainManager::advanceFinalization(FinalizingTile& ft) { std::lock_guard lk(queueMutex); workersIdle = loadQueue.empty() && readyQueue.empty(); } - const size_t kModelsPerStep = workersIdle ? 6 : 4; + const size_t kModelsPerStep = 2; size_t uploaded = 0; while (ft.m2ModelIndex < pending->m2Models.size() && uploaded < kModelsPerStep) { auto& m2Ready = pending->m2Models[ft.m2ModelIndex]; @@ -925,7 +925,7 @@ bool TerrainManager::advanceFinalization(FinalizingTile& ft) { std::lock_guard lk(queueMutex); wmoWorkersIdle = loadQueue.empty() && readyQueue.empty(); } - const size_t kWmosPerStep = wmoWorkersIdle ? 
2 : 1; + const size_t kWmosPerStep = 1; size_t uploaded = 0; while (ft.wmoModelIndex < pending->wmoModels.size() && uploaded < kWmosPerStep) { auto& wmoReady = pending->wmoModels[ft.wmoModelIndex]; @@ -1006,7 +1006,7 @@ bool TerrainManager::advanceFinalization(FinalizingTile& ft) { if (m2Renderer && ft.wmoDoodadIndex < pending->wmoDoodads.size()) { // Set pre-decoded BLP cache for doodad M2 textures m2Renderer->setPredecodedBLPCache(&pending->preloadedM2Textures); - constexpr size_t kDoodadsPerStep = 4; + constexpr size_t kDoodadsPerStep = 2; size_t uploaded = 0; while (ft.wmoDoodadIndex < pending->wmoDoodads.size() && uploaded < kDoodadsPerStep) { auto& doodad = pending->wmoDoodads[ft.wmoDoodadIndex]; From 0ffeabd4ed54f6cc721337f2df418ef37285b989 Mon Sep 17 00:00:00 2001 From: Kelsi Date: Sat, 7 Mar 2026 23:02:25 -0800 Subject: [PATCH 11/13] Revert "Further reduce tile streaming aggressiveness" This reverts commit f681a8b3611811cde3e9e111edbc311a6e97fde0. --- include/rendering/terrain_manager.hpp | 2 +- src/core/application.cpp | 4 ++-- src/rendering/terrain_manager.cpp | 8 ++++---- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/include/rendering/terrain_manager.hpp b/include/rendering/terrain_manager.hpp index 58138ea4..2a746d3e 100644 --- a/include/rendering/terrain_manager.hpp +++ b/include/rendering/terrain_manager.hpp @@ -344,7 +344,7 @@ private: // Streaming parameters bool streamingEnabled = true; - int loadRadius = 3; // Load tiles within this radius (7x7 circular ~29 tiles) + int loadRadius = 4; // Load tiles within this radius (9x9 grid = 81 tiles) int unloadRadius = 7; // Unload tiles beyond this radius float updateInterval = 0.033f; // Check streaming every 33ms (~30 fps) float timeSinceLastUpdate = 0.0f; diff --git a/src/core/application.cpp b/src/core/application.cpp index 65220f79..f9ac557c 100644 --- a/src/core/application.cpp +++ b/src/core/application.cpp @@ -1108,7 +1108,7 @@ void Application::update(float deltaTime) { // Taxi 
flights move fast (32 u/s) — load further ahead so terrain is ready // before the camera arrives. Keep updates frequent to spot new tiles early. renderer->getTerrainManager()->setUpdateInterval(onTaxi ? 0.033f : 0.033f); - renderer->getTerrainManager()->setLoadRadius(onTaxi ? 5 : 3); + renderer->getTerrainManager()->setLoadRadius(onTaxi ? 6 : 4); renderer->getTerrainManager()->setUnloadRadius(onTaxi ? 9 : 7); renderer->getTerrainManager()->setTaxiStreamingMode(onTaxi); } @@ -4041,7 +4041,7 @@ void Application::loadOnlineWorldTerrain(uint32_t mapId, float x, float y, float // Use a small radius for the initial load (just immediate tiles), // then restore the full radius after entering the game. // This matches WoW's behavior: load quickly, stream the rest in-game. - const int savedLoadRadius = 3; + const int savedLoadRadius = 4; terrainMgr->setLoadRadius(1); terrainMgr->setUnloadRadius(7); diff --git a/src/rendering/terrain_manager.cpp b/src/rendering/terrain_manager.cpp index 9f0dc95b..e186ed96 100644 --- a/src/rendering/terrain_manager.cpp +++ b/src/rendering/terrain_manager.cpp @@ -816,7 +816,7 @@ bool TerrainManager::advanceFinalization(FinalizingTile& ft) { } bool allDone = terrainRenderer->loadTerrainIncremental( pending->mesh, pending->terrain.textures, x, y, - ft.terrainChunkNext, 8); + ft.terrainChunkNext, 16); if (!allDone) { return false; // More chunks remain — yield to time budget } @@ -858,7 +858,7 @@ bool TerrainManager::advanceFinalization(FinalizingTile& ft) { std::lock_guard lk(queueMutex); workersIdle = loadQueue.empty() && readyQueue.empty(); } - const size_t kModelsPerStep = 2; + const size_t kModelsPerStep = workersIdle ? 
6 : 4; size_t uploaded = 0; while (ft.m2ModelIndex < pending->m2Models.size() && uploaded < kModelsPerStep) { auto& m2Ready = pending->m2Models[ft.m2ModelIndex]; @@ -925,7 +925,7 @@ bool TerrainManager::advanceFinalization(FinalizingTile& ft) { std::lock_guard lk(queueMutex); wmoWorkersIdle = loadQueue.empty() && readyQueue.empty(); } - const size_t kWmosPerStep = 1; + const size_t kWmosPerStep = wmoWorkersIdle ? 2 : 1; size_t uploaded = 0; while (ft.wmoModelIndex < pending->wmoModels.size() && uploaded < kWmosPerStep) { auto& wmoReady = pending->wmoModels[ft.wmoModelIndex]; @@ -1006,7 +1006,7 @@ bool TerrainManager::advanceFinalization(FinalizingTile& ft) { if (m2Renderer && ft.wmoDoodadIndex < pending->wmoDoodads.size()) { // Set pre-decoded BLP cache for doodad M2 textures m2Renderer->setPredecodedBLPCache(&pending->preloadedM2Textures); - constexpr size_t kDoodadsPerStep = 2; + constexpr size_t kDoodadsPerStep = 4; size_t uploaded = 0; while (ft.wmoDoodadIndex < pending->wmoDoodads.size() && uploaded < kDoodadsPerStep) { auto& doodad = pending->wmoDoodads[ft.wmoDoodadIndex]; From 52317d1edd027c910fa1b62e65caf5cc73b16e8a Mon Sep 17 00:00:00 2001 From: Kelsi Date: Sat, 7 Mar 2026 23:13:01 -0800 Subject: [PATCH 12/13] Implement FSR 2.2 temporal upscaling Full FSR 2.2 pipeline with depth-based motion vector reprojection, temporal accumulation with YCoCg neighborhood clamping, and RCAS contrast-adaptive sharpening. 
Architecture (designed for FSR 3.x frame generation readiness): - Camera: Halton(2,3) sub-pixel jitter with unjittered projection stored separately for motion vector computation - Motion vectors: compute shader reconstructs world position from depth + inverse VP, reprojects with previous frame's VP - Temporal accumulation: compute shader blends 5-10% current frame with 90-95% clamped history, adaptive blend for disocclusion - History: ping-pong R16G16B16A16 buffers at display resolution - Sharpening: RCAS fragment pass with contrast-adaptive weights Integration: - FSR2 replaces both FSR1 and MSAA when enabled - Scene renders to internal resolution framebuffer (no MSAA) - Compute passes run between scene and swapchain render passes - Camera cut detection resets history on teleport - Quality presets shared with FSR1 (0.50-0.77 scale factors) - UI: "Upscaling" combo with Off/FSR 1.0/FSR 2.2 options --- assets/shaders/fsr2_accumulate.comp.glsl | 115 ++++ assets/shaders/fsr2_accumulate.comp.spv | Bin 0 -> 8436 bytes assets/shaders/fsr2_motion.comp.glsl | 44 ++ assets/shaders/fsr2_motion.comp.spv | Bin 0 -> 3668 bytes assets/shaders/fsr2_sharpen.frag.glsl | 46 ++ assets/shaders/fsr2_sharpen.frag.spv | Bin 0 -> 3984 bytes include/rendering/camera.hpp | 9 + include/rendering/renderer.hpp | 63 ++- src/rendering/camera.cpp | 22 + src/rendering/renderer.cpp | 654 ++++++++++++++++++++++- src/ui/game_screen.cpp | 16 +- 11 files changed, 957 insertions(+), 12 deletions(-) create mode 100644 assets/shaders/fsr2_accumulate.comp.glsl create mode 100644 assets/shaders/fsr2_accumulate.comp.spv create mode 100644 assets/shaders/fsr2_motion.comp.glsl create mode 100644 assets/shaders/fsr2_motion.comp.spv create mode 100644 assets/shaders/fsr2_sharpen.frag.glsl create mode 100644 assets/shaders/fsr2_sharpen.frag.spv diff --git a/assets/shaders/fsr2_accumulate.comp.glsl b/assets/shaders/fsr2_accumulate.comp.glsl new file mode 100644 index 00000000..a998b52c --- /dev/null +++ 
b/assets/shaders/fsr2_accumulate.comp.glsl @@ -0,0 +1,115 @@ +#version 450 + +layout(local_size_x = 8, local_size_y = 8) in; + +// Inputs (internal resolution) +layout(set = 0, binding = 0) uniform sampler2D sceneColor; +layout(set = 0, binding = 1) uniform sampler2D depthBuffer; +layout(set = 0, binding = 2) uniform sampler2D motionVectors; + +// History (display resolution) +layout(set = 0, binding = 3) uniform sampler2D historyInput; + +// Output (display resolution) +layout(set = 0, binding = 4, rgba16f) uniform writeonly image2D historyOutput; + +layout(push_constant) uniform PushConstants { + vec4 internalSize; // xy = internal resolution, zw = 1/internal + vec4 displaySize; // xy = display resolution, zw = 1/display + vec4 jitterOffset; // xy = current jitter (pixel-space), zw = unused + vec4 params; // x = resetHistory (1=reset), y = sharpness, zw = unused +} pc; + +// RGB <-> YCoCg for neighborhood clamping +vec3 rgbToYCoCg(vec3 rgb) { + float y = 0.25 * rgb.r + 0.5 * rgb.g + 0.25 * rgb.b; + float co = 0.5 * rgb.r - 0.5 * rgb.b; + float cg = -0.25 * rgb.r + 0.5 * rgb.g - 0.25 * rgb.b; + return vec3(y, co, cg); +} + +vec3 yCoCgToRgb(vec3 ycocg) { + float y = ycocg.x; + float co = ycocg.y; + float cg = ycocg.z; + return vec3(y + co - cg, y + cg, y - co - cg); +} + +void main() { + ivec2 outPixel = ivec2(gl_GlobalInvocationID.xy); + ivec2 outSize = ivec2(pc.displaySize.xy); + if (outPixel.x >= outSize.x || outPixel.y >= outSize.y) return; + + // Output UV in display space + vec2 outUV = (vec2(outPixel) + 0.5) * pc.displaySize.zw; + + // Map display pixel to internal resolution UV (accounting for jitter) + vec2 internalUV = outUV; + + // Sample current frame color at internal resolution + vec3 currentColor = texture(sceneColor, internalUV).rgb; + + // Sample motion vector at internal resolution + vec2 inUV = outUV; // Approximate — display maps to internal via scale + vec2 motion = texture(motionVectors, inUV).rg; + + // Reproject: where was this pixel in the 
previous frame's history? + vec2 historyUV = outUV - motion; + + // History reset: on teleport / camera cut, just use current frame + if (pc.params.x > 0.5) { + imageStore(historyOutput, outPixel, vec4(currentColor, 1.0)); + return; + } + + // Sample reprojected history + vec3 historyColor = texture(historyInput, historyUV).rgb; + + // Neighborhood clamping in YCoCg space to prevent ghosting + // Sample 3x3 neighborhood from current frame + vec2 texelSize = pc.internalSize.zw; + vec3 samples[9]; + int idx = 0; + for (int dy = -1; dy <= 1; dy++) { + for (int dx = -1; dx <= 1; dx++) { + samples[idx] = rgbToYCoCg(texture(sceneColor, internalUV + vec2(dx, dy) * texelSize).rgb); + idx++; + } + } + + // Compute AABB in YCoCg + vec3 boxMin = samples[0]; + vec3 boxMax = samples[0]; + for (int i = 1; i < 9; i++) { + boxMin = min(boxMin, samples[i]); + boxMax = max(boxMax, samples[i]); + } + + // Slightly expand the box to reduce flickering on edges + vec3 boxCenter = (boxMin + boxMax) * 0.5; + vec3 boxExtent = (boxMax - boxMin) * 0.5; + boxMin = boxCenter - boxExtent * 1.25; + boxMax = boxCenter + boxExtent * 1.25; + + // Clamp history to the neighborhood AABB + vec3 historyYCoCg = rgbToYCoCg(historyColor); + vec3 clampedHistory = clamp(historyYCoCg, boxMin, boxMax); + historyColor = yCoCgToRgb(clampedHistory); + + // Check if history UV is valid (within [0,1]) + float historyValid = (historyUV.x >= 0.0 && historyUV.x <= 1.0 && + historyUV.y >= 0.0 && historyUV.y <= 1.0) ? 
1.0 : 0.0; + + // Blend factor: use more current frame for disoccluded regions + // Luminance difference between clamped history and original → confidence + float clampDist = length(historyYCoCg - clampedHistory); + float blendFactor = mix(0.05, 0.3, clamp(clampDist * 4.0, 0.0, 1.0)); + + // If history is off-screen, use current frame entirely + blendFactor = mix(blendFactor, 1.0, 1.0 - historyValid); + + // Final blend + vec3 result = mix(historyColor, currentColor, blendFactor); + + imageStore(historyOutput, outPixel, vec4(result, 1.0)); +} diff --git a/assets/shaders/fsr2_accumulate.comp.spv b/assets/shaders/fsr2_accumulate.comp.spv new file mode 100644 index 0000000000000000000000000000000000000000..4d31fba7cc189364681e035d9dddc1e48c3845d2 GIT binary patch literal 8436 zcmZ9Q37l1R6~`~k0tzB1Dk$P0?jr6h3BrH_Iy$2uA}-@BZ-!@&_okUQ1ZKr(wwab? z8MbKIqGqYtN6XX{Ei1J>+sm@OEX!O=?fbp&9_D`DWzOgP&hMQ6Ip@FJ<))=`}4G50k((r@F0uW&8Z$YTx{$jy}wQDM>>bXUvYoG@%=j_1(p?j`NYJ z@N(0TiJ2JRz`yb6tr(5T_DNfNS6fF%Tl=!E-oe4H;p(PRp{rCZ7rOe2LxtXIaYMnl ziMhC9xwkaZmk($nB459W+LNd;G|;oEa!zZdb>M&v{YNgK&ju={Jh+*8B*`Y-t14#> z^h8V}D*@kI=^a2j#yM=t@LY6nrN$pDCdinDxtPVdn5Eldnv%8P%1E`dxUo_I<_S!BY38)D>ggi;Id?26goJ4;QMKnkorE!TD1?A08L%8>K6?)B((#bKCDSuf4Wtz%rrNOf?;N!ia`V|=ck z=H%`%enq(!p60$6zFJ_jvc+@_9)v6QZN%S@Yz6mmFvO3FH;NmRC&0tq>jz7P;naQ# zJ6%V#{-@DBm5rx)w`f0;+1=^*hU8i7Vj9Z{VdIExWep21pmW}##x2`eg-yBUWDmG( z9nupIo#vheKUTXr*$cb3M016{<+Ww2#qCoUxBpna)!n6{$1bhCFvc%sbjG#9^^^+b zzLUG%nzU{Q+)!b7q(qQCUx3|L7_6>aGSc7AOxl>Y&k66-S;%r?(js8*F|XhlnsQTf zoaffKmK@iRaXzHl*c0;^XD!d%-iTM&b82R4d%p61ZT&IcGnMyi>u*G?r9Csnjp=#H z$9leuwZC-U<7)Gcc-JN$uWdXk*_jUPVGg4mFWwc@jBY`8qXQn`sja^S+vlyEG6#FR z49n|x9b?SF*yh>^Ns`SAcfsBn)S}6yUwe9L)okNu)a*s!cP`_QuxEqqHS*5`lmCA6 zpABk3jJN+aXgTBU?;N!K$?HECZ4WWO^T3XkUw~iRvlRI*02^;U$M>S`*}a+#@?6N} z#k#y|(zViWUhV#ja~}3MfJB{jVDm=b-m|W?d8fnrGBhu*i*!DCM?AN#)@BaM|v9DR!KSFyZ!I=7|Rk3ZIaV)&O~U$le0&`m)6-=qjCjZU4h-AzzT?97oIY4*Z>_T*&DrjO@46Uo{-^3}-*tKaGdbIO&*p5$`@YM^ 
z`>xB|zUT6`@3*||`z>$#UJH9F`F*eD?S*x=@3QdQkMA+<6WKq{Ko{b(rUTaJ%!P=Y z;zf9J39&xAx{(W!g`5QUU3)R7p@_Z+=|=_-bN8ZOkI0*Q9lDfp@?PdHXSvsd^_$x> zq-}23bP%zg&unx1+<5~cA2EJQI4&PEj4gi~d3<(N5y#2jL>{>jME(Wpz7g&9GCuhY zh@59$9JMxr?Z1QkC=fE<^0Qg6)31E#o@Lu6;Q&Ba_jLdpovq`kb44{SKsl zuiu&RvDfdymQ%(f4antL&Ub^&YmEDSDO%1x?c+U&oIaly+WLGyydUx15V0Qs8?%P> z^2~h@F-G3H*B~7{^*r+*Li8(Modbk#>9X)&md|hVS??(}1qKA)x zjgcSgp>_{^98SODWe?Whg4P%HZvb0A>`#D=xhz}5Pl9hi-Hi6~Td41@@M6%*!mSOdo;$iz6-5C?uYxquJwJ0wLOPlL*(Orcp&5C&Ex%G zu6xlBWcWd}IpnvZA3}UCJdAez)z6dv8|X(6Fa6&{>sMRnKaQBovEmr}6R>Nb z?f(4~xjM7mzn>xA>kUJ3fNpRk5|FYLq5j529}>p26K3LJU{y)=F=Z*q9ewQ13T}iH6A-(s{z|u z@-e0nEFZO+!2Bf#Am-B_>#5H*a=o0VI%+lJvsUc?1Z+9iJ@!B@zXz^Y^sybjSSRPa zJ+^Zh&F+y&*v80Tm2s27@-wm;es{}7&MDwn4|DE_Z4KY2+EcNOk&m3y!19sPZ+E%K zxf9qrVebq!CdTgqmh*R8)Y%p6IDN5ia*<;lkmwq*54y?F}4LP7xo-*esA{1c3mfD-&k|8`QtZhk~r3QW6bSu z8U2xKU$C6MebCy`*M8tBGTlqLIi@^D_coMef zC+@AqU}NMbXL*-^`Ae1{`pxTaWcgUrljRZL`S!9LEZ2s3uH{yM`Ag5WetWdn@Sl?T z=ff=pFGCz-y;ijTsCOz@F6?%&b9ElpIt^@Y`N)4dn7_pQj@NHoK{tXJT8wlWIQuR)Xb1;$B__$6s30oaQmsnjL8U*0_b3v%t5awb!6mW3SC@Yn+Yk znE1|L1D5j&?wpL14sLD6`Mngs2hN3ayz%;73x5yTgLU=$d*C?4|HHDr&p5B`kYnlJ z@2c$)&-!Gv|9>?VJqZHk$K~(s zJ+S3t+$^x~9{JJiZ0rd(pT5Yo7ua00k?5fXEFZb%fbF3LiClYw&8N>jb?$PJcOP*5 z-2Cp4kG%VV?NL6)?GLW+$8QMv7m$MD(-*mp0-Gzp)<3fxPT;s;h{1kYv{F&bY+cUo-I?nh)aQ@6M zuCteBcC5i7Z1d#L{7KmI&dq$<5x)d%e182~vE_}oo_5490~?=TbN$r-O}=kC+Z{eoQB}y+oZe fz{beOm@~ooURGioCm;L03T%(o)bD=F`HcHNoe)lz literal 0 HcmV?d00001 diff --git a/assets/shaders/fsr2_motion.comp.glsl b/assets/shaders/fsr2_motion.comp.glsl new file mode 100644 index 00000000..f4f68c2c --- /dev/null +++ b/assets/shaders/fsr2_motion.comp.glsl @@ -0,0 +1,44 @@ +#version 450 + +layout(local_size_x = 8, local_size_y = 8) in; + +layout(set = 0, binding = 0) uniform sampler2D depthBuffer; +layout(set = 0, binding = 1, rg16f) uniform writeonly image2D motionVectors; + +layout(push_constant) uniform PushConstants { + mat4 invViewProj; // Inverse of current jittered VP + mat4 prevViewProj; // Previous frame 
unjittered VP + vec4 resolution; // xy = internal size, zw = 1/internal size + vec4 jitterOffset; // xy = current jitter (NDC), zw = previous jitter +} pc; + +void main() { + ivec2 pixelCoord = ivec2(gl_GlobalInvocationID.xy); + ivec2 imgSize = ivec2(pc.resolution.xy); + if (pixelCoord.x >= imgSize.x || pixelCoord.y >= imgSize.y) return; + + // Sample depth (Vulkan: 0 = near, 1 = far) + float depth = texelFetch(depthBuffer, pixelCoord, 0).r; + + // Pixel center in NDC [-1, 1] + vec2 uv = (vec2(pixelCoord) + 0.5) * pc.resolution.zw; + vec2 ndc = uv * 2.0 - 1.0; + + // Reconstruct world position from depth + vec4 clipPos = vec4(ndc, depth, 1.0); + vec4 worldPos = pc.invViewProj * clipPos; + worldPos /= worldPos.w; + + // Project into previous frame's clip space (unjittered) + vec4 prevClip = pc.prevViewProj * worldPos; + vec2 prevNdc = prevClip.xy / prevClip.w; + vec2 prevUV = prevNdc * 0.5 + 0.5; + + // Remove jitter from current UV to get unjittered position + vec2 unjitteredUV = uv - pc.jitterOffset.xy * 0.5; + + // Motion = previous position - current unjittered position (in UV space) + vec2 motion = prevUV - unjitteredUV; + + imageStore(motionVectors, pixelCoord, vec4(motion, 0.0, 0.0)); +} diff --git a/assets/shaders/fsr2_motion.comp.spv b/assets/shaders/fsr2_motion.comp.spv new file mode 100644 index 0000000000000000000000000000000000000000..813c4b9dbf213b51b98f167c1ceceac241558285 GIT binary patch literal 3668 zcmZved3Tdl5QpE=g-t+Y2gMZJLBs__5hyJf&`OIyRa`#RJTB*Wj?a_#UWh$j&Sd8Io4GS{x3}~!+P*kR`jX|zisYvx8>^E=FiE0*5xqPa z>f-$PsqtN{c4^ludv+VSGU-=!eXEEW#2!Fqrz%o(q z)`hWpt<|2cwOin!z+)zr+T2tny)@aZUxZ!UjdzWWX8ORd5tnfTNi}dMLLP&SIWy&)q2w~SQ_DI}OR`z7 z^XS=nwnpP_-^WuNX}xKlJp1iES&lr490RXMJG@WF_>Fjbm;Eot<>ih&nCHIt&J#Ot z1c|xi9?Z+~dnU%sBQdT&aK4K?=U^Yk*|Yn;4RJ&~oWU-3t-XG+eruacys2vsBtN2E zzjzH5tT9ZC^|jYgZwQ@3oWB8V|8n|sVBeOUV*Ii$&kw_R^UImj_=nKulHVV&=MeL+ z#5Ugi>zTC%okPsE7A^LzZRB?yT28!`uG}ZN$m_G_2DI-{zct0)ZMhv_*W(^o(>j~c 
z)`@G}f^Dq%*23n{&a=;9#9^In=(s=8zXPr>`1b_Axi^vVRkXJL-VyN@*1r#japr$5 z1j)G$vG>6d*Kr_&-T1@U_Tzf=Tk9yoI27@T439)S*~RmRtal3A`EegV#C9LtU*lc> zImELSyN@%7{YQKiTfghQiT=O!y3f|J<}I|tSl1}NEy0LA-p%0s9`1p?hdb%Od*!{B zQ{4Z(yi99&PaO91Cyeja8qRfH_7Jh(;)p%By*=1{9bJI^-o<$L?eqeCb^$K;VDtH{ zi}S^;1=w$0uYW$^1N85jS0TQm38Ge_{Wi%djy~iXvF7k^qW**IgzFX`W;eX&ehYmY zz6W!A_KzX*=Jqaa4V=8g+_jMVacuqOb`Qno_HArOtT&99+xzkaA|GR(#CBe9%u`_b zU&)h0KaDs~{ulD((9a<9e^B>1wBv7l^3Ni2p1F3cwG-QZyd$xn=dtBu%r0#E>5X{- zEFb&XjqN=7zo-#wsefnBa%lfQV}IT`9&DUG_tATG3|V-ujt4&8tGB>%ihcGYw?fXhvCV6YeU75#?9)EpLFDd* z_=&)sK*wHBc5QKz+z4^+VjHW^yuM?(nD-2}oY?)ELfoUcfA3)%Rxf?;^JK;u=21Hb&l@ zWwcz3pT(Aoccq3cr#Rf}xZXOpvF8xydu|OxKHi1T0w?dfHlv$}dG*;tHE`z8-$vx( z9qR%SKGxc)D|a*D(C$G*PA z)^8v2o_~dH9ewt287&ukxQ=aK5r2(sOq_oM+ZcK0&!gpxzlr_^xgGFg^c`er!0z3* m$UG9?$ajH5-RHg!oNI~q@CR&T_1z6pq^9+(q2F)hKIC65d>`=u literal 0 HcmV?d00001 diff --git a/assets/shaders/fsr2_sharpen.frag.glsl b/assets/shaders/fsr2_sharpen.frag.glsl new file mode 100644 index 00000000..b4dd928b --- /dev/null +++ b/assets/shaders/fsr2_sharpen.frag.glsl @@ -0,0 +1,46 @@ +#version 450 + +layout(location = 0) in vec2 TexCoord; +layout(location = 0) out vec4 FragColor; + +layout(set = 0, binding = 0) uniform sampler2D inputImage; + +layout(push_constant) uniform PushConstants { + vec4 params; // x = 1/width, y = 1/height, z = sharpness (0-2), w = unused +} pc; + +void main() { + vec2 texelSize = pc.params.xy; + float sharpness = pc.params.z; + + // RCAS: Robust Contrast-Adaptive Sharpening + // 5-tap cross pattern + vec3 center = texture(inputImage, TexCoord).rgb; + vec3 north = texture(inputImage, TexCoord + vec2(0.0, -texelSize.y)).rgb; + vec3 south = texture(inputImage, TexCoord + vec2(0.0, texelSize.y)).rgb; + vec3 west = texture(inputImage, TexCoord + vec2(-texelSize.x, 0.0)).rgb; + vec3 east = texture(inputImage, TexCoord + vec2( texelSize.x, 0.0)).rgb; + + // Compute local contrast (min/max of neighborhood) + vec3 minRGB = min(center, min(min(north, south), min(west, east))); + 
vec3 maxRGB = max(center, max(max(north, south), max(west, east))); + + // Adaptive sharpening weight based on local contrast + // High contrast = less sharpening (prevent ringing) + vec3 range = maxRGB - minRGB; + vec3 rcpRange = 1.0 / (range + 0.001); + + // Sharpening amount: inversely proportional to contrast + float luma = dot(center, vec3(0.299, 0.587, 0.114)); + float lumaRange = max(range.r, max(range.g, range.b)); + float w = clamp(1.0 - lumaRange * 2.0, 0.0, 1.0) * sharpness * 0.25; + + // Apply sharpening via unsharp mask + vec3 avg = (north + south + west + east) * 0.25; + vec3 sharpened = center + (center - avg) * w; + + // Clamp to prevent ringing artifacts + sharpened = clamp(sharpened, minRGB, maxRGB); + + FragColor = vec4(sharpened, 1.0); +} diff --git a/assets/shaders/fsr2_sharpen.frag.spv b/assets/shaders/fsr2_sharpen.frag.spv new file mode 100644 index 0000000000000000000000000000000000000000..99aba03a78ee43bcb0840210d120f2610bb1845b GIT binary patch literal 3984 zcmZ9N`Bq#-5XLXdkceatV2nmcT+kqHB*sJt1j!IZ*`iTV$Lnx~i8FU5vjB-pK#WO5 zWQw5L>_>FGW6FcDk5hA5DjZQdrYQ8SkFGa-(aB0pj z7Q&#!8uq(Wb2R8Be&+$PkdxkJ{j_4FDXUx)Z+&1e|@ zX3h8;W&7gV(1>l(O#e{tv6#~jXszN~HFwDBWCvruO_;&@?V5?Te7l9M9@e)-ar&dd zdU(6cYQ0UE_0&hLdt=O5$$y7t@}cihB=R#<2Rkh0V`-24oD2KO3>7WhVEoSVL#f6aBhP?hfCe-jDv?BePue zKKbaY+hZSVxL-IvdUe07Gv@4dkBq)Nqxz`jUKugyV|p?Ahy48AAm|Xa*)?>o#htFz#QWza$ zxV83*D)G}FuKLOD@;tg%wpaXM(5tidr@ZZ#d`eaTXKBt zPb~E=YF?6A&i94UF2(2jKp1T$vg(;H33GS&TJ-;gy+4$(H{UXTW@0SwW#Lwt<-H=z z8g$b>5=Or&qqb$uXyo7>KbCbl{1ah%-QM|{Ffr)1=FJff#&2tH2$Opv?u}oC(HN%v lCX7Z-(|(V&dd=hN)n?6q$XHK2etLyH*EQq&PZ^HL{s%84D--|# literal 0 HcmV?d00001 diff --git a/include/rendering/camera.hpp b/include/rendering/camera.hpp index 0464007f..99a4879a 100644 --- a/include/rendering/camera.hpp +++ b/include/rendering/camera.hpp @@ -23,9 +23,16 @@ public: const glm::vec3& getPosition() const { return position; } const glm::mat4& getViewMatrix() const { return viewMatrix; } const glm::mat4& getProjectionMatrix() const { 
return projectionMatrix; } + const glm::mat4& getUnjitteredProjectionMatrix() const { return unjitteredProjectionMatrix; } glm::mat4 getViewProjectionMatrix() const { return projectionMatrix * viewMatrix; } + glm::mat4 getUnjitteredViewProjectionMatrix() const { return unjitteredProjectionMatrix * viewMatrix; } float getAspectRatio() const { return aspectRatio; } + // Sub-pixel jitter for temporal upscaling (FSR 2) + void setJitter(float jx, float jy); + void clearJitter(); + glm::vec2 getJitter() const { return jitterOffset; } + glm::vec3 getForward() const; glm::vec3 getRight() const; glm::vec3 getUp() const; @@ -46,6 +53,8 @@ private: glm::mat4 viewMatrix = glm::mat4(1.0f); glm::mat4 projectionMatrix = glm::mat4(1.0f); + glm::mat4 unjitteredProjectionMatrix = glm::mat4(1.0f); + glm::vec2 jitterOffset = glm::vec2(0.0f); // NDC jitter (applied to projection) }; } // namespace rendering diff --git a/include/rendering/renderer.hpp b/include/rendering/renderer.hpp index cbb9c7e1..13f77fe2 100644 --- a/include/rendering/renderer.hpp +++ b/include/rendering/renderer.hpp @@ -261,13 +261,15 @@ public: float getShadowDistance() const { return shadowDistance_; } void setMsaaSamples(VkSampleCountFlagBits samples); - // FSR 1.0 (FidelityFX Super Resolution) upscaling + // FSR (FidelityFX Super Resolution) upscaling void setFSREnabled(bool enabled); bool isFSREnabled() const { return fsr_.enabled; } void setFSRQuality(float scaleFactor); // 0.50=Perf, 0.59=Balanced, 0.67=Quality, 0.77=UltraQuality void setFSRSharpness(float sharpness); // 0.0 - 2.0 float getFSRScaleFactor() const { return fsr_.scaleFactor; } float getFSRSharpness() const { return fsr_.sharpness; } + void setFSR2Enabled(bool enabled); + bool isFSR2Enabled() const { return fsr2_.enabled; } void setWaterRefractionEnabled(bool enabled); bool isWaterRefractionEnabled() const; @@ -363,6 +365,65 @@ private: void destroyFSRResources(); void renderFSRUpscale(); + // FSR 2.2 temporal upscaling state + struct FSR2State 
{ + bool enabled = false; + bool needsRecreate = false; + float scaleFactor = 0.77f; + float sharpness = 0.5f; + uint32_t internalWidth = 0; + uint32_t internalHeight = 0; + + // Off-screen scene targets (internal resolution, no MSAA — FSR2 replaces AA) + AllocatedImage sceneColor{}; + AllocatedImage sceneDepth{}; + VkFramebuffer sceneFramebuffer = VK_NULL_HANDLE; + + // Samplers + VkSampler linearSampler = VK_NULL_HANDLE; // For color + VkSampler nearestSampler = VK_NULL_HANDLE; // For depth / motion vectors + + // Motion vector buffer (internal resolution) + AllocatedImage motionVectors{}; + + // History buffers (display resolution, ping-pong) + AllocatedImage history[2]{}; + uint32_t currentHistory = 0; // Output index (0 or 1) + + // Compute pipelines + VkPipeline motionVecPipeline = VK_NULL_HANDLE; + VkPipelineLayout motionVecPipelineLayout = VK_NULL_HANDLE; + VkDescriptorSetLayout motionVecDescSetLayout = VK_NULL_HANDLE; + VkDescriptorPool motionVecDescPool = VK_NULL_HANDLE; + VkDescriptorSet motionVecDescSet = VK_NULL_HANDLE; + + VkPipeline accumulatePipeline = VK_NULL_HANDLE; + VkPipelineLayout accumulatePipelineLayout = VK_NULL_HANDLE; + VkDescriptorSetLayout accumulateDescSetLayout = VK_NULL_HANDLE; + VkDescriptorPool accumulateDescPool = VK_NULL_HANDLE; + VkDescriptorSet accumulateDescSets[2] = {}; // Per ping-pong + + // RCAS sharpening pass (display resolution) + VkPipeline sharpenPipeline = VK_NULL_HANDLE; + VkPipelineLayout sharpenPipelineLayout = VK_NULL_HANDLE; + VkDescriptorSetLayout sharpenDescSetLayout = VK_NULL_HANDLE; + VkDescriptorPool sharpenDescPool = VK_NULL_HANDLE; + VkDescriptorSet sharpenDescSet = VK_NULL_HANDLE; + + // Previous frame state for motion vector reprojection + glm::mat4 prevViewProjection = glm::mat4(1.0f); + glm::vec2 prevJitter = glm::vec2(0.0f); + uint32_t frameIndex = 0; + bool needsHistoryReset = true; + }; + FSR2State fsr2_; + bool initFSR2Resources(); + void destroyFSR2Resources(); + void dispatchMotionVectors(); + 
void dispatchTemporalAccumulate(); + void renderFSR2Sharpen(); + static float halton(uint32_t index, uint32_t base); + // Footstep event tracking (animation-driven) uint32_t footstepLastAnimationId = 0; float footstepLastNormTime = 0.0f; diff --git a/src/rendering/camera.cpp b/src/rendering/camera.cpp index f8b45f3c..bd1ebe0a 100644 --- a/src/rendering/camera.cpp +++ b/src/rendering/camera.cpp @@ -20,6 +20,13 @@ void Camera::updateProjectionMatrix() { projectionMatrix = glm::perspective(glm::radians(fov), aspectRatio, nearPlane, farPlane); // Vulkan clip-space has Y pointing down; flip the projection's Y axis. projectionMatrix[1][1] *= -1.0f; + unjitteredProjectionMatrix = projectionMatrix; + + // Re-apply jitter if active + if (jitterOffset.x != 0.0f || jitterOffset.y != 0.0f) { + projectionMatrix[2][0] += jitterOffset.x; + projectionMatrix[2][1] += jitterOffset.y; + } } glm::vec3 Camera::getForward() const { @@ -40,6 +47,21 @@ glm::vec3 Camera::getUp() const { return glm::normalize(glm::cross(getRight(), getForward())); } +void Camera::setJitter(float jx, float jy) { + // Remove old jitter, apply new + projectionMatrix[2][0] -= jitterOffset.x; + projectionMatrix[2][1] -= jitterOffset.y; + jitterOffset = glm::vec2(jx, jy); + projectionMatrix[2][0] += jitterOffset.x; + projectionMatrix[2][1] += jitterOffset.y; +} + +void Camera::clearJitter() { + projectionMatrix[2][0] -= jitterOffset.x; + projectionMatrix[2][1] -= jitterOffset.y; + jitterOffset = glm::vec2(0.0f); +} + Ray Camera::screenToWorldRay(float screenX, float screenY, float screenW, float screenH) const { float ndcX = (2.0f * screenX / screenW) - 1.0f; // Vulkan Y-flip is baked into projectionMatrix, so NDC Y maps directly: diff --git a/src/rendering/renderer.cpp b/src/rendering/renderer.cpp index 4e2b66f5..81686219 100644 --- a/src/rendering/renderer.cpp +++ b/src/rendering/renderer.cpp @@ -837,6 +837,7 @@ void Renderer::shutdown() { } destroyFSRResources(); + destroyFSR2Resources(); 
destroyPerFrameResources(); zoneManager.reset(); @@ -937,6 +938,7 @@ void Renderer::applyMsaaChange() { if (selCirclePipeline) { vkDestroyPipeline(device, selCirclePipeline, nullptr); selCirclePipeline = VK_NULL_HANDLE; } if (overlayPipeline) { vkDestroyPipeline(device, overlayPipeline, nullptr); overlayPipeline = VK_NULL_HANDLE; } if (fsr_.sceneFramebuffer) destroyFSRResources(); // Will be lazily recreated in beginFrame() + if (fsr2_.sceneFramebuffer) destroyFSR2Resources(); // Reinitialize ImGui Vulkan backend with new MSAA sample count ImGui_ImplVulkan_Shutdown(); @@ -972,13 +974,26 @@ void Renderer::beginFrame() { fsr_.needsRecreate = false; if (!fsr_.enabled) LOG_INFO("FSR: disabled"); } - if (fsr_.enabled && !fsr_.sceneFramebuffer) { + if (fsr_.enabled && !fsr2_.enabled && !fsr_.sceneFramebuffer) { if (!initFSRResources()) { LOG_ERROR("FSR: initialization failed, disabling"); fsr_.enabled = false; } } + // FSR 2.2 resource management + if (fsr2_.needsRecreate && fsr2_.sceneFramebuffer) { + destroyFSR2Resources(); + fsr2_.needsRecreate = false; + if (!fsr2_.enabled) LOG_INFO("FSR2: disabled"); + } + if (fsr2_.enabled && !fsr2_.sceneFramebuffer) { + if (!initFSR2Resources()) { + LOG_ERROR("FSR2: initialization failed, disabling"); + fsr2_.enabled = false; + } + } + // Handle swapchain recreation if needed if (vkCtx->isSwapchainDirty()) { vkCtx->recreateSwapchain(window->getWidth(), window->getHeight()); @@ -987,10 +1002,14 @@ void Renderer::beginFrame() { waterRenderer->recreatePipelines(); } // Recreate FSR resources for new swapchain dimensions - if (fsr_.enabled) { + if (fsr_.enabled && !fsr2_.enabled) { destroyFSRResources(); initFSRResources(); } + if (fsr2_.enabled) { + destroyFSR2Resources(); + initFSR2Resources(); + } } // Acquire swapchain image and begin command buffer @@ -1000,6 +1019,14 @@ void Renderer::beginFrame() { return; } + // Apply FSR2 jitter to camera projection before UBO upload + if (fsr2_.enabled && fsr2_.sceneFramebuffer && camera) { 
+ // Halton(2,3) sequence for sub-pixel jitter, scaled to internal resolution + float jx = (halton(fsr2_.frameIndex + 1, 2) - 0.5f) * 2.0f / static_cast<float>(fsr2_.internalWidth); + float jy = (halton(fsr2_.frameIndex + 1, 3) - 0.5f) * 2.0f / static_cast<float>(fsr2_.internalHeight); + camera->setJitter(jx, jy); + } + // Update per-frame UBO with current camera/lighting state updatePerFrameUBO(); @@ -1044,7 +1071,10 @@ void Renderer::beginFrame() { rpInfo.renderPass = vkCtx->getImGuiRenderPass(); VkExtent2D renderExtent; - if (fsr_.enabled && fsr_.sceneFramebuffer) { + if (fsr2_.enabled && fsr2_.sceneFramebuffer) { + rpInfo.framebuffer = fsr2_.sceneFramebuffer; + renderExtent = { fsr2_.internalWidth, fsr2_.internalHeight }; + } else if (fsr_.enabled && fsr_.sceneFramebuffer) { rpInfo.framebuffer = fsr_.sceneFramebuffer; renderExtent = { fsr_.internalWidth, fsr_.internalHeight }; } else { @@ -1097,7 +1127,60 @@ void Renderer::beginFrame() { void Renderer::endFrame() { if (!vkCtx || currentCmd == VK_NULL_HANDLE) return; - if (fsr_.enabled && fsr_.sceneFramebuffer) { + if (fsr2_.enabled && fsr2_.sceneFramebuffer) { + // End the off-screen scene render pass + vkCmdEndRenderPass(currentCmd); + + // Compute passes: motion vectors → temporal accumulation + dispatchMotionVectors(); + dispatchTemporalAccumulate(); + + // Transition history output: GENERAL → SHADER_READ_ONLY for sharpen pass + transitionImageLayout(currentCmd, fsr2_.history[fsr2_.currentHistory].image, + VK_IMAGE_LAYOUT_GENERAL, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, + VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, + VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT); + + // Begin swapchain render pass at full resolution for sharpening + ImGui + VkRenderPassBeginInfo rpInfo{}; + rpInfo.sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO; + rpInfo.renderPass = vkCtx->getImGuiRenderPass(); + rpInfo.framebuffer = vkCtx->getSwapchainFramebuffers()[currentImageIndex]; + rpInfo.renderArea.offset = {0, 0}; + rpInfo.renderArea.extent = 
vkCtx->getSwapchainExtent(); + + bool msaaOn = (vkCtx->getMsaaSamples() > VK_SAMPLE_COUNT_1_BIT); + VkClearValue clearValues[4]{}; + clearValues[0].color = {{0.0f, 0.0f, 0.0f, 1.0f}}; + clearValues[1].depthStencil = {1.0f, 0}; + clearValues[2].color = {{0.0f, 0.0f, 0.0f, 1.0f}}; + clearValues[3].depthStencil = {1.0f, 0}; + rpInfo.clearValueCount = msaaOn ? (vkCtx->getDepthResolveImageView() ? 4u : 3u) : 2u; + rpInfo.pClearValues = clearValues; + + vkCmdBeginRenderPass(currentCmd, &rpInfo, VK_SUBPASS_CONTENTS_INLINE); + + VkExtent2D ext = vkCtx->getSwapchainExtent(); + VkViewport vp{}; + vp.width = static_cast<float>(ext.width); + vp.height = static_cast<float>(ext.height); + vp.maxDepth = 1.0f; + vkCmdSetViewport(currentCmd, 0, 1, &vp); + VkRect2D sc{}; + sc.extent = ext; + vkCmdSetScissor(currentCmd, 0, 1, &sc); + + // Draw RCAS sharpening from accumulated history buffer + renderFSR2Sharpen(); + + // Store current VP for next frame's motion vectors, advance frame + fsr2_.prevViewProjection = camera->getUnjitteredViewProjectionMatrix(); + fsr2_.prevJitter = camera->getJitter(); + camera->clearJitter(); + fsr2_.currentHistory = 1 - fsr2_.currentHistory; + fsr2_.frameIndex++; + + } else if (fsr_.enabled && fsr_.sceneFramebuffer) { // End the off-screen scene render pass vkCmdEndRenderPass(currentCmd); @@ -1149,7 +1232,7 @@ void Renderer::endFrame() { } // ImGui rendering — must respect subpass contents mode - if (!fsr_.enabled && parallelRecordingEnabled_) { + if (!fsr_.enabled && !fsr2_.enabled && parallelRecordingEnabled_) { // Scene pass was begun with VK_SUBPASS_CONTENTS_SECONDARY_COMMAND_BUFFERS, // so ImGui must be recorded into a secondary command buffer. 
VkCommandBuffer imguiCmd = beginSecondary(SEC_IMGUI); @@ -3572,19 +3655,576 @@ void Renderer::setFSREnabled(bool enabled) { void Renderer::setFSRQuality(float scaleFactor) { scaleFactor = glm::clamp(scaleFactor, 0.5f, 1.0f); - if (fsr_.scaleFactor == scaleFactor) return; fsr_.scaleFactor = scaleFactor; + fsr2_.scaleFactor = scaleFactor; // Don't destroy/recreate mid-frame — mark for lazy recreation in next beginFrame() if (fsr_.enabled && fsr_.sceneFramebuffer) { fsr_.needsRecreate = true; } + if (fsr2_.enabled && fsr2_.sceneFramebuffer) { + fsr2_.needsRecreate = true; + fsr2_.needsHistoryReset = true; + } } void Renderer::setFSRSharpness(float sharpness) { fsr_.sharpness = glm::clamp(sharpness, 0.0f, 2.0f); + fsr2_.sharpness = glm::clamp(sharpness, 0.0f, 2.0f); } -// ========================= End FSR ========================= +// ========================= End FSR 1.0 ========================= + +// ========================= FSR 2.2 Temporal Upscaling ========================= + +float Renderer::halton(uint32_t index, uint32_t base) { + float f = 1.0f; + float r = 0.0f; + uint32_t current = index; + while (current > 0) { + f /= static_cast<float>(base); + r += f * static_cast<float>(current % base); + current /= base; + } + return r; +} + +bool Renderer::initFSR2Resources() { + if (!vkCtx) return false; + + VkDevice device = vkCtx->getDevice(); + VmaAllocator alloc = vkCtx->getAllocator(); + VkExtent2D swapExtent = vkCtx->getSwapchainExtent(); + + fsr2_.internalWidth = static_cast<uint32_t>(swapExtent.width * fsr2_.scaleFactor); + fsr2_.internalHeight = static_cast<uint32_t>(swapExtent.height * fsr2_.scaleFactor); + fsr2_.internalWidth = (fsr2_.internalWidth + 1) & ~1u; + fsr2_.internalHeight = (fsr2_.internalHeight + 1) & ~1u; + + LOG_INFO("FSR2: initializing at ", fsr2_.internalWidth, "x", fsr2_.internalHeight, + " -> ", swapExtent.width, "x", swapExtent.height, + " (scale=", fsr2_.scaleFactor, ")"); + + VkFormat colorFmt = vkCtx->getSwapchainFormat(); + VkFormat depthFmt = 
vkCtx->getDepthFormat(); + + // Scene color (internal resolution, 1x — FSR2 replaces MSAA) + fsr2_.sceneColor = createImage(device, alloc, fsr2_.internalWidth, fsr2_.internalHeight, + colorFmt, VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_SAMPLED_BIT); + if (!fsr2_.sceneColor.image) { LOG_ERROR("FSR2: failed to create scene color"); return false; } + + // Scene depth (internal resolution, 1x, sampled for motion vectors) + fsr2_.sceneDepth = createImage(device, alloc, fsr2_.internalWidth, fsr2_.internalHeight, + depthFmt, VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT | VK_IMAGE_USAGE_SAMPLED_BIT); + if (!fsr2_.sceneDepth.image) { LOG_ERROR("FSR2: failed to create scene depth"); destroyFSR2Resources(); return false; } + + // Motion vector buffer (internal resolution) + fsr2_.motionVectors = createImage(device, alloc, fsr2_.internalWidth, fsr2_.internalHeight, + VK_FORMAT_R16G16_SFLOAT, VK_IMAGE_USAGE_STORAGE_BIT | VK_IMAGE_USAGE_SAMPLED_BIT); + if (!fsr2_.motionVectors.image) { LOG_ERROR("FSR2: failed to create motion vectors"); destroyFSR2Resources(); return false; } + + // History buffers (display resolution, ping-pong) + for (int i = 0; i < 2; i++) { + fsr2_.history[i] = createImage(device, alloc, swapExtent.width, swapExtent.height, + VK_FORMAT_R16G16B16A16_SFLOAT, + VK_IMAGE_USAGE_STORAGE_BIT | VK_IMAGE_USAGE_SAMPLED_BIT); + if (!fsr2_.history[i].image) { LOG_ERROR("FSR2: failed to create history buffer ", i); destroyFSR2Resources(); return false; } + } + + // Scene framebuffer (non-MSAA: [color, depth]) + // Must use the same render pass as the swapchain — which must be non-MSAA when FSR2 is active + VkImageView fbAttachments[2] = { fsr2_.sceneColor.imageView, fsr2_.sceneDepth.imageView }; + VkFramebufferCreateInfo fbInfo{}; + fbInfo.sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO; + fbInfo.renderPass = vkCtx->getImGuiRenderPass(); + fbInfo.attachmentCount = 2; + fbInfo.pAttachments = fbAttachments; + fbInfo.width = fsr2_.internalWidth; + fbInfo.height 
= fsr2_.internalHeight; + fbInfo.layers = 1; + if (vkCreateFramebuffer(device, &fbInfo, nullptr, &fsr2_.sceneFramebuffer) != VK_SUCCESS) { + LOG_ERROR("FSR2: failed to create scene framebuffer"); + destroyFSR2Resources(); + return false; + } + + // Samplers + VkSamplerCreateInfo samplerInfo{}; + samplerInfo.sType = VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO; + samplerInfo.minFilter = VK_FILTER_LINEAR; + samplerInfo.magFilter = VK_FILTER_LINEAR; + samplerInfo.addressModeU = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE; + samplerInfo.addressModeV = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE; + samplerInfo.addressModeW = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE; + vkCreateSampler(device, &samplerInfo, nullptr, &fsr2_.linearSampler); + + samplerInfo.minFilter = VK_FILTER_NEAREST; + samplerInfo.magFilter = VK_FILTER_NEAREST; + vkCreateSampler(device, &samplerInfo, nullptr, &fsr2_.nearestSampler); + + // --- Motion Vector Compute Pipeline --- + { + // Descriptor set layout: binding 0 = depth (sampler), binding 1 = motion vectors (storage image) + VkDescriptorSetLayoutBinding bindings[2] = {}; + bindings[0].binding = 0; + bindings[0].descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER; + bindings[0].descriptorCount = 1; + bindings[0].stageFlags = VK_SHADER_STAGE_COMPUTE_BIT; + bindings[1].binding = 1; + bindings[1].descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE; + bindings[1].descriptorCount = 1; + bindings[1].stageFlags = VK_SHADER_STAGE_COMPUTE_BIT; + + VkDescriptorSetLayoutCreateInfo layoutInfo{VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO}; + layoutInfo.bindingCount = 2; + layoutInfo.pBindings = bindings; + vkCreateDescriptorSetLayout(device, &layoutInfo, nullptr, &fsr2_.motionVecDescSetLayout); + + VkPushConstantRange pc{}; + pc.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT; + pc.offset = 0; + pc.size = 2 * sizeof(glm::mat4) + 2 * sizeof(glm::vec4); // 160 bytes + + VkPipelineLayoutCreateInfo plCI{VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO}; + plCI.setLayoutCount = 1; + 
plCI.pSetLayouts = &fsr2_.motionVecDescSetLayout; + plCI.pushConstantRangeCount = 1; + plCI.pPushConstantRanges = &pc; + vkCreatePipelineLayout(device, &plCI, nullptr, &fsr2_.motionVecPipelineLayout); + + VkShaderModule compMod; + if (!compMod.loadFromFile(device, "assets/shaders/fsr2_motion.comp.spv")) { + LOG_ERROR("FSR2: failed to load motion vector compute shader"); + destroyFSR2Resources(); + return false; + } + + VkComputePipelineCreateInfo cpCI{VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO}; + cpCI.stage = compMod.stageInfo(VK_SHADER_STAGE_COMPUTE_BIT); + cpCI.layout = fsr2_.motionVecPipelineLayout; + if (vkCreateComputePipelines(device, VK_NULL_HANDLE, 1, &cpCI, nullptr, &fsr2_.motionVecPipeline) != VK_SUCCESS) { + LOG_ERROR("FSR2: failed to create motion vector pipeline"); + compMod.destroy(); + destroyFSR2Resources(); + return false; + } + compMod.destroy(); + + // Descriptor pool + set + VkDescriptorPoolSize poolSizes[2] = {}; + poolSizes[0] = {VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, 1}; + poolSizes[1] = {VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, 1}; + VkDescriptorPoolCreateInfo poolInfo{VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO}; + poolInfo.maxSets = 1; + poolInfo.poolSizeCount = 2; + poolInfo.pPoolSizes = poolSizes; + vkCreateDescriptorPool(device, &poolInfo, nullptr, &fsr2_.motionVecDescPool); + + VkDescriptorSetAllocateInfo dsAI{VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO}; + dsAI.descriptorPool = fsr2_.motionVecDescPool; + dsAI.descriptorSetCount = 1; + dsAI.pSetLayouts = &fsr2_.motionVecDescSetLayout; + vkAllocateDescriptorSets(device, &dsAI, &fsr2_.motionVecDescSet); + + // Write descriptors + VkDescriptorImageInfo depthImgInfo{}; + depthImgInfo.sampler = fsr2_.nearestSampler; + depthImgInfo.imageView = fsr2_.sceneDepth.imageView; + depthImgInfo.imageLayout = VK_IMAGE_LAYOUT_DEPTH_STENCIL_READ_ONLY_OPTIMAL; + + VkDescriptorImageInfo mvImgInfo{}; + mvImgInfo.imageView = fsr2_.motionVectors.imageView; + mvImgInfo.imageLayout = 
VK_IMAGE_LAYOUT_GENERAL; + + VkWriteDescriptorSet writes[2] = {}; + writes[0].sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET; + writes[0].dstSet = fsr2_.motionVecDescSet; + writes[0].dstBinding = 0; + writes[0].descriptorCount = 1; + writes[0].descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER; + writes[0].pImageInfo = &depthImgInfo; + + writes[1].sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET; + writes[1].dstSet = fsr2_.motionVecDescSet; + writes[1].dstBinding = 1; + writes[1].descriptorCount = 1; + writes[1].descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE; + writes[1].pImageInfo = &mvImgInfo; + + vkUpdateDescriptorSets(device, 2, writes, 0, nullptr); + } + + // --- Temporal Accumulation Compute Pipeline --- + { + // bindings: 0=sceneColor, 1=depth, 2=motionVectors, 3=historyInput, 4=historyOutput + VkDescriptorSetLayoutBinding bindings[5] = {}; + for (int i = 0; i < 4; i++) { + bindings[i].binding = i; + bindings[i].descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER; + bindings[i].descriptorCount = 1; + bindings[i].stageFlags = VK_SHADER_STAGE_COMPUTE_BIT; + } + bindings[4].binding = 4; + bindings[4].descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE; + bindings[4].descriptorCount = 1; + bindings[4].stageFlags = VK_SHADER_STAGE_COMPUTE_BIT; + + VkDescriptorSetLayoutCreateInfo layoutInfo{VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO}; + layoutInfo.bindingCount = 5; + layoutInfo.pBindings = bindings; + vkCreateDescriptorSetLayout(device, &layoutInfo, nullptr, &fsr2_.accumulateDescSetLayout); + + VkPushConstantRange pc{}; + pc.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT; + pc.offset = 0; + pc.size = 4 * sizeof(glm::vec4); // 64 bytes + + VkPipelineLayoutCreateInfo plCI{VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO}; + plCI.setLayoutCount = 1; + plCI.pSetLayouts = &fsr2_.accumulateDescSetLayout; + plCI.pushConstantRangeCount = 1; + plCI.pPushConstantRanges = &pc; + vkCreatePipelineLayout(device, &plCI, nullptr, &fsr2_.accumulatePipelineLayout); + 
+ VkShaderModule compMod; + if (!compMod.loadFromFile(device, "assets/shaders/fsr2_accumulate.comp.spv")) { + LOG_ERROR("FSR2: failed to load accumulation compute shader"); + destroyFSR2Resources(); + return false; + } + + VkComputePipelineCreateInfo cpCI{VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO}; + cpCI.stage = compMod.stageInfo(VK_SHADER_STAGE_COMPUTE_BIT); + cpCI.layout = fsr2_.accumulatePipelineLayout; + if (vkCreateComputePipelines(device, VK_NULL_HANDLE, 1, &cpCI, nullptr, &fsr2_.accumulatePipeline) != VK_SUCCESS) { + LOG_ERROR("FSR2: failed to create accumulation pipeline"); + compMod.destroy(); + destroyFSR2Resources(); + return false; + } + compMod.destroy(); + + // Descriptor pool: 2 sets (ping-pong), each with 4 samplers + 1 storage image + VkDescriptorPoolSize poolSizes[2] = {}; + poolSizes[0] = {VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, 8}; + poolSizes[1] = {VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, 2}; + VkDescriptorPoolCreateInfo poolInfo{VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO}; + poolInfo.maxSets = 2; + poolInfo.poolSizeCount = 2; + poolInfo.pPoolSizes = poolSizes; + vkCreateDescriptorPool(device, &poolInfo, nullptr, &fsr2_.accumulateDescPool); + + // Allocate 2 descriptor sets (one per ping-pong direction) + VkDescriptorSetLayout layouts[2] = { fsr2_.accumulateDescSetLayout, fsr2_.accumulateDescSetLayout }; + VkDescriptorSetAllocateInfo dsAI{VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO}; + dsAI.descriptorPool = fsr2_.accumulateDescPool; + dsAI.descriptorSetCount = 2; + dsAI.pSetLayouts = layouts; + vkAllocateDescriptorSets(device, &dsAI, fsr2_.accumulateDescSets); + + // Write descriptors for both ping-pong sets + for (int pp = 0; pp < 2; pp++) { + int inputHistory = 1 - pp; // Read from the other + int outputHistory = pp; // Write to this one + + VkDescriptorImageInfo colorInfo{fsr2_.linearSampler, fsr2_.sceneColor.imageView, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL}; + VkDescriptorImageInfo depthInfo{fsr2_.nearestSampler, 
fsr2_.sceneDepth.imageView, VK_IMAGE_LAYOUT_DEPTH_STENCIL_READ_ONLY_OPTIMAL}; + VkDescriptorImageInfo mvInfo{fsr2_.nearestSampler, fsr2_.motionVectors.imageView, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL}; + VkDescriptorImageInfo histInInfo{fsr2_.linearSampler, fsr2_.history[inputHistory].imageView, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL}; + VkDescriptorImageInfo histOutInfo{VK_NULL_HANDLE, fsr2_.history[outputHistory].imageView, VK_IMAGE_LAYOUT_GENERAL}; + + VkWriteDescriptorSet writes[5] = {}; + for (int w = 0; w < 5; w++) { + writes[w].sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET; + writes[w].dstSet = fsr2_.accumulateDescSets[pp]; + writes[w].dstBinding = w; + writes[w].descriptorCount = 1; + } + writes[0].descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER; writes[0].pImageInfo = &colorInfo; + writes[1].descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER; writes[1].pImageInfo = &depthInfo; + writes[2].descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER; writes[2].pImageInfo = &mvInfo; + writes[3].descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER; writes[3].pImageInfo = &histInInfo; + writes[4].descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE; writes[4].pImageInfo = &histOutInfo; + + vkUpdateDescriptorSets(device, 5, writes, 0, nullptr); + } + } + + // --- RCAS Sharpening Pipeline (fragment shader, fullscreen pass) --- + { + VkDescriptorSetLayoutBinding binding{}; + binding.binding = 0; + binding.descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER; + binding.descriptorCount = 1; + binding.stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT; + + VkDescriptorSetLayoutCreateInfo layoutInfo{VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO}; + layoutInfo.bindingCount = 1; + layoutInfo.pBindings = &binding; + vkCreateDescriptorSetLayout(device, &layoutInfo, nullptr, &fsr2_.sharpenDescSetLayout); + + VkPushConstantRange pc{}; + pc.stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT; + pc.offset = 0; + pc.size = sizeof(glm::vec4); + + 
VkPipelineLayoutCreateInfo plCI{VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO}; + plCI.setLayoutCount = 1; + plCI.pSetLayouts = &fsr2_.sharpenDescSetLayout; + plCI.pushConstantRangeCount = 1; + plCI.pPushConstantRanges = &pc; + vkCreatePipelineLayout(device, &plCI, nullptr, &fsr2_.sharpenPipelineLayout); + + VkShaderModule vertMod, fragMod; + if (!vertMod.loadFromFile(device, "assets/shaders/postprocess.vert.spv") || + !fragMod.loadFromFile(device, "assets/shaders/fsr2_sharpen.frag.spv")) { + LOG_ERROR("FSR2: failed to load sharpen shaders"); + destroyFSR2Resources(); + return false; + } + + fsr2_.sharpenPipeline = PipelineBuilder() + .setShaders(vertMod.stageInfo(VK_SHADER_STAGE_VERTEX_BIT), + fragMod.stageInfo(VK_SHADER_STAGE_FRAGMENT_BIT)) + .setVertexInput({}, {}) + .setTopology(VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST) + .setRasterization(VK_POLYGON_MODE_FILL, VK_CULL_MODE_NONE) + .setNoDepthTest() + .setColorBlendAttachment(PipelineBuilder::blendDisabled()) + .setMultisample(VK_SAMPLE_COUNT_1_BIT) + .setLayout(fsr2_.sharpenPipelineLayout) + .setRenderPass(vkCtx->getImGuiRenderPass()) + .setDynamicStates({VK_DYNAMIC_STATE_VIEWPORT, VK_DYNAMIC_STATE_SCISSOR}) + .build(device); + + vertMod.destroy(); + fragMod.destroy(); + + if (!fsr2_.sharpenPipeline) { + LOG_ERROR("FSR2: failed to create sharpen pipeline"); + destroyFSR2Resources(); + return false; + } + + // Descriptor pool + set for sharpen pass (reads from history output) + VkDescriptorPoolSize poolSize{VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, 2}; + VkDescriptorPoolCreateInfo poolInfo{VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO}; + poolInfo.maxSets = 1; + poolInfo.poolSizeCount = 1; + poolInfo.pPoolSizes = &poolSize; + vkCreateDescriptorPool(device, &poolInfo, nullptr, &fsr2_.sharpenDescPool); + + VkDescriptorSetAllocateInfo dsAI{VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO}; + dsAI.descriptorPool = fsr2_.sharpenDescPool; + dsAI.descriptorSetCount = 1; + dsAI.pSetLayouts = &fsr2_.sharpenDescSetLayout; + 
vkAllocateDescriptorSets(device, &dsAI, &fsr2_.sharpenDescSet); + // Descriptor updated dynamically each frame to point at the correct history buffer + } + + fsr2_.needsHistoryReset = true; + fsr2_.frameIndex = 0; + LOG_INFO("FSR2: initialized successfully"); + return true; +} + +void Renderer::destroyFSR2Resources() { + if (!vkCtx) return; + VkDevice device = vkCtx->getDevice(); + VmaAllocator alloc = vkCtx->getAllocator(); + + vkDeviceWaitIdle(device); + + if (fsr2_.sharpenPipeline) { vkDestroyPipeline(device, fsr2_.sharpenPipeline, nullptr); fsr2_.sharpenPipeline = VK_NULL_HANDLE; } + if (fsr2_.sharpenPipelineLayout) { vkDestroyPipelineLayout(device, fsr2_.sharpenPipelineLayout, nullptr); fsr2_.sharpenPipelineLayout = VK_NULL_HANDLE; } + if (fsr2_.sharpenDescPool) { vkDestroyDescriptorPool(device, fsr2_.sharpenDescPool, nullptr); fsr2_.sharpenDescPool = VK_NULL_HANDLE; fsr2_.sharpenDescSet = VK_NULL_HANDLE; } + if (fsr2_.sharpenDescSetLayout) { vkDestroyDescriptorSetLayout(device, fsr2_.sharpenDescSetLayout, nullptr); fsr2_.sharpenDescSetLayout = VK_NULL_HANDLE; } + + if (fsr2_.accumulatePipeline) { vkDestroyPipeline(device, fsr2_.accumulatePipeline, nullptr); fsr2_.accumulatePipeline = VK_NULL_HANDLE; } + if (fsr2_.accumulatePipelineLayout) { vkDestroyPipelineLayout(device, fsr2_.accumulatePipelineLayout, nullptr); fsr2_.accumulatePipelineLayout = VK_NULL_HANDLE; } + if (fsr2_.accumulateDescPool) { vkDestroyDescriptorPool(device, fsr2_.accumulateDescPool, nullptr); fsr2_.accumulateDescPool = VK_NULL_HANDLE; fsr2_.accumulateDescSets[0] = fsr2_.accumulateDescSets[1] = VK_NULL_HANDLE; } + if (fsr2_.accumulateDescSetLayout) { vkDestroyDescriptorSetLayout(device, fsr2_.accumulateDescSetLayout, nullptr); fsr2_.accumulateDescSetLayout = VK_NULL_HANDLE; } + + if (fsr2_.motionVecPipeline) { vkDestroyPipeline(device, fsr2_.motionVecPipeline, nullptr); fsr2_.motionVecPipeline = VK_NULL_HANDLE; } + if (fsr2_.motionVecPipelineLayout) { vkDestroyPipelineLayout(device, 
fsr2_.motionVecPipelineLayout, nullptr); fsr2_.motionVecPipelineLayout = VK_NULL_HANDLE; } + if (fsr2_.motionVecDescPool) { vkDestroyDescriptorPool(device, fsr2_.motionVecDescPool, nullptr); fsr2_.motionVecDescPool = VK_NULL_HANDLE; fsr2_.motionVecDescSet = VK_NULL_HANDLE; } + if (fsr2_.motionVecDescSetLayout) { vkDestroyDescriptorSetLayout(device, fsr2_.motionVecDescSetLayout, nullptr); fsr2_.motionVecDescSetLayout = VK_NULL_HANDLE; } + + if (fsr2_.sceneFramebuffer) { vkDestroyFramebuffer(device, fsr2_.sceneFramebuffer, nullptr); fsr2_.sceneFramebuffer = VK_NULL_HANDLE; } + if (fsr2_.linearSampler) { vkDestroySampler(device, fsr2_.linearSampler, nullptr); fsr2_.linearSampler = VK_NULL_HANDLE; } + if (fsr2_.nearestSampler) { vkDestroySampler(device, fsr2_.nearestSampler, nullptr); fsr2_.nearestSampler = VK_NULL_HANDLE; } + + destroyImage(device, alloc, fsr2_.motionVectors); + for (int i = 0; i < 2; i++) destroyImage(device, alloc, fsr2_.history[i]); + destroyImage(device, alloc, fsr2_.sceneDepth); + destroyImage(device, alloc, fsr2_.sceneColor); + + fsr2_.internalWidth = 0; + fsr2_.internalHeight = 0; +} + +void Renderer::dispatchMotionVectors() { + if (!fsr2_.motionVecPipeline || currentCmd == VK_NULL_HANDLE) return; + + // Transition depth: DEPTH_STENCIL_ATTACHMENT → DEPTH_STENCIL_READ_ONLY + transitionImageLayout(currentCmd, fsr2_.sceneDepth.image, + VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL, + VK_IMAGE_LAYOUT_DEPTH_STENCIL_READ_ONLY_OPTIMAL, + VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT, + VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT); + + // Transition motion vectors: UNDEFINED → GENERAL + transitionImageLayout(currentCmd, fsr2_.motionVectors.image, + VK_IMAGE_LAYOUT_UNDEFINED, VK_IMAGE_LAYOUT_GENERAL, + VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, + VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT); + + vkCmdBindPipeline(currentCmd, VK_PIPELINE_BIND_POINT_COMPUTE, fsr2_.motionVecPipeline); + vkCmdBindDescriptorSets(currentCmd, VK_PIPELINE_BIND_POINT_COMPUTE, + 
fsr2_.motionVecPipelineLayout, 0, 1, &fsr2_.motionVecDescSet, 0, nullptr); + + // Push constants: invViewProj, prevViewProj, resolution, jitterOffset + struct { + glm::mat4 invViewProj; + glm::mat4 prevViewProj; + glm::vec4 resolution; + glm::vec4 jitterOffset; + } pc; + + glm::mat4 currentVP = camera->getProjectionMatrix() * camera->getViewMatrix(); + pc.invViewProj = glm::inverse(currentVP); + pc.prevViewProj = fsr2_.prevViewProjection; + pc.resolution = glm::vec4( + static_cast<float>(fsr2_.internalWidth), + static_cast<float>(fsr2_.internalHeight), + 1.0f / fsr2_.internalWidth, + 1.0f / fsr2_.internalHeight); + glm::vec2 jitter = camera->getJitter(); + pc.jitterOffset = glm::vec4(jitter.x, jitter.y, fsr2_.prevJitter.x, fsr2_.prevJitter.y); + + vkCmdPushConstants(currentCmd, fsr2_.motionVecPipelineLayout, + VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(pc), &pc); + + uint32_t gx = (fsr2_.internalWidth + 7) / 8; + uint32_t gy = (fsr2_.internalHeight + 7) / 8; + vkCmdDispatch(currentCmd, gx, gy, 1); + + // Transition motion vectors: GENERAL → SHADER_READ_ONLY for accumulation + transitionImageLayout(currentCmd, fsr2_.motionVectors.image, + VK_IMAGE_LAYOUT_GENERAL, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, + VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, + VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT); +} + +void Renderer::dispatchTemporalAccumulate() { + if (!fsr2_.accumulatePipeline || currentCmd == VK_NULL_HANDLE) return; + + VkExtent2D swapExtent = vkCtx->getSwapchainExtent(); + uint32_t outputIdx = fsr2_.currentHistory; + uint32_t inputIdx = 1 - outputIdx; + + // Transition scene color: PRESENT_SRC_KHR → SHADER_READ_ONLY + transitionImageLayout(currentCmd, fsr2_.sceneColor.image, + VK_IMAGE_LAYOUT_PRESENT_SRC_KHR, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, + VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, + VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT); + + // Transition history input: GENERAL/UNDEFINED → SHADER_READ_ONLY + transitionImageLayout(currentCmd, fsr2_.history[inputIdx].image,
fsr2_.needsHistoryReset ? VK_IMAGE_LAYOUT_UNDEFINED : VK_IMAGE_LAYOUT_GENERAL, + VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, + VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, + VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT); + + // Transition history output: UNDEFINED → GENERAL + transitionImageLayout(currentCmd, fsr2_.history[outputIdx].image, + VK_IMAGE_LAYOUT_UNDEFINED, VK_IMAGE_LAYOUT_GENERAL, + VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, + VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT); + + vkCmdBindPipeline(currentCmd, VK_PIPELINE_BIND_POINT_COMPUTE, fsr2_.accumulatePipeline); + vkCmdBindDescriptorSets(currentCmd, VK_PIPELINE_BIND_POINT_COMPUTE, + fsr2_.accumulatePipelineLayout, 0, 1, &fsr2_.accumulateDescSets[outputIdx], 0, nullptr); + + // Push constants + struct { + glm::vec4 internalSize; + glm::vec4 displaySize; + glm::vec4 jitterOffset; + glm::vec4 params; + } pc; + + pc.internalSize = glm::vec4( + static_cast<float>(fsr2_.internalWidth), static_cast<float>(fsr2_.internalHeight), + 1.0f / fsr2_.internalWidth, 1.0f / fsr2_.internalHeight); + pc.displaySize = glm::vec4( + static_cast<float>(swapExtent.width), static_cast<float>(swapExtent.height), + 1.0f / swapExtent.width, 1.0f / swapExtent.height); + glm::vec2 jitter = camera->getJitter(); + pc.jitterOffset = glm::vec4(jitter.x, jitter.y, 0.0f, 0.0f); + pc.params = glm::vec4(fsr2_.needsHistoryReset ?
1.0f : 0.0f, fsr2_.sharpness, 0.0f, 0.0f); + + vkCmdPushConstants(currentCmd, fsr2_.accumulatePipelineLayout, + VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(pc), &pc); + + uint32_t gx = (swapExtent.width + 7) / 8; + uint32_t gy = (swapExtent.height + 7) / 8; + vkCmdDispatch(currentCmd, gx, gy, 1); + + fsr2_.needsHistoryReset = false; +} + +void Renderer::renderFSR2Sharpen() { + if (!fsr2_.sharpenPipeline || currentCmd == VK_NULL_HANDLE) return; + + VkExtent2D ext = vkCtx->getSwapchainExtent(); + uint32_t outputIdx = fsr2_.currentHistory; + + // Update sharpen descriptor to point at current history output + VkDescriptorImageInfo imgInfo{}; + imgInfo.sampler = fsr2_.linearSampler; + imgInfo.imageView = fsr2_.history[outputIdx].imageView; + imgInfo.imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; + + VkWriteDescriptorSet write{VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET}; + write.dstSet = fsr2_.sharpenDescSet; + write.dstBinding = 0; + write.descriptorCount = 1; + write.descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER; + write.pImageInfo = &imgInfo; + vkUpdateDescriptorSets(vkCtx->getDevice(), 1, &write, 0, nullptr); + + vkCmdBindPipeline(currentCmd, VK_PIPELINE_BIND_POINT_GRAPHICS, fsr2_.sharpenPipeline); + vkCmdBindDescriptorSets(currentCmd, VK_PIPELINE_BIND_POINT_GRAPHICS, + fsr2_.sharpenPipelineLayout, 0, 1, &fsr2_.sharpenDescSet, 0, nullptr); + + glm::vec4 params(1.0f / ext.width, 1.0f / ext.height, fsr2_.sharpness, 0.0f); + vkCmdPushConstants(currentCmd, fsr2_.sharpenPipelineLayout, + VK_SHADER_STAGE_FRAGMENT_BIT, 0, sizeof(glm::vec4), &params); + + vkCmdDraw(currentCmd, 3, 1, 0, 0); +} + +void Renderer::setFSR2Enabled(bool enabled) { + if (fsr2_.enabled == enabled) return; + fsr2_.enabled = enabled; + + if (enabled) { + // FSR2 replaces both FSR1 and MSAA + if (fsr_.enabled) { + fsr_.enabled = false; + fsr_.needsRecreate = true; + } + // Use FSR1's scale factor and sharpness as defaults + fsr2_.scaleFactor = fsr_.scaleFactor; + fsr2_.sharpness =
fsr_.sharpness; + fsr2_.needsHistoryReset = true; + } else { + fsr2_.needsRecreate = true; + if (camera) camera->clearJitter(); + } +} + +// ========================= End FSR 2.2 ========================= void Renderer::renderWorld(game::World* world, game::GameHandler* gameHandler) { (void)world; diff --git a/src/ui/game_screen.cpp b/src/ui/game_screen.cpp index 8b79cd4c..96800895 100644 --- a/src/ui/game_screen.cpp +++ b/src/ui/game_screen.cpp @@ -6290,13 +6290,21 @@ void GameScreen::renderSettingsWindow() { saveSettings(); } } - // FSR 1.0 Upscaling + // FSR Upscaling { - if (ImGui::Checkbox("FSR Upscaling (Experimental)", &pendingFSR)) { - if (renderer) renderer->setFSREnabled(pendingFSR); + // FSR mode selection: Off, FSR 1.0 (Spatial), FSR 2.2 (Temporal) + const char* fsrModeLabels[] = { "Off", "FSR 1.0 (Spatial)", "FSR 2.2 (Temporal)" }; + int fsrMode = pendingFSR ? 1 : 0; + if (renderer && renderer->isFSR2Enabled()) fsrMode = 2; + if (ImGui::Combo("Upscaling", &fsrMode, fsrModeLabels, 3)) { + pendingFSR = (fsrMode == 1); + if (renderer) { + renderer->setFSREnabled(fsrMode == 1); + renderer->setFSR2Enabled(fsrMode == 2); + } saveSettings(); } - if (pendingFSR) { + if (fsrMode > 0) { const char* fsrQualityLabels[] = { "Ultra Quality (77%)", "Quality (67%)", "Balanced (59%)", "Performance (50%)" }; static const float fsrScaleFactors[] = { 0.77f, 0.67f, 0.59f, 0.50f }; if (ImGui::Combo("FSR Quality", &pendingFSRQuality, fsrQualityLabels, 4)) { From e94eb7f2d1a59fa5cc11e48658a7102ddb68f2a2 Mon Sep 17 00:00:00 2001 From: Kelsi Date: Sun, 8 Mar 2026 01:22:15 -0800 Subject: [PATCH 13/13] FSR2 temporal upscaling fixes: unjittered reprojection, sharpen Y-flip, MSAA guard, descriptor double-buffering - Motion vectors: single unjittered reprojection matrix (80 bytes) instead of two jittered matrices (160 bytes), eliminating numerical instability from jitter amplification through large world coordinates - Sharpen pass: fix Y-flip for correct UV sampling, double-buffer 
descriptor sets to avoid race with in-flight command buffers - MSAA: auto-disable when FSR2 enabled, grey out AA setting in UI - Accumulation: variance-based neighborhood clamping in YCoCg space, correct history layout transitions - Frame index: wrap at 256 for stable Halton sequence --- assets/shaders/fsr2_accumulate.comp.glsl | 84 ++++++++--------------- assets/shaders/fsr2_accumulate.comp.spv | Bin 8436 -> 10592 bytes assets/shaders/fsr2_motion.comp.glsl | 21 ++---- assets/shaders/fsr2_motion.comp.spv | Bin 3668 -> 3096 bytes assets/shaders/fsr2_sharpen.frag.glsl | 14 ++-- assets/shaders/fsr2_sharpen.frag.spv | Bin 3984 -> 4152 bytes include/rendering/renderer.hpp | 2 +- src/rendering/renderer.cpp | 72 +++++++++++-------- src/ui/game_screen.cpp | 8 ++- 9 files changed, 95 insertions(+), 106 deletions(-) diff --git a/assets/shaders/fsr2_accumulate.comp.glsl b/assets/shaders/fsr2_accumulate.comp.glsl index a998b52c..7fb0cb27 100644 --- a/assets/shaders/fsr2_accumulate.comp.glsl +++ b/assets/shaders/fsr2_accumulate.comp.glsl @@ -2,25 +2,19 @@ layout(local_size_x = 8, local_size_y = 8) in; -// Inputs (internal resolution) layout(set = 0, binding = 0) uniform sampler2D sceneColor; layout(set = 0, binding = 1) uniform sampler2D depthBuffer; layout(set = 0, binding = 2) uniform sampler2D motionVectors; - -// History (display resolution) layout(set = 0, binding = 3) uniform sampler2D historyInput; - -// Output (display resolution) layout(set = 0, binding = 4, rgba16f) uniform writeonly image2D historyOutput; layout(push_constant) uniform PushConstants { vec4 internalSize; // xy = internal resolution, zw = 1/internal vec4 displaySize; // xy = display resolution, zw = 1/display - vec4 jitterOffset; // xy = current jitter (pixel-space), zw = unused + vec4 jitterOffset; // xy = current jitter (NDC-space), zw = unused vec4 params; // x = resetHistory (1=reset), y = sharpness, zw = unused } pc; -// RGB <-> YCoCg for neighborhood clamping vec3 rgbToYCoCg(vec3 rgb) { float y = 
0.25 * rgb.r + 0.5 * rgb.g + 0.25 * rgb.b; float co = 0.5 * rgb.r - 0.5 * rgb.b; @@ -40,76 +34,52 @@ void main() { ivec2 outSize = ivec2(pc.displaySize.xy); if (outPixel.x >= outSize.x || outPixel.y >= outSize.y) return; - // Output UV in display space vec2 outUV = (vec2(outPixel) + 0.5) * pc.displaySize.zw; + vec3 currentColor = texture(sceneColor, outUV).rgb; - // Map display pixel to internal resolution UV (accounting for jitter) - vec2 internalUV = outUV; - - // Sample current frame color at internal resolution - vec3 currentColor = texture(sceneColor, internalUV).rgb; - - // Sample motion vector at internal resolution - vec2 inUV = outUV; // Approximate — display maps to internal via scale - vec2 motion = texture(motionVectors, inUV).rg; - - // Reproject: where was this pixel in the previous frame's history? - vec2 historyUV = outUV - motion; - - // History reset: on teleport / camera cut, just use current frame if (pc.params.x > 0.5) { imageStore(historyOutput, outPixel, vec4(currentColor, 1.0)); return; } - // Sample reprojected history + vec2 motion = texture(motionVectors, outUV).rg; + vec2 historyUV = outUV + motion; + + float historyValid = (historyUV.x >= 0.0 && historyUV.x <= 1.0 && + historyUV.y >= 0.0 && historyUV.y <= 1.0) ? 
1.0 : 0.0; + vec3 historyColor = texture(historyInput, historyUV).rgb; - // Neighborhood clamping in YCoCg space to prevent ghosting - // Sample 3x3 neighborhood from current frame + // Neighborhood clamping in YCoCg space vec2 texelSize = pc.internalSize.zw; - vec3 samples[9]; - int idx = 0; - for (int dy = -1; dy <= 1; dy++) { - for (int dx = -1; dx <= 1; dx++) { - samples[idx] = rgbToYCoCg(texture(sceneColor, internalUV + vec2(dx, dy) * texelSize).rgb); - idx++; - } - } + vec3 s0 = rgbToYCoCg(currentColor); + vec3 s1 = rgbToYCoCg(texture(sceneColor, outUV + vec2(-texelSize.x, 0.0)).rgb); + vec3 s2 = rgbToYCoCg(texture(sceneColor, outUV + vec2( texelSize.x, 0.0)).rgb); + vec3 s3 = rgbToYCoCg(texture(sceneColor, outUV + vec2(0.0, -texelSize.y)).rgb); + vec3 s4 = rgbToYCoCg(texture(sceneColor, outUV + vec2(0.0, texelSize.y)).rgb); + vec3 s5 = rgbToYCoCg(texture(sceneColor, outUV + vec2(-texelSize.x, -texelSize.y)).rgb); + vec3 s6 = rgbToYCoCg(texture(sceneColor, outUV + vec2( texelSize.x, -texelSize.y)).rgb); + vec3 s7 = rgbToYCoCg(texture(sceneColor, outUV + vec2(-texelSize.x, texelSize.y)).rgb); + vec3 s8 = rgbToYCoCg(texture(sceneColor, outUV + vec2( texelSize.x, texelSize.y)).rgb); - // Compute AABB in YCoCg - vec3 boxMin = samples[0]; - vec3 boxMax = samples[0]; - for (int i = 1; i < 9; i++) { - boxMin = min(boxMin, samples[i]); - boxMax = max(boxMax, samples[i]); - } + vec3 m1 = s0 + s1 + s2 + s3 + s4 + s5 + s6 + s7 + s8; + vec3 m2 = s0*s0 + s1*s1 + s2*s2 + s3*s3 + s4*s4 + s5*s5 + s6*s6 + s7*s7 + s8*s8; + vec3 mean = m1 / 9.0; + vec3 variance = max(m2 / 9.0 - mean * mean, vec3(0.0)); + vec3 stddev = sqrt(variance); - // Slightly expand the box to reduce flickering on edges - vec3 boxCenter = (boxMin + boxMax) * 0.5; - vec3 boxExtent = (boxMax - boxMin) * 0.5; - boxMin = boxCenter - boxExtent * 1.25; - boxMax = boxCenter + boxExtent * 1.25; + float gamma = 1.5; + vec3 boxMin = mean - gamma * stddev; + vec3 boxMax = mean + gamma * stddev; - // Clamp history to 
the neighborhood AABB vec3 historyYCoCg = rgbToYCoCg(historyColor); vec3 clampedHistory = clamp(historyYCoCg, boxMin, boxMax); historyColor = yCoCgToRgb(clampedHistory); - // Check if history UV is valid (within [0,1]) - float historyValid = (historyUV.x >= 0.0 && historyUV.x <= 1.0 && - historyUV.y >= 0.0 && historyUV.y <= 1.0) ? 1.0 : 0.0; - - // Blend factor: use more current frame for disoccluded regions - // Luminance difference between clamped history and original → confidence float clampDist = length(historyYCoCg - clampedHistory); - float blendFactor = mix(0.05, 0.3, clamp(clampDist * 4.0, 0.0, 1.0)); - - // If history is off-screen, use current frame entirely + float blendFactor = mix(0.05, 0.30, clamp(clampDist * 2.0, 0.0, 1.0)); blendFactor = mix(blendFactor, 1.0, 1.0 - historyValid); - // Final blend vec3 result = mix(historyColor, currentColor, blendFactor); - imageStore(historyOutput, outPixel, vec4(result, 1.0)); } diff --git a/assets/shaders/fsr2_accumulate.comp.spv b/assets/shaders/fsr2_accumulate.comp.spv index 4d31fba7cc189364681e035d9dddc1e48c3845d2..47529d75ae3b2f227713851a616bea13ce070b00 100644 GIT binary patch literal 10592 zcmZvh2b>*M6~-so>=GbA=pb!FP*9LwMFJtf5(p*~ks^ebw{JHOclWKkZ({-|BuG&d z8%Pl_f?#3+i4Bnwu!03tQ0$`E3wE*l|IfQ8xz5jF&+mTUcg{Wco-=o5vP;+GnHvl@7RU)(b0wD?PWuag+tAe#=`#QSfk!< zE@>DyHH&MG)Q2Ye^MGz5^7Xq}dt=rZ8(ef+>&!W=IfHvF893l@_SwaXr4H_)j-ptm z`?S_6gNq_&3Y|c&x9Wq8&asAN3D0D#w>tWsVuFmBmBq}?VvboC(_NeeZcVfoG?zAp zfXtg(_G)lwVc$?|QEh1M$dXpQ)^4^&<~q;17Io&F+Pt6v?3ns=F&uNW}w`(Kq zad=O$L8$|a=199SHc}fZCzQU<9DCp293LI3Eh}9y$9r*pv(2RW0|VoYHk5qbWj^on zXl<-E%p%q}Ag_1!wE|2nFF&2peO<*^pBNi!jI`&phFW7#^zPyy=}%G{5kw;SA`)IjTg0&Sd=?;f87mpx(B_msZ!j5 zTk`SoDY^b$xZesjZ3cY9gw^%i$C$Gx++15=MRCcjt>IgOdZk?YWAZ(dzK>K zxnSeX=lnXOd-h)L4Dwv4<>tCRboNTWdF2C1vkvz-h{ZaK!RC#9H^I&~?^dWj*ye39 zT2H6$DaG={oxeRZ-o$7g^X6CZ9jN@xIE*vj)hS5Leab!GUeWKjcWB97_gbtAbN)LS z-KT51{!IzTdeHK+uBh$iq#5g)a-RvWSo7ABU2fR`(4ht-{zeAZO*yh<>1~QzstexuixdI`(4ht z-{qY9J&+`c16G1v23 
zV{YGDug26PW(@4SJZ2oOeiwOs=e9BDsjniB+61QlSJr(Equ1Z@sq;^(QuCP>$68Cl z?#FjR>}MHRJz_2ZyPrJfLb!VD=OVE4)c?jBuJ2lof2L>d@3olw^POV6`*5!pGkUpK zV_t`;IbZxQx+_+5kKVsaG56hqdp};Ev;}0BUxsa$Yd75o=Nqs8 zR;%K31{{_voWZe}EY8(Z?_~_vbzO1f!StNZ-dX zHSdu)a^C~4=Kds_`h6+)yT?=-9OF6t6xem_t?{nmKJI7qav#RrhpD*_aqQy( zu>05^vrnFf2Ql@iac$D9(S1Z7$Job1VApYPYcbbwAD?FQav#P#jH$T~aqQzW;Oag; zi>4m^_#9X*`Vn~?V;`RfyN-J^-ZfTJm34gq)9-w1@t&_?RF8A{Mew7EdxpM*xvw~v zUj`eaJ{dDlyaT=h)~|VaE{$=WM;P^6Z*Z}r8=Gt|}uVL!(F8g}Y)Xmd{nd>n| zea`cF{swjxrtitb4`UsD-^4b?)b&4+w5a!6@cf}td`gGOSrl8MNPi~yPm#kO}|0YuX*L?xvtQODcG;p;Ko1m%3oK3-M(U;A@=24HnOb5FkebJZA!R}dKwJ%$u>DRm*jInNi zf6^a)*$Ql5)b;!OmRj^>8*tv2ZQ*%ewu4vuvOSu5^x=6aCO`8#ps7b6b_AA!zDR_n~0tsYl(1 zfn85u)cq2$d(u~}`=x05H7|##+u!r`N8Lw&ty|r?_hVGEZok(@f}daveiYpAnS0Ye z8*Yqx#2gLw`w%g6;Kr#(%rW3+vzTMy#;HflabVw9k*5!CoO;9@5AMMtW-iz`^@uqE z?75DZ6T!x*N6b91_dH@w0vo3uF)st#pNN?cHcs8%`8N}_h(8&e_wJO${r%70oeDQb zJ?5MSw!gt&o_NeT9d3+z?Bf++bLoqDXMnv2!Ou)Q@|*=XMm_SJ4K|m)nD5F;ifxVZ(Yl%mmMQ~%(BTpS{E`2etAME`O-bg(148V<1k3555 zbLoqDi@~0W;LXG%&-rj;)FaQUz~<5y^M=5lo8ZHVN1hS5G3t@01vZzym^TV8pS|?l zy*ly8GX^(CJ@Sl$&85%tw}4SCa!#mY3;1or^Y$9>lFY75=Wr>SvHH!^W>kwj7l6$j zc`gM1zdRSA8>>I&z7}l0;IB(O>be+ijQXW%KbL^brEeNLcke5}{1kg&`j=zwbs3|+ z$p3n<+O$;jWng}aT`+ylxs*{|d^zJ4Sj>L|IQHxBXm5lY6aK5f&c8bC^-W+k<6{4B z2AfBpYh1~w7CGJm&ewP=JYVB&@O+K8!_|z7d;bowdGxu))r@Mf#yi3Oj;HThusq)R z*MWWKtLwjpQ7z`)09FfrBiI^(zYFYo)6(CXZUXbe|EJ(@L;&NBG57V1`eW`bU^RU= zGsgF|O6hT6TF6b*5O)r z!Og86`9BQir!c?s^&1!Y^+%u9fbCuIkHD`=-25MfyM8>+9|Nm}#eMxa8b9Tl<}{D7 zu6Z}3e%H8(m`{LjNo${ZW9G?!KiVegah|q?Z-J@Dyyt!RnD~2G~9HVv%cCu=(_PPpw@o^6mz%*7kff^~n1IuzOUGdAozF`|-W49`jxZ zcAk2i`xk-Dr_cS^d$q{>VsLdoJ`d`VcOURxn0n0H7wo*e*ZaZMW8VH?_oKc%_4)v? 
z`SeAu1HtCXdwmdGJ#rlkw%6)W+aX}{>9aP^ky_lZL&5ex_+en{5B?IcbK;pj9PAw9 z^!=N2+|kyb`AyNi@-x2)+%vx!W1R6>;QY+buJB_Lj~*NeH&1@%kAkaPoB8Ase>B+m zy#I6H>c+dCJmQZ98=v?1IJmm;?nfTI=_~d1{CK!Im#6zU7i^4r%sByUp140Jf{jy; zn0a7p34RjTIg$5eU}MxHW+sm@OEX!O=?fbp&9_D`DWzOgP&hMQ6Ip@FJ<))=`}4G50k((r@F0uW&8Z$YTx{$jy}wQDM>>bXUvYoG@%=j_1(p?j`NYJ z@N(0TiJ2JRz`yb6tr(5T_DNfNS6fF%Tl=!E-oe4H;p(PRp{rCZ7rOe2LxtXIaYMnl ziMhC9xwkaZmk($nB459W+LNd;G|;oEa!zZdb>M&v{YNgK&ju={Jh+*8B*`Y-t14#> z^h8V}D*@kI=^a2j#yM=t@LY6nrN$pDCdinDxtPVdn5Eldnv%8P%1E`dxUo_I<_S!BY38)D>ggi;Id?26goJ4;QMKnkorE!TD1?A08L%8>K6?)B((#bKCDSuf4Wtz%rrNOf?;N!ia`V|=ck z=H%`%enq(!p60$6zFJ_jvc+@_9)v6QZN%S@Yz6mmFvO3FH;NmRC&0tq>jz7P;naQ# zJ6%V#{-@DBm5rx)w`f0;+1=^*hU8i7Vj9Z{VdIExWep21pmW}##x2`eg-yBUWDmG( z9nupIo#vheKUTXr*$cb3M016{<+Ww2#qCoUxBpna)!n6{$1bhCFvc%sbjG#9^^^+b zzLUG%nzU{Q+)!b7q(qQCUx3|L7_6>aGSc7AOxl>Y&k66-S;%r?(js8*F|XhlnsQTf zoaffKmK@iRaXzHl*c0;^XD!d%-iTM&b82R4d%p61ZT&IcGnMyi>u*G?r9Csnjp=#H z$9leuwZC-U<7)Gcc-JN$uWdXk*_jUPVGg4mFWwc@jBY`8qXQn`sja^S+vlyEG6#FR z49n|x9b?SF*yh>^Ns`SAcfsBn)S}6yUwe9L)okNu)a*s!cP`_QuxEqqHS*5`lmCA6 zpABk3jJN+aXgTBU?;N!K$?HECZ4WWO^T3XkUw~iRvlRI*02^;U$M>S`*}a+#@?6N} z#k#y|(zViWUhV#ja~}3MfJB{jVDm=b-m|W?d8fnrGBhu*i*!DCM?AN#)@BaM|v9DR!KSFyZ!I=7|Rk3ZIaV)&O~U$le0&`m)6-=qjCjZU4h-AzzT?97oIY4*Z>_T*&DrjO@46Uo{-^3}-*tKaGdbIO&*p5$`@YM^ z`>xB|zUT6`@3*||`z>$#UJH9F`F*eD?S*x=@3QdQkMA+<6WKq{Ko{b(rUTaJ%!P=Y z;zf9J39&xAx{(W!g`5QUU3)R7p@_Z+=|=_-bN8ZOkI0*Q9lDfp@?PdHXSvsd^_$x> zq-}23bP%zg&unx1+<5~cA2EJQI4&PEj4gi~d3<(N5y#2jL>{>jME(Wpz7g&9GCuhY zh@59$9JMxr?Z1QkC=fE<^0Qg6)31E#o@Lu6;Q&Ba_jLdpovq`kb44{SKsl zuiu&RvDfdymQ%(f4antL&Ub^&YmEDSDO%1x?c+U&oIaly+WLGyydUx15V0Qs8?%P> z^2~h@F-G3H*B~7{^*r+*Li8(Modbk#>9X)&md|hVS??(}1qKA)x zjgcSgp>_{^98SODWe?Whg4P%HZvb0A>`#D=xhz}5Pl9hi-Hi6~Td41@@M6%*!mSOdo;$iz6-5C?uYxquJwJ0wLOPlL*(Orcp&5C&Ex%G zu6xlBWcWd}IpnvZA3}UCJdAez)z6dv8|X(6Fa6&{>sMRnKaQBovEmr}6R>Nb z?f(4~xjM7mzn>xA>kUJ3fNpRk5|FYLq5j529}>p26K3LJU{y)=F=Z*q9ewQ13T}iH6A-(s{z|u z@-e0nEFZO+!2Bf#Am-B_>#5H*a=o0VI%+lJvsUc?1Z+9iJ@!B@zXz^Y^sybjSSRPa 
zJ+^Zh&F+y&*v80Tm2s27@-wm;es{}7&MDwn4|DE_Z4KY2+EcNOk&m3y!19sPZ+E%K zxf9qrVebq!CdTgqmh*R8)Y%p6IDN5ia*<;lkmwq*54y?F}4LP7xo-*esA{1c3mfD-&k|8`QtZhk~r3QW6bSu z8U2xKU$C6MebCy`*M8tBGTlqLIi@^D_coMef zC+@AqU}NMbXL*-^`Ae1{`pxTaWcgUrljRZL`S!9LEZ2s3uH{yM`Ag5WetWdn@Sl?T z=ff=pFGCz-y;ijTsCOz@F6?%&b9ElpIt^@Y`N)4dn7_pQj@NHoK{tXJT8wlWIQuR)Xb1;$B__$6s30oaQmsnjL8U*0_b3v%t5awb!6mW3SC@Yn+Yk znE1|L1D5j&?wpL14sLD6`Mngs2hN3ayz%;73x5yTgLU=$d*C?4|HHDr&p5B`kYnlJ z@2c$)&-!Gv|9>?VJqZHk$K~(s zJ+S3t+$^x~9{JJiZ0rd(pT5Yo7ua00k?5fXEFZb%fbF3LiClYw&8N>jb?$PJcOP*5 z-2Cp4kG%VV?NL6)?GLW+$8QMv7m$MD(-*mp0-Gzp)<3fxPT;s;h{1kYv{F&bY+cUo-I?nh)aQ@6M zuCteBcC5i7Z1d#L{7KmI&dq$<5x)d%e182~vE_}oo_5490~?=TbN$r-O}=kC+Z{eoQB}y+oZe fz{beOm@~ooURGioCm;L03T%(o)bD=F`HcHNoe)lz diff --git a/assets/shaders/fsr2_motion.comp.glsl b/assets/shaders/fsr2_motion.comp.glsl index f4f68c2c..b0b39375 100644 --- a/assets/shaders/fsr2_motion.comp.glsl +++ b/assets/shaders/fsr2_motion.comp.glsl @@ -6,10 +6,8 @@ layout(set = 0, binding = 0) uniform sampler2D depthBuffer; layout(set = 0, binding = 1, rg16f) uniform writeonly image2D motionVectors; layout(push_constant) uniform PushConstants { - mat4 invViewProj; // Inverse of current jittered VP - mat4 prevViewProj; // Previous frame unjittered VP + mat4 reprojMatrix; // prevUnjitteredVP * inverse(currentUnjitteredVP) vec4 resolution; // xy = internal size, zw = 1/internal size - vec4 jitterOffset; // xy = current jitter (NDC), zw = previous jitter } pc; void main() { @@ -20,25 +18,18 @@ void main() { // Sample depth (Vulkan: 0 = near, 1 = far) float depth = texelFetch(depthBuffer, pixelCoord, 0).r; - // Pixel center in NDC [-1, 1] + // Pixel center in UV [0,1] and NDC [-1,1] vec2 uv = (vec2(pixelCoord) + 0.5) * pc.resolution.zw; vec2 ndc = uv * 2.0 - 1.0; - // Reconstruct world position from depth + // Clip-to-clip reprojection: current unjittered clip → previous unjittered clip vec4 clipPos = vec4(ndc, depth, 1.0); - vec4 worldPos = pc.invViewProj * clipPos; - worldPos /= worldPos.w; - - 
// Project into previous frame's clip space (unjittered) - vec4 prevClip = pc.prevViewProj * worldPos; + vec4 prevClip = pc.reprojMatrix * clipPos; vec2 prevNdc = prevClip.xy / prevClip.w; vec2 prevUV = prevNdc * 0.5 + 0.5; - // Remove jitter from current UV to get unjittered position - vec2 unjitteredUV = uv - pc.jitterOffset.xy * 0.5; - - // Motion = previous position - current unjittered position (in UV space) - vec2 motion = prevUV - unjitteredUV; + // Motion = previous position - current position (both unjittered, in UV space) + vec2 motion = prevUV - uv; imageStore(motionVectors, pixelCoord, vec4(motion, 0.0, 0.0)); } diff --git a/assets/shaders/fsr2_motion.comp.spv b/assets/shaders/fsr2_motion.comp.spv index 813c4b9dbf213b51b98f167c1ceceac241558285..faa3d8362634407aa4109d6e924888330bdb0a92 100644 GIT binary patch delta 793 zcmYjPO-lk%6usm4k%<&!kG;E`8Th(l# zWmj&f`0+5N0k+Y!YVB6Vx^mvNq7(5H!YTs`=#la zp3^DcpJfd39Vf1*l1;3Z0geou$d>?aoDtpSQ5EYukNRi8>$=Q31#e2zhA()BOGwax zs05|qeEVWq8;qj?Y{HhD)*K1&8O+x%8}Ui3d%y&+f&+&z_m$Rj4F7994VDMcLf4U; zpZ%Yk!*n3#9c*EP2tozm1UNv_0_LL9sB>sQ8vR+vTn3oSPhmX|XaLl6$YK4N(x^Wv z?Hp_RGPaE-<0f!Y8+$j7HewkWDYLu ZdN+X=lNsb46D4fI4Y^4d_(91A@C#%-RnhyXj{iLAkMfK(Fy($v%*EnIfAl|Jl0f^*YLu2; z6KE9jZTZA!%(&TgXH&VQcVetv;kv5e9DzmEr0hqJ`FukDP&gr6(H_oNXQ_j>V_(869@@^1@_>AdUvzc80 diff --git a/assets/shaders/fsr2_sharpen.frag.glsl b/assets/shaders/fsr2_sharpen.frag.glsl index b4dd928b..2c649d22 100644 --- a/assets/shaders/fsr2_sharpen.frag.glsl +++ b/assets/shaders/fsr2_sharpen.frag.glsl @@ -10,16 +10,20 @@ layout(push_constant) uniform PushConstants { } pc; void main() { + // Undo the vertex shader Y flip (postprocess.vert flips for Vulkan overlay, + // but we need standard UV coords for texture sampling) + vec2 tc = vec2(TexCoord.x, 1.0 - TexCoord.y); + vec2 texelSize = pc.params.xy; float sharpness = pc.params.z; // RCAS: Robust Contrast-Adaptive Sharpening // 5-tap cross pattern - vec3 center = texture(inputImage, TexCoord).rgb; - vec3 north = 
texture(inputImage, TexCoord + vec2(0.0, -texelSize.y)).rgb; - vec3 south = texture(inputImage, TexCoord + vec2(0.0, texelSize.y)).rgb; - vec3 west = texture(inputImage, TexCoord + vec2(-texelSize.x, 0.0)).rgb; - vec3 east = texture(inputImage, TexCoord + vec2( texelSize.x, 0.0)).rgb; + vec3 center = texture(inputImage, tc).rgb; + vec3 north = texture(inputImage, tc + vec2(0.0, -texelSize.y)).rgb; + vec3 south = texture(inputImage, tc + vec2(0.0, texelSize.y)).rgb; + vec3 west = texture(inputImage, tc + vec2(-texelSize.x, 0.0)).rgb; + vec3 east = texture(inputImage, tc + vec2( texelSize.x, 0.0)).rgb; // Compute local contrast (min/max of neighborhood) vec3 minRGB = min(center, min(min(north, south), min(west, east))); diff --git a/assets/shaders/fsr2_sharpen.frag.spv b/assets/shaders/fsr2_sharpen.frag.spv index 99aba03a78ee43bcb0840210d120f2610bb1845b..f9d2394cd951d6e28aefe6d1a8dc337853427c6d 100644 GIT binary patch literal 4152 zcmZ9N`F2!A5XLWrL{=407I6v29mEX~K>`7BT%rk!8!ll?#xR=9#F+^MT!M&bL#Z>eN|mm-M9Nr+w`srvTRy5C!3f3k+q)r*>sc) zt)n>>dUy7&syBvKtzL7h5f^4{%|K%=$}Y%eFlwX4az)1+>>PG6aq1KRwUIT;{$$dO zY<5=Y9Vqnm6?(S~433Qr)EfszN&_S1N@-xITq_MW%KJ*j%}nCTmBEqmp){bKh_r4y zHD*zxF_>lT{On^_X<}=&S{nj3wZ-~N6Qz-z<%5(!>&TWe_K(;1Zmm}8jbf!yhj-vn z_ZchJild0;UnbAy)@sAjLuhIO6C=e`mLWn!Iv>}B@W}9e{&wB=6dA5U!T~kwJmy{ zd*G`J4zCaYOYozMPIU)=8Gd%#qUPS(%PZJl6Mt`9NoV%?D&`#H^u@drV4lsvKWNg) zZu|TR?3&#-`;?!{wY8jC_sjR?W86O|?Tp6GWOUEnr{I2Hg8L4-67HV*eF(qYZ$Zkt zr|`lQ?zbR~_gj#1?_~iF{U0d3&F;CHzMW|u=-*0IOkH#yQJQaX3a&Ix%74OHfH6Tc|HGEVcr?mbB^y;uFrFR3+5S@TgyL7dwh)k z?BzDh81+6*k7xFFOkMMthI!`9?^)3w`B#I}{A=Lmcm2N0J2BUh?(4c-Ka{_VyWncE zr|ZGqL-p74eck{zx4!88Zm{+BMeiHI>Y7j5yXRVe^u7t4_P!aO_TB@Z+WQtX_2_jg z*k0A6*L%R5F@4eNHn8>dMX&dQ)is~ASMQwu==DBu+Ux!BwATmV_L`pY2jS|m&%I!^ z==~wEn*B%b4};CEFM96-TVG%F-rvOX)|2*LK++$*?*OO0?}Vql?}AV5{Sh?v==D*s zTH5PlaC7O4ULOZrPha%<1Xx}3NqgN6*Ke<${UdZu;HT7xhZubS=-p)3pr4 z)3xk{+mGuqR)6FwgVVJ<4^P+f0zBp#t3Uc3Nqp8YHhiOr&wh<{E&gZdi~3ctTJSNj 
z>-F1gzb|6Os5`%sYY|_EtM%vq%j4iiVxE8V?8A)JAF=zv^AkIk$4;Q>bB;A@jA~Ky zAlUu?i1-f1LzvpDnDGa4tq1?r;MXvFoQIjiIj>{t5px)9O!_^21Fjx1NAj3!7$eV{ zV0HK1yhpKkU(VMbzGI2c`o`MtB%{8_`4(91NWQ+e!D=V4_|JR??B3~{%m0hD-^HxG z0@LrEHYW1E2kycm@B3iqsE76;Sp5Uc-i|Y>nZx(^5tg3ykKvy6c+a2YG3qhrQ?O?| zVm<>KryenZT=eUR8 zV6Hi8o&}%5)T6g=!Pe5}9-U@Xi@M)|)q;Ny-ktCtz(bht%{~1Qi}&VSWQw5L>_>FGW6FcDk5hA5DjZQdrYQ8SkFGa-(aB0pj z7Q&#!8uq(Wb2R8Be&+$PkdxkJ{j_4FDXUx)Z+&1e|@ zX3h8;W&7gV(1>l(O#e{tv6#~jXszN~HFwDBWCvruO_;&@?V5?Te7l9M9@e)-ar&dd zdU(6cYQ0UE_0&hLdt=O5$$y7t@}cihB=R#<2Rkh0V`-24oD2KO3>7WhVEoSVL#f6aBhP?hfCe-jDv?BePue zKKbaY+hZSVxL-IvdUe07Gv@4dkBq)Nqxz`jUKugyV|p?Ahy48AAm|Xa*)?>o#htFz#QWza$ zxV83*D)G}FuKLOD@;tg%wpaXM(5tidr@ZZ#d`eaTXKBt zPb~E=YF?6A&i94UF2(2jKp1T$vg(;H33GS&TJ-;gy+4$(H{UXTW@0SwW#Lwt<-H=z z8g$b>5=Or&qqb$uXyo7>KbCbl{1ah%-QM|{Ffr)1=FJff#&2tH2$Opv?u}oC(HN%v lCX7Z-(|(V&dd=hN)n?6q$XHK2etLyH*EQq&PZ^HL{s%84D--|# diff --git a/include/rendering/renderer.hpp b/include/rendering/renderer.hpp index 13f77fe2..0058fbdd 100644 --- a/include/rendering/renderer.hpp +++ b/include/rendering/renderer.hpp @@ -408,7 +408,7 @@ private: VkPipelineLayout sharpenPipelineLayout = VK_NULL_HANDLE; VkDescriptorSetLayout sharpenDescSetLayout = VK_NULL_HANDLE; VkDescriptorPool sharpenDescPool = VK_NULL_HANDLE; - VkDescriptorSet sharpenDescSet = VK_NULL_HANDLE; + VkDescriptorSet sharpenDescSets[2] = {}; // Previous frame state for motion vector reprojection glm::mat4 prevViewProjection = glm::mat4(1.0f); diff --git a/src/rendering/renderer.cpp b/src/rendering/renderer.cpp index 81686219..063bae9a 100644 --- a/src/rendering/renderer.cpp +++ b/src/rendering/renderer.cpp @@ -876,6 +876,9 @@ bool Renderer::isWaterRefractionEnabled() const { void Renderer::setMsaaSamples(VkSampleCountFlagBits samples) { if (!vkCtx) return; + // FSR2 requires non-MSAA render pass — block MSAA changes while FSR2 is active + if (fsr2_.enabled && samples > VK_SAMPLE_COUNT_1_BIT) return; + // Clamp to device maximum VkSampleCountFlagBits maxSamples = 
vkCtx->getMaxUsableSampleCount(); if (samples > maxSamples) samples = maxSamples; @@ -1178,7 +1181,7 @@ void Renderer::endFrame() { fsr2_.prevJitter = camera->getJitter(); camera->clearJitter(); fsr2_.currentHistory = 1 - fsr2_.currentHistory; - fsr2_.frameIndex++; + fsr2_.frameIndex = (fsr2_.frameIndex + 1) % 256; // Wrap to keep Halton values well-distributed } else if (fsr_.enabled && fsr_.sceneFramebuffer) { // End the off-screen scene render pass @@ -3782,7 +3785,7 @@ bool Renderer::initFSR2Resources() { VkPushConstantRange pc{}; pc.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT; pc.offset = 0; - pc.size = 2 * sizeof(glm::mat4) + 2 * sizeof(glm::vec4); // 160 bytes + pc.size = sizeof(glm::mat4) + sizeof(glm::vec4); // 80 bytes VkPipelineLayoutCreateInfo plCI{VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO}; plCI.setLayoutCount = 1; @@ -4005,20 +4008,21 @@ bool Renderer::initFSR2Resources() { return false; } - // Descriptor pool + set for sharpen pass (reads from history output) + // Descriptor pool + sets for sharpen pass (double-buffered to avoid race condition) VkDescriptorPoolSize poolSize{VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, 2}; VkDescriptorPoolCreateInfo poolInfo{VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO}; - poolInfo.maxSets = 1; + poolInfo.maxSets = 2; poolInfo.poolSizeCount = 1; poolInfo.pPoolSizes = &poolSize; vkCreateDescriptorPool(device, &poolInfo, nullptr, &fsr2_.sharpenDescPool); + VkDescriptorSetLayout layouts[2] = {fsr2_.sharpenDescSetLayout, fsr2_.sharpenDescSetLayout}; VkDescriptorSetAllocateInfo dsAI{VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO}; dsAI.descriptorPool = fsr2_.sharpenDescPool; - dsAI.descriptorSetCount = 1; - dsAI.pSetLayouts = &fsr2_.sharpenDescSetLayout; - vkAllocateDescriptorSets(device, &dsAI, &fsr2_.sharpenDescSet); - // Descriptor updated dynamically each frame to point at the correct history buffer + dsAI.descriptorSetCount = 2; + dsAI.pSetLayouts = layouts; + vkAllocateDescriptorSets(device, &dsAI, 
fsr2_.sharpenDescSets); + // Descriptors updated dynamically each frame to point at the correct history buffer } fsr2_.needsHistoryReset = true; @@ -4036,7 +4040,7 @@ void Renderer::destroyFSR2Resources() { if (fsr2_.sharpenPipeline) { vkDestroyPipeline(device, fsr2_.sharpenPipeline, nullptr); fsr2_.sharpenPipeline = VK_NULL_HANDLE; } if (fsr2_.sharpenPipelineLayout) { vkDestroyPipelineLayout(device, fsr2_.sharpenPipelineLayout, nullptr); fsr2_.sharpenPipelineLayout = VK_NULL_HANDLE; } - if (fsr2_.sharpenDescPool) { vkDestroyDescriptorPool(device, fsr2_.sharpenDescPool, nullptr); fsr2_.sharpenDescPool = VK_NULL_HANDLE; fsr2_.sharpenDescSet = VK_NULL_HANDLE; } + if (fsr2_.sharpenDescPool) { vkDestroyDescriptorPool(device, fsr2_.sharpenDescPool, nullptr); fsr2_.sharpenDescPool = VK_NULL_HANDLE; fsr2_.sharpenDescSets[0] = fsr2_.sharpenDescSets[1] = VK_NULL_HANDLE; } if (fsr2_.sharpenDescSetLayout) { vkDestroyDescriptorSetLayout(device, fsr2_.sharpenDescSetLayout, nullptr); fsr2_.sharpenDescSetLayout = VK_NULL_HANDLE; } if (fsr2_.accumulatePipeline) { vkDestroyPipeline(device, fsr2_.accumulatePipeline, nullptr); fsr2_.accumulatePipeline = VK_NULL_HANDLE; } @@ -4082,24 +4086,22 @@ void Renderer::dispatchMotionVectors() { vkCmdBindDescriptorSets(currentCmd, VK_PIPELINE_BIND_POINT_COMPUTE, fsr2_.motionVecPipelineLayout, 0, 1, &fsr2_.motionVecDescSet, 0, nullptr); - // Push constants: invViewProj, prevViewProj, resolution, jitterOffset + // Single reprojection matrix: prevUnjitteredVP * inv(currentUnjitteredVP) + // Both matrices are unjittered — jitter only affects sub-pixel sampling, + // not motion vector computation. This avoids numerical instability from + // jitter amplification through large world coordinates. 
struct { - glm::mat4 invViewProj; - glm::mat4 prevViewProj; + glm::mat4 reprojMatrix; // prevUnjitteredVP * inv(currentUnjitteredVP) glm::vec4 resolution; - glm::vec4 jitterOffset; } pc; - glm::mat4 currentVP = camera->getProjectionMatrix() * camera->getViewMatrix(); - pc.invViewProj = glm::inverse(currentVP); - pc.prevViewProj = fsr2_.prevViewProjection; + glm::mat4 currentUnjitteredVP = camera->getUnjitteredViewProjectionMatrix(); + pc.reprojMatrix = fsr2_.prevViewProjection * glm::inverse(currentUnjitteredVP); pc.resolution = glm::vec4( static_cast(fsr2_.internalWidth), static_cast(fsr2_.internalHeight), 1.0f / fsr2_.internalWidth, 1.0f / fsr2_.internalHeight); - glm::vec2 jitter = camera->getJitter(); - pc.jitterOffset = glm::vec4(jitter.x, jitter.y, fsr2_.prevJitter.x, fsr2_.prevJitter.y); vkCmdPushConstants(currentCmd, fsr2_.motionVecPipelineLayout, VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(pc), &pc); @@ -4128,17 +4130,24 @@ void Renderer::dispatchTemporalAccumulate() { VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT); - // Transition history input: GENERAL/UNDEFINED → SHADER_READ_ONLY + // History layout lifecycle: + // First frame: both in UNDEFINED + // Subsequent frames: both in SHADER_READ_ONLY (output was transitioned for sharpen, + // input was left in SHADER_READ_ONLY from its sharpen read) + VkImageLayout historyOldLayout = fsr2_.needsHistoryReset + ? VK_IMAGE_LAYOUT_UNDEFINED + : VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; + + // Transition history input: SHADER_READ_ONLY → SHADER_READ_ONLY (barrier for sync) transitionImageLayout(currentCmd, fsr2_.history[inputIdx].image, - fsr2_.needsHistoryReset ? 
VK_IMAGE_LAYOUT_UNDEFINED : VK_IMAGE_LAYOUT_GENERAL, - VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, - VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, + historyOldLayout, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, + VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, // sharpen read in previous frame VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT); - // Transition history output: UNDEFINED → GENERAL + // Transition history output: SHADER_READ_ONLY → GENERAL (for compute write) transitionImageLayout(currentCmd, fsr2_.history[outputIdx].image, - VK_IMAGE_LAYOUT_UNDEFINED, VK_IMAGE_LAYOUT_GENERAL, - VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, + historyOldLayout, VK_IMAGE_LAYOUT_GENERAL, + VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT); vkCmdBindPipeline(currentCmd, VK_PIPELINE_BIND_POINT_COMPUTE, fsr2_.accumulatePipeline); @@ -4179,6 +4188,10 @@ void Renderer::renderFSR2Sharpen() { VkExtent2D ext = vkCtx->getSwapchainExtent(); uint32_t outputIdx = fsr2_.currentHistory; + // Use per-frame descriptor set to avoid race with in-flight command buffers + uint32_t frameIdx = vkCtx->getCurrentFrame(); + VkDescriptorSet descSet = fsr2_.sharpenDescSets[frameIdx]; + // Update sharpen descriptor to point at current history output VkDescriptorImageInfo imgInfo{}; imgInfo.sampler = fsr2_.linearSampler; @@ -4186,7 +4199,7 @@ void Renderer::renderFSR2Sharpen() { imgInfo.imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; VkWriteDescriptorSet write{VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET}; - write.dstSet = fsr2_.sharpenDescSet; + write.dstSet = descSet; write.dstBinding = 0; write.descriptorCount = 1; write.descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER; @@ -4195,7 +4208,7 @@ void Renderer::renderFSR2Sharpen() { vkCmdBindPipeline(currentCmd, VK_PIPELINE_BIND_POINT_GRAPHICS, fsr2_.sharpenPipeline); vkCmdBindDescriptorSets(currentCmd, VK_PIPELINE_BIND_POINT_GRAPHICS, - fsr2_.sharpenPipelineLayout, 0, 1, &fsr2_.sharpenDescSet, 0, nullptr); + fsr2_.sharpenPipelineLayout, 0, 1, &descSet, 0, 
nullptr); glm::vec4 params(1.0f / ext.width, 1.0f / ext.height, fsr2_.sharpness, 0.0f); vkCmdPushConstants(currentCmd, fsr2_.sharpenPipelineLayout, @@ -4214,6 +4227,11 @@ void Renderer::setFSR2Enabled(bool enabled) { fsr_.enabled = false; fsr_.needsRecreate = true; } + // FSR2 requires non-MSAA render pass (its framebuffer has 2 attachments) + if (vkCtx && vkCtx->getMsaaSamples() > VK_SAMPLE_COUNT_1_BIT) { + pendingMsaaSamples_ = VK_SAMPLE_COUNT_1_BIT; + msaaChangePending_ = true; + } // Use FSR1's scale factor and sharpness as defaults fsr2_.scaleFactor = fsr_.scaleFactor; fsr2_.sharpness = fsr_.sharpness; diff --git a/src/ui/game_screen.cpp b/src/ui/game_screen.cpp index 96800895..eab00305 100644 --- a/src/ui/game_screen.cpp +++ b/src/ui/game_screen.cpp @@ -6281,7 +6281,13 @@ void GameScreen::renderSettingsWindow() { } { const char* aaLabels[] = { "Off", "2x MSAA", "4x MSAA", "8x MSAA" }; - if (ImGui::Combo("Anti-Aliasing", &pendingAntiAliasing, aaLabels, 4)) { + bool fsr2Active = renderer && renderer->isFSR2Enabled(); + if (fsr2Active) { + ImGui::BeginDisabled(); + int disabled = 0; + ImGui::Combo("Anti-Aliasing (FSR2)", &disabled, "Off (FSR2 active)\0", 1); + ImGui::EndDisabled(); + } else if (ImGui::Combo("Anti-Aliasing", &pendingAntiAliasing, aaLabels, 4)) { static const VkSampleCountFlagBits aaSamples[] = { VK_SAMPLE_COUNT_1_BIT, VK_SAMPLE_COUNT_2_BIT, VK_SAMPLE_COUNT_4_BIT, VK_SAMPLE_COUNT_8_BIT