Fix main-thread hang from terrain finalization; two-pass M2 rendering; tile streaming improvements

Hang/GPU device lost fix:
- M2_INSTANCES and WMO_INSTANCES finalization phases now create instances incrementally (32 M2 instances per step / 4 WMO instances per step) instead of all at once, eliminating the >1s main-thread stalls that caused GPU fence timeouts and device loss

M2 two-pass transparent rendering:
- Opaque/alpha-test batches render in pass 1, transparent/additive in pass 2 (back-to-front sorted) to fix wing transparency showing terrain instead of trees
- Adds hasTransparentBatches flag to skip models with no transparency

Tile streaming improvements:
- Sort new load queue entries nearest-first so critical tiles load before distant ones during fast taxi flight
- Increase taxi load radius 6→8 tiles, unload 9→12 for better coverage

Water refraction gated on FSR:
- Disable water refraction when FSR is not active (bugged without upscaling)
- Auto-disable refraction if FSR is turned off while refraction was on
2026-05-11 19:43:52 +00:00 · 2026-03-09 20:58:49 -07:00 · 2026-03-09 20:58:49 -07:00 · e0d47040d3
commit e0d47040d3
parent a49c013c89
6 changed files with 234 additions and 38 deletions
--- a/include/rendering/m2_renderer.hpp
+++ b/include/rendering/m2_renderer.hpp
@ -122,6 +122,7 @@ struct M2ModelGPU {
    bool isKoboldFlame = false;     // Model name matches kobold+(candle/torch/mine) (precomputed)
    bool isLavaModel = false;       // Model name contains lava/molten/magma (UV scroll fallback)
    bool hasTextureAnimation = false; // True if any batch has UV animation
+    bool hasTransparentBatches = false; // True if any batch uses alpha-blend or additive (blendMode >= 2)
    uint8_t availableLODs = 0;  // Bitmask: bit N set if any batch has submeshLevel==N

    // Particle emitter data (kept from M2Model)
--- a/include/rendering/terrain_manager.hpp
+++ b/include/rendering/terrain_manager.hpp
@ -152,9 +152,11 @@ struct FinalizingTile {
    FinalizationPhase phase = FinalizationPhase::TERRAIN;

    // Progress indices within current phase
-    size_t m2ModelIndex = 0;       // Next M2 model to upload
-    size_t wmoModelIndex = 0;      // Next WMO model to upload
-    size_t wmoDoodadIndex = 0;     // Next WMO doodad to upload
+    size_t m2ModelIndex     = 0;   // Next M2 model to upload
+    size_t m2InstanceIndex  = 0;   // Next M2 placement to instantiate
+    size_t wmoModelIndex    = 0;   // Next WMO model to upload
+    size_t wmoInstanceIndex = 0;   // Next WMO placement to instantiate
+    size_t wmoDoodadIndex   = 0;   // Next WMO doodad to upload

    // Incremental terrain upload state (splits TERRAIN phase across frames)
    bool terrainPreloaded = false;  // True after preloaded textures uploaded
--- a/src/core/application.cpp
+++ b/src/core/application.cpp
@ -1108,8 +1108,8 @@ void Application::update(float deltaTime) {
                // Taxi flights move fast (32 u/s) — load further ahead so terrain is ready
                // before the camera arrives.  Keep updates frequent to spot new tiles early.
                renderer->getTerrainManager()->setUpdateInterval(onTaxi ? 0.033f : 0.033f);
-                renderer->getTerrainManager()->setLoadRadius(onTaxi ? 6 : 4);
-                renderer->getTerrainManager()->setUnloadRadius(onTaxi ? 9 : 7);
+                renderer->getTerrainManager()->setLoadRadius(onTaxi ? 8 : 4);
+                renderer->getTerrainManager()->setUnloadRadius(onTaxi ? 12 : 7);
                renderer->getTerrainManager()->setTaxiStreamingMode(onTaxi);
                }
                lastTaxiFlight_ = onTaxi;
--- a/src/rendering/m2_renderer.cpp
+++ b/src/rendering/m2_renderer.cpp
@ -1357,6 +1357,7 @@ bool M2Renderer::loadModel(const pipeline::M2Model& model, uint32_t modelId) {
            if (batch.materialIndex < model.materials.size()) {
                bgpu.blendMode = model.materials[batch.materialIndex].blendMode;
                bgpu.materialFlags = model.materials[batch.materialIndex].flags;
+                if (bgpu.blendMode >= 2) gpuModel.hasTransparentBatches = true;
            }

            // Copy LOD level from batch
@ -2349,7 +2350,11 @@ void M2Renderer::render(VkCommandBuffer cmd, VkDescriptorSet perFrameSet, const
        sortedVisible_.push_back({i, instance.modelId, distSq, effectiveMaxDistSq});
    }

-    // Sort by modelId to minimize vertex/index buffer rebinds
+    // Two-pass rendering: opaque/alpha-test first (depth write ON), then transparent/additive
+    // (depth write OFF, sorted back-to-front) so transparent geometry composites correctly
+    // against all opaque geometry rather than only against what was rendered before it.
+
+    // Pass 1: sort by modelId for minimum buffer rebinds (opaque batches)
    std::sort(sortedVisible_.begin(), sortedVisible_.end(),
              [](const VisibleEntry& a, const VisibleEntry& b) { return a.modelId < b.modelId; });

@ -2377,6 +2382,7 @@ void M2Renderer::render(VkCommandBuffer cmd, VkDescriptorSet perFrameSet, const
    // Start with opaque pipeline
    vkCmdBindPipeline(cmd, VK_PIPELINE_BIND_POINT_GRAPHICS, opaquePipeline_);
    currentPipeline = opaquePipeline_;
+    bool opaquePass = true; // Pass 1 = opaque, pass 2 = transparent (set below for second pass)

    for (const auto& entry : sortedVisible_) {
        if (entry.index >= instances.size()) continue;
@ -2475,6 +2481,15 @@ void M2Renderer::render(VkCommandBuffer cmd, VkDescriptorSet perFrameSet, const
            if (!model.isGroundDetail && batch.submeshLevel != targetLOD) continue;
            if (batch.batchOpacity < 0.01f) continue;

+            // Two-pass gate: pass 1 = opaque/cutout only, pass 2 = transparent/additive only.
+            // Alpha-test (blendMode==1) and spell effects that force-additive are handled
+            // by their effective blend mode below; gate on raw blendMode here.
+            {
+                const bool rawTransparent = (batch.blendMode >= 2) || model.isSpellEffect;
+                if (opaquePass && rawTransparent) continue;   // skip transparent in opaque pass
+                if (!opaquePass && !rawTransparent) continue; // skip opaque in transparent pass
+            }
+
            const bool koboldFlameCard = batch.colorKeyBlack && model.isKoboldFlame;
            const bool smallCardLikeBatch =
                (batch.glowSize <= 1.35f) ||
@ -2628,6 +2643,163 @@ void M2Renderer::render(VkCommandBuffer cmd, VkDescriptorSet perFrameSet, const
        }
    }

+    // Pass 2: transparent/additive batches — sort back-to-front by distance so
+    // overlapping transparent geometry composites in the correct painter's order.
+    opaquePass = false;
+    std::sort(sortedVisible_.begin(), sortedVisible_.end(),
+              [](const VisibleEntry& a, const VisibleEntry& b) { return a.distSq > b.distSq; });
+
+    currentModelId = UINT32_MAX;
+    currentModel = nullptr;
+    // Reset pipeline to opaque so the first transparent bind always sets explicitly
+    currentPipeline = opaquePipeline_;
+
+    for (const auto& entry : sortedVisible_) {
+        if (entry.index >= instances.size()) continue;
+        auto& instance = instances[entry.index];
+
+        // Quick skip: if model has no transparent batches at all, skip it entirely
+        if (entry.modelId != currentModelId) {
+            auto mdlIt = models.find(entry.modelId);
+            if (mdlIt == models.end()) continue;
+            if (!mdlIt->second.hasTransparentBatches && !mdlIt->second.isSpellEffect) continue;
+        }
+
+        // Reuse the same rendering logic as pass 1 (via fallthrough — the batch gate
+        // `!opaquePass && !rawTransparent → continue` handles opaque skipping)
+        if (entry.modelId != currentModelId) {
+            currentModelId = entry.modelId;
+            auto mdlIt = models.find(currentModelId);
+            if (mdlIt == models.end()) continue;
+            currentModel = &mdlIt->second;
+            if (!currentModel->vertexBuffer) continue;
+            VkDeviceSize offset = 0;
+            vkCmdBindVertexBuffers(cmd, 0, 1, &currentModel->vertexBuffer, &offset);
+            vkCmdBindIndexBuffer(cmd, currentModel->indexBuffer, 0, VK_INDEX_TYPE_UINT16);
+        }
+
+        const M2ModelGPU& model = *currentModel;
+
+        // Distance-based fade alpha (same as pass 1)
+        float fadeAlpha = 1.0f;
+        float fadeFrac = model.disableAnimation ? 0.55f : fadeStartFraction;
+        float fadeStartDistSq = entry.effectiveMaxDistSq * fadeFrac * fadeFrac;
+        if (entry.distSq > fadeStartDistSq) {
+            fadeAlpha = std::clamp((entry.effectiveMaxDistSq - entry.distSq) /
+                                  (entry.effectiveMaxDistSq - fadeStartDistSq), 0.0f, 1.0f);
+        }
+        float instanceFadeAlpha = fadeAlpha;
+        if (model.isGroundDetail) instanceFadeAlpha *= 0.82f;
+        if (model.isInstancePortal) instanceFadeAlpha *= 0.12f;
+
+        bool modelNeedsAnimation = model.hasAnimation && !model.disableAnimation;
+        if (modelNeedsAnimation && instance.boneMatrices.empty()) continue;
+        bool needsBones = modelNeedsAnimation && !instance.boneMatrices.empty();
+        if (needsBones && (!instance.boneBuffer[frameIndex] || !instance.boneSet[frameIndex])) continue;
+        bool useBones = needsBones;
+        if (useBones && instance.boneSet[frameIndex]) {
+            vkCmdBindDescriptorSets(cmd, VK_PIPELINE_BIND_POINT_GRAPHICS,
+                                    pipelineLayout_, 2, 1, &instance.boneSet[frameIndex], 0, nullptr);
+        }
+
+        uint16_t desiredLOD = 0;
+        if (entry.distSq > 150.0f * 150.0f) desiredLOD = 3;
+        else if (entry.distSq > 80.0f * 80.0f) desiredLOD = 2;
+        else if (entry.distSq > 40.0f * 40.0f) desiredLOD = 1;
+        uint16_t targetLOD = desiredLOD;
+        if (desiredLOD > 0 && !(model.availableLODs & (1u << desiredLOD))) targetLOD = 0;
+
+        const bool foliageLikeModel = model.isFoliageLike;
+        const bool particleDominantEffect = model.isSpellEffect &&
+            !model.particleEmitters.empty() && model.batches.size() <= 2;
+
+        for (const auto& batch : model.batches) {
+            if (batch.indexCount == 0) continue;
+            if (!model.isGroundDetail && batch.submeshLevel != targetLOD) continue;
+            if (batch.batchOpacity < 0.01f) continue;
+
+            // Pass 2 gate: only transparent/additive batches
+            {
+                const bool rawTransparent = (batch.blendMode >= 2) || model.isSpellEffect;
+                if (!rawTransparent) continue;
+            }
+
+            // Skip glow sprites (handled after loop)
+            const bool batchUnlit = (batch.materialFlags & 0x01) != 0;
+            const bool shouldUseGlowSprite =
+                !batch.colorKeyBlack &&
+                (model.isElvenLike || model.isLanternLike) &&
+                !model.isSpellEffect &&
+                (batch.glowSize <= 1.35f || (batch.lanternGlowHint && batch.glowSize <= 6.0f)) &&
+                (batch.lanternGlowHint || (batch.blendMode >= 3) ||
+                 (batch.colorKeyBlack && batchUnlit && batch.blendMode >= 1));
+            if (shouldUseGlowSprite) {
+                const bool cardLikeSkipMesh = (batch.blendMode >= 3) || batch.colorKeyBlack || batchUnlit;
+                if ((batch.glowCardLike && model.isLanternLike) || (cardLikeSkipMesh && !model.isLanternLike))
+                    continue;
+            }
+
+            glm::vec2 uvOffset(0.0f, 0.0f);
+            if (batch.textureAnimIndex != 0xFFFF && model.hasTextureAnimation) {
+                uint16_t lookupIdx = batch.textureAnimIndex;
+                if (lookupIdx < model.textureTransformLookup.size()) {
+                    uint16_t transformIdx = model.textureTransformLookup[lookupIdx];
+                    if (transformIdx < model.textureTransforms.size()) {
+                        const auto& tt = model.textureTransforms[transformIdx];
+                        glm::vec3 trans = interpVec3(tt.translation,
+                            instance.currentSequenceIndex, instance.animTime,
+                            glm::vec3(0.0f), model.globalSequenceDurations);
+                        uvOffset = glm::vec2(trans.x, trans.y);
+                    }
+                }
+            }
+            if (model.isLavaModel && uvOffset == glm::vec2(0.0f)) {
+                static auto startTime2 = std::chrono::steady_clock::now();
+                float t = std::chrono::duration<float>(std::chrono::steady_clock::now() - startTime2).count();
+                uvOffset = glm::vec2(t * 0.03f, -t * 0.08f);
+            }
+
+            uint8_t effectiveBlendMode = batch.blendMode;
+            if (model.isSpellEffect) {
+                if (effectiveBlendMode <= 1) effectiveBlendMode = 3;
+                else if (effectiveBlendMode == 4 || effectiveBlendMode == 5) effectiveBlendMode = 3;
+            }
+
+            VkPipeline desiredPipeline;
+            switch (effectiveBlendMode) {
+                case 2: desiredPipeline = alphaPipeline_; break;
+                default: desiredPipeline = additivePipeline_; break;
+            }
+            if (desiredPipeline != currentPipeline) {
+                vkCmdBindPipeline(cmd, VK_PIPELINE_BIND_POINT_GRAPHICS, desiredPipeline);
+                currentPipeline = desiredPipeline;
+            }
+
+            if (batch.materialUBOMapped) {
+                auto* mat = static_cast<M2MaterialUBO*>(batch.materialUBOMapped);
+                mat->interiorDarken = insideInterior ? 1.0f : 0.0f;
+                if (batch.colorKeyBlack)
+                    mat->colorKeyThreshold = (effectiveBlendMode == 4 || effectiveBlendMode == 5) ? 0.7f : 0.08f;
+            }
+
+            if (!batch.materialSet) continue;
+            vkCmdBindDescriptorSets(cmd, VK_PIPELINE_BIND_POINT_GRAPHICS,
+                                    pipelineLayout_, 1, 1, &batch.materialSet, 0, nullptr);
+
+            M2PushConstants pc;
+            pc.model = instance.modelMatrix;
+            pc.uvOffset = uvOffset;
+            pc.texCoordSet = static_cast<int>(batch.textureUnit);
+            pc.useBones = useBones ? 1 : 0;
+            pc.isFoliage = model.shadowWindFoliage ? 1 : 0;
+            pc.fadeAlpha = instanceFadeAlpha;
+            if (particleDominantEffect) continue; // emission-only mesh
+            vkCmdPushConstants(cmd, pipelineLayout_, VK_SHADER_STAGE_VERTEX_BIT, 0, sizeof(pc), &pc);
+            vkCmdDrawIndexed(cmd, batch.indexCount, 1, batch.indexStart, 0, 0);
+            lastDrawCallCount++;
+        }
+    }
+
    // Render glow sprites as billboarded additive point lights
    if (!glowSprites_.empty() && particleAdditivePipeline_ && glowVB_ && glowTexDescSet_) {
        vkCmdBindPipeline(cmd, VK_PIPELINE_BIND_POINT_GRAPHICS, particleAdditivePipeline_);
--- a/src/rendering/terrain_manager.cpp
+++ b/src/rendering/terrain_manager.cpp
@ -885,13 +885,15 @@ bool TerrainManager::advanceFinalization(FinalizingTile& ft) {
    }

    case FinalizationPhase::M2_INSTANCES: {
-        // Create all M2 instances (lightweight struct allocation, no GPU work)
-        if (m2Renderer) {
-            int loadedDoodads = 0;
-            int skippedDedup = 0;
-            for (const auto& p : pending->m2Placements) {
+        // Create M2 instances incrementally to avoid main-thread stalls.
+        // createInstance includes an O(n) bone-sibling scan that becomes expensive
+        // on dense tiles with many placements and a large existing instance list.
+        if (m2Renderer && ft.m2InstanceIndex < pending->m2Placements.size()) {
+            constexpr size_t kInstancesPerStep = 32;
+            size_t created = 0;
+            while (ft.m2InstanceIndex < pending->m2Placements.size() && created < kInstancesPerStep) {
+                const auto& p = pending->m2Placements[ft.m2InstanceIndex++];
                if (p.uniqueId != 0 && placedDoodadIds.count(p.uniqueId)) {
-                    skippedDedup++;
                    continue;
                }
                uint32_t instId = m2Renderer->createInstance(p.modelId, p.position, p.rotation, p.scale);
@ -901,12 +903,14 @@ bool TerrainManager::advanceFinalization(FinalizingTile& ft) {
                        placedDoodadIds.insert(p.uniqueId);
                        ft.tileUniqueIds.push_back(p.uniqueId);
                    }
-                    loadedDoodads++;
+                    created++;
                }
            }
+            if (ft.m2InstanceIndex < pending->m2Placements.size()) {
+                return false; // More instances to create — yield
+            }
            LOG_DEBUG("  Loaded doodads for tile [", x, ",", y, "]: ",
-                     loadedDoodads, " instances (", ft.uploadedM2ModelIds.size(), " new models, ",
-                     skippedDedup, " dedup skipped)");
+                     ft.m2InstanceIds.size(), " instances (", ft.uploadedM2ModelIds.size(), " new models)");
        }
        ft.phase = FinalizationPhase::WMO_MODELS;
        return false;
@ -948,17 +952,15 @@ bool TerrainManager::advanceFinalization(FinalizingTile& ft) {
    }

    case FinalizationPhase::WMO_INSTANCES: {
-        // Create all WMO instances + load WMO liquids
-        if (wmoRenderer) {
-            int loadedWMOs = 0;
-            int loadedLiquids = 0;
-            int skippedWmoDedup = 0;
-            for (auto& wmoReady : pending->wmoModels) {
+        // Create WMO instances incrementally to avoid stalls on tiles with many WMOs.
+        if (wmoRenderer && ft.wmoInstanceIndex < pending->wmoModels.size()) {
+            constexpr size_t kWmoInstancesPerStep = 4;
+            size_t created = 0;
+            while (ft.wmoInstanceIndex < pending->wmoModels.size() && created < kWmoInstancesPerStep) {
+                auto& wmoReady = pending->wmoModels[ft.wmoInstanceIndex++];
                if (wmoReady.uniqueId != 0 && placedWmoIds.count(wmoReady.uniqueId)) {
-                    skippedWmoDedup++;
                    continue;
                }
-
                uint32_t wmoInstId = wmoRenderer->createInstance(wmoReady.modelId, wmoReady.position, wmoReady.rotation);
                if (wmoInstId) {
                    ft.wmoInstanceIds.push_back(wmoInstId);
@ -966,8 +968,6 @@ bool TerrainManager::advanceFinalization(FinalizingTile& ft) {
                        placedWmoIds.insert(wmoReady.uniqueId);
                        ft.tileWmoUniqueIds.push_back(wmoReady.uniqueId);
                    }
-                    loadedWMOs++;
-
                    // Load WMO liquids (canals, pools, etc.)
                    if (waterRenderer) {
                        glm::mat4 modelMatrix = glm::mat4(1.0f);
@ -977,25 +977,21 @@ bool TerrainManager::advanceFinalization(FinalizingTile& ft) {
                        modelMatrix = glm::rotate(modelMatrix, wmoReady.rotation.x, glm::vec3(1.0f, 0.0f, 0.0f));
                        for (const auto& group : wmoReady.model.groups) {
                            if (!group.liquid.hasLiquid()) continue;
-                            // Skip interior water/ocean but keep magma/slime (e.g. Ironforge lava)
                            if (group.flags & 0x2000) {
                                uint16_t lt = group.liquid.materialId;
                                uint8_t basicType = (lt == 0) ? 0 : ((lt - 1) % 4);
                                if (basicType < 2) continue;
                            }
                            waterRenderer->loadFromWMO(group.liquid, modelMatrix, wmoInstId);
-                            loadedLiquids++;
                        }
                    }
+                    created++;
                }
            }
-            if (loadedWMOs > 0 || skippedWmoDedup > 0) {
-                LOG_DEBUG("  Loaded WMOs for tile [", x, ",", y, "]: ",
-                         loadedWMOs, " instances, ", skippedWmoDedup, " dedup skipped");
-            }
-            if (loadedLiquids > 0) {
-                LOG_DEBUG("  Loaded WMO liquids for tile [", x, ",", y, "]: ", loadedLiquids);
+            if (ft.wmoInstanceIndex < pending->wmoModels.size()) {
+                return false; // More WMO instances to create — yield
            }
+            LOG_DEBUG("  Loaded WMOs for tile [", x, ",", y, "]: ", ft.wmoInstanceIds.size(), " instances");
        }
        ft.phase = FinalizationPhase::WMO_DOODADS;
        return false;
@ -2213,10 +2209,16 @@ void TerrainManager::streamTiles() {
        return false;
    };

-    // Enqueue tiles in radius around current tile for async loading
+    // Enqueue tiles in radius around current tile for async loading.
+    // Collect all newly-needed tiles, then sort by distance so the closest
+    // (most visible) tiles get loaded first.  This is critical during taxi
+    // flight where new tiles enter the radius faster than they can load.
    {
        std::lock_guard<std::mutex> lock(queueMutex);

+        struct PendingEntry { TileCoord coord; int distSq; };
+        std::vector<PendingEntry> newTiles;
+
        for (int dy = -loadRadius; dy <= loadRadius; dy++) {
            for (int dx = -loadRadius; dx <= loadRadius; dx++) {
                int tileX = currentTile.x + dx;
@ -2240,10 +2242,19 @@ void TerrainManager::streamTiles() {
                if (failedTiles.find(coord) != failedTiles.end()) continue;
                if (shouldSkipMissingAdt(coord)) continue;

-                loadQueue.push_back(coord);
+                newTiles.push_back({coord, dx*dx + dy*dy});
                pendingTiles[coord] = true;
            }
        }
+
+        // Sort nearest tiles first so workers service the most visible tiles
+        std::sort(newTiles.begin(), newTiles.end(),
+                  [](const PendingEntry& a, const PendingEntry& b) { return a.distSq < b.distSq; });
+
+        // Insert at front so new close tiles preempt any distant tiles already queued
+        for (auto it = newTiles.rbegin(); it != newTiles.rend(); ++it) {
+            loadQueue.push_front(it->coord);
+        }
    }

    // Notify workers that there's work
--- a/src/ui/game_screen.cpp
+++ b/src/ui/game_screen.cpp
@ -7184,9 +7184,19 @@ void GameScreen::renderSettingsWindow() {
                        saveSettings();
                    }
                }
-                if (ImGui::Checkbox("Water Refraction", &pendingWaterRefraction)) {
-                    if (renderer) renderer->setWaterRefractionEnabled(pendingWaterRefraction);
-                    saveSettings();
+                {
+                    bool fsrActive = renderer && (renderer->isFSREnabled() || renderer->isFSR2Enabled());
+                    if (!fsrActive && pendingWaterRefraction) {
+                        // FSR was disabled while refraction was on — auto-disable
+                        pendingWaterRefraction = false;
+                        if (renderer) renderer->setWaterRefractionEnabled(false);
+                    }
+                    if (!fsrActive) ImGui::BeginDisabled();
+                    if (ImGui::Checkbox("Water Refraction (requires FSR)", &pendingWaterRefraction)) {
+                        if (renderer) renderer->setWaterRefractionEnabled(pendingWaterRefraction);
+                        saveSettings();
+                    }
+                    if (!fsrActive) ImGui::EndDisabled();
                }
                {
                    const char* aaLabels[] = { "Off", "2x MSAA", "4x MSAA", "8x MSAA" };