Incremental terrain upload + M2 instance dedup hash for city stutter

Terrain finalization was uploading all 256 chunks (with GPU fence waits) in a single atomic advanceFinalization call that the 5ms time budget could not interrupt. It is now split into incremental batches of 16 chunks per call, allowing the time budget to yield between batches.

M2 instance creation performed an O(N) dedup scan over ALL instances to check for duplicates. In cities with 5000+ doodads, this caused O(N²) total work during tile loading. The scan is replaced with a hash-based DedupKey map for O(1) lookups.

Changes:
- TerrainRenderer::loadTerrainIncremental: uploads N chunks per call
- FinalizingTile tracks terrainChunkNext for cross-frame progress
- TERRAIN phase yields after preload and after each chunk batch
- M2Renderer::DedupKey hash map replaces linear scan in createInstance and createInstanceWithMatrix
- Dedup map maintained through rebuildSpatialIndex and clear paths
2026-03-22 23:30:14 +00:00 · 2026-03-07 11:59:19 -08:00 · 2026-03-07 11:59:19 -08:00 · 884b72bc1c
commit 884b72bc1c
parent f9410cc4bd
6 changed files with 193 additions and 35 deletions
--- a/include/rendering/m2_renderer.hpp
+++ b/include/rendering/m2_renderer.hpp
@ -389,6 +389,25 @@ private:
    std::unordered_map<uint32_t, M2ModelGPU> models;
    std::vector<M2Instance> instances;

+    // O(1) dedup: key = (modelId, quantized x, quantized y, quantized z) → instanceId
+    struct DedupKey { // NOTE(review): exact-cell match — positions closer than 0.1 units can straddle a rounding boundary and get different keys; confirm that is acceptable
+        uint32_t modelId; // model id the instance was created with
+        int32_t qx, qy, qz; // position quantized to 0.1 units
+        bool operator==(const DedupKey& o) const { // equal only when the model id and all three quantized coordinates match
+            return modelId == o.modelId && qx == o.qx && qy == o.qy && qz == o.qz;
+        }
+    };
+    // Hash functor for DedupKey, used as the hasher of instanceDedupMap_.
+    //
+    // Fields are folded in one after another with an order-sensitive mix
+    // (hash_combine style). XOR-ing independently scaled fields, as a simpler
+    // scheme would, makes distinct keys collide whenever their per-field
+    // products coincide, e.g. (qy = 12289, qz = 0) vs (qy = 0, qz = 40503)
+    // with multipliers 40503 and 12289.
+    struct DedupHash {
+        size_t operator()(const DedupKey& k) const {
+            // Each step depends on the running seed, so swapping or
+            // redistributing values between fields changes the result.
+            auto mix = [](size_t seed, size_t value) -> size_t {
+                return seed ^ (value + 0x9e3779b9u + (seed << 6) + (seed >> 2));
+            };
+            size_t h = std::hash<uint32_t>()(k.modelId);
+            h = mix(h, std::hash<int32_t>()(k.qx));
+            h = mix(h, std::hash<int32_t>()(k.qy));
+            h = mix(h, std::hash<int32_t>()(k.qz));
+            return h;
+        }
+    };
+    std::unordered_map<DedupKey, uint32_t, DedupHash> instanceDedupMap_;
+
    uint32_t nextInstanceId = 1;
    uint32_t lastDrawCallCount = 0;
    size_t modelCacheLimit_ = 6000;
--- a/include/rendering/terrain_manager.hpp
+++ b/include/rendering/terrain_manager.hpp
@ -150,6 +150,11 @@ struct FinalizingTile {
    size_t wmoModelIndex = 0;      // Next WMO model to upload
    size_t wmoDoodadIndex = 0;     // Next WMO doodad to upload

+    // Incremental terrain upload state (splits TERRAIN phase across frames)
+    bool terrainPreloaded = false;  // True after preloaded textures uploaded
+    int terrainChunkNext = 0;       // Next chunk index to upload (0-255, row-major)
+    bool terrainMeshDone = false;   // True when all chunks uploaded
+
    // Accumulated results (built up across phases)
    std::vector<uint32_t> m2InstanceIds;
    std::vector<uint32_t> wmoInstanceIds;
--- a/include/rendering/terrain_renderer.hpp
+++ b/include/rendering/terrain_renderer.hpp
@ -86,6 +86,13 @@ public:
                     const std::vector<std::string>& texturePaths,
                     int tileX = -1, int tileY = -1);

+    /// Upload up to maxChunksPerCall chunks of one tile, resuming at chunkIndex; chunks that are invalid or fail to upload are skipped and do not count toward the batch. Returns true once chunkIndex has reached 256 (all chunks visited).
+    /// chunkIndex is an in/out cursor (0-255 row-major) that is advanced past every chunk visited, so the caller passes the same variable again until true is returned.
+    bool loadTerrainIncremental(const pipeline::TerrainMesh& mesh,
+                                const std::vector<std::string>& texturePaths,
+                                int tileX, int tileY,
+                                int& chunkIndex, int maxChunksPerCall = 16);
+
    void removeTile(int tileX, int tileY);

    void uploadPreloadedTextures(const std::unordered_map<std::string, pipeline::BLPImage>& textures);
--- a/src/rendering/m2_renderer.cpp
+++ b/src/rendering/m2_renderer.cpp
@ -678,6 +678,7 @@ void M2Renderer::shutdown() {
    instances.clear();
    spatialGrid.clear();
    instanceIndexById.clear();
+    instanceDedupMap_.clear();

    // Delete cached textures
    textureCache.clear();
@ -1613,17 +1614,16 @@ uint32_t M2Renderer::createInstance(uint32_t modelId, const glm::vec3& position,
    }
    const auto& mdlRef = modelIt->second;

-    // Ground clutter is procedurally scattered and high-count; avoid O(N) dedup
-    // scans that can hitch when new tiles stream in.
+    // Deduplicate: skip if same model already at nearly the same position.
+    // Uses hash map for O(1) lookup instead of O(N) scan.
    if (!mdlRef.isGroundDetail) {
-        // Deduplicate: skip if same model already at nearly the same position
-        for (const auto& existing : instances) {
-            if (existing.modelId == modelId) {
-                glm::vec3 d = existing.position - position;
-                if (glm::dot(d, d) < 0.01f) {
-                    return existing.id;
-                }
-            }
+        DedupKey dk{modelId,
+                    static_cast<int32_t>(std::round(position.x * 10.0f)),
+                    static_cast<int32_t>(std::round(position.y * 10.0f)),
+                    static_cast<int32_t>(std::round(position.z * 10.0f))};
+        auto dit = instanceDedupMap_.find(dk);
+        if (dit != instanceDedupMap_.end()) {
+            return dit->second;
        }
    }

@ -1662,6 +1662,15 @@ uint32_t M2Renderer::createInstance(uint32_t modelId, const glm::vec3& position,
        instance.variationTimer = 3000.0f + static_cast<float>(rand() % 8000);
    }

+    // Register in dedup map before pushing (uses original position, not ground-adjusted)
+    if (!mdlRef.isGroundDetail) {
+        DedupKey dk{modelId,
+                    static_cast<int32_t>(std::round(position.x * 10.0f)),
+                    static_cast<int32_t>(std::round(position.y * 10.0f)),
+                    static_cast<int32_t>(std::round(position.z * 10.0f))};
+        instanceDedupMap_[dk] = instance.id;
+    }
+
    instances.push_back(instance);
    size_t idx = instances.size() - 1;
    // Track special instances for fast-path iteration
@ -1700,13 +1709,15 @@ uint32_t M2Renderer::createInstanceWithMatrix(uint32_t modelId, const glm::mat4&
        return 0;
    }

-    // Deduplicate: skip if same model already at nearly the same position
-    for (const auto& existing : instances) {
-        if (existing.modelId == modelId) {
-            glm::vec3 d = existing.position - position;
-            if (glm::dot(d, d) < 0.01f) {
-                return existing.id;
-            }
+    // Deduplicate: O(1) hash lookup
+    {
+        DedupKey dk{modelId,
+                    static_cast<int32_t>(std::round(position.x * 10.0f)),
+                    static_cast<int32_t>(std::round(position.y * 10.0f)),
+                    static_cast<int32_t>(std::round(position.z * 10.0f))};
+        auto dit = instanceDedupMap_.find(dk);
+        if (dit != instanceDedupMap_.end()) {
+            return dit->second;
        }
    }

@ -1743,6 +1754,15 @@ uint32_t M2Renderer::createInstanceWithMatrix(uint32_t modelId, const glm::mat4&
        instance.animTime = static_cast<float>(rand()) / RAND_MAX * 10000.0f;
    }

+    // Register in dedup map
+    {
+        DedupKey dk{modelId,
+                    static_cast<int32_t>(std::round(position.x * 10.0f)),
+                    static_cast<int32_t>(std::round(position.y * 10.0f)),
+                    static_cast<int32_t>(std::round(position.z * 10.0f))};
+        instanceDedupMap_[dk] = instance.id;
+    }
+
    instances.push_back(instance);
    size_t idx = instances.size() - 1;
    if (mdl2.isSmoke) {
@ -3477,6 +3497,7 @@ void M2Renderer::clear() {
    instances.clear();
    spatialGrid.clear();
    instanceIndexById.clear();
+    instanceDedupMap_.clear();
    smokeParticles.clear();
    smokeInstanceIndices_.clear();
    portalInstanceIndices_.clear();
@ -3513,6 +3534,7 @@ M2Renderer::GridCell M2Renderer::toCell(const glm::vec3& p) const {
 void M2Renderer::rebuildSpatialIndex() {
    spatialGrid.clear();
    instanceIndexById.clear();
+    instanceDedupMap_.clear();
    instanceIndexById.reserve(instances.size());
    smokeInstanceIndices_.clear();
    portalInstanceIndices_.clear();
@ -3524,6 +3546,15 @@ void M2Renderer::rebuildSpatialIndex() {
        const auto& inst = instances[i];
        instanceIndexById[inst.id] = i;

+        // Rebuild dedup map (skip ground detail)
+        if (!inst.cachedIsGroundDetail) {
+            DedupKey dk{inst.modelId,
+                        static_cast<int32_t>(std::round(inst.position.x * 10.0f)),
+                        static_cast<int32_t>(std::round(inst.position.y * 10.0f)),
+                        static_cast<int32_t>(std::round(inst.position.z * 10.0f))};
+            instanceDedupMap_[dk] = inst.id;
+        }
+
        if (inst.cachedIsSmoke) {
            smokeInstanceIndices_.push_back(i);
        }
--- a/src/rendering/terrain_manager.cpp
+++ b/src/rendering/terrain_manager.cpp
@ -695,27 +695,39 @@ bool TerrainManager::advanceFinalization(FinalizingTile& ft) {
            return true;
        }

-        LOG_DEBUG("Finalizing tile [", x, ",", y, "] (incremental)");
-
-        // Upload pre-loaded textures
-        if (!pending->preloadedTextures.empty()) {
-            terrainRenderer->uploadPreloadedTextures(pending->preloadedTextures);
-        }
-
-        // Upload terrain mesh to GPU
-        if (!terrainRenderer->loadTerrain(pending->mesh, pending->terrain.textures, x, y)) {
-            LOG_ERROR("Failed to upload terrain to GPU for tile [", x, ",", y, "]");
-            failedTiles[coord] = true;
-            {
-                std::lock_guard<std::mutex> lock(queueMutex);
-                pendingTiles.erase(coord);
+        // Upload pre-loaded textures (once)
+        if (!ft.terrainPreloaded) {
+            LOG_DEBUG("Finalizing tile [", x, ",", y, "] (incremental)");
+            if (!pending->preloadedTextures.empty()) {
+                terrainRenderer->uploadPreloadedTextures(pending->preloadedTextures);
            }
-            ft.phase = FinalizationPhase::DONE;
-            return true;
+            ft.terrainPreloaded = true;
+            // Yield after preload to give time budget a chance to interrupt
+            return false;
        }

-        // Load water immediately after terrain (same frame) — water is now
-        // deduplicated to ~1-2 merged surfaces per tile, so this is fast.
+        // Upload terrain chunks incrementally (16 per call to spread across frames)
+        if (!ft.terrainMeshDone) {
+            if (pending->mesh.validChunkCount == 0) {
+                LOG_ERROR("Failed to upload terrain to GPU for tile [", x, ",", y, "]");
+                failedTiles[coord] = true;
+                {
+                    std::lock_guard<std::mutex> lock(queueMutex);
+                    pendingTiles.erase(coord);
+                }
+                ft.phase = FinalizationPhase::DONE;
+                return true;
+            }
+            bool allDone = terrainRenderer->loadTerrainIncremental(
+                pending->mesh, pending->terrain.textures, x, y,
+                ft.terrainChunkNext, 16);
+            if (!allDone) {
+                return false; // More chunks remain — yield to time budget
+            }
+            ft.terrainMeshDone = true;
+        }
+
+        // Load water after all terrain chunks are uploaded
        if (waterRenderer) {
            size_t beforeSurfaces = waterRenderer->getSurfaceCount();
            waterRenderer->loadFromTerrain(pending->terrain, true, x, y);
--- a/src/rendering/terrain_renderer.cpp
+++ b/src/rendering/terrain_renderer.cpp
@ -409,6 +409,90 @@ bool TerrainRenderer::loadTerrain(const pipeline::TerrainMesh& mesh,
    return !chunks.empty();
 }

+// Uploads one batch of terrain chunks for a tile, resuming at chunkIndex.
+//
+// Chunks are visited in row-major order (cy = index / 16, cx = index % 16).
+// Chunks that are invalid, or whose GPU upload yields an invalid result, are
+// skipped and do not count toward maxChunksPerCall. chunkIndex is advanced
+// past every chunk visited, so the caller passes the same variable again on
+// the next call.
+//
+// mesh             - tile mesh whose chunks are read via getChunk(cx, cy)
+// texturePaths     - path table indexed by each layer's textureId
+// tileX, tileY     - tile coordinates stamped onto every uploaded chunk
+// chunkIndex       - in/out cursor, next chunk to visit (0-255)
+// maxChunksPerCall - upper bound on chunks uploaded by this call
+//
+// Returns true once every chunk of the tile has been visited, false when
+// more calls are needed.
+bool TerrainRenderer::loadTerrainIncremental(const pipeline::TerrainMesh& mesh,
+                                              const std::vector<std::string>& texturePaths,
+                                              int tileX, int tileY,
+                                              int& chunkIndex, int maxChunksPerCall) {
+    constexpr int kChunksPerSide = 16;
+    constexpr int kChunksPerTile = kChunksPerSide * kChunksPerSide;
+
+    // Sanitize caller-supplied state: a negative cursor would turn into
+    // negative chunk coordinates, and a non-positive batch size would never
+    // make progress (the caller would receive `false` forever).
+    if (chunkIndex < 0) chunkIndex = 0;
+    if (maxChunksPerCall < 1) maxChunksPerCall = 1;
+
+    int uploaded = 0;
+    while (chunkIndex < kChunksPerTile && uploaded < maxChunksPerCall) {
+        int cy = chunkIndex / kChunksPerSide;
+        int cx = chunkIndex % kChunksPerSide;
+        // Advance first so the `continue` paths below still move the cursor.
+        chunkIndex++;
+
+        const auto& chunk = mesh.getChunk(cx, cy);
+        if (!chunk.isValid()) continue;
+
+        TerrainChunkGPU gpuChunk = uploadChunk(chunk);
+        if (!gpuChunk.isValid()) continue;
+
+        calculateBoundingSphere(gpuChunk, chunk);
+
+        if (!chunk.layers.empty()) {
+            // Layer 0 is the base texture; an out-of-range textureId falls
+            // back to the white texture.
+            uint32_t baseTexId = chunk.layers[0].textureId;
+            if (baseTexId < texturePaths.size()) {
+                gpuChunk.baseTexture = loadTexture(texturePaths[baseTexId]);
+            } else {
+                gpuChunk.baseTexture = whiteTexture.get();
+            }
+
+            // Layers 1..3 each get a texture plus an alpha map, stored at
+            // slot (layer index - 1). Further layers are ignored.
+            for (size_t i = 1; i < chunk.layers.size() && i < 4; i++) {
+                const auto& layer = chunk.layers[i];
+                int li = static_cast<int>(i) - 1;
+
+                VkTexture* layerTex = whiteTexture.get();
+                if (layer.textureId < texturePaths.size()) {
+                    layerTex = loadTexture(texturePaths[layer.textureId]);
+                }
+                gpuChunk.layerTextures[li] = layerTex;
+
+                // A layer without alpha data uses the shared opaque alpha texture.
+                VkTexture* alphaTex = opaqueAlphaTexture.get();
+                if (!layer.alphaData.empty()) {
+                    alphaTex = createAlphaTexture(layer.alphaData);
+                }
+                gpuChunk.alphaTextures[li] = alphaTex;
+                gpuChunk.layerCount = static_cast<int>(i);
+            }
+        } else {
+            gpuChunk.baseTexture = whiteTexture.get();
+        }
+
+        gpuChunk.tileX = tileX;
+        gpuChunk.tileY = tileY;
+
+        // Per-chunk shader parameters: which of the extra layers are present.
+        TerrainParamsUBO params{};
+        params.layerCount = gpuChunk.layerCount;
+        params.hasLayer1 = gpuChunk.layerCount >= 1 ? 1 : 0;
+        params.hasLayer2 = gpuChunk.layerCount >= 2 ? 1 : 0;
+        params.hasLayer3 = gpuChunk.layerCount >= 3 ? 1 : 0;
+
+        VkBufferCreateInfo bufCI{};
+        bufCI.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO;
+        bufCI.size = sizeof(TerrainParamsUBO);
+        bufCI.usage = VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT;
+
+        VmaAllocationCreateInfo allocCI{};
+        allocCI.usage = VMA_MEMORY_USAGE_CPU_TO_GPU;
+        allocCI.flags = VMA_ALLOCATION_CREATE_MAPPED_BIT;
+
+        // NOTE(review): the VkResult of vmaCreateBuffer is not checked. On
+        // failure the memcpy below is skipped, but paramsUBO is presumably
+        // left unset and still handed to writeMaterialDescriptors — confirm
+        // and decide how a failed chunk should be released.
+        VmaAllocationInfo mapInfo{};
+        vmaCreateBuffer(vkCtx->getAllocator(), &bufCI, &allocCI,
+                        &gpuChunk.paramsUBO, &gpuChunk.paramsAlloc, &mapInfo);
+        if (mapInfo.pMappedData) {
+            std::memcpy(mapInfo.pMappedData, &params, sizeof(params));
+        }
+
+        gpuChunk.materialSet = allocateMaterialSet();
+        if (gpuChunk.materialSet) {
+            writeMaterialDescriptors(gpuChunk.materialSet, gpuChunk);
+        }
+
+        chunks.push_back(std::move(gpuChunk));
+        uploaded++;
+    }
+
+    return chunkIndex >= kChunksPerTile;
+}
+
 TerrainChunkGPU TerrainRenderer::uploadChunk(const pipeline::ChunkMesh& chunk) {
    TerrainChunkGPU gpuChunk;