From 884b72bc1c2cb8e05c7ccf71316f175e81ab3962 Mon Sep 17 00:00:00 2001
From: Kelsi <kelsihates2fa@gmail.com>
Date: Sat, 7 Mar 2026 11:59:19 -0800
Subject: [PATCH] Incremental terrain upload + M2 instance dedup hash for city
 stutter
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Terrain finalization uploaded all 256 chunks (with their GPU fence waits) in a
single advanceFinalization call that the 5ms time budget could not interrupt.
The upload is now split into batches of 16 chunks per call, so the time budget
can yield between batches.

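M2 instance creation ran an O(N) dedup scan over ALL instances to check for
duplicates. In cities with 5000+ doodads, this caused O(N²) total work during
tile loading. The scan is replaced with a hash-based DedupKey map for O(1)
lookups. Note that matching is now by 0.1-unit grid cell (position * 10,
rounded) rather than by distance < 0.1, so two placements of the same model
that straddle a cell boundary are no longer merged even if they are closer
than 0.1 units apart.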

Changes:
- TerrainRenderer::loadTerrainIncremental: uploads N chunks per call
- FinalizingTile tracks terrainChunkNext for cross-frame progress
- TERRAIN phase yields after preload and after each chunk batch
- M2Renderer::DedupKey hash map replaces linear scan in createInstance
  and createInstanceWithMatrix
- Dedup map is cleared in shutdown/clear and rebuilt in rebuildSpatialIndex
---
 include/rendering/m2_renderer.hpp      | 19 ++++++
 include/rendering/terrain_manager.hpp  |  5 ++
 include/rendering/terrain_renderer.hpp |  7 +++
 src/rendering/m2_renderer.cpp          | 65 ++++++++++++++------
 src/rendering/terrain_manager.cpp      | 48 +++++++++------
 src/rendering/terrain_renderer.cpp     | 84 ++++++++++++++++++++++++++
 6 files changed, 193 insertions(+), 35 deletions(-)
diff --git a/include/rendering/m2_renderer.hpp b/include/rendering/m2_renderer.hpp
index f53fb4bf..91616a28 100644
--- a/include/rendering/m2_renderer.hpp
+++ b/include/rendering/m2_renderer.hpp
@@ -389,6 +389,25 @@ private:
     std::unordered_map<uint32_t, M2ModelGPU> models;
     std::vector<M2Instance> instances;
 
+    // O(1) dedup: key = (modelId, quantized x, quantized y, quantized z) → instanceId. Equal keys mean "same model, same 0.1-unit cell".
+    struct DedupKey {
+        uint32_t modelId;
+        int32_t qx, qy, qz; // position quantized to 0.1 units (callers store round(coord * 10)); nearby positions can still fall in different cells
+        bool operator==(const DedupKey& o) const { // exact match on all four fields
+            return modelId == o.modelId && qx == o.qx && qy == o.qy && qz == o.qz;
+        }
+    };
+    struct DedupHash {
+        size_t operator()(const DedupKey& k) const { // XOR-folds the hashes of the four key fields
+            size_t h = std::hash<uint32_t>()(k.modelId);
+            h ^= std::hash<int32_t>()(k.qx) * 2654435761u; // each axis uses a different multiplier, so a
+            h ^= std::hash<int32_t>()(k.qy) * 40503u;      // permutation of (qx, qy, qz) generally hashes
+            h ^= std::hash<int32_t>()(k.qz) * 12289u;      // to a different value
+            return h;
+        }
+    };
+    std::unordered_map<DedupKey, uint32_t, DedupHash> instanceDedupMap_; // key → instance id. NOTE(review): confirm instance-removal paths erase or rebuild entries, otherwise a lookup can return a stale id
+
     uint32_t nextInstanceId = 1;
     uint32_t lastDrawCallCount = 0;
     size_t modelCacheLimit_ = 6000;
diff --git a/include/rendering/terrain_manager.hpp b/include/rendering/terrain_manager.hpp
index 0090edc4..efede0c9 100644
--- a/include/rendering/terrain_manager.hpp
+++ b/include/rendering/terrain_manager.hpp
@@ -150,6 +150,11 @@ struct FinalizingTile {
     size_t wmoModelIndex = 0;      // Next WMO model to upload
     size_t wmoDoodadIndex = 0;     // Next WMO doodad to upload
 
+    // Incremental terrain upload state (splits TERRAIN phase across frames)
+    bool terrainPreloaded = false;  // True once the preload step has run (set even when there were no preloaded textures)
+    int terrainChunkNext = 0;       // Next chunk slot to process (row-major, cy*16+cx); reaches 256 once every slot has been visited
+    bool terrainMeshDone = false;   // True once loadTerrainIncremental has reported all chunks done
+
     // Accumulated results (built up across phases)
     std::vector<uint32_t> m2InstanceIds;
     std::vector<uint32_t> wmoInstanceIds;
diff --git a/include/rendering/terrain_renderer.hpp b/include/rendering/terrain_renderer.hpp
index 91279e9c..a1d433d1 100644
--- a/include/rendering/terrain_renderer.hpp
+++ b/include/rendering/terrain_renderer.hpp
@@ -86,6 +86,13 @@ public:
                      const std::vector<std::string>& texturePaths,
                      int tileX = -1, int tileY = -1);
 
+    /// Upload a batch of at most maxChunksPerCall terrain chunks, resuming at chunkIndex. Returns true when all chunks done.
+    /// chunkIndex is updated to the next chunk to process (row-major, cy*16+cx; 256 when finished). Invalid chunks are skipped and do not count toward the batch.
+    bool loadTerrainIncremental(const pipeline::TerrainMesh& mesh,
+                                const std::vector<std::string>& texturePaths,
+                                int tileX, int tileY,
+                                int& chunkIndex, int maxChunksPerCall = 16);
+
     void removeTile(int tileX, int tileY);
 
     void uploadPreloadedTextures(const std::unordered_map<std::string, pipeline::BLPImage>& textures);
diff --git a/src/rendering/m2_renderer.cpp b/src/rendering/m2_renderer.cpp
index d76843a0..d6df9dfe 100644
--- a/src/rendering/m2_renderer.cpp
+++ b/src/rendering/m2_renderer.cpp
@@ -678,6 +678,7 @@ void M2Renderer::shutdown() {
     instances.clear();
     spatialGrid.clear();
     instanceIndexById.clear();
+    instanceDedupMap_.clear();
 
     // Delete cached textures
     textureCache.clear();
@@ -1613,17 +1614,16 @@ uint32_t M2Renderer::createInstance(uint32_t modelId, const glm::vec3& position,
     }
     const auto& mdlRef = modelIt->second;
 
-    // Ground clutter is procedurally scattered and high-count; avoid O(N) dedup
-    // scans that can hitch when new tiles stream in.
+    // Deduplicate: skip if same model already at nearly the same position.
+    // Uses hash map for O(1) lookup instead of O(N) scan.
     if (!mdlRef.isGroundDetail) {
-        // Deduplicate: skip if same model already at nearly the same position
-        for (const auto& existing : instances) {
-            if (existing.modelId == modelId) {
-                glm::vec3 d = existing.position - position;
-                if (glm::dot(d, d) < 0.01f) {
-                    return existing.id;
-                }
-            }
+        DedupKey dk{modelId,
+                    static_cast<int32_t>(std::round(position.x * 10.0f)),
+                    static_cast<int32_t>(std::round(position.y * 10.0f)),
+                    static_cast<int32_t>(std::round(position.z * 10.0f))};
+        auto dit = instanceDedupMap_.find(dk);
+        if (dit != instanceDedupMap_.end()) {
+            return dit->second;
         }
     }
 
@@ -1662,6 +1662,15 @@ uint32_t M2Renderer::createInstance(uint32_t modelId, const glm::vec3& position,
         instance.variationTimer = 3000.0f + static_cast<float>(rand() % 8000);
     }
 
+    // Register in dedup map before pushing (uses original position, not ground-adjusted)
+    if (!mdlRef.isGroundDetail) {
+        DedupKey dk{modelId,
+                    static_cast<int32_t>(std::round(position.x * 10.0f)),
+                    static_cast<int32_t>(std::round(position.y * 10.0f)),
+                    static_cast<int32_t>(std::round(position.z * 10.0f))};
+        instanceDedupMap_[dk] = instance.id;
+    }
+
     instances.push_back(instance);
     size_t idx = instances.size() - 1;
     // Track special instances for fast-path iteration
@@ -1700,13 +1709,15 @@ uint32_t M2Renderer::createInstanceWithMatrix(uint32_t modelId, const glm::mat4&
         return 0;
     }
 
-    // Deduplicate: skip if same model already at nearly the same position
-    for (const auto& existing : instances) {
-        if (existing.modelId == modelId) {
-            glm::vec3 d = existing.position - position;
-            if (glm::dot(d, d) < 0.01f) {
-                return existing.id;
-            }
+    // Deduplicate: O(1) hash lookup
+    {
+        DedupKey dk{modelId,
+                    static_cast<int32_t>(std::round(position.x * 10.0f)),
+                    static_cast<int32_t>(std::round(position.y * 10.0f)),
+                    static_cast<int32_t>(std::round(position.z * 10.0f))};
+        auto dit = instanceDedupMap_.find(dk);
+        if (dit != instanceDedupMap_.end()) {
+            return dit->second;
         }
     }
 
@@ -1743,6 +1754,15 @@ uint32_t M2Renderer::createInstanceWithMatrix(uint32_t modelId, const glm::mat4&
         instance.animTime = static_cast<float>(rand()) / RAND_MAX * 10000.0f;
     }
 
+    // Register in dedup map
+    {
+        DedupKey dk{modelId,
+                    static_cast<int32_t>(std::round(position.x * 10.0f)),
+                    static_cast<int32_t>(std::round(position.y * 10.0f)),
+                    static_cast<int32_t>(std::round(position.z * 10.0f))};
+        instanceDedupMap_[dk] = instance.id;
+    }
+
     instances.push_back(instance);
     size_t idx = instances.size() - 1;
     if (mdl2.isSmoke) {
@@ -3477,6 +3497,7 @@ void M2Renderer::clear() {
     instances.clear();
     spatialGrid.clear();
     instanceIndexById.clear();
+    instanceDedupMap_.clear();
     smokeParticles.clear();
     smokeInstanceIndices_.clear();
     portalInstanceIndices_.clear();
@@ -3513,6 +3534,7 @@ M2Renderer::GridCell M2Renderer::toCell(const glm::vec3& p) const {
 void M2Renderer::rebuildSpatialIndex() {
     spatialGrid.clear();
     instanceIndexById.clear();
+    instanceDedupMap_.clear();
     instanceIndexById.reserve(instances.size());
     smokeInstanceIndices_.clear();
     portalInstanceIndices_.clear();
@@ -3524,6 +3546,15 @@ void M2Renderer::rebuildSpatialIndex() {
         const auto& inst = instances[i];
         instanceIndexById[inst.id] = i;
 
+        // Rebuild dedup map (skip ground detail). NOTE(review): the key here is built from the stored inst.position, while createInstance keys on its position argument ("not ground-adjusted") — confirm the two agree.
+        if (!inst.cachedIsGroundDetail) {
+            DedupKey dk{inst.modelId,
+                        static_cast<int32_t>(std::round(inst.position.x * 10.0f)),
+                        static_cast<int32_t>(std::round(inst.position.y * 10.0f)),
+                        static_cast<int32_t>(std::round(inst.position.z * 10.0f))};
+            instanceDedupMap_[dk] = inst.id;
+        }
+
         if (inst.cachedIsSmoke) {
             smokeInstanceIndices_.push_back(i);
         }
diff --git a/src/rendering/terrain_manager.cpp b/src/rendering/terrain_manager.cpp
index b164d969..11204ca2 100644
--- a/src/rendering/terrain_manager.cpp
+++ b/src/rendering/terrain_manager.cpp
@@ -695,27 +695,39 @@ bool TerrainManager::advanceFinalization(FinalizingTile& ft) {
             return true;
         }
 
-        LOG_DEBUG("Finalizing tile [", x, ",", y, "] (incremental)");
-
-        // Upload pre-loaded textures
-        if (!pending->preloadedTextures.empty()) {
-            terrainRenderer->uploadPreloadedTextures(pending->preloadedTextures);
-        }
-
-        // Upload terrain mesh to GPU
-        if (!terrainRenderer->loadTerrain(pending->mesh, pending->terrain.textures, x, y)) {
-            LOG_ERROR("Failed to upload terrain to GPU for tile [", x, ",", y, "]");
-            failedTiles[coord] = true;
-            {
-                std::lock_guard<std::mutex> lock(queueMutex);
-                pendingTiles.erase(coord);
+        // Upload pre-loaded textures (once)
+        if (!ft.terrainPreloaded) {
+            LOG_DEBUG("Finalizing tile [", x, ",", y, "] (incremental)");
+            if (!pending->preloadedTextures.empty()) {
+                terrainRenderer->uploadPreloadedTextures(pending->preloadedTextures);
             }
-            ft.phase = FinalizationPhase::DONE;
-            return true;
+            ft.terrainPreloaded = true;
+            // Yield after preload to give time budget a chance to interrupt
+            return false;
         }
 
-        // Load water immediately after terrain (same frame) — water is now
-        // deduplicated to ~1-2 merged surfaces per tile, so this is fast.
+        // Upload terrain chunks incrementally (16 per call to spread across frames)
+        if (!ft.terrainMeshDone) {
+            if (pending->mesh.validChunkCount == 0) {
+                LOG_ERROR("Failed to upload terrain to GPU for tile [", x, ",", y, "]");
+                failedTiles[coord] = true;
+                {
+                    std::lock_guard<std::mutex> lock(queueMutex);
+                    pendingTiles.erase(coord);
+                }
+                ft.phase = FinalizationPhase::DONE;
+                return true;
+            }
+            bool allDone = terrainRenderer->loadTerrainIncremental(
+                pending->mesh, pending->terrain.textures, x, y,
+                ft.terrainChunkNext, 16);
+            if (!allDone) {
+                return false; // More chunks remain — yield to time budget
+            }
+            ft.terrainMeshDone = true;
+        }
+
+        // Load water after all terrain chunks are uploaded
         if (waterRenderer) {
             size_t beforeSurfaces = waterRenderer->getSurfaceCount();
             waterRenderer->loadFromTerrain(pending->terrain, true, x, y);
diff --git a/src/rendering/terrain_renderer.cpp b/src/rendering/terrain_renderer.cpp
index 6e312233..227178d5 100644
--- a/src/rendering/terrain_renderer.cpp
+++ b/src/rendering/terrain_renderer.cpp
@@ -409,6 +409,90 @@ bool TerrainRenderer::loadTerrain(const pipeline::TerrainMesh& mesh,
     return !chunks.empty();
 }
 
+bool TerrainRenderer::loadTerrainIncremental(const pipeline::TerrainMesh& mesh,
+                                              const std::vector<std::string>& texturePaths,
+                                              int tileX, int tileY,
+                                              int& chunkIndex, int maxChunksPerCall) {
+    // Uploads up to maxChunksPerCall chunks starting at slot chunkIndex (row-major: cy * 16 + cx),
+    // advancing chunkIndex past every slot examined. Invalid slots and failed uploads are skipped
+    // without counting toward the batch. Returns true once chunkIndex has reached 256.
+    if (chunkIndex < 0) chunkIndex = 0;              // keep getChunk() coordinates non-negative
+    if (maxChunksPerCall < 1) maxChunksPerCall = 1;  // a batch size <= 0 would never make progress
+    int uploaded = 0;
+    while (chunkIndex < 256 && uploaded < maxChunksPerCall) {
+        int cy = chunkIndex / 16;
+        int cx = chunkIndex % 16;
+        chunkIndex++;
+
+        const auto& chunk = mesh.getChunk(cx, cy);
+        if (!chunk.isValid()) continue;
+        TerrainChunkGPU gpuChunk = uploadChunk(chunk);
+        if (!gpuChunk.isValid()) continue;
+        calculateBoundingSphere(gpuChunk, chunk);
+
+        if (!chunk.layers.empty()) {
+            uint32_t baseTexId = chunk.layers[0].textureId;
+            if (baseTexId < texturePaths.size()) {
+                gpuChunk.baseTexture = loadTexture(texturePaths[baseTexId]);
+            } else {
+                gpuChunk.baseTexture = whiteTexture.get();
+            }
+
+            for (size_t i = 1; i < chunk.layers.size() && i < 4; i++) {
+                const auto& layer = chunk.layers[i];
+                int li = static_cast<int>(i) - 1;
+
+                VkTexture* layerTex = whiteTexture.get();
+                if (layer.textureId < texturePaths.size()) {
+                    layerTex = loadTexture(texturePaths[layer.textureId]);
+                }
+                gpuChunk.layerTextures[li] = layerTex;
+
+                VkTexture* alphaTex = opaqueAlphaTexture.get();
+                if (!layer.alphaData.empty()) {
+                    alphaTex = createAlphaTexture(layer.alphaData);
+                }
+                gpuChunk.alphaTextures[li] = alphaTex;
+                gpuChunk.layerCount = static_cast<int>(i);
+            }
+        } else {
+            gpuChunk.baseTexture = whiteTexture.get();
+        }
+
+        gpuChunk.tileX = tileX;
+        gpuChunk.tileY = tileY;
+        TerrainParamsUBO params{};
+        params.layerCount = gpuChunk.layerCount;
+        params.hasLayer1 = gpuChunk.layerCount >= 1 ? 1 : 0;
+        params.hasLayer2 = gpuChunk.layerCount >= 2 ? 1 : 0;
+        params.hasLayer3 = gpuChunk.layerCount >= 3 ? 1 : 0;
+        // Per-chunk uniform buffer that receives the layer flags computed above.
+        VkBufferCreateInfo bufCI{};
+        bufCI.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO;
+        bufCI.size = sizeof(TerrainParamsUBO);
+        bufCI.usage = VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT;
+        VmaAllocationCreateInfo allocCI{};
+        allocCI.usage = VMA_MEMORY_USAGE_CPU_TO_GPU;
+        allocCI.flags = VMA_ALLOCATION_CREATE_MAPPED_BIT;
+        VmaAllocationInfo mapInfo{};
+        vmaCreateBuffer(vkCtx->getAllocator(), &bufCI, &allocCI,
+                        &gpuChunk.paramsUBO, &gpuChunk.paramsAlloc, &mapInfo);
+        if (mapInfo.pMappedData) {
+            std::memcpy(mapInfo.pMappedData, &params, sizeof(params));
+        }
+        // NOTE(review): vmaCreateBuffer's result is ignored; only the mapped pointer is checked above.
+        gpuChunk.materialSet = allocateMaterialSet();
+        if (gpuChunk.materialSet) {
+            writeMaterialDescriptors(gpuChunk.materialSet, gpuChunk);
+        }
+
+        chunks.push_back(std::move(gpuChunk));
+        uploaded++;
+    }
+
+    return chunkIndex >= 256;
+}
+
 TerrainChunkGPU TerrainRenderer::uploadChunk(const pipeline::ChunkMesh& chunk) {
     TerrainChunkGPU gpuChunk;