Incremental terrain upload + M2 instance dedup hash for city stutter

Terrain finalization was uploading all 256 chunks (GPU fence waits) in one
atomic advanceFinalization call that couldn't be interrupted by the 5ms time
budget. Now split into incremental batches of 16 chunks per call, allowing
the time budget to yield between batches.

M2 instance creation had O(N) dedup scans iterating ALL instances to check
for duplicates. In cities with 5000+ doodads, this caused O(N²) total work
during tile loading. Replaced with hash-based DedupKey map for O(1) lookups.

Changes:
- TerrainRenderer::loadTerrainIncremental: uploads N chunks per call
- FinalizingTile tracks terrainChunkNext for cross-frame progress
- TERRAIN phase yields after preload and after each chunk batch
- M2Renderer::DedupKey hash map replaces linear scan in createInstance
  and createInstanceWithMatrix
- Dedup map maintained through rebuildSpatialIndex and clear paths
This commit is contained in:
Kelsi 2026-03-07 11:59:19 -08:00
parent f9410cc4bd
commit 884b72bc1c
6 changed files with 193 additions and 35 deletions

View file

@ -389,6 +389,25 @@ private:
std::unordered_map<uint32_t, M2ModelGPU> models;
std::vector<M2Instance> instances;
// O(1) dedup: key = (modelId, quantized x, quantized y, quantized z) → instanceId
struct DedupKey {
uint32_t modelId;
int32_t qx, qy, qz; // position quantized to 0.1 units
bool operator==(const DedupKey& o) const {
return modelId == o.modelId && qx == o.qx && qy == o.qy && qz == o.qz;
}
};
struct DedupHash {
size_t operator()(const DedupKey& k) const {
size_t h = std::hash<uint32_t>()(k.modelId);
h ^= std::hash<int32_t>()(k.qx) * 2654435761u;
h ^= std::hash<int32_t>()(k.qy) * 40503u;
h ^= std::hash<int32_t>()(k.qz) * 12289u;
return h;
}
};
std::unordered_map<DedupKey, uint32_t, DedupHash> instanceDedupMap_;
uint32_t nextInstanceId = 1;
uint32_t lastDrawCallCount = 0;
size_t modelCacheLimit_ = 6000;

View file

@ -150,6 +150,11 @@ struct FinalizingTile {
size_t wmoModelIndex = 0; // Next WMO model to upload
size_t wmoDoodadIndex = 0; // Next WMO doodad to upload
// Incremental terrain upload state (splits TERRAIN phase across frames)
bool terrainPreloaded = false; // True after preloaded textures uploaded
int terrainChunkNext = 0; // Next chunk index to upload (0-255, row-major)
bool terrainMeshDone = false; // True when all chunks uploaded
// Accumulated results (built up across phases)
std::vector<uint32_t> m2InstanceIds;
std::vector<uint32_t> wmoInstanceIds;

View file

@ -86,6 +86,13 @@ public:
const std::vector<std::string>& texturePaths,
int tileX = -1, int tileY = -1);
/// Upload a batch of terrain chunks incrementally. Returns true when all chunks done.
/// chunkIndex is updated to the next chunk to process (0-255 row-major).
bool loadTerrainIncremental(const pipeline::TerrainMesh& mesh,
const std::vector<std::string>& texturePaths,
int tileX, int tileY,
int& chunkIndex, int maxChunksPerCall = 16);
void removeTile(int tileX, int tileY);
void uploadPreloadedTextures(const std::unordered_map<std::string, pipeline::BLPImage>& textures);

View file

@ -678,6 +678,7 @@ void M2Renderer::shutdown() {
instances.clear();
spatialGrid.clear();
instanceIndexById.clear();
instanceDedupMap_.clear();
// Delete cached textures
textureCache.clear();
@ -1613,17 +1614,16 @@ uint32_t M2Renderer::createInstance(uint32_t modelId, const glm::vec3& position,
}
const auto& mdlRef = modelIt->second;
// Ground clutter is procedurally scattered and high-count; avoid O(N) dedup
// scans that can hitch when new tiles stream in.
// Deduplicate: skip if same model already at nearly the same position.
// Uses hash map for O(1) lookup instead of O(N) scan.
if (!mdlRef.isGroundDetail) {
// Deduplicate: skip if same model already at nearly the same position
for (const auto& existing : instances) {
if (existing.modelId == modelId) {
glm::vec3 d = existing.position - position;
if (glm::dot(d, d) < 0.01f) {
return existing.id;
}
}
DedupKey dk{modelId,
static_cast<int32_t>(std::round(position.x * 10.0f)),
static_cast<int32_t>(std::round(position.y * 10.0f)),
static_cast<int32_t>(std::round(position.z * 10.0f))};
auto dit = instanceDedupMap_.find(dk);
if (dit != instanceDedupMap_.end()) {
return dit->second;
}
}
@ -1662,6 +1662,15 @@ uint32_t M2Renderer::createInstance(uint32_t modelId, const glm::vec3& position,
instance.variationTimer = 3000.0f + static_cast<float>(rand() % 8000);
}
// Register in dedup map before pushing (uses original position, not ground-adjusted)
if (!mdlRef.isGroundDetail) {
DedupKey dk{modelId,
static_cast<int32_t>(std::round(position.x * 10.0f)),
static_cast<int32_t>(std::round(position.y * 10.0f)),
static_cast<int32_t>(std::round(position.z * 10.0f))};
instanceDedupMap_[dk] = instance.id;
}
instances.push_back(instance);
size_t idx = instances.size() - 1;
// Track special instances for fast-path iteration
@ -1700,13 +1709,15 @@ uint32_t M2Renderer::createInstanceWithMatrix(uint32_t modelId, const glm::mat4&
return 0;
}
// Deduplicate: skip if same model already at nearly the same position
for (const auto& existing : instances) {
if (existing.modelId == modelId) {
glm::vec3 d = existing.position - position;
if (glm::dot(d, d) < 0.01f) {
return existing.id;
}
// Deduplicate: O(1) hash lookup
{
DedupKey dk{modelId,
static_cast<int32_t>(std::round(position.x * 10.0f)),
static_cast<int32_t>(std::round(position.y * 10.0f)),
static_cast<int32_t>(std::round(position.z * 10.0f))};
auto dit = instanceDedupMap_.find(dk);
if (dit != instanceDedupMap_.end()) {
return dit->second;
}
}
@ -1743,6 +1754,15 @@ uint32_t M2Renderer::createInstanceWithMatrix(uint32_t modelId, const glm::mat4&
instance.animTime = static_cast<float>(rand()) / RAND_MAX * 10000.0f;
}
// Register in dedup map
{
DedupKey dk{modelId,
static_cast<int32_t>(std::round(position.x * 10.0f)),
static_cast<int32_t>(std::round(position.y * 10.0f)),
static_cast<int32_t>(std::round(position.z * 10.0f))};
instanceDedupMap_[dk] = instance.id;
}
instances.push_back(instance);
size_t idx = instances.size() - 1;
if (mdl2.isSmoke) {
@ -3477,6 +3497,7 @@ void M2Renderer::clear() {
instances.clear();
spatialGrid.clear();
instanceIndexById.clear();
instanceDedupMap_.clear();
smokeParticles.clear();
smokeInstanceIndices_.clear();
portalInstanceIndices_.clear();
@ -3513,6 +3534,7 @@ M2Renderer::GridCell M2Renderer::toCell(const glm::vec3& p) const {
void M2Renderer::rebuildSpatialIndex() {
spatialGrid.clear();
instanceIndexById.clear();
instanceDedupMap_.clear();
instanceIndexById.reserve(instances.size());
smokeInstanceIndices_.clear();
portalInstanceIndices_.clear();
@ -3524,6 +3546,15 @@ void M2Renderer::rebuildSpatialIndex() {
const auto& inst = instances[i];
instanceIndexById[inst.id] = i;
// Rebuild dedup map (skip ground detail)
if (!inst.cachedIsGroundDetail) {
DedupKey dk{inst.modelId,
static_cast<int32_t>(std::round(inst.position.x * 10.0f)),
static_cast<int32_t>(std::round(inst.position.y * 10.0f)),
static_cast<int32_t>(std::round(inst.position.z * 10.0f))};
instanceDedupMap_[dk] = inst.id;
}
if (inst.cachedIsSmoke) {
smokeInstanceIndices_.push_back(i);
}

View file

@ -695,15 +695,20 @@ bool TerrainManager::advanceFinalization(FinalizingTile& ft) {
return true;
}
// Upload pre-loaded textures (once)
if (!ft.terrainPreloaded) {
LOG_DEBUG("Finalizing tile [", x, ",", y, "] (incremental)");
// Upload pre-loaded textures
if (!pending->preloadedTextures.empty()) {
terrainRenderer->uploadPreloadedTextures(pending->preloadedTextures);
}
ft.terrainPreloaded = true;
// Yield after preload to give time budget a chance to interrupt
return false;
}
// Upload terrain mesh to GPU
if (!terrainRenderer->loadTerrain(pending->mesh, pending->terrain.textures, x, y)) {
// Upload terrain chunks incrementally (16 per call to spread across frames)
if (!ft.terrainMeshDone) {
if (pending->mesh.validChunkCount == 0) {
LOG_ERROR("Failed to upload terrain to GPU for tile [", x, ",", y, "]");
failedTiles[coord] = true;
{
@ -713,9 +718,16 @@ bool TerrainManager::advanceFinalization(FinalizingTile& ft) {
ft.phase = FinalizationPhase::DONE;
return true;
}
bool allDone = terrainRenderer->loadTerrainIncremental(
pending->mesh, pending->terrain.textures, x, y,
ft.terrainChunkNext, 16);
if (!allDone) {
return false; // More chunks remain — yield to time budget
}
ft.terrainMeshDone = true;
}
// Load water immediately after terrain (same frame) — water is now
// deduplicated to ~1-2 merged surfaces per tile, so this is fast.
// Load water after all terrain chunks are uploaded
if (waterRenderer) {
size_t beforeSurfaces = waterRenderer->getSurfaceCount();
waterRenderer->loadFromTerrain(pending->terrain, true, x, y);

View file

@ -409,6 +409,90 @@ bool TerrainRenderer::loadTerrain(const pipeline::TerrainMesh& mesh,
return !chunks.empty();
}
bool TerrainRenderer::loadTerrainIncremental(const pipeline::TerrainMesh& mesh,
const std::vector<std::string>& texturePaths,
int tileX, int tileY,
int& chunkIndex, int maxChunksPerCall) {
int uploaded = 0;
while (chunkIndex < 256 && uploaded < maxChunksPerCall) {
int cy = chunkIndex / 16;
int cx = chunkIndex % 16;
chunkIndex++;
const auto& chunk = mesh.getChunk(cx, cy);
if (!chunk.isValid()) continue;
TerrainChunkGPU gpuChunk = uploadChunk(chunk);
if (!gpuChunk.isValid()) continue;
calculateBoundingSphere(gpuChunk, chunk);
if (!chunk.layers.empty()) {
uint32_t baseTexId = chunk.layers[0].textureId;
if (baseTexId < texturePaths.size()) {
gpuChunk.baseTexture = loadTexture(texturePaths[baseTexId]);
} else {
gpuChunk.baseTexture = whiteTexture.get();
}
for (size_t i = 1; i < chunk.layers.size() && i < 4; i++) {
const auto& layer = chunk.layers[i];
int li = static_cast<int>(i) - 1;
VkTexture* layerTex = whiteTexture.get();
if (layer.textureId < texturePaths.size()) {
layerTex = loadTexture(texturePaths[layer.textureId]);
}
gpuChunk.layerTextures[li] = layerTex;
VkTexture* alphaTex = opaqueAlphaTexture.get();
if (!layer.alphaData.empty()) {
alphaTex = createAlphaTexture(layer.alphaData);
}
gpuChunk.alphaTextures[li] = alphaTex;
gpuChunk.layerCount = static_cast<int>(i);
}
} else {
gpuChunk.baseTexture = whiteTexture.get();
}
gpuChunk.tileX = tileX;
gpuChunk.tileY = tileY;
TerrainParamsUBO params{};
params.layerCount = gpuChunk.layerCount;
params.hasLayer1 = gpuChunk.layerCount >= 1 ? 1 : 0;
params.hasLayer2 = gpuChunk.layerCount >= 2 ? 1 : 0;
params.hasLayer3 = gpuChunk.layerCount >= 3 ? 1 : 0;
VkBufferCreateInfo bufCI{};
bufCI.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO;
bufCI.size = sizeof(TerrainParamsUBO);
bufCI.usage = VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT;
VmaAllocationCreateInfo allocCI{};
allocCI.usage = VMA_MEMORY_USAGE_CPU_TO_GPU;
allocCI.flags = VMA_ALLOCATION_CREATE_MAPPED_BIT;
VmaAllocationInfo mapInfo{};
vmaCreateBuffer(vkCtx->getAllocator(), &bufCI, &allocCI,
&gpuChunk.paramsUBO, &gpuChunk.paramsAlloc, &mapInfo);
if (mapInfo.pMappedData) {
std::memcpy(mapInfo.pMappedData, &params, sizeof(params));
}
gpuChunk.materialSet = allocateMaterialSet();
if (gpuChunk.materialSet) {
writeMaterialDescriptors(gpuChunk.materialSet, gpuChunk);
}
chunks.push_back(std::move(gpuChunk));
uploaded++;
}
return chunkIndex >= 256;
}
TerrainChunkGPU TerrainRenderer::uploadChunk(const pipeline::ChunkMesh& chunk) {
TerrainChunkGPU gpuChunk;