From 2e432fc1233277a70e4c7426dfdfe9bafa37a3bd Mon Sep 17 00:00:00 2001 From: Kelsi Date: Wed, 4 Mar 2026 08:28:21 -0800 Subject: [PATCH] Eliminate per-instance hash lookups in M2 render/shadow culling loops Use cached model flags (isValid, isSmoke, isInvisibleTrap, isGroundDetail, disableAnimation, boundRadius) on M2Instance instead of models.find() in the hot culling paths. Also complete cached flag initialization in createInstanceWithMatrix(). --- include/rendering/m2_renderer.hpp | 3 ++ src/rendering/m2_renderer.cpp | 67 ++++++++++++++++--------------- 2 files changed, 38 insertions(+), 32 deletions(-) diff --git a/include/rendering/m2_renderer.hpp b/include/rendering/m2_renderer.hpp index e91e08d3..83762a5c 100644 --- a/include/rendering/m2_renderer.hpp +++ b/include/rendering/m2_renderer.hpp @@ -179,6 +179,9 @@ struct M2Instance { bool cachedDisableAnimation = false; bool cachedIsSmoke = false; bool cachedHasParticleEmitters = false; + bool cachedIsGroundDetail = false; + bool cachedIsInvisibleTrap = false; + bool cachedIsValid = false; float cachedBoundRadius = 0.0f; // Frame-skip optimization (update distant animations less frequently) diff --git a/src/rendering/m2_renderer.cpp b/src/rendering/m2_renderer.cpp index e7fbd6ef..130b8b1a 100644 --- a/src/rendering/m2_renderer.cpp +++ b/src/rendering/m2_renderer.cpp @@ -1617,6 +1617,9 @@ uint32_t M2Renderer::createInstance(uint32_t modelId, const glm::vec3& position, instance.cachedIsSmoke = mdlRef.isSmoke; instance.cachedHasParticleEmitters = !mdlRef.particleEmitters.empty(); instance.cachedBoundRadius = mdlRef.boundRadius; + instance.cachedIsGroundDetail = mdlRef.isGroundDetail; + instance.cachedIsInvisibleTrap = mdlRef.isInvisibleTrap; + instance.cachedIsValid = mdlRef.isValid(); // Initialize animation: play first sequence (usually Stand/Idle) const auto& mdl = mdlRef; @@ -1691,6 +1694,9 @@ uint32_t M2Renderer::createInstanceWithMatrix(uint32_t modelId, const glm::mat4& instance.cachedIsSmoke = mdl2.isSmoke; instance.cachedHasParticleEmitters = !mdl2.particleEmitters.empty(); instance.cachedBoundRadius = mdl2.boundRadius; + instance.cachedIsGroundDetail = mdl2.isGroundDetail; + instance.cachedIsInvisibleTrap = mdl2.isInvisibleTrap; + instance.cachedIsValid = mdl2.isValid(); // Initialize animation if (mdl2.hasAnimation && !mdl2.disableAnimation && !mdl2.sequences.empty()) { @@ -2139,28 +2145,26 @@ void M2Renderer::render(VkCommandBuffer cmd, VkDescriptorSet perFrameSet, const for (uint32_t i = 0; i < static_cast(instances.size()); ++i) { const auto& instance = instances[i]; + // Use cached model flags — no hash lookup needed + if (!instance.cachedIsValid || instance.cachedIsSmoke || instance.cachedIsInvisibleTrap) continue; + glm::vec3 toCam = instance.position - camPos; float distSq = glm::dot(toCam, toCam); + if (distSq > maxPossibleDistSq) continue; - auto it = models.find(instance.modelId); - if (it == models.end()) continue; - const M2ModelGPU& model = it->second; - if (!model.isValid() || model.isSmoke || model.isInvisibleTrap) continue; - - float worldRadius = model.boundRadius * instance.scale; + float worldRadius = instance.cachedBoundRadius * instance.scale; float cullRadius = worldRadius; - if (model.disableAnimation) { + if (instance.cachedDisableAnimation) { cullRadius = std::max(cullRadius, 3.0f); } float effectiveMaxDistSq = maxRenderDistanceSq * std::max(1.0f, cullRadius / 12.0f); - if (model.disableAnimation) { + if (instance.cachedDisableAnimation) { effectiveMaxDistSq *= 2.6f; } - if (model.isGroundDetail) { + if (instance.cachedIsGroundDetail) { effectiveMaxDistSq *= 0.75f; } - if (distSq > maxPossibleDistSq) continue; if (distSq > effectiveMaxDistSq) continue; // Frustum cull with padding @@ -2278,12 +2282,11 @@ void M2Renderer::render(VkCommandBuffer cmd, VkDescriptorSet perFrameSet, const } } - // LOD selection based on distance - float dist = std::sqrt(entry.distSq); + // LOD selection based on squared distance (avoid sqrt) uint16_t desiredLOD = 0; - if (dist > 150.0f) desiredLOD = 3; - else if (dist > 80.0f) desiredLOD = 2; - else if (dist > 40.0f) desiredLOD = 1; + if (entry.distSq > 150.0f * 150.0f) desiredLOD = 3; + else if (entry.distSq > 80.0f * 80.0f) desiredLOD = 2; + else if (entry.distSq > 40.0f * 40.0f) desiredLOD = 1; uint16_t targetLOD = desiredLOD; if (desiredLOD > 0) { @@ -2450,23 +2453,21 @@ void M2Renderer::render(VkCommandBuffer cmd, VkDescriptorSet perFrameSet, const vkCmdPushConstants(cmd, particlePipelineLayout_, VK_SHADER_STAGE_FRAGMENT_BIT, 0, sizeof(particlePush), &particlePush); - // Build and upload vertex data - std::vector glowData; - glowData.reserve(glowSprites_.size() * 9); - for (const auto& gs : glowSprites_) { - glowData.push_back(gs.worldPos.x); - glowData.push_back(gs.worldPos.y); - glowData.push_back(gs.worldPos.z); - glowData.push_back(gs.color.r); - glowData.push_back(gs.color.g); - glowData.push_back(gs.color.b); - glowData.push_back(gs.color.a); - glowData.push_back(gs.size); - glowData.push_back(0.0f); - } - + // Write glow vertex data directly to mapped buffer (no temp vector) size_t uploadCount = std::min(glowSprites_.size(), MAX_GLOW_SPRITES); - memcpy(glowVBMapped_, glowData.data(), uploadCount * 9 * sizeof(float)); + float* dst = static_cast(glowVBMapped_); + for (size_t gi = 0; gi < uploadCount; gi++) { + const auto& gs = glowSprites_[gi]; + *dst++ = gs.worldPos.x; + *dst++ = gs.worldPos.y; + *dst++ = gs.worldPos.z; + *dst++ = gs.color.r; + *dst++ = gs.color.g; + *dst++ = gs.color.b; + *dst++ = gs.color.a; + *dst++ = gs.size; + *dst++ = 0.0f; + } VkDeviceSize offset = 0; vkCmdBindVertexBuffers(cmd, 0, 1, &glowVB_, &offset); @@ -2748,6 +2749,9 @@ void M2Renderer::renderShadow(VkCommandBuffer cmd, const glm::mat4& lightSpaceMa const M2ModelGPU* currentModel = nullptr; for (const auto& instance : instances) { + // Use cached flags to skip early without hash lookup + if (!instance.cachedIsValid || instance.cachedIsSmoke || instance.cachedIsInvisibleTrap) continue; + // Distance cull against shadow frustum glm::vec3 diff = instance.position - shadowCenter; if (glm::dot(diff, diff) > shadowRadiusSq) continue; @@ -2755,7 +2759,6 @@ void M2Renderer::renderShadow(VkCommandBuffer cmd, const glm::mat4& lightSpaceMa auto modelIt = models.find(instance.modelId); if (modelIt == models.end()) continue; const M2ModelGPU& model = modelIt->second; - if (!model.isValid() || model.isSmoke || model.isInvisibleTrap) continue; // Filter: only draw foliage models in foliage pass, non-foliage in non-foliage pass if (model.shadowWindFoliage != foliagePass) continue;