Eliminate per-instance hash lookups in M2 render/shadow culling loops

Use cached model flags (isValid, isSmoke, isInvisibleTrap, isGroundDetail,
disableAnimation, boundRadius) on M2Instance instead of models.find() in
the hot culling paths. Also complete cached flag initialization in
createInstanceWithMatrix().
This commit is contained in:
Kelsi 2026-03-04 08:28:21 -08:00
parent 30fa9836d9
commit 2e432fc123
2 changed files with 38 additions and 32 deletions

View file

@ -179,6 +179,9 @@ struct M2Instance {
bool cachedDisableAnimation = false; bool cachedDisableAnimation = false;
bool cachedIsSmoke = false; bool cachedIsSmoke = false;
bool cachedHasParticleEmitters = false; bool cachedHasParticleEmitters = false;
bool cachedIsGroundDetail = false;
bool cachedIsInvisibleTrap = false;
bool cachedIsValid = false;
float cachedBoundRadius = 0.0f; float cachedBoundRadius = 0.0f;
// Frame-skip optimization (update distant animations less frequently) // Frame-skip optimization (update distant animations less frequently)

View file

@ -1617,6 +1617,9 @@ uint32_t M2Renderer::createInstance(uint32_t modelId, const glm::vec3& position,
instance.cachedIsSmoke = mdlRef.isSmoke; instance.cachedIsSmoke = mdlRef.isSmoke;
instance.cachedHasParticleEmitters = !mdlRef.particleEmitters.empty(); instance.cachedHasParticleEmitters = !mdlRef.particleEmitters.empty();
instance.cachedBoundRadius = mdlRef.boundRadius; instance.cachedBoundRadius = mdlRef.boundRadius;
instance.cachedIsGroundDetail = mdlRef.isGroundDetail;
instance.cachedIsInvisibleTrap = mdlRef.isInvisibleTrap;
instance.cachedIsValid = mdlRef.isValid();
// Initialize animation: play first sequence (usually Stand/Idle) // Initialize animation: play first sequence (usually Stand/Idle)
const auto& mdl = mdlRef; const auto& mdl = mdlRef;
@ -1691,6 +1694,9 @@ uint32_t M2Renderer::createInstanceWithMatrix(uint32_t modelId, const glm::mat4&
instance.cachedIsSmoke = mdl2.isSmoke; instance.cachedIsSmoke = mdl2.isSmoke;
instance.cachedHasParticleEmitters = !mdl2.particleEmitters.empty(); instance.cachedHasParticleEmitters = !mdl2.particleEmitters.empty();
instance.cachedBoundRadius = mdl2.boundRadius; instance.cachedBoundRadius = mdl2.boundRadius;
instance.cachedIsGroundDetail = mdl2.isGroundDetail;
instance.cachedIsInvisibleTrap = mdl2.isInvisibleTrap;
instance.cachedIsValid = mdl2.isValid();
// Initialize animation // Initialize animation
if (mdl2.hasAnimation && !mdl2.disableAnimation && !mdl2.sequences.empty()) { if (mdl2.hasAnimation && !mdl2.disableAnimation && !mdl2.sequences.empty()) {
@ -2139,28 +2145,26 @@ void M2Renderer::render(VkCommandBuffer cmd, VkDescriptorSet perFrameSet, const
for (uint32_t i = 0; i < static_cast<uint32_t>(instances.size()); ++i) { for (uint32_t i = 0; i < static_cast<uint32_t>(instances.size()); ++i) {
const auto& instance = instances[i]; const auto& instance = instances[i];
// Use cached model flags — no hash lookup needed
if (!instance.cachedIsValid || instance.cachedIsSmoke || instance.cachedIsInvisibleTrap) continue;
glm::vec3 toCam = instance.position - camPos; glm::vec3 toCam = instance.position - camPos;
float distSq = glm::dot(toCam, toCam); float distSq = glm::dot(toCam, toCam);
if (distSq > maxPossibleDistSq) continue;
auto it = models.find(instance.modelId); float worldRadius = instance.cachedBoundRadius * instance.scale;
if (it == models.end()) continue;
const M2ModelGPU& model = it->second;
if (!model.isValid() || model.isSmoke || model.isInvisibleTrap) continue;
float worldRadius = model.boundRadius * instance.scale;
float cullRadius = worldRadius; float cullRadius = worldRadius;
if (model.disableAnimation) { if (instance.cachedDisableAnimation) {
cullRadius = std::max(cullRadius, 3.0f); cullRadius = std::max(cullRadius, 3.0f);
} }
float effectiveMaxDistSq = maxRenderDistanceSq * std::max(1.0f, cullRadius / 12.0f); float effectiveMaxDistSq = maxRenderDistanceSq * std::max(1.0f, cullRadius / 12.0f);
if (model.disableAnimation) { if (instance.cachedDisableAnimation) {
effectiveMaxDistSq *= 2.6f; effectiveMaxDistSq *= 2.6f;
} }
if (model.isGroundDetail) { if (instance.cachedIsGroundDetail) {
effectiveMaxDistSq *= 0.75f; effectiveMaxDistSq *= 0.75f;
} }
if (distSq > maxPossibleDistSq) continue;
if (distSq > effectiveMaxDistSq) continue; if (distSq > effectiveMaxDistSq) continue;
// Frustum cull with padding // Frustum cull with padding
@ -2278,12 +2282,11 @@ void M2Renderer::render(VkCommandBuffer cmd, VkDescriptorSet perFrameSet, const
} }
} }
// LOD selection based on distance // LOD selection based on squared distance (avoid sqrt)
float dist = std::sqrt(entry.distSq);
uint16_t desiredLOD = 0; uint16_t desiredLOD = 0;
if (dist > 150.0f) desiredLOD = 3; if (entry.distSq > 150.0f * 150.0f) desiredLOD = 3;
else if (dist > 80.0f) desiredLOD = 2; else if (entry.distSq > 80.0f * 80.0f) desiredLOD = 2;
else if (dist > 40.0f) desiredLOD = 1; else if (entry.distSq > 40.0f * 40.0f) desiredLOD = 1;
uint16_t targetLOD = desiredLOD; uint16_t targetLOD = desiredLOD;
if (desiredLOD > 0) { if (desiredLOD > 0) {
@ -2450,23 +2453,21 @@ void M2Renderer::render(VkCommandBuffer cmd, VkDescriptorSet perFrameSet, const
vkCmdPushConstants(cmd, particlePipelineLayout_, VK_SHADER_STAGE_FRAGMENT_BIT, 0, vkCmdPushConstants(cmd, particlePipelineLayout_, VK_SHADER_STAGE_FRAGMENT_BIT, 0,
sizeof(particlePush), &particlePush); sizeof(particlePush), &particlePush);
// Build and upload vertex data // Write glow vertex data directly to mapped buffer (no temp vector)
std::vector<float> glowData;
glowData.reserve(glowSprites_.size() * 9);
for (const auto& gs : glowSprites_) {
glowData.push_back(gs.worldPos.x);
glowData.push_back(gs.worldPos.y);
glowData.push_back(gs.worldPos.z);
glowData.push_back(gs.color.r);
glowData.push_back(gs.color.g);
glowData.push_back(gs.color.b);
glowData.push_back(gs.color.a);
glowData.push_back(gs.size);
glowData.push_back(0.0f);
}
size_t uploadCount = std::min(glowSprites_.size(), MAX_GLOW_SPRITES); size_t uploadCount = std::min(glowSprites_.size(), MAX_GLOW_SPRITES);
memcpy(glowVBMapped_, glowData.data(), uploadCount * 9 * sizeof(float)); float* dst = static_cast<float*>(glowVBMapped_);
for (size_t gi = 0; gi < uploadCount; gi++) {
const auto& gs = glowSprites_[gi];
*dst++ = gs.worldPos.x;
*dst++ = gs.worldPos.y;
*dst++ = gs.worldPos.z;
*dst++ = gs.color.r;
*dst++ = gs.color.g;
*dst++ = gs.color.b;
*dst++ = gs.color.a;
*dst++ = gs.size;
*dst++ = 0.0f;
}
VkDeviceSize offset = 0; VkDeviceSize offset = 0;
vkCmdBindVertexBuffers(cmd, 0, 1, &glowVB_, &offset); vkCmdBindVertexBuffers(cmd, 0, 1, &glowVB_, &offset);
@ -2748,6 +2749,9 @@ void M2Renderer::renderShadow(VkCommandBuffer cmd, const glm::mat4& lightSpaceMa
const M2ModelGPU* currentModel = nullptr; const M2ModelGPU* currentModel = nullptr;
for (const auto& instance : instances) { for (const auto& instance : instances) {
// Use cached flags to skip early without hash lookup
if (!instance.cachedIsValid || instance.cachedIsSmoke || instance.cachedIsInvisibleTrap) continue;
// Distance cull against shadow frustum // Distance cull against shadow frustum
glm::vec3 diff = instance.position - shadowCenter; glm::vec3 diff = instance.position - shadowCenter;
if (glm::dot(diff, diff) > shadowRadiusSq) continue; if (glm::dot(diff, diff) > shadowRadiusSq) continue;
@ -2755,7 +2759,6 @@ void M2Renderer::renderShadow(VkCommandBuffer cmd, const glm::mat4& lightSpaceMa
auto modelIt = models.find(instance.modelId); auto modelIt = models.find(instance.modelId);
if (modelIt == models.end()) continue; if (modelIt == models.end()) continue;
const M2ModelGPU& model = modelIt->second; const M2ModelGPU& model = modelIt->second;
if (!model.isValid() || model.isSmoke || model.isInvisibleTrap) continue;
// Filter: only draw foliage models in foliage pass, non-foliage in non-foliage pass // Filter: only draw foliage models in foliage pass, non-foliage in non-foliage pass
if (model.shadowWindFoliage != foliagePass) continue; if (model.shadowWindFoliage != foliagePass) continue;