Optimize animation hotpaths: binary keyframe search, eliminate sqrt calls

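- Replace O(n) linear keyframe search with O(log n) binary search
  (std::upper_bound) in both M2 and Character renderers (runs thousands of
  times per frame). The time is now truncated to uint32_t before the search,
  so callers must pass a non-negative, finite time.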
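- Smoke particle removal: swap-and-pop (O(1) per removal, particle order is
  not preserved) instead of vector::erase (O(n) per removal, O(n²) per pass
  in the worst case)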
- Character render backface cull: eliminate sqrt via squared comparison
- Quaternion validation: use length² instead of sqrt-based length check
This commit is contained in:
Kelsi 2026-03-04 08:33:56 -08:00
parent 2e432fc123
commit e6acb4ac9a
2 changed files with 38 additions and 32 deletions

View file

@@ -1580,13 +1580,12 @@ int CharacterRenderer::findKeyframeIndex(const std::vector<uint32_t>& timestamps
if (timestamps.empty()) return -1; if (timestamps.empty()) return -1;
if (timestamps.size() == 1) return 0; if (timestamps.size() == 1) return 0;
// Binary search for the keyframe bracket // Binary search: find first element > t, then back up one
for (size_t i = 0; i < timestamps.size() - 1; i++) { uint32_t t = static_cast<uint32_t>(time);
if (time < static_cast<float>(timestamps[i + 1])) { auto it = std::upper_bound(timestamps.begin(), timestamps.end(), t);
return static_cast<int>(i); if (it == timestamps.begin()) return 0;
} size_t idx = static_cast<size_t>(it - timestamps.begin()) - 1;
} return static_cast<int>(std::min(idx, timestamps.size() - 2));
return static_cast<int>(timestamps.size() - 2);
} }
glm::vec3 CharacterRenderer::interpolateVec3(const pipeline::M2AnimationTrack& track, glm::vec3 CharacterRenderer::interpolateVec3(const pipeline::M2AnimationTrack& track,
@@ -1630,8 +1629,8 @@ glm::quat CharacterRenderer::interpolateQuat(const pipeline::M2AnimationTrack& t
if (keys.timestamps.empty() || keys.quatValues.empty()) return identity; if (keys.timestamps.empty() || keys.quatValues.empty()) return identity;
auto safeQuat = [&](const glm::quat& q) -> glm::quat { auto safeQuat = [&](const glm::quat& q) -> glm::quat {
float len = glm::length(q); float lenSq = q.x*q.x + q.y*q.y + q.z*q.z + q.w*q.w;
if (len < 0.001f || std::isnan(len)) return identity; if (lenSq < 0.000001f || std::isnan(lenSq)) return identity;
return q; return q;
}; };
@@ -1773,9 +1772,14 @@ void CharacterRenderer::render(VkCommandBuffer cmd, VkDescriptorSet perFrameSet,
float distSq = glm::dot(toInst, toInst); float distSq = glm::dot(toInst, toInst);
if (distSq > renderRadiusSq) continue; if (distSq > renderRadiusSq) continue;
if (distSq > nearNoConeCullSq) { if (distSq > nearNoConeCullSq) {
float invDist = 1.0f / std::sqrt(distSq); // Backface cull without sqrt: dot(toInst, camFwd) / |toInst| < threshold
float facingDot = glm::dot(toInst, camForward) * invDist; // ⟺ threshold >= 0: dot < 0 || dot² < threshold² * distSq; threshold < 0: dot < 0 && dot² > threshold² * distSq
if (facingDot < backfaceDotCull) continue; float rawDot = glm::dot(toInst, camForward);
if (backfaceDotCull >= 0.0f) {
if (rawDot < 0.0f || rawDot * rawDot < backfaceDotCull * backfaceDotCull * distSq) continue;
} else {
if (rawDot < 0.0f && rawDot * rawDot > backfaceDotCull * backfaceDotCull * distSq) continue;
}
} }
} }

View file

@@ -1741,12 +1741,12 @@ uint32_t M2Renderer::createInstanceWithMatrix(uint32_t modelId, const glm::mat4&
static int findKeyframeIndex(const std::vector<uint32_t>& timestamps, float time) { static int findKeyframeIndex(const std::vector<uint32_t>& timestamps, float time) {
if (timestamps.empty()) return -1; if (timestamps.empty()) return -1;
if (timestamps.size() == 1) return 0; if (timestamps.size() == 1) return 0;
for (size_t i = 0; i < timestamps.size() - 1; i++) { uint32_t t = static_cast<uint32_t>(time);
if (time < static_cast<float>(timestamps[i + 1])) { // Binary search: find first element > t, then back up one
return static_cast<int>(i); auto it = std::upper_bound(timestamps.begin(), timestamps.end(), t);
} if (it == timestamps.begin()) return 0;
} size_t idx = static_cast<size_t>(it - timestamps.begin()) - 1;
return static_cast<int>(timestamps.size() - 2); return static_cast<int>(std::min(idx, timestamps.size() - 2));
} }
// Resolve sequence index and time for a track, handling global sequences. // Resolve sequence index and time for a track, handling global sequences.
@@ -1803,8 +1803,8 @@ static glm::quat interpQuat(const pipeline::M2AnimationTrack& track,
const auto& keys = track.sequences[si]; const auto& keys = track.sequences[si];
if (keys.timestamps.empty() || keys.quatValues.empty()) return identity; if (keys.timestamps.empty() || keys.quatValues.empty()) return identity;
auto safe = [&](const glm::quat& q) -> glm::quat { auto safe = [&](const glm::quat& q) -> glm::quat {
float len = glm::length(q); float lenSq = q.x*q.x + q.y*q.y + q.z*q.z + q.w*q.w;
if (len < 0.001f || std::isnan(len)) return identity; if (lenSq < 0.000001f || std::isnan(lenSq)) return identity;
return q; return q;
}; };
if (keys.quatValues.size() == 1) return safe(keys.quatValues[0]); if (keys.quatValues.size() == 1) return safe(keys.quatValues[0]);
@@ -1907,21 +1907,23 @@ void M2Renderer::update(float deltaTime, const glm::vec3& cameraPos, const glm::
smokeEmitAccum = 0.0f; smokeEmitAccum = 0.0f;
} }
// --- Update existing smoke particles --- // --- Update existing smoke particles (swap-and-pop for O(1) removal) ---
for (auto it = smokeParticles.begin(); it != smokeParticles.end(); ) { for (size_t i = 0; i < smokeParticles.size(); ) {
it->life += deltaTime; auto& p = smokeParticles[i];
if (it->life >= it->maxLife) { p.life += deltaTime;
it = smokeParticles.erase(it); if (p.life >= p.maxLife) {
smokeParticles[i] = smokeParticles.back();
smokeParticles.pop_back();
continue; continue;
} }
it->position += it->velocity * deltaTime; p.position += p.velocity * deltaTime;
it->velocity.z *= 0.98f; // Slight deceleration p.velocity.z *= 0.98f; // Slight deceleration
it->velocity.x += distDrift(smokeRng) * deltaTime; p.velocity.x += distDrift(smokeRng) * deltaTime;
it->velocity.y += distDrift(smokeRng) * deltaTime; p.velocity.y += distDrift(smokeRng) * deltaTime;
// Grow from 1.0 to 3.5 over lifetime // Grow from 1.0 to 3.5 over lifetime
float t = it->life / it->maxLife; float t = p.life / p.maxLife;
it->size = 1.0f + t * 2.5f; p.size = 1.0f + t * 2.5f;
++it; ++i;
} }
// --- Normal M2 animation update --- // --- Normal M2 animation update ---