perf(rendering): reduce GPU cull buffer and add CPU fallback for overflow

Halve MAX_CULL_INSTANCES from 65536 to 32768 and make render() iterate over all instances,
falling back to CPU frustum culling for any instances beyond the GPU cull buffer.
This commit is contained in:
Kelsi Davis 2026-04-05 03:25:27 -07:00
parent b62df70d09
commit 8bb3702af4
2 changed files with 7 additions and 6 deletions

View file

@ -478,7 +478,7 @@ private:
uint32_t instanceCount; uint32_t instanceCount;
uint32_t _pad[3] = {}; uint32_t _pad[3] = {};
}; };
static constexpr uint32_t MAX_CULL_INSTANCES = 65536; static constexpr uint32_t MAX_CULL_INSTANCES = 32768;
VkPipeline cullPipeline_ = VK_NULL_HANDLE; VkPipeline cullPipeline_ = VK_NULL_HANDLE;
VkPipelineLayout cullPipelineLayout_ = VK_NULL_HANDLE; VkPipelineLayout cullPipelineLayout_ = VK_NULL_HANDLE;
VkDescriptorSetLayout cullSetLayout_ = VK_NULL_HANDLE; VkDescriptorSetLayout cullSetLayout_ = VK_NULL_HANDLE;

View file

@ -2680,22 +2680,23 @@ void M2Renderer::render(VkCommandBuffer cmd, VkDescriptorSet perFrameSet, const
sortedVisible_.reserve(expectedVisible); sortedVisible_.reserve(expectedVisible);
} }
// GPU frustum culling — build frustum only for CPU fallback path // GPU frustum culling — build frustum for CPU fallback path and overflow instances
Frustum frustum; Frustum frustum;
if (!gpuCullAvailable) { {
const glm::mat4 vp = camera.getProjectionMatrix() * camera.getViewMatrix(); const glm::mat4 vp = camera.getProjectionMatrix() * camera.getViewMatrix();
frustum.extractFromMatrix(vp); frustum.extractFromMatrix(vp);
} }
const float maxPossibleDistSq = maxRenderDistanceSq * 4.0f; const float maxPossibleDistSq = maxRenderDistanceSq * 4.0f;
for (uint32_t i = 0; i < numInstances; ++i) { const uint32_t totalInstances = static_cast<uint32_t>(instances.size());
for (uint32_t i = 0; i < totalInstances; ++i) {
const auto& instance = instances[i]; const auto& instance = instances[i];
if (gpuCullAvailable) { if (gpuCullAvailable && i < numInstances) {
// GPU already tested flags + distance + frustum // GPU already tested flags + distance + frustum
if (!visibility[i]) continue; if (!visibility[i]) continue;
} else { } else {
// CPU fallback: same culling logic as before // CPU fallback: for non-GPU path or instances beyond cull buffer
if (!instance.cachedIsValid || instance.cachedIsSmoke || instance.cachedIsInvisibleTrap) continue; if (!instance.cachedIsValid || instance.cachedIsSmoke || instance.cachedIsInvisibleTrap) continue;
glm::vec3 toCam = instance.position - camPos; glm::vec3 toCam = instance.position - camPos;