mirror of
https://github.com/Kelsidavis/WoWee.git
synced 2026-04-17 17:43:52 +00:00
perf(rendering): reduce GPU cull buffer and add CPU fallback for overflow
Halve MAX_CULL_INSTANCES from 65536 to 32768 and iterate over all instances in render(), falling back to CPU frustum culling for any instances beyond the GPU cull buffer.
This commit is contained in:
parent
b62df70d09
commit
8bb3702af4
2 changed files with 7 additions and 6 deletions
|
|
@@ -478,7 +478,7 @@ private:
|
||||||
uint32_t instanceCount;
|
uint32_t instanceCount;
|
||||||
uint32_t _pad[3] = {};
|
uint32_t _pad[3] = {};
|
||||||
};
|
};
|
||||||
static constexpr uint32_t MAX_CULL_INSTANCES = 65536;
|
static constexpr uint32_t MAX_CULL_INSTANCES = 32768;
|
||||||
VkPipeline cullPipeline_ = VK_NULL_HANDLE;
|
VkPipeline cullPipeline_ = VK_NULL_HANDLE;
|
||||||
VkPipelineLayout cullPipelineLayout_ = VK_NULL_HANDLE;
|
VkPipelineLayout cullPipelineLayout_ = VK_NULL_HANDLE;
|
||||||
VkDescriptorSetLayout cullSetLayout_ = VK_NULL_HANDLE;
|
VkDescriptorSetLayout cullSetLayout_ = VK_NULL_HANDLE;
|
||||||
|
|
|
||||||
|
|
@@ -2680,22 +2680,23 @@ void M2Renderer::render(VkCommandBuffer cmd, VkDescriptorSet perFrameSet, const
|
||||||
sortedVisible_.reserve(expectedVisible);
|
sortedVisible_.reserve(expectedVisible);
|
||||||
}
|
}
|
||||||
|
|
||||||
// GPU frustum culling — build frustum only for CPU fallback path
|
// GPU frustum culling — build frustum for CPU fallback path and overflow instances
|
||||||
Frustum frustum;
|
Frustum frustum;
|
||||||
if (!gpuCullAvailable) {
|
{
|
||||||
const glm::mat4 vp = camera.getProjectionMatrix() * camera.getViewMatrix();
|
const glm::mat4 vp = camera.getProjectionMatrix() * camera.getViewMatrix();
|
||||||
frustum.extractFromMatrix(vp);
|
frustum.extractFromMatrix(vp);
|
||||||
}
|
}
|
||||||
const float maxPossibleDistSq = maxRenderDistanceSq * 4.0f;
|
const float maxPossibleDistSq = maxRenderDistanceSq * 4.0f;
|
||||||
|
|
||||||
for (uint32_t i = 0; i < numInstances; ++i) {
|
const uint32_t totalInstances = static_cast<uint32_t>(instances.size());
|
||||||
|
for (uint32_t i = 0; i < totalInstances; ++i) {
|
||||||
const auto& instance = instances[i];
|
const auto& instance = instances[i];
|
||||||
|
|
||||||
if (gpuCullAvailable) {
|
if (gpuCullAvailable && i < numInstances) {
|
||||||
// GPU already tested flags + distance + frustum
|
// GPU already tested flags + distance + frustum
|
||||||
if (!visibility[i]) continue;
|
if (!visibility[i]) continue;
|
||||||
} else {
|
} else {
|
||||||
// CPU fallback: same culling logic as before
|
// CPU fallback: for non-GPU path or instances beyond cull buffer
|
||||||
if (!instance.cachedIsValid || instance.cachedIsSmoke || instance.cachedIsInvisibleTrap) continue;
|
if (!instance.cachedIsValid || instance.cachedIsSmoke || instance.cachedIsInvisibleTrap) continue;
|
||||||
|
|
||||||
glm::vec3 toCam = instance.position - camPos;
|
glm::vec3 toCam = instance.position - camPos;
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue