mirror of
https://github.com/Kelsidavis/WoWee.git
synced 2026-05-07 17:43:51 +00:00
feat(rendering): add HiZ occlusion culling & fix WMO interior shadows
Implement GPU-driven Hierarchical-Z occlusion culling for M2 doodads using a depth pyramid built from the previous frame's depth buffer. The cull shader projects bounding spheres via prevViewProj (temporal reprojection) and samples the HiZ pyramid to reject hidden objects before the main render pass.

Key implementation details:
- Separate early compute submission (beginSingleTimeCommands + fence wait) eliminates 2-frame visibility staleness
- Conservative safeguards prevent false culls: screen-edge guard, full VP row-vector AABB projection (Cauchy-Schwarz), 50% sphere inflation, depth bias, mip+1, min screen size threshold, camera motion dampening (auto-disable on fast rotations), and per-instance previouslyVisible flag tracking
- Graceful fallback to frustum-only culling if HiZ init fails

Fix dark WMO interiors by gating shadow map sampling on isInterior==0 in the WMO fragment shader. Interior groups (flag 0x2000) now rely solely on pre-baked MOCV vertex-color lighting + MOHD ambient color. Disable interiorDarken globally (was incorrectly darkening outdoor M2s when camera was inside a WMO). Use isInsideInteriorWMO() instead of isInsideWMO() for correct indoor detection.

New files:
- hiz_system.hpp/cpp: pyramid image management, compute pipeline, descriptors, mip-chain build dispatch, resize handling
- hiz_build.comp.glsl: MAX-depth 2x2 reduction compute shader
- m2_cull_hiz.comp.glsl: frustum + HiZ occlusion cull compute shader
- test_indoor_shadows.cpp: 14 unit tests for shadow/interior contracts

Modified:
- CullUniformsGPU expanded 128->272 bytes (HiZ params, viewProj, prevViewProj)
- Depth buffer images gain VK_IMAGE_USAGE_SAMPLED_BIT for HiZ reads
- wmo.frag.glsl: interior branch before unlit, shadow skip for 0x2000
- Render graph: hiz_build + compute_cull disabled (run in early compute)
- .gitignore: ignore compiled .spv binaries
- MEGA_BONE_MAX_INSTANCES: 2048 -> 4096

Signed-off-by: Pavel Okhlopkov <pavel.okhlopkov@flant.com>
This commit is contained in:
parent
17c1e3ea3b
commit
4b9b3026f4
17 changed files with 1317 additions and 35 deletions
|
|
@ -295,7 +295,7 @@ bool M2Renderer::initialize(VkContext* ctx, VkDescriptorSetLayout perFrameLayout
|
|||
// Output: uint visibility[] read back by CPU to skip culled instances in sortedVisible_ build.
|
||||
{
|
||||
static_assert(sizeof(CullInstanceGPU) == 32, "CullInstanceGPU must be 32 bytes (std430)");
|
||||
static_assert(sizeof(CullUniformsGPU) == 128, "CullUniformsGPU must be 128 bytes (std140)");
|
||||
static_assert(sizeof(CullUniformsGPU) == 272, "CullUniformsGPU must be 272 bytes (std140)");
|
||||
|
||||
// Descriptor set layout: binding 0 = UBO (frustum+camera), 1 = SSBO (input), 2 = SSBO (output)
|
||||
VkDescriptorSetLayoutBinding bindings[3] = {};
|
||||
|
|
@ -338,6 +338,54 @@ bool M2Renderer::initialize(VkContext* ctx, VkDescriptorSetLayout perFrameLayout
|
|||
cullComp.destroy();
|
||||
}
|
||||
|
||||
// HiZ-aware cull pipeline (Phase 6.3 Option B)
|
||||
// Uses set 0 (same as frustum-only) + set 1 (HiZ pyramid sampler from HiZSystem).
|
||||
// The HiZ descriptor set layout is created lazily when hizSystem_ is set, but the
|
||||
// pipeline layout and shader are created now if the shader is available.
|
||||
rendering::VkShaderModule cullHiZComp;
|
||||
if (cullHiZComp.loadFromFile(device, "assets/shaders/m2_cull_hiz.comp.spv")) {
|
||||
// HiZ cull set 1 layout: single combined image sampler (the HiZ pyramid)
|
||||
VkDescriptorSetLayoutBinding hizBinding{};
|
||||
hizBinding.binding = 0;
|
||||
hizBinding.descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER;
|
||||
hizBinding.descriptorCount = 1;
|
||||
hizBinding.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT;
|
||||
|
||||
VkDescriptorSetLayout hizSamplerLayout = VK_NULL_HANDLE;
|
||||
VkDescriptorSetLayoutCreateInfo hizLayoutCi{VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO};
|
||||
hizLayoutCi.bindingCount = 1;
|
||||
hizLayoutCi.pBindings = &hizBinding;
|
||||
vkCreateDescriptorSetLayout(device, &hizLayoutCi, nullptr, &hizSamplerLayout);
|
||||
|
||||
VkDescriptorSetLayout hizSetLayouts[2] = {cullSetLayout_, hizSamplerLayout};
|
||||
VkPipelineLayoutCreateInfo hizPlCi{VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO};
|
||||
hizPlCi.setLayoutCount = 2;
|
||||
hizPlCi.pSetLayouts = hizSetLayouts;
|
||||
vkCreatePipelineLayout(device, &hizPlCi, nullptr, &cullHiZPipelineLayout_);
|
||||
|
||||
VkComputePipelineCreateInfo hizCpCi{VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO};
|
||||
hizCpCi.stage = cullHiZComp.stageInfo(VK_SHADER_STAGE_COMPUTE_BIT);
|
||||
hizCpCi.layout = cullHiZPipelineLayout_;
|
||||
if (vkCreateComputePipelines(device, VK_NULL_HANDLE, 1, &hizCpCi, nullptr, &cullHiZPipeline_) != VK_SUCCESS) {
|
||||
LOG_WARNING("M2Renderer: failed to create HiZ cull compute pipeline — HiZ disabled");
|
||||
cullHiZPipeline_ = VK_NULL_HANDLE;
|
||||
vkDestroyPipelineLayout(device, cullHiZPipelineLayout_, nullptr);
|
||||
cullHiZPipelineLayout_ = VK_NULL_HANDLE;
|
||||
} else {
|
||||
LOG_INFO("M2Renderer: HiZ occlusion cull pipeline created");
|
||||
}
|
||||
|
||||
// hizSamplerLayout is only needed while vkCreatePipelineLayout() runs: Vulkan does not
|
||||
// access a descriptor set layout after the call that consumed it returns, so it is
|
||||
// safe to destroy it here. Descriptor sets bound at set 1 must be allocated from an
|
||||
// identically defined layout (single compute-stage combined image sampler at binding 0).
|
||||
vkDestroyDescriptorSetLayout(device, hizSamplerLayout, nullptr);
|
||||
|
||||
cullHiZComp.destroy();
|
||||
} else {
|
||||
LOG_INFO("M2Renderer: m2_cull_hiz.comp.spv not found — HiZ occlusion culling not available");
|
||||
}
|
||||
|
||||
// Descriptor pool: 2 sets × 3 descriptors each (1 UBO + 2 SSBO)
|
||||
VkDescriptorPoolSize poolSizes[2] = {};
|
||||
poolSizes[0] = {VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, 2};
|
||||
|
|
@ -756,6 +804,14 @@ bool M2Renderer::initialize(VkContext* ctx, VkDescriptorSetLayout perFrameLayout
|
|||
return true;
|
||||
}
|
||||
|
||||
void M2Renderer::invalidateCullOutput(uint32_t frameIndex) {
|
||||
// On non-HOST_COHERENT memory, VMA-mapped GPU→CPU buffers need explicit
|
||||
// invalidation so the CPU cache sees the latest GPU writes.
|
||||
if (frameIndex < 2 && cullOutputAlloc_[frameIndex]) {
|
||||
vmaInvalidateAllocation(vkCtx_->getAllocator(), cullOutputAlloc_[frameIndex], 0, VK_WHOLE_SIZE);
|
||||
}
|
||||
}
|
||||
|
||||
void M2Renderer::shutdown() {
|
||||
LOG_INFO("Shutting down M2 renderer...");
|
||||
if (!vkCtx_) return;
|
||||
|
|
@ -837,6 +893,8 @@ void M2Renderer::shutdown() {
|
|||
if (instanceDescPool_) { vkDestroyDescriptorPool(device, instanceDescPool_, nullptr); instanceDescPool_ = VK_NULL_HANDLE; }
|
||||
|
||||
// GPU frustum culling compute pipeline + buffers cleanup
|
||||
if (cullHiZPipeline_) { vkDestroyPipeline(device, cullHiZPipeline_, nullptr); cullHiZPipeline_ = VK_NULL_HANDLE; }
|
||||
if (cullHiZPipelineLayout_) { vkDestroyPipelineLayout(device, cullHiZPipelineLayout_, nullptr); cullHiZPipelineLayout_ = VK_NULL_HANDLE; }
|
||||
if (cullPipeline_) { vkDestroyPipeline(device, cullPipeline_, nullptr); cullPipeline_ = VK_NULL_HANDLE; }
|
||||
if (cullPipelineLayout_) { vkDestroyPipelineLayout(device, cullPipelineLayout_, nullptr); cullPipelineLayout_ = VK_NULL_HANDLE; }
|
||||
for (int i = 0; i < 2; i++) {
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue