mirror of
https://github.com/Kelsidavis/WoWee.git
synced 2026-04-17 01:23:51 +00:00
feat(rendering): add HiZ occlusion culling & fix WMO interior shadows
Implement GPU-driven Hierarchical-Z occlusion culling for M2 doodads using a depth pyramid built from the previous frame's depth buffer. The cull shader projects bounding spheres via prevViewProj (temporal reprojection) and samples the HiZ pyramid to reject hidden objects before the main render pass.

Key implementation details:
- Separate early compute submission (beginSingleTimeCommands + fence wait) eliminates 2-frame visibility staleness
- Conservative safeguards prevent false culls: screen-edge guard, full VP row-vector AABB projection (Cauchy-Schwarz), 50% sphere inflation, depth bias, mip+1, min screen size threshold, camera motion dampening (auto-disable on fast rotations), and per-instance previouslyVisible flag tracking
- Graceful fallback to frustum-only culling if HiZ init fails

Fix dark WMO interiors by gating shadow map sampling on isInterior==0 in the WMO fragment shader. Interior groups (flag 0x2000) now rely solely on pre-baked MOCV vertex-color lighting + MOHD ambient color. Disable interiorDarken globally (was incorrectly darkening outdoor M2s when camera was inside a WMO). Use isInsideInteriorWMO() instead of isInsideWMO() for correct indoor detection.

New files:
- hiz_system.hpp/cpp: pyramid image management, compute pipeline, descriptors, mip-chain build dispatch, resize handling
- hiz_build.comp.glsl: MAX-depth 2x2 reduction compute shader
- m2_cull_hiz.comp.glsl: frustum + HiZ occlusion cull compute shader
- test_indoor_shadows.cpp: 14 unit tests for shadow/interior contracts

Modified:
- CullUniformsGPU expanded 128->272 bytes (HiZ params, viewProj, prevViewProj)
- Depth buffer images gain VK_IMAGE_USAGE_SAMPLED_BIT for HiZ reads
- wmo.frag.glsl: interior branch before unlit, shadow skip for 0x2000
- Render graph: hiz_build + compute_cull disabled (run in early compute)
- .gitignore: ignore compiled .spv binaries
- MEGA_BONE_MAX_INSTANCES: 2048 -> 4096

Signed-off-by: Pavel Okhlopkov <pavel.okhlopkov@flant.com>
This commit is contained in:
parent
17c1e3ea3b
commit
4b9b3026f4
17 changed files with 1317 additions and 35 deletions
|
|
@@ -28,6 +28,7 @@ namespace rendering {
|
|||
class Camera;
|
||||
class VkContext;
|
||||
class VkTexture;
|
||||
class HiZSystem;
|
||||
|
||||
/**
|
||||
* GPU representation of an M2 model
|
||||
|
|
@@ -299,6 +300,13 @@ public:
|
|||
void dispatchCullCompute(VkCommandBuffer cmd, uint32_t frameIndex, const Camera& camera);
|
||||
void render(VkCommandBuffer cmd, VkDescriptorSet perFrameSet, const Camera& camera);
|
||||
|
||||
/** Set the HiZ system for occlusion culling (Phase 6.3). nullptr disables HiZ. */
|
||||
void setHiZSystem(HiZSystem* hiz) { hizSystem_ = hiz; }
|
||||
|
||||
/** Ensure GPU→CPU cull output is visible to the host after a fence wait.
|
||||
* Call after the early compute submission finishes (endSingleTimeCommands). */
|
||||
void invalidateCullOutput(uint32_t frameIndex);
|
||||
|
||||
/**
|
||||
* Initialize shadow pipeline (Phase 7)
|
||||
*/
|
||||
|
|
@@ -437,7 +445,7 @@ private:
|
|||
|
||||
// Mega bone SSBO — consolidates all per-instance bone matrices into a single buffer per frame.
|
||||
// Replaces per-instance bone SSBOs for fewer descriptor binds and enables GPU instancing.
|
||||
static constexpr uint32_t MEGA_BONE_MAX_INSTANCES = 2048;
|
||||
static constexpr uint32_t MEGA_BONE_MAX_INSTANCES = 4096;
|
||||
static constexpr uint32_t MAX_BONES_PER_INSTANCE = 128;
|
||||
::VkBuffer megaBoneBuffer_[2] = {};
|
||||
VmaAllocation megaBoneAlloc_[2] = {};
|
||||
|
|
@@ -472,19 +480,26 @@ private:
|
|||
uint32_t flags; // bit 0 = valid, bit 1 = smoke, bit 2 = invisibleTrap
|
||||
float _pad[2] = {};
|
||||
};
|
||||
struct CullUniformsGPU { // matches CullUniforms in m2_cull.comp.glsl (128 bytes, std140)
|
||||
glm::vec4 frustumPlanes[6]; // xyz = normal, w = distance
|
||||
glm::vec4 cameraPos; // xyz = camera position, w = maxPossibleDistSq
|
||||
uint32_t instanceCount;
|
||||
uint32_t _pad[3] = {};
|
||||
};
|
||||
struct CullUniformsGPU { // matches CullUniforms in m2_cull_hiz.comp.glsl (std140)
|
||||
glm::vec4 frustumPlanes[6]; // xyz = normal, w = distance (96 bytes)
|
||||
glm::vec4 cameraPos; // xyz = camera position, w = maxPossibleDistSq (16 bytes)
|
||||
uint32_t instanceCount; // (4 bytes)
|
||||
uint32_t hizEnabled; // 1 = HiZ occlusion active (4 bytes)
|
||||
uint32_t hizMipLevels; // mip levels in HiZ pyramid (4 bytes)
|
||||
uint32_t _pad2 = {}; // (4 bytes)
|
||||
glm::vec4 hizParams; // x=pyramidW, y=pyramidH, z=nearPlane, w=unused (16 bytes)
|
||||
glm::mat4 viewProj; // current frame view-projection (64 bytes)
|
||||
glm::mat4 prevViewProj; // previous frame VP for HiZ reprojection (64 bytes)
|
||||
}; // Total: 272 bytes
|
||||
static constexpr uint32_t MAX_CULL_INSTANCES = 24576;
|
||||
VkPipeline cullPipeline_ = VK_NULL_HANDLE;
|
||||
VkPipelineLayout cullPipelineLayout_ = VK_NULL_HANDLE;
|
||||
VkPipeline cullPipeline_ = VK_NULL_HANDLE; // frustum-only (fallback)
|
||||
VkPipeline cullHiZPipeline_ = VK_NULL_HANDLE; // frustum + HiZ occlusion
|
||||
VkPipelineLayout cullPipelineLayout_ = VK_NULL_HANDLE; // frustum-only layout (set 0)
|
||||
VkPipelineLayout cullHiZPipelineLayout_ = VK_NULL_HANDLE; // HiZ layout (set 0 + set 1)
|
||||
VkDescriptorSetLayout cullSetLayout_ = VK_NULL_HANDLE;
|
||||
VkDescriptorPool cullDescPool_ = VK_NULL_HANDLE;
|
||||
VkDescriptorSet cullSet_[2] = {}; // double-buffered
|
||||
::VkBuffer cullUniformBuffer_[2] = {}; // frustum planes + camera (UBO)
|
||||
::VkBuffer cullUniformBuffer_[2] = {}; // frustum planes + camera + HiZ params (UBO)
|
||||
VmaAllocation cullUniformAlloc_[2] = {};
|
||||
void* cullUniformMapped_[2] = {};
|
||||
::VkBuffer cullInputBuffer_[2] = {}; // per-instance bounding sphere + flags (SSBO)
|
||||
|
|
@@ -494,6 +509,20 @@ private:
|
|||
VmaAllocation cullOutputAlloc_[2] = {};
|
||||
void* cullOutputMapped_[2] = {};
|
||||
|
||||
// HiZ occlusion culling (Phase 6.3) — optional, driven by Renderer
|
||||
HiZSystem* hizSystem_ = nullptr;
|
||||
|
||||
// Previous frame's view-projection for temporal reprojection in HiZ culling.
|
||||
// Stored each frame so the cull shader can project into the same screen space
|
||||
// as the depth buffer the HiZ pyramid was built from.
|
||||
glm::mat4 prevVP_{1.0f};
|
||||
|
||||
// Per-instance visibility from the previous frame. Used to set the
|
||||
// `previouslyVisible` flag (bit 3) on each CullInstance so the shader
|
||||
// skips the HiZ test for objects that weren't rendered last frame
|
||||
// (their depth data is unreliable).
|
||||
std::vector<uint8_t> prevFrameVisible_;
|
||||
|
||||
// Dynamic ribbon vertex buffer (CPU-written triangle strip)
|
||||
static constexpr size_t MAX_RIBBON_VERTS = 2048; // 9 floats each
|
||||
::VkBuffer ribbonVB_ = VK_NULL_HANDLE;
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue