Kelsidavis-WoWee/assets/shaders/m2_cull_hiz.comp.glsl
Pavel Okhlopkov 4b9b3026f4 feat(rendering): add HiZ occlusion culling & fix WMO interior shadows
Implement GPU-driven Hierarchical-Z occlusion culling for M2 doodads
using a depth pyramid built from the previous frame's depth buffer.
The cull shader projects bounding spheres via prevViewProj (temporal
reprojection) and samples the HiZ pyramid to reject hidden objects
before the main render pass.

Key implementation details:
- Separate early compute submission (beginSingleTimeCommands + fence
  wait) eliminates 2-frame visibility staleness
- Conservative safeguards prevent false culls: screen-edge guard,
  full VP row-vector AABB projection (Cauchy-Schwarz), 50% sphere
  inflation, depth bias, mip+1, min screen size threshold, camera
  motion dampening (auto-disable on fast rotations), and per-instance
  previouslyVisible flag tracking
- Graceful fallback to frustum-only culling if HiZ init fails

Fix dark WMO interiors by gating shadow map sampling on isInterior==0
in the WMO fragment shader. Interior groups (flag 0x2000) now rely
solely on pre-baked MOCV vertex-color lighting + MOHD ambient color.
Disable interiorDarken globally (was incorrectly darkening outdoor M2s
when camera was inside a WMO). Use isInsideInteriorWMO() instead of
isInsideWMO() for correct indoor detection.

New files:
- hiz_system.hpp/cpp: pyramid image management, compute pipeline,
  descriptors, mip-chain build dispatch, resize handling
- hiz_build.comp.glsl: MAX-depth 2x2 reduction compute shader
- m2_cull_hiz.comp.glsl: frustum + HiZ occlusion cull compute shader
- test_indoor_shadows.cpp: 14 unit tests for shadow/interior contracts

Modified:
- CullUniformsGPU expanded 128->272 bytes (HiZ params, viewProj,
  prevViewProj)
- Depth buffer images gain VK_IMAGE_USAGE_SAMPLED_BIT for HiZ reads
- wmo.frag.glsl: interior branch before unlit, shadow skip for 0x2000
- Render graph: hiz_build + compute_cull disabled (run in early compute)
- .gitignore: ignore compiled .spv binaries
- MEGA_BONE_MAX_INSTANCES: 2048 -> 4096

Signed-off-by: Pavel Okhlopkov <pavel.okhlopkov@flant.com>
2026-04-06 16:40:59 +03:00

184 lines
7.6 KiB
GLSL

#version 450
// GPU Frustum + HiZ Occlusion Culling for M2 doodads (Phase 6.3).
//
// Two-level culling:
// 1. Frustum — current-frame planes from viewProj.
// 2. HiZ occlusion — projects bounding sphere into the PREVIOUS frame's
// screen space via prevViewProj and samples the Hierarchical-Z pyramid
// (built from said previous depth). Conservative safeguards:
// • Only objects that were visible last frame get the HiZ test.
// • AABB must be fully inside the screen (no border sampling).
// • Bounding sphere is inflated by 50 % for the HiZ AABB.
// • A depth bias is applied before the occlusion comparison.
// • Nearest depth is projected via prevViewProj from sphere center
// (avoids toCam mismatch between current and previous cameras).
//
// Falls back gracefully: if hizEnabled == 0, behaves identically to frustum-only.
// Workgroup size: 64 invocations along X. main() uses gl_GlobalInvocationID.x
// as the instance index, so each invocation handles at most one instance.
layout(local_size_x = 64) in;
// Per-instance culling record, read from the CullInput buffer.
// One vec4 followed by four 32-bit scalars: 32 bytes per element under std430.
// NOTE(review): the host-side struct presumably mirrors this layout — confirm there.
struct CullInstance {
vec4 sphere; // xyz = world position, w = padded radius
             // (radius <= 0 skips both the frustum and the HiZ test)
float effectiveMaxDistSq; // instance is culled when its squared distance to cameraPos.xyz exceeds this
uint flags; // bit 0 = valid, bit 1 = smoke, bit 2 = invisibleTrap,
// bit 3 = previouslyVisible
float _pad0; // not read by this shader
float _pad1; // not read by this shader
};
// Per-dispatch culling parameters. With std140 packing the members below add
// up to 272 bytes (6*16 + 16 + 4*4 + 16 + 64 + 64).
layout(std140, set = 0, binding = 0) uniform CullUniforms {
vec4 frustumPlanes[6]; // xyz = plane normal, w = offset; a sphere is culled when
                       // dot(xyz, center) + w < -radius for any plane
vec4 cameraPos; // xyz = camera position, w = maxPossibleDistSq
uint instanceCount; // invocations with an index >= instanceCount exit without writing
uint hizEnabled; // 0 = frustum-only culling, non-zero = also run the HiZ test
uint hizMipLevels; // main() clamps the sampled LOD to hizMipLevels - 1
                   // (presumably the pyramid's mip count — confirm on the host side)
uint _pad2; // not read by this shader
vec4 hizParams; // x = pyramidWidth, y = pyramidHeight, z = nearPlane, w = unused
                // (main() reads only x and y)
mat4 viewProj; // current frame view-projection (not read by main() in this file)
mat4 prevViewProj; // PREVIOUS frame's view-projection for HiZ reprojection
};
// Read-only array of per-instance culling records; main() reads element
// gl_GlobalInvocationID.x for every index below instanceCount.
layout(std430, set = 0, binding = 1) readonly buffer CullInput {
CullInstance cullInstances[];
};
// Culling result, one uint per instance at the same index as cullInstances:
// 1u = visible, 0u = culled. Indices >= instanceCount are never written.
layout(std430, set = 0, binding = 2) buffer CullOutput {
uint visibility[];
};
// Hierarchical-Z depth pyramid. main() samples only the .r channel, through
// textureLod at an explicitly chosen mip level.
layout(set = 1, binding = 0) uniform sampler2D hizPyramid;
// Screen-edge margin, in UV units (0..1) — skip HiZ if the AABB touches this border.
// Depth data at screen edges is from unrelated geometry → false culls.
const float SCREEN_EDGE_MARGIN = 0.02;
// Sphere inflation factor for HiZ screen AABB (50 % larger → very conservative).
// The inflated radius is also used for the depth span subtracted from the
// sphere centre's depth.
const float HIZ_SPHERE_INFLATE = 1.5;
// Depth bias, in NDC depth units — push nearest depth closer to camera so only
// objects significantly behind occluders are culled.
const float HIZ_DEPTH_BIAS = 0.02;
// Minimum screen-space size (pixels) for HiZ to engage. Measured as the larger
// side of the UV AABB scaled by hizParams.xy; smaller footprints skip the test
// and stay visible.
const float HIZ_MIN_SCREEN_PX = 6.0;
// Compute entry point: one invocation per CullInstance.
//
// Writes visibility[id] = 0u as soon as any stage rejects the instance and
// 1u otherwise. Stages, in order:
//   1. flag check (valid, not smoke, not invisible trap)
//   2. coarse and per-instance squared-distance limits
//   3. sphere vs. the six current-frame frustum planes
//   4. HiZ occlusion against the depth pyramid, using prevViewProj
//
// Every HiZ precondition that fails falls through to "visible": the HiZ stage
// can only remove instances that already passed stages 1-3.
void main() {
    uint id = gl_GlobalInvocationID.x;
    // Surplus invocations of the last workgroup have no instance to process.
    if (id >= instanceCount) return;

    CullInstance inst = cullInstances[id];

    // Flag check: must be valid (bit 0), not smoke (bit 1), not invisible trap (bit 2).
    uint f = inst.flags;
    if ((f & 1u) == 0u || (f & 6u) != 0u) {
        visibility[id] = 0u;
        return;
    }

    // Early distance rejection (loose upper bound shared by all instances).
    vec3 toCam = inst.sphere.xyz - cameraPos.xyz;
    float distSq = dot(toCam, toCam);
    if (distSq > cameraPos.w) {
        visibility[id] = 0u;
        return;
    }

    // Accurate per-instance distance cull.
    if (distSq > inst.effectiveMaxDistSq) {
        visibility[id] = 0u;
        return;
    }

    // Frustum cull: sphere vs 6 planes (current frame).
    // Instances with a non-positive radius skip this test and the HiZ test.
    float radius = inst.sphere.w;
    if (radius > 0.0) {
        for (int i = 0; i < 6; i++) {
            float d = dot(frustumPlanes[i].xyz, inst.sphere.xyz) + frustumPlanes[i].w;
            // Sphere lies entirely on the negative side of this plane.
            if (d < -radius) {
                visibility[id] = 0u;
                return;
            }
        }
    }

    // --- HiZ Occlusion Test ---
    // Skip for objects not rendered last frame (bit 3 = previouslyVisible).
    bool previouslyVisible = (f & 8u) != 0u;
    // hizMipLevels must be non-zero: `hizMipLevels - 1u` below is unsigned
    // arithmetic and would wrap to 0xFFFFFFFF for 0, leaving the mip clamp
    // without an upper bound. In that case fall back to frustum-only culling.
    if (hizEnabled != 0u && hizMipLevels > 0u && radius > 0.0 && previouslyVisible) {
        // Inflate sphere for conservative screen-space AABB.
        float hizRadius = radius * HIZ_SPHERE_INFLATE;

        // Project sphere center into previous frame's clip space.
        vec4 clipCenter = prevViewProj * vec4(inst.sphere.xyz, 1.0);
        // Centers with non-positive clip w cannot be perspective-divided
        // meaningfully; keep them visible.
        if (clipCenter.w > 0.0) {
            vec3 ndc = clipCenter.xyz / clipCenter.w;

            // --- Sphere → screen AABB using VP row-vector lengths ---
            // The maximum screen-space extent of a world-space sphere is
            //     maxDeltaNdcX = R * ‖row_x(VP)‖ / w
            // where row_x = (VP[0][0], VP[1][0], VP[2][0]) maps world XYZ
            // offsets to clip-X. Using only the diagonal element (VP[0][0])
            // underestimates the footprint when the camera is rotated,
            // causing false culls at certain view angles.
            float rowLenX = length(vec3(prevViewProj[0][0],
                                        prevViewProj[1][0],
                                        prevViewProj[2][0]));
            float rowLenY = length(vec3(prevViewProj[0][1],
                                        prevViewProj[1][1],
                                        prevViewProj[2][1]));
            float projRadX = hizRadius * rowLenX / clipCenter.w;
            float projRadY = hizRadius * rowLenY / clipCenter.w;
            // Use the larger of the two extents on both axes (square AABB).
            float projRad = max(projRadX, projRadY);

            // NDC [-1, 1] → UV [0, 1]; a radius scales by the same 0.5 factor.
            vec2 uvCenter = ndc.xy * 0.5 + 0.5;
            float uvRad = projRad * 0.5;
            vec2 uvMin = uvCenter - uvRad;
            vec2 uvMax = uvCenter + uvRad;

            // Screen-edge guard: skip if the AABB extends outside the safe
            // area (depth data at borders is from unrelated geometry) or if
            // the AABB is degenerate.
            if (uvMin.x >= SCREEN_EDGE_MARGIN && uvMin.y >= SCREEN_EDGE_MARGIN &&
                uvMax.x <= (1.0 - SCREEN_EDGE_MARGIN) && uvMax.y <= (1.0 - SCREEN_EDGE_MARGIN) &&
                uvMax.x > uvMin.x && uvMax.y > uvMin.y)
            {
                // AABB size in pixels of the pyramid's base resolution.
                float aabbW = (uvMax.x - uvMin.x) * hizParams.x;
                float aabbH = (uvMax.y - uvMin.y) * hizParams.y;
                float screenSize = max(aabbW, aabbH);

                if (screenSize >= HIZ_MIN_SCREEN_PX) {
                    // Mip level: +1 for conservatism (coarser = bigger depth footprint).
                    float mipLevel = ceil(log2(max(screenSize, 1.0))) + 1.0;
                    // Safe: hizMipLevels > 0u is guaranteed by the gate above.
                    mipLevel = clamp(mipLevel, 0.0, float(hizMipLevels - 1u));

                    // Sample HiZ at 4 corners — take MAX (farthest occluder).
                    float pz0 = textureLod(hizPyramid, uvMin, mipLevel).r;
                    float pz1 = textureLod(hizPyramid, vec2(uvMax.x, uvMin.y), mipLevel).r;
                    float pz2 = textureLod(hizPyramid, vec2(uvMin.x, uvMax.y), mipLevel).r;
                    float pz3 = textureLod(hizPyramid, uvMax, mipLevel).r;
                    float pyramidDepth = max(max(pz0, pz1), max(pz2, pz3));

                    // Nearest depth: project sphere center's NDC-Z then subtract
                    // the sphere's depth range. The depth span uses the Z-row
                    // length of VP (same Cauchy-Schwarz reasoning as X/Y).
                    float rowLenZ = length(vec3(prevViewProj[0][2],
                                                prevViewProj[1][2],
                                                prevViewProj[2][2]));
                    float depthSpan = hizRadius * rowLenZ / clipCenter.w;
                    float centerDepth = ndc.z;
                    float nearestDepth = centerDepth - depthSpan - HIZ_DEPTH_BIAS;

                    // Occluded only if even the biased nearest point lies behind
                    // the farthest sampled depth. A sampled depth of 1.0 never
                    // culls — presumably the far-plane clear value, i.e. no
                    // occluder was drawn there (confirm against the depth setup).
                    if (nearestDepth > pyramidDepth && pyramidDepth < 1.0) {
                        visibility[id] = 0u;
                        return;
                    }
                }
            }
        }
        // fallthrough: conservatively visible
    }

    visibility[id] = 1u;
}