mirror of
https://github.com/Kelsidavis/WoWee.git
synced 2026-05-02 15:53:51 +00:00
185 lines
7.6 KiB
Text
185 lines
7.6 KiB
Text
|
|
#version 450
|
||
|
|
|
||
|
|
// GPU Frustum + HiZ Occlusion Culling for M2 doodads (Phase 6.3).
|
||
|
|
//
|
||
|
|
// Two-level culling:
|
||
|
|
// 1. Frustum — current-frame planes from viewProj.
|
||
|
|
// 2. HiZ occlusion — projects bounding sphere into the PREVIOUS frame's
|
||
|
|
// screen space via prevViewProj and samples the Hierarchical-Z pyramid
|
||
|
|
// (built from said previous depth). Conservative safeguards:
|
||
|
|
// • Only objects that were visible last frame get the HiZ test.
|
||
|
|
// • AABB must be fully inside the screen (no border sampling).
|
||
|
|
// • Bounding sphere is inflated by 50 % for the HiZ AABB.
|
||
|
|
// • A depth bias is applied before the occlusion comparison.
|
||
|
|
// • Nearest depth is projected via prevViewProj from sphere center
|
||
|
|
// (avoids toCam mismatch between current and previous cameras).
|
||
|
|
//
|
||
|
|
// Falls back gracefully: if hizEnabled == 0, behaves identically to frustum-only.
|
||
|
|
|
||
|
|
layout(local_size_x = 64) in;
|
||
|
|
|
||
|
|
// One cull candidate, as stored in the CullInput buffer.
struct CullInstance {
    // Bounding sphere: xyz = world-space position, w = radius (already padded).
    vec4  sphere;

    // Squared camera distance beyond which this instance is rejected.
    float effectiveMaxDistSq;

    // Bit field read by main():
    //   bit 0 (1u) = valid
    //   bit 1 (2u) = smoke
    //   bit 2 (4u) = invisibleTrap
    //   bit 3 (8u) = previouslyVisible (enables the HiZ test)
    uint  flags;

    // Explicit padding; never read by the shader.
    float _pad0;
    float _pad1;
};
|
||
|
|
|
||
|
|
// Per-dispatch culling parameters.
layout(set = 0, binding = 0, std140) uniform CullUniforms {
    // Current-frame frustum planes; a sphere is rejected when
    // dot(plane.xyz, center) + plane.w < -radius for any plane.
    vec4 frustumPlanes[6];

    // xyz = camera position, w = maxPossibleDistSq (loose squared-distance bound).
    vec4 cameraPos;

    // Number of entries in cullInstances[] to process.
    uint instanceCount;

    // Non-zero turns the HiZ occlusion test on.
    uint hizEnabled;

    // Mip count of the HiZ pyramid; the sampled LOD is clamped to hizMipLevels - 1.
    uint hizMipLevels;

    // Explicit padding; never read by the shader.
    uint _pad2;

    // x = pyramidWidth, y = pyramidHeight, z = nearPlane, w = unused.
    vec4 hizParams;

    // View-projection of the current frame.
    mat4 viewProj;

    // View-projection of the PREVIOUS frame, used to reproject into the HiZ pyramid.
    mat4 prevViewProj;
};
|
||
|
|
|
||
|
|
// Read-only list of cull candidates; one entry per compute invocation.
layout(set = 0, binding = 1, std430) readonly buffer CullInput {
    CullInstance cullInstances[];
};
|
||
|
|
|
||
|
|
// Result per instance, indexed like cullInstances[]: 1u = visible, 0u = culled.
layout(set = 0, binding = 2, std430) buffer CullOutput {
    uint visibility[];
};
|
||
|
|
|
||
|
|
// Hierarchical-Z depth pyramid; main() reads its .r channel with explicit LODs.
layout(set = 1, binding = 0) uniform sampler2D hizPyramid;
|
||
|
|
|
||
|
|
// Border (as a fraction of the [0,1] UV range) that the projected AABB must
// stay clear of. If the AABB reaches into this border the HiZ test is skipped,
// because depth near the screen edge comes from unrelated geometry and would
// produce false culls.
const float SCREEN_EDGE_MARGIN = 0.02;

// Multiplier applied to the sphere radius before it is projected for the HiZ
// test (1.5 = 50 % larger footprint, i.e. deliberately conservative).
const float HIZ_SPHERE_INFLATE = 1.5;

// Amount subtracted from the sphere's nearest depth before it is compared with
// the pyramid, so only objects clearly behind an occluder get culled.
const float HIZ_DEPTH_BIAS = 0.02;

// Smallest projected AABB side for which the HiZ test runs. Measured in
// base-level pyramid texels (UV extent scaled by hizParams.xy).
const float HIZ_MIN_SCREEN_PX = 6.0;
|
||
|
|
|
||
|
|
// Length of the xyz part of row `row` of a column-major matrix (m[col][row]).
// By Cauchy-Schwarz this bounds how far a unit world-space offset can move the
// corresponding clip-space coordinate.
float rowLength3(mat4 m, int row) {
    return length(vec3(m[0][row], m[1][row], m[2][row]));
}

// True when the sphere lies entirely on the negative side of at least one of
// the six current-frame frustum planes.
bool sphereOutsideFrustum(vec3 center, float radius) {
    for (int plane = 0; plane < 6; ++plane) {
        float signedDist = dot(frustumPlanes[plane].xyz, center) + frustumPlanes[plane].w;
        if (signedDist < -radius) {
            return true;
        }
    }
    return false;
}

// HiZ occlusion test against the PREVIOUS frame's depth pyramid.
// Returns true only when the test could be run and the sphere's biased nearest
// depth is behind every sampled pyramid texel; every bail-out returns false
// (conservatively visible).
bool sphereOccludedByHiZ(vec3 center, float radius) {
    // Inflated radius gives a conservative screen-space footprint.
    float hizRadius = radius * HIZ_SPHERE_INFLATE;

    // Sphere centre in the previous frame's clip space.
    vec4 clipCenter = prevViewProj * vec4(center, 1.0);
    if (!(clipCenter.w > 0.0)) {
        return false;
    }
    vec3 ndc = clipCenter.xyz / clipCenter.w;

    // Screen footprint from the full X/Y row lengths of the matrix rather than
    // only the diagonal terms, which would under-estimate the extent for a
    // rotated camera and cause false culls.
    float projRadX = hizRadius * rowLength3(prevViewProj, 0) / clipCenter.w;
    float projRadY = hizRadius * rowLength3(prevViewProj, 1) / clipCenter.w;
    float projRad  = max(projRadX, projRadY);

    // Square AABB in UV space around the projected centre.
    vec2  uvCenter = ndc.xy * 0.5 + 0.5;
    float uvRad    = projRad * 0.5;
    vec2  uvMin    = uvCenter - uvRad;
    vec2  uvMax    = uvCenter + uvRad;

    // Screen-edge guard: the AABB must sit fully inside the safe area and be
    // non-degenerate, otherwise the HiZ test is skipped.
    bool insideSafeArea =
        all(greaterThanEqual(uvMin, vec2(SCREEN_EDGE_MARGIN))) &&
        all(lessThanEqual(uvMax, vec2(1.0 - SCREEN_EDGE_MARGIN))) &&
        all(greaterThan(uvMax, uvMin));
    if (!insideSafeArea) {
        return false;
    }

    // Larger AABB side, in base-level pyramid texels.
    vec2  aabbTexels = vec2((uvMax.x - uvMin.x) * hizParams.x,
                            (uvMax.y - uvMin.y) * hizParams.y);
    float screenSize = max(aabbTexels.x, aabbTexels.y);
    if (!(screenSize >= HIZ_MIN_SCREEN_PX)) {
        return false;
    }

    // One level coarser than the footprint strictly needs, then clamped to
    // the last level reported by the host.
    float mipLevel = ceil(log2(max(screenSize, 1.0))) + 1.0;
    mipLevel = clamp(mipLevel, 0.0, float(hizMipLevels - 1u));

    // Largest depth over the four AABB corners. The comparison below treats
    // larger values as farther away.
    float d00 = textureLod(hizPyramid, uvMin, mipLevel).r;
    float d10 = textureLod(hizPyramid, vec2(uvMax.x, uvMin.y), mipLevel).r;
    float d01 = textureLod(hizPyramid, vec2(uvMin.x, uvMax.y), mipLevel).r;
    float d11 = textureLod(hizPyramid, uvMax, mipLevel).r;
    float pyramidDepth = max(max(d00, d10), max(d01, d11));

    // Nearest depth of the sphere: centre NDC-Z minus a depth half-extent
    // estimated from the Z-row length, minus the bias. Working from the
    // reprojected centre avoids mixing current and previous camera positions.
    // NOTE(review): the half-extent estimate ignores the change in w across
    // the sphere; it looks like an over-estimate (conservative) for a
    // perspective projection — confirm against the projection in use.
    float depthSpan    = hizRadius * rowLength3(prevViewProj, 2) / clipCenter.w;
    float centerDepth  = ndc.z;
    float nearestDepth = centerDepth - depthSpan - HIZ_DEPTH_BIAS;

    // No cull when the sampled depth is >= 1.0 (presumably the far-plane
    // clear value, i.e. nothing was drawn there — confirm).
    return nearestDepth > pyramidDepth && pyramidDepth < 1.0;
}

// Entry point: one invocation per cull instance. Writes visibility[idx] = 1u
// when the instance survives the flag, distance, frustum and (optional) HiZ
// tests, and 0u otherwise. Invocations past instanceCount write nothing.
void main() {
    uint idx = gl_GlobalInvocationID.x;
    if (idx >= instanceCount) {
        return;
    }

    CullInstance inst = cullInstances[idx];
    uint  bits   = inst.flags;
    vec3  center = inst.sphere.xyz;
    float radius = inst.sphere.w;

    // Flags: must be valid (bit 0) and neither smoke (bit 1) nor an
    // invisible trap (bit 2).
    bool keep = (bits & 1u) != 0u && (bits & 6u) == 0u;

    // Distance: loose global bound first, then the per-instance limit.
    if (keep) {
        vec3  offset = center - cameraPos.xyz;
        float distSq = dot(offset, offset);
        keep = !(distSq > cameraPos.w) && !(distSq > inst.effectiveMaxDistSq);
    }

    // Frustum and HiZ tests both require a positive radius; instances
    // without one skip them and stay visible.
    if (keep && radius > 0.0) {
        keep = !sphereOutsideFrustum(center, radius);

        // HiZ runs only when enabled and only for instances flagged as
        // visible last frame (bit 3).
        if (keep && hizEnabled != 0u && (bits & 8u) != 0u) {
            keep = !sphereOccludedByHiZ(center, radius);
        }
    }

    visibility[idx] = keep ? 1u : 0u;
}
|