mirror of
https://github.com/Kelsidavis/WoWee.git
synced 2026-04-14 08:23:52 +00:00
feat(rendering): add HiZ occlusion culling & fix WMO interior shadows
Implement GPU-driven Hierarchical-Z occlusion culling for M2 doodads using a depth pyramid built from the previous frame's depth buffer. The cull shader projects bounding spheres via prevViewProj (temporal reprojection) and samples the HiZ pyramid to reject hidden objects before the main render pass. Key implementation details: - Separate early compute submission (beginSingleTimeCommands + fence wait) eliminates 2-frame visibility staleness - Conservative safeguards prevent false culls: screen-edge guard, full VP row-vector AABB projection (Cauchy-Schwarz), 50% sphere inflation, depth bias, mip+1, min screen size threshold, camera motion dampening (auto-disable on fast rotations), and per-instance previouslyVisible flag tracking - Graceful fallback to frustum-only culling if HiZ init fails Fix dark WMO interiors by gating shadow map sampling on isInterior==0 in the WMO fragment shader. Interior groups (flag 0x2000) now rely solely on pre-baked MOCV vertex-color lighting + MOHD ambient color. Disable interiorDarken globally (was incorrectly darkening outdoor M2s when camera was inside a WMO). Use isInsideInteriorWMO() instead of isInsideWMO() for correct indoor detection. New files: - hiz_system.hpp/cpp: pyramid image management, compute pipeline, descriptors, mip-chain build dispatch, resize handling - hiz_build.comp.glsl: MAX-depth 2x2 reduction compute shader - m2_cull_hiz.comp.glsl: frustum + HiZ occlusion cull compute shader - test_indoor_shadows.cpp: 14 unit tests for shadow/interior contracts Modified: - CullUniformsGPU expanded 128->272 bytes (HiZ params, viewProj, prevViewProj) - Depth buffer images gain VK_IMAGE_USAGE_SAMPLED_BIT for HiZ reads - wmo.frag.glsl: interior branch before unlit, shadow skip for 0x2000 - Render graph: hiz_build + compute_cull disabled (run in early compute) - .gitignore: ignore compiled .spv binaries - MEGA_BONE_MAX_INSTANCES: 2048 -> 4096 Signed-off-by: Pavel Okhlopkov <pavel.okhlopkov@flant.com>
This commit is contained in:
parent
17c1e3ea3b
commit
4b9b3026f4
17 changed files with 1317 additions and 35 deletions
1
.gitignore
vendored
1
.gitignore
vendored
|
|
@ -19,6 +19,7 @@ Makefile
|
|||
*.obj
|
||||
*.slo
|
||||
*.lo
|
||||
*.spv
|
||||
|
||||
# Compiled Dynamic libraries
|
||||
*.so
|
||||
|
|
|
|||
|
|
@ -624,6 +624,7 @@ set(WOWEE_SOURCES
|
|||
src/rendering/m2_renderer_instance.cpp
|
||||
src/rendering/m2_model_classifier.cpp
|
||||
src/rendering/render_graph.cpp
|
||||
src/rendering/hiz_system.cpp
|
||||
src/rendering/quest_marker_renderer.cpp
|
||||
src/rendering/minimap.cpp
|
||||
src/rendering/world_map.cpp
|
||||
|
|
|
|||
57
assets/shaders/hiz_build.comp.glsl
Normal file
57
assets/shaders/hiz_build.comp.glsl
Normal file
|
|
@ -0,0 +1,57 @@
|
|||
#version 450
|
||||
|
||||
// Hierarchical-Z depth pyramid builder.
|
||||
// Builds successive mip levels from the scene depth buffer.
|
||||
// Each 2×2 block is reduced to its MAXIMUM depth (farthest/largest value).
|
||||
// This is conservative for occlusion: an object is only culled when its nearest
|
||||
// depth exceeds the farthest occluder depth in the pyramid region.
|
||||
//
|
||||
// Two modes controlled by push constant:
|
||||
// mipLevel == 0: Sample from the source depth texture (mip 0 of the full-res depth).
|
||||
// mipLevel > 0: Sample from the previous HiZ mip level.
|
||||
|
||||
layout(local_size_x = 8, local_size_y = 8) in;
|
||||
|
||||
// Source depth texture (full-resolution scene depth, or previous mip via same image)
|
||||
layout(set = 0, binding = 0) uniform sampler2D srcDepth;
|
||||
|
||||
// Destination mip level (written as storage image)
|
||||
layout(r32f, set = 0, binding = 1) uniform writeonly image2D dstMip;
|
||||
|
||||
layout(push_constant) uniform PushConstants {
|
||||
ivec2 dstSize; // Width and height of the destination mip level
|
||||
int mipLevel; // Current mip level being built (0 = from scene depth)
|
||||
};
|
||||
|
||||
// Builds one texel of the destination HiZ mip.
//
// Each invocation reduces the source texels covered by its destination texel
// to their MAXIMUM depth (0 = near, 1 = far) and stores the result in dstMip.
//
// Source selection (unchanged from the original contract):
//   mipLevel == 0 : LOD 0 of srcDepth (the scene depth)
//   mipLevel  > 0 : LOD (mipLevel - 1) of srcDepth (the previous HiZ level)
//
// Footprint: the destination size is floor(srcSize / 2). When a source
// dimension is odd, a destination texel spans up to THREE source texels along
// that axis in UV space, so a fixed 2x2 fetch would miss the last row/column
// and the stored maximum could be too small (not conservative). We therefore
// widen the footprint to 3 on odd axes. Coordinates are clamped to the source
// bounds so no out-of-range texelFetch is issued (e.g. when a source dimension
// is already 1 and the destination is clamped to 1).
void main() {
    ivec2 pos = ivec2(gl_GlobalInvocationID.xy);
    if (pos.x >= dstSize.x || pos.y >= dstSize.y) return;

    // LOD of the bound source image that feeds this destination level.
    int srcLod = (mipLevel == 0) ? 0 : (mipLevel - 1);

    ivec2 srcSize = textureSize(srcDepth, srcLod);
    ivec2 srcMax  = max(srcSize - ivec2(1), ivec2(0));

    // Top-left source texel of this destination texel's footprint.
    ivec2 srcPos = pos * 2;

    // 2 texels per axis, 3 when the source dimension is odd.
    ivec2 footprint = ivec2(2) + (srcSize & ivec2(1));

    // Conservative maximum: an object is occluded only when its nearest depth
    // is behind the FARTHEST occluder depth in the covered region.
    float maxDepth = texelFetch(srcDepth, min(srcPos, srcMax), srcLod).r;
    for (int y = 0; y < footprint.y; ++y) {
        for (int x = 0; x < footprint.x; ++x) {
            ivec2 p = min(srcPos + ivec2(x, y), srcMax);
            maxDepth = max(maxDepth, texelFetch(srcDepth, p, srcLod).r);
        }
    }

    imageStore(dstMip, pos, vec4(maxDepth));
}
|
||||
184
assets/shaders/m2_cull_hiz.comp.glsl
Normal file
184
assets/shaders/m2_cull_hiz.comp.glsl
Normal file
|
|
@ -0,0 +1,184 @@
|
|||
#version 450
|
||||
|
||||
// GPU Frustum + HiZ Occlusion Culling for M2 doodads (Phase 6.3).
|
||||
//
|
||||
// Two-level culling:
|
||||
// 1. Frustum — current-frame planes from viewProj.
|
||||
// 2. HiZ occlusion — projects bounding sphere into the PREVIOUS frame's
|
||||
// screen space via prevViewProj and samples the Hierarchical-Z pyramid
|
||||
// (built from said previous depth). Conservative safeguards:
|
||||
// • Only objects that were visible last frame get the HiZ test.
|
||||
// • AABB must be fully inside the screen (no border sampling).
|
||||
// • Bounding sphere is inflated by 50 % for the HiZ AABB.
|
||||
// • A depth bias is applied before the occlusion comparison.
|
||||
// • Nearest depth is projected via prevViewProj from sphere center
|
||||
// (avoids toCam mismatch between current and previous cameras).
|
||||
//
|
||||
// Falls back gracefully: if hizEnabled == 0, behaves identically to frustum-only.
|
||||
|
||||
layout(local_size_x = 64) in;
|
||||
|
||||
struct CullInstance {
|
||||
vec4 sphere; // xyz = world position, w = padded radius
|
||||
float effectiveMaxDistSq;
|
||||
uint flags; // bit 0 = valid, bit 1 = smoke, bit 2 = invisibleTrap,
|
||||
// bit 3 = previouslyVisible
|
||||
float _pad0;
|
||||
float _pad1;
|
||||
};
|
||||
|
||||
layout(std140, set = 0, binding = 0) uniform CullUniforms {
|
||||
vec4 frustumPlanes[6];
|
||||
vec4 cameraPos; // xyz = camera position, w = maxPossibleDistSq
|
||||
uint instanceCount;
|
||||
uint hizEnabled;
|
||||
uint hizMipLevels;
|
||||
uint _pad2;
|
||||
vec4 hizParams; // x = pyramidWidth, y = pyramidHeight, z = nearPlane, w = unused
|
||||
mat4 viewProj; // current frame view-projection
|
||||
mat4 prevViewProj; // PREVIOUS frame's view-projection for HiZ reprojection
|
||||
};
|
||||
|
||||
layout(std430, set = 0, binding = 1) readonly buffer CullInput {
|
||||
CullInstance cullInstances[];
|
||||
};
|
||||
|
||||
layout(std430, set = 0, binding = 2) buffer CullOutput {
|
||||
uint visibility[];
|
||||
};
|
||||
|
||||
layout(set = 1, binding = 0) uniform sampler2D hizPyramid;
|
||||
|
||||
// Screen-edge margin — skip HiZ if the AABB touches this border.
|
||||
// Depth data at screen edges is from unrelated geometry → false culls.
|
||||
const float SCREEN_EDGE_MARGIN = 0.02;
|
||||
|
||||
// Sphere inflation factor for HiZ screen AABB (50 % larger → very conservative).
|
||||
const float HIZ_SPHERE_INFLATE = 1.5;
|
||||
|
||||
// Depth bias — push nearest depth closer to camera so only objects
|
||||
// significantly behind occluders are culled.
|
||||
const float HIZ_DEPTH_BIAS = 0.02;
|
||||
|
||||
// Minimum screen-space size (pixels) for HiZ to engage.
|
||||
const float HIZ_MIN_SCREEN_PX = 6.0;
|
||||
|
||||
// HiZ occlusion test for one bounding sphere, evaluated in the PREVIOUS
// frame's screen space (prevViewProj) so it matches the depth the pyramid was
// built from. Returns true only when the sphere is provably behind the
// farthest occluder sampled from the pyramid; every uncertain case returns
// false (i.e. "keep visible").
bool isHiddenByHiZ(vec3 center, float radius) {
    // Inflate the sphere so the screen-space footprint is conservative.
    float inflated = radius * HIZ_SPHERE_INFLATE;

    vec4 clip = prevViewProj * vec4(center, 1.0);
    if (!(clip.w > 0.0)) return false;   // behind the previous camera

    vec3 ndc = clip.xyz / clip.w;

    // Screen-space extent via the lengths of the VP rows that map world XYZ
    // offsets to clip X / clip Y (upper bound regardless of camera rotation).
    vec3 rowX = vec3(prevViewProj[0][0], prevViewProj[1][0], prevViewProj[2][0]);
    vec3 rowY = vec3(prevViewProj[0][1], prevViewProj[1][1], prevViewProj[2][1]);
    float ndcRadX = inflated * length(rowX) / clip.w;
    float ndcRadY = inflated * length(rowY) / clip.w;
    float uvHalf  = max(ndcRadX, ndcRadY) * 0.5;

    vec2 uvMid = ndc.xy * 0.5 + 0.5;
    vec2 uvLo  = uvMid - uvHalf;
    vec2 uvHi  = uvMid + uvHalf;

    // Screen-edge guard: the whole AABB must lie inside the safe area and be
    // non-degenerate, otherwise skip the occlusion test.
    bool insideSafeArea =
        uvLo.x >= SCREEN_EDGE_MARGIN && uvLo.y >= SCREEN_EDGE_MARGIN &&
        uvHi.x <= (1.0 - SCREEN_EDGE_MARGIN) && uvHi.y <= (1.0 - SCREEN_EDGE_MARGIN) &&
        uvHi.x > uvLo.x && uvHi.y > uvLo.y;
    if (!insideSafeArea) return false;

    // Footprint size in pyramid texels; tiny footprints are not tested.
    float widthPx    = (uvHi.x - uvLo.x) * hizParams.x;
    float heightPx   = (uvHi.y - uvLo.y) * hizParams.y;
    float screenSize = max(widthPx, heightPx);
    if (!(screenSize >= HIZ_MIN_SCREEN_PX)) return false;

    // One level coarser than strictly required, clamped to the pyramid.
    float lod = ceil(log2(max(screenSize, 1.0))) + 1.0;
    lod = clamp(lod, 0.0, float(hizMipLevels - 1u));

    // Farthest occluder depth over the four AABB corners.
    float cornerA = textureLod(hizPyramid, uvLo, lod).r;
    float cornerB = textureLod(hizPyramid, vec2(uvHi.x, uvLo.y), lod).r;
    float cornerC = textureLod(hizPyramid, vec2(uvLo.x, uvHi.y), lod).r;
    float cornerD = textureLod(hizPyramid, uvHi, lod).r;
    float farthestOccluder = max(max(cornerA, cornerB), max(cornerC, cornerD));

    // Nearest sphere depth: centre NDC-Z minus the depth span derived from
    // the Z row of VP, minus a fixed bias.
    vec3 rowZ = vec3(prevViewProj[0][2], prevViewProj[1][2], prevViewProj[2][2]);
    float depthSpan    = inflated * length(rowZ) / clip.w;
    float nearestDepth = ndc.z - depthSpan - HIZ_DEPTH_BIAS;

    // Never cull against "empty" depth (pyramid value of 1.0).
    return nearestDepth > farthestOccluder && farthestOccluder < 1.0;
}

// Per-instance cull: flag check, distance limits, frustum planes, then the
// optional HiZ occlusion test. Writes 1 (visible) or 0 (culled) to
// visibility[id] exactly once.
void main() {
    uint id = gl_GlobalInvocationID.x;
    if (id >= instanceCount) return;

    CullInstance inst = cullInstances[id];
    uint  flagBits = inst.flags;
    vec3  center   = inst.sphere.xyz;
    float radius   = inst.sphere.w;

    // bit 0 must be set (valid); bits 1|2 (smoke, invisible trap) must be clear.
    bool visible = (flagBits & 1u) != 0u && (flagBits & 6u) == 0u;

    // Distance rejection: loose global bound first, then the per-instance bound.
    vec3  offset = center - cameraPos.xyz;
    float distSq = dot(offset, offset);
    visible = visible && !(distSq > cameraPos.w);
    visible = visible && !(distSq > inst.effectiveMaxDistSq);

    // Sphere vs. the six current-frame frustum planes.
    if (visible && radius > 0.0) {
        for (int p = 0; p < 6; ++p) {
            float signedDist = dot(frustumPlanes[p].xyz, center) + frustumPlanes[p].w;
            if (signedDist < -radius) {
                visible = false;
                break;
            }
        }
    }

    // HiZ only for instances flagged as rendered last frame (bit 3).
    bool wasVisible = (flagBits & 8u) != 0u;
    if (visible && hizEnabled != 0u && radius > 0.0 && wasVisible) {
        visible = !isHiddenByHiZ(center, radius);
    }

    visibility[id] = visible ? 1u : 0u;
}
|
||||
|
|
@ -163,10 +163,11 @@ void main() {
|
|||
|
||||
vec3 result;
|
||||
|
||||
// Sample shadow map for all WMO groups (interior groups with 0x2000 flag
|
||||
// include covered outdoor areas like archways/streets that should receive shadows)
|
||||
// Sample shadow map — skip entirely for interior groups (flag 0x2000).
|
||||
// Interior surfaces rely on pre-baked MOCV vertex-color lighting and the
|
||||
// directional shadow map only makes them darker without any benefit.
|
||||
float shadow = 1.0;
|
||||
if (shadowParams.x > 0.5) {
|
||||
if (isInterior == 0 && shadowParams.x > 0.5) {
|
||||
vec3 ldir = normalize(-lightDir.xyz);
|
||||
float normalOffset = SHADOW_TEXEL * 2.0 * (1.0 - abs(dot(norm, ldir)));
|
||||
vec3 biasedPos = FragPos + norm * normalOffset;
|
||||
|
|
@ -185,17 +186,20 @@ void main() {
|
|||
if (isLava != 0) {
|
||||
// Lava is self-luminous — bright emissive, no shadows
|
||||
result = texColor.rgb * 1.5;
|
||||
} else if (unlit != 0) {
|
||||
result = texColor.rgb * shadow;
|
||||
} else if (isInterior != 0) {
|
||||
// WMO interior: vertex colors (MOCV) are pre-baked lighting from the artist.
|
||||
// The MOHD ambient color tints/floors the vertex colors so dark spots don't
|
||||
// go completely black, matching the WoW client's interior shading.
|
||||
// We handle BOTH lit and unlit interior materials — directional
|
||||
// sun shadows and lighting are skipped for all interior groups.
|
||||
vec3 wmoAmbient = vec3(wmoAmbientR, wmoAmbientG, wmoAmbientB);
|
||||
// Clamp ambient to at least 0.3 to avoid total darkness when MOHD color is zero
|
||||
wmoAmbient = max(wmoAmbient, vec3(0.3));
|
||||
vec3 mocv = max(VertColor.rgb, wmoAmbient);
|
||||
result = texColor.rgb * mocv * shadow;
|
||||
result = texColor.rgb * mocv;
|
||||
} else if (unlit != 0) {
|
||||
// Outdoor unlit surface — still receives directional shadows
|
||||
result = texColor.rgb * shadow;
|
||||
} else {
|
||||
vec3 ldir = normalize(-lightDir.xyz);
|
||||
float diff = max(dot(norm, ldir), 0.0);
|
||||
|
|
|
|||
Binary file not shown.
|
|
@ -83,6 +83,7 @@ public:
|
|||
bool isSitting() const { return sitting; }
|
||||
bool isSwimming() const { return swimming; }
|
||||
bool isInsideWMO() const { return cachedInsideWMO; }
|
||||
bool isInsideInteriorWMO() const { return cachedInsideInteriorWMO; }
|
||||
void setGrounded(bool g) { grounded = g; }
|
||||
void setSitting(bool s) { sitting = s; }
|
||||
bool isOnTaxi() const { return externalFollow_; }
|
||||
|
|
|
|||
150
include/rendering/hiz_system.hpp
Normal file
150
include/rendering/hiz_system.hpp
Normal file
|
|
@ -0,0 +1,150 @@
|
|||
#pragma once
|
||||
|
||||
#include <vulkan/vulkan.h>
|
||||
#include <vk_mem_alloc.h>
|
||||
#include <glm/glm.hpp>
|
||||
#include <cstdint>
|
||||
#include <vector>
|
||||
|
||||
namespace wowee {
|
||||
namespace rendering {
|
||||
|
||||
class VkContext;
|
||||
|
||||
/**
|
||||
* Hierarchical-Z (HiZ) depth pyramid for GPU occlusion culling (Phase 6.3 Option B).
|
||||
*
|
||||
 * Builds a max-depth (farthest-depth) mip chain from the previous frame's depth buffer each frame.
|
||||
* The M2 cull compute shader samples this pyramid to reject objects hidden behind
|
||||
* geometry, complementing the existing frustum culling.
|
||||
*
|
||||
* Lifecycle:
|
||||
* initialize() — create pyramid image, sampler, compute pipeline, descriptors
|
||||
* buildPyramid() — dispatch compute to reduce depth → mip chain (once per frame)
|
||||
* shutdown() — destroy all Vulkan resources
|
||||
*
|
||||
* The pyramid is double-buffered (per frame-in-flight) so builds and reads
|
||||
* never race across concurrent GPU submissions.
|
||||
*/
|
||||
class HiZSystem {
|
||||
public:
|
||||
HiZSystem() = default;
|
||||
~HiZSystem();
|
||||
|
||||
HiZSystem(const HiZSystem&) = delete;
|
||||
HiZSystem& operator=(const HiZSystem&) = delete;
|
||||
|
||||
/**
|
||||
* Create all Vulkan resources.
|
||||
* @param ctx Vulkan context (device, allocator, etc.)
|
||||
* @param width Full-resolution depth buffer width
|
||||
* @param height Full-resolution depth buffer height
|
||||
* @return true on success
|
||||
*/
|
||||
[[nodiscard]] bool initialize(VkContext* ctx, uint32_t width, uint32_t height);
|
||||
|
||||
/**
|
||||
* Release all Vulkan resources.
|
||||
*/
|
||||
void shutdown();
|
||||
|
||||
/**
|
||||
* Rebuild the pyramid after a swapchain resize.
|
||||
* Safe to call repeatedly — destroys old resources first.
|
||||
*/
|
||||
[[nodiscard]] bool resize(uint32_t width, uint32_t height);
|
||||
|
||||
/**
|
||||
* Dispatch compute shader to build the HiZ pyramid from the current depth buffer.
|
||||
* Must be called AFTER the main scene pass has finished writing to the depth buffer.
|
||||
*
|
||||
* @param cmd Active command buffer (in recording state)
|
||||
* @param frameIndex Current frame-in-flight index (0 or 1)
|
||||
* @param depthImage Source depth image (VK_FORMAT_D32_SFLOAT)
|
||||
*/
|
||||
void buildPyramid(VkCommandBuffer cmd, uint32_t frameIndex, VkImage depthImage);
|
||||
|
||||
/**
|
||||
* @return Descriptor set layout for the HiZ pyramid sampler (set 1 for m2_cull_hiz).
|
||||
*/
|
||||
VkDescriptorSetLayout getDescriptorSetLayout() const { return hizSetLayout_; }
|
||||
|
||||
/**
|
||||
* @return Descriptor set for the given frame (sampler2D of the HiZ pyramid).
|
||||
* Bind as set 1 in the M2 HiZ cull pipeline.
|
||||
*/
|
||||
VkDescriptorSet getDescriptorSet(uint32_t frameIndex) const { return hizDescSet_[frameIndex]; }
|
||||
|
||||
/**
|
||||
* @return true if HiZ system is initialized and ready.
|
||||
*/
|
||||
bool isReady() const { return ready_; }
|
||||
|
||||
/**
|
||||
* @return Number of mip levels in the pyramid.
|
||||
*/
|
||||
uint32_t getMipLevels() const { return mipLevels_; }
|
||||
|
||||
/**
|
||||
* @return Pyramid base resolution (mip 0).
|
||||
*/
|
||||
uint32_t getPyramidWidth() const { return pyramidWidth_; }
|
||||
uint32_t getPyramidHeight() const { return pyramidHeight_; }
|
||||
|
||||
private:
|
||||
bool createPyramidImage();
|
||||
void destroyPyramidImage();
|
||||
bool createComputePipeline();
|
||||
void destroyComputePipeline();
|
||||
bool createDescriptors();
|
||||
void destroyDescriptors();
|
||||
|
||||
VkContext* ctx_ = nullptr;
|
||||
bool ready_ = false;
|
||||
|
||||
// Pyramid dimensions (mip 0 = half of full-res depth)
|
||||
uint32_t fullWidth_ = 0;
|
||||
uint32_t fullHeight_ = 0;
|
||||
uint32_t pyramidWidth_ = 0;
|
||||
uint32_t pyramidHeight_ = 0;
|
||||
uint32_t mipLevels_ = 0;
|
||||
|
||||
static constexpr uint32_t MAX_FRAMES = 2;
|
||||
|
||||
// Per-frame HiZ pyramid images (R32_SFLOAT, full mip chain)
|
||||
VkImage pyramidImage_[MAX_FRAMES] = {};
|
||||
VmaAllocation pyramidAlloc_[MAX_FRAMES] = {};
|
||||
VkImageView pyramidViewAll_[MAX_FRAMES] = {}; // View of all mip levels (for sampling)
|
||||
std::vector<VkImageView> pyramidMipViews_[MAX_FRAMES]; // Per-mip views (for storage image writes)
|
||||
|
||||
// Depth input — image view for sampling the depth buffer as a texture
|
||||
VkImageView depthSamplerView_[MAX_FRAMES] = {};
|
||||
|
||||
// Sampler for depth reads (nearest, clamp-to-edge)
|
||||
VkSampler depthSampler_ = VK_NULL_HANDLE;
|
||||
|
||||
// Compute pipeline for building the pyramid
|
||||
VkPipeline buildPipeline_ = VK_NULL_HANDLE;
|
||||
VkPipelineLayout buildPipelineLayout_ = VK_NULL_HANDLE;
|
||||
|
||||
// Descriptor set layout for build pipeline (set 0: src sampler + dst storage image)
|
||||
VkDescriptorSetLayout buildSetLayout_ = VK_NULL_HANDLE;
|
||||
VkDescriptorPool buildDescPool_ = VK_NULL_HANDLE;
|
||||
// Per-frame, per-mip descriptor sets for pyramid build
|
||||
std::vector<VkDescriptorSet> buildDescSets_[MAX_FRAMES];
|
||||
|
||||
// HiZ sampling descriptor: exposed to M2 cull shader (set 1: combined image sampler)
|
||||
VkDescriptorSetLayout hizSetLayout_ = VK_NULL_HANDLE;
|
||||
VkDescriptorPool hizDescPool_ = VK_NULL_HANDLE;
|
||||
VkDescriptorSet hizDescSet_[MAX_FRAMES] = {};
|
||||
|
||||
// Push constant for build shader
|
||||
struct HiZBuildPushConstants {
|
||||
int32_t dstWidth;
|
||||
int32_t dstHeight;
|
||||
int32_t mipLevel;
|
||||
};
|
||||
};
|
||||
|
||||
} // namespace rendering
|
||||
} // namespace wowee
|
||||
|
|
@ -28,6 +28,7 @@ namespace rendering {
|
|||
class Camera;
|
||||
class VkContext;
|
||||
class VkTexture;
|
||||
class HiZSystem;
|
||||
|
||||
/**
|
||||
* GPU representation of an M2 model
|
||||
|
|
@ -299,6 +300,13 @@ public:
|
|||
void dispatchCullCompute(VkCommandBuffer cmd, uint32_t frameIndex, const Camera& camera);
|
||||
void render(VkCommandBuffer cmd, VkDescriptorSet perFrameSet, const Camera& camera);
|
||||
|
||||
/** Set the HiZ system for occlusion culling (Phase 6.3). nullptr disables HiZ. */
|
||||
void setHiZSystem(HiZSystem* hiz) { hizSystem_ = hiz; }
|
||||
|
||||
/** Ensure GPU→CPU cull output is visible to the host after a fence wait.
|
||||
* Call after the early compute submission finishes (endSingleTimeCommands). */
|
||||
void invalidateCullOutput(uint32_t frameIndex);
|
||||
|
||||
/**
|
||||
* Initialize shadow pipeline (Phase 7)
|
||||
*/
|
||||
|
|
@ -437,7 +445,7 @@ private:
|
|||
|
||||
// Mega bone SSBO — consolidates all per-instance bone matrices into a single buffer per frame.
|
||||
// Replaces per-instance bone SSBOs for fewer descriptor binds and enables GPU instancing.
|
||||
static constexpr uint32_t MEGA_BONE_MAX_INSTANCES = 2048;
|
||||
static constexpr uint32_t MEGA_BONE_MAX_INSTANCES = 4096;
|
||||
static constexpr uint32_t MAX_BONES_PER_INSTANCE = 128;
|
||||
::VkBuffer megaBoneBuffer_[2] = {};
|
||||
VmaAllocation megaBoneAlloc_[2] = {};
|
||||
|
|
@ -472,19 +480,26 @@ private:
|
|||
uint32_t flags; // bit 0 = valid, bit 1 = smoke, bit 2 = invisibleTrap
|
||||
float _pad[2] = {};
|
||||
};
|
||||
struct CullUniformsGPU { // matches CullUniforms in m2_cull.comp.glsl (128 bytes, std140)
|
||||
glm::vec4 frustumPlanes[6]; // xyz = normal, w = distance
|
||||
glm::vec4 cameraPos; // xyz = camera position, w = maxPossibleDistSq
|
||||
uint32_t instanceCount;
|
||||
uint32_t _pad[3] = {};
|
||||
};
|
||||
struct CullUniformsGPU { // matches CullUniforms in m2_cull_hiz.comp.glsl (std140)
|
||||
glm::vec4 frustumPlanes[6]; // xyz = normal, w = distance (96 bytes)
|
||||
glm::vec4 cameraPos; // xyz = camera position, w = maxPossibleDistSq (16 bytes)
|
||||
uint32_t instanceCount; // (4 bytes)
|
||||
uint32_t hizEnabled; // 1 = HiZ occlusion active (4 bytes)
|
||||
uint32_t hizMipLevels; // mip levels in HiZ pyramid (4 bytes)
|
||||
uint32_t _pad2 = {}; // (4 bytes)
|
||||
glm::vec4 hizParams; // x=pyramidW, y=pyramidH, z=nearPlane, w=unused (16 bytes)
|
||||
glm::mat4 viewProj; // current frame view-projection (64 bytes)
|
||||
glm::mat4 prevViewProj; // previous frame VP for HiZ reprojection (64 bytes)
|
||||
}; // Total: 272 bytes
|
||||
static constexpr uint32_t MAX_CULL_INSTANCES = 24576;
|
||||
VkPipeline cullPipeline_ = VK_NULL_HANDLE;
|
||||
VkPipelineLayout cullPipelineLayout_ = VK_NULL_HANDLE;
|
||||
VkPipeline cullPipeline_ = VK_NULL_HANDLE; // frustum-only (fallback)
|
||||
VkPipeline cullHiZPipeline_ = VK_NULL_HANDLE; // frustum + HiZ occlusion
|
||||
VkPipelineLayout cullPipelineLayout_ = VK_NULL_HANDLE; // frustum-only layout (set 0)
|
||||
VkPipelineLayout cullHiZPipelineLayout_ = VK_NULL_HANDLE; // HiZ layout (set 0 + set 1)
|
||||
VkDescriptorSetLayout cullSetLayout_ = VK_NULL_HANDLE;
|
||||
VkDescriptorPool cullDescPool_ = VK_NULL_HANDLE;
|
||||
VkDescriptorSet cullSet_[2] = {}; // double-buffered
|
||||
::VkBuffer cullUniformBuffer_[2] = {}; // frustum planes + camera (UBO)
|
||||
::VkBuffer cullUniformBuffer_[2] = {}; // frustum planes + camera + HiZ params (UBO)
|
||||
VmaAllocation cullUniformAlloc_[2] = {};
|
||||
void* cullUniformMapped_[2] = {};
|
||||
::VkBuffer cullInputBuffer_[2] = {}; // per-instance bounding sphere + flags (SSBO)
|
||||
|
|
@ -494,6 +509,20 @@ private:
|
|||
VmaAllocation cullOutputAlloc_[2] = {};
|
||||
void* cullOutputMapped_[2] = {};
|
||||
|
||||
// HiZ occlusion culling (Phase 6.3) — optional, driven by Renderer
|
||||
HiZSystem* hizSystem_ = nullptr;
|
||||
|
||||
// Previous frame's view-projection for temporal reprojection in HiZ culling.
|
||||
// Stored each frame so the cull shader can project into the same screen space
|
||||
// as the depth buffer the HiZ pyramid was built from.
|
||||
glm::mat4 prevVP_{1.0f};
|
||||
|
||||
// Per-instance visibility from the previous frame. Used to set the
|
||||
// `previouslyVisible` flag (bit 3) on each CullInstance so the shader
|
||||
// skips the HiZ test for objects that weren't rendered last frame
|
||||
// (their depth data is unreliable).
|
||||
std::vector<uint8_t> prevFrameVisible_;
|
||||
|
||||
// Dynamic ribbon vertex buffer (CPU-written triangle strip)
|
||||
static constexpr size_t MAX_RIBBON_VERTS = 2048; // 9 floats each
|
||||
::VkBuffer ribbonVB_ = VK_NULL_HANDLE;
|
||||
|
|
|
|||
|
|
@ -58,6 +58,7 @@ class ChargeEffect;
|
|||
class SwimEffects;
|
||||
class RenderGraph;
|
||||
class OverlaySystem;
|
||||
class HiZSystem;
|
||||
|
||||
class Renderer {
|
||||
public:
|
||||
|
|
@ -363,6 +364,9 @@ private:
|
|||
std::unique_ptr<RenderGraph> renderGraph_;
|
||||
void buildFrameGraph(game::GameHandler* gameHandler);
|
||||
|
||||
// HiZ occlusion culling — builds depth pyramid each frame
|
||||
std::unique_ptr<HiZSystem> hizSystem_;
|
||||
|
||||
// CPU timing stats (last frame/update).
|
||||
double lastUpdateMs = 0.0;
|
||||
double lastRenderMs = 0.0;
|
||||
|
|
|
|||
517
src/rendering/hiz_system.cpp
Normal file
517
src/rendering/hiz_system.cpp
Normal file
|
|
@ -0,0 +1,517 @@
|
|||
#include "rendering/hiz_system.hpp"
|
||||
#include "rendering/vk_context.hpp"
|
||||
#include "rendering/vk_shader.hpp"
|
||||
#include "core/logger.hpp"
|
||||
#include "core/profiler.hpp"
|
||||
#include <cmath>
|
||||
#include <algorithm>
|
||||
|
||||
namespace wowee {
|
||||
namespace rendering {
|
||||
|
||||
// Destructor: delegates to shutdown(), which returns immediately when no
// context is set and otherwise releases the descriptors, pyramid and pipeline.
HiZSystem::~HiZSystem() {
|
||||
shutdown();
|
||||
}
|
||||
|
||||
/**
 * Create the build pipeline, the per-frame pyramid images and all descriptors.
 *
 * Rejects a null context or a zero-sized depth buffer. If the system is
 * already initialized it is shut down first so the previous Vulkan objects
 * are not leaked. On any failure every partially created resource is
 * released and the system is returned to the uninitialized state
 * (ctx_ == nullptr, ready_ == false), so a later resize() is a clean no-op.
 *
 * @param ctx    Vulkan context (device, allocator)
 * @param width  Full-resolution depth buffer width
 * @param height Full-resolution depth buffer height
 * @return true when the system is ready for buildPyramid()
 */
bool HiZSystem::initialize(VkContext* ctx, uint32_t width, uint32_t height) {
    if (!ctx || width == 0 || height == 0) return false;

    // Re-initialization: drop the old resources instead of overwriting handles.
    if (ctx_) shutdown();

    ctx_ = ctx;
    ready_ = false;
    fullWidth_ = width;
    fullHeight_ = height;

    // Pyramid mip 0 is half the full resolution (the first downscale)
    pyramidWidth_ = std::max(1u, width / 2);
    pyramidHeight_ = std::max(1u, height / 2);
    mipLevels_ = static_cast<uint32_t>(std::floor(std::log2(std::max(pyramidWidth_, pyramidHeight_)))) + 1;

    const bool created = createComputePipeline() && createPyramidImage() && createDescriptors();
    if (!created) {
        // The destroy helpers are also run by shutdown() on a partially
        // initialized instance, so they are expected to tolerate null handles.
        // createPyramidImage() can fail after creating some images/views,
        // hence the explicit destroyPyramidImage().
        destroyDescriptors();
        destroyPyramidImage();
        destroyComputePipeline();
        ctx_ = nullptr;
        return false;
    }

    ready_ = true;
    LOG_INFO("HiZSystem: initialized ", pyramidWidth_, "x", pyramidHeight_,
             " pyramid (", mipLevels_, " mips) from ", width, "x", height, " depth");
    return true;
}
|
||||
|
||||
/**
 * Release every Vulkan resource owned by the system.
 *
 * No-op when no context is set. Waits for the device to go idle before
 * destroying anything, then clears the context pointer and the ready flag.
 */
void HiZSystem::shutdown() {
    if (ctx_ == nullptr) {
        return;
    }

    // Nothing may still be in flight when the handles go away.
    vkDeviceWaitIdle(ctx_->getDevice());

    destroyDescriptors();
    destroyPyramidImage();
    destroyComputePipeline();

    ready_ = false;
    ctx_ = nullptr;
}
|
||||
|
||||
bool HiZSystem::resize(uint32_t width, uint32_t height) {
|
||||
if (!ctx_) return false;
|
||||
VkDevice device = ctx_->getDevice();
|
||||
vkDeviceWaitIdle(device);
|
||||
|
||||
destroyDescriptors();
|
||||
destroyPyramidImage();
|
||||
|
||||
fullWidth_ = width;
|
||||
fullHeight_ = height;
|
||||
pyramidWidth_ = std::max(1u, width / 2);
|
||||
pyramidHeight_ = std::max(1u, height / 2);
|
||||
mipLevels_ = static_cast<uint32_t>(std::floor(std::log2(std::max(pyramidWidth_, pyramidHeight_)))) + 1;
|
||||
|
||||
if (!createPyramidImage()) return false;
|
||||
if (!createDescriptors()) { destroyPyramidImage(); return false; }
|
||||
|
||||
ready_ = true;
|
||||
LOG_INFO("HiZSystem: resized to ", pyramidWidth_, "x", pyramidHeight_,
|
||||
" (", mipLevels_, " mips)");
|
||||
return true;
|
||||
}
|
||||
|
||||
// --- Pyramid image creation ---
|
||||
|
||||
bool HiZSystem::createPyramidImage() {
|
||||
VkDevice device = ctx_->getDevice();
|
||||
VmaAllocator alloc = ctx_->getAllocator();
|
||||
|
||||
for (uint32_t f = 0; f < MAX_FRAMES; f++) {
|
||||
// Create R32F image with full mip chain
|
||||
VkImageCreateInfo imgCi{VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO};
|
||||
imgCi.imageType = VK_IMAGE_TYPE_2D;
|
||||
imgCi.format = VK_FORMAT_R32_SFLOAT;
|
||||
imgCi.extent = {pyramidWidth_, pyramidHeight_, 1};
|
||||
imgCi.mipLevels = mipLevels_;
|
||||
imgCi.arrayLayers = 1;
|
||||
imgCi.samples = VK_SAMPLE_COUNT_1_BIT;
|
||||
imgCi.tiling = VK_IMAGE_TILING_OPTIMAL;
|
||||
imgCi.usage = VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_STORAGE_BIT;
|
||||
imgCi.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED;
|
||||
|
||||
VmaAllocationCreateInfo allocCi{};
|
||||
allocCi.usage = VMA_MEMORY_USAGE_GPU_ONLY;
|
||||
|
||||
if (vmaCreateImage(alloc, &imgCi, &allocCi, &pyramidImage_[f], &pyramidAlloc_[f], nullptr) != VK_SUCCESS) {
|
||||
LOG_ERROR("HiZSystem: failed to create pyramid image for frame ", f);
|
||||
return false;
|
||||
}
|
||||
|
||||
// View of ALL mip levels (for sampling in the cull shader)
|
||||
VkImageViewCreateInfo viewCi{VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO};
|
||||
viewCi.image = pyramidImage_[f];
|
||||
viewCi.viewType = VK_IMAGE_VIEW_TYPE_2D;
|
||||
viewCi.format = VK_FORMAT_R32_SFLOAT;
|
||||
viewCi.subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;
|
||||
viewCi.subresourceRange.baseMipLevel = 0;
|
||||
viewCi.subresourceRange.levelCount = mipLevels_;
|
||||
viewCi.subresourceRange.layerCount = 1;
|
||||
|
||||
if (vkCreateImageView(device, &viewCi, nullptr, &pyramidViewAll_[f]) != VK_SUCCESS) {
|
||||
LOG_ERROR("HiZSystem: failed to create pyramid view-all for frame ", f);
|
||||
return false;
|
||||
}
|
||||
|
||||
// Per-mip views (for storage image writes in the build shader)
|
||||
pyramidMipViews_[f].resize(mipLevels_, VK_NULL_HANDLE);
|
||||
for (uint32_t mip = 0; mip < mipLevels_; mip++) {
|
||||
VkImageViewCreateInfo mipViewCi{VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO};
|
||||
mipViewCi.image = pyramidImage_[f];
|
||||
mipViewCi.viewType = VK_IMAGE_VIEW_TYPE_2D;
|
||||
mipViewCi.format = VK_FORMAT_R32_SFLOAT;
|
||||
mipViewCi.subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;
|
||||
mipViewCi.subresourceRange.baseMipLevel = mip;
|
||||
mipViewCi.subresourceRange.levelCount = 1;
|
||||
mipViewCi.subresourceRange.layerCount = 1;
|
||||
|
||||
if (vkCreateImageView(device, &mipViewCi, nullptr, &pyramidMipViews_[f][mip]) != VK_SUCCESS) {
|
||||
LOG_ERROR("HiZSystem: failed to create mip ", mip, " view for frame ", f);
|
||||
return false;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Sampler for depth reads and HiZ pyramid reads (nearest, clamp)
|
||||
VkSamplerCreateInfo samplerCi{VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO};
|
||||
samplerCi.magFilter = VK_FILTER_NEAREST;
|
||||
samplerCi.minFilter = VK_FILTER_NEAREST;
|
||||
samplerCi.mipmapMode = VK_SAMPLER_MIPMAP_MODE_NEAREST;
|
||||
samplerCi.addressModeU = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE;
|
||||
samplerCi.addressModeV = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE;
|
||||
samplerCi.addressModeW = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE;
|
||||
samplerCi.maxLod = static_cast<float>(mipLevels_);
|
||||
|
||||
if (vkCreateSampler(device, &samplerCi, nullptr, &depthSampler_) != VK_SUCCESS) {
|
||||
LOG_ERROR("HiZSystem: failed to create sampler");
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
void HiZSystem::destroyPyramidImage() {
|
||||
if (!ctx_) return;
|
||||
VkDevice device = ctx_->getDevice();
|
||||
VmaAllocator alloc = ctx_->getAllocator();
|
||||
|
||||
if (depthSampler_) { vkDestroySampler(device, depthSampler_, nullptr); depthSampler_ = VK_NULL_HANDLE; }
|
||||
|
||||
for (uint32_t f = 0; f < MAX_FRAMES; f++) {
|
||||
for (auto& view : pyramidMipViews_[f]) {
|
||||
if (view) { vkDestroyImageView(device, view, nullptr); view = VK_NULL_HANDLE; }
|
||||
}
|
||||
pyramidMipViews_[f].clear();
|
||||
|
||||
if (pyramidViewAll_[f]) { vkDestroyImageView(device, pyramidViewAll_[f], nullptr); pyramidViewAll_[f] = VK_NULL_HANDLE; }
|
||||
if (depthSamplerView_[f]) { vkDestroyImageView(device, depthSamplerView_[f], nullptr); depthSamplerView_[f] = VK_NULL_HANDLE; }
|
||||
if (pyramidImage_[f]) { vmaDestroyImage(alloc, pyramidImage_[f], pyramidAlloc_[f]); pyramidImage_[f] = VK_NULL_HANDLE; }
|
||||
}
|
||||
}
|
||||
|
||||
// --- Compute pipeline ---
|
||||
|
||||
bool HiZSystem::createComputePipeline() {
|
||||
VkDevice device = ctx_->getDevice();
|
||||
|
||||
// Build descriptor set layout for pyramid build (set 0):
|
||||
// binding 0: combined image sampler (source depth / previous mip)
|
||||
// binding 1: storage image (destination mip)
|
||||
VkDescriptorSetLayoutBinding bindings[2] = {};
|
||||
bindings[0].binding = 0;
|
||||
bindings[0].descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER;
|
||||
bindings[0].descriptorCount = 1;
|
||||
bindings[0].stageFlags = VK_SHADER_STAGE_COMPUTE_BIT;
|
||||
bindings[1].binding = 1;
|
||||
bindings[1].descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE;
|
||||
bindings[1].descriptorCount = 1;
|
||||
bindings[1].stageFlags = VK_SHADER_STAGE_COMPUTE_BIT;
|
||||
|
||||
VkDescriptorSetLayoutCreateInfo layoutCi{VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO};
|
||||
layoutCi.bindingCount = 2;
|
||||
layoutCi.pBindings = bindings;
|
||||
if (vkCreateDescriptorSetLayout(device, &layoutCi, nullptr, &buildSetLayout_) != VK_SUCCESS) {
|
||||
LOG_ERROR("HiZSystem: failed to create build set layout");
|
||||
return false;
|
||||
}
|
||||
|
||||
// HiZ sampling layout (for M2 cull shader, set 1):
|
||||
// binding 0: combined image sampler (HiZ pyramid, all mips)
|
||||
VkDescriptorSetLayoutBinding hizBinding{};
|
||||
hizBinding.binding = 0;
|
||||
hizBinding.descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER;
|
||||
hizBinding.descriptorCount = 1;
|
||||
hizBinding.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT;
|
||||
|
||||
VkDescriptorSetLayoutCreateInfo hizLayoutCi{VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO};
|
||||
hizLayoutCi.bindingCount = 1;
|
||||
hizLayoutCi.pBindings = &hizBinding;
|
||||
if (vkCreateDescriptorSetLayout(device, &hizLayoutCi, nullptr, &hizSetLayout_) != VK_SUCCESS) {
|
||||
LOG_ERROR("HiZSystem: failed to create HiZ set layout");
|
||||
return false;
|
||||
}
|
||||
|
||||
// Push constant range for build shader
|
||||
VkPushConstantRange pushRange{};
|
||||
pushRange.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT;
|
||||
pushRange.offset = 0;
|
||||
pushRange.size = sizeof(HiZBuildPushConstants);
|
||||
|
||||
VkPipelineLayoutCreateInfo plCi{VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO};
|
||||
plCi.setLayoutCount = 1;
|
||||
plCi.pSetLayouts = &buildSetLayout_;
|
||||
plCi.pushConstantRangeCount = 1;
|
||||
plCi.pPushConstantRanges = &pushRange;
|
||||
if (vkCreatePipelineLayout(device, &plCi, nullptr, &buildPipelineLayout_) != VK_SUCCESS) {
|
||||
LOG_ERROR("HiZSystem: failed to create build pipeline layout");
|
||||
return false;
|
||||
}
|
||||
|
||||
// Load and create compute pipeline
|
||||
VkShaderModule buildShader;
|
||||
if (!buildShader.loadFromFile(device, "assets/shaders/hiz_build.comp.spv")) {
|
||||
LOG_ERROR("HiZSystem: failed to load hiz_build.comp.spv");
|
||||
return false;
|
||||
}
|
||||
|
||||
VkComputePipelineCreateInfo cpCi{VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO};
|
||||
cpCi.stage = buildShader.stageInfo(VK_SHADER_STAGE_COMPUTE_BIT);
|
||||
cpCi.layout = buildPipelineLayout_;
|
||||
if (vkCreateComputePipelines(device, ctx_->getPipelineCache(), 1, &cpCi, nullptr, &buildPipeline_) != VK_SUCCESS) {
|
||||
LOG_ERROR("HiZSystem: failed to create build compute pipeline");
|
||||
buildShader.destroy();
|
||||
return false;
|
||||
}
|
||||
buildShader.destroy();
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
void HiZSystem::destroyComputePipeline() {
|
||||
if (!ctx_) return;
|
||||
VkDevice device = ctx_->getDevice();
|
||||
|
||||
if (buildPipeline_) { vkDestroyPipeline(device, buildPipeline_, nullptr); buildPipeline_ = VK_NULL_HANDLE; }
|
||||
if (buildPipelineLayout_) { vkDestroyPipelineLayout(device, buildPipelineLayout_, nullptr); buildPipelineLayout_ = VK_NULL_HANDLE; }
|
||||
if (buildSetLayout_) { vkDestroyDescriptorSetLayout(device, buildSetLayout_, nullptr); buildSetLayout_ = VK_NULL_HANDLE; }
|
||||
if (hizSetLayout_) { vkDestroyDescriptorSetLayout(device, hizSetLayout_, nullptr); hizSetLayout_ = VK_NULL_HANDLE; }
|
||||
}
|
||||
|
||||
// --- Descriptors ---
|
||||
|
||||
bool HiZSystem::createDescriptors() {
|
||||
VkDevice device = ctx_->getDevice();
|
||||
|
||||
// Pool: per-frame × per-mip build sets + 2 HiZ sampling sets
|
||||
// Each build set needs 1 sampler + 1 storage image
|
||||
// Each HiZ sampling set needs 1 sampler
|
||||
const uint32_t totalBuildSets = MAX_FRAMES * mipLevels_;
|
||||
const uint32_t totalHizSets = MAX_FRAMES;
|
||||
const uint32_t totalSets = totalBuildSets + totalHizSets;
|
||||
|
||||
VkDescriptorPoolSize poolSizes[2] = {};
|
||||
poolSizes[0] = {VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, totalBuildSets + totalHizSets};
|
||||
poolSizes[1] = {VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, totalBuildSets};
|
||||
|
||||
VkDescriptorPoolCreateInfo poolCi{VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO};
|
||||
poolCi.maxSets = totalSets;
|
||||
poolCi.poolSizeCount = 2;
|
||||
poolCi.pPoolSizes = poolSizes;
|
||||
if (vkCreateDescriptorPool(device, &poolCi, nullptr, &buildDescPool_) != VK_SUCCESS) {
|
||||
LOG_ERROR("HiZSystem: failed to create descriptor pool");
|
||||
return false;
|
||||
}
|
||||
|
||||
// We use the same pool for both build and HiZ sets — simpler cleanup
|
||||
hizDescPool_ = VK_NULL_HANDLE; // sharing buildDescPool_
|
||||
|
||||
for (uint32_t f = 0; f < MAX_FRAMES; f++) {
|
||||
// Create a temporary depth image view for sampling the depth buffer.
|
||||
// This is SEPARATE from the VkContext's depth image view because we need
|
||||
// DEPTH aspect sampling which requires specific format view.
|
||||
{
|
||||
VkImage depthSrc = ctx_->getDepthCopySourceImage();
|
||||
VkImageViewCreateInfo viewCi{VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO};
|
||||
viewCi.image = depthSrc;
|
||||
viewCi.viewType = VK_IMAGE_VIEW_TYPE_2D;
|
||||
viewCi.format = ctx_->getDepthFormat();
|
||||
viewCi.subresourceRange.aspectMask = VK_IMAGE_ASPECT_DEPTH_BIT;
|
||||
viewCi.subresourceRange.levelCount = 1;
|
||||
viewCi.subresourceRange.layerCount = 1;
|
||||
if (vkCreateImageView(device, &viewCi, nullptr, &depthSamplerView_[f]) != VK_SUCCESS) {
|
||||
LOG_ERROR("HiZSystem: failed to create depth sampler view for frame ", f);
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
// Allocate per-mip build descriptor sets
|
||||
buildDescSets_[f].resize(mipLevels_);
|
||||
for (uint32_t mip = 0; mip < mipLevels_; mip++) {
|
||||
VkDescriptorSetAllocateInfo allocInfo{VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO};
|
||||
allocInfo.descriptorPool = buildDescPool_;
|
||||
allocInfo.descriptorSetCount = 1;
|
||||
allocInfo.pSetLayouts = &buildSetLayout_;
|
||||
if (vkAllocateDescriptorSets(device, &allocInfo, &buildDescSets_[f][mip]) != VK_SUCCESS) {
|
||||
LOG_ERROR("HiZSystem: failed to allocate build desc set frame=", f, " mip=", mip);
|
||||
return false;
|
||||
}
|
||||
|
||||
// Write descriptors:
|
||||
// Binding 0 (sampler): mip 0 reads depth buffer, mip N reads pyramid mip N-1
|
||||
VkDescriptorImageInfo srcInfo{};
|
||||
srcInfo.sampler = depthSampler_;
|
||||
if (mip == 0) {
|
||||
srcInfo.imageView = depthSamplerView_[f];
|
||||
srcInfo.imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
|
||||
} else {
|
||||
srcInfo.imageView = pyramidViewAll_[f]; // shader uses texelFetch with explicit mip
|
||||
srcInfo.imageLayout = VK_IMAGE_LAYOUT_GENERAL;
|
||||
}
|
||||
|
||||
// Binding 1 (storage image): write to current mip
|
||||
VkDescriptorImageInfo dstInfo{};
|
||||
dstInfo.imageView = pyramidMipViews_[f][mip];
|
||||
dstInfo.imageLayout = VK_IMAGE_LAYOUT_GENERAL;
|
||||
|
||||
VkWriteDescriptorSet writes[2] = {};
|
||||
writes[0] = {VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET};
|
||||
writes[0].dstSet = buildDescSets_[f][mip];
|
||||
writes[0].dstBinding = 0;
|
||||
writes[0].descriptorCount = 1;
|
||||
writes[0].descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER;
|
||||
writes[0].pImageInfo = &srcInfo;
|
||||
|
||||
writes[1] = {VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET};
|
||||
writes[1].dstSet = buildDescSets_[f][mip];
|
||||
writes[1].dstBinding = 1;
|
||||
writes[1].descriptorCount = 1;
|
||||
writes[1].descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE;
|
||||
writes[1].pImageInfo = &dstInfo;
|
||||
|
||||
vkUpdateDescriptorSets(device, 2, writes, 0, nullptr);
|
||||
}
|
||||
|
||||
// Allocate HiZ sampling descriptor set (for M2 cull shader)
|
||||
{
|
||||
VkDescriptorSetAllocateInfo allocInfo{VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO};
|
||||
allocInfo.descriptorPool = buildDescPool_;
|
||||
allocInfo.descriptorSetCount = 1;
|
||||
allocInfo.pSetLayouts = &hizSetLayout_;
|
||||
if (vkAllocateDescriptorSets(device, &allocInfo, &hizDescSet_[f]) != VK_SUCCESS) {
|
||||
LOG_ERROR("HiZSystem: failed to allocate HiZ sampling desc set for frame ", f);
|
||||
return false;
|
||||
}
|
||||
|
||||
VkDescriptorImageInfo hizInfo{};
|
||||
hizInfo.sampler = depthSampler_;
|
||||
hizInfo.imageView = pyramidViewAll_[f];
|
||||
hizInfo.imageLayout = VK_IMAGE_LAYOUT_GENERAL;
|
||||
|
||||
VkWriteDescriptorSet write{VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET};
|
||||
write.dstSet = hizDescSet_[f];
|
||||
write.dstBinding = 0;
|
||||
write.descriptorCount = 1;
|
||||
write.descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER;
|
||||
write.pImageInfo = &hizInfo;
|
||||
vkUpdateDescriptorSets(device, 1, &write, 0, nullptr);
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
void HiZSystem::destroyDescriptors() {
|
||||
if (!ctx_) return;
|
||||
VkDevice device = ctx_->getDevice();
|
||||
|
||||
// All descriptor sets are freed when pool is destroyed
|
||||
if (buildDescPool_) { vkDestroyDescriptorPool(device, buildDescPool_, nullptr); buildDescPool_ = VK_NULL_HANDLE; }
|
||||
// hizDescPool_ shares buildDescPool_, so nothing extra to destroy
|
||||
|
||||
for (uint32_t f = 0; f < MAX_FRAMES; f++) {
|
||||
buildDescSets_[f].clear();
|
||||
hizDescSet_[f] = VK_NULL_HANDLE;
|
||||
if (depthSamplerView_[f]) { vkDestroyImageView(device, depthSamplerView_[f], nullptr); depthSamplerView_[f] = VK_NULL_HANDLE; }
|
||||
}
|
||||
}
|
||||
|
||||
// --- Pyramid build dispatch ---
|
||||
|
||||
/// Records the commands that rebuild one frame's HiZ depth pyramid.
///
/// The depth image is moved from DEPTH_STENCIL_ATTACHMENT_OPTIMAL to
/// SHADER_READ_ONLY_OPTIMAL for the duration of the build and moved back at
/// the end. The pyramid image is transitioned from UNDEFINED to GENERAL, so
/// its previous contents are discarded. One compute dispatch is recorded per
/// mip level, each followed by a write->read barrier on the mip it produced.
///
/// @param cmd         Command buffer in the recording state.
/// @param frameIndex  Index of the per-frame pyramid / descriptor sets to use;
///                    the call is a no-op when it is out of range.
/// @param depthImage  Depth image sampled by the mip 0 pass. The barriers
///                    below expect it in DEPTH_STENCIL_ATTACHMENT_OPTIMAL.
void HiZSystem::buildPyramid(VkCommandBuffer cmd, uint32_t frameIndex, VkImage depthImage) {
    ZoneScopedN("HiZSystem::buildPyramid");
    if (!ready_ || !buildPipeline_) return;

    // Never index the per-frame arrays with a frame that has no descriptor
    // sets for every mip level.
    if (frameIndex >= MAX_FRAMES || buildDescSets_[frameIndex].size() < mipLevels_) return;

    // Depth: DEPTH_STENCIL_ATTACHMENT -> SHADER_READ_ONLY so the mip 0 pass can sample it.
    // NOTE(review): only the DEPTH aspect is transitioned. If the depth format
    // can be a combined depth/stencil format, the barrier must also cover the
    // stencil aspect unless separateDepthStencilLayouts is enabled - confirm.
    {
        VkImageMemoryBarrier barrier{VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER};
        barrier.srcAccessMask = VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT;
        barrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT;
        barrier.oldLayout = VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL;
        barrier.newLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
        barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
        barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
        barrier.image = depthImage;
        barrier.subresourceRange.aspectMask = VK_IMAGE_ASPECT_DEPTH_BIT;
        barrier.subresourceRange.levelCount = 1;
        barrier.subresourceRange.layerCount = 1;

        vkCmdPipelineBarrier(cmd,
            VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT,
            VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
            0, 0, nullptr, 0, nullptr, 1, &barrier);
    }

    // Pyramid (all mips): UNDEFINED -> GENERAL for storage writes. The old
    // contents are not needed because every mip is rewritten below.
    {
        VkImageMemoryBarrier barrier{VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER};
        barrier.srcAccessMask = 0;
        barrier.dstAccessMask = VK_ACCESS_SHADER_WRITE_BIT;
        barrier.oldLayout = VK_IMAGE_LAYOUT_UNDEFINED;
        barrier.newLayout = VK_IMAGE_LAYOUT_GENERAL;
        barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
        barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
        barrier.image = pyramidImage_[frameIndex];
        barrier.subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;
        barrier.subresourceRange.baseMipLevel = 0;
        barrier.subresourceRange.levelCount = mipLevels_;
        barrier.subresourceRange.layerCount = 1;

        vkCmdPipelineBarrier(cmd,
            VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
            VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
            0, 0, nullptr, 0, nullptr, 1, &barrier);
    }

    vkCmdBindPipeline(cmd, VK_PIPELINE_BIND_POINT_COMPUTE, buildPipeline_);

    // Mips are built in order because each level reads the one before it.
    uint32_t mipW = pyramidWidth_;
    uint32_t mipH = pyramidHeight_;

    for (uint32_t mip = 0; mip < mipLevels_; mip++) {
        // Descriptor set for this level: source (depth or pyramid) + destination mip.
        vkCmdBindDescriptorSets(cmd, VK_PIPELINE_BIND_POINT_COMPUTE,
            buildPipelineLayout_, 0, 1, &buildDescSets_[frameIndex][mip], 0, nullptr);

        // Push constants: destination size + mip level.
        HiZBuildPushConstants pc{};
        pc.dstWidth = static_cast<int32_t>(mipW);
        pc.dstHeight = static_cast<int32_t>(mipH);
        pc.mipLevel = static_cast<int32_t>(mip);
        vkCmdPushConstants(cmd, buildPipelineLayout_, VK_SHADER_STAGE_COMPUTE_BIT,
            0, sizeof(pc), &pc);

        // The dispatch below rounds up to whole 8x8 groups so the destination is covered.
        const uint32_t groupsX = (mipW + 7) / 8;
        const uint32_t groupsY = (mipH + 7) / 8;
        vkCmdDispatch(cmd, groupsX, groupsY, 1);

        // Make the writes to this mip available to later compute-shader reads.
        // This is issued after the LAST mip too: without it, compute work
        // recorded after this function in the same command buffer (such as a
        // cull dispatch sampling the pyramid) would have no dependency on the
        // final mip's writes.
        {
            VkImageMemoryBarrier mipBarrier{VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER};
            mipBarrier.srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT;
            mipBarrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT;
            mipBarrier.oldLayout = VK_IMAGE_LAYOUT_GENERAL;
            mipBarrier.newLayout = VK_IMAGE_LAYOUT_GENERAL;
            mipBarrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
            mipBarrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
            mipBarrier.image = pyramidImage_[frameIndex];
            mipBarrier.subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;
            mipBarrier.subresourceRange.baseMipLevel = mip;
            mipBarrier.subresourceRange.levelCount = 1;
            mipBarrier.subresourceRange.layerCount = 1;

            vkCmdPipelineBarrier(cmd,
                VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
                VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
                0, 0, nullptr, 0, nullptr, 1, &mipBarrier);
        }

        // Next level is half the size, never smaller than 1x1.
        mipW = std::max(1u, mipW / 2);
        mipH = std::max(1u, mipH / 2);
    }

    // Depth: SHADER_READ_ONLY -> DEPTH_STENCIL_ATTACHMENT so it can be used as
    // a depth attachment again.
    {
        VkImageMemoryBarrier barrier{VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER};
        barrier.srcAccessMask = VK_ACCESS_SHADER_READ_BIT;
        barrier.dstAccessMask = VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT |
                                VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT;
        barrier.oldLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
        barrier.newLayout = VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL;
        barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
        barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
        barrier.image = depthImage;
        barrier.subresourceRange.aspectMask = VK_IMAGE_ASPECT_DEPTH_BIT;
        barrier.subresourceRange.levelCount = 1;
        barrier.subresourceRange.layerCount = 1;

        vkCmdPipelineBarrier(cmd,
            VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
            VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT,
            0, 0, nullptr, 0, nullptr, 1, &barrier);
    }
}
|
||||
|
||||
} // namespace rendering
|
||||
} // namespace wowee
|
||||
|
|
@ -295,7 +295,7 @@ bool M2Renderer::initialize(VkContext* ctx, VkDescriptorSetLayout perFrameLayout
|
|||
// Output: uint visibility[] read back by CPU to skip culled instances in sortedVisible_ build.
|
||||
{
|
||||
static_assert(sizeof(CullInstanceGPU) == 32, "CullInstanceGPU must be 32 bytes (std430)");
|
||||
static_assert(sizeof(CullUniformsGPU) == 128, "CullUniformsGPU must be 128 bytes (std140)");
|
||||
static_assert(sizeof(CullUniformsGPU) == 272, "CullUniformsGPU must be 272 bytes (std140)");
|
||||
|
||||
// Descriptor set layout: binding 0 = UBO (frustum+camera), 1 = SSBO (input), 2 = SSBO (output)
|
||||
VkDescriptorSetLayoutBinding bindings[3] = {};
|
||||
|
|
@ -338,6 +338,54 @@ bool M2Renderer::initialize(VkContext* ctx, VkDescriptorSetLayout perFrameLayout
|
|||
cullComp.destroy();
|
||||
}
|
||||
|
||||
// HiZ-aware cull pipeline (Phase 6.3 Option B)
|
||||
// Uses set 0 (same as frustum-only) + set 1 (HiZ pyramid sampler from HiZSystem).
|
||||
// The HiZ descriptor set layout is created lazily when hizSystem_ is set, but the
|
||||
// pipeline layout and shader are created now if the shader is available.
|
||||
rendering::VkShaderModule cullHiZComp;
|
||||
if (cullHiZComp.loadFromFile(device, "assets/shaders/m2_cull_hiz.comp.spv")) {
|
||||
// HiZ cull set 1 layout: single combined image sampler (the HiZ pyramid)
|
||||
VkDescriptorSetLayoutBinding hizBinding{};
|
||||
hizBinding.binding = 0;
|
||||
hizBinding.descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER;
|
||||
hizBinding.descriptorCount = 1;
|
||||
hizBinding.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT;
|
||||
|
||||
VkDescriptorSetLayout hizSamplerLayout = VK_NULL_HANDLE;
|
||||
VkDescriptorSetLayoutCreateInfo hizLayoutCi{VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO};
|
||||
hizLayoutCi.bindingCount = 1;
|
||||
hizLayoutCi.pBindings = &hizBinding;
|
||||
vkCreateDescriptorSetLayout(device, &hizLayoutCi, nullptr, &hizSamplerLayout);
|
||||
|
||||
VkDescriptorSetLayout hizSetLayouts[2] = {cullSetLayout_, hizSamplerLayout};
|
||||
VkPipelineLayoutCreateInfo hizPlCi{VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO};
|
||||
hizPlCi.setLayoutCount = 2;
|
||||
hizPlCi.pSetLayouts = hizSetLayouts;
|
||||
vkCreatePipelineLayout(device, &hizPlCi, nullptr, &cullHiZPipelineLayout_);
|
||||
|
||||
VkComputePipelineCreateInfo hizCpCi{VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO};
|
||||
hizCpCi.stage = cullHiZComp.stageInfo(VK_SHADER_STAGE_COMPUTE_BIT);
|
||||
hizCpCi.layout = cullHiZPipelineLayout_;
|
||||
if (vkCreateComputePipelines(device, VK_NULL_HANDLE, 1, &hizCpCi, nullptr, &cullHiZPipeline_) != VK_SUCCESS) {
|
||||
LOG_WARNING("M2Renderer: failed to create HiZ cull compute pipeline — HiZ disabled");
|
||||
cullHiZPipeline_ = VK_NULL_HANDLE;
|
||||
vkDestroyPipelineLayout(device, cullHiZPipelineLayout_, nullptr);
|
||||
cullHiZPipelineLayout_ = VK_NULL_HANDLE;
|
||||
} else {
|
||||
LOG_INFO("M2Renderer: HiZ occlusion cull pipeline created");
|
||||
}
|
||||
|
||||
// hizSamplerLayout is only needed while the pipeline layout is being created;
// Vulkan does not access a descriptor set layout after the create call that
// received it returns, so it is safe to destroy it here. The descriptor set
// bound at set 1 is allocated by HiZSystem from its own identical layout.
vkDestroyDescriptorSetLayout(device, hizSamplerLayout, nullptr);
|
||||
vkDestroyDescriptorSetLayout(device, hizSamplerLayout, nullptr);
|
||||
|
||||
cullHiZComp.destroy();
|
||||
} else {
|
||||
LOG_INFO("M2Renderer: m2_cull_hiz.comp.spv not found — HiZ occlusion culling not available");
|
||||
}
|
||||
|
||||
// Descriptor pool: 2 sets × 3 descriptors each (1 UBO + 2 SSBO)
|
||||
VkDescriptorPoolSize poolSizes[2] = {};
|
||||
poolSizes[0] = {VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, 2};
|
||||
|
|
@ -756,6 +804,14 @@ bool M2Renderer::initialize(VkContext* ctx, VkDescriptorSetLayout perFrameLayout
|
|||
return true;
|
||||
}
|
||||
|
||||
void M2Renderer::invalidateCullOutput(uint32_t frameIndex) {
|
||||
// On non-HOST_COHERENT memory, VMA-mapped GPU→CPU buffers need explicit
|
||||
// invalidation so the CPU cache sees the latest GPU writes.
|
||||
if (frameIndex < 2 && cullOutputAlloc_[frameIndex]) {
|
||||
vmaInvalidateAllocation(vkCtx_->getAllocator(), cullOutputAlloc_[frameIndex], 0, VK_WHOLE_SIZE);
|
||||
}
|
||||
}
|
||||
|
||||
void M2Renderer::shutdown() {
|
||||
LOG_INFO("Shutting down M2 renderer...");
|
||||
if (!vkCtx_) return;
|
||||
|
|
@ -837,6 +893,8 @@ void M2Renderer::shutdown() {
|
|||
if (instanceDescPool_) { vkDestroyDescriptorPool(device, instanceDescPool_, nullptr); instanceDescPool_ = VK_NULL_HANDLE; }
|
||||
|
||||
// GPU frustum culling compute pipeline + buffers cleanup
|
||||
if (cullHiZPipeline_) { vkDestroyPipeline(device, cullHiZPipeline_, nullptr); cullHiZPipeline_ = VK_NULL_HANDLE; }
|
||||
if (cullHiZPipelineLayout_) { vkDestroyPipelineLayout(device, cullHiZPipelineLayout_, nullptr); cullHiZPipelineLayout_ = VK_NULL_HANDLE; }
|
||||
if (cullPipeline_) { vkDestroyPipeline(device, cullPipeline_, nullptr); cullPipeline_ = VK_NULL_HANDLE; }
|
||||
if (cullPipelineLayout_) { vkDestroyPipelineLayout(device, cullPipelineLayout_, nullptr); cullPipelineLayout_ = VK_NULL_HANDLE; }
|
||||
for (int i = 0; i < 2; i++) {
|
||||
|
|
|
|||
|
|
@ -1,6 +1,7 @@
|
|||
#include "rendering/m2_renderer.hpp"
|
||||
#include "rendering/m2_renderer_internal.h"
|
||||
#include "rendering/m2_model_classifier.hpp"
|
||||
#include "rendering/hiz_system.hpp"
|
||||
#include "rendering/vk_context.hpp"
|
||||
#include "rendering/vk_buffer.hpp"
|
||||
#include "rendering/vk_texture.hpp"
|
||||
|
|
@ -600,6 +601,49 @@ void M2Renderer::dispatchCullCompute(VkCommandBuffer cmd, uint32_t frameIndex, c
|
|||
}
|
||||
ubo->cameraPos = glm::vec4(camPos, maxPossibleDistSq);
|
||||
ubo->instanceCount = numInstances;
|
||||
|
||||
// HiZ occlusion culling fields
|
||||
const bool hizReady = hizSystem_ && hizSystem_->isReady();
|
||||
|
||||
// Auto-disable HiZ when the camera has moved/rotated significantly.
|
||||
// Large VP changes make the depth pyramid unreliable because the
|
||||
// reprojected screen positions diverge from the actual pyramid data.
|
||||
bool hizSafe = hizReady;
|
||||
if (hizReady) {
|
||||
// Compare current VP against previous VP — largest absolute element-wise difference (max norm).
|
||||
float maxDiff = 0.0f;
|
||||
const float* curM = &vp[0][0];
|
||||
const float* prevM = &prevVP_[0][0];
|
||||
for (int k = 0; k < 16; ++k)
|
||||
maxDiff = std::max(maxDiff, std::abs(curM[k] - prevM[k]));
|
||||
// Threshold: typical small camera motion produces diffs < 0.05.
|
||||
// A fast rotation easily exceeds 0.3. Skip HiZ when diff is large.
|
||||
if (maxDiff > 0.15f) hizSafe = false;
|
||||
}
|
||||
|
||||
ubo->hizEnabled = hizSafe ? 1u : 0u;
|
||||
ubo->hizMipLevels = hizReady ? hizSystem_->getMipLevels() : 0u;
|
||||
ubo->_pad2 = 0;
|
||||
if (hizReady) {
|
||||
ubo->hizParams = glm::vec4(
|
||||
static_cast<float>(hizSystem_->getPyramidWidth()),
|
||||
static_cast<float>(hizSystem_->getPyramidHeight()),
|
||||
camera.getNearPlane(),
|
||||
0.0f
|
||||
);
|
||||
ubo->viewProj = vp;
|
||||
// Use previous frame's VP for HiZ reprojection — the HiZ pyramid
|
||||
// was built from the previous frame's depth, so we must project
|
||||
// into the same screen space to sample the correct depths.
|
||||
ubo->prevViewProj = prevVP_;
|
||||
} else {
|
||||
ubo->hizParams = glm::vec4(0.0f);
|
||||
ubo->viewProj = glm::mat4(1.0f);
|
||||
ubo->prevViewProj = glm::mat4(1.0f);
|
||||
}
|
||||
|
||||
// Save current VP for next frame's temporal reprojection
|
||||
prevVP_ = vp;
|
||||
}
|
||||
|
||||
// --- Upload per-instance cull data (SSBO, binding 1) ---
|
||||
|
|
@ -622,6 +666,10 @@ void M2Renderer::dispatchCullCompute(VkCommandBuffer cmd, uint32_t frameIndex, c
|
|||
if (inst.cachedIsValid) flags |= 1u;
|
||||
if (inst.cachedIsSmoke) flags |= 2u;
|
||||
if (inst.cachedIsInvisibleTrap) flags |= 4u;
|
||||
// Bit 3: previouslyVisible — the shader skips HiZ for objects
|
||||
// that were NOT rendered last frame (no reliable depth data).
|
||||
if (i < prevFrameVisible_.size() && prevFrameVisible_[i])
|
||||
flags |= 8u;
|
||||
|
||||
input[i].sphere = glm::vec4(inst.position, paddedRadius);
|
||||
input[i].effectiveMaxDistSq = effectiveMaxDistSq;
|
||||
|
|
@ -630,9 +678,22 @@ void M2Renderer::dispatchCullCompute(VkCommandBuffer cmd, uint32_t frameIndex, c
|
|||
}
|
||||
|
||||
// --- Dispatch compute shader ---
|
||||
vkCmdBindPipeline(cmd, VK_PIPELINE_BIND_POINT_COMPUTE, cullPipeline_);
|
||||
vkCmdBindDescriptorSets(cmd, VK_PIPELINE_BIND_POINT_COMPUTE,
|
||||
cullPipelineLayout_, 0, 1, &cullSet_[frameIndex], 0, nullptr);
|
||||
const bool useHiZ = (cullHiZPipeline_ != VK_NULL_HANDLE)
|
||||
&& hizSystem_ && hizSystem_->isReady();
|
||||
if (useHiZ) {
|
||||
vkCmdBindPipeline(cmd, VK_PIPELINE_BIND_POINT_COMPUTE, cullHiZPipeline_);
|
||||
// Set 0: cull UBO + input/output SSBOs
|
||||
vkCmdBindDescriptorSets(cmd, VK_PIPELINE_BIND_POINT_COMPUTE,
|
||||
cullHiZPipelineLayout_, 0, 1, &cullSet_[frameIndex], 0, nullptr);
|
||||
// Set 1: HiZ pyramid sampler
|
||||
VkDescriptorSet hizSet = hizSystem_->getDescriptorSet(frameIndex);
|
||||
vkCmdBindDescriptorSets(cmd, VK_PIPELINE_BIND_POINT_COMPUTE,
|
||||
cullHiZPipelineLayout_, 1, 1, &hizSet, 0, nullptr);
|
||||
} else {
|
||||
vkCmdBindPipeline(cmd, VK_PIPELINE_BIND_POINT_COMPUTE, cullPipeline_);
|
||||
vkCmdBindDescriptorSets(cmd, VK_PIPELINE_BIND_POINT_COMPUTE,
|
||||
cullPipelineLayout_, 0, 1, &cullSet_[frameIndex], 0, nullptr);
|
||||
}
|
||||
|
||||
const uint32_t groupCount = (numInstances + 63) / 64;
|
||||
vkCmdDispatch(cmd, groupCount, 1, 1);
|
||||
|
|
@ -693,6 +754,19 @@ void M2Renderer::render(VkCommandBuffer cmd, VkDescriptorSet perFrameSet, const
|
|||
const uint32_t* visibility = static_cast<const uint32_t*>(cullOutputMapped_[frameIndex]);
|
||||
const bool gpuCullAvailable = (cullPipeline_ != VK_NULL_HANDLE && visibility != nullptr);
|
||||
|
||||
// Snapshot the GPU visibility results into prevFrameVisible_ so the NEXT
|
||||
// frame's compute dispatch can set the per-instance `previouslyVisible`
|
||||
// flag (bit 3). Objects not visible this frame will skip HiZ next frame,
|
||||
// avoiding false culls from stale depth data.
|
||||
if (gpuCullAvailable) {
|
||||
prevFrameVisible_.resize(numInstances);
|
||||
for (uint32_t i = 0; i < numInstances; ++i)
|
||||
prevFrameVisible_[i] = visibility[i] ? 1u : 0u;
|
||||
} else {
|
||||
// No GPU cull data — conservatively mark all as visible
|
||||
prevFrameVisible_.assign(static_cast<size_t>(instances.size()), 1u);
|
||||
}
|
||||
|
||||
// If GPU culling was not dispatched, fallback: compute distances on CPU
|
||||
float maxRenderDistanceSq;
|
||||
if (!gpuCullAvailable) {
|
||||
|
|
@ -1074,7 +1148,10 @@ void M2Renderer::render(VkCommandBuffer cmd, VkDescriptorSet perFrameSet, const
|
|||
// Update material UBO
|
||||
if (batch.materialUBOMapped) {
|
||||
auto* mat = static_cast<M2MaterialUBO*>(batch.materialUBOMapped);
|
||||
mat->interiorDarken = insideInterior ? 1.0f : 0.0f;
|
||||
// interiorDarken is a camera-based flag — it darkens ALL M2s (incl.
|
||||
// outdoor trees) when the camera is inside a WMO. Disable it; indoor
|
||||
// M2s already look correct from the darker ambient/lighting.
|
||||
mat->interiorDarken = 0.0f;
|
||||
if (batch.colorKeyBlack)
|
||||
mat->colorKeyThreshold = (effectiveBlendMode == 4 || effectiveBlendMode == 5) ? 0.7f : 0.08f;
|
||||
if (forceCutout) {
|
||||
|
|
@ -1265,7 +1342,7 @@ void M2Renderer::render(VkCommandBuffer cmd, VkDescriptorSet perFrameSet, const
|
|||
|
||||
if (batch.materialUBOMapped) {
|
||||
auto* mat = static_cast<M2MaterialUBO*>(batch.materialUBOMapped);
|
||||
mat->interiorDarken = insideInterior ? 1.0f : 0.0f;
|
||||
mat->interiorDarken = 0.0f;
|
||||
if (batch.colorKeyBlack)
|
||||
mat->colorKeyThreshold = (effectiveBlendMode == 4 || effectiveBlendMode == 5) ? 0.7f : 0.08f;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -23,6 +23,7 @@
|
|||
#include "rendering/character_preview.hpp"
|
||||
#include "rendering/wmo_renderer.hpp"
|
||||
#include "rendering/m2_renderer.hpp"
|
||||
#include "rendering/hiz_system.hpp"
|
||||
#include "rendering/minimap.hpp"
|
||||
#include "rendering/world_map.hpp"
|
||||
#include "rendering/quest_marker_renderer.hpp"
|
||||
|
|
@ -580,7 +581,6 @@ bool Renderer::initialize(core::Window* win) {
|
|||
overlaySystem_ = std::make_unique<OverlaySystem>(vkCtx);
|
||||
renderGraph_->registerResource("shadow_depth");
|
||||
renderGraph_->registerResource("reflection_texture");
|
||||
renderGraph_->registerResource("cull_visibility");
|
||||
renderGraph_->registerResource("scene_color");
|
||||
renderGraph_->registerResource("scene_depth");
|
||||
renderGraph_->registerResource("final_image");
|
||||
|
|
@ -672,6 +672,10 @@ void Renderer::shutdown() {
|
|||
}
|
||||
|
||||
LOG_DEBUG("Renderer::shutdown - m2Renderer...");
|
||||
if (hizSystem_) {
|
||||
hizSystem_->shutdown();
|
||||
hizSystem_.reset();
|
||||
}
|
||||
if (m2Renderer) {
|
||||
m2Renderer->shutdown();
|
||||
m2Renderer.reset();
|
||||
|
|
@ -798,6 +802,17 @@ void Renderer::applyMsaaChange() {
|
|||
|
||||
if (minimap) minimap->recreatePipelines();
|
||||
|
||||
// Resize HiZ pyramid (depth format/MSAA may have changed)
|
||||
if (hizSystem_) {
|
||||
auto ext = vkCtx->getSwapchainExtent();
|
||||
if (!hizSystem_->resize(ext.width, ext.height)) {
|
||||
LOG_WARNING("HiZ resize failed after MSAA change");
|
||||
if (m2Renderer) m2Renderer->setHiZSystem(nullptr);
|
||||
hizSystem_->shutdown();
|
||||
hizSystem_.reset();
|
||||
}
|
||||
}
|
||||
|
||||
// Selection circle + overlay + FSR use lazy init, just destroy them
|
||||
if (overlaySystem_) overlaySystem_->recreatePipelines();
|
||||
if (postProcessPipeline_) postProcessPipeline_->destroyAllResources(); // Will be lazily recreated in beginFrame()
|
||||
|
|
@ -846,6 +861,16 @@ void Renderer::beginFrame() {
|
|||
}
|
||||
// Recreate post-process resources for new swapchain dimensions
|
||||
if (postProcessPipeline_) postProcessPipeline_->handleSwapchainResize();
|
||||
// Resize HiZ depth pyramid for new swapchain dimensions
|
||||
if (hizSystem_) {
|
||||
auto ext = vkCtx->getSwapchainExtent();
|
||||
if (!hizSystem_->resize(ext.width, ext.height)) {
|
||||
LOG_WARNING("HiZ resize failed — disabling occlusion culling");
|
||||
if (m2Renderer) m2Renderer->setHiZSystem(nullptr);
|
||||
hizSystem_->shutdown();
|
||||
hizSystem_.reset();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Acquire swapchain image and begin command buffer
|
||||
|
|
@ -864,6 +889,31 @@ void Renderer::beginFrame() {
|
|||
// Update per-frame UBO with current camera/lighting state
|
||||
updatePerFrameUBO();
|
||||
|
||||
// ── Early compute: HiZ pyramid build + M2 frustum/occlusion cull ──
|
||||
// These run in a SEPARATE command buffer submission so the GPU executes
|
||||
// them immediately. The CPU then reads the fresh visibility results
|
||||
// before recording the main render pass — eliminating the 2-frame
|
||||
// staleness that occurs when compute + render share one submission.
|
||||
if (m2Renderer && camera && vkCtx) {
|
||||
VkCommandBuffer computeCmd = vkCtx->beginSingleTimeCommands();
|
||||
uint32_t frame = vkCtx->getCurrentFrame();
|
||||
|
||||
// Build HiZ depth pyramid from previous frame's depth buffer
|
||||
if (hizSystem_ && hizSystem_->isReady()) {
|
||||
VkImage depthSrc = vkCtx->getDepthCopySourceImage();
|
||||
hizSystem_->buildPyramid(computeCmd, frame, depthSrc);
|
||||
}
|
||||
|
||||
// Dispatch GPU frustum + HiZ occlusion culling
|
||||
m2Renderer->dispatchCullCompute(computeCmd, frame, *camera);
|
||||
|
||||
vkCtx->endSingleTimeCommands(computeCmd);
|
||||
|
||||
// Ensure GPU→CPU buffer writes are visible to host (non-coherent memory).
|
||||
m2Renderer->invalidateCullOutput(frame);
|
||||
// Visibility results are now in cullOutputMapped_[frame], readable by CPU.
|
||||
}
|
||||
|
||||
// --- Off-screen pre-passes ---
|
||||
// Build frame graph: registers pre-passes as graph nodes with dependencies.
|
||||
// compile() topologically sorts; execute() runs them with auto barriers.
|
||||
|
|
@ -1489,7 +1539,9 @@ void Renderer::renderWorld(game::World* world, game::GameHandler* gameHandler) {
|
|||
if (parallelRecordingEnabled_) {
|
||||
// --- Pre-compute state + GPU allocations on main thread (not thread-safe) ---
|
||||
if (m2Renderer && cameraController) {
|
||||
m2Renderer->setInsideInterior(cameraController->isInsideWMO());
|
||||
// Use isInsideInteriorWMO (flag 0x2000) — not isInsideWMO which includes
|
||||
// outdoor WMO groups like archways/bridges that should receive shadows.
|
||||
m2Renderer->setInsideInterior(cameraController->isInsideInteriorWMO());
|
||||
m2Renderer->setOnTaxi(cameraController->isOnTaxi());
|
||||
}
|
||||
if (wmoRenderer) wmoRenderer->prepareRender();
|
||||
|
|
@ -1734,7 +1786,8 @@ void Renderer::renderWorld(game::World* world, game::GameHandler* gameHandler) {
|
|||
|
||||
if (m2Renderer && camera && !skipM2) {
|
||||
if (cameraController) {
|
||||
m2Renderer->setInsideInterior(cameraController->isInsideWMO());
|
||||
// Use isInsideInteriorWMO (flag 0x2000) for correct indoor detection
|
||||
m2Renderer->setInsideInterior(cameraController->isInsideInteriorWMO());
|
||||
m2Renderer->setOnTaxi(cameraController->isOnTaxi());
|
||||
}
|
||||
m2Renderer->prepareRender(frameIdx, *camera);
|
||||
|
|
@ -1887,6 +1940,23 @@ bool Renderer::initializeRenderers(pipeline::AssetManager* assetManager, const s
|
|||
spellVisualSystem_->initialize(m2Renderer.get());
|
||||
}
|
||||
}
|
||||
|
||||
// HiZ occlusion culling — temporal reprojection.
|
||||
// The HiZ pyramid is built from the previous frame's depth buffer. The cull
|
||||
// compute shader uses prevViewProj to project objects into the previous frame's
|
||||
// screen space so that depth samples match the pyramid, eliminating flicker
|
||||
// caused by camera movement between frames.
|
||||
if (!hizSystem_ && m2Renderer && vkCtx) {
|
||||
hizSystem_ = std::make_unique<HiZSystem>();
|
||||
auto extent = vkCtx->getSwapchainExtent();
|
||||
if (hizSystem_->initialize(vkCtx, extent.width, extent.height)) {
|
||||
m2Renderer->setHiZSystem(hizSystem_.get());
|
||||
LOG_INFO("HiZ occlusion culling initialized (", extent.width, "x", extent.height, ")");
|
||||
} else {
|
||||
LOG_WARNING("HiZ occlusion culling unavailable — falling back to frustum-only culling");
|
||||
hizSystem_.reset();
|
||||
}
|
||||
}
|
||||
if (!wmoRenderer) {
|
||||
wmoRenderer = std::make_unique<WMORenderer>();
|
||||
wmoRenderer->initialize(vkCtx, perFrameSetLayout, assetManager);
|
||||
|
|
@ -2627,7 +2697,6 @@ void Renderer::buildFrameGraph(game::GameHandler* gameHandler) {
|
|||
|
||||
auto shadowDepth = renderGraph_->findResource("shadow_depth");
|
||||
auto reflTex = renderGraph_->findResource("reflection_texture");
|
||||
auto cullVis = renderGraph_->findResource("cull_visibility");
|
||||
|
||||
// Minimap composites (no dependencies — standalone off-screen render target)
|
||||
renderGraph_->addPass("minimap_composite", {}, {},
|
||||
|
|
@ -2670,13 +2739,6 @@ void Renderer::buildFrameGraph(game::GameHandler* gameHandler) {
|
|||
renderReflectionPass();
|
||||
});
|
||||
|
||||
// GPU frustum cull compute → outputs cull_visibility
|
||||
renderGraph_->addPass("compute_cull", {}, {cullVis},
|
||||
[this](VkCommandBuffer cmd) {
|
||||
if (m2Renderer && camera)
|
||||
m2Renderer->dispatchCullCompute(cmd, vkCtx->getCurrentFrame(), *camera);
|
||||
});
|
||||
|
||||
renderGraph_->compile();
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -798,7 +798,8 @@ bool VkContext::createDepthBuffer() {
|
|||
imgInfo.arrayLayers = 1;
|
||||
imgInfo.samples = msaaSamples_;
|
||||
imgInfo.tiling = VK_IMAGE_TILING_OPTIMAL;
|
||||
imgInfo.usage = VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT;
|
||||
imgInfo.usage = VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT
|
||||
| VK_IMAGE_USAGE_SAMPLED_BIT; // HiZ pyramid reads depth as texture
|
||||
|
||||
VmaAllocationCreateInfo allocInfo{};
|
||||
allocInfo.usage = VMA_MEMORY_USAGE_GPU_ONLY;
|
||||
|
|
@ -911,7 +912,8 @@ bool VkContext::createDepthResolveImage() {
|
|||
imgInfo.arrayLayers = 1;
|
||||
imgInfo.samples = VK_SAMPLE_COUNT_1_BIT;
|
||||
imgInfo.tiling = VK_IMAGE_TILING_OPTIMAL;
|
||||
imgInfo.usage = VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT;
|
||||
imgInfo.usage = VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT
|
||||
| VK_IMAGE_USAGE_SAMPLED_BIT; // HiZ pyramid reads depth as texture
|
||||
|
||||
VmaAllocationCreateInfo allocInfo{};
|
||||
allocInfo.usage = VMA_MEMORY_USAGE_GPU_ONLY;
|
||||
|
|
|
|||
|
|
@ -197,6 +197,19 @@ target_link_libraries(test_anim_capability PRIVATE catch2_main)
|
|||
add_test(NAME anim_capability COMMAND test_anim_capability)
|
||||
register_test_target(test_anim_capability)
|
||||
|
||||
# ── test_indoor_shadows ──────────────────────────────────────
|
||||
add_executable(test_indoor_shadows
|
||||
test_indoor_shadows.cpp
|
||||
)
|
||||
target_include_directories(test_indoor_shadows PRIVATE ${TEST_INCLUDE_DIRS})
|
||||
target_include_directories(test_indoor_shadows SYSTEM PRIVATE ${TEST_SYSTEM_INCLUDE_DIRS})
|
||||
target_link_libraries(test_indoor_shadows PRIVATE catch2_main)
|
||||
if(TARGET glm::glm)
|
||||
target_link_libraries(test_indoor_shadows PRIVATE glm::glm)
|
||||
endif()
|
||||
add_test(NAME indoor_shadows COMMAND test_indoor_shadows)
|
||||
register_test_target(test_indoor_shadows)
|
||||
|
||||
# ── ASAN / UBSan for test targets ────────────────────────────
|
||||
if(WOWEE_ENABLE_ASAN AND NOT MSVC)
|
||||
foreach(_t IN LISTS ALL_TEST_TARGETS)
|
||||
|
|
|
|||
122
tests/test_indoor_shadows.cpp
Normal file
122
tests/test_indoor_shadows.cpp
Normal file
|
|
@ -0,0 +1,122 @@
|
|||
// Tests for indoor shadow disable logic (WMO interior groups)
|
||||
//
|
||||
// WMO interior groups (flag 0x2000) should NOT receive directional sun shadows
|
||||
// because they rely on pre-baked vertex color lighting (MOCV) and the shadow map
|
||||
// only makes them darker. The fix is in the fragment shader: interior groups
|
||||
// skip the shadow map sample entirely.
|
||||
//
|
||||
// These tests verify the data contract between the renderer and the shader:
|
||||
// - GPUPerFrameData.shadowParams.x controls global shadow enable
|
||||
// - WMOMaterial.isInterior controls per-group interior flag
|
||||
// - Interior groups ignore shadows regardless of global shadow state
|
||||
|
||||
#include <catch_amalgamated.hpp>
|
||||
#include "rendering/vk_frame_data.hpp"
|
||||
|
||||
#include <glm/glm.hpp>
|
||||
|
||||
using wowee::rendering::GPUPerFrameData;
|
||||
|
||||
// Replicates the shadow params logic from Renderer::updatePerFrameUBO()
|
||||
// This should NOT be affected by indoor state — shadows remain globally enabled
|
||||
static void applyShadowParams(GPUPerFrameData& fd,
|
||||
bool shadowsEnabled,
|
||||
float shadowDistance = 300.0f) {
|
||||
float shadowBias = glm::clamp(0.8f * (shadowDistance / 300.0f), 0.0f, 1.0f);
|
||||
fd.shadowParams = glm::vec4(shadowsEnabled ? 1.0f : 0.0f, shadowBias, 0.0f, 0.0f);
|
||||
}
|
||||
|
||||
// Test-local model of the WMO fragment shader's shadow decision.
//
// Returns the factor applied to lighting: 1.0 means "no shadow darkening".
//   - Interior groups always yield 1.0; the interior check comes first, so it
//     wins regardless of any other material property.
//   - Exterior groups yield rawShadow when globalShadowEnabled > 0.5.
//   - Otherwise (shadows globally off) the result is 1.0.
static float computeWmoShadowFactor(bool isInterior, float globalShadowEnabled, float rawShadow) {
    const bool shadowsActive = globalShadowEnabled > 0.5f;
    const bool sampleShadowMap = !isInterior && shadowsActive;
    return sampleShadowMap ? rawShadow : 1.0f;
}
|
||||
|
||||
TEST_CASE("Global shadow params are not affected by indoor state", "[indoor_shadows]") {
    GPUPerFrameData frameData{};

    // The enable flag follows the shadowsEnabled argument only; no indoor
    // state is passed in, so it cannot influence the result.
    applyShadowParams(frameData, true);
    const float flagWhenEnabled = frameData.shadowParams.x;
    REQUIRE(flagWhenEnabled == Catch::Approx(1.0f));

    applyShadowParams(frameData, false);
    const float flagWhenDisabled = frameData.shadowParams.x;
    REQUIRE(flagWhenDisabled == Catch::Approx(0.0f));
}
|
||||
|
||||
TEST_CASE("Interior WMO groups skip shadow sampling", "[indoor_shadows]") {
    // Shadows are globally on and the shadow map reports a dark 0.2, yet an
    // interior group must still come out with factor 1.0 (no shadow).
    const bool interiorGroup = true;
    const float shadowsOn = 1.0f;
    const float darkSample = 0.2f;

    const float result = computeWmoShadowFactor(interiorGroup, shadowsOn, darkSample);
    REQUIRE(result == Catch::Approx(1.0f));
}
|
||||
|
||||
TEST_CASE("Exterior WMO groups receive shadows normally", "[indoor_shadows]") {
    // With shadows globally on, an exterior group passes the shadow-map
    // sample through unchanged.
    const bool exteriorGroup = false;
    const float shadowsOn = 1.0f;
    const float mapSample = 0.3f;

    const float result = computeWmoShadowFactor(exteriorGroup, shadowsOn, mapSample);
    REQUIRE(result == Catch::Approx(0.3f));
}
|
||||
|
||||
TEST_CASE("Exterior WMO groups skip shadows when globally disabled", "[indoor_shadows]") {
    // Global toggle off: the shadow-map sample is ignored even for exterior groups.
    const bool exteriorGroup = false;
    const float shadowsOff = 0.0f;
    const float mapSample = 0.3f;

    const float result = computeWmoShadowFactor(exteriorGroup, shadowsOff, mapSample);
    REQUIRE(result == Catch::Approx(1.0f));
}
|
||||
|
||||
TEST_CASE("Interior WMO groups skip shadows even when globally disabled", "[indoor_shadows]") {
    // Interior flag and a disabled global toggle both lead to factor 1.0.
    const bool interiorGroup = true;
    const float shadowsOff = 0.0f;
    const float mapSample = 0.5f;

    const float result = computeWmoShadowFactor(interiorGroup, shadowsOff, mapSample);
    REQUIRE(result == Catch::Approx(1.0f));
}
|
||||
|
||||
TEST_CASE("Unlit interior surfaces skip shadows (isInterior takes priority over unlit)", "[indoor_shadows]") {
    // Many interior walls carry the F_UNLIT material flag (0x01). The shader has
    // to test isInterior BEFORE unlit so those walls are not shadow-darkened.
    // computeWmoShadowFactor() takes no unlit argument; what is exercised here is
    // the isInterior early-out, which yields 1.0 whatever the shadow map says.
    const bool interiorGroup = true;
    const float shadowsOn = 1.0f;
    const float veryDarkSample = 0.1f;

    const float result = computeWmoShadowFactor(interiorGroup, shadowsOn, veryDarkSample);
    REQUIRE(result == Catch::Approx(1.0f));
}
|
||||
|
||||
TEST_CASE("Outdoor unlit surfaces still receive shadows", "[indoor_shadows]") {
    // Exterior surfaces (isInterior=false; unlit=true on the shader side) keep
    // the darkening reported by the shadow map.
    const bool exteriorGroup = false;
    const float shadowsOn = 1.0f;
    const float mapSample = 0.25f;

    const float result = computeWmoShadowFactor(exteriorGroup, shadowsOn, mapSample);
    REQUIRE(result == Catch::Approx(0.25f));
}
|
||||
|
||||
TEST_CASE("Shadow bias scales with shadow distance", "[indoor_shadows]") {
    // Bias is 0.8 * (distance / 300), clamped to [0, 1]. Both clamp bounds are
    // asserted so a regression on either side is caught.
    GPUPerFrameData fd{};

    // At the default 300.0f, bias = 0.8
    applyShadowParams(fd, true, 300.0f);
    REQUIRE(fd.shadowParams.y == Catch::Approx(0.8f));

    // At 150.0f, bias = 0.4
    applyShadowParams(fd, true, 150.0f);
    REQUIRE(fd.shadowParams.y == Catch::Approx(0.4f));

    // Upper clamp: 600.0f would give 1.6 unclamped
    applyShadowParams(fd, true, 600.0f);
    REQUIRE(fd.shadowParams.y == Catch::Approx(1.0f));

    // Lower clamp: a negative distance would give -0.4 unclamped
    applyShadowParams(fd, true, -150.0f);
    REQUIRE(fd.shadowParams.y == Catch::Approx(0.0f));
}
|
||||
|
||||
TEST_CASE("Ambient color is NOT modified globally for indoor state", "[indoor_shadows]") {
    // The global UBO ambient color should never be modified based on indoor state.
    // Indoor lighting is handled per-group in the WMO shader via MOCV vertex colors
    // and MOHD ambient color.
    GPUPerFrameData fd{};
    const glm::vec4 ambientBefore(0.3f, 0.3f, 0.3f, 1.0f);
    fd.ambientColor = ambientBefore;

    applyShadowParams(fd, true);

    // Every component, alpha included, must be untouched by the shadow-param update.
    REQUIRE(fd.ambientColor.x == Catch::Approx(ambientBefore.x));
    REQUIRE(fd.ambientColor.y == Catch::Approx(ambientBefore.y));
    REQUIRE(fd.ambientColor.z == Catch::Approx(ambientBefore.z));
    REQUIRE(fd.ambientColor.w == Catch::Approx(ambientBefore.w));
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue