mirror of
https://github.com/Kelsidavis/WoWee.git
synced 2026-04-17 01:23:51 +00:00
Merge pull request #52 from ldmonster/feat/hiz-occlusion-culling
[feat] rendering: Hierarchical-Z occlusion culling
This commit is contained in:
commit
5d0d140c61
17 changed files with 1317 additions and 35 deletions
1
.gitignore
vendored
1
.gitignore
vendored
|
|
@ -19,6 +19,7 @@ Makefile
|
|||
*.obj
|
||||
*.slo
|
||||
*.lo
|
||||
*.spv
|
||||
|
||||
# Compiled Dynamic libraries
|
||||
*.so
|
||||
|
|
|
|||
|
|
@ -624,6 +624,7 @@ set(WOWEE_SOURCES
|
|||
src/rendering/m2_renderer_instance.cpp
|
||||
src/rendering/m2_model_classifier.cpp
|
||||
src/rendering/render_graph.cpp
|
||||
src/rendering/hiz_system.cpp
|
||||
src/rendering/quest_marker_renderer.cpp
|
||||
src/rendering/minimap.cpp
|
||||
src/rendering/world_map.cpp
|
||||
|
|
|
|||
57
assets/shaders/hiz_build.comp.glsl
Normal file
57
assets/shaders/hiz_build.comp.glsl
Normal file
|
|
@ -0,0 +1,57 @@
|
|||
#version 450

// Hierarchical-Z depth pyramid builder.
//
// Each dispatch writes ONE destination mip level. Every destination texel is
// reduced to the MAXIMUM depth (farthest/largest value; standard depth buffer:
// 0 = near, 1 = far) of the source texels it covers. This is conservative for
// occlusion: an object is only culled when its nearest depth exceeds the
// farthest occluder depth in the pyramid region.
//
// Two modes controlled by push constant:
//   mipLevel == 0: read LOD 0 of srcDepth (the full-res scene depth).
//   mipLevel  > 0: read LOD (mipLevel - 1) of srcDepth (the previous HiZ mip).
//
// Non-power-of-two handling: a destination level is floor(src / 2) wide, so an
// odd source dimension leaves one extra row/column that a plain 2x2 footprint
// never reads. Skipping it would make the stored maximum too small (i.e. NOT
// conservative), so the footprint grows to 3 texels along any odd dimension.
// All fetches are clamped to the source extent, which also covers levels whose
// source has already collapsed to a single texel along one axis.

layout(local_size_x = 8, local_size_y = 8) in;

// Source depth texture (full-resolution scene depth, or previous mip via same image)
layout(set = 0, binding = 0) uniform sampler2D srcDepth;

// Destination mip level (written as storage image)
layout(r32f, set = 0, binding = 1) uniform writeonly image2D dstMip;

layout(push_constant) uniform PushConstants {
    ivec2 dstSize;   // Width and height of the destination mip level
    int   mipLevel;  // Current mip level being built (0 = from scene depth)
};

void main() {
    ivec2 pos = ivec2(gl_GlobalInvocationID.xy);
    if (pos.x >= dstSize.x || pos.y >= dstSize.y) return;

    // LOD of srcDepth that feeds this destination level.
    int   srcLod  = (mipLevel == 0) ? 0 : (mipLevel - 1);
    ivec2 srcSize = textureSize(srcDepth, srcLod);
    ivec2 srcMax  = srcSize - ivec2(1);

    // Each output texel starts at the matching 2x2 block of the source.
    // texelFetch gives precise, unfiltered texel access.
    ivec2 srcPos = pos * 2;

    // 2 texels per axis, plus 1 along every odd source dimension.
    ivec2 footprint = ivec2(2) + (srcSize & ivec2(1));

    // Conservative maximum over the whole footprint. Coordinates are clamped
    // so no fetch ever lands outside the source level.
    float maxDepth = texelFetch(srcDepth, min(srcPos, srcMax), srcLod).r;
    for (int dy = 0; dy < footprint.y; ++dy) {
        for (int dx = 0; dx < footprint.x; ++dx) {
            ivec2 p = min(srcPos + ivec2(dx, dy), srcMax);
            maxDepth = max(maxDepth, texelFetch(srcDepth, p, srcLod).r);
        }
    }

    imageStore(dstMip, pos, vec4(maxDepth));
}
|
||||
184
assets/shaders/m2_cull_hiz.comp.glsl
Normal file
184
assets/shaders/m2_cull_hiz.comp.glsl
Normal file
|
|
@ -0,0 +1,184 @@
|
|||
#version 450

// GPU frustum + HiZ occlusion culling for M2 doodads (Phase 6.3).
//
// One invocation per instance; the result (1 = visible, 0 = culled) is written
// to visibility[id]. Tests run in this order:
//   1. Flags     - instance must be valid and neither smoke nor invisible trap.
//   2. Distance  - global upper bound (cameraPos.w), then per-instance limit.
//   3. Frustum   - bounding sphere vs. the six current-frame planes.
//   4. HiZ       - bounding sphere reprojected with prevViewProj and compared
//                  against the Hierarchical-Z pyramid bound at set 1.
//
// The HiZ stage is deliberately conservative:
//   - it only runs for instances flagged as visible in the previous frame;
//   - the screen-space AABB must lie fully inside the safe screen area;
//   - the sphere is inflated by 50 % before projection;
//   - a depth bias is subtracted before the comparison;
//   - everything is projected with prevViewProj, so the test happens in the
//     same space as the depth the pyramid holds.
//
// With hizEnabled == 0 the shader reduces to the flag/distance/frustum tests.

layout(local_size_x = 64) in;

struct CullInstance {
    vec4  sphere;              // xyz = world position, w = padded radius
    float effectiveMaxDistSq;  // per-instance squared draw-distance limit
    uint  flags;               // bit 0 = valid, bit 1 = smoke, bit 2 = invisibleTrap,
                               // bit 3 = previouslyVisible
    float _pad0;
    float _pad1;
};

layout(std140, set = 0, binding = 0) uniform CullUniforms {
    vec4 frustumPlanes[6];
    vec4 cameraPos;      // xyz = camera position, w = maxPossibleDistSq
    uint instanceCount;
    uint hizEnabled;
    uint hizMipLevels;
    uint _pad2;
    vec4 hizParams;      // x = pyramidWidth, y = pyramidHeight, z = nearPlane, w = unused
    mat4 viewProj;       // current frame view-projection
    mat4 prevViewProj;   // PREVIOUS frame's view-projection for HiZ reprojection
};

layout(std430, set = 0, binding = 1) readonly buffer CullInput {
    CullInstance cullInstances[];
};

layout(std430, set = 0, binding = 2) buffer CullOutput {
    uint visibility[];
};

layout(set = 1, binding = 0) uniform sampler2D hizPyramid;

// Instance flag bits (see CullInstance.flags).
const uint FLAG_VALID              = 1u;
const uint FLAG_SMOKE_OR_TRAP_MASK = 6u;
const uint FLAG_PREVIOUSLY_VISIBLE = 8u;

// Screen-edge margin (UV units): HiZ is skipped when the AABB touches this border,
// because depth at the screen edges comes from unrelated geometry.
const float SCREEN_EDGE_MARGIN = 0.02;

// Sphere inflation factor for the HiZ screen AABB (50 % larger).
const float HIZ_SPHERE_INFLATE = 1.5;

// Depth bias: pulls the tested depth toward the camera so only objects
// clearly behind the occluders are rejected.
const float HIZ_DEPTH_BIAS = 0.02;

// Minimum screen-space size (pyramid pixels) for HiZ to engage.
const float HIZ_MIN_SCREEN_PX = 6.0;

// Length of the xyz part of one row of prevViewProj. It bounds how far a unit
// world-space offset can move the corresponding clip coordinate, regardless of
// camera orientation.
float prevVpRowLength(int row) {
    return length(vec3(prevViewProj[0][row],
                       prevViewProj[1][row],
                       prevViewProj[2][row]));
}

// True when the sphere lies entirely on the negative side of any frustum plane.
bool outsideFrustum(vec3 center, float radius) {
    for (int i = 0; i < 6; i++) {
        float signedDist = dot(frustumPlanes[i].xyz, center) + frustumPlanes[i].w;
        if (signedDist < -radius) {
            return true;
        }
    }
    return false;
}

// True only when the HiZ pyramid shows the (inflated) sphere to be hidden.
// Every bail-out returns false, i.e. "treat as visible".
bool occludedByHiZ(vec3 center, float radius) {
    float hizRadius = radius * HIZ_SPHERE_INFLATE;

    // Sphere center in the previous frame's clip space.
    vec4 clipCenter = prevViewProj * vec4(center, 1.0);
    if (!(clipCenter.w > 0.0)) {
        return false;
    }
    vec3 ndc = clipCenter.xyz / clipCenter.w;

    // Screen-space extent from the X/Y row lengths (not just the diagonal
    // terms, which would underestimate the footprint for a rotated camera).
    float projRadX = hizRadius * prevVpRowLength(0) / clipCenter.w;
    float projRadY = hizRadius * prevVpRowLength(1) / clipCenter.w;
    float uvRad    = max(projRadX, projRadY) * 0.5;

    vec2 uvCenter = ndc.xy * 0.5 + 0.5;
    vec2 uvMin    = uvCenter - uvRad;
    vec2 uvMax    = uvCenter + uvRad;

    // Screen-edge guard: the AABB must be non-empty and inside the safe area.
    bool insideSafeArea =
        all(greaterThanEqual(uvMin, vec2(SCREEN_EDGE_MARGIN))) &&
        all(lessThanEqual(uvMax, vec2(1.0 - SCREEN_EDGE_MARGIN))) &&
        all(greaterThan(uvMax, uvMin));
    if (!insideSafeArea) {
        return false;
    }

    // Footprint in pyramid pixels; tiny footprints are not tested.
    vec2  aabbPx     = (uvMax - uvMin) * hizParams.xy;
    float screenSize = max(aabbPx.x, aabbPx.y);
    if (!(screenSize >= HIZ_MIN_SCREEN_PX)) {
        return false;
    }

    // One level coarser than the footprint strictly needs, for conservatism.
    float lod = clamp(ceil(log2(max(screenSize, 1.0))) + 1.0,
                      0.0, float(hizMipLevels - 1u));

    // Farthest occluder depth over the four AABB corners.
    float pyramidDepth = max(
        max(textureLod(hizPyramid, uvMin, lod).r,
            textureLod(hizPyramid, vec2(uvMax.x, uvMin.y), lod).r),
        max(textureLod(hizPyramid, vec2(uvMin.x, uvMax.y), lod).r,
            textureLod(hizPyramid, uvMax, lod).r));

    // Depth used for the comparison: center NDC-Z minus the sphere's depth
    // span (Z-row length, same reasoning as X/Y) minus the bias.
    float depthSpan    = hizRadius * prevVpRowLength(2) / clipCenter.w;
    float nearestDepth = ndc.z - depthSpan - HIZ_DEPTH_BIAS;

    // pyramidDepth == 1.0 means nothing but the far plane was seen there.
    return nearestDepth > pyramidDepth && pyramidDepth < 1.0;
}

void main() {
    uint id = gl_GlobalInvocationID.x;
    if (id >= instanceCount) return;

    CullInstance inst = cullInstances[id];
    vec3  center = inst.sphere.xyz;
    float radius = inst.sphere.w;

    // Must be valid, not smoke, not invisible trap.
    bool visible = (inst.flags & FLAG_VALID) != 0u &&
                   (inst.flags & FLAG_SMOKE_OR_TRAP_MASK) == 0u;

    if (visible) {
        // Loose global bound first, then the accurate per-instance limit.
        vec3  toCam  = center - cameraPos.xyz;
        float distSq = dot(toCam, toCam);
        visible = !(distSq > cameraPos.w) && !(distSq > inst.effectiveMaxDistSq);
    }

    // Sphere-based tests only apply to instances with a positive radius.
    if (visible && radius > 0.0) {
        visible = !outsideFrustum(center, radius);

        // HiZ only for instances that were rendered last frame (bit 3).
        if (visible && hizEnabled != 0u &&
            (inst.flags & FLAG_PREVIOUSLY_VISIBLE) != 0u) {
            visible = !occludedByHiZ(center, radius);
        }
    }

    visibility[id] = visible ? 1u : 0u;
}
|
||||
|
|
@ -163,10 +163,11 @@ void main() {
|
|||
|
||||
vec3 result;
|
||||
|
||||
// Sample shadow map for all WMO groups (interior groups with 0x2000 flag
|
||||
// include covered outdoor areas like archways/streets that should receive shadows)
|
||||
// Sample shadow map — skip entirely for interior groups (flag 0x2000).
|
||||
// Interior surfaces rely on pre-baked MOCV vertex-color lighting and the
|
||||
// directional shadow map only makes them darker without any benefit.
|
||||
float shadow = 1.0;
|
||||
if (shadowParams.x > 0.5) {
|
||||
if (isInterior == 0 && shadowParams.x > 0.5) {
|
||||
vec3 ldir = normalize(-lightDir.xyz);
|
||||
float normalOffset = SHADOW_TEXEL * 2.0 * (1.0 - abs(dot(norm, ldir)));
|
||||
vec3 biasedPos = FragPos + norm * normalOffset;
|
||||
|
|
@ -185,17 +186,20 @@ void main() {
|
|||
if (isLava != 0) {
|
||||
// Lava is self-luminous — bright emissive, no shadows
|
||||
result = texColor.rgb * 1.5;
|
||||
} else if (unlit != 0) {
|
||||
result = texColor.rgb * shadow;
|
||||
} else if (isInterior != 0) {
|
||||
// WMO interior: vertex colors (MOCV) are pre-baked lighting from the artist.
|
||||
// The MOHD ambient color tints/floors the vertex colors so dark spots don't
|
||||
// go completely black, matching the WoW client's interior shading.
|
||||
// We handle BOTH lit and unlit interior materials — directional
|
||||
// sun shadows and lighting are skipped for all interior groups.
|
||||
vec3 wmoAmbient = vec3(wmoAmbientR, wmoAmbientG, wmoAmbientB);
|
||||
// Clamp ambient to at least 0.3 to avoid total darkness when MOHD color is zero
|
||||
wmoAmbient = max(wmoAmbient, vec3(0.3));
|
||||
vec3 mocv = max(VertColor.rgb, wmoAmbient);
|
||||
result = texColor.rgb * mocv * shadow;
|
||||
result = texColor.rgb * mocv;
|
||||
} else if (unlit != 0) {
|
||||
// Outdoor unlit surface — still receives directional shadows
|
||||
result = texColor.rgb * shadow;
|
||||
} else {
|
||||
vec3 ldir = normalize(-lightDir.xyz);
|
||||
float diff = max(dot(norm, ldir), 0.0);
|
||||
|
|
|
|||
Binary file not shown.
|
|
@ -83,6 +83,7 @@ public:
|
|||
bool isSitting() const { return sitting; }
|
||||
bool isSwimming() const { return swimming; }
|
||||
bool isInsideWMO() const { return cachedInsideWMO; }
|
||||
bool isInsideInteriorWMO() const { return cachedInsideInteriorWMO; }
|
||||
void setGrounded(bool g) { grounded = g; }
|
||||
void setSitting(bool s) { sitting = s; }
|
||||
bool isOnTaxi() const { return externalFollow_; }
|
||||
|
|
|
|||
150
include/rendering/hiz_system.hpp
Normal file
150
include/rendering/hiz_system.hpp
Normal file
|
|
@ -0,0 +1,150 @@
|
|||
#pragma once
|
||||
|
||||
#include <vulkan/vulkan.h>
|
||||
#include <vk_mem_alloc.h>
|
||||
#include <glm/glm.hpp>
|
||||
#include <cstdint>
|
||||
#include <vector>
|
||||
|
||||
namespace wowee {
|
||||
namespace rendering {
|
||||
|
||||
class VkContext;
|
||||
|
||||
/**
|
||||
* Hierarchical-Z (HiZ) depth pyramid for GPU occlusion culling (Phase 6.3 Option B).
|
||||
*
|
||||
* Builds a min-depth mip chain from the previous frame's depth buffer each frame.
|
||||
* The M2 cull compute shader samples this pyramid to reject objects hidden behind
|
||||
* geometry, complementing the existing frustum culling.
|
||||
*
|
||||
* Lifecycle:
|
||||
* initialize() — create pyramid image, sampler, compute pipeline, descriptors
|
||||
* buildPyramid() — dispatch compute to reduce depth → mip chain (once per frame)
|
||||
* shutdown() — destroy all Vulkan resources
|
||||
*
|
||||
* The pyramid is double-buffered (per frame-in-flight) so builds and reads
|
||||
* never race across concurrent GPU submissions.
|
||||
*/
|
||||
class HiZSystem {
|
||||
public:
|
||||
HiZSystem() = default;
|
||||
~HiZSystem();
|
||||
|
||||
HiZSystem(const HiZSystem&) = delete;
|
||||
HiZSystem& operator=(const HiZSystem&) = delete;
|
||||
|
||||
/**
|
||||
* Create all Vulkan resources.
|
||||
* @param ctx Vulkan context (device, allocator, etc.)
|
||||
* @param width Full-resolution depth buffer width
|
||||
* @param height Full-resolution depth buffer height
|
||||
* @return true on success
|
||||
*/
|
||||
[[nodiscard]] bool initialize(VkContext* ctx, uint32_t width, uint32_t height);
|
||||
|
||||
/**
|
||||
* Release all Vulkan resources.
|
||||
*/
|
||||
void shutdown();
|
||||
|
||||
/**
|
||||
* Rebuild the pyramid after a swapchain resize.
|
||||
* Safe to call repeatedly — destroys old resources first.
|
||||
*/
|
||||
[[nodiscard]] bool resize(uint32_t width, uint32_t height);
|
||||
|
||||
/**
|
||||
* Dispatch compute shader to build the HiZ pyramid from the current depth buffer.
|
||||
* Must be called AFTER the main scene pass has finished writing to the depth buffer.
|
||||
*
|
||||
* @param cmd Active command buffer (in recording state)
|
||||
* @param frameIndex Current frame-in-flight index (0 or 1)
|
||||
* @param depthImage Source depth image (VK_FORMAT_D32_SFLOAT)
|
||||
*/
|
||||
void buildPyramid(VkCommandBuffer cmd, uint32_t frameIndex, VkImage depthImage);
|
||||
|
||||
/**
|
||||
* @return Descriptor set layout for the HiZ pyramid sampler (set 1 for m2_cull_hiz).
|
||||
*/
|
||||
VkDescriptorSetLayout getDescriptorSetLayout() const { return hizSetLayout_; }
|
||||
|
||||
/**
|
||||
* @return Descriptor set for the given frame (sampler2D of the HiZ pyramid).
|
||||
* Bind as set 1 in the M2 HiZ cull pipeline.
|
||||
*/
|
||||
VkDescriptorSet getDescriptorSet(uint32_t frameIndex) const { return hizDescSet_[frameIndex]; }
|
||||
|
||||
/**
|
||||
* @return true if HiZ system is initialized and ready.
|
||||
*/
|
||||
bool isReady() const { return ready_; }
|
||||
|
||||
/**
|
||||
* @return Number of mip levels in the pyramid.
|
||||
*/
|
||||
uint32_t getMipLevels() const { return mipLevels_; }
|
||||
|
||||
/**
|
||||
* @return Pyramid base resolution (mip 0).
|
||||
*/
|
||||
uint32_t getPyramidWidth() const { return pyramidWidth_; }
|
||||
uint32_t getPyramidHeight() const { return pyramidHeight_; }
|
||||
|
||||
private:
|
||||
bool createPyramidImage();
|
||||
void destroyPyramidImage();
|
||||
bool createComputePipeline();
|
||||
void destroyComputePipeline();
|
||||
bool createDescriptors();
|
||||
void destroyDescriptors();
|
||||
|
||||
VkContext* ctx_ = nullptr;
|
||||
bool ready_ = false;
|
||||
|
||||
// Pyramid dimensions (mip 0 = half of full-res depth)
|
||||
uint32_t fullWidth_ = 0;
|
||||
uint32_t fullHeight_ = 0;
|
||||
uint32_t pyramidWidth_ = 0;
|
||||
uint32_t pyramidHeight_ = 0;
|
||||
uint32_t mipLevels_ = 0;
|
||||
|
||||
static constexpr uint32_t MAX_FRAMES = 2;
|
||||
|
||||
// Per-frame HiZ pyramid images (R32_SFLOAT, full mip chain)
|
||||
VkImage pyramidImage_[MAX_FRAMES] = {};
|
||||
VmaAllocation pyramidAlloc_[MAX_FRAMES] = {};
|
||||
VkImageView pyramidViewAll_[MAX_FRAMES] = {}; // View of all mip levels (for sampling)
|
||||
std::vector<VkImageView> pyramidMipViews_[MAX_FRAMES]; // Per-mip views (for storage image writes)
|
||||
|
||||
// Depth input — image view for sampling the depth buffer as a texture
|
||||
VkImageView depthSamplerView_[MAX_FRAMES] = {};
|
||||
|
||||
// Sampler for depth reads (nearest, clamp-to-edge)
|
||||
VkSampler depthSampler_ = VK_NULL_HANDLE;
|
||||
|
||||
// Compute pipeline for building the pyramid
|
||||
VkPipeline buildPipeline_ = VK_NULL_HANDLE;
|
||||
VkPipelineLayout buildPipelineLayout_ = VK_NULL_HANDLE;
|
||||
|
||||
// Descriptor set layout for build pipeline (set 0: src sampler + dst storage image)
|
||||
VkDescriptorSetLayout buildSetLayout_ = VK_NULL_HANDLE;
|
||||
VkDescriptorPool buildDescPool_ = VK_NULL_HANDLE;
|
||||
// Per-frame, per-mip descriptor sets for pyramid build
|
||||
std::vector<VkDescriptorSet> buildDescSets_[MAX_FRAMES];
|
||||
|
||||
// HiZ sampling descriptor: exposed to M2 cull shader (set 1: combined image sampler)
|
||||
VkDescriptorSetLayout hizSetLayout_ = VK_NULL_HANDLE;
|
||||
VkDescriptorPool hizDescPool_ = VK_NULL_HANDLE;
|
||||
VkDescriptorSet hizDescSet_[MAX_FRAMES] = {};
|
||||
|
||||
// Push constant for build shader
|
||||
struct HiZBuildPushConstants {
|
||||
int32_t dstWidth;
|
||||
int32_t dstHeight;
|
||||
int32_t mipLevel;
|
||||
};
|
||||
};
|
||||
|
||||
} // namespace rendering
|
||||
} // namespace wowee
|
||||
|
|
@ -28,6 +28,7 @@ namespace rendering {
|
|||
class Camera;
|
||||
class VkContext;
|
||||
class VkTexture;
|
||||
class HiZSystem;
|
||||
|
||||
/**
|
||||
* GPU representation of an M2 model
|
||||
|
|
@ -299,6 +300,13 @@ public:
|
|||
void dispatchCullCompute(VkCommandBuffer cmd, uint32_t frameIndex, const Camera& camera);
|
||||
void render(VkCommandBuffer cmd, VkDescriptorSet perFrameSet, const Camera& camera);
|
||||
|
||||
/** Set the HiZ system for occlusion culling (Phase 6.3). nullptr disables HiZ. */
|
||||
void setHiZSystem(HiZSystem* hiz) { hizSystem_ = hiz; }
|
||||
|
||||
/** Ensure GPU→CPU cull output is visible to the host after a fence wait.
|
||||
* Call after the early compute submission finishes (endSingleTimeCommands). */
|
||||
void invalidateCullOutput(uint32_t frameIndex);
|
||||
|
||||
/**
|
||||
* Initialize shadow pipeline (Phase 7)
|
||||
*/
|
||||
|
|
@ -437,7 +445,7 @@ private:
|
|||
|
||||
// Mega bone SSBO — consolidates all per-instance bone matrices into a single buffer per frame.
|
||||
// Replaces per-instance bone SSBOs for fewer descriptor binds and enables GPU instancing.
|
||||
static constexpr uint32_t MEGA_BONE_MAX_INSTANCES = 2048;
|
||||
static constexpr uint32_t MEGA_BONE_MAX_INSTANCES = 4096;
|
||||
static constexpr uint32_t MAX_BONES_PER_INSTANCE = 128;
|
||||
::VkBuffer megaBoneBuffer_[2] = {};
|
||||
VmaAllocation megaBoneAlloc_[2] = {};
|
||||
|
|
@ -472,19 +480,26 @@ private:
|
|||
uint32_t flags; // bit 0 = valid, bit 1 = smoke, bit 2 = invisibleTrap
|
||||
float _pad[2] = {};
|
||||
};
|
||||
struct CullUniformsGPU { // matches CullUniforms in m2_cull.comp.glsl (128 bytes, std140)
|
||||
glm::vec4 frustumPlanes[6]; // xyz = normal, w = distance
|
||||
glm::vec4 cameraPos; // xyz = camera position, w = maxPossibleDistSq
|
||||
uint32_t instanceCount;
|
||||
uint32_t _pad[3] = {};
|
||||
};
|
||||
struct CullUniformsGPU { // matches CullUniforms in m2_cull_hiz.comp.glsl (std140)
|
||||
glm::vec4 frustumPlanes[6]; // xyz = normal, w = distance (96 bytes)
|
||||
glm::vec4 cameraPos; // xyz = camera position, w = maxPossibleDistSq (16 bytes)
|
||||
uint32_t instanceCount; // (4 bytes)
|
||||
uint32_t hizEnabled; // 1 = HiZ occlusion active (4 bytes)
|
||||
uint32_t hizMipLevels; // mip levels in HiZ pyramid (4 bytes)
|
||||
uint32_t _pad2 = {}; // (4 bytes)
|
||||
glm::vec4 hizParams; // x=pyramidW, y=pyramidH, z=nearPlane, w=unused (16 bytes)
|
||||
glm::mat4 viewProj; // current frame view-projection (64 bytes)
|
||||
glm::mat4 prevViewProj; // previous frame VP for HiZ reprojection (64 bytes)
|
||||
}; // Total: 272 bytes
|
||||
static constexpr uint32_t MAX_CULL_INSTANCES = 24576;
|
||||
VkPipeline cullPipeline_ = VK_NULL_HANDLE;
|
||||
VkPipelineLayout cullPipelineLayout_ = VK_NULL_HANDLE;
|
||||
VkPipeline cullPipeline_ = VK_NULL_HANDLE; // frustum-only (fallback)
|
||||
VkPipeline cullHiZPipeline_ = VK_NULL_HANDLE; // frustum + HiZ occlusion
|
||||
VkPipelineLayout cullPipelineLayout_ = VK_NULL_HANDLE; // frustum-only layout (set 0)
|
||||
VkPipelineLayout cullHiZPipelineLayout_ = VK_NULL_HANDLE; // HiZ layout (set 0 + set 1)
|
||||
VkDescriptorSetLayout cullSetLayout_ = VK_NULL_HANDLE;
|
||||
VkDescriptorPool cullDescPool_ = VK_NULL_HANDLE;
|
||||
VkDescriptorSet cullSet_[2] = {}; // double-buffered
|
||||
::VkBuffer cullUniformBuffer_[2] = {}; // frustum planes + camera (UBO)
|
||||
::VkBuffer cullUniformBuffer_[2] = {}; // frustum planes + camera + HiZ params (UBO)
|
||||
VmaAllocation cullUniformAlloc_[2] = {};
|
||||
void* cullUniformMapped_[2] = {};
|
||||
::VkBuffer cullInputBuffer_[2] = {}; // per-instance bounding sphere + flags (SSBO)
|
||||
|
|
@ -494,6 +509,20 @@ private:
|
|||
VmaAllocation cullOutputAlloc_[2] = {};
|
||||
void* cullOutputMapped_[2] = {};
|
||||
|
||||
// HiZ occlusion culling (Phase 6.3) — optional, driven by Renderer
|
||||
HiZSystem* hizSystem_ = nullptr;
|
||||
|
||||
// Previous frame's view-projection for temporal reprojection in HiZ culling.
|
||||
// Stored each frame so the cull shader can project into the same screen space
|
||||
// as the depth buffer the HiZ pyramid was built from.
|
||||
glm::mat4 prevVP_{1.0f};
|
||||
|
||||
// Per-instance visibility from the previous frame. Used to set the
|
||||
// `previouslyVisible` flag (bit 3) on each CullInstance so the shader
|
||||
// skips the HiZ test for objects that weren't rendered last frame
|
||||
// (their depth data is unreliable).
|
||||
std::vector<uint8_t> prevFrameVisible_;
|
||||
|
||||
// Dynamic ribbon vertex buffer (CPU-written triangle strip)
|
||||
static constexpr size_t MAX_RIBBON_VERTS = 2048; // 9 floats each
|
||||
::VkBuffer ribbonVB_ = VK_NULL_HANDLE;
|
||||
|
|
|
|||
|
|
@ -58,6 +58,7 @@ class ChargeEffect;
|
|||
class SwimEffects;
|
||||
class RenderGraph;
|
||||
class OverlaySystem;
|
||||
class HiZSystem;
|
||||
|
||||
class Renderer {
|
||||
public:
|
||||
|
|
@ -363,6 +364,9 @@ private:
|
|||
std::unique_ptr<RenderGraph> renderGraph_;
|
||||
void buildFrameGraph(game::GameHandler* gameHandler);
|
||||
|
||||
// HiZ occlusion culling — builds depth pyramid each frame
|
||||
std::unique_ptr<HiZSystem> hizSystem_;
|
||||
|
||||
// CPU timing stats (last frame/update).
|
||||
double lastUpdateMs = 0.0;
|
||||
double lastRenderMs = 0.0;
|
||||
|
|
|
|||
517
src/rendering/hiz_system.cpp
Normal file
517
src/rendering/hiz_system.cpp
Normal file
|
|
@ -0,0 +1,517 @@
|
|||
#include "rendering/hiz_system.hpp"
|
||||
#include "rendering/vk_context.hpp"
|
||||
#include "rendering/vk_shader.hpp"
|
||||
#include "core/logger.hpp"
|
||||
#include "core/profiler.hpp"
|
||||
#include <cmath>
|
||||
#include <algorithm>
|
||||
|
||||
namespace wowee {
|
||||
namespace rendering {
|
||||
|
||||
// Destruction simply delegates to shutdown(), which is a no-op when the
// system holds no context.
HiZSystem::~HiZSystem() { this->shutdown(); }
|
||||
|
||||
/**
 * Create the build pipeline, the per-frame pyramid images and all descriptors.
 *
 * On any failure every partially created resource is released again and the
 * object returns to its uninitialised state (no context, not ready), so a later
 * resize() cannot report success without a build pipeline. Calling this on an
 * already initialised object first shuts the old state down instead of
 * overwriting (and leaking) its handles.
 *
 * @param ctx    Vulkan context; must not be null.
 * @param width  Full-resolution depth buffer width; must be > 0.
 * @param height Full-resolution depth buffer height; must be > 0.
 * @return true on success, false on invalid arguments or resource-creation failure.
 */
bool HiZSystem::initialize(VkContext* ctx, uint32_t width, uint32_t height) {
    if (!ctx || width == 0 || height == 0) return false;

    // Re-initialisation: drop whatever a previous initialize() left behind.
    if (ctx_) shutdown();

    ctx_ = ctx;
    fullWidth_ = width;
    fullHeight_ = height;

    // Pyramid mip 0 is half the full resolution (the first downscale)
    pyramidWidth_ = std::max(1u, width / 2);
    pyramidHeight_ = std::max(1u, height / 2);
    mipLevels_ = static_cast<uint32_t>(std::floor(std::log2(std::max(pyramidWidth_, pyramidHeight_)))) + 1;

    // Short-circuits at the first failing step.
    const bool created = createComputePipeline() && createPyramidImage() && createDescriptors();
    if (!created) {
        // shutdown() destroys whatever exists so far and clears ctx_ / ready_.
        shutdown();
        return false;
    }

    ready_ = true;
    LOG_INFO("HiZSystem: initialized ", pyramidWidth_, "x", pyramidHeight_,
             " pyramid (", mipLevels_, " mips) from ", width, "x", height, " depth");
    return true;
}
|
||||
|
||||
/**
 * Tear down every Vulkan resource owned by the system.
 * Does nothing when no context is attached; otherwise waits for the device to
 * go idle, destroys descriptors, pyramid images and the compute pipeline (in
 * that order) and detaches from the context.
 */
void HiZSystem::shutdown() {
    if (ctx_ == nullptr) {
        return;
    }

    // Wait for the device to go idle before destroying anything.
    vkDeviceWaitIdle(ctx_->getDevice());

    destroyDescriptors();
    destroyPyramidImage();
    destroyComputePipeline();

    ready_ = false;
    ctx_ = nullptr;
}
|
||||
|
||||
bool HiZSystem::resize(uint32_t width, uint32_t height) {
|
||||
if (!ctx_) return false;
|
||||
VkDevice device = ctx_->getDevice();
|
||||
vkDeviceWaitIdle(device);
|
||||
|
||||
destroyDescriptors();
|
||||
destroyPyramidImage();
|
||||
|
||||
fullWidth_ = width;
|
||||
fullHeight_ = height;
|
||||
pyramidWidth_ = std::max(1u, width / 2);
|
||||
pyramidHeight_ = std::max(1u, height / 2);
|
||||
mipLevels_ = static_cast<uint32_t>(std::floor(std::log2(std::max(pyramidWidth_, pyramidHeight_)))) + 1;
|
||||
|
||||
if (!createPyramidImage()) return false;
|
||||
if (!createDescriptors()) { destroyPyramidImage(); return false; }
|
||||
|
||||
ready_ = true;
|
||||
LOG_INFO("HiZSystem: resized to ", pyramidWidth_, "x", pyramidHeight_,
|
||||
" (", mipLevels_, " mips)");
|
||||
return true;
|
||||
}
|
||||
|
||||
// --- Pyramid image creation ---
|
||||
|
||||
bool HiZSystem::createPyramidImage() {
|
||||
VkDevice device = ctx_->getDevice();
|
||||
VmaAllocator alloc = ctx_->getAllocator();
|
||||
|
||||
for (uint32_t f = 0; f < MAX_FRAMES; f++) {
|
||||
// Create R32F image with full mip chain
|
||||
VkImageCreateInfo imgCi{VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO};
|
||||
imgCi.imageType = VK_IMAGE_TYPE_2D;
|
||||
imgCi.format = VK_FORMAT_R32_SFLOAT;
|
||||
imgCi.extent = {pyramidWidth_, pyramidHeight_, 1};
|
||||
imgCi.mipLevels = mipLevels_;
|
||||
imgCi.arrayLayers = 1;
|
||||
imgCi.samples = VK_SAMPLE_COUNT_1_BIT;
|
||||
imgCi.tiling = VK_IMAGE_TILING_OPTIMAL;
|
||||
imgCi.usage = VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_STORAGE_BIT;
|
||||
imgCi.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED;
|
||||
|
||||
VmaAllocationCreateInfo allocCi{};
|
||||
allocCi.usage = VMA_MEMORY_USAGE_GPU_ONLY;
|
||||
|
||||
if (vmaCreateImage(alloc, &imgCi, &allocCi, &pyramidImage_[f], &pyramidAlloc_[f], nullptr) != VK_SUCCESS) {
|
||||
LOG_ERROR("HiZSystem: failed to create pyramid image for frame ", f);
|
||||
return false;
|
||||
}
|
||||
|
||||
// View of ALL mip levels (for sampling in the cull shader)
|
||||
VkImageViewCreateInfo viewCi{VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO};
|
||||
viewCi.image = pyramidImage_[f];
|
||||
viewCi.viewType = VK_IMAGE_VIEW_TYPE_2D;
|
||||
viewCi.format = VK_FORMAT_R32_SFLOAT;
|
||||
viewCi.subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;
|
||||
viewCi.subresourceRange.baseMipLevel = 0;
|
||||
viewCi.subresourceRange.levelCount = mipLevels_;
|
||||
viewCi.subresourceRange.layerCount = 1;
|
||||
|
||||
if (vkCreateImageView(device, &viewCi, nullptr, &pyramidViewAll_[f]) != VK_SUCCESS) {
|
||||
LOG_ERROR("HiZSystem: failed to create pyramid view-all for frame ", f);
|
||||
return false;
|
||||
}
|
||||
|
||||
// Per-mip views (for storage image writes in the build shader)
|
||||
pyramidMipViews_[f].resize(mipLevels_, VK_NULL_HANDLE);
|
||||
for (uint32_t mip = 0; mip < mipLevels_; mip++) {
|
||||
VkImageViewCreateInfo mipViewCi{VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO};
|
||||
mipViewCi.image = pyramidImage_[f];
|
||||
mipViewCi.viewType = VK_IMAGE_VIEW_TYPE_2D;
|
||||
mipViewCi.format = VK_FORMAT_R32_SFLOAT;
|
||||
mipViewCi.subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;
|
||||
mipViewCi.subresourceRange.baseMipLevel = mip;
|
||||
mipViewCi.subresourceRange.levelCount = 1;
|
||||
mipViewCi.subresourceRange.layerCount = 1;
|
||||
|
||||
if (vkCreateImageView(device, &mipViewCi, nullptr, &pyramidMipViews_[f][mip]) != VK_SUCCESS) {
|
||||
LOG_ERROR("HiZSystem: failed to create mip ", mip, " view for frame ", f);
|
||||
return false;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Sampler for depth reads and HiZ pyramid reads (nearest, clamp)
|
||||
VkSamplerCreateInfo samplerCi{VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO};
|
||||
samplerCi.magFilter = VK_FILTER_NEAREST;
|
||||
samplerCi.minFilter = VK_FILTER_NEAREST;
|
||||
samplerCi.mipmapMode = VK_SAMPLER_MIPMAP_MODE_NEAREST;
|
||||
samplerCi.addressModeU = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE;
|
||||
samplerCi.addressModeV = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE;
|
||||
samplerCi.addressModeW = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE;
|
||||
samplerCi.maxLod = static_cast<float>(mipLevels_);
|
||||
|
||||
if (vkCreateSampler(device, &samplerCi, nullptr, &depthSampler_) != VK_SUCCESS) {
|
||||
LOG_ERROR("HiZSystem: failed to create sampler");
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
void HiZSystem::destroyPyramidImage() {
|
||||
if (!ctx_) return;
|
||||
VkDevice device = ctx_->getDevice();
|
||||
VmaAllocator alloc = ctx_->getAllocator();
|
||||
|
||||
if (depthSampler_) { vkDestroySampler(device, depthSampler_, nullptr); depthSampler_ = VK_NULL_HANDLE; }
|
||||
|
||||
for (uint32_t f = 0; f < MAX_FRAMES; f++) {
|
||||
for (auto& view : pyramidMipViews_[f]) {
|
||||
if (view) { vkDestroyImageView(device, view, nullptr); view = VK_NULL_HANDLE; }
|
||||
}
|
||||
pyramidMipViews_[f].clear();
|
||||
|
||||
if (pyramidViewAll_[f]) { vkDestroyImageView(device, pyramidViewAll_[f], nullptr); pyramidViewAll_[f] = VK_NULL_HANDLE; }
|
||||
if (depthSamplerView_[f]) { vkDestroyImageView(device, depthSamplerView_[f], nullptr); depthSamplerView_[f] = VK_NULL_HANDLE; }
|
||||
if (pyramidImage_[f]) { vmaDestroyImage(alloc, pyramidImage_[f], pyramidAlloc_[f]); pyramidImage_[f] = VK_NULL_HANDLE; }
|
||||
}
|
||||
}
|
||||
|
||||
// --- Compute pipeline ---
|
||||
|
||||
bool HiZSystem::createComputePipeline() {
|
||||
VkDevice device = ctx_->getDevice();
|
||||
|
||||
// Build descriptor set layout for pyramid build (set 0):
|
||||
// binding 0: combined image sampler (source depth / previous mip)
|
||||
// binding 1: storage image (destination mip)
|
||||
VkDescriptorSetLayoutBinding bindings[2] = {};
|
||||
bindings[0].binding = 0;
|
||||
bindings[0].descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER;
|
||||
bindings[0].descriptorCount = 1;
|
||||
bindings[0].stageFlags = VK_SHADER_STAGE_COMPUTE_BIT;
|
||||
bindings[1].binding = 1;
|
||||
bindings[1].descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE;
|
||||
bindings[1].descriptorCount = 1;
|
||||
bindings[1].stageFlags = VK_SHADER_STAGE_COMPUTE_BIT;
|
||||
|
||||
VkDescriptorSetLayoutCreateInfo layoutCi{VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO};
|
||||
layoutCi.bindingCount = 2;
|
||||
layoutCi.pBindings = bindings;
|
||||
if (vkCreateDescriptorSetLayout(device, &layoutCi, nullptr, &buildSetLayout_) != VK_SUCCESS) {
|
||||
LOG_ERROR("HiZSystem: failed to create build set layout");
|
||||
return false;
|
||||
}
|
||||
|
||||
// HiZ sampling layout (for M2 cull shader, set 1):
|
||||
// binding 0: combined image sampler (HiZ pyramid, all mips)
|
||||
VkDescriptorSetLayoutBinding hizBinding{};
|
||||
hizBinding.binding = 0;
|
||||
hizBinding.descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER;
|
||||
hizBinding.descriptorCount = 1;
|
||||
hizBinding.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT;
|
||||
|
||||
VkDescriptorSetLayoutCreateInfo hizLayoutCi{VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO};
|
||||
hizLayoutCi.bindingCount = 1;
|
||||
hizLayoutCi.pBindings = &hizBinding;
|
||||
if (vkCreateDescriptorSetLayout(device, &hizLayoutCi, nullptr, &hizSetLayout_) != VK_SUCCESS) {
|
||||
LOG_ERROR("HiZSystem: failed to create HiZ set layout");
|
||||
return false;
|
||||
}
|
||||
|
||||
// Push constant range for build shader
|
||||
VkPushConstantRange pushRange{};
|
||||
pushRange.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT;
|
||||
pushRange.offset = 0;
|
||||
pushRange.size = sizeof(HiZBuildPushConstants);
|
||||
|
||||
VkPipelineLayoutCreateInfo plCi{VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO};
|
||||
plCi.setLayoutCount = 1;
|
||||
plCi.pSetLayouts = &buildSetLayout_;
|
||||
plCi.pushConstantRangeCount = 1;
|
||||
plCi.pPushConstantRanges = &pushRange;
|
||||
if (vkCreatePipelineLayout(device, &plCi, nullptr, &buildPipelineLayout_) != VK_SUCCESS) {
|
||||
LOG_ERROR("HiZSystem: failed to create build pipeline layout");
|
||||
return false;
|
||||
}
|
||||
|
||||
// Load and create compute pipeline
|
||||
VkShaderModule buildShader;
|
||||
if (!buildShader.loadFromFile(device, "assets/shaders/hiz_build.comp.spv")) {
|
||||
LOG_ERROR("HiZSystem: failed to load hiz_build.comp.spv");
|
||||
return false;
|
||||
}
|
||||
|
||||
VkComputePipelineCreateInfo cpCi{VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO};
|
||||
cpCi.stage = buildShader.stageInfo(VK_SHADER_STAGE_COMPUTE_BIT);
|
||||
cpCi.layout = buildPipelineLayout_;
|
||||
if (vkCreateComputePipelines(device, ctx_->getPipelineCache(), 1, &cpCi, nullptr, &buildPipeline_) != VK_SUCCESS) {
|
||||
LOG_ERROR("HiZSystem: failed to create build compute pipeline");
|
||||
buildShader.destroy();
|
||||
return false;
|
||||
}
|
||||
buildShader.destroy();
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
void HiZSystem::destroyComputePipeline() {
|
||||
if (!ctx_) return;
|
||||
VkDevice device = ctx_->getDevice();
|
||||
|
||||
if (buildPipeline_) { vkDestroyPipeline(device, buildPipeline_, nullptr); buildPipeline_ = VK_NULL_HANDLE; }
|
||||
if (buildPipelineLayout_) { vkDestroyPipelineLayout(device, buildPipelineLayout_, nullptr); buildPipelineLayout_ = VK_NULL_HANDLE; }
|
||||
if (buildSetLayout_) { vkDestroyDescriptorSetLayout(device, buildSetLayout_, nullptr); buildSetLayout_ = VK_NULL_HANDLE; }
|
||||
if (hizSetLayout_) { vkDestroyDescriptorSetLayout(device, hizSetLayout_, nullptr); hizSetLayout_ = VK_NULL_HANDLE; }
|
||||
}
|
||||
|
||||
// --- Descriptors ---
|
||||
|
||||
bool HiZSystem::createDescriptors() {
|
||||
VkDevice device = ctx_->getDevice();
|
||||
|
||||
// Pool: per-frame × per-mip build sets + 2 HiZ sampling sets
|
||||
// Each build set needs 1 sampler + 1 storage image
|
||||
// Each HiZ sampling set needs 1 sampler
|
||||
const uint32_t totalBuildSets = MAX_FRAMES * mipLevels_;
|
||||
const uint32_t totalHizSets = MAX_FRAMES;
|
||||
const uint32_t totalSets = totalBuildSets + totalHizSets;
|
||||
|
||||
VkDescriptorPoolSize poolSizes[2] = {};
|
||||
poolSizes[0] = {VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, totalBuildSets + totalHizSets};
|
||||
poolSizes[1] = {VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, totalBuildSets};
|
||||
|
||||
VkDescriptorPoolCreateInfo poolCi{VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO};
|
||||
poolCi.maxSets = totalSets;
|
||||
poolCi.poolSizeCount = 2;
|
||||
poolCi.pPoolSizes = poolSizes;
|
||||
if (vkCreateDescriptorPool(device, &poolCi, nullptr, &buildDescPool_) != VK_SUCCESS) {
|
||||
LOG_ERROR("HiZSystem: failed to create descriptor pool");
|
||||
return false;
|
||||
}
|
||||
|
||||
// We use the same pool for both build and HiZ sets — simpler cleanup
|
||||
hizDescPool_ = VK_NULL_HANDLE; // sharing buildDescPool_
|
||||
|
||||
for (uint32_t f = 0; f < MAX_FRAMES; f++) {
|
||||
// Create a temporary depth image view for sampling the depth buffer.
|
||||
// This is SEPARATE from the VkContext's depth image view because we need
|
||||
// DEPTH aspect sampling which requires specific format view.
|
||||
{
|
||||
VkImage depthSrc = ctx_->getDepthCopySourceImage();
|
||||
VkImageViewCreateInfo viewCi{VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO};
|
||||
viewCi.image = depthSrc;
|
||||
viewCi.viewType = VK_IMAGE_VIEW_TYPE_2D;
|
||||
viewCi.format = ctx_->getDepthFormat();
|
||||
viewCi.subresourceRange.aspectMask = VK_IMAGE_ASPECT_DEPTH_BIT;
|
||||
viewCi.subresourceRange.levelCount = 1;
|
||||
viewCi.subresourceRange.layerCount = 1;
|
||||
if (vkCreateImageView(device, &viewCi, nullptr, &depthSamplerView_[f]) != VK_SUCCESS) {
|
||||
LOG_ERROR("HiZSystem: failed to create depth sampler view for frame ", f);
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
// Allocate per-mip build descriptor sets
|
||||
buildDescSets_[f].resize(mipLevels_);
|
||||
for (uint32_t mip = 0; mip < mipLevels_; mip++) {
|
||||
VkDescriptorSetAllocateInfo allocInfo{VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO};
|
||||
allocInfo.descriptorPool = buildDescPool_;
|
||||
allocInfo.descriptorSetCount = 1;
|
||||
allocInfo.pSetLayouts = &buildSetLayout_;
|
||||
if (vkAllocateDescriptorSets(device, &allocInfo, &buildDescSets_[f][mip]) != VK_SUCCESS) {
|
||||
LOG_ERROR("HiZSystem: failed to allocate build desc set frame=", f, " mip=", mip);
|
||||
return false;
|
||||
}
|
||||
|
||||
// Write descriptors:
|
||||
// Binding 0 (sampler): mip 0 reads depth buffer, mip N reads pyramid mip N-1
|
||||
VkDescriptorImageInfo srcInfo{};
|
||||
srcInfo.sampler = depthSampler_;
|
||||
if (mip == 0) {
|
||||
srcInfo.imageView = depthSamplerView_[f];
|
||||
srcInfo.imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
|
||||
} else {
|
||||
srcInfo.imageView = pyramidViewAll_[f]; // shader uses texelFetch with explicit mip
|
||||
srcInfo.imageLayout = VK_IMAGE_LAYOUT_GENERAL;
|
||||
}
|
||||
|
||||
// Binding 1 (storage image): write to current mip
|
||||
VkDescriptorImageInfo dstInfo{};
|
||||
dstInfo.imageView = pyramidMipViews_[f][mip];
|
||||
dstInfo.imageLayout = VK_IMAGE_LAYOUT_GENERAL;
|
||||
|
||||
VkWriteDescriptorSet writes[2] = {};
|
||||
writes[0] = {VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET};
|
||||
writes[0].dstSet = buildDescSets_[f][mip];
|
||||
writes[0].dstBinding = 0;
|
||||
writes[0].descriptorCount = 1;
|
||||
writes[0].descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER;
|
||||
writes[0].pImageInfo = &srcInfo;
|
||||
|
||||
writes[1] = {VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET};
|
||||
writes[1].dstSet = buildDescSets_[f][mip];
|
||||
writes[1].dstBinding = 1;
|
||||
writes[1].descriptorCount = 1;
|
||||
writes[1].descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE;
|
||||
writes[1].pImageInfo = &dstInfo;
|
||||
|
||||
vkUpdateDescriptorSets(device, 2, writes, 0, nullptr);
|
||||
}
|
||||
|
||||
// Allocate HiZ sampling descriptor set (for M2 cull shader)
|
||||
{
|
||||
VkDescriptorSetAllocateInfo allocInfo{VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO};
|
||||
allocInfo.descriptorPool = buildDescPool_;
|
||||
allocInfo.descriptorSetCount = 1;
|
||||
allocInfo.pSetLayouts = &hizSetLayout_;
|
||||
if (vkAllocateDescriptorSets(device, &allocInfo, &hizDescSet_[f]) != VK_SUCCESS) {
|
||||
LOG_ERROR("HiZSystem: failed to allocate HiZ sampling desc set for frame ", f);
|
||||
return false;
|
||||
}
|
||||
|
||||
VkDescriptorImageInfo hizInfo{};
|
||||
hizInfo.sampler = depthSampler_;
|
||||
hizInfo.imageView = pyramidViewAll_[f];
|
||||
hizInfo.imageLayout = VK_IMAGE_LAYOUT_GENERAL;
|
||||
|
||||
VkWriteDescriptorSet write{VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET};
|
||||
write.dstSet = hizDescSet_[f];
|
||||
write.dstBinding = 0;
|
||||
write.descriptorCount = 1;
|
||||
write.descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER;
|
||||
write.pImageInfo = &hizInfo;
|
||||
vkUpdateDescriptorSets(device, 1, &write, 0, nullptr);
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
void HiZSystem::destroyDescriptors() {
|
||||
if (!ctx_) return;
|
||||
VkDevice device = ctx_->getDevice();
|
||||
|
||||
// All descriptor sets are freed when pool is destroyed
|
||||
if (buildDescPool_) { vkDestroyDescriptorPool(device, buildDescPool_, nullptr); buildDescPool_ = VK_NULL_HANDLE; }
|
||||
// hizDescPool_ shares buildDescPool_, so nothing extra to destroy
|
||||
|
||||
for (uint32_t f = 0; f < MAX_FRAMES; f++) {
|
||||
buildDescSets_[f].clear();
|
||||
hizDescSet_[f] = VK_NULL_HANDLE;
|
||||
if (depthSamplerView_[f]) { vkDestroyImageView(device, depthSamplerView_[f], nullptr); depthSamplerView_[f] = VK_NULL_HANDLE; }
|
||||
}
|
||||
}
|
||||
|
||||
// --- Pyramid build dispatch ---
|
||||
|
||||
// Records the commands that rebuild the depth pyramid for one frame slot.
//
// Parameters:
//   cmd        - command buffer in the recording state.
//   frameIndex - frame slot selecting the pyramid image and descriptor sets;
//                must be < MAX_FRAMES (out-of-range values are ignored).
//   depthImage - scene depth image sampled for mip 0.
//                NOTE(review): assumed to be in DEPTH_STENCIL_ATTACHMENT_OPTIMAL
//                on entry (that is the oldLayout used below) — confirm this
//                also holds the first time the function runs.
//
// Recorded sequence:
//   1. depth image: DEPTH_STENCIL_ATTACHMENT_OPTIMAL -> SHADER_READ_ONLY_OPTIMAL
//   2. pyramid (all mips): UNDEFINED -> GENERAL (previous contents are discarded)
//   3. one compute dispatch per mip level, each followed by a write->read barrier
//   4. depth image: SHADER_READ_ONLY_OPTIMAL -> DEPTH_STENCIL_ATTACHMENT_OPTIMAL
//
// Does nothing when the system is not ready or the pipeline is missing.
void HiZSystem::buildPyramid(VkCommandBuffer cmd, uint32_t frameIndex, VkImage depthImage) {
    ZoneScopedN("HiZSystem::buildPyramid");
    if (!ready_ || !buildPipeline_) return;
    // Per-frame arrays are indexed with frameIndex below; reject out-of-range slots.
    if (frameIndex >= MAX_FRAMES) return;

    // Depth is read and written in both the early and the late fragment-test
    // stage, so both are named when synchronizing against depth attachment use.
    const VkPipelineStageFlags depthTestStages =
        VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT | VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT;

    // Transition depth image from DEPTH_STENCIL_ATTACHMENT to SHADER_READ_ONLY for sampling
    {
        VkImageMemoryBarrier barrier{VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER};
        barrier.srcAccessMask = VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT;
        barrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT;
        barrier.oldLayout = VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL;
        barrier.newLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
        barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
        barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
        barrier.image = depthImage;
        barrier.subresourceRange.aspectMask = VK_IMAGE_ASPECT_DEPTH_BIT;
        barrier.subresourceRange.levelCount = 1;
        barrier.subresourceRange.layerCount = 1;

        vkCmdPipelineBarrier(cmd,
            depthTestStages,
            VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
            0, 0, nullptr, 0, nullptr, 1, &barrier);
    }

    // Transition entire pyramid to GENERAL layout for storage writes.
    // oldLayout is UNDEFINED because every level is rewritten below.
    {
        VkImageMemoryBarrier barrier{VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER};
        barrier.srcAccessMask = 0;
        barrier.dstAccessMask = VK_ACCESS_SHADER_WRITE_BIT;
        barrier.oldLayout = VK_IMAGE_LAYOUT_UNDEFINED;
        barrier.newLayout = VK_IMAGE_LAYOUT_GENERAL;
        barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
        barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
        barrier.image = pyramidImage_[frameIndex];
        barrier.subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;
        barrier.subresourceRange.baseMipLevel = 0;
        barrier.subresourceRange.levelCount = mipLevels_;
        barrier.subresourceRange.layerCount = 1;

        vkCmdPipelineBarrier(cmd,
            VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
            VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
            0, 0, nullptr, 0, nullptr, 1, &barrier);
    }

    vkCmdBindPipeline(cmd, VK_PIPELINE_BIND_POINT_COMPUTE, buildPipeline_);

    // Build each mip level sequentially
    uint32_t mipW = pyramidWidth_;
    uint32_t mipH = pyramidHeight_;

    for (uint32_t mip = 0; mip < mipLevels_; mip++) {
        // Bind descriptor set for this mip level
        vkCmdBindDescriptorSets(cmd, VK_PIPELINE_BIND_POINT_COMPUTE,
            buildPipelineLayout_, 0, 1, &buildDescSets_[frameIndex][mip], 0, nullptr);

        // Push constants: destination size + mip level
        HiZBuildPushConstants pc{};
        pc.dstWidth = static_cast<int32_t>(mipW);
        pc.dstHeight = static_cast<int32_t>(mipH);
        pc.mipLevel = static_cast<int32_t>(mip);
        vkCmdPushConstants(cmd, buildPipelineLayout_, VK_SHADER_STAGE_COMPUTE_BIT,
            0, sizeof(pc), &pc);

        // Dispatch compute: one 8x8 workgroup per 8x8 destination texels, rounded up
        const uint32_t groupsX = (mipW + 7) / 8;
        const uint32_t groupsY = (mipH + 7) / 8;
        vkCmdDispatch(cmd, groupsX, groupsY, 1);

        // Make the writes to this level visible to later compute-shader reads.
        // This is emitted after EVERY level, including the last one: for inner
        // levels the reader is the next build dispatch, for the final level it
        // is whatever compute work samples the finished pyramid later on this
        // command buffer. Skipping it for the last level would leave those
        // reads unsynchronized against the final dispatch.
        {
            VkImageMemoryBarrier mipBarrier{VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER};
            mipBarrier.srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT;
            mipBarrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT;
            mipBarrier.oldLayout = VK_IMAGE_LAYOUT_GENERAL;
            mipBarrier.newLayout = VK_IMAGE_LAYOUT_GENERAL;
            mipBarrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
            mipBarrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
            mipBarrier.image = pyramidImage_[frameIndex];
            mipBarrier.subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;
            mipBarrier.subresourceRange.baseMipLevel = mip;
            mipBarrier.subresourceRange.levelCount = 1;
            mipBarrier.subresourceRange.layerCount = 1;

            vkCmdPipelineBarrier(cmd,
                VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
                VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
                0, 0, nullptr, 0, nullptr, 1, &mipBarrier);
        }

        // Next mip level dimensions (halved, never below 1)
        mipW = std::max(1u, mipW / 2);
        mipH = std::max(1u, mipH / 2);
    }

    // Transition depth back to DEPTH_STENCIL_ATTACHMENT for next frame
    {
        VkImageMemoryBarrier barrier{VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER};
        barrier.srcAccessMask = VK_ACCESS_SHADER_READ_BIT;
        barrier.dstAccessMask = VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT |
                                VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT;
        barrier.oldLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
        barrier.newLayout = VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL;
        barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
        barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
        barrier.image = depthImage;
        barrier.subresourceRange.aspectMask = VK_IMAGE_ASPECT_DEPTH_BIT;
        barrier.subresourceRange.levelCount = 1;
        barrier.subresourceRange.layerCount = 1;

        vkCmdPipelineBarrier(cmd,
            VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
            depthTestStages,
            0, 0, nullptr, 0, nullptr, 1, &barrier);
    }
}
|
||||
|
||||
} // namespace rendering
|
||||
} // namespace wowee
|
||||
|
|
@ -295,7 +295,7 @@ bool M2Renderer::initialize(VkContext* ctx, VkDescriptorSetLayout perFrameLayout
|
|||
// Output: uint visibility[] read back by CPU to skip culled instances in sortedVisible_ build.
|
||||
{
|
||||
static_assert(sizeof(CullInstanceGPU) == 32, "CullInstanceGPU must be 32 bytes (std430)");
|
||||
static_assert(sizeof(CullUniformsGPU) == 128, "CullUniformsGPU must be 128 bytes (std140)");
|
||||
static_assert(sizeof(CullUniformsGPU) == 272, "CullUniformsGPU must be 272 bytes (std140)");
|
||||
|
||||
// Descriptor set layout: binding 0 = UBO (frustum+camera), 1 = SSBO (input), 2 = SSBO (output)
|
||||
VkDescriptorSetLayoutBinding bindings[3] = {};
|
||||
|
|
@ -338,6 +338,54 @@ bool M2Renderer::initialize(VkContext* ctx, VkDescriptorSetLayout perFrameLayout
|
|||
cullComp.destroy();
|
||||
}
|
||||
|
||||
// HiZ-aware cull pipeline (Phase 6.3 Option B)
|
||||
// Uses set 0 (same as frustum-only) + set 1 (HiZ pyramid sampler from HiZSystem).
|
||||
// The HiZ descriptor set layout is created lazily when hizSystem_ is set, but the
|
||||
// pipeline layout and shader are created now if the shader is available.
|
||||
rendering::VkShaderModule cullHiZComp;
|
||||
if (cullHiZComp.loadFromFile(device, "assets/shaders/m2_cull_hiz.comp.spv")) {
|
||||
// HiZ cull set 1 layout: single combined image sampler (the HiZ pyramid)
|
||||
VkDescriptorSetLayoutBinding hizBinding{};
|
||||
hizBinding.binding = 0;
|
||||
hizBinding.descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER;
|
||||
hizBinding.descriptorCount = 1;
|
||||
hizBinding.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT;
|
||||
|
||||
VkDescriptorSetLayout hizSamplerLayout = VK_NULL_HANDLE;
|
||||
VkDescriptorSetLayoutCreateInfo hizLayoutCi{VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO};
|
||||
hizLayoutCi.bindingCount = 1;
|
||||
hizLayoutCi.pBindings = &hizBinding;
|
||||
vkCreateDescriptorSetLayout(device, &hizLayoutCi, nullptr, &hizSamplerLayout);
|
||||
|
||||
VkDescriptorSetLayout hizSetLayouts[2] = {cullSetLayout_, hizSamplerLayout};
|
||||
VkPipelineLayoutCreateInfo hizPlCi{VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO};
|
||||
hizPlCi.setLayoutCount = 2;
|
||||
hizPlCi.pSetLayouts = hizSetLayouts;
|
||||
vkCreatePipelineLayout(device, &hizPlCi, nullptr, &cullHiZPipelineLayout_);
|
||||
|
||||
VkComputePipelineCreateInfo hizCpCi{VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO};
|
||||
hizCpCi.stage = cullHiZComp.stageInfo(VK_SHADER_STAGE_COMPUTE_BIT);
|
||||
hizCpCi.layout = cullHiZPipelineLayout_;
|
||||
if (vkCreateComputePipelines(device, VK_NULL_HANDLE, 1, &hizCpCi, nullptr, &cullHiZPipeline_) != VK_SUCCESS) {
|
||||
LOG_WARNING("M2Renderer: failed to create HiZ cull compute pipeline — HiZ disabled");
|
||||
cullHiZPipeline_ = VK_NULL_HANDLE;
|
||||
vkDestroyPipelineLayout(device, cullHiZPipelineLayout_, nullptr);
|
||||
cullHiZPipelineLayout_ = VK_NULL_HANDLE;
|
||||
} else {
|
||||
LOG_INFO("M2Renderer: HiZ occlusion cull pipeline created");
|
||||
}
|
||||
|
||||
// hizSamplerLayout is only needed while the pipeline layout is being created:
// Vulkan does not require a descriptor set layout to outlive a pipeline layout
// built from it, so it is destroyed right away instead of being tracked.
// At dispatch time set 1 is bound with the descriptor set provided by HiZSystem.
|
||||
vkDestroyDescriptorSetLayout(device, hizSamplerLayout, nullptr);
|
||||
|
||||
cullHiZComp.destroy();
|
||||
} else {
|
||||
LOG_INFO("M2Renderer: m2_cull_hiz.comp.spv not found — HiZ occlusion culling not available");
|
||||
}
|
||||
|
||||
// Descriptor pool: 2 sets × 3 descriptors each (1 UBO + 2 SSBO)
|
||||
VkDescriptorPoolSize poolSizes[2] = {};
|
||||
poolSizes[0] = {VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, 2};
|
||||
|
|
@ -756,6 +804,14 @@ bool M2Renderer::initialize(VkContext* ctx, VkDescriptorSetLayout perFrameLayout
|
|||
return true;
|
||||
}
|
||||
|
||||
void M2Renderer::invalidateCullOutput(uint32_t frameIndex) {
|
||||
// On non-HOST_COHERENT memory, VMA-mapped GPU→CPU buffers need explicit
|
||||
// invalidation so the CPU cache sees the latest GPU writes.
|
||||
if (frameIndex < 2 && cullOutputAlloc_[frameIndex]) {
|
||||
vmaInvalidateAllocation(vkCtx_->getAllocator(), cullOutputAlloc_[frameIndex], 0, VK_WHOLE_SIZE);
|
||||
}
|
||||
}
|
||||
|
||||
void M2Renderer::shutdown() {
|
||||
LOG_INFO("Shutting down M2 renderer...");
|
||||
if (!vkCtx_) return;
|
||||
|
|
@ -837,6 +893,8 @@ void M2Renderer::shutdown() {
|
|||
if (instanceDescPool_) { vkDestroyDescriptorPool(device, instanceDescPool_, nullptr); instanceDescPool_ = VK_NULL_HANDLE; }
|
||||
|
||||
// GPU frustum culling compute pipeline + buffers cleanup
|
||||
if (cullHiZPipeline_) { vkDestroyPipeline(device, cullHiZPipeline_, nullptr); cullHiZPipeline_ = VK_NULL_HANDLE; }
|
||||
if (cullHiZPipelineLayout_) { vkDestroyPipelineLayout(device, cullHiZPipelineLayout_, nullptr); cullHiZPipelineLayout_ = VK_NULL_HANDLE; }
|
||||
if (cullPipeline_) { vkDestroyPipeline(device, cullPipeline_, nullptr); cullPipeline_ = VK_NULL_HANDLE; }
|
||||
if (cullPipelineLayout_) { vkDestroyPipelineLayout(device, cullPipelineLayout_, nullptr); cullPipelineLayout_ = VK_NULL_HANDLE; }
|
||||
for (int i = 0; i < 2; i++) {
|
||||
|
|
|
|||
|
|
@ -1,6 +1,7 @@
|
|||
#include "rendering/m2_renderer.hpp"
|
||||
#include "rendering/m2_renderer_internal.h"
|
||||
#include "rendering/m2_model_classifier.hpp"
|
||||
#include "rendering/hiz_system.hpp"
|
||||
#include "rendering/vk_context.hpp"
|
||||
#include "rendering/vk_buffer.hpp"
|
||||
#include "rendering/vk_texture.hpp"
|
||||
|
|
@ -600,6 +601,49 @@ void M2Renderer::dispatchCullCompute(VkCommandBuffer cmd, uint32_t frameIndex, c
|
|||
}
|
||||
ubo->cameraPos = glm::vec4(camPos, maxPossibleDistSq);
|
||||
ubo->instanceCount = numInstances;
|
||||
|
||||
// HiZ occlusion culling fields
|
||||
const bool hizReady = hizSystem_ && hizSystem_->isReady();
|
||||
|
||||
// Auto-disable HiZ when the camera has moved/rotated significantly.
|
||||
// Large VP changes make the depth pyramid unreliable because the
|
||||
// reprojected screen positions diverge from the actual pyramid data.
|
||||
bool hizSafe = hizReady;
|
||||
if (hizReady) {
|
||||
// Compare current VP against previous VP — largest absolute element-wise difference.
|
||||
float maxDiff = 0.0f;
|
||||
const float* curM = &vp[0][0];
|
||||
const float* prevM = &prevVP_[0][0];
|
||||
for (int k = 0; k < 16; ++k)
|
||||
maxDiff = std::max(maxDiff, std::abs(curM[k] - prevM[k]));
|
||||
// Threshold: typical small camera motion produces diffs < 0.05.
|
||||
// A fast rotation easily exceeds 0.3. Skip HiZ when diff is large.
|
||||
if (maxDiff > 0.15f) hizSafe = false;
|
||||
}
|
||||
|
||||
ubo->hizEnabled = hizSafe ? 1u : 0u;
|
||||
ubo->hizMipLevels = hizReady ? hizSystem_->getMipLevels() : 0u;
|
||||
ubo->_pad2 = 0;
|
||||
if (hizReady) {
|
||||
ubo->hizParams = glm::vec4(
|
||||
static_cast<float>(hizSystem_->getPyramidWidth()),
|
||||
static_cast<float>(hizSystem_->getPyramidHeight()),
|
||||
camera.getNearPlane(),
|
||||
0.0f
|
||||
);
|
||||
ubo->viewProj = vp;
|
||||
// Use previous frame's VP for HiZ reprojection — the HiZ pyramid
|
||||
// was built from the previous frame's depth, so we must project
|
||||
// into the same screen space to sample the correct depths.
|
||||
ubo->prevViewProj = prevVP_;
|
||||
} else {
|
||||
ubo->hizParams = glm::vec4(0.0f);
|
||||
ubo->viewProj = glm::mat4(1.0f);
|
||||
ubo->prevViewProj = glm::mat4(1.0f);
|
||||
}
|
||||
|
||||
// Save current VP for next frame's temporal reprojection
|
||||
prevVP_ = vp;
|
||||
}
|
||||
|
||||
// --- Upload per-instance cull data (SSBO, binding 1) ---
|
||||
|
|
@ -622,6 +666,10 @@ void M2Renderer::dispatchCullCompute(VkCommandBuffer cmd, uint32_t frameIndex, c
|
|||
if (inst.cachedIsValid) flags |= 1u;
|
||||
if (inst.cachedIsSmoke) flags |= 2u;
|
||||
if (inst.cachedIsInvisibleTrap) flags |= 4u;
|
||||
// Bit 3: previouslyVisible — the shader skips HiZ for objects
|
||||
// that were NOT rendered last frame (no reliable depth data).
|
||||
if (i < prevFrameVisible_.size() && prevFrameVisible_[i])
|
||||
flags |= 8u;
|
||||
|
||||
input[i].sphere = glm::vec4(inst.position, paddedRadius);
|
||||
input[i].effectiveMaxDistSq = effectiveMaxDistSq;
|
||||
|
|
@ -630,9 +678,22 @@ void M2Renderer::dispatchCullCompute(VkCommandBuffer cmd, uint32_t frameIndex, c
|
|||
}
|
||||
|
||||
// --- Dispatch compute shader ---
|
||||
vkCmdBindPipeline(cmd, VK_PIPELINE_BIND_POINT_COMPUTE, cullPipeline_);
|
||||
vkCmdBindDescriptorSets(cmd, VK_PIPELINE_BIND_POINT_COMPUTE,
|
||||
cullPipelineLayout_, 0, 1, &cullSet_[frameIndex], 0, nullptr);
|
||||
const bool useHiZ = (cullHiZPipeline_ != VK_NULL_HANDLE)
|
||||
&& hizSystem_ && hizSystem_->isReady();
|
||||
if (useHiZ) {
|
||||
vkCmdBindPipeline(cmd, VK_PIPELINE_BIND_POINT_COMPUTE, cullHiZPipeline_);
|
||||
// Set 0: cull UBO + input/output SSBOs
|
||||
vkCmdBindDescriptorSets(cmd, VK_PIPELINE_BIND_POINT_COMPUTE,
|
||||
cullHiZPipelineLayout_, 0, 1, &cullSet_[frameIndex], 0, nullptr);
|
||||
// Set 1: HiZ pyramid sampler
|
||||
VkDescriptorSet hizSet = hizSystem_->getDescriptorSet(frameIndex);
|
||||
vkCmdBindDescriptorSets(cmd, VK_PIPELINE_BIND_POINT_COMPUTE,
|
||||
cullHiZPipelineLayout_, 1, 1, &hizSet, 0, nullptr);
|
||||
} else {
|
||||
vkCmdBindPipeline(cmd, VK_PIPELINE_BIND_POINT_COMPUTE, cullPipeline_);
|
||||
vkCmdBindDescriptorSets(cmd, VK_PIPELINE_BIND_POINT_COMPUTE,
|
||||
cullPipelineLayout_, 0, 1, &cullSet_[frameIndex], 0, nullptr);
|
||||
}
|
||||
|
||||
const uint32_t groupCount = (numInstances + 63) / 64;
|
||||
vkCmdDispatch(cmd, groupCount, 1, 1);
|
||||
|
|
@ -693,6 +754,19 @@ void M2Renderer::render(VkCommandBuffer cmd, VkDescriptorSet perFrameSet, const
|
|||
const uint32_t* visibility = static_cast<const uint32_t*>(cullOutputMapped_[frameIndex]);
|
||||
const bool gpuCullAvailable = (cullPipeline_ != VK_NULL_HANDLE && visibility != nullptr);
|
||||
|
||||
// Snapshot the GPU visibility results into prevFrameVisible_ so the NEXT
|
||||
// frame's compute dispatch can set the per-instance `previouslyVisible`
|
||||
// flag (bit 3). Objects not visible this frame will skip HiZ next frame,
|
||||
// avoiding false culls from stale depth data.
|
||||
if (gpuCullAvailable) {
|
||||
prevFrameVisible_.resize(numInstances);
|
||||
for (uint32_t i = 0; i < numInstances; ++i)
|
||||
prevFrameVisible_[i] = visibility[i] ? 1u : 0u;
|
||||
} else {
|
||||
// No GPU cull data — conservatively mark all as visible
|
||||
prevFrameVisible_.assign(static_cast<size_t>(instances.size()), 1u);
|
||||
}
|
||||
|
||||
// If GPU culling was not dispatched, fallback: compute distances on CPU
|
||||
float maxRenderDistanceSq;
|
||||
if (!gpuCullAvailable) {
|
||||
|
|
@ -1074,7 +1148,10 @@ void M2Renderer::render(VkCommandBuffer cmd, VkDescriptorSet perFrameSet, const
|
|||
// Update material UBO
|
||||
if (batch.materialUBOMapped) {
|
||||
auto* mat = static_cast<M2MaterialUBO*>(batch.materialUBOMapped);
|
||||
mat->interiorDarken = insideInterior ? 1.0f : 0.0f;
|
||||
// interiorDarken is a camera-based flag — it darkens ALL M2s (incl.
|
||||
// outdoor trees) when the camera is inside a WMO. Disable it; indoor
|
||||
// M2s already look correct from the darker ambient/lighting.
|
||||
mat->interiorDarken = 0.0f;
|
||||
if (batch.colorKeyBlack)
|
||||
mat->colorKeyThreshold = (effectiveBlendMode == 4 || effectiveBlendMode == 5) ? 0.7f : 0.08f;
|
||||
if (forceCutout) {
|
||||
|
|
@ -1265,7 +1342,7 @@ void M2Renderer::render(VkCommandBuffer cmd, VkDescriptorSet perFrameSet, const
|
|||
|
||||
if (batch.materialUBOMapped) {
|
||||
auto* mat = static_cast<M2MaterialUBO*>(batch.materialUBOMapped);
|
||||
mat->interiorDarken = insideInterior ? 1.0f : 0.0f;
|
||||
mat->interiorDarken = 0.0f;
|
||||
if (batch.colorKeyBlack)
|
||||
mat->colorKeyThreshold = (effectiveBlendMode == 4 || effectiveBlendMode == 5) ? 0.7f : 0.08f;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -23,6 +23,7 @@
|
|||
#include "rendering/character_preview.hpp"
|
||||
#include "rendering/wmo_renderer.hpp"
|
||||
#include "rendering/m2_renderer.hpp"
|
||||
#include "rendering/hiz_system.hpp"
|
||||
#include "rendering/minimap.hpp"
|
||||
#include "rendering/world_map.hpp"
|
||||
#include "rendering/quest_marker_renderer.hpp"
|
||||
|
|
@ -580,7 +581,6 @@ bool Renderer::initialize(core::Window* win) {
|
|||
overlaySystem_ = std::make_unique<OverlaySystem>(vkCtx);
|
||||
renderGraph_->registerResource("shadow_depth");
|
||||
renderGraph_->registerResource("reflection_texture");
|
||||
renderGraph_->registerResource("cull_visibility");
|
||||
renderGraph_->registerResource("scene_color");
|
||||
renderGraph_->registerResource("scene_depth");
|
||||
renderGraph_->registerResource("final_image");
|
||||
|
|
@ -672,6 +672,10 @@ void Renderer::shutdown() {
|
|||
}
|
||||
|
||||
LOG_DEBUG("Renderer::shutdown - m2Renderer...");
|
||||
if (hizSystem_) {
|
||||
hizSystem_->shutdown();
|
||||
hizSystem_.reset();
|
||||
}
|
||||
if (m2Renderer) {
|
||||
m2Renderer->shutdown();
|
||||
m2Renderer.reset();
|
||||
|
|
@ -798,6 +802,17 @@ void Renderer::applyMsaaChange() {
|
|||
|
||||
if (minimap) minimap->recreatePipelines();
|
||||
|
||||
// Resize HiZ pyramid (depth format/MSAA may have changed)
|
||||
if (hizSystem_) {
|
||||
auto ext = vkCtx->getSwapchainExtent();
|
||||
if (!hizSystem_->resize(ext.width, ext.height)) {
|
||||
LOG_WARNING("HiZ resize failed after MSAA change");
|
||||
if (m2Renderer) m2Renderer->setHiZSystem(nullptr);
|
||||
hizSystem_->shutdown();
|
||||
hizSystem_.reset();
|
||||
}
|
||||
}
|
||||
|
||||
// Selection circle + overlay + FSR use lazy init, just destroy them
|
||||
if (overlaySystem_) overlaySystem_->recreatePipelines();
|
||||
if (postProcessPipeline_) postProcessPipeline_->destroyAllResources(); // Will be lazily recreated in beginFrame()
|
||||
|
|
@ -846,6 +861,16 @@ void Renderer::beginFrame() {
|
|||
}
|
||||
// Recreate post-process resources for new swapchain dimensions
|
||||
if (postProcessPipeline_) postProcessPipeline_->handleSwapchainResize();
|
||||
// Resize HiZ depth pyramid for new swapchain dimensions
|
||||
if (hizSystem_) {
|
||||
auto ext = vkCtx->getSwapchainExtent();
|
||||
if (!hizSystem_->resize(ext.width, ext.height)) {
|
||||
LOG_WARNING("HiZ resize failed — disabling occlusion culling");
|
||||
if (m2Renderer) m2Renderer->setHiZSystem(nullptr);
|
||||
hizSystem_->shutdown();
|
||||
hizSystem_.reset();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Acquire swapchain image and begin command buffer
|
||||
|
|
@ -864,6 +889,31 @@ void Renderer::beginFrame() {
|
|||
// Update per-frame UBO with current camera/lighting state
|
||||
updatePerFrameUBO();
|
||||
|
||||
// ── Early compute: HiZ pyramid build + M2 frustum/occlusion cull ──
|
||||
// These run in a SEPARATE command buffer submission so the GPU executes
|
||||
// them immediately. The CPU then reads the fresh visibility results
|
||||
// before recording the main render pass — eliminating the 2-frame
|
||||
// staleness that occurs when compute + render share one submission.
|
||||
if (m2Renderer && camera && vkCtx) {
|
||||
VkCommandBuffer computeCmd = vkCtx->beginSingleTimeCommands();
|
||||
uint32_t frame = vkCtx->getCurrentFrame();
|
||||
|
||||
// Build HiZ depth pyramid from previous frame's depth buffer
|
||||
if (hizSystem_ && hizSystem_->isReady()) {
|
||||
VkImage depthSrc = vkCtx->getDepthCopySourceImage();
|
||||
hizSystem_->buildPyramid(computeCmd, frame, depthSrc);
|
||||
}
|
||||
|
||||
// Dispatch GPU frustum + HiZ occlusion culling
|
||||
m2Renderer->dispatchCullCompute(computeCmd, frame, *camera);
|
||||
|
||||
vkCtx->endSingleTimeCommands(computeCmd);
|
||||
|
||||
// Ensure GPU→CPU buffer writes are visible to host (non-coherent memory).
|
||||
m2Renderer->invalidateCullOutput(frame);
|
||||
// Visibility results are now in cullOutputMapped_[frame], readable by CPU.
|
||||
}
|
||||
|
||||
// --- Off-screen pre-passes ---
|
||||
// Build frame graph: registers pre-passes as graph nodes with dependencies.
|
||||
// compile() topologically sorts; execute() runs them with auto barriers.
|
||||
|
|
@ -1489,7 +1539,9 @@ void Renderer::renderWorld(game::World* world, game::GameHandler* gameHandler) {
|
|||
if (parallelRecordingEnabled_) {
|
||||
// --- Pre-compute state + GPU allocations on main thread (not thread-safe) ---
|
||||
if (m2Renderer && cameraController) {
|
||||
m2Renderer->setInsideInterior(cameraController->isInsideWMO());
|
||||
// Use isInsideInteriorWMO (flag 0x2000) — not isInsideWMO which includes
|
||||
// outdoor WMO groups like archways/bridges that should receive shadows.
|
||||
m2Renderer->setInsideInterior(cameraController->isInsideInteriorWMO());
|
||||
m2Renderer->setOnTaxi(cameraController->isOnTaxi());
|
||||
}
|
||||
if (wmoRenderer) wmoRenderer->prepareRender();
|
||||
|
|
@ -1734,7 +1786,8 @@ void Renderer::renderWorld(game::World* world, game::GameHandler* gameHandler) {
|
|||
|
||||
if (m2Renderer && camera && !skipM2) {
|
||||
if (cameraController) {
|
||||
m2Renderer->setInsideInterior(cameraController->isInsideWMO());
|
||||
// Use isInsideInteriorWMO (flag 0x2000) for correct indoor detection
|
||||
m2Renderer->setInsideInterior(cameraController->isInsideInteriorWMO());
|
||||
m2Renderer->setOnTaxi(cameraController->isOnTaxi());
|
||||
}
|
||||
m2Renderer->prepareRender(frameIdx, *camera);
|
||||
|
|
@ -1887,6 +1940,23 @@ bool Renderer::initializeRenderers(pipeline::AssetManager* assetManager, const s
|
|||
spellVisualSystem_->initialize(m2Renderer.get());
|
||||
}
|
||||
}
|
||||
|
||||
// HiZ occlusion culling — temporal reprojection.
|
||||
// The HiZ pyramid is built from the previous frame's depth buffer. The cull
|
||||
// compute shader uses prevViewProj to project objects into the previous frame's
|
||||
// screen space so that depth samples match the pyramid, eliminating flicker
|
||||
// caused by camera movement between frames.
|
||||
if (!hizSystem_ && m2Renderer && vkCtx) {
|
||||
hizSystem_ = std::make_unique<HiZSystem>();
|
||||
auto extent = vkCtx->getSwapchainExtent();
|
||||
if (hizSystem_->initialize(vkCtx, extent.width, extent.height)) {
|
||||
m2Renderer->setHiZSystem(hizSystem_.get());
|
||||
LOG_INFO("HiZ occlusion culling initialized (", extent.width, "x", extent.height, ")");
|
||||
} else {
|
||||
LOG_WARNING("HiZ occlusion culling unavailable — falling back to frustum-only culling");
|
||||
hizSystem_.reset();
|
||||
}
|
||||
}
|
||||
if (!wmoRenderer) {
|
||||
wmoRenderer = std::make_unique<WMORenderer>();
|
||||
wmoRenderer->initialize(vkCtx, perFrameSetLayout, assetManager);
|
||||
|
|
@ -2627,7 +2697,6 @@ void Renderer::buildFrameGraph(game::GameHandler* gameHandler) {
|
|||
|
||||
auto shadowDepth = renderGraph_->findResource("shadow_depth");
|
||||
auto reflTex = renderGraph_->findResource("reflection_texture");
|
||||
auto cullVis = renderGraph_->findResource("cull_visibility");
|
||||
|
||||
// Minimap composites (no dependencies — standalone off-screen render target)
|
||||
renderGraph_->addPass("minimap_composite", {}, {},
|
||||
|
|
@ -2670,13 +2739,6 @@ void Renderer::buildFrameGraph(game::GameHandler* gameHandler) {
|
|||
renderReflectionPass();
|
||||
});
|
||||
|
||||
// GPU frustum cull compute → outputs cull_visibility
|
||||
renderGraph_->addPass("compute_cull", {}, {cullVis},
|
||||
[this](VkCommandBuffer cmd) {
|
||||
if (m2Renderer && camera)
|
||||
m2Renderer->dispatchCullCompute(cmd, vkCtx->getCurrentFrame(), *camera);
|
||||
});
|
||||
|
||||
renderGraph_->compile();
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -798,7 +798,8 @@ bool VkContext::createDepthBuffer() {
|
|||
imgInfo.arrayLayers = 1;
|
||||
imgInfo.samples = msaaSamples_;
|
||||
imgInfo.tiling = VK_IMAGE_TILING_OPTIMAL;
|
||||
imgInfo.usage = VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT;
|
||||
imgInfo.usage = VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT
|
||||
| VK_IMAGE_USAGE_SAMPLED_BIT; // HiZ pyramid reads depth as texture
|
||||
|
||||
VmaAllocationCreateInfo allocInfo{};
|
||||
allocInfo.usage = VMA_MEMORY_USAGE_GPU_ONLY;
|
||||
|
|
@ -911,7 +912,8 @@ bool VkContext::createDepthResolveImage() {
|
|||
imgInfo.arrayLayers = 1;
|
||||
imgInfo.samples = VK_SAMPLE_COUNT_1_BIT;
|
||||
imgInfo.tiling = VK_IMAGE_TILING_OPTIMAL;
|
||||
imgInfo.usage = VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT;
|
||||
imgInfo.usage = VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT
|
||||
| VK_IMAGE_USAGE_SAMPLED_BIT; // HiZ pyramid reads depth as texture
|
||||
|
||||
VmaAllocationCreateInfo allocInfo{};
|
||||
allocInfo.usage = VMA_MEMORY_USAGE_GPU_ONLY;
|
||||
|
|
|
|||
|
|
@ -197,6 +197,19 @@ target_link_libraries(test_anim_capability PRIVATE catch2_main)
|
|||
add_test(NAME anim_capability COMMAND test_anim_capability)
|
||||
register_test_target(test_anim_capability)
|
||||
|
||||
# ── test_indoor_shadows ──────────────────────────────────────
# Unit test executable built from test_indoor_shadows.cpp and registered
# with CTest under the name "indoor_shadows".
add_executable(test_indoor_shadows test_indoor_shadows.cpp)

# Project headers are regular includes; third-party headers are SYSTEM includes.
target_include_directories(test_indoor_shadows PRIVATE ${TEST_INCLUDE_DIRS})
target_include_directories(test_indoor_shadows SYSTEM PRIVATE ${TEST_SYSTEM_INCLUDE_DIRS})

target_link_libraries(test_indoor_shadows PRIVATE catch2_main)
# Link the glm::glm target only when this build defines it.
if(TARGET glm::glm)
    target_link_libraries(test_indoor_shadows PRIVATE glm::glm)
endif()

add_test(NAME indoor_shadows COMMAND test_indoor_shadows)
register_test_target(test_indoor_shadows)
|
||||
|
||||
# ── ASAN / UBSan for test targets ────────────────────────────
|
||||
if(WOWEE_ENABLE_ASAN AND NOT MSVC)
|
||||
foreach(_t IN LISTS ALL_TEST_TARGETS)
|
||||
|
|
|
|||
122
tests/test_indoor_shadows.cpp
Normal file
122
tests/test_indoor_shadows.cpp
Normal file
|
|
@ -0,0 +1,122 @@
|
|||
// Tests for indoor shadow disable logic (WMO interior groups)
|
||||
//
|
||||
// WMO interior groups (flag 0x2000) should NOT receive directional sun shadows
|
||||
// because they rely on pre-baked vertex color lighting (MOCV) and the shadow map
|
||||
// only makes them darker. The fix is in the fragment shader: interior groups
|
||||
// skip the shadow map sample entirely.
|
||||
//
|
||||
// These tests verify the data contract between the renderer and the shader:
|
||||
// - GPUPerFrameData.shadowParams.x controls global shadow enable
|
||||
// - WMOMaterial.isInterior controls per-group interior flag
|
||||
// - Interior groups ignore shadows regardless of global shadow state
|
||||
|
||||
#include <catch_amalgamated.hpp>
|
||||
#include "rendering/vk_frame_data.hpp"
|
||||
|
||||
#include <glm/glm.hpp>
|
||||
|
||||
using wowee::rendering::GPUPerFrameData;
|
||||
|
||||
// Replicates the shadow params logic from Renderer::updatePerFrameUBO()
|
||||
// This should NOT be affected by indoor state — shadows remain globally enabled
|
||||
static void applyShadowParams(GPUPerFrameData& fd,
|
||||
bool shadowsEnabled,
|
||||
float shadowDistance = 300.0f) {
|
||||
float shadowBias = glm::clamp(0.8f * (shadowDistance / 300.0f), 0.0f, 1.0f);
|
||||
fd.shadowParams = glm::vec4(shadowsEnabled ? 1.0f : 0.0f, shadowBias, 0.0f, 0.0f);
|
||||
}
|
||||
|
||||
// Test-local model of the WMO shader's shadow decision.
//
// Returns the multiplier applied for shadowing, where 1.0 means "fully lit":
//   - interior groups always yield 1.0 (the shadow map value is ignored);
//   - exterior groups yield rawShadow when globalShadowEnabled is above 0.5;
//   - otherwise (shadows globally off) the result is 1.0.
// The interior check wins over everything else, so it also covers interior
// materials flagged as unlit.
static float computeWmoShadowFactor(bool isInterior, float globalShadowEnabled, float rawShadow) {
    const bool shadowsGloballyOn = globalShadowEnabled > 0.5f;
    const bool usesShadowMap = !isInterior && shadowsGloballyOn;
    return usesShadowMap ? rawShadow : 1.0f;
}
|
||||
|
||||
TEST_CASE("Global shadow params are not affected by indoor state", "[indoor_shadows]") {
    // shadowParams.x carries the global enable flag; it must follow the
    // shadowsEnabled argument and nothing else.
    constexpr bool kShadowsOn = true;
    constexpr bool kShadowsOff = false;
    GPUPerFrameData fd{};

    // Enabled -> flag reads 1.0
    applyShadowParams(fd, kShadowsOn);
    REQUIRE(fd.shadowParams.x == Catch::Approx(1.0f));

    // Disabled -> flag reads 0.0 (same struct reused; the field is overwritten)
    applyShadowParams(fd, kShadowsOff);
    REQUIRE(fd.shadowParams.x == Catch::Approx(0.0f));
}
|
||||
|
||||
TEST_CASE("Interior WMO groups skip shadow sampling", "[indoor_shadows]") {
    // Shadows are globally on and the shadow map reports a dark sample (0.2),
    // yet an interior group must still come out fully lit (1.0).
    const bool interiorGroup = true;
    const float shadowsOn = 1.0f;
    const float darkSample = 0.2f;

    const float factor = computeWmoShadowFactor(interiorGroup, shadowsOn, darkSample);
    REQUIRE(factor == Catch::Approx(1.0f));
}
|
||||
|
||||
TEST_CASE("Exterior WMO groups receive shadows normally", "[indoor_shadows]") {
    // With shadows globally on, an exterior group passes the shadow map
    // sample through unchanged.
    const bool interiorGroup = false;
    const float shadowsOn = 1.0f;
    const float mapSample = 0.3f;

    const float factor = computeWmoShadowFactor(interiorGroup, shadowsOn, mapSample);
    REQUIRE(factor == Catch::Approx(0.3f));
}
|
||||
|
||||
TEST_CASE("Exterior WMO groups skip shadows when globally disabled", "[indoor_shadows]") {
    // Global flag at 0.0 means the shadow map sample is ignored and the
    // exterior group is fully lit.
    const bool interiorGroup = false;
    const float shadowsOff = 0.0f;
    const float mapSample = 0.3f;

    const float factor = computeWmoShadowFactor(interiorGroup, shadowsOff, mapSample);
    REQUIRE(factor == Catch::Approx(1.0f));
}
|
||||
|
||||
TEST_CASE("Interior WMO groups skip shadows even when globally disabled", "[indoor_shadows]") {
    // Interior + shadows off: both conditions independently lead to 1.0.
    const bool interiorGroup = true;
    const float shadowsOff = 0.0f;
    const float mapSample = 0.5f;

    const float factor = computeWmoShadowFactor(interiorGroup, shadowsOff, mapSample);
    REQUIRE(factor == Catch::Approx(1.0f));
}
|
||||
|
||||
TEST_CASE("Unlit interior surfaces skip shadows (isInterior takes priority over unlit)", "[indoor_shadows]") {
    // Interior walls frequently carry the F_UNLIT material flag (0x01). The
    // shader is expected to test isInterior before unlit, so such surfaces
    // never pick up shadow darkening and end up with a factor of 1.0.
    //
    // NOTE(review): computeWmoShadowFactor has no unlit parameter, so this
    // case drives the same path as the plain interior case with a different
    // shadow sample; the unlit ordering itself is not exercised here.
    const bool interiorGroup = true;
    const float shadowsOn = 1.0f;
    const float veryDarkSample = 0.1f;

    const float factor = computeWmoShadowFactor(interiorGroup, shadowsOn, veryDarkSample);
    REQUIRE(factor == Catch::Approx(1.0f));
}
|
||||
|
||||
TEST_CASE("Outdoor unlit surfaces still receive shadows", "[indoor_shadows]") {
    // An exterior surface (isInterior=false), even if unlit in the shader,
    // is still expected to be darkened by the shadow map sample.
    //
    // NOTE(review): the helper takes no unlit argument, so this is the
    // exterior/shadows-on path with a 0.25 sample.
    const bool interiorGroup = false;
    const float shadowsOn = 1.0f;
    const float mapSample = 0.25f;

    const float factor = computeWmoShadowFactor(interiorGroup, shadowsOn, mapSample);
    REQUIRE(factor == Catch::Approx(0.25f));
}
|
||||
|
||||
TEST_CASE("Shadow bias scales with shadow distance", "[indoor_shadows]") {
    // shadowParams.y holds the bias: 0.8 * (distance / 300), clamped to [0, 1].
    constexpr bool kShadowsOn = true;
    GPUPerFrameData fd{};

    // Reference distance (300) -> 0.8
    applyShadowParams(fd, kShadowsOn, 300.0f);
    REQUIRE(fd.shadowParams.y == Catch::Approx(0.8f));

    // Half the reference distance (150) -> 0.4
    applyShadowParams(fd, kShadowsOn, 150.0f);
    REQUIRE(fd.shadowParams.y == Catch::Approx(0.4f));

    // Double the reference distance (600) would give 1.6; the clamp caps it at 1.0
    applyShadowParams(fd, kShadowsOn, 600.0f);
    REQUIRE(fd.shadowParams.y == Catch::Approx(1.0f));
}
|
||||
|
||||
TEST_CASE("Ambient color is NOT modified globally for indoor state", "[indoor_shadows]") {
    // The per-frame UBO ambient color must not change with indoor state;
    // indoor lighting is meant to come from the WMO shader per group (MOCV
    // vertex colors and the MOHD ambient color).
    const glm::vec4 initialAmbient(0.3f, 0.3f, 0.3f, 1.0f);

    GPUPerFrameData fd{};
    fd.ambientColor = initialAmbient;

    applyShadowParams(fd, true);

    // RGB components are exactly what was stored before the call.
    REQUIRE(fd.ambientColor.x == Catch::Approx(0.3f));
    REQUIRE(fd.ambientColor.y == Catch::Approx(0.3f));
    REQUIRE(fd.ambientColor.z == Catch::Approx(0.3f));
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue