Merge pull request #52 from ldmonster/feat/hiz-occlusion-culling

[feat] rendering: Hierarchical-Z occlusion culling
This commit is contained in:
Kelsi Rae Davis 2026-04-06 13:44:44 -07:00 committed by GitHub
commit 5d0d140c61
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
17 changed files with 1317 additions and 35 deletions

1
.gitignore vendored
View file

@ -19,6 +19,7 @@ Makefile
*.obj
*.slo
*.lo
*.spv
# Compiled Dynamic libraries
*.so

View file

@ -624,6 +624,7 @@ set(WOWEE_SOURCES
src/rendering/m2_renderer_instance.cpp
src/rendering/m2_model_classifier.cpp
src/rendering/render_graph.cpp
src/rendering/hiz_system.cpp
src/rendering/quest_marker_renderer.cpp
src/rendering/minimap.cpp
src/rendering/world_map.cpp

View file

@ -0,0 +1,57 @@
#version 450
// Hierarchical-Z depth pyramid builder.
// Builds successive mip levels from the scene depth buffer.
// Each 2×2 block is reduced to its MAXIMUM depth (farthest/largest value).
// This is conservative for occlusion: an object is only culled when its nearest
// depth exceeds the farthest occluder depth in the pyramid region.
//
// Two modes controlled by push constant:
// mipLevel == 0: Sample from the source depth texture (mip 0 of the full-res depth).
// mipLevel > 0: Sample from the previous HiZ mip level.
layout(local_size_x = 8, local_size_y = 8) in;
// Source depth texture (full-resolution scene depth, or previous mip via same image)
layout(set = 0, binding = 0) uniform sampler2D srcDepth;
// Destination mip level (written as storage image)
layout(r32f, set = 0, binding = 1) uniform writeonly image2D dstMip;
layout(push_constant) uniform PushConstants {
ivec2 dstSize; // Width and height of the destination mip level
int mipLevel; // Current mip level being built (0 = from scene depth)
};
// Reduces one footprint of the source level to a single destination texel that
// holds the MAXIMUM (farthest) depth of that footprint.
//
// The footprint is normally 2x2. Two edge cases are handled explicitly so the
// result stays conservative and never reads outside the source level:
//   * Odd source dimension: the last destination texel along that axis also
//     absorbs the otherwise-dropped final source column/row (footprint 3 wide).
//   * Source dimension already 1 while the other axis is still being reduced:
//     coordinates are clamped to the last valid texel instead of fetching
//     out of bounds (an out-of-bounds texelFetch returns undefined values).
void main() {
    ivec2 pos = ivec2(gl_GlobalInvocationID.xy);
    if (pos.x >= dstSize.x || pos.y >= dstSize.y) return;

    // mipLevel == 0 reads lod 0 of the bound scene depth; any other level
    // reads the previous level (mipLevel - 1) of the bound image.
    int srcLod = (mipLevel == 0) ? 0 : (mipLevel - 1);

    // Last valid texel coordinate of the source level.
    ivec2 srcMax = textureSize(srcDepth, srcLod) - ivec2(1);

    // Each output texel starts at the top-left of its 2x2 block in the source.
    // texelFetch gives exact texel access (no filtering).
    ivec2 srcPos = pos * 2;

    // Footprint size per axis: 2, or 3 when this is the last output texel and
    // exactly one extra source column/row remains (odd source dimension).
    ivec2 span = ivec2(2);
    if (pos.x == dstSize.x - 1 && srcPos.x + 2 == srcMax.x) span.x = 3;
    if (pos.y == dstSize.y - 1 && srcPos.y + 2 == srcMax.y) span.y = 3;

    // Conservative maximum (standard depth buffer: 0=near, 1=far).
    // An object is occluded only when its nearest depth > the farthest occluder
    // depth in the covered screen region, i.e. it is behind EVERYTHING there.
    float maxDepth = texelFetch(srcDepth, min(srcPos, srcMax), srcLod).r;
    for (int dy = 0; dy < span.y; ++dy) {
        for (int dx = 0; dx < span.x; ++dx) {
            ivec2 p = min(srcPos + ivec2(dx, dy), srcMax);
            maxDepth = max(maxDepth, texelFetch(srcDepth, p, srcLod).r);
        }
    }

    imageStore(dstMip, pos, vec4(maxDepth));
}

View file

@ -0,0 +1,184 @@
#version 450
// GPU Frustum + HiZ Occlusion Culling for M2 doodads (Phase 6.3).
//
// Two-level culling:
// 1. Frustum — current-frame planes from viewProj.
// 2. HiZ occlusion — projects bounding sphere into the PREVIOUS frame's
// screen space via prevViewProj and samples the Hierarchical-Z pyramid
// (built from said previous depth). Conservative safeguards:
// • Only objects that were visible last frame get the HiZ test.
// • AABB must be fully inside the screen (no border sampling).
// • Bounding sphere is inflated by 50 % for the HiZ AABB.
// • A depth bias is applied before the occlusion comparison.
// • Nearest depth is projected via prevViewProj from sphere center
// (avoids toCam mismatch between current and previous cameras).
//
// Falls back gracefully: if hizEnabled == 0, behaves identically to frustum-only.
layout(local_size_x = 64) in;
struct CullInstance {
vec4 sphere; // xyz = world position, w = padded radius
float effectiveMaxDistSq;
uint flags; // bit 0 = valid, bit 1 = smoke, bit 2 = invisibleTrap,
// bit 3 = previouslyVisible
float _pad0;
float _pad1;
};
layout(std140, set = 0, binding = 0) uniform CullUniforms {
vec4 frustumPlanes[6];
vec4 cameraPos; // xyz = camera position, w = maxPossibleDistSq
uint instanceCount;
uint hizEnabled;
uint hizMipLevels;
uint _pad2;
vec4 hizParams; // x = pyramidWidth, y = pyramidHeight, z = nearPlane, w = unused
mat4 viewProj; // current frame view-projection
mat4 prevViewProj; // PREVIOUS frame's view-projection for HiZ reprojection
};
layout(std430, set = 0, binding = 1) readonly buffer CullInput {
CullInstance cullInstances[];
};
layout(std430, set = 0, binding = 2) buffer CullOutput {
uint visibility[];
};
layout(set = 1, binding = 0) uniform sampler2D hizPyramid;
// Screen-edge margin — skip HiZ if the AABB touches this border.
// Depth data at screen edges is from unrelated geometry → false culls.
const float SCREEN_EDGE_MARGIN = 0.02;
// Sphere inflation factor for HiZ screen AABB (50 % larger → very conservative).
const float HIZ_SPHERE_INFLATE = 1.5;
// Depth bias — push nearest depth closer to camera so only objects
// significantly behind occluders are culled.
const float HIZ_DEPTH_BIAS = 0.02;
// Minimum screen-space size (pixels) for HiZ to engage.
const float HIZ_MIN_SCREEN_PX = 6.0;
// One invocation per cull instance. Writes visibility[id] = 0 (culled) or 1
// (visible) after, in order: flag check, two distance rejections, a
// sphere-vs-frustum test, and an optional HiZ occlusion test against the
// previous frame's depth pyramid. Every HiZ precondition that fails falls
// through to "visible", so the occlusion test can only remove instances.
void main() {
uint id = gl_GlobalInvocationID.x;
// Threads past the end of the instance list write nothing.
if (id >= instanceCount) return;
CullInstance inst = cullInstances[id];
// Flag check: must be valid, not smoke, not invisible trap
// (mask 6u = bit 1 | bit 2 of CullInstance.flags).
uint f = inst.flags;
if ((f & 1u) == 0u || (f & 6u) != 0u) {
visibility[id] = 0u;
return;
}
// Early distance rejection (loose upper bound; cameraPos.w = maxPossibleDistSq)
vec3 toCam = inst.sphere.xyz - cameraPos.xyz;
float distSq = dot(toCam, toCam);
if (distSq > cameraPos.w) {
visibility[id] = 0u;
return;
}
// Accurate per-instance distance cull
if (distSq > inst.effectiveMaxDistSq) {
visibility[id] = 0u;
return;
}
// Frustum cull: sphere vs 6 planes (current frame).
// Instances with radius <= 0 skip both this test and the HiZ test below
// and are therefore always reported visible once they pass the distance culls.
float radius = inst.sphere.w;
if (radius > 0.0) {
for (int i = 0; i < 6; i++) {
float d = dot(frustumPlanes[i].xyz, inst.sphere.xyz) + frustumPlanes[i].w;
if (d < -radius) {
visibility[id] = 0u;
return;
}
}
}
// --- HiZ Occlusion Test ---
// Skip for objects not rendered last frame (bit 3 = previouslyVisible).
bool previouslyVisible = (f & 8u) != 0u;
if (hizEnabled != 0u && radius > 0.0 && previouslyVisible) {
// Inflate sphere for conservative screen-space AABB
float hizRadius = radius * HIZ_SPHERE_INFLATE;
// Project sphere center into previous frame's clip space
vec4 clipCenter = prevViewProj * vec4(inst.sphere.xyz, 1.0);
// Only centers in front of the previous camera (w > 0) are tested.
if (clipCenter.w > 0.0) {
vec3 ndc = clipCenter.xyz / clipCenter.w;
// --- Correct sphere → screen AABB using VP row-vector lengths ---
// The maximum screen-space extent of a world-space sphere is
// maxDeltaNdcX = R * ‖row_x(VP)‖ / w
// where row_x = (VP[0][0], VP[1][0], VP[2][0]) maps world XYZ
// offsets to clip-X. Using only the diagonal element (VP[0][0])
// underestimates the footprint when the camera is rotated,
// causing false culls at certain view angles.
float rowLenX = length(vec3(prevViewProj[0][0],
prevViewProj[1][0],
prevViewProj[2][0]));
float rowLenY = length(vec3(prevViewProj[0][1],
prevViewProj[1][1],
prevViewProj[2][1]));
float projRadX = hizRadius * rowLenX / clipCenter.w;
float projRadY = hizRadius * rowLenY / clipCenter.w;
// Use the larger of the two so the screen AABB is square.
float projRad = max(projRadX, projRadY);
// NDC [-1, 1] -> UV [0, 1]; the radius scales by the same 0.5 factor.
vec2 uvCenter = ndc.xy * 0.5 + 0.5;
float uvRad = projRad * 0.5;
vec2 uvMin = uvCenter - uvRad;
vec2 uvMax = uvCenter + uvRad;
// **Screen-edge guard**: skip if AABB extends outside safe area.
// Depth data at borders is from unrelated geometry.
if (uvMin.x >= SCREEN_EDGE_MARGIN && uvMin.y >= SCREEN_EDGE_MARGIN &&
uvMax.x <= (1.0 - SCREEN_EDGE_MARGIN) && uvMax.y <= (1.0 - SCREEN_EDGE_MARGIN) &&
uvMax.x > uvMin.x && uvMax.y > uvMin.y)
{
// AABB size in pyramid mip-0 texels (hizParams.xy = pyramid width/height).
float aabbW = (uvMax.x - uvMin.x) * hizParams.x;
float aabbH = (uvMax.y - uvMin.y) * hizParams.y;
float screenSize = max(aabbW, aabbH);
// Footprints smaller than HIZ_MIN_SCREEN_PX are left visible untested.
if (screenSize >= HIZ_MIN_SCREEN_PX) {
// Mip level: +1 for conservatism (coarser = bigger depth footprint)
float mipLevel = ceil(log2(max(screenSize, 1.0))) + 1.0;
mipLevel = clamp(mipLevel, 0.0, float(hizMipLevels - 1u));
// Sample HiZ at 4 corners — take MAX (farthest occluder)
float pz0 = textureLod(hizPyramid, uvMin, mipLevel).r;
float pz1 = textureLod(hizPyramid, vec2(uvMax.x, uvMin.y), mipLevel).r;
float pz2 = textureLod(hizPyramid, vec2(uvMin.x, uvMax.y), mipLevel).r;
float pz3 = textureLod(hizPyramid, uvMax, mipLevel).r;
float pyramidDepth = max(max(pz0, pz1), max(pz2, pz3));
// Nearest depth: project sphere center's NDC-Z then subtract
// the sphere's depth range. The depth span uses the Z-row
// length of VP (same Cauchy-Schwarz reasoning as X/Y).
// NOTE(review): this divides by the center's clip.w only, i.e. it
// treats w as constant across the sphere, so it is a first-order
// estimate of the NDC-Z extent rather than an exact bound — confirm
// it stays conservative for the projection in use.
float rowLenZ = length(vec3(prevViewProj[0][2],
prevViewProj[1][2],
prevViewProj[2][2]));
float depthSpan = hizRadius * rowLenZ / clipCenter.w;
float centerDepth = ndc.z;
float nearestDepth = centerDepth - depthSpan - HIZ_DEPTH_BIAS;
// Cull only when the sphere's nearest depth is behind the farthest
// occluder, and never against a region whose farthest depth is 1.0.
if (nearestDepth > pyramidDepth && pyramidDepth < 1.0) {
visibility[id] = 0u;
return;
}
}
}
}
// fallthrough: conservatively visible
}
visibility[id] = 1u;
}

View file

@ -163,10 +163,11 @@ void main() {
vec3 result;
// Sample shadow map for all WMO groups (interior groups with 0x2000 flag
// include covered outdoor areas like archways/streets that should receive shadows)
// Sample shadow map — skip entirely for interior groups (flag 0x2000).
// Interior surfaces rely on pre-baked MOCV vertex-color lighting and the
// directional shadow map only makes them darker without any benefit.
float shadow = 1.0;
if (shadowParams.x > 0.5) {
if (isInterior == 0 && shadowParams.x > 0.5) {
vec3 ldir = normalize(-lightDir.xyz);
float normalOffset = SHADOW_TEXEL * 2.0 * (1.0 - abs(dot(norm, ldir)));
vec3 biasedPos = FragPos + norm * normalOffset;
@ -185,17 +186,20 @@ void main() {
if (isLava != 0) {
// Lava is self-luminous — bright emissive, no shadows
result = texColor.rgb * 1.5;
} else if (unlit != 0) {
result = texColor.rgb * shadow;
} else if (isInterior != 0) {
// WMO interior: vertex colors (MOCV) are pre-baked lighting from the artist.
// The MOHD ambient color tints/floors the vertex colors so dark spots don't
// go completely black, matching the WoW client's interior shading.
// We handle BOTH lit and unlit interior materials — directional
// sun shadows and lighting are skipped for all interior groups.
vec3 wmoAmbient = vec3(wmoAmbientR, wmoAmbientG, wmoAmbientB);
// Clamp ambient to at least 0.3 to avoid total darkness when MOHD color is zero
wmoAmbient = max(wmoAmbient, vec3(0.3));
vec3 mocv = max(VertColor.rgb, wmoAmbient);
result = texColor.rgb * mocv * shadow;
result = texColor.rgb * mocv;
} else if (unlit != 0) {
// Outdoor unlit surface — still receives directional shadows
result = texColor.rgb * shadow;
} else {
vec3 ldir = normalize(-lightDir.xyz);
float diff = max(dot(norm, ldir), 0.0);

Binary file not shown.

View file

@ -83,6 +83,7 @@ public:
bool isSitting() const { return sitting; }
bool isSwimming() const { return swimming; }
bool isInsideWMO() const { return cachedInsideWMO; }
bool isInsideInteriorWMO() const { return cachedInsideInteriorWMO; }
void setGrounded(bool g) { grounded = g; }
void setSitting(bool s) { sitting = s; }
bool isOnTaxi() const { return externalFollow_; }

View file

@ -0,0 +1,150 @@
#pragma once
#include <vulkan/vulkan.h>
#include <vk_mem_alloc.h>
#include <glm/glm.hpp>
#include <cstdint>
#include <vector>
namespace wowee {
namespace rendering {
class VkContext;
/**
* Hierarchical-Z (HiZ) depth pyramid for GPU occlusion culling (Phase 6.3 Option B).
*
* Builds a max-depth (farthest-depth) mip chain from the scene depth buffer
* each frame: the build shader stores the MAXIMUM of every source block.
* The M2 cull compute shader samples this pyramid to reject objects hidden behind
* geometry, complementing the existing frustum culling.
*
* Lifecycle:
* initialize() create pyramid image, sampler, compute pipeline, descriptors
* buildPyramid() dispatch compute to reduce depth mip chain (once per frame)
* shutdown() destroy all Vulkan resources
*
* The pyramid is double-buffered (per frame-in-flight, MAX_FRAMES = 2) so builds
* and reads never race across concurrent GPU submissions.
*
* Non-copyable: the instance owns raw Vulkan handles released in shutdown().
*/
class HiZSystem {
public:
HiZSystem() = default;
// Calls shutdown().
~HiZSystem();
HiZSystem(const HiZSystem&) = delete;
HiZSystem& operator=(const HiZSystem&) = delete;
/**
* Create all Vulkan resources.
* @param ctx Vulkan context (device, allocator, etc.); must not be null
* @param width Full-resolution depth buffer width (must be non-zero)
* @param height Full-resolution depth buffer height (must be non-zero)
* @return true on success; false if an argument is rejected or any
* resource creation fails
*/
[[nodiscard]] bool initialize(VkContext* ctx, uint32_t width, uint32_t height);
/**
* Release all Vulkan resources. Waits for the device to go idle first.
* No-op when the system holds no context.
*/
void shutdown();
/**
* Rebuild the pyramid after a swapchain resize.
* Safe to call repeatedly; destroys the old pyramid and descriptors first.
* The compute pipeline and set layouts are kept.
* @return true on success; false if not initialized or re-creation fails
*/
[[nodiscard]] bool resize(uint32_t width, uint32_t height);
/**
* Dispatch compute shader to build the HiZ pyramid from the current depth buffer.
* Must be called AFTER the main scene pass has finished writing to the depth buffer.
*
* @param cmd Active command buffer (in recording state)
* @param frameIndex Current frame-in-flight index (0 or 1)
* @param depthImage Source depth image (VK_FORMAT_D32_SFLOAT)
*/
void buildPyramid(VkCommandBuffer cmd, uint32_t frameIndex, VkImage depthImage);
/**
* @return Descriptor set layout for the HiZ pyramid sampler (set 1 for m2_cull_hiz).
*/
VkDescriptorSetLayout getDescriptorSetLayout() const { return hizSetLayout_; }
/**
* @return Descriptor set for the given frame (sampler2D of the HiZ pyramid).
* Bind as set 1 in the M2 HiZ cull pipeline.
* @note frameIndex is used unchecked as an array index; it must be
* less than MAX_FRAMES.
*/
VkDescriptorSet getDescriptorSet(uint32_t frameIndex) const { return hizDescSet_[frameIndex]; }
/**
* @return true if HiZ system is initialized and ready.
*/
bool isReady() const { return ready_; }
/**
* @return Number of mip levels in the pyramid.
*/
uint32_t getMipLevels() const { return mipLevels_; }
/**
* @return Pyramid base resolution (mip 0), which is half the full-res depth size.
*/
uint32_t getPyramidWidth() const { return pyramidWidth_; }
uint32_t getPyramidHeight() const { return pyramidHeight_; }
private:
// Creation helpers return false on the first failing Vulkan call; the
// matching destroy helpers skip handles that are already null.
bool createPyramidImage();
void destroyPyramidImage();
bool createComputePipeline();
void destroyComputePipeline();
bool createDescriptors();
void destroyDescriptors();
// Non-owning; null while the system is not initialized.
VkContext* ctx_ = nullptr;
bool ready_ = false;
// Pyramid dimensions (mip 0 = half of full-res depth, at least 1x1)
uint32_t fullWidth_ = 0;
uint32_t fullHeight_ = 0;
uint32_t pyramidWidth_ = 0;
uint32_t pyramidHeight_ = 0;
uint32_t mipLevels_ = 0;
static constexpr uint32_t MAX_FRAMES = 2;
// Per-frame HiZ pyramid images (R32_SFLOAT, full mip chain)
VkImage pyramidImage_[MAX_FRAMES] = {};
VmaAllocation pyramidAlloc_[MAX_FRAMES] = {};
VkImageView pyramidViewAll_[MAX_FRAMES] = {}; // View of all mip levels (for sampling)
std::vector<VkImageView> pyramidMipViews_[MAX_FRAMES]; // Per-mip views (for storage image writes)
// Depth input — image view for sampling the depth buffer as a texture.
// Created in createDescriptors(), destroyed in destroyPyramidImage().
VkImageView depthSamplerView_[MAX_FRAMES] = {};
// Sampler for depth reads and pyramid reads (nearest, clamp-to-edge)
VkSampler depthSampler_ = VK_NULL_HANDLE;
// Compute pipeline for building the pyramid
VkPipeline buildPipeline_ = VK_NULL_HANDLE;
VkPipelineLayout buildPipelineLayout_ = VK_NULL_HANDLE;
// Descriptor set layout for build pipeline (set 0: src sampler + dst storage image)
VkDescriptorSetLayout buildSetLayout_ = VK_NULL_HANDLE;
// Single pool backing both the build sets and the HiZ sampling sets.
VkDescriptorPool buildDescPool_ = VK_NULL_HANDLE;
// Per-frame, per-mip descriptor sets for pyramid build
std::vector<VkDescriptorSet> buildDescSets_[MAX_FRAMES];
// HiZ sampling descriptor: exposed to M2 cull shader (set 1: combined image sampler)
VkDescriptorSetLayout hizSetLayout_ = VK_NULL_HANDLE;
// Left VK_NULL_HANDLE by createDescriptors(): the HiZ sets are allocated
// from buildDescPool_ instead.
VkDescriptorPool hizDescPool_ = VK_NULL_HANDLE;
VkDescriptorSet hizDescSet_[MAX_FRAMES] = {};
// Push constant for build shader; field order mirrors the shader's
// PushConstants block (ivec2 dstSize, int mipLevel).
struct HiZBuildPushConstants {
int32_t dstWidth;
int32_t dstHeight;
int32_t mipLevel;
};
};
} // namespace rendering
} // namespace wowee

View file

@ -28,6 +28,7 @@ namespace rendering {
class Camera;
class VkContext;
class VkTexture;
class HiZSystem;
/**
* GPU representation of an M2 model
@ -299,6 +300,13 @@ public:
void dispatchCullCompute(VkCommandBuffer cmd, uint32_t frameIndex, const Camera& camera);
void render(VkCommandBuffer cmd, VkDescriptorSet perFrameSet, const Camera& camera);
/** Set the HiZ system for occlusion culling (Phase 6.3). nullptr disables HiZ. */
void setHiZSystem(HiZSystem* hiz) { hizSystem_ = hiz; }
/** Ensure GPU→CPU cull output is visible to the host after a fence wait.
* Call after the early compute submission finishes (endSingleTimeCommands). */
void invalidateCullOutput(uint32_t frameIndex);
/**
* Initialize shadow pipeline (Phase 7)
*/
@ -437,7 +445,7 @@ private:
// Mega bone SSBO — consolidates all per-instance bone matrices into a single buffer per frame.
// Replaces per-instance bone SSBOs for fewer descriptor binds and enables GPU instancing.
static constexpr uint32_t MEGA_BONE_MAX_INSTANCES = 2048;
static constexpr uint32_t MEGA_BONE_MAX_INSTANCES = 4096;
static constexpr uint32_t MAX_BONES_PER_INSTANCE = 128;
::VkBuffer megaBoneBuffer_[2] = {};
VmaAllocation megaBoneAlloc_[2] = {};
@ -472,19 +480,26 @@ private:
uint32_t flags; // bit 0 = valid, bit 1 = smoke, bit 2 = invisibleTrap
float _pad[2] = {};
};
struct CullUniformsGPU { // matches CullUniforms in m2_cull.comp.glsl (128 bytes, std140)
glm::vec4 frustumPlanes[6]; // xyz = normal, w = distance
glm::vec4 cameraPos; // xyz = camera position, w = maxPossibleDistSq
uint32_t instanceCount;
uint32_t _pad[3] = {};
};
struct CullUniformsGPU { // matches CullUniforms in m2_cull_hiz.comp.glsl (std140)
glm::vec4 frustumPlanes[6]; // xyz = normal, w = distance (96 bytes)
glm::vec4 cameraPos; // xyz = camera position, w = maxPossibleDistSq (16 bytes)
uint32_t instanceCount; // (4 bytes)
uint32_t hizEnabled; // 1 = HiZ occlusion active (4 bytes)
uint32_t hizMipLevels; // mip levels in HiZ pyramid (4 bytes)
uint32_t _pad2 = {}; // (4 bytes)
glm::vec4 hizParams; // x=pyramidW, y=pyramidH, z=nearPlane, w=unused (16 bytes)
glm::mat4 viewProj; // current frame view-projection (64 bytes)
glm::mat4 prevViewProj; // previous frame VP for HiZ reprojection (64 bytes)
}; // Total: 272 bytes
static constexpr uint32_t MAX_CULL_INSTANCES = 24576;
VkPipeline cullPipeline_ = VK_NULL_HANDLE;
VkPipelineLayout cullPipelineLayout_ = VK_NULL_HANDLE;
VkPipeline cullPipeline_ = VK_NULL_HANDLE; // frustum-only (fallback)
VkPipeline cullHiZPipeline_ = VK_NULL_HANDLE; // frustum + HiZ occlusion
VkPipelineLayout cullPipelineLayout_ = VK_NULL_HANDLE; // frustum-only layout (set 0)
VkPipelineLayout cullHiZPipelineLayout_ = VK_NULL_HANDLE; // HiZ layout (set 0 + set 1)
VkDescriptorSetLayout cullSetLayout_ = VK_NULL_HANDLE;
VkDescriptorPool cullDescPool_ = VK_NULL_HANDLE;
VkDescriptorSet cullSet_[2] = {}; // double-buffered
::VkBuffer cullUniformBuffer_[2] = {}; // frustum planes + camera (UBO)
::VkBuffer cullUniformBuffer_[2] = {}; // frustum planes + camera + HiZ params (UBO)
VmaAllocation cullUniformAlloc_[2] = {};
void* cullUniformMapped_[2] = {};
::VkBuffer cullInputBuffer_[2] = {}; // per-instance bounding sphere + flags (SSBO)
@ -494,6 +509,20 @@ private:
VmaAllocation cullOutputAlloc_[2] = {};
void* cullOutputMapped_[2] = {};
// HiZ occlusion culling (Phase 6.3) — optional, driven by Renderer
HiZSystem* hizSystem_ = nullptr;
// Previous frame's view-projection for temporal reprojection in HiZ culling.
// Stored each frame so the cull shader can project into the same screen space
// as the depth buffer the HiZ pyramid was built from.
glm::mat4 prevVP_{1.0f};
// Per-instance visibility from the previous frame. Used to set the
// `previouslyVisible` flag (bit 3) on each CullInstance so the shader
// skips the HiZ test for objects that weren't rendered last frame
// (their depth data is unreliable).
std::vector<uint8_t> prevFrameVisible_;
// Dynamic ribbon vertex buffer (CPU-written triangle strip)
static constexpr size_t MAX_RIBBON_VERTS = 2048; // 9 floats each
::VkBuffer ribbonVB_ = VK_NULL_HANDLE;

View file

@ -58,6 +58,7 @@ class ChargeEffect;
class SwimEffects;
class RenderGraph;
class OverlaySystem;
class HiZSystem;
class Renderer {
public:
@ -363,6 +364,9 @@ private:
std::unique_ptr<RenderGraph> renderGraph_;
void buildFrameGraph(game::GameHandler* gameHandler);
// HiZ occlusion culling — builds depth pyramid each frame
std::unique_ptr<HiZSystem> hizSystem_;
// CPU timing stats (last frame/update).
double lastUpdateMs = 0.0;
double lastRenderMs = 0.0;

View file

@ -0,0 +1,517 @@
#include "rendering/hiz_system.hpp"
#include "rendering/vk_context.hpp"
#include "rendering/vk_shader.hpp"
#include "core/logger.hpp"
#include "core/profiler.hpp"
#include <cmath>
#include <algorithm>
namespace wowee {
namespace rendering {
// Releases whatever Vulkan objects are still owned; shutdown() is a no-op when
// the system holds no context.
HiZSystem::~HiZSystem()
{
    this->shutdown();
}
// Creates the build pipeline, the per-frame pyramid images and all descriptor
// sets for a depth buffer of `width` x `height`.
//
// @param ctx    Vulkan context; must be non-null.
// @param width  Full-resolution depth width; must be non-zero.
// @param height Full-resolution depth height; must be non-zero.
// @return true when everything was created and the system is ready.
//
// On any failure the system is returned to the fully shut-down state
// (ctx_ == nullptr, ready_ == false, no live handles). Previously a failed
// initialize kept ctx_ set while some layouts were already destroyed, so a
// later resize() would try to rebuild descriptors against null set layouts.
bool HiZSystem::initialize(VkContext* ctx, uint32_t width, uint32_t height) {
    if (!ctx || width == 0 || height == 0) return false;

    ctx_ = ctx;
    fullWidth_ = width;
    fullHeight_ = height;

    // Pyramid mip 0 is half the full resolution (the first downscale)
    pyramidWidth_ = std::max(1u, width / 2);
    pyramidHeight_ = std::max(1u, height / 2);
    mipLevels_ = static_cast<uint32_t>(std::floor(std::log2(std::max(pyramidWidth_, pyramidHeight_)))) + 1;

    // Short-circuit evaluation preserves the creation order:
    // pipeline -> pyramid images -> descriptors.
    const bool created = createComputePipeline() && createPyramidImage() && createDescriptors();
    if (!created) {
        // shutdown() tears down in reverse dependency order, skips handles that
        // were never created, and clears ctx_ so later calls are rejected.
        shutdown();
        return false;
    }

    ready_ = true;
    LOG_INFO("HiZSystem: initialized ", pyramidWidth_, "x", pyramidHeight_,
             " pyramid (", mipLevels_, " mips) from ", width, "x", height, " depth");
    return true;
}
// Destroys every Vulkan object owned by the system and returns it to the
// uninitialized state. Does nothing when no context is held.
void HiZSystem::shutdown() {
    if (ctx_ == nullptr) {
        return;
    }

    // Make sure no submitted work still references the resources.
    vkDeviceWaitIdle(ctx_->getDevice());

    // Descriptors first, then the images they point at, then the pipeline.
    destroyDescriptors();
    destroyPyramidImage();
    destroyComputePipeline();

    ready_ = false;
    ctx_ = nullptr;
}
// Rebuilds the pyramid images and descriptor sets for a new depth-buffer size.
// The compute pipeline and set layouts are kept.
//
// @param width  New full-resolution depth width.
// @param height New full-resolution depth height.
// @return true when the pyramid was rebuilt; false when the system has no
//         context or a resource could not be re-created.
//
// ready_ is cleared before the old resources are destroyed and only set again
// on success, so isReady() can no longer report true after a failed resize
// (previously it kept the stale `true` while the pyramid was already gone).
bool HiZSystem::resize(uint32_t width, uint32_t height) {
    if (!ctx_) return false;

    // From here on the old pyramid is being torn down; stop advertising it.
    ready_ = false;

    VkDevice device = ctx_->getDevice();
    vkDeviceWaitIdle(device);

    destroyDescriptors();
    destroyPyramidImage();

    fullWidth_ = width;
    fullHeight_ = height;
    // Pyramid mip 0 is half the full resolution, never smaller than 1x1.
    pyramidWidth_ = std::max(1u, width / 2);
    pyramidHeight_ = std::max(1u, height / 2);
    mipLevels_ = static_cast<uint32_t>(std::floor(std::log2(std::max(pyramidWidth_, pyramidHeight_)))) + 1;

    if (!createPyramidImage()) {
        // Release whatever part of the pyramid was created before the failure.
        destroyPyramidImage();
        return false;
    }
    if (!createDescriptors()) {
        // Same teardown order as shutdown(): descriptors before the images.
        destroyDescriptors();
        destroyPyramidImage();
        return false;
    }

    ready_ = true;
    LOG_INFO("HiZSystem: resized to ", pyramidWidth_, "x", pyramidHeight_,
             " (", mipLevels_, " mips)");
    return true;
}
// --- Pyramid image creation ---
bool HiZSystem::createPyramidImage() {
VkDevice device = ctx_->getDevice();
VmaAllocator alloc = ctx_->getAllocator();
for (uint32_t f = 0; f < MAX_FRAMES; f++) {
// Create R32F image with full mip chain
VkImageCreateInfo imgCi{VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO};
imgCi.imageType = VK_IMAGE_TYPE_2D;
imgCi.format = VK_FORMAT_R32_SFLOAT;
imgCi.extent = {pyramidWidth_, pyramidHeight_, 1};
imgCi.mipLevels = mipLevels_;
imgCi.arrayLayers = 1;
imgCi.samples = VK_SAMPLE_COUNT_1_BIT;
imgCi.tiling = VK_IMAGE_TILING_OPTIMAL;
imgCi.usage = VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_STORAGE_BIT;
imgCi.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED;
VmaAllocationCreateInfo allocCi{};
allocCi.usage = VMA_MEMORY_USAGE_GPU_ONLY;
if (vmaCreateImage(alloc, &imgCi, &allocCi, &pyramidImage_[f], &pyramidAlloc_[f], nullptr) != VK_SUCCESS) {
LOG_ERROR("HiZSystem: failed to create pyramid image for frame ", f);
return false;
}
// View of ALL mip levels (for sampling in the cull shader)
VkImageViewCreateInfo viewCi{VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO};
viewCi.image = pyramidImage_[f];
viewCi.viewType = VK_IMAGE_VIEW_TYPE_2D;
viewCi.format = VK_FORMAT_R32_SFLOAT;
viewCi.subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;
viewCi.subresourceRange.baseMipLevel = 0;
viewCi.subresourceRange.levelCount = mipLevels_;
viewCi.subresourceRange.layerCount = 1;
if (vkCreateImageView(device, &viewCi, nullptr, &pyramidViewAll_[f]) != VK_SUCCESS) {
LOG_ERROR("HiZSystem: failed to create pyramid view-all for frame ", f);
return false;
}
// Per-mip views (for storage image writes in the build shader)
pyramidMipViews_[f].resize(mipLevels_, VK_NULL_HANDLE);
for (uint32_t mip = 0; mip < mipLevels_; mip++) {
VkImageViewCreateInfo mipViewCi{VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO};
mipViewCi.image = pyramidImage_[f];
mipViewCi.viewType = VK_IMAGE_VIEW_TYPE_2D;
mipViewCi.format = VK_FORMAT_R32_SFLOAT;
mipViewCi.subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;
mipViewCi.subresourceRange.baseMipLevel = mip;
mipViewCi.subresourceRange.levelCount = 1;
mipViewCi.subresourceRange.layerCount = 1;
if (vkCreateImageView(device, &mipViewCi, nullptr, &pyramidMipViews_[f][mip]) != VK_SUCCESS) {
LOG_ERROR("HiZSystem: failed to create mip ", mip, " view for frame ", f);
return false;
}
}
}
// Sampler for depth reads and HiZ pyramid reads (nearest, clamp)
VkSamplerCreateInfo samplerCi{VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO};
samplerCi.magFilter = VK_FILTER_NEAREST;
samplerCi.minFilter = VK_FILTER_NEAREST;
samplerCi.mipmapMode = VK_SAMPLER_MIPMAP_MODE_NEAREST;
samplerCi.addressModeU = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE;
samplerCi.addressModeV = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE;
samplerCi.addressModeW = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE;
samplerCi.maxLod = static_cast<float>(mipLevels_);
if (vkCreateSampler(device, &samplerCi, nullptr, &depthSampler_) != VK_SUCCESS) {
LOG_ERROR("HiZSystem: failed to create sampler");
return false;
}
return true;
}
void HiZSystem::destroyPyramidImage() {
if (!ctx_) return;
VkDevice device = ctx_->getDevice();
VmaAllocator alloc = ctx_->getAllocator();
if (depthSampler_) { vkDestroySampler(device, depthSampler_, nullptr); depthSampler_ = VK_NULL_HANDLE; }
for (uint32_t f = 0; f < MAX_FRAMES; f++) {
for (auto& view : pyramidMipViews_[f]) {
if (view) { vkDestroyImageView(device, view, nullptr); view = VK_NULL_HANDLE; }
}
pyramidMipViews_[f].clear();
if (pyramidViewAll_[f]) { vkDestroyImageView(device, pyramidViewAll_[f], nullptr); pyramidViewAll_[f] = VK_NULL_HANDLE; }
if (depthSamplerView_[f]) { vkDestroyImageView(device, depthSamplerView_[f], nullptr); depthSamplerView_[f] = VK_NULL_HANDLE; }
if (pyramidImage_[f]) { vmaDestroyImage(alloc, pyramidImage_[f], pyramidAlloc_[f]); pyramidImage_[f] = VK_NULL_HANDLE; }
}
}
// --- Compute pipeline ---
bool HiZSystem::createComputePipeline() {
VkDevice device = ctx_->getDevice();
// Build descriptor set layout for pyramid build (set 0):
// binding 0: combined image sampler (source depth / previous mip)
// binding 1: storage image (destination mip)
VkDescriptorSetLayoutBinding bindings[2] = {};
bindings[0].binding = 0;
bindings[0].descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER;
bindings[0].descriptorCount = 1;
bindings[0].stageFlags = VK_SHADER_STAGE_COMPUTE_BIT;
bindings[1].binding = 1;
bindings[1].descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE;
bindings[1].descriptorCount = 1;
bindings[1].stageFlags = VK_SHADER_STAGE_COMPUTE_BIT;
VkDescriptorSetLayoutCreateInfo layoutCi{VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO};
layoutCi.bindingCount = 2;
layoutCi.pBindings = bindings;
if (vkCreateDescriptorSetLayout(device, &layoutCi, nullptr, &buildSetLayout_) != VK_SUCCESS) {
LOG_ERROR("HiZSystem: failed to create build set layout");
return false;
}
// HiZ sampling layout (for M2 cull shader, set 1):
// binding 0: combined image sampler (HiZ pyramid, all mips)
VkDescriptorSetLayoutBinding hizBinding{};
hizBinding.binding = 0;
hizBinding.descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER;
hizBinding.descriptorCount = 1;
hizBinding.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT;
VkDescriptorSetLayoutCreateInfo hizLayoutCi{VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO};
hizLayoutCi.bindingCount = 1;
hizLayoutCi.pBindings = &hizBinding;
if (vkCreateDescriptorSetLayout(device, &hizLayoutCi, nullptr, &hizSetLayout_) != VK_SUCCESS) {
LOG_ERROR("HiZSystem: failed to create HiZ set layout");
return false;
}
// Push constant range for build shader
VkPushConstantRange pushRange{};
pushRange.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT;
pushRange.offset = 0;
pushRange.size = sizeof(HiZBuildPushConstants);
VkPipelineLayoutCreateInfo plCi{VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO};
plCi.setLayoutCount = 1;
plCi.pSetLayouts = &buildSetLayout_;
plCi.pushConstantRangeCount = 1;
plCi.pPushConstantRanges = &pushRange;
if (vkCreatePipelineLayout(device, &plCi, nullptr, &buildPipelineLayout_) != VK_SUCCESS) {
LOG_ERROR("HiZSystem: failed to create build pipeline layout");
return false;
}
// Load and create compute pipeline
VkShaderModule buildShader;
if (!buildShader.loadFromFile(device, "assets/shaders/hiz_build.comp.spv")) {
LOG_ERROR("HiZSystem: failed to load hiz_build.comp.spv");
return false;
}
VkComputePipelineCreateInfo cpCi{VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO};
cpCi.stage = buildShader.stageInfo(VK_SHADER_STAGE_COMPUTE_BIT);
cpCi.layout = buildPipelineLayout_;
if (vkCreateComputePipelines(device, ctx_->getPipelineCache(), 1, &cpCi, nullptr, &buildPipeline_) != VK_SUCCESS) {
LOG_ERROR("HiZSystem: failed to create build compute pipeline");
buildShader.destroy();
return false;
}
buildShader.destroy();
return true;
}
void HiZSystem::destroyComputePipeline() {
if (!ctx_) return;
VkDevice device = ctx_->getDevice();
if (buildPipeline_) { vkDestroyPipeline(device, buildPipeline_, nullptr); buildPipeline_ = VK_NULL_HANDLE; }
if (buildPipelineLayout_) { vkDestroyPipelineLayout(device, buildPipelineLayout_, nullptr); buildPipelineLayout_ = VK_NULL_HANDLE; }
if (buildSetLayout_) { vkDestroyDescriptorSetLayout(device, buildSetLayout_, nullptr); buildSetLayout_ = VK_NULL_HANDLE; }
if (hizSetLayout_) { vkDestroyDescriptorSetLayout(device, hizSetLayout_, nullptr); hizSetLayout_ = VK_NULL_HANDLE; }
}
// --- Descriptors ---
bool HiZSystem::createDescriptors() {
VkDevice device = ctx_->getDevice();
// Pool: per-frame × per-mip build sets + 2 HiZ sampling sets
// Each build set needs 1 sampler + 1 storage image
// Each HiZ sampling set needs 1 sampler
const uint32_t totalBuildSets = MAX_FRAMES * mipLevels_;
const uint32_t totalHizSets = MAX_FRAMES;
const uint32_t totalSets = totalBuildSets + totalHizSets;
VkDescriptorPoolSize poolSizes[2] = {};
poolSizes[0] = {VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, totalBuildSets + totalHizSets};
poolSizes[1] = {VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, totalBuildSets};
VkDescriptorPoolCreateInfo poolCi{VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO};
poolCi.maxSets = totalSets;
poolCi.poolSizeCount = 2;
poolCi.pPoolSizes = poolSizes;
if (vkCreateDescriptorPool(device, &poolCi, nullptr, &buildDescPool_) != VK_SUCCESS) {
LOG_ERROR("HiZSystem: failed to create descriptor pool");
return false;
}
// We use the same pool for both build and HiZ sets — simpler cleanup
hizDescPool_ = VK_NULL_HANDLE; // sharing buildDescPool_
for (uint32_t f = 0; f < MAX_FRAMES; f++) {
// Create a temporary depth image view for sampling the depth buffer.
// This is SEPARATE from the VkContext's depth image view because we need
// DEPTH aspect sampling which requires specific format view.
{
VkImage depthSrc = ctx_->getDepthCopySourceImage();
VkImageViewCreateInfo viewCi{VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO};
viewCi.image = depthSrc;
viewCi.viewType = VK_IMAGE_VIEW_TYPE_2D;
viewCi.format = ctx_->getDepthFormat();
viewCi.subresourceRange.aspectMask = VK_IMAGE_ASPECT_DEPTH_BIT;
viewCi.subresourceRange.levelCount = 1;
viewCi.subresourceRange.layerCount = 1;
if (vkCreateImageView(device, &viewCi, nullptr, &depthSamplerView_[f]) != VK_SUCCESS) {
LOG_ERROR("HiZSystem: failed to create depth sampler view for frame ", f);
return false;
}
}
// Allocate per-mip build descriptor sets
buildDescSets_[f].resize(mipLevels_);
for (uint32_t mip = 0; mip < mipLevels_; mip++) {
VkDescriptorSetAllocateInfo allocInfo{VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO};
allocInfo.descriptorPool = buildDescPool_;
allocInfo.descriptorSetCount = 1;
allocInfo.pSetLayouts = &buildSetLayout_;
if (vkAllocateDescriptorSets(device, &allocInfo, &buildDescSets_[f][mip]) != VK_SUCCESS) {
LOG_ERROR("HiZSystem: failed to allocate build desc set frame=", f, " mip=", mip);
return false;
}
// Write descriptors:
// Binding 0 (sampler): mip 0 reads depth buffer, mip N reads pyramid mip N-1
VkDescriptorImageInfo srcInfo{};
srcInfo.sampler = depthSampler_;
if (mip == 0) {
srcInfo.imageView = depthSamplerView_[f];
srcInfo.imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
} else {
srcInfo.imageView = pyramidViewAll_[f]; // shader uses texelFetch with explicit mip
srcInfo.imageLayout = VK_IMAGE_LAYOUT_GENERAL;
}
// Binding 1 (storage image): write to current mip
VkDescriptorImageInfo dstInfo{};
dstInfo.imageView = pyramidMipViews_[f][mip];
dstInfo.imageLayout = VK_IMAGE_LAYOUT_GENERAL;
VkWriteDescriptorSet writes[2] = {};
writes[0] = {VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET};
writes[0].dstSet = buildDescSets_[f][mip];
writes[0].dstBinding = 0;
writes[0].descriptorCount = 1;
writes[0].descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER;
writes[0].pImageInfo = &srcInfo;
writes[1] = {VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET};
writes[1].dstSet = buildDescSets_[f][mip];
writes[1].dstBinding = 1;
writes[1].descriptorCount = 1;
writes[1].descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE;
writes[1].pImageInfo = &dstInfo;
vkUpdateDescriptorSets(device, 2, writes, 0, nullptr);
}
// Allocate HiZ sampling descriptor set (for M2 cull shader)
{
VkDescriptorSetAllocateInfo allocInfo{VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO};
allocInfo.descriptorPool = buildDescPool_;
allocInfo.descriptorSetCount = 1;
allocInfo.pSetLayouts = &hizSetLayout_;
if (vkAllocateDescriptorSets(device, &allocInfo, &hizDescSet_[f]) != VK_SUCCESS) {
LOG_ERROR("HiZSystem: failed to allocate HiZ sampling desc set for frame ", f);
return false;
}
VkDescriptorImageInfo hizInfo{};
hizInfo.sampler = depthSampler_;
hizInfo.imageView = pyramidViewAll_[f];
hizInfo.imageLayout = VK_IMAGE_LAYOUT_GENERAL;
VkWriteDescriptorSet write{VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET};
write.dstSet = hizDescSet_[f];
write.dstBinding = 0;
write.descriptorCount = 1;
write.descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER;
write.pImageInfo = &hizInfo;
vkUpdateDescriptorSets(device, 1, &write, 0, nullptr);
}
}
return true;
}
void HiZSystem::destroyDescriptors() {
if (!ctx_) return;
VkDevice device = ctx_->getDevice();
// All descriptor sets are freed when pool is destroyed
if (buildDescPool_) { vkDestroyDescriptorPool(device, buildDescPool_, nullptr); buildDescPool_ = VK_NULL_HANDLE; }
// hizDescPool_ shares buildDescPool_, so nothing extra to destroy
for (uint32_t f = 0; f < MAX_FRAMES; f++) {
buildDescSets_[f].clear();
hizDescSet_[f] = VK_NULL_HANDLE;
if (depthSamplerView_[f]) { vkDestroyImageView(device, depthSamplerView_[f], nullptr); depthSamplerView_[f] = VK_NULL_HANDLE; }
}
}
// --- Pyramid build dispatch ---
/// Records the compute work that rebuilds the HiZ depth pyramid for one frame slot.
///
/// Commands recorded into `cmd`, in order:
///   1. Transition `depthImage` DEPTH_STENCIL_ATTACHMENT_OPTIMAL -> SHADER_READ_ONLY_OPTIMAL.
///   2. Transition every pyramid mip UNDEFINED -> GENERAL (previous contents are discarded).
///   3. One dispatch per mip (8x8 threads per workgroup), each followed by a
///      shader-write -> shader-read barrier on the mip just written.
///   4. Transition `depthImage` back to DEPTH_STENCIL_ATTACHMENT_OPTIMAL.
///
/// Does nothing when the system is not ready, the build pipeline is missing, or
/// `frameIndex` is out of range.
///
/// @param cmd         Command buffer in the recording state.
/// @param frameIndex  Frame slot selecting the pyramid image and its build descriptor sets.
/// @param depthImage  Depth image to read; must be in DEPTH_STENCIL_ATTACHMENT_OPTIMAL on entry.
void HiZSystem::buildPyramid(VkCommandBuffer cmd, uint32_t frameIndex, VkImage depthImage) {
    ZoneScopedN("HiZSystem::buildPyramid");
    if (!ready_ || !buildPipeline_) return;
    // frameIndex indexes fixed-size per-frame arrays; reject anything outside them.
    if (frameIndex >= MAX_FRAMES) return;

    // Depth attachment reads/writes can happen in either fragment-test stage, so
    // both are named whenever the depth image is synchronized against rendering.
    const VkPipelineStageFlags depthTestStages =
        VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT | VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT;

    // Transition depth image from DEPTH_STENCIL_ATTACHMENT to SHADER_READ_ONLY for sampling.
    // NOTE(review): if the depth format also carries a stencil aspect, the barrier's
    // aspectMask may need STENCIL as well — confirm against ctx_->getDepthFormat().
    {
        VkImageMemoryBarrier barrier{VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER};
        barrier.srcAccessMask = VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT;
        barrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT;
        barrier.oldLayout = VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL;
        barrier.newLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
        barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
        barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
        barrier.image = depthImage;
        barrier.subresourceRange.aspectMask = VK_IMAGE_ASPECT_DEPTH_BIT;
        barrier.subresourceRange.levelCount = 1;
        barrier.subresourceRange.layerCount = 1;
        vkCmdPipelineBarrier(cmd,
            depthTestStages,
            VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
            0, 0, nullptr, 0, nullptr, 1, &barrier);
    }

    // Transition entire pyramid to GENERAL layout for storage writes.
    // oldLayout = UNDEFINED: the pyramid is fully rewritten below, so old data may be dropped.
    {
        VkImageMemoryBarrier barrier{VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER};
        barrier.srcAccessMask = 0;
        barrier.dstAccessMask = VK_ACCESS_SHADER_WRITE_BIT;
        barrier.oldLayout = VK_IMAGE_LAYOUT_UNDEFINED;
        barrier.newLayout = VK_IMAGE_LAYOUT_GENERAL;
        barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
        barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
        barrier.image = pyramidImage_[frameIndex];
        barrier.subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;
        barrier.subresourceRange.baseMipLevel = 0;
        barrier.subresourceRange.levelCount = mipLevels_;
        barrier.subresourceRange.layerCount = 1;
        vkCmdPipelineBarrier(cmd,
            VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
            VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
            0, 0, nullptr, 0, nullptr, 1, &barrier);
    }

    vkCmdBindPipeline(cmd, VK_PIPELINE_BIND_POINT_COMPUTE, buildPipeline_);

    // Build each mip level sequentially: mip N reads what mip N-1 wrote.
    uint32_t mipW = pyramidWidth_;
    uint32_t mipH = pyramidHeight_;
    for (uint32_t mip = 0; mip < mipLevels_; mip++) {
        // Bind descriptor set for this mip level
        vkCmdBindDescriptorSets(cmd, VK_PIPELINE_BIND_POINT_COMPUTE,
            buildPipelineLayout_, 0, 1, &buildDescSets_[frameIndex][mip], 0, nullptr);

        // Push constants: destination size + mip level
        HiZBuildPushConstants pc{};
        pc.dstWidth = static_cast<int32_t>(mipW);
        pc.dstHeight = static_cast<int32_t>(mipH);
        pc.mipLevel = static_cast<int32_t>(mip);
        vkCmdPushConstants(cmd, buildPipelineLayout_, VK_SHADER_STAGE_COMPUTE_BIT,
            0, sizeof(pc), &pc);

        // Dispatch compute: one thread per destination texel, rounded up to 8x8 groups.
        const uint32_t groupsX = (mipW + 7) / 8;
        const uint32_t groupsY = (mipH + 7) / 8;
        vkCmdDispatch(cmd, groupsX, groupsY, 1);

        // Make the writes to this mip visible to later compute-shader reads.
        // This is issued after EVERY mip, including the last one: intermediate
        // barriers order mip N before mip N+1, and the final one makes the whole
        // pyramid readable by compute work recorded after this function (the
        // occlusion-cull dispatch samples it in the same command buffer).
        {
            VkImageMemoryBarrier mipBarrier{VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER};
            mipBarrier.srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT;
            mipBarrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT;
            mipBarrier.oldLayout = VK_IMAGE_LAYOUT_GENERAL;
            mipBarrier.newLayout = VK_IMAGE_LAYOUT_GENERAL;
            mipBarrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
            mipBarrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
            mipBarrier.image = pyramidImage_[frameIndex];
            mipBarrier.subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;
            mipBarrier.subresourceRange.baseMipLevel = mip;
            mipBarrier.subresourceRange.levelCount = 1;
            mipBarrier.subresourceRange.layerCount = 1;
            vkCmdPipelineBarrier(cmd,
                VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
                VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
                0, 0, nullptr, 0, nullptr, 1, &mipBarrier);
        }

        // Next mip level dimensions (halved, never below 1 texel)
        mipW = std::max(1u, mipW / 2);
        mipH = std::max(1u, mipH / 2);
    }

    // Transition depth back to DEPTH_STENCIL_ATTACHMENT so rendering can use it again.
    {
        VkImageMemoryBarrier barrier{VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER};
        barrier.srcAccessMask = VK_ACCESS_SHADER_READ_BIT;
        barrier.dstAccessMask = VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT |
                                VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT;
        barrier.oldLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
        barrier.newLayout = VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL;
        barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
        barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
        barrier.image = depthImage;
        barrier.subresourceRange.aspectMask = VK_IMAGE_ASPECT_DEPTH_BIT;
        barrier.subresourceRange.levelCount = 1;
        barrier.subresourceRange.layerCount = 1;
        vkCmdPipelineBarrier(cmd,
            VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
            depthTestStages,
            0, 0, nullptr, 0, nullptr, 1, &barrier);
    }
}
} // namespace rendering
} // namespace wowee

View file

@ -295,7 +295,7 @@ bool M2Renderer::initialize(VkContext* ctx, VkDescriptorSetLayout perFrameLayout
// Output: uint visibility[] read back by CPU to skip culled instances in sortedVisible_ build.
{
static_assert(sizeof(CullInstanceGPU) == 32, "CullInstanceGPU must be 32 bytes (std430)");
static_assert(sizeof(CullUniformsGPU) == 128, "CullUniformsGPU must be 128 bytes (std140)");
static_assert(sizeof(CullUniformsGPU) == 272, "CullUniformsGPU must be 272 bytes (std140)");
// Descriptor set layout: binding 0 = UBO (frustum+camera), 1 = SSBO (input), 2 = SSBO (output)
VkDescriptorSetLayoutBinding bindings[3] = {};
@ -338,6 +338,54 @@ bool M2Renderer::initialize(VkContext* ctx, VkDescriptorSetLayout perFrameLayout
cullComp.destroy();
}
// HiZ-aware cull pipeline (Phase 6.3 Option B)
// Uses set 0 (same as frustum-only) + set 1 (HiZ pyramid sampler from HiZSystem).
// The HiZ descriptor set layout is created lazily when hizSystem_ is set, but the
// pipeline layout and shader are created now if the shader is available.
rendering::VkShaderModule cullHiZComp;
if (cullHiZComp.loadFromFile(device, "assets/shaders/m2_cull_hiz.comp.spv")) {
// HiZ cull set 1 layout: single combined image sampler (the HiZ pyramid)
VkDescriptorSetLayoutBinding hizBinding{};
hizBinding.binding = 0;
hizBinding.descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER;
hizBinding.descriptorCount = 1;
hizBinding.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT;
VkDescriptorSetLayout hizSamplerLayout = VK_NULL_HANDLE;
VkDescriptorSetLayoutCreateInfo hizLayoutCi{VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO};
hizLayoutCi.bindingCount = 1;
hizLayoutCi.pBindings = &hizBinding;
vkCreateDescriptorSetLayout(device, &hizLayoutCi, nullptr, &hizSamplerLayout);
VkDescriptorSetLayout hizSetLayouts[2] = {cullSetLayout_, hizSamplerLayout};
VkPipelineLayoutCreateInfo hizPlCi{VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO};
hizPlCi.setLayoutCount = 2;
hizPlCi.pSetLayouts = hizSetLayouts;
vkCreatePipelineLayout(device, &hizPlCi, nullptr, &cullHiZPipelineLayout_);
VkComputePipelineCreateInfo hizCpCi{VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO};
hizCpCi.stage = cullHiZComp.stageInfo(VK_SHADER_STAGE_COMPUTE_BIT);
hizCpCi.layout = cullHiZPipelineLayout_;
if (vkCreateComputePipelines(device, VK_NULL_HANDLE, 1, &hizCpCi, nullptr, &cullHiZPipeline_) != VK_SUCCESS) {
LOG_WARNING("M2Renderer: failed to create HiZ cull compute pipeline — HiZ disabled");
cullHiZPipeline_ = VK_NULL_HANDLE;
vkDestroyPipelineLayout(device, cullHiZPipelineLayout_, nullptr);
cullHiZPipelineLayout_ = VK_NULL_HANDLE;
} else {
LOG_INFO("M2Renderer: HiZ occlusion cull pipeline created");
}
// hizSamplerLayout is only needed while cullHiZPipelineLayout_ is being created.
// Vulkan does not require a descriptor set layout to outlive a pipeline layout
// built from it, so it is destroyed here instead of being tracked as a member.
// The descriptor set bound at set 1 comes from HiZSystem and must use a compatible layout.
vkDestroyDescriptorSetLayout(device, hizSamplerLayout, nullptr);
cullHiZComp.destroy();
} else {
LOG_INFO("M2Renderer: m2_cull_hiz.comp.spv not found — HiZ occlusion culling not available");
}
// Descriptor pool: 2 sets × 3 descriptors each (1 UBO + 2 SSBO)
VkDescriptorPoolSize poolSizes[2] = {};
poolSizes[0] = {VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, 2};
@ -756,6 +804,14 @@ bool M2Renderer::initialize(VkContext* ctx, VkDescriptorSetLayout perFrameLayout
return true;
}
/// Invalidates the CPU-side view of one frame's cull output allocation so that
/// host reads observe what the GPU wrote (required when the memory is not
/// HOST_COHERENT). Out-of-range indices and unallocated slots are ignored.
///
/// @param frameIndex  Frame slot of the cull output buffer; only 0 and 1 are accepted.
void M2Renderer::invalidateCullOutput(uint32_t frameIndex) {
    if (frameIndex >= 2) {
        return;
    }
    if (!cullOutputAlloc_[frameIndex]) {
        return;
    }
    vmaInvalidateAllocation(vkCtx_->getAllocator(), cullOutputAlloc_[frameIndex], 0, VK_WHOLE_SIZE);
}
void M2Renderer::shutdown() {
LOG_INFO("Shutting down M2 renderer...");
if (!vkCtx_) return;
@ -837,6 +893,8 @@ void M2Renderer::shutdown() {
if (instanceDescPool_) { vkDestroyDescriptorPool(device, instanceDescPool_, nullptr); instanceDescPool_ = VK_NULL_HANDLE; }
// GPU frustum culling compute pipeline + buffers cleanup
if (cullHiZPipeline_) { vkDestroyPipeline(device, cullHiZPipeline_, nullptr); cullHiZPipeline_ = VK_NULL_HANDLE; }
if (cullHiZPipelineLayout_) { vkDestroyPipelineLayout(device, cullHiZPipelineLayout_, nullptr); cullHiZPipelineLayout_ = VK_NULL_HANDLE; }
if (cullPipeline_) { vkDestroyPipeline(device, cullPipeline_, nullptr); cullPipeline_ = VK_NULL_HANDLE; }
if (cullPipelineLayout_) { vkDestroyPipelineLayout(device, cullPipelineLayout_, nullptr); cullPipelineLayout_ = VK_NULL_HANDLE; }
for (int i = 0; i < 2; i++) {

View file

@ -1,6 +1,7 @@
#include "rendering/m2_renderer.hpp"
#include "rendering/m2_renderer_internal.h"
#include "rendering/m2_model_classifier.hpp"
#include "rendering/hiz_system.hpp"
#include "rendering/vk_context.hpp"
#include "rendering/vk_buffer.hpp"
#include "rendering/vk_texture.hpp"
@ -600,6 +601,49 @@ void M2Renderer::dispatchCullCompute(VkCommandBuffer cmd, uint32_t frameIndex, c
}
ubo->cameraPos = glm::vec4(camPos, maxPossibleDistSq);
ubo->instanceCount = numInstances;
// HiZ occlusion culling fields
const bool hizReady = hizSystem_ && hizSystem_->isReady();
// Auto-disable HiZ when the camera has moved/rotated significantly.
// Large VP changes make the depth pyramid unreliable because the
// reprojected screen positions diverge from the actual pyramid data.
bool hizSafe = hizReady;
if (hizReady) {
// Compare current VP against previous VP — Frobenius-style max diff.
float maxDiff = 0.0f;
const float* curM = &vp[0][0];
const float* prevM = &prevVP_[0][0];
for (int k = 0; k < 16; ++k)
maxDiff = std::max(maxDiff, std::abs(curM[k] - prevM[k]));
// Threshold: typical small camera motion produces diffs < 0.05.
// A fast rotation easily exceeds 0.3. Skip HiZ when diff is large.
if (maxDiff > 0.15f) hizSafe = false;
}
ubo->hizEnabled = hizSafe ? 1u : 0u;
ubo->hizMipLevels = hizReady ? hizSystem_->getMipLevels() : 0u;
ubo->_pad2 = 0;
if (hizReady) {
ubo->hizParams = glm::vec4(
static_cast<float>(hizSystem_->getPyramidWidth()),
static_cast<float>(hizSystem_->getPyramidHeight()),
camera.getNearPlane(),
0.0f
);
ubo->viewProj = vp;
// Use previous frame's VP for HiZ reprojection — the HiZ pyramid
// was built from the previous frame's depth, so we must project
// into the same screen space to sample the correct depths.
ubo->prevViewProj = prevVP_;
} else {
ubo->hizParams = glm::vec4(0.0f);
ubo->viewProj = glm::mat4(1.0f);
ubo->prevViewProj = glm::mat4(1.0f);
}
// Save current VP for next frame's temporal reprojection
prevVP_ = vp;
}
// --- Upload per-instance cull data (SSBO, binding 1) ---
@ -622,6 +666,10 @@ void M2Renderer::dispatchCullCompute(VkCommandBuffer cmd, uint32_t frameIndex, c
if (inst.cachedIsValid) flags |= 1u;
if (inst.cachedIsSmoke) flags |= 2u;
if (inst.cachedIsInvisibleTrap) flags |= 4u;
// Bit 3: previouslyVisible — the shader skips HiZ for objects
// that were NOT rendered last frame (no reliable depth data).
if (i < prevFrameVisible_.size() && prevFrameVisible_[i])
flags |= 8u;
input[i].sphere = glm::vec4(inst.position, paddedRadius);
input[i].effectiveMaxDistSq = effectiveMaxDistSq;
@ -630,9 +678,22 @@ void M2Renderer::dispatchCullCompute(VkCommandBuffer cmd, uint32_t frameIndex, c
}
// --- Dispatch compute shader ---
vkCmdBindPipeline(cmd, VK_PIPELINE_BIND_POINT_COMPUTE, cullPipeline_);
vkCmdBindDescriptorSets(cmd, VK_PIPELINE_BIND_POINT_COMPUTE,
cullPipelineLayout_, 0, 1, &cullSet_[frameIndex], 0, nullptr);
const bool useHiZ = (cullHiZPipeline_ != VK_NULL_HANDLE)
&& hizSystem_ && hizSystem_->isReady();
if (useHiZ) {
vkCmdBindPipeline(cmd, VK_PIPELINE_BIND_POINT_COMPUTE, cullHiZPipeline_);
// Set 0: cull UBO + input/output SSBOs
vkCmdBindDescriptorSets(cmd, VK_PIPELINE_BIND_POINT_COMPUTE,
cullHiZPipelineLayout_, 0, 1, &cullSet_[frameIndex], 0, nullptr);
// Set 1: HiZ pyramid sampler
VkDescriptorSet hizSet = hizSystem_->getDescriptorSet(frameIndex);
vkCmdBindDescriptorSets(cmd, VK_PIPELINE_BIND_POINT_COMPUTE,
cullHiZPipelineLayout_, 1, 1, &hizSet, 0, nullptr);
} else {
vkCmdBindPipeline(cmd, VK_PIPELINE_BIND_POINT_COMPUTE, cullPipeline_);
vkCmdBindDescriptorSets(cmd, VK_PIPELINE_BIND_POINT_COMPUTE,
cullPipelineLayout_, 0, 1, &cullSet_[frameIndex], 0, nullptr);
}
const uint32_t groupCount = (numInstances + 63) / 64;
vkCmdDispatch(cmd, groupCount, 1, 1);
@ -693,6 +754,19 @@ void M2Renderer::render(VkCommandBuffer cmd, VkDescriptorSet perFrameSet, const
const uint32_t* visibility = static_cast<const uint32_t*>(cullOutputMapped_[frameIndex]);
const bool gpuCullAvailable = (cullPipeline_ != VK_NULL_HANDLE && visibility != nullptr);
// Snapshot the GPU visibility results into prevFrameVisible_ so the NEXT
// frame's compute dispatch can set the per-instance `previouslyVisible`
// flag (bit 3). Objects not visible this frame will skip HiZ next frame,
// avoiding false culls from stale depth data.
if (gpuCullAvailable) {
prevFrameVisible_.resize(numInstances);
for (uint32_t i = 0; i < numInstances; ++i)
prevFrameVisible_[i] = visibility[i] ? 1u : 0u;
} else {
// No GPU cull data — conservatively mark all as visible
prevFrameVisible_.assign(static_cast<size_t>(instances.size()), 1u);
}
// If GPU culling was not dispatched, fallback: compute distances on CPU
float maxRenderDistanceSq;
if (!gpuCullAvailable) {
@ -1074,7 +1148,10 @@ void M2Renderer::render(VkCommandBuffer cmd, VkDescriptorSet perFrameSet, const
// Update material UBO
if (batch.materialUBOMapped) {
auto* mat = static_cast<M2MaterialUBO*>(batch.materialUBOMapped);
mat->interiorDarken = insideInterior ? 1.0f : 0.0f;
// interiorDarken is a camera-based flag — it darkens ALL M2s (incl.
// outdoor trees) when the camera is inside a WMO. Disable it; indoor
// M2s already look correct from the darker ambient/lighting.
mat->interiorDarken = 0.0f;
if (batch.colorKeyBlack)
mat->colorKeyThreshold = (effectiveBlendMode == 4 || effectiveBlendMode == 5) ? 0.7f : 0.08f;
if (forceCutout) {
@ -1265,7 +1342,7 @@ void M2Renderer::render(VkCommandBuffer cmd, VkDescriptorSet perFrameSet, const
if (batch.materialUBOMapped) {
auto* mat = static_cast<M2MaterialUBO*>(batch.materialUBOMapped);
mat->interiorDarken = insideInterior ? 1.0f : 0.0f;
mat->interiorDarken = 0.0f;
if (batch.colorKeyBlack)
mat->colorKeyThreshold = (effectiveBlendMode == 4 || effectiveBlendMode == 5) ? 0.7f : 0.08f;
}

View file

@ -23,6 +23,7 @@
#include "rendering/character_preview.hpp"
#include "rendering/wmo_renderer.hpp"
#include "rendering/m2_renderer.hpp"
#include "rendering/hiz_system.hpp"
#include "rendering/minimap.hpp"
#include "rendering/world_map.hpp"
#include "rendering/quest_marker_renderer.hpp"
@ -580,7 +581,6 @@ bool Renderer::initialize(core::Window* win) {
overlaySystem_ = std::make_unique<OverlaySystem>(vkCtx);
renderGraph_->registerResource("shadow_depth");
renderGraph_->registerResource("reflection_texture");
renderGraph_->registerResource("cull_visibility");
renderGraph_->registerResource("scene_color");
renderGraph_->registerResource("scene_depth");
renderGraph_->registerResource("final_image");
@ -672,6 +672,10 @@ void Renderer::shutdown() {
}
LOG_DEBUG("Renderer::shutdown - m2Renderer...");
if (hizSystem_) {
hizSystem_->shutdown();
hizSystem_.reset();
}
if (m2Renderer) {
m2Renderer->shutdown();
m2Renderer.reset();
@ -798,6 +802,17 @@ void Renderer::applyMsaaChange() {
if (minimap) minimap->recreatePipelines();
// Resize HiZ pyramid (depth format/MSAA may have changed)
if (hizSystem_) {
auto ext = vkCtx->getSwapchainExtent();
if (!hizSystem_->resize(ext.width, ext.height)) {
LOG_WARNING("HiZ resize failed after MSAA change");
if (m2Renderer) m2Renderer->setHiZSystem(nullptr);
hizSystem_->shutdown();
hizSystem_.reset();
}
}
// Selection circle + overlay + FSR use lazy init, just destroy them
if (overlaySystem_) overlaySystem_->recreatePipelines();
if (postProcessPipeline_) postProcessPipeline_->destroyAllResources(); // Will be lazily recreated in beginFrame()
@ -846,6 +861,16 @@ void Renderer::beginFrame() {
}
// Recreate post-process resources for new swapchain dimensions
if (postProcessPipeline_) postProcessPipeline_->handleSwapchainResize();
// Resize HiZ depth pyramid for new swapchain dimensions
if (hizSystem_) {
auto ext = vkCtx->getSwapchainExtent();
if (!hizSystem_->resize(ext.width, ext.height)) {
LOG_WARNING("HiZ resize failed — disabling occlusion culling");
if (m2Renderer) m2Renderer->setHiZSystem(nullptr);
hizSystem_->shutdown();
hizSystem_.reset();
}
}
}
// Acquire swapchain image and begin command buffer
@ -864,6 +889,31 @@ void Renderer::beginFrame() {
// Update per-frame UBO with current camera/lighting state
updatePerFrameUBO();
// ── Early compute: HiZ pyramid build + M2 frustum/occlusion cull ──
// These run in a SEPARATE command buffer submission so the GPU executes
// them immediately. The CPU then reads the fresh visibility results
// before recording the main render pass — eliminating the 2-frame
// staleness that occurs when compute + render share one submission.
if (m2Renderer && camera && vkCtx) {
VkCommandBuffer computeCmd = vkCtx->beginSingleTimeCommands();
uint32_t frame = vkCtx->getCurrentFrame();
// Build HiZ depth pyramid from previous frame's depth buffer
if (hizSystem_ && hizSystem_->isReady()) {
VkImage depthSrc = vkCtx->getDepthCopySourceImage();
hizSystem_->buildPyramid(computeCmd, frame, depthSrc);
}
// Dispatch GPU frustum + HiZ occlusion culling
m2Renderer->dispatchCullCompute(computeCmd, frame, *camera);
vkCtx->endSingleTimeCommands(computeCmd);
// Ensure GPU→CPU buffer writes are visible to host (non-coherent memory).
m2Renderer->invalidateCullOutput(frame);
// Visibility results are now in cullOutputMapped_[frame], readable by CPU.
}
// --- Off-screen pre-passes ---
// Build frame graph: registers pre-passes as graph nodes with dependencies.
// compile() topologically sorts; execute() runs them with auto barriers.
@ -1489,7 +1539,9 @@ void Renderer::renderWorld(game::World* world, game::GameHandler* gameHandler) {
if (parallelRecordingEnabled_) {
// --- Pre-compute state + GPU allocations on main thread (not thread-safe) ---
if (m2Renderer && cameraController) {
m2Renderer->setInsideInterior(cameraController->isInsideWMO());
// Use isInsideInteriorWMO (flag 0x2000) — not isInsideWMO which includes
// outdoor WMO groups like archways/bridges that should receive shadows.
m2Renderer->setInsideInterior(cameraController->isInsideInteriorWMO());
m2Renderer->setOnTaxi(cameraController->isOnTaxi());
}
if (wmoRenderer) wmoRenderer->prepareRender();
@ -1734,7 +1786,8 @@ void Renderer::renderWorld(game::World* world, game::GameHandler* gameHandler) {
if (m2Renderer && camera && !skipM2) {
if (cameraController) {
m2Renderer->setInsideInterior(cameraController->isInsideWMO());
// Use isInsideInteriorWMO (flag 0x2000) for correct indoor detection
m2Renderer->setInsideInterior(cameraController->isInsideInteriorWMO());
m2Renderer->setOnTaxi(cameraController->isOnTaxi());
}
m2Renderer->prepareRender(frameIdx, *camera);
@ -1887,6 +1940,23 @@ bool Renderer::initializeRenderers(pipeline::AssetManager* assetManager, const s
spellVisualSystem_->initialize(m2Renderer.get());
}
}
// HiZ occlusion culling — temporal reprojection.
// The HiZ pyramid is built from the previous frame's depth buffer. The cull
// compute shader uses prevViewProj to project objects into the previous frame's
// screen space so that depth samples match the pyramid, eliminating flicker
// caused by camera movement between frames.
if (!hizSystem_ && m2Renderer && vkCtx) {
hizSystem_ = std::make_unique<HiZSystem>();
auto extent = vkCtx->getSwapchainExtent();
if (hizSystem_->initialize(vkCtx, extent.width, extent.height)) {
m2Renderer->setHiZSystem(hizSystem_.get());
LOG_INFO("HiZ occlusion culling initialized (", extent.width, "x", extent.height, ")");
} else {
LOG_WARNING("HiZ occlusion culling unavailable — falling back to frustum-only culling");
hizSystem_.reset();
}
}
if (!wmoRenderer) {
wmoRenderer = std::make_unique<WMORenderer>();
wmoRenderer->initialize(vkCtx, perFrameSetLayout, assetManager);
@ -2627,7 +2697,6 @@ void Renderer::buildFrameGraph(game::GameHandler* gameHandler) {
auto shadowDepth = renderGraph_->findResource("shadow_depth");
auto reflTex = renderGraph_->findResource("reflection_texture");
auto cullVis = renderGraph_->findResource("cull_visibility");
// Minimap composites (no dependencies — standalone off-screen render target)
renderGraph_->addPass("minimap_composite", {}, {},
@ -2670,13 +2739,6 @@ void Renderer::buildFrameGraph(game::GameHandler* gameHandler) {
renderReflectionPass();
});
// GPU frustum cull compute → outputs cull_visibility
renderGraph_->addPass("compute_cull", {}, {cullVis},
[this](VkCommandBuffer cmd) {
if (m2Renderer && camera)
m2Renderer->dispatchCullCompute(cmd, vkCtx->getCurrentFrame(), *camera);
});
renderGraph_->compile();
}

View file

@ -798,7 +798,8 @@ bool VkContext::createDepthBuffer() {
imgInfo.arrayLayers = 1;
imgInfo.samples = msaaSamples_;
imgInfo.tiling = VK_IMAGE_TILING_OPTIMAL;
imgInfo.usage = VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT;
imgInfo.usage = VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT
| VK_IMAGE_USAGE_SAMPLED_BIT; // HiZ pyramid reads depth as texture
VmaAllocationCreateInfo allocInfo{};
allocInfo.usage = VMA_MEMORY_USAGE_GPU_ONLY;
@ -911,7 +912,8 @@ bool VkContext::createDepthResolveImage() {
imgInfo.arrayLayers = 1;
imgInfo.samples = VK_SAMPLE_COUNT_1_BIT;
imgInfo.tiling = VK_IMAGE_TILING_OPTIMAL;
imgInfo.usage = VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT;
imgInfo.usage = VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT
| VK_IMAGE_USAGE_SAMPLED_BIT; // HiZ pyramid reads depth as texture
VmaAllocationCreateInfo allocInfo{};
allocInfo.usage = VMA_MEMORY_USAGE_GPU_ONLY;

View file

@ -197,6 +197,19 @@ target_link_libraries(test_anim_capability PRIVATE catch2_main)
add_test(NAME anim_capability COMMAND test_anim_capability)
register_test_target(test_anim_capability)
# ── test_indoor_shadows ──────────────────────────────────────
add_executable(test_indoor_shadows
test_indoor_shadows.cpp
)
target_include_directories(test_indoor_shadows PRIVATE ${TEST_INCLUDE_DIRS})
target_include_directories(test_indoor_shadows SYSTEM PRIVATE ${TEST_SYSTEM_INCLUDE_DIRS})
target_link_libraries(test_indoor_shadows PRIVATE catch2_main)
if(TARGET glm::glm)
target_link_libraries(test_indoor_shadows PRIVATE glm::glm)
endif()
add_test(NAME indoor_shadows COMMAND test_indoor_shadows)
register_test_target(test_indoor_shadows)
# ── ASAN / UBSan for test targets ────────────────────────────
if(WOWEE_ENABLE_ASAN AND NOT MSVC)
foreach(_t IN LISTS ALL_TEST_TARGETS)

View file

@ -0,0 +1,122 @@
// Tests for indoor shadow disable logic (WMO interior groups)
//
// WMO interior groups (flag 0x2000) should NOT receive directional sun shadows
// because they rely on pre-baked vertex color lighting (MOCV) and the shadow map
// only makes them darker. The fix is in the fragment shader: interior groups
// skip the shadow map sample entirely.
//
// These tests verify the data contract between the renderer and the shader:
// - GPUPerFrameData.shadowParams.x controls global shadow enable
// - WMOMaterial.isInterior controls per-group interior flag
// - Interior groups ignore shadows regardless of global shadow state
#include <catch_amalgamated.hpp>
#include "rendering/vk_frame_data.hpp"
#include <glm/glm.hpp>
using wowee::rendering::GPUPerFrameData;
// Test-local mirror of the shadow parameter packing done in
// Renderer::updatePerFrameUBO(). It takes no indoor/outdoor input on purpose:
// the global shadow switch must not depend on where the camera is.
//
//   shadowParams.x = 1.0 when shadows are enabled, otherwise 0.0
//   shadowParams.y = bias, 0.8 at a shadow distance of 300 and scaled linearly,
//                    clamped to [0, 1]
//   shadowParams.z/w = 0.0
static void applyShadowParams(GPUPerFrameData& fd,
                              bool shadowsEnabled,
                              float shadowDistance = 300.0f) {
    const float enabledFlag = shadowsEnabled ? 1.0f : 0.0f;
    const float bias = glm::clamp(0.8f * (shadowDistance / 300.0f), 0.0f, 1.0f);
    fd.shadowParams = glm::vec4(enabledFlag, bias, 0.0f, 0.0f);
}
// Test-local mirror of the WMO fragment shader's shadow decision.
// The shadow map result (rawShadow) is used only for exterior groups while
// shadows are globally enabled (globalShadowEnabled > 0.5). In every other
// case the surface is treated as fully lit and 1.0 is returned. The interior
// check comes first, so it wins regardless of the global switch or of any
// unlit material flag.
static float computeWmoShadowFactor(bool isInterior, float globalShadowEnabled, float rawShadow) {
    const bool sampleShadowMap = !isInterior && (globalShadowEnabled > 0.5f);
    return sampleShadowMap ? rawShadow : 1.0f;
}
TEST_CASE("Global shadow params are not affected by indoor state", "[indoor_shadows]") {
    GPUPerFrameData frame{};

    // Enabled: shadowParams.x carries 1.0 — there is no indoor input that could change it.
    applyShadowParams(frame, true);
    REQUIRE(frame.shadowParams.x == Catch::Approx(1.0f));

    // Disabled: the same slot carries 0.0.
    applyShadowParams(frame, false);
    REQUIRE(frame.shadowParams.x == Catch::Approx(0.0f));
}
TEST_CASE("Interior WMO groups skip shadow sampling", "[indoor_shadows]") {
    // Shadows are globally on and the shadow map reports a dark 0.2, yet an
    // interior group must come out fully lit.
    constexpr bool kInterior = true;
    constexpr float kShadowsOn = 1.0f;
    constexpr float kDarkShadow = 0.2f;

    const float result = computeWmoShadowFactor(kInterior, kShadowsOn, kDarkShadow);
    REQUIRE(result == Catch::Approx(1.0f));
}
TEST_CASE("Exterior WMO groups receive shadows normally", "[indoor_shadows]") {
    // Exterior group with shadows on: the shadow map value passes straight through.
    constexpr bool kExterior = false;
    constexpr float kShadowsOn = 1.0f;
    constexpr float kMapShadow = 0.3f;

    const float result = computeWmoShadowFactor(kExterior, kShadowsOn, kMapShadow);
    REQUIRE(result == Catch::Approx(0.3f));
}
TEST_CASE("Exterior WMO groups skip shadows when globally disabled", "[indoor_shadows]") {
    // With the global switch off, even an exterior group ignores the shadow map.
    constexpr bool kExterior = false;
    constexpr float kShadowsOff = 0.0f;
    constexpr float kMapShadow = 0.3f;

    const float result = computeWmoShadowFactor(kExterior, kShadowsOff, kMapShadow);
    REQUIRE(result == Catch::Approx(1.0f));
}
TEST_CASE("Interior WMO groups skip shadows even when globally disabled", "[indoor_shadows]") {
    // Interior + shadows off: both conditions independently yield a fully lit surface.
    constexpr bool kInterior = true;
    constexpr float kShadowsOff = 0.0f;
    constexpr float kMapShadow = 0.5f;

    const float result = computeWmoShadowFactor(kInterior, kShadowsOff, kMapShadow);
    REQUIRE(result == Catch::Approx(1.0f));
}
TEST_CASE("Unlit interior surfaces skip shadows (isInterior takes priority over unlit)", "[indoor_shadows]") {
    // Interior walls frequently carry the F_UNLIT material flag (0x01). The
    // shader is expected to test isInterior before unlit, so such a surface
    // still ends up with a shadow factor of 1.0 even for a very dark map value.
    constexpr bool kInterior = true;
    constexpr float kShadowsOn = 1.0f;
    constexpr float kVeryDarkShadow = 0.1f;

    const float result = computeWmoShadowFactor(kInterior, kShadowsOn, kVeryDarkShadow);
    REQUIRE(result == Catch::Approx(1.0f));
}
TEST_CASE("Outdoor unlit surfaces still receive shadows", "[indoor_shadows]") {
    // An exterior surface (isInterior=false) that is unlit in the shader is
    // still darkened by the shadow map value.
    constexpr bool kExterior = false;
    constexpr float kShadowsOn = 1.0f;
    constexpr float kMapShadow = 0.25f;

    const float result = computeWmoShadowFactor(kExterior, kShadowsOn, kMapShadow);
    REQUIRE(result == Catch::Approx(0.25f));
}
TEST_CASE("Shadow bias scales with shadow distance", "[indoor_shadows]") {
    GPUPerFrameData frame{};

    // Reference distance (300) gives the base bias of 0.8.
    applyShadowParams(frame, true, 300.0f);
    REQUIRE(frame.shadowParams.y == Catch::Approx(0.8f));

    // Half the distance gives half the bias.
    applyShadowParams(frame, true, 150.0f);
    REQUIRE(frame.shadowParams.y == Catch::Approx(0.4f));

    // Double the distance would give 1.6, which is clamped to the upper bound of 1.0.
    applyShadowParams(frame, true, 600.0f);
    REQUIRE(frame.shadowParams.y == Catch::Approx(1.0f));
}
TEST_CASE("Ambient color is NOT modified globally for indoor state", "[indoor_shadows]") {
    // The per-frame UBO ambient color must never change based on indoor state;
    // indoor lighting is applied per group in the WMO shader (MOCV vertex
    // colors and MOHD ambient color).
    const glm::vec4 initialAmbient(0.3f, 0.3f, 0.3f, 1.0f);

    GPUPerFrameData frame{};
    frame.ambientColor = initialAmbient;

    applyShadowParams(frame, true);

    // Writing the shadow params leaves the RGB ambient components as they were.
    REQUIRE(frame.ambientColor.x == Catch::Approx(0.3f));
    REQUIRE(frame.ambientColor.y == Catch::Approx(0.3f));
    REQUIRE(frame.ambientColor.z == Catch::Approx(0.3f));
}