diff --git a/.gitignore b/.gitignore index 7df4f225..507641cd 100644 --- a/.gitignore +++ b/.gitignore @@ -19,6 +19,7 @@ Makefile *.obj *.slo *.lo +*.spv # Compiled Dynamic libraries *.so diff --git a/CMakeLists.txt b/CMakeLists.txt index f752df5a..72594684 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -624,6 +624,7 @@ set(WOWEE_SOURCES src/rendering/m2_renderer_instance.cpp src/rendering/m2_model_classifier.cpp src/rendering/render_graph.cpp + src/rendering/hiz_system.cpp src/rendering/quest_marker_renderer.cpp src/rendering/minimap.cpp src/rendering/world_map.cpp diff --git a/assets/shaders/hiz_build.comp.glsl b/assets/shaders/hiz_build.comp.glsl new file mode 100644 index 00000000..b60bc332 --- /dev/null +++ b/assets/shaders/hiz_build.comp.glsl @@ -0,0 +1,57 @@ +#version 450 + +// Hierarchical-Z depth pyramid builder. +// Builds successive mip levels from the scene depth buffer. +// Each 2×2 block is reduced to its MAXIMUM depth (farthest/largest value). +// This is conservative for occlusion: an object is only culled when its nearest +// depth exceeds the farthest occluder depth in the pyramid region. +// +// Two modes controlled by push constant: +// mipLevel == 0: Sample from the source depth texture (mip 0 of the full-res depth). +// mipLevel > 0: Sample from the previous HiZ mip level. 
+
+layout(local_size_x = 8, local_size_y = 8) in;
+
+// Source depth texture (full-resolution scene depth, or previous mip via same image)
+layout(set = 0, binding = 0) uniform sampler2D srcDepth;
+
+// Destination mip level (written as storage image)
+layout(r32f, set = 0, binding = 1) uniform writeonly image2D dstMip;
+
+layout(push_constant) uniform PushConstants {
+    ivec2 dstSize; // Width and height of the destination mip level
+    int mipLevel; // Current mip level being built (0 = from scene depth)
+};
+
+void main() {
+    ivec2 pos = ivec2(gl_GlobalInvocationID.xy);
+    if (pos.x >= dstSize.x || pos.y >= dstSize.y) return;
+
+    // mip 0 reads lod 0 of the scene depth; mip N reads lod N-1 of the pyramid.
+    int srcLod = (mipLevel == 0) ? 0 : mipLevel - 1;
+    ivec2 srcSize = textureSize(srcDepth, srcLod);
+    ivec2 srcMax = srcSize - ivec2(1);
+
+    // Each output texel covers a 2×2 block of the source. Along any axis whose
+    // source size is odd the footprint is widened to 3 texels: the destination
+    // is half the source rounded down, so a plain 2×2 footprint would drop the
+    // trailing row/column and under-estimate the farthest depth (false occlusion).
+    ivec2 srcPos = pos * 2;
+    ivec2 span = ivec2(2) + (srcSize & ivec2(1));
+
+    // Conservative maximum (standard depth buffer: 0=near, 1=far).
+    // We store the farthest (largest) depth in the covered source footprint.
+    // An object is occluded only when its nearest depth > the farthest occluder
+    // depth in the covered screen region — guaranteeing it's behind EVERYTHING.
+    float maxDepth = 0.0;
+    for (int y = 0; y < span.y; ++y) {
+        for (int x = 0; x < span.x; ++x) {
+            // Clamp to the source extent: texelFetch outside the image yields
+            // undefined data (e.g. row 1 of a 1-texel-high mip).
+            ivec2 p = min(srcPos + ivec2(x, y), srcMax);
+            maxDepth = max(maxDepth, texelFetch(srcDepth, p, srcLod).r);
+        }
+    }
+
+    imageStore(dstMip, pos, vec4(maxDepth));
+}
diff --git a/assets/shaders/m2_cull_hiz.comp.glsl b/assets/shaders/m2_cull_hiz.comp.glsl new file mode 100644 index 00000000..28fd8d10 --- /dev/null +++ b/assets/shaders/m2_cull_hiz.comp.glsl @@ -0,0 +1,184 @@ +#version 450 + +// GPU Frustum + HiZ Occlusion Culling for M2 doodads (Phase 6.3). +// +// Two-level culling: +// 1. Frustum — current-frame planes from viewProj. +// 2. HiZ occlusion — projects bounding sphere into the PREVIOUS frame's +// screen space via prevViewProj and samples the Hierarchical-Z pyramid +// (built from said previous depth). Conservative safeguards: +// • Only objects that were visible last frame get the HiZ test. +// • AABB must be fully inside the screen (no border sampling). +// • Bounding sphere is inflated by 50 % for the HiZ AABB. +// • A depth bias is applied before the occlusion comparison. +// • Nearest depth is projected via prevViewProj from sphere center +// (avoids toCam mismatch between current and previous cameras). +// +// Falls back gracefully: if hizEnabled == 0, behaves identically to frustum-only.
+ +layout(local_size_x = 64) in; + +struct CullInstance { + vec4 sphere; // xyz = world position, w = padded radius + float effectiveMaxDistSq; + uint flags; // bit 0 = valid, bit 1 = smoke, bit 2 = invisibleTrap, + // bit 3 = previouslyVisible + float _pad0; + float _pad1; +}; + +layout(std140, set = 0, binding = 0) uniform CullUniforms { + vec4 frustumPlanes[6]; + vec4 cameraPos; // xyz = camera position, w = maxPossibleDistSq + uint instanceCount; + uint hizEnabled; + uint hizMipLevels; + uint _pad2; + vec4 hizParams; // x = pyramidWidth, y = pyramidHeight, z = nearPlane, w = unused + mat4 viewProj; // current frame view-projection + mat4 prevViewProj; // PREVIOUS frame's view-projection for HiZ reprojection +}; + +layout(std430, set = 0, binding = 1) readonly buffer CullInput { + CullInstance cullInstances[]; +}; + +layout(std430, set = 0, binding = 2) buffer CullOutput { + uint visibility[]; +}; + +layout(set = 1, binding = 0) uniform sampler2D hizPyramid; + +// Screen-edge margin — skip HiZ if the AABB touches this border. +// Depth data at screen edges is from unrelated geometry → false culls. +const float SCREEN_EDGE_MARGIN = 0.02; + +// Sphere inflation factor for HiZ screen AABB (50 % larger → very conservative). +const float HIZ_SPHERE_INFLATE = 1.5; + +// Depth bias — push nearest depth closer to camera so only objects +// significantly behind occluders are culled. +const float HIZ_DEPTH_BIAS = 0.02; + +// Minimum screen-space size (pixels) for HiZ to engage. 
+const float HIZ_MIN_SCREEN_PX = 6.0; + +void main() { + uint id = gl_GlobalInvocationID.x; + if (id >= instanceCount) return; + + CullInstance inst = cullInstances[id]; + + // Flag check: must be valid, not smoke, not invisible trap + uint f = inst.flags; + if ((f & 1u) == 0u || (f & 6u) != 0u) { + visibility[id] = 0u; + return; + } + + // Early distance rejection (loose upper bound) + vec3 toCam = inst.sphere.xyz - cameraPos.xyz; + float distSq = dot(toCam, toCam); + if (distSq > cameraPos.w) { + visibility[id] = 0u; + return; + } + + // Accurate per-instance distance cull + if (distSq > inst.effectiveMaxDistSq) { + visibility[id] = 0u; + return; + } + + // Frustum cull: sphere vs 6 planes (current frame) + float radius = inst.sphere.w; + if (radius > 0.0) { + for (int i = 0; i < 6; i++) { + float d = dot(frustumPlanes[i].xyz, inst.sphere.xyz) + frustumPlanes[i].w; + if (d < -radius) { + visibility[id] = 0u; + return; + } + } + } + + // --- HiZ Occlusion Test --- + // Skip for objects not rendered last frame (bit 3 = previouslyVisible). + bool previouslyVisible = (f & 8u) != 0u; + + if (hizEnabled != 0u && radius > 0.0 && previouslyVisible) { + // Inflate sphere for conservative screen-space AABB + float hizRadius = radius * HIZ_SPHERE_INFLATE; + + // Project sphere center into previous frame's clip space + vec4 clipCenter = prevViewProj * vec4(inst.sphere.xyz, 1.0); + if (clipCenter.w > 0.0) { + vec3 ndc = clipCenter.xyz / clipCenter.w; + + // --- Correct sphere → screen AABB using VP row-vector lengths --- + // The maximum screen-space extent of a world-space sphere is + // maxDeltaNdcX = R * ‖row_x(VP)‖ / w + // where row_x = (VP[0][0], VP[1][0], VP[2][0]) maps world XYZ + // offsets to clip-X. Using only the diagonal element (VP[0][0]) + // underestimates the footprint when the camera is rotated, + // causing false culls at certain view angles. 
+ float rowLenX = length(vec3(prevViewProj[0][0], + prevViewProj[1][0], + prevViewProj[2][0])); + float rowLenY = length(vec3(prevViewProj[0][1], + prevViewProj[1][1], + prevViewProj[2][1])); + float projRadX = hizRadius * rowLenX / clipCenter.w; + float projRadY = hizRadius * rowLenY / clipCenter.w; + float projRad = max(projRadX, projRadY); + + vec2 uvCenter = ndc.xy * 0.5 + 0.5; + float uvRad = projRad * 0.5; + vec2 uvMin = uvCenter - uvRad; + vec2 uvMax = uvCenter + uvRad; + + // **Screen-edge guard**: skip if AABB extends outside safe area. + // Depth data at borders is from unrelated geometry. + if (uvMin.x >= SCREEN_EDGE_MARGIN && uvMin.y >= SCREEN_EDGE_MARGIN && + uvMax.x <= (1.0 - SCREEN_EDGE_MARGIN) && uvMax.y <= (1.0 - SCREEN_EDGE_MARGIN) && + uvMax.x > uvMin.x && uvMax.y > uvMin.y) + { + float aabbW = (uvMax.x - uvMin.x) * hizParams.x; + float aabbH = (uvMax.y - uvMin.y) * hizParams.y; + float screenSize = max(aabbW, aabbH); + + if (screenSize >= HIZ_MIN_SCREEN_PX) { + // Mip level: +1 for conservatism (coarser = bigger depth footprint) + float mipLevel = ceil(log2(max(screenSize, 1.0))) + 1.0; + mipLevel = clamp(mipLevel, 0.0, float(hizMipLevels - 1u)); + + // Sample HiZ at 4 corners — take MAX (farthest occluder) + float pz0 = textureLod(hizPyramid, uvMin, mipLevel).r; + float pz1 = textureLod(hizPyramid, vec2(uvMax.x, uvMin.y), mipLevel).r; + float pz2 = textureLod(hizPyramid, vec2(uvMin.x, uvMax.y), mipLevel).r; + float pz3 = textureLod(hizPyramid, uvMax, mipLevel).r; + float pyramidDepth = max(max(pz0, pz1), max(pz2, pz3)); + + // Nearest depth: project sphere center's NDC-Z then subtract + // the sphere's depth range. The depth span uses the Z-row + // length of VP (same Cauchy-Schwarz reasoning as X/Y), giving + // the correct NDC-Z extent regardless of camera orientation. 
+ float rowLenZ = length(vec3(prevViewProj[0][2], + prevViewProj[1][2], + prevViewProj[2][2])); + float depthSpan = hizRadius * rowLenZ / clipCenter.w; + float centerDepth = ndc.z; + float nearestDepth = centerDepth - depthSpan - HIZ_DEPTH_BIAS; + + if (nearestDepth > pyramidDepth && pyramidDepth < 1.0) { + visibility[id] = 0u; + return; + } + } + } + } + // fallthrough: conservatively visible + } + + visibility[id] = 1u; +} diff --git a/assets/shaders/wmo.frag.glsl b/assets/shaders/wmo.frag.glsl index ce29a0d1..e99d535d 100644 --- a/assets/shaders/wmo.frag.glsl +++ b/assets/shaders/wmo.frag.glsl @@ -163,10 +163,11 @@ void main() { vec3 result; - // Sample shadow map for all WMO groups (interior groups with 0x2000 flag - // include covered outdoor areas like archways/streets that should receive shadows) + // Sample shadow map — skip entirely for interior groups (flag 0x2000). + // Interior surfaces rely on pre-baked MOCV vertex-color lighting and the + // directional shadow map only makes them darker without any benefit. float shadow = 1.0; - if (shadowParams.x > 0.5) { + if (isInterior == 0 && shadowParams.x > 0.5) { vec3 ldir = normalize(-lightDir.xyz); float normalOffset = SHADOW_TEXEL * 2.0 * (1.0 - abs(dot(norm, ldir))); vec3 biasedPos = FragPos + norm * normalOffset; @@ -185,17 +186,20 @@ void main() { if (isLava != 0) { // Lava is self-luminous — bright emissive, no shadows result = texColor.rgb * 1.5; - } else if (unlit != 0) { - result = texColor.rgb * shadow; } else if (isInterior != 0) { // WMO interior: vertex colors (MOCV) are pre-baked lighting from the artist. // The MOHD ambient color tints/floors the vertex colors so dark spots don't // go completely black, matching the WoW client's interior shading. + // We handle BOTH lit and unlit interior materials — directional + // sun shadows and lighting are skipped for all interior groups. 
vec3 wmoAmbient = vec3(wmoAmbientR, wmoAmbientG, wmoAmbientB); // Clamp ambient to at least 0.3 to avoid total darkness when MOHD color is zero wmoAmbient = max(wmoAmbient, vec3(0.3)); vec3 mocv = max(VertColor.rgb, wmoAmbient); - result = texColor.rgb * mocv * shadow; + result = texColor.rgb * mocv; + } else if (unlit != 0) { + // Outdoor unlit surface — still receives directional shadows + result = texColor.rgb * shadow; } else { vec3 ldir = normalize(-lightDir.xyz); float diff = max(dot(norm, ldir), 0.0); diff --git a/assets/shaders/wmo.frag.spv b/assets/shaders/wmo.frag.spv deleted file mode 100644 index 3507f3c9..00000000 Binary files a/assets/shaders/wmo.frag.spv and /dev/null differ diff --git a/include/rendering/camera_controller.hpp b/include/rendering/camera_controller.hpp index a5593b06..99bd395f 100644 --- a/include/rendering/camera_controller.hpp +++ b/include/rendering/camera_controller.hpp @@ -83,6 +83,7 @@ public: bool isSitting() const { return sitting; } bool isSwimming() const { return swimming; } bool isInsideWMO() const { return cachedInsideWMO; } + bool isInsideInteriorWMO() const { return cachedInsideInteriorWMO; } void setGrounded(bool g) { grounded = g; } void setSitting(bool s) { sitting = s; } bool isOnTaxi() const { return externalFollow_; } diff --git a/include/rendering/hiz_system.hpp b/include/rendering/hiz_system.hpp new file mode 100644 index 00000000..38bdc5b0 --- /dev/null +++ b/include/rendering/hiz_system.hpp @@ -0,0 +1,150 @@ +#pragma once + +#include +#include +#include +#include +#include + +namespace wowee { +namespace rendering { + +class VkContext; + +/** + * Hierarchical-Z (HiZ) depth pyramid for GPU occlusion culling (Phase 6.3 Option B). + * + * Builds a max-depth (farthest-depth) mip chain from the previous frame's depth buffer each frame. + * The M2 cull compute shader samples this pyramid to reject objects hidden behind + * geometry, complementing the existing frustum culling.
+ * + * Lifecycle: + * initialize() — create pyramid image, sampler, compute pipeline, descriptors + * buildPyramid() — dispatch compute to reduce depth → mip chain (once per frame) + * shutdown() — destroy all Vulkan resources + * + * The pyramid is double-buffered (per frame-in-flight) so builds and reads + * never race across concurrent GPU submissions. + */ +class HiZSystem { +public: + HiZSystem() = default; + ~HiZSystem(); + + HiZSystem(const HiZSystem&) = delete; + HiZSystem& operator=(const HiZSystem&) = delete; + + /** + * Create all Vulkan resources. + * @param ctx Vulkan context (device, allocator, etc.) + * @param width Full-resolution depth buffer width + * @param height Full-resolution depth buffer height + * @return true on success + */ + [[nodiscard]] bool initialize(VkContext* ctx, uint32_t width, uint32_t height); + + /** + * Release all Vulkan resources. + */ + void shutdown(); + + /** + * Rebuild the pyramid after a swapchain resize. + * Safe to call repeatedly — destroys old resources first. + */ + [[nodiscard]] bool resize(uint32_t width, uint32_t height); + + /** + * Dispatch compute shader to build the HiZ pyramid from the current depth buffer. + * Must be called AFTER the main scene pass has finished writing to the depth buffer. + * + * @param cmd Active command buffer (in recording state) + * @param frameIndex Current frame-in-flight index (0 or 1) + * @param depthImage Source depth image (VK_FORMAT_D32_SFLOAT) + */ + void buildPyramid(VkCommandBuffer cmd, uint32_t frameIndex, VkImage depthImage); + + /** + * @return Descriptor set layout for the HiZ pyramid sampler (set 1 for m2_cull_hiz). + */ + VkDescriptorSetLayout getDescriptorSetLayout() const { return hizSetLayout_; } + + /** + * @return Descriptor set for the given frame (sampler2D of the HiZ pyramid). + * Bind as set 1 in the M2 HiZ cull pipeline. 
+ */ + VkDescriptorSet getDescriptorSet(uint32_t frameIndex) const { return hizDescSet_[frameIndex]; } + + /** + * @return true if HiZ system is initialized and ready. + */ + bool isReady() const { return ready_; } + + /** + * @return Number of mip levels in the pyramid. + */ + uint32_t getMipLevels() const { return mipLevels_; } + + /** + * @return Pyramid base resolution (mip 0). + */ + uint32_t getPyramidWidth() const { return pyramidWidth_; } + uint32_t getPyramidHeight() const { return pyramidHeight_; } + +private: + bool createPyramidImage(); + void destroyPyramidImage(); + bool createComputePipeline(); + void destroyComputePipeline(); + bool createDescriptors(); + void destroyDescriptors(); + + VkContext* ctx_ = nullptr; + bool ready_ = false; + + // Pyramid dimensions (mip 0 = half of full-res depth) + uint32_t fullWidth_ = 0; + uint32_t fullHeight_ = 0; + uint32_t pyramidWidth_ = 0; + uint32_t pyramidHeight_ = 0; + uint32_t mipLevels_ = 0; + + static constexpr uint32_t MAX_FRAMES = 2; + + // Per-frame HiZ pyramid images (R32_SFLOAT, full mip chain) + VkImage pyramidImage_[MAX_FRAMES] = {}; + VmaAllocation pyramidAlloc_[MAX_FRAMES] = {}; + VkImageView pyramidViewAll_[MAX_FRAMES] = {}; // View of all mip levels (for sampling) + std::vector pyramidMipViews_[MAX_FRAMES]; // Per-mip views (for storage image writes) + + // Depth input — image view for sampling the depth buffer as a texture + VkImageView depthSamplerView_[MAX_FRAMES] = {}; + + // Sampler for depth reads (nearest, clamp-to-edge) + VkSampler depthSampler_ = VK_NULL_HANDLE; + + // Compute pipeline for building the pyramid + VkPipeline buildPipeline_ = VK_NULL_HANDLE; + VkPipelineLayout buildPipelineLayout_ = VK_NULL_HANDLE; + + // Descriptor set layout for build pipeline (set 0: src sampler + dst storage image) + VkDescriptorSetLayout buildSetLayout_ = VK_NULL_HANDLE; + VkDescriptorPool buildDescPool_ = VK_NULL_HANDLE; + // Per-frame, per-mip descriptor sets for pyramid build + std::vector 
buildDescSets_[MAX_FRAMES]; + + // HiZ sampling descriptor: exposed to M2 cull shader (set 1: combined image sampler) + VkDescriptorSetLayout hizSetLayout_ = VK_NULL_HANDLE; + VkDescriptorPool hizDescPool_ = VK_NULL_HANDLE; + VkDescriptorSet hizDescSet_[MAX_FRAMES] = {}; + + // Push constant for build shader + struct HiZBuildPushConstants { + int32_t dstWidth; + int32_t dstHeight; + int32_t mipLevel; + }; +}; + +} // namespace rendering +} // namespace wowee diff --git a/include/rendering/m2_renderer.hpp b/include/rendering/m2_renderer.hpp index 85c62df9..0c381d7e 100644 --- a/include/rendering/m2_renderer.hpp +++ b/include/rendering/m2_renderer.hpp @@ -28,6 +28,7 @@ namespace rendering { class Camera; class VkContext; class VkTexture; +class HiZSystem; /** * GPU representation of an M2 model @@ -299,6 +300,13 @@ public: void dispatchCullCompute(VkCommandBuffer cmd, uint32_t frameIndex, const Camera& camera); void render(VkCommandBuffer cmd, VkDescriptorSet perFrameSet, const Camera& camera); + /** Set the HiZ system for occlusion culling (Phase 6.3). nullptr disables HiZ. */ + void setHiZSystem(HiZSystem* hiz) { hizSystem_ = hiz; } + + /** Ensure GPU→CPU cull output is visible to the host after a fence wait. + * Call after the early compute submission finishes (endSingleTimeCommands). */ + void invalidateCullOutput(uint32_t frameIndex); + /** * Initialize shadow pipeline (Phase 7) */ @@ -437,7 +445,7 @@ private: // Mega bone SSBO — consolidates all per-instance bone matrices into a single buffer per frame. // Replaces per-instance bone SSBOs for fewer descriptor binds and enables GPU instancing. 
- static constexpr uint32_t MEGA_BONE_MAX_INSTANCES = 2048; + static constexpr uint32_t MEGA_BONE_MAX_INSTANCES = 4096; static constexpr uint32_t MAX_BONES_PER_INSTANCE = 128; ::VkBuffer megaBoneBuffer_[2] = {}; VmaAllocation megaBoneAlloc_[2] = {}; @@ -472,19 +480,26 @@ private: uint32_t flags; // bit 0 = valid, bit 1 = smoke, bit 2 = invisibleTrap float _pad[2] = {}; }; - struct CullUniformsGPU { // matches CullUniforms in m2_cull.comp.glsl (128 bytes, std140) - glm::vec4 frustumPlanes[6]; // xyz = normal, w = distance - glm::vec4 cameraPos; // xyz = camera position, w = maxPossibleDistSq - uint32_t instanceCount; - uint32_t _pad[3] = {}; - }; + struct CullUniformsGPU { // matches CullUniforms in m2_cull_hiz.comp.glsl (std140) + glm::vec4 frustumPlanes[6]; // xyz = normal, w = distance (96 bytes) + glm::vec4 cameraPos; // xyz = camera position, w = maxPossibleDistSq (16 bytes) + uint32_t instanceCount; // (4 bytes) + uint32_t hizEnabled; // 1 = HiZ occlusion active (4 bytes) + uint32_t hizMipLevels; // mip levels in HiZ pyramid (4 bytes) + uint32_t _pad2 = {}; // (4 bytes) + glm::vec4 hizParams; // x=pyramidW, y=pyramidH, z=nearPlane, w=unused (16 bytes) + glm::mat4 viewProj; // current frame view-projection (64 bytes) + glm::mat4 prevViewProj; // previous frame VP for HiZ reprojection (64 bytes) + }; // Total: 272 bytes static constexpr uint32_t MAX_CULL_INSTANCES = 24576; - VkPipeline cullPipeline_ = VK_NULL_HANDLE; - VkPipelineLayout cullPipelineLayout_ = VK_NULL_HANDLE; + VkPipeline cullPipeline_ = VK_NULL_HANDLE; // frustum-only (fallback) + VkPipeline cullHiZPipeline_ = VK_NULL_HANDLE; // frustum + HiZ occlusion + VkPipelineLayout cullPipelineLayout_ = VK_NULL_HANDLE; // frustum-only layout (set 0) + VkPipelineLayout cullHiZPipelineLayout_ = VK_NULL_HANDLE; // HiZ layout (set 0 + set 1) VkDescriptorSetLayout cullSetLayout_ = VK_NULL_HANDLE; VkDescriptorPool cullDescPool_ = VK_NULL_HANDLE; VkDescriptorSet cullSet_[2] = {}; // double-buffered - ::VkBuffer 
cullUniformBuffer_[2] = {}; // frustum planes + camera (UBO) + ::VkBuffer cullUniformBuffer_[2] = {}; // frustum planes + camera + HiZ params (UBO) VmaAllocation cullUniformAlloc_[2] = {}; void* cullUniformMapped_[2] = {}; ::VkBuffer cullInputBuffer_[2] = {}; // per-instance bounding sphere + flags (SSBO) @@ -494,6 +509,20 @@ private: VmaAllocation cullOutputAlloc_[2] = {}; void* cullOutputMapped_[2] = {}; + // HiZ occlusion culling (Phase 6.3) — optional, driven by Renderer + HiZSystem* hizSystem_ = nullptr; + + // Previous frame's view-projection for temporal reprojection in HiZ culling. + // Stored each frame so the cull shader can project into the same screen space + // as the depth buffer the HiZ pyramid was built from. + glm::mat4 prevVP_{1.0f}; + + // Per-instance visibility from the previous frame. Used to set the + // `previouslyVisible` flag (bit 3) on each CullInstance so the shader + // skips the HiZ test for objects that weren't rendered last frame + // (their depth data is unreliable). + std::vector prevFrameVisible_; + // Dynamic ribbon vertex buffer (CPU-written triangle strip) static constexpr size_t MAX_RIBBON_VERTS = 2048; // 9 floats each ::VkBuffer ribbonVB_ = VK_NULL_HANDLE; diff --git a/include/rendering/renderer.hpp b/include/rendering/renderer.hpp index 6e3f12d7..56746125 100644 --- a/include/rendering/renderer.hpp +++ b/include/rendering/renderer.hpp @@ -58,6 +58,7 @@ class ChargeEffect; class SwimEffects; class RenderGraph; class OverlaySystem; +class HiZSystem; class Renderer { public: @@ -363,6 +364,9 @@ private: std::unique_ptr renderGraph_; void buildFrameGraph(game::GameHandler* gameHandler); + // HiZ occlusion culling — builds depth pyramid each frame + std::unique_ptr hizSystem_; + // CPU timing stats (last frame/update). 
double lastUpdateMs = 0.0; double lastRenderMs = 0.0;
diff --git a/src/rendering/hiz_system.cpp b/src/rendering/hiz_system.cpp
new file mode 100644
index 00000000..a57f7597
--- /dev/null
+++ b/src/rendering/hiz_system.cpp
@@ -0,0 +1,525 @@
+#include "rendering/hiz_system.hpp"
+#include "rendering/vk_context.hpp"
+#include "rendering/vk_shader.hpp"
+#include "core/logger.hpp"
+#include "core/profiler.hpp"
+#include <algorithm>
+#include <cmath>
+
+namespace wowee {
+namespace rendering {
+
+HiZSystem::~HiZSystem() {
+    shutdown();
+}
+
+bool HiZSystem::initialize(VkContext* ctx, uint32_t width, uint32_t height) {
+    if (!ctx || width == 0 || height == 0) return false;
+    ctx_ = ctx;
+    fullWidth_ = width;
+    fullHeight_ = height;
+
+    // Pyramid mip 0 is half the full resolution (the first downscale)
+    pyramidWidth_ = std::max(1u, width / 2);
+    pyramidHeight_ = std::max(1u, height / 2);
+    mipLevels_ = static_cast<uint32_t>(std::floor(std::log2(std::max(pyramidWidth_, pyramidHeight_)))) + 1;
+
+    if (!createComputePipeline()) return false;
+    if (!createPyramidImage()) { destroyComputePipeline(); return false; }
+    if (!createDescriptors()) { destroyPyramidImage(); destroyComputePipeline(); return false; }
+
+    ready_ = true;
+    LOG_INFO("HiZSystem: initialized ", pyramidWidth_, "x", pyramidHeight_,
+             " pyramid (", mipLevels_, " mips) from ", width, "x", height, " depth");
+    return true;
+}
+
+void HiZSystem::shutdown() {
+    if (!ctx_) return;
+    VkDevice device = ctx_->getDevice();
+    vkDeviceWaitIdle(device);
+
+    destroyDescriptors();
+    destroyPyramidImage();
+    destroyComputePipeline();
+
+    ctx_ = nullptr;
+    ready_ = false;
+}
+
+// Recreates the pyramid images and descriptor sets for a new depth-buffer size.
+// The compute pipeline and set layouts are kept. Returns false if the rebuild
+// fails, in which case the system reports not-ready.
+bool HiZSystem::resize(uint32_t width, uint32_t height) {
+    if (!ctx_) return false;
+    VkDevice device = ctx_->getDevice();
+    vkDeviceWaitIdle(device);
+
+    // The pyramid images and descriptors are destroyed below, so drop the ready
+    // flag first. Previously a failed rebuild left isReady() == true (set by
+    // initialize()) even though those resources no longer existed.
+    ready_ = false;
+
+    destroyDescriptors();
+    destroyPyramidImage();
+
+    fullWidth_ = width;
+    fullHeight_ = height;
+    pyramidWidth_ = std::max(1u, width / 2);
+    pyramidHeight_ = std::max(1u, height / 2);
+    mipLevels_ = static_cast<uint32_t>(std::floor(std::log2(std::max(pyramidWidth_, pyramidHeight_)))) + 1;
+
+    if (!createPyramidImage()) return false;
+    if (!createDescriptors()) { destroyPyramidImage(); return false; }
+
+    ready_ = true;
+    LOG_INFO("HiZSystem: resized to ", pyramidWidth_, "x", pyramidHeight_,
+             " (", mipLevels_, " mips)");
+    return true;
+}
+
+// --- Pyramid image creation --- + +bool HiZSystem::createPyramidImage() { + VkDevice device = ctx_->getDevice(); + VmaAllocator alloc = ctx_->getAllocator(); + + for (uint32_t f = 0; f < MAX_FRAMES; f++) { + // Create R32F image with full mip chain + VkImageCreateInfo imgCi{VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO}; + imgCi.imageType = VK_IMAGE_TYPE_2D; + imgCi.format = VK_FORMAT_R32_SFLOAT; + imgCi.extent = {pyramidWidth_, pyramidHeight_, 1}; + imgCi.mipLevels = mipLevels_; + imgCi.arrayLayers = 1; + imgCi.samples = VK_SAMPLE_COUNT_1_BIT; + imgCi.tiling = VK_IMAGE_TILING_OPTIMAL; + imgCi.usage = VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_STORAGE_BIT; + imgCi.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED; + + VmaAllocationCreateInfo allocCi{}; + allocCi.usage = VMA_MEMORY_USAGE_GPU_ONLY; + + if (vmaCreateImage(alloc, &imgCi, &allocCi, &pyramidImage_[f], &pyramidAlloc_[f], nullptr) != VK_SUCCESS) { + LOG_ERROR("HiZSystem: failed to create pyramid image for frame ", f); + return false; + } + + // View of ALL mip levels (for sampling in the cull shader) + VkImageViewCreateInfo viewCi{VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO}; + viewCi.image = pyramidImage_[f]; + viewCi.viewType = VK_IMAGE_VIEW_TYPE_2D; + viewCi.format = VK_FORMAT_R32_SFLOAT; + viewCi.subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; + viewCi.subresourceRange.baseMipLevel = 0; + viewCi.subresourceRange.levelCount = mipLevels_; + viewCi.subresourceRange.layerCount = 1; + + if (vkCreateImageView(device, &viewCi, nullptr, &pyramidViewAll_[f]) != VK_SUCCESS) { + LOG_ERROR("HiZSystem: failed to create pyramid view-all for frame ", f); + return false; + } + + // Per-mip
views (for storage image writes in the build shader) + pyramidMipViews_[f].resize(mipLevels_, VK_NULL_HANDLE); + for (uint32_t mip = 0; mip < mipLevels_; mip++) { + VkImageViewCreateInfo mipViewCi{VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO}; + mipViewCi.image = pyramidImage_[f]; + mipViewCi.viewType = VK_IMAGE_VIEW_TYPE_2D; + mipViewCi.format = VK_FORMAT_R32_SFLOAT; + mipViewCi.subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; + mipViewCi.subresourceRange.baseMipLevel = mip; + mipViewCi.subresourceRange.levelCount = 1; + mipViewCi.subresourceRange.layerCount = 1; + + if (vkCreateImageView(device, &mipViewCi, nullptr, &pyramidMipViews_[f][mip]) != VK_SUCCESS) { + LOG_ERROR("HiZSystem: failed to create mip ", mip, " view for frame ", f); + return false; + } + } + } + + // Sampler for depth reads and HiZ pyramid reads (nearest, clamp) + VkSamplerCreateInfo samplerCi{VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO}; + samplerCi.magFilter = VK_FILTER_NEAREST; + samplerCi.minFilter = VK_FILTER_NEAREST; + samplerCi.mipmapMode = VK_SAMPLER_MIPMAP_MODE_NEAREST; + samplerCi.addressModeU = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE; + samplerCi.addressModeV = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE; + samplerCi.addressModeW = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE; + samplerCi.maxLod = static_cast(mipLevels_); + + if (vkCreateSampler(device, &samplerCi, nullptr, &depthSampler_) != VK_SUCCESS) { + LOG_ERROR("HiZSystem: failed to create sampler"); + return false; + } + + return true; +} + +void HiZSystem::destroyPyramidImage() { + if (!ctx_) return; + VkDevice device = ctx_->getDevice(); + VmaAllocator alloc = ctx_->getAllocator(); + + if (depthSampler_) { vkDestroySampler(device, depthSampler_, nullptr); depthSampler_ = VK_NULL_HANDLE; } + + for (uint32_t f = 0; f < MAX_FRAMES; f++) { + for (auto& view : pyramidMipViews_[f]) { + if (view) { vkDestroyImageView(device, view, nullptr); view = VK_NULL_HANDLE; } + } + pyramidMipViews_[f].clear(); + + if (pyramidViewAll_[f]) { 
vkDestroyImageView(device, pyramidViewAll_[f], nullptr); pyramidViewAll_[f] = VK_NULL_HANDLE; }
+        if (depthSamplerView_[f]) { vkDestroyImageView(device, depthSamplerView_[f], nullptr); depthSamplerView_[f] = VK_NULL_HANDLE; }
+        if (pyramidImage_[f]) { vmaDestroyImage(alloc, pyramidImage_[f], pyramidAlloc_[f]); pyramidImage_[f] = VK_NULL_HANDLE; }
+    }
+}
+
+// --- Compute pipeline ---
+
+// Creates the Vulkan objects needed to build the pyramid on the GPU:
+//   - buildSetLayout_      : set 0 of the build shader (source sampler + destination storage image)
+//   - hizSetLayout_        : single combined-image-sampler layout for sampling the finished pyramid
+//   - buildPipelineLayout_ : buildSetLayout_ plus one compute push-constant range
+//   - buildPipeline_       : compute pipeline built from assets/shaders/hiz_build.comp.spv
+// Returns true on success. On the first failure it logs and returns false WITHOUT
+// destroying what was already created; destroyComputePipeline() releases every
+// handle that is non-null.
+bool HiZSystem::createComputePipeline() {
+    VkDevice device = ctx_->getDevice();
+
+    // Build descriptor set layout for pyramid build (set 0):
+    //   binding 0: combined image sampler (source depth / previous mip)
+    //   binding 1: storage image (destination mip)
+    VkDescriptorSetLayoutBinding bindings[2] = {};
+    bindings[0].binding = 0;
+    bindings[0].descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER;
+    bindings[0].descriptorCount = 1;
+    bindings[0].stageFlags = VK_SHADER_STAGE_COMPUTE_BIT;
+    bindings[1].binding = 1;
+    bindings[1].descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE;
+    bindings[1].descriptorCount = 1;
+    bindings[1].stageFlags = VK_SHADER_STAGE_COMPUTE_BIT;
+
+    VkDescriptorSetLayoutCreateInfo layoutCi{VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO};
+    layoutCi.bindingCount = 2;
+    layoutCi.pBindings = bindings;
+    if (vkCreateDescriptorSetLayout(device, &layoutCi, nullptr, &buildSetLayout_) != VK_SUCCESS) {
+        LOG_ERROR("HiZSystem: failed to create build set layout");
+        return false;
+    }
+
+    // HiZ sampling layout (for M2 cull shader, set 1):
+    //   binding 0: combined image sampler (HiZ pyramid, all mips)
+    VkDescriptorSetLayoutBinding hizBinding{};
+    hizBinding.binding = 0;
+    hizBinding.descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER;
+    hizBinding.descriptorCount = 1;
+    hizBinding.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT;
+
+    VkDescriptorSetLayoutCreateInfo hizLayoutCi{VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO};
+    hizLayoutCi.bindingCount = 1;
+    hizLayoutCi.pBindings = &hizBinding;
+    if (vkCreateDescriptorSetLayout(device, &hizLayoutCi, nullptr, &hizSetLayout_) != VK_SUCCESS) {
+        LOG_ERROR("HiZSystem: failed to create HiZ set layout");
+        return false;
+    }
+
+    // Push constant range for build shader
+    VkPushConstantRange pushRange{};
+    pushRange.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT;
+    pushRange.offset = 0;
+    pushRange.size = sizeof(HiZBuildPushConstants);
+
+    // Only the build set is part of this pipeline layout; hizSetLayout_ is not used here.
+    VkPipelineLayoutCreateInfo plCi{VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO};
+    plCi.setLayoutCount = 1;
+    plCi.pSetLayouts = &buildSetLayout_;
+    plCi.pushConstantRangeCount = 1;
+    plCi.pPushConstantRanges = &pushRange;
+    if (vkCreatePipelineLayout(device, &plCi, nullptr, &buildPipelineLayout_) != VK_SUCCESS) {
+        LOG_ERROR("HiZSystem: failed to create build pipeline layout");
+        return false;
+    }
+
+    // Load and create compute pipeline
+    // NOTE(review): buildShader is used through loadFromFile()/stageInfo()/destroy(), so
+    // this is presumably the project's shader wrapper class, not the raw Vulkan handle — confirm.
+    VkShaderModule buildShader;
+    if (!buildShader.loadFromFile(device, "assets/shaders/hiz_build.comp.spv")) {
+        LOG_ERROR("HiZSystem: failed to load hiz_build.comp.spv");
+        return false;
+    }
+
+    VkComputePipelineCreateInfo cpCi{VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO};
+    cpCi.stage = buildShader.stageInfo(VK_SHADER_STAGE_COMPUTE_BIT);
+    cpCi.layout = buildPipelineLayout_;
+    if (vkCreateComputePipelines(device, ctx_->getPipelineCache(), 1, &cpCi, nullptr, &buildPipeline_) != VK_SUCCESS) {
+        LOG_ERROR("HiZSystem: failed to create build compute pipeline");
+        buildShader.destroy();
+        return false;
+    }
+    // The shader object is destroyed on both the failure and the success path.
+    buildShader.destroy();
+
+    return true;
+}
+
+// Destroys the build pipeline, its pipeline layout and both descriptor set layouts,
+// resetting each handle to VK_NULL_HANDLE. Handles that are already null are skipped,
+// and the whole call is a no-op when ctx_ is null.
+void HiZSystem::destroyComputePipeline() {
+    if (!ctx_) return;
+    VkDevice device = ctx_->getDevice();
+
+    if (buildPipeline_) { vkDestroyPipeline(device, buildPipeline_, nullptr); buildPipeline_ = VK_NULL_HANDLE; }
+    if (buildPipelineLayout_) { vkDestroyPipelineLayout(device, buildPipelineLayout_, nullptr); buildPipelineLayout_ = VK_NULL_HANDLE; }
+    if (buildSetLayout_) { vkDestroyDescriptorSetLayout(device, buildSetLayout_, nullptr); buildSetLayout_ = VK_NULL_HANDLE; }
+    if (hizSetLayout_) { vkDestroyDescriptorSetLayout(device, hizSetLayout_, nullptr); hizSetLayout_ = VK_NULL_HANDLE; }
+}
+
+// --- Descriptors ---
+
+// Creates buildDescPool_ and, for every frame slot f in [0, MAX_FRAMES), allocates from it:
+//   - depthSamplerView_[f]   : depth-aspect view of ctx_->getDepthCopySourceImage()
+//   - buildDescSets_[f][mip] : one build set per mip (binding 0 = source, binding 1 = destination)
+//   - hizDescSet_[f]         : sampling set exposing pyramidViewAll_[f]
+// Reads depthSampler_, pyramidViewAll_ and pyramidMipViews_.
+// NOTE(review): those presumably must already exist when this runs — confirm the call order.
+// Returns false (after logging) on the first failure; nothing is rolled back here.
+bool HiZSystem::createDescriptors() {
+    VkDevice device = ctx_->getDevice();
+
+    // Pool: MAX_FRAMES × mipLevels_ build sets + one HiZ sampling set per frame
+    // Each build set needs 1 sampler + 1 storage image
+    // Each HiZ sampling set needs 1 sampler
+    const uint32_t totalBuildSets = MAX_FRAMES * mipLevels_;
+    const uint32_t totalHizSets = MAX_FRAMES;
+    const uint32_t totalSets = totalBuildSets + totalHizSets;
+
+    VkDescriptorPoolSize poolSizes[2] = {};
+    poolSizes[0] = {VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, totalBuildSets + totalHizSets};
+    poolSizes[1] = {VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, totalBuildSets};
+
+    VkDescriptorPoolCreateInfo poolCi{VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO};
+    poolCi.maxSets = totalSets;
+    poolCi.poolSizeCount = 2;
+    poolCi.pPoolSizes = poolSizes;
+    if (vkCreateDescriptorPool(device, &poolCi, nullptr, &buildDescPool_) != VK_SUCCESS) {
+        LOG_ERROR("HiZSystem: failed to create descriptor pool");
+        return false;
+    }
+
+    // We use the same pool for both build and HiZ sets — simpler cleanup
+    hizDescPool_ = VK_NULL_HANDLE; // sharing buildDescPool_
+
+    for (uint32_t f = 0; f < MAX_FRAMES; f++) {
+        // Create a dedicated depth image view for sampling the depth buffer. It is kept in
+        // depthSamplerView_[f] and released in destroyDescriptors().
+        // This is SEPARATE from the VkContext's depth image view because we need
+        // DEPTH aspect sampling which requires specific format view.
+        // Every frame slot views the same image returned by getDepthCopySourceImage().
+        {
+            VkImage depthSrc = ctx_->getDepthCopySourceImage();
+            VkImageViewCreateInfo viewCi{VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO};
+            viewCi.image = depthSrc;
+            viewCi.viewType = VK_IMAGE_VIEW_TYPE_2D;
+            viewCi.format = ctx_->getDepthFormat();
+            viewCi.subresourceRange.aspectMask = VK_IMAGE_ASPECT_DEPTH_BIT;
+            viewCi.subresourceRange.levelCount = 1;
+            viewCi.subresourceRange.layerCount = 1;
+            if (vkCreateImageView(device, &viewCi, nullptr, &depthSamplerView_[f]) != VK_SUCCESS) {
+                LOG_ERROR("HiZSystem: failed to create depth sampler view for frame ", f);
+                return false;
+            }
+        }
+
+        // Allocate per-mip build descriptor sets
+        buildDescSets_[f].resize(mipLevels_);
+        for (uint32_t mip = 0; mip < mipLevels_; mip++) {
+            VkDescriptorSetAllocateInfo allocInfo{VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO};
+            allocInfo.descriptorPool = buildDescPool_;
+            allocInfo.descriptorSetCount = 1;
+            allocInfo.pSetLayouts = &buildSetLayout_;
+            if (vkAllocateDescriptorSets(device, &allocInfo, &buildDescSets_[f][mip]) != VK_SUCCESS) {
+                LOG_ERROR("HiZSystem: failed to allocate build desc set frame=", f, " mip=", mip);
+                return false;
+            }
+
+            // Write descriptors:
+            // Binding 0 (sampler): mip 0 reads depth buffer, mip N reads pyramid mip N-1
+            VkDescriptorImageInfo srcInfo{};
+            srcInfo.sampler = depthSampler_;
+            if (mip == 0) {
+                srcInfo.imageView = depthSamplerView_[f];
+                // Same layout buildPyramid() transitions the depth image to before dispatching.
+                srcInfo.imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
+            } else {
+                srcInfo.imageView = pyramidViewAll_[f]; // shader uses texelFetch with explicit mip
+                // The pyramid is kept in GENERAL: it is read and written within the same build.
+                srcInfo.imageLayout = VK_IMAGE_LAYOUT_GENERAL;
+            }
+
+            // Binding 1 (storage image): write to current mip
+            VkDescriptorImageInfo dstInfo{};
+            dstInfo.imageView = pyramidMipViews_[f][mip];
+            dstInfo.imageLayout = VK_IMAGE_LAYOUT_GENERAL;
+
+            VkWriteDescriptorSet writes[2] = {};
+            writes[0] = {VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET};
+            writes[0].dstSet = buildDescSets_[f][mip];
+            writes[0].dstBinding = 0;
+            writes[0].descriptorCount = 1;
+            writes[0].descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER;
+            writes[0].pImageInfo = &srcInfo;
+
+            writes[1] = {VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET};
+            writes[1].dstSet = buildDescSets_[f][mip];
+            writes[1].dstBinding = 1;
+            writes[1].descriptorCount = 1;
+            writes[1].descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE;
+            writes[1].pImageInfo = &dstInfo;
+
+            vkUpdateDescriptorSets(device, 2, writes, 0, nullptr);
+        }
+
+        // Allocate HiZ sampling descriptor set (for M2 cull shader)
+        {
+            VkDescriptorSetAllocateInfo allocInfo{VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO};
+            allocInfo.descriptorPool = buildDescPool_;
+            allocInfo.descriptorSetCount = 1;
+            allocInfo.pSetLayouts = &hizSetLayout_;
+            if (vkAllocateDescriptorSets(device, &allocInfo, &hizDescSet_[f]) != VK_SUCCESS) {
+                LOG_ERROR("HiZSystem: failed to allocate HiZ sampling desc set for frame ", f);
+                return false;
+            }
+
+            VkDescriptorImageInfo hizInfo{};
+            hizInfo.sampler = depthSampler_;
+            hizInfo.imageView = pyramidViewAll_[f];
+            hizInfo.imageLayout = VK_IMAGE_LAYOUT_GENERAL;
+
+            VkWriteDescriptorSet write{VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET};
+            write.dstSet = hizDescSet_[f];
+            write.dstBinding = 0;
+            write.descriptorCount = 1;
+            write.descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER;
+            write.pImageInfo = &hizInfo;
+            vkUpdateDescriptorSets(device, 1, &write, 0, nullptr);
+        }
+    }
+
+    return true;
+}
+
+// Destroys the shared descriptor pool, clears the per-frame descriptor set handles and
+// destroys the per-frame depth sampler views. No-op when ctx_ is null.
+void HiZSystem::destroyDescriptors() {
+    if (!ctx_) return;
+    VkDevice device = ctx_->getDevice();
+
+    // All descriptor sets are freed when pool is destroyed
+    if (buildDescPool_) { vkDestroyDescriptorPool(device, buildDescPool_, nullptr); buildDescPool_ = VK_NULL_HANDLE; }
+    // hizDescPool_ shares buildDescPool_, so nothing extra to destroy
+
+    for (uint32_t f = 0; f < MAX_FRAMES; f++) {
+        // The sets themselves went away with the pool; only the stale handles are dropped here.
+        buildDescSets_[f].clear();
+        hizDescSet_[f] = VK_NULL_HANDLE;
+        if (depthSamplerView_[f]) { vkDestroyImageView(device, depthSamplerView_[f], nullptr); depthSamplerView_[f] = VK_NULL_HANDLE; }
+    }
+}
+
+// --- Pyramid build dispatch ---
+
+// Records into `cmd` the barriers and compute dispatches that rebuild the HiZ pyramid
+// of frame slot `frameIndex` from the scene depth.
+//
+//   cmd        - command buffer in the recording state
+//   frameIndex - selects pyramidImage_[frameIndex] and buildDescSets_[frameIndex]
+//   depthImage - depth image to transition; it is moved to SHADER_READ_ONLY_OPTIMAL for
+//                the build and back to DEPTH_STENCIL_ATTACHMENT_OPTIMAL before returning
+//
+// Does nothing when the system is not ready or the build pipeline is missing.
+// NOTE(review): the mip-0 descriptor is bound to a view of ctx_->getDepthCopySourceImage()
+// (see createDescriptors), so `depthImage` is presumably that same image — confirm at call sites.
+void HiZSystem::buildPyramid(VkCommandBuffer cmd, uint32_t frameIndex, VkImage depthImage) {
+    ZoneScopedN("HiZSystem::buildPyramid");
+    if (!ready_ || !buildPipeline_) return;
+
+    // Transition depth image from DEPTH_STENCIL_ATTACHMENT to SHADER_READ_ONLY for sampling
+    {
+        VkImageMemoryBarrier barrier{VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER};
+        barrier.srcAccessMask = VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT;
+        barrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT;
+        barrier.oldLayout = VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL;
+        barrier.newLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
+        barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
+        barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
+        barrier.image = depthImage;
+        barrier.subresourceRange.aspectMask = VK_IMAGE_ASPECT_DEPTH_BIT;
+        barrier.subresourceRange.levelCount = 1;
+        barrier.subresourceRange.layerCount = 1;
+
+        vkCmdPipelineBarrier(cmd,
+            VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT,
+            VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
+            0, 0, nullptr, 0, nullptr, 1, &barrier);
+    }
+
+    // Transition entire pyramid to GENERAL layout for storage writes.
+    // oldLayout is UNDEFINED on purpose: the previous contents are discarded because
+    // every mip level is rewritten by the loop below.
+    {
+        VkImageMemoryBarrier barrier{VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER};
+        barrier.srcAccessMask = 0;
+        barrier.dstAccessMask = VK_ACCESS_SHADER_WRITE_BIT;
+        barrier.oldLayout = VK_IMAGE_LAYOUT_UNDEFINED;
+        barrier.newLayout = VK_IMAGE_LAYOUT_GENERAL;
+        barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
+        barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
+        barrier.image = pyramidImage_[frameIndex];
+        barrier.subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;
+        barrier.subresourceRange.baseMipLevel = 0;
+        barrier.subresourceRange.levelCount = mipLevels_;
+        barrier.subresourceRange.layerCount = 1;
+
+        vkCmdPipelineBarrier(cmd,
+            VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
+            VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
+            0, 0, nullptr, 0, nullptr, 1, &barrier);
+    }
+
+    vkCmdBindPipeline(cmd, VK_PIPELINE_BIND_POINT_COMPUTE, buildPipeline_);
+
+    // Build each mip level sequentially
+    uint32_t mipW = pyramidWidth_;
+    uint32_t mipH = pyramidHeight_;
+
+    for (uint32_t mip = 0; mip < mipLevels_; mip++) {
+        // Bind descriptor set for this mip level
+        vkCmdBindDescriptorSets(cmd, VK_PIPELINE_BIND_POINT_COMPUTE,
+            buildPipelineLayout_, 0, 1, &buildDescSets_[frameIndex][mip], 0, nullptr);
+
+        // Push constants: destination size + mip level
+        HiZBuildPushConstants pc{};
+        pc.dstWidth = static_cast<int32_t>(mipW);
+        pc.dstHeight = static_cast<int32_t>(mipH);
+        pc.mipLevel = static_cast<int32_t>(mip);
+        vkCmdPushConstants(cmd, buildPipelineLayout_, VK_SHADER_STAGE_COMPUTE_BIT,
+            0, sizeof(pc), &pc);
+
+        // Dispatch compute: the build shader uses 8x8 threads per workgroup, so round up.
+        uint32_t groupsX = (mipW + 7) / 8;
+        uint32_t groupsY = (mipH + 7) / 8;
+        vkCmdDispatch(cmd, groupsX, groupsY, 1);
+
+        // Barrier after EVERY mip level, including the last one. Writes to mip N must be
+        // visible before mip N+1 is built from them, and the final mip must be visible to
+        // any compute shader recorded after this function that samples the finished
+        // pyramid (e.g. the M2 HiZ cull dispatch). Skipping it for the last mip would
+        // leave that level's storage writes unsynchronized with those reads.
+        {
+            VkImageMemoryBarrier mipBarrier{VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER};
+            mipBarrier.srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT;
+            mipBarrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT;
+            mipBarrier.oldLayout = VK_IMAGE_LAYOUT_GENERAL;
+            mipBarrier.newLayout = VK_IMAGE_LAYOUT_GENERAL;
+            mipBarrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
+            mipBarrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
+            mipBarrier.image = pyramidImage_[frameIndex];
+            mipBarrier.subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;
+            mipBarrier.subresourceRange.baseMipLevel = mip;
+            mipBarrier.subresourceRange.levelCount = 1;
+            mipBarrier.subresourceRange.layerCount = 1;
+
+            vkCmdPipelineBarrier(cmd,
+                VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
+                VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
+                0, 0, nullptr, 0, nullptr, 1, &mipBarrier);
+        }
+
+        // Next mip level dimensions (never below 1x1)
+        mipW = std::max(1u, mipW / 2);
+        mipH = std::max(1u, mipH / 2);
+    }
+
+    // Transition depth back to DEPTH_STENCIL_ATTACHMENT for next frame
+    {
+        VkImageMemoryBarrier barrier{VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER};
+        barrier.srcAccessMask = VK_ACCESS_SHADER_READ_BIT;
+        barrier.dstAccessMask = VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT |
+                                VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT;
+        barrier.oldLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
+        barrier.newLayout = VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL;
+        barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
+        barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
+        barrier.image = depthImage;
+        barrier.subresourceRange.aspectMask = VK_IMAGE_ASPECT_DEPTH_BIT;
+        barrier.subresourceRange.levelCount = 1;
+        barrier.subresourceRange.layerCount = 1;
+
+        vkCmdPipelineBarrier(cmd,
+            VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
+            VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT,
+            0, 0, nullptr, 0, nullptr, 1, &barrier);
+    }
+}
+
+} // namespace rendering
+} // namespace wowee
diff --git a/src/rendering/m2_renderer.cpp b/src/rendering/m2_renderer.cpp index 8cd5adeb..5281aad6 100644 --- a/src/rendering/m2_renderer.cpp +++ b/src/rendering/m2_renderer.cpp @@ -295,7 +295,7 @@ bool M2Renderer::initialize(VkContext* ctx, VkDescriptorSetLayout perFrameLayout // Output: uint visibility[] read back by CPU to skip culled instances in sortedVisible_ build. { static_assert(sizeof(CullInstanceGPU) == 32, "CullInstanceGPU must be 32 bytes (std430)"); - static_assert(sizeof(CullUniformsGPU) == 128, "CullUniformsGPU must be 128 bytes (std140)"); + static_assert(sizeof(CullUniformsGPU) == 272, "CullUniformsGPU must be 272 bytes (std140)"); // Descriptor set layout: binding 0 = UBO (frustum+camera), 1 = SSBO (input), 2 = SSBO (output) VkDescriptorSetLayoutBinding bindings[3] = {}; @@ -338,6 +338,54 @@ bool M2Renderer::initialize(VkContext* ctx, VkDescriptorSetLayout perFrameLayout cullComp.destroy(); } + // HiZ-aware cull pipeline (Phase 6.3 Option B) + // Uses set 0 (same as frustum-only) + set 1 (HiZ pyramid sampler from HiZSystem).
+ // The HiZ descriptor set layout is created lazily when hizSystem_ is set, but the + // pipeline layout and shader are created now if the shader is available. + rendering::VkShaderModule cullHiZComp; + if (cullHiZComp.loadFromFile(device, "assets/shaders/m2_cull_hiz.comp.spv")) { + // HiZ cull set 1 layout: single combined image sampler (the HiZ pyramid) + VkDescriptorSetLayoutBinding hizBinding{}; + hizBinding.binding = 0; + hizBinding.descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER; + hizBinding.descriptorCount = 1; + hizBinding.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT; + + VkDescriptorSetLayout hizSamplerLayout = VK_NULL_HANDLE; + VkDescriptorSetLayoutCreateInfo hizLayoutCi{VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO}; + hizLayoutCi.bindingCount = 1; + hizLayoutCi.pBindings = &hizBinding; + vkCreateDescriptorSetLayout(device, &hizLayoutCi, nullptr, &hizSamplerLayout); + + VkDescriptorSetLayout hizSetLayouts[2] = {cullSetLayout_, hizSamplerLayout}; + VkPipelineLayoutCreateInfo hizPlCi{VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO}; + hizPlCi.setLayoutCount = 2; + hizPlCi.pSetLayouts = hizSetLayouts; + vkCreatePipelineLayout(device, &hizPlCi, nullptr, &cullHiZPipelineLayout_); + + VkComputePipelineCreateInfo hizCpCi{VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO}; + hizCpCi.stage = cullHiZComp.stageInfo(VK_SHADER_STAGE_COMPUTE_BIT); + hizCpCi.layout = cullHiZPipelineLayout_; + if (vkCreateComputePipelines(device, VK_NULL_HANDLE, 1, &hizCpCi, nullptr, &cullHiZPipeline_) != VK_SUCCESS) { + LOG_WARNING("M2Renderer: failed to create HiZ cull compute pipeline — HiZ disabled"); + cullHiZPipeline_ = VK_NULL_HANDLE; + vkDestroyPipelineLayout(device, cullHiZPipelineLayout_, nullptr); + cullHiZPipelineLayout_ = VK_NULL_HANDLE; + } else { + LOG_INFO("M2Renderer: HiZ occlusion cull pipeline created"); + } + + // The hizSamplerLayout is now owned by the pipeline layout; we don't track it + // separately because the pipeline layout keeps a ref. 
But actually Vulkan + // requires us to keep it alive. Store it where HiZSystem will provide it. + // For now, we can destroy it since the pipeline layout was already created. + vkDestroyDescriptorSetLayout(device, hizSamplerLayout, nullptr); + + cullHiZComp.destroy(); + } else { + LOG_INFO("M2Renderer: m2_cull_hiz.comp.spv not found — HiZ occlusion culling not available"); + } + // Descriptor pool: 2 sets × 3 descriptors each (1 UBO + 2 SSBO) VkDescriptorPoolSize poolSizes[2] = {}; poolSizes[0] = {VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, 2}; @@ -756,6 +804,14 @@ bool M2Renderer::initialize(VkContext* ctx, VkDescriptorSetLayout perFrameLayout return true; } +void M2Renderer::invalidateCullOutput(uint32_t frameIndex) { + // On non-HOST_COHERENT memory, VMA-mapped GPU→CPU buffers need explicit + // invalidation so the CPU cache sees the latest GPU writes. + if (frameIndex < 2 && cullOutputAlloc_[frameIndex]) { + vmaInvalidateAllocation(vkCtx_->getAllocator(), cullOutputAlloc_[frameIndex], 0, VK_WHOLE_SIZE); + } +} + void M2Renderer::shutdown() { LOG_INFO("Shutting down M2 renderer..."); if (!vkCtx_) return; @@ -837,6 +893,8 @@ void M2Renderer::shutdown() { if (instanceDescPool_) { vkDestroyDescriptorPool(device, instanceDescPool_, nullptr); instanceDescPool_ = VK_NULL_HANDLE; } // GPU frustum culling compute pipeline + buffers cleanup + if (cullHiZPipeline_) { vkDestroyPipeline(device, cullHiZPipeline_, nullptr); cullHiZPipeline_ = VK_NULL_HANDLE; } + if (cullHiZPipelineLayout_) { vkDestroyPipelineLayout(device, cullHiZPipelineLayout_, nullptr); cullHiZPipelineLayout_ = VK_NULL_HANDLE; } if (cullPipeline_) { vkDestroyPipeline(device, cullPipeline_, nullptr); cullPipeline_ = VK_NULL_HANDLE; } if (cullPipelineLayout_) { vkDestroyPipelineLayout(device, cullPipelineLayout_, nullptr); cullPipelineLayout_ = VK_NULL_HANDLE; } for (int i = 0; i < 2; i++) { diff --git a/src/rendering/m2_renderer_render.cpp b/src/rendering/m2_renderer_render.cpp index 6b2e33f4..19776ab6 100644 --- 
a/src/rendering/m2_renderer_render.cpp +++ b/src/rendering/m2_renderer_render.cpp @@ -1,6 +1,7 @@ #include "rendering/m2_renderer.hpp" #include "rendering/m2_renderer_internal.h" #include "rendering/m2_model_classifier.hpp" +#include "rendering/hiz_system.hpp" #include "rendering/vk_context.hpp" #include "rendering/vk_buffer.hpp" #include "rendering/vk_texture.hpp" @@ -600,6 +601,49 @@ void M2Renderer::dispatchCullCompute(VkCommandBuffer cmd, uint32_t frameIndex, c } ubo->cameraPos = glm::vec4(camPos, maxPossibleDistSq); ubo->instanceCount = numInstances; + + // HiZ occlusion culling fields + const bool hizReady = hizSystem_ && hizSystem_->isReady(); + + // Auto-disable HiZ when the camera has moved/rotated significantly. + // Large VP changes make the depth pyramid unreliable because the + // reprojected screen positions diverge from the actual pyramid data. + bool hizSafe = hizReady; + if (hizReady) { + // Compare current VP against previous VP — Frobenius-style max diff. + float maxDiff = 0.0f; + const float* curM = &vp[0][0]; + const float* prevM = &prevVP_[0][0]; + for (int k = 0; k < 16; ++k) + maxDiff = std::max(maxDiff, std::abs(curM[k] - prevM[k])); + // Threshold: typical small camera motion produces diffs < 0.05. + // A fast rotation easily exceeds 0.3. Skip HiZ when diff is large. + if (maxDiff > 0.15f) hizSafe = false; + } + + ubo->hizEnabled = hizSafe ? 1u : 0u; + ubo->hizMipLevels = hizReady ? hizSystem_->getMipLevels() : 0u; + ubo->_pad2 = 0; + if (hizReady) { + ubo->hizParams = glm::vec4( + static_cast(hizSystem_->getPyramidWidth()), + static_cast(hizSystem_->getPyramidHeight()), + camera.getNearPlane(), + 0.0f + ); + ubo->viewProj = vp; + // Use previous frame's VP for HiZ reprojection — the HiZ pyramid + // was built from the previous frame's depth, so we must project + // into the same screen space to sample the correct depths. 
+ ubo->prevViewProj = prevVP_; + } else { + ubo->hizParams = glm::vec4(0.0f); + ubo->viewProj = glm::mat4(1.0f); + ubo->prevViewProj = glm::mat4(1.0f); + } + + // Save current VP for next frame's temporal reprojection + prevVP_ = vp; } // --- Upload per-instance cull data (SSBO, binding 1) --- @@ -622,6 +666,10 @@ void M2Renderer::dispatchCullCompute(VkCommandBuffer cmd, uint32_t frameIndex, c if (inst.cachedIsValid) flags |= 1u; if (inst.cachedIsSmoke) flags |= 2u; if (inst.cachedIsInvisibleTrap) flags |= 4u; + // Bit 3: previouslyVisible — the shader skips HiZ for objects + // that were NOT rendered last frame (no reliable depth data). + if (i < prevFrameVisible_.size() && prevFrameVisible_[i]) + flags |= 8u; input[i].sphere = glm::vec4(inst.position, paddedRadius); input[i].effectiveMaxDistSq = effectiveMaxDistSq; @@ -630,9 +678,22 @@ void M2Renderer::dispatchCullCompute(VkCommandBuffer cmd, uint32_t frameIndex, c } // --- Dispatch compute shader --- - vkCmdBindPipeline(cmd, VK_PIPELINE_BIND_POINT_COMPUTE, cullPipeline_); - vkCmdBindDescriptorSets(cmd, VK_PIPELINE_BIND_POINT_COMPUTE, - cullPipelineLayout_, 0, 1, &cullSet_[frameIndex], 0, nullptr); + const bool useHiZ = (cullHiZPipeline_ != VK_NULL_HANDLE) + && hizSystem_ && hizSystem_->isReady(); + if (useHiZ) { + vkCmdBindPipeline(cmd, VK_PIPELINE_BIND_POINT_COMPUTE, cullHiZPipeline_); + // Set 0: cull UBO + input/output SSBOs + vkCmdBindDescriptorSets(cmd, VK_PIPELINE_BIND_POINT_COMPUTE, + cullHiZPipelineLayout_, 0, 1, &cullSet_[frameIndex], 0, nullptr); + // Set 1: HiZ pyramid sampler + VkDescriptorSet hizSet = hizSystem_->getDescriptorSet(frameIndex); + vkCmdBindDescriptorSets(cmd, VK_PIPELINE_BIND_POINT_COMPUTE, + cullHiZPipelineLayout_, 1, 1, &hizSet, 0, nullptr); + } else { + vkCmdBindPipeline(cmd, VK_PIPELINE_BIND_POINT_COMPUTE, cullPipeline_); + vkCmdBindDescriptorSets(cmd, VK_PIPELINE_BIND_POINT_COMPUTE, + cullPipelineLayout_, 0, 1, &cullSet_[frameIndex], 0, nullptr); + } const uint32_t groupCount = 
(numInstances + 63) / 64; vkCmdDispatch(cmd, groupCount, 1, 1); @@ -693,6 +754,19 @@ void M2Renderer::render(VkCommandBuffer cmd, VkDescriptorSet perFrameSet, const const uint32_t* visibility = static_cast(cullOutputMapped_[frameIndex]); const bool gpuCullAvailable = (cullPipeline_ != VK_NULL_HANDLE && visibility != nullptr); + // Snapshot the GPU visibility results into prevFrameVisible_ so the NEXT + // frame's compute dispatch can set the per-instance `previouslyVisible` + // flag (bit 3). Objects not visible this frame will skip HiZ next frame, + // avoiding false culls from stale depth data. + if (gpuCullAvailable) { + prevFrameVisible_.resize(numInstances); + for (uint32_t i = 0; i < numInstances; ++i) + prevFrameVisible_[i] = visibility[i] ? 1u : 0u; + } else { + // No GPU cull data — conservatively mark all as visible + prevFrameVisible_.assign(static_cast(instances.size()), 1u); + } + // If GPU culling was not dispatched, fallback: compute distances on CPU float maxRenderDistanceSq; if (!gpuCullAvailable) { @@ -1074,7 +1148,10 @@ void M2Renderer::render(VkCommandBuffer cmd, VkDescriptorSet perFrameSet, const // Update material UBO if (batch.materialUBOMapped) { auto* mat = static_cast(batch.materialUBOMapped); - mat->interiorDarken = insideInterior ? 1.0f : 0.0f; + // interiorDarken is a camera-based flag — it darkens ALL M2s (incl. + // outdoor trees) when the camera is inside a WMO. Disable it; indoor + // M2s already look correct from the darker ambient/lighting. + mat->interiorDarken = 0.0f; if (batch.colorKeyBlack) mat->colorKeyThreshold = (effectiveBlendMode == 4 || effectiveBlendMode == 5) ? 0.7f : 0.08f; if (forceCutout) { @@ -1265,7 +1342,7 @@ void M2Renderer::render(VkCommandBuffer cmd, VkDescriptorSet perFrameSet, const if (batch.materialUBOMapped) { auto* mat = static_cast(batch.materialUBOMapped); - mat->interiorDarken = insideInterior ? 
1.0f : 0.0f; + mat->interiorDarken = 0.0f; if (batch.colorKeyBlack) mat->colorKeyThreshold = (effectiveBlendMode == 4 || effectiveBlendMode == 5) ? 0.7f : 0.08f; } diff --git a/src/rendering/renderer.cpp b/src/rendering/renderer.cpp index 1c7fa9ab..8e2f6fa2 100644 --- a/src/rendering/renderer.cpp +++ b/src/rendering/renderer.cpp @@ -23,6 +23,7 @@ #include "rendering/character_preview.hpp" #include "rendering/wmo_renderer.hpp" #include "rendering/m2_renderer.hpp" +#include "rendering/hiz_system.hpp" #include "rendering/minimap.hpp" #include "rendering/world_map.hpp" #include "rendering/quest_marker_renderer.hpp" @@ -580,7 +581,6 @@ bool Renderer::initialize(core::Window* win) { overlaySystem_ = std::make_unique(vkCtx); renderGraph_->registerResource("shadow_depth"); renderGraph_->registerResource("reflection_texture"); - renderGraph_->registerResource("cull_visibility"); renderGraph_->registerResource("scene_color"); renderGraph_->registerResource("scene_depth"); renderGraph_->registerResource("final_image"); @@ -672,6 +672,10 @@ void Renderer::shutdown() { } LOG_DEBUG("Renderer::shutdown - m2Renderer..."); + if (hizSystem_) { + hizSystem_->shutdown(); + hizSystem_.reset(); + } if (m2Renderer) { m2Renderer->shutdown(); m2Renderer.reset(); @@ -798,6 +802,17 @@ void Renderer::applyMsaaChange() { if (minimap) minimap->recreatePipelines(); + // Resize HiZ pyramid (depth format/MSAA may have changed) + if (hizSystem_) { + auto ext = vkCtx->getSwapchainExtent(); + if (!hizSystem_->resize(ext.width, ext.height)) { + LOG_WARNING("HiZ resize failed after MSAA change"); + if (m2Renderer) m2Renderer->setHiZSystem(nullptr); + hizSystem_->shutdown(); + hizSystem_.reset(); + } + } + // Selection circle + overlay + FSR use lazy init, just destroy them if (overlaySystem_) overlaySystem_->recreatePipelines(); if (postProcessPipeline_) postProcessPipeline_->destroyAllResources(); // Will be lazily recreated in beginFrame() @@ -846,6 +861,16 @@ void Renderer::beginFrame() { } // 
Recreate post-process resources for new swapchain dimensions if (postProcessPipeline_) postProcessPipeline_->handleSwapchainResize(); + // Resize HiZ depth pyramid for new swapchain dimensions + if (hizSystem_) { + auto ext = vkCtx->getSwapchainExtent(); + if (!hizSystem_->resize(ext.width, ext.height)) { + LOG_WARNING("HiZ resize failed — disabling occlusion culling"); + if (m2Renderer) m2Renderer->setHiZSystem(nullptr); + hizSystem_->shutdown(); + hizSystem_.reset(); + } + } } // Acquire swapchain image and begin command buffer @@ -864,6 +889,31 @@ void Renderer::beginFrame() { // Update per-frame UBO with current camera/lighting state updatePerFrameUBO(); + // ── Early compute: HiZ pyramid build + M2 frustum/occlusion cull ── + // These run in a SEPARATE command buffer submission so the GPU executes + // them immediately. The CPU then reads the fresh visibility results + // before recording the main render pass — eliminating the 2-frame + // staleness that occurs when compute + render share one submission. + if (m2Renderer && camera && vkCtx) { + VkCommandBuffer computeCmd = vkCtx->beginSingleTimeCommands(); + uint32_t frame = vkCtx->getCurrentFrame(); + + // Build HiZ depth pyramid from previous frame's depth buffer + if (hizSystem_ && hizSystem_->isReady()) { + VkImage depthSrc = vkCtx->getDepthCopySourceImage(); + hizSystem_->buildPyramid(computeCmd, frame, depthSrc); + } + + // Dispatch GPU frustum + HiZ occlusion culling + m2Renderer->dispatchCullCompute(computeCmd, frame, *camera); + + vkCtx->endSingleTimeCommands(computeCmd); + + // Ensure GPU→CPU buffer writes are visible to host (non-coherent memory). + m2Renderer->invalidateCullOutput(frame); + // Visibility results are now in cullOutputMapped_[frame], readable by CPU. + } + // --- Off-screen pre-passes --- // Build frame graph: registers pre-passes as graph nodes with dependencies. // compile() topologically sorts; execute() runs them with auto barriers. 
@@ -1489,7 +1539,9 @@ void Renderer::renderWorld(game::World* world, game::GameHandler* gameHandler) { if (parallelRecordingEnabled_) { // --- Pre-compute state + GPU allocations on main thread (not thread-safe) --- if (m2Renderer && cameraController) { - m2Renderer->setInsideInterior(cameraController->isInsideWMO()); + // Use isInsideInteriorWMO (flag 0x2000) — not isInsideWMO which includes + // outdoor WMO groups like archways/bridges that should receive shadows. + m2Renderer->setInsideInterior(cameraController->isInsideInteriorWMO()); m2Renderer->setOnTaxi(cameraController->isOnTaxi()); } if (wmoRenderer) wmoRenderer->prepareRender(); @@ -1734,7 +1786,8 @@ void Renderer::renderWorld(game::World* world, game::GameHandler* gameHandler) { if (m2Renderer && camera && !skipM2) { if (cameraController) { - m2Renderer->setInsideInterior(cameraController->isInsideWMO()); + // Use isInsideInteriorWMO (flag 0x2000) for correct indoor detection + m2Renderer->setInsideInterior(cameraController->isInsideInteriorWMO()); m2Renderer->setOnTaxi(cameraController->isOnTaxi()); } m2Renderer->prepareRender(frameIdx, *camera); @@ -1887,6 +1940,23 @@ bool Renderer::initializeRenderers(pipeline::AssetManager* assetManager, const s spellVisualSystem_->initialize(m2Renderer.get()); } } + + // HiZ occlusion culling — temporal reprojection. + // The HiZ pyramid is built from the previous frame's depth buffer. The cull + // compute shader uses prevViewProj to project objects into the previous frame's + // screen space so that depth samples match the pyramid, eliminating flicker + // caused by camera movement between frames. 
+ if (!hizSystem_ && m2Renderer && vkCtx) { + hizSystem_ = std::make_unique(); + auto extent = vkCtx->getSwapchainExtent(); + if (hizSystem_->initialize(vkCtx, extent.width, extent.height)) { + m2Renderer->setHiZSystem(hizSystem_.get()); + LOG_INFO("HiZ occlusion culling initialized (", extent.width, "x", extent.height, ")"); + } else { + LOG_WARNING("HiZ occlusion culling unavailable — falling back to frustum-only culling"); + hizSystem_.reset(); + } + } if (!wmoRenderer) { wmoRenderer = std::make_unique(); wmoRenderer->initialize(vkCtx, perFrameSetLayout, assetManager); @@ -2627,7 +2697,6 @@ void Renderer::buildFrameGraph(game::GameHandler* gameHandler) { auto shadowDepth = renderGraph_->findResource("shadow_depth"); auto reflTex = renderGraph_->findResource("reflection_texture"); - auto cullVis = renderGraph_->findResource("cull_visibility"); // Minimap composites (no dependencies — standalone off-screen render target) renderGraph_->addPass("minimap_composite", {}, {}, @@ -2670,13 +2739,6 @@ void Renderer::buildFrameGraph(game::GameHandler* gameHandler) { renderReflectionPass(); }); - // GPU frustum cull compute → outputs cull_visibility - renderGraph_->addPass("compute_cull", {}, {cullVis}, - [this](VkCommandBuffer cmd) { - if (m2Renderer && camera) - m2Renderer->dispatchCullCompute(cmd, vkCtx->getCurrentFrame(), *camera); - }); - renderGraph_->compile(); } diff --git a/src/rendering/vk_context.cpp b/src/rendering/vk_context.cpp index 2de87569..1e5b39ea 100644 --- a/src/rendering/vk_context.cpp +++ b/src/rendering/vk_context.cpp @@ -798,7 +798,8 @@ bool VkContext::createDepthBuffer() { imgInfo.arrayLayers = 1; imgInfo.samples = msaaSamples_; imgInfo.tiling = VK_IMAGE_TILING_OPTIMAL; - imgInfo.usage = VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT; + imgInfo.usage = VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT + | VK_IMAGE_USAGE_SAMPLED_BIT; // HiZ pyramid reads depth as texture 
VmaAllocationCreateInfo allocInfo{}; allocInfo.usage = VMA_MEMORY_USAGE_GPU_ONLY; @@ -911,7 +912,8 @@ bool VkContext::createDepthResolveImage() { imgInfo.arrayLayers = 1; imgInfo.samples = VK_SAMPLE_COUNT_1_BIT; imgInfo.tiling = VK_IMAGE_TILING_OPTIMAL; - imgInfo.usage = VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT; + imgInfo.usage = VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT + | VK_IMAGE_USAGE_SAMPLED_BIT; // HiZ pyramid reads depth as texture VmaAllocationCreateInfo allocInfo{}; allocInfo.usage = VMA_MEMORY_USAGE_GPU_ONLY; diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index b5773534..ae168aec 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -197,6 +197,19 @@ target_link_libraries(test_anim_capability PRIVATE catch2_main) add_test(NAME anim_capability COMMAND test_anim_capability) register_test_target(test_anim_capability) +# ── test_indoor_shadows ────────────────────────────────────── +add_executable(test_indoor_shadows + test_indoor_shadows.cpp +) +target_include_directories(test_indoor_shadows PRIVATE ${TEST_INCLUDE_DIRS}) +target_include_directories(test_indoor_shadows SYSTEM PRIVATE ${TEST_SYSTEM_INCLUDE_DIRS}) +target_link_libraries(test_indoor_shadows PRIVATE catch2_main) +if(TARGET glm::glm) + target_link_libraries(test_indoor_shadows PRIVATE glm::glm) +endif() +add_test(NAME indoor_shadows COMMAND test_indoor_shadows) +register_test_target(test_indoor_shadows) + # ── ASAN / UBSan for test targets ──────────────────────────── if(WOWEE_ENABLE_ASAN AND NOT MSVC) foreach(_t IN LISTS ALL_TEST_TARGETS) diff --git a/tests/test_indoor_shadows.cpp b/tests/test_indoor_shadows.cpp new file mode 100644 index 00000000..ea6e209a --- /dev/null +++ b/tests/test_indoor_shadows.cpp @@ -0,0 +1,122 @@ +// Tests for indoor shadow disable logic (WMO interior groups) +// +// WMO interior groups (flag 0x2000) should NOT receive directional sun shadows +// because they rely on pre-baked 
vertex color lighting (MOCV) and the shadow map +// only makes them darker. The fix is in the fragment shader: interior groups +// skip the shadow map sample entirely. +// +// These tests verify the data contract between the renderer and the shader: +// - GPUPerFrameData.shadowParams.x controls global shadow enable +// - WMOMaterial.isInterior controls per-group interior flag +// - Interior groups ignore shadows regardless of global shadow state + +#include +#include "rendering/vk_frame_data.hpp" + +#include + +using wowee::rendering::GPUPerFrameData; + +// Replicates the shadow params logic from Renderer::updatePerFrameUBO() +// This should NOT be affected by indoor state — shadows remain globally enabled +static void applyShadowParams(GPUPerFrameData& fd, + bool shadowsEnabled, + float shadowDistance = 300.0f) { + float shadowBias = glm::clamp(0.8f * (shadowDistance / 300.0f), 0.0f, 1.0f); + fd.shadowParams = glm::vec4(shadowsEnabled ? 1.0f : 0.0f, shadowBias, 0.0f, 0.0f); +} + +// Replicates the WMO interior shader logic: +// interior groups skip shadow sampling entirely (shadow factor = 1.0 = fully lit). +// This covers both lit and unlit interior materials — isInterior takes priority. 
+static float computeWmoShadowFactor(bool isInterior, float globalShadowEnabled, float rawShadow) {
+    // The shadow map result is used only for exterior groups while shadows are globally
+    // enabled. Interior groups are tested first, so they are always fully lit (1.0).
+    const bool usesShadowMap = !isInterior && globalShadowEnabled > 0.5f;
+    return usesShadowMap ? rawShadow : 1.0f;
+}
+
+TEST_CASE("Global shadow params are not affected by indoor state", "[indoor_shadows]") {
+    GPUPerFrameData frame{};
+
+    // Enabled: x stays 1.0 regardless of any indoor logic
+    applyShadowParams(frame, /*shadowsEnabled=*/true);
+    REQUIRE(frame.shadowParams.x == Catch::Approx(1.0f));
+
+    // Disabled: x is 0.0
+    applyShadowParams(frame, /*shadowsEnabled=*/false);
+    REQUIRE(frame.shadowParams.x == Catch::Approx(0.0f));
+}
+
+TEST_CASE("Interior WMO groups skip shadow sampling", "[indoor_shadows]") {
+    // Shadows globally on and a dark shadow-map value (0.2): interior still gets 1.0
+    REQUIRE(computeWmoShadowFactor(/*isInterior=*/true, /*globalShadowEnabled=*/1.0f, /*rawShadow=*/0.2f)
+            == Catch::Approx(1.0f));
+}
+
+TEST_CASE("Exterior WMO groups receive shadows normally", "[indoor_shadows]") {
+    REQUIRE(computeWmoShadowFactor(/*isInterior=*/false, /*globalShadowEnabled=*/1.0f, /*rawShadow=*/0.3f)
+            == Catch::Approx(0.3f));
+}
+
+TEST_CASE("Exterior WMO groups skip shadows when globally disabled", "[indoor_shadows]") {
+    REQUIRE(computeWmoShadowFactor(/*isInterior=*/false, /*globalShadowEnabled=*/0.0f, /*rawShadow=*/0.3f)
+            == Catch::Approx(1.0f));
+}
+
+TEST_CASE("Interior WMO groups skip shadows even when globally disabled", "[indoor_shadows]") {
+    REQUIRE(computeWmoShadowFactor(/*isInterior=*/true, /*globalShadowEnabled=*/0.0f, /*rawShadow=*/0.5f)
+            == Catch::Approx(1.0f));
+}
+
+TEST_CASE("Unlit interior surfaces skip shadows (isInterior takes priority over unlit)", "[indoor_shadows]") {
+    // Interior walls often carry the F_UNLIT material flag (0x01). The shader has to test
+    // isInterior BEFORE unlit, otherwise these surfaces would be darkened by the shadow map.
+    // Unlit but interior → shadow factor = 1.0
+    REQUIRE(computeWmoShadowFactor(/*isInterior=*/true, /*globalShadowEnabled=*/1.0f, /*rawShadow=*/0.1f)
+            == Catch::Approx(1.0f));
+}
+
+TEST_CASE("Outdoor unlit surfaces still receive shadows", "[indoor_shadows]") {
+    // Exterior unlit surfaces (isInterior=false, unlit=true in shader) keep the
+    // shadow-map darkening
+    REQUIRE(computeWmoShadowFactor(/*isInterior=*/false, /*globalShadowEnabled=*/1.0f, /*rawShadow=*/0.25f)
+            == Catch::Approx(0.25f));
+}
+
+TEST_CASE("Shadow bias scales with shadow distance", "[indoor_shadows]") {
+    GPUPerFrameData frame{};
+
+    // Default distance 300.0f → bias 0.8
+    applyShadowParams(frame, true, 300.0f);
+    REQUIRE(frame.shadowParams.y == Catch::Approx(0.8f));
+
+    // Half the distance (150.0f) → bias 0.4
+    applyShadowParams(frame, true, 150.0f);
+    REQUIRE(frame.shadowParams.y == Catch::Approx(0.4f));
+
+    // Double the distance (600.0f) → clamped to the [0, 1] upper bound
+    applyShadowParams(frame, true, 600.0f);
+    REQUIRE(frame.shadowParams.y == Catch::Approx(1.0f));
+}
+
+TEST_CASE("Ambient color is NOT modified globally for indoor state", "[indoor_shadows]") {
+    // Indoor state must never touch the global UBO ambient color: indoor lighting is
+    // applied per group in the WMO shader via MOCV vertex colors and the MOHD ambient color.
+    GPUPerFrameData frame{};
+    frame.ambientColor = glm::vec4(0.3f, 0.3f, 0.3f, 1.0f);
+
+    applyShadowParams(frame, true);
+
+    // RGB must come back unchanged
+    REQUIRE(frame.ambientColor.x == Catch::Approx(0.3f));
+    REQUIRE(frame.ambientColor.y == Catch::Approx(0.3f));
+    REQUIRE(frame.ambientColor.z == Catch::Approx(0.3f));
+}