feat(rendering): add HiZ occlusion culling & fix WMO interior shadows

Implement GPU-driven Hierarchical-Z occlusion culling for M2 doodads
using a depth pyramid built from the previous frame's depth buffer.
The cull shader projects bounding spheres via prevViewProj (temporal
reprojection) and samples the HiZ pyramid to reject hidden objects
before the main render pass.

Key implementation details:
- Separate early compute submission (beginSingleTimeCommands + fence
  wait) eliminates 2-frame visibility staleness
- Conservative safeguards prevent false culls: screen-edge guard,
  full VP row-vector AABB projection (Cauchy-Schwarz), 50% sphere
  inflation, depth bias, mip+1, min screen size threshold, camera
  motion dampening (auto-disable on fast rotations), and per-instance
  previouslyVisible flag tracking
- Graceful fallback to frustum-only culling if HiZ init fails

Fix dark WMO interiors by gating shadow map sampling on isInterior==0
in the WMO fragment shader. Interior groups (flag 0x2000) now rely
solely on pre-baked MOCV vertex-color lighting + MOHD ambient color.
Disable interiorDarken globally (was incorrectly darkening outdoor M2s
when camera was inside a WMO). Use isInsideInteriorWMO() instead of
isInsideWMO() for correct indoor detection.

New files:
- hiz_system.hpp/cpp: pyramid image management, compute pipeline,
  descriptors, mip-chain build dispatch, resize handling
- hiz_build.comp.glsl: MAX-depth 2x2 reduction compute shader
- m2_cull_hiz.comp.glsl: frustum + HiZ occlusion cull compute shader
- test_indoor_shadows.cpp: 14 unit tests for shadow/interior contracts

Modified:
- CullUniformsGPU expanded 128->272 bytes (HiZ params, viewProj,
  prevViewProj)
- Depth buffer images gain VK_IMAGE_USAGE_SAMPLED_BIT for HiZ reads
- wmo.frag.glsl: interior branch before unlit, shadow skip for 0x2000
- Render graph: hiz_build + compute_cull disabled (run in early compute)
- .gitignore: ignore compiled .spv binaries
- MEGA_BONE_MAX_INSTANCES: 2048 -> 4096

Signed-off-by: Pavel Okhlopkov <pavel.okhlopkov@flant.com>
This commit is contained in:
Pavel Okhlopkov 2026-04-06 16:40:59 +03:00
parent 17c1e3ea3b
commit 4b9b3026f4
17 changed files with 1317 additions and 35 deletions

View file

@ -23,6 +23,7 @@
#include "rendering/character_preview.hpp"
#include "rendering/wmo_renderer.hpp"
#include "rendering/m2_renderer.hpp"
#include "rendering/hiz_system.hpp"
#include "rendering/minimap.hpp"
#include "rendering/world_map.hpp"
#include "rendering/quest_marker_renderer.hpp"
@ -580,7 +581,6 @@ bool Renderer::initialize(core::Window* win) {
overlaySystem_ = std::make_unique<OverlaySystem>(vkCtx);
renderGraph_->registerResource("shadow_depth");
renderGraph_->registerResource("reflection_texture");
renderGraph_->registerResource("cull_visibility");
renderGraph_->registerResource("scene_color");
renderGraph_->registerResource("scene_depth");
renderGraph_->registerResource("final_image");
@ -672,6 +672,10 @@ void Renderer::shutdown() {
}
LOG_DEBUG("Renderer::shutdown - m2Renderer...");
if (hizSystem_) {
hizSystem_->shutdown();
hizSystem_.reset();
}
if (m2Renderer) {
m2Renderer->shutdown();
m2Renderer.reset();
@ -798,6 +802,17 @@ void Renderer::applyMsaaChange() {
if (minimap) minimap->recreatePipelines();
// Resize HiZ pyramid (depth format/MSAA may have changed)
if (hizSystem_) {
auto ext = vkCtx->getSwapchainExtent();
if (!hizSystem_->resize(ext.width, ext.height)) {
LOG_WARNING("HiZ resize failed after MSAA change");
if (m2Renderer) m2Renderer->setHiZSystem(nullptr);
hizSystem_->shutdown();
hizSystem_.reset();
}
}
// Selection circle + overlay + FSR use lazy init, just destroy them
if (overlaySystem_) overlaySystem_->recreatePipelines();
if (postProcessPipeline_) postProcessPipeline_->destroyAllResources(); // Will be lazily recreated in beginFrame()
@ -846,6 +861,16 @@ void Renderer::beginFrame() {
}
// Recreate post-process resources for new swapchain dimensions
if (postProcessPipeline_) postProcessPipeline_->handleSwapchainResize();
// Resize HiZ depth pyramid for new swapchain dimensions
if (hizSystem_) {
auto ext = vkCtx->getSwapchainExtent();
if (!hizSystem_->resize(ext.width, ext.height)) {
LOG_WARNING("HiZ resize failed — disabling occlusion culling");
if (m2Renderer) m2Renderer->setHiZSystem(nullptr);
hizSystem_->shutdown();
hizSystem_.reset();
}
}
}
// Acquire swapchain image and begin command buffer
@ -864,6 +889,31 @@ void Renderer::beginFrame() {
// Update per-frame UBO with current camera/lighting state
updatePerFrameUBO();
// ── Early compute: HiZ pyramid build + M2 frustum/occlusion cull ──
// These run in a SEPARATE command buffer submission so the GPU executes
// them immediately. The CPU then reads the fresh visibility results
// before recording the main render pass — eliminating the 2-frame
// staleness that occurs when compute + render share one submission.
if (m2Renderer && camera && vkCtx) {
VkCommandBuffer computeCmd = vkCtx->beginSingleTimeCommands();
uint32_t frame = vkCtx->getCurrentFrame();
// Build HiZ depth pyramid from previous frame's depth buffer
if (hizSystem_ && hizSystem_->isReady()) {
VkImage depthSrc = vkCtx->getDepthCopySourceImage();
hizSystem_->buildPyramid(computeCmd, frame, depthSrc);
}
// Dispatch GPU frustum + HiZ occlusion culling
m2Renderer->dispatchCullCompute(computeCmd, frame, *camera);
vkCtx->endSingleTimeCommands(computeCmd);
// Ensure GPU→CPU buffer writes are visible to host (non-coherent memory).
m2Renderer->invalidateCullOutput(frame);
// Visibility results are now in cullOutputMapped_[frame], readable by CPU.
}
// --- Off-screen pre-passes ---
// Build frame graph: registers pre-passes as graph nodes with dependencies.
// compile() topologically sorts; execute() runs them with auto barriers.
@ -1489,7 +1539,9 @@ void Renderer::renderWorld(game::World* world, game::GameHandler* gameHandler) {
if (parallelRecordingEnabled_) {
// --- Pre-compute state + GPU allocations on main thread (not thread-safe) ---
if (m2Renderer && cameraController) {
m2Renderer->setInsideInterior(cameraController->isInsideWMO());
// Use isInsideInteriorWMO (flag 0x2000) — not isInsideWMO which includes
// outdoor WMO groups like archways/bridges that should receive shadows.
m2Renderer->setInsideInterior(cameraController->isInsideInteriorWMO());
m2Renderer->setOnTaxi(cameraController->isOnTaxi());
}
if (wmoRenderer) wmoRenderer->prepareRender();
@ -1734,7 +1786,8 @@ void Renderer::renderWorld(game::World* world, game::GameHandler* gameHandler) {
if (m2Renderer && camera && !skipM2) {
if (cameraController) {
m2Renderer->setInsideInterior(cameraController->isInsideWMO());
// Use isInsideInteriorWMO (flag 0x2000) for correct indoor detection
m2Renderer->setInsideInterior(cameraController->isInsideInteriorWMO());
m2Renderer->setOnTaxi(cameraController->isOnTaxi());
}
m2Renderer->prepareRender(frameIdx, *camera);
@ -1887,6 +1940,23 @@ bool Renderer::initializeRenderers(pipeline::AssetManager* assetManager, const s
spellVisualSystem_->initialize(m2Renderer.get());
}
}
// HiZ occlusion culling — temporal reprojection.
// The HiZ pyramid is built from the previous frame's depth buffer. The cull
// compute shader uses prevViewProj to project objects into the previous frame's
// screen space so that depth samples match the pyramid, eliminating flicker
// caused by camera movement between frames.
if (!hizSystem_ && m2Renderer && vkCtx) {
hizSystem_ = std::make_unique<HiZSystem>();
auto extent = vkCtx->getSwapchainExtent();
if (hizSystem_->initialize(vkCtx, extent.width, extent.height)) {
m2Renderer->setHiZSystem(hizSystem_.get());
LOG_INFO("HiZ occlusion culling initialized (", extent.width, "x", extent.height, ")");
} else {
LOG_WARNING("HiZ occlusion culling unavailable — falling back to frustum-only culling");
hizSystem_.reset();
}
}
if (!wmoRenderer) {
wmoRenderer = std::make_unique<WMORenderer>();
wmoRenderer->initialize(vkCtx, perFrameSetLayout, assetManager);
@ -2627,7 +2697,6 @@ void Renderer::buildFrameGraph(game::GameHandler* gameHandler) {
auto shadowDepth = renderGraph_->findResource("shadow_depth");
auto reflTex = renderGraph_->findResource("reflection_texture");
auto cullVis = renderGraph_->findResource("cull_visibility");
// Minimap composites (no dependencies — standalone off-screen render target)
renderGraph_->addPass("minimap_composite", {}, {},
@ -2670,13 +2739,6 @@ void Renderer::buildFrameGraph(game::GameHandler* gameHandler) {
renderReflectionPass();
});
// GPU frustum cull compute → outputs cull_visibility
renderGraph_->addPass("compute_cull", {}, {cullVis},
[this](VkCommandBuffer cmd) {
if (m2Renderer && camera)
m2Renderer->dispatchCullCompute(cmd, vkCtx->getCurrentFrame(), *camera);
});
renderGraph_->compile();
}