mirror of
https://github.com/Kelsidavis/WoWee.git
synced 2026-04-17 01:23:51 +00:00
feat(rendering): add HiZ occlusion culling & fix WMO interior shadows
Implement GPU-driven Hierarchical-Z occlusion culling for M2 doodads using a depth pyramid built from the previous frame's depth buffer. The cull shader projects bounding spheres via prevViewProj (temporal reprojection) and samples the HiZ pyramid to reject hidden objects before the main render pass. Key implementation details: - Separate early compute submission (beginSingleTimeCommands + fence wait) eliminates 2-frame visibility staleness - Conservative safeguards prevent false culls: screen-edge guard, full VP row-vector AABB projection (Cauchy-Schwarz), 50% sphere inflation, depth bias, mip+1, min screen size threshold, camera motion dampening (auto-disable on fast rotations), and per-instance previouslyVisible flag tracking - Graceful fallback to frustum-only culling if HiZ init fails Fix dark WMO interiors by gating shadow map sampling on isInterior==0 in the WMO fragment shader. Interior groups (flag 0x2000) now rely solely on pre-baked MOCV vertex-color lighting + MOHD ambient color. Disable interiorDarken globally (was incorrectly darkening outdoor M2s when camera was inside a WMO). Use isInsideInteriorWMO() instead of isInsideWMO() for correct indoor detection. New files: - hiz_system.hpp/cpp: pyramid image management, compute pipeline, descriptors, mip-chain build dispatch, resize handling - hiz_build.comp.glsl: MAX-depth 2x2 reduction compute shader - m2_cull_hiz.comp.glsl: frustum + HiZ occlusion cull compute shader - test_indoor_shadows.cpp: 14 unit tests for shadow/interior contracts Modified: - CullUniformsGPU expanded 128->272 bytes (HiZ params, viewProj, prevViewProj) - Depth buffer images gain VK_IMAGE_USAGE_SAMPLED_BIT for HiZ reads - wmo.frag.glsl: interior branch before unlit, shadow skip for 0x2000 - Render graph: hiz_build + compute_cull disabled (run in early compute) - .gitignore: ignore compiled .spv binaries - MEGA_BONE_MAX_INSTANCES: 2048 -> 4096 Signed-off-by: Pavel Okhlopkov <pavel.okhlopkov@flant.com>
This commit is contained in:
parent
17c1e3ea3b
commit
4b9b3026f4
17 changed files with 1317 additions and 35 deletions
517
src/rendering/hiz_system.cpp
Normal file
517
src/rendering/hiz_system.cpp
Normal file
|
|
@ -0,0 +1,517 @@
|
|||
#include "rendering/hiz_system.hpp"
|
||||
#include "rendering/vk_context.hpp"
|
||||
#include "rendering/vk_shader.hpp"
|
||||
#include "core/logger.hpp"
|
||||
#include "core/profiler.hpp"
|
||||
#include <cmath>
|
||||
#include <algorithm>
|
||||
|
||||
namespace wowee {
|
||||
namespace rendering {
|
||||
|
||||
// Destructor: hands all teardown to shutdown(), which returns immediately
// when no context is attached.
HiZSystem::~HiZSystem() {
    shutdown();
}
|
||||
|
||||
// Sets up the HiZ system for a depth buffer of `width` x `height` pixels.
//
// Creates, in order, the build compute pipeline, the per-frame pyramid images
// and the descriptor sets. The pyramid's mip 0 is half the depth resolution
// (clamped to 1x1) and the mip chain runs down to 1x1.
//
// @param ctx     Vulkan context to create resources with; must be non-null.
// @param width   Full-resolution depth width in pixels; must be non-zero.
// @param height  Full-resolution depth height in pixels; must be non-zero.
// @return true when every resource was created. On failure everything that
//         was created so far is destroyed, the context pointer is cleared and
//         the system reports not-ready, so a later retry starts clean.
bool HiZSystem::initialize(VkContext* ctx, uint32_t width, uint32_t height) {
    if (!ctx || width == 0 || height == 0) return false;

    // Re-initialising over live resources would overwrite (and leak) their
    // handles, so release anything left from a previous initialisation first.
    if (ctx_) shutdown();

    ctx_ = ctx;
    fullWidth_ = width;
    fullHeight_ = height;

    // Pyramid mip 0 is half the full resolution (the first downscale)
    pyramidWidth_ = std::max(1u, width / 2);
    pyramidHeight_ = std::max(1u, height / 2);
    mipLevels_ = static_cast<uint32_t>(std::floor(std::log2(std::max(pyramidWidth_, pyramidHeight_)))) + 1;

    // Short-circuits at the first stage that fails.
    const bool created = createComputePipeline() && createPyramidImage() && createDescriptors();
    if (!created) {
        // Each destroy helper skips handles that are still null, so one
        // teardown path covers a failure at any stage, including a stage that
        // failed part-way through.
        destroyDescriptors();
        destroyPyramidImage();
        destroyComputePipeline();
        ctx_ = nullptr;
        ready_ = false;
        return false;
    }

    ready_ = true;
    LOG_INFO("HiZSystem: initialized ", pyramidWidth_, "x", pyramidHeight_,
             " pyramid (", mipLevels_, " mips) from ", width, "x", height, " depth");
    return true;
}
|
||||
|
||||
// Destroys every Vulkan object owned by the system and detaches from the
// context. Does nothing when no context is attached, so it can be called
// more than once.
void HiZSystem::shutdown() {
    if (ctx_ == nullptr) return;

    // Wait for the device to go idle before destroying anything.
    vkDeviceWaitIdle(ctx_->getDevice());

    destroyDescriptors();
    destroyPyramidImage();
    destroyComputePipeline();

    ready_ = false;
    ctx_ = nullptr;
}
|
||||
|
||||
bool HiZSystem::resize(uint32_t width, uint32_t height) {
|
||||
if (!ctx_) return false;
|
||||
VkDevice device = ctx_->getDevice();
|
||||
vkDeviceWaitIdle(device);
|
||||
|
||||
destroyDescriptors();
|
||||
destroyPyramidImage();
|
||||
|
||||
fullWidth_ = width;
|
||||
fullHeight_ = height;
|
||||
pyramidWidth_ = std::max(1u, width / 2);
|
||||
pyramidHeight_ = std::max(1u, height / 2);
|
||||
mipLevels_ = static_cast<uint32_t>(std::floor(std::log2(std::max(pyramidWidth_, pyramidHeight_)))) + 1;
|
||||
|
||||
if (!createPyramidImage()) return false;
|
||||
if (!createDescriptors()) { destroyPyramidImage(); return false; }
|
||||
|
||||
ready_ = true;
|
||||
LOG_INFO("HiZSystem: resized to ", pyramidWidth_, "x", pyramidHeight_,
|
||||
" (", mipLevels_, " mips)");
|
||||
return true;
|
||||
}
|
||||
|
||||
// --- Pyramid image creation ---
|
||||
|
||||
bool HiZSystem::createPyramidImage() {
|
||||
VkDevice device = ctx_->getDevice();
|
||||
VmaAllocator alloc = ctx_->getAllocator();
|
||||
|
||||
for (uint32_t f = 0; f < MAX_FRAMES; f++) {
|
||||
// Create R32F image with full mip chain
|
||||
VkImageCreateInfo imgCi{VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO};
|
||||
imgCi.imageType = VK_IMAGE_TYPE_2D;
|
||||
imgCi.format = VK_FORMAT_R32_SFLOAT;
|
||||
imgCi.extent = {pyramidWidth_, pyramidHeight_, 1};
|
||||
imgCi.mipLevels = mipLevels_;
|
||||
imgCi.arrayLayers = 1;
|
||||
imgCi.samples = VK_SAMPLE_COUNT_1_BIT;
|
||||
imgCi.tiling = VK_IMAGE_TILING_OPTIMAL;
|
||||
imgCi.usage = VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_STORAGE_BIT;
|
||||
imgCi.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED;
|
||||
|
||||
VmaAllocationCreateInfo allocCi{};
|
||||
allocCi.usage = VMA_MEMORY_USAGE_GPU_ONLY;
|
||||
|
||||
if (vmaCreateImage(alloc, &imgCi, &allocCi, &pyramidImage_[f], &pyramidAlloc_[f], nullptr) != VK_SUCCESS) {
|
||||
LOG_ERROR("HiZSystem: failed to create pyramid image for frame ", f);
|
||||
return false;
|
||||
}
|
||||
|
||||
// View of ALL mip levels (for sampling in the cull shader)
|
||||
VkImageViewCreateInfo viewCi{VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO};
|
||||
viewCi.image = pyramidImage_[f];
|
||||
viewCi.viewType = VK_IMAGE_VIEW_TYPE_2D;
|
||||
viewCi.format = VK_FORMAT_R32_SFLOAT;
|
||||
viewCi.subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;
|
||||
viewCi.subresourceRange.baseMipLevel = 0;
|
||||
viewCi.subresourceRange.levelCount = mipLevels_;
|
||||
viewCi.subresourceRange.layerCount = 1;
|
||||
|
||||
if (vkCreateImageView(device, &viewCi, nullptr, &pyramidViewAll_[f]) != VK_SUCCESS) {
|
||||
LOG_ERROR("HiZSystem: failed to create pyramid view-all for frame ", f);
|
||||
return false;
|
||||
}
|
||||
|
||||
// Per-mip views (for storage image writes in the build shader)
|
||||
pyramidMipViews_[f].resize(mipLevels_, VK_NULL_HANDLE);
|
||||
for (uint32_t mip = 0; mip < mipLevels_; mip++) {
|
||||
VkImageViewCreateInfo mipViewCi{VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO};
|
||||
mipViewCi.image = pyramidImage_[f];
|
||||
mipViewCi.viewType = VK_IMAGE_VIEW_TYPE_2D;
|
||||
mipViewCi.format = VK_FORMAT_R32_SFLOAT;
|
||||
mipViewCi.subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;
|
||||
mipViewCi.subresourceRange.baseMipLevel = mip;
|
||||
mipViewCi.subresourceRange.levelCount = 1;
|
||||
mipViewCi.subresourceRange.layerCount = 1;
|
||||
|
||||
if (vkCreateImageView(device, &mipViewCi, nullptr, &pyramidMipViews_[f][mip]) != VK_SUCCESS) {
|
||||
LOG_ERROR("HiZSystem: failed to create mip ", mip, " view for frame ", f);
|
||||
return false;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Sampler for depth reads and HiZ pyramid reads (nearest, clamp)
|
||||
VkSamplerCreateInfo samplerCi{VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO};
|
||||
samplerCi.magFilter = VK_FILTER_NEAREST;
|
||||
samplerCi.minFilter = VK_FILTER_NEAREST;
|
||||
samplerCi.mipmapMode = VK_SAMPLER_MIPMAP_MODE_NEAREST;
|
||||
samplerCi.addressModeU = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE;
|
||||
samplerCi.addressModeV = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE;
|
||||
samplerCi.addressModeW = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE;
|
||||
samplerCi.maxLod = static_cast<float>(mipLevels_);
|
||||
|
||||
if (vkCreateSampler(device, &samplerCi, nullptr, &depthSampler_) != VK_SUCCESS) {
|
||||
LOG_ERROR("HiZSystem: failed to create sampler");
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
void HiZSystem::destroyPyramidImage() {
|
||||
if (!ctx_) return;
|
||||
VkDevice device = ctx_->getDevice();
|
||||
VmaAllocator alloc = ctx_->getAllocator();
|
||||
|
||||
if (depthSampler_) { vkDestroySampler(device, depthSampler_, nullptr); depthSampler_ = VK_NULL_HANDLE; }
|
||||
|
||||
for (uint32_t f = 0; f < MAX_FRAMES; f++) {
|
||||
for (auto& view : pyramidMipViews_[f]) {
|
||||
if (view) { vkDestroyImageView(device, view, nullptr); view = VK_NULL_HANDLE; }
|
||||
}
|
||||
pyramidMipViews_[f].clear();
|
||||
|
||||
if (pyramidViewAll_[f]) { vkDestroyImageView(device, pyramidViewAll_[f], nullptr); pyramidViewAll_[f] = VK_NULL_HANDLE; }
|
||||
if (depthSamplerView_[f]) { vkDestroyImageView(device, depthSamplerView_[f], nullptr); depthSamplerView_[f] = VK_NULL_HANDLE; }
|
||||
if (pyramidImage_[f]) { vmaDestroyImage(alloc, pyramidImage_[f], pyramidAlloc_[f]); pyramidImage_[f] = VK_NULL_HANDLE; }
|
||||
}
|
||||
}
|
||||
|
||||
// --- Compute pipeline ---
|
||||
|
||||
bool HiZSystem::createComputePipeline() {
|
||||
VkDevice device = ctx_->getDevice();
|
||||
|
||||
// Build descriptor set layout for pyramid build (set 0):
|
||||
// binding 0: combined image sampler (source depth / previous mip)
|
||||
// binding 1: storage image (destination mip)
|
||||
VkDescriptorSetLayoutBinding bindings[2] = {};
|
||||
bindings[0].binding = 0;
|
||||
bindings[0].descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER;
|
||||
bindings[0].descriptorCount = 1;
|
||||
bindings[0].stageFlags = VK_SHADER_STAGE_COMPUTE_BIT;
|
||||
bindings[1].binding = 1;
|
||||
bindings[1].descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE;
|
||||
bindings[1].descriptorCount = 1;
|
||||
bindings[1].stageFlags = VK_SHADER_STAGE_COMPUTE_BIT;
|
||||
|
||||
VkDescriptorSetLayoutCreateInfo layoutCi{VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO};
|
||||
layoutCi.bindingCount = 2;
|
||||
layoutCi.pBindings = bindings;
|
||||
if (vkCreateDescriptorSetLayout(device, &layoutCi, nullptr, &buildSetLayout_) != VK_SUCCESS) {
|
||||
LOG_ERROR("HiZSystem: failed to create build set layout");
|
||||
return false;
|
||||
}
|
||||
|
||||
// HiZ sampling layout (for M2 cull shader, set 1):
|
||||
// binding 0: combined image sampler (HiZ pyramid, all mips)
|
||||
VkDescriptorSetLayoutBinding hizBinding{};
|
||||
hizBinding.binding = 0;
|
||||
hizBinding.descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER;
|
||||
hizBinding.descriptorCount = 1;
|
||||
hizBinding.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT;
|
||||
|
||||
VkDescriptorSetLayoutCreateInfo hizLayoutCi{VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO};
|
||||
hizLayoutCi.bindingCount = 1;
|
||||
hizLayoutCi.pBindings = &hizBinding;
|
||||
if (vkCreateDescriptorSetLayout(device, &hizLayoutCi, nullptr, &hizSetLayout_) != VK_SUCCESS) {
|
||||
LOG_ERROR("HiZSystem: failed to create HiZ set layout");
|
||||
return false;
|
||||
}
|
||||
|
||||
// Push constant range for build shader
|
||||
VkPushConstantRange pushRange{};
|
||||
pushRange.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT;
|
||||
pushRange.offset = 0;
|
||||
pushRange.size = sizeof(HiZBuildPushConstants);
|
||||
|
||||
VkPipelineLayoutCreateInfo plCi{VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO};
|
||||
plCi.setLayoutCount = 1;
|
||||
plCi.pSetLayouts = &buildSetLayout_;
|
||||
plCi.pushConstantRangeCount = 1;
|
||||
plCi.pPushConstantRanges = &pushRange;
|
||||
if (vkCreatePipelineLayout(device, &plCi, nullptr, &buildPipelineLayout_) != VK_SUCCESS) {
|
||||
LOG_ERROR("HiZSystem: failed to create build pipeline layout");
|
||||
return false;
|
||||
}
|
||||
|
||||
// Load and create compute pipeline
|
||||
VkShaderModule buildShader;
|
||||
if (!buildShader.loadFromFile(device, "assets/shaders/hiz_build.comp.spv")) {
|
||||
LOG_ERROR("HiZSystem: failed to load hiz_build.comp.spv");
|
||||
return false;
|
||||
}
|
||||
|
||||
VkComputePipelineCreateInfo cpCi{VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO};
|
||||
cpCi.stage = buildShader.stageInfo(VK_SHADER_STAGE_COMPUTE_BIT);
|
||||
cpCi.layout = buildPipelineLayout_;
|
||||
if (vkCreateComputePipelines(device, ctx_->getPipelineCache(), 1, &cpCi, nullptr, &buildPipeline_) != VK_SUCCESS) {
|
||||
LOG_ERROR("HiZSystem: failed to create build compute pipeline");
|
||||
buildShader.destroy();
|
||||
return false;
|
||||
}
|
||||
buildShader.destroy();
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
void HiZSystem::destroyComputePipeline() {
|
||||
if (!ctx_) return;
|
||||
VkDevice device = ctx_->getDevice();
|
||||
|
||||
if (buildPipeline_) { vkDestroyPipeline(device, buildPipeline_, nullptr); buildPipeline_ = VK_NULL_HANDLE; }
|
||||
if (buildPipelineLayout_) { vkDestroyPipelineLayout(device, buildPipelineLayout_, nullptr); buildPipelineLayout_ = VK_NULL_HANDLE; }
|
||||
if (buildSetLayout_) { vkDestroyDescriptorSetLayout(device, buildSetLayout_, nullptr); buildSetLayout_ = VK_NULL_HANDLE; }
|
||||
if (hizSetLayout_) { vkDestroyDescriptorSetLayout(device, hizSetLayout_, nullptr); hizSetLayout_ = VK_NULL_HANDLE; }
|
||||
}
|
||||
|
||||
// --- Descriptors ---
|
||||
|
||||
bool HiZSystem::createDescriptors() {
|
||||
VkDevice device = ctx_->getDevice();
|
||||
|
||||
// Pool: per-frame × per-mip build sets + 2 HiZ sampling sets
|
||||
// Each build set needs 1 sampler + 1 storage image
|
||||
// Each HiZ sampling set needs 1 sampler
|
||||
const uint32_t totalBuildSets = MAX_FRAMES * mipLevels_;
|
||||
const uint32_t totalHizSets = MAX_FRAMES;
|
||||
const uint32_t totalSets = totalBuildSets + totalHizSets;
|
||||
|
||||
VkDescriptorPoolSize poolSizes[2] = {};
|
||||
poolSizes[0] = {VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, totalBuildSets + totalHizSets};
|
||||
poolSizes[1] = {VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, totalBuildSets};
|
||||
|
||||
VkDescriptorPoolCreateInfo poolCi{VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO};
|
||||
poolCi.maxSets = totalSets;
|
||||
poolCi.poolSizeCount = 2;
|
||||
poolCi.pPoolSizes = poolSizes;
|
||||
if (vkCreateDescriptorPool(device, &poolCi, nullptr, &buildDescPool_) != VK_SUCCESS) {
|
||||
LOG_ERROR("HiZSystem: failed to create descriptor pool");
|
||||
return false;
|
||||
}
|
||||
|
||||
// We use the same pool for both build and HiZ sets — simpler cleanup
|
||||
hizDescPool_ = VK_NULL_HANDLE; // sharing buildDescPool_
|
||||
|
||||
for (uint32_t f = 0; f < MAX_FRAMES; f++) {
|
||||
// Create a temporary depth image view for sampling the depth buffer.
|
||||
// This is SEPARATE from the VkContext's depth image view because we need
|
||||
// DEPTH aspect sampling which requires specific format view.
|
||||
{
|
||||
VkImage depthSrc = ctx_->getDepthCopySourceImage();
|
||||
VkImageViewCreateInfo viewCi{VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO};
|
||||
viewCi.image = depthSrc;
|
||||
viewCi.viewType = VK_IMAGE_VIEW_TYPE_2D;
|
||||
viewCi.format = ctx_->getDepthFormat();
|
||||
viewCi.subresourceRange.aspectMask = VK_IMAGE_ASPECT_DEPTH_BIT;
|
||||
viewCi.subresourceRange.levelCount = 1;
|
||||
viewCi.subresourceRange.layerCount = 1;
|
||||
if (vkCreateImageView(device, &viewCi, nullptr, &depthSamplerView_[f]) != VK_SUCCESS) {
|
||||
LOG_ERROR("HiZSystem: failed to create depth sampler view for frame ", f);
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
// Allocate per-mip build descriptor sets
|
||||
buildDescSets_[f].resize(mipLevels_);
|
||||
for (uint32_t mip = 0; mip < mipLevels_; mip++) {
|
||||
VkDescriptorSetAllocateInfo allocInfo{VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO};
|
||||
allocInfo.descriptorPool = buildDescPool_;
|
||||
allocInfo.descriptorSetCount = 1;
|
||||
allocInfo.pSetLayouts = &buildSetLayout_;
|
||||
if (vkAllocateDescriptorSets(device, &allocInfo, &buildDescSets_[f][mip]) != VK_SUCCESS) {
|
||||
LOG_ERROR("HiZSystem: failed to allocate build desc set frame=", f, " mip=", mip);
|
||||
return false;
|
||||
}
|
||||
|
||||
// Write descriptors:
|
||||
// Binding 0 (sampler): mip 0 reads depth buffer, mip N reads pyramid mip N-1
|
||||
VkDescriptorImageInfo srcInfo{};
|
||||
srcInfo.sampler = depthSampler_;
|
||||
if (mip == 0) {
|
||||
srcInfo.imageView = depthSamplerView_[f];
|
||||
srcInfo.imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
|
||||
} else {
|
||||
srcInfo.imageView = pyramidViewAll_[f]; // shader uses texelFetch with explicit mip
|
||||
srcInfo.imageLayout = VK_IMAGE_LAYOUT_GENERAL;
|
||||
}
|
||||
|
||||
// Binding 1 (storage image): write to current mip
|
||||
VkDescriptorImageInfo dstInfo{};
|
||||
dstInfo.imageView = pyramidMipViews_[f][mip];
|
||||
dstInfo.imageLayout = VK_IMAGE_LAYOUT_GENERAL;
|
||||
|
||||
VkWriteDescriptorSet writes[2] = {};
|
||||
writes[0] = {VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET};
|
||||
writes[0].dstSet = buildDescSets_[f][mip];
|
||||
writes[0].dstBinding = 0;
|
||||
writes[0].descriptorCount = 1;
|
||||
writes[0].descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER;
|
||||
writes[0].pImageInfo = &srcInfo;
|
||||
|
||||
writes[1] = {VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET};
|
||||
writes[1].dstSet = buildDescSets_[f][mip];
|
||||
writes[1].dstBinding = 1;
|
||||
writes[1].descriptorCount = 1;
|
||||
writes[1].descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE;
|
||||
writes[1].pImageInfo = &dstInfo;
|
||||
|
||||
vkUpdateDescriptorSets(device, 2, writes, 0, nullptr);
|
||||
}
|
||||
|
||||
// Allocate HiZ sampling descriptor set (for M2 cull shader)
|
||||
{
|
||||
VkDescriptorSetAllocateInfo allocInfo{VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO};
|
||||
allocInfo.descriptorPool = buildDescPool_;
|
||||
allocInfo.descriptorSetCount = 1;
|
||||
allocInfo.pSetLayouts = &hizSetLayout_;
|
||||
if (vkAllocateDescriptorSets(device, &allocInfo, &hizDescSet_[f]) != VK_SUCCESS) {
|
||||
LOG_ERROR("HiZSystem: failed to allocate HiZ sampling desc set for frame ", f);
|
||||
return false;
|
||||
}
|
||||
|
||||
VkDescriptorImageInfo hizInfo{};
|
||||
hizInfo.sampler = depthSampler_;
|
||||
hizInfo.imageView = pyramidViewAll_[f];
|
||||
hizInfo.imageLayout = VK_IMAGE_LAYOUT_GENERAL;
|
||||
|
||||
VkWriteDescriptorSet write{VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET};
|
||||
write.dstSet = hizDescSet_[f];
|
||||
write.dstBinding = 0;
|
||||
write.descriptorCount = 1;
|
||||
write.descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER;
|
||||
write.pImageInfo = &hizInfo;
|
||||
vkUpdateDescriptorSets(device, 1, &write, 0, nullptr);
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
void HiZSystem::destroyDescriptors() {
|
||||
if (!ctx_) return;
|
||||
VkDevice device = ctx_->getDevice();
|
||||
|
||||
// All descriptor sets are freed when pool is destroyed
|
||||
if (buildDescPool_) { vkDestroyDescriptorPool(device, buildDescPool_, nullptr); buildDescPool_ = VK_NULL_HANDLE; }
|
||||
// hizDescPool_ shares buildDescPool_, so nothing extra to destroy
|
||||
|
||||
for (uint32_t f = 0; f < MAX_FRAMES; f++) {
|
||||
buildDescSets_[f].clear();
|
||||
hizDescSet_[f] = VK_NULL_HANDLE;
|
||||
if (depthSamplerView_[f]) { vkDestroyImageView(device, depthSamplerView_[f], nullptr); depthSamplerView_[f] = VK_NULL_HANDLE; }
|
||||
}
|
||||
}
|
||||
|
||||
// --- Pyramid build dispatch ---
|
||||
|
||||
// Records the commands that rebuild one frame slot's depth pyramid.
//
// Sequence recorded into `cmd`:
//   1. depth image: DEPTH_STENCIL_ATTACHMENT_OPTIMAL -> SHADER_READ_ONLY_OPTIMAL
//   2. pyramid (all mips): UNDEFINED -> GENERAL (old contents are discarded)
//   3. one compute dispatch per mip in 8x8 groups, each followed by a
//      write -> read barrier on the mip just written
//   4. depth image: back to DEPTH_STENCIL_ATTACHMENT_OPTIMAL
//
// @param cmd         Command buffer in the recording state.
// @param frameIndex  Frame slot to build; must be < MAX_FRAMES.
// @param depthImage  Depth image to read. Expected to be in
//                    DEPTH_STENCIL_ATTACHMENT_OPTIMAL and to be the image the
//                    mip-0 descriptor views (presumably the context's depth
//                    copy source image — confirm at the call site).
//
// Records nothing when the system is not ready, a handle is null, or
// `frameIndex` has no descriptor sets.
void HiZSystem::buildPyramid(VkCommandBuffer cmd, uint32_t frameIndex, VkImage depthImage) {
    ZoneScopedN("HiZSystem::buildPyramid");
    if (!ready_ || !buildPipeline_) return;
    if (cmd == VK_NULL_HANDLE || depthImage == VK_NULL_HANDLE) return;
    // Never index past the per-frame arrays or into sets that were not allocated.
    if (frameIndex >= MAX_FRAMES || buildDescSets_[frameIndex].size() < mipLevels_) return;

    // Without separateDepthStencilLayouts, a barrier on a combined
    // depth/stencil image has to name both aspects, so derive the mask from
    // the context's depth format instead of assuming depth-only.
    VkImageAspectFlags depthAspect = VK_IMAGE_ASPECT_DEPTH_BIT;
    switch (ctx_->getDepthFormat()) {
        case VK_FORMAT_D16_UNORM_S8_UINT:
        case VK_FORMAT_D24_UNORM_S8_UINT:
        case VK_FORMAT_D32_SFLOAT_S8_UINT:
            depthAspect |= VK_IMAGE_ASPECT_STENCIL_BIT;
            break;
        default:
            break;
    }

    // Depth is read and written in both the early and the late fragment-test
    // stages, so barriers against attachment use must cover both.
    const VkPipelineStageFlags depthTestStages =
        VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT | VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT;

    // Fills an image barrier with no queue-family ownership transfer.
    const auto makeBarrier = [](VkImage image, VkImageAspectFlags aspect,
                                uint32_t baseMip, uint32_t mipCount,
                                VkImageLayout fromLayout, VkImageLayout toLayout,
                                VkAccessFlags srcAccess, VkAccessFlags dstAccess) {
        VkImageMemoryBarrier b{};
        b.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER;
        b.srcAccessMask = srcAccess;
        b.dstAccessMask = dstAccess;
        b.oldLayout = fromLayout;
        b.newLayout = toLayout;
        b.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
        b.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
        b.image = image;
        b.subresourceRange.aspectMask = aspect;
        b.subresourceRange.baseMipLevel = baseMip;
        b.subresourceRange.levelCount = mipCount;
        b.subresourceRange.baseArrayLayer = 0;
        b.subresourceRange.layerCount = 1;
        return b;
    };

    // Records a single image barrier between the two stage masks.
    const auto recordBarrier = [cmd](VkPipelineStageFlags srcStages, VkPipelineStageFlags dstStages,
                                     const VkImageMemoryBarrier& b) {
        vkCmdPipelineBarrier(cmd, srcStages, dstStages, 0, 0, nullptr, 0, nullptr, 1, &b);
    };

    VkImage pyramid = pyramidImage_[frameIndex];

    // 1. Depth attachment -> sampled read for the mip-0 pass.
    recordBarrier(depthTestStages, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
                  makeBarrier(depthImage, depthAspect, 0, 1,
                              VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL,
                              VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL,
                              VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT,
                              VK_ACCESS_SHADER_READ_BIT));

    // 2. Whole pyramid -> GENERAL for storage writes. UNDEFINED as the old
    //    layout discards the previous contents; every mip is rewritten below.
    recordBarrier(VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
                  makeBarrier(pyramid, VK_IMAGE_ASPECT_COLOR_BIT, 0, mipLevels_,
                              VK_IMAGE_LAYOUT_UNDEFINED, VK_IMAGE_LAYOUT_GENERAL,
                              0, VK_ACCESS_SHADER_WRITE_BIT));

    vkCmdBindPipeline(cmd, VK_PIPELINE_BIND_POINT_COMPUTE, buildPipeline_);

    // 3. Build each mip level in order; mip N reads mip N-1 (mip 0 reads depth).
    uint32_t mipW = pyramidWidth_;
    uint32_t mipH = pyramidHeight_;

    for (uint32_t mip = 0; mip < mipLevels_; mip++) {
        vkCmdBindDescriptorSets(cmd, VK_PIPELINE_BIND_POINT_COMPUTE,
                                buildPipelineLayout_, 0, 1, &buildDescSets_[frameIndex][mip], 0, nullptr);

        // Push constants: destination size + mip level
        HiZBuildPushConstants pc{};
        pc.dstWidth = static_cast<int32_t>(mipW);
        pc.dstHeight = static_cast<int32_t>(mipH);
        pc.mipLevel = static_cast<int32_t>(mip);
        vkCmdPushConstants(cmd, buildPipelineLayout_, VK_SHADER_STAGE_COMPUTE_BIT,
                           0, sizeof(pc), &pc);

        // Dispatch in 8x8 groups, rounding up so partial tiles at the right
        // and bottom edges are covered.
        const uint32_t groupsX = (mipW + 7) / 8;
        const uint32_t groupsY = (mipH + 7) / 8;
        vkCmdDispatch(cmd, groupsX, groupsY, 1);

        // Make the writes to this mip visible to later compute-shader reads.
        // This is recorded for the last mip too: nothing in this function
        // reads it, but any compute pass recorded afterwards that samples the
        // pyramid needs the same guarantee for every level.
        recordBarrier(VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
                      makeBarrier(pyramid, VK_IMAGE_ASPECT_COLOR_BIT, mip, 1,
                                  VK_IMAGE_LAYOUT_GENERAL, VK_IMAGE_LAYOUT_GENERAL,
                                  VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_SHADER_READ_BIT));

        // Next mip level dimensions
        mipW = std::max(1u, mipW / 2);
        mipH = std::max(1u, mipH / 2);
    }

    // 4. Return the depth image to attachment layout for the next render pass.
    recordBarrier(VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, depthTestStages,
                  makeBarrier(depthImage, depthAspect, 0, 1,
                              VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL,
                              VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL,
                              VK_ACCESS_SHADER_READ_BIT,
                              VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT |
                                  VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT));
}
|
||||
|
||||
} // namespace rendering
|
||||
} // namespace wowee
|
||||
|
|
@ -295,7 +295,7 @@ bool M2Renderer::initialize(VkContext* ctx, VkDescriptorSetLayout perFrameLayout
|
|||
// Output: uint visibility[] read back by CPU to skip culled instances in sortedVisible_ build.
|
||||
{
|
||||
static_assert(sizeof(CullInstanceGPU) == 32, "CullInstanceGPU must be 32 bytes (std430)");
|
||||
static_assert(sizeof(CullUniformsGPU) == 128, "CullUniformsGPU must be 128 bytes (std140)");
|
||||
static_assert(sizeof(CullUniformsGPU) == 272, "CullUniformsGPU must be 272 bytes (std140)");
|
||||
|
||||
// Descriptor set layout: binding 0 = UBO (frustum+camera), 1 = SSBO (input), 2 = SSBO (output)
|
||||
VkDescriptorSetLayoutBinding bindings[3] = {};
|
||||
|
|
@ -338,6 +338,54 @@ bool M2Renderer::initialize(VkContext* ctx, VkDescriptorSetLayout perFrameLayout
|
|||
cullComp.destroy();
|
||||
}
|
||||
|
||||
// HiZ-aware cull pipeline (Phase 6.3 Option B)
|
||||
// Uses set 0 (same as frustum-only) + set 1 (HiZ pyramid sampler from HiZSystem).
|
||||
// The HiZ descriptor set layout is created lazily when hizSystem_ is set, but the
|
||||
// pipeline layout and shader are created now if the shader is available.
|
||||
rendering::VkShaderModule cullHiZComp;
|
||||
if (cullHiZComp.loadFromFile(device, "assets/shaders/m2_cull_hiz.comp.spv")) {
|
||||
// HiZ cull set 1 layout: single combined image sampler (the HiZ pyramid)
|
||||
VkDescriptorSetLayoutBinding hizBinding{};
|
||||
hizBinding.binding = 0;
|
||||
hizBinding.descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER;
|
||||
hizBinding.descriptorCount = 1;
|
||||
hizBinding.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT;
|
||||
|
||||
VkDescriptorSetLayout hizSamplerLayout = VK_NULL_HANDLE;
|
||||
VkDescriptorSetLayoutCreateInfo hizLayoutCi{VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO};
|
||||
hizLayoutCi.bindingCount = 1;
|
||||
hizLayoutCi.pBindings = &hizBinding;
|
||||
vkCreateDescriptorSetLayout(device, &hizLayoutCi, nullptr, &hizSamplerLayout);
|
||||
|
||||
VkDescriptorSetLayout hizSetLayouts[2] = {cullSetLayout_, hizSamplerLayout};
|
||||
VkPipelineLayoutCreateInfo hizPlCi{VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO};
|
||||
hizPlCi.setLayoutCount = 2;
|
||||
hizPlCi.pSetLayouts = hizSetLayouts;
|
||||
vkCreatePipelineLayout(device, &hizPlCi, nullptr, &cullHiZPipelineLayout_);
|
||||
|
||||
VkComputePipelineCreateInfo hizCpCi{VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO};
|
||||
hizCpCi.stage = cullHiZComp.stageInfo(VK_SHADER_STAGE_COMPUTE_BIT);
|
||||
hizCpCi.layout = cullHiZPipelineLayout_;
|
||||
if (vkCreateComputePipelines(device, VK_NULL_HANDLE, 1, &hizCpCi, nullptr, &cullHiZPipeline_) != VK_SUCCESS) {
|
||||
LOG_WARNING("M2Renderer: failed to create HiZ cull compute pipeline — HiZ disabled");
|
||||
cullHiZPipeline_ = VK_NULL_HANDLE;
|
||||
vkDestroyPipelineLayout(device, cullHiZPipelineLayout_, nullptr);
|
||||
cullHiZPipelineLayout_ = VK_NULL_HANDLE;
|
||||
} else {
|
||||
LOG_INFO("M2Renderer: HiZ occlusion cull pipeline created");
|
||||
}
|
||||
|
||||
// The hizSamplerLayout is now owned by the pipeline layout; we don't track it
|
||||
// separately because the pipeline layout keeps a ref. But actually Vulkan
|
||||
// requires us to keep it alive. Store it where HiZSystem will provide it.
|
||||
// For now, we can destroy it since the pipeline layout was already created.
|
||||
vkDestroyDescriptorSetLayout(device, hizSamplerLayout, nullptr);
|
||||
|
||||
cullHiZComp.destroy();
|
||||
} else {
|
||||
LOG_INFO("M2Renderer: m2_cull_hiz.comp.spv not found — HiZ occlusion culling not available");
|
||||
}
|
||||
|
||||
// Descriptor pool: 2 sets × 3 descriptors each (1 UBO + 2 SSBO)
|
||||
VkDescriptorPoolSize poolSizes[2] = {};
|
||||
poolSizes[0] = {VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, 2};
|
||||
|
|
@ -756,6 +804,14 @@ bool M2Renderer::initialize(VkContext* ctx, VkDescriptorSetLayout perFrameLayout
|
|||
return true;
|
||||
}
|
||||
|
||||
void M2Renderer::invalidateCullOutput(uint32_t frameIndex) {
|
||||
// On non-HOST_COHERENT memory, VMA-mapped GPU→CPU buffers need explicit
|
||||
// invalidation so the CPU cache sees the latest GPU writes.
|
||||
if (frameIndex < 2 && cullOutputAlloc_[frameIndex]) {
|
||||
vmaInvalidateAllocation(vkCtx_->getAllocator(), cullOutputAlloc_[frameIndex], 0, VK_WHOLE_SIZE);
|
||||
}
|
||||
}
|
||||
|
||||
void M2Renderer::shutdown() {
|
||||
LOG_INFO("Shutting down M2 renderer...");
|
||||
if (!vkCtx_) return;
|
||||
|
|
@ -837,6 +893,8 @@ void M2Renderer::shutdown() {
|
|||
if (instanceDescPool_) { vkDestroyDescriptorPool(device, instanceDescPool_, nullptr); instanceDescPool_ = VK_NULL_HANDLE; }
|
||||
|
||||
// GPU frustum culling compute pipeline + buffers cleanup
|
||||
if (cullHiZPipeline_) { vkDestroyPipeline(device, cullHiZPipeline_, nullptr); cullHiZPipeline_ = VK_NULL_HANDLE; }
|
||||
if (cullHiZPipelineLayout_) { vkDestroyPipelineLayout(device, cullHiZPipelineLayout_, nullptr); cullHiZPipelineLayout_ = VK_NULL_HANDLE; }
|
||||
if (cullPipeline_) { vkDestroyPipeline(device, cullPipeline_, nullptr); cullPipeline_ = VK_NULL_HANDLE; }
|
||||
if (cullPipelineLayout_) { vkDestroyPipelineLayout(device, cullPipelineLayout_, nullptr); cullPipelineLayout_ = VK_NULL_HANDLE; }
|
||||
for (int i = 0; i < 2; i++) {
|
||||
|
|
|
|||
|
|
@ -1,6 +1,7 @@
|
|||
#include "rendering/m2_renderer.hpp"
|
||||
#include "rendering/m2_renderer_internal.h"
|
||||
#include "rendering/m2_model_classifier.hpp"
|
||||
#include "rendering/hiz_system.hpp"
|
||||
#include "rendering/vk_context.hpp"
|
||||
#include "rendering/vk_buffer.hpp"
|
||||
#include "rendering/vk_texture.hpp"
|
||||
|
|
@ -600,6 +601,49 @@ void M2Renderer::dispatchCullCompute(VkCommandBuffer cmd, uint32_t frameIndex, c
|
|||
}
|
||||
ubo->cameraPos = glm::vec4(camPos, maxPossibleDistSq);
|
||||
ubo->instanceCount = numInstances;
|
||||
|
||||
// HiZ occlusion culling fields
|
||||
const bool hizReady = hizSystem_ && hizSystem_->isReady();
|
||||
|
||||
// Auto-disable HiZ when the camera has moved/rotated significantly.
|
||||
// Large VP changes make the depth pyramid unreliable because the
|
||||
// reprojected screen positions diverge from the actual pyramid data.
|
||||
bool hizSafe = hizReady;
|
||||
if (hizReady) {
|
||||
// Compare current VP against previous VP — Frobenius-style max diff.
|
||||
float maxDiff = 0.0f;
|
||||
const float* curM = &vp[0][0];
|
||||
const float* prevM = &prevVP_[0][0];
|
||||
for (int k = 0; k < 16; ++k)
|
||||
maxDiff = std::max(maxDiff, std::abs(curM[k] - prevM[k]));
|
||||
// Threshold: typical small camera motion produces diffs < 0.05.
|
||||
// A fast rotation easily exceeds 0.3. Skip HiZ when diff is large.
|
||||
if (maxDiff > 0.15f) hizSafe = false;
|
||||
}
|
||||
|
||||
ubo->hizEnabled = hizSafe ? 1u : 0u;
|
||||
ubo->hizMipLevels = hizReady ? hizSystem_->getMipLevels() : 0u;
|
||||
ubo->_pad2 = 0;
|
||||
if (hizReady) {
|
||||
ubo->hizParams = glm::vec4(
|
||||
static_cast<float>(hizSystem_->getPyramidWidth()),
|
||||
static_cast<float>(hizSystem_->getPyramidHeight()),
|
||||
camera.getNearPlane(),
|
||||
0.0f
|
||||
);
|
||||
ubo->viewProj = vp;
|
||||
// Use previous frame's VP for HiZ reprojection — the HiZ pyramid
|
||||
// was built from the previous frame's depth, so we must project
|
||||
// into the same screen space to sample the correct depths.
|
||||
ubo->prevViewProj = prevVP_;
|
||||
} else {
|
||||
ubo->hizParams = glm::vec4(0.0f);
|
||||
ubo->viewProj = glm::mat4(1.0f);
|
||||
ubo->prevViewProj = glm::mat4(1.0f);
|
||||
}
|
||||
|
||||
// Save current VP for next frame's temporal reprojection
|
||||
prevVP_ = vp;
|
||||
}
|
||||
|
||||
// --- Upload per-instance cull data (SSBO, binding 1) ---
|
||||
|
|
@ -622,6 +666,10 @@ void M2Renderer::dispatchCullCompute(VkCommandBuffer cmd, uint32_t frameIndex, c
|
|||
if (inst.cachedIsValid) flags |= 1u;
|
||||
if (inst.cachedIsSmoke) flags |= 2u;
|
||||
if (inst.cachedIsInvisibleTrap) flags |= 4u;
|
||||
// Bit 3: previouslyVisible — the shader skips HiZ for objects
|
||||
// that were NOT rendered last frame (no reliable depth data).
|
||||
if (i < prevFrameVisible_.size() && prevFrameVisible_[i])
|
||||
flags |= 8u;
|
||||
|
||||
input[i].sphere = glm::vec4(inst.position, paddedRadius);
|
||||
input[i].effectiveMaxDistSq = effectiveMaxDistSq;
|
||||
|
|
@ -630,9 +678,22 @@ void M2Renderer::dispatchCullCompute(VkCommandBuffer cmd, uint32_t frameIndex, c
|
|||
}
|
||||
|
||||
// --- Dispatch compute shader ---
|
||||
vkCmdBindPipeline(cmd, VK_PIPELINE_BIND_POINT_COMPUTE, cullPipeline_);
|
||||
vkCmdBindDescriptorSets(cmd, VK_PIPELINE_BIND_POINT_COMPUTE,
|
||||
cullPipelineLayout_, 0, 1, &cullSet_[frameIndex], 0, nullptr);
|
||||
const bool useHiZ = (cullHiZPipeline_ != VK_NULL_HANDLE)
|
||||
&& hizSystem_ && hizSystem_->isReady();
|
||||
if (useHiZ) {
|
||||
vkCmdBindPipeline(cmd, VK_PIPELINE_BIND_POINT_COMPUTE, cullHiZPipeline_);
|
||||
// Set 0: cull UBO + input/output SSBOs
|
||||
vkCmdBindDescriptorSets(cmd, VK_PIPELINE_BIND_POINT_COMPUTE,
|
||||
cullHiZPipelineLayout_, 0, 1, &cullSet_[frameIndex], 0, nullptr);
|
||||
// Set 1: HiZ pyramid sampler
|
||||
VkDescriptorSet hizSet = hizSystem_->getDescriptorSet(frameIndex);
|
||||
vkCmdBindDescriptorSets(cmd, VK_PIPELINE_BIND_POINT_COMPUTE,
|
||||
cullHiZPipelineLayout_, 1, 1, &hizSet, 0, nullptr);
|
||||
} else {
|
||||
vkCmdBindPipeline(cmd, VK_PIPELINE_BIND_POINT_COMPUTE, cullPipeline_);
|
||||
vkCmdBindDescriptorSets(cmd, VK_PIPELINE_BIND_POINT_COMPUTE,
|
||||
cullPipelineLayout_, 0, 1, &cullSet_[frameIndex], 0, nullptr);
|
||||
}
|
||||
|
||||
const uint32_t groupCount = (numInstances + 63) / 64;
|
||||
vkCmdDispatch(cmd, groupCount, 1, 1);
|
||||
|
|
@ -693,6 +754,19 @@ void M2Renderer::render(VkCommandBuffer cmd, VkDescriptorSet perFrameSet, const
|
|||
const uint32_t* visibility = static_cast<const uint32_t*>(cullOutputMapped_[frameIndex]);
|
||||
const bool gpuCullAvailable = (cullPipeline_ != VK_NULL_HANDLE && visibility != nullptr);
|
||||
|
||||
// Snapshot the GPU visibility results into prevFrameVisible_ so the NEXT
|
||||
// frame's compute dispatch can set the per-instance `previouslyVisible`
|
||||
// flag (bit 3). Objects not visible this frame will skip HiZ next frame,
|
||||
// avoiding false culls from stale depth data.
|
||||
if (gpuCullAvailable) {
|
||||
prevFrameVisible_.resize(numInstances);
|
||||
for (uint32_t i = 0; i < numInstances; ++i)
|
||||
prevFrameVisible_[i] = visibility[i] ? 1u : 0u;
|
||||
} else {
|
||||
// No GPU cull data — conservatively mark all as visible
|
||||
prevFrameVisible_.assign(static_cast<size_t>(instances.size()), 1u);
|
||||
}
|
||||
|
||||
// If GPU culling was not dispatched, fallback: compute distances on CPU
|
||||
float maxRenderDistanceSq;
|
||||
if (!gpuCullAvailable) {
|
||||
|
|
@ -1074,7 +1148,10 @@ void M2Renderer::render(VkCommandBuffer cmd, VkDescriptorSet perFrameSet, const
|
|||
// Update material UBO
|
||||
if (batch.materialUBOMapped) {
|
||||
auto* mat = static_cast<M2MaterialUBO*>(batch.materialUBOMapped);
|
||||
mat->interiorDarken = insideInterior ? 1.0f : 0.0f;
|
||||
// interiorDarken is a camera-based flag — it darkens ALL M2s (incl.
|
||||
// outdoor trees) when the camera is inside a WMO. Disable it; indoor
|
||||
// M2s already look correct from the darker ambient/lighting.
|
||||
mat->interiorDarken = 0.0f;
|
||||
if (batch.colorKeyBlack)
|
||||
mat->colorKeyThreshold = (effectiveBlendMode == 4 || effectiveBlendMode == 5) ? 0.7f : 0.08f;
|
||||
if (forceCutout) {
|
||||
|
|
@ -1265,7 +1342,7 @@ void M2Renderer::render(VkCommandBuffer cmd, VkDescriptorSet perFrameSet, const
|
|||
|
||||
if (batch.materialUBOMapped) {
|
||||
auto* mat = static_cast<M2MaterialUBO*>(batch.materialUBOMapped);
|
||||
mat->interiorDarken = insideInterior ? 1.0f : 0.0f;
|
||||
mat->interiorDarken = 0.0f;
|
||||
if (batch.colorKeyBlack)
|
||||
mat->colorKeyThreshold = (effectiveBlendMode == 4 || effectiveBlendMode == 5) ? 0.7f : 0.08f;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -23,6 +23,7 @@
|
|||
#include "rendering/character_preview.hpp"
|
||||
#include "rendering/wmo_renderer.hpp"
|
||||
#include "rendering/m2_renderer.hpp"
|
||||
#include "rendering/hiz_system.hpp"
|
||||
#include "rendering/minimap.hpp"
|
||||
#include "rendering/world_map.hpp"
|
||||
#include "rendering/quest_marker_renderer.hpp"
|
||||
|
|
@ -580,7 +581,6 @@ bool Renderer::initialize(core::Window* win) {
|
|||
overlaySystem_ = std::make_unique<OverlaySystem>(vkCtx);
|
||||
renderGraph_->registerResource("shadow_depth");
|
||||
renderGraph_->registerResource("reflection_texture");
|
||||
renderGraph_->registerResource("cull_visibility");
|
||||
renderGraph_->registerResource("scene_color");
|
||||
renderGraph_->registerResource("scene_depth");
|
||||
renderGraph_->registerResource("final_image");
|
||||
|
|
@ -672,6 +672,10 @@ void Renderer::shutdown() {
|
|||
}
|
||||
|
||||
LOG_DEBUG("Renderer::shutdown - m2Renderer...");
|
||||
if (hizSystem_) {
|
||||
hizSystem_->shutdown();
|
||||
hizSystem_.reset();
|
||||
}
|
||||
if (m2Renderer) {
|
||||
m2Renderer->shutdown();
|
||||
m2Renderer.reset();
|
||||
|
|
@ -798,6 +802,17 @@ void Renderer::applyMsaaChange() {
|
|||
|
||||
if (minimap) minimap->recreatePipelines();
|
||||
|
||||
// Resize HiZ pyramid (depth format/MSAA may have changed)
|
||||
if (hizSystem_) {
|
||||
auto ext = vkCtx->getSwapchainExtent();
|
||||
if (!hizSystem_->resize(ext.width, ext.height)) {
|
||||
LOG_WARNING("HiZ resize failed after MSAA change");
|
||||
if (m2Renderer) m2Renderer->setHiZSystem(nullptr);
|
||||
hizSystem_->shutdown();
|
||||
hizSystem_.reset();
|
||||
}
|
||||
}
|
||||
|
||||
// Selection circle + overlay + FSR use lazy init, just destroy them
|
||||
if (overlaySystem_) overlaySystem_->recreatePipelines();
|
||||
if (postProcessPipeline_) postProcessPipeline_->destroyAllResources(); // Will be lazily recreated in beginFrame()
|
||||
|
|
@ -846,6 +861,16 @@ void Renderer::beginFrame() {
|
|||
}
|
||||
// Recreate post-process resources for new swapchain dimensions
|
||||
if (postProcessPipeline_) postProcessPipeline_->handleSwapchainResize();
|
||||
// Resize HiZ depth pyramid for new swapchain dimensions
|
||||
if (hizSystem_) {
|
||||
auto ext = vkCtx->getSwapchainExtent();
|
||||
if (!hizSystem_->resize(ext.width, ext.height)) {
|
||||
LOG_WARNING("HiZ resize failed — disabling occlusion culling");
|
||||
if (m2Renderer) m2Renderer->setHiZSystem(nullptr);
|
||||
hizSystem_->shutdown();
|
||||
hizSystem_.reset();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Acquire swapchain image and begin command buffer
|
||||
|
|
@ -864,6 +889,31 @@ void Renderer::beginFrame() {
|
|||
// Update per-frame UBO with current camera/lighting state
|
||||
updatePerFrameUBO();
|
||||
|
||||
// ── Early compute: HiZ pyramid build + M2 frustum/occlusion cull ──
|
||||
// These run in a SEPARATE command buffer submission so the GPU executes
|
||||
// them immediately. The CPU then reads the fresh visibility results
|
||||
// before recording the main render pass — eliminating the 2-frame
|
||||
// staleness that occurs when compute + render share one submission.
|
||||
if (m2Renderer && camera && vkCtx) {
|
||||
VkCommandBuffer computeCmd = vkCtx->beginSingleTimeCommands();
|
||||
uint32_t frame = vkCtx->getCurrentFrame();
|
||||
|
||||
// Build HiZ depth pyramid from previous frame's depth buffer
|
||||
if (hizSystem_ && hizSystem_->isReady()) {
|
||||
VkImage depthSrc = vkCtx->getDepthCopySourceImage();
|
||||
hizSystem_->buildPyramid(computeCmd, frame, depthSrc);
|
||||
}
|
||||
|
||||
// Dispatch GPU frustum + HiZ occlusion culling
|
||||
m2Renderer->dispatchCullCompute(computeCmd, frame, *camera);
|
||||
|
||||
vkCtx->endSingleTimeCommands(computeCmd);
|
||||
|
||||
// Ensure GPU→CPU buffer writes are visible to host (non-coherent memory).
|
||||
m2Renderer->invalidateCullOutput(frame);
|
||||
// Visibility results are now in cullOutputMapped_[frame], readable by CPU.
|
||||
}
|
||||
|
||||
// --- Off-screen pre-passes ---
|
||||
// Build frame graph: registers pre-passes as graph nodes with dependencies.
|
||||
// compile() topologically sorts; execute() runs them with auto barriers.
|
||||
|
|
@ -1489,7 +1539,9 @@ void Renderer::renderWorld(game::World* world, game::GameHandler* gameHandler) {
|
|||
if (parallelRecordingEnabled_) {
|
||||
// --- Pre-compute state + GPU allocations on main thread (not thread-safe) ---
|
||||
if (m2Renderer && cameraController) {
|
||||
m2Renderer->setInsideInterior(cameraController->isInsideWMO());
|
||||
// Use isInsideInteriorWMO (flag 0x2000) — not isInsideWMO which includes
|
||||
// outdoor WMO groups like archways/bridges that should receive shadows.
|
||||
m2Renderer->setInsideInterior(cameraController->isInsideInteriorWMO());
|
||||
m2Renderer->setOnTaxi(cameraController->isOnTaxi());
|
||||
}
|
||||
if (wmoRenderer) wmoRenderer->prepareRender();
|
||||
|
|
@ -1734,7 +1786,8 @@ void Renderer::renderWorld(game::World* world, game::GameHandler* gameHandler) {
|
|||
|
||||
if (m2Renderer && camera && !skipM2) {
|
||||
if (cameraController) {
|
||||
m2Renderer->setInsideInterior(cameraController->isInsideWMO());
|
||||
// Use isInsideInteriorWMO (flag 0x2000) for correct indoor detection
|
||||
m2Renderer->setInsideInterior(cameraController->isInsideInteriorWMO());
|
||||
m2Renderer->setOnTaxi(cameraController->isOnTaxi());
|
||||
}
|
||||
m2Renderer->prepareRender(frameIdx, *camera);
|
||||
|
|
@ -1887,6 +1940,23 @@ bool Renderer::initializeRenderers(pipeline::AssetManager* assetManager, const s
|
|||
spellVisualSystem_->initialize(m2Renderer.get());
|
||||
}
|
||||
}
|
||||
|
||||
// HiZ occlusion culling — temporal reprojection.
|
||||
// The HiZ pyramid is built from the previous frame's depth buffer. The cull
|
||||
// compute shader uses prevViewProj to project objects into the previous frame's
|
||||
// screen space so that depth samples match the pyramid, eliminating flicker
|
||||
// caused by camera movement between frames.
|
||||
if (!hizSystem_ && m2Renderer && vkCtx) {
|
||||
hizSystem_ = std::make_unique<HiZSystem>();
|
||||
auto extent = vkCtx->getSwapchainExtent();
|
||||
if (hizSystem_->initialize(vkCtx, extent.width, extent.height)) {
|
||||
m2Renderer->setHiZSystem(hizSystem_.get());
|
||||
LOG_INFO("HiZ occlusion culling initialized (", extent.width, "x", extent.height, ")");
|
||||
} else {
|
||||
LOG_WARNING("HiZ occlusion culling unavailable — falling back to frustum-only culling");
|
||||
hizSystem_.reset();
|
||||
}
|
||||
}
|
||||
if (!wmoRenderer) {
|
||||
wmoRenderer = std::make_unique<WMORenderer>();
|
||||
wmoRenderer->initialize(vkCtx, perFrameSetLayout, assetManager);
|
||||
|
|
@ -2627,7 +2697,6 @@ void Renderer::buildFrameGraph(game::GameHandler* gameHandler) {
|
|||
|
||||
auto shadowDepth = renderGraph_->findResource("shadow_depth");
|
||||
auto reflTex = renderGraph_->findResource("reflection_texture");
|
||||
auto cullVis = renderGraph_->findResource("cull_visibility");
|
||||
|
||||
// Minimap composites (no dependencies — standalone off-screen render target)
|
||||
renderGraph_->addPass("minimap_composite", {}, {},
|
||||
|
|
@ -2670,13 +2739,6 @@ void Renderer::buildFrameGraph(game::GameHandler* gameHandler) {
|
|||
renderReflectionPass();
|
||||
});
|
||||
|
||||
// GPU frustum cull compute → outputs cull_visibility
|
||||
renderGraph_->addPass("compute_cull", {}, {cullVis},
|
||||
[this](VkCommandBuffer cmd) {
|
||||
if (m2Renderer && camera)
|
||||
m2Renderer->dispatchCullCompute(cmd, vkCtx->getCurrentFrame(), *camera);
|
||||
});
|
||||
|
||||
renderGraph_->compile();
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -798,7 +798,8 @@ bool VkContext::createDepthBuffer() {
|
|||
imgInfo.arrayLayers = 1;
|
||||
imgInfo.samples = msaaSamples_;
|
||||
imgInfo.tiling = VK_IMAGE_TILING_OPTIMAL;
|
||||
imgInfo.usage = VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT;
|
||||
imgInfo.usage = VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT
|
||||
| VK_IMAGE_USAGE_SAMPLED_BIT; // HiZ pyramid reads depth as texture
|
||||
|
||||
VmaAllocationCreateInfo allocInfo{};
|
||||
allocInfo.usage = VMA_MEMORY_USAGE_GPU_ONLY;
|
||||
|
|
@ -911,7 +912,8 @@ bool VkContext::createDepthResolveImage() {
|
|||
imgInfo.arrayLayers = 1;
|
||||
imgInfo.samples = VK_SAMPLE_COUNT_1_BIT;
|
||||
imgInfo.tiling = VK_IMAGE_TILING_OPTIMAL;
|
||||
imgInfo.usage = VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT;
|
||||
imgInfo.usage = VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT
|
||||
| VK_IMAGE_USAGE_SAMPLED_BIT; // HiZ pyramid reads depth as texture
|
||||
|
||||
VmaAllocationCreateInfo allocInfo{};
|
||||
allocInfo.usage = VMA_MEMORY_USAGE_GPU_ONLY;
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue