mirror of
https://github.com/Kelsidavis/WoWee.git
synced 2026-05-04 16:23:52 +00:00
Merge pull request #52 from ldmonster/feat/hiz-occlusion-culling
[feat] rendering: Hierarchical-Z occlusion culling
This commit is contained in:
commit
5d0d140c61
17 changed files with 1317 additions and 35 deletions
517
src/rendering/hiz_system.cpp
Normal file
517
src/rendering/hiz_system.cpp
Normal file
|
|
@ -0,0 +1,517 @@
|
|||
#include "rendering/hiz_system.hpp"
|
||||
#include "rendering/vk_context.hpp"
|
||||
#include "rendering/vk_shader.hpp"
|
||||
#include "core/logger.hpp"
|
||||
#include "core/profiler.hpp"
|
||||
#include <cmath>
|
||||
#include <algorithm>
|
||||
|
||||
namespace wowee {
|
||||
namespace rendering {
|
||||
|
||||
// Destructor: hands all teardown to shutdown(), which returns immediately
// when no context is attached.
HiZSystem::~HiZSystem() { shutdown(); }
|
||||
|
||||
// Creates the HiZ build pipeline, the per-frame depth pyramid and all
// descriptor sets for a depth buffer of `width` x `height` pixels.
//
// ctx     Vulkan context supplying the device, allocator, pipeline cache and
//         the depth image to sample. Must not be null.
// width   Full-resolution depth width in pixels, must be non-zero.
// height  Full-resolution depth height in pixels, must be non-zero.
//
// Returns true when everything was created and the system is ready; false on
// invalid arguments or when any creation step failed. On failure every object
// created so far is released again.
bool HiZSystem::initialize(VkContext* ctx, uint32_t width, uint32_t height) {
    if (!ctx || width == 0 || height == 0) return false;

    // Initialising a system that is still attached to a context would overwrite
    // (and therefore leak) every handle created earlier, so release them first.
    if (ctx_) shutdown();

    ctx_ = ctx;
    fullWidth_ = width;
    fullHeight_ = height;

    // Pyramid mip 0 is half the full resolution (the first downscale)
    pyramidWidth_ = std::max(1u, width / 2);
    pyramidHeight_ = std::max(1u, height / 2);
    // floor(log2(largest side)) + 1 == number of levels down to 1x1
    mipLevels_ = static_cast<uint32_t>(std::floor(std::log2(std::max(pyramidWidth_, pyramidHeight_)))) + 1;

    if (!createComputePipeline() || !createPyramidImage() || !createDescriptors()) {
        // Each create step may have failed half-way through. The destroy helpers
        // skip null handles, so running all of them (in reverse creation order)
        // releases exactly what exists.
        // NOTE(review): relies on the handle members starting out as
        // VK_NULL_HANDLE, the same assumption shutdown() already makes.
        destroyDescriptors();
        destroyPyramidImage();
        destroyComputePipeline();
        ready_ = false;
        return false;
    }

    ready_ = true;
    LOG_INFO("HiZSystem: initialized ", pyramidWidth_, "x", pyramidHeight_,
             " pyramid (", mipLevels_, " mips) from ", width, "x", height, " depth");
    return true;
}
|
||||
|
||||
// Waits for the device to go idle, then releases descriptors, pyramid images
// and the build pipeline, and detaches from the context.
// Does nothing when no context is attached.
void HiZSystem::shutdown() {
    if (ctx_ == nullptr) {
        return;
    }

    // Nothing may still be executing on the GPU while its resources are destroyed.
    vkDeviceWaitIdle(ctx_->getDevice());

    destroyDescriptors();
    destroyPyramidImage();
    destroyComputePipeline();

    ready_ = false;
    ctx_ = nullptr;
}
|
||||
|
||||
bool HiZSystem::resize(uint32_t width, uint32_t height) {
|
||||
if (!ctx_) return false;
|
||||
VkDevice device = ctx_->getDevice();
|
||||
vkDeviceWaitIdle(device);
|
||||
|
||||
destroyDescriptors();
|
||||
destroyPyramidImage();
|
||||
|
||||
fullWidth_ = width;
|
||||
fullHeight_ = height;
|
||||
pyramidWidth_ = std::max(1u, width / 2);
|
||||
pyramidHeight_ = std::max(1u, height / 2);
|
||||
mipLevels_ = static_cast<uint32_t>(std::floor(std::log2(std::max(pyramidWidth_, pyramidHeight_)))) + 1;
|
||||
|
||||
if (!createPyramidImage()) return false;
|
||||
if (!createDescriptors()) { destroyPyramidImage(); return false; }
|
||||
|
||||
ready_ = true;
|
||||
LOG_INFO("HiZSystem: resized to ", pyramidWidth_, "x", pyramidHeight_,
|
||||
" (", mipLevels_, " mips)");
|
||||
return true;
|
||||
}
|
||||
|
||||
// --- Pyramid image creation ---
|
||||
|
||||
bool HiZSystem::createPyramidImage() {
|
||||
VkDevice device = ctx_->getDevice();
|
||||
VmaAllocator alloc = ctx_->getAllocator();
|
||||
|
||||
for (uint32_t f = 0; f < MAX_FRAMES; f++) {
|
||||
// Create R32F image with full mip chain
|
||||
VkImageCreateInfo imgCi{VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO};
|
||||
imgCi.imageType = VK_IMAGE_TYPE_2D;
|
||||
imgCi.format = VK_FORMAT_R32_SFLOAT;
|
||||
imgCi.extent = {pyramidWidth_, pyramidHeight_, 1};
|
||||
imgCi.mipLevels = mipLevels_;
|
||||
imgCi.arrayLayers = 1;
|
||||
imgCi.samples = VK_SAMPLE_COUNT_1_BIT;
|
||||
imgCi.tiling = VK_IMAGE_TILING_OPTIMAL;
|
||||
imgCi.usage = VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_STORAGE_BIT;
|
||||
imgCi.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED;
|
||||
|
||||
VmaAllocationCreateInfo allocCi{};
|
||||
allocCi.usage = VMA_MEMORY_USAGE_GPU_ONLY;
|
||||
|
||||
if (vmaCreateImage(alloc, &imgCi, &allocCi, &pyramidImage_[f], &pyramidAlloc_[f], nullptr) != VK_SUCCESS) {
|
||||
LOG_ERROR("HiZSystem: failed to create pyramid image for frame ", f);
|
||||
return false;
|
||||
}
|
||||
|
||||
// View of ALL mip levels (for sampling in the cull shader)
|
||||
VkImageViewCreateInfo viewCi{VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO};
|
||||
viewCi.image = pyramidImage_[f];
|
||||
viewCi.viewType = VK_IMAGE_VIEW_TYPE_2D;
|
||||
viewCi.format = VK_FORMAT_R32_SFLOAT;
|
||||
viewCi.subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;
|
||||
viewCi.subresourceRange.baseMipLevel = 0;
|
||||
viewCi.subresourceRange.levelCount = mipLevels_;
|
||||
viewCi.subresourceRange.layerCount = 1;
|
||||
|
||||
if (vkCreateImageView(device, &viewCi, nullptr, &pyramidViewAll_[f]) != VK_SUCCESS) {
|
||||
LOG_ERROR("HiZSystem: failed to create pyramid view-all for frame ", f);
|
||||
return false;
|
||||
}
|
||||
|
||||
// Per-mip views (for storage image writes in the build shader)
|
||||
pyramidMipViews_[f].resize(mipLevels_, VK_NULL_HANDLE);
|
||||
for (uint32_t mip = 0; mip < mipLevels_; mip++) {
|
||||
VkImageViewCreateInfo mipViewCi{VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO};
|
||||
mipViewCi.image = pyramidImage_[f];
|
||||
mipViewCi.viewType = VK_IMAGE_VIEW_TYPE_2D;
|
||||
mipViewCi.format = VK_FORMAT_R32_SFLOAT;
|
||||
mipViewCi.subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;
|
||||
mipViewCi.subresourceRange.baseMipLevel = mip;
|
||||
mipViewCi.subresourceRange.levelCount = 1;
|
||||
mipViewCi.subresourceRange.layerCount = 1;
|
||||
|
||||
if (vkCreateImageView(device, &mipViewCi, nullptr, &pyramidMipViews_[f][mip]) != VK_SUCCESS) {
|
||||
LOG_ERROR("HiZSystem: failed to create mip ", mip, " view for frame ", f);
|
||||
return false;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Sampler for depth reads and HiZ pyramid reads (nearest, clamp)
|
||||
VkSamplerCreateInfo samplerCi{VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO};
|
||||
samplerCi.magFilter = VK_FILTER_NEAREST;
|
||||
samplerCi.minFilter = VK_FILTER_NEAREST;
|
||||
samplerCi.mipmapMode = VK_SAMPLER_MIPMAP_MODE_NEAREST;
|
||||
samplerCi.addressModeU = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE;
|
||||
samplerCi.addressModeV = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE;
|
||||
samplerCi.addressModeW = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE;
|
||||
samplerCi.maxLod = static_cast<float>(mipLevels_);
|
||||
|
||||
if (vkCreateSampler(device, &samplerCi, nullptr, &depthSampler_) != VK_SUCCESS) {
|
||||
LOG_ERROR("HiZSystem: failed to create sampler");
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
void HiZSystem::destroyPyramidImage() {
|
||||
if (!ctx_) return;
|
||||
VkDevice device = ctx_->getDevice();
|
||||
VmaAllocator alloc = ctx_->getAllocator();
|
||||
|
||||
if (depthSampler_) { vkDestroySampler(device, depthSampler_, nullptr); depthSampler_ = VK_NULL_HANDLE; }
|
||||
|
||||
for (uint32_t f = 0; f < MAX_FRAMES; f++) {
|
||||
for (auto& view : pyramidMipViews_[f]) {
|
||||
if (view) { vkDestroyImageView(device, view, nullptr); view = VK_NULL_HANDLE; }
|
||||
}
|
||||
pyramidMipViews_[f].clear();
|
||||
|
||||
if (pyramidViewAll_[f]) { vkDestroyImageView(device, pyramidViewAll_[f], nullptr); pyramidViewAll_[f] = VK_NULL_HANDLE; }
|
||||
if (depthSamplerView_[f]) { vkDestroyImageView(device, depthSamplerView_[f], nullptr); depthSamplerView_[f] = VK_NULL_HANDLE; }
|
||||
if (pyramidImage_[f]) { vmaDestroyImage(alloc, pyramidImage_[f], pyramidAlloc_[f]); pyramidImage_[f] = VK_NULL_HANDLE; }
|
||||
}
|
||||
}
|
||||
|
||||
// --- Compute pipeline ---
|
||||
|
||||
bool HiZSystem::createComputePipeline() {
|
||||
VkDevice device = ctx_->getDevice();
|
||||
|
||||
// Build descriptor set layout for pyramid build (set 0):
|
||||
// binding 0: combined image sampler (source depth / previous mip)
|
||||
// binding 1: storage image (destination mip)
|
||||
VkDescriptorSetLayoutBinding bindings[2] = {};
|
||||
bindings[0].binding = 0;
|
||||
bindings[0].descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER;
|
||||
bindings[0].descriptorCount = 1;
|
||||
bindings[0].stageFlags = VK_SHADER_STAGE_COMPUTE_BIT;
|
||||
bindings[1].binding = 1;
|
||||
bindings[1].descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE;
|
||||
bindings[1].descriptorCount = 1;
|
||||
bindings[1].stageFlags = VK_SHADER_STAGE_COMPUTE_BIT;
|
||||
|
||||
VkDescriptorSetLayoutCreateInfo layoutCi{VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO};
|
||||
layoutCi.bindingCount = 2;
|
||||
layoutCi.pBindings = bindings;
|
||||
if (vkCreateDescriptorSetLayout(device, &layoutCi, nullptr, &buildSetLayout_) != VK_SUCCESS) {
|
||||
LOG_ERROR("HiZSystem: failed to create build set layout");
|
||||
return false;
|
||||
}
|
||||
|
||||
// HiZ sampling layout (for M2 cull shader, set 1):
|
||||
// binding 0: combined image sampler (HiZ pyramid, all mips)
|
||||
VkDescriptorSetLayoutBinding hizBinding{};
|
||||
hizBinding.binding = 0;
|
||||
hizBinding.descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER;
|
||||
hizBinding.descriptorCount = 1;
|
||||
hizBinding.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT;
|
||||
|
||||
VkDescriptorSetLayoutCreateInfo hizLayoutCi{VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO};
|
||||
hizLayoutCi.bindingCount = 1;
|
||||
hizLayoutCi.pBindings = &hizBinding;
|
||||
if (vkCreateDescriptorSetLayout(device, &hizLayoutCi, nullptr, &hizSetLayout_) != VK_SUCCESS) {
|
||||
LOG_ERROR("HiZSystem: failed to create HiZ set layout");
|
||||
return false;
|
||||
}
|
||||
|
||||
// Push constant range for build shader
|
||||
VkPushConstantRange pushRange{};
|
||||
pushRange.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT;
|
||||
pushRange.offset = 0;
|
||||
pushRange.size = sizeof(HiZBuildPushConstants);
|
||||
|
||||
VkPipelineLayoutCreateInfo plCi{VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO};
|
||||
plCi.setLayoutCount = 1;
|
||||
plCi.pSetLayouts = &buildSetLayout_;
|
||||
plCi.pushConstantRangeCount = 1;
|
||||
plCi.pPushConstantRanges = &pushRange;
|
||||
if (vkCreatePipelineLayout(device, &plCi, nullptr, &buildPipelineLayout_) != VK_SUCCESS) {
|
||||
LOG_ERROR("HiZSystem: failed to create build pipeline layout");
|
||||
return false;
|
||||
}
|
||||
|
||||
// Load and create compute pipeline
|
||||
VkShaderModule buildShader;
|
||||
if (!buildShader.loadFromFile(device, "assets/shaders/hiz_build.comp.spv")) {
|
||||
LOG_ERROR("HiZSystem: failed to load hiz_build.comp.spv");
|
||||
return false;
|
||||
}
|
||||
|
||||
VkComputePipelineCreateInfo cpCi{VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO};
|
||||
cpCi.stage = buildShader.stageInfo(VK_SHADER_STAGE_COMPUTE_BIT);
|
||||
cpCi.layout = buildPipelineLayout_;
|
||||
if (vkCreateComputePipelines(device, ctx_->getPipelineCache(), 1, &cpCi, nullptr, &buildPipeline_) != VK_SUCCESS) {
|
||||
LOG_ERROR("HiZSystem: failed to create build compute pipeline");
|
||||
buildShader.destroy();
|
||||
return false;
|
||||
}
|
||||
buildShader.destroy();
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
void HiZSystem::destroyComputePipeline() {
|
||||
if (!ctx_) return;
|
||||
VkDevice device = ctx_->getDevice();
|
||||
|
||||
if (buildPipeline_) { vkDestroyPipeline(device, buildPipeline_, nullptr); buildPipeline_ = VK_NULL_HANDLE; }
|
||||
if (buildPipelineLayout_) { vkDestroyPipelineLayout(device, buildPipelineLayout_, nullptr); buildPipelineLayout_ = VK_NULL_HANDLE; }
|
||||
if (buildSetLayout_) { vkDestroyDescriptorSetLayout(device, buildSetLayout_, nullptr); buildSetLayout_ = VK_NULL_HANDLE; }
|
||||
if (hizSetLayout_) { vkDestroyDescriptorSetLayout(device, hizSetLayout_, nullptr); hizSetLayout_ = VK_NULL_HANDLE; }
|
||||
}
|
||||
|
||||
// --- Descriptors ---
|
||||
|
||||
bool HiZSystem::createDescriptors() {
|
||||
VkDevice device = ctx_->getDevice();
|
||||
|
||||
// Pool: per-frame × per-mip build sets + 2 HiZ sampling sets
|
||||
// Each build set needs 1 sampler + 1 storage image
|
||||
// Each HiZ sampling set needs 1 sampler
|
||||
const uint32_t totalBuildSets = MAX_FRAMES * mipLevels_;
|
||||
const uint32_t totalHizSets = MAX_FRAMES;
|
||||
const uint32_t totalSets = totalBuildSets + totalHizSets;
|
||||
|
||||
VkDescriptorPoolSize poolSizes[2] = {};
|
||||
poolSizes[0] = {VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, totalBuildSets + totalHizSets};
|
||||
poolSizes[1] = {VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, totalBuildSets};
|
||||
|
||||
VkDescriptorPoolCreateInfo poolCi{VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO};
|
||||
poolCi.maxSets = totalSets;
|
||||
poolCi.poolSizeCount = 2;
|
||||
poolCi.pPoolSizes = poolSizes;
|
||||
if (vkCreateDescriptorPool(device, &poolCi, nullptr, &buildDescPool_) != VK_SUCCESS) {
|
||||
LOG_ERROR("HiZSystem: failed to create descriptor pool");
|
||||
return false;
|
||||
}
|
||||
|
||||
// We use the same pool for both build and HiZ sets — simpler cleanup
|
||||
hizDescPool_ = VK_NULL_HANDLE; // sharing buildDescPool_
|
||||
|
||||
for (uint32_t f = 0; f < MAX_FRAMES; f++) {
|
||||
// Create a temporary depth image view for sampling the depth buffer.
|
||||
// This is SEPARATE from the VkContext's depth image view because we need
|
||||
// DEPTH aspect sampling which requires specific format view.
|
||||
{
|
||||
VkImage depthSrc = ctx_->getDepthCopySourceImage();
|
||||
VkImageViewCreateInfo viewCi{VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO};
|
||||
viewCi.image = depthSrc;
|
||||
viewCi.viewType = VK_IMAGE_VIEW_TYPE_2D;
|
||||
viewCi.format = ctx_->getDepthFormat();
|
||||
viewCi.subresourceRange.aspectMask = VK_IMAGE_ASPECT_DEPTH_BIT;
|
||||
viewCi.subresourceRange.levelCount = 1;
|
||||
viewCi.subresourceRange.layerCount = 1;
|
||||
if (vkCreateImageView(device, &viewCi, nullptr, &depthSamplerView_[f]) != VK_SUCCESS) {
|
||||
LOG_ERROR("HiZSystem: failed to create depth sampler view for frame ", f);
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
// Allocate per-mip build descriptor sets
|
||||
buildDescSets_[f].resize(mipLevels_);
|
||||
for (uint32_t mip = 0; mip < mipLevels_; mip++) {
|
||||
VkDescriptorSetAllocateInfo allocInfo{VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO};
|
||||
allocInfo.descriptorPool = buildDescPool_;
|
||||
allocInfo.descriptorSetCount = 1;
|
||||
allocInfo.pSetLayouts = &buildSetLayout_;
|
||||
if (vkAllocateDescriptorSets(device, &allocInfo, &buildDescSets_[f][mip]) != VK_SUCCESS) {
|
||||
LOG_ERROR("HiZSystem: failed to allocate build desc set frame=", f, " mip=", mip);
|
||||
return false;
|
||||
}
|
||||
|
||||
// Write descriptors:
|
||||
// Binding 0 (sampler): mip 0 reads depth buffer, mip N reads pyramid mip N-1
|
||||
VkDescriptorImageInfo srcInfo{};
|
||||
srcInfo.sampler = depthSampler_;
|
||||
if (mip == 0) {
|
||||
srcInfo.imageView = depthSamplerView_[f];
|
||||
srcInfo.imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
|
||||
} else {
|
||||
srcInfo.imageView = pyramidViewAll_[f]; // shader uses texelFetch with explicit mip
|
||||
srcInfo.imageLayout = VK_IMAGE_LAYOUT_GENERAL;
|
||||
}
|
||||
|
||||
// Binding 1 (storage image): write to current mip
|
||||
VkDescriptorImageInfo dstInfo{};
|
||||
dstInfo.imageView = pyramidMipViews_[f][mip];
|
||||
dstInfo.imageLayout = VK_IMAGE_LAYOUT_GENERAL;
|
||||
|
||||
VkWriteDescriptorSet writes[2] = {};
|
||||
writes[0] = {VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET};
|
||||
writes[0].dstSet = buildDescSets_[f][mip];
|
||||
writes[0].dstBinding = 0;
|
||||
writes[0].descriptorCount = 1;
|
||||
writes[0].descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER;
|
||||
writes[0].pImageInfo = &srcInfo;
|
||||
|
||||
writes[1] = {VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET};
|
||||
writes[1].dstSet = buildDescSets_[f][mip];
|
||||
writes[1].dstBinding = 1;
|
||||
writes[1].descriptorCount = 1;
|
||||
writes[1].descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE;
|
||||
writes[1].pImageInfo = &dstInfo;
|
||||
|
||||
vkUpdateDescriptorSets(device, 2, writes, 0, nullptr);
|
||||
}
|
||||
|
||||
// Allocate HiZ sampling descriptor set (for M2 cull shader)
|
||||
{
|
||||
VkDescriptorSetAllocateInfo allocInfo{VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO};
|
||||
allocInfo.descriptorPool = buildDescPool_;
|
||||
allocInfo.descriptorSetCount = 1;
|
||||
allocInfo.pSetLayouts = &hizSetLayout_;
|
||||
if (vkAllocateDescriptorSets(device, &allocInfo, &hizDescSet_[f]) != VK_SUCCESS) {
|
||||
LOG_ERROR("HiZSystem: failed to allocate HiZ sampling desc set for frame ", f);
|
||||
return false;
|
||||
}
|
||||
|
||||
VkDescriptorImageInfo hizInfo{};
|
||||
hizInfo.sampler = depthSampler_;
|
||||
hizInfo.imageView = pyramidViewAll_[f];
|
||||
hizInfo.imageLayout = VK_IMAGE_LAYOUT_GENERAL;
|
||||
|
||||
VkWriteDescriptorSet write{VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET};
|
||||
write.dstSet = hizDescSet_[f];
|
||||
write.dstBinding = 0;
|
||||
write.descriptorCount = 1;
|
||||
write.descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER;
|
||||
write.pImageInfo = &hizInfo;
|
||||
vkUpdateDescriptorSets(device, 1, &write, 0, nullptr);
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
void HiZSystem::destroyDescriptors() {
|
||||
if (!ctx_) return;
|
||||
VkDevice device = ctx_->getDevice();
|
||||
|
||||
// All descriptor sets are freed when pool is destroyed
|
||||
if (buildDescPool_) { vkDestroyDescriptorPool(device, buildDescPool_, nullptr); buildDescPool_ = VK_NULL_HANDLE; }
|
||||
// hizDescPool_ shares buildDescPool_, so nothing extra to destroy
|
||||
|
||||
for (uint32_t f = 0; f < MAX_FRAMES; f++) {
|
||||
buildDescSets_[f].clear();
|
||||
hizDescSet_[f] = VK_NULL_HANDLE;
|
||||
if (depthSamplerView_[f]) { vkDestroyImageView(device, depthSamplerView_[f], nullptr); depthSamplerView_[f] = VK_NULL_HANDLE; }
|
||||
}
|
||||
}
|
||||
|
||||
// --- Pyramid build dispatch ---
|
||||
|
||||
// Records the commands that rebuild the depth pyramid of one frame slot.
//
// cmd         Command buffer to record into (outside a render pass).
// frameIndex  Frame slot whose pyramid image / descriptor sets are used.
//             Out-of-range values are ignored.
// depthImage  Depth image that is transitioned to SHADER_READ_ONLY for the
//             duration of the build and back to DEPTH_STENCIL_ATTACHMENT after.
//             NOTE(review): the mip-0 descriptor samples a view of
//             ctx_->getDepthCopySourceImage(); presumably callers pass that
//             same image here — confirm.
//
// Does nothing when the system is not ready or the pipeline is missing.
void HiZSystem::buildPyramid(VkCommandBuffer cmd, uint32_t frameIndex, VkImage depthImage) {
    ZoneScopedN("HiZSystem::buildPyramid");
    if (!ready_ || !buildPipeline_) return;

    // Never index the per-frame arrays with a bad slot, and never bind
    // descriptor sets that were not (fully) created.
    if (frameIndex >= MAX_FRAMES) return;
    if (buildDescSets_[frameIndex].size() < mipLevels_) return;

    // Records one image memory barrier without queue-family ownership transfer.
    const auto imageBarrier = [cmd](VkImage image, VkImageAspectFlags aspect,
                                    uint32_t baseMip, uint32_t mipCount,
                                    VkImageLayout oldLayout, VkImageLayout newLayout,
                                    VkAccessFlags srcAccess, VkAccessFlags dstAccess,
                                    VkPipelineStageFlags srcStage, VkPipelineStageFlags dstStage) {
        VkImageMemoryBarrier barrier{VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER};
        barrier.srcAccessMask = srcAccess;
        barrier.dstAccessMask = dstAccess;
        barrier.oldLayout = oldLayout;
        barrier.newLayout = newLayout;
        barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
        barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
        barrier.image = image;
        barrier.subresourceRange.aspectMask = aspect;
        barrier.subresourceRange.baseMipLevel = baseMip;
        barrier.subresourceRange.levelCount = mipCount;
        barrier.subresourceRange.layerCount = 1;
        vkCmdPipelineBarrier(cmd, srcStage, dstStage, 0, 0, nullptr, 0, nullptr, 1, &barrier);
    };

    VkImage pyramid = pyramidImage_[frameIndex];

    // Depth: DEPTH_STENCIL_ATTACHMENT -> SHADER_READ_ONLY so mip 0 can sample it.
    // NOTE(review): only the DEPTH aspect is transitioned; if getDepthFormat()
    // can be a combined depth/stencil format the STENCIL aspect may need to be
    // included as well — confirm.
    imageBarrier(depthImage, VK_IMAGE_ASPECT_DEPTH_BIT, 0, 1,
                 VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL,
                 VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL,
                 VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT, VK_ACCESS_SHADER_READ_BIT,
                 VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT);

    // Pyramid: discard old contents and move every mip to GENERAL for storage
    // writes. The source stage is COMPUTE_SHADER (not TOP_OF_PIPE) so that the
    // transition waits for earlier compute dispatches that still sample this
    // pyramid; an execution dependency is sufficient for that write-after-read.
    imageBarrier(pyramid, VK_IMAGE_ASPECT_COLOR_BIT, 0, mipLevels_,
                 VK_IMAGE_LAYOUT_UNDEFINED, VK_IMAGE_LAYOUT_GENERAL,
                 0, VK_ACCESS_SHADER_WRITE_BIT,
                 VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT);

    vkCmdBindPipeline(cmd, VK_PIPELINE_BIND_POINT_COMPUTE, buildPipeline_);

    // Build each mip level sequentially; every level halves the previous size.
    uint32_t mipW = pyramidWidth_;
    uint32_t mipH = pyramidHeight_;

    for (uint32_t mip = 0; mip < mipLevels_; mip++) {
        vkCmdBindDescriptorSets(cmd, VK_PIPELINE_BIND_POINT_COMPUTE,
                                buildPipelineLayout_, 0, 1, &buildDescSets_[frameIndex][mip], 0, nullptr);

        // Push constants: destination size + mip level
        HiZBuildPushConstants pc{};
        pc.dstWidth = static_cast<int32_t>(mipW);
        pc.dstHeight = static_cast<int32_t>(mipH);
        pc.mipLevel = static_cast<int32_t>(mip);
        vkCmdPushConstants(cmd, buildPipelineLayout_, VK_SHADER_STAGE_COMPUTE_BIT,
                           0, sizeof(pc), &pc);

        // Dispatch dimensions are rounded up to whole 8x8 groups.
        const uint32_t groupsX = (mipW + 7) / 8;
        const uint32_t groupsY = (mipH + 7) / 8;
        vkCmdDispatch(cmd, groupsX, groupsY, 1);

        // Make the writes to this mip visible to later compute reads: the next
        // mip's dispatch and, for the final mip as well, any later compute
        // shader that samples the finished pyramid.
        imageBarrier(pyramid, VK_IMAGE_ASPECT_COLOR_BIT, mip, 1,
                     VK_IMAGE_LAYOUT_GENERAL, VK_IMAGE_LAYOUT_GENERAL,
                     VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_SHADER_READ_BIT,
                     VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT);

        // Next mip level dimensions
        mipW = std::max(1u, mipW / 2);
        mipH = std::max(1u, mipH / 2);
    }

    // Depth: back to DEPTH_STENCIL_ATTACHMENT for the next frame's rendering.
    imageBarrier(depthImage, VK_IMAGE_ASPECT_DEPTH_BIT, 0, 1,
                 VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL,
                 VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL,
                 VK_ACCESS_SHADER_READ_BIT,
                 VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT | VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT,
                 VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT);
}
|
||||
|
||||
} // namespace rendering
|
||||
} // namespace wowee
|
||||
|
|
@ -295,7 +295,7 @@ bool M2Renderer::initialize(VkContext* ctx, VkDescriptorSetLayout perFrameLayout
|
|||
// Output: uint visibility[] read back by CPU to skip culled instances in sortedVisible_ build.
|
||||
{
|
||||
static_assert(sizeof(CullInstanceGPU) == 32, "CullInstanceGPU must be 32 bytes (std430)");
|
||||
static_assert(sizeof(CullUniformsGPU) == 128, "CullUniformsGPU must be 128 bytes (std140)");
|
||||
static_assert(sizeof(CullUniformsGPU) == 272, "CullUniformsGPU must be 272 bytes (std140)");
|
||||
|
||||
// Descriptor set layout: binding 0 = UBO (frustum+camera), 1 = SSBO (input), 2 = SSBO (output)
|
||||
VkDescriptorSetLayoutBinding bindings[3] = {};
|
||||
|
|
@ -338,6 +338,54 @@ bool M2Renderer::initialize(VkContext* ctx, VkDescriptorSetLayout perFrameLayout
|
|||
cullComp.destroy();
|
||||
}
|
||||
|
||||
// HiZ-aware cull pipeline (Phase 6.3 Option B)
|
||||
// Uses set 0 (same as frustum-only) + set 1 (HiZ pyramid sampler from HiZSystem).
|
||||
// The HiZ descriptor set layout is created lazily when hizSystem_ is set, but the
|
||||
// pipeline layout and shader are created now if the shader is available.
|
||||
rendering::VkShaderModule cullHiZComp;
|
||||
if (cullHiZComp.loadFromFile(device, "assets/shaders/m2_cull_hiz.comp.spv")) {
|
||||
// HiZ cull set 1 layout: single combined image sampler (the HiZ pyramid)
|
||||
VkDescriptorSetLayoutBinding hizBinding{};
|
||||
hizBinding.binding = 0;
|
||||
hizBinding.descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER;
|
||||
hizBinding.descriptorCount = 1;
|
||||
hizBinding.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT;
|
||||
|
||||
VkDescriptorSetLayout hizSamplerLayout = VK_NULL_HANDLE;
|
||||
VkDescriptorSetLayoutCreateInfo hizLayoutCi{VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO};
|
||||
hizLayoutCi.bindingCount = 1;
|
||||
hizLayoutCi.pBindings = &hizBinding;
|
||||
vkCreateDescriptorSetLayout(device, &hizLayoutCi, nullptr, &hizSamplerLayout);
|
||||
|
||||
VkDescriptorSetLayout hizSetLayouts[2] = {cullSetLayout_, hizSamplerLayout};
|
||||
VkPipelineLayoutCreateInfo hizPlCi{VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO};
|
||||
hizPlCi.setLayoutCount = 2;
|
||||
hizPlCi.pSetLayouts = hizSetLayouts;
|
||||
vkCreatePipelineLayout(device, &hizPlCi, nullptr, &cullHiZPipelineLayout_);
|
||||
|
||||
VkComputePipelineCreateInfo hizCpCi{VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO};
|
||||
hizCpCi.stage = cullHiZComp.stageInfo(VK_SHADER_STAGE_COMPUTE_BIT);
|
||||
hizCpCi.layout = cullHiZPipelineLayout_;
|
||||
if (vkCreateComputePipelines(device, VK_NULL_HANDLE, 1, &hizCpCi, nullptr, &cullHiZPipeline_) != VK_SUCCESS) {
|
||||
LOG_WARNING("M2Renderer: failed to create HiZ cull compute pipeline — HiZ disabled");
|
||||
cullHiZPipeline_ = VK_NULL_HANDLE;
|
||||
vkDestroyPipelineLayout(device, cullHiZPipelineLayout_, nullptr);
|
||||
cullHiZPipelineLayout_ = VK_NULL_HANDLE;
|
||||
} else {
|
||||
LOG_INFO("M2Renderer: HiZ occlusion cull pipeline created");
|
||||
}
|
||||
|
||||
// hizSamplerLayout is only needed while vkCreatePipelineLayout runs: under the
// Vulkan object-lifetime rules a descriptor set layout passed in to create
// another object is not accessed by that object afterwards. The descriptor sets
// bound at set 1 are allocated by HiZSystem from its own, identically defined
// layout, so this temporary layout can be destroyed right away.
|
||||
vkDestroyDescriptorSetLayout(device, hizSamplerLayout, nullptr);
|
||||
|
||||
cullHiZComp.destroy();
|
||||
} else {
|
||||
LOG_INFO("M2Renderer: m2_cull_hiz.comp.spv not found — HiZ occlusion culling not available");
|
||||
}
|
||||
|
||||
// Descriptor pool: 2 sets × 3 descriptors each (1 UBO + 2 SSBO)
|
||||
VkDescriptorPoolSize poolSizes[2] = {};
|
||||
poolSizes[0] = {VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, 2};
|
||||
|
|
@ -756,6 +804,14 @@ bool M2Renderer::initialize(VkContext* ctx, VkDescriptorSetLayout perFrameLayout
|
|||
return true;
|
||||
}
|
||||
|
||||
void M2Renderer::invalidateCullOutput(uint32_t frameIndex) {
|
||||
// On non-HOST_COHERENT memory, VMA-mapped GPU→CPU buffers need explicit
|
||||
// invalidation so the CPU cache sees the latest GPU writes.
|
||||
if (frameIndex < 2 && cullOutputAlloc_[frameIndex]) {
|
||||
vmaInvalidateAllocation(vkCtx_->getAllocator(), cullOutputAlloc_[frameIndex], 0, VK_WHOLE_SIZE);
|
||||
}
|
||||
}
|
||||
|
||||
void M2Renderer::shutdown() {
|
||||
LOG_INFO("Shutting down M2 renderer...");
|
||||
if (!vkCtx_) return;
|
||||
|
|
@ -837,6 +893,8 @@ void M2Renderer::shutdown() {
|
|||
if (instanceDescPool_) { vkDestroyDescriptorPool(device, instanceDescPool_, nullptr); instanceDescPool_ = VK_NULL_HANDLE; }
|
||||
|
||||
// GPU frustum culling compute pipeline + buffers cleanup
|
||||
if (cullHiZPipeline_) { vkDestroyPipeline(device, cullHiZPipeline_, nullptr); cullHiZPipeline_ = VK_NULL_HANDLE; }
|
||||
if (cullHiZPipelineLayout_) { vkDestroyPipelineLayout(device, cullHiZPipelineLayout_, nullptr); cullHiZPipelineLayout_ = VK_NULL_HANDLE; }
|
||||
if (cullPipeline_) { vkDestroyPipeline(device, cullPipeline_, nullptr); cullPipeline_ = VK_NULL_HANDLE; }
|
||||
if (cullPipelineLayout_) { vkDestroyPipelineLayout(device, cullPipelineLayout_, nullptr); cullPipelineLayout_ = VK_NULL_HANDLE; }
|
||||
for (int i = 0; i < 2; i++) {
|
||||
|
|
|
|||
|
|
@ -1,6 +1,7 @@
|
|||
#include "rendering/m2_renderer.hpp"
|
||||
#include "rendering/m2_renderer_internal.h"
|
||||
#include "rendering/m2_model_classifier.hpp"
|
||||
#include "rendering/hiz_system.hpp"
|
||||
#include "rendering/vk_context.hpp"
|
||||
#include "rendering/vk_buffer.hpp"
|
||||
#include "rendering/vk_texture.hpp"
|
||||
|
|
@ -600,6 +601,49 @@ void M2Renderer::dispatchCullCompute(VkCommandBuffer cmd, uint32_t frameIndex, c
|
|||
}
|
||||
ubo->cameraPos = glm::vec4(camPos, maxPossibleDistSq);
|
||||
ubo->instanceCount = numInstances;
|
||||
|
||||
// HiZ occlusion culling fields
|
||||
const bool hizReady = hizSystem_ && hizSystem_->isReady();
|
||||
|
||||
// Auto-disable HiZ when the camera has moved/rotated significantly.
|
||||
// Large VP changes make the depth pyramid unreliable because the
|
||||
// reprojected screen positions diverge from the actual pyramid data.
|
||||
bool hizSafe = hizReady;
|
||||
if (hizReady) {
|
||||
// Compare current VP against previous VP — Frobenius-style max diff.
|
||||
float maxDiff = 0.0f;
|
||||
const float* curM = &vp[0][0];
|
||||
const float* prevM = &prevVP_[0][0];
|
||||
for (int k = 0; k < 16; ++k)
|
||||
maxDiff = std::max(maxDiff, std::abs(curM[k] - prevM[k]));
|
||||
// Threshold: typical small camera motion produces diffs < 0.05.
|
||||
// A fast rotation easily exceeds 0.3. Skip HiZ when diff is large.
|
||||
if (maxDiff > 0.15f) hizSafe = false;
|
||||
}
|
||||
|
||||
ubo->hizEnabled = hizSafe ? 1u : 0u;
|
||||
ubo->hizMipLevels = hizReady ? hizSystem_->getMipLevels() : 0u;
|
||||
ubo->_pad2 = 0;
|
||||
if (hizReady) {
|
||||
ubo->hizParams = glm::vec4(
|
||||
static_cast<float>(hizSystem_->getPyramidWidth()),
|
||||
static_cast<float>(hizSystem_->getPyramidHeight()),
|
||||
camera.getNearPlane(),
|
||||
0.0f
|
||||
);
|
||||
ubo->viewProj = vp;
|
||||
// Use previous frame's VP for HiZ reprojection — the HiZ pyramid
|
||||
// was built from the previous frame's depth, so we must project
|
||||
// into the same screen space to sample the correct depths.
|
||||
ubo->prevViewProj = prevVP_;
|
||||
} else {
|
||||
ubo->hizParams = glm::vec4(0.0f);
|
||||
ubo->viewProj = glm::mat4(1.0f);
|
||||
ubo->prevViewProj = glm::mat4(1.0f);
|
||||
}
|
||||
|
||||
// Save current VP for next frame's temporal reprojection
|
||||
prevVP_ = vp;
|
||||
}
|
||||
|
||||
// --- Upload per-instance cull data (SSBO, binding 1) ---
|
||||
|
|
@ -622,6 +666,10 @@ void M2Renderer::dispatchCullCompute(VkCommandBuffer cmd, uint32_t frameIndex, c
|
|||
if (inst.cachedIsValid) flags |= 1u;
|
||||
if (inst.cachedIsSmoke) flags |= 2u;
|
||||
if (inst.cachedIsInvisibleTrap) flags |= 4u;
|
||||
// Bit 3: previouslyVisible — the shader skips HiZ for objects
|
||||
// that were NOT rendered last frame (no reliable depth data).
|
||||
if (i < prevFrameVisible_.size() && prevFrameVisible_[i])
|
||||
flags |= 8u;
|
||||
|
||||
input[i].sphere = glm::vec4(inst.position, paddedRadius);
|
||||
input[i].effectiveMaxDistSq = effectiveMaxDistSq;
|
||||
|
|
@ -630,9 +678,22 @@ void M2Renderer::dispatchCullCompute(VkCommandBuffer cmd, uint32_t frameIndex, c
|
|||
}
|
||||
|
||||
// --- Dispatch compute shader ---
|
||||
vkCmdBindPipeline(cmd, VK_PIPELINE_BIND_POINT_COMPUTE, cullPipeline_);
|
||||
vkCmdBindDescriptorSets(cmd, VK_PIPELINE_BIND_POINT_COMPUTE,
|
||||
cullPipelineLayout_, 0, 1, &cullSet_[frameIndex], 0, nullptr);
|
||||
const bool useHiZ = (cullHiZPipeline_ != VK_NULL_HANDLE)
|
||||
&& hizSystem_ && hizSystem_->isReady();
|
||||
if (useHiZ) {
|
||||
vkCmdBindPipeline(cmd, VK_PIPELINE_BIND_POINT_COMPUTE, cullHiZPipeline_);
|
||||
// Set 0: cull UBO + input/output SSBOs
|
||||
vkCmdBindDescriptorSets(cmd, VK_PIPELINE_BIND_POINT_COMPUTE,
|
||||
cullHiZPipelineLayout_, 0, 1, &cullSet_[frameIndex], 0, nullptr);
|
||||
// Set 1: HiZ pyramid sampler
|
||||
VkDescriptorSet hizSet = hizSystem_->getDescriptorSet(frameIndex);
|
||||
vkCmdBindDescriptorSets(cmd, VK_PIPELINE_BIND_POINT_COMPUTE,
|
||||
cullHiZPipelineLayout_, 1, 1, &hizSet, 0, nullptr);
|
||||
} else {
|
||||
vkCmdBindPipeline(cmd, VK_PIPELINE_BIND_POINT_COMPUTE, cullPipeline_);
|
||||
vkCmdBindDescriptorSets(cmd, VK_PIPELINE_BIND_POINT_COMPUTE,
|
||||
cullPipelineLayout_, 0, 1, &cullSet_[frameIndex], 0, nullptr);
|
||||
}
|
||||
|
||||
const uint32_t groupCount = (numInstances + 63) / 64;
|
||||
vkCmdDispatch(cmd, groupCount, 1, 1);
|
||||
|
|
@ -693,6 +754,19 @@ void M2Renderer::render(VkCommandBuffer cmd, VkDescriptorSet perFrameSet, const
|
|||
const uint32_t* visibility = static_cast<const uint32_t*>(cullOutputMapped_[frameIndex]);
|
||||
const bool gpuCullAvailable = (cullPipeline_ != VK_NULL_HANDLE && visibility != nullptr);
|
||||
|
||||
// Snapshot the GPU visibility results into prevFrameVisible_ so the NEXT
|
||||
// frame's compute dispatch can set the per-instance `previouslyVisible`
|
||||
// flag (bit 3). Objects not visible this frame will skip HiZ next frame,
|
||||
// avoiding false culls from stale depth data.
|
||||
if (gpuCullAvailable) {
|
||||
prevFrameVisible_.resize(numInstances);
|
||||
for (uint32_t i = 0; i < numInstances; ++i)
|
||||
prevFrameVisible_[i] = visibility[i] ? 1u : 0u;
|
||||
} else {
|
||||
// No GPU cull data — conservatively mark all as visible
|
||||
prevFrameVisible_.assign(static_cast<size_t>(instances.size()), 1u);
|
||||
}
|
||||
|
||||
// If GPU culling was not dispatched, fallback: compute distances on CPU
|
||||
float maxRenderDistanceSq;
|
||||
if (!gpuCullAvailable) {
|
||||
|
|
@ -1074,7 +1148,10 @@ void M2Renderer::render(VkCommandBuffer cmd, VkDescriptorSet perFrameSet, const
|
|||
// Update material UBO
|
||||
if (batch.materialUBOMapped) {
|
||||
auto* mat = static_cast<M2MaterialUBO*>(batch.materialUBOMapped);
|
||||
mat->interiorDarken = insideInterior ? 1.0f : 0.0f;
|
||||
// interiorDarken is a camera-based flag — it darkens ALL M2s (incl.
|
||||
// outdoor trees) when the camera is inside a WMO. Disable it; indoor
|
||||
// M2s already look correct from the darker ambient/lighting.
|
||||
mat->interiorDarken = 0.0f;
|
||||
if (batch.colorKeyBlack)
|
||||
mat->colorKeyThreshold = (effectiveBlendMode == 4 || effectiveBlendMode == 5) ? 0.7f : 0.08f;
|
||||
if (forceCutout) {
|
||||
|
|
@ -1265,7 +1342,7 @@ void M2Renderer::render(VkCommandBuffer cmd, VkDescriptorSet perFrameSet, const
|
|||
|
||||
if (batch.materialUBOMapped) {
|
||||
auto* mat = static_cast<M2MaterialUBO*>(batch.materialUBOMapped);
|
||||
mat->interiorDarken = insideInterior ? 1.0f : 0.0f;
|
||||
mat->interiorDarken = 0.0f;
|
||||
if (batch.colorKeyBlack)
|
||||
mat->colorKeyThreshold = (effectiveBlendMode == 4 || effectiveBlendMode == 5) ? 0.7f : 0.08f;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -23,6 +23,7 @@
|
|||
#include "rendering/character_preview.hpp"
|
||||
#include "rendering/wmo_renderer.hpp"
|
||||
#include "rendering/m2_renderer.hpp"
|
||||
#include "rendering/hiz_system.hpp"
|
||||
#include "rendering/minimap.hpp"
|
||||
#include "rendering/world_map.hpp"
|
||||
#include "rendering/quest_marker_renderer.hpp"
|
||||
|
|
@ -580,7 +581,6 @@ bool Renderer::initialize(core::Window* win) {
|
|||
overlaySystem_ = std::make_unique<OverlaySystem>(vkCtx);
|
||||
renderGraph_->registerResource("shadow_depth");
|
||||
renderGraph_->registerResource("reflection_texture");
|
||||
renderGraph_->registerResource("cull_visibility");
|
||||
renderGraph_->registerResource("scene_color");
|
||||
renderGraph_->registerResource("scene_depth");
|
||||
renderGraph_->registerResource("final_image");
|
||||
|
|
@ -672,6 +672,10 @@ void Renderer::shutdown() {
|
|||
}
|
||||
|
||||
LOG_DEBUG("Renderer::shutdown - m2Renderer...");
|
||||
if (hizSystem_) {
|
||||
hizSystem_->shutdown();
|
||||
hizSystem_.reset();
|
||||
}
|
||||
if (m2Renderer) {
|
||||
m2Renderer->shutdown();
|
||||
m2Renderer.reset();
|
||||
|
|
@ -798,6 +802,17 @@ void Renderer::applyMsaaChange() {
|
|||
|
||||
if (minimap) minimap->recreatePipelines();
|
||||
|
||||
// Resize HiZ pyramid (depth format/MSAA may have changed)
|
||||
if (hizSystem_) {
|
||||
auto ext = vkCtx->getSwapchainExtent();
|
||||
if (!hizSystem_->resize(ext.width, ext.height)) {
|
||||
LOG_WARNING("HiZ resize failed after MSAA change");
|
||||
if (m2Renderer) m2Renderer->setHiZSystem(nullptr);
|
||||
hizSystem_->shutdown();
|
||||
hizSystem_.reset();
|
||||
}
|
||||
}
|
||||
|
||||
// Selection circle + overlay + FSR use lazy init, just destroy them
|
||||
if (overlaySystem_) overlaySystem_->recreatePipelines();
|
||||
if (postProcessPipeline_) postProcessPipeline_->destroyAllResources(); // Will be lazily recreated in beginFrame()
|
||||
|
|
@ -846,6 +861,16 @@ void Renderer::beginFrame() {
|
|||
}
|
||||
// Recreate post-process resources for new swapchain dimensions
|
||||
if (postProcessPipeline_) postProcessPipeline_->handleSwapchainResize();
|
||||
// Resize HiZ depth pyramid for new swapchain dimensions
|
||||
if (hizSystem_) {
|
||||
auto ext = vkCtx->getSwapchainExtent();
|
||||
if (!hizSystem_->resize(ext.width, ext.height)) {
|
||||
LOG_WARNING("HiZ resize failed — disabling occlusion culling");
|
||||
if (m2Renderer) m2Renderer->setHiZSystem(nullptr);
|
||||
hizSystem_->shutdown();
|
||||
hizSystem_.reset();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Acquire swapchain image and begin command buffer
|
||||
|
|
@ -864,6 +889,31 @@ void Renderer::beginFrame() {
|
|||
// Update per-frame UBO with current camera/lighting state
|
||||
updatePerFrameUBO();
|
||||
|
||||
// ── Early compute: HiZ pyramid build + M2 frustum/occlusion cull ──
|
||||
// These run in a SEPARATE command buffer submission so the GPU executes
|
||||
// them immediately. The CPU then reads the fresh visibility results
|
||||
// before recording the main render pass — eliminating the 2-frame
|
||||
// staleness that occurs when compute + render share one submission.
|
||||
if (m2Renderer && camera && vkCtx) {
|
||||
VkCommandBuffer computeCmd = vkCtx->beginSingleTimeCommands();
|
||||
uint32_t frame = vkCtx->getCurrentFrame();
|
||||
|
||||
// Build HiZ depth pyramid from previous frame's depth buffer
|
||||
if (hizSystem_ && hizSystem_->isReady()) {
|
||||
VkImage depthSrc = vkCtx->getDepthCopySourceImage();
|
||||
hizSystem_->buildPyramid(computeCmd, frame, depthSrc);
|
||||
}
|
||||
|
||||
// Dispatch GPU frustum + HiZ occlusion culling
|
||||
m2Renderer->dispatchCullCompute(computeCmd, frame, *camera);
|
||||
|
||||
vkCtx->endSingleTimeCommands(computeCmd);
|
||||
|
||||
// Ensure GPU→CPU buffer writes are visible to host (non-coherent memory).
|
||||
m2Renderer->invalidateCullOutput(frame);
|
||||
// Visibility results are now in cullOutputMapped_[frame], readable by CPU.
|
||||
}
|
||||
|
||||
// --- Off-screen pre-passes ---
|
||||
// Build frame graph: registers pre-passes as graph nodes with dependencies.
|
||||
// compile() topologically sorts; execute() runs them with auto barriers.
|
||||
|
|
@ -1489,7 +1539,9 @@ void Renderer::renderWorld(game::World* world, game::GameHandler* gameHandler) {
|
|||
if (parallelRecordingEnabled_) {
|
||||
// --- Pre-compute state + GPU allocations on main thread (not thread-safe) ---
|
||||
if (m2Renderer && cameraController) {
|
||||
m2Renderer->setInsideInterior(cameraController->isInsideWMO());
|
||||
// Use isInsideInteriorWMO (flag 0x2000) — not isInsideWMO which includes
|
||||
// outdoor WMO groups like archways/bridges that should receive shadows.
|
||||
m2Renderer->setInsideInterior(cameraController->isInsideInteriorWMO());
|
||||
m2Renderer->setOnTaxi(cameraController->isOnTaxi());
|
||||
}
|
||||
if (wmoRenderer) wmoRenderer->prepareRender();
|
||||
|
|
@ -1734,7 +1786,8 @@ void Renderer::renderWorld(game::World* world, game::GameHandler* gameHandler) {
|
|||
|
||||
if (m2Renderer && camera && !skipM2) {
|
||||
if (cameraController) {
|
||||
m2Renderer->setInsideInterior(cameraController->isInsideWMO());
|
||||
// Use isInsideInteriorWMO (flag 0x2000) for correct indoor detection
|
||||
m2Renderer->setInsideInterior(cameraController->isInsideInteriorWMO());
|
||||
m2Renderer->setOnTaxi(cameraController->isOnTaxi());
|
||||
}
|
||||
m2Renderer->prepareRender(frameIdx, *camera);
|
||||
|
|
@ -1887,6 +1940,23 @@ bool Renderer::initializeRenderers(pipeline::AssetManager* assetManager, const s
|
|||
spellVisualSystem_->initialize(m2Renderer.get());
|
||||
}
|
||||
}
|
||||
|
||||
// HiZ occlusion culling — temporal reprojection.
|
||||
// The HiZ pyramid is built from the previous frame's depth buffer. The cull
|
||||
// compute shader uses prevViewProj to project objects into the previous frame's
|
||||
// screen space so that depth samples match the pyramid, eliminating flicker
|
||||
// caused by camera movement between frames.
|
||||
if (!hizSystem_ && m2Renderer && vkCtx) {
|
||||
hizSystem_ = std::make_unique<HiZSystem>();
|
||||
auto extent = vkCtx->getSwapchainExtent();
|
||||
if (hizSystem_->initialize(vkCtx, extent.width, extent.height)) {
|
||||
m2Renderer->setHiZSystem(hizSystem_.get());
|
||||
LOG_INFO("HiZ occlusion culling initialized (", extent.width, "x", extent.height, ")");
|
||||
} else {
|
||||
LOG_WARNING("HiZ occlusion culling unavailable — falling back to frustum-only culling");
|
||||
hizSystem_.reset();
|
||||
}
|
||||
}
|
||||
if (!wmoRenderer) {
|
||||
wmoRenderer = std::make_unique<WMORenderer>();
|
||||
wmoRenderer->initialize(vkCtx, perFrameSetLayout, assetManager);
|
||||
|
|
@ -2627,7 +2697,6 @@ void Renderer::buildFrameGraph(game::GameHandler* gameHandler) {
|
|||
|
||||
auto shadowDepth = renderGraph_->findResource("shadow_depth");
|
||||
auto reflTex = renderGraph_->findResource("reflection_texture");
|
||||
auto cullVis = renderGraph_->findResource("cull_visibility");
|
||||
|
||||
// Minimap composites (no dependencies — standalone off-screen render target)
|
||||
renderGraph_->addPass("minimap_composite", {}, {},
|
||||
|
|
@ -2670,13 +2739,6 @@ void Renderer::buildFrameGraph(game::GameHandler* gameHandler) {
|
|||
renderReflectionPass();
|
||||
});
|
||||
|
||||
// GPU frustum cull compute → outputs cull_visibility
|
||||
renderGraph_->addPass("compute_cull", {}, {cullVis},
|
||||
[this](VkCommandBuffer cmd) {
|
||||
if (m2Renderer && camera)
|
||||
m2Renderer->dispatchCullCompute(cmd, vkCtx->getCurrentFrame(), *camera);
|
||||
});
|
||||
|
||||
renderGraph_->compile();
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -798,7 +798,8 @@ bool VkContext::createDepthBuffer() {
|
|||
imgInfo.arrayLayers = 1;
|
||||
imgInfo.samples = msaaSamples_;
|
||||
imgInfo.tiling = VK_IMAGE_TILING_OPTIMAL;
|
||||
imgInfo.usage = VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT;
|
||||
imgInfo.usage = VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT
|
||||
| VK_IMAGE_USAGE_SAMPLED_BIT; // HiZ pyramid reads depth as texture
|
||||
|
||||
VmaAllocationCreateInfo allocInfo{};
|
||||
allocInfo.usage = VMA_MEMORY_USAGE_GPU_ONLY;
|
||||
|
|
@ -911,7 +912,8 @@ bool VkContext::createDepthResolveImage() {
|
|||
imgInfo.arrayLayers = 1;
|
||||
imgInfo.samples = VK_SAMPLE_COUNT_1_BIT;
|
||||
imgInfo.tiling = VK_IMAGE_TILING_OPTIMAL;
|
||||
imgInfo.usage = VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT;
|
||||
imgInfo.usage = VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT
|
||||
| VK_IMAGE_USAGE_SAMPLED_BIT; // HiZ pyramid reads depth as texture
|
||||
|
||||
VmaAllocationCreateInfo allocInfo{};
|
||||
allocInfo.usage = VMA_MEMORY_USAGE_GPU_ONLY;
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue