feat(rendering): GPU architecture + visual quality fixes

M2 GPU instancing
- M2InstanceGPU SSBO (96 B/entry, double-buffered, 16384 max)
- Group opaque instances by (modelId, LOD); single vkCmdDrawIndexed per group
- boneBase field indexes into mega bone SSBO via gl_InstanceIndex

Indirect terrain drawing
- 24 MB mega index buffer (6M uint32) + 64 MB mega vertex buffer
- CPU builds VkDrawIndexedIndirectCommand per visible chunk
- Single VB/IB bind per frame; shadow pass reuses mega buffers
- Draw submission now uses direct vkCmdDrawIndexed (per-chunk firstIndex /
  vertexOffset into the mega buffers) instead of vkCmdDrawIndexedIndirect, to
  fix a host-mapped buffer race condition that caused terrain flickering

GPU frustum culling (compute shader)
- m2_cull.comp.glsl: 64-thread workgroups, sphere-vs-6-planes + distance cull
- CullInstanceGPU SSBO input, uint visibility[] output, double-buffered
- dispatchCullCompute() runs before main pass via render graph node

Consolidated bone matrix SSBOs
- 16 MB double-buffered mega bone SSBO (2048 instances × 128 bones)
- Eliminated per-instance descriptor sets; one megaBoneSet_ per frame
- prepareRender() packs bone matrices consecutively into current frame slot

Render graph / frame graph
- RenderGraph: RGResource handles, RGPass nodes, Kahn topological sort
- Automatic VkImageMemoryBarrier/VkBufferMemoryBarrier between passes
- Passes: minimap_composite, worldmap_composite, preview_composite,
  shadow_pass, reflection_pass, compute_cull
- beginFrame() uses buildFrameGraph() + renderGraph_->execute(cmd)

Pipeline derivatives
- PipelineBuilder::setFlags/setBasePipeline for VK_PIPELINE_CREATE_DERIVATIVE_BIT
- M2 opaque = base; alphaTest/alpha/additive are derivatives
- Applied to terrain (wireframe) and WMO (alpha-test) renderers

Rendering bug fixes:
- fix(shadow): compute lightSpaceMatrix before updatePerFrameUBO to eliminate
  one-frame lag that caused shadow trails and flicker on moving objects
- fix(shadow): scale depth bias with shadowDistance_ instead of hardcoded 0.8f
  to prevent acne at close range and gaps at far range
- fix(visibility): WMO group distance threshold 500u → 1200u to match terrain
  view distance; buildings were disappearing on the horizon
- fix(precision): camera near plane 0.05 → 0.5 (far:near ratio 600K:1 → 60K:1),
  eliminating Z-fighting and improving frustum plane extraction stability
- fix(streaming): terrain load radius 4 → 6 tiles (~2133u → ~3200u) to exceed
  M2 render distance (2800u) and eliminate pop-in when camera turns;
  unload radius 7 → 9; spawn radius 3 → 4
- fix(visibility): ground-detail M2 distance multiplier 0.75 → 0.9 to reduce
  early pop of grass and debris
This commit is contained in:
Paul 2026-04-04 13:43:16 +03:00
parent ca3cea078b
commit d54e262048
22 changed files with 1579 additions and 494 deletions

View file

@ -128,7 +128,7 @@ bool TerrainRenderer::initialize(VkContext* ctx, VkDescriptorSetLayout perFrameL
vertexAttribs[3] = { 3, 0, VK_FORMAT_R32G32_SFLOAT,
static_cast<uint32_t>(offsetof(pipeline::TerrainVertex, layerUV)) };
// --- Build fill pipeline ---
// --- Build fill pipeline (base for derivatives — shared state optimization) ---
VkRenderPass mainPass = vkCtx->getImGuiRenderPass();
pipeline = PipelineBuilder()
@ -143,6 +143,7 @@ bool TerrainRenderer::initialize(VkContext* ctx, VkDescriptorSetLayout perFrameL
.setLayout(pipelineLayout)
.setRenderPass(mainPass)
.setDynamicStates({ VK_DYNAMIC_STATE_VIEWPORT, VK_DYNAMIC_STATE_SCISSOR })
.setFlags(VK_PIPELINE_CREATE_ALLOW_DERIVATIVES_BIT)
.build(device, vkCtx->getPipelineCache());
if (!pipeline) {
@ -152,7 +153,7 @@ bool TerrainRenderer::initialize(VkContext* ctx, VkDescriptorSetLayout perFrameL
return false;
}
// --- Build wireframe pipeline ---
// --- Build wireframe pipeline (derivative of fill) ---
wireframePipeline = PipelineBuilder()
.setShaders(vertShader.stageInfo(VK_SHADER_STAGE_VERTEX_BIT),
fragShader.stageInfo(VK_SHADER_STAGE_FRAGMENT_BIT))
@ -165,6 +166,8 @@ bool TerrainRenderer::initialize(VkContext* ctx, VkDescriptorSetLayout perFrameL
.setLayout(pipelineLayout)
.setRenderPass(mainPass)
.setDynamicStates({ VK_DYNAMIC_STATE_VIEWPORT, VK_DYNAMIC_STATE_SCISSOR })
.setFlags(VK_PIPELINE_CREATE_DERIVATIVE_BIT)
.setBasePipeline(pipeline)
.build(device, vkCtx->getPipelineCache());
if (!wireframePipeline) {
@ -190,6 +193,64 @@ bool TerrainRenderer::initialize(VkContext* ctx, VkDescriptorSetLayout perFrameL
envSizeMBOrDefault("WOWEE_TERRAIN_TEX_CACHE_MB", 4096) * 1024ull * 1024ull;
LOG_INFO("Terrain texture cache budget: ", textureCacheBudgetBytes_ / (1024 * 1024), " MB");
// Phase 2.2: Allocate mega vertex/index buffers and indirect draw buffer.
// All terrain chunks share these buffers, eliminating per-chunk VB/IB rebinds.
{
VmaAllocator allocator = vkCtx->getAllocator();
// Mega vertex buffer (host-visible for direct write during chunk upload)
VkBufferCreateInfo vbCI{};
vbCI.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO;
vbCI.size = static_cast<VkDeviceSize>(MEGA_VB_MAX_VERTS) * sizeof(pipeline::TerrainVertex);
vbCI.usage = VK_BUFFER_USAGE_VERTEX_BUFFER_BIT;
VmaAllocationCreateInfo vbAllocCI{};
vbAllocCI.usage = VMA_MEMORY_USAGE_CPU_TO_GPU;
vbAllocCI.flags = VMA_ALLOCATION_CREATE_MAPPED_BIT;
VmaAllocationInfo vbInfo{};
if (vmaCreateBuffer(allocator, &vbCI, &vbAllocCI,
&megaVB_, &megaVBAlloc_, &vbInfo) == VK_SUCCESS) {
megaVBMapped_ = vbInfo.pMappedData;
} else {
LOG_WARNING("TerrainRenderer: mega VB allocation failed, per-chunk fallback");
}
// Mega index buffer
VkBufferCreateInfo ibCI{};
ibCI.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO;
ibCI.size = static_cast<VkDeviceSize>(MEGA_IB_MAX_INDICES) * sizeof(uint32_t);
ibCI.usage = VK_BUFFER_USAGE_INDEX_BUFFER_BIT;
VmaAllocationCreateInfo ibAllocCI{};
ibAllocCI.usage = VMA_MEMORY_USAGE_CPU_TO_GPU;
ibAllocCI.flags = VMA_ALLOCATION_CREATE_MAPPED_BIT;
VmaAllocationInfo ibInfo{};
if (vmaCreateBuffer(allocator, &ibCI, &ibAllocCI,
&megaIB_, &megaIBAlloc_, &ibInfo) == VK_SUCCESS) {
megaIBMapped_ = ibInfo.pMappedData;
} else {
LOG_WARNING("TerrainRenderer: mega IB allocation failed, per-chunk fallback");
}
// Indirect draw command buffer
VkBufferCreateInfo indCI{};
indCI.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO;
indCI.size = MAX_INDIRECT_DRAWS * sizeof(VkDrawIndexedIndirectCommand);
indCI.usage = VK_BUFFER_USAGE_INDIRECT_BUFFER_BIT;
VmaAllocationCreateInfo indAllocCI{};
indAllocCI.usage = VMA_MEMORY_USAGE_CPU_TO_GPU;
indAllocCI.flags = VMA_ALLOCATION_CREATE_MAPPED_BIT;
VmaAllocationInfo indInfo{};
if (vmaCreateBuffer(allocator, &indCI, &indAllocCI,
&indirectBuffer_, &indirectAlloc_, &indInfo) == VK_SUCCESS) {
indirectMapped_ = indInfo.pMappedData;
} else {
LOG_WARNING("TerrainRenderer: indirect buffer allocation failed");
}
LOG_INFO("Terrain mega buffers: VB=", vbCI.size / (1024*1024), "MB IB=",
ibCI.size / (1024*1024), "MB indirect=",
indCI.size / 1024, "KB");
}
LOG_INFO("Terrain renderer initialized (Vulkan)");
return true;
}
@ -232,7 +293,7 @@ void TerrainRenderer::recreatePipelines() {
VkRenderPass mainPass = vkCtx->getImGuiRenderPass();
// Rebuild fill pipeline
// Rebuild fill pipeline (base for derivatives — shared state optimization)
pipeline = PipelineBuilder()
.setShaders(vertShader.stageInfo(VK_SHADER_STAGE_VERTEX_BIT),
fragShader.stageInfo(VK_SHADER_STAGE_FRAGMENT_BIT))
@ -245,13 +306,14 @@ void TerrainRenderer::recreatePipelines() {
.setLayout(pipelineLayout)
.setRenderPass(mainPass)
.setDynamicStates({ VK_DYNAMIC_STATE_VIEWPORT, VK_DYNAMIC_STATE_SCISSOR })
.setFlags(VK_PIPELINE_CREATE_ALLOW_DERIVATIVES_BIT)
.build(device, vkCtx->getPipelineCache());
if (!pipeline) {
LOG_ERROR("TerrainRenderer::recreatePipelines: failed to create fill pipeline");
}
// Rebuild wireframe pipeline
// Rebuild wireframe pipeline (derivative of fill)
wireframePipeline = PipelineBuilder()
.setShaders(vertShader.stageInfo(VK_SHADER_STAGE_VERTEX_BIT),
fragShader.stageInfo(VK_SHADER_STAGE_FRAGMENT_BIT))
@ -264,6 +326,8 @@ void TerrainRenderer::recreatePipelines() {
.setLayout(pipelineLayout)
.setRenderPass(mainPass)
.setDynamicStates({ VK_DYNAMIC_STATE_VIEWPORT, VK_DYNAMIC_STATE_SCISSOR })
.setFlags(VK_PIPELINE_CREATE_DERIVATIVE_BIT)
.setBasePipeline(pipeline)
.build(device, vkCtx->getPipelineCache());
if (!wireframePipeline) {
@ -311,6 +375,13 @@ void TerrainRenderer::shutdown() {
if (shadowParamsLayout_) { vkDestroyDescriptorSetLayout(device, shadowParamsLayout_, nullptr); shadowParamsLayout_ = VK_NULL_HANDLE; }
if (shadowParamsUBO_) { vmaDestroyBuffer(allocator, shadowParamsUBO_, shadowParamsAlloc_); shadowParamsUBO_ = VK_NULL_HANDLE; shadowParamsAlloc_ = VK_NULL_HANDLE; }
// Phase 2.2: Destroy mega buffers and indirect draw buffer
if (megaVB_) { vmaDestroyBuffer(allocator, megaVB_, megaVBAlloc_); megaVB_ = VK_NULL_HANDLE; megaVBAlloc_ = VK_NULL_HANDLE; megaVBMapped_ = nullptr; }
if (megaIB_) { vmaDestroyBuffer(allocator, megaIB_, megaIBAlloc_); megaIB_ = VK_NULL_HANDLE; megaIBAlloc_ = VK_NULL_HANDLE; megaIBMapped_ = nullptr; }
if (indirectBuffer_) { vmaDestroyBuffer(allocator, indirectBuffer_, indirectAlloc_); indirectBuffer_ = VK_NULL_HANDLE; indirectAlloc_ = VK_NULL_HANDLE; indirectMapped_ = nullptr; }
megaVBUsed_ = 0;
megaIBUsed_ = 0;
vkCtx = nullptr;
}
@ -537,6 +608,7 @@ TerrainChunkGPU TerrainRenderer::uploadChunk(const pipeline::ChunkMesh& chunk) {
gpuChunk.worldY = chunk.worldY;
gpuChunk.worldZ = chunk.worldZ;
gpuChunk.indexCount = static_cast<uint32_t>(chunk.indices.size());
gpuChunk.vertexCount = static_cast<uint32_t>(chunk.vertices.size());
VkDeviceSize vbSize = chunk.vertices.size() * sizeof(pipeline::TerrainVertex);
AllocatedBuffer vb = uploadBuffer(*vkCtx, chunk.vertices.data(), vbSize,
@ -550,6 +622,25 @@ TerrainChunkGPU TerrainRenderer::uploadChunk(const pipeline::ChunkMesh& chunk) {
gpuChunk.indexBuffer = ib.buffer;
gpuChunk.indexAlloc = ib.allocation;
// Phase 2.2: Also copy into mega buffers so chunks can be drawn from the shared VB/IB
uint32_t vertCount = static_cast<uint32_t>(chunk.vertices.size());
uint32_t idxCount = static_cast<uint32_t>(chunk.indices.size());
if (megaVBMapped_ && megaIBMapped_ &&
megaVBUsed_ + vertCount <= MEGA_VB_MAX_VERTS &&
megaIBUsed_ + idxCount <= MEGA_IB_MAX_INDICES) {
// Copy vertices
auto* vbDst = static_cast<pipeline::TerrainVertex*>(megaVBMapped_) + megaVBUsed_;
std::memcpy(vbDst, chunk.vertices.data(), vertCount * sizeof(pipeline::TerrainVertex));
// Copy indices
auto* ibDst = static_cast<uint32_t*>(megaIBMapped_) + megaIBUsed_;
std::memcpy(ibDst, chunk.indices.data(), idxCount * sizeof(uint32_t));
gpuChunk.megaBaseVertex = static_cast<int32_t>(megaVBUsed_);
gpuChunk.megaFirstIndex = megaIBUsed_;
megaVBUsed_ += vertCount;
megaIBUsed_ += idxCount;
}
return gpuChunk;
}
@ -789,6 +880,15 @@ void TerrainRenderer::render(VkCommandBuffer cmd, VkDescriptorSet perFrameSet, c
renderedChunks = 0;
culledChunks = 0;
// Phase 2.2: Use mega VB + IB when available.
// Bind mega buffers once, then use direct draws with base vertex/index offsets.
const bool useMegaBuffers = (megaVB_ && megaIB_);
if (useMegaBuffers) {
VkDeviceSize megaOffset = 0;
vkCmdBindVertexBuffers(cmd, 0, 1, &megaVB_, &megaOffset);
vkCmdBindIndexBuffer(cmd, megaIB_, 0, VK_INDEX_TYPE_UINT32);
}
for (const auto& chunk : chunks) {
if (!chunk.isValid() || !chunk.materialSet) continue;
@ -808,11 +908,17 @@ void TerrainRenderer::render(VkCommandBuffer cmd, VkDescriptorSet perFrameSet, c
vkCmdBindDescriptorSets(cmd, VK_PIPELINE_BIND_POINT_GRAPHICS, pipelineLayout,
1, 1, &chunk.materialSet, 0, nullptr);
VkDeviceSize offset = 0;
vkCmdBindVertexBuffers(cmd, 0, 1, &chunk.vertexBuffer, &offset);
vkCmdBindIndexBuffer(cmd, chunk.indexBuffer, 0, VK_INDEX_TYPE_UINT32);
vkCmdDrawIndexed(cmd, chunk.indexCount, 1, 0, 0, 0);
if (useMegaBuffers && chunk.megaBaseVertex >= 0) {
// Direct draw from mega buffer — single VB/IB already bound
vkCmdDrawIndexed(cmd, chunk.indexCount, 1,
chunk.megaFirstIndex, chunk.megaBaseVertex, 0);
} else {
// Fallback: per-chunk VB/IB bind + direct draw
VkDeviceSize offset = 0;
vkCmdBindVertexBuffers(cmd, 0, 1, &chunk.vertexBuffer, &offset);
vkCmdBindIndexBuffer(cmd, chunk.indexBuffer, 0, VK_INDEX_TYPE_UINT32);
vkCmdDrawIndexed(cmd, chunk.indexCount, 1, 0, 0, 0);
}
renderedChunks++;
}
@ -986,6 +1092,14 @@ void TerrainRenderer::renderShadow(VkCommandBuffer cmd, const glm::mat4& lightSp
vkCmdPushConstants(cmd, shadowPipelineLayout_, VK_SHADER_STAGE_VERTEX_BIT,
0, 128, &push);
// Phase 2.2: Bind mega buffers once for shadow pass (same as opaque)
const bool useMegaShadow = (megaVB_ && megaIB_);
if (useMegaShadow) {
VkDeviceSize megaOffset = 0;
vkCmdBindVertexBuffers(cmd, 0, 1, &megaVB_, &megaOffset);
vkCmdBindIndexBuffer(cmd, megaIB_, 0, VK_INDEX_TYPE_UINT32);
}
for (const auto& chunk : chunks) {
if (!chunk.isValid()) continue;
@ -995,10 +1109,14 @@ void TerrainRenderer::renderShadow(VkCommandBuffer cmd, const glm::mat4& lightSp
float combinedRadius = shadowRadius + chunk.boundingSphereRadius;
if (distSq > combinedRadius * combinedRadius) continue;
VkDeviceSize offset = 0;
vkCmdBindVertexBuffers(cmd, 0, 1, &chunk.vertexBuffer, &offset);
vkCmdBindIndexBuffer(cmd, chunk.indexBuffer, 0, VK_INDEX_TYPE_UINT16);
vkCmdDrawIndexed(cmd, chunk.indexCount, 1, 0, 0, 0);
if (useMegaShadow && chunk.megaBaseVertex >= 0) {
vkCmdDrawIndexed(cmd, chunk.indexCount, 1, chunk.megaFirstIndex, chunk.megaBaseVertex, 0);
} else {
VkDeviceSize offset = 0;
vkCmdBindVertexBuffers(cmd, 0, 1, &chunk.vertexBuffer, &offset);
vkCmdBindIndexBuffer(cmd, chunk.indexBuffer, 0, VK_INDEX_TYPE_UINT32);
vkCmdDrawIndexed(cmd, chunk.indexCount, 1, 0, 0, 0);
}
}
}