mirror of
https://github.com/Kelsidavis/WoWee.git
synced 2026-04-26 21:13:51 +00:00
feat(rendering): GPU architecture + visual quality fixes
M2 GPU instancing
- M2InstanceGPU SSBO (96 B/entry, double-buffered, 16384 max)
- Group opaque instances by (modelId, LOD); single vkCmdDrawIndexed per group
- boneBase field indexes into mega bone SSBO via gl_InstanceIndex

Indirect terrain drawing
- 24 MB mega index buffer (6M uint32) + 64 MB mega vertex buffer
- CPU builds VkDrawIndexedIndirectCommand per visible chunk
- Single VB/IB bind per frame; shadow pass reuses mega buffers
- Replaced vkCmdDrawIndexedIndirect with direct vkCmdDrawIndexed to fix host-mapped buffer race condition that caused terrain flickering

GPU frustum culling (compute shader)
- m2_cull.comp.glsl: 64-thread workgroups, sphere-vs-6-planes + distance cull
- CullInstanceGPU SSBO input, uint visibility[] output, double-buffered
- dispatchCullCompute() runs before main pass via render graph node

Consolidated bone matrix SSBOs
- 16 MB double-buffered mega bone SSBO (2048 instances × 128 bones)
- Eliminated per-instance descriptor sets; one megaBoneSet_ per frame
- prepareRender() packs bone matrices consecutively into current frame slot

Render graph / frame graph
- RenderGraph: RGResource handles, RGPass nodes, Kahn topological sort
- Automatic VkImageMemoryBarrier/VkBufferMemoryBarrier between passes
- Passes: minimap_composite, worldmap_composite, preview_composite, shadow_pass, reflection_pass, compute_cull
- beginFrame() uses buildFrameGraph() + renderGraph_->execute(cmd)

Pipeline derivatives
- PipelineBuilder::setFlags/setBasePipeline for VK_PIPELINE_CREATE_DERIVATIVE_BIT
- M2 opaque = base; alphaTest/alpha/additive are derivatives
- Applied to terrain (wireframe) and WMO (alpha-test) renderers

Rendering bug fixes:
- fix(shadow): compute lightSpaceMatrix before updatePerFrameUBO to eliminate one-frame lag that caused shadow trails and flicker on moving objects
- fix(shadow): scale depth bias with shadowDistance_ instead of hardcoded 0.8f to prevent acne at close range and gaps at far range
- fix(visibility): WMO group distance threshold 500u → 1200u to match terrain view distance; buildings were disappearing on the horizon
- fix(precision): camera near plane 0.05 → 0.5 (ratio 600K:1 → 60K:1), eliminating Z-fighting and improving frustum plane extraction stability
- fix(streaming): terrain load radius 4 → 6 tiles (~2133u → ~3200u) to exceed M2 render distance (2800u) and eliminate pop-in when camera turns; unload radius 7 → 9; spawn radius 3 → 4
- fix(visibility): ground-detail M2 distance multiplier 0.75 → 0.9 to reduce early pop of grass and debris
This commit is contained in:
parent
ca3cea078b
commit
d54e262048
22 changed files with 1579 additions and 494 deletions
|
|
@ -128,7 +128,7 @@ bool TerrainRenderer::initialize(VkContext* ctx, VkDescriptorSetLayout perFrameL
|
|||
vertexAttribs[3] = { 3, 0, VK_FORMAT_R32G32_SFLOAT,
|
||||
static_cast<uint32_t>(offsetof(pipeline::TerrainVertex, layerUV)) };
|
||||
|
||||
// --- Build fill pipeline ---
|
||||
// --- Build fill pipeline (base for derivatives — shared state optimization) ---
|
||||
VkRenderPass mainPass = vkCtx->getImGuiRenderPass();
|
||||
|
||||
pipeline = PipelineBuilder()
|
||||
|
|
@ -143,6 +143,7 @@ bool TerrainRenderer::initialize(VkContext* ctx, VkDescriptorSetLayout perFrameL
|
|||
.setLayout(pipelineLayout)
|
||||
.setRenderPass(mainPass)
|
||||
.setDynamicStates({ VK_DYNAMIC_STATE_VIEWPORT, VK_DYNAMIC_STATE_SCISSOR })
|
||||
.setFlags(VK_PIPELINE_CREATE_ALLOW_DERIVATIVES_BIT)
|
||||
.build(device, vkCtx->getPipelineCache());
|
||||
|
||||
if (!pipeline) {
|
||||
|
|
@ -152,7 +153,7 @@ bool TerrainRenderer::initialize(VkContext* ctx, VkDescriptorSetLayout perFrameL
|
|||
return false;
|
||||
}
|
||||
|
||||
// --- Build wireframe pipeline ---
|
||||
// --- Build wireframe pipeline (derivative of fill) ---
|
||||
wireframePipeline = PipelineBuilder()
|
||||
.setShaders(vertShader.stageInfo(VK_SHADER_STAGE_VERTEX_BIT),
|
||||
fragShader.stageInfo(VK_SHADER_STAGE_FRAGMENT_BIT))
|
||||
|
|
@ -165,6 +166,8 @@ bool TerrainRenderer::initialize(VkContext* ctx, VkDescriptorSetLayout perFrameL
|
|||
.setLayout(pipelineLayout)
|
||||
.setRenderPass(mainPass)
|
||||
.setDynamicStates({ VK_DYNAMIC_STATE_VIEWPORT, VK_DYNAMIC_STATE_SCISSOR })
|
||||
.setFlags(VK_PIPELINE_CREATE_DERIVATIVE_BIT)
|
||||
.setBasePipeline(pipeline)
|
||||
.build(device, vkCtx->getPipelineCache());
|
||||
|
||||
if (!wireframePipeline) {
|
||||
|
|
@ -190,6 +193,64 @@ bool TerrainRenderer::initialize(VkContext* ctx, VkDescriptorSetLayout perFrameL
|
|||
envSizeMBOrDefault("WOWEE_TERRAIN_TEX_CACHE_MB", 4096) * 1024ull * 1024ull;
|
||||
LOG_INFO("Terrain texture cache budget: ", textureCacheBudgetBytes_ / (1024 * 1024), " MB");
|
||||
|
||||
// Phase 2.2: Allocate mega vertex/index buffers and indirect draw buffer.
|
||||
// All terrain chunks share these buffers, eliminating per-chunk VB/IB rebinds.
|
||||
{
|
||||
VmaAllocator allocator = vkCtx->getAllocator();
|
||||
|
||||
// Mega vertex buffer (host-visible for direct write during chunk upload)
|
||||
VkBufferCreateInfo vbCI{};
|
||||
vbCI.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO;
|
||||
vbCI.size = static_cast<VkDeviceSize>(MEGA_VB_MAX_VERTS) * sizeof(pipeline::TerrainVertex);
|
||||
vbCI.usage = VK_BUFFER_USAGE_VERTEX_BUFFER_BIT;
|
||||
VmaAllocationCreateInfo vbAllocCI{};
|
||||
vbAllocCI.usage = VMA_MEMORY_USAGE_CPU_TO_GPU;
|
||||
vbAllocCI.flags = VMA_ALLOCATION_CREATE_MAPPED_BIT;
|
||||
VmaAllocationInfo vbInfo{};
|
||||
if (vmaCreateBuffer(allocator, &vbCI, &vbAllocCI,
|
||||
&megaVB_, &megaVBAlloc_, &vbInfo) == VK_SUCCESS) {
|
||||
megaVBMapped_ = vbInfo.pMappedData;
|
||||
} else {
|
||||
LOG_WARNING("TerrainRenderer: mega VB allocation failed, per-chunk fallback");
|
||||
}
|
||||
|
||||
// Mega index buffer
|
||||
VkBufferCreateInfo ibCI{};
|
||||
ibCI.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO;
|
||||
ibCI.size = static_cast<VkDeviceSize>(MEGA_IB_MAX_INDICES) * sizeof(uint32_t);
|
||||
ibCI.usage = VK_BUFFER_USAGE_INDEX_BUFFER_BIT;
|
||||
VmaAllocationCreateInfo ibAllocCI{};
|
||||
ibAllocCI.usage = VMA_MEMORY_USAGE_CPU_TO_GPU;
|
||||
ibAllocCI.flags = VMA_ALLOCATION_CREATE_MAPPED_BIT;
|
||||
VmaAllocationInfo ibInfo{};
|
||||
if (vmaCreateBuffer(allocator, &ibCI, &ibAllocCI,
|
||||
&megaIB_, &megaIBAlloc_, &ibInfo) == VK_SUCCESS) {
|
||||
megaIBMapped_ = ibInfo.pMappedData;
|
||||
} else {
|
||||
LOG_WARNING("TerrainRenderer: mega IB allocation failed, per-chunk fallback");
|
||||
}
|
||||
|
||||
// Indirect draw command buffer
|
||||
VkBufferCreateInfo indCI{};
|
||||
indCI.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO;
|
||||
indCI.size = MAX_INDIRECT_DRAWS * sizeof(VkDrawIndexedIndirectCommand);
|
||||
indCI.usage = VK_BUFFER_USAGE_INDIRECT_BUFFER_BIT;
|
||||
VmaAllocationCreateInfo indAllocCI{};
|
||||
indAllocCI.usage = VMA_MEMORY_USAGE_CPU_TO_GPU;
|
||||
indAllocCI.flags = VMA_ALLOCATION_CREATE_MAPPED_BIT;
|
||||
VmaAllocationInfo indInfo{};
|
||||
if (vmaCreateBuffer(allocator, &indCI, &indAllocCI,
|
||||
&indirectBuffer_, &indirectAlloc_, &indInfo) == VK_SUCCESS) {
|
||||
indirectMapped_ = indInfo.pMappedData;
|
||||
} else {
|
||||
LOG_WARNING("TerrainRenderer: indirect buffer allocation failed");
|
||||
}
|
||||
|
||||
LOG_INFO("Terrain mega buffers: VB=", vbCI.size / (1024*1024), "MB IB=",
|
||||
ibCI.size / (1024*1024), "MB indirect=",
|
||||
indCI.size / 1024, "KB");
|
||||
}
|
||||
|
||||
LOG_INFO("Terrain renderer initialized (Vulkan)");
|
||||
return true;
|
||||
}
|
||||
|
|
@ -232,7 +293,7 @@ void TerrainRenderer::recreatePipelines() {
|
|||
|
||||
VkRenderPass mainPass = vkCtx->getImGuiRenderPass();
|
||||
|
||||
// Rebuild fill pipeline
|
||||
// Rebuild fill pipeline (base for derivatives — shared state optimization)
|
||||
pipeline = PipelineBuilder()
|
||||
.setShaders(vertShader.stageInfo(VK_SHADER_STAGE_VERTEX_BIT),
|
||||
fragShader.stageInfo(VK_SHADER_STAGE_FRAGMENT_BIT))
|
||||
|
|
@ -245,13 +306,14 @@ void TerrainRenderer::recreatePipelines() {
|
|||
.setLayout(pipelineLayout)
|
||||
.setRenderPass(mainPass)
|
||||
.setDynamicStates({ VK_DYNAMIC_STATE_VIEWPORT, VK_DYNAMIC_STATE_SCISSOR })
|
||||
.setFlags(VK_PIPELINE_CREATE_ALLOW_DERIVATIVES_BIT)
|
||||
.build(device, vkCtx->getPipelineCache());
|
||||
|
||||
if (!pipeline) {
|
||||
LOG_ERROR("TerrainRenderer::recreatePipelines: failed to create fill pipeline");
|
||||
}
|
||||
|
||||
// Rebuild wireframe pipeline
|
||||
// Rebuild wireframe pipeline (derivative of fill)
|
||||
wireframePipeline = PipelineBuilder()
|
||||
.setShaders(vertShader.stageInfo(VK_SHADER_STAGE_VERTEX_BIT),
|
||||
fragShader.stageInfo(VK_SHADER_STAGE_FRAGMENT_BIT))
|
||||
|
|
@ -264,6 +326,8 @@ void TerrainRenderer::recreatePipelines() {
|
|||
.setLayout(pipelineLayout)
|
||||
.setRenderPass(mainPass)
|
||||
.setDynamicStates({ VK_DYNAMIC_STATE_VIEWPORT, VK_DYNAMIC_STATE_SCISSOR })
|
||||
.setFlags(VK_PIPELINE_CREATE_DERIVATIVE_BIT)
|
||||
.setBasePipeline(pipeline)
|
||||
.build(device, vkCtx->getPipelineCache());
|
||||
|
||||
if (!wireframePipeline) {
|
||||
|
|
@ -311,6 +375,13 @@ void TerrainRenderer::shutdown() {
|
|||
if (shadowParamsLayout_) { vkDestroyDescriptorSetLayout(device, shadowParamsLayout_, nullptr); shadowParamsLayout_ = VK_NULL_HANDLE; }
|
||||
if (shadowParamsUBO_) { vmaDestroyBuffer(allocator, shadowParamsUBO_, shadowParamsAlloc_); shadowParamsUBO_ = VK_NULL_HANDLE; shadowParamsAlloc_ = VK_NULL_HANDLE; }
|
||||
|
||||
// Phase 2.2: Destroy mega buffers and indirect draw buffer
|
||||
if (megaVB_) { vmaDestroyBuffer(allocator, megaVB_, megaVBAlloc_); megaVB_ = VK_NULL_HANDLE; megaVBAlloc_ = VK_NULL_HANDLE; megaVBMapped_ = nullptr; }
|
||||
if (megaIB_) { vmaDestroyBuffer(allocator, megaIB_, megaIBAlloc_); megaIB_ = VK_NULL_HANDLE; megaIBAlloc_ = VK_NULL_HANDLE; megaIBMapped_ = nullptr; }
|
||||
if (indirectBuffer_) { vmaDestroyBuffer(allocator, indirectBuffer_, indirectAlloc_); indirectBuffer_ = VK_NULL_HANDLE; indirectAlloc_ = VK_NULL_HANDLE; indirectMapped_ = nullptr; }
|
||||
megaVBUsed_ = 0;
|
||||
megaIBUsed_ = 0;
|
||||
|
||||
vkCtx = nullptr;
|
||||
}
|
||||
|
||||
|
|
@ -537,6 +608,7 @@ TerrainChunkGPU TerrainRenderer::uploadChunk(const pipeline::ChunkMesh& chunk) {
|
|||
gpuChunk.worldY = chunk.worldY;
|
||||
gpuChunk.worldZ = chunk.worldZ;
|
||||
gpuChunk.indexCount = static_cast<uint32_t>(chunk.indices.size());
|
||||
gpuChunk.vertexCount = static_cast<uint32_t>(chunk.vertices.size());
|
||||
|
||||
VkDeviceSize vbSize = chunk.vertices.size() * sizeof(pipeline::TerrainVertex);
|
||||
AllocatedBuffer vb = uploadBuffer(*vkCtx, chunk.vertices.data(), vbSize,
|
||||
|
|
@ -550,6 +622,25 @@ TerrainChunkGPU TerrainRenderer::uploadChunk(const pipeline::ChunkMesh& chunk) {
|
|||
gpuChunk.indexBuffer = ib.buffer;
|
||||
gpuChunk.indexAlloc = ib.allocation;
|
||||
|
||||
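// Phase 2.2: Also copy into the shared mega VB/IB so render()/renderShadow() can bind once and issue direct vkCmdDrawIndexed calls with per-chunk firstIndex/vertexOffset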
|
||||
uint32_t vertCount = static_cast<uint32_t>(chunk.vertices.size());
|
||||
uint32_t idxCount = static_cast<uint32_t>(chunk.indices.size());
|
||||
if (megaVBMapped_ && megaIBMapped_ &&
|
||||
megaVBUsed_ + vertCount <= MEGA_VB_MAX_VERTS &&
|
||||
megaIBUsed_ + idxCount <= MEGA_IB_MAX_INDICES) {
|
||||
// Copy vertices
|
||||
auto* vbDst = static_cast<pipeline::TerrainVertex*>(megaVBMapped_) + megaVBUsed_;
|
||||
std::memcpy(vbDst, chunk.vertices.data(), vertCount * sizeof(pipeline::TerrainVertex));
|
||||
// Copy indices
|
||||
auto* ibDst = static_cast<uint32_t*>(megaIBMapped_) + megaIBUsed_;
|
||||
std::memcpy(ibDst, chunk.indices.data(), idxCount * sizeof(uint32_t));
|
||||
|
||||
gpuChunk.megaBaseVertex = static_cast<int32_t>(megaVBUsed_);
|
||||
gpuChunk.megaFirstIndex = megaIBUsed_;
|
||||
megaVBUsed_ += vertCount;
|
||||
megaIBUsed_ += idxCount;
|
||||
}
|
||||
|
||||
return gpuChunk;
|
||||
}
|
||||
|
||||
|
|
@ -789,6 +880,15 @@ void TerrainRenderer::render(VkCommandBuffer cmd, VkDescriptorSet perFrameSet, c
|
|||
renderedChunks = 0;
|
||||
culledChunks = 0;
|
||||
|
||||
// Phase 2.2: Use mega VB + IB when available.
|
||||
// Bind mega buffers once, then use direct draws with base vertex/index offsets.
|
||||
const bool useMegaBuffers = (megaVB_ && megaIB_);
|
||||
if (useMegaBuffers) {
|
||||
VkDeviceSize megaOffset = 0;
|
||||
vkCmdBindVertexBuffers(cmd, 0, 1, &megaVB_, &megaOffset);
|
||||
vkCmdBindIndexBuffer(cmd, megaIB_, 0, VK_INDEX_TYPE_UINT32);
|
||||
}
|
||||
|
||||
for (const auto& chunk : chunks) {
|
||||
if (!chunk.isValid() || !chunk.materialSet) continue;
|
||||
|
||||
|
|
@ -808,11 +908,17 @@ void TerrainRenderer::render(VkCommandBuffer cmd, VkDescriptorSet perFrameSet, c
|
|||
vkCmdBindDescriptorSets(cmd, VK_PIPELINE_BIND_POINT_GRAPHICS, pipelineLayout,
|
||||
1, 1, &chunk.materialSet, 0, nullptr);
|
||||
|
||||
VkDeviceSize offset = 0;
|
||||
vkCmdBindVertexBuffers(cmd, 0, 1, &chunk.vertexBuffer, &offset);
|
||||
vkCmdBindIndexBuffer(cmd, chunk.indexBuffer, 0, VK_INDEX_TYPE_UINT32);
|
||||
|
||||
vkCmdDrawIndexed(cmd, chunk.indexCount, 1, 0, 0, 0);
|
||||
if (useMegaBuffers && chunk.megaBaseVertex >= 0) {
|
||||
// Direct draw from mega buffer — single VB/IB already bound
|
||||
vkCmdDrawIndexed(cmd, chunk.indexCount, 1,
|
||||
chunk.megaFirstIndex, chunk.megaBaseVertex, 0);
|
||||
} else {
|
||||
// Fallback: per-chunk VB/IB bind + direct draw
|
||||
VkDeviceSize offset = 0;
|
||||
vkCmdBindVertexBuffers(cmd, 0, 1, &chunk.vertexBuffer, &offset);
|
||||
vkCmdBindIndexBuffer(cmd, chunk.indexBuffer, 0, VK_INDEX_TYPE_UINT32);
|
||||
vkCmdDrawIndexed(cmd, chunk.indexCount, 1, 0, 0, 0);
|
||||
}
|
||||
renderedChunks++;
|
||||
}
|
||||
|
||||
|
|
@ -986,6 +1092,14 @@ void TerrainRenderer::renderShadow(VkCommandBuffer cmd, const glm::mat4& lightSp
|
|||
vkCmdPushConstants(cmd, shadowPipelineLayout_, VK_SHADER_STAGE_VERTEX_BIT,
|
||||
0, 128, &push);
|
||||
|
||||
// Phase 2.2: Bind mega buffers once for shadow pass (same as opaque)
|
||||
const bool useMegaShadow = (megaVB_ && megaIB_);
|
||||
if (useMegaShadow) {
|
||||
VkDeviceSize megaOffset = 0;
|
||||
vkCmdBindVertexBuffers(cmd, 0, 1, &megaVB_, &megaOffset);
|
||||
vkCmdBindIndexBuffer(cmd, megaIB_, 0, VK_INDEX_TYPE_UINT32);
|
||||
}
|
||||
|
||||
for (const auto& chunk : chunks) {
|
||||
if (!chunk.isValid()) continue;
|
||||
|
||||
|
|
@ -995,10 +1109,14 @@ void TerrainRenderer::renderShadow(VkCommandBuffer cmd, const glm::mat4& lightSp
|
|||
float combinedRadius = shadowRadius + chunk.boundingSphereRadius;
|
||||
if (distSq > combinedRadius * combinedRadius) continue;
|
||||
|
||||
VkDeviceSize offset = 0;
|
||||
vkCmdBindVertexBuffers(cmd, 0, 1, &chunk.vertexBuffer, &offset);
|
||||
vkCmdBindIndexBuffer(cmd, chunk.indexBuffer, 0, VK_INDEX_TYPE_UINT16);
|
||||
vkCmdDrawIndexed(cmd, chunk.indexCount, 1, 0, 0, 0);
|
||||
if (useMegaShadow && chunk.megaBaseVertex >= 0) {
|
||||
vkCmdDrawIndexed(cmd, chunk.indexCount, 1, chunk.megaFirstIndex, chunk.megaBaseVertex, 0);
|
||||
} else {
|
||||
VkDeviceSize offset = 0;
|
||||
vkCmdBindVertexBuffers(cmd, 0, 1, &chunk.vertexBuffer, &offset);
|
||||
vkCmdBindIndexBuffer(cmd, chunk.indexBuffer, 0, VK_INDEX_TYPE_UINT32);
|
||||
vkCmdDrawIndexed(cmd, chunk.indexCount, 1, 0, 0, 0);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue