feat(rendering): GPU architecture + visual quality fixes

M2 GPU instancing
- M2InstanceGPU SSBO (96 B/entry, double-buffered, 16384 max)
- Group opaque instances by (modelId, LOD); single vkCmdDrawIndexed per group
- boneBase field indexes into mega bone SSBO via gl_InstanceIndex

Indirect terrain drawing
- 24 MB mega index buffer (6M uint32) + 64 MB mega vertex buffer
- CPU builds VkDrawIndexedIndirectCommand per visible chunk
- Single VB/IB bind per frame; shadow pass reuses mega buffers
- Replaced vkCmdDrawIndexedIndirect with direct vkCmdDrawIndexed to fix
  host-mapped buffer race condition that caused terrain flickering

GPU frustum culling (compute shader)
- m2_cull.comp.glsl: 64-thread workgroups, sphere-vs-6-planes + distance cull
- CullInstanceGPU SSBO input, uint visibility[] output, double-buffered
- dispatchCullCompute() runs before main pass via render graph node

Consolidated bone matrix SSBOs
- 16 MB double-buffered mega bone SSBO (2048 instances × 128 bones)
- Eliminated per-instance descriptor sets; one megaBoneSet_ per frame
- prepareRender() packs bone matrices consecutively into current frame slot

Render graph / frame graph
- RenderGraph: RGResource handles, RGPass nodes, Kahn topological sort
- Automatic VkImageMemoryBarrier/VkBufferMemoryBarrier between passes
- Passes: minimap_composite, worldmap_composite, preview_composite,
  shadow_pass, reflection_pass, compute_cull
- beginFrame() uses buildFrameGraph() + renderGraph_->execute(cmd)

Pipeline derivatives
- PipelineBuilder::setFlags/setBasePipeline for VK_PIPELINE_CREATE_DERIVATIVE_BIT
- M2 opaque = base; alphaTest/alpha/additive are derivatives
- Applied to terrain (wireframe) and WMO (alpha-test) renderers

Rendering bug fixes:
- fix(shadow): compute lightSpaceMatrix before updatePerFrameUBO to eliminate
  one-frame lag that caused shadow trails and flicker on moving objects
- fix(shadow): scale depth bias with shadowDistance_ instead of hardcoded 0.8f
  to prevent acne at close range and gaps at far range
- fix(visibility): WMO group distance threshold 500u → 1200u to match terrain
  view distance; buildings were disappearing on the horizon
- fix(precision): camera near plane 0.05 → 0.5 (ratio 600K:1 → 60K:1),
  eliminating Z-fighting and improving frustum plane extraction stability
- fix(streaming): terrain load radius 4 → 6 tiles (~2133u → ~3200u) to exceed
  M2 render distance (2800u) and eliminate pop-in when camera turns;
  unload radius 7 → 9; spawn radius 3 → 4
- fix(visibility): ground-detail M2 distance multiplier 0.75 → 0.9 to reduce
  early pop of grass and debris
This commit is contained in:
Paul 2026-04-04 13:43:16 +03:00
parent ca3cea078b
commit d54e262048
22 changed files with 1579 additions and 494 deletions

View file

@ -734,9 +734,9 @@ void WorldLoader::loadOnlineWorldTerrain(uint32_t mapId, float x, float y, float
// Use a small radius for the initial load (just immediate tiles),
// then restore the full radius after entering the game.
// This matches WoW's behavior: load quickly, stream the rest in-game.
const int savedLoadRadius = 4;
terrainMgr->setLoadRadius(3); // 7x7=49 tiles — prevents hitches on spawn
terrainMgr->setUnloadRadius(7);
const int savedLoadRadius = 6;
terrainMgr->setLoadRadius(4); // 9x9=81 tiles — prevents hitches on spawn
terrainMgr->setUnloadRadius(9);
// Trigger tile streaming for surrounding area
terrainMgr->update(*camera, 1.0f);

View file

@ -111,13 +111,13 @@ int main([[maybe_unused]] int argc, [[maybe_unused]] char* argv[]) {
_NSGetExecutablePath(nullptr, &bufSize);
std::string exePath(bufSize, '\0');
_NSGetExecutablePath(exePath.data(), &bufSize);
chdir(dirname(exePath.data()));
if (chdir(dirname(exePath.data())) != 0) {}
}
#elif defined(__linux__)
{
char buf[4096];
ssize_t len = readlink("/proc/self/exe", buf, sizeof(buf) - 1);
if (len > 0) { buf[len] = '\0'; chdir(dirname(buf)); }
if (len > 0) { buf[len] = '\0'; if (chdir(dirname(buf)) != 0) {} }
}
#endif

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,194 @@
#include "rendering/render_graph.hpp"
#include "core/logger.hpp"
#include <algorithm>
#include <unordered_map>
#include <queue>
namespace wowee {
namespace rendering {
// Clear all per-frame pass state so the graph can be rebuilt from scratch.
// The resource registry is deliberately left intact: resource handles are
// stable across frames and callers cache them via findResource().
void RenderGraph::reset() {
    compiled_ = false;
    executionOrder_.clear();
    passes_.clear();
}
// Register a named virtual resource and return its handle.
// Idempotent: registering an already-known name returns the original handle
// instead of allocating a new id.
RGResource RenderGraph::registerResource(const std::string& name) {
    auto existing = std::find_if(resources_.begin(), resources_.end(),
                                 [&name](const auto& res) { return res.name == name; });
    if (existing != resources_.end()) {
        return {existing->id};
    }
    const uint32_t freshId = nextResourceId_++;
    resources_.push_back({name, freshId});
    return {freshId};
}
// Look up a previously registered resource by name.
// Returns a default-constructed (invalid) handle when the name is unknown.
RGResource RenderGraph::findResource(const std::string& name) const {
    auto it = std::find_if(resources_.begin(), resources_.end(),
                           [&name](const auto& res) { return res.name == name; });
    return it != resources_.end() ? RGResource{it->id} : RGResource{};
}
void RenderGraph::addPass(const std::string& name,
const std::vector<RGResource>& inputs,
const std::vector<RGResource>& outputs,
std::function<void(VkCommandBuffer cmd)> execute) {
RGPass pass;
pass.name = name;
pass.inputs = inputs;
pass.outputs = outputs;
pass.execute = std::move(execute);
pass.enabled = true;
passes_.push_back(std::move(pass));
}
void RenderGraph::setPassEnabled(const std::string& name, bool enabled) {
for (auto& pass : passes_) {
if (pass.name == name) {
pass.enabled = enabled;
return;
}
}
}
// Finalize the graph for this frame: derive the execution order from the
// declared input/output dependencies and mark the graph ready for execute().
void RenderGraph::compile() {
    topologicalSort();
    compiled_ = true;
}
// Derive executionOrder_ from pass dependencies using Kahn's algorithm.
// Edge rule: if pass A outputs resource R and pass B inputs R, then A must
// run before B (edge A → B).
//
// Fix: the previous implementation mapped each resource to a SINGLE producer
// (the last pass registered that outputs it), silently dropping dependency
// edges whenever two passes wrote the same resource. We now track every
// producer per resource so all such edges are honored.
//
// On a dependency cycle (not all passes reachable), logs a warning and falls
// back to insertion order so the frame still renders.
void RenderGraph::topologicalSort() {
    const uint32_t n = static_cast<uint32_t>(passes_.size());
    if (n == 0) { executionOrder_.clear(); return; }
    // Map: resource id → indices of ALL passes that produce it.
    std::unordered_map<uint32_t, std::vector<uint32_t>> producers;
    for (uint32_t i = 0; i < n; ++i) {
        for (const auto& out : passes_[i].outputs) {
            producers[out.id].push_back(i);
        }
    }
    // Build in-degree and adjacency list. Self-edges (a pass that both reads
    // and writes a resource) are skipped to avoid trivial cycles.
    std::vector<uint32_t> inDegree(n, 0);
    std::vector<std::vector<uint32_t>> adj(n);
    for (uint32_t i = 0; i < n; ++i) {
        for (const auto& inp : passes_[i].inputs) {
            auto it = producers.find(inp.id);
            if (it == producers.end()) continue;
            for (uint32_t prod : it->second) {
                if (prod == i) continue;
                adj[prod].push_back(i);
                inDegree[i]++;
            }
        }
    }
    // Kahn's algorithm: repeatedly emit passes with no unsatisfied dependencies.
    std::queue<uint32_t> ready;
    for (uint32_t i = 0; i < n; ++i) {
        if (inDegree[i] == 0) ready.push(i);
    }
    executionOrder_.clear();
    executionOrder_.reserve(n);
    while (!ready.empty()) {
        uint32_t u = ready.front();
        ready.pop();
        executionOrder_.push_back(u);
        for (uint32_t v : adj[u]) {
            if (--inDegree[v] == 0) ready.push(v);
        }
    }
    // Incomplete order means a cycle — fall back to insertion order.
    if (executionOrder_.size() != n) {
        LOG_WARNING("RenderGraph: dependency cycle detected, falling back to insertion order");
        executionOrder_.clear();
        for (uint32_t i = 0; i < n; ++i) executionOrder_.push_back(i);
    }
}
// Run every enabled pass in compiled topological order, emitting the image
// and buffer barriers each pass declared before invoking its callback.
// Auto-compiles (with a warning) if the caller forgot to call compile().
//
// Fix: vkCmdPipelineBarrier requires non-zero src/dst stage masks; if a pass
// declared barriers without stages, the accumulated masks were 0 (invalid
// per the Vulkan spec). We now fall back to the conservative
// VK_PIPELINE_STAGE_ALL_COMMANDS_BIT. Also guards against a null execute
// callback instead of crashing on std::bad_function_call.
void RenderGraph::execute(VkCommandBuffer cmd) {
    if (!compiled_) {
        LOG_WARNING("RenderGraph::execute called without compile()");
        compile();
    }
    for (uint32_t idx : executionOrder_) {
        const auto& pass = passes_[idx];
        if (!pass.enabled) continue;
        // Insert image barriers declared for this pass
        if (!pass.imageBarriers.empty()) {
            std::vector<VkImageMemoryBarrier> barriers;
            barriers.reserve(pass.imageBarriers.size());
            VkPipelineStageFlags srcStages = 0;
            VkPipelineStageFlags dstStages = 0;
            for (const auto& b : pass.imageBarriers) {
                VkImageMemoryBarrier ib{};
                ib.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER;
                ib.oldLayout = b.oldLayout;
                ib.newLayout = b.newLayout;
                ib.srcAccessMask = b.srcAccess;
                ib.dstAccessMask = b.dstAccess;
                ib.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
                ib.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
                ib.image = b.image;
                // Single mip level, single array layer per transition.
                ib.subresourceRange = {b.aspectMask, 0, 1, 0, 1};
                barriers.push_back(ib);
                srcStages |= b.srcStage;
                dstStages |= b.dstStage;
            }
            // Zero stage masks are invalid — fall back to the conservative
            // "everything" stage if a barrier was declared without stages.
            if (srcStages == 0) srcStages = VK_PIPELINE_STAGE_ALL_COMMANDS_BIT;
            if (dstStages == 0) dstStages = VK_PIPELINE_STAGE_ALL_COMMANDS_BIT;
            vkCmdPipelineBarrier(cmd,
                                 srcStages, dstStages,
                                 0,
                                 0, nullptr,
                                 0, nullptr,
                                 static_cast<uint32_t>(barriers.size()), barriers.data());
        }
        // Insert buffer barriers declared for this pass
        if (!pass.bufferBarriers.empty()) {
            std::vector<VkBufferMemoryBarrier> barriers;
            barriers.reserve(pass.bufferBarriers.size());
            VkPipelineStageFlags srcStages = 0;
            VkPipelineStageFlags dstStages = 0;
            for (const auto& b : pass.bufferBarriers) {
                VkBufferMemoryBarrier bb{};
                bb.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER;
                bb.srcAccessMask = b.srcAccess;
                bb.dstAccessMask = b.dstAccess;
                bb.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
                bb.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
                bb.buffer = b.buffer;
                bb.offset = b.offset;
                bb.size = b.size;
                barriers.push_back(bb);
                srcStages |= b.srcStage;
                dstStages |= b.dstStage;
            }
            // Same zero-mask guard as above.
            if (srcStages == 0) srcStages = VK_PIPELINE_STAGE_ALL_COMMANDS_BIT;
            if (dstStages == 0) dstStages = VK_PIPELINE_STAGE_ALL_COMMANDS_BIT;
            vkCmdPipelineBarrier(cmd,
                                 srcStages, dstStages,
                                 0,
                                 0, nullptr,
                                 static_cast<uint32_t>(barriers.size()), barriers.data(),
                                 0, nullptr);
        }
        // Execute the pass (skip gracefully if no callback was supplied).
        if (pass.execute) pass.execute(cmd);
    }
}
} // namespace rendering
} // namespace wowee

View file

@ -61,6 +61,7 @@
#include "rendering/spell_visual_system.hpp"
#include "rendering/post_process_pipeline.hpp"
#include "rendering/animation_controller.hpp"
#include "rendering/render_graph.hpp"
#include <imgui.h>
#include <imgui_impl_vulkan.h>
#include <glm/gtc/matrix_transform.hpp>
@ -458,7 +459,9 @@ void Renderer::updatePerFrameUBO() {
}
currentFrameData.lightSpaceMatrix = lightSpaceMatrix;
currentFrameData.shadowParams = glm::vec4(shadowsEnabled ? 1.0f : 0.0f, 0.8f, 0.0f, 0.0f);
// Scale shadow bias proportionally to ortho extent to avoid acne at close range / gaps at far range
float shadowBias = 0.8f * (shadowDistance_ / 300.0f);
currentFrameData.shadowParams = glm::vec4(shadowsEnabled ? 1.0f : 0.0f, shadowBias, 0.0f, 0.0f);
// Player water ripple data: pack player XY into shadowParams.zw, ripple strength into fogParams.w
if (cameraController) {
@ -563,6 +566,15 @@ bool Renderer::initialize(core::Window* win) {
postProcessPipeline_ = std::make_unique<PostProcessPipeline>();
postProcessPipeline_->initialize(vkCtx);
// Phase 2.5: Create render graph and register virtual resources
renderGraph_ = std::make_unique<RenderGraph>();
renderGraph_->registerResource("shadow_depth");
renderGraph_->registerResource("reflection_texture");
renderGraph_->registerResource("cull_visibility");
renderGraph_->registerResource("scene_color");
renderGraph_->registerResource("scene_depth");
renderGraph_->registerResource("final_image");
LOG_INFO("Renderer initialized");
return true;
}
@ -674,6 +686,10 @@ void Renderer::shutdown() {
postProcessPipeline_->shutdown();
postProcessPipeline_.reset();
}
// Phase 2.5: Destroy render graph
renderGraph_.reset();
destroyPerFrameResources();
zoneManager.reset();
@ -839,36 +855,19 @@ void Renderer::beginFrame() {
// FSR2 jitter pattern (§4.3 — delegates to PostProcessPipeline)
if (postProcessPipeline_ && camera) postProcessPipeline_->applyJitter(camera.get());
// Compute fresh shadow matrix BEFORE UBO update so shaders get current-frame data.
lightSpaceMatrix = computeLightSpaceMatrix();
// Update per-frame UBO with current camera/lighting state
updatePerFrameUBO();
// --- Off-screen pre-passes (before main render pass) ---
// Minimap composite (renders 3x3 tile grid into 768x768 render target)
if (minimap && minimap->isEnabled() && camera) {
glm::vec3 minimapCenter = camera->getPosition();
if (cameraController && cameraController->isThirdPerson())
minimapCenter = characterPosition;
minimap->compositePass(currentCmd, minimapCenter);
// --- Off-screen pre-passes (Phase 2.5: render graph) ---
// Build frame graph: registers pre-passes as graph nodes with dependencies.
// compile() topologically sorts; execute() runs them with auto barriers.
buildFrameGraph(nullptr);
if (renderGraph_) {
renderGraph_->execute(currentCmd);
}
// World map composite (renders zone tiles into 1024x768 render target)
if (worldMap) {
worldMap->compositePass(currentCmd);
}
// Character preview composite passes
for (auto* preview : activePreviews_) {
if (preview && preview->isModelLoaded()) {
preview->compositePass(currentCmd, vkCtx->getCurrentFrame());
}
}
// Shadow pre-pass (before main render pass)
if (shadowsEnabled && shadowDepthImage[0] != VK_NULL_HANDLE) {
renderShadowPass();
}
// Water reflection pre-pass (renders scene from mirrored camera into 512x512 texture)
renderReflectionPass();
// --- Begin render pass ---
// Select framebuffer: PP off-screen target or swapchain (§4.3 — PostProcessPipeline)
@ -3063,17 +3062,10 @@ void Renderer::renderShadowPass() {
// Shadows render every frame — throttling causes visible flicker on player/NPCs
// Compute and store light space matrix; write to per-frame UBO
lightSpaceMatrix = computeLightSpaceMatrix();
// lightSpaceMatrix was already computed at frame start (before updatePerFrameUBO).
// Zero matrix means character position isn't set yet — skip shadow pass entirely.
if (lightSpaceMatrix == glm::mat4(0.0f)) return;
uint32_t frame = vkCtx->getCurrentFrame();
auto* ubo = reinterpret_cast<GPUPerFrameData*>(perFrameUBOMapped[frame]);
if (ubo) {
ubo->lightSpaceMatrix = lightSpaceMatrix;
ubo->shadowParams.x = shadowsEnabled ? 1.0f : 0.0f;
ubo->shadowParams.y = 0.8f;
}
// Barrier 1: transition this frame's shadow map into writable depth layout.
VkImageMemoryBarrier b1{};
@ -3147,5 +3139,69 @@ void Renderer::renderShadowPass() {
shadowDepthLayout_[frame] = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
}
// Phase 2.5: Build the per-frame render graph for off-screen pre-passes.
// Declares passes as graph nodes with input/output dependencies.
// compile() performs topological sort; execute() runs them with auto barriers.
// NOTE: registration order matters — it is the fallback execution order
// RenderGraph uses when a dependency cycle is detected.
void Renderer::buildFrameGraph(game::GameHandler* gameHandler) {
(void)gameHandler;
if (!renderGraph_) return;
// reset() clears passes but keeps the resource registry, so the handles
// registered in initialize() remain valid across frames.
renderGraph_->reset();
auto shadowDepth = renderGraph_->findResource("shadow_depth");
auto reflTex = renderGraph_->findResource("reflection_texture");
auto cullVis = renderGraph_->findResource("cull_visibility");
// Minimap composite (no dependencies — standalone off-screen render target).
// In third-person the minimap is centered on the character, not the camera.
renderGraph_->addPass("minimap_composite", {}, {},
[this](VkCommandBuffer cmd) {
if (minimap && minimap->isEnabled() && camera) {
glm::vec3 minimapCenter = camera->getPosition();
if (cameraController && cameraController->isThirdPerson())
minimapCenter = characterPosition;
minimap->compositePass(cmd, minimapCenter);
}
});
// World map composite (standalone)
renderGraph_->addPass("worldmap_composite", {}, {},
[this](VkCommandBuffer cmd) {
if (worldMap) worldMap->compositePass(cmd);
});
// Character preview composites (standalone)
renderGraph_->addPass("preview_composite", {}, {},
[this](VkCommandBuffer cmd) {
uint32_t frame = vkCtx->getCurrentFrame();
for (auto* preview : activePreviews_) {
if (preview && preview->isModelLoaded())
preview->compositePass(cmd, frame);
}
});
// Shadow pre-pass → outputs shadow_depth.
// renderShadowPass() records into its own command scope, so the lambda's
// VkCommandBuffer parameter is intentionally unused here.
renderGraph_->addPass("shadow_pass", {}, {shadowDepth},
[this](VkCommandBuffer) {
if (shadowsEnabled && shadowDepthImage[0] != VK_NULL_HANDLE)
renderShadowPass();
});
// Disable the node outright when shadows are off; the lambda re-checks the
// same condition defensively in case state changes between build and execute.
renderGraph_->setPassEnabled("shadow_pass", shadowsEnabled && shadowDepthImage[0] != VK_NULL_HANDLE);
// Reflection pre-pass → outputs reflection_texture (reads scene, so after shadow)
renderGraph_->addPass("reflection_pass", {shadowDepth}, {reflTex},
[this](VkCommandBuffer) {
renderReflectionPass();
});
// GPU frustum cull compute → outputs cull_visibility.
// No declared inputs, so the sorter may place it anywhere; it only needs to
// finish before the main pass, which runs after execute() returns.
renderGraph_->addPass("compute_cull", {}, {cullVis},
[this](VkCommandBuffer cmd) {
if (m2Renderer && camera)
m2Renderer->dispatchCullCompute(cmd, vkCtx->getCurrentFrame(), *camera);
});
renderGraph_->compile();
}
} // namespace rendering
} // namespace wowee

View file

@ -128,7 +128,7 @@ bool TerrainRenderer::initialize(VkContext* ctx, VkDescriptorSetLayout perFrameL
vertexAttribs[3] = { 3, 0, VK_FORMAT_R32G32_SFLOAT,
static_cast<uint32_t>(offsetof(pipeline::TerrainVertex, layerUV)) };
// --- Build fill pipeline ---
// --- Build fill pipeline (base for derivatives — shared state optimization) ---
VkRenderPass mainPass = vkCtx->getImGuiRenderPass();
pipeline = PipelineBuilder()
@ -143,6 +143,7 @@ bool TerrainRenderer::initialize(VkContext* ctx, VkDescriptorSetLayout perFrameL
.setLayout(pipelineLayout)
.setRenderPass(mainPass)
.setDynamicStates({ VK_DYNAMIC_STATE_VIEWPORT, VK_DYNAMIC_STATE_SCISSOR })
.setFlags(VK_PIPELINE_CREATE_ALLOW_DERIVATIVES_BIT)
.build(device, vkCtx->getPipelineCache());
if (!pipeline) {
@ -152,7 +153,7 @@ bool TerrainRenderer::initialize(VkContext* ctx, VkDescriptorSetLayout perFrameL
return false;
}
// --- Build wireframe pipeline ---
// --- Build wireframe pipeline (derivative of fill) ---
wireframePipeline = PipelineBuilder()
.setShaders(vertShader.stageInfo(VK_SHADER_STAGE_VERTEX_BIT),
fragShader.stageInfo(VK_SHADER_STAGE_FRAGMENT_BIT))
@ -165,6 +166,8 @@ bool TerrainRenderer::initialize(VkContext* ctx, VkDescriptorSetLayout perFrameL
.setLayout(pipelineLayout)
.setRenderPass(mainPass)
.setDynamicStates({ VK_DYNAMIC_STATE_VIEWPORT, VK_DYNAMIC_STATE_SCISSOR })
.setFlags(VK_PIPELINE_CREATE_DERIVATIVE_BIT)
.setBasePipeline(pipeline)
.build(device, vkCtx->getPipelineCache());
if (!wireframePipeline) {
@ -190,6 +193,64 @@ bool TerrainRenderer::initialize(VkContext* ctx, VkDescriptorSetLayout perFrameL
envSizeMBOrDefault("WOWEE_TERRAIN_TEX_CACHE_MB", 4096) * 1024ull * 1024ull;
LOG_INFO("Terrain texture cache budget: ", textureCacheBudgetBytes_ / (1024 * 1024), " MB");
// Phase 2.2: Allocate mega vertex/index buffers and indirect draw buffer.
// All terrain chunks share these buffers, eliminating per-chunk VB/IB rebinds.
{
VmaAllocator allocator = vkCtx->getAllocator();
// Mega vertex buffer (host-visible for direct write during chunk upload)
VkBufferCreateInfo vbCI{};
vbCI.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO;
vbCI.size = static_cast<VkDeviceSize>(MEGA_VB_MAX_VERTS) * sizeof(pipeline::TerrainVertex);
vbCI.usage = VK_BUFFER_USAGE_VERTEX_BUFFER_BIT;
VmaAllocationCreateInfo vbAllocCI{};
vbAllocCI.usage = VMA_MEMORY_USAGE_CPU_TO_GPU;
vbAllocCI.flags = VMA_ALLOCATION_CREATE_MAPPED_BIT;
VmaAllocationInfo vbInfo{};
if (vmaCreateBuffer(allocator, &vbCI, &vbAllocCI,
&megaVB_, &megaVBAlloc_, &vbInfo) == VK_SUCCESS) {
megaVBMapped_ = vbInfo.pMappedData;
} else {
LOG_WARNING("TerrainRenderer: mega VB allocation failed, per-chunk fallback");
}
// Mega index buffer
VkBufferCreateInfo ibCI{};
ibCI.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO;
ibCI.size = static_cast<VkDeviceSize>(MEGA_IB_MAX_INDICES) * sizeof(uint32_t);
ibCI.usage = VK_BUFFER_USAGE_INDEX_BUFFER_BIT;
VmaAllocationCreateInfo ibAllocCI{};
ibAllocCI.usage = VMA_MEMORY_USAGE_CPU_TO_GPU;
ibAllocCI.flags = VMA_ALLOCATION_CREATE_MAPPED_BIT;
VmaAllocationInfo ibInfo{};
if (vmaCreateBuffer(allocator, &ibCI, &ibAllocCI,
&megaIB_, &megaIBAlloc_, &ibInfo) == VK_SUCCESS) {
megaIBMapped_ = ibInfo.pMappedData;
} else {
LOG_WARNING("TerrainRenderer: mega IB allocation failed, per-chunk fallback");
}
// Indirect draw command buffer
VkBufferCreateInfo indCI{};
indCI.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO;
indCI.size = MAX_INDIRECT_DRAWS * sizeof(VkDrawIndexedIndirectCommand);
indCI.usage = VK_BUFFER_USAGE_INDIRECT_BUFFER_BIT;
VmaAllocationCreateInfo indAllocCI{};
indAllocCI.usage = VMA_MEMORY_USAGE_CPU_TO_GPU;
indAllocCI.flags = VMA_ALLOCATION_CREATE_MAPPED_BIT;
VmaAllocationInfo indInfo{};
if (vmaCreateBuffer(allocator, &indCI, &indAllocCI,
&indirectBuffer_, &indirectAlloc_, &indInfo) == VK_SUCCESS) {
indirectMapped_ = indInfo.pMappedData;
} else {
LOG_WARNING("TerrainRenderer: indirect buffer allocation failed");
}
LOG_INFO("Terrain mega buffers: VB=", vbCI.size / (1024*1024), "MB IB=",
ibCI.size / (1024*1024), "MB indirect=",
indCI.size / 1024, "KB");
}
LOG_INFO("Terrain renderer initialized (Vulkan)");
return true;
}
@ -232,7 +293,7 @@ void TerrainRenderer::recreatePipelines() {
VkRenderPass mainPass = vkCtx->getImGuiRenderPass();
// Rebuild fill pipeline
// Rebuild fill pipeline (base for derivatives — shared state optimization)
pipeline = PipelineBuilder()
.setShaders(vertShader.stageInfo(VK_SHADER_STAGE_VERTEX_BIT),
fragShader.stageInfo(VK_SHADER_STAGE_FRAGMENT_BIT))
@ -245,13 +306,14 @@ void TerrainRenderer::recreatePipelines() {
.setLayout(pipelineLayout)
.setRenderPass(mainPass)
.setDynamicStates({ VK_DYNAMIC_STATE_VIEWPORT, VK_DYNAMIC_STATE_SCISSOR })
.setFlags(VK_PIPELINE_CREATE_ALLOW_DERIVATIVES_BIT)
.build(device, vkCtx->getPipelineCache());
if (!pipeline) {
LOG_ERROR("TerrainRenderer::recreatePipelines: failed to create fill pipeline");
}
// Rebuild wireframe pipeline
// Rebuild wireframe pipeline (derivative of fill)
wireframePipeline = PipelineBuilder()
.setShaders(vertShader.stageInfo(VK_SHADER_STAGE_VERTEX_BIT),
fragShader.stageInfo(VK_SHADER_STAGE_FRAGMENT_BIT))
@ -264,6 +326,8 @@ void TerrainRenderer::recreatePipelines() {
.setLayout(pipelineLayout)
.setRenderPass(mainPass)
.setDynamicStates({ VK_DYNAMIC_STATE_VIEWPORT, VK_DYNAMIC_STATE_SCISSOR })
.setFlags(VK_PIPELINE_CREATE_DERIVATIVE_BIT)
.setBasePipeline(pipeline)
.build(device, vkCtx->getPipelineCache());
if (!wireframePipeline) {
@ -311,6 +375,13 @@ void TerrainRenderer::shutdown() {
if (shadowParamsLayout_) { vkDestroyDescriptorSetLayout(device, shadowParamsLayout_, nullptr); shadowParamsLayout_ = VK_NULL_HANDLE; }
if (shadowParamsUBO_) { vmaDestroyBuffer(allocator, shadowParamsUBO_, shadowParamsAlloc_); shadowParamsUBO_ = VK_NULL_HANDLE; shadowParamsAlloc_ = VK_NULL_HANDLE; }
// Phase 2.2: Destroy mega buffers and indirect draw buffer
if (megaVB_) { vmaDestroyBuffer(allocator, megaVB_, megaVBAlloc_); megaVB_ = VK_NULL_HANDLE; megaVBAlloc_ = VK_NULL_HANDLE; megaVBMapped_ = nullptr; }
if (megaIB_) { vmaDestroyBuffer(allocator, megaIB_, megaIBAlloc_); megaIB_ = VK_NULL_HANDLE; megaIBAlloc_ = VK_NULL_HANDLE; megaIBMapped_ = nullptr; }
if (indirectBuffer_) { vmaDestroyBuffer(allocator, indirectBuffer_, indirectAlloc_); indirectBuffer_ = VK_NULL_HANDLE; indirectAlloc_ = VK_NULL_HANDLE; indirectMapped_ = nullptr; }
megaVBUsed_ = 0;
megaIBUsed_ = 0;
vkCtx = nullptr;
}
@ -537,6 +608,7 @@ TerrainChunkGPU TerrainRenderer::uploadChunk(const pipeline::ChunkMesh& chunk) {
gpuChunk.worldY = chunk.worldY;
gpuChunk.worldZ = chunk.worldZ;
gpuChunk.indexCount = static_cast<uint32_t>(chunk.indices.size());
gpuChunk.vertexCount = static_cast<uint32_t>(chunk.vertices.size());
VkDeviceSize vbSize = chunk.vertices.size() * sizeof(pipeline::TerrainVertex);
AllocatedBuffer vb = uploadBuffer(*vkCtx, chunk.vertices.data(), vbSize,
@ -550,6 +622,25 @@ TerrainChunkGPU TerrainRenderer::uploadChunk(const pipeline::ChunkMesh& chunk) {
gpuChunk.indexBuffer = ib.buffer;
gpuChunk.indexAlloc = ib.allocation;
// Phase 2.2: Also copy into mega buffers for indirect drawing
uint32_t vertCount = static_cast<uint32_t>(chunk.vertices.size());
uint32_t idxCount = static_cast<uint32_t>(chunk.indices.size());
if (megaVBMapped_ && megaIBMapped_ &&
megaVBUsed_ + vertCount <= MEGA_VB_MAX_VERTS &&
megaIBUsed_ + idxCount <= MEGA_IB_MAX_INDICES) {
// Copy vertices
auto* vbDst = static_cast<pipeline::TerrainVertex*>(megaVBMapped_) + megaVBUsed_;
std::memcpy(vbDst, chunk.vertices.data(), vertCount * sizeof(pipeline::TerrainVertex));
// Copy indices
auto* ibDst = static_cast<uint32_t*>(megaIBMapped_) + megaIBUsed_;
std::memcpy(ibDst, chunk.indices.data(), idxCount * sizeof(uint32_t));
gpuChunk.megaBaseVertex = static_cast<int32_t>(megaVBUsed_);
gpuChunk.megaFirstIndex = megaIBUsed_;
megaVBUsed_ += vertCount;
megaIBUsed_ += idxCount;
}
return gpuChunk;
}
@ -789,6 +880,15 @@ void TerrainRenderer::render(VkCommandBuffer cmd, VkDescriptorSet perFrameSet, c
renderedChunks = 0;
culledChunks = 0;
// Phase 2.2: Use mega VB + IB when available.
// Bind mega buffers once, then use direct draws with base vertex/index offsets.
const bool useMegaBuffers = (megaVB_ && megaIB_);
if (useMegaBuffers) {
VkDeviceSize megaOffset = 0;
vkCmdBindVertexBuffers(cmd, 0, 1, &megaVB_, &megaOffset);
vkCmdBindIndexBuffer(cmd, megaIB_, 0, VK_INDEX_TYPE_UINT32);
}
for (const auto& chunk : chunks) {
if (!chunk.isValid() || !chunk.materialSet) continue;
@ -808,11 +908,17 @@ void TerrainRenderer::render(VkCommandBuffer cmd, VkDescriptorSet perFrameSet, c
vkCmdBindDescriptorSets(cmd, VK_PIPELINE_BIND_POINT_GRAPHICS, pipelineLayout,
1, 1, &chunk.materialSet, 0, nullptr);
VkDeviceSize offset = 0;
vkCmdBindVertexBuffers(cmd, 0, 1, &chunk.vertexBuffer, &offset);
vkCmdBindIndexBuffer(cmd, chunk.indexBuffer, 0, VK_INDEX_TYPE_UINT32);
vkCmdDrawIndexed(cmd, chunk.indexCount, 1, 0, 0, 0);
if (useMegaBuffers && chunk.megaBaseVertex >= 0) {
// Direct draw from mega buffer — single VB/IB already bound
vkCmdDrawIndexed(cmd, chunk.indexCount, 1,
chunk.megaFirstIndex, chunk.megaBaseVertex, 0);
} else {
// Fallback: per-chunk VB/IB bind + direct draw
VkDeviceSize offset = 0;
vkCmdBindVertexBuffers(cmd, 0, 1, &chunk.vertexBuffer, &offset);
vkCmdBindIndexBuffer(cmd, chunk.indexBuffer, 0, VK_INDEX_TYPE_UINT32);
vkCmdDrawIndexed(cmd, chunk.indexCount, 1, 0, 0, 0);
}
renderedChunks++;
}
@ -986,6 +1092,14 @@ void TerrainRenderer::renderShadow(VkCommandBuffer cmd, const glm::mat4& lightSp
vkCmdPushConstants(cmd, shadowPipelineLayout_, VK_SHADER_STAGE_VERTEX_BIT,
0, 128, &push);
// Phase 2.2: Bind mega buffers once for shadow pass (same as opaque)
const bool useMegaShadow = (megaVB_ && megaIB_);
if (useMegaShadow) {
VkDeviceSize megaOffset = 0;
vkCmdBindVertexBuffers(cmd, 0, 1, &megaVB_, &megaOffset);
vkCmdBindIndexBuffer(cmd, megaIB_, 0, VK_INDEX_TYPE_UINT32);
}
for (const auto& chunk : chunks) {
if (!chunk.isValid()) continue;
@ -995,10 +1109,14 @@ void TerrainRenderer::renderShadow(VkCommandBuffer cmd, const glm::mat4& lightSp
float combinedRadius = shadowRadius + chunk.boundingSphereRadius;
if (distSq > combinedRadius * combinedRadius) continue;
VkDeviceSize offset = 0;
vkCmdBindVertexBuffers(cmd, 0, 1, &chunk.vertexBuffer, &offset);
vkCmdBindIndexBuffer(cmd, chunk.indexBuffer, 0, VK_INDEX_TYPE_UINT16);
vkCmdDrawIndexed(cmd, chunk.indexCount, 1, 0, 0, 0);
if (useMegaShadow && chunk.megaBaseVertex >= 0) {
vkCmdDrawIndexed(cmd, chunk.indexCount, 1, chunk.megaFirstIndex, chunk.megaBaseVertex, 0);
} else {
VkDeviceSize offset = 0;
vkCmdBindVertexBuffers(cmd, 0, 1, &chunk.vertexBuffer, &offset);
vkCmdBindIndexBuffer(cmd, chunk.indexBuffer, 0, VK_INDEX_TYPE_UINT32);
vkCmdDrawIndexed(cmd, chunk.indexCount, 1, 0, 0, 0);
}
}
}

View file

@ -334,7 +334,7 @@ bool VkContext::selectPhysicalDevice() {
VkPhysicalDeviceProperties props;
vkGetPhysicalDeviceProperties(physicalDevice, &props);
uint32_t apiVersion = props.apiVersion;
(void)props.apiVersion; // Available if needed for version checks
gpuVendorId_ = props.vendorID;
std::strncpy(gpuName_, props.deviceName, sizeof(gpuName_) - 1);
gpuName_[sizeof(gpuName_) - 1] = '\0';

View file

@ -111,6 +111,17 @@ PipelineBuilder& PipelineBuilder::setDynamicStates(const std::vector<VkDynamicSt
return *this;
}
// Pipeline derivatives — hint driver to share compiled state between similar pipelines
PipelineBuilder& PipelineBuilder::setFlags(VkPipelineCreateFlags flags) {
flags_ = flags;
return *this;
}
PipelineBuilder& PipelineBuilder::setBasePipeline(VkPipeline basePipeline) {
basePipelineHandle_ = basePipeline;
return *this;
}
VkPipeline PipelineBuilder::build(VkDevice device, VkPipelineCache cache) const {
// Vertex input
VkPipelineVertexInputStateCreateInfo vertexInput{};
@ -188,6 +199,9 @@ VkPipeline PipelineBuilder::build(VkDevice device, VkPipelineCache cache) const
pipelineInfo.pColorBlendState = colorBlendAttachments_.empty() ? nullptr : &colorBlending;
pipelineInfo.pDynamicState = dynamicStates_.empty() ? nullptr : &dynamicState;
pipelineInfo.layout = pipelineLayout_;
pipelineInfo.flags = flags_;
pipelineInfo.basePipelineHandle = basePipelineHandle_;
pipelineInfo.basePipelineIndex = -1;
pipelineInfo.renderPass = renderPass_;
pipelineInfo.subpass = subpass_;

View file

@ -169,7 +169,7 @@ bool WMORenderer::initialize(VkContext* ctx, VkDescriptorSetLayout perFrameLayou
vertexAttribs[4] = { 4, 0, VK_FORMAT_R32G32B32A32_SFLOAT,
static_cast<uint32_t>(offsetof(WMOVertexData, tangent)) };
// --- Build opaque pipeline ---
// --- Build opaque pipeline (base for derivatives — shared state optimization) ---
VkRenderPass mainPass = vkCtx_->getImGuiRenderPass();
opaquePipeline_ = PipelineBuilder()
@ -184,6 +184,7 @@ bool WMORenderer::initialize(VkContext* ctx, VkDescriptorSetLayout perFrameLayou
.setLayout(pipelineLayout_)
.setRenderPass(mainPass)
.setDynamicStates({ VK_DYNAMIC_STATE_VIEWPORT, VK_DYNAMIC_STATE_SCISSOR })
.setFlags(VK_PIPELINE_CREATE_ALLOW_DERIVATIVES_BIT)
.build(device, vkCtx_->getPipelineCache());
if (!opaquePipeline_) {
@ -193,7 +194,7 @@ bool WMORenderer::initialize(VkContext* ctx, VkDescriptorSetLayout perFrameLayou
return false;
}
// --- Build transparent pipeline ---
// --- Build transparent pipeline (derivative of opaque) ---
transparentPipeline_ = PipelineBuilder()
.setShaders(vertShader.stageInfo(VK_SHADER_STAGE_VERTEX_BIT),
fragShader.stageInfo(VK_SHADER_STAGE_FRAGMENT_BIT))
@ -206,13 +207,15 @@ bool WMORenderer::initialize(VkContext* ctx, VkDescriptorSetLayout perFrameLayou
.setLayout(pipelineLayout_)
.setRenderPass(mainPass)
.setDynamicStates({ VK_DYNAMIC_STATE_VIEWPORT, VK_DYNAMIC_STATE_SCISSOR })
.setFlags(VK_PIPELINE_CREATE_DERIVATIVE_BIT)
.setBasePipeline(opaquePipeline_)
.build(device, vkCtx_->getPipelineCache());
if (!transparentPipeline_) {
core::Logger::getInstance().warning("WMORenderer: transparent pipeline not available");
}
// --- Build glass pipeline (alpha blend WITH depth write for windows) ---
// --- Build glass pipeline (derivative — alpha blend WITH depth write for windows) ---
glassPipeline_ = PipelineBuilder()
.setShaders(vertShader.stageInfo(VK_SHADER_STAGE_VERTEX_BIT),
fragShader.stageInfo(VK_SHADER_STAGE_FRAGMENT_BIT))
@ -225,9 +228,11 @@ bool WMORenderer::initialize(VkContext* ctx, VkDescriptorSetLayout perFrameLayou
.setLayout(pipelineLayout_)
.setRenderPass(mainPass)
.setDynamicStates({ VK_DYNAMIC_STATE_VIEWPORT, VK_DYNAMIC_STATE_SCISSOR })
.setFlags(VK_PIPELINE_CREATE_DERIVATIVE_BIT)
.setBasePipeline(opaquePipeline_)
.build(device, vkCtx_->getPipelineCache());
// --- Build wireframe pipeline ---
// --- Build wireframe pipeline (derivative of opaque) ---
wireframePipeline_ = PipelineBuilder()
.setShaders(vertShader.stageInfo(VK_SHADER_STAGE_VERTEX_BIT),
fragShader.stageInfo(VK_SHADER_STAGE_FRAGMENT_BIT))
@ -240,6 +245,8 @@ bool WMORenderer::initialize(VkContext* ctx, VkDescriptorSetLayout perFrameLayou
.setLayout(pipelineLayout_)
.setRenderPass(mainPass)
.setDynamicStates({ VK_DYNAMIC_STATE_VIEWPORT, VK_DYNAMIC_STATE_SCISSOR })
.setFlags(VK_PIPELINE_CREATE_DERIVATIVE_BIT)
.setBasePipeline(opaquePipeline_)
.build(device, vkCtx_->getPipelineCache());
if (!wireframePipeline_) {
@ -1434,7 +1441,7 @@ void WMORenderer::render(VkCommandBuffer cmd, VkDescriptorSet perFrameSet, const
if (doDistanceCull) {
glm::vec3 closestPoint = glm::clamp(camPos, gMin, gMax);
float distSq = glm::dot(closestPoint - camPos, closestPoint - camPos);
if (distSq > 250000.0f) {
if (distSq > 1440000.0f) { // 1200 units — matches terrain view distance
result.distanceCulled++;
continue;
}
@ -3733,6 +3740,7 @@ void WMORenderer::recreatePipelines() {
VkRenderPass mainPass = vkCtx_->getImGuiRenderPass();
// Pipeline derivatives — opaque is the base, others derive for shared state optimization
opaquePipeline_ = PipelineBuilder()
.setShaders(vertShader.stageInfo(VK_SHADER_STAGE_VERTEX_BIT),
fragShader.stageInfo(VK_SHADER_STAGE_FRAGMENT_BIT))
@ -3745,6 +3753,7 @@ void WMORenderer::recreatePipelines() {
.setLayout(pipelineLayout_)
.setRenderPass(mainPass)
.setDynamicStates({ VK_DYNAMIC_STATE_VIEWPORT, VK_DYNAMIC_STATE_SCISSOR })
.setFlags(VK_PIPELINE_CREATE_ALLOW_DERIVATIVES_BIT)
.build(device, vkCtx_->getPipelineCache());
transparentPipeline_ = PipelineBuilder()
@ -3759,6 +3768,8 @@ void WMORenderer::recreatePipelines() {
.setLayout(pipelineLayout_)
.setRenderPass(mainPass)
.setDynamicStates({ VK_DYNAMIC_STATE_VIEWPORT, VK_DYNAMIC_STATE_SCISSOR })
.setFlags(VK_PIPELINE_CREATE_DERIVATIVE_BIT)
.setBasePipeline(opaquePipeline_)
.build(device, vkCtx_->getPipelineCache());
glassPipeline_ = PipelineBuilder()
@ -3773,6 +3784,8 @@ void WMORenderer::recreatePipelines() {
.setLayout(pipelineLayout_)
.setRenderPass(mainPass)
.setDynamicStates({ VK_DYNAMIC_STATE_VIEWPORT, VK_DYNAMIC_STATE_SCISSOR })
.setFlags(VK_PIPELINE_CREATE_DERIVATIVE_BIT)
.setBasePipeline(opaquePipeline_)
.build(device, vkCtx_->getPipelineCache());
wireframePipeline_ = PipelineBuilder()
@ -3787,6 +3800,8 @@ void WMORenderer::recreatePipelines() {
.setLayout(pipelineLayout_)
.setRenderPass(mainPass)
.setDynamicStates({ VK_DYNAMIC_STATE_VIEWPORT, VK_DYNAMIC_STATE_SCISSOR })
.setFlags(VK_PIPELINE_CREATE_DERIVATIVE_BIT)
.setBasePipeline(opaquePipeline_)
.build(device, vkCtx_->getPipelineCache());
vertShader.destroy();