Kelsidavis-WoWee/src/rendering/render_graph.cpp
Paul d54e262048 feat(rendering): GPU architecture + visual quality fixes
M2 GPU instancing
- M2InstanceGPU SSBO (96 B/entry, double-buffered, 16384 max)
- Group opaque instances by (modelId, LOD); single vkCmdDrawIndexed per group
- boneBase field indexes into mega bone SSBO via gl_InstanceIndex

Indirect terrain drawing
- 24 MB mega index buffer (6M uint32) + 64 MB mega vertex buffer
- CPU builds VkDrawIndexedIndirectCommand per visible chunk
- Single VB/IB bind per frame; shadow pass reuses mega buffers
- Replaced vkCmdDrawIndexedIndirect with direct vkCmdDrawIndexed to fix
  host-mapped buffer race condition that caused terrain flickering

GPU frustum culling (compute shader)
- m2_cull.comp.glsl: 64-thread workgroups, sphere-vs-6-planes + distance cull
- CullInstanceGPU SSBO input, uint visibility[] output, double-buffered
- dispatchCullCompute() runs before main pass via render graph node

Consolidated bone matrix SSBOs
- 16 MB double-buffered mega bone SSBO (2048 instances × 128 bones)
- Eliminated per-instance descriptor sets; one megaBoneSet_ per frame
- prepareRender() packs bone matrices consecutively into current frame slot

Render graph / frame graph
- RenderGraph: RGResource handles, RGPass nodes, Kahn topological sort
- Automatic VkImageMemoryBarrier/VkBufferMemoryBarrier between passes
- Passes: minimap_composite, worldmap_composite, preview_composite,
  shadow_pass, reflection_pass, compute_cull
- beginFrame() uses buildFrameGraph() + renderGraph_->execute(cmd)

Pipeline derivatives
- PipelineBuilder::setFlags/setBasePipeline for VK_PIPELINE_CREATE_DERIVATIVE_BIT
- M2 opaque = base; alphaTest/alpha/additive are derivatives
- Applied to terrain (wireframe) and WMO (alpha-test) renderers

Rendering bug fixes:
- fix(shadow): compute lightSpaceMatrix before updatePerFrameUBO to eliminate
  one-frame lag that caused shadow trails and flicker on moving objects
- fix(shadow): scale depth bias with shadowDistance_ instead of hardcoded 0.8f
  to prevent acne at close range and gaps at far range
- fix(visibility): WMO group distance threshold 500u → 1200u to match terrain
  view distance; buildings were disappearing on the horizon
- fix(precision): camera near plane 0.05 → 0.5 (ratio 600K:1 → 60K:1),
  eliminating Z-fighting and improving frustum plane extraction stability
- fix(streaming): terrain load radius 4 → 6 tiles (~2133u → ~3200u) to exceed
  M2 render distance (2800u) and eliminate pop-in when camera turns;
  unload radius 7 → 9; spawn radius 3 → 4
- fix(visibility): ground-detail M2 distance multiplier 0.75 → 0.9 to reduce
  early pop of grass and debris
2026-04-04 13:43:16 +03:00

194 lines
6 KiB
C++

#include "rendering/render_graph.hpp"
#include "core/logger.hpp"
#include <algorithm>
#include <unordered_map>
#include <queue>
namespace wowee {
namespace rendering {
void RenderGraph::reset() {
    // Per-frame state is rebuilt from scratch each frame; drop it all here.
    // The resource registry is deliberately NOT cleared — resource handles
    // are stable across frames and callers cache them.
    compiled_ = false;
    executionOrder_.clear();
    passes_.clear();
}
RGResource RenderGraph::registerResource(const std::string& name) {
    // Idempotent: registering a name that already exists hands back the
    // original handle instead of minting a duplicate id.
    auto existing = std::find_if(resources_.begin(), resources_.end(),
                                 [&name](const decltype(resources_)::value_type& r) {
                                     return r.name == name;
                                 });
    if (existing != resources_.end()) {
        return {existing->id};
    }
    const uint32_t freshId = nextResourceId_++;
    resources_.push_back({name, freshId});
    return {freshId};
}
RGResource RenderGraph::findResource(const std::string& name) const {
    // Linear scan by name; returns a default-constructed (invalid) handle
    // when the resource was never registered.
    auto it = std::find_if(resources_.begin(), resources_.end(),
                           [&name](const decltype(resources_)::value_type& r) {
                               return r.name == name;
                           });
    if (it == resources_.end()) {
        return {};
    }
    return {it->id};
}
void RenderGraph::addPass(const std::string& name,
const std::vector<RGResource>& inputs,
const std::vector<RGResource>& outputs,
std::function<void(VkCommandBuffer cmd)> execute) {
RGPass pass;
pass.name = name;
pass.inputs = inputs;
pass.outputs = outputs;
pass.execute = std::move(execute);
pass.enabled = true;
passes_.push_back(std::move(pass));
}
void RenderGraph::setPassEnabled(const std::string& name, bool enabled) {
for (auto& pass : passes_) {
if (pass.name == name) {
pass.enabled = enabled;
return;
}
}
}
void RenderGraph::compile() {
    // Derive a dependency-respecting execution order for the registered
    // passes, then flag the graph as ready so execute() skips recompiling.
    topologicalSort();
    compiled_ = true;
}
void RenderGraph::topologicalSort() {
    executionOrder_.clear();
    const uint32_t passCount = static_cast<uint32_t>(passes_.size());
    if (passCount == 0) return;

    // Map each resource id to the index of the pass that writes it.
    // NOTE: if several passes write the same resource, the last writer wins.
    std::unordered_map<uint32_t, uint32_t> writerOf;
    for (uint32_t p = 0; p < passCount; ++p) {
        for (const auto& out : passes_[p].outputs) {
            writerOf[out.id] = p;
        }
    }

    // Add an edge writer → reader for every input some *other* pass produces
    // (self-edges from a pass reading its own output are skipped).
    std::vector<std::vector<uint32_t>> successors(passCount);
    std::vector<uint32_t> pendingDeps(passCount, 0);
    for (uint32_t p = 0; p < passCount; ++p) {
        for (const auto& in : passes_[p].inputs) {
            auto w = writerOf.find(in.id);
            if (w != writerOf.end() && w->second != p) {
                successors[w->second].push_back(p);
                ++pendingDeps[p];
            }
        }
    }

    // Kahn's algorithm. A plain vector with a moving head index serves as the
    // FIFO work list, so visit order matches a std::queue exactly.
    std::vector<uint32_t> ready;
    ready.reserve(passCount);
    for (uint32_t p = 0; p < passCount; ++p) {
        if (pendingDeps[p] == 0) ready.push_back(p);
    }
    executionOrder_.reserve(passCount);
    for (size_t head = 0; head < ready.size(); ++head) {
        const uint32_t current = ready[head];
        executionOrder_.push_back(current);
        for (uint32_t reader : successors[current]) {
            if (--pendingDeps[reader] == 0) ready.push_back(reader);
        }
    }

    // If some passes never reached in-degree zero there is a cycle;
    // degrade gracefully to plain insertion order rather than dropping passes.
    if (executionOrder_.size() != passCount) {
        LOG_WARNING("RenderGraph: dependency cycle detected, falling back to insertion order");
        executionOrder_.clear();
        for (uint32_t p = 0; p < passCount; ++p) executionOrder_.push_back(p);
    }
}
// Record all enabled passes into `cmd` in compiled dependency order.
// For each pass, any image/buffer barriers declared on the pass are batched
// into a single vkCmdPipelineBarrier call before the pass body runs.
// NOTE(review): barriers here use the stage/access/layout values declared on
// the pass verbatim — this function does no validation of them.
void RenderGraph::execute(VkCommandBuffer cmd) {
if (!compiled_) {
// Self-heal rather than crash: compile on demand, but warn because the
// caller should have compiled after building the graph.
LOG_WARNING("RenderGraph::execute called without compile()");
compile();
}
for (uint32_t idx : executionOrder_) {
const auto& pass = passes_[idx];
if (!pass.enabled) continue;
// Insert image barriers declared for this pass.
// All of the pass's image barriers share one vkCmdPipelineBarrier call;
// srcStages/dstStages are the union (bitwise OR) of every barrier's stages.
if (!pass.imageBarriers.empty()) {
std::vector<VkImageMemoryBarrier> barriers;
barriers.reserve(pass.imageBarriers.size());
VkPipelineStageFlags srcStages = 0;
VkPipelineStageFlags dstStages = 0;
for (const auto& b : pass.imageBarriers) {
VkImageMemoryBarrier ib{};
ib.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER;
ib.oldLayout = b.oldLayout;
ib.newLayout = b.newLayout;
ib.srcAccessMask = b.srcAccess;
ib.dstAccessMask = b.dstAccess;
// No queue-family ownership transfer — same queue family on both sides.
ib.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
ib.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
ib.image = b.image;
// Covers mip level 0 and array layer 0 only — single-mip, single-layer
// images; multi-mip resources would need a wider subresource range.
ib.subresourceRange = {b.aspectMask, 0, 1, 0, 1};
barriers.push_back(ib);
srcStages |= b.srcStage;
dstStages |= b.dstStage;
}
vkCmdPipelineBarrier(cmd,
srcStages, dstStages,
0,
0, nullptr,
0, nullptr,
static_cast<uint32_t>(barriers.size()), barriers.data());
}
// Insert buffer barriers declared for this pass (same batching scheme:
// one call, OR-ed stage masks).
if (!pass.bufferBarriers.empty()) {
std::vector<VkBufferMemoryBarrier> barriers;
barriers.reserve(pass.bufferBarriers.size());
VkPipelineStageFlags srcStages = 0;
VkPipelineStageFlags dstStages = 0;
for (const auto& b : pass.bufferBarriers) {
VkBufferMemoryBarrier bb{};
bb.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER;
bb.srcAccessMask = b.srcAccess;
bb.dstAccessMask = b.dstAccess;
bb.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
bb.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
bb.buffer = b.buffer;
bb.offset = b.offset;
bb.size = b.size;
barriers.push_back(bb);
srcStages |= b.srcStage;
dstStages |= b.dstStage;
}
vkCmdPipelineBarrier(cmd,
srcStages, dstStages,
0,
0, nullptr,
static_cast<uint32_t>(barriers.size()), barriers.data(),
0, nullptr);
}
// Execute the pass body (records the pass's own draw/dispatch commands).
pass.execute(cmd);
}
}
} // namespace rendering
} // namespace wowee