Merge pull request #45 from ldmonster/feat/rendering-performance-architecture

[feat] Rendering Architecture & GPU Performance + Bug Fixes
This commit is contained in:
Kelsi Rae Davis 2026-04-05 01:09:03 -07:00 committed by GitHub
commit 6d60717545
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
22 changed files with 1579 additions and 494 deletions

View file

@ -605,6 +605,7 @@ set(WOWEE_SOURCES
src/rendering/wmo_renderer.cpp
src/rendering/m2_renderer.cpp
src/rendering/m2_model_classifier.cpp
src/rendering/render_graph.cpp
src/rendering/quest_marker_renderer.cpp
src/rendering/minimap.cpp
src/rendering/world_map.cpp

View file

@ -13,19 +13,29 @@ layout(set = 0, binding = 0) uniform PerFrame {
vec4 shadowParams;
};
// Phase 2.1: Per-draw push constants (batch-level data only)
layout(push_constant) uniform Push {
mat4 model;
vec2 uvOffset;
int texCoordSet;
int useBones;
int isFoliage;
float fadeAlpha;
int texCoordSet; // UV set index (0 or 1)
int isFoliage; // Foliage wind animation flag
int instanceDataOffset; // Base index into InstanceSSBO for this draw group
} push;
layout(set = 2, binding = 0) readonly buffer BoneSSBO {
mat4 bones[];
};
// Phase 2.1: Per-instance data read via gl_InstanceIndex (GPU instancing)
struct InstanceData {
mat4 model;
vec2 uvOffset;
float fadeAlpha;
int useBones;
int boneBase;
};
layout(set = 3, binding = 0) readonly buffer InstanceSSBO {
InstanceData instanceData[];
};
layout(location = 0) in vec3 aPos;
layout(location = 1) in vec3 aNormal;
layout(location = 2) in vec2 aTexCoord;
@ -41,15 +51,23 @@ layout(location = 4) out float ModelHeight;
layout(location = 5) out float vFadeAlpha;
void main() {
// Phase 2.1: Fetch per-instance data from SSBO
int instIdx = push.instanceDataOffset + gl_InstanceIndex;
mat4 model = instanceData[instIdx].model;
vec2 uvOff = instanceData[instIdx].uvOffset;
float fade = instanceData[instIdx].fadeAlpha;
int uBones = instanceData[instIdx].useBones;
int bBase = instanceData[instIdx].boneBase;
vec4 pos = vec4(aPos, 1.0);
vec4 norm = vec4(aNormal, 0.0);
if (push.useBones != 0) {
if (uBones != 0) {
ivec4 bi = ivec4(aBoneIndicesF);
mat4 skinMat = bones[bi.x] * aBoneWeights.x
+ bones[bi.y] * aBoneWeights.y
+ bones[bi.z] * aBoneWeights.z
+ bones[bi.w] * aBoneWeights.w;
mat4 skinMat = bones[bBase + bi.x] * aBoneWeights.x
+ bones[bBase + bi.y] * aBoneWeights.y
+ bones[bBase + bi.z] * aBoneWeights.z
+ bones[bBase + bi.w] * aBoneWeights.w;
pos = skinMat * pos;
norm = skinMat * norm;
}
@ -57,7 +75,7 @@ void main() {
// Wind animation for foliage
if (push.isFoliage != 0) {
float windTime = fogParams.z;
vec3 worldRef = push.model[3].xyz;
vec3 worldRef = model[3].xyz;
float heightFactor = clamp(pos.z / 20.0, 0.0, 1.0);
heightFactor *= heightFactor; // quadratic — base stays grounded
@ -80,15 +98,15 @@ void main() {
pos.y += trunkSwayY + branchSwayY + leafFlutterY;
}
vec4 worldPos = push.model * pos;
vec4 worldPos = model * pos;
FragPos = worldPos.xyz;
Normal = mat3(push.model) * norm.xyz;
Normal = mat3(model) * norm.xyz;
TexCoord = (push.texCoordSet == 1 ? aTexCoord2 : aTexCoord) + push.uvOffset;
TexCoord = (push.texCoordSet == 1 ? aTexCoord2 : aTexCoord) + uvOff;
InstanceOrigin = push.model[3].xyz;
InstanceOrigin = model[3].xyz;
ModelHeight = pos.z;
vFadeAlpha = push.fadeAlpha;
vFadeAlpha = fade;
gl_Position = projection * view * worldPos;
}

Binary file not shown.

View file

@ -0,0 +1,76 @@
#version 450
// Phase 2.3: GPU Frustum Culling for M2 doodads
// Each compute thread tests one M2 instance against 6 frustum planes.
// Input: per-instance bounding sphere + flags.
// Output: uint visibility array (1 = visible, 0 = culled).
layout(local_size_x = 64) in; // 64 threads per workgroup, one instance per thread
// Per-instance cull data (uploaded from CPU each frame)
// NOTE: layout must stay in sync with CullInstanceGPU on the C++ side (32 bytes, std430).
struct CullInstance {
vec4 sphere; // xyz = world position, w = padded radius
float effectiveMaxDistSq; // adaptive distance cull threshold
uint flags; // bit 0 = valid, bit 1 = smoke, bit 2 = invisibleTrap
float _pad0;
float _pad1;
};
// Frame-constant cull state; must stay in sync with CullUniformsGPU on the C++ side (std140).
layout(std140, set = 0, binding = 0) uniform CullUniforms {
vec4 frustumPlanes[6]; // xyz = normal, w = distance
vec4 cameraPos; // xyz = camera position, w = maxPossibleDistSq
uint instanceCount; // number of valid entries in cullInstances[]
uint _pad0;
uint _pad1;
uint _pad2;
};
layout(std430, set = 0, binding = 1) readonly buffer CullInput {
CullInstance cullInstances[];
};
layout(std430, set = 0, binding = 2) writeonly buffer CullOutput {
uint visibility[]; // one uint per instance: 1 = visible, 0 = culled
};
void main() {
    uint idx = gl_GlobalInvocationID.x;
    if (idx >= instanceCount) return;

    CullInstance c = cullInstances[idx];

    // Keep only instances that are valid (bit 0 set) and neither
    // smoke (bit 1) nor an invisible trap (bit 2).
    bool flagsOk = (c.flags & 1u) != 0u && (c.flags & 6u) == 0u;
    if (!flagsOk) {
        visibility[idx] = 0u;
        return;
    }

    // Distance culling: the squared camera distance must pass both the loose
    // global bound (cameraPos.w) and the per-instance adaptive threshold.
    // min() folds the two sequential tests into a single comparison.
    vec3 delta = c.sphere.xyz - cameraPos.xyz;
    float dSq = dot(delta, delta);
    if (dSq > min(cameraPos.w, c.effectiveMaxDistSq)) {
        visibility[idx] = 0u;
        return;
    }

    // Sphere-vs-frustum: the instance is culled when its bounding sphere lies
    // fully behind any of the 6 planes. radius <= 0 skips the test (always visible).
    float r = c.sphere.w;
    uint visible = 1u;
    if (r > 0.0) {
        for (int p = 0; p < 6; ++p) {
            if (dot(frustumPlanes[p].xyz, c.sphere.xyz) + frustumPlanes[p].w < -r) {
                visible = 0u;
                break;
            }
        }
    }
    visibility[idx] = visible;
}

Binary file not shown.

View file

@ -51,7 +51,7 @@ private:
float pitch = 0.0f;
float fov = 45.0f;
float aspectRatio = 16.0f / 9.0f;
float nearPlane = 0.05f;
float nearPlane = 0.5f;
float farPlane = 30000.0f; // Improves depth precision vs extremely large far clip
glm::mat4 viewMatrix = glm::mat4(1.0f);

View file

@ -219,12 +219,15 @@ struct M2Instance {
uint8_t frameSkipCounter = 0;
bool bonesDirty[2] = {false, false}; // Per-frame-index: set when bones recomputed, cleared after upload
// Per-instance bone SSBO (double-buffered)
// Per-instance bone SSBO (double-buffered) — legacy; see mega bone SSBO in M2Renderer
::VkBuffer boneBuffer[2] = {};
VmaAllocation boneAlloc[2] = {};
void* boneMapped[2] = {};
VkDescriptorSet boneSet[2] = {};
// Mega bone SSBO offset — base bone index for this instance (set per-frame in prepareRender)
uint32_t megaBoneOffset = 0;
void updateModelMatrix();
};
@ -292,6 +295,8 @@ public:
*/
/** Pre-allocate GPU resources (bone SSBOs, descriptors) on main thread before parallel render. */
void prepareRender(uint32_t frameIndex, const Camera& camera);
/** Phase 2.3: Dispatch GPU frustum culling compute shader on primary cmd before render pass. */
void dispatchCullCompute(VkCommandBuffer cmd, uint32_t frameIndex, const Camera& camera);
void render(VkCommandBuffer cmd, VkDescriptorSet perFrameSet, const Camera& camera);
/**
@ -425,6 +430,65 @@ private:
VmaAllocation dummyBoneAlloc_ = VK_NULL_HANDLE;
VkDescriptorSet dummyBoneSet_ = VK_NULL_HANDLE;
// Mega bone SSBO — consolidates all per-instance bone matrices into a single buffer per frame.
// Replaces per-instance bone SSBOs for fewer descriptor binds and enables GPU instancing.
static constexpr uint32_t MEGA_BONE_MAX_INSTANCES = 2048;
static constexpr uint32_t MAX_BONES_PER_INSTANCE = 128;
::VkBuffer megaBoneBuffer_[2] = {};
VmaAllocation megaBoneAlloc_[2] = {};
void* megaBoneMapped_[2] = {};
VkDescriptorSet megaBoneSet_[2] = {};
// Phase 2.1: GPU instance data SSBO — per-instance transforms, fade, bones for instanced draws.
// Shader reads instanceData[push.instanceDataOffset + gl_InstanceIndex].
struct M2InstanceGPU {
glm::mat4 model; // 64 bytes @ offset 0
glm::vec2 uvOffset; // 8 bytes @ offset 64
float fadeAlpha; // 4 bytes @ offset 72
int32_t useBones; // 4 bytes @ offset 76
int32_t boneBase; // 4 bytes @ offset 80
int32_t _pad[3] = {}; // 12 bytes @ offset 84 — align to 96 (std430)
};
static constexpr uint32_t MAX_INSTANCE_DATA = 16384;
VkDescriptorSetLayout instanceSetLayout_ = VK_NULL_HANDLE;
VkDescriptorPool instanceDescPool_ = VK_NULL_HANDLE;
::VkBuffer instanceBuffer_[2] = {};
VmaAllocation instanceAlloc_[2] = {};
void* instanceMapped_[2] = {};
VkDescriptorSet instanceSet_[2] = {};
uint32_t instanceDataCount_ = 0; // reset each frame in render()
// Phase 2.3: GPU Frustum Culling via Compute Shader
// Compute shader tests each M2 instance against frustum planes + distance, writes visibility[].
// CPU reads back visibility to build sortedVisible_ without per-instance frustum/distance tests.
struct CullInstanceGPU { // matches CullInstance in m2_cull.comp.glsl (32 bytes, std430)
glm::vec4 sphere; // xyz = world position, w = padded radius
float effectiveMaxDistSq; // adaptive distance cull threshold
uint32_t flags; // bit 0 = valid, bit 1 = smoke, bit 2 = invisibleTrap
float _pad[2] = {};
};
struct CullUniformsGPU { // matches CullUniforms in m2_cull.comp.glsl (128 bytes, std140)
glm::vec4 frustumPlanes[6]; // xyz = normal, w = distance
glm::vec4 cameraPos; // xyz = camera position, w = maxPossibleDistSq
uint32_t instanceCount;
uint32_t _pad[3] = {};
};
static constexpr uint32_t MAX_CULL_INSTANCES = 16384;
VkPipeline cullPipeline_ = VK_NULL_HANDLE;
VkPipelineLayout cullPipelineLayout_ = VK_NULL_HANDLE;
VkDescriptorSetLayout cullSetLayout_ = VK_NULL_HANDLE;
VkDescriptorPool cullDescPool_ = VK_NULL_HANDLE;
VkDescriptorSet cullSet_[2] = {}; // double-buffered
::VkBuffer cullUniformBuffer_[2] = {}; // frustum planes + camera (UBO)
VmaAllocation cullUniformAlloc_[2] = {};
void* cullUniformMapped_[2] = {};
::VkBuffer cullInputBuffer_[2] = {}; // per-instance bounding sphere + flags (SSBO)
VmaAllocation cullInputAlloc_[2] = {};
void* cullInputMapped_[2] = {};
::VkBuffer cullOutputBuffer_[2] = {}; // uint visibility[] (SSBO, host-readable)
VmaAllocation cullOutputAlloc_[2] = {};
void* cullOutputMapped_[2] = {};
// Dynamic ribbon vertex buffer (CPU-written triangle strip)
static constexpr size_t MAX_RIBBON_VERTS = 2048; // 9 floats each
::VkBuffer ribbonVB_ = VK_NULL_HANDLE;

View file

@ -0,0 +1,117 @@
#pragma once
#include <vulkan/vulkan.h>
#include <string>
#include <vector>
#include <functional>
#include <cstdint>
namespace wowee {
namespace rendering {
// Phase 2.5: Lightweight Render Graph / Frame Graph
// Converts hardcoded pass sequence (shadow → reflection → compute cull →
// main → post-process → ImGui → present) into declarative graph nodes.
// Graph auto-inserts VkImageMemoryBarrier between passes.
// Resource handle — identifies a virtual resource (image or buffer) within the graph.
struct RGResource {
uint32_t id = UINT32_MAX; // UINT32_MAX is the "invalid handle" sentinel
bool valid() const { return id != UINT32_MAX; }
};
// Image barrier descriptor for automatic synchronization between passes.
// Recorded on an RGPass and emitted via vkCmdPipelineBarrier before the pass runs.
struct RGImageBarrier {
VkImage image; // image whose layout/access is transitioned
VkImageLayout oldLayout; // layout before the barrier
VkImageLayout newLayout; // layout after the barrier
VkAccessFlags srcAccess; // accesses that must complete before the barrier
VkAccessFlags dstAccess; // accesses that wait on the barrier
VkPipelineStageFlags srcStage; // producing pipeline stage(s)
VkPipelineStageFlags dstStage; // consuming pipeline stage(s)
VkImageAspectFlags aspectMask; // color / depth / stencil aspect selection
};
// Buffer barrier descriptor for automatic synchronization between passes.
struct RGBufferBarrier {
VkBuffer buffer; // buffer the barrier applies to
VkDeviceSize offset; // start of the guarded range, in bytes
VkDeviceSize size; // byte size of the guarded range
VkAccessFlags srcAccess; // accesses that must complete before the barrier
VkAccessFlags dstAccess; // accesses that wait on the barrier
VkPipelineStageFlags srcStage; // producing pipeline stage(s)
VkPipelineStageFlags dstStage; // consuming pipeline stage(s)
};
// Render pass node — wraps an execution callback with declared inputs/outputs.
struct RGPass {
std::string name; // lookup key for setPassEnabled() / debug display
std::vector<RGResource> inputs; // resources this pass reads (incoming dependency edges)
std::vector<RGResource> outputs; // resources this pass writes (outgoing dependency edges)
std::function<void(VkCommandBuffer cmd)> execute; // work recorded on the frame command buffer
bool enabled = true; // Can be dynamically disabled per-frame
// Barriers to insert before this pass executes
std::vector<RGImageBarrier> imageBarriers;
std::vector<RGBufferBarrier> bufferBarriers;
};
class RenderGraph {
public:
RenderGraph() = default;
~RenderGraph() = default;
// Reset graph for a new frame (clears passes and the compiled order;
// keeps the resource registry, whose ids are stable across frames).
void reset();
// Register a virtual resource (returns handle for input/output declarations).
// Re-registering an already-known name returns the existing handle.
RGResource registerResource(const std::string& name);
// Look up a previously registered resource by name.
// Returns an invalid handle (valid() == false) when the name is unknown.
RGResource findResource(const std::string& name) const;
// Add a render pass node.
// inputs: resources this pass reads from
// outputs: resources this pass writes to
// execute: callback invoked with the frame's command buffer
void addPass(const std::string& name,
const std::vector<RGResource>& inputs,
const std::vector<RGResource>& outputs,
std::function<void(VkCommandBuffer cmd)> execute);
// Enable/disable a pass by name (for dynamic toggling, e.g. shadows off).
void setPassEnabled(const std::string& name, bool enabled);
// Compile: topological sort by dependency order, insert barriers.
// Must be called after all addPass() calls and before execute().
void compile();
// Execute all enabled passes in compiled order on the given command buffer.
void execute(VkCommandBuffer cmd);
// Query: get the compiled execution order (pass names, for debug HUD).
const std::vector<uint32_t>& getExecutionOrder() const { return executionOrder_; }
const std::vector<RGPass>& getPasses() const { return passes_; }
private:
// Topological sort helper (Kahn's algorithm).
void topologicalSort();
// Resource registry: name → id
struct ResourceEntry {
std::string name;
uint32_t id;
};
std::vector<ResourceEntry> resources_;
uint32_t nextResourceId_ = 0; // monotonically increasing id source
// Pass storage
std::vector<RGPass> passes_;
// Compiled execution order (indices into passes_)
std::vector<uint32_t> executionOrder_;
bool compiled_ = false; // set by compile(), cleared by reset()
};
} // namespace rendering
} // namespace wowee

View file

@ -56,6 +56,7 @@ class AnimationController;
class LevelUpEffect;
class ChargeEffect;
class SwimEffects;
class RenderGraph;
class Renderer {
public:
@ -433,6 +434,10 @@ private:
bool ghostMode_ = false; // set each frame from gameHandler->isPlayerGhost()
// Phase 2.5: Render Graph — declarative pass ordering with automatic barriers
std::unique_ptr<RenderGraph> renderGraph_;
void buildFrameGraph(game::GameHandler* gameHandler);
// CPU timing stats (last frame/update).
double lastUpdateMs = 0.0;
double lastRenderMs = 0.0;

View file

@ -346,8 +346,8 @@ private:
// Streaming parameters
bool streamingEnabled = true;
int loadRadius = 4; // Load tiles within this radius (9x9 grid = 81 tiles)
int unloadRadius = 7; // Unload tiles beyond this radius
int loadRadius = 6; // Load tiles within this radius (13x13 grid = 169 tiles)
int unloadRadius = 9; // Unload tiles beyond this radius
float updateInterval = 0.033f; // Check streaming every 33ms (~30 fps)
float timeSinceLastUpdate = 0.0f;
float proactiveStreamTimer_ = 0.0f;

View file

@ -60,6 +60,11 @@ struct TerrainChunkGPU {
float boundingSphereRadius = 0.0f;
glm::vec3 boundingSphereCenter = glm::vec3(0.0f);
// Phase 2.2: Offsets into mega buffers for indirect drawing (-1 = not in mega buffer)
int32_t megaBaseVertex = -1;
uint32_t megaFirstIndex = 0;
uint32_t vertexCount = 0;
bool isValid() const { return vertexBuffer != VK_NULL_HANDLE && indexBuffer != VK_NULL_HANDLE; }
};
@ -200,6 +205,25 @@ private:
bool fogEnabled = true;
int renderedChunks = 0;
int culledChunks = 0;
// Phase 2.2: Mega vertex/index buffers for indirect drawing
// All terrain chunks share a single VB + IB, eliminating per-chunk rebinds.
// Indirect draw commands are built CPU-side each frame for visible chunks.
VkBuffer megaVB_ = VK_NULL_HANDLE;
VmaAllocation megaVBAlloc_ = VK_NULL_HANDLE;
void* megaVBMapped_ = nullptr;
VkBuffer megaIB_ = VK_NULL_HANDLE;
VmaAllocation megaIBAlloc_ = VK_NULL_HANDLE;
void* megaIBMapped_ = nullptr;
uint32_t megaVBUsed_ = 0; // vertices used
uint32_t megaIBUsed_ = 0; // indices used
static constexpr uint32_t MEGA_VB_MAX_VERTS = 1536 * 1024; // ~1.5M verts × 44B ≈ 64MB
static constexpr uint32_t MEGA_IB_MAX_INDICES = 6 * 1024 * 1024; // 6M indices × 4B = 24MB
VkBuffer indirectBuffer_ = VK_NULL_HANDLE;
VmaAllocation indirectAlloc_ = VK_NULL_HANDLE;
void* indirectMapped_ = nullptr;
static constexpr uint32_t MAX_INDIRECT_DRAWS = 8192;
};
} // namespace rendering

View file

@ -75,6 +75,10 @@ public:
// Dynamic state
PipelineBuilder& setDynamicStates(const std::vector<VkDynamicState>& states);
// Pipeline derivatives — hint driver to share compiled state between similar pipelines
PipelineBuilder& setFlags(VkPipelineCreateFlags flags);
PipelineBuilder& setBasePipeline(VkPipeline basePipeline);
// Build the pipeline (pass a VkPipelineCache for faster creation)
VkPipeline build(VkDevice device, VkPipelineCache cache = VK_NULL_HANDLE) const;
@ -106,6 +110,8 @@ private:
VkRenderPass renderPass_ = VK_NULL_HANDLE;
uint32_t subpass_ = 0;
std::vector<VkDynamicState> dynamicStates_;
VkPipelineCreateFlags flags_ = 0;
VkPipeline basePipelineHandle_ = VK_NULL_HANDLE;
};
// Helper to create a pipeline layout from descriptor set layouts and push constant ranges

View file

@ -734,9 +734,9 @@ void WorldLoader::loadOnlineWorldTerrain(uint32_t mapId, float x, float y, float
// Use a small radius for the initial load (just immediate tiles),
// then restore the full radius after entering the game.
// This matches WoW's behavior: load quickly, stream the rest in-game.
const int savedLoadRadius = 4;
terrainMgr->setLoadRadius(3); // 7x7=49 tiles — prevents hitches on spawn
terrainMgr->setUnloadRadius(7);
const int savedLoadRadius = 6;
terrainMgr->setLoadRadius(4); // 9x9=81 tiles — prevents hitches on spawn
terrainMgr->setUnloadRadius(9);
// Trigger tile streaming for surrounding area
terrainMgr->update(*camera, 1.0f);

View file

@ -111,13 +111,13 @@ int main([[maybe_unused]] int argc, [[maybe_unused]] char* argv[]) {
_NSGetExecutablePath(nullptr, &bufSize);
std::string exePath(bufSize, '\0');
_NSGetExecutablePath(exePath.data(), &bufSize);
chdir(dirname(exePath.data()));
if (chdir(dirname(exePath.data())) != 0) {}
}
#elif defined(__linux__)
{
char buf[4096];
ssize_t len = readlink("/proc/self/exe", buf, sizeof(buf) - 1);
if (len > 0) { buf[len] = '\0'; chdir(dirname(buf)); }
if (len > 0) { buf[len] = '\0'; if (chdir(dirname(buf)) != 0) {} }
}
#endif

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,194 @@
#include "rendering/render_graph.hpp"
#include "core/logger.hpp"
#include <algorithm>
#include <unordered_map>
#include <queue>
namespace wowee {
namespace rendering {
// Discard all pass nodes and the compiled order so the next frame rebuilds the
// graph from scratch. The resource registry is intentionally preserved —
// resource names/ids are stable across frames.
void RenderGraph::reset() {
    executionOrder_.clear();
    passes_.clear();
    compiled_ = false;
}
// Register (or re-acquire) a virtual resource by name. Idempotent: when the
// name is already in the registry, the existing handle is returned instead of
// minting a new id.
RGResource RenderGraph::registerResource(const std::string& name) {
    auto existing = std::find_if(resources_.begin(), resources_.end(),
                                 [&name](const ResourceEntry& e) { return e.name == name; });
    if (existing != resources_.end()) {
        return {existing->id};
    }
    const uint32_t newId = nextResourceId_++;
    resources_.push_back({name, newId});
    return {newId};
}
// Look up a previously registered resource. Returns a default-constructed
// (invalid) handle when the name is unknown.
RGResource RenderGraph::findResource(const std::string& name) const {
    auto hit = std::find_if(resources_.begin(), resources_.end(),
                            [&name](const ResourceEntry& e) { return e.name == name; });
    return hit != resources_.end() ? RGResource{hit->id} : RGResource{};
}
void RenderGraph::addPass(const std::string& name,
const std::vector<RGResource>& inputs,
const std::vector<RGResource>& outputs,
std::function<void(VkCommandBuffer cmd)> execute) {
RGPass pass;
pass.name = name;
pass.inputs = inputs;
pass.outputs = outputs;
pass.execute = std::move(execute);
pass.enabled = true;
passes_.push_back(std::move(pass));
}
void RenderGraph::setPassEnabled(const std::string& name, bool enabled) {
for (auto& pass : passes_) {
if (pass.name == name) {
pass.enabled = enabled;
return;
}
}
}
// Finalize the graph: order passes topologically by declared dependencies and
// mark the graph ready for execute().
void RenderGraph::compile() {
topologicalSort();
compiled_ = true;
}
void RenderGraph::topologicalSort() {
const uint32_t n = static_cast<uint32_t>(passes_.size());
if (n == 0) { executionOrder_.clear(); return; }
// Build adjacency: if pass A outputs resource R and pass B inputs resource R,
// then A must execute before B (edge A → B).
// Map: resource id → index of pass that produces it
std::unordered_map<uint32_t, uint32_t> producer;
for (uint32_t i = 0; i < n; ++i) {
for (const auto& out : passes_[i].outputs) {
producer[out.id] = i;
}
}
// Build in-degree and adjacency list
std::vector<uint32_t> inDegree(n, 0);
std::vector<std::vector<uint32_t>> adj(n);
for (uint32_t i = 0; i < n; ++i) {
for (const auto& inp : passes_[i].inputs) {
auto it = producer.find(inp.id);
if (it != producer.end() && it->second != i) {
adj[it->second].push_back(i);
inDegree[i]++;
}
}
}
// Kahn's algorithm
std::queue<uint32_t> queue;
for (uint32_t i = 0; i < n; ++i) {
if (inDegree[i] == 0) queue.push(i);
}
executionOrder_.clear();
executionOrder_.reserve(n);
while (!queue.empty()) {
uint32_t u = queue.front();
queue.pop();
executionOrder_.push_back(u);
for (uint32_t v : adj[u]) {
if (--inDegree[v] == 0) queue.push(v);
}
}
// If not all passes are in the order, there's a cycle — fall back to insertion order
if (executionOrder_.size() != n) {
LOG_WARNING("RenderGraph: dependency cycle detected, falling back to insertion order");
executionOrder_.clear();
for (uint32_t i = 0; i < n; ++i) executionOrder_.push_back(i);
}
}
// Run every enabled pass in compiled order, emitting the pass's declared image
// and buffer barriers immediately before its callback.
// If compile() was not called, compiles on the fly (with a warning) so a missed
// call degrades gracefully instead of executing an empty order.
void RenderGraph::execute(VkCommandBuffer cmd) {
    if (!compiled_) {
        LOG_WARNING("RenderGraph::execute called without compile()");
        compile();
    }
    for (uint32_t idx : executionOrder_) {
        const auto& pass = passes_[idx];
        if (!pass.enabled) continue;
        // Insert image barriers declared for this pass.
        // Per-barrier stage flags are OR'd into a single vkCmdPipelineBarrier
        // call — coarser synchronization, but one command per pass.
        if (!pass.imageBarriers.empty()) {
            std::vector<VkImageMemoryBarrier> barriers;
            barriers.reserve(pass.imageBarriers.size());
            VkPipelineStageFlags srcStages = 0;
            VkPipelineStageFlags dstStages = 0;
            for (const auto& b : pass.imageBarriers) {
                VkImageMemoryBarrier ib{};
                ib.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER;
                ib.oldLayout = b.oldLayout;
                ib.newLayout = b.newLayout;
                ib.srcAccessMask = b.srcAccess;
                ib.dstAccessMask = b.dstAccess;
                ib.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
                ib.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
                ib.image = b.image;
                // First mip level / first array layer only — matches how the
                // graph's single-subresource render targets are used.
                ib.subresourceRange = {b.aspectMask, 0, 1, 0, 1};
                barriers.push_back(ib);
                srcStages |= b.srcStage;
                dstStages |= b.dstStage;
            }
            vkCmdPipelineBarrier(cmd,
                srcStages, dstStages,
                0,
                0, nullptr,
                0, nullptr,
                static_cast<uint32_t>(barriers.size()), barriers.data());
        }
        // Insert buffer barriers declared for this pass.
        if (!pass.bufferBarriers.empty()) {
            std::vector<VkBufferMemoryBarrier> barriers;
            barriers.reserve(pass.bufferBarriers.size());
            VkPipelineStageFlags srcStages = 0;
            VkPipelineStageFlags dstStages = 0;
            for (const auto& b : pass.bufferBarriers) {
                VkBufferMemoryBarrier bb{};
                bb.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER;
                bb.srcAccessMask = b.srcAccess;
                bb.dstAccessMask = b.dstAccess;
                bb.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
                bb.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
                bb.buffer = b.buffer;
                bb.offset = b.offset;
                bb.size = b.size;
                barriers.push_back(bb);
                srcStages |= b.srcStage;
                dstStages |= b.dstStage;
            }
            vkCmdPipelineBarrier(cmd,
                srcStages, dstStages,
                0,
                0, nullptr,
                static_cast<uint32_t>(barriers.size()), barriers.data(),
                0, nullptr);
        }
        // Execute the pass. Guard against an empty std::function: a pass added
        // with a default-constructed callback would otherwise throw
        // std::bad_function_call here.
        if (pass.execute) pass.execute(cmd);
    }
}
} // namespace rendering
} // namespace wowee

View file

@ -61,6 +61,7 @@
#include "rendering/spell_visual_system.hpp"
#include "rendering/post_process_pipeline.hpp"
#include "rendering/animation_controller.hpp"
#include "rendering/render_graph.hpp"
#include <imgui.h>
#include <imgui_impl_vulkan.h>
#include <glm/gtc/matrix_transform.hpp>
@ -458,7 +459,9 @@ void Renderer::updatePerFrameUBO() {
}
currentFrameData.lightSpaceMatrix = lightSpaceMatrix;
currentFrameData.shadowParams = glm::vec4(shadowsEnabled ? 1.0f : 0.0f, 0.8f, 0.0f, 0.0f);
// Scale shadow bias proportionally to ortho extent to avoid acne at close range / gaps at far range
float shadowBias = 0.8f * (shadowDistance_ / 300.0f);
currentFrameData.shadowParams = glm::vec4(shadowsEnabled ? 1.0f : 0.0f, shadowBias, 0.0f, 0.0f);
// Player water ripple data: pack player XY into shadowParams.zw, ripple strength into fogParams.w
if (cameraController) {
@ -563,6 +566,15 @@ bool Renderer::initialize(core::Window* win) {
postProcessPipeline_ = std::make_unique<PostProcessPipeline>();
postProcessPipeline_->initialize(vkCtx);
// Phase 2.5: Create render graph and register virtual resources
renderGraph_ = std::make_unique<RenderGraph>();
renderGraph_->registerResource("shadow_depth");
renderGraph_->registerResource("reflection_texture");
renderGraph_->registerResource("cull_visibility");
renderGraph_->registerResource("scene_color");
renderGraph_->registerResource("scene_depth");
renderGraph_->registerResource("final_image");
LOG_INFO("Renderer initialized");
return true;
}
@ -674,6 +686,10 @@ void Renderer::shutdown() {
postProcessPipeline_->shutdown();
postProcessPipeline_.reset();
}
// Phase 2.5: Destroy render graph
renderGraph_.reset();
destroyPerFrameResources();
zoneManager.reset();
@ -839,36 +855,19 @@ void Renderer::beginFrame() {
// FSR2 jitter pattern (§4.3 — delegates to PostProcessPipeline)
if (postProcessPipeline_ && camera) postProcessPipeline_->applyJitter(camera.get());
// Compute fresh shadow matrix BEFORE UBO update so shaders get current-frame data.
lightSpaceMatrix = computeLightSpaceMatrix();
// Update per-frame UBO with current camera/lighting state
updatePerFrameUBO();
// --- Off-screen pre-passes (before main render pass) ---
// Minimap composite (renders 3x3 tile grid into 768x768 render target)
if (minimap && minimap->isEnabled() && camera) {
glm::vec3 minimapCenter = camera->getPosition();
if (cameraController && cameraController->isThirdPerson())
minimapCenter = characterPosition;
minimap->compositePass(currentCmd, minimapCenter);
// --- Off-screen pre-passes (Phase 2.5: render graph) ---
// Build frame graph: registers pre-passes as graph nodes with dependencies.
// compile() topologically sorts; execute() runs them with auto barriers.
buildFrameGraph(nullptr);
if (renderGraph_) {
renderGraph_->execute(currentCmd);
}
// World map composite (renders zone tiles into 1024x768 render target)
if (worldMap) {
worldMap->compositePass(currentCmd);
}
// Character preview composite passes
for (auto* preview : activePreviews_) {
if (preview && preview->isModelLoaded()) {
preview->compositePass(currentCmd, vkCtx->getCurrentFrame());
}
}
// Shadow pre-pass (before main render pass)
if (shadowsEnabled && shadowDepthImage[0] != VK_NULL_HANDLE) {
renderShadowPass();
}
// Water reflection pre-pass (renders scene from mirrored camera into 512x512 texture)
renderReflectionPass();
// --- Begin render pass ---
// Select framebuffer: PP off-screen target or swapchain (§4.3 — PostProcessPipeline)
@ -3063,17 +3062,10 @@ void Renderer::renderShadowPass() {
// Shadows render every frame — throttling causes visible flicker on player/NPCs
// Compute and store light space matrix; write to per-frame UBO
lightSpaceMatrix = computeLightSpaceMatrix();
// lightSpaceMatrix was already computed at frame start (before updatePerFrameUBO).
// Zero matrix means character position isn't set yet — skip shadow pass entirely.
if (lightSpaceMatrix == glm::mat4(0.0f)) return;
uint32_t frame = vkCtx->getCurrentFrame();
auto* ubo = reinterpret_cast<GPUPerFrameData*>(perFrameUBOMapped[frame]);
if (ubo) {
ubo->lightSpaceMatrix = lightSpaceMatrix;
ubo->shadowParams.x = shadowsEnabled ? 1.0f : 0.0f;
ubo->shadowParams.y = 0.8f;
}
// Barrier 1: transition this frame's shadow map into writable depth layout.
VkImageMemoryBarrier b1{};
@ -3147,5 +3139,69 @@ void Renderer::renderShadowPass() {
shadowDepthLayout_[frame] = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
}
// Phase 2.5: Assemble the per-frame render graph for off-screen pre-passes.
// Each pre-pass becomes a node with declared resource reads/writes; compile()
// topologically sorts them and execute() later runs them with auto barriers.
// NOTE: addPass insertion order is preserved deliberately — it is the
// tie-break order for passes with no dependency relationship.
void Renderer::buildFrameGraph(game::GameHandler* gameHandler) {
    (void)gameHandler;
    if (!renderGraph_) return;
    renderGraph_->reset();

    const auto resShadowDepth = renderGraph_->findResource("shadow_depth");
    const auto resReflection  = renderGraph_->findResource("reflection_texture");
    const auto resCullVis     = renderGraph_->findResource("cull_visibility");

    // Minimap composite — standalone off-screen render target, no dependencies.
    renderGraph_->addPass("minimap_composite", {}, {},
        [this](VkCommandBuffer cmd) {
            if (!minimap || !minimap->isEnabled() || !camera) return;
            glm::vec3 center = camera->getPosition();
            if (cameraController && cameraController->isThirdPerson())
                center = characterPosition;
            minimap->compositePass(cmd, center);
        });

    // World map composite (standalone).
    renderGraph_->addPass("worldmap_composite", {}, {},
        [this](VkCommandBuffer cmd) {
            if (worldMap) worldMap->compositePass(cmd);
        });

    // Character preview composites (standalone).
    renderGraph_->addPass("preview_composite", {}, {},
        [this](VkCommandBuffer cmd) {
            const uint32_t frame = vkCtx->getCurrentFrame();
            for (auto* preview : activePreviews_) {
                if (preview && preview->isModelLoaded())
                    preview->compositePass(cmd, frame);
            }
        });

    // Shadow pre-pass — writes shadow_depth.
    renderGraph_->addPass("shadow_pass", {}, {resShadowDepth},
        [this](VkCommandBuffer) {
            if (shadowsEnabled && shadowDepthImage[0] != VK_NULL_HANDLE)
                renderShadowPass();
        });
    renderGraph_->setPassEnabled("shadow_pass",
                                 shadowsEnabled && shadowDepthImage[0] != VK_NULL_HANDLE);

    // Water reflection pre-pass — writes reflection_texture; the shadow_depth
    // read orders it after the shadow pass.
    renderGraph_->addPass("reflection_pass", {resShadowDepth}, {resReflection},
        [this](VkCommandBuffer) {
            renderReflectionPass();
        });

    // GPU frustum-cull compute — writes cull_visibility.
    renderGraph_->addPass("compute_cull", {}, {resCullVis},
        [this](VkCommandBuffer cmd) {
            if (m2Renderer && camera)
                m2Renderer->dispatchCullCompute(cmd, vkCtx->getCurrentFrame(), *camera);
        });

    renderGraph_->compile();
}
} // namespace rendering
} // namespace wowee

View file

@ -128,7 +128,7 @@ bool TerrainRenderer::initialize(VkContext* ctx, VkDescriptorSetLayout perFrameL
vertexAttribs[3] = { 3, 0, VK_FORMAT_R32G32_SFLOAT,
static_cast<uint32_t>(offsetof(pipeline::TerrainVertex, layerUV)) };
// --- Build fill pipeline ---
// --- Build fill pipeline (base for derivatives — shared state optimization) ---
VkRenderPass mainPass = vkCtx->getImGuiRenderPass();
pipeline = PipelineBuilder()
@ -143,6 +143,7 @@ bool TerrainRenderer::initialize(VkContext* ctx, VkDescriptorSetLayout perFrameL
.setLayout(pipelineLayout)
.setRenderPass(mainPass)
.setDynamicStates({ VK_DYNAMIC_STATE_VIEWPORT, VK_DYNAMIC_STATE_SCISSOR })
.setFlags(VK_PIPELINE_CREATE_ALLOW_DERIVATIVES_BIT)
.build(device, vkCtx->getPipelineCache());
if (!pipeline) {
@ -152,7 +153,7 @@ bool TerrainRenderer::initialize(VkContext* ctx, VkDescriptorSetLayout perFrameL
return false;
}
// --- Build wireframe pipeline ---
// --- Build wireframe pipeline (derivative of fill) ---
wireframePipeline = PipelineBuilder()
.setShaders(vertShader.stageInfo(VK_SHADER_STAGE_VERTEX_BIT),
fragShader.stageInfo(VK_SHADER_STAGE_FRAGMENT_BIT))
@ -165,6 +166,8 @@ bool TerrainRenderer::initialize(VkContext* ctx, VkDescriptorSetLayout perFrameL
.setLayout(pipelineLayout)
.setRenderPass(mainPass)
.setDynamicStates({ VK_DYNAMIC_STATE_VIEWPORT, VK_DYNAMIC_STATE_SCISSOR })
.setFlags(VK_PIPELINE_CREATE_DERIVATIVE_BIT)
.setBasePipeline(pipeline)
.build(device, vkCtx->getPipelineCache());
if (!wireframePipeline) {
@ -190,6 +193,64 @@ bool TerrainRenderer::initialize(VkContext* ctx, VkDescriptorSetLayout perFrameL
envSizeMBOrDefault("WOWEE_TERRAIN_TEX_CACHE_MB", 4096) * 1024ull * 1024ull;
LOG_INFO("Terrain texture cache budget: ", textureCacheBudgetBytes_ / (1024 * 1024), " MB");
// Phase 2.2: Allocate mega vertex/index buffers and indirect draw buffer.
// All terrain chunks share these buffers, eliminating per-chunk VB/IB rebinds.
{
VmaAllocator allocator = vkCtx->getAllocator();
// Mega vertex buffer (host-visible for direct write during chunk upload)
VkBufferCreateInfo vbCI{};
vbCI.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO;
vbCI.size = static_cast<VkDeviceSize>(MEGA_VB_MAX_VERTS) * sizeof(pipeline::TerrainVertex);
vbCI.usage = VK_BUFFER_USAGE_VERTEX_BUFFER_BIT;
VmaAllocationCreateInfo vbAllocCI{};
vbAllocCI.usage = VMA_MEMORY_USAGE_CPU_TO_GPU;
vbAllocCI.flags = VMA_ALLOCATION_CREATE_MAPPED_BIT;
VmaAllocationInfo vbInfo{};
if (vmaCreateBuffer(allocator, &vbCI, &vbAllocCI,
&megaVB_, &megaVBAlloc_, &vbInfo) == VK_SUCCESS) {
megaVBMapped_ = vbInfo.pMappedData;
} else {
LOG_WARNING("TerrainRenderer: mega VB allocation failed, per-chunk fallback");
}
// Mega index buffer
VkBufferCreateInfo ibCI{};
ibCI.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO;
ibCI.size = static_cast<VkDeviceSize>(MEGA_IB_MAX_INDICES) * sizeof(uint32_t);
ibCI.usage = VK_BUFFER_USAGE_INDEX_BUFFER_BIT;
VmaAllocationCreateInfo ibAllocCI{};
ibAllocCI.usage = VMA_MEMORY_USAGE_CPU_TO_GPU;
ibAllocCI.flags = VMA_ALLOCATION_CREATE_MAPPED_BIT;
VmaAllocationInfo ibInfo{};
if (vmaCreateBuffer(allocator, &ibCI, &ibAllocCI,
&megaIB_, &megaIBAlloc_, &ibInfo) == VK_SUCCESS) {
megaIBMapped_ = ibInfo.pMappedData;
} else {
LOG_WARNING("TerrainRenderer: mega IB allocation failed, per-chunk fallback");
}
// Indirect draw command buffer
VkBufferCreateInfo indCI{};
indCI.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO;
indCI.size = MAX_INDIRECT_DRAWS * sizeof(VkDrawIndexedIndirectCommand);
indCI.usage = VK_BUFFER_USAGE_INDIRECT_BUFFER_BIT;
VmaAllocationCreateInfo indAllocCI{};
indAllocCI.usage = VMA_MEMORY_USAGE_CPU_TO_GPU;
indAllocCI.flags = VMA_ALLOCATION_CREATE_MAPPED_BIT;
VmaAllocationInfo indInfo{};
if (vmaCreateBuffer(allocator, &indCI, &indAllocCI,
&indirectBuffer_, &indirectAlloc_, &indInfo) == VK_SUCCESS) {
indirectMapped_ = indInfo.pMappedData;
} else {
LOG_WARNING("TerrainRenderer: indirect buffer allocation failed");
}
LOG_INFO("Terrain mega buffers: VB=", vbCI.size / (1024*1024), "MB IB=",
ibCI.size / (1024*1024), "MB indirect=",
indCI.size / 1024, "KB");
}
LOG_INFO("Terrain renderer initialized (Vulkan)");
return true;
}
@ -232,7 +293,7 @@ void TerrainRenderer::recreatePipelines() {
VkRenderPass mainPass = vkCtx->getImGuiRenderPass();
// Rebuild fill pipeline
// Rebuild fill pipeline (base for derivatives — shared state optimization)
pipeline = PipelineBuilder()
.setShaders(vertShader.stageInfo(VK_SHADER_STAGE_VERTEX_BIT),
fragShader.stageInfo(VK_SHADER_STAGE_FRAGMENT_BIT))
@ -245,13 +306,14 @@ void TerrainRenderer::recreatePipelines() {
.setLayout(pipelineLayout)
.setRenderPass(mainPass)
.setDynamicStates({ VK_DYNAMIC_STATE_VIEWPORT, VK_DYNAMIC_STATE_SCISSOR })
.setFlags(VK_PIPELINE_CREATE_ALLOW_DERIVATIVES_BIT)
.build(device, vkCtx->getPipelineCache());
if (!pipeline) {
LOG_ERROR("TerrainRenderer::recreatePipelines: failed to create fill pipeline");
}
// Rebuild wireframe pipeline
// Rebuild wireframe pipeline (derivative of fill)
wireframePipeline = PipelineBuilder()
.setShaders(vertShader.stageInfo(VK_SHADER_STAGE_VERTEX_BIT),
fragShader.stageInfo(VK_SHADER_STAGE_FRAGMENT_BIT))
@ -264,6 +326,8 @@ void TerrainRenderer::recreatePipelines() {
.setLayout(pipelineLayout)
.setRenderPass(mainPass)
.setDynamicStates({ VK_DYNAMIC_STATE_VIEWPORT, VK_DYNAMIC_STATE_SCISSOR })
.setFlags(VK_PIPELINE_CREATE_DERIVATIVE_BIT)
.setBasePipeline(pipeline)
.build(device, vkCtx->getPipelineCache());
if (!wireframePipeline) {
@ -311,6 +375,13 @@ void TerrainRenderer::shutdown() {
if (shadowParamsLayout_) { vkDestroyDescriptorSetLayout(device, shadowParamsLayout_, nullptr); shadowParamsLayout_ = VK_NULL_HANDLE; }
if (shadowParamsUBO_) { vmaDestroyBuffer(allocator, shadowParamsUBO_, shadowParamsAlloc_); shadowParamsUBO_ = VK_NULL_HANDLE; shadowParamsAlloc_ = VK_NULL_HANDLE; }
// Phase 2.2: Destroy mega buffers and indirect draw buffer
if (megaVB_) { vmaDestroyBuffer(allocator, megaVB_, megaVBAlloc_); megaVB_ = VK_NULL_HANDLE; megaVBAlloc_ = VK_NULL_HANDLE; megaVBMapped_ = nullptr; }
if (megaIB_) { vmaDestroyBuffer(allocator, megaIB_, megaIBAlloc_); megaIB_ = VK_NULL_HANDLE; megaIBAlloc_ = VK_NULL_HANDLE; megaIBMapped_ = nullptr; }
if (indirectBuffer_) { vmaDestroyBuffer(allocator, indirectBuffer_, indirectAlloc_); indirectBuffer_ = VK_NULL_HANDLE; indirectAlloc_ = VK_NULL_HANDLE; indirectMapped_ = nullptr; }
megaVBUsed_ = 0;
megaIBUsed_ = 0;
vkCtx = nullptr;
}
@ -537,6 +608,7 @@ TerrainChunkGPU TerrainRenderer::uploadChunk(const pipeline::ChunkMesh& chunk) {
gpuChunk.worldY = chunk.worldY;
gpuChunk.worldZ = chunk.worldZ;
gpuChunk.indexCount = static_cast<uint32_t>(chunk.indices.size());
gpuChunk.vertexCount = static_cast<uint32_t>(chunk.vertices.size());
VkDeviceSize vbSize = chunk.vertices.size() * sizeof(pipeline::TerrainVertex);
AllocatedBuffer vb = uploadBuffer(*vkCtx, chunk.vertices.data(), vbSize,
@ -550,6 +622,25 @@ TerrainChunkGPU TerrainRenderer::uploadChunk(const pipeline::ChunkMesh& chunk) {
gpuChunk.indexBuffer = ib.buffer;
gpuChunk.indexAlloc = ib.allocation;
// Phase 2.2: Also copy into mega buffers for indirect drawing
uint32_t vertCount = static_cast<uint32_t>(chunk.vertices.size());
uint32_t idxCount = static_cast<uint32_t>(chunk.indices.size());
if (megaVBMapped_ && megaIBMapped_ &&
megaVBUsed_ + vertCount <= MEGA_VB_MAX_VERTS &&
megaIBUsed_ + idxCount <= MEGA_IB_MAX_INDICES) {
// Copy vertices
auto* vbDst = static_cast<pipeline::TerrainVertex*>(megaVBMapped_) + megaVBUsed_;
std::memcpy(vbDst, chunk.vertices.data(), vertCount * sizeof(pipeline::TerrainVertex));
// Copy indices
auto* ibDst = static_cast<uint32_t*>(megaIBMapped_) + megaIBUsed_;
std::memcpy(ibDst, chunk.indices.data(), idxCount * sizeof(uint32_t));
gpuChunk.megaBaseVertex = static_cast<int32_t>(megaVBUsed_);
gpuChunk.megaFirstIndex = megaIBUsed_;
megaVBUsed_ += vertCount;
megaIBUsed_ += idxCount;
}
return gpuChunk;
}
@ -789,6 +880,15 @@ void TerrainRenderer::render(VkCommandBuffer cmd, VkDescriptorSet perFrameSet, c
renderedChunks = 0;
culledChunks = 0;
// Phase 2.2: Use mega VB + IB when available.
// Bind mega buffers once, then use direct draws with base vertex/index offsets.
const bool useMegaBuffers = (megaVB_ && megaIB_);
if (useMegaBuffers) {
VkDeviceSize megaOffset = 0;
vkCmdBindVertexBuffers(cmd, 0, 1, &megaVB_, &megaOffset);
vkCmdBindIndexBuffer(cmd, megaIB_, 0, VK_INDEX_TYPE_UINT32);
}
for (const auto& chunk : chunks) {
if (!chunk.isValid() || !chunk.materialSet) continue;
@ -808,11 +908,17 @@ void TerrainRenderer::render(VkCommandBuffer cmd, VkDescriptorSet perFrameSet, c
vkCmdBindDescriptorSets(cmd, VK_PIPELINE_BIND_POINT_GRAPHICS, pipelineLayout,
1, 1, &chunk.materialSet, 0, nullptr);
VkDeviceSize offset = 0;
vkCmdBindVertexBuffers(cmd, 0, 1, &chunk.vertexBuffer, &offset);
vkCmdBindIndexBuffer(cmd, chunk.indexBuffer, 0, VK_INDEX_TYPE_UINT32);
vkCmdDrawIndexed(cmd, chunk.indexCount, 1, 0, 0, 0);
if (useMegaBuffers && chunk.megaBaseVertex >= 0) {
// Direct draw from mega buffer — single VB/IB already bound
vkCmdDrawIndexed(cmd, chunk.indexCount, 1,
chunk.megaFirstIndex, chunk.megaBaseVertex, 0);
} else {
// Fallback: per-chunk VB/IB bind + direct draw
VkDeviceSize offset = 0;
vkCmdBindVertexBuffers(cmd, 0, 1, &chunk.vertexBuffer, &offset);
vkCmdBindIndexBuffer(cmd, chunk.indexBuffer, 0, VK_INDEX_TYPE_UINT32);
vkCmdDrawIndexed(cmd, chunk.indexCount, 1, 0, 0, 0);
}
renderedChunks++;
}
@ -986,6 +1092,14 @@ void TerrainRenderer::renderShadow(VkCommandBuffer cmd, const glm::mat4& lightSp
vkCmdPushConstants(cmd, shadowPipelineLayout_, VK_SHADER_STAGE_VERTEX_BIT,
0, 128, &push);
// Phase 2.2: Bind mega buffers once for shadow pass (same as opaque)
const bool useMegaShadow = (megaVB_ && megaIB_);
if (useMegaShadow) {
VkDeviceSize megaOffset = 0;
vkCmdBindVertexBuffers(cmd, 0, 1, &megaVB_, &megaOffset);
vkCmdBindIndexBuffer(cmd, megaIB_, 0, VK_INDEX_TYPE_UINT32);
}
for (const auto& chunk : chunks) {
if (!chunk.isValid()) continue;
@ -995,10 +1109,14 @@ void TerrainRenderer::renderShadow(VkCommandBuffer cmd, const glm::mat4& lightSp
float combinedRadius = shadowRadius + chunk.boundingSphereRadius;
if (distSq > combinedRadius * combinedRadius) continue;
VkDeviceSize offset = 0;
vkCmdBindVertexBuffers(cmd, 0, 1, &chunk.vertexBuffer, &offset);
vkCmdBindIndexBuffer(cmd, chunk.indexBuffer, 0, VK_INDEX_TYPE_UINT16);
vkCmdDrawIndexed(cmd, chunk.indexCount, 1, 0, 0, 0);
if (useMegaShadow && chunk.megaBaseVertex >= 0) {
vkCmdDrawIndexed(cmd, chunk.indexCount, 1, chunk.megaFirstIndex, chunk.megaBaseVertex, 0);
} else {
VkDeviceSize offset = 0;
vkCmdBindVertexBuffers(cmd, 0, 1, &chunk.vertexBuffer, &offset);
vkCmdBindIndexBuffer(cmd, chunk.indexBuffer, 0, VK_INDEX_TYPE_UINT32);
vkCmdDrawIndexed(cmd, chunk.indexCount, 1, 0, 0, 0);
}
}
}

View file

@ -334,7 +334,7 @@ bool VkContext::selectPhysicalDevice() {
VkPhysicalDeviceProperties props;
vkGetPhysicalDeviceProperties(physicalDevice, &props);
uint32_t apiVersion = props.apiVersion;
(void)props.apiVersion; // Available if needed for version checks
gpuVendorId_ = props.vendorID;
std::strncpy(gpuName_, props.deviceName, sizeof(gpuName_) - 1);
gpuName_[sizeof(gpuName_) - 1] = '\0';

View file

@ -111,6 +111,17 @@ PipelineBuilder& PipelineBuilder::setDynamicStates(const std::vector<VkDynamicSt
return *this;
}
// Pipeline derivatives — hint driver to share compiled state between similar pipelines.
// The flags are stored verbatim and applied to VkGraphicsPipelineCreateInfo::flags in build().
PipelineBuilder& PipelineBuilder::setFlags(VkPipelineCreateFlags flags) {
    this->flags_ = flags;
    return *this; // fluent chaining
}
// Record the parent pipeline for VK_PIPELINE_CREATE_DERIVATIVE_BIT builds.
// Applied to VkGraphicsPipelineCreateInfo::basePipelineHandle in build().
PipelineBuilder& PipelineBuilder::setBasePipeline(VkPipeline basePipeline) {
    this->basePipelineHandle_ = basePipeline;
    return *this; // fluent chaining
}
VkPipeline PipelineBuilder::build(VkDevice device, VkPipelineCache cache) const {
// Vertex input
VkPipelineVertexInputStateCreateInfo vertexInput{};
@ -188,6 +199,9 @@ VkPipeline PipelineBuilder::build(VkDevice device, VkPipelineCache cache) const
pipelineInfo.pColorBlendState = colorBlendAttachments_.empty() ? nullptr : &colorBlending;
pipelineInfo.pDynamicState = dynamicStates_.empty() ? nullptr : &dynamicState;
pipelineInfo.layout = pipelineLayout_;
pipelineInfo.flags = flags_;
pipelineInfo.basePipelineHandle = basePipelineHandle_;
pipelineInfo.basePipelineIndex = -1;
pipelineInfo.renderPass = renderPass_;
pipelineInfo.subpass = subpass_;

View file

@ -169,7 +169,7 @@ bool WMORenderer::initialize(VkContext* ctx, VkDescriptorSetLayout perFrameLayou
vertexAttribs[4] = { 4, 0, VK_FORMAT_R32G32B32A32_SFLOAT,
static_cast<uint32_t>(offsetof(WMOVertexData, tangent)) };
// --- Build opaque pipeline ---
// --- Build opaque pipeline (base for derivatives — shared state optimization) ---
VkRenderPass mainPass = vkCtx_->getImGuiRenderPass();
opaquePipeline_ = PipelineBuilder()
@ -184,6 +184,7 @@ bool WMORenderer::initialize(VkContext* ctx, VkDescriptorSetLayout perFrameLayou
.setLayout(pipelineLayout_)
.setRenderPass(mainPass)
.setDynamicStates({ VK_DYNAMIC_STATE_VIEWPORT, VK_DYNAMIC_STATE_SCISSOR })
.setFlags(VK_PIPELINE_CREATE_ALLOW_DERIVATIVES_BIT)
.build(device, vkCtx_->getPipelineCache());
if (!opaquePipeline_) {
@ -193,7 +194,7 @@ bool WMORenderer::initialize(VkContext* ctx, VkDescriptorSetLayout perFrameLayou
return false;
}
// --- Build transparent pipeline ---
// --- Build transparent pipeline (derivative of opaque) ---
transparentPipeline_ = PipelineBuilder()
.setShaders(vertShader.stageInfo(VK_SHADER_STAGE_VERTEX_BIT),
fragShader.stageInfo(VK_SHADER_STAGE_FRAGMENT_BIT))
@ -206,13 +207,15 @@ bool WMORenderer::initialize(VkContext* ctx, VkDescriptorSetLayout perFrameLayou
.setLayout(pipelineLayout_)
.setRenderPass(mainPass)
.setDynamicStates({ VK_DYNAMIC_STATE_VIEWPORT, VK_DYNAMIC_STATE_SCISSOR })
.setFlags(VK_PIPELINE_CREATE_DERIVATIVE_BIT)
.setBasePipeline(opaquePipeline_)
.build(device, vkCtx_->getPipelineCache());
if (!transparentPipeline_) {
core::Logger::getInstance().warning("WMORenderer: transparent pipeline not available");
}
// --- Build glass pipeline (alpha blend WITH depth write for windows) ---
// --- Build glass pipeline (derivative — alpha blend WITH depth write for windows) ---
glassPipeline_ = PipelineBuilder()
.setShaders(vertShader.stageInfo(VK_SHADER_STAGE_VERTEX_BIT),
fragShader.stageInfo(VK_SHADER_STAGE_FRAGMENT_BIT))
@ -225,9 +228,11 @@ bool WMORenderer::initialize(VkContext* ctx, VkDescriptorSetLayout perFrameLayou
.setLayout(pipelineLayout_)
.setRenderPass(mainPass)
.setDynamicStates({ VK_DYNAMIC_STATE_VIEWPORT, VK_DYNAMIC_STATE_SCISSOR })
.setFlags(VK_PIPELINE_CREATE_DERIVATIVE_BIT)
.setBasePipeline(opaquePipeline_)
.build(device, vkCtx_->getPipelineCache());
// --- Build wireframe pipeline ---
// --- Build wireframe pipeline (derivative of opaque) ---
wireframePipeline_ = PipelineBuilder()
.setShaders(vertShader.stageInfo(VK_SHADER_STAGE_VERTEX_BIT),
fragShader.stageInfo(VK_SHADER_STAGE_FRAGMENT_BIT))
@ -240,6 +245,8 @@ bool WMORenderer::initialize(VkContext* ctx, VkDescriptorSetLayout perFrameLayou
.setLayout(pipelineLayout_)
.setRenderPass(mainPass)
.setDynamicStates({ VK_DYNAMIC_STATE_VIEWPORT, VK_DYNAMIC_STATE_SCISSOR })
.setFlags(VK_PIPELINE_CREATE_DERIVATIVE_BIT)
.setBasePipeline(opaquePipeline_)
.build(device, vkCtx_->getPipelineCache());
if (!wireframePipeline_) {
@ -1434,7 +1441,7 @@ void WMORenderer::render(VkCommandBuffer cmd, VkDescriptorSet perFrameSet, const
if (doDistanceCull) {
glm::vec3 closestPoint = glm::clamp(camPos, gMin, gMax);
float distSq = glm::dot(closestPoint - camPos, closestPoint - camPos);
if (distSq > 250000.0f) {
if (distSq > 1440000.0f) { // 1200 units — matches terrain view distance
result.distanceCulled++;
continue;
}
@ -3733,6 +3740,7 @@ void WMORenderer::recreatePipelines() {
VkRenderPass mainPass = vkCtx_->getImGuiRenderPass();
// Pipeline derivatives — opaque is the base, others derive for shared state optimization
opaquePipeline_ = PipelineBuilder()
.setShaders(vertShader.stageInfo(VK_SHADER_STAGE_VERTEX_BIT),
fragShader.stageInfo(VK_SHADER_STAGE_FRAGMENT_BIT))
@ -3745,6 +3753,7 @@ void WMORenderer::recreatePipelines() {
.setLayout(pipelineLayout_)
.setRenderPass(mainPass)
.setDynamicStates({ VK_DYNAMIC_STATE_VIEWPORT, VK_DYNAMIC_STATE_SCISSOR })
.setFlags(VK_PIPELINE_CREATE_ALLOW_DERIVATIVES_BIT)
.build(device, vkCtx_->getPipelineCache());
transparentPipeline_ = PipelineBuilder()
@ -3759,6 +3768,8 @@ void WMORenderer::recreatePipelines() {
.setLayout(pipelineLayout_)
.setRenderPass(mainPass)
.setDynamicStates({ VK_DYNAMIC_STATE_VIEWPORT, VK_DYNAMIC_STATE_SCISSOR })
.setFlags(VK_PIPELINE_CREATE_DERIVATIVE_BIT)
.setBasePipeline(opaquePipeline_)
.build(device, vkCtx_->getPipelineCache());
glassPipeline_ = PipelineBuilder()
@ -3773,6 +3784,8 @@ void WMORenderer::recreatePipelines() {
.setLayout(pipelineLayout_)
.setRenderPass(mainPass)
.setDynamicStates({ VK_DYNAMIC_STATE_VIEWPORT, VK_DYNAMIC_STATE_SCISSOR })
.setFlags(VK_PIPELINE_CREATE_DERIVATIVE_BIT)
.setBasePipeline(opaquePipeline_)
.build(device, vkCtx_->getPipelineCache());
wireframePipeline_ = PipelineBuilder()
@ -3787,6 +3800,8 @@ void WMORenderer::recreatePipelines() {
.setLayout(pipelineLayout_)
.setRenderPass(mainPass)
.setDynamicStates({ VK_DYNAMIC_STATE_VIEWPORT, VK_DYNAMIC_STATE_SCISSOR })
.setFlags(VK_PIPELINE_CREATE_DERIVATIVE_BIT)
.setBasePipeline(opaquePipeline_)
.build(device, vkCtx_->getPipelineCache());
vertShader.destroy();

View file

@ -537,20 +537,6 @@ static std::vector<ArchiveDesc> discoverArchives(const std::string& mpqDir,
return result;
}
// Read a text file into a vector of lines (for external listfile loading).
// Trailing '\r' is stripped (Windows line endings) and blank lines are dropped.
// Returns an empty vector if the file cannot be opened.
static std::vector<std::string> readLines(const std::string& path) {
    std::vector<std::string> result;
    std::ifstream in(path);
    if (!in) return result;
    for (std::string raw; std::getline(in, raw); ) {
        if (!raw.empty() && raw.back() == '\r') raw.pop_back(); // trim CR from CRLF
        if (raw.empty()) continue;                              // skip blank lines
        result.push_back(std::move(raw));
    }
    return result;
}
// Extract the (listfile) from an MPQ archive into a set of filenames
static void extractInternalListfile(HANDLE hMpq, std::set<std::string>& out) {
HANDLE hFile = nullptr;
@ -595,14 +581,9 @@ bool Extractor::enumerateFiles(const Options& opts,
std::cout << "Found " << archives.size() << " MPQ archives\n";
// Load external listfile into memory once (avoids repeated file I/O)
std::vector<std::string> externalEntries;
std::vector<const char*> externalPtrs;
if (!opts.listFile.empty()) {
externalEntries = readLines(opts.listFile);
externalPtrs.reserve(externalEntries.size());
for (const auto& e : externalEntries) externalPtrs.push_back(e.c_str());
std::cout << " Loaded external listfile: " << externalEntries.size() << " entries\n";
const bool haveExternalListFile = !opts.listFile.empty();
if (haveExternalListFile) {
std::cout << " Using external listfile: " << opts.listFile << "\n";
}
const auto wantedDbcs = buildWantedDbcSet(opts);
@ -616,12 +597,11 @@ bool Extractor::enumerateFiles(const Options& opts,
continue;
}
// Inject external listfile entries into archive's in-memory name table.
// SFileAddListFileEntries is fast — it only hashes the names against the
// archive's hash table, no file I/O involved.
if (!externalPtrs.empty()) {
SFileAddListFileEntries(hMpq, externalPtrs.data(),
static_cast<DWORD>(externalPtrs.size()));
// Inject external listfile into archive's in-memory name table.
// SFileAddListFile reads the file and hashes names against the
// archive's hash table.
if (haveExternalListFile) {
SFileAddListFile(hMpq, opts.listFile.c_str());
}
if (opts.verbose) {