Kelsidavis-WoWee/src/rendering/m2_renderer.cpp

1642 lines
78 KiB
C++
Raw Normal View History

#include "rendering/m2_renderer.hpp"
#include "rendering/m2_renderer_internal.h"
#include "rendering/m2_model_classifier.hpp"
#include "rendering/vk_context.hpp"
#include "rendering/vk_buffer.hpp"
#include "rendering/vk_texture.hpp"
#include "rendering/vk_pipeline.hpp"
#include "rendering/vk_shader.hpp"
#include "rendering/vk_utils.hpp"
#include "rendering/vk_frame_data.hpp"
#include "rendering/camera.hpp"
#include "rendering/frustum.hpp"
#include "pipeline/asset_manager.hpp"
#include "pipeline/blp_loader.hpp"
#include "core/logger.hpp"
#include "core/profiler.hpp"
#include <chrono>
#include <cctype>
#include <glm/gtc/matrix_transform.hpp>
#include <glm/gtc/type_ptr.hpp>
#include <glm/gtx/quaternion.hpp>
#include <unordered_set>
2026-02-07 19:20:37 -08:00
#include <functional>
#include <algorithm>
#include <cmath>
#include <cstdlib>
#include <random>
#include <limits>
#include <future>
#include <thread>
namespace wowee {
namespace rendering {
namespace {
bool envFlagEnabled(const char* key, bool defaultValue) {
const char* raw = std::getenv(key);
if (!raw || !*raw) return defaultValue;
std::string v(raw);
std::transform(v.begin(), v.end(), v.begin(), [](unsigned char c) {
return static_cast<char>(std::tolower(c));
});
return !(v == "0" || v == "false" || v == "off" || v == "no");
}
} // namespace
void M2Instance::updateModelMatrix() {
modelMatrix = glm::mat4(1.0f);
modelMatrix = glm::translate(modelMatrix, position);
// Rotation in radians
modelMatrix = glm::rotate(modelMatrix, rotation.x, glm::vec3(1.0f, 0.0f, 0.0f));
modelMatrix = glm::rotate(modelMatrix, rotation.y, glm::vec3(0.0f, 1.0f, 0.0f));
modelMatrix = glm::rotate(modelMatrix, rotation.z, glm::vec3(0.0f, 0.0f, 1.0f));
modelMatrix = glm::scale(modelMatrix, glm::vec3(scale));
invModelMatrix = glm::inverse(modelMatrix);
}
M2Renderer::M2Renderer() {
}
M2Renderer::~M2Renderer() {
shutdown();
}
bool M2Renderer::initialize(VkContext* ctx, VkDescriptorSetLayout perFrameLayout,
pipeline::AssetManager* assets) {
if (initialized_) { assetManager = assets; return true; }
vkCtx_ = ctx;
assetManager = assets;
const unsigned hc = std::thread::hardware_concurrency();
const size_t availableCores = (hc > 1u) ? static_cast<size_t>(hc - 1u) : 1ull;
// Keep headroom for other frame tasks: M2 gets about half of non-main cores by default.
const size_t defaultAnimThreads = std::max<size_t>(1, availableCores / 2);
numAnimThreads_ = static_cast<uint32_t>(std::max<size_t>(
1, envSizeOrDefault("WOWEE_M2_ANIM_THREADS", defaultAnimThreads)));
LOG_INFO("Initializing M2 renderer (Vulkan, ", numAnimThreads_, " anim threads)...");
VkDevice device = vkCtx_->getDevice();
// --- Descriptor set layouts ---
// Material set layout (set 1): binding 0 = sampler2D, binding 2 = M2Material UBO
// (M2Params moved to push constants alongside model matrix)
{
VkDescriptorSetLayoutBinding bindings[2] = {};
bindings[0].binding = 0;
bindings[0].descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER;
bindings[0].descriptorCount = 1;
bindings[0].stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT;
bindings[1].binding = 2;
bindings[1].descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER;
bindings[1].descriptorCount = 1;
bindings[1].stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT;
VkDescriptorSetLayoutCreateInfo ci{VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO};
ci.bindingCount = 2;
ci.pBindings = bindings;
vkCreateDescriptorSetLayout(device, &ci, nullptr, &materialSetLayout_);
}
// Bone set layout (set 2): binding 0 = STORAGE_BUFFER (bone matrices)
{
VkDescriptorSetLayoutBinding binding{};
binding.binding = 0;
binding.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
binding.descriptorCount = 1;
binding.stageFlags = VK_SHADER_STAGE_VERTEX_BIT;
VkDescriptorSetLayoutCreateInfo ci{VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO};
ci.bindingCount = 1;
ci.pBindings = &binding;
vkCreateDescriptorSetLayout(device, &ci, nullptr, &boneSetLayout_);
}
feat(animation): decompose AnimationController into FSM-based architecture Replace the 2,200-line monolithic AnimationController (goto-driven, single class, untestable) with a composed FSM architecture per refactor.md. New subsystem (src/rendering/animation/ — 16 headers, 10 sources): - CharacterAnimator: FSM composer implementing ICharacterAnimator - LocomotionFSM: idle/walk/run/sprint/jump/swim/strafe - CombatFSM: melee/ranged/spell cast/stun/hit reaction/charge - ActivityFSM: emote/loot/sit-down/sitting/sit-up - MountFSM: idle/run/flight/taxi/fidget/rear-up (per-instance RNG) - AnimCapabilitySet + AnimCapabilityProbe: probe once at model load, eliminate per-frame hasAnimation() linear search - AnimationManager: registry of CharacterAnimator by GUID - EmoteRegistry: DBC-backed emote command → animId singleton - FootstepDriver, SfxStateDriver: extracted from AnimationController animation_ids.hpp/.cpp moved to animation/ subdirectory (452 named constants); all include paths updated. AnimationController retained as thin adapter (~400 LOC): collects FrameInput, delegates to CharacterAnimator, applies AnimOutput. Priority order: Mount > Stun > HitReaction > Spell > Charge > Melee/Ranged > CombatIdle > Emote > Loot > Sit > Locomotion. STAY_IN_STATE policy when all FSMs return valid=false. Bugs fixed: - Remove static mt19937 in mount fidget (shared state across all mounted units) — replaced with per-instance seeded RNG - Remove goto from mounted animation branch (skipped init) - Remove per-frame hasAnimation() calls (now one probe at load) - Fix VK_INDEX_TYPE_UINT16 → UINT32 in shadow pass Tests (4 new suites, all ASAN+UBSan clean): - test_locomotion_fsm: 167 assertions - test_combat_fsm: 125 cases - test_activity_fsm: 112 cases - test_anim_capability: 56 cases docs/ANIMATION_SYSTEM.md added (architecture reference).
2026-04-05 12:27:35 +03:00
// Instance data set layout (set 3): binding 0 = STORAGE_BUFFER (per-instance data)
feat(rendering): GPU architecture + visual quality fixes M2 GPU instancing - M2InstanceGPU SSBO (96 B/entry, double-buffered, 16384 max) - Group opaque instances by (modelId, LOD); single vkCmdDrawIndexed per group - boneBase field indexes into mega bone SSBO via gl_InstanceIndex Indirect terrain drawing - 24 MB mega index buffer (6M uint32) + 64 MB mega vertex buffer - CPU builds VkDrawIndexedIndirectCommand per visible chunk - Single VB/IB bind per frame; shadow pass reuses mega buffers - Replaced vkCmdDrawIndexedIndirect with direct vkCmdDrawIndexed to fix host-mapped buffer race condition that caused terrain flickering GPU frustum culling (compute shader) - m2_cull.comp.glsl: 64-thread workgroups, sphere-vs-6-planes + distance cull - CullInstanceGPU SSBO input, uint visibility[] output, double-buffered - dispatchCullCompute() runs before main pass via render graph node Consolidated bone matrix SSBOs - 16 MB double-buffered mega bone SSBO (2048 instances × 128 bones) - Eliminated per-instance descriptor sets; one megaBoneSet_ per frame - prepareRender() packs bone matrices consecutively into current frame slot Render graph / frame graph - RenderGraph: RGResource handles, RGPass nodes, Kahn topological sort - Automatic VkImageMemoryBarrier/VkBufferMemoryBarrier between passes - Passes: minimap_composite, worldmap_composite, preview_composite, shadow_pass, reflection_pass, compute_cull - beginFrame() uses buildFrameGraph() + renderGraph_->execute(cmd) Pipeline derivatives - PipelineBuilder::setFlags/setBasePipeline for VK_PIPELINE_CREATE_DERIVATIVE_BIT - M2 opaque = base; alphaTest/alpha/additive are derivatives - Applied to terrain (wireframe) and WMO (alpha-test) renderers Rendering bug fixes: - fix(shadow): compute lightSpaceMatrix before updatePerFrameUBO to eliminate one-frame lag that caused shadow trails and flicker on moving objects - fix(shadow): scale depth bias with shadowDistance_ instead of hardcoded 0.8f to prevent acne at close range and gaps at far range - fix(visibility): WMO group distance threshold 500u → 1200u to match terrain view distance; buildings were disappearing on the horizon - fix(precision): camera near plane 0.05 → 0.5 (ratio 600K:1 → 60K:1), eliminating Z-fighting and improving frustum plane extraction stability - fix(streaming): terrain load radius 4 → 6 tiles (~2133u → ~3200u) to exceed M2 render distance (2800u) and eliminate pop-in when camera turns; unload radius 7 → 9; spawn radius 3 → 4 - fix(visibility): ground-detail M2 distance multiplier 0.75 → 0.9 to reduce early pop of grass and debris
2026-04-04 13:43:16 +03:00
{
VkDescriptorSetLayoutBinding binding{};
binding.binding = 0;
binding.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
binding.descriptorCount = 1;
binding.stageFlags = VK_SHADER_STAGE_VERTEX_BIT;
VkDescriptorSetLayoutCreateInfo ci{VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO};
ci.bindingCount = 1;
ci.pBindings = &binding;
vkCreateDescriptorSetLayout(device, &ci, nullptr, &instanceSetLayout_);
}
// Particle texture set layout (set 1 for particles): binding 0 = sampler2D
{
VkDescriptorSetLayoutBinding binding{};
binding.binding = 0;
binding.descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER;
binding.descriptorCount = 1;
binding.stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT;
VkDescriptorSetLayoutCreateInfo ci{VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO};
ci.bindingCount = 1;
ci.pBindings = &binding;
vkCreateDescriptorSetLayout(device, &ci, nullptr, &particleTexLayout_);
}
// --- Descriptor pools ---
{
VkDescriptorPoolSize sizes[] = {
{VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, MAX_MATERIAL_SETS + 256},
{VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, MAX_MATERIAL_SETS + 256},
};
VkDescriptorPoolCreateInfo ci{VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO};
ci.maxSets = MAX_MATERIAL_SETS + 256;
ci.poolSizeCount = 2;
ci.pPoolSizes = sizes;
ci.flags = VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT;
vkCreateDescriptorPool(device, &ci, nullptr, &materialDescPool_);
}
{
VkDescriptorPoolSize sizes[] = {
{VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, MAX_BONE_SETS},
};
VkDescriptorPoolCreateInfo ci{VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO};
ci.maxSets = MAX_BONE_SETS;
ci.poolSizeCount = 1;
ci.pPoolSizes = sizes;
ci.flags = VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT;
vkCreateDescriptorPool(device, &ci, nullptr, &boneDescPool_);
}
2026-03-22 21:47:12 +03:00
// Create a small identity-bone SSBO + descriptor set so that non-animated
// draws always have a valid set 2 bound. The Intel ANV driver segfaults
// on vkCmdDrawIndexed when a declared descriptor set slot is unbound.
{
// Single identity matrix (bone 0 = identity)
glm::mat4 identity(1.0f);
VkBufferCreateInfo bci{VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO};
bci.size = sizeof(glm::mat4);
bci.usage = VK_BUFFER_USAGE_STORAGE_BUFFER_BIT;
VmaAllocationCreateInfo aci{};
aci.usage = VMA_MEMORY_USAGE_CPU_TO_GPU;
aci.flags = VMA_ALLOCATION_CREATE_MAPPED_BIT;
VmaAllocationInfo allocInfo{};
vmaCreateBuffer(ctx->getAllocator(), &bci, &aci,
&dummyBoneBuffer_, &dummyBoneAlloc_, &allocInfo);
if (allocInfo.pMappedData) {
memcpy(allocInfo.pMappedData, &identity, sizeof(identity));
}
dummyBoneSet_ = allocateBoneSet();
if (dummyBoneSet_) {
VkDescriptorBufferInfo bufInfo{};
bufInfo.buffer = dummyBoneBuffer_;
bufInfo.offset = 0;
bufInfo.range = sizeof(glm::mat4);
VkWriteDescriptorSet write{VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET};
write.dstSet = dummyBoneSet_;
write.dstBinding = 0;
write.descriptorCount = 1;
write.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
write.pBufferInfo = &bufInfo;
vkUpdateDescriptorSets(device, 1, &write, 0, nullptr);
}
}
feat(rendering): GPU architecture + visual quality fixes M2 GPU instancing - M2InstanceGPU SSBO (96 B/entry, double-buffered, 16384 max) - Group opaque instances by (modelId, LOD); single vkCmdDrawIndexed per group - boneBase field indexes into mega bone SSBO via gl_InstanceIndex Indirect terrain drawing - 24 MB mega index buffer (6M uint32) + 64 MB mega vertex buffer - CPU builds VkDrawIndexedIndirectCommand per visible chunk - Single VB/IB bind per frame; shadow pass reuses mega buffers - Replaced vkCmdDrawIndexedIndirect with direct vkCmdDrawIndexed to fix host-mapped buffer race condition that caused terrain flickering GPU frustum culling (compute shader) - m2_cull.comp.glsl: 64-thread workgroups, sphere-vs-6-planes + distance cull - CullInstanceGPU SSBO input, uint visibility[] output, double-buffered - dispatchCullCompute() runs before main pass via render graph node Consolidated bone matrix SSBOs - 16 MB double-buffered mega bone SSBO (2048 instances × 128 bones) - Eliminated per-instance descriptor sets; one megaBoneSet_ per frame - prepareRender() packs bone matrices consecutively into current frame slot Render graph / frame graph - RenderGraph: RGResource handles, RGPass nodes, Kahn topological sort - Automatic VkImageMemoryBarrier/VkBufferMemoryBarrier between passes - Passes: minimap_composite, worldmap_composite, preview_composite, shadow_pass, reflection_pass, compute_cull - beginFrame() uses buildFrameGraph() + renderGraph_->execute(cmd) Pipeline derivatives - PipelineBuilder::setFlags/setBasePipeline for VK_PIPELINE_CREATE_DERIVATIVE_BIT - M2 opaque = base; alphaTest/alpha/additive are derivatives - Applied to terrain (wireframe) and WMO (alpha-test) renderers Rendering bug fixes: - fix(shadow): compute lightSpaceMatrix before updatePerFrameUBO to eliminate one-frame lag that caused shadow trails and flicker on moving objects - fix(shadow): scale depth bias with shadowDistance_ instead of hardcoded 0.8f to prevent acne at close range and gaps at far range - fix(visibility): WMO group distance threshold 500u → 1200u to match terrain view distance; buildings were disappearing on the horizon - fix(precision): camera near plane 0.05 → 0.5 (ratio 600K:1 → 60K:1), eliminating Z-fighting and improving frustum plane extraction stability - fix(streaming): terrain load radius 4 → 6 tiles (~2133u → ~3200u) to exceed M2 render distance (2800u) and eliminate pop-in when camera turns; unload radius 7 → 9; spawn radius 3 → 4 - fix(visibility): ground-detail M2 distance multiplier 0.75 → 0.9 to reduce early pop of grass and debris
2026-04-04 13:43:16 +03:00
// Mega bone SSBO — consolidates all animated instance bones into one buffer per frame.
// Slot 0 = identity matrix (for non-animated instances), slots 1..N = animated instances.
{
const VkDeviceSize megaSize = MEGA_BONE_MAX_INSTANCES * MAX_BONES_PER_INSTANCE * sizeof(glm::mat4);
glm::mat4 identity(1.0f);
for (int i = 0; i < 2; i++) {
VkBufferCreateInfo bci{VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO};
bci.size = megaSize;
bci.usage = VK_BUFFER_USAGE_STORAGE_BUFFER_BIT;
VmaAllocationCreateInfo aci{};
aci.usage = VMA_MEMORY_USAGE_CPU_TO_GPU;
aci.flags = VMA_ALLOCATION_CREATE_MAPPED_BIT;
VmaAllocationInfo allocInfo{};
vmaCreateBuffer(ctx->getAllocator(), &bci, &aci,
&megaBoneBuffer_[i], &megaBoneAlloc_[i], &allocInfo);
megaBoneMapped_[i] = allocInfo.pMappedData;
// Slot 0: identity matrix (for non-animated instances)
if (megaBoneMapped_[i]) {
memcpy(megaBoneMapped_[i], &identity, sizeof(identity));
}
megaBoneSet_[i] = allocateBoneSet();
if (megaBoneSet_[i]) {
VkDescriptorBufferInfo bufInfo{};
bufInfo.buffer = megaBoneBuffer_[i];
bufInfo.offset = 0;
bufInfo.range = megaSize;
VkWriteDescriptorSet write{VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET};
write.dstSet = megaBoneSet_[i];
write.dstBinding = 0;
write.descriptorCount = 1;
write.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
write.pBufferInfo = &bufInfo;
vkUpdateDescriptorSets(device, 1, &write, 0, nullptr);
}
}
}
feat(animation): decompose AnimationController into FSM-based architecture Replace the 2,200-line monolithic AnimationController (goto-driven, single class, untestable) with a composed FSM architecture per refactor.md. New subsystem (src/rendering/animation/ — 16 headers, 10 sources): - CharacterAnimator: FSM composer implementing ICharacterAnimator - LocomotionFSM: idle/walk/run/sprint/jump/swim/strafe - CombatFSM: melee/ranged/spell cast/stun/hit reaction/charge - ActivityFSM: emote/loot/sit-down/sitting/sit-up - MountFSM: idle/run/flight/taxi/fidget/rear-up (per-instance RNG) - AnimCapabilitySet + AnimCapabilityProbe: probe once at model load, eliminate per-frame hasAnimation() linear search - AnimationManager: registry of CharacterAnimator by GUID - EmoteRegistry: DBC-backed emote command → animId singleton - FootstepDriver, SfxStateDriver: extracted from AnimationController animation_ids.hpp/.cpp moved to animation/ subdirectory (452 named constants); all include paths updated. AnimationController retained as thin adapter (~400 LOC): collects FrameInput, delegates to CharacterAnimator, applies AnimOutput. Priority order: Mount > Stun > HitReaction > Spell > Charge > Melee/Ranged > CombatIdle > Emote > Loot > Sit > Locomotion. STAY_IN_STATE policy when all FSMs return valid=false. Bugs fixed: - Remove static mt19937 in mount fidget (shared state across all mounted units) — replaced with per-instance seeded RNG - Remove goto from mounted animation branch (skipped init) - Remove per-frame hasAnimation() calls (now one probe at load) - Fix VK_INDEX_TYPE_UINT16 → UINT32 in shadow pass Tests (4 new suites, all ASAN+UBSan clean): - test_locomotion_fsm: 167 assertions - test_combat_fsm: 125 cases - test_activity_fsm: 112 cases - test_anim_capability: 56 cases docs/ANIMATION_SYSTEM.md added (architecture reference).
2026-04-05 12:27:35 +03:00
// Instance data SSBO — per-frame buffer holding per-instance transforms, fade, bones.
feat(rendering): GPU architecture + visual quality fixes M2 GPU instancing - M2InstanceGPU SSBO (96 B/entry, double-buffered, 16384 max) - Group opaque instances by (modelId, LOD); single vkCmdDrawIndexed per group - boneBase field indexes into mega bone SSBO via gl_InstanceIndex Indirect terrain drawing - 24 MB mega index buffer (6M uint32) + 64 MB mega vertex buffer - CPU builds VkDrawIndexedIndirectCommand per visible chunk - Single VB/IB bind per frame; shadow pass reuses mega buffers - Replaced vkCmdDrawIndexedIndirect with direct vkCmdDrawIndexed to fix host-mapped buffer race condition that caused terrain flickering GPU frustum culling (compute shader) - m2_cull.comp.glsl: 64-thread workgroups, sphere-vs-6-planes + distance cull - CullInstanceGPU SSBO input, uint visibility[] output, double-buffered - dispatchCullCompute() runs before main pass via render graph node Consolidated bone matrix SSBOs - 16 MB double-buffered mega bone SSBO (2048 instances × 128 bones) - Eliminated per-instance descriptor sets; one megaBoneSet_ per frame - prepareRender() packs bone matrices consecutively into current frame slot Render graph / frame graph - RenderGraph: RGResource handles, RGPass nodes, Kahn topological sort - Automatic VkImageMemoryBarrier/VkBufferMemoryBarrier between passes - Passes: minimap_composite, worldmap_composite, preview_composite, shadow_pass, reflection_pass, compute_cull - beginFrame() uses buildFrameGraph() + renderGraph_->execute(cmd) Pipeline derivatives - PipelineBuilder::setFlags/setBasePipeline for VK_PIPELINE_CREATE_DERIVATIVE_BIT - M2 opaque = base; alphaTest/alpha/additive are derivatives - Applied to terrain (wireframe) and WMO (alpha-test) renderers Rendering bug fixes: - fix(shadow): compute lightSpaceMatrix before updatePerFrameUBO to eliminate one-frame lag that caused shadow trails and flicker on moving objects - fix(shadow): scale depth bias with shadowDistance_ instead of hardcoded 0.8f to prevent acne at close range and gaps at far range - fix(visibility): WMO group distance threshold 500u → 1200u to match terrain view distance; buildings were disappearing on the horizon - fix(precision): camera near plane 0.05 → 0.5 (ratio 600K:1 → 60K:1), eliminating Z-fighting and improving frustum plane extraction stability - fix(streaming): terrain load radius 4 → 6 tiles (~2133u → ~3200u) to exceed M2 render distance (2800u) and eliminate pop-in when camera turns; unload radius 7 → 9; spawn radius 3 → 4 - fix(visibility): ground-detail M2 distance multiplier 0.75 → 0.9 to reduce early pop of grass and debris
2026-04-04 13:43:16 +03:00
// Shader reads instanceData[push.instanceDataOffset + gl_InstanceIndex].
{
static_assert(sizeof(M2InstanceGPU) == 96, "M2InstanceGPU must be 96 bytes (std430)");
const VkDeviceSize instBufSize = MAX_INSTANCE_DATA * sizeof(M2InstanceGPU);
// Descriptor pool for 2 sets (double-buffered)
VkDescriptorPoolSize poolSize{VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 2};
VkDescriptorPoolCreateInfo poolCi{VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO};
poolCi.maxSets = 2;
poolCi.poolSizeCount = 1;
poolCi.pPoolSizes = &poolSize;
vkCreateDescriptorPool(device, &poolCi, nullptr, &instanceDescPool_);
for (int i = 0; i < 2; i++) {
VkBufferCreateInfo bci{VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO};
bci.size = instBufSize;
bci.usage = VK_BUFFER_USAGE_STORAGE_BUFFER_BIT;
VmaAllocationCreateInfo aci{};
aci.usage = VMA_MEMORY_USAGE_CPU_TO_GPU;
aci.flags = VMA_ALLOCATION_CREATE_MAPPED_BIT;
VmaAllocationInfo allocInfo{};
vmaCreateBuffer(ctx->getAllocator(), &bci, &aci,
&instanceBuffer_[i], &instanceAlloc_[i], &allocInfo);
instanceMapped_[i] = allocInfo.pMappedData;
VkDescriptorSetAllocateInfo setAi{VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO};
setAi.descriptorPool = instanceDescPool_;
setAi.descriptorSetCount = 1;
setAi.pSetLayouts = &instanceSetLayout_;
vkAllocateDescriptorSets(device, &setAi, &instanceSet_[i]);
VkDescriptorBufferInfo bufInfo{};
bufInfo.buffer = instanceBuffer_[i];
bufInfo.offset = 0;
bufInfo.range = instBufSize;
VkWriteDescriptorSet write{VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET};
write.dstSet = instanceSet_[i];
write.dstBinding = 0;
write.descriptorCount = 1;
write.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
write.pBufferInfo = &bufInfo;
vkUpdateDescriptorSets(device, 1, &write, 0, nullptr);
}
}
feat(animation): decompose AnimationController into FSM-based architecture Replace the 2,200-line monolithic AnimationController (goto-driven, single class, untestable) with a composed FSM architecture per refactor.md. New subsystem (src/rendering/animation/ — 16 headers, 10 sources): - CharacterAnimator: FSM composer implementing ICharacterAnimator - LocomotionFSM: idle/walk/run/sprint/jump/swim/strafe - CombatFSM: melee/ranged/spell cast/stun/hit reaction/charge - ActivityFSM: emote/loot/sit-down/sitting/sit-up - MountFSM: idle/run/flight/taxi/fidget/rear-up (per-instance RNG) - AnimCapabilitySet + AnimCapabilityProbe: probe once at model load, eliminate per-frame hasAnimation() linear search - AnimationManager: registry of CharacterAnimator by GUID - EmoteRegistry: DBC-backed emote command → animId singleton - FootstepDriver, SfxStateDriver: extracted from AnimationController animation_ids.hpp/.cpp moved to animation/ subdirectory (452 named constants); all include paths updated. AnimationController retained as thin adapter (~400 LOC): collects FrameInput, delegates to CharacterAnimator, applies AnimOutput. Priority order: Mount > Stun > HitReaction > Spell > Charge > Melee/Ranged > CombatIdle > Emote > Loot > Sit > Locomotion. STAY_IN_STATE policy when all FSMs return valid=false. Bugs fixed: - Remove static mt19937 in mount fidget (shared state across all mounted units) — replaced with per-instance seeded RNG - Remove goto from mounted animation branch (skipped init) - Remove per-frame hasAnimation() calls (now one probe at load) - Fix VK_INDEX_TYPE_UINT16 → UINT32 in shadow pass Tests (4 new suites, all ASAN+UBSan clean): - test_locomotion_fsm: 167 assertions - test_combat_fsm: 125 cases - test_activity_fsm: 112 cases - test_anim_capability: 56 cases docs/ANIMATION_SYSTEM.md added (architecture reference).
2026-04-05 12:27:35 +03:00
// GPU frustum culling — compute pipeline, buffers, descriptors.
feat(rendering): GPU architecture + visual quality fixes M2 GPU instancing - M2InstanceGPU SSBO (96 B/entry, double-buffered, 16384 max) - Group opaque instances by (modelId, LOD); single vkCmdDrawIndexed per group - boneBase field indexes into mega bone SSBO via gl_InstanceIndex Indirect terrain drawing - 24 MB mega index buffer (6M uint32) + 64 MB mega vertex buffer - CPU builds VkDrawIndexedIndirectCommand per visible chunk - Single VB/IB bind per frame; shadow pass reuses mega buffers - Replaced vkCmdDrawIndexedIndirect with direct vkCmdDrawIndexed to fix host-mapped buffer race condition that caused terrain flickering GPU frustum culling (compute shader) - m2_cull.comp.glsl: 64-thread workgroups, sphere-vs-6-planes + distance cull - CullInstanceGPU SSBO input, uint visibility[] output, double-buffered - dispatchCullCompute() runs before main pass via render graph node Consolidated bone matrix SSBOs - 16 MB double-buffered mega bone SSBO (2048 instances × 128 bones) - Eliminated per-instance descriptor sets; one megaBoneSet_ per frame - prepareRender() packs bone matrices consecutively into current frame slot Render graph / frame graph - RenderGraph: RGResource handles, RGPass nodes, Kahn topological sort - Automatic VkImageMemoryBarrier/VkBufferMemoryBarrier between passes - Passes: minimap_composite, worldmap_composite, preview_composite, shadow_pass, reflection_pass, compute_cull - beginFrame() uses buildFrameGraph() + renderGraph_->execute(cmd) Pipeline derivatives - PipelineBuilder::setFlags/setBasePipeline for VK_PIPELINE_CREATE_DERIVATIVE_BIT - M2 opaque = base; alphaTest/alpha/additive are derivatives - Applied to terrain (wireframe) and WMO (alpha-test) renderers Rendering bug fixes: - fix(shadow): compute lightSpaceMatrix before updatePerFrameUBO to eliminate one-frame lag that caused shadow trails and flicker on moving objects - fix(shadow): scale depth bias with shadowDistance_ instead of hardcoded 0.8f to prevent acne at close range and gaps at far range - fix(visibility): WMO group distance threshold 500u → 1200u to match terrain view distance; buildings were disappearing on the horizon - fix(precision): camera near plane 0.05 → 0.5 (ratio 600K:1 → 60K:1), eliminating Z-fighting and improving frustum plane extraction stability - fix(streaming): terrain load radius 4 → 6 tiles (~2133u → ~3200u) to exceed M2 render distance (2800u) and eliminate pop-in when camera turns; unload radius 7 → 9; spawn radius 3 → 4 - fix(visibility): ground-detail M2 distance multiplier 0.75 → 0.9 to reduce early pop of grass and debris
2026-04-04 13:43:16 +03:00
// Compute shader tests each instance bounding sphere against 6 frustum planes + distance.
// Output: uint visibility[] read back by CPU to skip culled instances in sortedVisible_ build.
{
static_assert(sizeof(CullInstanceGPU) == 32, "CullInstanceGPU must be 32 bytes (std430)");
static_assert(sizeof(CullUniformsGPU) == 128, "CullUniformsGPU must be 128 bytes (std140)");
// Descriptor set layout: binding 0 = UBO (frustum+camera), 1 = SSBO (input), 2 = SSBO (output)
VkDescriptorSetLayoutBinding bindings[3] = {};
bindings[0].binding = 0;
bindings[0].descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER;
bindings[0].descriptorCount = 1;
bindings[0].stageFlags = VK_SHADER_STAGE_COMPUTE_BIT;
bindings[1].binding = 1;
bindings[1].descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
bindings[1].descriptorCount = 1;
bindings[1].stageFlags = VK_SHADER_STAGE_COMPUTE_BIT;
bindings[2].binding = 2;
bindings[2].descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
bindings[2].descriptorCount = 1;
bindings[2].stageFlags = VK_SHADER_STAGE_COMPUTE_BIT;
VkDescriptorSetLayoutCreateInfo layoutCi{VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO};
layoutCi.bindingCount = 3;
layoutCi.pBindings = bindings;
vkCreateDescriptorSetLayout(device, &layoutCi, nullptr, &cullSetLayout_);
// Pipeline layout (no push constants — everything via UBO)
VkPipelineLayoutCreateInfo plCi{VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO};
plCi.setLayoutCount = 1;
plCi.pSetLayouts = &cullSetLayout_;
vkCreatePipelineLayout(device, &plCi, nullptr, &cullPipelineLayout_);
// Load compute shader
rendering::VkShaderModule cullComp;
if (!cullComp.loadFromFile(device, "assets/shaders/m2_cull.comp.spv")) {
LOG_ERROR("M2Renderer: failed to load m2_cull.comp.spv — GPU culling disabled");
} else {
VkComputePipelineCreateInfo cpCi{VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO};
cpCi.stage = cullComp.stageInfo(VK_SHADER_STAGE_COMPUTE_BIT);
cpCi.layout = cullPipelineLayout_;
if (vkCreateComputePipelines(device, VK_NULL_HANDLE, 1, &cpCi, nullptr, &cullPipeline_) != VK_SUCCESS) {
LOG_ERROR("M2Renderer: failed to create cull compute pipeline");
cullPipeline_ = VK_NULL_HANDLE;
}
cullComp.destroy();
}
// Descriptor pool: 2 sets × 3 descriptors each (1 UBO + 2 SSBO)
VkDescriptorPoolSize poolSizes[2] = {};
poolSizes[0] = {VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, 2};
poolSizes[1] = {VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 4}; // 2 input + 2 output
VkDescriptorPoolCreateInfo poolCi{VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO};
poolCi.maxSets = 2;
poolCi.poolSizeCount = 2;
poolCi.pPoolSizes = poolSizes;
vkCreateDescriptorPool(device, &poolCi, nullptr, &cullDescPool_);
const VkDeviceSize uniformSize = sizeof(CullUniformsGPU);
const VkDeviceSize inputSize = MAX_CULL_INSTANCES * sizeof(CullInstanceGPU);
const VkDeviceSize outputSize = MAX_CULL_INSTANCES * sizeof(uint32_t);
for (int i = 0; i < 2; i++) {
// Uniform buffer (frustum planes + camera)
{
VkBufferCreateInfo bci{VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO};
bci.size = uniformSize;
bci.usage = VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT;
VmaAllocationCreateInfo aci{};
aci.usage = VMA_MEMORY_USAGE_CPU_TO_GPU;
aci.flags = VMA_ALLOCATION_CREATE_MAPPED_BIT;
VmaAllocationInfo ai{};
vmaCreateBuffer(ctx->getAllocator(), &bci, &aci,
&cullUniformBuffer_[i], &cullUniformAlloc_[i], &ai);
cullUniformMapped_[i] = ai.pMappedData;
}
// Input SSBO (per-instance cull data)
{
VkBufferCreateInfo bci{VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO};
bci.size = inputSize;
bci.usage = VK_BUFFER_USAGE_STORAGE_BUFFER_BIT;
VmaAllocationCreateInfo aci{};
aci.usage = VMA_MEMORY_USAGE_CPU_TO_GPU;
aci.flags = VMA_ALLOCATION_CREATE_MAPPED_BIT;
VmaAllocationInfo ai{};
vmaCreateBuffer(ctx->getAllocator(), &bci, &aci,
&cullInputBuffer_[i], &cullInputAlloc_[i], &ai);
cullInputMapped_[i] = ai.pMappedData;
}
// Output SSBO (visibility flags — GPU writes, CPU reads)
{
VkBufferCreateInfo bci{VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO};
bci.size = outputSize;
bci.usage = VK_BUFFER_USAGE_STORAGE_BUFFER_BIT;
VmaAllocationCreateInfo aci{};
aci.usage = VMA_MEMORY_USAGE_GPU_TO_CPU;
aci.flags = VMA_ALLOCATION_CREATE_MAPPED_BIT;
VmaAllocationInfo ai{};
vmaCreateBuffer(ctx->getAllocator(), &bci, &aci,
&cullOutputBuffer_[i], &cullOutputAlloc_[i], &ai);
cullOutputMapped_[i] = ai.pMappedData;
}
// Allocate and write descriptor set
VkDescriptorSetAllocateInfo setAi{VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO};
setAi.descriptorPool = cullDescPool_;
setAi.descriptorSetCount = 1;
setAi.pSetLayouts = &cullSetLayout_;
vkAllocateDescriptorSets(device, &setAi, &cullSet_[i]);
VkDescriptorBufferInfo uboInfo{cullUniformBuffer_[i], 0, uniformSize};
VkDescriptorBufferInfo inputInfo{cullInputBuffer_[i], 0, inputSize};
VkDescriptorBufferInfo outputInfo{cullOutputBuffer_[i], 0, outputSize};
VkWriteDescriptorSet writes[3] = {};
writes[0] = {VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET};
writes[0].dstSet = cullSet_[i];
writes[0].dstBinding = 0;
writes[0].descriptorCount = 1;
writes[0].descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER;
writes[0].pBufferInfo = &uboInfo;
writes[1] = {VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET};
writes[1].dstSet = cullSet_[i];
writes[1].dstBinding = 1;
writes[1].descriptorCount = 1;
writes[1].descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
writes[1].pBufferInfo = &inputInfo;
writes[2] = {VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET};
writes[2].dstSet = cullSet_[i];
writes[2].dstBinding = 2;
writes[2].descriptorCount = 1;
writes[2].descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
writes[2].pBufferInfo = &outputInfo;
vkUpdateDescriptorSets(device, 3, writes, 0, nullptr);
}
}
// --- Pipeline layouts ---
feat(rendering): GPU architecture + visual quality fixes M2 GPU instancing - M2InstanceGPU SSBO (96 B/entry, double-buffered, 16384 max) - Group opaque instances by (modelId, LOD); single vkCmdDrawIndexed per group - boneBase field indexes into mega bone SSBO via gl_InstanceIndex Indirect terrain drawing - 24 MB mega index buffer (6M uint32) + 64 MB mega vertex buffer - CPU builds VkDrawIndexedIndirectCommand per visible chunk - Single VB/IB bind per frame; shadow pass reuses mega buffers - Replaced vkCmdDrawIndexedIndirect with direct vkCmdDrawIndexed to fix host-mapped buffer race condition that caused terrain flickering GPU frustum culling (compute shader) - m2_cull.comp.glsl: 64-thread workgroups, sphere-vs-6-planes + distance cull - CullInstanceGPU SSBO input, uint visibility[] output, double-buffered - dispatchCullCompute() runs before main pass via render graph node Consolidated bone matrix SSBOs - 16 MB double-buffered mega bone SSBO (2048 instances × 128 bones) - Eliminated per-instance descriptor sets; one megaBoneSet_ per frame - prepareRender() packs bone matrices consecutively into current frame slot Render graph / frame graph - RenderGraph: RGResource handles, RGPass nodes, Kahn topological sort - Automatic VkImageMemoryBarrier/VkBufferMemoryBarrier between passes - Passes: minimap_composite, worldmap_composite, preview_composite, shadow_pass, reflection_pass, compute_cull - beginFrame() uses buildFrameGraph() + renderGraph_->execute(cmd) Pipeline derivatives - PipelineBuilder::setFlags/setBasePipeline for VK_PIPELINE_CREATE_DERIVATIVE_BIT - M2 opaque = base; alphaTest/alpha/additive are derivatives - Applied to terrain (wireframe) and WMO (alpha-test) renderers Rendering bug fixes: - fix(shadow): compute lightSpaceMatrix before updatePerFrameUBO to eliminate one-frame lag that caused shadow trails and flicker on moving objects - fix(shadow): scale depth bias with shadowDistance_ instead of hardcoded 0.8f to prevent acne at close range and gaps at far range - fix(visibility): WMO group distance threshold 500u → 1200u to match terrain view distance; buildings were disappearing on the horizon - fix(precision): camera near plane 0.05 → 0.5 (ratio 600K:1 → 60K:1), eliminating Z-fighting and improving frustum plane extraction stability - fix(streaming): terrain load radius 4 → 6 tiles (~2133u → ~3200u) to exceed M2 render distance (2800u) and eliminate pop-in when camera turns; unload radius 7 → 9; spawn radius 3 → 4 - fix(visibility): ground-detail M2 distance multiplier 0.75 → 0.9 to reduce early pop of grass and debris
2026-04-04 13:43:16 +03:00
// Main M2 pipeline layout: set 0 = perFrame, set 1 = material, set 2 = bones, set 3 = instances
// Push constant: int texCoordSet + int isFoliage + int instanceDataOffset (12 bytes)
{
feat(rendering): GPU architecture + visual quality fixes M2 GPU instancing - M2InstanceGPU SSBO (96 B/entry, double-buffered, 16384 max) - Group opaque instances by (modelId, LOD); single vkCmdDrawIndexed per group - boneBase field indexes into mega bone SSBO via gl_InstanceIndex Indirect terrain drawing - 24 MB mega index buffer (6M uint32) + 64 MB mega vertex buffer - CPU builds VkDrawIndexedIndirectCommand per visible chunk - Single VB/IB bind per frame; shadow pass reuses mega buffers - Replaced vkCmdDrawIndexedIndirect with direct vkCmdDrawIndexed to fix host-mapped buffer race condition that caused terrain flickering GPU frustum culling (compute shader) - m2_cull.comp.glsl: 64-thread workgroups, sphere-vs-6-planes + distance cull - CullInstanceGPU SSBO input, uint visibility[] output, double-buffered - dispatchCullCompute() runs before main pass via render graph node Consolidated bone matrix SSBOs - 16 MB double-buffered mega bone SSBO (2048 instances × 128 bones) - Eliminated per-instance descriptor sets; one megaBoneSet_ per frame - prepareRender() packs bone matrices consecutively into current frame slot Render graph / frame graph - RenderGraph: RGResource handles, RGPass nodes, Kahn topological sort - Automatic VkImageMemoryBarrier/VkBufferMemoryBarrier between passes - Passes: minimap_composite, worldmap_composite, preview_composite, shadow_pass, reflection_pass, compute_cull - beginFrame() uses buildFrameGraph() + renderGraph_->execute(cmd) Pipeline derivatives - PipelineBuilder::setFlags/setBasePipeline for VK_PIPELINE_CREATE_DERIVATIVE_BIT - M2 opaque = base; alphaTest/alpha/additive are derivatives - Applied to terrain (wireframe) and WMO (alpha-test) renderers Rendering bug fixes: - fix(shadow): compute lightSpaceMatrix before updatePerFrameUBO to eliminate one-frame lag that caused shadow trails and flicker on moving objects - fix(shadow): scale depth bias with shadowDistance_ instead of hardcoded 0.8f to prevent acne at close range and gaps at far range - fix(visibility): WMO group distance threshold 500u → 1200u to match terrain view distance; buildings were disappearing on the horizon - fix(precision): camera near plane 0.05 → 0.5 (ratio 600K:1 → 60K:1), eliminating Z-fighting and improving frustum plane extraction stability - fix(streaming): terrain load radius 4 → 6 tiles (~2133u → ~3200u) to exceed M2 render distance (2800u) and eliminate pop-in when camera turns; unload radius 7 → 9; spawn radius 3 → 4 - fix(visibility): ground-detail M2 distance multiplier 0.75 → 0.9 to reduce early pop of grass and debris
2026-04-04 13:43:16 +03:00
VkDescriptorSetLayout setLayouts[] = {perFrameLayout, materialSetLayout_, boneSetLayout_, instanceSetLayout_};
VkPushConstantRange pushRange{};
pushRange.stageFlags = VK_SHADER_STAGE_VERTEX_BIT;
pushRange.offset = 0;
feat(rendering): GPU architecture + visual quality fixes M2 GPU instancing - M2InstanceGPU SSBO (96 B/entry, double-buffered, 16384 max) - Group opaque instances by (modelId, LOD); single vkCmdDrawIndexed per group - boneBase field indexes into mega bone SSBO via gl_InstanceIndex Indirect terrain drawing - 24 MB mega index buffer (6M uint32) + 64 MB mega vertex buffer - CPU builds VkDrawIndexedIndirectCommand per visible chunk - Single VB/IB bind per frame; shadow pass reuses mega buffers - Replaced vkCmdDrawIndexedIndirect with direct vkCmdDrawIndexed to fix host-mapped buffer race condition that caused terrain flickering GPU frustum culling (compute shader) - m2_cull.comp.glsl: 64-thread workgroups, sphere-vs-6-planes + distance cull - CullInstanceGPU SSBO input, uint visibility[] output, double-buffered - dispatchCullCompute() runs before main pass via render graph node Consolidated bone matrix SSBOs - 16 MB double-buffered mega bone SSBO (2048 instances × 128 bones) - Eliminated per-instance descriptor sets; one megaBoneSet_ per frame - prepareRender() packs bone matrices consecutively into current frame slot Render graph / frame graph - RenderGraph: RGResource handles, RGPass nodes, Kahn topological sort - Automatic VkImageMemoryBarrier/VkBufferMemoryBarrier between passes - Passes: minimap_composite, worldmap_composite, preview_composite, shadow_pass, reflection_pass, compute_cull - beginFrame() uses buildFrameGraph() + renderGraph_->execute(cmd) Pipeline derivatives - PipelineBuilder::setFlags/setBasePipeline for VK_PIPELINE_CREATE_DERIVATIVE_BIT - M2 opaque = base; alphaTest/alpha/additive are derivatives - Applied to terrain (wireframe) and WMO (alpha-test) renderers Rendering bug fixes: - fix(shadow): compute lightSpaceMatrix before updatePerFrameUBO to eliminate one-frame lag that caused shadow trails and flicker on moving objects - fix(shadow): scale depth bias with shadowDistance_ instead of hardcoded 0.8f to prevent acne at close range and gaps at far range - fix(visibility): WMO group distance threshold 500u → 1200u to match terrain view distance; buildings were disappearing on the horizon - fix(precision): camera near plane 0.05 → 0.5 (ratio 600K:1 → 60K:1), eliminating Z-fighting and improving frustum plane extraction stability - fix(streaming): terrain load radius 4 → 6 tiles (~2133u → ~3200u) to exceed M2 render distance (2800u) and eliminate pop-in when camera turns; unload radius 7 → 9; spawn radius 3 → 4 - fix(visibility): ground-detail M2 distance multiplier 0.75 → 0.9 to reduce early pop of grass and debris
2026-04-04 13:43:16 +03:00
pushRange.size = 12; // int texCoordSet + int isFoliage + int instanceDataOffset
VkPipelineLayoutCreateInfo ci{VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO};
feat(rendering): GPU architecture + visual quality fixes M2 GPU instancing - M2InstanceGPU SSBO (96 B/entry, double-buffered, 16384 max) - Group opaque instances by (modelId, LOD); single vkCmdDrawIndexed per group - boneBase field indexes into mega bone SSBO via gl_InstanceIndex Indirect terrain drawing - 24 MB mega index buffer (6M uint32) + 64 MB mega vertex buffer - CPU builds VkDrawIndexedIndirectCommand per visible chunk - Single VB/IB bind per frame; shadow pass reuses mega buffers - Replaced vkCmdDrawIndexedIndirect with direct vkCmdDrawIndexed to fix host-mapped buffer race condition that caused terrain flickering GPU frustum culling (compute shader) - m2_cull.comp.glsl: 64-thread workgroups, sphere-vs-6-planes + distance cull - CullInstanceGPU SSBO input, uint visibility[] output, double-buffered - dispatchCullCompute() runs before main pass via render graph node Consolidated bone matrix SSBOs - 16 MB double-buffered mega bone SSBO (2048 instances × 128 bones) - Eliminated per-instance descriptor sets; one megaBoneSet_ per frame - prepareRender() packs bone matrices consecutively into current frame slot Render graph / frame graph - RenderGraph: RGResource handles, RGPass nodes, Kahn topological sort - Automatic VkImageMemoryBarrier/VkBufferMemoryBarrier between passes - Passes: minimap_composite, worldmap_composite, preview_composite, shadow_pass, reflection_pass, compute_cull - beginFrame() uses buildFrameGraph() + renderGraph_->execute(cmd) Pipeline derivatives - PipelineBuilder::setFlags/setBasePipeline for VK_PIPELINE_CREATE_DERIVATIVE_BIT - M2 opaque = base; alphaTest/alpha/additive are derivatives - Applied to terrain (wireframe) and WMO (alpha-test) renderers Rendering bug fixes: - fix(shadow): compute lightSpaceMatrix before updatePerFrameUBO to eliminate one-frame lag that caused shadow trails and flicker on moving objects - fix(shadow): scale depth bias with shadowDistance_ instead of hardcoded 0.8f to prevent acne at close range and gaps at far range - fix(visibility): WMO group distance threshold 500u → 1200u to match terrain view distance; buildings were disappearing on the horizon - fix(precision): camera near plane 0.05 → 0.5 (ratio 600K:1 → 60K:1), eliminating Z-fighting and improving frustum plane extraction stability - fix(streaming): terrain load radius 4 → 6 tiles (~2133u → ~3200u) to exceed M2 render distance (2800u) and eliminate pop-in when camera turns; unload radius 7 → 9; spawn radius 3 → 4 - fix(visibility): ground-detail M2 distance multiplier 0.75 → 0.9 to reduce early pop of grass and debris
2026-04-04 13:43:16 +03:00
ci.setLayoutCount = 4;
ci.pSetLayouts = setLayouts;
ci.pushConstantRangeCount = 1;
ci.pPushConstantRanges = &pushRange;
vkCreatePipelineLayout(device, &ci, nullptr, &pipelineLayout_);
}
// Particle pipeline layout: set 0 = perFrame, set 1 = particleTex
// Push constant: vec2 tileCount + int alphaKey (12 bytes)
{
VkDescriptorSetLayout setLayouts[] = {perFrameLayout, particleTexLayout_};
VkPushConstantRange pushRange{};
pushRange.stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT;
pushRange.offset = 0;
pushRange.size = 12; // vec2 + int
VkPipelineLayoutCreateInfo ci{VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO};
ci.setLayoutCount = 2;
ci.pSetLayouts = setLayouts;
ci.pushConstantRangeCount = 1;
ci.pPushConstantRanges = &pushRange;
vkCreatePipelineLayout(device, &ci, nullptr, &particlePipelineLayout_);
}
// Smoke pipeline layout: set 0 = perFrame
// Push constant: float screenHeight (4 bytes)
{
VkDescriptorSetLayout setLayouts[] = {perFrameLayout};
VkPushConstantRange pushRange{};
pushRange.stageFlags = VK_SHADER_STAGE_VERTEX_BIT;
pushRange.offset = 0;
pushRange.size = 4;
VkPipelineLayoutCreateInfo ci{VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO};
ci.setLayoutCount = 1;
ci.pSetLayouts = setLayouts;
ci.pushConstantRangeCount = 1;
ci.pPushConstantRanges = &pushRange;
vkCreatePipelineLayout(device, &ci, nullptr, &smokePipelineLayout_);
}
// --- Load shaders ---
rendering::VkShaderModule m2Vert, m2Frag;
rendering::VkShaderModule particleVert, particleFrag;
rendering::VkShaderModule smokeVert, smokeFrag;
(void)m2Vert.loadFromFile(device, "assets/shaders/m2.vert.spv");
(void)m2Frag.loadFromFile(device, "assets/shaders/m2.frag.spv");
(void)particleVert.loadFromFile(device, "assets/shaders/m2_particle.vert.spv");
(void)particleFrag.loadFromFile(device, "assets/shaders/m2_particle.frag.spv");
(void)smokeVert.loadFromFile(device, "assets/shaders/m2_smoke.vert.spv");
(void)smokeFrag.loadFromFile(device, "assets/shaders/m2_smoke.frag.spv");
if (!m2Vert.isValid() || !m2Frag.isValid()) {
LOG_ERROR("M2: Missing required shaders, cannot initialize");
return false;
}
VkRenderPass mainPass = vkCtx_->getImGuiRenderPass();
// --- Build M2 model pipelines ---
// Vertex input: 18 floats = 72 bytes stride
// loc 0: vec3 pos (0), loc 1: vec3 normal (12), loc 2: vec2 uv0 (24),
// loc 5: vec2 uv1 (32), loc 3: vec4 boneWeights (40), loc 4: vec4 boneIndices (56)
VkVertexInputBindingDescription m2Binding{};
m2Binding.binding = 0;
m2Binding.stride = 18 * sizeof(float);
m2Binding.inputRate = VK_VERTEX_INPUT_RATE_VERTEX;
std::vector<VkVertexInputAttributeDescription> m2Attrs = {
{0, 0, VK_FORMAT_R32G32B32_SFLOAT, 0}, // position
{1, 0, VK_FORMAT_R32G32B32_SFLOAT, 3 * sizeof(float)}, // normal
{2, 0, VK_FORMAT_R32G32_SFLOAT, 6 * sizeof(float)}, // texCoord0
{5, 0, VK_FORMAT_R32G32_SFLOAT, 8 * sizeof(float)}, // texCoord1
{3, 0, VK_FORMAT_R32G32B32A32_SFLOAT, 10 * sizeof(float)}, // boneWeights
{4, 0, VK_FORMAT_R32G32B32A32_SFLOAT, 14 * sizeof(float)}, // boneIndices (float)
};
feat(rendering): GPU architecture + visual quality fixes M2 GPU instancing - M2InstanceGPU SSBO (96 B/entry, double-buffered, 16384 max) - Group opaque instances by (modelId, LOD); single vkCmdDrawIndexed per group - boneBase field indexes into mega bone SSBO via gl_InstanceIndex Indirect terrain drawing - 24 MB mega index buffer (6M uint32) + 64 MB mega vertex buffer - CPU builds VkDrawIndexedIndirectCommand per visible chunk - Single VB/IB bind per frame; shadow pass reuses mega buffers - Replaced vkCmdDrawIndexedIndirect with direct vkCmdDrawIndexed to fix host-mapped buffer race condition that caused terrain flickering GPU frustum culling (compute shader) - m2_cull.comp.glsl: 64-thread workgroups, sphere-vs-6-planes + distance cull - CullInstanceGPU SSBO input, uint visibility[] output, double-buffered - dispatchCullCompute() runs before main pass via render graph node Consolidated bone matrix SSBOs - 16 MB double-buffered mega bone SSBO (2048 instances × 128 bones) - Eliminated per-instance descriptor sets; one megaBoneSet_ per frame - prepareRender() packs bone matrices consecutively into current frame slot Render graph / frame graph - RenderGraph: RGResource handles, RGPass nodes, Kahn topological sort - Automatic VkImageMemoryBarrier/VkBufferMemoryBarrier between passes - Passes: minimap_composite, worldmap_composite, preview_composite, shadow_pass, reflection_pass, compute_cull - beginFrame() uses buildFrameGraph() + renderGraph_->execute(cmd) Pipeline derivatives - PipelineBuilder::setFlags/setBasePipeline for VK_PIPELINE_CREATE_DERIVATIVE_BIT - M2 opaque = base; alphaTest/alpha/additive are derivatives - Applied to terrain (wireframe) and WMO (alpha-test) renderers Rendering bug fixes: - fix(shadow): compute lightSpaceMatrix before updatePerFrameUBO to eliminate one-frame lag that caused shadow trails and flicker on moving objects - fix(shadow): scale depth bias with shadowDistance_ instead of hardcoded 0.8f to prevent acne at close range and gaps at far range - fix(visibility): WMO group distance threshold 500u → 1200u to match terrain view distance; buildings were disappearing on the horizon - fix(precision): camera near plane 0.05 → 0.5 (ratio 600K:1 → 60K:1), eliminating Z-fighting and improving frustum plane extraction stability - fix(streaming): terrain load radius 4 → 6 tiles (~2133u → ~3200u) to exceed M2 render distance (2800u) and eliminate pop-in when camera turns; unload radius 7 → 9; spawn radius 3 → 4 - fix(visibility): ground-detail M2 distance multiplier 0.75 → 0.9 to reduce early pop of grass and debris
2026-04-04 13:43:16 +03:00
// Pipeline derivatives — opaque is the base, others derive from it for shared state optimization
auto buildM2Pipeline = [&](VkPipelineColorBlendAttachmentState blendState, bool depthWrite,
VkPipelineCreateFlags flags = 0, VkPipeline basePipeline = VK_NULL_HANDLE) -> VkPipeline {
return PipelineBuilder()
.setShaders(m2Vert.stageInfo(VK_SHADER_STAGE_VERTEX_BIT),
m2Frag.stageInfo(VK_SHADER_STAGE_FRAGMENT_BIT))
.setVertexInput({m2Binding}, m2Attrs)
.setTopology(VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST)
.setRasterization(VK_POLYGON_MODE_FILL, VK_CULL_MODE_NONE)
.setDepthTest(true, depthWrite, VK_COMPARE_OP_LESS_OR_EQUAL)
.setColorBlendAttachment(blendState)
.setMultisample(vkCtx_->getMsaaSamples())
.setLayout(pipelineLayout_)
.setRenderPass(mainPass)
.setDynamicStates({VK_DYNAMIC_STATE_VIEWPORT, VK_DYNAMIC_STATE_SCISSOR})
feat(rendering): GPU architecture + visual quality fixes M2 GPU instancing - M2InstanceGPU SSBO (96 B/entry, double-buffered, 16384 max) - Group opaque instances by (modelId, LOD); single vkCmdDrawIndexed per group - boneBase field indexes into mega bone SSBO via gl_InstanceIndex Indirect terrain drawing - 24 MB mega index buffer (6M uint32) + 64 MB mega vertex buffer - CPU builds VkDrawIndexedIndirectCommand per visible chunk - Single VB/IB bind per frame; shadow pass reuses mega buffers - Replaced vkCmdDrawIndexedIndirect with direct vkCmdDrawIndexed to fix host-mapped buffer race condition that caused terrain flickering GPU frustum culling (compute shader) - m2_cull.comp.glsl: 64-thread workgroups, sphere-vs-6-planes + distance cull - CullInstanceGPU SSBO input, uint visibility[] output, double-buffered - dispatchCullCompute() runs before main pass via render graph node Consolidated bone matrix SSBOs - 16 MB double-buffered mega bone SSBO (2048 instances × 128 bones) - Eliminated per-instance descriptor sets; one megaBoneSet_ per frame - prepareRender() packs bone matrices consecutively into current frame slot Render graph / frame graph - RenderGraph: RGResource handles, RGPass nodes, Kahn topological sort - Automatic VkImageMemoryBarrier/VkBufferMemoryBarrier between passes - Passes: minimap_composite, worldmap_composite, preview_composite, shadow_pass, reflection_pass, compute_cull - beginFrame() uses buildFrameGraph() + renderGraph_->execute(cmd) Pipeline derivatives - PipelineBuilder::setFlags/setBasePipeline for VK_PIPELINE_CREATE_DERIVATIVE_BIT - M2 opaque = base; alphaTest/alpha/additive are derivatives - Applied to terrain (wireframe) and WMO (alpha-test) renderers Rendering bug fixes: - fix(shadow): compute lightSpaceMatrix before updatePerFrameUBO to eliminate one-frame lag that caused shadow trails and flicker on moving objects - fix(shadow): scale depth bias with shadowDistance_ instead of hardcoded 0.8f to prevent acne at close range and gaps at far range - fix(visibility): WMO group distance threshold 500u → 1200u to match terrain view distance; buildings were disappearing on the horizon - fix(precision): camera near plane 0.05 → 0.5 (ratio 600K:1 → 60K:1), eliminating Z-fighting and improving frustum plane extraction stability - fix(streaming): terrain load radius 4 → 6 tiles (~2133u → ~3200u) to exceed M2 render distance (2800u) and eliminate pop-in when camera turns; unload radius 7 → 9; spawn radius 3 → 4 - fix(visibility): ground-detail M2 distance multiplier 0.75 → 0.9 to reduce early pop of grass and debris
2026-04-04 13:43:16 +03:00
.setFlags(flags)
.setBasePipeline(basePipeline)
.build(device, vkCtx_->getPipelineCache());
};
feat(rendering): GPU architecture + visual quality fixes M2 GPU instancing - M2InstanceGPU SSBO (96 B/entry, double-buffered, 16384 max) - Group opaque instances by (modelId, LOD); single vkCmdDrawIndexed per group - boneBase field indexes into mega bone SSBO via gl_InstanceIndex Indirect terrain drawing - 24 MB mega index buffer (6M uint32) + 64 MB mega vertex buffer - CPU builds VkDrawIndexedIndirectCommand per visible chunk - Single VB/IB bind per frame; shadow pass reuses mega buffers - Replaced vkCmdDrawIndexedIndirect with direct vkCmdDrawIndexed to fix host-mapped buffer race condition that caused terrain flickering GPU frustum culling (compute shader) - m2_cull.comp.glsl: 64-thread workgroups, sphere-vs-6-planes + distance cull - CullInstanceGPU SSBO input, uint visibility[] output, double-buffered - dispatchCullCompute() runs before main pass via render graph node Consolidated bone matrix SSBOs - 16 MB double-buffered mega bone SSBO (2048 instances × 128 bones) - Eliminated per-instance descriptor sets; one megaBoneSet_ per frame - prepareRender() packs bone matrices consecutively into current frame slot Render graph / frame graph - RenderGraph: RGResource handles, RGPass nodes, Kahn topological sort - Automatic VkImageMemoryBarrier/VkBufferMemoryBarrier between passes - Passes: minimap_composite, worldmap_composite, preview_composite, shadow_pass, reflection_pass, compute_cull - beginFrame() uses buildFrameGraph() + renderGraph_->execute(cmd) Pipeline derivatives - PipelineBuilder::setFlags/setBasePipeline for VK_PIPELINE_CREATE_DERIVATIVE_BIT - M2 opaque = base; alphaTest/alpha/additive are derivatives - Applied to terrain (wireframe) and WMO (alpha-test) renderers Rendering bug fixes: - fix(shadow): compute lightSpaceMatrix before updatePerFrameUBO to eliminate one-frame lag that caused shadow trails and flicker on moving objects - fix(shadow): scale depth bias with shadowDistance_ instead of hardcoded 0.8f to prevent acne at close range and gaps at far range - fix(visibility): WMO group distance threshold 500u → 1200u to match terrain view distance; buildings were disappearing on the horizon - fix(precision): camera near plane 0.05 → 0.5 (ratio 600K:1 → 60K:1), eliminating Z-fighting and improving frustum plane extraction stability - fix(streaming): terrain load radius 4 → 6 tiles (~2133u → ~3200u) to exceed M2 render distance (2800u) and eliminate pop-in when camera turns; unload radius 7 → 9; spawn radius 3 → 4 - fix(visibility): ground-detail M2 distance multiplier 0.75 → 0.9 to reduce early pop of grass and debris
2026-04-04 13:43:16 +03:00
opaquePipeline_ = buildM2Pipeline(PipelineBuilder::blendDisabled(), true,
VK_PIPELINE_CREATE_ALLOW_DERIVATIVES_BIT);
alphaTestPipeline_ = buildM2Pipeline(PipelineBuilder::blendAlpha(), true,
VK_PIPELINE_CREATE_DERIVATIVE_BIT, opaquePipeline_);
alphaPipeline_ = buildM2Pipeline(PipelineBuilder::blendAlpha(), false,
VK_PIPELINE_CREATE_DERIVATIVE_BIT, opaquePipeline_);
additivePipeline_ = buildM2Pipeline(PipelineBuilder::blendAdditive(), false,
VK_PIPELINE_CREATE_DERIVATIVE_BIT, opaquePipeline_);
// --- Build particle pipelines ---
if (particleVert.isValid() && particleFrag.isValid()) {
VkVertexInputBindingDescription pBind{};
pBind.binding = 0;
pBind.stride = 9 * sizeof(float); // pos3 + color4 + size1 + tile1
pBind.inputRate = VK_VERTEX_INPUT_RATE_VERTEX;
std::vector<VkVertexInputAttributeDescription> pAttrs = {
{0, 0, VK_FORMAT_R32G32B32_SFLOAT, 0}, // position
{1, 0, VK_FORMAT_R32G32B32A32_SFLOAT, 3 * sizeof(float)}, // color
{2, 0, VK_FORMAT_R32_SFLOAT, 7 * sizeof(float)}, // size
{3, 0, VK_FORMAT_R32_SFLOAT, 8 * sizeof(float)}, // tile
};
auto buildParticlePipeline = [&](VkPipelineColorBlendAttachmentState blend) -> VkPipeline {
return PipelineBuilder()
.setShaders(particleVert.stageInfo(VK_SHADER_STAGE_VERTEX_BIT),
particleFrag.stageInfo(VK_SHADER_STAGE_FRAGMENT_BIT))
.setVertexInput({pBind}, pAttrs)
.setTopology(VK_PRIMITIVE_TOPOLOGY_POINT_LIST)
.setRasterization(VK_POLYGON_MODE_FILL, VK_CULL_MODE_NONE)
.setDepthTest(true, false, VK_COMPARE_OP_LESS_OR_EQUAL)
.setColorBlendAttachment(blend)
.setMultisample(vkCtx_->getMsaaSamples())
.setLayout(particlePipelineLayout_)
.setRenderPass(mainPass)
.setDynamicStates({VK_DYNAMIC_STATE_VIEWPORT, VK_DYNAMIC_STATE_SCISSOR})
.build(device, vkCtx_->getPipelineCache());
};
particlePipeline_ = buildParticlePipeline(PipelineBuilder::blendAlpha());
particleAdditivePipeline_ = buildParticlePipeline(PipelineBuilder::blendAdditive());
}
// --- Build smoke pipeline ---
if (smokeVert.isValid() && smokeFrag.isValid()) {
VkVertexInputBindingDescription sBind{};
sBind.binding = 0;
sBind.stride = 6 * sizeof(float); // pos3 + lifeRatio1 + size1 + isSpark1
sBind.inputRate = VK_VERTEX_INPUT_RATE_VERTEX;
std::vector<VkVertexInputAttributeDescription> sAttrs = {
{0, 0, VK_FORMAT_R32G32B32_SFLOAT, 0}, // position
{1, 0, VK_FORMAT_R32_SFLOAT, 3 * sizeof(float)}, // lifeRatio
{2, 0, VK_FORMAT_R32_SFLOAT, 4 * sizeof(float)}, // size
{3, 0, VK_FORMAT_R32_SFLOAT, 5 * sizeof(float)}, // isSpark
};
smokePipeline_ = PipelineBuilder()
.setShaders(smokeVert.stageInfo(VK_SHADER_STAGE_VERTEX_BIT),
smokeFrag.stageInfo(VK_SHADER_STAGE_FRAGMENT_BIT))
.setVertexInput({sBind}, sAttrs)
.setTopology(VK_PRIMITIVE_TOPOLOGY_POINT_LIST)
.setRasterization(VK_POLYGON_MODE_FILL, VK_CULL_MODE_NONE)
.setDepthTest(true, false, VK_COMPARE_OP_LESS_OR_EQUAL)
.setColorBlendAttachment(PipelineBuilder::blendAlpha())
.setMultisample(vkCtx_->getMsaaSamples())
.setLayout(smokePipelineLayout_)
.setRenderPass(mainPass)
.setDynamicStates({VK_DYNAMIC_STATE_VIEWPORT, VK_DYNAMIC_STATE_SCISSOR})
.build(device, vkCtx_->getPipelineCache());
}
// --- Build ribbon pipelines ---
// Vertex format: pos(3) + color(3) + alpha(1) + uv(2) = 9 floats = 36 bytes
{
rendering::VkShaderModule ribVert, ribFrag;
(void)ribVert.loadFromFile(device, "assets/shaders/m2_ribbon.vert.spv");
(void)ribFrag.loadFromFile(device, "assets/shaders/m2_ribbon.frag.spv");
if (ribVert.isValid() && ribFrag.isValid()) {
// Reuse particleTexLayout_ for set 1 (single texture sampler)
VkDescriptorSetLayout ribLayouts[] = {perFrameLayout, particleTexLayout_};
VkPipelineLayoutCreateInfo lci{VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO};
lci.setLayoutCount = 2;
lci.pSetLayouts = ribLayouts;
vkCreatePipelineLayout(device, &lci, nullptr, &ribbonPipelineLayout_);
VkVertexInputBindingDescription rBind{};
rBind.binding = 0;
rBind.stride = 9 * sizeof(float);
rBind.inputRate = VK_VERTEX_INPUT_RATE_VERTEX;
std::vector<VkVertexInputAttributeDescription> rAttrs = {
{0, 0, VK_FORMAT_R32G32B32_SFLOAT, 0}, // pos
{1, 0, VK_FORMAT_R32G32B32_SFLOAT, 3 * sizeof(float)}, // color
{2, 0, VK_FORMAT_R32_SFLOAT, 6 * sizeof(float)}, // alpha
{3, 0, VK_FORMAT_R32G32_SFLOAT, 7 * sizeof(float)}, // uv
};
auto buildRibbonPipeline = [&](VkPipelineColorBlendAttachmentState blend) -> VkPipeline {
return PipelineBuilder()
.setShaders(ribVert.stageInfo(VK_SHADER_STAGE_VERTEX_BIT),
ribFrag.stageInfo(VK_SHADER_STAGE_FRAGMENT_BIT))
.setVertexInput({rBind}, rAttrs)
.setTopology(VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP)
.setRasterization(VK_POLYGON_MODE_FILL, VK_CULL_MODE_NONE)
.setDepthTest(true, false, VK_COMPARE_OP_LESS_OR_EQUAL)
.setColorBlendAttachment(blend)
.setMultisample(vkCtx_->getMsaaSamples())
.setLayout(ribbonPipelineLayout_)
.setRenderPass(mainPass)
.setDynamicStates({VK_DYNAMIC_STATE_VIEWPORT, VK_DYNAMIC_STATE_SCISSOR})
.build(device, vkCtx_->getPipelineCache());
};
ribbonPipeline_ = buildRibbonPipeline(PipelineBuilder::blendAlpha());
ribbonAdditivePipeline_ = buildRibbonPipeline(PipelineBuilder::blendAdditive());
}
ribVert.destroy(); ribFrag.destroy();
}
// Clean up shader modules
m2Vert.destroy(); m2Frag.destroy();
particleVert.destroy(); particleFrag.destroy();
smokeVert.destroy(); smokeFrag.destroy();
// --- Create dynamic particle buffers (mapped for CPU writes) ---
{
VkBufferCreateInfo bci{VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO};
bci.usage = VK_BUFFER_USAGE_VERTEX_BUFFER_BIT;
VmaAllocationCreateInfo aci{};
aci.usage = VMA_MEMORY_USAGE_CPU_TO_GPU;
aci.flags = VMA_ALLOCATION_CREATE_MAPPED_BIT;
VmaAllocationInfo allocInfo{};
// Smoke particle buffer
bci.size = MAX_SMOKE_PARTICLES * 6 * sizeof(float);
vmaCreateBuffer(vkCtx_->getAllocator(), &bci, &aci, &smokeVB_, &smokeVBAlloc_, &allocInfo);
smokeVBMapped_ = allocInfo.pMappedData;
// M2 particle buffer
bci.size = MAX_M2_PARTICLES * 9 * sizeof(float);
vmaCreateBuffer(vkCtx_->getAllocator(), &bci, &aci, &m2ParticleVB_, &m2ParticleVBAlloc_, &allocInfo);
m2ParticleVBMapped_ = allocInfo.pMappedData;
// Dedicated glow sprite buffer (separate from particle VB to avoid data race)
bci.size = MAX_GLOW_SPRITES * 9 * sizeof(float);
vmaCreateBuffer(vkCtx_->getAllocator(), &bci, &aci, &glowVB_, &glowVBAlloc_, &allocInfo);
glowVBMapped_ = allocInfo.pMappedData;
// Ribbon vertex buffer — triangle strip: pos(3)+color(3)+alpha(1)+uv(2)=9 floats/vert
bci.size = MAX_RIBBON_VERTS * 9 * sizeof(float);
vmaCreateBuffer(vkCtx_->getAllocator(), &bci, &aci, &ribbonVB_, &ribbonVBAlloc_, &allocInfo);
ribbonVBMapped_ = allocInfo.pMappedData;
}
// --- Create white fallback texture ---
{
uint8_t white[] = {255, 255, 255, 255};
whiteTexture_ = std::make_unique<VkTexture>();
whiteTexture_->upload(*vkCtx_, white, 1, 1, VK_FORMAT_R8G8B8A8_UNORM);
whiteTexture_->createSampler(device, VK_FILTER_LINEAR, VK_FILTER_LINEAR, VK_SAMPLER_ADDRESS_MODE_REPEAT);
}
// --- Generate soft radial gradient glow texture ---
{
static constexpr int SZ = 64;
std::vector<uint8_t> px(SZ * SZ * 4);
float half = SZ / 2.0f;
for (int y = 0; y < SZ; y++) {
for (int x = 0; x < SZ; x++) {
float dx = (x + 0.5f - half) / half;
float dy = (y + 0.5f - half) / half;
float r = std::sqrt(dx * dx + dy * dy);
float a = std::max(0.0f, 1.0f - r);
a = a * a; // Quadratic falloff
int idx = (y * SZ + x) * 4;
px[idx + 0] = 255;
px[idx + 1] = 255;
px[idx + 2] = 255;
px[idx + 3] = static_cast<uint8_t>(a * 255);
}
}
glowTexture_ = std::make_unique<VkTexture>();
glowTexture_->upload(*vkCtx_, px.data(), SZ, SZ, VK_FORMAT_R8G8B8A8_UNORM);
glowTexture_->createSampler(device, VK_FILTER_LINEAR, VK_FILTER_LINEAR, VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE);
// Pre-allocate glow texture descriptor set (reused every frame)
if (particleTexLayout_ && materialDescPool_) {
VkDescriptorSetAllocateInfo ai{VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO};
ai.descriptorPool = materialDescPool_;
ai.descriptorSetCount = 1;
ai.pSetLayouts = &particleTexLayout_;
if (vkAllocateDescriptorSets(device, &ai, &glowTexDescSet_) == VK_SUCCESS) {
VkDescriptorImageInfo imgInfo = glowTexture_->descriptorInfo();
VkWriteDescriptorSet write{VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET};
write.dstSet = glowTexDescSet_;
write.dstBinding = 0;
write.descriptorCount = 1;
write.descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER;
write.pImageInfo = &imgInfo;
vkUpdateDescriptorSets(device, 1, &write, 0, nullptr);
}
}
}
textureCacheBudgetBytes_ =
envSizeMBOrDefault("WOWEE_M2_TEX_CACHE_MB", 4096) * 1024ull * 1024ull;
modelCacheLimit_ = envSizeMBOrDefault("WOWEE_M2_MODEL_LIMIT", 6000);
LOG_INFO("M2 texture cache budget: ", textureCacheBudgetBytes_ / (1024 * 1024), " MB");
LOG_INFO("M2 model cache limit: ", modelCacheLimit_);
LOG_INFO("M2 renderer initialized (Vulkan)");
initialized_ = true;
return true;
}
void M2Renderer::shutdown() {
LOG_INFO("Shutting down M2 renderer...");
if (!vkCtx_) return;
vkDeviceWaitIdle(vkCtx_->getDevice());
VkDevice device = vkCtx_->getDevice();
VmaAllocator alloc = vkCtx_->getAllocator();
// Delete model GPU resources
for (auto& [id, model] : models) {
destroyModelGPU(model);
}
models.clear();
// Destroy instance bone buffers
for (auto& inst : instances) {
destroyInstanceBones(inst);
}
instances.clear();
spatialGrid.clear();
instanceIndexById.clear();
instanceDedupMap_.clear();
// Delete cached textures
textureCache.clear();
textureCacheBytes_ = 0;
textureCacheCounter_ = 0;
texturePropsByPtr_.clear();
failedTextureCache_.clear();
failedTextureRetryAt_.clear();
loggedTextureLoadFails_.clear();
textureLookupSerial_ = 0;
textureBudgetRejectWarnings_ = 0;
whiteTexture_.reset();
glowTexture_.reset();
// Clean up particle/ribbon buffers
if (smokeVB_) { vmaDestroyBuffer(alloc, smokeVB_, smokeVBAlloc_); smokeVB_ = VK_NULL_HANDLE; }
if (m2ParticleVB_) { vmaDestroyBuffer(alloc, m2ParticleVB_, m2ParticleVBAlloc_); m2ParticleVB_ = VK_NULL_HANDLE; }
if (glowVB_) { vmaDestroyBuffer(alloc, glowVB_, glowVBAlloc_); glowVB_ = VK_NULL_HANDLE; }
if (ribbonVB_) { vmaDestroyBuffer(alloc, ribbonVB_, ribbonVBAlloc_); ribbonVB_ = VK_NULL_HANDLE; }
smokeParticles.clear();
// Destroy pipelines
auto destroyPipeline = [&](VkPipeline& p) { if (p) { vkDestroyPipeline(device, p, nullptr); p = VK_NULL_HANDLE; } };
destroyPipeline(opaquePipeline_);
destroyPipeline(alphaTestPipeline_);
destroyPipeline(alphaPipeline_);
destroyPipeline(additivePipeline_);
destroyPipeline(particlePipeline_);
destroyPipeline(particleAdditivePipeline_);
destroyPipeline(smokePipeline_);
destroyPipeline(ribbonPipeline_);
destroyPipeline(ribbonAdditivePipeline_);
if (pipelineLayout_) { vkDestroyPipelineLayout(device, pipelineLayout_, nullptr); pipelineLayout_ = VK_NULL_HANDLE; }
if (particlePipelineLayout_) { vkDestroyPipelineLayout(device, particlePipelineLayout_, nullptr); particlePipelineLayout_ = VK_NULL_HANDLE; }
if (smokePipelineLayout_) { vkDestroyPipelineLayout(device, smokePipelineLayout_, nullptr); smokePipelineLayout_ = VK_NULL_HANDLE; }
if (ribbonPipelineLayout_) { vkDestroyPipelineLayout(device, ribbonPipelineLayout_, nullptr); ribbonPipelineLayout_ = VK_NULL_HANDLE; }
// Destroy descriptor pools and layouts
2026-03-22 21:47:12 +03:00
if (dummyBoneBuffer_) { vmaDestroyBuffer(alloc, dummyBoneBuffer_, dummyBoneAlloc_); dummyBoneBuffer_ = VK_NULL_HANDLE; }
// dummyBoneSet_ is freed implicitly when boneDescPool_ is destroyed
dummyBoneSet_ = VK_NULL_HANDLE;
feat(rendering): GPU architecture + visual quality fixes M2 GPU instancing - M2InstanceGPU SSBO (96 B/entry, double-buffered, 16384 max) - Group opaque instances by (modelId, LOD); single vkCmdDrawIndexed per group - boneBase field indexes into mega bone SSBO via gl_InstanceIndex Indirect terrain drawing - 24 MB mega index buffer (6M uint32) + 64 MB mega vertex buffer - CPU builds VkDrawIndexedIndirectCommand per visible chunk - Single VB/IB bind per frame; shadow pass reuses mega buffers - Replaced vkCmdDrawIndexedIndirect with direct vkCmdDrawIndexed to fix host-mapped buffer race condition that caused terrain flickering GPU frustum culling (compute shader) - m2_cull.comp.glsl: 64-thread workgroups, sphere-vs-6-planes + distance cull - CullInstanceGPU SSBO input, uint visibility[] output, double-buffered - dispatchCullCompute() runs before main pass via render graph node Consolidated bone matrix SSBOs - 16 MB double-buffered mega bone SSBO (2048 instances × 128 bones) - Eliminated per-instance descriptor sets; one megaBoneSet_ per frame - prepareRender() packs bone matrices consecutively into current frame slot Render graph / frame graph - RenderGraph: RGResource handles, RGPass nodes, Kahn topological sort - Automatic VkImageMemoryBarrier/VkBufferMemoryBarrier between passes - Passes: minimap_composite, worldmap_composite, preview_composite, shadow_pass, reflection_pass, compute_cull - beginFrame() uses buildFrameGraph() + renderGraph_->execute(cmd) Pipeline derivatives - PipelineBuilder::setFlags/setBasePipeline for VK_PIPELINE_CREATE_DERIVATIVE_BIT - M2 opaque = base; alphaTest/alpha/additive are derivatives - Applied to terrain (wireframe) and WMO (alpha-test) renderers Rendering bug fixes: - fix(shadow): compute lightSpaceMatrix before updatePerFrameUBO to eliminate one-frame lag that caused shadow trails and flicker on moving objects - fix(shadow): scale depth bias with shadowDistance_ instead of hardcoded 0.8f to prevent acne at close range and gaps at far range - fix(visibility): WMO group distance threshold 500u → 1200u to match terrain view distance; buildings were disappearing on the horizon - fix(precision): camera near plane 0.05 → 0.5 (ratio 600K:1 → 60K:1), eliminating Z-fighting and improving frustum plane extraction stability - fix(streaming): terrain load radius 4 → 6 tiles (~2133u → ~3200u) to exceed M2 render distance (2800u) and eliminate pop-in when camera turns; unload radius 7 → 9; spawn radius 3 → 4 - fix(visibility): ground-detail M2 distance multiplier 0.75 → 0.9 to reduce early pop of grass and debris
2026-04-04 13:43:16 +03:00
// Mega bone SSBO cleanup (sets freed implicitly with boneDescPool_)
for (int i = 0; i < 2; i++) {
if (megaBoneBuffer_[i]) { vmaDestroyBuffer(alloc, megaBoneBuffer_[i], megaBoneAlloc_[i]); megaBoneBuffer_[i] = VK_NULL_HANDLE; }
megaBoneMapped_[i] = nullptr;
megaBoneSet_[i] = VK_NULL_HANDLE;
}
if (materialDescPool_) { vkDestroyDescriptorPool(device, materialDescPool_, nullptr); materialDescPool_ = VK_NULL_HANDLE; }
if (boneDescPool_) { vkDestroyDescriptorPool(device, boneDescPool_, nullptr); boneDescPool_ = VK_NULL_HANDLE; }
feat(animation): decompose AnimationController into FSM-based architecture Replace the 2,200-line monolithic AnimationController (goto-driven, single class, untestable) with a composed FSM architecture per refactor.md. New subsystem (src/rendering/animation/ — 16 headers, 10 sources): - CharacterAnimator: FSM composer implementing ICharacterAnimator - LocomotionFSM: idle/walk/run/sprint/jump/swim/strafe - CombatFSM: melee/ranged/spell cast/stun/hit reaction/charge - ActivityFSM: emote/loot/sit-down/sitting/sit-up - MountFSM: idle/run/flight/taxi/fidget/rear-up (per-instance RNG) - AnimCapabilitySet + AnimCapabilityProbe: probe once at model load, eliminate per-frame hasAnimation() linear search - AnimationManager: registry of CharacterAnimator by GUID - EmoteRegistry: DBC-backed emote command → animId singleton - FootstepDriver, SfxStateDriver: extracted from AnimationController animation_ids.hpp/.cpp moved to animation/ subdirectory (452 named constants); all include paths updated. AnimationController retained as thin adapter (~400 LOC): collects FrameInput, delegates to CharacterAnimator, applies AnimOutput. Priority order: Mount > Stun > HitReaction > Spell > Charge > Melee/Ranged > CombatIdle > Emote > Loot > Sit > Locomotion. STAY_IN_STATE policy when all FSMs return valid=false. Bugs fixed: - Remove static mt19937 in mount fidget (shared state across all mounted units) — replaced with per-instance seeded RNG - Remove goto from mounted animation branch (skipped init) - Remove per-frame hasAnimation() calls (now one probe at load) - Fix VK_INDEX_TYPE_UINT16 → UINT32 in shadow pass Tests (4 new suites, all ASAN+UBSan clean): - test_locomotion_fsm: 167 assertions - test_combat_fsm: 125 cases - test_activity_fsm: 112 cases - test_anim_capability: 56 cases docs/ANIMATION_SYSTEM.md added (architecture reference).
2026-04-05 12:27:35 +03:00
// Instance data SSBO cleanup (sets freed with instanceDescPool_)
feat(rendering): GPU architecture + visual quality fixes M2 GPU instancing - M2InstanceGPU SSBO (96 B/entry, double-buffered, 16384 max) - Group opaque instances by (modelId, LOD); single vkCmdDrawIndexed per group - boneBase field indexes into mega bone SSBO via gl_InstanceIndex Indirect terrain drawing - 24 MB mega index buffer (6M uint32) + 64 MB mega vertex buffer - CPU builds VkDrawIndexedIndirectCommand per visible chunk - Single VB/IB bind per frame; shadow pass reuses mega buffers - Replaced vkCmdDrawIndexedIndirect with direct vkCmdDrawIndexed to fix host-mapped buffer race condition that caused terrain flickering GPU frustum culling (compute shader) - m2_cull.comp.glsl: 64-thread workgroups, sphere-vs-6-planes + distance cull - CullInstanceGPU SSBO input, uint visibility[] output, double-buffered - dispatchCullCompute() runs before main pass via render graph node Consolidated bone matrix SSBOs - 16 MB double-buffered mega bone SSBO (2048 instances × 128 bones) - Eliminated per-instance descriptor sets; one megaBoneSet_ per frame - prepareRender() packs bone matrices consecutively into current frame slot Render graph / frame graph - RenderGraph: RGResource handles, RGPass nodes, Kahn topological sort - Automatic VkImageMemoryBarrier/VkBufferMemoryBarrier between passes - Passes: minimap_composite, worldmap_composite, preview_composite, shadow_pass, reflection_pass, compute_cull - beginFrame() uses buildFrameGraph() + renderGraph_->execute(cmd) Pipeline derivatives - PipelineBuilder::setFlags/setBasePipeline for VK_PIPELINE_CREATE_DERIVATIVE_BIT - M2 opaque = base; alphaTest/alpha/additive are derivatives - Applied to terrain (wireframe) and WMO (alpha-test) renderers Rendering bug fixes: - fix(shadow): compute lightSpaceMatrix before updatePerFrameUBO to eliminate one-frame lag that caused shadow trails and flicker on moving objects - fix(shadow): scale depth bias with shadowDistance_ instead of hardcoded 0.8f to prevent acne at close range and gaps at far range - fix(visibility): WMO group distance threshold 500u → 1200u to match terrain view distance; buildings were disappearing on the horizon - fix(precision): camera near plane 0.05 → 0.5 (ratio 600K:1 → 60K:1), eliminating Z-fighting and improving frustum plane extraction stability - fix(streaming): terrain load radius 4 → 6 tiles (~2133u → ~3200u) to exceed M2 render distance (2800u) and eliminate pop-in when camera turns; unload radius 7 → 9; spawn radius 3 → 4 - fix(visibility): ground-detail M2 distance multiplier 0.75 → 0.9 to reduce early pop of grass and debris
2026-04-04 13:43:16 +03:00
for (int i = 0; i < 2; i++) {
if (instanceBuffer_[i]) { vmaDestroyBuffer(alloc, instanceBuffer_[i], instanceAlloc_[i]); instanceBuffer_[i] = VK_NULL_HANDLE; }
instanceMapped_[i] = nullptr;
instanceSet_[i] = VK_NULL_HANDLE;
}
if (instanceDescPool_) { vkDestroyDescriptorPool(device, instanceDescPool_, nullptr); instanceDescPool_ = VK_NULL_HANDLE; }
feat(animation): decompose AnimationController into FSM-based architecture Replace the 2,200-line monolithic AnimationController (goto-driven, single class, untestable) with a composed FSM architecture per refactor.md. New subsystem (src/rendering/animation/ — 16 headers, 10 sources): - CharacterAnimator: FSM composer implementing ICharacterAnimator - LocomotionFSM: idle/walk/run/sprint/jump/swim/strafe - CombatFSM: melee/ranged/spell cast/stun/hit reaction/charge - ActivityFSM: emote/loot/sit-down/sitting/sit-up - MountFSM: idle/run/flight/taxi/fidget/rear-up (per-instance RNG) - AnimCapabilitySet + AnimCapabilityProbe: probe once at model load, eliminate per-frame hasAnimation() linear search - AnimationManager: registry of CharacterAnimator by GUID - EmoteRegistry: DBC-backed emote command → animId singleton - FootstepDriver, SfxStateDriver: extracted from AnimationController animation_ids.hpp/.cpp moved to animation/ subdirectory (452 named constants); all include paths updated. AnimationController retained as thin adapter (~400 LOC): collects FrameInput, delegates to CharacterAnimator, applies AnimOutput. Priority order: Mount > Stun > HitReaction > Spell > Charge > Melee/Ranged > CombatIdle > Emote > Loot > Sit > Locomotion. STAY_IN_STATE policy when all FSMs return valid=false. Bugs fixed: - Remove static mt19937 in mount fidget (shared state across all mounted units) — replaced with per-instance seeded RNG - Remove goto from mounted animation branch (skipped init) - Remove per-frame hasAnimation() calls (now one probe at load) - Fix VK_INDEX_TYPE_UINT16 → UINT32 in shadow pass Tests (4 new suites, all ASAN+UBSan clean): - test_locomotion_fsm: 167 assertions - test_combat_fsm: 125 cases - test_activity_fsm: 112 cases - test_anim_capability: 56 cases docs/ANIMATION_SYSTEM.md added (architecture reference).
2026-04-05 12:27:35 +03:00
// GPU frustum culling compute pipeline + buffers cleanup
feat(rendering): GPU architecture + visual quality fixes M2 GPU instancing - M2InstanceGPU SSBO (96 B/entry, double-buffered, 16384 max) - Group opaque instances by (modelId, LOD); single vkCmdDrawIndexed per group - boneBase field indexes into mega bone SSBO via gl_InstanceIndex Indirect terrain drawing - 24 MB mega index buffer (6M uint32) + 64 MB mega vertex buffer - CPU builds VkDrawIndexedIndirectCommand per visible chunk - Single VB/IB bind per frame; shadow pass reuses mega buffers - Replaced vkCmdDrawIndexedIndirect with direct vkCmdDrawIndexed to fix host-mapped buffer race condition that caused terrain flickering GPU frustum culling (compute shader) - m2_cull.comp.glsl: 64-thread workgroups, sphere-vs-6-planes + distance cull - CullInstanceGPU SSBO input, uint visibility[] output, double-buffered - dispatchCullCompute() runs before main pass via render graph node Consolidated bone matrix SSBOs - 16 MB double-buffered mega bone SSBO (2048 instances × 128 bones) - Eliminated per-instance descriptor sets; one megaBoneSet_ per frame - prepareRender() packs bone matrices consecutively into current frame slot Render graph / frame graph - RenderGraph: RGResource handles, RGPass nodes, Kahn topological sort - Automatic VkImageMemoryBarrier/VkBufferMemoryBarrier between passes - Passes: minimap_composite, worldmap_composite, preview_composite, shadow_pass, reflection_pass, compute_cull - beginFrame() uses buildFrameGraph() + renderGraph_->execute(cmd) Pipeline derivatives - PipelineBuilder::setFlags/setBasePipeline for VK_PIPELINE_CREATE_DERIVATIVE_BIT - M2 opaque = base; alphaTest/alpha/additive are derivatives - Applied to terrain (wireframe) and WMO (alpha-test) renderers Rendering bug fixes: - fix(shadow): compute lightSpaceMatrix before updatePerFrameUBO to eliminate one-frame lag that caused shadow trails and flicker on moving objects - fix(shadow): scale depth bias with shadowDistance_ instead of hardcoded 0.8f to prevent acne at close range and gaps at far range - fix(visibility): WMO group distance threshold 500u → 1200u to match terrain view distance; buildings were disappearing on the horizon - fix(precision): camera near plane 0.05 → 0.5 (ratio 600K:1 → 60K:1), eliminating Z-fighting and improving frustum plane extraction stability - fix(streaming): terrain load radius 4 → 6 tiles (~2133u → ~3200u) to exceed M2 render distance (2800u) and eliminate pop-in when camera turns; unload radius 7 → 9; spawn radius 3 → 4 - fix(visibility): ground-detail M2 distance multiplier 0.75 → 0.9 to reduce early pop of grass and debris
2026-04-04 13:43:16 +03:00
if (cullPipeline_) { vkDestroyPipeline(device, cullPipeline_, nullptr); cullPipeline_ = VK_NULL_HANDLE; }
if (cullPipelineLayout_) { vkDestroyPipelineLayout(device, cullPipelineLayout_, nullptr); cullPipelineLayout_ = VK_NULL_HANDLE; }
for (int i = 0; i < 2; i++) {
if (cullUniformBuffer_[i]) { vmaDestroyBuffer(alloc, cullUniformBuffer_[i], cullUniformAlloc_[i]); cullUniformBuffer_[i] = VK_NULL_HANDLE; }
if (cullInputBuffer_[i]) { vmaDestroyBuffer(alloc, cullInputBuffer_[i], cullInputAlloc_[i]); cullInputBuffer_[i] = VK_NULL_HANDLE; }
if (cullOutputBuffer_[i]) { vmaDestroyBuffer(alloc, cullOutputBuffer_[i], cullOutputAlloc_[i]); cullOutputBuffer_[i] = VK_NULL_HANDLE; }
cullUniformMapped_[i] = cullInputMapped_[i] = cullOutputMapped_[i] = nullptr;
cullSet_[i] = VK_NULL_HANDLE;
}
if (cullDescPool_) { vkDestroyDescriptorPool(device, cullDescPool_, nullptr); cullDescPool_ = VK_NULL_HANDLE; }
if (cullSetLayout_) { vkDestroyDescriptorSetLayout(device, cullSetLayout_, nullptr); cullSetLayout_ = VK_NULL_HANDLE; }
if (materialSetLayout_) { vkDestroyDescriptorSetLayout(device, materialSetLayout_, nullptr); materialSetLayout_ = VK_NULL_HANDLE; }
if (boneSetLayout_) { vkDestroyDescriptorSetLayout(device, boneSetLayout_, nullptr); boneSetLayout_ = VK_NULL_HANDLE; }
feat(rendering): GPU architecture + visual quality fixes M2 GPU instancing - M2InstanceGPU SSBO (96 B/entry, double-buffered, 16384 max) - Group opaque instances by (modelId, LOD); single vkCmdDrawIndexed per group - boneBase field indexes into mega bone SSBO via gl_InstanceIndex Indirect terrain drawing - 24 MB mega index buffer (6M uint32) + 64 MB mega vertex buffer - CPU builds VkDrawIndexedIndirectCommand per visible chunk - Single VB/IB bind per frame; shadow pass reuses mega buffers - Replaced vkCmdDrawIndexedIndirect with direct vkCmdDrawIndexed to fix host-mapped buffer race condition that caused terrain flickering GPU frustum culling (compute shader) - m2_cull.comp.glsl: 64-thread workgroups, sphere-vs-6-planes + distance cull - CullInstanceGPU SSBO input, uint visibility[] output, double-buffered - dispatchCullCompute() runs before main pass via render graph node Consolidated bone matrix SSBOs - 16 MB double-buffered mega bone SSBO (2048 instances × 128 bones) - Eliminated per-instance descriptor sets; one megaBoneSet_ per frame - prepareRender() packs bone matrices consecutively into current frame slot Render graph / frame graph - RenderGraph: RGResource handles, RGPass nodes, Kahn topological sort - Automatic VkImageMemoryBarrier/VkBufferMemoryBarrier between passes - Passes: minimap_composite, worldmap_composite, preview_composite, shadow_pass, reflection_pass, compute_cull - beginFrame() uses buildFrameGraph() + renderGraph_->execute(cmd) Pipeline derivatives - PipelineBuilder::setFlags/setBasePipeline for VK_PIPELINE_CREATE_DERIVATIVE_BIT - M2 opaque = base; alphaTest/alpha/additive are derivatives - Applied to terrain (wireframe) and WMO (alpha-test) renderers Rendering bug fixes: - fix(shadow): compute lightSpaceMatrix before updatePerFrameUBO to eliminate one-frame lag that caused shadow trails and flicker on moving objects - fix(shadow): scale depth bias with shadowDistance_ instead of hardcoded 0.8f to prevent acne at close range and gaps at far range - fix(visibility): WMO group distance threshold 500u → 1200u to match terrain view distance; buildings were disappearing on the horizon - fix(precision): camera near plane 0.05 → 0.5 (ratio 600K:1 → 60K:1), eliminating Z-fighting and improving frustum plane extraction stability - fix(streaming): terrain load radius 4 → 6 tiles (~2133u → ~3200u) to exceed M2 render distance (2800u) and eliminate pop-in when camera turns; unload radius 7 → 9; spawn radius 3 → 4 - fix(visibility): ground-detail M2 distance multiplier 0.75 → 0.9 to reduce early pop of grass and debris
2026-04-04 13:43:16 +03:00
if (instanceSetLayout_) { vkDestroyDescriptorSetLayout(device, instanceSetLayout_, nullptr); instanceSetLayout_ = VK_NULL_HANDLE; }
if (particleTexLayout_) { vkDestroyDescriptorSetLayout(device, particleTexLayout_, nullptr); particleTexLayout_ = VK_NULL_HANDLE; }
// Destroy shadow resources
destroyPipeline(shadowPipeline_);
if (shadowPipelineLayout_) { vkDestroyPipelineLayout(device, shadowPipelineLayout_, nullptr); shadowPipelineLayout_ = VK_NULL_HANDLE; }
for (auto& pool : shadowTexPool_) { if (pool) { vkDestroyDescriptorPool(device, pool, nullptr); pool = VK_NULL_HANDLE; } }
if (shadowParamsPool_) { vkDestroyDescriptorPool(device, shadowParamsPool_, nullptr); shadowParamsPool_ = VK_NULL_HANDLE; }
if (shadowParamsLayout_) { vkDestroyDescriptorSetLayout(device, shadowParamsLayout_, nullptr); shadowParamsLayout_ = VK_NULL_HANDLE; }
if (shadowParamsUBO_) { vmaDestroyBuffer(alloc, shadowParamsUBO_, shadowParamsAlloc_); shadowParamsUBO_ = VK_NULL_HANDLE; }
initialized_ = false;
}
void M2Renderer::destroyModelGPU(M2ModelGPU& model) {
if (!vkCtx_) return;
VmaAllocator alloc = vkCtx_->getAllocator();
if (model.vertexBuffer) { vmaDestroyBuffer(alloc, model.vertexBuffer, model.vertexAlloc); model.vertexBuffer = VK_NULL_HANDLE; }
if (model.indexBuffer) { vmaDestroyBuffer(alloc, model.indexBuffer, model.indexAlloc); model.indexBuffer = VK_NULL_HANDLE; }
VkDevice device = vkCtx_->getDevice();
for (auto& batch : model.batches) {
if (batch.materialSet) { vkFreeDescriptorSets(device, materialDescPool_, 1, &batch.materialSet); batch.materialSet = VK_NULL_HANDLE; }
if (batch.materialUBO) { vmaDestroyBuffer(alloc, batch.materialUBO, batch.materialUBOAlloc); batch.materialUBO = VK_NULL_HANDLE; }
}
// Free pre-allocated particle texture descriptor sets
for (auto& pSet : model.particleTexSets) {
if (pSet) { vkFreeDescriptorSets(device, materialDescPool_, 1, &pSet); pSet = VK_NULL_HANDLE; }
}
model.particleTexSets.clear();
// Free ribbon texture descriptor sets
for (auto& rSet : model.ribbonTexSets) {
if (rSet) { vkFreeDescriptorSets(device, materialDescPool_, 1, &rSet); rSet = VK_NULL_HANDLE; }
}
model.ribbonTexSets.clear();
}
void M2Renderer::destroyInstanceBones(M2Instance& inst, bool defer) {
if (!vkCtx_) return;
VkDevice device = vkCtx_->getDevice();
VmaAllocator alloc = vkCtx_->getAllocator();
for (int i = 0; i < 2; i++) {
// Snapshot handles before clearing the instance — needed for both
// immediate and deferred paths.
VkDescriptorSet boneSet = inst.boneSet[i];
::VkBuffer boneBuf = inst.boneBuffer[i];
VmaAllocation boneAlloc = inst.boneAlloc[i];
inst.boneSet[i] = VK_NULL_HANDLE;
inst.boneBuffer[i] = VK_NULL_HANDLE;
inst.boneMapped[i] = nullptr;
if (!defer) {
// Immediate destruction (safe after vkDeviceWaitIdle)
if (boneSet != VK_NULL_HANDLE) {
vkFreeDescriptorSets(device, boneDescPool_, 1, &boneSet);
}
if (boneBuf) {
vmaDestroyBuffer(alloc, boneBuf, boneAlloc);
}
} else if (boneSet != VK_NULL_HANDLE || boneBuf) {
// Deferred destruction — the loop destroys bone sets for ALL frame
// slots, so the other slot's command buffer may still be in flight.
// Must wait for all fences, not just the current frame's.
VkDescriptorPool pool = boneDescPool_;
vkCtx_->deferAfterAllFrameFences([device, alloc, pool, boneSet, boneBuf, boneAlloc]() {
if (boneSet != VK_NULL_HANDLE) {
VkDescriptorSet s = boneSet;
vkFreeDescriptorSets(device, pool, 1, &s);
}
if (boneBuf) {
vmaDestroyBuffer(alloc, boneBuf, boneAlloc);
}
});
}
}
}
VkDescriptorSet M2Renderer::allocateMaterialSet() {
VkDescriptorSetAllocateInfo ai{VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO};
ai.descriptorPool = materialDescPool_;
ai.descriptorSetCount = 1;
ai.pSetLayouts = &materialSetLayout_;
VkDescriptorSet set = VK_NULL_HANDLE;
2026-03-22 21:47:12 +03:00
VkResult result = vkAllocateDescriptorSets(vkCtx_->getDevice(), &ai, &set);
if (result != VK_SUCCESS) {
LOG_ERROR("M2Renderer: material descriptor set allocation failed (", result, ")");
return VK_NULL_HANDLE;
}
return set;
}
VkDescriptorSet M2Renderer::allocateBoneSet() {
VkDescriptorSetAllocateInfo ai{VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO};
ai.descriptorPool = boneDescPool_;
ai.descriptorSetCount = 1;
ai.pSetLayouts = &boneSetLayout_;
VkDescriptorSet set = VK_NULL_HANDLE;
2026-03-22 21:47:12 +03:00
VkResult result = vkAllocateDescriptorSets(vkCtx_->getDevice(), &ai, &set);
if (result != VK_SUCCESS) {
LOG_ERROR("M2Renderer: bone descriptor set allocation failed (", result, ")");
return VK_NULL_HANDLE;
}
return set;
}
// ---------------------------------------------------------------------------
// M2 collision mesh: build spatial grid + classify triangles
// ---------------------------------------------------------------------------
void M2ModelGPU::CollisionMesh::build() {
if (indices.size() < 3 || vertices.empty()) return;
triCount = static_cast<uint32_t>(indices.size() / 3);
// Bounding box for grid
glm::vec3 bmin(std::numeric_limits<float>::max());
glm::vec3 bmax(-std::numeric_limits<float>::max());
for (const auto& v : vertices) {
bmin = glm::min(bmin, v);
bmax = glm::max(bmax, v);
}
gridOrigin = glm::vec2(bmin.x, bmin.y);
gridCellsX = std::max(1, std::min(32, static_cast<int>(std::ceil((bmax.x - bmin.x) / CELL_SIZE))));
gridCellsY = std::max(1, std::min(32, static_cast<int>(std::ceil((bmax.y - bmin.y) / CELL_SIZE))));
cellFloorTris.resize(gridCellsX * gridCellsY);
cellWallTris.resize(gridCellsX * gridCellsY);
triBounds.resize(triCount);
for (uint32_t ti = 0; ti < triCount; ti++) {
uint16_t i0 = indices[ti * 3];
uint16_t i1 = indices[ti * 3 + 1];
uint16_t i2 = indices[ti * 3 + 2];
if (i0 >= vertices.size() || i1 >= vertices.size() || i2 >= vertices.size()) continue;
const auto& v0 = vertices[i0];
const auto& v1 = vertices[i1];
const auto& v2 = vertices[i2];
triBounds[ti].minZ = std::min({v0.z, v1.z, v2.z});
triBounds[ti].maxZ = std::max({v0.z, v1.z, v2.z});
glm::vec3 normal = glm::cross(v1 - v0, v2 - v0);
float normalLen = glm::length(normal);
float absNz = (normalLen > 0.001f) ? std::abs(normal.z / normalLen) : 0.0f;
bool isFloor = (absNz >= 0.35f); // ~70° max slope (relaxed for steep stairs)
bool isWall = (absNz < 0.65f);
float triMinX = std::min({v0.x, v1.x, v2.x});
float triMaxX = std::max({v0.x, v1.x, v2.x});
float triMinY = std::min({v0.y, v1.y, v2.y});
float triMaxY = std::max({v0.y, v1.y, v2.y});
int cxMin = std::clamp(static_cast<int>((triMinX - gridOrigin.x) / CELL_SIZE), 0, gridCellsX - 1);
int cxMax = std::clamp(static_cast<int>((triMaxX - gridOrigin.x) / CELL_SIZE), 0, gridCellsX - 1);
int cyMin = std::clamp(static_cast<int>((triMinY - gridOrigin.y) / CELL_SIZE), 0, gridCellsY - 1);
int cyMax = std::clamp(static_cast<int>((triMaxY - gridOrigin.y) / CELL_SIZE), 0, gridCellsY - 1);
for (int cy = cyMin; cy <= cyMax; cy++) {
for (int cx = cxMin; cx <= cxMax; cx++) {
int ci = cy * gridCellsX + cx;
if (isFloor) cellFloorTris[ci].push_back(ti);
if (isWall) cellWallTris[ci].push_back(ti);
}
}
}
}
void M2ModelGPU::CollisionMesh::getFloorTrisInRange(
float minX, float minY, float maxX, float maxY,
std::vector<uint32_t>& out) const {
out.clear();
if (gridCellsX == 0 || gridCellsY == 0) return;
int cxMin = std::clamp(static_cast<int>((minX - gridOrigin.x) / CELL_SIZE), 0, gridCellsX - 1);
int cxMax = std::clamp(static_cast<int>((maxX - gridOrigin.x) / CELL_SIZE), 0, gridCellsX - 1);
int cyMin = std::clamp(static_cast<int>((minY - gridOrigin.y) / CELL_SIZE), 0, gridCellsY - 1);
int cyMax = std::clamp(static_cast<int>((maxY - gridOrigin.y) / CELL_SIZE), 0, gridCellsY - 1);
const size_t cellCount = static_cast<size_t>(cxMax - cxMin + 1) *
static_cast<size_t>(cyMax - cyMin + 1);
out.reserve(cellCount * 8);
for (int cy = cyMin; cy <= cyMax; cy++) {
for (int cx = cxMin; cx <= cxMax; cx++) {
const auto& cell = cellFloorTris[cy * gridCellsX + cx];
out.insert(out.end(), cell.begin(), cell.end());
}
}
std::sort(out.begin(), out.end());
out.erase(std::unique(out.begin(), out.end()), out.end());
}
void M2ModelGPU::CollisionMesh::getWallTrisInRange(
float minX, float minY, float maxX, float maxY,
std::vector<uint32_t>& out) const {
out.clear();
if (gridCellsX == 0 || gridCellsY == 0) return;
int cxMin = std::clamp(static_cast<int>((minX - gridOrigin.x) / CELL_SIZE), 0, gridCellsX - 1);
int cxMax = std::clamp(static_cast<int>((maxX - gridOrigin.x) / CELL_SIZE), 0, gridCellsX - 1);
int cyMin = std::clamp(static_cast<int>((minY - gridOrigin.y) / CELL_SIZE), 0, gridCellsY - 1);
int cyMax = std::clamp(static_cast<int>((maxY - gridOrigin.y) / CELL_SIZE), 0, gridCellsY - 1);
const size_t cellCount = static_cast<size_t>(cxMax - cxMin + 1) *
static_cast<size_t>(cyMax - cyMin + 1);
out.reserve(cellCount * 8);
for (int cy = cyMin; cy <= cyMax; cy++) {
for (int cx = cxMin; cx <= cxMax; cx++) {
const auto& cell = cellWallTris[cy * gridCellsX + cx];
out.insert(out.end(), cell.begin(), cell.end());
}
}
std::sort(out.begin(), out.end());
out.erase(std::unique(out.begin(), out.end()), out.end());
}
bool M2Renderer::hasModel(uint32_t modelId) const {
return models.find(modelId) != models.end();
}
bool M2Renderer::loadModel(const pipeline::M2Model& model, uint32_t modelId) {
if (models.find(modelId) != models.end()) {
// Already loaded
return true;
}
if (models.size() >= modelCacheLimit_) {
if (modelLimitRejectWarnings_ < 3) {
LOG_WARNING("M2 model cache full (", models.size(), "/", modelCacheLimit_,
"), skipping model load: id=", modelId, " name=", model.name);
}
++modelLimitRejectWarnings_;
return false;
}
bool hasGeometry = !model.vertices.empty() && !model.indices.empty();
bool hasParticles = !model.particleEmitters.empty();
bool hasRibbons = !model.ribbonEmitters.empty();
if (!hasGeometry && !hasParticles && !hasRibbons) {
LOG_WARNING("M2 model has no renderable content: ", model.name);
return false;
}
M2ModelGPU gpuModel;
gpuModel.name = model.name;
// Use tight bounds from actual vertices for collision/camera occlusion.
// Header bounds in some M2s are overly conservative.
glm::vec3 tightMin(0.0f);
glm::vec3 tightMax(0.0f);
if (hasGeometry) {
tightMin = glm::vec3(std::numeric_limits<float>::max());
tightMax = glm::vec3(-std::numeric_limits<float>::max());
for (const auto& v : model.vertices) {
tightMin = glm::min(tightMin, v.position);
tightMax = glm::max(tightMax, v.position);
}
}
// Classify model from name and geometry — pure function, no GPU dependencies.
auto cls = classifyM2Model(model.name, tightMin, tightMax,
model.vertices.size(),
model.particleEmitters.size());
const bool isInvisibleTrap = cls.isInvisibleTrap;
const bool groundDetailModel = cls.isGroundDetail;
if (isInvisibleTrap) {
LOG_INFO("Loading InvisibleTrap model: ", model.name, " (will be invisible, no collision)");
}
gpuModel.isInvisibleTrap = cls.isInvisibleTrap;
gpuModel.collisionSteppedFountain = cls.collisionSteppedFountain;
gpuModel.collisionSteppedLowPlatform = cls.collisionSteppedLowPlatform;
gpuModel.collisionBridge = cls.collisionBridge;
gpuModel.collisionPlanter = cls.collisionPlanter;
gpuModel.collisionStatue = cls.collisionStatue;
gpuModel.collisionTreeTrunk = cls.collisionTreeTrunk;
gpuModel.collisionNarrowVerticalProp = cls.collisionNarrowVerticalProp;
gpuModel.collisionSmallSolidProp = cls.collisionSmallSolidProp;
gpuModel.collisionNoBlock = cls.collisionNoBlock;
gpuModel.isGroundDetail = cls.isGroundDetail;
gpuModel.isFoliageLike = cls.isFoliageLike;
gpuModel.disableAnimation = cls.disableAnimation;
gpuModel.shadowWindFoliage = cls.shadowWindFoliage;
gpuModel.isFireflyEffect = cls.isFireflyEffect;
gpuModel.isSmoke = cls.isSmoke;
gpuModel.isSpellEffect = cls.isSpellEffect;
gpuModel.isLavaModel = cls.isLavaModel;
gpuModel.isInstancePortal = cls.isInstancePortal;
gpuModel.isWaterVegetation = cls.isWaterVegetation;
gpuModel.isElvenLike = cls.isElvenLike;
gpuModel.isLanternLike = cls.isLanternLike;
gpuModel.isKoboldFlame = cls.isKoboldFlame;
gpuModel.boundMin = tightMin;
gpuModel.boundMax = tightMax;
gpuModel.boundRadius = model.boundRadius;
// Fallback: compute bound radius from vertex extents when M2 header reports 0
if (gpuModel.boundRadius < 0.01f && !model.vertices.empty()) {
glm::vec3 extent = tightMax - tightMin;
gpuModel.boundRadius = glm::length(extent) * 0.5f;
}
gpuModel.indexCount = static_cast<uint32_t>(model.indices.size());
gpuModel.vertexCount = static_cast<uint32_t>(model.vertices.size());
// Store bone/sequence data for animation
gpuModel.bones = model.bones;
gpuModel.sequences = model.sequences;
gpuModel.globalSequenceDurations = model.globalSequenceDurations;
gpuModel.hasAnimation = false;
for (const auto& bone : model.bones) {
if (bone.translation.hasData() || bone.rotation.hasData() || bone.scale.hasData()) {
gpuModel.hasAnimation = true;
break;
}
}
// Build collision mesh + spatial grid from M2 bounding geometry
gpuModel.collision.vertices = model.collisionVertices;
gpuModel.collision.indices = model.collisionIndices;
gpuModel.collision.build();
if (gpuModel.collision.valid()) {
core::Logger::getInstance().debug(" M2 collision mesh: ", gpuModel.collision.triCount,
" tris, grid ", gpuModel.collision.gridCellsX, "x", gpuModel.collision.gridCellsY);
}
// Identify idle variation sequences (animation ID 0 = Stand)
for (int i = 0; i < static_cast<int>(model.sequences.size()); i++) {
if (model.sequences[i].id == 0 && model.sequences[i].duration > 0) {
gpuModel.idleVariationIndices.push_back(i);
}
}
// Batch all GPU uploads (VB, IB, textures) into a single command buffer
// submission with one fence wait, instead of one fence wait per upload.
vkCtx_->beginUploadBatch();
if (hasGeometry) {
// Create VBO with interleaved vertex data
// Format: position (3), normal (3), texcoord0 (2), texcoord1 (2), boneWeights (4), boneIndices (4 as float)
const size_t floatsPerVertex = 18;
std::vector<float> vertexData;
vertexData.reserve(model.vertices.size() * floatsPerVertex);
for (const auto& v : model.vertices) {
vertexData.push_back(v.position.x);
vertexData.push_back(v.position.y);
vertexData.push_back(v.position.z);
vertexData.push_back(v.normal.x);
vertexData.push_back(v.normal.y);
vertexData.push_back(v.normal.z);
vertexData.push_back(v.texCoords[0].x);
vertexData.push_back(v.texCoords[0].y);
vertexData.push_back(v.texCoords[1].x);
vertexData.push_back(v.texCoords[1].y);
float w0 = v.boneWeights[0] / 255.0f;
float w1 = v.boneWeights[1] / 255.0f;
float w2 = v.boneWeights[2] / 255.0f;
float w3 = v.boneWeights[3] / 255.0f;
vertexData.push_back(w0);
vertexData.push_back(w1);
vertexData.push_back(w2);
vertexData.push_back(w3);
vertexData.push_back(static_cast<float>(std::min(v.boneIndices[0], uint8_t(127))));
vertexData.push_back(static_cast<float>(std::min(v.boneIndices[1], uint8_t(127))));
vertexData.push_back(static_cast<float>(std::min(v.boneIndices[2], uint8_t(127))));
vertexData.push_back(static_cast<float>(std::min(v.boneIndices[3], uint8_t(127))));
}
// Upload vertex buffer to GPU
{
auto buf = uploadBuffer(*vkCtx_,
vertexData.data(), vertexData.size() * sizeof(float),
VK_BUFFER_USAGE_VERTEX_BUFFER_BIT);
gpuModel.vertexBuffer = buf.buffer;
gpuModel.vertexAlloc = buf.allocation;
}
// Upload index buffer to GPU
{
auto buf = uploadBuffer(*vkCtx_,
model.indices.data(), model.indices.size() * sizeof(uint16_t),
VK_BUFFER_USAGE_INDEX_BUFFER_BIT);
gpuModel.indexBuffer = buf.buffer;
gpuModel.indexAlloc = buf.allocation;
}
2026-03-22 21:47:12 +03:00
if (!gpuModel.vertexBuffer || !gpuModel.indexBuffer) {
LOG_ERROR("M2Renderer::loadModel: GPU buffer upload failed for model ", modelId);
}
}
// Load ALL textures from the model into a local vector.
// textureLoadFailed[i] is true if texture[i] had a named path that failed to load.
// Such batches are hidden (batchOpacity=0) rather than rendered white.
std::vector<VkTexture*> allTextures;
std::vector<bool> textureLoadFailed;
std::vector<std::string> textureKeysLower;
if (assetManager) {
for (size_t ti = 0; ti < model.textures.size(); ti++) {
const auto& tex = model.textures[ti];
std::string texPath = tex.filename;
// Some extracted M2 texture strings contain embedded NUL + garbage suffix.
// Truncate at first NUL so valid paths like "...foo.blp\0junk" still resolve.
size_t nul = texPath.find('\0');
if (nul != std::string::npos) {
texPath.resize(nul);
}
if (!texPath.empty()) {
std::string texKey = texPath;
std::replace(texKey.begin(), texKey.end(), '/', '\\');
std::transform(texKey.begin(), texKey.end(), texKey.begin(),
[](unsigned char c) { return static_cast<char>(std::tolower(c)); });
VkTexture* texPtr = loadTexture(texPath, tex.flags);
bool failed = (texPtr == whiteTexture_.get());
if (failed) {
static uint32_t loggedModelTextureFails = 0;
static bool loggedModelTextureFailSuppressed = false;
if (loggedModelTextureFails < 250) {
LOG_WARNING("M2 model ", model.name, " texture[", ti, "] failed to load: ", texPath);
++loggedModelTextureFails;
} else if (!loggedModelTextureFailSuppressed) {
LOG_WARNING("M2 model texture-failure warnings suppressed after ",
loggedModelTextureFails, " entries");
loggedModelTextureFailSuppressed = true;
}
}
if (isInvisibleTrap) {
LOG_INFO(" InvisibleTrap texture[", ti, "]: ", texPath, " -> ", (failed ? "WHITE" : "OK"));
}
allTextures.push_back(texPtr);
textureLoadFailed.push_back(failed);
textureKeysLower.push_back(std::move(texKey));
} else {
if (isInvisibleTrap) {
LOG_INFO(" InvisibleTrap texture[", ti, "]: EMPTY (using white fallback)");
}
allTextures.push_back(whiteTexture_.get());
textureLoadFailed.push_back(false); // Empty filename = intentional white (type!=0)
textureKeysLower.emplace_back();
}
}
}
static const bool kGlowDiag = envFlagEnabled("WOWEE_M2_GLOW_DIAG", false);
if (kGlowDiag) {
if (gpuModel.isLanternLike) {
for (size_t ti = 0; ti < model.textures.size(); ++ti) {
const std::string key = (ti < textureKeysLower.size()) ? textureKeysLower[ti] : std::string();
LOG_DEBUG("M2 GLOW TEX '", model.name, "' tex[", ti, "]='", key, "' flags=0x",
std::hex, model.textures[ti].flags, std::dec);
}
}
}
// Copy particle emitter data and resolve textures
gpuModel.particleEmitters = model.particleEmitters;
gpuModel.particleTextures.resize(model.particleEmitters.size(), whiteTexture_.get());
for (size_t ei = 0; ei < model.particleEmitters.size(); ei++) {
uint16_t texIdx = model.particleEmitters[ei].texture;
if (texIdx < allTextures.size() && allTextures[texIdx] != nullptr) {
gpuModel.particleTextures[ei] = allTextures[texIdx];
} else {
LOG_WARNING("M2 '", model.name, "' particle emitter[", ei,
"] texture index ", texIdx, " out of range (", allTextures.size(),
" textures) — using white fallback");
}
}
// Pre-allocate one stable descriptor set per particle emitter to avoid per-frame allocation.
// This prevents materialDescPool_ exhaustion when many emitters are active each frame.
if (particleTexLayout_ && materialDescPool_ && !model.particleEmitters.empty()) {
VkDevice device = vkCtx_->getDevice();
gpuModel.particleTexSets.resize(model.particleEmitters.size(), VK_NULL_HANDLE);
for (size_t ei = 0; ei < model.particleEmitters.size(); ei++) {
VkDescriptorSetAllocateInfo ai{VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO};
ai.descriptorPool = materialDescPool_;
ai.descriptorSetCount = 1;
ai.pSetLayouts = &particleTexLayout_;
if (vkAllocateDescriptorSets(device, &ai, &gpuModel.particleTexSets[ei]) == VK_SUCCESS) {
VkTexture* tex = gpuModel.particleTextures[ei];
VkDescriptorImageInfo imgInfo = tex->descriptorInfo();
VkWriteDescriptorSet write{VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET};
write.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET;
write.dstSet = gpuModel.particleTexSets[ei];
write.dstBinding = 0;
write.descriptorCount = 1;
write.descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER;
write.pImageInfo = &imgInfo;
vkUpdateDescriptorSets(device, 1, &write, 0, nullptr);
}
}
}
// Copy ribbon emitter data and resolve textures
gpuModel.ribbonEmitters = model.ribbonEmitters;
if (!model.ribbonEmitters.empty()) {
VkDevice device = vkCtx_->getDevice();
gpuModel.ribbonTextures.resize(model.ribbonEmitters.size(), whiteTexture_.get());
gpuModel.ribbonTexSets.resize(model.ribbonEmitters.size(), VK_NULL_HANDLE);
for (size_t ri = 0; ri < model.ribbonEmitters.size(); ri++) {
// Resolve texture via textureLookup table
uint16_t texLookupIdx = model.ribbonEmitters[ri].textureIndex;
uint32_t texIdx = (texLookupIdx < model.textureLookup.size())
? model.textureLookup[texLookupIdx] : UINT32_MAX;
if (texIdx < allTextures.size() && allTextures[texIdx] != nullptr) {
gpuModel.ribbonTextures[ri] = allTextures[texIdx];
} else {
LOG_WARNING("M2 '", model.name, "' ribbon emitter[", ri,
"] texLookup=", texLookupIdx, " resolved texIdx=", texIdx,
" out of range (", allTextures.size(),
" textures) — using white fallback");
}
// Allocate descriptor set (reuse particleTexLayout_ = single sampler)
if (particleTexLayout_ && materialDescPool_) {
VkDescriptorSetAllocateInfo ai{VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO};
ai.descriptorPool = materialDescPool_;
ai.descriptorSetCount = 1;
ai.pSetLayouts = &particleTexLayout_;
if (vkAllocateDescriptorSets(device, &ai, &gpuModel.ribbonTexSets[ri]) == VK_SUCCESS) {
VkTexture* tex = gpuModel.ribbonTextures[ri];
VkDescriptorImageInfo imgInfo = tex->descriptorInfo();
VkWriteDescriptorSet write{VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET};
write.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET;
write.dstSet = gpuModel.ribbonTexSets[ri];
write.dstBinding = 0;
write.descriptorCount = 1;
write.descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER;
write.pImageInfo = &imgInfo;
vkUpdateDescriptorSets(device, 1, &write, 0, nullptr);
}
}
}
LOG_DEBUG(" Ribbon emitters loaded: ", model.ribbonEmitters.size());
}
// Copy texture transform data for UV animation
gpuModel.textureTransforms = model.textureTransforms;
gpuModel.textureTransformLookup = model.textureTransformLookup;
gpuModel.hasTextureAnimation = false;
// Build per-batch GPU entries
if (!model.batches.empty()) {
for (const auto& batch : model.batches) {
M2ModelGPU::BatchGPU bgpu;
bgpu.indexStart = batch.indexStart;
bgpu.indexCount = batch.indexCount;
// Store texture animation index from batch
bgpu.textureAnimIndex = batch.textureAnimIndex;
if (bgpu.textureAnimIndex != 0xFFFF) {
gpuModel.hasTextureAnimation = true;
}
// Store blend mode and flags from material
if (batch.materialIndex < model.materials.size()) {
bgpu.blendMode = model.materials[batch.materialIndex].blendMode;
bgpu.materialFlags = model.materials[batch.materialIndex].flags;
if (bgpu.blendMode >= 2) gpuModel.hasTransparentBatches = true;
}
// Copy LOD level from batch
bgpu.submeshLevel = batch.submeshLevel;
// Resolve texture: batch.textureIndex → textureLookup → allTextures
VkTexture* tex = whiteTexture_.get();
bool texFailed = false;
std::string batchTexKeyLower;
if (batch.textureIndex < model.textureLookup.size()) {
uint16_t texIdx = model.textureLookup[batch.textureIndex];
if (texIdx < allTextures.size()) {
tex = allTextures[texIdx];
texFailed = (texIdx < textureLoadFailed.size()) && textureLoadFailed[texIdx];
if (texIdx < textureKeysLower.size()) {
batchTexKeyLower = textureKeysLower[texIdx];
}
}
if (texIdx < model.textures.size()) {
bgpu.texFlags = static_cast<uint8_t>(model.textures[texIdx].flags & 0x3);
}
} else if (!allTextures.empty()) {
LOG_WARNING("M2 '", model.name, "' batch textureIndex ", batch.textureIndex,
" out of range (textureLookup size=", model.textureLookup.size(),
") — falling back to texture[0]");
tex = allTextures[0];
texFailed = !textureLoadFailed.empty() && textureLoadFailed[0];
if (!textureKeysLower.empty()) {
batchTexKeyLower = textureKeysLower[0];
}
}
if (texFailed && groundDetailModel) {
static const std::string kDetailFallbackTexture = "World\\NoDXT\\Detail\\8des_detaildoodads01.blp";
VkTexture* fallbackTex = loadTexture(kDetailFallbackTexture, 0);
if (fallbackTex != nullptr && fallbackTex != whiteTexture_.get()) {
tex = fallbackTex;
texFailed = false;
}
}
bgpu.texture = tex;
const auto tcls = classifyBatchTexture(batchTexKeyLower);
const bool modelLanternFamily = gpuModel.isLanternLike;
bgpu.lanternGlowHint =
tcls.exactLanternGlowTex ||
((tcls.hasGlowToken || (modelLanternFamily && tcls.hasFlameToken)) &&
(tcls.lanternFamily || modelLanternFamily) &&
(!tcls.likelyFlame || modelLanternFamily));
bgpu.glowCardLike = bgpu.lanternGlowHint && tcls.hasGlowCardToken;
bgpu.glowTint = tcls.glowTint;
if (tex != nullptr && tex != whiteTexture_.get()) {
auto pit = texturePropsByPtr_.find(tex);
if (pit != texturePropsByPtr_.end()) {
bgpu.hasAlpha = pit->second.hasAlpha;
bgpu.colorKeyBlack = pit->second.colorKeyBlack;
}
}
// textureCoordIndex is an index into a texture coord combo table, not directly
// a UV set selector. Most batches have index=0 (UV set 0). We always use UV set 0
// since we don't have the full combo table — dual-UV effects are rare edge cases.
bgpu.textureUnit = 0;
// Start at full opacity; hide only if texture failed to load.
bgpu.batchOpacity = (texFailed && !groundDetailModel) ? 0.0f : 1.0f;
// Apply at-rest transparency and color alpha from the M2 animation tracks.
// These provide per-batch opacity for ghosts, ethereal effects, fading doodads, etc.
// Skip zero values: some animated tracks start at 0 and animate up, and baking
// that first keyframe would make the entire batch permanently invisible.
if (bgpu.batchOpacity > 0.0f) {
float animAlpha = 1.0f;
if (batch.colorIndex < model.colorAlphas.size()) {
float ca = model.colorAlphas[batch.colorIndex];
if (ca > 0.001f) animAlpha *= ca;
}
if (batch.transparencyIndex < model.textureWeights.size()) {
float tw = model.textureWeights[batch.transparencyIndex];
if (tw > 0.001f) animAlpha *= tw;
}
bgpu.batchOpacity *= animAlpha;
}
// Compute batch center and radius for glow sprite positioning
if ((bgpu.blendMode >= 3 || bgpu.colorKeyBlack) && batch.indexCount > 0) {
glm::vec3 sum(0.0f);
uint32_t counted = 0;
for (uint32_t j = batch.indexStart; j < batch.indexStart + batch.indexCount; j++) {
if (j < model.indices.size()) {
uint16_t vi = model.indices[j];
if (vi < model.vertices.size()) {
sum += model.vertices[vi].position;
counted++;
}
}
}
if (counted > 0) {
bgpu.center = sum / static_cast<float>(counted);
float maxDist = 0.0f;
for (uint32_t j = batch.indexStart; j < batch.indexStart + batch.indexCount; j++) {
if (j < model.indices.size()) {
uint16_t vi = model.indices[j];
if (vi < model.vertices.size()) {
float d = glm::length(model.vertices[vi].position - bgpu.center);
maxDist = std::max(maxDist, d);
}
}
}
bgpu.glowSize = std::max(maxDist, 0.5f);
}
}
// Optional diagnostics for glow/light batches (disabled by default).
if (kGlowDiag && gpuModel.isLanternLike) {
LOG_DEBUG("M2 GLOW DIAG '", model.name, "' batch ", gpuModel.batches.size(),
": blend=", bgpu.blendMode, " matFlags=0x",
std::hex, bgpu.materialFlags, std::dec,
" colorKey=", bgpu.colorKeyBlack ? "Y" : "N",
" hasAlpha=", bgpu.hasAlpha ? "Y" : "N",
" unlit=", (bgpu.materialFlags & 0x01) ? "Y" : "N",
" lanternHint=", bgpu.lanternGlowHint ? "Y" : "N",
" glowSize=", bgpu.glowSize,
" tex=", bgpu.texture,
" idxCount=", bgpu.indexCount);
}
gpuModel.batches.push_back(bgpu);
}
} else {
// Fallback: single batch covering all indices with first texture
M2ModelGPU::BatchGPU bgpu;
bgpu.indexStart = 0;
bgpu.indexCount = gpuModel.indexCount;
bgpu.texture = allTextures.empty() ? whiteTexture_.get() : allTextures[0];
if (bgpu.texture != nullptr && bgpu.texture != whiteTexture_.get()) {
auto pit = texturePropsByPtr_.find(bgpu.texture);
if (pit != texturePropsByPtr_.end()) {
bgpu.hasAlpha = pit->second.hasAlpha;
bgpu.colorKeyBlack = pit->second.colorKeyBlack;
}
}
gpuModel.batches.push_back(bgpu);
}
// Detect particle emitter volume models: box mesh (24 verts, 36 indices)
// with disproportionately large bounds. These are invisible bounding volumes
// that only exist to spawn particles — their mesh should never be rendered.
if (!isInvisibleTrap && !groundDetailModel &&
gpuModel.vertexCount <= 24 && gpuModel.indexCount <= 36
&& !model.particleEmitters.empty()) {
glm::vec3 size = gpuModel.boundMax - gpuModel.boundMin;
float maxDim = std::max({size.x, size.y, size.z});
if (maxDim > 5.0f) {
gpuModel.isInvisibleTrap = true;
LOG_DEBUG("M2 emitter volume hidden: '", model.name, "' size=(",
size.x, " x ", size.y, " x ", size.z, ")");
}
}
vkCtx_->endUploadBatch();
// Allocate Vulkan descriptor sets and UBOs for each batch
for (auto& bgpu : gpuModel.batches) {
// Create combined UBO for M2Params (binding 1) + M2Material (binding 2)
// We allocate them as separate buffers for clarity
VmaAllocationInfo matAllocInfo{};
{
VkBufferCreateInfo bci{VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO};
bci.size = sizeof(M2MaterialUBO);
bci.usage = VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT;
VmaAllocationCreateInfo aci{};
aci.usage = VMA_MEMORY_USAGE_CPU_TO_GPU;
aci.flags = VMA_ALLOCATION_CREATE_MAPPED_BIT;
vmaCreateBuffer(vkCtx_->getAllocator(), &bci, &aci, &bgpu.materialUBO, &bgpu.materialUBOAlloc, &matAllocInfo);
// Write initial material data (static per-batch — fadeAlpha/interiorDarken updated at draw time)
M2MaterialUBO mat{};
mat.hasTexture = (bgpu.texture != nullptr && bgpu.texture != whiteTexture_.get()) ? 1 : 0;
mat.alphaTest = (bgpu.blendMode == 1 || (bgpu.blendMode >= 2 && !bgpu.hasAlpha)) ? 1 : 0;
mat.colorKeyBlack = bgpu.colorKeyBlack ? 1 : 0;
mat.colorKeyThreshold = 0.08f;
mat.unlit = (bgpu.materialFlags & 0x01) ? 1 : 0;
mat.blendMode = bgpu.blendMode;
mat.fadeAlpha = 1.0f;
mat.interiorDarken = 0.0f;
mat.specularIntensity = 0.5f;
memcpy(matAllocInfo.pMappedData, &mat, sizeof(mat));
bgpu.materialUBOMapped = matAllocInfo.pMappedData;
}
// Allocate descriptor set and write all bindings
bgpu.materialSet = allocateMaterialSet();
if (bgpu.materialSet) {
VkTexture* batchTex = bgpu.texture ? bgpu.texture : whiteTexture_.get();
VkDescriptorImageInfo imgInfo = batchTex->descriptorInfo();
VkDescriptorBufferInfo matBufInfo{};
matBufInfo.buffer = bgpu.materialUBO;
matBufInfo.offset = 0;
matBufInfo.range = sizeof(M2MaterialUBO);
VkWriteDescriptorSet writes[2] = {};
// binding 0: texture
writes[0].sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET;
writes[0].dstSet = bgpu.materialSet;
writes[0].dstBinding = 0;
writes[0].descriptorCount = 1;
writes[0].descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER;
writes[0].pImageInfo = &imgInfo;
// binding 2: M2Material UBO
writes[1].sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET;
writes[1].dstSet = bgpu.materialSet;
writes[1].dstBinding = 2;
writes[1].descriptorCount = 1;
writes[1].descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER;
writes[1].pBufferInfo = &matBufInfo;
vkUpdateDescriptorSets(vkCtx_->getDevice(), 2, writes, 0, nullptr);
}
}
// Pre-compute available LOD levels to avoid per-instance batch iteration
gpuModel.availableLODs = 0;
for (const auto& b : gpuModel.batches) {
if (b.submeshLevel < 8) gpuModel.availableLODs |= (1u << b.submeshLevel);
}
models[modelId] = std::move(gpuModel);
2026-03-22 21:47:12 +03:00
spatialIndexDirty_ = true; // Map may have rehashed — refresh cachedModel pointers
LOG_DEBUG("Loaded M2 model: ", model.name, " (", models[modelId].vertexCount, " vertices, ",
models[modelId].indexCount / 3, " triangles, ", models[modelId].batches.size(), " batches)");
return true;
}
} // namespace rendering
} // namespace wowee