2026-02-02 12:24:50 -08:00
|
|
|
|
#include "rendering/m2_renderer.hpp"
|
2026-04-05 19:30:44 +03:00
|
|
|
|
#include "rendering/m2_renderer_internal.h"
|
2026-03-24 19:55:24 +03:00
|
|
|
|
#include "rendering/m2_model_classifier.hpp"
|
2026-02-21 19:41:21 -08:00
|
|
|
|
#include "rendering/vk_context.hpp"
|
|
|
|
|
|
#include "rendering/vk_buffer.hpp"
|
|
|
|
|
|
#include "rendering/vk_texture.hpp"
|
|
|
|
|
|
#include "rendering/vk_pipeline.hpp"
|
|
|
|
|
|
#include "rendering/vk_shader.hpp"
|
|
|
|
|
|
#include "rendering/vk_utils.hpp"
|
|
|
|
|
|
#include "rendering/vk_frame_data.hpp"
|
2026-02-02 12:24:50 -08:00
|
|
|
|
#include "rendering/camera.hpp"
|
|
|
|
|
|
#include "rendering/frustum.hpp"
|
|
|
|
|
|
#include "pipeline/asset_manager.hpp"
|
|
|
|
|
|
#include "pipeline/blp_loader.hpp"
|
|
|
|
|
|
#include "core/logger.hpp"
|
2026-04-03 09:41:34 +03:00
|
|
|
|
#include "core/profiler.hpp"
|
2026-02-03 16:21:48 -08:00
|
|
|
|
#include <chrono>
|
2026-02-03 16:28:33 -08:00
|
|
|
|
#include <cctype>
|
2026-02-02 12:24:50 -08:00
|
|
|
|
#include <glm/gtc/matrix_transform.hpp>
|
|
|
|
|
|
#include <glm/gtc/type_ptr.hpp>
|
2026-02-04 11:40:00 -08:00
|
|
|
|
#include <glm/gtx/quaternion.hpp>
|
2026-02-02 23:03:45 -08:00
|
|
|
|
#include <unordered_set>
|
2026-02-07 19:20:37 -08:00
|
|
|
|
#include <functional>
|
2026-02-02 23:03:45 -08:00
|
|
|
|
#include <algorithm>
|
2026-02-03 15:17:54 -08:00
|
|
|
|
#include <cmath>
|
2026-02-20 20:31:04 -08:00
|
|
|
|
#include <cstdlib>
|
2026-03-29 20:42:10 -07:00
|
|
|
|
#include <random>
|
2026-02-03 15:17:54 -08:00
|
|
|
|
#include <limits>
|
2026-02-07 14:28:14 -08:00
|
|
|
|
#include <future>
|
|
|
|
|
|
#include <thread>
|
2026-02-02 12:24:50 -08:00
|
|
|
|
|
|
|
|
|
|
namespace wowee {
|
|
|
|
|
|
namespace rendering {
|
|
|
|
|
|
|
2026-02-03 15:17:54 -08:00
|
|
|
|
namespace {
|
|
|
|
|
|
|
2026-02-20 20:31:04 -08:00
|
|
|
|
// Interprets the environment variable `key` as a boolean switch.
//
// Returns `defaultValue` when `key` is null, the variable is unset, empty, or
// contains only whitespace. Otherwise the value is trimmed and compared
// case-insensitively: "0", "false", "off" and "no" disable the flag; any
// other value enables it.
bool envFlagEnabled(const char* key, bool defaultValue) {
    // std::getenv requires a valid C string; treat a null key as "not set".
    if (!key) return defaultValue;

    const char* raw = std::getenv(key);
    if (!raw || !*raw) return defaultValue;

    std::string v(raw);

    // Strip surrounding whitespace so values such as "0 " or "off\n" (common
    // when the variable comes from a script or an .env file) are recognised
    // instead of silently being treated as "enabled".
    const char* const kWhitespace = " \t\r\n\f\v";
    const std::string::size_type first = v.find_first_not_of(kWhitespace);
    if (first == std::string::npos) return defaultValue;
    v.erase(v.find_last_not_of(kWhitespace) + 1);
    v.erase(0, first);

    // Lower-case through unsigned char: std::tolower has undefined behaviour
    // for negative char values.
    std::transform(v.begin(), v.end(), v.begin(), [](unsigned char c) {
        return static_cast<char>(std::tolower(c));
    });

    return !(v == "0" || v == "false" || v == "off" || v == "no");
}
|
|
|
|
|
|
|
2026-02-03 15:17:54 -08:00
|
|
|
|
} // namespace
|
|
|
|
|
|
|
2026-02-02 12:24:50 -08:00
|
|
|
|
void M2Instance::updateModelMatrix() {
|
|
|
|
|
|
modelMatrix = glm::mat4(1.0f);
|
|
|
|
|
|
modelMatrix = glm::translate(modelMatrix, position);
|
|
|
|
|
|
|
|
|
|
|
|
// Rotation in radians
|
|
|
|
|
|
modelMatrix = glm::rotate(modelMatrix, rotation.x, glm::vec3(1.0f, 0.0f, 0.0f));
|
|
|
|
|
|
modelMatrix = glm::rotate(modelMatrix, rotation.y, glm::vec3(0.0f, 1.0f, 0.0f));
|
|
|
|
|
|
modelMatrix = glm::rotate(modelMatrix, rotation.z, glm::vec3(0.0f, 0.0f, 1.0f));
|
|
|
|
|
|
|
|
|
|
|
|
modelMatrix = glm::scale(modelMatrix, glm::vec3(scale));
|
2026-02-03 16:04:21 -08:00
|
|
|
|
invModelMatrix = glm::inverse(modelMatrix);
|
2026-02-02 12:24:50 -08:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
// Construction performs no work of its own; GPU resources are created later
// by initialize().
M2Renderer::M2Renderer() = default;
|
|
|
|
|
|
|
|
|
|
|
|
// Tears the renderer down by delegating to shutdown().
M2Renderer::~M2Renderer() {
    this->shutdown();
}
|
|
|
|
|
|
|
2026-02-21 19:41:21 -08:00
|
|
|
|
bool M2Renderer::initialize(VkContext* ctx, VkDescriptorSetLayout perFrameLayout,
|
|
|
|
|
|
pipeline::AssetManager* assets) {
|
2026-02-18 22:41:05 -08:00
|
|
|
|
if (initialized_) { assetManager = assets; return true; }
|
2026-02-21 19:41:21 -08:00
|
|
|
|
vkCtx_ = ctx;
|
2026-02-02 12:24:50 -08:00
|
|
|
|
assetManager = assets;
|
|
|
|
|
|
|
2026-02-22 08:12:08 -08:00
|
|
|
|
const unsigned hc = std::thread::hardware_concurrency();
|
|
|
|
|
|
const size_t availableCores = (hc > 1u) ? static_cast<size_t>(hc - 1u) : 1ull;
|
|
|
|
|
|
// Keep headroom for other frame tasks: M2 gets about half of non-main cores by default.
|
|
|
|
|
|
const size_t defaultAnimThreads = std::max<size_t>(1, availableCores / 2);
|
|
|
|
|
|
numAnimThreads_ = static_cast<uint32_t>(std::max<size_t>(
|
|
|
|
|
|
1, envSizeOrDefault("WOWEE_M2_ANIM_THREADS", defaultAnimThreads)));
|
2026-02-21 19:41:21 -08:00
|
|
|
|
LOG_INFO("Initializing M2 renderer (Vulkan, ", numAnimThreads_, " anim threads)...");
|
2026-02-02 23:10:19 -08:00
|
|
|
|
|
2026-02-21 19:41:21 -08:00
|
|
|
|
VkDevice device = vkCtx_->getDevice();
|
2026-02-02 12:24:50 -08:00
|
|
|
|
|
2026-02-21 19:41:21 -08:00
|
|
|
|
// --- Descriptor set layouts ---
|
2026-02-02 12:24:50 -08:00
|
|
|
|
|
2026-02-21 19:41:21 -08:00
|
|
|
|
// Material set layout (set 1): binding 0 = sampler2D, binding 2 = M2Material UBO
|
|
|
|
|
|
// (M2Params moved to push constants alongside model matrix)
|
|
|
|
|
|
{
|
|
|
|
|
|
VkDescriptorSetLayoutBinding bindings[2] = {};
|
|
|
|
|
|
bindings[0].binding = 0;
|
|
|
|
|
|
bindings[0].descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER;
|
|
|
|
|
|
bindings[0].descriptorCount = 1;
|
|
|
|
|
|
bindings[0].stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT;
|
|
|
|
|
|
bindings[1].binding = 2;
|
|
|
|
|
|
bindings[1].descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER;
|
|
|
|
|
|
bindings[1].descriptorCount = 1;
|
|
|
|
|
|
bindings[1].stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT;
|
|
|
|
|
|
|
|
|
|
|
|
VkDescriptorSetLayoutCreateInfo ci{VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO};
|
|
|
|
|
|
ci.bindingCount = 2;
|
|
|
|
|
|
ci.pBindings = bindings;
|
|
|
|
|
|
vkCreateDescriptorSetLayout(device, &ci, nullptr, &materialSetLayout_);
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
// Bone set layout (set 2): binding 0 = STORAGE_BUFFER (bone matrices)
|
|
|
|
|
|
{
|
|
|
|
|
|
VkDescriptorSetLayoutBinding binding{};
|
|
|
|
|
|
binding.binding = 0;
|
|
|
|
|
|
binding.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
|
|
|
|
|
|
binding.descriptorCount = 1;
|
|
|
|
|
|
binding.stageFlags = VK_SHADER_STAGE_VERTEX_BIT;
|
2026-02-04 11:31:08 -08:00
|
|
|
|
|
2026-02-21 19:41:21 -08:00
|
|
|
|
VkDescriptorSetLayoutCreateInfo ci{VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO};
|
|
|
|
|
|
ci.bindingCount = 1;
|
|
|
|
|
|
ci.pBindings = &binding;
|
|
|
|
|
|
vkCreateDescriptorSetLayout(device, &ci, nullptr, &boneSetLayout_);
|
|
|
|
|
|
}
|
2026-02-06 03:28:21 -08:00
|
|
|
|
|
2026-04-05 12:27:35 +03:00
|
|
|
|
// Instance data set layout (set 3): binding 0 = STORAGE_BUFFER (per-instance data)
|
feat(rendering): GPU architecture + visual quality fixes
M2 GPU instancing
- M2InstanceGPU SSBO (96 B/entry, double-buffered, 16384 max)
- Group opaque instances by (modelId, LOD); single vkCmdDrawIndexed per group
- boneBase field indexes into mega bone SSBO via gl_InstanceIndex
Indirect terrain drawing
- 24 MB mega index buffer (6M uint32) + 64 MB mega vertex buffer
- CPU builds VkDrawIndexedIndirectCommand per visible chunk
- Single VB/IB bind per frame; shadow pass reuses mega buffers
- Replaced vkCmdDrawIndexedIndirect with direct vkCmdDrawIndexed to fix
host-mapped buffer race condition that caused terrain flickering
GPU frustum culling (compute shader)
- m2_cull.comp.glsl: 64-thread workgroups, sphere-vs-6-planes + distance cull
- CullInstanceGPU SSBO input, uint visibility[] output, double-buffered
- dispatchCullCompute() runs before main pass via render graph node
Consolidated bone matrix SSBOs
- 16 MB double-buffered mega bone SSBO (2048 instances × 128 bones)
- Eliminated per-instance descriptor sets; one megaBoneSet_ per frame
- prepareRender() packs bone matrices consecutively into current frame slot
Render graph / frame graph
- RenderGraph: RGResource handles, RGPass nodes, Kahn topological sort
- Automatic VkImageMemoryBarrier/VkBufferMemoryBarrier between passes
- Passes: minimap_composite, worldmap_composite, preview_composite,
shadow_pass, reflection_pass, compute_cull
- beginFrame() uses buildFrameGraph() + renderGraph_->execute(cmd)
Pipeline derivatives
- PipelineBuilder::setFlags/setBasePipeline for VK_PIPELINE_CREATE_DERIVATIVE_BIT
- M2 opaque = base; alphaTest/alpha/additive are derivatives
- Applied to terrain (wireframe) and WMO (alpha-test) renderers
Rendering bug fixes:
- fix(shadow): compute lightSpaceMatrix before updatePerFrameUBO to eliminate
one-frame lag that caused shadow trails and flicker on moving objects
- fix(shadow): scale depth bias with shadowDistance_ instead of hardcoded 0.8f
to prevent acne at close range and gaps at far range
- fix(visibility): WMO group distance threshold 500u → 1200u to match terrain
view distance; buildings were disappearing on the horizon
- fix(precision): camera near plane 0.05 → 0.5 (ratio 600K:1 → 60K:1),
eliminating Z-fighting and improving frustum plane extraction stability
- fix(streaming): terrain load radius 4 → 6 tiles (~2133u → ~3200u) to exceed
M2 render distance (2800u) and eliminate pop-in when camera turns;
unload radius 7 → 9; spawn radius 3 → 4
- fix(visibility): ground-detail M2 distance multiplier 0.75 → 0.9 to reduce
early pop of grass and debris
2026-04-04 13:43:16 +03:00
|
|
|
|
{
|
|
|
|
|
|
VkDescriptorSetLayoutBinding binding{};
|
|
|
|
|
|
binding.binding = 0;
|
|
|
|
|
|
binding.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
|
|
|
|
|
|
binding.descriptorCount = 1;
|
|
|
|
|
|
binding.stageFlags = VK_SHADER_STAGE_VERTEX_BIT;
|
|
|
|
|
|
|
|
|
|
|
|
VkDescriptorSetLayoutCreateInfo ci{VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO};
|
|
|
|
|
|
ci.bindingCount = 1;
|
|
|
|
|
|
ci.pBindings = &binding;
|
|
|
|
|
|
vkCreateDescriptorSetLayout(device, &ci, nullptr, &instanceSetLayout_);
|
|
|
|
|
|
}
|
|
|
|
|
|
|
2026-02-21 19:41:21 -08:00
|
|
|
|
// Particle texture set layout (set 1 for particles): binding 0 = sampler2D
|
|
|
|
|
|
{
|
|
|
|
|
|
VkDescriptorSetLayoutBinding binding{};
|
|
|
|
|
|
binding.binding = 0;
|
|
|
|
|
|
binding.descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER;
|
|
|
|
|
|
binding.descriptorCount = 1;
|
|
|
|
|
|
binding.stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT;
|
2026-02-02 12:24:50 -08:00
|
|
|
|
|
2026-02-21 19:41:21 -08:00
|
|
|
|
VkDescriptorSetLayoutCreateInfo ci{VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO};
|
|
|
|
|
|
ci.bindingCount = 1;
|
|
|
|
|
|
ci.pBindings = &binding;
|
|
|
|
|
|
vkCreateDescriptorSetLayout(device, &ci, nullptr, &particleTexLayout_);
|
|
|
|
|
|
}
|
2026-02-04 15:05:46 -08:00
|
|
|
|
|
2026-02-21 19:41:21 -08:00
|
|
|
|
// --- Descriptor pools ---
|
|
|
|
|
|
{
|
|
|
|
|
|
VkDescriptorPoolSize sizes[] = {
|
|
|
|
|
|
{VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, MAX_MATERIAL_SETS + 256},
|
|
|
|
|
|
{VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, MAX_MATERIAL_SETS + 256},
|
|
|
|
|
|
};
|
|
|
|
|
|
VkDescriptorPoolCreateInfo ci{VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO};
|
|
|
|
|
|
ci.maxSets = MAX_MATERIAL_SETS + 256;
|
|
|
|
|
|
ci.poolSizeCount = 2;
|
|
|
|
|
|
ci.pPoolSizes = sizes;
|
|
|
|
|
|
ci.flags = VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT;
|
|
|
|
|
|
vkCreateDescriptorPool(device, &ci, nullptr, &materialDescPool_);
|
|
|
|
|
|
}
|
|
|
|
|
|
{
|
|
|
|
|
|
VkDescriptorPoolSize sizes[] = {
|
|
|
|
|
|
{VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, MAX_BONE_SETS},
|
|
|
|
|
|
};
|
|
|
|
|
|
VkDescriptorPoolCreateInfo ci{VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO};
|
|
|
|
|
|
ci.maxSets = MAX_BONE_SETS;
|
|
|
|
|
|
ci.poolSizeCount = 1;
|
|
|
|
|
|
ci.pPoolSizes = sizes;
|
|
|
|
|
|
ci.flags = VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT;
|
|
|
|
|
|
vkCreateDescriptorPool(device, &ci, nullptr, &boneDescPool_);
|
|
|
|
|
|
}
|
2026-02-02 12:24:50 -08:00
|
|
|
|
|
2026-03-22 21:47:12 +03:00
|
|
|
|
// Create a small identity-bone SSBO + descriptor set so that non-animated
|
|
|
|
|
|
// draws always have a valid set 2 bound. The Intel ANV driver segfaults
|
|
|
|
|
|
// on vkCmdDrawIndexed when a declared descriptor set slot is unbound.
|
|
|
|
|
|
{
|
|
|
|
|
|
// Single identity matrix (bone 0 = identity)
|
|
|
|
|
|
glm::mat4 identity(1.0f);
|
|
|
|
|
|
VkBufferCreateInfo bci{VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO};
|
|
|
|
|
|
bci.size = sizeof(glm::mat4);
|
|
|
|
|
|
bci.usage = VK_BUFFER_USAGE_STORAGE_BUFFER_BIT;
|
|
|
|
|
|
VmaAllocationCreateInfo aci{};
|
|
|
|
|
|
aci.usage = VMA_MEMORY_USAGE_CPU_TO_GPU;
|
|
|
|
|
|
aci.flags = VMA_ALLOCATION_CREATE_MAPPED_BIT;
|
|
|
|
|
|
VmaAllocationInfo allocInfo{};
|
|
|
|
|
|
vmaCreateBuffer(ctx->getAllocator(), &bci, &aci,
|
|
|
|
|
|
&dummyBoneBuffer_, &dummyBoneAlloc_, &allocInfo);
|
|
|
|
|
|
if (allocInfo.pMappedData) {
|
|
|
|
|
|
memcpy(allocInfo.pMappedData, &identity, sizeof(identity));
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
dummyBoneSet_ = allocateBoneSet();
|
|
|
|
|
|
if (dummyBoneSet_) {
|
|
|
|
|
|
VkDescriptorBufferInfo bufInfo{};
|
|
|
|
|
|
bufInfo.buffer = dummyBoneBuffer_;
|
|
|
|
|
|
bufInfo.offset = 0;
|
|
|
|
|
|
bufInfo.range = sizeof(glm::mat4);
|
|
|
|
|
|
VkWriteDescriptorSet write{VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET};
|
|
|
|
|
|
write.dstSet = dummyBoneSet_;
|
|
|
|
|
|
write.dstBinding = 0;
|
|
|
|
|
|
write.descriptorCount = 1;
|
|
|
|
|
|
write.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
|
|
|
|
|
|
write.pBufferInfo = &bufInfo;
|
|
|
|
|
|
vkUpdateDescriptorSets(device, 1, &write, 0, nullptr);
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
|
feat(rendering): GPU architecture + visual quality fixes
M2 GPU instancing
- M2InstanceGPU SSBO (96 B/entry, double-buffered, 16384 max)
- Group opaque instances by (modelId, LOD); single vkCmdDrawIndexed per group
- boneBase field indexes into mega bone SSBO via gl_InstanceIndex
Indirect terrain drawing
- 24 MB mega index buffer (6M uint32) + 64 MB mega vertex buffer
- CPU builds VkDrawIndexedIndirectCommand per visible chunk
- Single VB/IB bind per frame; shadow pass reuses mega buffers
- Replaced vkCmdDrawIndexedIndirect with direct vkCmdDrawIndexed to fix
host-mapped buffer race condition that caused terrain flickering
GPU frustum culling (compute shader)
- m2_cull.comp.glsl: 64-thread workgroups, sphere-vs-6-planes + distance cull
- CullInstanceGPU SSBO input, uint visibility[] output, double-buffered
- dispatchCullCompute() runs before main pass via render graph node
Consolidated bone matrix SSBOs
- 16 MB double-buffered mega bone SSBO (2048 instances × 128 bones)
- Eliminated per-instance descriptor sets; one megaBoneSet_ per frame
- prepareRender() packs bone matrices consecutively into current frame slot
Render graph / frame graph
- RenderGraph: RGResource handles, RGPass nodes, Kahn topological sort
- Automatic VkImageMemoryBarrier/VkBufferMemoryBarrier between passes
- Passes: minimap_composite, worldmap_composite, preview_composite,
shadow_pass, reflection_pass, compute_cull
- beginFrame() uses buildFrameGraph() + renderGraph_->execute(cmd)
Pipeline derivatives
- PipelineBuilder::setFlags/setBasePipeline for VK_PIPELINE_CREATE_DERIVATIVE_BIT
- M2 opaque = base; alphaTest/alpha/additive are derivatives
- Applied to terrain (wireframe) and WMO (alpha-test) renderers
Rendering bug fixes:
- fix(shadow): compute lightSpaceMatrix before updatePerFrameUBO to eliminate
one-frame lag that caused shadow trails and flicker on moving objects
- fix(shadow): scale depth bias with shadowDistance_ instead of hardcoded 0.8f
to prevent acne at close range and gaps at far range
- fix(visibility): WMO group distance threshold 500u → 1200u to match terrain
view distance; buildings were disappearing on the horizon
- fix(precision): camera near plane 0.05 → 0.5 (ratio 600K:1 → 60K:1),
eliminating Z-fighting and improving frustum plane extraction stability
- fix(streaming): terrain load radius 4 → 6 tiles (~2133u → ~3200u) to exceed
M2 render distance (2800u) and eliminate pop-in when camera turns;
unload radius 7 → 9; spawn radius 3 → 4
- fix(visibility): ground-detail M2 distance multiplier 0.75 → 0.9 to reduce
early pop of grass and debris
2026-04-04 13:43:16 +03:00
|
|
|
|
// Mega bone SSBO — consolidates all animated instance bones into one buffer per frame.
|
|
|
|
|
|
// Slot 0 = identity matrix (for non-animated instances), slots 1..N = animated instances.
|
|
|
|
|
|
{
|
|
|
|
|
|
const VkDeviceSize megaSize = MEGA_BONE_MAX_INSTANCES * MAX_BONES_PER_INSTANCE * sizeof(glm::mat4);
|
|
|
|
|
|
glm::mat4 identity(1.0f);
|
|
|
|
|
|
for (int i = 0; i < 2; i++) {
|
|
|
|
|
|
VkBufferCreateInfo bci{VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO};
|
|
|
|
|
|
bci.size = megaSize;
|
|
|
|
|
|
bci.usage = VK_BUFFER_USAGE_STORAGE_BUFFER_BIT;
|
|
|
|
|
|
VmaAllocationCreateInfo aci{};
|
|
|
|
|
|
aci.usage = VMA_MEMORY_USAGE_CPU_TO_GPU;
|
|
|
|
|
|
aci.flags = VMA_ALLOCATION_CREATE_MAPPED_BIT;
|
|
|
|
|
|
VmaAllocationInfo allocInfo{};
|
|
|
|
|
|
vmaCreateBuffer(ctx->getAllocator(), &bci, &aci,
|
|
|
|
|
|
&megaBoneBuffer_[i], &megaBoneAlloc_[i], &allocInfo);
|
|
|
|
|
|
megaBoneMapped_[i] = allocInfo.pMappedData;
|
|
|
|
|
|
|
|
|
|
|
|
// Slot 0: identity matrix (for non-animated instances)
|
|
|
|
|
|
if (megaBoneMapped_[i]) {
|
|
|
|
|
|
memcpy(megaBoneMapped_[i], &identity, sizeof(identity));
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
megaBoneSet_[i] = allocateBoneSet();
|
|
|
|
|
|
if (megaBoneSet_[i]) {
|
|
|
|
|
|
VkDescriptorBufferInfo bufInfo{};
|
|
|
|
|
|
bufInfo.buffer = megaBoneBuffer_[i];
|
|
|
|
|
|
bufInfo.offset = 0;
|
|
|
|
|
|
bufInfo.range = megaSize;
|
|
|
|
|
|
VkWriteDescriptorSet write{VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET};
|
|
|
|
|
|
write.dstSet = megaBoneSet_[i];
|
|
|
|
|
|
write.dstBinding = 0;
|
|
|
|
|
|
write.descriptorCount = 1;
|
|
|
|
|
|
write.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
|
|
|
|
|
|
write.pBufferInfo = &bufInfo;
|
|
|
|
|
|
vkUpdateDescriptorSets(device, 1, &write, 0, nullptr);
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
|
2026-04-05 12:27:35 +03:00
|
|
|
|
// Instance data SSBO — per-frame buffer holding per-instance transforms, fade, bones.
|
feat(rendering): GPU architecture + visual quality fixes
M2 GPU instancing
- M2InstanceGPU SSBO (96 B/entry, double-buffered, 16384 max)
- Group opaque instances by (modelId, LOD); single vkCmdDrawIndexed per group
- boneBase field indexes into mega bone SSBO via gl_InstanceIndex
Indirect terrain drawing
- 24 MB mega index buffer (6M uint32) + 64 MB mega vertex buffer
- CPU builds VkDrawIndexedIndirectCommand per visible chunk
- Single VB/IB bind per frame; shadow pass reuses mega buffers
- Replaced vkCmdDrawIndexedIndirect with direct vkCmdDrawIndexed to fix
host-mapped buffer race condition that caused terrain flickering
GPU frustum culling (compute shader)
- m2_cull.comp.glsl: 64-thread workgroups, sphere-vs-6-planes + distance cull
- CullInstanceGPU SSBO input, uint visibility[] output, double-buffered
- dispatchCullCompute() runs before main pass via render graph node
Consolidated bone matrix SSBOs
- 16 MB double-buffered mega bone SSBO (2048 instances × 128 bones)
- Eliminated per-instance descriptor sets; one megaBoneSet_ per frame
- prepareRender() packs bone matrices consecutively into current frame slot
Render graph / frame graph
- RenderGraph: RGResource handles, RGPass nodes, Kahn topological sort
- Automatic VkImageMemoryBarrier/VkBufferMemoryBarrier between passes
- Passes: minimap_composite, worldmap_composite, preview_composite,
shadow_pass, reflection_pass, compute_cull
- beginFrame() uses buildFrameGraph() + renderGraph_->execute(cmd)
Pipeline derivatives
- PipelineBuilder::setFlags/setBasePipeline for VK_PIPELINE_CREATE_DERIVATIVE_BIT
- M2 opaque = base; alphaTest/alpha/additive are derivatives
- Applied to terrain (wireframe) and WMO (alpha-test) renderers
Rendering bug fixes:
- fix(shadow): compute lightSpaceMatrix before updatePerFrameUBO to eliminate
one-frame lag that caused shadow trails and flicker on moving objects
- fix(shadow): scale depth bias with shadowDistance_ instead of hardcoded 0.8f
to prevent acne at close range and gaps at far range
- fix(visibility): WMO group distance threshold 500u → 1200u to match terrain
view distance; buildings were disappearing on the horizon
- fix(precision): camera near plane 0.05 → 0.5 (ratio 600K:1 → 60K:1),
eliminating Z-fighting and improving frustum plane extraction stability
- fix(streaming): terrain load radius 4 → 6 tiles (~2133u → ~3200u) to exceed
M2 render distance (2800u) and eliminate pop-in when camera turns;
unload radius 7 → 9; spawn radius 3 → 4
- fix(visibility): ground-detail M2 distance multiplier 0.75 → 0.9 to reduce
early pop of grass and debris
2026-04-04 13:43:16 +03:00
|
|
|
|
// Shader reads instanceData[push.instanceDataOffset + gl_InstanceIndex].
|
|
|
|
|
|
{
|
|
|
|
|
|
static_assert(sizeof(M2InstanceGPU) == 96, "M2InstanceGPU must be 96 bytes (std430)");
|
|
|
|
|
|
const VkDeviceSize instBufSize = MAX_INSTANCE_DATA * sizeof(M2InstanceGPU);
|
|
|
|
|
|
|
|
|
|
|
|
// Descriptor pool for 2 sets (double-buffered)
|
|
|
|
|
|
VkDescriptorPoolSize poolSize{VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 2};
|
|
|
|
|
|
VkDescriptorPoolCreateInfo poolCi{VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO};
|
|
|
|
|
|
poolCi.maxSets = 2;
|
|
|
|
|
|
poolCi.poolSizeCount = 1;
|
|
|
|
|
|
poolCi.pPoolSizes = &poolSize;
|
|
|
|
|
|
vkCreateDescriptorPool(device, &poolCi, nullptr, &instanceDescPool_);
|
|
|
|
|
|
|
|
|
|
|
|
for (int i = 0; i < 2; i++) {
|
|
|
|
|
|
VkBufferCreateInfo bci{VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO};
|
|
|
|
|
|
bci.size = instBufSize;
|
|
|
|
|
|
bci.usage = VK_BUFFER_USAGE_STORAGE_BUFFER_BIT;
|
|
|
|
|
|
VmaAllocationCreateInfo aci{};
|
|
|
|
|
|
aci.usage = VMA_MEMORY_USAGE_CPU_TO_GPU;
|
|
|
|
|
|
aci.flags = VMA_ALLOCATION_CREATE_MAPPED_BIT;
|
|
|
|
|
|
VmaAllocationInfo allocInfo{};
|
|
|
|
|
|
vmaCreateBuffer(ctx->getAllocator(), &bci, &aci,
|
|
|
|
|
|
&instanceBuffer_[i], &instanceAlloc_[i], &allocInfo);
|
|
|
|
|
|
instanceMapped_[i] = allocInfo.pMappedData;
|
|
|
|
|
|
|
|
|
|
|
|
VkDescriptorSetAllocateInfo setAi{VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO};
|
|
|
|
|
|
setAi.descriptorPool = instanceDescPool_;
|
|
|
|
|
|
setAi.descriptorSetCount = 1;
|
|
|
|
|
|
setAi.pSetLayouts = &instanceSetLayout_;
|
|
|
|
|
|
vkAllocateDescriptorSets(device, &setAi, &instanceSet_[i]);
|
|
|
|
|
|
|
|
|
|
|
|
VkDescriptorBufferInfo bufInfo{};
|
|
|
|
|
|
bufInfo.buffer = instanceBuffer_[i];
|
|
|
|
|
|
bufInfo.offset = 0;
|
|
|
|
|
|
bufInfo.range = instBufSize;
|
|
|
|
|
|
VkWriteDescriptorSet write{VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET};
|
|
|
|
|
|
write.dstSet = instanceSet_[i];
|
|
|
|
|
|
write.dstBinding = 0;
|
|
|
|
|
|
write.descriptorCount = 1;
|
|
|
|
|
|
write.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
|
|
|
|
|
|
write.pBufferInfo = &bufInfo;
|
|
|
|
|
|
vkUpdateDescriptorSets(device, 1, &write, 0, nullptr);
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
|
2026-04-05 12:27:35 +03:00
|
|
|
|
// GPU frustum culling — compute pipeline, buffers, descriptors.
|
feat(rendering): GPU architecture + visual quality fixes
M2 GPU instancing
- M2InstanceGPU SSBO (96 B/entry, double-buffered, 16384 max)
- Group opaque instances by (modelId, LOD); single vkCmdDrawIndexed per group
- boneBase field indexes into mega bone SSBO via gl_InstanceIndex
Indirect terrain drawing
- 24 MB mega index buffer (6M uint32) + 64 MB mega vertex buffer
- CPU builds VkDrawIndexedIndirectCommand per visible chunk
- Single VB/IB bind per frame; shadow pass reuses mega buffers
- Replaced vkCmdDrawIndexedIndirect with direct vkCmdDrawIndexed to fix
host-mapped buffer race condition that caused terrain flickering
GPU frustum culling (compute shader)
- m2_cull.comp.glsl: 64-thread workgroups, sphere-vs-6-planes + distance cull
- CullInstanceGPU SSBO input, uint visibility[] output, double-buffered
- dispatchCullCompute() runs before main pass via render graph node
Consolidated bone matrix SSBOs
- 16 MB double-buffered mega bone SSBO (2048 instances × 128 bones)
- Eliminated per-instance descriptor sets; one megaBoneSet_ per frame
- prepareRender() packs bone matrices consecutively into current frame slot
Render graph / frame graph
- RenderGraph: RGResource handles, RGPass nodes, Kahn topological sort
- Automatic VkImageMemoryBarrier/VkBufferMemoryBarrier between passes
- Passes: minimap_composite, worldmap_composite, preview_composite,
shadow_pass, reflection_pass, compute_cull
- beginFrame() uses buildFrameGraph() + renderGraph_->execute(cmd)
Pipeline derivatives
- PipelineBuilder::setFlags/setBasePipeline for VK_PIPELINE_CREATE_DERIVATIVE_BIT
- M2 opaque = base; alphaTest/alpha/additive are derivatives
- Applied to terrain (wireframe) and WMO (alpha-test) renderers
Rendering bug fixes:
- fix(shadow): compute lightSpaceMatrix before updatePerFrameUBO to eliminate
one-frame lag that caused shadow trails and flicker on moving objects
- fix(shadow): scale depth bias with shadowDistance_ instead of hardcoded 0.8f
to prevent acne at close range and gaps at far range
- fix(visibility): WMO group distance threshold 500u → 1200u to match terrain
view distance; buildings were disappearing on the horizon
- fix(precision): camera near plane 0.05 → 0.5 (ratio 600K:1 → 60K:1),
eliminating Z-fighting and improving frustum plane extraction stability
- fix(streaming): terrain load radius 4 → 6 tiles (~2133u → ~3200u) to exceed
M2 render distance (2800u) and eliminate pop-in when camera turns;
unload radius 7 → 9; spawn radius 3 → 4
- fix(visibility): ground-detail M2 distance multiplier 0.75 → 0.9 to reduce
early pop of grass and debris
2026-04-04 13:43:16 +03:00
|
|
|
|
// Compute shader tests each instance bounding sphere against 6 frustum planes + distance.
|
|
|
|
|
|
// Output: uint visibility[] read back by CPU to skip culled instances in sortedVisible_ build.
|
|
|
|
|
|
{
|
|
|
|
|
|
static_assert(sizeof(CullInstanceGPU) == 32, "CullInstanceGPU must be 32 bytes (std430)");
|
|
|
|
|
|
static_assert(sizeof(CullUniformsGPU) == 128, "CullUniformsGPU must be 128 bytes (std140)");
|
|
|
|
|
|
|
|
|
|
|
|
// Descriptor set layout: binding 0 = UBO (frustum+camera), 1 = SSBO (input), 2 = SSBO (output)
|
|
|
|
|
|
VkDescriptorSetLayoutBinding bindings[3] = {};
|
|
|
|
|
|
bindings[0].binding = 0;
|
|
|
|
|
|
bindings[0].descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER;
|
|
|
|
|
|
bindings[0].descriptorCount = 1;
|
|
|
|
|
|
bindings[0].stageFlags = VK_SHADER_STAGE_COMPUTE_BIT;
|
|
|
|
|
|
bindings[1].binding = 1;
|
|
|
|
|
|
bindings[1].descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
|
|
|
|
|
|
bindings[1].descriptorCount = 1;
|
|
|
|
|
|
bindings[1].stageFlags = VK_SHADER_STAGE_COMPUTE_BIT;
|
|
|
|
|
|
bindings[2].binding = 2;
|
|
|
|
|
|
bindings[2].descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
|
|
|
|
|
|
bindings[2].descriptorCount = 1;
|
|
|
|
|
|
bindings[2].stageFlags = VK_SHADER_STAGE_COMPUTE_BIT;
|
|
|
|
|
|
|
|
|
|
|
|
VkDescriptorSetLayoutCreateInfo layoutCi{VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO};
|
|
|
|
|
|
layoutCi.bindingCount = 3;
|
|
|
|
|
|
layoutCi.pBindings = bindings;
|
|
|
|
|
|
vkCreateDescriptorSetLayout(device, &layoutCi, nullptr, &cullSetLayout_);
|
|
|
|
|
|
|
|
|
|
|
|
// Pipeline layout (no push constants — everything via UBO)
|
|
|
|
|
|
VkPipelineLayoutCreateInfo plCi{VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO};
|
|
|
|
|
|
plCi.setLayoutCount = 1;
|
|
|
|
|
|
plCi.pSetLayouts = &cullSetLayout_;
|
|
|
|
|
|
vkCreatePipelineLayout(device, &plCi, nullptr, &cullPipelineLayout_);
|
|
|
|
|
|
|
|
|
|
|
|
// Load compute shader
|
|
|
|
|
|
rendering::VkShaderModule cullComp;
|
|
|
|
|
|
if (!cullComp.loadFromFile(device, "assets/shaders/m2_cull.comp.spv")) {
|
|
|
|
|
|
LOG_ERROR("M2Renderer: failed to load m2_cull.comp.spv — GPU culling disabled");
|
|
|
|
|
|
} else {
|
|
|
|
|
|
VkComputePipelineCreateInfo cpCi{VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO};
|
|
|
|
|
|
cpCi.stage = cullComp.stageInfo(VK_SHADER_STAGE_COMPUTE_BIT);
|
|
|
|
|
|
cpCi.layout = cullPipelineLayout_;
|
|
|
|
|
|
if (vkCreateComputePipelines(device, VK_NULL_HANDLE, 1, &cpCi, nullptr, &cullPipeline_) != VK_SUCCESS) {
|
|
|
|
|
|
LOG_ERROR("M2Renderer: failed to create cull compute pipeline");
|
|
|
|
|
|
cullPipeline_ = VK_NULL_HANDLE;
|
|
|
|
|
|
}
|
|
|
|
|
|
cullComp.destroy();
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
// Descriptor pool: 2 sets × 3 descriptors each (1 UBO + 2 SSBO)
|
|
|
|
|
|
VkDescriptorPoolSize poolSizes[2] = {};
|
|
|
|
|
|
poolSizes[0] = {VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, 2};
|
|
|
|
|
|
poolSizes[1] = {VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 4}; // 2 input + 2 output
|
|
|
|
|
|
VkDescriptorPoolCreateInfo poolCi{VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO};
|
|
|
|
|
|
poolCi.maxSets = 2;
|
|
|
|
|
|
poolCi.poolSizeCount = 2;
|
|
|
|
|
|
poolCi.pPoolSizes = poolSizes;
|
|
|
|
|
|
vkCreateDescriptorPool(device, &poolCi, nullptr, &cullDescPool_);
|
|
|
|
|
|
|
|
|
|
|
|
const VkDeviceSize uniformSize = sizeof(CullUniformsGPU);
|
|
|
|
|
|
const VkDeviceSize inputSize = MAX_CULL_INSTANCES * sizeof(CullInstanceGPU);
|
|
|
|
|
|
const VkDeviceSize outputSize = MAX_CULL_INSTANCES * sizeof(uint32_t);
|
|
|
|
|
|
|
|
|
|
|
|
for (int i = 0; i < 2; i++) {
|
|
|
|
|
|
// Uniform buffer (frustum planes + camera)
|
|
|
|
|
|
{
|
|
|
|
|
|
VkBufferCreateInfo bci{VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO};
|
|
|
|
|
|
bci.size = uniformSize;
|
|
|
|
|
|
bci.usage = VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT;
|
|
|
|
|
|
VmaAllocationCreateInfo aci{};
|
|
|
|
|
|
aci.usage = VMA_MEMORY_USAGE_CPU_TO_GPU;
|
|
|
|
|
|
aci.flags = VMA_ALLOCATION_CREATE_MAPPED_BIT;
|
|
|
|
|
|
VmaAllocationInfo ai{};
|
|
|
|
|
|
vmaCreateBuffer(ctx->getAllocator(), &bci, &aci,
|
|
|
|
|
|
&cullUniformBuffer_[i], &cullUniformAlloc_[i], &ai);
|
|
|
|
|
|
cullUniformMapped_[i] = ai.pMappedData;
|
|
|
|
|
|
}
|
|
|
|
|
|
// Input SSBO (per-instance cull data)
|
|
|
|
|
|
{
|
|
|
|
|
|
VkBufferCreateInfo bci{VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO};
|
|
|
|
|
|
bci.size = inputSize;
|
|
|
|
|
|
bci.usage = VK_BUFFER_USAGE_STORAGE_BUFFER_BIT;
|
|
|
|
|
|
VmaAllocationCreateInfo aci{};
|
|
|
|
|
|
aci.usage = VMA_MEMORY_USAGE_CPU_TO_GPU;
|
|
|
|
|
|
aci.flags = VMA_ALLOCATION_CREATE_MAPPED_BIT;
|
|
|
|
|
|
VmaAllocationInfo ai{};
|
|
|
|
|
|
vmaCreateBuffer(ctx->getAllocator(), &bci, &aci,
|
|
|
|
|
|
&cullInputBuffer_[i], &cullInputAlloc_[i], &ai);
|
|
|
|
|
|
cullInputMapped_[i] = ai.pMappedData;
|
|
|
|
|
|
}
|
|
|
|
|
|
// Output SSBO (visibility flags — GPU writes, CPU reads)
|
|
|
|
|
|
{
|
|
|
|
|
|
VkBufferCreateInfo bci{VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO};
|
|
|
|
|
|
bci.size = outputSize;
|
|
|
|
|
|
bci.usage = VK_BUFFER_USAGE_STORAGE_BUFFER_BIT;
|
|
|
|
|
|
VmaAllocationCreateInfo aci{};
|
|
|
|
|
|
aci.usage = VMA_MEMORY_USAGE_GPU_TO_CPU;
|
|
|
|
|
|
aci.flags = VMA_ALLOCATION_CREATE_MAPPED_BIT;
|
|
|
|
|
|
VmaAllocationInfo ai{};
|
|
|
|
|
|
vmaCreateBuffer(ctx->getAllocator(), &bci, &aci,
|
|
|
|
|
|
&cullOutputBuffer_[i], &cullOutputAlloc_[i], &ai);
|
|
|
|
|
|
cullOutputMapped_[i] = ai.pMappedData;
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
// Allocate and write descriptor set
|
|
|
|
|
|
VkDescriptorSetAllocateInfo setAi{VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO};
|
|
|
|
|
|
setAi.descriptorPool = cullDescPool_;
|
|
|
|
|
|
setAi.descriptorSetCount = 1;
|
|
|
|
|
|
setAi.pSetLayouts = &cullSetLayout_;
|
|
|
|
|
|
vkAllocateDescriptorSets(device, &setAi, &cullSet_[i]);
|
|
|
|
|
|
|
|
|
|
|
|
VkDescriptorBufferInfo uboInfo{cullUniformBuffer_[i], 0, uniformSize};
|
|
|
|
|
|
VkDescriptorBufferInfo inputInfo{cullInputBuffer_[i], 0, inputSize};
|
|
|
|
|
|
VkDescriptorBufferInfo outputInfo{cullOutputBuffer_[i], 0, outputSize};
|
|
|
|
|
|
|
|
|
|
|
|
VkWriteDescriptorSet writes[3] = {};
|
|
|
|
|
|
writes[0] = {VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET};
|
|
|
|
|
|
writes[0].dstSet = cullSet_[i];
|
|
|
|
|
|
writes[0].dstBinding = 0;
|
|
|
|
|
|
writes[0].descriptorCount = 1;
|
|
|
|
|
|
writes[0].descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER;
|
|
|
|
|
|
writes[0].pBufferInfo = &uboInfo;
|
|
|
|
|
|
|
|
|
|
|
|
writes[1] = {VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET};
|
|
|
|
|
|
writes[1].dstSet = cullSet_[i];
|
|
|
|
|
|
writes[1].dstBinding = 1;
|
|
|
|
|
|
writes[1].descriptorCount = 1;
|
|
|
|
|
|
writes[1].descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
|
|
|
|
|
|
writes[1].pBufferInfo = &inputInfo;
|
|
|
|
|
|
|
|
|
|
|
|
writes[2] = {VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET};
|
|
|
|
|
|
writes[2].dstSet = cullSet_[i];
|
|
|
|
|
|
writes[2].dstBinding = 2;
|
|
|
|
|
|
writes[2].descriptorCount = 1;
|
|
|
|
|
|
writes[2].descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
|
|
|
|
|
|
writes[2].pBufferInfo = &outputInfo;
|
|
|
|
|
|
|
|
|
|
|
|
vkUpdateDescriptorSets(device, 3, writes, 0, nullptr);
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
|
2026-02-21 19:41:21 -08:00
|
|
|
|
// --- Pipeline layouts ---
|
2026-02-04 15:05:46 -08:00
|
|
|
|
|
feat(rendering): GPU architecture + visual quality fixes
M2 GPU instancing
- M2InstanceGPU SSBO (96 B/entry, double-buffered, 16384 max)
- Group opaque instances by (modelId, LOD); single vkCmdDrawIndexed per group
- boneBase field indexes into mega bone SSBO via gl_InstanceIndex
Indirect terrain drawing
- 24 MB mega index buffer (6M uint32) + 64 MB mega vertex buffer
- CPU builds VkDrawIndexedIndirectCommand per visible chunk
- Single VB/IB bind per frame; shadow pass reuses mega buffers
- Replaced vkCmdDrawIndexedIndirect with direct vkCmdDrawIndexed to fix
host-mapped buffer race condition that caused terrain flickering
GPU frustum culling (compute shader)
- m2_cull.comp.glsl: 64-thread workgroups, sphere-vs-6-planes + distance cull
- CullInstanceGPU SSBO input, uint visibility[] output, double-buffered
- dispatchCullCompute() runs before main pass via render graph node
Consolidated bone matrix SSBOs
- 16 MB double-buffered mega bone SSBO (2048 instances × 128 bones)
- Eliminated per-instance descriptor sets; one megaBoneSet_ per frame
- prepareRender() packs bone matrices consecutively into current frame slot
Render graph / frame graph
- RenderGraph: RGResource handles, RGPass nodes, Kahn topological sort
- Automatic VkImageMemoryBarrier/VkBufferMemoryBarrier between passes
- Passes: minimap_composite, worldmap_composite, preview_composite,
shadow_pass, reflection_pass, compute_cull
- beginFrame() uses buildFrameGraph() + renderGraph_->execute(cmd)
Pipeline derivatives
- PipelineBuilder::setFlags/setBasePipeline for VK_PIPELINE_CREATE_DERIVATIVE_BIT
- M2 opaque = base; alphaTest/alpha/additive are derivatives
- Applied to terrain (wireframe) and WMO (alpha-test) renderers
Rendering bug fixes:
- fix(shadow): compute lightSpaceMatrix before updatePerFrameUBO to eliminate
one-frame lag that caused shadow trails and flicker on moving objects
- fix(shadow): scale depth bias with shadowDistance_ instead of hardcoded 0.8f
to prevent acne at close range and gaps at far range
- fix(visibility): WMO group distance threshold 500u → 1200u to match terrain
view distance; buildings were disappearing on the horizon
- fix(precision): camera near plane 0.05 → 0.5 (ratio 600K:1 → 60K:1),
eliminating Z-fighting and improving frustum plane extraction stability
- fix(streaming): terrain load radius 4 → 6 tiles (~2133u → ~3200u) to exceed
M2 render distance (2800u) and eliminate pop-in when camera turns;
unload radius 7 → 9; spawn radius 3 → 4
- fix(visibility): ground-detail M2 distance multiplier 0.75 → 0.9 to reduce
early pop of grass and debris
2026-04-04 13:43:16 +03:00
|
|
|
|
// Main M2 pipeline layout: set 0 = perFrame, set 1 = material, set 2 = bones, set 3 = instances
|
|
|
|
|
|
// Push constant: int texCoordSet + int isFoliage + int instanceDataOffset (12 bytes)
|
2026-02-21 19:41:21 -08:00
|
|
|
|
{
    // Main M2 pipeline layout. Descriptor sets in order:
    //   set 0 = per-frame data, set 1 = material, set 2 = bones, set 3 = instances.
    VkDescriptorSetLayout setLayouts[] = {perFrameLayout, materialSetLayout_, boneSetLayout_, instanceSetLayout_};

    // Vertex-stage push constants: int texCoordSet + int isFoliage + int instanceDataOffset.
    VkPushConstantRange pushRange{};
    pushRange.stageFlags = VK_SHADER_STAGE_VERTEX_BIT;
    pushRange.offset = 0;
    pushRange.size = 12;

    VkPipelineLayoutCreateInfo ci{VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO};
    // Derive the count from the array so the two cannot drift apart.
    ci.setLayoutCount = static_cast<uint32_t>(sizeof(setLayouts) / sizeof(setLayouts[0]));
    ci.pSetLayouts = setLayouts;
    ci.pushConstantRangeCount = 1;
    ci.pPushConstantRanges = &pushRange;

    // The M2 model pipelines are all built against this layout, so a failure
    // here is reported and treated as an initialization failure instead of
    // being silently ignored.
    if (vkCreatePipelineLayout(device, &ci, nullptr, &pipelineLayout_) != VK_SUCCESS) {
        LOG_ERROR("M2: Failed to create main pipeline layout");
        return false;
    }
}
|
|
|
|
|
|
|
|
|
|
|
|
// Particle pipeline layout: set 0 = perFrame, set 1 = particleTex
|
|
|
|
|
|
// Push constant: vec2 tileCount + int alphaKey (12 bytes)
|
|
|
|
|
|
{
    // Particle pipeline layout. Descriptor sets: set 0 = per-frame data, set 1 = particle texture.
    VkDescriptorSetLayout setLayouts[] = {perFrameLayout, particleTexLayout_};

    // Fragment-stage push constants: vec2 tileCount + int alphaKey (12 bytes).
    VkPushConstantRange pushRange{};
    pushRange.stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT;
    pushRange.offset = 0;
    pushRange.size = 12;

    VkPipelineLayoutCreateInfo ci{VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO};
    // Derive the count from the array so the two cannot drift apart.
    ci.setLayoutCount = static_cast<uint32_t>(sizeof(setLayouts) / sizeof(setLayouts[0]));
    ci.pSetLayouts = setLayouts;
    ci.pushConstantRangeCount = 1;
    ci.pPushConstantRanges = &pushRange;

    // Report a failed creation instead of continuing with an unusable layout handle.
    if (vkCreatePipelineLayout(device, &ci, nullptr, &particlePipelineLayout_) != VK_SUCCESS) {
        LOG_ERROR("M2: Failed to create particle pipeline layout");
        return false;
    }
}
|
|
|
|
|
|
|
|
|
|
|
|
// Smoke pipeline layout: set 0 = perFrame
|
|
|
|
|
|
// Push constant: float screenHeight (4 bytes)
|
|
|
|
|
|
{
    // Smoke pipeline layout. Only descriptor set 0 (per-frame data) is used.
    VkDescriptorSetLayout setLayouts[] = {perFrameLayout};

    // Vertex-stage push constant: float screenHeight (4 bytes).
    VkPushConstantRange pushRange{};
    pushRange.stageFlags = VK_SHADER_STAGE_VERTEX_BIT;
    pushRange.offset = 0;
    pushRange.size = 4;

    VkPipelineLayoutCreateInfo ci{VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO};
    // Derive the count from the array so the two cannot drift apart.
    ci.setLayoutCount = static_cast<uint32_t>(sizeof(setLayouts) / sizeof(setLayouts[0]));
    ci.pSetLayouts = setLayouts;
    ci.pushConstantRangeCount = 1;
    ci.pPushConstantRanges = &pushRange;

    // Report a failed creation instead of continuing with an unusable layout handle.
    if (vkCreatePipelineLayout(device, &ci, nullptr, &smokePipelineLayout_) != VK_SUCCESS) {
        LOG_ERROR("M2: Failed to create smoke pipeline layout");
        return false;
    }
}
|
|
|
|
|
|
|
|
|
|
|
|
// --- Load shaders ---
|
|
|
|
|
|
rendering::VkShaderModule m2Vert, m2Frag;
|
|
|
|
|
|
rendering::VkShaderModule particleVert, particleFrag;
|
|
|
|
|
|
rendering::VkShaderModule smokeVert, smokeFrag;
|
|
|
|
|
|
|
2026-03-27 15:17:19 -07:00
|
|
|
|
(void)m2Vert.loadFromFile(device, "assets/shaders/m2.vert.spv");
|
|
|
|
|
|
(void)m2Frag.loadFromFile(device, "assets/shaders/m2.frag.spv");
|
|
|
|
|
|
(void)particleVert.loadFromFile(device, "assets/shaders/m2_particle.vert.spv");
|
|
|
|
|
|
(void)particleFrag.loadFromFile(device, "assets/shaders/m2_particle.frag.spv");
|
|
|
|
|
|
(void)smokeVert.loadFromFile(device, "assets/shaders/m2_smoke.vert.spv");
|
|
|
|
|
|
(void)smokeFrag.loadFromFile(device, "assets/shaders/m2_smoke.frag.spv");
|
2026-02-21 19:41:21 -08:00
|
|
|
|
|
|
|
|
|
|
if (!m2Vert.isValid() || !m2Frag.isValid()) {
|
|
|
|
|
|
LOG_ERROR("M2: Missing required shaders, cannot initialize");
|
|
|
|
|
|
return false;
|
|
|
|
|
|
}
|
2026-02-04 15:05:46 -08:00
|
|
|
|
|
2026-02-21 19:41:21 -08:00
|
|
|
|
VkRenderPass mainPass = vkCtx_->getImGuiRenderPass();
|
|
|
|
|
|
|
|
|
|
|
|
// --- Build M2 model pipelines ---
|
|
|
|
|
|
// Vertex input: 18 floats = 72 bytes stride
|
|
|
|
|
|
// loc 0: vec3 pos (0), loc 1: vec3 normal (12), loc 2: vec2 uv0 (24),
|
|
|
|
|
|
// loc 5: vec2 uv1 (32), loc 3: vec4 boneWeights (40), loc 4: vec4 boneIndices (56)
|
|
|
|
|
|
VkVertexInputBindingDescription m2Binding{};
|
|
|
|
|
|
m2Binding.binding = 0;
|
|
|
|
|
|
m2Binding.stride = 18 * sizeof(float);
|
|
|
|
|
|
m2Binding.inputRate = VK_VERTEX_INPUT_RATE_VERTEX;
|
|
|
|
|
|
|
|
|
|
|
|
std::vector<VkVertexInputAttributeDescription> m2Attrs = {
|
|
|
|
|
|
{0, 0, VK_FORMAT_R32G32B32_SFLOAT, 0}, // position
|
|
|
|
|
|
{1, 0, VK_FORMAT_R32G32B32_SFLOAT, 3 * sizeof(float)}, // normal
|
|
|
|
|
|
{2, 0, VK_FORMAT_R32G32_SFLOAT, 6 * sizeof(float)}, // texCoord0
|
|
|
|
|
|
{5, 0, VK_FORMAT_R32G32_SFLOAT, 8 * sizeof(float)}, // texCoord1
|
|
|
|
|
|
{3, 0, VK_FORMAT_R32G32B32A32_SFLOAT, 10 * sizeof(float)}, // boneWeights
|
|
|
|
|
|
{4, 0, VK_FORMAT_R32G32B32A32_SFLOAT, 14 * sizeof(float)}, // boneIndices (float)
|
|
|
|
|
|
};
|
2026-02-02 12:24:50 -08:00
|
|
|
|
|
feat(rendering): GPU architecture + visual quality fixes
M2 GPU instancing
- M2InstanceGPU SSBO (96 B/entry, double-buffered, 16384 max)
- Group opaque instances by (modelId, LOD); single vkCmdDrawIndexed per group
- boneBase field indexes into mega bone SSBO via gl_InstanceIndex
Indirect terrain drawing
- 24 MB mega index buffer (6M uint32) + 64 MB mega vertex buffer
- CPU builds VkDrawIndexedIndirectCommand per visible chunk
- Single VB/IB bind per frame; shadow pass reuses mega buffers
- Replaced vkCmdDrawIndexedIndirect with direct vkCmdDrawIndexed to fix
host-mapped buffer race condition that caused terrain flickering
GPU frustum culling (compute shader)
- m2_cull.comp.glsl: 64-thread workgroups, sphere-vs-6-planes + distance cull
- CullInstanceGPU SSBO input, uint visibility[] output, double-buffered
- dispatchCullCompute() runs before main pass via render graph node
Consolidated bone matrix SSBOs
- 16 MB double-buffered mega bone SSBO (2048 instances × 128 bones)
- Eliminated per-instance descriptor sets; one megaBoneSet_ per frame
- prepareRender() packs bone matrices consecutively into current frame slot
Render graph / frame graph
- RenderGraph: RGResource handles, RGPass nodes, Kahn topological sort
- Automatic VkImageMemoryBarrier/VkBufferMemoryBarrier between passes
- Passes: minimap_composite, worldmap_composite, preview_composite,
shadow_pass, reflection_pass, compute_cull
- beginFrame() uses buildFrameGraph() + renderGraph_->execute(cmd)
Pipeline derivatives
- PipelineBuilder::setFlags/setBasePipeline for VK_PIPELINE_CREATE_DERIVATIVE_BIT
- M2 opaque = base; alphaTest/alpha/additive are derivatives
- Applied to terrain (wireframe) and WMO (alpha-test) renderers
Rendering bug fixes:
- fix(shadow): compute lightSpaceMatrix before updatePerFrameUBO to eliminate
one-frame lag that caused shadow trails and flicker on moving objects
- fix(shadow): scale depth bias with shadowDistance_ instead of hardcoded 0.8f
to prevent acne at close range and gaps at far range
- fix(visibility): WMO group distance threshold 500u → 1200u to match terrain
view distance; buildings were disappearing on the horizon
- fix(precision): camera near plane 0.05 → 0.5 (ratio 600K:1 → 60K:1),
eliminating Z-fighting and improving frustum plane extraction stability
- fix(streaming): terrain load radius 4 → 6 tiles (~2133u → ~3200u) to exceed
M2 render distance (2800u) and eliminate pop-in when camera turns;
unload radius 7 → 9; spawn radius 3 → 4
- fix(visibility): ground-detail M2 distance multiplier 0.75 → 0.9 to reduce
early pop of grass and debris
2026-04-04 13:43:16 +03:00
|
|
|
|
// Pipeline derivatives — opaque is the base, others derive from it for shared state optimization
|
|
|
|
|
|
auto buildM2Pipeline = [&](VkPipelineColorBlendAttachmentState blendState, bool depthWrite,
|
|
|
|
|
|
VkPipelineCreateFlags flags = 0, VkPipeline basePipeline = VK_NULL_HANDLE) -> VkPipeline {
|
2026-02-21 19:41:21 -08:00
|
|
|
|
return PipelineBuilder()
|
|
|
|
|
|
.setShaders(m2Vert.stageInfo(VK_SHADER_STAGE_VERTEX_BIT),
|
|
|
|
|
|
m2Frag.stageInfo(VK_SHADER_STAGE_FRAGMENT_BIT))
|
|
|
|
|
|
.setVertexInput({m2Binding}, m2Attrs)
|
|
|
|
|
|
.setTopology(VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST)
|
|
|
|
|
|
.setRasterization(VK_POLYGON_MODE_FILL, VK_CULL_MODE_NONE)
|
|
|
|
|
|
.setDepthTest(true, depthWrite, VK_COMPARE_OP_LESS_OR_EQUAL)
|
|
|
|
|
|
.setColorBlendAttachment(blendState)
|
2026-02-22 02:59:24 -08:00
|
|
|
|
.setMultisample(vkCtx_->getMsaaSamples())
|
2026-02-21 19:41:21 -08:00
|
|
|
|
.setLayout(pipelineLayout_)
|
|
|
|
|
|
.setRenderPass(mainPass)
|
|
|
|
|
|
.setDynamicStates({VK_DYNAMIC_STATE_VIEWPORT, VK_DYNAMIC_STATE_SCISSOR})
|
feat(rendering): GPU architecture + visual quality fixes
M2 GPU instancing
- M2InstanceGPU SSBO (96 B/entry, double-buffered, 16384 max)
- Group opaque instances by (modelId, LOD); single vkCmdDrawIndexed per group
- boneBase field indexes into mega bone SSBO via gl_InstanceIndex
Indirect terrain drawing
- 24 MB mega index buffer (6M uint32) + 64 MB mega vertex buffer
- CPU builds VkDrawIndexedIndirectCommand per visible chunk
- Single VB/IB bind per frame; shadow pass reuses mega buffers
- Replaced vkCmdDrawIndexedIndirect with direct vkCmdDrawIndexed to fix
host-mapped buffer race condition that caused terrain flickering
GPU frustum culling (compute shader)
- m2_cull.comp.glsl: 64-thread workgroups, sphere-vs-6-planes + distance cull
- CullInstanceGPU SSBO input, uint visibility[] output, double-buffered
- dispatchCullCompute() runs before main pass via render graph node
Consolidated bone matrix SSBOs
- 16 MB double-buffered mega bone SSBO (2048 instances × 128 bones)
- Eliminated per-instance descriptor sets; one megaBoneSet_ per frame
- prepareRender() packs bone matrices consecutively into current frame slot
Render graph / frame graph
- RenderGraph: RGResource handles, RGPass nodes, Kahn topological sort
- Automatic VkImageMemoryBarrier/VkBufferMemoryBarrier between passes
- Passes: minimap_composite, worldmap_composite, preview_composite,
shadow_pass, reflection_pass, compute_cull
- beginFrame() uses buildFrameGraph() + renderGraph_->execute(cmd)
Pipeline derivatives
- PipelineBuilder::setFlags/setBasePipeline for VK_PIPELINE_CREATE_DERIVATIVE_BIT
- M2 opaque = base; alphaTest/alpha/additive are derivatives
- Applied to terrain (wireframe) and WMO (alpha-test) renderers
Rendering bug fixes:
- fix(shadow): compute lightSpaceMatrix before updatePerFrameUBO to eliminate
one-frame lag that caused shadow trails and flicker on moving objects
- fix(shadow): scale depth bias with shadowDistance_ instead of hardcoded 0.8f
to prevent acne at close range and gaps at far range
- fix(visibility): WMO group distance threshold 500u → 1200u to match terrain
view distance; buildings were disappearing on the horizon
- fix(precision): camera near plane 0.05 → 0.5 (ratio 600K:1 → 60K:1),
eliminating Z-fighting and improving frustum plane extraction stability
- fix(streaming): terrain load radius 4 → 6 tiles (~2133u → ~3200u) to exceed
M2 render distance (2800u) and eliminate pop-in when camera turns;
unload radius 7 → 9; spawn radius 3 → 4
- fix(visibility): ground-detail M2 distance multiplier 0.75 → 0.9 to reduce
early pop of grass and debris
2026-04-04 13:43:16 +03:00
|
|
|
|
.setFlags(flags)
|
|
|
|
|
|
.setBasePipeline(basePipeline)
|
2026-03-24 09:47:03 -07:00
|
|
|
|
.build(device, vkCtx_->getPipelineCache());
|
2026-02-21 19:41:21 -08:00
|
|
|
|
};
|
|
|
|
|
|
|
feat(rendering): GPU architecture + visual quality fixes
M2 GPU instancing
- M2InstanceGPU SSBO (96 B/entry, double-buffered, 16384 max)
- Group opaque instances by (modelId, LOD); single vkCmdDrawIndexed per group
- boneBase field indexes into mega bone SSBO via gl_InstanceIndex
Indirect terrain drawing
- 24 MB mega index buffer (6M uint32) + 64 MB mega vertex buffer
- CPU builds VkDrawIndexedIndirectCommand per visible chunk
- Single VB/IB bind per frame; shadow pass reuses mega buffers
- Replaced vkCmdDrawIndexedIndirect with direct vkCmdDrawIndexed to fix
host-mapped buffer race condition that caused terrain flickering
GPU frustum culling (compute shader)
- m2_cull.comp.glsl: 64-thread workgroups, sphere-vs-6-planes + distance cull
- CullInstanceGPU SSBO input, uint visibility[] output, double-buffered
- dispatchCullCompute() runs before main pass via render graph node
Consolidated bone matrix SSBOs
- 16 MB double-buffered mega bone SSBO (2048 instances × 128 bones)
- Eliminated per-instance descriptor sets; one megaBoneSet_ per frame
- prepareRender() packs bone matrices consecutively into current frame slot
Render graph / frame graph
- RenderGraph: RGResource handles, RGPass nodes, Kahn topological sort
- Automatic VkImageMemoryBarrier/VkBufferMemoryBarrier between passes
- Passes: minimap_composite, worldmap_composite, preview_composite,
shadow_pass, reflection_pass, compute_cull
- beginFrame() uses buildFrameGraph() + renderGraph_->execute(cmd)
Pipeline derivatives
- PipelineBuilder::setFlags/setBasePipeline for VK_PIPELINE_CREATE_DERIVATIVE_BIT
- M2 opaque = base; alphaTest/alpha/additive are derivatives
- Applied to terrain (wireframe) and WMO (alpha-test) renderers
Rendering bug fixes:
- fix(shadow): compute lightSpaceMatrix before updatePerFrameUBO to eliminate
one-frame lag that caused shadow trails and flicker on moving objects
- fix(shadow): scale depth bias with shadowDistance_ instead of hardcoded 0.8f
to prevent acne at close range and gaps at far range
- fix(visibility): WMO group distance threshold 500u → 1200u to match terrain
view distance; buildings were disappearing on the horizon
- fix(precision): camera near plane 0.05 → 0.5 (ratio 600K:1 → 60K:1),
eliminating Z-fighting and improving frustum plane extraction stability
- fix(streaming): terrain load radius 4 → 6 tiles (~2133u → ~3200u) to exceed
M2 render distance (2800u) and eliminate pop-in when camera turns;
unload radius 7 → 9; spawn radius 3 → 4
- fix(visibility): ground-detail M2 distance multiplier 0.75 → 0.9 to reduce
early pop of grass and debris
2026-04-04 13:43:16 +03:00
|
|
|
|
opaquePipeline_ = buildM2Pipeline(PipelineBuilder::blendDisabled(), true,
|
|
|
|
|
|
VK_PIPELINE_CREATE_ALLOW_DERIVATIVES_BIT);
|
|
|
|
|
|
alphaTestPipeline_ = buildM2Pipeline(PipelineBuilder::blendAlpha(), true,
|
|
|
|
|
|
VK_PIPELINE_CREATE_DERIVATIVE_BIT, opaquePipeline_);
|
|
|
|
|
|
alphaPipeline_ = buildM2Pipeline(PipelineBuilder::blendAlpha(), false,
|
|
|
|
|
|
VK_PIPELINE_CREATE_DERIVATIVE_BIT, opaquePipeline_);
|
|
|
|
|
|
additivePipeline_ = buildM2Pipeline(PipelineBuilder::blendAdditive(), false,
|
|
|
|
|
|
VK_PIPELINE_CREATE_DERIVATIVE_BIT, opaquePipeline_);
|
2026-02-21 19:41:21 -08:00
|
|
|
|
|
|
|
|
|
|
// --- Build particle pipelines ---
|
|
|
|
|
|
if (particleVert.isValid() && particleFrag.isValid()) {
    // One particle vertex is 9 floats: position(3) + color(4) + size(1) + tile(1).
    constexpr uint32_t kFloatBytes = sizeof(float);

    VkVertexInputBindingDescription pBind{};
    pBind.inputRate = VK_VERTEX_INPUT_RATE_VERTEX;
    pBind.stride = 9 * kFloatBytes;
    pBind.binding = 0;

    // Entries are {location, binding, format, byte offset}.
    std::vector<VkVertexInputAttributeDescription> pAttrs = {
        {0, 0, VK_FORMAT_R32G32B32_SFLOAT,    0 * kFloatBytes},  // position
        {1, 0, VK_FORMAT_R32G32B32A32_SFLOAT, 3 * kFloatBytes},  // color
        {2, 0, VK_FORMAT_R32_SFLOAT,          7 * kFloatBytes},  // size
        {3, 0, VK_FORMAT_R32_SFLOAT,          8 * kFloatBytes},  // tile
    };

    // The two particle pipelines are identical apart from the blend state.
    // Particles are drawn as points, depth-tested but never written to depth.
    auto buildParticlePipeline = [&](VkPipelineColorBlendAttachmentState blendState) -> VkPipeline {
        const auto vertexStage = particleVert.stageInfo(VK_SHADER_STAGE_VERTEX_BIT);
        const auto fragmentStage = particleFrag.stageInfo(VK_SHADER_STAGE_FRAGMENT_BIT);

        return PipelineBuilder()
            .setShaders(vertexStage, fragmentStage)
            .setVertexInput({pBind}, pAttrs)
            .setTopology(VK_PRIMITIVE_TOPOLOGY_POINT_LIST)
            .setRasterization(VK_POLYGON_MODE_FILL, VK_CULL_MODE_NONE)
            .setDepthTest(true, false, VK_COMPARE_OP_LESS_OR_EQUAL)
            .setColorBlendAttachment(blendState)
            .setMultisample(vkCtx_->getMsaaSamples())
            .setLayout(particlePipelineLayout_)
            .setRenderPass(mainPass)
            .setDynamicStates({VK_DYNAMIC_STATE_VIEWPORT, VK_DYNAMIC_STATE_SCISSOR})
            .build(device, vkCtx_->getPipelineCache());
    };

    particlePipeline_ = buildParticlePipeline(PipelineBuilder::blendAlpha());
    particleAdditivePipeline_ = buildParticlePipeline(PipelineBuilder::blendAdditive());
}
|
|
|
|
|
|
|
|
|
|
|
|
// --- Build smoke pipeline ---
|
|
|
|
|
|
if (smokeVert.isValid() && smokeFrag.isValid()) {
    // One smoke vertex is 6 floats: position(3) + lifeRatio(1) + size(1) + isSpark(1).
    constexpr uint32_t kFloatBytes = sizeof(float);

    VkVertexInputBindingDescription sBind{};
    sBind.inputRate = VK_VERTEX_INPUT_RATE_VERTEX;
    sBind.stride = 6 * kFloatBytes;
    sBind.binding = 0;

    // Entries are {location, binding, format, byte offset}.
    std::vector<VkVertexInputAttributeDescription> sAttrs = {
        {0, 0, VK_FORMAT_R32G32B32_SFLOAT, 0 * kFloatBytes},  // position
        {1, 0, VK_FORMAT_R32_SFLOAT,       3 * kFloatBytes},  // lifeRatio
        {2, 0, VK_FORMAT_R32_SFLOAT,       4 * kFloatBytes},  // size
        {3, 0, VK_FORMAT_R32_SFLOAT,       5 * kFloatBytes},  // isSpark
    };

    const auto vertexStage = smokeVert.stageInfo(VK_SHADER_STAGE_VERTEX_BIT);
    const auto fragmentStage = smokeFrag.stageInfo(VK_SHADER_STAGE_FRAGMENT_BIT);

    // Alpha-blended points, depth-tested but never written to depth.
    smokePipeline_ = PipelineBuilder()
        .setShaders(vertexStage, fragmentStage)
        .setVertexInput({sBind}, sAttrs)
        .setTopology(VK_PRIMITIVE_TOPOLOGY_POINT_LIST)
        .setRasterization(VK_POLYGON_MODE_FILL, VK_CULL_MODE_NONE)
        .setDepthTest(true, false, VK_COMPARE_OP_LESS_OR_EQUAL)
        .setColorBlendAttachment(PipelineBuilder::blendAlpha())
        .setMultisample(vkCtx_->getMsaaSamples())
        .setLayout(smokePipelineLayout_)
        .setRenderPass(mainPass)
        .setDynamicStates({VK_DYNAMIC_STATE_VIEWPORT, VK_DYNAMIC_STATE_SCISSOR})
        .build(device, vkCtx_->getPipelineCache());
}
|
|
|
|
|
|
|
2026-03-13 01:17:30 -07:00
|
|
|
|
// --- Build ribbon pipelines ---
|
|
|
|
|
|
// Vertex format: pos(3) + color(3) + alpha(1) + uv(2) = 9 floats = 36 bytes
|
|
|
|
|
|
{
    // Ribbon shaders are optional: when either one fails to load, the ribbon
    // layout and pipelines are simply not created.
    rendering::VkShaderModule ribVert, ribFrag;
    (void)ribVert.loadFromFile(device, "assets/shaders/m2_ribbon.vert.spv");
    (void)ribFrag.loadFromFile(device, "assets/shaders/m2_ribbon.frag.spv");

    if (ribVert.isValid() && ribFrag.isValid()) {
        // Reuse particleTexLayout_ for set 1 (single texture sampler).
        // The ribbon layout declares no push constants.
        VkDescriptorSetLayout ribLayouts[] = {perFrameLayout, particleTexLayout_};
        VkPipelineLayoutCreateInfo lci{VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO};
        lci.setLayoutCount = 2;
        lci.pSetLayouts = ribLayouts;

        if (vkCreatePipelineLayout(device, &lci, nullptr, &ribbonPipelineLayout_) != VK_SUCCESS) {
            // Without a layout the pipelines cannot be built. Skip them, which
            // leaves the ribbon pipelines in the same state as when the shaders
            // are missing, and keep the handle null so it is never mistaken for
            // a live object.
            LOG_ERROR("M2: Failed to create ribbon pipeline layout, ribbon pipelines not built");
            ribbonPipelineLayout_ = VK_NULL_HANDLE;
        } else {
            // One ribbon vertex is 9 floats (36 bytes): pos(3) + color(3) + alpha(1) + uv(2).
            VkVertexInputBindingDescription rBind{};
            rBind.binding = 0;
            rBind.stride = 9 * sizeof(float);
            rBind.inputRate = VK_VERTEX_INPUT_RATE_VERTEX;

            // Entries are {location, binding, format, byte offset}.
            std::vector<VkVertexInputAttributeDescription> rAttrs = {
                {0, 0, VK_FORMAT_R32G32B32_SFLOAT, 0},                  // pos
                {1, 0, VK_FORMAT_R32G32B32_SFLOAT, 3 * sizeof(float)},  // color
                {2, 0, VK_FORMAT_R32_SFLOAT,       6 * sizeof(float)},  // alpha
                {3, 0, VK_FORMAT_R32G32_SFLOAT,    7 * sizeof(float)},  // uv
            };

            // The two ribbon pipelines differ only in blend state. Ribbons are
            // triangle strips, depth-tested but never written to depth.
            auto buildRibbonPipeline = [&](VkPipelineColorBlendAttachmentState blend) -> VkPipeline {
                return PipelineBuilder()
                    .setShaders(ribVert.stageInfo(VK_SHADER_STAGE_VERTEX_BIT),
                                ribFrag.stageInfo(VK_SHADER_STAGE_FRAGMENT_BIT))
                    .setVertexInput({rBind}, rAttrs)
                    .setTopology(VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP)
                    .setRasterization(VK_POLYGON_MODE_FILL, VK_CULL_MODE_NONE)
                    .setDepthTest(true, false, VK_COMPARE_OP_LESS_OR_EQUAL)
                    .setColorBlendAttachment(blend)
                    .setMultisample(vkCtx_->getMsaaSamples())
                    .setLayout(ribbonPipelineLayout_)
                    .setRenderPass(mainPass)
                    .setDynamicStates({VK_DYNAMIC_STATE_VIEWPORT, VK_DYNAMIC_STATE_SCISSOR})
                    .build(device, vkCtx_->getPipelineCache());
            };

            ribbonPipeline_ = buildRibbonPipeline(PipelineBuilder::blendAlpha());
            ribbonAdditivePipeline_ = buildRibbonPipeline(PipelineBuilder::blendAdditive());
        }
    }

    // Destroyed unconditionally, whether or not the modules loaded.
    ribVert.destroy(); ribFrag.destroy();
}
|
|
|
|
|
|
|
2026-02-21 19:41:21 -08:00
|
|
|
|
// Clean up shader modules
|
|
|
|
|
|
m2Vert.destroy(); m2Frag.destroy();
|
|
|
|
|
|
particleVert.destroy(); particleFrag.destroy();
|
|
|
|
|
|
smokeVert.destroy(); smokeFrag.destroy();
|
|
|
|
|
|
|
|
|
|
|
|
// --- Create dynamic particle buffers (mapped for CPU writes) ---
|
|
|
|
|
|
{
    // All four buffers are vertex buffers allocated CPU_TO_GPU with the
    // MAPPED flag so the CPU can write them through a mapped pointer; only
    // the size differs between them.
    VkBufferCreateInfo bci{VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO};
    bci.usage = VK_BUFFER_USAGE_VERTEX_BUFFER_BIT;

    VmaAllocationCreateInfo aci{};
    aci.usage = VMA_MEMORY_USAGE_CPU_TO_GPU;
    aci.flags = VMA_ALLOCATION_CREATE_MAPPED_BIT;

    // Creates one mapped vertex buffer of sizeBytes and stores its mapped
    // pointer in `mapped`. A fresh VmaAllocationInfo is used for every call
    // and `mapped` is set to nullptr on failure, so a failed allocation can
    // never publish the mapped pointer of a previously created buffer.
    // Returns true on success.
    auto createMappedVB = [&](VkDeviceSize sizeBytes, ::VkBuffer& buffer,
                              VmaAllocation& allocation, void*& mapped) -> bool {
        bci.size = sizeBytes;
        VmaAllocationInfo allocInfo{};
        if (vmaCreateBuffer(vkCtx_->getAllocator(), &bci, &aci, &buffer, &allocation, &allocInfo) != VK_SUCCESS) {
            mapped = nullptr;
            return false;
        }
        mapped = allocInfo.pMappedData;
        return true;
    };

    // Smoke particle buffer: 6 floats per particle.
    if (!createMappedVB(MAX_SMOKE_PARTICLES * 6 * sizeof(float), smokeVB_, smokeVBAlloc_, smokeVBMapped_)) {
        LOG_ERROR("M2: Failed to create smoke particle vertex buffer");
    }

    // M2 particle buffer: 9 floats per particle.
    if (!createMappedVB(MAX_M2_PARTICLES * 9 * sizeof(float), m2ParticleVB_, m2ParticleVBAlloc_, m2ParticleVBMapped_)) {
        LOG_ERROR("M2: Failed to create M2 particle vertex buffer");
    }

    // Dedicated glow sprite buffer (separate from particle VB to avoid data race).
    if (!createMappedVB(MAX_GLOW_SPRITES * 9 * sizeof(float), glowVB_, glowVBAlloc_, glowVBMapped_)) {
        LOG_ERROR("M2: Failed to create glow sprite vertex buffer");
    }

    // Ribbon vertex buffer — triangle strip: pos(3)+color(3)+alpha(1)+uv(2)=9 floats/vert.
    if (!createMappedVB(MAX_RIBBON_VERTS * 9 * sizeof(float), ribbonVB_, ribbonVBAlloc_, ribbonVBMapped_)) {
        LOG_ERROR("M2: Failed to create ribbon vertex buffer");
    }
}
|
|
|
|
|
|
|
2026-02-21 19:41:21 -08:00
|
|
|
|
// --- Create white fallback texture ---
|
2026-02-06 08:58:26 -08:00
|
|
|
|
{
|
2026-02-21 19:41:21 -08:00
|
|
|
|
uint8_t white[] = {255, 255, 255, 255};
|
|
|
|
|
|
whiteTexture_ = std::make_unique<VkTexture>();
|
|
|
|
|
|
whiteTexture_->upload(*vkCtx_, white, 1, 1, VK_FORMAT_R8G8B8A8_UNORM);
|
|
|
|
|
|
whiteTexture_->createSampler(device, VK_FILTER_LINEAR, VK_FILTER_LINEAR, VK_SAMPLER_ADDRESS_MODE_REPEAT);
|
|
|
|
|
|
}
|
2026-02-19 02:27:01 -08:00
|
|
|
|
|
2026-02-21 19:41:21 -08:00
|
|
|
|
// --- Generate soft radial gradient glow texture ---
|
2026-02-06 08:58:26 -08:00
|
|
|
|
{
|
|
|
|
|
|
static constexpr int SZ = 64;
|
|
|
|
|
|
std::vector<uint8_t> px(SZ * SZ * 4);
|
|
|
|
|
|
float half = SZ / 2.0f;
|
|
|
|
|
|
for (int y = 0; y < SZ; y++) {
|
|
|
|
|
|
for (int x = 0; x < SZ; x++) {
|
|
|
|
|
|
float dx = (x + 0.5f - half) / half;
|
|
|
|
|
|
float dy = (y + 0.5f - half) / half;
|
|
|
|
|
|
float r = std::sqrt(dx * dx + dy * dy);
|
|
|
|
|
|
float a = std::max(0.0f, 1.0f - r);
|
|
|
|
|
|
a = a * a; // Quadratic falloff
|
|
|
|
|
|
int idx = (y * SZ + x) * 4;
|
|
|
|
|
|
px[idx + 0] = 255;
|
|
|
|
|
|
px[idx + 1] = 255;
|
|
|
|
|
|
px[idx + 2] = 255;
|
|
|
|
|
|
px[idx + 3] = static_cast<uint8_t>(a * 255);
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
2026-02-21 19:41:21 -08:00
|
|
|
|
glowTexture_ = std::make_unique<VkTexture>();
|
|
|
|
|
|
glowTexture_->upload(*vkCtx_, px.data(), SZ, SZ, VK_FORMAT_R8G8B8A8_UNORM);
|
|
|
|
|
|
glowTexture_->createSampler(device, VK_FILTER_LINEAR, VK_FILTER_LINEAR, VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE);
|
2026-02-23 06:06:24 -08:00
|
|
|
|
// Pre-allocate glow texture descriptor set (reused every frame)
|
|
|
|
|
|
if (particleTexLayout_ && materialDescPool_) {
|
|
|
|
|
|
VkDescriptorSetAllocateInfo ai{VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO};
|
|
|
|
|
|
ai.descriptorPool = materialDescPool_;
|
|
|
|
|
|
ai.descriptorSetCount = 1;
|
|
|
|
|
|
ai.pSetLayouts = &particleTexLayout_;
|
|
|
|
|
|
if (vkAllocateDescriptorSets(device, &ai, &glowTexDescSet_) == VK_SUCCESS) {
|
|
|
|
|
|
VkDescriptorImageInfo imgInfo = glowTexture_->descriptorInfo();
|
|
|
|
|
|
VkWriteDescriptorSet write{VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET};
|
|
|
|
|
|
write.dstSet = glowTexDescSet_;
|
|
|
|
|
|
write.dstBinding = 0;
|
|
|
|
|
|
write.descriptorCount = 1;
|
|
|
|
|
|
write.descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER;
|
|
|
|
|
|
write.pImageInfo = &imgInfo;
|
|
|
|
|
|
vkUpdateDescriptorSets(device, 1, &write, 0, nullptr);
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
2026-02-06 08:58:26 -08:00
|
|
|
|
}
|
2026-02-22 07:26:54 -08:00
|
|
|
|
textureCacheBudgetBytes_ =
|
2026-02-23 04:32:58 -08:00
|
|
|
|
envSizeMBOrDefault("WOWEE_M2_TEX_CACHE_MB", 4096) * 1024ull * 1024ull;
|
2026-02-22 07:26:54 -08:00
|
|
|
|
modelCacheLimit_ = envSizeMBOrDefault("WOWEE_M2_MODEL_LIMIT", 6000);
|
|
|
|
|
|
LOG_INFO("M2 texture cache budget: ", textureCacheBudgetBytes_ / (1024 * 1024), " MB");
|
|
|
|
|
|
LOG_INFO("M2 model cache limit: ", modelCacheLimit_);
|
2026-02-06 08:58:26 -08:00
|
|
|
|
|
2026-02-21 19:41:21 -08:00
|
|
|
|
LOG_INFO("M2 renderer initialized (Vulkan)");
|
2026-02-18 22:41:05 -08:00
|
|
|
|
initialized_ = true;
|
2026-02-02 12:24:50 -08:00
|
|
|
|
return true;
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
void M2Renderer::shutdown() {
|
|
|
|
|
|
LOG_INFO("Shutting down M2 renderer...");
|
2026-02-21 19:41:21 -08:00
|
|
|
|
if (!vkCtx_) return;
|
|
|
|
|
|
|
|
|
|
|
|
vkDeviceWaitIdle(vkCtx_->getDevice());
|
|
|
|
|
|
VkDevice device = vkCtx_->getDevice();
|
|
|
|
|
|
VmaAllocator alloc = vkCtx_->getAllocator();
|
2026-02-02 12:24:50 -08:00
|
|
|
|
|
2026-02-21 19:41:21 -08:00
|
|
|
|
// Delete model GPU resources
|
2026-02-02 12:24:50 -08:00
|
|
|
|
for (auto& [id, model] : models) {
|
2026-02-21 19:41:21 -08:00
|
|
|
|
destroyModelGPU(model);
|
2026-02-02 12:24:50 -08:00
|
|
|
|
}
|
|
|
|
|
|
models.clear();
|
2026-02-21 19:41:21 -08:00
|
|
|
|
|
|
|
|
|
|
// Destroy instance bone buffers
|
|
|
|
|
|
for (auto& inst : instances) {
|
|
|
|
|
|
destroyInstanceBones(inst);
|
|
|
|
|
|
}
|
2026-02-02 12:24:50 -08:00
|
|
|
|
instances.clear();
|
2026-02-03 16:21:48 -08:00
|
|
|
|
spatialGrid.clear();
|
|
|
|
|
|
instanceIndexById.clear();
|
2026-03-07 11:59:19 -08:00
|
|
|
|
instanceDedupMap_.clear();
|
2026-02-02 12:24:50 -08:00
|
|
|
|
|
|
|
|
|
|
// Delete cached textures
|
|
|
|
|
|
textureCache.clear();
|
2026-02-12 16:29:36 -08:00
|
|
|
|
textureCacheBytes_ = 0;
|
|
|
|
|
|
textureCacheCounter_ = 0;
|
2026-03-27 16:47:30 -07:00
|
|
|
|
texturePropsByPtr_.clear();
|
2026-02-22 07:26:54 -08:00
|
|
|
|
failedTextureCache_.clear();
|
2026-03-15 01:21:23 -07:00
|
|
|
|
failedTextureRetryAt_.clear();
|
2026-02-22 07:26:54 -08:00
|
|
|
|
loggedTextureLoadFails_.clear();
|
2026-03-15 01:21:23 -07:00
|
|
|
|
textureLookupSerial_ = 0;
|
2026-02-22 07:26:54 -08:00
|
|
|
|
textureBudgetRejectWarnings_ = 0;
|
2026-02-21 19:41:21 -08:00
|
|
|
|
whiteTexture_.reset();
|
|
|
|
|
|
glowTexture_.reset();
|
|
|
|
|
|
|
2026-03-13 01:17:30 -07:00
|
|
|
|
// Clean up particle/ribbon buffers
|
2026-02-21 19:41:21 -08:00
|
|
|
|
if (smokeVB_) { vmaDestroyBuffer(alloc, smokeVB_, smokeVBAlloc_); smokeVB_ = VK_NULL_HANDLE; }
|
|
|
|
|
|
if (m2ParticleVB_) { vmaDestroyBuffer(alloc, m2ParticleVB_, m2ParticleVBAlloc_); m2ParticleVB_ = VK_NULL_HANDLE; }
|
2026-03-04 08:17:32 -08:00
|
|
|
|
if (glowVB_) { vmaDestroyBuffer(alloc, glowVB_, glowVBAlloc_); glowVB_ = VK_NULL_HANDLE; }
|
2026-03-13 01:17:30 -07:00
|
|
|
|
if (ribbonVB_) { vmaDestroyBuffer(alloc, ribbonVB_, ribbonVBAlloc_); ribbonVB_ = VK_NULL_HANDLE; }
|
2026-02-21 19:41:21 -08:00
|
|
|
|
smokeParticles.clear();
|
|
|
|
|
|
|
|
|
|
|
|
// Destroy pipelines
|
|
|
|
|
|
auto destroyPipeline = [&](VkPipeline& p) { if (p) { vkDestroyPipeline(device, p, nullptr); p = VK_NULL_HANDLE; } };
|
|
|
|
|
|
destroyPipeline(opaquePipeline_);
|
|
|
|
|
|
destroyPipeline(alphaTestPipeline_);
|
|
|
|
|
|
destroyPipeline(alphaPipeline_);
|
|
|
|
|
|
destroyPipeline(additivePipeline_);
|
|
|
|
|
|
destroyPipeline(particlePipeline_);
|
|
|
|
|
|
destroyPipeline(particleAdditivePipeline_);
|
|
|
|
|
|
destroyPipeline(smokePipeline_);
|
2026-03-13 01:17:30 -07:00
|
|
|
|
destroyPipeline(ribbonPipeline_);
|
|
|
|
|
|
destroyPipeline(ribbonAdditivePipeline_);
|
2026-02-21 19:41:21 -08:00
|
|
|
|
|
|
|
|
|
|
if (pipelineLayout_) { vkDestroyPipelineLayout(device, pipelineLayout_, nullptr); pipelineLayout_ = VK_NULL_HANDLE; }
|
|
|
|
|
|
if (particlePipelineLayout_) { vkDestroyPipelineLayout(device, particlePipelineLayout_, nullptr); particlePipelineLayout_ = VK_NULL_HANDLE; }
|
|
|
|
|
|
if (smokePipelineLayout_) { vkDestroyPipelineLayout(device, smokePipelineLayout_, nullptr); smokePipelineLayout_ = VK_NULL_HANDLE; }
|
2026-03-13 01:17:30 -07:00
|
|
|
|
if (ribbonPipelineLayout_) { vkDestroyPipelineLayout(device, ribbonPipelineLayout_, nullptr); ribbonPipelineLayout_ = VK_NULL_HANDLE; }
|
2026-02-21 19:41:21 -08:00
|
|
|
|
|
|
|
|
|
|
// Destroy descriptor pools and layouts
|
2026-03-22 21:47:12 +03:00
|
|
|
|
if (dummyBoneBuffer_) { vmaDestroyBuffer(alloc, dummyBoneBuffer_, dummyBoneAlloc_); dummyBoneBuffer_ = VK_NULL_HANDLE; }
|
|
|
|
|
|
// dummyBoneSet_ is freed implicitly when boneDescPool_ is destroyed
|
|
|
|
|
|
dummyBoneSet_ = VK_NULL_HANDLE;
|
feat(rendering): GPU architecture + visual quality fixes
M2 GPU instancing
- M2InstanceGPU SSBO (96 B/entry, double-buffered, 16384 max)
- Group opaque instances by (modelId, LOD); single vkCmdDrawIndexed per group
- boneBase field indexes into mega bone SSBO via gl_InstanceIndex
Indirect terrain drawing
- 24 MB mega index buffer (6M uint32) + 64 MB mega vertex buffer
- CPU builds VkDrawIndexedIndirectCommand per visible chunk
- Single VB/IB bind per frame; shadow pass reuses mega buffers
- Replaced vkCmdDrawIndexedIndirect with direct vkCmdDrawIndexed to fix
host-mapped buffer race condition that caused terrain flickering
GPU frustum culling (compute shader)
- m2_cull.comp.glsl: 64-thread workgroups, sphere-vs-6-planes + distance cull
- CullInstanceGPU SSBO input, uint visibility[] output, double-buffered
- dispatchCullCompute() runs before main pass via render graph node
Consolidated bone matrix SSBOs
- 16 MB double-buffered mega bone SSBO (2048 instances × 128 bones)
- Eliminated per-instance descriptor sets; one megaBoneSet_ per frame
- prepareRender() packs bone matrices consecutively into current frame slot
Render graph / frame graph
- RenderGraph: RGResource handles, RGPass nodes, Kahn topological sort
- Automatic VkImageMemoryBarrier/VkBufferMemoryBarrier between passes
- Passes: minimap_composite, worldmap_composite, preview_composite,
shadow_pass, reflection_pass, compute_cull
- beginFrame() uses buildFrameGraph() + renderGraph_->execute(cmd)
Pipeline derivatives
- PipelineBuilder::setFlags/setBasePipeline for VK_PIPELINE_CREATE_DERIVATIVE_BIT
- M2 opaque = base; alphaTest/alpha/additive are derivatives
- Applied to terrain (wireframe) and WMO (alpha-test) renderers
Rendering bug fixes:
- fix(shadow): compute lightSpaceMatrix before updatePerFrameUBO to eliminate
one-frame lag that caused shadow trails and flicker on moving objects
- fix(shadow): scale depth bias with shadowDistance_ instead of hardcoded 0.8f
to prevent acne at close range and gaps at far range
- fix(visibility): WMO group distance threshold 500u → 1200u to match terrain
view distance; buildings were disappearing on the horizon
- fix(precision): camera near plane 0.05 → 0.5 (ratio 600K:1 → 60K:1),
eliminating Z-fighting and improving frustum plane extraction stability
- fix(streaming): terrain load radius 4 → 6 tiles (~2133u → ~3200u) to exceed
M2 render distance (2800u) and eliminate pop-in when camera turns;
unload radius 7 → 9; spawn radius 3 → 4
- fix(visibility): ground-detail M2 distance multiplier 0.75 → 0.9 to reduce
early pop of grass and debris
2026-04-04 13:43:16 +03:00
|
|
|
|
// Mega bone SSBO cleanup (sets freed implicitly with boneDescPool_)
|
|
|
|
|
|
for (int i = 0; i < 2; i++) {
|
|
|
|
|
|
if (megaBoneBuffer_[i]) { vmaDestroyBuffer(alloc, megaBoneBuffer_[i], megaBoneAlloc_[i]); megaBoneBuffer_[i] = VK_NULL_HANDLE; }
|
|
|
|
|
|
megaBoneMapped_[i] = nullptr;
|
|
|
|
|
|
megaBoneSet_[i] = VK_NULL_HANDLE;
|
|
|
|
|
|
}
|
2026-02-21 19:41:21 -08:00
|
|
|
|
if (materialDescPool_) { vkDestroyDescriptorPool(device, materialDescPool_, nullptr); materialDescPool_ = VK_NULL_HANDLE; }
|
|
|
|
|
|
if (boneDescPool_) { vkDestroyDescriptorPool(device, boneDescPool_, nullptr); boneDescPool_ = VK_NULL_HANDLE; }
|
2026-04-05 12:27:35 +03:00
|
|
|
|
// Instance data SSBO cleanup (sets freed with instanceDescPool_)
|
feat(rendering): GPU architecture + visual quality fixes
M2 GPU instancing
- M2InstanceGPU SSBO (96 B/entry, double-buffered, 16384 max)
- Group opaque instances by (modelId, LOD); single vkCmdDrawIndexed per group
- boneBase field indexes into mega bone SSBO via gl_InstanceIndex
Indirect terrain drawing
- 24 MB mega index buffer (6M uint32) + 64 MB mega vertex buffer
- CPU builds VkDrawIndexedIndirectCommand per visible chunk
- Single VB/IB bind per frame; shadow pass reuses mega buffers
- Replaced vkCmdDrawIndexedIndirect with direct vkCmdDrawIndexed to fix
host-mapped buffer race condition that caused terrain flickering
GPU frustum culling (compute shader)
- m2_cull.comp.glsl: 64-thread workgroups, sphere-vs-6-planes + distance cull
- CullInstanceGPU SSBO input, uint visibility[] output, double-buffered
- dispatchCullCompute() runs before main pass via render graph node
Consolidated bone matrix SSBOs
- 16 MB double-buffered mega bone SSBO (2048 instances × 128 bones)
- Eliminated per-instance descriptor sets; one megaBoneSet_ per frame
- prepareRender() packs bone matrices consecutively into current frame slot
Render graph / frame graph
- RenderGraph: RGResource handles, RGPass nodes, Kahn topological sort
- Automatic VkImageMemoryBarrier/VkBufferMemoryBarrier between passes
- Passes: minimap_composite, worldmap_composite, preview_composite,
shadow_pass, reflection_pass, compute_cull
- beginFrame() uses buildFrameGraph() + renderGraph_->execute(cmd)
Pipeline derivatives
- PipelineBuilder::setFlags/setBasePipeline for VK_PIPELINE_CREATE_DERIVATIVE_BIT
- M2 opaque = base; alphaTest/alpha/additive are derivatives
- Applied to terrain (wireframe) and WMO (alpha-test) renderers
Rendering bug fixes:
- fix(shadow): compute lightSpaceMatrix before updatePerFrameUBO to eliminate
one-frame lag that caused shadow trails and flicker on moving objects
- fix(shadow): scale depth bias with shadowDistance_ instead of hardcoded 0.8f
to prevent acne at close range and gaps at far range
- fix(visibility): WMO group distance threshold 500u → 1200u to match terrain
view distance; buildings were disappearing on the horizon
- fix(precision): camera near plane 0.05 → 0.5 (ratio 600K:1 → 60K:1),
eliminating Z-fighting and improving frustum plane extraction stability
- fix(streaming): terrain load radius 4 → 6 tiles (~2133u → ~3200u) to exceed
M2 render distance (2800u) and eliminate pop-in when camera turns;
unload radius 7 → 9; spawn radius 3 → 4
- fix(visibility): ground-detail M2 distance multiplier 0.75 → 0.9 to reduce
early pop of grass and debris
2026-04-04 13:43:16 +03:00
|
|
|
|
for (int i = 0; i < 2; i++) {
|
|
|
|
|
|
if (instanceBuffer_[i]) { vmaDestroyBuffer(alloc, instanceBuffer_[i], instanceAlloc_[i]); instanceBuffer_[i] = VK_NULL_HANDLE; }
|
|
|
|
|
|
instanceMapped_[i] = nullptr;
|
|
|
|
|
|
instanceSet_[i] = VK_NULL_HANDLE;
|
|
|
|
|
|
}
|
|
|
|
|
|
if (instanceDescPool_) { vkDestroyDescriptorPool(device, instanceDescPool_, nullptr); instanceDescPool_ = VK_NULL_HANDLE; }
|
|
|
|
|
|
|
2026-04-05 12:27:35 +03:00
|
|
|
|
// GPU frustum culling compute pipeline + buffers cleanup
|
feat(rendering): GPU architecture + visual quality fixes
M2 GPU instancing
- M2InstanceGPU SSBO (96 B/entry, double-buffered, 16384 max)
- Group opaque instances by (modelId, LOD); single vkCmdDrawIndexed per group
- boneBase field indexes into mega bone SSBO via gl_InstanceIndex
Indirect terrain drawing
- 24 MB mega index buffer (6M uint32) + 64 MB mega vertex buffer
- CPU builds VkDrawIndexedIndirectCommand per visible chunk
- Single VB/IB bind per frame; shadow pass reuses mega buffers
- Replaced vkCmdDrawIndexedIndirect with direct vkCmdDrawIndexed to fix
host-mapped buffer race condition that caused terrain flickering
GPU frustum culling (compute shader)
- m2_cull.comp.glsl: 64-thread workgroups, sphere-vs-6-planes + distance cull
- CullInstanceGPU SSBO input, uint visibility[] output, double-buffered
- dispatchCullCompute() runs before main pass via render graph node
Consolidated bone matrix SSBOs
- 16 MB double-buffered mega bone SSBO (2048 instances × 128 bones)
- Eliminated per-instance descriptor sets; one megaBoneSet_ per frame
- prepareRender() packs bone matrices consecutively into current frame slot
Render graph / frame graph
- RenderGraph: RGResource handles, RGPass nodes, Kahn topological sort
- Automatic VkImageMemoryBarrier/VkBufferMemoryBarrier between passes
- Passes: minimap_composite, worldmap_composite, preview_composite,
shadow_pass, reflection_pass, compute_cull
- beginFrame() uses buildFrameGraph() + renderGraph_->execute(cmd)
Pipeline derivatives
- PipelineBuilder::setFlags/setBasePipeline for VK_PIPELINE_CREATE_DERIVATIVE_BIT
- M2 opaque = base; alphaTest/alpha/additive are derivatives
- Applied to terrain (wireframe) and WMO (alpha-test) renderers
Rendering bug fixes:
- fix(shadow): compute lightSpaceMatrix before updatePerFrameUBO to eliminate
one-frame lag that caused shadow trails and flicker on moving objects
- fix(shadow): scale depth bias with shadowDistance_ instead of hardcoded 0.8f
to prevent acne at close range and gaps at far range
- fix(visibility): WMO group distance threshold 500u → 1200u to match terrain
view distance; buildings were disappearing on the horizon
- fix(precision): camera near plane 0.05 → 0.5 (ratio 600K:1 → 60K:1),
eliminating Z-fighting and improving frustum plane extraction stability
- fix(streaming): terrain load radius 4 → 6 tiles (~2133u → ~3200u) to exceed
M2 render distance (2800u) and eliminate pop-in when camera turns;
unload radius 7 → 9; spawn radius 3 → 4
- fix(visibility): ground-detail M2 distance multiplier 0.75 → 0.9 to reduce
early pop of grass and debris
2026-04-04 13:43:16 +03:00
|
|
|
|
if (cullPipeline_) { vkDestroyPipeline(device, cullPipeline_, nullptr); cullPipeline_ = VK_NULL_HANDLE; }
|
|
|
|
|
|
if (cullPipelineLayout_) { vkDestroyPipelineLayout(device, cullPipelineLayout_, nullptr); cullPipelineLayout_ = VK_NULL_HANDLE; }
|
|
|
|
|
|
for (int i = 0; i < 2; i++) {
|
|
|
|
|
|
if (cullUniformBuffer_[i]) { vmaDestroyBuffer(alloc, cullUniformBuffer_[i], cullUniformAlloc_[i]); cullUniformBuffer_[i] = VK_NULL_HANDLE; }
|
|
|
|
|
|
if (cullInputBuffer_[i]) { vmaDestroyBuffer(alloc, cullInputBuffer_[i], cullInputAlloc_[i]); cullInputBuffer_[i] = VK_NULL_HANDLE; }
|
|
|
|
|
|
if (cullOutputBuffer_[i]) { vmaDestroyBuffer(alloc, cullOutputBuffer_[i], cullOutputAlloc_[i]); cullOutputBuffer_[i] = VK_NULL_HANDLE; }
|
|
|
|
|
|
cullUniformMapped_[i] = cullInputMapped_[i] = cullOutputMapped_[i] = nullptr;
|
|
|
|
|
|
cullSet_[i] = VK_NULL_HANDLE;
|
|
|
|
|
|
}
|
|
|
|
|
|
if (cullDescPool_) { vkDestroyDescriptorPool(device, cullDescPool_, nullptr); cullDescPool_ = VK_NULL_HANDLE; }
|
|
|
|
|
|
if (cullSetLayout_) { vkDestroyDescriptorSetLayout(device, cullSetLayout_, nullptr); cullSetLayout_ = VK_NULL_HANDLE; }
|
|
|
|
|
|
|
2026-02-21 19:41:21 -08:00
|
|
|
|
if (materialSetLayout_) { vkDestroyDescriptorSetLayout(device, materialSetLayout_, nullptr); materialSetLayout_ = VK_NULL_HANDLE; }
|
|
|
|
|
|
if (boneSetLayout_) { vkDestroyDescriptorSetLayout(device, boneSetLayout_, nullptr); boneSetLayout_ = VK_NULL_HANDLE; }
|
feat(rendering): GPU architecture + visual quality fixes
M2 GPU instancing
- M2InstanceGPU SSBO (96 B/entry, double-buffered, 16384 max)
- Group opaque instances by (modelId, LOD); single vkCmdDrawIndexed per group
- boneBase field indexes into mega bone SSBO via gl_InstanceIndex
Indirect terrain drawing
- 24 MB mega index buffer (6M uint32) + 64 MB mega vertex buffer
- CPU builds VkDrawIndexedIndirectCommand per visible chunk
- Single VB/IB bind per frame; shadow pass reuses mega buffers
- Replaced vkCmdDrawIndexedIndirect with direct vkCmdDrawIndexed to fix
host-mapped buffer race condition that caused terrain flickering
GPU frustum culling (compute shader)
- m2_cull.comp.glsl: 64-thread workgroups, sphere-vs-6-planes + distance cull
- CullInstanceGPU SSBO input, uint visibility[] output, double-buffered
- dispatchCullCompute() runs before main pass via render graph node
Consolidated bone matrix SSBOs
- 16 MB double-buffered mega bone SSBO (2048 instances × 128 bones)
- Eliminated per-instance descriptor sets; one megaBoneSet_ per frame
- prepareRender() packs bone matrices consecutively into current frame slot
Render graph / frame graph
- RenderGraph: RGResource handles, RGPass nodes, Kahn topological sort
- Automatic VkImageMemoryBarrier/VkBufferMemoryBarrier between passes
- Passes: minimap_composite, worldmap_composite, preview_composite,
shadow_pass, reflection_pass, compute_cull
- beginFrame() uses buildFrameGraph() + renderGraph_->execute(cmd)
Pipeline derivatives
- PipelineBuilder::setFlags/setBasePipeline for VK_PIPELINE_CREATE_DERIVATIVE_BIT
- M2 opaque = base; alphaTest/alpha/additive are derivatives
- Applied to terrain (wireframe) and WMO (alpha-test) renderers
Rendering bug fixes:
- fix(shadow): compute lightSpaceMatrix before updatePerFrameUBO to eliminate
one-frame lag that caused shadow trails and flicker on moving objects
- fix(shadow): scale depth bias with shadowDistance_ instead of hardcoded 0.8f
to prevent acne at close range and gaps at far range
- fix(visibility): WMO group distance threshold 500u → 1200u to match terrain
view distance; buildings were disappearing on the horizon
- fix(precision): camera near plane 0.05 → 0.5 (ratio 600K:1 → 60K:1),
eliminating Z-fighting and improving frustum plane extraction stability
- fix(streaming): terrain load radius 4 → 6 tiles (~2133u → ~3200u) to exceed
M2 render distance (2800u) and eliminate pop-in when camera turns;
unload radius 7 → 9; spawn radius 3 → 4
- fix(visibility): ground-detail M2 distance multiplier 0.75 → 0.9 to reduce
early pop of grass and debris
2026-04-04 13:43:16 +03:00
|
|
|
|
if (instanceSetLayout_) { vkDestroyDescriptorSetLayout(device, instanceSetLayout_, nullptr); instanceSetLayout_ = VK_NULL_HANDLE; }
|
2026-02-21 19:41:21 -08:00
|
|
|
|
if (particleTexLayout_) { vkDestroyDescriptorSetLayout(device, particleTexLayout_, nullptr); particleTexLayout_ = VK_NULL_HANDLE; }
|
|
|
|
|
|
|
|
|
|
|
|
// Destroy shadow resources
|
|
|
|
|
|
destroyPipeline(shadowPipeline_);
|
|
|
|
|
|
if (shadowPipelineLayout_) { vkDestroyPipelineLayout(device, shadowPipelineLayout_, nullptr); shadowPipelineLayout_ = VK_NULL_HANDLE; }
|
2026-04-03 17:41:14 -07:00
|
|
|
|
for (auto& pool : shadowTexPool_) { if (pool) { vkDestroyDescriptorPool(device, pool, nullptr); pool = VK_NULL_HANDLE; } }
|
2026-02-21 19:41:21 -08:00
|
|
|
|
if (shadowParamsPool_) { vkDestroyDescriptorPool(device, shadowParamsPool_, nullptr); shadowParamsPool_ = VK_NULL_HANDLE; }
|
|
|
|
|
|
if (shadowParamsLayout_) { vkDestroyDescriptorSetLayout(device, shadowParamsLayout_, nullptr); shadowParamsLayout_ = VK_NULL_HANDLE; }
|
|
|
|
|
|
if (shadowParamsUBO_) { vmaDestroyBuffer(alloc, shadowParamsUBO_, shadowParamsAlloc_); shadowParamsUBO_ = VK_NULL_HANDLE; }
|
|
|
|
|
|
|
|
|
|
|
|
initialized_ = false;
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
/**
 * Releases the GPU objects held by a single model: its vertex and index
 * buffers, each batch's material descriptor set and material UBO, and the
 * particle and ribbon texture descriptor sets. Destroyed handles are reset to
 * VK_NULL_HANDLE and the two set lists are emptied.
 *
 * No-op when no Vulkan context is attached.
 *
 * @param model Model whose GPU resources are released in place.
 */
void M2Renderer::destroyModelGPU(M2ModelGPU& model) {
    if (!vkCtx_) {
        return;
    }

    VmaAllocator allocator = vkCtx_->getAllocator();

    // Geometry buffers
    if (model.vertexBuffer) {
        vmaDestroyBuffer(allocator, model.vertexBuffer, model.vertexAlloc);
        model.vertexBuffer = VK_NULL_HANDLE;
    }
    if (model.indexBuffer) {
        vmaDestroyBuffer(allocator, model.indexBuffer, model.indexAlloc);
        model.indexBuffer = VK_NULL_HANDLE;
    }

    VkDevice dev = vkCtx_->getDevice();

    // Every descriptor set released here is handed back to materialDescPool_.
    const auto releaseSet = [&](VkDescriptorSet& descSet) {
        if (!descSet) {
            return;
        }
        vkFreeDescriptorSets(dev, materialDescPool_, 1, &descSet);
        descSet = VK_NULL_HANDLE;
    };

    // Per-batch material set + uniform buffer
    for (auto& drawBatch : model.batches) {
        releaseSet(drawBatch.materialSet);
        if (drawBatch.materialUBO) {
            vmaDestroyBuffer(allocator, drawBatch.materialUBO, drawBatch.materialUBOAlloc);
            drawBatch.materialUBO = VK_NULL_HANDLE;
        }
    }

    // Pre-allocated particle texture descriptor sets
    for (auto& particleSet : model.particleTexSets) {
        releaseSet(particleSet);
    }
    model.particleTexSets.clear();

    // Ribbon texture descriptor sets
    for (auto& ribbonSet : model.ribbonTexSets) {
        releaseSet(ribbonSet);
    }
    model.ribbonTexSets.clear();
}
|
2026-02-02 12:24:50 -08:00
|
|
|
|
|
2026-04-03 18:30:52 -07:00
|
|
|
|
/**
 * Releases the per-frame-slot bone descriptor set and bone buffer of an
 * instance.
 *
 * The instance's handles for both slots are cleared right away. The actual
 * Vulkan/VMA destruction then happens either immediately or through
 * deferAfterAllFrameFences(), depending on `defer`.
 *
 * No-op when no Vulkan context is attached.
 *
 * @param inst  Instance whose bone resources are released.
 * @param defer false: destroy now (safe after vkDeviceWaitIdle);
 *              true: hand the destruction to the context's deferred queue.
 */
void M2Renderer::destroyInstanceBones(M2Instance& inst, bool defer) {
    if (!vkCtx_) {
        return;
    }

    VkDevice device = vkCtx_->getDevice();
    VmaAllocator allocator = vkCtx_->getAllocator();

    for (int slot = 0; slot < 2; ++slot) {
        // Copy the handles out, then detach them from the instance; the
        // copies are what gets destroyed on either path.
        const VkDescriptorSet oldSet = inst.boneSet[slot];
        ::VkBuffer oldBuffer = inst.boneBuffer[slot];
        VmaAllocation oldAlloc = inst.boneAlloc[slot];
        inst.boneSet[slot] = VK_NULL_HANDLE;
        inst.boneBuffer[slot] = VK_NULL_HANDLE;
        inst.boneMapped[slot] = nullptr;

        const bool hasSet = (oldSet != VK_NULL_HANDLE);
        const bool hasBuffer = (oldBuffer != VK_NULL_HANDLE);
        if (!hasSet && !hasBuffer) {
            continue;  // nothing allocated for this slot
        }

        // Everything is captured by value so the closure stays valid if it
        // runs later, after `inst` has gone away.
        VkDescriptorPool pool = boneDescPool_;
        auto release = [device, allocator, pool, oldSet, oldBuffer, oldAlloc, hasSet, hasBuffer]() {
            if (hasSet) {
                vkFreeDescriptorSets(device, pool, 1, &oldSet);
            }
            if (hasBuffer) {
                vmaDestroyBuffer(allocator, oldBuffer, oldAlloc);
            }
        };

        if (defer) {
            // Deferred destruction — the loop destroys bone sets for ALL frame
            // slots, so the other slot's command buffer may still be in flight.
            // Must wait for all fences, not just the current frame's.
            vkCtx_->deferAfterAllFrameFences(release);
        } else {
            // Immediate destruction (safe after vkDeviceWaitIdle)
            release();
        }
    }
}
|
2026-02-04 14:37:32 -08:00
|
|
|
|
|
2026-02-21 19:41:21 -08:00
|
|
|
|
/**
 * Allocates one descriptor set with the material layout from the material
 * descriptor pool.
 *
 * @return The new set, or VK_NULL_HANDLE (after logging the VkResult) when
 *         the allocation fails.
 */
VkDescriptorSet M2Renderer::allocateMaterialSet() {
    VkDescriptorSetAllocateInfo allocInfo{VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO};
    allocInfo.descriptorPool = materialDescPool_;
    allocInfo.descriptorSetCount = 1;
    allocInfo.pSetLayouts = &materialSetLayout_;

    VkDescriptorSet newSet = VK_NULL_HANDLE;
    VkResult status = vkAllocateDescriptorSets(vkCtx_->getDevice(), &allocInfo, &newSet);
    if (status == VK_SUCCESS) {
        return newSet;
    }

    LOG_ERROR("M2Renderer: material descriptor set allocation failed (", status, ")");
    return VK_NULL_HANDLE;
}
|
2026-02-06 08:58:26 -08:00
|
|
|
|
|
2026-02-21 19:41:21 -08:00
|
|
|
|
/**
 * Allocates one descriptor set with the bone layout from the bone descriptor
 * pool.
 *
 * @return The new set, or VK_NULL_HANDLE (after logging the VkResult) when
 *         the allocation fails.
 */
VkDescriptorSet M2Renderer::allocateBoneSet() {
    VkDescriptorSetAllocateInfo allocInfo{VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO};
    allocInfo.descriptorPool = boneDescPool_;
    allocInfo.descriptorSetCount = 1;
    allocInfo.pSetLayouts = &boneSetLayout_;

    VkDescriptorSet newSet = VK_NULL_HANDLE;
    VkResult status = vkAllocateDescriptorSets(vkCtx_->getDevice(), &allocInfo, &newSet);
    if (status == VK_SUCCESS) {
        return newSet;
    }

    LOG_ERROR("M2Renderer: bone descriptor set allocation failed (", status, ")");
    return VK_NULL_HANDLE;
}
|
|
|
|
|
|
|
2026-02-08 19:56:17 -08:00
|
|
|
|
// ---------------------------------------------------------------------------
|
|
|
|
|
|
// M2 collision mesh: build spatial grid + classify triangles
|
|
|
|
|
|
// ---------------------------------------------------------------------------
|
|
|
|
|
|
void M2ModelGPU::CollisionMesh::build() {
|
|
|
|
|
|
if (indices.size() < 3 || vertices.empty()) return;
|
|
|
|
|
|
triCount = static_cast<uint32_t>(indices.size() / 3);
|
|
|
|
|
|
|
|
|
|
|
|
// Bounding box for grid
|
|
|
|
|
|
glm::vec3 bmin(std::numeric_limits<float>::max());
|
|
|
|
|
|
glm::vec3 bmax(-std::numeric_limits<float>::max());
|
|
|
|
|
|
for (const auto& v : vertices) {
|
|
|
|
|
|
bmin = glm::min(bmin, v);
|
|
|
|
|
|
bmax = glm::max(bmax, v);
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
gridOrigin = glm::vec2(bmin.x, bmin.y);
|
|
|
|
|
|
gridCellsX = std::max(1, std::min(32, static_cast<int>(std::ceil((bmax.x - bmin.x) / CELL_SIZE))));
|
|
|
|
|
|
gridCellsY = std::max(1, std::min(32, static_cast<int>(std::ceil((bmax.y - bmin.y) / CELL_SIZE))));
|
|
|
|
|
|
|
|
|
|
|
|
cellFloorTris.resize(gridCellsX * gridCellsY);
|
|
|
|
|
|
cellWallTris.resize(gridCellsX * gridCellsY);
|
|
|
|
|
|
triBounds.resize(triCount);
|
|
|
|
|
|
|
|
|
|
|
|
for (uint32_t ti = 0; ti < triCount; ti++) {
|
|
|
|
|
|
uint16_t i0 = indices[ti * 3];
|
|
|
|
|
|
uint16_t i1 = indices[ti * 3 + 1];
|
|
|
|
|
|
uint16_t i2 = indices[ti * 3 + 2];
|
|
|
|
|
|
if (i0 >= vertices.size() || i1 >= vertices.size() || i2 >= vertices.size()) continue;
|
|
|
|
|
|
|
|
|
|
|
|
const auto& v0 = vertices[i0];
|
|
|
|
|
|
const auto& v1 = vertices[i1];
|
|
|
|
|
|
const auto& v2 = vertices[i2];
|
|
|
|
|
|
|
|
|
|
|
|
triBounds[ti].minZ = std::min({v0.z, v1.z, v2.z});
|
|
|
|
|
|
triBounds[ti].maxZ = std::max({v0.z, v1.z, v2.z});
|
|
|
|
|
|
|
|
|
|
|
|
glm::vec3 normal = glm::cross(v1 - v0, v2 - v0);
|
|
|
|
|
|
float normalLen = glm::length(normal);
|
|
|
|
|
|
float absNz = (normalLen > 0.001f) ? std::abs(normal.z / normalLen) : 0.0f;
|
2026-02-08 20:31:00 -08:00
|
|
|
|
bool isFloor = (absNz >= 0.35f); // ~70° max slope (relaxed for steep stairs)
|
2026-02-08 19:56:17 -08:00
|
|
|
|
bool isWall = (absNz < 0.65f);
|
|
|
|
|
|
|
|
|
|
|
|
float triMinX = std::min({v0.x, v1.x, v2.x});
|
|
|
|
|
|
float triMaxX = std::max({v0.x, v1.x, v2.x});
|
|
|
|
|
|
float triMinY = std::min({v0.y, v1.y, v2.y});
|
|
|
|
|
|
float triMaxY = std::max({v0.y, v1.y, v2.y});
|
|
|
|
|
|
|
|
|
|
|
|
int cxMin = std::clamp(static_cast<int>((triMinX - gridOrigin.x) / CELL_SIZE), 0, gridCellsX - 1);
|
|
|
|
|
|
int cxMax = std::clamp(static_cast<int>((triMaxX - gridOrigin.x) / CELL_SIZE), 0, gridCellsX - 1);
|
|
|
|
|
|
int cyMin = std::clamp(static_cast<int>((triMinY - gridOrigin.y) / CELL_SIZE), 0, gridCellsY - 1);
|
|
|
|
|
|
int cyMax = std::clamp(static_cast<int>((triMaxY - gridOrigin.y) / CELL_SIZE), 0, gridCellsY - 1);
|
|
|
|
|
|
|
|
|
|
|
|
for (int cy = cyMin; cy <= cyMax; cy++) {
|
|
|
|
|
|
for (int cx = cxMin; cx <= cxMax; cx++) {
|
|
|
|
|
|
int ci = cy * gridCellsX + cx;
|
|
|
|
|
|
if (isFloor) cellFloorTris[ci].push_back(ti);
|
|
|
|
|
|
if (isWall) cellWallTris[ci].push_back(ti);
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
void M2ModelGPU::CollisionMesh::getFloorTrisInRange(
|
|
|
|
|
|
float minX, float minY, float maxX, float maxY,
|
|
|
|
|
|
std::vector<uint32_t>& out) const {
|
|
|
|
|
|
out.clear();
|
|
|
|
|
|
if (gridCellsX == 0 || gridCellsY == 0) return;
|
|
|
|
|
|
int cxMin = std::clamp(static_cast<int>((minX - gridOrigin.x) / CELL_SIZE), 0, gridCellsX - 1);
|
|
|
|
|
|
int cxMax = std::clamp(static_cast<int>((maxX - gridOrigin.x) / CELL_SIZE), 0, gridCellsX - 1);
|
|
|
|
|
|
int cyMin = std::clamp(static_cast<int>((minY - gridOrigin.y) / CELL_SIZE), 0, gridCellsY - 1);
|
|
|
|
|
|
int cyMax = std::clamp(static_cast<int>((maxY - gridOrigin.y) / CELL_SIZE), 0, gridCellsY - 1);
|
2026-03-27 16:33:16 -07:00
|
|
|
|
const size_t cellCount = static_cast<size_t>(cxMax - cxMin + 1) *
|
|
|
|
|
|
static_cast<size_t>(cyMax - cyMin + 1);
|
|
|
|
|
|
out.reserve(cellCount * 8);
|
2026-02-08 19:56:17 -08:00
|
|
|
|
for (int cy = cyMin; cy <= cyMax; cy++) {
|
|
|
|
|
|
for (int cx = cxMin; cx <= cxMax; cx++) {
|
|
|
|
|
|
const auto& cell = cellFloorTris[cy * gridCellsX + cx];
|
|
|
|
|
|
out.insert(out.end(), cell.begin(), cell.end());
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
std::sort(out.begin(), out.end());
|
|
|
|
|
|
out.erase(std::unique(out.begin(), out.end()), out.end());
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
void M2ModelGPU::CollisionMesh::getWallTrisInRange(
|
|
|
|
|
|
float minX, float minY, float maxX, float maxY,
|
|
|
|
|
|
std::vector<uint32_t>& out) const {
|
|
|
|
|
|
out.clear();
|
|
|
|
|
|
if (gridCellsX == 0 || gridCellsY == 0) return;
|
|
|
|
|
|
int cxMin = std::clamp(static_cast<int>((minX - gridOrigin.x) / CELL_SIZE), 0, gridCellsX - 1);
|
|
|
|
|
|
int cxMax = std::clamp(static_cast<int>((maxX - gridOrigin.x) / CELL_SIZE), 0, gridCellsX - 1);
|
|
|
|
|
|
int cyMin = std::clamp(static_cast<int>((minY - gridOrigin.y) / CELL_SIZE), 0, gridCellsY - 1);
|
|
|
|
|
|
int cyMax = std::clamp(static_cast<int>((maxY - gridOrigin.y) / CELL_SIZE), 0, gridCellsY - 1);
|
2026-03-27 16:33:16 -07:00
|
|
|
|
const size_t cellCount = static_cast<size_t>(cxMax - cxMin + 1) *
|
|
|
|
|
|
static_cast<size_t>(cyMax - cyMin + 1);
|
|
|
|
|
|
out.reserve(cellCount * 8);
|
2026-02-08 19:56:17 -08:00
|
|
|
|
for (int cy = cyMin; cy <= cyMax; cy++) {
|
|
|
|
|
|
for (int cx = cxMin; cx <= cxMax; cx++) {
|
|
|
|
|
|
const auto& cell = cellWallTris[cy * gridCellsX + cx];
|
|
|
|
|
|
out.insert(out.end(), cell.begin(), cell.end());
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
std::sort(out.begin(), out.end());
|
|
|
|
|
|
out.erase(std::unique(out.begin(), out.end()), out.end());
|
|
|
|
|
|
}
|
|
|
|
|
|
|
2026-02-10 17:23:41 -08:00
|
|
|
|
bool M2Renderer::hasModel(uint32_t modelId) const {
|
|
|
|
|
|
return models.find(modelId) != models.end();
|
|
|
|
|
|
}
|
|
|
|
|
|
|
2026-02-02 12:24:50 -08:00
|
|
|
|
bool M2Renderer::loadModel(const pipeline::M2Model& model, uint32_t modelId) {
|
|
|
|
|
|
if (models.find(modelId) != models.end()) {
|
|
|
|
|
|
// Already loaded
|
|
|
|
|
|
return true;
|
|
|
|
|
|
}
|
2026-02-22 07:26:54 -08:00
|
|
|
|
if (models.size() >= modelCacheLimit_) {
|
2026-02-23 04:32:58 -08:00
|
|
|
|
if (modelLimitRejectWarnings_ < 3) {
|
2026-02-22 07:26:54 -08:00
|
|
|
|
LOG_WARNING("M2 model cache full (", models.size(), "/", modelCacheLimit_,
|
|
|
|
|
|
"), skipping model load: id=", modelId, " name=", model.name);
|
|
|
|
|
|
}
|
|
|
|
|
|
++modelLimitRejectWarnings_;
|
|
|
|
|
|
return false;
|
|
|
|
|
|
}
|
2026-02-02 12:24:50 -08:00
|
|
|
|
|
2026-02-19 20:36:25 -08:00
|
|
|
|
bool hasGeometry = !model.vertices.empty() && !model.indices.empty();
|
|
|
|
|
|
bool hasParticles = !model.particleEmitters.empty();
|
2026-03-13 01:49:22 -07:00
|
|
|
|
bool hasRibbons = !model.ribbonEmitters.empty();
|
|
|
|
|
|
if (!hasGeometry && !hasParticles && !hasRibbons) {
|
|
|
|
|
|
LOG_WARNING("M2 model has no renderable content: ", model.name);
|
2026-02-02 12:24:50 -08:00
|
|
|
|
return false;
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
M2ModelGPU gpuModel;
|
|
|
|
|
|
gpuModel.name = model.name;
|
2026-02-09 22:31:36 -08:00
|
|
|
|
|
2026-02-03 15:17:54 -08:00
|
|
|
|
// Use tight bounds from actual vertices for collision/camera occlusion.
|
|
|
|
|
|
// Header bounds in some M2s are overly conservative.
|
2026-02-19 20:36:25 -08:00
|
|
|
|
glm::vec3 tightMin(0.0f);
|
|
|
|
|
|
glm::vec3 tightMax(0.0f);
|
|
|
|
|
|
if (hasGeometry) {
|
|
|
|
|
|
tightMin = glm::vec3(std::numeric_limits<float>::max());
|
|
|
|
|
|
tightMax = glm::vec3(-std::numeric_limits<float>::max());
|
|
|
|
|
|
for (const auto& v : model.vertices) {
|
|
|
|
|
|
tightMin = glm::min(tightMin, v.position);
|
|
|
|
|
|
tightMax = glm::max(tightMax, v.position);
|
|
|
|
|
|
}
|
2026-02-03 15:17:54 -08:00
|
|
|
|
}
|
2026-03-24 19:55:24 +03:00
|
|
|
|
|
|
|
|
|
|
// Classify model from name and geometry — pure function, no GPU dependencies.
|
|
|
|
|
|
auto cls = classifyM2Model(model.name, tightMin, tightMax,
|
|
|
|
|
|
model.vertices.size(),
|
|
|
|
|
|
model.particleEmitters.size());
|
|
|
|
|
|
const bool isInvisibleTrap = cls.isInvisibleTrap;
|
|
|
|
|
|
const bool groundDetailModel = cls.isGroundDetail;
|
|
|
|
|
|
if (isInvisibleTrap) {
|
|
|
|
|
|
LOG_INFO("Loading InvisibleTrap model: ", model.name, " (will be invisible, no collision)");
|
2026-02-03 16:51:25 -08:00
|
|
|
|
}
|
2026-03-24 19:55:24 +03:00
|
|
|
|
|
|
|
|
|
|
gpuModel.isInvisibleTrap = cls.isInvisibleTrap;
|
|
|
|
|
|
gpuModel.collisionSteppedFountain = cls.collisionSteppedFountain;
|
|
|
|
|
|
gpuModel.collisionSteppedLowPlatform = cls.collisionSteppedLowPlatform;
|
|
|
|
|
|
gpuModel.collisionBridge = cls.collisionBridge;
|
|
|
|
|
|
gpuModel.collisionPlanter = cls.collisionPlanter;
|
|
|
|
|
|
gpuModel.collisionStatue = cls.collisionStatue;
|
|
|
|
|
|
gpuModel.collisionTreeTrunk = cls.collisionTreeTrunk;
|
|
|
|
|
|
gpuModel.collisionNarrowVerticalProp = cls.collisionNarrowVerticalProp;
|
|
|
|
|
|
gpuModel.collisionSmallSolidProp = cls.collisionSmallSolidProp;
|
|
|
|
|
|
gpuModel.collisionNoBlock = cls.collisionNoBlock;
|
|
|
|
|
|
gpuModel.isGroundDetail = cls.isGroundDetail;
|
|
|
|
|
|
gpuModel.isFoliageLike = cls.isFoliageLike;
|
|
|
|
|
|
gpuModel.disableAnimation = cls.disableAnimation;
|
|
|
|
|
|
gpuModel.shadowWindFoliage = cls.shadowWindFoliage;
|
|
|
|
|
|
gpuModel.isFireflyEffect = cls.isFireflyEffect;
|
|
|
|
|
|
gpuModel.isSmoke = cls.isSmoke;
|
|
|
|
|
|
gpuModel.isSpellEffect = cls.isSpellEffect;
|
|
|
|
|
|
gpuModel.isLavaModel = cls.isLavaModel;
|
|
|
|
|
|
gpuModel.isInstancePortal = cls.isInstancePortal;
|
|
|
|
|
|
gpuModel.isWaterVegetation = cls.isWaterVegetation;
|
|
|
|
|
|
gpuModel.isElvenLike = cls.isElvenLike;
|
|
|
|
|
|
gpuModel.isLanternLike = cls.isLanternLike;
|
|
|
|
|
|
gpuModel.isKoboldFlame = cls.isKoboldFlame;
|
2026-02-03 15:17:54 -08:00
|
|
|
|
gpuModel.boundMin = tightMin;
|
|
|
|
|
|
gpuModel.boundMax = tightMax;
|
2026-02-02 12:24:50 -08:00
|
|
|
|
gpuModel.boundRadius = model.boundRadius;
|
2026-04-05 03:18:52 -07:00
|
|
|
|
// Fallback: compute bound radius from vertex extents when M2 header reports 0
|
|
|
|
|
|
if (gpuModel.boundRadius < 0.01f && !model.vertices.empty()) {
|
|
|
|
|
|
glm::vec3 extent = tightMax - tightMin;
|
|
|
|
|
|
gpuModel.boundRadius = glm::length(extent) * 0.5f;
|
|
|
|
|
|
}
|
2026-02-02 12:24:50 -08:00
|
|
|
|
gpuModel.indexCount = static_cast<uint32_t>(model.indices.size());
|
|
|
|
|
|
gpuModel.vertexCount = static_cast<uint32_t>(model.vertices.size());
|
|
|
|
|
|
|
2026-02-04 11:40:00 -08:00
|
|
|
|
// Store bone/sequence data for animation
|
|
|
|
|
|
gpuModel.bones = model.bones;
|
|
|
|
|
|
gpuModel.sequences = model.sequences;
|
2026-02-04 14:06:59 -08:00
|
|
|
|
gpuModel.globalSequenceDurations = model.globalSequenceDurations;
|
2026-02-04 11:40:00 -08:00
|
|
|
|
gpuModel.hasAnimation = false;
|
|
|
|
|
|
for (const auto& bone : model.bones) {
|
|
|
|
|
|
if (bone.translation.hasData() || bone.rotation.hasData() || bone.scale.hasData()) {
|
|
|
|
|
|
gpuModel.hasAnimation = true;
|
|
|
|
|
|
break;
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
2026-03-24 19:55:24 +03:00
|
|
|
|
|
2026-02-04 11:40:00 -08:00
|
|
|
|
|
2026-02-08 19:56:17 -08:00
|
|
|
|
// Build collision mesh + spatial grid from M2 bounding geometry
|
|
|
|
|
|
gpuModel.collision.vertices = model.collisionVertices;
|
|
|
|
|
|
gpuModel.collision.indices = model.collisionIndices;
|
|
|
|
|
|
gpuModel.collision.build();
|
|
|
|
|
|
if (gpuModel.collision.valid()) {
|
|
|
|
|
|
core::Logger::getInstance().debug(" M2 collision mesh: ", gpuModel.collision.triCount,
|
|
|
|
|
|
" tris, grid ", gpuModel.collision.gridCellsX, "x", gpuModel.collision.gridCellsY);
|
|
|
|
|
|
}
|
|
|
|
|
|
|
2026-02-04 11:50:18 -08:00
|
|
|
|
// Identify idle variation sequences (animation ID 0 = Stand)
|
|
|
|
|
|
for (int i = 0; i < static_cast<int>(model.sequences.size()); i++) {
|
|
|
|
|
|
if (model.sequences[i].id == 0 && model.sequences[i].duration > 0) {
|
|
|
|
|
|
gpuModel.idleVariationIndices.push_back(i);
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
|
2026-03-07 12:19:59 -08:00
|
|
|
|
// Batch all GPU uploads (VB, IB, textures) into a single command buffer
|
|
|
|
|
|
// submission with one fence wait, instead of one fence wait per upload.
|
|
|
|
|
|
vkCtx_->beginUploadBatch();
|
|
|
|
|
|
|
2026-02-19 20:36:25 -08:00
|
|
|
|
if (hasGeometry) {
|
|
|
|
|
|
// Create VBO with interleaved vertex data
|
|
|
|
|
|
// Format: position (3), normal (3), texcoord0 (2), texcoord1 (2), boneWeights (4), boneIndices (4 as float)
|
|
|
|
|
|
const size_t floatsPerVertex = 18;
|
|
|
|
|
|
std::vector<float> vertexData;
|
|
|
|
|
|
vertexData.reserve(model.vertices.size() * floatsPerVertex);
|
|
|
|
|
|
|
|
|
|
|
|
for (const auto& v : model.vertices) {
|
|
|
|
|
|
vertexData.push_back(v.position.x);
|
|
|
|
|
|
vertexData.push_back(v.position.y);
|
|
|
|
|
|
vertexData.push_back(v.position.z);
|
|
|
|
|
|
vertexData.push_back(v.normal.x);
|
|
|
|
|
|
vertexData.push_back(v.normal.y);
|
|
|
|
|
|
vertexData.push_back(v.normal.z);
|
|
|
|
|
|
vertexData.push_back(v.texCoords[0].x);
|
|
|
|
|
|
vertexData.push_back(v.texCoords[0].y);
|
|
|
|
|
|
vertexData.push_back(v.texCoords[1].x);
|
|
|
|
|
|
vertexData.push_back(v.texCoords[1].y);
|
|
|
|
|
|
float w0 = v.boneWeights[0] / 255.0f;
|
|
|
|
|
|
float w1 = v.boneWeights[1] / 255.0f;
|
|
|
|
|
|
float w2 = v.boneWeights[2] / 255.0f;
|
|
|
|
|
|
float w3 = v.boneWeights[3] / 255.0f;
|
|
|
|
|
|
vertexData.push_back(w0);
|
|
|
|
|
|
vertexData.push_back(w1);
|
|
|
|
|
|
vertexData.push_back(w2);
|
|
|
|
|
|
vertexData.push_back(w3);
|
2026-04-03 22:26:14 -07:00
|
|
|
|
vertexData.push_back(static_cast<float>(std::min(v.boneIndices[0], uint8_t(127))));
|
|
|
|
|
|
vertexData.push_back(static_cast<float>(std::min(v.boneIndices[1], uint8_t(127))));
|
|
|
|
|
|
vertexData.push_back(static_cast<float>(std::min(v.boneIndices[2], uint8_t(127))));
|
|
|
|
|
|
vertexData.push_back(static_cast<float>(std::min(v.boneIndices[3], uint8_t(127))));
|
2026-02-19 20:36:25 -08:00
|
|
|
|
}
|
|
|
|
|
|
|
2026-02-21 19:41:21 -08:00
|
|
|
|
// Upload vertex buffer to GPU
|
|
|
|
|
|
{
|
|
|
|
|
|
auto buf = uploadBuffer(*vkCtx_,
|
|
|
|
|
|
vertexData.data(), vertexData.size() * sizeof(float),
|
|
|
|
|
|
VK_BUFFER_USAGE_VERTEX_BUFFER_BIT);
|
|
|
|
|
|
gpuModel.vertexBuffer = buf.buffer;
|
|
|
|
|
|
gpuModel.vertexAlloc = buf.allocation;
|
|
|
|
|
|
}
|
2026-02-19 20:36:25 -08:00
|
|
|
|
|
2026-02-21 19:41:21 -08:00
|
|
|
|
// Upload index buffer to GPU
|
|
|
|
|
|
{
|
|
|
|
|
|
auto buf = uploadBuffer(*vkCtx_,
|
|
|
|
|
|
model.indices.data(), model.indices.size() * sizeof(uint16_t),
|
|
|
|
|
|
VK_BUFFER_USAGE_INDEX_BUFFER_BIT);
|
|
|
|
|
|
gpuModel.indexBuffer = buf.buffer;
|
|
|
|
|
|
gpuModel.indexAlloc = buf.allocation;
|
|
|
|
|
|
}
|
2026-03-22 21:47:12 +03:00
|
|
|
|
|
|
|
|
|
|
if (!gpuModel.vertexBuffer || !gpuModel.indexBuffer) {
|
|
|
|
|
|
LOG_ERROR("M2Renderer::loadModel: GPU buffer upload failed for model ", modelId);
|
|
|
|
|
|
}
|
2026-02-19 20:36:25 -08:00
|
|
|
|
}
|
2026-02-04 11:40:00 -08:00
|
|
|
|
|
2026-02-17 23:52:44 -08:00
|
|
|
|
// Load ALL textures from the model into a local vector.
|
|
|
|
|
|
// textureLoadFailed[i] is true if texture[i] had a named path that failed to load.
|
|
|
|
|
|
// Such batches are hidden (batchOpacity=0) rather than rendered white.
|
2026-02-21 19:41:21 -08:00
|
|
|
|
std::vector<VkTexture*> allTextures;
|
2026-02-17 23:52:44 -08:00
|
|
|
|
std::vector<bool> textureLoadFailed;
|
2026-02-21 03:51:42 -08:00
|
|
|
|
std::vector<std::string> textureKeysLower;
|
2026-02-02 12:24:50 -08:00
|
|
|
|
if (assetManager) {
|
2026-02-09 22:31:36 -08:00
|
|
|
|
for (size_t ti = 0; ti < model.textures.size(); ti++) {
|
|
|
|
|
|
const auto& tex = model.textures[ti];
|
2026-02-21 01:26:16 -08:00
|
|
|
|
std::string texPath = tex.filename;
|
|
|
|
|
|
// Some extracted M2 texture strings contain embedded NUL + garbage suffix.
|
|
|
|
|
|
// Truncate at first NUL so valid paths like "...foo.blp\0junk" still resolve.
|
|
|
|
|
|
size_t nul = texPath.find('\0');
|
|
|
|
|
|
if (nul != std::string::npos) {
|
|
|
|
|
|
texPath.resize(nul);
|
|
|
|
|
|
}
|
|
|
|
|
|
if (!texPath.empty()) {
|
2026-02-21 03:51:42 -08:00
|
|
|
|
std::string texKey = texPath;
|
|
|
|
|
|
std::replace(texKey.begin(), texKey.end(), '/', '\\');
|
|
|
|
|
|
std::transform(texKey.begin(), texKey.end(), texKey.begin(),
|
|
|
|
|
|
[](unsigned char c) { return static_cast<char>(std::tolower(c)); });
|
2026-02-21 19:41:21 -08:00
|
|
|
|
VkTexture* texPtr = loadTexture(texPath, tex.flags);
|
|
|
|
|
|
bool failed = (texPtr == whiteTexture_.get());
|
2026-02-17 23:52:44 -08:00
|
|
|
|
if (failed) {
|
2026-02-22 07:26:54 -08:00
|
|
|
|
static uint32_t loggedModelTextureFails = 0;
|
|
|
|
|
|
static bool loggedModelTextureFailSuppressed = false;
|
|
|
|
|
|
if (loggedModelTextureFails < 250) {
|
2026-02-21 04:05:53 -08:00
|
|
|
|
LOG_WARNING("M2 model ", model.name, " texture[", ti, "] failed to load: ", texPath);
|
2026-02-22 07:26:54 -08:00
|
|
|
|
++loggedModelTextureFails;
|
|
|
|
|
|
} else if (!loggedModelTextureFailSuppressed) {
|
|
|
|
|
|
LOG_WARNING("M2 model texture-failure warnings suppressed after ",
|
|
|
|
|
|
loggedModelTextureFails, " entries");
|
|
|
|
|
|
loggedModelTextureFailSuppressed = true;
|
2026-02-21 04:05:53 -08:00
|
|
|
|
}
|
2026-02-09 22:31:36 -08:00
|
|
|
|
}
|
|
|
|
|
|
if (isInvisibleTrap) {
|
2026-02-21 01:26:16 -08:00
|
|
|
|
LOG_INFO(" InvisibleTrap texture[", ti, "]: ", texPath, " -> ", (failed ? "WHITE" : "OK"));
|
2026-02-09 22:31:36 -08:00
|
|
|
|
}
|
2026-02-21 19:41:21 -08:00
|
|
|
|
allTextures.push_back(texPtr);
|
2026-02-17 23:52:44 -08:00
|
|
|
|
textureLoadFailed.push_back(failed);
|
2026-02-21 03:51:42 -08:00
|
|
|
|
textureKeysLower.push_back(std::move(texKey));
|
2026-02-02 12:24:50 -08:00
|
|
|
|
} else {
|
2026-02-09 22:31:36 -08:00
|
|
|
|
if (isInvisibleTrap) {
|
|
|
|
|
|
LOG_INFO(" InvisibleTrap texture[", ti, "]: EMPTY (using white fallback)");
|
|
|
|
|
|
}
|
2026-02-21 19:41:21 -08:00
|
|
|
|
allTextures.push_back(whiteTexture_.get());
|
2026-02-17 23:52:44 -08:00
|
|
|
|
textureLoadFailed.push_back(false); // Empty filename = intentional white (type!=0)
|
2026-02-21 03:51:42 -08:00
|
|
|
|
textureKeysLower.emplace_back();
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
static const bool kGlowDiag = envFlagEnabled("WOWEE_M2_GLOW_DIAG", false);
|
2026-02-21 04:05:53 -08:00
|
|
|
|
if (kGlowDiag) {
|
2026-03-24 19:55:24 +03:00
|
|
|
|
if (gpuModel.isLanternLike) {
|
2026-02-21 03:51:42 -08:00
|
|
|
|
for (size_t ti = 0; ti < model.textures.size(); ++ti) {
|
|
|
|
|
|
const std::string key = (ti < textureKeysLower.size()) ? textureKeysLower[ti] : std::string();
|
2026-02-21 04:05:53 -08:00
|
|
|
|
LOG_DEBUG("M2 GLOW TEX '", model.name, "' tex[", ti, "]='", key, "' flags=0x",
|
|
|
|
|
|
std::hex, model.textures[ti].flags, std::dec);
|
2026-02-02 12:24:50 -08:00
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
|
2026-02-06 20:57:02 -08:00
|
|
|
|
// Copy particle emitter data and resolve textures
|
|
|
|
|
|
gpuModel.particleEmitters = model.particleEmitters;
|
2026-02-21 19:41:21 -08:00
|
|
|
|
gpuModel.particleTextures.resize(model.particleEmitters.size(), whiteTexture_.get());
|
2026-02-06 20:57:02 -08:00
|
|
|
|
for (size_t ei = 0; ei < model.particleEmitters.size(); ei++) {
|
|
|
|
|
|
uint16_t texIdx = model.particleEmitters[ei].texture;
|
2026-02-21 19:41:21 -08:00
|
|
|
|
if (texIdx < allTextures.size() && allTextures[texIdx] != nullptr) {
|
2026-02-06 20:57:02 -08:00
|
|
|
|
gpuModel.particleTextures[ei] = allTextures[texIdx];
|
2026-04-03 16:11:45 -07:00
|
|
|
|
} else {
|
|
|
|
|
|
LOG_WARNING("M2 '", model.name, "' particle emitter[", ei,
|
|
|
|
|
|
"] texture index ", texIdx, " out of range (", allTextures.size(),
|
|
|
|
|
|
" textures) — using white fallback");
|
2026-02-06 20:57:02 -08:00
|
|
|
|
}
|
|
|
|
|
|
}
|
2026-02-06 08:58:26 -08:00
|
|
|
|
|
2026-03-11 02:01:23 -07:00
|
|
|
|
// Pre-allocate one stable descriptor set per particle emitter to avoid per-frame allocation.
|
|
|
|
|
|
// This prevents materialDescPool_ exhaustion when many emitters are active each frame.
|
|
|
|
|
|
if (particleTexLayout_ && materialDescPool_ && !model.particleEmitters.empty()) {
|
|
|
|
|
|
VkDevice device = vkCtx_->getDevice();
|
|
|
|
|
|
gpuModel.particleTexSets.resize(model.particleEmitters.size(), VK_NULL_HANDLE);
|
|
|
|
|
|
for (size_t ei = 0; ei < model.particleEmitters.size(); ei++) {
|
|
|
|
|
|
VkDescriptorSetAllocateInfo ai{VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO};
|
|
|
|
|
|
ai.descriptorPool = materialDescPool_;
|
|
|
|
|
|
ai.descriptorSetCount = 1;
|
|
|
|
|
|
ai.pSetLayouts = &particleTexLayout_;
|
|
|
|
|
|
if (vkAllocateDescriptorSets(device, &ai, &gpuModel.particleTexSets[ei]) == VK_SUCCESS) {
|
|
|
|
|
|
VkTexture* tex = gpuModel.particleTextures[ei];
|
|
|
|
|
|
VkDescriptorImageInfo imgInfo = tex->descriptorInfo();
|
|
|
|
|
|
VkWriteDescriptorSet write{VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET};
|
|
|
|
|
|
write.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET;
|
|
|
|
|
|
write.dstSet = gpuModel.particleTexSets[ei];
|
|
|
|
|
|
write.dstBinding = 0;
|
|
|
|
|
|
write.descriptorCount = 1;
|
|
|
|
|
|
write.descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER;
|
|
|
|
|
|
write.pImageInfo = &imgInfo;
|
|
|
|
|
|
vkUpdateDescriptorSets(device, 1, &write, 0, nullptr);
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
|
2026-03-13 01:17:30 -07:00
|
|
|
|
// Copy ribbon emitter data and resolve textures
|
|
|
|
|
|
gpuModel.ribbonEmitters = model.ribbonEmitters;
|
|
|
|
|
|
if (!model.ribbonEmitters.empty()) {
|
|
|
|
|
|
VkDevice device = vkCtx_->getDevice();
|
|
|
|
|
|
gpuModel.ribbonTextures.resize(model.ribbonEmitters.size(), whiteTexture_.get());
|
|
|
|
|
|
gpuModel.ribbonTexSets.resize(model.ribbonEmitters.size(), VK_NULL_HANDLE);
|
|
|
|
|
|
for (size_t ri = 0; ri < model.ribbonEmitters.size(); ri++) {
|
|
|
|
|
|
// Resolve texture via textureLookup table
|
|
|
|
|
|
uint16_t texLookupIdx = model.ribbonEmitters[ri].textureIndex;
|
|
|
|
|
|
uint32_t texIdx = (texLookupIdx < model.textureLookup.size())
|
|
|
|
|
|
? model.textureLookup[texLookupIdx] : UINT32_MAX;
|
|
|
|
|
|
if (texIdx < allTextures.size() && allTextures[texIdx] != nullptr) {
|
|
|
|
|
|
gpuModel.ribbonTextures[ri] = allTextures[texIdx];
|
2026-04-03 16:11:45 -07:00
|
|
|
|
} else {
|
|
|
|
|
|
LOG_WARNING("M2 '", model.name, "' ribbon emitter[", ri,
|
|
|
|
|
|
"] texLookup=", texLookupIdx, " resolved texIdx=", texIdx,
|
|
|
|
|
|
" out of range (", allTextures.size(),
|
|
|
|
|
|
" textures) — using white fallback");
|
2026-03-13 01:17:30 -07:00
|
|
|
|
}
|
|
|
|
|
|
// Allocate descriptor set (reuse particleTexLayout_ = single sampler)
|
|
|
|
|
|
if (particleTexLayout_ && materialDescPool_) {
|
|
|
|
|
|
VkDescriptorSetAllocateInfo ai{VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO};
|
|
|
|
|
|
ai.descriptorPool = materialDescPool_;
|
|
|
|
|
|
ai.descriptorSetCount = 1;
|
|
|
|
|
|
ai.pSetLayouts = &particleTexLayout_;
|
|
|
|
|
|
if (vkAllocateDescriptorSets(device, &ai, &gpuModel.ribbonTexSets[ri]) == VK_SUCCESS) {
|
|
|
|
|
|
VkTexture* tex = gpuModel.ribbonTextures[ri];
|
|
|
|
|
|
VkDescriptorImageInfo imgInfo = tex->descriptorInfo();
|
|
|
|
|
|
VkWriteDescriptorSet write{VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET};
|
|
|
|
|
|
write.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET;
|
|
|
|
|
|
write.dstSet = gpuModel.ribbonTexSets[ri];
|
|
|
|
|
|
write.dstBinding = 0;
|
|
|
|
|
|
write.descriptorCount = 1;
|
|
|
|
|
|
write.descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER;
|
|
|
|
|
|
write.pImageInfo = &imgInfo;
|
|
|
|
|
|
vkUpdateDescriptorSets(device, 1, &write, 0, nullptr);
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
LOG_DEBUG(" Ribbon emitters loaded: ", model.ribbonEmitters.size());
|
|
|
|
|
|
}
|
|
|
|
|
|
|
2026-02-06 01:49:27 -08:00
|
|
|
|
// Copy texture transform data for UV animation
|
|
|
|
|
|
gpuModel.textureTransforms = model.textureTransforms;
|
|
|
|
|
|
gpuModel.textureTransformLookup = model.textureTransformLookup;
|
|
|
|
|
|
gpuModel.hasTextureAnimation = false;
|
|
|
|
|
|
|
2026-02-02 12:24:50 -08:00
|
|
|
|
// Build per-batch GPU entries
|
|
|
|
|
|
if (!model.batches.empty()) {
|
|
|
|
|
|
for (const auto& batch : model.batches) {
|
|
|
|
|
|
M2ModelGPU::BatchGPU bgpu;
|
|
|
|
|
|
bgpu.indexStart = batch.indexStart;
|
|
|
|
|
|
bgpu.indexCount = batch.indexCount;
|
|
|
|
|
|
|
2026-02-06 01:49:27 -08:00
|
|
|
|
// Store texture animation index from batch
|
|
|
|
|
|
bgpu.textureAnimIndex = batch.textureAnimIndex;
|
|
|
|
|
|
if (bgpu.textureAnimIndex != 0xFFFF) {
|
|
|
|
|
|
gpuModel.hasTextureAnimation = true;
|
|
|
|
|
|
}
|
|
|
|
|
|
|
2026-02-06 03:28:21 -08:00
|
|
|
|
// Store blend mode and flags from material
|
2026-02-06 01:54:25 -08:00
|
|
|
|
if (batch.materialIndex < model.materials.size()) {
|
|
|
|
|
|
bgpu.blendMode = model.materials[batch.materialIndex].blendMode;
|
2026-02-06 03:28:21 -08:00
|
|
|
|
bgpu.materialFlags = model.materials[batch.materialIndex].flags;
|
2026-03-09 20:58:49 -07:00
|
|
|
|
if (bgpu.blendMode >= 2) gpuModel.hasTransparentBatches = true;
|
2026-02-06 01:54:25 -08:00
|
|
|
|
}
|
|
|
|
|
|
|
2026-02-08 21:32:38 -08:00
|
|
|
|
// Copy LOD level from batch
|
|
|
|
|
|
bgpu.submeshLevel = batch.submeshLevel;
|
|
|
|
|
|
|
2026-02-02 12:24:50 -08:00
|
|
|
|
// Resolve texture: batch.textureIndex → textureLookup → allTextures
|
2026-02-21 19:41:21 -08:00
|
|
|
|
VkTexture* tex = whiteTexture_.get();
|
2026-02-17 23:52:44 -08:00
|
|
|
|
bool texFailed = false;
|
2026-02-21 03:51:42 -08:00
|
|
|
|
std::string batchTexKeyLower;
|
2026-02-02 12:24:50 -08:00
|
|
|
|
if (batch.textureIndex < model.textureLookup.size()) {
|
|
|
|
|
|
uint16_t texIdx = model.textureLookup[batch.textureIndex];
|
|
|
|
|
|
if (texIdx < allTextures.size()) {
|
|
|
|
|
|
tex = allTextures[texIdx];
|
2026-02-17 23:52:44 -08:00
|
|
|
|
texFailed = (texIdx < textureLoadFailed.size()) && textureLoadFailed[texIdx];
|
2026-02-21 03:51:42 -08:00
|
|
|
|
if (texIdx < textureKeysLower.size()) {
|
|
|
|
|
|
batchTexKeyLower = textureKeysLower[texIdx];
|
|
|
|
|
|
}
|
2026-02-17 23:52:44 -08:00
|
|
|
|
}
|
|
|
|
|
|
if (texIdx < model.textures.size()) {
|
|
|
|
|
|
bgpu.texFlags = static_cast<uint8_t>(model.textures[texIdx].flags & 0x3);
|
2026-02-02 12:24:50 -08:00
|
|
|
|
}
|
|
|
|
|
|
} else if (!allTextures.empty()) {
|
2026-04-03 16:11:45 -07:00
|
|
|
|
LOG_WARNING("M2 '", model.name, "' batch textureIndex ", batch.textureIndex,
|
|
|
|
|
|
" out of range (textureLookup size=", model.textureLookup.size(),
|
|
|
|
|
|
") — falling back to texture[0]");
|
2026-02-02 12:24:50 -08:00
|
|
|
|
tex = allTextures[0];
|
2026-02-17 23:52:44 -08:00
|
|
|
|
texFailed = !textureLoadFailed.empty() && textureLoadFailed[0];
|
2026-02-21 03:51:42 -08:00
|
|
|
|
if (!textureKeysLower.empty()) {
|
|
|
|
|
|
batchTexKeyLower = textureKeysLower[0];
|
|
|
|
|
|
}
|
2026-02-02 12:24:50 -08:00
|
|
|
|
}
|
2026-02-21 01:26:16 -08:00
|
|
|
|
|
|
|
|
|
|
if (texFailed && groundDetailModel) {
|
|
|
|
|
|
static const std::string kDetailFallbackTexture = "World\\NoDXT\\Detail\\8des_detaildoodads01.blp";
|
2026-02-21 19:41:21 -08:00
|
|
|
|
VkTexture* fallbackTex = loadTexture(kDetailFallbackTexture, 0);
|
|
|
|
|
|
if (fallbackTex != nullptr && fallbackTex != whiteTexture_.get()) {
|
2026-02-21 01:26:16 -08:00
|
|
|
|
tex = fallbackTex;
|
|
|
|
|
|
texFailed = false;
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
2026-02-02 12:24:50 -08:00
|
|
|
|
bgpu.texture = tex;
|
2026-03-24 19:55:24 +03:00
|
|
|
|
const auto tcls = classifyBatchTexture(batchTexKeyLower);
|
|
|
|
|
|
const bool modelLanternFamily = gpuModel.isLanternLike;
|
2026-02-21 03:51:42 -08:00
|
|
|
|
bgpu.lanternGlowHint =
|
2026-03-24 19:55:24 +03:00
|
|
|
|
tcls.exactLanternGlowTex ||
|
|
|
|
|
|
((tcls.hasGlowToken || (modelLanternFamily && tcls.hasFlameToken)) &&
|
|
|
|
|
|
(tcls.lanternFamily || modelLanternFamily) &&
|
|
|
|
|
|
(!tcls.likelyFlame || modelLanternFamily));
|
|
|
|
|
|
bgpu.glowCardLike = bgpu.lanternGlowHint && tcls.hasGlowCardToken;
|
|
|
|
|
|
bgpu.glowTint = tcls.glowTint;
|
2026-02-21 19:41:21 -08:00
|
|
|
|
if (tex != nullptr && tex != whiteTexture_.get()) {
|
2026-03-27 16:47:30 -07:00
|
|
|
|
auto pit = texturePropsByPtr_.find(tex);
|
|
|
|
|
|
if (pit != texturePropsByPtr_.end()) {
|
|
|
|
|
|
bgpu.hasAlpha = pit->second.hasAlpha;
|
|
|
|
|
|
bgpu.colorKeyBlack = pit->second.colorKeyBlack;
|
|
|
|
|
|
}
|
2026-02-19 02:27:01 -08:00
|
|
|
|
}
|
2026-02-18 03:15:25 -08:00
|
|
|
|
// textureCoordIndex is an index into a texture coord combo table, not directly
|
|
|
|
|
|
// a UV set selector. Most batches have index=0 (UV set 0). We always use UV set 0
|
|
|
|
|
|
// since we don't have the full combo table — dual-UV effects are rare edge cases.
|
|
|
|
|
|
bgpu.textureUnit = 0;
|
|
|
|
|
|
|
2026-03-21 03:14:57 -07:00
|
|
|
|
// Start at full opacity; hide only if texture failed to load.
|
2026-02-21 01:26:16 -08:00
|
|
|
|
bgpu.batchOpacity = (texFailed && !groundDetailModel) ? 0.0f : 1.0f;
|
2026-02-06 08:58:26 -08:00
|
|
|
|
|
2026-03-21 03:14:57 -07:00
|
|
|
|
// Apply at-rest transparency and color alpha from the M2 animation tracks.
|
|
|
|
|
|
// These provide per-batch opacity for ghosts, ethereal effects, fading doodads, etc.
|
|
|
|
|
|
// Skip zero values: some animated tracks start at 0 and animate up, and baking
|
|
|
|
|
|
// that first keyframe would make the entire batch permanently invisible.
|
|
|
|
|
|
if (bgpu.batchOpacity > 0.0f) {
|
|
|
|
|
|
float animAlpha = 1.0f;
|
|
|
|
|
|
if (batch.colorIndex < model.colorAlphas.size()) {
|
|
|
|
|
|
float ca = model.colorAlphas[batch.colorIndex];
|
|
|
|
|
|
if (ca > 0.001f) animAlpha *= ca;
|
|
|
|
|
|
}
|
|
|
|
|
|
if (batch.transparencyIndex < model.textureWeights.size()) {
|
|
|
|
|
|
float tw = model.textureWeights[batch.transparencyIndex];
|
|
|
|
|
|
if (tw > 0.001f) animAlpha *= tw;
|
|
|
|
|
|
}
|
|
|
|
|
|
bgpu.batchOpacity *= animAlpha;
|
|
|
|
|
|
}
|
|
|
|
|
|
|
2026-02-06 08:58:26 -08:00
|
|
|
|
// Compute batch center and radius for glow sprite positioning
|
Fix black background on lamp/lantern/torch glow effects
Three-part fix for glow textures showing opaque black rectangles instead
of being transparent:
1. Pass blend mode to fragment shader via uBlendMode uniform. For additive
blend modes (3=Add, 6=BlendAdd), discard near-black fragments (maxRGB
< 0.1) since they contribute nothing visually but render as dark
rectangles against sky/terrain.
2. Expand colorKeyBlack texture keyword detection to include "lamp",
"lantern", "glow", "flare", "brazier", "campfire", "bonfire" in
addition to the existing "candle", "flame", "fire", "torch".
3. Expand flameLikeModel detection for glow sprite conversion to include
"brazier", "campfire", "bonfire". Also compute glow centers for
colorKeyBlack batches (not just blendMode >= 3) so glow sprites
position correctly for all flame-like objects.
2026-02-19 18:19:52 -08:00
|
|
|
|
if ((bgpu.blendMode >= 3 || bgpu.colorKeyBlack) && batch.indexCount > 0) {
|
2026-02-06 08:58:26 -08:00
|
|
|
|
glm::vec3 sum(0.0f);
|
|
|
|
|
|
uint32_t counted = 0;
|
|
|
|
|
|
for (uint32_t j = batch.indexStart; j < batch.indexStart + batch.indexCount; j++) {
|
|
|
|
|
|
if (j < model.indices.size()) {
|
|
|
|
|
|
uint16_t vi = model.indices[j];
|
|
|
|
|
|
if (vi < model.vertices.size()) {
|
|
|
|
|
|
sum += model.vertices[vi].position;
|
|
|
|
|
|
counted++;
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
if (counted > 0) {
|
|
|
|
|
|
bgpu.center = sum / static_cast<float>(counted);
|
|
|
|
|
|
float maxDist = 0.0f;
|
|
|
|
|
|
for (uint32_t j = batch.indexStart; j < batch.indexStart + batch.indexCount; j++) {
|
|
|
|
|
|
if (j < model.indices.size()) {
|
|
|
|
|
|
uint16_t vi = model.indices[j];
|
|
|
|
|
|
if (vi < model.vertices.size()) {
|
|
|
|
|
|
float d = glm::length(model.vertices[vi].position - bgpu.center);
|
|
|
|
|
|
maxDist = std::max(maxDist, d);
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
bgpu.glowSize = std::max(maxDist, 0.5f);
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
|
2026-02-20 20:31:04 -08:00
|
|
|
|
// Optional diagnostics for glow/light batches (disabled by default).
|
2026-03-24 19:55:24 +03:00
|
|
|
|
if (kGlowDiag && gpuModel.isLanternLike) {
|
2026-02-21 04:05:53 -08:00
|
|
|
|
LOG_DEBUG("M2 GLOW DIAG '", model.name, "' batch ", gpuModel.batches.size(),
|
|
|
|
|
|
": blend=", bgpu.blendMode, " matFlags=0x",
|
|
|
|
|
|
std::hex, bgpu.materialFlags, std::dec,
|
|
|
|
|
|
" colorKey=", bgpu.colorKeyBlack ? "Y" : "N",
|
|
|
|
|
|
" hasAlpha=", bgpu.hasAlpha ? "Y" : "N",
|
|
|
|
|
|
" unlit=", (bgpu.materialFlags & 0x01) ? "Y" : "N",
|
|
|
|
|
|
" lanternHint=", bgpu.lanternGlowHint ? "Y" : "N",
|
|
|
|
|
|
" glowSize=", bgpu.glowSize,
|
|
|
|
|
|
" tex=", bgpu.texture,
|
|
|
|
|
|
" idxCount=", bgpu.indexCount);
|
2026-02-19 18:30:34 -08:00
|
|
|
|
}
|
2026-02-02 12:24:50 -08:00
|
|
|
|
gpuModel.batches.push_back(bgpu);
|
|
|
|
|
|
}
|
|
|
|
|
|
} else {
|
|
|
|
|
|
// Fallback: single batch covering all indices with first texture
|
|
|
|
|
|
M2ModelGPU::BatchGPU bgpu;
|
|
|
|
|
|
bgpu.indexStart = 0;
|
|
|
|
|
|
bgpu.indexCount = gpuModel.indexCount;
|
2026-02-21 19:41:21 -08:00
|
|
|
|
bgpu.texture = allTextures.empty() ? whiteTexture_.get() : allTextures[0];
|
|
|
|
|
|
if (bgpu.texture != nullptr && bgpu.texture != whiteTexture_.get()) {
|
2026-03-27 16:47:30 -07:00
|
|
|
|
auto pit = texturePropsByPtr_.find(bgpu.texture);
|
|
|
|
|
|
if (pit != texturePropsByPtr_.end()) {
|
|
|
|
|
|
bgpu.hasAlpha = pit->second.hasAlpha;
|
|
|
|
|
|
bgpu.colorKeyBlack = pit->second.colorKeyBlack;
|
|
|
|
|
|
}
|
2026-02-19 02:27:01 -08:00
|
|
|
|
}
|
2026-02-02 12:24:50 -08:00
|
|
|
|
gpuModel.batches.push_back(bgpu);
|
|
|
|
|
|
}
|
|
|
|
|
|
|
2026-02-16 19:50:35 -08:00
|
|
|
|
// Detect particle emitter volume models: box mesh (24 verts, 36 indices)
|
|
|
|
|
|
// with disproportionately large bounds. These are invisible bounding volumes
|
|
|
|
|
|
// that only exist to spawn particles — their mesh should never be rendered.
|
2026-02-21 01:26:16 -08:00
|
|
|
|
if (!isInvisibleTrap && !groundDetailModel &&
|
|
|
|
|
|
gpuModel.vertexCount <= 24 && gpuModel.indexCount <= 36
|
2026-02-16 19:50:35 -08:00
|
|
|
|
&& !model.particleEmitters.empty()) {
|
|
|
|
|
|
glm::vec3 size = gpuModel.boundMax - gpuModel.boundMin;
|
|
|
|
|
|
float maxDim = std::max({size.x, size.y, size.z});
|
|
|
|
|
|
if (maxDim > 5.0f) {
|
|
|
|
|
|
gpuModel.isInvisibleTrap = true;
|
|
|
|
|
|
LOG_DEBUG("M2 emitter volume hidden: '", model.name, "' size=(",
|
|
|
|
|
|
size.x, " x ", size.y, " x ", size.z, ")");
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
|
2026-03-07 12:19:59 -08:00
|
|
|
|
vkCtx_->endUploadBatch();
|
|
|
|
|
|
|
2026-02-21 19:41:21 -08:00
|
|
|
|
// Allocate Vulkan descriptor sets and UBOs for each batch
|
|
|
|
|
|
for (auto& bgpu : gpuModel.batches) {
|
|
|
|
|
|
// Create combined UBO for M2Params (binding 1) + M2Material (binding 2)
|
|
|
|
|
|
// We allocate them as separate buffers for clarity
|
|
|
|
|
|
VmaAllocationInfo matAllocInfo{};
|
|
|
|
|
|
{
|
|
|
|
|
|
VkBufferCreateInfo bci{VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO};
|
|
|
|
|
|
bci.size = sizeof(M2MaterialUBO);
|
|
|
|
|
|
bci.usage = VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT;
|
|
|
|
|
|
VmaAllocationCreateInfo aci{};
|
|
|
|
|
|
aci.usage = VMA_MEMORY_USAGE_CPU_TO_GPU;
|
|
|
|
|
|
aci.flags = VMA_ALLOCATION_CREATE_MAPPED_BIT;
|
|
|
|
|
|
vmaCreateBuffer(vkCtx_->getAllocator(), &bci, &aci, &bgpu.materialUBO, &bgpu.materialUBOAlloc, &matAllocInfo);
|
|
|
|
|
|
|
|
|
|
|
|
// Write initial material data (static per-batch — fadeAlpha/interiorDarken updated at draw time)
|
|
|
|
|
|
M2MaterialUBO mat{};
|
|
|
|
|
|
mat.hasTexture = (bgpu.texture != nullptr && bgpu.texture != whiteTexture_.get()) ? 1 : 0;
|
|
|
|
|
|
mat.alphaTest = (bgpu.blendMode == 1 || (bgpu.blendMode >= 2 && !bgpu.hasAlpha)) ? 1 : 0;
|
|
|
|
|
|
mat.colorKeyBlack = bgpu.colorKeyBlack ? 1 : 0;
|
|
|
|
|
|
mat.colorKeyThreshold = 0.08f;
|
|
|
|
|
|
mat.unlit = (bgpu.materialFlags & 0x01) ? 1 : 0;
|
|
|
|
|
|
mat.blendMode = bgpu.blendMode;
|
|
|
|
|
|
mat.fadeAlpha = 1.0f;
|
|
|
|
|
|
mat.interiorDarken = 0.0f;
|
|
|
|
|
|
mat.specularIntensity = 0.5f;
|
|
|
|
|
|
memcpy(matAllocInfo.pMappedData, &mat, sizeof(mat));
|
2026-02-23 06:06:24 -08:00
|
|
|
|
bgpu.materialUBOMapped = matAllocInfo.pMappedData;
|
2026-02-21 19:41:21 -08:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
// Allocate descriptor set and write all bindings
|
|
|
|
|
|
bgpu.materialSet = allocateMaterialSet();
|
|
|
|
|
|
if (bgpu.materialSet) {
|
|
|
|
|
|
VkTexture* batchTex = bgpu.texture ? bgpu.texture : whiteTexture_.get();
|
|
|
|
|
|
VkDescriptorImageInfo imgInfo = batchTex->descriptorInfo();
|
|
|
|
|
|
|
|
|
|
|
|
VkDescriptorBufferInfo matBufInfo{};
|
|
|
|
|
|
matBufInfo.buffer = bgpu.materialUBO;
|
|
|
|
|
|
matBufInfo.offset = 0;
|
|
|
|
|
|
matBufInfo.range = sizeof(M2MaterialUBO);
|
|
|
|
|
|
|
|
|
|
|
|
VkWriteDescriptorSet writes[2] = {};
|
|
|
|
|
|
// binding 0: texture
|
|
|
|
|
|
writes[0].sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET;
|
|
|
|
|
|
writes[0].dstSet = bgpu.materialSet;
|
|
|
|
|
|
writes[0].dstBinding = 0;
|
|
|
|
|
|
writes[0].descriptorCount = 1;
|
|
|
|
|
|
writes[0].descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER;
|
|
|
|
|
|
writes[0].pImageInfo = &imgInfo;
|
|
|
|
|
|
// binding 2: M2Material UBO
|
|
|
|
|
|
writes[1].sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET;
|
|
|
|
|
|
writes[1].dstSet = bgpu.materialSet;
|
|
|
|
|
|
writes[1].dstBinding = 2;
|
|
|
|
|
|
writes[1].descriptorCount = 1;
|
|
|
|
|
|
writes[1].descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER;
|
|
|
|
|
|
writes[1].pBufferInfo = &matBufInfo;
|
|
|
|
|
|
|
|
|
|
|
|
vkUpdateDescriptorSets(vkCtx_->getDevice(), 2, writes, 0, nullptr);
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
|
2026-03-07 22:03:28 -08:00
|
|
|
|
// Pre-compute available LOD levels to avoid per-instance batch iteration
|
|
|
|
|
|
gpuModel.availableLODs = 0;
|
|
|
|
|
|
for (const auto& b : gpuModel.batches) {
|
|
|
|
|
|
if (b.submeshLevel < 8) gpuModel.availableLODs |= (1u << b.submeshLevel);
|
|
|
|
|
|
}
|
|
|
|
|
|
|
2026-02-02 12:24:50 -08:00
|
|
|
|
models[modelId] = std::move(gpuModel);
|
2026-03-22 21:47:12 +03:00
|
|
|
|
spatialIndexDirty_ = true; // Map may have rehashed — refresh cachedModel pointers
|
2026-02-02 12:24:50 -08:00
|
|
|
|
|
|
|
|
|
|
LOG_DEBUG("Loaded M2 model: ", model.name, " (", models[modelId].vertexCount, " vertices, ",
|
|
|
|
|
|
models[modelId].indexCount / 3, " triangles, ", models[modelId].batches.size(), " batches)");
|
|
|
|
|
|
|
2026-02-17 23:52:44 -08:00
|
|
|
|
|
2026-02-02 12:24:50 -08:00
|
|
|
|
return true;
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
} // namespace rendering
|
|
|
|
|
|
} // namespace wowee
|