Kelsidavis-WoWee/src/rendering/m2_renderer.cpp

#include "rendering/m2_renderer.hpp"
#include "rendering/vk_context.hpp"
#include "rendering/vk_buffer.hpp"
#include "rendering/vk_texture.hpp"
#include "rendering/vk_pipeline.hpp"
#include "rendering/vk_shader.hpp"
#include "rendering/vk_utils.hpp"
#include "rendering/vk_frame_data.hpp"
#include "rendering/camera.hpp"
#include "rendering/frustum.hpp"
#include "pipeline/asset_manager.hpp"
#include "pipeline/blp_loader.hpp"
#include "core/logger.hpp"
#include <chrono>
#include <cctype>
#include <glm/gtc/matrix_transform.hpp>
#include <glm/gtc/type_ptr.hpp>
#include <glm/gtx/quaternion.hpp>
#include <unordered_set>
#include <functional>
#include <algorithm>
#include <cmath>
#include <cstdlib>
#include <limits>
#include <future>
#include <thread>

namespace wowee {
namespace rendering {

namespace {

// Reads a boolean switch from the environment variable `key`.
// Unset or empty -> defaultValue. The values "0", "false", "off" and "no"
// (compared case-insensitively, no trimming) -> false. Anything else -> true.
bool envFlagEnabled(const char* key, bool defaultValue) {
    const char* raw = std::getenv(key);
    if (raw == nullptr || raw[0] == '\0') {
        return defaultValue;
    }

    // Lower-case copy so the comparison below ignores case.
    std::string lowered;
    for (const char* p = raw; *p != '\0'; ++p) {
        lowered.push_back(static_cast<char>(std::tolower(static_cast<unsigned char>(*p))));
    }

    for (const char* falsy : {"0", "false", "off", "no"}) {
        if (lowered == falsy) {
            return false;
        }
    }
    return true;
}

// Parses a positive size expressed in MiB from the environment variable `name`.
// Returns `defMb` when the variable is unset, empty, not numeric, zero, negative,
// or so large that converting it to bytes (x 1024 x 1024) would overflow size_t.
// Trailing non-digit characters after the number are ignored, as before.
size_t envSizeMBOrDefault(const char* name, size_t defMb) {
    const char* raw = std::getenv(name);
    if (!raw || !*raw) return defMb;

    // strtoull accepts a leading '-' and silently wraps the result into a huge
    // unsigned value, so reject negative input before parsing.
    const char* firstChar = raw;
    while (std::isspace(static_cast<unsigned char>(*firstChar))) ++firstChar;
    if (*firstChar == '-') return defMb;

    char* end = nullptr;
    const unsigned long long mb = std::strtoull(raw, &end, 10);
    if (end == raw || mb == 0) return defMb;

    // Largest MiB count whose byte size still fits in size_t. This also filters
    // out ULLONG_MAX, which strtoull returns when the input overflows.
    constexpr unsigned long long kMaxMb =
        std::numeric_limits<size_t>::max() / (1024ull * 1024ull);
    if (mb > kMaxMb) return defMb;

    return static_cast<size_t>(mb);
}

// Parses a positive count from the environment variable `name`.
// Returns `defValue` when the variable is unset, empty, not numeric, zero,
// negative, or too large to represent. Trailing non-digit characters after the
// number are ignored, as before.
size_t envSizeOrDefault(const char* name, size_t defValue) {
    const char* raw = std::getenv(name);
    if (!raw || !*raw) return defValue;

    // strtoull accepts a leading '-' and silently wraps the result into a huge
    // unsigned value, so reject negative input before parsing.
    const char* firstChar = raw;
    while (std::isspace(static_cast<unsigned char>(*firstChar))) ++firstChar;
    if (*firstChar == '-') return defValue;

    char* end = nullptr;
    const unsigned long long v = std::strtoull(raw, &end, 10);
    if (end == raw || v == 0) return defValue;

    // ULLONG_MAX is what strtoull returns on overflow; also refuse anything
    // that would be truncated by the conversion to size_t.
    if (v == std::numeric_limits<unsigned long long>::max() ||
        v > std::numeric_limits<size_t>::max()) {
        return defValue;
    }
    return static_cast<size_t>(v);
}

// Single-bit masks for particle flags.
// NOTE(review): presumably tested against an M2 particle emitter's flags field;
// the code that uses them is outside this excerpt — confirm.
static constexpr uint32_t kParticleFlagRandomized = 0x40;
static constexpr uint32_t kParticleFlagTiled = 0x80;

// Returns how far a ground-detail model should be pushed down.
// The result is a capped compensation for a raised pivot (positive scaled
// boundMin.z, limited to 0.10) plus a constant 0.03 sink so the model does not
// hover, without pushing models with a large positive boundMin below the terrain.
float computeGroundDetailDownOffset(const M2ModelGPU& model, float scale) {
    constexpr float kMaxPivotCompensation = 0.10f;
    constexpr float kTerrainSink = 0.03f;

    const float scaledFloor = std::max(0.0f, model.boundMin.z * scale);
    const float pivotCompensation = glm::clamp(scaledFloor, 0.0f, kMaxPivotCompensation);
    return pivotCompensation + kTerrainSink;
}

void getTightCollisionBounds(const M2ModelGPU& model, glm::vec3& outMin, glm::vec3& outMax) {
    glm::vec3 center = (model.boundMin + model.boundMax) * 0.5f;
    glm::vec3 half = (model.boundMax - model.boundMin) * 0.5f;

    // Per-shape collision fitting:
    // - small solid props (boxes/crates/chests): tighter than full mesh, but
    //   larger than default to prevent walk-through on narrow objects
    // - default: tighter fit (avoid oversized blockers)
    // - stepped low platforms (tree curbs/planters): wider XY + lower Z
    if (model.collisionTreeTrunk) {
        // Tree trunk: proportional cylinder at the base of the tree.
        float modelHoriz = std::max(model.boundMax.x - model.boundMin.x,
                                    model.boundMax.y - model.boundMin.y);
        float trunkHalf = std::clamp(modelHoriz * 0.05f, 0.5f, 5.0f);
        half.x = trunkHalf;
        half.y = trunkHalf;
        // Height proportional to trunk width, capped at 3.5 units.
        half.z = std::min(trunkHalf * 2.5f, 3.5f);
        // Shift center down so collision is at the base (trunk), not mid-canopy.
        center.z = model.boundMin.z + half.z;
    } else if (model.collisionNarrowVerticalProp) {
        // Tall thin props (lamps/posts): keep passable gaps near walls.
        half.x *= 0.30f;
        half.y *= 0.30f;
        half.z *= 0.96f;
    } else if (model.collisionSmallSolidProp) {
        // Keep full tight mesh bounds for small solid props to avoid clip-through.
        half.x *= 1.00f;
        half.y *= 1.00f;
        half.z *= 1.00f;
    } else if (model.collisionSteppedLowPlatform) {
        half.x *= 0.98f;
        half.y *= 0.98f;
        half.z *= 0.52f;
    } else {
        half.x *= 0.66f;
        half.y *= 0.66f;
        half.z *= 0.76f;
    }

    outMin = center - half;
    outMax = center + half;
}

// Returns the local-space height of the walkable top surface at `localPos`.
// Models not flagged as a stepped fountain or stepped low platform (and boxes
// with a degenerate XY extent) simply report localMax.z. Stepped models use a
// tiered profile: the further from the box centre, the lower the surface.
float getEffectiveCollisionTopLocal(const M2ModelGPU& model,
                                    const glm::vec3& localPos,
                                    const glm::vec3& localMin,
                                    const glm::vec3& localMax) {
    const bool isFountain = model.collisionSteppedFountain;
    if (!(isFountain || model.collisionSteppedLowPlatform)) {
        return localMax.z;
    }

    const float halfX = (localMax.x - localMin.x) * 0.5f;
    const float halfY = (localMax.y - localMin.y) * 0.5f;
    if (halfX < 1e-4f || halfY < 1e-4f) {
        return localMax.z;
    }

    // Position normalised so that the box edges sit at -1 / +1 on each axis.
    const float centerX = (localMin.x + localMax.x) * 0.5f;
    const float centerY = (localMin.y + localMax.y) * 0.5f;
    const float nx = (localPos.x - centerX) / halfX;
    const float ny = (localPos.y - centerY) / halfY;

    const float height = localMax.z - localMin.z;
    const auto topAt = [&](float fraction) { return localMin.z + height * fraction; };

    if (isFountain) {
        // Radial tiers, outermost first: lip, mid step, inner step,
        // centre platform / statue base, statue body; innermost is the statue top.
        struct Ring {
            float minRadius;
            float heightFraction;
        };
        static constexpr Ring kRings[] = {
            {0.85f, 0.18f},
            {0.65f, 0.36f},
            {0.45f, 0.54f},
            {0.28f, 0.70f},
            {0.14f, 0.84f},
        };

        const float radius = std::sqrt(nx * nx + ny * ny);
        for (const Ring& ring : kRings) {
            if (radius > ring.minRadius) {
                return topAt(ring.heightFraction);
            }
        }
        return topAt(0.96f);
    }

    // Square curb/planter: measure distance to the nearest edge (Chebyshev)
    // rather than radially, so corners are not treated as lower than the sides.
    const float edge = std::max(std::abs(nx), std::abs(ny));
    if (edge > 0.92f) {
        return topAt(0.06f);
    }
    if (edge > 0.72f) {
        return topAt(0.30f);
    }
    return topAt(0.62f);
}

// Slab test of the segment from -> to against the box [bmin, bmax].
// On a hit, returns true and stores in outEnterT the parameter (0..1 along the
// segment) at which the segment enters the box; 0 means `from` is already inside.
// outEnterT is left untouched when the function returns false early.
bool segmentIntersectsAABB(const glm::vec3& from, const glm::vec3& to,
                           const glm::vec3& bmin, const glm::vec3& bmax,
                           float& outEnterT) {
    const glm::vec3 delta = to - from;
    float nearT = 0.0f;
    float farT = 1.0f;

    for (int axis = 0; axis < 3; ++axis) {
        const float start = from[axis];
        const float lo = bmin[axis];
        const float hi = bmax[axis];
        const float step = delta[axis];

        const bool parallelToSlab = std::abs(step) < 1e-6f;
        if (parallelToSlab) {
            // No movement on this axis: the segment must already lie within the slab.
            if (start < lo || start > hi) {
                return false;
            }
            continue;
        }

        const float invStep = 1.0f / step;
        float slabIn = (lo - start) * invStep;
        float slabOut = (hi - start) * invStep;
        if (slabIn > slabOut) {
            std::swap(slabIn, slabOut);
        }

        // Intersect this slab's interval with the running one.
        nearT = std::max(nearT, slabIn);
        farT = std::min(farT, slabOut);
        if (nearT > farT) {
            return false;
        }
    }

    outEnterT = nearT;
    return farT >= 0.0f && nearT <= 1.0f;
}

// Transforms the local box [localMin, localMax] by modelMatrix and writes the
// axis-aligned box enclosing all eight transformed corners to outMin / outMax.
void transformAABB(const glm::mat4& modelMatrix,
                   const glm::vec3& localMin,
                   const glm::vec3& localMax,
                   glm::vec3& outMin,
                   glm::vec3& outMax) {
    glm::vec3 lo(std::numeric_limits<float>::max());
    glm::vec3 hi(-std::numeric_limits<float>::max());

    // Enumerate the corners with a 3-bit mask: bit 2 selects x, bit 1 selects y,
    // bit 0 selects z (set = max, clear = min).
    for (int corner = 0; corner < 8; ++corner) {
        const glm::vec4 localCorner(
            (corner & 4) ? localMax.x : localMin.x,
            (corner & 2) ? localMax.y : localMin.y,
            (corner & 1) ? localMax.z : localMin.z,
            1.0f);
        const glm::vec3 worldCorner(modelMatrix * localCorner);
        lo = glm::min(lo, worldCorner);
        hi = glm::max(hi, worldCorner);
    }

    outMin = lo;
    outMax = hi;
}

// Squared distance from point p to the box [bmin, bmax]; zero when p is inside.
float pointAABBDistanceSq(const glm::vec3& p, const glm::vec3& bmin, const glm::vec3& bmax) {
    // The nearest point of the box is p clamped to the box on every axis.
    const glm::vec3 offset = p - glm::clamp(p, bmin, bmax);
    return glm::dot(offset, offset);
}

// Scope timer for profiling queries.
// Construction records the current steady_clock time. Destruction increments
// *callCount (if non-null) and adds the elapsed wall time in milliseconds to
// *totalMs (if non-null). The pointed-to counters must outlive the timer.
//
// Copying and moving are disabled: a duplicated timer would run its destructor
// twice and count the same scope twice.
struct QueryTimer {
    double* totalMs = nullptr;
    uint32_t* callCount = nullptr;
    std::chrono::steady_clock::time_point start = std::chrono::steady_clock::now();

    QueryTimer(double* total, uint32_t* calls) : totalMs(total), callCount(calls) {}

    QueryTimer(const QueryTimer&) = delete;
    QueryTimer& operator=(const QueryTimer&) = delete;
    QueryTimer(QueryTimer&&) = delete;
    QueryTimer& operator=(QueryTimer&&) = delete;

    ~QueryTimer() {
        if (callCount) {
            (*callCount)++;
        }
        if (totalMs) {
            const auto end = std::chrono::steady_clock::now();
            *totalMs += std::chrono::duration<double, std::milli>(end - start).count();
        }
    }
};

// Möller–Trumbore ray/triangle intersection.
// Returns the distance t along `dir` from `origin` to the hit point, or -1 when
// the ray is (near-)parallel to the triangle plane, misses the triangle, or the
// hit lies at or behind the origin (t <= epsilon).
float rayTriangleIntersect(const glm::vec3& origin, const glm::vec3& dir,
                           const glm::vec3& v0, const glm::vec3& v1, const glm::vec3& v2) {
    constexpr float kEpsilon = 1e-6f;
    constexpr float kMiss = -1.0f;

    const glm::vec3 edgeA = v1 - v0;
    const glm::vec3 edgeB = v2 - v0;

    // A determinant close to zero means the ray runs parallel to the triangle plane.
    const glm::vec3 pvec = glm::cross(dir, edgeB);
    const float det = glm::dot(edgeA, pvec);
    if (det > -kEpsilon && det < kEpsilon) {
        return kMiss;
    }
    const float invDet = 1.0f / det;

    // First barycentric coordinate.
    const glm::vec3 tvec = origin - v0;
    const float baryU = invDet * glm::dot(tvec, pvec);
    if (baryU < 0.0f || baryU > 1.0f) {
        return kMiss;
    }

    // Second barycentric coordinate; u + v must stay within the triangle.
    const glm::vec3 qvec = glm::cross(tvec, edgeA);
    const float baryV = invDet * glm::dot(dir, qvec);
    if (baryV < 0.0f || baryU + baryV > 1.0f) {
        return kMiss;
    }

    const float distance = invDet * glm::dot(edgeB, qvec);
    if (distance > kEpsilon) {
        return distance;
    }
    return kMiss;
}

// Closest point on triangle to a point (Ericson, Real-Time Collision Detection §5.1.5).
// Returns the point of triangle (a, b, c) nearest to p. The checks below run in
// a fixed order — vertex a, vertex b, edge ab, vertex c, edge ac, edge bc — and
// fall through to the triangle interior; each returns as soon as its test passes.
glm::vec3 closestPointOnTriangle(const glm::vec3& p,
                                  const glm::vec3& a, const glm::vec3& b, const glm::vec3& c) {
    glm::vec3 ab = b - a, ac = c - a, ap = p - a;
    // d1/d2: projections of (p - a) onto edges ab and ac.
    float d1 = glm::dot(ab, ap), d2 = glm::dot(ac, ap);
    // Both projections non-positive: a itself is the closest point.
    if (d1 <= 0.0f && d2 <= 0.0f) return a;
    glm::vec3 bp = p - b;
    // d3/d4: projections of (p - b) onto edges ab and ac.
    float d3 = glm::dot(ab, bp), d4 = glm::dot(ac, bp);
    // Closest point is vertex b.
    if (d3 >= 0.0f && d4 <= d3) return b;
    float vc = d1 * d4 - d3 * d2;
    // Closest point lies on edge ab: interpolate between a and b.
    if (vc <= 0.0f && d1 >= 0.0f && d3 <= 0.0f) {
        float v = d1 / (d1 - d3);
        return a + v * ab;
    }
    glm::vec3 cp = p - c;
    // d5/d6: projections of (p - c) onto edges ab and ac.
    float d5 = glm::dot(ab, cp), d6 = glm::dot(ac, cp);
    // Closest point is vertex c.
    if (d6 >= 0.0f && d5 <= d6) return c;
    float vb = d5 * d2 - d1 * d6;
    // Closest point lies on edge ac: interpolate between a and c.
    if (vb <= 0.0f && d2 >= 0.0f && d6 <= 0.0f) {
        float w = d2 / (d2 - d6);
        return a + w * ac;
    }
    float va = d3 * d6 - d5 * d4;
    // Closest point lies on edge bc: interpolate between b and c.
    if (va <= 0.0f && (d4 - d3) >= 0.0f && (d5 - d6) >= 0.0f) {
        float w = (d4 - d3) / ((d4 - d3) + (d5 - d6));
        return b + w * (c - b);
    }
    // Otherwise the closest point is inside the face: combine the edges using
    // va, vb and vc normalised by their sum as barycentric weights.
    float denom = 1.0f / (va + vb + vc);
    float v = vb * denom;
    float w = vc * denom;
    return a + ab * v + ac * w;
}

} // namespace

// Thread-local scratch buffers for collision queries (allows concurrent getFloorHeight calls)
// Each thread gets its own instance of these containers because they are thread_local.
// NOTE(review): presumably cleared and refilled per query so their capacity is
// reused; the code that uses them is outside this excerpt — confirm.
static thread_local std::vector<size_t> tl_m2_candidateScratch;
static thread_local std::unordered_set<uint32_t> tl_m2_candidateIdScratch;
static thread_local std::vector<uint32_t> tl_m2_collisionTriScratch;

// Forward declaration (defined after animation helpers)
static void computeBoneMatrices(const M2ModelGPU& model, M2Instance& instance);

void M2Instance::updateModelMatrix() {
    modelMatrix = glm::mat4(1.0f);
    modelMatrix = glm::translate(modelMatrix, position);

    // Rotation in radians
    modelMatrix = glm::rotate(modelMatrix, rotation.x, glm::vec3(1.0f, 0.0f, 0.0f));
    modelMatrix = glm::rotate(modelMatrix, rotation.y, glm::vec3(0.0f, 1.0f, 0.0f));
    modelMatrix = glm::rotate(modelMatrix, rotation.z, glm::vec3(0.0f, 0.0f, 1.0f));

    modelMatrix = glm::scale(modelMatrix, glm::vec3(scale));
    invModelMatrix = glm::inverse(modelMatrix);
}

// Construction does no work of its own; Vulkan resources are created in initialize().
M2Renderer::M2Renderer() = default;

// Destructor: delegates all resource teardown to shutdown().
M2Renderer::~M2Renderer() {
    shutdown();
}

// Creates the Vulkan objects owned by the M2 renderer: descriptor set layouts
// and pools, pipeline layouts, the model / particle / smoke pipelines, the
// persistently mapped particle vertex buffers, and the fallback white and glow
// textures. It also reads tuning values from the environment.
//
// ctx            Vulkan context used for the device, allocator, MSAA setting
//                and render pass.
// perFrameLayout Descriptor set layout bound as set 0 in every pipeline here.
// assets         Asset manager stored for later use.
//
// Returns true on success, or immediately (only refreshing the asset manager)
// when already initialized. Returns false when the required m2 vertex/fragment
// shaders cannot be loaded; objects created up to that point are left for
// shutdown() to release.
bool M2Renderer::initialize(VkContext* ctx, VkDescriptorSetLayout perFrameLayout,
                            pipeline::AssetManager* assets) {
    if (initialized_) { assetManager = assets; return true; }
    vkCtx_ = ctx;
    assetManager = assets;

    const unsigned hc = std::thread::hardware_concurrency();
    const size_t availableCores = (hc > 1u) ? static_cast<size_t>(hc - 1u) : 1ull;
    // Keep headroom for other frame tasks: M2 gets about half of non-main cores by default.
    const size_t defaultAnimThreads = std::max<size_t>(1, availableCores / 2);
    numAnimThreads_ = static_cast<uint32_t>(std::max<size_t>(
        1, envSizeOrDefault("WOWEE_M2_ANIM_THREADS", defaultAnimThreads)));
    LOG_INFO("Initializing M2 renderer (Vulkan, ", numAnimThreads_, " anim threads)...");

    VkDevice device = vkCtx_->getDevice();

    // --- Descriptor set layouts ---

    // Material set layout (set 1): binding 0 = sampler2D, binding 2 = M2Material UBO
    // (M2Params moved to push constants alongside model matrix)
    {
        VkDescriptorSetLayoutBinding bindings[2] = {};
        bindings[0].binding = 0;
        bindings[0].descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER;
        bindings[0].descriptorCount = 1;
        bindings[0].stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT;
        bindings[1].binding = 2;
        bindings[1].descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER;
        bindings[1].descriptorCount = 1;
        bindings[1].stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT;

        VkDescriptorSetLayoutCreateInfo ci{VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO};
        ci.bindingCount = 2;
        ci.pBindings = bindings;
        vkCreateDescriptorSetLayout(device, &ci, nullptr, &materialSetLayout_);
    }

    // Bone set layout (set 2): binding 0 = STORAGE_BUFFER (bone matrices)
    {
        VkDescriptorSetLayoutBinding binding{};
        binding.binding = 0;
        binding.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
        binding.descriptorCount = 1;
        binding.stageFlags = VK_SHADER_STAGE_VERTEX_BIT;

        VkDescriptorSetLayoutCreateInfo ci{VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO};
        ci.bindingCount = 1;
        ci.pBindings = &binding;
        vkCreateDescriptorSetLayout(device, &ci, nullptr, &boneSetLayout_);
    }

    // Particle texture set layout (set 1 for particles): binding 0 = sampler2D
    {
        VkDescriptorSetLayoutBinding binding{};
        binding.binding = 0;
        binding.descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER;
        binding.descriptorCount = 1;
        binding.stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT;

        VkDescriptorSetLayoutCreateInfo ci{VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO};
        ci.bindingCount = 1;
        ci.pBindings = &binding;
        vkCreateDescriptorSetLayout(device, &ci, nullptr, &particleTexLayout_);
    }

    // --- Descriptor pools ---
    // Material pool: sized for MAX_MATERIAL_SETS plus 256 extra sets; the glow
    // texture set allocated further down also comes from this pool.
    {
        VkDescriptorPoolSize sizes[] = {
            {VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, MAX_MATERIAL_SETS + 256},
            {VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, MAX_MATERIAL_SETS + 256},
        };
        VkDescriptorPoolCreateInfo ci{VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO};
        ci.maxSets = MAX_MATERIAL_SETS + 256;
        ci.poolSizeCount = 2;
        ci.pPoolSizes = sizes;
        ci.flags = VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT;
        vkCreateDescriptorPool(device, &ci, nullptr, &materialDescPool_);
    }
    // Bone pool: one storage-buffer descriptor per set.
    {
        VkDescriptorPoolSize sizes[] = {
            {VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, MAX_BONE_SETS},
        };
        VkDescriptorPoolCreateInfo ci{VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO};
        ci.maxSets = MAX_BONE_SETS;
        ci.poolSizeCount = 1;
        ci.pPoolSizes = sizes;
        ci.flags = VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT;
        vkCreateDescriptorPool(device, &ci, nullptr, &boneDescPool_);
    }

    // --- Pipeline layouts ---

    // Main M2 pipeline layout: set 0 = perFrame, set 1 = material, set 2 = bones
    // Push constant: mat4 model + vec2 uvOffset + int texCoordSet + int useBones
    // + two further 4-byte fields (int, float) = 88 bytes
    {
        VkDescriptorSetLayout setLayouts[] = {perFrameLayout, materialSetLayout_, boneSetLayout_};
        VkPushConstantRange pushRange{};
        pushRange.stageFlags = VK_SHADER_STAGE_VERTEX_BIT;
        pushRange.offset = 0;
        pushRange.size = 88; // mat4(64) + vec2(8) + int(4) + int(4) + int(4) + float(4)

        VkPipelineLayoutCreateInfo ci{VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO};
        ci.setLayoutCount = 3;
        ci.pSetLayouts = setLayouts;
        ci.pushConstantRangeCount = 1;
        ci.pPushConstantRanges = &pushRange;
        vkCreatePipelineLayout(device, &ci, nullptr, &pipelineLayout_);
    }

    // Particle pipeline layout: set 0 = perFrame, set 1 = particleTex
    // Push constant: vec2 tileCount + int alphaKey (12 bytes)
    {
        VkDescriptorSetLayout setLayouts[] = {perFrameLayout, particleTexLayout_};
        VkPushConstantRange pushRange{};
        pushRange.stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT;
        pushRange.offset = 0;
        pushRange.size = 12; // vec2 + int

        VkPipelineLayoutCreateInfo ci{VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO};
        ci.setLayoutCount = 2;
        ci.pSetLayouts = setLayouts;
        ci.pushConstantRangeCount = 1;
        ci.pPushConstantRanges = &pushRange;
        vkCreatePipelineLayout(device, &ci, nullptr, &particlePipelineLayout_);
    }

    // Smoke pipeline layout: set 0 = perFrame
    // Push constant: float screenHeight (4 bytes)
    {
        VkDescriptorSetLayout setLayouts[] = {perFrameLayout};
        VkPushConstantRange pushRange{};
        pushRange.stageFlags = VK_SHADER_STAGE_VERTEX_BIT;
        pushRange.offset = 0;
        pushRange.size = 4;

        VkPipelineLayoutCreateInfo ci{VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO};
        ci.setLayoutCount = 1;
        ci.pSetLayouts = setLayouts;
        ci.pushConstantRangeCount = 1;
        ci.pPushConstantRanges = &pushRange;
        vkCreatePipelineLayout(device, &ci, nullptr, &smokePipelineLayout_);
    }

    // --- Load shaders ---
    rendering::VkShaderModule m2Vert, m2Frag;
    rendering::VkShaderModule particleVert, particleFrag;
    rendering::VkShaderModule smokeVert, smokeFrag;

    m2Vert.loadFromFile(device, "assets/shaders/m2.vert.spv");
    m2Frag.loadFromFile(device, "assets/shaders/m2.frag.spv");
    particleVert.loadFromFile(device, "assets/shaders/m2_particle.vert.spv");
    particleFrag.loadFromFile(device, "assets/shaders/m2_particle.frag.spv");
    smokeVert.loadFromFile(device, "assets/shaders/m2_smoke.vert.spv");
    smokeFrag.loadFromFile(device, "assets/shaders/m2_smoke.frag.spv");

    // Shared by the failure path and the normal path so that modules which did
    // load are released the same way in both cases.
    const auto destroyShaderModules = [&]() {
        m2Vert.destroy(); m2Frag.destroy();
        particleVert.destroy(); particleFrag.destroy();
        smokeVert.destroy(); smokeFrag.destroy();
    };

    // Only the main model shaders are mandatory; particle and smoke pipelines
    // are skipped below when their shaders are missing.
    if (!m2Vert.isValid() || !m2Frag.isValid()) {
        LOG_ERROR("M2: Missing required shaders, cannot initialize");
        destroyShaderModules();
        return false;
    }

    VkRenderPass mainPass = vkCtx_->getImGuiRenderPass();

    // --- Build M2 model pipelines ---
    // Vertex input: 18 floats = 72 bytes stride
    // loc 0: vec3 pos (0), loc 1: vec3 normal (12), loc 2: vec2 uv0 (24),
    // loc 5: vec2 uv1 (32), loc 3: vec4 boneWeights (40), loc 4: vec4 boneIndices (56)
    VkVertexInputBindingDescription m2Binding{};
    m2Binding.binding = 0;
    m2Binding.stride = 18 * sizeof(float);
    m2Binding.inputRate = VK_VERTEX_INPUT_RATE_VERTEX;

    std::vector<VkVertexInputAttributeDescription> m2Attrs = {
        {0, 0, VK_FORMAT_R32G32B32_SFLOAT, 0},                     // position
        {1, 0, VK_FORMAT_R32G32B32_SFLOAT, 3 * sizeof(float)},     // normal
        {2, 0, VK_FORMAT_R32G32_SFLOAT, 6 * sizeof(float)},        // texCoord0
        {5, 0, VK_FORMAT_R32G32_SFLOAT, 8 * sizeof(float)},        // texCoord1
        {3, 0, VK_FORMAT_R32G32B32A32_SFLOAT, 10 * sizeof(float)}, // boneWeights
        {4, 0, VK_FORMAT_R32G32B32A32_SFLOAT, 14 * sizeof(float)}, // boneIndices (float)
    };

    // The four model pipelines differ only in blend state and depth writes.
    auto buildM2Pipeline = [&](VkPipelineColorBlendAttachmentState blendState, bool depthWrite) -> VkPipeline {
        return PipelineBuilder()
            .setShaders(m2Vert.stageInfo(VK_SHADER_STAGE_VERTEX_BIT),
                        m2Frag.stageInfo(VK_SHADER_STAGE_FRAGMENT_BIT))
            .setVertexInput({m2Binding}, m2Attrs)
            .setTopology(VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST)
            .setRasterization(VK_POLYGON_MODE_FILL, VK_CULL_MODE_NONE)
            .setDepthTest(true, depthWrite, VK_COMPARE_OP_LESS_OR_EQUAL)
            .setColorBlendAttachment(blendState)
            .setMultisample(vkCtx_->getMsaaSamples())
            .setLayout(pipelineLayout_)
            .setRenderPass(mainPass)
            .setDynamicStates({VK_DYNAMIC_STATE_VIEWPORT, VK_DYNAMIC_STATE_SCISSOR})
            .build(device);
    };

    opaquePipeline_ = buildM2Pipeline(PipelineBuilder::blendDisabled(), true);
    alphaTestPipeline_ = buildM2Pipeline(PipelineBuilder::blendAlpha(), true);
    alphaPipeline_ = buildM2Pipeline(PipelineBuilder::blendAlpha(), false);
    additivePipeline_ = buildM2Pipeline(PipelineBuilder::blendAdditive(), false);

    // --- Build particle pipelines ---
    if (particleVert.isValid() && particleFrag.isValid()) {
        VkVertexInputBindingDescription pBind{};
        pBind.binding = 0;
        pBind.stride = 9 * sizeof(float); // pos3 + color4 + size1 + tile1
        pBind.inputRate = VK_VERTEX_INPUT_RATE_VERTEX;

        std::vector<VkVertexInputAttributeDescription> pAttrs = {
            {0, 0, VK_FORMAT_R32G32B32_SFLOAT, 0},                    // position
            {1, 0, VK_FORMAT_R32G32B32A32_SFLOAT, 3 * sizeof(float)}, // color
            {2, 0, VK_FORMAT_R32_SFLOAT, 7 * sizeof(float)},          // size
            {3, 0, VK_FORMAT_R32_SFLOAT, 8 * sizeof(float)},          // tile
        };

        auto buildParticlePipeline = [&](VkPipelineColorBlendAttachmentState blend) -> VkPipeline {
            return PipelineBuilder()
                .setShaders(particleVert.stageInfo(VK_SHADER_STAGE_VERTEX_BIT),
                            particleFrag.stageInfo(VK_SHADER_STAGE_FRAGMENT_BIT))
                .setVertexInput({pBind}, pAttrs)
                .setTopology(VK_PRIMITIVE_TOPOLOGY_POINT_LIST)
                .setRasterization(VK_POLYGON_MODE_FILL, VK_CULL_MODE_NONE)
                .setDepthTest(true, false, VK_COMPARE_OP_LESS_OR_EQUAL)
                .setColorBlendAttachment(blend)
                .setMultisample(vkCtx_->getMsaaSamples())
                .setLayout(particlePipelineLayout_)
                .setRenderPass(mainPass)
                .setDynamicStates({VK_DYNAMIC_STATE_VIEWPORT, VK_DYNAMIC_STATE_SCISSOR})
                .build(device);
        };

        particlePipeline_ = buildParticlePipeline(PipelineBuilder::blendAlpha());
        particleAdditivePipeline_ = buildParticlePipeline(PipelineBuilder::blendAdditive());
    }

    // --- Build smoke pipeline ---
    if (smokeVert.isValid() && smokeFrag.isValid()) {
        VkVertexInputBindingDescription sBind{};
        sBind.binding = 0;
        sBind.stride = 6 * sizeof(float); // pos3 + lifeRatio1 + size1 + isSpark1
        sBind.inputRate = VK_VERTEX_INPUT_RATE_VERTEX;

        std::vector<VkVertexInputAttributeDescription> sAttrs = {
            {0, 0, VK_FORMAT_R32G32B32_SFLOAT, 0},           // position
            {1, 0, VK_FORMAT_R32_SFLOAT, 3 * sizeof(float)}, // lifeRatio
            {2, 0, VK_FORMAT_R32_SFLOAT, 4 * sizeof(float)}, // size
            {3, 0, VK_FORMAT_R32_SFLOAT, 5 * sizeof(float)}, // isSpark
        };

        smokePipeline_ = PipelineBuilder()
            .setShaders(smokeVert.stageInfo(VK_SHADER_STAGE_VERTEX_BIT),
                        smokeFrag.stageInfo(VK_SHADER_STAGE_FRAGMENT_BIT))
            .setVertexInput({sBind}, sAttrs)
            .setTopology(VK_PRIMITIVE_TOPOLOGY_POINT_LIST)
            .setRasterization(VK_POLYGON_MODE_FILL, VK_CULL_MODE_NONE)
            .setDepthTest(true, false, VK_COMPARE_OP_LESS_OR_EQUAL)
            .setColorBlendAttachment(PipelineBuilder::blendAlpha())
            .setMultisample(vkCtx_->getMsaaSamples())
            .setLayout(smokePipelineLayout_)
            .setRenderPass(mainPass)
            .setDynamicStates({VK_DYNAMIC_STATE_VIEWPORT, VK_DYNAMIC_STATE_SCISSOR})
            .build(device);
    }

    // Clean up shader modules
    destroyShaderModules();

    // --- Create dynamic particle buffers (mapped for CPU writes) ---
    {
        VkBufferCreateInfo bci{VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO};
        bci.usage = VK_BUFFER_USAGE_VERTEX_BUFFER_BIT;

        VmaAllocationCreateInfo aci{};
        aci.usage = VMA_MEMORY_USAGE_CPU_TO_GPU;
        aci.flags = VMA_ALLOCATION_CREATE_MAPPED_BIT;

        // Each buffer gets its own zeroed VmaAllocationInfo and its result is
        // checked, so a failed creation leaves a null mapped pointer instead of
        // inheriting the previous buffer's mapping.
        const auto createMappedVertexBuffer = [&](VkDeviceSize byteSize, auto& buffer,
                                                  auto& allocation, auto& mapped,
                                                  const char* label) {
            bci.size = byteSize;
            VmaAllocationInfo allocInfo{};
            if (vmaCreateBuffer(vkCtx_->getAllocator(), &bci, &aci,
                                &buffer, &allocation, &allocInfo) != VK_SUCCESS) {
                LOG_ERROR("M2: Failed to create ", label, " vertex buffer");
                mapped = nullptr;
                return;
            }
            mapped = allocInfo.pMappedData;
        };

        // Smoke particle buffer
        createMappedVertexBuffer(MAX_SMOKE_PARTICLES * 6 * sizeof(float),
                                 smokeVB_, smokeVBAlloc_, smokeVBMapped_, "smoke particle");

        // M2 particle buffer
        createMappedVertexBuffer(MAX_M2_PARTICLES * 9 * sizeof(float),
                                 m2ParticleVB_, m2ParticleVBAlloc_, m2ParticleVBMapped_, "M2 particle");

        // Dedicated glow sprite buffer (separate from particle VB to avoid data race)
        createMappedVertexBuffer(MAX_GLOW_SPRITES * 9 * sizeof(float),
                                 glowVB_, glowVBAlloc_, glowVBMapped_, "glow sprite");
    }

    // --- Create white fallback texture ---
    {
        uint8_t white[] = {255, 255, 255, 255};
        whiteTexture_ = std::make_unique<VkTexture>();
        whiteTexture_->upload(*vkCtx_, white, 1, 1, VK_FORMAT_R8G8B8A8_UNORM);
        whiteTexture_->createSampler(device, VK_FILTER_LINEAR, VK_FILTER_LINEAR, VK_SAMPLER_ADDRESS_MODE_REPEAT);
    }

    // --- Generate soft radial gradient glow texture ---
    {
        static constexpr int SZ = 64;
        std::vector<uint8_t> px(SZ * SZ * 4);
        float half = SZ / 2.0f;
        for (int y = 0; y < SZ; y++) {
            for (int x = 0; x < SZ; x++) {
                // Distance of the texel centre from the image centre, 1.0 at the edge.
                float dx = (x + 0.5f - half) / half;
                float dy = (y + 0.5f - half) / half;
                float r = std::sqrt(dx * dx + dy * dy);
                float a = std::max(0.0f, 1.0f - r);
                a = a * a; // Quadratic falloff
                int idx = (y * SZ + x) * 4;
                px[idx + 0] = 255;
                px[idx + 1] = 255;
                px[idx + 2] = 255;
                px[idx + 3] = static_cast<uint8_t>(a * 255);
            }
        }
        glowTexture_ = std::make_unique<VkTexture>();
        glowTexture_->upload(*vkCtx_, px.data(), SZ, SZ, VK_FORMAT_R8G8B8A8_UNORM);
        glowTexture_->createSampler(device, VK_FILTER_LINEAR, VK_FILTER_LINEAR, VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE);
        // Pre-allocate glow texture descriptor set (reused every frame)
        if (particleTexLayout_ && materialDescPool_) {
            VkDescriptorSetAllocateInfo ai{VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO};
            ai.descriptorPool = materialDescPool_;
            ai.descriptorSetCount = 1;
            ai.pSetLayouts = &particleTexLayout_;
            if (vkAllocateDescriptorSets(device, &ai, &glowTexDescSet_) == VK_SUCCESS) {
                VkDescriptorImageInfo imgInfo = glowTexture_->descriptorInfo();
                VkWriteDescriptorSet write{VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET};
                write.dstSet = glowTexDescSet_;
                write.dstBinding = 0;
                write.descriptorCount = 1;
                write.descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER;
                write.pImageInfo = &imgInfo;
                vkUpdateDescriptorSets(device, 1, &write, 0, nullptr);
            }
        }
    }

    // --- Environment-tunable cache limits ---
    textureCacheBudgetBytes_ =
        envSizeMBOrDefault("WOWEE_M2_TEX_CACHE_MB", 4096) * 1024ull * 1024ull;
    // The model limit is a plain count, not a size in MiB, so it is read with
    // the unit-less helper.
    modelCacheLimit_ = envSizeOrDefault("WOWEE_M2_MODEL_LIMIT", 6000);
    LOG_INFO("M2 texture cache budget: ", textureCacheBudgetBytes_ / (1024 * 1024), " MB");
    LOG_INFO("M2 model cache limit: ", modelCacheLimit_);

    LOG_INFO("M2 renderer initialized (Vulkan)");
    initialized_ = true;
    return true;
}

void M2Renderer::shutdown() {
    LOG_INFO("Shutting down M2 renderer...");
    if (!vkCtx_) return;

    vkDeviceWaitIdle(vkCtx_->getDevice());
    VkDevice device = vkCtx_->getDevice();
    VmaAllocator alloc = vkCtx_->getAllocator();

    // Delete model GPU resources
    for (auto& [id, model] : models) {
        destroyModelGPU(model);
    }
    models.clear();

    // Destroy instance bone buffers
    for (auto& inst : instances) {
        destroyInstanceBones(inst);
    }
    instances.clear();
    spatialGrid.clear();
    instanceIndexById.clear();
    instanceDedupMap_.clear();

    // Delete cached textures
    textureCache.clear();
    textureCacheBytes_ = 0;
    textureCacheCounter_ = 0;
    textureHasAlphaByPtr_.clear();
    textureColorKeyBlackByPtr_.clear();
    failedTextureCache_.clear();
    loggedTextureLoadFails_.clear();
    textureBudgetRejectWarnings_ = 0;
    whiteTexture_.reset();
    glowTexture_.reset();

    // Clean up particle buffers
    if (smokeVB_) { vmaDestroyBuffer(alloc, smokeVB_, smokeVBAlloc_); smokeVB_ = VK_NULL_HANDLE; }
    if (m2ParticleVB_) { vmaDestroyBuffer(alloc, m2ParticleVB_, m2ParticleVBAlloc_); m2ParticleVB_ = VK_NULL_HANDLE; }
    if (glowVB_) { vmaDestroyBuffer(alloc, glowVB_, glowVBAlloc_); glowVB_ = VK_NULL_HANDLE; }
    smokeParticles.clear();

    // Destroy pipelines
    auto destroyPipeline = [&](VkPipeline& p) { if (p) { vkDestroyPipeline(device, p, nullptr); p = VK_NULL_HANDLE; } };
    destroyPipeline(opaquePipeline_);
    destroyPipeline(alphaTestPipeline_);
    destroyPipeline(alphaPipeline_);
    destroyPipeline(additivePipeline_);
    destroyPipeline(particlePipeline_);
    destroyPipeline(particleAdditivePipeline_);
    destroyPipeline(smokePipeline_);

    if (pipelineLayout_) { vkDestroyPipelineLayout(device, pipelineLayout_, nullptr); pipelineLayout_ = VK_NULL_HANDLE; }
    if (particlePipelineLayout_) { vkDestroyPipelineLayout(device, particlePipelineLayout_, nullptr); particlePipelineLayout_ = VK_NULL_HANDLE; }
    if (smokePipelineLayout_) { vkDestroyPipelineLayout(device, smokePipelineLayout_, nullptr); smokePipelineLayout_ = VK_NULL_HANDLE; }

    // Destroy descriptor pools and layouts
    if (materialDescPool_) { vkDestroyDescriptorPool(device, materialDescPool_, nullptr); materialDescPool_ = VK_NULL_HANDLE; }
    if (boneDescPool_) { vkDestroyDescriptorPool(device, boneDescPool_, nullptr); boneDescPool_ = VK_NULL_HANDLE; }
    if (materialSetLayout_) { vkDestroyDescriptorSetLayout(device, materialSetLayout_, nullptr); materialSetLayout_ = VK_NULL_HANDLE; }
    if (boneSetLayout_) { vkDestroyDescriptorSetLayout(device, boneSetLayout_, nullptr); boneSetLayout_ = VK_NULL_HANDLE; }
    if (particleTexLayout_) { vkDestroyDescriptorSetLayout(device, particleTexLayout_, nullptr); particleTexLayout_ = VK_NULL_HANDLE; }

    // Destroy shadow resources
    destroyPipeline(shadowPipeline_);
    if (shadowPipelineLayout_) { vkDestroyPipelineLayout(device, shadowPipelineLayout_, nullptr); shadowPipelineLayout_ = VK_NULL_HANDLE; }
    if (shadowTexPool_) { vkDestroyDescriptorPool(device, shadowTexPool_, nullptr); shadowTexPool_ = VK_NULL_HANDLE; }
    if (shadowParamsPool_) { vkDestroyDescriptorPool(device, shadowParamsPool_, nullptr); shadowParamsPool_ = VK_NULL_HANDLE; }
    if (shadowParamsLayout_) { vkDestroyDescriptorSetLayout(device, shadowParamsLayout_, nullptr); shadowParamsLayout_ = VK_NULL_HANDLE; }
    if (shadowParamsUBO_) { vmaDestroyBuffer(alloc, shadowParamsUBO_, shadowParamsAlloc_); shadowParamsUBO_ = VK_NULL_HANDLE; }

    initialized_ = false;
}

// Frees the vertex buffer, index buffer and per-batch material UBOs of `model`
// and nulls the freed buffer handles. Does nothing when no Vulkan context is set.
void M2Renderer::destroyModelGPU(M2ModelGPU& model) {
    if (vkCtx_ == nullptr) {
        return;
    }

    VmaAllocator allocator = vkCtx_->getAllocator();
    const auto releaseBuffer = [allocator](auto& buffer, auto& allocation) {
        if (buffer) {
            vmaDestroyBuffer(allocator, buffer, allocation);
            buffer = VK_NULL_HANDLE;
        }
    };

    releaseBuffer(model.vertexBuffer, model.vertexAlloc);
    releaseBuffer(model.indexBuffer, model.indexAlloc);

    // Material descriptor sets are not freed here; they go away when their
    // pool is reset or destroyed.
    for (auto& batch : model.batches) {
        releaseBuffer(batch.materialUBO, batch.materialUBOAlloc);
    }
}

// Releases both bone buffers of an instance (slots 0 and 1) and clears the
// matching handle and mapped pointer. Does nothing when there is no Vulkan context.
void M2Renderer::destroyInstanceBones(M2Instance& inst) {
    if (!vkCtx_) return;

    const VmaAllocator allocator = vkCtx_->getAllocator();
    for (int slot = 0; slot < 2; ++slot) {
        // The descriptor set (boneSet) is not freed here; it is reclaimed when
        // the descriptor pool is reset/destroyed.
        if (!inst.boneBuffer[slot]) continue;

        vmaDestroyBuffer(allocator, inst.boneBuffer[slot], inst.boneAlloc[slot]);
        inst.boneBuffer[slot] = VK_NULL_HANDLE;
        inst.boneMapped[slot] = nullptr;
    }
}

// Allocates one descriptor set with the material layout from the material
// descriptor pool.
//
// Returns the new set, or VK_NULL_HANDLE when the renderer has no Vulkan
// context, the pool has not been created (or was already destroyed), or the
// Vulkan allocation call reports an error.
VkDescriptorSet M2Renderer::allocateMaterialSet() {
    // Same guard as the destroy helpers: without a context there is no device,
    // and a null pool must never be handed to Vulkan.
    if (!vkCtx_ || !materialDescPool_) return VK_NULL_HANDLE;

    VkDescriptorSetAllocateInfo ai{VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO};
    ai.descriptorPool = materialDescPool_;
    ai.descriptorSetCount = 1;
    ai.pSetLayouts = &materialSetLayout_;

    VkDescriptorSet set = VK_NULL_HANDLE;
    const VkResult result = vkAllocateDescriptorSets(vkCtx_->getDevice(), &ai, &set);
    if (result != VK_SUCCESS) {
        // Report only the first few failures so a failing pool cannot flood the log.
        static uint32_t loggedFailures = 0;
        if (loggedFailures < 3) {
            LOG_WARNING("M2 material descriptor set allocation failed (VkResult=",
                        static_cast<int>(result), ")");
            ++loggedFailures;
        }
        return VK_NULL_HANDLE;
    }
    return set;
}

// Allocates one descriptor set with the bone layout from the bone descriptor pool.
//
// Returns the new set, or VK_NULL_HANDLE when the renderer has no Vulkan
// context, the pool has not been created (or was already destroyed), or the
// Vulkan allocation call reports an error.
VkDescriptorSet M2Renderer::allocateBoneSet() {
    // Same guard as the destroy helpers: without a context there is no device,
    // and a null pool must never be handed to Vulkan.
    if (!vkCtx_ || !boneDescPool_) return VK_NULL_HANDLE;

    VkDescriptorSetAllocateInfo ai{VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO};
    ai.descriptorPool = boneDescPool_;
    ai.descriptorSetCount = 1;
    ai.pSetLayouts = &boneSetLayout_;

    VkDescriptorSet set = VK_NULL_HANDLE;
    const VkResult result = vkAllocateDescriptorSets(vkCtx_->getDevice(), &ai, &set);
    if (result != VK_SUCCESS) {
        // Report only the first few failures so a failing pool cannot flood the log.
        static uint32_t loggedFailures = 0;
        if (loggedFailures < 3) {
            LOG_WARNING("M2 bone descriptor set allocation failed (VkResult=",
                        static_cast<int>(result), ")");
            ++loggedFailures;
        }
        return VK_NULL_HANDLE;
    }
    return set;
}

// ---------------------------------------------------------------------------
// M2 collision mesh: build spatial grid + classify triangles
// ---------------------------------------------------------------------------
void M2ModelGPU::CollisionMesh::build() {
    if (indices.size() < 3 || vertices.empty()) return;
    triCount = static_cast<uint32_t>(indices.size() / 3);

    // Bounding box for grid
    glm::vec3 bmin(std::numeric_limits<float>::max());
    glm::vec3 bmax(-std::numeric_limits<float>::max());
    for (const auto& v : vertices) {
        bmin = glm::min(bmin, v);
        bmax = glm::max(bmax, v);
    }

    gridOrigin = glm::vec2(bmin.x, bmin.y);
    gridCellsX = std::max(1, std::min(32, static_cast<int>(std::ceil((bmax.x - bmin.x) / CELL_SIZE))));
    gridCellsY = std::max(1, std::min(32, static_cast<int>(std::ceil((bmax.y - bmin.y) / CELL_SIZE))));

    cellFloorTris.resize(gridCellsX * gridCellsY);
    cellWallTris.resize(gridCellsX * gridCellsY);
    triBounds.resize(triCount);

    for (uint32_t ti = 0; ti < triCount; ti++) {
        uint16_t i0 = indices[ti * 3];
        uint16_t i1 = indices[ti * 3 + 1];
        uint16_t i2 = indices[ti * 3 + 2];
        if (i0 >= vertices.size() || i1 >= vertices.size() || i2 >= vertices.size()) continue;

        const auto& v0 = vertices[i0];
        const auto& v1 = vertices[i1];
        const auto& v2 = vertices[i2];

        triBounds[ti].minZ = std::min({v0.z, v1.z, v2.z});
        triBounds[ti].maxZ = std::max({v0.z, v1.z, v2.z});

        glm::vec3 normal = glm::cross(v1 - v0, v2 - v0);
        float normalLen = glm::length(normal);
        float absNz = (normalLen > 0.001f) ? std::abs(normal.z / normalLen) : 0.0f;
        bool isFloor = (absNz >= 0.35f);  // ~70° max slope (relaxed for steep stairs)
        bool isWall  = (absNz < 0.65f);

        float triMinX = std::min({v0.x, v1.x, v2.x});
        float triMaxX = std::max({v0.x, v1.x, v2.x});
        float triMinY = std::min({v0.y, v1.y, v2.y});
        float triMaxY = std::max({v0.y, v1.y, v2.y});

        int cxMin = std::clamp(static_cast<int>((triMinX - gridOrigin.x) / CELL_SIZE), 0, gridCellsX - 1);
        int cxMax = std::clamp(static_cast<int>((triMaxX - gridOrigin.x) / CELL_SIZE), 0, gridCellsX - 1);
        int cyMin = std::clamp(static_cast<int>((triMinY - gridOrigin.y) / CELL_SIZE), 0, gridCellsY - 1);
        int cyMax = std::clamp(static_cast<int>((triMaxY - gridOrigin.y) / CELL_SIZE), 0, gridCellsY - 1);

        for (int cy = cyMin; cy <= cyMax; cy++) {
            for (int cx = cxMin; cx <= cxMax; cx++) {
                int ci = cy * gridCellsX + cx;
                if (isFloor) cellFloorTris[ci].push_back(ti);
                if (isWall)  cellWallTris[ci].push_back(ti);
            }
        }
    }
}

// Collects the indices of floor triangles registered in the grid cells covered
// by the XY rectangle [minX, maxX] x [minY, maxY]. The rectangle is clamped to
// the grid, so a query outside the mesh still reads the nearest edge cells.
// `out` is cleared first and ends up sorted with no duplicates; it stays empty
// when the grid has no cells.
void M2ModelGPU::CollisionMesh::getFloorTrisInRange(
        float minX, float minY, float maxX, float maxY,
        std::vector<uint32_t>& out) const {
    out.clear();
    if (gridCellsX == 0 || gridCellsY == 0) return;

    const int lastCol = gridCellsX - 1;
    const int lastRow = gridCellsY - 1;
    const int colFirst = std::clamp(static_cast<int>((minX - gridOrigin.x) / CELL_SIZE), 0, lastCol);
    const int colLast = std::clamp(static_cast<int>((maxX - gridOrigin.x) / CELL_SIZE), 0, lastCol);
    const int rowFirst = std::clamp(static_cast<int>((minY - gridOrigin.y) / CELL_SIZE), 0, lastRow);
    const int rowLast = std::clamp(static_cast<int>((maxY - gridOrigin.y) / CELL_SIZE), 0, lastRow);

    for (int row = rowFirst; row <= rowLast; ++row) {
        const int rowBase = row * gridCellsX;
        for (int col = colFirst; col <= colLast; ++col) {
            const auto& bucket = cellFloorTris[rowBase + col];
            out.insert(out.end(), bucket.begin(), bucket.end());
        }
    }

    // A triangle spanning several cells was appended once per cell; keep one copy.
    std::sort(out.begin(), out.end());
    const auto uniqueEnd = std::unique(out.begin(), out.end());
    out.erase(uniqueEnd, out.end());
}

// Collects the indices of wall triangles registered in the grid cells covered
// by the XY rectangle [minX, maxX] x [minY, maxY]. The rectangle is clamped to
// the grid, so a query outside the mesh still reads the nearest edge cells.
// `out` is cleared first and ends up sorted with no duplicates; it stays empty
// when the grid has no cells.
void M2ModelGPU::CollisionMesh::getWallTrisInRange(
        float minX, float minY, float maxX, float maxY,
        std::vector<uint32_t>& out) const {
    out.clear();
    if (gridCellsX == 0 || gridCellsY == 0) return;

    const int lastCol = gridCellsX - 1;
    const int lastRow = gridCellsY - 1;
    const int colFirst = std::clamp(static_cast<int>((minX - gridOrigin.x) / CELL_SIZE), 0, lastCol);
    const int colLast = std::clamp(static_cast<int>((maxX - gridOrigin.x) / CELL_SIZE), 0, lastCol);
    const int rowFirst = std::clamp(static_cast<int>((minY - gridOrigin.y) / CELL_SIZE), 0, lastRow);
    const int rowLast = std::clamp(static_cast<int>((maxY - gridOrigin.y) / CELL_SIZE), 0, lastRow);

    for (int row = rowFirst; row <= rowLast; ++row) {
        const int rowBase = row * gridCellsX;
        for (int col = colFirst; col <= colLast; ++col) {
            const auto& bucket = cellWallTris[rowBase + col];
            out.insert(out.end(), bucket.begin(), bucket.end());
        }
    }

    // A triangle spanning several cells was appended once per cell; keep one copy.
    std::sort(out.begin(), out.end());
    const auto uniqueEnd = std::unique(out.begin(), out.end());
    out.erase(uniqueEnd, out.end());
}

// Reports whether a model with this id is present in the loaded-model cache.
bool M2Renderer::hasModel(uint32_t modelId) const {
    return models.count(modelId) != 0;
}

bool M2Renderer::loadModel(const pipeline::M2Model& model, uint32_t modelId) {
    if (models.find(modelId) != models.end()) {
        // Already loaded
        return true;
    }
    if (models.size() >= modelCacheLimit_) {
        if (modelLimitRejectWarnings_ < 3) {
            LOG_WARNING("M2 model cache full (", models.size(), "/", modelCacheLimit_,
                        "), skipping model load: id=", modelId, " name=", model.name);
        }
        ++modelLimitRejectWarnings_;
        return false;
    }

    bool hasGeometry = !model.vertices.empty() && !model.indices.empty();
    bool hasParticles = !model.particleEmitters.empty();
    if (!hasGeometry && !hasParticles) {
        LOG_WARNING("M2 model has no geometry and no particles: ", model.name);
        return false;
    }

    M2ModelGPU gpuModel;
    gpuModel.name = model.name;

    // Detect invisible trap models (event objects that should not render or collide)
    std::string lowerName = model.name;
    std::transform(lowerName.begin(), lowerName.end(), lowerName.begin(),
                   [](unsigned char c) { return static_cast<char>(std::tolower(c)); });
    bool isInvisibleTrap = (lowerName.find("invisibletrap") != std::string::npos);
    gpuModel.isInvisibleTrap = isInvisibleTrap;
    if (isInvisibleTrap) {
        LOG_INFO("Loading InvisibleTrap model: ", model.name, " (will be invisible, no collision)");
    }
    // Use tight bounds from actual vertices for collision/camera occlusion.
    // Header bounds in some M2s are overly conservative.
    glm::vec3 tightMin(0.0f);
    glm::vec3 tightMax(0.0f);
    if (hasGeometry) {
        tightMin = glm::vec3(std::numeric_limits<float>::max());
        tightMax = glm::vec3(-std::numeric_limits<float>::max());
        for (const auto& v : model.vertices) {
            tightMin = glm::min(tightMin, v.position);
            tightMax = glm::max(tightMax, v.position);
        }
    }
    bool foliageOrTreeLike = false;
    bool chestName = false;
    bool groundDetailModel = false;
    {
        std::string lowerName = model.name;
        std::transform(lowerName.begin(), lowerName.end(), lowerName.begin(),
                       [](unsigned char c) { return static_cast<char>(std::tolower(c)); });
        gpuModel.collisionSteppedFountain = (lowerName.find("fountain") != std::string::npos);

        glm::vec3 dims = tightMax - tightMin;
        float horiz = std::max(dims.x, dims.y);
        float vert = std::max(0.0f, dims.z);
        bool lowWideShape = (horiz > 1.4f && vert > 0.2f && vert < horiz * 0.70f);
        bool likelyCurbName =
            (lowerName.find("planter") != std::string::npos) ||
            (lowerName.find("curb") != std::string::npos) ||
            (lowerName.find("base") != std::string::npos) ||
            (lowerName.find("ring") != std::string::npos) ||
            (lowerName.find("well") != std::string::npos);
        bool knownStormwindPlanter =
            (lowerName.find("stormwindplanter") != std::string::npos) ||
            (lowerName.find("stormwindwindowplanter") != std::string::npos);
        bool lowPlatformShape = (horiz > 1.8f && vert > 0.2f && vert < 1.8f);
        bool bridgeName =
            (lowerName.find("bridge") != std::string::npos) ||
            (lowerName.find("plank") != std::string::npos) ||
            (lowerName.find("walkway") != std::string::npos);
        gpuModel.collisionSteppedLowPlatform = (!gpuModel.collisionSteppedFountain) &&
                                               (knownStormwindPlanter ||
                                                bridgeName ||
                                                (likelyCurbName && (lowPlatformShape || lowWideShape)));
        gpuModel.collisionBridge = bridgeName;

        bool isPlanter = (lowerName.find("planter") != std::string::npos);
        gpuModel.collisionPlanter = isPlanter;
        bool statueName =
            (lowerName.find("statue") != std::string::npos) ||
            (lowerName.find("monument") != std::string::npos) ||
            (lowerName.find("sculpture") != std::string::npos);
        gpuModel.collisionStatue = statueName;
        bool smallSolidPropName =
            statueName ||
            (lowerName.find("crate") != std::string::npos) ||
            (lowerName.find("box") != std::string::npos) ||
            (lowerName.find("chest") != std::string::npos) ||
            (lowerName.find("barrel") != std::string::npos);
        chestName = (lowerName.find("chest") != std::string::npos);
        bool foliageName =
            (lowerName.find("bush") != std::string::npos) ||
            (lowerName.find("grass") != std::string::npos) ||
            (lowerName.find("drygrass") != std::string::npos) ||
            (lowerName.find("dry_grass") != std::string::npos) ||
            (lowerName.find("dry-grass") != std::string::npos) ||
            (lowerName.find("deadgrass") != std::string::npos) ||
            (lowerName.find("dead_grass") != std::string::npos) ||
            (lowerName.find("dead-grass") != std::string::npos) ||
            ((lowerName.find("plant") != std::string::npos) && !isPlanter) ||
            (lowerName.find("flower") != std::string::npos) ||
            (lowerName.find("shrub") != std::string::npos) ||
            (lowerName.find("fern") != std::string::npos) ||
            (lowerName.find("vine") != std::string::npos) ||
            (lowerName.find("lily") != std::string::npos) ||
            (lowerName.find("weed") != std::string::npos) ||
            (lowerName.find("wheat") != std::string::npos) ||
            (lowerName.find("pumpkin") != std::string::npos) ||
            (lowerName.find("firefly") != std::string::npos) ||
            (lowerName.find("fireflies") != std::string::npos) ||
            (lowerName.find("fireflys") != std::string::npos) ||
            (lowerName.find("mushroom") != std::string::npos) ||
            (lowerName.find("fungus") != std::string::npos) ||
            (lowerName.find("toadstool") != std::string::npos) ||
            (lowerName.find("root") != std::string::npos) ||
            (lowerName.find("branch") != std::string::npos) ||
            (lowerName.find("thorn") != std::string::npos) ||
            (lowerName.find("moss") != std::string::npos) ||
            (lowerName.find("ivy") != std::string::npos) ||
            (lowerName.find("seaweed") != std::string::npos) ||
            (lowerName.find("kelp") != std::string::npos) ||
            (lowerName.find("cattail") != std::string::npos) ||
            (lowerName.find("reed") != std::string::npos) ||
            (lowerName.find("palm") != std::string::npos) ||
            (lowerName.find("bamboo") != std::string::npos) ||
            (lowerName.find("banana") != std::string::npos) ||
            (lowerName.find("coconut") != std::string::npos) ||
            (lowerName.find("canopy") != std::string::npos) ||
            (lowerName.find("hedge") != std::string::npos) ||
            (lowerName.find("cactus") != std::string::npos) ||
            (lowerName.find("leaf") != std::string::npos) ||
            (lowerName.find("leaves") != std::string::npos) ||
            (lowerName.find("stalk") != std::string::npos) ||
            (lowerName.find("corn") != std::string::npos) ||
            (lowerName.find("crop") != std::string::npos) ||
            (lowerName.find("hay") != std::string::npos) ||
            (lowerName.find("frond") != std::string::npos) ||
            (lowerName.find("algae") != std::string::npos) ||
            (lowerName.find("coral") != std::string::npos);
        bool treeLike = (lowerName.find("tree") != std::string::npos);
        foliageOrTreeLike = (foliageName || treeLike);
        groundDetailModel =
            (lowerName.find("\\nodxt\\detail\\") != std::string::npos) ||
            (lowerName.find("\\detail\\") != std::string::npos);
        bool hardTreePart =
            (lowerName.find("trunk") != std::string::npos) ||
            (lowerName.find("stump") != std::string::npos) ||
            (lowerName.find("log") != std::string::npos);
        // Trees with visible trunks get collision. Threshold: canopy wider than 6
        // model units AND taller than 4 units (filters out small bushes/saplings).
        bool treeWithTrunk = treeLike && !hardTreePart && !foliageName && horiz > 6.0f && vert > 4.0f;
        bool softTree = treeLike && !hardTreePart && !treeWithTrunk;
        bool forceSolidCurb = gpuModel.collisionSteppedLowPlatform || knownStormwindPlanter || likelyCurbName || gpuModel.collisionPlanter;
        bool narrowVerticalName =
            (lowerName.find("lamp") != std::string::npos) ||
            (lowerName.find("lantern") != std::string::npos) ||
            (lowerName.find("post") != std::string::npos) ||
            (lowerName.find("pole") != std::string::npos);
        bool narrowVerticalShape =
            (horiz > 0.12f && horiz < 2.0f && vert > 2.2f && vert > horiz * 1.8f);
        gpuModel.collisionTreeTrunk = treeWithTrunk;
        gpuModel.collisionNarrowVerticalProp =
            !gpuModel.collisionSteppedFountain &&
            !gpuModel.collisionSteppedLowPlatform &&
            (narrowVerticalName || narrowVerticalShape);
        bool genericSolidPropShape =
            (horiz > 0.6f && horiz < 6.0f && vert > 0.30f && vert < 4.0f && vert > horiz * 0.16f) ||
            statueName;
        bool curbLikeName =
            (lowerName.find("curb") != std::string::npos) ||
            (lowerName.find("planter") != std::string::npos) ||
            (lowerName.find("ring") != std::string::npos) ||
            (lowerName.find("well") != std::string::npos) ||
            (lowerName.find("base") != std::string::npos);
        bool lowPlatformLikeShape = lowWideShape || lowPlatformShape;
        bool carpetOrRug =
            (lowerName.find("carpet") != std::string::npos) ||
            (lowerName.find("rug") != std::string::npos);
        gpuModel.collisionSmallSolidProp =
            !gpuModel.collisionSteppedFountain &&
            !gpuModel.collisionSteppedLowPlatform &&
            !gpuModel.collisionNarrowVerticalProp &&
            !gpuModel.collisionTreeTrunk &&
            !curbLikeName &&
            !lowPlatformLikeShape &&
            (smallSolidPropName || (genericSolidPropShape && !foliageName && !softTree));
        // Disable collision for foliage, soft trees, and decorative carpets/rugs
        gpuModel.collisionNoBlock = ((foliageName || softTree || carpetOrRug) &&
                                     !forceSolidCurb);
    }
    gpuModel.boundMin = tightMin;
    gpuModel.boundMax = tightMax;
    gpuModel.boundRadius = model.boundRadius;
    gpuModel.indexCount = static_cast<uint32_t>(model.indices.size());
    gpuModel.vertexCount = static_cast<uint32_t>(model.vertices.size());

    // Store bone/sequence data for animation
    gpuModel.bones = model.bones;
    gpuModel.sequences = model.sequences;
    gpuModel.globalSequenceDurations = model.globalSequenceDurations;
    gpuModel.hasAnimation = false;
    for (const auto& bone : model.bones) {
        if (bone.translation.hasData() || bone.rotation.hasData() || bone.scale.hasData()) {
            gpuModel.hasAnimation = true;
            break;
        }
    }
    bool ambientCreature =
        (lowerName.find("firefly") != std::string::npos) ||
        (lowerName.find("fireflies") != std::string::npos) ||
        (lowerName.find("fireflys") != std::string::npos) ||
        (lowerName.find("dragonfly") != std::string::npos) ||
        (lowerName.find("dragonflies") != std::string::npos) ||
        (lowerName.find("butterfly") != std::string::npos) ||
        (lowerName.find("moth") != std::string::npos);
    gpuModel.disableAnimation = (foliageOrTreeLike && !ambientCreature) || chestName;
    gpuModel.shadowWindFoliage = foliageOrTreeLike && !ambientCreature;
    gpuModel.isFoliageLike = foliageOrTreeLike && !ambientCreature;
    gpuModel.isElvenLike =
        (lowerName.find("elf") != std::string::npos) ||
        (lowerName.find("elven") != std::string::npos) ||
        (lowerName.find("quel") != std::string::npos);
    gpuModel.isLanternLike =
        (lowerName.find("lantern") != std::string::npos) ||
        (lowerName.find("lamp") != std::string::npos) ||
        (lowerName.find("light") != std::string::npos);
    gpuModel.isKoboldFlame =
        (lowerName.find("kobold") != std::string::npos) &&
        ((lowerName.find("candle") != std::string::npos) ||
         (lowerName.find("torch") != std::string::npos) ||
         (lowerName.find("mine") != std::string::npos));
    gpuModel.isGroundDetail = groundDetailModel;
    if (groundDetailModel) {
        // Ground clutter (grass/pebbles/detail cards) should never block camera/movement.
        gpuModel.collisionNoBlock = true;
    }
    // Spell effect / pure-visual models: particle-dominated with minimal geometry,
    // or named effect models (light shafts, portals, emitters, spotlights)
    bool effectByName =
        (lowerName.find("lightshaft") != std::string::npos) ||
        (lowerName.find("volumetriclight") != std::string::npos) ||
        (lowerName.find("instanceportal") != std::string::npos) ||
        (lowerName.find("instancenewportal") != std::string::npos) ||
        (lowerName.find("mageportal") != std::string::npos) ||
        (lowerName.find("worldtreeportal") != std::string::npos) ||
        (lowerName.find("particleemitter") != std::string::npos) ||
        (lowerName.find("bubbles") != std::string::npos) ||
        (lowerName.find("spotlight") != std::string::npos) ||
        (lowerName.find("hazardlight") != std::string::npos) ||
        (lowerName.find("lavasplash") != std::string::npos) ||
        (lowerName.find("lavabubble") != std::string::npos) ||
        (lowerName.find("lavasteam") != std::string::npos) ||
        (lowerName.find("wisps") != std::string::npos);
    gpuModel.isSpellEffect = effectByName ||
                              (hasParticles && model.vertices.size() <= 200 &&
                               model.particleEmitters.size() >= 3);
    gpuModel.isLavaModel =
        (lowerName.find("forgelava") != std::string::npos) ||
        (lowerName.find("lavapot") != std::string::npos) ||
        (lowerName.find("lavaflow") != std::string::npos);
    gpuModel.isInstancePortal =
        (lowerName.find("instanceportal") != std::string::npos) ||
        (lowerName.find("instancenewportal") != std::string::npos) ||
        (lowerName.find("portalfx") != std::string::npos) ||
        (lowerName.find("spellportal") != std::string::npos);
    // Instance portals are spell effects too (additive blend, no collision)
    if (gpuModel.isInstancePortal) {
        gpuModel.isSpellEffect = true;
    }
    // Water vegetation: cattails, reeds, bulrushes, kelp, seaweed, lilypad near water
    gpuModel.isWaterVegetation =
        (lowerName.find("cattail") != std::string::npos) ||
        (lowerName.find("reed") != std::string::npos) ||
        (lowerName.find("bulrush") != std::string::npos) ||
        (lowerName.find("seaweed") != std::string::npos) ||
        (lowerName.find("kelp") != std::string::npos) ||
        (lowerName.find("lilypad") != std::string::npos);
    // Ambient creature effects: particle-based glow (exempt from particle dampeners)
    gpuModel.isFireflyEffect = ambientCreature;

    // Build collision mesh + spatial grid from M2 bounding geometry
    gpuModel.collision.vertices = model.collisionVertices;
    gpuModel.collision.indices = model.collisionIndices;
    gpuModel.collision.build();
    if (gpuModel.collision.valid()) {
        core::Logger::getInstance().debug("  M2 collision mesh: ", gpuModel.collision.triCount,
            " tris, grid ", gpuModel.collision.gridCellsX, "x", gpuModel.collision.gridCellsY);
    }

    // Flag smoke models for UV scroll animation (particle emitters not implemented)
    {
        std::string smokeName = model.name;
        std::transform(smokeName.begin(), smokeName.end(), smokeName.begin(),
                       [](unsigned char c) { return static_cast<char>(std::tolower(c)); });
        gpuModel.isSmoke = (smokeName.find("smoke") != std::string::npos);
    }

    // Identify idle variation sequences (animation ID 0 = Stand)
    for (int i = 0; i < static_cast<int>(model.sequences.size()); i++) {
        if (model.sequences[i].id == 0 && model.sequences[i].duration > 0) {
            gpuModel.idleVariationIndices.push_back(i);
        }
    }

    // Batch all GPU uploads (VB, IB, textures) into a single command buffer
    // submission with one fence wait, instead of one fence wait per upload.
    vkCtx_->beginUploadBatch();

    if (hasGeometry) {
        // Create VBO with interleaved vertex data
        // Format: position (3), normal (3), texcoord0 (2), texcoord1 (2), boneWeights (4), boneIndices (4 as float)
        const size_t floatsPerVertex = 18;
        std::vector<float> vertexData;
        vertexData.reserve(model.vertices.size() * floatsPerVertex);

        for (const auto& v : model.vertices) {
            vertexData.push_back(v.position.x);
            vertexData.push_back(v.position.y);
            vertexData.push_back(v.position.z);
            vertexData.push_back(v.normal.x);
            vertexData.push_back(v.normal.y);
            vertexData.push_back(v.normal.z);
            vertexData.push_back(v.texCoords[0].x);
            vertexData.push_back(v.texCoords[0].y);
            vertexData.push_back(v.texCoords[1].x);
            vertexData.push_back(v.texCoords[1].y);
            float w0 = v.boneWeights[0] / 255.0f;
            float w1 = v.boneWeights[1] / 255.0f;
            float w2 = v.boneWeights[2] / 255.0f;
            float w3 = v.boneWeights[3] / 255.0f;
            vertexData.push_back(w0);
            vertexData.push_back(w1);
            vertexData.push_back(w2);
            vertexData.push_back(w3);
            vertexData.push_back(static_cast<float>(std::min(v.boneIndices[0], uint8_t(127))));
            vertexData.push_back(static_cast<float>(std::min(v.boneIndices[1], uint8_t(127))));
            vertexData.push_back(static_cast<float>(std::min(v.boneIndices[2], uint8_t(127))));
            vertexData.push_back(static_cast<float>(std::min(v.boneIndices[3], uint8_t(127))));
        }

        // Upload vertex buffer to GPU
        {
            auto buf = uploadBuffer(*vkCtx_,
                vertexData.data(), vertexData.size() * sizeof(float),
                VK_BUFFER_USAGE_VERTEX_BUFFER_BIT);
            gpuModel.vertexBuffer = buf.buffer;
            gpuModel.vertexAlloc = buf.allocation;
        }

        // Upload index buffer to GPU
        {
            auto buf = uploadBuffer(*vkCtx_,
                model.indices.data(), model.indices.size() * sizeof(uint16_t),
                VK_BUFFER_USAGE_INDEX_BUFFER_BIT);
            gpuModel.indexBuffer = buf.buffer;
            gpuModel.indexAlloc = buf.allocation;
        }
    }

    // Load ALL textures from the model into a local vector.
    // textureLoadFailed[i] is true if texture[i] had a named path that failed to load.
    // Such batches are hidden (batchOpacity=0) rather than rendered white.
    std::vector<VkTexture*> allTextures;
    std::vector<bool> textureLoadFailed;
    std::vector<std::string> textureKeysLower;
    if (assetManager) {
        for (size_t ti = 0; ti < model.textures.size(); ti++) {
            const auto& tex = model.textures[ti];
            std::string texPath = tex.filename;
            // Some extracted M2 texture strings contain embedded NUL + garbage suffix.
            // Truncate at first NUL so valid paths like "...foo.blp\0junk" still resolve.
            size_t nul = texPath.find('\0');
            if (nul != std::string::npos) {
                texPath.resize(nul);
            }
            if (!texPath.empty()) {
                std::string texKey = texPath;
                std::replace(texKey.begin(), texKey.end(), '/', '\\');
                std::transform(texKey.begin(), texKey.end(), texKey.begin(),
                               [](unsigned char c) { return static_cast<char>(std::tolower(c)); });
                VkTexture* texPtr = loadTexture(texPath, tex.flags);
                bool failed = (texPtr == whiteTexture_.get());
                if (failed) {
                    static uint32_t loggedModelTextureFails = 0;
                    static bool loggedModelTextureFailSuppressed = false;
                    if (loggedModelTextureFails < 250) {
                        LOG_WARNING("M2 model ", model.name, " texture[", ti, "] failed to load: ", texPath);
                        ++loggedModelTextureFails;
                    } else if (!loggedModelTextureFailSuppressed) {
                        LOG_WARNING("M2 model texture-failure warnings suppressed after ",
                                    loggedModelTextureFails, " entries");
                        loggedModelTextureFailSuppressed = true;
                    }
                }
                if (isInvisibleTrap) {
                    LOG_INFO("  InvisibleTrap texture[", ti, "]: ", texPath, " -> ", (failed ? "WHITE" : "OK"));
                }
                allTextures.push_back(texPtr);
                textureLoadFailed.push_back(failed);
                textureKeysLower.push_back(std::move(texKey));
            } else {
                if (isInvisibleTrap) {
                    LOG_INFO("  InvisibleTrap texture[", ti, "]: EMPTY (using white fallback)");
                }
                allTextures.push_back(whiteTexture_.get());
                textureLoadFailed.push_back(false);  // Empty filename = intentional white (type!=0)
                textureKeysLower.emplace_back();
            }
        }
    }

    static const bool kGlowDiag = envFlagEnabled("WOWEE_M2_GLOW_DIAG", false);
    if (kGlowDiag) {
        std::string lowerName = model.name;
        std::transform(lowerName.begin(), lowerName.end(), lowerName.begin(),
                       [](unsigned char c) { return static_cast<char>(std::tolower(c)); });
        const bool lanternLike =
            (lowerName.find("lantern") != std::string::npos) ||
            (lowerName.find("lamp") != std::string::npos) ||
            (lowerName.find("light") != std::string::npos);
        if (lanternLike) {
            for (size_t ti = 0; ti < model.textures.size(); ++ti) {
                const std::string key = (ti < textureKeysLower.size()) ? textureKeysLower[ti] : std::string();
                LOG_DEBUG("M2 GLOW TEX '", model.name, "' tex[", ti, "]='", key, "' flags=0x",
                          std::hex, model.textures[ti].flags, std::dec);
            }
        }
    }

    // Copy particle emitter data and resolve textures
    gpuModel.particleEmitters = model.particleEmitters;
    gpuModel.particleTextures.resize(model.particleEmitters.size(), whiteTexture_.get());
    for (size_t ei = 0; ei < model.particleEmitters.size(); ei++) {
        uint16_t texIdx = model.particleEmitters[ei].texture;
        if (texIdx < allTextures.size() && allTextures[texIdx] != nullptr) {
            gpuModel.particleTextures[ei] = allTextures[texIdx];
        }
    }

    // Copy texture transform data for UV animation
    gpuModel.textureTransforms = model.textureTransforms;
    gpuModel.textureTransformLookup = model.textureTransformLookup;
    gpuModel.hasTextureAnimation = false;

    // Build per-batch GPU entries
    if (!model.batches.empty()) {
        for (const auto& batch : model.batches) {
            M2ModelGPU::BatchGPU bgpu;
            bgpu.indexStart = batch.indexStart;
            bgpu.indexCount = batch.indexCount;

            // Store texture animation index from batch
            bgpu.textureAnimIndex = batch.textureAnimIndex;
            if (bgpu.textureAnimIndex != 0xFFFF) {
                gpuModel.hasTextureAnimation = true;
            }

            // Store blend mode and flags from material
            if (batch.materialIndex < model.materials.size()) {
                bgpu.blendMode = model.materials[batch.materialIndex].blendMode;
                bgpu.materialFlags = model.materials[batch.materialIndex].flags;
            }

            // Copy LOD level from batch
            bgpu.submeshLevel = batch.submeshLevel;

            // Resolve texture: batch.textureIndex → textureLookup → allTextures
            VkTexture* tex = whiteTexture_.get();
            bool texFailed = false;
            std::string batchTexKeyLower;
            if (batch.textureIndex < model.textureLookup.size()) {
                uint16_t texIdx = model.textureLookup[batch.textureIndex];
                if (texIdx < allTextures.size()) {
                    tex = allTextures[texIdx];
                    texFailed = (texIdx < textureLoadFailed.size()) && textureLoadFailed[texIdx];
                    if (texIdx < textureKeysLower.size()) {
                        batchTexKeyLower = textureKeysLower[texIdx];
                    }
                }
                if (texIdx < model.textures.size()) {
                    bgpu.texFlags = static_cast<uint8_t>(model.textures[texIdx].flags & 0x3);
                }
            } else if (!allTextures.empty()) {
                tex = allTextures[0];
                texFailed = !textureLoadFailed.empty() && textureLoadFailed[0];
                if (!textureKeysLower.empty()) {
                    batchTexKeyLower = textureKeysLower[0];
                }
            }

            if (texFailed && groundDetailModel) {
                static const std::string kDetailFallbackTexture = "World\\NoDXT\\Detail\\8des_detaildoodads01.blp";
                VkTexture* fallbackTex = loadTexture(kDetailFallbackTexture, 0);
                if (fallbackTex != nullptr && fallbackTex != whiteTexture_.get()) {
                    tex = fallbackTex;
                    texFailed = false;
                }
            }
            bgpu.texture = tex;
            const bool exactLanternGlowTexture =
                (batchTexKeyLower == "world\\expansion06\\doodads\\nightelf\\7ne_druid_streetlamp01_light.blp") ||
                (batchTexKeyLower == "world\\generic\\nightelf\\passive doodads\\lamps\\glowblue32.blp") ||
                (batchTexKeyLower == "world\\generic\\human\\passive doodads\\stormwind\\t_vfx_glow01_64.blp") ||
                (batchTexKeyLower == "world\\azeroth\\karazahn\\passivedoodads\\bonfire\\flamelicksmallblue.blp") ||
                (batchTexKeyLower == "world\\generic\\nightelf\\passive doodads\\magicalimplements\\glow.blp");
            const bool texHasGlowToken =
                (batchTexKeyLower.find("glow") != std::string::npos) ||
                (batchTexKeyLower.find("flare") != std::string::npos) ||
                (batchTexKeyLower.find("halo") != std::string::npos) ||
                (batchTexKeyLower.find("light") != std::string::npos);
            const bool texHasFlameToken =
                (batchTexKeyLower.find("flame") != std::string::npos) ||
                (batchTexKeyLower.find("fire") != std::string::npos) ||
                (batchTexKeyLower.find("flamelick") != std::string::npos) ||
                (batchTexKeyLower.find("ember") != std::string::npos);
            const bool texGlowCardToken =
                (batchTexKeyLower.find("glow") != std::string::npos) ||
                (batchTexKeyLower.find("flamelick") != std::string::npos) ||
                (batchTexKeyLower.find("lensflare") != std::string::npos) ||
                (batchTexKeyLower.find("t_vfx") != std::string::npos) ||
                (batchTexKeyLower.find("lightbeam") != std::string::npos) ||
                (batchTexKeyLower.find("glowball") != std::string::npos) ||
                (batchTexKeyLower.find("genericglow") != std::string::npos);
            const bool texLikelyFlame =
                (batchTexKeyLower.find("fire") != std::string::npos) ||
                (batchTexKeyLower.find("flame") != std::string::npos) ||
                (batchTexKeyLower.find("torch") != std::string::npos);
            const bool texLanternFamily =
                (batchTexKeyLower.find("lantern") != std::string::npos) ||
                (batchTexKeyLower.find("lamp") != std::string::npos) ||
                (batchTexKeyLower.find("elf") != std::string::npos) ||
                (batchTexKeyLower.find("silvermoon") != std::string::npos) ||
                (batchTexKeyLower.find("quel") != std::string::npos) ||
                (batchTexKeyLower.find("thalas") != std::string::npos);
            const bool modelLanternFamily =
                (lowerName.find("lantern") != std::string::npos) ||
                (lowerName.find("lamp") != std::string::npos) ||
                (lowerName.find("light") != std::string::npos);
            bgpu.lanternGlowHint =
                exactLanternGlowTexture ||
                ((texHasGlowToken || (modelLanternFamily && texHasFlameToken)) &&
                 (texLanternFamily || modelLanternFamily) &&
                 (!texLikelyFlame || modelLanternFamily));
            bgpu.glowCardLike = bgpu.lanternGlowHint && texGlowCardToken;
            const bool texCoolTint =
                (batchTexKeyLower.find("blue") != std::string::npos) ||
                (batchTexKeyLower.find("nightelf") != std::string::npos) ||
                (batchTexKeyLower.find("arcane") != std::string::npos);
            const bool texRedTint =
                (batchTexKeyLower.find("red") != std::string::npos) ||
                (batchTexKeyLower.find("scarlet") != std::string::npos) ||
                (batchTexKeyLower.find("ruby") != std::string::npos);
            bgpu.glowTint = texCoolTint ? 1 : (texRedTint ? 2 : 0);
            bool texHasAlpha = false;
            if (tex != nullptr && tex != whiteTexture_.get()) {
                auto ait = textureHasAlphaByPtr_.find(tex);
                texHasAlpha = (ait != textureHasAlphaByPtr_.end()) ? ait->second : false;
            }
            bgpu.hasAlpha = texHasAlpha;
            bool colorKeyBlack = false;
            if (tex != nullptr && tex != whiteTexture_.get()) {
                auto cit = textureColorKeyBlackByPtr_.find(tex);
                colorKeyBlack = (cit != textureColorKeyBlackByPtr_.end()) ? cit->second : false;
            }
            bgpu.colorKeyBlack = colorKeyBlack;
            // textureCoordIndex is an index into a texture coord combo table, not directly
            // a UV set selector. Most batches have index=0 (UV set 0). We always use UV set 0
            // since we don't have the full combo table — dual-UV effects are rare edge cases.
            bgpu.textureUnit = 0;

            // Batch is hidden only when its named texture failed to load (avoids white shell artifacts).
            // Do NOT bake transparency/color animation tracks here — they animate over time and
            // baking the first keyframe value causes legitimate meshes to become invisible.
            // Keep terrain clutter visible even when source texture paths are malformed.
            bgpu.batchOpacity = (texFailed && !groundDetailModel) ? 0.0f : 1.0f;

            // Compute batch center and radius for glow sprite positioning
            if ((bgpu.blendMode >= 3 || bgpu.colorKeyBlack) && batch.indexCount > 0) {
                glm::vec3 sum(0.0f);
                uint32_t counted = 0;
                for (uint32_t j = batch.indexStart; j < batch.indexStart + batch.indexCount; j++) {
                    if (j < model.indices.size()) {
                        uint16_t vi = model.indices[j];
                        if (vi < model.vertices.size()) {
                            sum += model.vertices[vi].position;
                            counted++;
                        }
                    }
                }
                if (counted > 0) {
                    bgpu.center = sum / static_cast<float>(counted);
                    float maxDist = 0.0f;
                    for (uint32_t j = batch.indexStart; j < batch.indexStart + batch.indexCount; j++) {
                        if (j < model.indices.size()) {
                            uint16_t vi = model.indices[j];
                            if (vi < model.vertices.size()) {
                                float d = glm::length(model.vertices[vi].position - bgpu.center);
                                maxDist = std::max(maxDist, d);
                            }
                        }
                    }
                    bgpu.glowSize = std::max(maxDist, 0.5f);
                }
            }

            // Optional diagnostics for glow/light batches (disabled by default).
            if (kGlowDiag &&
                (lowerName.find("light") != std::string::npos ||
                 lowerName.find("lamp") != std::string::npos ||
                 lowerName.find("lantern") != std::string::npos)) {
                LOG_DEBUG("M2 GLOW DIAG '", model.name, "' batch ", gpuModel.batches.size(),
                          ": blend=", bgpu.blendMode, " matFlags=0x",
                          std::hex, bgpu.materialFlags, std::dec,
                          " colorKey=", bgpu.colorKeyBlack ? "Y" : "N",
                          " hasAlpha=", bgpu.hasAlpha ? "Y" : "N",
                          " unlit=", (bgpu.materialFlags & 0x01) ? "Y" : "N",
                          " lanternHint=", bgpu.lanternGlowHint ? "Y" : "N",
                          " glowSize=", bgpu.glowSize,
                          " tex=", bgpu.texture,
                          " idxCount=", bgpu.indexCount);
            }
            gpuModel.batches.push_back(bgpu);
        }
    } else {
        // Fallback: single batch covering all indices with first texture
        M2ModelGPU::BatchGPU bgpu;
        bgpu.indexStart = 0;
        bgpu.indexCount = gpuModel.indexCount;
        bgpu.texture = allTextures.empty() ? whiteTexture_.get() : allTextures[0];
        bool texHasAlpha = false;
        if (bgpu.texture != nullptr && bgpu.texture != whiteTexture_.get()) {
            auto ait = textureHasAlphaByPtr_.find(bgpu.texture);
            texHasAlpha = (ait != textureHasAlphaByPtr_.end()) ? ait->second : false;
        }
        bgpu.hasAlpha = texHasAlpha;
        bool colorKeyBlack = false;
        if (bgpu.texture != nullptr && bgpu.texture != whiteTexture_.get()) {
            auto cit = textureColorKeyBlackByPtr_.find(bgpu.texture);
            colorKeyBlack = (cit != textureColorKeyBlackByPtr_.end()) ? cit->second : false;
        }
        bgpu.colorKeyBlack = colorKeyBlack;
        gpuModel.batches.push_back(bgpu);
    }

    // Detect particle emitter volume models: box mesh (24 verts, 36 indices)
    // with disproportionately large bounds. These are invisible bounding volumes
    // that only exist to spawn particles — their mesh should never be rendered.
    if (!isInvisibleTrap && !groundDetailModel &&
        gpuModel.vertexCount <= 24 && gpuModel.indexCount <= 36
        && !model.particleEmitters.empty()) {
        glm::vec3 size = gpuModel.boundMax - gpuModel.boundMin;
        float maxDim = std::max({size.x, size.y, size.z});
        if (maxDim > 5.0f) {
            gpuModel.isInvisibleTrap = true;
            LOG_DEBUG("M2 emitter volume hidden: '", model.name, "' size=(",
                      size.x, " x ", size.y, " x ", size.z, ")");
        }
    }

    vkCtx_->endUploadBatch();

    // Allocate Vulkan descriptor sets and UBOs for each batch
    for (auto& bgpu : gpuModel.batches) {
        // Create combined UBO for M2Params (binding 1) + M2Material (binding 2)
        // We allocate them as separate buffers for clarity
        VmaAllocationInfo matAllocInfo{};
        {
            VkBufferCreateInfo bci{VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO};
            bci.size = sizeof(M2MaterialUBO);
            bci.usage = VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT;
            VmaAllocationCreateInfo aci{};
            aci.usage = VMA_MEMORY_USAGE_CPU_TO_GPU;
            aci.flags = VMA_ALLOCATION_CREATE_MAPPED_BIT;
            vmaCreateBuffer(vkCtx_->getAllocator(), &bci, &aci, &bgpu.materialUBO, &bgpu.materialUBOAlloc, &matAllocInfo);

            // Write initial material data (static per-batch — fadeAlpha/interiorDarken updated at draw time)
            M2MaterialUBO mat{};
            mat.hasTexture = (bgpu.texture != nullptr && bgpu.texture != whiteTexture_.get()) ? 1 : 0;
            mat.alphaTest = (bgpu.blendMode == 1 || (bgpu.blendMode >= 2 && !bgpu.hasAlpha)) ? 1 : 0;
            mat.colorKeyBlack = bgpu.colorKeyBlack ? 1 : 0;
            mat.colorKeyThreshold = 0.08f;
            mat.unlit = (bgpu.materialFlags & 0x01) ? 1 : 0;
            mat.blendMode = bgpu.blendMode;
            mat.fadeAlpha = 1.0f;
            mat.interiorDarken = 0.0f;
            mat.specularIntensity = 0.5f;
            memcpy(matAllocInfo.pMappedData, &mat, sizeof(mat));
            bgpu.materialUBOMapped = matAllocInfo.pMappedData;
        }

        // Allocate descriptor set and write all bindings
        bgpu.materialSet = allocateMaterialSet();
        if (bgpu.materialSet) {
            VkTexture* batchTex = bgpu.texture ? bgpu.texture : whiteTexture_.get();
            VkDescriptorImageInfo imgInfo = batchTex->descriptorInfo();

            VkDescriptorBufferInfo matBufInfo{};
            matBufInfo.buffer = bgpu.materialUBO;
            matBufInfo.offset = 0;
            matBufInfo.range = sizeof(M2MaterialUBO);

            VkWriteDescriptorSet writes[2] = {};
            // binding 0: texture
            writes[0].sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET;
            writes[0].dstSet = bgpu.materialSet;
            writes[0].dstBinding = 0;
            writes[0].descriptorCount = 1;
            writes[0].descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER;
            writes[0].pImageInfo = &imgInfo;
            // binding 2: M2Material UBO
            writes[1].sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET;
            writes[1].dstSet = bgpu.materialSet;
            writes[1].dstBinding = 2;
            writes[1].descriptorCount = 1;
            writes[1].descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER;
            writes[1].pBufferInfo = &matBufInfo;

            vkUpdateDescriptorSets(vkCtx_->getDevice(), 2, writes, 0, nullptr);
        }
    }

    // Pre-compute available LOD levels to avoid per-instance batch iteration
    gpuModel.availableLODs = 0;
    for (const auto& b : gpuModel.batches) {
        if (b.submeshLevel < 8) gpuModel.availableLODs |= (1u << b.submeshLevel);
    }

    models[modelId] = std::move(gpuModel);

    LOG_DEBUG("Loaded M2 model: ", model.name, " (", models[modelId].vertexCount, " vertices, ",
              models[modelId].indexCount / 3, " triangles, ", models[modelId].batches.size(), " batches)");


    return true;
}

// Creates an instance of an already-loaded model from a position, Euler
// rotation and uniform scale, and registers it in every lookup structure
// (dedup map, special-instance index lists, id->index map, spatial grid).
//
// Returns the new instance id. If a non-ground-detail instance of the same
// model already exists at the same position (quantised to 0.1 units), that
// instance's id is returned instead and nothing is created. Returns 0 when
// `modelId` has not been loaded.
uint32_t M2Renderer::createInstance(uint32_t modelId, const glm::vec3& position,
                                     const glm::vec3& rotation, float scale) {
    const auto modelIt = models.find(modelId);
    if (modelIt == models.end()) {
        LOG_WARNING("Cannot create instance: model ", modelId, " not loaded");
        return 0;
    }
    const auto& mdlRef = modelIt->second;

    // Ground-detail models are never deduplicated. For everything else the key
    // is built exactly once from the ORIGINAL position (not the ground-adjusted
    // one) so the lookup and the later registration cannot drift apart.
    const bool dedupEligible = !mdlRef.isGroundDetail;
    const DedupKey dedupKey = dedupEligible
        ? DedupKey{modelId,
                   static_cast<int32_t>(std::round(position.x * 10.0f)),
                   static_cast<int32_t>(std::round(position.y * 10.0f)),
                   static_cast<int32_t>(std::round(position.z * 10.0f))}
        : DedupKey{modelId, 0, 0, 0};  // placeholder, never used for ground detail

    // Deduplicate: hash-map lookup instead of scanning all instances.
    if (dedupEligible) {
        const auto existingIt = instanceDedupMap_.find(dedupKey);
        if (existingIt != instanceDedupMap_.end()) {
            return existingIt->second;
        }
    }

    M2Instance instance;
    instance.id = nextInstanceId++;
    instance.modelId = modelId;
    instance.position = position;
    if (mdlRef.isGroundDetail) {
        instance.position.z -= computeGroundDetailDownOffset(mdlRef, scale);
    }
    instance.rotation = rotation;
    instance.scale = scale;
    instance.updateModelMatrix();
    glm::vec3 localMin, localMax;
    getTightCollisionBounds(mdlRef, localMin, localMax);
    transformAABB(instance.modelMatrix, localMin, localMax, instance.worldBoundsMin, instance.worldBoundsMax);

    // Cache model flags on instance to avoid per-frame hash lookups
    instance.cachedHasAnimation = mdlRef.hasAnimation;
    instance.cachedDisableAnimation = mdlRef.disableAnimation;
    instance.cachedIsSmoke = mdlRef.isSmoke;
    instance.cachedHasParticleEmitters = !mdlRef.particleEmitters.empty();
    instance.cachedBoundRadius = mdlRef.boundRadius;
    instance.cachedIsGroundDetail = mdlRef.isGroundDetail;
    instance.cachedIsInvisibleTrap = mdlRef.isInvisibleTrap;
    instance.cachedIsInstancePortal = mdlRef.isInstancePortal;
    instance.cachedIsValid = mdlRef.isValid();
    instance.cachedModel = &mdlRef;

    // Initialize animation: play first sequence (usually Stand/Idle) starting
    // at a random time within it.
    if (mdlRef.hasAnimation && !mdlRef.disableAnimation && !mdlRef.sequences.empty()) {
        instance.currentSequenceIndex = 0;
        instance.idleSequenceIndex = 0;
        instance.animDuration = static_cast<float>(mdlRef.sequences[0].duration);
        instance.animTime = static_cast<float>(rand() % std::max(1u, mdlRef.sequences[0].duration));
        instance.variationTimer = 3000.0f + static_cast<float>(rand() % 8000);

        // Seed bone matrices from an existing instance of the same model so the
        // new instance renders immediately instead of being invisible until the
        // next update() computes bones (prevents pop-in flash).
        for (const auto& existing : instances) {
            if (existing.modelId == modelId && !existing.boneMatrices.empty()) {
                instance.boneMatrices = existing.boneMatrices;
                instance.bonesDirty = true;
                break;
            }
        }
        // If no sibling exists yet, compute bones immediately
        if (instance.boneMatrices.empty()) {
            computeBoneMatrices(mdlRef, instance);
        }
    }

    // Register in dedup map before pushing.
    if (dedupEligible) {
        instanceDedupMap_[dedupKey] = instance.id;
    }

    // Move (not copy) the finished instance into storage; the bone-matrix
    // vector would otherwise be duplicated. Everything below reads the stored
    // element, never the moved-from local.
    const auto newId = instance.id;
    instances.push_back(std::move(instance));
    const size_t idx = instances.size() - 1;
    const M2Instance& stored = instances.back();

    // Track special instances for fast-path iteration
    if (mdlRef.isSmoke) {
        smokeInstanceIndices_.push_back(idx);
    }
    if (mdlRef.isInstancePortal) {
        portalInstanceIndices_.push_back(idx);
    }
    if (!mdlRef.particleEmitters.empty()) {
        particleInstanceIndices_.push_back(idx);
    }
    if (mdlRef.hasAnimation && !mdlRef.disableAnimation) {
        animatedInstanceIndices_.push_back(idx);
    } else if (!mdlRef.particleEmitters.empty()) {
        particleOnlyInstanceIndices_.push_back(idx);
    }
    instanceIndexById[newId] = idx;

    // Insert the id into every spatial-grid cell overlapped by the world AABB.
    const GridCell minCell = toCell(stored.worldBoundsMin);
    const GridCell maxCell = toCell(stored.worldBoundsMax);
    for (int z = minCell.z; z <= maxCell.z; z++) {
        for (int y = minCell.y; y <= maxCell.y; y++) {
            for (int x = minCell.x; x <= maxCell.x; x++) {
                spatialGrid[GridCell{x, y, z}].push_back(newId);
            }
        }
    }

    return newId;
}

// Creates an instance of an already-loaded model from a caller-supplied model
// matrix. `position` is stored on the instance (used for frustum culling) and
// is also what the dedup key is built from; rotation is set to zero and scale
// to 1 because the matrix carries the actual transform.
//
// Returns the new instance id, the id of an existing instance of the same
// model at the same position (quantised to 0.1 units), or 0 when `modelId`
// has not been loaded.
//
// NOTE(review): unlike createInstance, this path neither caches
// isInstancePortal on the instance nor registers it in portalInstanceIndices_,
// and it dedups ground-detail models too. Confirm that is intentional.
uint32_t M2Renderer::createInstanceWithMatrix(uint32_t modelId, const glm::mat4& modelMatrix,
                                                const glm::vec3& position) {
    // One hash lookup; the iterator is reused below instead of models[modelId].
    const auto modelIt = models.find(modelId);
    if (modelIt == models.end()) {
        LOG_WARNING("Cannot create instance: model ", modelId, " not loaded");
        return 0;
    }
    const auto& mdl = modelIt->second;

    // Built once and shared by the lookup and the registration further down.
    const DedupKey dedupKey{modelId,
                            static_cast<int32_t>(std::round(position.x * 10.0f)),
                            static_cast<int32_t>(std::round(position.y * 10.0f)),
                            static_cast<int32_t>(std::round(position.z * 10.0f))};

    // Deduplicate: O(1) hash lookup
    {
        const auto existingIt = instanceDedupMap_.find(dedupKey);
        if (existingIt != instanceDedupMap_.end()) {
            return existingIt->second;
        }
    }

    M2Instance instance;
    instance.id = nextInstanceId++;
    instance.modelId = modelId;
    instance.position = position;  // Used for frustum culling
    instance.rotation = glm::vec3(0.0f);
    instance.scale = 1.0f;
    instance.modelMatrix = modelMatrix;
    instance.invModelMatrix = glm::inverse(modelMatrix);
    glm::vec3 localMin, localMax;
    getTightCollisionBounds(mdl, localMin, localMax);
    transformAABB(instance.modelMatrix, localMin, localMax, instance.worldBoundsMin, instance.worldBoundsMax);

    // Cache model flags on instance to avoid per-frame hash lookups
    instance.cachedHasAnimation = mdl.hasAnimation;
    instance.cachedDisableAnimation = mdl.disableAnimation;
    instance.cachedIsSmoke = mdl.isSmoke;
    instance.cachedHasParticleEmitters = !mdl.particleEmitters.empty();
    instance.cachedBoundRadius = mdl.boundRadius;
    instance.cachedIsGroundDetail = mdl.isGroundDetail;
    instance.cachedIsInvisibleTrap = mdl.isInvisibleTrap;
    instance.cachedIsValid = mdl.isValid();
    instance.cachedModel = &mdl;

    // Initialize animation
    if (mdl.hasAnimation && !mdl.disableAnimation && !mdl.sequences.empty()) {
        instance.currentSequenceIndex = 0;
        instance.idleSequenceIndex = 0;
        instance.animDuration = static_cast<float>(mdl.sequences[0].duration);
        instance.animTime = static_cast<float>(rand() % std::max(1u, mdl.sequences[0].duration));
        instance.variationTimer = 3000.0f + static_cast<float>(rand() % 8000);

        // Seed bone matrices from an existing sibling so the instance renders immediately
        for (const auto& existing : instances) {
            if (existing.modelId == modelId && !existing.boneMatrices.empty()) {
                instance.boneMatrices = existing.boneMatrices;
                instance.bonesDirty = true;
                break;
            }
        }
        // No sibling yet: compute bones right away.
        if (instance.boneMatrices.empty()) {
            computeBoneMatrices(mdl, instance);
        }
    } else {
        // No sequence is played: start the clock at a random value in [0, 10000].
        instance.animTime = static_cast<float>(rand()) / RAND_MAX * 10000.0f;
    }

    // Register in dedup map
    instanceDedupMap_[dedupKey] = instance.id;

    // Move (not copy) the finished instance into storage; everything below
    // reads the stored element, never the moved-from local.
    const auto newId = instance.id;
    instances.push_back(std::move(instance));
    const size_t idx = instances.size() - 1;
    const M2Instance& stored = instances.back();

    // Track special instances for fast-path iteration
    if (mdl.isSmoke) {
        smokeInstanceIndices_.push_back(idx);
    }
    if (!mdl.particleEmitters.empty()) {
        particleInstanceIndices_.push_back(idx);
    }
    if (mdl.hasAnimation && !mdl.disableAnimation) {
        animatedInstanceIndices_.push_back(idx);
    } else if (!mdl.particleEmitters.empty()) {
        particleOnlyInstanceIndices_.push_back(idx);
    }
    instanceIndexById[newId] = idx;

    // Insert the id into every spatial-grid cell overlapped by the world AABB.
    const GridCell minCell = toCell(stored.worldBoundsMin);
    const GridCell maxCell = toCell(stored.worldBoundsMax);
    for (int z = minCell.z; z <= maxCell.z; z++) {
        for (int y = minCell.y; y <= maxCell.y; y++) {
            for (int x = minCell.x; x <= maxCell.x; x++) {
                spatialGrid[GridCell{x, y, z}].push_back(newId);
            }
        }
    }

    return newId;
}

// --- Bone animation helpers (same logic as CharacterRenderer) ---

// Finds the keyframe segment that contains `time`.
//
// Returns -1 for an empty track and 0 for a single-key track. Otherwise returns
// the index of the last timestamp that is <= time, clamped to [0, size - 2] so
// that index + 1 is always a valid key for the caller to interpolate towards.
static int findKeyframeIndex(const std::vector<uint32_t>& timestamps, float time) {
    const size_t keyCount = timestamps.size();
    switch (keyCount) {
        case 0: return -1;
        case 1: return 0;
        default: break;
    }

    // Compare in float space so the result matches the float animation clock.
    const auto firstLater = std::upper_bound(
        timestamps.begin(), timestamps.end(), time,
        [](float probe, uint32_t stamp) { return probe < static_cast<float>(stamp); });

    // `time` precedes every key: fall back to the first segment.
    if (firstLater == timestamps.begin()) {
        return 0;
    }

    const size_t segment = static_cast<size_t>(firstLater - timestamps.begin()) - 1;
    const size_t lastSegment = keyCount - 2;
    return static_cast<int>(segment < lastSegment ? segment : lastSegment);
}

// Picks which key sub-array and which time an animation track is sampled at.
//
// A track bound to a valid global sequence always reads sub-array 0 and loops
// on that global sequence's own duration (time 0 if the duration is 0). Any
// other track uses the caller's sequence index and time unchanged.
static void resolveTrackTime(const pipeline::M2AnimationTrack& track,
                              int seqIdx, float time,
                              const std::vector<uint32_t>& globalSeqDurations,
                              int& outSeqIdx, float& outTime) {
    const bool usesGlobalSequence =
        track.globalSequence >= 0 &&
        static_cast<size_t>(track.globalSequence) < globalSeqDurations.size();

    if (!usesGlobalSequence) {
        outSeqIdx = seqIdx;
        outTime = time;
        return;
    }

    const float loopLength = static_cast<float>(globalSeqDurations[track.globalSequence]);
    outSeqIdx = 0;
    if (loopLength > 0.0f) {
        outTime = std::fmod(time, loopLength);
    } else {
        outTime = 0.0f;
    }
}

// Samples a vec3 animation track at `time` for sequence `seqIdx`, linearly
// interpolating between the two surrounding keyframes.
//
// Returns `def` when the track has no data, the resolved sequence index is out
// of range, the key arrays are empty, or the sampled value contains NaN.
// Safe against tracks whose timestamp and value arrays differ in length: no
// index beyond either array is ever read.
static glm::vec3 interpVec3(const pipeline::M2AnimationTrack& track,
                             int seqIdx, float time, const glm::vec3& def,
                             const std::vector<uint32_t>& globalSeqDurations) {
    if (!track.hasData()) return def;
    int si; float t;
    resolveTrackTime(track, seqIdx, time, globalSeqDurations, si, t);
    if (si < 0 || si >= static_cast<int>(track.sequences.size())) return def;
    const auto& keys = track.sequences[si];
    const auto& stamps = keys.timestamps;
    const auto& values = keys.vec3Values;
    if (stamps.empty() || values.empty()) return def;

    // Replace any NaN-tainted sample with the caller's default.
    auto safe = [&](const glm::vec3& v) -> glm::vec3 {
        if (std::isnan(v.x) || std::isnan(v.y) || std::isnan(v.z)) return def;
        return v;
    };
    if (values.size() == 1) return safe(values[0]);
    int idx = findKeyframeIndex(stamps, t);
    if (idx < 0) return def;

    // findKeyframeIndex only knows about timestamps. Clamp both ends so that a
    // track with fewer values than timestamps holds its last value, and a track
    // with fewer timestamps than values never reads a missing timestamp.
    const size_t lastValue = values.size() - 1;
    const size_t lastStamp = stamps.size() - 1;
    const size_t i0 = std::min(static_cast<size_t>(idx), lastValue);
    const size_t i1 = std::min(std::min(i0 + 1, lastValue), lastStamp);
    if (i0 == i1) return safe(values[i0]);

    float t0 = static_cast<float>(stamps[i0]);
    float t1 = static_cast<float>(stamps[i1]);
    float dur = t1 - t0;
    // Zero-length (or reversed) segments snap to the first key.
    float frac = (dur > 0.0f) ? glm::clamp((t - t0) / dur, 0.0f, 1.0f) : 0.0f;
    return safe(glm::mix(values[i0], values[i1], frac));
}

// Samples a quaternion animation track at `time` for sequence `seqIdx`,
// spherically interpolating between the two surrounding keyframes.
//
// Returns the identity rotation when the track has no data, the resolved
// sequence index is out of range, or the key arrays are empty. Individual keys
// that are (near) zero-length or NaN are replaced by identity before use.
// Safe against tracks whose timestamp and value arrays differ in length: no
// index beyond either array is ever read.
static glm::quat interpQuat(const pipeline::M2AnimationTrack& track,
                              int seqIdx, float time,
                              const std::vector<uint32_t>& globalSeqDurations) {
    const glm::quat identity(1.0f, 0.0f, 0.0f, 0.0f);
    if (!track.hasData()) return identity;
    int si; float t;
    resolveTrackTime(track, seqIdx, time, globalSeqDurations, si, t);
    if (si < 0 || si >= static_cast<int>(track.sequences.size())) return identity;
    const auto& keys = track.sequences[si];
    const auto& stamps = keys.timestamps;
    const auto& values = keys.quatValues;
    if (stamps.empty() || values.empty()) return identity;

    // Reject degenerate keys: squared length below 1e-6 or NaN.
    auto safe = [&](const glm::quat& q) -> glm::quat {
        float lenSq = q.x*q.x + q.y*q.y + q.z*q.z + q.w*q.w;
        if (lenSq < 0.000001f || std::isnan(lenSq)) return identity;
        return q;
    };
    if (values.size() == 1) return safe(values[0]);
    int idx = findKeyframeIndex(stamps, t);
    if (idx < 0) return identity;

    // findKeyframeIndex only knows about timestamps. Clamp both ends so that a
    // track with fewer values than timestamps holds its last value, and a track
    // with fewer timestamps than values never reads a missing timestamp.
    const size_t lastValue = values.size() - 1;
    const size_t lastStamp = stamps.size() - 1;
    const size_t i0 = std::min(static_cast<size_t>(idx), lastValue);
    const size_t i1 = std::min(std::min(i0 + 1, lastValue), lastStamp);
    if (i0 == i1) return safe(values[i0]);

    float t0 = static_cast<float>(stamps[i0]);
    float t1 = static_cast<float>(stamps[i1]);
    float dur = t1 - t0;
    // Zero-length (or reversed) segments snap to the first key.
    float frac = (dur > 0.0f) ? glm::clamp((t - t0) / dur, 0.0f, 1.0f) : 0.0f;
    return glm::slerp(safe(values[i0]), safe(values[i1]), frac);
}

static void computeBoneMatrices(const M2ModelGPU& model, M2Instance& instance) {
    size_t numBones = std::min(model.bones.size(), size_t(128));
    if (numBones == 0) return;
    instance.boneMatrices.resize(numBones);
    const auto& gsd = model.globalSequenceDurations;

    for (size_t i = 0; i < numBones; i++) {
        const auto& bone = model.bones[i];
        glm::vec3 trans = interpVec3(bone.translation, instance.currentSequenceIndex, instance.animTime, glm::vec3(0.0f), gsd);
        glm::quat rot = interpQuat(bone.rotation, instance.currentSequenceIndex, instance.animTime, gsd);
        glm::vec3 scl = interpVec3(bone.scale, instance.currentSequenceIndex, instance.animTime, glm::vec3(1.0f), gsd);

        // Sanity check scale to avoid degenerate matrices
        if (scl.x < 0.001f) scl.x = 1.0f;
        if (scl.y < 0.001f) scl.y = 1.0f;
        if (scl.z < 0.001f) scl.z = 1.0f;

        glm::mat4 local = glm::translate(glm::mat4(1.0f), bone.pivot);
        local = glm::translate(local, trans);
        local *= glm::toMat4(rot);
        local = glm::scale(local, scl);
        local = glm::translate(local, -bone.pivot);

        if (bone.parentBone >= 0 && static_cast<size_t>(bone.parentBone) < numBones) {
            instance.boneMatrices[i] = instance.boneMatrices[bone.parentBone] * local;
        } else {
            instance.boneMatrices[i] = local;
        }
    }
    instance.bonesDirty = true;
}

/**
 * Per-frame simulation step for all M2 instances.
 *
 * Work performed, in order:
 *   1. Rebuild the spatial index if it was flagged dirty.
 *   2. Spawn and integrate chimney-style smoke/spark particles.
 *   3. Spin instance portals.
 *   4. Advance animation time, handle looping and idle-variation switching,
 *      and collect the instances whose bones must be recomputed this frame
 *      (distance/frustum culled, with distance-based frame skipping).
 *   5. Compute bone matrices for the collected instances, sequentially or
 *      split across std::async workers when there is enough work.
 *   6. Emit and update M2 particle emitters for instances in range.
 *
 * @param deltaTime       Frame time in seconds (converted to milliseconds for animation time).
 * @param cameraPos       Camera position used for distance culling; cached in cachedCamPos_.
 * @param viewProjection  Matrix the bone-update culling frustum is extracted from.
 */
void M2Renderer::update(float deltaTime, const glm::vec3& cameraPos, const glm::mat4& viewProjection) {
    if (spatialIndexDirty_) {
        rebuildSpatialIndex();
    }

    float dtMs = deltaTime * 1000.0f;

    // Cache camera state for frustum-culling bone computation
    cachedCamPos_ = cameraPos;
    const float maxRenderDistance = (instances.size() > 2000) ? 800.0f : 2800.0f;
    cachedMaxRenderDistSq_ = maxRenderDistance * maxRenderDistance;

    // Build frustum for culling bones
    Frustum updateFrustum;
    updateFrustum.extractFromMatrix(viewProjection);

    // --- Smoke particle spawning (only iterate tracked smoke instances) ---
    std::uniform_real_distribution<float> distXY(-0.4f, 0.4f);
    std::uniform_real_distribution<float> distVelXY(-0.3f, 0.3f);
    std::uniform_real_distribution<float> distVelZ(3.0f, 5.0f);
    std::uniform_real_distribution<float> distLife(4.0f, 7.0f);
    std::uniform_real_distribution<float> distDrift(-0.2f, 0.2f);

    smokeEmitAccum += deltaTime;
    float emitInterval = 1.0f / 8.0f;  // 8 particles per second per emitter

    if (smokeEmitAccum >= emitInterval &&
        static_cast<int>(smokeParticles.size()) < MAX_SMOKE_PARTICLES) {
        for (size_t si : smokeInstanceIndices_) {
            if (si >= instances.size()) continue;
            auto& instance = instances[si];

            // Emit from the model-space origin transformed into world space.
            glm::vec3 emitWorld = glm::vec3(instance.modelMatrix * glm::vec4(0.0f, 0.0f, 0.0f, 1.0f));
            bool spark = (smokeRng() % 8 == 0);  // roughly one in eight particles is a spark

            SmokeParticle p;
            p.position = emitWorld + glm::vec3(distXY(smokeRng), distXY(smokeRng), 0.0f);
            if (spark) {
                p.velocity = glm::vec3(distVelXY(smokeRng) * 2.0f, distVelXY(smokeRng) * 2.0f, distVelZ(smokeRng) * 1.5f);
                p.maxLife = 0.8f + static_cast<float>(smokeRng() % 100) / 100.0f * 1.2f;
                p.size = 0.5f;
                p.isSpark = 1.0f;
            } else {
                p.velocity = glm::vec3(distVelXY(smokeRng), distVelXY(smokeRng), distVelZ(smokeRng));
                p.maxLife = distLife(smokeRng);
                p.size = 1.0f;
                p.isSpark = 0.0f;
            }
            p.life = 0.0f;
            p.instanceId = instance.id;
            smokeParticles.push_back(p);
            if (static_cast<int>(smokeParticles.size()) >= MAX_SMOKE_PARTICLES) break;
        }
        smokeEmitAccum = 0.0f;
    }

    // --- Update existing smoke particles (swap-and-pop for O(1) removal) ---
    for (size_t i = 0; i < smokeParticles.size(); ) {
        auto& p = smokeParticles[i];
        p.life += deltaTime;
        if (p.life >= p.maxLife) {
            // Do not advance i: the element swapped into slot i still needs processing.
            smokeParticles[i] = smokeParticles.back();
            smokeParticles.pop_back();
            continue;
        }
        p.position += p.velocity * deltaTime;
        p.velocity.z *= 0.98f;  // Slight deceleration
        p.velocity.x += distDrift(smokeRng) * deltaTime;
        p.velocity.y += distDrift(smokeRng) * deltaTime;
        // Grow from 1.0 to 3.5 over lifetime
        float t = p.life / p.maxLife;
        p.size = 1.0f + t * 2.5f;
        ++i;
    }

    // --- Spin instance portals ---
    static constexpr float PORTAL_SPIN_SPEED = 1.2f; // radians/sec
    for (size_t idx : portalInstanceIndices_) {
        if (idx >= instances.size()) continue;
        auto& inst = instances[idx];
        inst.portalSpinAngle += PORTAL_SPIN_SPEED * deltaTime;
        if (inst.portalSpinAngle > 6.2831853f)
            inst.portalSpinAngle -= 6.2831853f;
        inst.rotation.z = inst.portalSpinAngle;
        inst.updateModelMatrix();
    }

    // --- Normal M2 animation update ---
    // Advance animTime for ALL instances (needed for texture UV animation on static doodads).
    // This is a tight loop touching only one float per instance — no hash lookups.
    for (auto& instance : instances) {
        instance.animTime += dtMs;
    }
    // Wrap animTime for particle-only instances so emission rate tracks keep looping
    for (size_t idx : particleOnlyInstanceIndices_) {
        if (idx >= instances.size()) continue;
        auto& instance = instances[idx];
        if (instance.animTime > 3333.0f) {
            instance.animTime = std::fmod(instance.animTime, 3333.0f);
        }
    }

    boneWorkIndices_.clear();
    boneWorkIndices_.reserve(animatedInstanceIndices_.size());

    // Update animated instances (full animation state + bone computation culling)
    // Note: animTime was already advanced by dtMs in the global loop above.
    // Here we apply the speed factor: subtract the base dtMs and add dtMs*speed.
    for (size_t idx : animatedInstanceIndices_) {
        if (idx >= instances.size()) continue;
        auto& instance = instances[idx];

        instance.animTime += dtMs * (instance.animSpeed - 1.0f);

        // For animation looping/variation, we need the actual model data.
        if (!instance.cachedModel) continue;
        const M2ModelGPU& model = *instance.cachedModel;
        const int sequenceCount = static_cast<int>(model.sequences.size());

        // Validate sequence index
        if (instance.currentSequenceIndex < 0 ||
            instance.currentSequenceIndex >= sequenceCount) {
            instance.currentSequenceIndex = 0;
            if (!model.sequences.empty()) {
                instance.animDuration = static_cast<float>(model.sequences[0].duration);
            }
        }

        // Handle animation looping / variation transitions
        if (instance.animDuration <= 0.0f && instance.cachedHasParticleEmitters) {
            // No usable sequence duration: use the same 3333 ms loop as particle-only instances.
            instance.animDuration = 3333.0f;
        }
        if (instance.animDuration > 0.0f && instance.animTime >= instance.animDuration) {
            if (instance.playingVariation) {
                // Variation finished: fall back to the idle sequence and re-arm the timer.
                instance.playingVariation = false;
                instance.currentSequenceIndex = instance.idleSequenceIndex;
                // Both bounds are checked: a negative idle index must never index the vector.
                if (instance.idleSequenceIndex >= 0 &&
                    instance.idleSequenceIndex < sequenceCount) {
                    instance.animDuration = static_cast<float>(model.sequences[instance.idleSequenceIndex].duration);
                }
                instance.animTime = 0.0f;
                instance.variationTimer = 4000.0f + static_cast<float>(rand() % 6000);
            } else {
                instance.animTime = std::fmod(instance.animTime, std::max(1.0f, instance.animDuration));
            }
        }

        // Idle variation timer
        if (!instance.playingVariation && model.idleVariationIndices.size() > 1) {
            instance.variationTimer -= dtMs;
            if (instance.variationTimer <= 0.0f) {
                int pick = rand() % static_cast<int>(model.idleVariationIndices.size());
                int newSeq = model.idleVariationIndices[pick];
                // Reject the current sequence and any index outside [0, sequenceCount).
                if (newSeq != instance.currentSequenceIndex &&
                    newSeq >= 0 && newSeq < sequenceCount) {
                    instance.playingVariation = true;
                    instance.currentSequenceIndex = newSeq;
                    instance.animDuration = static_cast<float>(model.sequences[newSeq].duration);
                    instance.animTime = 0.0f;
                } else {
                    instance.variationTimer = 2000.0f + static_cast<float>(rand() % 4000);
                }
            }
        }

        // Frustum + distance cull: skip expensive bone computation for off-screen instances.
        float worldRadius = instance.cachedBoundRadius * instance.scale;
        float cullRadius = worldRadius;
        glm::vec3 toCam = instance.position - cachedCamPos_;
        float distSq = glm::dot(toCam, toCam);
        float effectiveMaxDistSq = cachedMaxRenderDistSq_ * std::max(1.0f, cullRadius / 12.0f);
        if (distSq > effectiveMaxDistSq) continue;
        float paddedRadius = std::max(cullRadius * 1.5f, cullRadius + 3.0f);
        if (cullRadius > 0.0f && !updateFrustum.intersectsSphere(instance.position, paddedRadius)) continue;

        // Distance-based frame skipping: update distant bones less frequently
        uint32_t boneInterval = 1;
        if (distSq > 200.0f * 200.0f) boneInterval = 8;
        else if (distSq > 100.0f * 100.0f) boneInterval = 4;
        else if (distSq > 50.0f * 50.0f) boneInterval = 2;
        instance.frameSkipCounter++;
        if ((instance.frameSkipCounter % boneInterval) != 0) continue;

        boneWorkIndices_.push_back(idx);
    }

    // Phase 2: Compute bone matrices (expensive, parallel if enough work)
    const size_t animCount = boneWorkIndices_.size();
    if (animCount > 0) {
        // Computes bones for boneWorkIndices_[begin, end). Shared by the sequential
        // path and by every worker so the per-instance checks live in one place.
        const auto computeRange = [this](size_t begin, size_t end) {
            for (size_t j = begin; j < end; ++j) {
                const size_t workIdx = boneWorkIndices_[j];
                if (workIdx >= instances.size()) continue;
                auto& inst = instances[workIdx];
                if (!inst.cachedModel) continue;
                computeBoneMatrices(*inst.cachedModel, inst);
            }
        };

        static const size_t minParallelAnimInstances = std::max<size_t>(
            8, envSizeOrDefault("WOWEE_M2_ANIM_MT_MIN", 96));

        // Decide how many workers are worthwhile; 1 means "run inline".
        size_t numThreads = 1;
        if (animCount >= minParallelAnimInstances && numAnimThreads_ > 1) {
            static const size_t minAnimWorkPerThread = std::max<size_t>(
                16, envSizeOrDefault("WOWEE_M2_ANIM_WORK_PER_THREAD", 64));
            const size_t maxUsefulThreads = std::max<size_t>(
                1, (animCount + minAnimWorkPerThread - 1) / minAnimWorkPerThread);
            numThreads = std::min(static_cast<size_t>(numAnimThreads_), maxUsefulThreads);
        }

        if (numThreads <= 1) {
            // Sequential — not enough work to justify thread overhead
            computeRange(0, animCount);
        } else {
            // Parallel — dispatch across worker threads
            const size_t chunkSize = animCount / numThreads;
            const size_t remainder = animCount % numThreads;

            // Reuse persistent futures vector to avoid allocation
            animFutures_.clear();
            if (animFutures_.capacity() < numThreads) {
                animFutures_.reserve(numThreads);
            }

            size_t start = 0;
            for (size_t t = 0; t < numThreads; ++t) {
                // The first `remainder` chunks take one extra item so all work is covered.
                size_t end = start + chunkSize + (t < remainder ? 1 : 0);
                animFutures_.push_back(std::async(std::launch::async,
                    [computeRange, start, end]() {
                        computeRange(start, end);
                    }));
                start = end;
            }

            // Join every worker before touching instances again.
            for (auto& f : animFutures_) {
                f.get();
            }
        }
    }

    // Phase 3: Particle update (sequential — uses RNG, not thread-safe)
    // Only iterate instances that have particle emitters (pre-built list).
    for (size_t idx : particleInstanceIndices_) {
        if (idx >= instances.size()) continue;
        auto& instance = instances[idx];
        // Distance cull: only update particles within visible range
        glm::vec3 toCam = instance.position - cachedCamPos_;
        float distSq = glm::dot(toCam, toCam);
        if (distSq > cachedMaxRenderDistSq_) continue;
        if (!instance.cachedModel) continue;
        emitParticles(instance, *instance.cachedModel, deltaTime);
        updateParticles(instance, deltaTime);
    }

}

void M2Renderer::prepareRender(uint32_t frameIndex, const Camera& camera) {
    if (!initialized_ || instances.empty()) return;
    (void)camera;  // reserved for future frustum-based culling

    // Pre-allocate bone SSBOs + descriptor sets on main thread (pool ops not thread-safe).
    // Only iterate animated instances — static doodads don't need bone buffers.
    for (size_t idx : animatedInstanceIndices_) {
        if (idx >= instances.size()) continue;
        auto& instance = instances[idx];

        if (instance.boneMatrices.empty()) continue;

        if (!instance.boneBuffer[frameIndex]) {
            VkBufferCreateInfo bci{VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO};
            bci.size = 128 * sizeof(glm::mat4);
            bci.usage = VK_BUFFER_USAGE_STORAGE_BUFFER_BIT;
            VmaAllocationCreateInfo aci{};
            aci.usage = VMA_MEMORY_USAGE_CPU_TO_GPU;
            aci.flags = VMA_ALLOCATION_CREATE_MAPPED_BIT;
            VmaAllocationInfo allocInfo{};
            vmaCreateBuffer(vkCtx_->getAllocator(), &bci, &aci,
                            &instance.boneBuffer[frameIndex], &instance.boneAlloc[frameIndex], &allocInfo);
            instance.boneMapped[frameIndex] = allocInfo.pMappedData;

            instance.boneSet[frameIndex] = allocateBoneSet();
            if (instance.boneSet[frameIndex]) {
                VkDescriptorBufferInfo bufInfo{};
                bufInfo.buffer = instance.boneBuffer[frameIndex];
                bufInfo.offset = 0;
                bufInfo.range = bci.size;
                VkWriteDescriptorSet write{VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET};
                write.dstSet = instance.boneSet[frameIndex];
                write.dstBinding = 0;
                write.descriptorCount = 1;
                write.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
                write.pBufferInfo = &bufInfo;
                vkUpdateDescriptorSets(vkCtx_->getDevice(), 1, &write, 0, nullptr);
            }
        }
    }
}

/**
 * Records draw commands for all visible M2 instances into `cmd`.
 *
 * Steps:
 *   1. Cull instances by distance and frustum into sortedVisible_, then sort by
 *      modelId so vertex/index buffers are bound once per model group.
 *   2. For each visible instance: compute distance fade, upload bones if they
 *      were recomputed, pick a LOD, and draw each batch with the pipeline that
 *      matches its (possibly overridden) blend mode.
 *   3. Collect glow sprites along the way and draw them last with the additive
 *      particle pipeline.
 *
 * Animated instances whose bones or bone buffers are not ready yet are skipped
 * so they never appear in bind pose. A model group whose model is missing or
 * has no vertex buffer is skipped as a whole.
 *
 * @param cmd          Command buffer being recorded.
 * @param perFrameSet  Descriptor set bound at set 0 for both the M2 and particle layouts.
 * @param camera       Supplies view/projection matrices and the camera position.
 */
void M2Renderer::render(VkCommandBuffer cmd, VkDescriptorSet perFrameSet, const Camera& camera) {
    if (instances.empty() || !opaquePipeline_) {
        return;
    }

    // Debug: log once when we start rendering
    static bool loggedOnce = false;
    if (!loggedOnce) {
        loggedOnce = true;
        LOG_INFO("M2 render: ", instances.size(), " instances, ", models.size(), " models");
    }

    // Build frustum for culling
    const glm::mat4 view = camera.getViewMatrix();
    const glm::mat4 projection = camera.getProjectionMatrix();
    Frustum frustum;
    frustum.extractFromMatrix(projection * view);

    // Reuse persistent buffers (clear instead of reallocating)
    glowSprites_.clear();

    lastDrawCallCount = 0;

    // Adaptive render distance: smoothed to prevent pop-in/pop-out flickering
    const float targetRenderDist = (instances.size() > 2000) ? 300.0f
                                 : (instances.size() > 1000) ? 500.0f
                                 : 1000.0f;
    // Smooth transitions: shrink slowly (avoid popping out nearby objects)
    const float shrinkRate = 0.005f;  // very slow decrease
    const float growRate = 0.05f;     // faster increase
    float blendRate = (targetRenderDist < smoothedRenderDist_) ? shrinkRate : growRate;
    smoothedRenderDist_ = glm::mix(smoothedRenderDist_, targetRenderDist, blendRate);
    const float maxRenderDistance = smoothedRenderDist_;
    const float maxRenderDistanceSq = maxRenderDistance * maxRenderDistance;
    const float fadeStartFraction = 0.75f;
    const glm::vec3 camPos = camera.getPosition();

    // Build sorted visible instance list: cull then sort by modelId to batch VAO binds
    // Reuse persistent vector to avoid allocation
    sortedVisible_.clear();
    // Reserve based on expected visible count (roughly 30% of total instances in dense areas)
    const size_t expectedVisible = std::min(instances.size() / 3, size_t(600));
    if (sortedVisible_.capacity() < expectedVisible) {
        sortedVisible_.reserve(expectedVisible);
    }

    // Early distance rejection: hard cap at 2x the smoothed render distance (4x in squared terms).
    // NOTE(review): the per-instance effective distance below can exceed this cap for
    // large-radius or animation-disabled models, so this cap wins for those — confirm intended.
    const float maxPossibleDistSq = maxRenderDistance * maxRenderDistance * 4.0f;  // 2x safety margin (reduced from 4x)

    for (uint32_t i = 0; i < static_cast<uint32_t>(instances.size()); ++i) {
        const auto& instance = instances[i];

        // Use cached model flags — no hash lookup needed
        if (!instance.cachedIsValid || instance.cachedIsSmoke || instance.cachedIsInvisibleTrap) continue;

        glm::vec3 toCam = instance.position - camPos;
        float distSq = glm::dot(toCam, toCam);
        if (distSq > maxPossibleDistSq) continue;

        float worldRadius = instance.cachedBoundRadius * instance.scale;
        float cullRadius = worldRadius;
        if (instance.cachedDisableAnimation) {
            cullRadius = std::max(cullRadius, 3.0f);
        }
        // Larger objects stay visible further away.
        float effectiveMaxDistSq = maxRenderDistanceSq * std::max(1.0f, cullRadius / 12.0f);
        if (instance.cachedDisableAnimation) {
            effectiveMaxDistSq *= 2.6f;
        }
        if (instance.cachedIsGroundDetail) {
            effectiveMaxDistSq *= 0.75f;
        }

        if (distSq > effectiveMaxDistSq) continue;

        // Frustum cull with padding
        float paddedRadius = std::max(cullRadius * 1.5f, cullRadius + 3.0f);
        if (cullRadius > 0.0f && !frustum.intersectsSphere(instance.position, paddedRadius)) continue;

        sortedVisible_.push_back({i, instance.modelId, distSq, effectiveMaxDistSq});
    }

    // Sort by modelId to minimize vertex/index buffer rebinds
    std::sort(sortedVisible_.begin(), sortedVisible_.end(),
              [](const VisibleEntry& a, const VisibleEntry& b) { return a.modelId < b.modelId; });

    uint32_t currentModelId = UINT32_MAX;
    const M2ModelGPU* currentModel = nullptr;  // null while the current group is not drawable

    // State tracking
    VkPipeline currentPipeline = VK_NULL_HANDLE;
    uint32_t frameIndex = vkCtx_->getCurrentFrame();

    // Push constants struct matching m2.vert.glsl push_constant block
    struct M2PushConstants {
        glm::mat4 model;
        glm::vec2 uvOffset;
        int texCoordSet;
        int useBones;
        int isFoliage;
        float fadeAlpha;
    };

    // Bind per-frame descriptor set (set 0) — shared across all draws
    vkCmdBindDescriptorSets(cmd, VK_PIPELINE_BIND_POINT_GRAPHICS,
                            pipelineLayout_, 0, 1, &perFrameSet, 0, nullptr);

    // Start with opaque pipeline
    vkCmdBindPipeline(cmd, VK_PIPELINE_BIND_POINT_GRAPHICS, opaquePipeline_);
    currentPipeline = opaquePipeline_;

    for (const auto& entry : sortedVisible_) {
        if (entry.index >= instances.size()) continue;
        auto& instance = instances[entry.index];

        // Bind vertex + index buffers once per model group
        if (entry.modelId != currentModelId) {
            currentModelId = entry.modelId;
            // Invalidate first: if the lookup or buffer check fails, every instance
            // in this group must be skipped, not just the first one. Otherwise the
            // following instances would use a stale (or null) model pointer and
            // draw with the previous group's buffers still bound.
            currentModel = nullptr;
            auto mdlIt = models.find(currentModelId);
            if (mdlIt == models.end()) continue;
            if (!mdlIt->second.vertexBuffer) continue;
            currentModel = &mdlIt->second;
            VkDeviceSize offset = 0;
            vkCmdBindVertexBuffers(cmd, 0, 1, &currentModel->vertexBuffer, &offset);
            vkCmdBindIndexBuffer(cmd, currentModel->indexBuffer, 0, VK_INDEX_TYPE_UINT16);
        }
        if (!currentModel) continue;  // group was rejected when it was first seen

        const M2ModelGPU& model = *currentModel;

        // Distance-based fade alpha for smooth pop-in (squared-distance, no sqrt)
        float fadeAlpha = 1.0f;
        float fadeFrac = model.disableAnimation ? 0.55f : fadeStartFraction;
        float fadeStartDistSq = entry.effectiveMaxDistSq * fadeFrac * fadeFrac;
        if (entry.distSq > fadeStartDistSq) {
            fadeAlpha = std::clamp((entry.effectiveMaxDistSq - entry.distSq) /
                                  (entry.effectiveMaxDistSq - fadeStartDistSq), 0.0f, 1.0f);
        }

        float instanceFadeAlpha = fadeAlpha;
        if (model.isGroundDetail) {
            instanceFadeAlpha *= 0.82f;
        }
        if (model.isInstancePortal) {
            // Render mesh at low alpha + emit glow sprite at center
            instanceFadeAlpha *= 0.12f;
            if (entry.distSq < 400.0f * 400.0f) {
                glm::vec3 center = glm::vec3(instance.modelMatrix * glm::vec4(0.0f, 0.0f, 0.0f, 1.0f));
                GlowSprite gs;
                gs.worldPos = center;
                gs.color = glm::vec4(0.35f, 0.5f, 1.0f, 1.1f);
                gs.size = instance.scale * 5.0f;
                glowSprites_.push_back(gs);
                GlowSprite halo = gs;
                halo.color.a *= 0.3f;
                halo.size *= 2.2f;
                glowSprites_.push_back(halo);
            }
        }

        // Upload bone matrices to SSBO if model has skeletal animation.
        // Skip animated instances entirely until bones are computed + buffers allocated
        // to prevent bind-pose/T-pose flash on first appearance.
        bool modelNeedsAnimation = model.hasAnimation && !model.disableAnimation;
        if (modelNeedsAnimation && instance.boneMatrices.empty()) {
            continue;  // Bones not yet computed — skip to avoid bind-pose flash
        }
        bool needsBones = modelNeedsAnimation && !instance.boneMatrices.empty();
        if (needsBones && (!instance.boneBuffer[frameIndex] || !instance.boneSet[frameIndex])) {
            continue;  // Bone buffers not yet allocated — skip to avoid bind-pose flash
        }
        bool useBones = needsBones;
        if (useBones) {
            // Upload bone matrices only when recomputed (skip frame-skipped instances)
            if (instance.bonesDirty && instance.boneMapped[frameIndex]) {
                int numBones = std::min(static_cast<int>(instance.boneMatrices.size()), 128);
                memcpy(instance.boneMapped[frameIndex], instance.boneMatrices.data(),
                       numBones * sizeof(glm::mat4));
                instance.bonesDirty = false;
            }

            // Bind bone descriptor set (set 2)
            if (instance.boneSet[frameIndex]) {
                vkCmdBindDescriptorSets(cmd, VK_PIPELINE_BIND_POINT_GRAPHICS,
                                        pipelineLayout_, 2, 1, &instance.boneSet[frameIndex], 0, nullptr);
            }
        }

        // LOD selection based on squared distance (avoid sqrt)
        uint16_t desiredLOD = 0;
        if (entry.distSq > 150.0f * 150.0f) desiredLOD = 3;
        else if (entry.distSq > 80.0f * 80.0f) desiredLOD = 2;
        else if (entry.distSq > 40.0f * 40.0f) desiredLOD = 1;

        // Fall back to LOD 0 when the model does not provide the desired level.
        uint16_t targetLOD = desiredLOD;
        if (desiredLOD > 0 && !(model.availableLODs & (1u << desiredLOD))) {
            targetLOD = 0;
        }

        const bool foliageLikeModel = model.isFoliageLike;
        // Particle-dominant spell effects: mesh is emission geometry, render dim
        const bool particleDominantEffect = model.isSpellEffect &&
            !model.particleEmitters.empty() && model.batches.size() <= 2;
        for (const auto& batch : model.batches) {
            if (batch.indexCount == 0) continue;
            if (!model.isGroundDetail && batch.submeshLevel != targetLOD) continue;
            if (batch.batchOpacity < 0.01f) continue;

            const bool koboldFlameCard = batch.colorKeyBlack && model.isKoboldFlame;
            const bool smallCardLikeBatch =
                (batch.glowSize <= 1.35f) ||
                (batch.lanternGlowHint && batch.glowSize <= 6.0f);
            const bool batchUnlit = (batch.materialFlags & 0x01) != 0;
            const bool elvenLikeModel = model.isElvenLike;
            const bool lanternLikeModel = model.isLanternLike;
            const bool shouldUseGlowSprite =
                !koboldFlameCard &&
                (elvenLikeModel || (lanternLikeModel && batch.lanternGlowHint)) &&
                !model.isSpellEffect &&
                smallCardLikeBatch &&
                (batch.lanternGlowHint ||
                 (batch.blendMode >= 3) ||
                 (batch.colorKeyBlack && batchUnlit && batch.blendMode >= 1));
            if (shouldUseGlowSprite) {
                if (entry.distSq < 180.0f * 180.0f) {
                    glm::vec3 worldPos = glm::vec3(instance.modelMatrix * glm::vec4(batch.center, 1.0f));
                    GlowSprite gs;
                    gs.worldPos = worldPos;
                    if (batch.glowTint == 1 || elvenLikeModel) {
                        gs.color = glm::vec4(0.48f, 0.72f, 1.0f, 1.05f);
                    } else if (batch.glowTint == 2) {
                        gs.color = glm::vec4(1.0f, 0.28f, 0.22f, 1.10f);
                    } else {
                        gs.color = glm::vec4(1.0f, 0.82f, 0.46f, 1.15f);
                    }
                    gs.size = batch.glowSize * instance.scale * 1.45f;
                    glowSprites_.push_back(gs);
                    GlowSprite halo = gs;
                    halo.color.a *= 0.42f;
                    halo.size *= 1.8f;
                    glowSprites_.push_back(halo);
                }
                // The sprite replaces the card mesh for these batches, so skip the mesh draw.
                const bool cardLikeSkipMesh =
                    (batch.blendMode >= 3) ||
                    batch.colorKeyBlack ||
                    ((batch.materialFlags & 0x01) != 0);
                if ((batch.glowCardLike && lanternLikeModel) ||
                    (cardLikeSkipMesh && !lanternLikeModel)) {
                    continue;
                }
            }

            // Compute UV offset for texture animation
            glm::vec2 uvOffset(0.0f, 0.0f);
            if (batch.textureAnimIndex != 0xFFFF && model.hasTextureAnimation) {
                uint16_t lookupIdx = batch.textureAnimIndex;
                if (lookupIdx < model.textureTransformLookup.size()) {
                    uint16_t transformIdx = model.textureTransformLookup[lookupIdx];
                    if (transformIdx < model.textureTransforms.size()) {
                        const auto& tt = model.textureTransforms[transformIdx];
                        glm::vec3 trans = interpVec3(tt.translation,
                            instance.currentSequenceIndex, instance.animTime,
                            glm::vec3(0.0f), model.globalSequenceDurations);
                        uvOffset = glm::vec2(trans.x, trans.y);
                    }
                }
            }
            // Lava M2 models: fallback UV scroll if no texture animation
            if (model.isLavaModel && uvOffset == glm::vec2(0.0f)) {
                static auto startTime = std::chrono::steady_clock::now();
                float t = std::chrono::duration<float>(std::chrono::steady_clock::now() - startTime).count();
                uvOffset = glm::vec2(t * 0.03f, -t * 0.08f);
            }

            // Foliage/card-like batches render more stably as cutout (depth-write on)
            // instead of alpha-blended sorting.
            const bool foliageCutout =
                foliageLikeModel &&
                !model.isSpellEffect &&
                batch.blendMode <= 3;
            const bool forceCutout =
                !model.isSpellEffect &&
                (model.isGroundDetail ||
                 foliageCutout ||
                 batch.blendMode == 1 ||
                 (batch.blendMode >= 2 && !batch.hasAlpha) ||
                 batch.colorKeyBlack);

            // Select pipeline based on blend mode
            uint8_t effectiveBlendMode = batch.blendMode;
            if (model.isSpellEffect) {
                // Effect models: force additive blend for opaque/cutout batches
                // so the mesh renders as a transparent glow, not a solid object
                if (effectiveBlendMode <= 1) {
                    effectiveBlendMode = 3;  // additive
                } else if (effectiveBlendMode == 4 || effectiveBlendMode == 5) {
                    effectiveBlendMode = 3;
                }
            }
            if (forceCutout) {
                effectiveBlendMode = 1;
            }

            VkPipeline desiredPipeline;
            if (forceCutout) {
                // Use opaque pipeline + shader discard for stable foliage cards.
                desiredPipeline = opaquePipeline_;
            } else {
                switch (effectiveBlendMode) {
                    case 0: desiredPipeline = opaquePipeline_; break;
                    case 1: desiredPipeline = alphaTestPipeline_; break;
                    case 2: desiredPipeline = alphaPipeline_; break;
                    default: desiredPipeline = additivePipeline_; break;
                }
            }
            if (desiredPipeline != currentPipeline) {
                vkCmdBindPipeline(cmd, VK_PIPELINE_BIND_POINT_GRAPHICS, desiredPipeline);
                currentPipeline = desiredPipeline;
            }

            // Update material UBO with per-draw dynamic values (interiorDarken, forceCutout overrides)
            // Note: fadeAlpha is in push constants (per-draw) to avoid shared-UBO race
            if (batch.materialUBOMapped) {
                auto* mat = static_cast<M2MaterialUBO*>(batch.materialUBOMapped);
                mat->interiorDarken = insideInterior ? 1.0f : 0.0f;
                if (batch.colorKeyBlack) {
                    mat->colorKeyThreshold = (effectiveBlendMode == 4 || effectiveBlendMode == 5) ? 0.7f : 0.08f;
                }
                if (forceCutout) {
                    mat->alphaTest = model.isGroundDetail ? 3 : (foliageCutout ? 2 : 1);
                    if (model.isGroundDetail) {
                        mat->unlit = 0;
                    }
                }
            }

            // Bind material descriptor set (set 1) — skip batch if missing
            // to avoid inheriting a stale descriptor set from a prior renderer
            if (!batch.materialSet) continue;
            vkCmdBindDescriptorSets(cmd, VK_PIPELINE_BIND_POINT_GRAPHICS,
                                    pipelineLayout_, 1, 1, &batch.materialSet, 0, nullptr);

            // Push constants
            M2PushConstants pc;
            pc.model = instance.modelMatrix;
            pc.uvOffset = uvOffset;
            pc.texCoordSet = static_cast<int>(batch.textureUnit);
            pc.useBones = useBones ? 1 : 0;
            pc.isFoliage = model.shadowWindFoliage ? 1 : 0;
            pc.fadeAlpha = instanceFadeAlpha;
            // Particle-dominant effects: mesh is emission geometry, don't render
            if (particleDominantEffect && batch.blendMode <= 1) {
                continue;
            }
            vkCmdPushConstants(cmd, pipelineLayout_, VK_SHADER_STAGE_VERTEX_BIT, 0, sizeof(pc), &pc);

            vkCmdDrawIndexed(cmd, batch.indexCount, 1, batch.indexStart, 0, 0);
            lastDrawCallCount++;
        }
    }

    // Render glow sprites as billboarded additive point lights.
    // The mapped pointer is checked too: the vertex data is written through it below.
    if (!glowSprites_.empty() && particleAdditivePipeline_ && glowVB_ && glowVBMapped_ && glowTexDescSet_) {
        vkCmdBindPipeline(cmd, VK_PIPELINE_BIND_POINT_GRAPHICS, particleAdditivePipeline_);
        vkCmdBindDescriptorSets(cmd, VK_PIPELINE_BIND_POINT_GRAPHICS,
                                particlePipelineLayout_, 0, 1, &perFrameSet, 0, nullptr);
        vkCmdBindDescriptorSets(cmd, VK_PIPELINE_BIND_POINT_GRAPHICS,
                                particlePipelineLayout_, 1, 1, &glowTexDescSet_, 0, nullptr);

        // Push constants for particle: tileCount(vec2) + alphaKey(int)
        struct { float tileX, tileY; int alphaKey; } particlePush = {1.0f, 1.0f, 0};
        vkCmdPushConstants(cmd, particlePipelineLayout_, VK_SHADER_STAGE_FRAGMENT_BIT, 0,
                           sizeof(particlePush), &particlePush);

        // Write glow vertex data directly to mapped buffer (no temp vector).
        // Sprites beyond MAX_GLOW_SPRITES are dropped. Each vertex is 9 floats:
        // position (3), color (4), size (1), and a trailing 0.
        size_t uploadCount = std::min(glowSprites_.size(), MAX_GLOW_SPRITES);
        float* dst = static_cast<float*>(glowVBMapped_);
        for (size_t gi = 0; gi < uploadCount; gi++) {
            const auto& gs = glowSprites_[gi];
            *dst++ = gs.worldPos.x;
            *dst++ = gs.worldPos.y;
            *dst++ = gs.worldPos.z;
            *dst++ = gs.color.r;
            *dst++ = gs.color.g;
            *dst++ = gs.color.b;
            *dst++ = gs.color.a;
            *dst++ = gs.size;
            *dst++ = 0.0f;
        }

        VkDeviceSize offset = 0;
        vkCmdBindVertexBuffers(cmd, 0, 1, &glowVB_, &offset);
        vkCmdDraw(cmd, static_cast<uint32_t>(uploadCount), 1, 0, 0);
    }

}

/// Creates every Vulkan object used by the M2 shadow-map pass.
///
/// Builds, in order: the ShadowParams uniform buffer, the descriptor set layout
/// (binding 0 = sampler, binding 1 = ShadowParams UBO), the default descriptor
/// set (white fallback texture + UBO), a 256-set pool for per-texture foliage
/// sets, the pipeline layout and the depth-only shadow pipeline.
///
/// @param shadowRenderPass Render pass the shadow pipeline is built against.
/// @return true on success; false (after logging) if any step fails. Objects
///         created before the failing step are left in their members.
bool M2Renderer::initializeShadow(VkRenderPass shadowRenderPass) {
    if (!vkCtx_ || shadowRenderPass == VK_NULL_HANDLE) return false;
    // The default descriptor set samples the white fallback texture, so bail out
    // before creating anything if it is missing instead of dereferencing null below.
    if (!whiteTexture_) {
        LOG_ERROR("M2Renderer: white fallback texture missing, cannot initialize shadow pass");
        return false;
    }
    VkDevice device = vkCtx_->getDevice();

    // ShadowParams UBO: useBones, useTexture, alphaTest, foliageSway, windTime, foliageMotionDamp
    struct ShadowParamsUBO {
        int32_t useBones = 0;
        int32_t useTexture = 0;
        int32_t alphaTest = 0;
        int32_t foliageSway = 0;
        float windTime = 0.0f;
        float foliageMotionDamp = 1.0f;
    };

    // Create ShadowParams UBO (persistently mapped, CPU-written)
    VkBufferCreateInfo bufCI{};
    bufCI.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO;
    bufCI.size = sizeof(ShadowParamsUBO);
    bufCI.usage = VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT;
    VmaAllocationCreateInfo allocCI{};
    allocCI.usage = VMA_MEMORY_USAGE_CPU_TO_GPU;
    allocCI.flags = VMA_ALLOCATION_CREATE_MAPPED_BIT;
    VmaAllocationInfo allocInfo{};
    if (vmaCreateBuffer(vkCtx_->getAllocator(), &bufCI, &allocCI,
            &shadowParamsUBO_, &shadowParamsAlloc_, &allocInfo) != VK_SUCCESS) {
        LOG_ERROR("M2Renderer: failed to create shadow params UBO");
        return false;
    }
    // Seed the buffer with the struct's default values.
    ShadowParamsUBO defaultParams{};
    std::memcpy(allocInfo.pMappedData, &defaultParams, sizeof(defaultParams));

    // Create descriptor set layout: binding 0 = sampler2D, binding 1 = ShadowParams UBO
    VkDescriptorSetLayoutBinding layoutBindings[2]{};
    layoutBindings[0].binding = 0;
    layoutBindings[0].descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER;
    layoutBindings[0].descriptorCount = 1;
    layoutBindings[0].stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT;
    layoutBindings[1].binding = 1;
    layoutBindings[1].descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER;
    layoutBindings[1].descriptorCount = 1;
    layoutBindings[1].stageFlags = VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_FRAGMENT_BIT;
    VkDescriptorSetLayoutCreateInfo layoutCI{};
    layoutCI.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO;
    layoutCI.bindingCount = 2;
    layoutCI.pBindings = layoutBindings;
    if (vkCreateDescriptorSetLayout(device, &layoutCI, nullptr, &shadowParamsLayout_) != VK_SUCCESS) {
        LOG_ERROR("M2Renderer: failed to create shadow params layout");
        return false;
    }

    // Create descriptor pool holding exactly the one default set
    VkDescriptorPoolSize poolSizes[2]{};
    poolSizes[0].type = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER;
    poolSizes[0].descriptorCount = 1;
    poolSizes[1].type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER;
    poolSizes[1].descriptorCount = 1;
    VkDescriptorPoolCreateInfo poolCI{};
    poolCI.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO;
    poolCI.maxSets = 1;
    poolCI.poolSizeCount = 2;
    poolCI.pPoolSizes = poolSizes;
    if (vkCreateDescriptorPool(device, &poolCI, nullptr, &shadowParamsPool_) != VK_SUCCESS) {
        LOG_ERROR("M2Renderer: failed to create shadow params pool");
        return false;
    }

    // Allocate descriptor set
    VkDescriptorSetAllocateInfo setAlloc{};
    setAlloc.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO;
    setAlloc.descriptorPool = shadowParamsPool_;
    setAlloc.descriptorSetCount = 1;
    setAlloc.pSetLayouts = &shadowParamsLayout_;
    if (vkAllocateDescriptorSets(device, &setAlloc, &shadowParamsSet_) != VK_SUCCESS) {
        LOG_ERROR("M2Renderer: failed to allocate shadow params set");
        return false;
    }

    // Write descriptors (use white fallback for binding 0)
    VkDescriptorBufferInfo bufInfo{};
    bufInfo.buffer = shadowParamsUBO_;
    bufInfo.offset = 0;
    bufInfo.range = sizeof(ShadowParamsUBO);

    VkDescriptorImageInfo imgInfo{};
    imgInfo.imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
    imgInfo.imageView = whiteTexture_->getImageView();
    imgInfo.sampler = whiteTexture_->getSampler();

    VkWriteDescriptorSet writes[2]{};
    writes[0].sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET;
    writes[0].dstSet = shadowParamsSet_;
    writes[0].dstBinding = 0;
    writes[0].descriptorCount = 1;
    writes[0].descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER;
    writes[0].pImageInfo = &imgInfo;
    writes[1].sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET;
    writes[1].dstSet = shadowParamsSet_;
    writes[1].dstBinding = 1;
    writes[1].descriptorCount = 1;
    writes[1].descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER;
    writes[1].pBufferInfo = &bufInfo;
    vkUpdateDescriptorSets(device, 2, writes, 0, nullptr);

    // Per-frame pool for foliage shadow texture sets (reset each frame)
    {
        VkDescriptorPoolSize texPoolSizes[2]{};
        texPoolSizes[0].type = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER;
        texPoolSizes[0].descriptorCount = 256;
        texPoolSizes[1].type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER;
        texPoolSizes[1].descriptorCount = 256;
        VkDescriptorPoolCreateInfo texPoolCI{};
        texPoolCI.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO;
        texPoolCI.maxSets = 256;
        texPoolCI.poolSizeCount = 2;
        texPoolCI.pPoolSizes = texPoolSizes;
        if (vkCreateDescriptorPool(device, &texPoolCI, nullptr, &shadowTexPool_) != VK_SUCCESS) {
            LOG_ERROR("M2Renderer: failed to create shadow texture pool");
            return false;
        }
    }

    // Create shadow pipeline layout: set 0 = shadowParamsLayout_ (the only set),
    // push constants = 128 bytes
    VkPushConstantRange pc{};
    pc.stageFlags = VK_SHADER_STAGE_VERTEX_BIT;
    pc.offset = 0;
    pc.size = 128;  // lightSpaceMatrix (64) + model (64)
    shadowPipelineLayout_ = createPipelineLayout(device, {shadowParamsLayout_}, {pc});
    if (!shadowPipelineLayout_) {
        LOG_ERROR("M2Renderer: failed to create shadow pipeline layout");
        return false;
    }

    // Load shadow shaders
    VkShaderModule vertShader, fragShader;
    if (!vertShader.loadFromFile(device, "assets/shaders/shadow.vert.spv")) {
        LOG_ERROR("M2Renderer: failed to load shadow vertex shader");
        return false;
    }
    if (!fragShader.loadFromFile(device, "assets/shaders/shadow.frag.spv")) {
        LOG_ERROR("M2Renderer: failed to load shadow fragment shader");
        // The vertex module was already loaded; release it the same way the
        // success path does so this early return does not leave it behind.
        vertShader.destroy();
        return false;
    }

    // M2 vertex layout: 18 floats = 72 bytes stride
    // loc0=pos(off0), loc1=normal(off12), loc2=texCoord0(off24), loc5=texCoord1(off32),
    // loc3=boneWeights(off40), loc4=boneIndices(off56)
    // Shadow shader locations: 0=aPos, 1=aTexCoord, 2=aBoneWeights, 3=aBoneIndicesF
    // useBones=0 so locations 2,3 are never used
    VkVertexInputBindingDescription vertBind{};
    vertBind.binding = 0;
    vertBind.stride = 18 * sizeof(float);
    vertBind.inputRate = VK_VERTEX_INPUT_RATE_VERTEX;
    std::vector<VkVertexInputAttributeDescription> vertAttrs = {
        {0, 0, VK_FORMAT_R32G32B32_SFLOAT,    0},                     // aPos       -> position
        {1, 0, VK_FORMAT_R32G32_SFLOAT,       6 * sizeof(float)},     // aTexCoord  -> texCoord0
        {2, 0, VK_FORMAT_R32G32B32A32_SFLOAT, 10 * sizeof(float)},    // aBoneWeights
        {3, 0, VK_FORMAT_R32G32B32A32_SFLOAT, 14 * sizeof(float)},    // aBoneIndicesF
    };

    shadowPipeline_ = PipelineBuilder()
        .setShaders(vertShader.stageInfo(VK_SHADER_STAGE_VERTEX_BIT),
                    fragShader.stageInfo(VK_SHADER_STAGE_FRAGMENT_BIT))
        .setVertexInput({vertBind}, vertAttrs)
        .setTopology(VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST)
        // Foliage/leaf cards are effectively two-sided; front-face culling can
        // drop them from the shadow map depending on light/view orientation.
        .setRasterization(VK_POLYGON_MODE_FILL, VK_CULL_MODE_NONE)
        .setDepthTest(true, true, VK_COMPARE_OP_LESS_OR_EQUAL)
        .setDepthBias(0.05f, 0.20f)
        .setNoColorAttachment()
        .setLayout(shadowPipelineLayout_)
        .setRenderPass(shadowRenderPass)
        .setDynamicStates({VK_DYNAMIC_STATE_VIEWPORT, VK_DYNAMIC_STATE_SCISSOR})
        .build(device);

    // Shader modules are only needed while the pipeline is being built.
    vertShader.destroy();
    fragShader.destroy();

    if (!shadowPipeline_) {
        LOG_ERROR("M2Renderer: failed to create shadow pipeline");
        return false;
    }
    LOG_INFO("M2Renderer shadow pipeline initialized");
    return true;
}

/// Records shadow-map draws for all M2 instances near the shadow centre.
///
/// Draws in two passes over `instances`: first every model whose
/// `shadowWindFoliage` flag is false, then every model whose flag is true
/// (with foliageSway/useTexture/alphaTest enabled in the params UBO and
/// per-batch texture descriptor sets bound for alpha batches).
///
/// @param cmd              Command buffer the draws are recorded into.
/// @param lightSpaceMatrix Pushed as the first 64 bytes of the push constants.
/// @param globalTime       Written to the UBO's `windTime` field.
/// @param shadowCenter     Centre of the culling sphere.
/// @param shadowRadius     Radius of the culling sphere; instances whose
///                         position lies farther away are skipped.
void M2Renderer::renderShadow(VkCommandBuffer cmd, const glm::mat4& lightSpaceMatrix, float globalTime,
                              const glm::vec3& shadowCenter, float shadowRadius) {
    if (!shadowPipeline_ || !shadowParamsSet_) return;
    if (instances.empty() || models.empty()) return;

    // Push-constant block: 2 x mat4 = 128 bytes (matches the size pushed below).
    struct ShadowPush { glm::mat4 lightSpaceMatrix; glm::mat4 model; };
    // Local mirror of the UBO contents written via memcpy in drawPass.
    struct ShadowParamsUBO {
        int32_t useBones = 0;
        int32_t useTexture = 0;
        int32_t alphaTest = 0;
        int32_t foliageSway = 0;
        float windTime = 0.0f;
        float foliageMotionDamp = 1.0f;
    };
    // Compare squared distances so the cull needs no sqrt.
    const float shadowRadiusSq = shadowRadius * shadowRadius;

    // Reset per-frame texture descriptor pool for foliage alpha-test sets
    if (shadowTexPool_) {
        vkResetDescriptorPool(vkCtx_->getDevice(), shadowTexPool_, 0);
    }
    // Cache: texture imageView -> allocated descriptor set (avoids duplicates within frame)
    std::unordered_map<VkImageView, VkDescriptorSet> texSetCache;

    // Returns a descriptor set pairing `tex` (binding 0) with the shared params
    // UBO (binding 1); allocates from shadowTexPool_ on first use of an image view.
    auto getTexDescSet = [&](VkTexture* tex) -> VkDescriptorSet {
        VkImageView iv = tex->getImageView();
        auto cacheIt = texSetCache.find(iv);
        if (cacheIt != texSetCache.end()) return cacheIt->second;

        VkDescriptorSet set = VK_NULL_HANDLE;
        VkDescriptorSetAllocateInfo ai{};
        ai.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO;
        ai.descriptorPool = shadowTexPool_;
        ai.descriptorSetCount = 1;
        ai.pSetLayouts = &shadowParamsLayout_;
        if (vkAllocateDescriptorSets(vkCtx_->getDevice(), &ai, &set) != VK_SUCCESS) {
            // Not cached, so a later call for the same view retries the allocation.
            return shadowParamsSet_; // fallback to white texture
        }
        VkDescriptorImageInfo imgInfo{};
        imgInfo.imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
        imgInfo.imageView = iv;
        imgInfo.sampler = tex->getSampler();
        VkDescriptorBufferInfo bufInfo{};
        bufInfo.buffer = shadowParamsUBO_;
        bufInfo.offset = 0;
        bufInfo.range = sizeof(ShadowParamsUBO);
        VkWriteDescriptorSet writes[2]{};
        writes[0].sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET;
        writes[0].dstSet = set;
        writes[0].dstBinding = 0;
        writes[0].descriptorCount = 1;
        writes[0].descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER;
        writes[0].pImageInfo = &imgInfo;
        writes[1].sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET;
        writes[1].dstSet = set;
        writes[1].dstBinding = 1;
        writes[1].descriptorCount = 1;
        writes[1].descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER;
        writes[1].pBufferInfo = &bufInfo;
        vkUpdateDescriptorSets(vkCtx_->getDevice(), 2, writes, 0, nullptr);
        texSetCache[iv] = set;
        return set;
    };

    // Helper lambda to draw instances with a given foliageSway setting
    auto drawPass = [&](bool foliagePass) {
        ShadowParamsUBO params{};
        params.foliageSway = foliagePass ? 1 : 0;
        params.windTime = globalTime;
        params.foliageMotionDamp = 1.0f;
        // For foliage pass: enable texture+alphaTest in UBO (per-batch textures bound below)
        if (foliagePass) {
            params.useTexture = 1;
            params.alphaTest = 1;
        }

        // NOTE(review): both passes memcpy into the same mapped UBO while the
        // command buffer is still being recorded. If the GPU only reads the
        // buffer after recording finishes, pass 1 would presumably observe
        // pass 2's parameters — confirm, and consider one UBO per pass.
        VmaAllocationInfo allocInfo{};
        vmaGetAllocationInfo(vkCtx_->getAllocator(), shadowParamsAlloc_, &allocInfo);
        std::memcpy(allocInfo.pMappedData, &params, sizeof(params));

        vkCmdBindPipeline(cmd, VK_PIPELINE_BIND_POINT_GRAPHICS, shadowPipeline_);
        vkCmdBindDescriptorSets(cmd, VK_PIPELINE_BIND_POINT_GRAPHICS, shadowPipelineLayout_,
            0, 1, &shadowParamsSet_, 0, nullptr);

        // Tracks the last model whose buffers were bound, to skip redundant binds.
        uint32_t currentModelId = UINT32_MAX;
        const M2ModelGPU* currentModel = nullptr;

        for (const auto& instance : instances) {
            // Use cached flags to skip early without hash lookup
            if (!instance.cachedIsValid || instance.cachedIsSmoke || instance.cachedIsInvisibleTrap) continue;

            // Distance cull: skip instances outside the sphere (shadowCenter, shadowRadius)
            glm::vec3 diff = instance.position - shadowCenter;
            if (glm::dot(diff, diff) > shadowRadiusSq) continue;

            if (!instance.cachedModel) continue;
            const M2ModelGPU& model = *instance.cachedModel;

            // Filter: only draw foliage models in foliage pass, non-foliage in non-foliage pass
            if (model.shadowWindFoliage != foliagePass) continue;

            // Bind vertex/index buffers when model changes
            if (instance.modelId != currentModelId) {
                currentModelId = instance.modelId;
                currentModel = &model;
                VkDeviceSize offset = 0;
                vkCmdBindVertexBuffers(cmd, 0, 1, &currentModel->vertexBuffer, &offset);
                vkCmdBindIndexBuffer(cmd, currentModel->indexBuffer, 0, VK_INDEX_TYPE_UINT16);
            }

            // Per-instance push constants: light-space matrix + this instance's model matrix.
            ShadowPush push{lightSpaceMatrix, instance.modelMatrix};
            vkCmdPushConstants(cmd, shadowPipelineLayout_, VK_SHADER_STAGE_VERTEX_BIT,
                               0, 128, &push);

            for (const auto& batch : model.batches) {
                // Only batches with submeshLevel 0 are drawn into the shadow map.
                if (batch.submeshLevel > 0) continue;
                // For foliage: bind per-batch texture for alpha-tested shadows
                if (foliagePass && batch.hasAlpha && batch.texture) {
                    VkDescriptorSet texSet = getTexDescSet(batch.texture);
                    vkCmdBindDescriptorSets(cmd, VK_PIPELINE_BIND_POINT_GRAPHICS, shadowPipelineLayout_,
                        0, 1, &texSet, 0, nullptr);
                } else if (foliagePass) {
                    // Non-alpha batch: rebind default set (white texture, alpha test passes)
                    vkCmdBindDescriptorSets(cmd, VK_PIPELINE_BIND_POINT_GRAPHICS, shadowPipelineLayout_,
                        0, 1, &shadowParamsSet_, 0, nullptr);
                }
                vkCmdDrawIndexed(cmd, batch.indexCount, 1, batch.indexStart, 0, 0);
            }
        }
    };

    // Pass 1: non-foliage (no wind displacement)
    drawPass(false);
    // Pass 2: foliage (wind displacement enabled, per-batch alpha-tested textures)
    drawPass(true);
}

// --- M2 Particle Emitter Helpers ---

/// Samples a float animation track at `animTime` with linear interpolation.
///
/// @param track              Track to sample; returns 0 if it has no data.
/// @param animTime           Animation time passed to resolveTrackTime.
/// @param seqIdx             Sequence index passed to resolveTrackTime.
/// @param globalSeqDurations Global sequence durations passed to resolveTrackTime.
/// @return The interpolated value, or 0.0f when the track, the resolved
///         sequence, or the keyframe lookup is unusable.
float M2Renderer::interpFloat(const pipeline::M2AnimationTrack& track, float animTime,
                                int seqIdx, const std::vector<pipeline::M2Sequence>& /*seqs*/,
                                const std::vector<uint32_t>& globalSeqDurations) {
    if (!track.hasData()) return 0.0f;
    // Initialised so the range check below is well-defined even if the resolver
    // were to leave them untouched.
    int si = -1;
    float t = 0.0f;
    resolveTrackTime(track, seqIdx, animTime, globalSeqDurations, si, t);
    if (si < 0 || si >= static_cast<int>(track.sequences.size())) return 0.0f;
    const auto& keys = track.sequences[si];
    if (keys.timestamps.empty() || keys.floatValues.empty()) return 0.0f;
    if (keys.floatValues.size() == 1) return keys.floatValues[0];
    int idx = findKeyframeIndex(keys.timestamps, t);
    if (idx < 0) return 0.0f;
    // Both arrays are indexed with i0/i1 below, so bound the indices by the
    // shorter one; a track with mismatched lengths must not read out of range.
    const size_t lastKey = std::min(keys.timestamps.size(), keys.floatValues.size()) - 1;
    const size_t i0 = std::min(static_cast<size_t>(idx), lastKey);
    const size_t i1 = std::min(i0 + 1, lastKey);
    if (i0 == i1) return keys.floatValues[i0];
    const float t0 = static_cast<float>(keys.timestamps[i0]);
    const float t1 = static_cast<float>(keys.timestamps[i1]);
    const float dur = t1 - t0;
    // Zero-length (or reversed) segments collapse to the left keyframe.
    const float frac = (dur > 0.0f) ? glm::clamp((t - t0) / dur, 0.0f, 1.0f) : 0.0f;
    return glm::mix(keys.floatValues[i0], keys.floatValues[i1], frac);
}

/// Evaluates a float FBlock at a particle's life ratio.
///
/// @param fb        Block with parallel `timestamps` / `floatValues` arrays.
///                  Timestamps are compared directly against `lifeRatio`, so
///                  they are presumably normalised to [0, 1] — TODO confirm.
/// @param lifeRatio Particle age divided by lifespan; clamped to [0, 1].
/// @return 1.0f for an empty block, the sole value for a one-value or
///         timestamp-less block, otherwise the linearly interpolated value
///         (the last value once `lifeRatio` is past every timestamp).
float M2Renderer::interpFBlockFloat(const pipeline::M2FBlock& fb, float lifeRatio) {
    if (fb.floatValues.empty()) return 1.0f;
    if (fb.floatValues.size() == 1 || fb.timestamps.empty()) return fb.floatValues[0];
    lifeRatio = glm::clamp(lifeRatio, 0.0f, 1.0f);
    const size_t lastValue = fb.floatValues.size() - 1;
    // Find the first segment [t_i, t_{i+1}] whose end is at or after lifeRatio.
    for (size_t i = 0; i + 1 < fb.timestamps.size(); i++) {
        if (lifeRatio <= fb.timestamps[i + 1]) {
            const float t0 = fb.timestamps[i];
            const float t1 = fb.timestamps[i + 1];
            const float dur = t1 - t0;
            // Clamp so a lifeRatio before the first timestamp holds the first
            // value instead of extrapolating with a negative fraction.
            const float frac = (dur > 0.0f) ? glm::clamp((lifeRatio - t0) / dur, 0.0f, 1.0f) : 0.0f;
            // Value indices are clamped in case there are fewer values than timestamps.
            const size_t v0 = std::min(i, lastValue);
            const size_t v1 = std::min(i + 1, lastValue);
            return glm::mix(fb.floatValues[v0], fb.floatValues[v1], frac);
        }
    }
    return fb.floatValues.back();
}

/// Evaluates a vec3 FBlock at a particle's life ratio.
///
/// @param fb        Block with parallel `timestamps` / `vec3Values` arrays.
///                  Timestamps are compared directly against `lifeRatio`, so
///                  they are presumably normalised to [0, 1] — TODO confirm.
/// @param lifeRatio Particle age divided by lifespan; clamped to [0, 1].
/// @return vec3(1) for an empty block, the sole value for a one-value or
///         timestamp-less block, otherwise the linearly interpolated value
///         (the last value once `lifeRatio` is past every timestamp).
glm::vec3 M2Renderer::interpFBlockVec3(const pipeline::M2FBlock& fb, float lifeRatio) {
    if (fb.vec3Values.empty()) return glm::vec3(1.0f);
    if (fb.vec3Values.size() == 1 || fb.timestamps.empty()) return fb.vec3Values[0];
    lifeRatio = glm::clamp(lifeRatio, 0.0f, 1.0f);
    const size_t lastValue = fb.vec3Values.size() - 1;
    // Find the first segment [t_i, t_{i+1}] whose end is at or after lifeRatio.
    for (size_t i = 0; i + 1 < fb.timestamps.size(); i++) {
        if (lifeRatio <= fb.timestamps[i + 1]) {
            const float t0 = fb.timestamps[i];
            const float t1 = fb.timestamps[i + 1];
            const float dur = t1 - t0;
            // Clamp so a lifeRatio before the first timestamp holds the first
            // value instead of extrapolating with a negative fraction.
            const float frac = (dur > 0.0f) ? glm::clamp((lifeRatio - t0) / dur, 0.0f, 1.0f) : 0.0f;
            // Value indices are clamped in case there are fewer values than timestamps.
            const size_t v0 = std::min(i, lastValue);
            const size_t v1 = std::min(i + 1, lastValue);
            return glm::mix(fb.vec3Values[v0], fb.vec3Values[v1], frac);
        }
    }
    return fb.vec3Values.back();
}

/// Collects the positions of all water-vegetation instances near the camera.
///
/// @param camPos  Reference point the distance is measured from.
/// @param maxDist Inclusive radius; compared as squared distance.
/// @return Positions of instances whose cached model has `isWaterVegetation`
///         set and that lie within `maxDist` of `camPos`.
std::vector<glm::vec3> M2Renderer::getWaterVegetationPositions(const glm::vec3& camPos, float maxDist) const {
    const float radiusSq = maxDist * maxDist;
    std::vector<glm::vec3> positions;

    for (const auto& instance : instances) {
        const bool isWaterPlant = instance.cachedModel && instance.cachedModel->isWaterVegetation;
        if (!isWaterPlant) continue;

        const glm::vec3 toInstance = instance.position - camPos;
        const float distSq = glm::dot(toInstance, toInstance);
        if (distSq <= radiusSq) {
            positions.push_back(instance.position);
        }
    }

    return positions;
}

/// Spawns new particles for every enabled emitter of an instance.
///
/// Each emitter accumulates `rate * dt`; one particle is spawned per whole unit
/// accumulated, until the instance holds MAX_M2_PARTICLES particles.
///
/// @param inst Instance whose `particles` and `emitterAccumulators` are updated.
/// @param gpu  Model providing emitter definitions and animation sequences.
/// @param dt   Time step multiplied by the emission rate.
void M2Renderer::emitParticles(M2Instance& inst, const M2ModelGPU& gpu, float dt) {
    if (inst.emitterAccumulators.size() != gpu.particleEmitters.size()) {
        inst.emitterAccumulators.resize(gpu.particleEmitters.size(), 0.0f);
    }

    std::uniform_real_distribution<float> dist01(0.0f, 1.0f);
    std::uniform_real_distribution<float> distN(-1.0f, 1.0f);

    // Samples one emitter track at the instance's current animation state.
    const auto sampleTrack = [&](const auto& track) {
        return interpFloat(track, inst.animTime, inst.currentSequenceIndex,
                           gpu.sequences, gpu.globalSequenceDurations);
    };

    for (size_t ei = 0; ei < gpu.particleEmitters.size(); ei++) {
        const auto& em = gpu.particleEmitters[ei];
        if (!em.enabled) continue;

        const float rate = sampleTrack(em.emissionRate);
        const float life = sampleTrack(em.lifespan);
        if (rate <= 0.0f || life <= 0.0f) continue;

        auto& accumulator = inst.emitterAccumulators[ei];
        accumulator += rate * dt;

        if (accumulator >= 1.0f && inst.particles.size() < MAX_M2_PARTICLES) {
            // Nothing below depends on the particle being spawned (animation time,
            // bone matrices and model matrix are fixed for this call), so evaluate
            // it once per emitter instead of once per particle.

            // Position: emitter position transformed by bone matrix, then model matrix
            glm::mat4 boneXform = glm::mat4(1.0f);
            if (em.bone < inst.boneMatrices.size()) {
                boneXform = inst.boneMatrices[em.bone];
            }
            const glm::mat4 emitterXform = inst.modelMatrix * boneXform;
            const glm::vec3 spawnPos = glm::vec3(emitterXform * glm::vec4(em.position, 1.0f));
            // Upper-left 3x3 of the same transform, used to orient velocities
            const glm::mat3 rotMat = glm::mat3(emitterXform);

            const float speed = sampleTrack(em.emissionSpeed);
            const float vRange = sampleTrack(em.verticalRange);
            const float hRange = sampleTrack(em.horizontalRange);
            // When emission speed is ~0 and bone animation isn't loaded (.anim files),
            // particles pile up at the same position. Give them a drift so they
            // spread outward like a mist/spray effect instead of clustering.
            const bool needsDrift = std::abs(speed) < 0.01f;

            const uint32_t tilesX = std::max<uint16_t>(em.textureCols, 1);
            const uint32_t tilesY = std::max<uint16_t>(em.textureRows, 1);
            const uint32_t totalTiles = tilesX * tilesY;
            const bool randomTile = (em.flags & kParticleFlagTiled) && totalTiles > 1 &&
                                    (em.flags & kParticleFlagRandomized);
            std::uniform_int_distribution<int> distTile(0, static_cast<int>(totalTiles - 1));

            while (accumulator >= 1.0f && inst.particles.size() < MAX_M2_PARTICLES) {
                accumulator -= 1.0f;

                M2Particle p;
                p.emitterIndex = static_cast<int>(ei);
                p.life = 0.0f;
                p.maxLife = life;
                p.tileIndex = 0.0f;
                p.position = spawnPos;

                // Base direction: up in model space, plus random spread
                glm::vec3 dir(0.0f, 0.0f, 1.0f);
                dir.x += distN(particleRng_) * hRange;
                dir.y += distN(particleRng_) * hRange;
                dir.z += distN(particleRng_) * vRange;
                float len = glm::length(dir);
                if (len > 0.001f) dir /= len;

                // Transform direction by bone + model orientation
                p.velocity = rotMat * dir * speed;

                if (needsDrift) {
                    if (gpu.isFireflyEffect) {
                        // Fireflies: gentle random drift in all directions
                        p.velocity = rotMat * glm::vec3(
                            distN(particleRng_) * 0.6f,
                            distN(particleRng_) * 0.6f,
                            distN(particleRng_) * 0.3f
                        );
                    } else {
                        p.velocity = rotMat * glm::vec3(
                            distN(particleRng_) * 1.0f,
                            distN(particleRng_) * 1.0f,
                            -dist01(particleRng_) * 0.5f
                        );
                    }
                }

                // Tiled + randomized emitters start on a random atlas tile;
                // everything else starts on tile 0.
                if (randomTile) {
                    p.tileIndex = static_cast<float>(distTile(particleRng_));
                }

                inst.particles.push_back(p);
            }
        }

        // Cap accumulator to avoid bursts after lag (only reachable when the
        // particle limit stopped the loop above early).
        if (accumulator > 2.0f) {
            accumulator = 0.0f;
        }
    }
}

/// Ages, removes, and integrates all particles of one instance.
///
/// @param inst Instance whose particle list is updated in place; does nothing
///             when the instance has no cached model.
/// @param dt   Time step added to each particle's life and used for integration.
void M2Renderer::updateParticles(M2Instance& inst, float dt) {
    if (!inst.cachedModel) return;
    const auto& model = *inst.cachedModel;

    size_t cursor = 0;
    while (cursor < inst.particles.size()) {
        auto& particle = inst.particles[cursor];
        particle.life += dt;

        if (particle.life >= particle.maxLife) {
            // Expired: overwrite with the last particle and shrink. The cursor
            // stays put so the moved-in particle is processed next.
            inst.particles[cursor] = inst.particles.back();
            inst.particles.pop_back();
            continue;
        }

        const bool emitterValid =
            particle.emitterIndex >= 0 &&
            particle.emitterIndex < static_cast<int>(model.particleEmitters.size());
        if (emitterValid) {
            const auto& emitter = model.particleEmitters[particle.emitterIndex];
            float gravity = interpFloat(emitter.gravity,
                                         inst.animTime, inst.currentSequenceIndex,
                                         model.sequences, model.globalSequenceDurations);

            // A zero M2 gravity gets a default pull so particles arc downward
            // (many fountain M2s rely on .anim bone animation that is not loaded
            // yet). Firefly/ambient glow effects keep their intentional zero.
            const bool useFallbackGravity = (gravity == 0.0f) && !model.isFireflyEffect;
            if (useFallbackGravity) {
                const float emitSpeed = interpFloat(emitter.emissionSpeed,
                                                     inst.animTime, inst.currentSequenceIndex,
                                                     model.sequences, model.globalSequenceDurations);
                // Fast emitters are treated as spray (4.0); slow ones as mist (1.5).
                gravity = (std::abs(emitSpeed) > 0.1f) ? 4.0f : 1.5f;
            }
            particle.velocity.z -= gravity * dt;
        }

        particle.position += particle.velocity * dt;
        ++cursor;
    }
}

/// Records draws for every live M2 particle, batched by texture, blend type
/// and atlas tiling.
///
/// Vertex format is 9 floats per particle: position(3), color(3), alpha,
/// scale, tile index. Groups are packed back to back into the mapped particle
/// vertex buffer and each group is drawn from its own byte offset. Once
/// MAX_M2_PARTICLES vertices have been written, remaining particles are
/// dropped for this call.
///
/// @param cmd         Command buffer the draws are recorded into.
/// @param perFrameSet Descriptor set bound at set 0 of the particle layout.
void M2Renderer::renderM2Particles(VkCommandBuffer cmd, VkDescriptorSet perFrameSet) {
    if (!particlePipeline_ || !m2ParticleVB_ || !m2ParticleVBMapped_) return;

    constexpr size_t kFloatsPerParticle = 9;

    // Collect all particles from all instances, grouped by texture+blend
    struct ParticleGroupKey {
        VkTexture* texture;
        uint8_t blendType;
        uint16_t tilesX;
        uint16_t tilesY;

        bool operator==(const ParticleGroupKey& other) const {
            return texture == other.texture &&
                   blendType == other.blendType &&
                   tilesX == other.tilesX &&
                   tilesY == other.tilesY;
        }
    };
    struct ParticleGroupKeyHash {
        size_t operator()(const ParticleGroupKey& key) const {
            size_t h1 = std::hash<uintptr_t>{}(reinterpret_cast<uintptr_t>(key.texture));
            size_t h2 = std::hash<uint32_t>{}((static_cast<uint32_t>(key.tilesX) << 16) | key.tilesY);
            size_t h3 = std::hash<uint8_t>{}(key.blendType);
            return h1 ^ (h2 * 0x9e3779b9u) ^ (h3 * 0x85ebca6bu);
        }
    };
    struct ParticleGroup {
        VkTexture* texture;
        uint8_t blendType;
        uint16_t tilesX;
        uint16_t tilesY;
        std::vector<float> vertexData;  // 9 floats per particle
    };
    std::unordered_map<ParticleGroupKey, ParticleGroup, ParticleGroupKeyHash> groups;

    size_t totalParticles = 0;

    for (auto& inst : instances) {
        if (inst.particles.empty()) continue;
        if (!inst.cachedModel) continue;
        const auto& gpu = *inst.cachedModel;

        for (const auto& p : inst.particles) {
            if (p.emitterIndex < 0 || p.emitterIndex >= static_cast<int>(gpu.particleEmitters.size())) continue;
            const auto& em = gpu.particleEmitters[p.emitterIndex];

            // Guard the division for particles with a (near-)zero lifespan.
            float lifeRatio = p.life / std::max(p.maxLife, 0.001f);
            glm::vec3 color = interpFBlockVec3(em.particleColor, lifeRatio);
            float alpha = std::min(interpFBlockFloat(em.particleAlpha, lifeRatio), 1.0f);
            float rawScale = interpFBlockFloat(em.particleScale, lifeRatio);

            // Models that are neither spell nor firefly effects are toned down:
            // color pulled 70% toward white, large or additive particles made
            // nearly transparent, and scale capped at 1.5.
            if (!gpu.isSpellEffect && !gpu.isFireflyEffect) {
                color = glm::mix(color, glm::vec3(1.0f), 0.7f);
                if (rawScale > 2.0f) alpha *= 0.02f;
                if (em.blendingType == 3 || em.blendingType == 4) alpha *= 0.05f;
            }
            float scale = (gpu.isSpellEffect || gpu.isFireflyEffect) ? rawScale : std::min(rawScale, 1.5f);

            VkTexture* tex = whiteTexture_.get();
            if (p.emitterIndex < static_cast<int>(gpu.particleTextures.size())) {
                tex = gpu.particleTextures[p.emitterIndex];
            }

            uint16_t tilesX = std::max<uint16_t>(em.textureCols, 1);
            uint16_t tilesY = std::max<uint16_t>(em.textureRows, 1);
            uint32_t totalTiles = static_cast<uint32_t>(tilesX) * static_cast<uint32_t>(tilesY);
            ParticleGroupKey key{tex, em.blendingType, tilesX, tilesY};
            auto& group = groups[key];
            group.texture = tex;
            group.blendType = em.blendingType;
            group.tilesX = tilesX;
            group.tilesY = tilesY;

            // Tiled emitters advance through the atlas with the instance's
            // animation time, starting from the particle's own tile.
            float tileIndex = p.tileIndex;
            if ((em.flags & kParticleFlagTiled) && totalTiles > 1) {
                float animSeconds = inst.animTime / 1000.0f;
                uint32_t animFrame = static_cast<uint32_t>(std::floor(animSeconds * totalTiles)) % totalTiles;
                tileIndex = std::fmod(p.tileIndex + static_cast<float>(animFrame),
                                      static_cast<float>(totalTiles));
            }

            const float vertex[kFloatsPerParticle] = {
                p.position.x, p.position.y, p.position.z,
                color.r, color.g, color.b,
                alpha, scale, tileIndex
            };
            group.vertexData.insert(group.vertexData.end(), vertex, vertex + kFloatsPerParticle);
            totalParticles++;
        }
    }

    if (totalParticles == 0) return;

    // Bind per-frame set (set 0) for particle pipeline
    vkCmdBindDescriptorSets(cmd, VK_PIPELINE_BIND_POINT_GRAPHICS,
                            particlePipelineLayout_, 0, 1, &perFrameSet, 0, nullptr);

    VkPipeline currentPipeline = VK_NULL_HANDLE;

    // vkCmdDraw only records work; the vertex data is read later. Every group
    // therefore needs its own region of the mapped buffer — rewriting offset 0
    // for each group would leave all recorded draws reading the last upload.
    // Assumes the buffer holds at least MAX_M2_PARTICLES vertices (the largest
    // single upload the previous chunked copy performed) — TODO confirm.
    // NOTE(review): if several frames in flight share this buffer the same
    // hazard exists across frames; verify against the buffer's owner.
    unsigned char* const mappedBytes =
        static_cast<unsigned char*>(static_cast<void*>(m2ParticleVBMapped_));
    const size_t capacity = MAX_M2_PARTICLES;
    size_t usedParticles = 0;

    for (auto& [key, group] : groups) {
        if (group.vertexData.empty()) continue;
        if (usedParticles >= capacity) break;  // buffer full: drop the rest

        uint8_t blendType = group.blendType;
        // Blend types 3 and 4 use the additive pipeline.
        VkPipeline desiredPipeline = (blendType == 3 || blendType == 4)
            ? particleAdditivePipeline_ : particlePipeline_;
        if (desiredPipeline != currentPipeline) {
            vkCmdBindPipeline(cmd, VK_PIPELINE_BIND_POINT_GRAPHICS, desiredPipeline);
            currentPipeline = desiredPipeline;
        }

        // Allocate descriptor set for this group's texture
        // NOTE(review): on allocation failure the group is still drawn with
        // whatever set is currently bound at index 1 — confirm this is intended.
        VkDescriptorSetAllocateInfo ai{VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO};
        ai.descriptorPool = materialDescPool_;
        ai.descriptorSetCount = 1;
        ai.pSetLayouts = &particleTexLayout_;
        VkDescriptorSet texSet = VK_NULL_HANDLE;
        if (vkAllocateDescriptorSets(vkCtx_->getDevice(), &ai, &texSet) == VK_SUCCESS) {
            VkTexture* tex = group.texture ? group.texture : whiteTexture_.get();
            VkDescriptorImageInfo imgInfo = tex->descriptorInfo();
            VkWriteDescriptorSet write{VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET};
            write.dstSet = texSet;
            write.dstBinding = 0;
            write.descriptorCount = 1;
            write.descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER;
            write.pImageInfo = &imgInfo;
            vkUpdateDescriptorSets(vkCtx_->getDevice(), 1, &write, 0, nullptr);

            vkCmdBindDescriptorSets(cmd, VK_PIPELINE_BIND_POINT_GRAPHICS,
                                    particlePipelineLayout_, 1, 1, &texSet, 0, nullptr);
        }

        // Push constants: tileCount + alphaKey
        struct { float tileX, tileY; int alphaKey; } pc = {
            static_cast<float>(group.tilesX), static_cast<float>(group.tilesY),
            (blendType == 1) ? 1 : 0
        };
        vkCmdPushConstants(cmd, particlePipelineLayout_, VK_SHADER_STAGE_FRAGMENT_BIT, 0,
                           sizeof(pc), &pc);

        // Upload this group behind the previous ones and draw it from there.
        // Binding with a byte offset keeps firstVertex at 0 for every draw.
        const size_t groupCount = group.vertexData.size() / kFloatsPerParticle;
        const size_t drawCount = std::min(groupCount, capacity - usedParticles);
        const VkDeviceSize byteOffset =
            static_cast<VkDeviceSize>(usedParticles * kFloatsPerParticle * sizeof(float));
        std::memcpy(mappedBytes + usedParticles * kFloatsPerParticle * sizeof(float),
                    group.vertexData.data(),
                    drawCount * kFloatsPerParticle * sizeof(float));
        vkCmdBindVertexBuffers(cmd, 0, 1, &m2ParticleVB_, &byteOffset);
        vkCmdDraw(cmd, static_cast<uint32_t>(drawCount), 1, 0, 0);
        usedParticles += drawCount;
    }
}

/// Uploads the smoke particles to the mapped smoke vertex buffer and records
/// one draw for them.
///
/// Vertex format is 6 floats per particle: position(3), life ratio, size,
/// isSpark. At most MAX_SMOKE_PARTICLES particles are drawn.
///
/// @param cmd         Command buffer the draw is recorded into.
/// @param perFrameSet Descriptor set bound at set 0 of the smoke layout.
void M2Renderer::renderSmokeParticles(VkCommandBuffer cmd, VkDescriptorSet perFrameSet) {
    if (smokeParticles.empty() || !smokePipeline_ || !smokeVB_) return;

    constexpr size_t kFloatsPerVertex = 6;
    const size_t drawCount = std::min(smokeParticles.size(), static_cast<size_t>(MAX_SMOKE_PARTICLES));
    float* const vertices = static_cast<float*>(smokeVBMapped_);

    for (size_t index = 0; index < drawCount; ++index) {
        const auto& particle = smokeParticles[index];
        float* const v = vertices + index * kFloatsPerVertex;
        v[0] = particle.position.x;
        v[1] = particle.position.y;
        v[2] = particle.position.z;
        v[3] = particle.life / particle.maxLife;  // life ratio
        v[4] = particle.size;
        v[5] = particle.isSpark;
    }

    vkCmdBindPipeline(cmd, VK_PIPELINE_BIND_POINT_GRAPHICS, smokePipeline_);
    vkCmdBindDescriptorSets(cmd, VK_PIPELINE_BIND_POINT_GRAPHICS,
                            smokePipelineLayout_, 0, 1, &perFrameSet, 0, nullptr);

    // The vertex stage receives the swapchain height as its only push constant.
    const float screenHeight = static_cast<float>(vkCtx_->getSwapchainExtent().height);
    vkCmdPushConstants(cmd, smokePipelineLayout_, VK_SHADER_STAGE_VERTEX_BIT, 0,
                       sizeof(float), &screenHeight);

    const VkDeviceSize bufferOffset = 0;
    vkCmdBindVertexBuffers(cmd, 0, 1, &smokeVB_, &bufferOffset);
    vkCmdDraw(cmd, static_cast<uint32_t>(drawCount), 1, 0, 0);
}

// Moves an instance to `position`, recomputes its model matrix and world
// bounds, and patches the spatial grid only if the covered cell range changed.
// Unknown instance ids are ignored.
void M2Renderer::setInstancePosition(uint32_t instanceId, const glm::vec3& position) {
    const auto lookup = instanceIndexById.find(instanceId);
    if (lookup == instanceIndexById.end()) return;
    auto& instance = instances[lookup->second];

    // Cell range covered before the move.
    const GridCell prevLo = toCell(instance.worldBoundsMin);
    const GridCell prevHi = toCell(instance.worldBoundsMax);

    instance.position = position;
    instance.updateModelMatrix();
    const auto modelEntry = models.find(instance.modelId);
    if (modelEntry != models.end()) {
        glm::vec3 boundsLo, boundsHi;
        getTightCollisionBounds(modelEntry->second, boundsLo, boundsHi);
        transformAABB(instance.modelMatrix, boundsLo, boundsHi, instance.worldBoundsMin, instance.worldBoundsMax);
    }

    // Cell range covered after the move.
    const GridCell nextLo = toCell(instance.worldBoundsMin);
    const GridCell nextHi = toCell(instance.worldBoundsMax);

    const auto sameCell = [](const GridCell& a, const GridCell& b) {
        return a.x == b.x && a.y == b.y && a.z == b.z;
    };
    if (sameCell(prevLo, nextLo) && sameCell(prevHi, nextHi)) {
        return;  // Grid membership is unchanged.
    }

    // Drop the id from every previously covered cell...
    for (int cz = prevLo.z; cz <= prevHi.z; ++cz) {
        for (int cy = prevLo.y; cy <= prevHi.y; ++cy) {
            for (int cx = prevLo.x; cx <= prevHi.x; ++cx) {
                const auto bucket = spatialGrid.find(GridCell{cx, cy, cz});
                if (bucket == spatialGrid.end()) continue;
                auto& ids = bucket->second;
                ids.erase(std::remove(ids.begin(), ids.end(), instanceId), ids.end());
            }
        }
    }
    // ...and register it in every newly covered cell.
    for (int cz = nextLo.z; cz <= nextHi.z; ++cz) {
        for (int cy = nextLo.y; cy <= nextHi.y; ++cy) {
            for (int cx = nextLo.x; cx <= nextHi.x; ++cx) {
                spatialGrid[GridCell{cx, cy, cz}].push_back(instanceId);
            }
        }
    }
}

void M2Renderer::setInstanceAnimationFrozen(uint32_t instanceId, bool frozen) {
    auto idxIt = instanceIndexById.find(instanceId);
    if (idxIt == instanceIndexById.end()) return;
    auto& inst = instances[idxIt->second];
    inst.animSpeed = frozen ? 0.0f : 1.0f;
    if (frozen) {
        inst.animTime = 0.0f;  // Reset to bind pose
    }
}

// Replaces an instance's model matrix with `transform` (and caches its
// inverse), derives the position from the translation column, refreshes the
// world bounds, and incrementally updates the spatial grid. Unknown ids are
// ignored.
void M2Renderer::setInstanceTransform(uint32_t instanceId, const glm::mat4& transform) {
    const auto lookup = instanceIndexById.find(instanceId);
    if (lookup == instanceIndexById.end()) return;
    auto& instance = instances[lookup->second];

    // Capture the cell range occupied under the old bounds.
    const GridCell prevLo = toCell(instance.worldBoundsMin);
    const GridCell prevHi = toCell(instance.worldBoundsMax);

    // Adopt the caller-supplied matrix verbatim.
    instance.modelMatrix = transform;
    instance.invModelMatrix = glm::inverse(transform);

    // The translation column doubles as the instance position.
    instance.position = glm::vec3(transform[3]);

    // Recompute world bounds from the model's tight collision box.
    const auto modelEntry = models.find(instance.modelId);
    if (modelEntry != models.end()) {
        glm::vec3 boundsLo, boundsHi;
        getTightCollisionBounds(modelEntry->second, boundsLo, boundsHi);
        transformAABB(instance.modelMatrix, boundsLo, boundsHi, instance.worldBoundsMin, instance.worldBoundsMax);
    }

    const GridCell nextLo = toCell(instance.worldBoundsMin);
    const GridCell nextHi = toCell(instance.worldBoundsMax);

    const bool loUnchanged = prevLo.x == nextLo.x && prevLo.y == nextLo.y && prevLo.z == nextLo.z;
    const bool hiUnchanged = prevHi.x == nextHi.x && prevHi.y == nextHi.y && prevHi.z == nextHi.z;
    if (loUnchanged && hiUnchanged) {
        return;  // Same cells as before; nothing to patch.
    }

    // Unregister from the old cell range.
    for (int cz = prevLo.z; cz <= prevHi.z; ++cz) {
        for (int cy = prevLo.y; cy <= prevHi.y; ++cy) {
            for (int cx = prevLo.x; cx <= prevHi.x; ++cx) {
                const auto bucket = spatialGrid.find(GridCell{cx, cy, cz});
                if (bucket == spatialGrid.end()) continue;
                auto& ids = bucket->second;
                ids.erase(std::remove(ids.begin(), ids.end(), instanceId), ids.end());
            }
        }
    }
    // Register in the new cell range.
    for (int cz = nextLo.z; cz <= nextHi.z; ++cz) {
        for (int cy = nextLo.y; cy <= nextHi.y; ++cy) {
            for (int cx = nextLo.x; cx <= nextHi.x; ++cx) {
                spatialGrid[GridCell{cx, cy, cz}].push_back(instanceId);
            }
        }
    }
    // spatialIndexDirty_ is intentionally left alone: the grid was patched in place.
}

void M2Renderer::removeInstance(uint32_t instanceId) {
    for (auto it = instances.begin(); it != instances.end(); ++it) {
        if (it->id == instanceId) {
            destroyInstanceBones(*it);
            instances.erase(it);
            rebuildSpatialIndex();
            return;
        }
    }
}

void M2Renderer::setSkipCollision(uint32_t instanceId, bool skip) {
    for (auto& inst : instances) {
        if (inst.id == instanceId) {
            inst.skipCollision = skip;
            return;
        }
    }
}

void M2Renderer::removeInstances(const std::vector<uint32_t>& instanceIds) {
    if (instanceIds.empty() || instances.empty()) {
        return;
    }

    std::unordered_set<uint32_t> toRemove(instanceIds.begin(), instanceIds.end());
    const size_t oldSize = instances.size();
    for (auto& inst : instances) {
        if (toRemove.count(inst.id)) {
            destroyInstanceBones(inst);
        }
    }
    instances.erase(std::remove_if(instances.begin(), instances.end(),
                   [&toRemove](const M2Instance& inst) {
                       return toRemove.find(inst.id) != toRemove.end();
                   }),
                   instances.end());

    if (instances.size() != oldSize) {
        rebuildSpatialIndex();
    }
}

void M2Renderer::clear() {
    if (vkCtx_) {
        vkDeviceWaitIdle(vkCtx_->getDevice());
        for (auto& [id, model] : models) {
            destroyModelGPU(model);
        }
        for (auto& inst : instances) {
            destroyInstanceBones(inst);
        }
        // Reset descriptor pools so new allocations succeed after reload.
        // destroyModelGPU/destroyInstanceBones don't free individual sets,
        // so the pools fill up across map changes without this reset.
        VkDevice device = vkCtx_->getDevice();
        if (materialDescPool_) {
            vkResetDescriptorPool(device, materialDescPool_, 0);
            // Re-allocate the glow texture descriptor set (pre-allocated during init,
            // invalidated by pool reset).
            if (glowTexture_ && particleTexLayout_) {
                VkDescriptorSetAllocateInfo ai{VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO};
                ai.descriptorPool = materialDescPool_;
                ai.descriptorSetCount = 1;
                ai.pSetLayouts = &particleTexLayout_;
                glowTexDescSet_ = VK_NULL_HANDLE;
                if (vkAllocateDescriptorSets(device, &ai, &glowTexDescSet_) == VK_SUCCESS) {
                    VkDescriptorImageInfo imgInfo = glowTexture_->descriptorInfo();
                    VkWriteDescriptorSet write{VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET};
                    write.dstSet = glowTexDescSet_;
                    write.dstBinding = 0;
                    write.descriptorCount = 1;
                    write.descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER;
                    write.pImageInfo = &imgInfo;
                    vkUpdateDescriptorSets(device, 1, &write, 0, nullptr);
                }
            }
        }
        if (boneDescPool_) {
            vkResetDescriptorPool(device, boneDescPool_, 0);
        }
    }
    models.clear();
    instances.clear();
    spatialGrid.clear();
    instanceIndexById.clear();
    instanceDedupMap_.clear();
    smokeParticles.clear();
    smokeInstanceIndices_.clear();
    portalInstanceIndices_.clear();
    animatedInstanceIndices_.clear();
    particleOnlyInstanceIndices_.clear();
    particleInstanceIndices_.clear();
    smokeEmitAccum = 0.0f;
}

// Restricts collision queries to instances near `worldPos`. The focus is
// enabled only for a strictly positive radius; the stored radius is clamped
// to be non-negative and its square is cached for distance tests.
void M2Renderer::setCollisionFocus(const glm::vec3& worldPos, float radius) {
    collisionFocusPos = worldPos;
    collisionFocusEnabled = radius > 0.0f;
    collisionFocusRadius = std::max(0.0f, radius);
    collisionFocusRadiusSq = collisionFocusRadius * collisionFocusRadius;
}

// Turns the collision focus filter off. The stored focus position and radius
// are left as they were.
void M2Renderer::clearCollisionFocus() {
    collisionFocusEnabled = false;
}

// Zeroes the accumulated query statistics (call count and total time).
void M2Renderer::resetQueryStats() {
    queryCallCount = 0;
    queryTimeMs = 0.0;
}

// Maps a world-space point to the spatial-grid cell containing it. Each axis
// is divided by SPATIAL_CELL_SIZE and floored, so negative coordinates round
// toward negative infinity.
M2Renderer::GridCell M2Renderer::toCell(const glm::vec3& p) const {
    const auto axisToCell = [](float coord) {
        return static_cast<int>(std::floor(coord / SPATIAL_CELL_SIZE));
    };
    const int cellX = axisToCell(p.x);
    const int cellY = axisToCell(p.y);
    const int cellZ = axisToCell(p.z);
    return GridCell{cellX, cellY, cellZ};
}

void M2Renderer::rebuildSpatialIndex() {
    spatialGrid.clear();
    instanceIndexById.clear();
    instanceDedupMap_.clear();
    instanceIndexById.reserve(instances.size());
    smokeInstanceIndices_.clear();
    portalInstanceIndices_.clear();
    animatedInstanceIndices_.clear();
    particleOnlyInstanceIndices_.clear();
    particleInstanceIndices_.clear();

    for (size_t i = 0; i < instances.size(); i++) {
        auto& inst = instances[i];
        instanceIndexById[inst.id] = i;

        // Re-cache model pointer (may have changed after model map modifications)
        auto mdlIt = models.find(inst.modelId);
        inst.cachedModel = (mdlIt != models.end()) ? &mdlIt->second : nullptr;

        // Rebuild dedup map (skip ground detail)
        if (!inst.cachedIsGroundDetail) {
            DedupKey dk{inst.modelId,
                        static_cast<int32_t>(std::round(inst.position.x * 10.0f)),
                        static_cast<int32_t>(std::round(inst.position.y * 10.0f)),
                        static_cast<int32_t>(std::round(inst.position.z * 10.0f))};
            instanceDedupMap_[dk] = inst.id;
        }

        if (inst.cachedIsSmoke) {
            smokeInstanceIndices_.push_back(i);
        }
        if (inst.cachedIsInstancePortal) {
            portalInstanceIndices_.push_back(i);
        }
        if (inst.cachedHasParticleEmitters) {
            particleInstanceIndices_.push_back(i);
        }
        if (inst.cachedHasAnimation && !inst.cachedDisableAnimation) {
            animatedInstanceIndices_.push_back(i);
        } else if (inst.cachedHasParticleEmitters) {
            particleOnlyInstanceIndices_.push_back(i);
        }

        GridCell minCell = toCell(inst.worldBoundsMin);
        GridCell maxCell = toCell(inst.worldBoundsMax);
        for (int z = minCell.z; z <= maxCell.z; z++) {
            for (int y = minCell.y; y <= maxCell.y; y++) {
                for (int x = minCell.x; x <= maxCell.x; x++) {
                    spatialGrid[GridCell{x, y, z}].push_back(inst.id);
                }
            }
        }
    }
    spatialIndexDirty_ = false;
}

// Collects into `outIndices` the indices (into `instances`) of every instance
// registered in a grid cell overlapped by the query box. Each instance is
// reported at most once. If the grid yields nothing while instances exist,
// all instance indices are returned instead.
void M2Renderer::gatherCandidates(const glm::vec3& queryMin, const glm::vec3& queryMax,
                                  std::vector<size_t>& outIndices) const {
    outIndices.clear();
    tl_m2_candidateIdScratch.clear();

    const GridCell lo = toCell(queryMin);
    const GridCell hi = toCell(queryMax);
    for (int cz = lo.z; cz <= hi.z; ++cz) {
        for (int cy = lo.y; cy <= hi.y; ++cy) {
            for (int cx = lo.x; cx <= hi.x; ++cx) {
                const auto bucket = spatialGrid.find(GridCell{cx, cy, cz});
                if (bucket == spatialGrid.end()) continue;

                for (const uint32_t instanceId : bucket->second) {
                    // An instance spanning several cells is only taken once.
                    const bool firstVisit = tl_m2_candidateIdScratch.insert(instanceId).second;
                    if (!firstVisit) continue;

                    const auto slot = instanceIndexById.find(instanceId);
                    if (slot == instanceIndexById.end()) continue;
                    outIndices.push_back(slot->second);
                }
            }
        }
    }

    if (!outIndices.empty() || instances.empty()) {
        return;
    }

    // Safety net: if the spatial index produced no candidates (e.g. during
    // streaming churn), fall back to testing every instance so collision
    // stays correct.
    const size_t total = instances.size();
    outIndices.reserve(total);
    for (size_t slot = 0; slot != total; ++slot) {
        outIndices.push_back(slot);
    }
}

// Destroys the GPU resources of, and forgets, every model that no instance
// references any more. Logs a summary when at least one model was removed.
void M2Renderer::cleanupUnusedModels() {
    // Model ids that at least one live instance still points at.
    std::unordered_set<uint32_t> liveModelIds;
    for (const auto& instance : instances) {
        liveModelIds.insert(instance.modelId);
    }

    // Walk the model map once, erasing unreferenced entries as we go.
    size_t removedCount = 0;
    for (auto it = models.begin(); it != models.end();) {
        if (liveModelIds.count(it->first) != 0) {
            ++it;
            continue;
        }
        destroyModelGPU(it->second);
        it = models.erase(it);
        ++removedCount;
    }

    if (removedCount != 0) {
        LOG_INFO("M2 cleanup: removed ", removedCount, " unused models, ", models.size(), " remaining");
    }
}

// Returns the GPU texture for `path`, loading and caching it on first use.
//
// The cache key is the path lower-cased with '/' replaced by '\'. Image data
// comes from the pre-decoded BLP cache when an entry is present there,
// otherwise from the asset manager. The shared white texture is returned
// (and nothing is cached) when the image cannot be loaded or when adding it
// would exceed the texture cache budget.
//
// texFlags: bit 0 selects repeat (vs clamp-to-edge) wrapping on S, bit 1 on T.
VkTexture* M2Renderer::loadTexture(const std::string& path, uint32_t texFlags) {
    // Normalize the path into a cache key.
    std::string key = path;
    std::replace(key.begin(), key.end(), '/', '\\');
    std::transform(key.begin(), key.end(), key.begin(),
                   [](unsigned char c) { return static_cast<char>(std::tolower(c)); });

    // Fast path: already resident.
    if (auto cached = textureCache.find(key); cached != textureCache.end()) {
        cached->second.lastUse = ++textureCacheCounter_;
        return cached->second.texture.get();
    }
    // Deliberately no negative-cache lookup here, so transiently missing
    // textures can be retried.

    // Path substrings that mark the texture as a "color key black" candidate.
    static constexpr const char* kColorKeyBlackTokens[] = {
        "candle", "flame", "fire", "torch", "lamp", "lantern",
        "glow", "flare", "brazier", "campfire", "bonfire",
    };
    bool colorKeyBlackHint = false;
    for (const char* token : kColorKeyBlackTokens) {
        if (key.find(token) != std::string::npos) {
            colorKeyBlackHint = true;
            break;
        }
    }

    // Prefer an image already decoded by the background workers; the entry is
    // consumed (moved out and erased) when used.
    pipeline::BLPImage blp;
    if (predecodedBLPCache_) {
        auto predecoded = predecodedBLPCache_->find(key);
        if (predecoded != predecodedBLPCache_->end()) {
            blp = std::move(predecoded->second);
            predecodedBLPCache_->erase(predecoded);
        }
    }
    if (!blp.isValid()) {
        blp = assetManager->loadTexture(key);
    }
    if (!blp.isValid()) {
        // Hand back the white fallback without caching the failure: MPQ reads
        // can fail transiently during streaming, so the next model load may
        // succeed. The warning is emitted once per key.
        const bool firstFailure = loggedTextureLoadFails_.insert(key).second;
        if (firstFailure) {
            LOG_WARNING("M2: Failed to load texture: ", path);
        }
        return whiteTexture_.get();
    }

    // Approximate footprint: RGBA8 base level plus one third on top of it.
    size_t base = static_cast<size_t>(blp.width) * static_cast<size_t>(blp.height) * 4ull;
    size_t approxBytes = base + (base / 3);
    const bool overBudget = textureCacheBytes_ + approxBytes > textureCacheBudgetBytes_;
    if (overBudget) {
        static constexpr size_t kMaxFailedTextureCache = 200000;
        if (failedTextureCache_.size() < kMaxFailedTextureCache) {
            // Record budget-rejected keys as well, so a saturated budget does
            // not cause the same textures to be decoded/loaded over and over.
            failedTextureCache_.insert(key);
        }
        if (textureBudgetRejectWarnings_ < 3) {
            LOG_WARNING("M2 texture cache full (", textureCacheBytes_ / (1024 * 1024),
                        " MB / ", textureCacheBudgetBytes_ / (1024 * 1024),
                        " MB), rejecting texture: ", path);
        }
        ++textureBudgetRejectWarnings_;
        return whiteTexture_.get();
    }

    // The texture "has alpha" if any pixel's alpha byte differs from 255.
    bool hasAlpha = false;
    {
        const size_t byteCount = blp.data.size();
        size_t alphaIdx = 3;
        while (alphaIdx < byteCount && !hasAlpha) {
            hasAlpha = (blp.data[alphaIdx] != 255);
            alphaIdx += 4;
        }
    }

    // Upload to the GPU.
    auto tex = std::make_unique<VkTexture>();
    tex->upload(*vkCtx_, blp.data.data(), blp.width, blp.height, VK_FORMAT_R8G8B8A8_UNORM);

    // M2Texture flags: bit 0 = WrapS (1=repeat, 0=clamp), bit 1 = WrapT
    const bool repeatS = (texFlags & 0x1) != 0;
    const bool repeatT = (texFlags & 0x2) != 0;
    VkSamplerAddressMode wrapS = repeatS ? VK_SAMPLER_ADDRESS_MODE_REPEAT : VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE;
    VkSamplerAddressMode wrapT = repeatT ? VK_SAMPLER_ADDRESS_MODE_REPEAT : VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE;
    tex->createSampler(vkCtx_->getDevice(), VK_FILTER_LINEAR, wrapS, wrapT);

    VkTexture* texPtr = tex.get();

    // Publish the cache entry and the per-pointer property lookups.
    TextureCacheEntry entry;
    entry.texture = std::move(tex);
    entry.approxBytes = approxBytes;
    entry.hasAlpha = hasAlpha;
    entry.colorKeyBlack = colorKeyBlackHint;
    entry.lastUse = ++textureCacheCounter_;
    textureCacheBytes_ += entry.approxBytes;
    textureCache[key] = std::move(entry);
    textureHasAlphaByPtr_[texPtr] = hasAlpha;
    textureColorKeyBlackByPtr_[texPtr] = colorKeyBlackHint;
    LOG_DEBUG("M2: Loaded texture: ", path, " (", blp.width, "x", blp.height, ")");

    return texPtr;
}

uint32_t M2Renderer::getTotalTriangleCount() const {
    uint32_t total = 0;
    for (const auto& instance : instances) {
        if (instance.cachedModel) {
            total += instance.cachedModel->indexCount / 3;
        }
    }
    return total;
}

// Finds the highest reachable M2 floor surface at the world-space point
// (glX, glY, glZ).
//
// Two sources contribute per instance and the highest result wins overall:
//   1. a vertical ray against the model's collision-mesh floor triangles, and
//   2. the top of the model's tight collision box (with per-category step-up
//      limits).
//
// outNormalZ (optional) receives |world normal Z| of the collision triangle
// that supplied the winning mesh floor. It stays at 1.0 (flat) unless a mesh
// hit actually became the best floor; triangles from instances whose hit was
// rejected or out-ranked do not affect it.
//
// Returns the world-space Z of the best floor, or std::nullopt if none found.
std::optional<float> M2Renderer::getFloorHeight(float glX, float glY, float glZ, float* outNormalZ) const {
    QueryTimer timer(&queryTimeMs, &queryCallCount);
    std::optional<float> bestFloor;
    float bestNormalZ = 1.0f;  // Default to flat

    glm::vec3 queryMin(glX - 2.0f, glY - 2.0f, glZ - 6.0f);
    glm::vec3 queryMax(glX + 2.0f, glY + 2.0f, glZ + 8.0f);
    gatherCandidates(queryMin, queryMax, tl_m2_candidateScratch);

    for (size_t idx : tl_m2_candidateScratch) {
        const auto& instance = instances[idx];
        // Optional focus filter: ignore instances too far from the focus point.
        if (collisionFocusEnabled &&
            pointAABBDistanceSq(collisionFocusPos, instance.worldBoundsMin, instance.worldBoundsMax) > collisionFocusRadiusSq) {
            continue;
        }

        if (!instance.cachedModel) continue;
        if (instance.scale <= 0.001f) continue;

        const M2ModelGPU& model = *instance.cachedModel;
        if (model.collisionNoBlock || model.isInvisibleTrap || model.isSpellEffect) continue;
        if (instance.skipCollision) continue;

        // --- Mesh-based floor: vertical ray vs collision triangles ---
        // Does NOT skip the AABB path — both contribute and highest wins.
        if (model.collision.valid()) {
            glm::vec3 localPos = glm::vec3(instance.invModelMatrix * glm::vec4(glX, glY, glZ, 1.0f));

            model.collision.getFloorTrisInRange(
                localPos.x - 1.0f, localPos.y - 1.0f,
                localPos.x + 1.0f, localPos.y + 1.0f,
                tl_m2_collisionTriScratch);

            // Cast straight down from 5 units above the query point (local space).
            glm::vec3 rayOrigin(localPos.x, localPos.y, localPos.z + 5.0f);
            glm::vec3 rayDir(0.0f, 0.0f, -1.0f);
            float bestHitZ = -std::numeric_limits<float>::max();
            // Normal of this instance's best triangle. Kept local so it is only
            // published if this instance's hit ends up being the best floor.
            float instanceNormalZ = 1.0f;
            bool hitAny = false;

            for (uint32_t ti : tl_m2_collisionTriScratch) {
                if (ti >= model.collision.triCount) continue;
                // Vertical range cull against the triangle's Z bounds.
                if (model.collision.triBounds[ti].maxZ < localPos.z - 10.0f ||
                    model.collision.triBounds[ti].minZ > localPos.z + 5.0f) continue;

                const auto& verts = model.collision.vertices;
                const auto& idx   = model.collision.indices;
                const auto& v0 = verts[idx[ti * 3]];
                const auto& v1 = verts[idx[ti * 3 + 1]];
                const auto& v2 = verts[idx[ti * 3 + 2]];

                // Two-sided: try both windings
                float tHit = rayTriangleIntersect(rayOrigin, rayDir, v0, v1, v2);
                if (tHit < 0.0f)
                    tHit = rayTriangleIntersect(rayOrigin, rayDir, v0, v2, v1);
                if (tHit < 0.0f) continue;

                float hitZ = rayOrigin.z - tHit;

                // Walkable normal check (world space)
                glm::vec3 worldN(0.0f, 0.0f, 1.0f);  // Default to flat
                glm::vec3 localN = glm::cross(v1 - v0, v2 - v0);
                float nLen = glm::length(localN);
                if (nLen > 0.001f) {
                    localN /= nLen;
                    if (localN.z < 0.0f) localN = -localN;
                    worldN = glm::normalize(
                        glm::vec3(instance.modelMatrix * glm::vec4(localN, 0.0f)));
                    if (std::abs(worldN.z) < 0.35f) continue; // too steep (~70° max slope)
                }

                if (hitZ <= localPos.z + 3.0f && hitZ > bestHitZ) {
                    bestHitZ = hitZ;
                    hitAny = true;
                    instanceNormalZ = std::abs(worldN.z);
                }
            }

            if (hitAny) {
                glm::vec3 localHit(localPos.x, localPos.y, bestHitZ);
                glm::vec3 worldHit = glm::vec3(instance.modelMatrix * glm::vec4(localHit, 1.0f));
                if (worldHit.z <= glZ + 3.0f && (!bestFloor || worldHit.z > *bestFloor)) {
                    bestFloor = worldHit.z;
                    // Publish the normal together with the height it belongs to.
                    bestNormalZ = instanceNormalZ;
                }
            }
            // Fall through to AABB floor — both contribute, highest wins
        }

        // --- Box-based floor: top of the tight collision bounds ---
        float zMargin = model.collisionBridge ? 25.0f : 2.0f;
        if (glX < instance.worldBoundsMin.x || glX > instance.worldBoundsMax.x ||
            glY < instance.worldBoundsMin.y || glY > instance.worldBoundsMax.y ||
            glZ < instance.worldBoundsMin.z - zMargin || glZ > instance.worldBoundsMax.z + zMargin) {
            continue;
        }
        glm::vec3 localMin, localMax;
        getTightCollisionBounds(model, localMin, localMax);

        glm::vec3 localPos = glm::vec3(instance.invModelMatrix * glm::vec4(glX, glY, glZ, 1.0f));

        // Must be within doodad footprint in local XY.
        // Stepped low platforms get a small pad so walk-up snapping catches edges.
        float footprintPad = 0.0f;
        if (model.collisionSteppedLowPlatform) {
            footprintPad = model.collisionPlanter ? 0.22f : 0.16f;
            if (model.collisionBridge) {
                footprintPad = 0.35f;
            }
        }
        if (localPos.x < localMin.x - footprintPad || localPos.x > localMax.x + footprintPad ||
            localPos.y < localMin.y - footprintPad || localPos.y > localMax.y + footprintPad) {
            continue;
        }

        // Construct "top" point at queried XY in local space, then transform back.
        float localTopZ = getEffectiveCollisionTopLocal(model, localPos, localMin, localMax);
        glm::vec3 localTop(localPos.x, localPos.y, localTopZ);
        glm::vec3 worldTop = glm::vec3(instance.modelMatrix * glm::vec4(localTop, 1.0f));

        // Reachability filter: allow a bit more climb for stepped low platforms.
        float maxStepUp = 1.0f;
        if (model.collisionStatue) {
            maxStepUp = 2.5f;
        } else if (model.collisionSmallSolidProp) {
            maxStepUp = 2.0f;
        } else if (model.collisionSteppedFountain) {
            maxStepUp = 2.5f;
        } else if (model.collisionSteppedLowPlatform) {
            maxStepUp = model.collisionPlanter ? 3.0f : 2.4f;
            if (model.collisionBridge) {
                maxStepUp = 25.0f;
            }
        }
        if (worldTop.z > glZ + maxStepUp) continue;

        if (!bestFloor || worldTop.z > *bestFloor) {
            bestFloor = worldTop.z;
        }
    }

    // Output surface normal if requested
    if (outNormalZ) {
        *outNormalZ = bestNormalZ;
    }

    return bestFloor;
}

// Resolves a horizontal move from `from` to `to` against nearby M2 instances.
//
// adjustedPos starts as `to`; only its X and Y components are ever modified.
// Instances are processed one after another, so a correction made for one
// instance is the starting point for the next.
//
// Per instance, one of two strategies is used:
//   - models with a valid collision mesh: small closest-point pushes away
//     from wall triangles, capped per instance;
//   - all other models: a swept clamp against the (radius-inflated) tight
//     collision box for non-stepable blockers, followed by a gentle push-out
//     along the nearest box side when the target is still inside.
//
// Returns true if any instance changed adjustedPos.
bool M2Renderer::checkCollision(const glm::vec3& from, const glm::vec3& to,
                                 glm::vec3& adjustedPos, float playerRadius) const {
    QueryTimer timer(&queryTimeMs, &queryCallCount);
    adjustedPos = to;
    bool collided = false;

    // Gather candidates from a box around the movement segment, padded by
    // 7 units in XY and 5 units in Z.
    glm::vec3 queryMin = glm::min(from, to) - glm::vec3(7.0f, 7.0f, 5.0f);
    glm::vec3 queryMax = glm::max(from, to) + glm::vec3(7.0f, 7.0f, 5.0f);
    gatherCandidates(queryMin, queryMax, tl_m2_candidateScratch);

    // Check against all M2 instances in local space (rotation-aware).
    for (size_t idx : tl_m2_candidateScratch) {
        const auto& instance = instances[idx];
        // Optional focus filter: ignore instances too far from the focus point.
        if (collisionFocusEnabled &&
            pointAABBDistanceSq(collisionFocusPos, instance.worldBoundsMin, instance.worldBoundsMax) > collisionFocusRadiusSq) {
            continue;
        }

        // World-space broad phase: skip when both endpoints lie on the same
        // outer side of the (margin-expanded) instance bounds on any axis.
        const float broadMargin = playerRadius + 1.0f;
        if (from.x < instance.worldBoundsMin.x - broadMargin && adjustedPos.x < instance.worldBoundsMin.x - broadMargin) continue;
        if (from.x > instance.worldBoundsMax.x + broadMargin && adjustedPos.x > instance.worldBoundsMax.x + broadMargin) continue;
        if (from.y < instance.worldBoundsMin.y - broadMargin && adjustedPos.y < instance.worldBoundsMin.y - broadMargin) continue;
        if (from.y > instance.worldBoundsMax.y + broadMargin && adjustedPos.y > instance.worldBoundsMax.y + broadMargin) continue;
        if (from.z > instance.worldBoundsMax.z + 2.5f && adjustedPos.z > instance.worldBoundsMax.z + 2.5f) continue;
        if (from.z + 2.5f < instance.worldBoundsMin.z && adjustedPos.z + 2.5f < instance.worldBoundsMin.z) continue;

        if (!instance.cachedModel) continue;

        const M2ModelGPU& model = *instance.cachedModel;
        if (model.collisionNoBlock || model.isInvisibleTrap || model.isSpellEffect) continue;
        if (instance.skipCollision) continue;
        if (instance.scale <= 0.001f) continue;

        // --- Mesh-based wall collision: closest-point push ---
        if (model.collision.valid()) {
            glm::vec3 localFrom = glm::vec3(instance.invModelMatrix * glm::vec4(from, 1.0f));
            glm::vec3 localPos  = glm::vec3(instance.invModelMatrix * glm::vec4(adjustedPos, 1.0f));
            // Player radius expressed in the instance's local (unscaled) units.
            float localRadius = playerRadius / instance.scale;

            // Wall triangles near the local-space movement segment.
            model.collision.getWallTrisInRange(
                std::min(localFrom.x, localPos.x) - localRadius - 1.0f,
                std::min(localFrom.y, localPos.y) - localRadius - 1.0f,
                std::max(localFrom.x, localPos.x) + localRadius + 1.0f,
                std::max(localFrom.y, localPos.y) + localRadius + 1.0f,
                tl_m2_collisionTriScratch);

            constexpr float PLAYER_HEIGHT = 2.0f;
            constexpr float MAX_TOTAL_PUSH = 0.02f; // Cap total push per instance
            bool pushed = false;
            float totalPushX = 0.0f, totalPushY = 0.0f;

            for (uint32_t ti : tl_m2_collisionTriScratch) {
                if (ti >= model.collision.triCount) continue;
                // Skip triangles entirely above the player's head or below the feet.
                if (localPos.z + PLAYER_HEIGHT < model.collision.triBounds[ti].minZ ||
                    localPos.z > model.collision.triBounds[ti].maxZ) continue;

                // Step-up: only skip wall when player is rising (jumping over it)
                constexpr float MAX_STEP_UP = 1.2f;
                bool rising = (localPos.z > localFrom.z + 0.05f);
                if (rising && localPos.z + MAX_STEP_UP >= model.collision.triBounds[ti].maxZ) continue;

                // Early out if we already pushed enough this instance
                float totalPushSoFar = std::sqrt(totalPushX * totalPushX + totalPushY * totalPushY);
                if (totalPushSoFar >= MAX_TOTAL_PUSH) break;

                const auto& verts = model.collision.vertices;
                const auto& idx   = model.collision.indices;
                const auto& v0 = verts[idx[ti * 3]];
                const auto& v1 = verts[idx[ti * 3 + 1]];
                const auto& v2 = verts[idx[ti * 3 + 2]];

                // Horizontal distance from the player to the nearest point on the triangle.
                glm::vec3 closest = closestPointOnTriangle(localPos, v0, v1, v2);
                glm::vec3 diff = localPos - closest;
                float distXY = std::sqrt(diff.x * diff.x + diff.y * diff.y);

                if (distXY < localRadius && distXY > 1e-4f) {
                    // Gentle push — very small fraction of penetration
                    float penetration = localRadius - distXY;
                    float pushDist = std::clamp(penetration * 0.08f, 0.001f, 0.015f);
                    float dx = (diff.x / distXY) * pushDist;
                    float dy = (diff.y / distXY) * pushDist;
                    localPos.x += dx;
                    localPos.y += dy;
                    totalPushX += dx;
                    totalPushY += dy;
                    pushed = true;
                } else if (distXY < 1e-4f) {
                    // On the plane — soft push along triangle normal XY
                    glm::vec3 n = glm::cross(v1 - v0, v2 - v0);
                    float nxyLen = std::sqrt(n.x * n.x + n.y * n.y);
                    if (nxyLen > 1e-4f) {
                        float pushDist = std::min(localRadius, 0.015f);
                        float dx = (n.x / nxyLen) * pushDist;
                        float dy = (n.y / nxyLen) * pushDist;
                        localPos.x += dx;
                        localPos.y += dy;
                        totalPushX += dx;
                        totalPushY += dy;
                        pushed = true;
                    }
                }
            }

            // Carry the accumulated local push back to world space (XY only).
            if (pushed) {
                glm::vec3 worldPos = glm::vec3(instance.modelMatrix * glm::vec4(localPos, 1.0f));
                adjustedPos.x = worldPos.x;
                adjustedPos.y = worldPos.y;
                collided = true;
            }
            // Mesh-backed models never use the box fallback below.
            continue;
        }

        // --- Box-based collision for models without a collision mesh ---
        glm::vec3 localFrom = glm::vec3(instance.invModelMatrix * glm::vec4(from, 1.0f));
        glm::vec3 localPos = glm::vec3(instance.invModelMatrix * glm::vec4(adjustedPos, 1.0f));
        // Narrow vertical props use a reduced player radius.
        float radiusScale = model.collisionNarrowVerticalProp ? 0.45f : 1.0f;
        float localRadius = (playerRadius * radiusScale) / instance.scale;

        // Inflate the tight bounds by the player radius so the player can be
        // treated as a point from here on.
        glm::vec3 rawMin, rawMax;
        getTightCollisionBounds(model, rawMin, rawMax);
        glm::vec3 localMin = rawMin - glm::vec3(localRadius);
        glm::vec3 localMax = rawMax + glm::vec3(localRadius);
        float effectiveTop = getEffectiveCollisionTopLocal(model, localPos, rawMin, rawMax) + localRadius;
        // Distances of both endpoints from the box's XY center; used below to
        // detect movement away from the center.
        glm::vec2 localCenter((localMin.x + localMax.x) * 0.5f, (localMin.y + localMax.y) * 0.5f);
        float fromR = glm::length(glm::vec2(localFrom.x, localFrom.y) - localCenter);
        float toR = glm::length(glm::vec2(localPos.x, localPos.y) - localCenter);

        // Feet-based vertical overlap test: ignore objects fully above/below us.
        constexpr float PLAYER_HEIGHT = 2.0f;
        if (localPos.z + PLAYER_HEIGHT < localMin.z || localPos.z > effectiveTop) {
            continue;
        }

        // "Escaping overlap": the move started inside the box and heads away
        // from its center. Such moves are not blocked, except for small solid props.
        bool fromInsideXY =
            (localFrom.x >= localMin.x && localFrom.x <= localMax.x &&
             localFrom.y >= localMin.y && localFrom.y <= localMax.y);
        bool fromInsideZ = (localFrom.z + PLAYER_HEIGHT >= localMin.z && localFrom.z <= effectiveTop);
        bool escapingOverlap = (fromInsideXY && fromInsideZ && (toR > fromR + 1e-4f));
        bool allowEscapeRelax = escapingOverlap && !model.collisionSmallSolidProp;

        // Swept hard clamp for taller blockers only.
        // Low/stepable objects should be climbable and not "shove" the player off.
        float maxStepUp = 1.20f;
        if (model.collisionStatue) {
            maxStepUp = 2.5f;
        } else if (model.collisionSmallSolidProp) {
            // Keep box/crate-class props hard-solid to prevent phase-through.
            maxStepUp = 0.75f;
        } else if (model.collisionSteppedFountain) {
            maxStepUp = 2.5f;
        } else if (model.collisionSteppedLowPlatform) {
            maxStepUp = model.collisionPlanter ? 2.8f : 2.4f;
            if (model.collisionBridge) {
                maxStepUp = 25.0f;
            }
        }
        // Stepable: the object's top is within maxStepUp of the starting height.
        bool stepableLowObject = (effectiveTop <= localFrom.z + maxStepUp);
        bool climbingAttempt = (localPos.z > localFrom.z + 0.18f);
        bool nearTop = (localFrom.z >= effectiveTop - 0.30f);
        float climbAllowance = model.collisionPlanter ? 0.95f : 0.60f;
        if (model.collisionSteppedLowPlatform && !model.collisionPlanter) {
            // Let low curb/planter blocks be stepable without sticky side shoves.
            climbAllowance = 1.00f;
        }
        if (model.collisionBridge) {
            climbAllowance = 3.0f;
        }
        if (model.collisionSmallSolidProp) {
            climbAllowance = 1.05f;
        }
        bool climbingTowardTop = climbingAttempt && (localFrom.z + climbAllowance >= effectiveTop);
        // Small solid props block laterally unless the player is already near
        // the top or is climbing toward it.
        bool forceHardLateral =
            model.collisionSmallSolidProp &&
            !nearTop && !climbingTowardTop;
        if ((!stepableLowObject || forceHardLateral) && !allowEscapeRelax) {
            float tEnter = 0.0f;
            glm::vec3 sweepMax = localMax;
            sweepMax.z = std::min(sweepMax.z, effectiveTop);
            if (segmentIntersectsAABB(localFrom, localPos, localMin, sweepMax, tEnter)) {
                // Stop slightly short of the entry point along the segment.
                float tSafe = std::clamp(tEnter - 0.03f, 0.0f, 1.0f);
                glm::vec3 localSafe = localFrom + (localPos - localFrom) * tSafe;
                glm::vec3 worldSafe = glm::vec3(instance.modelMatrix * glm::vec4(localSafe, 1.0f));
                adjustedPos.x = worldSafe.x;
                adjustedPos.y = worldSafe.y;
                collided = true;
                continue;
            }
        }

        // From here on only a target that lies inside the inflated footprint matters.
        if (localPos.x < localMin.x || localPos.x > localMax.x ||
            localPos.y < localMin.y || localPos.y > localMax.y) {
            continue;
        }

        // Distance from the target to each side of the footprint.
        float pushLeft  = localPos.x - localMin.x;
        float pushRight = localMax.x - localPos.x;
        float pushBack  = localPos.y - localMin.y;
        float pushFront = localMax.y - localPos.y;

        float minPush = std::min({pushLeft, pushRight, pushBack, pushFront});
        if (allowEscapeRelax) {
            continue;
        }
        if (stepableLowObject && localFrom.z >= effectiveTop - 0.35f) {
            // Already on/near top surface: don't apply lateral push that ejects
            // the player from the object (carpets, platforms, etc).
            continue;
        }
        // Gentle fallback push for overlapping cases.
        // The push is a clamped fraction of the distance to the nearest side;
        // the fraction and clamp range depend on the model's collision category.
        float pushAmount;
        if (model.collisionNarrowVerticalProp) {
            pushAmount = std::clamp(minPush * 0.10f, 0.001f, 0.010f);
        } else if (model.collisionSteppedLowPlatform) {
            if (model.collisionPlanter && stepableLowObject) {
                pushAmount = std::clamp(minPush * 0.06f, 0.001f, 0.006f);
            } else {
            pushAmount = std::clamp(minPush * 0.12f, 0.003f, 0.012f);
            }
        } else if (stepableLowObject) {
            pushAmount = std::clamp(minPush * 0.12f, 0.002f, 0.015f);
        } else {
            pushAmount = std::clamp(minPush * 0.28f, 0.010f, 0.045f);
        }
        // Push out through the nearest side. minPush is one of the four values
        // returned by std::min, so the exact float comparisons are well-defined.
        glm::vec3 localPush(0.0f);
        if (minPush == pushLeft) {
            localPush.x = -pushAmount;
        } else if (minPush == pushRight) {
            localPush.x = pushAmount;
        } else if (minPush == pushBack) {
            localPush.y = -pushAmount;
        } else {
            localPush.y = pushAmount;
        }

        // Transform the push as a direction (w = 0) and apply it in world XY.
        glm::vec3 worldPush = glm::vec3(instance.modelMatrix * glm::vec4(localPush, 0.0f));
        adjustedPos.x += worldPush.x;
        adjustedPos.y += worldPush.y;
        collided = true;
    }

    return collided;
}

/**
 * Cast a ray against the tightened collision boxes of nearby M2 instances.
 *
 * Candidates are gathered for the padded bounds of the ray segment, filtered
 * by the optional collision focus sphere and a padded world-space AABB test,
 * and finally tested with a slab intersection in each instance's local space.
 *
 * @param origin       Ray start in world space.
 * @param direction    Ray direction in world space; it is scaled by
 *                     maxDistance to form the tested segment.
 * @param maxDistance  Length of the tested segment.
 * @return World-space distance from origin to the nearest accepted hit, or
 *         maxDistance when no instance produces a closer hit.
 */
float M2Renderer::raycastBoundingBoxes(const glm::vec3& origin, const glm::vec3& direction, float maxDistance) const {
    QueryTimer timer(&queryTimeMs, &queryCallCount);

    // The ray is handled as a finite segment; pad its bounds by one unit
    // when asking for candidate instances.
    const glm::vec3 segmentEnd = origin + direction * maxDistance;
    const glm::vec3 gatherMin = glm::min(origin, segmentEnd) - glm::vec3(1.0f);
    const glm::vec3 gatherMax = glm::max(origin, segmentEnd) + glm::vec3(1.0f);
    gatherCandidates(gatherMin, gatherMax, tl_m2_candidateScratch);

    float nearest = maxDistance;

    for (size_t candidate : tl_m2_candidateScratch) {
        const auto& inst = instances[candidate];

        // Ignore instances whose world bounds lie outside the focus sphere.
        if (collisionFocusEnabled) {
            const float focusDistSq =
                pointAABBDistanceSq(collisionFocusPos, inst.worldBoundsMin, inst.worldBoundsMax);
            if (focusDistSq > collisionFocusRadiusSq) {
                continue;
            }
        }

        // Inexpensive rejection against the slightly inflated world-space bounds.
        float broadEnterT = 0.0f;
        const glm::vec3 paddedMin = inst.worldBoundsMin - glm::vec3(0.35f);
        const glm::vec3 paddedMax = inst.worldBoundsMax + glm::vec3(0.35f);
        const bool touchesBounds =
            segmentIntersectsAABB(origin, segmentEnd, paddedMin, paddedMax, broadEnterT);
        if (!touchesBounds) {
            continue;
        }

        if (!inst.cachedModel) {
            continue;
        }
        const M2ModelGPU& model = *inst.cachedModel;

        // Non-blocking models, invisible traps and spell effects never occlude.
        const bool ignoredModel = model.collisionNoBlock || model.isInvisibleTrap || model.isSpellEffect;
        if (ignoredModel) {
            continue;
        }

        glm::vec3 localMin, localMax;
        getTightCollisionBounds(model, localMin, localMax);

        // Very small doodads are left out of camera occlusion because they
        // produce jitter and false hits.
        const glm::vec3 scaledSize = (localMax - localMin) * inst.scale;
        if (glm::length(scaledSize) < 0.75f) {
            continue;
        }

        // Bring the ray into the instance's local space.
        const glm::vec3 localOrigin = glm::vec3(inst.invModelMatrix * glm::vec4(origin, 1.0f));
        const glm::vec3 localDir =
            glm::normalize(glm::vec3(inst.invModelMatrix * glm::vec4(direction, 0.0f)));
        const bool dirUsable =
            std::isfinite(localDir.x) && std::isfinite(localDir.y) && std::isfinite(localDir.z);
        if (!dirUsable) {
            continue;
        }

        // Slab test against the local-space box.
        const glm::vec3 invDir = 1.0f / localDir;
        const glm::vec3 slabA = (localMin - localOrigin) * invDir;
        const glm::vec3 slabB = (localMax - localOrigin) * invDir;
        const glm::vec3 entry = glm::min(slabA, slabB);
        const glm::vec3 exit = glm::max(slabA, slabB);

        const float tNear = std::max({entry.x, entry.y, entry.z});
        const float tFar = std::min({exit.x, exit.y, exit.z});
        if (tNear > tFar || tFar <= 0.0f) {
            continue;
        }

        // When the entry point is not in front of the origin, use the exit point.
        float tHit = tFar;
        if (tNear > 0.0f) {
            tHit = tNear;
        }

        // Measure the hit distance in world space.
        const glm::vec3 hitLocal = localOrigin + localDir * tHit;
        const glm::vec3 hitWorld = glm::vec3(inst.modelMatrix * glm::vec4(hitLocal, 1.0f));
        const float hitDistance = glm::length(hitWorld - origin);
        if (hitDistance > 0.0f && hitDistance < nearest) {
            nearest = hitDistance;
        }
    }

    return nearest;
}

void M2Renderer::recreatePipelines() {
    if (!vkCtx_) return;
    VkDevice device = vkCtx_->getDevice();

    // Destroy old main-pass pipelines (NOT shadow, NOT pipeline layouts)
    if (opaquePipeline_)            { vkDestroyPipeline(device, opaquePipeline_, nullptr); opaquePipeline_ = VK_NULL_HANDLE; }
    if (alphaTestPipeline_)         { vkDestroyPipeline(device, alphaTestPipeline_, nullptr); alphaTestPipeline_ = VK_NULL_HANDLE; }
    if (alphaPipeline_)             { vkDestroyPipeline(device, alphaPipeline_, nullptr); alphaPipeline_ = VK_NULL_HANDLE; }
    if (additivePipeline_)          { vkDestroyPipeline(device, additivePipeline_, nullptr); additivePipeline_ = VK_NULL_HANDLE; }
    if (particlePipeline_)          { vkDestroyPipeline(device, particlePipeline_, nullptr); particlePipeline_ = VK_NULL_HANDLE; }
    if (particleAdditivePipeline_)  { vkDestroyPipeline(device, particleAdditivePipeline_, nullptr); particleAdditivePipeline_ = VK_NULL_HANDLE; }
    if (smokePipeline_)             { vkDestroyPipeline(device, smokePipeline_, nullptr); smokePipeline_ = VK_NULL_HANDLE; }

    // --- Load shaders ---
    rendering::VkShaderModule m2Vert, m2Frag;
    rendering::VkShaderModule particleVert, particleFrag;
    rendering::VkShaderModule smokeVert, smokeFrag;

    m2Vert.loadFromFile(device, "assets/shaders/m2.vert.spv");
    m2Frag.loadFromFile(device, "assets/shaders/m2.frag.spv");
    particleVert.loadFromFile(device, "assets/shaders/m2_particle.vert.spv");
    particleFrag.loadFromFile(device, "assets/shaders/m2_particle.frag.spv");
    smokeVert.loadFromFile(device, "assets/shaders/m2_smoke.vert.spv");
    smokeFrag.loadFromFile(device, "assets/shaders/m2_smoke.frag.spv");

    if (!m2Vert.isValid() || !m2Frag.isValid()) {
        LOG_ERROR("M2Renderer::recreatePipelines: missing required shaders");
        return;
    }

    VkRenderPass mainPass = vkCtx_->getImGuiRenderPass();

    // --- M2 model vertex input ---
    VkVertexInputBindingDescription m2Binding{};
    m2Binding.binding = 0;
    m2Binding.stride = 18 * sizeof(float);
    m2Binding.inputRate = VK_VERTEX_INPUT_RATE_VERTEX;

    std::vector<VkVertexInputAttributeDescription> m2Attrs = {
        {0, 0, VK_FORMAT_R32G32B32_SFLOAT, 0},                     // position
        {1, 0, VK_FORMAT_R32G32B32_SFLOAT, 3 * sizeof(float)},     // normal
        {2, 0, VK_FORMAT_R32G32_SFLOAT, 6 * sizeof(float)},        // texCoord0
        {5, 0, VK_FORMAT_R32G32_SFLOAT, 8 * sizeof(float)},        // texCoord1
        {3, 0, VK_FORMAT_R32G32B32A32_SFLOAT, 10 * sizeof(float)}, // boneWeights
        {4, 0, VK_FORMAT_R32G32B32A32_SFLOAT, 14 * sizeof(float)}, // boneIndices (float)
    };

    auto buildM2Pipeline = [&](VkPipelineColorBlendAttachmentState blendState, bool depthWrite) -> VkPipeline {
        return PipelineBuilder()
            .setShaders(m2Vert.stageInfo(VK_SHADER_STAGE_VERTEX_BIT),
                        m2Frag.stageInfo(VK_SHADER_STAGE_FRAGMENT_BIT))
            .setVertexInput({m2Binding}, m2Attrs)
            .setTopology(VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST)
            .setRasterization(VK_POLYGON_MODE_FILL, VK_CULL_MODE_NONE)
            .setDepthTest(true, depthWrite, VK_COMPARE_OP_LESS_OR_EQUAL)
            .setColorBlendAttachment(blendState)
            .setMultisample(vkCtx_->getMsaaSamples())
            .setLayout(pipelineLayout_)
            .setRenderPass(mainPass)
            .setDynamicStates({VK_DYNAMIC_STATE_VIEWPORT, VK_DYNAMIC_STATE_SCISSOR})
            .build(device);
    };

    opaquePipeline_ = buildM2Pipeline(PipelineBuilder::blendDisabled(), true);
    alphaTestPipeline_ = buildM2Pipeline(PipelineBuilder::blendAlpha(), true);
    alphaPipeline_ = buildM2Pipeline(PipelineBuilder::blendAlpha(), false);
    additivePipeline_ = buildM2Pipeline(PipelineBuilder::blendAdditive(), false);

    // --- Particle pipelines ---
    if (particleVert.isValid() && particleFrag.isValid()) {
        VkVertexInputBindingDescription pBind{};
        pBind.binding = 0;
        pBind.stride = 9 * sizeof(float); // pos3 + color4 + size1 + tile1
        pBind.inputRate = VK_VERTEX_INPUT_RATE_VERTEX;

        std::vector<VkVertexInputAttributeDescription> pAttrs = {
            {0, 0, VK_FORMAT_R32G32B32_SFLOAT, 0},                    // position
            {1, 0, VK_FORMAT_R32G32B32A32_SFLOAT, 3 * sizeof(float)}, // color
            {2, 0, VK_FORMAT_R32_SFLOAT, 7 * sizeof(float)},          // size
            {3, 0, VK_FORMAT_R32_SFLOAT, 8 * sizeof(float)},          // tile
        };

        auto buildParticlePipeline = [&](VkPipelineColorBlendAttachmentState blend) -> VkPipeline {
            return PipelineBuilder()
                .setShaders(particleVert.stageInfo(VK_SHADER_STAGE_VERTEX_BIT),
                            particleFrag.stageInfo(VK_SHADER_STAGE_FRAGMENT_BIT))
                .setVertexInput({pBind}, pAttrs)
                .setTopology(VK_PRIMITIVE_TOPOLOGY_POINT_LIST)
                .setRasterization(VK_POLYGON_MODE_FILL, VK_CULL_MODE_NONE)
                .setDepthTest(true, false, VK_COMPARE_OP_LESS_OR_EQUAL)
                .setColorBlendAttachment(blend)
                .setMultisample(vkCtx_->getMsaaSamples())
                .setLayout(particlePipelineLayout_)
                .setRenderPass(mainPass)
                .setDynamicStates({VK_DYNAMIC_STATE_VIEWPORT, VK_DYNAMIC_STATE_SCISSOR})
                .build(device);
        };

        particlePipeline_ = buildParticlePipeline(PipelineBuilder::blendAlpha());
        particleAdditivePipeline_ = buildParticlePipeline(PipelineBuilder::blendAdditive());
    }

    // --- Smoke pipeline ---
    if (smokeVert.isValid() && smokeFrag.isValid()) {
        VkVertexInputBindingDescription sBind{};
        sBind.binding = 0;
        sBind.stride = 6 * sizeof(float); // pos3 + lifeRatio1 + size1 + isSpark1
        sBind.inputRate = VK_VERTEX_INPUT_RATE_VERTEX;

        std::vector<VkVertexInputAttributeDescription> sAttrs = {
            {0, 0, VK_FORMAT_R32G32B32_SFLOAT, 0},           // position
            {1, 0, VK_FORMAT_R32_SFLOAT, 3 * sizeof(float)}, // lifeRatio
            {2, 0, VK_FORMAT_R32_SFLOAT, 4 * sizeof(float)}, // size
            {3, 0, VK_FORMAT_R32_SFLOAT, 5 * sizeof(float)}, // isSpark
        };

        smokePipeline_ = PipelineBuilder()
            .setShaders(smokeVert.stageInfo(VK_SHADER_STAGE_VERTEX_BIT),
                        smokeFrag.stageInfo(VK_SHADER_STAGE_FRAGMENT_BIT))
            .setVertexInput({sBind}, sAttrs)
            .setTopology(VK_PRIMITIVE_TOPOLOGY_POINT_LIST)
            .setRasterization(VK_POLYGON_MODE_FILL, VK_CULL_MODE_NONE)
            .setDepthTest(true, false, VK_COMPARE_OP_LESS_OR_EQUAL)
            .setColorBlendAttachment(PipelineBuilder::blendAlpha())
            .setMultisample(vkCtx_->getMsaaSamples())
            .setLayout(smokePipelineLayout_)
            .setRenderPass(mainPass)
            .setDynamicStates({VK_DYNAMIC_STATE_VIEWPORT, VK_DYNAMIC_STATE_SCISSOR})
            .build(device);
    }

    m2Vert.destroy(); m2Frag.destroy();
    particleVert.destroy(); particleFrag.destroy();
    smokeVert.destroy(); smokeFrag.destroy();

    core::Logger::getInstance().info("M2Renderer: pipelines recreated");
}

} // namespace rendering
} // namespace wowee
-												Initial commit: wowee native WoW 3.3.5a client

											
										
										
											2026-02-02 12:24:50 -08:00
+								#include "rendering/m2_renderer.hpp"
-												Vulcan Nightmare

Experimentally bringing up vulcan support

											
										
										
											2026-02-21 19:41:21 -08:00
+								#include "rendering/vk_context.hpp"
 								#include "rendering/vk_buffer.hpp"
 								#include "rendering/vk_texture.hpp"
 								#include "rendering/vk_pipeline.hpp"
 								#include "rendering/vk_shader.hpp"
 								#include "rendering/vk_utils.hpp"
 								#include "rendering/vk_frame_data.hpp"
-												Initial commit: wowee native WoW 3.3.5a client

											
										
										
											2026-02-02 12:24:50 -08:00
+								#include "rendering/camera.hpp"
 								#include "rendering/frustum.hpp"
 								#include "pipeline/asset_manager.hpp"
 								#include "pipeline/blp_loader.hpp"
 								#include "core/logger.hpp"
-												Optimize collision queries with spatial grid and improve movement CCD

											
										
										
											2026-02-03 16:21:48 -08:00
+								#include <chrono>
-												Add stepped fountain collision for parkour-style climbing

											
										
										
											2026-02-03 16:28:33 -08:00
+								#include <cctype>
-												Initial commit: wowee native WoW 3.3.5a client

											
										
										
											2026-02-02 12:24:50 -08:00
+								#include <glm/gtc/matrix_transform.hpp>
 								#include <glm/gtc/type_ptr.hpp>
-												Add M2 skeletal animation and fix terrain texture layers

- Implement GPU bone skinning for M2 doodads/creatures (gryphons, birds)
- Store bone hierarchy and animation keyframes per model
- Compute bone matrices per-instance with keyframe interpolation
- Upload bone weights/indices in vertex buffer, skinning in vertex shader
- Fix terrain texture rendering: restore sampler-to-unit uniform bindings
  removed during texture bind optimization (roads were invisible)

											
										
										
											2026-02-04 11:40:00 -08:00
+								#include <glm/gtx/quaternion.hpp>
-												Performance optimizations and collision improvements

Performance:
- Remove expensive inverse() from all vertex shaders (terrain, WMO, M2, water, character)
- Add uniform location caching to avoid repeated glGetUniformLocation calls
- Add proper frustum culling for WMO groups using AABB intersection
- Add distance-based culling for WMO and M2 instances
- Add cleanup of unused M2/WMO models when tiles unload

Collision & Movement:
- Add M2 doodad collision detection (fences, boxes, etc.)
- Reduce character eye height (5.0 -> 1.8) and collision radius (2.5 -> 0.5)
- Enable WoW-style movement speed by default (14 units/sec run, 5 walk, 9 back)
- Fix emote grammar ("You waves." -> "You wave.")

Misc:
- Rename window title to "Wowee"

											
										
										
											2026-02-02 23:03:45 -08:00
+								#include <unordered_set>
-												Add tiled particle atlas support

											
										
										
											2026-02-07 19:20:37 -08:00
+								#include <functional>
-												Performance optimizations and collision improvements

Performance:
- Remove expensive inverse() from all vertex shaders (terrain, WMO, M2, water, character)
- Add uniform location caching to avoid repeated glGetUniformLocation calls
- Add proper frustum culling for WMO groups using AABB intersection
- Add distance-based culling for WMO and M2 instances
- Add cleanup of unused M2/WMO models when tiles unload

Collision & Movement:
- Add M2 doodad collision detection (fences, boxes, etc.)
- Reduce character eye height (5.0 -> 1.8) and collision radius (2.5 -> 0.5)
- Enable WoW-style movement speed by default (14 units/sec run, 5 walk, 9 back)
- Fix emote grammar ("You waves." -> "You wave.")

Misc:
- Rename window title to "Wowee"

											
										
										
											2026-02-02 23:03:45 -08:00
+								#include <algorithm>
-												Tune collision feel and align M2 movement/camera behavior

											
										
										
											2026-02-03 15:17:54 -08:00
+								#include <cmath>
-												Fix NPC apparel fallback and reduce world-entry stutter

Hide NPC cloak/object-skin mesh when no cape texture resolves by using a transparent texture fallback, preventing skin-texture bleed on cloaks. Tighten NPC equipment region compositing by slot and add safe humanoid geoset selection to avoid robe-over-pants conflicts and odd pants texturing.

Reduce login/runtime hitching by deferring non-critical world-system initialization across frames, lowering per-frame transport doodad spawn budget, and demoting high-volume transport/MO_TRANSPORT diagnostics to debug. Gate M2 glow diagnostics behind WOWEE_M2_GLOW_DIAG and make zone music prewarm opt-in via WOWEE_PREWARM_ZONE_MUSIC.

											
										
										
											2026-02-20 20:31:04 -08:00
+								#include <cstdlib>
-												Tune collision feel and align M2 movement/camera behavior

											
										
										
											2026-02-03 15:17:54 -08:00
+								#include <limits>
-												Parallelize M2 bone matrix computation across worker threads

Split the M2 animation update loop into three phases: sequential animation state update, parallel bone matrix computation via std::async (when 32+ animated instances), and sequential particle update. Each thread processes a disjoint slice of instances so no synchronization is needed.

											
										
										
											2026-02-07 14:28:14 -08:00
+								#include <future>
 								#include <thread>
-												Initial commit: wowee native WoW 3.3.5a client

											
										
										
											2026-02-02 12:24:50 -08:00
 								namespace wowee {
 								namespace rendering {
-												Tune collision feel and align M2 movement/camera behavior

											
										
										
											2026-02-03 15:17:54 -08:00
+								namespace {
-												Fix NPC apparel fallback and reduce world-entry stutter

Hide NPC cloak/object-skin mesh when no cape texture resolves by using a transparent texture fallback, preventing skin-texture bleed on cloaks. Tighten NPC equipment region compositing by slot and add safe humanoid geoset selection to avoid robe-over-pants conflicts and odd pants texturing.

Reduce login/runtime hitching by deferring non-critical world-system initialization across frames, lowering per-frame transport doodad spawn budget, and demoting high-volume transport/MO_TRANSPORT diagnostics to debug. Gate M2 glow diagnostics behind WOWEE_M2_GLOW_DIAG and make zone music prewarm opt-in via WOWEE_PREWARM_ZONE_MUSIC.

											
										
										
											2026-02-20 20:31:04 -08:00
+								bool envFlagEnabled(const char* key, bool defaultValue) {
 								    const char* raw = std::getenv(key);
 								    if (!raw || !*raw) return defaultValue;
 								    std::string v(raw);
 								    std::transform(v.begin(), v.end(), v.begin(), [](unsigned char c) {
 								        return static_cast<char>(std::tolower(c));
 								    });
 								    return !(v == "0" || v == "false" || v == "off" || v == "no");
 								}
-												Stabilize streaming memory and parser handling; revert socket recv optimizations

											
										
										
											2026-02-22 07:26:54 -08:00
+								size_t envSizeMBOrDefault(const char* name, size_t defMb) {
 								    const char* raw = std::getenv(name);
 								    if (!raw || !*raw) return defMb;
 								    char* end = nullptr;
 								    unsigned long long mb = std::strtoull(raw, &end, 10);
 								    if (end == raw || mb == 0) return defMb;
 								    return static_cast<size_t>(mb);
 								}
-												Optimize threading and texture fallback stability

											
										
										
											2026-02-22 08:12:08 -08:00
+								size_t envSizeOrDefault(const char* name, size_t defValue) {
 								    const char* raw = std::getenv(name);
 								    if (!raw || !*raw) return defValue;
 								    char* end = nullptr;
 								    unsigned long long v = std::strtoull(raw, &end, 10);
 								    if (end == raw || v == 0) return defValue;
 								    return static_cast<size_t>(v);
 								}
-												Add tiled particle atlas support

											
										
										
											2026-02-07 19:20:37 -08:00
+								static constexpr uint32_t kParticleFlagRandomized = 0x40;
 								static constexpr uint32_t kParticleFlagTiled = 0x80;
-												Improve runtime stutter handling and ground clutter performance

- reduce per-tile ground clutter generation pressure and enforce tighter caps to avoid spikes

- remove expensive detail dedupe scans from the hot render path

- add progressive/lazy clutter updates around player movement to smooth frame pacing

- lower noisy runtime INFO logging to DEBUG/throttled paths

- keep terrain/game screen updates responsive while preserving existing behavior

											
										
										
											2026-02-21 01:26:16 -08:00
+								float computeGroundDetailDownOffset(const M2ModelGPU& model, float scale) {
 								    // Keep a tiny sink to avoid hovering, but cap pivot compensation so details
 								    // don't get pushed below the terrain on models with large positive boundMin.
 								    const float pivotComp = glm::clamp(std::max(0.0f, model.boundMin.z * scale), 0.0f, 0.10f);
 								    const float terrainSink = 0.03f;
 								    return pivotComp + terrainSink;
 								}
-												Tune collision feel and align M2 movement/camera behavior

											
										
										
											2026-02-03 15:17:54 -08:00
+								void getTightCollisionBounds(const M2ModelGPU& model, glm::vec3& outMin, glm::vec3& outMax) {
 								    glm::vec3 center = (model.boundMin + model.boundMax) * 0.5f;
 								    glm::vec3 half = (model.boundMax - model.boundMin) * 0.5f;
-												Tune planter curb collision and reduce foliage pushback

											
										
										
											2026-02-03 16:51:25 -08:00
+								    // Per-shape collision fitting:
-												Tune collision behavior for ramps, props, and structural walls

											
										
										
											2026-02-03 19:10:22 -08:00
+								    // - small solid props (boxes/crates/chests): tighter than full mesh, but
 								    //   larger than default to prevent walk-through on narrow objects
-												Tune planter curb collision and reduce foliage pushback

											
										
										
											2026-02-03 16:51:25 -08:00
+								    // - default: tighter fit (avoid oversized blockers)
 								    // - stepped low platforms (tree curbs/planters): wider XY + lower Z
-												Add loading screen, fix tree/foliage collision, jump buffering, and fence rotation

- Loading screen stays visible until all terrain tiles finish streaming;
  character spawns only after terrain is loaded and Z-snapped to ground
- Reduce tree trunk collision bounds (5% of canopy, capped at 5.0) and
  make all small/medium trees, bushes, lily pads, and foliage walkthrough
- Add jump input buffering (150ms) and coyote time (100ms) for responsive jumps
- Fix fence orientation by adding +180° heading rotation
- Increase terrain load radius from 1 to 2 (5x5 tile grid)
- Add hearthstone callback for single-player camera reset

											
										
										
											2026-02-04 13:29:27 -08:00
+								    if (model.collisionTreeTrunk) {
 								        // Tree trunk: proportional cylinder at the base of the tree.
 								        float modelHoriz = std::max(model.boundMax.x - model.boundMin.x,
 								                                    model.boundMax.y - model.boundMin.y);
 								        float trunkHalf = std::clamp(modelHoriz * 0.05f, 0.5f, 5.0f);
 								        half.x = trunkHalf;
 								        half.y = trunkHalf;
 								        // Height proportional to trunk width, capped at 3.5 units.
 								        half.z = std::min(trunkHalf * 2.5f, 3.5f);
 								        // Shift center down so collision is at the base (trunk), not mid-canopy.
 								        center.z = model.boundMin.z + half.z;
 								    } else if (model.collisionNarrowVerticalProp) {
-												Tune collision behavior for ramps, props, and structural walls

											
										
										
											2026-02-03 19:10:22 -08:00
+								        // Tall thin props (lamps/posts): keep passable gaps near walls.
 								        half.x *= 0.30f;
 								        half.y *= 0.30f;
 								        half.z *= 0.96f;
 								    } else if (model.collisionSmallSolidProp) {
 								        // Keep full tight mesh bounds for small solid props to avoid clip-through.
 								        half.x *= 1.00f;
 								        half.y *= 1.00f;
 								        half.z *= 1.00f;
 								    } else if (model.collisionSteppedLowPlatform) {
 								        half.x *= 0.98f;
 								        half.y *= 0.98f;
-												Tune planter curb collision and reduce foliage pushback

											
										
										
											2026-02-03 16:51:25 -08:00
+								        half.z *= 0.52f;
 								    } else {
 								        half.x *= 0.66f;
 								        half.y *= 0.66f;
 								        half.z *= 0.76f;
 								    }
-												Tune collision feel and align M2 movement/camera behavior

											
										
										
											2026-02-03 15:17:54 -08:00
 								    outMin = center - half;
 								    outMax = center + half;
 								}
-												Add stepped fountain collision for parkour-style climbing

											
										
										
											2026-02-03 16:28:33 -08:00
+								float getEffectiveCollisionTopLocal(const M2ModelGPU& model,
 								                                    const glm::vec3& localPos,
 								                                    const glm::vec3& localMin,
 								                                    const glm::vec3& localMax) {
-												Tune planter curb collision and reduce foliage pushback

											
										
										
											2026-02-03 16:51:25 -08:00
+								    if (!model.collisionSteppedFountain && !model.collisionSteppedLowPlatform) {
-												Add stepped fountain collision for parkour-style climbing

											
										
										
											2026-02-03 16:28:33 -08:00
+								        return localMax.z;
 								    }
 								    glm::vec2 center((localMin.x + localMax.x) * 0.5f, (localMin.y + localMax.y) * 0.5f);
 								    glm::vec2 half((localMax.x - localMin.x) * 0.5f, (localMax.y - localMin.y) * 0.5f);
 								    if (half.x < 1e-4f || half.y < 1e-4f) {
 								        return localMax.z;
 								    }
 								    float nx = (localPos.x - center.x) / half.x;
 								    float ny = (localPos.y - center.y) / half.y;
 								    float r = std::sqrt(nx * nx + ny * ny);
 								    float h = localMax.z - localMin.z;
-												Tune planter curb collision and reduce foliage pushback

											
										
										
											2026-02-03 16:51:25 -08:00
+								    if (model.collisionSteppedFountain) {
-												Add spellbook, fix WMO floor clipping, and polish UI/visuals

- Add spellbook screen (P key) with Spell.dbc name lookup and action bar assignment
- Default Attack and Hearthstone spells available in single player
- Fix WMO floor clipping (gryphon roost) by tightening ceiling rejection threshold
- Darken ocean water, increase wave motion and opacity
- Add M2 model distance fade-in to prevent pop-in
- Reposition chat window, add slash/enter key focus
- Remove debug key commands (keep only F1 perf HUD, N minimap)
- Performance: return chat history by const ref, use deque for O(1) pop_front

											
										
										
											2026-02-04 11:31:08 -08:00
+								        if (r > 0.85f) return localMin.z + h * 0.18f;  // outer lip
 								        if (r > 0.65f) return localMin.z + h * 0.36f;  // mid step
 								        if (r > 0.45f) return localMin.z + h * 0.54f;  // inner step
 								        if (r > 0.28f) return localMin.z + h * 0.70f;  // center platform / statue base
 								        if (r > 0.14f) return localMin.z + h * 0.84f;  // statue body / sword
 								        return localMin.z + h * 0.96f;                  // statue head / top
-												Tune planter curb collision and reduce foliage pushback

											
										
										
											2026-02-03 16:51:25 -08:00
+								    }
 								    // Low square curb/planter profile:
 								    // use edge distance (not radial) so corner blocks don't become too low and
 								    // clip-through at diagonals.
 								    float edge = std::max(std::abs(nx), std::abs(ny));
-												Improve performance and tune ramp/planter collision behavior

											
										
										
											2026-02-03 17:21:04 -08:00
+								    if (edge > 0.92f) return localMin.z + h * 0.06f;
 								    if (edge > 0.72f) return localMin.z + h * 0.30f;
 								    return localMin.z + h * 0.62f;
-												Add stepped fountain collision for parkour-style climbing

											
										
										
											2026-02-03 16:28:33 -08:00
+								}
-												Fix camera occlusion and stabilize WMO/M2 collision behavior

											
										
										
											2026-02-03 16:04:21 -08:00
+								bool segmentIntersectsAABB(const glm::vec3& from, const glm::vec3& to,
 								                           const glm::vec3& bmin, const glm::vec3& bmax,
 								                           float& outEnterT) {
 								    glm::vec3 d = to - from;
 								    float tEnter = 0.0f;
 								    float tExit = 1.0f;
 								    for (int axis = 0; axis < 3; axis++) {
 								        if (std::abs(d[axis]) < 1e-6f) {
 								            if (from[axis] < bmin[axis] || from[axis] > bmax[axis]) {
 								                return false;
 								            }
 								            continue;
 								        }
 								        float inv = 1.0f / d[axis];
 								        float t0 = (bmin[axis] - from[axis]) * inv;
 								        float t1 = (bmax[axis] - from[axis]) * inv;
 								        if (t0 > t1) std::swap(t0, t1);
 								        tEnter = std::max(tEnter, t0);
 								        tExit = std::min(tExit, t1);
 								        if (tEnter > tExit) return false;
 								    }
 								    outEnterT = tEnter;
 								    return tExit >= 0.0f && tEnter <= 1.0f;
 								}
-												Optimize collision queries with spatial grid and improve movement CCD

											
										
										
											2026-02-03 16:21:48 -08:00
+								void transformAABB(const glm::mat4& modelMatrix,
 								                   const glm::vec3& localMin,
 								                   const glm::vec3& localMax,
 								                   glm::vec3& outMin,
 								                   glm::vec3& outMax) {
 								    const glm::vec3 corners[8] = {
 								        {localMin.x, localMin.y, localMin.z},
 								        {localMin.x, localMin.y, localMax.z},
 								        {localMin.x, localMax.y, localMin.z},
 								        {localMin.x, localMax.y, localMax.z},
 								        {localMax.x, localMin.y, localMin.z},
 								        {localMax.x, localMin.y, localMax.z},
 								        {localMax.x, localMax.y, localMin.z},
 								        {localMax.x, localMax.y, localMax.z}
 								    };
 								    outMin = glm::vec3(std::numeric_limits<float>::max());
 								    outMax = glm::vec3(-std::numeric_limits<float>::max());
 								    for (const auto& c : corners) {
 								        glm::vec3 wc = glm::vec3(modelMatrix * glm::vec4(c, 1.0f));
 								        outMin = glm::min(outMin, wc);
 								        outMax = glm::max(outMax, wc);
 								    }
 								}
 								float pointAABBDistanceSq(const glm::vec3& p, const glm::vec3& bmin, const glm::vec3& bmax) {
 								    glm::vec3 q = glm::clamp(p, bmin, bmax);
 								    glm::vec3 d = p - q;
 								    return glm::dot(d, d);
 								}
 								struct QueryTimer {
 								    double* totalMs = nullptr;
 								    uint32_t* callCount = nullptr;
 								    std::chrono::steady_clock::time_point start = std::chrono::steady_clock::now();
 								    QueryTimer(double* total, uint32_t* calls) : totalMs(total), callCount(calls) {}
 								    ~QueryTimer() {
 								        if (callCount) {
 								            (*callCount)++;
 								        }
 								        if (totalMs) {
 								            auto end = std::chrono::steady_clock::now();
 								            *totalMs += std::chrono::duration<double, std::milli>(end - start).count();
 								        }
 								    }
 								};
-												Add M2 collision mesh parsing and mesh-based wall/floor collision

Parse bounding vertices, triangles, and normals from M2 files and use
them for proper triangle-level collision instead of AABB heuristics.
Spatial grid bucketing for efficient queries, closest-point wall push
with soft clamping, and ray-triangle floor detection alongside existing
AABB fallback.

											
										
										
											2026-02-08 19:56:17 -08:00
+								// Möller–Trumbore ray-triangle intersection.
 								// Returns the ray parameter t of the hit (the distance along the ray when dir is unit
 								// length), or -1.0f when the ray is near-parallel to the triangle plane, passes outside
 								// the triangle, or the hit is not strictly in front of the origin (t <= EPSILON).
 								float rayTriangleIntersect(const glm::vec3& origin, const glm::vec3& dir,
 								                           const glm::vec3& v0, const glm::vec3& v1, const glm::vec3& v2) {
 								    constexpr float EPSILON = 1e-6f;
 								    constexpr float MISS = -1.0f;
 								    const glm::vec3 edge1 = v1 - v0;
 								    const glm::vec3 edge2 = v2 - v0;
 								    const glm::vec3 pvec = glm::cross(dir, edge2);
 								    const float det = glm::dot(edge1, pvec);
 								    // A determinant close to zero means the ray runs parallel to the triangle plane.
 								    if (-EPSILON < det && det < EPSILON) {
 								        return MISS;
 								    }
 								    const float invDet = 1.0f / det;
 								    const glm::vec3 tvec = origin - v0;
 								    // First barycentric coordinate must lie in [0, 1].
 								    const float baryU = invDet * glm::dot(tvec, pvec);
 								    if (baryU < 0.0f || baryU > 1.0f) {
 								        return MISS;
 								    }
 								    const glm::vec3 qvec = glm::cross(tvec, edge1);
 								    // Second barycentric coordinate, and the sum of both, must stay inside the triangle.
 								    const float baryV = invDet * glm::dot(dir, qvec);
 								    if (baryV < 0.0f || baryU + baryV > 1.0f) {
 								        return MISS;
 								    }
 								    const float hitT = invDet * glm::dot(edge2, qvec);
 								    if (hitT > EPSILON) {
 								        return hitT;
 								    }
 								    return MISS;
 								}
 								// Closest point on triangle (a, b, c) to point p, after Ericson, Real-Time Collision
 								// Detection §5.1.5: p is classified against the vertex, edge and face regions of the
 								// triangle in turn and projected onto the first region that contains it.
 								glm::vec3 closestPointOnTriangle(const glm::vec3& p,
 								                                  const glm::vec3& a, const glm::vec3& b, const glm::vec3& c) {
 								    const glm::vec3 ab = b - a;
 								    const glm::vec3 ac = c - a;
 								    // Vertex region A.
 								    const glm::vec3 ap = p - a;
 								    const float abDotAp = glm::dot(ab, ap);
 								    const float acDotAp = glm::dot(ac, ap);
 								    if (abDotAp <= 0.0f && acDotAp <= 0.0f) {
 								        return a;
 								    }
 								    // Vertex region B.
 								    const glm::vec3 bp = p - b;
 								    const float abDotBp = glm::dot(ab, bp);
 								    const float acDotBp = glm::dot(ac, bp);
 								    if (abDotBp >= 0.0f && acDotBp <= abDotBp) {
 								        return b;
 								    }
 								    // Edge region AB: project p onto AB.
 								    const float weightC = abDotAp * acDotBp - abDotBp * acDotAp;
 								    if (weightC <= 0.0f && abDotAp >= 0.0f && abDotBp <= 0.0f) {
 								        const float t = abDotAp / (abDotAp - abDotBp);
 								        return a + t * ab;
 								    }
 								    // Vertex region C.
 								    const glm::vec3 cp = p - c;
 								    const float abDotCp = glm::dot(ab, cp);
 								    const float acDotCp = glm::dot(ac, cp);
 								    if (acDotCp >= 0.0f && abDotCp <= acDotCp) {
 								        return c;
 								    }
 								    // Edge region AC: project p onto AC.
 								    const float weightB = abDotCp * acDotAp - abDotAp * acDotCp;
 								    if (weightB <= 0.0f && acDotAp >= 0.0f && acDotCp <= 0.0f) {
 								        const float t = acDotAp / (acDotAp - acDotCp);
 								        return a + t * ac;
 								    }
 								    // Edge region BC: project p onto BC.
 								    const float weightA = abDotBp * acDotCp - abDotCp * acDotBp;
 								    if (weightA <= 0.0f && (acDotBp - abDotBp) >= 0.0f && (abDotCp - acDotCp) >= 0.0f) {
 								        const float t = (acDotBp - abDotBp) / ((acDotBp - abDotBp) + (abDotCp - acDotCp));
 								        return b + t * (c - b);
 								    }
 								    // Interior: combine the edge vectors using normalized barycentric weights.
 								    const float invSum = 1.0f / (weightA + weightB + weightC);
 								    const float baryV = weightB * invSum;
 								    const float baryW = weightC * invSum;
 								    return a + ab * baryV + ac * baryW;
 								}
-												Tune collision feel and align M2 movement/camera behavior

											
										
										
											2026-02-03 15:17:54 -08:00
+								} // namespace
-												Parallel animation updates, thread-safe collision, M2 pop-in fix, shadow stabilization

- Overlap M2 and character animation updates via std::async (~2-5ms saved)
- Thread-local collision scratch buffers for concurrent floor queries
- Parallel terrain/WMO/M2 floor queries in camera controller
- Seed new M2 instance bones from existing siblings to eliminate pop-in flash
- Fix shadow flicker: snap center along stable light axes instead of in view space
- Increase shadow distance default to 300 units (slider max 500)

											
										
										
											2026-03-07 22:29:06 -08:00
+								// Thread-local scratch buffers for collision queries (allows concurrent getFloorHeight calls)
 								// File-local containers declared thread_local, so every thread works on its own copy.
 								// NOTE(review): what each container holds is inferred from its name only — the code
 								// that fills and reads them is not visible here; confirm at the use sites.
 								static thread_local std::vector<size_t> tl_m2_candidateScratch;            // presumably candidate indices
 								static thread_local std::unordered_set<uint32_t> tl_m2_candidateIdScratch; // presumably ids already seen
 								static thread_local std::vector<uint32_t> tl_m2_collisionTriScratch;       // presumably collision triangle indices
 								// Forward declaration (defined after animation helpers)
 								static void computeBoneMatrices(const M2ModelGPU& model, M2Instance& instance);
-												Initial commit: wowee native WoW 3.3.5a client

											
										
										
											2026-02-02 12:24:50 -08:00
+								void M2Instance::updateModelMatrix() {
 								    modelMatrix = glm::mat4(1.0f);
 								    modelMatrix = glm::translate(modelMatrix, position);
 								    // Rotation in radians
 								    modelMatrix = glm::rotate(modelMatrix, rotation.x, glm::vec3(1.0f, 0.0f, 0.0f));
 								    modelMatrix = glm::rotate(modelMatrix, rotation.y, glm::vec3(0.0f, 1.0f, 0.0f));
 								    modelMatrix = glm::rotate(modelMatrix, rotation.z, glm::vec3(0.0f, 0.0f, 1.0f));
 								    modelMatrix = glm::scale(modelMatrix, glm::vec3(scale));
-												Fix camera occlusion and stabilize WMO/M2 collision behavior

											
										
										
											2026-02-03 16:04:21 -08:00
+								    invModelMatrix = glm::inverse(modelMatrix);
-												Initial commit: wowee native WoW 3.3.5a client

											
										
										
											2026-02-02 12:24:50 -08:00
+								}
 								// Construction does no work of its own; Vulkan objects are created by initialize().
 								M2Renderer::M2Renderer() = default;
 								// Destructor: hands all teardown to shutdown().
 								M2Renderer::~M2Renderer() {
 								    shutdown();
 								}
-												Vulcan Nightmare

Experimentally bringing up Vulkan support

											
										
										
											2026-02-21 19:41:21 -08:00
+								bool M2Renderer::initialize(VkContext* ctx, VkDescriptorSetLayout perFrameLayout,
 								                            pipeline::AssetManager* assets) {
-												Fix WMO visibility culling and renderer initialization guards

											
										
										
											2026-02-18 22:41:05 -08:00
+								    if (initialized_) { assetManager = assets; return true; }
-												Vulcan Nightmare

Experimentally bringing up vulcan support

											
										
										
											2026-02-21 19:41:21 -08:00
+								    vkCtx_ = ctx;
-												Initial commit: wowee native WoW 3.3.5a client

											
										
										
											2026-02-02 12:24:50 -08:00
+								    assetManager = assets;
-												Optimize threading and texture fallback stability

											
										
										
											2026-02-22 08:12:08 -08:00
+								    const unsigned hc = std::thread::hardware_concurrency();
 								    const size_t availableCores = (hc > 1u) ? static_cast<size_t>(hc - 1u) : 1ull;
 								    // Keep headroom for other frame tasks: M2 gets about half of non-main cores by default.
 								    const size_t defaultAnimThreads = std::max<size_t>(1, availableCores / 2);
 								    numAnimThreads_ = static_cast<uint32_t>(std::max<size_t>(
 , envSizeOrDefault("WOWEE_M2_ANIM_THREADS", defaultAnimThreads)));
-												Vulcan Nightmare

Experimentally bringing up vulcan support

											
										
										
											2026-02-21 19:41:21 -08:00
+								    LOG_INFO("Initializing M2 renderer (Vulkan, ", numAnimThreads_, " anim threads)...");
-												Improve movement, crouching, and add M2 animation

Movement:
- Fix speed controls: Shift=sprint (28), normal run (14), Ctrl=walk (5)
- Reduce character height for doorway clearance (eye height 1.2)
- Add working crouch (C or X key) with smooth transition (eye height 0.6)
- Jump to stand up from crouch

M2 Animation:
- Add animation time tracking per M2 instance
- Add procedural swaying animation in vertex shader
- Update animation each frame for vegetation movement

											
										
										
											2026-02-02 23:10:19 -08:00
-												Vulcan Nightmare

Experimentally bringing up vulcan support

											
										
										
											2026-02-21 19:41:21 -08:00
+								    VkDevice device = vkCtx_->getDevice();
-												Initial commit: wowee native WoW 3.3.5a client

											
										
										
											2026-02-02 12:24:50 -08:00
-												Vulcan Nightmare

Experimentally bringing up vulcan support

											
										
										
											2026-02-21 19:41:21 -08:00
+								    // --- Descriptor set layouts ---
-												Initial commit: wowee native WoW 3.3.5a client

											
										
										
											2026-02-02 12:24:50 -08:00
-												Vulcan Nightmare

Experimentally bringing up vulcan support

											
										
										
											2026-02-21 19:41:21 -08:00
+								    // Material set layout (set 1): binding 0 = sampler2D, binding 2 = M2Material UBO
 								    // (M2Params moved to push constants alongside model matrix)
 								    {
 								        VkDescriptorSetLayoutBinding bindings[2] = {};
 								        bindings[0].binding = 0;
 								        bindings[0].descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER;
 								        bindings[0].descriptorCount = 1;
 								        bindings[0].stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT;
 								        bindings[1].binding = 2;
 								        bindings[1].descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER;
 								        bindings[1].descriptorCount = 1;
 								        bindings[1].stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT;
 								        VkDescriptorSetLayoutCreateInfo ci{VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO};
 								        ci.bindingCount = 2;
 								        ci.pBindings = bindings;
 								        vkCreateDescriptorSetLayout(device, &ci, nullptr, &materialSetLayout_);
 								    }
 								    // Bone set layout (set 2): binding 0 = STORAGE_BUFFER (bone matrices)
 								    {
 								        VkDescriptorSetLayoutBinding binding{};
 								        binding.binding = 0;
 								        binding.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
 								        binding.descriptorCount = 1;
 								        binding.stageFlags = VK_SHADER_STAGE_VERTEX_BIT;
-												Add spellbook, fix WMO floor clipping, and polish UI/visuals

- Add spellbook screen (P key) with Spell.dbc name lookup and action bar assignment
- Default Attack and Hearthstone spells available in single player
- Fix WMO floor clipping (gryphon roost) by tightening ceiling rejection threshold
- Darken ocean water, increase wave motion and opacity
- Add M2 model distance fade-in to prevent pop-in
- Reposition chat window, add slash/enter key focus
- Remove debug key commands (keep only F1 perf HUD, N minimap)
- Performance: return chat history by const ref, use deque for O(1) pop_front

											
										
										
											2026-02-04 11:31:08 -08:00
-												Vulcan Nightmare

Experimentally bringing up vulcan support

											
										
										
											2026-02-21 19:41:21 -08:00
+								        VkDescriptorSetLayoutCreateInfo ci{VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO};
 								        ci.bindingCount = 1;
 								        ci.pBindings = &binding;
 								        vkCreateDescriptorSetLayout(device, &ci, nullptr, &boneSetLayout_);
 								    }
-												Add unlit rendering for M2 glow/additive batches

Batches with the M2 unlit material flag (0x01) or additive blend modes
(3+) now skip lighting, shadows, and fog, emitting texture color directly.
Fixes lantern glow quads appearing as dull transparent circles.

											
										
										
											2026-02-06 03:28:21 -08:00
-												Vulcan Nightmare

Experimentally bringing up vulcan support

											
										
										
											2026-02-21 19:41:21 -08:00
+								    // Particle texture set layout (set 1 for particles): binding 0 = sampler2D
 								    {
 								        VkDescriptorSetLayoutBinding binding{};
 								        binding.binding = 0;
 								        binding.descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER;
 								        binding.descriptorCount = 1;
 								        binding.stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT;
-												Initial commit: wowee native WoW 3.3.5a client

											
										
										
											2026-02-02 12:24:50 -08:00
-												Vulcan Nightmare

Experimentally bringing up vulcan support

											
										
										
											2026-02-21 19:41:21 -08:00
+								        VkDescriptorSetLayoutCreateInfo ci{VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO};
 								        ci.bindingCount = 1;
 								        ci.pBindings = &binding;
 								        vkCreateDescriptorSetLayout(device, &ci, nullptr, &particleTexLayout_);
 								    }
-												Add centralized anisotropic filtering, fog, and Blinn-Phong specular to all renderers

Anisotropic filtering now queries GL_MAX_TEXTURE_MAX_ANISOTROPY_EXT once
and applies via a single applyAnisotropicFiltering() utility, replacing
hardcoded calls across all renderers. Fog (sky horizon color, 100-600
range) and Blinn-Phong specular highlights are added to WMO, M2, and
character shaders for visual parity with terrain. Shadow sampling
plumbing (sampler2DShadow with 3x3 PCF) is wired into all three shaders
gated by uShadowEnabled, ready for a future shadow map pass.

											
										
										
											2026-02-04 15:05:46 -08:00
-												Vulcan Nightmare

Experimentally bringing up vulcan support

											
										
										
											2026-02-21 19:41:21 -08:00
+								    // --- Descriptor pools ---
 								    {
 								        VkDescriptorPoolSize sizes[] = {
 								            {VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, MAX_MATERIAL_SETS + 256},
 								            {VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, MAX_MATERIAL_SETS + 256},
 								        };
 								        VkDescriptorPoolCreateInfo ci{VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO};
 								        ci.maxSets = MAX_MATERIAL_SETS + 256;
 								        ci.poolSizeCount = 2;
 								        ci.pPoolSizes = sizes;
 								        ci.flags = VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT;
 								        vkCreateDescriptorPool(device, &ci, nullptr, &materialDescPool_);
 								    }
 								    {
 								        VkDescriptorPoolSize sizes[] = {
 								            {VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, MAX_BONE_SETS},
 								        };
 								        VkDescriptorPoolCreateInfo ci{VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO};
 								        ci.maxSets = MAX_BONE_SETS;
 								        ci.poolSizeCount = 1;
 								        ci.pPoolSizes = sizes;
 								        ci.flags = VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT;
 								        vkCreateDescriptorPool(device, &ci, nullptr, &boneDescPool_);
 								    }
-												Initial commit: wowee native WoW 3.3.5a client

											
										
										
											2026-02-02 12:24:50 -08:00
-												Vulcan Nightmare

Experimentally bringing up vulcan support

											
										
										
											2026-02-21 19:41:21 -08:00
+								    // --- Pipeline layouts ---
-												Add centralized anisotropic filtering, fog, and Blinn-Phong specular to all renderers

Anisotropic filtering now queries GL_MAX_TEXTURE_MAX_ANISOTROPY_EXT once
and applies via a single applyAnisotropicFiltering() utility, replacing
hardcoded calls across all renderers. Fog (sky horizon color, 100-600
range) and Blinn-Phong specular highlights are added to WMO, M2, and
character shaders for visual parity with terrain. Shadow sampling
plumbing (sampler2DShadow with 3x3 PCF) is wired into all three shaders
gated by uShadowEnabled, ready for a future shadow map pass.

											
										
										
											2026-02-04 15:05:46 -08:00
-												Vulcan Nightmare

Experimentally bringing up vulcan support

											
										
										
											2026-02-21 19:41:21 -08:00
+								    // Main M2 pipeline layout: set 0 = perFrame, set 1 = material, set 2 = bones
 								    // Push constant: mat4 model + vec2 uvOffset + int texCoordSet + int useBones
 								    // + one further int + one float = 88 bytes (must match pushRange.size below)
 								    {
 								        VkDescriptorSetLayout setLayouts[] = {perFrameLayout, materialSetLayout_, boneSetLayout_};
 								        VkPushConstantRange pushRange{};
 								        pushRange.stageFlags = VK_SHADER_STAGE_VERTEX_BIT;
 								        pushRange.offset = 0;
-												Fix glow sprite flashing, move fadeAlpha to push constants, throttle character bones

- Glow sprites now use dedicated vertex buffer (glowVB_) separate from
  M2 particle buffer to prevent data race when renderM2Particles()
  overwrites glow data mid-flight
- Move fadeAlpha from shared material UBO to per-draw push constants,
  eliminating cross-instance alpha race on non-double-buffered UBOs
- Smooth adaptive render distance transitions to prevent pop-in/out
  at instance count thresholds (1000/2000)
- Distance-tiered character bone throttling: near (<30u) every frame,
  mid (30-60u) every 3rd, far (60-120u) every 6th frame
- Skip weapon instance animation updates (transforms set by parent bones)

											
										
										
											2026-03-04 08:17:32 -08:00
+								        pushRange.size = 88; // mat4(64) + vec2(8) + int(4) + int(4) + int(4) + float(4)
-												Vulcan Nightmare

Experimentally bringing up vulcan support

											
										
										
											2026-02-21 19:41:21 -08:00
 								        VkPipelineLayoutCreateInfo ci{VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO};
 								        ci.setLayoutCount = 3;
 								        ci.pSetLayouts = setLayouts;
 								        ci.pushConstantRangeCount = 1;
 								        ci.pPushConstantRanges = &pushRange;
 								        vkCreatePipelineLayout(device, &ci, nullptr, &pipelineLayout_);
 								    }
 								    // Particle pipeline layout: set 0 = perFrame, set 1 = particleTex
 								    // Push constant: vec2 tileCount + int alphaKey (12 bytes)
 								    {
 								        VkDescriptorSetLayout setLayouts[] = {perFrameLayout, particleTexLayout_};
 								        VkPushConstantRange pushRange{};
 								        pushRange.stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT;
 								        pushRange.offset = 0;
 								        pushRange.size = 12; // vec2 + int
 								        VkPipelineLayoutCreateInfo ci{VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO};
 								        ci.setLayoutCount = 2;
 								        ci.pSetLayouts = setLayouts;
 								        ci.pushConstantRangeCount = 1;
 								        ci.pPushConstantRanges = &pushRange;
 								        vkCreatePipelineLayout(device, &ci, nullptr, &particlePipelineLayout_);
 								    }
 								    // Smoke pipeline layout: set 0 = perFrame
 								    // Push constant: float screenHeight (4 bytes)
 								    {
 								        VkDescriptorSetLayout setLayouts[] = {perFrameLayout};
 								        VkPushConstantRange pushRange{};
 								        pushRange.stageFlags = VK_SHADER_STAGE_VERTEX_BIT;
 								        pushRange.offset = 0;
 								        pushRange.size = 4;
 								        VkPipelineLayoutCreateInfo ci{VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO};
 								        ci.setLayoutCount = 1;
 								        ci.pSetLayouts = setLayouts;
 								        ci.pushConstantRangeCount = 1;
 								        ci.pPushConstantRanges = &pushRange;
 								        vkCreatePipelineLayout(device, &ci, nullptr, &smokePipelineLayout_);
 								    }
 								    // --- Load shaders ---
 								    rendering::VkShaderModule m2Vert, m2Frag;
 								    rendering::VkShaderModule particleVert, particleFrag;
 								    rendering::VkShaderModule smokeVert, smokeFrag;
 								    m2Vert.loadFromFile(device, "assets/shaders/m2.vert.spv");
 								    m2Frag.loadFromFile(device, "assets/shaders/m2.frag.spv");
 								    particleVert.loadFromFile(device, "assets/shaders/m2_particle.vert.spv");
 								    particleFrag.loadFromFile(device, "assets/shaders/m2_particle.frag.spv");
 								    smokeVert.loadFromFile(device, "assets/shaders/m2_smoke.vert.spv");
 								    smokeFrag.loadFromFile(device, "assets/shaders/m2_smoke.frag.spv");
 								    if (!m2Vert.isValid() || !m2Frag.isValid()) {
 								        LOG_ERROR("M2: Missing required shaders, cannot initialize");
 								        return false;
 								    }
-												Add centralized anisotropic filtering, fog, and Blinn-Phong specular to all renderers

Anisotropic filtering now queries GL_MAX_TEXTURE_MAX_ANISOTROPY_EXT once
and applies via a single applyAnisotropicFiltering() utility, replacing
hardcoded calls across all renderers. Fog (sky horizon color, 100-600
range) and Blinn-Phong specular highlights are added to WMO, M2, and
character shaders for visual parity with terrain. Shadow sampling
plumbing (sampler2DShadow with 3x3 PCF) is wired into all three shaders
gated by uShadowEnabled, ready for a future shadow map pass.

											
										
										
											2026-02-04 15:05:46 -08:00
-												Vulcan Nightmare

Experimentally bringing up vulcan support

											
										
										
											2026-02-21 19:41:21 -08:00
+								    VkRenderPass mainPass = vkCtx_->getImGuiRenderPass();
 								    // --- Build M2 model pipelines ---
 								    // Vertex input: 18 floats = 72 bytes stride
 								    // loc 0: vec3 pos (0), loc 1: vec3 normal (12), loc 2: vec2 uv0 (24),
 								    // loc 5: vec2 uv1 (32), loc 3: vec4 boneWeights (40), loc 4: vec4 boneIndices (56)
 								    VkVertexInputBindingDescription m2Binding{};
 								    m2Binding.binding = 0;
 								    m2Binding.stride = 18 * sizeof(float);
 								    m2Binding.inputRate = VK_VERTEX_INPUT_RATE_VERTEX;
 								    std::vector<VkVertexInputAttributeDescription> m2Attrs = {
 								        {0, 0, VK_FORMAT_R32G32B32_SFLOAT, 0},                     // position
 								        {1, 0, VK_FORMAT_R32G32B32_SFLOAT, 3 * sizeof(float)},     // normal
 								        {2, 0, VK_FORMAT_R32G32_SFLOAT, 6 * sizeof(float)},        // texCoord0
 								        {5, 0, VK_FORMAT_R32G32_SFLOAT, 8 * sizeof(float)},        // texCoord1
 								        {3, 0, VK_FORMAT_R32G32B32A32_SFLOAT, 10 * sizeof(float)}, // boneWeights
 								        {4, 0, VK_FORMAT_R32G32B32A32_SFLOAT, 14 * sizeof(float)}, // boneIndices (float)
 								    };
-												Initial commit: wowee native WoW 3.3.5a client

											
										
										
											2026-02-02 12:24:50 -08:00
-												Vulcan Nightmare

Experimentally bringing up vulcan support

											
										
										
											2026-02-21 19:41:21 -08:00
+								    auto buildM2Pipeline = [&](VkPipelineColorBlendAttachmentState blendState, bool depthWrite) -> VkPipeline {
 								        return PipelineBuilder()
 								            .setShaders(m2Vert.stageInfo(VK_SHADER_STAGE_VERTEX_BIT),
 								                        m2Frag.stageInfo(VK_SHADER_STAGE_FRAGMENT_BIT))
 								            .setVertexInput({m2Binding}, m2Attrs)
 								            .setTopology(VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST)
 								            .setRasterization(VK_POLYGON_MODE_FILL, VK_CULL_MODE_NONE)
 								            .setDepthTest(true, depthWrite, VK_COMPARE_OP_LESS_OR_EQUAL)
 								            .setColorBlendAttachment(blendState)
-												Add configurable MSAA anti-aliasing, update auth screen and terrain shader

- MSAA: conditional 2-att (off) vs 3-att (on) render pass with auto-resolve
- MSAA: multisampled color+depth images, query max supported sample count
- MSAA: .setMultisample() on all 25+ main-pass pipelines across 17 renderers
- MSAA: recreatePipelines() on every sub-renderer for runtime MSAA changes
- MSAA: Renderer::setMsaaSamples() orchestrates swapchain+pipeline+ImGui rebuild
- MSAA: Anti-Aliasing combo (Off/2x/4x/8x) in Video settings, persisted
- Update auth screen assets and terrain fragment shader

											
										
										
											2026-02-22 02:59:24 -08:00
+								            .setMultisample(vkCtx_->getMsaaSamples())
-												Vulcan Nightmare

Experimentally bringing up vulcan support

											
										
										
											2026-02-21 19:41:21 -08:00
+								            .setLayout(pipelineLayout_)
 								            .setRenderPass(mainPass)
 								            .setDynamicStates({VK_DYNAMIC_STATE_VIEWPORT, VK_DYNAMIC_STATE_SCISSOR})
 								            .build(device);
 								    };
 								    opaquePipeline_ = buildM2Pipeline(PipelineBuilder::blendDisabled(), true);
 								    alphaTestPipeline_ = buildM2Pipeline(PipelineBuilder::blendAlpha(), true);
 								    alphaPipeline_ = buildM2Pipeline(PipelineBuilder::blendAlpha(), false);
 								    additivePipeline_ = buildM2Pipeline(PipelineBuilder::blendAdditive(), false);
 								    // --- Build particle pipelines ---
 								    if (particleVert.isValid() && particleFrag.isValid()) {
 								        VkVertexInputBindingDescription pBind{};
 								        pBind.binding = 0;
 								        pBind.stride = 9 * sizeof(float); // pos3 + color4 + size1 + tile1
 								        pBind.inputRate = VK_VERTEX_INPUT_RATE_VERTEX;
 								        std::vector<VkVertexInputAttributeDescription> pAttrs = {
 								            {0, 0, VK_FORMAT_R32G32B32_SFLOAT, 0},                    // position
 								            {1, 0, VK_FORMAT_R32G32B32A32_SFLOAT, 3 * sizeof(float)}, // color
 								            {2, 0, VK_FORMAT_R32_SFLOAT, 7 * sizeof(float)},          // size
 								            {3, 0, VK_FORMAT_R32_SFLOAT, 8 * sizeof(float)},          // tile
 								        };
 								        auto buildParticlePipeline = [&](VkPipelineColorBlendAttachmentState blend) -> VkPipeline {
 								            return PipelineBuilder()
 								                .setShaders(particleVert.stageInfo(VK_SHADER_STAGE_VERTEX_BIT),
 								                            particleFrag.stageInfo(VK_SHADER_STAGE_FRAGMENT_BIT))
 								                .setVertexInput({pBind}, pAttrs)
 								                .setTopology(VK_PRIMITIVE_TOPOLOGY_POINT_LIST)
 								                .setRasterization(VK_POLYGON_MODE_FILL, VK_CULL_MODE_NONE)
 								                .setDepthTest(true, false, VK_COMPARE_OP_LESS_OR_EQUAL)
 								                .setColorBlendAttachment(blend)
-												Add configurable MSAA anti-aliasing, update auth screen and terrain shader

- MSAA: conditional 2-att (off) vs 3-att (on) render pass with auto-resolve
- MSAA: multisampled color+depth images, query max supported sample count
- MSAA: .setMultisample() on all 25+ main-pass pipelines across 17 renderers
- MSAA: recreatePipelines() on every sub-renderer for runtime MSAA changes
- MSAA: Renderer::setMsaaSamples() orchestrates swapchain+pipeline+ImGui rebuild
- MSAA: Anti-Aliasing combo (Off/2x/4x/8x) in Video settings, persisted
- Update auth screen assets and terrain fragment shader

											
										
										
											2026-02-22 02:59:24 -08:00
+								                .setMultisample(vkCtx_->getMsaaSamples())
-												Vulcan Nightmare

Experimentally bringing up vulcan support

											
										
										
											2026-02-21 19:41:21 -08:00
+								                .setLayout(particlePipelineLayout_)
 								                .setRenderPass(mainPass)
 								                .setDynamicStates({VK_DYNAMIC_STATE_VIEWPORT, VK_DYNAMIC_STATE_SCISSOR})
 								                .build(device);
 								        };
 								        particlePipeline_ = buildParticlePipeline(PipelineBuilder::blendAlpha());
 								        particleAdditivePipeline_ = buildParticlePipeline(PipelineBuilder::blendAdditive());
 								    }
 								    // --- Build smoke pipeline ---
 								    if (smokeVert.isValid() && smokeFrag.isValid()) {
 								        VkVertexInputBindingDescription sBind{};
 								        sBind.binding = 0;
 								        sBind.stride = 6 * sizeof(float); // pos3 + lifeRatio1 + size1 + isSpark1
 								        sBind.inputRate = VK_VERTEX_INPUT_RATE_VERTEX;
 								        std::vector<VkVertexInputAttributeDescription> sAttrs = {
 								            {0, 0, VK_FORMAT_R32G32B32_SFLOAT, 0},           // position
 								            {1, 0, VK_FORMAT_R32_SFLOAT, 3 * sizeof(float)}, // lifeRatio
 								            {2, 0, VK_FORMAT_R32_SFLOAT, 4 * sizeof(float)}, // size
 								            {3, 0, VK_FORMAT_R32_SFLOAT, 5 * sizeof(float)}, // isSpark
 								        };
 								        smokePipeline_ = PipelineBuilder()
 								            .setShaders(smokeVert.stageInfo(VK_SHADER_STAGE_VERTEX_BIT),
 								                        smokeFrag.stageInfo(VK_SHADER_STAGE_FRAGMENT_BIT))
 								            .setVertexInput({sBind}, sAttrs)
 								            .setTopology(VK_PRIMITIVE_TOPOLOGY_POINT_LIST)
 								            .setRasterization(VK_POLYGON_MODE_FILL, VK_CULL_MODE_NONE)
 								            .setDepthTest(true, false, VK_COMPARE_OP_LESS_OR_EQUAL)
 								            .setColorBlendAttachment(PipelineBuilder::blendAlpha())
-												Add configurable MSAA anti-aliasing, update auth screen and terrain shader

- MSAA: conditional 2-att (off) vs 3-att (on) render pass with auto-resolve
- MSAA: multisampled color+depth images, query max supported sample count
- MSAA: .setMultisample() on all 25+ main-pass pipelines across 17 renderers
- MSAA: recreatePipelines() on every sub-renderer for runtime MSAA changes
- MSAA: Renderer::setMsaaSamples() orchestrates swapchain+pipeline+ImGui rebuild
- MSAA: Anti-Aliasing combo (Off/2x/4x/8x) in Video settings, persisted
- Update auth screen assets and terrain fragment shader

											
										
										
											2026-02-22 02:59:24 -08:00
+								            .setMultisample(vkCtx_->getMsaaSamples())
-												Vulcan Nightmare

Experimentally bringing up vulcan support

											
										
										
											2026-02-21 19:41:21 -08:00
+								            .setLayout(smokePipelineLayout_)
 								            .setRenderPass(mainPass)
 								            .setDynamicStates({VK_DYNAMIC_STATE_VIEWPORT, VK_DYNAMIC_STATE_SCISSOR})
 								            .build(device);
 								    }
 								    // Clean up shader modules
 								    m2Vert.destroy(); m2Frag.destroy();
 								    particleVert.destroy(); particleFrag.destroy();
 								    smokeVert.destroy(); smokeFrag.destroy();
 								    // --- Create dynamic particle buffers (mapped for CPU writes) ---
 								    {
 								        VkBufferCreateInfo bci{VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO};
 								        bci.usage = VK_BUFFER_USAGE_VERTEX_BUFFER_BIT;
 								        VmaAllocationCreateInfo aci{};
 								        aci.usage = VMA_MEMORY_USAGE_CPU_TO_GPU;
 								        aci.flags = VMA_ALLOCATION_CREATE_MAPPED_BIT;
 								        VmaAllocationInfo allocInfo{};
 								        // Smoke particle buffer
 								        bci.size = MAX_SMOKE_PARTICLES * 6 * sizeof(float);
 								        vmaCreateBuffer(vkCtx_->getAllocator(), &bci, &aci, &smokeVB_, &smokeVBAlloc_, &allocInfo);
 								        smokeVBMapped_ = allocInfo.pMappedData;
 								        // M2 particle buffer
 								        bci.size = MAX_M2_PARTICLES * 9 * sizeof(float);
 								        vmaCreateBuffer(vkCtx_->getAllocator(), &bci, &aci, &m2ParticleVB_, &m2ParticleVBAlloc_, &allocInfo);
 								        m2ParticleVBMapped_ = allocInfo.pMappedData;
-												Fix glow sprite flashing, move fadeAlpha to push constants, throttle character bones

- Glow sprites now use dedicated vertex buffer (glowVB_) separate from
  M2 particle buffer to prevent data race when renderM2Particles()
  overwrites glow data mid-flight
- Move fadeAlpha from shared material UBO to per-draw push constants,
  eliminating cross-instance alpha race on non-double-buffered UBOs
- Smooth adaptive render distance transitions to prevent pop-in/out
  at instance count thresholds (1000/2000)
- Distance-tiered character bone throttling: near (<30u) every frame,
  mid (30-60u) every 3rd, far (60-120u) every 6th frame
- Skip weapon instance animation updates (transforms set by parent bones)

											
										
										
											2026-03-04 08:17:32 -08:00
 								        // Dedicated glow sprite buffer (separate from particle VB to avoid data race)
 								        bci.size = MAX_GLOW_SPRITES * 9 * sizeof(float);
 								        vmaCreateBuffer(vkCtx_->getAllocator(), &bci, &aci, &glowVB_, &glowVBAlloc_, &allocInfo);
 								        glowVBMapped_ = allocInfo.pMappedData;
-												Initial commit: wowee native WoW 3.3.5a client

											
										
										
											2026-02-02 12:24:50 -08:00
+								    }
-												Vulcan Nightmare

Experimentally bringing up vulcan support

											
										
										
											2026-02-21 19:41:21 -08:00
+								    // --- Create white fallback texture ---
-												Render M2 glow batches as billboarded light sprites

Replace flat mesh rendering of additive/mod blend batches (blendMode >= 3)
with camera-facing point sprites using a soft radial gradient texture and
additive blending. Adds M2 particle emitter infrastructure (structs, shader,
parsing stubs) but disables emitter parsing — the assumed 476-byte struct
size is wrong for WotLK 3.3.5a, causing misaligned reads that explode RAM.

											
										
										
											2026-02-06 08:58:26 -08:00
+								    {
-												Vulcan Nightmare

Experimentally bringing up vulcan support

											
										
										
											2026-02-21 19:41:21 -08:00
+								        uint8_t white[] = {255, 255, 255, 255};
 								        whiteTexture_ = std::make_unique<VkTexture>();
 								        whiteTexture_->upload(*vkCtx_, white, 1, 1, VK_FORMAT_R8G8B8A8_UNORM);
 								        whiteTexture_->createSampler(device, VK_FILTER_LINEAR, VK_FILTER_LINEAR, VK_SAMPLER_ADDRESS_MODE_REPEAT);
 								    }
-												Fix quest flow regressions, tooltip compare stats, and M2 alpha-key handling

											
										
										
											2026-02-19 02:27:01 -08:00
-												Vulcan Nightmare

Experimentally bringing up vulcan support

											
										
										
											2026-02-21 19:41:21 -08:00
+								    // --- Generate soft radial gradient glow texture ---
-												Render M2 glow batches as billboarded light sprites

Replace flat mesh rendering of additive/mod blend batches (blendMode >= 3)
with camera-facing point sprites using a soft radial gradient texture and
additive blending. Adds M2 particle emitter infrastructure (structs, shader,
parsing stubs) but disables emitter parsing — the assumed 476-byte struct
size is wrong for WotLK 3.3.5a, causing misaligned reads that explode RAM.

											
										
										
											2026-02-06 08:58:26 -08:00
+								    {
 								        static constexpr int SZ = 64;
 								        std::vector<uint8_t> px(SZ * SZ * 4);
 								        float half = SZ / 2.0f;
 								        for (int y = 0; y < SZ; y++) {
 								            for (int x = 0; x < SZ; x++) {
 								                float dx = (x + 0.5f - half) / half;
 								                float dy = (y + 0.5f - half) / half;
 								                float r = std::sqrt(dx * dx + dy * dy);
 								                float a = std::max(0.0f, 1.0f - r);
 								                a = a * a; // Quadratic falloff
 								                int idx = (y * SZ + x) * 4;
 								                px[idx + 0] = 255;
 								                px[idx + 1] = 255;
 								                px[idx + 2] = 255;
 								                px[idx + 3] = static_cast<uint8_t>(a * 255);
 								            }
 								        }
-												Vulcan Nightmare

Experimentally bringing up vulcan support

											
										
										
											2026-02-21 19:41:21 -08:00
+								        glowTexture_ = std::make_unique<VkTexture>();
 								        glowTexture_->upload(*vkCtx_, px.data(), SZ, SZ, VK_FORMAT_R8G8B8A8_UNORM);
 								        glowTexture_->createSampler(device, VK_FILTER_LINEAR, VK_FILTER_LINEAR, VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE);
-												Optimize M2/WMO render loop: cache UBO pointers, precompute model flags, reduce rebinds

- Cache material UBO mapped pointers at creation time, eliminating
  per-batch vmaGetAllocationInfo() calls in the hot render path
- Precompute foliage/elven/lantern/kobold model name classifications
  at load time instead of per-instance string operations every frame
- Remove redundant descriptor set and push constant rebinds on WMO
  pipeline switches (preserved across compatible layouts)
- Pre-allocate glow sprite descriptor set once at init instead of
  allocating from the pool every frame

											
										
										
											2026-02-23 06:06:24 -08:00
+								        // Pre-allocate glow texture descriptor set (reused every frame)
 								        if (particleTexLayout_ && materialDescPool_) {
 								            VkDescriptorSetAllocateInfo ai{VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO};
 								            ai.descriptorPool = materialDescPool_;
 								            ai.descriptorSetCount = 1;
 								            ai.pSetLayouts = &particleTexLayout_;
 								            if (vkAllocateDescriptorSets(device, &ai, &glowTexDescSet_) == VK_SUCCESS) {
 								                VkDescriptorImageInfo imgInfo = glowTexture_->descriptorInfo();
 								                VkWriteDescriptorSet write{VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET};
 								                write.dstSet = glowTexDescSet_;
 								                write.dstBinding = 0;
 								                write.descriptorCount = 1;
 								                write.descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER;
 								                write.pImageInfo = &imgInfo;
 								                vkUpdateDescriptorSets(device, 1, &write, 0, nullptr);
 								            }
 								        }
-												Render M2 glow batches as billboarded light sprites

Replace flat mesh rendering of additive/mod blend batches (blendMode >= 3)
with camera-facing point sprites using a soft radial gradient texture and
additive blending. Adds M2 particle emitter infrastructure (structs, shader,
parsing stubs) but disables emitter parsing — the assumed 476-byte struct
size is wrong for WotLK 3.3.5a, causing misaligned reads that explode RAM.

											
										
										
											2026-02-06 08:58:26 -08:00
+								    }
-												Stabilize streaming memory and parser handling; revert socket recv optimizations

											
										
										
											2026-02-22 07:26:54 -08:00
+								    textureCacheBudgetBytes_ =
-												Increase texture cache budgets to 4GB and cap repetitive warnings

Raise all texture cache defaults from 1GB to 4GB to reduce rejections.
Cap cache-full warnings (texture + model) to 3 messages per renderer,
and cap update block parse errors to 5 messages.

											
										
										
											2026-02-23 04:32:58 -08:00
+								        envSizeMBOrDefault("WOWEE_M2_TEX_CACHE_MB", 4096) * 1024ull * 1024ull;
-												Stabilize streaming memory and parser handling; revert socket recv optimizations

											
										
										
											2026-02-22 07:26:54 -08:00
+								    modelCacheLimit_ = envSizeMBOrDefault("WOWEE_M2_MODEL_LIMIT", 6000);
 								    LOG_INFO("M2 texture cache budget: ", textureCacheBudgetBytes_ / (1024 * 1024), " MB");
 								    LOG_INFO("M2 model cache limit: ", modelCacheLimit_);
-												Render M2 glow batches as billboarded light sprites

Replace flat mesh rendering of additive/mod blend batches (blendMode >= 3)
with camera-facing point sprites using a soft radial gradient texture and
additive blending. Adds M2 particle emitter infrastructure (structs, shader,
parsing stubs) but disables emitter parsing — the assumed 476-byte struct
size is wrong for WotLK 3.3.5a, causing misaligned reads that explode RAM.

											
										
										
											2026-02-06 08:58:26 -08:00
-												Vulcan Nightmare

Experimentally bringing up vulcan support

											
										
										
											2026-02-21 19:41:21 -08:00
+								    LOG_INFO("M2 renderer initialized (Vulkan)");
-												Fix WMO visibility culling and renderer initialization guards

											
										
										
											2026-02-18 22:41:05 -08:00
+								    initialized_ = true;
-												Initial commit: wowee native WoW 3.3.5a client

											
										
										
											2026-02-02 12:24:50 -08:00
+								    return true;
 								}
 								void M2Renderer::shutdown() {
 								    LOG_INFO("Shutting down M2 renderer...");
-												Vulcan Nightmare

Experimentally bringing up vulcan support

											
										
										
											2026-02-21 19:41:21 -08:00
+								    if (!vkCtx_) return;
 								    vkDeviceWaitIdle(vkCtx_->getDevice());
 								    VkDevice device = vkCtx_->getDevice();
 								    VmaAllocator alloc = vkCtx_->getAllocator();
-												Initial commit: wowee native WoW 3.3.5a client

											
										
										
											2026-02-02 12:24:50 -08:00
-												Vulcan Nightmare

Experimentally bringing up vulcan support

											
										
										
											2026-02-21 19:41:21 -08:00
+								    // Delete model GPU resources
-												Initial commit: wowee native WoW 3.3.5a client

											
										
										
											2026-02-02 12:24:50 -08:00
+								    for (auto& [id, model] : models) {
-												Vulcan Nightmare

Experimentally bringing up vulcan support

											
										
										
											2026-02-21 19:41:21 -08:00
+								        destroyModelGPU(model);
-												Initial commit: wowee native WoW 3.3.5a client

											
										
										
											2026-02-02 12:24:50 -08:00
+								    }
 								    models.clear();
-												Vulcan Nightmare

Experimentally bringing up vulcan support

											
										
										
											2026-02-21 19:41:21 -08:00
 								    // Destroy instance bone buffers
 								    for (auto& inst : instances) {
 								        destroyInstanceBones(inst);
 								    }
-												Initial commit: wowee native WoW 3.3.5a client

											
										
										
											2026-02-02 12:24:50 -08:00
+								    instances.clear();
-												Optimize collision queries with spatial grid and improve movement CCD

											
										
										
											2026-02-03 16:21:48 -08:00
+								    spatialGrid.clear();
 								    instanceIndexById.clear();
-												Incremental terrain upload + M2 instance dedup hash for city stutter

Terrain finalization was uploading all 256 chunks (GPU fence waits) in one
atomic advanceFinalization call that couldn't be interrupted by the 5ms time
budget. Now split into incremental batches of 16 chunks per call, allowing
the time budget to yield between batches.

M2 instance creation had O(N) dedup scans iterating ALL instances to check
for duplicates. In cities with 5000+ doodads, this caused O(N²) total work
during tile loading. Replaced with hash-based DedupKey map for O(1) lookups.

Changes:
- TerrainRenderer::loadTerrainIncremental: uploads N chunks per call
- FinalizingTile tracks terrainChunkNext for cross-frame progress
- TERRAIN phase yields after preload and after each chunk batch
- M2Renderer::DedupKey hash map replaces linear scan in createInstance
  and createInstanceWithMatrix
- Dedup map maintained through rebuildSpatialIndex and clear paths

											
										
										
											2026-03-07 11:59:19 -08:00
+								    instanceDedupMap_.clear();
-												Initial commit: wowee native WoW 3.3.5a client

											
										
										
											2026-02-02 12:24:50 -08:00
 								    // Delete cached textures
 								    textureCache.clear();
-												Bound MPQ archive lookup cache; remove always-on composite dumps; track texture cache entries

											
										
										
											2026-02-12 16:29:36 -08:00
+								    textureCacheBytes_ = 0;
 								    textureCacheCounter_ = 0;
-												Vulcan Nightmare

Experimentally bringing up vulcan support

											
										
										
											2026-02-21 19:41:21 -08:00
+								    textureHasAlphaByPtr_.clear();
 								    textureColorKeyBlackByPtr_.clear();
-												Stabilize streaming memory and parser handling; revert socket recv optimizations

											
										
										
											2026-02-22 07:26:54 -08:00
+								    failedTextureCache_.clear();
 								    loggedTextureLoadFails_.clear();
 								    textureBudgetRejectWarnings_ = 0;
-												Vulcan Nightmare

Experimentally bringing up vulcan support

											
										
										
											2026-02-21 19:41:21 -08:00
+								    whiteTexture_.reset();
 								    glowTexture_.reset();
 								    // Clean up particle buffers
 								    if (smokeVB_) { vmaDestroyBuffer(alloc, smokeVB_, smokeVBAlloc_); smokeVB_ = VK_NULL_HANDLE; }
 								    if (m2ParticleVB_) { vmaDestroyBuffer(alloc, m2ParticleVB_, m2ParticleVBAlloc_); m2ParticleVB_ = VK_NULL_HANDLE; }
-												Fix glow sprite flashing, move fadeAlpha to push constants, throttle character bones

- Glow sprites now use dedicated vertex buffer (glowVB_) separate from
  M2 particle buffer to prevent data race when renderM2Particles()
  overwrites glow data mid-flight
- Move fadeAlpha from shared material UBO to per-draw push constants,
  eliminating cross-instance alpha race on non-double-buffered UBOs
- Smooth adaptive render distance transitions to prevent pop-in/out
  at instance count thresholds (1000/2000)
- Distance-tiered character bone throttling: near (<30u) every frame,
  mid (30-60u) every 3rd, far (60-120u) every 6th frame
- Skip weapon instance animation updates (transforms set by parent bones)

											
										
										
											2026-03-04 08:17:32 -08:00
+								    if (glowVB_) { vmaDestroyBuffer(alloc, glowVB_, glowVBAlloc_); glowVB_ = VK_NULL_HANDLE; }
-												Vulcan Nightmare

Experimentally bringing up vulcan support

											
										
										
											2026-02-21 19:41:21 -08:00
+								    smokeParticles.clear();
 								    // Destroy pipelines
 								    auto destroyPipeline = [&](VkPipeline& p) { if (p) { vkDestroyPipeline(device, p, nullptr); p = VK_NULL_HANDLE; } };
 								    destroyPipeline(opaquePipeline_);
 								    destroyPipeline(alphaTestPipeline_);
 								    destroyPipeline(alphaPipeline_);
 								    destroyPipeline(additivePipeline_);
 								    destroyPipeline(particlePipeline_);
 								    destroyPipeline(particleAdditivePipeline_);
 								    destroyPipeline(smokePipeline_);
 								    if (pipelineLayout_) { vkDestroyPipelineLayout(device, pipelineLayout_, nullptr); pipelineLayout_ = VK_NULL_HANDLE; }
 								    if (particlePipelineLayout_) { vkDestroyPipelineLayout(device, particlePipelineLayout_, nullptr); particlePipelineLayout_ = VK_NULL_HANDLE; }
 								    if (smokePipelineLayout_) { vkDestroyPipelineLayout(device, smokePipelineLayout_, nullptr); smokePipelineLayout_ = VK_NULL_HANDLE; }
 								    // Destroy descriptor pools and layouts
 								    if (materialDescPool_) { vkDestroyDescriptorPool(device, materialDescPool_, nullptr); materialDescPool_ = VK_NULL_HANDLE; }
 								    if (boneDescPool_) { vkDestroyDescriptorPool(device, boneDescPool_, nullptr); boneDescPool_ = VK_NULL_HANDLE; }
 								    if (materialSetLayout_) { vkDestroyDescriptorSetLayout(device, materialSetLayout_, nullptr); materialSetLayout_ = VK_NULL_HANDLE; }
 								    if (boneSetLayout_) { vkDestroyDescriptorSetLayout(device, boneSetLayout_, nullptr); boneSetLayout_ = VK_NULL_HANDLE; }
 								    if (particleTexLayout_) { vkDestroyDescriptorSetLayout(device, particleTexLayout_, nullptr); particleTexLayout_ = VK_NULL_HANDLE; }
 								    // Destroy shadow resources
 								    destroyPipeline(shadowPipeline_);
 								    if (shadowPipelineLayout_) { vkDestroyPipelineLayout(device, shadowPipelineLayout_, nullptr); shadowPipelineLayout_ = VK_NULL_HANDLE; }
-												Add alpha-tested foliage shadows: per-batch texture binding and shadow map receiving

Shadow casting: foliage batches now bind their actual texture in the shadow
pass with alpha testing, producing leaf-shaped shadows instead of solid cards.
Uses a per-frame resettable descriptor pool for texture sets.

Shadow receiving: foliage fragments now sample the shadow map with PCF
instead of using a flat constant darkening.

											
										
										
											2026-02-23 05:55:03 -08:00
+								    if (shadowTexPool_) { vkDestroyDescriptorPool(device, shadowTexPool_, nullptr); shadowTexPool_ = VK_NULL_HANDLE; }
-												Vulcan Nightmare

Experimentally bringing up vulcan support

											
										
										
											2026-02-21 19:41:21 -08:00
+								    if (shadowParamsPool_) { vkDestroyDescriptorPool(device, shadowParamsPool_, nullptr); shadowParamsPool_ = VK_NULL_HANDLE; }
 								    if (shadowParamsLayout_) { vkDestroyDescriptorSetLayout(device, shadowParamsLayout_, nullptr); shadowParamsLayout_ = VK_NULL_HANDLE; }
 								    if (shadowParamsUBO_) { vmaDestroyBuffer(alloc, shadowParamsUBO_, shadowParamsAlloc_); shadowParamsUBO_ = VK_NULL_HANDLE; }
 								    initialized_ = false;
 								}
 								// Destroys the vertex, index and per-batch material buffers of one model and
 								// nulls their handles. No-op when no Vulkan context is attached.
 								void M2Renderer::destroyModelGPU(M2ModelGPU& model) {
 								    if (!vkCtx_) return;
 								    VmaAllocator allocator = vkCtx_->getAllocator();
 								    // Destroy a buffer if it exists, then clear the handle (the allocation field is left as-is).
 								    const auto releaseBuffer = [allocator](auto& buffer, auto& allocation) {
 								        if (!buffer) return;
 								        vmaDestroyBuffer(allocator, buffer, allocation);
 								        buffer = VK_NULL_HANDLE;
 								    };
 								    releaseBuffer(model.vertexBuffer, model.vertexAlloc);
 								    releaseBuffer(model.indexBuffer, model.indexAlloc);
 								    for (auto& batch : model.batches) {
 								        releaseBuffer(batch.materialUBO, batch.materialUBOAlloc);
 								        // materialSet freed when pool is reset/destroyed
 								    }
 								}
-												Initial commit: wowee native WoW 3.3.5a client

											
										
										
											2026-02-02 12:24:50 -08:00
-												Vulcan Nightmare

Experimentally bringing up vulcan support

											
										
										
											2026-02-21 19:41:21 -08:00
+								void M2Renderer::destroyInstanceBones(M2Instance& inst) {
 								    if (!vkCtx_) return;
 								    VmaAllocator alloc = vkCtx_->getAllocator();
 								    for (int i = 0; i < 2; i++) {
 								        if (inst.boneBuffer[i]) {
 								            vmaDestroyBuffer(alloc, inst.boneBuffer[i], inst.boneAlloc[i]);
 								            inst.boneBuffer[i] = VK_NULL_HANDLE;
 								            inst.boneMapped[i] = nullptr;
 								        }
 								        // boneSet freed when pool is reset/destroyed
 								    }
 								}
-												Add smoke particle emitters with ember sparks and enable 4x MSAA

Replace UV scroll workaround for chimney smoke with proper GL_POINTS
particle system. Smoke particles rise, expand, drift, and fade over
4-7 seconds. One in eight particles spawns as a bright orange/red
ember spark. Enable 4x multisample antialiasing for smoother edges
on player models, fences, and foliage.

											
										
										
											2026-02-04 14:37:32 -08:00
-												Vulcan Nightmare

Experimentally bringing up vulcan support

											
										
										
											2026-02-21 19:41:21 -08:00
+								VkDescriptorSet M2Renderer::allocateMaterialSet() {
 								    VkDescriptorSetAllocateInfo ai{VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO};
 								    ai.descriptorPool = materialDescPool_;
 								    ai.descriptorSetCount = 1;
 								    ai.pSetLayouts = &materialSetLayout_;
 								    VkDescriptorSet set = VK_NULL_HANDLE;
 								    vkAllocateDescriptorSets(vkCtx_->getDevice(), &ai, &set);
 								    return set;
 								}
-												Render M2 glow batches as billboarded light sprites

Replace flat mesh rendering of additive/mod blend batches (blendMode >= 3)
with camera-facing point sprites using a soft radial gradient texture and
additive blending. Adds M2 particle emitter infrastructure (structs, shader,
parsing stubs) but disables emitter parsing — the assumed 476-byte struct
size is wrong for WotLK 3.3.5a, causing misaligned reads that explode RAM.

											
										
										
											2026-02-06 08:58:26 -08:00
-												Vulcan Nightmare

Experimentally bringing up vulcan support

											
										
										
											2026-02-21 19:41:21 -08:00
+								VkDescriptorSet M2Renderer::allocateBoneSet() {
 								    VkDescriptorSetAllocateInfo ai{VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO};
 								    ai.descriptorPool = boneDescPool_;
 								    ai.descriptorSetCount = 1;
 								    ai.pSetLayouts = &boneSetLayout_;
 								    VkDescriptorSet set = VK_NULL_HANDLE;
 								    vkAllocateDescriptorSets(vkCtx_->getDevice(), &ai, &set);
 								    return set;
-												Initial commit: wowee native WoW 3.3.5a client

											
										
										
											2026-02-02 12:24:50 -08:00
+								}
-												Add M2 collision mesh parsing and mesh-based wall/floor collision

Parse bounding vertices, triangles, and normals from M2 files and use
them for proper triangle-level collision instead of AABB heuristics.
Spatial grid bucketing for efficient queries, closest-point wall push
with soft clamping, and ray-triangle floor detection alongside existing
AABB fallback.

											
										
										
											2026-02-08 19:56:17 -08:00
+								// ---------------------------------------------------------------------------
 								// M2 collision mesh: build spatial grid + classify triangles
 								// ---------------------------------------------------------------------------
 								void M2ModelGPU::CollisionMesh::build() {
 								    if (indices.size() < 3 || vertices.empty()) return;
 								    triCount = static_cast<uint32_t>(indices.size() / 3);
 								    // Bounding box for grid
 								    glm::vec3 bmin(std::numeric_limits<float>::max());
 								    glm::vec3 bmax(-std::numeric_limits<float>::max());
 								    for (const auto& v : vertices) {
 								        bmin = glm::min(bmin, v);
 								        bmax = glm::max(bmax, v);
 								    }
 								    gridOrigin = glm::vec2(bmin.x, bmin.y);
 								    gridCellsX = std::max(1, std::min(32, static_cast<int>(std::ceil((bmax.x - bmin.x) / CELL_SIZE))));
 								    gridCellsY = std::max(1, std::min(32, static_cast<int>(std::ceil((bmax.y - bmin.y) / CELL_SIZE))));
 								    cellFloorTris.resize(gridCellsX * gridCellsY);
 								    cellWallTris.resize(gridCellsX * gridCellsY);
 								    triBounds.resize(triCount);
 								    for (uint32_t ti = 0; ti < triCount; ti++) {
 								        uint16_t i0 = indices[ti * 3];
 								        uint16_t i1 = indices[ti * 3 + 1];
 								        uint16_t i2 = indices[ti * 3 + 2];
 								        if (i0 >= vertices.size() || i1 >= vertices.size() || i2 >= vertices.size()) continue;
 								        const auto& v0 = vertices[i0];
 								        const auto& v1 = vertices[i1];
 								        const auto& v2 = vertices[i2];
 								        triBounds[ti].minZ = std::min({v0.z, v1.z, v2.z});
 								        triBounds[ti].maxZ = std::max({v0.z, v1.z, v2.z});
 								        glm::vec3 normal = glm::cross(v1 - v0, v2 - v0);
 								        float normalLen = glm::length(normal);
 								        float absNz = (normalLen > 0.001f) ? std::abs(normal.z / normalLen) : 0.0f;
-												Fix stair approach fall-through and relax steep slope climbing

Relaxed walkable slope threshold from 0.40 to 0.35 (~70° max) for
steeper stair climbing. Tightened WMO floor cache above-tolerance
back to 0.25 units to prevent cached stair landing from overriding
approach floor. Added M2 floor preference for ship decks to prevent
falling through to water below.

											
										
										
											2026-02-08 20:31:00 -08:00
+								        bool isFloor = (absNz >= 0.35f);  // ~70° max slope (relaxed for steep stairs)
-												Add M2 collision mesh parsing and mesh-based wall/floor collision

Parse bounding vertices, triangles, and normals from M2 files and use
them for proper triangle-level collision instead of AABB heuristics.
Spatial grid bucketing for efficient queries, closest-point wall push
with soft clamping, and ray-triangle floor detection alongside existing
AABB fallback.

											
										
										
											2026-02-08 19:56:17 -08:00
+								        bool isWall  = (absNz < 0.65f);
 								        float triMinX = std::min({v0.x, v1.x, v2.x});
 								        float triMaxX = std::max({v0.x, v1.x, v2.x});
 								        float triMinY = std::min({v0.y, v1.y, v2.y});
 								        float triMaxY = std::max({v0.y, v1.y, v2.y});
 								        int cxMin = std::clamp(static_cast<int>((triMinX - gridOrigin.x) / CELL_SIZE), 0, gridCellsX - 1);
 								        int cxMax = std::clamp(static_cast<int>((triMaxX - gridOrigin.x) / CELL_SIZE), 0, gridCellsX - 1);
 								        int cyMin = std::clamp(static_cast<int>((triMinY - gridOrigin.y) / CELL_SIZE), 0, gridCellsY - 1);
 								        int cyMax = std::clamp(static_cast<int>((triMaxY - gridOrigin.y) / CELL_SIZE), 0, gridCellsY - 1);
 								        for (int cy = cyMin; cy <= cyMax; cy++) {
 								            for (int cx = cxMin; cx <= cxMax; cx++) {
 								                int ci = cy * gridCellsX + cx;
 								                if (isFloor) cellFloorTris[ci].push_back(ti);
 								                if (isWall)  cellWallTris[ci].push_back(ti);
 								            }
 								        }
 								    }
 								}
 								// Fills `out` with the sorted, de-duplicated indices of floor triangles stored
 								// in the grid cells overlapped by the XY rectangle [minX,maxX] x [minY,maxY].
 								// `out` is always cleared first and stays empty when the grid has no cells.
 								void M2ModelGPU::CollisionMesh::getFloorTrisInRange(
 								        float minX, float minY, float maxX, float maxY,
 								        std::vector<uint32_t>& out) const {
 								    out.clear();
 								    if (gridCellsX == 0 || gridCellsY == 0) return;
 								    // Grid column/row of a coordinate, clamped to the grid.
 								    const auto columnOf = [&](float x) {
 								        return std::clamp(static_cast<int>((x - gridOrigin.x) / CELL_SIZE), 0, gridCellsX - 1);
 								    };
 								    const auto rowOf = [&](float y) {
 								        return std::clamp(static_cast<int>((y - gridOrigin.y) / CELL_SIZE), 0, gridCellsY - 1);
 								    };
 								    const int colFirst = columnOf(minX);
 								    const int colLast = columnOf(maxX);
 								    const int rowFirst = rowOf(minY);
 								    const int rowLast = rowOf(maxY);
 								    for (int row = rowFirst; row <= rowLast; ++row) {
 								        for (int col = colFirst; col <= colLast; ++col) {
 								            const auto& bucket = cellFloorTris[row * gridCellsX + col];
 								            out.insert(out.end(), bucket.begin(), bucket.end());
 								        }
 								    }
 								    // A triangle spanning several cells was collected once per cell; keep one copy.
 								    std::sort(out.begin(), out.end());
 								    const auto firstDuplicate = std::unique(out.begin(), out.end());
 								    out.erase(firstDuplicate, out.end());
 								}
 								// Fills `out` with the sorted, de-duplicated indices of wall triangles stored
 								// in the grid cells overlapped by the XY rectangle [minX,maxX] x [minY,maxY].
 								// `out` is always cleared first and stays empty when the grid has no cells.
 								void M2ModelGPU::CollisionMesh::getWallTrisInRange(
 								        float minX, float minY, float maxX, float maxY,
 								        std::vector<uint32_t>& out) const {
 								    out.clear();
 								    if (gridCellsX == 0 || gridCellsY == 0) return;
 								    // Grid column/row of a coordinate, clamped to the grid.
 								    const auto columnOf = [&](float x) {
 								        return std::clamp(static_cast<int>((x - gridOrigin.x) / CELL_SIZE), 0, gridCellsX - 1);
 								    };
 								    const auto rowOf = [&](float y) {
 								        return std::clamp(static_cast<int>((y - gridOrigin.y) / CELL_SIZE), 0, gridCellsY - 1);
 								    };
 								    const int colFirst = columnOf(minX);
 								    const int colLast = columnOf(maxX);
 								    const int rowFirst = rowOf(minY);
 								    const int rowLast = rowOf(maxY);
 								    for (int row = rowFirst; row <= rowLast; ++row) {
 								        for (int col = colFirst; col <= colLast; ++col) {
 								            const auto& bucket = cellWallTris[row * gridCellsX + col];
 								            out.insert(out.end(), bucket.begin(), bucket.end());
 								        }
 								    }
 								    // A triangle spanning several cells was collected once per cell; keep one copy.
 								    std::sort(out.begin(), out.end());
 								    const auto firstDuplicate = std::unique(out.begin(), out.end());
 								    out.erase(firstDuplicate, out.end());
 								}
-												Optimize M2 and terrain rendering for 60fps target

Implements aggressive performance optimizations to improve frame rate from 29fps to 40fps:

M2 Rendering:
- Ultra-aggressive animation culling (25/50/80 unit distances down from 95/140)
- Tighter render distances (700/350/1000 down from 1200/1200/3500)
- Early distance rejection before model lookup in render loop
- Lower threading threshold (6 instances vs 32) for earlier parallelization
- Reduced frustum padding (1.5x vs 2.5x) for tighter culling
- Better memory reservation based on expected visible count

Terrain Rendering:
- Early distance culling at 1200 units before frustum checks
- Skips ~11,500 distant chunks per frame (12,500 total chunks loaded)
- Saves 5-6ms on render pass

Performance Impact:
- Render time: 20ms → 14-15ms (30% faster)
- Frame rate: 29fps → 40fps (+11fps)
- Total savings: ~9ms per frame

											
										
										
											2026-02-10 17:23:41 -08:00
+								bool M2Renderer::hasModel(uint32_t modelId) const {
 								    return models.find(modelId) != models.end();
 								}
-												Initial commit: wowee native WoW 3.3.5a client

											
										
										
											2026-02-02 12:24:50 -08:00
+								bool M2Renderer::loadModel(const pipeline::M2Model& model, uint32_t modelId) {
 								    if (models.find(modelId) != models.end()) {
 								        // Already loaded
 								        return true;
 								    }
-												Stabilize streaming memory and parser handling; revert socket recv optimizations

											
										
										
											2026-02-22 07:26:54 -08:00
+								    if (models.size() >= modelCacheLimit_) {
-												Increase texture cache budgets to 4GB and cap repetitive warnings

Raise all texture cache defaults from 1GB to 4GB to reduce rejections.
Cap cache-full warnings (texture + model) to 3 messages per renderer,
and cap update block parse errors to 5 messages.

											
										
										
											2026-02-23 04:32:58 -08:00
+								        if (modelLimitRejectWarnings_ < 3) {
-												Stabilize streaming memory and parser handling; revert socket recv optimizations

											
										
										
											2026-02-22 07:26:54 -08:00
+								            LOG_WARNING("M2 model cache full (", models.size(), "/", modelCacheLimit_,
 								                        "), skipping model load: id=", modelId, " name=", model.name);
 								        }
 								        ++modelLimitRejectWarnings_;
 								        return false;
 								    }
-												Initial commit: wowee native WoW 3.3.5a client

											
										
										
											2026-02-02 12:24:50 -08:00
-												Add 3D level-up effect using LevelUp.m2 spell model

Replace 2D screen-space ding rings with real WoW LevelUp.m2 particle/geometry
effect. Fix FBlock particle color parsing (C3Vector floats, not CImVector bytes)
which was producing blue/red instead of golden yellow. Spell effect models bypass
particle dampeners, glow sprite conversion, Mod→Additive blend override, and all
collision (floor/wall/camera) to prevent camera zoom-in. Other players' level-ups
trigger the 3D effect at their position with group chat notification. F7 hotkey
for testing.

											
										
										
											2026-02-19 20:36:25 -08:00
+								    bool hasGeometry = !model.vertices.empty() && !model.indices.empty();
 								    bool hasParticles = !model.particleEmitters.empty();
 								    if (!hasGeometry && !hasParticles) {
 								        LOG_WARNING("M2 model has no geometry and no particles: ", model.name);
-												Initial commit: wowee native WoW 3.3.5a client

											
										
										
											2026-02-02 12:24:50 -08:00
+								        return false;
 								    }
 								    M2ModelGPU gpuModel;
 								    gpuModel.name = model.name;
-												Make InvisibleTrap objects invisible and non-collidable

Event objects like Fire Festival Fury Trap and Mercutio Post use
SpellObject_InvisibleTrap.m2 models which were rendering as white
tiles using WHITE1.BLP texture. These are meant to be invisible
spell trigger objects that should not obstruct player movement.

Changes:
- Added isInvisibleTrap flag to M2ModelGPU struct
- Detect models with "invisibletrap" in name during loading
- Skip rendering invisible trap instances in render loop
- Disable all collision checks (floor/wall/occlusion) for invisible traps
- Objects remain functional for spell casting but are now invisible

											
										
										
											2026-02-09 22:31:36 -08:00
 								    // Detect invisible trap models (event objects that should not render or collide)
 								    std::string lowerName = model.name;
 								    std::transform(lowerName.begin(), lowerName.end(), lowerName.begin(),
 								                   [](unsigned char c) { return static_cast<char>(std::tolower(c)); });
 								    bool isInvisibleTrap = (lowerName.find("invisibletrap") != std::string::npos);
 								    gpuModel.isInvisibleTrap = isInvisibleTrap;
 								    if (isInvisibleTrap) {
 								        LOG_INFO("Loading InvisibleTrap model: ", model.name, " (will be invisible, no collision)");
 								    }
-												Tune collision feel and align M2 movement/camera behavior

											
										
										
											2026-02-03 15:17:54 -08:00
+								    // Use tight bounds from actual vertices for collision/camera occlusion.
 								    // Header bounds in some M2s are overly conservative.
-												Add 3D level-up effect using LevelUp.m2 spell model

Replace 2D screen-space ding rings with real WoW LevelUp.m2 particle/geometry
effect. Fix FBlock particle color parsing (C3Vector floats, not CImVector bytes)
which was producing blue/red instead of golden yellow. Spell effect models bypass
particle dampeners, glow sprite conversion, Mod→Additive blend override, and all
collision (floor/wall/camera) to prevent camera zoom-in. Other players' level-ups
trigger the 3D effect at their position with group chat notification. F7 hotkey
for testing.

											
										
										
											2026-02-19 20:36:25 -08:00
+								    glm::vec3 tightMin(0.0f);
 								    glm::vec3 tightMax(0.0f);
 								    if (hasGeometry) {
 								        tightMin = glm::vec3(std::numeric_limits<float>::max());
 								        tightMax = glm::vec3(-std::numeric_limits<float>::max());
 								        for (const auto& v : model.vertices) {
 								            tightMin = glm::min(tightMin, v.position);
 								            tightMax = glm::max(tightMax, v.position);
 								        }
-												Tune collision feel and align M2 movement/camera behavior

											
										
										
											2026-02-03 15:17:54 -08:00
+								    }
-												Improve shadow stability and reduce foliage pop-in

											
										
										
											2026-02-04 16:30:24 -08:00
+								    bool foliageOrTreeLike = false;
-												Fix movement, mounts, and terrain seams

											
										
										
											2026-02-07 20:24:25 -08:00
+								    bool chestName = false;
-												Improve runtime stutter handling and ground clutter performance

- reduce per-tile ground clutter generation pressure and enforce tighter caps to avoid spikes

- remove expensive detail dedupe scans from the hot render path

- add progressive/lazy clutter updates around player movement to smooth frame pacing

- lower noisy runtime INFO logging to DEBUG/throttled paths

- keep terrain/game screen updates responsive while preserving existing behavior

											
										
										
											2026-02-21 01:26:16 -08:00
+								    bool groundDetailModel = false;
-												Tune planter curb collision and reduce foliage pushback

											
										
										
											2026-02-03 16:51:25 -08:00
+								    {
 								        std::string lowerName = model.name;
 								        std::transform(lowerName.begin(), lowerName.end(), lowerName.begin(),
 								                       [](unsigned char c) { return static_cast<char>(std::tolower(c)); });
 								        gpuModel.collisionSteppedFountain = (lowerName.find("fountain") != std::string::npos);
 								        glm::vec3 dims = tightMax - tightMin;
 								        float horiz = std::max(dims.x, dims.y);
 								        float vert = std::max(0.0f, dims.z);
 								        bool lowWideShape = (horiz > 1.4f && vert > 0.2f && vert < horiz * 0.70f);
 								        bool likelyCurbName =
 								            (lowerName.find("planter") != std::string::npos) ||
 								            (lowerName.find("curb") != std::string::npos) ||
 								            (lowerName.find("base") != std::string::npos) ||
 								            (lowerName.find("ring") != std::string::npos) ||
 								            (lowerName.find("well") != std::string::npos);
 								        bool knownStormwindPlanter =
 								            (lowerName.find("stormwindplanter") != std::string::npos) ||
 								            (lowerName.find("stormwindwindowplanter") != std::string::npos);
 								        bool lowPlatformShape = (horiz > 1.8f && vert > 0.2f && vert < 1.8f);
-												Improve targeting, minimap, and bridge collisions

											
										
										
											2026-02-07 20:51:53 -08:00
+								        bool bridgeName =
 								            (lowerName.find("bridge") != std::string::npos) ||
 								            (lowerName.find("plank") != std::string::npos) ||
 								            (lowerName.find("walkway") != std::string::npos);
-												Tune planter curb collision and reduce foliage pushback

											
										
										
											2026-02-03 16:51:25 -08:00
+								        gpuModel.collisionSteppedLowPlatform = (!gpuModel.collisionSteppedFountain) &&
 								                                               (knownStormwindPlanter ||
-												Improve targeting, minimap, and bridge collisions

											
										
										
											2026-02-07 20:51:53 -08:00
+								                                                bridgeName ||
-												Tune planter curb collision and reduce foliage pushback

											
										
										
											2026-02-03 16:51:25 -08:00
+								                                                (likelyCurbName && (lowPlatformShape || lowWideShape)));
-												Improve targeting, minimap, and bridge collisions

											
										
										
											2026-02-07 20:51:53 -08:00
+								        gpuModel.collisionBridge = bridgeName;
-												Tune planter curb collision and reduce foliage pushback

											
										
										
											2026-02-03 16:51:25 -08:00
 								        bool isPlanter = (lowerName.find("planter") != std::string::npos);
-												Improve performance and tune ramp/planter collision behavior

											
										
										
											2026-02-03 17:21:04 -08:00
+								        gpuModel.collisionPlanter = isPlanter;
-												Add spellbook, fix WMO floor clipping, and polish UI/visuals

- Add spellbook screen (P key) with Spell.dbc name lookup and action bar assignment
- Default Attack and Hearthstone spells available in single player
- Fix WMO floor clipping (gryphon roost) by tightening ceiling rejection threshold
- Darken ocean water, increase wave motion and opacity
- Add M2 model distance fade-in to prevent pop-in
- Reposition chat window, add slash/enter key focus
- Remove debug key commands (keep only F1 perf HUD, N minimap)
- Performance: return chat history by const ref, use deque for O(1) pop_front

											
										
										
											2026-02-04 11:31:08 -08:00
+								        bool statueName =
 								            (lowerName.find("statue") != std::string::npos) ||
 								            (lowerName.find("monument") != std::string::npos) ||
 								            (lowerName.find("sculpture") != std::string::npos);
 								        gpuModel.collisionStatue = statueName;
-												Tune collision behavior for ramps, props, and structural walls

											
										
										
											2026-02-03 19:10:22 -08:00
+								        bool smallSolidPropName =
-												Add spellbook, fix WMO floor clipping, and polish UI/visuals

- Add spellbook screen (P key) with Spell.dbc name lookup and action bar assignment
- Default Attack and Hearthstone spells available in single player
- Fix WMO floor clipping (gryphon roost) by tightening ceiling rejection threshold
- Darken ocean water, increase wave motion and opacity
- Add M2 model distance fade-in to prevent pop-in
- Reposition chat window, add slash/enter key focus
- Remove debug key commands (keep only F1 perf HUD, N minimap)
- Performance: return chat history by const ref, use deque for O(1) pop_front

											
										
										
											2026-02-04 11:31:08 -08:00
+								            statueName ||
-												Tune collision behavior for ramps, props, and structural walls

											
										
										
											2026-02-03 19:10:22 -08:00
+								            (lowerName.find("crate") != std::string::npos) ||
 								            (lowerName.find("box") != std::string::npos) ||
 								            (lowerName.find("chest") != std::string::npos) ||
 								            (lowerName.find("barrel") != std::string::npos);
-												Fix movement, mounts, and terrain seams

											
										
										
											2026-02-07 20:24:25 -08:00
+								        chestName = (lowerName.find("chest") != std::string::npos);
-												Tune planter curb collision and reduce foliage pushback

											
										
										
											2026-02-03 16:51:25 -08:00
+								        bool foliageName =
 								            (lowerName.find("bush") != std::string::npos) ||
 								            (lowerName.find("grass") != std::string::npos) ||
-												Fix movement, mounts, and terrain seams

											
										
										
											2026-02-07 20:24:25 -08:00
+								            (lowerName.find("drygrass") != std::string::npos) ||
 								            (lowerName.find("dry_grass") != std::string::npos) ||
 								            (lowerName.find("dry-grass") != std::string::npos) ||
 								            (lowerName.find("deadgrass") != std::string::npos) ||
 								            (lowerName.find("dead_grass") != std::string::npos) ||
 								            (lowerName.find("dead-grass") != std::string::npos) ||
-												Tune planter curb collision and reduce foliage pushback

											
										
										
											2026-02-03 16:51:25 -08:00
+								            ((lowerName.find("plant") != std::string::npos) && !isPlanter) ||
 								            (lowerName.find("flower") != std::string::npos) ||
 								            (lowerName.find("shrub") != std::string::npos) ||
 								            (lowerName.find("fern") != std::string::npos) ||
-												Add loading screen, fix tree/foliage collision, jump buffering, and fence rotation

- Loading screen stays visible until all terrain tiles finish streaming;
  character spawns only after terrain is loaded and Z-snapped to ground
- Reduce tree trunk collision bounds (5% of canopy, capped at 5.0) and
  make all small/medium trees, bushes, lily pads, and foliage walk-through (non-blocking)
- Add jump input buffering (150ms) and coyote time (100ms) for responsive jumps
- Fix fence orientation by adding +180° heading rotation
- Increase terrain load radius from 1 to 2 (5x5 tile grid)
- Add hearthstone callback for single-player camera reset

											
										
										
											2026-02-04 13:29:27 -08:00
+								            (lowerName.find("vine") != std::string::npos) ||
 								            (lowerName.find("lily") != std::string::npos) ||
-												Make mushrooms non-blocking foliage

											
										
										
											2026-02-07 19:45:09 -08:00
+								            (lowerName.find("weed") != std::string::npos) ||
-												Improve targeting, minimap, and bridge collisions

											
										
										
											2026-02-07 20:51:53 -08:00
+								            (lowerName.find("wheat") != std::string::npos) ||
-												Fix movement, mounts, and terrain seams

											
										
										
											2026-02-07 20:24:25 -08:00
+								            (lowerName.find("pumpkin") != std::string::npos) ||
 								            (lowerName.find("firefly") != std::string::npos) ||
 								            (lowerName.find("fireflies") != std::string::npos) ||
 								            (lowerName.find("fireflys") != std::string::npos) ||
-												Make mushrooms non-blocking foliage

											
										
										
											2026-02-07 19:45:09 -08:00
+								            (lowerName.find("mushroom") != std::string::npos) ||
 								            (lowerName.find("fungus") != std::string::npos) ||
-												Reduce collision trapping and improve /unstuck

Add root, branch, thorn, moss, ivy, and other natural doodads to the no-block foliage list. /unstuck now moves the player 5 units forward instead of resetting in place.

											
										
										
											2026-02-07 23:34:28 -08:00
+								            (lowerName.find("toadstool") != std::string::npos) ||
 								            (lowerName.find("root") != std::string::npos) ||
 								            (lowerName.find("branch") != std::string::npos) ||
 								            (lowerName.find("thorn") != std::string::npos) ||
 								            (lowerName.find("moss") != std::string::npos) ||
 								            (lowerName.find("ivy") != std::string::npos) ||
 								            (lowerName.find("seaweed") != std::string::npos) ||
 								            (lowerName.find("kelp") != std::string::npos) ||
 								            (lowerName.find("cattail") != std::string::npos) ||
-												Fix mount sounds, grey WMO meshes, taxi landing, tree animations, and classic dismount

- Per-family mount sounds (kodo, tallstrider, mechanostrider, etc.) detected from M2 model path
- Skip WMO groups with SHOW_SKYBOX flag or all-untextured batches (grey mesh in Orgrimmar)
- Freeze physics during taxi landing until terrain loads to prevent falling through void
- Disable bone animations on tropical vegetation (palm, bamboo, banana, etc.) to fix wiggling
- Snap player to final taxi waypoint on flight completion
- Extract mount aura spell ID from classic UNIT_FIELD_AURAS for CMSG_CANCEL_AURA dismount
- Increase /unstuck forward nudge to 5 units

											
										
										
											2026-02-14 21:04:20 -08:00
+								            (lowerName.find("reed") != std::string::npos) ||
 								            (lowerName.find("palm") != std::string::npos) ||
 								            (lowerName.find("bamboo") != std::string::npos) ||
 								            (lowerName.find("banana") != std::string::npos) ||
 								            (lowerName.find("coconut") != std::string::npos) ||
 								            (lowerName.find("canopy") != std::string::npos) ||
 								            (lowerName.find("hedge") != std::string::npos) ||
 								            (lowerName.find("cactus") != std::string::npos) ||
 								            (lowerName.find("leaf") != std::string::npos) ||
 								            (lowerName.find("leaves") != std::string::npos) ||
 								            (lowerName.find("stalk") != std::string::npos) ||
 								            (lowerName.find("corn") != std::string::npos) ||
 								            (lowerName.find("crop") != std::string::npos) ||
 								            (lowerName.find("hay") != std::string::npos) ||
 								            (lowerName.find("frond") != std::string::npos) ||
 								            (lowerName.find("algae") != std::string::npos) ||
 								            (lowerName.find("coral") != std::string::npos);
-												Tune planter curb collision and reduce foliage pushback

											
										
										
											2026-02-03 16:51:25 -08:00
+								        bool treeLike = (lowerName.find("tree") != std::string::npos);
-												Improve shadow stability and reduce foliage pop-in

											
										
										
											2026-02-04 16:30:24 -08:00
+								        foliageOrTreeLike = (foliageName || treeLike);
-												Improve runtime stutter handling and ground clutter performance

- reduce per-tile ground clutter generation pressure and enforce tighter caps to avoid spikes

- remove expensive detail dedupe scans from the hot render path

- add progressive/lazy clutter updates around player movement to smooth frame pacing

- lower noisy runtime INFO logging to DEBUG/throttled paths

- keep terrain/game screen updates responsive while preserving existing behavior

											
										
										
											2026-02-21 01:26:16 -08:00
+								        groundDetailModel =
 								            (lowerName.find("\\nodxt\\detail\\") != std::string::npos) ||
 								            (lowerName.find("\\detail\\") != std::string::npos);
-												Tune planter curb collision and reduce foliage pushback

											
										
										
											2026-02-03 16:51:25 -08:00
+								        bool hardTreePart =
 								            (lowerName.find("trunk") != std::string::npos) ||
 								            (lowerName.find("stump") != std::string::npos) ||
 								            (lowerName.find("log") != std::string::npos);
-												Proactive tile streaming, faster finalization, tree trunk collision

- Re-check for unloaded tiles when workers are idle (no tile boundary needed)
- Increase M2 upload budget 4→16 and WMO 1→4 per frame when not under pressure
- Lower tree collision threshold from 40 to 6 units so large trees block movement

											
										
										
											2026-03-07 22:35:18 -08:00
+								        // Trees with visible trunks get collision. Threshold: canopy wider than 6
 								        // model units AND taller than 4 units (filters out small bushes/saplings).
 								        bool treeWithTrunk = treeLike && !hardTreePart && !foliageName && horiz > 6.0f && vert > 4.0f;
-												Add loading screen, fix tree/foliage collision, jump buffering, and fence rotation

- Loading screen stays visible until all terrain tiles finish streaming;
  character spawns only after terrain is loaded and Z-snapped to ground
- Reduce tree trunk collision bounds (5% of canopy, capped at 5.0) and
  make all small/medium trees, bushes, lily pads, and foliage walk-through (non-blocking)
- Add jump input buffering (150ms) and coyote time (100ms) for responsive jumps
- Fix fence orientation by adding +180° heading rotation
- Increase terrain load radius from 1 to 2 (5x5 tile grid)
- Add hearthstone callback for single-player camera reset

											
										
										
											2026-02-04 13:29:27 -08:00
+								        bool softTree = treeLike && !hardTreePart && !treeWithTrunk;
-												Improve performance and tune ramp/planter collision behavior

											
										
										
											2026-02-03 17:21:04 -08:00
+								        bool forceSolidCurb = gpuModel.collisionSteppedLowPlatform || knownStormwindPlanter || likelyCurbName || gpuModel.collisionPlanter;
-												Tune collision behavior for ramps, props, and structural walls

											
										
										
											2026-02-03 19:10:22 -08:00
+								        bool narrowVerticalName =
 								            (lowerName.find("lamp") != std::string::npos) ||
 								            (lowerName.find("lantern") != std::string::npos) ||
 								            (lowerName.find("post") != std::string::npos) ||
 								            (lowerName.find("pole") != std::string::npos);
 								        bool narrowVerticalShape =
 								            (horiz > 0.12f && horiz < 2.0f && vert > 2.2f && vert > horiz * 1.8f);
-												Add loading screen, fix tree/foliage collision, jump buffering, and fence rotation

- Loading screen stays visible until all terrain tiles finish streaming;
  character spawns only after terrain is loaded and Z-snapped to ground
- Reduce tree trunk collision bounds (5% of canopy, capped at 5.0) and
  make all small/medium trees, bushes, lily pads, and foliage walk-through (non-blocking)
- Add jump input buffering (150ms) and coyote time (100ms) for responsive jumps
- Fix fence orientation by adding +180° heading rotation
- Increase terrain load radius from 1 to 2 (5x5 tile grid)
- Add hearthstone callback for single-player camera reset

											
										
										
											2026-02-04 13:29:27 -08:00
+								        gpuModel.collisionTreeTrunk = treeWithTrunk;
-												Tune collision behavior for ramps, props, and structural walls

											
										
										
											2026-02-03 19:10:22 -08:00
+								        gpuModel.collisionNarrowVerticalProp =
 								            !gpuModel.collisionSteppedFountain &&
 								            !gpuModel.collisionSteppedLowPlatform &&
 								            (narrowVerticalName || narrowVerticalShape);
 								        bool genericSolidPropShape =
-												Add spellbook, fix WMO floor clipping, and polish UI/visuals

- Add spellbook screen (P key) with Spell.dbc name lookup and action bar assignment
- Default Attack and Hearthstone spells available in single player
- Fix WMO floor clipping (gryphon roost) by tightening ceiling rejection threshold
- Darken ocean water, increase wave motion and opacity
- Add M2 model distance fade-in to prevent pop-in
- Reposition chat window, add slash/enter key focus
- Remove debug key commands (keep only F1 perf HUD, N minimap)
- Performance: return chat history by const ref, use deque for O(1) pop_front

											
										
										
											2026-02-04 11:31:08 -08:00
+								            (horiz > 0.6f && horiz < 6.0f && vert > 0.30f && vert < 4.0f && vert > horiz * 0.16f) ||
 								            statueName;
-												Tune collision behavior for ramps, props, and structural walls

											
										
										
											2026-02-03 19:10:22 -08:00
+								        bool curbLikeName =
 								            (lowerName.find("curb") != std::string::npos) ||
 								            (lowerName.find("planter") != std::string::npos) ||
 								            (lowerName.find("ring") != std::string::npos) ||
 								            (lowerName.find("well") != std::string::npos) ||
 								            (lowerName.find("base") != std::string::npos);
 								        bool lowPlatformLikeShape = lowWideShape || lowPlatformShape;
-												Disable collision for carpet and rug M2 models

- Add carpet/rug name detection in model loading
- Set collisionNoBlock flag for carpet and rug models
- Prevents slipping/sliding on decorative floor coverings
- Player can now walk through carpets without collision

											
										
										
											2026-02-09 20:05:24 -08:00
+								        bool carpetOrRug =
 								            (lowerName.find("carpet") != std::string::npos) ||
 								            (lowerName.find("rug") != std::string::npos);
-												Tune collision behavior for ramps, props, and structural walls

											
										
										
											2026-02-03 19:10:22 -08:00
+								        gpuModel.collisionSmallSolidProp =
 								            !gpuModel.collisionSteppedFountain &&
 								            !gpuModel.collisionSteppedLowPlatform &&
 								            !gpuModel.collisionNarrowVerticalProp &&
-												Add loading screen, fix tree/foliage collision, jump buffering, and fence rotation

- Loading screen stays visible until all terrain tiles finish streaming;
  character spawns only after terrain is loaded and Z-snapped to ground
- Reduce tree trunk collision bounds (5% of canopy, capped at 5.0) and
  make all small/medium trees, bushes, lily pads, and foliage walk-through (non-blocking)
- Add jump input buffering (150ms) and coyote time (100ms) for responsive jumps
- Fix fence orientation by adding +180° heading rotation
- Increase terrain load radius from 1 to 2 (5x5 tile grid)
- Add hearthstone callback for single-player camera reset

											
										
										
											2026-02-04 13:29:27 -08:00
+								            !gpuModel.collisionTreeTrunk &&
-												Tune collision behavior for ramps, props, and structural walls

											
										
										
											2026-02-03 19:10:22 -08:00
+								            !curbLikeName &&
 								            !lowPlatformLikeShape &&
 								            (smallSolidPropName || (genericSolidPropShape && !foliageName && !softTree));
-												Disable collision for carpet and rug M2 models

- Add carpet/rug name detection in model loading
- Set collisionNoBlock flag for carpet and rug models
- Prevents slipping/sliding on decorative floor coverings
- Player can now walk through carpets without collision

											
										
										
											2026-02-09 20:05:24 -08:00
+								        // Disable collision for foliage, soft trees, and decorative carpets/rugs, unless the model is forced solid as a curb/planter/low platform (forceSolidCurb)
 								        gpuModel.collisionNoBlock = ((foliageName || softTree || carpetOrRug) &&
-												Tune planter curb collision and reduce foliage pushback

											
										
										
											2026-02-03 16:51:25 -08:00
+								                                     !forceSolidCurb);
 								    }
-												Tune collision feel and align M2 movement/camera behavior

											
										
										
											2026-02-03 15:17:54 -08:00
+								    gpuModel.boundMin = tightMin;
 								    gpuModel.boundMax = tightMax;
-												Initial commit: wowee native WoW 3.3.5a client

											
										
										
											2026-02-02 12:24:50 -08:00
+								    gpuModel.boundRadius = model.boundRadius;
 								    gpuModel.indexCount = static_cast<uint32_t>(model.indices.size());
 								    gpuModel.vertexCount = static_cast<uint32_t>(model.vertices.size());
-												Add M2 skeletal animation and fix terrain texture layers

- Implement GPU bone skinning for M2 doodads/creatures (gryphons, birds)
- Store bone hierarchy and animation keyframes per model
- Compute bone matrices per-instance with keyframe interpolation
- Upload bone weights/indices in vertex buffer, skinning in vertex shader
- Fix terrain texture rendering: restore sampler-to-unit uniform bindings
  removed during texture bind optimization (roads were invisible)

											
										
										
											2026-02-04 11:40:00 -08:00
+								    // Store bone/sequence data for animation
 								    gpuModel.bones = model.bones;
 								    gpuModel.sequences = model.sequences;
-												Add M2 global sequence animation, smoke UV scroll, and fix WMO floor detection

- Parse global sequence durations from M2 binary and use them in bone
  interpolation so torches, candles, and other env doodads animate.
- Add UV scroll shader effect for smoke models (HouseSmoke, SmokeStack)
  as a workaround for unimplemented M2 particle emitters.
- Tighten WMO floor probe heights to prevent multi-story buildings from
  returning the wrong floor, fixing player clipping through inn floors
  and camera locking onto the second floor.
- Use player ground level as reference for camera orbit floor collision
  so the camera doesn't fight upper floors in buildings.

											
										
										
											2026-02-04 14:06:59 -08:00
+								    gpuModel.globalSequenceDurations = model.globalSequenceDurations;
-												Add M2 skeletal animation and fix terrain texture layers

- Implement GPU bone skinning for M2 doodads/creatures (gryphons, birds)
- Store bone hierarchy and animation keyframes per model
- Compute bone matrices per-instance with keyframe interpolation
- Upload bone weights/indices in vertex buffer, skinning in vertex shader
- Fix terrain texture rendering: restore sampler-to-unit uniform bindings
  removed during texture bind optimization (roads were invisible)

											
										
										
											2026-02-04 11:40:00 -08:00
+								    gpuModel.hasAnimation = false;
 								    for (const auto& bone : model.bones) {
 								        if (bone.translation.hasData() || bone.rotation.hasData() || bone.scale.hasData()) {
 								            gpuModel.hasAnimation = true;
 								            break;
 								        }
 								    }
-												Exempt ambient creatures from foliage animation freeze

Fireflies, dragonflies, butterflies, and moths no longer get
disableAnimation/foliage classification so their bone animation
and particles run normally.

											
										
										
											2026-02-23 07:21:45 -08:00
+								    bool ambientCreature =
 								        (lowerName.find("firefly") != std::string::npos) ||
 								        (lowerName.find("fireflies") != std::string::npos) ||
 								        (lowerName.find("fireflys") != std::string::npos) ||
 								        (lowerName.find("dragonfly") != std::string::npos) ||
 								        (lowerName.find("dragonflies") != std::string::npos) ||
 								        (lowerName.find("butterfly") != std::string::npos) ||
 								        (lowerName.find("moth") != std::string::npos);
 								    gpuModel.disableAnimation = (foliageOrTreeLike && !ambientCreature) || chestName;
 								    gpuModel.shadowWindFoliage = foliageOrTreeLike && !ambientCreature;
 								    gpuModel.isFoliageLike = foliageOrTreeLike && !ambientCreature;
-												Optimize M2/WMO render loop: cache UBO pointers, precompute model flags, reduce rebinds

- Cache material UBO mapped pointers at creation time, eliminating
  per-batch vmaGetAllocationInfo() calls in the hot render path
- Precompute foliage/elven/lantern/kobold model name classifications
  at load time instead of per-instance string operations every frame
- Remove redundant descriptor set and push constant rebinds on WMO
  pipeline switches (preserved across compatible layouts)
- Pre-allocate glow sprite descriptor set once at init instead of
  allocating from the pool every frame

											
										
										
											2026-02-23 06:06:24 -08:00
+								    gpuModel.isElvenLike =
 								        (lowerName.find("elf") != std::string::npos) ||
 								        (lowerName.find("elven") != std::string::npos) ||
 								        (lowerName.find("quel") != std::string::npos);
 								    gpuModel.isLanternLike =
 								        (lowerName.find("lantern") != std::string::npos) ||
 								        (lowerName.find("lamp") != std::string::npos) ||
 								        (lowerName.find("light") != std::string::npos);
 								    gpuModel.isKoboldFlame =
 								        (lowerName.find("kobold") != std::string::npos) &&
 								        ((lowerName.find("candle") != std::string::npos) ||
 								         (lowerName.find("torch") != std::string::npos) ||
 								         (lowerName.find("mine") != std::string::npos));
-												Improve runtime stutter handling and ground clutter performance

- reduce per-tile ground clutter generation pressure and enforce tighter caps to avoid spikes

- remove expensive detail dedupe scans from the hot render path

- add progressive/lazy clutter updates around player movement to smooth frame pacing

- lower noisy runtime INFO logging to DEBUG/throttled paths

- keep terrain/game screen updates responsive while preserving existing behavior

											
										
										
											2026-02-21 01:26:16 -08:00
+								    gpuModel.isGroundDetail = groundDetailModel;
 								    if (groundDetailModel) {
 								        // Ground clutter (grass/pebbles/detail cards) should never block camera/movement.
 								        gpuModel.collisionNoBlock = true;
 								    }
-												Effect model additive blend, teleport facing, tighter area triggers

Classify light shafts, portals, spotlights, bubbles, and similar M2
doodads as spell effects so they render with additive blending instead
of as solid opaque objects.

Set camera yaw from server orientation on world load so teleports face
the correct direction.

Reduce area trigger minimum radius (3.0 sphere, 4.0 box) to prevent
premature portal firing near tram entrances.

											
										
										
											2026-03-06 17:03:29 -08:00
+								    // Spell effect / pure-visual models: particle-dominated with minimal geometry,
 								    // or named effect models (light shafts, portals, emitters, spotlights)
 								    bool effectByName =
 								        (lowerName.find("lightshaft") != std::string::npos) ||
 								        (lowerName.find("volumetriclight") != std::string::npos) ||
 								        (lowerName.find("instanceportal") != std::string::npos) ||
-												Instance portal glow, spin, and transparent additive rendering

											
										
										
											2026-03-06 18:02:56 -08:00
+								        (lowerName.find("instancenewportal") != std::string::npos) ||
-												Effect model additive blend, teleport facing, tighter area triggers

Classify light shafts, portals, spotlights, bubbles, and similar M2
doodads as spell effects so they render with additive blending instead
of as solid opaque objects.

Set camera yaw from server orientation on world load so teleports face
the correct direction.

Reduce area trigger minimum radius (3.0 sphere, 4.0 box) to prevent
premature portal firing near tram entrances.

											
										
										
											2026-03-06 17:03:29 -08:00
+								        (lowerName.find("mageportal") != std::string::npos) ||
 								        (lowerName.find("worldtreeportal") != std::string::npos) ||
 								        (lowerName.find("particleemitter") != std::string::npos) ||
 								        (lowerName.find("bubbles") != std::string::npos) ||
 								        (lowerName.find("spotlight") != std::string::npos) ||
 								        (lowerName.find("hazardlight") != std::string::npos) ||
 								        (lowerName.find("lavasplash") != std::string::npos) ||
 								        (lowerName.find("lavabubble") != std::string::npos) ||
-												Ironforge Great Forge lava, magma water rendering, LavaSteam particle effects

- Add magma/slime rendering path to water shader (fbm noise, crust/molten/core coloring)
- Fix WMO liquid height filter rejecting high-altitude zones like Ironforge (Z>300)
- Allow interior WMO magma/slime MLIQ groups to load (skip only water/ocean)
- Mark LAVASTEAM.m2 as spell effect for proper additive blend, hide emission mesh
- Add isLavaModel flag for M2 ForgeLava/LavaPots UV scroll fallback
- Add isLava material detection in WMO renderer for lava texture UV animation
- Fix WMO material UBO colors for magma (was blue, now orange-red)

											
										
										
											2026-03-07 00:48:04 -08:00
+								        (lowerName.find("lavasteam") != std::string::npos) ||
-												Effect model additive blend, teleport facing, tighter area triggers

Classify light shafts, portals, spotlights, bubbles, and similar M2
doodads as spell effects so they render with additive blending instead
of as solid opaque objects.

Set camera yaw from server orientation on world load so teleports face
the correct direction.

Reduce area trigger minimum radius (3.0 sphere, 4.0 box) to prevent
premature portal firing near tram entrances.

											
										
										
											2026-03-06 17:03:29 -08:00
+								        (lowerName.find("wisps") != std::string::npos);
 								    gpuModel.isSpellEffect = effectByName ||
 								                              (hasParticles && model.vertices.size() <= 200 &&
 								                               model.particleEmitters.size() >= 3);
-												Ironforge Great Forge lava, magma water rendering, LavaSteam particle effects

- Add magma/slime rendering path to water shader (fbm noise, crust/molten/core coloring)
- Fix WMO liquid height filter rejecting high-altitude zones like Ironforge (Z>300)
- Allow interior WMO magma/slime MLIQ groups to load (skip only water/ocean)
- Mark LAVASTEAM.m2 as spell effect for proper additive blend, hide emission mesh
- Add isLavaModel flag for M2 ForgeLava/LavaPots UV scroll fallback
- Add isLava material detection in WMO renderer for lava texture UV animation
- Fix WMO material UBO colors for magma (was blue, now orange-red)

											
										
										
											2026-03-07 00:48:04 -08:00
+								    gpuModel.isLavaModel =
 								        (lowerName.find("forgelava") != std::string::npos) ||
 								        (lowerName.find("lavapot") != std::string::npos) ||
 								        (lowerName.find("lavaflow") != std::string::npos);
-												Instance portal glow, spin, and transparent additive rendering

											
										
										
											2026-03-06 18:02:56 -08:00
+								    gpuModel.isInstancePortal =
 								        (lowerName.find("instanceportal") != std::string::npos) ||
 								        (lowerName.find("instancenewportal") != std::string::npos) ||
 								        (lowerName.find("portalfx") != std::string::npos) ||
 								        (lowerName.find("spellportal") != std::string::npos);
 								    // Instance portals are spell effects too (additive blend, no collision)
 								    if (gpuModel.isInstancePortal) {
 								        gpuModel.isSpellEffect = true;
 								    }
-												Add ambient insect particles near water vegetation, fix firefly particles, and improve water foam

- Spawn dark point-sprite insects buzzing around cattails/reeds/kelp/seaweed
- Fix firefly M2 particles: exempt from alpha dampening and forced gravity
- Make water shoreline/crest foam more irregular with UV warping and bluer tint

											
										
										
											2026-02-23 07:18:44 -08:00
+								    // Water vegetation: cattails, reeds, bulrushes, kelp, seaweed, lilypad near water
 								    gpuModel.isWaterVegetation =
 								        (lowerName.find("cattail") != std::string::npos) ||
 								        (lowerName.find("reed") != std::string::npos) ||
 								        (lowerName.find("bulrush") != std::string::npos) ||
 								        (lowerName.find("seaweed") != std::string::npos) ||
 								        (lowerName.find("kelp") != std::string::npos) ||
 								        (lowerName.find("lilypad") != std::string::npos);
-												Exempt ambient creatures from foliage animation freeze

Fireflies, dragonflies, butterflies, and moths no longer get
disableAnimation/foliage classification so their bone animation
and particles run normally.

											
										
										
											2026-02-23 07:21:45 -08:00
+								    // Ambient creature effects: particle-based glow (exempt from particle dampeners)
 								    gpuModel.isFireflyEffect = ambientCreature;
-												Add M2 skeletal animation and fix terrain texture layers

- Implement GPU bone skinning for M2 doodads/creatures (gryphons, birds)
- Store bone hierarchy and animation keyframes per model
- Compute bone matrices per-instance with keyframe interpolation
- Upload bone weights/indices in vertex buffer, skinning in vertex shader
- Fix terrain texture rendering: restore sampler-to-unit uniform bindings
  removed during texture bind optimization (roads were invisible)

											
										
										
											2026-02-04 11:40:00 -08:00
-												Add M2 collision mesh parsing and mesh-based wall/floor collision

Parse bounding vertices, triangles, and normals from M2 files and use
them for proper triangle-level collision instead of AABB heuristics.
Spatial grid bucketing for efficient queries, closest-point wall push
with soft clamping, and ray-triangle floor detection alongside existing
AABB fallback.

											
										
										
											2026-02-08 19:56:17 -08:00
+								    // Build collision mesh + spatial grid from M2 bounding geometry
 								    gpuModel.collision.vertices = model.collisionVertices;
 								    gpuModel.collision.indices = model.collisionIndices;
 								    gpuModel.collision.build();
 								    if (gpuModel.collision.valid()) {
 								        core::Logger::getInstance().debug("  M2 collision mesh: ", gpuModel.collision.triCount,
 								            " tris, grid ", gpuModel.collision.gridCellsX, "x", gpuModel.collision.gridCellsY);
 								    }
-												Add M2 global sequence animation, smoke UV scroll, and fix WMO floor detection

- Parse global sequence durations from M2 binary and use them in bone
  interpolation so torches, candles, and other env doodads animate.
- Add UV scroll shader effect for smoke models (HouseSmoke, SmokeStack)
  as a workaround for unimplemented M2 particle emitters.
- Tighten WMO floor probe heights to prevent multi-story buildings from
  returning the wrong floor, fixing player clipping through inn floors
  and camera locking onto the second floor.
- Use player ground level as reference for camera orbit floor collision
  so the camera doesn't fight upper floors in buildings.

											
										
										
											2026-02-04 14:06:59 -08:00
+								    // Flag smoke models for UV scroll animation (particle emitters not implemented)
 								    {
 								        std::string smokeName = model.name;
 								        std::transform(smokeName.begin(), smokeName.end(), smokeName.begin(),
 								                       [](unsigned char c) { return static_cast<char>(std::tolower(c)); });
 								        gpuModel.isSmoke = (smokeName.find("smoke") != std::string::npos);
 								    }
-												Add M2 idle animation variations, dedup instances, fix terrain textures

- Add idle variation system: creatures randomly play Stand variations
  (stretch, flap, look around) every 4-10s, then return to idle loop
- Deduplicate M2 instances at same position (was hidden before animation
  made duplicates visible with different random start times)
- Adaptive M2 render distance: 350 units in open terrain, 180 in cities
- Restore terrain sampler-to-unit uniform bindings lost during texture
  bind optimization (roads were invisible under grass)
- Safety: clamp bone count to 128, validate sequence indices, sanitize scale

											
										
										
											2026-02-04 11:50:18 -08:00
+								    // Identify idle variation sequences (animation ID 0 = Stand)
 								    for (int i = 0; i < static_cast<int>(model.sequences.size()); i++) {
 								        if (model.sequences[i].id == 0 && model.sequences[i].duration > 0) {
 								            gpuModel.idleVariationIndices.push_back(i);
 								        }
 								    }
-												Batch GPU uploads to eliminate per-upload fence waits (stutter fix)

Every uploadBuffer/VkTexture::upload called immediateSubmit which did a
separate vkQueueSubmit + vkWaitForFences. Loading a single creature model
with textures caused 4-8+ fence waits; terrain chunks caused 80+ per batch.

Added beginUploadBatch/endUploadBatch to VkContext: records all upload
commands into a single command buffer, submits once with one fence wait.
Staging buffers are deferred for cleanup after the batch completes.

Wrapped in batch mode:
- CharacterRenderer::loadModel (creature VB/IB + textures)
- M2Renderer::loadModel (doodad VB/IB + textures)
- TerrainRenderer::loadTerrain/loadTerrainIncremental (chunk geometry + textures)
- TerrainRenderer::uploadPreloadedTextures
- WMORenderer::loadModel (group geometry + textures)

											
										
										
											2026-03-07 12:19:59 -08:00
+								    // Batch all GPU uploads (VB, IB, textures) into a single command buffer
 								    // submission with one fence wait, instead of one fence wait per upload.
 								    vkCtx_->beginUploadBatch();
-												Add 3D level-up effect using LevelUp.m2 spell model

Replace 2D screen-space ding rings with real WoW LevelUp.m2 particle/geometry
effect. Fix FBlock particle color parsing (C3Vector floats, not CImVector bytes)
which was producing blue/red instead of golden yellow. Spell effect models bypass
particle dampeners, glow sprite conversion, Mod→Additive blend override, and all
collision (floor/wall/camera) to prevent camera zoom-in. Other players' level-ups
trigger the 3D effect at their position with group chat notification. F7 hotkey
for testing.

											
										
										
											2026-02-19 20:36:25 -08:00
+								    if (hasGeometry) {
 								        // Create VBO with interleaved vertex data
 								        // Format: position (3), normal (3), texcoord0 (2), texcoord1 (2), boneWeights (4), boneIndices (4 as float)
 								        const size_t floatsPerVertex = 18;
 								        std::vector<float> vertexData;
 								        vertexData.reserve(model.vertices.size() * floatsPerVertex);
 								        for (const auto& v : model.vertices) {
 								            vertexData.push_back(v.position.x);
 								            vertexData.push_back(v.position.y);
 								            vertexData.push_back(v.position.z);
 								            vertexData.push_back(v.normal.x);
 								            vertexData.push_back(v.normal.y);
 								            vertexData.push_back(v.normal.z);
 								            vertexData.push_back(v.texCoords[0].x);
 								            vertexData.push_back(v.texCoords[0].y);
 								            vertexData.push_back(v.texCoords[1].x);
 								            vertexData.push_back(v.texCoords[1].y);
 								            float w0 = v.boneWeights[0] / 255.0f;
 								            float w1 = v.boneWeights[1] / 255.0f;
 								            float w2 = v.boneWeights[2] / 255.0f;
 								            float w3 = v.boneWeights[3] / 255.0f;
 								            vertexData.push_back(w0);
 								            vertexData.push_back(w1);
 								            vertexData.push_back(w2);
 								            vertexData.push_back(w3);
 								            vertexData.push_back(static_cast<float>(std::min(v.boneIndices[0], uint8_t(127))));
 								            vertexData.push_back(static_cast<float>(std::min(v.boneIndices[1], uint8_t(127))));
 								            vertexData.push_back(static_cast<float>(std::min(v.boneIndices[2], uint8_t(127))));
 								            vertexData.push_back(static_cast<float>(std::min(v.boneIndices[3], uint8_t(127))));
 								        }
-												Vulkan Nightmare

Experimentally bringing up Vulkan support

											
										
										
											2026-02-21 19:41:21 -08:00
+								        // Upload vertex buffer to GPU
 								        {
 								            auto buf = uploadBuffer(*vkCtx_,
 								                vertexData.data(), vertexData.size() * sizeof(float),
 								                VK_BUFFER_USAGE_VERTEX_BUFFER_BIT);
 								            gpuModel.vertexBuffer = buf.buffer;
 								            gpuModel.vertexAlloc = buf.allocation;
 								        }
-												Add 3D level-up effect using LevelUp.m2 spell model

Replace 2D screen-space ding rings with real WoW LevelUp.m2 particle/geometry
effect. Fix FBlock particle color parsing (C3Vector floats, not CImVector bytes)
which was producing blue/red instead of golden yellow. Spell effect models bypass
particle dampeners, glow sprite conversion, Mod→Additive blend override, and all
collision (floor/wall/camera) to prevent camera zoom-in. Other players' level-ups
trigger the 3D effect at their position with group chat notification. F7 hotkey
for testing.

											
										
										
											2026-02-19 20:36:25 -08:00
-												Vulkan Nightmare

Experimentally bringing up Vulkan support

											
										
										
											2026-02-21 19:41:21 -08:00
+								        // Upload index buffer to GPU
 								        {
 								            auto buf = uploadBuffer(*vkCtx_,
 								                model.indices.data(), model.indices.size() * sizeof(uint16_t),
 								                VK_BUFFER_USAGE_INDEX_BUFFER_BIT);
 								            gpuModel.indexBuffer = buf.buffer;
 								            gpuModel.indexAlloc = buf.allocation;
 								        }
-												Add 3D level-up effect using LevelUp.m2 spell model

Replace 2D screen-space ding rings with real WoW LevelUp.m2 particle/geometry
effect. Fix FBlock particle color parsing (C3Vector floats, not CImVector bytes)
which was producing blue/red instead of golden yellow. Spell effect models bypass
particle dampeners, glow sprite conversion, Mod→Additive blend override, and all
collision (floor/wall/camera) to prevent camera zoom-in. Other players' level-ups
trigger the 3D effect at their position with group chat notification. F7 hotkey
for testing.

											
										
										
											2026-02-19 20:36:25 -08:00
+								    }
-												Add M2 skeletal animation and fix terrain texture layers

- Implement GPU bone skinning for M2 doodads/creatures (gryphons, birds)
- Store bone hierarchy and animation keyframes per model
- Compute bone matrices per-instance with keyframe interpolation
- Upload bone weights/indices in vertex buffer, skinning in vertex shader
- Fix terrain texture rendering: restore sampler-to-unit uniform bindings
  removed during texture bind optimization (roads were invisible)

											
										
										
											2026-02-04 11:40:00 -08:00
-												Fix M2 white shell artifact from missing textures, add opacity track support

Batches whose named texture fails to load now render invisible instead of
white (the swampreeds01a.blp case causing a white shell around aquatic plants).

Also implements proper M2 opacity plumbing:
- Parse texture weight tracks (M2Track<fixed16>) and color animation alpha
  tracks (M2Color.alpha) to resolve per-batch opacity at load time
- Skip batches with batchOpacity < 0.01 in the render loop
- Apply M2Texture.flags (bit0=WrapS, bit1=WrapT) to GL sampler wrap mode
- Upload both UV sets (texCoords[0] and texCoords[1]) and select via
  textureUnit uniform, so batches referencing UV set 1 render correctly

											
										
										
											2026-02-17 23:52:44 -08:00
+								    // Load ALL textures from the model into a local vector.
 								    // textureLoadFailed[i] is true if texture[i] had a named path that failed to load.
 								    // Such batches are hidden (batchOpacity=0) rather than rendered white.
-												Vulkan Nightmare

Experimentally bringing up Vulkan support

											
										
										
											2026-02-21 19:41:21 -08:00
+								    std::vector<VkTexture*> allTextures;
-												Fix M2 white shell artifact from missing textures, add opacity track support

Batches whose named texture fails to load now render invisible instead of
white (the swampreeds01a.blp case causing a white shell around aquatic plants).

Also implements proper M2 opacity plumbing:
- Parse texture weight tracks (M2Track<fixed16>) and color animation alpha
  tracks (M2Color.alpha) to resolve per-batch opacity at load time
- Skip batches with batchOpacity < 0.01 in the render loop
- Apply M2Texture.flags (bit0=WrapS, bit1=WrapT) to GL sampler wrap mode
- Upload both UV sets (texCoords[0] and texCoords[1]) and select via
  textureUnit uniform, so batches referencing UV set 1 render correctly

											
										
										
											2026-02-17 23:52:44 -08:00
+								    std::vector<bool> textureLoadFailed;
-												Refine lantern glow-card replacement and preserve lamp geometry

- Add per-batch glow metadata (lantern hint, card-like classification, tint)

- Track normalized texture keys and log lantern/light texture sets once for diagnostics

- Force sprite replacement for known Stormwind/Night Elf glow textures

- Keep lantern/light meshes visible while hiding only classified glow-card submeshes

- Choose glow sprite tint from texture hints (cool/red/warm) to avoid orange-only cards

- Broaden lantern glow detection to handle gameobject lights with nonstandard material setups

											
										
										
											2026-02-21 03:51:42 -08:00
+								    std::vector<std::string> textureKeysLower;
-												Initial commit: wowee native WoW 3.3.5a client

											
										
										
											2026-02-02 12:24:50 -08:00
+								    if (assetManager) {
-												Make InvisibleTrap objects invisible and non-collidable

Event objects like Fire Festival Fury Trap and Mercutio Post use
SpellObject_InvisibleTrap.m2 models which were rendering as white
tiles using WHITE1.BLP texture. These are meant to be invisible
spell trigger objects that should not obstruct player movement.

Changes:
- Added isInvisibleTrap flag to M2ModelGPU struct
- Detect models with "invisibletrap" in name during loading
- Skip rendering invisible trap instances in render loop
- Disable all collision checks (floor/wall/occlusion) for invisible traps
- Objects remain functional for spell casting but are now invisible

											
										
										
											2026-02-09 22:31:36 -08:00
+								        for (size_t ti = 0; ti < model.textures.size(); ti++) {
 								            const auto& tex = model.textures[ti];
-												Improve runtime stutter handling and ground clutter performance

- reduce per-tile ground clutter generation pressure and enforce tighter caps to avoid spikes

- remove expensive detail dedupe scans from the hot render path

- add progressive/lazy clutter updates around player movement to smooth frame pacing

- lower noisy runtime INFO logging to DEBUG/throttled paths

- keep terrain/game screen updates responsive while preserving existing behavior

											
										
										
											2026-02-21 01:26:16 -08:00
+								            std::string texPath = tex.filename;
 								            // Some extracted M2 texture strings contain embedded NUL + garbage suffix.
 								            // Truncate at first NUL so valid paths like "...foo.blp\0junk" still resolve.
 								            size_t nul = texPath.find('\0');
 								            if (nul != std::string::npos) {
 								                texPath.resize(nul);
 								            }
 								            if (!texPath.empty()) {
-												Refine lantern glow-card replacement and preserve lamp geometry

- Add per-batch glow metadata (lantern hint, card-like classification, tint)

- Track normalized texture keys and log lantern/light texture sets once for diagnostics

- Force sprite replacement for known Stormwind/Night Elf glow textures

- Keep lantern/light meshes visible while hiding only classified glow-card submeshes

- Choose glow sprite tint from texture hints (cool/red/warm) to avoid orange-only cards

- Broaden lantern glow detection to handle gameobject lights with nonstandard material setups

											
										
										
											2026-02-21 03:51:42 -08:00
+								                std::string texKey = texPath;
 								                std::replace(texKey.begin(), texKey.end(), '/', '\\');
 								                std::transform(texKey.begin(), texKey.end(), texKey.begin(),
 								                               [](unsigned char c) { return static_cast<char>(std::tolower(c)); });
-												Vulkan Nightmare

Experimentally bringing up Vulkan support

											
										
										
											2026-02-21 19:41:21 -08:00
+								                VkTexture* texPtr = loadTexture(texPath, tex.flags);
 								                bool failed = (texPtr == whiteTexture_.get());
-												Fix M2 white shell artifact from missing textures, add opacity track support

Batches whose named texture fails to load now render invisible instead of
white (the swampreeds01a.blp case causing a white shell around aquatic plants).

Also implements proper M2 opacity plumbing:
- Parse texture weight tracks (M2Track<fixed16>) and color animation alpha
  tracks (M2Color.alpha) to resolve per-batch opacity at load time
- Skip batches with batchOpacity < 0.01 in the render loop
- Apply M2Texture.flags (bit0=WrapS, bit1=WrapT) to GL sampler wrap mode
- Upload both UV sets (texCoords[0] and texCoords[1]) and select via
  textureUnit uniform, so batches referencing UV set 1 render correctly

											
										
										
											2026-02-17 23:52:44 -08:00
+								                if (failed) {
-												Stabilize streaming memory and parser handling; revert socket recv optimizations

											
										
										
											2026-02-22 07:26:54 -08:00
+								                    static uint32_t loggedModelTextureFails = 0;
 								                    static bool loggedModelTextureFailSuppressed = false;
 								                    if (loggedModelTextureFails < 250) {
-												Harden runtime against stutter-inducing log floods and missing display IDs

- Re-gate M2 glow diagnostics behind WOWEE_M2_GLOW_DIAG and DEBUG

- Deduplicate missing/failed texture warnings in asset and M2 texture loaders

- Deduplicate unhandled opcode warnings by state/opcode key in non-IN_WORLD phases

- Throttle malformed spline point-count warnings across world/classic/tbc parsers

- Ignore suspiciously huge display IDs from malformed packets with throttled warning

- Add nearest-known displayId model fallback cache for missing creature display mappings

- Clear display fallback caches on expansion reload and logout

											
										
										
											2026-02-21 04:05:53 -08:00
+								                        LOG_WARNING("M2 model ", model.name, " texture[", ti, "] failed to load: ", texPath);
-												Stabilize streaming memory and parser handling; revert socket recv optimizations

											
										
										
											2026-02-22 07:26:54 -08:00
+								                        ++loggedModelTextureFails;
 								                    } else if (!loggedModelTextureFailSuppressed) {
 								                        LOG_WARNING("M2 model texture-failure warnings suppressed after ",
 								                                    loggedModelTextureFails, " entries");
 								                        loggedModelTextureFailSuppressed = true;
-												Harden runtime against stutter-inducing log floods and missing display IDs

- Re-gate M2 glow diagnostics behind WOWEE_M2_GLOW_DIAG and DEBUG

- Deduplicate missing/failed texture warnings in asset and M2 texture loaders

- Deduplicate unhandled opcode warnings by state/opcode key in non-IN_WORLD phases

- Throttle malformed spline point-count warnings across world/classic/tbc parsers

- Ignore suspiciously huge display IDs from malformed packets with throttled warning

- Add nearest-known displayId model fallback cache for missing creature display mappings

- Clear display fallback caches on expansion reload and logout

											
										
										
											2026-02-21 04:05:53 -08:00
+								                    }
-												Make InvisibleTrap objects invisible and non-collidable

Event objects like Fire Festival Fury Trap and Mercutio Post use
SpellObject_InvisibleTrap.m2 models which were rendering as white
tiles using WHITE1.BLP texture. These are meant to be invisible
spell trigger objects that should not obstruct player movement.

Changes:
- Added isInvisibleTrap flag to M2ModelGPU struct
- Detect models with "invisibletrap" in name during loading
- Skip rendering invisible trap instances in render loop
- Disable all collision checks (floor/wall/occlusion) for invisible traps
- Objects remain functional for spell casting but are now invisible

											
										
										
											2026-02-09 22:31:36 -08:00
+								                }
 								                if (isInvisibleTrap) {
-												Improve runtime stutter handling and ground clutter performance

- reduce per-tile ground clutter generation pressure and enforce tighter caps to avoid spikes

- remove expensive detail dedupe scans from the hot render path

- add progressive/lazy clutter updates around player movement to smooth frame pacing

- lower noisy runtime INFO logging to DEBUG/throttled paths

- keep terrain/game screen updates responsive while preserving existing behavior

											
										
										
											2026-02-21 01:26:16 -08:00
+								                    LOG_INFO("  InvisibleTrap texture[", ti, "]: ", texPath, " -> ", (failed ? "WHITE" : "OK"));
-												Make InvisibleTrap objects invisible and non-collidable

Event objects like Fire Festival Fury Trap and Mercutio Post use
SpellObject_InvisibleTrap.m2 models which were rendering as white
tiles using WHITE1.BLP texture. These are meant to be invisible
spell trigger objects that should not obstruct player movement.

Changes:
- Added isInvisibleTrap flag to M2ModelGPU struct
- Detect models with "invisibletrap" in name during loading
- Skip rendering invisible trap instances in render loop
- Disable all collision checks (floor/wall/occlusion) for invisible traps
- Objects remain functional for spell casting but are now invisible

											
										
										
											2026-02-09 22:31:36 -08:00
+								                }
-												Vulkan Nightmare

Experimentally bringing up Vulkan support

											
										
										
											2026-02-21 19:41:21 -08:00
+								                allTextures.push_back(texPtr);
-												Fix M2 white shell artifact from missing textures, add opacity track support

Batches whose named texture fails to load now render invisible instead of
white (the swampreeds01a.blp case causing a white shell around aquatic plants).

Also implements proper M2 opacity plumbing:
- Parse texture weight tracks (M2Track<fixed16>) and color animation alpha
  tracks (M2Color.alpha) to resolve per-batch opacity at load time
- Skip batches with batchOpacity < 0.01 in the render loop
- Apply M2Texture.flags (bit0=WrapS, bit1=WrapT) to GL sampler wrap mode
- Upload both UV sets (texCoords[0] and texCoords[1]) and select via
  textureUnit uniform, so batches referencing UV set 1 render correctly

											
										
										
											2026-02-17 23:52:44 -08:00
+								                textureLoadFailed.push_back(failed);
-												Refine lantern glow-card replacement and preserve lamp geometry

- Add per-batch glow metadata (lantern hint, card-like classification, tint)

- Track normalized texture keys and log lantern/light texture sets once for diagnostics

- Force sprite replacement for known Stormwind/Night Elf glow textures

- Keep lantern/light meshes visible while hiding only classified glow-card submeshes

- Choose glow sprite tint from texture hints (cool/red/warm) to avoid orange-only cards

- Broaden lantern glow detection to handle gameobject lights with nonstandard material setups

											
										
										
											2026-02-21 03:51:42 -08:00
+								                textureKeysLower.push_back(std::move(texKey));
-												Initial commit: wowee native WoW 3.3.5a client

											
										
										
											2026-02-02 12:24:50 -08:00
+								            } else {
-												Make InvisibleTrap objects invisible and non-collidable

Event objects like Fire Festival Fury Trap and Mercutio Post use
SpellObject_InvisibleTrap.m2 models which were rendering as white
tiles using WHITE1.BLP texture. These are meant to be invisible
spell trigger objects that should not obstruct player movement.

Changes:
- Added isInvisibleTrap flag to M2ModelGPU struct
- Detect models with "invisibletrap" in name during loading
- Skip rendering invisible trap instances in render loop
- Disable all collision checks (floor/wall/occlusion) for invisible traps
- Objects remain functional for spell casting but are now invisible

											
										
										
											2026-02-09 22:31:36 -08:00
+								                if (isInvisibleTrap) {
 								                    LOG_INFO("  InvisibleTrap texture[", ti, "]: EMPTY (using white fallback)");
 								                }
-												Vulkan Nightmare

Experimentally bringing up Vulkan support

											
										
										
											2026-02-21 19:41:21 -08:00
+								                allTextures.push_back(whiteTexture_.get());
-												Fix M2 white shell artifact from missing textures, add opacity track support

Batches whose named texture fails to load now render invisible instead of
white (the swampreeds01a.blp case causing a white shell around aquatic plants).

Also implements proper M2 opacity plumbing:
- Parse texture weight tracks (M2Track<fixed16>) and color animation alpha
  tracks (M2Color.alpha) to resolve per-batch opacity at load time
- Skip batches with batchOpacity < 0.01 in the render loop
- Apply M2Texture.flags (bit0=WrapS, bit1=WrapT) to GL sampler wrap mode
- Upload both UV sets (texCoords[0] and texCoords[1]) and select via
  textureUnit uniform, so batches referencing UV set 1 render correctly

											
										
										
											2026-02-17 23:52:44 -08:00
+								                textureLoadFailed.push_back(false);  // Empty filename = intentional white (type!=0)
-												Refine lantern glow-card replacement and preserve lamp geometry

- Add per-batch glow metadata (lantern hint, card-like classification, tint)

- Track normalized texture keys and log lantern/light texture sets once for diagnostics

- Force sprite replacement for known Stormwind/Night Elf glow textures

- Keep lantern/light meshes visible while hiding only classified glow-card submeshes

- Choose glow sprite tint from texture hints (cool/red/warm) to avoid orange-only cards

- Broaden lantern glow detection to handle gameobject lights with nonstandard material setups

											
										
										
											2026-02-21 03:51:42 -08:00
+								                textureKeysLower.emplace_back();
 								            }
 								        }
 								    }
 								    static const bool kGlowDiag = envFlagEnabled("WOWEE_M2_GLOW_DIAG", false);
-												Harden runtime against stutter-inducing log floods and missing display IDs

- Re-gate M2 glow diagnostics behind WOWEE_M2_GLOW_DIAG and DEBUG

- Deduplicate missing/failed texture warnings in asset and M2 texture loaders

- Deduplicate unhandled opcode warnings by state/opcode key in non-IN_WORLD phases

- Throttle malformed spline point-count warnings across world/classic/tbc parsers

- Ignore suspiciously huge display IDs from malformed packets with throttled warning

- Add nearest-known displayId model fallback cache for missing creature display mappings

- Clear display fallback caches on expansion reload and logout

											
										
										
											2026-02-21 04:05:53 -08:00
+								    if (kGlowDiag) {
-												Refine lantern glow-card replacement and preserve lamp geometry

- Add per-batch glow metadata (lantern hint, card-like classification, tint)

- Track normalized texture keys and log lantern/light texture sets once for diagnostics

- Force sprite replacement for known Stormwind/Night Elf glow textures

- Keep lantern/light meshes visible while hiding only classified glow-card submeshes

- Choose glow sprite tint from texture hints (cool/red/warm) to avoid orange-only cards

- Broaden lantern glow detection to handle gameobject lights with nonstandard material setups

											
										
										
											2026-02-21 03:51:42 -08:00
+								        std::string lowerName = model.name;
 								        std::transform(lowerName.begin(), lowerName.end(), lowerName.begin(),
 								                       [](unsigned char c) { return static_cast<char>(std::tolower(c)); });
 								        const bool lanternLike =
 								            (lowerName.find("lantern") != std::string::npos) ||
 								            (lowerName.find("lamp") != std::string::npos) ||
 								            (lowerName.find("light") != std::string::npos);
-												Harden runtime against stutter-inducing log floods and missing display IDs

- Re-gate M2 glow diagnostics behind WOWEE_M2_GLOW_DIAG and DEBUG

- Deduplicate missing/failed texture warnings in asset and M2 texture loaders

- Deduplicate unhandled opcode warnings by state/opcode key in non-IN_WORLD phases

- Throttle malformed spline point-count warnings across world/classic/tbc parsers

- Ignore suspiciously huge display IDs from malformed packets with throttled warning

- Add nearest-known displayId model fallback cache for missing creature display mappings

- Clear display fallback caches on expansion reload and logout

											
										
										
											2026-02-21 04:05:53 -08:00
+								        if (lanternLike) {
-												Refine lantern glow-card replacement and preserve lamp geometry

- Add per-batch glow metadata (lantern hint, card-like classification, tint)

- Track normalized texture keys and log lantern/light texture sets once for diagnostics

- Force sprite replacement for known Stormwind/Night Elf glow textures

- Keep lantern/light meshes visible while hiding only classified glow-card submeshes

- Choose glow sprite tint from texture hints (cool/red/warm) to avoid orange-only cards

- Broaden lantern glow detection to handle gameobject lights with nonstandard material setups

											
										
										
											2026-02-21 03:51:42 -08:00
+								            for (size_t ti = 0; ti < model.textures.size(); ++ti) {
 								                const std::string key = (ti < textureKeysLower.size()) ? textureKeysLower[ti] : std::string();
-												Harden runtime against stutter-inducing log floods and missing display IDs

- Re-gate M2 glow diagnostics behind WOWEE_M2_GLOW_DIAG and DEBUG

- Deduplicate missing/failed texture warnings in asset and M2 texture loaders

- Deduplicate unhandled opcode warnings by state/opcode key in non-IN_WORLD phases

- Throttle malformed spline point-count warnings across world/classic/tbc parsers

- Ignore suspiciously huge display IDs from malformed packets with throttled warning

- Add nearest-known displayId model fallback cache for missing creature display mappings

- Clear display fallback caches on expansion reload and logout

											
										
										
											2026-02-21 04:05:53 -08:00
+								                LOG_DEBUG("M2 GLOW TEX '", model.name, "' tex[", ti, "]='", key, "' flags=0x",
 								                          std::hex, model.textures[ti].flags, std::dec);
-												Initial commit: wowee native WoW 3.3.5a client

											
										
										
											2026-02-02 12:24:50 -08:00
+								            }
 								        }
 								    }
-												Enable M2 particle emitters with correct WotLK struct parsing and overflow guards

											
										
										
											2026-02-06 20:57:02 -08:00
+								    // Copy particle emitter data and resolve textures
 								    gpuModel.particleEmitters = model.particleEmitters;
-												Vulcan Nightmare

Experimentally bringing up Vulkan support

											
										
										
											2026-02-21 19:41:21 -08:00
+								    gpuModel.particleTextures.resize(model.particleEmitters.size(), whiteTexture_.get());
-												Enable M2 particle emitters with correct WotLK struct parsing and overflow guards

											
										
										
											2026-02-06 20:57:02 -08:00
+								    for (size_t ei = 0; ei < model.particleEmitters.size(); ei++) {
 								        uint16_t texIdx = model.particleEmitters[ei].texture;
-												Vulcan Nightmare

Experimentally bringing up Vulkan support

											
										
										
											2026-02-21 19:41:21 -08:00
+								        if (texIdx < allTextures.size() && allTextures[texIdx] != nullptr) {
-												Enable M2 particle emitters with correct WotLK struct parsing and overflow guards

											
										
										
											2026-02-06 20:57:02 -08:00
+								            gpuModel.particleTextures[ei] = allTextures[texIdx];
 								        }
 								    }
-												Render M2 glow batches as billboarded light sprites

Replace flat mesh rendering of additive/mod blend batches (blendMode >= 3)
with camera-facing point sprites using a soft radial gradient texture and
additive blending. Adds M2 particle emitter infrastructure (structs, shader,
parsing stubs) but disables emitter parsing — the assumed 476-byte struct
size is wrong for WotLK 3.3.5a, causing misaligned reads that explode RAM.

											
										
										
											2026-02-06 08:58:26 -08:00
-												Implement M2 texture animation (UV scrolling) for fountain water

Parse M2TextureTransform entries and texture transform lookups from the
M2 binary, then apply per-batch UV offsets in the vertex shader using
the existing animation time base and global sequence durations.

											
										
										
											2026-02-06 01:49:27 -08:00
+								    // Copy texture transform data for UV animation
 								    gpuModel.textureTransforms = model.textureTransforms;
 								    gpuModel.textureTransformLookup = model.textureTransformLookup;
 								    gpuModel.hasTextureAnimation = false;
-												Initial commit: wowee native WoW 3.3.5a client

											
										
										
											2026-02-02 12:24:50 -08:00
+								    // Build per-batch GPU entries
 								    if (!model.batches.empty()) {
 								        for (const auto& batch : model.batches) {
 								            M2ModelGPU::BatchGPU bgpu;
 								            bgpu.indexStart = batch.indexStart;
 								            bgpu.indexCount = batch.indexCount;
-												Implement M2 texture animation (UV scrolling) for fountain water

Parse M2TextureTransform entries and texture transform lookups from the
M2 binary, then apply per-batch UV offsets in the vertex shader using
the existing animation time base and global sequence durations.

											
										
										
											2026-02-06 01:49:27 -08:00
+								            // Store texture animation index from batch
 								            bgpu.textureAnimIndex = batch.textureAnimIndex;
 								            if (bgpu.textureAnimIndex != 0xFFFF) {
 								                gpuModel.hasTextureAnimation = true;
 								            }
-												Add unlit rendering for M2 glow/additive batches

Batches with the M2 unlit material flag (0x01) or additive blend modes
(3+) now skip lighting, shadows, and fog, emitting texture color directly.
Fixes lantern glow quads appearing as dull transparent circles.

											
										
										
											2026-02-06 03:28:21 -08:00
+								            // Store blend mode and flags from material
-												Parse M2 render flags and apply per-batch blend modes

Water/lava batches in fountain and Ironforge M2 models use non-opaque
blend modes (alpha, additive) defined in the M2 material table. Without
parsing these, they rendered as solid surfaces extending visibly beyond
their containers. Now each batch looks up its blend mode from the
material array and sets the appropriate GL blend function.

											
										
										
											2026-02-06 01:54:25 -08:00
+								            if (batch.materialIndex < model.materials.size()) {
 								                bgpu.blendMode = model.materials[batch.materialIndex].blendMode;
-												Add unlit rendering for M2 glow/additive batches

Batches with the M2 unlit material flag (0x01) or additive blend modes
(3+) now skip lighting, shadows, and fog, emitting texture color directly.
Fixes lantern glow quads appearing as dull transparent circles.

											
										
										
											2026-02-06 03:28:21 -08:00
+								                bgpu.materialFlags = model.materials[batch.materialIndex].flags;
-												Parse M2 render flags and apply per-batch blend modes

Water/lava batches in fountain and Ironforge M2 models use non-opaque
blend modes (alpha, additive) defined in the M2 material table. Without
parsing these, they rendered as solid surfaces extending visibly beyond
their containers. Now each batch looks up its blend mode from the
material array and sets the appropriate GL blend function.

											
										
										
											2026-02-06 01:54:25 -08:00
+								            }
-												Implement comprehensive taxi flight optimizations and proper spline paths

Major improvements:
- Load TaxiPathNode.dbc for actual curved flight paths (no more flying through terrain)
- Add 3-second mounting delay with terrain precaching for entire route
- Implement LOD system for M2 models with distance-based quality reduction
- Add circular terrain loading pattern (13 tiles vs 25, 48% reduction)
- Increase terrain cache from 2GB to 8GB for modern systems

Performance optimizations during taxi:
- Cull small M2 models (boundRadius < 3.0) - not visible from altitude
- Disable particle systems (weather, smoke, M2 emitters) - saves ~7000 particles
- Disable specular lighting on M2 models - saves Blinn-Phong calculations
- Disable shadow mapping on M2 models - saves shadow map sampling and PCF

Technical details:
- Parse TaxiPathNode.dbc spline waypoints for curved paths around terrain
- Build full path from node pairs using TaxiPathEdge lookup
- Precache callback triggers during mounting delay for smooth takeoff
- Circular tile loading uses Euclidean distance check (dx²+dy² <= r²)
- LOD fallback to base mesh when higher LODs unavailable

Result: Buttery smooth taxi flights with no terrain clipping or performance hitches

											
										
										
											2026-02-08 21:32:38 -08:00
+								            // Copy LOD level from batch
 								            bgpu.submeshLevel = batch.submeshLevel;
-												Initial commit: wowee native WoW 3.3.5a client

											
										
										
											2026-02-02 12:24:50 -08:00
+								            // Resolve texture: batch.textureIndex → textureLookup → allTextures
-												Vulcan Nightmare

Experimentally bringing up Vulkan support

											
										
										
											2026-02-21 19:41:21 -08:00
+								            VkTexture* tex = whiteTexture_.get();
-												Fix M2 white shell artifact from missing textures, add opacity track support

Batches whose named texture fails to load now render invisible instead of
white (the swampreeds01a.blp case causing a white shell around aquatic plants).

Also implements proper M2 opacity plumbing:
- Parse texture weight tracks (M2Track<fixed16>) and color animation alpha
  tracks (M2Color.alpha) to resolve per-batch opacity at load time
- Skip batches with batchOpacity < 0.01 in the render loop
- Apply M2Texture.flags (bit0=WrapS, bit1=WrapT) to GL sampler wrap mode
- Upload both UV sets (texCoords[0] and texCoords[1]) and select via
  textureUnit uniform, so batches referencing UV set 1 render correctly

											
										
										
											2026-02-17 23:52:44 -08:00
+								            bool texFailed = false;
-												Refine lantern glow-card replacement and preserve lamp geometry

- Add per-batch glow metadata (lantern hint, card-like classification, tint)

- Track normalized texture keys and log lantern/light texture sets once for diagnostics

- Force sprite replacement for known Stormwind/Night Elf glow textures

- Keep lantern/light meshes visible while hiding only classified glow-card submeshes

- Choose glow sprite tint from texture hints (cool/red/warm) to avoid orange-only cards

- Broaden lantern glow detection to handle gameobject lights with nonstandard material setups

											
										
										
											2026-02-21 03:51:42 -08:00
+								            std::string batchTexKeyLower;
-												Initial commit: wowee native WoW 3.3.5a client

											
										
										
											2026-02-02 12:24:50 -08:00
+								            if (batch.textureIndex < model.textureLookup.size()) {
 								                uint16_t texIdx = model.textureLookup[batch.textureIndex];
 								                if (texIdx < allTextures.size()) {
 								                    tex = allTextures[texIdx];
-												Fix M2 white shell artifact from missing textures, add opacity track support

Batches whose named texture fails to load now render invisible instead of
white (the swampreeds01a.blp case causing a white shell around aquatic plants).

Also implements proper M2 opacity plumbing:
- Parse texture weight tracks (M2Track<fixed16>) and color animation alpha
  tracks (M2Color.alpha) to resolve per-batch opacity at load time
- Skip batches with batchOpacity < 0.01 in the render loop
- Apply M2Texture.flags (bit0=WrapS, bit1=WrapT) to GL sampler wrap mode
- Upload both UV sets (texCoords[0] and texCoords[1]) and select via
  textureUnit uniform, so batches referencing UV set 1 render correctly

											
										
										
											2026-02-17 23:52:44 -08:00
+								                    texFailed = (texIdx < textureLoadFailed.size()) && textureLoadFailed[texIdx];
-												Refine lantern glow-card replacement and preserve lamp geometry

- Add per-batch glow metadata (lantern hint, card-like classification, tint)

- Track normalized texture keys and log lantern/light texture sets once for diagnostics

- Force sprite replacement for known Stormwind/Night Elf glow textures

- Keep lantern/light meshes visible while hiding only classified glow-card submeshes

- Choose glow sprite tint from texture hints (cool/red/warm) to avoid orange-only cards

- Broaden lantern glow detection to handle gameobject lights with nonstandard material setups

											
										
										
											2026-02-21 03:51:42 -08:00
+								                    if (texIdx < textureKeysLower.size()) {
 								                        batchTexKeyLower = textureKeysLower[texIdx];
 								                    }
-												Fix M2 white shell artifact from missing textures, add opacity track support

Batches whose named texture fails to load now render invisible instead of
white (the swampreeds01a.blp case causing a white shell around aquatic plants).

Also implements proper M2 opacity plumbing:
- Parse texture weight tracks (M2Track<fixed16>) and color animation alpha
  tracks (M2Color.alpha) to resolve per-batch opacity at load time
- Skip batches with batchOpacity < 0.01 in the render loop
- Apply M2Texture.flags (bit0=WrapS, bit1=WrapT) to GL sampler wrap mode
- Upload both UV sets (texCoords[0] and texCoords[1]) and select via
  textureUnit uniform, so batches referencing UV set 1 render correctly

											
										
										
											2026-02-17 23:52:44 -08:00
+								                }
 								                if (texIdx < model.textures.size()) {
 								                    bgpu.texFlags = static_cast<uint8_t>(model.textures[texIdx].flags & 0x3);
-												Initial commit: wowee native WoW 3.3.5a client

											
										
										
											2026-02-02 12:24:50 -08:00
+								                }
 								            } else if (!allTextures.empty()) {
 								                tex = allTextures[0];
-												Fix M2 white shell artifact from missing textures, add opacity track support

Batches whose named texture fails to load now render invisible instead of
white (the swampreeds01a.blp case causing a white shell around aquatic plants).

Also implements proper M2 opacity plumbing:
- Parse texture weight tracks (M2Track<fixed16>) and color animation alpha
  tracks (M2Color.alpha) to resolve per-batch opacity at load time
- Skip batches with batchOpacity < 0.01 in the render loop
- Apply M2Texture.flags (bit0=WrapS, bit1=WrapT) to GL sampler wrap mode
- Upload both UV sets (texCoords[0] and texCoords[1]) and select via
  textureUnit uniform, so batches referencing UV set 1 render correctly

											
										
										
											2026-02-17 23:52:44 -08:00
+								                texFailed = !textureLoadFailed.empty() && textureLoadFailed[0];
-												Refine lantern glow-card replacement and preserve lamp geometry

- Add per-batch glow metadata (lantern hint, card-like classification, tint)

- Track normalized texture keys and log lantern/light texture sets once for diagnostics

- Force sprite replacement for known Stormwind/Night Elf glow textures

- Keep lantern/light meshes visible while hiding only classified glow-card submeshes

- Choose glow sprite tint from texture hints (cool/red/warm) to avoid orange-only cards

- Broaden lantern glow detection to handle gameobject lights with nonstandard material setups

											
										
										
											2026-02-21 03:51:42 -08:00
+								                if (!textureKeysLower.empty()) {
 								                    batchTexKeyLower = textureKeysLower[0];
 								                }
-												Initial commit: wowee native WoW 3.3.5a client

											
										
										
											2026-02-02 12:24:50 -08:00
+								            }
-												Improve runtime stutter handling and ground clutter performance

- reduce per-tile ground clutter generation pressure and enforce tighter caps to avoid spikes

- remove expensive detail dedupe scans from the hot render path

- add progressive/lazy clutter updates around player movement to smooth frame pacing

- lower noisy runtime INFO logging to DEBUG/throttled paths

- keep terrain/game screen updates responsive while preserving existing behavior

											
										
										
											2026-02-21 01:26:16 -08:00
 								            if (texFailed && groundDetailModel) {
 								                static const std::string kDetailFallbackTexture = "World\\NoDXT\\Detail\\8des_detaildoodads01.blp";
-												Vulcan Nightmare

Experimentally bringing up Vulkan support

											
										
										
											2026-02-21 19:41:21 -08:00
+								                VkTexture* fallbackTex = loadTexture(kDetailFallbackTexture, 0);
 								                if (fallbackTex != nullptr && fallbackTex != whiteTexture_.get()) {
-												Improve runtime stutter handling and ground clutter performance

- reduce per-tile ground clutter generation pressure and enforce tighter caps to avoid spikes

- remove expensive detail dedupe scans from the hot render path

- add progressive/lazy clutter updates around player movement to smooth frame pacing

- lower noisy runtime INFO logging to DEBUG/throttled paths

- keep terrain/game screen updates responsive while preserving existing behavior

											
										
										
											2026-02-21 01:26:16 -08:00
+								                    tex = fallbackTex;
 								                    texFailed = false;
 								                }
 								            }
-												Initial commit: wowee native WoW 3.3.5a client

											
										
										
											2026-02-02 12:24:50 -08:00
+								            bgpu.texture = tex;
-												Refine lantern glow-card replacement and preserve lamp geometry

- Add per-batch glow metadata (lantern hint, card-like classification, tint)

- Track normalized texture keys and log lantern/light texture sets once for diagnostics

- Force sprite replacement for known Stormwind/Night Elf glow textures

- Keep lantern/light meshes visible while hiding only classified glow-card submeshes

- Choose glow sprite tint from texture hints (cool/red/warm) to avoid orange-only cards

- Broaden lantern glow detection to handle gameobject lights with nonstandard material setups

											
										
										
											2026-02-21 03:51:42 -08:00
+								            const bool exactLanternGlowTexture =
 								                (batchTexKeyLower == "world\\expansion06\\doodads\\nightelf\\7ne_druid_streetlamp01_light.blp") ||
 								                (batchTexKeyLower == "world\\generic\\nightelf\\passive doodads\\lamps\\glowblue32.blp") ||
 								                (batchTexKeyLower == "world\\generic\\human\\passive doodads\\stormwind\\t_vfx_glow01_64.blp") ||
 								                (batchTexKeyLower == "world\\azeroth\\karazahn\\passivedoodads\\bonfire\\flamelicksmallblue.blp") ||
 								                (batchTexKeyLower == "world\\generic\\nightelf\\passive doodads\\magicalimplements\\glow.blp");
 								            const bool texHasGlowToken =
 								                (batchTexKeyLower.find("glow") != std::string::npos) ||
 								                (batchTexKeyLower.find("flare") != std::string::npos) ||
 								                (batchTexKeyLower.find("halo") != std::string::npos) ||
 								                (batchTexKeyLower.find("light") != std::string::npos);
 								            const bool texHasFlameToken =
 								                (batchTexKeyLower.find("flame") != std::string::npos) ||
 								                (batchTexKeyLower.find("fire") != std::string::npos) ||
 								                (batchTexKeyLower.find("flamelick") != std::string::npos) ||
 								                (batchTexKeyLower.find("ember") != std::string::npos);
 								            const bool texGlowCardToken =
 								                (batchTexKeyLower.find("glow") != std::string::npos) ||
 								                (batchTexKeyLower.find("flamelick") != std::string::npos) ||
 								                (batchTexKeyLower.find("lensflare") != std::string::npos) ||
 								                (batchTexKeyLower.find("t_vfx") != std::string::npos) ||
 								                (batchTexKeyLower.find("lightbeam") != std::string::npos) ||
 								                (batchTexKeyLower.find("glowball") != std::string::npos) ||
 								                (batchTexKeyLower.find("genericglow") != std::string::npos);
 								            const bool texLikelyFlame =
 								                (batchTexKeyLower.find("fire") != std::string::npos) ||
 								                (batchTexKeyLower.find("flame") != std::string::npos) ||
 								                (batchTexKeyLower.find("torch") != std::string::npos);
 								            const bool texLanternFamily =
 								                (batchTexKeyLower.find("lantern") != std::string::npos) ||
 								                (batchTexKeyLower.find("lamp") != std::string::npos) ||
 								                (batchTexKeyLower.find("elf") != std::string::npos) ||
 								                (batchTexKeyLower.find("silvermoon") != std::string::npos) ||
 								                (batchTexKeyLower.find("quel") != std::string::npos) ||
 								                (batchTexKeyLower.find("thalas") != std::string::npos);
 								            const bool modelLanternFamily =
 								                (lowerName.find("lantern") != std::string::npos) ||
 								                (lowerName.find("lamp") != std::string::npos) ||
 								                (lowerName.find("light") != std::string::npos);
 								            bgpu.lanternGlowHint =
 								                exactLanternGlowTexture ||
 								                ((texHasGlowToken || (modelLanternFamily && texHasFlameToken)) &&
 								                 (texLanternFamily || modelLanternFamily) &&
 								                 (!texLikelyFlame || modelLanternFamily));
 								            bgpu.glowCardLike = bgpu.lanternGlowHint && texGlowCardToken;
 								            const bool texCoolTint =
 								                (batchTexKeyLower.find("blue") != std::string::npos) ||
 								                (batchTexKeyLower.find("nightelf") != std::string::npos) ||
 								                (batchTexKeyLower.find("arcane") != std::string::npos);
 								            const bool texRedTint =
 								                (batchTexKeyLower.find("red") != std::string::npos) ||
 								                (batchTexKeyLower.find("scarlet") != std::string::npos) ||
 								                (batchTexKeyLower.find("ruby") != std::string::npos);
 								            bgpu.glowTint = texCoolTint ? 1 : (texRedTint ? 2 : 0);
-												Fix quest flow regressions, tooltip compare stats, and M2 alpha-key handling

											
										
										
											2026-02-19 02:27:01 -08:00
+								            bool texHasAlpha = false;
-												Vulcan Nightmare

Experimentally bringing up Vulkan support

											
										
										
											2026-02-21 19:41:21 -08:00
+								            if (tex != nullptr && tex != whiteTexture_.get()) {
 								                auto ait = textureHasAlphaByPtr_.find(tex);
 								                texHasAlpha = (ait != textureHasAlphaByPtr_.end()) ? ait->second : false;
-												Fix quest flow regressions, tooltip compare stats, and M2 alpha-key handling

											
										
										
											2026-02-19 02:27:01 -08:00
+								            }
 								            bgpu.hasAlpha = texHasAlpha;
 								            bool colorKeyBlack = false;
-												Vulcan Nightmare

Experimentally bringing up Vulkan support

											
										
										
											2026-02-21 19:41:21 -08:00
+								            if (tex != nullptr && tex != whiteTexture_.get()) {
 								                auto cit = textureColorKeyBlackByPtr_.find(tex);
 								                colorKeyBlack = (cit != textureColorKeyBlackByPtr_.end()) ? cit->second : false;
-												Fix quest flow regressions, tooltip compare stats, and M2 alpha-key handling

											
										
										
											2026-02-19 02:27:01 -08:00
+								            }
 								            bgpu.colorKeyBlack = colorKeyBlack;
-												Update opcode data and movement integration docs/code

											
										
										
											2026-02-18 03:15:25 -08:00
+								            // textureCoordIndex is an index into a texture coord combo table, not directly
 								            // a UV set selector. Most batches have index=0 (UV set 0). We always use UV set 0
 								            // since we don't have the full combo table — dual-UV effects are rare edge cases.
 								            bgpu.textureUnit = 0;
 								            // Batch is hidden only when its named texture failed to load (avoids white shell artifacts).
 								            // Do NOT bake transparency/color animation tracks here — they animate over time and
 								            // baking the first keyframe value causes legitimate meshes to become invisible.
-												Improve runtime stutter handling and ground clutter performance

- reduce per-tile ground clutter generation pressure and enforce tighter caps to avoid spikes

- remove expensive detail dedupe scans from the hot render path

- add progressive/lazy clutter updates around player movement to smooth frame pacing

- lower noisy runtime INFO logging to DEBUG/throttled paths

- keep terrain/game screen updates responsive while preserving existing behavior

											
										
										
											2026-02-21 01:26:16 -08:00
+								            // Keep terrain clutter visible even when source texture paths are malformed.
 								            bgpu.batchOpacity = (texFailed && !groundDetailModel) ? 0.0f : 1.0f;
-												Render M2 glow batches as billboarded light sprites

Replace flat mesh rendering of additive/mod blend batches (blendMode >= 3)
with camera-facing point sprites using a soft radial gradient texture and
additive blending. Adds M2 particle emitter infrastructure (structs, shader,
parsing stubs) but disables emitter parsing — the assumed 476-byte struct
size is wrong for WotLK 3.3.5a, causing misaligned reads that explode RAM.

											
										
										
											2026-02-06 08:58:26 -08:00
 								            // Compute batch center and radius for glow sprite positioning
-												Fix black background on lamp/lantern/torch glow effects

Three-part fix for glow textures showing opaque black rectangles instead
of being transparent:

1. Pass blend mode to fragment shader via uBlendMode uniform. For additive
   blend modes (3=Add, 6=BlendAdd), discard near-black fragments (maxRGB
   < 0.1) since they contribute nothing visually but render as dark
   rectangles against sky/terrain.

2. Expand colorKeyBlack texture keyword detection to include "lamp",
   "lantern", "glow", "flare", "brazier", "campfire", "bonfire" in
   addition to the existing "candle", "flame", "fire", "torch".

3. Expand flameLikeModel detection for glow sprite conversion to include
   "brazier", "campfire", "bonfire". Also compute glow centers for
   colorKeyBlack batches (not just blendMode >= 3) so glow sprites
   position correctly for all flame-like objects.

											
										
										
											2026-02-19 18:19:52 -08:00
+								            if ((bgpu.blendMode >= 3 || bgpu.colorKeyBlack) && batch.indexCount > 0) {
-												Render M2 glow batches as billboarded light sprites

Replace flat mesh rendering of additive/mod blend batches (blendMode >= 3)
with camera-facing point sprites using a soft radial gradient texture and
additive blending. Adds M2 particle emitter infrastructure (structs, shader,
parsing stubs) but disables emitter parsing — the assumed 476-byte struct
size is wrong for WotLK 3.3.5a, causing misaligned reads that explode RAM.

											
										
										
											2026-02-06 08:58:26 -08:00
+								                glm::vec3 sum(0.0f);
 								                uint32_t counted = 0;
 								                for (uint32_t j = batch.indexStart; j < batch.indexStart + batch.indexCount; j++) {
 								                    if (j < model.indices.size()) {
 								                        uint16_t vi = model.indices[j];
 								                        if (vi < model.vertices.size()) {
 								                            sum += model.vertices[vi].position;
 								                            counted++;
 								                        }
 								                    }
 								                }
 								                if (counted > 0) {
 								                    bgpu.center = sum / static_cast<float>(counted);
 								                    float maxDist = 0.0f;
 								                    for (uint32_t j = batch.indexStart; j < batch.indexStart + batch.indexCount; j++) {
 								                        if (j < model.indices.size()) {
 								                            uint16_t vi = model.indices[j];
 								                            if (vi < model.vertices.size()) {
 								                                float d = glm::length(model.vertices[vi].position - bgpu.center);
 								                                maxDist = std::max(maxDist, d);
 								                            }
 								                        }
 								                    }
 								                    bgpu.glowSize = std::max(maxDist, 0.5f);
 								                }
 								            }
-												Fix NPC apparel fallback and reduce world-entry stutter

Hide NPC cloak/object-skin mesh when no cape texture resolves by using a transparent texture fallback, preventing skin-texture bleed on cloaks. Tighten NPC equipment region compositing by slot and add safe humanoid geoset selection to avoid robe-over-pants conflicts and odd pants texturing.

Reduce login/runtime hitching by deferring non-critical world-system initialization across frames, lowering per-frame transport doodad spawn budget, and demoting high-volume transport/MO_TRANSPORT diagnostics to debug. Gate M2 glow diagnostics behind WOWEE_M2_GLOW_DIAG and make zone music prewarm opt-in via WOWEE_PREWARM_ZONE_MUSIC.

											
										
										
											2026-02-20 20:31:04 -08:00
+								            // Optional diagnostics for glow/light batches (disabled by default).
 								            if (kGlowDiag &&
 								                (lowerName.find("light") != std::string::npos ||
 								                 lowerName.find("lamp") != std::string::npos ||
 								                 lowerName.find("lantern") != std::string::npos)) {
-												Harden runtime against stutter-inducing log floods and missing display IDs

- Re-gate M2 glow diagnostics behind WOWEE_M2_GLOW_DIAG and DEBUG

- Deduplicate missing/failed texture warnings in asset and M2 texture loaders

- Deduplicate unhandled opcode warnings by state/opcode key in non-IN_WORLD phases

- Throttle malformed spline point-count warnings across world/classic/tbc parsers

- Ignore suspiciously huge display IDs from malformed packets with throttled warning

- Add nearest-known displayId model fallback cache for missing creature display mappings

- Clear display fallback caches on expansion reload and logout

											
										
										
											2026-02-21 04:05:53 -08:00
+								                LOG_DEBUG("M2 GLOW DIAG '", model.name, "' batch ", gpuModel.batches.size(),
 								                          ": blend=", bgpu.blendMode, " matFlags=0x",
 								                          std::hex, bgpu.materialFlags, std::dec,
 								                          " colorKey=", bgpu.colorKeyBlack ? "Y" : "N",
 								                          " hasAlpha=", bgpu.hasAlpha ? "Y" : "N",
 								                          " unlit=", (bgpu.materialFlags & 0x01) ? "Y" : "N",
 								                          " lanternHint=", bgpu.lanternGlowHint ? "Y" : "N",
 								                          " glowSize=", bgpu.glowSize,
 								                          " tex=", bgpu.texture,
 								                          " idxCount=", bgpu.indexCount);
-												Revert glow pixel detection and forced additive override, add diagnostics

The pixel content glow detection (>60% dark = glow) was too aggressive,
flagging dark metal textures on sconces as glow textures and making
structural geometry transparent. The forced additive blending for
colorKeyBlack batches compounded the issue.

Reverted both. Added per-batch diagnostic logging for models containing
"light", "lamp", or "lantern" to identify the actual blend modes and
material flags on Stormwind bridge lamps.

											
										
										
											2026-02-19 18:30:34 -08:00
+								            }
-												Initial commit: wowee native WoW 3.3.5a client

											
										
										
											2026-02-02 12:24:50 -08:00
+								            gpuModel.batches.push_back(bgpu);
 								        }
 								    } else {
 								        // Fallback: single batch covering all indices with first texture
 								        M2ModelGPU::BatchGPU bgpu;
 								        bgpu.indexStart = 0;
 								        bgpu.indexCount = gpuModel.indexCount;
-												Vulcan Nightmare

Experimentally bringing up Vulkan support

											
										
										
											2026-02-21 19:41:21 -08:00
+								        bgpu.texture = allTextures.empty() ? whiteTexture_.get() : allTextures[0];
-												Fix quest flow regressions, tooltip compare stats, and M2 alpha-key handling

											
										
										
											2026-02-19 02:27:01 -08:00
+								        bool texHasAlpha = false;
-												Vulcan Nightmare

Experimentally bringing up Vulkan support

											
										
										
											2026-02-21 19:41:21 -08:00
+								        if (bgpu.texture != nullptr && bgpu.texture != whiteTexture_.get()) {
 								            auto ait = textureHasAlphaByPtr_.find(bgpu.texture);
 								            texHasAlpha = (ait != textureHasAlphaByPtr_.end()) ? ait->second : false;
-												Fix quest flow regressions, tooltip compare stats, and M2 alpha-key handling

											
										
										
											2026-02-19 02:27:01 -08:00
+								        }
 								        bgpu.hasAlpha = texHasAlpha;
 								        bool colorKeyBlack = false;
-												Vulcan Nightmare

Experimentally bringing up Vulkan support

											
										
										
											2026-02-21 19:41:21 -08:00
+								        if (bgpu.texture != nullptr && bgpu.texture != whiteTexture_.get()) {
 								            auto cit = textureColorKeyBlackByPtr_.find(bgpu.texture);
 								            colorKeyBlack = (cit != textureColorKeyBlackByPtr_.end()) ? cit->second : false;
-												Fix quest flow regressions, tooltip compare stats, and M2 alpha-key handling

											
										
										
											2026-02-19 02:27:01 -08:00
+								        }
 								        bgpu.colorKeyBlack = colorKeyBlack;
-												Initial commit: wowee native WoW 3.3.5a client

											
										
										
											2026-02-02 12:24:50 -08:00
+								        gpuModel.batches.push_back(bgpu);
 								    }
-												Hide M2 particle emitter volumes rendering as grey boxes

M2 models like OrgrimmarFloatingEmbers and OrgrimmarSmokeEmitter have a
simple box mesh (24 verts, 36 indices) meant only to define particle
emitter bounds. Their blendMode was 0 (opaque), causing them to render
as large grey boxes. Detect these by checking for box geometry with
particle emitters and large bounds (>5 units), then mark as invisible.
Also add ANTIPORTAL and batch-disable flag checks to WMO group filtering.

											
										
										
											2026-02-16 19:50:35 -08:00
+								    // Detect particle emitter volume models: box mesh (24 verts, 36 indices)
 								    // with disproportionately large bounds. These are invisible bounding volumes
 								    // that only exist to spawn particles — their mesh should never be rendered.
-												Improve runtime stutter handling and ground clutter performance

- reduce per-tile ground clutter generation pressure and enforce tighter caps to avoid spikes

- remove expensive detail dedupe scans from the hot render path

- add progressive/lazy clutter updates around player movement to smooth frame pacing

- lower noisy runtime INFO logging to DEBUG/throttled paths

- keep terrain/game screen updates responsive while preserving existing behavior

											
										
										
											2026-02-21 01:26:16 -08:00
+								    if (!isInvisibleTrap && !groundDetailModel &&
 								        gpuModel.vertexCount <= 24 && gpuModel.indexCount <= 36
-												Hide M2 particle emitter volumes rendering as grey boxes

M2 models like OrgrimmarFloatingEmbers and OrgrimmarSmokeEmitter have a
simple box mesh (24 verts, 36 indices) meant only to define particle
emitter bounds. Their blendMode was 0 (opaque), causing them to render
as large grey boxes. Detect these by checking for box geometry with
particle emitters and large bounds (>5 units), then mark as invisible.
Also add ANTIPORTAL and batch-disable flag checks to WMO group filtering.

											
										
										
											2026-02-16 19:50:35 -08:00
+								        && !model.particleEmitters.empty()) {
 								        glm::vec3 size = gpuModel.boundMax - gpuModel.boundMin;
 								        float maxDim = std::max({size.x, size.y, size.z});
 								        if (maxDim > 5.0f) {
 								            gpuModel.isInvisibleTrap = true;
 								            LOG_DEBUG("M2 emitter volume hidden: '", model.name, "' size=(",
 								                      size.x, " x ", size.y, " x ", size.z, ")");
 								        }
 								    }
-												Batch GPU uploads to eliminate per-upload fence waits (stutter fix)

Every uploadBuffer/VkTexture::upload called immediateSubmit which did a
separate vkQueueSubmit + vkWaitForFences. Loading a single creature model
with textures caused 4-8+ fence waits; terrain chunks caused 80+ per batch.

Added beginUploadBatch/endUploadBatch to VkContext: records all upload
commands into a single command buffer, submits once with one fence wait.
Staging buffers are deferred for cleanup after the batch completes.

Wrapped in batch mode:
- CharacterRenderer::loadModel (creature VB/IB + textures)
- M2Renderer::loadModel (doodad VB/IB + textures)
- TerrainRenderer::loadTerrain/loadTerrainIncremental (chunk geometry + textures)
- TerrainRenderer::uploadPreloadedTextures
- WMORenderer::loadModel (group geometry + textures)

											
										
										
											2026-03-07 12:19:59 -08:00
+								    vkCtx_->endUploadBatch();
-												Vulcan Nightmare

Experimentally bringing up Vulkan support

											
										
										
											2026-02-21 19:41:21 -08:00
+								    // Allocate Vulkan descriptor sets and UBOs for each batch
 								    for (auto& bgpu : gpuModel.batches) {
 								        // Create combined UBO for M2Params (binding 1) + M2Material (binding 2)
 								        // We allocate them as separate buffers for clarity
 								        VmaAllocationInfo matAllocInfo{};
 								        {
 								            VkBufferCreateInfo bci{VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO};
 								            bci.size = sizeof(M2MaterialUBO);
 								            bci.usage = VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT;
 								            VmaAllocationCreateInfo aci{};
 								            aci.usage = VMA_MEMORY_USAGE_CPU_TO_GPU;
 								            aci.flags = VMA_ALLOCATION_CREATE_MAPPED_BIT;
 								            vmaCreateBuffer(vkCtx_->getAllocator(), &bci, &aci, &bgpu.materialUBO, &bgpu.materialUBOAlloc, &matAllocInfo);
 								            // Write initial material data (static per-batch — fadeAlpha/interiorDarken updated at draw time)
 								            M2MaterialUBO mat{};
 								            mat.hasTexture = (bgpu.texture != nullptr && bgpu.texture != whiteTexture_.get()) ? 1 : 0;
 								            mat.alphaTest = (bgpu.blendMode == 1 || (bgpu.blendMode >= 2 && !bgpu.hasAlpha)) ? 1 : 0;
 								            mat.colorKeyBlack = bgpu.colorKeyBlack ? 1 : 0;
 								            mat.colorKeyThreshold = 0.08f;
 								            mat.unlit = (bgpu.materialFlags & 0x01) ? 1 : 0;
 								            mat.blendMode = bgpu.blendMode;
 								            mat.fadeAlpha = 1.0f;
 								            mat.interiorDarken = 0.0f;
 								            mat.specularIntensity = 0.5f;
 								            memcpy(matAllocInfo.pMappedData, &mat, sizeof(mat));
-												Optimize M2/WMO render loop: cache UBO pointers, precompute model flags, reduce rebinds

- Cache material UBO mapped pointers at creation time, eliminating
  per-batch vmaGetAllocationInfo() calls in the hot render path
- Precompute foliage/elven/lantern/kobold model name classifications
  at load time instead of per-instance string operations every frame
- Remove redundant descriptor set and push constant rebinds on WMO
  pipeline switches (preserved across compatible layouts)
- Pre-allocate glow sprite descriptor set once at init instead of
  allocating from the pool every frame

											
										
										
											2026-02-23 06:06:24 -08:00
+								            bgpu.materialUBOMapped = matAllocInfo.pMappedData;
-												Vulcan Nightmare

Experimentally bringing up Vulkan support

											
										
										
											2026-02-21 19:41:21 -08:00
+								        }
 								        // Allocate descriptor set and write all bindings
 								        bgpu.materialSet = allocateMaterialSet();
 								        if (bgpu.materialSet) {
 								            VkTexture* batchTex = bgpu.texture ? bgpu.texture : whiteTexture_.get();
 								            VkDescriptorImageInfo imgInfo = batchTex->descriptorInfo();
 								            VkDescriptorBufferInfo matBufInfo{};
 								            matBufInfo.buffer = bgpu.materialUBO;
 								            matBufInfo.offset = 0;
 								            matBufInfo.range = sizeof(M2MaterialUBO);
 								            VkWriteDescriptorSet writes[2] = {};
 								            // binding 0: texture
 								            writes[0].sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET;
 								            writes[0].dstSet = bgpu.materialSet;
 								            writes[0].dstBinding = 0;
 								            writes[0].descriptorCount = 1;
 								            writes[0].descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER;
 								            writes[0].pImageInfo = &imgInfo;
 								            // binding 2: M2Material UBO
 								            writes[1].sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET;
 								            writes[1].dstSet = bgpu.materialSet;
 								            writes[1].dstBinding = 2;
 								            writes[1].descriptorCount = 1;
 								            writes[1].descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER;
 								            writes[1].pBufferInfo = &matBufInfo;
 								            vkUpdateDescriptorSets(vkCtx_->getDevice(), 2, writes, 0, nullptr);
 								        }
 								    }
-												Fix WMO wall collision, normal mapping, POM backfill, and M2/WMO rendering performance

- Fix MOPY flag check (0x08 not 0x01) for proper wall collision detection
- Cap MAX_PUSH to PLAYER_RADIUS to prevent gradual clip-through
- Fix WMO doodad quaternion component ordering (X/Y swap)
- Linear normal map strength blend in shader for smooth slider control
- Enable shadow sampling for interior WMO groups (covered outdoor areas)
- Backfill deferred normal/height maps after streaming with descriptor rebind
- M2: prepareRender only iterates animated instances, bone dirty flag
- M2: remove worker thread VMA allocation, skip unready bone instances
- WMO: persistent visibility vectors, sequential culling
- Add FSR EASU/RCAS shaders

											
										
										
											2026-03-07 22:03:28 -08:00
+								    // Pre-compute available LOD levels to avoid per-instance batch iteration
 								    gpuModel.availableLODs = 0;
 								    for (const auto& b : gpuModel.batches) {
 								        if (b.submeshLevel < 8) gpuModel.availableLODs |= (1u << b.submeshLevel);
 								    }
-												Initial commit: wowee native WoW 3.3.5a client

											
										
										
											2026-02-02 12:24:50 -08:00
+								    models[modelId] = std::move(gpuModel);
 								    LOG_DEBUG("Loaded M2 model: ", model.name, " (", models[modelId].vertexCount, " vertices, ",
 								              models[modelId].indexCount / 3, " triangles, ", models[modelId].batches.size(), " batches)");
-												Fix M2 white shell artifact from missing textures, add opacity track support

Batches whose named texture fails to load now render invisible instead of
white (the swampreeds01a.blp case causing a white shell around aquatic plants).

Also implements proper M2 opacity plumbing:
- Parse texture weight tracks (M2Track<fixed16>) and color animation alpha
  tracks (M2Color.alpha) to resolve per-batch opacity at load time
- Skip batches with batchOpacity < 0.01 in the render loop
- Apply M2Texture.flags (bit0=WrapS, bit1=WrapT) to GL sampler wrap mode
- Upload both UV sets (texCoords[0] and texCoords[1]) and select via
  textureUnit uniform, so batches referencing UV set 1 render correctly

											
										
										
											2026-02-17 23:52:44 -08:00
-												Initial commit: wowee native WoW 3.3.5a client

											
										
										
											2026-02-02 12:24:50 -08:00
+								    return true;
 								}
 								uint32_t M2Renderer::createInstance(uint32_t modelId, const glm::vec3& position,
 								                                     const glm::vec3& rotation, float scale) {
-												Improve runtime stutter handling and ground clutter performance

- reduce per-tile ground clutter generation pressure and enforce tighter caps to avoid spikes

- remove expensive detail dedupe scans from the hot render path

- add progressive/lazy clutter updates around player movement to smooth frame pacing

- lower noisy runtime INFO logging to DEBUG/throttled paths

- keep terrain/game screen updates responsive while preserving existing behavior

											
										
										
											2026-02-21 01:26:16 -08:00
+								    auto modelIt = models.find(modelId);
 								    if (modelIt == models.end()) {
-												Initial commit: wowee native WoW 3.3.5a client

											
										
										
											2026-02-02 12:24:50 -08:00
+								        LOG_WARNING("Cannot create instance: model ", modelId, " not loaded");
 								        return 0;
 								    }
-												Improve runtime stutter handling and ground clutter performance

- reduce per-tile ground clutter generation pressure and enforce tighter caps to avoid spikes

- remove expensive detail dedupe scans from the hot render path

- add progressive/lazy clutter updates around player movement to smooth frame pacing

- lower noisy runtime INFO logging to DEBUG/throttled paths

- keep terrain/game screen updates responsive while preserving existing behavior

											
										
										
											2026-02-21 01:26:16 -08:00
+								    const auto& mdlRef = modelIt->second;
-												Incremental terrain upload + M2 instance dedup hash for city stutter

Terrain finalization was uploading all 256 chunks (GPU fence waits) in one
atomic advanceFinalization call that couldn't be interrupted by the 5ms time
budget. Now split into incremental batches of 16 chunks per call, allowing
the time budget to yield between batches.

M2 instance creation had O(N) dedup scans iterating ALL instances to check
for duplicates. In cities with 5000+ doodads, this caused O(N²) total work
during tile loading. Replaced with hash-based DedupKey map for O(1) lookups.

Changes:
- TerrainRenderer::loadTerrainIncremental: uploads N chunks per call
- FinalizingTile tracks terrainChunkNext for cross-frame progress
- TERRAIN phase yields after preload and after each chunk batch
- M2Renderer::DedupKey hash map replaces linear scan in createInstance
  and createInstanceWithMatrix
- Dedup map maintained through rebuildSpatialIndex and clear paths

											
										
										
											2026-03-07 11:59:19 -08:00
+								    // Deduplicate: skip if same model already at nearly the same position.
 								    // Uses hash map for O(1) lookup instead of O(N) scan.
-												Improve runtime stutter handling and ground clutter performance

- reduce per-tile ground clutter generation pressure and enforce tighter caps to avoid spikes

- remove expensive detail dedupe scans from the hot render path

- add progressive/lazy clutter updates around player movement to smooth frame pacing

- lower noisy runtime INFO logging to DEBUG/throttled paths

- keep terrain/game screen updates responsive while preserving existing behavior

											
										
										
											2026-02-21 01:26:16 -08:00
+								    if (!mdlRef.isGroundDetail) {
-												Incremental terrain upload + M2 instance dedup hash for city stutter

Terrain finalization was uploading all 256 chunks (GPU fence waits) in one
atomic advanceFinalization call that couldn't be interrupted by the 5ms time
budget. Now split into incremental batches of 16 chunks per call, allowing
the time budget to yield between batches.

M2 instance creation had O(N) dedup scans iterating ALL instances to check
for duplicates. In cities with 5000+ doodads, this caused O(N²) total work
during tile loading. Replaced with hash-based DedupKey map for O(1) lookups.

Changes:
- TerrainRenderer::loadTerrainIncremental: uploads N chunks per call
- FinalizingTile tracks terrainChunkNext for cross-frame progress
- TERRAIN phase yields after preload and after each chunk batch
- M2Renderer::DedupKey hash map replaces linear scan in createInstance
  and createInstanceWithMatrix
- Dedup map maintained through rebuildSpatialIndex and clear paths

											
										
										
											2026-03-07 11:59:19 -08:00
+								        DedupKey dk{modelId,
 								                    static_cast<int32_t>(std::round(position.x * 10.0f)),
 								                    static_cast<int32_t>(std::round(position.y * 10.0f)),
 								                    static_cast<int32_t>(std::round(position.z * 10.0f))};
 								        auto dit = instanceDedupMap_.find(dk);
 								        if (dit != instanceDedupMap_.end()) {
 								            return dit->second;
-												Add M2 idle animation variations, dedup instances, fix terrain textures

- Add idle variation system: creatures randomly play Stand variations
  (stretch, flap, look around) every 4-10s, then return to idle loop
- Deduplicate M2 instances at same position (was hidden before animation
  made duplicates visible with different random start times)
- Adaptive M2 render distance: 350 units in open terrain, 180 in cities
- Restore terrain sampler-to-unit uniform bindings lost during texture
  bind optimization (roads were invisible under grass)
- Safety: clamp bone count to 128, validate sequence indices, sanitize scale

											
										
										
											2026-02-04 11:50:18 -08:00
+								        }
 								    }
-												Initial commit: wowee native WoW 3.3.5a client

											
										
										
											2026-02-02 12:24:50 -08:00
+								    M2Instance instance;
 								    instance.id = nextInstanceId++;
 								    instance.modelId = modelId;
 								    instance.position = position;
-												Improve runtime stutter handling and ground clutter performance

- reduce per-tile ground clutter generation pressure and enforce tighter caps to avoid spikes

- remove expensive detail dedupe scans from the hot render path

- add progressive/lazy clutter updates around player movement to smooth frame pacing

- lower noisy runtime INFO logging to DEBUG/throttled paths

- keep terrain/game screen updates responsive while preserving existing behavior

											
										
										
											2026-02-21 01:26:16 -08:00
+								    if (mdlRef.isGroundDetail) {
 								        instance.position.z -= computeGroundDetailDownOffset(mdlRef, scale);
 								    }
-												Initial commit: wowee native WoW 3.3.5a client

											
										
										
											2026-02-02 12:24:50 -08:00
+								    instance.rotation = rotation;
 								    instance.scale = scale;
 								    instance.updateModelMatrix();
-												Optimize collision queries with spatial grid and improve movement CCD

											
										
										
											2026-02-03 16:21:48 -08:00
+								    glm::vec3 localMin, localMax;
-												Improve runtime stutter handling and ground clutter performance

- reduce per-tile ground clutter generation pressure and enforce tighter caps to avoid spikes

- remove expensive detail dedupe scans from the hot render path

- add progressive/lazy clutter updates around player movement to smooth frame pacing

- lower noisy runtime INFO logging to DEBUG/throttled paths

- keep terrain/game screen updates responsive while preserving existing behavior

											
										
										
											2026-02-21 01:26:16 -08:00
+								    getTightCollisionBounds(mdlRef, localMin, localMax);
-												Optimize collision queries with spatial grid and improve movement CCD

											
										
										
											2026-02-03 16:21:48 -08:00
+								    transformAABB(instance.modelMatrix, localMin, localMax, instance.worldBoundsMin, instance.worldBoundsMax);
-												Initial commit: wowee native WoW 3.3.5a client

											
										
										
											2026-02-02 12:24:50 -08:00
-												Optimize M2 update loop: skip static doodads, incremental spatial index

- Split M2 instances into fast-path index lists (animated, particle-only,
  particle-all, smoke) to avoid iterating all 46K instances per frame
- Cache model flags (hasAnimation, disableAnimation, isSmoke, etc.) on
  M2Instance struct to eliminate per-frame hash lookups
- Replace full rebuildSpatialIndex on position/transform updates with
  incremental grid cell remove+add, preventing 8.5ms/frame rebuild cost
- Advance animTime for all instances (texture UV animation) but only
  compute bones and particles for the ~3K that need it

M2_UPDATE: 10.7ms → 2.0ms, FPS: 35 → 55-59

											
										
										
											2026-03-02 14:45:49 -08:00
+								    // Cache model flags on instance to avoid per-frame hash lookups
 								    instance.cachedHasAnimation = mdlRef.hasAnimation;
 								    instance.cachedDisableAnimation = mdlRef.disableAnimation;
 								    instance.cachedIsSmoke = mdlRef.isSmoke;
 								    instance.cachedHasParticleEmitters = !mdlRef.particleEmitters.empty();
 								    instance.cachedBoundRadius = mdlRef.boundRadius;
-												Eliminate per-instance hash lookups in M2 render/shadow culling loops

Use cached model flags (isValid, isSmoke, isInvisibleTrap, isGroundDetail,
disableAnimation, boundRadius) on M2Instance instead of models.find() in
the hot culling paths. Also complete cached flag initialization in
createInstanceWithMatrix().

											
										
										
											2026-03-04 08:28:21 -08:00
+								    instance.cachedIsGroundDetail = mdlRef.isGroundDetail;
 								    instance.cachedIsInvisibleTrap = mdlRef.isInvisibleTrap;
-												Instance portal glow, spin, and transparent additive rendering

											
										
										
											2026-03-06 18:02:56 -08:00
+								    instance.cachedIsInstancePortal = mdlRef.isInstancePortal;
-												Eliminate per-instance hash lookups in M2 render/shadow culling loops

Use cached model flags (isValid, isSmoke, isInvisibleTrap, isGroundDetail,
disableAnimation, boundRadius) on M2Instance instead of models.find() in
the hot culling paths. Also complete cached flag initialization in
createInstanceWithMatrix().

											
										
										
											2026-03-04 08:28:21 -08:00
+								    instance.cachedIsValid = mdlRef.isValid();
-												Background BLP texture pre-decoding + deferred WMO normal maps (12x streaming perf)

Move CPU-heavy BLP texture decoding from main thread to background worker
threads for all hot paths: terrain M2 models, WMO doodad M2s, WMO textures,
creature models, and gameobject WMOs. Each renderer (M2, WMO, Character) now
accepts a pre-decoded BLP cache that loadTexture() checks before falling back
to synchronous decode.

Defer WMO normal/height map generation (3 per-pixel passes: luminance, box
blur, Sobel) during terrain streaming finalization — this was the dominant
remaining bottleneck after BLP pre-decoding.

Terrain streaming stalls: 1576ms → 124ms worst case.

											
										
										
											2026-03-07 15:46:56 -08:00
+								    instance.cachedModel = &mdlRef;
-												Optimize M2 update loop: skip static doodads, incremental spatial index

- Split M2 instances into fast-path index lists (animated, particle-only,
  particle-all, smoke) to avoid iterating all 46K instances per frame
- Cache model flags (hasAnimation, disableAnimation, isSmoke, etc.) on
  M2Instance struct to eliminate per-frame hash lookups
- Replace full rebuildSpatialIndex on position/transform updates with
  incremental grid cell remove+add, preventing 8.5ms/frame rebuild cost
- Advance animTime for all instances (texture UV animation) but only
  compute bones and particles for the ~3K that need it

M2_UPDATE: 10.7ms → 2.0ms, FPS: 35 → 55-59

											
										
										
											2026-03-02 14:45:49 -08:00
-												Add M2 skeletal animation and fix terrain texture layers

- Implement GPU bone skinning for M2 doodads/creatures (gryphons, birds)
- Store bone hierarchy and animation keyframes per model
- Compute bone matrices per-instance with keyframe interpolation
- Upload bone weights/indices in vertex buffer, skinning in vertex shader
- Fix terrain texture rendering: restore sampler-to-unit uniform bindings
  removed during texture bind optimization (roads were invisible)

											
										
										
											2026-02-04 11:40:00 -08:00
+								    // Initialize animation: play first sequence (usually Stand/Idle)
-												Improve runtime stutter handling and ground clutter performance

- reduce per-tile ground clutter generation pressure and enforce tighter caps to avoid spikes

- remove expensive detail dedupe scans from the hot render path

- add progressive/lazy clutter updates around player movement to smooth frame pacing

- lower noisy runtime INFO logging to DEBUG/throttled paths

- keep terrain/game screen updates responsive while preserving existing behavior

											
										
										
											2026-02-21 01:26:16 -08:00
+								    const auto& mdl = mdlRef;
-												Improve shadow stability and reduce foliage pop-in

											
										
										
											2026-02-04 16:30:24 -08:00
+								    if (mdl.hasAnimation && !mdl.disableAnimation && !mdl.sequences.empty()) {
-												Add M2 skeletal animation and fix terrain texture layers

- Implement GPU bone skinning for M2 doodads/creatures (gryphons, birds)
- Store bone hierarchy and animation keyframes per model
- Compute bone matrices per-instance with keyframe interpolation
- Upload bone weights/indices in vertex buffer, skinning in vertex shader
- Fix terrain texture rendering: restore sampler-to-unit uniform bindings
  removed during texture bind optimization (roads were invisible)

											
										
										
											2026-02-04 11:40:00 -08:00
+								        instance.currentSequenceIndex = 0;
-												Add M2 idle animation variations, dedup instances, fix terrain textures

- Add idle variation system: creatures randomly play Stand variations
  (stretch, flap, look around) every 4-10s, then return to idle loop
- Deduplicate M2 instances at same position (was hidden before animation
  made duplicates visible with different random start times)
- Adaptive M2 render distance: 350 units in open terrain, 180 in cities
- Restore terrain sampler-to-unit uniform bindings lost during texture
  bind optimization (roads were invisible under grass)
- Safety: clamp bone count to 128, validate sequence indices, sanitize scale

											
										
										
											2026-02-04 11:50:18 -08:00
+								        instance.idleSequenceIndex = 0;
-												Add M2 skeletal animation and fix terrain texture layers

- Implement GPU bone skinning for M2 doodads/creatures (gryphons, birds)
- Store bone hierarchy and animation keyframes per model
- Compute bone matrices per-instance with keyframe interpolation
- Upload bone weights/indices in vertex buffer, skinning in vertex shader
- Fix terrain texture rendering: restore sampler-to-unit uniform bindings
  removed during texture bind optimization (roads were invisible)

											
										
										
											2026-02-04 11:40:00 -08:00
+								        instance.animDuration = static_cast<float>(mdl.sequences[0].duration);
 								        instance.animTime = static_cast<float>(rand() % std::max(1u, mdl.sequences[0].duration));
-												Add M2 idle animation variations, dedup instances, fix terrain textures

- Add idle variation system: creatures randomly play Stand variations
  (stretch, flap, look around) every 4-10s, then return to idle loop
- Deduplicate M2 instances at same position (was hidden before animation
  made duplicates visible with different random start times)
- Adaptive M2 render distance: 350 units in open terrain, 180 in cities
- Restore terrain sampler-to-unit uniform bindings lost during texture
  bind optimization (roads were invisible under grass)
- Safety: clamp bone count to 128, validate sequence indices, sanitize scale

											
										
										
											2026-02-04 11:50:18 -08:00
+								        instance.variationTimer = 3000.0f + static_cast<float>(rand() % 8000);
-												Parallel animation updates, thread-safe collision, M2 pop-in fix, shadow stabilization

- Overlap M2 and character animation updates via std::async (~2-5ms saved)
- Thread-local collision scratch buffers for concurrent floor queries
- Parallel terrain/WMO/M2 floor queries in camera controller
- Seed new M2 instance bones from existing siblings to eliminate pop-in flash
- Fix shadow flicker: snap center along stable light axes instead of in view space
- Increase shadow distance default to 300 units (slider max 500)

											
										
										
											2026-03-07 22:29:06 -08:00
 								        // Seed bone matrices from an existing instance of the same model so the
 								        // new instance renders immediately instead of being invisible until the
 								        // next update() computes bones (prevents pop-in flash).
 								        for (const auto& existing : instances) {
 								            if (existing.modelId == modelId && !existing.boneMatrices.empty()) {
 								                instance.boneMatrices = existing.boneMatrices;
 								                instance.bonesDirty = true;
 								                break;
 								            }
 								        }
 								        // If no sibling exists yet, compute bones immediately
 								        if (instance.boneMatrices.empty()) {
 								            computeBoneMatrices(mdlRef, instance);
 								        }
-												Add M2 skeletal animation and fix terrain texture layers

- Implement GPU bone skinning for M2 doodads/creatures (gryphons, birds)
- Store bone hierarchy and animation keyframes per model
- Compute bone matrices per-instance with keyframe interpolation
- Upload bone weights/indices in vertex buffer, skinning in vertex shader
- Fix terrain texture rendering: restore sampler-to-unit uniform bindings
  removed during texture bind optimization (roads were invisible)

											
										
										
											2026-02-04 11:40:00 -08:00
+								    }
-												Incremental terrain upload + M2 instance dedup hash for city stutter

Terrain finalization was uploading all 256 chunks (GPU fence waits) in one
atomic advanceFinalization call that couldn't be interrupted by the 5ms time
budget. Now split into incremental batches of 16 chunks per call, allowing
the time budget to yield between batches.

M2 instance creation had O(N) dedup scans iterating ALL instances to check
for duplicates. In cities with 5000+ doodads, this caused O(N²) total work
during tile loading. Replaced with hash-based DedupKey map for O(1) lookups.

Changes:
- TerrainRenderer::loadTerrainIncremental: uploads N chunks per call
- FinalizingTile tracks terrainChunkNext for cross-frame progress
- TERRAIN phase yields after preload and after each chunk batch
- M2Renderer::DedupKey hash map replaces linear scan in createInstance
  and createInstanceWithMatrix
- Dedup map maintained through rebuildSpatialIndex and clear paths

											
										
										
											2026-03-07 11:59:19 -08:00
+								    // Register in dedup map before pushing (uses original position, not ground-adjusted)
 								    if (!mdlRef.isGroundDetail) {
 								        DedupKey dk{modelId,
 								                    static_cast<int32_t>(std::round(position.x * 10.0f)),
 								                    static_cast<int32_t>(std::round(position.y * 10.0f)),
 								                    static_cast<int32_t>(std::round(position.z * 10.0f))};
 								        instanceDedupMap_[dk] = instance.id;
 								    }
-												Initial commit: wowee native WoW 3.3.5a client

											
										
										
											2026-02-02 12:24:50 -08:00
+								    instances.push_back(instance);
-												Optimize collision queries with spatial grid and improve movement CCD

											
										
										
											2026-02-03 16:21:48 -08:00
+								    size_t idx = instances.size() - 1;
-												Optimize M2 update loop: skip static doodads, incremental spatial index

- Split M2 instances into fast-path index lists (animated, particle-only,
  particle-all, smoke) to avoid iterating all 46K instances per frame
- Cache model flags (hasAnimation, disableAnimation, isSmoke, etc.) on
  M2Instance struct to eliminate per-frame hash lookups
- Replace full rebuildSpatialIndex on position/transform updates with
  incremental grid cell remove+add, preventing 8.5ms/frame rebuild cost
- Advance animTime for all instances (texture UV animation) but only
  compute bones and particles for the ~3K that need it

M2_UPDATE: 10.7ms → 2.0ms, FPS: 35 → 55-59

											
										
										
											2026-03-02 14:45:49 -08:00
+								    // Track special instances for fast-path iteration
 								    if (mdlRef.isSmoke) {
 								        smokeInstanceIndices_.push_back(idx);
 								    }
-												Instance portal glow, spin, and transparent additive rendering

											
										
										
											2026-03-06 18:02:56 -08:00
+								    if (mdlRef.isInstancePortal) {
 								        portalInstanceIndices_.push_back(idx);
 								    }
-												Optimize M2 update loop: skip static doodads, incremental spatial index

- Split M2 instances into fast-path index lists (animated, particle-only,
  particle-all, smoke) to avoid iterating all 46K instances per frame
- Cache model flags (hasAnimation, disableAnimation, isSmoke, etc.) on
  M2Instance struct to eliminate per-frame hash lookups
- Replace full rebuildSpatialIndex on position/transform updates with
  incremental grid cell remove+add, preventing 8.5ms/frame rebuild cost
- Advance animTime for all instances (texture UV animation) but only
  compute bones and particles for the ~3K that need it

M2_UPDATE: 10.7ms → 2.0ms, FPS: 35 → 55-59

											
										
										
											2026-03-02 14:45:49 -08:00
+								    if (!mdlRef.particleEmitters.empty()) {
 								        particleInstanceIndices_.push_back(idx);
 								    }
 								    if (mdlRef.hasAnimation && !mdlRef.disableAnimation) {
 								        animatedInstanceIndices_.push_back(idx);
 								    } else if (!mdlRef.particleEmitters.empty()) {
 								        particleOnlyInstanceIndices_.push_back(idx);
 								    }
-												Optimize collision queries with spatial grid and improve movement CCD

											
										
										
											2026-02-03 16:21:48 -08:00
+								    instanceIndexById[instance.id] = idx;
 								    GridCell minCell = toCell(instance.worldBoundsMin);
 								    GridCell maxCell = toCell(instance.worldBoundsMax);
 								    for (int z = minCell.z; z <= maxCell.z; z++) {
 								        for (int y = minCell.y; y <= maxCell.y; y++) {
 								            for (int x = minCell.x; x <= maxCell.x; x++) {
 								                spatialGrid[GridCell{x, y, z}].push_back(instance.id);
 								            }
 								        }
 								    }
-												Initial commit: wowee native WoW 3.3.5a client

											
										
										
											2026-02-02 12:24:50 -08:00
 								    return instance.id;
 								}
 								uint32_t M2Renderer::createInstanceWithMatrix(uint32_t modelId, const glm::mat4& modelMatrix,
 								                                                const glm::vec3& position) {
 								    if (models.find(modelId) == models.end()) {
 								        LOG_WARNING("Cannot create instance: model ", modelId, " not loaded");
 								        return 0;
 								    }
-												Incremental terrain upload + M2 instance dedup hash for city stutter

Terrain finalization was uploading all 256 chunks (GPU fence waits) in one
atomic advanceFinalization call that couldn't be interrupted by the 5ms time
budget. Now split into incremental batches of 16 chunks per call, allowing
the time budget to yield between batches.

M2 instance creation had O(N) dedup scans iterating ALL instances to check
for duplicates. In cities with 5000+ doodads, this caused O(N²) total work
during tile loading. Replaced with hash-based DedupKey map for O(1) lookups.

Changes:
- TerrainRenderer::loadTerrainIncremental: uploads N chunks per call
- FinalizingTile tracks terrainChunkNext for cross-frame progress
- TERRAIN phase yields after preload and after each chunk batch
- M2Renderer::DedupKey hash map replaces linear scan in createInstance
  and createInstanceWithMatrix
- Dedup map maintained through rebuildSpatialIndex and clear paths

											
										
										
											2026-03-07 11:59:19 -08:00
+								    // Deduplicate: O(1) hash lookup
 								    {
 								        DedupKey dk{modelId,
 								                    static_cast<int32_t>(std::round(position.x * 10.0f)),
 								                    static_cast<int32_t>(std::round(position.y * 10.0f)),
 								                    static_cast<int32_t>(std::round(position.z * 10.0f))};
 								        auto dit = instanceDedupMap_.find(dk);
 								        if (dit != instanceDedupMap_.end()) {
 								            return dit->second;
-												Add M2 idle animation variations, dedup instances, fix terrain textures

- Add idle variation system: creatures randomly play Stand variations
  (stretch, flap, look around) every 4-10s, then return to idle loop
- Deduplicate M2 instances at same position (was hidden before animation
  made duplicates visible with different random start times)
- Adaptive M2 render distance: 350 units in open terrain, 180 in cities
- Restore terrain sampler-to-unit uniform bindings lost during texture
  bind optimization (roads were invisible under grass)
- Safety: clamp bone count to 128, validate sequence indices, sanitize scale

											
										
										
											2026-02-04 11:50:18 -08:00
+								        }
 								    }
-												Initial commit: wowee native WoW 3.3.5a client

											
										
										
											2026-02-02 12:24:50 -08:00
+								    M2Instance instance;
 								    instance.id = nextInstanceId++;
 								    instance.modelId = modelId;
 								    instance.position = position;  // Used for frustum culling
 								    instance.rotation = glm::vec3(0.0f);
 								    instance.scale = 1.0f;
 								    instance.modelMatrix = modelMatrix;
-												Fix camera occlusion and stabilize WMO/M2 collision behavior

											
										
										
											2026-02-03 16:04:21 -08:00
+								    instance.invModelMatrix = glm::inverse(modelMatrix);
-												Optimize collision queries with spatial grid and improve movement CCD

											
										
										
											2026-02-03 16:21:48 -08:00
+								    glm::vec3 localMin, localMax;
-												Fix Windows ARM64 build: disable x86 asm in StormLib's libtomcrypt

StormLib's bundled libtomcrypt uses x86 inline assembly (bswapl/movl)
gated by __MINGW32__, which is defined on CLANGARM64 too. Pass
-DLTC_NO_BSWAP to force portable C byte-swap fallback.

											
										
										
											2026-02-25 03:06:06 -08:00
+								    getTightCollisionBounds(models[modelId], localMin, localMax);
-												Optimize collision queries with spatial grid and improve movement CCD

											
										
										
											2026-02-03 16:21:48 -08:00
+								    transformAABB(instance.modelMatrix, localMin, localMax, instance.worldBoundsMin, instance.worldBoundsMax);
-												Optimize M2 update loop: skip static doodads, incremental spatial index

- Split M2 instances into fast-path index lists (animated, particle-only,
  particle-all, smoke) to avoid iterating all 46K instances per frame
- Cache model flags (hasAnimation, disableAnimation, isSmoke, etc.) on
  M2Instance struct to eliminate per-frame hash lookups
- Replace full rebuildSpatialIndex on position/transform updates with
  incremental grid cell remove+add, preventing 8.5ms/frame rebuild cost
- Advance animTime for all instances (texture UV animation) but only
  compute bones and particles for the ~3K that need it

M2_UPDATE: 10.7ms → 2.0ms, FPS: 35 → 55-59

											
										
										
											2026-03-02 14:45:49 -08:00
+								    // Cache model flags on instance to avoid per-frame hash lookups
-												Fix Windows ARM64 build: disable x86 asm in StormLib's libtomcrypt

StormLib's bundled libtomcrypt uses x86 inline assembly (bswapl/movl)
gated by __MINGW32__, which is defined on CLANGARM64 too. Pass
-DLTC_NO_BSWAP to force portable C byte-swap fallback.

											
										
										
											2026-02-25 03:06:06 -08:00
+								    const auto& mdl2 = models[modelId];
-												Optimize M2 update loop: skip static doodads, incremental spatial index

- Split M2 instances into fast-path index lists (animated, particle-only,
  particle-all, smoke) to avoid iterating all 46K instances per frame
- Cache model flags (hasAnimation, disableAnimation, isSmoke, etc.) on
  M2Instance struct to eliminate per-frame hash lookups
- Replace full rebuildSpatialIndex on position/transform updates with
  incremental grid cell remove+add, preventing 8.5ms/frame rebuild cost
- Advance animTime for all instances (texture UV animation) but only
  compute bones and particles for the ~3K that need it

M2_UPDATE: 10.7ms → 2.0ms, FPS: 35 → 55-59

											
										
										
											2026-03-02 14:45:49 -08:00
+								    instance.cachedHasAnimation = mdl2.hasAnimation;
 								    instance.cachedDisableAnimation = mdl2.disableAnimation;
 								    instance.cachedIsSmoke = mdl2.isSmoke;
 								    instance.cachedHasParticleEmitters = !mdl2.particleEmitters.empty();
 								    instance.cachedBoundRadius = mdl2.boundRadius;
-												Eliminate per-instance hash lookups in M2 render/shadow culling loops

Use cached model flags (isValid, isSmoke, isInvisibleTrap, isGroundDetail,
disableAnimation, boundRadius) on M2Instance instead of models.find() in
the hot culling paths. Also complete cached flag initialization in
createInstanceWithMatrix().

											
										
										
											2026-03-04 08:28:21 -08:00
+								    instance.cachedIsGroundDetail = mdl2.isGroundDetail;
 								    instance.cachedIsInvisibleTrap = mdl2.isInvisibleTrap;
 								    instance.cachedIsValid = mdl2.isValid();
-												Background BLP texture pre-decoding + deferred WMO normal maps (12x streaming perf)

Move CPU-heavy BLP texture decoding from main thread to background worker
threads for all hot paths: terrain M2 models, WMO doodad M2s, WMO textures,
creature models, and gameobject WMOs. Each renderer (M2, WMO, Character) now
accepts a pre-decoded BLP cache that loadTexture() checks before falling back
to synchronous decode.

Defer WMO normal/height map generation (3 per-pixel passes: luminance, box
blur, Sobel) during terrain streaming finalization — this was the dominant
remaining bottleneck after BLP pre-decoding.

Terrain streaming stalls: 1576ms → 124ms worst case.

											
										
										
											2026-03-07 15:46:56 -08:00
+								    instance.cachedModel = &mdl2;
-												Optimize M2 update loop: skip static doodads, incremental spatial index

- Split M2 instances into fast-path index lists (animated, particle-only,
  particle-all, smoke) to avoid iterating all 46K instances per frame
- Cache model flags (hasAnimation, disableAnimation, isSmoke, etc.) on
  M2Instance struct to eliminate per-frame hash lookups
- Replace full rebuildSpatialIndex on position/transform updates with
  incremental grid cell remove+add, preventing 8.5ms/frame rebuild cost
- Advance animTime for all instances (texture UV animation) but only
  compute bones and particles for the ~3K that need it

M2_UPDATE: 10.7ms → 2.0ms, FPS: 35 → 55-59

											
										
										
											2026-03-02 14:45:49 -08:00
 								    // Initialize animation
-												Fix Windows ARM64 build: disable x86 asm in StormLib's libtomcrypt

StormLib's bundled libtomcrypt uses x86 inline assembly (bswapl/movl)
gated by __MINGW32__, which is defined on CLANGARM64 too. Pass
-DLTC_NO_BSWAP to force portable C byte-swap fallback.

											
										
										
											2026-02-25 03:06:06 -08:00
+								    if (mdl2.hasAnimation && !mdl2.disableAnimation && !mdl2.sequences.empty()) {
-												Add M2 skeletal animation and fix terrain texture layers

- Implement GPU bone skinning for M2 doodads/creatures (gryphons, birds)
- Store bone hierarchy and animation keyframes per model
- Compute bone matrices per-instance with keyframe interpolation
- Upload bone weights/indices in vertex buffer, skinning in vertex shader
- Fix terrain texture rendering: restore sampler-to-unit uniform bindings
  removed during texture bind optimization (roads were invisible)

											
										
										
											2026-02-04 11:40:00 -08:00
+								        instance.currentSequenceIndex = 0;
-												Add M2 idle animation variations, dedup instances, fix terrain textures

- Add idle variation system: creatures randomly play Stand variations
  (stretch, flap, look around) every 4-10s, then return to idle loop
- Deduplicate M2 instances at same position (was hidden before animation
  made duplicates visible with different random start times)
- Adaptive M2 render distance: 350 units in open terrain, 180 in cities
- Restore terrain sampler-to-unit uniform bindings lost during texture
  bind optimization (roads were invisible under grass)
- Safety: clamp bone count to 128, validate sequence indices, sanitize scale

											
										
										
											2026-02-04 11:50:18 -08:00
+								        instance.idleSequenceIndex = 0;
-												Fix Windows ARM64 build: disable x86 asm in StormLib's libtomcrypt

StormLib's bundled libtomcrypt uses x86 inline assembly (bswapl/movl)
gated by __MINGW32__, which is defined on CLANGARM64 too. Pass
-DLTC_NO_BSWAP to force portable C byte-swap fallback.

											
										
										
											2026-02-25 03:06:06 -08:00
+								        instance.animDuration = static_cast<float>(mdl2.sequences[0].duration);
 								        instance.animTime = static_cast<float>(rand() % std::max(1u, mdl2.sequences[0].duration));
-												Add M2 idle animation variations, dedup instances, fix terrain textures

- Add idle variation system: creatures randomly play Stand variations
  (stretch, flap, look around) every 4-10s, then return to idle loop
- Deduplicate M2 instances at same position (was hidden before animation
  made duplicates visible with different random start times)
- Adaptive M2 render distance: 350 units in open terrain, 180 in cities
- Restore terrain sampler-to-unit uniform bindings lost during texture
  bind optimization (roads were invisible under grass)
- Safety: clamp bone count to 128, validate sequence indices, sanitize scale

											
										
										
											2026-02-04 11:50:18 -08:00
+								        instance.variationTimer = 3000.0f + static_cast<float>(rand() % 8000);
-												Parallel animation updates, thread-safe collision, M2 pop-in fix, shadow stabilization

- Overlap M2 and character animation updates via std::async (~2-5ms saved)
- Thread-local collision scratch buffers for concurrent floor queries
- Parallel terrain/WMO/M2 floor queries in camera controller
- Seed new M2 instance bones from existing siblings to eliminate pop-in flash
- Fix shadow flicker: snap center along stable light axes instead of in view space
- Increase shadow distance default to 300 units (slider max 500)

											
										
										
											2026-03-07 22:29:06 -08:00
 								        // Seed bone matrices from an existing sibling so the instance renders immediately
 								        for (const auto& existing : instances) {
 								            if (existing.modelId == modelId && !existing.boneMatrices.empty()) {
 								                instance.boneMatrices = existing.boneMatrices;
 								                instance.bonesDirty = true;
 								                break;
 								            }
 								        }
 								        if (instance.boneMatrices.empty()) {
 								            computeBoneMatrices(mdl2, instance);
 								        }
-												Add M2 skeletal animation and fix terrain texture layers

- Implement GPU bone skinning for M2 doodads/creatures (gryphons, birds)
- Store bone hierarchy and animation keyframes per model
- Compute bone matrices per-instance with keyframe interpolation
- Upload bone weights/indices in vertex buffer, skinning in vertex shader
- Fix terrain texture rendering: restore sampler-to-unit uniform bindings
  removed during texture bind optimization (roads were invisible)

											
										
										
											2026-02-04 11:40:00 -08:00
+								    } else {
-												Add M2 idle animation variations, dedup instances, fix terrain textures

- Add idle variation system: creatures randomly play Stand variations
  (stretch, flap, look around) every 4-10s, then return to idle loop
- Deduplicate M2 instances at same position (was hidden before animation
  made duplicates visible with different random start times)
- Adaptive M2 render distance: 350 units in open terrain, 180 in cities
- Restore terrain sampler-to-unit uniform bindings lost during texture
  bind optimization (roads were invisible under grass)
- Safety: clamp bone count to 128, validate sequence indices, sanitize scale

											
										
										
											2026-02-04 11:50:18 -08:00
+								        instance.animTime = static_cast<float>(rand()) / RAND_MAX * 10000.0f;
-												Add M2 skeletal animation and fix terrain texture layers

- Implement GPU bone skinning for M2 doodads/creatures (gryphons, birds)
- Store bone hierarchy and animation keyframes per model
- Compute bone matrices per-instance with keyframe interpolation
- Upload bone weights/indices in vertex buffer, skinning in vertex shader
- Fix terrain texture rendering: restore sampler-to-unit uniform bindings
  removed during texture bind optimization (roads were invisible)

											
										
										
											2026-02-04 11:40:00 -08:00
+								    }
-												Initial commit: wowee native WoW 3.3.5a client

											
										
										
											2026-02-02 12:24:50 -08:00
-												Incremental terrain upload + M2 instance dedup hash for city stutter

Terrain finalization was uploading all 256 chunks (GPU fence waits) in one
atomic advanceFinalization call that couldn't be interrupted by the 5ms time
budget. Now split into incremental batches of 16 chunks per call, allowing
the time budget to yield between batches.

M2 instance creation had O(N) dedup scans iterating ALL instances to check
for duplicates. In cities with 5000+ doodads, this caused O(N²) total work
during tile loading. Replaced with hash-based DedupKey map for O(1) lookups.

Changes:
- TerrainRenderer::loadTerrainIncremental: uploads N chunks per call
- FinalizingTile tracks terrainChunkNext for cross-frame progress
- TERRAIN phase yields after preload and after each chunk batch
- M2Renderer::DedupKey hash map replaces linear scan in createInstance
  and createInstanceWithMatrix
- Dedup map maintained through rebuildSpatialIndex and clear paths

											
										
										
											2026-03-07 11:59:19 -08:00
+								    // Register in dedup map
 								    {
 								        DedupKey dk{modelId,
 								                    static_cast<int32_t>(std::round(position.x * 10.0f)),
 								                    static_cast<int32_t>(std::round(position.y * 10.0f)),
 								                    static_cast<int32_t>(std::round(position.z * 10.0f))};
 								        instanceDedupMap_[dk] = instance.id;
 								    }
-												Initial commit: wowee native WoW 3.3.5a client

											
										
										
											2026-02-02 12:24:50 -08:00
+								    instances.push_back(instance);
-												Optimize collision queries with spatial grid and improve movement CCD

											
										
										
											2026-02-03 16:21:48 -08:00
+								    size_t idx = instances.size() - 1;
-												Optimize M2 update loop: skip static doodads, incremental spatial index

- Split M2 instances into fast-path index lists (animated, particle-only,
  particle-all, smoke) to avoid iterating all 46K instances per frame
- Cache model flags (hasAnimation, disableAnimation, isSmoke, etc.) on
  M2Instance struct to eliminate per-frame hash lookups
- Replace full rebuildSpatialIndex on position/transform updates with
  incremental grid cell remove+add, preventing 8.5ms/frame rebuild cost
- Advance animTime for all instances (texture UV animation) but only
  compute bones and particles for the ~3K that need it

M2_UPDATE: 10.7ms → 2.0ms, FPS: 35 → 55-59

											
										
										
											2026-03-02 14:45:49 -08:00
+								    if (mdl2.isSmoke) {
 								        smokeInstanceIndices_.push_back(idx);
 								    }
 								    if (!mdl2.particleEmitters.empty()) {
 								        particleInstanceIndices_.push_back(idx);
 								    }
 								    if (mdl2.hasAnimation && !mdl2.disableAnimation) {
 								        animatedInstanceIndices_.push_back(idx);
 								    } else if (!mdl2.particleEmitters.empty()) {
 								        particleOnlyInstanceIndices_.push_back(idx);
 								    }
-												Optimize collision queries with spatial grid and improve movement CCD

											
										
										
											2026-02-03 16:21:48 -08:00
+								    instanceIndexById[instance.id] = idx;
 								    GridCell minCell = toCell(instance.worldBoundsMin);
 								    GridCell maxCell = toCell(instance.worldBoundsMax);
 								    for (int z = minCell.z; z <= maxCell.z; z++) {
 								        for (int y = minCell.y; y <= maxCell.y; y++) {
 								            for (int x = minCell.x; x <= maxCell.x; x++) {
 								                spatialGrid[GridCell{x, y, z}].push_back(instance.id);
 								            }
 								        }
 								    }
-												Initial commit: wowee native WoW 3.3.5a client

											
										
										
											2026-02-02 12:24:50 -08:00
 								    return instance.id;
 								}
-												Add M2 skeletal animation and fix terrain texture layers

- Implement GPU bone skinning for M2 doodads/creatures (gryphons, birds)
- Store bone hierarchy and animation keyframes per model
- Compute bone matrices per-instance with keyframe interpolation
- Upload bone weights/indices in vertex buffer, skinning in vertex shader
- Fix terrain texture rendering: restore sampler-to-unit uniform bindings
  removed during texture bind optimization (roads were invisible)

											
										
										
											2026-02-04 11:40:00 -08:00
+								// --- Bone animation helpers (same logic as CharacterRenderer) ---
 								/**
 								 * Locate the keyframe interval that contains `time`.
 								 *
 								 * @param timestamps Keyframe times; the search assumes they are sorted ascending.
 								 * @param time       Time to look up, compared against the timestamps as float.
 								 * @return -1 when there are no keyframes; 0 when there is exactly one; otherwise the
 								 *         index i of the interval start, clamped to [0, size - 2] so that i + 1 is
 								 *         always a valid index for the caller to interpolate towards.
 								 */
 								static int findKeyframeIndex(const std::vector<uint32_t>& timestamps, float time) {
 								    if (timestamps.empty()) return -1;
 								    if (timestamps.size() == 1) return 0;

 								    // Binary search using float comparison to match original semantics exactly:
 								    // `it` is the first timestamp strictly greater than `time`.
 								    auto it = std::upper_bound(timestamps.begin(), timestamps.end(), time,
 								        [](float t, uint32_t ts) { return t < static_cast<float>(ts); });

 								    // `time` lies before the first keyframe: use the first interval.
 								    if (it == timestamps.begin()) return 0;

 								    // Step back to the last timestamp <= time, then clamp so idx + 1 stays in range.
 								    size_t idx = static_cast<size_t>(it - timestamps.begin()) - 1;
 								    return static_cast<int>(std::min(idx, timestamps.size() - 2));
 								}
-												Add M2 global sequence animation, smoke UV scroll, and fix WMO floor detection

- Parse global sequence durations from M2 binary and use them in bone
  interpolation so torches, candles, and other env doodads animate.
- Add UV scroll shader effect for smoke models (HouseSmoke, SmokeStack)
  as a workaround for unimplemented M2 particle emitters.
- Tighten WMO floor probe heights to prevent multi-story buildings from
  returning the wrong floor, fixing player clipping through inn floors
  and camera locking onto the second floor.
- Use player ground level as reference for camera orbit floor collision
  so the camera doesn't fight upper floors in buildings.

											
										
										
											2026-02-04 14:06:59 -08:00
+								// Resolve sequence index and time for a track, handling global sequences.
 								/**
 								 * Pick the keyframe sub-array and sample time to use for one animation track.
 								 *
 								 * @param track              Track whose `globalSequence` selects the mode.
 								 * @param seqIdx             Sequence index of the animation currently playing.
 								 * @param time               Current time within that animation.
 								 * @param globalSeqDurations Durations indexed by global sequence number.
 								 * @param outSeqIdx          Receives the sub-array index to sample.
 								 * @param outTime            Receives the time to sample at.
 								 */
 								static void resolveTrackTime(const pipeline::M2AnimationTrack& track,
 								                              int seqIdx, float time,
 								                              const std::vector<uint32_t>& globalSeqDurations,
 								                              int& outSeqIdx, float& outTime) {
 								    // The track is globally sequenced only when its index names a known duration.
 								    const bool usesGlobalSequence =
 								        track.globalSequence >= 0 &&
 								        static_cast<size_t>(track.globalSequence) < globalSeqDurations.size();

 								    // Ordinary track: pass the caller's sequence and time straight through.
 								    if (!usesGlobalSequence) {
 								        outSeqIdx = seqIdx;
 								        outTime = time;
 								        return;
 								    }

 								    // Global sequence: always sub-array 0, with time wrapped at the global duration.
 								    const float wrapLength = static_cast<float>(globalSeqDurations[track.globalSequence]);
 								    outSeqIdx = 0;
 								    if (wrapLength > 0.0f) {
 								        outTime = std::fmod(time, wrapLength);
 								    } else {
 								        // A zero-length global sequence pins the time to its start.
 								        outTime = 0.0f;
 								    }
 								}
-												Add M2 skeletal animation and fix terrain texture layers

- Implement GPU bone skinning for M2 doodads/creatures (gryphons, birds)
- Store bone hierarchy and animation keyframes per model
- Compute bone matrices per-instance with keyframe interpolation
- Upload bone weights/indices in vertex buffer, skinning in vertex shader
- Fix terrain texture rendering: restore sampler-to-unit uniform bindings
  removed during texture bind optimization (roads were invisible)

											
										
										
											2026-02-04 11:40:00 -08:00
+								static glm::vec3 interpVec3(const pipeline::M2AnimationTrack& track,
-												Add M2 global sequence animation, smoke UV scroll, and fix WMO floor detection

- Parse global sequence durations from M2 binary and use them in bone
  interpolation so torches, candles, and other env doodads animate.
- Add UV scroll shader effect for smoke models (HouseSmoke, SmokeStack)
  as a workaround for unimplemented M2 particle emitters.
- Tighten WMO floor probe heights to prevent multi-story buildings from
  returning the wrong floor, fixing player clipping through inn floors
  and camera locking onto the second floor.
- Use player ground level as reference for camera orbit floor collision
  so the camera doesn't fight upper floors in buildings.

											
										
										
											2026-02-04 14:06:59 -08:00
+								                             int seqIdx, float time, const glm::vec3& def,
 								                             const std::vector<uint32_t>& globalSeqDurations) {
-												Add M2 skeletal animation and fix terrain texture layers

- Implement GPU bone skinning for M2 doodads/creatures (gryphons, birds)
- Store bone hierarchy and animation keyframes per model
- Compute bone matrices per-instance with keyframe interpolation
- Upload bone weights/indices in vertex buffer, skinning in vertex shader
- Fix terrain texture rendering: restore sampler-to-unit uniform bindings
  removed during texture bind optimization (roads were invisible)

											
										
										
											2026-02-04 11:40:00 -08:00
+								    if (!track.hasData()) return def;
-												Add M2 global sequence animation, smoke UV scroll, and fix WMO floor detection

- Parse global sequence durations from M2 binary and use them in bone
  interpolation so torches, candles, and other env doodads animate.
- Add UV scroll shader effect for smoke models (HouseSmoke, SmokeStack)
  as a workaround for unimplemented M2 particle emitters.
- Tighten WMO floor probe heights to prevent multi-story buildings from
  returning the wrong floor, fixing player clipping through inn floors
  and camera locking onto the second floor.
- Use player ground level as reference for camera orbit floor collision
  so the camera doesn't fight upper floors in buildings.

											
										
										
											2026-02-04 14:06:59 -08:00
+								    int si; float t;
 								    resolveTrackTime(track, seqIdx, time, globalSeqDurations, si, t);
 								    if (si < 0 || si >= static_cast<int>(track.sequences.size())) return def;
 								    const auto& keys = track.sequences[si];
-												Add M2 skeletal animation and fix terrain texture layers

- Implement GPU bone skinning for M2 doodads/creatures (gryphons, birds)
- Store bone hierarchy and animation keyframes per model
- Compute bone matrices per-instance with keyframe interpolation
- Upload bone weights/indices in vertex buffer, skinning in vertex shader
- Fix terrain texture rendering: restore sampler-to-unit uniform bindings
  removed during texture bind optimization (roads were invisible)

											
										
										
											2026-02-04 11:40:00 -08:00
+								    if (keys.timestamps.empty() || keys.vec3Values.empty()) return def;
 								    auto safe = [&](const glm::vec3& v) -> glm::vec3 {
 								        if (std::isnan(v.x) || std::isnan(v.y) || std::isnan(v.z)) return def;
 								        return v;
 								    };
 								    if (keys.vec3Values.size() == 1) return safe(keys.vec3Values[0]);
-												Add M2 global sequence animation, smoke UV scroll, and fix WMO floor detection

- Parse global sequence durations from M2 binary and use them in bone
  interpolation so torches, candles, and other env doodads animate.
- Add UV scroll shader effect for smoke models (HouseSmoke, SmokeStack)
  as a workaround for unimplemented M2 particle emitters.
- Tighten WMO floor probe heights to prevent multi-story buildings from
  returning the wrong floor, fixing player clipping through inn floors
  and camera locking onto the second floor.
- Use player ground level as reference for camera orbit floor collision
  so the camera doesn't fight upper floors in buildings.

											
										
										
											2026-02-04 14:06:59 -08:00
+								    int idx = findKeyframeIndex(keys.timestamps, t);
-												Add M2 skeletal animation and fix terrain texture layers

- Implement GPU bone skinning for M2 doodads/creatures (gryphons, birds)
- Store bone hierarchy and animation keyframes per model
- Compute bone matrices per-instance with keyframe interpolation
- Upload bone weights/indices in vertex buffer, skinning in vertex shader
- Fix terrain texture rendering: restore sampler-to-unit uniform bindings
  removed during texture bind optimization (roads were invisible)

											
										
										
											2026-02-04 11:40:00 -08:00
+								    if (idx < 0) return def;
 								    size_t i0 = static_cast<size_t>(idx);
 								    size_t i1 = std::min(i0 + 1, keys.vec3Values.size() - 1);
 								    if (i0 == i1) return safe(keys.vec3Values[i0]);
 								    float t0 = static_cast<float>(keys.timestamps[i0]);
 								    float t1 = static_cast<float>(keys.timestamps[i1]);
 								    float dur = t1 - t0;
-												Add M2 global sequence animation, smoke UV scroll, and fix WMO floor detection

- Parse global sequence durations from M2 binary and use them in bone
  interpolation so torches, candles, and other env doodads animate.
- Add UV scroll shader effect for smoke models (HouseSmoke, SmokeStack)
  as a workaround for unimplemented M2 particle emitters.
- Tighten WMO floor probe heights to prevent multi-story buildings from
  returning the wrong floor, fixing player clipping through inn floors
  and camera locking onto the second floor.
- Use player ground level as reference for camera orbit floor collision
  so the camera doesn't fight upper floors in buildings.

											
										
										
											2026-02-04 14:06:59 -08:00
+								    float frac = (dur > 0.0f) ? glm::clamp((t - t0) / dur, 0.0f, 1.0f) : 0.0f;
 								    return safe(glm::mix(keys.vec3Values[i0], keys.vec3Values[i1], frac));
-												Add M2 skeletal animation and fix terrain texture layers

- Implement GPU bone skinning for M2 doodads/creatures (gryphons, birds)
- Store bone hierarchy and animation keyframes per model
- Compute bone matrices per-instance with keyframe interpolation
- Upload bone weights/indices in vertex buffer, skinning in vertex shader
- Fix terrain texture rendering: restore sampler-to-unit uniform bindings
  removed during texture bind optimization (roads were invisible)

											
										
										
											2026-02-04 11:40:00 -08:00
+								}
 								// Samples a quaternion animation track at the given sequence/time.
 								//
 								// Returns the identity rotation when the track has no data, the resolved
 								// sequence index is out of range, the sequence has no timestamps or values, or
 								// no keyframe index is found (findKeyframeIndex returns a negative value).
 								// Individual keys whose squared length is NaN or below 1e-6 are replaced by
 								// identity before use. Otherwise returns the slerp of the two keys around the
 								// resolved time.
 								//
 								// track              - rotation track to sample
 								// seqIdx, time       - requested animation sequence and time within it
 								// globalSeqDurations - durations used by resolveTrackTime for global sequences
 								static glm::quat interpQuat(const pipeline::M2AnimationTrack& track,
 								                              int seqIdx, float time,
 								                              const std::vector<uint32_t>& globalSeqDurations) {
 								    glm::quat identity(1.0f, 0.0f, 0.0f, 0.0f);
 								    if (!track.hasData()) return identity;

 								    int si = 0;
 								    float t = 0.0f;
 								    resolveTrackTime(track, seqIdx, time, globalSeqDurations, si, t);
 								    if (si < 0 || si >= static_cast<int>(track.sequences.size())) return identity;

 								    const auto& keys = track.sequences[si];
 								    if (keys.timestamps.empty() || keys.quatValues.empty()) return identity;

 								    // Reject degenerate keys; compares squared length so no sqrt is needed.
 								    auto safe = [&](const glm::quat& q) -> glm::quat {
 								        float lenSq = q.x*q.x + q.y*q.y + q.z*q.z + q.w*q.w;
 								        if (lenSq < 0.000001f || std::isnan(lenSq)) return identity;
 								        return q;
 								    };
 								    if (keys.quatValues.size() == 1) return safe(keys.quatValues[0]);

 								    int idx = findKeyframeIndex(keys.timestamps, t);
 								    if (idx < 0) return identity;

 								    // Both arrays are indexed below, so clamp to the last key present in BOTH.
 								    // This keeps mismatched timestamp/value counts from reading out of bounds;
 								    // for equal-length arrays the indices are the same as an unclamped lookup.
 								    const size_t lastKey = std::min(keys.timestamps.size(), keys.quatValues.size()) - 1;
 								    const size_t i0 = std::min(static_cast<size_t>(idx), lastKey);
 								    const size_t i1 = std::min(i0 + 1, lastKey);
 								    if (i0 == i1) return safe(keys.quatValues[i0]);

 								    const float t0 = static_cast<float>(keys.timestamps[i0]);
 								    const float t1 = static_cast<float>(keys.timestamps[i1]);
 								    const float dur = t1 - t0;
 								    // A non-positive key spacing snaps to the first key instead of dividing.
 								    const float frac = (dur > 0.0f) ? glm::clamp((t - t0) / dur, 0.0f, 1.0f) : 0.0f;
 								    return glm::slerp(safe(keys.quatValues[i0]), safe(keys.quatValues[i1]), frac);
 								}
 								static void computeBoneMatrices(const M2ModelGPU& model, M2Instance& instance) {
-												Add M2 idle animation variations, dedup instances, fix terrain textures

- Add idle variation system: creatures randomly play Stand variations
  (stretch, flap, look around) every 4-10s, then return to idle loop
- Deduplicate M2 instances at same position (was hidden before animation
  made duplicates visible with different random start times)
- Adaptive M2 render distance: 350 units in open terrain, 180 in cities
- Restore terrain sampler-to-unit uniform bindings lost during texture
  bind optimization (roads were invisible under grass)
- Safety: clamp bone count to 128, validate sequence indices, sanitize scale

											
										
										
											2026-02-04 11:50:18 -08:00
+								    size_t numBones = std::min(model.bones.size(), size_t(128));
-												Add M2 skeletal animation and fix terrain texture layers

- Implement GPU bone skinning for M2 doodads/creatures (gryphons, birds)
- Store bone hierarchy and animation keyframes per model
- Compute bone matrices per-instance with keyframe interpolation
- Upload bone weights/indices in vertex buffer, skinning in vertex shader
- Fix terrain texture rendering: restore sampler-to-unit uniform bindings
  removed during texture bind optimization (roads were invisible)

											
										
										
											2026-02-04 11:40:00 -08:00
+								    if (numBones == 0) return;
 								    instance.boneMatrices.resize(numBones);
-												Add M2 global sequence animation, smoke UV scroll, and fix WMO floor detection

- Parse global sequence durations from M2 binary and use them in bone
  interpolation so torches, candles, and other env doodads animate.
- Add UV scroll shader effect for smoke models (HouseSmoke, SmokeStack)
  as a workaround for unimplemented M2 particle emitters.
- Tighten WMO floor probe heights to prevent multi-story buildings from
  returning the wrong floor, fixing player clipping through inn floors
  and camera locking onto the second floor.
- Use player ground level as reference for camera orbit floor collision
  so the camera doesn't fight upper floors in buildings.

											
										
										
											2026-02-04 14:06:59 -08:00
+								    const auto& gsd = model.globalSequenceDurations;
-												Add M2 skeletal animation and fix terrain texture layers

- Implement GPU bone skinning for M2 doodads/creatures (gryphons, birds)
- Store bone hierarchy and animation keyframes per model
- Compute bone matrices per-instance with keyframe interpolation
- Upload bone weights/indices in vertex buffer, skinning in vertex shader
- Fix terrain texture rendering: restore sampler-to-unit uniform bindings
  removed during texture bind optimization (roads were invisible)

											
										
										
											2026-02-04 11:40:00 -08:00
 								    for (size_t i = 0; i < numBones; i++) {
 								        const auto& bone = model.bones[i];
-												Add M2 global sequence animation, smoke UV scroll, and fix WMO floor detection

- Parse global sequence durations from M2 binary and use them in bone
  interpolation so torches, candles, and other env doodads animate.
- Add UV scroll shader effect for smoke models (HouseSmoke, SmokeStack)
  as a workaround for unimplemented M2 particle emitters.
- Tighten WMO floor probe heights to prevent multi-story buildings from
  returning the wrong floor, fixing player clipping through inn floors
  and camera locking onto the second floor.
- Use player ground level as reference for camera orbit floor collision
  so the camera doesn't fight upper floors in buildings.

											
										
										
											2026-02-04 14:06:59 -08:00
+								        glm::vec3 trans = interpVec3(bone.translation, instance.currentSequenceIndex, instance.animTime, glm::vec3(0.0f), gsd);
 								        glm::quat rot = interpQuat(bone.rotation, instance.currentSequenceIndex, instance.animTime, gsd);
 								        glm::vec3 scl = interpVec3(bone.scale, instance.currentSequenceIndex, instance.animTime, glm::vec3(1.0f), gsd);
-												Add M2 skeletal animation and fix terrain texture layers

- Implement GPU bone skinning for M2 doodads/creatures (gryphons, birds)
- Store bone hierarchy and animation keyframes per model
- Compute bone matrices per-instance with keyframe interpolation
- Upload bone weights/indices in vertex buffer, skinning in vertex shader
- Fix terrain texture rendering: restore sampler-to-unit uniform bindings
  removed during texture bind optimization (roads were invisible)

											
										
										
											2026-02-04 11:40:00 -08:00
-												Add M2 idle animation variations, dedup instances, fix terrain textures

- Add idle variation system: creatures randomly play Stand variations
  (stretch, flap, look around) every 4-10s, then return to idle loop
- Deduplicate M2 instances at same position (was hidden before animation
  made duplicates visible with different random start times)
- Adaptive M2 render distance: 350 units in open terrain, 180 in cities
- Restore terrain sampler-to-unit uniform bindings lost during texture
  bind optimization (roads were invisible under grass)
- Safety: clamp bone count to 128, validate sequence indices, sanitize scale

											
										
										
											2026-02-04 11:50:18 -08:00
+								        // Sanity check scale to avoid degenerate matrices
 								        if (scl.x < 0.001f) scl.x = 1.0f;
 								        if (scl.y < 0.001f) scl.y = 1.0f;
 								        if (scl.z < 0.001f) scl.z = 1.0f;
-												Add M2 skeletal animation and fix terrain texture layers

- Implement GPU bone skinning for M2 doodads/creatures (gryphons, birds)
- Store bone hierarchy and animation keyframes per model
- Compute bone matrices per-instance with keyframe interpolation
- Upload bone weights/indices in vertex buffer, skinning in vertex shader
- Fix terrain texture rendering: restore sampler-to-unit uniform bindings
  removed during texture bind optimization (roads were invisible)

											
										
										
											2026-02-04 11:40:00 -08:00
+								        glm::mat4 local = glm::translate(glm::mat4(1.0f), bone.pivot);
 								        local = glm::translate(local, trans);
 								        local *= glm::toMat4(rot);
 								        local = glm::scale(local, scl);
 								        local = glm::translate(local, -bone.pivot);
 								        if (bone.parentBone >= 0 && static_cast<size_t>(bone.parentBone) < numBones) {
 								            instance.boneMatrices[i] = instance.boneMatrices[bone.parentBone] * local;
 								        } else {
 								            instance.boneMatrices[i] = local;
 								        }
 								    }
-												Fix WMO wall collision, normal mapping, POM backfill, and M2/WMO rendering performance

- Fix MOPY flag check (0x08 not 0x01) for proper wall collision detection
- Cap MAX_PUSH to PLAYER_RADIUS to prevent gradual clip-through
- Fix WMO doodad quaternion component ordering (X/Y swap)
- Linear normal map strength blend in shader for smooth slider control
- Enable shadow sampling for interior WMO groups (covered outdoor areas)
- Backfill deferred normal/height maps after streaming with descriptor rebind
- M2: prepareRender only iterates animated instances, bone dirty flag
- M2: remove worker thread VMA allocation, skip unready bone instances
- WMO: persistent visibility vectors, sequential culling
- Add FSR EASU/RCAS shaders

											
										
										
											2026-03-07 22:03:28 -08:00
+								    instance.bonesDirty = true;
-												Add M2 skeletal animation and fix terrain texture layers

- Implement GPU bone skinning for M2 doodads/creatures (gryphons, birds)
- Store bone hierarchy and animation keyframes per model
- Compute bone matrices per-instance with keyframe interpolation
- Upload bone weights/indices in vertex buffer, skinning in vertex shader
- Fix terrain texture rendering: restore sampler-to-unit uniform bindings
  removed during texture bind optimization (roads were invisible)

											
										
										
											2026-02-04 11:40:00 -08:00
+								}
-												Skip bone computation for off-screen M2 instances, sort by model for batched VAO binds, and eliminate sqrt in distance fade

											
										
										
											2026-02-07 14:37:14 -08:00
+								void M2Renderer::update(float deltaTime, const glm::vec3& cameraPos, const glm::mat4& viewProjection) {
-												Fix transport sync and stabilize WMO/tunnel grounding

											
										
										
											2026-02-12 00:04:53 -08:00
+								    if (spatialIndexDirty_) {
 								        rebuildSpatialIndex();
 								    }
-												Add M2 idle animation variations, dedup instances, fix terrain textures

- Add idle variation system: creatures randomly play Stand variations
  (stretch, flap, look around) every 4-10s, then return to idle loop
- Deduplicate M2 instances at same position (was hidden before animation
  made duplicates visible with different random start times)
- Adaptive M2 render distance: 350 units in open terrain, 180 in cities
- Restore terrain sampler-to-unit uniform bindings lost during texture
  bind optimization (roads were invisible under grass)
- Safety: clamp bone count to 128, validate sequence indices, sanitize scale

											
										
										
											2026-02-04 11:50:18 -08:00
+								    float dtMs = deltaTime * 1000.0f;
-												Add smoke particle emitters with ember sparks and enable 4x MSAA

Replace UV scroll workaround for chimney smoke with proper GL_POINTS
particle system. Smoke particles rise, expand, drift, and fade over
4-7 seconds. One in eight particles spawns as a bright orange/red
ember spark. Enable 4x multisample antialiasing for smoother edges
on player models, fences, and foliage.

											
										
										
											2026-02-04 14:37:32 -08:00
-												Skip bone computation for off-screen M2 instances, sort by model for batched VAO binds, and eliminate sqrt in distance fade

											
										
										
											2026-02-07 14:37:14 -08:00
+								    // Cache camera state for frustum-culling bone computation
 								    cachedCamPos_ = cameraPos;
-												Fix Windows ARM64 build: disable x86 asm in StormLib's libtomcrypt

StormLib's bundled libtomcrypt uses x86 inline assembly (bswapl/movl)
gated by __MINGW32__, which is defined on CLANGARM64 too. Pass
-DLTC_NO_BSWAP to force portable C byte-swap fallback.

											
										
										
											2026-02-25 03:06:06 -08:00
+								    const float maxRenderDistance = (instances.size() > 2000) ? 800.0f : 2800.0f;
-												Skip bone computation for off-screen M2 instances, sort by model for batched VAO binds, and eliminate sqrt in distance fade

											
										
										
											2026-02-07 14:37:14 -08:00
+								    cachedMaxRenderDistSq_ = maxRenderDistance * maxRenderDistance;
 								    // Build frustum for culling bones
 								    Frustum updateFrustum;
 								    updateFrustum.extractFromMatrix(viewProjection);
-												Optimize M2 update loop: skip static doodads, incremental spatial index

- Split M2 instances into fast-path index lists (animated, particle-only,
  particle-all, smoke) to avoid iterating all 46K instances per frame
- Cache model flags (hasAnimation, disableAnimation, isSmoke, etc.) on
  M2Instance struct to eliminate per-frame hash lookups
- Replace full rebuildSpatialIndex on position/transform updates with
  incremental grid cell remove+add, preventing 8.5ms/frame rebuild cost
- Advance animTime for all instances (texture UV animation) but only
  compute bones and particles for the ~3K that need it

M2_UPDATE: 10.7ms → 2.0ms, FPS: 35 → 55-59

											
										
										
											2026-03-02 14:45:49 -08:00
+								    // --- Smoke particle spawning (only iterate tracked smoke instances) ---
-												Add smoke particle emitters with ember sparks and enable 4x MSAA

Replace UV scroll workaround for chimney smoke with proper GL_POINTS
particle system. Smoke particles rise, expand, drift, and fade over
4-7 seconds. One in eight particles spawns as a bright orange/red
ember spark. Enable 4x multisample antialiasing for smoother edges
on player models, fences, and foliage.

											
										
										
											2026-02-04 14:37:32 -08:00
+								    std::uniform_real_distribution<float> distXY(-0.4f, 0.4f);
 								    std::uniform_real_distribution<float> distVelXY(-0.3f, 0.3f);
 								    std::uniform_real_distribution<float> distVelZ(3.0f, 5.0f);
 								    std::uniform_real_distribution<float> distLife(4.0f, 7.0f);
 								    std::uniform_real_distribution<float> distDrift(-0.2f, 0.2f);
 								    smokeEmitAccum += deltaTime;
 								    float emitInterval = 1.0f / 8.0f;  // 8 particles per second per emitter
-												Optimize M2 update loop: skip static doodads, incremental spatial index

- Split M2 instances into fast-path index lists (animated, particle-only,
  particle-all, smoke) to avoid iterating all 46K instances per frame
- Cache model flags (hasAnimation, disableAnimation, isSmoke, etc.) on
  M2Instance struct to eliminate per-frame hash lookups
- Replace full rebuildSpatialIndex on position/transform updates with
  incremental grid cell remove+add, preventing 8.5ms/frame rebuild cost
- Advance animTime for all instances (texture UV animation) but only
  compute bones and particles for the ~3K that need it

M2_UPDATE: 10.7ms → 2.0ms, FPS: 35 → 55-59

											
										
										
											2026-03-02 14:45:49 -08:00
+								    if (smokeEmitAccum >= emitInterval &&
 								        static_cast<int>(smokeParticles.size()) < MAX_SMOKE_PARTICLES) {
 								        for (size_t si : smokeInstanceIndices_) {
 								            if (si >= instances.size()) continue;
 								            auto& instance = instances[si];
-												Add smoke particle emitters with ember sparks and enable 4x MSAA

Replace UV scroll workaround for chimney smoke with proper GL_POINTS
particle system. Smoke particles rise, expand, drift, and fade over
4-7 seconds. One in eight particles spawns as a bright orange/red
ember spark. Enable 4x multisample antialiasing for smoother edges
on player models, fences, and foliage.

											
										
										
											2026-02-04 14:37:32 -08:00
 								            glm::vec3 emitWorld = glm::vec3(instance.modelMatrix * glm::vec4(0.0f, 0.0f, 0.0f, 1.0f));
 								            bool spark = (smokeRng() % 8 == 0);
 								            SmokeParticle p;
 								            p.position = emitWorld + glm::vec3(distXY(smokeRng), distXY(smokeRng), 0.0f);
 								            if (spark) {
 								                p.velocity = glm::vec3(distVelXY(smokeRng) * 2.0f, distVelXY(smokeRng) * 2.0f, distVelZ(smokeRng) * 1.5f);
-												Optimize M2 update loop: skip static doodads, incremental spatial index

- Split M2 instances into fast-path index lists (animated, particle-only,
  particle-all, smoke) to avoid iterating all 46K instances per frame
- Cache model flags (hasAnimation, disableAnimation, isSmoke, etc.) on
  M2Instance struct to eliminate per-frame hash lookups
- Replace full rebuildSpatialIndex on position/transform updates with
  incremental grid cell remove+add, preventing 8.5ms/frame rebuild cost
- Advance animTime for all instances (texture UV animation) but only
  compute bones and particles for the ~3K that need it

M2_UPDATE: 10.7ms → 2.0ms, FPS: 35 → 55-59

											
										
										
											2026-03-02 14:45:49 -08:00
+								                p.maxLife = 0.8f + static_cast<float>(smokeRng() % 100) / 100.0f * 1.2f;
-												Add smoke particle emitters with ember sparks and enable 4x MSAA

Replace UV scroll workaround for chimney smoke with proper GL_POINTS
particle system. Smoke particles rise, expand, drift, and fade over
4-7 seconds. One in eight particles spawns as a bright orange/red
ember spark. Enable 4x multisample antialiasing for smoother edges
on player models, fences, and foliage.

											
										
										
											2026-02-04 14:37:32 -08:00
+								                p.size = 0.5f;
 								                p.isSpark = 1.0f;
 								            } else {
 								                p.velocity = glm::vec3(distVelXY(smokeRng), distVelXY(smokeRng), distVelZ(smokeRng));
 								                p.maxLife = distLife(smokeRng);
 								                p.size = 1.0f;
 								                p.isSpark = 0.0f;
 								            }
 								            p.life = 0.0f;
 								            p.instanceId = instance.id;
 								            smokeParticles.push_back(p);
-												Optimize M2 update loop: skip static doodads, incremental spatial index

- Split M2 instances into fast-path index lists (animated, particle-only,
  particle-all, smoke) to avoid iterating all 46K instances per frame
- Cache model flags (hasAnimation, disableAnimation, isSmoke, etc.) on
  M2Instance struct to eliminate per-frame hash lookups
- Replace full rebuildSpatialIndex on position/transform updates with
  incremental grid cell remove+add, preventing 8.5ms/frame rebuild cost
- Advance animTime for all instances (texture UV animation) but only
  compute bones and particles for the ~3K that need it

M2_UPDATE: 10.7ms → 2.0ms, FPS: 35 → 55-59

											
										
										
											2026-03-02 14:45:49 -08:00
+								            if (static_cast<int>(smokeParticles.size()) >= MAX_SMOKE_PARTICLES) break;
-												Add smoke particle emitters with ember sparks and enable 4x MSAA

Replace UV scroll workaround for chimney smoke with proper GL_POINTS
particle system. Smoke particles rise, expand, drift, and fade over
4-7 seconds. One in eight particles spawns as a bright orange/red
ember spark. Enable 4x multisample antialiasing for smoother edges
on player models, fences, and foliage.

											
										
										
											2026-02-04 14:37:32 -08:00
+								        }
 								        smokeEmitAccum = 0.0f;
 								    }
-												Optimize animation hotpaths: binary keyframe search, eliminate sqrt calls

- Replace O(n) linear keyframe search with O(log n) binary search in both
  M2 and Character renderers (runs thousands of times per frame)
- Smoke particle removal: swap-and-pop instead of O(n²) vector erase
- Character render backface cull: eliminate sqrt via squared comparison
- Quaternion validation: use length² instead of sqrt-based length check

											
										
										
											2026-03-04 08:33:56 -08:00
+								    // --- Update existing smoke particles (swap-and-pop for O(1) removal) ---
 								    for (size_t i = 0; i < smokeParticles.size(); ) {
 								        auto& p = smokeParticles[i];
 								        p.life += deltaTime;
 								        if (p.life >= p.maxLife) {
 								            smokeParticles[i] = smokeParticles.back();
 								            smokeParticles.pop_back();
-												Add smoke particle emitters with ember sparks and enable 4x MSAA

Replace UV scroll workaround for chimney smoke with proper GL_POINTS
particle system. Smoke particles rise, expand, drift, and fade over
4-7 seconds. One in eight particles spawns as a bright orange/red
ember spark. Enable 4x multisample antialiasing for smoother edges
on player models, fences, and foliage.

											
										
										
											2026-02-04 14:37:32 -08:00
+								            continue;
 								        }
-												Optimize animation hotpaths: binary keyframe search, eliminate sqrt calls

- Replace O(n) linear keyframe search with O(log n) binary search in both
  M2 and Character renderers (runs thousands of times per frame)
- Smoke particle removal: swap-and-pop instead of O(n²) vector erase
- Character render backface cull: eliminate sqrt via squared comparison
- Quaternion validation: use length² instead of sqrt-based length check

											
										
										
											2026-03-04 08:33:56 -08:00
+								        p.position += p.velocity * deltaTime;
 								        p.velocity.z *= 0.98f;  // Slight deceleration
 								        p.velocity.x += distDrift(smokeRng) * deltaTime;
 								        p.velocity.y += distDrift(smokeRng) * deltaTime;
-												Add smoke particle emitters with ember sparks and enable 4x MSAA

Replace UV scroll workaround for chimney smoke with proper GL_POINTS
particle system. Smoke particles rise, expand, drift, and fade over
4-7 seconds. One in eight particles spawns as a bright orange/red
ember spark. Enable 4x multisample antialiasing for smoother edges
on player models, fences, and foliage.

											
										
										
											2026-02-04 14:37:32 -08:00
+								        // Grow from 1.0 to 3.5 over lifetime
-												Optimize animation hotpaths: binary keyframe search, eliminate sqrt calls

- Replace O(n) linear keyframe search with O(log n) binary search in both
  M2 and Character renderers (runs thousands of times per frame)
- Smoke particle removal: swap-and-pop instead of O(n²) vector erase
- Character render backface cull: eliminate sqrt via squared comparison
- Quaternion validation: use length² instead of sqrt-based length check

											
										
										
											2026-03-04 08:33:56 -08:00
+								        float t = p.life / p.maxLife;
 								        p.size = 1.0f + t * 2.5f;
 								        ++i;
-												Add smoke particle emitters with ember sparks and enable 4x MSAA

Replace UV scroll workaround for chimney smoke with proper GL_POINTS
particle system. Smoke particles rise, expand, drift, and fade over
4-7 seconds. One in eight particles spawns as a bright orange/red
ember spark. Enable 4x multisample antialiasing for smoother edges
on player models, fences, and foliage.

											
										
										
											2026-02-04 14:37:32 -08:00
+								    }
-												Instance portal glow, spin, and transparent additive rendering

											
										
										
											2026-03-06 18:02:56 -08:00
+								    // --- Spin instance portals ---
 								    static constexpr float PORTAL_SPIN_SPEED = 1.2f; // radians/sec
 								    for (size_t idx : portalInstanceIndices_) {
 								        if (idx >= instances.size()) continue;
 								        auto& inst = instances[idx];
 								        inst.portalSpinAngle += PORTAL_SPIN_SPEED * deltaTime;
 								        if (inst.portalSpinAngle > 6.2831853f)
 								            inst.portalSpinAngle -= 6.2831853f;
 								        inst.rotation.z = inst.portalSpinAngle;
 								        inst.updateModelMatrix();
 								    }
-												Add smoke particle emitters with ember sparks and enable 4x MSAA

Replace UV scroll workaround for chimney smoke with proper GL_POINTS
particle system. Smoke particles rise, expand, drift, and fade over
4-7 seconds. One in eight particles spawns as a bright orange/red
ember spark. Enable 4x multisample antialiasing for smoother edges
on player models, fences, and foliage.

											
										
										
											2026-02-04 14:37:32 -08:00
+								    // --- Normal M2 animation update ---
-												Optimize M2 update loop: skip static doodads, incremental spatial index

- Split M2 instances into fast-path index lists (animated, particle-only,
  particle-all, smoke) to avoid iterating all 46K instances per frame
- Cache model flags (hasAnimation, disableAnimation, isSmoke, etc.) on
  M2Instance struct to eliminate per-frame hash lookups
- Replace full rebuildSpatialIndex on position/transform updates with
  incremental grid cell remove+add, preventing 8.5ms/frame rebuild cost
- Advance animTime for all instances (texture UV animation) but only
  compute bones and particles for the ~3K that need it

M2_UPDATE: 10.7ms → 2.0ms, FPS: 35 → 55-59

											
										
										
											2026-03-02 14:45:49 -08:00
+								    // Advance animTime for ALL instances (needed for texture UV animation on static doodads).
 								    // This is a tight loop touching only one float per instance — no hash lookups.
 								    for (auto& instance : instances) {
 								        instance.animTime += dtMs;
 								    }
 								    // Wrap animTime for particle-only instances so emission rate tracks keep looping
 								    for (size_t idx : particleOnlyInstanceIndices_) {
 								        if (idx >= instances.size()) continue;
 								        auto& instance = instances[idx];
 								        if (instance.animTime > 3333.0f) {
 								            instance.animTime = std::fmod(instance.animTime, 3333.0f);
 								        }
-												Eliminate per-frame allocations in M2 renderer to reduce CPU stutter

Use persistent vectors for animation work indices, futures, and glow sprites instead of allocating each frame.

											
										
										
											2026-02-09 00:41:07 -08:00
+								    }
-												Parallelize M2 bone matrix computation across worker threads

Split the M2 animation update loop into three phases: sequential animation state update, parallel bone matrix computation via std::async (when 32+ animated instances), and sequential particle update. Each thread processes a disjoint slice of instances so no synchronization is needed.

											
										
										
											2026-02-07 14:28:14 -08:00
-												Optimize M2 update loop: skip static doodads, incremental spatial index

- Split M2 instances into fast-path index lists (animated, particle-only,
  particle-all, smoke) to avoid iterating all 46K instances per frame
- Cache model flags (hasAnimation, disableAnimation, isSmoke, etc.) on
  M2Instance struct to eliminate per-frame hash lookups
- Replace full rebuildSpatialIndex on position/transform updates with
  incremental grid cell remove+add, preventing 8.5ms/frame rebuild cost
- Advance animTime for all instances (texture UV animation) but only
  compute bones and particles for the ~3K that need it

M2_UPDATE: 10.7ms → 2.0ms, FPS: 35 → 55-59

											
										
										
											2026-03-02 14:45:49 -08:00
+								    boneWorkIndices_.clear();
 								    boneWorkIndices_.reserve(animatedInstanceIndices_.size());
 								    // Update animated instances (full animation state + bone computation culling)
 								    // Note: animTime was already advanced by dtMs in the global loop above.
 								    // Here we apply the speed factor: subtract the base dtMs and add dtMs*speed.
 								    for (size_t idx : animatedInstanceIndices_) {
 								        if (idx >= instances.size()) continue;
-												Parallelize M2 bone matrix computation across worker threads

Split the M2 animation update loop into three phases: sequential animation state update, parallel bone matrix computation via std::async (when 32+ animated instances), and sequential particle update. Each thread processes a disjoint slice of instances so no synchronization is needed.

											
										
										
											2026-02-07 14:28:14 -08:00
+								        auto& instance = instances[idx];
-												Optimize M2 update loop: skip static doodads, incremental spatial index

- Split M2 instances into fast-path index lists (animated, particle-only,
  particle-all, smoke) to avoid iterating all 46K instances per frame
- Cache model flags (hasAnimation, disableAnimation, isSmoke, etc.) on
  M2Instance struct to eliminate per-frame hash lookups
- Replace full rebuildSpatialIndex on position/transform updates with
  incremental grid cell remove+add, preventing 8.5ms/frame rebuild cost
- Advance animTime for all instances (texture UV animation) but only
  compute bones and particles for the ~3K that need it

M2_UPDATE: 10.7ms → 2.0ms, FPS: 35 → 55-59

											
										
										
											2026-03-02 14:45:49 -08:00
 								        instance.animTime += dtMs * (instance.animSpeed - 1.0f);
 								        // For animation looping/variation, we need the actual model data.
-												Background BLP texture pre-decoding + deferred WMO normal maps (12x streaming perf)

Move CPU-heavy BLP texture decoding from main thread to background worker
threads for all hot paths: terrain M2 models, WMO doodad M2s, WMO textures,
creature models, and gameobject WMOs. Each renderer (M2, WMO, Character) now
accepts a pre-decoded BLP cache that loadTexture() checks before falling back
to synchronous decode.

Defer WMO normal/height map generation (3 per-pixel passes: luminance, box
blur, Sobel) during terrain streaming finalization — this was the dominant
remaining bottleneck after BLP pre-decoding.

Terrain streaming stalls: 1576ms → 124ms worst case.

											
										
										
											2026-03-07 15:46:56 -08:00
+								        if (!instance.cachedModel) continue;
 								        const M2ModelGPU& model = *instance.cachedModel;
-												Add M2 skeletal animation and fix terrain texture layers

- Implement GPU bone skinning for M2 doodads/creatures (gryphons, birds)
- Store bone hierarchy and animation keyframes per model
- Compute bone matrices per-instance with keyframe interpolation
- Upload bone weights/indices in vertex buffer, skinning in vertex shader
- Fix terrain texture rendering: restore sampler-to-unit uniform bindings
  removed during texture bind optimization (roads were invisible)

											
										
										
											2026-02-04 11:40:00 -08:00
-												Add M2 idle animation variations, dedup instances, fix terrain textures

- Add idle variation system: creatures randomly play Stand variations
  (stretch, flap, look around) every 4-10s, then return to idle loop
- Deduplicate M2 instances at same position (was hidden before animation
  made duplicates visible with different random start times)
- Adaptive M2 render distance: 350 units in open terrain, 180 in cities
- Restore terrain sampler-to-unit uniform bindings lost during texture
  bind optimization (roads were invisible under grass)
- Safety: clamp bone count to 128, validate sequence indices, sanitize scale

											
										
										
											2026-02-04 11:50:18 -08:00
+								        // Validate sequence index
 								        if (instance.currentSequenceIndex < 0 ||
 								            instance.currentSequenceIndex >= static_cast<int>(model.sequences.size())) {
 								            instance.currentSequenceIndex = 0;
 								            if (!model.sequences.empty()) {
 								                instance.animDuration = static_cast<float>(model.sequences[0].duration);
 								            }
 								        }
-												Add M2 skeletal animation and fix terrain texture layers

- Implement GPU bone skinning for M2 doodads/creatures (gryphons, birds)
- Store bone hierarchy and animation keyframes per model
- Compute bone matrices per-instance with keyframe interpolation
- Upload bone weights/indices in vertex buffer, skinning in vertex shader
- Fix terrain texture rendering: restore sampler-to-unit uniform bindings
  removed during texture bind optimization (roads were invisible)

											
										
										
											2026-02-04 11:40:00 -08:00
-												Add M2 idle animation variations, dedup instances, fix terrain textures

- Add idle variation system: creatures randomly play Stand variations
  (stretch, flap, look around) every 4-10s, then return to idle loop
- Deduplicate M2 instances at same position (was hidden before animation
  made duplicates visible with different random start times)
- Adaptive M2 render distance: 350 units in open terrain, 180 in cities
- Restore terrain sampler-to-unit uniform bindings lost during texture
  bind optimization (roads were invisible under grass)
- Safety: clamp bone count to 128, validate sequence indices, sanitize scale

											
										
										
											2026-02-04 11:50:18 -08:00
+								        // Handle animation looping / variation transitions
-												Optimize M2 update loop: skip static doodads, incremental spatial index

- Split M2 instances into fast-path index lists (animated, particle-only,
  particle-all, smoke) to avoid iterating all 46K instances per frame
- Cache model flags (hasAnimation, disableAnimation, isSmoke, etc.) on
  M2Instance struct to eliminate per-frame hash lookups
- Replace full rebuildSpatialIndex on position/transform updates with
  incremental grid cell remove+add, preventing 8.5ms/frame rebuild cost
- Advance animTime for all instances (texture UV animation) but only
  compute bones and particles for the ~3K that need it

M2_UPDATE: 10.7ms → 2.0ms, FPS: 35 → 55-59

											
										
										
											2026-03-02 14:45:49 -08:00
+								        if (instance.animDuration <= 0.0f && instance.cachedHasParticleEmitters) {
 								            instance.animDuration = 3333.0f;
-												Fix M2 particle rendering: color, gravity, transparency, and animation

- Fix FBlock color keys from 3-byte BGR to 4-byte RGBA (CImVector) to
  prevent garbled purple/red colors from byte misalignment
- Add circular soft-edge falloff in particle fragment shader (GL_POINTS
  rendered as squares by default)
- Apply default gravity (4.0 spray, 1.5 mist) when M2 gravity is 0 since
  bone animation from .anim files isn't loaded yet
- Add drift velocity to speed=0 emitters so particles spread as mist
  instead of clustering at static bone positions
- Run particle updates for all nearby instances, not just those in
  boneWorkIndices_, to prevent particles freezing when bone culled
- Wrap animation time for particle models to keep emission tracks looping
- Cap particle scale to 1.5 and reduce point size multiplier (800→400)
- Desaturate FBlock colors 70% toward white for natural water appearance
- Reduce additive blend alpha to 5% and volume particles to 2%

											
										
										
											2026-02-16 02:12:43 -08:00
+								        }
-												Add M2 skeletal animation and fix terrain texture layers

- Implement GPU bone skinning for M2 doodads/creatures (gryphons, birds)
- Store bone hierarchy and animation keyframes per model
- Compute bone matrices per-instance with keyframe interpolation
- Upload bone weights/indices in vertex buffer, skinning in vertex shader
- Fix terrain texture rendering: restore sampler-to-unit uniform bindings
  removed during texture bind optimization (roads were invisible)

											
										
										
											2026-02-04 11:40:00 -08:00
+								        if (instance.animDuration > 0.0f && instance.animTime >= instance.animDuration) {
-												Add M2 idle animation variations, dedup instances, fix terrain textures

- Add idle variation system: creatures randomly play Stand variations
  (stretch, flap, look around) every 4-10s, then return to idle loop
- Deduplicate M2 instances at same position (was hidden before animation
  made duplicates visible with different random start times)
- Adaptive M2 render distance: 350 units in open terrain, 180 in cities
- Restore terrain sampler-to-unit uniform bindings lost during texture
  bind optimization (roads were invisible under grass)
- Safety: clamp bone count to 128, validate sequence indices, sanitize scale

											
										
										
											2026-02-04 11:50:18 -08:00
+								            if (instance.playingVariation) {
 								                instance.playingVariation = false;
 								                instance.currentSequenceIndex = instance.idleSequenceIndex;
 								                if (instance.idleSequenceIndex < static_cast<int>(model.sequences.size())) {
 								                    instance.animDuration = static_cast<float>(model.sequences[instance.idleSequenceIndex].duration);
 								                }
 								                instance.animTime = 0.0f;
 								                instance.variationTimer = 4000.0f + static_cast<float>(rand() % 6000);
 								            } else {
 								                instance.animTime = std::fmod(instance.animTime, std::max(1.0f, instance.animDuration));
 								            }
 								        }
-												Optimize M2 update loop: skip static doodads, incremental spatial index

- Split M2 instances into fast-path index lists (animated, particle-only,
  particle-all, smoke) to avoid iterating all 46K instances per frame
- Cache model flags (hasAnimation, disableAnimation, isSmoke, etc.) on
  M2Instance struct to eliminate per-frame hash lookups
- Replace full rebuildSpatialIndex on position/transform updates with
  incremental grid cell remove+add, preventing 8.5ms/frame rebuild cost
- Advance animTime for all instances (texture UV animation) but only
  compute bones and particles for the ~3K that need it

M2_UPDATE: 10.7ms → 2.0ms, FPS: 35 → 55-59

											
										
										
											2026-03-02 14:45:49 -08:00
+								        // Idle variation timer
-												Add M2 idle animation variations, dedup instances, fix terrain textures

- Add idle variation system: creatures randomly play Stand variations
  (stretch, flap, look around) every 4-10s, then return to idle loop
- Deduplicate M2 instances at same position (was hidden before animation
  made duplicates visible with different random start times)
- Adaptive M2 render distance: 350 units in open terrain, 180 in cities
- Restore terrain sampler-to-unit uniform bindings lost during texture
  bind optimization (roads were invisible under grass)
- Safety: clamp bone count to 128, validate sequence indices, sanitize scale

											
										
										
											2026-02-04 11:50:18 -08:00
+								        if (!instance.playingVariation && model.idleVariationIndices.size() > 1) {
 								            instance.variationTimer -= dtMs;
 								            if (instance.variationTimer <= 0.0f) {
 								                int pick = rand() % static_cast<int>(model.idleVariationIndices.size());
 								                int newSeq = model.idleVariationIndices[pick];
 								                if (newSeq != instance.currentSequenceIndex && newSeq < static_cast<int>(model.sequences.size())) {
 								                    instance.playingVariation = true;
 								                    instance.currentSequenceIndex = newSeq;
 								                    instance.animDuration = static_cast<float>(model.sequences[newSeq].duration);
 								                    instance.animTime = 0.0f;
 								                } else {
 								                    instance.variationTimer = 2000.0f + static_cast<float>(rand() % 4000);
 								                }
 								            }
-												Add M2 skeletal animation and fix terrain texture layers

- Implement GPU bone skinning for M2 doodads/creatures (gryphons, birds)
- Store bone hierarchy and animation keyframes per model
- Compute bone matrices per-instance with keyframe interpolation
- Upload bone weights/indices in vertex buffer, skinning in vertex shader
- Fix terrain texture rendering: restore sampler-to-unit uniform bindings
  removed during texture bind optimization (roads were invisible)

											
										
										
											2026-02-04 11:40:00 -08:00
+								        }
-												Harden transport updates and fix waterfall particle tint

											
										
										
											2026-02-12 00:45:24 -08:00
+								        // Frustum + distance cull: skip expensive bone computation for off-screen instances.
-												Optimize M2 update loop: skip static doodads, incremental spatial index

- Split M2 instances into fast-path index lists (animated, particle-only,
  particle-all, smoke) to avoid iterating all 46K instances per frame
- Cache model flags (hasAnimation, disableAnimation, isSmoke, etc.) on
  M2Instance struct to eliminate per-frame hash lookups
- Replace full rebuildSpatialIndex on position/transform updates with
  incremental grid cell remove+add, preventing 8.5ms/frame rebuild cost
- Advance animTime for all instances (texture UV animation) but only
  compute bones and particles for the ~3K that need it

M2_UPDATE: 10.7ms → 2.0ms, FPS: 35 → 55-59

											
										
										
											2026-03-02 14:45:49 -08:00
+								        float worldRadius = instance.cachedBoundRadius * instance.scale;
-												Skip bone computation for off-screen M2 instances, sort by model for batched VAO binds, and eliminate sqrt in distance fade

											
										
										
											2026-02-07 14:37:14 -08:00
+								        float cullRadius = worldRadius;
 								        glm::vec3 toCam = instance.position - cachedCamPos_;
 								        float distSq = glm::dot(toCam, toCam);
 								        float effectiveMaxDistSq = cachedMaxRenderDistSq_ * std::max(1.0f, cullRadius / 12.0f);
 								        if (distSq > effectiveMaxDistSq) continue;
-												Harden transport updates and fix waterfall particle tint

											
										
										
											2026-02-12 00:45:24 -08:00
+								        float paddedRadius = std::max(cullRadius * 1.5f, cullRadius + 3.0f);
 								        if (cullRadius > 0.0f && !updateFrustum.intersectsSphere(instance.position, paddedRadius)) continue;
-												Skip bone computation for off-screen M2 instances, sort by model for batched VAO binds, and eliminate sqrt in distance fade

											
										
										
											2026-02-07 14:37:14 -08:00
-												Background BLP texture pre-decoding + deferred WMO normal maps (12x streaming perf)

Move CPU-heavy BLP texture decoding from main thread to background worker
threads for all hot paths: terrain M2 models, WMO doodad M2s, WMO textures,
creature models, and gameobject WMOs. Each renderer (M2, WMO, Character) now
accepts a pre-decoded BLP cache that loadTexture() checks before falling back
to synchronous decode.

Defer WMO normal/height map generation (3 per-pixel passes: luminance, box
blur, Sobel) during terrain streaming finalization — this was the dominant
remaining bottleneck after BLP pre-decoding.

Terrain streaming stalls: 1576ms → 124ms worst case.

											
										
										
											2026-03-07 15:46:56 -08:00
+								        // Distance-based frame skipping: update distant bones less frequently
 								        uint32_t boneInterval = 1;
 								        if (distSq > 200.0f * 200.0f) boneInterval = 8;
 								        else if (distSq > 100.0f * 100.0f) boneInterval = 4;
 								        else if (distSq > 50.0f * 50.0f) boneInterval = 2;
 								        instance.frameSkipCounter++;
 								        if ((instance.frameSkipCounter % boneInterval) != 0) continue;
-												Eliminate per-frame allocations in M2 renderer to reduce CPU stutter

Use persistent vectors for animation work indices, futures, and glow sprites instead of allocating each frame.

											
										
										
											2026-02-09 00:41:07 -08:00
+								        boneWorkIndices_.push_back(idx);
-												Parallelize M2 bone matrix computation across worker threads

Split the M2 animation update loop into three phases: sequential animation state update, parallel bone matrix computation via std::async (when 32+ animated instances), and sequential particle update. Each thread processes a disjoint slice of instances so no synchronization is needed.

											
										
										
											2026-02-07 14:28:14 -08:00
+								    }
 								    // Phase 2: Compute bone matrices (expensive, parallel if enough work)
-												Eliminate per-frame allocations in M2 renderer to reduce CPU stutter

Use persistent vectors for animation work indices, futures, and glow sprites instead of allocating each frame.

											
										
										
											2026-02-09 00:41:07 -08:00
+								    const size_t animCount = boneWorkIndices_.size();
-												Parallelize M2 bone matrix computation across worker threads

Split the M2 animation update loop into three phases: sequential animation state update, parallel bone matrix computation via std::async (when 32+ animated instances), and sequential particle update. Each thread processes a disjoint slice of instances so no synchronization is needed.

											
										
										
											2026-02-07 14:28:14 -08:00
+								    if (animCount > 0) {
-												Optimize threading and texture fallback stability

											
										
										
											2026-02-22 08:12:08 -08:00
+								        static const size_t minParallelAnimInstances = std::max<size_t>(
 , envSizeOrDefault("WOWEE_M2_ANIM_MT_MIN", 96));
 								        if (animCount < minParallelAnimInstances || numAnimThreads_ <= 1) {
-												Parallelize M2 bone matrix computation across worker threads

Split the M2 animation update loop into three phases: sequential animation state update, parallel bone matrix computation via std::async (when 32+ animated instances), and sequential particle update. Each thread processes a disjoint slice of instances so no synchronization is needed.

											
										
										
											2026-02-07 14:28:14 -08:00
+								            // Sequential — not enough work to justify thread overhead
-												Eliminate per-frame allocations in M2 renderer to reduce CPU stutter

Use persistent vectors for animation work indices, futures, and glow sprites instead of allocating each frame.

											
										
										
											2026-02-09 00:41:07 -08:00
+								            for (size_t i : boneWorkIndices_) {
-												Fix terrain streaming crash: pendingTiles data race and missing null checks

Guard pendingTiles.erase() with queueMutex in processReadyTiles and
unloadTile to prevent data race with worker threads. Add defensive null
checks in M2/WMO render and animation paths. Move cleanupUnusedModels
out of per-tile unload loop to run once after all tiles are removed.

											
										
										
											2026-02-07 18:57:34 -08:00
+								                if (i >= instances.size()) continue;
-												Parallelize M2 bone matrix computation across worker threads

Split the M2 animation update loop into three phases: sequential animation state update, parallel bone matrix computation via std::async (when 32+ animated instances), and sequential particle update. Each thread processes a disjoint slice of instances so no synchronization is needed.

											
										
										
											2026-02-07 14:28:14 -08:00
+								                auto& inst = instances[i];
-												Background BLP texture pre-decoding + deferred WMO normal maps (12x streaming perf)

Move CPU-heavy BLP texture decoding from main thread to background worker
threads for all hot paths: terrain M2 models, WMO doodad M2s, WMO textures,
creature models, and gameobject WMOs. Each renderer (M2, WMO, Character) now
accepts a pre-decoded BLP cache that loadTexture() checks before falling back
to synchronous decode.

Defer WMO normal/height map generation (3 per-pixel passes: luminance, box
blur, Sobel) during terrain streaming finalization — this was the dominant
remaining bottleneck after BLP pre-decoding.

Terrain streaming stalls: 1576ms → 124ms worst case.

											
										
										
											2026-03-07 15:46:56 -08:00
+								                if (!inst.cachedModel) continue;
 								                computeBoneMatrices(*inst.cachedModel, inst);
-												Parallelize M2 bone matrix computation across worker threads

Split the M2 animation update loop into three phases: sequential animation state update, parallel bone matrix computation via std::async (when 32+ animated instances), and sequential particle update. Each thread processes a disjoint slice of instances so no synchronization is needed.

											
										
										
											2026-02-07 14:28:14 -08:00
+								            }
 								        } else {
 								            // Parallel — dispatch across worker threads
-												Optimize threading and texture fallback stability

											
										
										
											2026-02-22 08:12:08 -08:00
+								            static const size_t minAnimWorkPerThread = std::max<size_t>(
 , envSizeOrDefault("WOWEE_M2_ANIM_WORK_PER_THREAD", 64));
 								            const size_t maxUsefulThreads = std::max<size_t>(
 , (animCount + minAnimWorkPerThread - 1) / minAnimWorkPerThread);
 								            const size_t numThreads = std::min(static_cast<size_t>(numAnimThreads_), maxUsefulThreads);
 								            if (numThreads <= 1) {
 								                for (size_t i : boneWorkIndices_) {
 								                    if (i >= instances.size()) continue;
 								                    auto& inst = instances[i];
-												Background BLP texture pre-decoding + deferred WMO normal maps (12x streaming perf)

Move CPU-heavy BLP texture decoding from main thread to background worker
threads for all hot paths: terrain M2 models, WMO doodad M2s, WMO textures,
creature models, and gameobject WMOs. Each renderer (M2, WMO, Character) now
accepts a pre-decoded BLP cache that loadTexture() checks before falling back
to synchronous decode.

Defer WMO normal/height map generation (3 per-pixel passes: luminance, box
blur, Sobel) during terrain streaming finalization — this was the dominant
remaining bottleneck after BLP pre-decoding.

Terrain streaming stalls: 1576ms → 124ms worst case.

											
										
										
											2026-03-07 15:46:56 -08:00
+								                    if (!inst.cachedModel) continue;
 								                    computeBoneMatrices(*inst.cachedModel, inst);
-												Optimize threading and texture fallback stability

											
										
										
											2026-02-22 08:12:08 -08:00
+								                }
 								            } else {
 								                const size_t chunkSize = animCount / numThreads;
 								                const size_t remainder = animCount % numThreads;
-												Parallelize M2 bone matrix computation across worker threads

Split the M2 animation update loop into three phases: sequential animation state update, parallel bone matrix computation via std::async (when 32+ animated instances), and sequential particle update. Each thread processes a disjoint slice of instances so no synchronization is needed.

											
										
										
											2026-02-07 14:28:14 -08:00
-												Optimize threading and texture fallback stability

											
										
										
											2026-02-22 08:12:08 -08:00
+								                // Reuse persistent futures vector to avoid allocation
 								                animFutures_.clear();
 								                if (animFutures_.capacity() < numThreads) {
 								                    animFutures_.reserve(numThreads);
 								                }
-												Parallelize M2 bone matrix computation across worker threads

Split the M2 animation update loop into three phases: sequential animation state update, parallel bone matrix computation via std::async (when 32+ animated instances), and sequential particle update. Each thread processes a disjoint slice of instances so no synchronization is needed.

											
										
										
											2026-02-07 14:28:14 -08:00
-												Optimize threading and texture fallback stability

											
										
										
											2026-02-22 08:12:08 -08:00
+								                size_t start = 0;
 								                for (size_t t = 0; t < numThreads; ++t) {
 								                    size_t end = start + chunkSize + (t < remainder ? 1 : 0);
 								                    animFutures_.push_back(std::async(std::launch::async,
 								                        [this, start, end]() {
 								                            for (size_t j = start; j < end; ++j) {
 								                                size_t idx = boneWorkIndices_[j];
 								                                if (idx >= instances.size()) continue;
 								                                auto& inst = instances[idx];
-												Background BLP texture pre-decoding + deferred WMO normal maps (12x streaming perf)

Move CPU-heavy BLP texture decoding from main thread to background worker
threads for all hot paths: terrain M2 models, WMO doodad M2s, WMO textures,
creature models, and gameobject WMOs. Each renderer (M2, WMO, Character) now
accepts a pre-decoded BLP cache that loadTexture() checks before falling back
to synchronous decode.

Defer WMO normal/height map generation (3 per-pixel passes: luminance, box
blur, Sobel) during terrain streaming finalization — this was the dominant
remaining bottleneck after BLP pre-decoding.

Terrain streaming stalls: 1576ms → 124ms worst case.

											
										
										
											2026-03-07 15:46:56 -08:00
+								                                if (!inst.cachedModel) continue;
 								                                computeBoneMatrices(*inst.cachedModel, inst);
-												Optimize threading and texture fallback stability

											
										
										
											2026-02-22 08:12:08 -08:00
+								                            }
 								                        }));
 								                    start = end;
 								                }
 								                for (auto& f : animFutures_) {
 								                    f.get();
 								                }
-												Parallelize M2 bone matrix computation across worker threads

Split the M2 animation update loop into three phases: sequential animation state update, parallel bone matrix computation via std::async (when 32+ animated instances), and sequential particle update. Each thread processes a disjoint slice of instances so no synchronization is needed.

											
										
										
											2026-02-07 14:28:14 -08:00
+								            }
 								        }
 								    }
-												Render M2 glow batches as billboarded light sprites

Replace flat mesh rendering of additive/mod blend batches (blendMode >= 3)
with camera-facing point sprites using a soft radial gradient texture and
additive blending. Adds M2 particle emitter infrastructure (structs, shader,
parsing stubs) but disables emitter parsing — the assumed 476-byte struct
size is wrong for WotLK 3.3.5a, causing misaligned reads that explode RAM.

											
										
										
											2026-02-06 08:58:26 -08:00
-												Parallelize M2 bone matrix computation across worker threads

Split the M2 animation update loop into three phases: sequential animation state update, parallel bone matrix computation via std::async (when 32+ animated instances), and sequential particle update. Each thread processes a disjoint slice of instances so no synchronization is needed.

											
										
										
											2026-02-07 14:28:14 -08:00
+								    // Phase 3: Particle update (sequential — uses RNG, not thread-safe)
-												Optimize M2 update loop: skip static doodads, incremental spatial index

- Split M2 instances into fast-path index lists (animated, particle-only,
  particle-all, smoke) to avoid iterating all 46K instances per frame
- Cache model flags (hasAnimation, disableAnimation, isSmoke, etc.) on
  M2Instance struct to eliminate per-frame hash lookups
- Replace full rebuildSpatialIndex on position/transform updates with
  incremental grid cell remove+add, preventing 8.5ms/frame rebuild cost
- Advance animTime for all instances (texture UV animation) but only
  compute bones and particles for the ~3K that need it

M2_UPDATE: 10.7ms → 2.0ms, FPS: 35 → 55-59

											
										
										
											2026-03-02 14:45:49 -08:00
+								    // Only iterate instances that have particle emitters (pre-built list).
 								    for (size_t idx : particleInstanceIndices_) {
 								        if (idx >= instances.size()) continue;
-												Parallelize M2 bone matrix computation across worker threads

Split the M2 animation update loop into three phases: sequential animation state update, parallel bone matrix computation via std::async (when 32+ animated instances), and sequential particle update. Each thread processes a disjoint slice of instances so no synchronization is needed.

											
										
										
											2026-02-07 14:28:14 -08:00
+								        auto& instance = instances[idx];
-												Fix M2 particle rendering: color, gravity, transparency, and animation

- Fix FBlock color keys from 3-byte BGR to 4-byte RGBA (CImVector) to
  prevent garbled purple/red colors from byte misalignment
- Add circular soft-edge falloff in particle fragment shader (GL_POINTS
  rendered as squares by default)
- Apply default gravity (4.0 spray, 1.5 mist) when M2 gravity is 0 since
  bone animation from .anim files isn't loaded yet
- Add drift velocity to speed=0 emitters so particles spread as mist
  instead of clustering at static bone positions
- Run particle updates for all nearby instances, not just those in
  boneWorkIndices_, to prevent particles freezing when bone culled
- Wrap animation time for particle models to keep emission tracks looping
- Cap particle scale to 1.5 and reduce point size multiplier (800→400)
- Desaturate FBlock colors 70% toward white for natural water appearance
- Reduce additive blend alpha to 5% and volume particles to 2%

											
										
										
											2026-02-16 02:12:43 -08:00
+								        // Distance cull: only update particles within visible range
 								        glm::vec3 toCam = instance.position - cachedCamPos_;
 								        float distSq = glm::dot(toCam, toCam);
 								        if (distSq > cachedMaxRenderDistSq_) continue;
-												Background BLP texture pre-decoding + deferred WMO normal maps (12x streaming perf)

Move CPU-heavy BLP texture decoding from main thread to background worker
threads for all hot paths: terrain M2 models, WMO doodad M2s, WMO textures,
creature models, and gameobject WMOs. Each renderer (M2, WMO, Character) now
accepts a pre-decoded BLP cache that loadTexture() checks before falling back
to synchronous decode.

Defer WMO normal/height map generation (3 per-pixel passes: luminance, box
blur, Sobel) during terrain streaming finalization — this was the dominant
remaining bottleneck after BLP pre-decoding.

Terrain streaming stalls: 1576ms → 124ms worst case.

											
										
										
											2026-03-07 15:46:56 -08:00
+								        if (!instance.cachedModel) continue;
 								        emitParticles(instance, *instance.cachedModel, deltaTime);
-												Fix M2 particle rendering: color, gravity, transparency, and animation

- Fix FBlock color keys from 3-byte BGR to 4-byte RGBA (CImVector) to
  prevent garbled purple/red colors from byte misalignment
- Add circular soft-edge falloff in particle fragment shader (GL_POINTS
  rendered as squares by default)
- Apply default gravity (4.0 spray, 1.5 mist) when M2 gravity is 0 since
  bone animation from .anim files isn't loaded yet
- Add drift velocity to speed=0 emitters so particles spread as mist
  instead of clustering at static bone positions
- Run particle updates for all nearby instances, not just those in
  boneWorkIndices_, to prevent particles freezing when bone culled
- Wrap animation time for particle models to keep emission tracks looping
- Cap particle scale to 1.5 and reduce point size multiplier (800→400)
- Desaturate FBlock colors 70% toward white for natural water appearance
- Reduce additive blend alpha to 5% and volume particles to 2%

											
										
										
											2026-02-16 02:12:43 -08:00
+								        updateParticles(instance, deltaTime);
-												Improve movement, crouching, and add M2 animation

Movement:
- Fix speed controls: Shift=sprint (28), normal run (14), Ctrl=walk (5)
- Reduce character height for doorway clearance (eye height 1.2)
- Add working crouch (C or X key) with smooth transition (eye height 0.6)
- Jump to stand up from crouch

M2 Animation:
- Add animation time tracking per M2 instance
- Add procedural swaying animation in vertex shader
- Update animation each frame for vegetation movement

											
										
										
											2026-02-02 23:10:19 -08:00
+								    }
-												Optimize M2 update loop: skip static doodads, incremental spatial index

- Split M2 instances into fast-path index lists (animated, particle-only,
  particle-all, smoke) to avoid iterating all 46K instances per frame
- Cache model flags (hasAnimation, disableAnimation, isSmoke, etc.) on
  M2Instance struct to eliminate per-frame hash lookups
- Replace full rebuildSpatialIndex on position/transform updates with
  incremental grid cell remove+add, preventing 8.5ms/frame rebuild cost
- Advance animTime for all instances (texture UV animation) but only
  compute bones and particles for the ~3K that need it

M2_UPDATE: 10.7ms → 2.0ms, FPS: 35 → 55-59

											
										
										
											2026-03-02 14:45:49 -08:00
-												Improve movement, crouching, and add M2 animation

Movement:
- Fix speed controls: Shift=sprint (28), normal run (14), Ctrl=walk (5)
- Reduce character height for doorway clearance (eye height 1.2)
- Add working crouch (C or X key) with smooth transition (eye height 0.6)
- Jump to stand up from crouch

M2 Animation:
- Add animation time tracking per M2 instance
- Add procedural swaying animation in vertex shader
- Update animation each frame for vegetation movement

											
										
										
											2026-02-02 23:10:19 -08:00
+								}
-												Fix WMO wall collision, normal mapping, POM backfill, and M2/WMO rendering performance

- Fix MOPY flag check (0x08 not 0x01) for proper wall collision detection
- Cap MAX_PUSH to PLAYER_RADIUS to prevent gradual clip-through
- Fix WMO doodad quaternion component ordering (X/Y swap)
- Linear normal map strength blend in shader for smooth slider control
- Enable shadow sampling for interior WMO groups (covered outdoor areas)
- Backfill deferred normal/height maps after streaming with descriptor rebind
- M2: prepareRender only iterates animated instances, bone dirty flag
- M2: remove worker thread VMA allocation, skip unready bone instances
- WMO: persistent visibility vectors, sequential culling
- Add FSR EASU/RCAS shaders

											
										
										
											2026-03-07 22:03:28 -08:00
+								// Makes sure every animated instance that currently has bone matrices owns a
 								// bone storage buffer and a bone descriptor set for the frame slot `frameIndex`.
 								// Slots that already hold a buffer are left untouched. `camera` is unused for now.
 								void M2Renderer::prepareRender(uint32_t frameIndex, const Camera& camera) {
 								    if (!initialized_ || instances.empty()) return;
 								    (void)camera;  // reserved for future frustum-based culling
 								    // Pre-allocate bone SSBOs + descriptor sets on main thread (pool ops not thread-safe).
 								    // Only iterate animated instances — static doodads don't need bone buffers.
 								    for (size_t idx : animatedInstanceIndices_) {
 								        if (idx >= instances.size()) continue;
 								        auto& instance = instances[idx];
 								        if (instance.boneMatrices.empty()) continue;
 								        // Nothing to do when this frame slot already has its buffer.
 								        if (instance.boneBuffer[frameIndex]) continue;
 								        VkBufferCreateInfo bci{VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO};
 								        // Fixed capacity of 128 matrices per instance.
 								        // NOTE(review): presumably matches the bone-count clamp used elsewhere — confirm.
 								        bci.size = 128 * sizeof(glm::mat4);
 								        bci.usage = VK_BUFFER_USAGE_STORAGE_BUFFER_BIT;
 								        VmaAllocationCreateInfo aci{};
 								        aci.usage = VMA_MEMORY_USAGE_CPU_TO_GPU;
 								        aci.flags = VMA_ALLOCATION_CREATE_MAPPED_BIT;
 								        VmaAllocationInfo allocInfo{};
 								        if (vmaCreateBuffer(vkCtx_->getAllocator(), &bci, &aci,
 								                            &instance.boneBuffer[frameIndex], &instance.boneAlloc[frameIndex],
 								                            &allocInfo) != VK_SUCCESS) {
 								            // Allocation failed: keep the slot empty so a later call retries, and do
 								            // not allocate a descriptor set that would reference a missing buffer.
 								            instance.boneBuffer[frameIndex] = VK_NULL_HANDLE;
 								            instance.boneMapped[frameIndex] = nullptr;
 								            continue;
 								        }
 								        instance.boneMapped[frameIndex] = allocInfo.pMappedData;
 								        instance.boneSet[frameIndex] = allocateBoneSet();
 								        if (instance.boneSet[frameIndex]) {
 								            // Bind the whole new buffer at binding 0 of the bone set.
 								            VkDescriptorBufferInfo bufInfo{};
 								            bufInfo.buffer = instance.boneBuffer[frameIndex];
 								            bufInfo.offset = 0;
 								            bufInfo.range = bci.size;
 								            VkWriteDescriptorSet write{VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET};
 								            write.dstSet = instance.boneSet[frameIndex];
 								            write.dstBinding = 0;
 								            write.descriptorCount = 1;
 								            write.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
 								            write.pBufferInfo = &bufInfo;
 								            vkUpdateDescriptorSets(vkCtx_->getDevice(), 1, &write, 0, nullptr);
 								        }
 								    }
 								}
-												Vulcan Nightmare

Experimentally bringing up Vulkan support

											
										
										
											2026-02-21 19:41:21 -08:00
+								void M2Renderer::render(VkCommandBuffer cmd, VkDescriptorSet perFrameSet, const Camera& camera) {
 								    if (instances.empty() || !opaquePipeline_) {
-												Initial commit: wowee native WoW 3.3.5a client

											
										
										
											2026-02-02 12:24:50 -08:00
+								        return;
 								    }
 								    // Debug: log once when we start rendering
 								    static bool loggedOnce = false;
 								    if (!loggedOnce) {
 								        loggedOnce = true;
 								        LOG_INFO("M2 render: ", instances.size(), " instances, ", models.size(), " models");
 								    }
 								    // Build frustum for culling
-												Vulcan Nightmare

Experimentally bringing up Vulkan support

											
										
										
											2026-02-21 19:41:21 -08:00
+								    const glm::mat4 view = camera.getViewMatrix();
 								    const glm::mat4 projection = camera.getProjectionMatrix();
-												Initial commit: wowee native WoW 3.3.5a client

											
										
										
											2026-02-02 12:24:50 -08:00
+								    Frustum frustum;
 								    frustum.extractFromMatrix(projection * view);
-												Eliminate per-frame allocations in M2 renderer to reduce CPU stutter

Use persistent vectors for animation work indices, futures, and glow sprites instead of allocating each frame.

											
										
										
											2026-02-09 00:41:07 -08:00
+								    // Reuse persistent buffers (clear instead of reallocating)
 								    glowSprites_.clear();
-												Render M2 glow batches as billboarded light sprites

Replace flat mesh rendering of additive/mod blend batches (blendMode >= 3)
with camera-facing point sprites using a soft radial gradient texture and
additive blending. Adds M2 particle emitter infrastructure (structs, shader,
parsing stubs) but disables emitter parsing — the assumed 476-byte struct
size is wrong for WotLK 3.3.5a, causing misaligned reads that explode RAM.

											
										
										
											2026-02-06 08:58:26 -08:00
-												Initial commit: wowee native WoW 3.3.5a client

											
										
										
											2026-02-02 12:24:50 -08:00
+								    lastDrawCallCount = 0;
-												Fix glow sprite flashing, move fadeAlpha to push constants, throttle character bones

- Glow sprites now use dedicated vertex buffer (glowVB_) separate from
  M2 particle buffer to prevent data race when renderM2Particles()
  overwrites glow data mid-flight
- Move fadeAlpha from shared material UBO to per-draw push constants,
  eliminating cross-instance alpha race on non-double-buffered UBOs
- Smooth adaptive render distance transitions to prevent pop-in/out
  at instance count thresholds (1000/2000)
- Distance-tiered character bone throttling: near (<30u) every frame,
  mid (30-60u) every 3rd, far (60-120u) every 6th frame
- Skip weapon instance animation updates (transforms set by parent bones)

											
										
										
											2026-03-04 08:17:32 -08:00
+								    // Adaptive render distance: smoothed to prevent pop-in/pop-out flickering
 								    const float targetRenderDist = (instances.size() > 2000) ? 300.0f
 								                                 : (instances.size() > 1000) ? 500.0f
 								                                 : 1000.0f;
 								    // Smooth transitions: shrink slowly (avoid popping out nearby objects)
 								    const float shrinkRate = 0.005f;  // very slow decrease
 								    const float growRate = 0.05f;     // faster increase
 								    float blendRate = (targetRenderDist < smoothedRenderDist_) ? shrinkRate : growRate;
 								    smoothedRenderDist_ = glm::mix(smoothedRenderDist_, targetRenderDist, blendRate);
 								    const float maxRenderDistance = smoothedRenderDist_;
-												Performance optimizations and collision improvements

Performance:
- Remove expensive inverse() from all vertex shaders (terrain, WMO, M2, water, character)
- Add uniform location caching to avoid repeated glGetUniformLocation calls
- Add proper frustum culling for WMO groups using AABB intersection
- Add distance-based culling for WMO and M2 instances
- Add cleanup of unused M2/WMO models when tiles unload

Collision & Movement:
- Add M2 doodad collision detection (fences, boxes, etc.)
- Reduce character eye height (5.0 -> 1.8) and collision radius (2.5 -> 0.5)
- Enable WoW-style movement speed by default (14 units/sec run, 5 walk, 9 back)
- Fix emote grammar ("You waves." -> "You wave.")

Misc:
- Rename window title to "Wowee"

											
										
										
											2026-02-02 23:03:45 -08:00
+								    const float maxRenderDistanceSq = maxRenderDistance * maxRenderDistance;
-												Add M2 idle animation variations, dedup instances, fix terrain textures

- Add idle variation system: creatures randomly play Stand variations
  (stretch, flap, look around) every 4-10s, then return to idle loop
- Deduplicate M2 instances at same position (was hidden before animation
  made duplicates visible with different random start times)
- Adaptive M2 render distance: 350 units in open terrain, 180 in cities
- Restore terrain sampler-to-unit uniform bindings lost during texture
  bind optimization (roads were invisible under grass)
- Safety: clamp bone count to 128, validate sequence indices, sanitize scale

											
										
										
											2026-02-04 11:50:18 -08:00
+								    const float fadeStartFraction = 0.75f;
-												Performance optimizations and collision improvements

Performance:
- Remove expensive inverse() from all vertex shaders (terrain, WMO, M2, water, character)
- Add uniform location caching to avoid repeated glGetUniformLocation calls
- Add proper frustum culling for WMO groups using AABB intersection
- Add distance-based culling for WMO and M2 instances
- Add cleanup of unused M2/WMO models when tiles unload

Collision & Movement:
- Add M2 doodad collision detection (fences, boxes, etc.)
- Reduce character eye height (5.0 -> 1.8) and collision radius (2.5 -> 0.5)
- Enable WoW-style movement speed by default (14 units/sec run, 5 walk, 9 back)
- Fix emote grammar ("You waves." -> "You wave.")

Misc:
- Rename window title to "Wowee"

											
										
										
											2026-02-02 23:03:45 -08:00
+								    const glm::vec3 camPos = camera.getPosition();
-												Skip bone computation for off-screen M2 instances, sort by model for batched VAO binds, and eliminate sqrt in distance fade

											
										
										
											2026-02-07 14:37:14 -08:00
+								    // Build sorted visible instance list: cull then sort by modelId to batch VAO binds
-												Eliminate per-frame allocations in M2 renderer to reduce CPU stutter

Use persistent vectors for animation work indices, futures, and glow sprites instead of allocating each frame.

											
										
										
											2026-02-09 00:41:07 -08:00
+								    // Reuse persistent vector to avoid allocation
 								    sortedVisible_.clear();
-												Optimize M2 and terrain rendering for 60fps target

Implements aggressive performance optimizations to improve frame rate from 29fps to 40fps:

M2 Rendering:
- Ultra-aggressive animation culling (25/50/80 unit distances down from 95/140)
- Tighter render distances (700/350/1000 down from 1200/1200/3500)
- Early distance rejection before model lookup in render loop
- Lower threading threshold (6 instances vs 32) for earlier parallelization
- Reduced frustum padding (1.5x vs 2.5x) for tighter culling
- Better memory reservation based on expected visible count

Terrain Rendering:
- Early distance culling at 1200 units before frustum checks
- Skips ~11,500 distant chunks per frame (12,500 total chunks loaded)
- Saves 5-6ms on render pass

Performance Impact:
- Render time: 20ms → 14-15ms (30% faster)
- Frame rate: 29fps → 40fps (+11fps)
- Total savings: ~9ms per frame

											
										
										
											2026-02-10 17:23:41 -08:00
+								    // Reserve based on expected visible count (roughly 30% of total instances in dense areas)
 								    const size_t expectedVisible = std::min(instances.size() / 3, size_t(600));
 								    if (sortedVisible_.capacity() < expectedVisible) {
 								        sortedVisible_.reserve(expectedVisible);
-												Eliminate per-frame allocations in M2 renderer to reduce CPU stutter

Use persistent vectors for animation work indices, futures, and glow sprites instead of allocating each frame.

											
										
										
											2026-02-09 00:41:07 -08:00
+								    }
-												Skip bone computation for off-screen M2 instances, sort by model for batched VAO binds, and eliminate sqrt in distance fade

											
										
										
											2026-02-07 14:37:14 -08:00
-												Optimize M2 and terrain rendering for 60fps target

Implements aggressive performance optimizations to improve frame rate from 29fps to 40fps:

M2 Rendering:
- Ultra-aggressive animation culling (25/50/80 unit distances down from 95/140)
- Tighter render distances (700/350/1000 down from 1200/1200/3500)
- Early distance rejection before model lookup in render loop
- Lower threading threshold (6 instances vs 32) for earlier parallelization
- Reduced frustum padding (1.5x vs 2.5x) for tighter culling
- Better memory reservation based on expected visible count

Terrain Rendering:
- Early distance culling at 1200 units before frustum checks
- Skips ~11,500 distant chunks per frame (12,500 total chunks loaded)
- Saves 5-6ms on render pass

Performance Impact:
- Render time: 20ms → 14-15ms (30% faster)
- Frame rate: 29fps → 40fps (+11fps)
- Total savings: ~9ms per frame

											
										
										
											2026-02-10 17:23:41 -08:00
+								    // Early distance rejection: max possible render distance (tight but safe upper bound)
 								    const float maxPossibleDistSq = maxRenderDistance * maxRenderDistance * 4.0f;  // 2x safety margin (reduced from 4x)
-												Skip bone computation for off-screen M2 instances, sort by model for batched VAO binds, and eliminate sqrt in distance fade

											
										
										
											2026-02-07 14:37:14 -08:00
+								    for (uint32_t i = 0; i < static_cast<uint32_t>(instances.size()); ++i) {
 								        const auto& instance = instances[i];
-												Optimize M2 and terrain rendering for 60fps target

Implements aggressive performance optimizations to improve frame rate from 29fps to 40fps:

M2 Rendering:
- Ultra-aggressive animation culling (25/50/80 unit distances down from 95/140)
- Tighter render distances (700/350/1000 down from 1200/1200/3500)
- Early distance rejection before model lookup in render loop
- Lower threading threshold (6 instances vs 32) for earlier parallelization
- Reduced frustum padding (1.5x vs 2.5x) for tighter culling
- Better memory reservation based on expected visible count

Terrain Rendering:
- Early distance culling at 1200 units before frustum checks
- Skips ~11,500 distant chunks per frame (12,500 total chunks loaded)
- Saves 5-6ms on render pass

Performance Impact:
- Render time: 20ms → 14-15ms (30% faster)
- Frame rate: 29fps → 40fps (+11fps)
- Total savings: ~9ms per frame

											
										
										
											2026-02-10 17:23:41 -08:00
-												Eliminate per-instance hash lookups in M2 render/shadow culling loops

Use cached model flags (isValid, isSmoke, isInvisibleTrap, isGroundDetail,
disableAnimation, boundRadius) on M2Instance instead of models.find() in
the hot culling paths. Also complete cached flag initialization in
createInstanceWithMatrix().

											
										
										
											2026-03-04 08:28:21 -08:00
+								        // Use cached model flags — no hash lookup needed
 								        if (!instance.cachedIsValid || instance.cachedIsSmoke || instance.cachedIsInvisibleTrap) continue;
-												Optimize M2 and terrain rendering for 60fps target

Implements aggressive performance optimizations to improve frame rate from 29fps to 40fps:

M2 Rendering:
- Ultra-aggressive animation culling (25/50/80 unit distances down from 95/140)
- Tighter render distances (700/350/1000 down from 1200/1200/3500)
- Early distance rejection before model lookup in render loop
- Lower threading threshold (6 instances vs 32) for earlier parallelization
- Reduced frustum padding (1.5x vs 2.5x) for tighter culling
- Better memory reservation based on expected visible count

Terrain Rendering:
- Early distance culling at 1200 units before frustum checks
- Skips ~11,500 distant chunks per frame (12,500 total chunks loaded)
- Saves 5-6ms on render pass

Performance Impact:
- Render time: 20ms → 14-15ms (30% faster)
- Frame rate: 29fps → 40fps (+11fps)
- Total savings: ~9ms per frame

											
										
										
											2026-02-10 17:23:41 -08:00
+								        glm::vec3 toCam = instance.position - camPos;
 								        float distSq = glm::dot(toCam, toCam);
-												Eliminate per-instance hash lookups in M2 render/shadow culling loops

Use cached model flags (isValid, isSmoke, isInvisibleTrap, isGroundDetail,
disableAnimation, boundRadius) on M2Instance instead of models.find() in
the hot culling paths. Also complete cached flag initialization in
createInstanceWithMatrix().

											
										
										
											2026-03-04 08:28:21 -08:00
+								        if (distSq > maxPossibleDistSq) continue;
-												Optimize M2 and terrain rendering for 60fps target

Implements aggressive performance optimizations to improve frame rate from 29fps to 40fps:

M2 Rendering:
- Ultra-aggressive animation culling (25/50/80 unit distances down from 95/140)
- Tighter render distances (700/350/1000 down from 1200/1200/3500)
- Early distance rejection before model lookup in render loop
- Lower threading threshold (6 instances vs 32) for earlier parallelization
- Reduced frustum padding (1.5x vs 2.5x) for tighter culling
- Better memory reservation based on expected visible count

Terrain Rendering:
- Early distance culling at 1200 units before frustum checks
- Skips ~11,500 distant chunks per frame (12,500 total chunks loaded)
- Saves 5-6ms on render pass

Performance Impact:
- Render time: 20ms → 14-15ms (30% faster)
- Frame rate: 29fps → 40fps (+11fps)
- Total savings: ~9ms per frame

											
										
										
											2026-02-10 17:23:41 -08:00
-												Eliminate per-instance hash lookups in M2 render/shadow culling loops

Use cached model flags (isValid, isSmoke, isInvisibleTrap, isGroundDetail,
disableAnimation, boundRadius) on M2Instance instead of models.find() in
the hot culling paths. Also complete cached flag initialization in
createInstanceWithMatrix().

											
										
										
											2026-03-04 08:28:21 -08:00
+								        float worldRadius = instance.cachedBoundRadius * instance.scale;
-												Reduce foliage and bush pop-in distance artifacts

											
										
										
											2026-02-04 16:32:48 -08:00
+								        float cullRadius = worldRadius;
-												Eliminate per-instance hash lookups in M2 render/shadow culling loops

Use cached model flags (isValid, isSmoke, isInvisibleTrap, isGroundDetail,
disableAnimation, boundRadius) on M2Instance instead of models.find() in
the hot culling paths. Also complete cached flag initialization in
createInstanceWithMatrix().

											
										
										
											2026-03-04 08:28:21 -08:00
+								        if (instance.cachedDisableAnimation) {
-												Reduce foliage and bush pop-in distance artifacts

											
										
										
											2026-02-04 16:32:48 -08:00
+								            cullRadius = std::max(cullRadius, 3.0f);
 								        }
 								        float effectiveMaxDistSq = maxRenderDistanceSq * std::max(1.0f, cullRadius / 12.0f);
-												Eliminate per-instance hash lookups in M2 render/shadow culling loops

Use cached model flags (isValid, isSmoke, isInvisibleTrap, isGroundDetail,
disableAnimation, boundRadius) on M2Instance instead of models.find() in
the hot culling paths. Also complete cached flag initialization in
createInstanceWithMatrix().

											
										
										
											2026-03-04 08:28:21 -08:00
+								        if (instance.cachedDisableAnimation) {
-												Reduce foliage and bush pop-in distance artifacts

											
										
										
											2026-02-04 16:32:48 -08:00
+								            effectiveMaxDistSq *= 2.6f;
-												Improve shadow stability and reduce foliage pop-in

											
										
										
											2026-02-04 16:30:24 -08:00
+								        }
-												Eliminate per-instance hash lookups in M2 render/shadow culling loops

Use cached model flags (isValid, isSmoke, isInvisibleTrap, isGroundDetail,
disableAnimation, boundRadius) on M2Instance instead of models.find() in
the hot culling paths. Also complete cached flag initialization in
createInstanceWithMatrix().

											
										
										
											2026-03-04 08:28:21 -08:00
+								        if (instance.cachedIsGroundDetail) {
-												Fix Vulkan shadow light direction and restore ground-clutter cutout visibility

											
										
										
											2026-02-22 09:47:39 -08:00
+								            effectiveMaxDistSq *= 0.75f;
-												Improve runtime stutter handling and ground clutter performance

- reduce per-tile ground clutter generation pressure and enforce tighter caps to avoid spikes

- remove expensive detail dedupe scans from the hot render path

- add progressive/lazy clutter updates around player movement to smooth frame pacing

- lower noisy runtime INFO logging to DEBUG/throttled paths

- keep terrain/game screen updates responsive while preserving existing behavior

											
										
										
											2026-02-21 01:26:16 -08:00
+								        }
-												Fix glow sprite flashing, move fadeAlpha to push constants, throttle character bones

- Glow sprites now use dedicated vertex buffer (glowVB_) separate from
  M2 particle buffer to prevent data race when renderM2Particles()
  overwrites glow data mid-flight
- Move fadeAlpha from shared material UBO to per-draw push constants,
  eliminating cross-instance alpha race on non-double-buffered UBOs
- Smooth adaptive render distance transitions to prevent pop-in/out
  at instance count thresholds (1000/2000)
- Distance-tiered character bone throttling: near (<30u) every frame,
  mid (30-60u) every 3rd, far (60-120u) every 6th frame
- Skip weapon instance animation updates (transforms set by parent bones)

											
										
										
											2026-03-04 08:17:32 -08:00
-												Skip bone computation for off-screen M2 instances, sort by model for batched VAO binds, and eliminate sqrt in distance fade

											
										
										
											2026-02-07 14:37:14 -08:00
+								        if (distSq > effectiveMaxDistSq) continue;
-												Performance optimizations and collision improvements

Performance:
- Remove expensive inverse() from all vertex shaders (terrain, WMO, M2, water, character)
- Add uniform location caching to avoid repeated glGetUniformLocation calls
- Add proper frustum culling for WMO groups using AABB intersection
- Add distance-based culling for WMO and M2 instances
- Add cleanup of unused M2/WMO models when tiles unload

Collision & Movement:
- Add M2 doodad collision detection (fences, boxes, etc.)
- Reduce character eye height (5.0 -> 1.8) and collision radius (2.5 -> 0.5)
- Enable WoW-style movement speed by default (14 units/sec run, 5 walk, 9 back)
- Fix emote grammar ("You waves." -> "You wave.")

Misc:
- Rename window title to "Wowee"

											
										
										
											2026-02-02 23:03:45 -08:00
-												Fix glow sprite flashing, move fadeAlpha to push constants, throttle character bones

- Glow sprites now use dedicated vertex buffer (glowVB_) separate from
  M2 particle buffer to prevent data race when renderM2Particles()
  overwrites glow data mid-flight
- Move fadeAlpha from shared material UBO to per-draw push constants,
  eliminating cross-instance alpha race on non-double-buffered UBOs
- Smooth adaptive render distance transitions to prevent pop-in/out
  at instance count thresholds (1000/2000)
- Distance-tiered character bone throttling: near (<30u) every frame,
  mid (30-60u) every 3rd, far (60-120u) every 6th frame
- Skip weapon instance animation updates (transforms set by parent bones)

											
										
										
											2026-03-04 08:17:32 -08:00
+								        // Frustum cull with padding
-												Optimize M2 and terrain rendering for 60fps target

Implements aggressive performance optimizations to improve frame rate from 29fps to 40fps:

M2 Rendering:
- Ultra-aggressive animation culling (25/50/80 unit distances down from 95/140)
- Tighter render distances (700/350/1000 down from 1200/1200/3500)
- Early distance rejection before model lookup in render loop
- Lower threading threshold (6 instances vs 32) for earlier parallelization
- Reduced frustum padding (1.5x vs 2.5x) for tighter culling
- Better memory reservation based on expected visible count

Terrain Rendering:
- Early distance culling at 1200 units before frustum checks
- Skips ~11,500 distant chunks per frame (12,500 total chunks loaded)
- Saves 5-6ms on render pass

Performance Impact:
- Render time: 20ms → 14-15ms (30% faster)
- Frame rate: 29fps → 40fps (+11fps)
- Total savings: ~9ms per frame

											
										
										
											2026-02-10 17:23:41 -08:00
+								        float paddedRadius = std::max(cullRadius * 1.5f, cullRadius + 3.0f);
-												Increase M2 frustum culling padding to prevent edge pop-out

- Increase padding from 1.2x to 2.5x model radius
- Add minimum 5 unit padding for small objects like lamps
- Fixes models disappearing at viewport edges during camera rotation

											
										
										
											2026-02-09 20:02:52 -08:00
+								        if (cullRadius > 0.0f && !frustum.intersectsSphere(instance.position, paddedRadius)) continue;
-												Eliminate per-frame allocations in M2 renderer to reduce CPU stutter

Use persistent vectors for animation work indices, futures, and glow sprites instead of allocating each frame.

											
										
										
											2026-02-09 00:41:07 -08:00
 								        sortedVisible_.push_back({i, instance.modelId, distSq, effectiveMaxDistSq});
-												Skip bone computation for off-screen M2 instances, sort by model for batched VAO binds, and eliminate sqrt in distance fade

											
										
										
											2026-02-07 14:37:14 -08:00
+								    }
-												Vulcan Nightmare

Experimentally bringing up Vulkan support

											
										
										
											2026-02-21 19:41:21 -08:00
+								    // Sort by modelId to minimize vertex/index buffer rebinds
-												Fix WMO wall collision, normal mapping, POM backfill, and M2/WMO rendering performance

- Fix MOPY flag check (0x08 not 0x01) for proper wall collision detection
- Cap MAX_PUSH to PLAYER_RADIUS to prevent gradual clip-through
- Fix WMO doodad quaternion component ordering (X/Y swap)
- Linear normal map strength blend in shader for smooth slider control
- Enable shadow sampling for interior WMO groups (covered outdoor areas)
- Backfill deferred normal/height maps after streaming with descriptor rebind
- M2: prepareRender only iterates animated instances, bone dirty flag
- M2: remove worker thread VMA allocation, skip unready bone instances
- WMO: persistent visibility vectors, sequential culling
- Add FSR EASU/RCAS shaders

											
										
										
											2026-03-07 22:03:28 -08:00
+								    std::sort(sortedVisible_.begin(), sortedVisible_.end(),
 								              [](const VisibleEntry& a, const VisibleEntry& b) { return a.modelId < b.modelId; });
-												Skip bone computation for off-screen M2 instances, sort by model for batched VAO binds, and eliminate sqrt in distance fade

											
										
										
											2026-02-07 14:37:14 -08:00
 								    uint32_t currentModelId = UINT32_MAX;
 								    const M2ModelGPU* currentModel = nullptr;
-												Vulcan Nightmare

Experimentally bringing up Vulkan support

											
										
										
											2026-02-21 19:41:21 -08:00
+								    // State tracking
 								    VkPipeline currentPipeline = VK_NULL_HANDLE;
 								    uint32_t frameIndex = vkCtx_->getCurrentFrame();
 								    // Push constants struct matching m2.vert.glsl push_constant block
 								    struct M2PushConstants {
 								        glm::mat4 model;
 								        glm::vec2 uvOffset;
 								        int texCoordSet;
 								        int useBones;
-												Add shader-driven tree beautification: wind sway, SSS, color variation, AO

- Vertex wind animation: 3-layer displacement (trunk/branch/leaf) with
  quadratic height scaling so bases stay grounded
- Shadow pass: matching vertex displacement split into foliage/non-foliage
  passes, removed UV-wiggle approach
- Leaf subsurface scattering: warm backlit glow when looking toward sun
- Per-instance color variation: hue/brightness from position hash via flat
  varying to avoid interpolation flicker
- Canopy ambient occlusion: height-based darkening of tree interiors
- Detail normal perturbation: UV-only procedural normals to break flat cards
- Bayer 4x4 ordered dither replacing sin-hash noise for alpha edges
- Foliage skips shadow map sampling and specular to prevent flicker from
  swaying geometry sampling unstable shadow/highlight values

											
										
										
											2026-02-23 03:53:50 -08:00
+								        int isFoliage;
-												Fix glow sprite flashing, move fadeAlpha to push constants, throttle character bones

- Glow sprites now use dedicated vertex buffer (glowVB_) separate from
  M2 particle buffer to prevent data race when renderM2Particles()
  overwrites glow data mid-flight
- Move fadeAlpha from shared material UBO to per-draw push constants,
  eliminating cross-instance alpha race on non-double-buffered UBOs
- Smooth adaptive render distance transitions to prevent pop-in/out
  at instance count thresholds (1000/2000)
- Distance-tiered character bone throttling: near (<30u) every frame,
  mid (30-60u) every 3rd, far (60-120u) every 6th frame
- Skip weapon instance animation updates (transforms set by parent bones)

											
										
										
											2026-03-04 08:17:32 -08:00
+								        float fadeAlpha;
-												Vulcan Nightmare

Experimentally bringing up Vulkan support

											
										
										
											2026-02-21 19:41:21 -08:00
+								    };
 								    // Bind per-frame descriptor set (set 0) — shared across all draws
 								    vkCmdBindDescriptorSets(cmd, VK_PIPELINE_BIND_POINT_GRAPHICS,
 								                            pipelineLayout_, 0, 1, &perFrameSet, 0, nullptr);
 								    // Start with opaque pipeline
 								    vkCmdBindPipeline(cmd, VK_PIPELINE_BIND_POINT_GRAPHICS, opaquePipeline_);
 								    currentPipeline = opaquePipeline_;
-												Eliminate per-frame allocations in M2 renderer to reduce CPU stutter

Use persistent vectors for animation work indices, futures, and glow sprites instead of allocating each frame.

											
										
										
											2026-02-09 00:41:07 -08:00
 								    for (const auto& entry : sortedVisible_) {
-												Fix terrain streaming crash: pendingTiles data race and missing null checks

Guard pendingTiles.erase() with queueMutex in processReadyTiles and
unloadTile to prevent data race with worker threads. Add defensive null
checks in M2/WMO render and animation paths. Move cleanupUnusedModels
out of per-tile unload loop to run once after all tiles are removed.

											
										
										
											2026-02-07 18:57:34 -08:00
+								        if (entry.index >= instances.size()) continue;
-												Vulcan Nightmare

Experimentally bringing up Vulkan support

											
										
										
											2026-02-21 19:41:21 -08:00
+								        auto& instance = instances[entry.index];
-												Skip bone computation for off-screen M2 instances, sort by model for batched VAO binds, and eliminate sqrt in distance fade

											
										
										
											2026-02-07 14:37:14 -08:00
-												Vulcan Nightmare

Experimentally bringing up Vulkan support

											
										
										
											2026-02-21 19:41:21 -08:00
+								        // Bind vertex + index buffers once per model group
-												Skip bone computation for off-screen M2 instances, sort by model for batched VAO binds, and eliminate sqrt in distance fade

											
										
										
											2026-02-07 14:37:14 -08:00
+								        if (entry.modelId != currentModelId) {
 								            currentModelId = entry.modelId;
-												Fix terrain streaming crash: pendingTiles data race and missing null checks

Guard pendingTiles.erase() with queueMutex in processReadyTiles and
unloadTile to prevent data race with worker threads. Add defensive null
checks in M2/WMO render and animation paths. Move cleanupUnusedModels
out of per-tile unload loop to run once after all tiles are removed.

											
										
										
											2026-02-07 18:57:34 -08:00
+								            auto mdlIt = models.find(currentModelId);
 								            if (mdlIt == models.end()) continue;
 								            currentModel = &mdlIt->second;
-												Vulcan Nightmare

Experimentally bringing up Vulkan support

											
										
										
											2026-02-21 19:41:21 -08:00
+								            if (!currentModel->vertexBuffer) continue;
 								            VkDeviceSize offset = 0;
 								            vkCmdBindVertexBuffers(cmd, 0, 1, &currentModel->vertexBuffer, &offset);
 								            vkCmdBindIndexBuffer(cmd, currentModel->indexBuffer, 0, VK_INDEX_TYPE_UINT16);
-												Initial commit: wowee native WoW 3.3.5a client

											
										
										
											2026-02-02 12:24:50 -08:00
+								        }
-												Skip bone computation for off-screen M2 instances, sort by model for batched VAO binds, and eliminate sqrt in distance fade

											
										
										
											2026-02-07 14:37:14 -08:00
+								        const M2ModelGPU& model = *currentModel;
 								        // Distance-based fade alpha for smooth pop-in (squared-distance, no sqrt)
-												Add spellbook, fix WMO floor clipping, and polish UI/visuals

- Add spellbook screen (P key) with Spell.dbc name lookup and action bar assignment
- Default Attack and Hearthstone spells available in single player
- Fix WMO floor clipping (gryphon roost) by tightening ceiling rejection threshold
- Darken ocean water, increase wave motion and opacity
- Add M2 model distance fade-in to prevent pop-in
- Reposition chat window, add slash/enter key focus
- Remove debug key commands (keep only F1 perf HUD, N minimap)
- Performance: return chat history by const ref, use deque for O(1) pop_front

											
										
										
											2026-02-04 11:31:08 -08:00
+								        float fadeAlpha = 1.0f;
-												Reduce foliage and bush pop-in distance artifacts

											
										
										
											2026-02-04 16:32:48 -08:00
+								        float fadeFrac = model.disableAnimation ? 0.55f : fadeStartFraction;
-												Skip bone computation for off-screen M2 instances, sort by model for batched VAO binds, and eliminate sqrt in distance fade

											
										
										
											2026-02-07 14:37:14 -08:00
+								        float fadeStartDistSq = entry.effectiveMaxDistSq * fadeFrac * fadeFrac;
 								        if (entry.distSq > fadeStartDistSq) {
 								            fadeAlpha = std::clamp((entry.effectiveMaxDistSq - entry.distSq) /
 								                                  (entry.effectiveMaxDistSq - fadeStartDistSq), 0.0f, 1.0f);
-												Add spellbook, fix WMO floor clipping, and polish UI/visuals

- Add spellbook screen (P key) with Spell.dbc name lookup and action bar assignment
- Default Attack and Hearthstone spells available in single player
- Fix WMO floor clipping (gryphon roost) by tightening ceiling rejection threshold
- Darken ocean water, increase wave motion and opacity
- Add M2 model distance fade-in to prevent pop-in
- Reposition chat window, add slash/enter key focus
- Remove debug key commands (keep only F1 perf HUD, N minimap)
- Performance: return chat history by const ref, use deque for O(1) pop_front

											
										
										
											2026-02-04 11:31:08 -08:00
+								        }
-												Improve runtime stutter handling and ground clutter performance

- reduce per-tile ground clutter generation pressure and enforce tighter caps to avoid spikes

- remove expensive detail dedupe scans from the hot render path

- add progressive/lazy clutter updates around player movement to smooth frame pacing

- lower noisy runtime INFO logging to DEBUG/throttled paths

- keep terrain/game screen updates responsive while preserving existing behavior

											
										
										
											2026-02-21 01:26:16 -08:00
+								        float instanceFadeAlpha = fadeAlpha;
 								        if (model.isGroundDetail) {
 								            instanceFadeAlpha *= 0.82f;
 								        }
-												Instance portal glow, spin, and transparent additive rendering

											
										
										
											2026-03-06 18:02:56 -08:00
+								        if (model.isInstancePortal) {
 								            // Render mesh at low alpha + emit glow sprite at center
 								            instanceFadeAlpha *= 0.12f;
 								            if (entry.distSq < 400.0f * 400.0f) {
 								                glm::vec3 center = glm::vec3(instance.modelMatrix * glm::vec4(0.0f, 0.0f, 0.0f, 1.0f));
 								                GlowSprite gs;
 								                gs.worldPos = center;
 								                gs.color = glm::vec4(0.35f, 0.5f, 1.0f, 1.1f);
 								                gs.size = instance.scale * 5.0f;
 								                glowSprites_.push_back(gs);
 								                GlowSprite halo = gs;
 								                halo.color.a *= 0.3f;
 								                halo.size *= 2.2f;
 								                glowSprites_.push_back(halo);
 								            }
 								        }
-												Eliminate per-frame allocations in M2 renderer to reduce CPU stutter

Use persistent vectors for animation work indices, futures, and glow sprites instead of allocating each frame.

											
										
										
											2026-02-09 00:41:07 -08:00
-												Fix WMO wall collision, normal mapping, POM backfill, and M2/WMO rendering performance

- Fix MOPY flag check (0x08 not 0x01) for proper wall collision detection
- Cap MAX_PUSH to PLAYER_RADIUS to prevent gradual clip-through
- Fix WMO doodad quaternion component ordering (X/Y swap)
- Linear normal map strength blend in shader for smooth slider control
- Enable shadow sampling for interior WMO groups (covered outdoor areas)
- Backfill deferred normal/height maps after streaming with descriptor rebind
- M2: prepareRender only iterates animated instances, bone dirty flag
- M2: remove worker thread VMA allocation, skip unready bone instances
- WMO: persistent visibility vectors, sequential culling
- Add FSR EASU/RCAS shaders

											
										
										
											2026-03-07 22:03:28 -08:00
+								        // Upload bone matrices to SSBO if model has skeletal animation.
-												Parallel animation updates, thread-safe collision, M2 pop-in fix, shadow stabilization

- Overlap M2 and character animation updates via std::async (~2-5ms saved)
- Thread-local collision scratch buffers for concurrent floor queries
- Parallel terrain/WMO/M2 floor queries in camera controller
- Seed new M2 instance bones from existing siblings to eliminate pop-in flash
- Fix shadow flicker: snap center along stable light axes instead of in view space
- Increase shadow distance default to 300 units (slider max 500)

											
										
										
											2026-03-07 22:29:06 -08:00
+								        // Skip animated instances entirely until bones are computed + buffers allocated
 								        // to prevent bind-pose/T-pose flash on first appearance.
 								        bool modelNeedsAnimation = model.hasAnimation && !model.disableAnimation;
 								        if (modelNeedsAnimation && instance.boneMatrices.empty()) {
 								            continue;  // Bones not yet computed — skip to avoid bind-pose flash
 								        }
 								        bool needsBones = modelNeedsAnimation && !instance.boneMatrices.empty();
-												Fix WMO wall collision, normal mapping, POM backfill, and M2/WMO rendering performance

- Fix MOPY flag check (0x08 not 0x01) for proper wall collision detection
- Cap MAX_PUSH to PLAYER_RADIUS to prevent gradual clip-through
- Fix WMO doodad quaternion component ordering (X/Y swap)
- Linear normal map strength blend in shader for smooth slider control
- Enable shadow sampling for interior WMO groups (covered outdoor areas)
- Backfill deferred normal/height maps after streaming with descriptor rebind
- M2: prepareRender only iterates animated instances, bone dirty flag
- M2: remove worker thread VMA allocation, skip unready bone instances
- WMO: persistent visibility vectors, sequential culling
- Add FSR EASU/RCAS shaders

											
										
										
											2026-03-07 22:03:28 -08:00
+								        if (needsBones && (!instance.boneBuffer[frameIndex] || !instance.boneSet[frameIndex])) {
-												Parallel animation updates, thread-safe collision, M2 pop-in fix, shadow stabilization

- Overlap M2 and character animation updates via std::async (~2-5ms saved)
- Thread-local collision scratch buffers for concurrent floor queries
- Parallel terrain/WMO/M2 floor queries in camera controller
- Seed new M2 instance bones from existing siblings to eliminate pop-in flash
- Fix shadow flicker: snap center along stable light axes instead of in view space
- Increase shadow distance default to 300 units (slider max 500)

											
										
										
											2026-03-07 22:29:06 -08:00
+								            continue;  // Bone buffers not yet allocated — skip to avoid bind-pose flash
-												Fix WMO wall collision, normal mapping, POM backfill, and M2/WMO rendering performance

- Fix MOPY flag check (0x08 not 0x01) for proper wall collision detection
- Cap MAX_PUSH to PLAYER_RADIUS to prevent gradual clip-through
- Fix WMO doodad quaternion component ordering (X/Y swap)
- Linear normal map strength blend in shader for smooth slider control
- Enable shadow sampling for interior WMO groups (covered outdoor areas)
- Backfill deferred normal/height maps after streaming with descriptor rebind
- M2: prepareRender only iterates animated instances, bone dirty flag
- M2: remove worker thread VMA allocation, skip unready bone instances
- WMO: persistent visibility vectors, sequential culling
- Add FSR EASU/RCAS shaders

											
										
										
											2026-03-07 22:03:28 -08:00
+								        }
 								        bool useBones = needsBones;
-												Add M2 skeletal animation and fix terrain texture layers

- Implement GPU bone skinning for M2 doodads/creatures (gryphons, birds)
- Store bone hierarchy and animation keyframes per model
- Compute bone matrices per-instance with keyframe interpolation
- Upload bone weights/indices in vertex buffer, skinning in vertex shader
- Fix terrain texture rendering: restore sampler-to-unit uniform bindings
  removed during texture bind optimization (roads were invisible)

											
										
										
											2026-02-04 11:40:00 -08:00
+								        if (useBones) {
-												Fix WMO wall collision, normal mapping, POM backfill, and M2/WMO rendering performance

- Fix MOPY flag check (0x08 not 0x01) for proper wall collision detection
- Cap MAX_PUSH to PLAYER_RADIUS to prevent gradual clip-through
- Fix WMO doodad quaternion component ordering (X/Y swap)
- Linear normal map strength blend in shader for smooth slider control
- Enable shadow sampling for interior WMO groups (covered outdoor areas)
- Backfill deferred normal/height maps after streaming with descriptor rebind
- M2: prepareRender only iterates animated instances, bone dirty flag
- M2: remove worker thread VMA allocation, skip unready bone instances
- WMO: persistent visibility vectors, sequential culling
- Add FSR EASU/RCAS shaders

											
										
										
											2026-03-07 22:03:28 -08:00
+								            // Upload bone matrices only when recomputed (skip frame-skipped instances)
 								            if (instance.bonesDirty && instance.boneMapped[frameIndex]) {
-												Vulcan Nightmare

Experimentally bringing up Vulkan support

											
										
										
											2026-02-21 19:41:21 -08:00
+								                int numBones = std::min(static_cast<int>(instance.boneMatrices.size()), 128);
 								                memcpy(instance.boneMapped[frameIndex], instance.boneMatrices.data(),
 								                       numBones * sizeof(glm::mat4));
-												Fix WMO wall collision, normal mapping, POM backfill, and M2/WMO rendering performance

- Fix MOPY flag check (0x08 not 0x01) for proper wall collision detection
- Cap MAX_PUSH to PLAYER_RADIUS to prevent gradual clip-through
- Fix WMO doodad quaternion component ordering (X/Y swap)
- Linear normal map strength blend in shader for smooth slider control
- Enable shadow sampling for interior WMO groups (covered outdoor areas)
- Backfill deferred normal/height maps after streaming with descriptor rebind
- M2: prepareRender only iterates animated instances, bone dirty flag
- M2: remove worker thread VMA allocation, skip unready bone instances
- WMO: persistent visibility vectors, sequential culling
- Add FSR EASU/RCAS shaders

											
										
										
											2026-03-07 22:03:28 -08:00
+								                instance.bonesDirty = false;
-												Vulcan Nightmare

Experimentally bringing up Vulkan support

											
										
										
											2026-02-21 19:41:21 -08:00
+								            }
-												Add M2 skeletal animation and fix terrain texture layers

- Implement GPU bone skinning for M2 doodads/creatures (gryphons, birds)
- Store bone hierarchy and animation keyframes per model
- Compute bone matrices per-instance with keyframe interpolation
- Upload bone weights/indices in vertex buffer, skinning in vertex shader
- Fix terrain texture rendering: restore sampler-to-unit uniform bindings
  removed during texture bind optimization (roads were invisible)

											
										
										
											2026-02-04 11:40:00 -08:00
-												Vulcan Nightmare

Experimentally bringing up Vulkan support

											
										
										
											2026-02-21 19:41:21 -08:00
+								            // Bind bone descriptor set (set 2)
 								            if (instance.boneSet[frameIndex]) {
 								                vkCmdBindDescriptorSets(cmd, VK_PIPELINE_BIND_POINT_GRAPHICS,
 								                                        pipelineLayout_, 2, 1, &instance.boneSet[frameIndex], 0, nullptr);
-												Eliminate per-frame allocations in M2 renderer to reduce CPU stutter

Use persistent vectors for animation work indices, futures, and glow sprites instead of allocating each frame.

											
										
										
											2026-02-09 00:41:07 -08:00
+								            }
-												Add spellbook, fix WMO floor clipping, and polish UI/visuals

- Add spellbook screen (P key) with Spell.dbc name lookup and action bar assignment
- Default Attack and Hearthstone spells available in single player
- Fix WMO floor clipping (gryphon roost) by tightening ceiling rejection threshold
- Darken ocean water, increase wave motion and opacity
- Add M2 model distance fade-in to prevent pop-in
- Reposition chat window, add slash/enter key focus
- Remove debug key commands (keep only F1 perf HUD, N minimap)
- Performance: return chat history by const ref, use deque for O(1) pop_front

											
										
										
											2026-02-04 11:31:08 -08:00
+								        }
-												Initial commit: wowee native WoW 3.3.5a client

											
										
										
											2026-02-02 12:24:50 -08:00
-												Eliminate per-instance hash lookups in M2 render/shadow culling loops

Use cached model flags (isValid, isSmoke, isInvisibleTrap, isGroundDetail,
disableAnimation, boundRadius) on M2Instance instead of models.find() in
the hot culling paths. Also complete cached flag initialization in
createInstanceWithMatrix().

											
										
										
											2026-03-04 08:28:21 -08:00
+								        // LOD selection based on squared distance (avoid sqrt)
-												Implement comprehensive taxi flight optimizations and proper spline paths

Major improvements:
- Load TaxiPathNode.dbc for actual curved flight paths (no more flying through terrain)
- Add 3-second mounting delay with terrain precaching for entire route
- Implement LOD system for M2 models with distance-based quality reduction
- Add circular terrain loading pattern (13 tiles vs 25, 48% reduction)
- Increase terrain cache from 2GB to 8GB for modern systems

Performance optimizations during taxi:
- Cull small M2 models (boundRadius < 3.0) - not visible from altitude
- Disable particle systems (weather, smoke, M2 emitters) - saves ~7000 particles
- Disable specular lighting on M2 models - saves Blinn-Phong calculations
- Disable shadow mapping on M2 models - saves shadow map sampling and PCF

Technical details:
- Parse TaxiPathNode.dbc spline waypoints for curved paths around terrain
- Build full path from node pairs using TaxiPathEdge lookup
- Precache callback triggers during mounting delay for smooth takeoff
- Circular tile loading uses Euclidean distance check (dx²+dy² <= r²)
- LOD fallback to base mesh when higher LODs unavailable

Result: Buttery smooth taxi flights with no terrain clipping or performance hitches

											
										
										
											2026-02-08 21:32:38 -08:00
+								        uint16_t desiredLOD = 0;
-												Eliminate per-instance hash lookups in M2 render/shadow culling loops

Use cached model flags (isValid, isSmoke, isInvisibleTrap, isGroundDetail,
disableAnimation, boundRadius) on M2Instance instead of models.find() in
the hot culling paths. Also complete cached flag initialization in
createInstanceWithMatrix().

											
										
										
											2026-03-04 08:28:21 -08:00
+								        if (entry.distSq > 150.0f * 150.0f) desiredLOD = 3;
 								        else if (entry.distSq > 80.0f * 80.0f) desiredLOD = 2;
 								        else if (entry.distSq > 40.0f * 40.0f) desiredLOD = 1;
-												Implement comprehensive taxi flight optimizations and proper spline paths

Major improvements:
- Load TaxiPathNode.dbc for actual curved flight paths (no more flying through terrain)
- Add 3-second mounting delay with terrain precaching for entire route
- Implement LOD system for M2 models with distance-based quality reduction
- Add circular terrain loading pattern (13 tiles vs 25, 48% reduction)
- Increase terrain cache from 2GB to 8GB for modern systems

Performance optimizations during taxi:
- Cull small M2 models (boundRadius < 3.0) - not visible from altitude
- Disable particle systems (weather, smoke, M2 emitters) - saves ~7000 particles
- Disable specular lighting on M2 models - saves Blinn-Phong calculations
- Disable shadow mapping on M2 models - saves shadow map sampling and PCF

Technical details:
- Parse TaxiPathNode.dbc spline waypoints for curved paths around terrain
- Build full path from node pairs using TaxiPathEdge lookup
- Precache callback triggers during mounting delay for smooth takeoff
- Circular tile loading uses Euclidean distance check (dx²+dy² <= r²)
- LOD fallback to base mesh when higher LODs unavailable

Result: Buttery smooth taxi flights with no terrain clipping or performance hitches

											
										
										
											2026-02-08 21:32:38 -08:00
 								        uint16_t targetLOD = desiredLOD;
-												Fix WMO wall collision, normal mapping, POM backfill, and M2/WMO rendering performance

- Fix MOPY flag check (0x08 not 0x01) for proper wall collision detection
- Cap MAX_PUSH to PLAYER_RADIUS to prevent gradual clip-through
- Fix WMO doodad quaternion component ordering (X/Y swap)
- Linear normal map strength blend in shader for smooth slider control
- Enable shadow sampling for interior WMO groups (covered outdoor areas)
- Backfill deferred normal/height maps after streaming with descriptor rebind
- M2: prepareRender only iterates animated instances, bone dirty flag
- M2: remove worker thread VMA allocation, skip unready bone instances
- WMO: persistent visibility vectors, sequential culling
- Add FSR EASU/RCAS shaders

											
										
										
											2026-03-07 22:03:28 -08:00
+								        if (desiredLOD > 0 && !(model.availableLODs & (1u << desiredLOD))) {
 								            targetLOD = 0;
-												Implement comprehensive taxi flight optimizations and proper spline paths

Major improvements:
- Load TaxiPathNode.dbc for actual curved flight paths (no more flying through terrain)
- Add 3-second mounting delay with terrain precaching for entire route
- Implement LOD system for M2 models with distance-based quality reduction
- Add circular terrain loading pattern (13 tiles vs 25, 48% reduction)
- Increase terrain cache from 2GB to 8GB for modern systems

Performance optimizations during taxi:
- Cull small M2 models (boundRadius < 3.0) - not visible from altitude
- Disable particle systems (weather, smoke, M2 emitters) - saves ~7000 particles
- Disable specular lighting on M2 models - saves Blinn-Phong calculations
- Disable shadow mapping on M2 models - saves shadow map sampling and PCF

Technical details:
- Parse TaxiPathNode.dbc spline waypoints for curved paths around terrain
- Build full path from node pairs using TaxiPathEdge lookup
- Precache callback triggers during mounting delay for smooth takeoff
- Circular tile loading uses Euclidean distance check (dx²+dy² <= r²)
- LOD fallback to base mesh when higher LODs unavailable

Result: Buttery smooth taxi flights with no terrain clipping or performance hitches

											
										
										
											2026-02-08 21:32:38 -08:00
+								        }
-												Optimize M2/WMO render loop: cache UBO pointers, precompute model flags, reduce rebinds

- Cache material UBO mapped pointers at creation time, eliminating
  per-batch vmaGetAllocationInfo() calls in the hot render path
- Precompute foliage/elven/lantern/kobold model name classifications
  at load time instead of per-instance string operations every frame
- Remove redundant descriptor set and push constant rebinds on WMO
  pipeline switches (preserved across compatible layouts)
- Pre-allocate glow sprite descriptor set once at init instead of
  allocating from the pool every frame

											
										
										
											2026-02-23 06:06:24 -08:00
+								        const bool foliageLikeModel = model.isFoliageLike;
-												Ironforge Great Forge lava, magma water rendering, LavaSteam particle effects

- Add magma/slime rendering path to water shader (fbm noise, crust/molten/core coloring)
- Fix WMO liquid height filter rejecting high-altitude zones like Ironforge (Z>300)
- Allow interior WMO magma/slime MLIQ groups to load (skip only water/ocean)
- Mark LAVASTEAM.m2 as spell effect for proper additive blend, hide emission mesh
- Add isLavaModel flag for M2 ForgeLava/LavaPots UV scroll fallback
- Add isLava material detection in WMO renderer for lava texture UV animation
- Fix WMO material UBO colors for magma (was blue, now orange-red)

											
										
										
											2026-03-07 00:48:04 -08:00
+								        // Particle-dominant spell effects: mesh is emission geometry, render dim
 								        const bool particleDominantEffect = model.isSpellEffect &&
 								            !model.particleEmitters.empty() && model.batches.size() <= 2;
-												Initial commit: wowee native WoW 3.3.5a client

											
										
										
											2026-02-02 12:24:50 -08:00
+								        for (const auto& batch : model.batches) {
 								            if (batch.indexCount == 0) continue;
-												Improve runtime stutter handling and ground clutter performance

- reduce per-tile ground clutter generation pressure and enforce tighter caps to avoid spikes

- remove expensive detail dedupe scans from the hot render path

- add progressive/lazy clutter updates around player movement to smooth frame pacing

- lower noisy runtime INFO logging to DEBUG/throttled paths

- keep terrain/game screen updates responsive while preserving existing behavior

											
										
										
											2026-02-21 01:26:16 -08:00
+								            if (!model.isGroundDetail && batch.submeshLevel != targetLOD) continue;
-												Fix M2 white shell artifact from missing textures, add opacity track support

Batches whose named texture fails to load now render invisible instead of
white (the swampreeds01a.blp case causing a white shell around aquatic plants).

Also implements proper M2 opacity plumbing:
- Parse texture weight tracks (M2Track<fixed16>) and color animation alpha
  tracks (M2Color.alpha) to resolve per-batch opacity at load time
- Skip batches with batchOpacity < 0.01 in the render loop
- Apply M2Texture.flags (bit0=WrapS, bit1=WrapT) to GL sampler wrap mode
- Upload both UV sets (texCoords[0] and texCoords[1]) and select via
  textureUnit uniform, so batches referencing UV set 1 render correctly

											
										
										
											2026-02-17 23:52:44 -08:00
+								            if (batch.batchOpacity < 0.01f) continue;
-												Optimize M2/WMO render loop: cache UBO pointers, precompute model flags, reduce rebinds

- Cache material UBO mapped pointers at creation time, eliminating
  per-batch vmaGetAllocationInfo() calls in the hot render path
- Precompute foliage/elven/lantern/kobold model name classifications
  at load time instead of per-instance string operations every frame
- Remove redundant descriptor set and push constant rebinds on WMO
  pipeline switches (preserved across compatible layouts)
- Pre-allocate glow sprite descriptor set once at init instead of
  allocating from the pool every frame

											
										
										
											2026-02-23 06:06:24 -08:00
+								            const bool koboldFlameCard = batch.colorKeyBlack && model.isKoboldFlame;
-												Refine lantern glow-card replacement and preserve lamp geometry

- Add per-batch glow metadata (lantern hint, card-like classification, tint)

- Track normalized texture keys and log lantern/light texture sets once for diagnostics

- Force sprite replacement for known Stormwind/Night Elf glow textures

- Keep lantern/light meshes visible while hiding only classified glow-card submeshes

- Choose glow sprite tint from texture hints (cool/red/warm) to avoid orange-only cards

- Broaden lantern glow detection to handle gameobject lights with nonstandard material setups

											
										
										
											2026-02-21 03:51:42 -08:00
+								            const bool smallCardLikeBatch =
 								                (batch.glowSize <= 1.35f) ||
 								                (batch.lanternGlowHint && batch.glowSize <= 6.0f);
-												Fix torch models eaten by glow sprites, fix lantern glow black backgrounds

Two fixes:

1. shouldUseGlowSprite now requires the UNLIT material flag (0x01) for
   the colorKeyBlack+flameLikeModel path. Previously, structural geometry
   (torch handles, sconce brackets) on flame-like models got replaced
   with glow sprites because their texture paths contained keywords like
   "torch" in the directory name, setting colorKeyBlack on non-glow
   textures. Requiring UNLIT ensures only actual glow/emissive batches
   become sprites while lit structural geometry renders normally.

2. Fragment shader now discards near-black (maxRGB < 0.1) for ALL unlit
   non-opaque batches, not just additive blend modes. Glow effects on
   lanterns/lamps that use blendMode 1 (AlphaKey) or 2 (Alpha) instead
   of 3 (Additive) now properly discard their black backgrounds.

											
										
										
											2026-02-19 18:23:59 -08:00
+								            const bool batchUnlit = (batch.materialFlags & 0x01) != 0;
-												Optimize M2/WMO render loop: cache UBO pointers, precompute model flags, reduce rebinds

- Cache material UBO mapped pointers at creation time, eliminating
  per-batch vmaGetAllocationInfo() calls in the hot render path
- Precompute foliage/elven/lantern/kobold model name classifications
  at load time instead of per-instance string operations every frame
- Remove redundant descriptor set and push constant rebinds on WMO
  pipeline switches (preserved across compatible layouts)
- Pre-allocate glow sprite descriptor set once at init instead of
  allocating from the pool every frame

											
										
										
											2026-02-23 06:06:24 -08:00
+								            const bool elvenLikeModel = model.isElvenLike;
 								            const bool lanternLikeModel = model.isLanternLike;
-												Fix elven lantern glow cards rendering as flat disks

- route small lantern/lamp/torch/candle emissive batches to glow-sprite path
- broaden light-emitter detection beyond additive-only blend modes
- add a secondary wider low-alpha halo layer for softer glow falloff
- keep elven-style lantern tint in cool blue while preserving warm tint elsewhere

This avoids rendering hard flat glow planes and restores a softer volumetric-looking lantern glow.

											
										
										
											2026-02-21 02:49:26 -08:00
+								            const bool shouldUseGlowSprite =
 								                !koboldFlameCard &&
-												Refine lantern glow-card replacement and preserve lamp geometry

- Add per-batch glow metadata (lantern hint, card-like classification, tint)

- Track normalized texture keys and log lantern/light texture sets once for diagnostics

- Force sprite replacement for known Stormwind/Night Elf glow textures

- Keep lantern/light meshes visible while hiding only classified glow-card submeshes

- Choose glow sprite tint from texture hints (cool/red/warm) to avoid orange-only cards

- Broaden lantern glow detection to handle gameobject lights with nonstandard material setups

											
										
										
											2026-02-21 03:51:42 -08:00
+								                (elvenLikeModel || (lanternLikeModel && batch.lanternGlowHint)) &&
-												Narrow glow-card replacement to preserve lamp/sconce textures

- restrict glow sprite substitution to elven-like light models only
- keep Stormwind lamps and generic torch sconces rendering authored glass/grate/flame cards
- remove over-broad flame heuristic that turned many light fixtures into plain glow orbs

											
										
										
											2026-02-21 03:26:21 -08:00
+								                !model.isSpellEffect &&
 								                smallCardLikeBatch &&
-												Refine lantern glow-card replacement and preserve lamp geometry

- Add per-batch glow metadata (lantern hint, card-like classification, tint)

- Track normalized texture keys and log lantern/light texture sets once for diagnostics

- Force sprite replacement for known Stormwind/Night Elf glow textures

- Keep lantern/light meshes visible while hiding only classified glow-card submeshes

- Choose glow sprite tint from texture hints (cool/red/warm) to avoid orange-only cards

- Broaden lantern glow detection to handle gameobject lights with nonstandard material setups

											
										
										
											2026-02-21 03:51:42 -08:00
+								                (batch.lanternGlowHint ||
 								                 (batch.blendMode >= 3) ||
-												Narrow glow-card replacement to preserve lamp/sconce textures

- restrict glow sprite substitution to elven-like light models only
- keep Stormwind lamps and generic torch sconces rendering authored glass/grate/flame cards
- remove over-broad flame heuristic that turned many light fixtures into plain glow orbs

											
										
										
											2026-02-21 03:26:21 -08:00
+								                 (batch.colorKeyBlack && batchUnlit && batch.blendMode >= 1));
-												Tune M2 lantern flame glow without restoring card artifacts

											
										
										
											2026-02-19 06:12:32 -08:00
+								            if (shouldUseGlowSprite) {
 								                if (entry.distSq < 180.0f * 180.0f) {
 								                    glm::vec3 worldPos = glm::vec3(instance.modelMatrix * glm::vec4(batch.center, 1.0f));
 								                    GlowSprite gs;
 								                    gs.worldPos = worldPos;
-												Refine lantern glow-card replacement and preserve lamp geometry

- Add per-batch glow metadata (lantern hint, card-like classification, tint)

- Track normalized texture keys and log lantern/light texture sets once for diagnostics

- Force sprite replacement for known Stormwind/Night Elf glow textures

- Keep lantern/light meshes visible while hiding only classified glow-card submeshes

- Choose glow sprite tint from texture hints (cool/red/warm) to avoid orange-only cards

- Broaden lantern glow detection to handle gameobject lights with nonstandard material setups

											
										
										
											2026-02-21 03:51:42 -08:00
+								                    if (batch.glowTint == 1 || elvenLikeModel) {
 								                        gs.color = glm::vec4(0.48f, 0.72f, 1.0f, 1.05f);
 								                    } else if (batch.glowTint == 2) {
 								                        gs.color = glm::vec4(1.0f, 0.28f, 0.22f, 1.10f);
 								                    } else {
 								                        gs.color = glm::vec4(1.0f, 0.82f, 0.46f, 1.15f);
 								                    }
-												Tune M2 lantern flame glow without restoring card artifacts

											
										
										
											2026-02-19 06:12:32 -08:00
+								                    gs.size = batch.glowSize * instance.scale * 1.45f;
 								                    glowSprites_.push_back(gs);
-												Fix elven lantern glow cards rendering as flat disks

- route small lantern/lamp/torch/candle emissive batches to glow-sprite path
- broaden light-emitter detection beyond additive-only blend modes
- add a secondary wider low-alpha halo layer for softer glow falloff
- keep elven-style lantern tint in cool blue while preserving warm tint elsewhere

This avoids rendering hard flat glow planes and restores a softer volumetric-looking lantern glow.

											
										
										
											2026-02-21 02:49:26 -08:00
+								                    GlowSprite halo = gs;
 								                    halo.color.a *= 0.42f;
 								                    halo.size *= 1.8f;
 								                    glowSprites_.push_back(halo);
-												Tune M2 lantern flame glow without restoring card artifacts

											
										
										
											2026-02-19 06:12:32 -08:00
+								                }
-												Refine lantern glow-card replacement and preserve lamp geometry

- Add per-batch glow metadata (lantern hint, card-like classification, tint)

- Track normalized texture keys and log lantern/light texture sets once for diagnostics

- Force sprite replacement for known Stormwind/Night Elf glow textures

- Keep lantern/light meshes visible while hiding only classified glow-card submeshes

- Choose glow sprite tint from texture hints (cool/red/warm) to avoid orange-only cards

- Broaden lantern glow detection to handle gameobject lights with nonstandard material setups

											
										
										
											2026-02-21 03:51:42 -08:00
+								                const bool cardLikeSkipMesh =
 								                    (batch.blendMode >= 3) ||
 								                    batch.colorKeyBlack ||
 								                    ((batch.materialFlags & 0x01) != 0);
 								                if ((batch.glowCardLike && lanternLikeModel) ||
 								                    (cardLikeSkipMesh && !lanternLikeModel)) {
 								                    continue;
 								                }
-												Tune M2 lantern flame glow without restoring card artifacts

											
										
										
											2026-02-19 06:12:32 -08:00
+								            }
-												Skip additive/mod blend batches in M2 rendering

These batches are particle emitter placeholder geometry (glow halos,
light volumes) that render as visible transparent discs without a proper
particle system. Skip them entirely instead of attempting to render as
raw mesh geometry.

											
										
										
											2026-02-06 08:05:40 -08:00
-												Vulcan Nightmare

Experimentally bringing up Vulkan support

											
										
										
											2026-02-21 19:41:21 -08:00
+								            // Compute UV offset for texture animation
-												Implement M2 texture animation (UV scrolling) for fountain water

Parse M2TextureTransform entries and texture transform lookups from the
M2 binary, then apply per-batch UV offsets in the vertex shader using
the existing animation time base and global sequence durations.

											
										
										
											2026-02-06 01:49:27 -08:00
+								            glm::vec2 uvOffset(0.0f, 0.0f);
 								            if (batch.textureAnimIndex != 0xFFFF && model.hasTextureAnimation) {
 								                uint16_t lookupIdx = batch.textureAnimIndex;
 								                if (lookupIdx < model.textureTransformLookup.size()) {
 								                    uint16_t transformIdx = model.textureTransformLookup[lookupIdx];
 								                    if (transformIdx < model.textureTransforms.size()) {
 								                        const auto& tt = model.textureTransforms[transformIdx];
 								                        glm::vec3 trans = interpVec3(tt.translation,
 								                            instance.currentSequenceIndex, instance.animTime,
 								                            glm::vec3(0.0f), model.globalSequenceDurations);
 								                        uvOffset = glm::vec2(trans.x, trans.y);
 								                    }
 								                }
 								            }
-												Ironforge Great Forge lava, magma water rendering, LavaSteam particle effects

- Add magma/slime rendering path to water shader (fbm noise, crust/molten/core coloring)
- Fix WMO liquid height filter rejecting high-altitude zones like Ironforge (Z>300)
- Allow interior WMO magma/slime MLIQ groups to load (skip only water/ocean)
- Mark LAVASTEAM.m2 as spell effect for proper additive blend, hide emission mesh
- Add isLavaModel flag for M2 ForgeLava/LavaPots UV scroll fallback
- Add isLava material detection in WMO renderer for lava texture UV animation
- Fix WMO material UBO colors for magma (was blue, now orange-red)

											
										
										
											2026-03-07 00:48:04 -08:00
+								            // Lava M2 models: fallback UV scroll if no texture animation
 								            if (model.isLavaModel && uvOffset == glm::vec2(0.0f)) {
 								                static auto startTime = std::chrono::steady_clock::now();
 								                float t = std::chrono::duration<float>(std::chrono::steady_clock::now() - startTime).count();
 								                uvOffset = glm::vec2(t * 0.03f, -t * 0.08f);
 								            }
-												Implement M2 texture animation (UV scrolling) for fountain water

Parse M2TextureTransform entries and texture transform lookups from the
M2 binary, then apply per-batch UV offsets in the vertex shader using
the existing animation time base and global sequence durations.

											
										
										
											2026-02-06 01:49:27 -08:00
-												Stabilize Vulkan rendering state for minimap, foliage, and water

											
										
										
											2026-02-22 09:34:27 -08:00
+								            // Foliage/card-like batches render more stably as cutout (depth-write on)
 								            // instead of alpha-blended sorting.
 								            const bool foliageCutout =
 								                foliageLikeModel &&
 								                !model.isSpellEffect &&
 								                batch.blendMode <= 3;
 								            const bool forceCutout =
 								                !model.isSpellEffect &&
 								                (model.isGroundDetail ||
 								                 foliageCutout ||
 								                 batch.blendMode == 1 ||
 								                 (batch.blendMode >= 2 && !batch.hasAlpha) ||
 								                 batch.colorKeyBlack);
-												Vulcan Nightmare

Experimentally bringing up Vulkan support

											
										
										
											2026-02-21 19:41:21 -08:00
+								            // Select pipeline based on blend mode
-												Add 3D level-up effect using LevelUp.m2 spell model

Replace 2D screen-space ding rings with real WoW LevelUp.m2 particle/geometry
effect. Fix FBlock particle color parsing (C3Vector floats, not CImVector bytes)
which was producing blue/red instead of golden yellow. Spell effect models bypass
particle dampeners, glow sprite conversion, Mod→Additive blend override, and all
collision (floor/wall/camera) to prevent camera zoom-in. Other players' level-ups
trigger the 3D effect at their position with group chat notification. F7 hotkey
for testing.

											
										
										
											2026-02-19 20:36:25 -08:00
+								            uint8_t effectiveBlendMode = batch.blendMode;
-												Effect model additive blend, teleport facing, tighter area triggers

Classify light shafts, portals, spotlights, bubbles, and similar M2
doodads as spell effects so they render with additive blending instead
of as solid opaque objects.

Set camera yaw from server orientation on world load so teleports face
the correct direction.

Reduce area trigger minimum radius (3.0 sphere, 4.0 box) to prevent
premature portal firing near tram entrances.

											
										
										
											2026-03-06 17:03:29 -08:00
+								            if (model.isSpellEffect) {
 								                // Effect models: force additive blend for opaque/cutout batches
 								                // so the mesh renders as a transparent glow, not a solid object
 								                if (effectiveBlendMode <= 1) {
 								                    effectiveBlendMode = 3;  // additive
 								                } else if (effectiveBlendMode == 4 || effectiveBlendMode == 5) {
 								                    effectiveBlendMode = 3;
 								                }
-												Add 3D level-up effect using LevelUp.m2 spell model

Replace 2D screen-space ding rings with real WoW LevelUp.m2 particle/geometry
effect. Fix FBlock particle color parsing (C3Vector floats, not CImVector bytes)
which was producing blue/red instead of golden yellow. Spell effect models bypass
particle dampeners, glow sprite conversion, Mod→Additive blend override, and all
collision (floor/wall/camera) to prevent camera zoom-in. Other players' level-ups
trigger the 3D effect at their position with group chat notification. F7 hotkey
for testing.

											
										
										
											2026-02-19 20:36:25 -08:00
+								            }
-												Stabilize Vulkan rendering state for minimap, foliage, and water

											
										
										
											2026-02-22 09:34:27 -08:00
+								            if (forceCutout) {
-												Fix minimap arrow orientation and ground-detail foliage transparency

											
										
										
											2026-02-22 08:44:16 -08:00
+								                effectiveBlendMode = 1;
-												Improve runtime stutter handling and ground clutter performance

- reduce per-tile ground clutter generation pressure and enforce tighter caps to avoid spikes

- remove expensive detail dedupe scans from the hot render path

- add progressive/lazy clutter updates around player movement to smooth frame pacing

- lower noisy runtime INFO logging to DEBUG/throttled paths

- keep terrain/game screen updates responsive while preserving existing behavior

											
										
										
											2026-02-21 01:26:16 -08:00
+								            }
-												Eliminate per-frame allocations in M2 renderer to reduce CPU stutter

Use persistent vectors for animation work indices, futures, and glow sprites instead of allocating each frame.

											
										
										
											2026-02-09 00:41:07 -08:00
-												Vulcan Nightmare

Experimentally bringing up Vulkan support

											
										
										
											2026-02-21 19:41:21 -08:00
+								            VkPipeline desiredPipeline;
-												Stabilize Vulkan rendering state for minimap, foliage, and water

											
										
										
											2026-02-22 09:34:27 -08:00
+								            if (forceCutout) {
 								                // Use opaque pipeline + shader discard for stable foliage cards.
 								                desiredPipeline = opaquePipeline_;
 								            } else {
 								                switch (effectiveBlendMode) {
 								                    case 0: desiredPipeline = opaquePipeline_; break;
 								                    case 1: desiredPipeline = alphaTestPipeline_; break;
 								                    case 2: desiredPipeline = alphaPipeline_; break;
 								                    default: desiredPipeline = additivePipeline_; break;
 								                }
-												Vulcan Nightmare

Experimentally bringing up Vulkan support

											
										
										
											2026-02-21 19:41:21 -08:00
+								            }
 								            if (desiredPipeline != currentPipeline) {
 								                vkCmdBindPipeline(cmd, VK_PIPELINE_BIND_POINT_GRAPHICS, desiredPipeline);
 								                currentPipeline = desiredPipeline;
 								            }
-												Fix glow sprite flashing, move fadeAlpha to push constants, throttle character bones

- Glow sprites now use dedicated vertex buffer (glowVB_) separate from
  M2 particle buffer to prevent data race when renderM2Particles()
  overwrites glow data mid-flight
- Move fadeAlpha from shared material UBO to per-draw push constants,
  eliminating cross-instance alpha race on non-double-buffered UBOs
- Smooth adaptive render distance transitions to prevent pop-in/out
  at instance count thresholds (1000/2000)
- Distance-tiered character bone throttling: near (<30u) every frame,
  mid (30-60u) every 3rd, far (60-120u) every 6th frame
- Skip weapon instance animation updates (transforms set by parent bones)

											
										
										
											2026-03-04 08:17:32 -08:00
+								            // Update material UBO with per-draw dynamic values (interiorDarken, forceCutout overrides)
 								            // Note: fadeAlpha is in push constants (per-draw) to avoid shared-UBO race
-												Optimize M2/WMO render loop: cache UBO pointers, precompute model flags, reduce rebinds

- Cache material UBO mapped pointers at creation time, eliminating
  per-batch vmaGetAllocationInfo() calls in the hot render path
- Precompute foliage/elven/lantern/kobold model name classifications
  at load time instead of per-instance string operations every frame
- Remove redundant descriptor set and push constant rebinds on WMO
  pipeline switches (preserved across compatible layouts)
- Pre-allocate glow sprite descriptor set once at init instead of
  allocating from the pool every frame

											
										
										
											2026-02-23 06:06:24 -08:00
+								            if (batch.materialUBOMapped) {
 								                auto* mat = static_cast<M2MaterialUBO*>(batch.materialUBOMapped);
 								                mat->interiorDarken = insideInterior ? 1.0f : 0.0f;
 								                if (batch.colorKeyBlack) {
 								                    mat->colorKeyThreshold = (effectiveBlendMode == 4 || effectiveBlendMode == 5) ? 0.7f : 0.08f;
 								                }
 								                if (forceCutout) {
 								                    mat->alphaTest = model.isGroundDetail ? 3 : (foliageCutout ? 2 : 1);
 								                    if (model.isGroundDetail) {
 								                        mat->unlit = 0;
-												Vulcan Nightmare

Experimentally bringing up Vulkan support

											
										
										
											2026-02-21 19:41:21 -08:00
+								                    }
-												Fix black glow cards on streetlamps: discard dark pixels in Mod blend batches

Mod blend (GL_DST_COLOR, GL_ZERO) multiplies the framebuffer by texture color,
so dark pixels create visible black rectangles. Use a variable colorKeyThreshold
uniform (0.7 for Mod/Mod2x, 0.08 default) to discard dark pixels from these
batches while preserving normal colorKeyBlack behavior elsewhere.

											
										
										
											2026-02-19 19:07:58 -08:00
+								                }
 								            }
-												Initial commit: wowee native WoW 3.3.5a client

											
										
										
											2026-02-02 12:24:50 -08:00
-												Fix game objects rendering with player textures

When M2Renderer's descriptor pool was exhausted, batch.materialSet
would be VK_NULL_HANDLE and the bind was skipped, but the draw call
still executed using the previously bound descriptor set from
CharacterRenderer — causing game objects to render with the player's
skin/armor textures. Skip the entire batch instead.

											
										
										
											2026-02-23 20:41:06 -08:00
+								            // Bind material descriptor set (set 1) — skip batch if missing
 								            // to avoid inheriting a stale descriptor set from a prior renderer
 								            if (!batch.materialSet) continue;
 								            vkCmdBindDescriptorSets(cmd, VK_PIPELINE_BIND_POINT_GRAPHICS,
 								                                    pipelineLayout_, 1, 1, &batch.materialSet, 0, nullptr);
-												Initial commit: wowee native WoW 3.3.5a client

											
										
										
											2026-02-02 12:24:50 -08:00
-												Vulcan Nightmare

Experimentally bringing up Vulkan support

											
										
										
											2026-02-21 19:41:21 -08:00
+								            // Push constants
 								            M2PushConstants pc;
 								            pc.model = instance.modelMatrix;
 								            pc.uvOffset = uvOffset;
 								            pc.texCoordSet = static_cast<int>(batch.textureUnit);
 								            pc.useBones = useBones ? 1 : 0;
-												Add shader-driven tree beautification: wind sway, SSS, color variation, AO

- Vertex wind animation: 3-layer displacement (trunk/branch/leaf) with
  quadratic height scaling so bases stay grounded
- Shadow pass: matching vertex displacement split into foliage/non-foliage
  passes, removed UV-wiggle approach
- Leaf subsurface scattering: warm backlit glow when looking toward sun
- Per-instance color variation: hue/brightness from position hash via flat
  varying to avoid interpolation flicker
- Canopy ambient occlusion: height-based darkening of tree interiors
- Detail normal perturbation: UV-only procedural normals to break flat cards
- Bayer 4x4 ordered dither replacing sin-hash noise for alpha edges
- Foliage skips shadow map sampling and specular to prevent flicker from
  swaying geometry sampling unstable shadow/highlight values

											
										
										
											2026-02-23 03:53:50 -08:00
+								            pc.isFoliage = model.shadowWindFoliage ? 1 : 0;
-												Fix glow sprite flashing, move fadeAlpha to push constants, throttle character bones

- Glow sprites now use dedicated vertex buffer (glowVB_) separate from
  M2 particle buffer to prevent data race when renderM2Particles()
  overwrites glow data mid-flight
- Move fadeAlpha from shared material UBO to per-draw push constants,
  eliminating cross-instance alpha race on non-double-buffered UBOs
- Smooth adaptive render distance transitions to prevent pop-in/out
  at instance count thresholds (1000/2000)
- Distance-tiered character bone throttling: near (<30u) every frame,
  mid (30-60u) every 3rd, far (60-120u) every 6th frame
- Skip weapon instance animation updates (transforms set by parent bones)

											
										
										
											2026-03-04 08:17:32 -08:00
+								            pc.fadeAlpha = instanceFadeAlpha;
-												Ironforge Great Forge lava, magma water rendering, LavaSteam particle effects

- Add magma/slime rendering path to water shader (fbm noise, crust/molten/core coloring)
- Fix WMO liquid height filter rejecting high-altitude zones like Ironforge (Z>300)
- Allow interior WMO magma/slime MLIQ groups to load (skip only water/ocean)
- Mark LAVASTEAM.m2 as spell effect for proper additive blend, hide emission mesh
- Add isLavaModel flag for M2 ForgeLava/LavaPots UV scroll fallback
- Add isLava material detection in WMO renderer for lava texture UV animation
- Fix WMO material UBO colors for magma (was blue, now orange-red)

											
										
										
											2026-03-07 00:48:04 -08:00
+								            // Particle-dominant effects: mesh is emission geometry, don't render
 								            if (particleDominantEffect && batch.blendMode <= 1) {
 								                continue;
 								            }
-												Vulcan Nightmare

Experimentally bringing up Vulkan support

											
										
										
											2026-02-21 19:41:21 -08:00
+								            vkCmdPushConstants(cmd, pipelineLayout_, VK_SHADER_STAGE_VERTEX_BIT, 0, sizeof(pc), &pc);
-												Initial commit: wowee native WoW 3.3.5a client

											
										
										
											2026-02-02 12:24:50 -08:00
-												Vulcan Nightmare

Experimentally bringing up vulcan support

											
										
										
											2026-02-21 19:41:21 -08:00
+								            vkCmdDrawIndexed(cmd, batch.indexCount, 1, batch.indexStart, 0, 0);
-												Initial commit: wowee native WoW 3.3.5a client

											
										
										
											2026-02-02 12:24:50 -08:00
+								            lastDrawCallCount++;
 								        }
 								    }
-												Render M2 glow batches as billboarded light sprites

Replace flat mesh rendering of additive/mod blend batches (blendMode >= 3)
with camera-facing point sprites using a soft radial gradient texture and
additive blending. Adds M2 particle emitter infrastructure (structs, shader,
parsing stubs) but disables emitter parsing — the assumed 476-byte struct
size is wrong for WotLK 3.3.5a, causing misaligned reads that explode RAM.

											
										
										
											2026-02-06 08:58:26 -08:00
+								    // Render glow sprites as billboarded additive point lights
-												Fix glow sprite flashing, move fadeAlpha to push constants, throttle character bones

- Glow sprites now use dedicated vertex buffer (glowVB_) separate from
  M2 particle buffer to prevent data race when renderM2Particles()
  overwrites glow data mid-flight
- Move fadeAlpha from shared material UBO to per-draw push constants,
  eliminating cross-instance alpha race on non-double-buffered UBOs
- Smooth adaptive render distance transitions to prevent pop-in/out
  at instance count thresholds (1000/2000)
- Distance-tiered character bone throttling: near (<30u) every frame,
  mid (30-60u) every 3rd, far (60-120u) every 6th frame
- Skip weapon instance animation updates (transforms set by parent bones)

											
										
										
											2026-03-04 08:17:32 -08:00
+								    if (!glowSprites_.empty() && particleAdditivePipeline_ && glowVB_ && glowTexDescSet_) {
-												Vulcan Nightmare

Experimentally bringing up vulcan support

											
										
										
											2026-02-21 19:41:21 -08:00
+								        vkCmdBindPipeline(cmd, VK_PIPELINE_BIND_POINT_GRAPHICS, particleAdditivePipeline_);
 								        vkCmdBindDescriptorSets(cmd, VK_PIPELINE_BIND_POINT_GRAPHICS,
 								                                particlePipelineLayout_, 0, 1, &perFrameSet, 0, nullptr);
-												Optimize M2/WMO render loop: cache UBO pointers, precompute model flags, reduce rebinds

- Cache material UBO mapped pointers at creation time, eliminating
  per-batch vmaGetAllocationInfo() calls in the hot render path
- Precompute foliage/elven/lantern/kobold model name classifications
  at load time instead of per-instance string operations every frame
- Remove redundant descriptor set and push constant rebinds on WMO
  pipeline switches (preserved across compatible layouts)
- Pre-allocate glow sprite descriptor set once at init instead of
  allocating from the pool every frame

											
										
										
											2026-02-23 06:06:24 -08:00
+								        vkCmdBindDescriptorSets(cmd, VK_PIPELINE_BIND_POINT_GRAPHICS,
 								                                particlePipelineLayout_, 1, 1, &glowTexDescSet_, 0, nullptr);
-												Vulcan Nightmare

Experimentally bringing up vulcan support

											
										
										
											2026-02-21 19:41:21 -08:00
 								        // Push constants for particle: tileCount(vec2) + alphaKey(int)
 								        struct { float tileX, tileY; int alphaKey; } particlePush = {1.0f, 1.0f, 0};
 								        vkCmdPushConstants(cmd, particlePipelineLayout_, VK_SHADER_STAGE_FRAGMENT_BIT, 0,
 								                           sizeof(particlePush), &particlePush);
-												Eliminate per-instance hash lookups in M2 render/shadow culling loops

Use cached model flags (isValid, isSmoke, isInvisibleTrap, isGroundDetail,
disableAnimation, boundRadius) on M2Instance instead of models.find() in
the hot culling paths. Also complete cached flag initialization in
createInstanceWithMatrix().

											
										
										
											2026-03-04 08:28:21 -08:00
+								        // Write glow vertex data directly to mapped buffer (no temp vector)
-												Fix glow sprite flashing, move fadeAlpha to push constants, throttle character bones

- Glow sprites now use dedicated vertex buffer (glowVB_) separate from
  M2 particle buffer to prevent data race when renderM2Particles()
  overwrites glow data mid-flight
- Move fadeAlpha from shared material UBO to per-draw push constants,
  eliminating cross-instance alpha race on non-double-buffered UBOs
- Smooth adaptive render distance transitions to prevent pop-in/out
  at instance count thresholds (1000/2000)
- Distance-tiered character bone throttling: near (<30u) every frame,
  mid (30-60u) every 3rd, far (60-120u) every 6th frame
- Skip weapon instance animation updates (transforms set by parent bones)

											
										
										
											2026-03-04 08:17:32 -08:00
+								        size_t uploadCount = std::min(glowSprites_.size(), MAX_GLOW_SPRITES);
-												Eliminate per-instance hash lookups in M2 render/shadow culling loops

Use cached model flags (isValid, isSmoke, isInvisibleTrap, isGroundDetail,
disableAnimation, boundRadius) on M2Instance instead of models.find() in
the hot culling paths. Also complete cached flag initialization in
createInstanceWithMatrix().

											
										
										
											2026-03-04 08:28:21 -08:00
+								        float* dst = static_cast<float*>(glowVBMapped_);
 								        for (size_t gi = 0; gi < uploadCount; gi++) {
 								            const auto& gs = glowSprites_[gi];
 								            *dst++ = gs.worldPos.x;
 								            *dst++ = gs.worldPos.y;
 								            *dst++ = gs.worldPos.z;
 								            *dst++ = gs.color.r;
 								            *dst++ = gs.color.g;
 								            *dst++ = gs.color.b;
 								            *dst++ = gs.color.a;
 								            *dst++ = gs.size;
 								            *dst++ = 0.0f;
 								        }
-												Render M2 glow batches as billboarded light sprites

Replace flat mesh rendering of additive/mod blend batches (blendMode >= 3)
with camera-facing point sprites using a soft radial gradient texture and
additive blending. Adds M2 particle emitter infrastructure (structs, shader,
parsing stubs) but disables emitter parsing — the assumed 476-byte struct
size is wrong for WotLK 3.3.5a, causing misaligned reads that explode RAM.

											
										
										
											2026-02-06 08:58:26 -08:00
-												Vulcan Nightmare

Experimentally bringing up vulcan support

											
										
										
											2026-02-21 19:41:21 -08:00
+								        VkDeviceSize offset = 0;
-												Fix glow sprite flashing, move fadeAlpha to push constants, throttle character bones

- Glow sprites now use dedicated vertex buffer (glowVB_) separate from
  M2 particle buffer to prevent data race when renderM2Particles()
  overwrites glow data mid-flight
- Move fadeAlpha from shared material UBO to per-draw push constants,
  eliminating cross-instance alpha race on non-double-buffered UBOs
- Smooth adaptive render distance transitions to prevent pop-in/out
  at instance count thresholds (1000/2000)
- Distance-tiered character bone throttling: near (<30u) every frame,
  mid (30-60u) every 3rd, far (60-120u) every 6th frame
- Skip weapon instance animation updates (transforms set by parent bones)

											
										
										
											2026-03-04 08:17:32 -08:00
+								        vkCmdBindVertexBuffers(cmd, 0, 1, &glowVB_, &offset);
-												Vulcan Nightmare

Experimentally bringing up vulcan support

											
										
										
											2026-02-21 19:41:21 -08:00
+								        vkCmdDraw(cmd, static_cast<uint32_t>(uploadCount), 1, 0, 0);
-												Render M2 glow batches as billboarded light sprites

Replace flat mesh rendering of additive/mod blend batches (blendMode >= 3)
with camera-facing point sprites using a soft radial gradient texture and
additive blending. Adds M2 particle emitter infrastructure (structs, shader,
parsing stubs) but disables emitter parsing — the assumed 476-byte struct
size is wrong for WotLK 3.3.5a, causing misaligned reads that explode RAM.

											
										
										
											2026-02-06 08:58:26 -08:00
+								    }
-												Initial commit: wowee native WoW 3.3.5a client

											
										
										
											2026-02-02 12:24:50 -08:00
+								}
-												Vulcan Nightmare

Experimentally bringing up vulcan support

											
										
										
											2026-02-21 19:41:21 -08:00
+								bool M2Renderer::initializeShadow(VkRenderPass shadowRenderPass) {
 								    // Builds the GPU objects used by the M2 shadow (depth-only) pass: the
 								    // ShadowParams uniform buffer, its descriptor set layout / pool / set,
 								    // the per-frame texture descriptor pool, the pipeline layout and finally
 								    // the shadow pipeline itself.
 								    // @param shadowRenderPass  render pass the shadow pipeline is built against.
 								    // @return true once shadowPipeline_ exists; false when vkCtx_ or the render
 								    //         pass is missing, or at the first creation step that fails.
 								    // NOTE(review): the failure paths below return without releasing objects
 								    // created earlier in this function — presumably a shutdown path frees
 								    // them; confirm.
 								    if (!vkCtx_ || shadowRenderPass == VK_NULL_HANDLE) return false;
 								    VkDevice device = vkCtx_->getDevice();
 								    // ShadowParams UBO: useBones, useTexture, alphaTest, foliageSway, windTime, foliageMotionDamp
 								    // The same field layout is redeclared locally in renderShadow(); keep both in sync.
 								    struct ShadowParamsUBO {
 								        int32_t useBones = 0;
 								        int32_t useTexture = 0;
 								        int32_t alphaTest = 0;
 								        int32_t foliageSway = 0;
 								        float windTime = 0.0f;
 								        float foliageMotionDamp = 1.0f;
 								    };
 								    // Create ShadowParams UBO
 								    VkBufferCreateInfo bufCI{};
 								    bufCI.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO;
 								    bufCI.size = sizeof(ShadowParamsUBO);
 								    bufCI.usage = VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT;
 								    VmaAllocationCreateInfo allocCI{};
 								    allocCI.usage = VMA_MEMORY_USAGE_CPU_TO_GPU;
 								    allocCI.flags = VMA_ALLOCATION_CREATE_MAPPED_BIT;
 								    VmaAllocationInfo allocInfo{};
 								    if (vmaCreateBuffer(vkCtx_->getAllocator(), &bufCI, &allocCI,
 								            &shadowParamsUBO_, &shadowParamsAlloc_, &allocInfo) != VK_SUCCESS) {
 								        LOG_ERROR("M2Renderer: failed to create shadow params UBO");
 								        return false;
 								    }
 								    ShadowParamsUBO defaultParams{};
 								    // Seed the buffer with the struct's default member values.
 								    // NOTE(review): pMappedData is used unchecked; this relies on the
 								    // VMA_ALLOCATION_CREATE_MAPPED_BIT flag requested above.
 								    std::memcpy(allocInfo.pMappedData, &defaultParams, sizeof(defaultParams));
 								    // Create descriptor set layout: binding 0 = sampler2D, binding 1 = ShadowParams UBO
 								    VkDescriptorSetLayoutBinding layoutBindings[2]{};
 								    layoutBindings[0].binding = 0;
 								    layoutBindings[0].descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER;
 								    layoutBindings[0].descriptorCount = 1;
 								    layoutBindings[0].stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT;
 								    layoutBindings[1].binding = 1;
 								    layoutBindings[1].descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER;
 								    layoutBindings[1].descriptorCount = 1;
 								    layoutBindings[1].stageFlags = VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_FRAGMENT_BIT;
 								    VkDescriptorSetLayoutCreateInfo layoutCI{};
 								    layoutCI.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO;
 								    layoutCI.bindingCount = 2;
 								    layoutCI.pBindings = layoutBindings;
 								    if (vkCreateDescriptorSetLayout(device, &layoutCI, nullptr, &shadowParamsLayout_) != VK_SUCCESS) {
 								        LOG_ERROR("M2Renderer: failed to create shadow params layout");
 								        return false;
-												Stabilize shadows and soften foliage shadow casting

											
										
										
											2026-02-04 16:22:18 -08:00
+								    }
-												Vulcan Nightmare

Experimentally bringing up vulcan support

											
										
										
											2026-02-21 19:41:21 -08:00
+								    // Create descriptor pool
 								    // Sized for exactly one set (shadowParamsSet_): one sampler + one UBO descriptor.
 								    VkDescriptorPoolSize poolSizes[2]{};
 								    poolSizes[0].type = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER;
 								    poolSizes[0].descriptorCount = 1;
 								    poolSizes[1].type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER;
 								    poolSizes[1].descriptorCount = 1;
 								    VkDescriptorPoolCreateInfo poolCI{};
 								    poolCI.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO;
 								    poolCI.maxSets = 1;
 								    poolCI.poolSizeCount = 2;
 								    poolCI.pPoolSizes = poolSizes;
 								    if (vkCreateDescriptorPool(device, &poolCI, nullptr, &shadowParamsPool_) != VK_SUCCESS) {
 								        LOG_ERROR("M2Renderer: failed to create shadow params pool");
 								        return false;
-												Stabilize shadows and soften foliage shadow casting

											
										
										
											2026-02-04 16:22:18 -08:00
+								    }
-												Vulcan Nightmare

Experimentally bringing up vulcan support

											
										
										
											2026-02-21 19:41:21 -08:00
+								    // Allocate descriptor set
 								    VkDescriptorSetAllocateInfo setAlloc{};
 								    setAlloc.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO;
 								    setAlloc.descriptorPool = shadowParamsPool_;
 								    setAlloc.descriptorSetCount = 1;
 								    setAlloc.pSetLayouts = &shadowParamsLayout_;
 								    if (vkAllocateDescriptorSets(device, &setAlloc, &shadowParamsSet_) != VK_SUCCESS) {
 								        LOG_ERROR("M2Renderer: failed to allocate shadow params set");
 								        return false;
 								    }
-												Stabilize shadows and soften foliage shadow casting

											
										
										
											2026-02-04 16:22:18 -08:00
-												Vulcan Nightmare

Experimentally bringing up vulcan support

											
										
										
											2026-02-21 19:41:21 -08:00
+								    // Write descriptors (use white fallback for binding 0)
 								    VkDescriptorBufferInfo bufInfo{};
 								    bufInfo.buffer = shadowParamsUBO_;
 								    bufInfo.offset = 0;
 								    bufInfo.range = sizeof(ShadowParamsUBO);
 								    VkDescriptorImageInfo imgInfo{};
 								    imgInfo.imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
 								    // NOTE(review): whiteTexture_ is dereferenced without a null check —
 								    // assumes it was created before this function runs; confirm.
 								    imgInfo.imageView = whiteTexture_->getImageView();
 								    imgInfo.sampler = whiteTexture_->getSampler();
 								    VkWriteDescriptorSet writes[2]{};
 								    writes[0].sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET;
 								    writes[0].dstSet = shadowParamsSet_;
 								    writes[0].dstBinding = 0;
 								    writes[0].descriptorCount = 1;
 								    writes[0].descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER;
 								    writes[0].pImageInfo = &imgInfo;
 								    writes[1].sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET;
 								    writes[1].dstSet = shadowParamsSet_;
 								    writes[1].dstBinding = 1;
 								    writes[1].descriptorCount = 1;
 								    writes[1].descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER;
 								    writes[1].pBufferInfo = &bufInfo;
 								    vkUpdateDescriptorSets(device, 2, writes, 0, nullptr);
-												Add alpha-tested foliage shadows: per-batch texture binding and shadow map receiving

Shadow casting: foliage batches now bind their actual texture in the shadow
pass with alpha testing, producing leaf-shaped shadows instead of solid cards.
Uses a per-frame resettable descriptor pool for texture sets.

Shadow receiving: foliage fragments now sample the shadow map with PCF
instead of using a flat constant darkening.

											
										
										
											2026-02-23 05:55:03 -08:00
+								    // Per-frame pool for foliage shadow texture sets (reset each frame)
 								    // Capacity: up to 256 sets, each with one sampler and one UBO descriptor;
 								    // renderShadow() allocates from it using shadowParamsLayout_.
 								    {
 								        VkDescriptorPoolSize texPoolSizes[2]{};
 								        texPoolSizes[0].type = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER;
 								        texPoolSizes[0].descriptorCount = 256;
 								        texPoolSizes[1].type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER;
 								        texPoolSizes[1].descriptorCount = 256;
 								        VkDescriptorPoolCreateInfo texPoolCI{};
 								        texPoolCI.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO;
 								        texPoolCI.maxSets = 256;
 								        texPoolCI.poolSizeCount = 2;
 								        texPoolCI.pPoolSizes = texPoolSizes;
 								        if (vkCreateDescriptorPool(device, &texPoolCI, nullptr, &shadowTexPool_) != VK_SUCCESS) {
 								            LOG_ERROR("M2Renderer: failed to create shadow texture pool");
 								            return false;
 								        }
 								    }
-												Vulcan Nightmare

Experimentally bringing up vulcan support

											
										
										
											2026-02-21 19:41:21 -08:00
+								    // Create shadow pipeline layout: set 1 = shadowParamsLayout_, push constants = 128 bytes
 								    // NOTE(review): only a single set layout is passed to createPipelineLayout
 								    // below, which would normally make it set 0 rather than set 1 — verify
 								    // against the set index used by the shadow shaders.
 								    VkPushConstantRange pc{};
 								    pc.stageFlags = VK_SHADER_STAGE_VERTEX_BIT;
 								    pc.offset = 0;
 								    pc.size = 128;  // lightSpaceMatrix (64) + model (64)
 								    shadowPipelineLayout_ = createPipelineLayout(device, {shadowParamsLayout_}, {pc});
 								    if (!shadowPipelineLayout_) {
 								        LOG_ERROR("M2Renderer: failed to create shadow pipeline layout");
 								        return false;
 								    }
-												Add shadow frustum culling to terrain and M2 depth passes

Both passes were rendering the entire loaded scene (17×17 tile radius)
into a shadow map that only covers 360×360 world units — submitting
10-50× more geometry than the shadow frustum can actually use.

- TerrainRenderer::renderShadow: skip chunks whose bounding sphere
  doesn't overlap the shadow frustum AABB in XY. Reduces terrain draw
  calls from O(all loaded chunks) to O(chunks within ~180 units).
- M2Renderer::renderShadow: skip instances whose world AABB doesn't
  overlap the shadow frustum in XY. Reduces M2 draw calls similarly.
- Both functions now take shadowCenter + halfExtent parameters.

											
										
										
											2026-02-18 21:15:24 -08:00
-												Vulcan Nightmare

Experimentally bringing up vulcan support

											
										
										
											2026-02-21 19:41:21 -08:00
+								    // Load shadow shaders
 								    VkShaderModule vertShader, fragShader;
 								    if (!vertShader.loadFromFile(device, "assets/shaders/shadow.vert.spv")) {
 								        LOG_ERROR("M2Renderer: failed to load shadow vertex shader");
 								        return false;
 								    }
 								    if (!fragShader.loadFromFile(device, "assets/shaders/shadow.frag.spv")) {
 								        LOG_ERROR("M2Renderer: failed to load shadow fragment shader");
 								        // NOTE(review): vertShader was loaded successfully but is not destroyed
 								        // on this path — confirm the wrapper releases it on destruction.
 								        return false;
 								    }
-												Stabilize shadows and soften foliage shadow casting

											
										
										
											2026-02-04 16:22:18 -08:00
-												Vulcan Nightmare

Experimentally bringing up vulcan support

											
										
										
											2026-02-21 19:41:21 -08:00
+								    // M2 vertex layout: 18 floats = 72 bytes stride
 								    // loc0=pos(off0), loc1=normal(off12), loc2=texCoord0(off24), loc5=texCoord1(off32),
 								    // loc3=boneWeights(off40), loc4=boneIndices(off56)
 								    // Shadow shader locations: 0=aPos, 1=aTexCoord, 2=aBoneWeights, 3=aBoneIndicesF
 								    // useBones=0 so locations 2,3 are never used
 								    VkVertexInputBindingDescription vertBind{};
 								    vertBind.binding = 0;
 								    vertBind.stride = 18 * sizeof(float);
 								    vertBind.inputRate = VK_VERTEX_INPUT_RATE_VERTEX;
 								    // Each entry is {location, binding, format, byte offset}.
 								    std::vector<VkVertexInputAttributeDescription> vertAttrs = {
 								        {0, 0, VK_FORMAT_R32G32B32_SFLOAT,    0},                     // aPos       -> position
 								        {1, 0, VK_FORMAT_R32G32_SFLOAT,       6 * sizeof(float)},     // aTexCoord  -> texCoord0
 								        {2, 0, VK_FORMAT_R32G32B32A32_SFLOAT, 10 * sizeof(float)},    // aBoneWeights
 								        {3, 0, VK_FORMAT_R32G32B32A32_SFLOAT, 14 * sizeof(float)},    // aBoneIndicesF
 								    };
 								    shadowPipeline_ = PipelineBuilder()
 								        .setShaders(vertShader.stageInfo(VK_SHADER_STAGE_VERTEX_BIT),
 								                    fragShader.stageInfo(VK_SHADER_STAGE_FRAGMENT_BIT))
 								        .setVertexInput({vertBind}, vertAttrs)
 								        .setTopology(VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST)
-												Stabilize Vulkan shadow pipeline diagnostics and compatibility path

- Fix shadow depth image layout transitions by tracking per-frame old/new layouts.
- Update receiver shadow projection to Vulkan clip-depth convention.
- Test inverted shadow compare op path (GREATER_OR_EQUAL).
- Switch shadow compare samplers to NEAREST filtering for broader Vulkan compatibility.
- Expand shadow caster coverage by disabling caster cull filtering in WMO/M2/Character shadow pipelines.
- Keep light-space matrix path on stable character-centered framing.

											
										
										
											2026-02-22 10:23:20 -08:00
+								        // Foliage/leaf cards are effectively two-sided; front-face culling can
 								        // drop them from the shadow map depending on light/view orientation.
 								        .setRasterization(VK_POLYGON_MODE_FILL, VK_CULL_MODE_NONE)
-												Vulcan Nightmare

Experimentally bringing up vulcan support

											
										
										
											2026-02-21 19:41:21 -08:00
+								        .setDepthTest(true, true, VK_COMPARE_OP_LESS_OR_EQUAL)
-												Stabilize Vulkan shadow pipeline diagnostics and compatibility path

- Fix shadow depth image layout transitions by tracking per-frame old/new layouts.
- Update receiver shadow projection to Vulkan clip-depth convention.
- Test inverted shadow compare op path (GREATER_OR_EQUAL).
- Switch shadow compare samplers to NEAREST filtering for broader Vulkan compatibility.
- Expand shadow caster coverage by disabling caster cull filtering in WMO/M2/Character shadow pipelines.
- Keep light-space matrix path on stable character-centered framing.

											
										
										
											2026-02-22 10:23:20 -08:00
+								        .setDepthBias(0.05f, 0.20f)
-												Vulcan Nightmare

Experimentally bringing up vulcan support

											
										
										
											2026-02-21 19:41:21 -08:00
+								        .setNoColorAttachment()
 								        .setLayout(shadowPipelineLayout_)
 								        .setRenderPass(shadowRenderPass)
 								        .setDynamicStates({VK_DYNAMIC_STATE_VIEWPORT, VK_DYNAMIC_STATE_SCISSOR})
 								        .build(device);
 								    // The shader modules are destroyed right after build(), before the result
 								    // is checked, so both the success and the failure path release them.
 								    vertShader.destroy();
 								    fragShader.destroy();
 								    if (!shadowPipeline_) {
 								        LOG_ERROR("M2Renderer: failed to create shadow pipeline");
 								        return false;
 								    }
 								    LOG_INFO("M2Renderer shadow pipeline initialized");
 								    return true;
 								}
-												Add distance culling to shadow passes for CPU-bound shadow perf

All three shadow renderers (WMO, M2, Character) were iterating every
loaded instance with zero culling. Now skip instances outside the
180-unit shadow frustum radius via squared-distance check.

											
										
										
											2026-02-23 04:48:26 -08:00
+								void M2Renderer::renderShadow(VkCommandBuffer cmd, const glm::mat4& lightSpaceMatrix, float globalTime,
 								                              const glm::vec3& shadowCenter, float shadowRadius) {
-												Vulcan Nightmare

Experimentally bringing up vulcan support

											
										
										
											2026-02-21 19:41:21 -08:00
+								    if (!shadowPipeline_ || !shadowParamsSet_) return;
 								    if (instances.empty() || models.empty()) return;
 								    struct ShadowPush { glm::mat4 lightSpaceMatrix; glm::mat4 model; };
-												Add alpha-tested foliage shadows: per-batch texture binding and shadow map receiving

Shadow casting: foliage batches now bind their actual texture in the shadow
pass with alpha testing, producing leaf-shaped shadows instead of solid cards.
Uses a per-frame resettable descriptor pool for texture sets.

Shadow receiving: foliage fragments now sample the shadow map with PCF
instead of using a flat constant darkening.

											
										
										
											2026-02-23 05:55:03 -08:00
+								    struct ShadowParamsUBO {
 								        int32_t useBones = 0;
 								        int32_t useTexture = 0;
 								        int32_t alphaTest = 0;
 								        int32_t foliageSway = 0;
 								        float windTime = 0.0f;
 								        float foliageMotionDamp = 1.0f;
 								    };
-												Add distance culling to shadow passes for CPU-bound shadow perf

All three shadow renderers (WMO, M2, Character) were iterating every
loaded instance with zero culling. Now skip instances outside the
180-unit shadow frustum radius via squared-distance check.

											
										
										
											2026-02-23 04:48:26 -08:00
+								    const float shadowRadiusSq = shadowRadius * shadowRadius;
-												Stabilize shadows and soften foliage shadow casting

											
										
										
											2026-02-04 16:22:18 -08:00
-												Add alpha-tested foliage shadows: per-batch texture binding and shadow map receiving

Shadow casting: foliage batches now bind their actual texture in the shadow
pass with alpha testing, producing leaf-shaped shadows instead of solid cards.
Uses a per-frame resettable descriptor pool for texture sets.

Shadow receiving: foliage fragments now sample the shadow map with PCF
instead of using a flat constant darkening.

											
										
										
											2026-02-23 05:55:03 -08:00
+								    // Reset per-frame texture descriptor pool for foliage alpha-test sets
 								    if (shadowTexPool_) {
 								        vkResetDescriptorPool(vkCtx_->getDevice(), shadowTexPool_, 0);
 								    }
 								    // Cache: texture imageView -> allocated descriptor set (avoids duplicates within frame)
 								    std::unordered_map<VkImageView, VkDescriptorSet> texSetCache;
 								    auto getTexDescSet = [&](VkTexture* tex) -> VkDescriptorSet {
 								        VkImageView iv = tex->getImageView();
 								        auto cacheIt = texSetCache.find(iv);
 								        if (cacheIt != texSetCache.end()) return cacheIt->second;
 								        VkDescriptorSet set = VK_NULL_HANDLE;
 								        VkDescriptorSetAllocateInfo ai{};
 								        ai.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO;
 								        ai.descriptorPool = shadowTexPool_;
 								        ai.descriptorSetCount = 1;
 								        ai.pSetLayouts = &shadowParamsLayout_;
 								        if (vkAllocateDescriptorSets(vkCtx_->getDevice(), &ai, &set) != VK_SUCCESS) {
 								            return shadowParamsSet_; // fallback to white texture
 								        }
 								        VkDescriptorImageInfo imgInfo{};
 								        imgInfo.imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
 								        imgInfo.imageView = iv;
 								        imgInfo.sampler = tex->getSampler();
 								        VkDescriptorBufferInfo bufInfo{};
 								        bufInfo.buffer = shadowParamsUBO_;
 								        bufInfo.offset = 0;
 								        bufInfo.range = sizeof(ShadowParamsUBO);
 								        VkWriteDescriptorSet writes[2]{};
 								        writes[0].sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET;
 								        writes[0].dstSet = set;
 								        writes[0].dstBinding = 0;
 								        writes[0].descriptorCount = 1;
 								        writes[0].descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER;
 								        writes[0].pImageInfo = &imgInfo;
 								        writes[1].sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET;
 								        writes[1].dstSet = set;
 								        writes[1].dstBinding = 1;
 								        writes[1].descriptorCount = 1;
 								        writes[1].descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER;
 								        writes[1].pBufferInfo = &bufInfo;
 								        vkUpdateDescriptorSets(vkCtx_->getDevice(), 2, writes, 0, nullptr);
 								        texSetCache[iv] = set;
 								        return set;
 								    };
-												Add shader-driven tree beautification: wind sway, SSS, color variation, AO

- Vertex wind animation: 3-layer displacement (trunk/branch/leaf) with
  quadratic height scaling so bases stay grounded
- Shadow pass: matching vertex displacement split into foliage/non-foliage
  passes, removed UV-wiggle approach
- Leaf subsurface scattering: warm backlit glow when looking toward sun
- Per-instance color variation: hue/brightness from position hash via flat
  varying to avoid interpolation flicker
- Canopy ambient occlusion: height-based darkening of tree interiors
- Detail normal perturbation: UV-only procedural normals to break flat cards
- Bayer 4x4 ordered dither replacing sin-hash noise for alpha edges
- Foliage skips shadow map sampling and specular to prevent flicker from
  swaying geometry sampling unstable shadow/highlight values

											
										
										
											2026-02-23 03:53:50 -08:00
+								    // Helper lambda to draw instances with a given foliageSway setting
 								    auto drawPass = [&](bool foliagePass) {
 								        ShadowParamsUBO params{};
 								        params.foliageSway = foliagePass ? 1 : 0;
 								        params.windTime = globalTime;
 								        params.foliageMotionDamp = 1.0f;
-												Add alpha-tested foliage shadows: per-batch texture binding and shadow map receiving

Shadow casting: foliage batches now bind their actual texture in the shadow
pass with alpha testing, producing leaf-shaped shadows instead of solid cards.
Uses a per-frame resettable descriptor pool for texture sets.

Shadow receiving: foliage fragments now sample the shadow map with PCF
instead of using a flat constant darkening.

											
										
										
											2026-02-23 05:55:03 -08:00
+								        // For foliage pass: enable texture+alphaTest in UBO (per-batch textures bound below)
 								        if (foliagePass) {
 								            params.useTexture = 1;
 								            params.alphaTest = 1;
 								        }
-												Vulcan Nightmare

Experimentally bringing up vulcan support

											
										
										
											2026-02-21 19:41:21 -08:00
-												Add shader-driven tree beautification: wind sway, SSS, color variation, AO

- Vertex wind animation: 3-layer displacement (trunk/branch/leaf) with
  quadratic height scaling so bases stay grounded
- Shadow pass: matching vertex displacement split into foliage/non-foliage
  passes, removed UV-wiggle approach
- Leaf subsurface scattering: warm backlit glow when looking toward sun
- Per-instance color variation: hue/brightness from position hash via flat
  varying to avoid interpolation flicker
- Canopy ambient occlusion: height-based darkening of tree interiors
- Detail normal perturbation: UV-only procedural normals to break flat cards
- Bayer 4x4 ordered dither replacing sin-hash noise for alpha edges
- Foliage skips shadow map sampling and specular to prevent flicker from
  swaying geometry sampling unstable shadow/highlight values

											
										
										
											2026-02-23 03:53:50 -08:00
+								        VmaAllocationInfo allocInfo{};
 								        vmaGetAllocationInfo(vkCtx_->getAllocator(), shadowParamsAlloc_, &allocInfo);
 								        std::memcpy(allocInfo.pMappedData, &params, sizeof(params));
 								        vkCmdBindPipeline(cmd, VK_PIPELINE_BIND_POINT_GRAPHICS, shadowPipeline_);
 								        vkCmdBindDescriptorSets(cmd, VK_PIPELINE_BIND_POINT_GRAPHICS, shadowPipelineLayout_,
 , 1, &shadowParamsSet_, 0, nullptr);
 								        uint32_t currentModelId = UINT32_MAX;
 								        const M2ModelGPU* currentModel = nullptr;
 								        for (const auto& instance : instances) {
-												Eliminate per-instance hash lookups in M2 render/shadow culling loops

Use cached model flags (isValid, isSmoke, isInvisibleTrap, isGroundDetail,
disableAnimation, boundRadius) on M2Instance instead of models.find() in
the hot culling paths. Also complete cached flag initialization in
createInstanceWithMatrix().

											
										
										
											2026-03-04 08:28:21 -08:00
+								            // Use cached flags to skip early without hash lookup
 								            if (!instance.cachedIsValid || instance.cachedIsSmoke || instance.cachedIsInvisibleTrap) continue;
-												Add distance culling to shadow passes for CPU-bound shadow perf

All three shadow renderers (WMO, M2, Character) were iterating every
loaded instance with zero culling. Now skip instances outside the
180-unit shadow frustum radius via squared-distance check.

											
										
										
											2026-02-23 04:48:26 -08:00
+								            // Distance cull against shadow frustum
 								            glm::vec3 diff = instance.position - shadowCenter;
 								            if (glm::dot(diff, diff) > shadowRadiusSq) continue;
-												Background BLP texture pre-decoding + deferred WMO normal maps (12x streaming perf)

Move CPU-heavy BLP texture decoding from main thread to background worker
threads for all hot paths: terrain M2 models, WMO doodad M2s, WMO textures,
creature models, and gameobject WMOs. Each renderer (M2, WMO, Character) now
accepts a pre-decoded BLP cache that loadTexture() checks before falling back
to synchronous decode.

Defer WMO normal/height map generation (3 per-pixel passes: luminance, box
blur, Sobel) during terrain streaming finalization — this was the dominant
remaining bottleneck after BLP pre-decoding.

Terrain streaming stalls: 1576ms → 124ms worst case.

											
										
										
											2026-03-07 15:46:56 -08:00
+								            if (!instance.cachedModel) continue;
 								            const M2ModelGPU& model = *instance.cachedModel;
-												Add shader-driven tree beautification: wind sway, SSS, color variation, AO

- Vertex wind animation: 3-layer displacement (trunk/branch/leaf) with
  quadratic height scaling so bases stay grounded
- Shadow pass: matching vertex displacement split into foliage/non-foliage
  passes, removed UV-wiggle approach
- Leaf subsurface scattering: warm backlit glow when looking toward sun
- Per-instance color variation: hue/brightness from position hash via flat
  varying to avoid interpolation flicker
- Canopy ambient occlusion: height-based darkening of tree interiors
- Detail normal perturbation: UV-only procedural normals to break flat cards
- Bayer 4x4 ordered dither replacing sin-hash noise for alpha edges
- Foliage skips shadow map sampling and specular to prevent flicker from
  swaying geometry sampling unstable shadow/highlight values

											
										
										
											2026-02-23 03:53:50 -08:00
 								            // Filter: only draw foliage models in foliage pass, non-foliage in non-foliage pass
 								            if (model.shadowWindFoliage != foliagePass) continue;
 								            // Bind vertex/index buffers when model changes
 								            if (instance.modelId != currentModelId) {
 								                currentModelId = instance.modelId;
 								                currentModel = &model;
 								                VkDeviceSize offset = 0;
 								                vkCmdBindVertexBuffers(cmd, 0, 1, &currentModel->vertexBuffer, &offset);
 								                vkCmdBindIndexBuffer(cmd, currentModel->indexBuffer, 0, VK_INDEX_TYPE_UINT16);
 								            }
-												Vulcan Nightmare

Experimentally bringing up Vulkan support

											
										
										
											2026-02-21 19:41:21 -08:00
-												Add shader-driven tree beautification: wind sway, SSS, color variation, AO

- Vertex wind animation: 3-layer displacement (trunk/branch/leaf) with
  quadratic height scaling so bases stay grounded
- Shadow pass: matching vertex displacement split into foliage/non-foliage
  passes, removed UV-wiggle approach
- Leaf subsurface scattering: warm backlit glow when looking toward sun
- Per-instance color variation: hue/brightness from position hash via flat
  varying to avoid interpolation flicker
- Canopy ambient occlusion: height-based darkening of tree interiors
- Detail normal perturbation: UV-only procedural normals to break flat cards
- Bayer 4x4 ordered dither replacing sin-hash noise for alpha edges
- Foliage skips shadow map sampling and specular to prevent flicker from
  swaying geometry sampling unstable shadow/highlight values

											
										
										
											2026-02-23 03:53:50 -08:00
+								            ShadowPush push{lightSpaceMatrix, instance.modelMatrix};
 								            vkCmdPushConstants(cmd, shadowPipelineLayout_, VK_SHADER_STAGE_VERTEX_BIT,
 , 128, &push);
-												Vulcan Nightmare

Experimentally bringing up Vulkan support

											
										
										
											2026-02-21 19:41:21 -08:00
-												Add shader-driven tree beautification: wind sway, SSS, color variation, AO

- Vertex wind animation: 3-layer displacement (trunk/branch/leaf) with
  quadratic height scaling so bases stay grounded
- Shadow pass: matching vertex displacement split into foliage/non-foliage
  passes, removed UV-wiggle approach
- Leaf subsurface scattering: warm backlit glow when looking toward sun
- Per-instance color variation: hue/brightness from position hash via flat
  varying to avoid interpolation flicker
- Canopy ambient occlusion: height-based darkening of tree interiors
- Detail normal perturbation: UV-only procedural normals to break flat cards
- Bayer 4x4 ordered dither replacing sin-hash noise for alpha edges
- Foliage skips shadow map sampling and specular to prevent flicker from
  swaying geometry sampling unstable shadow/highlight values

											
										
										
											2026-02-23 03:53:50 -08:00
+								            for (const auto& batch : model.batches) {
 								                if (batch.submeshLevel > 0) continue;
-												Add alpha-tested foliage shadows: per-batch texture binding and shadow map receiving

Shadow casting: foliage batches now bind their actual texture in the shadow
pass with alpha testing, producing leaf-shaped shadows instead of solid cards.
Uses a per-frame resettable descriptor pool for texture sets.

Shadow receiving: foliage fragments now sample the shadow map with PCF
instead of using a flat constant darkening.

											
										
										
											2026-02-23 05:55:03 -08:00
+								                // For foliage: bind per-batch texture for alpha-tested shadows
 								                if (foliagePass && batch.hasAlpha && batch.texture) {
 								                    VkDescriptorSet texSet = getTexDescSet(batch.texture);
 								                    vkCmdBindDescriptorSets(cmd, VK_PIPELINE_BIND_POINT_GRAPHICS, shadowPipelineLayout_,
 , 1, &texSet, 0, nullptr);
 								                } else if (foliagePass) {
 								                    // Non-alpha batch: rebind default set (white texture, alpha test passes)
 								                    vkCmdBindDescriptorSets(cmd, VK_PIPELINE_BIND_POINT_GRAPHICS, shadowPipelineLayout_,
 , 1, &shadowParamsSet_, 0, nullptr);
 								                }
-												Add shader-driven tree beautification: wind sway, SSS, color variation, AO

- Vertex wind animation: 3-layer displacement (trunk/branch/leaf) with
  quadratic height scaling so bases stay grounded
- Shadow pass: matching vertex displacement split into foliage/non-foliage
  passes, removed UV-wiggle approach
- Leaf subsurface scattering: warm backlit glow when looking toward sun
- Per-instance color variation: hue/brightness from position hash via flat
  varying to avoid interpolation flicker
- Canopy ambient occlusion: height-based darkening of tree interiors
- Detail normal perturbation: UV-only procedural normals to break flat cards
- Bayer 4x4 ordered dither replacing sin-hash noise for alpha edges
- Foliage skips shadow map sampling and specular to prevent flicker from
  swaying geometry sampling unstable shadow/highlight values

											
										
										
											2026-02-23 03:53:50 -08:00
+								                vkCmdDrawIndexed(cmd, batch.indexCount, 1, batch.indexStart, 0, 0);
 								            }
-												Stabilize shadows and soften foliage shadow casting

											
										
										
											2026-02-04 16:22:18 -08:00
+								        }
-												Add shader-driven tree beautification: wind sway, SSS, color variation, AO

- Vertex wind animation: 3-layer displacement (trunk/branch/leaf) with
  quadratic height scaling so bases stay grounded
- Shadow pass: matching vertex displacement split into foliage/non-foliage
  passes, removed UV-wiggle approach
- Leaf subsurface scattering: warm backlit glow when looking toward sun
- Per-instance color variation: hue/brightness from position hash via flat
  varying to avoid interpolation flicker
- Canopy ambient occlusion: height-based darkening of tree interiors
- Detail normal perturbation: UV-only procedural normals to break flat cards
- Bayer 4x4 ordered dither replacing sin-hash noise for alpha edges
- Foliage skips shadow map sampling and specular to prevent flicker from
  swaying geometry sampling unstable shadow/highlight values

											
										
										
											2026-02-23 03:53:50 -08:00
+								    };
 								    // Pass 1: non-foliage (no wind displacement)
 								    drawPass(false);
-												Add alpha-tested foliage shadows: per-batch texture binding and shadow map receiving

Shadow casting: foliage batches now bind their actual texture in the shadow
pass with alpha testing, producing leaf-shaped shadows instead of solid cards.
Uses a per-frame resettable descriptor pool for texture sets.

Shadow receiving: foliage fragments now sample the shadow map with PCF
instead of using a flat constant darkening.

											
										
										
											2026-02-23 05:55:03 -08:00
+								    // Pass 2: foliage (wind displacement enabled, per-batch alpha-tested textures)
-												Add shader-driven tree beautification: wind sway, SSS, color variation, AO

- Vertex wind animation: 3-layer displacement (trunk/branch/leaf) with
  quadratic height scaling so bases stay grounded
- Shadow pass: matching vertex displacement split into foliage/non-foliage
  passes, removed UV-wiggle approach
- Leaf subsurface scattering: warm backlit glow when looking toward sun
- Per-instance color variation: hue/brightness from position hash via flat
  varying to avoid interpolation flicker
- Canopy ambient occlusion: height-based darkening of tree interiors
- Detail normal perturbation: UV-only procedural normals to break flat cards
- Bayer 4x4 ordered dither replacing sin-hash noise for alpha edges
- Foliage skips shadow map sampling and specular to prevent flicker from
  swaying geometry sampling unstable shadow/highlight values

											
										
										
											2026-02-23 03:53:50 -08:00
+								    drawPass(true);
-												Stabilize shadows and soften foliage shadow casting

											
										
										
											2026-02-04 16:22:18 -08:00
+								}
-												Render M2 glow batches as billboarded light sprites

Replace flat mesh rendering of additive/mod blend batches (blendMode >= 3)
with camera-facing point sprites using a soft radial gradient texture and
additive blending. Adds M2 particle emitter infrastructure (structs, shader,
parsing stubs) but disables emitter parsing — the assumed 476-byte struct
size is wrong for WotLK 3.3.5a, causing misaligned reads that explode RAM.

											
										
										
											2026-02-06 08:58:26 -08:00
+								// --- M2 Particle Emitter Helpers ---
 								// Samples a float animation track at animTime.
 								//
 								// The track time and sequence index are resolved by resolveTrackTime(); the
 								// keyframe index comes from findKeyframeIndex(). The result is a linear blend
 								// between that keyframe and the following one.
 								//
 								// Returns 0.0f when the track has no data, the resolved sequence index is out
 								// of range, the sequence has no timestamps or values, or no keyframe index is
 								// found. A sequence holding a single value returns that value directly.
 								//
 								// The seqs parameter is unused and kept only for interface compatibility.
 								float M2Renderer::interpFloat(const pipeline::M2AnimationTrack& track, float animTime,
 								                                int seqIdx, const std::vector<pipeline::M2Sequence>& /*seqs*/,
 								                                const std::vector<uint32_t>& globalSeqDurations) {
 								    if (!track.hasData()) return 0.0f;
 								    int si = 0;
 								    float t = 0.0f;
 								    resolveTrackTime(track, seqIdx, animTime, globalSeqDurations, si, t);
 								    if (si < 0 || si >= static_cast<int>(track.sequences.size())) return 0.0f;
 								    const auto& keys = track.sequences[si];
 								    if (keys.timestamps.empty() || keys.floatValues.empty()) return 0.0f;
 								    if (keys.floatValues.size() == 1) return keys.floatValues[0];
 								    const int idx = findKeyframeIndex(keys.timestamps, t);
 								    if (idx < 0) return 0.0f;
 								    // Only keys that exist in BOTH arrays are usable. Clamping to the shorter
 								    // array keeps every read below in bounds even if the two lengths disagree
 								    // (e.g. data from a malformed file); equal-length arrays behave as before.
 								    const size_t lastKey = std::min(keys.timestamps.size(), keys.floatValues.size()) - 1;
 								    const size_t i0 = std::min(static_cast<size_t>(idx), lastKey);
 								    const size_t i1 = std::min(i0 + 1, lastKey);
 								    if (i0 == i1) return keys.floatValues[i0];
 								    const float t0 = static_cast<float>(keys.timestamps[i0]);
 								    const float t1 = static_cast<float>(keys.timestamps[i1]);
 								    const float dur = t1 - t0;
 								    // A zero or negative span holds the earlier key instead of dividing by it.
 								    const float frac = (dur > 0.0f) ? glm::clamp((t - t0) / dur, 0.0f, 1.0f) : 0.0f;
 								    return glm::mix(keys.floatValues[i0], keys.floatValues[i1], frac);
 								}
 								// Samples a float FBlock at lifeRatio, which is clamped to [0, 1] first.
 								//
 								// Returns 1.0f when the block has no values, and the first value when there
 								// is only one value or no timestamps. Otherwise the first segment
 								// [timestamps[i], timestamps[i + 1]] whose end is >= lifeRatio is selected and
 								// its two values are linearly blended. If no segment matches, the last value
 								// is returned.
 								//
 								// The blend factor is clamped to [0, 1] so that a lifeRatio lying before the
 								// first timestamp holds the first value rather than extrapolating backwards,
 								// matching the clamping done in interpFloat().
 								float M2Renderer::interpFBlockFloat(const pipeline::M2FBlock& fb, float lifeRatio) {
 								    if (fb.floatValues.empty()) return 1.0f;
 								    if (fb.floatValues.size() == 1 || fb.timestamps.empty()) return fb.floatValues[0];
 								    lifeRatio = glm::clamp(lifeRatio, 0.0f, 1.0f);
 								    const size_t lastValue = fb.floatValues.size() - 1;
 								    // timestamps is non-empty here, so i + 1 < size() visits each adjacent pair.
 								    for (size_t i = 0; i + 1 < fb.timestamps.size(); ++i) {
 								        if (lifeRatio <= fb.timestamps[i + 1]) {
 								            const float t0 = fb.timestamps[i];
 								            const float t1 = fb.timestamps[i + 1];
 								            const float dur = t1 - t0;
 								            const float frac = (dur > 0.0f) ? glm::clamp((lifeRatio - t0) / dur, 0.0f, 1.0f) : 0.0f;
 								            // Value indices are clamped in case there are fewer values than timestamps.
 								            const size_t v0 = std::min(i, lastValue);
 								            const size_t v1 = std::min(i + 1, lastValue);
 								            return glm::mix(fb.floatValues[v0], fb.floatValues[v1], frac);
 								        }
 								    }
 								    return fb.floatValues.back();
 								}
 								// Samples a vec3 FBlock at lifeRatio, which is clamped to [0, 1] first.
 								//
 								// Returns glm::vec3(1.0f) when the block has no values, and the first value
 								// when there is only one value or no timestamps. Otherwise the first segment
 								// [timestamps[i], timestamps[i + 1]] whose end is >= lifeRatio is selected and
 								// its two values are linearly blended. If no segment matches, the last value
 								// is returned.
 								//
 								// The blend factor is clamped to [0, 1] so that a lifeRatio lying before the
 								// first timestamp holds the first value rather than extrapolating backwards,
 								// matching the clamping done in interpFloat().
 								glm::vec3 M2Renderer::interpFBlockVec3(const pipeline::M2FBlock& fb, float lifeRatio) {
 								    if (fb.vec3Values.empty()) return glm::vec3(1.0f);
 								    if (fb.vec3Values.size() == 1 || fb.timestamps.empty()) return fb.vec3Values[0];
 								    lifeRatio = glm::clamp(lifeRatio, 0.0f, 1.0f);
 								    const size_t lastValue = fb.vec3Values.size() - 1;
 								    // timestamps is non-empty here, so i + 1 < size() visits each adjacent pair.
 								    for (size_t i = 0; i + 1 < fb.timestamps.size(); ++i) {
 								        if (lifeRatio <= fb.timestamps[i + 1]) {
 								            const float t0 = fb.timestamps[i];
 								            const float t1 = fb.timestamps[i + 1];
 								            const float dur = t1 - t0;
 								            const float frac = (dur > 0.0f) ? glm::clamp((lifeRatio - t0) / dur, 0.0f, 1.0f) : 0.0f;
 								            // Value indices are clamped in case there are fewer values than timestamps.
 								            const size_t v0 = std::min(i, lastValue);
 								            const size_t v1 = std::min(i + 1, lastValue);
 								            return glm::mix(fb.vec3Values[v0], fb.vec3Values[v1], frac);
 								        }
 								    }
 								    return fb.vec3Values.back();
 								}
-												Add ambient insect particles near water vegetation, fix firefly particles, and improve water foam

- Spawn dark point-sprite insects buzzing around cattails/reeds/kelp/seaweed
- Fix firefly M2 particles: exempt from alpha dampening and forced gravity
- Make water shoreline/crest foam more irregular with UV warping and bluer tint

											
										
										
											2026-02-23 07:18:44 -08:00
 								// Collects the positions of all instances whose cached model is flagged
 								// isWaterVegetation and that lie within maxDist of camPos.
 								// Instances with no cached model are skipped. The range test compares squared
 								// distances, so an instance exactly maxDist away is included.
+								std::vector<glm::vec3> M2Renderer::getWaterVegetationPositions(const glm::vec3& camPos, float maxDist) const {
 								    std::vector<glm::vec3> result;
 								    float maxDistSq = maxDist * maxDist;
 								    for (const auto& inst : instances) {
-												Background BLP texture pre-decoding + deferred WMO normal maps (12x streaming perf)

Move CPU-heavy BLP texture decoding from main thread to background worker
threads for all hot paths: terrain M2 models, WMO doodad M2s, WMO textures,
creature models, and gameobject WMOs. Each renderer (M2, WMO, Character) now
accepts a pre-decoded BLP cache that loadTexture() checks before falling back
to synchronous decode.

Defer WMO normal/height map generation (3 per-pixel passes: luminance, box
blur, Sobel) during terrain streaming finalization — this was the dominant
remaining bottleneck after BLP pre-decoding.

Terrain streaming stalls: 1576ms → 124ms worst case.

											
										
										
											2026-03-07 15:46:56 -08:00
+								        if (!inst.cachedModel || !inst.cachedModel->isWaterVegetation) continue;
-												Add ambient insect particles near water vegetation, fix firefly particles, and improve water foam

- Spawn dark point-sprite insects buzzing around cattails/reeds/kelp/seaweed
- Fix firefly M2 particles: exempt from alpha dampening and forced gravity
- Make water shoreline/crest foam more irregular with UV warping and bluer tint

											
										
										
											2026-02-23 07:18:44 -08:00
+								        glm::vec3 diff = inst.position - camPos;
 								        if (glm::dot(diff, diff) <= maxDistSq) {
 								            result.push_back(inst.position);
 								        }
 								    }
 								    return result;
 								}
-												Render M2 glow batches as billboarded light sprites

Replace flat mesh rendering of additive/mod blend batches (blendMode >= 3)
with camera-facing point sprites using a soft radial gradient texture and
additive blending. Adds M2 particle emitter infrastructure (structs, shader,
parsing stubs) but disables emitter parsing — the assumed 476-byte struct
size is wrong for WotLK 3.3.5a, causing misaligned reads that explode RAM.

											
										
										
											2026-02-06 08:58:26 -08:00
 								// Spawns new particles on inst for every enabled emitter in gpu.
 								//
 								// Each emitter adds (emission rate * dt) to its own accumulator and spawns one
 								// particle per whole unit accumulated, for as long as the instance holds fewer
 								// than MAX_M2_PARTICLES particles. Emitters whose interpolated rate or lifespan
 								// is <= 0 are skipped. Random values are drawn from particleRng_.
 								// NOTE(review): dt is presumably the frame time step in seconds — confirm with caller.
+								void M2Renderer::emitParticles(M2Instance& inst, const M2ModelGPU& gpu, float dt) {
 								    // Keep exactly one accumulator per emitter; entries added by the resize start at 0.
 								    if (inst.emitterAccumulators.size() != gpu.particleEmitters.size()) {
 								        inst.emitterAccumulators.resize(gpu.particleEmitters.size(), 0.0f);
 								    }
 								    std::uniform_real_distribution<float> dist01(0.0f, 1.0f);
 								    std::uniform_real_distribution<float> distN(-1.0f, 1.0f);
-												Add tiled particle atlas support

											
										
										
											2026-02-07 19:20:37 -08:00
+								    std::uniform_int_distribution<int> distTile;
-												Render M2 glow batches as billboarded light sprites

Replace flat mesh rendering of additive/mod blend batches (blendMode >= 3)
with camera-facing point sprites using a soft radial gradient texture and
additive blending. Adds M2 particle emitter infrastructure (structs, shader,
parsing stubs) but disables emitter parsing — the assumed 476-byte struct
size is wrong for WotLK 3.3.5a, causing misaligned reads that explode RAM.

											
										
										
											2026-02-06 08:58:26 -08:00
 								    for (size_t ei = 0; ei < gpu.particleEmitters.size(); ei++) {
 								        const auto& em = gpu.particleEmitters[ei];
 								        if (!em.enabled) continue;
 								        float rate = interpFloat(em.emissionRate, inst.animTime, inst.currentSequenceIndex,
 								                                  gpu.sequences, gpu.globalSequenceDurations);
 								        float life = interpFloat(em.lifespan, inst.animTime, inst.currentSequenceIndex,
 								                                  gpu.sequences, gpu.globalSequenceDurations);
 								        if (rate <= 0.0f || life <= 0.0f) continue;
 								        // Accumulate fractional particles; each whole unit spawns one particle below.
 								        inst.emitterAccumulators[ei] += rate * dt;
 								        while (inst.emitterAccumulators[ei] >= 1.0f && inst.particles.size() < MAX_M2_PARTICLES) {
 								            inst.emitterAccumulators[ei] -= 1.0f;
 								            M2Particle p;
 								            p.emitterIndex = static_cast<int>(ei);
 								            p.life = 0.0f;
 								            p.maxLife = life;
-												Add tiled particle atlas support

											
										
										
											2026-02-07 19:20:37 -08:00
+								            p.tileIndex = 0.0f;
-												Render M2 glow batches as billboarded light sprites

Replace flat mesh rendering of additive/mod blend batches (blendMode >= 3)
with camera-facing point sprites using a soft radial gradient texture and
additive blending. Adds M2 particle emitter infrastructure (structs, shader,
parsing stubs) but disables emitter parsing — the assumed 476-byte struct
size is wrong for WotLK 3.3.5a, causing misaligned reads that explode RAM.

											
										
										
											2026-02-06 08:58:26 -08:00
 								            // Position: emitter position transformed by bone matrix
 								            glm::vec3 localPos = em.position;
 								            glm::mat4 boneXform = glm::mat4(1.0f);
 								            // Fall back to the identity transform when the bone index has no matrix.
 								            if (em.bone < inst.boneMatrices.size()) {
 								                boneXform = inst.boneMatrices[em.bone];
 								            }
 								            glm::vec3 worldPos = glm::vec3(inst.modelMatrix * boneXform * glm::vec4(localPos, 1.0f));
 								            p.position = worldPos;
 								            // Velocity: emission speed in upward direction + random spread
 								            float speed = interpFloat(em.emissionSpeed, inst.animTime, inst.currentSequenceIndex,
 								                                       gpu.sequences, gpu.globalSequenceDurations);
 								            float vRange = interpFloat(em.verticalRange, inst.animTime, inst.currentSequenceIndex,
 								                                        gpu.sequences, gpu.globalSequenceDurations);
 								            float hRange = interpFloat(em.horizontalRange, inst.animTime, inst.currentSequenceIndex,
 								                                        gpu.sequences, gpu.globalSequenceDurations);
 								            // Base direction: up in model space, transformed to world
 								            glm::vec3 dir(0.0f, 0.0f, 1.0f);
 								            // Add random spread
 								            dir.x += distN(particleRng_) * hRange;
 								            dir.y += distN(particleRng_) * hRange;
 								            dir.z += distN(particleRng_) * vRange;
 								            // Normalize, skipping near-zero vectors to avoid dividing by ~0.
 								            float len = glm::length(dir);
 								            if (len > 0.001f) dir /= len;
 								            // Transform direction by bone + model orientation (rotation only)
 								            glm::mat3 rotMat = glm::mat3(inst.modelMatrix * boneXform);
 								            p.velocity = rotMat * dir * speed;
-												Fix M2 particle rendering: color, gravity, transparency, and animation

- Fix FBlock color keys from 3-byte BGR to 4-byte RGBA (CImVector) to
  prevent garbled purple/red colors from byte misalignment
- Add circular soft-edge falloff in particle fragment shader (GL_POINTS
  rendered as squares by default)
- Apply default gravity (4.0 spray, 1.5 mist) when M2 gravity is 0 since
  bone animation from .anim files isn't loaded yet
- Add drift velocity to speed=0 emitters so particles spread as mist
  instead of clustering at static bone positions
- Run particle updates for all nearby instances, not just those in
  boneWorkIndices_, to prevent particles freezing when bone culled
- Wrap animation time for particle models to keep emission tracks looping
- Cap particle scale to 1.5 and reduce point size multiplier (800→400)
- Desaturate FBlock colors 70% toward white for natural water appearance
- Reduce additive blend alpha to 5% and volume particles to 2%

											
										
										
											2026-02-16 02:12:43 -08:00
+								            // When emission speed is ~0 and bone animation isn't loaded (.anim files),
 								            // particles pile up at the same position. Give them a drift so they
 								            // spread outward like a mist/spray effect instead of clustering.
 								            if (std::abs(speed) < 0.01f) {
-												Add ambient insect particles near water vegetation, fix firefly particles, and improve water foam

- Spawn dark point-sprite insects buzzing around cattails/reeds/kelp/seaweed
- Fix firefly M2 particles: exempt from alpha dampening and forced gravity
- Make water shoreline/crest foam more irregular with UV warping and bluer tint

											
										
										
											2026-02-23 07:18:44 -08:00
+								                if (gpu.isFireflyEffect) {
 								                    // Fireflies: gentle random drift in all directions
 								                    p.velocity = rotMat * glm::vec3(
 								                        distN(particleRng_) * 0.6f,
 								                        distN(particleRng_) * 0.6f,
 								                        distN(particleRng_) * 0.3f
 								                    );
 								                } else {
 								                    // Other emitters: random sideways drift with a non-positive local Z component.
 								                    p.velocity = rotMat * glm::vec3(
 								                        distN(particleRng_) * 1.0f,
 								                        distN(particleRng_) * 1.0f,
 								                        -dist01(particleRng_) * 0.5f
 								                    );
 								                }
-												Fix M2 particle rendering: color, gravity, transparency, and animation

- Fix FBlock color keys from 3-byte BGR to 4-byte RGBA (CImVector) to
  prevent garbled purple/red colors from byte misalignment
- Add circular soft-edge falloff in particle fragment shader (GL_POINTS
  rendered as squares by default)
- Apply default gravity (4.0 spray, 1.5 mist) when M2 gravity is 0 since
  bone animation from .anim files isn't loaded yet
- Add drift velocity to speed=0 emitters so particles spread as mist
  instead of clustering at static bone positions
- Run particle updates for all nearby instances, not just those in
  boneWorkIndices_, to prevent particles freezing when bone culled
- Wrap animation time for particle models to keep emission tracks looping
- Cap particle scale to 1.5 and reduce point size multiplier (800→400)
- Desaturate FBlock colors 70% toward white for natural water appearance
- Reduce additive blend alpha to 5% and volume particles to 2%

											
										
										
											2026-02-16 02:12:43 -08:00
+								            }
-												Add tiled particle atlas support

											
										
										
											2026-02-07 19:20:37 -08:00
+								            const uint32_t tilesX = std::max<uint16_t>(em.textureCols, 1);
 								            const uint32_t tilesY = std::max<uint16_t>(em.textureRows, 1);
 								            const uint32_t totalTiles = tilesX * tilesY;
 								            // Tiled + randomized emitters pick a random tile in [0, totalTiles - 1];
 								            // every other case keeps tile 0.
 								            if ((em.flags & kParticleFlagTiled) && totalTiles > 1) {
 								                if (em.flags & kParticleFlagRandomized) {
 								                    distTile = std::uniform_int_distribution<int>(0, static_cast<int>(totalTiles - 1));
 								                    p.tileIndex = static_cast<float>(distTile(particleRng_));
 								                } else {
 								                    p.tileIndex = 0.0f;
 								                }
 								            }
-												Render M2 glow batches as billboarded light sprites

Replace flat mesh rendering of additive/mod blend batches (blendMode >= 3)
with camera-facing point sprites using a soft radial gradient texture and
additive blending. Adds M2 particle emitter infrastructure (structs, shader,
parsing stubs) but disables emitter parsing — the assumed 476-byte struct
size is wrong for WotLK 3.3.5a, causing misaligned reads that explode RAM.

											
										
										
											2026-02-06 08:58:26 -08:00
+								            inst.particles.push_back(p);
 								        }
 								        // Cap accumulator to avoid bursts after lag
 								        if (inst.emitterAccumulators[ei] > 2.0f) {
 								            inst.emitterAccumulators[ei] = 0.0f;
 								        }
 								    }
 								}
 								void M2Renderer::updateParticles(M2Instance& inst, float dt) {
-												Background BLP texture pre-decoding + deferred WMO normal maps (12x streaming perf)

Move CPU-heavy BLP texture decoding from main thread to background worker
threads for all hot paths: terrain M2 models, WMO doodad M2s, WMO textures,
creature models, and gameobject WMOs. Each renderer (M2, WMO, Character) now
accepts a pre-decoded BLP cache that loadTexture() checks before falling back
to synchronous decode.

Defer WMO normal/height map generation (3 per-pixel passes: luminance, box
blur, Sobel) during terrain streaming finalization — this was the dominant
remaining bottleneck after BLP pre-decoding.

Terrain streaming stalls: 1576ms → 124ms worst case.

											
										
										
											2026-03-07 15:46:56 -08:00
+								    if (!inst.cachedModel) return;
 								    const auto& gpu = *inst.cachedModel;
-												Render M2 glow batches as billboarded light sprites

Replace flat mesh rendering of additive/mod blend batches (blendMode >= 3)
with camera-facing point sprites using a soft radial gradient texture and
additive blending. Adds M2 particle emitter infrastructure (structs, shader,
parsing stubs) but disables emitter parsing — the assumed 476-byte struct
size is wrong for WotLK 3.3.5a, causing misaligned reads that explode RAM.

											
										
										
											2026-02-06 08:58:26 -08:00
 								    for (size_t i = 0; i < inst.particles.size(); ) {
 								        auto& p = inst.particles[i];
 								        p.life += dt;
 								        if (p.life >= p.maxLife) {
 								            // Swap-and-pop removal
 								            inst.particles[i] = inst.particles.back();
 								            inst.particles.pop_back();
 								            continue;
 								        }
 								        // Apply gravity
 								        if (p.emitterIndex >= 0 && p.emitterIndex < static_cast<int>(gpu.particleEmitters.size())) {
-												Fix M2 particle rendering: color, gravity, transparency, and animation

- Fix FBlock color keys from 3-byte BGR to 4-byte RGBA (CImVector) to
  prevent garbled purple/red colors from byte misalignment
- Add circular soft-edge falloff in particle fragment shader (GL_POINTS
  rendered as squares by default)
- Apply default gravity (4.0 spray, 1.5 mist) when M2 gravity is 0 since
  bone animation from .anim files isn't loaded yet
- Add drift velocity to speed=0 emitters so particles spread as mist
  instead of clustering at static bone positions
- Run particle updates for all nearby instances, not just those in
  boneWorkIndices_, to prevent particles freezing when bone culled
- Wrap animation time for particle models to keep emission tracks looping
- Cap particle scale to 1.5 and reduce point size multiplier (800→400)
- Desaturate FBlock colors 70% toward white for natural water appearance
- Reduce additive blend alpha to 5% and volume particles to 2%

											
										
										
											2026-02-16 02:12:43 -08:00
+								            const auto& pem = gpu.particleEmitters[p.emitterIndex];
 								            float grav = interpFloat(pem.gravity,
-												Render M2 glow batches as billboarded light sprites

Replace flat mesh rendering of additive/mod blend batches (blendMode >= 3)
with camera-facing point sprites using a soft radial gradient texture and
additive blending. Adds M2 particle emitter infrastructure (structs, shader,
parsing stubs) but disables emitter parsing — the assumed 476-byte struct
size is wrong for WotLK 3.3.5a, causing misaligned reads that explode RAM.

											
										
										
											2026-02-06 08:58:26 -08:00
+								                                      inst.animTime, inst.currentSequenceIndex,
 								                                      gpu.sequences, gpu.globalSequenceDurations);
-												Fix M2 particle rendering: color, gravity, transparency, and animation

- Fix FBlock color keys from 3-byte BGR to 4-byte RGBA (CImVector) to
  prevent garbled purple/red colors from byte misalignment
- Add circular soft-edge falloff in particle fragment shader (GL_POINTS
  rendered as squares by default)
- Apply default gravity (4.0 spray, 1.5 mist) when M2 gravity is 0 since
  bone animation from .anim files isn't loaded yet
- Add drift velocity to speed=0 emitters so particles spread as mist
  instead of clustering at static bone positions
- Run particle updates for all nearby instances, not just those in
  boneWorkIndices_, to prevent particles freezing when bone culled
- Wrap animation time for particle models to keep emission tracks looping
- Cap particle scale to 1.5 and reduce point size multiplier (800→400)
- Desaturate FBlock colors 70% toward white for natural water appearance
- Reduce additive blend alpha to 5% and volume particles to 2%

											
										
										
											2026-02-16 02:12:43 -08:00
+								            // When M2 gravity is 0, apply default gravity so particles arc downward.
 								            // Many fountain M2s rely on bone animation (.anim files) we don't load yet.
-												Add ambient insect particles near water vegetation, fix firefly particles, and improve water foam

- Spawn dark point-sprite insects buzzing around cattails/reeds/kelp/seaweed
- Fix firefly M2 particles: exempt from alpha dampening and forced gravity
- Make water shoreline/crest foam more irregular with UV warping and bluer tint

											
										
										
											2026-02-23 07:18:44 -08:00
+								            // Firefly/ambient glow particles intentionally have zero gravity — skip fallback.
 								            if (grav == 0.0f && !gpu.isFireflyEffect) {
-												Fix M2 particle rendering: color, gravity, transparency, and animation

- Fix FBlock color keys from 3-byte BGR to 4-byte RGBA (CImVector) to
  prevent garbled purple/red colors from byte misalignment
- Add circular soft-edge falloff in particle fragment shader (GL_POINTS
  rendered as squares by default)
- Apply default gravity (4.0 spray, 1.5 mist) when M2 gravity is 0 since
  bone animation from .anim files isn't loaded yet
- Add drift velocity to speed=0 emitters so particles spread as mist
  instead of clustering at static bone positions
- Run particle updates for all nearby instances, not just those in
  boneWorkIndices_, to prevent particles freezing when bone culled
- Wrap animation time for particle models to keep emission tracks looping
- Cap particle scale to 1.5 and reduce point size multiplier (800→400)
- Desaturate FBlock colors 70% toward white for natural water appearance
- Reduce additive blend alpha to 5% and volume particles to 2%

											
										
										
											2026-02-16 02:12:43 -08:00
+								                float emSpeed = interpFloat(pem.emissionSpeed,
 								                                             inst.animTime, inst.currentSequenceIndex,
 								                                             gpu.sequences, gpu.globalSequenceDurations);
 								                if (std::abs(emSpeed) > 0.1f) {
 								                    grav = 4.0f;  // spray particles
 								                } else {
 								                    grav = 1.5f;  // mist/drift particles - gentler fall
 								                }
 								            }
-												Render M2 glow batches as billboarded light sprites

Replace flat mesh rendering of additive/mod blend batches (blendMode >= 3)
with camera-facing point sprites using a soft radial gradient texture and
additive blending. Adds M2 particle emitter infrastructure (structs, shader,
parsing stubs) but disables emitter parsing — the assumed 476-byte struct
size is wrong for WotLK 3.3.5a, causing misaligned reads that explode RAM.

											
										
										
											2026-02-06 08:58:26 -08:00
+								            p.velocity.z -= grav * dt;
 								        }
 								        p.position += p.velocity * dt;
 								        i++;
 								    }
 								}
-												Vulcan Nightmare

Experimentally bringing up Vulkan support

											
										
										
											2026-02-21 19:41:21 -08:00
+								void M2Renderer::renderM2Particles(VkCommandBuffer cmd, VkDescriptorSet perFrameSet) {
 								    if (!particlePipeline_ || !m2ParticleVB_) return;
-												Render M2 glow batches as billboarded light sprites

Replace flat mesh rendering of additive/mod blend batches (blendMode >= 3)
with camera-facing point sprites using a soft radial gradient texture and
additive blending. Adds M2 particle emitter infrastructure (structs, shader,
parsing stubs) but disables emitter parsing — the assumed 476-byte struct
size is wrong for WotLK 3.3.5a, causing misaligned reads that explode RAM.

											
										
										
											2026-02-06 08:58:26 -08:00
 								    // Collect all particles from all instances, grouped by texture+blend
-												Add tiled particle atlas support

											
										
										
											2026-02-07 19:20:37 -08:00
+								    struct ParticleGroupKey {
-												Vulcan Nightmare

Experimentally bringing up vulcan support

											
										
										
											2026-02-21 19:41:21 -08:00
+								        VkTexture* texture;
-												Add tiled particle atlas support

											
										
										
											2026-02-07 19:20:37 -08:00
+								        uint8_t blendType;
 								        uint16_t tilesX;
 								        uint16_t tilesY;
 								        bool operator==(const ParticleGroupKey& other) const {
 								            return texture == other.texture &&
 								                   blendType == other.blendType &&
 								                   tilesX == other.tilesX &&
 								                   tilesY == other.tilesY;
 								        }
 								    };
 								    struct ParticleGroupKeyHash {
 								        size_t operator()(const ParticleGroupKey& key) const {
-												Vulcan Nightmare

Experimentally bringing up vulcan support

											
										
										
											2026-02-21 19:41:21 -08:00
+								            size_t h1 = std::hash<uintptr_t>{}(reinterpret_cast<uintptr_t>(key.texture));
-												Add tiled particle atlas support

											
										
										
											2026-02-07 19:20:37 -08:00
+								            size_t h2 = std::hash<uint32_t>{}((static_cast<uint32_t>(key.tilesX) << 16) | key.tilesY);
 								            size_t h3 = std::hash<uint8_t>{}(key.blendType);
 								            return h1 ^ (h2 * 0x9e3779b9u) ^ (h3 * 0x85ebca6bu);
 								        }
 								    };
-												Render M2 glow batches as billboarded light sprites

Replace flat mesh rendering of additive/mod blend batches (blendMode >= 3)
with camera-facing point sprites using a soft radial gradient texture and
additive blending. Adds M2 particle emitter infrastructure (structs, shader,
parsing stubs) but disables emitter parsing — the assumed 476-byte struct
size is wrong for WotLK 3.3.5a, causing misaligned reads that explode RAM.

											
										
										
											2026-02-06 08:58:26 -08:00
+								    struct ParticleGroup {
-												Vulcan Nightmare

Experimentally bringing up vulcan support

											
										
										
											2026-02-21 19:41:21 -08:00
+								        VkTexture* texture;
-												Render M2 glow batches as billboarded light sprites

Replace flat mesh rendering of additive/mod blend batches (blendMode >= 3)
with camera-facing point sprites using a soft radial gradient texture and
additive blending. Adds M2 particle emitter infrastructure (structs, shader,
parsing stubs) but disables emitter parsing — the assumed 476-byte struct
size is wrong for WotLK 3.3.5a, causing misaligned reads that explode RAM.

											
										
										
											2026-02-06 08:58:26 -08:00
+								        uint8_t blendType;
-												Add tiled particle atlas support

											
										
										
											2026-02-07 19:20:37 -08:00
+								        uint16_t tilesX;
 								        uint16_t tilesY;
 								        std::vector<float> vertexData;  // 9 floats per particle
-												Render M2 glow batches as billboarded light sprites

Replace flat mesh rendering of additive/mod blend batches (blendMode >= 3)
with camera-facing point sprites using a soft radial gradient texture and
additive blending. Adds M2 particle emitter infrastructure (structs, shader,
parsing stubs) but disables emitter parsing — the assumed 476-byte struct
size is wrong for WotLK 3.3.5a, causing misaligned reads that explode RAM.

											
										
										
											2026-02-06 08:58:26 -08:00
+								    };
-												Add tiled particle atlas support

											
										
										
											2026-02-07 19:20:37 -08:00
+								    std::unordered_map<ParticleGroupKey, ParticleGroup, ParticleGroupKeyHash> groups;
-												Render M2 glow batches as billboarded light sprites

Replace flat mesh rendering of additive/mod blend batches (blendMode >= 3)
with camera-facing point sprites using a soft radial gradient texture and
additive blending. Adds M2 particle emitter infrastructure (structs, shader,
parsing stubs) but disables emitter parsing — the assumed 476-byte struct
size is wrong for WotLK 3.3.5a, causing misaligned reads that explode RAM.

											
										
										
											2026-02-06 08:58:26 -08:00
 								    size_t totalParticles = 0;
-												Add tiled particle atlas support

											
										
										
											2026-02-07 19:20:37 -08:00
-												Render M2 glow batches as billboarded light sprites

Replace flat mesh rendering of additive/mod blend batches (blendMode >= 3)
with camera-facing point sprites using a soft radial gradient texture and
additive blending. Adds M2 particle emitter infrastructure (structs, shader,
parsing stubs) but disables emitter parsing — the assumed 476-byte struct
size is wrong for WotLK 3.3.5a, causing misaligned reads that explode RAM.

											
										
										
											2026-02-06 08:58:26 -08:00
+								    for (auto& inst : instances) {
 								        if (inst.particles.empty()) continue;
-												Background BLP texture pre-decoding + deferred WMO normal maps (12x streaming perf)

Move CPU-heavy BLP texture decoding from main thread to background worker
threads for all hot paths: terrain M2 models, WMO doodad M2s, WMO textures,
creature models, and gameobject WMOs. Each renderer (M2, WMO, Character) now
accepts a pre-decoded BLP cache that loadTexture() checks before falling back
to synchronous decode.

Defer WMO normal/height map generation (3 per-pixel passes: luminance, box
blur, Sobel) during terrain streaming finalization — this was the dominant
remaining bottleneck after BLP pre-decoding.

Terrain streaming stalls: 1576ms → 124ms worst case.

											
										
										
											2026-03-07 15:46:56 -08:00
+								        if (!inst.cachedModel) continue;
 								        const auto& gpu = *inst.cachedModel;
-												Render M2 glow batches as billboarded light sprites

Replace flat mesh rendering of additive/mod blend batches (blendMode >= 3)
with camera-facing point sprites using a soft radial gradient texture and
additive blending. Adds M2 particle emitter infrastructure (structs, shader,
parsing stubs) but disables emitter parsing — the assumed 476-byte struct
size is wrong for WotLK 3.3.5a, causing misaligned reads that explode RAM.

											
										
										
											2026-02-06 08:58:26 -08:00
 								        for (const auto& p : inst.particles) {
 								            if (p.emitterIndex < 0 || p.emitterIndex >= static_cast<int>(gpu.particleEmitters.size())) continue;
 								            const auto& em = gpu.particleEmitters[p.emitterIndex];
 								            float lifeRatio = p.life / std::max(p.maxLife, 0.001f);
 								            glm::vec3 color = interpFBlockVec3(em.particleColor, lifeRatio);
-												Fix M2 particle rendering: color, gravity, transparency, and animation

- Fix FBlock color keys from 3-byte BGR to 4-byte RGBA (CImVector) to
  prevent garbled purple/red colors from byte misalignment
- Add circular soft-edge falloff in particle fragment shader (GL_POINTS
  rendered as squares by default)
- Apply default gravity (4.0 spray, 1.5 mist) when M2 gravity is 0 since
  bone animation from .anim files isn't loaded yet
- Add drift velocity to speed=0 emitters so particles spread as mist
  instead of clustering at static bone positions
- Run particle updates for all nearby instances, not just those in
  boneWorkIndices_, to prevent particles freezing when bone culled
- Wrap animation time for particle models to keep emission tracks looping
- Cap particle scale to 1.5 and reduce point size multiplier (800→400)
- Desaturate FBlock colors 70% toward white for natural water appearance
- Reduce additive blend alpha to 5% and volume particles to 2%

											
										
										
											2026-02-16 02:12:43 -08:00
+								            float alpha = std::min(interpFBlockFloat(em.particleAlpha, lifeRatio), 1.0f);
 								            float rawScale = interpFBlockFloat(em.particleScale, lifeRatio);
-												Render M2 glow batches as billboarded light sprites

Replace flat mesh rendering of additive/mod blend batches (blendMode >= 3)
with camera-facing point sprites using a soft radial gradient texture and
additive blending. Adds M2 particle emitter infrastructure (structs, shader,
parsing stubs) but disables emitter parsing — the assumed 476-byte struct
size is wrong for WotLK 3.3.5a, causing misaligned reads that explode RAM.

											
										
										
											2026-02-06 08:58:26 -08:00
-												Add ambient insect particles near water vegetation, fix firefly particles, and improve water foam

- Spawn dark point-sprite insects buzzing around cattails/reeds/kelp/seaweed
- Fix firefly M2 particles: exempt from alpha dampening and forced gravity
- Make water shoreline/crest foam more irregular with UV warping and bluer tint

											
										
										
											2026-02-23 07:18:44 -08:00
+								            if (!gpu.isSpellEffect && !gpu.isFireflyEffect) {
-												Add 3D level-up effect using LevelUp.m2 spell model

Replace 2D screen-space ding rings with real WoW LevelUp.m2 particle/geometry
effect. Fix FBlock particle color parsing (C3Vector floats, not CImVector bytes)
which was producing blue/red instead of golden yellow. Spell effect models bypass
particle dampeners, glow sprite conversion, Mod→Additive blend override, and all
collision (floor/wall/camera) to prevent camera zoom-in. Other players' level-ups
trigger the 3D effect at their position with group chat notification. F7 hotkey
for testing.

											
										
										
											2026-02-19 20:36:25 -08:00
+								                color = glm::mix(color, glm::vec3(1.0f), 0.7f);
-												Vulcan Nightmare

Experimentally bringing up vulcan support

											
										
										
											2026-02-21 19:41:21 -08:00
+								                if (rawScale > 2.0f) alpha *= 0.02f;
 								                if (em.blendingType == 3 || em.blendingType == 4) alpha *= 0.05f;
-												Fix M2 particle rendering: color, gravity, transparency, and animation

- Fix FBlock color keys from 3-byte BGR to 4-byte RGBA (CImVector) to
  prevent garbled purple/red colors from byte misalignment
- Add circular soft-edge falloff in particle fragment shader (GL_POINTS
  rendered as squares by default)
- Apply default gravity (4.0 spray, 1.5 mist) when M2 gravity is 0 since
  bone animation from .anim files isn't loaded yet
- Add drift velocity to speed=0 emitters so particles spread as mist
  instead of clustering at static bone positions
- Run particle updates for all nearby instances, not just those in
  boneWorkIndices_, to prevent particles freezing when bone culled
- Wrap animation time for particle models to keep emission tracks looping
- Cap particle scale to 1.5 and reduce point size multiplier (800→400)
- Desaturate FBlock colors 70% toward white for natural water appearance
- Reduce additive blend alpha to 5% and volume particles to 2%

											
										
										
											2026-02-16 02:12:43 -08:00
+								            }
-												Add ambient insect particles near water vegetation, fix firefly particles, and improve water foam

- Spawn dark point-sprite insects buzzing around cattails/reeds/kelp/seaweed
- Fix firefly M2 particles: exempt from alpha dampening and forced gravity
- Make water shoreline/crest foam more irregular with UV warping and bluer tint

											
										
										
											2026-02-23 07:18:44 -08:00
+								            float scale = (gpu.isSpellEffect || gpu.isFireflyEffect) ? rawScale : std::min(rawScale, 1.5f);
-												Harden transport updates and fix waterfall particle tint

											
										
										
											2026-02-12 00:45:24 -08:00
-												Vulcan Nightmare

Experimentally bringing up vulcan support

											
										
										
											2026-02-21 19:41:21 -08:00
+								            VkTexture* tex = whiteTexture_.get();
-												Render M2 glow batches as billboarded light sprites

Replace flat mesh rendering of additive/mod blend batches (blendMode >= 3)
with camera-facing point sprites using a soft radial gradient texture and
additive blending. Adds M2 particle emitter infrastructure (structs, shader,
parsing stubs) but disables emitter parsing — the assumed 476-byte struct
size is wrong for WotLK 3.3.5a, causing misaligned reads that explode RAM.

											
										
										
											2026-02-06 08:58:26 -08:00
+								            if (p.emitterIndex < static_cast<int>(gpu.particleTextures.size())) {
 								                tex = gpu.particleTextures[p.emitterIndex];
 								            }
-												Add tiled particle atlas support

											
										
										
											2026-02-07 19:20:37 -08:00
+								            uint16_t tilesX = std::max<uint16_t>(em.textureCols, 1);
 								            uint16_t tilesY = std::max<uint16_t>(em.textureRows, 1);
 								            uint32_t totalTiles = static_cast<uint32_t>(tilesX) * static_cast<uint32_t>(tilesY);
 								            ParticleGroupKey key{tex, em.blendingType, tilesX, tilesY};
-												Render M2 glow batches as billboarded light sprites

Replace flat mesh rendering of additive/mod blend batches (blendMode >= 3)
with camera-facing point sprites using a soft radial gradient texture and
additive blending. Adds M2 particle emitter infrastructure (structs, shader,
parsing stubs) but disables emitter parsing — the assumed 476-byte struct
size is wrong for WotLK 3.3.5a, causing misaligned reads that explode RAM.

											
										
										
											2026-02-06 08:58:26 -08:00
+								            auto& group = groups[key];
 								            group.texture = tex;
 								            group.blendType = em.blendingType;
-												Add tiled particle atlas support

											
										
										
											2026-02-07 19:20:37 -08:00
+								            group.tilesX = tilesX;
 								            group.tilesY = tilesY;
-												Render M2 glow batches as billboarded light sprites

Replace flat mesh rendering of additive/mod blend batches (blendMode >= 3)
with camera-facing point sprites using a soft radial gradient texture and
additive blending. Adds M2 particle emitter infrastructure (structs, shader,
parsing stubs) but disables emitter parsing — the assumed 476-byte struct
size is wrong for WotLK 3.3.5a, causing misaligned reads that explode RAM.

											
										
										
											2026-02-06 08:58:26 -08:00
 								            group.vertexData.push_back(p.position.x);
 								            group.vertexData.push_back(p.position.y);
 								            group.vertexData.push_back(p.position.z);
 								            group.vertexData.push_back(color.r);
 								            group.vertexData.push_back(color.g);
 								            group.vertexData.push_back(color.b);
 								            group.vertexData.push_back(alpha);
 								            group.vertexData.push_back(scale);
-												Add tiled particle atlas support

											
										
										
											2026-02-07 19:20:37 -08:00
+								            float tileIndex = p.tileIndex;
 								            if ((em.flags & kParticleFlagTiled) && totalTiles > 1) {
 								                float animSeconds = inst.animTime / 1000.0f;
 								                uint32_t animFrame = static_cast<uint32_t>(std::floor(animSeconds * totalTiles)) % totalTiles;
 								                tileIndex = std::fmod(p.tileIndex + static_cast<float>(animFrame),
 								                                      static_cast<float>(totalTiles));
 								            }
 								            group.vertexData.push_back(tileIndex);
-												Render M2 glow batches as billboarded light sprites

Replace flat mesh rendering of additive/mod blend batches (blendMode >= 3)
with camera-facing point sprites using a soft radial gradient texture and
additive blending. Adds M2 particle emitter infrastructure (structs, shader,
parsing stubs) but disables emitter parsing — the assumed 476-byte struct
size is wrong for WotLK 3.3.5a, causing misaligned reads that explode RAM.

											
										
										
											2026-02-06 08:58:26 -08:00
+								            totalParticles++;
 								        }
 								    }
 								    if (totalParticles == 0) return;
-												Vulcan Nightmare

Experimentally bringing up vulcan support

											
										
										
											2026-02-21 19:41:21 -08:00
+								    // Bind per-frame set (set 0) for particle pipeline
 								    vkCmdBindDescriptorSets(cmd, VK_PIPELINE_BIND_POINT_GRAPHICS,
 								                            particlePipelineLayout_, 0, 1, &perFrameSet, 0, nullptr);
-												Render M2 glow batches as billboarded light sprites

Replace flat mesh rendering of additive/mod blend batches (blendMode >= 3)
with camera-facing point sprites using a soft radial gradient texture and
additive blending. Adds M2 particle emitter infrastructure (structs, shader,
parsing stubs) but disables emitter parsing — the assumed 476-byte struct
size is wrong for WotLK 3.3.5a, causing misaligned reads that explode RAM.

											
										
										
											2026-02-06 08:58:26 -08:00
-												Vulcan Nightmare

Experimentally bringing up vulcan support

											
										
										
											2026-02-21 19:41:21 -08:00
+								    VkDeviceSize vbOffset = 0;
 								    vkCmdBindVertexBuffers(cmd, 0, 1, &m2ParticleVB_, &vbOffset);
-												Render M2 glow batches as billboarded light sprites

Replace flat mesh rendering of additive/mod blend batches (blendMode >= 3)
with camera-facing point sprites using a soft radial gradient texture and
additive blending. Adds M2 particle emitter infrastructure (structs, shader,
parsing stubs) but disables emitter parsing — the assumed 476-byte struct
size is wrong for WotLK 3.3.5a, causing misaligned reads that explode RAM.

											
										
										
											2026-02-06 08:58:26 -08:00
-												Vulcan Nightmare

Experimentally bringing up vulcan support

											
										
										
											2026-02-21 19:41:21 -08:00
+								    VkPipeline currentPipeline = VK_NULL_HANDLE;
-												Render M2 glow batches as billboarded light sprites

Replace flat mesh rendering of additive/mod blend batches (blendMode >= 3)
with camera-facing point sprites using a soft radial gradient texture and
additive blending. Adds M2 particle emitter infrastructure (structs, shader,
parsing stubs) but disables emitter parsing — the assumed 476-byte struct
size is wrong for WotLK 3.3.5a, causing misaligned reads that explode RAM.

											
										
										
											2026-02-06 08:58:26 -08:00
 								    for (auto& [key, group] : groups) {
 								        if (group.vertexData.empty()) continue;
-												Fix particle glow transparency for textures without alpha

											
										
										
											2026-02-14 22:32:12 -08:00
+								        uint8_t blendType = group.blendType;
-												Vulcan Nightmare

Experimentally bringing up vulcan support

											
										
										
											2026-02-21 19:41:21 -08:00
+								        VkPipeline desiredPipeline = (blendType == 3 || blendType == 4)
 								            ? particleAdditivePipeline_ : particlePipeline_;
 								        if (desiredPipeline != currentPipeline) {
 								            vkCmdBindPipeline(cmd, VK_PIPELINE_BIND_POINT_GRAPHICS, desiredPipeline);
 								            currentPipeline = desiredPipeline;
 								        }
 								        // Allocate descriptor set for this group's texture
 								        VkDescriptorSetAllocateInfo ai{VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO};
 								        ai.descriptorPool = materialDescPool_;
 								        ai.descriptorSetCount = 1;
 								        ai.pSetLayouts = &particleTexLayout_;
 								        VkDescriptorSet texSet = VK_NULL_HANDLE;
 								        if (vkAllocateDescriptorSets(vkCtx_->getDevice(), &ai, &texSet) == VK_SUCCESS) {
 								            VkTexture* tex = group.texture ? group.texture : whiteTexture_.get();
 								            VkDescriptorImageInfo imgInfo = tex->descriptorInfo();
 								            VkWriteDescriptorSet write{VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET};
 								            write.dstSet = texSet;
 								            write.dstBinding = 0;
 								            write.descriptorCount = 1;
 								            write.descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER;
 								            write.pImageInfo = &imgInfo;
 								            vkUpdateDescriptorSets(vkCtx_->getDevice(), 1, &write, 0, nullptr);
 								            vkCmdBindDescriptorSets(cmd, VK_PIPELINE_BIND_POINT_GRAPHICS,
 								                                    particlePipelineLayout_, 1, 1, &texSet, 0, nullptr);
 								        }
 								        // Push constants: tileCount + alphaKey
 								        struct { float tileX, tileY; int alphaKey; } pc = {
 								            static_cast<float>(group.tilesX), static_cast<float>(group.tilesY),
 								            (blendType == 1) ? 1 : 0
 								        };
 								        vkCmdPushConstants(cmd, particlePipelineLayout_, VK_SHADER_STAGE_FRAGMENT_BIT, 0,
 								                           sizeof(pc), &pc);
 								        // Upload and draw in chunks
-												Add tiled particle atlas support

											
										
										
											2026-02-07 19:20:37 -08:00
+								        size_t count = group.vertexData.size() / 9;
-												Render M2 glow batches as billboarded light sprites

Replace flat mesh rendering of additive/mod blend batches (blendMode >= 3)
with camera-facing point sprites using a soft radial gradient texture and
additive blending. Adds M2 particle emitter infrastructure (structs, shader,
parsing stubs) but disables emitter parsing — the assumed 476-byte struct
size is wrong for WotLK 3.3.5a, causing misaligned reads that explode RAM.

											
										
										
											2026-02-06 08:58:26 -08:00
+								        size_t offset = 0;
 								        while (offset < count) {
 								            size_t batch = std::min(count - offset, MAX_M2_PARTICLES);
-												Vulcan Nightmare

Experimentally bringing up vulcan support

											
										
										
											2026-02-21 19:41:21 -08:00
+								            memcpy(m2ParticleVBMapped_, &group.vertexData[offset * 9], batch * 9 * sizeof(float));
 								            vkCmdDraw(cmd, static_cast<uint32_t>(batch), 1, 0, 0);
-												Render M2 glow batches as billboarded light sprites

Replace flat mesh rendering of additive/mod blend batches (blendMode >= 3)
with camera-facing point sprites using a soft radial gradient texture and
additive blending. Adds M2 particle emitter infrastructure (structs, shader,
parsing stubs) but disables emitter parsing — the assumed 476-byte struct
size is wrong for WotLK 3.3.5a, causing misaligned reads that explode RAM.

											
										
										
											2026-02-06 08:58:26 -08:00
+								            offset += batch;
 								        }
 								    }
 								}
-												Vulcan Nightmare

Experimentally bringing up Vulkan support

											
										
										
											2026-02-21 19:41:21 -08:00
+								void M2Renderer::renderSmokeParticles(VkCommandBuffer cmd, VkDescriptorSet perFrameSet) {
 								    if (smokeParticles.empty() || !smokePipeline_ || !smokeVB_) return;
-												Add smoke particle emitters with ember sparks and enable 4x MSAA

Replace UV scroll workaround for chimney smoke with proper GL_POINTS
particle system. Smoke particles rise, expand, drift, and fade over
4-7 seconds. One in eight particles spawns as a bright orange/red
ember spark. Enable 4x multisample antialiasing for smoother edges
on player models, fences, and foliage.

											
										
										
											2026-02-04 14:37:32 -08:00
 								    // Build vertex data: pos(3) + lifeRatio(1) + size(1) + isSpark(1) per particle
-												Vulcan Nightmare

Experimentally bringing up vulcan support

											
										
										
											2026-02-21 19:41:21 -08:00
+								    size_t count = std::min(smokeParticles.size(), static_cast<size_t>(MAX_SMOKE_PARTICLES));
 								    float* dst = static_cast<float*>(smokeVBMapped_);
 								    for (size_t i = 0; i < count; i++) {
 								        const auto& p = smokeParticles[i];
 								        *dst++ = p.position.x;
 								        *dst++ = p.position.y;
 								        *dst++ = p.position.z;
 								        *dst++ = p.life / p.maxLife;
 								        *dst++ = p.size;
 								        *dst++ = p.isSpark;
 								    }
 								    vkCmdBindPipeline(cmd, VK_PIPELINE_BIND_POINT_GRAPHICS, smokePipeline_);
 								    vkCmdBindDescriptorSets(cmd, VK_PIPELINE_BIND_POINT_GRAPHICS,
 								                            smokePipelineLayout_, 0, 1, &perFrameSet, 0, nullptr);
 								    // Push constant: screenHeight
 								    float screenHeight = static_cast<float>(vkCtx_->getSwapchainExtent().height);
 								    vkCmdPushConstants(cmd, smokePipelineLayout_, VK_SHADER_STAGE_VERTEX_BIT, 0,
 								                       sizeof(float), &screenHeight);
 								    VkDeviceSize offset = 0;
 								    vkCmdBindVertexBuffers(cmd, 0, 1, &smokeVB_, &offset);
 								    vkCmdDraw(cmd, static_cast<uint32_t>(count), 1, 0, 0);
-												Add smoke particle emitters with ember sparks and enable 4x MSAA

Replace UV scroll workaround for chimney smoke with proper GL_POINTS
particle system. Smoke particles rise, expand, drift, and fade over
4-7 seconds. One in eight particles spawns as a bright orange/red
ember spark. Enable 4x multisample antialiasing for smoother edges
on player models, fences, and foliage.

											
										
										
											2026-02-04 14:37:32 -08:00
+								}
-												Add transport support, gameobject queries, and fix item use

- Add setInstancePosition() to M2Renderer and WMORenderer for moving
  transport instances at runtime
- Detect UPDATEFLAG_TRANSPORT on gameobjects and track transport GUIDs
- Parse player-on-transport state from movement blocks
- Wire transport move callback in Application to update render positions
- Implement CMSG_GAMEOBJECT_QUERY / SMSG_GAMEOBJECT_QUERY_RESPONSE so
  gameobjects display proper names instead of "Unknown"
- Add name/entry fields to GameObject entity class
- Fix CMSG_USE_ITEM packet: remove extra uint8 that shifted the item
  GUID by one byte, breaking hearthstone and all item usage
- Remove redundant CMSG_LOOT after CMSG_GAMEOBJECT_USE for chests
- Show PvP enabled/disabled state in toggle message
- Relax WMO ramp wall-collision step-up check to allow walking on
  gentle ramps where floor rise per step is under 0.1 units
- Add M2 fallback when WMO group files fail to load for gameobjects
- Handle re-creation of existing gameobject render instances by
  updating position instead of silently ignoring

											
										
										
											2026-02-08 00:59:40 -08:00
+								void M2Renderer::setInstancePosition(uint32_t instanceId, const glm::vec3& position) {
 								    auto idxIt = instanceIndexById.find(instanceId);
 								    if (idxIt == instanceIndexById.end()) return;
 								    auto& inst = instances[idxIt->second];
-												Optimize M2 update loop: skip static doodads, incremental spatial index

- Split M2 instances into fast-path index lists (animated, particle-only,
  particle-all, smoke) to avoid iterating all 46K instances per frame
- Cache model flags (hasAnimation, disableAnimation, isSmoke, etc.) on
  M2Instance struct to eliminate per-frame hash lookups
- Replace full rebuildSpatialIndex on position/transform updates with
  incremental grid cell remove+add, preventing 8.5ms/frame rebuild cost
- Advance animTime for all instances (texture UV animation) but only
  compute bones and particles for the ~3K that need it

M2_UPDATE: 10.7ms → 2.0ms, FPS: 35 → 55-59

											
										
										
											2026-03-02 14:45:49 -08:00
 								    // Save old grid cells
 								    GridCell oldMinCell = toCell(inst.worldBoundsMin);
 								    GridCell oldMaxCell = toCell(inst.worldBoundsMax);
-												Add transport support, gameobject queries, and fix item use

- Add setInstancePosition() to M2Renderer and WMORenderer for moving
  transport instances at runtime
- Detect UPDATEFLAG_TRANSPORT on gameobjects and track transport GUIDs
- Parse player-on-transport state from movement blocks
- Wire transport move callback in Application to update render positions
- Implement CMSG_GAMEOBJECT_QUERY / SMSG_GAMEOBJECT_QUERY_RESPONSE so
  gameobjects display proper names instead of "Unknown"
- Add name/entry fields to GameObject entity class
- Fix CMSG_USE_ITEM packet: remove extra uint8 that shifted the item
  GUID by one byte, breaking hearthstone and all item usage
- Remove redundant CMSG_LOOT after CMSG_GAMEOBJECT_USE for chests
- Show PvP enabled/disabled state in toggle message
- Relax WMO ramp wall-collision step-up check to allow walking on
  gentle ramps where floor rise per step is under 0.1 units
- Add M2 fallback when WMO group files fail to load for gameobjects
- Handle re-creation of existing gameobject render instances by
  updating position instead of silently ignoring

											
										
										
											2026-02-08 00:59:40 -08:00
+								    inst.position = position;
 								    inst.updateModelMatrix();
 								    auto modelIt = models.find(inst.modelId);
 								    if (modelIt != models.end()) {
 								        glm::vec3 localMin, localMax;
 								        getTightCollisionBounds(modelIt->second, localMin, localMax);
 								        transformAABB(inst.modelMatrix, localMin, localMax, inst.worldBoundsMin, inst.worldBoundsMax);
 								    }
-												Optimize M2 update loop: skip static doodads, incremental spatial index

- Split M2 instances into fast-path index lists (animated, particle-only,
  particle-all, smoke) to avoid iterating all 46K instances per frame
- Cache model flags (hasAnimation, disableAnimation, isSmoke, etc.) on
  M2Instance struct to eliminate per-frame hash lookups
- Replace full rebuildSpatialIndex on position/transform updates with
  incremental grid cell remove+add, preventing 8.5ms/frame rebuild cost
- Advance animTime for all instances (texture UV animation) but only
  compute bones and particles for the ~3K that need it

M2_UPDATE: 10.7ms → 2.0ms, FPS: 35 → 55-59

											
										
										
											2026-03-02 14:45:49 -08:00
 								    // Incrementally update spatial grid
 								    GridCell newMinCell = toCell(inst.worldBoundsMin);
 								    GridCell newMaxCell = toCell(inst.worldBoundsMax);
 								    if (oldMinCell.x != newMinCell.x || oldMinCell.y != newMinCell.y || oldMinCell.z != newMinCell.z ||
 								        oldMaxCell.x != newMaxCell.x || oldMaxCell.y != newMaxCell.y || oldMaxCell.z != newMaxCell.z) {
 								        for (int z = oldMinCell.z; z <= oldMaxCell.z; z++) {
 								            for (int y = oldMinCell.y; y <= oldMaxCell.y; y++) {
 								                for (int x = oldMinCell.x; x <= oldMaxCell.x; x++) {
 								                    auto it = spatialGrid.find(GridCell{x, y, z});
 								                    if (it != spatialGrid.end()) {
 								                        auto& vec = it->second;
 								                        vec.erase(std::remove(vec.begin(), vec.end(), instanceId), vec.end());
 								                    }
 								                }
 								            }
 								        }
 								        for (int z = newMinCell.z; z <= newMaxCell.z; z++) {
 								            for (int y = newMinCell.y; y <= newMaxCell.y; y++) {
 								                for (int x = newMinCell.x; x <= newMaxCell.x; x++) {
 								                    spatialGrid[GridCell{x, y, z}].push_back(instanceId);
 								                }
 								            }
 								        }
 								    }
-												Add transport support, gameobject queries, and fix item use

- Add setInstancePosition() to M2Renderer and WMORenderer for moving
  transport instances at runtime
- Detect UPDATEFLAG_TRANSPORT on gameobjects and track transport GUIDs
- Parse player-on-transport state from movement blocks
- Wire transport move callback in Application to update render positions
- Implement CMSG_GAMEOBJECT_QUERY / SMSG_GAMEOBJECT_QUERY_RESPONSE so
  gameobjects display proper names instead of "Unknown"
- Add name/entry fields to GameObject entity class
- Fix CMSG_USE_ITEM packet: remove extra uint8 that shifted the item
  GUID by one byte, breaking hearthstone and all item usage
- Remove redundant CMSG_LOOT after CMSG_GAMEOBJECT_USE for chests
- Show PvP enabled/disabled state in toggle message
- Relax WMO ramp wall-collision step-up check to allow walking on
  gentle ramps where floor rise per step is under 0.1 units
- Add M2 fallback when WMO group files fail to load for gameobjects
- Handle re-creation of existing gameobject render instances by
  updating position instead of silently ignoring

											
										
										
											2026-02-08 00:59:40 -08:00
+								}
-												Freeze gameobject M2 animations to prevent cycling

Gameobject M2 instances (books, crates, chests) were continuously
cycling their animations because M2Renderer unconditionally loops
all sequences. Added setInstanceAnimationFrozen() and freeze all
gameobject instances at creation time so they stay in their bind pose.

											
										
										
											2026-02-23 05:31:02 -08:00
+								void M2Renderer::setInstanceAnimationFrozen(uint32_t instanceId, bool frozen) {
 								    auto idxIt = instanceIndexById.find(instanceId);
 								    if (idxIt == instanceIndexById.end()) return;
 								    auto& inst = instances[idxIt->second];
 								    inst.animSpeed = frozen ? 0.0f : 1.0f;
 								    if (frozen) {
 								        inst.animTime = 0.0f;  // Reset to bind pose
 								    }
 								}
-												Transport hell

											
										
										
											2026-02-11 00:54:38 -08:00
+								void M2Renderer::setInstanceTransform(uint32_t instanceId, const glm::mat4& transform) {
 								    auto idxIt = instanceIndexById.find(instanceId);
 								    if (idxIt == instanceIndexById.end()) return;
 								    auto& inst = instances[idxIt->second];
-												Optimize M2 update loop: skip static doodads, incremental spatial index

- Split M2 instances into fast-path index lists (animated, particle-only,
  particle-all, smoke) to avoid iterating all 46K instances per frame
- Cache model flags (hasAnimation, disableAnimation, isSmoke, etc.) on
  M2Instance struct to eliminate per-frame hash lookups
- Replace full rebuildSpatialIndex on position/transform updates with
  incremental grid cell remove+add, preventing 8.5ms/frame rebuild cost
- Advance animTime for all instances (texture UV animation) but only
  compute bones and particles for the ~3K that need it

M2_UPDATE: 10.7ms → 2.0ms, FPS: 35 → 55-59

											
										
										
											2026-03-02 14:45:49 -08:00
+								    // Remove old grid cells before updating bounds
 								    GridCell oldMinCell = toCell(inst.worldBoundsMin);
 								    GridCell oldMaxCell = toCell(inst.worldBoundsMax);
-												Transport hell

											
										
										
											2026-02-11 00:54:38 -08:00
+								    // Update model matrix directly
 								    inst.modelMatrix = transform;
 								    inst.invModelMatrix = glm::inverse(transform);
 								    // Extract position from transform for bounds
 								    inst.position = glm::vec3(transform[3]);
 								    // Update bounds
 								    auto modelIt = models.find(inst.modelId);
 								    if (modelIt != models.end()) {
 								        glm::vec3 localMin, localMax;
 								        getTightCollisionBounds(modelIt->second, localMin, localMax);
 								        transformAABB(inst.modelMatrix, localMin, localMax, inst.worldBoundsMin, inst.worldBoundsMax);
 								    }
-												Optimize M2 update loop: skip static doodads, incremental spatial index

- Split M2 instances into fast-path index lists (animated, particle-only,
  particle-all, smoke) to avoid iterating all 46K instances per frame
- Cache model flags (hasAnimation, disableAnimation, isSmoke, etc.) on
  M2Instance struct to eliminate per-frame hash lookups
- Replace full rebuildSpatialIndex on position/transform updates with
  incremental grid cell remove+add, preventing 8.5ms/frame rebuild cost
- Advance animTime for all instances (texture UV animation) but only
  compute bones and particles for the ~3K that need it

M2_UPDATE: 10.7ms → 2.0ms, FPS: 35 → 55-59

											
										
										
											2026-03-02 14:45:49 -08:00
 								    // Incrementally update spatial grid (remove old cells, add new cells)
 								    GridCell newMinCell = toCell(inst.worldBoundsMin);
 								    GridCell newMaxCell = toCell(inst.worldBoundsMax);
 								    if (oldMinCell.x != newMinCell.x || oldMinCell.y != newMinCell.y || oldMinCell.z != newMinCell.z ||
 								        oldMaxCell.x != newMaxCell.x || oldMaxCell.y != newMaxCell.y || oldMaxCell.z != newMaxCell.z) {
 								        // Remove from old cells
 								        for (int z = oldMinCell.z; z <= oldMaxCell.z; z++) {
 								            for (int y = oldMinCell.y; y <= oldMaxCell.y; y++) {
 								                for (int x = oldMinCell.x; x <= oldMaxCell.x; x++) {
 								                    auto it = spatialGrid.find(GridCell{x, y, z});
 								                    if (it != spatialGrid.end()) {
 								                        auto& vec = it->second;
 								                        vec.erase(std::remove(vec.begin(), vec.end(), instanceId), vec.end());
 								                    }
 								                }
 								            }
 								        }
 								        // Add to new cells
 								        for (int z = newMinCell.z; z <= newMaxCell.z; z++) {
 								            for (int y = newMinCell.y; y <= newMaxCell.y; y++) {
 								                for (int x = newMinCell.x; x <= newMaxCell.x; x++) {
 								                    spatialGrid[GridCell{x, y, z}].push_back(instanceId);
 								                }
 								            }
 								        }
 								    }
 								    // No spatialIndexDirty_ = true — handled incrementally
-												Transport hell

											
										
										
											2026-02-11 00:54:38 -08:00
+								}
-												Initial commit: wowee native WoW 3.3.5a client

											
										
										
											2026-02-02 12:24:50 -08:00
+								void M2Renderer::removeInstance(uint32_t instanceId) {
 								    for (auto it = instances.begin(); it != instances.end(); ++it) {
 								        if (it->id == instanceId) {
-												Fix M2 bone buffer leak on instance removal

removeInstance() and removeInstances() were erasing M2Instances without
calling destroyInstanceBones(), leaking VMA bone buffers permanently.
This caused framerate to drop and never recover after NPC encounters.

											
										
										
											2026-02-23 04:52:40 -08:00
+								            destroyInstanceBones(*it);
-												Initial commit: wowee native WoW 3.3.5a client

											
										
										
											2026-02-02 12:24:50 -08:00
+								            instances.erase(it);
-												Optimize collision queries with spatial grid and improve movement CCD

											
										
										
											2026-02-03 16:21:48 -08:00
+								            rebuildSpatialIndex();
-												Initial commit: wowee native WoW 3.3.5a client

											
										
										
											2026-02-02 12:24:50 -08:00
+								            return;
 								        }
 								    }
 								}
-												Fix invisible walls from WMO doodad M2 collision and MOPY filtering

WMO interior doodads (gears, decorations) were blocking player movement
via M2 collision. Skip collision for all WMO doodad M2 instances since
the WMO itself handles wall collision.

Also filter WMO wall collision using MOPY per-triangle flags: only
rendered+collidable triangles block the player, skipping invisible
collision hulls.

Revert tram portal extended range (no longer needed with collision fix).

											
										
										
											2026-03-06 12:26:17 -08:00
+								void M2Renderer::setSkipCollision(uint32_t instanceId, bool skip) {
 								    for (auto& inst : instances) {
 								        if (inst.id == instanceId) {
 								            inst.skipCollision = skip;
 								            return;
 								        }
 								    }
 								}
-												Fix taxi state sync and transport authority; reduce runtime log overhead; restore first-person self-hide

											
										
										
											2026-02-11 22:27:02 -08:00
+								void M2Renderer::removeInstances(const std::vector<uint32_t>& instanceIds) {
 								    if (instanceIds.empty() || instances.empty()) {
 								        return;
 								    }
 								    std::unordered_set<uint32_t> toRemove(instanceIds.begin(), instanceIds.end());
 								    const size_t oldSize = instances.size();
-												Fix M2 bone buffer leak on instance removal

removeInstance() and removeInstances() were erasing M2Instances without
calling destroyInstanceBones(), leaking VMA bone buffers permanently.
This caused framerate to drop and never recover after NPC encounters.

											
										
										
											2026-02-23 04:52:40 -08:00
+								    for (auto& inst : instances) {
 								        if (toRemove.count(inst.id)) {
 								            destroyInstanceBones(inst);
 								        }
 								    }
-												Fix taxi state sync and transport authority; reduce runtime log overhead; restore first-person self-hide

											
										
										
											2026-02-11 22:27:02 -08:00
+								    instances.erase(std::remove_if(instances.begin(), instances.end(),
 								                   [&toRemove](const M2Instance& inst) {
 								                       return toRemove.find(inst.id) != toRemove.end();
 								                   }),
 								                   instances.end());
 								    if (instances.size() != oldSize) {
 								        rebuildSpatialIndex();
 								    }
 								}
-												Initial commit: wowee native WoW 3.3.5a client

											
										
										
											2026-02-02 12:24:50 -08:00
+								void M2Renderer::clear() {
-												Vulcan Nightmare

Experimentally bringing up vulcan support

											
										
										
											2026-02-21 19:41:21 -08:00
+								    if (vkCtx_) {
 								        vkDeviceWaitIdle(vkCtx_->getDevice());
 								        for (auto& [id, model] : models) {
 								            destroyModelGPU(model);
 								        }
 								        for (auto& inst : instances) {
 								            destroyInstanceBones(inst);
 								        }
-												Fix GPU resource leaks and re-entrant world loading for instance transitions

Reset descriptor pools in CharacterRenderer/M2Renderer/WMORenderer on map
change to prevent VK_ERROR_DEVICE_LOST from pool exhaustion. Defer re-entrant
SMSG_NEW_WORLD during active world load to avoid recursive cleanup crashes.
Gate swim bubbles on swimming state, skip redundant shadow pipeline re-init,
add WOWEE_SKIP_* env vars for render isolation debugging.

											
										
										
											2026-03-02 08:06:35 -08:00
+								        // Reset descriptor pools so new allocations succeed after reload.
 								        // destroyModelGPU/destroyInstanceBones don't free individual sets,
 								        // so the pools fill up across map changes without this reset.
 								        VkDevice device = vkCtx_->getDevice();
 								        if (materialDescPool_) {
 								            vkResetDescriptorPool(device, materialDescPool_, 0);
 								            // Re-allocate the glow texture descriptor set (pre-allocated during init,
 								            // invalidated by pool reset).
 								            if (glowTexture_ && particleTexLayout_) {
 								                VkDescriptorSetAllocateInfo ai{VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO};
 								                ai.descriptorPool = materialDescPool_;
 								                ai.descriptorSetCount = 1;
 								                ai.pSetLayouts = &particleTexLayout_;
 								                glowTexDescSet_ = VK_NULL_HANDLE;
 								                if (vkAllocateDescriptorSets(device, &ai, &glowTexDescSet_) == VK_SUCCESS) {
 								                    VkDescriptorImageInfo imgInfo = glowTexture_->descriptorInfo();
 								                    VkWriteDescriptorSet write{VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET};
 								                    write.dstSet = glowTexDescSet_;
 								                    write.dstBinding = 0;
 								                    write.descriptorCount = 1;
 								                    write.descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER;
 								                    write.pImageInfo = &imgInfo;
 								                    vkUpdateDescriptorSets(device, 1, &write, 0, nullptr);
 								                }
 								            }
 								        }
 								        if (boneDescPool_) {
 								            vkResetDescriptorPool(device, boneDescPool_, 0);
 								        }
-												Initial commit: wowee native WoW 3.3.5a client

											
										
										
											2026-02-02 12:24:50 -08:00
+								    }
 								    models.clear();
 								    instances.clear();
-												Optimize collision queries with spatial grid and improve movement CCD

											
										
										
											2026-02-03 16:21:48 -08:00
+								    spatialGrid.clear();
 								    instanceIndexById.clear();
-												Incremental terrain upload + M2 instance dedup hash for city stutter

Terrain finalization was uploading all 256 chunks (GPU fence waits) in one
atomic advanceFinalization call that couldn't be interrupted by the 5ms time
budget. Now split into incremental batches of 16 chunks per call, allowing
the time budget to yield between batches.

M2 instance creation had O(N) dedup scans iterating ALL instances to check
for duplicates. In cities with 5000+ doodads, this caused O(N²) total work
during tile loading. Replaced with hash-based DedupKey map for O(1) lookups.

Changes:
- TerrainRenderer::loadTerrainIncremental: uploads N chunks per call
- FinalizingTile tracks terrainChunkNext for cross-frame progress
- TERRAIN phase yields after preload and after each chunk batch
- M2Renderer::DedupKey hash map replaces linear scan in createInstance
  and createInstanceWithMatrix
- Dedup map maintained through rebuildSpatialIndex and clear paths

											
										
										
											2026-03-07 11:59:19 -08:00
+								    instanceDedupMap_.clear();
-												Add smoke particle emitters with ember sparks and enable 4x MSAA

Replace UV scroll workaround for chimney smoke with proper GL_POINTS
particle system. Smoke particles rise, expand, drift, and fade over
4-7 seconds. One in eight particles spawns as a bright orange/red
ember spark. Enable 4x multisample antialiasing for smoother edges
on player models, fences, and foliage.

											
										
										
											2026-02-04 14:37:32 -08:00
+								    smokeParticles.clear();
-												Optimize M2 update loop: skip static doodads, incremental spatial index

- Split M2 instances into fast-path index lists (animated, particle-only,
  particle-all, smoke) to avoid iterating all 46K instances per frame
- Cache model flags (hasAnimation, disableAnimation, isSmoke, etc.) on
  M2Instance struct to eliminate per-frame hash lookups
- Replace full rebuildSpatialIndex on position/transform updates with
  incremental grid cell remove+add, preventing 8.5ms/frame rebuild cost
- Advance animTime for all instances (texture UV animation) but only
  compute bones and particles for the ~3K that need it

M2_UPDATE: 10.7ms → 2.0ms, FPS: 35 → 55-59

											
										
										
											2026-03-02 14:45:49 -08:00
+								    smokeInstanceIndices_.clear();
-												Instance portal glow, spin, and transparent additive rendering

											
										
										
											2026-03-06 18:02:56 -08:00
+								    portalInstanceIndices_.clear();
-												Optimize M2 update loop: skip static doodads, incremental spatial index

- Split M2 instances into fast-path index lists (animated, particle-only,
  particle-all, smoke) to avoid iterating all 46K instances per frame
- Cache model flags (hasAnimation, disableAnimation, isSmoke, etc.) on
  M2Instance struct to eliminate per-frame hash lookups
- Replace full rebuildSpatialIndex on position/transform updates with
  incremental grid cell remove+add, preventing 8.5ms/frame rebuild cost
- Advance animTime for all instances (texture UV animation) but only
  compute bones and particles for the ~3K that need it

M2_UPDATE: 10.7ms → 2.0ms, FPS: 35 → 55-59

											
										
										
											2026-03-02 14:45:49 -08:00
+								    animatedInstanceIndices_.clear();
 								    particleOnlyInstanceIndices_.clear();
 								    particleInstanceIndices_.clear();
-												Add smoke particle emitters with ember sparks and enable 4x MSAA

Replace UV scroll workaround for chimney smoke with proper GL_POINTS
particle system. Smoke particles rise, expand, drift, and fade over
4-7 seconds. One in eight particles spawns as a bright orange/red
ember spark. Enable 4x multisample antialiasing for smoother edges
on player models, fences, and foliage.

											
										
										
											2026-02-04 14:37:32 -08:00
+								    smokeEmitAccum = 0.0f;
-												Optimize collision queries with spatial grid and improve movement CCD

											
										
										
											2026-02-03 16:21:48 -08:00
+								}
 								void M2Renderer::setCollisionFocus(const glm::vec3& worldPos, float radius) {
 								    collisionFocusEnabled = (radius > 0.0f);
 								    collisionFocusPos = worldPos;
 								    collisionFocusRadius = std::max(0.0f, radius);
 								    collisionFocusRadiusSq = collisionFocusRadius * collisionFocusRadius;
 								}
 								void M2Renderer::clearCollisionFocus() {
 								    collisionFocusEnabled = false;
 								}
 								void M2Renderer::resetQueryStats() {
 								    queryTimeMs = 0.0;
 								    queryCallCount = 0;
 								}
 								M2Renderer::GridCell M2Renderer::toCell(const glm::vec3& p) const {
 								    return GridCell{
 								        static_cast<int>(std::floor(p.x / SPATIAL_CELL_SIZE)),
 								        static_cast<int>(std::floor(p.y / SPATIAL_CELL_SIZE)),
 								        static_cast<int>(std::floor(p.z / SPATIAL_CELL_SIZE))
 								    };
 								}
 								void M2Renderer::rebuildSpatialIndex() {
 								    spatialGrid.clear();
 								    instanceIndexById.clear();
-												Incremental terrain upload + M2 instance dedup hash for city stutter

Terrain finalization was uploading all 256 chunks (GPU fence waits) in one
atomic advanceFinalization call that couldn't be interrupted by the 5ms time
budget. Now split into incremental batches of 16 chunks per call, allowing
the time budget to yield between batches.

M2 instance creation had O(N) dedup scans iterating ALL instances to check
for duplicates. In cities with 5000+ doodads, this caused O(N²) total work
during tile loading. Replaced with hash-based DedupKey map for O(1) lookups.

Changes:
- TerrainRenderer::loadTerrainIncremental: uploads N chunks per call
- FinalizingTile tracks terrainChunkNext for cross-frame progress
- TERRAIN phase yields after preload and after each chunk batch
- M2Renderer::DedupKey hash map replaces linear scan in createInstance
  and createInstanceWithMatrix
- Dedup map maintained through rebuildSpatialIndex and clear paths

											
										
										
											2026-03-07 11:59:19 -08:00
+								    instanceDedupMap_.clear();
-												Optimize collision queries with spatial grid and improve movement CCD

											
										
										
											2026-02-03 16:21:48 -08:00
+								    instanceIndexById.reserve(instances.size());
-												Optimize M2 update loop: skip static doodads, incremental spatial index

- Split M2 instances into fast-path index lists (animated, particle-only,
  particle-all, smoke) to avoid iterating all 46K instances per frame
- Cache model flags (hasAnimation, disableAnimation, isSmoke, etc.) on
  M2Instance struct to eliminate per-frame hash lookups
- Replace full rebuildSpatialIndex on position/transform updates with
  incremental grid cell remove+add, preventing 8.5ms/frame rebuild cost
- Advance animTime for all instances (texture UV animation) but only
  compute bones and particles for the ~3K that need it

M2_UPDATE: 10.7ms → 2.0ms, FPS: 35 → 55-59

											
										
										
											2026-03-02 14:45:49 -08:00
+								    smokeInstanceIndices_.clear();
-												Instance portal glow, spin, and transparent additive rendering

											
										
										
											2026-03-06 18:02:56 -08:00
+								    portalInstanceIndices_.clear();
-												Optimize M2 update loop: skip static doodads, incremental spatial index

- Split M2 instances into fast-path index lists (animated, particle-only,
  particle-all, smoke) to avoid iterating all 46K instances per frame
- Cache model flags (hasAnimation, disableAnimation, isSmoke, etc.) on
  M2Instance struct to eliminate per-frame hash lookups
- Replace full rebuildSpatialIndex on position/transform updates with
  incremental grid cell remove+add, preventing 8.5ms/frame rebuild cost
- Advance animTime for all instances (texture UV animation) but only
  compute bones and particles for the ~3K that need it

M2_UPDATE: 10.7ms → 2.0ms, FPS: 35 → 55-59

											
										
										
											2026-03-02 14:45:49 -08:00
+								    animatedInstanceIndices_.clear();
 								    particleOnlyInstanceIndices_.clear();
 								    particleInstanceIndices_.clear();
-												Optimize collision queries with spatial grid and improve movement CCD

											
										
										
											2026-02-03 16:21:48 -08:00
 								    for (size_t i = 0; i < instances.size(); i++) {
-												Background BLP texture pre-decoding + deferred WMO normal maps (12x streaming perf)

Move CPU-heavy BLP texture decoding from main thread to background worker
threads for all hot paths: terrain M2 models, WMO doodad M2s, WMO textures,
creature models, and gameobject WMOs. Each renderer (M2, WMO, Character) now
accepts a pre-decoded BLP cache that loadTexture() checks before falling back
to synchronous decode.

Defer WMO normal/height map generation (3 per-pixel passes: luminance, box
blur, Sobel) during terrain streaming finalization — this was the dominant
remaining bottleneck after BLP pre-decoding.

Terrain streaming stalls: 1576ms → 124ms worst case.

											
										
										
											2026-03-07 15:46:56 -08:00
+								        auto& inst = instances[i];
-												Optimize collision queries with spatial grid and improve movement CCD

											
										
										
											2026-02-03 16:21:48 -08:00
+								        instanceIndexById[inst.id] = i;
-												Background BLP texture pre-decoding + deferred WMO normal maps (12x streaming perf)

Move CPU-heavy BLP texture decoding from main thread to background worker
threads for all hot paths: terrain M2 models, WMO doodad M2s, WMO textures,
creature models, and gameobject WMOs. Each renderer (M2, WMO, Character) now
accepts a pre-decoded BLP cache that loadTexture() checks before falling back
to synchronous decode.

Defer WMO normal/height map generation (3 per-pixel passes: luminance, box
blur, Sobel) during terrain streaming finalization — this was the dominant
remaining bottleneck after BLP pre-decoding.

Terrain streaming stalls: 1576ms → 124ms worst case.

											
										
										
											2026-03-07 15:46:56 -08:00
+								        // Re-cache model pointer (may have changed after model map modifications)
 								        auto mdlIt = models.find(inst.modelId);
 								        inst.cachedModel = (mdlIt != models.end()) ? &mdlIt->second : nullptr;
-												Incremental terrain upload + M2 instance dedup hash for city stutter

Terrain finalization was uploading all 256 chunks (GPU fence waits) in one
atomic advanceFinalization call that couldn't be interrupted by the 5ms time
budget. Now split into incremental batches of 16 chunks per call, allowing
the time budget to yield between batches.

M2 instance creation had O(N) dedup scans iterating ALL instances to check
for duplicates. In cities with 5000+ doodads, this caused O(N²) total work
during tile loading. Replaced with hash-based DedupKey map for O(1) lookups.

Changes:
- TerrainRenderer::loadTerrainIncremental: uploads N chunks per call
- FinalizingTile tracks terrainChunkNext for cross-frame progress
- TERRAIN phase yields after preload and after each chunk batch
- M2Renderer::DedupKey hash map replaces linear scan in createInstance
  and createInstanceWithMatrix
- Dedup map maintained through rebuildSpatialIndex and clear paths

											
										
										
											2026-03-07 11:59:19 -08:00
+								        // Rebuild dedup map (skip ground detail)
 								        if (!inst.cachedIsGroundDetail) {
 								            DedupKey dk{inst.modelId,
 								                        static_cast<int32_t>(std::round(inst.position.x * 10.0f)),
 								                        static_cast<int32_t>(std::round(inst.position.y * 10.0f)),
 								                        static_cast<int32_t>(std::round(inst.position.z * 10.0f))};
 								            instanceDedupMap_[dk] = inst.id;
 								        }
-												Optimize M2 update loop: skip static doodads, incremental spatial index

- Split M2 instances into fast-path index lists (animated, particle-only,
  particle-all, smoke) to avoid iterating all 46K instances per frame
- Cache model flags (hasAnimation, disableAnimation, isSmoke, etc.) on
  M2Instance struct to eliminate per-frame hash lookups
- Replace full rebuildSpatialIndex on position/transform updates with
  incremental grid cell remove+add, preventing 8.5ms/frame rebuild cost
- Advance animTime for all instances (texture UV animation) but only
  compute bones and particles for the ~3K that need it

M2_UPDATE: 10.7ms → 2.0ms, FPS: 35 → 55-59

											
										
										
											2026-03-02 14:45:49 -08:00
+								        if (inst.cachedIsSmoke) {
 								            smokeInstanceIndices_.push_back(i);
 								        }
-												Instance portal glow, spin, and transparent additive rendering

											
										
										
											2026-03-06 18:02:56 -08:00
+								        if (inst.cachedIsInstancePortal) {
 								            portalInstanceIndices_.push_back(i);
 								        }
-												Optimize M2 update loop: skip static doodads, incremental spatial index

- Split M2 instances into fast-path index lists (animated, particle-only,
  particle-all, smoke) to avoid iterating all 46K instances per frame
- Cache model flags (hasAnimation, disableAnimation, isSmoke, etc.) on
  M2Instance struct to eliminate per-frame hash lookups
- Replace full rebuildSpatialIndex on position/transform updates with
  incremental grid cell remove+add, preventing 8.5ms/frame rebuild cost
- Advance animTime for all instances (texture UV animation) but only
  compute bones and particles for the ~3K that need it

M2_UPDATE: 10.7ms → 2.0ms, FPS: 35 → 55-59

											
										
										
											2026-03-02 14:45:49 -08:00
+								        if (inst.cachedHasParticleEmitters) {
 								            particleInstanceIndices_.push_back(i);
 								        }
 								        if (inst.cachedHasAnimation && !inst.cachedDisableAnimation) {
 								            animatedInstanceIndices_.push_back(i);
 								        } else if (inst.cachedHasParticleEmitters) {
 								            particleOnlyInstanceIndices_.push_back(i);
 								        }
-												Optimize collision queries with spatial grid and improve movement CCD

											
										
										
											2026-02-03 16:21:48 -08:00
+								        GridCell minCell = toCell(inst.worldBoundsMin);
 								        GridCell maxCell = toCell(inst.worldBoundsMax);
 								        for (int z = minCell.z; z <= maxCell.z; z++) {
 								            for (int y = minCell.y; y <= maxCell.y; y++) {
 								                for (int x = minCell.x; x <= maxCell.x; x++) {
 								                    spatialGrid[GridCell{x, y, z}].push_back(inst.id);
 								                }
 								            }
 								        }
 								    }
-												Fix transport sync and stabilize WMO/tunnel grounding

											
										
										
											2026-02-12 00:04:53 -08:00
+								    spatialIndexDirty_ = false;
-												Optimize collision queries with spatial grid and improve movement CCD

											
										
										
											2026-02-03 16:21:48 -08:00
+								}
 								/// Collects the indices (into `instances`) of every instance registered in a
 								/// spatial-grid cell overlapped by the query box [queryMin, queryMax].
 								///
 								/// @param queryMin   Minimum corner of the query box.
 								/// @param queryMax   Maximum corner of the query box.
 								/// @param outIndices Cleared on entry, then filled with instance indices. Each
 								///                   instance id contributes at most one index per call.
 								///
 								/// If the grid yields nothing while instances exist, every instance index is
 								/// returned instead, so the caller degrades to a full scan rather than missing
 								/// a collider.
 								void M2Renderer::gatherCandidates(const glm::vec3& queryMin, const glm::vec3& queryMax,
 								                                  std::vector<size_t>& outIndices) const {
 								    outIndices.clear();
 								    // Scratch set of ids already emitted during this call; an id can be listed
 								    // in more than one visited cell.
 								    tl_m2_candidateIdScratch.clear();
 								    const GridCell minCell = toCell(queryMin);
 								    const GridCell maxCell = toCell(queryMax);
 								    for (int z = minCell.z; z <= maxCell.z; z++) {
 								        for (int y = minCell.y; y <= maxCell.y; y++) {
 								            for (int x = minCell.x; x <= maxCell.x; x++) {
 								                const auto cellIt = spatialGrid.find(GridCell{x, y, z});
 								                if (cellIt == spatialGrid.end()) continue;
 								                for (uint32_t id : cellIt->second) {
 								                    // Skip ids that an earlier cell already produced.
 								                    if (!tl_m2_candidateIdScratch.insert(id).second) continue;
 								                    // Ids without an index mapping are silently ignored.
 								                    const auto idxIt = instanceIndexById.find(id);
 								                    if (idxIt != instanceIndexById.end()) {
 								                        outIndices.push_back(idxIt->second);
 								                    }
 								                }
 								            }
 								        }
 								    }
 								    // Safety fallback to preserve collision correctness if the spatial index
 								    // misses candidates (e.g. during streaming churn).
 								    if (outIndices.empty() && !instances.empty()) {
 								        outIndices.reserve(instances.size());
 								        for (size_t i = 0; i < instances.size(); i++) {
 								            outIndices.push_back(i);
 								        }
 								    }
 								}
-												Performance optimizations and collision improvements

Performance:
- Remove expensive inverse() from all vertex shaders (terrain, WMO, M2, water, character)
- Add uniform location caching to avoid repeated glGetUniformLocation calls
- Add proper frustum culling for WMO groups using AABB intersection
- Add distance-based culling for WMO and M2 instances
- Add cleanup of unused M2/WMO models when tiles unload

Collision & Movement:
- Add M2 doodad collision detection (fences, boxes, etc.)
- Reduce character eye height (5.0 -> 1.8) and collision radius (2.5 -> 0.5)
- Enable WoW-style movement speed by default (14 units/sec run, 5 walk, 9 back)
- Fix emote grammar ("You waves." -> "You wave.")

Misc:
- Rename window title to "Wowee"

											
										
										
											2026-02-02 23:03:45 -08:00
+								void M2Renderer::cleanupUnusedModels() {
 								    // Build set of model IDs that are still referenced by instances
 								    std::unordered_set<uint32_t> usedModelIds;
 								    for (const auto& instance : instances) {
 								        usedModelIds.insert(instance.modelId);
 								    }
 								    // Find and remove models with no instances
 								    std::vector<uint32_t> toRemove;
 								    for (const auto& [id, model] : models) {
 								        if (usedModelIds.find(id) == usedModelIds.end()) {
 								            toRemove.push_back(id);
 								        }
 								    }
 								    // Delete GPU resources and remove from map
 								    for (uint32_t id : toRemove) {
 								        auto it = models.find(id);
 								        if (it != models.end()) {
-												Vulcan Nightmare

Experimentally bringing up vulcan support

											
										
										
											2026-02-21 19:41:21 -08:00
+								            destroyModelGPU(it->second);
-												Performance optimizations and collision improvements

Performance:
- Remove expensive inverse() from all vertex shaders (terrain, WMO, M2, water, character)
- Add uniform location caching to avoid repeated glGetUniformLocation calls
- Add proper frustum culling for WMO groups using AABB intersection
- Add distance-based culling for WMO and M2 instances
- Add cleanup of unused M2/WMO models when tiles unload

Collision & Movement:
- Add M2 doodad collision detection (fences, boxes, etc.)
- Reduce character eye height (5.0 -> 1.8) and collision radius (2.5 -> 0.5)
- Enable WoW-style movement speed by default (14 units/sec run, 5 walk, 9 back)
- Fix emote grammar ("You waves." -> "You wave.")

Misc:
- Rename window title to "Wowee"

											
										
										
											2026-02-02 23:03:45 -08:00
+								            models.erase(it);
 								        }
 								    }
 								    if (!toRemove.empty()) {
 								        LOG_INFO("M2 cleanup: removed ", toRemove.size(), " unused models, ", models.size(), " remaining");
 								    }
 								}
-												Vulcan Nightmare

Experimentally bringing up Vulkan support

											
										
										
											2026-02-21 19:41:21 -08:00
+								VkTexture* M2Renderer::loadTexture(const std::string& path, uint32_t texFlags) {
-												Normalize texture cache keys to prevent duplicate GPU textures

											
										
										
											2026-02-12 16:15:25 -08:00
+								    auto normalizeKey = [](std::string key) {
 								        std::replace(key.begin(), key.end(), '/', '\\');
 								        std::transform(key.begin(), key.end(), key.begin(),
 								                       [](unsigned char c) { return static_cast<char>(std::tolower(c)); });
 								        return key;
 								    };
 								    std::string key = normalizeKey(path);
-												Initial commit: wowee native WoW 3.3.5a client

											
										
										
											2026-02-02 12:24:50 -08:00
+								    // Check cache
-												Normalize texture cache keys to prevent duplicate GPU textures

											
										
										
											2026-02-12 16:15:25 -08:00
+								    auto it = textureCache.find(key);
-												Initial commit: wowee native WoW 3.3.5a client

											
										
										
											2026-02-02 12:24:50 -08:00
+								    if (it != textureCache.end()) {
-												Bound MPQ archive lookup cache; remove always-on composite dumps; track texture cache entries

											
										
										
											2026-02-12 16:29:36 -08:00
+								        it->second.lastUse = ++textureCacheCounter_;
-												Vulcan Nightmare

Experimentally bringing up vulcan support

											
										
										
											2026-02-21 19:41:21 -08:00
+								        return it->second.texture.get();
-												Initial commit: wowee native WoW 3.3.5a client

											
										
										
											2026-02-02 12:24:50 -08:00
+								    }
-												Allow texture load retries instead of permanently caching failures

Remove negative cache for transient load failures in M2, terrain, and
character renderers. Failed textures return white and will be retried
on next model/tile load when assets may have finished streaming.

											
										
										
											2026-02-23 06:51:06 -08:00
+								    // No negative cache check — allow retries for transiently missing textures
-												Initial commit: wowee native WoW 3.3.5a client

											
										
										
											2026-02-02 12:24:50 -08:00
-												Fix quest flow regressions, tooltip compare stats, and M2 alpha-key handling

											
										
										
											2026-02-19 02:27:01 -08:00
+								    auto containsToken = [](const std::string& haystack, const char* token) {
 								        return haystack.find(token) != std::string::npos;
 								    };
 								    const bool colorKeyBlackHint =
 								        containsToken(key, "candle") ||
 								        containsToken(key, "flame") ||
 								        containsToken(key, "fire") ||
-												Fix black background on lamp/lantern/torch glow effects

Three-part fix for glow textures showing opaque black rectangles instead
of being transparent:

1. Pass blend mode to fragment shader via uBlendMode uniform. For additive
   blend modes (3=Add, 6=BlendAdd), discard near-black fragments (maxRGB
   < 0.1) since they contribute nothing visually but render as dark
   rectangles against sky/terrain.

2. Expand colorKeyBlack texture keyword detection to include "lamp",
   "lantern", "glow", "flare", "brazier", "campfire", "bonfire" in
   addition to the existing "candle", "flame", "fire", "torch".

3. Expand flameLikeModel detection for glow sprite conversion to include
   "brazier", "campfire", "bonfire". Also compute glow centers for
   colorKeyBlack batches (not just blendMode >= 3) so glow sprites
   position correctly for all flame-like objects.

											
										
										
											2026-02-19 18:19:52 -08:00
+								        containsToken(key, "torch") ||
 								        containsToken(key, "lamp") ||
 								        containsToken(key, "lantern") ||
 								        containsToken(key, "glow") ||
 								        containsToken(key, "flare") ||
 								        containsToken(key, "brazier") ||
 								        containsToken(key, "campfire") ||
 								        containsToken(key, "bonfire");
-												Fix quest flow regressions, tooltip compare stats, and M2 alpha-key handling

											
										
										
											2026-02-19 02:27:01 -08:00
-												Background BLP texture pre-decoding + deferred WMO normal maps (12x streaming perf)

Move CPU-heavy BLP texture decoding from main thread to background worker
threads for all hot paths: terrain M2 models, WMO doodad M2s, WMO textures,
creature models, and gameobject WMOs. Each renderer (M2, WMO, Character) now
accepts a pre-decoded BLP cache that loadTexture() checks before falling back
to synchronous decode.

Defer WMO normal/height map generation (3 per-pixel passes: luminance, box
blur, Sobel) during terrain streaming finalization — this was the dominant
remaining bottleneck after BLP pre-decoding.

Terrain streaming stalls: 1576ms → 124ms worst case.

											
										
										
											2026-03-07 15:46:56 -08:00
+								    // Check pre-decoded BLP cache first (populated by background worker threads)
 								    pipeline::BLPImage blp;
 								    if (predecodedBLPCache_) {
 								        auto pit = predecodedBLPCache_->find(key);
 								        if (pit != predecodedBLPCache_->end()) {
 								            blp = std::move(pit->second);
 								            predecodedBLPCache_->erase(pit);
 								        }
 								    }
 								    if (!blp.isValid()) {
 								        blp = assetManager->loadTexture(key);
 								    }
-												Initial commit: wowee native WoW 3.3.5a client

											
										
										
											2026-02-02 12:24:50 -08:00
+								    if (!blp.isValid()) {
-												Allow texture load retries instead of permanently caching failures

Remove negative cache for transient load failures in M2, terrain, and
character renderers. Failed textures return white and will be retried
on next model/tile load when assets may have finished streaming.

											
										
										
											2026-02-23 06:51:06 -08:00
+								        // Return white fallback but don't cache the failure — MPQ reads can
 								        // fail transiently during streaming; allow retry on next model load.
-												Stabilize streaming memory and parser handling; revert socket recv optimizations

											
										
										
											2026-02-22 07:26:54 -08:00
+								        if (loggedTextureLoadFails_.insert(key).second) {
-												Harden runtime against stutter-inducing log floods and missing display IDs

- Re-gate M2 glow diagnostics behind WOWEE_M2_GLOW_DIAG and DEBUG

- Deduplicate missing/failed texture warnings in asset and M2 texture loaders

- Deduplicate unhandled opcode warnings by state/opcode key in non-IN_WORLD phases

- Throttle malformed spline point-count warnings across world/classic/tbc parsers

- Ignore suspiciously huge display IDs from malformed packets with throttled warning

- Add nearest-known displayId model fallback cache for missing creature display mappings

- Clear display fallback caches on expansion reload and logout

											
										
										
											2026-02-21 04:05:53 -08:00
+								            LOG_WARNING("M2: Failed to load texture: ", path);
 								        }
-												Vulcan Nightmare

Experimentally bringing up vulcan support

											
										
										
											2026-02-21 19:41:21 -08:00
+								        return whiteTexture_.get();
-												Initial commit: wowee native WoW 3.3.5a client

											
										
										
											2026-02-02 12:24:50 -08:00
+								    }
-												Stabilize streaming memory and parser handling; revert socket recv optimizations

											
										
										
											2026-02-22 07:26:54 -08:00
+								    size_t base = static_cast<size_t>(blp.width) * static_cast<size_t>(blp.height) * 4ull;
 								    size_t approxBytes = base + (base / 3);
 								    if (textureCacheBytes_ + approxBytes > textureCacheBudgetBytes_) {
-												Stabilize net parsing and reduce texture-cache churn

											
										
										
											2026-02-22 07:44:32 -08:00
+								        static constexpr size_t kMaxFailedTextureCache = 200000;
 								        if (failedTextureCache_.size() < kMaxFailedTextureCache) {
 								            // Cache budget-rejected keys too; without this we repeatedly decode/load
 								            // the same textures every frame once budget is saturated.
 								            failedTextureCache_.insert(key);
 								        }
-												Increase texture cache budgets to 4GB and cap repetitive warnings

Raise all texture cache defaults from 1GB to 4GB to reduce rejections.
Cap cache-full warnings (texture + model) to 3 messages per renderer,
and cap update block parse errors to 5 messages.

											
										
										
											2026-02-23 04:32:58 -08:00
+								        if (textureBudgetRejectWarnings_ < 3) {
-												Stabilize streaming memory and parser handling; revert socket recv optimizations

											
										
										
											2026-02-22 07:26:54 -08:00
+								            LOG_WARNING("M2 texture cache full (", textureCacheBytes_ / (1024 * 1024),
 								                        " MB / ", textureCacheBudgetBytes_ / (1024 * 1024),
 								                        " MB), rejecting texture: ", path);
 								        }
 								        ++textureBudgetRejectWarnings_;
 								        return whiteTexture_.get();
 								    }
-												Fix particle glow transparency for textures without alpha

											
										
										
											2026-02-14 22:32:12 -08:00
+								    // Track whether the texture actually uses alpha (any pixel with alpha < 255).
 								    bool hasAlpha = false;
-												Revert glow pixel detection and forced additive override, add diagnostics

The pixel content glow detection (>60% dark = glow) was too aggressive,
flagging dark metal textures on sconces as glow textures and making
structural geometry transparent. The forced additive blending for
colorKeyBlack batches compounded the issue.

Reverted both. Added per-batch diagnostic logging for models containing
"light", "lamp", or "lantern" to identify the actual blend modes and
material flags on Stormwind bridge lamps.

											
										
										
											2026-02-19 18:30:34 -08:00
+								    for (size_t i = 3; i < blp.data.size(); i += 4) {
 								        if (blp.data[i] != 255) {
 								            hasAlpha = true;
 								            break;
 								        }
 								    }
-												Fix particle glow transparency for textures without alpha

											
										
										
											2026-02-14 22:32:12 -08:00
-												Vulcan Nightmare

Experimentally bringing up vulcan support

											
										
										
											2026-02-21 19:41:21 -08:00
+								    // Create Vulkan texture
 								    auto tex = std::make_unique<VkTexture>();
 								    tex->upload(*vkCtx_, blp.data.data(), blp.width, blp.height, VK_FORMAT_R8G8B8A8_UNORM);
-												Initial commit: wowee native WoW 3.3.5a client

											
										
										
											2026-02-02 12:24:50 -08:00
-												Fix M2 white shell artifact from missing textures, add opacity track support

Batches whose named texture fails to load now render invisible instead of
white (the swampreeds01a.blp case causing a white shell around aquatic plants).

Also implements proper M2 opacity plumbing:
- Parse texture weight tracks (M2Track<fixed16>) and color animation alpha
  tracks (M2Color.alpha) to resolve per-batch opacity at load time
- Skip batches with batchOpacity < 0.01 in the render loop
- Apply M2Texture.flags (bit0=WrapS, bit1=WrapT) to GL sampler wrap mode
- Upload both UV sets (texCoords[0] and texCoords[1]) and select via
  textureUnit uniform, so batches referencing UV set 1 render correctly

											
										
										
											2026-02-17 23:52:44 -08:00
+								    // M2Texture flags: bit 0 = WrapS (1=repeat, 0=clamp), bit 1 = WrapT
-												Vulcan Nightmare

Experimentally bringing up vulcan support

											
										
										
											2026-02-21 19:41:21 -08:00
+								    VkSamplerAddressMode wrapS = (texFlags & 0x1) ? VK_SAMPLER_ADDRESS_MODE_REPEAT : VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE;
 								    VkSamplerAddressMode wrapT = (texFlags & 0x2) ? VK_SAMPLER_ADDRESS_MODE_REPEAT : VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE;
 								    tex->createSampler(vkCtx_->getDevice(), VK_FILTER_LINEAR, wrapS, wrapT);
-												Initial commit: wowee native WoW 3.3.5a client

											
										
										
											2026-02-02 12:24:50 -08:00
-												Vulcan Nightmare

Experimentally bringing up vulcan support

											
										
										
											2026-02-21 19:41:21 -08:00
+								    VkTexture* texPtr = tex.get();
-												Initial commit: wowee native WoW 3.3.5a client

											
										
										
											2026-02-02 12:24:50 -08:00
-												Bound MPQ archive lookup cache; remove always-on composite dumps; track texture cache entries

											
										
										
											2026-02-12 16:29:36 -08:00
+								    TextureCacheEntry e;
-												Vulcan Nightmare

Experimentally bringing up vulcan support

											
										
										
											2026-02-21 19:41:21 -08:00
+								    e.texture = std::move(tex);
-												Stabilize streaming memory and parser handling; revert socket recv optimizations

											
										
										
											2026-02-22 07:26:54 -08:00
+								    e.approxBytes = approxBytes;
-												Fix particle glow transparency for textures without alpha

											
										
										
											2026-02-14 22:32:12 -08:00
+								    e.hasAlpha = hasAlpha;
-												Revert glow pixel detection and forced additive override, add diagnostics

The pixel content glow detection (>60% dark = glow) was too aggressive,
flagging dark metal textures on sconces as glow textures and making
structural geometry transparent. The forced additive blending for
colorKeyBlack batches compounded the issue.

Reverted both. Added per-batch diagnostic logging for models containing
"light", "lamp", or "lantern" to identify the actual blend modes and
material flags on Stormwind bridge lamps.

											
										
										
											2026-02-19 18:30:34 -08:00
+								    e.colorKeyBlack = colorKeyBlackHint;
-												Bound MPQ archive lookup cache; remove always-on composite dumps; track texture cache entries

											
										
										
											2026-02-12 16:29:36 -08:00
+								    e.lastUse = ++textureCacheCounter_;
 								    textureCacheBytes_ += e.approxBytes;
-												Vulcan Nightmare

Experimentally bringing up vulcan support

											
										
										
											2026-02-21 19:41:21 -08:00
+								    textureCache[key] = std::move(e);
 								    textureHasAlphaByPtr_[texPtr] = hasAlpha;
 								    textureColorKeyBlackByPtr_[texPtr] = colorKeyBlackHint;
-												Initial commit: wowee native WoW 3.3.5a client

											
										
										
											2026-02-02 12:24:50 -08:00
+								    LOG_DEBUG("M2: Loaded texture: ", path, " (", blp.width, "x", blp.height, ")");
-												Vulcan Nightmare

Experimentally bringing up vulcan support

											
										
										
											2026-02-21 19:41:21 -08:00
+								    return texPtr;
-												Initial commit: wowee native WoW 3.3.5a client

											
										
										
											2026-02-02 12:24:50 -08:00
+								}
 								/// Sums the triangle count over all instances.
 								///
 								/// Each instance with a non-null `cachedModel` contributes that model's
 								/// `indexCount / 3` (integer division); instances without a cached model
 								/// contribute nothing.
 								///
 								/// @return Total triangle count accumulated in a 32-bit unsigned integer.
 								uint32_t M2Renderer::getTotalTriangleCount() const {
 								    uint32_t total = 0;
 								    for (const auto& instance : instances) {
 								        if (!instance.cachedModel) continue;
 								        total += instance.cachedModel->indexCount / 3;
 								    }
 								    return total;
 								}
-												Add steep slope limiting to prevent terrain clipping

Added slope normal checking to reject surfaces too steep to walk.
Prevents character/mount from clipping through steep terrain.

Changes:
- Added MIN_WALKABLE_NORMAL threshold (0.7 = ~45° max slope)
- WMO collision: query surface normal, reject if normalZ < 0.7
- M2 collision: query surface normal, reject if normalZ < 0.7
- Updated M2Renderer::getFloorHeight to output surface normal
- M2 already had internal 0.35 check (~70°), new 0.7 is more restrictive

Steep slopes now block movement instead of allowing clipping.

											
										
										
											2026-02-10 20:45:25 -08:00
+								std::optional<float> M2Renderer::getFloorHeight(float glX, float glY, float glZ, float* outNormalZ) const {
-												Optimize collision queries with spatial grid and improve movement CCD

											
										
										
											2026-02-03 16:21:48 -08:00
+								    QueryTimer timer(&queryTimeMs, &queryCallCount);
-												Tune collision feel and align M2 movement/camera behavior

											
										
										
											2026-02-03 15:17:54 -08:00
+								    std::optional<float> bestFloor;
-												Add steep slope limiting to prevent terrain clipping

Added slope normal checking to reject surfaces too steep to walk.
Prevents character/mount from clipping through steep terrain.

Changes:
- Added MIN_WALKABLE_NORMAL threshold (0.7 = ~45° max slope)
- WMO collision: query surface normal, reject if normalZ < 0.7
- M2 collision: query surface normal, reject if normalZ < 0.7
- Updated M2Renderer::getFloorHeight to output surface normal
- M2 already had internal 0.35 check (~70°), new 0.7 is more restrictive

Steep slopes now block movement instead of allowing clipping.

											
										
										
											2026-02-10 20:45:25 -08:00
+								    float bestNormalZ = 1.0f;  // Default to flat
-												Tune collision feel and align M2 movement/camera behavior

											
										
										
											2026-02-03 15:17:54 -08:00
-												Optimize collision queries with spatial grid and improve movement CCD

											
										
										
											2026-02-03 16:21:48 -08:00
+								    glm::vec3 queryMin(glX - 2.0f, glY - 2.0f, glZ - 6.0f);
 								    glm::vec3 queryMax(glX + 2.0f, glY + 2.0f, glZ + 8.0f);
-												Parallel animation updates, thread-safe collision, M2 pop-in fix, shadow stabilization

- Overlap M2 and character animation updates via std::async (~2-5ms saved)
- Thread-local collision scratch buffers for concurrent floor queries
- Parallel terrain/WMO/M2 floor queries in camera controller
- Seed new M2 instance bones from existing siblings to eliminate pop-in flash
- Fix shadow flicker: snap center along stable light axes instead of in view space
- Increase shadow distance default to 300 units (slider max 500)

											
										
										
											2026-03-07 22:29:06 -08:00
+								    gatherCandidates(queryMin, queryMax, tl_m2_candidateScratch);
-												Optimize collision queries with spatial grid and improve movement CCD

											
										
										
											2026-02-03 16:21:48 -08:00
-												Parallel animation updates, thread-safe collision, M2 pop-in fix, shadow stabilization

- Overlap M2 and character animation updates via std::async (~2-5ms saved)
- Thread-local collision scratch buffers for concurrent floor queries
- Parallel terrain/WMO/M2 floor queries in camera controller
- Seed new M2 instance bones from existing siblings to eliminate pop-in flash
- Fix shadow flicker: snap center along stable light axes instead of in view space
- Increase shadow distance default to 300 units (slider max 500)

											
										
										
											2026-03-07 22:29:06 -08:00
+								    for (size_t idx : tl_m2_candidateScratch) {
-												Optimize collision queries with spatial grid and improve movement CCD

											
										
										
											2026-02-03 16:21:48 -08:00
+								        const auto& instance = instances[idx];
 								        if (collisionFocusEnabled &&
 								            pointAABBDistanceSq(collisionFocusPos, instance.worldBoundsMin, instance.worldBoundsMax) > collisionFocusRadiusSq) {
 								            continue;
 								        }
-												Background BLP texture pre-decoding + deferred WMO normal maps (12x streaming perf)

Move CPU-heavy BLP texture decoding from main thread to background worker
threads for all hot paths: terrain M2 models, WMO doodad M2s, WMO textures,
creature models, and gameobject WMOs. Each renderer (M2, WMO, Character) now
accepts a pre-decoded BLP cache that loadTexture() checks before falling back
to synchronous decode.

Defer WMO normal/height map generation (3 per-pixel passes: luminance, box
blur, Sobel) during terrain streaming finalization — this was the dominant
remaining bottleneck after BLP pre-decoding.

Terrain streaming stalls: 1576ms → 124ms worst case.

											
										
										
											2026-03-07 15:46:56 -08:00
+								        if (!instance.cachedModel) continue;
-												Tune collision feel and align M2 movement/camera behavior

											
										
										
											2026-02-03 15:17:54 -08:00
+								        if (instance.scale <= 0.001f) continue;
-												Background BLP texture pre-decoding + deferred WMO normal maps (12x streaming perf)

Move CPU-heavy BLP texture decoding from main thread to background worker
threads for all hot paths: terrain M2 models, WMO doodad M2s, WMO textures,
creature models, and gameobject WMOs. Each renderer (M2, WMO, Character) now
accepts a pre-decoded BLP cache that loadTexture() checks before falling back
to synchronous decode.

Defer WMO normal/height map generation (3 per-pixel passes: luminance, box
blur, Sobel) during terrain streaming finalization — this was the dominant
remaining bottleneck after BLP pre-decoding.

Terrain streaming stalls: 1576ms → 124ms worst case.

											
										
										
											2026-03-07 15:46:56 -08:00
+								        const M2ModelGPU& model = *instance.cachedModel;
-												Add 3D level-up effect using LevelUp.m2 spell model

Replace 2D screen-space ding rings with real WoW LevelUp.m2 particle/geometry
effect. Fix FBlock particle color parsing (C3Vector floats, not CImVector bytes)
which was producing blue/red instead of golden yellow. Spell effect models bypass
particle dampeners, glow sprite conversion, Mod→Additive blend override, and all
collision (floor/wall/camera) to prevent camera zoom-in. Other players' level-ups
trigger the 3D effect at their position with group chat notification. F7 hotkey
for testing.

											
										
										
											2026-02-19 20:36:25 -08:00
+								        if (model.collisionNoBlock || model.isInvisibleTrap || model.isSpellEffect) continue;
-												Fix invisible walls from WMO doodad M2 collision and MOPY filtering

WMO interior doodads (gears, decorations) were blocking player movement
via M2 collision. Skip collision for all WMO doodad M2 instances since
the WMO itself handles wall collision.

Also filter WMO wall collision using MOPY per-triangle flags: only
rendered+collidable triangles block the player, skipping invisible
collision hulls.

Revert tram portal extended range (no longer needed with collision fix).

											
										
										
											2026-03-06 12:26:17 -08:00
+								        if (instance.skipCollision) continue;
-												Add M2 collision mesh parsing and mesh-based wall/floor collision

Parse bounding vertices, triangles, and normals from M2 files and use
them for proper triangle-level collision instead of AABB heuristics.
Spatial grid bucketing for efficient queries, closest-point wall push
with soft clamping, and ray-triangle floor detection alongside existing
AABB fallback.

											
										
										
											2026-02-08 19:56:17 -08:00
 								        // --- Mesh-based floor: vertical ray vs collision triangles ---
 								        // Does NOT skip the AABB path — both contribute and highest wins.
 								        if (model.collision.valid()) {
 								            glm::vec3 localPos = glm::vec3(instance.invModelMatrix * glm::vec4(glX, glY, glZ, 1.0f));
 								            model.collision.getFloorTrisInRange(
 								                localPos.x - 1.0f, localPos.y - 1.0f,
 								                localPos.x + 1.0f, localPos.y + 1.0f,
-												Parallel animation updates, thread-safe collision, M2 pop-in fix, shadow stabilization

- Overlap M2 and character animation updates via std::async (~2-5ms saved)
- Thread-local collision scratch buffers for concurrent floor queries
- Parallel terrain/WMO/M2 floor queries in camera controller
- Seed new M2 instance bones from existing siblings to eliminate pop-in flash
- Fix shadow flicker: snap center along stable light axes instead of in view space
- Increase shadow distance default to 300 units (slider max 500)

											
										
										
											2026-03-07 22:29:06 -08:00
+								                tl_m2_collisionTriScratch);
-												Add M2 collision mesh parsing and mesh-based wall/floor collision

Parse bounding vertices, triangles, and normals from M2 files and use
them for proper triangle-level collision instead of AABB heuristics.
Spatial grid bucketing for efficient queries, closest-point wall push
with soft clamping, and ray-triangle floor detection alongside existing
AABB fallback.

											
										
										
											2026-02-08 19:56:17 -08:00
 								            glm::vec3 rayOrigin(localPos.x, localPos.y, localPos.z + 5.0f);
 								            glm::vec3 rayDir(0.0f, 0.0f, -1.0f);
 								            float bestHitZ = -std::numeric_limits<float>::max();
 								            bool hitAny = false;
-												Parallel animation updates, thread-safe collision, M2 pop-in fix, shadow stabilization

- Overlap M2 and character animation updates via std::async (~2-5ms saved)
- Thread-local collision scratch buffers for concurrent floor queries
- Parallel terrain/WMO/M2 floor queries in camera controller
- Seed new M2 instance bones from existing siblings to eliminate pop-in flash
- Fix shadow flicker: snap center along stable light axes instead of in view space
- Increase shadow distance default to 300 units (slider max 500)

											
										
										
											2026-03-07 22:29:06 -08:00
+								            for (uint32_t ti : tl_m2_collisionTriScratch) {
-												Add M2 collision mesh parsing and mesh-based wall/floor collision

Parse bounding vertices, triangles, and normals from M2 files and use
them for proper triangle-level collision instead of AABB heuristics.
Spatial grid bucketing for efficient queries, closest-point wall push
with soft clamping, and ray-triangle floor detection alongside existing
AABB fallback.

											
										
										
											2026-02-08 19:56:17 -08:00
+								                if (ti >= model.collision.triCount) continue;
 								                if (model.collision.triBounds[ti].maxZ < localPos.z - 10.0f ||
 								                    model.collision.triBounds[ti].minZ > localPos.z + 5.0f) continue;
 								                const auto& verts = model.collision.vertices;
 								                const auto& idx   = model.collision.indices;
 								                const auto& v0 = verts[idx[ti * 3]];
 								                const auto& v1 = verts[idx[ti * 3 + 1]];
 								                const auto& v2 = verts[idx[ti * 3 + 2]];
 								                // Two-sided: try both windings
 								                float tHit = rayTriangleIntersect(rayOrigin, rayDir, v0, v1, v2);
 								                if (tHit < 0.0f)
 								                    tHit = rayTriangleIntersect(rayOrigin, rayDir, v0, v2, v1);
 								                if (tHit < 0.0f) continue;
 								                float hitZ = rayOrigin.z - tHit;
 								                // Walkable normal check (world space)
-												Add steep slope limiting to prevent terrain clipping

Added slope normal checking to reject surfaces too steep to walk.
Prevents character/mount from clipping through steep terrain.

Changes:
- Added MIN_WALKABLE_NORMAL threshold (0.7 = ~45° max slope)
- WMO collision: query surface normal, reject if normalZ < 0.7
- M2 collision: query surface normal, reject if normalZ < 0.7
- Updated M2Renderer::getFloorHeight to output surface normal
- M2 already had an internal 0.35 check (~70°); the new 0.7 threshold is more restrictive

Steep slopes now block movement instead of allowing clipping.

											
										
										
											2026-02-10 20:45:25 -08:00
+								                glm::vec3 worldN(0.0f, 0.0f, 1.0f);  // Default to flat
-												Add M2 collision mesh parsing and mesh-based wall/floor collision

Parse bounding vertices, triangles, and normals from M2 files and use
them for proper triangle-level collision instead of AABB heuristics.
Spatial grid bucketing for efficient queries, closest-point wall push
with soft clamping, and ray-triangle floor detection alongside existing
AABB fallback.

											
										
										
											2026-02-08 19:56:17 -08:00
+								                glm::vec3 localN = glm::cross(v1 - v0, v2 - v0);
 								                float nLen = glm::length(localN);
 								                if (nLen > 0.001f) {
 								                    localN /= nLen;
 								                    if (localN.z < 0.0f) localN = -localN;
-												Add steep slope limiting to prevent terrain clipping

Added slope normal checking to reject surfaces too steep to walk.
Prevents character/mount from clipping through steep terrain.

Changes:
- Added MIN_WALKABLE_NORMAL threshold (0.7 = ~45° max slope)
- WMO collision: query surface normal, reject if normalZ < 0.7
- M2 collision: query surface normal, reject if normalZ < 0.7
- Updated M2Renderer::getFloorHeight to output surface normal
- M2 already had an internal 0.35 check (~70°); the new 0.7 threshold is more restrictive

Steep slopes now block movement instead of allowing clipping.

											
										
										
											2026-02-10 20:45:25 -08:00
+								                    worldN = glm::normalize(
-												Add M2 collision mesh parsing and mesh-based wall/floor collision

Parse bounding vertices, triangles, and normals from M2 files and use
them for proper triangle-level collision instead of AABB heuristics.
Spatial grid bucketing for efficient queries, closest-point wall push
with soft clamping, and ray-triangle floor detection alongside existing
AABB fallback.

											
										
										
											2026-02-08 19:56:17 -08:00
+								                        glm::vec3(instance.modelMatrix * glm::vec4(localN, 0.0f)));
-												Fix stair approach fall-through and relax steep slope climbing

Relaxed walkable slope threshold from 0.40 to 0.35 (~70° max) for
steeper stair climbing. Tightened WMO floor cache above-tolerance
back to 0.25 units to prevent cached stair landing from overriding
approach floor. Added M2 floor preference for ship decks to prevent
falling through to water below.

											
										
										
											2026-02-08 20:31:00 -08:00
+								                    if (std::abs(worldN.z) < 0.35f) continue; // too steep (~70° max slope)
-												Add M2 collision mesh parsing and mesh-based wall/floor collision

Parse bounding vertices, triangles, and normals from M2 files and use
them for proper triangle-level collision instead of AABB heuristics.
Spatial grid bucketing for efficient queries, closest-point wall push
with soft clamping, and ray-triangle floor detection alongside existing
AABB fallback.

											
										
										
											2026-02-08 19:56:17 -08:00
+								                }
 								                if (hitZ <= localPos.z + 3.0f && hitZ > bestHitZ) {
 								                    bestHitZ = hitZ;
 								                    hitAny = true;
-												Add steep slope limiting to prevent terrain clipping

Added slope normal checking to reject surfaces too steep to walk.
Prevents character/mount from clipping through steep terrain.

Changes:
- Added MIN_WALKABLE_NORMAL threshold (0.7 = ~45° max slope)
- WMO collision: query surface normal, reject if normalZ < 0.7
- M2 collision: query surface normal, reject if normalZ < 0.7
- Updated M2Renderer::getFloorHeight to output surface normal
- M2 already had an internal 0.35 check (~70°); the new 0.7 threshold is more restrictive

Steep slopes now block movement instead of allowing clipping.

											
										
										
											2026-02-10 20:45:25 -08:00
+								                    bestNormalZ = std::abs(worldN.z);  // Store normal for output
-												Add M2 collision mesh parsing and mesh-based wall/floor collision

Parse bounding vertices, triangles, and normals from M2 files and use
them for proper triangle-level collision instead of AABB heuristics.
Spatial grid bucketing for efficient queries, closest-point wall push
with soft clamping, and ray-triangle floor detection alongside existing
AABB fallback.

											
										
										
											2026-02-08 19:56:17 -08:00
+								                }
 								            }
 								            if (hitAny) {
 								                glm::vec3 localHit(localPos.x, localPos.y, bestHitZ);
 								                glm::vec3 worldHit = glm::vec3(instance.modelMatrix * glm::vec4(localHit, 1.0f));
 								                if (worldHit.z <= glZ + 3.0f && (!bestFloor || worldHit.z > *bestFloor)) {
 								                    bestFloor = worldHit.z;
 								                }
 								            }
 								            // Fall through to AABB floor — both contribute, highest wins
 								        }
-												Improve targeting, minimap, and bridge collisions

											
										
										
											2026-02-07 20:51:53 -08:00
+								        float zMargin = model.collisionBridge ? 25.0f : 2.0f;
 								        if (glX < instance.worldBoundsMin.x || glX > instance.worldBoundsMax.x ||
 								            glY < instance.worldBoundsMin.y || glY > instance.worldBoundsMax.y ||
 								            glZ < instance.worldBoundsMin.z - zMargin || glZ > instance.worldBoundsMax.z + zMargin) {
 								            continue;
 								        }
-												Tune collision feel and align M2 movement/camera behavior

											
										
										
											2026-02-03 15:17:54 -08:00
+								        glm::vec3 localMin, localMax;
 								        getTightCollisionBounds(model, localMin, localMax);
-												Fix camera occlusion and stabilize WMO/M2 collision behavior

											
										
										
											2026-02-03 16:04:21 -08:00
+								        glm::vec3 localPos = glm::vec3(instance.invModelMatrix * glm::vec4(glX, glY, glZ, 1.0f));
-												Tune collision feel and align M2 movement/camera behavior

											
										
										
											2026-02-03 15:17:54 -08:00
 								        // Must be within doodad footprint in local XY.
-												Tune collision behavior for ramps, props, and structural walls

											
										
										
											2026-02-03 19:10:22 -08:00
+								        // Stepped low platforms get a small pad so walk-up snapping catches edges.
 								        float footprintPad = 0.0f;
 								        if (model.collisionSteppedLowPlatform) {
 								            footprintPad = model.collisionPlanter ? 0.22f : 0.16f;
-												Improve targeting, minimap, and bridge collisions

											
										
										
											2026-02-07 20:51:53 -08:00
+								            if (model.collisionBridge) {
 								                footprintPad = 0.35f;
 								            }
-												Tune collision behavior for ramps, props, and structural walls

											
										
										
											2026-02-03 19:10:22 -08:00
+								        }
 								        if (localPos.x < localMin.x - footprintPad || localPos.x > localMax.x + footprintPad ||
 								            localPos.y < localMin.y - footprintPad || localPos.y > localMax.y + footprintPad) {
-												Tune collision feel and align M2 movement/camera behavior

											
										
										
											2026-02-03 15:17:54 -08:00
+								            continue;
 								        }
 								        // Construct "top" point at queried XY in local space, then transform back.
-												Add stepped fountain collision for parkour-style climbing

											
										
										
											2026-02-03 16:28:33 -08:00
+								        float localTopZ = getEffectiveCollisionTopLocal(model, localPos, localMin, localMax);
 								        glm::vec3 localTop(localPos.x, localPos.y, localTopZ);
-												Tune collision feel and align M2 movement/camera behavior

											
										
										
											2026-02-03 15:17:54 -08:00
+								        glm::vec3 worldTop = glm::vec3(instance.modelMatrix * glm::vec4(localTop, 1.0f));
-												Tune planter curb collision and reduce foliage pushback

											
										
										
											2026-02-03 16:51:25 -08:00
+								        // Reachability filter: allow a bit more climb for stepped low platforms.
-												Tune collision behavior for ramps, props, and structural walls

											
										
										
											2026-02-03 19:10:22 -08:00
+								        float maxStepUp = 1.0f;
-												Add spellbook, fix WMO floor clipping, and polish UI/visuals

- Add spellbook screen (P key) with Spell.dbc name lookup and action bar assignment
- Default Attack and Hearthstone spells available in single player
- Fix WMO floor clipping (gryphon roost) by tightening ceiling rejection threshold
- Darken ocean water, increase wave motion and opacity
- Add M2 model distance fade-in to prevent pop-in
- Reposition chat window, add slash/enter key focus
- Remove debug key commands (keep only F1 perf HUD, N minimap)
- Performance: return chat history by const ref, use deque for O(1) pop_front

											
										
										
											2026-02-04 11:31:08 -08:00
+								        if (model.collisionStatue) {
 								            maxStepUp = 2.5f;
 								        } else if (model.collisionSmallSolidProp) {
-												Tune collision behavior for ramps, props, and structural walls

											
										
										
											2026-02-03 19:10:22 -08:00
+								            maxStepUp = 2.0f;
-												Add spellbook, fix WMO floor clipping, and polish UI/visuals

- Add spellbook screen (P key) with Spell.dbc name lookup and action bar assignment
- Default Attack and Hearthstone spells available in single player
- Fix WMO floor clipping (gryphon roost) by tightening ceiling rejection threshold
- Darken ocean water, increase wave motion and opacity
- Add M2 model distance fade-in to prevent pop-in
- Reposition chat window, add slash/enter key focus
- Remove debug key commands (keep only F1 perf HUD, N minimap)
- Performance: return chat history by const ref, use deque for O(1) pop_front

											
										
										
											2026-02-04 11:31:08 -08:00
+								        } else if (model.collisionSteppedFountain) {
 								            maxStepUp = 2.5f;
-												Tune collision behavior for ramps, props, and structural walls

											
										
										
											2026-02-03 19:10:22 -08:00
+								        } else if (model.collisionSteppedLowPlatform) {
 								            maxStepUp = model.collisionPlanter ? 3.0f : 2.4f;
-												Improve targeting, minimap, and bridge collisions

											
										
										
											2026-02-07 20:51:53 -08:00
+								            if (model.collisionBridge) {
 								                maxStepUp = 25.0f;
 								            }
-												Tune collision behavior for ramps, props, and structural walls

											
										
										
											2026-02-03 19:10:22 -08:00
+								        }
-												Tune planter curb collision and reduce foliage pushback

											
										
										
											2026-02-03 16:51:25 -08:00
+								        if (worldTop.z > glZ + maxStepUp) continue;
-												Tune collision feel and align M2 movement/camera behavior

											
										
										
											2026-02-03 15:17:54 -08:00
 								        if (!bestFloor || worldTop.z > *bestFloor) {
 								            bestFloor = worldTop.z;
 								        }
 								    }
-												Add steep slope limiting to prevent terrain clipping

Added slope normal checking to reject surfaces too steep to walk.
Prevents character/mount from clipping through steep terrain.

Changes:
- Added MIN_WALKABLE_NORMAL threshold (0.7 = ~45° max slope)
- WMO collision: query surface normal, reject if normalZ < 0.7
- M2 collision: query surface normal, reject if normalZ < 0.7
- Updated M2Renderer::getFloorHeight to output surface normal
- M2 already had an internal 0.35 check (~70°); the new 0.7 threshold is more restrictive

Steep slopes now block movement instead of allowing clipping.

											
										
										
											2026-02-10 20:45:25 -08:00
+								    // Output surface normal if requested
 								    if (outNormalZ) {
 								        *outNormalZ = bestNormalZ;
 								    }
-												Tune collision feel and align M2 movement/camera behavior

											
										
										
											2026-02-03 15:17:54 -08:00
+								    return bestFloor;
 								}
-												Performance optimizations and collision improvements

Performance:
- Remove expensive inverse() from all vertex shaders (terrain, WMO, M2, water, character)
- Add uniform location caching to avoid repeated glGetUniformLocation calls
- Add proper frustum culling for WMO groups using AABB intersection
- Add distance-based culling for WMO and M2 instances
- Add cleanup of unused M2/WMO models when tiles unload

Collision & Movement:
- Add M2 doodad collision detection (fences, boxes, etc.)
- Reduce character eye height (5.0 -> 1.8) and collision radius (2.5 -> 0.5)
- Enable WoW-style movement speed by default (14 units/sec run, 5 walk, 9 back)
- Fix emote grammar ("You waves." -> "You wave.")

Misc:
- Rename window title to "Wowee"

											
										
										
											2026-02-02 23:03:45 -08:00
+								bool M2Renderer::checkCollision(const glm::vec3& from, const glm::vec3& to,
 								                                 glm::vec3& adjustedPos, float playerRadius) const {
-												Optimize collision queries with spatial grid and improve movement CCD

											
										
										
											2026-02-03 16:21:48 -08:00
+								    QueryTimer timer(&queryTimeMs, &queryCallCount);
-												Performance optimizations and collision improvements

Performance:
- Remove expensive inverse() from all vertex shaders (terrain, WMO, M2, water, character)
- Add uniform location caching to avoid repeated glGetUniformLocation calls
- Add proper frustum culling for WMO groups using AABB intersection
- Add distance-based culling for WMO and M2 instances
- Add cleanup of unused M2/WMO models when tiles unload

Collision & Movement:
- Add M2 doodad collision detection (fences, boxes, etc.)
- Reduce character eye height (5.0 -> 1.8) and collision radius (2.5 -> 0.5)
- Enable WoW-style movement speed by default (14 units/sec run, 5 walk, 9 back)
- Fix emote grammar ("You waves." -> "You wave.")

Misc:
- Rename window title to "Wowee"

											
										
										
											2026-02-02 23:03:45 -08:00
+								    adjustedPos = to;
 								    bool collided = false;
-												Optimize collision queries with spatial grid and improve movement CCD

											
										
										
											2026-02-03 16:21:48 -08:00
+								    glm::vec3 queryMin = glm::min(from, to) - glm::vec3(7.0f, 7.0f, 5.0f);
 								    glm::vec3 queryMax = glm::max(from, to) + glm::vec3(7.0f, 7.0f, 5.0f);
-												Parallel animation updates, thread-safe collision, M2 pop-in fix, shadow stabilization

- Overlap M2 and character animation updates via std::async (~2-5ms saved)
- Thread-local collision scratch buffers for concurrent floor queries
- Parallel terrain/WMO/M2 floor queries in camera controller
- Seed new M2 instance bones from existing siblings to eliminate pop-in flash
- Fix shadow flicker: snap center along stable light axes instead of in view space
- Increase shadow distance default to 300 units (slider max 500)

											
										
										
											2026-03-07 22:29:06 -08:00
+								    gatherCandidates(queryMin, queryMax, tl_m2_candidateScratch);
-												Optimize collision queries with spatial grid and improve movement CCD

											
										
										
											2026-02-03 16:21:48 -08:00
-												Tune collision feel and align M2 movement/camera behavior

											
										
										
											2026-02-03 15:17:54 -08:00
+								    // Check against all M2 instances in local space (rotation-aware).
-												Parallel animation updates, thread-safe collision, M2 pop-in fix, shadow stabilization

- Overlap M2 and character animation updates via std::async (~2-5ms saved)
- Thread-local collision scratch buffers for concurrent floor queries
- Parallel terrain/WMO/M2 floor queries in camera controller
- Seed new M2 instance bones from existing siblings to eliminate pop-in flash
- Fix shadow flicker: snap center along stable light axes instead of in view space
- Increase shadow distance default to 300 units (slider max 500)

											
										
										
											2026-03-07 22:29:06 -08:00
+								    for (size_t idx : tl_m2_candidateScratch) {
-												Optimize collision queries with spatial grid and improve movement CCD

											
										
										
											2026-02-03 16:21:48 -08:00
+								        const auto& instance = instances[idx];
 								        if (collisionFocusEnabled &&
 								            pointAABBDistanceSq(collisionFocusPos, instance.worldBoundsMin, instance.worldBoundsMax) > collisionFocusRadiusSq) {
 								            continue;
 								        }
 								        const float broadMargin = playerRadius + 1.0f;
 								        if (from.x < instance.worldBoundsMin.x - broadMargin && adjustedPos.x < instance.worldBoundsMin.x - broadMargin) continue;
 								        if (from.x > instance.worldBoundsMax.x + broadMargin && adjustedPos.x > instance.worldBoundsMax.x + broadMargin) continue;
 								        if (from.y < instance.worldBoundsMin.y - broadMargin && adjustedPos.y < instance.worldBoundsMin.y - broadMargin) continue;
 								        if (from.y > instance.worldBoundsMax.y + broadMargin && adjustedPos.y > instance.worldBoundsMax.y + broadMargin) continue;
 								        if (from.z > instance.worldBoundsMax.z + 2.5f && adjustedPos.z > instance.worldBoundsMax.z + 2.5f) continue;
 								        if (from.z + 2.5f < instance.worldBoundsMin.z && adjustedPos.z + 2.5f < instance.worldBoundsMin.z) continue;
-												Background BLP texture pre-decoding + deferred WMO normal maps (12x streaming perf)

Move CPU-heavy BLP texture decoding from main thread to background worker
threads for all hot paths: terrain M2 models, WMO doodad M2s, WMO textures,
creature models, and gameobject WMOs. Each renderer (M2, WMO, Character) now
accepts a pre-decoded BLP cache that loadTexture() checks before falling back
to synchronous decode.

Defer WMO normal/height map generation (3 per-pixel passes: luminance, box
blur, Sobel) during terrain streaming finalization — this was the dominant
remaining bottleneck after BLP pre-decoding.

Terrain streaming stalls: 1576ms → 124ms worst case.

											
										
										
											2026-03-07 15:46:56 -08:00
+								        if (!instance.cachedModel) continue;
-												Performance optimizations and collision improvements

Performance:
- Remove expensive inverse() from all vertex shaders (terrain, WMO, M2, water, character)
- Add uniform location caching to avoid repeated glGetUniformLocation calls
- Add proper frustum culling for WMO groups using AABB intersection
- Add distance-based culling for WMO and M2 instances
- Add cleanup of unused M2/WMO models when tiles unload

Collision & Movement:
- Add M2 doodad collision detection (fences, boxes, etc.)
- Reduce character eye height (5.0 -> 1.8) and collision radius (2.5 -> 0.5)
- Enable WoW-style movement speed by default (14 units/sec run, 5 walk, 9 back)
- Fix emote grammar ("You waves." -> "You wave.")

Misc:
- Rename window title to "Wowee"

											
										
										
											2026-02-02 23:03:45 -08:00
-												Background BLP texture pre-decoding + deferred WMO normal maps (12x streaming perf)

Move CPU-heavy BLP texture decoding from main thread to background worker
threads for all hot paths: terrain M2 models, WMO doodad M2s, WMO textures,
creature models, and gameobject WMOs. Each renderer (M2, WMO, Character) now
accepts a pre-decoded BLP cache that loadTexture() checks before falling back
to synchronous decode.

Defer WMO normal/height map generation (3 per-pixel passes: luminance, box
blur, Sobel) during terrain streaming finalization — this was the dominant
remaining bottleneck after BLP pre-decoding.

Terrain streaming stalls: 1576ms → 124ms worst case.

											
										
										
											2026-03-07 15:46:56 -08:00
+								        const M2ModelGPU& model = *instance.cachedModel;
-												Add 3D level-up effect using LevelUp.m2 spell model

Replace 2D screen-space ding rings with real WoW LevelUp.m2 particle/geometry
effect. Fix FBlock particle color parsing (C3Vector floats, not CImVector bytes)
which was producing blue/red instead of golden yellow. Spell effect models bypass
particle dampeners, glow sprite conversion, Mod→Additive blend override, and all
collision (floor/wall/camera) to prevent camera zoom-in. Other players' level-ups
trigger the 3D effect at their position with group chat notification. F7 hotkey
for testing.

											
										
										
											2026-02-19 20:36:25 -08:00
+								        if (model.collisionNoBlock || model.isInvisibleTrap || model.isSpellEffect) continue;
-												Fix invisible walls from WMO doodad M2 collision and MOPY filtering

WMO interior doodads (gears, decorations) were blocking player movement
via M2 collision. Skip collision for all WMO doodad M2 instances since
the WMO itself handles wall collision.

Also filter WMO wall collision using MOPY per-triangle flags: only
rendered+collidable triangles block the player, skipping invisible
collision hulls.

Revert tram portal extended range (no longer needed with collision fix).

											
										
										
											2026-03-06 12:26:17 -08:00
+								        if (instance.skipCollision) continue;
-												Tune collision feel and align M2 movement/camera behavior

											
										
										
											2026-02-03 15:17:54 -08:00
+								        if (instance.scale <= 0.001f) continue;
-												Performance optimizations and collision improvements

Performance:
- Remove expensive inverse() from all vertex shaders (terrain, WMO, M2, water, character)
- Add uniform location caching to avoid repeated glGetUniformLocation calls
- Add proper frustum culling for WMO groups using AABB intersection
- Add distance-based culling for WMO and M2 instances
- Add cleanup of unused M2/WMO models when tiles unload

Collision & Movement:
- Add M2 doodad collision detection (fences, boxes, etc.)
- Reduce character eye height (5.0 -> 1.8) and collision radius (2.5 -> 0.5)
- Enable WoW-style movement speed by default (14 units/sec run, 5 walk, 9 back)
- Fix emote grammar ("You waves." -> "You wave.")

Misc:
- Rename window title to "Wowee"

											
										
										
											2026-02-02 23:03:45 -08:00
-												Add M2 collision mesh parsing and mesh-based wall/floor collision

Parse bounding vertices, triangles, and normals from M2 files and use
them for proper triangle-level collision instead of AABB heuristics.
Spatial grid bucketing for efficient queries, closest-point wall push
with soft clamping, and ray-triangle floor detection alongside existing
AABB fallback.

											
										
										
											2026-02-08 19:56:17 -08:00
+								        // --- Mesh-based wall collision: closest-point push ---
 								        if (model.collision.valid()) {
 								            glm::vec3 localFrom = glm::vec3(instance.invModelMatrix * glm::vec4(from, 1.0f));
 								            glm::vec3 localPos  = glm::vec3(instance.invModelMatrix * glm::vec4(adjustedPos, 1.0f));
 								            float localRadius = playerRadius / instance.scale;
 								            model.collision.getWallTrisInRange(
 								                std::min(localFrom.x, localPos.x) - localRadius - 1.0f,
 								                std::min(localFrom.y, localPos.y) - localRadius - 1.0f,
 								                std::max(localFrom.x, localPos.x) + localRadius + 1.0f,
 								                std::max(localFrom.y, localPos.y) + localRadius + 1.0f,
-												Parallel animation updates, thread-safe collision, M2 pop-in fix, shadow stabilization

- Overlap M2 and character animation updates via std::async (~2-5ms saved)
- Thread-local collision scratch buffers for concurrent floor queries
- Parallel terrain/WMO/M2 floor queries in camera controller
- Seed new M2 instance bones from existing siblings to eliminate pop-in flash
- Fix shadow flicker: snap center along stable light axes instead of in view space
- Increase shadow distance default to 300 units (slider max 500)

											
										
										
											2026-03-07 22:29:06 -08:00
+								                tl_m2_collisionTriScratch);
-												Add M2 collision mesh parsing and mesh-based wall/floor collision

Parse bounding vertices, triangles, and normals from M2 files and use
them for proper triangle-level collision instead of AABB heuristics.
Spatial grid bucketing for efficient queries, closest-point wall push
with soft clamping, and ray-triangle floor detection alongside existing
AABB fallback.

											
										
										
											2026-02-08 19:56:17 -08:00
 								            constexpr float PLAYER_HEIGHT = 2.0f;
 								            constexpr float MAX_TOTAL_PUSH = 0.02f; // Cap total push per instance
 								            bool pushed = false;
 								            float totalPushX = 0.0f, totalPushY = 0.0f;
-												Parallel animation updates, thread-safe collision, M2 pop-in fix, shadow stabilization

- Overlap M2 and character animation updates via std::async (~2-5ms saved)
- Thread-local collision scratch buffers for concurrent floor queries
- Parallel terrain/WMO/M2 floor queries in camera controller
- Seed new M2 instance bones from existing siblings to eliminate pop-in flash
- Fix shadow flicker: snap center along stable light axes instead of in view space
- Increase shadow distance default to 300 units (slider max 500)

											
										
										
											2026-03-07 22:29:06 -08:00
+								            for (uint32_t ti : tl_m2_collisionTriScratch) {
-												Add M2 collision mesh parsing and mesh-based wall/floor collision

Parse bounding vertices, triangles, and normals from M2 files and use
them for proper triangle-level collision instead of AABB heuristics.
Spatial grid bucketing for efficient queries, closest-point wall push
with soft clamping, and ray-triangle floor detection alongside existing
AABB fallback.

											
										
										
											2026-02-08 19:56:17 -08:00
+								                if (ti >= model.collision.triCount) continue;
 								                if (localPos.z + PLAYER_HEIGHT < model.collision.triBounds[ti].minZ ||
 								                    localPos.z > model.collision.triBounds[ti].maxZ) continue;
 								                // Step-up: only skip wall when player is rising (jumping over it)
 								                constexpr float MAX_STEP_UP = 1.2f;
 								                bool rising = (localPos.z > localFrom.z + 0.05f);
 								                if (rising && localPos.z + MAX_STEP_UP >= model.collision.triBounds[ti].maxZ) continue;
 								                // Early out if we already pushed enough this instance
 								                float totalPushSoFar = std::sqrt(totalPushX * totalPushX + totalPushY * totalPushY);
 								                if (totalPushSoFar >= MAX_TOTAL_PUSH) break;
 								                const auto& verts = model.collision.vertices;
 								                const auto& idx   = model.collision.indices;
 								                const auto& v0 = verts[idx[ti * 3]];
 								                const auto& v1 = verts[idx[ti * 3 + 1]];
 								                const auto& v2 = verts[idx[ti * 3 + 2]];
 								                glm::vec3 closest = closestPointOnTriangle(localPos, v0, v1, v2);
 								                glm::vec3 diff = localPos - closest;
 								                float distXY = std::sqrt(diff.x * diff.x + diff.y * diff.y);
 								                if (distXY < localRadius && distXY > 1e-4f) {
 								                    // Gentle push — very small fraction of penetration
 								                    float penetration = localRadius - distXY;
 								                    float pushDist = std::clamp(penetration * 0.08f, 0.001f, 0.015f);
 								                    float dx = (diff.x / distXY) * pushDist;
 								                    float dy = (diff.y / distXY) * pushDist;
 								                    localPos.x += dx;
 								                    localPos.y += dy;
 								                    totalPushX += dx;
 								                    totalPushY += dy;
 								                    pushed = true;
 								                } else if (distXY < 1e-4f) {
 								                    // On the plane — soft push along triangle normal XY
 								                    glm::vec3 n = glm::cross(v1 - v0, v2 - v0);
 								                    float nxyLen = std::sqrt(n.x * n.x + n.y * n.y);
 								                    if (nxyLen > 1e-4f) {
 								                        float pushDist = std::min(localRadius, 0.015f);
 								                        float dx = (n.x / nxyLen) * pushDist;
 								                        float dy = (n.y / nxyLen) * pushDist;
 								                        localPos.x += dx;
 								                        localPos.y += dy;
 								                        totalPushX += dx;
 								                        totalPushY += dy;
 								                        pushed = true;
 								                    }
 								                }
 								            }
 								            if (pushed) {
 								                glm::vec3 worldPos = glm::vec3(instance.modelMatrix * glm::vec4(localPos, 1.0f));
 								                adjustedPos.x = worldPos.x;
 								                adjustedPos.y = worldPos.y;
 								                collided = true;
 								            }
 								            continue;
 								        }
-												Fix camera occlusion and stabilize WMO/M2 collision behavior

											
										
										
											2026-02-03 16:04:21 -08:00
+								        glm::vec3 localFrom = glm::vec3(instance.invModelMatrix * glm::vec4(from, 1.0f));
 								        glm::vec3 localPos = glm::vec3(instance.invModelMatrix * glm::vec4(adjustedPos, 1.0f));
-												Tune collision behavior for ramps, props, and structural walls

											
										
										
											2026-02-03 19:10:22 -08:00
+								        float radiusScale = model.collisionNarrowVerticalProp ? 0.45f : 1.0f;
 								        float localRadius = (playerRadius * radiusScale) / instance.scale;
-												Tune collision feel and align M2 movement/camera behavior

											
										
										
											2026-02-03 15:17:54 -08:00
-												Add stepped fountain collision for parkour-style climbing

											
										
										
											2026-02-03 16:28:33 -08:00
+								        glm::vec3 rawMin, rawMax;
 								        getTightCollisionBounds(model, rawMin, rawMax);
 								        glm::vec3 localMin = rawMin - glm::vec3(localRadius);
 								        glm::vec3 localMax = rawMax + glm::vec3(localRadius);
 								        float effectiveTop = getEffectiveCollisionTopLocal(model, localPos, rawMin, rawMax) + localRadius;
-												Tune collision behavior for ramps, props, and structural walls

											
										
										
											2026-02-03 19:10:22 -08:00
+								        glm::vec2 localCenter((localMin.x + localMax.x) * 0.5f, (localMin.y + localMax.y) * 0.5f);
 								        float fromR = glm::length(glm::vec2(localFrom.x, localFrom.y) - localCenter);
 								        float toR = glm::length(glm::vec2(localPos.x, localPos.y) - localCenter);
-												Performance optimizations and collision improvements

Performance:
- Remove expensive inverse() from all vertex shaders (terrain, WMO, M2, water, character)
- Add uniform location caching to avoid repeated glGetUniformLocation calls
- Add proper frustum culling for WMO groups using AABB intersection
- Add distance-based culling for WMO and M2 instances
- Add cleanup of unused M2/WMO models when tiles unload

Collision & Movement:
- Add M2 doodad collision detection (fences, boxes, etc.)
- Reduce character eye height (5.0 -> 1.8) and collision radius (2.5 -> 0.5)
- Enable WoW-style movement speed by default (14 units/sec run, 5 walk, 9 back)
- Fix emote grammar ("You waves." -> "You wave.")

Misc:
- Rename window title to "Wowee"

											
										
										
											2026-02-02 23:03:45 -08:00
-												Tune collision feel and align M2 movement/camera behavior

											
										
										
											2026-02-03 15:17:54 -08:00
+								        // Feet-based vertical overlap test: ignore objects fully above/below us.
 								        constexpr float PLAYER_HEIGHT = 2.0f;
-												Add stepped fountain collision for parkour-style climbing

											
										
										
											2026-02-03 16:28:33 -08:00
+								        if (localPos.z + PLAYER_HEIGHT < localMin.z || localPos.z > effectiveTop) {
-												Tune collision feel and align M2 movement/camera behavior

											
										
										
											2026-02-03 15:17:54 -08:00
+								            continue;
 								        }
-												Tune collision behavior for ramps, props, and structural walls

											
										
										
											2026-02-03 19:10:22 -08:00
+								        bool fromInsideXY =
 								            (localFrom.x >= localMin.x && localFrom.x <= localMax.x &&
 								             localFrom.y >= localMin.y && localFrom.y <= localMax.y);
 								        bool fromInsideZ = (localFrom.z + PLAYER_HEIGHT >= localMin.z && localFrom.z <= effectiveTop);
 								        bool escapingOverlap = (fromInsideXY && fromInsideZ && (toR > fromR + 1e-4f));
 								        bool allowEscapeRelax = escapingOverlap && !model.collisionSmallSolidProp;
-												Fix camera occlusion and stabilize WMO/M2 collision behavior

											
										
										
											2026-02-03 16:04:21 -08:00
+								        // Swept hard clamp for taller blockers only.
 								        // Low/stepable objects should be climbable and not "shove" the player off.
-												Tune collision behavior for ramps, props, and structural walls

											
										
										
											2026-02-03 19:10:22 -08:00
+								        float maxStepUp = 1.20f;
-												Add spellbook, fix WMO floor clipping, and polish UI/visuals

- Add spellbook screen (P key) with Spell.dbc name lookup and action bar assignment
- Default Attack and Hearthstone spells available in single player
- Fix WMO floor clipping (gryphon roost) by tightening ceiling rejection threshold
- Darken ocean water, increase wave motion and opacity
- Add M2 model distance fade-in to prevent pop-in
- Reposition chat window, add slash/enter key focus
- Remove debug key commands (keep only F1 perf HUD, N minimap)
- Performance: return chat history by const ref, use deque for O(1) pop_front

											
										
										
											2026-02-04 11:31:08 -08:00
+								        if (model.collisionStatue) {
 								            maxStepUp = 2.5f;
 								        } else if (model.collisionSmallSolidProp) {
-												Tune collision behavior for ramps, props, and structural walls

											
										
										
											2026-02-03 19:10:22 -08:00
+								            // Keep box/crate-class props hard-solid to prevent phase-through.
 								            maxStepUp = 0.75f;
-												Add spellbook, fix WMO floor clipping, and polish UI/visuals

- Add spellbook screen (P key) with Spell.dbc name lookup and action bar assignment
- Default Attack and Hearthstone spells available in single player
- Fix WMO floor clipping (gryphon roost) by tightening ceiling rejection threshold
- Darken ocean water, increase wave motion and opacity
- Add M2 model distance fade-in to prevent pop-in
- Reposition chat window, add slash/enter key focus
- Remove debug key commands (keep only F1 perf HUD, N minimap)
- Performance: return chat history by const ref, use deque for O(1) pop_front

											
										
										
											2026-02-04 11:31:08 -08:00
+								        } else if (model.collisionSteppedFountain) {
 								            maxStepUp = 2.5f;
-												Tune collision behavior for ramps, props, and structural walls

											
										
										
											2026-02-03 19:10:22 -08:00
+								        } else if (model.collisionSteppedLowPlatform) {
 								            maxStepUp = model.collisionPlanter ? 2.8f : 2.4f;
-												Improve targeting, minimap, and bridge collisions

											
										
										
											2026-02-07 20:51:53 -08:00
+								            if (model.collisionBridge) {
 								                maxStepUp = 25.0f;
 								            }
-												Tune collision behavior for ramps, props, and structural walls

											
										
										
											2026-02-03 19:10:22 -08:00
+								        }
-												Tune planter curb collision and reduce foliage pushback

											
										
										
											2026-02-03 16:51:25 -08:00
+								        bool stepableLowObject = (effectiveTop <= localFrom.z + maxStepUp);
 								        bool climbingAttempt = (localPos.z > localFrom.z + 0.18f);
 								        bool nearTop = (localFrom.z >= effectiveTop - 0.30f);
-												Tune collision behavior for ramps, props, and structural walls

											
										
										
											2026-02-03 19:10:22 -08:00
+								        float climbAllowance = model.collisionPlanter ? 0.95f : 0.60f;
 								        if (model.collisionSteppedLowPlatform && !model.collisionPlanter) {
 								            // Let low curb/planter blocks be stepable without sticky side shoves.
 								            climbAllowance = 1.00f;
 								        }
-												Improve targeting, minimap, and bridge collisions

											
										
										
											2026-02-07 20:51:53 -08:00
+								        if (model.collisionBridge) {
 								            climbAllowance = 3.0f;
 								        }
-												Tune collision behavior for ramps, props, and structural walls

											
										
										
											2026-02-03 19:10:22 -08:00
+								        if (model.collisionSmallSolidProp) {
 								            climbAllowance = 1.05f;
 								        }
 								        bool climbingTowardTop = climbingAttempt && (localFrom.z + climbAllowance >= effectiveTop);
 								        bool forceHardLateral =
 								            model.collisionSmallSolidProp &&
 								            !nearTop && !climbingTowardTop;
 								        if ((!stepableLowObject || forceHardLateral) && !allowEscapeRelax) {
-												Fix camera occlusion and stabilize WMO/M2 collision behavior

											
										
										
											2026-02-03 16:04:21 -08:00
+								            float tEnter = 0.0f;
-												Add stepped fountain collision for parkour-style climbing

											
										
										
											2026-02-03 16:28:33 -08:00
+								            glm::vec3 sweepMax = localMax;
 								            sweepMax.z = std::min(sweepMax.z, effectiveTop);
 								            if (segmentIntersectsAABB(localFrom, localPos, localMin, sweepMax, tEnter)) {
-												Fix camera occlusion and stabilize WMO/M2 collision behavior

											
										
										
											2026-02-03 16:04:21 -08:00
+								                float tSafe = std::clamp(tEnter - 0.03f, 0.0f, 1.0f);
 								                glm::vec3 localSafe = localFrom + (localPos - localFrom) * tSafe;
 								                glm::vec3 worldSafe = glm::vec3(instance.modelMatrix * glm::vec4(localSafe, 1.0f));
 								                adjustedPos.x = worldSafe.x;
 								                adjustedPos.y = worldSafe.y;
 								                collided = true;
 								                continue;
 								            }
 								        }
-												Tune collision feel and align M2 movement/camera behavior

											
										
										
											2026-02-03 15:17:54 -08:00
+								        if (localPos.x < localMin.x || localPos.x > localMax.x ||
 								            localPos.y < localMin.y || localPos.y > localMax.y) {
 								            continue;
-												Performance optimizations and collision improvements

Performance:
- Remove expensive inverse() from all vertex shaders (terrain, WMO, M2, water, character)
- Add uniform location caching to avoid repeated glGetUniformLocation calls
- Add proper frustum culling for WMO groups using AABB intersection
- Add distance-based culling for WMO and M2 instances
- Add cleanup of unused M2/WMO models when tiles unload

Collision & Movement:
- Add M2 doodad collision detection (fences, boxes, etc.)
- Reduce character eye height (5.0 -> 1.8) and collision radius (2.5 -> 0.5)
- Enable WoW-style movement speed by default (14 units/sec run, 5 walk, 9 back)
- Fix emote grammar ("You waves." -> "You wave.")

Misc:
- Rename window title to "Wowee"

											
										
										
											2026-02-02 23:03:45 -08:00
+								        }
-												Tune collision feel and align M2 movement/camera behavior

											
										
										
											2026-02-03 15:17:54 -08:00
 								        float pushLeft  = localPos.x - localMin.x;
 								        float pushRight = localMax.x - localPos.x;
 								        float pushBack  = localPos.y - localMin.y;
 								        float pushFront = localMax.y - localPos.y;
 								        float minPush = std::min({pushLeft, pushRight, pushBack, pushFront});
-												Tune collision behavior for ramps, props, and structural walls

											
										
										
											2026-02-03 19:10:22 -08:00
+								        if (allowEscapeRelax) {
 								            continue;
 								        }
-												Soften WMO wall pushback and fix fountain climbing

Cap WMO swept wall collision pushback to 0.15 units (was 0.55) so walls
stop the player without violent shoves. Fix M2 stepped fountain lateral
push using effectiveTop instead of rawMax.z so the near-top check matches
the stepped profile height at the player's radial position.

											
										
										
											2026-02-08 18:53:56 -08:00
+								        if (stepableLowObject && localFrom.z >= effectiveTop - 0.35f) {
-												Tune planter curb collision and reduce foliage pushback

											
										
										
											2026-02-03 16:51:25 -08:00
+								            // Already on/near top surface: don't apply lateral push that ejects
-												Fix M2 interior lighting and carpet sliding

- M2 interior darkening now uses global player-inside-WMO flag instead
  of per-instance queries that were unreliable
- Fix carpet/rug sliding by skipping lateral collision push when player
  is standing on top of any stepable low object, not just platforms

											
										
										
											2026-02-07 17:05:30 -08:00
+								            // the player from the object (carpets, platforms, etc).
-												Tune planter curb collision and reduce foliage pushback

											
										
										
											2026-02-03 16:51:25 -08:00
+								            continue;
 								        }
-												Fix camera occlusion and stabilize WMO/M2 collision behavior

											
										
										
											2026-02-03 16:04:21 -08:00
+								        // Gentle fallback push for overlapping cases.
 								        float pushAmount;
-												Tune collision behavior for ramps, props, and structural walls

											
										
										
											2026-02-03 19:10:22 -08:00
+								        if (model.collisionNarrowVerticalProp) {
 								            pushAmount = std::clamp(minPush * 0.10f, 0.001f, 0.010f);
 								        } else if (model.collisionSteppedLowPlatform) {
-												Improve performance and tune ramp/planter collision behavior

											
										
										
											2026-02-03 17:21:04 -08:00
+								            if (model.collisionPlanter && stepableLowObject) {
 								                pushAmount = std::clamp(minPush * 0.06f, 0.001f, 0.006f);
 								            } else {
 								            pushAmount = std::clamp(minPush * 0.12f, 0.003f, 0.012f);
 								            }
-												Tune planter curb collision and reduce foliage pushback

											
										
										
											2026-02-03 16:51:25 -08:00
+								        } else if (stepableLowObject) {
-												Fix camera occlusion and stabilize WMO/M2 collision behavior

											
										
										
											2026-02-03 16:04:21 -08:00
+								            pushAmount = std::clamp(minPush * 0.12f, 0.002f, 0.015f);
 								        } else {
 								            pushAmount = std::clamp(minPush * 0.28f, 0.010f, 0.045f);
-												Tune collision feel and align M2 movement/camera behavior

											
										
										
											2026-02-03 15:17:54 -08:00
+								        }
 								        glm::vec3 localPush(0.0f);
 								        if (minPush == pushLeft) {
 								            localPush.x = -pushAmount;
 								        } else if (minPush == pushRight) {
 								            localPush.x = pushAmount;
 								        } else if (minPush == pushBack) {
 								            localPush.y = -pushAmount;
 								        } else {
 								            localPush.y = pushAmount;
 								        }
 								        glm::vec3 worldPush = glm::vec3(instance.modelMatrix * glm::vec4(localPush, 0.0f));
 								        adjustedPos.x += worldPush.x;
 								        adjustedPos.y += worldPush.y;
 								        collided = true;
-												Performance optimizations and collision improvements

Performance:
- Remove expensive inverse() from all vertex shaders (terrain, WMO, M2, water, character)
- Add uniform location caching to avoid repeated glGetUniformLocation calls
- Add proper frustum culling for WMO groups using AABB intersection
- Add distance-based culling for WMO and M2 instances
- Add cleanup of unused M2/WMO models when tiles unload

Collision & Movement:
- Add M2 doodad collision detection (fences, boxes, etc.)
- Reduce character eye height (5.0 -> 1.8) and collision radius (2.5 -> 0.5)
- Enable WoW-style movement speed by default (14 units/sec run, 5 walk, 9 back)
- Fix emote grammar ("You waves." -> "You wave.")

Misc:
- Rename window title to "Wowee"

											
										
										
											2026-02-02 23:03:45 -08:00
+								    }
 								    return collided;
 								}
-												Improve rendering distances, camera collision, and spawn point

- Increase WMO render distance from 1500 to 3000 units for better city loading
- Increase M2 render distance from 500 to 1000 units
- Increase terrain load radius from 4 to 6 tiles (~3200 units)
- Add raycast-based camera collision that zooms in when obstructed by walls/objects
- Move spawn point outside chapel to road near Stormwind gate
- Add ground height smoothing to prevent stumbling on uneven terrain

											
										
										
											2026-02-02 23:18:34 -08:00
+								float M2Renderer::raycastBoundingBoxes(const glm::vec3& origin, const glm::vec3& direction, float maxDistance) const {
-												Optimize collision queries with spatial grid and improve movement CCD

											
										
										
											2026-02-03 16:21:48 -08:00
+								    QueryTimer timer(&queryTimeMs, &queryCallCount);
-												Improve rendering distances, camera collision, and spawn point

- Increase WMO render distance from 1500 to 3000 units for better city loading
- Increase M2 render distance from 500 to 1000 units
- Increase terrain load radius from 4 to 6 tiles (~3200 units)
- Add raycast-based camera collision that zooms in when obstructed by walls/objects
- Move spawn point outside chapel to road near Stormwind gate
- Add ground height smoothing to prevent stumbling on uneven terrain

											
										
										
											2026-02-02 23:18:34 -08:00
+								    float closestHit = maxDistance;
-												Optimize collision queries with spatial grid and improve movement CCD

											
										
										
											2026-02-03 16:21:48 -08:00
+								    glm::vec3 rayEnd = origin + direction * maxDistance;
 								    glm::vec3 queryMin = glm::min(origin, rayEnd) - glm::vec3(1.0f);
 								    glm::vec3 queryMax = glm::max(origin, rayEnd) + glm::vec3(1.0f);
-												Parallel animation updates, thread-safe collision, M2 pop-in fix, shadow stabilization

- Overlap M2 and character animation updates via std::async (~2-5ms saved)
- Thread-local collision scratch buffers for concurrent floor queries
- Parallel terrain/WMO/M2 floor queries in camera controller
- Seed new M2 instance bones from existing siblings to eliminate pop-in flash
- Fix shadow flicker: snap center along stable light axes instead of in view space
- Increase shadow distance default to 300 units (slider max 500)

											
										
										
											2026-03-07 22:29:06 -08:00
+								    gatherCandidates(queryMin, queryMax, tl_m2_candidateScratch);
-												Optimize collision queries with spatial grid and improve movement CCD

											
										
										
											2026-02-03 16:21:48 -08:00
-												Parallel animation updates, thread-safe collision, M2 pop-in fix, shadow stabilization

- Overlap M2 and character animation updates via std::async (~2-5ms saved)
- Thread-local collision scratch buffers for concurrent floor queries
- Parallel terrain/WMO/M2 floor queries in camera controller
- Seed new M2 instance bones from existing siblings to eliminate pop-in flash
- Fix shadow flicker: snap center along stable light axes instead of in view space
- Increase shadow distance default to 300 units (slider max 500)

											
										
										
											2026-03-07 22:29:06 -08:00
+								    for (size_t idx : tl_m2_candidateScratch) {
-												Optimize collision queries with spatial grid and improve movement CCD

											
										
										
											2026-02-03 16:21:48 -08:00
+								        const auto& instance = instances[idx];
 								        if (collisionFocusEnabled &&
 								            pointAABBDistanceSq(collisionFocusPos, instance.worldBoundsMin, instance.worldBoundsMax) > collisionFocusRadiusSq) {
 								            continue;
 								        }
 								        // Cheap world-space broad-phase.
 								        float tEnter = 0.0f;
 								        glm::vec3 worldMin = instance.worldBoundsMin - glm::vec3(0.35f);
 								        glm::vec3 worldMax = instance.worldBoundsMax + glm::vec3(0.35f);
 								        if (!segmentIntersectsAABB(origin, origin + direction * maxDistance, worldMin, worldMax, tEnter)) {
 								            continue;
 								        }
-												Background BLP texture pre-decoding + deferred WMO normal maps (12x streaming perf)

Move CPU-heavy BLP texture decoding from main thread to background worker
threads for all hot paths: terrain M2 models, WMO doodad M2s, WMO textures,
creature models, and gameobject WMOs. Each renderer (M2, WMO, Character) now
accepts a pre-decoded BLP cache that loadTexture() checks before falling back
to synchronous decode.

Defer WMO normal/height map generation (3 per-pixel passes: luminance, box
blur, Sobel) during terrain streaming finalization — this was the dominant
remaining bottleneck after BLP pre-decoding.

Terrain streaming stalls: 1576ms → 124ms worst case.

											
										
										
											2026-03-07 15:46:56 -08:00
+								        if (!instance.cachedModel) continue;
-												Improve rendering distances, camera collision, and spawn point

- Increase WMO render distance from 1500 to 3000 units for better city loading
- Increase M2 render distance from 500 to 1000 units
- Increase terrain load radius from 4 to 6 tiles (~3200 units)
- Add raycast-based camera collision that zooms in when obstructed by walls/objects
- Move spawn point outside chapel to road near Stormwind gate
- Add ground height smoothing to prevent stumbling on uneven terrain

											
										
										
											2026-02-02 23:18:34 -08:00
-												Background BLP texture pre-decoding + deferred WMO normal maps (12x streaming perf)

Move CPU-heavy BLP texture decoding from main thread to background worker
threads for all hot paths: terrain M2 models, WMO doodad M2s, WMO textures,
creature models, and gameobject WMOs. Each renderer (M2, WMO, Character) now
accepts a pre-decoded BLP cache that loadTexture() checks before falling back
to synchronous decode.

Defer WMO normal/height map generation (3 per-pixel passes: luminance, box
blur, Sobel) during terrain streaming finalization — this was the dominant
remaining bottleneck after BLP pre-decoding.

Terrain streaming stalls: 1576ms → 124ms worst case.

											
										
										
											2026-03-07 15:46:56 -08:00
+								        const M2ModelGPU& model = *instance.cachedModel;
-												Add 3D level-up effect using LevelUp.m2 spell model

Replace 2D screen-space ding rings with real WoW LevelUp.m2 particle/geometry
effect. Fix FBlock particle color parsing (C3Vector floats, not CImVector bytes)
which was producing blue/red instead of golden yellow. Spell effect models bypass
particle dampeners, glow sprite conversion, Mod→Additive blend override, and all
collision (floor/wall/camera) to prevent camera zoom-in. Other players' level-ups
trigger the 3D effect at their position with group chat notification. F7 hotkey
for testing.

											
										
										
											2026-02-19 20:36:25 -08:00
+								        if (model.collisionNoBlock || model.isInvisibleTrap || model.isSpellEffect) continue;
-												Tune collision feel and align M2 movement/camera behavior

											
										
										
											2026-02-03 15:17:54 -08:00
+								        glm::vec3 localMin, localMax;
 								        getTightCollisionBounds(model, localMin, localMax);
 								        // Skip tiny doodads for camera occlusion; they cause jitter and false hits.
 								        glm::vec3 extents = (localMax - localMin) * instance.scale;
 								        if (glm::length(extents) < 0.75f) continue;
-												Fix camera occlusion and stabilize WMO/M2 collision behavior

											
										
										
											2026-02-03 16:04:21 -08:00
+								        glm::vec3 localOrigin = glm::vec3(instance.invModelMatrix * glm::vec4(origin, 1.0f));
 								        glm::vec3 localDir = glm::normalize(glm::vec3(instance.invModelMatrix * glm::vec4(direction, 0.0f)));
-												Tune collision feel and align M2 movement/camera behavior

											
										
										
											2026-02-03 15:17:54 -08:00
+								        if (!std::isfinite(localDir.x) || !std::isfinite(localDir.y) || !std::isfinite(localDir.z)) {
 								            continue;
 								        }
-												Improve rendering distances, camera collision, and spawn point

- Increase WMO render distance from 1500 to 3000 units for better city loading
- Increase M2 render distance from 500 to 1000 units
- Increase terrain load radius from 4 to 6 tiles (~3200 units)
- Add raycast-based camera collision that zooms in when obstructed by walls/objects
- Move spawn point outside chapel to road near Stormwind gate
- Add ground height smoothing to prevent stumbling on uneven terrain

											
										
										
											2026-02-02 23:18:34 -08:00
-												Tune collision feel and align M2 movement/camera behavior

											
										
										
											2026-02-03 15:17:54 -08:00
+								        // Local-space AABB slab intersection.
 								        glm::vec3 invDir = 1.0f / localDir;
 								        glm::vec3 tMin = (localMin - localOrigin) * invDir;
 								        glm::vec3 tMax = (localMax - localOrigin) * invDir;
-												Improve rendering distances, camera collision, and spawn point

- Increase WMO render distance from 1500 to 3000 units for better city loading
- Increase M2 render distance from 500 to 1000 units
- Increase terrain load radius from 4 to 6 tiles (~3200 units)
- Add raycast-based camera collision that zooms in when obstructed by walls/objects
- Move spawn point outside chapel to road near Stormwind gate
- Add ground height smoothing to prevent stumbling on uneven terrain

											
										
										
											2026-02-02 23:18:34 -08:00
+								        glm::vec3 t1 = glm::min(tMin, tMax);
 								        glm::vec3 t2 = glm::max(tMin, tMax);
 								        float tNear = std::max({t1.x, t1.y, t1.z});
 								        float tFar = std::min({t2.x, t2.y, t2.z});
-												Tune collision feel and align M2 movement/camera behavior

											
										
										
											2026-02-03 15:17:54 -08:00
+								        if (tNear > tFar || tFar <= 0.0f) continue;
 								        float tHit = tNear > 0.0f ? tNear : tFar;
 								        glm::vec3 localHit = localOrigin + localDir * tHit;
 								        glm::vec3 worldHit = glm::vec3(instance.modelMatrix * glm::vec4(localHit, 1.0f));
 								        float worldDist = glm::length(worldHit - origin);
 								        if (worldDist > 0.0f && worldDist < closestHit) {
 								            closestHit = worldDist;
-												Improve rendering distances, camera collision, and spawn point

- Increase WMO render distance from 1500 to 3000 units for better city loading
- Increase M2 render distance from 500 to 1000 units
- Increase terrain load radius from 4 to 6 tiles (~3200 units)
- Add raycast-based camera collision that zooms in when obstructed by walls/objects
- Move spawn point outside chapel to road near Stormwind gate
- Add ground height smoothing to prevent stumbling on uneven terrain

											
										
										
											2026-02-02 23:18:34 -08:00
+								        }
 								    }
 								    return closestHit;
 								}
-												Add configurable MSAA anti-aliasing, update auth screen and terrain shader

- MSAA: conditional 2-att (off) vs 3-att (on) render pass with auto-resolve
- MSAA: multisampled color+depth images, query max supported sample count
- MSAA: .setMultisample() on all 25+ main-pass pipelines across 17 renderers
- MSAA: recreatePipelines() on every sub-renderer for runtime MSAA changes
- MSAA: Renderer::setMsaaSamples() orchestrates swapchain+pipeline+ImGui rebuild
- MSAA: Anti-Aliasing combo (Off/2x/4x/8x) in Video settings, persisted
- Update auth screen assets and terrain fragment shader

											
										
										
											2026-02-22 02:59:24 -08:00
+								void M2Renderer::recreatePipelines() {
 								    // Rebuilds every main-pass graphics pipeline owned by this renderer (M2 model,
 								    // particle and smoke pipelines) against the context's current render pass and
 								    // MSAA sample count. Shadow pipelines and all pipeline layouts are left alone.
 								    // Does nothing when no Vulkan context is attached.
 								    // NOTE(review): old pipelines are destroyed immediately; this presumably relies
 								    // on the caller having idled the device first - confirm at the call site.
 								    if (!vkCtx_) return;
 								    VkDevice device = vkCtx_->getDevice();
 								    // Destroy old main-pass pipelines (NOT shadow, NOT pipeline layouts)
 								    auto destroyPipeline = [device](VkPipeline& pipeline) {
 								        if (pipeline) {
 								            vkDestroyPipeline(device, pipeline, nullptr);
 								            pipeline = VK_NULL_HANDLE;
 								        }
 								    };
 								    destroyPipeline(opaquePipeline_);
 								    destroyPipeline(alphaTestPipeline_);
 								    destroyPipeline(alphaPipeline_);
 								    destroyPipeline(additivePipeline_);
 								    destroyPipeline(particlePipeline_);
 								    destroyPipeline(particleAdditivePipeline_);
 								    destroyPipeline(smokePipeline_);
 								    // --- Load shaders ---
 								    rendering::VkShaderModule m2Vert, m2Frag;
 								    rendering::VkShaderModule particleVert, particleFrag;
 								    rendering::VkShaderModule smokeVert, smokeFrag;
 								    m2Vert.loadFromFile(device, "assets/shaders/m2.vert.spv");
 								    m2Frag.loadFromFile(device, "assets/shaders/m2.frag.spv");
 								    particleVert.loadFromFile(device, "assets/shaders/m2_particle.vert.spv");
 								    particleFrag.loadFromFile(device, "assets/shaders/m2_particle.frag.spv");
 								    smokeVert.loadFromFile(device, "assets/shaders/m2_smoke.vert.spv");
 								    smokeFrag.loadFromFile(device, "assets/shaders/m2_smoke.frag.spv");
 								    // Shared cleanup for every exit path. destroy() is called on modules that may
 								    // never have loaded, exactly as the original success path already did.
 								    auto releaseShaders = [&]() {
 								        m2Vert.destroy(); m2Frag.destroy();
 								        particleVert.destroy(); particleFrag.destroy();
 								        smokeVert.destroy(); smokeFrag.destroy();
 								    };
 								    if (!m2Vert.isValid() || !m2Frag.isValid()) {
 								        LOG_ERROR("M2Renderer::recreatePipelines: missing required shaders");
 								        // Release whatever did load (e.g. particle/smoke modules) before bailing out,
 								        // so the failure path cleans up the same way the success path does.
 								        releaseShaders();
 								        return;
 								    }
 								    VkRenderPass mainPass = vkCtx_->getImGuiRenderPass();
 								    // --- M2 model vertex input ---
 								    // One interleaved binding of 18 floats per vertex; the attribute offsets below
 								    // account for all 18 (3 + 3 + 2 + 2 + 4 + 4).
 								    VkVertexInputBindingDescription m2Binding{};
 								    m2Binding.binding = 0;
 								    m2Binding.stride = 18 * sizeof(float);
 								    m2Binding.inputRate = VK_VERTEX_INPUT_RATE_VERTEX;
 								    std::vector<VkVertexInputAttributeDescription> m2Attrs = {
 								        {0, 0, VK_FORMAT_R32G32B32_SFLOAT, 0},                     // position
 								        {1, 0, VK_FORMAT_R32G32B32_SFLOAT, 3 * sizeof(float)},     // normal
 								        {2, 0, VK_FORMAT_R32G32_SFLOAT, 6 * sizeof(float)},        // texCoord0
 								        {5, 0, VK_FORMAT_R32G32_SFLOAT, 8 * sizeof(float)},        // texCoord1
 								        {3, 0, VK_FORMAT_R32G32B32A32_SFLOAT, 10 * sizeof(float)}, // boneWeights
 								        {4, 0, VK_FORMAT_R32G32B32A32_SFLOAT, 14 * sizeof(float)}, // boneIndices (float)
 								    };
 								    // The four model pipelines differ only in blend state and depth-write flag.
 								    auto buildM2Pipeline = [&](VkPipelineColorBlendAttachmentState blendState, bool depthWrite) -> VkPipeline {
 								        return PipelineBuilder()
 								            .setShaders(m2Vert.stageInfo(VK_SHADER_STAGE_VERTEX_BIT),
 								                        m2Frag.stageInfo(VK_SHADER_STAGE_FRAGMENT_BIT))
 								            .setVertexInput({m2Binding}, m2Attrs)
 								            .setTopology(VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST)
 								            .setRasterization(VK_POLYGON_MODE_FILL, VK_CULL_MODE_NONE)
 								            .setDepthTest(true, depthWrite, VK_COMPARE_OP_LESS_OR_EQUAL)
 								            .setColorBlendAttachment(blendState)
 								            .setMultisample(vkCtx_->getMsaaSamples())
 								            .setLayout(pipelineLayout_)
 								            .setRenderPass(mainPass)
 								            .setDynamicStates({VK_DYNAMIC_STATE_VIEWPORT, VK_DYNAMIC_STATE_SCISSOR})
 								            .build(device);
 								    };
 								    opaquePipeline_ = buildM2Pipeline(PipelineBuilder::blendDisabled(), true);
 								    alphaTestPipeline_ = buildM2Pipeline(PipelineBuilder::blendAlpha(), true);
 								    alphaPipeline_ = buildM2Pipeline(PipelineBuilder::blendAlpha(), false);
 								    additivePipeline_ = buildM2Pipeline(PipelineBuilder::blendAdditive(), false);
 								    // Surface a failed build instead of silently leaving a null handle behind.
 								    // NOTE(review): assumes PipelineBuilder::build() yields VK_NULL_HANDLE on failure - confirm.
 								    if (!opaquePipeline_ || !alphaTestPipeline_ || !alphaPipeline_ || !additivePipeline_) {
 								        LOG_ERROR("M2Renderer::recreatePipelines: failed to build one or more M2 model pipelines");
 								    }
 								    // --- Particle pipelines ---
 								    // Optional: skipped (handles stay null) when either particle shader is missing.
 								    if (particleVert.isValid() && particleFrag.isValid()) {
 								        VkVertexInputBindingDescription pBind{};
 								        pBind.binding = 0;
 								        pBind.stride = 9 * sizeof(float); // pos3 + color4 + size1 + tile1
 								        pBind.inputRate = VK_VERTEX_INPUT_RATE_VERTEX;
 								        std::vector<VkVertexInputAttributeDescription> pAttrs = {
 								            {0, 0, VK_FORMAT_R32G32B32_SFLOAT, 0},                    // position
 								            {1, 0, VK_FORMAT_R32G32B32A32_SFLOAT, 3 * sizeof(float)}, // color
 								            {2, 0, VK_FORMAT_R32_SFLOAT, 7 * sizeof(float)},          // size
 								            {3, 0, VK_FORMAT_R32_SFLOAT, 8 * sizeof(float)},          // tile
 								        };
 								        // Particles are drawn as a point list, depth-tested without depth writes.
 								        auto buildParticlePipeline = [&](VkPipelineColorBlendAttachmentState blend) -> VkPipeline {
 								            return PipelineBuilder()
 								                .setShaders(particleVert.stageInfo(VK_SHADER_STAGE_VERTEX_BIT),
 								                            particleFrag.stageInfo(VK_SHADER_STAGE_FRAGMENT_BIT))
 								                .setVertexInput({pBind}, pAttrs)
 								                .setTopology(VK_PRIMITIVE_TOPOLOGY_POINT_LIST)
 								                .setRasterization(VK_POLYGON_MODE_FILL, VK_CULL_MODE_NONE)
 								                .setDepthTest(true, false, VK_COMPARE_OP_LESS_OR_EQUAL)
 								                .setColorBlendAttachment(blend)
 								                .setMultisample(vkCtx_->getMsaaSamples())
 								                .setLayout(particlePipelineLayout_)
 								                .setRenderPass(mainPass)
 								                .setDynamicStates({VK_DYNAMIC_STATE_VIEWPORT, VK_DYNAMIC_STATE_SCISSOR})
 								                .build(device);
 								        };
 								        particlePipeline_ = buildParticlePipeline(PipelineBuilder::blendAlpha());
 								        particleAdditivePipeline_ = buildParticlePipeline(PipelineBuilder::blendAdditive());
 								    }
 								    // --- Smoke pipeline ---
 								    // Optional: skipped (handle stays null) when either smoke shader is missing.
 								    if (smokeVert.isValid() && smokeFrag.isValid()) {
 								        VkVertexInputBindingDescription sBind{};
 								        sBind.binding = 0;
 								        sBind.stride = 6 * sizeof(float); // pos3 + lifeRatio1 + size1 + isSpark1
 								        sBind.inputRate = VK_VERTEX_INPUT_RATE_VERTEX;
 								        std::vector<VkVertexInputAttributeDescription> sAttrs = {
 								            {0, 0, VK_FORMAT_R32G32B32_SFLOAT, 0},           // position
 								            {1, 0, VK_FORMAT_R32_SFLOAT, 3 * sizeof(float)}, // lifeRatio
 								            {2, 0, VK_FORMAT_R32_SFLOAT, 4 * sizeof(float)}, // size
 								            {3, 0, VK_FORMAT_R32_SFLOAT, 5 * sizeof(float)}, // isSpark
 								        };
 								        smokePipeline_ = PipelineBuilder()
 								            .setShaders(smokeVert.stageInfo(VK_SHADER_STAGE_VERTEX_BIT),
 								                        smokeFrag.stageInfo(VK_SHADER_STAGE_FRAGMENT_BIT))
 								            .setVertexInput({sBind}, sAttrs)
 								            .setTopology(VK_PRIMITIVE_TOPOLOGY_POINT_LIST)
 								            .setRasterization(VK_POLYGON_MODE_FILL, VK_CULL_MODE_NONE)
 								            .setDepthTest(true, false, VK_COMPARE_OP_LESS_OR_EQUAL)
 								            .setColorBlendAttachment(PipelineBuilder::blendAlpha())
 								            .setMultisample(vkCtx_->getMsaaSamples())
 								            .setLayout(smokePipelineLayout_)
 								            .setRenderPass(mainPass)
 								            .setDynamicStates({VK_DYNAMIC_STATE_VIEWPORT, VK_DYNAMIC_STATE_SCISSOR})
 								            .build(device);
 								    }
 								    // The shader modules are only needed while the pipelines are being built.
 								    releaseShaders();
 								    core::Logger::getInstance().info("M2Renderer: pipelines recreated");
 								}
-												Initial commit: wowee native WoW 3.3.5a client

											
										
										
											2026-02-02 12:24:50 -08:00
+								} // namespace rendering
 								} // namespace wowee