Fix missing floors in dungeon instances by restricting LOD detection

Low-vertex groups (<100 verts) were incorrectly marked as distance-only LOD shells in small WMOs such as the Stockades. The heuristic is now applied only to large WMOs (50+ groups), where it is needed for city exterior shells.
Per-instance NPC hair/skin textures, fix binary search float comparison
2026-04-17 09:33:51 +00:00 · 2026-03-04 09:25:00 -08:00 · 2026-03-04 09:19:02 -08:00 · 2026-03-04 08:33:56 -08:00 · 2026-03-04 08:28:21 -08:00 · 2026-03-04 08:17:32 -08:00
10 changed files with 231 additions and 92 deletions
--- a/assets/shaders/m2.frag.glsl
+++ b/assets/shaders/m2.frag.glsl
@ -34,6 +34,7 @@ layout(location = 1) in vec3 Normal;
 layout(location = 2) in vec2 TexCoord;
 layout(location = 3) flat in vec3 InstanceOrigin;
 layout(location = 4) in float ModelHeight;
+layout(location = 5) in float vFadeAlpha;

 layout(location = 0) out vec4 outColor;

@ -175,16 +176,16 @@ void main() {
    float fogFactor = clamp((fogParams.y - dist) / (fogParams.y - fogParams.x), 0.0, 1.0);
    result = mix(fogColor.rgb, result, fogFactor);

-    float outAlpha = texColor.a * fadeAlpha;
+    float outAlpha = texColor.a * vFadeAlpha;
    // Cutout materials should not remain partially transparent after discard,
    // otherwise foliage cards look view-dependent.
    if (alphaTest != 0 || colorKeyBlack != 0) {
-        outAlpha = fadeAlpha;
+        outAlpha = vFadeAlpha;
    }
    // Foliage cutout should stay opaque after alpha discard to avoid
    // view-angle translucency artifacts.
    if (alphaTest == 2 || alphaTest == 3) {
-        outAlpha = 1.0 * fadeAlpha;
+        outAlpha = 1.0 * vFadeAlpha;
    }
    outColor = vec4(result, outAlpha);
 }
--- a/assets/shaders/m2.frag.spv
+++ b/assets/shaders/m2.frag.spv
--- a/assets/shaders/m2.vert.glsl
+++ b/assets/shaders/m2.vert.glsl
@ -19,6 +19,7 @@ layout(push_constant) uniform Push {
    int texCoordSet;
    int useBones;
    int isFoliage;
+    float fadeAlpha;
 } push;

 layout(set = 2, binding = 0) readonly buffer BoneSSBO {
@ -37,6 +38,7 @@ layout(location = 1) out vec3 Normal;
 layout(location = 2) out vec2 TexCoord;
 layout(location = 3) flat out vec3 InstanceOrigin;
 layout(location = 4) out float ModelHeight;
+layout(location = 5) out float vFadeAlpha;

 void main() {
    vec4 pos = vec4(aPos, 1.0);
@ -86,6 +88,7 @@ void main() {

    InstanceOrigin = push.model[3].xyz;
    ModelHeight = pos.z;
+    vFadeAlpha = push.fadeAlpha;

    gl_Position = projection * view * worldPos;
 }
--- a/assets/shaders/m2.vert.spv
+++ b/assets/shaders/m2.vert.spv
--- a/include/rendering/character_renderer.hpp
+++ b/include/rendering/character_renderer.hpp
@ -178,6 +178,9 @@ private:
        bool hasOverrideModelMatrix = false;
        glm::mat4 overrideModelMatrix{1.0f};

+        // Bone update throttling (skip frames for distant characters)
+        uint32_t boneUpdateCounter = 0;
+
        // Per-instance bone SSBO (double-buffered per frame)
        VkBuffer boneBuffer[2] = {};
        VmaAllocation boneAlloc[2] = {};
--- a/include/rendering/m2_renderer.hpp
+++ b/include/rendering/m2_renderer.hpp
@ -179,6 +179,9 @@ struct M2Instance {
    bool cachedDisableAnimation = false;
    bool cachedIsSmoke = false;
    bool cachedHasParticleEmitters = false;
+    bool cachedIsGroundDetail = false;
+    bool cachedIsInvisibleTrap = false;
+    bool cachedIsValid = false;
    float cachedBoundRadius = 0.0f;

    // Frame-skip optimization (update distant animations less frequently)
@ -371,6 +374,11 @@ private:
    ::VkBuffer m2ParticleVB_ = VK_NULL_HANDLE;
    VmaAllocation m2ParticleVBAlloc_ = VK_NULL_HANDLE;
    void* m2ParticleVBMapped_ = nullptr;
+    // Dedicated glow sprite vertex buffer (separate from particle VB to avoid data race)
+    static constexpr size_t MAX_GLOW_SPRITES = 2000;
+    ::VkBuffer glowVB_ = VK_NULL_HANDLE;
+    VmaAllocation glowVBAlloc_ = VK_NULL_HANDLE;
+    void* glowVBMapped_ = nullptr;

    std::unordered_map<uint32_t, M2ModelGPU> models;
    std::vector<M2Instance> instances;
@ -477,6 +485,7 @@ private:
    // Cached camera state from update() for frustum-culling bones
    glm::vec3 cachedCamPos_ = glm::vec3(0.0f);
    float cachedMaxRenderDistSq_ = 0.0f;
+    float smoothedRenderDist_ = 1000.0f;  // Smoothed render distance to prevent flickering

    // Thread count for parallel bone animation
    uint32_t numAnimThreads_ = 1;
--- a/src/core/application.cpp
+++ b/src/core/application.cpp
@ -4790,6 +4790,77 @@ void Application::spawnOnlineCreature(uint64_t guid, uint32_t displayId, float x
        return;
    }

+    // Per-instance hair/skin texture overrides — runs for ALL NPCs (including cached models)
+    // so that each NPC gets its own hair/skin color regardless of model sharing.
+    {
+        auto itDD = displayDataMap_.find(displayId);
+        if (itDD != displayDataMap_.end() && itDD->second.extraDisplayId != 0) {
+            auto itExtra2 = humanoidExtraMap_.find(itDD->second.extraDisplayId);
+            if (itExtra2 != humanoidExtraMap_.end()) {
+                const auto& extra = itExtra2->second;
+                const auto* md = charRenderer->getModelData(modelId);
+                if (md) {
+                    auto charSectionsDbc2 = assetManager->loadDBC("CharSections.dbc");
+                    if (charSectionsDbc2) {
+                        const auto* csL = pipeline::getActiveDBCLayout()
+                            ? pipeline::getActiveDBCLayout()->getLayout("CharSections") : nullptr;
+                        uint32_t tgtRace = static_cast<uint32_t>(extra.raceId);
+                        uint32_t tgtSex = static_cast<uint32_t>(extra.sexId);
+
+                        // Look up hair texture (section 3)
+                        for (uint32_t r = 0; r < charSectionsDbc2->getRecordCount(); r++) {
+                            uint32_t rId = charSectionsDbc2->getUInt32(r, csL ? (*csL)["RaceID"] : 1);
+                            uint32_t sId = charSectionsDbc2->getUInt32(r, csL ? (*csL)["SexID"] : 2);
+                            if (rId != tgtRace || sId != tgtSex) continue;
+                            uint32_t sec = charSectionsDbc2->getUInt32(r, csL ? (*csL)["BaseSection"] : 3);
+                            if (sec != 3) continue;
+                            uint32_t var = charSectionsDbc2->getUInt32(r, csL ? (*csL)["VariationIndex"] : 4);
+                            uint32_t col = charSectionsDbc2->getUInt32(r, csL ? (*csL)["ColorIndex"] : 5);
+                            if (var != static_cast<uint32_t>(extra.hairStyleId)) continue;
+                            if (col != static_cast<uint32_t>(extra.hairColorId)) continue;
+                            std::string hairPath = charSectionsDbc2->getString(r, csL ? (*csL)["Texture1"] : 6);
+                            if (!hairPath.empty()) {
+                                rendering::VkTexture* hairTex = charRenderer->loadTexture(hairPath);
+                                if (hairTex) {
+                                    for (size_t ti = 0; ti < md->textures.size(); ti++) {
+                                        if (md->textures[ti].type == 6) {
+                                            charRenderer->setTextureSlotOverride(instanceId, static_cast<uint16_t>(ti), hairTex);
+                                        }
+                                    }
+                                }
+                            }
+                            break;
+                        }
+
+                        // Look up skin texture (section 0) for per-instance skin color
+                        for (uint32_t r = 0; r < charSectionsDbc2->getRecordCount(); r++) {
+                            uint32_t rId = charSectionsDbc2->getUInt32(r, csL ? (*csL)["RaceID"] : 1);
+                            uint32_t sId = charSectionsDbc2->getUInt32(r, csL ? (*csL)["SexID"] : 2);
+                            if (rId != tgtRace || sId != tgtSex) continue;
+                            uint32_t sec = charSectionsDbc2->getUInt32(r, csL ? (*csL)["BaseSection"] : 3);
+                            if (sec != 0) continue;
+                            uint32_t col = charSectionsDbc2->getUInt32(r, csL ? (*csL)["ColorIndex"] : 5);
+                            if (col != static_cast<uint32_t>(extra.skinId)) continue;
+                            std::string skinPath = charSectionsDbc2->getString(r, csL ? (*csL)["Texture1"] : 6);
+                            if (!skinPath.empty()) {
+                                rendering::VkTexture* skinTex = charRenderer->loadTexture(skinPath);
+                                if (skinTex) {
+                                    for (size_t ti = 0; ti < md->textures.size(); ti++) {
+                                        uint32_t tt = md->textures[ti].type;
+                                        if (tt == 1 || tt == 11) {
+                                            charRenderer->setTextureSlotOverride(instanceId, static_cast<uint16_t>(ti), skinTex);
+                                        }
+                                    }
+                                }
+                            }
+                            break;
+                        }
+                    }
+                }
+            }
+        }
+    }
+
    // Optional humanoid NPC geoset mask. Disabled by default because forcing geosets
    // causes long-standing visual artifacts on some models (missing waist, phantom
    // bracers, flickering apron overlays). Prefer model defaults.
--- a/src/rendering/character_renderer.cpp
+++ b/src/rendering/character_renderer.cpp
@ -1423,20 +1423,53 @@ void CharacterRenderer::update(float deltaTime, const glm::vec3& cameraPos) {
    }

    // Only update animations for nearby characters (performance optimization)
-    // Collect instances that need updates
+    // Collect instances that need bone recomputation, with distance-based throttling
    std::vector<std::reference_wrapper<CharacterInstance>> toUpdate;
    toUpdate.reserve(instances.size());

    for (auto& pair : instances) {
-        float distSq = glm::distance2(pair.second.position, cameraPos);
-        if (distSq < animUpdateRadiusSq) {
-            toUpdate.push_back(std::ref(pair.second));
+        auto& inst = pair.second;
+
+        // Skip weapon instances — their transforms are set by parent bones
+        if (inst.hasOverrideModelMatrix) continue;
+
+        float distSq = glm::distance2(inst.position, cameraPos);
+        if (distSq >= animUpdateRadiusSq) continue;
+
+        // Always advance animation time (cheap)
+        auto modelIt = models.find(inst.modelId);
+        if (modelIt != models.end() && !modelIt->second.data.sequences.empty()) {
+            if (inst.currentSequenceIndex < 0) {
+                inst.currentSequenceIndex = 0;
+                inst.currentAnimationId = modelIt->second.data.sequences[0].id;
+            }
+            const auto& seq = modelIt->second.data.sequences[inst.currentSequenceIndex];
+            inst.animationTime += deltaTime * 1000.0f;
+            if (seq.duration > 0 && inst.animationTime >= static_cast<float>(seq.duration)) {
+                if (inst.animationLoop) {
+                    inst.animationTime = std::fmod(inst.animationTime, static_cast<float>(seq.duration));
+                } else {
+                    inst.animationTime = static_cast<float>(seq.duration);
+                }
+            }
+        }
+
+        // Distance-tiered bone throttling: near=every frame, mid=every 3rd, far=every 6th
+        uint32_t boneInterval = 1;
+        if (distSq > 60.0f * 60.0f) boneInterval = 6;
+        else if (distSq > 30.0f * 30.0f) boneInterval = 3;
+
+        inst.boneUpdateCounter++;
+        bool needsBones = (inst.boneUpdateCounter >= boneInterval) || inst.boneMatrices.empty();
+        if (needsBones) {
+            inst.boneUpdateCounter = 0;
+            toUpdate.push_back(std::ref(inst));
        }
    }

    const size_t updatedCount = toUpdate.size();

-    // Thread animation updates in chunks to avoid spawning one task per instance.
+    // Thread bone matrix computation in chunks
    if (updatedCount >= 8 && numAnimThreads_ > 1) {
        static const size_t minAnimWorkPerThread = std::max<size_t>(
            16, envSizeOrDefault("WOWEE_CHAR_ANIM_WORK_PER_THREAD", 64));
@ -1446,7 +1479,7 @@ void CharacterRenderer::update(float deltaTime, const glm::vec3& cameraPos) {

        if (numThreads <= 1) {
            for (auto& instRef : toUpdate) {
-                updateAnimation(instRef.get(), deltaTime);
+                calculateBoneMatrices(instRef.get());
            }
        } else {
            const size_t chunkSize = updatedCount / numThreads;
@ -1461,9 +1494,9 @@ void CharacterRenderer::update(float deltaTime, const glm::vec3& cameraPos) {
            for (size_t t = 0; t < numThreads; t++) {
                size_t end = start + chunkSize + (t < remainder ? 1 : 0);
                animFutures_.push_back(std::async(std::launch::async,
-                    [this, &toUpdate, start, end, deltaTime]() {
+                    [this, &toUpdate, start, end]() {
                        for (size_t i = start; i < end; i++) {
-                            updateAnimation(toUpdate[i].get(), deltaTime);
+                            calculateBoneMatrices(toUpdate[i].get());
                        }
                    }));
                start = end;
@ -1474,9 +1507,8 @@ void CharacterRenderer::update(float deltaTime, const glm::vec3& cameraPos) {
            }
        }
    } else {
-        // Sequential for small counts (avoid thread overhead)
        for (auto& instRef : toUpdate) {
-            updateAnimation(instRef.get(), deltaTime);
+            calculateBoneMatrices(instRef.get());
        }
    }

@ -1548,13 +1580,12 @@ int CharacterRenderer::findKeyframeIndex(const std::vector<uint32_t>& timestamps
    if (timestamps.empty()) return -1;
    if (timestamps.size() == 1) return 0;

-    // Binary search for the keyframe bracket
-    for (size_t i = 0; i < timestamps.size() - 1; i++) {
-        if (time < static_cast<float>(timestamps[i + 1])) {
-            return static_cast<int>(i);
-        }
-    }
-    return static_cast<int>(timestamps.size() - 2);
+    // Binary search using float comparison to match original semantics exactly
+    auto it = std::upper_bound(timestamps.begin(), timestamps.end(), time,
+        [](float t, uint32_t ts) { return t < static_cast<float>(ts); });
+    if (it == timestamps.begin()) return 0;
+    size_t idx = static_cast<size_t>(it - timestamps.begin()) - 1;
+    return static_cast<int>(std::min(idx, timestamps.size() - 2));
 }

 glm::vec3 CharacterRenderer::interpolateVec3(const pipeline::M2AnimationTrack& track,
@ -1598,8 +1629,8 @@ glm::quat CharacterRenderer::interpolateQuat(const pipeline::M2AnimationTrack& t
    if (keys.timestamps.empty() || keys.quatValues.empty()) return identity;

    auto safeQuat = [&](const glm::quat& q) -> glm::quat {
-        float len = glm::length(q);
-        if (len < 0.001f || std::isnan(len)) return identity;
+        float lenSq = q.x*q.x + q.y*q.y + q.z*q.z + q.w*q.w;
+        if (lenSq < 0.000001f || std::isnan(lenSq)) return identity;
        return q;
    };

@ -1741,9 +1772,14 @@ void CharacterRenderer::render(VkCommandBuffer cmd, VkDescriptorSet perFrameSet,
            float distSq = glm::dot(toInst, toInst);
            if (distSq > renderRadiusSq) continue;
            if (distSq > nearNoConeCullSq) {
-                float invDist = 1.0f / std::sqrt(distSq);
-                float facingDot = glm::dot(toInst, camForward) * invDist;
-                if (facingDot < backfaceDotCull) continue;
+                // Backface cull without sqrt. Original test: dot(toInst, camFwd) / |toInst| < threshold
+                // ⟺ threshold >= 0: dot < 0 || dot² < threshold² * distSq;  threshold < 0: dot < 0 && dot² > threshold² * distSq
+                float rawDot = glm::dot(toInst, camForward);
+                if (backfaceDotCull >= 0.0f) {
+                    if (rawDot < 0.0f || rawDot * rawDot < backfaceDotCull * backfaceDotCull * distSq) continue;
+                } else {
+                    if (rawDot < 0.0f && rawDot * rawDot > backfaceDotCull * backfaceDotCull * distSq) continue;
+                }
            }
        }

--- a/src/rendering/m2_renderer.cpp
+++ b/src/rendering/m2_renderer.cpp
@ -401,7 +401,7 @@ bool M2Renderer::initialize(VkContext* ctx, VkDescriptorSetLayout perFrameLayout
        VkPushConstantRange pushRange{};
        pushRange.stageFlags = VK_SHADER_STAGE_VERTEX_BIT;
        pushRange.offset = 0;
-        pushRange.size = 84; // mat4(64) + vec2(8) + int(4) + int(4) + int(4)
+        pushRange.size = 88; // mat4(64) + vec2(8) + int(4) + int(4) + int(4) + float(4)

        VkPipelineLayoutCreateInfo ci{VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO};
        ci.setLayoutCount = 3;
@ -591,6 +591,11 @@ bool M2Renderer::initialize(VkContext* ctx, VkDescriptorSetLayout perFrameLayout
        bci.size = MAX_M2_PARTICLES * 9 * sizeof(float);
        vmaCreateBuffer(vkCtx_->getAllocator(), &bci, &aci, &m2ParticleVB_, &m2ParticleVBAlloc_, &allocInfo);
        m2ParticleVBMapped_ = allocInfo.pMappedData;
+
+        // Dedicated glow sprite buffer (separate from particle VB to avoid data race)
+        bci.size = MAX_GLOW_SPRITES * 9 * sizeof(float);
+        vmaCreateBuffer(vkCtx_->getAllocator(), &bci, &aci, &glowVB_, &glowVBAlloc_, &allocInfo);
+        glowVBMapped_ = allocInfo.pMappedData;
    }

    // --- Create white fallback texture ---
@ -689,6 +694,7 @@ void M2Renderer::shutdown() {
    // Clean up particle buffers
    if (smokeVB_) { vmaDestroyBuffer(alloc, smokeVB_, smokeVBAlloc_); smokeVB_ = VK_NULL_HANDLE; }
    if (m2ParticleVB_) { vmaDestroyBuffer(alloc, m2ParticleVB_, m2ParticleVBAlloc_); m2ParticleVB_ = VK_NULL_HANDLE; }
+    if (glowVB_) { vmaDestroyBuffer(alloc, glowVB_, glowVBAlloc_); glowVB_ = VK_NULL_HANDLE; }
    smokeParticles.clear();

    // Destroy pipelines
@ -1611,6 +1617,9 @@ uint32_t M2Renderer::createInstance(uint32_t modelId, const glm::vec3& position,
    instance.cachedIsSmoke = mdlRef.isSmoke;
    instance.cachedHasParticleEmitters = !mdlRef.particleEmitters.empty();
    instance.cachedBoundRadius = mdlRef.boundRadius;
+    instance.cachedIsGroundDetail = mdlRef.isGroundDetail;
+    instance.cachedIsInvisibleTrap = mdlRef.isInvisibleTrap;
+    instance.cachedIsValid = mdlRef.isValid();

    // Initialize animation: play first sequence (usually Stand/Idle)
    const auto& mdl = mdlRef;
@ -1685,6 +1694,9 @@ uint32_t M2Renderer::createInstanceWithMatrix(uint32_t modelId, const glm::mat4&
    instance.cachedIsSmoke = mdl2.isSmoke;
    instance.cachedHasParticleEmitters = !mdl2.particleEmitters.empty();
    instance.cachedBoundRadius = mdl2.boundRadius;
+    instance.cachedIsGroundDetail = mdl2.isGroundDetail;
+    instance.cachedIsInvisibleTrap = mdl2.isInvisibleTrap;
+    instance.cachedIsValid = mdl2.isValid();

    // Initialize animation
    if (mdl2.hasAnimation && !mdl2.disableAnimation && !mdl2.sequences.empty()) {
@ -1729,12 +1741,12 @@ uint32_t M2Renderer::createInstanceWithMatrix(uint32_t modelId, const glm::mat4&
 static int findKeyframeIndex(const std::vector<uint32_t>& timestamps, float time) {
    if (timestamps.empty()) return -1;
    if (timestamps.size() == 1) return 0;
-    for (size_t i = 0; i < timestamps.size() - 1; i++) {
-        if (time < static_cast<float>(timestamps[i + 1])) {
-            return static_cast<int>(i);
-        }
-    }
-    return static_cast<int>(timestamps.size() - 2);
+    // Binary search using float comparison to match original semantics exactly
+    auto it = std::upper_bound(timestamps.begin(), timestamps.end(), time,
+        [](float t, uint32_t ts) { return t < static_cast<float>(ts); });
+    if (it == timestamps.begin()) return 0;
+    size_t idx = static_cast<size_t>(it - timestamps.begin()) - 1;
+    return static_cast<int>(std::min(idx, timestamps.size() - 2));
 }

 // Resolve sequence index and time for a track, handling global sequences.
@ -1791,8 +1803,8 @@ static glm::quat interpQuat(const pipeline::M2AnimationTrack& track,
    const auto& keys = track.sequences[si];
    if (keys.timestamps.empty() || keys.quatValues.empty()) return identity;
    auto safe = [&](const glm::quat& q) -> glm::quat {
-        float len = glm::length(q);
-        if (len < 0.001f || std::isnan(len)) return identity;
+        float lenSq = q.x*q.x + q.y*q.y + q.z*q.z + q.w*q.w;
+        if (lenSq < 0.000001f || std::isnan(lenSq)) return identity;
        return q;
    };
    if (keys.quatValues.size() == 1) return safe(keys.quatValues[0]);
@ -1895,21 +1907,23 @@ void M2Renderer::update(float deltaTime, const glm::vec3& cameraPos, const glm::
        smokeEmitAccum = 0.0f;
    }

-    // --- Update existing smoke particles ---
-    for (auto it = smokeParticles.begin(); it != smokeParticles.end(); ) {
-        it->life += deltaTime;
-        if (it->life >= it->maxLife) {
-            it = smokeParticles.erase(it);
+    // --- Update existing smoke particles (swap-and-pop for O(1) removal) ---
+    for (size_t i = 0; i < smokeParticles.size(); ) {
+        auto& p = smokeParticles[i];
+        p.life += deltaTime;
+        if (p.life >= p.maxLife) {
+            smokeParticles[i] = smokeParticles.back();
+            smokeParticles.pop_back();
            continue;
        }
-        it->position += it->velocity * deltaTime;
-        it->velocity.z *= 0.98f;  // Slight deceleration
-        it->velocity.x += distDrift(smokeRng) * deltaTime;
-        it->velocity.y += distDrift(smokeRng) * deltaTime;
+        p.position += p.velocity * deltaTime;
+        p.velocity.z *= 0.98f;  // Slight deceleration
+        p.velocity.x += distDrift(smokeRng) * deltaTime;
+        p.velocity.y += distDrift(smokeRng) * deltaTime;
        // Grow from 1.0 to 3.5 over lifetime
-        float t = it->life / it->maxLife;
-        it->size = 1.0f + t * 2.5f;
-        ++it;
+        float t = p.life / p.maxLife;
+        p.size = 1.0f + t * 2.5f;
+        ++i;
    }

    // --- Normal M2 animation update ---
@ -2104,10 +2118,16 @@ void M2Renderer::render(VkCommandBuffer cmd, VkDescriptorSet perFrameSet, const

    lastDrawCallCount = 0;

-    // Adaptive render distance: tiered by instance density to cap draw calls
-    const float maxRenderDistance = (instances.size() > 2000) ? 300.0f
-                                  : (instances.size() > 1000) ? 500.0f
-                                  : 1000.0f;
+    // Adaptive render distance: smoothed to prevent pop-in/pop-out flickering
+    const float targetRenderDist = (instances.size() > 2000) ? 300.0f
+                                 : (instances.size() > 1000) ? 500.0f
+                                 : 1000.0f;
+    // Smooth transitions: shrink slowly (avoid popping out nearby objects)
+    const float shrinkRate = 0.005f;  // very slow decrease
+    const float growRate = 0.05f;     // faster increase
+    float blendRate = (targetRenderDist < smoothedRenderDist_) ? shrinkRate : growRate;
+    smoothedRenderDist_ = glm::mix(smoothedRenderDist_, targetRenderDist, blendRate);
+    const float maxRenderDistance = smoothedRenderDist_;
    const float maxRenderDistanceSq = maxRenderDistance * maxRenderDistance;
    const float fadeStartFraction = 0.75f;
    const glm::vec3 camPos = camera.getPosition();
@ -2127,34 +2147,29 @@ void M2Renderer::render(VkCommandBuffer cmd, VkDescriptorSet perFrameSet, const
    for (uint32_t i = 0; i < static_cast<uint32_t>(instances.size()); ++i) {
        const auto& instance = instances[i];

-        // Fast early rejection: skip instances that are definitely too far
+        // Use cached model flags — no hash lookup needed
+        if (!instance.cachedIsValid || instance.cachedIsSmoke || instance.cachedIsInvisibleTrap) continue;
+
        glm::vec3 toCam = instance.position - camPos;
        float distSq = glm::dot(toCam, toCam);
-        if (distSq > maxPossibleDistSq) continue;  // Early out before model lookup
+        if (distSq > maxPossibleDistSq) continue;

-        auto it = models.find(instance.modelId);
-        if (it == models.end()) continue;
-        const M2ModelGPU& model = it->second;
-        if (!model.isValid() || model.isSmoke || model.isInvisibleTrap) continue;
-        float worldRadius = model.boundRadius * instance.scale;
+        float worldRadius = instance.cachedBoundRadius * instance.scale;
        float cullRadius = worldRadius;
-        if (model.disableAnimation) {
+        if (instance.cachedDisableAnimation) {
            cullRadius = std::max(cullRadius, 3.0f);
        }
        float effectiveMaxDistSq = maxRenderDistanceSq * std::max(1.0f, cullRadius / 12.0f);
-        if (model.disableAnimation) {
+        if (instance.cachedDisableAnimation) {
            effectiveMaxDistSq *= 2.6f;
        }
-        if (model.isGroundDetail) {
-            // Keep clutter local so distant grass doesn't overdraw the scene.
+        if (instance.cachedIsGroundDetail) {
            effectiveMaxDistSq *= 0.75f;
        }
-        // Removed aggressive small-object distance caps to prevent city pop-out
-        // Small props (barrels, lanterns, etc.) now use same distance as larger objects
+
        if (distSq > effectiveMaxDistSq) continue;

-        // Frustum cull with moderate padding to prevent edge pop-out during camera rotation
-        // Reduced from 2.5x to 1.5x for better performance
+        // Frustum cull with padding
        float paddedRadius = std::max(cullRadius * 1.5f, cullRadius + 3.0f);
        if (cullRadius > 0.0f && !frustum.intersectsSphere(instance.position, paddedRadius)) continue;

@ -2179,6 +2194,7 @@ void M2Renderer::render(VkCommandBuffer cmd, VkDescriptorSet perFrameSet, const
        int texCoordSet;
        int useBones;
        int isFoliage;
+        float fadeAlpha;
    };

    // Bind per-frame descriptor set (set 0) — shared across all draws
@ -2268,12 +2284,11 @@ void M2Renderer::render(VkCommandBuffer cmd, VkDescriptorSet perFrameSet, const
            }
        }

-        // LOD selection based on distance
-        float dist = std::sqrt(entry.distSq);
+        // LOD selection based on squared distance (avoid sqrt)
        uint16_t desiredLOD = 0;
-        if (dist > 150.0f) desiredLOD = 3;
-        else if (dist > 80.0f) desiredLOD = 2;
-        else if (dist > 40.0f) desiredLOD = 1;
+        if (entry.distSq > 150.0f * 150.0f) desiredLOD = 3;
+        else if (entry.distSq > 80.0f * 80.0f) desiredLOD = 2;
+        else if (entry.distSq > 40.0f * 40.0f) desiredLOD = 1;

        uint16_t targetLOD = desiredLOD;
        if (desiredLOD > 0) {
@ -2390,10 +2405,10 @@ void M2Renderer::render(VkCommandBuffer cmd, VkDescriptorSet perFrameSet, const
                currentPipeline = desiredPipeline;
            }

-            // Update material UBO with per-draw dynamic values (fadeAlpha, interiorDarken)
+            // Update material UBO with per-draw dynamic values (interiorDarken, forceCutout overrides)
+            // Note: fadeAlpha is in push constants (per-draw) to avoid shared-UBO race
            if (batch.materialUBOMapped) {
                auto* mat = static_cast<M2MaterialUBO*>(batch.materialUBOMapped);
-                mat->fadeAlpha = instanceFadeAlpha;
                mat->interiorDarken = insideInterior ? 1.0f : 0.0f;
                if (batch.colorKeyBlack) {
                    mat->colorKeyThreshold = (effectiveBlendMode == 4 || effectiveBlendMode == 5) ? 0.7f : 0.08f;
@ -2419,6 +2434,7 @@ void M2Renderer::render(VkCommandBuffer cmd, VkDescriptorSet perFrameSet, const
            pc.texCoordSet = static_cast<int>(batch.textureUnit);
            pc.useBones = useBones ? 1 : 0;
            pc.isFoliage = model.shadowWindFoliage ? 1 : 0;
+            pc.fadeAlpha = instanceFadeAlpha;
            vkCmdPushConstants(cmd, pipelineLayout_, VK_SHADER_STAGE_VERTEX_BIT, 0, sizeof(pc), &pc);

            vkCmdDrawIndexed(cmd, batch.indexCount, 1, batch.indexStart, 0, 0);
@ -2427,7 +2443,7 @@ void M2Renderer::render(VkCommandBuffer cmd, VkDescriptorSet perFrameSet, const
    }

    // Render glow sprites as billboarded additive point lights
-    if (!glowSprites_.empty() && particleAdditivePipeline_ && m2ParticleVB_ && glowTexDescSet_) {
+    if (!glowSprites_.empty() && particleAdditivePipeline_ && glowVB_ && glowTexDescSet_) {
        vkCmdBindPipeline(cmd, VK_PIPELINE_BIND_POINT_GRAPHICS, particleAdditivePipeline_);
        vkCmdBindDescriptorSets(cmd, VK_PIPELINE_BIND_POINT_GRAPHICS,
                                particlePipelineLayout_, 0, 1, &perFrameSet, 0, nullptr);
@ -2439,26 +2455,24 @@ void M2Renderer::render(VkCommandBuffer cmd, VkDescriptorSet perFrameSet, const
        vkCmdPushConstants(cmd, particlePipelineLayout_, VK_SHADER_STAGE_FRAGMENT_BIT, 0,
                           sizeof(particlePush), &particlePush);

-        // Build and upload vertex data
-        std::vector<float> glowData;
-        glowData.reserve(glowSprites_.size() * 9);
-        for (const auto& gs : glowSprites_) {
-            glowData.push_back(gs.worldPos.x);
-            glowData.push_back(gs.worldPos.y);
-            glowData.push_back(gs.worldPos.z);
-            glowData.push_back(gs.color.r);
-            glowData.push_back(gs.color.g);
-            glowData.push_back(gs.color.b);
-            glowData.push_back(gs.color.a);
-            glowData.push_back(gs.size);
-            glowData.push_back(0.0f);
+        // Write glow vertex data directly to mapped buffer (no temp vector)
+        size_t uploadCount = std::min(glowSprites_.size(), MAX_GLOW_SPRITES);
+        float* dst = static_cast<float*>(glowVBMapped_);
+        for (size_t gi = 0; gi < uploadCount; gi++) {
+            const auto& gs = glowSprites_[gi];
+            *dst++ = gs.worldPos.x;
+            *dst++ = gs.worldPos.y;
+            *dst++ = gs.worldPos.z;
+            *dst++ = gs.color.r;
+            *dst++ = gs.color.g;
+            *dst++ = gs.color.b;
+            *dst++ = gs.color.a;
+            *dst++ = gs.size;
+            *dst++ = 0.0f;
        }

-        size_t uploadCount = std::min(glowSprites_.size(), MAX_M2_PARTICLES);
-        memcpy(m2ParticleVBMapped_, glowData.data(), uploadCount * 9 * sizeof(float));
-
        VkDeviceSize offset = 0;
-        vkCmdBindVertexBuffers(cmd, 0, 1, &m2ParticleVB_, &offset);
+        vkCmdBindVertexBuffers(cmd, 0, 1, &glowVB_, &offset);
        vkCmdDraw(cmd, static_cast<uint32_t>(uploadCount), 1, 0, 0);
    }

@ -2737,6 +2751,9 @@ void M2Renderer::renderShadow(VkCommandBuffer cmd, const glm::mat4& lightSpaceMa
        const M2ModelGPU* currentModel = nullptr;

        for (const auto& instance : instances) {
+            // Use cached flags to skip early without hash lookup
+            if (!instance.cachedIsValid || instance.cachedIsSmoke || instance.cachedIsInvisibleTrap) continue;
+
            // Distance cull against shadow frustum
            glm::vec3 diff = instance.position - shadowCenter;
            if (glm::dot(diff, diff) > shadowRadiusSq) continue;
@ -2744,7 +2761,6 @@ void M2Renderer::renderShadow(VkCommandBuffer cmd, const glm::mat4& lightSpaceMa
            auto modelIt = models.find(instance.modelId);
            if (modelIt == models.end()) continue;
            const M2ModelGPU& model = modelIt->second;
-            if (!model.isValid() || model.isSmoke || model.isInvisibleTrap) continue;

            // Filter: only draw foliage models in foliage pass, non-foliage in non-foliage pass
            if (model.shadowWindFoliage != foliagePass) continue;
--- a/src/rendering/wmo_renderer.cpp
+++ b/src/rendering/wmo_renderer.cpp
@ -536,7 +536,7 @@ bool WMORenderer::loadModel(const pipeline::WMOModel& model, uint32_t id) {
            // Flag 0x80 on INDOOR groups in large WMOs = interior cathedral shell
            bool hasFlag80 = (wmoGroup.flags & 0x80) != 0;
            bool isIndoor = (wmoGroup.flags & 0x2000) != 0;
-            if (nVerts < 100 || (alwaysDraw && nVerts < 5000) || (isFacade && isLargeWmo) || (isCityShell && isLargeWmo) || (hasFlag80 && isIndoor && isLargeWmo)) {
+            if ((nVerts < 100 && isLargeWmo) || (alwaysDraw && nVerts < 5000) || (isFacade && isLargeWmo) || (isCityShell && isLargeWmo) || (hasFlag80 && isIndoor && isLargeWmo)) {
                resources.isLOD = true;
            }
            modelData.groups.push_back(resources);
Author	SHA1	Message	Date
Kelsi	bec7a678aa	Fix missing floors in dungeon instances by restricting LOD detection. Low-vertex groups (<100 verts) were incorrectly marked as distance-only LOD shells in small WMOs such as the Stockades. The heuristic is now applied only to large WMOs (50+ groups), where it is needed for city exterior shells. [CI status — checks pending, all waiting to run: Build / Build (arm64) (push); Build / Build (x86-64) (push); Build / Build (macOS arm64) (push); Build / Build (windows-arm64) (push); Build / Build (windows-x86-64) (push); Security / CodeQL (C/C++) (push); Security / Semgrep (push); Security / Sanitizer Build (ASan/UBSan) (push)]	2026-03-04 09:25:00 -08:00
Kelsi	84b04446c1	Per-instance NPC hair/skin textures, fix binary search float comparison - NPC hair/skin textures now use per-instance overrides instead of shared model-level textures, so each NPC shows its own hair color/style - Hair/skin DBC lookup runs for every NPC instance (including cached models) rather than only on first load - Fix keyframe binary search to use float comparison matching original linear scan semantics	2026-03-04 09:19:02 -08:00
Kelsi	e6acb4ac9a	Optimize animation hotpaths: binary keyframe search, eliminate sqrt calls - Replace O(n) linear keyframe search with O(log n) binary search in both M2 and Character renderers (runs thousands of times per frame) - Smoke particle removal: swap-and-pop instead of O(n²) vector erase - Character render backface cull: eliminate sqrt via squared comparison - Quaternion validation: use length² instead of sqrt-based length check	2026-03-04 08:33:56 -08:00
Kelsi	2e432fc123	Eliminate per-instance hash lookups in M2 render/shadow culling loops Use cached model flags (isValid, isSmoke, isInvisibleTrap, isGroundDetail, disableAnimation, boundRadius) on M2Instance instead of models.find() in the hot culling paths. Also complete cached flag initialization in createInstanceWithMatrix().	2026-03-04 08:28:21 -08:00
Kelsi	30fa9836d9	Fix glow sprite flashing, move fadeAlpha to push constants, throttle character bones - Glow sprites now use dedicated vertex buffer (glowVB_) separate from M2 particle buffer to prevent data race when renderM2Particles() overwrites glow data mid-flight - Move fadeAlpha from shared material UBO to per-draw push constants, eliminating cross-instance alpha race on non-double-buffered UBOs - Smooth adaptive render distance transitions to prevent pop-in/out at instance count thresholds (1000/2000) - Distance-tiered character bone throttling: near (<30u) every frame, mid (30-60u) every 3rd, far (60-120u) every 6th frame - Skip weapon instance animation updates (transforms set by parent bones)	2026-03-04 08:17:32 -08:00