Fix glow sprite flashing, move fadeAlpha to push constants, throttle character bones

- Glow sprites now use a dedicated vertex buffer (glowVB_), separate from the M2 particle buffer, to prevent a data race when renderM2Particles() overwrites glow data mid-flight
- Move fadeAlpha from the shared material UBO to per-draw push constants, eliminating the cross-instance alpha race on non-double-buffered UBOs
- Smooth adaptive render distance transitions to prevent pop-in/out at instance count thresholds (1000/2000)
- Distance-tiered character bone throttling: near (<30u) every frame, mid (30-60u) every 3rd frame, far (60-120u) every 6th frame
- Skip weapon instance animation updates (transforms are set by parent bones)
2026-03-22 23:30:14 +00:00 · 2026-03-04 08:17:32 -08:00 · 2026-03-04 08:17:32 -08:00 · 30fa9836d9
commit 30fa9836d9
parent 3482dacea8
8 changed files with 87 additions and 31 deletions
--- a/assets/shaders/m2.frag.glsl
+++ b/assets/shaders/m2.frag.glsl
@@ -34,6 +34,7 @@ layout(location = 1) in vec3 Normal;
 layout(location = 2) in vec2 TexCoord;
 layout(location = 3) flat in vec3 InstanceOrigin;
 layout(location = 4) in float ModelHeight;
+layout(location = 5) in float vFadeAlpha;

 layout(location = 0) out vec4 outColor;

@@ -175,16 +176,16 @@ void main() {
    float fogFactor = clamp((fogParams.y - dist) / (fogParams.y - fogParams.x), 0.0, 1.0);
    result = mix(fogColor.rgb, result, fogFactor);

-    float outAlpha = texColor.a * fadeAlpha;
+    float outAlpha = texColor.a * vFadeAlpha;
    // Cutout materials should not remain partially transparent after discard,
    // otherwise foliage cards look view-dependent.
    if (alphaTest != 0 || colorKeyBlack != 0) {
-        outAlpha = fadeAlpha;
+        outAlpha = vFadeAlpha;
    }
    // Foliage cutout should stay opaque after alpha discard to avoid
    // view-angle translucency artifacts.
    if (alphaTest == 2 || alphaTest == 3) {
-        outAlpha = 1.0 * fadeAlpha;
+        outAlpha = 1.0 * vFadeAlpha;
    }
    outColor = vec4(result, outAlpha);
 }
--- a/assets/shaders/m2.frag.spv
+++ b/assets/shaders/m2.frag.spv
--- a/assets/shaders/m2.vert.glsl
+++ b/assets/shaders/m2.vert.glsl
@@ -19,6 +19,7 @@ layout(push_constant) uniform Push {
    int texCoordSet;
    int useBones;
    int isFoliage;
+    float fadeAlpha;
 } push;

 layout(set = 2, binding = 0) readonly buffer BoneSSBO {
@@ -37,6 +38,7 @@ layout(location = 1) out vec3 Normal;
 layout(location = 2) out vec2 TexCoord;
 layout(location = 3) flat out vec3 InstanceOrigin;
 layout(location = 4) out float ModelHeight;
+layout(location = 5) out float vFadeAlpha;

 void main() {
    vec4 pos = vec4(aPos, 1.0);
@@ -86,6 +88,7 @@ void main() {

    InstanceOrigin = push.model[3].xyz;
    ModelHeight = pos.z;
+    vFadeAlpha = push.fadeAlpha;

    gl_Position = projection * view * worldPos;
 }
--- a/assets/shaders/m2.vert.spv
+++ b/assets/shaders/m2.vert.spv
--- a/include/rendering/character_renderer.hpp
+++ b/include/rendering/character_renderer.hpp
@@ -178,6 +178,9 @@ private:
        bool hasOverrideModelMatrix = false;
        glm::mat4 overrideModelMatrix{1.0f};

+        // Bone update throttling (skip frames for distant characters)
+        uint32_t boneUpdateCounter = 0;
+
        // Per-instance bone SSBO (double-buffered per frame)
        VkBuffer boneBuffer[2] = {};
        VmaAllocation boneAlloc[2] = {};
--- a/include/rendering/m2_renderer.hpp
+++ b/include/rendering/m2_renderer.hpp
@@ -371,6 +371,11 @@ private:
    ::VkBuffer m2ParticleVB_ = VK_NULL_HANDLE;
    VmaAllocation m2ParticleVBAlloc_ = VK_NULL_HANDLE;
    void* m2ParticleVBMapped_ = nullptr;
+    // Dedicated glow sprite vertex buffer (separate from particle VB to avoid data race)
+    static constexpr size_t MAX_GLOW_SPRITES = 2000;
+    ::VkBuffer glowVB_ = VK_NULL_HANDLE;
+    VmaAllocation glowVBAlloc_ = VK_NULL_HANDLE;
+    void* glowVBMapped_ = nullptr;

    std::unordered_map<uint32_t, M2ModelGPU> models;
    std::vector<M2Instance> instances;
@@ -477,6 +482,7 @@ private:
    // Cached camera state from update() for frustum-culling bones
    glm::vec3 cachedCamPos_ = glm::vec3(0.0f);
    float cachedMaxRenderDistSq_ = 0.0f;
+    float smoothedRenderDist_ = 1000.0f;  // Smoothed render distance to prevent flickering

    // Thread count for parallel bone animation
    uint32_t numAnimThreads_ = 1;
--- a/src/rendering/character_renderer.cpp
+++ b/src/rendering/character_renderer.cpp
@@ -1423,20 +1423,53 @@ void CharacterRenderer::update(float deltaTime, const glm::vec3& cameraPos) {
    }

    // Only update animations for nearby characters (performance optimization)
-    // Collect instances that need updates
+    // Collect instances that need bone recomputation, with distance-based throttling
    std::vector<std::reference_wrapper<CharacterInstance>> toUpdate;
    toUpdate.reserve(instances.size());

    for (auto& pair : instances) {
-        float distSq = glm::distance2(pair.second.position, cameraPos);
-        if (distSq < animUpdateRadiusSq) {
-            toUpdate.push_back(std::ref(pair.second));
+        auto& inst = pair.second;
+
+        // Skip weapon instances — their transforms are set by parent bones
+        if (inst.hasOverrideModelMatrix) continue;
+
+        float distSq = glm::distance2(inst.position, cameraPos);
+        if (distSq >= animUpdateRadiusSq) continue;
+
+        // Always advance animation time (cheap)
+        auto modelIt = models.find(inst.modelId);
+        if (modelIt != models.end() && !modelIt->second.data.sequences.empty()) {
+            if (inst.currentSequenceIndex < 0) {
+                inst.currentSequenceIndex = 0;
+                inst.currentAnimationId = modelIt->second.data.sequences[0].id;
+            }
+            const auto& seq = modelIt->second.data.sequences[inst.currentSequenceIndex];
+            inst.animationTime += deltaTime * 1000.0f;
+            if (seq.duration > 0 && inst.animationTime >= static_cast<float>(seq.duration)) {
+                if (inst.animationLoop) {
+                    inst.animationTime = std::fmod(inst.animationTime, static_cast<float>(seq.duration));
+                } else {
+                    inst.animationTime = static_cast<float>(seq.duration);
+                }
+            }
+        }
+
+        // Distance-tiered bone throttling: near=every frame, mid=every 3rd, far=every 6th
+        uint32_t boneInterval = 1;
+        if (distSq > 60.0f * 60.0f) boneInterval = 6;
+        else if (distSq > 30.0f * 30.0f) boneInterval = 3;
+
+        inst.boneUpdateCounter++;
+        bool needsBones = (inst.boneUpdateCounter >= boneInterval) || inst.boneMatrices.empty();
+        if (needsBones) {
+            inst.boneUpdateCounter = 0;
+            toUpdate.push_back(std::ref(inst));
        }
    }

    const size_t updatedCount = toUpdate.size();

-    // Thread animation updates in chunks to avoid spawning one task per instance.
+    // Thread bone matrix computation in chunks
    if (updatedCount >= 8 && numAnimThreads_ > 1) {
        static const size_t minAnimWorkPerThread = std::max<size_t>(
            16, envSizeOrDefault("WOWEE_CHAR_ANIM_WORK_PER_THREAD", 64));
@@ -1446,7 +1479,7 @@ void CharacterRenderer::update(float deltaTime, const glm::vec3& cameraPos) {

        if (numThreads <= 1) {
            for (auto& instRef : toUpdate) {
-                updateAnimation(instRef.get(), deltaTime);
+                calculateBoneMatrices(instRef.get());
            }
        } else {
            const size_t chunkSize = updatedCount / numThreads;
@@ -1461,9 +1494,9 @@ void CharacterRenderer::update(float deltaTime, const glm::vec3& cameraPos) {
            for (size_t t = 0; t < numThreads; t++) {
                size_t end = start + chunkSize + (t < remainder ? 1 : 0);
                animFutures_.push_back(std::async(std::launch::async,
-                    [this, &toUpdate, start, end, deltaTime]() {
+                    [this, &toUpdate, start, end]() {
                        for (size_t i = start; i < end; i++) {
-                            updateAnimation(toUpdate[i].get(), deltaTime);
+                            calculateBoneMatrices(toUpdate[i].get());
                        }
                    }));
                start = end;
@@ -1474,9 +1507,8 @@ void CharacterRenderer::update(float deltaTime, const glm::vec3& cameraPos) {
            }
        }
    } else {
-        // Sequential for small counts (avoid thread overhead)
        for (auto& instRef : toUpdate) {
-            updateAnimation(instRef.get(), deltaTime);
+            calculateBoneMatrices(instRef.get());
        }
    }

--- a/src/rendering/m2_renderer.cpp
+++ b/src/rendering/m2_renderer.cpp
@@ -401,7 +401,7 @@ bool M2Renderer::initialize(VkContext* ctx, VkDescriptorSetLayout perFrameLayout
        VkPushConstantRange pushRange{};
        pushRange.stageFlags = VK_SHADER_STAGE_VERTEX_BIT;
        pushRange.offset = 0;
-        pushRange.size = 84; // mat4(64) + vec2(8) + int(4) + int(4) + int(4)
+        pushRange.size = 88; // mat4(64) + vec2(8) + int(4) + int(4) + int(4) + float(4)

        VkPipelineLayoutCreateInfo ci{VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO};
        ci.setLayoutCount = 3;
@@ -591,6 +591,11 @@ bool M2Renderer::initialize(VkContext* ctx, VkDescriptorSetLayout perFrameLayout
        bci.size = MAX_M2_PARTICLES * 9 * sizeof(float);
        vmaCreateBuffer(vkCtx_->getAllocator(), &bci, &aci, &m2ParticleVB_, &m2ParticleVBAlloc_, &allocInfo);
        m2ParticleVBMapped_ = allocInfo.pMappedData;
+
+        // Dedicated glow sprite buffer (separate from particle VB to avoid data race)
+        bci.size = MAX_GLOW_SPRITES * 9 * sizeof(float);
+        vmaCreateBuffer(vkCtx_->getAllocator(), &bci, &aci, &glowVB_, &glowVBAlloc_, &allocInfo);
+        glowVBMapped_ = allocInfo.pMappedData;
    }

    // --- Create white fallback texture ---
@@ -689,6 +694,7 @@ void M2Renderer::shutdown() {
    // Clean up particle buffers
    if (smokeVB_) { vmaDestroyBuffer(alloc, smokeVB_, smokeVBAlloc_); smokeVB_ = VK_NULL_HANDLE; }
    if (m2ParticleVB_) { vmaDestroyBuffer(alloc, m2ParticleVB_, m2ParticleVBAlloc_); m2ParticleVB_ = VK_NULL_HANDLE; }
+    if (glowVB_) { vmaDestroyBuffer(alloc, glowVB_, glowVBAlloc_); glowVB_ = VK_NULL_HANDLE; }
    smokeParticles.clear();

    // Destroy pipelines
@@ -2104,10 +2110,16 @@ void M2Renderer::render(VkCommandBuffer cmd, VkDescriptorSet perFrameSet, const

    lastDrawCallCount = 0;

-    // Adaptive render distance: tiered by instance density to cap draw calls
-    const float maxRenderDistance = (instances.size() > 2000) ? 300.0f
-                                  : (instances.size() > 1000) ? 500.0f
-                                  : 1000.0f;
+    // Adaptive render distance: smoothed to prevent pop-in/pop-out flickering
+    const float targetRenderDist = (instances.size() > 2000) ? 300.0f
+                                 : (instances.size() > 1000) ? 500.0f
+                                 : 1000.0f;
+    // Smooth transitions: shrink slowly (avoid popping out nearby objects)
+    const float shrinkRate = 0.005f;  // very slow decrease
+    const float growRate = 0.05f;     // faster increase
+    float blendRate = (targetRenderDist < smoothedRenderDist_) ? shrinkRate : growRate;
+    smoothedRenderDist_ = glm::mix(smoothedRenderDist_, targetRenderDist, blendRate);
+    const float maxRenderDistance = smoothedRenderDist_;
    const float maxRenderDistanceSq = maxRenderDistance * maxRenderDistance;
    const float fadeStartFraction = 0.75f;
    const glm::vec3 camPos = camera.getPosition();
@@ -2127,15 +2139,14 @@ void M2Renderer::render(VkCommandBuffer cmd, VkDescriptorSet perFrameSet, const
    for (uint32_t i = 0; i < static_cast<uint32_t>(instances.size()); ++i) {
        const auto& instance = instances[i];

-        // Fast early rejection: skip instances that are definitely too far
        glm::vec3 toCam = instance.position - camPos;
        float distSq = glm::dot(toCam, toCam);
-        if (distSq > maxPossibleDistSq) continue;  // Early out before model lookup

        auto it = models.find(instance.modelId);
        if (it == models.end()) continue;
        const M2ModelGPU& model = it->second;
        if (!model.isValid() || model.isSmoke || model.isInvisibleTrap) continue;
+
        float worldRadius = model.boundRadius * instance.scale;
        float cullRadius = worldRadius;
        if (model.disableAnimation) {
@@ -2146,15 +2157,13 @@ void M2Renderer::render(VkCommandBuffer cmd, VkDescriptorSet perFrameSet, const
            effectiveMaxDistSq *= 2.6f;
        }
        if (model.isGroundDetail) {
-            // Keep clutter local so distant grass doesn't overdraw the scene.
            effectiveMaxDistSq *= 0.75f;
        }
-        // Removed aggressive small-object distance caps to prevent city pop-out
-        // Small props (barrels, lanterns, etc.) now use same distance as larger objects
+
+        if (distSq > maxPossibleDistSq) continue;
        if (distSq > effectiveMaxDistSq) continue;

-        // Frustum cull with moderate padding to prevent edge pop-out during camera rotation
-        // Reduced from 2.5x to 1.5x for better performance
+        // Frustum cull with padding
        float paddedRadius = std::max(cullRadius * 1.5f, cullRadius + 3.0f);
        if (cullRadius > 0.0f && !frustum.intersectsSphere(instance.position, paddedRadius)) continue;

@@ -2179,6 +2188,7 @@ void M2Renderer::render(VkCommandBuffer cmd, VkDescriptorSet perFrameSet, const
        int texCoordSet;
        int useBones;
        int isFoliage;
+        float fadeAlpha;
    };

    // Bind per-frame descriptor set (set 0) — shared across all draws
@@ -2390,10 +2400,10 @@ void M2Renderer::render(VkCommandBuffer cmd, VkDescriptorSet perFrameSet, const
                currentPipeline = desiredPipeline;
            }

-            // Update material UBO with per-draw dynamic values (fadeAlpha, interiorDarken)
+            // Update material UBO with per-draw dynamic values (interiorDarken, forceCutout overrides)
+            // Note: fadeAlpha is in push constants (per-draw) to avoid shared-UBO race
            if (batch.materialUBOMapped) {
                auto* mat = static_cast<M2MaterialUBO*>(batch.materialUBOMapped);
-                mat->fadeAlpha = instanceFadeAlpha;
                mat->interiorDarken = insideInterior ? 1.0f : 0.0f;
                if (batch.colorKeyBlack) {
                    mat->colorKeyThreshold = (effectiveBlendMode == 4 || effectiveBlendMode == 5) ? 0.7f : 0.08f;
@@ -2419,6 +2429,7 @@ void M2Renderer::render(VkCommandBuffer cmd, VkDescriptorSet perFrameSet, const
            pc.texCoordSet = static_cast<int>(batch.textureUnit);
            pc.useBones = useBones ? 1 : 0;
            pc.isFoliage = model.shadowWindFoliage ? 1 : 0;
+            pc.fadeAlpha = instanceFadeAlpha;
            vkCmdPushConstants(cmd, pipelineLayout_, VK_SHADER_STAGE_VERTEX_BIT, 0, sizeof(pc), &pc);

            vkCmdDrawIndexed(cmd, batch.indexCount, 1, batch.indexStart, 0, 0);
@@ -2427,7 +2438,7 @@ void M2Renderer::render(VkCommandBuffer cmd, VkDescriptorSet perFrameSet, const
    }

    // Render glow sprites as billboarded additive point lights
-    if (!glowSprites_.empty() && particleAdditivePipeline_ && m2ParticleVB_ && glowTexDescSet_) {
+    if (!glowSprites_.empty() && particleAdditivePipeline_ && glowVB_ && glowTexDescSet_) {
        vkCmdBindPipeline(cmd, VK_PIPELINE_BIND_POINT_GRAPHICS, particleAdditivePipeline_);
        vkCmdBindDescriptorSets(cmd, VK_PIPELINE_BIND_POINT_GRAPHICS,
                                particlePipelineLayout_, 0, 1, &perFrameSet, 0, nullptr);
@@ -2454,11 +2465,11 @@ void M2Renderer::render(VkCommandBuffer cmd, VkDescriptorSet perFrameSet, const
            glowData.push_back(0.0f);
        }

-        size_t uploadCount = std::min(glowSprites_.size(), MAX_M2_PARTICLES);
-        memcpy(m2ParticleVBMapped_, glowData.data(), uploadCount * 9 * sizeof(float));
+        size_t uploadCount = std::min(glowSprites_.size(), MAX_GLOW_SPRITES);
+        memcpy(glowVBMapped_, glowData.data(), uploadCount * 9 * sizeof(float));

        VkDeviceSize offset = 0;
-        vkCmdBindVertexBuffers(cmd, 0, 1, &m2ParticleVB_, &offset);
+        vkCmdBindVertexBuffers(cmd, 0, 1, &glowVB_, &offset);
        vkCmdDraw(cmd, static_cast<uint32_t>(uploadCount), 1, 0, 0);
    }