Fix glow sprite flashing, move fadeAlpha to push constants, throttle character bones

- Glow sprites now use a dedicated vertex buffer (glowVB_), separate from
  the M2 particle buffer, to prevent a data race when renderM2Particles()
  overwrites glow data mid-flight
- Move fadeAlpha from the shared material UBO to per-draw push constants,
  eliminating the cross-instance alpha race on non-double-buffered UBOs
- Smooth adaptive render distance transitions to prevent pop-in/out
  at instance count thresholds (1000/2000)
- Distance-tiered character bone throttling: near (<30u) every frame,
  mid (30-60u) every 3rd, far (60-120u) every 6th frame
- Skip weapon instance animation updates (transforms set by parent bones)
This commit is contained in:
Kelsi 2026-03-04 08:17:32 -08:00
parent 3482dacea8
commit 30fa9836d9
8 changed files with 87 additions and 31 deletions

View file

@ -34,6 +34,7 @@ layout(location = 1) in vec3 Normal;
layout(location = 2) in vec2 TexCoord;
layout(location = 3) flat in vec3 InstanceOrigin;
layout(location = 4) in float ModelHeight;
layout(location = 5) in float vFadeAlpha;
layout(location = 0) out vec4 outColor;
@ -175,16 +176,16 @@ void main() {
float fogFactor = clamp((fogParams.y - dist) / (fogParams.y - fogParams.x), 0.0, 1.0);
result = mix(fogColor.rgb, result, fogFactor);
float outAlpha = texColor.a * fadeAlpha;
float outAlpha = texColor.a * vFadeAlpha;
// Cutout materials should not remain partially transparent after discard,
// otherwise foliage cards look view-dependent.
if (alphaTest != 0 || colorKeyBlack != 0) {
outAlpha = fadeAlpha;
outAlpha = vFadeAlpha;
}
// Foliage cutout should stay opaque after alpha discard to avoid
// view-angle translucency artifacts.
if (alphaTest == 2 || alphaTest == 3) {
outAlpha = 1.0 * fadeAlpha;
outAlpha = 1.0 * vFadeAlpha;
}
outColor = vec4(result, outAlpha);
}

Binary file not shown.

View file

@ -19,6 +19,7 @@ layout(push_constant) uniform Push {
int texCoordSet;
int useBones;
int isFoliage;
float fadeAlpha;
} push;
layout(set = 2, binding = 0) readonly buffer BoneSSBO {
@ -37,6 +38,7 @@ layout(location = 1) out vec3 Normal;
layout(location = 2) out vec2 TexCoord;
layout(location = 3) flat out vec3 InstanceOrigin;
layout(location = 4) out float ModelHeight;
layout(location = 5) out float vFadeAlpha;
void main() {
vec4 pos = vec4(aPos, 1.0);
@ -86,6 +88,7 @@ void main() {
InstanceOrigin = push.model[3].xyz;
ModelHeight = pos.z;
vFadeAlpha = push.fadeAlpha;
gl_Position = projection * view * worldPos;
}

Binary file not shown.

View file

@ -178,6 +178,9 @@ private:
bool hasOverrideModelMatrix = false;
glm::mat4 overrideModelMatrix{1.0f};
// Bone update throttling (skip frames for distant characters)
uint32_t boneUpdateCounter = 0;
// Per-instance bone SSBO (double-buffered per frame)
VkBuffer boneBuffer[2] = {};
VmaAllocation boneAlloc[2] = {};

View file

@ -371,6 +371,11 @@ private:
::VkBuffer m2ParticleVB_ = VK_NULL_HANDLE;
VmaAllocation m2ParticleVBAlloc_ = VK_NULL_HANDLE;
void* m2ParticleVBMapped_ = nullptr;
// Dedicated glow sprite vertex buffer (separate from particle VB to avoid data race)
static constexpr size_t MAX_GLOW_SPRITES = 2000;
::VkBuffer glowVB_ = VK_NULL_HANDLE;
VmaAllocation glowVBAlloc_ = VK_NULL_HANDLE;
void* glowVBMapped_ = nullptr;
std::unordered_map<uint32_t, M2ModelGPU> models;
std::vector<M2Instance> instances;
@ -477,6 +482,7 @@ private:
// Cached camera state from update() for frustum-culling bones
glm::vec3 cachedCamPos_ = glm::vec3(0.0f);
float cachedMaxRenderDistSq_ = 0.0f;
float smoothedRenderDist_ = 1000.0f; // Smoothed render distance to prevent flickering
// Thread count for parallel bone animation
uint32_t numAnimThreads_ = 1;

View file

@ -1423,20 +1423,53 @@ void CharacterRenderer::update(float deltaTime, const glm::vec3& cameraPos) {
}
// Only update animations for nearby characters (performance optimization)
// Collect instances that need updates
// Collect instances that need bone recomputation, with distance-based throttling
std::vector<std::reference_wrapper<CharacterInstance>> toUpdate;
toUpdate.reserve(instances.size());
for (auto& pair : instances) {
float distSq = glm::distance2(pair.second.position, cameraPos);
if (distSq < animUpdateRadiusSq) {
toUpdate.push_back(std::ref(pair.second));
auto& inst = pair.second;
// Skip weapon instances — their transforms are set by parent bones
if (inst.hasOverrideModelMatrix) continue;
float distSq = glm::distance2(inst.position, cameraPos);
if (distSq >= animUpdateRadiusSq) continue;
// Always advance animation time (cheap)
auto modelIt = models.find(inst.modelId);
if (modelIt != models.end() && !modelIt->second.data.sequences.empty()) {
if (inst.currentSequenceIndex < 0) {
inst.currentSequenceIndex = 0;
inst.currentAnimationId = modelIt->second.data.sequences[0].id;
}
const auto& seq = modelIt->second.data.sequences[inst.currentSequenceIndex];
inst.animationTime += deltaTime * 1000.0f;
if (seq.duration > 0 && inst.animationTime >= static_cast<float>(seq.duration)) {
if (inst.animationLoop) {
inst.animationTime = std::fmod(inst.animationTime, static_cast<float>(seq.duration));
} else {
inst.animationTime = static_cast<float>(seq.duration);
}
}
}
// Distance-tiered bone throttling: near=every frame, mid=every 3rd, far=every 6th
uint32_t boneInterval = 1;
if (distSq > 60.0f * 60.0f) boneInterval = 6;
else if (distSq > 30.0f * 30.0f) boneInterval = 3;
inst.boneUpdateCounter++;
bool needsBones = (inst.boneUpdateCounter >= boneInterval) || inst.boneMatrices.empty();
if (needsBones) {
inst.boneUpdateCounter = 0;
toUpdate.push_back(std::ref(inst));
}
}
const size_t updatedCount = toUpdate.size();
// Thread animation updates in chunks to avoid spawning one task per instance.
// Thread bone matrix computation in chunks
if (updatedCount >= 8 && numAnimThreads_ > 1) {
static const size_t minAnimWorkPerThread = std::max<size_t>(
16, envSizeOrDefault("WOWEE_CHAR_ANIM_WORK_PER_THREAD", 64));
@ -1446,7 +1479,7 @@ void CharacterRenderer::update(float deltaTime, const glm::vec3& cameraPos) {
if (numThreads <= 1) {
for (auto& instRef : toUpdate) {
updateAnimation(instRef.get(), deltaTime);
calculateBoneMatrices(instRef.get());
}
} else {
const size_t chunkSize = updatedCount / numThreads;
@ -1461,9 +1494,9 @@ void CharacterRenderer::update(float deltaTime, const glm::vec3& cameraPos) {
for (size_t t = 0; t < numThreads; t++) {
size_t end = start + chunkSize + (t < remainder ? 1 : 0);
animFutures_.push_back(std::async(std::launch::async,
[this, &toUpdate, start, end, deltaTime]() {
[this, &toUpdate, start, end]() {
for (size_t i = start; i < end; i++) {
updateAnimation(toUpdate[i].get(), deltaTime);
calculateBoneMatrices(toUpdate[i].get());
}
}));
start = end;
@ -1474,9 +1507,8 @@ void CharacterRenderer::update(float deltaTime, const glm::vec3& cameraPos) {
}
}
} else {
// Sequential for small counts (avoid thread overhead)
for (auto& instRef : toUpdate) {
updateAnimation(instRef.get(), deltaTime);
calculateBoneMatrices(instRef.get());
}
}

View file

@ -401,7 +401,7 @@ bool M2Renderer::initialize(VkContext* ctx, VkDescriptorSetLayout perFrameLayout
VkPushConstantRange pushRange{};
pushRange.stageFlags = VK_SHADER_STAGE_VERTEX_BIT;
pushRange.offset = 0;
pushRange.size = 84; // mat4(64) + vec2(8) + int(4) + int(4) + int(4)
pushRange.size = 88; // mat4(64) + vec2(8) + int(4) + int(4) + int(4) + float(4)
VkPipelineLayoutCreateInfo ci{VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO};
ci.setLayoutCount = 3;
@ -591,6 +591,11 @@ bool M2Renderer::initialize(VkContext* ctx, VkDescriptorSetLayout perFrameLayout
bci.size = MAX_M2_PARTICLES * 9 * sizeof(float);
vmaCreateBuffer(vkCtx_->getAllocator(), &bci, &aci, &m2ParticleVB_, &m2ParticleVBAlloc_, &allocInfo);
m2ParticleVBMapped_ = allocInfo.pMappedData;
// Dedicated glow sprite buffer (separate from particle VB to avoid data race)
bci.size = MAX_GLOW_SPRITES * 9 * sizeof(float);
vmaCreateBuffer(vkCtx_->getAllocator(), &bci, &aci, &glowVB_, &glowVBAlloc_, &allocInfo);
glowVBMapped_ = allocInfo.pMappedData;
}
// --- Create white fallback texture ---
@ -689,6 +694,7 @@ void M2Renderer::shutdown() {
// Clean up particle buffers
if (smokeVB_) { vmaDestroyBuffer(alloc, smokeVB_, smokeVBAlloc_); smokeVB_ = VK_NULL_HANDLE; }
if (m2ParticleVB_) { vmaDestroyBuffer(alloc, m2ParticleVB_, m2ParticleVBAlloc_); m2ParticleVB_ = VK_NULL_HANDLE; }
if (glowVB_) { vmaDestroyBuffer(alloc, glowVB_, glowVBAlloc_); glowVB_ = VK_NULL_HANDLE; }
smokeParticles.clear();
// Destroy pipelines
@ -2104,10 +2110,16 @@ void M2Renderer::render(VkCommandBuffer cmd, VkDescriptorSet perFrameSet, const
lastDrawCallCount = 0;
// Adaptive render distance: tiered by instance density to cap draw calls
const float maxRenderDistance = (instances.size() > 2000) ? 300.0f
: (instances.size() > 1000) ? 500.0f
: 1000.0f;
// Adaptive render distance: smoothed to prevent pop-in/pop-out flickering
const float targetRenderDist = (instances.size() > 2000) ? 300.0f
: (instances.size() > 1000) ? 500.0f
: 1000.0f;
// Smooth transitions: shrink slowly (avoid popping out nearby objects)
const float shrinkRate = 0.005f; // very slow decrease
const float growRate = 0.05f; // faster increase
float blendRate = (targetRenderDist < smoothedRenderDist_) ? shrinkRate : growRate;
smoothedRenderDist_ = glm::mix(smoothedRenderDist_, targetRenderDist, blendRate);
const float maxRenderDistance = smoothedRenderDist_;
const float maxRenderDistanceSq = maxRenderDistance * maxRenderDistance;
const float fadeStartFraction = 0.75f;
const glm::vec3 camPos = camera.getPosition();
@ -2127,15 +2139,14 @@ void M2Renderer::render(VkCommandBuffer cmd, VkDescriptorSet perFrameSet, const
for (uint32_t i = 0; i < static_cast<uint32_t>(instances.size()); ++i) {
const auto& instance = instances[i];
// Fast early rejection: skip instances that are definitely too far
glm::vec3 toCam = instance.position - camPos;
float distSq = glm::dot(toCam, toCam);
if (distSq > maxPossibleDistSq) continue; // Early out before model lookup
auto it = models.find(instance.modelId);
if (it == models.end()) continue;
const M2ModelGPU& model = it->second;
if (!model.isValid() || model.isSmoke || model.isInvisibleTrap) continue;
float worldRadius = model.boundRadius * instance.scale;
float cullRadius = worldRadius;
if (model.disableAnimation) {
@ -2146,15 +2157,13 @@ void M2Renderer::render(VkCommandBuffer cmd, VkDescriptorSet perFrameSet, const
effectiveMaxDistSq *= 2.6f;
}
if (model.isGroundDetail) {
// Keep clutter local so distant grass doesn't overdraw the scene.
effectiveMaxDistSq *= 0.75f;
}
// Removed aggressive small-object distance caps to prevent city pop-out
// Small props (barrels, lanterns, etc.) now use same distance as larger objects
if (distSq > maxPossibleDistSq) continue;
if (distSq > effectiveMaxDistSq) continue;
// Frustum cull with moderate padding to prevent edge pop-out during camera rotation
// Reduced from 2.5x to 1.5x for better performance
// Frustum cull with padding
float paddedRadius = std::max(cullRadius * 1.5f, cullRadius + 3.0f);
if (cullRadius > 0.0f && !frustum.intersectsSphere(instance.position, paddedRadius)) continue;
@ -2179,6 +2188,7 @@ void M2Renderer::render(VkCommandBuffer cmd, VkDescriptorSet perFrameSet, const
int texCoordSet;
int useBones;
int isFoliage;
float fadeAlpha;
};
// Bind per-frame descriptor set (set 0) — shared across all draws
@ -2390,10 +2400,10 @@ void M2Renderer::render(VkCommandBuffer cmd, VkDescriptorSet perFrameSet, const
currentPipeline = desiredPipeline;
}
// Update material UBO with per-draw dynamic values (fadeAlpha, interiorDarken)
// Update material UBO with per-draw dynamic values (interiorDarken, forceCutout overrides)
// Note: fadeAlpha is in push constants (per-draw) to avoid shared-UBO race
if (batch.materialUBOMapped) {
auto* mat = static_cast<M2MaterialUBO*>(batch.materialUBOMapped);
mat->fadeAlpha = instanceFadeAlpha;
mat->interiorDarken = insideInterior ? 1.0f : 0.0f;
if (batch.colorKeyBlack) {
mat->colorKeyThreshold = (effectiveBlendMode == 4 || effectiveBlendMode == 5) ? 0.7f : 0.08f;
@ -2419,6 +2429,7 @@ void M2Renderer::render(VkCommandBuffer cmd, VkDescriptorSet perFrameSet, const
pc.texCoordSet = static_cast<int>(batch.textureUnit);
pc.useBones = useBones ? 1 : 0;
pc.isFoliage = model.shadowWindFoliage ? 1 : 0;
pc.fadeAlpha = instanceFadeAlpha;
vkCmdPushConstants(cmd, pipelineLayout_, VK_SHADER_STAGE_VERTEX_BIT, 0, sizeof(pc), &pc);
vkCmdDrawIndexed(cmd, batch.indexCount, 1, batch.indexStart, 0, 0);
@ -2427,7 +2438,7 @@ void M2Renderer::render(VkCommandBuffer cmd, VkDescriptorSet perFrameSet, const
}
// Render glow sprites as billboarded additive point lights
if (!glowSprites_.empty() && particleAdditivePipeline_ && m2ParticleVB_ && glowTexDescSet_) {
if (!glowSprites_.empty() && particleAdditivePipeline_ && glowVB_ && glowTexDescSet_) {
vkCmdBindPipeline(cmd, VK_PIPELINE_BIND_POINT_GRAPHICS, particleAdditivePipeline_);
vkCmdBindDescriptorSets(cmd, VK_PIPELINE_BIND_POINT_GRAPHICS,
particlePipelineLayout_, 0, 1, &perFrameSet, 0, nullptr);
@ -2454,11 +2465,11 @@ void M2Renderer::render(VkCommandBuffer cmd, VkDescriptorSet perFrameSet, const
glowData.push_back(0.0f);
}
size_t uploadCount = std::min(glowSprites_.size(), MAX_M2_PARTICLES);
memcpy(m2ParticleVBMapped_, glowData.data(), uploadCount * 9 * sizeof(float));
size_t uploadCount = std::min(glowSprites_.size(), MAX_GLOW_SPRITES);
memcpy(glowVBMapped_, glowData.data(), uploadCount * 9 * sizeof(float));
VkDeviceSize offset = 0;
vkCmdBindVertexBuffers(cmd, 0, 1, &m2ParticleVB_, &offset);
vkCmdBindVertexBuffers(cmd, 0, 1, &glowVB_, &offset);
vkCmdDraw(cmd, static_cast<uint32_t>(uploadCount), 1, 0, 0);
}