Compare commits

...

5 commits

Author SHA1 Message Date
Kelsi
bec7a678aa Fix missing floors in dungeon instances by restricting LOD detection
Some checks are pending
Build / Build (arm64) (push) Waiting to run
Build / Build (x86-64) (push) Waiting to run
Build / Build (macOS arm64) (push) Waiting to run
Build / Build (windows-arm64) (push) Waiting to run
Build / Build (windows-x86-64) (push) Waiting to run
Security / CodeQL (C/C++) (push) Waiting to run
Security / Semgrep (push) Waiting to run
Security / Sanitizer Build (ASan/UBSan) (push) Waiting to run
Low-vertex groups (<100 verts) were incorrectly marked as distance-only
LOD shells in small WMOs like Stockades. Now only applies this heuristic
to large WMOs (50+ groups) where it's needed for city exterior shells.
2026-03-04 09:25:00 -08:00
Kelsi
84b04446c1 Per-instance NPC hair/skin textures, fix binary search float comparison
- NPC hair/skin textures now use per-instance overrides instead of shared
  model-level textures, so each NPC shows its own hair color/style
- Hair/skin DBC lookup runs for every NPC instance (including cached models)
  rather than only on first load
- Fix keyframe binary search to use float comparison matching original
  linear scan semantics
2026-03-04 09:19:02 -08:00
Kelsi
e6acb4ac9a Optimize animation hotpaths: binary keyframe search, eliminate sqrt calls
- Replace O(n) linear keyframe search with O(log n) binary search in both
  M2 and Character renderers (runs thousands of times per frame)
- Smoke particle removal: swap-and-pop instead of O(n²) vector erase
- Character render backface cull: eliminate sqrt via squared comparison
- Quaternion validation: use length² instead of sqrt-based length check
2026-03-04 08:33:56 -08:00
Kelsi
2e432fc123 Eliminate per-instance hash lookups in M2 render/shadow culling loops
Use cached model flags (isValid, isSmoke, isInvisibleTrap, isGroundDetail,
disableAnimation, boundRadius) on M2Instance instead of models.find() in
the hot culling paths. Also complete cached flag initialization in
createInstanceWithMatrix().
2026-03-04 08:28:21 -08:00
Kelsi
30fa9836d9 Fix glow sprite flashing, move fadeAlpha to push constants, throttle character bones
- Glow sprites now use dedicated vertex buffer (glowVB_) separate from
  M2 particle buffer to prevent data race when renderM2Particles()
  overwrites glow data mid-flight
- Move fadeAlpha from shared material UBO to per-draw push constants,
  eliminating cross-instance alpha race on non-double-buffered UBOs
- Smooth adaptive render distance transitions to prevent pop-in/out
  at instance count thresholds (1000/2000)
- Distance-tiered character bone throttling: near (<30u) every frame,
  mid (30-60u) every 3rd, far (60-120u) every 6th frame
- Skip weapon instance animation updates (transforms set by parent bones)
2026-03-04 08:17:32 -08:00
10 changed files with 231 additions and 92 deletions

View file

@ -34,6 +34,7 @@ layout(location = 1) in vec3 Normal;
layout(location = 2) in vec2 TexCoord;
layout(location = 3) flat in vec3 InstanceOrigin;
layout(location = 4) in float ModelHeight;
layout(location = 5) in float vFadeAlpha;
layout(location = 0) out vec4 outColor;
@ -175,16 +176,16 @@ void main() {
float fogFactor = clamp((fogParams.y - dist) / (fogParams.y - fogParams.x), 0.0, 1.0);
result = mix(fogColor.rgb, result, fogFactor);
float outAlpha = texColor.a * fadeAlpha;
float outAlpha = texColor.a * vFadeAlpha;
// Cutout materials should not remain partially transparent after discard,
// otherwise foliage cards look view-dependent.
if (alphaTest != 0 || colorKeyBlack != 0) {
outAlpha = fadeAlpha;
outAlpha = vFadeAlpha;
}
// Foliage cutout should stay opaque after alpha discard to avoid
// view-angle translucency artifacts.
if (alphaTest == 2 || alphaTest == 3) {
outAlpha = 1.0 * fadeAlpha;
outAlpha = 1.0 * vFadeAlpha;
}
outColor = vec4(result, outAlpha);
}

Binary file not shown.

View file

@ -19,6 +19,7 @@ layout(push_constant) uniform Push {
int texCoordSet;
int useBones;
int isFoliage;
float fadeAlpha;
} push;
layout(set = 2, binding = 0) readonly buffer BoneSSBO {
@ -37,6 +38,7 @@ layout(location = 1) out vec3 Normal;
layout(location = 2) out vec2 TexCoord;
layout(location = 3) flat out vec3 InstanceOrigin;
layout(location = 4) out float ModelHeight;
layout(location = 5) out float vFadeAlpha;
void main() {
vec4 pos = vec4(aPos, 1.0);
@ -86,6 +88,7 @@ void main() {
InstanceOrigin = push.model[3].xyz;
ModelHeight = pos.z;
vFadeAlpha = push.fadeAlpha;
gl_Position = projection * view * worldPos;
}

Binary file not shown.

View file

@ -178,6 +178,9 @@ private:
bool hasOverrideModelMatrix = false;
glm::mat4 overrideModelMatrix{1.0f};
// Bone update throttling (skip frames for distant characters)
uint32_t boneUpdateCounter = 0;
// Per-instance bone SSBO (double-buffered per frame)
VkBuffer boneBuffer[2] = {};
VmaAllocation boneAlloc[2] = {};

View file

@ -179,6 +179,9 @@ struct M2Instance {
bool cachedDisableAnimation = false;
bool cachedIsSmoke = false;
bool cachedHasParticleEmitters = false;
bool cachedIsGroundDetail = false;
bool cachedIsInvisibleTrap = false;
bool cachedIsValid = false;
float cachedBoundRadius = 0.0f;
// Frame-skip optimization (update distant animations less frequently)
@ -371,6 +374,11 @@ private:
::VkBuffer m2ParticleVB_ = VK_NULL_HANDLE;
VmaAllocation m2ParticleVBAlloc_ = VK_NULL_HANDLE;
void* m2ParticleVBMapped_ = nullptr;
// Dedicated glow sprite vertex buffer (separate from particle VB to avoid data race)
static constexpr size_t MAX_GLOW_SPRITES = 2000;
::VkBuffer glowVB_ = VK_NULL_HANDLE;
VmaAllocation glowVBAlloc_ = VK_NULL_HANDLE;
void* glowVBMapped_ = nullptr;
std::unordered_map<uint32_t, M2ModelGPU> models;
std::vector<M2Instance> instances;
@ -477,6 +485,7 @@ private:
// Cached camera state from update() for frustum-culling bones
glm::vec3 cachedCamPos_ = glm::vec3(0.0f);
float cachedMaxRenderDistSq_ = 0.0f;
float smoothedRenderDist_ = 1000.0f; // Smoothed render distance to prevent flickering
// Thread count for parallel bone animation
uint32_t numAnimThreads_ = 1;

View file

@ -4790,6 +4790,77 @@ void Application::spawnOnlineCreature(uint64_t guid, uint32_t displayId, float x
return;
}
// Per-instance hair/skin texture overrides — runs for ALL NPCs (including cached models)
// so that each NPC gets its own hair/skin color regardless of model sharing.
{
auto itDD = displayDataMap_.find(displayId);
if (itDD != displayDataMap_.end() && itDD->second.extraDisplayId != 0) {
auto itExtra2 = humanoidExtraMap_.find(itDD->second.extraDisplayId);
if (itExtra2 != humanoidExtraMap_.end()) {
const auto& extra = itExtra2->second;
const auto* md = charRenderer->getModelData(modelId);
if (md) {
auto charSectionsDbc2 = assetManager->loadDBC("CharSections.dbc");
if (charSectionsDbc2) {
const auto* csL = pipeline::getActiveDBCLayout()
? pipeline::getActiveDBCLayout()->getLayout("CharSections") : nullptr;
uint32_t tgtRace = static_cast<uint32_t>(extra.raceId);
uint32_t tgtSex = static_cast<uint32_t>(extra.sexId);
// Look up hair texture (section 3)
for (uint32_t r = 0; r < charSectionsDbc2->getRecordCount(); r++) {
uint32_t rId = charSectionsDbc2->getUInt32(r, csL ? (*csL)["RaceID"] : 1);
uint32_t sId = charSectionsDbc2->getUInt32(r, csL ? (*csL)["SexID"] : 2);
if (rId != tgtRace || sId != tgtSex) continue;
uint32_t sec = charSectionsDbc2->getUInt32(r, csL ? (*csL)["BaseSection"] : 3);
if (sec != 3) continue;
uint32_t var = charSectionsDbc2->getUInt32(r, csL ? (*csL)["VariationIndex"] : 4);
uint32_t col = charSectionsDbc2->getUInt32(r, csL ? (*csL)["ColorIndex"] : 5);
if (var != static_cast<uint32_t>(extra.hairStyleId)) continue;
if (col != static_cast<uint32_t>(extra.hairColorId)) continue;
std::string hairPath = charSectionsDbc2->getString(r, csL ? (*csL)["Texture1"] : 6);
if (!hairPath.empty()) {
rendering::VkTexture* hairTex = charRenderer->loadTexture(hairPath);
if (hairTex) {
for (size_t ti = 0; ti < md->textures.size(); ti++) {
if (md->textures[ti].type == 6) {
charRenderer->setTextureSlotOverride(instanceId, static_cast<uint16_t>(ti), hairTex);
}
}
}
}
break;
}
// Look up skin texture (section 0) for per-instance skin color
for (uint32_t r = 0; r < charSectionsDbc2->getRecordCount(); r++) {
uint32_t rId = charSectionsDbc2->getUInt32(r, csL ? (*csL)["RaceID"] : 1);
uint32_t sId = charSectionsDbc2->getUInt32(r, csL ? (*csL)["SexID"] : 2);
if (rId != tgtRace || sId != tgtSex) continue;
uint32_t sec = charSectionsDbc2->getUInt32(r, csL ? (*csL)["BaseSection"] : 3);
if (sec != 0) continue;
uint32_t col = charSectionsDbc2->getUInt32(r, csL ? (*csL)["ColorIndex"] : 5);
if (col != static_cast<uint32_t>(extra.skinId)) continue;
std::string skinPath = charSectionsDbc2->getString(r, csL ? (*csL)["Texture1"] : 6);
if (!skinPath.empty()) {
rendering::VkTexture* skinTex = charRenderer->loadTexture(skinPath);
if (skinTex) {
for (size_t ti = 0; ti < md->textures.size(); ti++) {
uint32_t tt = md->textures[ti].type;
if (tt == 1 || tt == 11) {
charRenderer->setTextureSlotOverride(instanceId, static_cast<uint16_t>(ti), skinTex);
}
}
}
}
break;
}
}
}
}
}
}
// Optional humanoid NPC geoset mask. Disabled by default because forcing geosets
// causes long-standing visual artifacts on some models (missing waist, phantom
// bracers, flickering apron overlays). Prefer model defaults.

View file

@ -1423,20 +1423,53 @@ void CharacterRenderer::update(float deltaTime, const glm::vec3& cameraPos) {
}
// Only update animations for nearby characters (performance optimization)
// Collect instances that need updates
// Collect instances that need bone recomputation, with distance-based throttling
std::vector<std::reference_wrapper<CharacterInstance>> toUpdate;
toUpdate.reserve(instances.size());
for (auto& pair : instances) {
float distSq = glm::distance2(pair.second.position, cameraPos);
if (distSq < animUpdateRadiusSq) {
toUpdate.push_back(std::ref(pair.second));
auto& inst = pair.second;
// Skip weapon instances — their transforms are set by parent bones
if (inst.hasOverrideModelMatrix) continue;
float distSq = glm::distance2(inst.position, cameraPos);
if (distSq >= animUpdateRadiusSq) continue;
// Always advance animation time (cheap)
auto modelIt = models.find(inst.modelId);
if (modelIt != models.end() && !modelIt->second.data.sequences.empty()) {
if (inst.currentSequenceIndex < 0) {
inst.currentSequenceIndex = 0;
inst.currentAnimationId = modelIt->second.data.sequences[0].id;
}
const auto& seq = modelIt->second.data.sequences[inst.currentSequenceIndex];
inst.animationTime += deltaTime * 1000.0f;
if (seq.duration > 0 && inst.animationTime >= static_cast<float>(seq.duration)) {
if (inst.animationLoop) {
inst.animationTime = std::fmod(inst.animationTime, static_cast<float>(seq.duration));
} else {
inst.animationTime = static_cast<float>(seq.duration);
}
}
}
// Distance-tiered bone throttling: near=every frame, mid=every 3rd, far=every 6th
uint32_t boneInterval = 1;
if (distSq > 60.0f * 60.0f) boneInterval = 6;
else if (distSq > 30.0f * 30.0f) boneInterval = 3;
inst.boneUpdateCounter++;
bool needsBones = (inst.boneUpdateCounter >= boneInterval) || inst.boneMatrices.empty();
if (needsBones) {
inst.boneUpdateCounter = 0;
toUpdate.push_back(std::ref(inst));
}
}
const size_t updatedCount = toUpdate.size();
// Thread animation updates in chunks to avoid spawning one task per instance.
// Thread bone matrix computation in chunks
if (updatedCount >= 8 && numAnimThreads_ > 1) {
static const size_t minAnimWorkPerThread = std::max<size_t>(
16, envSizeOrDefault("WOWEE_CHAR_ANIM_WORK_PER_THREAD", 64));
@ -1446,7 +1479,7 @@ void CharacterRenderer::update(float deltaTime, const glm::vec3& cameraPos) {
if (numThreads <= 1) {
for (auto& instRef : toUpdate) {
updateAnimation(instRef.get(), deltaTime);
calculateBoneMatrices(instRef.get());
}
} else {
const size_t chunkSize = updatedCount / numThreads;
@ -1461,9 +1494,9 @@ void CharacterRenderer::update(float deltaTime, const glm::vec3& cameraPos) {
for (size_t t = 0; t < numThreads; t++) {
size_t end = start + chunkSize + (t < remainder ? 1 : 0);
animFutures_.push_back(std::async(std::launch::async,
[this, &toUpdate, start, end, deltaTime]() {
[this, &toUpdate, start, end]() {
for (size_t i = start; i < end; i++) {
updateAnimation(toUpdate[i].get(), deltaTime);
calculateBoneMatrices(toUpdate[i].get());
}
}));
start = end;
@ -1474,9 +1507,8 @@ void CharacterRenderer::update(float deltaTime, const glm::vec3& cameraPos) {
}
}
} else {
// Sequential for small counts (avoid thread overhead)
for (auto& instRef : toUpdate) {
updateAnimation(instRef.get(), deltaTime);
calculateBoneMatrices(instRef.get());
}
}
@ -1548,13 +1580,12 @@ int CharacterRenderer::findKeyframeIndex(const std::vector<uint32_t>& timestamps
if (timestamps.empty()) return -1;
if (timestamps.size() == 1) return 0;
// Binary search for the keyframe bracket
for (size_t i = 0; i < timestamps.size() - 1; i++) {
if (time < static_cast<float>(timestamps[i + 1])) {
return static_cast<int>(i);
}
}
return static_cast<int>(timestamps.size() - 2);
// Binary search using float comparison to match original semantics exactly
auto it = std::upper_bound(timestamps.begin(), timestamps.end(), time,
[](float t, uint32_t ts) { return t < static_cast<float>(ts); });
if (it == timestamps.begin()) return 0;
size_t idx = static_cast<size_t>(it - timestamps.begin()) - 1;
return static_cast<int>(std::min(idx, timestamps.size() - 2));
}
glm::vec3 CharacterRenderer::interpolateVec3(const pipeline::M2AnimationTrack& track,
@ -1598,8 +1629,8 @@ glm::quat CharacterRenderer::interpolateQuat(const pipeline::M2AnimationTrack& t
if (keys.timestamps.empty() || keys.quatValues.empty()) return identity;
auto safeQuat = [&](const glm::quat& q) -> glm::quat {
float len = glm::length(q);
if (len < 0.001f || std::isnan(len)) return identity;
float lenSq = q.x*q.x + q.y*q.y + q.z*q.z + q.w*q.w;
if (lenSq < 0.000001f || std::isnan(lenSq)) return identity;
return q;
};
@ -1741,9 +1772,14 @@ void CharacterRenderer::render(VkCommandBuffer cmd, VkDescriptorSet perFrameSet,
float distSq = glm::dot(toInst, toInst);
if (distSq > renderRadiusSq) continue;
if (distSq > nearNoConeCullSq) {
float invDist = 1.0f / std::sqrt(distSq);
float facingDot = glm::dot(toInst, camForward) * invDist;
if (facingDot < backfaceDotCull) continue;
// Backface cull without sqrt. Cull when dot(toInst, camForward) / |toInst| < threshold:
//   threshold >= 0: dot < 0 || dot² < threshold² * distSq
//   threshold <  0: dot < 0 && dot² > threshold² * distSq
float rawDot = glm::dot(toInst, camForward);
if (backfaceDotCull >= 0.0f) {
if (rawDot < 0.0f || rawDot * rawDot < backfaceDotCull * backfaceDotCull * distSq) continue;
} else {
if (rawDot < 0.0f && rawDot * rawDot > backfaceDotCull * backfaceDotCull * distSq) continue;
}
}
}

View file

@ -401,7 +401,7 @@ bool M2Renderer::initialize(VkContext* ctx, VkDescriptorSetLayout perFrameLayout
VkPushConstantRange pushRange{};
pushRange.stageFlags = VK_SHADER_STAGE_VERTEX_BIT;
pushRange.offset = 0;
pushRange.size = 84; // mat4(64) + vec2(8) + int(4) + int(4) + int(4)
pushRange.size = 88; // mat4(64) + vec2(8) + int(4) + int(4) + int(4) + float(4)
VkPipelineLayoutCreateInfo ci{VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO};
ci.setLayoutCount = 3;
@ -591,6 +591,11 @@ bool M2Renderer::initialize(VkContext* ctx, VkDescriptorSetLayout perFrameLayout
bci.size = MAX_M2_PARTICLES * 9 * sizeof(float);
vmaCreateBuffer(vkCtx_->getAllocator(), &bci, &aci, &m2ParticleVB_, &m2ParticleVBAlloc_, &allocInfo);
m2ParticleVBMapped_ = allocInfo.pMappedData;
// Dedicated glow sprite buffer (separate from particle VB to avoid data race)
bci.size = MAX_GLOW_SPRITES * 9 * sizeof(float);
vmaCreateBuffer(vkCtx_->getAllocator(), &bci, &aci, &glowVB_, &glowVBAlloc_, &allocInfo);
glowVBMapped_ = allocInfo.pMappedData;
}
// --- Create white fallback texture ---
@ -689,6 +694,7 @@ void M2Renderer::shutdown() {
// Clean up particle buffers
if (smokeVB_) { vmaDestroyBuffer(alloc, smokeVB_, smokeVBAlloc_); smokeVB_ = VK_NULL_HANDLE; }
if (m2ParticleVB_) { vmaDestroyBuffer(alloc, m2ParticleVB_, m2ParticleVBAlloc_); m2ParticleVB_ = VK_NULL_HANDLE; }
if (glowVB_) { vmaDestroyBuffer(alloc, glowVB_, glowVBAlloc_); glowVB_ = VK_NULL_HANDLE; }
smokeParticles.clear();
// Destroy pipelines
@ -1611,6 +1617,9 @@ uint32_t M2Renderer::createInstance(uint32_t modelId, const glm::vec3& position,
instance.cachedIsSmoke = mdlRef.isSmoke;
instance.cachedHasParticleEmitters = !mdlRef.particleEmitters.empty();
instance.cachedBoundRadius = mdlRef.boundRadius;
instance.cachedIsGroundDetail = mdlRef.isGroundDetail;
instance.cachedIsInvisibleTrap = mdlRef.isInvisibleTrap;
instance.cachedIsValid = mdlRef.isValid();
// Initialize animation: play first sequence (usually Stand/Idle)
const auto& mdl = mdlRef;
@ -1685,6 +1694,9 @@ uint32_t M2Renderer::createInstanceWithMatrix(uint32_t modelId, const glm::mat4&
instance.cachedIsSmoke = mdl2.isSmoke;
instance.cachedHasParticleEmitters = !mdl2.particleEmitters.empty();
instance.cachedBoundRadius = mdl2.boundRadius;
instance.cachedIsGroundDetail = mdl2.isGroundDetail;
instance.cachedIsInvisibleTrap = mdl2.isInvisibleTrap;
instance.cachedIsValid = mdl2.isValid();
// Initialize animation
if (mdl2.hasAnimation && !mdl2.disableAnimation && !mdl2.sequences.empty()) {
@ -1729,12 +1741,12 @@ uint32_t M2Renderer::createInstanceWithMatrix(uint32_t modelId, const glm::mat4&
// Locate the keyframe bracket that contains `time`.
//
// Returns the index i of the keyframe that starts the bracket, so callers can
// interpolate between timestamps[i] and timestamps[i + 1]:
//   - -1 when `timestamps` is empty,
//   -  0 when there is a single keyframe or `time` precedes the first one,
//   - otherwise the last index whose timestamp is <= `time`, clamped to
//     size - 2 so that i + 1 is always a valid index.
//
// The comparison is done in float (each timestamp is converted) so results
// match the linear scan this replaces. Binary search assumes `timestamps` is
// sorted ascending; NOTE(review): confirm the loader guarantees this.
static int findKeyframeIndex(const std::vector<uint32_t>& timestamps, float time) {
    if (timestamps.empty()) return -1;
    if (timestamps.size() == 1) return 0;

    // First timestamp strictly greater than `time`; the bracket starts one before it.
    const auto it = std::upper_bound(timestamps.begin(), timestamps.end(), time,
        [](float t, uint32_t ts) { return t < static_cast<float>(ts); });
    if (it == timestamps.begin()) return 0;

    const size_t idx = static_cast<size_t>(it - timestamps.begin()) - 1;
    // Clamp: at or past the final keyframe we still return the last full bracket.
    return static_cast<int>(std::min(idx, timestamps.size() - 2));
}
// Resolve sequence index and time for a track, handling global sequences.
@ -1791,8 +1803,8 @@ static glm::quat interpQuat(const pipeline::M2AnimationTrack& track,
const auto& keys = track.sequences[si];
if (keys.timestamps.empty() || keys.quatValues.empty()) return identity;
auto safe = [&](const glm::quat& q) -> glm::quat {
float len = glm::length(q);
if (len < 0.001f || std::isnan(len)) return identity;
float lenSq = q.x*q.x + q.y*q.y + q.z*q.z + q.w*q.w;
if (lenSq < 0.000001f || std::isnan(lenSq)) return identity;
return q;
};
if (keys.quatValues.size() == 1) return safe(keys.quatValues[0]);
@ -1895,21 +1907,23 @@ void M2Renderer::update(float deltaTime, const glm::vec3& cameraPos, const glm::
smokeEmitAccum = 0.0f;
}
// --- Update existing smoke particles ---
for (auto it = smokeParticles.begin(); it != smokeParticles.end(); ) {
it->life += deltaTime;
if (it->life >= it->maxLife) {
it = smokeParticles.erase(it);
// --- Update existing smoke particles (swap-and-pop for O(1) removal) ---
for (size_t i = 0; i < smokeParticles.size(); ) {
auto& p = smokeParticles[i];
p.life += deltaTime;
if (p.life >= p.maxLife) {
smokeParticles[i] = smokeParticles.back();
smokeParticles.pop_back();
continue;
}
it->position += it->velocity * deltaTime;
it->velocity.z *= 0.98f; // Slight deceleration
it->velocity.x += distDrift(smokeRng) * deltaTime;
it->velocity.y += distDrift(smokeRng) * deltaTime;
p.position += p.velocity * deltaTime;
p.velocity.z *= 0.98f; // Slight deceleration
p.velocity.x += distDrift(smokeRng) * deltaTime;
p.velocity.y += distDrift(smokeRng) * deltaTime;
// Grow from 1.0 to 3.5 over lifetime
float t = it->life / it->maxLife;
it->size = 1.0f + t * 2.5f;
++it;
float t = p.life / p.maxLife;
p.size = 1.0f + t * 2.5f;
++i;
}
// --- Normal M2 animation update ---
@ -2104,10 +2118,16 @@ void M2Renderer::render(VkCommandBuffer cmd, VkDescriptorSet perFrameSet, const
lastDrawCallCount = 0;
// Adaptive render distance: tiered by instance density to cap draw calls
const float maxRenderDistance = (instances.size() > 2000) ? 300.0f
: (instances.size() > 1000) ? 500.0f
: 1000.0f;
// Adaptive render distance: smoothed to prevent pop-in/pop-out flickering
const float targetRenderDist = (instances.size() > 2000) ? 300.0f
: (instances.size() > 1000) ? 500.0f
: 1000.0f;
// Smooth transitions: shrink slowly (avoid popping out nearby objects)
const float shrinkRate = 0.005f; // very slow decrease
const float growRate = 0.05f; // faster increase
float blendRate = (targetRenderDist < smoothedRenderDist_) ? shrinkRate : growRate;
smoothedRenderDist_ = glm::mix(smoothedRenderDist_, targetRenderDist, blendRate);
const float maxRenderDistance = smoothedRenderDist_;
const float maxRenderDistanceSq = maxRenderDistance * maxRenderDistance;
const float fadeStartFraction = 0.75f;
const glm::vec3 camPos = camera.getPosition();
@ -2127,34 +2147,29 @@ void M2Renderer::render(VkCommandBuffer cmd, VkDescriptorSet perFrameSet, const
for (uint32_t i = 0; i < static_cast<uint32_t>(instances.size()); ++i) {
const auto& instance = instances[i];
// Fast early rejection: skip instances that are definitely too far
// Use cached model flags — no hash lookup needed
if (!instance.cachedIsValid || instance.cachedIsSmoke || instance.cachedIsInvisibleTrap) continue;
glm::vec3 toCam = instance.position - camPos;
float distSq = glm::dot(toCam, toCam);
if (distSq > maxPossibleDistSq) continue; // Early out before model lookup
if (distSq > maxPossibleDistSq) continue;
auto it = models.find(instance.modelId);
if (it == models.end()) continue;
const M2ModelGPU& model = it->second;
if (!model.isValid() || model.isSmoke || model.isInvisibleTrap) continue;
float worldRadius = model.boundRadius * instance.scale;
float worldRadius = instance.cachedBoundRadius * instance.scale;
float cullRadius = worldRadius;
if (model.disableAnimation) {
if (instance.cachedDisableAnimation) {
cullRadius = std::max(cullRadius, 3.0f);
}
float effectiveMaxDistSq = maxRenderDistanceSq * std::max(1.0f, cullRadius / 12.0f);
if (model.disableAnimation) {
if (instance.cachedDisableAnimation) {
effectiveMaxDistSq *= 2.6f;
}
if (model.isGroundDetail) {
// Keep clutter local so distant grass doesn't overdraw the scene.
if (instance.cachedIsGroundDetail) {
effectiveMaxDistSq *= 0.75f;
}
// Removed aggressive small-object distance caps to prevent city pop-out
// Small props (barrels, lanterns, etc.) now use same distance as larger objects
if (distSq > effectiveMaxDistSq) continue;
// Frustum cull with moderate padding to prevent edge pop-out during camera rotation
// Reduced from 2.5x to 1.5x for better performance
// Frustum cull with padding
float paddedRadius = std::max(cullRadius * 1.5f, cullRadius + 3.0f);
if (cullRadius > 0.0f && !frustum.intersectsSphere(instance.position, paddedRadius)) continue;
@ -2179,6 +2194,7 @@ void M2Renderer::render(VkCommandBuffer cmd, VkDescriptorSet perFrameSet, const
int texCoordSet;
int useBones;
int isFoliage;
float fadeAlpha;
};
// Bind per-frame descriptor set (set 0) — shared across all draws
@ -2268,12 +2284,11 @@ void M2Renderer::render(VkCommandBuffer cmd, VkDescriptorSet perFrameSet, const
}
}
// LOD selection based on distance
float dist = std::sqrt(entry.distSq);
// LOD selection based on squared distance (avoid sqrt)
uint16_t desiredLOD = 0;
if (dist > 150.0f) desiredLOD = 3;
else if (dist > 80.0f) desiredLOD = 2;
else if (dist > 40.0f) desiredLOD = 1;
if (entry.distSq > 150.0f * 150.0f) desiredLOD = 3;
else if (entry.distSq > 80.0f * 80.0f) desiredLOD = 2;
else if (entry.distSq > 40.0f * 40.0f) desiredLOD = 1;
uint16_t targetLOD = desiredLOD;
if (desiredLOD > 0) {
@ -2390,10 +2405,10 @@ void M2Renderer::render(VkCommandBuffer cmd, VkDescriptorSet perFrameSet, const
currentPipeline = desiredPipeline;
}
// Update material UBO with per-draw dynamic values (fadeAlpha, interiorDarken)
// Update material UBO with per-draw dynamic values (interiorDarken, forceCutout overrides)
// Note: fadeAlpha is in push constants (per-draw) to avoid shared-UBO race
if (batch.materialUBOMapped) {
auto* mat = static_cast<M2MaterialUBO*>(batch.materialUBOMapped);
mat->fadeAlpha = instanceFadeAlpha;
mat->interiorDarken = insideInterior ? 1.0f : 0.0f;
if (batch.colorKeyBlack) {
mat->colorKeyThreshold = (effectiveBlendMode == 4 || effectiveBlendMode == 5) ? 0.7f : 0.08f;
@ -2419,6 +2434,7 @@ void M2Renderer::render(VkCommandBuffer cmd, VkDescriptorSet perFrameSet, const
pc.texCoordSet = static_cast<int>(batch.textureUnit);
pc.useBones = useBones ? 1 : 0;
pc.isFoliage = model.shadowWindFoliage ? 1 : 0;
pc.fadeAlpha = instanceFadeAlpha;
vkCmdPushConstants(cmd, pipelineLayout_, VK_SHADER_STAGE_VERTEX_BIT, 0, sizeof(pc), &pc);
vkCmdDrawIndexed(cmd, batch.indexCount, 1, batch.indexStart, 0, 0);
@ -2427,7 +2443,7 @@ void M2Renderer::render(VkCommandBuffer cmd, VkDescriptorSet perFrameSet, const
}
// Render glow sprites as billboarded additive point lights
if (!glowSprites_.empty() && particleAdditivePipeline_ && m2ParticleVB_ && glowTexDescSet_) {
if (!glowSprites_.empty() && particleAdditivePipeline_ && glowVB_ && glowTexDescSet_) {
vkCmdBindPipeline(cmd, VK_PIPELINE_BIND_POINT_GRAPHICS, particleAdditivePipeline_);
vkCmdBindDescriptorSets(cmd, VK_PIPELINE_BIND_POINT_GRAPHICS,
particlePipelineLayout_, 0, 1, &perFrameSet, 0, nullptr);
@ -2439,26 +2455,24 @@ void M2Renderer::render(VkCommandBuffer cmd, VkDescriptorSet perFrameSet, const
vkCmdPushConstants(cmd, particlePipelineLayout_, VK_SHADER_STAGE_FRAGMENT_BIT, 0,
sizeof(particlePush), &particlePush);
// Build and upload vertex data
std::vector<float> glowData;
glowData.reserve(glowSprites_.size() * 9);
for (const auto& gs : glowSprites_) {
glowData.push_back(gs.worldPos.x);
glowData.push_back(gs.worldPos.y);
glowData.push_back(gs.worldPos.z);
glowData.push_back(gs.color.r);
glowData.push_back(gs.color.g);
glowData.push_back(gs.color.b);
glowData.push_back(gs.color.a);
glowData.push_back(gs.size);
glowData.push_back(0.0f);
// Write glow vertex data directly to mapped buffer (no temp vector)
size_t uploadCount = std::min(glowSprites_.size(), MAX_GLOW_SPRITES);
float* dst = static_cast<float*>(glowVBMapped_);
for (size_t gi = 0; gi < uploadCount; gi++) {
const auto& gs = glowSprites_[gi];
*dst++ = gs.worldPos.x;
*dst++ = gs.worldPos.y;
*dst++ = gs.worldPos.z;
*dst++ = gs.color.r;
*dst++ = gs.color.g;
*dst++ = gs.color.b;
*dst++ = gs.color.a;
*dst++ = gs.size;
*dst++ = 0.0f;
}
size_t uploadCount = std::min(glowSprites_.size(), MAX_M2_PARTICLES);
memcpy(m2ParticleVBMapped_, glowData.data(), uploadCount * 9 * sizeof(float));
VkDeviceSize offset = 0;
vkCmdBindVertexBuffers(cmd, 0, 1, &m2ParticleVB_, &offset);
vkCmdBindVertexBuffers(cmd, 0, 1, &glowVB_, &offset);
vkCmdDraw(cmd, static_cast<uint32_t>(uploadCount), 1, 0, 0);
}
@ -2737,6 +2751,9 @@ void M2Renderer::renderShadow(VkCommandBuffer cmd, const glm::mat4& lightSpaceMa
const M2ModelGPU* currentModel = nullptr;
for (const auto& instance : instances) {
// Use cached flags to skip early without hash lookup
if (!instance.cachedIsValid || instance.cachedIsSmoke || instance.cachedIsInvisibleTrap) continue;
// Distance cull against shadow frustum
glm::vec3 diff = instance.position - shadowCenter;
if (glm::dot(diff, diff) > shadowRadiusSq) continue;
@ -2744,7 +2761,6 @@ void M2Renderer::renderShadow(VkCommandBuffer cmd, const glm::mat4& lightSpaceMa
auto modelIt = models.find(instance.modelId);
if (modelIt == models.end()) continue;
const M2ModelGPU& model = modelIt->second;
if (!model.isValid() || model.isSmoke || model.isInvisibleTrap) continue;
// Filter: only draw foliage models in foliage pass, non-foliage in non-foliage pass
if (model.shadowWindFoliage != foliagePass) continue;

View file

@ -536,7 +536,7 @@ bool WMORenderer::loadModel(const pipeline::WMOModel& model, uint32_t id) {
// Flag 0x80 on INDOOR groups in large WMOs = interior cathedral shell
bool hasFlag80 = (wmoGroup.flags & 0x80) != 0;
bool isIndoor = (wmoGroup.flags & 0x2000) != 0;
if (nVerts < 100 || (alwaysDraw && nVerts < 5000) || (isFacade && isLargeWmo) || (isCityShell && isLargeWmo) || (hasFlag80 && isIndoor && isLargeWmo)) {
if ((nVerts < 100 && isLargeWmo) || (alwaysDraw && nVerts < 5000) || (isFacade && isLargeWmo) || (isCityShell && isLargeWmo) || (hasFlag80 && isIndoor && isLargeWmo)) {
resources.isLOD = true;
}
modelData.groups.push_back(resources);