Fix main-thread hang from terrain finalization; two-pass M2 rendering; tile streaming improvements

Hang/GPU device lost fix:
- M2_INSTANCES and WMO_INSTANCES finalization phases now create instances
  incrementally (32 M2 instances per step / 4 WMO instances per step) instead
  of all at once, eliminating the >1s main-thread stalls that caused GPU fence
  timeouts and device loss

M2 two-pass transparent rendering:
- Opaque/alpha-test batches render in pass 1, transparent/additive in pass 2
  (sorted back-to-front), to fix wing transparency showing terrain instead of
  trees
- Adds a hasTransparentBatches flag so pass 2 can skip models with no
  transparent batches

Tile streaming improvements:
- Sort new load queue entries nearest-first so critical tiles load before
  distant ones during fast taxi flight
- Increase taxi load radius 6→8 tiles and unload radius 9→12 tiles for better coverage

Water refraction gated on FSR:
- Disable water refraction when FSR is not active (bugged without upscaling)
- Auto-disable refraction if FSR is turned off while refraction was on
This commit is contained in:
Kelsi 2026-03-09 20:58:49 -07:00
parent a49c013c89
commit e0d47040d3
6 changed files with 234 additions and 38 deletions

View file

@ -122,6 +122,7 @@ struct M2ModelGPU {
bool isKoboldFlame = false; // Model name matches kobold+(candle/torch/mine) (precomputed)
bool isLavaModel = false; // Model name contains lava/molten/magma (UV scroll fallback)
bool hasTextureAnimation = false; // True if any batch has UV animation
bool hasTransparentBatches = false; // True if any batch uses alpha-blend or additive (blendMode >= 2)
uint8_t availableLODs = 0; // Bitmask: bit N set if any batch has submeshLevel==N
// Particle emitter data (kept from M2Model)

View file

@ -152,9 +152,11 @@ struct FinalizingTile {
FinalizationPhase phase = FinalizationPhase::TERRAIN;
// Progress indices within current phase
size_t m2ModelIndex = 0; // Next M2 model to upload
size_t wmoModelIndex = 0; // Next WMO model to upload
size_t wmoDoodadIndex = 0; // Next WMO doodad to upload
size_t m2ModelIndex = 0; // Next M2 model to upload
size_t m2InstanceIndex = 0; // Next M2 placement to instantiate
size_t wmoModelIndex = 0; // Next WMO model to upload
size_t wmoInstanceIndex = 0; // Next WMO placement to instantiate
size_t wmoDoodadIndex = 0; // Next WMO doodad to upload
// Incremental terrain upload state (splits TERRAIN phase across frames)
bool terrainPreloaded = false; // True after preloaded textures uploaded

View file

@ -1108,8 +1108,8 @@ void Application::update(float deltaTime) {
// Taxi flights move fast (32 u/s) — load further ahead so terrain is ready
// before the camera arrives. Keep updates frequent to spot new tiles early.
renderer->getTerrainManager()->setUpdateInterval(onTaxi ? 0.033f : 0.033f);
renderer->getTerrainManager()->setLoadRadius(onTaxi ? 6 : 4);
renderer->getTerrainManager()->setUnloadRadius(onTaxi ? 9 : 7);
renderer->getTerrainManager()->setLoadRadius(onTaxi ? 8 : 4);
renderer->getTerrainManager()->setUnloadRadius(onTaxi ? 12 : 7);
renderer->getTerrainManager()->setTaxiStreamingMode(onTaxi);
}
lastTaxiFlight_ = onTaxi;

View file

@ -1357,6 +1357,7 @@ bool M2Renderer::loadModel(const pipeline::M2Model& model, uint32_t modelId) {
if (batch.materialIndex < model.materials.size()) {
bgpu.blendMode = model.materials[batch.materialIndex].blendMode;
bgpu.materialFlags = model.materials[batch.materialIndex].flags;
if (bgpu.blendMode >= 2) gpuModel.hasTransparentBatches = true;
}
// Copy LOD level from batch
@ -2349,7 +2350,11 @@ void M2Renderer::render(VkCommandBuffer cmd, VkDescriptorSet perFrameSet, const
sortedVisible_.push_back({i, instance.modelId, distSq, effectiveMaxDistSq});
}
// Sort by modelId to minimize vertex/index buffer rebinds
// Two-pass rendering: opaque/alpha-test first (depth write ON), then transparent/additive
// (depth write OFF, sorted back-to-front) so transparent geometry composites correctly
// against all opaque geometry rather than only against what was rendered before it.
// Pass 1: sort by modelId for minimum buffer rebinds (opaque batches)
std::sort(sortedVisible_.begin(), sortedVisible_.end(),
[](const VisibleEntry& a, const VisibleEntry& b) { return a.modelId < b.modelId; });
@ -2377,6 +2382,7 @@ void M2Renderer::render(VkCommandBuffer cmd, VkDescriptorSet perFrameSet, const
// Start with opaque pipeline
vkCmdBindPipeline(cmd, VK_PIPELINE_BIND_POINT_GRAPHICS, opaquePipeline_);
currentPipeline = opaquePipeline_;
bool opaquePass = true; // Pass 1 = opaque, pass 2 = transparent (set below for second pass)
for (const auto& entry : sortedVisible_) {
if (entry.index >= instances.size()) continue;
@ -2475,6 +2481,15 @@ void M2Renderer::render(VkCommandBuffer cmd, VkDescriptorSet perFrameSet, const
if (!model.isGroundDetail && batch.submeshLevel != targetLOD) continue;
if (batch.batchOpacity < 0.01f) continue;
// Two-pass gate: pass 1 = opaque/cutout only, pass 2 = transparent/additive only.
// Alpha-test (blendMode==1) and spell effects that force-additive are handled
// by their effective blend mode below; gate on raw blendMode here.
{
const bool rawTransparent = (batch.blendMode >= 2) || model.isSpellEffect;
if (opaquePass && rawTransparent) continue; // skip transparent in opaque pass
if (!opaquePass && !rawTransparent) continue; // skip opaque in transparent pass
}
const bool koboldFlameCard = batch.colorKeyBlack && model.isKoboldFlame;
const bool smallCardLikeBatch =
(batch.glowSize <= 1.35f) ||
@ -2628,6 +2643,163 @@ void M2Renderer::render(VkCommandBuffer cmd, VkDescriptorSet perFrameSet, const
}
}
// Pass 2: transparent/additive batches — sort back-to-front by distance so
// overlapping transparent geometry composites in the correct painter's order.
opaquePass = false;
std::sort(sortedVisible_.begin(), sortedVisible_.end(),
[](const VisibleEntry& a, const VisibleEntry& b) { return a.distSq > b.distSq; });
currentModelId = UINT32_MAX;
currentModel = nullptr;
// Reset pipeline to opaque so the first transparent bind always sets explicitly
currentPipeline = opaquePipeline_;
for (const auto& entry : sortedVisible_) {
if (entry.index >= instances.size()) continue;
auto& instance = instances[entry.index];
// Quick skip: if model has no transparent batches at all, skip it entirely
if (entry.modelId != currentModelId) {
auto mdlIt = models.find(entry.modelId);
if (mdlIt == models.end()) continue;
if (!mdlIt->second.hasTransparentBatches && !mdlIt->second.isSpellEffect) continue;
}
// Reuse the same rendering logic as pass 1 (via fallthrough — the batch gate
// `!opaquePass && !rawTransparent → continue` handles opaque skipping)
if (entry.modelId != currentModelId) {
currentModelId = entry.modelId;
auto mdlIt = models.find(currentModelId);
if (mdlIt == models.end()) continue;
currentModel = &mdlIt->second;
if (!currentModel->vertexBuffer) continue;
VkDeviceSize offset = 0;
vkCmdBindVertexBuffers(cmd, 0, 1, &currentModel->vertexBuffer, &offset);
vkCmdBindIndexBuffer(cmd, currentModel->indexBuffer, 0, VK_INDEX_TYPE_UINT16);
}
const M2ModelGPU& model = *currentModel;
// Distance-based fade alpha (same as pass 1)
float fadeAlpha = 1.0f;
float fadeFrac = model.disableAnimation ? 0.55f : fadeStartFraction;
float fadeStartDistSq = entry.effectiveMaxDistSq * fadeFrac * fadeFrac;
if (entry.distSq > fadeStartDistSq) {
fadeAlpha = std::clamp((entry.effectiveMaxDistSq - entry.distSq) /
(entry.effectiveMaxDistSq - fadeStartDistSq), 0.0f, 1.0f);
}
float instanceFadeAlpha = fadeAlpha;
if (model.isGroundDetail) instanceFadeAlpha *= 0.82f;
if (model.isInstancePortal) instanceFadeAlpha *= 0.12f;
bool modelNeedsAnimation = model.hasAnimation && !model.disableAnimation;
if (modelNeedsAnimation && instance.boneMatrices.empty()) continue;
bool needsBones = modelNeedsAnimation && !instance.boneMatrices.empty();
if (needsBones && (!instance.boneBuffer[frameIndex] || !instance.boneSet[frameIndex])) continue;
bool useBones = needsBones;
if (useBones && instance.boneSet[frameIndex]) {
vkCmdBindDescriptorSets(cmd, VK_PIPELINE_BIND_POINT_GRAPHICS,
pipelineLayout_, 2, 1, &instance.boneSet[frameIndex], 0, nullptr);
}
uint16_t desiredLOD = 0;
if (entry.distSq > 150.0f * 150.0f) desiredLOD = 3;
else if (entry.distSq > 80.0f * 80.0f) desiredLOD = 2;
else if (entry.distSq > 40.0f * 40.0f) desiredLOD = 1;
uint16_t targetLOD = desiredLOD;
if (desiredLOD > 0 && !(model.availableLODs & (1u << desiredLOD))) targetLOD = 0;
const bool foliageLikeModel = model.isFoliageLike;
const bool particleDominantEffect = model.isSpellEffect &&
!model.particleEmitters.empty() && model.batches.size() <= 2;
for (const auto& batch : model.batches) {
if (batch.indexCount == 0) continue;
if (!model.isGroundDetail && batch.submeshLevel != targetLOD) continue;
if (batch.batchOpacity < 0.01f) continue;
// Pass 2 gate: only transparent/additive batches
{
const bool rawTransparent = (batch.blendMode >= 2) || model.isSpellEffect;
if (!rawTransparent) continue;
}
// Skip glow sprites (handled after loop)
const bool batchUnlit = (batch.materialFlags & 0x01) != 0;
const bool shouldUseGlowSprite =
!batch.colorKeyBlack &&
(model.isElvenLike || model.isLanternLike) &&
!model.isSpellEffect &&
(batch.glowSize <= 1.35f || (batch.lanternGlowHint && batch.glowSize <= 6.0f)) &&
(batch.lanternGlowHint || (batch.blendMode >= 3) ||
(batch.colorKeyBlack && batchUnlit && batch.blendMode >= 1));
if (shouldUseGlowSprite) {
const bool cardLikeSkipMesh = (batch.blendMode >= 3) || batch.colorKeyBlack || batchUnlit;
if ((batch.glowCardLike && model.isLanternLike) || (cardLikeSkipMesh && !model.isLanternLike))
continue;
}
glm::vec2 uvOffset(0.0f, 0.0f);
if (batch.textureAnimIndex != 0xFFFF && model.hasTextureAnimation) {
uint16_t lookupIdx = batch.textureAnimIndex;
if (lookupIdx < model.textureTransformLookup.size()) {
uint16_t transformIdx = model.textureTransformLookup[lookupIdx];
if (transformIdx < model.textureTransforms.size()) {
const auto& tt = model.textureTransforms[transformIdx];
glm::vec3 trans = interpVec3(tt.translation,
instance.currentSequenceIndex, instance.animTime,
glm::vec3(0.0f), model.globalSequenceDurations);
uvOffset = glm::vec2(trans.x, trans.y);
}
}
}
if (model.isLavaModel && uvOffset == glm::vec2(0.0f)) {
static auto startTime2 = std::chrono::steady_clock::now();
float t = std::chrono::duration<float>(std::chrono::steady_clock::now() - startTime2).count();
uvOffset = glm::vec2(t * 0.03f, -t * 0.08f);
}
uint8_t effectiveBlendMode = batch.blendMode;
if (model.isSpellEffect) {
if (effectiveBlendMode <= 1) effectiveBlendMode = 3;
else if (effectiveBlendMode == 4 || effectiveBlendMode == 5) effectiveBlendMode = 3;
}
VkPipeline desiredPipeline;
switch (effectiveBlendMode) {
case 2: desiredPipeline = alphaPipeline_; break;
default: desiredPipeline = additivePipeline_; break;
}
if (desiredPipeline != currentPipeline) {
vkCmdBindPipeline(cmd, VK_PIPELINE_BIND_POINT_GRAPHICS, desiredPipeline);
currentPipeline = desiredPipeline;
}
if (batch.materialUBOMapped) {
auto* mat = static_cast<M2MaterialUBO*>(batch.materialUBOMapped);
mat->interiorDarken = insideInterior ? 1.0f : 0.0f;
if (batch.colorKeyBlack)
mat->colorKeyThreshold = (effectiveBlendMode == 4 || effectiveBlendMode == 5) ? 0.7f : 0.08f;
}
if (!batch.materialSet) continue;
vkCmdBindDescriptorSets(cmd, VK_PIPELINE_BIND_POINT_GRAPHICS,
pipelineLayout_, 1, 1, &batch.materialSet, 0, nullptr);
M2PushConstants pc;
pc.model = instance.modelMatrix;
pc.uvOffset = uvOffset;
pc.texCoordSet = static_cast<int>(batch.textureUnit);
pc.useBones = useBones ? 1 : 0;
pc.isFoliage = model.shadowWindFoliage ? 1 : 0;
pc.fadeAlpha = instanceFadeAlpha;
if (particleDominantEffect) continue; // emission-only mesh
vkCmdPushConstants(cmd, pipelineLayout_, VK_SHADER_STAGE_VERTEX_BIT, 0, sizeof(pc), &pc);
vkCmdDrawIndexed(cmd, batch.indexCount, 1, batch.indexStart, 0, 0);
lastDrawCallCount++;
}
}
// Render glow sprites as billboarded additive point lights
if (!glowSprites_.empty() && particleAdditivePipeline_ && glowVB_ && glowTexDescSet_) {
vkCmdBindPipeline(cmd, VK_PIPELINE_BIND_POINT_GRAPHICS, particleAdditivePipeline_);

View file

@ -885,13 +885,15 @@ bool TerrainManager::advanceFinalization(FinalizingTile& ft) {
}
case FinalizationPhase::M2_INSTANCES: {
// Create all M2 instances (lightweight struct allocation, no GPU work)
if (m2Renderer) {
int loadedDoodads = 0;
int skippedDedup = 0;
for (const auto& p : pending->m2Placements) {
// Create M2 instances incrementally to avoid main-thread stalls.
// createInstance includes an O(n) bone-sibling scan that becomes expensive
// on dense tiles with many placements and a large existing instance list.
if (m2Renderer && ft.m2InstanceIndex < pending->m2Placements.size()) {
constexpr size_t kInstancesPerStep = 32;
size_t created = 0;
while (ft.m2InstanceIndex < pending->m2Placements.size() && created < kInstancesPerStep) {
const auto& p = pending->m2Placements[ft.m2InstanceIndex++];
if (p.uniqueId != 0 && placedDoodadIds.count(p.uniqueId)) {
skippedDedup++;
continue;
}
uint32_t instId = m2Renderer->createInstance(p.modelId, p.position, p.rotation, p.scale);
@ -901,12 +903,14 @@ bool TerrainManager::advanceFinalization(FinalizingTile& ft) {
placedDoodadIds.insert(p.uniqueId);
ft.tileUniqueIds.push_back(p.uniqueId);
}
loadedDoodads++;
created++;
}
}
if (ft.m2InstanceIndex < pending->m2Placements.size()) {
return false; // More instances to create — yield
}
LOG_DEBUG(" Loaded doodads for tile [", x, ",", y, "]: ",
loadedDoodads, " instances (", ft.uploadedM2ModelIds.size(), " new models, ",
skippedDedup, " dedup skipped)");
ft.m2InstanceIds.size(), " instances (", ft.uploadedM2ModelIds.size(), " new models)");
}
ft.phase = FinalizationPhase::WMO_MODELS;
return false;
@ -948,17 +952,15 @@ bool TerrainManager::advanceFinalization(FinalizingTile& ft) {
}
case FinalizationPhase::WMO_INSTANCES: {
// Create all WMO instances + load WMO liquids
if (wmoRenderer) {
int loadedWMOs = 0;
int loadedLiquids = 0;
int skippedWmoDedup = 0;
for (auto& wmoReady : pending->wmoModels) {
// Create WMO instances incrementally to avoid stalls on tiles with many WMOs.
if (wmoRenderer && ft.wmoInstanceIndex < pending->wmoModels.size()) {
constexpr size_t kWmoInstancesPerStep = 4;
size_t created = 0;
while (ft.wmoInstanceIndex < pending->wmoModels.size() && created < kWmoInstancesPerStep) {
auto& wmoReady = pending->wmoModels[ft.wmoInstanceIndex++];
if (wmoReady.uniqueId != 0 && placedWmoIds.count(wmoReady.uniqueId)) {
skippedWmoDedup++;
continue;
}
uint32_t wmoInstId = wmoRenderer->createInstance(wmoReady.modelId, wmoReady.position, wmoReady.rotation);
if (wmoInstId) {
ft.wmoInstanceIds.push_back(wmoInstId);
@ -966,8 +968,6 @@ bool TerrainManager::advanceFinalization(FinalizingTile& ft) {
placedWmoIds.insert(wmoReady.uniqueId);
ft.tileWmoUniqueIds.push_back(wmoReady.uniqueId);
}
loadedWMOs++;
// Load WMO liquids (canals, pools, etc.)
if (waterRenderer) {
glm::mat4 modelMatrix = glm::mat4(1.0f);
@ -977,25 +977,21 @@ bool TerrainManager::advanceFinalization(FinalizingTile& ft) {
modelMatrix = glm::rotate(modelMatrix, wmoReady.rotation.x, glm::vec3(1.0f, 0.0f, 0.0f));
for (const auto& group : wmoReady.model.groups) {
if (!group.liquid.hasLiquid()) continue;
// Skip interior water/ocean but keep magma/slime (e.g. Ironforge lava)
if (group.flags & 0x2000) {
uint16_t lt = group.liquid.materialId;
uint8_t basicType = (lt == 0) ? 0 : ((lt - 1) % 4);
if (basicType < 2) continue;
}
waterRenderer->loadFromWMO(group.liquid, modelMatrix, wmoInstId);
loadedLiquids++;
}
}
created++;
}
}
if (loadedWMOs > 0 || skippedWmoDedup > 0) {
LOG_DEBUG(" Loaded WMOs for tile [", x, ",", y, "]: ",
loadedWMOs, " instances, ", skippedWmoDedup, " dedup skipped");
}
if (loadedLiquids > 0) {
LOG_DEBUG(" Loaded WMO liquids for tile [", x, ",", y, "]: ", loadedLiquids);
if (ft.wmoInstanceIndex < pending->wmoModels.size()) {
return false; // More WMO instances to create — yield
}
LOG_DEBUG(" Loaded WMOs for tile [", x, ",", y, "]: ", ft.wmoInstanceIds.size(), " instances");
}
ft.phase = FinalizationPhase::WMO_DOODADS;
return false;
@ -2213,10 +2209,16 @@ void TerrainManager::streamTiles() {
return false;
};
// Enqueue tiles in radius around current tile for async loading
// Enqueue tiles in radius around current tile for async loading.
// Collect all newly-needed tiles, then sort by distance so the closest
// (most visible) tiles get loaded first. This is critical during taxi
// flight where new tiles enter the radius faster than they can load.
{
std::lock_guard<std::mutex> lock(queueMutex);
struct PendingEntry { TileCoord coord; int distSq; };
std::vector<PendingEntry> newTiles;
for (int dy = -loadRadius; dy <= loadRadius; dy++) {
for (int dx = -loadRadius; dx <= loadRadius; dx++) {
int tileX = currentTile.x + dx;
@ -2240,10 +2242,19 @@ void TerrainManager::streamTiles() {
if (failedTiles.find(coord) != failedTiles.end()) continue;
if (shouldSkipMissingAdt(coord)) continue;
loadQueue.push_back(coord);
newTiles.push_back({coord, dx*dx + dy*dy});
pendingTiles[coord] = true;
}
}
// Sort nearest tiles first so workers service the most visible tiles
std::sort(newTiles.begin(), newTiles.end(),
[](const PendingEntry& a, const PendingEntry& b) { return a.distSq < b.distSq; });
// Insert at front so new close tiles preempt any distant tiles already queued
for (auto it = newTiles.rbegin(); it != newTiles.rend(); ++it) {
loadQueue.push_front(it->coord);
}
}
// Notify workers that there's work

View file

@ -7184,9 +7184,19 @@ void GameScreen::renderSettingsWindow() {
saveSettings();
}
}
if (ImGui::Checkbox("Water Refraction", &pendingWaterRefraction)) {
if (renderer) renderer->setWaterRefractionEnabled(pendingWaterRefraction);
saveSettings();
{
bool fsrActive = renderer && (renderer->isFSREnabled() || renderer->isFSR2Enabled());
if (!fsrActive && pendingWaterRefraction) {
// FSR was disabled while refraction was on — auto-disable
pendingWaterRefraction = false;
if (renderer) renderer->setWaterRefractionEnabled(false);
}
if (!fsrActive) ImGui::BeginDisabled();
if (ImGui::Checkbox("Water Refraction (requires FSR)", &pendingWaterRefraction)) {
if (renderer) renderer->setWaterRefractionEnabled(pendingWaterRefraction);
saveSettings();
}
if (!fsrActive) ImGui::EndDisabled();
}
{
const char* aaLabels[] = { "Off", "2x MSAA", "4x MSAA", "8x MSAA" };