From 86505ad377cd1dc5f068c32f6dffb9fb40e3ffb2 Mon Sep 17 00:00:00 2001 From: Kelsi Date: Wed, 25 Feb 2026 03:39:45 -0800 Subject: [PATCH] Merge per-chunk water surfaces, restore incremental tile finalization, and pin main thread CPU affinity MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Water deduplication: merge per-chunk water surfaces into per-tile surfaces to reduce Vulkan descriptor set usage from ~8900 to ~100-200. Uses hybrid approach — groups with ≤4 chunks stay per-chunk (preserving shore detail), larger groups merge into 128×128 tile-wide surfaces. Re-add incremental tile finalization state machine (reverted in 9b90ab0) to spread GPU uploads across frames and prevent city stuttering. Pin main thread to CPU core 0 and exclude worker threads from core 0 to reduce scheduling jitter on the render/game loop. --- include/rendering/terrain_manager.hpp | 56 ++- include/rendering/water_renderer.hpp | 2 +- src/core/application.cpp | 26 ++ src/rendering/terrain_manager.cpp | 536 ++++++++++++++------------ src/rendering/water_renderer.cpp | 323 +++++++++++++--- 5 files changed, 629 insertions(+), 314 deletions(-) diff --git a/include/rendering/terrain_manager.hpp b/include/rendering/terrain_manager.hpp index 7d4d21b7..da99ed53 100644 --- a/include/rendering/terrain_manager.hpp +++ b/include/rendering/terrain_manager.hpp @@ -123,6 +123,41 @@ struct PendingTile { std::unordered_map preloadedTextures; }; +/** + * Phases for incremental tile finalization (one bounded unit of work per call) + */ +enum class FinalizationPhase { + TERRAIN, // Upload terrain mesh + textures + water + M2_MODELS, // Upload ONE M2 model per call + M2_INSTANCES, // Create all M2 instances (lightweight struct allocation) + WMO_MODELS, // Upload ONE WMO model per call + WMO_INSTANCES, // Create all WMO instances + load WMO liquids + WMO_DOODADS, // Upload ONE WMO doodad M2 per call + WATER, // Generate water ambient emitters + AMBIENT, // Register ambient emitters + commit tile + DONE // Fully finalized +}; + +/** + * In-progress tile finalization state — tracks progress across frames + */ +struct FinalizingTile { + std::shared_ptr pending; + FinalizationPhase phase = FinalizationPhase::TERRAIN; + + // Progress indices within current phase + size_t m2ModelIndex = 0; // Next M2 model to upload + size_t wmoModelIndex = 0; // Next WMO model to upload + size_t wmoDoodadIndex = 0; // Next WMO doodad to upload + + // Accumulated results (built up across phases) + std::vector m2InstanceIds; + std::vector wmoInstanceIds; + std::vector tileUniqueIds; + std::vector tileWmoUniqueIds; + std::unordered_set uploadedM2ModelIds; +}; + /** * Terrain manager for multi-tile terrain streaming * @@ -219,8 +254,8 @@ public: int getLoadedTileCount() const { return static_cast(loadedTiles.size()); } int getPendingTileCount() const { return static_cast(pendingTiles.size()); } int getReadyQueueCount() const { return static_cast(readyQueue.size()); } - /** Total unfinished tiles (worker threads + ready queue) */ - int getRemainingTileCount() const { return static_cast(pendingTiles.size() + readyQueue.size()); } + /** Total unfinished tiles (worker threads + ready queue + finalizing) */ + int getRemainingTileCount() const { return static_cast(pendingTiles.size() + readyQueue.size() + finalizingTiles_.size()); } TileCoord getCurrentTile() const { return currentTile; } /** Process all ready tiles immediately (use during loading screens) */ @@ -254,9 +289,10 @@ private: std::shared_ptr prepareTile(int x, int y); /** - * Main thread: upload prepared tile data to GPU + * Advance incremental finalization of a tile (one bounded unit of work). + * Returns true when the tile is fully finalized (phase == DONE). */ - void finalizeTile(const std::shared_ptr& pending); + bool advanceFinalization(FinalizingTile& ft); /** * Background worker thread loop @@ -341,16 +377,8 @@ private: // Dedup set for WMO placements across tile boundaries (prevents rendering Stormwind 16x) std::unordered_set placedWmoIds; - // Progressive M2 upload queue (spread heavy uploads across frames) - struct PendingM2Upload { - uint32_t modelId; - pipeline::M2Model model; - std::string path; - }; - std::queue m2UploadQueue_; - static constexpr int MAX_M2_UPLOADS_PER_FRAME = 5; // Upload up to 5 models per frame - - void processM2UploadQueue(); + // Tiles currently being incrementally finalized across frames + std::deque finalizingTiles_; struct GroundEffectEntry { std::array doodadIds{{0, 0, 0, 0}}; diff --git a/include/rendering/water_renderer.hpp b/include/rendering/water_renderer.hpp index af255ca5..cf04cbc5 100644 --- a/include/rendering/water_renderer.hpp +++ b/include/rendering/water_renderer.hpp @@ -160,7 +160,7 @@ private: VkDescriptorSetLayout sceneSetLayout = VK_NULL_HANDLE; VkDescriptorPool sceneDescPool = VK_NULL_HANDLE; VkDescriptorSet sceneSet = VK_NULL_HANDLE; - static constexpr uint32_t MAX_WATER_SETS = 2048; + static constexpr uint32_t MAX_WATER_SETS = 16384; VkSampler sceneColorSampler = VK_NULL_HANDLE; VkSampler sceneDepthSampler = VK_NULL_HANDLE; diff --git a/src/core/application.cpp b/src/core/application.cpp index e5ca6e54..fec2ca5f 100644 --- a/src/core/application.cpp +++ b/src/core/application.cpp @@ -55,6 +55,12 @@ #include #include +#include +#ifdef __linux__ +#include +#include +#endif + namespace wowee { namespace core { @@ -230,6 +236,26 @@ bool Application::initialize() { void Application::run() { LOG_INFO("Starting main loop"); + + // Pin main thread to a dedicated CPU core to reduce scheduling jitter +#ifdef __linux__ + { + int numCores = static_cast(std::thread::hardware_concurrency()); + if (numCores >= 2) { + // Use core 0 for the main thread (typically the highest-clocked core) + cpu_set_t cpuset; + CPU_ZERO(&cpuset); + CPU_SET(0, &cpuset); + int rc = pthread_setaffinity_np(pthread_self(), sizeof(cpu_set_t), &cpuset); + if (rc == 0) { + LOG_INFO("Main thread pinned to CPU core 0 (", numCores, " cores available)"); + } else { + LOG_WARNING("Failed to pin main thread to CPU core 0 (error ", rc, ")"); + } + } + } +#endif + const bool frameProfileEnabled = envFlagEnabled("WOWEE_FRAME_PROFILE", false); if (frameProfileEnabled) { LOG_INFO("Frame timing profile enabled (WOWEE_FRAME_PROFILE=1)"); diff --git a/src/rendering/terrain_manager.cpp b/src/rendering/terrain_manager.cpp index da92981a..3dbd2bd6 100644 --- a/src/rendering/terrain_manager.cpp +++ b/src/rendering/terrain_manager.cpp @@ -22,6 +22,11 @@ #include #include +#ifdef __linux__ +#include +#include +#endif + namespace wowee { namespace rendering { @@ -226,7 +231,9 @@ bool TerrainManager::loadTile(int x, int y) { return false; } - finalizeTile(pending); + FinalizingTile ft; + ft.pending = std::move(pending); + while (!advanceFinalization(ft)) {} return true; } @@ -648,176 +655,157 @@ void TerrainManager::logMissingAdtOnce(const std::string& adtPath) { } } -void TerrainManager::finalizeTile(const std::shared_ptr& pending) { +bool TerrainManager::advanceFinalization(FinalizingTile& ft) { + auto& pending = ft.pending; int x = pending->coord.x; int y = pending->coord.y; TileCoord coord = pending->coord; - LOG_DEBUG("Finalizing tile [", x, ",", y, "] (GPU upload)"); + switch (ft.phase) { - // Check if tile was already loaded (race condition guard) or failed - if (loadedTiles.find(coord) != loadedTiles.end()) { - return; - } - if (failedTiles.find(coord) != failedTiles.end()) { - return; - } - - // Upload pre-loaded textures to the GL cache so loadTerrain avoids file I/O - if (!pending->preloadedTextures.empty()) { - terrainRenderer->uploadPreloadedTextures(pending->preloadedTextures); - } - - // Upload terrain to GPU - if (!terrainRenderer->loadTerrain(pending->mesh, pending->terrain.textures, x, y)) { - LOG_ERROR("Failed to upload terrain to GPU for tile [", x, ",", y, "]"); - failedTiles[coord] = true; - return; - } - - // Load water - if (waterRenderer) { - waterRenderer->loadFromTerrain(pending->terrain, true, x, y); - } - - // Register water surface ambient sound emitters - if (ambientSoundManager) { - // Scan ADT water data for water surfaces - int waterEmitterCount = 0; - for (size_t chunkIdx = 0; chunkIdx < pending->terrain.waterData.size(); chunkIdx++) { - const auto& chunkWater = pending->terrain.waterData[chunkIdx]; - if (!chunkWater.hasWater()) continue; - - // Calculate chunk position in world coordinates - int chunkX = chunkIdx % 16; - int chunkY = chunkIdx / 16; - - // WoW coordinates: Each ADT tile is 533.33 units, each chunk is 533.33/16 = 33.333 units - // Tile origin in GL space - float tileOriginX = (32.0f - x) * 533.33333f; - float tileOriginY = (32.0f - y) * 533.33333f; - - // Chunk center position - float chunkCenterX = tileOriginX + (chunkX + 0.5f) * 33.333333f; - float chunkCenterY = tileOriginY + (chunkY + 0.5f) * 33.333333f; - - // Use first layer for height and type detection - if (!chunkWater.layers.empty()) { - const auto& layer = chunkWater.layers[0]; - float waterHeight = layer.minHeight; - - // Determine water type and register appropriate emitter - // liquidType: 0=water/lake, 1=ocean, 2=magma, 3=slime - if (layer.liquidType == 0) { - // Lake/river water - add water surface emitter every 32 chunks to avoid spam - if (chunkIdx % 32 == 0) { - PendingTile::AmbientEmitter emitter; - emitter.position = glm::vec3(chunkCenterX, chunkCenterY, waterHeight); - emitter.type = 4; // WATER_SURFACE - pending->ambientEmitters.push_back(emitter); - waterEmitterCount++; - } - } else if (layer.liquidType == 1) { - // Ocean - add ocean emitter every 64 chunks (oceans are very large) - if (chunkIdx % 64 == 0) { - PendingTile::AmbientEmitter emitter; - emitter.position = glm::vec3(chunkCenterX, chunkCenterY, waterHeight); - emitter.type = 4; // WATER_SURFACE (could add separate OCEAN type later) - pending->ambientEmitters.push_back(emitter); - waterEmitterCount++; - } - } - // Skip magma and slime for now (no ambient sounds for those) + case FinalizationPhase::TERRAIN: { + // Check if tile was already loaded or failed + if (loadedTiles.find(coord) != loadedTiles.end() || failedTiles.find(coord) != failedTiles.end()) { + { + std::lock_guard lock(queueMutex); + pendingTiles.erase(coord); } + ft.phase = FinalizationPhase::DONE; + return true; } - if (waterEmitterCount > 0) { + + LOG_DEBUG("Finalizing tile [", x, ",", y, "] (incremental)"); + + // Upload pre-loaded textures + if (!pending->preloadedTextures.empty()) { + terrainRenderer->uploadPreloadedTextures(pending->preloadedTextures); } + + // Upload terrain mesh to GPU + if (!terrainRenderer->loadTerrain(pending->mesh, pending->terrain.textures, x, y)) { + LOG_ERROR("Failed to upload terrain to GPU for tile [", x, ",", y, "]"); + failedTiles[coord] = true; + { + std::lock_guard lock(queueMutex); + pendingTiles.erase(coord); + } + ft.phase = FinalizationPhase::DONE; + return true; + } + + // Load water immediately after terrain (same frame) — water is now + // deduplicated to ~1-2 merged surfaces per tile, so this is fast. + if (waterRenderer) { + waterRenderer->loadFromTerrain(pending->terrain, true, x, y); + } + + // Ensure M2 renderer has asset manager + if (m2Renderer && assetManager) { + m2Renderer->initialize(nullptr, VK_NULL_HANDLE, assetManager); + } + + ft.phase = FinalizationPhase::M2_MODELS; + return false; } - std::vector m2InstanceIds; - std::vector wmoInstanceIds; - std::vector tileUniqueIds; - std::vector tileWmoUniqueIds; - - // Upload M2 models to GPU and create instances - if (m2Renderer && assetManager) { - // Always pass the latest asset manager. initialize() is idempotent and updates - // the pointer even when the renderer was initialized earlier without assets. - m2Renderer->initialize(nullptr, VK_NULL_HANDLE, assetManager); - - // Upload M2 models immediately (batching was causing hangs) - // The 5ms time budget in processReadyTiles() limits the spike - std::unordered_set uploadedModelIds; - for (auto& m2Ready : pending->m2Models) { + case FinalizationPhase::M2_MODELS: { + // Upload ONE M2 model per call + if (m2Renderer && ft.m2ModelIndex < pending->m2Models.size()) { + auto& m2Ready = pending->m2Models[ft.m2ModelIndex]; if (m2Renderer->loadModel(m2Ready.model, m2Ready.modelId)) { - uploadedModelIds.insert(m2Ready.modelId); + ft.uploadedM2ModelIds.insert(m2Ready.modelId); + } + ft.m2ModelIndex++; + // Stay in this phase until all models uploaded + if (ft.m2ModelIndex < pending->m2Models.size()) { + return false; } } - if (!uploadedModelIds.empty()) { - LOG_DEBUG(" Uploaded ", uploadedModelIds.size(), " M2 models for tile [", x, ",", y, "]"); + if (!ft.uploadedM2ModelIds.empty()) { + LOG_DEBUG(" Uploaded ", ft.uploadedM2ModelIds.size(), " M2 models for tile [", x, ",", y, "]"); } - - // Create instances (deduplicate by uniqueId across tile boundaries) - int loadedDoodads = 0; - int skippedDedup = 0; - for (const auto& p : pending->m2Placements) { - // Skip if this doodad was already placed by a neighboring tile - if (p.uniqueId != 0 && placedDoodadIds.count(p.uniqueId)) { - skippedDedup++; - continue; - } - uint32_t instId = m2Renderer->createInstance(p.modelId, p.position, p.rotation, p.scale); - if (instId) { - m2InstanceIds.push_back(instId); - if (p.uniqueId != 0) { - placedDoodadIds.insert(p.uniqueId); - tileUniqueIds.push_back(p.uniqueId); - } - loadedDoodads++; - } - } - - LOG_DEBUG(" Loaded doodads for tile [", x, ",", y, "]: ", - loadedDoodads, " instances (", uploadedModelIds.size(), " new models, ", - skippedDedup, " dedup skipped)"); + ft.phase = FinalizationPhase::M2_INSTANCES; + return false; } - // Upload WMO models to GPU and create instances - if (wmoRenderer && assetManager) { - // WMORenderer may be initialized before assets are ready; always re-pass assets. - wmoRenderer->initialize(nullptr, VK_NULL_HANDLE, assetManager); - - int loadedWMOs = 0; - int loadedLiquids = 0; - int skippedWmoDedup = 0; - for (auto& wmoReady : pending->wmoModels) { - // Deduplicate by placement uniqueId when available. - // Some ADTs use uniqueId=0, which is not safe for dedup. - if (wmoReady.uniqueId != 0 && placedWmoIds.count(wmoReady.uniqueId)) { - skippedWmoDedup++; - continue; + case FinalizationPhase::M2_INSTANCES: { + // Create all M2 instances (lightweight struct allocation, no GPU work) + if (m2Renderer) { + int loadedDoodads = 0; + int skippedDedup = 0; + for (const auto& p : pending->m2Placements) { + if (p.uniqueId != 0 && placedDoodadIds.count(p.uniqueId)) { + skippedDedup++; + continue; + } + uint32_t instId = m2Renderer->createInstance(p.modelId, p.position, p.rotation, p.scale); + if (instId) { + ft.m2InstanceIds.push_back(instId); + if (p.uniqueId != 0) { + placedDoodadIds.insert(p.uniqueId); + ft.tileUniqueIds.push_back(p.uniqueId); + } + loadedDoodads++; + } } + LOG_DEBUG(" Loaded doodads for tile [", x, ",", y, "]: ", + loadedDoodads, " instances (", ft.uploadedM2ModelIds.size(), " new models, ", + skippedDedup, " dedup skipped)"); + } + ft.phase = FinalizationPhase::WMO_MODELS; + return false; + } + + case FinalizationPhase::WMO_MODELS: { + // Upload ONE WMO model per call + if (wmoRenderer && assetManager) { + wmoRenderer->initialize(nullptr, VK_NULL_HANDLE, assetManager); + + if (ft.wmoModelIndex < pending->wmoModels.size()) { + auto& wmoReady = pending->wmoModels[ft.wmoModelIndex]; + // Deduplicate + if (wmoReady.uniqueId != 0 && placedWmoIds.count(wmoReady.uniqueId)) { + ft.wmoModelIndex++; + if (ft.wmoModelIndex < pending->wmoModels.size()) return false; + } else { + wmoRenderer->loadModel(wmoReady.model, wmoReady.modelId); + ft.wmoModelIndex++; + if (ft.wmoModelIndex < pending->wmoModels.size()) return false; + } + } + } + ft.phase = FinalizationPhase::WMO_INSTANCES; + return false; + } + + case FinalizationPhase::WMO_INSTANCES: { + // Create all WMO instances + load WMO liquids + if (wmoRenderer) { + int loadedWMOs = 0; + int loadedLiquids = 0; + int skippedWmoDedup = 0; + for (auto& wmoReady : pending->wmoModels) { + if (wmoReady.uniqueId != 0 && placedWmoIds.count(wmoReady.uniqueId)) { + skippedWmoDedup++; + continue; + } - if (wmoRenderer->loadModel(wmoReady.model, wmoReady.modelId)) { uint32_t wmoInstId = wmoRenderer->createInstance(wmoReady.modelId, wmoReady.position, wmoReady.rotation); if (wmoInstId) { - wmoInstanceIds.push_back(wmoInstId); + ft.wmoInstanceIds.push_back(wmoInstId); if (wmoReady.uniqueId != 0) { placedWmoIds.insert(wmoReady.uniqueId); - tileWmoUniqueIds.push_back(wmoReady.uniqueId); + ft.tileWmoUniqueIds.push_back(wmoReady.uniqueId); } loadedWMOs++; // Load WMO liquids (canals, pools, etc.) if (waterRenderer) { - // Compute the same model matrix as WMORenderer uses glm::mat4 modelMatrix = glm::mat4(1.0f); modelMatrix = glm::translate(modelMatrix, wmoReady.position); modelMatrix = glm::rotate(modelMatrix, wmoReady.rotation.z, glm::vec3(0.0f, 0.0f, 1.0f)); modelMatrix = glm::rotate(modelMatrix, wmoReady.rotation.y, glm::vec3(0.0f, 1.0f, 0.0f)); modelMatrix = glm::rotate(modelMatrix, wmoReady.rotation.x, glm::vec3(1.0f, 0.0f, 0.0f)); - - // Load liquids from each WMO group for (const auto& group : wmoReady.model.groups) { if (group.liquid.hasLiquid()) { waterRenderer->loadFromWMO(group.liquid, modelMatrix, wmoInstId); @@ -827,60 +815,126 @@ void TerrainManager::finalizeTile(const std::shared_ptr& pending) { } } } + if (loadedWMOs > 0 || skippedWmoDedup > 0) { + LOG_DEBUG(" Loaded WMOs for tile [", x, ",", y, "]: ", + loadedWMOs, " instances, ", skippedWmoDedup, " dedup skipped"); + } + if (loadedLiquids > 0) { + LOG_DEBUG(" Loaded WMO liquids for tile [", x, ",", y, "]: ", loadedLiquids); + } } - if (loadedWMOs > 0 || skippedWmoDedup > 0) { - LOG_DEBUG(" Loaded WMOs for tile [", x, ",", y, "]: ", - loadedWMOs, " instances, ", skippedWmoDedup, " dedup skipped"); - } - if (loadedLiquids > 0) { - LOG_DEBUG(" Loaded WMO liquids for tile [", x, ",", y, "]: ", loadedLiquids); - } + ft.phase = FinalizationPhase::WMO_DOODADS; + return false; + } - // Upload WMO doodad M2 models - if (m2Renderer) { - for (auto& doodad : pending->wmoDoodads) { - m2Renderer->loadModel(doodad.model, doodad.modelId); - uint32_t wmoDoodadInstId = m2Renderer->createInstanceWithMatrix( - doodad.modelId, doodad.modelMatrix, doodad.worldPosition); - if (wmoDoodadInstId) m2InstanceIds.push_back(wmoDoodadInstId); + case FinalizationPhase::WMO_DOODADS: { + // Upload ONE WMO doodad M2 per call + if (m2Renderer && ft.wmoDoodadIndex < pending->wmoDoodads.size()) { + auto& doodad = pending->wmoDoodads[ft.wmoDoodadIndex]; + m2Renderer->loadModel(doodad.model, doodad.modelId); + uint32_t wmoDoodadInstId = m2Renderer->createInstanceWithMatrix( + doodad.modelId, doodad.modelMatrix, doodad.worldPosition); + if (wmoDoodadInstId) ft.m2InstanceIds.push_back(wmoDoodadInstId); + ft.wmoDoodadIndex++; + if (ft.wmoDoodadIndex < pending->wmoDoodads.size()) return false; + } + ft.phase = FinalizationPhase::WATER; + return false; + } + + case FinalizationPhase::WATER: { + // Terrain water was already loaded in TERRAIN phase. + // Generate water ambient emitters here. + if (ambientSoundManager) { + for (size_t chunkIdx = 0; chunkIdx < pending->terrain.waterData.size(); chunkIdx++) { + const auto& chunkWater = pending->terrain.waterData[chunkIdx]; + if (!chunkWater.hasWater()) continue; + + int chunkX = chunkIdx % 16; + int chunkY = chunkIdx / 16; + float tileOriginX = (32.0f - x) * 533.33333f; + float tileOriginY = (32.0f - y) * 533.33333f; + float chunkCenterX = tileOriginX + (chunkX + 0.5f) * 33.333333f; + float chunkCenterY = tileOriginY + (chunkY + 0.5f) * 33.333333f; + + if (!chunkWater.layers.empty()) { + const auto& layer = chunkWater.layers[0]; + float waterHeight = layer.minHeight; + if (layer.liquidType == 0 && chunkIdx % 32 == 0) { + PendingTile::AmbientEmitter emitter; + emitter.position = glm::vec3(chunkCenterX, chunkCenterY, waterHeight); + emitter.type = 4; + pending->ambientEmitters.push_back(emitter); + } else if (layer.liquidType == 1 && chunkIdx % 64 == 0) { + PendingTile::AmbientEmitter emitter; + emitter.position = glm::vec3(chunkCenterX, chunkCenterY, waterHeight); + emitter.type = 4; + pending->ambientEmitters.push_back(emitter); + } + } } } - if (loadedWMOs > 0) { - LOG_DEBUG(" Loaded WMOs for tile [", x, ",", y, "]: ", loadedWMOs); - } + ft.phase = FinalizationPhase::AMBIENT; + return false; } - // Register ambient sound emitters with ambient sound manager - if (ambientSoundManager && !pending->ambientEmitters.empty()) { - for (const auto& emitter : pending->ambientEmitters) { - // Cast uint32_t type to AmbientSoundManager::AmbientType enum - auto type = static_cast(emitter.type); - ambientSoundManager->addEmitter(emitter.position, type); + case FinalizationPhase::AMBIENT: { + // Register ambient sound emitters + if (ambientSoundManager && !pending->ambientEmitters.empty()) { + for (const auto& emitter : pending->ambientEmitters) { + auto type = static_cast(emitter.type); + ambientSoundManager->addEmitter(emitter.position, type); + } } + + // Commit tile to loadedTiles + auto tile = std::make_unique(); + tile->coord = coord; + tile->terrain = std::move(pending->terrain); + tile->mesh = std::move(pending->mesh); + tile->loaded = true; + tile->m2InstanceIds = std::move(ft.m2InstanceIds); + tile->wmoInstanceIds = std::move(ft.wmoInstanceIds); + tile->wmoUniqueIds = std::move(ft.tileWmoUniqueIds); + tile->doodadUniqueIds = std::move(ft.tileUniqueIds); + getTileBounds(coord, tile->minX, tile->minY, tile->maxX, tile->maxY); + loadedTiles[coord] = std::move(tile); + putCachedTile(pending); + + // Now safe to remove from pendingTiles (tile is in loadedTiles) + { + std::lock_guard lock(queueMutex); + pendingTiles.erase(coord); + } + + LOG_DEBUG(" Finalized tile [", x, ",", y, "]"); + + ft.phase = FinalizationPhase::DONE; + return true; } - // Create tile entry - auto tile = std::make_unique(); - tile->coord = coord; - tile->terrain = std::move(pending->terrain); - tile->mesh = std::move(pending->mesh); - tile->loaded = true; - tile->m2InstanceIds = std::move(m2InstanceIds); - tile->wmoInstanceIds = std::move(wmoInstanceIds); - tile->wmoUniqueIds = std::move(tileWmoUniqueIds); - tile->doodadUniqueIds = std::move(tileUniqueIds); - - // Calculate world bounds - getTileBounds(coord, tile->minX, tile->minY, tile->maxX, tile->maxY); - - loadedTiles[coord] = std::move(tile); - putCachedTile(pending); - - LOG_DEBUG(" Finalized tile [", x, ",", y, "]"); + case FinalizationPhase::DONE: + return true; + } + return true; } void TerrainManager::workerLoop() { + // Keep worker threads off core 0 (reserved for main thread) +#ifdef __linux__ + { + int numCores = static_cast(std::thread::hardware_concurrency()); + if (numCores >= 2) { + cpu_set_t cpuset; + CPU_ZERO(&cpuset); + for (int i = 1; i < numCores; i++) { + CPU_SET(i, &cpuset); + } + pthread_setaffinity_np(pthread_self(), sizeof(cpu_set_t), &cpuset); + } + } +#endif LOG_INFO("Terrain worker thread started"); while (workerRunning.load()) { @@ -927,80 +981,60 @@ void TerrainManager::processReadyTiles() { // Taxi mode gets a slightly larger budget to avoid visible late-pop terrain/models. const float timeBudgetMs = taxiStreamingMode_ ? 8.0f : 5.0f; auto startTime = std::chrono::high_resolution_clock::now(); - int processed = 0; - while (true) { - std::shared_ptr pending; - - { - std::lock_guard lock(queueMutex); - if (readyQueue.empty()) { - break; - } - pending = readyQueue.front(); + // Move newly ready tiles into the finalizing deque. + // Keep them in pendingTiles so streamTiles() won't re-enqueue them. + { + std::lock_guard lock(queueMutex); + while (!readyQueue.empty()) { + auto pending = readyQueue.front(); readyQueue.pop(); - } - - if (pending) { - TileCoord coord = pending->coord; - - finalizeTile(pending); - - auto now = std::chrono::high_resolution_clock::now(); - - { - std::lock_guard lock(queueMutex); - pendingTiles.erase(coord); - } - processed++; - - // Check if we've exceeded time budget - float elapsedMs = std::chrono::duration(now - startTime).count(); - if (elapsedMs >= timeBudgetMs) { - if (processed > 1) { - LOG_DEBUG("Processed ", processed, " tiles in ", elapsedMs, "ms (budget: ", timeBudgetMs, "ms)"); - } - break; + if (pending) { + FinalizingTile ft; + ft.pending = std::move(pending); + finalizingTiles_.push_back(std::move(ft)); } } } -} -void TerrainManager::processM2UploadQueue() { - // Upload up to MAX_M2_UPLOADS_PER_FRAME models per frame - int uploaded = 0; - while (!m2UploadQueue_.empty() && uploaded < MAX_M2_UPLOADS_PER_FRAME) { - auto& upload = m2UploadQueue_.front(); - if (m2Renderer) { - m2Renderer->loadModel(upload.model, upload.modelId); + // Drive incremental finalization within time budget + while (!finalizingTiles_.empty()) { + auto& ft = finalizingTiles_.front(); + bool done = advanceFinalization(ft); + + if (done) { + finalizingTiles_.pop_front(); } - m2UploadQueue_.pop(); - uploaded++; - } - if (uploaded > 0) { - LOG_DEBUG("Uploaded ", uploaded, " M2 models (", m2UploadQueue_.size(), " remaining in queue)"); + auto now = std::chrono::high_resolution_clock::now(); + float elapsedMs = std::chrono::duration(now - startTime).count(); + if (elapsedMs >= timeBudgetMs) { + break; + } } } void TerrainManager::processAllReadyTiles() { - while (true) { - std::shared_ptr pending; - { - std::lock_guard lock(queueMutex); - if (readyQueue.empty()) break; - pending = readyQueue.front(); + // Move all ready tiles into finalizing deque + // Keep in pendingTiles until committed (same as processReadyTiles) + { + std::lock_guard lock(queueMutex); + while (!readyQueue.empty()) { + auto pending = readyQueue.front(); readyQueue.pop(); - } - if (pending) { - TileCoord coord = pending->coord; - finalizeTile(pending); - { - std::lock_guard lock(queueMutex); - pendingTiles.erase(coord); + if (pending) { + FinalizingTile ft; + ft.pending = std::move(pending); + finalizingTiles_.push_back(std::move(ft)); } } } + // Finalize all tiles completely (no time budget — used for loading screens) + while (!finalizingTiles_.empty()) { + auto& ft = finalizingTiles_.front(); + while (!advanceFinalization(ft)) {} + finalizingTiles_.pop_front(); + } } std::shared_ptr TerrainManager::getCachedTile(const TileCoord& coord) { @@ -1099,6 +1133,31 @@ void TerrainManager::unloadTile(int x, int y) { pendingTiles.erase(coord); } + // Remove from finalizingTiles_ if it's being incrementally finalized. + // Water may have already been loaded in TERRAIN phase, so clean it up. + for (auto fit = finalizingTiles_.begin(); fit != finalizingTiles_.end(); ++fit) { + if (fit->pending && fit->pending->coord == coord) { + // If past TERRAIN phase, water was already loaded — remove it + if (fit->phase != FinalizationPhase::TERRAIN && waterRenderer) { + waterRenderer->removeTile(x, y); + } + // Clean up any M2/WMO instances that were already created + if (m2Renderer && !fit->m2InstanceIds.empty()) { + m2Renderer->removeInstances(fit->m2InstanceIds); + } + if (wmoRenderer && !fit->wmoInstanceIds.empty()) { + for (uint32_t id : fit->wmoInstanceIds) { + if (waterRenderer) waterRenderer->removeWMO(id); + } + wmoRenderer->removeInstances(fit->wmoInstanceIds); + } + for (uint32_t uid : fit->tileUniqueIds) placedDoodadIds.erase(uid); + for (uint32_t uid : fit->tileWmoUniqueIds) placedWmoIds.erase(uid); + finalizingTiles_.erase(fit); + return; + } + } + auto it = loadedTiles.find(coord); if (it == loadedTiles.end()) { return; @@ -1167,6 +1226,7 @@ void TerrainManager::unloadAll() { while (!readyQueue.empty()) readyQueue.pop(); } pendingTiles.clear(); + finalizingTiles_.clear(); placedDoodadIds.clear(); LOG_INFO("Unloading all terrain tiles"); diff --git a/src/rendering/water_renderer.cpp b/src/rendering/water_renderer.cpp index 89c1e509..8ecb1d7a 100644 --- a/src/rendering/water_renderer.cpp +++ b/src/rendering/water_renderer.cpp @@ -14,6 +14,7 @@ #include #include #include +#include namespace wowee { namespace rendering { @@ -555,7 +556,27 @@ void WaterRenderer::loadFromTerrain(const pipeline::ADTTerrain& terrain, bool ap clear(); } - int totalLayers = 0; + // ── Pass 1: collect layers into merge groups keyed by {liquidType, roundedHeight} ── + struct ChunkLayerInfo { + int chunkX, chunkY; + const pipeline::ADTTerrain::WaterLayer* layer; + }; + + struct MergeKey { + uint16_t liquidType; + int32_t roundedHeight; // minHeight * 2, rounded to int + bool operator==(const MergeKey& o) const { + return liquidType == o.liquidType && roundedHeight == o.roundedHeight; + } + }; + + struct MergeKeyHash { + size_t operator()(const MergeKey& k) const { + return std::hash()((uint64_t(k.liquidType) << 32) | uint32_t(k.roundedHeight)); + } + }; + + std::unordered_map, MergeKeyHash> mergeGroups; for (int chunkIdx = 0; chunkIdx < 256; chunkIdx++) { const auto& chunkWater = terrain.waterData[chunkIdx]; @@ -563,34 +584,146 @@ void WaterRenderer::loadFromTerrain(const pipeline::ADTTerrain& terrain, bool ap int chunkX = chunkIdx % 16; int chunkY = chunkIdx / 16; - const auto& terrainChunk = terrain.getChunk(chunkX, chunkY); for (const auto& layer : chunkWater.layers) { - WaterSurface surface; + MergeKey key; + key.liquidType = layer.liquidType; + key.roundedHeight = static_cast(std::round(layer.minHeight * 2.0f)); + mergeGroups[key].push_back({chunkX, chunkY, &layer}); + } + } - surface.position = glm::vec3( - terrainChunk.position[0], - terrainChunk.position[1], - layer.minHeight - ); - surface.origin = glm::vec3( - surface.position.x - (static_cast(layer.y) * TILE_SIZE), - surface.position.y - (static_cast(layer.x) * TILE_SIZE), - layer.minHeight - ); - surface.stepX = glm::vec3(0.0f, -TILE_SIZE, 0.0f); - surface.stepY = glm::vec3(-TILE_SIZE, 0.0f, 0.0f); + // Tile origin = NW corner = chunk(0,0) position + const auto& chunk00 = terrain.getChunk(0, 0); - surface.minHeight = layer.minHeight; - surface.maxHeight = layer.maxHeight; - surface.liquidType = layer.liquidType; + // Stormwind water lowering check + bool isStormwindArea = (tileX >= 28 && tileX <= 50 && tileY >= 28 && tileY <= 52); + float tileWorldX = 0, tileWorldY = 0; + glm::vec2 moonwellPos2D(0.0f); + if (isStormwindArea) { + tileWorldX = (32.0f - tileX) * 533.33333f; + tileWorldY = (32.0f - tileY) * 533.33333f; + moonwellPos2D = glm::vec2(-8755.9f, 1108.9f); + } - surface.xOffset = layer.x; - surface.yOffset = layer.y; - surface.width = layer.width; - surface.height = layer.height; + int totalSurfaces = 0; - size_t numVertices = (layer.width + 1) * (layer.height + 1); + // Merge threshold: groups with more than this many chunks get merged into + // one tile-wide surface. Small groups (shore, lakes) stay per-chunk so + // their original mask / height data is preserved exactly. + constexpr size_t MERGE_THRESHOLD = 4; + + // ── Pass 2: create surfaces ── + for (auto& [key, chunkLayers] : mergeGroups) { + + // ── Small group → per-chunk surfaces (original code path) ── + if (chunkLayers.size() <= MERGE_THRESHOLD) { + for (const auto& info : chunkLayers) { + const auto& layer = *info.layer; + const auto& terrainChunk = terrain.getChunk(info.chunkX, info.chunkY); + + WaterSurface surface; + surface.position = glm::vec3( + terrainChunk.position[0], + terrainChunk.position[1], + layer.minHeight + ); + surface.origin = glm::vec3( + surface.position.x - (static_cast(layer.y) * TILE_SIZE), + surface.position.y - (static_cast(layer.x) * TILE_SIZE), + layer.minHeight + ); + surface.stepX = glm::vec3(0.0f, -TILE_SIZE, 0.0f); + surface.stepY = glm::vec3(-TILE_SIZE, 0.0f, 0.0f); + + surface.minHeight = layer.minHeight; + surface.maxHeight = layer.maxHeight; + surface.liquidType = layer.liquidType; + surface.xOffset = layer.x; + surface.yOffset = layer.y; + surface.width = layer.width; + surface.height = layer.height; + + size_t numVertices = (layer.width + 1) * (layer.height + 1); + bool useFlat = true; + if (layer.heights.size() == numVertices) { + bool sane = true; + for (float h : layer.heights) { + if (!std::isfinite(h) || std::abs(h) > 50000.0f) { sane = false; break; } + if (h < layer.minHeight - 8.0f || h > layer.maxHeight + 8.0f) { sane = false; break; } + } + if (sane) { useFlat = false; surface.heights = layer.heights; } + } + if (useFlat) surface.heights.resize(numVertices, layer.minHeight); + + if (isStormwindArea && layer.minHeight > 94.0f) { + float distToMoonwell = glm::distance(glm::vec2(tileWorldX, tileWorldY), moonwellPos2D); + if (distToMoonwell > 300.0f) { + for (float& h : surface.heights) h -= 1.0f; + surface.minHeight -= 1.0f; + surface.maxHeight -= 1.0f; + } + } + + surface.mask = layer.mask; + surface.tileX = tileX; + surface.tileY = tileY; + + createWaterMesh(surface); + if (surface.indexCount > 0 && vkCtx) { + updateMaterialUBO(surface); + } + surfaces.push_back(std::move(surface)); + totalSurfaces++; + } + continue; + } + + // ── Large group → merged tile-wide surface ── + WaterSurface surface; + + float groupHeight = key.roundedHeight / 2.0f; + + surface.width = 128; + surface.height = 128; + surface.xOffset = 0; + surface.yOffset = 0; + surface.liquidType = key.liquidType; + surface.tileX = tileX; + surface.tileY = tileY; + + // Origin = chunk(0,0) position (NW corner of tile) + surface.origin = glm::vec3(chunk00.position[0], chunk00.position[1], groupHeight); + surface.position = surface.origin; + surface.stepX = glm::vec3(0.0f, -TILE_SIZE, 0.0f); + surface.stepY = glm::vec3(-TILE_SIZE, 0.0f, 0.0f); + + surface.minHeight = groupHeight; + surface.maxHeight = groupHeight; + + // Initialize height grid (129×129) with group height + constexpr int MERGED_W = 128; + const int gridW = MERGED_W + 1; // 129 + const int gridH = MERGED_W + 1; + surface.heights.resize(gridW * gridH, groupHeight); + + // Initialize mask (128×128 sub-tiles, all masked OUT) + // Mask uses LSB bit order: tileIndex = row * 128 + col + const int maskBytes = (MERGED_W * MERGED_W + 7) / 8; + surface.mask.resize(maskBytes, 0); + + // ── Fill from each contributing chunk ── + for (const auto& info : chunkLayers) { + const auto& layer = *info.layer; + + // Merged grid offset for this chunk + // gx = chunkY*8 + layer.x + localX, gy = chunkX*8 + layer.y + localY + int baseGx = info.chunkY * 8; + int baseGy = info.chunkX * 8; + + // Copy heights + int layerGridW = layer.width + 1; + size_t numVertices = static_cast(layerGridW) * (layer.height + 1); bool useFlat = true; if (layer.heights.size() == numVertices) { bool sane = true; @@ -598,39 +731,79 @@ void WaterRenderer::loadFromTerrain(const pipeline::ADTTerrain& terrain, bool ap if (!std::isfinite(h) || std::abs(h) > 50000.0f) { sane = false; break; } if (h < layer.minHeight - 8.0f || h > layer.maxHeight + 8.0f) { sane = false; break; } } - if (sane) { useFlat = false; surface.heights = layer.heights; } + if (sane) useFlat = false; } - if (useFlat) surface.heights.resize(numVertices, layer.minHeight); - // Stormwind water lowering - bool isStormwindArea = (tileX >= 28 && tileX <= 50 && tileY >= 28 && tileY <= 52); - if (isStormwindArea && layer.minHeight > 94.0f) { - float tileWorldX = (32.0f - tileX) * 533.33333f; - float tileWorldY = (32.0f - tileY) * 533.33333f; - glm::vec3 moonwellPos(-8755.9f, 1108.9f, 96.1f); - float distToMoonwell = glm::distance(glm::vec2(tileWorldX, tileWorldY), - glm::vec2(moonwellPos.x, moonwellPos.y)); - if (distToMoonwell > 300.0f) { - for (float& h : surface.heights) h -= 1.0f; - surface.minHeight -= 1.0f; - surface.maxHeight -= 1.0f; + for (int ly = 0; ly <= layer.height; ly++) { + for (int lx = 0; lx <= layer.width; lx++) { + int mgx = baseGx + layer.x + lx; + int mgy = baseGy + layer.y + ly; + if (mgx >= gridW || mgy >= gridH) continue; + + float h; + if (!useFlat) { + int layerIdx = ly * layerGridW + lx; + h = layer.heights[layerIdx]; + } else { + h = layer.minHeight; + } + + surface.heights[mgy * gridW + mgx] = h; + if (h < surface.minHeight) surface.minHeight = h; + if (h > surface.maxHeight) surface.maxHeight = h; } } - surface.mask = layer.mask; - surface.tileX = tileX; - surface.tileY = tileY; + // Copy mask — mark contributing sub-tiles as renderable + for (int ly = 0; ly < layer.height; ly++) { + for (int lx = 0; lx < layer.width; lx++) { + bool render = true; + if (!layer.mask.empty()) { + int cx = layer.x + lx; + int cy = layer.y + ly; + int origTileIdx = cy * 8 + cx; + int origByte = origTileIdx / 8; + int origBit = origTileIdx % 8; + if (origByte < static_cast(layer.mask.size())) { + uint8_t mb = layer.mask[origByte]; + render = (mb & (1 << origBit)) || (mb & (1 << (7 - origBit))); + } + } - createWaterMesh(surface); - if (surface.indexCount > 0 && vkCtx) { - updateMaterialUBO(surface); + if (render) { + int mx = baseGx + layer.x + lx; + int my = baseGy + layer.y + ly; + if (mx >= MERGED_W || my >= MERGED_W) continue; + + int mergedTileIdx = my * MERGED_W + mx; + int byteIdx = mergedTileIdx / 8; + int bitIdx = mergedTileIdx % 8; + surface.mask[byteIdx] |= static_cast(1 << bitIdx); + } + } } - surfaces.push_back(std::move(surface)); - totalLayers++; } + + // Stormwind water lowering + if (isStormwindArea && surface.minHeight > 94.0f) { + float distToMoonwell = glm::distance(glm::vec2(tileWorldX, tileWorldY), moonwellPos2D); + if (distToMoonwell > 300.0f) { + for (float& h : surface.heights) h -= 1.0f; + surface.minHeight -= 1.0f; + surface.maxHeight -= 1.0f; + } + } + + createWaterMesh(surface); + if (surface.indexCount > 0 && vkCtx) { + updateMaterialUBO(surface); + } + surfaces.push_back(std::move(surface)); + totalSurfaces++; } - LOG_DEBUG("Loaded ", totalLayers, " water layers from MH2O data"); + LOG_DEBUG("Water: Loaded ", totalSurfaces, " surfaces from tile [", tileX, ",", tileY, + "] (", mergeGroups.size(), " groups), total surfaces: ", surfaces.size()); } void WaterRenderer::removeTile(int tileX, int tileY) { @@ -646,7 +819,7 @@ void WaterRenderer::removeTile(int tileX, int tileY) { } } if (removed > 0) { - LOG_DEBUG("Removed ", removed, " water surfaces for tile [", tileX, ",", tileY, "]"); + LOG_DEBUG("Water: Removed ", removed, " surfaces for tile [", tileX, ",", tileY, "], remaining: ", surfaces.size()); } } @@ -948,7 +1121,8 @@ void WaterRenderer::createWaterMesh(WaterSurface& surface) { bool renderTile = true; if (!surface.mask.empty()) { int tileIndex; - if (surface.wmoId == 0 && surface.mask.size() >= 8) { + bool isMergedTerrain = (surface.wmoId == 0 && surface.width > 8); + if (surface.wmoId == 0 && surface.width <= 8 && surface.mask.size() >= 8) { int cx = static_cast(surface.xOffset) + x; int cy = static_cast(surface.yOffset) + y; tileIndex = cy * 8 + cx; @@ -959,9 +1133,14 @@ void WaterRenderer::createWaterMesh(WaterSurface& surface) { int bitIndex = tileIndex % 8; if (byteIndex < static_cast(surface.mask.size())) { uint8_t maskByte = surface.mask[byteIndex]; - bool lsbOrder = (maskByte & (1 << bitIndex)) != 0; - bool msbOrder = (maskByte & (1 << (7 - bitIndex))) != 0; - renderTile = lsbOrder || msbOrder; + if (isMergedTerrain) { + // Merged surfaces use LSB-only bit order + renderTile = (maskByte & (1 << bitIndex)) != 0; + } else { + bool lsbOrder = (maskByte & (1 << bitIndex)) != 0; + bool msbOrder = (maskByte & (1 << (7 - bitIndex))) != 0; + renderTile = lsbOrder || msbOrder; + } if (!renderTile) { for (int dy = -1; dy <= 1; dy++) { @@ -970,7 +1149,7 @@ void WaterRenderer::createWaterMesh(WaterSurface& surface) { int nx = x + dx, ny = y + dy; if (nx < 0 || ny < 0 || nx >= gridWidth-1 || ny >= gridHeight-1) continue; int neighborIdx; - if (surface.wmoId == 0 && surface.mask.size() >= 8) { + if (surface.wmoId == 0 && surface.width <= 8 && surface.mask.size() >= 8) { neighborIdx = (static_cast(surface.yOffset) + ny) * 8 + (static_cast(surface.xOffset) + nx); } else { @@ -980,9 +1159,16 @@ void WaterRenderer::createWaterMesh(WaterSurface& surface) { int nBitIdx = neighborIdx % 8; if (nByteIdx < static_cast(surface.mask.size())) { uint8_t nMask = surface.mask[nByteIdx]; - if ((nMask & (1 << nBitIdx)) || (nMask & (1 << (7 - nBitIdx)))) { - renderTile = true; - goto found_neighbor; + if (isMergedTerrain) { + if (nMask & (1 << nBitIdx)) { + renderTile = true; + goto found_neighbor; + } + } else { + if ((nMask & (1 << nBitIdx)) || (nMask & (1 << (7 - nBitIdx)))) { + renderTile = true; + goto found_neighbor; + } } } } @@ -1100,7 +1286,7 @@ std::optional WaterRenderer::getWaterHeightAt(float glX, float glY) const if (!surface.mask.empty()) { int tileIndex; - if (surface.wmoId == 0 && surface.mask.size() >= 8) { + if (surface.wmoId == 0 && surface.width <= 8 && surface.mask.size() >= 8) { tileIndex = (static_cast(surface.yOffset) + iy) * 8 + (static_cast(surface.xOffset) + ix); } else { @@ -1110,7 +1296,12 @@ std::optional WaterRenderer::getWaterHeightAt(float glX, float glY) const int bitIndex = tileIndex % 8; if (byteIndex < static_cast(surface.mask.size())) { uint8_t maskByte = surface.mask[byteIndex]; - bool renderTile = (maskByte & (1 << bitIndex)) || (maskByte & (1 << (7 - bitIndex))); + bool renderTile; + if (surface.wmoId == 0 && surface.width > 8) { + renderTile = (maskByte & (1 << bitIndex)) != 0; + } else { + renderTile = (maskByte & (1 << bitIndex)) || (maskByte & (1 << (7 - bitIndex))); + } if (!renderTile) continue; } } @@ -1162,7 +1353,7 @@ std::optional WaterRenderer::getNearestWaterHeightAt(float glX, float glY if (!surface.mask.empty()) { int tileIndex; - if (surface.wmoId == 0 && surface.mask.size() >= 8) { + if (surface.wmoId == 0 && surface.width <= 8 && surface.mask.size() >= 8) { tileIndex = (static_cast(surface.yOffset) + iy) * 8 + (static_cast(surface.xOffset) + ix); } else { @@ -1172,7 +1363,12 @@ std::optional WaterRenderer::getNearestWaterHeightAt(float glX, float glY int bitIndex = tileIndex % 8; if (byteIndex < static_cast(surface.mask.size())) { uint8_t maskByte = surface.mask[byteIndex]; - bool renderTile = (maskByte & (1 << bitIndex)) || (maskByte & (1 << (7 - bitIndex))); + bool renderTile; + if (surface.wmoId == 0 && surface.width > 8) { + renderTile = (maskByte & (1 << bitIndex)) != 0; + } else { + renderTile = (maskByte & (1 << bitIndex)) || (maskByte & (1 << (7 - bitIndex))); + } if (!renderTile) continue; } } @@ -1228,7 +1424,7 @@ std::optional WaterRenderer::getWaterTypeAt(float glX, float glY) cons if (!surface.mask.empty()) { int tileIndex; - if (surface.wmoId == 0 && surface.mask.size() >= 8) { + if (surface.wmoId == 0 && surface.width <= 8 && surface.mask.size() >= 8) { tileIndex = (static_cast(surface.yOffset) + iy) * 8 + (static_cast(surface.xOffset) + ix); } else { @@ -1238,7 +1434,12 @@ std::optional WaterRenderer::getWaterTypeAt(float glX, float glY) cons int bitIndex = tileIndex % 8; if (byteIndex < static_cast(surface.mask.size())) { uint8_t maskByte = surface.mask[byteIndex]; - bool renderTile = (maskByte & (1 << bitIndex)) || (maskByte & (1 << (7 - bitIndex))); + bool renderTile; + if (surface.wmoId == 0 && surface.width > 8) { + renderTile = (maskByte & (1 << bitIndex)) != 0; + } else { + renderTile = (maskByte & (1 << bitIndex)) || (maskByte & (1 << (7 - bitIndex))); + } if (!renderTile) continue; } }