diff --git a/include/core/application.hpp b/include/core/application.hpp index 92e96e8e..84b89f32 100644 --- a/include/core/application.hpp +++ b/include/core/application.hpp @@ -3,13 +3,19 @@ #include "core/window.hpp" #include "core/input.hpp" #include "game/character.hpp" +#include "pipeline/blp_loader.hpp" #include #include #include +#include #include #include #include #include +#include +#include +#include +#include namespace wowee { @@ -18,7 +24,7 @@ namespace rendering { class Renderer; } namespace ui { class UIManager; } namespace auth { class AuthHandler; } namespace game { class GameHandler; class World; class ExpansionRegistry; } -namespace pipeline { class AssetManager; class DBCLayout; } +namespace pipeline { class AssetManager; class DBCLayout; struct M2Model; struct WMOModel; } namespace audio { enum class VoiceType; } namespace core { @@ -90,6 +96,7 @@ private: static const char* mapIdToName(uint32_t mapId); void loadOnlineWorldTerrain(uint32_t mapId, float x, float y, float z); void buildFactionHostilityMap(uint8_t playerRace); + pipeline::M2Model loadCreatureM2Sync(const std::string& m2Path); void spawnOnlineCreature(uint64_t guid, uint32_t displayId, float x, float y, float z, float orientation); void despawnOnlineCreature(uint64_t guid); bool tryAttachCreatureVirtualWeapons(uint64_t guid, uint32_t instanceId); @@ -181,8 +188,39 @@ private: std::unordered_map creatureRenderPosCache_; // guid -> last synced render position std::unordered_set creatureWeaponsAttached_; // guid set when NPC virtual weapons attached std::unordered_map creatureWeaponAttachAttempts_; // guid -> attach attempts + std::unordered_map modelIdIsWolfLike_; // modelId → cached wolf/worg check + static constexpr int MAX_WEAPON_ATTACHES_PER_TICK = 2; // limit weapon attach work per 1s tick + + // CharSections.dbc lookup cache to avoid O(N) DBC scan per NPC spawn. 
+ // Key: (race<<24)|(sex<<16)|(section<<12)|(variation<<8)|color → texture path + std::unordered_map charSectionsCache_; + bool charSectionsCacheBuilt_ = false; + void buildCharSectionsCache(); + std::string lookupCharSection(uint8_t race, uint8_t sex, uint8_t section, + uint8_t variation, uint8_t color, int texIndex = 0) const; + + // Async creature model loading: file I/O + M2 parsing on background thread, + // GPU upload + instance creation on main thread. + struct PreparedCreatureModel { + uint64_t guid; + uint32_t displayId; + uint32_t modelId; + float x, y, z, orientation; + std::shared_ptr model; // parsed on background thread + std::unordered_map predecodedTextures; // decoded on bg thread + bool valid = false; + bool permanent_failure = false; + }; + struct AsyncCreatureLoad { + std::future future; + }; + std::vector asyncCreatureLoads_; + void processAsyncCreatureResults(); + static constexpr int MAX_ASYNC_CREATURE_LOADS = 4; // concurrent background loads std::unordered_set deadCreatureGuids_; // GUIDs that should spawn in corpse/death pose std::unordered_map displayIdModelCache_; // displayId → modelId (model caching) + std::unordered_set displayIdTexturesApplied_; // displayIds with per-model textures applied + std::unordered_map> displayIdPredecodedTextures_; // displayId → pre-decoded skin textures mutable std::unordered_set warnedMissingDisplayDataIds_; // displayIds already warned mutable std::unordered_set warnedMissingModelPathIds_; // modelIds/displayIds already warned uint32_t nextCreatureModelId_ = 5000; // Model IDs for online creatures @@ -250,7 +288,7 @@ private: uint32_t displayId; float x, y, z, orientation; }; - std::vector pendingCreatureSpawns_; + std::deque pendingCreatureSpawns_; static constexpr int MAX_SPAWNS_PER_FRAME = 3; static constexpr int MAX_NEW_CREATURE_MODELS_PER_FRAME = 1; static constexpr uint16_t MAX_CREATURE_SPAWN_RETRIES = 300; @@ -275,6 +313,49 @@ private: // Deferred equipment compositing queue — processes max 1 
per frame to avoid stutter std::vector, std::array>>> deferredEquipmentQueue_; void processDeferredEquipmentQueue(); + // Async equipment texture pre-decode: BLP decode on background thread, composite on main thread + struct PreparedEquipmentUpdate { + uint64_t guid; + std::array displayInfoIds; + std::array inventoryTypes; + std::unordered_map predecodedTextures; + }; + struct AsyncEquipmentLoad { + std::future future; + }; + std::vector asyncEquipmentLoads_; + void processAsyncEquipmentResults(); + std::vector resolveEquipmentTexturePaths(uint64_t guid, + const std::array& displayInfoIds, + const std::array& inventoryTypes) const; + // Deferred NPC texture setup — async DBC lookups + BLP pre-decode to avoid main-thread stalls + struct DeferredNpcComposite { + uint32_t modelId; + uint32_t displayId; + // Skin compositing (type-1 slots) + std::string basePath; // CharSections skin base texture + std::vector overlayPaths; // face + underwear overlays + std::vector> regionLayers; // equipment region overlays + std::vector skinTextureSlots; // model texture slots needing skin composite + bool hasComposite = false; // needs compositing (overlays or equipment regions) + bool hasSimpleSkin = false; // just base skin, no compositing needed + // Baked skin (type-1 slots) + std::string bakedSkinPath; // baked texture path (if available) + bool hasBakedSkin = false; // baked skin resolved successfully + // Hair (type-6 slots) + std::vector hairTextureSlots; // model texture slots needing hair texture + std::string hairTexturePath; // resolved hair texture path + bool useBakedForHair = false; // bald NPC: use baked skin for type-6 + }; + struct PreparedNpcComposite { + DeferredNpcComposite info; + std::unordered_map predecodedTextures; + }; + struct AsyncNpcCompositeLoad { + std::future future; + }; + std::vector asyncNpcCompositeLoads_; + void processAsyncNpcCompositeResults(); // Cache base player model geometry by (raceId, genderId) std::unordered_map playerModelCache_; // 
key=(race<<8)|gender → modelId struct PlayerTextureSlots { int skin = -1; int hair = -1; int underwear = -1; }; @@ -302,6 +383,24 @@ private: }; std::vector pendingGameObjectSpawns_; void processGameObjectSpawnQueue(); + + // Async WMO loading for game objects (file I/O + parse on background thread) + struct PreparedGameObjectWMO { + uint64_t guid; + uint32_t entry; + uint32_t displayId; + float x, y, z, orientation; + std::shared_ptr wmoModel; + std::unordered_map predecodedTextures; // decoded on bg thread + bool valid = false; + bool isWmo = false; + std::string modelPath; + }; + struct AsyncGameObjectLoad { + std::future future; + }; + std::vector asyncGameObjectLoads_; + void processAsyncGameObjectResults(); struct PendingTransportDoodadBatch { uint64_t guid = 0; uint32_t modelId = 0; @@ -321,6 +420,23 @@ private: // Quest marker billboard sprites (above NPCs) void loadQuestMarkerModels(); // Now loads BLP textures void updateQuestMarkers(); // Updates billboard positions + + // Background world preloader — warms AssetManager file cache for the + // expected world before the user clicks Enter World. 
+ struct WorldPreload { + uint32_t mapId = 0; + std::string mapName; + int centerTileX = 0; + int centerTileY = 0; + std::atomic cancel{false}; + std::vector workers; + }; + std::unique_ptr worldPreload_; + void startWorldPreload(uint32_t mapId, const std::string& mapName, float serverX, float serverY); + void cancelWorldPreload(); + void saveLastWorldInfo(uint32_t mapId, const std::string& mapName, float serverX, float serverY); + struct LastWorldInfo { uint32_t mapId = 0; std::string mapName; float x = 0, y = 0; bool valid = false; }; + LastWorldInfo loadLastWorldInfo() const; }; } // namespace core diff --git a/include/rendering/character_renderer.hpp b/include/rendering/character_renderer.hpp index c6f63451..83cb3e7f 100644 --- a/include/rendering/character_renderer.hpp +++ b/include/rendering/character_renderer.hpp @@ -1,6 +1,7 @@ #pragma once #include "pipeline/m2_loader.hpp" +#include "pipeline/blp_loader.hpp" #include #include #include @@ -11,6 +12,7 @@ #include #include #include +#include namespace wowee { namespace pipeline { class AssetManager; } @@ -114,7 +116,11 @@ public: void setShadowMap(VkTexture*, const glm::mat4&) {} void clearShadowMap() {} + // Pre-decoded BLP cache: set before calling loadModel() to skip main-thread BLP decode + void setPredecodedBLPCache(std::unordered_map* cache) { predecodedBLPCache_ = cache; } + private: + std::unordered_map* predecodedBLPCache_ = nullptr; // GPU representation of M2 model struct M2ModelGPU { VkBuffer vertexBuffer = VK_NULL_HANDLE; @@ -180,6 +186,7 @@ private: // Bone update throttling (skip frames for distant characters) uint32_t boneUpdateCounter = 0; + const M2ModelGPU* cachedModel = nullptr; // Avoid per-frame hash lookups // Per-instance bone SSBO (double-buffered per frame) VkBuffer boneBuffer[2] = {}; @@ -254,7 +261,14 @@ private: VkDescriptorPool materialDescPools_[2] = {VK_NULL_HANDLE, VK_NULL_HANDLE}; VkDescriptorPool boneDescPool_ = VK_NULL_HANDLE; uint32_t lastMaterialPoolResetFrame_ = 
0xFFFFFFFFu; - std::vector> transientMaterialUbos_[2]; + + // Material UBO ring buffer — pre-allocated per frame slot, sub-allocated each draw + VkBuffer materialRingBuffer_[2] = {VK_NULL_HANDLE, VK_NULL_HANDLE}; + VmaAllocation materialRingAlloc_[2] = {VK_NULL_HANDLE, VK_NULL_HANDLE}; + void* materialRingMapped_[2] = {nullptr, nullptr}; + uint32_t materialRingOffset_[2] = {0, 0}; + uint32_t materialUboAlignment_ = 256; // minUniformBufferOffsetAlignment + static constexpr uint32_t MATERIAL_RING_CAPACITY = 4096; // Texture cache struct TextureCacheEntry { @@ -265,6 +279,7 @@ private: uint64_t lastUse = 0; bool hasAlpha = false; bool colorKeyBlack = false; + bool normalMapPending = false; // deferred normal map generation }; std::unordered_map textureCache; std::unordered_map textureHasAlphaByPtr_; @@ -289,6 +304,17 @@ private: std::unique_ptr generateNormalHeightMap( const uint8_t* pixels, uint32_t width, uint32_t height, float& outVariance); + // Deferred normal map generation — avoids stalling loadModel + struct PendingNormalMap { + std::string cacheKey; + std::vector pixels; // RGBA pixel data + uint32_t width, height; + }; + std::deque pendingNormalMaps_; +public: + void processPendingNormalMaps(int budget = 2); +private: + // Normal mapping / POM settings bool normalMappingEnabled_ = true; float normalMapStrength_ = 0.8f; diff --git a/include/rendering/m2_renderer.hpp b/include/rendering/m2_renderer.hpp index f53fb4bf..1c35e34b 100644 --- a/include/rendering/m2_renderer.hpp +++ b/include/rendering/m2_renderer.hpp @@ -1,6 +1,7 @@ #pragma once #include "pipeline/m2_loader.hpp" +#include "pipeline/blp_loader.hpp" #include #include #include @@ -188,6 +189,7 @@ struct M2Instance { bool skipCollision = false; // WMO interior doodads — skip player wall collision float cachedBoundRadius = 0.0f; float portalSpinAngle = 0.0f; // Accumulated spin angle for portal rotation + const M2ModelGPU* cachedModel = nullptr; // Avoid per-frame hash lookups // Frame-skip 
optimization (update distant animations less frequently) uint8_t frameSkipCounter = 0; @@ -328,6 +330,10 @@ public: std::vector getWaterVegetationPositions(const glm::vec3& camPos, float maxDist) const; + // Pre-decoded BLP cache: set by terrain manager before calling loadModel() + // so loadTexture() can skip the expensive assetManager->loadTexture() call. + void setPredecodedBLPCache(std::unordered_map* cache) { predecodedBLPCache_ = cache; } + private: bool initialized_ = false; bool insideInterior = false; @@ -389,12 +395,33 @@ private: std::unordered_map models; std::vector instances; + // O(1) dedup: key = (modelId, quantized x, quantized y, quantized z) → instanceId + struct DedupKey { + uint32_t modelId; + int32_t qx, qy, qz; // position quantized to 0.1 units + bool operator==(const DedupKey& o) const { + return modelId == o.modelId && qx == o.qx && qy == o.qy && qz == o.qz; + } + }; + struct DedupHash { + size_t operator()(const DedupKey& k) const { + size_t h = std::hash()(k.modelId); + h ^= std::hash()(k.qx) * 2654435761u; + h ^= std::hash()(k.qy) * 40503u; + h ^= std::hash()(k.qz) * 12289u; + return h; + } + }; + std::unordered_map instanceDedupMap_; + uint32_t nextInstanceId = 1; uint32_t lastDrawCallCount = 0; size_t modelCacheLimit_ = 6000; uint32_t modelLimitRejectWarnings_ = 0; VkTexture* loadTexture(const std::string& path, uint32_t texFlags = 0); + std::unordered_map* predecodedBLPCache_ = nullptr; + struct TextureCacheEntry { std::unique_ptr texture; size_t approxBytes = 0; diff --git a/include/rendering/terrain_manager.hpp b/include/rendering/terrain_manager.hpp index 0090edc4..6f732721 100644 --- a/include/rendering/terrain_manager.hpp +++ b/include/rendering/terrain_manager.hpp @@ -121,6 +121,12 @@ struct PendingTile { // Pre-loaded terrain texture BLP data (loaded on background thread to avoid // blocking file I/O on the main thread during finalizeTile) std::unordered_map preloadedTextures; + + // Pre-decoded M2 model textures (decoded on 
background thread) + std::unordered_map preloadedM2Textures; + + // Pre-decoded WMO textures (decoded on background thread) + std::unordered_map preloadedWMOTextures; }; /** @@ -150,6 +156,11 @@ struct FinalizingTile { size_t wmoModelIndex = 0; // Next WMO model to upload size_t wmoDoodadIndex = 0; // Next WMO doodad to upload + // Incremental terrain upload state (splits TERRAIN phase across frames) + bool terrainPreloaded = false; // True after preloaded textures uploaded + int terrainChunkNext = 0; // Next chunk index to upload (0-255, row-major) + bool terrainMeshDone = false; // True when all chunks uploaded + // Accumulated results (built up across phases) std::vector m2InstanceIds; std::vector wmoInstanceIds; @@ -376,6 +387,11 @@ private: std::unordered_set missingAdtWarnings_; std::mutex missingAdtWarningsMutex_; + // Thread-safe set of M2 model IDs already uploaded to GPU + // (checked by workers to skip redundant file I/O + parsing) + std::unordered_set uploadedM2Ids_; + std::mutex uploadedM2IdsMutex_; + // Dedup set for doodad placements across tile boundaries std::unordered_set placedDoodadIds; diff --git a/include/rendering/terrain_renderer.hpp b/include/rendering/terrain_renderer.hpp index 91279e9c..77af9a64 100644 --- a/include/rendering/terrain_renderer.hpp +++ b/include/rendering/terrain_renderer.hpp @@ -86,6 +86,13 @@ public: const std::vector& texturePaths, int tileX = -1, int tileY = -1); + /// Upload a batch of terrain chunks incrementally. Returns true when all chunks done. + /// chunkIndex is updated to the next chunk to process (0-255 row-major). 
+ bool loadTerrainIncremental(const pipeline::TerrainMesh& mesh, + const std::vector& texturePaths, + int tileX, int tileY, + int& chunkIndex, int maxChunksPerCall = 16); + void removeTile(int tileX, int tileY); void uploadPreloadedTextures(const std::unordered_map& textures); @@ -120,6 +127,7 @@ public: int getRenderedChunkCount() const { return renderedChunks; } int getCulledChunkCount() const { return culledChunks; } int getTriangleCount() const; + VkContext* getVkContext() const { return vkCtx; } private: TerrainChunkGPU uploadChunk(const pipeline::ChunkMesh& chunk); diff --git a/include/rendering/vk_context.hpp b/include/rendering/vk_context.hpp index 3a242940..907e21bf 100644 --- a/include/rendering/vk_context.hpp +++ b/include/rendering/vk_context.hpp @@ -1,5 +1,6 @@ #pragma once +#include "rendering/vk_utils.hpp" #include #include #include @@ -46,6 +47,16 @@ public: // Immediate submit for one-off GPU work (descriptor pool creation, etc.) void immediateSubmit(std::function&& function); + // Batch upload mode: records multiple upload commands into a single + // command buffer, then submits with ONE fence wait instead of one per upload. 
+ void beginUploadBatch(); + void endUploadBatch(); // Async: submits but does NOT wait for fence + void endUploadBatchSync(); // Sync: submits and waits (for load screens) + bool isInUploadBatch() const { return inUploadBatch_; } + void deferStagingCleanup(AllocatedBuffer staging); + void pollUploadBatches(); // Check completed async uploads, free staging buffers + void waitAllUploads(); // Block until all in-flight uploads complete + // Accessors VkInstance getInstance() const { return instance; } VkPhysicalDevice getPhysicalDevice() const { return physicalDevice; } @@ -143,6 +154,20 @@ private: VkCommandPool immCommandPool = VK_NULL_HANDLE; VkFence immFence = VK_NULL_HANDLE; + // Batch upload state (nesting-safe via depth counter) + int uploadBatchDepth_ = 0; + bool inUploadBatch_ = false; + VkCommandBuffer batchCmd_ = VK_NULL_HANDLE; + std::vector batchStagingBuffers_; + + // Async upload: in-flight batches awaiting GPU completion + struct InFlightBatch { + VkFence fence = VK_NULL_HANDLE; + VkCommandBuffer cmd = VK_NULL_HANDLE; + std::vector stagingBuffers; + }; + std::vector inFlightBatches_; + // Depth buffer (shared across all framebuffers) VkImage depthImage = VK_NULL_HANDLE; VkImageView depthImageView = VK_NULL_HANDLE; diff --git a/include/rendering/wmo_renderer.hpp b/include/rendering/wmo_renderer.hpp index 095a354d..f0d3b36f 100644 --- a/include/rendering/wmo_renderer.hpp +++ b/include/rendering/wmo_renderer.hpp @@ -1,5 +1,6 @@ #pragma once +#include "pipeline/blp_loader.hpp" #include #include #include @@ -325,6 +326,12 @@ public: // Pre-compute floor cache for all loaded WMO instances void precomputeFloorCache(); + // Pre-decoded BLP cache: set before calling loadModel() to skip main-thread BLP decode + void setPredecodedBLPCache(std::unordered_map* cache) { predecodedBLPCache_ = cache; } + + // Defer normal/height map generation during streaming to avoid CPU stalls + void setDeferNormalMaps(bool defer) { deferNormalMaps_ = defer; } + private: // WMO 
material UBO — matches WMOMaterial in wmo.frag.glsl struct WMOMaterialUBO { @@ -558,6 +565,7 @@ private: * Load a texture from path */ VkTexture* loadTexture(const std::string& path); + std::unordered_map* predecodedBLPCache_ = nullptr; /** * Generate normal+height map from diffuse RGBA8 pixels @@ -670,6 +678,7 @@ private: // Normal mapping / POM settings bool normalMappingEnabled_ = true; // on by default + bool deferNormalMaps_ = false; // skip normal map gen during streaming float normalMapStrength_ = 0.8f; // 0.0 = flat, 1.0 = full, 2.0 = exaggerated bool pomEnabled_ = true; // on by default int pomQuality_ = 1; // 0=Low(16), 1=Medium(32), 2=High(64) diff --git a/src/core/application.cpp b/src/core/application.cpp index 2a8ef041..1a239d8a 100644 --- a/src/core/application.cpp +++ b/src/core/application.cpp @@ -56,6 +56,7 @@ #include #include #include +#include #include #ifdef __linux__ @@ -314,6 +315,15 @@ bool Application::initialize() { gameHandler->getTransportManager()->loadTaxiPathNodeDBC(assetManager.get()); } + // Start background preload for last-played character's world. + // Warms the file cache so terrain tile loading is faster at Enter World. 
+ { + auto lastWorld = loadLastWorldInfo(); + if (lastWorld.valid) { + startWorldPreload(lastWorld.mapId, lastWorld.mapName, lastWorld.x, lastWorld.y); + } + } + } else { LOG_WARNING("Failed to initialize asset manager - asset loading will be unavailable"); LOG_WARNING("Set WOW_DATA_PATH environment variable to your WoW Data directory"); @@ -521,6 +531,9 @@ void Application::run() { void Application::shutdown() { LOG_WARNING("Shutting down application..."); + // Stop background world preloader before destroying AssetManager + cancelWorldPreload(); + // Save floor cache before renderer is destroyed if (renderer && renderer->getWMORenderer()) { size_t cacheSize = renderer->getWMORenderer()->getFloorCacheSize(); @@ -734,6 +747,16 @@ void Application::logoutToLogin() { deadCreatureGuids_.clear(); nonRenderableCreatureDisplayIds_.clear(); creaturePermanentFailureGuids_.clear(); + modelIdIsWolfLike_.clear(); + displayIdTexturesApplied_.clear(); + charSectionsCache_.clear(); + charSectionsCacheBuilt_ = false; + + // Wait for any in-flight async creature loads before clearing state + for (auto& load : asyncCreatureLoads_) { + if (load.future.valid()) load.future.wait(); + } + asyncCreatureLoads_.clear(); // --- Creature spawn queues --- pendingCreatureSpawns_.clear(); @@ -833,6 +856,7 @@ void Application::update(float deltaTime) { const char* inGameStep = "begin"; try { auto runInGameStage = [&](const char* stageName, auto&& fn) { + auto stageStart = std::chrono::steady_clock::now(); try { fn(); } catch (const std::bad_alloc& e) { @@ -842,6 +866,11 @@ void Application::update(float deltaTime) { LOG_ERROR("Exception during IN_GAME update stage '", stageName, "': ", e.what()); throw; } + auto stageEnd = std::chrono::steady_clock::now(); + float stageMs = std::chrono::duration(stageEnd - stageStart).count(); + if (stageMs > 3.0f) { + LOG_WARNING("SLOW update stage '", stageName, "': ", stageMs, "ms"); + } }; inGameStep = "gameHandler update"; updateCheckpoint = "in_game: 
gameHandler update"; @@ -884,11 +913,30 @@ void Application::update(float deltaTime) { inGameStep = "spawn/equipment queues"; updateCheckpoint = "in_game: spawn/equipment queues"; runInGameStage("spawn/equipment queues", [&] { + auto t0 = std::chrono::steady_clock::now(); processPlayerSpawnQueue(); - // Process deferred online creature spawns (throttled) + auto t1 = std::chrono::steady_clock::now(); processCreatureSpawnQueue(); - // Process deferred equipment compositing (max 1 per frame to avoid stutter) + auto t2 = std::chrono::steady_clock::now(); + processAsyncNpcCompositeResults(); + auto t3 = std::chrono::steady_clock::now(); processDeferredEquipmentQueue(); + auto t4 = std::chrono::steady_clock::now(); + // Process deferred normal maps (2 per frame to spread CPU cost) + if (auto* cr = renderer ? renderer->getCharacterRenderer() : nullptr) { + cr->processPendingNormalMaps(2); + } + auto t5 = std::chrono::steady_clock::now(); + float pMs = std::chrono::duration(t1 - t0).count(); + float cMs = std::chrono::duration(t2 - t1).count(); + float nMs = std::chrono::duration(t3 - t2).count(); + float eMs = std::chrono::duration(t4 - t3).count(); + float nmMs = std::chrono::duration(t5 - t4).count(); + float total = pMs + cMs + nMs + eMs + nmMs; + if (total > 4.0f) { + LOG_WARNING("spawn/equip breakdown: player=", pMs, "ms creature=", cMs, + "ms npcComposite=", nMs, "ms equip=", eMs, "ms normalMaps=", nmMs, "ms"); + } }); // Self-heal missing creature visuals: if a nearby UNIT exists in // entity state but has no render instance, queue a spawn retry. @@ -1279,12 +1327,14 @@ void Application::update(float deltaTime) { // creature models remain at stale spawn positions. 
inGameStep = "creature render sync"; updateCheckpoint = "in_game: creature render sync"; + auto creatureSyncStart = std::chrono::steady_clock::now(); if (renderer && gameHandler && renderer->getCharacterRenderer()) { auto* charRenderer = renderer->getCharacterRenderer(); static float npcWeaponRetryTimer = 0.0f; npcWeaponRetryTimer += deltaTime; const bool npcWeaponRetryTick = (npcWeaponRetryTimer >= 1.0f); if (npcWeaponRetryTick) npcWeaponRetryTimer = 0.0f; + int weaponAttachesThisTick = 0; glm::vec3 playerPos(0.0f); glm::vec3 playerRenderPos(0.0f); bool havePlayerPos = false; @@ -1304,11 +1354,14 @@ void Application::update(float deltaTime) { auto entity = gameHandler->getEntityManager().getEntity(guid); if (!entity || entity->getType() != game::ObjectType::UNIT) continue; - if (npcWeaponRetryTick && !creatureWeaponsAttached_.count(guid)) { + if (npcWeaponRetryTick && + weaponAttachesThisTick < MAX_WEAPON_ATTACHES_PER_TICK && + !creatureWeaponsAttached_.count(guid)) { uint8_t attempts = 0; auto itAttempts = creatureWeaponAttachAttempts_.find(guid); if (itAttempts != creatureWeaponAttachAttempts_.end()) attempts = itAttempts->second; if (attempts < 30) { + weaponAttachesThisTick++; if (tryAttachCreatureVirtualWeapons(guid, instanceId)) { creatureWeaponsAttached_.insert(guid); creatureWeaponAttachAttempts_.erase(guid); @@ -1319,24 +1372,31 @@ void Application::update(float deltaTime) { } glm::vec3 canonical(entity->getX(), entity->getY(), entity->getZ()); + float canonDistSq = 0.0f; if (havePlayerPos) { glm::vec3 d = canonical - playerPos; - if (glm::dot(d, d) > syncRadiusSq) continue; + canonDistSq = glm::dot(d, d); + if (canonDistSq > syncRadiusSq) continue; } glm::vec3 renderPos = core::coords::canonicalToRender(canonical); // Visual collision guard: keep hostile melee units from rendering inside the // player's model while attacking. This is client-side only (no server position change). 
- auto unit = std::static_pointer_cast(entity); - const uint64_t currentTargetGuid = gameHandler->hasTarget() ? gameHandler->getTargetGuid() : 0; - const uint64_t autoAttackGuid = gameHandler->getAutoAttackTargetGuid(); - const bool isCombatTarget = (guid == currentTargetGuid || guid == autoAttackGuid); - bool clipGuardEligible = havePlayerPos && - unit->getHealth() > 0 && - (unit->isHostile() || - gameHandler->isAggressiveTowardPlayer(guid) || - isCombatTarget); + // Only check for creatures within 8 units (melee range) — saves expensive + // getRenderBoundsForGuid/getModelData calls for distant creatures. + bool clipGuardEligible = false; + bool isCombatTarget = false; + if (havePlayerPos && canonDistSq < 64.0f) { // 8² = melee range + auto unit = std::static_pointer_cast(entity); + const uint64_t currentTargetGuid = gameHandler->hasTarget() ? gameHandler->getTargetGuid() : 0; + const uint64_t autoAttackGuid = gameHandler->getAutoAttackTargetGuid(); + isCombatTarget = (guid == currentTargetGuid || guid == autoAttackGuid); + clipGuardEligible = unit->getHealth() > 0 && + (unit->isHostile() || + gameHandler->isAggressiveTowardPlayer(guid) || + isCombatTarget); + } if (clipGuardEligible) { float creatureCollisionRadius = 0.8f; glm::vec3 cc; @@ -1355,14 +1415,21 @@ void Application::update(float deltaTime) { // often put head/torso inside the player capsule). 
auto mit = creatureModelIds_.find(guid); if (mit != creatureModelIds_.end()) { - if (const auto* md = charRenderer->getModelData(mit->second)) { - std::string modelName = md->name; - std::transform(modelName.begin(), modelName.end(), modelName.begin(), - [](unsigned char c) { return static_cast(std::tolower(c)); }); - if (modelName.find("wolf") != std::string::npos || - modelName.find("worg") != std::string::npos) { - minSep = std::max(minSep, 2.45f); + uint32_t mid = mit->second; + auto wolfIt = modelIdIsWolfLike_.find(mid); + if (wolfIt == modelIdIsWolfLike_.end()) { + bool isWolf = false; + if (const auto* md = charRenderer->getModelData(mid)) { + std::string modelName = md->name; + std::transform(modelName.begin(), modelName.end(), modelName.begin(), + [](unsigned char c) { return static_cast(std::tolower(c)); }); + isWolf = (modelName.find("wolf") != std::string::npos || + modelName.find("worg") != std::string::npos); } + wolfIt = modelIdIsWolfLike_.emplace(mid, isWolf).first; + } + if (wolfIt->second) { + minSep = std::max(minSep, 2.45f); } } @@ -1389,7 +1456,8 @@ void Application::update(float deltaTime) { float planarDist = glm::length(delta2); float dz = std::abs(renderPos.z - prevPos.z); - const bool deadOrCorpse = unit->getHealth() == 0; + auto unitPtr = std::static_pointer_cast(entity); + const bool deadOrCorpse = unitPtr->getHealth() == 0; const bool largeCorrection = (planarDist > 6.0f) || (dz > 3.0f); if (deadOrCorpse || largeCorrection) { charRenderer->setInstancePosition(instanceId, renderPos); @@ -1404,6 +1472,14 @@ void Application::update(float deltaTime) { charRenderer->setInstanceRotation(instanceId, glm::vec3(0.0f, 0.0f, renderYaw)); } } + { + float csMs = std::chrono::duration( + std::chrono::steady_clock::now() - creatureSyncStart).count(); + if (csMs > 5.0f) { + LOG_WARNING("SLOW update stage 'creature render sync': ", csMs, "ms (", + creatureInstances_.size(), " creatures)"); + } + } // Movement heartbeat is sent from 
GameHandler::update() to avoid // duplicate packets from multiple update loops. @@ -1426,6 +1502,7 @@ void Application::update(float deltaTime) { // Update renderer (camera, etc.) only when in-game updateCheckpoint = "renderer update"; if (renderer && state == AppState::IN_GAME) { + auto rendererUpdateStart = std::chrono::steady_clock::now(); try { renderer->update(deltaTime); } catch (const std::bad_alloc& e) { @@ -1435,6 +1512,11 @@ void Application::update(float deltaTime) { LOG_ERROR("Exception during Application::update stage 'renderer->update': ", e.what()); throw; } + float ruMs = std::chrono::duration( + std::chrono::steady_clock::now() - rendererUpdateStart).count(); + if (ruMs > 5.0f) { + LOG_WARNING("SLOW update stage 'renderer->update': ", ruMs, "ms"); + } } // Update UI updateCheckpoint = "ui update"; @@ -3465,6 +3547,14 @@ void Application::loadOnlineWorldTerrain(uint32_t mapId, float x, float y, float deadCreatureGuids_.clear(); nonRenderableCreatureDisplayIds_.clear(); creaturePermanentFailureGuids_.clear(); + modelIdIsWolfLike_.clear(); + displayIdTexturesApplied_.clear(); + charSectionsCache_.clear(); + charSectionsCacheBuilt_ = false; + for (auto& load : asyncCreatureLoads_) { + if (load.future.valid()) load.future.wait(); + } + asyncCreatureLoads_.clear(); playerInstances_.clear(); onlinePlayerAppearance_.clear(); @@ -3508,6 +3598,21 @@ void Application::loadOnlineWorldTerrain(uint32_t mapId, float x, float y, float } LOG_INFO("Loading online world terrain for map '", mapName, "' (ID ", mapId, ")"); + // Cancel any stale preload (if it was for a different map, the file cache + // still retains whatever was loaded — it doesn't hurt). 
+ if (worldPreload_) { + if (worldPreload_->mapId == mapId) { + LOG_INFO("World preload: cache-warm hit for map '", mapName, "'"); + } else { + LOG_INFO("World preload: map mismatch (preloaded ", worldPreload_->mapName, + ", entering ", mapName, ")"); + } + } + cancelWorldPreload(); + + // Save this world info for next session's early preload + saveLastWorldInfo(mapId, mapName, x, y); + // Convert server coordinates to canonical WoW coordinates // Server sends: X=West (canonical.Y), Y=North (canonical.X), Z=Up glm::vec3 spawnCanonical = core::coords::serverToCanonical(glm::vec3(x, y, z)); @@ -3896,6 +4001,13 @@ void Application::loadOnlineWorldTerrain(uint32_t mapId, float x, float y, float auto* terrainMgr = renderer->getTerrainManager(); auto* camera = renderer->getCamera(); + // Use a small radius for the initial load (just immediate tiles), + // then restore the full radius after entering the game. + // This matches WoW's behavior: load quickly, stream the rest in-game. + const int savedLoadRadius = 4; + terrainMgr->setLoadRadius(1); + terrainMgr->setUnloadRadius(7); + // Trigger tile streaming for surrounding area terrainMgr->update(*camera, 1.0f); @@ -3931,8 +4043,11 @@ void Application::loadOnlineWorldTerrain(uint32_t mapId, float x, float y, float // Trigger new streaming — enqueue tiles for background workers terrainMgr->update(*camera, 0.016f); - // Process ONE tile per iteration so loading screen updates after each - terrainMgr->processOneReadyTile(); + // Process ALL available ready tiles per iteration — batches GPU + // uploads into a single command buffer + fence wait instead of + // one fence per tile. Loading screen still updates between + // iterations while workers parse more tiles. 
+ terrainMgr->processAllReadyTiles(); int remaining = terrainMgr->getRemainingTileCount(); int loaded = terrainMgr->getLoadedTileCount(); @@ -3987,6 +4102,9 @@ void Application::loadOnlineWorldTerrain(uint32_t mapId, float x, float y, float LOG_INFO("Online terrain streaming complete: ", terrainMgr->getLoadedTileCount(), " tiles loaded"); + // Restore full load radius — remaining tiles stream in-game + terrainMgr->setLoadRadius(savedLoadRadius); + // Load/precompute collision cache if (renderer->getWMORenderer()) { showProgress("Building collision cache...", 0.88f); @@ -4087,9 +4205,68 @@ void Application::loadOnlineWorldTerrain(uint32_t mapId, float x, float y, float if (world) world->update(1.0f / 60.0f); processPlayerSpawnQueue(); + + // During load screen warmup: lift per-frame budgets so GPU uploads + // happen in bulk while the loading screen is still visible. + // Process ALL async creature model uploads (no 3-per-frame cap). + { + for (auto it = asyncCreatureLoads_.begin(); it != asyncCreatureLoads_.end(); ) { + if (!it->future.valid() || + it->future.wait_for(std::chrono::milliseconds(0)) != std::future_status::ready) { + ++it; + continue; + } + auto result = it->future.get(); + it = asyncCreatureLoads_.erase(it); + if (result.permanent_failure) { + nonRenderableCreatureDisplayIds_.insert(result.displayId); + creaturePermanentFailureGuids_.insert(result.guid); + pendingCreatureSpawnGuids_.erase(result.guid); + creatureSpawnRetryCounts_.erase(result.guid); + continue; + } + if (!result.valid || !result.model) { + pendingCreatureSpawnGuids_.erase(result.guid); + creatureSpawnRetryCounts_.erase(result.guid); + continue; + } + auto* charRenderer = renderer ? 
renderer->getCharacterRenderer() : nullptr; + if (!charRenderer) { pendingCreatureSpawnGuids_.erase(result.guid); continue; } + if (!charRenderer->loadModel(*result.model, result.modelId)) { + nonRenderableCreatureDisplayIds_.insert(result.displayId); + creaturePermanentFailureGuids_.insert(result.guid); + pendingCreatureSpawnGuids_.erase(result.guid); + creatureSpawnRetryCounts_.erase(result.guid); + continue; + } + displayIdModelCache_[result.displayId] = result.modelId; + pendingCreatureSpawnGuids_.erase(result.guid); + creatureSpawnRetryCounts_.erase(result.guid); + if (!creatureInstances_.count(result.guid) && + !creaturePermanentFailureGuids_.count(result.guid)) { + PendingCreatureSpawn s{}; + s.guid = result.guid; s.displayId = result.displayId; + s.x = result.x; s.y = result.y; s.z = result.z; + s.orientation = result.orientation; + pendingCreatureSpawns_.push_back(s); + pendingCreatureSpawnGuids_.insert(result.guid); + } + } + } processCreatureSpawnQueue(); + processAsyncNpcCompositeResults(); processDeferredEquipmentQueue(); - processGameObjectSpawnQueue(); + if (auto* cr = renderer ? renderer->getCharacterRenderer() : nullptr) { + cr->processPendingNormalMaps(10); // higher budget during load screen + } + + // Process ALL pending game object spawns (no 1-per-frame cap during load screen). 
+ while (!pendingGameObjectSpawns_.empty()) { + auto& s = pendingGameObjectSpawns_.front(); + spawnOnlineGameObject(s.guid, s.entry, s.displayId, s.x, s.y, s.z, s.orientation); + pendingGameObjectSpawns_.erase(pendingGameObjectSpawns_.begin()); + } + processPendingTransportDoodads(); processPendingMount(); updateQuestMarkers(); @@ -4140,6 +4317,55 @@ void Application::loadOnlineWorldTerrain(uint32_t mapId, float x, float y, float setState(AppState::IN_GAME); } +void Application::buildCharSectionsCache() { + if (charSectionsCacheBuilt_ || !assetManager || !assetManager->isInitialized()) return; + auto dbc = assetManager->loadDBC("CharSections.dbc"); + if (!dbc) return; + const auto* csL = pipeline::getActiveDBCLayout() + ? pipeline::getActiveDBCLayout()->getLayout("CharSections") : nullptr; + uint32_t raceF = csL ? (*csL)["RaceID"] : 1; + uint32_t sexF = csL ? (*csL)["SexID"] : 2; + uint32_t secF = csL ? (*csL)["BaseSection"] : 3; + uint32_t varF = csL ? (*csL)["VariationIndex"] : 4; + uint32_t colF = csL ? (*csL)["ColorIndex"] : 5; + uint32_t tex1F = csL ? 
(*csL)["Texture1"] : 6; + for (uint32_t r = 0; r < dbc->getRecordCount(); r++) { + uint32_t race = dbc->getUInt32(r, raceF); + uint32_t sex = dbc->getUInt32(r, sexF); + uint32_t section = dbc->getUInt32(r, secF); + uint32_t variation = dbc->getUInt32(r, varF); + uint32_t color = dbc->getUInt32(r, colF); + // We only cache sections 0 (skin), 1 (face), 3 (hair), 4 (underwear) + if (section != 0 && section != 1 && section != 3 && section != 4) continue; + for (int ti = 0; ti < 3; ti++) { + std::string tex = dbc->getString(r, tex1F + ti); + if (tex.empty()) continue; + // Key: race(8)|sex(4)|section(4)|variation(8)|color(8)|texIndex(2) packed into 64 bits + uint64_t key = (static_cast(race) << 26) | + (static_cast(sex & 0xF) << 22) | + (static_cast(section & 0xF) << 18) | + (static_cast(variation & 0xFF) << 10) | + (static_cast(color & 0xFF) << 2) | + static_cast(ti); + charSectionsCache_.emplace(key, tex); + } + } + charSectionsCacheBuilt_ = true; + LOG_INFO("CharSections cache built: ", charSectionsCache_.size(), " entries"); +} + +std::string Application::lookupCharSection(uint8_t race, uint8_t sex, uint8_t section, + uint8_t variation, uint8_t color, int texIndex) const { + uint64_t key = (static_cast(race) << 26) | + (static_cast(sex & 0xF) << 22) | + (static_cast(section & 0xF) << 18) | + (static_cast(variation & 0xFF) << 10) | + (static_cast(color & 0xFF) << 2) | + static_cast(texIndex); + auto it = charSectionsCache_.find(key); + return (it != charSectionsCache_.end()) ? 
it->second : std::string(); +} + void Application::buildCreatureDisplayLookups() { if (creatureLookupsBuilt_ || !assetManager || !assetManager->isInitialized()) return; @@ -4479,6 +4705,47 @@ bool Application::getRenderFootZForGuid(uint64_t guid, float& outFootZ) const { return renderer->getCharacterRenderer()->getInstanceFootZ(instanceId, outFootZ); } +pipeline::M2Model Application::loadCreatureM2Sync(const std::string& m2Path) { + auto m2Data = assetManager->readFile(m2Path); + if (m2Data.empty()) { + LOG_WARNING("Failed to read creature M2: ", m2Path); + return {}; + } + + pipeline::M2Model model = pipeline::M2Loader::load(m2Data); + if (model.vertices.empty()) { + LOG_WARNING("Failed to parse creature M2: ", m2Path); + return {}; + } + + // Load skin file (only for WotLK M2s - vanilla has embedded skin) + if (model.version >= 264) { + std::string skinPath = m2Path.substr(0, m2Path.size() - 3) + "00.skin"; + auto skinData = assetManager->readFile(skinPath); + if (!skinData.empty()) { + pipeline::M2Loader::loadSkin(skinData, model); + } else { + LOG_WARNING("Missing skin file for WotLK creature M2: ", skinPath); + } + } + + // Load external .anim files for sequences without flag 0x20 + std::string basePath = m2Path.substr(0, m2Path.size() - 3); + for (uint32_t si = 0; si < model.sequences.size(); si++) { + if (!(model.sequences[si].flags & 0x20)) { + char animFileName[256]; + snprintf(animFileName, sizeof(animFileName), "%s%04u-%02u.anim", + basePath.c_str(), model.sequences[si].id, model.sequences[si].variationIndex); + auto animData = assetManager->readFileOptional(animFileName); + if (!animData.empty()) { + pipeline::M2Loader::loadAnimFile(m2Data, animData, si, model); + } + } + } + + return model; +} + void Application::spawnOnlineCreature(uint64_t guid, uint32_t displayId, float x, float y, float z, float orientation) { if (!renderer || !renderer->getCharacterRenderer() || !assetManager) return; @@ -4525,47 +4792,13 @@ void 
Application::spawnOnlineCreature(uint64_t guid, uint32_t displayId, float x // Load model from disk (only once per displayId) modelId = nextCreatureModelId_++; - auto m2Data = assetManager->readFile(m2Path); - if (m2Data.empty()) { - LOG_WARNING("Failed to read creature M2: ", m2Path); + pipeline::M2Model model = loadCreatureM2Sync(m2Path); + if (!model.isValid()) { nonRenderableCreatureDisplayIds_.insert(displayId); creaturePermanentFailureGuids_.insert(guid); return; } - pipeline::M2Model model = pipeline::M2Loader::load(m2Data); - if (model.vertices.empty()) { - LOG_WARNING("Failed to parse creature M2: ", m2Path); - nonRenderableCreatureDisplayIds_.insert(displayId); - creaturePermanentFailureGuids_.insert(guid); - return; - } - - // Load skin file (only for WotLK M2s - vanilla has embedded skin) - if (model.version >= 264) { - std::string skinPath = m2Path.substr(0, m2Path.size() - 3) + "00.skin"; - auto skinData = assetManager->readFile(skinPath); - if (!skinData.empty()) { - pipeline::M2Loader::loadSkin(skinData, model); - } else { - LOG_WARNING("Missing skin file for WotLK creature M2: ", skinPath); - } - } - - // Load external .anim files for sequences without flag 0x20 - std::string basePath = m2Path.substr(0, m2Path.size() - 3); - for (uint32_t si = 0; si < model.sequences.size(); si++) { - if (!(model.sequences[si].flags & 0x20)) { - char animFileName[256]; - snprintf(animFileName, sizeof(animFileName), "%s%04u-%02u.anim", - basePath.c_str(), model.sequences[si].id, model.sequences[si].variationIndex); - auto animData = assetManager->readFileOptional(animFileName); - if (!animData.empty()) { - pipeline::M2Loader::loadAnimFile(m2Data, animData, si, model); - } - } - } - if (!charRenderer->loadModel(model, modelId)) { LOG_WARNING("Failed to load creature model: ", m2Path); nonRenderableCreatureDisplayIds_.insert(displayId); @@ -4576,11 +4809,23 @@ void Application::spawnOnlineCreature(uint64_t guid, uint32_t displayId, float x 
displayIdModelCache_[displayId] = modelId; } - // Apply skin textures from CreatureDisplayInfo.dbc (only for newly loaded models) + // Apply skin textures from CreatureDisplayInfo.dbc (only once per displayId model). + // Track separately from model cache because async loading may upload the model + // before textures are applied. auto itDisplayData = displayDataMap_.find(displayId); - if (!modelCached && itDisplayData != displayDataMap_.end()) { + bool needsTextures = (displayIdTexturesApplied_.find(displayId) == displayIdTexturesApplied_.end()); + if (needsTextures && itDisplayData != displayDataMap_.end()) { + auto texStart = std::chrono::steady_clock::now(); + displayIdTexturesApplied_.insert(displayId); const auto& dispData = itDisplayData->second; + // Use pre-decoded textures from async creature load (if available) + auto itPreDec = displayIdPredecodedTextures_.find(displayId); + bool hasPreDec = (itPreDec != displayIdPredecodedTextures_.end()); + if (hasPreDec) { + charRenderer->setPredecodedBLPCache(&itPreDec->second); + } + // Get model directory for texture path construction std::string modelDir; size_t lastSlash = m2Path.find_last_of("\\/"); @@ -4613,336 +4858,217 @@ void Application::spawnOnlineCreature(uint64_t guid, uint32_t displayId, float x LOG_DEBUG(" Found humanoid extra: raceId=", (int)extra.raceId, " sexId=", (int)extra.sexId, " hairStyle=", (int)extra.hairStyleId, " hairColor=", (int)extra.hairColorId, " bakeName='", extra.bakeName, "'"); - LOG_DEBUG("NPC equip: chest=", extra.equipDisplayId[3], - " legs=", extra.equipDisplayId[5], - " feet=", extra.equipDisplayId[6], - " hands=", extra.equipDisplayId[8], - " bake='", extra.bakeName, "'"); - // Build equipment texture region layers from NPC equipment display IDs - // (texture-only compositing — no geoset changes to avoid invisibility bugs) - std::vector> npcRegionLayers; - std::string npcCapeTexturePath; - auto npcItemDisplayDbc = assetManager->loadDBC("ItemDisplayInfo.dbc"); - if 
(npcItemDisplayDbc) { - static const char* npcComponentDirs[] = { - "ArmUpperTexture", "ArmLowerTexture", "HandTexture", - "TorsoUpperTexture", "TorsoLowerTexture", - "LegUpperTexture", "LegLowerTexture", "FootTexture", - }; - const auto* idiL = pipeline::getActiveDBCLayout() - ? pipeline::getActiveDBCLayout()->getLayout("ItemDisplayInfo") : nullptr; - // Texture component region fields (8 regions: ArmUpper..Foot) - // Binary DBC (23 fields) has textures at 14+ - const uint32_t texRegionFields[8] = { - idiL ? (*idiL)["TextureArmUpper"] : 14u, - idiL ? (*idiL)["TextureArmLower"] : 15u, - idiL ? (*idiL)["TextureHand"] : 16u, - idiL ? (*idiL)["TextureTorsoUpper"]: 17u, - idiL ? (*idiL)["TextureTorsoLower"]: 18u, - idiL ? (*idiL)["TextureLegUpper"] : 19u, - idiL ? (*idiL)["TextureLegLower"] : 20u, - idiL ? (*idiL)["TextureFoot"] : 21u, - }; - const bool npcIsFemale = (extra.sexId == 1); - const bool npcHasArmArmor = (extra.equipDisplayId[7] != 0 || extra.equipDisplayId[8] != 0); - - auto regionAllowedForNpcSlot = [](int eqSlot, int region) -> bool { - // Regions: 0 ArmUpper, 1 ArmLower, 2 Hand, 3 TorsoUpper, 4 TorsoLower, - // 5 LegUpper, 6 LegLower, 7 Foot - switch (eqSlot) { - case 2: // shirt - case 3: // chest - return region <= 4; - case 4: // belt - // TODO(#npc-belt-region): belt torso-lower overlay can - // cut out male abdomen on some humanoid NPCs. - // Keep disabled until region compositing is fixed. - return false; - case 5: // legs - return region == 5 || region == 6; - case 6: // feet - return region == 7; - case 7: // wrist - // Bracer overlays on NPCs often produce bad arm artifacts. - // Keep disabled until slot-accurate arm compositing is implemented. - return false; - case 8: // hands - // Keep glove textures to hand region only; arm regions from glove - // items can produce furry/looping forearm artifacts on some NPCs. 
- return region == 2; - case 9: // tabard - return region == 3 || region == 4; - default: - return false; - } - }; - auto regionAllowedForNpcSlotCtx = [&](int eqSlot, int region) -> bool { - // Shirt (slot 2) without arm armor: restrict to torso only - // to avoid bare-skin shirt textures bleeding onto arms. - // Chest (slot 3) always paints arms — plate/mail chest armor - // must cover the full upper body even without separate gloves. - if (eqSlot == 2 && !npcHasArmArmor) { - return (region == 3 || region == 4); - } - return regionAllowedForNpcSlot(eqSlot, region); - }; - - // Iterate all 11 NPC equipment slots; use slot-aware region filtering - for (int eqSlot = 0; eqSlot < 11; eqSlot++) { - uint32_t did = extra.equipDisplayId[eqSlot]; - if (did == 0) continue; - int32_t recIdx = npcItemDisplayDbc->findRecordById(did); - if (recIdx < 0) continue; - - for (int region = 0; region < 8; region++) { - if (!regionAllowedForNpcSlotCtx(eqSlot, region)) continue; - std::string texName = npcItemDisplayDbc->getString( - static_cast(recIdx), texRegionFields[region]); - if (texName.empty()) continue; - - std::string base = "Item\\TextureComponents\\" + - std::string(npcComponentDirs[region]) + "\\" + texName; - std::string genderPath = base + (npcIsFemale ? "_F.blp" : "_M.blp"); - std::string unisexPath = base + "_U.blp"; - std::string basePath = base + ".blp"; - std::string fullPath; - if (assetManager->fileExists(genderPath)) fullPath = genderPath; - else if (assetManager->fileExists(unisexPath)) fullPath = unisexPath; - else if (assetManager->fileExists(basePath)) fullPath = basePath; - else continue; - - npcRegionLayers.emplace_back(region, fullPath); - } - } - - // Cloak/cape texture is separate from the body atlas. - // Read equipped cape displayId (slot 10) and resolve the best cape texture path. 
- uint32_t capeDisplayId = extra.equipDisplayId[10]; - if (capeDisplayId != 0) { - int32_t capeRecIdx = npcItemDisplayDbc->findRecordById(capeDisplayId); - if (capeRecIdx >= 0) { - const uint32_t leftTexField = idiL ? (*idiL)["LeftModelTexture"] : 3u; - const uint32_t rightTexField = leftTexField + 1u; // modelTexture_2 in 3.3.5a - - std::vector capeNames; - auto addName = [&](const std::string& n) { - if (!n.empty() && std::find(capeNames.begin(), capeNames.end(), n) == capeNames.end()) { - capeNames.push_back(n); - } - }; - std::string leftName = npcItemDisplayDbc->getString( - static_cast(capeRecIdx), leftTexField); - std::string rightName = npcItemDisplayDbc->getString( - static_cast(capeRecIdx), rightTexField); - // Female models often prefer modelTexture_2. - if (npcIsFemale) { - addName(rightName); - addName(leftName); - } else { - addName(leftName); - addName(rightName); - } - - auto hasBlpExt = [](const std::string& p) { - if (p.size() < 4) return false; - std::string ext = p.substr(p.size() - 4); - std::transform(ext.begin(), ext.end(), ext.begin(), - [](unsigned char c) { return static_cast(std::tolower(c)); }); - return ext == ".blp"; - }; - - std::vector capeCandidates; - auto addCapeCandidate = [&](const std::string& p) { - if (p.empty()) return; - if (std::find(capeCandidates.begin(), capeCandidates.end(), p) == capeCandidates.end()) { - capeCandidates.push_back(p); - } - }; - - for (const auto& nameRaw : capeNames) { - std::string name = nameRaw; - std::replace(name.begin(), name.end(), '/', '\\'); - bool hasDir = (name.find('\\') != std::string::npos); - bool hasExt = hasBlpExt(name); - if (hasDir) { - addCapeCandidate(name); - if (!hasExt) addCapeCandidate(name + ".blp"); - } else { - std::string base = "Item\\ObjectComponents\\Cape\\" + name; - addCapeCandidate(base); - if (!hasExt) addCapeCandidate(base + ".blp"); - // Some data sets use gender/unisex suffix variants. - addCapeCandidate(base + (npcIsFemale ? 
"_F.blp" : "_M.blp")); - addCapeCandidate(base + "_U.blp"); - } - } - - for (const auto& candidate : capeCandidates) { - if (assetManager->fileExists(candidate)) { - npcCapeTexturePath = candidate; - break; - } - } - } - } - } - - // Use baked texture for body skin (types 1, 2) - // Type 6 (hair) needs its own texture from CharSections.dbc - const bool allowNpcRegionComposite = true; - rendering::VkTexture* bakedSkinTex = nullptr; - if (!extra.bakeName.empty()) { - std::string bakePath = "Textures\\BakedNpcTextures\\" + extra.bakeName; - rendering::VkTexture* finalTex = charRenderer->loadTexture(bakePath); - bakedSkinTex = finalTex; - if (finalTex && modelData) { - for (size_t ti = 0; ti < modelData->textures.size(); ti++) { - uint32_t texType = modelData->textures[ti].type; - if (texType == 1) { - charRenderer->setModelTexture(modelId, static_cast(ti), finalTex); - hasHumanoidTexture = true; - LOG_DEBUG("NPC baked type1 slot=", ti, " modelId=", modelId, - " tex=", bakePath); - } - } - } - } - // Fallback: if baked texture failed or bakeName was empty, build from CharSections - if (!hasHumanoidTexture) { - LOG_DEBUG(" Trying CharSections fallback for NPC skin"); - - // Build skin texture from CharSections.dbc (same as player character) - auto csFallbackDbc = assetManager->loadDBC("CharSections.dbc"); - if (csFallbackDbc) { - const auto* csFL = pipeline::getActiveDBCLayout() - ? pipeline::getActiveDBCLayout()->getLayout("CharSections") : nullptr; - uint32_t npcRace = static_cast(extra.raceId); - uint32_t npcSex = static_cast(extra.sexId); - uint32_t npcSkin = static_cast(extra.skinId); - uint32_t npcFace = static_cast(extra.faceId); - std::string npcSkinPath, npcFaceLower, npcFaceUpper; - std::vector npcUnderwear; - - for (uint32_t r = 0; r < csFallbackDbc->getRecordCount(); r++) { - uint32_t rId = csFallbackDbc->getUInt32(r, csFL ? (*csFL)["RaceID"] : 1); - uint32_t sId = csFallbackDbc->getUInt32(r, csFL ? 
(*csFL)["SexID"] : 2); - if (rId != npcRace || sId != npcSex) continue; - - uint32_t section = csFallbackDbc->getUInt32(r, csFL ? (*csFL)["BaseSection"] : 3); - uint32_t variation = csFallbackDbc->getUInt32(r, csFL ? (*csFL)["VariationIndex"] : 8); - uint32_t color = csFallbackDbc->getUInt32(r, csFL ? (*csFL)["ColorIndex"] : 9); - uint32_t tex1F = csFL ? (*csFL)["Texture1"] : 4; - - // Section 0 = skin: match colorIndex = skinId - if (section == 0 && npcSkinPath.empty() && color == npcSkin) { - npcSkinPath = csFallbackDbc->getString(r, tex1F); - } - // Section 1 = face: match variation=faceId, color=skinId - else if (section == 1 && npcFaceLower.empty() && - variation == npcFace && color == npcSkin) { - npcFaceLower = csFallbackDbc->getString(r, tex1F); - npcFaceUpper = csFallbackDbc->getString(r, tex1F + 1); - } - // Section 4 = underwear: match color=skinId - else if (section == 4 && npcUnderwear.empty() && color == npcSkin) { - for (uint32_t f = tex1F; f <= tex1F + 2; f++) { - std::string tex = csFallbackDbc->getString(r, f); - if (!tex.empty()) npcUnderwear.push_back(tex); - } - } - } - - LOG_DEBUG("NPC CharSections lookup: race=", npcRace, " sex=", npcSex, - " skin=", npcSkin, " face=", npcFace, - " skinPath='", npcSkinPath, "' faceLower='", npcFaceLower, "'"); - if (!npcSkinPath.empty()) { - // Composite skin + face + underwear - std::vector skinLayers; - skinLayers.push_back(npcSkinPath); - if (!npcFaceLower.empty()) skinLayers.push_back(npcFaceLower); - if (!npcFaceUpper.empty()) skinLayers.push_back(npcFaceUpper); - for (const auto& uw : npcUnderwear) skinLayers.push_back(uw); - - rendering::VkTexture* npcSkinTex = nullptr; - if (allowNpcRegionComposite && !npcRegionLayers.empty()) { - npcSkinTex = charRenderer->compositeWithRegions(npcSkinPath, - std::vector(skinLayers.begin() + 1, skinLayers.end()), - npcRegionLayers); - } else if (skinLayers.size() > 1) { - npcSkinTex = charRenderer->compositeTextures(skinLayers); - } else { - npcSkinTex = 
charRenderer->loadTexture(npcSkinPath); - } - - if (npcSkinTex && modelData) { - int slotsSet = 0; - for (size_t ti = 0; ti < modelData->textures.size(); ti++) { - uint32_t texType = modelData->textures[ti].type; - if (texType == 1 || texType == 11 || texType == 12 || texType == 13) { - charRenderer->setModelTexture(modelId, static_cast(ti), npcSkinTex); - hasHumanoidTexture = true; - slotsSet++; - } - } - LOG_DEBUG("NPC CharSections: skin='", npcSkinPath, "' regions=", - npcRegionLayers.size(), " applied=", hasHumanoidTexture, - " slots=", slotsSet, - " modelId=", modelId, " texCount=", modelData->textures.size()); - } - } + // Collect model texture slot info (type 1 = skin, type 6 = hair) + std::vector skinSlots, hairSlots; + if (modelData) { + for (size_t ti = 0; ti < modelData->textures.size(); ti++) { + uint32_t texType = modelData->textures[ti].type; + if (texType == 1 || texType == 11 || texType == 12 || texType == 13) + skinSlots.push_back(static_cast(ti)); + if (texType == 6) + hairSlots.push_back(static_cast(ti)); } } - // Load hair texture from CharSections.dbc (section 3) - auto charSectionsDbc = assetManager->loadDBC("CharSections.dbc"); - if (charSectionsDbc) { - const auto* csL2 = pipeline::getActiveDBCLayout() ? pipeline::getActiveDBCLayout()->getLayout("CharSections") : nullptr; - uint32_t targetRace = static_cast(extra.raceId); - uint32_t targetSex = static_cast(extra.sexId); - std::string hairTexPath; + // Copy extra data for the async task (avoid dangling reference) + HumanoidDisplayExtra extraCopy = extra; - for (uint32_t r = 0; r < charSectionsDbc->getRecordCount(); r++) { - uint32_t raceId = charSectionsDbc->getUInt32(r, csL2 ? (*csL2)["RaceID"] : 1); - uint32_t sexId = charSectionsDbc->getUInt32(r, csL2 ? (*csL2)["SexID"] : 2); - uint32_t section = charSectionsDbc->getUInt32(r, csL2 ? (*csL2)["BaseSection"] : 3); - uint32_t variation = charSectionsDbc->getUInt32(r, csL2 ? 
(*csL2)["VariationIndex"] : 4); - uint32_t colorIdx = charSectionsDbc->getUInt32(r, csL2 ? (*csL2)["ColorIndex"] : 5); + // Launch async task: ALL DBC lookups, path resolution, and BLP pre-decode + // happen on a background thread. Only GPU texture upload runs on main thread + // (in processAsyncNpcCompositeResults). + auto* am = assetManager.get(); + AsyncNpcCompositeLoad load; + load.future = std::async(std::launch::async, + [am, extraCopy, skinSlots = std::move(skinSlots), + hairSlots = std::move(hairSlots), modelId, displayId]() mutable -> PreparedNpcComposite { + PreparedNpcComposite result; + DeferredNpcComposite& def = result.info; + def.modelId = modelId; + def.displayId = displayId; + def.skinTextureSlots = std::move(skinSlots); + def.hairTextureSlots = std::move(hairSlots); - if (raceId != targetRace || sexId != targetSex) continue; - if (section != 3) continue; // Section 3 = hair - if (variation != static_cast(extra.hairStyleId)) continue; - if (colorIdx != static_cast(extra.hairColorId)) continue; + std::vector allPaths; // paths to pre-decode - hairTexPath = charSectionsDbc->getString(r, csL2 ? (*csL2)["Texture1"] : 6); - break; - } + // --- Baked skin texture --- + if (!extraCopy.bakeName.empty()) { + def.bakedSkinPath = "Textures\\BakedNpcTextures\\" + extraCopy.bakeName; + def.hasBakedSkin = true; + allPaths.push_back(def.bakedSkinPath); + } - if (!hairTexPath.empty()) { - rendering::VkTexture* hairTex = charRenderer->loadTexture(hairTexPath); - rendering::VkTexture* whTex = charRenderer->loadTexture(""); - if (hairTex && hairTex != whTex && modelData) { - for (size_t ti = 0; ti < modelData->textures.size(); ti++) { - if (modelData->textures[ti].type == 6) { - charRenderer->setModelTexture(modelId, static_cast(ti), hairTex); + // --- CharSections fallback (skin/face/underwear) --- + if (!def.hasBakedSkin) { + auto csDbc = am->loadDBC("CharSections.dbc"); + if (csDbc) { + const auto* csL = pipeline::getActiveDBCLayout() + ? 
pipeline::getActiveDBCLayout()->getLayout("CharSections") : nullptr; + uint32_t npcRace = static_cast(extraCopy.raceId); + uint32_t npcSex = static_cast(extraCopy.sexId); + uint32_t npcSkin = static_cast(extraCopy.skinId); + uint32_t npcFace = static_cast(extraCopy.faceId); + std::string npcFaceLower, npcFaceUpper; + std::vector npcUnderwear; + + for (uint32_t r = 0; r < csDbc->getRecordCount(); r++) { + uint32_t rId = csDbc->getUInt32(r, csL ? (*csL)["RaceID"] : 1); + uint32_t sId = csDbc->getUInt32(r, csL ? (*csL)["SexID"] : 2); + if (rId != npcRace || sId != npcSex) continue; + + uint32_t section = csDbc->getUInt32(r, csL ? (*csL)["BaseSection"] : 3); + uint32_t variation = csDbc->getUInt32(r, csL ? (*csL)["VariationIndex"] : 4); + uint32_t color = csDbc->getUInt32(r, csL ? (*csL)["ColorIndex"] : 5); + uint32_t tex1F = csL ? (*csL)["Texture1"] : 6; + + if (section == 0 && def.basePath.empty() && color == npcSkin) { + def.basePath = csDbc->getString(r, tex1F); + } else if (section == 1 && npcFaceLower.empty() && + variation == npcFace && color == npcSkin) { + npcFaceLower = csDbc->getString(r, tex1F); + npcFaceUpper = csDbc->getString(r, tex1F + 1); + } else if (section == 4 && npcUnderwear.empty() && color == npcSkin) { + for (uint32_t f = tex1F; f <= tex1F + 2; f++) { + std::string tex = csDbc->getString(r, f); + if (!tex.empty()) npcUnderwear.push_back(tex); + } + } + } + + if (!def.basePath.empty()) { + allPaths.push_back(def.basePath); + if (!npcFaceLower.empty()) { def.overlayPaths.push_back(npcFaceLower); allPaths.push_back(npcFaceLower); } + if (!npcFaceUpper.empty()) { def.overlayPaths.push_back(npcFaceUpper); allPaths.push_back(npcFaceUpper); } + for (const auto& uw : npcUnderwear) { def.overlayPaths.push_back(uw); allPaths.push_back(uw); } } } } - } - // Bald NPCs (hairStyle=0 or no CharSections match): set type-6 to - // the skin/baked texture so the scalp cap renders with skin color. 
- if (hairTexPath.empty() && bakedSkinTex && modelData) { - for (size_t ti = 0; ti < modelData->textures.size(); ti++) { - if (modelData->textures[ti].type == 6) { - charRenderer->setModelTexture(modelId, static_cast(ti), bakedSkinTex); + + // --- Equipment region layers (ItemDisplayInfo DBC) --- + auto idiDbc = am->loadDBC("ItemDisplayInfo.dbc"); + if (idiDbc) { + static const char* componentDirs[] = { + "ArmUpperTexture", "ArmLowerTexture", "HandTexture", + "TorsoUpperTexture", "TorsoLowerTexture", + "LegUpperTexture", "LegLowerTexture", "FootTexture", + }; + const auto* idiL = pipeline::getActiveDBCLayout() + ? pipeline::getActiveDBCLayout()->getLayout("ItemDisplayInfo") : nullptr; + const uint32_t texRegionFields[8] = { + idiL ? (*idiL)["TextureArmUpper"] : 14u, + idiL ? (*idiL)["TextureArmLower"] : 15u, + idiL ? (*idiL)["TextureHand"] : 16u, + idiL ? (*idiL)["TextureTorsoUpper"]: 17u, + idiL ? (*idiL)["TextureTorsoLower"]: 18u, + idiL ? (*idiL)["TextureLegUpper"] : 19u, + idiL ? (*idiL)["TextureLegLower"] : 20u, + idiL ? 
(*idiL)["TextureFoot"] : 21u, + }; + const bool npcIsFemale = (extraCopy.sexId == 1); + const bool npcHasArmArmor = (extraCopy.equipDisplayId[7] != 0 || extraCopy.equipDisplayId[8] != 0); + + auto regionAllowedForNpcSlot = [](int eqSlot, int region) -> bool { + switch (eqSlot) { + case 2: case 3: return region <= 4; + case 4: return false; + case 5: return region == 5 || region == 6; + case 6: return region == 7; + case 7: return false; + case 8: return region == 2; + case 9: return region == 3 || region == 4; + default: return false; + } + }; + + for (int eqSlot = 0; eqSlot < 11; eqSlot++) { + uint32_t did = extraCopy.equipDisplayId[eqSlot]; + if (did == 0) continue; + int32_t recIdx = idiDbc->findRecordById(did); + if (recIdx < 0) continue; + + for (int region = 0; region < 8; region++) { + if (!regionAllowedForNpcSlot(eqSlot, region)) continue; + if (eqSlot == 2 && !npcHasArmArmor && !(region == 3 || region == 4)) continue; + std::string texName = idiDbc->getString( + static_cast(recIdx), texRegionFields[region]); + if (texName.empty()) continue; + + std::string base = "Item\\TextureComponents\\" + + std::string(componentDirs[region]) + "\\" + texName; + std::string genderPath = base + (npcIsFemale ? "_F.blp" : "_M.blp"); + std::string unisexPath = base + "_U.blp"; + std::string basePath = base + ".blp"; + std::string fullPath; + if (am->fileExists(genderPath)) fullPath = genderPath; + else if (am->fileExists(unisexPath)) fullPath = unisexPath; + else if (am->fileExists(basePath)) fullPath = basePath; + else continue; + + def.regionLayers.emplace_back(region, fullPath); + allPaths.push_back(fullPath); + } } } - } - } - // Do not apply cape textures at model scope here. Type-2 texture slots are - // shared per model and this can leak cape textures/white fallbacks onto - // unrelated humanoid NPCs that use the same modelId. 
+ // Determine compositing mode + if (!def.basePath.empty()) { + bool needsComposite = !def.overlayPaths.empty() || !def.regionLayers.empty(); + if (needsComposite && !def.skinTextureSlots.empty()) { + def.hasComposite = true; + } else if (!def.skinTextureSlots.empty()) { + def.hasSimpleSkin = true; + } + } + + // --- Hair texture from CharSections (section 3) --- + { + auto csDbc = am->loadDBC("CharSections.dbc"); + if (csDbc) { + const auto* csL = pipeline::getActiveDBCLayout() + ? pipeline::getActiveDBCLayout()->getLayout("CharSections") : nullptr; + uint32_t targetRace = static_cast(extraCopy.raceId); + uint32_t targetSex = static_cast(extraCopy.sexId); + + for (uint32_t r = 0; r < csDbc->getRecordCount(); r++) { + uint32_t raceId = csDbc->getUInt32(r, csL ? (*csL)["RaceID"] : 1); + uint32_t sexId = csDbc->getUInt32(r, csL ? (*csL)["SexID"] : 2); + if (raceId != targetRace || sexId != targetSex) continue; + uint32_t section = csDbc->getUInt32(r, csL ? (*csL)["BaseSection"] : 3); + if (section != 3) continue; + uint32_t variation = csDbc->getUInt32(r, csL ? (*csL)["VariationIndex"] : 4); + uint32_t colorIdx = csDbc->getUInt32(r, csL ? (*csL)["ColorIndex"] : 5); + if (variation != static_cast(extraCopy.hairStyleId)) continue; + if (colorIdx != static_cast(extraCopy.hairColorId)) continue; + def.hairTexturePath = csDbc->getString(r, csL ? 
(*csL)["Texture1"] : 6); + break; + } + + if (!def.hairTexturePath.empty()) { + allPaths.push_back(def.hairTexturePath); + } else if (def.hasBakedSkin && !def.hairTextureSlots.empty()) { + def.useBakedForHair = true; + // bakedSkinPath already in allPaths + } + } + } + + // --- Pre-decode all BLP textures on this background thread --- + for (const auto& path : allPaths) { + std::string key = path; + std::replace(key.begin(), key.end(), '/', '\\'); + std::transform(key.begin(), key.end(), key.begin(), + [](unsigned char c) { return static_cast(std::tolower(c)); }); + if (result.predecodedTextures.count(key)) continue; + auto blp = am->loadTexture(key); + if (blp.isValid()) { + result.predecodedTextures[key] = std::move(blp); + } + } + + return result; + }); + asyncNpcCompositeLoads_.push_back(std::move(load)); + hasHumanoidTexture = true; // skip non-humanoid skin block } else { LOG_WARNING(" extraDisplayId ", dispData.extraDisplayId, " not found in humanoidExtraMap"); } @@ -5021,6 +5147,18 @@ void Application::spawnOnlineCreature(uint64_t guid, uint32_t displayId, float x } } } + + // Clear pre-decoded cache after applying all display textures + charRenderer->setPredecodedBLPCache(nullptr); + displayIdPredecodedTextures_.erase(displayId); + { + auto texEnd = std::chrono::steady_clock::now(); + float texMs = std::chrono::duration(texEnd - texStart).count(); + if (texMs > 3.0f) { + LOG_WARNING("spawnCreature texture setup took ", texMs, "ms displayId=", displayId, + " hasPreDec=", hasPreDec, " extra=", dispData.extraDisplayId); + } + } } // Use the entity's latest server-authoritative position rather than the stale spawn @@ -5058,7 +5196,9 @@ void Application::spawnOnlineCreature(uint64_t guid, uint32_t displayId, float x // Per-instance hair/skin texture overrides — runs for ALL NPCs (including cached models) // so that each NPC gets its own hair/skin color regardless of model sharing. + // Uses pre-built CharSections cache (O(1) lookup instead of O(N) DBC scan). 
{ + if (!charSectionsCacheBuilt_) buildCharSectionsCache(); auto itDD = displayDataMap_.find(displayId); if (itDD != displayDataMap_.end() && itDD->second.extraDisplayId != 0) { auto itExtra2 = humanoidExtraMap_.find(itDD->second.extraDisplayId); @@ -5066,37 +5206,19 @@ void Application::spawnOnlineCreature(uint64_t guid, uint32_t displayId, float x const auto& extra = itExtra2->second; const auto* md = charRenderer->getModelData(modelId); if (md) { - auto charSectionsDbc2 = assetManager->loadDBC("CharSections.dbc"); - if (charSectionsDbc2) { - const auto* csL = pipeline::getActiveDBCLayout() - ? pipeline::getActiveDBCLayout()->getLayout("CharSections") : nullptr; - uint32_t tgtRace = static_cast(extra.raceId); - uint32_t tgtSex = static_cast(extra.sexId); - - // Look up hair texture (section 3) + // Look up hair texture (section 3) via cache rendering::VkTexture* whiteTex = charRenderer->loadTexture(""); - for (uint32_t r = 0; r < charSectionsDbc2->getRecordCount(); r++) { - uint32_t rId = charSectionsDbc2->getUInt32(r, csL ? (*csL)["RaceID"] : 1); - uint32_t sId = charSectionsDbc2->getUInt32(r, csL ? (*csL)["SexID"] : 2); - if (rId != tgtRace || sId != tgtSex) continue; - uint32_t sec = charSectionsDbc2->getUInt32(r, csL ? (*csL)["BaseSection"] : 3); - if (sec != 3) continue; - uint32_t var = charSectionsDbc2->getUInt32(r, csL ? (*csL)["VariationIndex"] : 4); - uint32_t col = charSectionsDbc2->getUInt32(r, csL ? (*csL)["ColorIndex"] : 5); - if (var != static_cast(extra.hairStyleId)) continue; - if (col != static_cast(extra.hairColorId)) continue; - std::string hairPath = charSectionsDbc2->getString(r, csL ? 
(*csL)["Texture1"] : 6); - if (!hairPath.empty()) { - rendering::VkTexture* hairTex = charRenderer->loadTexture(hairPath); - if (hairTex && hairTex != whiteTex) { - for (size_t ti = 0; ti < md->textures.size(); ti++) { - if (md->textures[ti].type == 6) { - charRenderer->setTextureSlotOverride(instanceId, static_cast(ti), hairTex); - } + std::string hairPath = lookupCharSection( + extra.raceId, extra.sexId, 3, extra.hairStyleId, extra.hairColorId, 0); + if (!hairPath.empty()) { + rendering::VkTexture* hairTex = charRenderer->loadTexture(hairPath); + if (hairTex && hairTex != whiteTex) { + for (size_t ti = 0; ti < md->textures.size(); ti++) { + if (md->textures[ti].type == 6) { + charRenderer->setTextureSlotOverride(instanceId, static_cast(ti), hairTex); } } } - break; } // Look up skin texture (section 0) for per-instance skin color. @@ -5108,30 +5230,20 @@ void Application::spawnOnlineCreature(uint64_t guid, uint32_t displayId, float x if (extra.equipDisplayId[s] != 0) hasEquipOrBake = true; } if (!hasEquipOrBake) { - for (uint32_t r = 0; r < charSectionsDbc2->getRecordCount(); r++) { - uint32_t rId = charSectionsDbc2->getUInt32(r, csL ? (*csL)["RaceID"] : 1); - uint32_t sId = charSectionsDbc2->getUInt32(r, csL ? (*csL)["SexID"] : 2); - if (rId != tgtRace || sId != tgtSex) continue; - uint32_t sec = charSectionsDbc2->getUInt32(r, csL ? (*csL)["BaseSection"] : 3); - if (sec != 0) continue; - uint32_t col = charSectionsDbc2->getUInt32(r, csL ? (*csL)["ColorIndex"] : 5); - if (col != static_cast(extra.skinId)) continue; - std::string skinPath = charSectionsDbc2->getString(r, csL ? 
(*csL)["Texture1"] : 6); - if (!skinPath.empty()) { - rendering::VkTexture* skinTex = charRenderer->loadTexture(skinPath); - if (skinTex) { - for (size_t ti = 0; ti < md->textures.size(); ti++) { - uint32_t tt = md->textures[ti].type; - if (tt == 1 || tt == 11) { - charRenderer->setTextureSlotOverride(instanceId, static_cast(ti), skinTex); - } + std::string skinPath = lookupCharSection( + extra.raceId, extra.sexId, 0, 0, extra.skinId, 0); + if (!skinPath.empty()) { + rendering::VkTexture* skinTex = charRenderer->loadTexture(skinPath); + if (skinTex) { + for (size_t ti = 0; ti < md->textures.size(); ti++) { + uint32_t tt = md->textures[ti].type; + if (tt == 1 || tt == 11) { + charRenderer->setTextureSlotOverride(instanceId, static_cast(ti), skinTex); } } } - break; } } - } } } } @@ -6692,7 +6804,184 @@ void Application::spawnOnlineGameObject(uint64_t guid, uint32_t entry, uint32_t " displayId=", displayId, " at (", x, ", ", y, ", ", z, ")"); } +void Application::processAsyncCreatureResults() { + // Check completed async model loads and finalize on main thread (GPU upload + instance creation). + // Limit GPU model uploads per frame to avoid spikes, but always drain cheap bookkeeping. + static constexpr int kMaxModelUploadsPerFrame = 1; + int modelUploads = 0; + + for (auto it = asyncCreatureLoads_.begin(); it != asyncCreatureLoads_.end(); ) { + if (!it->future.valid() || + it->future.wait_for(std::chrono::milliseconds(0)) != std::future_status::ready) { + ++it; + continue; + } + + // Peek: if this result needs a NEW model upload (not cached) and we've hit + // the upload budget, defer to next frame without consuming the future. + if (modelUploads >= kMaxModelUploadsPerFrame) { + // Check if this displayId already has a cached model (cheap spawn, no GPU upload). + // We can't peek the displayId without getting the future, so just break. 
+ break; + } + + auto result = it->future.get(); + it = asyncCreatureLoads_.erase(it); + + if (result.permanent_failure) { + nonRenderableCreatureDisplayIds_.insert(result.displayId); + creaturePermanentFailureGuids_.insert(result.guid); + pendingCreatureSpawnGuids_.erase(result.guid); + creatureSpawnRetryCounts_.erase(result.guid); + continue; + } + if (!result.valid || !result.model) { + pendingCreatureSpawnGuids_.erase(result.guid); + creatureSpawnRetryCounts_.erase(result.guid); + continue; + } + + // Model parsed on background thread — upload to GPU on main thread. + auto* charRenderer = renderer ? renderer->getCharacterRenderer() : nullptr; + if (!charRenderer) { + pendingCreatureSpawnGuids_.erase(result.guid); + continue; + } + + // Upload model to GPU (must happen on main thread) + // Use pre-decoded BLP cache to skip main-thread texture decode + auto uploadStart = std::chrono::steady_clock::now(); + charRenderer->setPredecodedBLPCache(&result.predecodedTextures); + if (!charRenderer->loadModel(*result.model, result.modelId)) { + charRenderer->setPredecodedBLPCache(nullptr); + nonRenderableCreatureDisplayIds_.insert(result.displayId); + creaturePermanentFailureGuids_.insert(result.guid); + pendingCreatureSpawnGuids_.erase(result.guid); + creatureSpawnRetryCounts_.erase(result.guid); + continue; + } + charRenderer->setPredecodedBLPCache(nullptr); + { + auto uploadEnd = std::chrono::steady_clock::now(); + float uploadMs = std::chrono::duration(uploadEnd - uploadStart).count(); + if (uploadMs > 3.0f) { + LOG_WARNING("charRenderer->loadModel took ", uploadMs, "ms displayId=", result.displayId, + " preDecoded=", result.predecodedTextures.size()); + } + } + // Save remaining pre-decoded textures (display skins) for spawnOnlineCreature + if (!result.predecodedTextures.empty()) { + displayIdPredecodedTextures_[result.displayId] = std::move(result.predecodedTextures); + } + displayIdModelCache_[result.displayId] = result.modelId; + modelUploads++; + + 
pendingCreatureSpawnGuids_.erase(result.guid); + creatureSpawnRetryCounts_.erase(result.guid); + + // Re-queue as a normal pending spawn — model is now cached, so sync spawn is fast + // (only creates instance + applies textures, no file I/O). + if (!creatureInstances_.count(result.guid) && + !creaturePermanentFailureGuids_.count(result.guid)) { + PendingCreatureSpawn s{}; + s.guid = result.guid; + s.displayId = result.displayId; + s.x = result.x; + s.y = result.y; + s.z = result.z; + s.orientation = result.orientation; + pendingCreatureSpawns_.push_back(s); + pendingCreatureSpawnGuids_.insert(result.guid); + } + } +} + +void Application::processAsyncNpcCompositeResults() { + auto* charRenderer = renderer ? renderer->getCharacterRenderer() : nullptr; + if (!charRenderer) return; + + for (auto it = asyncNpcCompositeLoads_.begin(); it != asyncNpcCompositeLoads_.end(); ) { + if (!it->future.valid() || + it->future.wait_for(std::chrono::milliseconds(0)) != std::future_status::ready) { + ++it; + continue; + } + auto result = it->future.get(); + it = asyncNpcCompositeLoads_.erase(it); + + const auto& info = result.info; + + // Set pre-decoded cache so texture loads skip synchronous BLP decode + charRenderer->setPredecodedBLPCache(&result.predecodedTextures); + + // --- Apply skin to type-1 slots --- + rendering::VkTexture* skinTex = nullptr; + + if (info.hasBakedSkin) { + // Baked skin: load from pre-decoded cache + skinTex = charRenderer->loadTexture(info.bakedSkinPath); + } + + if (info.hasComposite) { + // Composite with face/underwear/equipment regions on top of base skin + rendering::VkTexture* compositeTex = nullptr; + if (!info.regionLayers.empty()) { + compositeTex = charRenderer->compositeWithRegions(info.basePath, + info.overlayPaths, info.regionLayers); + } else if (!info.overlayPaths.empty()) { + std::vector skinLayers; + skinLayers.push_back(info.basePath); + for (const auto& op : info.overlayPaths) skinLayers.push_back(op); + compositeTex = 
charRenderer->compositeTextures(skinLayers); + } + if (compositeTex) skinTex = compositeTex; + } else if (info.hasSimpleSkin) { + // Simple skin: just base texture, no compositing + auto* baseTex = charRenderer->loadTexture(info.basePath); + if (baseTex) skinTex = baseTex; + } + + if (skinTex) { + for (uint32_t slot : info.skinTextureSlots) { + charRenderer->setModelTexture(info.modelId, slot, skinTex); + } + } + + // --- Apply hair texture to type-6 slots --- + if (!info.hairTexturePath.empty()) { + rendering::VkTexture* hairTex = charRenderer->loadTexture(info.hairTexturePath); + rendering::VkTexture* whTex = charRenderer->loadTexture(""); + if (hairTex && hairTex != whTex) { + for (uint32_t slot : info.hairTextureSlots) { + charRenderer->setModelTexture(info.modelId, slot, hairTex); + } + } + } else if (info.useBakedForHair && skinTex) { + // Bald NPC: use skin/baked texture for scalp cap + for (uint32_t slot : info.hairTextureSlots) { + charRenderer->setModelTexture(info.modelId, slot, skinTex); + } + } + + charRenderer->setPredecodedBLPCache(nullptr); + } +} + void Application::processCreatureSpawnQueue() { + auto startTime = std::chrono::steady_clock::now(); + // Budget: max 2ms per frame for creature spawning to prevent stutter. + static constexpr float kSpawnBudgetMs = 2.0f; + + // First, finalize any async model loads that completed on background threads. 
+ processAsyncCreatureResults(); + { + auto now = std::chrono::steady_clock::now(); + float asyncMs = std::chrono::duration(now - startTime).count(); + if (asyncMs > 3.0f) { + LOG_WARNING("processAsyncCreatureResults took ", asyncMs, "ms"); + } + } + if (pendingCreatureSpawns_.empty()) return; if (!creatureLookupsBuilt_) { buildCreatureDisplayLookups(); @@ -6700,13 +6989,21 @@ void Application::processCreatureSpawnQueue() { } int processed = 0; - int newModelLoads = 0; + int asyncLaunched = 0; size_t rotationsLeft = pendingCreatureSpawns_.size(); while (!pendingCreatureSpawns_.empty() && processed < MAX_SPAWNS_PER_FRAME && rotationsLeft > 0) { + // Check time budget every iteration (including first — async results may + // have already consumed the budget via GPU model uploads). + { + auto now = std::chrono::steady_clock::now(); + float elapsedMs = std::chrono::duration(now - startTime).count(); + if (elapsedMs >= kSpawnBudgetMs) break; + } + PendingCreatureSpawn s = pendingCreatureSpawns_.front(); - pendingCreatureSpawns_.erase(pendingCreatureSpawns_.begin()); + pendingCreatureSpawns_.pop_front(); if (nonRenderableCreatureDisplayIds_.count(s.displayId)) { pendingCreatureSpawnGuids_.erase(s.guid); @@ -6717,15 +7014,269 @@ void Application::processCreatureSpawnQueue() { } const bool needsNewModel = (displayIdModelCache_.find(s.displayId) == displayIdModelCache_.end()); - if (needsNewModel && newModelLoads >= MAX_NEW_CREATURE_MODELS_PER_FRAME) { - // Defer additional first-time model/texture loads to later frames so - // movement stays responsive in dense areas. - pendingCreatureSpawns_.push_back(s); - rotationsLeft--; + + // For new models: launch async load on background thread instead of blocking. 
+ if (needsNewModel) { + if (static_cast(asyncCreatureLoads_.size()) + asyncLaunched >= MAX_ASYNC_CREATURE_LOADS) { + // Too many in-flight — defer to next frame + pendingCreatureSpawns_.push_back(s); + rotationsLeft--; + continue; + } + + std::string m2Path = getModelPathForDisplayId(s.displayId); + if (m2Path.empty()) { + nonRenderableCreatureDisplayIds_.insert(s.displayId); + creaturePermanentFailureGuids_.insert(s.guid); + pendingCreatureSpawnGuids_.erase(s.guid); + creatureSpawnRetryCounts_.erase(s.guid); + processed++; + rotationsLeft = pendingCreatureSpawns_.size(); + continue; + } + + // Check for invisible stalkers + { + std::string lowerPath = m2Path; + std::transform(lowerPath.begin(), lowerPath.end(), lowerPath.begin(), + [](unsigned char c) { return static_cast(std::tolower(c)); }); + if (lowerPath.find("invisiblestalker") != std::string::npos || + lowerPath.find("invisible_stalker") != std::string::npos) { + nonRenderableCreatureDisplayIds_.insert(s.displayId); + creaturePermanentFailureGuids_.insert(s.guid); + pendingCreatureSpawnGuids_.erase(s.guid); + processed++; + rotationsLeft = pendingCreatureSpawns_.size(); + continue; + } + } + + // Launch async M2 load — file I/O and parsing happen off the main thread. 
+ uint32_t modelId = nextCreatureModelId_++; + auto* am = assetManager.get(); + + // Collect display skin texture paths for background pre-decode + std::vector displaySkinPaths; + { + auto itDD = displayDataMap_.find(s.displayId); + if (itDD != displayDataMap_.end()) { + std::string modelDir; + size_t lastSlash = m2Path.find_last_of("\\/"); + if (lastSlash != std::string::npos) modelDir = m2Path.substr(0, lastSlash + 1); + + auto resolveForAsync = [&](const std::string& skinField) { + if (skinField.empty()) return; + std::string raw = skinField; + std::replace(raw.begin(), raw.end(), '/', '\\'); + while (!raw.empty() && std::isspace(static_cast(raw.front()))) raw.erase(raw.begin()); + while (!raw.empty() && std::isspace(static_cast(raw.back()))) raw.pop_back(); + if (raw.empty()) return; + bool hasExt = raw.size() >= 4 && raw.substr(raw.size()-4) == ".blp"; + bool hasDir = raw.find('\\') != std::string::npos; + std::vector candidates; + if (hasDir) { + candidates.push_back(raw); + if (!hasExt) candidates.push_back(raw + ".blp"); + } else { + candidates.push_back(modelDir + raw); + if (!hasExt) candidates.push_back(modelDir + raw + ".blp"); + candidates.push_back(raw); + if (!hasExt) candidates.push_back(raw + ".blp"); + } + for (const auto& c : candidates) { + if (am->fileExists(c)) { displaySkinPaths.push_back(c); return; } + } + }; + resolveForAsync(itDD->second.skin1); + resolveForAsync(itDD->second.skin2); + resolveForAsync(itDD->second.skin3); + + // Pre-decode humanoid NPC textures (bake, skin, face, underwear, hair, equipment) + if (itDD->second.extraDisplayId != 0) { + auto itHE = humanoidExtraMap_.find(itDD->second.extraDisplayId); + if (itHE != humanoidExtraMap_.end()) { + const auto& he = itHE->second; + // Baked texture + if (!he.bakeName.empty()) { + displaySkinPaths.push_back("Textures\\BakedNpcTextures\\" + he.bakeName); + } + // CharSections: skin, face, underwear + auto csDbc = am->loadDBC("CharSections.dbc"); + if (csDbc) { + const auto* csL = 
pipeline::getActiveDBCLayout() + ? pipeline::getActiveDBCLayout()->getLayout("CharSections") : nullptr; + uint32_t nRace = static_cast(he.raceId); + uint32_t nSex = static_cast(he.sexId); + uint32_t nSkin = static_cast(he.skinId); + uint32_t nFace = static_cast(he.faceId); + for (uint32_t r = 0; r < csDbc->getRecordCount(); r++) { + uint32_t rId = csDbc->getUInt32(r, csL ? (*csL)["RaceID"] : 1); + uint32_t sId = csDbc->getUInt32(r, csL ? (*csL)["SexID"] : 2); + if (rId != nRace || sId != nSex) continue; + uint32_t section = csDbc->getUInt32(r, csL ? (*csL)["BaseSection"] : 3); + uint32_t variation = csDbc->getUInt32(r, csL ? (*csL)["VariationIndex"] : 4); + uint32_t color = csDbc->getUInt32(r, csL ? (*csL)["ColorIndex"] : 5); + uint32_t tex1F = csL ? (*csL)["Texture1"] : 6; + if (section == 0 && color == nSkin) { + std::string t = csDbc->getString(r, tex1F); + if (!t.empty()) displaySkinPaths.push_back(t); + } else if (section == 1 && variation == nFace && color == nSkin) { + std::string t1 = csDbc->getString(r, tex1F); + std::string t2 = csDbc->getString(r, tex1F + 1); + if (!t1.empty()) displaySkinPaths.push_back(t1); + if (!t2.empty()) displaySkinPaths.push_back(t2); + } else if (section == 3 && variation == static_cast(he.hairStyleId) + && color == static_cast(he.hairColorId)) { + std::string t = csDbc->getString(r, tex1F); + if (!t.empty()) displaySkinPaths.push_back(t); + } else if (section == 4 && color == nSkin) { + for (uint32_t f = tex1F; f <= tex1F + 2; f++) { + std::string t = csDbc->getString(r, f); + if (!t.empty()) displaySkinPaths.push_back(t); + } + } + } + } + // Equipment region textures + auto idiDbc = am->loadDBC("ItemDisplayInfo.dbc"); + if (idiDbc) { + static const char* compDirs[] = { + "ArmUpperTexture", "ArmLowerTexture", "HandTexture", + "TorsoUpperTexture", "TorsoLowerTexture", + "LegUpperTexture", "LegLowerTexture", "FootTexture", + }; + const auto* idiL = pipeline::getActiveDBCLayout() + ? 
pipeline::getActiveDBCLayout()->getLayout("ItemDisplayInfo") : nullptr; + const uint32_t trf[8] = { + idiL ? (*idiL)["TextureArmUpper"] : 14u, + idiL ? (*idiL)["TextureArmLower"] : 15u, + idiL ? (*idiL)["TextureHand"] : 16u, + idiL ? (*idiL)["TextureTorsoUpper"]: 17u, + idiL ? (*idiL)["TextureTorsoLower"]: 18u, + idiL ? (*idiL)["TextureLegUpper"] : 19u, + idiL ? (*idiL)["TextureLegLower"] : 20u, + idiL ? (*idiL)["TextureFoot"] : 21u, + }; + const bool isFem = (he.sexId == 1); + for (int eq = 0; eq < 11; eq++) { + uint32_t did = he.equipDisplayId[eq]; + if (did == 0) continue; + int32_t recIdx = idiDbc->findRecordById(did); + if (recIdx < 0) continue; + for (int region = 0; region < 8; region++) { + std::string texName = idiDbc->getString(static_cast(recIdx), trf[region]); + if (texName.empty()) continue; + std::string base = "Item\\TextureComponents\\" + + std::string(compDirs[region]) + "\\" + texName; + std::string gp = base + (isFem ? "_F.blp" : "_M.blp"); + std::string up = base + "_U.blp"; + if (am->fileExists(gp)) displaySkinPaths.push_back(gp); + else if (am->fileExists(up)) displaySkinPaths.push_back(up); + else displaySkinPaths.push_back(base + ".blp"); + } + } + } + } + } + } + } + + AsyncCreatureLoad load; + load.future = std::async(std::launch::async, + [am, m2Path, modelId, s, skinPaths = std::move(displaySkinPaths)]() -> PreparedCreatureModel { + PreparedCreatureModel result; + result.guid = s.guid; + result.displayId = s.displayId; + result.modelId = modelId; + result.x = s.x; + result.y = s.y; + result.z = s.z; + result.orientation = s.orientation; + + auto m2Data = am->readFile(m2Path); + if (m2Data.empty()) { + result.permanent_failure = true; + return result; + } + + auto model = std::make_shared(pipeline::M2Loader::load(m2Data)); + if (model->vertices.empty()) { + result.permanent_failure = true; + return result; + } + + // Load skin file + if (model->version >= 264) { + std::string skinPath = m2Path.substr(0, m2Path.size() - 3) + "00.skin"; + 
auto skinData = am->readFile(skinPath); + if (!skinData.empty()) { + pipeline::M2Loader::loadSkin(skinData, *model); + } + } + + // Load external .anim files + std::string basePath = m2Path.substr(0, m2Path.size() - 3); + for (uint32_t si = 0; si < model->sequences.size(); si++) { + if (!(model->sequences[si].flags & 0x20)) { + char animFileName[256]; + snprintf(animFileName, sizeof(animFileName), "%s%04u-%02u.anim", + basePath.c_str(), model->sequences[si].id, model->sequences[si].variationIndex); + auto animData = am->readFileOptional(animFileName); + if (!animData.empty()) { + pipeline::M2Loader::loadAnimFile(m2Data, animData, si, *model); + } + } + } + + // Pre-decode model textures on background thread + for (const auto& tex : model->textures) { + if (tex.filename.empty()) continue; + std::string texKey = tex.filename; + std::replace(texKey.begin(), texKey.end(), '/', '\\'); + std::transform(texKey.begin(), texKey.end(), texKey.begin(), + [](unsigned char c) { return static_cast(std::tolower(c)); }); + if (result.predecodedTextures.find(texKey) != result.predecodedTextures.end()) continue; + auto blp = am->loadTexture(texKey); + if (blp.isValid()) { + result.predecodedTextures[texKey] = std::move(blp); + } + } + + // Pre-decode display skin textures (skin1/skin2/skin3 from CreatureDisplayInfo) + for (const auto& sp : skinPaths) { + std::string key = sp; + std::replace(key.begin(), key.end(), '/', '\\'); + std::transform(key.begin(), key.end(), key.begin(), + [](unsigned char c) { return static_cast(std::tolower(c)); }); + if (result.predecodedTextures.count(key)) continue; + auto blp = am->loadTexture(key); + if (blp.isValid()) { + result.predecodedTextures[key] = std::move(blp); + } + } + + result.model = std::move(model); + result.valid = true; + return result; + }); + asyncCreatureLoads_.push_back(std::move(load)); + asyncLaunched++; + // Don't erase from pendingCreatureSpawnGuids_ — the async result handler will do it + rotationsLeft = 
pendingCreatureSpawns_.size(); + processed++; continue; } - spawnOnlineCreature(s.guid, s.displayId, s.x, s.y, s.z, s.orientation); + // Cached model — spawn is fast (no file I/O, just instance creation + texture setup) + { + auto spawnStart = std::chrono::steady_clock::now(); + spawnOnlineCreature(s.guid, s.displayId, s.x, s.y, s.z, s.orientation); + auto spawnEnd = std::chrono::steady_clock::now(); + float spawnMs = std::chrono::duration(spawnEnd - spawnStart).count(); + if (spawnMs > 3.0f) { + LOG_WARNING("spawnOnlineCreature took ", spawnMs, "ms displayId=", s.displayId); + } + } pendingCreatureSpawnGuids_.erase(s.guid); // If spawn still failed, retry for a limited number of frames. @@ -6752,9 +7303,6 @@ void Application::processCreatureSpawnQueue() { } else { creatureSpawnRetryCounts_.erase(s.guid); } - if (needsNewModel) { - newModelLoads++; - } rotationsLeft = pendingCreatureSpawns_.size(); processed++; } @@ -6787,23 +7335,327 @@ void Application::processPlayerSpawnQueue() { } } +std::vector Application::resolveEquipmentTexturePaths(uint64_t guid, + const std::array& displayInfoIds, + const std::array& /*inventoryTypes*/) const { + std::vector paths; + + auto it = onlinePlayerAppearance_.find(guid); + if (it == onlinePlayerAppearance_.end()) return paths; + const OnlinePlayerAppearanceState& st = it->second; + + // Add base skin + underwear paths + if (!st.bodySkinPath.empty()) paths.push_back(st.bodySkinPath); + for (const auto& up : st.underwearPaths) { + if (!up.empty()) paths.push_back(up); + } + + // Resolve equipment region texture paths (same logic as setOnlinePlayerEquipment) + auto displayInfoDbc = assetManager->loadDBC("ItemDisplayInfo.dbc"); + if (!displayInfoDbc) return paths; + const auto* idiL = pipeline::getActiveDBCLayout() + ? 
pipeline::getActiveDBCLayout()->getLayout("ItemDisplayInfo") : nullptr; + + static const char* componentDirs[] = { + "ArmUpperTexture", "ArmLowerTexture", "HandTexture", + "TorsoUpperTexture", "TorsoLowerTexture", + "LegUpperTexture", "LegLowerTexture", "FootTexture", + }; + const uint32_t texRegionFields[8] = { + idiL ? (*idiL)["TextureArmUpper"] : 14u, + idiL ? (*idiL)["TextureArmLower"] : 15u, + idiL ? (*idiL)["TextureHand"] : 16u, + idiL ? (*idiL)["TextureTorsoUpper"]: 17u, + idiL ? (*idiL)["TextureTorsoLower"]: 18u, + idiL ? (*idiL)["TextureLegUpper"] : 19u, + idiL ? (*idiL)["TextureLegLower"] : 20u, + idiL ? (*idiL)["TextureFoot"] : 21u, + }; + const bool isFemale = (st.genderId == 1); + + for (int s = 0; s < 19; s++) { + uint32_t did = displayInfoIds[s]; + if (did == 0) continue; + int32_t recIdx = displayInfoDbc->findRecordById(did); + if (recIdx < 0) continue; + for (int region = 0; region < 8; region++) { + std::string texName = displayInfoDbc->getString( + static_cast(recIdx), texRegionFields[region]); + if (texName.empty()) continue; + std::string base = "Item\\TextureComponents\\" + + std::string(componentDirs[region]) + "\\" + texName; + std::string genderPath = base + (isFemale ? "_F.blp" : "_M.blp"); + std::string unisexPath = base + "_U.blp"; + if (assetManager->fileExists(genderPath)) paths.push_back(genderPath); + else if (assetManager->fileExists(unisexPath)) paths.push_back(unisexPath); + else paths.push_back(base + ".blp"); + } + } + return paths; +} + +void Application::processAsyncEquipmentResults() { + for (auto it = asyncEquipmentLoads_.begin(); it != asyncEquipmentLoads_.end(); ) { + if (!it->future.valid() || + it->future.wait_for(std::chrono::milliseconds(0)) != std::future_status::ready) { + ++it; + continue; + } + auto result = it->future.get(); + it = asyncEquipmentLoads_.erase(it); + + auto* charRenderer = renderer ? 
renderer->getCharacterRenderer() : nullptr; + if (!charRenderer) continue; + + // Set pre-decoded cache so compositeWithRegions skips synchronous BLP decode + charRenderer->setPredecodedBLPCache(&result.predecodedTextures); + setOnlinePlayerEquipment(result.guid, result.displayInfoIds, result.inventoryTypes); + charRenderer->setPredecodedBLPCache(nullptr); + } +} + void Application::processDeferredEquipmentQueue() { + // First, finalize any completed async pre-decodes + processAsyncEquipmentResults(); + if (deferredEquipmentQueue_.empty()) return; - // Process at most 1 per frame — compositeWithRegions is expensive + // Limit in-flight async equipment loads + if (asyncEquipmentLoads_.size() >= 2) return; + auto [guid, equipData] = deferredEquipmentQueue_.front(); deferredEquipmentQueue_.erase(deferredEquipmentQueue_.begin()); - setOnlinePlayerEquipment(guid, equipData.first, equipData.second); + + // Resolve all texture paths that compositeWithRegions will need + auto texturePaths = resolveEquipmentTexturePaths(guid, equipData.first, equipData.second); + + if (texturePaths.empty()) { + // No textures to pre-decode — just apply directly (fast path) + setOnlinePlayerEquipment(guid, equipData.first, equipData.second); + return; + } + + // Launch background BLP pre-decode + auto* am = assetManager.get(); + auto displayInfoIds = equipData.first; + auto inventoryTypes = equipData.second; + AsyncEquipmentLoad load; + load.future = std::async(std::launch::async, + [am, guid, displayInfoIds, inventoryTypes, paths = std::move(texturePaths)]() -> PreparedEquipmentUpdate { + PreparedEquipmentUpdate result; + result.guid = guid; + result.displayInfoIds = displayInfoIds; + result.inventoryTypes = inventoryTypes; + for (const auto& path : paths) { + std::string key = path; + std::replace(key.begin(), key.end(), '/', '\\'); + std::transform(key.begin(), key.end(), key.begin(), + [](unsigned char c) { return static_cast(std::tolower(c)); }); + if 
(result.predecodedTextures.count(key)) continue; + auto blp = am->loadTexture(key); + if (blp.isValid()) { + result.predecodedTextures[key] = std::move(blp); + } + } + return result; + }); + asyncEquipmentLoads_.push_back(std::move(load)); +} + +void Application::processAsyncGameObjectResults() { + for (auto it = asyncGameObjectLoads_.begin(); it != asyncGameObjectLoads_.end(); ) { + if (!it->future.valid() || + it->future.wait_for(std::chrono::milliseconds(0)) != std::future_status::ready) { + ++it; + continue; + } + + auto result = it->future.get(); + it = asyncGameObjectLoads_.erase(it); + + if (!result.valid || !result.isWmo || !result.wmoModel) { + // Fallback: spawn via sync path (likely an M2 or failed WMO) + spawnOnlineGameObject(result.guid, result.entry, result.displayId, + result.x, result.y, result.z, result.orientation); + continue; + } + + // WMO parsed on background thread — do GPU upload + instance creation on main thread + auto* wmoRenderer = renderer ? renderer->getWMORenderer() : nullptr; + if (!wmoRenderer) continue; + + uint32_t modelId = 0; + auto itCache = gameObjectDisplayIdWmoCache_.find(result.displayId); + if (itCache != gameObjectDisplayIdWmoCache_.end()) { + modelId = itCache->second; + } else { + modelId = nextGameObjectWmoModelId_++; + wmoRenderer->setPredecodedBLPCache(&result.predecodedTextures); + if (!wmoRenderer->loadModel(*result.wmoModel, modelId)) { + wmoRenderer->setPredecodedBLPCache(nullptr); + LOG_WARNING("Failed to load async gameobject WMO: ", result.modelPath); + continue; + } + wmoRenderer->setPredecodedBLPCache(nullptr); + gameObjectDisplayIdWmoCache_[result.displayId] = modelId; + } + + glm::vec3 renderPos = core::coords::canonicalToRender( + glm::vec3(result.x, result.y, result.z)); + uint32_t instanceId = wmoRenderer->createInstance( + modelId, renderPos, glm::vec3(0.0f, 0.0f, result.orientation), 1.0f); + if (instanceId == 0) continue; + + gameObjectInstances_[result.guid] = {modelId, instanceId, true}; + + // 
Queue transport doodad loading if applicable + std::string lowerPath = result.modelPath; + std::transform(lowerPath.begin(), lowerPath.end(), lowerPath.begin(), + [](unsigned char c) { return static_cast(std::tolower(c)); }); + if (lowerPath.find("transport") != std::string::npos) { + const auto* doodadTemplates = wmoRenderer->getDoodadTemplates(modelId); + if (doodadTemplates && !doodadTemplates->empty()) { + PendingTransportDoodadBatch batch; + batch.guid = result.guid; + batch.modelId = modelId; + batch.instanceId = instanceId; + batch.x = result.x; + batch.y = result.y; + batch.z = result.z; + batch.orientation = result.orientation; + batch.doodadBudget = doodadTemplates->size(); + pendingTransportDoodadBatches_.push_back(batch); + } + } + } } void Application::processGameObjectSpawnQueue() { + // Finalize any completed async WMO loads first + processAsyncGameObjectResults(); + if (pendingGameObjectSpawns_.empty()) return; - int spawned = 0; - while (!pendingGameObjectSpawns_.empty() && spawned < MAX_SPAWNS_PER_FRAME) { + // Process spawns: cached WMOs and M2s go sync (cheap), uncached WMOs go async + auto startTime = std::chrono::steady_clock::now(); + static constexpr float kBudgetMs = 2.0f; + static constexpr int kMaxAsyncLoads = 2; + + while (!pendingGameObjectSpawns_.empty()) { + float elapsedMs = std::chrono::duration( + std::chrono::steady_clock::now() - startTime).count(); + if (elapsedMs >= kBudgetMs) break; + auto& s = pendingGameObjectSpawns_.front(); + + // Check if this is an uncached WMO that needs async loading + std::string modelPath; + if (gameObjectLookupsBuilt_) { + // Check transport overrides first + bool isTransport = gameHandler && gameHandler->isTransportGuid(s.guid); + if (isTransport) { + if (s.entry == 20808 || s.entry == 176231 || s.entry == 176310) + modelPath = "World\\wmo\\transports\\transport_ship\\transportship.wmo"; + else if (s.displayId == 807 || s.displayId == 808 || s.displayId == 175080 || s.displayId == 176495 || 
s.displayId == 164871) + modelPath = "World\\wmo\\transports\\transport_zeppelin\\transport_zeppelin.wmo"; + else if (s.displayId == 1587) + modelPath = "World\\wmo\\transports\\transport_horde_zeppelin\\Transport_Horde_Zeppelin.wmo"; + else if (s.displayId == 2454 || s.displayId == 181688 || s.displayId == 190536) + modelPath = "World\\wmo\\transports\\icebreaker\\Transport_Icebreaker_ship.wmo"; + } + if (modelPath.empty()) + modelPath = getGameObjectModelPathForDisplayId(s.displayId); + } + + std::string lowerPath = modelPath; + std::transform(lowerPath.begin(), lowerPath.end(), lowerPath.begin(), + [](unsigned char c) { return static_cast(std::tolower(c)); }); + bool isWmo = lowerPath.size() >= 4 && lowerPath.substr(lowerPath.size() - 4) == ".wmo"; + bool isCached = isWmo && gameObjectDisplayIdWmoCache_.count(s.displayId); + + if (isWmo && !isCached && !modelPath.empty() && + static_cast(asyncGameObjectLoads_.size()) < kMaxAsyncLoads) { + // Launch async WMO load — file I/O + parse on background thread + auto* am = assetManager.get(); + PendingGameObjectSpawn capture = s; + std::string capturePath = modelPath; + AsyncGameObjectLoad load; + load.future = std::async(std::launch::async, + [am, capture, capturePath]() -> PreparedGameObjectWMO { + PreparedGameObjectWMO result; + result.guid = capture.guid; + result.entry = capture.entry; + result.displayId = capture.displayId; + result.x = capture.x; + result.y = capture.y; + result.z = capture.z; + result.orientation = capture.orientation; + result.modelPath = capturePath; + result.isWmo = true; + + auto wmoData = am->readFile(capturePath); + if (wmoData.empty()) return result; + + auto wmo = std::make_shared( + pipeline::WMOLoader::load(wmoData)); + + // Load groups + if (wmo->nGroups > 0) { + std::string basePath = capturePath; + std::string ext; + if (basePath.size() > 4) { + ext = basePath.substr(basePath.size() - 4); + basePath = basePath.substr(0, basePath.size() - 4); + } + for (uint32_t gi = 0; gi < 
wmo->nGroups; gi++) { + char suffix[16]; + snprintf(suffix, sizeof(suffix), "_%03u%s", gi, ext.c_str()); + auto groupData = am->readFile(basePath + suffix); + if (groupData.empty()) { + snprintf(suffix, sizeof(suffix), "_%03u.wmo", gi); + groupData = am->readFile(basePath + suffix); + } + if (!groupData.empty()) { + pipeline::WMOLoader::loadGroup(groupData, *wmo, gi); + } + } + } + + // Pre-decode WMO textures on background thread + for (const auto& texPath : wmo->textures) { + if (texPath.empty()) continue; + std::string texKey = texPath; + size_t nul = texKey.find('\0'); + if (nul != std::string::npos) texKey.resize(nul); + std::replace(texKey.begin(), texKey.end(), '/', '\\'); + std::transform(texKey.begin(), texKey.end(), texKey.begin(), + [](unsigned char c) { return static_cast(std::tolower(c)); }); + if (texKey.empty()) continue; + // Convert to .blp extension + if (texKey.size() >= 4) { + std::string ext = texKey.substr(texKey.size() - 4); + if (ext == ".tga" || ext == ".dds") { + texKey = texKey.substr(0, texKey.size() - 4) + ".blp"; + } + } + if (result.predecodedTextures.find(texKey) != result.predecodedTextures.end()) continue; + auto blp = am->loadTexture(texKey); + if (blp.isValid()) { + result.predecodedTextures[texKey] = std::move(blp); + } + } + + result.wmoModel = wmo; + result.valid = true; + return result; + }); + asyncGameObjectLoads_.push_back(std::move(load)); + pendingGameObjectSpawns_.erase(pendingGameObjectSpawns_.begin()); + continue; + } + + // Cached WMO or M2 — spawn synchronously (cheap) spawnOnlineGameObject(s.guid, s.entry, s.displayId, s.x, s.y, s.z, s.orientation); pendingGameObjectSpawns_.erase(pendingGameObjectSpawns_.begin()); - spawned++; } } @@ -6815,9 +7667,16 @@ void Application::processPendingTransportDoodads() { auto* m2Renderer = renderer->getM2Renderer(); if (!wmoRenderer || !m2Renderer) return; + auto startTime = std::chrono::steady_clock::now(); + static constexpr float kDoodadBudgetMs = 4.0f; + size_t budgetLeft = 
MAX_TRANSPORT_DOODADS_PER_FRAME; for (auto it = pendingTransportDoodadBatches_.begin(); it != pendingTransportDoodadBatches_.end() && budgetLeft > 0;) { + // Time budget check + float elapsedMs = std::chrono::duration( + std::chrono::steady_clock::now() - startTime).count(); + if (elapsedMs >= kDoodadBudgetMs) break; auto goIt = gameObjectInstances_.find(it->guid); if (goIt == gameObjectInstances_.end() || !goIt->second.isWmo || goIt->second.instanceId != it->instanceId || goIt->second.modelId != it->modelId) { @@ -6833,6 +7692,11 @@ void Application::processPendingTransportDoodads() { const size_t maxIndex = std::min(it->doodadBudget, doodadTemplates->size()); while (it->nextIndex < maxIndex && budgetLeft > 0) { + // Per-doodad time budget (each does synchronous file I/O + parse + GPU upload) + float innerMs = std::chrono::duration( + std::chrono::steady_clock::now() - startTime).count(); + if (innerMs >= kDoodadBudgetMs) { budgetLeft = 0; break; } + const auto& doodadTemplate = (*doodadTemplates)[it->nextIndex]; it->nextIndex++; budgetLeft--; @@ -7492,5 +8356,121 @@ void Application::setupTestTransport() { LOG_INFO("========================================"); } +// ─── World Preloader ───────────────────────────────────────────────────────── +// Pre-warms AssetManager file cache with ADT files (and their _obj0 variants) +// for tiles around the expected spawn position. Runs in background so that +// when loadOnlineWorldTerrain eventually asks TerrainManager workers to parse +// the same files, every readFile() is an instant cache hit instead of disk I/O. 
+ +void Application::startWorldPreload(uint32_t mapId, const std::string& mapName, + float serverX, float serverY) { + cancelWorldPreload(); + if (!assetManager || !assetManager->isInitialized() || mapName.empty()) return; + + glm::vec3 canonical = core::coords::serverToCanonical(glm::vec3(serverX, serverY, 0.0f)); + auto [tileX, tileY] = core::coords::canonicalToTile(canonical.x, canonical.y); + + worldPreload_ = std::make_unique(); + worldPreload_->mapId = mapId; + worldPreload_->mapName = mapName; + worldPreload_->centerTileX = tileX; + worldPreload_->centerTileY = tileY; + + LOG_INFO("World preload: starting for map '", mapName, "' tile [", tileX, ",", tileY, "]"); + + // Build list of tiles to preload (radius 1 = 3x3 = 9 tiles, matching load screen) + struct TileJob { int x, y; }; + auto jobs = std::make_shared>(); + // Center tile first (most important) + jobs->push_back({tileX, tileY}); + for (int dx = -1; dx <= 1; dx++) { + for (int dy = -1; dy <= 1; dy++) { + if (dx == 0 && dy == 0) continue; + int tx = tileX + dx, ty = tileY + dy; + if (tx < 0 || tx > 63 || ty < 0 || ty > 63) continue; + jobs->push_back({tx, ty}); + } + } + + // Spawn worker threads (at most 4; each pulls the next tile from a shared atomic job index) + auto cancelFlag = &worldPreload_->cancel; + auto* am = assetManager.get(); + std::string mn = mapName; + + int numWorkers = std::min(static_cast(jobs->size()), 4); + auto nextJob = std::make_shared>(0); + + for (int w = 0; w < numWorkers; w++) { + worldPreload_->workers.emplace_back([am, mn, jobs, nextJob, cancelFlag]() { + while (!cancelFlag->load(std::memory_order_relaxed)) { + int idx = nextJob->fetch_add(1, std::memory_order_relaxed); + if (idx >= static_cast(jobs->size())) break; + + int tx = (*jobs)[idx].x; + int ty = (*jobs)[idx].y; + + // Read ADT file (warms file cache) + std::string adtPath = "World\\Maps\\" + mn + "\\" + mn + "_" + + std::to_string(tx) + "_" + std::to_string(ty) + ".adt"; + am->readFile(adtPath); + if 
(cancelFlag->load(std::memory_order_relaxed)) break; + + // Read obj0 variant + std::string objPath = "World\\Maps\\" + mn + "\\" + mn + "_" + + std::to_string(tx) + "_" + std::to_string(ty) + "_obj0.adt"; + am->readFile(objPath); + } + LOG_DEBUG("World preload worker finished"); + }); + } +} + +void Application::cancelWorldPreload() { + if (!worldPreload_) return; + worldPreload_->cancel.store(true, std::memory_order_relaxed); + for (auto& t : worldPreload_->workers) { + if (t.joinable()) t.join(); + } + LOG_INFO("World preload: cancelled (map=", worldPreload_->mapName, + " tile=[", worldPreload_->centerTileX, ",", worldPreload_->centerTileY, "])"); + worldPreload_.reset(); +} + +void Application::saveLastWorldInfo(uint32_t mapId, const std::string& mapName, + float serverX, float serverY) { +#ifdef _WIN32 + const char* base = std::getenv("APPDATA"); + std::string dir = base ? std::string(base) + "\\wowee" : "."; +#else + const char* home = std::getenv("HOME"); + std::string dir = home ? std::string(home) + "/.wowee" : "."; +#endif + std::filesystem::create_directories(dir); + std::ofstream f(dir + "/last_world.cfg"); + if (f) { + f << mapId << "\n" << mapName << "\n" << serverX << "\n" << serverY << "\n"; + } +} + +Application::LastWorldInfo Application::loadLastWorldInfo() const { +#ifdef _WIN32 + const char* base = std::getenv("APPDATA"); + std::string dir = base ? std::string(base) + "\\wowee" : "."; +#else + const char* home = std::getenv("HOME"); + std::string dir = home ? 
std::string(home) + "/.wowee" : "."; +#endif + LastWorldInfo info; + std::ifstream f(dir + "/last_world.cfg"); + if (!f) return info; + std::string line; + if (std::getline(f, line)) info.mapId = static_cast(std::stoul(line)); + if (std::getline(f, line)) info.mapName = line; + if (std::getline(f, line)) info.x = std::stof(line); + if (std::getline(f, line)) info.y = std::stof(line); + info.valid = !info.mapName.empty(); + return info; +} + } // namespace core } // namespace wowee diff --git a/src/game/game_handler.cpp b/src/game/game_handler.cpp index e80e727f..9a7aed97 100644 --- a/src/game/game_handler.cpp +++ b/src/game/game_handler.cpp @@ -541,7 +541,13 @@ void GameHandler::update(float deltaTime) { // Update socket (processes incoming data and triggers callbacks) if (socket) { + auto socketStart = std::chrono::steady_clock::now(); socket->update(); + float socketMs = std::chrono::duration( + std::chrono::steady_clock::now() - socketStart).count(); + if (socketMs > 3.0f) { + LOG_WARNING("SLOW socket->update: ", socketMs, "ms"); + } } // Detect server-side disconnect (socket closed during update) diff --git a/src/rendering/character_renderer.cpp b/src/rendering/character_renderer.cpp index 2126e5e5..baaaf3e6 100644 --- a/src/rendering/character_renderer.cpp +++ b/src/rendering/character_renderer.cpp @@ -197,6 +197,29 @@ bool CharacterRenderer::initialize(VkContext* ctx, VkDescriptorSetLayout perFram vkCreateDescriptorPool(device, &ci, nullptr, &boneDescPool_); } + // --- Material UBO ring buffers (one per frame slot) --- + { + VkPhysicalDeviceProperties props; + vkGetPhysicalDeviceProperties(ctx->getPhysicalDevice(), &props); + materialUboAlignment_ = static_cast(props.limits.minUniformBufferOffsetAlignment); + if (materialUboAlignment_ < 1) materialUboAlignment_ = 1; + // Round up UBO size to alignment + uint32_t alignedUboSize = (sizeof(CharMaterialUBO) + materialUboAlignment_ - 1) & ~(materialUboAlignment_ - 1); + uint32_t ringSize = alignedUboSize * 
MATERIAL_RING_CAPACITY; + for (int i = 0; i < 2; i++) { + VkBufferCreateInfo bci{VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO}; + bci.size = ringSize; + bci.usage = VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT; + VmaAllocationCreateInfo aci{}; + aci.usage = VMA_MEMORY_USAGE_CPU_TO_GPU; + aci.flags = VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT | VMA_ALLOCATION_CREATE_MAPPED_BIT; + VmaAllocationInfo allocInfo{}; + vmaCreateBuffer(ctx->getAllocator(), &bci, &aci, + &materialRingBuffer_[i], &materialRingAlloc_[i], &allocInfo); + materialRingMapped_[i] = allocInfo.pMappedData; + } + } + // --- Pipeline layout --- // set 0 = perFrame, set 1 = material, set 2 = bones // Push constant: mat4 model = 64 bytes @@ -352,14 +375,15 @@ void CharacterRenderer::shutdown() { if (pipelineLayout_) { vkDestroyPipelineLayout(device, pipelineLayout_, nullptr); pipelineLayout_ = VK_NULL_HANDLE; } - // Release any deferred transient material UBOs. + // Destroy material ring buffers for (int i = 0; i < 2; i++) { - for (const auto& b : transientMaterialUbos_[i]) { - if (b.first) { - vmaDestroyBuffer(alloc, b.first, b.second); - } + if (materialRingBuffer_[i]) { + vmaDestroyBuffer(alloc, materialRingBuffer_[i], materialRingAlloc_[i]); + materialRingBuffer_[i] = VK_NULL_HANDLE; + materialRingAlloc_[i] = VK_NULL_HANDLE; + materialRingMapped_[i] = nullptr; } - transientMaterialUbos_[i].clear(); + materialRingOffset_[i] = 0; } // Destroy descriptor pools and layouts @@ -391,7 +415,6 @@ void CharacterRenderer::clear() { vkDeviceWaitIdle(vkCtx_->getDevice()); VkDevice device = vkCtx_->getDevice(); - VmaAllocator alloc = vkCtx_->getAllocator(); // Destroy GPU resources for all models for (auto& pair : models) { @@ -441,14 +464,9 @@ void CharacterRenderer::clear() { models.clear(); instances.clear(); - // Release deferred transient material UBOs + // Reset material ring buffer offsets (buffers persist, just reset write position) for (int i = 0; i < 2; i++) { - for (const auto& b : transientMaterialUbos_[i]) 
{ - if (b.first) { - vmaDestroyBuffer(alloc, b.first, b.second); - } - } - transientMaterialUbos_[i].clear(); + materialRingOffset_[i] = 0; } // Reset descriptor pools (don't destroy — reuse for new allocations) @@ -607,7 +625,18 @@ VkTexture* CharacterRenderer::loadTexture(const std::string& path) { return whiteTexture_.get(); } - auto blpImage = assetManager->loadTexture(key); + // Check pre-decoded BLP cache first (populated by background threads) + pipeline::BLPImage blpImage; + if (predecodedBLPCache_) { + auto pit = predecodedBLPCache_->find(key); + if (pit != predecodedBLPCache_->end()) { + blpImage = std::move(pit->second); + predecodedBLPCache_->erase(pit); + } + } + if (!blpImage.isValid()) { + blpImage = assetManager->loadTexture(key); + } if (!blpImage.isValid()) { // Return white fallback but don't cache the failure — allow retry // on next character load in case the asset becomes available. @@ -658,13 +687,16 @@ VkTexture* CharacterRenderer::loadTexture(const std::string& path) { e.hasAlpha = hasAlpha; e.colorKeyBlack = colorKeyBlackHint; - // Generate normal/height map from diffuse texture - float nhVariance = 0.0f; - auto nhMap = generateNormalHeightMap(blpImage.data.data(), blpImage.width, blpImage.height, nhVariance); - if (nhMap) { - e.heightMapVariance = nhVariance; - e.approxBytes += approxTextureBytesWithMips(blpImage.width, blpImage.height); - e.normalHeightMap = std::move(nhMap); + // Defer normal/height map generation to avoid stalling loadModel. + // Normal maps are generated in processPendingNormalMaps() at a per-frame budget. 
+ if (blpImage.width >= 32 && blpImage.height >= 32) { + PendingNormalMap pending; + pending.cacheKey = key; + pending.pixels.assign(blpImage.data.begin(), blpImage.data.end()); + pending.width = blpImage.width; + pending.height = blpImage.height; + pendingNormalMaps_.push_back(std::move(pending)); + e.normalMapPending = true; } textureCacheBytes_ += e.approxBytes; @@ -676,6 +708,34 @@ VkTexture* CharacterRenderer::loadTexture(const std::string& path) { return texPtr; } +void CharacterRenderer::processPendingNormalMaps(int budget) { + if (pendingNormalMaps_.empty() || !vkCtx_) return; + + int processed = 0; + while (!pendingNormalMaps_.empty() && processed < budget) { + auto pending = std::move(pendingNormalMaps_.front()); + pendingNormalMaps_.pop_front(); + + auto it = textureCache.find(pending.cacheKey); + if (it == textureCache.end()) continue; // texture was evicted + + float nhVariance = 0.0f; + vkCtx_->beginUploadBatch(); + auto nhMap = generateNormalHeightMap(pending.pixels.data(), + pending.width, pending.height, nhVariance); + vkCtx_->endUploadBatch(); + + if (nhMap) { + it->second.heightMapVariance = nhVariance; + it->second.approxBytes += approxTextureBytesWithMips(pending.width, pending.height); + textureCacheBytes_ += approxTextureBytesWithMips(pending.width, pending.height); + it->second.normalHeightMap = std::move(nhMap); + } + it->second.normalMapPending = false; + processed++; + } +} + // Alpha-blend overlay onto composite at (dstX, dstY) static void blitOverlay(std::vector& composite, int compW, int compH, const pipeline::BLPImage& overlay, int dstX, int dstY) { @@ -807,7 +867,19 @@ VkTexture* CharacterRenderer::compositeTextures(const std::vector& } // Load base layer - auto base = assetManager->loadTexture(layerPaths[0]); + pipeline::BLPImage base; + if (predecodedBLPCache_) { + std::string key = layerPaths[0]; + std::replace(key.begin(), key.end(), '/', '\\'); + std::transform(key.begin(), key.end(), key.begin(), + [](unsigned char c) { return 
static_cast(std::tolower(c)); }); + auto pit = predecodedBLPCache_->find(key); + if (pit != predecodedBLPCache_->end()) { + base = std::move(pit->second); + predecodedBLPCache_->erase(pit); + } + } + if (!base.isValid()) base = assetManager->loadTexture(layerPaths[0]); if (!base.isValid()) { core::Logger::getInstance().warning("Composite: failed to load base layer: ", layerPaths[0]); return whiteTexture_.get(); @@ -848,7 +920,19 @@ VkTexture* CharacterRenderer::compositeTextures(const std::vector& for (size_t layer = 1; layer < layerPaths.size(); layer++) { if (layerPaths[layer].empty()) continue; - auto overlay = assetManager->loadTexture(layerPaths[layer]); + pipeline::BLPImage overlay; + if (predecodedBLPCache_) { + std::string key = layerPaths[layer]; + std::replace(key.begin(), key.end(), '/', '\\'); + std::transform(key.begin(), key.end(), key.begin(), + [](unsigned char c) { return static_cast(std::tolower(c)); }); + auto pit = predecodedBLPCache_->find(key); + if (pit != predecodedBLPCache_->end()) { + overlay = std::move(pit->second); + predecodedBLPCache_->erase(pit); + } + } + if (!overlay.isValid()) overlay = assetManager->loadTexture(layerPaths[layer]); if (!overlay.isValid()) { core::Logger::getInstance().warning("Composite: FAILED to load overlay: ", layerPaths[layer]); continue; @@ -1025,7 +1109,19 @@ VkTexture* CharacterRenderer::compositeWithRegions(const std::string& basePath, return whiteTexture_.get(); } - auto base = assetManager->loadTexture(basePath); + pipeline::BLPImage base; + if (predecodedBLPCache_) { + std::string key = basePath; + std::replace(key.begin(), key.end(), '/', '\\'); + std::transform(key.begin(), key.end(), key.begin(), + [](unsigned char c) { return static_cast(std::tolower(c)); }); + auto pit = predecodedBLPCache_->find(key); + if (pit != predecodedBLPCache_->end()) { + base = std::move(pit->second); + predecodedBLPCache_->erase(pit); + } + } + if (!base.isValid()) base = assetManager->loadTexture(basePath); if 
(!base.isValid()) { return whiteTexture_.get(); } @@ -1064,7 +1160,19 @@ VkTexture* CharacterRenderer::compositeWithRegions(const std::string& basePath, bool upscaled = (base.width == 256 && base.height == 256 && width == 512); for (const auto& ul : baseLayers) { if (ul.empty()) continue; - auto overlay = assetManager->loadTexture(ul); + pipeline::BLPImage overlay; + if (predecodedBLPCache_) { + std::string key = ul; + std::replace(key.begin(), key.end(), '/', '\\'); + std::transform(key.begin(), key.end(), key.begin(), + [](unsigned char c) { return static_cast(std::tolower(c)); }); + auto pit = predecodedBLPCache_->find(key); + if (pit != predecodedBLPCache_->end()) { + overlay = std::move(pit->second); + predecodedBLPCache_->erase(pit); + } + } + if (!overlay.isValid()) overlay = assetManager->loadTexture(ul); if (!overlay.isValid()) continue; if (overlay.width == width && overlay.height == height) { @@ -1142,7 +1250,19 @@ VkTexture* CharacterRenderer::compositeWithRegions(const std::string& basePath, int regionIdx = rl.first; if (regionIdx < 0 || regionIdx >= 8) continue; - auto overlay = assetManager->loadTexture(rl.second); + pipeline::BLPImage overlay; + if (predecodedBLPCache_) { + std::string key = rl.second; + std::replace(key.begin(), key.end(), '/', '\\'); + std::transform(key.begin(), key.end(), key.begin(), + [](unsigned char c) { return static_cast(std::tolower(c)); }); + auto pit = predecodedBLPCache_->find(key); + if (pit != predecodedBLPCache_->end()) { + overlay = std::move(pit->second); + predecodedBLPCache_->erase(pit); + } + } + if (!overlay.isValid()) overlay = assetManager->loadTexture(rl.second); if (!overlay.isValid()) { core::Logger::getInstance().warning("compositeWithRegions: failed to load ", rl.second); continue; @@ -1247,6 +1367,10 @@ bool CharacterRenderer::loadModel(const pipeline::M2Model& model, uint32_t id) { M2ModelGPU gpuModel; gpuModel.data = model; + // Batch all GPU uploads (VB, IB, textures) into a single command buffer + 
// submission with one fence wait, instead of one fence wait per upload. + vkCtx_->beginUploadBatch(); + // Setup GPU buffers setupModelBuffers(gpuModel); @@ -1259,6 +1383,8 @@ bool CharacterRenderer::loadModel(const pipeline::M2Model& model, uint32_t id) { gpuModel.textureIds.push_back(texPtr); } + vkCtx_->endUploadBatch(); + models[id] = std::move(gpuModel); core::Logger::getInstance().debug("Loaded M2 model ", id, " (", model.vertices.size(), @@ -1388,8 +1514,9 @@ uint32_t CharacterRenderer::createInstance(uint32_t modelId, const glm::vec3& po instance.scale = scale; // Initialize bone matrices to identity - auto& model = models[modelId].data; - instance.boneMatrices.resize(std::max(static_cast(1), model.bones.size()), glm::mat4(1.0f)); + auto& gpuRef = models[modelId]; + instance.boneMatrices.resize(std::max(static_cast(1), gpuRef.data.bones.size()), glm::mat4(1.0f)); + instance.cachedModel = &gpuRef; uint32_t id = instance.id; instances[id] = std::move(instance); @@ -1448,8 +1575,14 @@ void CharacterRenderer::update(float deltaTime, const glm::vec3& cameraPos) { const float animUpdateRadius = static_cast(envSizeOrDefault("WOWEE_CHAR_ANIM_RADIUS", 120)); const float animUpdateRadiusSq = animUpdateRadius * animUpdateRadius; - // Update fade-in opacity - for (auto& [id, inst] : instances) { + // Single pass: fade-in, movement, and animation bone collection + std::vector> toUpdate; + toUpdate.reserve(instances.size()); + + for (auto& pair : instances) { + auto& inst = pair.second; + + // Update fade-in opacity if (inst.fadeInDuration > 0.0f && inst.opacity < 1.0f) { inst.fadeInTime += deltaTime; inst.opacity = std::min(1.0f, inst.fadeInTime / inst.fadeInDuration); @@ -1457,10 +1590,8 @@ void CharacterRenderer::update(float deltaTime, const glm::vec3& cameraPos) { inst.fadeInDuration = 0.0f; } } - } - // Interpolate creature movement - for (auto& [id, inst] : instances) { + // Interpolate creature movement if (inst.isMoving) { inst.moveElapsed += deltaTime; float t 
= inst.moveElapsed / inst.moveDuration; @@ -1469,36 +1600,26 @@ void CharacterRenderer::update(float deltaTime, const glm::vec3& cameraPos) { inst.isMoving = false; // Return to idle when movement completes if (inst.currentAnimationId == 4 || inst.currentAnimationId == 5) { - playAnimation(id, 0, true); + playAnimation(pair.first, 0, true); } } else { inst.position = glm::mix(inst.moveStart, inst.moveEnd, t); } } - } - // Only update animations for nearby characters (performance optimization) - // Collect instances that need bone recomputation, with distance-based throttling - std::vector> toUpdate; - toUpdate.reserve(instances.size()); - - for (auto& pair : instances) { - auto& inst = pair.second; - - // Skip weapon instances — their transforms are set by parent bones + // Skip weapon instances for animation — their transforms are set by parent bones if (inst.hasOverrideModelMatrix) continue; float distSq = glm::distance2(inst.position, cameraPos); if (distSq >= animUpdateRadiusSq) continue; // Always advance animation time (cheap) - auto modelIt = models.find(inst.modelId); - if (modelIt != models.end() && !modelIt->second.data.sequences.empty()) { + if (inst.cachedModel && !inst.cachedModel->data.sequences.empty()) { if (inst.currentSequenceIndex < 0) { inst.currentSequenceIndex = 0; - inst.currentAnimationId = modelIt->second.data.sequences[0].id; + inst.currentAnimationId = inst.cachedModel->data.sequences[0].id; } - const auto& seq = modelIt->second.data.sequences[inst.currentSequenceIndex]; + const auto& seq = inst.cachedModel->data.sequences[inst.currentSequenceIndex]; inst.animationTime += deltaTime * 1000.0f; if (seq.duration > 0 && inst.animationTime >= static_cast(seq.duration)) { if (inst.animationLoop) { @@ -1509,10 +1630,11 @@ void CharacterRenderer::update(float deltaTime, const glm::vec3& cameraPos) { } } - // Distance-tiered bone throttling: near=every frame, mid=every 3rd, far=every 6th + // Distance-tiered bone throttling: near=every frame, 
close=every 2nd, mid=every 4th, far=every 8th uint32_t boneInterval = 1; - if (distSq > 60.0f * 60.0f) boneInterval = 6; - else if (distSq > 30.0f * 30.0f) boneInterval = 3; + if (distSq > 40.0f * 40.0f) boneInterval = 8; + else if (distSq > 20.0f * 20.0f) boneInterval = 4; + else if (distSq > 10.0f * 10.0f) boneInterval = 2; inst.boneUpdateCounter++; bool needsBones = (inst.boneUpdateCounter >= boneInterval) || inst.boneMatrices.empty(); @@ -1527,7 +1649,7 @@ void CharacterRenderer::update(float deltaTime, const glm::vec3& cameraPos) { // Thread bone matrix computation in chunks if (updatedCount >= 8 && numAnimThreads_ > 1) { static const size_t minAnimWorkPerThread = std::max( - 16, envSizeOrDefault("WOWEE_CHAR_ANIM_WORK_PER_THREAD", 64)); + 8, envSizeOrDefault("WOWEE_CHAR_ANIM_WORK_PER_THREAD", 16)); const size_t maxUsefulThreads = std::max( 1, (updatedCount + minAnimWorkPerThread - 1) / minAnimWorkPerThread); const size_t numThreads = std::min(static_cast(numAnimThreads_), maxUsefulThreads); @@ -1596,11 +1718,8 @@ void CharacterRenderer::update(float deltaTime, const glm::vec3& cameraPos) { } void CharacterRenderer::updateAnimation(CharacterInstance& instance, float deltaTime) { - auto modelIt = models.find(instance.modelId); - if (modelIt == models.end()) { - return; - } - const auto& model = modelIt->second.data; + if (!instance.cachedModel) return; + const auto& model = instance.cachedModel->data; if (model.sequences.empty()) { return; @@ -1713,7 +1832,8 @@ glm::quat CharacterRenderer::interpolateQuat(const pipeline::M2AnimationTrack& t // --- Bone transform calculation --- void CharacterRenderer::calculateBoneMatrices(CharacterInstance& instance) { - auto& model = models[instance.modelId].data; + if (!instance.cachedModel) return; + auto& model = instance.cachedModel->data; if (model.bones.empty()) { return; @@ -1722,8 +1842,6 @@ void CharacterRenderer::calculateBoneMatrices(CharacterInstance& instance) { size_t numBones = model.bones.size(); 
instance.boneMatrices.resize(numBones); - static bool dumpedOnce = false; - for (size_t i = 0; i < numBones; i++) { const auto& bone = model.bones[i]; @@ -1731,19 +1849,6 @@ void CharacterRenderer::calculateBoneMatrices(CharacterInstance& instance) { // At rest this is identity, so no separate bind pose is needed glm::mat4 localTransform = getBoneTransform(bone, instance.animationTime, instance.currentSequenceIndex); - // Debug: dump first frame bone data - if (!dumpedOnce && i < 5) { - glm::vec3 t = interpolateVec3(bone.translation, instance.currentSequenceIndex, instance.animationTime, glm::vec3(0.0f)); - glm::quat r = interpolateQuat(bone.rotation, instance.currentSequenceIndex, instance.animationTime); - glm::vec3 s = interpolateVec3(bone.scale, instance.currentSequenceIndex, instance.animationTime, glm::vec3(1.0f)); - core::Logger::getInstance().info("Bone ", i, " parent=", bone.parentBone, - " pivot=(", bone.pivot.x, ",", bone.pivot.y, ",", bone.pivot.z, ")", - " t=(", t.x, ",", t.y, ",", t.z, ")", - " r=(", r.w, ",", r.x, ",", r.y, ",", r.z, ")", - " s=(", s.x, ",", s.y, ",", s.z, ")", - " seqIdx=", instance.currentSequenceIndex); - } - // Compose with parent if (bone.parentBone >= 0 && static_cast(bone.parentBone) < numBones) { instance.boneMatrices[i] = instance.boneMatrices[bone.parentBone] * localTransform; @@ -1751,12 +1856,6 @@ void CharacterRenderer::calculateBoneMatrices(CharacterInstance& instance) { instance.boneMatrices[i] = localTransform; } } - if (!dumpedOnce) { - dumpedOnce = true; - // Dump final matrix for bone 0 - auto& m = instance.boneMatrices[0]; - core::Logger::getInstance().info("Bone 0 final matrix row0=(", m[0][0], ",", m[1][0], ",", m[2][0], ",", m[3][0], ")"); - } } glm::mat4 CharacterRenderer::getBoneTransform(const pipeline::M2Bone& bone, float time, int sequenceIndex) { @@ -1791,22 +1890,19 @@ void CharacterRenderer::render(VkCommandBuffer cmd, VkDescriptorSet perFrameSet, uint32_t frameIndex = vkCtx_->getCurrentFrame(); 
uint32_t frameSlot = frameIndex % 2u; - // Reset transient material allocations once per frame slot. - // beginFrame() waits on this slot's fence before recording. + // Reset material ring buffer and descriptor pool once per frame slot. if (lastMaterialPoolResetFrame_ != frameIndex) { - VmaAllocator alloc = vkCtx_->getAllocator(); - for (const auto& b : transientMaterialUbos_[frameSlot]) { - if (b.first) { - vmaDestroyBuffer(alloc, b.first, b.second); - } - } - transientMaterialUbos_[frameSlot].clear(); + materialRingOffset_[frameSlot] = 0; if (materialDescPools_[frameSlot]) { vkResetDescriptorPool(vkCtx_->getDevice(), materialDescPools_[frameSlot], 0); } lastMaterialPoolResetFrame_ = frameIndex; } + // Pre-compute aligned UBO stride for ring buffer sub-allocation + const uint32_t uboStride = (sizeof(CharMaterialUBO) + materialUboAlignment_ - 1) & ~(materialUboAlignment_ - 1); + const uint32_t ringCapacityBytes = uboStride * MATERIAL_RING_CAPACITY; + // Bind per-frame descriptor set (set 0) -- shared across all draws vkCmdBindDescriptorSets(cmd, VK_PIPELINE_BIND_POINT_GRAPHICS, pipelineLayout_, 0, 1, &perFrameSet, 0, nullptr); @@ -1838,9 +1934,8 @@ void CharacterRenderer::render(VkCommandBuffer cmd, VkDescriptorSet perFrameSet, } } - auto modelIt = models.find(instance.modelId); - if (modelIt == models.end()) continue; - const auto& gpuModel = modelIt->second; + if (!instance.cachedModel) continue; + const auto& gpuModel = *instance.cachedModel; // Skip models without GPU buffers if (!gpuModel.vertexBuffer) continue; @@ -2176,27 +2271,18 @@ void CharacterRenderer::render(VkCommandBuffer cmd, VkDescriptorSet perFrameSet, matData.heightMapVariance = batchHeightVariance; matData.normalMapStrength = normalMapStrength_; - // Create a small UBO for this batch's material - VkBufferCreateInfo bci{VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO}; - bci.size = sizeof(CharMaterialUBO); - bci.usage = VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT; - VmaAllocationCreateInfo aci{}; - aci.usage = 
VMA_MEMORY_USAGE_CPU_TO_GPU; - aci.flags = VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT | VMA_ALLOCATION_CREATE_MAPPED_BIT; - VmaAllocationInfo allocInfo{}; - ::VkBuffer matUBO = VK_NULL_HANDLE; - VmaAllocation matUBOAlloc = VK_NULL_HANDLE; - vmaCreateBuffer(vkCtx_->getAllocator(), &bci, &aci, &matUBO, &matUBOAlloc, &allocInfo); - if (allocInfo.pMappedData) { - memcpy(allocInfo.pMappedData, &matData, sizeof(CharMaterialUBO)); - } + // Sub-allocate material UBO from ring buffer + uint32_t matOffset = materialRingOffset_[frameSlot]; + if (matOffset + uboStride > ringCapacityBytes) continue; // ring exhausted + memcpy(static_cast(materialRingMapped_[frameSlot]) + matOffset, &matData, sizeof(CharMaterialUBO)); + materialRingOffset_[frameSlot] = matOffset + uboStride; // Write descriptor set: binding 0 = texture, binding 1 = material UBO, binding 2 = normal/height map VkTexture* bindTex = (texPtr && texPtr->isValid()) ? texPtr : whiteTexture_.get(); VkDescriptorImageInfo imgInfo = bindTex->descriptorInfo(); VkDescriptorBufferInfo bufInfo{}; - bufInfo.buffer = matUBO; - bufInfo.offset = 0; + bufInfo.buffer = materialRingBuffer_[frameSlot]; + bufInfo.offset = matOffset; bufInfo.range = sizeof(CharMaterialUBO); VkDescriptorImageInfo nhImgInfo = normalMap->descriptorInfo(); @@ -2229,8 +2315,6 @@ void CharacterRenderer::render(VkCommandBuffer cmd, VkDescriptorSet perFrameSet, pipelineLayout_, 1, 1, &materialSet, 0, nullptr); vkCmdDrawIndexed(cmd, batch.indexCount, 1, batch.indexStart, 0, 0); - - transientMaterialUbos_[frameSlot].emplace_back(matUBO, matUBOAlloc); } } else { // Draw entire model with first texture @@ -2271,24 +2355,16 @@ void CharacterRenderer::render(VkCommandBuffer cmd, VkDescriptorSet perFrameSet, matData.heightMapVariance = 0.0f; matData.normalMapStrength = normalMapStrength_; - VkBufferCreateInfo bci{VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO}; - bci.size = sizeof(CharMaterialUBO); - bci.usage = VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT; - 
VmaAllocationCreateInfo aci{}; - aci.usage = VMA_MEMORY_USAGE_CPU_TO_GPU; - aci.flags = VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT | VMA_ALLOCATION_CREATE_MAPPED_BIT; - VmaAllocationInfo allocInfo{}; - ::VkBuffer matUBO = VK_NULL_HANDLE; - VmaAllocation matUBOAlloc = VK_NULL_HANDLE; - vmaCreateBuffer(vkCtx_->getAllocator(), &bci, &aci, &matUBO, &matUBOAlloc, &allocInfo); - if (allocInfo.pMappedData) { - memcpy(allocInfo.pMappedData, &matData, sizeof(CharMaterialUBO)); - } + // Sub-allocate material UBO from ring buffer + uint32_t matOffset2 = materialRingOffset_[frameSlot]; + if (matOffset2 + uboStride > ringCapacityBytes) continue; // ring exhausted + memcpy(static_cast(materialRingMapped_[frameSlot]) + matOffset2, &matData, sizeof(CharMaterialUBO)); + materialRingOffset_[frameSlot] = matOffset2 + uboStride; VkDescriptorImageInfo imgInfo = texPtr->descriptorInfo(); VkDescriptorBufferInfo bufInfo{}; - bufInfo.buffer = matUBO; - bufInfo.offset = 0; + bufInfo.buffer = materialRingBuffer_[frameSlot]; + bufInfo.offset = matOffset2; bufInfo.range = sizeof(CharMaterialUBO); VkDescriptorImageInfo nhImgInfo2 = flatNormalTexture_->descriptorInfo(); @@ -2320,8 +2396,6 @@ void CharacterRenderer::render(VkCommandBuffer cmd, VkDescriptorSet perFrameSet, pipelineLayout_, 1, 1, &materialSet, 0, nullptr); vkCmdDrawIndexed(cmd, gpuModel.indexCount, 1, 0, 0, 0); - - transientMaterialUbos_[frameSlot].emplace_back(matUBO, matUBOAlloc); } } } @@ -2513,9 +2587,8 @@ void CharacterRenderer::renderShadow(VkCommandBuffer cmd, const glm::mat4& light glm::vec3 diff = inst.position - shadowCenter; if (glm::dot(diff, diff) > shadowRadiusSq) continue; - auto modelIt = models.find(inst.modelId); - if (modelIt == models.end()) continue; - const M2ModelGPU& gpuModel = modelIt->second; + if (!inst.cachedModel) continue; + const M2ModelGPU& gpuModel = *inst.cachedModel; if (!gpuModel.vertexBuffer) continue; glm::mat4 modelMat = inst.hasOverrideModelMatrix diff --git 
a/src/rendering/m2_renderer.cpp b/src/rendering/m2_renderer.cpp index d76843a0..d455e494 100644 --- a/src/rendering/m2_renderer.cpp +++ b/src/rendering/m2_renderer.cpp @@ -678,6 +678,7 @@ void M2Renderer::shutdown() { instances.clear(); spatialGrid.clear(); instanceIndexById.clear(); + instanceDedupMap_.clear(); // Delete cached textures textureCache.clear(); @@ -1184,6 +1185,10 @@ bool M2Renderer::loadModel(const pipeline::M2Model& model, uint32_t modelId) { } } + // Batch all GPU uploads (VB, IB, textures) into a single command buffer + // submission with one fence wait, instead of one fence wait per upload. + vkCtx_->beginUploadBatch(); + if (hasGeometry) { // Create VBO with interleaved vertex data // Format: position (3), normal (3), texcoord0 (2), texcoord1 (2), boneWeights (4), boneIndices (4 as float) @@ -1535,6 +1540,8 @@ bool M2Renderer::loadModel(const pipeline::M2Model& model, uint32_t modelId) { } } + vkCtx_->endUploadBatch(); + // Allocate Vulkan descriptor sets and UBOs for each batch for (auto& bgpu : gpuModel.batches) { // Create combined UBO for M2Params (binding 1) + M2Material (binding 2) @@ -1613,17 +1620,16 @@ uint32_t M2Renderer::createInstance(uint32_t modelId, const glm::vec3& position, } const auto& mdlRef = modelIt->second; - // Ground clutter is procedurally scattered and high-count; avoid O(N) dedup - // scans that can hitch when new tiles stream in. + // Deduplicate: skip if same model already at nearly the same position. + // Uses hash map for O(1) lookup instead of O(N) scan. 
if (!mdlRef.isGroundDetail) { - // Deduplicate: skip if same model already at nearly the same position - for (const auto& existing : instances) { - if (existing.modelId == modelId) { - glm::vec3 d = existing.position - position; - if (glm::dot(d, d) < 0.01f) { - return existing.id; - } - } + DedupKey dk{modelId, + static_cast(std::round(position.x * 10.0f)), + static_cast(std::round(position.y * 10.0f)), + static_cast(std::round(position.z * 10.0f))}; + auto dit = instanceDedupMap_.find(dk); + if (dit != instanceDedupMap_.end()) { + return dit->second; } } @@ -1651,6 +1657,7 @@ uint32_t M2Renderer::createInstance(uint32_t modelId, const glm::vec3& position, instance.cachedIsInvisibleTrap = mdlRef.isInvisibleTrap; instance.cachedIsInstancePortal = mdlRef.isInstancePortal; instance.cachedIsValid = mdlRef.isValid(); + instance.cachedModel = &mdlRef; // Initialize animation: play first sequence (usually Stand/Idle) const auto& mdl = mdlRef; @@ -1662,6 +1669,15 @@ uint32_t M2Renderer::createInstance(uint32_t modelId, const glm::vec3& position, instance.variationTimer = 3000.0f + static_cast(rand() % 8000); } + // Register in dedup map before pushing (uses original position, not ground-adjusted) + if (!mdlRef.isGroundDetail) { + DedupKey dk{modelId, + static_cast(std::round(position.x * 10.0f)), + static_cast(std::round(position.y * 10.0f)), + static_cast(std::round(position.z * 10.0f))}; + instanceDedupMap_[dk] = instance.id; + } + instances.push_back(instance); size_t idx = instances.size() - 1; // Track special instances for fast-path iteration @@ -1700,13 +1716,15 @@ uint32_t M2Renderer::createInstanceWithMatrix(uint32_t modelId, const glm::mat4& return 0; } - // Deduplicate: skip if same model already at nearly the same position - for (const auto& existing : instances) { - if (existing.modelId == modelId) { - glm::vec3 d = existing.position - position; - if (glm::dot(d, d) < 0.01f) { - return existing.id; - } + // Deduplicate: O(1) hash lookup + { + DedupKey 
dk{modelId, + static_cast(std::round(position.x * 10.0f)), + static_cast(std::round(position.y * 10.0f)), + static_cast(std::round(position.z * 10.0f))}; + auto dit = instanceDedupMap_.find(dk); + if (dit != instanceDedupMap_.end()) { + return dit->second; } } @@ -1731,6 +1749,7 @@ uint32_t M2Renderer::createInstanceWithMatrix(uint32_t modelId, const glm::mat4& instance.cachedIsGroundDetail = mdl2.isGroundDetail; instance.cachedIsInvisibleTrap = mdl2.isInvisibleTrap; instance.cachedIsValid = mdl2.isValid(); + instance.cachedModel = &mdl2; // Initialize animation if (mdl2.hasAnimation && !mdl2.disableAnimation && !mdl2.sequences.empty()) { @@ -1743,6 +1762,15 @@ uint32_t M2Renderer::createInstanceWithMatrix(uint32_t modelId, const glm::mat4& instance.animTime = static_cast(rand()) / RAND_MAX * 10000.0f; } + // Register in dedup map + { + DedupKey dk{modelId, + static_cast(std::round(position.x * 10.0f)), + static_cast(std::round(position.y * 10.0f)), + static_cast(std::round(position.z * 10.0f))}; + instanceDedupMap_[dk] = instance.id; + } + instances.push_back(instance); size_t idx = instances.size() - 1; if (mdl2.isSmoke) { @@ -2000,9 +2028,8 @@ void M2Renderer::update(float deltaTime, const glm::vec3& cameraPos, const glm:: instance.animTime += dtMs * (instance.animSpeed - 1.0f); // For animation looping/variation, we need the actual model data. 
- auto it = models.find(instance.modelId); - if (it == models.end()) continue; - const M2ModelGPU& model = it->second; + if (!instance.cachedModel) continue; + const M2ModelGPU& model = *instance.cachedModel; // Validate sequence index if (instance.currentSequenceIndex < 0 || @@ -2058,6 +2085,14 @@ void M2Renderer::update(float deltaTime, const glm::vec3& cameraPos, const glm:: float paddedRadius = std::max(cullRadius * 1.5f, cullRadius + 3.0f); if (cullRadius > 0.0f && !updateFrustum.intersectsSphere(instance.position, paddedRadius)) continue; + // Distance-based frame skipping: update distant bones less frequently + uint32_t boneInterval = 1; + if (distSq > 200.0f * 200.0f) boneInterval = 8; + else if (distSq > 100.0f * 100.0f) boneInterval = 4; + else if (distSq > 50.0f * 50.0f) boneInterval = 2; + instance.frameSkipCounter++; + if ((instance.frameSkipCounter % boneInterval) != 0) continue; + boneWorkIndices_.push_back(idx); } @@ -2071,9 +2106,8 @@ void M2Renderer::update(float deltaTime, const glm::vec3& cameraPos, const glm:: for (size_t i : boneWorkIndices_) { if (i >= instances.size()) continue; auto& inst = instances[i]; - auto mdlIt = models.find(inst.modelId); - if (mdlIt == models.end()) continue; - computeBoneMatrices(mdlIt->second, inst); + if (!inst.cachedModel) continue; + computeBoneMatrices(*inst.cachedModel, inst); } } else { // Parallel — dispatch across worker threads @@ -2086,9 +2120,8 @@ void M2Renderer::update(float deltaTime, const glm::vec3& cameraPos, const glm:: for (size_t i : boneWorkIndices_) { if (i >= instances.size()) continue; auto& inst = instances[i]; - auto mdlIt = models.find(inst.modelId); - if (mdlIt == models.end()) continue; - computeBoneMatrices(mdlIt->second, inst); + if (!inst.cachedModel) continue; + computeBoneMatrices(*inst.cachedModel, inst); } } else { const size_t chunkSize = animCount / numThreads; @@ -2109,9 +2142,8 @@ void M2Renderer::update(float deltaTime, const glm::vec3& cameraPos, const glm:: size_t idx = 
boneWorkIndices_[j]; if (idx >= instances.size()) continue; auto& inst = instances[idx]; - auto mdlIt = models.find(inst.modelId); - if (mdlIt == models.end()) continue; - computeBoneMatrices(mdlIt->second, inst); + if (!inst.cachedModel) continue; + computeBoneMatrices(*inst.cachedModel, inst); } })); start = end; @@ -2133,9 +2165,8 @@ void M2Renderer::update(float deltaTime, const glm::vec3& cameraPos, const glm:: glm::vec3 toCam = instance.position - cachedCamPos_; float distSq = glm::dot(toCam, toCam); if (distSq > cachedMaxRenderDistSq_) continue; - auto mdlIt = models.find(instance.modelId); - if (mdlIt == models.end()) continue; - emitParticles(instance, mdlIt->second, deltaTime); + if (!instance.cachedModel) continue; + emitParticles(instance, *instance.cachedModel, deltaTime); updateParticles(instance, deltaTime); } @@ -2839,9 +2870,8 @@ void M2Renderer::renderShadow(VkCommandBuffer cmd, const glm::mat4& lightSpaceMa glm::vec3 diff = instance.position - shadowCenter; if (glm::dot(diff, diff) > shadowRadiusSq) continue; - auto modelIt = models.find(instance.modelId); - if (modelIt == models.end()) continue; - const M2ModelGPU& model = modelIt->second; + if (!instance.cachedModel) continue; + const M2ModelGPU& model = *instance.cachedModel; // Filter: only draw foliage models in foliage pass, non-foliage in non-foliage pass if (model.shadowWindFoliage != foliagePass) continue; @@ -2947,8 +2977,7 @@ std::vector M2Renderer::getWaterVegetationPositions(const glm::vec3& std::vector result; float maxDistSq = maxDist * maxDist; for (const auto& inst : instances) { - auto it = models.find(inst.modelId); - if (it == models.end() || !it->second.isWaterVegetation) continue; + if (!inst.cachedModel || !inst.cachedModel->isWaterVegetation) continue; glm::vec3 diff = inst.position - camPos; if (glm::dot(diff, diff) <= maxDistSq) { result.push_back(inst.position); @@ -3059,9 +3088,8 @@ void M2Renderer::emitParticles(M2Instance& inst, const M2ModelGPU& gpu, float dt } void 
M2Renderer::updateParticles(M2Instance& inst, float dt) { - auto it = models.find(inst.modelId); - if (it == models.end()) return; - const auto& gpu = it->second; + if (!inst.cachedModel) return; + const auto& gpu = *inst.cachedModel; for (size_t i = 0; i < inst.particles.size(); ) { auto& p = inst.particles[i]; @@ -3136,9 +3164,8 @@ void M2Renderer::renderM2Particles(VkCommandBuffer cmd, VkDescriptorSet perFrame for (auto& inst : instances) { if (inst.particles.empty()) continue; - auto it = models.find(inst.modelId); - if (it == models.end()) continue; - const auto& gpu = it->second; + if (!inst.cachedModel) continue; + const auto& gpu = *inst.cachedModel; for (const auto& p : inst.particles) { if (p.emitterIndex < 0 || p.emitterIndex >= static_cast(gpu.particleEmitters.size())) continue; @@ -3477,6 +3504,7 @@ void M2Renderer::clear() { instances.clear(); spatialGrid.clear(); instanceIndexById.clear(); + instanceDedupMap_.clear(); smokeParticles.clear(); smokeInstanceIndices_.clear(); portalInstanceIndices_.clear(); @@ -3513,6 +3541,7 @@ M2Renderer::GridCell M2Renderer::toCell(const glm::vec3& p) const { void M2Renderer::rebuildSpatialIndex() { spatialGrid.clear(); instanceIndexById.clear(); + instanceDedupMap_.clear(); instanceIndexById.reserve(instances.size()); smokeInstanceIndices_.clear(); portalInstanceIndices_.clear(); @@ -3521,9 +3550,22 @@ void M2Renderer::rebuildSpatialIndex() { particleInstanceIndices_.clear(); for (size_t i = 0; i < instances.size(); i++) { - const auto& inst = instances[i]; + auto& inst = instances[i]; instanceIndexById[inst.id] = i; + // Re-cache model pointer (may have changed after model map modifications) + auto mdlIt = models.find(inst.modelId); + inst.cachedModel = (mdlIt != models.end()) ? 
&mdlIt->second : nullptr; + + // Rebuild dedup map (skip ground detail) + if (!inst.cachedIsGroundDetail) { + DedupKey dk{inst.modelId, + static_cast(std::round(inst.position.x * 10.0f)), + static_cast(std::round(inst.position.y * 10.0f)), + static_cast(std::round(inst.position.z * 10.0f))}; + instanceDedupMap_[dk] = inst.id; + } + if (inst.cachedIsSmoke) { smokeInstanceIndices_.push_back(i); } @@ -3647,8 +3689,18 @@ VkTexture* M2Renderer::loadTexture(const std::string& path, uint32_t texFlags) { containsToken(key, "campfire") || containsToken(key, "bonfire"); - // Load BLP texture - pipeline::BLPImage blp = assetManager->loadTexture(key); + // Check pre-decoded BLP cache first (populated by background worker threads) + pipeline::BLPImage blp; + if (predecodedBLPCache_) { + auto pit = predecodedBLPCache_->find(key); + if (pit != predecodedBLPCache_->end()) { + blp = std::move(pit->second); + predecodedBLPCache_->erase(pit); + } + } + if (!blp.isValid()) { + blp = assetManager->loadTexture(key); + } if (!blp.isValid()) { // Return white fallback but don't cache the failure — MPQ reads can // fail transiently during streaming; allow retry on next model load. 
@@ -3714,9 +3766,8 @@ VkTexture* M2Renderer::loadTexture(const std::string& path, uint32_t texFlags) { uint32_t M2Renderer::getTotalTriangleCount() const { uint32_t total = 0; for (const auto& instance : instances) { - auto it = models.find(instance.modelId); - if (it != models.end()) { - total += it->second.indexCount / 3; + if (instance.cachedModel) { + total += instance.cachedModel->indexCount / 3; } } return total; @@ -3738,11 +3789,10 @@ std::optional M2Renderer::getFloorHeight(float glX, float glY, float glZ, continue; } - auto it = models.find(instance.modelId); - if (it == models.end()) continue; + if (!instance.cachedModel) continue; if (instance.scale <= 0.001f) continue; - const M2ModelGPU& model = it->second; + const M2ModelGPU& model = *instance.cachedModel; if (model.collisionNoBlock || model.isInvisibleTrap || model.isSpellEffect) continue; if (instance.skipCollision) continue; @@ -3894,10 +3944,9 @@ bool M2Renderer::checkCollision(const glm::vec3& from, const glm::vec3& to, if (from.z > instance.worldBoundsMax.z + 2.5f && adjustedPos.z > instance.worldBoundsMax.z + 2.5f) continue; if (from.z + 2.5f < instance.worldBoundsMin.z && adjustedPos.z + 2.5f < instance.worldBoundsMin.z) continue; - auto it = models.find(instance.modelId); - if (it == models.end()) continue; + if (!instance.cachedModel) continue; - const M2ModelGPU& model = it->second; + const M2ModelGPU& model = *instance.cachedModel; if (model.collisionNoBlock || model.isInvisibleTrap || model.isSpellEffect) continue; if (instance.skipCollision) continue; if (instance.scale <= 0.001f) continue; @@ -4135,10 +4184,9 @@ float M2Renderer::raycastBoundingBoxes(const glm::vec3& origin, const glm::vec3& continue; } - auto it = models.find(instance.modelId); - if (it == models.end()) continue; + if (!instance.cachedModel) continue; - const M2ModelGPU& model = it->second; + const M2ModelGPU& model = *instance.cachedModel; if (model.collisionNoBlock || model.isInvisibleTrap || model.isSpellEffect) 
continue; glm::vec3 localMin, localMax; getTightCollisionBounds(model, localMin, localMax); diff --git a/src/rendering/renderer.cpp b/src/rendering/renderer.cpp index 5f3e48ae..55ba1370 100644 --- a/src/rendering/renderer.cpp +++ b/src/rendering/renderer.cpp @@ -2434,6 +2434,9 @@ void Renderer::update(float deltaTime) { cameraController->update(deltaTime); auto cameraEnd = std::chrono::steady_clock::now(); lastCameraUpdateMs = std::chrono::duration(cameraEnd - cameraStart).count(); + if (lastCameraUpdateMs > 3.0) { + LOG_WARNING("SLOW cameraController->update: ", lastCameraUpdateMs, "ms"); + } // Update 3D audio listener position/orientation to match camera if (camera) { @@ -2527,7 +2530,13 @@ void Renderer::update(float deltaTime) { // Update terrain streaming if (terrainManager && camera) { + auto terrStart = std::chrono::steady_clock::now(); terrainManager->update(*camera, deltaTime); + float terrMs = std::chrono::duration( + std::chrono::steady_clock::now() - terrStart).count(); + if (terrMs > 5.0f) { + LOG_WARNING("SLOW terrainManager->update: ", terrMs, "ms"); + } } // Update sky system (skybox time, star twinkle, clouds, celestial moon phases) @@ -2579,7 +2588,14 @@ void Renderer::update(float deltaTime) { // Update character animations if (characterRenderer && camera) { + auto charAnimStart = std::chrono::steady_clock::now(); characterRenderer->update(deltaTime, camera->getPosition()); + float charAnimMs = std::chrono::duration( + std::chrono::steady_clock::now() - charAnimStart).count(); + if (charAnimMs > 5.0f) { + LOG_WARNING("SLOW characterRenderer->update: ", charAnimMs, "ms (", + characterRenderer->getInstanceCount(), " instances)"); + } } // Update AudioEngine (cleanup finished sounds, etc.) 
@@ -2766,8 +2782,15 @@ void Renderer::update(float deltaTime) { // Update M2 doodad animations (pass camera for frustum-culling bone computation) if (m2Renderer && camera) { + auto m2Start = std::chrono::steady_clock::now(); m2Renderer->update(deltaTime, camera->getPosition(), camera->getProjectionMatrix() * camera->getViewMatrix()); + float m2Ms = std::chrono::duration( + std::chrono::steady_clock::now() - m2Start).count(); + if (m2Ms > 3.0f) { + LOG_WARNING("SLOW m2Renderer->update: ", m2Ms, "ms (", + m2Renderer->getInstanceCount(), " instances)"); + } } // Helper: play zone music, dispatching local files (file: prefix) vs MPQ paths diff --git a/src/rendering/terrain_manager.cpp b/src/rendering/terrain_manager.cpp index b164d969..97527c8c 100644 --- a/src/rendering/terrain_manager.cpp +++ b/src/rendering/terrain_manager.cpp @@ -1,5 +1,6 @@ #include "rendering/terrain_manager.hpp" #include "rendering/terrain_renderer.hpp" +#include "rendering/vk_context.hpp" #include "rendering/water_renderer.hpp" #include "rendering/m2_renderer.hpp" #include "rendering/wmo_renderer.hpp" @@ -53,12 +54,12 @@ int computeTerrainWorkerCount() { unsigned hc = std::thread::hardware_concurrency(); if (hc > 0) { - // Terrain streaming should leave CPU room for render/update threads. - const unsigned availableCores = (hc > 1u) ? (hc - 1u) : 1u; - const unsigned targetWorkers = std::max(2u, availableCores / 2u); + // Use most cores for loading — leave 1-2 for render/update threads. + const unsigned reserved = (hc >= 8u) ? 2u : 1u; + const unsigned targetWorkers = std::max(4u, hc - reserved); return static_cast(targetWorkers); } - return 2; // Fallback + return 4; // Fallback } bool decodeLayerAlpha(const pipeline::MapChunk& chunk, size_t layerIdx, std::vector& outAlpha) { @@ -230,9 +231,14 @@ bool TerrainManager::loadTile(int x, int y) { return false; } + VkContext* vkCtx = terrainRenderer ? 
terrainRenderer->getVkContext() : nullptr; + if (vkCtx) vkCtx->beginUploadBatch(); + FinalizingTile ft; ft.pending = std::move(pending); while (!advanceFinalization(ft)) {} + + if (vkCtx) vkCtx->endUploadBatchSync(); // Sync — caller expects tile ready return true; } @@ -372,6 +378,15 @@ std::shared_ptr TerrainManager::prepareTile(int x, int y) { int& skippedSkinNotFound) -> bool { if (preparedModelIds.find(modelId) != preparedModelIds.end()) return true; + // Skip file I/O + parsing for models already uploaded to GPU from previous tiles + { + std::lock_guard lock(uploadedM2IdsMutex_); + if (uploadedM2Ids_.count(modelId)) { + preparedModelIds.insert(modelId); + return true; + } + } + std::vector m2Data = assetManager->readFile(m2Path); if (m2Data.empty()) { skippedFileNotFound++; @@ -397,6 +412,20 @@ std::shared_ptr TerrainManager::prepareTile(int x, int y) { return false; } + // Pre-decode M2 model textures on background thread + for (const auto& tex : m2Model.textures) { + if (tex.filename.empty()) continue; + std::string texKey = tex.filename; + std::replace(texKey.begin(), texKey.end(), '/', '\\'); + std::transform(texKey.begin(), texKey.end(), texKey.begin(), + [](unsigned char c) { return static_cast(std::tolower(c)); }); + if (pending->preloadedM2Textures.find(texKey) != pending->preloadedM2Textures.end()) continue; + auto blp = assetManager->loadTexture(texKey); + if (blp.isValid()) { + pending->preloadedM2Textures[texKey] = std::move(blp); + } + } + PendingTile::M2Ready ready; ready.modelId = modelId; ready.model = std::move(m2Model); @@ -551,19 +580,44 @@ std::shared_ptr TerrainManager::prepareTile(int x, int y) { } uint32_t doodadModelId = static_cast(std::hash{}(m2Path)); - std::vector m2Data = assetManager->readFile(m2Path); - if (m2Data.empty()) continue; - pipeline::M2Model m2Model = pipeline::M2Loader::load(m2Data); - if (m2Model.name.empty()) { - m2Model.name = m2Path; + // Skip file I/O if model already uploaded from a previous tile + bool 
modelAlreadyUploaded = false; + { + std::lock_guard lock(uploadedM2IdsMutex_); + modelAlreadyUploaded = uploadedM2Ids_.count(doodadModelId) > 0; } - std::string skinPath = m2Path.substr(0, m2Path.size() - 3) + "00.skin"; - std::vector skinData = assetManager->readFile(skinPath); - if (!skinData.empty() && m2Model.version >= 264) { - pipeline::M2Loader::loadSkin(skinData, m2Model); + + pipeline::M2Model m2Model; + if (!modelAlreadyUploaded) { + std::vector m2Data = assetManager->readFile(m2Path); + if (m2Data.empty()) continue; + + m2Model = pipeline::M2Loader::load(m2Data); + if (m2Model.name.empty()) { + m2Model.name = m2Path; + } + std::string skinPath = m2Path.substr(0, m2Path.size() - 3) + "00.skin"; + std::vector skinData = assetManager->readFile(skinPath); + if (!skinData.empty() && m2Model.version >= 264) { + pipeline::M2Loader::loadSkin(skinData, m2Model); + } + if (!m2Model.isValid()) continue; + + // Pre-decode doodad M2 textures on background thread + for (const auto& tex : m2Model.textures) { + if (tex.filename.empty()) continue; + std::string texKey = tex.filename; + std::replace(texKey.begin(), texKey.end(), '/', '\\'); + std::transform(texKey.begin(), texKey.end(), texKey.begin(), + [](unsigned char c) { return static_cast(std::tolower(c)); }); + if (pending->preloadedM2Textures.find(texKey) != pending->preloadedM2Textures.end()) continue; + auto blp = assetManager->loadTexture(texKey); + if (blp.isValid()) { + pending->preloadedM2Textures[texKey] = std::move(blp); + } + } } - if (!m2Model.isValid()) continue; // Build doodad's local transform (WoW coordinates) // WMO doodads use quaternion rotation @@ -633,6 +687,32 @@ std::shared_ptr TerrainManager::prepareTile(int x, int y) { } } + // Pre-decode WMO textures on background thread + for (const auto& texPath : wmoModel.textures) { + if (texPath.empty()) continue; + std::string texKey = texPath; + // Truncate at NUL (WMO paths can have stray bytes) + size_t nul = texKey.find('\0'); + if (nul != 
std::string::npos) texKey.resize(nul); + std::replace(texKey.begin(), texKey.end(), '/', '\\'); + std::transform(texKey.begin(), texKey.end(), texKey.begin(), + [](unsigned char c) { return static_cast(std::tolower(c)); }); + if (texKey.empty()) continue; + if (pending->preloadedWMOTextures.find(texKey) != pending->preloadedWMOTextures.end()) continue; + // Try .blp variant + std::string blpKey = texKey; + if (blpKey.size() >= 4) { + std::string ext = blpKey.substr(blpKey.size() - 4); + if (ext == ".tga" || ext == ".dds") { + blpKey = blpKey.substr(0, blpKey.size() - 4) + ".blp"; + } + } + auto blp = assetManager->loadTexture(blpKey); + if (blp.isValid()) { + pending->preloadedWMOTextures[blpKey] = std::move(blp); + } + } + PendingTile::WMOReady ready; // Cache WMO model uploads by path; placement dedup uses uniqueId separately. ready.modelId = static_cast(std::hash{}(wmoPath)); @@ -695,27 +775,39 @@ bool TerrainManager::advanceFinalization(FinalizingTile& ft) { return true; } - LOG_DEBUG("Finalizing tile [", x, ",", y, "] (incremental)"); - - // Upload pre-loaded textures - if (!pending->preloadedTextures.empty()) { - terrainRenderer->uploadPreloadedTextures(pending->preloadedTextures); - } - - // Upload terrain mesh to GPU - if (!terrainRenderer->loadTerrain(pending->mesh, pending->terrain.textures, x, y)) { - LOG_ERROR("Failed to upload terrain to GPU for tile [", x, ",", y, "]"); - failedTiles[coord] = true; - { - std::lock_guard lock(queueMutex); - pendingTiles.erase(coord); + // Upload pre-loaded textures (once) + if (!ft.terrainPreloaded) { + LOG_DEBUG("Finalizing tile [", x, ",", y, "] (incremental)"); + if (!pending->preloadedTextures.empty()) { + terrainRenderer->uploadPreloadedTextures(pending->preloadedTextures); } - ft.phase = FinalizationPhase::DONE; - return true; + ft.terrainPreloaded = true; + // Yield after preload to give time budget a chance to interrupt + return false; } - // Load water immediately after terrain (same frame) — water is now - // 
deduplicated to ~1-2 merged surfaces per tile, so this is fast. + // Upload terrain chunks incrementally (16 per call to spread across frames) + if (!ft.terrainMeshDone) { + if (pending->mesh.validChunkCount == 0) { + LOG_ERROR("Failed to upload terrain to GPU for tile [", x, ",", y, "]"); + failedTiles[coord] = true; + { + std::lock_guard lock(queueMutex); + pendingTiles.erase(coord); + } + ft.phase = FinalizationPhase::DONE; + return true; + } + bool allDone = terrainRenderer->loadTerrainIncremental( + pending->mesh, pending->terrain.textures, x, y, + ft.terrainChunkNext, 32); + if (!allDone) { + return false; // More chunks remain — yield to time budget + } + ft.terrainMeshDone = true; + } + + // Load water after all terrain chunks are uploaded if (waterRenderer) { size_t beforeSurfaces = waterRenderer->getSurfaceCount(); waterRenderer->loadFromTerrain(pending->terrain, true, x, y); @@ -738,13 +830,24 @@ bool TerrainManager::advanceFinalization(FinalizingTile& ft) { } case FinalizationPhase::M2_MODELS: { - // Upload ONE M2 model per call + // Upload multiple M2 models per call (batched GPU uploads) if (m2Renderer && ft.m2ModelIndex < pending->m2Models.size()) { - auto& m2Ready = pending->m2Models[ft.m2ModelIndex]; - if (m2Renderer->loadModel(m2Ready.model, m2Ready.modelId)) { - ft.uploadedM2ModelIds.insert(m2Ready.modelId); + // Set pre-decoded BLP cache so loadTexture() skips main-thread BLP decode + m2Renderer->setPredecodedBLPCache(&pending->preloadedM2Textures); + constexpr size_t kModelsPerStep = 4; + size_t uploaded = 0; + while (ft.m2ModelIndex < pending->m2Models.size() && uploaded < kModelsPerStep) { + auto& m2Ready = pending->m2Models[ft.m2ModelIndex]; + if (m2Renderer->loadModel(m2Ready.model, m2Ready.modelId)) { + ft.uploadedM2ModelIds.insert(m2Ready.modelId); + // Track uploaded model IDs so background threads can skip re-reading + std::lock_guard lock(uploadedM2IdsMutex_); + uploadedM2Ids_.insert(m2Ready.modelId); + } + ft.m2ModelIndex++; + 
uploaded++; } - ft.m2ModelIndex++; + m2Renderer->setPredecodedBLPCache(nullptr); // Stay in this phase until all models uploaded if (ft.m2ModelIndex < pending->m2Models.size()) { return false; @@ -786,22 +889,28 @@ bool TerrainManager::advanceFinalization(FinalizingTile& ft) { } case FinalizationPhase::WMO_MODELS: { - // Upload ONE WMO model per call + // Upload multiple WMO models per call (batched GPU uploads) if (wmoRenderer && assetManager) { wmoRenderer->initialize(nullptr, VK_NULL_HANDLE, assetManager); + // Set pre-decoded BLP cache and defer normal maps during streaming + wmoRenderer->setPredecodedBLPCache(&pending->preloadedWMOTextures); + wmoRenderer->setDeferNormalMaps(true); - if (ft.wmoModelIndex < pending->wmoModels.size()) { + constexpr size_t kWmosPerStep = 1; + size_t uploaded = 0; + while (ft.wmoModelIndex < pending->wmoModels.size() && uploaded < kWmosPerStep) { auto& wmoReady = pending->wmoModels[ft.wmoModelIndex]; - // Deduplicate if (wmoReady.uniqueId != 0 && placedWmoIds.count(wmoReady.uniqueId)) { ft.wmoModelIndex++; - if (ft.wmoModelIndex < pending->wmoModels.size()) return false; } else { wmoRenderer->loadModel(wmoReady.model, wmoReady.modelId); ft.wmoModelIndex++; - if (ft.wmoModelIndex < pending->wmoModels.size()) return false; + uploaded++; } } + wmoRenderer->setDeferNormalMaps(false); + wmoRenderer->setPredecodedBLPCache(nullptr); + if (ft.wmoModelIndex < pending->wmoModels.size()) return false; } ft.phase = FinalizationPhase::WMO_INSTANCES; return false; @@ -862,17 +971,28 @@ bool TerrainManager::advanceFinalization(FinalizingTile& ft) { } case FinalizationPhase::WMO_DOODADS: { - // Upload ONE WMO doodad M2 per call + // Upload multiple WMO doodad M2s per call (batched GPU uploads) if (m2Renderer && ft.wmoDoodadIndex < pending->wmoDoodads.size()) { - auto& doodad = pending->wmoDoodads[ft.wmoDoodadIndex]; - m2Renderer->loadModel(doodad.model, doodad.modelId); - uint32_t wmoDoodadInstId = m2Renderer->createInstanceWithMatrix( - 
doodad.modelId, doodad.modelMatrix, doodad.worldPosition); - if (wmoDoodadInstId) { - m2Renderer->setSkipCollision(wmoDoodadInstId, true); - ft.m2InstanceIds.push_back(wmoDoodadInstId); + // Set pre-decoded BLP cache for doodad M2 textures + m2Renderer->setPredecodedBLPCache(&pending->preloadedM2Textures); + constexpr size_t kDoodadsPerStep = 4; + size_t uploaded = 0; + while (ft.wmoDoodadIndex < pending->wmoDoodads.size() && uploaded < kDoodadsPerStep) { + auto& doodad = pending->wmoDoodads[ft.wmoDoodadIndex]; + if (m2Renderer->loadModel(doodad.model, doodad.modelId)) { + std::lock_guard lock(uploadedM2IdsMutex_); + uploadedM2Ids_.insert(doodad.modelId); + } + uint32_t wmoDoodadInstId = m2Renderer->createInstanceWithMatrix( + doodad.modelId, doodad.modelMatrix, doodad.worldPosition); + if (wmoDoodadInstId) { + m2Renderer->setSkipCollision(wmoDoodadInstId, true); + ft.m2InstanceIds.push_back(wmoDoodadInstId); + } + ft.wmoDoodadIndex++; + uploaded++; } - ft.wmoDoodadIndex++; + m2Renderer->setPredecodedBLPCache(nullptr); if (ft.wmoDoodadIndex < pending->wmoDoodads.size()) return false; } ft.phase = FinalizationPhase::WATER; @@ -1030,11 +1150,6 @@ void TerrainManager::workerLoop() { } void TerrainManager::processReadyTiles() { - // Process tiles with time budget to avoid frame spikes - // Taxi mode gets a slightly larger budget to avoid visible late-pop terrain/models. - const float timeBudgetMs = taxiStreamingMode_ ? 8.0f : 5.0f; - auto startTime = std::chrono::high_resolution_clock::now(); - // Move newly ready tiles into the finalizing deque. // Keep them in pendingTiles so streamTiles() won't re-enqueue them. { @@ -1050,21 +1165,32 @@ void TerrainManager::processReadyTiles() { } } - // Drive incremental finalization within time budget - while (!finalizingTiles_.empty()) { + VkContext* vkCtx = terrainRenderer ? 
terrainRenderer->getVkContext() : nullptr; + + // Reclaim completed async uploads from previous frames (non-blocking) + if (vkCtx) vkCtx->pollUploadBatches(); + + // Nothing to finalize — done. + if (finalizingTiles_.empty()) return; + + // Async upload batch: record GPU copies into a command buffer, submit with + // a fence, but DON'T wait. The fence is polled on subsequent frames. + // This eliminates the main-thread stall from vkWaitForFences entirely. + const int maxSteps = taxiStreamingMode_ ? 8 : 2; + int steps = 0; + + if (vkCtx) vkCtx->beginUploadBatch(); + + while (!finalizingTiles_.empty() && steps < maxSteps) { auto& ft = finalizingTiles_.front(); bool done = advanceFinalization(ft); - if (done) { finalizingTiles_.pop_front(); } - - auto now = std::chrono::high_resolution_clock::now(); - float elapsedMs = std::chrono::duration(now - startTime).count(); - if (elapsedMs >= timeBudgetMs) { - break; - } + steps++; } + + if (vkCtx) vkCtx->endUploadBatch(); // Async — submits but doesn't wait } void TerrainManager::processAllReadyTiles() { @@ -1082,12 +1208,19 @@ void TerrainManager::processAllReadyTiles() { } } } + + // Batch all GPU uploads across all tiles into a single submission + VkContext* vkCtx = terrainRenderer ? terrainRenderer->getVkContext() : nullptr; + if (vkCtx) vkCtx->beginUploadBatch(); + // Finalize all tiles completely (no time budget — used for loading screens) while (!finalizingTiles_.empty()) { auto& ft = finalizingTiles_.front(); while (!advanceFinalization(ft)) {} finalizingTiles_.pop_front(); } + + if (vkCtx) vkCtx->endUploadBatchSync(); // Sync — load screen needs data ready } void TerrainManager::processOneReadyTile() { @@ -1106,9 +1239,14 @@ void TerrainManager::processOneReadyTile() { } // Finalize ONE tile completely, then return so caller can update the screen if (!finalizingTiles_.empty()) { + VkContext* vkCtx = terrainRenderer ? 
terrainRenderer->getVkContext() : nullptr; + if (vkCtx) vkCtx->beginUploadBatch(); + auto& ft = finalizingTiles_.front(); while (!advanceFinalization(ft)) {} finalizingTiles_.pop_front(); + + if (vkCtx) vkCtx->endUploadBatchSync(); // Sync — load screen needs data ready } } @@ -1328,6 +1466,10 @@ void TerrainManager::unloadAll() { finalizingTiles_.clear(); placedDoodadIds.clear(); placedWmoIds.clear(); + { + std::lock_guard lock(uploadedM2IdsMutex_); + uploadedM2Ids_.clear(); + } LOG_INFO("Unloading all terrain tiles"); loadedTiles.clear(); @@ -1376,6 +1518,10 @@ void TerrainManager::softReset() { finalizingTiles_.clear(); placedDoodadIds.clear(); placedWmoIds.clear(); + { + std::lock_guard lock(uploadedM2IdsMutex_); + uploadedM2Ids_.clear(); + } // Clear tile cache — keys are (x,y) without map name, so stale entries from // a different map with overlapping coordinates would produce wrong geometry. diff --git a/src/rendering/terrain_renderer.cpp b/src/rendering/terrain_renderer.cpp index 6e312233..fb20ce42 100644 --- a/src/rendering/terrain_renderer.cpp +++ b/src/rendering/terrain_renderer.cpp @@ -326,6 +326,8 @@ bool TerrainRenderer::loadTerrain(const pipeline::TerrainMesh& mesh, } LOG_DEBUG("Loading terrain mesh: ", mesh.validChunkCount, " chunks"); + vkCtx->beginUploadBatch(); + for (int y = 0; y < 16; y++) { for (int x = 0; x < 16; x++) { const auto& chunk = mesh.getChunk(x, y); @@ -405,10 +407,102 @@ bool TerrainRenderer::loadTerrain(const pipeline::TerrainMesh& mesh, } } + vkCtx->endUploadBatch(); + LOG_DEBUG("Loaded ", chunks.size(), " terrain chunks to GPU"); return !chunks.empty(); } +bool TerrainRenderer::loadTerrainIncremental(const pipeline::TerrainMesh& mesh, + const std::vector& texturePaths, + int tileX, int tileY, + int& chunkIndex, int maxChunksPerCall) { + // Batch all GPU uploads (VBs, IBs, textures) into a single command buffer + // submission with one fence wait, instead of one per buffer/texture. 
+ vkCtx->beginUploadBatch(); + + int uploaded = 0; + while (chunkIndex < 256 && uploaded < maxChunksPerCall) { + int cy = chunkIndex / 16; + int cx = chunkIndex % 16; + chunkIndex++; + + const auto& chunk = mesh.getChunk(cx, cy); + if (!chunk.isValid()) continue; + + TerrainChunkGPU gpuChunk = uploadChunk(chunk); + if (!gpuChunk.isValid()) continue; + + calculateBoundingSphere(gpuChunk, chunk); + + if (!chunk.layers.empty()) { + uint32_t baseTexId = chunk.layers[0].textureId; + if (baseTexId < texturePaths.size()) { + gpuChunk.baseTexture = loadTexture(texturePaths[baseTexId]); + } else { + gpuChunk.baseTexture = whiteTexture.get(); + } + + for (size_t i = 1; i < chunk.layers.size() && i < 4; i++) { + const auto& layer = chunk.layers[i]; + int li = static_cast(i) - 1; + + VkTexture* layerTex = whiteTexture.get(); + if (layer.textureId < texturePaths.size()) { + layerTex = loadTexture(texturePaths[layer.textureId]); + } + gpuChunk.layerTextures[li] = layerTex; + + VkTexture* alphaTex = opaqueAlphaTexture.get(); + if (!layer.alphaData.empty()) { + alphaTex = createAlphaTexture(layer.alphaData); + } + gpuChunk.alphaTextures[li] = alphaTex; + gpuChunk.layerCount = static_cast(i); + } + } else { + gpuChunk.baseTexture = whiteTexture.get(); + } + + gpuChunk.tileX = tileX; + gpuChunk.tileY = tileY; + + TerrainParamsUBO params{}; + params.layerCount = gpuChunk.layerCount; + params.hasLayer1 = gpuChunk.layerCount >= 1 ? 1 : 0; + params.hasLayer2 = gpuChunk.layerCount >= 2 ? 1 : 0; + params.hasLayer3 = gpuChunk.layerCount >= 3 ? 
1 : 0; + + VkBufferCreateInfo bufCI{}; + bufCI.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO; + bufCI.size = sizeof(TerrainParamsUBO); + bufCI.usage = VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT; + + VmaAllocationCreateInfo allocCI{}; + allocCI.usage = VMA_MEMORY_USAGE_CPU_TO_GPU; + allocCI.flags = VMA_ALLOCATION_CREATE_MAPPED_BIT; + + VmaAllocationInfo mapInfo{}; + vmaCreateBuffer(vkCtx->getAllocator(), &bufCI, &allocCI, + &gpuChunk.paramsUBO, &gpuChunk.paramsAlloc, &mapInfo); + if (mapInfo.pMappedData) { + std::memcpy(mapInfo.pMappedData, &params, sizeof(params)); + } + + gpuChunk.materialSet = allocateMaterialSet(); + if (gpuChunk.materialSet) { + writeMaterialDescriptors(gpuChunk.materialSet, gpuChunk); + } + + chunks.push_back(std::move(gpuChunk)); + uploaded++; + } + + vkCtx->endUploadBatch(); + + return chunkIndex >= 256; +} + TerrainChunkGPU TerrainRenderer::uploadChunk(const pipeline::ChunkMesh& chunk) { TerrainChunkGPU gpuChunk; @@ -496,6 +590,9 @@ void TerrainRenderer::uploadPreloadedTextures( [](unsigned char c) { return static_cast(std::tolower(c)); }); return key; }; + // Batch all texture uploads into a single command buffer submission + vkCtx->beginUploadBatch(); + for (const auto& [path, blp] : textures) { std::string key = normalizeKey(path); if (textureCache.find(key) != textureCache.end()) continue; @@ -515,6 +612,8 @@ void TerrainRenderer::uploadPreloadedTextures( textureCacheBytes_ += e.approxBytes; textureCache[key] = std::move(e); } + + vkCtx->endUploadBatch(); } VkTexture* TerrainRenderer::createAlphaTexture(const std::vector& alphaData) { diff --git a/src/rendering/vk_context.cpp b/src/rendering/vk_context.cpp index e1a76cee..79e7eac3 100644 --- a/src/rendering/vk_context.cpp +++ b/src/rendering/vk_context.cpp @@ -67,6 +67,14 @@ void VkContext::shutdown() { frame = {}; } + // Clean up any in-flight async upload batches (device already idle) + for (auto& batch : inFlightBatches_) { + // Staging buffers: skip destroy — allocator is about to be torn down +
vkDestroyFence(device, batch.fence, nullptr); + // Command buffer freed when pool is destroyed below + } + inFlightBatches_.clear(); + if (immFence) { vkDestroyFence(device, immFence, nullptr); immFence = VK_NULL_HANDLE; } if (immCommandPool) { vkDestroyCommandPool(device, immCommandPool, nullptr); immCommandPool = VK_NULL_HANDLE; } @@ -1423,10 +1431,121 @@ void VkContext::endSingleTimeCommands(VkCommandBuffer cmd) { } void VkContext::immediateSubmit(std::function&& function) { + if (inUploadBatch_) { + // Record into the batch command buffer — no submit, no fence wait + function(batchCmd_); + return; + } VkCommandBuffer cmd = beginSingleTimeCommands(); function(cmd); endSingleTimeCommands(cmd); } +void VkContext::beginUploadBatch() { + uploadBatchDepth_++; + if (inUploadBatch_) return; // already in a batch (nested call) + inUploadBatch_ = true; + batchCmd_ = beginSingleTimeCommands(); +} + +void VkContext::endUploadBatch() { + if (uploadBatchDepth_ <= 0) return; + uploadBatchDepth_--; + if (uploadBatchDepth_ > 0) return; // still inside an outer batch + + inUploadBatch_ = false; + + if (batchStagingBuffers_.empty()) { + // No GPU copies were recorded — skip the submit entirely. + vkEndCommandBuffer(batchCmd_); + vkFreeCommandBuffers(device, immCommandPool, 1, &batchCmd_); + batchCmd_ = VK_NULL_HANDLE; + return; + } + + // Submit commands with a NEW fence — don't wait, let GPU work in parallel. 
+ vkEndCommandBuffer(batchCmd_); + + VkFenceCreateInfo fenceInfo{}; + fenceInfo.sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO; + VkFence fence = VK_NULL_HANDLE; + vkCreateFence(device, &fenceInfo, nullptr, &fence); + + VkSubmitInfo submitInfo{}; + submitInfo.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO; + submitInfo.commandBufferCount = 1; + submitInfo.pCommandBuffers = &batchCmd_; + vkQueueSubmit(graphicsQueue, 1, &submitInfo, fence); + + // Stash everything for later cleanup when fence signals + InFlightBatch batch; + batch.fence = fence; + batch.cmd = batchCmd_; + batch.stagingBuffers = std::move(batchStagingBuffers_); + inFlightBatches_.push_back(std::move(batch)); + + batchCmd_ = VK_NULL_HANDLE; + batchStagingBuffers_.clear(); +} + +void VkContext::endUploadBatchSync() { + if (uploadBatchDepth_ <= 0) return; + uploadBatchDepth_--; + if (uploadBatchDepth_ > 0) return; + + inUploadBatch_ = false; + + if (batchStagingBuffers_.empty()) { + vkEndCommandBuffer(batchCmd_); + vkFreeCommandBuffers(device, immCommandPool, 1, &batchCmd_); + batchCmd_ = VK_NULL_HANDLE; + return; + } + + // Synchronous path for load screens — submit and wait + endSingleTimeCommands(batchCmd_); + batchCmd_ = VK_NULL_HANDLE; + + for (auto& staging : batchStagingBuffers_) { + destroyBuffer(allocator, staging); + } + batchStagingBuffers_.clear(); +} + +void VkContext::pollUploadBatches() { + if (inFlightBatches_.empty()) return; + + for (auto it = inFlightBatches_.begin(); it != inFlightBatches_.end(); ) { + VkResult result = vkGetFenceStatus(device, it->fence); + if (result == VK_SUCCESS) { + // GPU finished — free resources + for (auto& staging : it->stagingBuffers) { + destroyBuffer(allocator, staging); + } + vkFreeCommandBuffers(device, immCommandPool, 1, &it->cmd); + vkDestroyFence(device, it->fence, nullptr); + it = inFlightBatches_.erase(it); + } else { + ++it; + } + } +} + +void VkContext::waitAllUploads() { + for (auto& batch : inFlightBatches_) { + vkWaitForFences(device, 1, &batch.fence, 
VK_TRUE, UINT64_MAX); + for (auto& staging : batch.stagingBuffers) { + destroyBuffer(allocator, staging); + } + vkFreeCommandBuffers(device, immCommandPool, 1, &batch.cmd); + vkDestroyFence(device, batch.fence, nullptr); + } + inFlightBatches_.clear(); +} + +void VkContext::deferStagingCleanup(AllocatedBuffer staging) { + batchStagingBuffers_.push_back(staging); +} + } // namespace rendering } // namespace wowee diff --git a/src/rendering/vk_texture.cpp b/src/rendering/vk_texture.cpp index fba6d72b..415e3d56 100644 --- a/src/rendering/vk_texture.cpp +++ b/src/rendering/vk_texture.cpp @@ -96,7 +96,11 @@ bool VkTexture::upload(VkContext& ctx, const uint8_t* pixels, uint32_t width, ui generateMipmaps(ctx, format, width, height); } - destroyBuffer(ctx.getAllocator(), staging); + if (ctx.isInUploadBatch()) { + ctx.deferStagingCleanup(staging); + } else { + destroyBuffer(ctx.getAllocator(), staging); + } return true; } @@ -162,7 +166,11 @@ bool VkTexture::uploadMips(VkContext& ctx, const uint8_t* const* mipData, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT); }); - destroyBuffer(ctx.getAllocator(), staging); + if (ctx.isInUploadBatch()) { + ctx.deferStagingCleanup(staging); + } else { + destroyBuffer(ctx.getAllocator(), staging); + } return true; } diff --git a/src/rendering/vk_utils.cpp b/src/rendering/vk_utils.cpp index d105c986..3a2f51d1 100644 --- a/src/rendering/vk_utils.cpp +++ b/src/rendering/vk_utils.cpp @@ -198,8 +198,12 @@ AllocatedBuffer uploadBuffer(VkContext& ctx, const void* data, VkDeviceSize size vkCmdCopyBuffer(cmd, staging.buffer, gpuBuffer.buffer, 1, &copyRegion); }); - // Destroy staging buffer - destroyBuffer(ctx.getAllocator(), staging); + // Destroy staging buffer (deferred if in batch mode) + if (ctx.isInUploadBatch()) { + ctx.deferStagingCleanup(staging); + } else { + destroyBuffer(ctx.getAllocator(), staging); + } return gpuBuffer; } diff --git a/src/rendering/wmo_renderer.cpp b/src/rendering/wmo_renderer.cpp index
ff6b0035..5dec0e3e 100644 --- a/src/rendering/wmo_renderer.cpp +++ b/src/rendering/wmo_renderer.cpp @@ -419,6 +419,10 @@ bool WMORenderer::loadModel(const pipeline::WMOModel& model, uint32_t id) { core::Logger::getInstance().debug(" WMO bounds: min=(", model.boundingBoxMin.x, ", ", model.boundingBoxMin.y, ", ", model.boundingBoxMin.z, ") max=(", model.boundingBoxMax.x, ", ", model.boundingBoxMax.y, ", ", model.boundingBoxMax.z, ")"); + // Batch all GPU uploads (textures, VBs, IBs) into a single command buffer + // submission with one fence wait, instead of one per upload. + vkCtx_->beginUploadBatch(); + // Load textures for this model core::Logger::getInstance().debug(" WMO has ", model.textures.size(), " texture paths, ", model.materials.size(), " materials"); if (assetManager && !model.textures.empty()) { @@ -720,6 +724,8 @@ bool WMORenderer::loadModel(const pipeline::WMOModel& model, uint32_t id) { groupRes.allUntextured = !anyTextured && !groupRes.mergedBatches.empty(); } + vkCtx_->endUploadBatch(); + // Copy portal data for visibility culling modelData.portalVertices = model.portalVertices; for (const auto& portal : model.portals) { @@ -2319,13 +2325,27 @@ VkTexture* WMORenderer::loadTexture(const std::string& path) { const auto& attemptedCandidates = uniqueCandidates; // Try loading all candidates until one succeeds + // Check pre-decoded BLP cache first (populated by background worker threads) pipeline::BLPImage blp; std::string resolvedKey; - for (const auto& c : attemptedCandidates) { - blp = assetManager->loadTexture(c); - if (blp.isValid()) { - resolvedKey = c; - break; + if (predecodedBLPCache_) { + for (const auto& c : uniqueCandidates) { + auto pit = predecodedBLPCache_->find(c); + if (pit != predecodedBLPCache_->end()) { + blp = std::move(pit->second); + predecodedBLPCache_->erase(pit); + resolvedKey = c; + break; + } + } + } + if (!blp.isValid()) { + for (const auto& c : attemptedCandidates) { + blp = assetManager->loadTexture(c); + if 
(blp.isValid()) { + resolvedKey = c; + break; + } } } if (!blp.isValid()) { @@ -2363,10 +2383,10 @@ VkTexture* WMORenderer::loadTexture(const std::string& path) { texture->createSampler(vkCtx_->getDevice(), VK_FILTER_LINEAR, VK_FILTER_LINEAR, VK_SAMPLER_ADDRESS_MODE_REPEAT); - // Generate normal+height map from diffuse pixels + // Generate normal+height map from diffuse pixels (skip during streaming to avoid CPU stalls) float nhVariance = 0.0f; std::unique_ptr nhMap; - if (normalMappingEnabled_ || pomEnabled_) { + if ((normalMappingEnabled_ || pomEnabled_) && !deferNormalMaps_) { nhMap = generateNormalHeightMap(blp.data.data(), blp.width, blp.height, nhVariance); if (nhMap) { approxBytes *= 2; // account for normal map in budget