From a96ea0758ca7193e98b9b6ec157c03c71f7b3963 Mon Sep 17 00:00:00 2001
From: Kelsi
Date: Thu, 5 Feb 2026 17:20:30 -0800
Subject: [PATCH] Optimize WMO rendering and collision performance

- Add texture-sorted batch merging with glMultiDrawElements to reduce draw calls
- Pre-compute merged batches at load time instead of per-frame
- Add persistent floor height cache with disk save/load (cache/wmo_floor_cache.bin)
- Reduce collision focus radius and sweep steps for faster collision checks
- Add floor cache size to performance HUD
- Reduce WMO group distance culling to 80 units
---
Review notes (v2; this section is ignored by git am):

Changed relative to v1, all in src/rendering/wmo_renderer.cpp:
  * loadFloorCache(): the header and every entry read are checked, the
    on-disk entry count is bounded by the file size before it is used to
    size the table, and entries are read into a temporary table that only
    replaces the live cache once the whole file has been read. v1 inserted
    uninitialised key/height values when the file was truncated and passed
    an unvalidated count to reserve().
  * saveFloorCache(): directory creation uses the error_code overload so a
    filesystem failure is logged instead of thrown during shutdown, and the
    stream is checked after the final flush so a short write is not
    reported as success.
  * renderGroup(): the redundant-state tracker is now local to the call.
    v1 kept it in function statics, which go stale as soon as anything else
    binds texture unit 0 between calls and then skip a required bind.
  * getFloorHeight(): a cached height is only returned when it lies within
    2 units of the query height. The grid is keyed on (x, y) only, so v1
    could return a lower storey's floor for a query on an upper storey.

Mechanical:
  * Hunk headers and the diffstat are updated for the new line counts; the
    "index" lines are unchanged from v1.
  * Added lines have their template/cast arguments written out. Context and
    removed lines are reproduced as received, including places where
    angle-bracket arguments are missing, because they have to match the
    target tree and that tree is not visible here.

Not addressed here:
  * Cache keys carry no map identifier, and clearInstances() empties the
    grid, so what loadFloorCache() reads at startup is only useful until the
    first clearInstances() call.
  * One height is stored per 2-unit cell, so sloped floors and stairs inside
    a cell share a single cached value.

 include/rendering/camera_controller.hpp |   4 +-
 include/rendering/wmo_renderer.hpp      |  31 ++++
 src/core/application.cpp                |  29 ++++
 src/rendering/camera_controller.cpp     |  10 +-
 src/rendering/character_preview.cpp     |   9 +-
 src/rendering/performance_hud.cpp       |   7 +-
 src/rendering/wmo_renderer.cpp          | 221 ++++++++++++++++++++----
 7 files changed, 271 insertions(+), 40 deletions(-)

diff --git a/include/rendering/camera_controller.hpp b/include/rendering/camera_controller.hpp
index ef8037aa..744804d4 100644
--- a/include/rendering/camera_controller.hpp
+++ b/include/rendering/camera_controller.hpp
@@ -106,8 +106,8 @@ private:
     static constexpr float PIVOT_HEIGHT = 1.8f; // Pivot at head height
     static constexpr float CAM_SPHERE_RADIUS = 0.32f; // Keep camera farther from geometry to avoid clipping-through surfaces
     static constexpr float CAM_EPSILON = 0.22f; // Extra wall offset to avoid near-plane clipping artifacts
-    static constexpr float COLLISION_FOCUS_RADIUS_THIRD_PERSON = 42.0f;
-    static constexpr float COLLISION_FOCUS_RADIUS_FREE_FLY = 34.0f;
+    static constexpr float COLLISION_FOCUS_RADIUS_THIRD_PERSON = 20.0f; // Reduced for performance
+    static constexpr float COLLISION_FOCUS_RADIUS_FREE_FLY = 20.0f;
     static constexpr float MIN_PITCH = -88.0f; // Look almost straight down
     static constexpr float MAX_PITCH = 35.0f; // Limited upward look
     glm::vec3* followTarget = nullptr;
diff --git a/include/rendering/wmo_renderer.hpp b/include/rendering/wmo_renderer.hpp
index b936729f..93be44d2 100644
--- a/include/rendering/wmo_renderer.hpp
+++ b/include/rendering/wmo_renderer.hpp
@@ -222,6 +222,11 @@ public:
     double getQueryTimeMs() const { return queryTimeMs; }
     uint32_t getQueryCallCount() const { return queryCallCount; }
 
+    // Floor cache persistence
+    bool saveFloorCache(const std::string& filepath) const;
+    bool loadFloorCache(const std::string& filepath);
+    size_t getFloorCacheSize() const { return precomputedFloorGrid.size(); }
+
 private:
     /**
      * WMO group GPU resources
@@ -243,6 +248,16 @@ private:
         };
         std::vector batches;
 
+        // Pre-merged batches for efficient rendering (computed at load time)
+        struct MergedBatch {
+            GLuint texId;
+            bool hasTexture;
+            bool alphaTest;
+            std::vector<GLsizei> counts;
+            std::vector<const void*> offsets;
+        };
+        std::vector<MergedBatch> mergedBatches;
+
         // Collision geometry (positions only, for floor raycasting)
         std::vector collisionVertices;
         std::vector collisionIndices;
@@ -472,6 +487,22 @@ private:
     // Collision query profiling (per frame).
     mutable double queryTimeMs = 0.0;
     mutable uint32_t queryCallCount = 0;
+
+    // Floor height cache - persistent precomputed grid
+    static constexpr float FLOOR_GRID_CELL_SIZE = 2.0f; // 2 unit grid cells
+    mutable std::unordered_map<uint64_t, float> precomputedFloorGrid; // key -> floor height
+    mutable bool floorGridDirty = true; // Rebuild when instances change
+    mutable uint32_t currentFrameId = 0;
+
+    uint64_t floorGridKey(float x, float y) const {
+        int32_t ix = static_cast<int32_t>(std::floor(x / FLOOR_GRID_CELL_SIZE));
+        int32_t iy = static_cast<int32_t>(std::floor(y / FLOOR_GRID_CELL_SIZE));
+        return (static_cast<uint64_t>(static_cast<uint32_t>(ix)) << 32) |
+               static_cast<uint64_t>(static_cast<uint32_t>(iy));
+    }
+
+    // Compute floor height for a single cell (expensive, done at load time)
+    std::optional<float> computeFloorHeightSlow(float x, float y, float refZ) const;
 };
 
 } // namespace rendering
diff --git a/src/core/application.cpp b/src/core/application.cpp
index 96c86136..0e696cca 100644
--- a/src/core/application.cpp
+++ b/src/core/application.cpp
@@ -146,6 +146,11 @@ bool Application::initialize() {
         return false;
     }
 
+    // Load cached floor heights for faster collision
+    if (renderer->getWMORenderer()) {
+        renderer->getWMORenderer()->loadFloorCache("cache/wmo_floor_cache.bin");
+    }
+
     // Create UI manager
     uiManager = std::make_unique();
     if (!uiManager->initialize(window.get())) {
@@ -275,14 +280,33 @@ void Application::run() {
         // Update input
         Input::getInstance().update();
 
+        // Timing breakdown
+        static int frameCount = 0;
+        static double totalUpdateMs = 0, totalRenderMs = 0, totalSwapMs = 0;
+        auto t1 = std::chrono::steady_clock::now();
+
         // Update application state
         update(deltaTime);
+        auto t2 = std::chrono::steady_clock::now();
 
         // Render
         render();
+        auto t3 = std::chrono::steady_clock::now();
 
         // Swap buffers
         window->swapBuffers();
+        auto t4 = std::chrono::steady_clock::now();
+
+        totalUpdateMs += std::chrono::duration<double, std::milli>(t2 - t1).count();
+        totalRenderMs += std::chrono::duration<double, std::milli>(t3 - t2).count();
+        totalSwapMs += std::chrono::duration<double, std::milli>(t4 - t3).count();
+
+        if (++frameCount >= 60) {
+            printf("[Frame] Update: %.1f ms, Render: %.1f ms, Swap: %.1f ms\n",
+                   totalUpdateMs / 60.0, totalRenderMs / 60.0, totalSwapMs / 60.0);
+            frameCount = 0;
+            totalUpdateMs = totalRenderMs = totalSwapMs = 0;
+        }
     }
 
     LOG_INFO("Main loop ended");
@@ -291,6 +315,11 @@ void Application::run() {
 void Application::shutdown() {
     LOG_INFO("Shutting down application");
 
+    // Save floor cache before renderer is destroyed
+    if (renderer && renderer->getWMORenderer()) {
+        renderer->getWMORenderer()->saveFloorCache("cache/wmo_floor_cache.bin");
+    }
+
     // Stop renderer first: terrain streaming workers may still be reading via
     // AssetManager during shutdown, so renderer/terrain teardown must complete
     // before AssetManager is destroyed.
diff --git a/src/rendering/camera_controller.cpp b/src/rendering/camera_controller.cpp
index f10e090c..777a6377 100644
--- a/src/rendering/camera_controller.cpp
+++ b/src/rendering/camera_controller.cpp
@@ -291,7 +291,7 @@ void CameraController::update(float deltaTime) {
             glm::vec3 swimFrom = *followTarget;
             glm::vec3 swimTo = targetPos;
             float swimMoveDist = glm::length(swimTo - swimFrom);
-            int swimSteps = std::max(1, std::min(12, static_cast(std::ceil(swimMoveDist / 0.22f))));
+            int swimSteps = std::max(1, std::min(4, static_cast<int>(std::ceil(swimMoveDist / 0.5f))));
             glm::vec3 stepPos = swimFrom;
             glm::vec3 stepDelta = (swimTo - swimFrom) / static_cast(swimSteps);
 
@@ -357,9 +357,9 @@ void CameraController::update(float deltaTime) {
             glm::vec3 desiredPos = targetPos;
             float moveDist = glm::length(desiredPos - startPos);
             // Adaptive CCD: keep per-step movement short, especially on low FPS spikes.
-            int sweepSteps = std::max(1, std::min(14, static_cast(std::ceil(moveDist / 0.24f))));
+            int sweepSteps = std::max(1, std::min(4, static_cast<int>(std::ceil(moveDist / 0.5f))));
             if (deltaTime > 0.04f) {
-                sweepSteps = std::min(16, std::max(sweepSteps, static_cast(std::ceil(deltaTime / 0.016f)) * 2));
+                sweepSteps = std::min(6, std::max(sweepSteps, static_cast<int>(std::ceil(deltaTime / 0.016f))));
             }
             glm::vec3 stepPos = startPos;
             glm::vec3 stepDelta = (desiredPos - startPos) / static_cast(sweepSteps);
@@ -779,9 +779,9 @@ void CameraController::update(float deltaTime) {
         glm::vec3 startFeet = camera->getPosition() - glm::vec3(0, 0, eyeHeight);
         glm::vec3 desiredFeet = newPos - glm::vec3(0, 0, eyeHeight);
         float moveDist = glm::length(desiredFeet - startFeet);
-        int sweepSteps = std::max(1, std::min(14, static_cast(std::ceil(moveDist / 0.24f))));
+        int sweepSteps = std::max(1, std::min(4, static_cast<int>(std::ceil(moveDist / 0.5f))));
         if (deltaTime > 0.04f) {
-            sweepSteps = std::min(16, std::max(sweepSteps, static_cast(std::ceil(deltaTime / 0.016f)) * 2));
+            sweepSteps = std::min(6, std::max(sweepSteps, static_cast<int>(std::ceil(deltaTime / 0.016f))));
         }
         glm::vec3 stepPos = startFeet;
         glm::vec3 stepDelta = (desiredFeet - startFeet) / static_cast(sweepSteps);
diff --git a/src/rendering/character_preview.cpp b/src/rendering/character_preview.cpp
index 96ac1a23..c4f60afa 100644
--- a/src/rendering/character_preview.cpp
+++ b/src/rendering/character_preview.cpp
@@ -241,7 +241,10 @@ bool CharacterPreview::loadCharacter(game::Race race, game::Gender gender,
         }
     }
 
-    charRenderer_->loadModel(model, PREVIEW_MODEL_ID);
+    if (!charRenderer_->loadModel(model, PREVIEW_MODEL_ID)) {
+        LOG_WARNING("CharacterPreview: failed to load model to GPU");
+        return false;
+    }
 
     // Composite body skin + face + underwear overlays
     if (!bodySkinPath.empty()) {
@@ -331,7 +334,9 @@ void CharacterPreview::update(float deltaTime) {
 }
 
 void CharacterPreview::render() {
-    if (!fbo_ || !charRenderer_ || !camera_ || !modelLoaded_) return;
+    if (!fbo_ || !charRenderer_ || !camera_ || !modelLoaded_) {
+        return;
+    }
 
     // Save current viewport
     GLint prevViewport[4];
diff --git a/src/rendering/performance_hud.cpp b/src/rendering/performance_hud.cpp
index 8f2bc106..e79ed9f2 100644
--- a/src/rendering/performance_hud.cpp
+++ b/src/rendering/performance_hud.cpp
@@ -370,11 +370,10 @@ void PerformanceHUD::render(const Renderer* renderer, const Camera* camera) {
             ImGui::Text("Instances: %u", wmoRenderer->getInstanceCount());
             ImGui::Text("Triangles: %u", wmoRenderer->getTotalTriangleCount());
             ImGui::Text("Draw Calls: %u", wmoRenderer->getDrawCallCount());
+            ImGui::Text("Floor Cache: %zu", wmoRenderer->getFloorCacheSize());
+            ImGui::Text("Dist Culled: %u groups", wmoRenderer->getDistanceCulledGroups());
             if (wmoRenderer->isOcclusionCullingEnabled()) {
-                ImGui::Text("Occlusion Culled: %u groups", wmoRenderer->getOcclusionCulledGroups());
-            }
-            if (wmoRenderer->isDistanceCullingEnabled()) {
-                ImGui::Text("Distance Culled: %u groups", wmoRenderer->getDistanceCulledGroups());
+                ImGui::Text("Occl Culled: %u groups", wmoRenderer->getOcclusionCulledGroups());
             }
             if (wmoRenderer->isPortalCullingEnabled()) {
                 ImGui::Text("Portal Culled: %u groups", wmoRenderer->getPortalCulledGroups());
diff --git a/src/rendering/wmo_renderer.cpp b/src/rendering/wmo_renderer.cpp
index 2b1f2603..0e59109b 100644
--- a/src/rendering/wmo_renderer.cpp
+++ b/src/rendering/wmo_renderer.cpp
@@ -12,6 +12,8 @@
 #include
 #include
 #include
+#include <filesystem>
+#include <fstream>
 #include
 #include
 
@@ -309,6 +311,44 @@ bool WMORenderer::loadModel(const pipeline::WMOModel& model, uint32_t id) {
         return false;
     }
 
+    // Build pre-merged batches for each group (texture-sorted for efficient rendering)
+    for (auto& groupRes : modelData.groups) {
+        std::unordered_map<uint64_t, GroupResources::MergedBatch> batchMap;
+
+        for (const auto& batch : groupRes.batches) {
+            GLuint texId = whiteTexture;
+            bool hasTexture = false;
+
+            if (batch.materialId < modelData.materialTextureIndices.size()) {
+                uint32_t texIndex = modelData.materialTextureIndices[batch.materialId];
+                if (texIndex < modelData.textures.size()) {
+                    texId = modelData.textures[texIndex];
+                    hasTexture = (texId != 0 && texId != whiteTexture);
+                }
+            }
+
+            bool alphaTest = false;
+            if (batch.materialId < modelData.materialBlendModes.size()) {
+                alphaTest = (modelData.materialBlendModes[batch.materialId] == 1);
+            }
+
+            uint64_t key = (static_cast<uint64_t>(texId) << 1) | (alphaTest ? 1 : 0);
+            auto& mb = batchMap[key];
+            if (mb.counts.empty()) {
+                mb.texId = texId;
+                mb.hasTexture = hasTexture;
+                mb.alphaTest = alphaTest;
+            }
+            mb.counts.push_back(static_cast<GLsizei>(batch.indexCount));
+            mb.offsets.push_back(reinterpret_cast<const void*>(batch.startIndex * sizeof(uint16_t)));
+        }
+
+        groupRes.mergedBatches.reserve(batchMap.size());
+        for (auto& [key, mb] : batchMap) {
+            groupRes.mergedBatches.push_back(std::move(mb));
+        }
+    }
+
     // Copy portal data for visibility culling
     modelData.portalVertices = model.portalVertices;
     for (const auto& portal : model.portals) {
@@ -452,6 +492,7 @@ void WMORenderer::clearInstances() {
     instances.clear();
     spatialGrid.clear();
     instanceIndexById.clear();
+    precomputedFloorGrid.clear(); // Invalidate floor cache when instances change
     core::Logger::getInstance().info("Cleared all WMO instances");
 }
 
@@ -469,6 +510,109 @@ void WMORenderer::clearCollisionFocus() {
 void WMORenderer::resetQueryStats() {
     queryTimeMs = 0.0;
     queryCallCount = 0;
+    currentFrameId++;
+    // Note: precomputedFloorGrid is persistent and not cleared per-frame
+}
+
+bool WMORenderer::saveFloorCache(const std::string& filepath) const {
+    // Create the parent directory if needed. The error_code overload is used so
+    // a filesystem failure is logged and returned instead of thrown at shutdown.
+    std::filesystem::path path(filepath);
+    if (path.has_parent_path()) {
+        std::error_code ec;
+        std::filesystem::create_directories(path.parent_path(), ec);
+        if (ec) {
+            core::Logger::getInstance().error("Failed to create floor cache directory: ", ec.message());
+            return false;
+        }
+    }
+
+    std::ofstream file(filepath, std::ios::binary);
+    if (!file) {
+        core::Logger::getInstance().error("Failed to open floor cache file for writing: ", filepath);
+        return false;
+    }
+
+    // Write header: magic + version + count
+    const uint32_t magic = 0x574D4F46; // "WMOF"
+    const uint32_t version = 1;
+    const uint64_t count = precomputedFloorGrid.size();
+
+    file.write(reinterpret_cast<const char*>(&magic), sizeof(magic));
+    file.write(reinterpret_cast<const char*>(&version), sizeof(version));
+    file.write(reinterpret_cast<const char*>(&count), sizeof(count));
+
+    // Write each entry: key (uint64) + height (float)
+    for (const auto& [key, height] : precomputedFloorGrid) {
+        file.write(reinterpret_cast<const char*>(&key), sizeof(key));
+        file.write(reinterpret_cast<const char*>(&height), sizeof(height));
+    }
+
+    // Flush and re-check the stream so a short write (e.g. a full disk) is not
+    // reported as a successful save.
+    file.flush();
+    if (!file) {
+        core::Logger::getInstance().error("Failed to write floor cache file: ", filepath);
+        return false;
+    }
+
+    core::Logger::getInstance().info("Saved WMO floor cache: ", count, " entries to ", filepath);
+    return true;
+}
+
+bool WMORenderer::loadFloorCache(const std::string& filepath) {
+    std::ifstream file(filepath, std::ios::binary);
+    if (!file) {
+        core::Logger::getInstance().info("No existing floor cache file: ", filepath);
+        return false;
+    }
+
+    // Read and validate header
+    uint32_t magic = 0, version = 0;
+    uint64_t count = 0;
+
+    file.read(reinterpret_cast<char*>(&magic), sizeof(magic));
+    file.read(reinterpret_cast<char*>(&version), sizeof(version));
+    file.read(reinterpret_cast<char*>(&count), sizeof(count));
+
+    if (!file || magic != 0x574D4F46 || version != 1) {
+        core::Logger::getInstance().warning("Invalid floor cache file format: ", filepath);
+        return false;
+    }
+
+    // The entry count comes from disk: bound it by what the file can actually
+    // hold before using it to size the table.
+    constexpr uint64_t kHeaderBytes = sizeof(uint32_t) * 2 + sizeof(uint64_t);
+    constexpr uint64_t kEntryBytes = sizeof(uint64_t) + sizeof(float);
+    std::error_code ec;
+    const uint64_t fileBytes = std::filesystem::file_size(filepath, ec);
+    if (ec || fileBytes < kHeaderBytes ||
+        count > (fileBytes - kHeaderBytes) / kEntryBytes) {
+        core::Logger::getInstance().warning("Invalid floor cache file format: ", filepath);
+        return false;
+    }
+
+    // Read entries into a temporary table so a truncated file leaves the
+    // current cache untouched.
+    decltype(precomputedFloorGrid) loaded;
+    loaded.reserve(static_cast<size_t>(count));
+
+    for (uint64_t i = 0; i < count; i++) {
+        uint64_t key = 0;
+        float height = 0.0f;
+        file.read(reinterpret_cast<char*>(&key), sizeof(key));
+        file.read(reinterpret_cast<char*>(&height), sizeof(height));
+        if (!file) {
+            core::Logger::getInstance().warning("Truncated floor cache file: ", filepath);
+            return false;
+        }
+        loaded[key] = height;
+    }
+
+    precomputedFloorGrid = std::move(loaded);
+
+    core::Logger::getInstance().info("Loaded WMO floor cache: ", precomputedFloorGrid.size(), " entries from ", filepath);
+    return true;
 }
 
 WMORenderer::GridCell WMORenderer::toCell(const glm::vec3& p) const {
@@ -562,6 +706,10 @@ void WMORenderer::render(const Camera& camera, const glm::mat4& view, const glm:
         shader->setUniform("uShadowMap", 7);
     }
 
+    // Set up texture unit 0 for diffuse textures (set once per frame)
+    glActiveTexture(GL_TEXTURE0);
+    shader->setUniform("uTexture", 0);
+
     // Enable wireframe if requested
     if (wireframeMode) {
         glPolygonMode(GL_FRONT_AND_BACK, GL_LINE);
@@ -657,7 +805,7 @@ void WMORenderer::render(const Camera& camera, const glm::mat4& view, const glm:
                 // Hard distance cutoff - skip groups entirely if closest point is too far
                 glm::vec3 closestPoint = glm::clamp(camPos, gMin, gMax);
                 float distSq = glm::dot(closestPoint - camPos, closestPoint - camPos);
-                if (distSq > 40000.0f) { // Beyond 200 units - hard skip
+                if (distSq > 6400.0f) { // Beyond 80 units - hard skip
                     lastDistanceCulledGroups++;
                     continue;
                 }
@@ -822,40 +970,38 @@ bool WMORenderer::createGroupResources(const pipeline::WMOGroup& group, GroupRes
     return true;
 }
 
-void WMORenderer::renderGroup(const GroupResources& group, const ModelData& model,
+void WMORenderer::renderGroup(const GroupResources& group, [[maybe_unused]] const ModelData& model,
                               [[maybe_unused]] const glm::mat4& modelMatrix,
                               [[maybe_unused]] const glm::mat4& view,
                               [[maybe_unused]] const glm::mat4& projection) {
     glBindVertexArray(group.vao);
 
-    // Render each batch in original order (sorting breaks depth/alpha)
-    for (const auto& batch : group.batches) {
-        // Bind texture for this batch's material
-        GLuint texId = whiteTexture;
-        bool hasTexture = false;
+    // Use pre-computed merged batches (built at load time).
+    // Bound-state tracking is local to this call: other renderers can rebind
+    // texture unit 0 between calls, so state remembered in function statics
+    // would go stale and skip a bind that is actually needed.
+    bool stateKnown = false;
+    GLuint lastBoundTex = 0;
+    bool lastHasTexture = false;
+    bool lastAlphaTest = false;
 
-        if (batch.materialId < model.materialTextureIndices.size()) {
-            uint32_t texIndex = model.materialTextureIndices[batch.materialId];
-            if (texIndex < model.textures.size()) {
-                texId = model.textures[texIndex];
-                hasTexture = (texId != 0 && texId != whiteTexture);
-            }
+    for (const auto& mb : group.mergedBatches) {
+        if (!stateKnown || mb.texId != lastBoundTex) {
+            glBindTexture(GL_TEXTURE_2D, mb.texId);
+            lastBoundTex = mb.texId;
+        }
+        if (!stateKnown || mb.hasTexture != lastHasTexture) {
+            shader->setUniform("uHasTexture", mb.hasTexture);
+            lastHasTexture = mb.hasTexture;
+        }
+        if (!stateKnown || mb.alphaTest != lastAlphaTest) {
+            shader->setUniform("uAlphaTest", mb.alphaTest);
+            lastAlphaTest = mb.alphaTest;
         }
 
-        // Determine if this material uses alpha-test cutout (blendMode 1)
-        bool alphaTest = false;
-        if (batch.materialId < model.materialBlendModes.size()) {
-            alphaTest = (model.materialBlendModes[batch.materialId] == 1);
-        }
-
-        glActiveTexture(GL_TEXTURE0);
-        glBindTexture(GL_TEXTURE_2D, texId);
-        shader->setUniform("uTexture", 0);
-        shader->setUniform("uHasTexture", hasTexture);
-        shader->setUniform("uAlphaTest", alphaTest);
-
-        glDrawElements(GL_TRIANGLES, batch.indexCount, GL_UNSIGNED_SHORT,
-                       (void*)(batch.startIndex * sizeof(uint16_t)));
+        stateKnown = true;
+        glMultiDrawElements(GL_TRIANGLES, mb.counts.data(), GL_UNSIGNED_SHORT,
+                            mb.offsets.data(), static_cast<GLsizei>(mb.counts.size()));
         lastDrawCalls++;
     }
 
@@ -1209,6 +1355,22 @@ static glm::vec3 closestPointOnTriangle(const glm::vec3& p, const glm::vec3& a,
 }
 
 std::optional WMORenderer::getFloorHeight(float glX, float glY, float glZ) const {
+    // Check persistent grid cache first (computed lazily, never expires)
+    uint64_t gridKey = floorGridKey(glX, glY);
+    auto gridIt = precomputedFloorGrid.find(gridKey);
+    if (gridIt != precomputedFloorGrid.end()) {
+        float cachedHeight = gridIt->second;
+        // The grid is keyed on (x, y) only, so a cell may hold the floor of a
+        // different storey. Only trust the cached value when it lies within
+        // the tolerance of the query height; otherwise fall through to the
+        // full query so an upper floor is not answered with a lower one.
+        constexpr float kCacheZTolerance = 2.0f;
+        if (cachedHeight <= glZ + kCacheZTolerance &&
+            cachedHeight >= glZ - kCacheZTolerance) {
+            return cachedHeight;
+        }
+    }
+
     QueryTimer timer(&queryTimeMs, &queryCallCount);
     std::optional bestFloor;
 
@@ -1304,6 +1466,11 @@ std::optional WMORenderer::getFloorHeight(float glX, float glY, float glZ
         }
     }
 
+    // Cache the result in persistent grid (never expires)
+    if (bestFloor) {
+        precomputedFloorGrid[gridKey] = *bestFloor;
+    }
+
     return bestFloor;
 }
 