Optimize WMO rendering and collision performance

- Add texture-sorted batch merging with glMultiDrawElements to reduce draw calls
- Pre-compute merged batches at load time instead of per-frame
- Add persistent floor height cache with disk save/load (cache/wmo_floor_cache.bin)
- Reduce collision focus radius and sweep steps for faster collision checks
- Add floor cache size to performance HUD
- Reduce WMO group distance culling to 80 units
2026-05-08 10:03:51 +00:00 · 2026-02-05 17:20:30 -08:00 · 2026-02-05 17:20:30 -08:00 · a96ea0758c
commit a96ea0758c
parent 44b03a06a0
7 changed files with 229 additions and 40 deletions
--- a/src/rendering/wmo_renderer.cpp
+++ b/src/rendering/wmo_renderer.cpp
@ -12,6 +12,8 @@
 #include <algorithm>
 #include <chrono>
 #include <cmath>
+#include <filesystem>
+#include <fstream>
 #include <limits>
 #include <unordered_set>

@ -309,6 +311,44 @@ bool WMORenderer::loadModel(const pipeline::WMOModel& model, uint32_t id) {
        return false;
    }

+    // Build pre-merged batches for each group (texture-sorted for efficient rendering)
+    for (auto& groupRes : modelData.groups) {
+        std::unordered_map<uint64_t, GroupResources::MergedBatch> batchMap;
+
+        for (const auto& batch : groupRes.batches) {
+            GLuint texId = whiteTexture;
+            bool hasTexture = false;
+
+            if (batch.materialId < modelData.materialTextureIndices.size()) {
+                uint32_t texIndex = modelData.materialTextureIndices[batch.materialId];
+                if (texIndex < modelData.textures.size()) {
+                    texId = modelData.textures[texIndex];
+                    hasTexture = (texId != 0 && texId != whiteTexture);
+                }
+            }
+
+            bool alphaTest = false;
+            if (batch.materialId < modelData.materialBlendModes.size()) {
+                alphaTest = (modelData.materialBlendModes[batch.materialId] == 1);
+            }
+
+            uint64_t key = (static_cast<uint64_t>(texId) << 1) | (alphaTest ? 1 : 0);
+            auto& mb = batchMap[key];
+            if (mb.counts.empty()) {
+                mb.texId = texId;
+                mb.hasTexture = hasTexture;
+                mb.alphaTest = alphaTest;
+            }
+            mb.counts.push_back(static_cast<GLsizei>(batch.indexCount));
+            mb.offsets.push_back(reinterpret_cast<const void*>(batch.startIndex * sizeof(uint16_t)));
+        }
+
+        groupRes.mergedBatches.reserve(batchMap.size());
+        for (auto& [key, mb] : batchMap) {
+            groupRes.mergedBatches.push_back(std::move(mb));
+        }
+    }
+
    // Copy portal data for visibility culling
    modelData.portalVertices = model.portalVertices;
    for (const auto& portal : model.portals) {
@ -452,6 +492,7 @@ void WMORenderer::clearInstances() {
    instances.clear();
    spatialGrid.clear();
    instanceIndexById.clear();
+    precomputedFloorGrid.clear();  // Invalidate floor cache when instances change
    core::Logger::getInstance().info("Cleared all WMO instances");
 }

@ -469,6 +510,76 @@ void WMORenderer::clearCollisionFocus() {
 void WMORenderer::resetQueryStats() {
     queryTimeMs = 0.0;
     queryCallCount = 0;
+    currentFrameId++;  // bumped on every stats reset (presumably once per frame - confirm with caller)
+    // Note: precomputedFloorGrid is intentionally not reset here; only clearInstances() and loadFloorCache() clear it
+}
+
+// Writes the floor-height cache (precomputedFloorGrid) to a binary file.
+//
+// File layout, all fields in host byte order:
+//   uint32 magic (0x574D4F46) | uint32 version (1) | uint64 entry count,
+//   followed by `count` records of { uint64 grid key, float height }.
+//
+// Missing parent directories are created first. Returns true only when the
+// file could be opened and every write (including the final flush) succeeded;
+// failures are logged and reported as false.
+bool WMORenderer::saveFloorCache(const std::string& filepath) const {
+    // Create directory if needed. The error_code overload is used so a
+    // filesystem failure cannot throw; the open below then fails and is logged.
+    const std::filesystem::path path(filepath);
+    if (path.has_parent_path()) {
+        std::error_code ec;
+        std::filesystem::create_directories(path.parent_path(), ec);
+    }
+
+    std::ofstream file(filepath, std::ios::binary);
+    if (!file) {
+        core::Logger::getInstance().error("Failed to open floor cache file for writing: ", filepath);
+        return false;
+    }
+
+    // Write header: magic + version + count
+    const uint32_t magic = 0x574D4F46;  // "WMOF"
+    const uint32_t version = 1;
+    const uint64_t count = precomputedFloorGrid.size();
+
+    file.write(reinterpret_cast<const char*>(&magic), sizeof(magic));
+    file.write(reinterpret_cast<const char*>(&version), sizeof(version));
+    file.write(reinterpret_cast<const char*>(&count), sizeof(count));
+
+    // Write each entry: key (uint64) + height (float)
+    for (const auto& [key, height] : precomputedFloorGrid) {
+        file.write(reinterpret_cast<const char*>(&key), sizeof(key));
+        file.write(reinterpret_cast<const char*>(&height), sizeof(height));
+    }
+
+    // Stream writes fail silently (disk full, I/O error), so flush and check
+    // the stream state before reporting success.
+    file.flush();
+    if (!file) {
+        core::Logger::getInstance().error("Failed to write floor cache file: ", filepath);
+        return false;
+    }
+
+    core::Logger::getInstance().info("Saved WMO floor cache: ", count, " entries to ", filepath);
+    return true;
+}
+
+// Loads the floor-height cache from a binary file in the layout written by
+// saveFloorCache(): uint32 magic, uint32 version, uint64 entry count, then
+// `count` records of { uint64 grid key, float height }.
+//
+// precomputedFloorGrid is replaced only when the whole file parses cleanly.
+// A missing file, a bad header, or a truncated body is logged, returns false,
+// and leaves the current in-memory cache untouched.
+bool WMORenderer::loadFloorCache(const std::string& filepath) {
+    std::ifstream file(filepath, std::ios::binary);
+    if (!file) {
+        core::Logger::getInstance().info("No existing floor cache file: ", filepath);
+        return false;
+    }
+
+    // Read and validate header
+    uint32_t magic = 0, version = 0;
+    uint64_t count = 0;
+
+    file.read(reinterpret_cast<char*>(&magic), sizeof(magic));
+    file.read(reinterpret_cast<char*>(&version), sizeof(version));
+    file.read(reinterpret_cast<char*>(&count), sizeof(count));
+
+    // !file catches a header that was cut short (e.g. count never read).
+    if (!file || magic != 0x574D4F46 || version != 1) {
+        core::Logger::getInstance().warning("Invalid floor cache file format: ", filepath);
+        return false;
+    }
+
+    // Parse into a scratch map so a failure midway cannot clobber the live cache.
+    decltype(precomputedFloorGrid) loaded;
+
+    // `count` comes from disk and is untrusted: cap the up-front reservation so a
+    // corrupt header cannot request an enormous allocation. Larger (valid) files
+    // still load; the map simply grows as entries are inserted.
+    constexpr uint64_t kMaxReserve = 1u << 20;
+    loaded.reserve(static_cast<std::size_t>(std::min<uint64_t>(count, kMaxReserve)));
+
+    // Read entries
+    for (uint64_t i = 0; i < count; ++i) {
+        uint64_t key = 0;
+        float height = 0.0f;
+        file.read(reinterpret_cast<char*>(&key), sizeof(key));
+        file.read(reinterpret_cast<char*>(&height), sizeof(height));
+        if (!file) {
+            // Fewer records than the header promised: reject rather than insert garbage.
+            core::Logger::getInstance().warning("Truncated floor cache file: ", filepath);
+            return false;
+        }
+        loaded[key] = height;
+    }
+
+    precomputedFloorGrid.swap(loaded);
+
+    core::Logger::getInstance().info("Loaded WMO floor cache: ", precomputedFloorGrid.size(), " entries from ", filepath);
+    return true;
 }

 WMORenderer::GridCell WMORenderer::toCell(const glm::vec3& p) const {
@ -562,6 +673,10 @@ void WMORenderer::render(const Camera& camera, const glm::mat4& view, const glm:
        shader->setUniform("uShadowMap", 7);
    }

+    // Set up texture unit 0 for diffuse textures (set once per frame)
+    glActiveTexture(GL_TEXTURE0);
+    shader->setUniform("uTexture", 0);
+
    // Enable wireframe if requested
    if (wireframeMode) {
        glPolygonMode(GL_FRONT_AND_BACK, GL_LINE);
@ -657,7 +772,7 @@ void WMORenderer::render(const Camera& camera, const glm::mat4& view, const glm:
                // Hard distance cutoff - skip groups entirely if closest point is too far
                glm::vec3 closestPoint = glm::clamp(camPos, gMin, gMax);
                float distSq = glm::dot(closestPoint - camPos, closestPoint - camPos);
-                if (distSq > 40000.0f) {  // Beyond 200 units - hard skip
+                if (distSq > 6400.0f) {  // Beyond 80 units - hard skip
                    lastDistanceCulledGroups++;
                    continue;
                }
@ -822,40 +937,34 @@ bool WMORenderer::createGroupResources(const pipeline::WMOGroup& group, GroupRes
    return true;
 }

-void WMORenderer::renderGroup(const GroupResources& group, const ModelData& model,
+void WMORenderer::renderGroup(const GroupResources& group, [[maybe_unused]] const ModelData& model,
                              [[maybe_unused]] const glm::mat4& modelMatrix,
                              [[maybe_unused]] const glm::mat4& view,
                              [[maybe_unused]] const glm::mat4& projection) {
    glBindVertexArray(group.vao);

-    // Render each batch in original order (sorting breaks depth/alpha)
-    for (const auto& batch : group.batches) {
-        // Bind texture for this batch's material
-        GLuint texId = whiteTexture;
-        bool hasTexture = false;
+    // Use pre-computed merged batches (built at load time)
+    // Track bound state to avoid redundant GL calls
+    static GLuint lastBoundTex = 0;
+    static bool lastHasTexture = false;
+    static bool lastAlphaTest = false;

-        if (batch.materialId < model.materialTextureIndices.size()) {
-            uint32_t texIndex = model.materialTextureIndices[batch.materialId];
-            if (texIndex < model.textures.size()) {
-                texId = model.textures[texIndex];
-                hasTexture = (texId != 0 && texId != whiteTexture);
-            }
+    for (const auto& mb : group.mergedBatches) {
+        if (mb.texId != lastBoundTex) {
+            glBindTexture(GL_TEXTURE_2D, mb.texId);
+            lastBoundTex = mb.texId;
+        }
+        if (mb.hasTexture != lastHasTexture) {
+            shader->setUniform("uHasTexture", mb.hasTexture);
+            lastHasTexture = mb.hasTexture;
+        }
+        if (mb.alphaTest != lastAlphaTest) {
+            shader->setUniform("uAlphaTest", mb.alphaTest);
+            lastAlphaTest = mb.alphaTest;
        }

-        // Determine if this material uses alpha-test cutout (blendMode 1)
-        bool alphaTest = false;
-        if (batch.materialId < model.materialBlendModes.size()) {
-            alphaTest = (model.materialBlendModes[batch.materialId] == 1);
-        }
-
-        glActiveTexture(GL_TEXTURE0);
-        glBindTexture(GL_TEXTURE_2D, texId);
-        shader->setUniform("uTexture", 0);
-        shader->setUniform("uHasTexture", hasTexture);
-        shader->setUniform("uAlphaTest", alphaTest);
-
-        glDrawElements(GL_TRIANGLES, batch.indexCount, GL_UNSIGNED_SHORT,
-                      (void*)(batch.startIndex * sizeof(uint16_t)));
+        glMultiDrawElements(GL_TRIANGLES, mb.counts.data(), GL_UNSIGNED_SHORT,
+                            mb.offsets.data(), static_cast<GLsizei>(mb.counts.size()));
        lastDrawCalls++;
    }

@ -1209,6 +1318,17 @@ static glm::vec3 closestPointOnTriangle(const glm::vec3& p, const glm::vec3& a,
 }

 std::optional<float> WMORenderer::getFloorHeight(float glX, float glY, float glZ) const {
+    // Check persistent grid cache first (computed lazily, never expires)
+    uint64_t gridKey = floorGridKey(glX, glY);
+    auto gridIt = precomputedFloorGrid.find(gridKey);
+    if (gridIt != precomputedFloorGrid.end()) {
+        float cachedHeight = gridIt->second;
+        // Only use if cached height is below query point (not a ceiling)
+        if (cachedHeight <= glZ + 2.0f) {
+            return cachedHeight;
+        }
+    }
+
    QueryTimer timer(&queryTimeMs, &queryCallCount);
    std::optional<float> bestFloor;

@ -1304,6 +1424,11 @@ std::optional<float> WMORenderer::getFloorHeight(float glX, float glY, float glZ
        }
    }

+    // Cache the result in persistent grid (never expires)
+    if (bestFloor) {
+        precomputedFloorGrid[gridKey] = *bestFloor;
+    }
+
    return bestFloor;
 }