From 16b43367003334747a44f1089a619b1e1d1cb7a3 Mon Sep 17 00:00:00 2001 From: Kelsi Date: Sat, 7 Mar 2026 12:19:59 -0800 Subject: [PATCH] Batch GPU uploads to eliminate per-upload fence waits (stutter fix) Every uploadBuffer/VkTexture::upload called immediateSubmit which did a separate vkQueueSubmit + vkWaitForFences. Loading a single creature model with textures caused 4-8+ fence waits; terrain chunks caused 80+ per batch. Added beginUploadBatch/endUploadBatch to VkContext: records all upload commands into a single command buffer, submits once with one fence wait. Staging buffers are deferred for cleanup after the batch completes. Wrapped in batch mode: - CharacterRenderer::loadModel (creature VB/IB + textures) - M2Renderer::loadModel (doodad VB/IB + textures) - TerrainRenderer::loadTerrain/loadTerrainIncremental (chunk geometry + textures) - TerrainRenderer::uploadPreloadedTextures - WMORenderer::loadModel (group geometry + textures) --- include/rendering/vk_context.hpp | 14 ++++++++++++ src/rendering/character_renderer.cpp | 6 +++++ src/rendering/m2_renderer.cpp | 6 +++++ src/rendering/terrain_renderer.cpp | 15 ++++++++++++ src/rendering/vk_context.cpp | 34 ++++++++++++++++++++++++++++ src/rendering/vk_texture.cpp | 12 ++++++++-- src/rendering/vk_utils.cpp | 8 +++++-- src/rendering/wmo_renderer.cpp | 6 +++++ 8 files changed, 97 insertions(+), 4 deletions(-) diff --git a/include/rendering/vk_context.hpp b/include/rendering/vk_context.hpp index 3a242940..dab96d2a 100644 --- a/include/rendering/vk_context.hpp +++ b/include/rendering/vk_context.hpp @@ -1,5 +1,6 @@ #pragma once +#include "rendering/vk_utils.hpp" #include #include #include @@ -46,6 +47,13 @@ public: // Immediate submit for one-off GPU work (descriptor pool creation, etc.) void immediateSubmit(std::function&& function); + // Batch upload mode: records multiple upload commands into a single + // command buffer, then submits with ONE fence wait instead of one per upload. 
+ void beginUploadBatch(); + void endUploadBatch(); + bool isInUploadBatch() const { return inUploadBatch_; } + void deferStagingCleanup(AllocatedBuffer staging); + // Accessors VkInstance getInstance() const { return instance; } VkPhysicalDevice getPhysicalDevice() const { return physicalDevice; } @@ -143,6 +151,12 @@ private: VkCommandPool immCommandPool = VK_NULL_HANDLE; VkFence immFence = VK_NULL_HANDLE; + // Batch upload state (nesting-safe via depth counter) + int uploadBatchDepth_ = 0; + bool inUploadBatch_ = false; + VkCommandBuffer batchCmd_ = VK_NULL_HANDLE; + std::vector<AllocatedBuffer> batchStagingBuffers_; + // Depth buffer (shared across all framebuffers) VkImage depthImage = VK_NULL_HANDLE; VkImageView depthImageView = VK_NULL_HANDLE; diff --git a/src/rendering/character_renderer.cpp b/src/rendering/character_renderer.cpp index 2126e5e5..9aa99c72 100644 --- a/src/rendering/character_renderer.cpp +++ b/src/rendering/character_renderer.cpp @@ -1247,6 +1247,10 @@ bool CharacterRenderer::loadModel(const pipeline::M2Model& model, uint32_t id) { M2ModelGPU gpuModel; gpuModel.data = model; + // Batch all GPU uploads (VB, IB, textures) into a single command buffer + // submission with one fence wait, instead of one fence wait per upload. 
+ vkCtx_->beginUploadBatch(); + // Setup GPU buffers setupModelBuffers(gpuModel); @@ -1259,6 +1263,8 @@ bool CharacterRenderer::loadModel(const pipeline::M2Model& model, uint32_t id) { gpuModel.textureIds.push_back(texPtr); } + vkCtx_->endUploadBatch(); + models[id] = std::move(gpuModel); core::Logger::getInstance().debug("Loaded M2 model ", id, " (", model.vertices.size(), diff --git a/src/rendering/m2_renderer.cpp b/src/rendering/m2_renderer.cpp index d6df9dfe..c4e7a727 100644 --- a/src/rendering/m2_renderer.cpp +++ b/src/rendering/m2_renderer.cpp @@ -1185,6 +1185,10 @@ bool M2Renderer::loadModel(const pipeline::M2Model& model, uint32_t modelId) { } } + // Batch all GPU uploads (VB, IB, textures) into a single command buffer + // submission with one fence wait, instead of one fence wait per upload. + vkCtx_->beginUploadBatch(); + if (hasGeometry) { // Create VBO with interleaved vertex data // Format: position (3), normal (3), texcoord0 (2), texcoord1 (2), boneWeights (4), boneIndices (4 as float) @@ -1536,6 +1540,8 @@ bool M2Renderer::loadModel(const pipeline::M2Model& model, uint32_t modelId) { } } + vkCtx_->endUploadBatch(); + // Allocate Vulkan descriptor sets and UBOs for each batch for (auto& bgpu : gpuModel.batches) { // Create combined UBO for M2Params (binding 1) + M2Material (binding 2) diff --git a/src/rendering/terrain_renderer.cpp b/src/rendering/terrain_renderer.cpp index 227178d5..fb20ce42 100644 --- a/src/rendering/terrain_renderer.cpp +++ b/src/rendering/terrain_renderer.cpp @@ -326,6 +326,8 @@ bool TerrainRenderer::loadTerrain(const pipeline::TerrainMesh& mesh, } LOG_DEBUG("Loading terrain mesh: ", mesh.validChunkCount, " chunks"); + vkCtx->beginUploadBatch(); + for (int y = 0; y < 16; y++) { for (int x = 0; x < 16; x++) { const auto& chunk = mesh.getChunk(x, y); @@ -405,6 +407,8 @@ bool TerrainRenderer::loadTerrain(const pipeline::TerrainMesh& mesh, } } + vkCtx->endUploadBatch(); + LOG_DEBUG("Loaded ", chunks.size(), " terrain chunks to GPU"); 
return !chunks.empty(); } @@ -413,6 +417,10 @@ bool TerrainRenderer::loadTerrainIncremental(const pipeline::TerrainMesh& mesh, const std::vector& texturePaths, int tileX, int tileY, int& chunkIndex, int maxChunksPerCall) { + // Batch all GPU uploads (VBs, IBs, textures) into a single command buffer + // submission with one fence wait, instead of one per buffer/texture. + vkCtx->beginUploadBatch(); + int uploaded = 0; while (chunkIndex < 256 && uploaded < maxChunksPerCall) { int cy = chunkIndex / 16; @@ -490,6 +498,8 @@ bool TerrainRenderer::loadTerrainIncremental(const pipeline::TerrainMesh& mesh, uploaded++; } + vkCtx->endUploadBatch(); + return chunkIndex >= 256; } @@ -580,6 +590,9 @@ void TerrainRenderer::uploadPreloadedTextures( [](unsigned char c) { return static_cast(std::tolower(c)); }); return key; }; + // Batch all texture uploads into a single command buffer submission + vkCtx->beginUploadBatch(); + for (const auto& [path, blp] : textures) { std::string key = normalizeKey(path); if (textureCache.find(key) != textureCache.end()) continue; @@ -599,6 +612,8 @@ void TerrainRenderer::uploadPreloadedTextures( textureCacheBytes_ += e.approxBytes; textureCache[key] = std::move(e); } + + vkCtx->endUploadBatch(); } VkTexture* TerrainRenderer::createAlphaTexture(const std::vector& alphaData) { diff --git a/src/rendering/vk_context.cpp b/src/rendering/vk_context.cpp index e1a76cee..dc73c685 100644 --- a/src/rendering/vk_context.cpp +++ b/src/rendering/vk_context.cpp @@ -1423,10 +1423,44 @@ void VkContext::endSingleTimeCommands(VkCommandBuffer cmd) { } void VkContext::immediateSubmit(std::function&& function) { + if (inUploadBatch_) { + // Record into the batch command buffer — no submit, no fence wait + function(batchCmd_); + return; + } VkCommandBuffer cmd = beginSingleTimeCommands(); function(cmd); endSingleTimeCommands(cmd); } +void VkContext::beginUploadBatch() { + uploadBatchDepth_++; + if (inUploadBatch_) return; // already in a batch (nested call) + 
inUploadBatch_ = true; + batchCmd_ = beginSingleTimeCommands(); +} + +void VkContext::endUploadBatch() { + if (uploadBatchDepth_ <= 0) return; + uploadBatchDepth_--; + if (uploadBatchDepth_ > 0) return; // still inside an outer batch + + inUploadBatch_ = false; + + // Submit all recorded commands with a single fence wait + endSingleTimeCommands(batchCmd_); + batchCmd_ = VK_NULL_HANDLE; + + // Destroy all deferred staging buffers + for (auto& staging : batchStagingBuffers_) { + destroyBuffer(allocator, staging); + } + batchStagingBuffers_.clear(); +} + +void VkContext::deferStagingCleanup(AllocatedBuffer staging) { + batchStagingBuffers_.push_back(staging); +} + } // namespace rendering } // namespace wowee diff --git a/src/rendering/vk_texture.cpp b/src/rendering/vk_texture.cpp index fba6d72b..415e3d56 100644 --- a/src/rendering/vk_texture.cpp +++ b/src/rendering/vk_texture.cpp @@ -96,7 +96,11 @@ bool VkTexture::upload(VkContext& ctx, const uint8_t* pixels, uint32_t width, ui generateMipmaps(ctx, format, width, height); } - destroyBuffer(ctx.getAllocator(), staging); + if (ctx.isInUploadBatch()) { + ctx.deferStagingCleanup(staging); + } else { + destroyBuffer(ctx.getAllocator(), staging); + } return true; } @@ -162,7 +166,11 @@ bool VkTexture::uploadMips(VkContext& ctx, const uint8_t* const* mipData, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT); }); - destroyBuffer(ctx.getAllocator(), staging); + if (ctx.isInUploadBatch()) { + ctx.deferStagingCleanup(staging); + } else { + destroyBuffer(ctx.getAllocator(), staging); + } return true; } diff --git a/src/rendering/vk_utils.cpp b/src/rendering/vk_utils.cpp index d105c986..3a2f51d1 100644 --- a/src/rendering/vk_utils.cpp +++ b/src/rendering/vk_utils.cpp @@ -198,8 +198,12 @@ AllocatedBuffer uploadBuffer(VkContext& ctx, const void* data, VkDeviceSize size vkCmdCopyBuffer(cmd, staging.buffer, gpuBuffer.buffer, 1, &copyRegion); }); - // Destroy staging buffer - destroyBuffer(ctx.getAllocator(), 
staging); + // Destroy staging buffer (deferred if in batch mode) + if (ctx.isInUploadBatch()) { + ctx.deferStagingCleanup(staging); + } else { + destroyBuffer(ctx.getAllocator(), staging); + } return gpuBuffer; } diff --git a/src/rendering/wmo_renderer.cpp b/src/rendering/wmo_renderer.cpp index ff6b0035..691abaa1 100644 --- a/src/rendering/wmo_renderer.cpp +++ b/src/rendering/wmo_renderer.cpp @@ -419,6 +419,10 @@ bool WMORenderer::loadModel(const pipeline::WMOModel& model, uint32_t id) { core::Logger::getInstance().debug(" WMO bounds: min=(", model.boundingBoxMin.x, ", ", model.boundingBoxMin.y, ", ", model.boundingBoxMin.z, ") max=(", model.boundingBoxMax.x, ", ", model.boundingBoxMax.y, ", ", model.boundingBoxMax.z, ")"); + // Batch all GPU uploads (textures, VBs, IBs) into a single command buffer + // submission with one fence wait, instead of one per upload. + vkCtx_->beginUploadBatch(); + // Load textures for this model core::Logger::getInstance().debug(" WMO has ", model.textures.size(), " texture paths, ", model.materials.size(), " materials"); if (assetManager && !model.textures.empty()) { @@ -720,6 +724,8 @@ bool WMORenderer::loadModel(const pipeline::WMOModel& model, uint32_t id) { groupRes.allUntextured = !anyTextured && !groupRes.mergedBatches.empty(); } + vkCtx_->endUploadBatch(); + // Copy portal data for visibility culling modelData.portalVertices = model.portalVertices; for (const auto& portal : model.portals) {