From a152023e5e24314698e9c8d4707299ee995f63e4 Mon Sep 17 00:00:00 2001 From: Kelsi Date: Tue, 24 Mar 2026 11:44:54 -0700 Subject: [PATCH 1/9] fix: add VkSampler cache to prevent sampler exhaustion crash MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Validation layers revealed 9965 VkSamplers allocated against a device limit of 4000 — every VkTexture created its own sampler even when configurations were identical. This exhausted NVIDIA's sampler pool and caused intermittent SIGSEGV in vkCmdBeginRenderPass. Add a thread-safe sampler cache in VkContext that deduplicates samplers by FNV-1a hash of 15 VkSamplerCreateInfo fields (every member except sType, pNext, and flags). All texture, render target, renderer, water, and loading screen sampler creation now goes through getOrCreateSampler(). Textures set ownsSampler_=false so shared samplers aren't double-freed. Also auto-disable anisotropy in the cache when the physical device doesn't support the samplerAnisotropy feature, fixing the validation error VUID-VkSamplerCreateInfo-anisotropyEnable-01070. 
--- include/rendering/vk_context.hpp | 21 +++++ include/rendering/vk_render_target.hpp | 1 + include/rendering/vk_texture.hpp | 1 + src/rendering/loading_screen.cpp | 9 +-- src/rendering/renderer.cpp | 23 +++--- src/rendering/vk_context.cpp | 105 +++++++++++++++++++++++-- src/rendering/vk_render_target.cpp | 11 ++- src/rendering/vk_texture.cpp | 44 +++++++++-- src/rendering/water_renderer.cpp | 15 ++-- src/ui/auth_screen.cpp | 4 +- 10 files changed, 194 insertions(+), 40 deletions(-) diff --git a/include/rendering/vk_context.hpp b/include/rendering/vk_context.hpp index 4c0764a9..fbc16e2a 100644 --- a/include/rendering/vk_context.hpp +++ b/include/rendering/vk_context.hpp @@ -8,6 +8,8 @@ #include #include #include +#include +#include namespace wowee { namespace rendering { @@ -119,6 +121,18 @@ public: VkImageView getDepthResolveImageView() const { return depthResolveImageView; } VkImageView getDepthImageView() const { return depthImageView; } + // Sampler cache: returns a shared VkSampler matching the given create info. + // Callers must NOT destroy the returned sampler — it is owned by VkContext. + // Automatically clamps anisotropy if the device doesn't support it. + VkSampler getOrCreateSampler(const VkSamplerCreateInfo& info); + + // Whether the physical device supports sampler anisotropy. + bool isSamplerAnisotropySupported() const { return samplerAnisotropySupported_; } + + // Global sampler cache accessor (set during VkContext::initialize, cleared on shutdown). + // Used by VkTexture and other code that only has a VkDevice handle. + static VkContext* globalInstance() { return sInstance_; } + // UI texture upload: creates a Vulkan texture from RGBA data and returns // a VkDescriptorSet suitable for use as ImTextureID. // The caller does NOT need to free the result — resources are tracked and @@ -239,6 +253,13 @@ private: }; std::vector uiTextures_; + // Sampler cache — deduplicates VkSamplers by configuration hash. 
+ std::mutex samplerCacheMutex_; + std::unordered_map samplerCache_; + bool samplerAnisotropySupported_ = false; + + static VkContext* sInstance_; + #ifndef NDEBUG bool enableValidation = true; #else diff --git a/include/rendering/vk_render_target.hpp b/include/rendering/vk_render_target.hpp index ffa1cd4f..a954bc5b 100644 --- a/include/rendering/vk_render_target.hpp +++ b/include/rendering/vk_render_target.hpp @@ -73,6 +73,7 @@ private: bool hasDepth_ = false; VkSampleCountFlagBits msaaSamples_ = VK_SAMPLE_COUNT_1_BIT; VkSampler sampler_ = VK_NULL_HANDLE; + bool ownsSampler_ = true; VkRenderPass renderPass_ = VK_NULL_HANDLE; VkFramebuffer framebuffer_ = VK_NULL_HANDLE; }; diff --git a/include/rendering/vk_texture.hpp b/include/rendering/vk_texture.hpp index 83167d9d..51c57db8 100644 --- a/include/rendering/vk_texture.hpp +++ b/include/rendering/vk_texture.hpp @@ -72,6 +72,7 @@ private: AllocatedImage image_{}; VkSampler sampler_ = VK_NULL_HANDLE; uint32_t mipLevels_ = 1; + bool ownsSampler_ = true; // false when sampler comes from VkContext cache }; } // namespace rendering diff --git a/src/rendering/loading_screen.cpp b/src/rendering/loading_screen.cpp index 92c1fe1c..8bbf4013 100644 --- a/src/rendering/loading_screen.cpp +++ b/src/rendering/loading_screen.cpp @@ -40,10 +40,7 @@ void LoadingScreen::shutdown() { // ImGui manages descriptor set lifetime bgDescriptorSet = VK_NULL_HANDLE; } - if (bgSampler) { - vkDestroySampler(device, bgSampler, nullptr); - bgSampler = VK_NULL_HANDLE; - } + bgSampler = VK_NULL_HANDLE; // Owned by VkContext sampler cache if (bgImageView) { vkDestroyImageView(device, bgImageView, nullptr); bgImageView = VK_NULL_HANDLE; @@ -94,7 +91,7 @@ bool LoadingScreen::loadImage(const std::string& path) { if (bgImage) { VkDevice device = vkCtx->getDevice(); vkDeviceWaitIdle(device); - if (bgSampler) { vkDestroySampler(device, bgSampler, nullptr); bgSampler = VK_NULL_HANDLE; } + bgSampler = VK_NULL_HANDLE; // Owned by VkContext sampler cache if 
(bgImageView) { vkDestroyImageView(device, bgImageView, nullptr); bgImageView = VK_NULL_HANDLE; } if (bgImage) { vkDestroyImage(device, bgImage, nullptr); bgImage = VK_NULL_HANDLE; } if (bgMemory) { vkFreeMemory(device, bgMemory, nullptr); bgMemory = VK_NULL_HANDLE; } @@ -230,7 +227,7 @@ bool LoadingScreen::loadImage(const std::string& path) { samplerInfo.addressModeU = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE; samplerInfo.addressModeV = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE; samplerInfo.addressModeW = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE; - vkCreateSampler(device, &samplerInfo, nullptr, &bgSampler); + bgSampler = vkCtx->getOrCreateSampler(samplerInfo); } // Register with ImGui as a texture diff --git a/src/rendering/renderer.cpp b/src/rendering/renderer.cpp index e39621c6..7fd90840 100644 --- a/src/rendering/renderer.cpp +++ b/src/rendering/renderer.cpp @@ -343,7 +343,8 @@ bool Renderer::createPerFrameResources() { sampCI.borderColor = VK_BORDER_COLOR_FLOAT_OPAQUE_WHITE; sampCI.compareEnable = VK_TRUE; sampCI.compareOp = VK_COMPARE_OP_LESS_OR_EQUAL; - if (vkCreateSampler(device, &sampCI, nullptr, &shadowSampler) != VK_SUCCESS) { + shadowSampler = vkCtx->getOrCreateSampler(sampCI); + if (shadowSampler == VK_NULL_HANDLE) { LOG_ERROR("Failed to create shadow sampler"); return false; } @@ -597,7 +598,7 @@ void Renderer::destroyPerFrameResources() { shadowDepthLayout_[i] = VK_IMAGE_LAYOUT_UNDEFINED; } if (shadowRenderPass) { vkDestroyRenderPass(device, shadowRenderPass, nullptr); shadowRenderPass = VK_NULL_HANDLE; } - if (shadowSampler) { vkDestroySampler(device, shadowSampler, nullptr); shadowSampler = VK_NULL_HANDLE; } + shadowSampler = VK_NULL_HANDLE; // Owned by VkContext sampler cache } void Renderer::updatePerFrameUBO() { @@ -4057,7 +4058,8 @@ bool Renderer::initFSRResources() { samplerInfo.addressModeV = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE; samplerInfo.addressModeW = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE; samplerInfo.mipmapMode = VK_SAMPLER_MIPMAP_MODE_LINEAR; - 
if (vkCreateSampler(device, &samplerInfo, nullptr, &fsr_.sceneSampler) != VK_SUCCESS) { + fsr_.sceneSampler = vkCtx->getOrCreateSampler(samplerInfo); + if (fsr_.sceneSampler == VK_NULL_HANDLE) { LOG_ERROR("FSR: failed to create sampler"); destroyFSRResources(); return false; @@ -4171,7 +4173,7 @@ void Renderer::destroyFSRResources() { if (fsr_.descPool) { vkDestroyDescriptorPool(device, fsr_.descPool, nullptr); fsr_.descPool = VK_NULL_HANDLE; fsr_.descSet = VK_NULL_HANDLE; } if (fsr_.descSetLayout) { vkDestroyDescriptorSetLayout(device, fsr_.descSetLayout, nullptr); fsr_.descSetLayout = VK_NULL_HANDLE; } if (fsr_.sceneFramebuffer) { vkDestroyFramebuffer(device, fsr_.sceneFramebuffer, nullptr); fsr_.sceneFramebuffer = VK_NULL_HANDLE; } - if (fsr_.sceneSampler) { vkDestroySampler(device, fsr_.sceneSampler, nullptr); fsr_.sceneSampler = VK_NULL_HANDLE; } + fsr_.sceneSampler = VK_NULL_HANDLE; // Owned by VkContext sampler cache destroyImage(device, alloc, fsr_.sceneDepthResolve); destroyImage(device, alloc, fsr_.sceneMsaaColor); destroyImage(device, alloc, fsr_.sceneDepth); @@ -4350,11 +4352,11 @@ bool Renderer::initFSR2Resources() { samplerInfo.addressModeU = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE; samplerInfo.addressModeV = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE; samplerInfo.addressModeW = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE; - vkCreateSampler(device, &samplerInfo, nullptr, &fsr2_.linearSampler); + fsr2_.linearSampler = vkCtx->getOrCreateSampler(samplerInfo); samplerInfo.minFilter = VK_FILTER_NEAREST; samplerInfo.magFilter = VK_FILTER_NEAREST; - vkCreateSampler(device, &samplerInfo, nullptr, &fsr2_.nearestSampler); + fsr2_.nearestSampler = vkCtx->getOrCreateSampler(samplerInfo); #if WOWEE_HAS_AMD_FSR2 // Initialize AMD FSR2 context; fall back to internal path on any failure. 
@@ -4753,8 +4755,8 @@ void Renderer::destroyFSR2Resources() { if (fsr2_.motionVecDescSetLayout) { vkDestroyDescriptorSetLayout(device, fsr2_.motionVecDescSetLayout, nullptr); fsr2_.motionVecDescSetLayout = VK_NULL_HANDLE; } if (fsr2_.sceneFramebuffer) { vkDestroyFramebuffer(device, fsr2_.sceneFramebuffer, nullptr); fsr2_.sceneFramebuffer = VK_NULL_HANDLE; } - if (fsr2_.linearSampler) { vkDestroySampler(device, fsr2_.linearSampler, nullptr); fsr2_.linearSampler = VK_NULL_HANDLE; } - if (fsr2_.nearestSampler) { vkDestroySampler(device, fsr2_.nearestSampler, nullptr); fsr2_.nearestSampler = VK_NULL_HANDLE; } + fsr2_.linearSampler = VK_NULL_HANDLE; // Owned by VkContext sampler cache + fsr2_.nearestSampler = VK_NULL_HANDLE; // Owned by VkContext sampler cache destroyImage(device, alloc, fsr2_.motionVectors); for (int i = 0; i < 2; i++) destroyImage(device, alloc, fsr2_.history[i]); @@ -5273,7 +5275,8 @@ bool Renderer::initFXAAResources() { samplerInfo.addressModeV = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE; samplerInfo.addressModeW = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE; samplerInfo.mipmapMode = VK_SAMPLER_MIPMAP_MODE_LINEAR; - if (vkCreateSampler(device, &samplerInfo, nullptr, &fxaa_.sceneSampler) != VK_SUCCESS) { + fxaa_.sceneSampler = vkCtx->getOrCreateSampler(samplerInfo); + if (fxaa_.sceneSampler == VK_NULL_HANDLE) { LOG_ERROR("FXAA: failed to create sampler"); destroyFXAAResources(); return false; @@ -5383,7 +5386,7 @@ void Renderer::destroyFXAAResources() { if (fxaa_.descPool) { vkDestroyDescriptorPool(device, fxaa_.descPool, nullptr); fxaa_.descPool = VK_NULL_HANDLE; fxaa_.descSet = VK_NULL_HANDLE; } if (fxaa_.descSetLayout) { vkDestroyDescriptorSetLayout(device, fxaa_.descSetLayout, nullptr); fxaa_.descSetLayout = VK_NULL_HANDLE; } if (fxaa_.sceneFramebuffer) { vkDestroyFramebuffer(device, fxaa_.sceneFramebuffer, nullptr); fxaa_.sceneFramebuffer = VK_NULL_HANDLE; } - if (fxaa_.sceneSampler) { vkDestroySampler(device, fxaa_.sceneSampler, nullptr); 
fxaa_.sceneSampler = VK_NULL_HANDLE; } + fxaa_.sceneSampler = VK_NULL_HANDLE; // Owned by VkContext sampler cache destroyImage(device, alloc, fxaa_.sceneDepthResolve); destroyImage(device, alloc, fxaa_.sceneMsaaColor); destroyImage(device, alloc, fxaa_.sceneDepth); diff --git a/src/rendering/vk_context.cpp b/src/rendering/vk_context.cpp index 9d1427d5..323af430 100644 --- a/src/rendering/vk_context.cpp +++ b/src/rendering/vk_context.cpp @@ -13,6 +13,44 @@ namespace wowee { namespace rendering { +VkContext* VkContext::sInstance_ = nullptr; + +// Hash a VkSamplerCreateInfo into a 64-bit key for the sampler cache. +static uint64_t hashSamplerCreateInfo(const VkSamplerCreateInfo& s) { + // Pack the relevant fields into a deterministic hash. + // FNV-1a 64-bit on the raw config values. + uint64_t h = 14695981039346656037ULL; + auto mix = [&](uint64_t v) { + h ^= v; + h *= 1099511628211ULL; + }; + mix(static_cast(s.minFilter)); + mix(static_cast(s.magFilter)); + mix(static_cast(s.mipmapMode)); + mix(static_cast(s.addressModeU)); + mix(static_cast(s.addressModeV)); + mix(static_cast(s.addressModeW)); + mix(static_cast(s.anisotropyEnable)); + // Bit-cast floats to uint32_t for hashing + uint32_t aniso; + std::memcpy(&aniso, &s.maxAnisotropy, sizeof(aniso)); + mix(static_cast(aniso)); + uint32_t maxLodBits; + std::memcpy(&maxLodBits, &s.maxLod, sizeof(maxLodBits)); + mix(static_cast(maxLodBits)); + uint32_t minLodBits; + std::memcpy(&minLodBits, &s.minLod, sizeof(minLodBits)); + mix(static_cast(minLodBits)); + mix(static_cast(s.compareEnable)); + mix(static_cast(s.compareOp)); + mix(static_cast(s.borderColor)); + uint32_t biasBits; + std::memcpy(&biasBits, &s.mipLodBias, sizeof(biasBits)); + mix(static_cast(biasBits)); + mix(static_cast(s.unnormalizedCoordinates)); + return h; +} + static VKAPI_ATTR VkBool32 VKAPI_CALL debugCallback( VkDebugUtilsMessageSeverityFlagBitsEXT severity, [[maybe_unused]] VkDebugUtilsMessageTypeFlagsEXT type, @@ -52,6 +90,14 @@ bool 
VkContext::initialize(SDL_Window* window) { if (!createSyncObjects()) return false; if (!createImGuiResources()) return false; + // Query anisotropy support from the physical device. + VkPhysicalDeviceFeatures supportedFeatures{}; + vkGetPhysicalDeviceFeatures(physicalDevice, &supportedFeatures); + samplerAnisotropySupported_ = (supportedFeatures.samplerAnisotropy == VK_TRUE); + LOG_INFO("Sampler anisotropy supported: ", samplerAnisotropySupported_ ? "YES" : "NO"); + + sInstance_ = this; + LOG_INFO("Vulkan context initialized successfully"); return true; } @@ -97,6 +143,15 @@ void VkContext::shutdown() { pipelineCache_ = VK_NULL_HANDLE; } + // Destroy all cached samplers. + for (auto& [key, sampler] : samplerCache_) { + if (sampler) vkDestroySampler(device, sampler, nullptr); + } + samplerCache_.clear(); + LOG_INFO("Sampler cache cleared"); + + sInstance_ = nullptr; + LOG_WARNING("VkContext::shutdown - destroySwapchain..."); destroySwapchain(); @@ -135,6 +190,46 @@ void VkContext::runDeferredCleanup(uint32_t frameIndex) { q.clear(); } +VkSampler VkContext::getOrCreateSampler(const VkSamplerCreateInfo& info) { + // Clamp anisotropy if the device doesn't support the feature. + VkSamplerCreateInfo adjusted = info; + if (!samplerAnisotropySupported_) { + adjusted.anisotropyEnable = VK_FALSE; + adjusted.maxAnisotropy = 1.0f; + } + + uint64_t key = hashSamplerCreateInfo(adjusted); + + { + std::lock_guard lock(samplerCacheMutex_); + auto it = samplerCache_.find(key); + if (it != samplerCache_.end()) { + return it->second; + } + } + + // Create a new sampler outside the lock (vkCreateSampler is thread-safe + // for distinct create infos, but we re-lock to insert). 
+ VkSampler sampler = VK_NULL_HANDLE; + if (vkCreateSampler(device, &adjusted, nullptr, &sampler) != VK_SUCCESS) { + LOG_ERROR("getOrCreateSampler: vkCreateSampler failed"); + return VK_NULL_HANDLE; + } + + { + std::lock_guard lock(samplerCacheMutex_); + // Double-check: another thread may have inserted while we were creating. + auto [it, inserted] = samplerCache_.emplace(key, sampler); + if (!inserted) { + // Another thread won the race — destroy our duplicate and use theirs. + vkDestroySampler(device, sampler, nullptr); + return it->second; + } + } + + return sampler; +} + bool VkContext::createInstance(SDL_Window* window) { // Get required SDL extensions unsigned int sdlExtCount = 0; @@ -980,10 +1075,7 @@ void VkContext::destroyImGuiResources() { if (tex.memory) vkFreeMemory(device, tex.memory, nullptr); } uiTextures_.clear(); - if (uiTextureSampler_) { - vkDestroySampler(device, uiTextureSampler_, nullptr); - uiTextureSampler_ = VK_NULL_HANDLE; - } + uiTextureSampler_ = VK_NULL_HANDLE; // Owned by sampler cache if (imguiDescriptorPool) { vkDestroyDescriptorPool(device, imguiDescriptorPool, nullptr); @@ -1015,7 +1107,7 @@ VkDescriptorSet VkContext::uploadImGuiTexture(const uint8_t* rgba, int width, in VkDeviceSize imageSize = static_cast(width) * height * 4; - // Create shared sampler on first call + // Create shared sampler on first call (via sampler cache) if (!uiTextureSampler_) { VkSamplerCreateInfo si{}; si.sType = VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO; @@ -1024,7 +1116,8 @@ VkDescriptorSet VkContext::uploadImGuiTexture(const uint8_t* rgba, int width, in si.addressModeU = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE; si.addressModeV = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE; si.addressModeW = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE; - if (vkCreateSampler(device, &si, nullptr, &uiTextureSampler_) != VK_SUCCESS) { + uiTextureSampler_ = getOrCreateSampler(si); + if (!uiTextureSampler_) { LOG_ERROR("Failed to create UI texture sampler"); return VK_NULL_HANDLE; } diff 
--git a/src/rendering/vk_render_target.cpp b/src/rendering/vk_render_target.cpp index f2099bbf..4692d45f 100644 --- a/src/rendering/vk_render_target.cpp +++ b/src/rendering/vk_render_target.cpp @@ -49,7 +49,7 @@ bool VkRenderTarget::create(VkContext& ctx, uint32_t width, uint32_t height, } } - // Create sampler (linear filtering, clamp to edge) + // Create sampler (linear filtering, clamp to edge) via cache VkSamplerCreateInfo samplerInfo{}; samplerInfo.sType = VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO; samplerInfo.minFilter = VK_FILTER_LINEAR; @@ -61,11 +61,13 @@ bool VkRenderTarget::create(VkContext& ctx, uint32_t width, uint32_t height, samplerInfo.minLod = 0.0f; samplerInfo.maxLod = 0.0f; - if (vkCreateSampler(device, &samplerInfo, nullptr, &sampler_) != VK_SUCCESS) { + sampler_ = ctx.getOrCreateSampler(samplerInfo); + if (sampler_ == VK_NULL_HANDLE) { LOG_ERROR("VkRenderTarget: failed to create sampler"); destroy(device, allocator); return false; } + ownsSampler_ = false; // Create render pass if (useMSAA) { @@ -259,10 +261,11 @@ void VkRenderTarget::destroy(VkDevice device, VmaAllocator allocator) { vkDestroyRenderPass(device, renderPass_, nullptr); renderPass_ = VK_NULL_HANDLE; } - if (sampler_) { + if (sampler_ && ownsSampler_) { vkDestroySampler(device, sampler_, nullptr); - sampler_ = VK_NULL_HANDLE; } + sampler_ = VK_NULL_HANDLE; + ownsSampler_ = true; destroyImage(device, allocator, resolveImage_); destroyImage(device, allocator, depthImage_); destroyImage(device, allocator, colorImage_); diff --git a/src/rendering/vk_texture.cpp b/src/rendering/vk_texture.cpp index 415e3d56..6ef1abac 100644 --- a/src/rendering/vk_texture.cpp +++ b/src/rendering/vk_texture.cpp @@ -13,9 +13,11 @@ VkTexture::~VkTexture() { } VkTexture::VkTexture(VkTexture&& other) noexcept - : image_(other.image_), sampler_(other.sampler_), mipLevels_(other.mipLevels_) { + : image_(other.image_), sampler_(other.sampler_), mipLevels_(other.mipLevels_), + ownsSampler_(other.ownsSampler_) { 
other.image_ = {}; other.sampler_ = VK_NULL_HANDLE; + other.ownsSampler_ = true; } VkTexture& VkTexture::operator=(VkTexture&& other) noexcept { @@ -23,8 +25,10 @@ VkTexture& VkTexture::operator=(VkTexture&& other) noexcept { image_ = other.image_; sampler_ = other.sampler_; mipLevels_ = other.mipLevels_; + ownsSampler_ = other.ownsSampler_; other.image_ = {}; other.sampler_ = VK_NULL_HANDLE; + other.ownsSampler_ = true; } return *this; } @@ -214,11 +218,20 @@ bool VkTexture::createSampler(VkDevice device, samplerInfo.minLod = 0.0f; samplerInfo.maxLod = static_cast(mipLevels_); + // Use sampler cache if VkContext is available. + auto* ctx = VkContext::globalInstance(); + if (ctx) { + sampler_ = ctx->getOrCreateSampler(samplerInfo); + ownsSampler_ = false; + return sampler_ != VK_NULL_HANDLE; + } + + // Fallback: no VkContext (shouldn't happen in normal use). if (vkCreateSampler(device, &samplerInfo, nullptr, &sampler_) != VK_SUCCESS) { LOG_ERROR("Failed to create texture sampler"); return false; } - + ownsSampler_ = true; return true; } @@ -246,11 +259,20 @@ bool VkTexture::createSampler(VkDevice device, samplerInfo.minLod = 0.0f; samplerInfo.maxLod = static_cast(mipLevels_); + // Use sampler cache if VkContext is available. + auto* ctx = VkContext::globalInstance(); + if (ctx) { + sampler_ = ctx->getOrCreateSampler(samplerInfo); + ownsSampler_ = false; + return sampler_ != VK_NULL_HANDLE; + } + + // Fallback: no VkContext (shouldn't happen in normal use). if (vkCreateSampler(device, &samplerInfo, nullptr, &sampler_) != VK_SUCCESS) { LOG_ERROR("Failed to create texture sampler"); return false; } - + ownsSampler_ = true; return true; } @@ -269,19 +291,29 @@ bool VkTexture::createShadowSampler(VkDevice device) { samplerInfo.minLod = 0.0f; samplerInfo.maxLod = 1.0f; + // Use sampler cache if VkContext is available. 
+ auto* ctx = VkContext::globalInstance(); + if (ctx) { + sampler_ = ctx->getOrCreateSampler(samplerInfo); + ownsSampler_ = false; + return sampler_ != VK_NULL_HANDLE; + } + + // Fallback: no VkContext (shouldn't happen in normal use). if (vkCreateSampler(device, &samplerInfo, nullptr, &sampler_) != VK_SUCCESS) { LOG_ERROR("Failed to create shadow sampler"); return false; } - + ownsSampler_ = true; return true; } void VkTexture::destroy(VkDevice device, VmaAllocator allocator) { - if (sampler_ != VK_NULL_HANDLE) { + if (sampler_ != VK_NULL_HANDLE && ownsSampler_) { vkDestroySampler(device, sampler_, nullptr); - sampler_ = VK_NULL_HANDLE; } + sampler_ = VK_NULL_HANDLE; + ownsSampler_ = true; destroyImage(device, allocator, image_); } diff --git a/src/rendering/water_renderer.cpp b/src/rendering/water_renderer.cpp index 81b1819e..ac9069f4 100644 --- a/src/rendering/water_renderer.cpp +++ b/src/rendering/water_renderer.cpp @@ -352,8 +352,8 @@ void WaterRenderer::destroySceneHistoryResources() { if (sh.depthImage) { vmaDestroyImage(vkCtx->getAllocator(), sh.depthImage, sh.depthAlloc); sh.depthImage = VK_NULL_HANDLE; sh.depthAlloc = VK_NULL_HANDLE; } sh.sceneSet = VK_NULL_HANDLE; } - if (sceneColorSampler) { vkDestroySampler(device, sceneColorSampler, nullptr); sceneColorSampler = VK_NULL_HANDLE; } - if (sceneDepthSampler) { vkDestroySampler(device, sceneDepthSampler, nullptr); sceneDepthSampler = VK_NULL_HANDLE; } + sceneColorSampler = VK_NULL_HANDLE; // Owned by VkContext sampler cache + sceneDepthSampler = VK_NULL_HANDLE; // Owned by VkContext sampler cache sceneHistoryExtent = {0, 0}; sceneHistoryReady = false; } @@ -374,13 +374,15 @@ void WaterRenderer::createSceneHistoryResources(VkExtent2D extent, VkFormat colo sampCI.addressModeU = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE; sampCI.addressModeV = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE; sampCI.addressModeW = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE; - if (vkCreateSampler(device, &sampCI, nullptr, &sceneColorSampler) != 
VK_SUCCESS) { + sceneColorSampler = vkCtx->getOrCreateSampler(sampCI); + if (sceneColorSampler == VK_NULL_HANDLE) { LOG_ERROR("WaterRenderer: failed to create scene color sampler"); return; } sampCI.magFilter = VK_FILTER_NEAREST; sampCI.minFilter = VK_FILTER_NEAREST; - if (vkCreateSampler(device, &sampCI, nullptr, &sceneDepthSampler) != VK_SUCCESS) { + sceneDepthSampler = vkCtx->getOrCreateSampler(sampCI); + if (sceneDepthSampler == VK_NULL_HANDLE) { LOG_ERROR("WaterRenderer: failed to create scene depth sampler"); return; } @@ -1718,7 +1720,8 @@ void WaterRenderer::createReflectionResources() { sampCI.addressModeU = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE; sampCI.addressModeV = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE; sampCI.addressModeW = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE; - if (vkCreateSampler(device, &sampCI, nullptr, &reflectionSampler) != VK_SUCCESS) { + reflectionSampler = vkCtx->getOrCreateSampler(sampCI); + if (reflectionSampler == VK_NULL_HANDLE) { LOG_ERROR("WaterRenderer: failed to create reflection sampler"); return; } @@ -1848,7 +1851,7 @@ void WaterRenderer::destroyReflectionResources() { if (reflectionDepthView) { vkDestroyImageView(device, reflectionDepthView, nullptr); reflectionDepthView = VK_NULL_HANDLE; } if (reflectionColorImage) { vmaDestroyImage(allocator, reflectionColorImage, reflectionColorAlloc); reflectionColorImage = VK_NULL_HANDLE; } if (reflectionDepthImage) { vmaDestroyImage(allocator, reflectionDepthImage, reflectionDepthAlloc); reflectionDepthImage = VK_NULL_HANDLE; } - if (reflectionSampler) { vkDestroySampler(device, reflectionSampler, nullptr); reflectionSampler = VK_NULL_HANDLE; } + reflectionSampler = VK_NULL_HANDLE; // Owned by VkContext sampler cache if (reflectionUBO) { AllocatedBuffer ab{}; ab.buffer = reflectionUBO; ab.allocation = reflectionUBOAlloc; destroyBuffer(allocator, ab); diff --git a/src/ui/auth_screen.cpp b/src/ui/auth_screen.cpp index 777285cf..95cfabc3 100644 --- a/src/ui/auth_screen.cpp +++ 
b/src/ui/auth_screen.cpp @@ -915,7 +915,7 @@ bool AuthScreen::loadBackgroundImage() { samplerInfo.addressModeU = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE; samplerInfo.addressModeV = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE; samplerInfo.addressModeW = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE; - vkCreateSampler(device, &samplerInfo, nullptr, &bgSampler); + bgSampler = bgVkCtx->getOrCreateSampler(samplerInfo); } bgDescriptorSet = ImGui_ImplVulkan_AddTexture(bgSampler, bgImageView, @@ -930,7 +930,7 @@ void AuthScreen::destroyBackgroundImage() { VkDevice device = bgVkCtx->getDevice(); vkDeviceWaitIdle(device); if (bgDescriptorSet) { ImGui_ImplVulkan_RemoveTexture(bgDescriptorSet); bgDescriptorSet = VK_NULL_HANDLE; } - if (bgSampler) { vkDestroySampler(device, bgSampler, nullptr); bgSampler = VK_NULL_HANDLE; } + bgSampler = VK_NULL_HANDLE; // Owned by VkContext sampler cache if (bgImageView) { vkDestroyImageView(device, bgImageView, nullptr); bgImageView = VK_NULL_HANDLE; } if (bgImage) { vkDestroyImage(device, bgImage, nullptr); bgImage = VK_NULL_HANDLE; } if (bgMemory) { vkFreeMemory(device, bgMemory, nullptr); bgMemory = VK_NULL_HANDLE; } From 9a6a430768719ef19ee512b1558244b37fdee5d2 Mon Sep 17 00:00:00 2001 From: Kelsi Date: Tue, 24 Mar 2026 13:05:27 -0700 Subject: [PATCH 2/9] fix: track render pass subpass mode to prevent ImGui secondary violation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When parallel recording is active, the scene pass uses VK_SUBPASS_CONTENTS_SECONDARY_COMMAND_BUFFERS. Post-processing paths (FSR/FXAA) end the scene pass and begin a new INLINE render pass for the swapchain output. ImGui rendering must use the correct mode — secondary buffers for SECONDARY passes, direct calls for INLINE. Previously the check used a static condition based on enabled features (!fsr && !fsr2 && !fxaa && parallel), which could mismatch if a feature was enabled but initialization failed. 
Replace with endFrameInlineMode_ flag that tracks the actual current render pass mode at runtime, eliminating the validation error VUID-vkCmdDrawIndexed-commandBuffer-recording that caused intermittent NVIDIA driver crashes. --- include/rendering/renderer.hpp | 1 + src/rendering/renderer.cpp | 23 ++++++++++++++++------- 2 files changed, 17 insertions(+), 7 deletions(-) diff --git a/include/rendering/renderer.hpp b/include/rendering/renderer.hpp index b53e87d1..80b33fe6 100644 --- a/include/rendering/renderer.hpp +++ b/include/rendering/renderer.hpp @@ -668,6 +668,7 @@ private: VkCommandBuffer secondaryCmds_[NUM_SECONDARIES][MAX_FRAMES] = {}; bool parallelRecordingEnabled_ = false; // set true after pools/buffers created + bool endFrameInlineMode_ = false; // true when endFrame switched to INLINE render pass bool createSecondaryCommandResources(); void destroySecondaryCommandResources(); VkCommandBuffer beginSecondary(uint32_t secondaryIndex); diff --git a/src/rendering/renderer.cpp b/src/rendering/renderer.cpp index 7fd90840..801f28e2 100644 --- a/src/rendering/renderer.cpp +++ b/src/rendering/renderer.cpp @@ -1215,6 +1215,11 @@ void Renderer::beginFrame() { void Renderer::endFrame() { if (!vkCtx || currentCmd == VK_NULL_HANDLE) return; + // Track whether a post-processing path switched to an INLINE render pass. + // beginFrame() may have started the scene pass with SECONDARY_COMMAND_BUFFERS; + // post-proc paths end it and begin a new INLINE pass for the swapchain output. + endFrameInlineMode_ = false; + if (fsr2_.enabled && fsr2_.sceneFramebuffer) { // End the off-screen scene render pass vkCmdEndRenderPass(currentCmd); @@ -1297,7 +1302,7 @@ void Renderer::endFrame() { rpInfo.clearValueCount = msaaOn ? (vkCtx->getDepthResolveImageView() ? 
4u : 3u) : 2u; rpInfo.pClearValues = clearValues; - vkCmdBeginRenderPass(currentCmd, &rpInfo, VK_SUBPASS_CONTENTS_INLINE); + endFrameInlineMode_ = true; vkCmdBeginRenderPass(currentCmd, &rpInfo, VK_SUBPASS_CONTENTS_INLINE); VkExtent2D ext = vkCtx->getSwapchainExtent(); VkViewport vp{}; @@ -1434,18 +1439,22 @@ void Renderer::endFrame() { renderFSRUpscale(); } - // ImGui rendering — must respect subpass contents mode - // Parallel recording only applies when no post-process pass is active. - if (!fsr_.enabled && !fsr2_.enabled && !fxaa_.enabled && parallelRecordingEnabled_) { - // Scene pass was begun with VK_SUBPASS_CONTENTS_SECONDARY_COMMAND_BUFFERS, - // so ImGui must be recorded into a secondary command buffer. + // ImGui rendering — must respect the subpass contents mode of the + // CURRENT render pass. Post-processing paths (FSR/FXAA) end the scene + // pass and begin a new INLINE pass; if none ran, we're still inside the + // scene pass which may be SECONDARY_COMMAND_BUFFERS when parallel recording + // is active. Track this via endFrameInlineMode_ (set true by any post-proc + // path that started an INLINE render pass). + if (parallelRecordingEnabled_ && !endFrameInlineMode_) { + // Still in the scene pass with SECONDARY_COMMAND_BUFFERS — record + // ImGui into a secondary command buffer. VkCommandBuffer imguiCmd = beginSecondary(SEC_IMGUI); setSecondaryViewportScissor(imguiCmd); ImGui_ImplVulkan_RenderDrawData(ImGui::GetDrawData(), imguiCmd); vkEndCommandBuffer(imguiCmd); vkCmdExecuteCommands(currentCmd, 1, &imguiCmd); } else { - // FSR swapchain pass uses INLINE mode; non-parallel also uses INLINE. + // INLINE render pass (post-process pass or non-parallel mode). 
ImGui_ImplVulkan_RenderDrawData(ImGui::GetDrawData(), currentCmd); } From 891b9e5822cabfc3fa3be51d23e63b4de11be4ec Mon Sep 17 00:00:00 2001 From: Kelsi Date: Tue, 24 Mar 2026 13:20:06 -0700 Subject: [PATCH 3/9] fix: show friendly map names on loading screen (Outland not Expansion01) Add mapDisplayName() with friendly names for continents: "Eastern Kingdoms", "Kalimdor", "Outland", "Northrend". The loading screen previously showed WDT directory names like "Expansion01" when Map.dbc's localized name field was empty or matched the internal name. --- include/core/application.hpp | 1 + src/core/application.cpp | 30 +++++++++++++++++++++++------- 2 files changed, 24 insertions(+), 7 deletions(-) diff --git a/include/core/application.hpp b/include/core/application.hpp index a22a210e..9004ebe4 100644 --- a/include/core/application.hpp +++ b/include/core/application.hpp @@ -97,6 +97,7 @@ private: void spawnPlayerCharacter(); std::string getPlayerModelPath() const; static const char* mapIdToName(uint32_t mapId); + static const char* mapDisplayName(uint32_t mapId); void loadOnlineWorldTerrain(uint32_t mapId, float x, float y, float z); void buildFactionHostilityMap(uint8_t playerRace); pipeline::M2Model loadCreatureM2Sync(const std::string& m2Path); diff --git a/src/core/application.cpp b/src/core/application.cpp index 73ea9cb4..db1f99a0 100644 --- a/src/core/application.cpp +++ b/src/core/application.cpp @@ -87,6 +87,17 @@ bool envFlagEnabled(const char* key, bool defaultValue = false) { } // namespace +const char* Application::mapDisplayName(uint32_t mapId) { + // Friendly display names for the loading screen + switch (mapId) { + case 0: return "Eastern Kingdoms"; + case 1: return "Kalimdor"; + case 530: return "Outland"; + case 571: return "Northrend"; + default: return nullptr; + } +} + const char* Application::mapIdToName(uint32_t mapId) { // Fallback when Map.dbc is unavailable. 
Names must match WDT directory names // (case-insensitive — AssetManager lowercases all paths). @@ -4468,13 +4479,18 @@ void Application::loadOnlineWorldTerrain(uint32_t mapId, float x, float y, float window->swapBuffers(); }; - // Set zone name on loading screen from Map.dbc - if (gameHandler) { - std::string mapDisplayName = gameHandler->getMapName(mapId); - if (!mapDisplayName.empty()) - loadingScreen.setZoneName(mapDisplayName); - else - loadingScreen.setZoneName("Loading..."); + // Set zone name on loading screen — prefer friendly display name, then DBC + { + const char* friendly = mapDisplayName(mapId); + if (friendly) { + loadingScreen.setZoneName(friendly); + } else if (gameHandler) { + std::string dbcName = gameHandler->getMapName(mapId); + if (!dbcName.empty()) + loadingScreen.setZoneName(dbcName); + else + loadingScreen.setZoneName("Loading..."); + } } showProgress("Entering world...", 0.0f); From 7a5d80e8017bcca615632d27db78fdd5e91ec1fe Mon Sep 17 00:00:00 2001 From: Kelsi Date: Tue, 24 Mar 2026 13:34:52 -0700 Subject: [PATCH 4/9] fix: flush GPU before first render frame after world load Add vkDeviceWaitIdle after world loading completes to ensure all async texture uploads and resource creation are fully flushed before the first render frame. Mitigates intermittent NVIDIA driver crashes at vkCmdBeginRenderPass during initial world entry. --- src/core/application.cpp | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/src/core/application.cpp b/src/core/application.cpp index db1f99a0..614c5883 100644 --- a/src/core/application.cpp +++ b/src/core/application.cpp @@ -5311,6 +5311,14 @@ void Application::loadOnlineWorldTerrain(uint32_t mapId, float x, float y, float showProgress("Entering world...", 1.0f); + // Ensure all GPU resources (textures, buffers, pipelines) created during + // world load are fully flushed before the first render frame. 
Without this, + // vkCmdBeginRenderPass can crash on NVIDIA 590.x when resources from async + // uploads haven't completed their queue operations. + if (renderer && renderer->getVkContext()) { + vkDeviceWaitIdle(renderer->getVkContext()->getDevice()); + } + if (loadingScreenOk) { loadingScreen.shutdown(); } From 05e85d9fa717b7e36e443afe6cac92f7fb40ed0a Mon Sep 17 00:00:00 2001 From: Kelsi Date: Tue, 24 Mar 2026 13:46:01 -0700 Subject: [PATCH 5/9] fix: correct melee swing sound paths to match WoW MPQ layout The melee swing clips used non-existent paths (SwordSwing, MeleeSwing) instead of the actual WoW 3.3.5a weapon swing files: WeaponSwings/ mWooshMedium and mWooshLarge for hit swings, MissSwings/MissWhoosh for misses. Fixes "No melee swing SFX found in assets" warning. --- src/audio/activity_sound_manager.cpp | 20 ++++++++------------ 1 file changed, 8 insertions(+), 12 deletions(-) diff --git a/src/audio/activity_sound_manager.cpp b/src/audio/activity_sound_manager.cpp index 4f02b35e..3a0bfe54 100644 --- a/src/audio/activity_sound_manager.cpp +++ b/src/audio/activity_sound_manager.cpp @@ -52,18 +52,14 @@ bool ActivitySoundManager::initialize(pipeline::AssetManager* assets) { preloadLandingSet(FootstepSurface::SNOW, "Snow"); preloadCandidates(meleeSwingClips, { - "Sound\\Item\\Weapons\\Sword\\SwordSwing1.wav", - "Sound\\Item\\Weapons\\Sword\\SwordSwing2.wav", - "Sound\\Item\\Weapons\\Sword\\SwordSwing3.wav", - "Sound\\Item\\Weapons\\Sword\\SwordHit1.wav", - "Sound\\Item\\Weapons\\Sword\\SwordHit2.wav", - "Sound\\Item\\Weapons\\Sword\\SwordHit3.wav", - "Sound\\Item\\Weapons\\OneHanded\\Sword\\SwordSwing1.wav", - "Sound\\Item\\Weapons\\OneHanded\\Sword\\SwordSwing2.wav", - "Sound\\Item\\Weapons\\OneHanded\\Sword\\SwordSwing3.wav", - "Sound\\Item\\Weapons\\Melee\\MeleeSwing1.wav", - "Sound\\Item\\Weapons\\Melee\\MeleeSwing2.wav", - "Sound\\Item\\Weapons\\Melee\\MeleeSwing3.wav" + "Sound\\Item\\Weapons\\WeaponSwings\\mWooshMedium1.wav", + 
"Sound\\Item\\Weapons\\WeaponSwings\\mWooshMedium2.wav", + "Sound\\Item\\Weapons\\WeaponSwings\\mWooshMedium3.wav", + "Sound\\Item\\Weapons\\WeaponSwings\\mWooshLarge1.wav", + "Sound\\Item\\Weapons\\WeaponSwings\\mWooshLarge2.wav", + "Sound\\Item\\Weapons\\WeaponSwings\\mWooshLarge3.wav", + "Sound\\Item\\Weapons\\MissSwings\\MissWhoosh1Handed.wav", + "Sound\\Item\\Weapons\\MissSwings\\MissWhoosh2Handed.wav" }); initialized = true; From ed0cb0ad258a41f413e11e6c828549b0b0590f97 Mon Sep 17 00:00:00 2001 From: Kelsi Date: Tue, 24 Mar 2026 13:56:20 -0700 Subject: [PATCH 6/9] perf: time-budget tile finalization to prevent 1+ second main-loop stalls processReadyTiles was calling advanceFinalization with a step limit of 1 but a single step (texture upload or M2 model load) could take 1060ms. Replace the step counter with an 8ms wall-clock time budget (16ms during taxi) so finalization yields to the render loop before causing a visible stall. Heavy tiles spread across multiple frames instead of blocking. --- src/rendering/terrain_manager.cpp | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/src/rendering/terrain_manager.cpp b/src/rendering/terrain_manager.cpp index ba929d7c..4f54abfb 100644 --- a/src/rendering/terrain_manager.cpp +++ b/src/rendering/terrain_manager.cpp @@ -179,7 +179,7 @@ void TerrainManager::update(const Camera& camera, float deltaTime) { } // Always process ready tiles each frame (GPU uploads from background thread) - // Time budget prevents frame spikes from heavy tiles + // Time-budgeted internally to prevent frame spikes. processReadyTiles(); timeSinceLastUpdate += deltaTime; @@ -1223,18 +1223,25 @@ void TerrainManager::processReadyTiles() { // Async upload batch: record GPU copies into a command buffer, submit with // a fence, but DON'T wait. The fence is polled on subsequent frames. // This eliminates the main-thread stall from vkWaitForFences entirely. - const int maxSteps = taxiStreamingMode_ ? 
4 : 1; - int steps = 0; + // + // Time-budgeted: yield after 8ms to prevent main-loop stalls. Each + // advanceFinalization step is designed to be small, but texture uploads + // and M2 model loads can occasionally spike. The budget ensures we + // spread heavy tiles across multiple frames instead of blocking. + const auto budgetStart = std::chrono::steady_clock::now(); + const float budgetMs = taxiStreamingMode_ ? 16.0f : 8.0f; if (vkCtx) vkCtx->beginUploadBatch(); - while (!finalizingTiles_.empty() && steps < maxSteps) { + while (!finalizingTiles_.empty()) { auto& ft = finalizingTiles_.front(); bool done = advanceFinalization(ft); if (done) { finalizingTiles_.pop_front(); } - steps++; + float elapsed = std::chrono::duration<float, std::milli>( + std::chrono::steady_clock::now() - budgetStart).count(); + if (elapsed >= budgetMs) break; } if (vkCtx) vkCtx->endUploadBatch(); // Async — submits but doesn't wait From 1dd382301330b7e2e7f1230515e063f7a3e4ceff Mon Sep 17 00:00:00 2001 From: Kelsi Date: Tue, 24 Mar 2026 14:09:16 -0700 Subject: [PATCH 7/9] perf: use second GPU queue for parallel texture/buffer uploads Request 2 queues from the graphics family when available (NVIDIA exposes 16, AMD 2+). Upload batches now submit to queue[1] while rendering uses queue[0], enabling parallel GPU transfers without queue-family ownership transfer barriers (same family). Falls back to single-queue path on GPUs with only 1 queue in the graphics family. Transfer command pool is separate to avoid contention.
--- include/rendering/vk_context.hpp | 7 ++ src/rendering/vk_context.cpp | 158 +++++++++++++++++++++++++++---- 2 files changed, 144 insertions(+), 21 deletions(-) diff --git a/include/rendering/vk_context.hpp b/include/rendering/vk_context.hpp index fbc16e2a..c9926cf5 100644 --- a/include/rendering/vk_context.hpp +++ b/include/rendering/vk_context.hpp @@ -78,6 +78,7 @@ public: bool isNvidiaGpu() const { return gpuVendorId_ == 0x10DE; } VkQueue getGraphicsQueue() const { return graphicsQueue; } uint32_t getGraphicsQueueFamily() const { return graphicsQueueFamily; } + bool hasDedicatedTransferQueue() const { return hasDedicatedTransfer_; } VmaAllocator getAllocator() const { return allocator; } VkSurfaceKHR getSurface() const { return surface; } VkPipelineCache getPipelineCache() const { return pipelineCache_; } @@ -175,6 +176,12 @@ private: uint32_t graphicsQueueFamily = 0; uint32_t presentQueueFamily = 0; + // Dedicated transfer queue (second queue from same graphics family) + VkQueue transferQueue_ = VK_NULL_HANDLE; + VkCommandPool transferCommandPool_ = VK_NULL_HANDLE; + bool hasDedicatedTransfer_ = false; + uint32_t graphicsQueueFamilyQueueCount_ = 1; // queried in selectPhysicalDevice + // Swapchain VkSwapchainKHR swapchain = VK_NULL_HANDLE; VkFormat swapchainFormat = VK_FORMAT_UNDEFINED; diff --git a/src/rendering/vk_context.cpp b/src/rendering/vk_context.cpp index 323af430..3314ff83 100644 --- a/src/rendering/vk_context.cpp +++ b/src/rendering/vk_context.cpp @@ -135,6 +135,7 @@ void VkContext::shutdown() { if (immFence) { vkDestroyFence(device, immFence, nullptr); immFence = VK_NULL_HANDLE; } if (immCommandPool) { vkDestroyCommandPool(device, immCommandPool, nullptr); immCommandPool = VK_NULL_HANDLE; } + if (transferCommandPool_) { vkDestroyCommandPool(device, transferCommandPool_, nullptr); transferCommandPool_ = VK_NULL_HANDLE; } // Persist pipeline cache to disk before tearing down the device. 
savePipelineCache(); @@ -328,11 +329,52 @@ bool VkContext::selectPhysicalDevice() { VK_VERSION_MINOR(props.apiVersion), ".", VK_VERSION_PATCH(props.apiVersion)); LOG_INFO("Depth resolve support: ", depthResolveSupported_ ? "YES" : "NO"); + // Probe queue families to see if the graphics family supports multiple queues + // (used in createLogicalDevice to request a second queue for parallel uploads). + auto queueFamilies = vkbPhysicalDevice_.get_queue_families(); + for (uint32_t i = 0; i < static_cast<uint32_t>(queueFamilies.size()); i++) { + if (queueFamilies[i].queueFlags & VK_QUEUE_GRAPHICS_BIT) { + graphicsQueueFamilyQueueCount_ = queueFamilies[i].queueCount; + LOG_INFO("Graphics queue family ", i, " supports ", graphicsQueueFamilyQueueCount_, " queue(s)"); + break; + } + } + return true; } bool VkContext::createLogicalDevice() { vkb::DeviceBuilder deviceBuilder{vkbPhysicalDevice_}; + + // If the graphics queue family supports >= 2 queues, request a second one + // for parallel texture/buffer uploads. Both queues share the same family + // so no queue-ownership-transfer barriers are needed. + const bool requestTransferQueue = (graphicsQueueFamilyQueueCount_ >= 2); + + if (requestTransferQueue) { + // Build a custom queue description list: 2 queues from the graphics + // family, 1 queue from every other family (so present etc. still work).
+ auto families = vkbPhysicalDevice_.get_queue_families(); + uint32_t gfxFamily = UINT32_MAX; + for (uint32_t i = 0; i < static_cast<uint32_t>(families.size()); i++) { + if (families[i].queueFlags & VK_QUEUE_GRAPHICS_BIT) { + gfxFamily = i; + break; + } + } + + std::vector<vkb::CustomQueueDescription> queueDescs; + for (uint32_t i = 0; i < static_cast<uint32_t>(families.size()); i++) { + if (i == gfxFamily) { + // Request 2 queues: [0] graphics, [1] transfer uploads + queueDescs.emplace_back(i, std::vector<float>{1.0f, 1.0f}); + } else { + queueDescs.emplace_back(i, std::vector<float>{1.0f}); + } + } + deviceBuilder.custom_queue_setup(queueDescs); + } + auto devRet = deviceBuilder.build(); if (!devRet) { LOG_ERROR("Failed to create Vulkan logical device: ", devRet.error().message()); @@ -342,22 +384,45 @@ bool VkContext::createLogicalDevice() { auto vkbDevice = devRet.value(); device = vkbDevice.device; - auto gqRet = vkbDevice.get_queue(vkb::QueueType::graphics); - if (!gqRet) { - LOG_ERROR("Failed to get graphics queue"); - return false; - } - graphicsQueue = gqRet.value(); - graphicsQueueFamily = vkbDevice.get_queue_index(vkb::QueueType::graphics).value(); + if (requestTransferQueue) { + // With custom_queue_setup, we must retrieve queues manually. + auto families = vkbPhysicalDevice_.get_queue_families(); + uint32_t gfxFamily = UINT32_MAX; + for (uint32_t i = 0; i < static_cast<uint32_t>(families.size()); i++) { + if (families[i].queueFlags & VK_QUEUE_GRAPHICS_BIT) { + gfxFamily = i; + break; + } + } + graphicsQueueFamily = gfxFamily; + vkGetDeviceQueue(device, gfxFamily, 0, &graphicsQueue); + vkGetDeviceQueue(device, gfxFamily, 1, &transferQueue_); + hasDedicatedTransfer_ = true; - auto pqRet = vkbDevice.get_queue(vkb::QueueType::present); - if (!pqRet) { - // Fall back to graphics queue for presentation + // Present queue: try the graphics family first (most common), otherwise + // find a family that supports presentation.
presentQueue = graphicsQueue; - presentQueueFamily = graphicsQueueFamily; + presentQueueFamily = gfxFamily; + + LOG_INFO("Dedicated transfer queue enabled (family ", gfxFamily, ", queue index 1)"); } else { - presentQueue = pqRet.value(); - presentQueueFamily = vkbDevice.get_queue_index(vkb::QueueType::present).value(); + // Standard path — let vkb resolve queues. + auto gqRet = vkbDevice.get_queue(vkb::QueueType::graphics); + if (!gqRet) { + LOG_ERROR("Failed to get graphics queue"); + return false; + } + graphicsQueue = gqRet.value(); + graphicsQueueFamily = vkbDevice.get_queue_index(vkb::QueueType::graphics).value(); + + auto pqRet = vkbDevice.get_queue(vkb::QueueType::present); + if (!pqRet) { + presentQueue = graphicsQueue; + presentQueueFamily = graphicsQueueFamily; + } else { + presentQueue = pqRet.value(); + presentQueueFamily = vkbDevice.get_queue_index(vkb::QueueType::present).value(); + } } LOG_INFO("Vulkan logical device created"); @@ -588,6 +653,19 @@ bool VkContext::createCommandPools() { return false; } + // Separate command pool for the transfer queue (same family, different queue) + if (hasDedicatedTransfer_) { + VkCommandPoolCreateInfo transferPoolInfo{}; + transferPoolInfo.sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO; + transferPoolInfo.flags = VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT; + transferPoolInfo.queueFamilyIndex = graphicsQueueFamily; + + if (vkCreateCommandPool(device, &transferPoolInfo, nullptr, &transferCommandPool_) != VK_SUCCESS) { + LOG_ERROR("Failed to create transfer command pool"); + return false; + } + } + return true; } @@ -1709,7 +1787,21 @@ void VkContext::beginUploadBatch() { uploadBatchDepth_++; if (inUploadBatch_) return; // already in a batch (nested call) inUploadBatch_ = true; - batchCmd_ = beginSingleTimeCommands(); + + // Allocate from transfer pool if available, otherwise from immCommandPool. + VkCommandPool pool = hasDedicatedTransfer_ ? 
transferCommandPool_ : immCommandPool; + + VkCommandBufferAllocateInfo allocInfo{}; + allocInfo.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO; + allocInfo.commandPool = pool; + allocInfo.level = VK_COMMAND_BUFFER_LEVEL_PRIMARY; + allocInfo.commandBufferCount = 1; + vkAllocateCommandBuffers(device, &allocInfo, &batchCmd_); + + VkCommandBufferBeginInfo beginInfo{}; + beginInfo.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO; + beginInfo.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT; + vkBeginCommandBuffer(batchCmd_, &beginInfo); } void VkContext::endUploadBatch() { @@ -1719,10 +1811,12 @@ void VkContext::endUploadBatch() { inUploadBatch_ = false; + VkCommandPool pool = hasDedicatedTransfer_ ? transferCommandPool_ : immCommandPool; + if (batchStagingBuffers_.empty()) { // No GPU copies were recorded — skip the submit entirely. vkEndCommandBuffer(batchCmd_); - vkFreeCommandBuffers(device, immCommandPool, 1, &batchCmd_); + vkFreeCommandBuffers(device, pool, 1, &batchCmd_); batchCmd_ = VK_NULL_HANDLE; return; } @@ -1739,7 +1833,10 @@ void VkContext::endUploadBatch() { submitInfo.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO; submitInfo.commandBufferCount = 1; submitInfo.pCommandBuffers = &batchCmd_; - vkQueueSubmit(graphicsQueue, 1, &submitInfo, fence); + + // Submit to the dedicated transfer queue if available, otherwise graphics. + VkQueue targetQueue = hasDedicatedTransfer_ ? transferQueue_ : graphicsQueue; + vkQueueSubmit(targetQueue, 1, &submitInfo, fence); // Stash everything for later cleanup when fence signals InFlightBatch batch; @@ -1759,15 +1856,30 @@ void VkContext::endUploadBatchSync() { inUploadBatch_ = false; + VkCommandPool pool = hasDedicatedTransfer_ ? 
transferCommandPool_ : immCommandPool; + if (batchStagingBuffers_.empty()) { vkEndCommandBuffer(batchCmd_); - vkFreeCommandBuffers(device, immCommandPool, 1, &batchCmd_); + vkFreeCommandBuffers(device, pool, 1, &batchCmd_); batchCmd_ = VK_NULL_HANDLE; return; } - // Synchronous path for load screens — submit and wait - endSingleTimeCommands(batchCmd_); + // Synchronous path for load screens — submit and wait on the target queue. + VkQueue targetQueue = hasDedicatedTransfer_ ? transferQueue_ : graphicsQueue; + + vkEndCommandBuffer(batchCmd_); + + VkSubmitInfo submitInfo{}; + submitInfo.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO; + submitInfo.commandBufferCount = 1; + submitInfo.pCommandBuffers = &batchCmd_; + + vkQueueSubmit(targetQueue, 1, &submitInfo, immFence); + vkWaitForFences(device, 1, &immFence, VK_TRUE, UINT64_MAX); + vkResetFences(device, 1, &immFence); + + vkFreeCommandBuffers(device, pool, 1, &batchCmd_); batchCmd_ = VK_NULL_HANDLE; for (auto& staging : batchStagingBuffers_) { @@ -1779,6 +1891,8 @@ void VkContext::endUploadBatchSync() { void VkContext::pollUploadBatches() { if (inFlightBatches_.empty()) return; + VkCommandPool pool = hasDedicatedTransfer_ ? transferCommandPool_ : immCommandPool; + for (auto it = inFlightBatches_.begin(); it != inFlightBatches_.end(); ) { VkResult result = vkGetFenceStatus(device, it->fence); if (result == VK_SUCCESS) { @@ -1786,7 +1900,7 @@ void VkContext::pollUploadBatches() { for (auto& staging : it->stagingBuffers) { destroyBuffer(allocator, staging); } - vkFreeCommandBuffers(device, immCommandPool, 1, &it->cmd); + vkFreeCommandBuffers(device, pool, 1, &it->cmd); vkDestroyFence(device, it->fence, nullptr); it = inFlightBatches_.erase(it); } else { @@ -1796,12 +1910,14 @@ void VkContext::pollUploadBatches() { } void VkContext::waitAllUploads() { + VkCommandPool pool = hasDedicatedTransfer_ ? 
transferCommandPool_ : immCommandPool; + for (auto& batch : inFlightBatches_) { vkWaitForFences(device, 1, &batch.fence, VK_TRUE, UINT64_MAX); for (auto& staging : batch.stagingBuffers) { destroyBuffer(allocator, staging); } - vkFreeCommandBuffers(device, immCommandPool, 1, &batch.cmd); + vkFreeCommandBuffers(device, pool, 1, &batch.cmd); vkDestroyFence(device, batch.fence, nullptr); } inFlightBatches_.clear(); From 432da20b3e2897ca06e8dd58da93d6d349d8be61 Mon Sep 17 00:00:00 2001 From: Kelsi Date: Tue, 24 Mar 2026 14:22:28 -0700 Subject: [PATCH 8/9] feat: enable crafting sounds and add Create All button Remove the isProfessionSpell sound suppression so crafting spells play precast and cast-complete audio like combat spells. Crafting was previously silent by design but users expect audio feedback. Add "Create All" button to the tradeskill UI that queues 999 crafts. The server automatically stops the queue when materials run out (SPELL_FAILED_REAGENTS cancels the craft queue). This matches the real WoW client's behavior for batch crafting. 
--- src/game/game_handler.cpp | 42 +++++++++++++++++---------------------- src/ui/game_screen.cpp | 6 ++++++ 2 files changed, 24 insertions(+), 24 deletions(-) diff --git a/src/game/game_handler.cpp b/src/game/game_handler.cpp index 981f04f7..57488d72 100644 --- a/src/game/game_handler.cpp +++ b/src/game/game_handler.cpp @@ -19592,18 +19592,15 @@ void GameHandler::handleSpellStart(network::Packet& packet) { castTimeRemaining = castTimeTotal; if (addonEventCallback_) addonEventCallback_("CURRENT_SPELL_CAST_CHANGED", {}); - // Play precast (channeling) sound with correct magic school - // Skip sound for profession/tradeskill spells (crafting should be silent) - if (!isProfessionSpell(data.spellId)) { - if (auto* renderer = core::Application::getInstance().getRenderer()) { - if (auto* ssm = renderer->getSpellSoundManager()) { - loadSpellNameCache(); - auto it = spellNameCache_.find(data.spellId); - auto school = (it != spellNameCache_.end() && it->second.schoolMask) - ? schoolMaskToMagicSchool(it->second.schoolMask) - : audio::SpellSoundManager::MagicSchool::ARCANE; - ssm->playPrecast(school, audio::SpellSoundManager::SpellPower::MEDIUM); - } + // Play precast sound with correct magic school (including crafting spells) + if (auto* renderer = core::Application::getInstance().getRenderer()) { + if (auto* ssm = renderer->getSpellSoundManager()) { + loadSpellNameCache(); + auto it = spellNameCache_.find(data.spellId); + auto school = (it != spellNameCache_.end() && it->second.schoolMask) + ? 
schoolMaskToMagicSchool(it->second.schoolMask) + : audio::SpellSoundManager::MagicSchool::ARCANE; + ssm->playPrecast(school, audio::SpellSoundManager::SpellPower::MEDIUM); } } @@ -19639,18 +19636,15 @@ void GameHandler::handleSpellGo(network::Packet& packet) { // Cast completed if (data.casterUnit == playerGuid) { - // Play cast-complete sound with correct magic school - // Skip sound for profession/tradeskill spells (crafting should be silent) - if (!isProfessionSpell(data.spellId)) { - if (auto* renderer = core::Application::getInstance().getRenderer()) { - if (auto* ssm = renderer->getSpellSoundManager()) { - loadSpellNameCache(); - auto it = spellNameCache_.find(data.spellId); - auto school = (it != spellNameCache_.end() && it->second.schoolMask) - ? schoolMaskToMagicSchool(it->second.schoolMask) - : audio::SpellSoundManager::MagicSchool::ARCANE; - ssm->playCast(school); - } + // Play cast-complete sound with correct magic school (including crafting) + if (auto* renderer = core::Application::getInstance().getRenderer()) { + if (auto* ssm = renderer->getSpellSoundManager()) { + loadSpellNameCache(); + auto it = spellNameCache_.find(data.spellId); + auto school = (it != spellNameCache_.end() && it->second.schoolMask) + ? schoolMaskToMagicSchool(it->second.schoolMask) + : audio::SpellSoundManager::MagicSchool::ARCANE; + ssm->playCast(school); } } diff --git a/src/ui/game_screen.cpp b/src/ui/game_screen.cpp index b74a78b5..26f0b8f2 100644 --- a/src/ui/game_screen.cpp +++ b/src/ui/game_screen.cpp @@ -17436,6 +17436,12 @@ void GameScreen::renderTrainerWindow(game::GameHandler& gameHandler) { gameHandler.startCraftQueue(selectedCraftSpell, craftQuantity); } } + ImGui::SameLine(); + if (ImGui::Button("Create All")) { + // Queue a large count — server stops the queue automatically + // when materials run out (sends SPELL_FAILED_REAGENTS). 
+ gameHandler.startCraftQueue(selectedCraftSpell, 999); + } if (!canCraft) ImGui::EndDisabled(); } } From 6bfa3dc4023bd17522578008cdb8a348f336a094 Mon Sep 17 00:00:00 2001 From: Kelsi Date: Tue, 24 Mar 2026 14:33:22 -0700 Subject: [PATCH 9/9] fix: suppress spell sounds and melee swing for crafting/profession spells Crafting spells (bandages, smelting, etc.) were playing magic precast/ cast-complete audio and triggering melee weapon swing animations because they have physical school mask (1). Re-add isProfessionSpell check to skip spell sounds and melee animation for tradeskill spells. The character still plays the generic cast animation via spellCastAnimCallback. --- src/game/game_handler.cpp | 44 ++++++++++++++++++++++----------------- 1 file changed, 25 insertions(+), 19 deletions(-) diff --git a/src/game/game_handler.cpp b/src/game/game_handler.cpp index 57488d72..65d7becc 100644 --- a/src/game/game_handler.cpp +++ b/src/game/game_handler.cpp @@ -19592,15 +19592,18 @@ void GameHandler::handleSpellStart(network::Packet& packet) { castTimeRemaining = castTimeTotal; if (addonEventCallback_) addonEventCallback_("CURRENT_SPELL_CAST_CHANGED", {}); - // Play precast sound with correct magic school (including crafting spells) - if (auto* renderer = core::Application::getInstance().getRenderer()) { - if (auto* ssm = renderer->getSpellSoundManager()) { - loadSpellNameCache(); - auto it = spellNameCache_.find(data.spellId); - auto school = (it != spellNameCache_.end() && it->second.schoolMask) - ? schoolMaskToMagicSchool(it->second.schoolMask) - : audio::SpellSoundManager::MagicSchool::ARCANE; - ssm->playPrecast(school, audio::SpellSoundManager::SpellPower::MEDIUM); + // Play precast sound — skip profession/tradeskill spells (they use crafting + // animations/sounds, not magic spell audio). 
+ if (!isProfessionSpell(data.spellId)) { + if (auto* renderer = core::Application::getInstance().getRenderer()) { + if (auto* ssm = renderer->getSpellSoundManager()) { + loadSpellNameCache(); + auto it = spellNameCache_.find(data.spellId); + auto school = (it != spellNameCache_.end() && it->second.schoolMask) + ? schoolMaskToMagicSchool(it->second.schoolMask) + : audio::SpellSoundManager::MagicSchool::ARCANE; + ssm->playPrecast(school, audio::SpellSoundManager::SpellPower::MEDIUM); + } } } @@ -19636,25 +19639,28 @@ void GameHandler::handleSpellGo(network::Packet& packet) { // Cast completed if (data.casterUnit == playerGuid) { - // Play cast-complete sound with correct magic school (including crafting) - if (auto* renderer = core::Application::getInstance().getRenderer()) { - if (auto* ssm = renderer->getSpellSoundManager()) { - loadSpellNameCache(); - auto it = spellNameCache_.find(data.spellId); - auto school = (it != spellNameCache_.end() && it->second.schoolMask) - ? schoolMaskToMagicSchool(it->second.schoolMask) - : audio::SpellSoundManager::MagicSchool::ARCANE; - ssm->playCast(school); + // Play cast-complete sound — skip profession spells (no magic sound for crafting) + if (!isProfessionSpell(data.spellId)) { + if (auto* renderer = core::Application::getInstance().getRenderer()) { + if (auto* ssm = renderer->getSpellSoundManager()) { + loadSpellNameCache(); + auto it = spellNameCache_.find(data.spellId); + auto school = (it != spellNameCache_.end() && it->second.schoolMask) + ? schoolMaskToMagicSchool(it->second.schoolMask) + : audio::SpellSoundManager::MagicSchool::ARCANE; + ssm->playCast(school); + } } } // Instant melee abilities → trigger attack animation // Detect via physical school mask (1 = Physical) from the spell DBC cache. + // Skip profession spells — crafting should not swing weapons. // This covers warrior, rogue, DK, paladin, feral druid, and hunter melee // abilities generically instead of maintaining a brittle per-spell-ID list. 
uint32_t sid = data.spellId; bool isMeleeAbility = false; - { + if (!isProfessionSpell(sid)) { loadSpellNameCache(); auto cacheIt = spellNameCache_.find(sid); if (cacheIt != spellNameCache_.end() && cacheIt->second.schoolMask == 1) {