perf: use second GPU queue for parallel texture/buffer uploads

Request 2 queues from the graphics family when available (NVIDIA exposes 16, AMD 2+). Upload batches now submit to queue[1] while rendering uses queue[0], enabling parallel GPU transfers; because both queues belong to the same family, no queue-family ownership transfer barriers are needed. Falls back to the single-queue path on GPUs with only 1 queue in the graphics family. The transfer command pool is separate to avoid contention.
2026-05-10 02:53:51 +00:00 · 2026-03-24 14:09:16 -07:00 · 2026-03-24 14:09:16 -07:00 · 1dd3823013
commit 1dd3823013
parent ed0cb0ad25
2 changed files with 144 additions and 21 deletions
--- a/include/rendering/vk_context.hpp
+++ b/include/rendering/vk_context.hpp
@ -78,6 +78,7 @@ public:
    bool isNvidiaGpu() const { return gpuVendorId_ == 0x10DE; }
    VkQueue getGraphicsQueue() const { return graphicsQueue; }
    uint32_t getGraphicsQueueFamily() const { return graphicsQueueFamily; }
+    bool hasDedicatedTransferQueue() const { return hasDedicatedTransfer_; }
    VmaAllocator getAllocator() const { return allocator; }
    VkSurfaceKHR getSurface() const { return surface; }
    VkPipelineCache getPipelineCache() const { return pipelineCache_; }
@ -175,6 +176,12 @@ private:
    uint32_t graphicsQueueFamily = 0;
    uint32_t presentQueueFamily = 0;

+    // Dedicated transfer queue (second queue from same graphics family)
+    VkQueue transferQueue_ = VK_NULL_HANDLE;
+    VkCommandPool transferCommandPool_ = VK_NULL_HANDLE;
+    bool hasDedicatedTransfer_ = false;
+    uint32_t graphicsQueueFamilyQueueCount_ = 1; // queried in selectPhysicalDevice
+
    // Swapchain
    VkSwapchainKHR swapchain = VK_NULL_HANDLE;
    VkFormat swapchainFormat = VK_FORMAT_UNDEFINED;