perf: use second GPU queue for parallel texture/buffer uploads

Request 2 queues from the graphics family when available (NVIDIA
exposes 16, AMD 2+). Upload batches now submit to queue[1] while
rendering uses queue[0], enabling parallel GPU transfers without
queue-family ownership transfer barriers (same family).

Falls back to the single-queue path on GPUs with only 1 queue in the
graphics family. The transfer command pool is separate to avoid contention.
This commit is contained in:
Kelsi 2026-03-24 14:09:16 -07:00
parent ed0cb0ad25
commit 1dd3823013
2 changed files with 144 additions and 21 deletions

View file

@@ -78,6 +78,7 @@ public:
// Reports whether the stored GPU vendor ID matches NVIDIA's (0x10DE).
bool isNvidiaGpu() const {
    constexpr auto kNvidiaVendorId = 0x10DE;
    return gpuVendorId_ == kNvidiaVendorId;
}
// Accessor for the graphics queue handle held by this object.
VkQueue getGraphicsQueue() const {
    return graphicsQueue;
}
// Accessor for the stored graphics queue family value.
uint32_t getGraphicsQueueFamily() const {
    return graphicsQueueFamily;
}
// Reports the hasDedicatedTransfer_ flag, i.e. whether the separate
// transfer queue (second queue from the graphics family) is in use.
bool hasDedicatedTransferQueue() const {
    return hasDedicatedTransfer_;
}
// Accessor for the VmaAllocator handle held by this object.
VmaAllocator getAllocator() const {
    return allocator;
}
// Accessor for the VkSurfaceKHR handle held by this object.
VkSurfaceKHR getSurface() const {
    return surface;
}
// Accessor for the VkPipelineCache handle held by this object.
VkPipelineCache getPipelineCache() const {
    return pipelineCache_;
}
@@ -175,6 +176,12 @@ private:
// Returned by getGraphicsQueueFamily(); defaults to 0.
uint32_t graphicsQueueFamily = 0;
// NOTE(review): presumably the queue family index used for presentation — confirm against the .cpp.
uint32_t presentQueueFamily = 0;
// Dedicated transfer queue (second queue from same graphics family).
// Because it shares the graphics family, uploads submitted here need no
// queue-family ownership transfer barriers.
VkQueue transferQueue_ = VK_NULL_HANDLE;
// Command pool kept separate for transfer work.
// NOTE(review): presumably only created when hasDedicatedTransfer_ is true — confirm in the .cpp.
VkCommandPool transferCommandPool_ = VK_NULL_HANDLE;
// Flag reported by hasDedicatedTransferQueue(); stays false on the single-queue path.
bool hasDedicatedTransfer_ = false;
// Defaults to 1 until the real count is known.
uint32_t graphicsQueueFamilyQueueCount_ = 1; // queried in selectPhysicalDevice
// Swapchain
VkSwapchainKHR swapchain = VK_NULL_HANDLE;
VkFormat swapchainFormat = VK_FORMAT_UNDEFINED;