#define VMA_IMPLEMENTATION #include "rendering/vk_context.hpp" #include "core/logger.hpp" #include #include #include #include #include #include #include #include namespace wowee { namespace rendering { VkContext* VkContext::sInstance_ = nullptr; // Hash a VkSamplerCreateInfo into a 64-bit key for the sampler cache. // FNV-1a chosen for speed and low collision rate on small structured data. // Constants from: http://www.isthe.com/chongo/tech/comp/fnv/ static constexpr uint64_t kFnv1aOffsetBasis = 14695981039346656037ULL; static constexpr uint64_t kFnv1aPrime = 1099511628211ULL; static uint64_t hashSamplerCreateInfo(const VkSamplerCreateInfo& s) { uint64_t h = kFnv1aOffsetBasis; auto mix = [&](uint64_t v) { h ^= v; h *= kFnv1aPrime; }; mix(static_cast(s.minFilter)); mix(static_cast(s.magFilter)); mix(static_cast(s.mipmapMode)); mix(static_cast(s.addressModeU)); mix(static_cast(s.addressModeV)); mix(static_cast(s.addressModeW)); mix(static_cast(s.anisotropyEnable)); // Bit-cast floats to uint32_t for hashing uint32_t aniso; std::memcpy(&aniso, &s.maxAnisotropy, sizeof(aniso)); mix(static_cast(aniso)); uint32_t maxLodBits; std::memcpy(&maxLodBits, &s.maxLod, sizeof(maxLodBits)); mix(static_cast(maxLodBits)); uint32_t minLodBits; std::memcpy(&minLodBits, &s.minLod, sizeof(minLodBits)); mix(static_cast(minLodBits)); mix(static_cast(s.compareEnable)); mix(static_cast(s.compareOp)); mix(static_cast(s.borderColor)); uint32_t biasBits; std::memcpy(&biasBits, &s.mipLodBias, sizeof(biasBits)); mix(static_cast(biasBits)); mix(static_cast(s.unnormalizedCoordinates)); return h; } static VKAPI_ATTR VkBool32 VKAPI_CALL debugCallback( VkDebugUtilsMessageSeverityFlagBitsEXT severity, [[maybe_unused]] VkDebugUtilsMessageTypeFlagsEXT type, const VkDebugUtilsMessengerCallbackDataEXT* callbackData, [[maybe_unused]] void* userData) { if (severity >= VK_DEBUG_UTILS_MESSAGE_SEVERITY_ERROR_BIT_EXT) { LOG_ERROR("Vulkan: ", callbackData->pMessage); } else if (severity >= VK_DEBUG_UTILS_MESSAGE_SEVERITY_WARNING_BIT_EXT) { LOG_WARNING("Vulkan: ", callbackData->pMessage); } return VK_FALSE; } VkContext::~VkContext() { shutdown(); } bool VkContext::initialize(SDL_Window* window) { LOG_INFO("Initializing Vulkan context"); if (!createInstance(window)) return false; if (!createSurface(window)) return false; if (!selectPhysicalDevice()) return false; if (!createLogicalDevice()) return false; if (!createAllocator()) return false; // Pipeline cache: try to load from disk, fall back to empty cache. // Not fatal — if it fails we just skip caching. createPipelineCache(); int w, h; SDL_Vulkan_GetDrawableSize(window, &w, &h); if (!createSwapchain(w, h)) return false; if (!createCommandPools()) return false; if (!createSyncObjects()) return false; if (!createImGuiResources()) return false; // Query anisotropy support from the physical device. VkPhysicalDeviceFeatures supportedFeatures{}; vkGetPhysicalDeviceFeatures(physicalDevice, &supportedFeatures); samplerAnisotropySupported_ = (supportedFeatures.samplerAnisotropy == VK_TRUE); LOG_INFO("Sampler anisotropy supported: ", samplerAnisotropySupported_ ? "YES" : "NO"); sInstance_ = this; LOG_INFO("Vulkan context initialized successfully"); return true; } void VkContext::shutdown() { if (!device && !instance) return; // Already shut down or never initialized LOG_WARNING("VkContext::shutdown - vkDeviceWaitIdle..."); if (device) { vkDeviceWaitIdle(device); } // Clear deferred cleanup queues WITHOUT executing them. By this point the // sub-renderers (which own the descriptor pools/buffers these lambdas // reference) have already been destroyed, so running them would call // vkFreeDescriptorSets on invalid pools. vkDestroyDevice reclaims all // device-child resources anyway. for (uint32_t fi = 0; fi < MAX_FRAMES_IN_FLIGHT; fi++) { deferredCleanup_[fi].clear(); } LOG_WARNING("VkContext::shutdown - destroyImGuiResources..."); destroyImGuiResources(); // Destroy sync objects for (auto& frame : frames) { if (frame.inFlightFence) vkDestroyFence(device, frame.inFlightFence, nullptr); if (frame.commandPool) vkDestroyCommandPool(device, frame.commandPool, nullptr); frame = {}; } for (auto sem : imageAcquiredSemaphores_) { if (sem) vkDestroySemaphore(device, sem, nullptr); } imageAcquiredSemaphores_.clear(); for (auto sem : renderFinishedSemaphores_) { if (sem) vkDestroySemaphore(device, sem, nullptr); } renderFinishedSemaphores_.clear(); if (nextAcquireSemaphore_) { vkDestroySemaphore(device, nextAcquireSemaphore_, nullptr); nextAcquireSemaphore_ = VK_NULL_HANDLE; } // Clean up any in-flight async upload batches (device already idle) for (auto& batch : inFlightBatches_) { // Staging buffers: skip destroy — allocator is about to be torn down vkDestroyFence(device, batch.fence, nullptr); // Command buffer freed when pool is destroyed below } inFlightBatches_.clear(); if (immFence) { vkDestroyFence(device, immFence, nullptr); immFence = VK_NULL_HANDLE; } if (immCommandPool) { vkDestroyCommandPool(device, immCommandPool, nullptr); immCommandPool = VK_NULL_HANDLE; } if (transferCommandPool_) { vkDestroyCommandPool(device, transferCommandPool_, nullptr); transferCommandPool_ = VK_NULL_HANDLE; } // Persist pipeline cache to disk before tearing down the device. savePipelineCache(); if (pipelineCache_) { vkDestroyPipelineCache(device, pipelineCache_, nullptr); pipelineCache_ = VK_NULL_HANDLE; } // Destroy all cached samplers. for (auto& [key, sampler] : samplerCache_) { if (sampler) vkDestroySampler(device, sampler, nullptr); } samplerCache_.clear(); LOG_INFO("Sampler cache cleared"); sInstance_ = nullptr; LOG_WARNING("VkContext::shutdown - destroySwapchain..."); destroySwapchain(); // Skip vmaDestroyAllocator — it walks every allocation to free it, which // takes many seconds with thousands of loaded textures/models. The driver // reclaims all device memory when we destroy the device, and the OS reclaims // everything on process exit. Skipping this makes shutdown instant. allocator = VK_NULL_HANDLE; LOG_WARNING("VkContext::shutdown - vkDestroyDevice..."); if (device) { vkDestroyDevice(device, nullptr); device = VK_NULL_HANDLE; } if (surface) { vkDestroySurfaceKHR(instance, surface, nullptr); surface = VK_NULL_HANDLE; } if (debugMessenger) { auto func = reinterpret_cast( vkGetInstanceProcAddr(instance, "vkDestroyDebugUtilsMessengerEXT")); if (func) func(instance, debugMessenger, nullptr); debugMessenger = VK_NULL_HANDLE; } if (instance) { vkDestroyInstance(instance, nullptr); instance = VK_NULL_HANDLE; } LOG_WARNING("Vulkan context shutdown complete"); } void VkContext::deferAfterFrameFence(std::function&& fn) { deferredCleanup_[currentFrame].push_back(std::move(fn)); } void VkContext::deferAfterAllFrameFences(std::function&& fn) { // Shared resources (material descriptor sets, vertex/index buffers) are // bound by every in-flight frame's command buffer. deferAfterFrameFence // only waits for ONE slot's fence — the other slot may still be executing. // Add to every slot; a shared counter ensures the lambda runs exactly once, // after the LAST slot has been fenced. auto counter = std::make_shared(MAX_FRAMES_IN_FLIGHT); auto sharedFn = std::make_shared>(std::move(fn)); for (uint32_t i = 0; i < MAX_FRAMES_IN_FLIGHT; i++) { deferredCleanup_[i].push_back([counter, sharedFn]() { if (--(*counter) == 0) { (*sharedFn)(); } }); } } void VkContext::runDeferredCleanup(uint32_t frameIndex) { auto& q = deferredCleanup_[frameIndex]; if (q.empty()) return; for (auto& fn : q) { if (fn) fn(); } q.clear(); } VkSampler VkContext::getOrCreateSampler(const VkSamplerCreateInfo& info) { // Clamp anisotropy if the device doesn't support the feature. VkSamplerCreateInfo adjusted = info; if (!samplerAnisotropySupported_) { adjusted.anisotropyEnable = VK_FALSE; adjusted.maxAnisotropy = 1.0f; } uint64_t key = hashSamplerCreateInfo(adjusted); { std::lock_guard lock(samplerCacheMutex_); auto it = samplerCache_.find(key); if (it != samplerCache_.end()) { return it->second; } } // Create a new sampler outside the lock (vkCreateSampler is thread-safe // for distinct create infos, but we re-lock to insert). VkSampler sampler = VK_NULL_HANDLE; if (vkCreateSampler(device, &adjusted, nullptr, &sampler) != VK_SUCCESS) { LOG_ERROR("getOrCreateSampler: vkCreateSampler failed"); return VK_NULL_HANDLE; } { std::lock_guard lock(samplerCacheMutex_); // Double-check: another thread may have inserted while we were creating. auto [it, inserted] = samplerCache_.emplace(key, sampler); if (!inserted) { // Another thread won the race — destroy our duplicate and use theirs. vkDestroySampler(device, sampler, nullptr); return it->second; } } return sampler; } bool VkContext::createInstance(SDL_Window* window) { // Get required SDL extensions unsigned int sdlExtCount = 0; SDL_Vulkan_GetInstanceExtensions(window, &sdlExtCount, nullptr); std::vector sdlExts(sdlExtCount); SDL_Vulkan_GetInstanceExtensions(window, &sdlExtCount, sdlExts.data()); vkb::InstanceBuilder builder; builder.set_app_name("Wowee") .set_app_version(VK_MAKE_VERSION(1, 0, 0)) .require_api_version(1, 1, 0); for (auto ext : sdlExts) { builder.enable_extension(ext); } if (enableValidation) { builder.request_validation_layers(true) .set_debug_callback(debugCallback); } auto instRet = builder.build(); if (!instRet) { LOG_ERROR("Failed to create Vulkan instance: ", instRet.error().message()); return false; } vkbInstance_ = instRet.value(); instance = vkbInstance_.instance; debugMessenger = vkbInstance_.debug_messenger; LOG_INFO("Vulkan instance created"); return true; } bool VkContext::createSurface(SDL_Window* window) { if (!SDL_Vulkan_CreateSurface(window, instance, &surface)) { LOG_ERROR("Failed to create Vulkan surface: ", SDL_GetError()); return false; } return true; } bool VkContext::selectPhysicalDevice() { vkb::PhysicalDeviceSelector selector{vkbInstance_}; VkPhysicalDeviceFeatures requiredFeatures{}; requiredFeatures.samplerAnisotropy = VK_TRUE; requiredFeatures.fillModeNonSolid = VK_TRUE; // wireframe debug pipelines selector.set_surface(surface) .set_minimum_version(1, 1) .set_required_features(requiredFeatures) .prefer_gpu_device_type(vkb::PreferredDeviceType::discrete); auto physRet = selector.select(); if (!physRet) { LOG_ERROR("Failed to select Vulkan physical device: ", physRet.error().message()); return false; } vkbPhysicalDevice_ = physRet.value(); physicalDevice = vkbPhysicalDevice_.physical_device; VkPhysicalDeviceProperties props; vkGetPhysicalDeviceProperties(physicalDevice, &props); uint32_t apiVersion = props.apiVersion; gpuVendorId_ = props.vendorID; std::strncpy(gpuName_, props.deviceName, sizeof(gpuName_) - 1); gpuName_[sizeof(gpuName_) - 1] = '\0'; LOG_INFO("GPU: ", gpuName_, " (vendor 0x", std::hex, gpuVendorId_, std::dec, ")"); VkPhysicalDeviceDepthStencilResolveProperties dsResolveProps{}; dsResolveProps.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DEPTH_STENCIL_RESOLVE_PROPERTIES; VkPhysicalDeviceProperties2 props2{}; props2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2; props2.pNext = &dsResolveProps; vkGetPhysicalDeviceProperties2(physicalDevice, &props2); if (apiVersion >= VK_API_VERSION_1_2) { VkResolveModeFlags modes = dsResolveProps.supportedDepthResolveModes; if (modes & VK_RESOLVE_MODE_SAMPLE_ZERO_BIT) { depthResolveMode_ = VK_RESOLVE_MODE_SAMPLE_ZERO_BIT; depthResolveSupported_ = true; } else if (modes & VK_RESOLVE_MODE_MIN_BIT) { depthResolveMode_ = VK_RESOLVE_MODE_MIN_BIT; depthResolveSupported_ = true; } else if (modes & VK_RESOLVE_MODE_MAX_BIT) { depthResolveMode_ = VK_RESOLVE_MODE_MAX_BIT; depthResolveSupported_ = true; } else if (modes & VK_RESOLVE_MODE_AVERAGE_BIT) { depthResolveMode_ = VK_RESOLVE_MODE_AVERAGE_BIT; depthResolveSupported_ = true; } } else { depthResolveSupported_ = false; depthResolveMode_ = VK_RESOLVE_MODE_NONE; } LOG_INFO("Vulkan device: ", props.deviceName); LOG_INFO("Vulkan API version: ", VK_VERSION_MAJOR(props.apiVersion), ".", VK_VERSION_MINOR(props.apiVersion), ".", VK_VERSION_PATCH(props.apiVersion)); LOG_INFO("Depth resolve support: ", depthResolveSupported_ ? "YES" : "NO"); // Probe queue families to see if the graphics family supports multiple queues // (used in createLogicalDevice to request a second queue for parallel uploads). auto queueFamilies = vkbPhysicalDevice_.get_queue_families(); for (uint32_t i = 0; i < static_cast(queueFamilies.size()); i++) { if (queueFamilies[i].queueFlags & VK_QUEUE_GRAPHICS_BIT) { graphicsQueueFamilyQueueCount_ = queueFamilies[i].queueCount; LOG_INFO("Graphics queue family ", i, " supports ", graphicsQueueFamilyQueueCount_, " queue(s)"); break; } } return true; } bool VkContext::createLogicalDevice() { vkb::DeviceBuilder deviceBuilder{vkbPhysicalDevice_}; // If the graphics queue family supports >= 2 queues, request a second one // for parallel texture/buffer uploads. Both queues share the same family // so no queue-ownership-transfer barriers are needed. const bool requestTransferQueue = (graphicsQueueFamilyQueueCount_ >= 2); if (requestTransferQueue) { // Build a custom queue description list: 2 queues from the graphics // family, 1 queue from every other family (so present etc. still work). auto families = vkbPhysicalDevice_.get_queue_families(); uint32_t gfxFamily = UINT32_MAX; for (uint32_t i = 0; i < static_cast(families.size()); i++) { if (families[i].queueFlags & VK_QUEUE_GRAPHICS_BIT) { gfxFamily = i; break; } } std::vector queueDescs; for (uint32_t i = 0; i < static_cast(families.size()); i++) { if (i == gfxFamily) { // Request 2 queues: [0] graphics, [1] transfer uploads queueDescs.emplace_back(i, std::vector{1.0f, 1.0f}); } else { queueDescs.emplace_back(i, std::vector{1.0f}); } } deviceBuilder.custom_queue_setup(queueDescs); } auto devRet = deviceBuilder.build(); if (!devRet) { LOG_ERROR("Failed to create Vulkan logical device: ", devRet.error().message()); return false; } auto vkbDevice = devRet.value(); device = vkbDevice.device; if (requestTransferQueue) { // With custom_queue_setup, we must retrieve queues manually. auto families = vkbPhysicalDevice_.get_queue_families(); uint32_t gfxFamily = UINT32_MAX; for (uint32_t i = 0; i < static_cast(families.size()); i++) { if (families[i].queueFlags & VK_QUEUE_GRAPHICS_BIT) { gfxFamily = i; break; } } graphicsQueueFamily = gfxFamily; vkGetDeviceQueue(device, gfxFamily, 0, &graphicsQueue); vkGetDeviceQueue(device, gfxFamily, 1, &transferQueue_); hasDedicatedTransfer_ = true; // Present queue: try the graphics family first (most common), otherwise // find a family that supports presentation. presentQueue = graphicsQueue; presentQueueFamily = gfxFamily; LOG_INFO("Dedicated transfer queue enabled (family ", gfxFamily, ", queue index 1)"); } else { // Standard path — let vkb resolve queues. auto gqRet = vkbDevice.get_queue(vkb::QueueType::graphics); if (!gqRet) { LOG_ERROR("Failed to get graphics queue"); return false; } graphicsQueue = gqRet.value(); graphicsQueueFamily = vkbDevice.get_queue_index(vkb::QueueType::graphics).value(); auto pqRet = vkbDevice.get_queue(vkb::QueueType::present); if (!pqRet) { presentQueue = graphicsQueue; presentQueueFamily = graphicsQueueFamily; } else { presentQueue = pqRet.value(); presentQueueFamily = vkbDevice.get_queue_index(vkb::QueueType::present).value(); } } LOG_INFO("Vulkan logical device created"); return true; } bool VkContext::createAllocator() { VmaAllocatorCreateInfo allocInfo{}; allocInfo.instance = instance; allocInfo.physicalDevice = physicalDevice; allocInfo.device = device; allocInfo.vulkanApiVersion = VK_API_VERSION_1_1; if (vmaCreateAllocator(&allocInfo, &allocator) != VK_SUCCESS) { LOG_ERROR("Failed to create VMA allocator"); return false; } LOG_INFO("VMA allocator created"); return true; } // --------------------------------------------------------------------------- // Pipeline cache persistence // --------------------------------------------------------------------------- static std::string getPipelineCachePath() { #ifdef _WIN32 if (const char* appdata = std::getenv("APPDATA")) return std::string(appdata) + "\\wowee\\pipeline_cache.bin"; return ".\\pipeline_cache.bin"; #elif defined(__APPLE__) if (const char* home = std::getenv("HOME")) return std::string(home) + "/Library/Caches/wowee/pipeline_cache.bin"; return "./pipeline_cache.bin"; #else if (const char* home = std::getenv("HOME")) return std::string(home) + "/.local/share/wowee/pipeline_cache.bin"; return "./pipeline_cache.bin"; #endif } bool VkContext::createPipelineCache() { // NVIDIA drivers have their own built-in pipeline/shader disk cache. // Using VkPipelineCache on NVIDIA 590.x causes vkCmdBeginRenderPass to // SIGSEGV inside libnvidia-glcore — skip entirely on NVIDIA GPUs. if (gpuVendorId_ == 0x10DE) { LOG_INFO("Pipeline cache: skipped (NVIDIA driver provides built-in caching)"); return true; } std::string path = getPipelineCachePath(); // Try to load existing cache data from disk. std::vector cacheData; { std::ifstream file(path, std::ios::binary | std::ios::ate); if (file.is_open()) { auto size = file.tellg(); if (size > 0) { cacheData.resize(static_cast(size)); file.seekg(0); file.read(cacheData.data(), size); if (!file) { LOG_WARNING("Pipeline cache file read failed, starting with empty cache"); cacheData.clear(); } } } } VkPipelineCacheCreateInfo cacheCI{}; cacheCI.sType = VK_STRUCTURE_TYPE_PIPELINE_CACHE_CREATE_INFO; cacheCI.initialDataSize = cacheData.size(); cacheCI.pInitialData = cacheData.empty() ? nullptr : cacheData.data(); VkResult result = vkCreatePipelineCache(device, &cacheCI, nullptr, &pipelineCache_); if (result != VK_SUCCESS) { // If loading stale/corrupt data caused failure, retry with empty cache. if (!cacheData.empty()) { LOG_WARNING("Pipeline cache creation failed with saved data, retrying empty"); cacheCI.initialDataSize = 0; cacheCI.pInitialData = nullptr; result = vkCreatePipelineCache(device, &cacheCI, nullptr, &pipelineCache_); } if (result != VK_SUCCESS) { LOG_WARNING("Pipeline cache creation failed — pipelines will not be cached"); pipelineCache_ = VK_NULL_HANDLE; return false; } } if (!cacheData.empty()) { LOG_INFO("Pipeline cache loaded from disk (", cacheData.size(), " bytes)"); } else { LOG_INFO("Pipeline cache created (empty)"); } return true; } void VkContext::savePipelineCache() { if (!pipelineCache_ || !device) return; size_t dataSize = 0; if (vkGetPipelineCacheData(device, pipelineCache_, &dataSize, nullptr) != VK_SUCCESS || dataSize == 0) { LOG_WARNING("Failed to query pipeline cache size"); return; } std::vector data(dataSize); if (vkGetPipelineCacheData(device, pipelineCache_, &dataSize, data.data()) != VK_SUCCESS) { LOG_WARNING("Failed to retrieve pipeline cache data"); return; } std::string path = getPipelineCachePath(); std::filesystem::create_directories(std::filesystem::path(path).parent_path()); std::ofstream file(path, std::ios::binary | std::ios::trunc); if (!file.is_open()) { LOG_WARNING("Failed to open pipeline cache file for writing: ", path); return; } file.write(data.data(), static_cast(dataSize)); file.close(); LOG_INFO("Pipeline cache saved to disk (", dataSize, " bytes)"); } bool VkContext::createSwapchain(int width, int height) { vkb::SwapchainBuilder swapchainBuilder{physicalDevice, device, surface}; auto& builder = swapchainBuilder .set_desired_format({VK_FORMAT_B8G8R8A8_UNORM, VK_COLOR_SPACE_SRGB_NONLINEAR_KHR}) .set_desired_extent(static_cast(width), static_cast(height)) .set_image_usage_flags(VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT) .set_desired_min_image_count(2) .set_old_swapchain(swapchain); if (vsync_) { builder.set_desired_present_mode(VK_PRESENT_MODE_FIFO_KHR); } else { builder.set_desired_present_mode(VK_PRESENT_MODE_IMMEDIATE_KHR); builder.add_fallback_present_mode(VK_PRESENT_MODE_MAILBOX_KHR); builder.add_fallback_present_mode(VK_PRESENT_MODE_FIFO_RELAXED_KHR); } auto swapRet = builder.build(); if (!swapRet) { LOG_ERROR("Failed to create Vulkan swapchain: ", swapRet.error().message()); return false; } // Destroy old swapchain if recreating if (swapchain != VK_NULL_HANDLE) { destroySwapchain(); } auto vkbSwap = swapRet.value(); swapchain = vkbSwap.swapchain; swapchainFormat = vkbSwap.image_format; swapchainExtent = vkbSwap.extent; swapchainImages = vkbSwap.get_images().value(); swapchainImageViews = vkbSwap.get_image_views().value(); // Create framebuffers for ImGui render pass (created after ImGui resources) // Will be created in createImGuiResources or recreateSwapchain LOG_INFO("Vulkan swapchain created: ", swapchainExtent.width, "x", swapchainExtent.height, " (", swapchainImages.size(), " images)"); swapchainDirty = false; return true; } void VkContext::destroySwapchain() { for (auto fb : swapchainFramebuffers) { if (fb) vkDestroyFramebuffer(device, fb, nullptr); } swapchainFramebuffers.clear(); for (auto iv : swapchainImageViews) { if (iv) vkDestroyImageView(device, iv, nullptr); } swapchainImageViews.clear(); swapchainImages.clear(); if (swapchain) { vkDestroySwapchainKHR(device, swapchain, nullptr); swapchain = VK_NULL_HANDLE; } } bool VkContext::createCommandPools() { // Per-frame command pools (resettable) for (uint32_t i = 0; i < MAX_FRAMES_IN_FLIGHT; i++) { VkCommandPoolCreateInfo poolInfo{}; poolInfo.sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO; poolInfo.flags = VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT; poolInfo.queueFamilyIndex = graphicsQueueFamily; if (vkCreateCommandPool(device, &poolInfo, nullptr, &frames[i].commandPool) != VK_SUCCESS) { LOG_ERROR("Failed to create command pool for frame ", i); return false; } VkCommandBufferAllocateInfo allocInfo{}; allocInfo.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO; allocInfo.commandPool = frames[i].commandPool; allocInfo.level = VK_COMMAND_BUFFER_LEVEL_PRIMARY; allocInfo.commandBufferCount = 1; if (vkAllocateCommandBuffers(device, &allocInfo, &frames[i].commandBuffer) != VK_SUCCESS) { LOG_ERROR("Failed to allocate command buffer for frame ", i); return false; } } // Immediate submit pool VkCommandPoolCreateInfo immPoolInfo{}; immPoolInfo.sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO; immPoolInfo.flags = VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT; immPoolInfo.queueFamilyIndex = graphicsQueueFamily; if (vkCreateCommandPool(device, &immPoolInfo, nullptr, &immCommandPool) != VK_SUCCESS) { LOG_ERROR("Failed to create immediate command pool"); return false; } // Separate command pool for the transfer queue (same family, different queue) if (hasDedicatedTransfer_) { VkCommandPoolCreateInfo transferPoolInfo{}; transferPoolInfo.sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO; transferPoolInfo.flags = VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT; transferPoolInfo.queueFamilyIndex = graphicsQueueFamily; if (vkCreateCommandPool(device, &transferPoolInfo, nullptr, &transferCommandPool_) != VK_SUCCESS) { LOG_ERROR("Failed to create transfer command pool"); return false; } } return true; } bool VkContext::createSyncObjects() { VkSemaphoreCreateInfo semInfo{}; semInfo.sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO; VkFenceCreateInfo fenceInfo{}; fenceInfo.sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO; fenceInfo.flags = VK_FENCE_CREATE_SIGNALED_BIT; // Start signaled so first frame doesn't block for (uint32_t i = 0; i < MAX_FRAMES_IN_FLIGHT; i++) { if (vkCreateFence(device, &fenceInfo, nullptr, &frames[i].inFlightFence) != VK_SUCCESS) { LOG_ERROR("Failed to create sync objects for frame ", i); return false; } } // Per-swapchain-image semaphores: avoids reuse while the presentation engine // still holds a reference. After acquiring image N we swap the acquire semaphore // into imageAcquiredSemaphores_[N], recycling the old one for the next acquire. const uint32_t imgCount = static_cast(swapchainImages.size()); imageAcquiredSemaphores_.resize(imgCount); renderFinishedSemaphores_.resize(imgCount); for (uint32_t i = 0; i < imgCount; i++) { if (vkCreateSemaphore(device, &semInfo, nullptr, &imageAcquiredSemaphores_[i]) != VK_SUCCESS || vkCreateSemaphore(device, &semInfo, nullptr, &renderFinishedSemaphores_[i]) != VK_SUCCESS) { LOG_ERROR("Failed to create per-image semaphores for image ", i); return false; } } // One extra acquire semaphore — we need it for the next vkAcquireNextImageKHR // before we know which image we'll get. if (vkCreateSemaphore(device, &semInfo, nullptr, &nextAcquireSemaphore_) != VK_SUCCESS) { LOG_ERROR("Failed to create next-acquire semaphore"); return false; } // Immediate submit fence (not signaled initially) VkFenceCreateInfo immFenceInfo{}; immFenceInfo.sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO; if (vkCreateFence(device, &immFenceInfo, nullptr, &immFence) != VK_SUCCESS) { LOG_ERROR("Failed to create immediate submit fence"); return false; } return true; } bool VkContext::createDepthBuffer() { VkImageCreateInfo imgInfo{}; imgInfo.sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO; imgInfo.imageType = VK_IMAGE_TYPE_2D; imgInfo.format = depthFormat; imgInfo.extent = {swapchainExtent.width, swapchainExtent.height, 1}; imgInfo.mipLevels = 1; imgInfo.arrayLayers = 1; imgInfo.samples = msaaSamples_; imgInfo.tiling = VK_IMAGE_TILING_OPTIMAL; imgInfo.usage = VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT; VmaAllocationCreateInfo allocInfo{}; allocInfo.usage = VMA_MEMORY_USAGE_GPU_ONLY; if (vmaCreateImage(allocator, &imgInfo, &allocInfo, &depthImage, &depthAllocation, nullptr) != VK_SUCCESS) { LOG_ERROR("Failed to create depth image"); return false; } VkImageViewCreateInfo viewInfo{}; viewInfo.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO; viewInfo.image = depthImage; viewInfo.viewType = VK_IMAGE_VIEW_TYPE_2D; viewInfo.format = depthFormat; viewInfo.subresourceRange.aspectMask = VK_IMAGE_ASPECT_DEPTH_BIT; viewInfo.subresourceRange.levelCount = 1; viewInfo.subresourceRange.layerCount = 1; if (vkCreateImageView(device, &viewInfo, nullptr, &depthImageView) != VK_SUCCESS) { LOG_ERROR("Failed to create depth image view"); return false; } return true; } void VkContext::destroyDepthBuffer() { if (depthImageView) { vkDestroyImageView(device, depthImageView, nullptr); depthImageView = VK_NULL_HANDLE; } if (depthImage) { vmaDestroyImage(allocator, depthImage, depthAllocation); depthImage = VK_NULL_HANDLE; depthAllocation = VK_NULL_HANDLE; } } bool VkContext::createMsaaColorImage() { if (msaaSamples_ == VK_SAMPLE_COUNT_1_BIT) return true; // No MSAA image needed // Check if lazily allocated memory is available — only use TRANSIENT when it is. // AMD GPUs (especially RDNA4) don't expose lazily allocated memory; using TRANSIENT // without it can cause the driver to optimize for tile-only storage, leading to // crashes during MSAA resolve when the backing memory was never populated. bool hasLazyMemory = false; VkPhysicalDeviceMemoryProperties memProps; vkGetPhysicalDeviceMemoryProperties(physicalDevice, &memProps); for (uint32_t i = 0; i < memProps.memoryTypeCount; i++) { if (memProps.memoryTypes[i].propertyFlags & VK_MEMORY_PROPERTY_LAZILY_ALLOCATED_BIT) { hasLazyMemory = true; break; } } VkImageCreateInfo imgInfo{}; imgInfo.sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO; imgInfo.imageType = VK_IMAGE_TYPE_2D; imgInfo.format = swapchainFormat; imgInfo.extent = {swapchainExtent.width, swapchainExtent.height, 1}; imgInfo.mipLevels = 1; imgInfo.arrayLayers = 1; imgInfo.samples = msaaSamples_; imgInfo.tiling = VK_IMAGE_TILING_OPTIMAL; VmaAllocationCreateInfo allocInfo{}; allocInfo.usage = VMA_MEMORY_USAGE_GPU_ONLY; if (hasLazyMemory) { imgInfo.usage = VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_TRANSIENT_ATTACHMENT_BIT; allocInfo.preferredFlags = VK_MEMORY_PROPERTY_LAZILY_ALLOCATED_BIT; } else { imgInfo.usage = VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT; } if (vmaCreateImage(allocator, &imgInfo, &allocInfo, &msaaColorImage_, &msaaColorAllocation_, nullptr) != VK_SUCCESS) { // Retry without TRANSIENT (some drivers reject it at high sample counts) imgInfo.usage = VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT; allocInfo.preferredFlags = 0; if (vmaCreateImage(allocator, &imgInfo, &allocInfo, &msaaColorImage_, &msaaColorAllocation_, nullptr) != VK_SUCCESS) { LOG_ERROR("Failed to create MSAA color image"); return false; } } VkImageViewCreateInfo viewInfo{}; viewInfo.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO; viewInfo.image = msaaColorImage_; viewInfo.viewType = VK_IMAGE_VIEW_TYPE_2D; viewInfo.format = swapchainFormat; viewInfo.subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; viewInfo.subresourceRange.levelCount = 1; viewInfo.subresourceRange.layerCount = 1; if (vkCreateImageView(device, &viewInfo, nullptr, &msaaColorView_) != VK_SUCCESS) { LOG_ERROR("Failed to create MSAA color image view"); return false; } return true; } void VkContext::destroyMsaaColorImage() { if (msaaColorView_) { vkDestroyImageView(device, msaaColorView_, nullptr); msaaColorView_ = VK_NULL_HANDLE; } if (msaaColorImage_) { vmaDestroyImage(allocator, msaaColorImage_, msaaColorAllocation_); msaaColorImage_ = VK_NULL_HANDLE; msaaColorAllocation_ = VK_NULL_HANDLE; } } bool VkContext::createDepthResolveImage() { if (msaaSamples_ == VK_SAMPLE_COUNT_1_BIT || !depthResolveSupported_) return true; VkImageCreateInfo imgInfo{}; imgInfo.sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO; imgInfo.imageType = VK_IMAGE_TYPE_2D; imgInfo.format = depthFormat; imgInfo.extent = {swapchainExtent.width, swapchainExtent.height, 1}; imgInfo.mipLevels = 1; imgInfo.arrayLayers = 1; imgInfo.samples = VK_SAMPLE_COUNT_1_BIT; imgInfo.tiling = VK_IMAGE_TILING_OPTIMAL; imgInfo.usage = VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT; VmaAllocationCreateInfo allocInfo{}; allocInfo.usage = VMA_MEMORY_USAGE_GPU_ONLY; if (vmaCreateImage(allocator, &imgInfo, &allocInfo, &depthResolveImage, &depthResolveAllocation, nullptr) != VK_SUCCESS) { LOG_ERROR("Failed to create depth resolve image"); return false; } VkImageViewCreateInfo viewInfo{}; viewInfo.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO; viewInfo.image = depthResolveImage; viewInfo.viewType = VK_IMAGE_VIEW_TYPE_2D; viewInfo.format = depthFormat; viewInfo.subresourceRange.aspectMask = VK_IMAGE_ASPECT_DEPTH_BIT; viewInfo.subresourceRange.levelCount = 1; viewInfo.subresourceRange.layerCount = 1; if (vkCreateImageView(device, &viewInfo, nullptr, &depthResolveImageView) != VK_SUCCESS) { LOG_ERROR("Failed to create depth resolve image view"); return false; } return true; } void VkContext::destroyDepthResolveImage() { if (depthResolveImageView) { vkDestroyImageView(device, depthResolveImageView, nullptr); depthResolveImageView = VK_NULL_HANDLE; } if (depthResolveImage) { vmaDestroyImage(allocator, depthResolveImage, depthResolveAllocation); depthResolveImage = VK_NULL_HANDLE; depthResolveAllocation = VK_NULL_HANDLE; } } VkSampleCountFlagBits VkContext::getMaxUsableSampleCount() const { VkPhysicalDeviceProperties props; vkGetPhysicalDeviceProperties(physicalDevice, &props); VkSampleCountFlags counts = props.limits.framebufferColorSampleCounts & props.limits.framebufferDepthSampleCounts; if (counts & VK_SAMPLE_COUNT_8_BIT) return VK_SAMPLE_COUNT_8_BIT; if (counts & VK_SAMPLE_COUNT_4_BIT) return VK_SAMPLE_COUNT_4_BIT; if (counts & VK_SAMPLE_COUNT_2_BIT) return VK_SAMPLE_COUNT_2_BIT; return VK_SAMPLE_COUNT_1_BIT; } void VkContext::setMsaaSamples(VkSampleCountFlagBits samples) { // Clamp to max supported VkSampleCountFlagBits maxSamples = getMaxUsableSampleCount(); if (samples > maxSamples) samples = maxSamples; msaaSamples_ = samples; swapchainDirty = true; } bool VkContext::createImGuiResources() { // Create depth buffer first if (!createDepthBuffer()) return false; // Create MSAA color image if needed if (!createMsaaColorImage()) return false; // Create single-sample depth resolve image for MSAA path (if supported) if (!createDepthResolveImage()) return false; bool useMsaa = (msaaSamples_ > VK_SAMPLE_COUNT_1_BIT); if (useMsaa) { const bool useDepthResolve = (depthResolveImageView != VK_NULL_HANDLE); // MSAA render pass: 3 or 4 attachments VkAttachmentDescription attachments[4] = {}; // Attachment 0: MSAA color target attachments[0].format = swapchainFormat; attachments[0].samples = msaaSamples_; attachments[0].loadOp = VK_ATTACHMENT_LOAD_OP_CLEAR; attachments[0].storeOp = VK_ATTACHMENT_STORE_OP_DONT_CARE; attachments[0].stencilLoadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE; attachments[0].stencilStoreOp = VK_ATTACHMENT_STORE_OP_DONT_CARE; attachments[0].initialLayout = VK_IMAGE_LAYOUT_UNDEFINED; attachments[0].finalLayout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL; // Attachment 1: Depth (multisampled) attachments[1].format = depthFormat; attachments[1].samples = msaaSamples_; attachments[1].loadOp = VK_ATTACHMENT_LOAD_OP_CLEAR; attachments[1].storeOp = VK_ATTACHMENT_STORE_OP_STORE; attachments[1].stencilLoadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE; attachments[1].stencilStoreOp = VK_ATTACHMENT_STORE_OP_DONT_CARE; attachments[1].initialLayout = VK_IMAGE_LAYOUT_UNDEFINED; attachments[1].finalLayout = VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL; // Attachment 2: Resolve target (swapchain image) attachments[2].format = swapchainFormat; attachments[2].samples = VK_SAMPLE_COUNT_1_BIT; attachments[2].loadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE; attachments[2].storeOp = VK_ATTACHMENT_STORE_OP_STORE; attachments[2].stencilLoadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE; attachments[2].stencilStoreOp = VK_ATTACHMENT_STORE_OP_DONT_CARE; attachments[2].initialLayout = VK_IMAGE_LAYOUT_UNDEFINED; attachments[2].finalLayout = VK_IMAGE_LAYOUT_PRESENT_SRC_KHR; if (useDepthResolve) { attachments[3].format = depthFormat; attachments[3].samples = VK_SAMPLE_COUNT_1_BIT; attachments[3].loadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE; attachments[3].storeOp = VK_ATTACHMENT_STORE_OP_STORE; attachments[3].stencilLoadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE; attachments[3].stencilStoreOp = VK_ATTACHMENT_STORE_OP_DONT_CARE; attachments[3].initialLayout = VK_IMAGE_LAYOUT_UNDEFINED; attachments[3].finalLayout = VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL; } if (useDepthResolve) { VkAttachmentDescription2 attachments2[4]{}; for (int i = 0; i < 4; ++i) { attachments2[i].sType = VK_STRUCTURE_TYPE_ATTACHMENT_DESCRIPTION_2; attachments2[i].format = attachments[i].format; attachments2[i].samples = attachments[i].samples; attachments2[i].loadOp = attachments[i].loadOp; attachments2[i].storeOp = attachments[i].storeOp; attachments2[i].stencilLoadOp = attachments[i].stencilLoadOp; attachments2[i].stencilStoreOp = attachments[i].stencilStoreOp; attachments2[i].initialLayout = attachments[i].initialLayout; attachments2[i].finalLayout = attachments[i].finalLayout; } VkAttachmentReference2 colorRef2{}; colorRef2.sType = VK_STRUCTURE_TYPE_ATTACHMENT_REFERENCE_2; colorRef2.attachment = 0; colorRef2.layout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL; VkAttachmentReference2 depthRef2{}; depthRef2.sType = VK_STRUCTURE_TYPE_ATTACHMENT_REFERENCE_2; depthRef2.attachment = 1; depthRef2.layout = VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL; VkAttachmentReference2 resolveRef2{}; resolveRef2.sType = VK_STRUCTURE_TYPE_ATTACHMENT_REFERENCE_2; resolveRef2.attachment = 2; resolveRef2.layout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL; VkAttachmentReference2 depthResolveRef2{}; depthResolveRef2.sType = VK_STRUCTURE_TYPE_ATTACHMENT_REFERENCE_2; depthResolveRef2.attachment = 3; depthResolveRef2.layout = VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL; VkSubpassDescriptionDepthStencilResolve dsResolve{}; dsResolve.sType = VK_STRUCTURE_TYPE_SUBPASS_DESCRIPTION_DEPTH_STENCIL_RESOLVE; dsResolve.depthResolveMode = depthResolveMode_; dsResolve.stencilResolveMode = VK_RESOLVE_MODE_NONE; dsResolve.pDepthStencilResolveAttachment = &depthResolveRef2; VkSubpassDescription2 subpass2{}; subpass2.sType = VK_STRUCTURE_TYPE_SUBPASS_DESCRIPTION_2; subpass2.pNext = &dsResolve; subpass2.pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS; subpass2.colorAttachmentCount = 1; subpass2.pColorAttachments = &colorRef2; subpass2.pDepthStencilAttachment = &depthRef2; subpass2.pResolveAttachments = &resolveRef2; VkSubpassDependency2 dep2{}; dep2.sType = VK_STRUCTURE_TYPE_SUBPASS_DEPENDENCY_2; dep2.srcSubpass = VK_SUBPASS_EXTERNAL; dep2.dstSubpass = 0; dep2.srcStageMask = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT | VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT; dep2.dstStageMask = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT | VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT; dep2.dstAccessMask = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT | VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT; VkRenderPassCreateInfo2 rpInfo2{}; rpInfo2.sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO_2; rpInfo2.attachmentCount = 4; rpInfo2.pAttachments = attachments2; rpInfo2.subpassCount = 1; rpInfo2.pSubpasses = &subpass2; rpInfo2.dependencyCount = 1; rpInfo2.pDependencies = &dep2; if (vkCreateRenderPass2(device, &rpInfo2, nullptr, &imguiRenderPass) != VK_SUCCESS) { LOG_ERROR("Failed to create MSAA render pass (depth resolve)"); return false; } } else { VkAttachmentReference colorRef{}; colorRef.attachment = 0; colorRef.layout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL; VkAttachmentReference depthRef{}; depthRef.attachment = 1; depthRef.layout = VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL; VkAttachmentReference resolveRef{}; resolveRef.attachment = 2; resolveRef.layout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL; VkSubpassDescription subpass{}; subpass.pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS; subpass.colorAttachmentCount = 1; subpass.pColorAttachments = &colorRef; subpass.pDepthStencilAttachment = &depthRef; subpass.pResolveAttachments = &resolveRef; VkSubpassDependency dependency{}; dependency.srcSubpass = VK_SUBPASS_EXTERNAL; dependency.dstSubpass = 0; dependency.srcStageMask = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT | VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT; dependency.dstStageMask = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT | VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT; dependency.srcAccessMask = 0; dependency.dstAccessMask = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT | VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT; VkRenderPassCreateInfo rpInfo{}; rpInfo.sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO; rpInfo.attachmentCount = 3; rpInfo.pAttachments = attachments; rpInfo.subpassCount = 1; rpInfo.pSubpasses = &subpass; rpInfo.dependencyCount = 1; rpInfo.pDependencies = &dependency; if (vkCreateRenderPass(device, &rpInfo, nullptr, &imguiRenderPass) != VK_SUCCESS) { LOG_ERROR("Failed to create MSAA render pass"); return false; } } // Framebuffers: [msaaColorView, depthView, swapchainView, depthResolveView?] swapchainFramebuffers.resize(swapchainImageViews.size()); for (size_t i = 0; i < swapchainImageViews.size(); i++) { VkImageView fbAttachments[4] = {msaaColorView_, depthImageView, swapchainImageViews[i], depthResolveImageView}; VkFramebufferCreateInfo fbInfo{}; fbInfo.sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO; fbInfo.renderPass = imguiRenderPass; fbInfo.attachmentCount = useDepthResolve ? 4 : 3; fbInfo.pAttachments = fbAttachments; fbInfo.width = swapchainExtent.width; fbInfo.height = swapchainExtent.height; fbInfo.layers = 1; if (vkCreateFramebuffer(device, &fbInfo, nullptr, &swapchainFramebuffers[i]) != VK_SUCCESS) { LOG_ERROR("Failed to create MSAA swapchain framebuffer ", i); return false; } } } else { // Non-MSAA render pass: 2 attachments (color + depth) — original path VkAttachmentDescription attachments[2] = {}; // Color attachment (swapchain image) attachments[0].format = swapchainFormat; attachments[0].samples = VK_SAMPLE_COUNT_1_BIT; attachments[0].loadOp = VK_ATTACHMENT_LOAD_OP_CLEAR; attachments[0].storeOp = VK_ATTACHMENT_STORE_OP_STORE; attachments[0].stencilLoadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE; attachments[0].stencilStoreOp = VK_ATTACHMENT_STORE_OP_DONT_CARE; attachments[0].initialLayout = VK_IMAGE_LAYOUT_UNDEFINED; attachments[0].finalLayout = VK_IMAGE_LAYOUT_PRESENT_SRC_KHR; // Depth attachment attachments[1].format = depthFormat; attachments[1].samples = VK_SAMPLE_COUNT_1_BIT; attachments[1].loadOp = VK_ATTACHMENT_LOAD_OP_CLEAR; attachments[1].storeOp = VK_ATTACHMENT_STORE_OP_STORE; attachments[1].stencilLoadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE; attachments[1].stencilStoreOp = VK_ATTACHMENT_STORE_OP_DONT_CARE; attachments[1].initialLayout = VK_IMAGE_LAYOUT_UNDEFINED; attachments[1].finalLayout = VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL; VkAttachmentReference colorRef{}; colorRef.attachment = 0; colorRef.layout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL; VkAttachmentReference depthRef{}; depthRef.attachment = 1; depthRef.layout = VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL; VkSubpassDescription subpass{}; subpass.pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS; subpass.colorAttachmentCount = 1; subpass.pColorAttachments = &colorRef; subpass.pDepthStencilAttachment = &depthRef; VkSubpassDependency dependency{}; dependency.srcSubpass = VK_SUBPASS_EXTERNAL; dependency.dstSubpass = 0; dependency.srcStageMask = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT | VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT; dependency.dstStageMask = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT | VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT; dependency.srcAccessMask = 0; dependency.dstAccessMask = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT | VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT; VkRenderPassCreateInfo rpInfo{}; rpInfo.sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO; rpInfo.attachmentCount = 2; rpInfo.pAttachments = attachments; rpInfo.subpassCount = 1; rpInfo.pSubpasses = &subpass; rpInfo.dependencyCount = 1; rpInfo.pDependencies = &dependency; if (vkCreateRenderPass(device, &rpInfo, nullptr, &imguiRenderPass) != VK_SUCCESS) { LOG_ERROR("Failed to create render pass"); return false; } // Framebuffers: [swapchainView, depthView] swapchainFramebuffers.resize(swapchainImageViews.size()); for (size_t i = 0; i < swapchainImageViews.size(); i++) { VkImageView fbAttachments[2] = {swapchainImageViews[i], depthImageView}; VkFramebufferCreateInfo fbInfo{}; fbInfo.sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO; fbInfo.renderPass = imguiRenderPass; fbInfo.attachmentCount = 2; fbInfo.pAttachments = fbAttachments; fbInfo.width = swapchainExtent.width; fbInfo.height = swapchainExtent.height; fbInfo.layers = 1; if (vkCreateFramebuffer(device, &fbInfo, nullptr, &swapchainFramebuffers[i]) != VK_SUCCESS) { LOG_ERROR("Failed to create swapchain framebuffer ", i); return false; } } } // Create descriptor pool for ImGui. // Budget: ~10 internal ImGui sets + up to 2000 UI icon textures (spells, // items, talents, buffs, etc.) that are uploaded and cached for the session. static constexpr uint32_t IMGUI_POOL_SIZE = 2048; VkDescriptorPoolSize poolSizes[] = { {VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, IMGUI_POOL_SIZE}, }; VkDescriptorPoolCreateInfo dpInfo{}; dpInfo.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO; dpInfo.flags = VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT; dpInfo.maxSets = IMGUI_POOL_SIZE; dpInfo.poolSizeCount = 1; dpInfo.pPoolSizes = poolSizes; if (vkCreateDescriptorPool(device, &dpInfo, nullptr, &imguiDescriptorPool) != VK_SUCCESS) { LOG_ERROR("Failed to create ImGui descriptor pool"); return false; } return true; } void VkContext::destroyImGuiResources() { // Destroy uploaded UI textures for (auto& tex : uiTextures_) { if (tex.view) vkDestroyImageView(device, tex.view, nullptr); if (tex.image) vkDestroyImage(device, tex.image, nullptr); if (tex.memory) vkFreeMemory(device, tex.memory, nullptr); } uiTextures_.clear(); uiTextureSampler_ = VK_NULL_HANDLE; // Owned by sampler cache if (imguiDescriptorPool) { vkDestroyDescriptorPool(device, imguiDescriptorPool, nullptr); imguiDescriptorPool = VK_NULL_HANDLE; } destroyMsaaColorImage(); destroyDepthResolveImage(); destroyDepthBuffer(); // Framebuffers are destroyed in destroySwapchain() if (imguiRenderPass) { vkDestroyRenderPass(device, imguiRenderPass, nullptr); imguiRenderPass = VK_NULL_HANDLE; } } static uint32_t findMemType(VkPhysicalDevice physDev, uint32_t typeFilter, VkMemoryPropertyFlags props) { VkPhysicalDeviceMemoryProperties memProps; vkGetPhysicalDeviceMemoryProperties(physDev, &memProps); for (uint32_t i = 0; i < memProps.memoryTypeCount; i++) { if ((typeFilter & (1 << i)) && (memProps.memoryTypes[i].propertyFlags & props) == props) return i; } LOG_ERROR("VkContext: no suitable memory type found"); return UINT32_MAX; } VkDescriptorSet VkContext::uploadImGuiTexture(const uint8_t* rgba, int width, int height) { if (!device || !physicalDevice || width <= 0 || height <= 0 || !rgba) return VK_NULL_HANDLE; VkDeviceSize imageSize = static_cast(width) * height * 4; // Create shared sampler on first call (via sampler cache) if (!uiTextureSampler_) { VkSamplerCreateInfo si{}; si.sType = VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO; si.magFilter = VK_FILTER_LINEAR; si.minFilter = VK_FILTER_LINEAR; si.addressModeU = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE; si.addressModeV = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE; si.addressModeW = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE; uiTextureSampler_ = getOrCreateSampler(si); if (!uiTextureSampler_) { LOG_ERROR("Failed to create UI texture sampler"); return VK_NULL_HANDLE; } } // Staging buffer VkBuffer stagingBuffer; VkDeviceMemory stagingMemory; { VkBufferCreateInfo bufInfo{}; bufInfo.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO; bufInfo.size = imageSize; bufInfo.usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT; bufInfo.sharingMode = VK_SHARING_MODE_EXCLUSIVE; if (vkCreateBuffer(device, &bufInfo, nullptr, &stagingBuffer) != VK_SUCCESS) return VK_NULL_HANDLE; VkMemoryRequirements memReqs; vkGetBufferMemoryRequirements(device, stagingBuffer, &memReqs); VkMemoryAllocateInfo allocInfo{}; allocInfo.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO; allocInfo.allocationSize = memReqs.size; allocInfo.memoryTypeIndex = findMemType(physicalDevice, memReqs.memoryTypeBits, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT); if (vkAllocateMemory(device, &allocInfo, nullptr, &stagingMemory) != VK_SUCCESS) { vkDestroyBuffer(device, stagingBuffer, nullptr); return VK_NULL_HANDLE; } vkBindBufferMemory(device, stagingBuffer, stagingMemory, 0); void* mapped; vkMapMemory(device, stagingMemory, 0, imageSize, 0, &mapped); memcpy(mapped, rgba, imageSize); vkUnmapMemory(device, stagingMemory); } // Create image VkImage image; VkDeviceMemory imageMemory; { VkImageCreateInfo imgInfo{}; imgInfo.sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO; imgInfo.imageType = VK_IMAGE_TYPE_2D; imgInfo.format = VK_FORMAT_R8G8B8A8_UNORM; imgInfo.extent = {static_cast(width), static_cast(height), 1}; imgInfo.mipLevels = 1; imgInfo.arrayLayers = 1; imgInfo.samples = VK_SAMPLE_COUNT_1_BIT; imgInfo.tiling = VK_IMAGE_TILING_OPTIMAL; imgInfo.usage = VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_SAMPLED_BIT; imgInfo.sharingMode = VK_SHARING_MODE_EXCLUSIVE; imgInfo.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED; if (vkCreateImage(device, &imgInfo, nullptr, &image) != VK_SUCCESS) { vkDestroyBuffer(device, stagingBuffer, nullptr); vkFreeMemory(device, stagingMemory, nullptr); return VK_NULL_HANDLE; } VkMemoryRequirements memReqs; vkGetImageMemoryRequirements(device, image, &memReqs); VkMemoryAllocateInfo allocInfo{}; allocInfo.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO; allocInfo.allocationSize = memReqs.size; allocInfo.memoryTypeIndex = findMemType(physicalDevice, memReqs.memoryTypeBits, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT); if (vkAllocateMemory(device, &allocInfo, nullptr, &imageMemory) != VK_SUCCESS) { vkDestroyImage(device, image, nullptr); vkDestroyBuffer(device, stagingBuffer, nullptr); vkFreeMemory(device, stagingMemory, nullptr); return VK_NULL_HANDLE; } vkBindImageMemory(device, image, imageMemory, 0); } // Upload via immediate submit immediateSubmit([&](VkCommandBuffer cmd) { VkImageMemoryBarrier barrier{}; barrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER; barrier.oldLayout = VK_IMAGE_LAYOUT_UNDEFINED; barrier.newLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL; barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; barrier.image = image; barrier.subresourceRange = {VK_IMAGE_ASPECT_COLOR_BIT, 0, 1, 0, 1}; barrier.srcAccessMask = 0; barrier.dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT; vkCmdPipelineBarrier(cmd, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, 0, 0, nullptr, 0, nullptr, 1, &barrier); VkBufferImageCopy region{}; region.imageSubresource = {VK_IMAGE_ASPECT_COLOR_BIT, 0, 0, 1}; region.imageExtent = {static_cast(width), static_cast(height), 1}; vkCmdCopyBufferToImage(cmd, stagingBuffer, image, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, 1, ®ion); barrier.oldLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL; barrier.newLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; barrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT; barrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT; vkCmdPipelineBarrier(cmd, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, 0, 0, nullptr, 0, nullptr, 1, &barrier); }); // Cleanup staging vkDestroyBuffer(device, stagingBuffer, nullptr); vkFreeMemory(device, stagingMemory, nullptr); // Create image view VkImageView imageView; { VkImageViewCreateInfo viewInfo{}; viewInfo.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO; viewInfo.image = image; viewInfo.viewType = VK_IMAGE_VIEW_TYPE_2D; viewInfo.format = VK_FORMAT_R8G8B8A8_UNORM; viewInfo.subresourceRange = {VK_IMAGE_ASPECT_COLOR_BIT, 0, 1, 0, 1}; if (vkCreateImageView(device, &viewInfo, nullptr, &imageView) != VK_SUCCESS) { vkDestroyImage(device, image, nullptr); vkFreeMemory(device, imageMemory, nullptr); return VK_NULL_HANDLE; } } // Register with ImGui (allocates from imguiDescriptorPool) VkDescriptorSet ds = ImGui_ImplVulkan_AddTexture(uiTextureSampler_, imageView, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL); if (!ds) { LOG_ERROR("ImGui descriptor pool exhausted — cannot upload UI texture"); vkDestroyImageView(device, imageView, nullptr); vkDestroyImage(device, image, nullptr); vkFreeMemory(device, imageMemory, nullptr); return VK_NULL_HANDLE; } // Track for cleanup uiTextures_.push_back({image, imageMemory, imageView}); return ds; } bool VkContext::recreateSwapchain(int width, int height) { vkDeviceWaitIdle(device); // Destroy old framebuffers for (auto fb : swapchainFramebuffers) { if (fb) vkDestroyFramebuffer(device, fb, nullptr); } swapchainFramebuffers.clear(); // Destroy old image views for (auto iv : swapchainImageViews) { if (iv) vkDestroyImageView(device, iv, nullptr); } swapchainImageViews.clear(); VkSwapchainKHR oldSwapchain = swapchain; vkb::SwapchainBuilder swapchainBuilder{physicalDevice, device, surface}; auto& builder = swapchainBuilder .set_desired_format({VK_FORMAT_B8G8R8A8_UNORM, VK_COLOR_SPACE_SRGB_NONLINEAR_KHR}) .set_desired_extent(static_cast(width), static_cast(height)) .set_image_usage_flags(VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT) .set_desired_min_image_count(2) .set_old_swapchain(oldSwapchain); if (vsync_) { builder.set_desired_present_mode(VK_PRESENT_MODE_FIFO_KHR); } else { builder.set_desired_present_mode(VK_PRESENT_MODE_IMMEDIATE_KHR); builder.add_fallback_present_mode(VK_PRESENT_MODE_MAILBOX_KHR); builder.add_fallback_present_mode(VK_PRESENT_MODE_FIFO_RELAXED_KHR); } auto swapRet = builder.build(); if (!swapRet) { // Destroy old swapchain now that we failed (it can't be used either) if (oldSwapchain) { vkDestroySwapchainKHR(device, oldSwapchain, nullptr); swapchain = VK_NULL_HANDLE; } LOG_ERROR("Failed to recreate swapchain: ", swapRet.error().message()); // Keep swapchainDirty=true so the next frame retries swapchainDirty = true; return false; } // Success — safe to retire the old swapchain if (oldSwapchain) { vkDestroySwapchainKHR(device, oldSwapchain, nullptr); } auto vkbSwap = swapRet.value(); swapchain = vkbSwap.swapchain; swapchainFormat = vkbSwap.image_format; swapchainExtent = vkbSwap.extent; swapchainImages = vkbSwap.get_images().value(); swapchainImageViews = vkbSwap.get_image_views().value(); // Resize per-image semaphore arrays if the swapchain image count changed { const uint32_t newCount = static_cast(swapchainImages.size()); const uint32_t oldCount = static_cast(imageAcquiredSemaphores_.size()); VkSemaphoreCreateInfo semInfo{}; semInfo.sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO; // Destroy excess semaphores if shrinking for (uint32_t i = newCount; i < oldCount; i++) { if (imageAcquiredSemaphores_[i]) vkDestroySemaphore(device, imageAcquiredSemaphores_[i], nullptr); if (renderFinishedSemaphores_[i]) vkDestroySemaphore(device, renderFinishedSemaphores_[i], nullptr); } imageAcquiredSemaphores_.resize(newCount); renderFinishedSemaphores_.resize(newCount); // Create new semaphores if growing for (uint32_t i = oldCount; i < newCount; i++) { vkCreateSemaphore(device, &semInfo, nullptr, &imageAcquiredSemaphores_[i]); vkCreateSemaphore(device, &semInfo, nullptr, &renderFinishedSemaphores_[i]); } } // Recreate depth buffer + MSAA color image + depth resolve image destroyMsaaColorImage(); destroyDepthResolveImage(); destroyDepthBuffer(); // Destroy old render pass (needs recreation if MSAA changed) if (imguiRenderPass) { vkDestroyRenderPass(device, imguiRenderPass, nullptr); imguiRenderPass = VK_NULL_HANDLE; } if (!createDepthBuffer()) return false; if (!createMsaaColorImage()) return false; if (!createDepthResolveImage()) return false; bool useMsaa = (msaaSamples_ > VK_SAMPLE_COUNT_1_BIT); if (useMsaa) { const bool useDepthResolve = (depthResolveImageView != VK_NULL_HANDLE); // MSAA render pass: 3 or 4 attachments VkAttachmentDescription attachments[4] = {}; attachments[0].format = swapchainFormat; attachments[0].samples = msaaSamples_; attachments[0].loadOp = VK_ATTACHMENT_LOAD_OP_CLEAR; attachments[0].storeOp = VK_ATTACHMENT_STORE_OP_DONT_CARE; attachments[0].stencilLoadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE; attachments[0].stencilStoreOp = VK_ATTACHMENT_STORE_OP_DONT_CARE; attachments[0].initialLayout = VK_IMAGE_LAYOUT_UNDEFINED; attachments[0].finalLayout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL; attachments[1].format = depthFormat; attachments[1].samples = msaaSamples_; attachments[1].loadOp = VK_ATTACHMENT_LOAD_OP_CLEAR; attachments[1].storeOp = VK_ATTACHMENT_STORE_OP_STORE; attachments[1].stencilLoadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE; attachments[1].stencilStoreOp = VK_ATTACHMENT_STORE_OP_DONT_CARE; attachments[1].initialLayout = VK_IMAGE_LAYOUT_UNDEFINED; attachments[1].finalLayout = VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL; attachments[2].format = swapchainFormat; attachments[2].samples = VK_SAMPLE_COUNT_1_BIT; attachments[2].loadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE; attachments[2].storeOp = VK_ATTACHMENT_STORE_OP_STORE; attachments[2].stencilLoadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE; attachments[2].stencilStoreOp = VK_ATTACHMENT_STORE_OP_DONT_CARE; attachments[2].initialLayout = VK_IMAGE_LAYOUT_UNDEFINED; attachments[2].finalLayout = VK_IMAGE_LAYOUT_PRESENT_SRC_KHR; if (useDepthResolve) { attachments[3].format = depthFormat; attachments[3].samples = VK_SAMPLE_COUNT_1_BIT; attachments[3].loadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE; attachments[3].storeOp = VK_ATTACHMENT_STORE_OP_STORE; attachments[3].stencilLoadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE; attachments[3].stencilStoreOp = VK_ATTACHMENT_STORE_OP_DONT_CARE; attachments[3].initialLayout = VK_IMAGE_LAYOUT_UNDEFINED; attachments[3].finalLayout = VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL; } if (useDepthResolve) { VkAttachmentDescription2 attachments2[4]{}; for (int i = 0; i < 4; ++i) { attachments2[i].sType = VK_STRUCTURE_TYPE_ATTACHMENT_DESCRIPTION_2; attachments2[i].format = attachments[i].format; attachments2[i].samples = attachments[i].samples; attachments2[i].loadOp = attachments[i].loadOp; attachments2[i].storeOp = attachments[i].storeOp; attachments2[i].stencilLoadOp = attachments[i].stencilLoadOp; attachments2[i].stencilStoreOp = attachments[i].stencilStoreOp; attachments2[i].initialLayout = attachments[i].initialLayout; attachments2[i].finalLayout = attachments[i].finalLayout; } VkAttachmentReference2 colorRef2{}; colorRef2.sType = VK_STRUCTURE_TYPE_ATTACHMENT_REFERENCE_2; colorRef2.attachment = 0; colorRef2.layout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL; VkAttachmentReference2 depthRef2{}; depthRef2.sType = VK_STRUCTURE_TYPE_ATTACHMENT_REFERENCE_2; depthRef2.attachment = 1; depthRef2.layout = VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL; VkAttachmentReference2 resolveRef2{}; resolveRef2.sType = VK_STRUCTURE_TYPE_ATTACHMENT_REFERENCE_2; resolveRef2.attachment = 2; resolveRef2.layout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL; VkAttachmentReference2 depthResolveRef2{}; depthResolveRef2.sType = VK_STRUCTURE_TYPE_ATTACHMENT_REFERENCE_2; depthResolveRef2.attachment = 3; depthResolveRef2.layout = VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL; VkSubpassDescriptionDepthStencilResolve dsResolve{}; dsResolve.sType = VK_STRUCTURE_TYPE_SUBPASS_DESCRIPTION_DEPTH_STENCIL_RESOLVE; dsResolve.depthResolveMode = depthResolveMode_; dsResolve.stencilResolveMode = VK_RESOLVE_MODE_NONE; dsResolve.pDepthStencilResolveAttachment = &depthResolveRef2; VkSubpassDescription2 subpass2{}; subpass2.sType = VK_STRUCTURE_TYPE_SUBPASS_DESCRIPTION_2; subpass2.pNext = &dsResolve; subpass2.pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS; subpass2.colorAttachmentCount = 1; subpass2.pColorAttachments = &colorRef2; subpass2.pDepthStencilAttachment = &depthRef2; subpass2.pResolveAttachments = &resolveRef2; VkSubpassDependency2 dep2{}; dep2.sType = VK_STRUCTURE_TYPE_SUBPASS_DEPENDENCY_2; dep2.srcSubpass = VK_SUBPASS_EXTERNAL; dep2.dstSubpass = 0; dep2.srcStageMask = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT | VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT; dep2.dstStageMask = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT | VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT; dep2.dstAccessMask = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT | VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT; VkRenderPassCreateInfo2 rpInfo2{}; rpInfo2.sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO_2; rpInfo2.attachmentCount = 4; rpInfo2.pAttachments = attachments2; rpInfo2.subpassCount = 1; rpInfo2.pSubpasses = &subpass2; rpInfo2.dependencyCount = 1; rpInfo2.pDependencies = &dep2; if (vkCreateRenderPass2(device, &rpInfo2, nullptr, &imguiRenderPass) != VK_SUCCESS) { LOG_ERROR("Failed to recreate MSAA render pass (depth resolve)"); return false; } } else { VkAttachmentReference colorRef{0, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL}; VkAttachmentReference depthRef{1, VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL}; VkAttachmentReference resolveRef{2, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL}; VkSubpassDescription subpass{}; subpass.pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS; subpass.colorAttachmentCount = 1; subpass.pColorAttachments = &colorRef; subpass.pDepthStencilAttachment = &depthRef; subpass.pResolveAttachments = &resolveRef; VkSubpassDependency dependency{}; dependency.srcSubpass = VK_SUBPASS_EXTERNAL; dependency.dstSubpass = 0; dependency.srcStageMask = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT | VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT; dependency.dstStageMask = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT | VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT; dependency.srcAccessMask = 0; dependency.dstAccessMask = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT | VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT; VkRenderPassCreateInfo rpInfo{}; rpInfo.sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO; rpInfo.attachmentCount = 3; rpInfo.pAttachments = attachments; rpInfo.subpassCount = 1; rpInfo.pSubpasses = &subpass; rpInfo.dependencyCount = 1; rpInfo.pDependencies = &dependency; if (vkCreateRenderPass(device, &rpInfo, nullptr, &imguiRenderPass) != VK_SUCCESS) { LOG_ERROR("Failed to recreate MSAA render pass"); return false; } } swapchainFramebuffers.resize(swapchainImageViews.size()); for (size_t i = 0; i < swapchainImageViews.size(); i++) { VkImageView fbAttachments[4] = {msaaColorView_, depthImageView, swapchainImageViews[i], depthResolveImageView}; VkFramebufferCreateInfo fbInfo{}; fbInfo.sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO; fbInfo.renderPass = imguiRenderPass; fbInfo.attachmentCount = useDepthResolve ? 4 : 3; fbInfo.pAttachments = fbAttachments; fbInfo.width = swapchainExtent.width; fbInfo.height = swapchainExtent.height; fbInfo.layers = 1; if (vkCreateFramebuffer(device, &fbInfo, nullptr, &swapchainFramebuffers[i]) != VK_SUCCESS) { LOG_ERROR("Failed to recreate MSAA swapchain framebuffer ", i); return false; } } } else { // Non-MSAA render pass: 2 attachments VkAttachmentDescription attachments[2] = {}; attachments[0].format = swapchainFormat; attachments[0].samples = VK_SAMPLE_COUNT_1_BIT; attachments[0].loadOp = VK_ATTACHMENT_LOAD_OP_CLEAR; attachments[0].storeOp = VK_ATTACHMENT_STORE_OP_STORE; attachments[0].stencilLoadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE; attachments[0].stencilStoreOp = VK_ATTACHMENT_STORE_OP_DONT_CARE; attachments[0].initialLayout = VK_IMAGE_LAYOUT_UNDEFINED; attachments[0].finalLayout = VK_IMAGE_LAYOUT_PRESENT_SRC_KHR; attachments[1].format = depthFormat; attachments[1].samples = VK_SAMPLE_COUNT_1_BIT; attachments[1].loadOp = VK_ATTACHMENT_LOAD_OP_CLEAR; attachments[1].storeOp = VK_ATTACHMENT_STORE_OP_STORE; attachments[1].stencilLoadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE; attachments[1].stencilStoreOp = VK_ATTACHMENT_STORE_OP_DONT_CARE; attachments[1].initialLayout = VK_IMAGE_LAYOUT_UNDEFINED; attachments[1].finalLayout = VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL; VkAttachmentReference colorRef{0, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL}; VkAttachmentReference depthRef{1, VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL}; VkSubpassDescription subpass{}; subpass.pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS; subpass.colorAttachmentCount = 1; subpass.pColorAttachments = &colorRef; subpass.pDepthStencilAttachment = &depthRef; VkSubpassDependency dependency{}; dependency.srcSubpass = VK_SUBPASS_EXTERNAL; dependency.dstSubpass = 0; dependency.srcStageMask = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT | VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT; dependency.dstStageMask = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT | VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT; dependency.srcAccessMask = 0; dependency.dstAccessMask = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT | VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT; VkRenderPassCreateInfo rpInfo{}; rpInfo.sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO; rpInfo.attachmentCount = 2; rpInfo.pAttachments = attachments; rpInfo.subpassCount = 1; rpInfo.pSubpasses = &subpass; rpInfo.dependencyCount = 1; rpInfo.pDependencies = &dependency; if (vkCreateRenderPass(device, &rpInfo, nullptr, &imguiRenderPass) != VK_SUCCESS) { LOG_ERROR("Failed to recreate render pass"); return false; } swapchainFramebuffers.resize(swapchainImageViews.size()); for (size_t i = 0; i < swapchainImageViews.size(); i++) { VkImageView fbAttachments[2] = {swapchainImageViews[i], depthImageView}; VkFramebufferCreateInfo fbInfo{}; fbInfo.sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO; fbInfo.renderPass = imguiRenderPass; fbInfo.attachmentCount = 2; fbInfo.pAttachments = fbAttachments; fbInfo.width = swapchainExtent.width; fbInfo.height = swapchainExtent.height; fbInfo.layers = 1; if (vkCreateFramebuffer(device, &fbInfo, nullptr, &swapchainFramebuffers[i]) != VK_SUCCESS) { LOG_ERROR("Failed to recreate swapchain framebuffer ", i); return false; } } } swapchainDirty = false; LOG_INFO("Swapchain recreated: ", swapchainExtent.width, "x", swapchainExtent.height); return true; } VkCommandBuffer VkContext::beginFrame(uint32_t& imageIndex) { if (deviceLost_) return VK_NULL_HANDLE; if (swapchain == VK_NULL_HANDLE) return VK_NULL_HANDLE; // Swapchain lost; recreate pending auto& frame = frames[currentFrame]; // Wait for this frame's fence (with timeout to detect GPU hangs) static int beginFrameCounter = 0; beginFrameCounter++; VkResult fenceResult = vkWaitForFences(device, 1, &frame.inFlightFence, VK_TRUE, 5000000000ULL); // 5 second timeout if (fenceResult == VK_TIMEOUT) { LOG_ERROR("beginFrame[", beginFrameCounter, "] FENCE TIMEOUT (5s) on frame slot ", currentFrame, " — GPU hang detected!"); return VK_NULL_HANDLE; } if (fenceResult != VK_SUCCESS) { LOG_ERROR("beginFrame[", beginFrameCounter, "] fence wait failed: ", static_cast(fenceResult)); if (fenceResult == VK_ERROR_DEVICE_LOST) { deviceLost_ = true; } return VK_NULL_HANDLE; } // Any work queued for this frame slot is now guaranteed to be unused by the GPU. runDeferredCleanup(currentFrame); // Acquire next swapchain image using the free semaphore. // After acquiring we swap it into the per-image slot so the old per-image // semaphore (now released by the presentation engine) becomes the free one. VkResult result = vkAcquireNextImageKHR(device, swapchain, UINT64_MAX, nextAcquireSemaphore_, VK_NULL_HANDLE, &imageIndex); if (result == VK_ERROR_OUT_OF_DATE_KHR) { swapchainDirty = true; return VK_NULL_HANDLE; } if (result != VK_SUCCESS && result != VK_SUBOPTIMAL_KHR) { LOG_ERROR("Failed to acquire swapchain image"); return VK_NULL_HANDLE; } // Swap semaphores: the image's old acquire semaphore is now free (the presentation // engine released it when this image was re-acquired). The semaphore we just used // becomes the per-image one for submit/present. currentAcquireSemaphore_ = nextAcquireSemaphore_; nextAcquireSemaphore_ = imageAcquiredSemaphores_[imageIndex]; imageAcquiredSemaphores_[imageIndex] = currentAcquireSemaphore_; vkResetFences(device, 1, &frame.inFlightFence); vkResetCommandBuffer(frame.commandBuffer, 0); VkCommandBufferBeginInfo beginInfo{}; beginInfo.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO; beginInfo.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT; vkBeginCommandBuffer(frame.commandBuffer, &beginInfo); return frame.commandBuffer; } void VkContext::endFrame(VkCommandBuffer cmd, uint32_t imageIndex) { static int endFrameCounter = 0; endFrameCounter++; VkResult endResult = vkEndCommandBuffer(cmd); if (endResult != VK_SUCCESS) { LOG_ERROR("endFrame[", endFrameCounter, "] vkEndCommandBuffer FAILED: ", static_cast(endResult)); } auto& frame = frames[currentFrame]; VkPipelineStageFlags waitStage = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT; // Use per-image semaphores: acquire semaphore was swapped into the per-image // slot in beginFrame; renderFinished is also indexed by the acquired image. VkSemaphore& acquireSem = imageAcquiredSemaphores_[imageIndex]; VkSemaphore& renderSem = renderFinishedSemaphores_[imageIndex]; VkSubmitInfo submitInfo{}; submitInfo.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO; submitInfo.waitSemaphoreCount = 1; submitInfo.pWaitSemaphores = &acquireSem; submitInfo.pWaitDstStageMask = &waitStage; submitInfo.commandBufferCount = 1; submitInfo.pCommandBuffers = &cmd; submitInfo.signalSemaphoreCount = 1; submitInfo.pSignalSemaphores = &renderSem; VkResult submitResult = vkQueueSubmit(graphicsQueue, 1, &submitInfo, frame.inFlightFence); if (submitResult != VK_SUCCESS) { LOG_ERROR("endFrame[", endFrameCounter, "] vkQueueSubmit FAILED: ", static_cast(submitResult)); if (submitResult == VK_ERROR_DEVICE_LOST) { deviceLost_ = true; } } VkPresentInfoKHR presentInfo{}; presentInfo.sType = VK_STRUCTURE_TYPE_PRESENT_INFO_KHR; presentInfo.waitSemaphoreCount = 1; presentInfo.pWaitSemaphores = &renderSem; presentInfo.swapchainCount = 1; presentInfo.pSwapchains = &swapchain; presentInfo.pImageIndices = &imageIndex; VkResult result = vkQueuePresentKHR(presentQueue, &presentInfo); if (result == VK_ERROR_OUT_OF_DATE_KHR || result == VK_SUBOPTIMAL_KHR) { swapchainDirty = true; } currentFrame = (currentFrame + 1) % MAX_FRAMES_IN_FLIGHT; } VkCommandBuffer VkContext::beginSingleTimeCommands() { VkCommandBufferAllocateInfo allocInfo{}; allocInfo.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO; allocInfo.commandPool = immCommandPool; allocInfo.level = VK_COMMAND_BUFFER_LEVEL_PRIMARY; allocInfo.commandBufferCount = 1; VkCommandBuffer cmd; vkAllocateCommandBuffers(device, &allocInfo, &cmd); VkCommandBufferBeginInfo beginInfo{}; beginInfo.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO; beginInfo.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT; vkBeginCommandBuffer(cmd, &beginInfo); return cmd; } void VkContext::endSingleTimeCommands(VkCommandBuffer cmd) { vkEndCommandBuffer(cmd); VkSubmitInfo submitInfo{}; submitInfo.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO; submitInfo.commandBufferCount = 1; submitInfo.pCommandBuffers = &cmd; vkQueueSubmit(graphicsQueue, 1, &submitInfo, immFence); vkWaitForFences(device, 1, &immFence, VK_TRUE, UINT64_MAX); vkResetFences(device, 1, &immFence); vkFreeCommandBuffers(device, immCommandPool, 1, &cmd); } void VkContext::immediateSubmit(std::function&& function) { if (inUploadBatch_) { // Record into the batch command buffer — no submit, no fence wait function(batchCmd_); return; } VkCommandBuffer cmd = beginSingleTimeCommands(); function(cmd); endSingleTimeCommands(cmd); } void VkContext::beginUploadBatch() { uploadBatchDepth_++; if (inUploadBatch_) return; // already in a batch (nested call) inUploadBatch_ = true; // Allocate from transfer pool if available, otherwise from immCommandPool. VkCommandPool pool = hasDedicatedTransfer_ ? transferCommandPool_ : immCommandPool; VkCommandBufferAllocateInfo allocInfo{}; allocInfo.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO; allocInfo.commandPool = pool; allocInfo.level = VK_COMMAND_BUFFER_LEVEL_PRIMARY; allocInfo.commandBufferCount = 1; vkAllocateCommandBuffers(device, &allocInfo, &batchCmd_); VkCommandBufferBeginInfo beginInfo{}; beginInfo.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO; beginInfo.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT; vkBeginCommandBuffer(batchCmd_, &beginInfo); } void VkContext::endUploadBatch() { if (uploadBatchDepth_ <= 0) return; uploadBatchDepth_--; if (uploadBatchDepth_ > 0) return; // still inside an outer batch inUploadBatch_ = false; VkCommandPool pool = hasDedicatedTransfer_ ? transferCommandPool_ : immCommandPool; if (batchStagingBuffers_.empty()) { // No GPU copies were recorded — skip the submit entirely. vkEndCommandBuffer(batchCmd_); vkFreeCommandBuffers(device, pool, 1, &batchCmd_); batchCmd_ = VK_NULL_HANDLE; return; } // Submit commands with a NEW fence — don't wait, let GPU work in parallel. vkEndCommandBuffer(batchCmd_); VkFenceCreateInfo fenceInfo{}; fenceInfo.sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO; VkFence fence = VK_NULL_HANDLE; vkCreateFence(device, &fenceInfo, nullptr, &fence); VkSubmitInfo submitInfo{}; submitInfo.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO; submitInfo.commandBufferCount = 1; submitInfo.pCommandBuffers = &batchCmd_; // Submit to the dedicated transfer queue if available, otherwise graphics. VkQueue targetQueue = hasDedicatedTransfer_ ? transferQueue_ : graphicsQueue; vkQueueSubmit(targetQueue, 1, &submitInfo, fence); // Stash everything for later cleanup when fence signals InFlightBatch batch; batch.fence = fence; batch.cmd = batchCmd_; batch.stagingBuffers = std::move(batchStagingBuffers_); inFlightBatches_.push_back(std::move(batch)); batchCmd_ = VK_NULL_HANDLE; batchStagingBuffers_.clear(); } void VkContext::endUploadBatchSync() { if (uploadBatchDepth_ <= 0) return; uploadBatchDepth_--; if (uploadBatchDepth_ > 0) return; inUploadBatch_ = false; VkCommandPool pool = hasDedicatedTransfer_ ? transferCommandPool_ : immCommandPool; if (batchStagingBuffers_.empty()) { vkEndCommandBuffer(batchCmd_); vkFreeCommandBuffers(device, pool, 1, &batchCmd_); batchCmd_ = VK_NULL_HANDLE; return; } // Synchronous path for load screens — submit and wait on the target queue. VkQueue targetQueue = hasDedicatedTransfer_ ? transferQueue_ : graphicsQueue; vkEndCommandBuffer(batchCmd_); VkSubmitInfo submitInfo{}; submitInfo.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO; submitInfo.commandBufferCount = 1; submitInfo.pCommandBuffers = &batchCmd_; vkQueueSubmit(targetQueue, 1, &submitInfo, immFence); vkWaitForFences(device, 1, &immFence, VK_TRUE, UINT64_MAX); vkResetFences(device, 1, &immFence); vkFreeCommandBuffers(device, pool, 1, &batchCmd_); batchCmd_ = VK_NULL_HANDLE; for (auto& staging : batchStagingBuffers_) { destroyBuffer(allocator, staging); } batchStagingBuffers_.clear(); } void VkContext::pollUploadBatches() { if (inFlightBatches_.empty()) return; VkCommandPool pool = hasDedicatedTransfer_ ? transferCommandPool_ : immCommandPool; for (auto it = inFlightBatches_.begin(); it != inFlightBatches_.end(); ) { VkResult result = vkGetFenceStatus(device, it->fence); if (result == VK_SUCCESS) { // GPU finished — free resources for (auto& staging : it->stagingBuffers) { destroyBuffer(allocator, staging); } vkFreeCommandBuffers(device, pool, 1, &it->cmd); vkDestroyFence(device, it->fence, nullptr); it = inFlightBatches_.erase(it); } else { ++it; } } } void VkContext::waitAllUploads() { VkCommandPool pool = hasDedicatedTransfer_ ? transferCommandPool_ : immCommandPool; for (auto& batch : inFlightBatches_) { vkWaitForFences(device, 1, &batch.fence, VK_TRUE, UINT64_MAX); for (auto& staging : batch.stagingBuffers) { destroyBuffer(allocator, staging); } vkFreeCommandBuffers(device, pool, 1, &batch.cmd); vkDestroyFence(device, batch.fence, nullptr); } inFlightBatches_.clear(); } void VkContext::deferStagingCleanup(AllocatedBuffer staging) { batchStagingBuffers_.push_back(staging); } } // namespace rendering } // namespace wowee