Kelsidavis-WoWee/src/rendering/vk_context.cpp
Pavel Okhlopkov 4b9b3026f4 feat(rendering): add HiZ occlusion culling & fix WMO interior shadows
Implement GPU-driven Hierarchical-Z occlusion culling for M2 doodads
using a depth pyramid built from the previous frame's depth buffer.
The cull shader projects bounding spheres via prevViewProj (temporal
reprojection) and samples the HiZ pyramid to reject hidden objects
before the main render pass.
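For illustration, a simplified CPU-side sketch of the test the cull shader
performs per instance (sphere-only; the full shader additionally does the
row-vector AABB projection). The glm types, identifiers, and thresholds here
are assumptions for the sketch, not the shader's actual code:

  #include <glm/glm.hpp>
  #include <algorithm>
  #include <cmath>
  #include <functional>

  // hizMaxDepth(uv, mip) stands in for sampling the MAX-depth pyramid
  // (textureLod in the real shader).
  bool sphereVisible(glm::vec3 center, float radius,
                     const glm::mat4& prevViewProj, float pyramidWidth,
                     const std::function<float(glm::vec2, int)>& hizMaxDepth) {
      radius *= 1.5f;                                  // 50% sphere inflation
      glm::vec4 clip = prevViewProj * glm::vec4(center, 1.0f);
      if (clip.w <= radius) return true;               // near/behind camera: keep
      glm::vec2 ndc = glm::vec2(clip.x, clip.y) / clip.w;
      if (std::abs(ndc.x) > 0.9f || std::abs(ndc.y) > 0.9f)
          return true;                                 // screen-edge guard
      glm::vec2 uv = ndc * 0.5f + 0.5f;
      float pixels = (radius / clip.w) * pyramidWidth; // approx. screen footprint
      if (pixels < 4.0f) return true;                  // min screen size threshold
      int mip = int(std::ceil(std::log2(std::max(pixels, 1.0f)))) + 1; // mip+1
      // Approximate nearest depth of the sphere, plus a small depth bias.
      float nearest = glm::clamp((clip.z - radius) / clip.w - 0.001f, 0.0f, 1.0f);
      return nearest <= hizMaxDepth(uv, mip);          // cull only if fully behind
  }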

Key implementation details:
- Separate early compute submission (beginSingleTimeCommands + fence
  wait) eliminates 2-frame visibility staleness
- Conservative safeguards prevent false culls: screen-edge guard,
  full VP row-vector AABB projection (Cauchy-Schwarz), 50% sphere
  inflation, depth bias, mip+1, min screen size threshold, camera
  motion dampening (auto-disable on fast rotations), and per-instance
  previouslyVisible flag tracking
- Graceful fallback to frustum-only culling if HiZ init fails

Fix dark WMO interiors by gating shadow map sampling on isInterior==0
in the WMO fragment shader. Interior groups (flag 0x2000) now rely
solely on pre-baked MOCV vertex-color lighting + MOHD ambient color.
Disable interiorDarken globally (was incorrectly darkening outdoor M2s
when camera was inside a WMO). Use isInsideInteriorWMO() instead of
isInsideWMO() for correct indoor detection.
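A hypothetical C++ mirror of the new fragment-shader gating (the real logic is
GLSL in wmo.frag.glsl; names here are illustrative, glm types assumed):

  #include <glm/glm.hpp>

  // Interior groups (MOGP flag 0x2000) skip the shadow map entirely and use
  // only pre-baked lighting; exterior groups keep shadow-mapped sun light.
  glm::vec3 shadeWmo(bool isInterior, float shadowFactor, glm::vec3 albedo,
                     glm::vec3 directLight, glm::vec3 mocvColor,
                     glm::vec3 mohdAmbient) {
      if (isInterior)
          return albedo * (mocvColor + mohdAmbient);  // MOCV + MOHD ambient
      return albedo * directLight * shadowFactor;
  }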

New files:
- hiz_system.hpp/cpp: pyramid image management, compute pipeline,
  descriptors, mip-chain build dispatch, resize handling
- hiz_build.comp.glsl: MAX-depth 2x2 reduction compute shader (see the CPU
  reference sketch after this list)
- m2_cull_hiz.comp.glsl: frustum + HiZ occlusion cull compute shader
- test_indoor_shadows.cpp: 14 unit tests for shadow/interior contracts
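A CPU reference for the 2x2 MAX reduction, as noted above (a sketch assuming
tightly packed float depth; the real work happens in the compute shader):

  #include <algorithm>
  #include <cstdint>
  #include <vector>

  // One destination texel of mip N+1 takes the MAX of its 2x2 footprint in
  // mip N, so every level stays a conservative (farthest-depth) bound.
  std::vector<float> buildNextMip(const std::vector<float>& src,
                                  uint32_t w, uint32_t h) {
      uint32_t dw = std::max(w / 2u, 1u), dh = std::max(h / 2u, 1u);
      std::vector<float> dst(dw * dh);
      for (uint32_t y = 0; y < dh; ++y) {
          for (uint32_t x = 0; x < dw; ++x) {
              uint32_t x0 = std::min(2 * x, w - 1), x1 = std::min(2 * x + 1, w - 1);
              uint32_t y0 = std::min(2 * y, h - 1), y1 = std::min(2 * y + 1, h - 1);
              dst[y * dw + x] = std::max(std::max(src[y0 * w + x0], src[y0 * w + x1]),
                                         std::max(src[y1 * w + x0], src[y1 * w + x1]));
          }
      }
      return dst;
  }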

Modified:
- CullUniformsGPU expanded 128->272 bytes (HiZ params, viewProj,
  prevViewProj; size arithmetic sketched after this list)
- Depth buffer images gain VK_IMAGE_USAGE_SAMPLED_BIT for HiZ reads
- wmo.frag.glsl: interior branch before unlit, shadow skip for 0x2000
- Render graph: hiz_build + compute_cull disabled (run in early compute)
- .gitignore: ignore compiled .spv binaries
- MEGA_BONE_MAX_INSTANCES: 2048 -> 4096
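The 272-byte figure is consistent with the old 128 bytes plus two mat4s
(2 x 64) and one 16-byte HiZ parameter vector: 128 + 128 + 16 = 272. A
plausible layout; the field names are assumptions, only the sizes come from
this commit:

  #include <glm/glm.hpp>

  struct CullUniformsGPU {
      unsigned char legacy[128]; // stand-in for the pre-existing 128 bytes
      glm::mat4 viewProj;        // +64
      glm::mat4 prevViewProj;    // +64, temporal reprojection matrix
      glm::vec4 hizParams;       // +16, e.g. pyramid size / mip count / enable
  };
  static_assert(sizeof(CullUniformsGPU) == 272, "matches the stated size");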

Signed-off-by: Pavel Okhlopkov <pavel.okhlopkov@flant.com>
2026-04-06 16:40:59 +03:00

#define VMA_IMPLEMENTATION
#include "rendering/vk_context.hpp"
#include "core/logger.hpp"
#include <VkBootstrap.h>
#include <SDL2/SDL_vulkan.h>
#include <imgui_impl_vulkan.h>
#include <algorithm>
#include <cstring>
#include <filesystem>
#include <fstream>
#include <string>
namespace wowee {
namespace rendering {
VkContext* VkContext::sInstance_ = nullptr;
// Hash a VkSamplerCreateInfo into a 64-bit key for the sampler cache.
// FNV-1a chosen for speed and low collision rate on small structured data.
// Constants from: http://www.isthe.com/chongo/tech/comp/fnv/
static constexpr uint64_t kFnv1aOffsetBasis = 14695981039346656037ULL;
static constexpr uint64_t kFnv1aPrime = 1099511628211ULL;
static uint64_t hashSamplerCreateInfo(const VkSamplerCreateInfo& s) {
uint64_t h = kFnv1aOffsetBasis;
auto mix = [&](uint64_t v) {
h ^= v;
h *= kFnv1aPrime;
};
mix(static_cast<uint64_t>(s.minFilter));
mix(static_cast<uint64_t>(s.magFilter));
mix(static_cast<uint64_t>(s.mipmapMode));
mix(static_cast<uint64_t>(s.addressModeU));
mix(static_cast<uint64_t>(s.addressModeV));
mix(static_cast<uint64_t>(s.addressModeW));
mix(static_cast<uint64_t>(s.anisotropyEnable));
// Bit-cast floats to uint32_t for hashing
uint32_t aniso;
std::memcpy(&aniso, &s.maxAnisotropy, sizeof(aniso));
mix(static_cast<uint64_t>(aniso));
uint32_t maxLodBits;
std::memcpy(&maxLodBits, &s.maxLod, sizeof(maxLodBits));
mix(static_cast<uint64_t>(maxLodBits));
uint32_t minLodBits;
std::memcpy(&minLodBits, &s.minLod, sizeof(minLodBits));
mix(static_cast<uint64_t>(minLodBits));
mix(static_cast<uint64_t>(s.compareEnable));
mix(static_cast<uint64_t>(s.compareOp));
mix(static_cast<uint64_t>(s.borderColor));
uint32_t biasBits;
std::memcpy(&biasBits, &s.mipLodBias, sizeof(biasBits));
mix(static_cast<uint64_t>(biasBits));
mix(static_cast<uint64_t>(s.unnormalizedCoordinates));
return h;
}
static VKAPI_ATTR VkBool32 VKAPI_CALL debugCallback(
VkDebugUtilsMessageSeverityFlagBitsEXT severity,
[[maybe_unused]] VkDebugUtilsMessageTypeFlagsEXT type,
const VkDebugUtilsMessengerCallbackDataEXT* callbackData,
[[maybe_unused]] void* userData)
{
if (severity >= VK_DEBUG_UTILS_MESSAGE_SEVERITY_ERROR_BIT_EXT) {
LOG_ERROR("Vulkan: ", callbackData->pMessage);
} else if (severity >= VK_DEBUG_UTILS_MESSAGE_SEVERITY_WARNING_BIT_EXT) {
LOG_WARNING("Vulkan: ", callbackData->pMessage);
}
return VK_FALSE;
}
VkContext::~VkContext() {
shutdown();
}
bool VkContext::initialize(SDL_Window* window) {
LOG_INFO("Initializing Vulkan context");
if (!createInstance(window)) return false;
if (!createSurface(window)) return false;
if (!selectPhysicalDevice()) return false;
if (!createLogicalDevice()) return false;
if (!createAllocator()) return false;
// Pipeline cache: try to load from disk, fall back to empty cache.
// Not fatal — if it fails we just skip caching.
createPipelineCache();
int w, h;
SDL_Vulkan_GetDrawableSize(window, &w, &h);
if (!createSwapchain(w, h)) return false;
if (!createCommandPools()) return false;
if (!createSyncObjects()) return false;
if (!createImGuiResources()) return false;
// Query anisotropy support from the physical device.
VkPhysicalDeviceFeatures supportedFeatures{};
vkGetPhysicalDeviceFeatures(physicalDevice, &supportedFeatures);
samplerAnisotropySupported_ = (supportedFeatures.samplerAnisotropy == VK_TRUE);
LOG_INFO("Sampler anisotropy supported: ", samplerAnisotropySupported_ ? "YES" : "NO");
sInstance_ = this;
LOG_INFO("Vulkan context initialized successfully");
return true;
}
void VkContext::shutdown() {
if (!device && !instance) return; // Already shut down or never initialized
LOG_DEBUG("VkContext::shutdown - vkDeviceWaitIdle...");
if (device) {
vkDeviceWaitIdle(device);
}
// Clear deferred cleanup queues WITHOUT executing them. By this point the
// sub-renderers (which own the descriptor pools/buffers these lambdas
// reference) have already been destroyed, so running them would call
// vkFreeDescriptorSets on invalid pools. vkDestroyDevice reclaims all
// device-child resources anyway.
for (uint32_t fi = 0; fi < MAX_FRAMES_IN_FLIGHT; fi++) {
deferredCleanup_[fi].clear();
}
LOG_DEBUG("VkContext::shutdown - destroyImGuiResources...");
destroyImGuiResources();
// Destroy sync objects
for (auto& frame : frames) {
if (frame.inFlightFence) vkDestroyFence(device, frame.inFlightFence, nullptr);
if (frame.commandPool) vkDestroyCommandPool(device, frame.commandPool, nullptr);
frame = {};
}
for (auto sem : imageAcquiredSemaphores_) { if (sem) vkDestroySemaphore(device, sem, nullptr); }
imageAcquiredSemaphores_.clear();
for (auto sem : renderFinishedSemaphores_) { if (sem) vkDestroySemaphore(device, sem, nullptr); }
renderFinishedSemaphores_.clear();
if (nextAcquireSemaphore_) { vkDestroySemaphore(device, nextAcquireSemaphore_, nullptr); nextAcquireSemaphore_ = VK_NULL_HANDLE; }
// Clean up any in-flight async upload batches (device already idle)
for (auto& batch : inFlightBatches_) {
// Staging buffers: skip destroy — allocator is about to be torn down
vkDestroyFence(device, batch.fence, nullptr);
// Command buffer freed when pool is destroyed below
}
inFlightBatches_.clear();
if (immFence) { vkDestroyFence(device, immFence, nullptr); immFence = VK_NULL_HANDLE; }
if (immCommandPool) { vkDestroyCommandPool(device, immCommandPool, nullptr); immCommandPool = VK_NULL_HANDLE; }
if (transferCommandPool_) { vkDestroyCommandPool(device, transferCommandPool_, nullptr); transferCommandPool_ = VK_NULL_HANDLE; }
// Persist pipeline cache to disk before tearing down the device.
savePipelineCache();
if (pipelineCache_) {
vkDestroyPipelineCache(device, pipelineCache_, nullptr);
pipelineCache_ = VK_NULL_HANDLE;
}
// Destroy all cached samplers.
for (auto& [key, sampler] : samplerCache_) {
if (sampler) vkDestroySampler(device, sampler, nullptr);
}
samplerCache_.clear();
LOG_INFO("Sampler cache cleared");
sInstance_ = nullptr;
LOG_DEBUG("VkContext::shutdown - destroySwapchain...");
destroySwapchain();
// Skip vmaDestroyAllocator — it walks every allocation to free it, which
// takes many seconds with thousands of loaded textures/models. The driver
// reclaims all device memory when we destroy the device, and the OS reclaims
// everything on process exit. Skipping this makes shutdown instant.
allocator = VK_NULL_HANDLE;
LOG_DEBUG("VkContext::shutdown - vkDestroyDevice...");
if (device) { vkDestroyDevice(device, nullptr); device = VK_NULL_HANDLE; }
if (surface) { vkDestroySurfaceKHR(instance, surface, nullptr); surface = VK_NULL_HANDLE; }
if (debugMessenger) {
auto func = reinterpret_cast<PFN_vkDestroyDebugUtilsMessengerEXT>(
vkGetInstanceProcAddr(instance, "vkDestroyDebugUtilsMessengerEXT"));
if (func) func(instance, debugMessenger, nullptr);
debugMessenger = VK_NULL_HANDLE;
}
if (instance) { vkDestroyInstance(instance, nullptr); instance = VK_NULL_HANDLE; }
LOG_DEBUG("Vulkan context shutdown complete");
}
void VkContext::deferAfterFrameFence(std::function<void()>&& fn) {
deferredCleanup_[currentFrame].push_back(std::move(fn));
}
void VkContext::deferAfterAllFrameFences(std::function<void()>&& fn) {
// Shared resources (material descriptor sets, vertex/index buffers) are
// bound by every in-flight frame's command buffer. deferAfterFrameFence
// only waits for ONE slot's fence — the other slot may still be executing.
// Add to every slot; a shared counter ensures the lambda runs exactly once,
// after the LAST slot has been fenced.
auto counter = std::make_shared<uint32_t>(MAX_FRAMES_IN_FLIGHT);
auto sharedFn = std::make_shared<std::function<void()>>(std::move(fn));
for (uint32_t i = 0; i < MAX_FRAMES_IN_FLIGHT; i++) {
deferredCleanup_[i].push_back([counter, sharedFn]() {
if (--(*counter) == 0) {
(*sharedFn)();
}
});
}
}
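// Illustrative usage sketch (hypothetical caller; assumes the context's
// allocator member is reachable): a sub-renderer retiring a buffer that every
// in-flight frame's command buffer may still bind. The lambda runs exactly
// once, after the LAST frame slot's fence has been waited on.
static void retireSharedBuffer(VkContext& ctx, VkBuffer buf, VmaAllocation alloc) {
    ctx.deferAfterAllFrameFences([&ctx, buf, alloc]() {
        vmaDestroyBuffer(ctx.allocator, buf, alloc);
    });
}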
void VkContext::runDeferredCleanup(uint32_t frameIndex) {
auto& q = deferredCleanup_[frameIndex];
if (q.empty()) return;
for (auto& fn : q) {
if (fn) fn();
}
q.clear();
}
VkSampler VkContext::getOrCreateSampler(const VkSamplerCreateInfo& info) {
// Clamp anisotropy if the device doesn't support the feature.
VkSamplerCreateInfo adjusted = info;
if (!samplerAnisotropySupported_) {
adjusted.anisotropyEnable = VK_FALSE;
adjusted.maxAnisotropy = 1.0f;
}
uint64_t key = hashSamplerCreateInfo(adjusted);
{
std::lock_guard<std::mutex> lock(samplerCacheMutex_);
auto it = samplerCache_.find(key);
if (it != samplerCache_.end()) {
return it->second;
}
}
// Create the new sampler outside the lock; vkCreateSampler itself may be
// called concurrently, and we re-lock afterwards only to insert into the cache.
VkSampler sampler = VK_NULL_HANDLE;
if (vkCreateSampler(device, &adjusted, nullptr, &sampler) != VK_SUCCESS) {
LOG_ERROR("getOrCreateSampler: vkCreateSampler failed");
return VK_NULL_HANDLE;
}
{
std::lock_guard<std::mutex> lock(samplerCacheMutex_);
// Double-check: another thread may have inserted while we were creating.
auto [it, inserted] = samplerCache_.emplace(key, sampler);
if (!inserted) {
// Another thread won the race — destroy our duplicate and use theirs.
vkDestroySampler(device, sampler, nullptr);
return it->second;
}
}
return sampler;
}
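// Illustrative usage sketch (hypothetical helper): callers request samplers
// by description and must never destroy the returned handle; the cache owns
// it and destroys it once at shutdown.
static VkSampler trilinearRepeatSampler(VkContext& ctx) {
    VkSamplerCreateInfo si{};
    si.sType = VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO;
    si.magFilter = VK_FILTER_LINEAR;
    si.minFilter = VK_FILTER_LINEAR;
    si.mipmapMode = VK_SAMPLER_MIPMAP_MODE_LINEAR;
    si.addressModeU = VK_SAMPLER_ADDRESS_MODE_REPEAT;
    si.addressModeV = VK_SAMPLER_ADDRESS_MODE_REPEAT;
    si.addressModeW = VK_SAMPLER_ADDRESS_MODE_REPEAT;
    si.anisotropyEnable = VK_TRUE; // silently downgraded above if unsupported
    si.maxAnisotropy = 8.0f;
    si.maxLod = VK_LOD_CLAMP_NONE;
    return ctx.getOrCreateSampler(si); // shared handle; do not destroy it
}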
bool VkContext::createInstance(SDL_Window* window) {
// Get required SDL extensions
unsigned int sdlExtCount = 0;
SDL_Vulkan_GetInstanceExtensions(window, &sdlExtCount, nullptr);
std::vector<const char*> sdlExts(sdlExtCount);
SDL_Vulkan_GetInstanceExtensions(window, &sdlExtCount, sdlExts.data());
vkb::InstanceBuilder builder;
builder.set_app_name("Wowee")
.set_app_version(VK_MAKE_VERSION(1, 0, 0))
.require_api_version(1, 2, 0)
.set_minimum_instance_version(1, 1, 0);
for (auto ext : sdlExts) {
builder.enable_extension(ext);
}
if (enableValidation) {
builder.request_validation_layers(true)
.set_debug_callback(debugCallback);
}
auto instRet = builder.build();
if (!instRet) {
LOG_ERROR("Failed to create Vulkan instance: ", instRet.error().message());
return false;
}
vkbInstance_ = instRet.value();
instance = vkbInstance_.instance;
debugMessenger = vkbInstance_.debug_messenger;
// Query the actual instance API version for gating core 1.2+ calls
uint32_t instVer = VK_API_VERSION_1_1;
if (vkEnumerateInstanceVersion(&instVer) != VK_SUCCESS)
instVer = VK_API_VERSION_1_1;
instanceApiVersion_ = instVer;
LOG_INFO("Vulkan instance created (instance API version: ",
VK_VERSION_MAJOR(instVer), ".", VK_VERSION_MINOR(instVer), ".",
VK_VERSION_PATCH(instVer), ")");
return true;
}
bool VkContext::createSurface(SDL_Window* window) {
if (!SDL_Vulkan_CreateSurface(window, instance, &surface)) {
LOG_ERROR("Failed to create Vulkan surface: ", SDL_GetError());
return false;
}
return true;
}
bool VkContext::selectPhysicalDevice() {
vkb::PhysicalDeviceSelector selector{vkbInstance_};
VkPhysicalDeviceFeatures requiredFeatures{};
requiredFeatures.samplerAnisotropy = VK_TRUE;
requiredFeatures.fillModeNonSolid = VK_TRUE; // wireframe debug pipelines
requiredFeatures.shaderStorageImageWriteWithoutFormat = VK_TRUE; // FSR2 compute shaders
requiredFeatures.shaderInt16 = VK_TRUE; // FSR2 compute shaders
selector.set_surface(surface)
.set_minimum_version(1, 1)
.set_required_features(requiredFeatures)
.prefer_gpu_device_type(vkb::PreferredDeviceType::discrete);
auto physRet = selector.select();
if (!physRet) {
LOG_ERROR("Failed to select Vulkan physical device: ", physRet.error().message());
return false;
}
vkbPhysicalDevice_ = physRet.value();
physicalDevice = vkbPhysicalDevice_.physical_device;
VkPhysicalDeviceProperties props;
vkGetPhysicalDeviceProperties(physicalDevice, &props);
(void)props.apiVersion; // Available if needed for version checks
gpuVendorId_ = props.vendorID;
std::strncpy(gpuName_, props.deviceName, sizeof(gpuName_) - 1);
gpuName_[sizeof(gpuName_) - 1] = '\0';
LOG_INFO("GPU: ", gpuName_, " (vendor 0x", std::hex, gpuVendorId_, std::dec, ")");
VkPhysicalDeviceDepthStencilResolveProperties dsResolveProps{};
dsResolveProps.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DEPTH_STENCIL_RESOLVE_PROPERTIES;
VkPhysicalDeviceProperties2 props2{};
props2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2;
props2.pNext = &dsResolveProps;
vkGetPhysicalDeviceProperties2(physicalDevice, &props2);
// Gate on instance API version — vkCreateRenderPass2 is core 1.2 and only
// available when the instance was created with apiVersion >= 1.2.
// The device may report 1.2+ but a 1.1 instance won't have the function pointer.
if (instanceApiVersion_ >= VK_API_VERSION_1_2) {
VkResolveModeFlags modes = dsResolveProps.supportedDepthResolveModes;
if (modes & VK_RESOLVE_MODE_SAMPLE_ZERO_BIT) {
depthResolveMode_ = VK_RESOLVE_MODE_SAMPLE_ZERO_BIT;
depthResolveSupported_ = true;
} else if (modes & VK_RESOLVE_MODE_MIN_BIT) {
depthResolveMode_ = VK_RESOLVE_MODE_MIN_BIT;
depthResolveSupported_ = true;
} else if (modes & VK_RESOLVE_MODE_MAX_BIT) {
depthResolveMode_ = VK_RESOLVE_MODE_MAX_BIT;
depthResolveSupported_ = true;
} else if (modes & VK_RESOLVE_MODE_AVERAGE_BIT) {
depthResolveMode_ = VK_RESOLVE_MODE_AVERAGE_BIT;
depthResolveSupported_ = true;
}
} else {
depthResolveSupported_ = false;
depthResolveMode_ = VK_RESOLVE_MODE_NONE;
}
LOG_INFO("Vulkan device: ", props.deviceName);
LOG_INFO("Vulkan API version: ", VK_VERSION_MAJOR(props.apiVersion), ".",
VK_VERSION_MINOR(props.apiVersion), ".", VK_VERSION_PATCH(props.apiVersion));
LOG_INFO("Depth resolve support: ", depthResolveSupported_ ? "YES" : "NO");
// Probe queue families to see if the graphics family supports multiple queues
// (used in createLogicalDevice to request a second queue for parallel uploads).
auto queueFamilies = vkbPhysicalDevice_.get_queue_families();
for (uint32_t i = 0; i < static_cast<uint32_t>(queueFamilies.size()); i++) {
if (queueFamilies[i].queueFlags & VK_QUEUE_GRAPHICS_BIT) {
graphicsQueueFamilyQueueCount_ = queueFamilies[i].queueCount;
LOG_INFO("Graphics queue family ", i, " supports ", graphicsQueueFamilyQueueCount_, " queue(s)");
break;
}
}
return true;
}
bool VkContext::createLogicalDevice() {
vkb::DeviceBuilder deviceBuilder{vkbPhysicalDevice_};
// Enable optional Vulkan 1.2 features for FSR2 compute shaders (shaderFloat16)
VkPhysicalDeviceVulkan12Features enabled12{};
enabled12.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_2_FEATURES;
if (instanceApiVersion_ >= VK_API_VERSION_1_2) {
VkPhysicalDeviceVulkan12Features supported12{};
supported12.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_2_FEATURES;
VkPhysicalDeviceFeatures2 features2{};
features2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2;
features2.pNext = &supported12;
vkGetPhysicalDeviceFeatures2(physicalDevice, &features2);
if (supported12.shaderFloat16) {
enabled12.shaderFloat16 = VK_TRUE;
LOG_INFO("Enabling shaderFloat16 for FSR2 compute shaders");
}
deviceBuilder.add_pNext(&enabled12);
}
// Enable AMD device coherent memory feature if the extension was enabled
// (prevents validation errors when VMA selects memory types with DEVICE_COHERENT_BIT_AMD)
VkPhysicalDeviceCoherentMemoryFeaturesAMD coherentFeatures{};
coherentFeatures.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_COHERENT_MEMORY_FEATURES_AMD;
if (vkbPhysicalDevice_.enable_extension_if_present(VK_AMD_DEVICE_COHERENT_MEMORY_EXTENSION_NAME)) {
coherentFeatures.deviceCoherentMemory = VK_TRUE;
deviceBuilder.add_pNext(&coherentFeatures);
LOG_INFO("Enabling AMD device coherent memory");
}
// If the graphics queue family supports >= 2 queues, request a second one
// for parallel texture/buffer uploads. Both queues share the same family
// so no queue-ownership-transfer barriers are needed.
const bool requestTransferQueue = (graphicsQueueFamilyQueueCount_ >= 2);
if (requestTransferQueue) {
// Build a custom queue description list: 2 queues from the graphics
// family, 1 queue from every other family (so present etc. still work).
auto families = vkbPhysicalDevice_.get_queue_families();
uint32_t gfxFamily = UINT32_MAX;
for (uint32_t i = 0; i < static_cast<uint32_t>(families.size()); i++) {
if (families[i].queueFlags & VK_QUEUE_GRAPHICS_BIT) {
gfxFamily = i;
break;
}
}
std::vector<vkb::CustomQueueDescription> queueDescs;
for (uint32_t i = 0; i < static_cast<uint32_t>(families.size()); i++) {
if (i == gfxFamily) {
// Request 2 queues: [0] graphics, [1] transfer uploads
queueDescs.emplace_back(i, std::vector<float>{1.0f, 1.0f});
} else {
queueDescs.emplace_back(i, std::vector<float>{1.0f});
}
}
deviceBuilder.custom_queue_setup(queueDescs);
}
auto devRet = deviceBuilder.build();
if (!devRet) {
LOG_ERROR("Failed to create Vulkan logical device: ", devRet.error().message());
return false;
}
auto vkbDevice = devRet.value();
device = vkbDevice.device;
if (requestTransferQueue) {
// With custom_queue_setup, we must retrieve queues manually.
auto families = vkbPhysicalDevice_.get_queue_families();
uint32_t gfxFamily = UINT32_MAX;
for (uint32_t i = 0; i < static_cast<uint32_t>(families.size()); i++) {
if (families[i].queueFlags & VK_QUEUE_GRAPHICS_BIT) {
gfxFamily = i;
break;
}
}
graphicsQueueFamily = gfxFamily;
vkGetDeviceQueue(device, gfxFamily, 0, &graphicsQueue);
vkGetDeviceQueue(device, gfxFamily, 1, &transferQueue_);
hasDedicatedTransfer_ = true;
// Present queue: reuse the graphics queue. The selector was given the
// surface, and on this path we simply assume the graphics family can
// present (true on virtually all desktop hardware).
presentQueue = graphicsQueue;
presentQueueFamily = gfxFamily;
LOG_INFO("Dedicated transfer queue enabled (family ", gfxFamily, ", queue index 1)");
} else {
// Standard path — let vkb resolve queues.
auto gqRet = vkbDevice.get_queue(vkb::QueueType::graphics);
if (!gqRet) {
LOG_ERROR("Failed to get graphics queue");
return false;
}
graphicsQueue = gqRet.value();
graphicsQueueFamily = vkbDevice.get_queue_index(vkb::QueueType::graphics).value();
auto pqRet = vkbDevice.get_queue(vkb::QueueType::present);
if (!pqRet) {
presentQueue = graphicsQueue;
presentQueueFamily = graphicsQueueFamily;
} else {
presentQueue = pqRet.value();
presentQueueFamily = vkbDevice.get_queue_index(vkb::QueueType::present).value();
}
}
LOG_INFO("Vulkan logical device created");
return true;
}
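// Illustrative sketch (hypothetical helper): submitting a pre-recorded upload
// command buffer on the second queue. Both queues share the graphics family,
// so no queue-family ownership transfer is required, only ordinary memory
// barriers before the graphics queue consumes the uploaded data.
static void submitUpload(VkQueue transferQueue, VkCommandBuffer uploadCmd, VkFence fence) {
    VkSubmitInfo si{};
    si.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO;
    si.commandBufferCount = 1;
    si.pCommandBuffers = &uploadCmd; // recorded from transferCommandPool_
    vkQueueSubmit(transferQueue, 1, &si, fence); // wait on fence before reuse
}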
bool VkContext::createAllocator() {
VmaAllocatorCreateInfo allocInfo{};
allocInfo.instance = instance;
allocInfo.physicalDevice = physicalDevice;
allocInfo.device = device;
allocInfo.vulkanApiVersion = instanceApiVersion_;
if (vmaCreateAllocator(&allocInfo, &allocator) != VK_SUCCESS) {
LOG_ERROR("Failed to create VMA allocator");
return false;
}
LOG_INFO("VMA allocator created");
return true;
}
// ---------------------------------------------------------------------------
// Pipeline cache persistence
// ---------------------------------------------------------------------------
static std::string getPipelineCachePath() {
#ifdef _WIN32
if (const char* appdata = std::getenv("APPDATA"))
return std::string(appdata) + "\\wowee\\pipeline_cache.bin";
return ".\\pipeline_cache.bin";
#elif defined(__APPLE__)
if (const char* home = std::getenv("HOME"))
return std::string(home) + "/Library/Caches/wowee/pipeline_cache.bin";
return "./pipeline_cache.bin";
#else
if (const char* home = std::getenv("HOME"))
return std::string(home) + "/.local/share/wowee/pipeline_cache.bin";
return "./pipeline_cache.bin";
#endif
}
bool VkContext::createPipelineCache() {
// NVIDIA drivers have their own built-in pipeline/shader disk cache.
// Using VkPipelineCache on NVIDIA 590.x causes vkCmdBeginRenderPass to
// SIGSEGV inside libnvidia-glcore — skip entirely on NVIDIA GPUs.
if (gpuVendorId_ == 0x10DE) {
LOG_INFO("Pipeline cache: skipped (NVIDIA driver provides built-in caching)");
return true;
}
std::string path = getPipelineCachePath();
// Try to load existing cache data from disk.
std::vector<char> cacheData;
{
std::ifstream file(path, std::ios::binary | std::ios::ate);
if (file.is_open()) {
auto size = file.tellg();
if (size > 0) {
cacheData.resize(static_cast<size_t>(size));
file.seekg(0);
file.read(cacheData.data(), size);
if (!file) {
LOG_WARNING("Pipeline cache file read failed, starting with empty cache");
cacheData.clear();
}
}
}
}
VkPipelineCacheCreateInfo cacheCI{};
cacheCI.sType = VK_STRUCTURE_TYPE_PIPELINE_CACHE_CREATE_INFO;
cacheCI.initialDataSize = cacheData.size();
cacheCI.pInitialData = cacheData.empty() ? nullptr : cacheData.data();
VkResult result = vkCreatePipelineCache(device, &cacheCI, nullptr, &pipelineCache_);
if (result != VK_SUCCESS) {
// If loading stale/corrupt data caused failure, retry with empty cache.
if (!cacheData.empty()) {
LOG_WARNING("Pipeline cache creation failed with saved data, retrying empty");
cacheCI.initialDataSize = 0;
cacheCI.pInitialData = nullptr;
result = vkCreatePipelineCache(device, &cacheCI, nullptr, &pipelineCache_);
}
if (result != VK_SUCCESS) {
LOG_WARNING("Pipeline cache creation failed — pipelines will not be cached");
pipelineCache_ = VK_NULL_HANDLE;
return false;
}
}
if (!cacheData.empty()) {
LOG_INFO("Pipeline cache loaded from disk (", cacheData.size(), " bytes)");
} else {
LOG_INFO("Pipeline cache created (empty)");
}
return true;
}
void VkContext::savePipelineCache() {
if (!pipelineCache_ || !device) return;
size_t dataSize = 0;
if (vkGetPipelineCacheData(device, pipelineCache_, &dataSize, nullptr) != VK_SUCCESS || dataSize == 0) {
LOG_WARNING("Failed to query pipeline cache size");
return;
}
std::vector<char> data(dataSize);
if (vkGetPipelineCacheData(device, pipelineCache_, &dataSize, data.data()) != VK_SUCCESS) {
LOG_WARNING("Failed to retrieve pipeline cache data");
return;
}
std::string path = getPipelineCachePath();
std::filesystem::create_directories(std::filesystem::path(path).parent_path());
std::ofstream file(path, std::ios::binary | std::ios::trunc);
if (!file.is_open()) {
LOG_WARNING("Failed to open pipeline cache file for writing: ", path);
return;
}
file.write(data.data(), static_cast<std::streamsize>(dataSize));
file.close();
LOG_INFO("Pipeline cache saved to disk (", dataSize, " bytes)");
}
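// Illustrative call site (hypothetical helper; create-info assembly elided):
// the only integration point is passing pipelineCache_ instead of
// VK_NULL_HANDLE. On the NVIDIA skip path pipelineCache_ stays
// VK_NULL_HANDLE, which is a valid argument and simply means uncached.
static VkPipeline createCachedPipeline(VkDevice dev, VkPipelineCache cache,
                                       const VkGraphicsPipelineCreateInfo& ci) {
    VkPipeline pipeline = VK_NULL_HANDLE;
    vkCreateGraphicsPipelines(dev, cache, 1, &ci, nullptr, &pipeline);
    return pipeline;
}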
bool VkContext::createSwapchain(int width, int height) {
vkb::SwapchainBuilder swapchainBuilder{physicalDevice, device, surface};
auto& builder = swapchainBuilder
.set_desired_format({VK_FORMAT_B8G8R8A8_UNORM, VK_COLOR_SPACE_SRGB_NONLINEAR_KHR})
.set_desired_extent(static_cast<uint32_t>(width), static_cast<uint32_t>(height))
.set_image_usage_flags(VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT)
.set_desired_min_image_count(2)
.set_old_swapchain(swapchain);
if (vsync_) {
builder.set_desired_present_mode(VK_PRESENT_MODE_FIFO_KHR);
} else {
builder.set_desired_present_mode(VK_PRESENT_MODE_IMMEDIATE_KHR);
builder.add_fallback_present_mode(VK_PRESENT_MODE_MAILBOX_KHR);
builder.add_fallback_present_mode(VK_PRESENT_MODE_FIFO_RELAXED_KHR);
}
auto swapRet = builder.build();
if (!swapRet) {
LOG_ERROR("Failed to create Vulkan swapchain: ", swapRet.error().message());
return false;
}
// Destroy old swapchain if recreating
if (swapchain != VK_NULL_HANDLE) {
destroySwapchain();
}
auto vkbSwap = swapRet.value();
swapchain = vkbSwap.swapchain;
swapchainFormat = vkbSwap.image_format;
swapchainExtent = vkbSwap.extent;
swapchainImages = vkbSwap.get_images().value();
swapchainImageViews = vkbSwap.get_image_views().value();
// Framebuffers for the ImGui render pass are not created here; they are
// built later in createImGuiResources() (and again in recreateSwapchain()).
LOG_INFO("Vulkan swapchain created: ", swapchainExtent.width, "x", swapchainExtent.height,
" (", swapchainImages.size(), " images)");
swapchainDirty = false;
return true;
}
void VkContext::destroySwapchain() {
for (auto fb : swapchainFramebuffers) {
if (fb) vkDestroyFramebuffer(device, fb, nullptr);
}
swapchainFramebuffers.clear();
for (auto iv : swapchainImageViews) {
if (iv) vkDestroyImageView(device, iv, nullptr);
}
swapchainImageViews.clear();
swapchainImages.clear();
if (swapchain) {
vkDestroySwapchainKHR(device, swapchain, nullptr);
swapchain = VK_NULL_HANDLE;
}
}
bool VkContext::createCommandPools() {
// Per-frame command pools (resettable)
for (uint32_t i = 0; i < MAX_FRAMES_IN_FLIGHT; i++) {
VkCommandPoolCreateInfo poolInfo{};
poolInfo.sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO;
poolInfo.flags = VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT;
poolInfo.queueFamilyIndex = graphicsQueueFamily;
if (vkCreateCommandPool(device, &poolInfo, nullptr, &frames[i].commandPool) != VK_SUCCESS) {
LOG_ERROR("Failed to create command pool for frame ", i);
return false;
}
VkCommandBufferAllocateInfo allocInfo{};
allocInfo.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO;
allocInfo.commandPool = frames[i].commandPool;
allocInfo.level = VK_COMMAND_BUFFER_LEVEL_PRIMARY;
allocInfo.commandBufferCount = 1;
if (vkAllocateCommandBuffers(device, &allocInfo, &frames[i].commandBuffer) != VK_SUCCESS) {
LOG_ERROR("Failed to allocate command buffer for frame ", i);
return false;
}
}
// Immediate submit pool
VkCommandPoolCreateInfo immPoolInfo{};
immPoolInfo.sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO;
immPoolInfo.flags = VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT;
immPoolInfo.queueFamilyIndex = graphicsQueueFamily;
if (vkCreateCommandPool(device, &immPoolInfo, nullptr, &immCommandPool) != VK_SUCCESS) {
LOG_ERROR("Failed to create immediate command pool");
return false;
}
// Separate command pool for the transfer queue (same family, different queue)
if (hasDedicatedTransfer_) {
VkCommandPoolCreateInfo transferPoolInfo{};
transferPoolInfo.sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO;
transferPoolInfo.flags = VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT;
transferPoolInfo.queueFamilyIndex = graphicsQueueFamily;
if (vkCreateCommandPool(device, &transferPoolInfo, nullptr, &transferCommandPool_) != VK_SUCCESS) {
LOG_ERROR("Failed to create transfer command pool");
return false;
}
}
return true;
}
bool VkContext::createSyncObjects() {
VkSemaphoreCreateInfo semInfo{};
semInfo.sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO;
VkFenceCreateInfo fenceInfo{};
fenceInfo.sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO;
fenceInfo.flags = VK_FENCE_CREATE_SIGNALED_BIT; // Start signaled so first frame doesn't block
for (uint32_t i = 0; i < MAX_FRAMES_IN_FLIGHT; i++) {
if (vkCreateFence(device, &fenceInfo, nullptr, &frames[i].inFlightFence) != VK_SUCCESS) {
LOG_ERROR("Failed to create sync objects for frame ", i);
return false;
}
}
// Per-swapchain-image semaphores: avoids reuse while the presentation engine
// still holds a reference. After acquiring image N we swap the acquire semaphore
// into imageAcquiredSemaphores_[N], recycling the old one for the next acquire.
const uint32_t imgCount = static_cast<uint32_t>(swapchainImages.size());
imageAcquiredSemaphores_.resize(imgCount);
renderFinishedSemaphores_.resize(imgCount);
for (uint32_t i = 0; i < imgCount; i++) {
if (vkCreateSemaphore(device, &semInfo, nullptr, &imageAcquiredSemaphores_[i]) != VK_SUCCESS ||
vkCreateSemaphore(device, &semInfo, nullptr, &renderFinishedSemaphores_[i]) != VK_SUCCESS) {
LOG_ERROR("Failed to create per-image semaphores for image ", i);
return false;
}
}
// One extra acquire semaphore — we need it for the next vkAcquireNextImageKHR
// before we know which image we'll get.
if (vkCreateSemaphore(device, &semInfo, nullptr, &nextAcquireSemaphore_) != VK_SUCCESS) {
LOG_ERROR("Failed to create next-acquire semaphore");
return false;
}
// Immediate submit fence (not signaled initially)
VkFenceCreateInfo immFenceInfo{};
immFenceInfo.sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO;
if (vkCreateFence(device, &immFenceInfo, nullptr, &immFence) != VK_SUCCESS) {
LOG_ERROR("Failed to create immediate submit fence");
return false;
}
return true;
}
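// Illustrative sketch of the rotation described above (hypothetical free
// function; member access is assumed for illustration, and the real frame
// loop lives elsewhere in the renderer).
static uint32_t acquireWithSemaphoreRotation(VkContext& ctx) {
    uint32_t imageIndex = 0;
    vkAcquireNextImageKHR(ctx.device, ctx.swapchain, UINT64_MAX,
                          ctx.nextAcquireSemaphore_, VK_NULL_HANDLE, &imageIndex);
    // The presentation engine may still hold the semaphore previously stored
    // for this image, so swap rather than reuse: the frame's submit waits on
    // imageAcquiredSemaphores_[imageIndex], and the displaced semaphore
    // becomes the spare for the next acquire.
    std::swap(ctx.imageAcquiredSemaphores_[imageIndex], ctx.nextAcquireSemaphore_);
    return imageIndex;
}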
bool VkContext::createDepthBuffer() {
VkImageCreateInfo imgInfo{};
imgInfo.sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO;
imgInfo.imageType = VK_IMAGE_TYPE_2D;
imgInfo.format = depthFormat;
imgInfo.extent = {swapchainExtent.width, swapchainExtent.height, 1};
imgInfo.mipLevels = 1;
imgInfo.arrayLayers = 1;
imgInfo.samples = msaaSamples_;
imgInfo.tiling = VK_IMAGE_TILING_OPTIMAL;
imgInfo.usage = VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT
| VK_IMAGE_USAGE_SAMPLED_BIT; // HiZ pyramid reads depth as texture
VmaAllocationCreateInfo allocInfo{};
allocInfo.usage = VMA_MEMORY_USAGE_GPU_ONLY;
if (vmaCreateImage(allocator, &imgInfo, &allocInfo, &depthImage, &depthAllocation, nullptr) != VK_SUCCESS) {
LOG_ERROR("Failed to create depth image");
return false;
}
VkImageViewCreateInfo viewInfo{};
viewInfo.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO;
viewInfo.image = depthImage;
viewInfo.viewType = VK_IMAGE_VIEW_TYPE_2D;
viewInfo.format = depthFormat;
viewInfo.subresourceRange.aspectMask = VK_IMAGE_ASPECT_DEPTH_BIT;
viewInfo.subresourceRange.levelCount = 1;
viewInfo.subresourceRange.layerCount = 1;
if (vkCreateImageView(device, &viewInfo, nullptr, &depthImageView) != VK_SUCCESS) {
LOG_ERROR("Failed to create depth image view");
return false;
}
return true;
}
void VkContext::destroyDepthBuffer() {
if (depthImageView) { vkDestroyImageView(device, depthImageView, nullptr); depthImageView = VK_NULL_HANDLE; }
if (depthImage) { vmaDestroyImage(allocator, depthImage, depthAllocation); depthImage = VK_NULL_HANDLE; depthAllocation = VK_NULL_HANDLE; }
}
bool VkContext::createMsaaColorImage() {
if (msaaSamples_ == VK_SAMPLE_COUNT_1_BIT) return true; // No MSAA image needed
// Check if lazily allocated memory is available — only use TRANSIENT when it is.
// AMD GPUs (especially RDNA4) don't expose lazily allocated memory; using TRANSIENT
// without it can cause the driver to optimize for tile-only storage, leading to
// crashes during MSAA resolve when the backing memory was never populated.
bool hasLazyMemory = false;
VkPhysicalDeviceMemoryProperties memProps;
vkGetPhysicalDeviceMemoryProperties(physicalDevice, &memProps);
for (uint32_t i = 0; i < memProps.memoryTypeCount; i++) {
if (memProps.memoryTypes[i].propertyFlags & VK_MEMORY_PROPERTY_LAZILY_ALLOCATED_BIT) {
hasLazyMemory = true;
break;
}
}
VkImageCreateInfo imgInfo{};
imgInfo.sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO;
imgInfo.imageType = VK_IMAGE_TYPE_2D;
imgInfo.format = swapchainFormat;
imgInfo.extent = {swapchainExtent.width, swapchainExtent.height, 1};
imgInfo.mipLevels = 1;
imgInfo.arrayLayers = 1;
imgInfo.samples = msaaSamples_;
imgInfo.tiling = VK_IMAGE_TILING_OPTIMAL;
VmaAllocationCreateInfo allocInfo{};
allocInfo.usage = VMA_MEMORY_USAGE_GPU_ONLY;
if (hasLazyMemory) {
imgInfo.usage = VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_TRANSIENT_ATTACHMENT_BIT;
allocInfo.preferredFlags = VK_MEMORY_PROPERTY_LAZILY_ALLOCATED_BIT;
} else {
imgInfo.usage = VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT;
}
if (vmaCreateImage(allocator, &imgInfo, &allocInfo, &msaaColorImage_, &msaaColorAllocation_, nullptr) != VK_SUCCESS) {
// Retry without TRANSIENT (some drivers reject it at high sample counts)
imgInfo.usage = VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT;
allocInfo.preferredFlags = 0;
if (vmaCreateImage(allocator, &imgInfo, &allocInfo, &msaaColorImage_, &msaaColorAllocation_, nullptr) != VK_SUCCESS) {
LOG_ERROR("Failed to create MSAA color image");
return false;
}
}
VkImageViewCreateInfo viewInfo{};
viewInfo.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO;
viewInfo.image = msaaColorImage_;
viewInfo.viewType = VK_IMAGE_VIEW_TYPE_2D;
viewInfo.format = swapchainFormat;
viewInfo.subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;
viewInfo.subresourceRange.levelCount = 1;
viewInfo.subresourceRange.layerCount = 1;
if (vkCreateImageView(device, &viewInfo, nullptr, &msaaColorView_) != VK_SUCCESS) {
LOG_ERROR("Failed to create MSAA color image view");
return false;
}
return true;
}
void VkContext::destroyMsaaColorImage() {
if (msaaColorView_) { vkDestroyImageView(device, msaaColorView_, nullptr); msaaColorView_ = VK_NULL_HANDLE; }
if (msaaColorImage_) { vmaDestroyImage(allocator, msaaColorImage_, msaaColorAllocation_); msaaColorImage_ = VK_NULL_HANDLE; msaaColorAllocation_ = VK_NULL_HANDLE; }
}
bool VkContext::createDepthResolveImage() {
if (msaaSamples_ == VK_SAMPLE_COUNT_1_BIT || !depthResolveSupported_) return true;
VkImageCreateInfo imgInfo{};
imgInfo.sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO;
imgInfo.imageType = VK_IMAGE_TYPE_2D;
imgInfo.format = depthFormat;
imgInfo.extent = {swapchainExtent.width, swapchainExtent.height, 1};
imgInfo.mipLevels = 1;
imgInfo.arrayLayers = 1;
imgInfo.samples = VK_SAMPLE_COUNT_1_BIT;
imgInfo.tiling = VK_IMAGE_TILING_OPTIMAL;
imgInfo.usage = VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT
| VK_IMAGE_USAGE_SAMPLED_BIT; // HiZ pyramid reads depth as texture
VmaAllocationCreateInfo allocInfo{};
allocInfo.usage = VMA_MEMORY_USAGE_GPU_ONLY;
if (vmaCreateImage(allocator, &imgInfo, &allocInfo, &depthResolveImage, &depthResolveAllocation, nullptr) != VK_SUCCESS) {
LOG_ERROR("Failed to create depth resolve image");
return false;
}
VkImageViewCreateInfo viewInfo{};
viewInfo.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO;
viewInfo.image = depthResolveImage;
viewInfo.viewType = VK_IMAGE_VIEW_TYPE_2D;
viewInfo.format = depthFormat;
viewInfo.subresourceRange.aspectMask = VK_IMAGE_ASPECT_DEPTH_BIT;
viewInfo.subresourceRange.levelCount = 1;
viewInfo.subresourceRange.layerCount = 1;
if (vkCreateImageView(device, &viewInfo, nullptr, &depthResolveImageView) != VK_SUCCESS) {
LOG_ERROR("Failed to create depth resolve image view");
return false;
}
return true;
}
void VkContext::destroyDepthResolveImage() {
if (depthResolveImageView) {
vkDestroyImageView(device, depthResolveImageView, nullptr);
depthResolveImageView = VK_NULL_HANDLE;
}
if (depthResolveImage) {
vmaDestroyImage(allocator, depthResolveImage, depthResolveAllocation);
depthResolveImage = VK_NULL_HANDLE;
depthResolveAllocation = VK_NULL_HANDLE;
}
}
VkSampleCountFlagBits VkContext::getMaxUsableSampleCount() const {
VkPhysicalDeviceProperties props;
vkGetPhysicalDeviceProperties(physicalDevice, &props);
VkSampleCountFlags counts = props.limits.framebufferColorSampleCounts
& props.limits.framebufferDepthSampleCounts;
if (counts & VK_SAMPLE_COUNT_8_BIT) return VK_SAMPLE_COUNT_8_BIT;
if (counts & VK_SAMPLE_COUNT_4_BIT) return VK_SAMPLE_COUNT_4_BIT;
if (counts & VK_SAMPLE_COUNT_2_BIT) return VK_SAMPLE_COUNT_2_BIT;
return VK_SAMPLE_COUNT_1_BIT;
}
void VkContext::setMsaaSamples(VkSampleCountFlagBits samples) {
// Clamp to max supported
VkSampleCountFlagBits maxSamples = getMaxUsableSampleCount();
if (samples > maxSamples) samples = maxSamples;
msaaSamples_ = samples;
swapchainDirty = true;
}
bool VkContext::createImGuiResources() {
// Create depth buffer first
if (!createDepthBuffer()) return false;
// Create MSAA color image if needed
if (!createMsaaColorImage()) return false;
// Create single-sample depth resolve image for MSAA path (if supported)
if (!createDepthResolveImage()) return false;
bool useMsaa = (msaaSamples_ > VK_SAMPLE_COUNT_1_BIT);
if (useMsaa) {
const bool useDepthResolve = (depthResolveImageView != VK_NULL_HANDLE);
// MSAA render pass: 3 or 4 attachments
VkAttachmentDescription attachments[4] = {};
// Attachment 0: MSAA color target
attachments[0].format = swapchainFormat;
attachments[0].samples = msaaSamples_;
attachments[0].loadOp = VK_ATTACHMENT_LOAD_OP_CLEAR;
attachments[0].storeOp = VK_ATTACHMENT_STORE_OP_DONT_CARE;
attachments[0].stencilLoadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE;
attachments[0].stencilStoreOp = VK_ATTACHMENT_STORE_OP_DONT_CARE;
attachments[0].initialLayout = VK_IMAGE_LAYOUT_UNDEFINED;
attachments[0].finalLayout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL;
// Attachment 1: Depth (multisampled)
attachments[1].format = depthFormat;
attachments[1].samples = msaaSamples_;
attachments[1].loadOp = VK_ATTACHMENT_LOAD_OP_CLEAR;
attachments[1].storeOp = VK_ATTACHMENT_STORE_OP_STORE;
attachments[1].stencilLoadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE;
attachments[1].stencilStoreOp = VK_ATTACHMENT_STORE_OP_DONT_CARE;
attachments[1].initialLayout = VK_IMAGE_LAYOUT_UNDEFINED;
attachments[1].finalLayout = VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL;
// Attachment 2: Resolve target (swapchain image)
attachments[2].format = swapchainFormat;
attachments[2].samples = VK_SAMPLE_COUNT_1_BIT;
attachments[2].loadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE;
attachments[2].storeOp = VK_ATTACHMENT_STORE_OP_STORE;
attachments[2].stencilLoadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE;
attachments[2].stencilStoreOp = VK_ATTACHMENT_STORE_OP_DONT_CARE;
attachments[2].initialLayout = VK_IMAGE_LAYOUT_UNDEFINED;
attachments[2].finalLayout = VK_IMAGE_LAYOUT_PRESENT_SRC_KHR;
if (useDepthResolve) {
attachments[3].format = depthFormat;
attachments[3].samples = VK_SAMPLE_COUNT_1_BIT;
attachments[3].loadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE;
attachments[3].storeOp = VK_ATTACHMENT_STORE_OP_STORE;
attachments[3].stencilLoadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE;
attachments[3].stencilStoreOp = VK_ATTACHMENT_STORE_OP_DONT_CARE;
attachments[3].initialLayout = VK_IMAGE_LAYOUT_UNDEFINED;
attachments[3].finalLayout = VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL;
}
if (useDepthResolve) {
VkAttachmentDescription2 attachments2[4]{};
for (int i = 0; i < 4; ++i) {
attachments2[i].sType = VK_STRUCTURE_TYPE_ATTACHMENT_DESCRIPTION_2;
attachments2[i].format = attachments[i].format;
attachments2[i].samples = attachments[i].samples;
attachments2[i].loadOp = attachments[i].loadOp;
attachments2[i].storeOp = attachments[i].storeOp;
attachments2[i].stencilLoadOp = attachments[i].stencilLoadOp;
attachments2[i].stencilStoreOp = attachments[i].stencilStoreOp;
attachments2[i].initialLayout = attachments[i].initialLayout;
attachments2[i].finalLayout = attachments[i].finalLayout;
}
VkAttachmentReference2 colorRef2{};
colorRef2.sType = VK_STRUCTURE_TYPE_ATTACHMENT_REFERENCE_2;
colorRef2.attachment = 0;
colorRef2.layout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL;
VkAttachmentReference2 depthRef2{};
depthRef2.sType = VK_STRUCTURE_TYPE_ATTACHMENT_REFERENCE_2;
depthRef2.attachment = 1;
depthRef2.layout = VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL;
VkAttachmentReference2 resolveRef2{};
resolveRef2.sType = VK_STRUCTURE_TYPE_ATTACHMENT_REFERENCE_2;
resolveRef2.attachment = 2;
resolveRef2.layout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL;
VkAttachmentReference2 depthResolveRef2{};
depthResolveRef2.sType = VK_STRUCTURE_TYPE_ATTACHMENT_REFERENCE_2;
depthResolveRef2.attachment = 3;
depthResolveRef2.layout = VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL;
VkSubpassDescriptionDepthStencilResolve dsResolve{};
dsResolve.sType = VK_STRUCTURE_TYPE_SUBPASS_DESCRIPTION_DEPTH_STENCIL_RESOLVE;
dsResolve.depthResolveMode = depthResolveMode_;
dsResolve.stencilResolveMode = VK_RESOLVE_MODE_NONE;
dsResolve.pDepthStencilResolveAttachment = &depthResolveRef2;
VkSubpassDescription2 subpass2{};
subpass2.sType = VK_STRUCTURE_TYPE_SUBPASS_DESCRIPTION_2;
subpass2.pNext = &dsResolve;
subpass2.pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS;
subpass2.colorAttachmentCount = 1;
subpass2.pColorAttachments = &colorRef2;
subpass2.pDepthStencilAttachment = &depthRef2;
subpass2.pResolveAttachments = &resolveRef2;
VkSubpassDependency2 dep2{};
dep2.sType = VK_STRUCTURE_TYPE_SUBPASS_DEPENDENCY_2;
dep2.srcSubpass = VK_SUBPASS_EXTERNAL;
dep2.dstSubpass = 0;
dep2.srcStageMask = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT | VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT;
dep2.dstStageMask = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT | VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT;
dep2.dstAccessMask = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT | VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT;
VkRenderPassCreateInfo2 rpInfo2{};
rpInfo2.sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO_2;
rpInfo2.attachmentCount = 4;
rpInfo2.pAttachments = attachments2;
rpInfo2.subpassCount = 1;
rpInfo2.pSubpasses = &subpass2;
rpInfo2.dependencyCount = 1;
rpInfo2.pDependencies = &dep2;
if (vkCreateRenderPass2(device, &rpInfo2, nullptr, &imguiRenderPass) != VK_SUCCESS) {
LOG_ERROR("Failed to create MSAA render pass (depth resolve)");
return false;
}
} else {
VkAttachmentReference colorRef{};
colorRef.attachment = 0;
colorRef.layout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL;
VkAttachmentReference depthRef{};
depthRef.attachment = 1;
depthRef.layout = VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL;
VkAttachmentReference resolveRef{};
resolveRef.attachment = 2;
resolveRef.layout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL;
VkSubpassDescription subpass{};
subpass.pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS;
subpass.colorAttachmentCount = 1;
subpass.pColorAttachments = &colorRef;
subpass.pDepthStencilAttachment = &depthRef;
subpass.pResolveAttachments = &resolveRef;
VkSubpassDependency dependency{};
dependency.srcSubpass = VK_SUBPASS_EXTERNAL;
dependency.dstSubpass = 0;
dependency.srcStageMask = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT | VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT;
dependency.dstStageMask = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT | VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT;
dependency.srcAccessMask = 0;
dependency.dstAccessMask = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT | VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT;
VkRenderPassCreateInfo rpInfo{};
rpInfo.sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO;
rpInfo.attachmentCount = 3;
rpInfo.pAttachments = attachments;
rpInfo.subpassCount = 1;
rpInfo.pSubpasses = &subpass;
rpInfo.dependencyCount = 1;
rpInfo.pDependencies = &dependency;
if (vkCreateRenderPass(device, &rpInfo, nullptr, &imguiRenderPass) != VK_SUCCESS) {
LOG_ERROR("Failed to create MSAA render pass");
return false;
}
}
// Framebuffers: [msaaColorView, depthView, swapchainView, depthResolveView?]
swapchainFramebuffers.resize(swapchainImageViews.size());
for (size_t i = 0; i < swapchainImageViews.size(); i++) {
VkImageView fbAttachments[4] = {msaaColorView_, depthImageView, swapchainImageViews[i], depthResolveImageView};
VkFramebufferCreateInfo fbInfo{};
fbInfo.sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO;
fbInfo.renderPass = imguiRenderPass;
fbInfo.attachmentCount = useDepthResolve ? 4 : 3;
fbInfo.pAttachments = fbAttachments;
fbInfo.width = swapchainExtent.width;
fbInfo.height = swapchainExtent.height;
fbInfo.layers = 1;
if (vkCreateFramebuffer(device, &fbInfo, nullptr, &swapchainFramebuffers[i]) != VK_SUCCESS) {
LOG_ERROR("Failed to create MSAA swapchain framebuffer ", i);
return false;
}
}
} else {
// Non-MSAA render pass: 2 attachments (color + depth) — original path
VkAttachmentDescription attachments[2] = {};
// Color attachment (swapchain image)
attachments[0].format = swapchainFormat;
attachments[0].samples = VK_SAMPLE_COUNT_1_BIT;
attachments[0].loadOp = VK_ATTACHMENT_LOAD_OP_CLEAR;
attachments[0].storeOp = VK_ATTACHMENT_STORE_OP_STORE;
attachments[0].stencilLoadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE;
attachments[0].stencilStoreOp = VK_ATTACHMENT_STORE_OP_DONT_CARE;
attachments[0].initialLayout = VK_IMAGE_LAYOUT_UNDEFINED;
attachments[0].finalLayout = VK_IMAGE_LAYOUT_PRESENT_SRC_KHR;
// Depth attachment
attachments[1].format = depthFormat;
attachments[1].samples = VK_SAMPLE_COUNT_1_BIT;
attachments[1].loadOp = VK_ATTACHMENT_LOAD_OP_CLEAR;
attachments[1].storeOp = VK_ATTACHMENT_STORE_OP_STORE;
attachments[1].stencilLoadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE;
attachments[1].stencilStoreOp = VK_ATTACHMENT_STORE_OP_DONT_CARE;
attachments[1].initialLayout = VK_IMAGE_LAYOUT_UNDEFINED;
attachments[1].finalLayout = VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL;
VkAttachmentReference colorRef{};
colorRef.attachment = 0;
colorRef.layout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL;
VkAttachmentReference depthRef{};
depthRef.attachment = 1;
depthRef.layout = VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL;
VkSubpassDescription subpass{};
subpass.pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS;
subpass.colorAttachmentCount = 1;
subpass.pColorAttachments = &colorRef;
subpass.pDepthStencilAttachment = &depthRef;
VkSubpassDependency dependency{};
dependency.srcSubpass = VK_SUBPASS_EXTERNAL;
dependency.dstSubpass = 0;
dependency.srcStageMask = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT | VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT;
dependency.dstStageMask = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT | VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT;
dependency.srcAccessMask = 0;
dependency.dstAccessMask = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT | VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT;
VkRenderPassCreateInfo rpInfo{};
rpInfo.sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO;
rpInfo.attachmentCount = 2;
rpInfo.pAttachments = attachments;
rpInfo.subpassCount = 1;
rpInfo.pSubpasses = &subpass;
rpInfo.dependencyCount = 1;
rpInfo.pDependencies = &dependency;
if (vkCreateRenderPass(device, &rpInfo, nullptr, &imguiRenderPass) != VK_SUCCESS) {
LOG_ERROR("Failed to create render pass");
return false;
}
// Framebuffers: [swapchainView, depthView]
swapchainFramebuffers.resize(swapchainImageViews.size());
for (size_t i = 0; i < swapchainImageViews.size(); i++) {
VkImageView fbAttachments[2] = {swapchainImageViews[i], depthImageView};
VkFramebufferCreateInfo fbInfo{};
fbInfo.sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO;
fbInfo.renderPass = imguiRenderPass;
fbInfo.attachmentCount = 2;
fbInfo.pAttachments = fbAttachments;
fbInfo.width = swapchainExtent.width;
fbInfo.height = swapchainExtent.height;
fbInfo.layers = 1;
if (vkCreateFramebuffer(device, &fbInfo, nullptr, &swapchainFramebuffers[i]) != VK_SUCCESS) {
LOG_ERROR("Failed to create swapchain framebuffer ", i);
return false;
}
}
}
// Create descriptor pool for ImGui.
// Budget: ~10 internal ImGui sets + up to 2000 UI icon textures (spells,
// items, talents, buffs, etc.) that are uploaded and cached for the session.
static constexpr uint32_t IMGUI_POOL_SIZE = 2048;
VkDescriptorPoolSize poolSizes[] = {
{VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, IMGUI_POOL_SIZE},
};
VkDescriptorPoolCreateInfo dpInfo{};
dpInfo.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO;
dpInfo.flags = VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT;
dpInfo.maxSets = IMGUI_POOL_SIZE;
dpInfo.poolSizeCount = 1;
dpInfo.pPoolSizes = poolSizes;
if (vkCreateDescriptorPool(device, &dpInfo, nullptr, &imguiDescriptorPool) != VK_SUCCESS) {
LOG_ERROR("Failed to create ImGui descriptor pool");
return false;
}
return true;
}
void VkContext::destroyImGuiResources() {
// Destroy uploaded UI textures
for (auto& tex : uiTextures_) {
if (tex.view) vkDestroyImageView(device, tex.view, nullptr);
if (tex.image) vkDestroyImage(device, tex.image, nullptr);
if (tex.memory) vkFreeMemory(device, tex.memory, nullptr);
}
uiTextures_.clear();
uiTextureSampler_ = VK_NULL_HANDLE; // Owned by sampler cache
if (imguiDescriptorPool) {
vkDestroyDescriptorPool(device, imguiDescriptorPool, nullptr);
imguiDescriptorPool = VK_NULL_HANDLE;
}
destroyMsaaColorImage();
destroyDepthResolveImage();
destroyDepthBuffer();
// Framebuffers are destroyed in destroySwapchain()
if (imguiRenderPass) {
vkDestroyRenderPass(device, imguiRenderPass, nullptr);
imguiRenderPass = VK_NULL_HANDLE;
}
}
static uint32_t findMemType(VkPhysicalDevice physDev, uint32_t typeFilter, VkMemoryPropertyFlags props) {
VkPhysicalDeviceMemoryProperties memProps;
vkGetPhysicalDeviceMemoryProperties(physDev, &memProps);
for (uint32_t i = 0; i < memProps.memoryTypeCount; i++) {
if ((typeFilter & (1 << i)) && (memProps.memoryTypes[i].propertyFlags & props) == props)
return i;
}
LOG_ERROR("VkContext: no suitable memory type found");
return UINT32_MAX;
}
VkDescriptorSet VkContext::uploadImGuiTexture(const uint8_t* rgba, int width, int height) {
if (!device || !physicalDevice || width <= 0 || height <= 0 || !rgba)
return VK_NULL_HANDLE;
VkDeviceSize imageSize = static_cast<VkDeviceSize>(width) * height * 4;
// Create shared sampler on first call (via sampler cache)
if (!uiTextureSampler_) {
VkSamplerCreateInfo si{};
si.sType = VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO;
si.magFilter = VK_FILTER_LINEAR;
si.minFilter = VK_FILTER_LINEAR;
si.addressModeU = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE;
si.addressModeV = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE;
si.addressModeW = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE;
uiTextureSampler_ = getOrCreateSampler(si);
if (!uiTextureSampler_) {
LOG_ERROR("Failed to create UI texture sampler");
return VK_NULL_HANDLE;
}
}
// Staging buffer
VkBuffer stagingBuffer;
VkDeviceMemory stagingMemory;
{
VkBufferCreateInfo bufInfo{};
bufInfo.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO;
bufInfo.size = imageSize;
bufInfo.usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT;
bufInfo.sharingMode = VK_SHARING_MODE_EXCLUSIVE;
if (vkCreateBuffer(device, &bufInfo, nullptr, &stagingBuffer) != VK_SUCCESS)
return VK_NULL_HANDLE;
VkMemoryRequirements memReqs;
vkGetBufferMemoryRequirements(device, stagingBuffer, &memReqs);
VkMemoryAllocateInfo allocInfo{};
allocInfo.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO;
allocInfo.allocationSize = memReqs.size;
allocInfo.memoryTypeIndex = findMemType(physicalDevice, memReqs.memoryTypeBits,
VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT);
if (vkAllocateMemory(device, &allocInfo, nullptr, &stagingMemory) != VK_SUCCESS) {
vkDestroyBuffer(device, stagingBuffer, nullptr);
return VK_NULL_HANDLE;
}
vkBindBufferMemory(device, stagingBuffer, stagingMemory, 0);
void* mapped;
vkMapMemory(device, stagingMemory, 0, imageSize, 0, &mapped);
memcpy(mapped, rgba, imageSize);
vkUnmapMemory(device, stagingMemory);
}
// Create image
VkImage image;
VkDeviceMemory imageMemory;
{
VkImageCreateInfo imgInfo{};
imgInfo.sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO;
imgInfo.imageType = VK_IMAGE_TYPE_2D;
imgInfo.format = VK_FORMAT_R8G8B8A8_UNORM;
imgInfo.extent = {static_cast<uint32_t>(width), static_cast<uint32_t>(height), 1};
imgInfo.mipLevels = 1;
imgInfo.arrayLayers = 1;
imgInfo.samples = VK_SAMPLE_COUNT_1_BIT;
imgInfo.tiling = VK_IMAGE_TILING_OPTIMAL;
imgInfo.usage = VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_SAMPLED_BIT;
imgInfo.sharingMode = VK_SHARING_MODE_EXCLUSIVE;
imgInfo.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED;
if (vkCreateImage(device, &imgInfo, nullptr, &image) != VK_SUCCESS) {
vkDestroyBuffer(device, stagingBuffer, nullptr);
vkFreeMemory(device, stagingMemory, nullptr);
return VK_NULL_HANDLE;
}
VkMemoryRequirements memReqs;
vkGetImageMemoryRequirements(device, image, &memReqs);
VkMemoryAllocateInfo allocInfo{};
allocInfo.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO;
allocInfo.allocationSize = memReqs.size;
allocInfo.memoryTypeIndex = findMemType(physicalDevice, memReqs.memoryTypeBits,
VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT);
if (vkAllocateMemory(device, &allocInfo, nullptr, &imageMemory) != VK_SUCCESS) {
vkDestroyImage(device, image, nullptr);
vkDestroyBuffer(device, stagingBuffer, nullptr);
vkFreeMemory(device, stagingMemory, nullptr);
return VK_NULL_HANDLE;
}
vkBindImageMemory(device, image, imageMemory, 0);
}
// Upload via immediate submit
immediateSubmit([&](VkCommandBuffer cmd) {
VkImageMemoryBarrier barrier{};
barrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER;
barrier.oldLayout = VK_IMAGE_LAYOUT_UNDEFINED;
barrier.newLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL;
barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
barrier.image = image;
barrier.subresourceRange = {VK_IMAGE_ASPECT_COLOR_BIT, 0, 1, 0, 1};
barrier.srcAccessMask = 0;
barrier.dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;
vkCmdPipelineBarrier(cmd, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
VK_PIPELINE_STAGE_TRANSFER_BIT, 0, 0, nullptr, 0, nullptr, 1, &barrier);
VkBufferImageCopy region{};
region.imageSubresource = {VK_IMAGE_ASPECT_COLOR_BIT, 0, 0, 1};
region.imageExtent = {static_cast<uint32_t>(width), static_cast<uint32_t>(height), 1};
vkCmdCopyBufferToImage(cmd, stagingBuffer, image,
VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, 1, &region);
barrier.oldLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL;
barrier.newLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
barrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;
barrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT;
vkCmdPipelineBarrier(cmd, VK_PIPELINE_STAGE_TRANSFER_BIT,
VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, 0, 0, nullptr, 0, nullptr, 1, &barrier);
});
// Cleanup staging
vkDestroyBuffer(device, stagingBuffer, nullptr);
vkFreeMemory(device, stagingMemory, nullptr);
// Create image view
VkImageView imageView;
{
VkImageViewCreateInfo viewInfo{};
viewInfo.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO;
viewInfo.image = image;
viewInfo.viewType = VK_IMAGE_VIEW_TYPE_2D;
viewInfo.format = VK_FORMAT_R8G8B8A8_UNORM;
viewInfo.subresourceRange = {VK_IMAGE_ASPECT_COLOR_BIT, 0, 1, 0, 1};
if (vkCreateImageView(device, &viewInfo, nullptr, &imageView) != VK_SUCCESS) {
vkDestroyImage(device, image, nullptr);
vkFreeMemory(device, imageMemory, nullptr);
return VK_NULL_HANDLE;
}
}
// Register with ImGui (allocates from imguiDescriptorPool)
VkDescriptorSet ds = ImGui_ImplVulkan_AddTexture(uiTextureSampler_, imageView,
VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL);
if (!ds) {
LOG_ERROR("ImGui descriptor pool exhausted — cannot upload UI texture");
vkDestroyImageView(device, imageView, nullptr);
vkDestroyImage(device, image, nullptr);
vkFreeMemory(device, imageMemory, nullptr);
return VK_NULL_HANDLE;
}
// Track for cleanup
uiTextures_.push_back({image, imageMemory, imageView});
return ds;
}
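// Illustrative usage sketch: decodeIconRGBA is an assumed helper, not defined
// here. Upload once at load time; the returned descriptor set stays valid for
// the whole session and is drawn with ImGui::Image each frame.
std::vector<uint8_t> decodeIconRGBA(const char* path); // assumed, defined elsewhere
static void drawSessionIcon(VkContext& ctx) {
    static std::vector<uint8_t> pixels = decodeIconRGBA("Interface/Icons/spell.blp");
    static VkDescriptorSet icon = ctx.uploadImGuiTexture(pixels.data(), 64, 64);
    if (icon != VK_NULL_HANDLE)
        ImGui::Image((ImTextureID)icon, ImVec2(64.0f, 64.0f));
}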
bool VkContext::recreateSwapchain(int width, int height) {
vkDeviceWaitIdle(device);
// Destroy old framebuffers
for (auto fb : swapchainFramebuffers) {
if (fb) vkDestroyFramebuffer(device, fb, nullptr);
}
swapchainFramebuffers.clear();
// Destroy old image views
for (auto iv : swapchainImageViews) {
if (iv) vkDestroyImageView(device, iv, nullptr);
}
swapchainImageViews.clear();
VkSwapchainKHR oldSwapchain = swapchain;
vkb::SwapchainBuilder swapchainBuilder{physicalDevice, device, surface};
auto& builder = swapchainBuilder
.set_desired_format({VK_FORMAT_B8G8R8A8_UNORM, VK_COLOR_SPACE_SRGB_NONLINEAR_KHR})
.set_desired_extent(static_cast<uint32_t>(width), static_cast<uint32_t>(height))
.set_image_usage_flags(VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT)
.set_desired_min_image_count(2)
.set_old_swapchain(oldSwapchain);
if (vsync_) {
builder.set_desired_present_mode(VK_PRESENT_MODE_FIFO_KHR);
} else {
builder.set_desired_present_mode(VK_PRESENT_MODE_IMMEDIATE_KHR);
builder.add_fallback_present_mode(VK_PRESENT_MODE_MAILBOX_KHR);
builder.add_fallback_present_mode(VK_PRESENT_MODE_FIFO_RELAXED_KHR);
}
auto swapRet = builder.build();
if (!swapRet) {
// Creation failed. The old swapchain was passed as oldSwapchain and is
// retired regardless of the outcome, so destroy it now.
if (oldSwapchain) {
vkDestroySwapchainKHR(device, oldSwapchain, nullptr);
swapchain = VK_NULL_HANDLE;
}
LOG_ERROR("Failed to recreate swapchain: ", swapRet.error().message());
// Keep swapchainDirty=true so the next frame retries
swapchainDirty = true;
return false;
}
// Success — safe to retire the old swapchain
if (oldSwapchain) {
vkDestroySwapchainKHR(device, oldSwapchain, nullptr);
}
auto vkbSwap = swapRet.value();
swapchain = vkbSwap.swapchain;
swapchainFormat = vkbSwap.image_format;
swapchainExtent = vkbSwap.extent;
swapchainImages = vkbSwap.get_images().value();
swapchainImageViews = vkbSwap.get_image_views().value();
// Resize per-image semaphore arrays if the swapchain image count changed
{
const uint32_t newCount = static_cast<uint32_t>(swapchainImages.size());
const uint32_t oldCount = static_cast<uint32_t>(imageAcquiredSemaphores_.size());
VkSemaphoreCreateInfo semInfo{};
semInfo.sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO;
// Destroy excess semaphores if shrinking
for (uint32_t i = newCount; i < oldCount; i++) {
if (imageAcquiredSemaphores_[i]) vkDestroySemaphore(device, imageAcquiredSemaphores_[i], nullptr);
if (renderFinishedSemaphores_[i]) vkDestroySemaphore(device, renderFinishedSemaphores_[i], nullptr);
}
imageAcquiredSemaphores_.resize(newCount);
renderFinishedSemaphores_.resize(newCount);
// Create new semaphores if growing
for (uint32_t i = oldCount; i < newCount; i++) {
vkCreateSemaphore(device, &semInfo, nullptr, &imageAcquiredSemaphores_[i]);
vkCreateSemaphore(device, &semInfo, nullptr, &renderFinishedSemaphores_[i]);
}
}
// Recreate depth buffer + MSAA color image + depth resolve image
destroyMsaaColorImage();
destroyDepthResolveImage();
destroyDepthBuffer();
// Destroy old render pass (needs recreation if MSAA changed)
if (imguiRenderPass) {
vkDestroyRenderPass(device, imguiRenderPass, nullptr);
imguiRenderPass = VK_NULL_HANDLE;
}
if (!createDepthBuffer()) return false;
if (!createMsaaColorImage()) return false;
if (!createDepthResolveImage()) return false;
bool useMsaa = (msaaSamples_ > VK_SAMPLE_COUNT_1_BIT);
if (useMsaa) {
const bool useDepthResolve = (depthResolveImageView != VK_NULL_HANDLE);
// MSAA render pass: 3 or 4 attachments
VkAttachmentDescription attachments[4] = {};
attachments[0].format = swapchainFormat;
attachments[0].samples = msaaSamples_;
attachments[0].loadOp = VK_ATTACHMENT_LOAD_OP_CLEAR;
attachments[0].storeOp = VK_ATTACHMENT_STORE_OP_DONT_CARE;
attachments[0].stencilLoadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE;
attachments[0].stencilStoreOp = VK_ATTACHMENT_STORE_OP_DONT_CARE;
attachments[0].initialLayout = VK_IMAGE_LAYOUT_UNDEFINED;
attachments[0].finalLayout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL;
attachments[1].format = depthFormat;
attachments[1].samples = msaaSamples_;
attachments[1].loadOp = VK_ATTACHMENT_LOAD_OP_CLEAR;
attachments[1].storeOp = VK_ATTACHMENT_STORE_OP_STORE;
attachments[1].stencilLoadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE;
attachments[1].stencilStoreOp = VK_ATTACHMENT_STORE_OP_DONT_CARE;
attachments[1].initialLayout = VK_IMAGE_LAYOUT_UNDEFINED;
attachments[1].finalLayout = VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL;
attachments[2].format = swapchainFormat;
attachments[2].samples = VK_SAMPLE_COUNT_1_BIT;
attachments[2].loadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE;
attachments[2].storeOp = VK_ATTACHMENT_STORE_OP_STORE;
attachments[2].stencilLoadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE;
attachments[2].stencilStoreOp = VK_ATTACHMENT_STORE_OP_DONT_CARE;
attachments[2].initialLayout = VK_IMAGE_LAYOUT_UNDEFINED;
attachments[2].finalLayout = VK_IMAGE_LAYOUT_PRESENT_SRC_KHR;
if (useDepthResolve) {
attachments[3].format = depthFormat;
attachments[3].samples = VK_SAMPLE_COUNT_1_BIT;
attachments[3].loadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE;
attachments[3].storeOp = VK_ATTACHMENT_STORE_OP_STORE;
attachments[3].stencilLoadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE;
attachments[3].stencilStoreOp = VK_ATTACHMENT_STORE_OP_DONT_CARE;
attachments[3].initialLayout = VK_IMAGE_LAYOUT_UNDEFINED;
attachments[3].finalLayout = VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL;
}
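        // A depth-stencil resolve attachment can only be expressed through
        // VkSubpassDescriptionDepthStencilResolve, which requires the
        // renderpass2 entry points (VK_KHR_create_renderpass2, core in
        // Vulkan 1.2). Hence the parallel *2 structs below.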
if (useDepthResolve) {
VkAttachmentDescription2 attachments2[4]{};
for (int i = 0; i < 4; ++i) {
attachments2[i].sType = VK_STRUCTURE_TYPE_ATTACHMENT_DESCRIPTION_2;
attachments2[i].format = attachments[i].format;
attachments2[i].samples = attachments[i].samples;
attachments2[i].loadOp = attachments[i].loadOp;
attachments2[i].storeOp = attachments[i].storeOp;
attachments2[i].stencilLoadOp = attachments[i].stencilLoadOp;
attachments2[i].stencilStoreOp = attachments[i].stencilStoreOp;
attachments2[i].initialLayout = attachments[i].initialLayout;
attachments2[i].finalLayout = attachments[i].finalLayout;
}
VkAttachmentReference2 colorRef2{};
colorRef2.sType = VK_STRUCTURE_TYPE_ATTACHMENT_REFERENCE_2;
colorRef2.attachment = 0;
colorRef2.layout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL;
VkAttachmentReference2 depthRef2{};
depthRef2.sType = VK_STRUCTURE_TYPE_ATTACHMENT_REFERENCE_2;
depthRef2.attachment = 1;
depthRef2.layout = VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL;
VkAttachmentReference2 resolveRef2{};
resolveRef2.sType = VK_STRUCTURE_TYPE_ATTACHMENT_REFERENCE_2;
resolveRef2.attachment = 2;
resolveRef2.layout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL;
VkAttachmentReference2 depthResolveRef2{};
depthResolveRef2.sType = VK_STRUCTURE_TYPE_ATTACHMENT_REFERENCE_2;
depthResolveRef2.attachment = 3;
depthResolveRef2.layout = VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL;
VkSubpassDescriptionDepthStencilResolve dsResolve{};
dsResolve.sType = VK_STRUCTURE_TYPE_SUBPASS_DESCRIPTION_DEPTH_STENCIL_RESOLVE;
dsResolve.depthResolveMode = depthResolveMode_;
dsResolve.stencilResolveMode = VK_RESOLVE_MODE_NONE;
dsResolve.pDepthStencilResolveAttachment = &depthResolveRef2;
VkSubpassDescription2 subpass2{};
subpass2.sType = VK_STRUCTURE_TYPE_SUBPASS_DESCRIPTION_2;
subpass2.pNext = &dsResolve;
subpass2.pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS;
subpass2.colorAttachmentCount = 1;
subpass2.pColorAttachments = &colorRef2;
subpass2.pDepthStencilAttachment = &depthRef2;
subpass2.pResolveAttachments = &resolveRef2;
VkSubpassDependency2 dep2{};
dep2.sType = VK_STRUCTURE_TYPE_SUBPASS_DEPENDENCY_2;
dep2.srcSubpass = VK_SUBPASS_EXTERNAL;
dep2.dstSubpass = 0;
dep2.srcStageMask = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT | VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT;
dep2.dstStageMask = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT | VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT;
            dep2.srcAccessMask = 0; // execution-only dependency from EXTERNAL; no prior writes to flush
            dep2.dstAccessMask = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT | VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT;
VkRenderPassCreateInfo2 rpInfo2{};
rpInfo2.sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO_2;
rpInfo2.attachmentCount = 4;
rpInfo2.pAttachments = attachments2;
rpInfo2.subpassCount = 1;
rpInfo2.pSubpasses = &subpass2;
rpInfo2.dependencyCount = 1;
rpInfo2.pDependencies = &dep2;
if (vkCreateRenderPass2(device, &rpInfo2, nullptr, &imguiRenderPass) != VK_SUCCESS) {
LOG_ERROR("Failed to recreate MSAA render pass (depth resolve)");
return false;
}
} else {
VkAttachmentReference colorRef{0, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL};
VkAttachmentReference depthRef{1, VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL};
VkAttachmentReference resolveRef{2, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL};
VkSubpassDescription subpass{};
subpass.pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS;
subpass.colorAttachmentCount = 1;
subpass.pColorAttachments = &colorRef;
subpass.pDepthStencilAttachment = &depthRef;
subpass.pResolveAttachments = &resolveRef;
VkSubpassDependency dependency{};
dependency.srcSubpass = VK_SUBPASS_EXTERNAL;
dependency.dstSubpass = 0;
dependency.srcStageMask = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT | VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT;
dependency.dstStageMask = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT | VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT;
dependency.srcAccessMask = 0;
dependency.dstAccessMask = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT | VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT;
VkRenderPassCreateInfo rpInfo{};
rpInfo.sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO;
rpInfo.attachmentCount = 3;
rpInfo.pAttachments = attachments;
rpInfo.subpassCount = 1;
rpInfo.pSubpasses = &subpass;
rpInfo.dependencyCount = 1;
rpInfo.pDependencies = &dependency;
if (vkCreateRenderPass(device, &rpInfo, nullptr, &imguiRenderPass) != VK_SUCCESS) {
LOG_ERROR("Failed to recreate MSAA render pass");
return false;
}
}
swapchainFramebuffers.resize(swapchainImageViews.size());
for (size_t i = 0; i < swapchainImageViews.size(); i++) {
VkImageView fbAttachments[4] = {msaaColorView_, depthImageView, swapchainImageViews[i], depthResolveImageView};
VkFramebufferCreateInfo fbInfo{};
fbInfo.sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO;
fbInfo.renderPass = imguiRenderPass;
fbInfo.attachmentCount = useDepthResolve ? 4 : 3;
fbInfo.pAttachments = fbAttachments;
fbInfo.width = swapchainExtent.width;
fbInfo.height = swapchainExtent.height;
fbInfo.layers = 1;
if (vkCreateFramebuffer(device, &fbInfo, nullptr, &swapchainFramebuffers[i]) != VK_SUCCESS) {
LOG_ERROR("Failed to recreate MSAA swapchain framebuffer ", i);
return false;
}
}
} else {
// Non-MSAA render pass: 2 attachments
VkAttachmentDescription attachments[2] = {};
attachments[0].format = swapchainFormat;
attachments[0].samples = VK_SAMPLE_COUNT_1_BIT;
attachments[0].loadOp = VK_ATTACHMENT_LOAD_OP_CLEAR;
attachments[0].storeOp = VK_ATTACHMENT_STORE_OP_STORE;
attachments[0].stencilLoadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE;
attachments[0].stencilStoreOp = VK_ATTACHMENT_STORE_OP_DONT_CARE;
attachments[0].initialLayout = VK_IMAGE_LAYOUT_UNDEFINED;
attachments[0].finalLayout = VK_IMAGE_LAYOUT_PRESENT_SRC_KHR;
attachments[1].format = depthFormat;
attachments[1].samples = VK_SAMPLE_COUNT_1_BIT;
attachments[1].loadOp = VK_ATTACHMENT_LOAD_OP_CLEAR;
attachments[1].storeOp = VK_ATTACHMENT_STORE_OP_STORE;
attachments[1].stencilLoadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE;
attachments[1].stencilStoreOp = VK_ATTACHMENT_STORE_OP_DONT_CARE;
attachments[1].initialLayout = VK_IMAGE_LAYOUT_UNDEFINED;
attachments[1].finalLayout = VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL;
VkAttachmentReference colorRef{0, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL};
VkAttachmentReference depthRef{1, VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL};
VkSubpassDescription subpass{};
subpass.pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS;
subpass.colorAttachmentCount = 1;
subpass.pColorAttachments = &colorRef;
subpass.pDepthStencilAttachment = &depthRef;
VkSubpassDependency dependency{};
dependency.srcSubpass = VK_SUBPASS_EXTERNAL;
dependency.dstSubpass = 0;
dependency.srcStageMask = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT | VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT;
dependency.dstStageMask = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT | VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT;
dependency.srcAccessMask = 0;
dependency.dstAccessMask = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT | VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT;
VkRenderPassCreateInfo rpInfo{};
rpInfo.sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO;
rpInfo.attachmentCount = 2;
rpInfo.pAttachments = attachments;
rpInfo.subpassCount = 1;
rpInfo.pSubpasses = &subpass;
rpInfo.dependencyCount = 1;
rpInfo.pDependencies = &dependency;
if (vkCreateRenderPass(device, &rpInfo, nullptr, &imguiRenderPass) != VK_SUCCESS) {
LOG_ERROR("Failed to recreate render pass");
return false;
}
swapchainFramebuffers.resize(swapchainImageViews.size());
for (size_t i = 0; i < swapchainImageViews.size(); i++) {
VkImageView fbAttachments[2] = {swapchainImageViews[i], depthImageView};
VkFramebufferCreateInfo fbInfo{};
fbInfo.sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO;
fbInfo.renderPass = imguiRenderPass;
fbInfo.attachmentCount = 2;
fbInfo.pAttachments = fbAttachments;
fbInfo.width = swapchainExtent.width;
fbInfo.height = swapchainExtent.height;
fbInfo.layers = 1;
if (vkCreateFramebuffer(device, &fbInfo, nullptr, &swapchainFramebuffers[i]) != VK_SUCCESS) {
LOG_ERROR("Failed to recreate swapchain framebuffer ", i);
return false;
}
}
}
swapchainDirty = false;
LOG_INFO("Swapchain recreated: ", swapchainExtent.width, "x", swapchainExtent.height);
return true;
}
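// Begin a frame: wait on this frame slot's fence, run deferred cleanup,
// acquire a swapchain image, rotate the per-image acquire semaphores, and
// start recording the frame command buffer. Returns VK_NULL_HANDLE when the
// frame must be skipped (device lost, swapchain out of date, fence timeout).
// Illustrative caller-side sketch (drawScene is a hypothetical placeholder):
//
//   uint32_t imageIndex;
//   if (VkCommandBuffer cmd = ctx.beginFrame(imageIndex)) {
//       drawScene(cmd);
//       ctx.endFrame(cmd, imageIndex);
//   }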
VkCommandBuffer VkContext::beginFrame(uint32_t& imageIndex) {
if (deviceLost_) return VK_NULL_HANDLE;
if (swapchain == VK_NULL_HANDLE) return VK_NULL_HANDLE; // Swapchain lost; recreate pending
auto& frame = frames[currentFrame];
// Wait for this frame's fence (with timeout to detect GPU hangs)
static int beginFrameCounter = 0;
beginFrameCounter++;
VkResult fenceResult = vkWaitForFences(device, 1, &frame.inFlightFence, VK_TRUE, 5000000000ULL); // 5 second timeout
if (fenceResult == VK_TIMEOUT) {
LOG_ERROR("beginFrame[", beginFrameCounter, "] FENCE TIMEOUT (5s) on frame slot ", currentFrame, " — GPU hang detected!");
return VK_NULL_HANDLE;
}
if (fenceResult != VK_SUCCESS) {
LOG_ERROR("beginFrame[", beginFrameCounter, "] fence wait failed: ", static_cast<int>(fenceResult));
if (fenceResult == VK_ERROR_DEVICE_LOST) {
deviceLost_ = true;
}
return VK_NULL_HANDLE;
}
// Any work queued for this frame slot is now guaranteed to be unused by the GPU.
runDeferredCleanup(currentFrame);
// Acquire next swapchain image using the free semaphore.
// After acquiring we swap it into the per-image slot so the old per-image
// semaphore (now released by the presentation engine) becomes the free one.
VkResult result = vkAcquireNextImageKHR(device, swapchain, UINT64_MAX,
nextAcquireSemaphore_, VK_NULL_HANDLE, &imageIndex);
if (result == VK_ERROR_OUT_OF_DATE_KHR) {
swapchainDirty = true;
return VK_NULL_HANDLE;
}
    if (result != VK_SUCCESS && result != VK_SUBOPTIMAL_KHR) {
        LOG_ERROR("Failed to acquire swapchain image: ", static_cast<int>(result));
        if (result == VK_ERROR_DEVICE_LOST) {
            deviceLost_ = true;
        }
        return VK_NULL_HANDLE;
    }
// Swap semaphores: the image's old acquire semaphore is now free (the presentation
// engine released it when this image was re-acquired). The semaphore we just used
// becomes the per-image one for submit/present.
currentAcquireSemaphore_ = nextAcquireSemaphore_;
nextAcquireSemaphore_ = imageAcquiredSemaphores_[imageIndex];
imageAcquiredSemaphores_[imageIndex] = currentAcquireSemaphore_;
vkResetFences(device, 1, &frame.inFlightFence);
vkResetCommandBuffer(frame.commandBuffer, 0);
VkCommandBufferBeginInfo beginInfo{};
beginInfo.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO;
beginInfo.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT;
vkBeginCommandBuffer(frame.commandBuffer, &beginInfo);
// If async upload batches are still in flight (submitted to the transfer queue),
// wait for their fences and insert a memory barrier so the graphics queue sees
// the completed layout transitions and transfer writes.
if (!inFlightBatches_.empty()) {
waitAllUploads();
VkMemoryBarrier memBarrier{};
memBarrier.sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER;
memBarrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;
memBarrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT;
vkCmdPipelineBarrier(frame.commandBuffer,
VK_PIPELINE_STAGE_TRANSFER_BIT,
VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT,
0, 1, &memBarrier, 0, nullptr, 0, nullptr);
}
return frame.commandBuffer;
}
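// End a frame: submit the command buffer (waiting on the per-image acquire
// semaphore, signaling the per-image render-finished semaphore plus the
// frame fence), then present the image and advance the frame slot.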
void VkContext::endFrame(VkCommandBuffer cmd, uint32_t imageIndex) {
static int endFrameCounter = 0;
endFrameCounter++;
VkResult endResult = vkEndCommandBuffer(cmd);
if (endResult != VK_SUCCESS) {
LOG_ERROR("endFrame[", endFrameCounter, "] vkEndCommandBuffer FAILED: ", static_cast<int>(endResult));
}
auto& frame = frames[currentFrame];
VkPipelineStageFlags waitStage = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT;
// Use per-image semaphores: acquire semaphore was swapped into the per-image
// slot in beginFrame; renderFinished is also indexed by the acquired image.
VkSemaphore& acquireSem = imageAcquiredSemaphores_[imageIndex];
VkSemaphore& renderSem = renderFinishedSemaphores_[imageIndex];
VkSubmitInfo submitInfo{};
submitInfo.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO;
submitInfo.waitSemaphoreCount = 1;
submitInfo.pWaitSemaphores = &acquireSem;
submitInfo.pWaitDstStageMask = &waitStage;
submitInfo.commandBufferCount = 1;
submitInfo.pCommandBuffers = &cmd;
submitInfo.signalSemaphoreCount = 1;
submitInfo.pSignalSemaphores = &renderSem;
VkResult submitResult = vkQueueSubmit(graphicsQueue, 1, &submitInfo, frame.inFlightFence);
    if (submitResult != VK_SUCCESS) {
        LOG_ERROR("endFrame[", endFrameCounter, "] vkQueueSubmit FAILED: ", static_cast<int>(submitResult));
        if (submitResult == VK_ERROR_DEVICE_LOST) {
            deviceLost_ = true;
        }
        // The render-finished semaphore was never signaled; presenting now
        // would stall the presentation engine. Skip present for this frame.
        currentFrame = (currentFrame + 1) % MAX_FRAMES_IN_FLIGHT;
        return;
    }
VkPresentInfoKHR presentInfo{};
presentInfo.sType = VK_STRUCTURE_TYPE_PRESENT_INFO_KHR;
presentInfo.waitSemaphoreCount = 1;
presentInfo.pWaitSemaphores = &renderSem;
presentInfo.swapchainCount = 1;
presentInfo.pSwapchains = &swapchain;
presentInfo.pImageIndices = &imageIndex;
    VkResult result = vkQueuePresentKHR(presentQueue, &presentInfo);
    if (result == VK_ERROR_OUT_OF_DATE_KHR || result == VK_SUBOPTIMAL_KHR) {
        swapchainDirty = true;
    } else if (result != VK_SUCCESS) {
        LOG_ERROR("endFrame[", endFrameCounter, "] vkQueuePresentKHR FAILED: ", static_cast<int>(result));
        if (result == VK_ERROR_DEVICE_LOST) {
            deviceLost_ = true;
        }
    }
currentFrame = (currentFrame + 1) % MAX_FRAMES_IN_FLIGHT;
}
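// Allocate and begin a one-shot primary command buffer from the immediate
// pool. Pair with endSingleTimeCommands(), which submits and blocks.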
VkCommandBuffer VkContext::beginSingleTimeCommands() {
VkCommandBufferAllocateInfo allocInfo{};
allocInfo.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO;
allocInfo.commandPool = immCommandPool;
allocInfo.level = VK_COMMAND_BUFFER_LEVEL_PRIMARY;
allocInfo.commandBufferCount = 1;
VkCommandBuffer cmd;
vkAllocateCommandBuffers(device, &allocInfo, &cmd);
VkCommandBufferBeginInfo beginInfo{};
beginInfo.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO;
beginInfo.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT;
vkBeginCommandBuffer(cmd, &beginInfo);
return cmd;
}
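// Submit the one-shot buffer on the graphics queue, block on immFence until
// the GPU finishes, then recycle the command buffer.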
void VkContext::endSingleTimeCommands(VkCommandBuffer cmd) {
vkEndCommandBuffer(cmd);
VkSubmitInfo submitInfo{};
submitInfo.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO;
submitInfo.commandBufferCount = 1;
submitInfo.pCommandBuffers = &cmd;
vkQueueSubmit(graphicsQueue, 1, &submitInfo, immFence);
vkWaitForFences(device, 1, &immFence, VK_TRUE, UINT64_MAX);
vkResetFences(device, 1, &immFence);
vkFreeCommandBuffers(device, immCommandPool, 1, &cmd);
}
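// Record GPU work either into the currently open upload batch (deferred,
// non-blocking) or, when no batch is active, into a blocking one-shot
// command buffer.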
void VkContext::immediateSubmit(std::function<void(VkCommandBuffer cmd)>&& function) {
if (inUploadBatch_) {
// Record into the batch command buffer — no submit, no fence wait
function(batchCmd_);
return;
}
VkCommandBuffer cmd = beginSingleTimeCommands();
function(cmd);
endSingleTimeCommands(cmd);
}
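// Open a re-entrant upload batch: until the matching endUploadBatch() or
// endUploadBatchSync(), every immediateSubmit() records into one shared
// command buffer that is submitted exactly once. Illustrative sketch
// (uploadTexture is a hypothetical caller):
//
//   ctx.beginUploadBatch();
//   for (auto& tex : textures) uploadTexture(ctx, tex);
//   ctx.endUploadBatch(); // async; reclaimed later via pollUploadBatches()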
void VkContext::beginUploadBatch() {
uploadBatchDepth_++;
if (inUploadBatch_) return; // already in a batch (nested call)
inUploadBatch_ = true;
// Allocate from transfer pool if available, otherwise from immCommandPool.
VkCommandPool pool = hasDedicatedTransfer_ ? transferCommandPool_ : immCommandPool;
VkCommandBufferAllocateInfo allocInfo{};
allocInfo.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO;
allocInfo.commandPool = pool;
allocInfo.level = VK_COMMAND_BUFFER_LEVEL_PRIMARY;
allocInfo.commandBufferCount = 1;
vkAllocateCommandBuffers(device, &allocInfo, &batchCmd_);
VkCommandBufferBeginInfo beginInfo{};
beginInfo.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO;
beginInfo.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT;
vkBeginCommandBuffer(batchCmd_, &beginInfo);
}
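// Close the batch asynchronously: submit once (to the dedicated transfer
// queue when available) with a fresh fence, then stash the batch so
// pollUploadBatches()/waitAllUploads() can reclaim it later.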
void VkContext::endUploadBatch() {
if (uploadBatchDepth_ <= 0) return;
uploadBatchDepth_--;
if (uploadBatchDepth_ > 0) return; // still inside an outer batch
inUploadBatch_ = false;
VkCommandPool pool = hasDedicatedTransfer_ ? transferCommandPool_ : immCommandPool;
if (batchStagingBuffers_.empty()) {
// No GPU copies were recorded — skip the submit entirely.
vkEndCommandBuffer(batchCmd_);
vkFreeCommandBuffers(device, pool, 1, &batchCmd_);
batchCmd_ = VK_NULL_HANDLE;
return;
}
// Submit commands with a NEW fence — don't wait, let GPU work in parallel.
vkEndCommandBuffer(batchCmd_);
VkFenceCreateInfo fenceInfo{};
fenceInfo.sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO;
VkFence fence = VK_NULL_HANDLE;
vkCreateFence(device, &fenceInfo, nullptr, &fence);
VkSubmitInfo submitInfo{};
submitInfo.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO;
submitInfo.commandBufferCount = 1;
submitInfo.pCommandBuffers = &batchCmd_;
// Submit to the dedicated transfer queue if available, otherwise graphics.
VkQueue targetQueue = hasDedicatedTransfer_ ? transferQueue_ : graphicsQueue;
vkQueueSubmit(targetQueue, 1, &submitInfo, fence);
// Stash everything for later cleanup when fence signals
InFlightBatch batch;
batch.fence = fence;
batch.cmd = batchCmd_;
batch.stagingBuffers = std::move(batchStagingBuffers_);
inFlightBatches_.push_back(std::move(batch));
batchCmd_ = VK_NULL_HANDLE;
batchStagingBuffers_.clear();
}
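// Synchronous variant used on load screens: submit the batch, block on
// immFence, and free the staging buffers immediately.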
void VkContext::endUploadBatchSync() {
if (uploadBatchDepth_ <= 0) return;
uploadBatchDepth_--;
if (uploadBatchDepth_ > 0) return;
inUploadBatch_ = false;
VkCommandPool pool = hasDedicatedTransfer_ ? transferCommandPool_ : immCommandPool;
if (batchStagingBuffers_.empty()) {
vkEndCommandBuffer(batchCmd_);
vkFreeCommandBuffers(device, pool, 1, &batchCmd_);
batchCmd_ = VK_NULL_HANDLE;
return;
}
// Synchronous path for load screens — submit and wait on the target queue.
VkQueue targetQueue = hasDedicatedTransfer_ ? transferQueue_ : graphicsQueue;
vkEndCommandBuffer(batchCmd_);
VkSubmitInfo submitInfo{};
submitInfo.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO;
submitInfo.commandBufferCount = 1;
submitInfo.pCommandBuffers = &batchCmd_;
vkQueueSubmit(targetQueue, 1, &submitInfo, immFence);
vkWaitForFences(device, 1, &immFence, VK_TRUE, UINT64_MAX);
vkResetFences(device, 1, &immFence);
vkFreeCommandBuffers(device, pool, 1, &batchCmd_);
batchCmd_ = VK_NULL_HANDLE;
for (auto& staging : batchStagingBuffers_) {
destroyBuffer(allocator, staging);
}
batchStagingBuffers_.clear();
}
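// Reclaim any async upload batches whose fences have signaled, freeing
// their staging buffers without blocking. Intended to be polled regularly
// (e.g. once per frame) to bound staging-memory growth.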
void VkContext::pollUploadBatches() {
if (inFlightBatches_.empty()) return;
VkCommandPool pool = hasDedicatedTransfer_ ? transferCommandPool_ : immCommandPool;
for (auto it = inFlightBatches_.begin(); it != inFlightBatches_.end(); ) {
VkResult result = vkGetFenceStatus(device, it->fence);
if (result == VK_SUCCESS) {
// GPU finished — free resources
for (auto& staging : it->stagingBuffers) {
destroyBuffer(allocator, staging);
}
vkFreeCommandBuffers(device, pool, 1, &it->cmd);
vkDestroyFence(device, it->fence, nullptr);
it = inFlightBatches_.erase(it);
} else {
++it;
}
}
}
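// Block until every in-flight upload batch completes, then free their
// staging buffers, command buffers, and fences.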
void VkContext::waitAllUploads() {
VkCommandPool pool = hasDedicatedTransfer_ ? transferCommandPool_ : immCommandPool;
for (auto& batch : inFlightBatches_) {
vkWaitForFences(device, 1, &batch.fence, VK_TRUE, UINT64_MAX);
for (auto& staging : batch.stagingBuffers) {
destroyBuffer(allocator, staging);
}
vkFreeCommandBuffers(device, pool, 1, &batch.cmd);
vkDestroyFence(device, batch.fence, nullptr);
}
inFlightBatches_.clear();
}
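// Queue a staging buffer for destruction once the current batch's fence
// signals. Only meaningful inside an upload batch; if called outside one,
// the buffer lingers until the next non-empty batch is flushed.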
void VkContext::deferStagingCleanup(AllocatedBuffer staging) {
batchStagingBuffers_.push_back(staging);
}
} // namespace rendering
} // namespace wowee