Per-swapchain-image semaphores + depth-format shadow-map placeholder

What this patch does, per file:

  * include/rendering/character_preview.hpp
    src/rendering/character_preview.cpp
      Replaces the 1x1 VK_FORMAT_R8G8B8A8_UNORM white placeholder texture
      with a 1x1 VK_FORMAT_D16_UNORM image that is cleared to depth 1.0 and
      transitioned to SHADER_READ_ONLY_OPTIMAL, and binds it through a
      compare-enabled sampler (VK_COMPARE_OP_LESS_OR_EQUAL). The sampler is
      obtained from VkContext::getOrCreateSampler() and is deliberately not
      destroyed in destroyFBO(); the image view and the VMA-allocated image
      are destroyed there.

  * include/rendering/vk_context.hpp
    src/rendering/vk_context.cpp
      Moves the acquire / render-finished semaphores out of FrameData into
      two vectors sized by the swapchain image count, plus one spare acquire
      semaphore. beginFrame() acquires with the spare semaphore and then
      swaps it with the slot of the image that was returned; endFrame()
      waits on / signals the slots at imageIndex. shutdown() destroys both
      vectors and the spare.

Review changes relative to the patch as received:

  1. recreateSwapchain(): the two vkCreateSemaphore() calls in the "grow"
     loop are now checked. On failure the function logs and returns false
     instead of leaving an invalid slot that beginFrame()/endFrame() would
     later pass to vkAcquireNextImageKHR / vkQueueSubmit. The hunk's
     new-side count and the new-side start lines of the following hunks
     were adjusted by +3 accordingly.
  2. Template arguments that were lost in extraction are restored where the
     surrounding code fixes the type: static_cast<uint32_t>(...) and
     std::vector<VkSemaphore>.

NOTE(review): the removed lines "std::unique_ptr dummyWhiteTex_;" and
  "std::make_unique();" also appear to have lost a template argument. The
  element type is not visible in this patch, so they are left as received;
  confirm against the pre-image before applying.
NOTE(review): leading whitespace was lost in the received text. Indentation
  below is reconstructed (4 spaces per level) and blank lines are inferred
  from the hunk line counts; apply with whitespace tolerance
  (e.g. git apply --ignore-whitespace) or regenerate from the tree.
NOTE(review): the post-image blob hash on the "index" line of
  src/rendering/vk_context.cpp predates review change 1.

diff --git a/include/rendering/character_preview.hpp b/include/rendering/character_preview.hpp
index 965d6913..edacc289 100644
--- a/include/rendering/character_preview.hpp
+++ b/include/rendering/character_preview.hpp
@@ -75,8 +75,11 @@ private:
     void* previewUBOMapped_[MAX_FRAMES] = {};
     VkDescriptorSet previewPerFrameSet_[MAX_FRAMES] = {};
 
-    // Dummy 1x1 white texture for shadow map placeholder
-    std::unique_ptr dummyWhiteTex_;
+    // Dummy 1x1 depth texture for shadow map placeholder (sampler2DShadow compatible)
+    VkImage dummyShadowImage_ = VK_NULL_HANDLE;
+    VkImageView dummyShadowView_ = VK_NULL_HANDLE;
+    VmaAllocation dummyShadowAlloc_ = VK_NULL_HANDLE;
+    VkSampler dummyShadowSampler_ = VK_NULL_HANDLE; // owned by VkContext sampler cache
 
     // ImGui texture handle for displaying the preview (VkDescriptorSet in Vulkan backend)
     VkDescriptorSet imguiTextureId_ = VK_NULL_HANDLE;
diff --git a/include/rendering/vk_context.hpp b/include/rendering/vk_context.hpp
index 4cc7c109..50c283fc 100644
--- a/include/rendering/vk_context.hpp
+++ b/include/rendering/vk_context.hpp
@@ -19,8 +19,6 @@ static constexpr uint32_t MAX_FRAMES_IN_FLIGHT = 2;
 struct FrameData {
     VkCommandPool commandPool = VK_NULL_HANDLE;
     VkCommandBuffer commandBuffer = VK_NULL_HANDLE;
-    VkSemaphore imageAvailableSemaphore = VK_NULL_HANDLE;
-    VkSemaphore renderFinishedSemaphore = VK_NULL_HANDLE;
     VkFence inFlightFence = VK_NULL_HANDLE;
 };
 
@@ -197,6 +195,12 @@ private:
     FrameData frames[MAX_FRAMES_IN_FLIGHT];
     uint32_t currentFrame = 0;
 
+    // Per-swapchain-image semaphores (avoids reuse while presentation engine holds them)
+    std::vector<VkSemaphore> imageAcquiredSemaphores_; // [swapchainImageCount], per-image
+    std::vector<VkSemaphore> renderFinishedSemaphores_; // [swapchainImageCount], per-image
+    VkSemaphore nextAcquireSemaphore_ = VK_NULL_HANDLE; // free semaphore for next acquire
+    VkSemaphore currentAcquireSemaphore_ = VK_NULL_HANDLE; // the one used for the current frame
+
     // Immediate submit resources
     VkCommandPool immCommandPool = VK_NULL_HANDLE;
     VkFence immFence = VK_NULL_HANDLE;
diff --git a/src/rendering/character_preview.cpp b/src/rendering/character_preview.cpp
index c3895eb4..ebdbf13e 100644
--- a/src/rendering/character_preview.cpp
+++ b/src/rendering/character_preview.cpp
@@ -135,13 +135,72 @@ void CharacterPreview::createFBO() {
         vkCtx_->endSingleTimeCommands(cmd);
     }
 
-    // 2. Create 1x1 dummy white texture (shadow map placeholder)
+    // 2. Create 1x1 dummy depth texture (shadow map placeholder, depth=1.0 = no shadow).
+    // Must be a depth format for sampler2DShadow compatibility.
     {
-        uint8_t white[] = {255, 255, 255, 255};
-        dummyWhiteTex_ = std::make_unique();
-        dummyWhiteTex_->upload(*vkCtx_, white, 1, 1, VK_FORMAT_R8G8B8A8_UNORM, false);
-        dummyWhiteTex_->createSampler(device, VK_FILTER_NEAREST, VK_FILTER_NEAREST,
-                                      VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE);
+        VkImageCreateInfo imgCI{VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO};
+        imgCI.imageType = VK_IMAGE_TYPE_2D;
+        imgCI.format = VK_FORMAT_D16_UNORM;
+        imgCI.extent = {1, 1, 1};
+        imgCI.mipLevels = 1;
+        imgCI.arrayLayers = 1;
+        imgCI.samples = VK_SAMPLE_COUNT_1_BIT;
+        imgCI.tiling = VK_IMAGE_TILING_OPTIMAL;
+        imgCI.usage = VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT | VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT;
+        VmaAllocationCreateInfo allocCI{};
+        allocCI.usage = VMA_MEMORY_USAGE_GPU_ONLY;
+        if (vmaCreateImage(vkCtx_->getAllocator(), &imgCI, &allocCI,
+                           &dummyShadowImage_, &dummyShadowAlloc_, nullptr) != VK_SUCCESS) {
+            LOG_ERROR("CharacterPreview: failed to create dummy shadow image");
+            return;
+        }
+        VkImageViewCreateInfo viewCI{VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO};
+        viewCI.image = dummyShadowImage_;
+        viewCI.viewType = VK_IMAGE_VIEW_TYPE_2D;
+        viewCI.format = VK_FORMAT_D16_UNORM;
+        viewCI.subresourceRange = {VK_IMAGE_ASPECT_DEPTH_BIT, 0, 1, 0, 1};
+        if (vkCreateImageView(device, &viewCI, nullptr, &dummyShadowView_) != VK_SUCCESS) {
+            LOG_ERROR("CharacterPreview: failed to create dummy shadow image view");
+            return;
+        }
+        // Clear to depth 1.0 and transition to shader-read layout
+        vkCtx_->immediateSubmit([&](VkCommandBuffer cmd) {
+            VkImageMemoryBarrier toTransfer{VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER};
+            toTransfer.image = dummyShadowImage_;
+            toTransfer.oldLayout = VK_IMAGE_LAYOUT_UNDEFINED;
+            toTransfer.newLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL;
+            toTransfer.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
+            toTransfer.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
+            toTransfer.subresourceRange = {VK_IMAGE_ASPECT_DEPTH_BIT, 0, 1, 0, 1};
+            toTransfer.dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;
+            vkCmdPipelineBarrier(cmd, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT,
+                                 0, 0, nullptr, 0, nullptr, 1, &toTransfer);
+            VkClearDepthStencilValue clearVal{1.0f, 0};
+            VkImageSubresourceRange range{VK_IMAGE_ASPECT_DEPTH_BIT, 0, 1, 0, 1};
+            vkCmdClearDepthStencilImage(cmd, dummyShadowImage_, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, &clearVal, 1, &range);
+            VkImageMemoryBarrier toRead{VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER};
+            toRead.image = dummyShadowImage_;
+            toRead.oldLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL;
+            toRead.newLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
+            toRead.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
+            toRead.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
+            toRead.subresourceRange = {VK_IMAGE_ASPECT_DEPTH_BIT, 0, 1, 0, 1};
+            toRead.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;
+            toRead.dstAccessMask = VK_ACCESS_SHADER_READ_BIT;
+            vkCmdPipelineBarrier(cmd, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT,
+                                 0, 0, nullptr, 0, nullptr, 1, &toRead);
+        });
+        // Comparison sampler for sampler2DShadow
+        VkSamplerCreateInfo sampCI{VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO};
+        sampCI.magFilter = VK_FILTER_NEAREST;
+        sampCI.minFilter = VK_FILTER_NEAREST;
+        sampCI.addressModeU = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER;
+        sampCI.addressModeV = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER;
+        sampCI.addressModeW = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER;
+        sampCI.borderColor = VK_BORDER_COLOR_FLOAT_OPAQUE_WHITE;
+        sampCI.compareEnable = VK_TRUE;
+        sampCI.compareOp = VK_COMPARE_OP_LESS_OR_EQUAL;
+        dummyShadowSampler_ = vkCtx_->getOrCreateSampler(sampCI);
     }
 
     // 3. Create descriptor pool for per-frame sets (2 UBO + 2 sampler)
@@ -200,7 +259,10 @@ void CharacterPreview::createFBO() {
         descBuf.offset = 0;
         descBuf.range = sizeof(GPUPerFrameData);
 
-        VkDescriptorImageInfo shadowImg = dummyWhiteTex_->descriptorInfo();
+        VkDescriptorImageInfo shadowImg{};
+        shadowImg.sampler = dummyShadowSampler_;
+        shadowImg.imageView = dummyShadowView_;
+        shadowImg.imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
 
         VkWriteDescriptorSet writes[2]{};
         writes[0].sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET;
@@ -250,7 +312,9 @@ void CharacterPreview::destroyFBO() {
         previewDescPool_ = VK_NULL_HANDLE;
     }
 
-    dummyWhiteTex_.reset();
+    // dummyShadowSampler_ is owned by VkContext sampler cache — do NOT destroy
+    if (dummyShadowView_) { vkDestroyImageView(device, dummyShadowView_, nullptr); dummyShadowView_ = VK_NULL_HANDLE; }
+    if (dummyShadowImage_) { vmaDestroyImage(allocator, dummyShadowImage_, dummyShadowAlloc_); dummyShadowImage_ = VK_NULL_HANDLE; dummyShadowAlloc_ = VK_NULL_HANDLE; }
 
     if (renderTarget_) {
         renderTarget_->destroy(device, allocator);
diff --git a/src/rendering/vk_context.cpp b/src/rendering/vk_context.cpp
index 4369f9c3..9c8cf36f 100644
--- a/src/rendering/vk_context.cpp
+++ b/src/rendering/vk_context.cpp
@@ -122,11 +122,14 @@ void VkContext::shutdown() {
     // Destroy sync objects
     for (auto& frame : frames) {
         if (frame.inFlightFence) vkDestroyFence(device, frame.inFlightFence, nullptr);
-        if (frame.renderFinishedSemaphore) vkDestroySemaphore(device, frame.renderFinishedSemaphore, nullptr);
-        if (frame.imageAvailableSemaphore) vkDestroySemaphore(device, frame.imageAvailableSemaphore, nullptr);
         if (frame.commandPool) vkDestroyCommandPool(device, frame.commandPool, nullptr);
         frame = {};
     }
+    for (auto sem : imageAcquiredSemaphores_) { if (sem) vkDestroySemaphore(device, sem, nullptr); }
+    imageAcquiredSemaphores_.clear();
+    for (auto sem : renderFinishedSemaphores_) { if (sem) vkDestroySemaphore(device, sem, nullptr); }
+    renderFinishedSemaphores_.clear();
+    if (nextAcquireSemaphore_) { vkDestroySemaphore(device, nextAcquireSemaphore_, nullptr); nextAcquireSemaphore_ = VK_NULL_HANDLE; }
 
     // Clean up any in-flight async upload batches (device already idle)
     for (auto& batch : inFlightBatches_) {
@@ -684,14 +687,32 @@ bool VkContext::createSyncObjects() {
     fenceInfo.flags = VK_FENCE_CREATE_SIGNALED_BIT; // Start signaled so first frame doesn't block
 
     for (uint32_t i = 0; i < MAX_FRAMES_IN_FLIGHT; i++) {
-        if (vkCreateSemaphore(device, &semInfo, nullptr, &frames[i].imageAvailableSemaphore) != VK_SUCCESS ||
-            vkCreateSemaphore(device, &semInfo, nullptr, &frames[i].renderFinishedSemaphore) != VK_SUCCESS ||
-            vkCreateFence(device, &fenceInfo, nullptr, &frames[i].inFlightFence) != VK_SUCCESS) {
+        if (vkCreateFence(device, &fenceInfo, nullptr, &frames[i].inFlightFence) != VK_SUCCESS) {
             LOG_ERROR("Failed to create sync objects for frame ", i);
             return false;
         }
     }
 
+    // Per-swapchain-image semaphores: avoids reuse while the presentation engine
+    // still holds a reference. After acquiring image N we swap the acquire semaphore
+    // into imageAcquiredSemaphores_[N], recycling the old one for the next acquire.
+    const uint32_t imgCount = static_cast<uint32_t>(swapchainImages.size());
+    imageAcquiredSemaphores_.resize(imgCount);
+    renderFinishedSemaphores_.resize(imgCount);
+    for (uint32_t i = 0; i < imgCount; i++) {
+        if (vkCreateSemaphore(device, &semInfo, nullptr, &imageAcquiredSemaphores_[i]) != VK_SUCCESS ||
+            vkCreateSemaphore(device, &semInfo, nullptr, &renderFinishedSemaphores_[i]) != VK_SUCCESS) {
+            LOG_ERROR("Failed to create per-image semaphores for image ", i);
+            return false;
+        }
+    }
+    // One extra acquire semaphore — we need it for the next vkAcquireNextImageKHR
+    // before we know which image we'll get.
+    if (vkCreateSemaphore(device, &semInfo, nullptr, &nextAcquireSemaphore_) != VK_SUCCESS) {
+        LOG_ERROR("Failed to create next-acquire semaphore");
+        return false;
+    }
+
     // Immediate submit fence (not signaled initially)
     VkFenceCreateInfo immFenceInfo{};
     immFenceInfo.sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO;
@@ -1416,6 +1437,29 @@ bool VkContext::recreateSwapchain(int width, int height) {
     swapchainImages = vkbSwap.get_images().value();
     swapchainImageViews = vkbSwap.get_image_views().value();
 
+    // Resize per-image semaphore arrays if the swapchain image count changed
+    {
+        const uint32_t newCount = static_cast<uint32_t>(swapchainImages.size());
+        const uint32_t oldCount = static_cast<uint32_t>(imageAcquiredSemaphores_.size());
+        VkSemaphoreCreateInfo semInfo{};
+        semInfo.sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO;
+        // Destroy excess semaphores if shrinking
+        for (uint32_t i = newCount; i < oldCount; i++) {
+            if (imageAcquiredSemaphores_[i]) vkDestroySemaphore(device, imageAcquiredSemaphores_[i], nullptr);
+            if (renderFinishedSemaphores_[i]) vkDestroySemaphore(device, renderFinishedSemaphores_[i], nullptr);
+        }
+        imageAcquiredSemaphores_.resize(newCount);
+        renderFinishedSemaphores_.resize(newCount);
+        // Create new semaphores if growing
+        for (uint32_t i = oldCount; i < newCount; i++) {
+            if (vkCreateSemaphore(device, &semInfo, nullptr, &imageAcquiredSemaphores_[i]) != VK_SUCCESS ||
+                vkCreateSemaphore(device, &semInfo, nullptr, &renderFinishedSemaphores_[i]) != VK_SUCCESS) {
+                LOG_ERROR("Failed to recreate per-image semaphores for image ", i);
+                return false;
+            }
+        }
+    }
+
     // Recreate depth buffer + MSAA color image + depth resolve image
     destroyMsaaColorImage();
     destroyDepthResolveImage();
@@ -1693,9 +1737,11 @@ VkCommandBuffer VkContext::beginFrame(uint32_t& imageIndex) {
     // Any work queued for this frame slot is now guaranteed to be unused by the GPU.
     runDeferredCleanup(currentFrame);
 
-    // Acquire next swapchain image
+    // Acquire next swapchain image using the free semaphore.
+    // After acquiring we swap it into the per-image slot so the old per-image
+    // semaphore (now released by the presentation engine) becomes the free one.
     VkResult result = vkAcquireNextImageKHR(device, swapchain, UINT64_MAX,
-                                            frame.imageAvailableSemaphore, VK_NULL_HANDLE, &imageIndex);
+                                            nextAcquireSemaphore_, VK_NULL_HANDLE, &imageIndex);
 
     if (result == VK_ERROR_OUT_OF_DATE_KHR) {
         swapchainDirty = true;
@@ -1706,6 +1752,13 @@ VkCommandBuffer VkContext::beginFrame(uint32_t& imageIndex) {
         return VK_NULL_HANDLE;
     }
 
+    // Swap semaphores: the image's old acquire semaphore is now free (the presentation
+    // engine released it when this image was re-acquired). The semaphore we just used
+    // becomes the per-image one for submit/present.
+    currentAcquireSemaphore_ = nextAcquireSemaphore_;
+    nextAcquireSemaphore_ = imageAcquiredSemaphores_[imageIndex];
+    imageAcquiredSemaphores_[imageIndex] = currentAcquireSemaphore_;
+
     vkResetFences(device, 1, &frame.inFlightFence);
     vkResetCommandBuffer(frame.commandBuffer, 0);
 
@@ -1731,15 +1784,20 @@ void VkContext::endFrame(VkCommandBuffer cmd, uint32_t imageIndex) {
 
     VkPipelineStageFlags waitStage = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT;
 
+    // Use per-image semaphores: acquire semaphore was swapped into the per-image
+    // slot in beginFrame; renderFinished is also indexed by the acquired image.
+    VkSemaphore& acquireSem = imageAcquiredSemaphores_[imageIndex];
+    VkSemaphore& renderSem = renderFinishedSemaphores_[imageIndex];
+
     VkSubmitInfo submitInfo{};
     submitInfo.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO;
     submitInfo.waitSemaphoreCount = 1;
-    submitInfo.pWaitSemaphores = &frame.imageAvailableSemaphore;
+    submitInfo.pWaitSemaphores = &acquireSem;
     submitInfo.pWaitDstStageMask = &waitStage;
     submitInfo.commandBufferCount = 1;
     submitInfo.pCommandBuffers = &cmd;
     submitInfo.signalSemaphoreCount = 1;
-    submitInfo.pSignalSemaphores = &frame.renderFinishedSemaphore;
+    submitInfo.pSignalSemaphores = &renderSem;
 
     VkResult submitResult = vkQueueSubmit(graphicsQueue, 1, &submitInfo, frame.inFlightFence);
     if (submitResult != VK_SUCCESS) {
@@ -1752,7 +1810,7 @@ void VkContext::endFrame(VkCommandBuffer cmd, uint32_t imageIndex) {
     VkPresentInfoKHR presentInfo{};
     presentInfo.sType = VK_STRUCTURE_TYPE_PRESENT_INFO_KHR;
     presentInfo.waitSemaphoreCount = 1;
-    presentInfo.pWaitSemaphores = &frame.renderFinishedSemaphore;
+    presentInfo.pWaitSemaphores = &renderSem;
     presentInfo.swapchainCount = 1;
     presentInfo.pSwapchains = &swapchain;
     presentInfo.pImageIndices = &imageIndex;