mirror of
https://github.com/Kelsidavis/WoWee.git
synced 2026-04-17 17:43:52 +00:00
fix(rendering): per-image semaphores and depth-format shadow placeholder
Avoid reusing a semaphore while the presentation engine still holds a reference to it: switch from per-frame-slot semaphores to per-swapchain-image semaphores, plus one rotating spare semaphore that is used for the next acquire. Replace the R8G8B8A8_UNORM dummy white texture in CharacterPreview with a proper D16_UNORM depth texture cleared to 1.0, matching what the shaders' sampler2DShadow expects; AMD RADV strictly enforces format/sampler type compatibility.
This commit is contained in:
parent
4f7912cf45
commit
b2cb98e969
4 changed files with 148 additions and 22 deletions
|
|
@ -135,13 +135,72 @@ void CharacterPreview::createFBO() {
|
|||
vkCtx_->endSingleTimeCommands(cmd);
|
||||
}
|
||||
|
||||
// 2. Create 1x1 dummy white texture (shadow map placeholder)
|
||||
// 2. Create 1x1 dummy depth texture (shadow map placeholder, depth=1.0 = no shadow).
|
||||
// Must be a depth format for sampler2DShadow compatibility.
|
||||
{
|
||||
uint8_t white[] = {255, 255, 255, 255};
|
||||
dummyWhiteTex_ = std::make_unique<VkTexture>();
|
||||
dummyWhiteTex_->upload(*vkCtx_, white, 1, 1, VK_FORMAT_R8G8B8A8_UNORM, false);
|
||||
dummyWhiteTex_->createSampler(device, VK_FILTER_NEAREST, VK_FILTER_NEAREST,
|
||||
VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE);
|
||||
VkImageCreateInfo imgCI{VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO};
|
||||
imgCI.imageType = VK_IMAGE_TYPE_2D;
|
||||
imgCI.format = VK_FORMAT_D16_UNORM;
|
||||
imgCI.extent = {1, 1, 1};
|
||||
imgCI.mipLevels = 1;
|
||||
imgCI.arrayLayers = 1;
|
||||
imgCI.samples = VK_SAMPLE_COUNT_1_BIT;
|
||||
imgCI.tiling = VK_IMAGE_TILING_OPTIMAL;
|
||||
imgCI.usage = VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT | VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT;
|
||||
VmaAllocationCreateInfo allocCI{};
|
||||
allocCI.usage = VMA_MEMORY_USAGE_GPU_ONLY;
|
||||
if (vmaCreateImage(vkCtx_->getAllocator(), &imgCI, &allocCI,
|
||||
&dummyShadowImage_, &dummyShadowAlloc_, nullptr) != VK_SUCCESS) {
|
||||
LOG_ERROR("CharacterPreview: failed to create dummy shadow image");
|
||||
return;
|
||||
}
|
||||
VkImageViewCreateInfo viewCI{VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO};
|
||||
viewCI.image = dummyShadowImage_;
|
||||
viewCI.viewType = VK_IMAGE_VIEW_TYPE_2D;
|
||||
viewCI.format = VK_FORMAT_D16_UNORM;
|
||||
viewCI.subresourceRange = {VK_IMAGE_ASPECT_DEPTH_BIT, 0, 1, 0, 1};
|
||||
if (vkCreateImageView(device, &viewCI, nullptr, &dummyShadowView_) != VK_SUCCESS) {
|
||||
LOG_ERROR("CharacterPreview: failed to create dummy shadow image view");
|
||||
return;
|
||||
}
|
||||
// Clear to depth 1.0 and transition to shader-read layout
|
||||
vkCtx_->immediateSubmit([&](VkCommandBuffer cmd) {
|
||||
VkImageMemoryBarrier toTransfer{VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER};
|
||||
toTransfer.image = dummyShadowImage_;
|
||||
toTransfer.oldLayout = VK_IMAGE_LAYOUT_UNDEFINED;
|
||||
toTransfer.newLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL;
|
||||
toTransfer.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
|
||||
toTransfer.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
|
||||
toTransfer.subresourceRange = {VK_IMAGE_ASPECT_DEPTH_BIT, 0, 1, 0, 1};
|
||||
toTransfer.dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;
|
||||
vkCmdPipelineBarrier(cmd, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT,
|
||||
0, 0, nullptr, 0, nullptr, 1, &toTransfer);
|
||||
VkClearDepthStencilValue clearVal{1.0f, 0};
|
||||
VkImageSubresourceRange range{VK_IMAGE_ASPECT_DEPTH_BIT, 0, 1, 0, 1};
|
||||
vkCmdClearDepthStencilImage(cmd, dummyShadowImage_, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, &clearVal, 1, &range);
|
||||
VkImageMemoryBarrier toRead{VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER};
|
||||
toRead.image = dummyShadowImage_;
|
||||
toRead.oldLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL;
|
||||
toRead.newLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
|
||||
toRead.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
|
||||
toRead.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
|
||||
toRead.subresourceRange = {VK_IMAGE_ASPECT_DEPTH_BIT, 0, 1, 0, 1};
|
||||
toRead.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;
|
||||
toRead.dstAccessMask = VK_ACCESS_SHADER_READ_BIT;
|
||||
vkCmdPipelineBarrier(cmd, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT,
|
||||
0, 0, nullptr, 0, nullptr, 1, &toRead);
|
||||
});
|
||||
// Comparison sampler for sampler2DShadow
|
||||
VkSamplerCreateInfo sampCI{VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO};
|
||||
sampCI.magFilter = VK_FILTER_NEAREST;
|
||||
sampCI.minFilter = VK_FILTER_NEAREST;
|
||||
sampCI.addressModeU = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER;
|
||||
sampCI.addressModeV = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER;
|
||||
sampCI.addressModeW = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER;
|
||||
sampCI.borderColor = VK_BORDER_COLOR_FLOAT_OPAQUE_WHITE;
|
||||
sampCI.compareEnable = VK_TRUE;
|
||||
sampCI.compareOp = VK_COMPARE_OP_LESS_OR_EQUAL;
|
||||
dummyShadowSampler_ = vkCtx_->getOrCreateSampler(sampCI);
|
||||
}
|
||||
|
||||
// 3. Create descriptor pool for per-frame sets (2 UBO + 2 sampler)
|
||||
|
|
@ -200,7 +259,10 @@ void CharacterPreview::createFBO() {
|
|||
descBuf.offset = 0;
|
||||
descBuf.range = sizeof(GPUPerFrameData);
|
||||
|
||||
VkDescriptorImageInfo shadowImg = dummyWhiteTex_->descriptorInfo();
|
||||
VkDescriptorImageInfo shadowImg{};
|
||||
shadowImg.sampler = dummyShadowSampler_;
|
||||
shadowImg.imageView = dummyShadowView_;
|
||||
shadowImg.imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
|
||||
|
||||
VkWriteDescriptorSet writes[2]{};
|
||||
writes[0].sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET;
|
||||
|
|
@ -250,7 +312,9 @@ void CharacterPreview::destroyFBO() {
|
|||
previewDescPool_ = VK_NULL_HANDLE;
|
||||
}
|
||||
|
||||
dummyWhiteTex_.reset();
|
||||
// dummyShadowSampler_ is owned by VkContext sampler cache — do NOT destroy
|
||||
if (dummyShadowView_) { vkDestroyImageView(device, dummyShadowView_, nullptr); dummyShadowView_ = VK_NULL_HANDLE; }
|
||||
if (dummyShadowImage_) { vmaDestroyImage(allocator, dummyShadowImage_, dummyShadowAlloc_); dummyShadowImage_ = VK_NULL_HANDLE; dummyShadowAlloc_ = VK_NULL_HANDLE; }
|
||||
|
||||
if (renderTarget_) {
|
||||
renderTarget_->destroy(device, allocator);
|
||||
|
|
|
|||
|
|
@ -122,11 +122,14 @@ void VkContext::shutdown() {
|
|||
// Destroy sync objects
|
||||
for (auto& frame : frames) {
|
||||
if (frame.inFlightFence) vkDestroyFence(device, frame.inFlightFence, nullptr);
|
||||
if (frame.renderFinishedSemaphore) vkDestroySemaphore(device, frame.renderFinishedSemaphore, nullptr);
|
||||
if (frame.imageAvailableSemaphore) vkDestroySemaphore(device, frame.imageAvailableSemaphore, nullptr);
|
||||
if (frame.commandPool) vkDestroyCommandPool(device, frame.commandPool, nullptr);
|
||||
frame = {};
|
||||
}
|
||||
for (auto sem : imageAcquiredSemaphores_) { if (sem) vkDestroySemaphore(device, sem, nullptr); }
|
||||
imageAcquiredSemaphores_.clear();
|
||||
for (auto sem : renderFinishedSemaphores_) { if (sem) vkDestroySemaphore(device, sem, nullptr); }
|
||||
renderFinishedSemaphores_.clear();
|
||||
if (nextAcquireSemaphore_) { vkDestroySemaphore(device, nextAcquireSemaphore_, nullptr); nextAcquireSemaphore_ = VK_NULL_HANDLE; }
|
||||
|
||||
// Clean up any in-flight async upload batches (device already idle)
|
||||
for (auto& batch : inFlightBatches_) {
|
||||
|
|
@ -684,14 +687,32 @@ bool VkContext::createSyncObjects() {
|
|||
fenceInfo.flags = VK_FENCE_CREATE_SIGNALED_BIT; // Start signaled so first frame doesn't block
|
||||
|
||||
for (uint32_t i = 0; i < MAX_FRAMES_IN_FLIGHT; i++) {
|
||||
if (vkCreateSemaphore(device, &semInfo, nullptr, &frames[i].imageAvailableSemaphore) != VK_SUCCESS ||
|
||||
vkCreateSemaphore(device, &semInfo, nullptr, &frames[i].renderFinishedSemaphore) != VK_SUCCESS ||
|
||||
vkCreateFence(device, &fenceInfo, nullptr, &frames[i].inFlightFence) != VK_SUCCESS) {
|
||||
if (vkCreateFence(device, &fenceInfo, nullptr, &frames[i].inFlightFence) != VK_SUCCESS) {
|
||||
LOG_ERROR("Failed to create sync objects for frame ", i);
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
// Per-swapchain-image semaphores: avoids reuse while the presentation engine
|
||||
// still holds a reference. After acquiring image N we swap the acquire semaphore
|
||||
// into imageAcquiredSemaphores_[N], recycling the old one for the next acquire.
|
||||
const uint32_t imgCount = static_cast<uint32_t>(swapchainImages.size());
|
||||
imageAcquiredSemaphores_.resize(imgCount);
|
||||
renderFinishedSemaphores_.resize(imgCount);
|
||||
for (uint32_t i = 0; i < imgCount; i++) {
|
||||
if (vkCreateSemaphore(device, &semInfo, nullptr, &imageAcquiredSemaphores_[i]) != VK_SUCCESS ||
|
||||
vkCreateSemaphore(device, &semInfo, nullptr, &renderFinishedSemaphores_[i]) != VK_SUCCESS) {
|
||||
LOG_ERROR("Failed to create per-image semaphores for image ", i);
|
||||
return false;
|
||||
}
|
||||
}
|
||||
// One extra acquire semaphore — we need it for the next vkAcquireNextImageKHR
|
||||
// before we know which image we'll get.
|
||||
if (vkCreateSemaphore(device, &semInfo, nullptr, &nextAcquireSemaphore_) != VK_SUCCESS) {
|
||||
LOG_ERROR("Failed to create next-acquire semaphore");
|
||||
return false;
|
||||
}
|
||||
|
||||
// Immediate submit fence (not signaled initially)
|
||||
VkFenceCreateInfo immFenceInfo{};
|
||||
immFenceInfo.sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO;
|
||||
|
|
@ -1416,6 +1437,26 @@ bool VkContext::recreateSwapchain(int width, int height) {
|
|||
swapchainImages = vkbSwap.get_images().value();
|
||||
swapchainImageViews = vkbSwap.get_image_views().value();
|
||||
|
||||
// Resize per-image semaphore arrays if the swapchain image count changed
|
||||
{
|
||||
const uint32_t newCount = static_cast<uint32_t>(swapchainImages.size());
|
||||
const uint32_t oldCount = static_cast<uint32_t>(imageAcquiredSemaphores_.size());
|
||||
VkSemaphoreCreateInfo semInfo{};
|
||||
semInfo.sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO;
|
||||
// Destroy excess semaphores if shrinking
|
||||
for (uint32_t i = newCount; i < oldCount; i++) {
|
||||
if (imageAcquiredSemaphores_[i]) vkDestroySemaphore(device, imageAcquiredSemaphores_[i], nullptr);
|
||||
if (renderFinishedSemaphores_[i]) vkDestroySemaphore(device, renderFinishedSemaphores_[i], nullptr);
|
||||
}
|
||||
imageAcquiredSemaphores_.resize(newCount);
|
||||
renderFinishedSemaphores_.resize(newCount);
|
||||
// Create new semaphores if growing
|
||||
for (uint32_t i = oldCount; i < newCount; i++) {
|
||||
vkCreateSemaphore(device, &semInfo, nullptr, &imageAcquiredSemaphores_[i]);
|
||||
vkCreateSemaphore(device, &semInfo, nullptr, &renderFinishedSemaphores_[i]);
|
||||
}
|
||||
}
|
||||
|
||||
// Recreate depth buffer + MSAA color image + depth resolve image
|
||||
destroyMsaaColorImage();
|
||||
destroyDepthResolveImage();
|
||||
|
|
@ -1693,9 +1734,11 @@ VkCommandBuffer VkContext::beginFrame(uint32_t& imageIndex) {
|
|||
// Any work queued for this frame slot is now guaranteed to be unused by the GPU.
|
||||
runDeferredCleanup(currentFrame);
|
||||
|
||||
// Acquire next swapchain image
|
||||
// Acquire next swapchain image using the free semaphore.
|
||||
// After acquiring we swap it into the per-image slot so the old per-image
|
||||
// semaphore (now released by the presentation engine) becomes the free one.
|
||||
VkResult result = vkAcquireNextImageKHR(device, swapchain, UINT64_MAX,
|
||||
frame.imageAvailableSemaphore, VK_NULL_HANDLE, &imageIndex);
|
||||
nextAcquireSemaphore_, VK_NULL_HANDLE, &imageIndex);
|
||||
|
||||
if (result == VK_ERROR_OUT_OF_DATE_KHR) {
|
||||
swapchainDirty = true;
|
||||
|
|
@ -1706,6 +1749,13 @@ VkCommandBuffer VkContext::beginFrame(uint32_t& imageIndex) {
|
|||
return VK_NULL_HANDLE;
|
||||
}
|
||||
|
||||
// Swap semaphores: the image's old acquire semaphore is now free (the presentation
|
||||
// engine released it when this image was re-acquired). The semaphore we just used
|
||||
// becomes the per-image one for submit/present.
|
||||
currentAcquireSemaphore_ = nextAcquireSemaphore_;
|
||||
nextAcquireSemaphore_ = imageAcquiredSemaphores_[imageIndex];
|
||||
imageAcquiredSemaphores_[imageIndex] = currentAcquireSemaphore_;
|
||||
|
||||
vkResetFences(device, 1, &frame.inFlightFence);
|
||||
vkResetCommandBuffer(frame.commandBuffer, 0);
|
||||
|
||||
|
|
@ -1731,15 +1781,20 @@ void VkContext::endFrame(VkCommandBuffer cmd, uint32_t imageIndex) {
|
|||
|
||||
VkPipelineStageFlags waitStage = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT;
|
||||
|
||||
// Use per-image semaphores: acquire semaphore was swapped into the per-image
|
||||
// slot in beginFrame; renderFinished is also indexed by the acquired image.
|
||||
VkSemaphore& acquireSem = imageAcquiredSemaphores_[imageIndex];
|
||||
VkSemaphore& renderSem = renderFinishedSemaphores_[imageIndex];
|
||||
|
||||
VkSubmitInfo submitInfo{};
|
||||
submitInfo.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO;
|
||||
submitInfo.waitSemaphoreCount = 1;
|
||||
submitInfo.pWaitSemaphores = &frame.imageAvailableSemaphore;
|
||||
submitInfo.pWaitSemaphores = &acquireSem;
|
||||
submitInfo.pWaitDstStageMask = &waitStage;
|
||||
submitInfo.commandBufferCount = 1;
|
||||
submitInfo.pCommandBuffers = &cmd;
|
||||
submitInfo.signalSemaphoreCount = 1;
|
||||
submitInfo.pSignalSemaphores = &frame.renderFinishedSemaphore;
|
||||
submitInfo.pSignalSemaphores = &renderSem;
|
||||
|
||||
VkResult submitResult = vkQueueSubmit(graphicsQueue, 1, &submitInfo, frame.inFlightFence);
|
||||
if (submitResult != VK_SUCCESS) {
|
||||
|
|
@ -1752,7 +1807,7 @@ void VkContext::endFrame(VkCommandBuffer cmd, uint32_t imageIndex) {
|
|||
VkPresentInfoKHR presentInfo{};
|
||||
presentInfo.sType = VK_STRUCTURE_TYPE_PRESENT_INFO_KHR;
|
||||
presentInfo.waitSemaphoreCount = 1;
|
||||
presentInfo.pWaitSemaphores = &frame.renderFinishedSemaphore;
|
||||
presentInfo.pWaitSemaphores = &renderSem;
|
||||
presentInfo.swapchainCount = 1;
|
||||
presentInfo.pSwapchains = &swapchain;
|
||||
presentInfo.pImageIndices = &imageIndex;
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue