fix(rendering): defer model buffer destruction and per-frame FXAA descriptors

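CharacterRenderer::destroyModelGPU gains an opt-in `defer` flag (default
false, so existing callers still destroy immediately). loadModel passes
defer=true when replacing an already-loaded model ID mid-stream, so the old
vertex/index buffers are queued via deferAfterFrameFence instead of being
freed while in-flight command buffers may still reference them, preventing
use-after-free on AMD RADV. FXAA descriptor sets are now per-frame to
eliminate write-read races between in-flight command buffers. The water
reflection descriptor update is narrowed to the current frame's set only.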
This commit is contained in:
Kelsi 2026-04-03 19:17:55 -07:00
parent e19bf76d88
commit 40e72d535e
5 changed files with 71 additions and 43 deletions

View file

@ -208,7 +208,7 @@ private:
void calculateBoneMatrices(CharacterInstance& instance); void calculateBoneMatrices(CharacterInstance& instance);
glm::mat4 getBoneTransform(const pipeline::M2Bone& bone, float time, int sequenceIndex); glm::mat4 getBoneTransform(const pipeline::M2Bone& bone, float time, int sequenceIndex);
glm::mat4 getModelMatrix(const CharacterInstance& instance) const; glm::mat4 getModelMatrix(const CharacterInstance& instance) const;
void destroyModelGPU(M2ModelGPU& gpuModel); void destroyModelGPU(M2ModelGPU& gpuModel, bool defer = false);
void destroyInstanceBones(CharacterInstance& inst, bool defer = false); void destroyInstanceBones(CharacterInstance& inst, bool defer = false);
// Keyframe interpolation helpers // Keyframe interpolation helpers

View file

@ -182,7 +182,9 @@ private:
VkPipelineLayout pipelineLayout = VK_NULL_HANDLE; VkPipelineLayout pipelineLayout = VK_NULL_HANDLE;
VkDescriptorSetLayout descSetLayout = VK_NULL_HANDLE; VkDescriptorSetLayout descSetLayout = VK_NULL_HANDLE;
VkDescriptorPool descPool = VK_NULL_HANDLE; VkDescriptorPool descPool = VK_NULL_HANDLE;
VkDescriptorSet descSet = VK_NULL_HANDLE; // Per-frame descriptor sets to avoid race with in-flight command buffers
static constexpr uint32_t DESC_SET_COUNT = 2; // matches MAX_FRAMES_IN_FLIGHT
VkDescriptorSet descSet[DESC_SET_COUNT] = {};
}; };
FXAAState fxaa_; FXAAState fxaa_;
bool initFXAAResources(); bool initFXAAResources();

View file

@ -472,11 +472,31 @@ void CharacterRenderer::createFallbackTextures(VkDevice device) {
} }
} }
void CharacterRenderer::destroyModelGPU(M2ModelGPU& gpuModel) { void CharacterRenderer::destroyModelGPU(M2ModelGPU& gpuModel, bool defer) {
if (!vkCtx_) return; if (!vkCtx_) return;
VmaAllocator alloc = vkCtx_->getAllocator(); VmaAllocator alloc = vkCtx_->getAllocator();
if (gpuModel.vertexBuffer) { vmaDestroyBuffer(alloc, gpuModel.vertexBuffer, gpuModel.vertexAlloc); gpuModel.vertexBuffer = VK_NULL_HANDLE; }
if (gpuModel.indexBuffer) { vmaDestroyBuffer(alloc, gpuModel.indexBuffer, gpuModel.indexAlloc); gpuModel.indexBuffer = VK_NULL_HANDLE; } // Snapshot raw handles and null the model fields immediately
::VkBuffer vb = gpuModel.vertexBuffer;
VmaAllocation vbAlloc = gpuModel.vertexAlloc;
::VkBuffer ib = gpuModel.indexBuffer;
VmaAllocation ibAlloc = gpuModel.indexAlloc;
gpuModel.vertexBuffer = VK_NULL_HANDLE;
gpuModel.vertexAlloc = VK_NULL_HANDLE;
gpuModel.indexBuffer = VK_NULL_HANDLE;
gpuModel.indexAlloc = VK_NULL_HANDLE;
if (!defer) {
// Safe after vkDeviceWaitIdle (shutdown / clear paths)
if (vb) vmaDestroyBuffer(alloc, vb, vbAlloc);
if (ib) vmaDestroyBuffer(alloc, ib, ibAlloc);
} else if (vb || ib) {
// Streaming path: in-flight command buffers may still reference these
vkCtx_->deferAfterFrameFence([alloc, vb, vbAlloc, ib, ibAlloc]() {
if (vb) vmaDestroyBuffer(alloc, vb, vbAlloc);
if (ib) vmaDestroyBuffer(alloc, ib, ibAlloc);
});
}
} }
void CharacterRenderer::destroyInstanceBones(CharacterInstance& inst, bool defer) { void CharacterRenderer::destroyInstanceBones(CharacterInstance& inst, bool defer) {
@ -1412,7 +1432,7 @@ bool CharacterRenderer::loadModel(const pipeline::M2Model& model, uint32_t id) {
if (models.find(id) != models.end()) { if (models.find(id) != models.end()) {
core::Logger::getInstance().warning("Model ID ", id, " already loaded, replacing"); core::Logger::getInstance().warning("Model ID ", id, " already loaded, replacing");
destroyModelGPU(models[id]); destroyModelGPU(models[id], /*defer=*/true);
} }
M2ModelGPU gpuModel; M2ModelGPU gpuModel;

View file

@ -198,7 +198,9 @@ bool PostProcessPipeline::executePostProcessing(VkCommandBuffer cmd, uint32_t im
// FSR3+FXAA combined: re-point FXAA's descriptor to the FSR3 temporal output // FSR3+FXAA combined: re-point FXAA's descriptor to the FSR3 temporal output
// so renderFXAAPass() applies spatial AA on the temporally-stabilized frame. // so renderFXAAPass() applies spatial AA on the temporally-stabilized frame.
// This must happen outside the render pass (descriptor updates are CPU-side). // This must happen outside the render pass (descriptor updates are CPU-side).
if (fxaa_.enabled && fxaa_.descSet && fxaa_.sceneSampler) { // Use per-frame descriptor set to avoid race with in-flight command buffers.
uint32_t fxaaFrameIdx = vkCtx_->getCurrentFrame();
if (fxaa_.enabled && fxaa_.descSet[fxaaFrameIdx] && fxaa_.sceneSampler) {
VkImageView fsr3OutputView = VK_NULL_HANDLE; VkImageView fsr3OutputView = VK_NULL_HANDLE;
if (fsr2_.useAmdBackend) { if (fsr2_.useAmdBackend) {
if (fsr2_.amdFsr3FramegenRuntimeActive && fsr2_.framegenOutput.image) if (fsr2_.amdFsr3FramegenRuntimeActive && fsr2_.framegenOutput.image)
@ -215,7 +217,7 @@ bool PostProcessPipeline::executePostProcessing(VkCommandBuffer cmd, uint32_t im
imgInfo.sampler = fxaa_.sceneSampler; imgInfo.sampler = fxaa_.sceneSampler;
VkWriteDescriptorSet write{}; VkWriteDescriptorSet write{};
write.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET; write.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET;
write.dstSet = fxaa_.descSet; write.dstSet = fxaa_.descSet[fxaaFrameIdx];
write.dstBinding = 0; write.dstBinding = 0;
write.descriptorCount = 1; write.descriptorCount = 1;
write.descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER; write.descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER;
@ -257,23 +259,23 @@ bool PostProcessPipeline::executePostProcessing(VkCommandBuffer cmd, uint32_t im
// of RCAS sharpening. FXAA descriptor is temporarily pointed to the FSR3 // of RCAS sharpening. FXAA descriptor is temporarily pointed to the FSR3
// history buffer (which is already in SHADER_READ_ONLY_OPTIMAL). This gives // history buffer (which is already in SHADER_READ_ONLY_OPTIMAL). This gives
// FSR3 temporal stability + FXAA spatial edge smoothing ("ultra quality native"). // FSR3 temporal stability + FXAA spatial edge smoothing ("ultra quality native").
if (fxaa_.enabled && fxaa_.pipeline && fxaa_.descSet) { if (fxaa_.enabled && fxaa_.pipeline && fxaa_.descSet[fxaaFrameIdx]) {
renderFXAAPass(); renderFXAAPass();
} else { } else {
// Draw RCAS sharpening from accumulated history buffer // Draw RCAS sharpening from accumulated history buffer
renderFSR2Sharpen(); renderFSR2Sharpen();
} }
// Restore FXAA descriptor to its normal scene color source so standalone // Restore this frame's FXAA descriptor to its normal scene color source
// FXAA frames are not affected by the FSR3 history pointer set above. // so standalone FXAA frames are not affected by the FSR3 history pointer.
if (fxaa_.enabled && fxaa_.descSet && fxaa_.sceneSampler && fxaa_.sceneColor.imageView) { if (fxaa_.enabled && fxaa_.descSet[fxaaFrameIdx] && fxaa_.sceneSampler && fxaa_.sceneColor.imageView) {
VkDescriptorImageInfo restoreInfo{}; VkDescriptorImageInfo restoreInfo{};
restoreInfo.imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; restoreInfo.imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
restoreInfo.imageView = fxaa_.sceneColor.imageView; restoreInfo.imageView = fxaa_.sceneColor.imageView;
restoreInfo.sampler = fxaa_.sceneSampler; restoreInfo.sampler = fxaa_.sceneSampler;
VkWriteDescriptorSet restoreWrite{}; VkWriteDescriptorSet restoreWrite{};
restoreWrite.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET; restoreWrite.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET;
restoreWrite.dstSet = fxaa_.descSet; restoreWrite.dstSet = fxaa_.descSet[fxaaFrameIdx];
restoreWrite.dstBinding = 0; restoreWrite.dstBinding = 0;
restoreWrite.descriptorCount = 1; restoreWrite.descriptorCount = 1;
restoreWrite.descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER; restoreWrite.descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER;
@ -1754,36 +1756,41 @@ bool PostProcessPipeline::initFXAAResources() {
layoutInfo.pBindings = &binding; layoutInfo.pBindings = &binding;
vkCreateDescriptorSetLayout(device, &layoutInfo, nullptr, &fxaa_.descSetLayout); vkCreateDescriptorSetLayout(device, &layoutInfo, nullptr, &fxaa_.descSetLayout);
constexpr uint32_t setCount = FXAAState::DESC_SET_COUNT;
VkDescriptorPoolSize poolSize{}; VkDescriptorPoolSize poolSize{};
poolSize.type = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER; poolSize.type = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER;
poolSize.descriptorCount = 1; poolSize.descriptorCount = setCount;
VkDescriptorPoolCreateInfo poolInfo{}; VkDescriptorPoolCreateInfo poolInfo{};
poolInfo.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO; poolInfo.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO;
poolInfo.maxSets = 1; poolInfo.maxSets = setCount;
poolInfo.poolSizeCount = 1; poolInfo.poolSizeCount = 1;
poolInfo.pPoolSizes = &poolSize; poolInfo.pPoolSizes = &poolSize;
vkCreateDescriptorPool(device, &poolInfo, nullptr, &fxaa_.descPool); vkCreateDescriptorPool(device, &poolInfo, nullptr, &fxaa_.descPool);
VkDescriptorSetLayout layouts[setCount];
for (uint32_t i = 0; i < setCount; i++) layouts[i] = fxaa_.descSetLayout;
VkDescriptorSetAllocateInfo dsAllocInfo{}; VkDescriptorSetAllocateInfo dsAllocInfo{};
dsAllocInfo.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO; dsAllocInfo.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO;
dsAllocInfo.descriptorPool = fxaa_.descPool; dsAllocInfo.descriptorPool = fxaa_.descPool;
dsAllocInfo.descriptorSetCount = 1; dsAllocInfo.descriptorSetCount = setCount;
dsAllocInfo.pSetLayouts = &fxaa_.descSetLayout; dsAllocInfo.pSetLayouts = layouts;
vkAllocateDescriptorSets(device, &dsAllocInfo, &fxaa_.descSet); vkAllocateDescriptorSets(device, &dsAllocInfo, fxaa_.descSet);
// Bind the resolved 1x sceneColor // Bind the resolved 1x sceneColor to all per-frame sets
VkDescriptorImageInfo imgInfo{}; VkDescriptorImageInfo imgInfo{};
imgInfo.sampler = fxaa_.sceneSampler; imgInfo.sampler = fxaa_.sceneSampler;
imgInfo.imageView = fxaa_.sceneColor.imageView; imgInfo.imageView = fxaa_.sceneColor.imageView;
imgInfo.imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; imgInfo.imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
VkWriteDescriptorSet write{}; for (uint32_t i = 0; i < setCount; i++) {
write.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET; VkWriteDescriptorSet write{};
write.dstSet = fxaa_.descSet; write.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET;
write.dstBinding = 0; write.dstSet = fxaa_.descSet[i];
write.descriptorCount = 1; write.dstBinding = 0;
write.descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER; write.descriptorCount = 1;
write.pImageInfo = &imgInfo; write.descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER;
vkUpdateDescriptorSets(device, 1, &write, 0, nullptr); write.pImageInfo = &imgInfo;
vkUpdateDescriptorSets(device, 1, &write, 0, nullptr);
}
// Pipeline layout — push constant holds vec4(rcpFrame.xy, sharpness, pad) // Pipeline layout — push constant holds vec4(rcpFrame.xy, sharpness, pad)
VkPushConstantRange pc{}; VkPushConstantRange pc{};
@ -1843,7 +1850,7 @@ void PostProcessPipeline::destroyFXAAResources() {
if (fxaa_.pipeline) { vkDestroyPipeline(device, fxaa_.pipeline, nullptr); fxaa_.pipeline = VK_NULL_HANDLE; } if (fxaa_.pipeline) { vkDestroyPipeline(device, fxaa_.pipeline, nullptr); fxaa_.pipeline = VK_NULL_HANDLE; }
if (fxaa_.pipelineLayout) { vkDestroyPipelineLayout(device, fxaa_.pipelineLayout, nullptr); fxaa_.pipelineLayout = VK_NULL_HANDLE; } if (fxaa_.pipelineLayout) { vkDestroyPipelineLayout(device, fxaa_.pipelineLayout, nullptr); fxaa_.pipelineLayout = VK_NULL_HANDLE; }
if (fxaa_.descPool) { vkDestroyDescriptorPool(device, fxaa_.descPool, nullptr); fxaa_.descPool = VK_NULL_HANDLE; fxaa_.descSet = VK_NULL_HANDLE; } if (fxaa_.descPool) { vkDestroyDescriptorPool(device, fxaa_.descPool, nullptr); fxaa_.descPool = VK_NULL_HANDLE; for (auto& s : fxaa_.descSet) s = VK_NULL_HANDLE; }
if (fxaa_.descSetLayout) { vkDestroyDescriptorSetLayout(device, fxaa_.descSetLayout, nullptr); fxaa_.descSetLayout = VK_NULL_HANDLE; } if (fxaa_.descSetLayout) { vkDestroyDescriptorSetLayout(device, fxaa_.descSetLayout, nullptr); fxaa_.descSetLayout = VK_NULL_HANDLE; }
if (fxaa_.sceneFramebuffer) { vkDestroyFramebuffer(device, fxaa_.sceneFramebuffer, nullptr); fxaa_.sceneFramebuffer = VK_NULL_HANDLE; } if (fxaa_.sceneFramebuffer) { vkDestroyFramebuffer(device, fxaa_.sceneFramebuffer, nullptr); fxaa_.sceneFramebuffer = VK_NULL_HANDLE; }
fxaa_.sceneSampler = VK_NULL_HANDLE; // Owned by VkContext sampler cache fxaa_.sceneSampler = VK_NULL_HANDLE; // Owned by VkContext sampler cache
@ -1857,9 +1864,10 @@ void PostProcessPipeline::renderFXAAPass() {
if (!fxaa_.pipeline || currentCmd_ == VK_NULL_HANDLE) return; if (!fxaa_.pipeline || currentCmd_ == VK_NULL_HANDLE) return;
VkExtent2D ext = vkCtx_->getSwapchainExtent(); VkExtent2D ext = vkCtx_->getSwapchainExtent();
uint32_t fi = vkCtx_->getCurrentFrame();
vkCmdBindPipeline(currentCmd_, VK_PIPELINE_BIND_POINT_GRAPHICS, fxaa_.pipeline); vkCmdBindPipeline(currentCmd_, VK_PIPELINE_BIND_POINT_GRAPHICS, fxaa_.pipeline);
vkCmdBindDescriptorSets(currentCmd_, VK_PIPELINE_BIND_POINT_GRAPHICS, vkCmdBindDescriptorSets(currentCmd_, VK_PIPELINE_BIND_POINT_GRAPHICS,
fxaa_.pipelineLayout, 0, 1, &fxaa_.descSet, 0, nullptr); fxaa_.pipelineLayout, 0, 1, &fxaa_.descSet[fi], 0, nullptr);
// Pass rcpFrame + sharpness + effect flag (vec4, 16 bytes). // Pass rcpFrame + sharpness + effect flag (vec4, 16 bytes).
// When FSR2/FSR3 is active alongside FXAA, forward FSR2's sharpness so the // When FSR2/FSR3 is active alongside FXAA, forward FSR2's sharpness so the

View file

@ -1920,27 +1920,25 @@ void WaterRenderer::endReflectionPass(VkCommandBuffer cmd) {
vkCmdEndRenderPass(cmd); vkCmdEndRenderPass(cmd);
reflectionColorLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; reflectionColorLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
// Update all per-frame scene descriptor sets with the freshly rendered reflection texture // Update only the current frame's scene descriptor set with the reflection texture.
if (reflectionColorView && reflectionSampler) { // Updating all frames would race with in-flight command buffers that have the
VkDescriptorImageInfo reflInfo{}; // other frame's descriptor set bound.
reflInfo.sampler = reflectionSampler; if (reflectionColorView && reflectionSampler && vkCtx) {
reflInfo.imageView = reflectionColorView; uint32_t fi = vkCtx->getCurrentFrame() % SCENE_HISTORY_FRAMES;
reflInfo.imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; if (sceneHistory[fi].sceneSet) {
VkDescriptorImageInfo reflInfo{};
reflInfo.sampler = reflectionSampler;
reflInfo.imageView = reflectionColorView;
reflInfo.imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
std::vector<VkWriteDescriptorSet> writes;
for (uint32_t f = 0; f < SCENE_HISTORY_FRAMES; f++) {
if (!sceneHistory[f].sceneSet) continue;
VkWriteDescriptorSet write{}; VkWriteDescriptorSet write{};
write.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET; write.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET;
write.dstSet = sceneHistory[f].sceneSet; write.dstSet = sceneHistory[fi].sceneSet;
write.dstBinding = 2; write.dstBinding = 2;
write.descriptorCount = 1; write.descriptorCount = 1;
write.descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER; write.descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER;
write.pImageInfo = &reflInfo; write.pImageInfo = &reflInfo;
writes.push_back(write); vkUpdateDescriptorSets(vkCtx->getDevice(), 1, &write, 0, nullptr);
}
if (!writes.empty()) {
vkUpdateDescriptorSets(vkCtx->getDevice(), static_cast<uint32_t>(writes.size()), writes.data(), 0, nullptr);
} }
} }
} }