diff --git a/assets/shaders/fsr2_accumulate.comp.glsl b/assets/shaders/fsr2_accumulate.comp.glsl
new file mode 100644
index 00000000..a998b52c
--- /dev/null
+++ b/assets/shaders/fsr2_accumulate.comp.glsl
@@ -0,0 +1,117 @@
+#version 450
+
+layout(local_size_x = 8, local_size_y = 8) in;
+
+// Inputs (internal resolution)
+layout(set = 0, binding = 0) uniform sampler2D sceneColor;
+layout(set = 0, binding = 1) uniform sampler2D depthBuffer;
+layout(set = 0, binding = 2) uniform sampler2D motionVectors;
+
+// History (display resolution)
+layout(set = 0, binding = 3) uniform sampler2D historyInput;
+
+// Output (display resolution)
+layout(set = 0, binding = 4, rgba16f) uniform writeonly image2D historyOutput;
+
+layout(push_constant) uniform PushConstants {
+    vec4 internalSize;   // xy = internal resolution, zw = 1/internal
+    vec4 displaySize;    // xy = display resolution, zw = 1/display
+    vec4 jitterOffset;   // xy = current jitter (pixel-space), zw = unused
+    vec4 params;         // x = resetHistory (1=reset), y = sharpness, zw = unused
+} pc;
+
+// RGB <-> YCoCg for neighborhood clamping
+vec3 rgbToYCoCg(vec3 rgb) {
+    float y  = 0.25 * rgb.r + 0.5 * rgb.g + 0.25 * rgb.b;
+    float co = 0.5 * rgb.r - 0.5 * rgb.b;
+    float cg = -0.25 * rgb.r + 0.5 * rgb.g - 0.25 * rgb.b;
+    return vec3(y, co, cg);
+}
+
+vec3 yCoCgToRgb(vec3 ycocg) {
+    float y  = ycocg.x;
+    float co = ycocg.y;
+    float cg = ycocg.z;
+    return vec3(y + co - cg, y + cg, y - co - cg);
+}
+
+// Temporal accumulation: one invocation per display pixel. Reprojects the history with the motion
+// vectors, clamps it to the current frame's 3x3 YCoCg neighborhood and blends it into historyOutput.
+void main() {
+    ivec2 outPixel = ivec2(gl_GlobalInvocationID.xy);
+    ivec2 outSize = ivec2(pc.displaySize.xy);
+    if (outPixel.x >= outSize.x || outPixel.y >= outSize.y) return;
+
+    // Output UV in display space
+    vec2 outUV = (vec2(outPixel) + 0.5) * pc.displaySize.zw;
+
+    // Map display pixel to internal-resolution UV (same normalized UV; pc.jitterOffset is not applied here)
+    vec2 internalUV = outUV;
+
+    // Sample current frame color at internal resolution
+    vec3 currentColor = texture(sceneColor, internalUV).rgb;
+
+    // Sample motion vector at internal resolution
+    vec2 inUV = outUV; // Approximate — display maps to internal via scale
+    vec2 motion = texture(motionVectors, inUV).rg;
+
+    // Reproject: fsr2_motion.comp stores motion = previous UV - current UV, so previous = current + motion
+    vec2 historyUV = outUV + motion;
+
+    // History reset: on teleport / camera cut, just use current frame
+    if (pc.params.x > 0.5) {
+        imageStore(historyOutput, outPixel, vec4(currentColor, 1.0));
+        return;
+    }
+
+    // Sample reprojected history
+    vec3 historyColor = texture(historyInput, historyUV).rgb;
+
+    // Neighborhood clamping in YCoCg space to prevent ghosting
+    // Sample 3x3 neighborhood from current frame
+    vec2 texelSize = pc.internalSize.zw;
+    vec3 samples[9];
+    int idx = 0;
+    for (int dy = -1; dy <= 1; dy++) {
+        for (int dx = -1; dx <= 1; dx++) {
+            samples[idx] = rgbToYCoCg(texture(sceneColor, internalUV + vec2(dx, dy) * texelSize).rgb);
+            idx++;
+        }
+    }
+
+    // Compute AABB in YCoCg
+    vec3 boxMin = samples[0];
+    vec3 boxMax = samples[0];
+    for (int i = 1; i < 9; i++) {
+        boxMin = min(boxMin, samples[i]);
+        boxMax = max(boxMax, samples[i]);
+    }
+
+    // Slightly expand the box to reduce flickering on edges
+    vec3 boxCenter = (boxMin + boxMax) * 0.5;
+    vec3 boxExtent = (boxMax - boxMin) * 0.5;
+    boxMin = boxCenter - boxExtent * 1.25;
+    boxMax = boxCenter + boxExtent * 1.25;
+
+    // Clamp history to the neighborhood AABB
+    vec3 historyYCoCg = rgbToYCoCg(historyColor);
+    vec3 clampedHistory = clamp(historyYCoCg, boxMin, boxMax);
+    historyColor = yCoCgToRgb(clampedHistory);
+
+    // Check if history UV is valid (within [0,1])
+    float historyValid = (historyUV.x >= 0.0 && historyUV.x <= 1.0 &&
+                          historyUV.y >= 0.0 && historyUV.y <= 1.0) ? 1.0 : 0.0;
+
+    // Blend factor: use more current frame for disoccluded regions
+    // Distance the history moved when clamped (YCoCg) → lower confidence in history
+    float clampDist = length(historyYCoCg - clampedHistory);
+    float blendFactor = mix(0.05, 0.3, clamp(clampDist * 4.0, 0.0, 1.0));
+
+    // If history is off-screen, use current frame entirely
+    blendFactor = mix(blendFactor, 1.0, 1.0 - historyValid);
+
+    // Final blend
+    vec3 result = mix(historyColor, currentColor, blendFactor);
+
+    imageStore(historyOutput, outPixel, vec4(result, 1.0));
+}
diff --git a/assets/shaders/fsr2_accumulate.comp.spv b/assets/shaders/fsr2_accumulate.comp.spv
new file mode 100644
index 00000000..4d31fba7
Binary files /dev/null and b/assets/shaders/fsr2_accumulate.comp.spv differ
diff --git a/assets/shaders/fsr2_motion.comp.glsl b/assets/shaders/fsr2_motion.comp.glsl
new file mode 100644
index 00000000..f4f68c2c
--- /dev/null
+++ b/assets/shaders/fsr2_motion.comp.glsl
@@ -0,0 +1,44 @@
+#version 450
+
+layout(local_size_x = 8, local_size_y = 8) in;
+
+layout(set = 0, binding = 0) uniform sampler2D depthBuffer;
+layout(set = 0, binding = 1, rg16f) uniform writeonly image2D motionVectors;
+
+layout(push_constant) uniform PushConstants {
+    mat4 invViewProj;    // Inverse of current jittered VP
+    mat4 prevViewProj;   // Previous frame unjittered VP
+    vec4 resolution;     // xy = internal size, zw = 1/internal size
+    vec4 jitterOffset;   // xy = current jitter (NDC), zw = previous jitter
+} pc;
+
+void main() {
+    ivec2 pixelCoord = ivec2(gl_GlobalInvocationID.xy);
+    ivec2 imgSize = ivec2(pc.resolution.xy);
+    if (pixelCoord.x >= imgSize.x || pixelCoord.y >= imgSize.y) return;
+
+    // Sample depth (Vulkan: 0 = near, 1 = far)
+    float depth = texelFetch(depthBuffer, pixelCoord, 0).r;
+
+    // Pixel center in NDC [-1, 1]
+    vec2 uv = (vec2(pixelCoord) + 0.5) * pc.resolution.zw;
+    vec2 ndc = uv * 2.0 - 1.0;
+
+    // Reconstruct world position from depth
+    vec4 clipPos = vec4(ndc, depth, 1.0);
+    vec4 worldPos =
pc.invViewProj * clipPos;
+    worldPos /= worldPos.w;
+
+    // Project into previous frame's clip space (unjittered)
+    vec4 prevClip = pc.prevViewProj * worldPos;
+    vec2 prevNdc = prevClip.xy / prevClip.w;
+    vec2 prevUV = prevNdc * 0.5 + 0.5;
+
+    // Remove jitter from current UV to get unjittered position
+    vec2 unjitteredUV = uv - pc.jitterOffset.xy * 0.5;
+
+    // Motion = previous position - current unjittered position (in UV space)
+    vec2 motion = prevUV - unjitteredUV;
+
+    imageStore(motionVectors, pixelCoord, vec4(motion, 0.0, 0.0));
+}
diff --git a/assets/shaders/fsr2_motion.comp.spv b/assets/shaders/fsr2_motion.comp.spv
new file mode 100644
index 00000000..813c4b9d
Binary files /dev/null and b/assets/shaders/fsr2_motion.comp.spv differ
diff --git a/assets/shaders/fsr2_sharpen.frag.glsl b/assets/shaders/fsr2_sharpen.frag.glsl
new file mode 100644
index 00000000..b4dd928b
--- /dev/null
+++ b/assets/shaders/fsr2_sharpen.frag.glsl
@@ -0,0 +1,44 @@
+#version 450
+
+layout(location = 0) in vec2 TexCoord;
+layout(location = 0) out vec4 FragColor;
+
+layout(set = 0, binding = 0) uniform sampler2D inputImage;
+
+layout(push_constant) uniform PushConstants {
+    vec4 params; // x = 1/width, y = 1/height, z = sharpness (0-2), w = unused
+} pc;
+
+void main() {
+    vec2 texelSize = pc.params.xy;
+    float sharpness = pc.params.z;
+
+    // RCAS: Robust Contrast-Adaptive Sharpening
+    // 5-tap cross pattern
+    vec3 center = texture(inputImage, TexCoord).rgb;
+    vec3 north  = texture(inputImage, TexCoord + vec2(0.0, -texelSize.y)).rgb;
+    vec3 south  = texture(inputImage, TexCoord + vec2(0.0,  texelSize.y)).rgb;
+    vec3 west   = texture(inputImage, TexCoord + vec2(-texelSize.x, 0.0)).rgb;
+    vec3 east   = texture(inputImage, TexCoord + vec2( texelSize.x, 0.0)).rgb;
+
+    // Compute local contrast (min/max of neighborhood)
+    vec3 minRGB = min(center, min(min(north, south), min(west, east)));
+    vec3 maxRGB = max(center, max(max(north, south), max(west, east)));
+
+    // Adaptive sharpening weight based on local contrast
+    // High contrast = less sharpening (prevent ringing)
+    vec3 range = maxRGB - minRGB;
+
+    // Sharpening amount: falls off with the largest per-channel contrast, scaled by the user sharpness
+    float lumaRange = max(range.r, max(range.g, range.b));
+    float w = clamp(1.0 - lumaRange * 2.0, 0.0, 1.0) * sharpness * 0.25;
+
+    // Apply sharpening via unsharp mask
+    vec3 avg = (north + south + west + east) * 0.25;
+    vec3 sharpened = center + (center - avg) * w;
+
+    // Clamp to prevent ringing artifacts
+    sharpened = clamp(sharpened, minRGB, maxRGB);
+
+    FragColor = vec4(sharpened, 1.0);
+}
diff --git a/assets/shaders/fsr2_sharpen.frag.spv b/assets/shaders/fsr2_sharpen.frag.spv
new file mode 100644
index 00000000..99aba03a
Binary files /dev/null and b/assets/shaders/fsr2_sharpen.frag.spv differ
diff --git a/include/rendering/camera.hpp b/include/rendering/camera.hpp
index 0464007f..99a4879a 100644
--- a/include/rendering/camera.hpp
+++ b/include/rendering/camera.hpp
@@ -23,9 +23,16 @@ public:
     const glm::vec3& getPosition() const { return position; }
     const glm::mat4& getViewMatrix() const { return viewMatrix; }
     const glm::mat4& getProjectionMatrix() const { return projectionMatrix; }
+    const glm::mat4& getUnjitteredProjectionMatrix() const { return unjitteredProjectionMatrix; }
     glm::mat4 getViewProjectionMatrix() const { return projectionMatrix * viewMatrix; }
+    glm::mat4 getUnjitteredViewProjectionMatrix() const { return unjitteredProjectionMatrix * viewMatrix; }
     float getAspectRatio() const { return aspectRatio; }
 
+    // Sub-pixel jitter for temporal upscaling (FSR 2)
+    void setJitter(float jx, float jy);
+    void clearJitter();
+    glm::vec2 getJitter() const { return jitterOffset; }
+
     glm::vec3 getForward() const;
     glm::vec3 getRight() const;
     glm::vec3 getUp() const;
@@ -46,6 +53,8 @@ private:
 
     glm::mat4 viewMatrix = glm::mat4(1.0f);
     glm::mat4 projectionMatrix = glm::mat4(1.0f);
+    glm::mat4
unjitteredProjectionMatrix = glm::mat4(1.0f);
+    glm::vec2 jitterOffset = glm::vec2(0.0f); // NDC jitter (applied to projection)
 };
 
 } // namespace rendering
diff --git a/include/rendering/renderer.hpp b/include/rendering/renderer.hpp
index cbb9c7e1..13f77fe2 100644
--- a/include/rendering/renderer.hpp
+++ b/include/rendering/renderer.hpp
@@ -261,13 +261,15 @@ public:
     float getShadowDistance() const { return shadowDistance_; }
     void setMsaaSamples(VkSampleCountFlagBits samples);
 
-    // FSR 1.0 (FidelityFX Super Resolution) upscaling
+    // FSR (FidelityFX Super Resolution) upscaling
     void setFSREnabled(bool enabled);
     bool isFSREnabled() const { return fsr_.enabled; }
     void setFSRQuality(float scaleFactor);  // 0.50=Perf, 0.59=Balanced, 0.67=Quality, 0.77=UltraQuality
     void setFSRSharpness(float sharpness);  // 0.0 - 2.0
     float getFSRScaleFactor() const { return fsr_.scaleFactor; }
     float getFSRSharpness() const { return fsr_.sharpness; }
+    void setFSR2Enabled(bool enabled);
+    bool isFSR2Enabled() const { return fsr2_.enabled; }
 
     void setWaterRefractionEnabled(bool enabled);
     bool isWaterRefractionEnabled() const;
@@ -363,6 +365,65 @@ private:
     void destroyFSRResources();
     void renderFSRUpscale();
 
+    // FSR 2.2 temporal upscaling state
+    struct FSR2State {
+        bool enabled = false;
+        bool needsRecreate = false;
+        float scaleFactor = 0.77f;
+        float sharpness = 0.5f;
+        uint32_t internalWidth = 0;
+        uint32_t internalHeight = 0;
+
+        // Off-screen scene targets (internal resolution, no MSAA — FSR2 replaces AA)
+        AllocatedImage sceneColor{};
+        AllocatedImage sceneDepth{};
+        VkFramebuffer sceneFramebuffer = VK_NULL_HANDLE;
+
+        // Samplers
+        VkSampler linearSampler = VK_NULL_HANDLE;   // For color
+        VkSampler nearestSampler = VK_NULL_HANDLE;  // For depth / motion vectors
+
+        // Motion vector buffer (internal resolution)
+        AllocatedImage motionVectors{};
+
+        // History buffers (display resolution, ping-pong)
+        AllocatedImage history[2]{};
+        uint32_t currentHistory = 0;  // Output index (0 or 1)
+
+        // Compute pipelines
+        VkPipeline motionVecPipeline = VK_NULL_HANDLE;
+        VkPipelineLayout motionVecPipelineLayout = VK_NULL_HANDLE;
+        VkDescriptorSetLayout motionVecDescSetLayout = VK_NULL_HANDLE;
+        VkDescriptorPool motionVecDescPool = VK_NULL_HANDLE;
+        VkDescriptorSet motionVecDescSet = VK_NULL_HANDLE;
+
+        VkPipeline accumulatePipeline = VK_NULL_HANDLE;
+        VkPipelineLayout accumulatePipelineLayout = VK_NULL_HANDLE;
+        VkDescriptorSetLayout accumulateDescSetLayout = VK_NULL_HANDLE;
+        VkDescriptorPool accumulateDescPool = VK_NULL_HANDLE;
+        VkDescriptorSet accumulateDescSets[2] = {};  // Per ping-pong
+
+        // RCAS sharpening pass (display resolution)
+        VkPipeline sharpenPipeline = VK_NULL_HANDLE;
+        VkPipelineLayout sharpenPipelineLayout = VK_NULL_HANDLE;
+        VkDescriptorSetLayout sharpenDescSetLayout = VK_NULL_HANDLE;
+        VkDescriptorPool sharpenDescPool = VK_NULL_HANDLE;
+        VkDescriptorSet sharpenDescSet = VK_NULL_HANDLE;
+
+        // Previous frame state for motion vector reprojection
+        glm::mat4 prevViewProjection = glm::mat4(1.0f);
+        glm::vec2 prevJitter = glm::vec2(0.0f);
+        uint32_t frameIndex = 0;
+        bool needsHistoryReset = true;
+    };
+    FSR2State fsr2_;
+    bool initFSR2Resources();
+    void destroyFSR2Resources();
+    void dispatchMotionVectors();
+    void dispatchTemporalAccumulate();
+    void renderFSR2Sharpen();
+    static float halton(uint32_t index, uint32_t base);
+
     // Footstep event tracking (animation-driven)
     uint32_t footstepLastAnimationId = 0;
     float footstepLastNormTime = 0.0f;
diff --git a/src/rendering/camera.cpp b/src/rendering/camera.cpp
index f8b45f3c..bd1ebe0a 100644
--- a/src/rendering/camera.cpp
+++ b/src/rendering/camera.cpp
@@ -20,6 +20,13 @@ void Camera::updateProjectionMatrix() {
     projectionMatrix = glm::perspective(glm::radians(fov), aspectRatio, nearPlane, farPlane);
     // Vulkan clip-space has Y pointing down; flip the projection's Y axis.
     projectionMatrix[1][1] *= -1.0f;
+    unjitteredProjectionMatrix = projectionMatrix;
+
+    // Re-apply jitter if active
+    if (jitterOffset.x != 0.0f || jitterOffset.y != 0.0f) {
+        projectionMatrix[2][0] += jitterOffset.x;
+        projectionMatrix[2][1] += jitterOffset.y;
+    }
 }
 
 glm::vec3 Camera::getForward() const {
@@ -40,6 +47,21 @@ glm::vec3 Camera::getUp() const {
     return glm::normalize(glm::cross(getRight(), getForward()));
 }
 
+void Camera::setJitter(float jx, float jy) {
+    // Store the new offset and rebuild the two jittered entries from
+    // unjitteredProjectionMatrix instead of subtracting the previous offset,
+    // so the jittered matrix can never drift away from the unjittered one.
+    jitterOffset = glm::vec2(jx, jy);
+    projectionMatrix[2][0] = unjitteredProjectionMatrix[2][0] + jitterOffset.x;
+    projectionMatrix[2][1] = unjitteredProjectionMatrix[2][1] + jitterOffset.y;
+}
+
+void Camera::clearJitter() {
+    jitterOffset = glm::vec2(0.0f);
+    projectionMatrix[2][0] = unjitteredProjectionMatrix[2][0];
+    projectionMatrix[2][1] = unjitteredProjectionMatrix[2][1];
+}
+
 Ray Camera::screenToWorldRay(float screenX, float screenY, float screenW, float screenH) const {
     float ndcX = (2.0f * screenX / screenW) - 1.0f;
     // Vulkan Y-flip is baked into projectionMatrix, so NDC Y maps directly:
diff --git a/src/rendering/renderer.cpp b/src/rendering/renderer.cpp
index 4e2b66f5..81686219 100644
--- a/src/rendering/renderer.cpp
+++ b/src/rendering/renderer.cpp
@@ -837,6 +837,7 @@ void Renderer::shutdown() {
     }
 
     destroyFSRResources();
+    destroyFSR2Resources();
     destroyPerFrameResources();
 
     zoneManager.reset();
@@ -937,6 +938,7 @@ void Renderer::applyMsaaChange() {
     if (selCirclePipeline) { vkDestroyPipeline(device, selCirclePipeline, nullptr); selCirclePipeline = VK_NULL_HANDLE; }
     if (overlayPipeline) { vkDestroyPipeline(device, overlayPipeline, nullptr); overlayPipeline = VK_NULL_HANDLE; }
     if (fsr_.sceneFramebuffer) destroyFSRResources();  // Will be lazily recreated in beginFrame()
+    if (fsr2_.sceneFramebuffer) destroyFSR2Resources();
 
     // Reinitialize ImGui Vulkan backend with new MSAA sample count
     ImGui_ImplVulkan_Shutdown();
@@ -972,13 +974,26 @@ void Renderer::beginFrame() {
fsr_.needsRecreate = false;
         if (!fsr_.enabled) LOG_INFO("FSR: disabled");
     }
-    if (fsr_.enabled && !fsr_.sceneFramebuffer) {
+    if (fsr_.enabled && !fsr2_.enabled && !fsr_.sceneFramebuffer) {
         if (!initFSRResources()) {
             LOG_ERROR("FSR: initialization failed, disabling");
             fsr_.enabled = false;
         }
     }
 
+    // FSR 2.2 resource management
+    if (fsr2_.needsRecreate && fsr2_.sceneFramebuffer) {
+        destroyFSR2Resources();
+        fsr2_.needsRecreate = false;
+        if (!fsr2_.enabled) LOG_INFO("FSR2: disabled");
+    }
+    if (fsr2_.enabled && !fsr2_.sceneFramebuffer) {
+        if (!initFSR2Resources()) {
+            LOG_ERROR("FSR2: initialization failed, disabling");
+            fsr2_.enabled = false;
+        }
+    }
+
     // Handle swapchain recreation if needed
     if (vkCtx->isSwapchainDirty()) {
         vkCtx->recreateSwapchain(window->getWidth(), window->getHeight());
@@ -987,10 +1002,14 @@ void Renderer::beginFrame() {
             waterRenderer->recreatePipelines();
         }
         // Recreate FSR resources for new swapchain dimensions
-        if (fsr_.enabled) {
+        if (fsr_.enabled && !fsr2_.enabled) {
             destroyFSRResources();
             initFSRResources();
         }
+        if (fsr2_.enabled) {
+            destroyFSR2Resources();
+            if (!initFSR2Resources()) { LOG_ERROR("FSR2: initialization failed, disabling"); fsr2_.enabled = false; }
+        }
     }
 
     // Acquire swapchain image and begin command buffer
@@ -1000,6 +1019,14 @@ void Renderer::beginFrame() {
         return;
     }
 
+    // Apply FSR2 jitter to camera projection before UBO upload
+    if (fsr2_.enabled && fsr2_.sceneFramebuffer && camera) {
+        // Halton(2,3) sequence for sub-pixel jitter: (h - 0.5) * 2 / size is +/- half an internal pixel in NDC
+        float jx = (halton(fsr2_.frameIndex + 1, 2) - 0.5f) * 2.0f / static_cast<float>(fsr2_.internalWidth);
+        float jy = (halton(fsr2_.frameIndex + 1, 3) - 0.5f) * 2.0f / static_cast<float>(fsr2_.internalHeight);
+        camera->setJitter(jx, jy);
+    }
+
     // Update per-frame UBO with current camera/lighting state
     updatePerFrameUBO();
 
@@ -1044,7 +1071,10 @@ void Renderer::beginFrame() {
     rpInfo.renderPass = vkCtx->getImGuiRenderPass();
 
     VkExtent2D renderExtent;
-    if (fsr_.enabled && fsr_.sceneFramebuffer) {
+    if (fsr2_.enabled && fsr2_.sceneFramebuffer) {
+        rpInfo.framebuffer = fsr2_.sceneFramebuffer;
+        renderExtent = { fsr2_.internalWidth, fsr2_.internalHeight };
+    } else if (fsr_.enabled && fsr_.sceneFramebuffer) {
         rpInfo.framebuffer = fsr_.sceneFramebuffer;
         renderExtent = { fsr_.internalWidth, fsr_.internalHeight };
     } else {
@@ -1097,7 +1127,60 @@ void Renderer::beginFrame() {
 void Renderer::endFrame() {
     if (!vkCtx || currentCmd == VK_NULL_HANDLE) return;
 
-    if (fsr_.enabled && fsr_.sceneFramebuffer) {
+    if (fsr2_.enabled && fsr2_.sceneFramebuffer) {
+        // End the off-screen scene render pass
+        vkCmdEndRenderPass(currentCmd);
+
+        // Compute passes: motion vectors → temporal accumulation
+        dispatchMotionVectors();
+        dispatchTemporalAccumulate();
+
+        // Transition history output: GENERAL → SHADER_READ_ONLY for sharpen pass
+        transitionImageLayout(currentCmd, fsr2_.history[fsr2_.currentHistory].image,
+            VK_IMAGE_LAYOUT_GENERAL, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL,
+            VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
+            VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT);
+
+        // Begin swapchain render pass at full resolution for sharpening + ImGui
+        VkRenderPassBeginInfo rpInfo{};
+        rpInfo.sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO;
+        rpInfo.renderPass = vkCtx->getImGuiRenderPass();
+        rpInfo.framebuffer = vkCtx->getSwapchainFramebuffers()[currentImageIndex];
+        rpInfo.renderArea.offset = {0, 0};
+        rpInfo.renderArea.extent = vkCtx->getSwapchainExtent();
+
+        bool msaaOn = (vkCtx->getMsaaSamples() > VK_SAMPLE_COUNT_1_BIT);
+        VkClearValue clearValues[4]{};
+        clearValues[0].color = {{0.0f, 0.0f, 0.0f, 1.0f}};
+        clearValues[1].depthStencil = {1.0f, 0};
+        clearValues[2].color = {{0.0f, 0.0f, 0.0f, 1.0f}};
+        clearValues[3].depthStencil = {1.0f, 0};
+        rpInfo.clearValueCount = msaaOn ? (vkCtx->getDepthResolveImageView() ? 4u : 3u) : 2u;
+        rpInfo.pClearValues = clearValues;
+
+        vkCmdBeginRenderPass(currentCmd, &rpInfo, VK_SUBPASS_CONTENTS_INLINE);
+
+        VkExtent2D ext = vkCtx->getSwapchainExtent();
+        VkViewport vp{};
+        vp.width = static_cast<float>(ext.width);
+        vp.height = static_cast<float>(ext.height);
+        vp.maxDepth = 1.0f;
+        vkCmdSetViewport(currentCmd, 0, 1, &vp);
+        VkRect2D sc{};
+        sc.extent = ext;
+        vkCmdSetScissor(currentCmd, 0, 1, &sc);
+
+        // Draw RCAS sharpening from accumulated history buffer
+        renderFSR2Sharpen();
+
+        if (camera) {  // Store this frame's unjittered VP and jitter for next frame's motion vectors
+            fsr2_.prevViewProjection = camera->getUnjitteredViewProjectionMatrix();
+            fsr2_.prevJitter = camera->getJitter();
+            camera->clearJitter();
+        }
+        fsr2_.currentHistory = 1 - fsr2_.currentHistory;
+        fsr2_.frameIndex++;
+    } else if (fsr_.enabled && fsr_.sceneFramebuffer) {
         // End the off-screen scene render pass
         vkCmdEndRenderPass(currentCmd);
 
@@ -1149,7 +1232,7 @@ void Renderer::endFrame() {
     }
 
     // ImGui rendering — must respect subpass contents mode
-    if (!fsr_.enabled && parallelRecordingEnabled_) {
+    if (!fsr_.enabled && !fsr2_.enabled && parallelRecordingEnabled_) {
         // Scene pass was begun with VK_SUBPASS_CONTENTS_SECONDARY_COMMAND_BUFFERS,
         // so ImGui must be recorded into a secondary command buffer.
VkCommandBuffer imguiCmd = beginSecondary(SEC_IMGUI);
@@ -3572,19 +3655,580 @@ void Renderer::setFSREnabled(bool enabled) {
 
 void Renderer::setFSRQuality(float scaleFactor) {
     scaleFactor = glm::clamp(scaleFactor, 0.5f, 1.0f);
-    if (fsr_.scaleFactor == scaleFactor) return;
+    // Nothing to do when both upscalers already use this scale; avoids a needless rebuild + history reset.
+    if (fsr_.scaleFactor == scaleFactor && fsr2_.scaleFactor == scaleFactor) return;
     fsr_.scaleFactor = scaleFactor;
+    fsr2_.scaleFactor = scaleFactor;
     // Don't destroy/recreate mid-frame — mark for lazy recreation in next beginFrame()
     if (fsr_.enabled && fsr_.sceneFramebuffer) {
         fsr_.needsRecreate = true;
     }
+    if (fsr2_.enabled && fsr2_.sceneFramebuffer) {
+        fsr2_.needsRecreate = true;
+        fsr2_.needsHistoryReset = true;
+    }
 }
 
 void Renderer::setFSRSharpness(float sharpness) {
     fsr_.sharpness = glm::clamp(sharpness, 0.0f, 2.0f);
+    fsr2_.sharpness = fsr_.sharpness;
 }
 
-// ========================= End FSR =========================
+// ========================= End FSR 1.0 =========================
+
+// ========================= FSR 2.2 Temporal Upscaling =========================
+
+float Renderer::halton(uint32_t index, uint32_t base) {
+    // The radical inverse needs base >= 2: base 0 divides by zero and base 1 never terminates.
+    if (base < 2) return 0.0f;
+    float f = 1.0f;
+    float r = 0.0f;
+    uint32_t current = index;
+    while (current > 0) {
+        f /= static_cast<float>(base);
+        r += f * static_cast<float>(current % base);
+        current /= base;
+    }
+    return r;
+}
+
+bool Renderer::initFSR2Resources() {
+    if (!vkCtx) return false;
+
+    VkDevice device = vkCtx->getDevice();
+    VmaAllocator alloc = vkCtx->getAllocator();
+    VkExtent2D swapExtent = vkCtx->getSwapchainExtent();
+
+    fsr2_.internalWidth = static_cast<uint32_t>(swapExtent.width * fsr2_.scaleFactor);
+    fsr2_.internalHeight = static_cast<uint32_t>(swapExtent.height * fsr2_.scaleFactor);
+    fsr2_.internalWidth = (fsr2_.internalWidth + 1) & ~1u;
+    fsr2_.internalHeight = (fsr2_.internalHeight + 1) & ~1u;
+
+    LOG_INFO("FSR2: initializing at ", fsr2_.internalWidth, "x", fsr2_.internalHeight,
+             " -> ", swapExtent.width, "x", swapExtent.height,
+             " (scale=", fsr2_.scaleFactor, ")");
+
+    VkFormat colorFmt = vkCtx->getSwapchainFormat();
+    VkFormat depthFmt =
vkCtx->getDepthFormat(); + + // Scene color (internal resolution, 1x — FSR2 replaces MSAA) + fsr2_.sceneColor = createImage(device, alloc, fsr2_.internalWidth, fsr2_.internalHeight, + colorFmt, VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_SAMPLED_BIT); + if (!fsr2_.sceneColor.image) { LOG_ERROR("FSR2: failed to create scene color"); return false; } + + // Scene depth (internal resolution, 1x, sampled for motion vectors) + fsr2_.sceneDepth = createImage(device, alloc, fsr2_.internalWidth, fsr2_.internalHeight, + depthFmt, VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT | VK_IMAGE_USAGE_SAMPLED_BIT); + if (!fsr2_.sceneDepth.image) { LOG_ERROR("FSR2: failed to create scene depth"); destroyFSR2Resources(); return false; } + + // Motion vector buffer (internal resolution) + fsr2_.motionVectors = createImage(device, alloc, fsr2_.internalWidth, fsr2_.internalHeight, + VK_FORMAT_R16G16_SFLOAT, VK_IMAGE_USAGE_STORAGE_BIT | VK_IMAGE_USAGE_SAMPLED_BIT); + if (!fsr2_.motionVectors.image) { LOG_ERROR("FSR2: failed to create motion vectors"); destroyFSR2Resources(); return false; } + + // History buffers (display resolution, ping-pong) + for (int i = 0; i < 2; i++) { + fsr2_.history[i] = createImage(device, alloc, swapExtent.width, swapExtent.height, + VK_FORMAT_R16G16B16A16_SFLOAT, + VK_IMAGE_USAGE_STORAGE_BIT | VK_IMAGE_USAGE_SAMPLED_BIT); + if (!fsr2_.history[i].image) { LOG_ERROR("FSR2: failed to create history buffer ", i); destroyFSR2Resources(); return false; } + } + + // Scene framebuffer (non-MSAA: [color, depth]) + // Must use the same render pass as the swapchain — which must be non-MSAA when FSR2 is active + VkImageView fbAttachments[2] = { fsr2_.sceneColor.imageView, fsr2_.sceneDepth.imageView }; + VkFramebufferCreateInfo fbInfo{}; + fbInfo.sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO; + fbInfo.renderPass = vkCtx->getImGuiRenderPass(); + fbInfo.attachmentCount = 2; + fbInfo.pAttachments = fbAttachments; + fbInfo.width = fsr2_.internalWidth; + fbInfo.height 
= fsr2_.internalHeight; + fbInfo.layers = 1; + if (vkCreateFramebuffer(device, &fbInfo, nullptr, &fsr2_.sceneFramebuffer) != VK_SUCCESS) { + LOG_ERROR("FSR2: failed to create scene framebuffer"); + destroyFSR2Resources(); + return false; + } + + // Samplers + VkSamplerCreateInfo samplerInfo{}; + samplerInfo.sType = VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO; + samplerInfo.minFilter = VK_FILTER_LINEAR; + samplerInfo.magFilter = VK_FILTER_LINEAR; + samplerInfo.addressModeU = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE; + samplerInfo.addressModeV = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE; + samplerInfo.addressModeW = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE; + vkCreateSampler(device, &samplerInfo, nullptr, &fsr2_.linearSampler); + + samplerInfo.minFilter = VK_FILTER_NEAREST; + samplerInfo.magFilter = VK_FILTER_NEAREST; + vkCreateSampler(device, &samplerInfo, nullptr, &fsr2_.nearestSampler); + + // --- Motion Vector Compute Pipeline --- + { + // Descriptor set layout: binding 0 = depth (sampler), binding 1 = motion vectors (storage image) + VkDescriptorSetLayoutBinding bindings[2] = {}; + bindings[0].binding = 0; + bindings[0].descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER; + bindings[0].descriptorCount = 1; + bindings[0].stageFlags = VK_SHADER_STAGE_COMPUTE_BIT; + bindings[1].binding = 1; + bindings[1].descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE; + bindings[1].descriptorCount = 1; + bindings[1].stageFlags = VK_SHADER_STAGE_COMPUTE_BIT; + + VkDescriptorSetLayoutCreateInfo layoutInfo{VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO}; + layoutInfo.bindingCount = 2; + layoutInfo.pBindings = bindings; + vkCreateDescriptorSetLayout(device, &layoutInfo, nullptr, &fsr2_.motionVecDescSetLayout); + + VkPushConstantRange pc{}; + pc.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT; + pc.offset = 0; + pc.size = 2 * sizeof(glm::mat4) + 2 * sizeof(glm::vec4); // 160 bytes + + VkPipelineLayoutCreateInfo plCI{VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO}; + plCI.setLayoutCount = 1; + 
plCI.pSetLayouts = &fsr2_.motionVecDescSetLayout; + plCI.pushConstantRangeCount = 1; + plCI.pPushConstantRanges = &pc; + vkCreatePipelineLayout(device, &plCI, nullptr, &fsr2_.motionVecPipelineLayout); + + VkShaderModule compMod; + if (!compMod.loadFromFile(device, "assets/shaders/fsr2_motion.comp.spv")) { + LOG_ERROR("FSR2: failed to load motion vector compute shader"); + destroyFSR2Resources(); + return false; + } + + VkComputePipelineCreateInfo cpCI{VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO}; + cpCI.stage = compMod.stageInfo(VK_SHADER_STAGE_COMPUTE_BIT); + cpCI.layout = fsr2_.motionVecPipelineLayout; + if (vkCreateComputePipelines(device, VK_NULL_HANDLE, 1, &cpCI, nullptr, &fsr2_.motionVecPipeline) != VK_SUCCESS) { + LOG_ERROR("FSR2: failed to create motion vector pipeline"); + compMod.destroy(); + destroyFSR2Resources(); + return false; + } + compMod.destroy(); + + // Descriptor pool + set + VkDescriptorPoolSize poolSizes[2] = {}; + poolSizes[0] = {VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, 1}; + poolSizes[1] = {VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, 1}; + VkDescriptorPoolCreateInfo poolInfo{VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO}; + poolInfo.maxSets = 1; + poolInfo.poolSizeCount = 2; + poolInfo.pPoolSizes = poolSizes; + vkCreateDescriptorPool(device, &poolInfo, nullptr, &fsr2_.motionVecDescPool); + + VkDescriptorSetAllocateInfo dsAI{VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO}; + dsAI.descriptorPool = fsr2_.motionVecDescPool; + dsAI.descriptorSetCount = 1; + dsAI.pSetLayouts = &fsr2_.motionVecDescSetLayout; + vkAllocateDescriptorSets(device, &dsAI, &fsr2_.motionVecDescSet); + + // Write descriptors + VkDescriptorImageInfo depthImgInfo{}; + depthImgInfo.sampler = fsr2_.nearestSampler; + depthImgInfo.imageView = fsr2_.sceneDepth.imageView; + depthImgInfo.imageLayout = VK_IMAGE_LAYOUT_DEPTH_STENCIL_READ_ONLY_OPTIMAL; + + VkDescriptorImageInfo mvImgInfo{}; + mvImgInfo.imageView = fsr2_.motionVectors.imageView; + mvImgInfo.imageLayout = 
VK_IMAGE_LAYOUT_GENERAL; + + VkWriteDescriptorSet writes[2] = {}; + writes[0].sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET; + writes[0].dstSet = fsr2_.motionVecDescSet; + writes[0].dstBinding = 0; + writes[0].descriptorCount = 1; + writes[0].descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER; + writes[0].pImageInfo = &depthImgInfo; + + writes[1].sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET; + writes[1].dstSet = fsr2_.motionVecDescSet; + writes[1].dstBinding = 1; + writes[1].descriptorCount = 1; + writes[1].descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE; + writes[1].pImageInfo = &mvImgInfo; + + vkUpdateDescriptorSets(device, 2, writes, 0, nullptr); + } + + // --- Temporal Accumulation Compute Pipeline --- + { + // bindings: 0=sceneColor, 1=depth, 2=motionVectors, 3=historyInput, 4=historyOutput + VkDescriptorSetLayoutBinding bindings[5] = {}; + for (int i = 0; i < 4; i++) { + bindings[i].binding = i; + bindings[i].descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER; + bindings[i].descriptorCount = 1; + bindings[i].stageFlags = VK_SHADER_STAGE_COMPUTE_BIT; + } + bindings[4].binding = 4; + bindings[4].descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE; + bindings[4].descriptorCount = 1; + bindings[4].stageFlags = VK_SHADER_STAGE_COMPUTE_BIT; + + VkDescriptorSetLayoutCreateInfo layoutInfo{VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO}; + layoutInfo.bindingCount = 5; + layoutInfo.pBindings = bindings; + vkCreateDescriptorSetLayout(device, &layoutInfo, nullptr, &fsr2_.accumulateDescSetLayout); + + VkPushConstantRange pc{}; + pc.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT; + pc.offset = 0; + pc.size = 4 * sizeof(glm::vec4); // 64 bytes + + VkPipelineLayoutCreateInfo plCI{VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO}; + plCI.setLayoutCount = 1; + plCI.pSetLayouts = &fsr2_.accumulateDescSetLayout; + plCI.pushConstantRangeCount = 1; + plCI.pPushConstantRanges = &pc; + vkCreatePipelineLayout(device, &plCI, nullptr, &fsr2_.accumulatePipelineLayout); + 
+ VkShaderModule compMod; + if (!compMod.loadFromFile(device, "assets/shaders/fsr2_accumulate.comp.spv")) { + LOG_ERROR("FSR2: failed to load accumulation compute shader"); + destroyFSR2Resources(); + return false; + } + + VkComputePipelineCreateInfo cpCI{VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO}; + cpCI.stage = compMod.stageInfo(VK_SHADER_STAGE_COMPUTE_BIT); + cpCI.layout = fsr2_.accumulatePipelineLayout; + if (vkCreateComputePipelines(device, VK_NULL_HANDLE, 1, &cpCI, nullptr, &fsr2_.accumulatePipeline) != VK_SUCCESS) { + LOG_ERROR("FSR2: failed to create accumulation pipeline"); + compMod.destroy(); + destroyFSR2Resources(); + return false; + } + compMod.destroy(); + + // Descriptor pool: 2 sets (ping-pong), each with 4 samplers + 1 storage image + VkDescriptorPoolSize poolSizes[2] = {}; + poolSizes[0] = {VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, 8}; + poolSizes[1] = {VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, 2}; + VkDescriptorPoolCreateInfo poolInfo{VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO}; + poolInfo.maxSets = 2; + poolInfo.poolSizeCount = 2; + poolInfo.pPoolSizes = poolSizes; + vkCreateDescriptorPool(device, &poolInfo, nullptr, &fsr2_.accumulateDescPool); + + // Allocate 2 descriptor sets (one per ping-pong direction) + VkDescriptorSetLayout layouts[2] = { fsr2_.accumulateDescSetLayout, fsr2_.accumulateDescSetLayout }; + VkDescriptorSetAllocateInfo dsAI{VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO}; + dsAI.descriptorPool = fsr2_.accumulateDescPool; + dsAI.descriptorSetCount = 2; + dsAI.pSetLayouts = layouts; + vkAllocateDescriptorSets(device, &dsAI, fsr2_.accumulateDescSets); + + // Write descriptors for both ping-pong sets + for (int pp = 0; pp < 2; pp++) { + int inputHistory = 1 - pp; // Read from the other + int outputHistory = pp; // Write to this one + + VkDescriptorImageInfo colorInfo{fsr2_.linearSampler, fsr2_.sceneColor.imageView, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL}; + VkDescriptorImageInfo depthInfo{fsr2_.nearestSampler, 
fsr2_.sceneDepth.imageView, VK_IMAGE_LAYOUT_DEPTH_STENCIL_READ_ONLY_OPTIMAL}; + VkDescriptorImageInfo mvInfo{fsr2_.nearestSampler, fsr2_.motionVectors.imageView, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL}; + VkDescriptorImageInfo histInInfo{fsr2_.linearSampler, fsr2_.history[inputHistory].imageView, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL}; + VkDescriptorImageInfo histOutInfo{VK_NULL_HANDLE, fsr2_.history[outputHistory].imageView, VK_IMAGE_LAYOUT_GENERAL}; + + VkWriteDescriptorSet writes[5] = {}; + for (int w = 0; w < 5; w++) { + writes[w].sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET; + writes[w].dstSet = fsr2_.accumulateDescSets[pp]; + writes[w].dstBinding = w; + writes[w].descriptorCount = 1; + } + writes[0].descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER; writes[0].pImageInfo = &colorInfo; + writes[1].descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER; writes[1].pImageInfo = &depthInfo; + writes[2].descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER; writes[2].pImageInfo = &mvInfo; + writes[3].descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER; writes[3].pImageInfo = &histInInfo; + writes[4].descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE; writes[4].pImageInfo = &histOutInfo; + + vkUpdateDescriptorSets(device, 5, writes, 0, nullptr); + } + } + + // --- RCAS Sharpening Pipeline (fragment shader, fullscreen pass) --- + { + VkDescriptorSetLayoutBinding binding{}; + binding.binding = 0; + binding.descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER; + binding.descriptorCount = 1; + binding.stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT; + + VkDescriptorSetLayoutCreateInfo layoutInfo{VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO}; + layoutInfo.bindingCount = 1; + layoutInfo.pBindings = &binding; + vkCreateDescriptorSetLayout(device, &layoutInfo, nullptr, &fsr2_.sharpenDescSetLayout); + + VkPushConstantRange pc{}; + pc.stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT; + pc.offset = 0; + pc.size = sizeof(glm::vec4); + + 
VkPipelineLayoutCreateInfo plCI{VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO}; + plCI.setLayoutCount = 1; + plCI.pSetLayouts = &fsr2_.sharpenDescSetLayout; + plCI.pushConstantRangeCount = 1; + plCI.pPushConstantRanges = &pc; + vkCreatePipelineLayout(device, &plCI, nullptr, &fsr2_.sharpenPipelineLayout); + + VkShaderModule vertMod, fragMod; + if (!vertMod.loadFromFile(device, "assets/shaders/postprocess.vert.spv") || + !fragMod.loadFromFile(device, "assets/shaders/fsr2_sharpen.frag.spv")) { + LOG_ERROR("FSR2: failed to load sharpen shaders"); + destroyFSR2Resources(); + return false; + } + + fsr2_.sharpenPipeline = PipelineBuilder() + .setShaders(vertMod.stageInfo(VK_SHADER_STAGE_VERTEX_BIT), + fragMod.stageInfo(VK_SHADER_STAGE_FRAGMENT_BIT)) + .setVertexInput({}, {}) + .setTopology(VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST) + .setRasterization(VK_POLYGON_MODE_FILL, VK_CULL_MODE_NONE) + .setNoDepthTest() + .setColorBlendAttachment(PipelineBuilder::blendDisabled()) + .setMultisample(VK_SAMPLE_COUNT_1_BIT) + .setLayout(fsr2_.sharpenPipelineLayout) + .setRenderPass(vkCtx->getImGuiRenderPass()) + .setDynamicStates({VK_DYNAMIC_STATE_VIEWPORT, VK_DYNAMIC_STATE_SCISSOR}) + .build(device); + + vertMod.destroy(); + fragMod.destroy(); + + if (!fsr2_.sharpenPipeline) { + LOG_ERROR("FSR2: failed to create sharpen pipeline"); + destroyFSR2Resources(); + return false; + } + + // Descriptor pool + set for sharpen pass (reads from history output) + VkDescriptorPoolSize poolSize{VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, 2}; + VkDescriptorPoolCreateInfo poolInfo{VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO}; + poolInfo.maxSets = 1; + poolInfo.poolSizeCount = 1; + poolInfo.pPoolSizes = &poolSize; + vkCreateDescriptorPool(device, &poolInfo, nullptr, &fsr2_.sharpenDescPool); + + VkDescriptorSetAllocateInfo dsAI{VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO}; + dsAI.descriptorPool = fsr2_.sharpenDescPool; + dsAI.descriptorSetCount = 1; + dsAI.pSetLayouts = &fsr2_.sharpenDescSetLayout; + 
vkAllocateDescriptorSets(device, &dsAI, &fsr2_.sharpenDescSet); + // Descriptor updated dynamically each frame to point at the correct history buffer + } + + fsr2_.needsHistoryReset = true; + fsr2_.frameIndex = 0; + LOG_INFO("FSR2: initialized successfully"); + return true; +} + +void Renderer::destroyFSR2Resources() { + if (!vkCtx) return; + VkDevice device = vkCtx->getDevice(); + VmaAllocator alloc = vkCtx->getAllocator(); + + vkDeviceWaitIdle(device); + + if (fsr2_.sharpenPipeline) { vkDestroyPipeline(device, fsr2_.sharpenPipeline, nullptr); fsr2_.sharpenPipeline = VK_NULL_HANDLE; } + if (fsr2_.sharpenPipelineLayout) { vkDestroyPipelineLayout(device, fsr2_.sharpenPipelineLayout, nullptr); fsr2_.sharpenPipelineLayout = VK_NULL_HANDLE; } + if (fsr2_.sharpenDescPool) { vkDestroyDescriptorPool(device, fsr2_.sharpenDescPool, nullptr); fsr2_.sharpenDescPool = VK_NULL_HANDLE; fsr2_.sharpenDescSet = VK_NULL_HANDLE; } + if (fsr2_.sharpenDescSetLayout) { vkDestroyDescriptorSetLayout(device, fsr2_.sharpenDescSetLayout, nullptr); fsr2_.sharpenDescSetLayout = VK_NULL_HANDLE; } + + if (fsr2_.accumulatePipeline) { vkDestroyPipeline(device, fsr2_.accumulatePipeline, nullptr); fsr2_.accumulatePipeline = VK_NULL_HANDLE; } + if (fsr2_.accumulatePipelineLayout) { vkDestroyPipelineLayout(device, fsr2_.accumulatePipelineLayout, nullptr); fsr2_.accumulatePipelineLayout = VK_NULL_HANDLE; } + if (fsr2_.accumulateDescPool) { vkDestroyDescriptorPool(device, fsr2_.accumulateDescPool, nullptr); fsr2_.accumulateDescPool = VK_NULL_HANDLE; fsr2_.accumulateDescSets[0] = fsr2_.accumulateDescSets[1] = VK_NULL_HANDLE; } + if (fsr2_.accumulateDescSetLayout) { vkDestroyDescriptorSetLayout(device, fsr2_.accumulateDescSetLayout, nullptr); fsr2_.accumulateDescSetLayout = VK_NULL_HANDLE; } + + if (fsr2_.motionVecPipeline) { vkDestroyPipeline(device, fsr2_.motionVecPipeline, nullptr); fsr2_.motionVecPipeline = VK_NULL_HANDLE; } + if (fsr2_.motionVecPipelineLayout) { vkDestroyPipelineLayout(device, 
fsr2_.motionVecPipelineLayout, nullptr); fsr2_.motionVecPipelineLayout = VK_NULL_HANDLE; } + if (fsr2_.motionVecDescPool) { vkDestroyDescriptorPool(device, fsr2_.motionVecDescPool, nullptr); fsr2_.motionVecDescPool = VK_NULL_HANDLE; fsr2_.motionVecDescSet = VK_NULL_HANDLE; } + if (fsr2_.motionVecDescSetLayout) { vkDestroyDescriptorSetLayout(device, fsr2_.motionVecDescSetLayout, nullptr); fsr2_.motionVecDescSetLayout = VK_NULL_HANDLE; } + + if (fsr2_.sceneFramebuffer) { vkDestroyFramebuffer(device, fsr2_.sceneFramebuffer, nullptr); fsr2_.sceneFramebuffer = VK_NULL_HANDLE; } + if (fsr2_.linearSampler) { vkDestroySampler(device, fsr2_.linearSampler, nullptr); fsr2_.linearSampler = VK_NULL_HANDLE; } + if (fsr2_.nearestSampler) { vkDestroySampler(device, fsr2_.nearestSampler, nullptr); fsr2_.nearestSampler = VK_NULL_HANDLE; } + + destroyImage(device, alloc, fsr2_.motionVectors); + for (int i = 0; i < 2; i++) destroyImage(device, alloc, fsr2_.history[i]); + destroyImage(device, alloc, fsr2_.sceneDepth); + destroyImage(device, alloc, fsr2_.sceneColor); + + fsr2_.internalWidth = 0; + fsr2_.internalHeight = 0; +} + +void Renderer::dispatchMotionVectors() { + if (!fsr2_.motionVecPipeline || currentCmd == VK_NULL_HANDLE) return; + + // Transition depth: DEPTH_STENCIL_ATTACHMENT → DEPTH_STENCIL_READ_ONLY + transitionImageLayout(currentCmd, fsr2_.sceneDepth.image, + VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL, + VK_IMAGE_LAYOUT_DEPTH_STENCIL_READ_ONLY_OPTIMAL, + VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT, + VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT); + + // Transition motion vectors: UNDEFINED → GENERAL + transitionImageLayout(currentCmd, fsr2_.motionVectors.image, + VK_IMAGE_LAYOUT_UNDEFINED, VK_IMAGE_LAYOUT_GENERAL, + VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, + VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT); + + vkCmdBindPipeline(currentCmd, VK_PIPELINE_BIND_POINT_COMPUTE, fsr2_.motionVecPipeline); + vkCmdBindDescriptorSets(currentCmd, VK_PIPELINE_BIND_POINT_COMPUTE, + 
fsr2_.motionVecPipelineLayout, 0, 1, &fsr2_.motionVecDescSet, 0, nullptr); + + // Push constants: invViewProj, prevViewProj, resolution, jitterOffset + struct { + glm::mat4 invViewProj; + glm::mat4 prevViewProj; + glm::vec4 resolution; + glm::vec4 jitterOffset; + } pc; + + glm::mat4 currentVP = camera->getProjectionMatrix() * camera->getViewMatrix(); + pc.invViewProj = glm::inverse(currentVP); + pc.prevViewProj = fsr2_.prevViewProjection; + pc.resolution = glm::vec4( + static_cast(fsr2_.internalWidth), + static_cast(fsr2_.internalHeight), + 1.0f / fsr2_.internalWidth, + 1.0f / fsr2_.internalHeight); + glm::vec2 jitter = camera->getJitter(); + pc.jitterOffset = glm::vec4(jitter.x, jitter.y, fsr2_.prevJitter.x, fsr2_.prevJitter.y); + + vkCmdPushConstants(currentCmd, fsr2_.motionVecPipelineLayout, + VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(pc), &pc); + + uint32_t gx = (fsr2_.internalWidth + 7) / 8; + uint32_t gy = (fsr2_.internalHeight + 7) / 8; + vkCmdDispatch(currentCmd, gx, gy, 1); + + // Transition motion vectors: GENERAL → SHADER_READ_ONLY for accumulation + transitionImageLayout(currentCmd, fsr2_.motionVectors.image, + VK_IMAGE_LAYOUT_GENERAL, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, + VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, + VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT); +} + +void Renderer::dispatchTemporalAccumulate() { + if (!fsr2_.accumulatePipeline || currentCmd == VK_NULL_HANDLE) return; + + VkExtent2D swapExtent = vkCtx->getSwapchainExtent(); + uint32_t outputIdx = fsr2_.currentHistory; + uint32_t inputIdx = 1 - outputIdx; + + // Transition scene color: PRESENT_SRC_KHR → SHADER_READ_ONLY + transitionImageLayout(currentCmd, fsr2_.sceneColor.image, + VK_IMAGE_LAYOUT_PRESENT_SRC_KHR, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, + VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, + VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT); + + // Transition history input: GENERAL/UNDEFINED → SHADER_READ_ONLY + transitionImageLayout(currentCmd, fsr2_.history[inputIdx].image, + 
fsr2_.needsHistoryReset ? VK_IMAGE_LAYOUT_UNDEFINED : VK_IMAGE_LAYOUT_GENERAL, + VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, + VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, + VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT); + + // Transition history output: UNDEFINED → GENERAL + transitionImageLayout(currentCmd, fsr2_.history[outputIdx].image, + VK_IMAGE_LAYOUT_UNDEFINED, VK_IMAGE_LAYOUT_GENERAL, + VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, + VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT); + + vkCmdBindPipeline(currentCmd, VK_PIPELINE_BIND_POINT_COMPUTE, fsr2_.accumulatePipeline); + vkCmdBindDescriptorSets(currentCmd, VK_PIPELINE_BIND_POINT_COMPUTE, + fsr2_.accumulatePipelineLayout, 0, 1, &fsr2_.accumulateDescSets[outputIdx], 0, nullptr); + + // Push constants + struct { + glm::vec4 internalSize; + glm::vec4 displaySize; + glm::vec4 jitterOffset; + glm::vec4 params; + } pc; + + pc.internalSize = glm::vec4( + static_cast(fsr2_.internalWidth), static_cast(fsr2_.internalHeight), + 1.0f / fsr2_.internalWidth, 1.0f / fsr2_.internalHeight); + pc.displaySize = glm::vec4( + static_cast(swapExtent.width), static_cast(swapExtent.height), + 1.0f / swapExtent.width, 1.0f / swapExtent.height); + glm::vec2 jitter = camera->getJitter(); + pc.jitterOffset = glm::vec4(jitter.x, jitter.y, 0.0f, 0.0f); + pc.params = glm::vec4(fsr2_.needsHistoryReset ? 
1.0f : 0.0f, fsr2_.sharpness, 0.0f, 0.0f); + + vkCmdPushConstants(currentCmd, fsr2_.accumulatePipelineLayout, + VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(pc), &pc); + + uint32_t gx = (swapExtent.width + 7) / 8; + uint32_t gy = (swapExtent.height + 7) / 8; + vkCmdDispatch(currentCmd, gx, gy, 1); + + fsr2_.needsHistoryReset = false; +} + +void Renderer::renderFSR2Sharpen() { + if (!fsr2_.sharpenPipeline || currentCmd == VK_NULL_HANDLE) return; + + VkExtent2D ext = vkCtx->getSwapchainExtent(); + uint32_t outputIdx = fsr2_.currentHistory; + + // Update sharpen descriptor to point at current history output + VkDescriptorImageInfo imgInfo{}; + imgInfo.sampler = fsr2_.linearSampler; + imgInfo.imageView = fsr2_.history[outputIdx].imageView; + imgInfo.imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; + + VkWriteDescriptorSet write{VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET}; + write.dstSet = fsr2_.sharpenDescSet; + write.dstBinding = 0; + write.descriptorCount = 1; + write.descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER; + write.pImageInfo = &imgInfo; + vkUpdateDescriptorSets(vkCtx->getDevice(), 1, &write, 0, nullptr); + + vkCmdBindPipeline(currentCmd, VK_PIPELINE_BIND_POINT_GRAPHICS, fsr2_.sharpenPipeline); + vkCmdBindDescriptorSets(currentCmd, VK_PIPELINE_BIND_POINT_GRAPHICS, + fsr2_.sharpenPipelineLayout, 0, 1, &fsr2_.sharpenDescSet, 0, nullptr); + + glm::vec4 params(1.0f / ext.width, 1.0f / ext.height, fsr2_.sharpness, 0.0f); + vkCmdPushConstants(currentCmd, fsr2_.sharpenPipelineLayout, + VK_SHADER_STAGE_FRAGMENT_BIT, 0, sizeof(glm::vec4), ¶ms); + + vkCmdDraw(currentCmd, 3, 1, 0, 0); +} + +void Renderer::setFSR2Enabled(bool enabled) { + if (fsr2_.enabled == enabled) return; + fsr2_.enabled = enabled; + + if (enabled) { + // FSR2 replaces both FSR1 and MSAA + if (fsr_.enabled) { + fsr_.enabled = false; + fsr_.needsRecreate = true; + } + // Use FSR1's scale factor and sharpness as defaults + fsr2_.scaleFactor = fsr_.scaleFactor; + fsr2_.sharpness = 
fsr_.sharpness; + fsr2_.needsHistoryReset = true; + } else { + fsr2_.needsRecreate = true; + if (camera) camera->clearJitter(); + } +} + +// ========================= End FSR 2.2 ========================= void Renderer::renderWorld(game::World* world, game::GameHandler* gameHandler) { (void)world; diff --git a/src/ui/game_screen.cpp b/src/ui/game_screen.cpp index 8b79cd4c..96800895 100644 --- a/src/ui/game_screen.cpp +++ b/src/ui/game_screen.cpp @@ -6290,13 +6290,21 @@ void GameScreen::renderSettingsWindow() { saveSettings(); } } - // FSR 1.0 Upscaling + // FSR Upscaling { - if (ImGui::Checkbox("FSR Upscaling (Experimental)", &pendingFSR)) { - if (renderer) renderer->setFSREnabled(pendingFSR); + // FSR mode selection: Off, FSR 1.0 (Spatial), FSR 2.2 (Temporal) + const char* fsrModeLabels[] = { "Off", "FSR 1.0 (Spatial)", "FSR 2.2 (Temporal)" }; + int fsrMode = pendingFSR ? 1 : 0; + if (renderer && renderer->isFSR2Enabled()) fsrMode = 2; + if (ImGui::Combo("Upscaling", &fsrMode, fsrModeLabels, 3)) { + pendingFSR = (fsrMode == 1); + if (renderer) { + renderer->setFSREnabled(fsrMode == 1); + renderer->setFSR2Enabled(fsrMode == 2); + } saveSettings(); } - if (pendingFSR) { + if (fsrMode > 0) { const char* fsrQualityLabels[] = { "Ultra Quality (77%)", "Quality (67%)", "Balanced (59%)", "Performance (50%)" }; static const float fsrScaleFactors[] = { 0.77f, 0.67f, 0.59f, 0.50f }; if (ImGui::Combo("FSR Quality", &pendingFSRQuality, fsrQualityLabels, 4)) {