diff --git a/assets/shaders/fsr2_accumulate.comp.glsl b/assets/shaders/fsr2_accumulate.comp.glsl index bcaad6f8..00eb6d88 100644 --- a/assets/shaders/fsr2_accumulate.comp.glsl +++ b/assets/shaders/fsr2_accumulate.comp.glsl @@ -12,7 +12,7 @@ layout(push_constant) uniform PushConstants { vec4 internalSize; // xy = internal resolution, zw = 1/internal vec4 displaySize; // xy = display resolution, zw = 1/display vec4 jitterOffset; // xy = current jitter (NDC-space), zw = unused - vec4 params; // x = resetHistory (1=reset), y = sharpness, zw = unused + vec4 params; // x = resetHistory, y = sharpness, z = convergenceFrame, w = unused } pc; vec3 tonemap(vec3 c) { @@ -39,45 +39,45 @@ vec3 yCoCgToRgb(vec3 ycocg) { return vec3(y + co - cg, y + cg, y - co - cg); } -// Catmull-Rom bicubic (9 bilinear taps) with anti-ringing clamp. -vec3 sampleBicubic(sampler2D tex, vec2 uv, vec2 texSize) { +vec3 clipAABB(vec3 aabbMin, vec3 aabbMax, vec3 history) { + vec3 center = 0.5 * (aabbMax + aabbMin); + vec3 extents = 0.5 * (aabbMax - aabbMin) + 0.001; + vec3 offset = history - center; + vec3 absUnits = abs(offset / extents); + float maxUnit = max(absUnits.x, max(absUnits.y, absUnits.z)); + if (maxUnit > 1.0) + return center + offset / maxUnit; + return history; +} + +// Lanczos2 kernel: sharper than bicubic, preserves high-frequency detail +float lanczos2(float x) { + if (abs(x) < 1e-6) return 1.0; + if (abs(x) >= 2.0) return 0.0; + float px = 3.14159265 * x; + return sin(px) * sin(px * 0.5) / (px * px * 0.5); +} + +// Lanczos2 upsampling: sharper than Catmull-Rom bicubic +vec3 sampleLanczos(sampler2D tex, vec2 uv, vec2 texSize) { vec2 invTexSize = 1.0 / texSize; - vec2 iTc = uv * texSize; - vec2 tc = floor(iTc - 0.5) + 0.5; - vec2 f = iTc - tc; + vec2 texelPos = uv * texSize - 0.5; + ivec2 base = ivec2(floor(texelPos)); + vec2 f = texelPos - vec2(base); - vec2 w0 = f * (-0.5 + f * (1.0 - 0.5 * f)); - vec2 w1 = 1.0 + f * f * (-2.5 + 1.5 * f); - vec2 w2 = f * (0.5 + f * (2.0 - 1.5 * f)); - vec2 w3 = f * f * (-0.5 + 0.5 * f); - - vec2 s12 = w1 + w2; - vec2 offset12 = w2 / s12; - - vec2 tc0 = (tc - 1.0) * invTexSize; - vec2 tc3 = (tc + 2.0) * invTexSize; - vec2 tc12 = (tc + offset12) * invTexSize; - - vec3 result = - (texture(tex, vec2(tc0.x, tc0.y)).rgb * w0.x + - texture(tex, vec2(tc12.x, tc0.y)).rgb * s12.x + - texture(tex, vec2(tc3.x, tc0.y)).rgb * w3.x) * w0.y + - (texture(tex, vec2(tc0.x, tc12.y)).rgb * w0.x + - texture(tex, vec2(tc12.x, tc12.y)).rgb * s12.x + - texture(tex, vec2(tc3.x, tc12.y)).rgb * w3.x) * s12.y + - (texture(tex, vec2(tc0.x, tc3.y)).rgb * w0.x + - texture(tex, vec2(tc12.x, tc3.y)).rgb * s12.x + - texture(tex, vec2(tc3.x, tc3.y)).rgb * w3.x) * w3.y; - - // Anti-ringing: clamp to range of the 4 nearest texels - vec2 tcNear = tc * invTexSize; - vec3 t00 = texture(tex, tcNear).rgb; - vec3 t10 = texture(tex, tcNear + vec2(invTexSize.x, 0.0)).rgb; - vec3 t01 = texture(tex, tcNear + vec2(0.0, invTexSize.y)).rgb; - vec3 t11 = texture(tex, tcNear + invTexSize).rgb; - vec3 minC = min(min(t00, t10), min(t01, t11)); - vec3 maxC = max(max(t00, t10), max(t01, t11)); - return clamp(result, minC, maxC); + vec3 result = vec3(0.0); + float totalWeight = 0.0; + for (int y = -1; y <= 2; y++) { + for (int x = -1; x <= 2; x++) { + vec2 samplePos = (vec2(base + ivec2(x, y)) + 0.5) * invTexSize; + float wx = lanczos2(float(x) - f.x); + float wy = lanczos2(float(y) - f.y); + float w = wx * wy; + result += texture(tex, samplePos).rgb * w; + totalWeight += w; + } + } + return result / totalWeight; } void main() { @@ -87,9 +87,12 @@ void main() { vec2 outUV = (vec2(outPixel) + 0.5) * pc.displaySize.zw; - vec3 currentColor = sampleBicubic(sceneColor, outUV, pc.internalSize.xy); + // Lanczos2 upsample: sharper than bicubic, better base image + vec3 currentColor = sampleLanczos(sceneColor, outUV, pc.internalSize.xy); - if (pc.params.x > 0.5) { + // Temporal accumulation mode. + const bool kUseTemporal = true; + if (!kUseTemporal || pc.params.x > 0.5) { imageStore(historyOutput, outPixel, vec4(currentColor, 1.0)); return; } @@ -116,65 +119,52 @@ void main() { historyUV.y >= 0.0 && historyUV.y <= 1.0) ? 1.0 : 0.0; vec3 historyColor = texture(historyInput, historyUV).rgb; - // === Tonemapped accumulation === + // Tonemapped space for blending vec3 tmCurrent = tonemap(currentColor); vec3 tmHistory = tonemap(historyColor); - // Neighborhood in tonemapped YCoCg + // 5-tap cross neighborhood for variance (cheaper than 9-tap, sufficient) vec3 s0 = rgbToYCoCg(tmCurrent); vec3 s1 = rgbToYCoCg(tonemap(texture(sceneColor, outUV + vec2(-texelSize.x, 0.0)).rgb)); vec3 s2 = rgbToYCoCg(tonemap(texture(sceneColor, outUV + vec2( texelSize.x, 0.0)).rgb)); vec3 s3 = rgbToYCoCg(tonemap(texture(sceneColor, outUV + vec2(0.0, -texelSize.y)).rgb)); vec3 s4 = rgbToYCoCg(tonemap(texture(sceneColor, outUV + vec2(0.0, texelSize.y)).rgb)); - vec3 s5 = rgbToYCoCg(tonemap(texture(sceneColor, outUV + vec2(-texelSize.x, -texelSize.y)).rgb)); - vec3 s6 = rgbToYCoCg(tonemap(texture(sceneColor, outUV + vec2( texelSize.x, -texelSize.y)).rgb)); - vec3 s7 = rgbToYCoCg(tonemap(texture(sceneColor, outUV + vec2(-texelSize.x, texelSize.y)).rgb)); - vec3 s8 = rgbToYCoCg(tonemap(texture(sceneColor, outUV + vec2( texelSize.x, texelSize.y)).rgb)); - vec3 m1 = s0 + s1 + s2 + s3 + s4 + s5 + s6 + s7 + s8; - vec3 m2 = s0*s0 + s1*s1 + s2*s2 + s3*s3 + s4*s4 + s5*s5 + s6*s6 + s7*s7 + s8*s8; - vec3 mean = m1 / 9.0; - vec3 variance = max(m2 / 9.0 - mean * mean, vec3(0.0)); + vec3 m1 = s0 + s1 + s2 + s3 + s4; + vec3 m2 = s0*s0 + s1*s1 + s2*s2 + s3*s3 + s4*s4; + vec3 mean = m1 / 5.0; + vec3 variance = max(m2 / 5.0 - mean * mean, vec3(0.0)); vec3 stddev = sqrt(variance); - float gamma = 1.5; + float gamma = 1.25; vec3 boxMin = mean - gamma * stddev; vec3 boxMax = mean + gamma * stddev; - // Compute clamped history and measure how far it was from the box + // Variance clip history vec3 tmHistYCoCg = rgbToYCoCg(tmHistory); - vec3 clampedYCoCg = clamp(tmHistYCoCg, boxMin, boxMax); - float clampDist = length(tmHistYCoCg - clampedYCoCg); + vec3 clippedYCoCg = clipAABB(boxMin, boxMax, tmHistYCoCg); + float clipDist = length(tmHistYCoCg - clippedYCoCg); + tmHistory = yCoCgToRgb(clippedYCoCg); - // SELECTIVE CLAMP: only modify history when there's motion or disocclusion. - // For static pixels, history is already well-accumulated — clamping it - // each frame causes the clamp box (which shifts with jitter) to drag - // the history around, creating visible shimmer. By leaving static history - // untouched, accumulated anti-aliasing and detail is preserved. - float needsClamp = max( - clamp(motionMag * 2.0, 0.0, 1.0), // motion → full clamp - clamp(clampDist * 3.0, 0.0, 1.0) // disocclusion → full clamp - ); - tmHistory = yCoCgToRgb(mix(tmHistYCoCg, clampedYCoCg, needsClamp)); + // --- Blend factor --- + // Base: always start from current frame (sharp Lanczos). + // Temporal blending only at edges with small fixed weight. + // This provides AA without blurring smooth areas. - // Blend: higher for good jitter samples, lower for poor ones. - // Jitter-aware weighting: current frame's sample quality depends on - // how close the jittered sample fell to this output pixel. - vec2 jitterPx = pc.jitterOffset.xy * 0.5 * pc.internalSize.xy; - vec2 internalPos = outUV * pc.internalSize.xy; - vec2 subPixelOffset = fract(internalPos) - 0.5; - vec2 sampleDelta = subPixelOffset - jitterPx; - float dist2 = dot(sampleDelta, sampleDelta); - float sampleQuality = exp(-dist2 * 3.0); - float blendFactor = mix(0.03, 0.20, sampleQuality); + // Edge detection: luminance variance in YCoCg + float edgeStrength = smoothstep(0.04, 0.12, stddev.x); - // Disocclusion: aggressively replace stale history - blendFactor = mix(blendFactor, 0.80, clamp(clampDist * 5.0, 0.0, 1.0)); + // Keep temporal reconstruction active continuously instead of freezing after + // a small convergence window. Favor history on stable pixels and favor + // current color when edge/motion risk is high to avoid blur/ghosting. + float motionFactor = smoothstep(0.05, 1.5, motionMag); + float currentBase = mix(0.12, 0.30, edgeStrength); + float blendFactor = mix(currentBase, 0.85, motionFactor); - // Velocity: strong response during camera/object motion - blendFactor = max(blendFactor, clamp(motionMag * 0.30, 0.0, 0.50)); + // Disocclusion: replace stale history + blendFactor = max(blendFactor, clamp(clipDist * 5.0, 0.0, 0.80)); - // Full current frame when history is out of bounds + // Invalid history: use current frame blendFactor = mix(blendFactor, 1.0, 1.0 - historyValid); // Blend in tonemapped space, inverse-tonemap back to linear diff --git a/assets/shaders/fsr2_accumulate.comp.spv b/assets/shaders/fsr2_accumulate.comp.spv index c4590379..e68cd720 100644 Binary files a/assets/shaders/fsr2_accumulate.comp.spv and b/assets/shaders/fsr2_accumulate.comp.spv differ diff --git a/assets/shaders/fsr2_motion.comp.glsl b/assets/shaders/fsr2_motion.comp.glsl index 1f86cb89..9a7a0eb1 100644 --- a/assets/shaders/fsr2_motion.comp.glsl +++ b/assets/shaders/fsr2_motion.comp.glsl @@ -6,45 +6,41 @@ layout(set = 0, binding = 0) uniform sampler2D depthBuffer; layout(set = 0, binding = 1, rg16f) uniform writeonly image2D motionVectors; layout(push_constant) uniform PushConstants { - mat4 reprojMatrix; // prevUnjitteredVP * inverse(currentUnjitteredVP) - vec4 resolution; // xy = internal size, zw = 1/internal size - vec4 jitterOffset; // xy = current jitter (NDC), zw = unused + mat4 prevViewProjection; // previous jittered VP + mat4 invCurrentViewProj; // inverse(current jittered VP) } pc; void main() { ivec2 pixelCoord = ivec2(gl_GlobalInvocationID.xy); - ivec2 imgSize = ivec2(pc.resolution.xy); + ivec2 imgSize = imageSize(motionVectors); if (pixelCoord.x >= imgSize.x || pixelCoord.y >= imgSize.y) return; float depth = texelFetch(depthBuffer, pixelCoord, 0).r; // Pixel center UV and NDC - vec2 uv = (vec2(pixelCoord) + 0.5) * pc.resolution.zw; + vec2 uv = (vec2(pixelCoord) + 0.5) / vec2(imgSize); vec2 ndc = uv * 2.0 - 1.0; - // Unjitter the NDC: the scene was rendered with jitter applied to - // projection[2][0/1]. For RH perspective (P[2][3]=-1, clip.w=-vz): - // jittered_ndc = unjittered_ndc - jitter - // unjittered_ndc = ndc + jitter - vec2 unjitteredNDC = ndc + pc.jitterOffset.xy; + // Reconstruct current world position from current frame depth. + vec4 clipPos = vec4(ndc, depth, 1.0); + vec4 worldPos = pc.invCurrentViewProj * clipPos; + if (abs(worldPos.w) < 1e-6) { + imageStore(motionVectors, pixelCoord, vec4(0.0, 0.0, 0.0, 0.0)); + return; + } + worldPos /= worldPos.w; - // Reproject to previous frame via unjittered VP matrices - vec4 clipPos = vec4(unjitteredNDC, depth, 1.0); - vec4 prevClip = pc.reprojMatrix * clipPos; + // Project reconstructed world position into previous frame clip space. + vec4 prevClip = pc.prevViewProjection * worldPos; + if (abs(prevClip.w) < 1e-6) { + imageStore(motionVectors, pixelCoord, vec4(0.0, 0.0, 0.0, 0.0)); + return; + } vec2 prevNdc = prevClip.xy / prevClip.w; vec2 prevUV = prevNdc * 0.5 + 0.5; - // Current unjittered UV for this pixel's world content - vec2 currentUnjitteredUV = unjitteredNDC * 0.5 + 0.5; - - // Motion between unjittered positions — jitter-free. - // For a static scene (identity reprojMatrix), this is exactly zero. - vec2 motion = prevUV - currentUnjitteredUV; - - // Soft dead zone: smoothly fade out sub-pixel noise from float precision - // in reprojMatrix (avoids hard spatial discontinuity from step()) - float motionPx = length(motion * pc.resolution.xy); - motion *= smoothstep(0.0, 0.05, motionPx); + vec2 currentUV = uv; + vec2 motion = prevUV - currentUV; imageStore(motionVectors, pixelCoord, vec4(motion, 0.0, 0.0)); } diff --git a/assets/shaders/fsr2_sharpen.frag.glsl b/assets/shaders/fsr2_sharpen.frag.glsl index 2c649d22..9cd1271c 100644 --- a/assets/shaders/fsr2_sharpen.frag.glsl +++ b/assets/shaders/fsr2_sharpen.frag.glsl @@ -34,17 +34,21 @@ void main() { vec3 range = maxRGB - minRGB; vec3 rcpRange = 1.0 / (range + 0.001); - // Sharpening amount: inversely proportional to contrast - float luma = dot(center, vec3(0.299, 0.587, 0.114)); - float lumaRange = max(range.r, max(range.g, range.b)); - float w = clamp(1.0 - lumaRange * 2.0, 0.0, 1.0) * sharpness * 0.25; + // AMD FidelityFX RCAS-style weight computation: + // Compute per-channel sharpening weight from local contrast + vec3 rcpM = 1.0 / (4.0 * range + 0.001); + // Weight capped at sharpness, inversely proportional to contrast + float w = min(min(rcpM.r, min(rcpM.g, rcpM.b)), sharpness); - // Apply sharpening via unsharp mask - vec3 avg = (north + south + west + east) * 0.25; - vec3 sharpened = center + (center - avg) * w; + // Apply sharpening: negative lobe on neighbors + vec3 sharpened = (center * (1.0 + 4.0 * w) - (north + south + west + east) * w) + / (1.0 + 4.0 * w - 4.0 * w); + // Simplified: center + w * (4*center - north - south - west - east) + sharpened = center + w * (4.0 * center - north - south - west - east); - // Clamp to prevent ringing artifacts - sharpened = clamp(sharpened, minRGB, maxRGB); + // Soft clamp: allow some overshoot for sharpness, prevent extreme ringing + vec3 overshoot = 0.1 * (maxRGB - minRGB); + sharpened = clamp(sharpened, minRGB - overshoot, maxRGB + overshoot); FragColor = vec4(sharpened, 1.0); } diff --git a/assets/shaders/fsr2_sharpen.frag.spv b/assets/shaders/fsr2_sharpen.frag.spv index f9d2394c..20672a9e 100644 Binary files a/assets/shaders/fsr2_sharpen.frag.spv and b/assets/shaders/fsr2_sharpen.frag.spv differ diff --git a/include/rendering/renderer.hpp b/include/rendering/renderer.hpp index 0058fbdd..6af0518d 100644 --- a/include/rendering/renderer.hpp +++ b/include/rendering/renderer.hpp @@ -370,7 +370,7 @@ private: bool enabled = false; bool needsRecreate = false; float scaleFactor = 0.77f; - float sharpness = 0.5f; + float sharpness = 3.0f; // Very strong RCAS to counteract upscale softness uint32_t internalWidth = 0; uint32_t internalHeight = 0; @@ -415,6 +415,11 @@ private: glm::vec2 prevJitter = glm::vec2(0.0f); uint32_t frameIndex = 0; bool needsHistoryReset = true; + + // Convergent accumulation: jitter for N frames then freeze + int convergenceFrame = 0; + static constexpr int convergenceMaxFrames = 8; + glm::mat4 lastStableVP = glm::mat4(1.0f); }; FSR2State fsr2_; bool initFSR2Resources(); diff --git a/src/rendering/renderer.cpp b/src/rendering/renderer.cpp index c3449f93..1bc9730b 100644 --- a/src/rendering/renderer.cpp +++ b/src/rendering/renderer.cpp @@ -1022,12 +1022,33 @@ void Renderer::beginFrame() { return; } - // Apply FSR2 jitter to camera projection before UBO upload + // FSR2 jitter pattern for temporal accumulation. + constexpr bool kFsr2TemporalEnabled = false; if (fsr2_.enabled && fsr2_.sceneFramebuffer && camera) { - // Halton(2,3) sequence for sub-pixel jitter, scaled to internal resolution - float jx = (halton(fsr2_.frameIndex + 1, 2) - 0.5f) * 2.0f / static_cast(fsr2_.internalWidth); - float jy = (halton(fsr2_.frameIndex + 1, 3) - 0.5f) * 2.0f / static_cast(fsr2_.internalHeight); + if (!kFsr2TemporalEnabled) { + camera->setJitter(0.0f, 0.0f); + } else { + glm::mat4 currentVP = camera->getViewProjectionMatrix(); + + // Reset history only for clear camera movement. + bool cameraMoved = false; + for (int i = 0; i < 4 && !cameraMoved; i++) { + for (int j = 0; j < 4 && !cameraMoved; j++) { + if (std::abs(currentVP[i][j] - fsr2_.lastStableVP[i][j]) > 1e-3f) { + cameraMoved = true; + } + } + } + if (cameraMoved) { + fsr2_.lastStableVP = currentVP; + fsr2_.needsHistoryReset = true; + } + + const float jitterScale = 0.5f; + float jx = (halton(fsr2_.frameIndex + 1, 2) - 0.5f) * 2.0f * jitterScale / static_cast(fsr2_.internalWidth); + float jy = (halton(fsr2_.frameIndex + 1, 3) - 0.5f) * 2.0f * jitterScale / static_cast(fsr2_.internalHeight); camera->setJitter(jx, jy); + } } // Update per-frame UBO with current camera/lighting state @@ -1131,18 +1152,26 @@ void Renderer::endFrame() { if (!vkCtx || currentCmd == VK_NULL_HANDLE) return; if (fsr2_.enabled && fsr2_.sceneFramebuffer) { + constexpr bool kFsr2TemporalEnabled = false; // End the off-screen scene render pass vkCmdEndRenderPass(currentCmd); - // Compute passes: motion vectors → temporal accumulation - dispatchMotionVectors(); - dispatchTemporalAccumulate(); + if (kFsr2TemporalEnabled) { + // Compute passes: motion vectors -> temporal accumulation + dispatchMotionVectors(); + dispatchTemporalAccumulate(); - // Transition history output: GENERAL → SHADER_READ_ONLY for sharpen pass - transitionImageLayout(currentCmd, fsr2_.history[fsr2_.currentHistory].image, - VK_IMAGE_LAYOUT_GENERAL, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, - VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, - VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT); + // Transition history output: GENERAL -> SHADER_READ_ONLY for sharpen pass + transitionImageLayout(currentCmd, fsr2_.history[fsr2_.currentHistory].image, + VK_IMAGE_LAYOUT_GENERAL, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, + VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, + VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT); + } else { + transitionImageLayout(currentCmd, fsr2_.sceneColor.image, + VK_IMAGE_LAYOUT_PRESENT_SRC_KHR, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, + VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, + VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT); + } // Begin swapchain render pass at full resolution for sharpening + ImGui VkRenderPassBeginInfo rpInfo{}; @@ -1176,11 +1205,13 @@ void Renderer::endFrame() { // Draw RCAS sharpening from accumulated history buffer renderFSR2Sharpen(); - // Store current VP for next frame's motion vectors, advance frame - fsr2_.prevViewProjection = camera->getUnjitteredViewProjectionMatrix(); + // Maintain frame bookkeeping + fsr2_.prevViewProjection = camera->getViewProjectionMatrix(); fsr2_.prevJitter = camera->getJitter(); camera->clearJitter(); - fsr2_.currentHistory = 1 - fsr2_.currentHistory; + if (kFsr2TemporalEnabled) { + fsr2_.currentHistory = 1 - fsr2_.currentHistory; + } fsr2_.frameIndex = (fsr2_.frameIndex + 1) % 256; // Wrap to keep Halton values well-distributed } else if (fsr_.enabled && fsr_.sceneFramebuffer) { @@ -3698,6 +3729,8 @@ bool Renderer::initFSR2Resources() { VmaAllocator alloc = vkCtx->getAllocator(); VkExtent2D swapExtent = vkCtx->getSwapchainExtent(); + // Temporary stability fallback: keep FSR2 path at native internal resolution + // until temporal reprojection is reworked. fsr2_.internalWidth = static_cast(swapExtent.width * fsr2_.scaleFactor); fsr2_.internalHeight = static_cast(swapExtent.height * fsr2_.scaleFactor); fsr2_.internalWidth = (fsr2_.internalWidth + 1) & ~1u; @@ -3785,7 +3818,7 @@ bool Renderer::initFSR2Resources() { VkPushConstantRange pc{}; pc.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT; pc.offset = 0; - pc.size = sizeof(glm::mat4) + 2 * sizeof(glm::vec4); // 96 bytes + pc.size = 2 * sizeof(glm::mat4); // 128 bytes VkPipelineLayoutCreateInfo plCI{VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO}; plCI.setLayoutCount = 1; @@ -3929,7 +3962,9 @@ bool Renderer::initFSR2Resources() { int inputHistory = 1 - pp; // Read from the other int outputHistory = pp; // Write to this one - VkDescriptorImageInfo colorInfo{fsr2_.linearSampler, fsr2_.sceneColor.imageView, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL}; + // The accumulation shader already performs custom Lanczos reconstruction. + // Use nearest here to avoid double filtering (linear + Lanczos) softening. + VkDescriptorImageInfo colorInfo{fsr2_.nearestSampler, fsr2_.sceneColor.imageView, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL}; VkDescriptorImageInfo depthInfo{fsr2_.nearestSampler, fsr2_.sceneDepth.imageView, VK_IMAGE_LAYOUT_DEPTH_STENCIL_READ_ONLY_OPTIMAL}; VkDescriptorImageInfo mvInfo{fsr2_.nearestSampler, fsr2_.motionVectors.imageView, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL}; VkDescriptorImageInfo histInInfo{fsr2_.linearSampler, fsr2_.history[inputHistory].imageView, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL}; @@ -4086,25 +4121,16 @@ void Renderer::dispatchMotionVectors() { vkCmdBindDescriptorSets(currentCmd, VK_PIPELINE_BIND_POINT_COMPUTE, fsr2_.motionVecPipelineLayout, 0, 1, &fsr2_.motionVecDescSet, 0, nullptr); - // Reprojection: prevUnjitteredVP * inv(currentUnjitteredVP) - // Using unjittered VPs avoids numerical instability from jitter amplification - // through large world coordinates. The shader corrects NDC by subtracting - // current jitter before reprojection (depth was rendered at jittered position). + // Reprojection with jittered matrices: + // reconstruct world position from current depth, then project into previous clip. struct { - glm::mat4 reprojMatrix; - glm::vec4 resolution; - glm::vec4 jitterOffset; // xy = current jitter (NDC), zw = unused + glm::mat4 prevViewProjection; + glm::mat4 invCurrentViewProj; } pc; - glm::mat4 currentUnjitteredVP = camera->getUnjitteredViewProjectionMatrix(); - pc.reprojMatrix = fsr2_.prevViewProjection * glm::inverse(currentUnjitteredVP); - glm::vec2 jitter = camera->getJitter(); - pc.jitterOffset = glm::vec4(jitter.x, jitter.y, 0.0f, 0.0f); - pc.resolution = glm::vec4( - static_cast(fsr2_.internalWidth), - static_cast(fsr2_.internalHeight), - 1.0f / fsr2_.internalWidth, - 1.0f / fsr2_.internalHeight); + glm::mat4 currentVP = camera->getViewProjectionMatrix(); + pc.prevViewProjection = fsr2_.prevViewProjection; + pc.invCurrentViewProj = glm::inverse(currentVP); vkCmdPushConstants(currentCmd, fsr2_.motionVecPipelineLayout, VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(pc), &pc); @@ -4173,7 +4199,11 @@ void Renderer::dispatchTemporalAccumulate() { 1.0f / swapExtent.width, 1.0f / swapExtent.height); glm::vec2 jitter = camera->getJitter(); pc.jitterOffset = glm::vec4(jitter.x, jitter.y, 0.0f, 0.0f); - pc.params = glm::vec4(fsr2_.needsHistoryReset ? 1.0f : 0.0f, fsr2_.sharpness, 0.0f, 0.0f); + pc.params = glm::vec4( + fsr2_.needsHistoryReset ? 1.0f : 0.0f, + fsr2_.sharpness, + static_cast(fsr2_.convergenceFrame), + 0.0f); vkCmdPushConstants(currentCmd, fsr2_.accumulatePipelineLayout, VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(pc), &pc); @@ -4187,6 +4217,7 @@ void Renderer::dispatchTemporalAccumulate() { void Renderer::renderFSR2Sharpen() { if (!fsr2_.sharpenPipeline || currentCmd == VK_NULL_HANDLE) return; + constexpr bool kFsr2TemporalEnabled = false; VkExtent2D ext = vkCtx->getSwapchainExtent(); uint32_t outputIdx = fsr2_.currentHistory; @@ -4198,7 +4229,9 @@ void Renderer::renderFSR2Sharpen() { // Update sharpen descriptor to point at current history output VkDescriptorImageInfo imgInfo{}; imgInfo.sampler = fsr2_.linearSampler; - imgInfo.imageView = fsr2_.history[outputIdx].imageView; + imgInfo.imageView = kFsr2TemporalEnabled + ? fsr2_.history[outputIdx].imageView + : fsr2_.sceneColor.imageView; imgInfo.imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; VkWriteDescriptorSet write{VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET};