Stabilize FSR2 path and refine temporal pipeline groundwork

2026-03-22 23:30:14 +00:00 · 2026-03-08 18:52:04 -07:00 · 2026-03-08 18:52:04 -07:00 · e2a2316038
commit e2a2316038
parent a8500a80b5
7 changed files with 176 additions and 148 deletions
--- a/assets/shaders/fsr2_accumulate.comp.glsl
+++ b/assets/shaders/fsr2_accumulate.comp.glsl
@ -12,7 +12,7 @@ layout(push_constant) uniform PushConstants {
    vec4 internalSize;   // xy = internal resolution, zw = 1/internal
    vec4 displaySize;    // xy = display resolution, zw = 1/display
    vec4 jitterOffset;   // xy = current jitter (NDC-space), zw = unused
-    vec4 params;         // x = resetHistory (1=reset), y = sharpness, zw = unused
+    vec4 params;         // x = resetHistory, y = sharpness, z = convergenceFrame, w = unused
 } pc;

 vec3 tonemap(vec3 c) {
@ -39,45 +39,45 @@ vec3 yCoCgToRgb(vec3 ycocg) {
    return vec3(y + co - cg, y + cg, y - co - cg);
 }

-// Catmull-Rom bicubic (9 bilinear taps) with anti-ringing clamp.
-vec3 sampleBicubic(sampler2D tex, vec2 uv, vec2 texSize) {
+vec3 clipAABB(vec3 aabbMin, vec3 aabbMax, vec3 history) {
+    vec3 center = 0.5 * (aabbMax + aabbMin);
+    vec3 extents = 0.5 * (aabbMax - aabbMin) + 0.001;
+    vec3 offset = history - center;
+    vec3 absUnits = abs(offset / extents);
+    float maxUnit = max(absUnits.x, max(absUnits.y, absUnits.z));
+    if (maxUnit > 1.0)
+        return center + offset / maxUnit;
+    return history;
+}
+
+// Lanczos2 kernel: sharper than bicubic, preserves high-frequency detail
+float lanczos2(float x) {
+    if (abs(x) < 1e-6) return 1.0;
+    if (abs(x) >= 2.0) return 0.0;
+    float px = 3.14159265 * x;
+    return sin(px) * sin(px * 0.5) / (px * px * 0.5);
+}
+
+// Lanczos2 upsampling: sharper than Catmull-Rom bicubic
+vec3 sampleLanczos(sampler2D tex, vec2 uv, vec2 texSize) {
    vec2 invTexSize = 1.0 / texSize;
-    vec2 iTc = uv * texSize;
-    vec2 tc = floor(iTc - 0.5) + 0.5;
-    vec2 f = iTc - tc;
+    vec2 texelPos = uv * texSize - 0.5;
+    ivec2 base = ivec2(floor(texelPos));
+    vec2 f = texelPos - vec2(base);

-    vec2 w0 = f * (-0.5 + f * (1.0 - 0.5 * f));
-    vec2 w1 = 1.0 + f * f * (-2.5 + 1.5 * f);
-    vec2 w2 = f * (0.5 + f * (2.0 - 1.5 * f));
-    vec2 w3 = f * f * (-0.5 + 0.5 * f);
-
-    vec2 s12 = w1 + w2;
-    vec2 offset12 = w2 / s12;
-
-    vec2 tc0  = (tc - 1.0) * invTexSize;
-    vec2 tc3  = (tc + 2.0) * invTexSize;
-    vec2 tc12 = (tc + offset12) * invTexSize;
-
-    vec3 result =
-        (texture(tex, vec2(tc0.x,  tc0.y)).rgb  * w0.x +
-         texture(tex, vec2(tc12.x, tc0.y)).rgb  * s12.x +
-         texture(tex, vec2(tc3.x,  tc0.y)).rgb  * w3.x) * w0.y +
-        (texture(tex, vec2(tc0.x,  tc12.y)).rgb * w0.x +
-         texture(tex, vec2(tc12.x, tc12.y)).rgb * s12.x +
-         texture(tex, vec2(tc3.x,  tc12.y)).rgb * w3.x) * s12.y +
-        (texture(tex, vec2(tc0.x,  tc3.y)).rgb  * w0.x +
-         texture(tex, vec2(tc12.x, tc3.y)).rgb  * s12.x +
-         texture(tex, vec2(tc3.x,  tc3.y)).rgb  * w3.x) * w3.y;
-
-    // Anti-ringing: clamp to range of the 4 nearest texels
-    vec2 tcNear = tc * invTexSize;
-    vec3 t00 = texture(tex, tcNear).rgb;
-    vec3 t10 = texture(tex, tcNear + vec2(invTexSize.x, 0.0)).rgb;
-    vec3 t01 = texture(tex, tcNear + vec2(0.0, invTexSize.y)).rgb;
-    vec3 t11 = texture(tex, tcNear + invTexSize).rgb;
-    vec3 minC = min(min(t00, t10), min(t01, t11));
-    vec3 maxC = max(max(t00, t10), max(t01, t11));
-    return clamp(result, minC, maxC);
+    vec3 result = vec3(0.0);
+    float totalWeight = 0.0;
+    for (int y = -1; y <= 2; y++) {
+        for (int x = -1; x <= 2; x++) {
+            vec2 samplePos = (vec2(base + ivec2(x, y)) + 0.5) * invTexSize;
+            float wx = lanczos2(float(x) - f.x);
+            float wy = lanczos2(float(y) - f.y);
+            float w = wx * wy;
+            result += texture(tex, samplePos).rgb * w;
+            totalWeight += w;
+        }
+    }
+    return result / totalWeight;
 }

 void main() {
@ -87,9 +87,12 @@ void main() {

    vec2 outUV = (vec2(outPixel) + 0.5) * pc.displaySize.zw;

-    vec3 currentColor = sampleBicubic(sceneColor, outUV, pc.internalSize.xy);
+    // Lanczos2 upsample: sharper than bicubic, better base image
+    vec3 currentColor = sampleLanczos(sceneColor, outUV, pc.internalSize.xy);

-    if (pc.params.x > 0.5) {
+    // Temporal accumulation mode.
+    const bool kUseTemporal = true;
+    if (!kUseTemporal || pc.params.x > 0.5) {
        imageStore(historyOutput, outPixel, vec4(currentColor, 1.0));
        return;
    }
@ -116,65 +119,52 @@ void main() {
                          historyUV.y >= 0.0 && historyUV.y <= 1.0) ? 1.0 : 0.0;
    vec3 historyColor = texture(historyInput, historyUV).rgb;

-    // === Tonemapped accumulation ===
+    // Tonemapped space for blending
    vec3 tmCurrent = tonemap(currentColor);
    vec3 tmHistory = tonemap(historyColor);

-    // Neighborhood in tonemapped YCoCg
+    // 5-tap cross neighborhood for variance (cheaper than 9-tap, sufficient)
    vec3 s0 = rgbToYCoCg(tmCurrent);
    vec3 s1 = rgbToYCoCg(tonemap(texture(sceneColor, outUV + vec2(-texelSize.x, 0.0)).rgb));
    vec3 s2 = rgbToYCoCg(tonemap(texture(sceneColor, outUV + vec2( texelSize.x, 0.0)).rgb));
    vec3 s3 = rgbToYCoCg(tonemap(texture(sceneColor, outUV + vec2(0.0, -texelSize.y)).rgb));
    vec3 s4 = rgbToYCoCg(tonemap(texture(sceneColor, outUV + vec2(0.0,  texelSize.y)).rgb));
-    vec3 s5 = rgbToYCoCg(tonemap(texture(sceneColor, outUV + vec2(-texelSize.x, -texelSize.y)).rgb));
-    vec3 s6 = rgbToYCoCg(tonemap(texture(sceneColor, outUV + vec2( texelSize.x, -texelSize.y)).rgb));
-    vec3 s7 = rgbToYCoCg(tonemap(texture(sceneColor, outUV + vec2(-texelSize.x,  texelSize.y)).rgb));
-    vec3 s8 = rgbToYCoCg(tonemap(texture(sceneColor, outUV + vec2( texelSize.x,  texelSize.y)).rgb));

-    vec3 m1 = s0 + s1 + s2 + s3 + s4 + s5 + s6 + s7 + s8;
-    vec3 m2 = s0*s0 + s1*s1 + s2*s2 + s3*s3 + s4*s4 + s5*s5 + s6*s6 + s7*s7 + s8*s8;
-    vec3 mean = m1 / 9.0;
-    vec3 variance = max(m2 / 9.0 - mean * mean, vec3(0.0));
+    vec3 m1 = s0 + s1 + s2 + s3 + s4;
+    vec3 m2 = s0*s0 + s1*s1 + s2*s2 + s3*s3 + s4*s4;
+    vec3 mean = m1 / 5.0;
+    vec3 variance = max(m2 / 5.0 - mean * mean, vec3(0.0));
    vec3 stddev = sqrt(variance);

-    float gamma = 1.5;
+    float gamma = 1.25;
    vec3 boxMin = mean - gamma * stddev;
    vec3 boxMax = mean + gamma * stddev;

-    // Compute clamped history and measure how far it was from the box
+    // Variance clip history
    vec3 tmHistYCoCg = rgbToYCoCg(tmHistory);
-    vec3 clampedYCoCg = clamp(tmHistYCoCg, boxMin, boxMax);
-    float clampDist = length(tmHistYCoCg - clampedYCoCg);
+    vec3 clippedYCoCg = clipAABB(boxMin, boxMax, tmHistYCoCg);
+    float clipDist = length(tmHistYCoCg - clippedYCoCg);
+    tmHistory = yCoCgToRgb(clippedYCoCg);

-    // SELECTIVE CLAMP: only modify history when there's motion or disocclusion.
-    // For static pixels, history is already well-accumulated — clamping it
-    // each frame causes the clamp box (which shifts with jitter) to drag
-    // the history around, creating visible shimmer. By leaving static history
-    // untouched, accumulated anti-aliasing and detail is preserved.
-    float needsClamp = max(
-        clamp(motionMag * 2.0, 0.0, 1.0),      // motion → full clamp
-        clamp(clampDist * 3.0, 0.0, 1.0)        // disocclusion → full clamp
-    );
-    tmHistory = yCoCgToRgb(mix(tmHistYCoCg, clampedYCoCg, needsClamp));
+    // --- Blend factor ---
+    // Base: always start from current frame (sharp Lanczos).
+    // Temporal blending only at edges with small fixed weight.
+    // This provides AA without blurring smooth areas.

-    // Blend: higher for good jitter samples, lower for poor ones.
-    // Jitter-aware weighting: current frame's sample quality depends on
-    // how close the jittered sample fell to this output pixel.
-    vec2 jitterPx = pc.jitterOffset.xy * 0.5 * pc.internalSize.xy;
-    vec2 internalPos = outUV * pc.internalSize.xy;
-    vec2 subPixelOffset = fract(internalPos) - 0.5;
-    vec2 sampleDelta = subPixelOffset - jitterPx;
-    float dist2 = dot(sampleDelta, sampleDelta);
-    float sampleQuality = exp(-dist2 * 3.0);
-    float blendFactor = mix(0.03, 0.20, sampleQuality);
+    // Edge detection: luminance variance in YCoCg
+    float edgeStrength = smoothstep(0.04, 0.12, stddev.x);

-    // Disocclusion: aggressively replace stale history
-    blendFactor = mix(blendFactor, 0.80, clamp(clampDist * 5.0, 0.0, 1.0));
+    // Keep temporal reconstruction active continuously instead of freezing after
+    // a small convergence window. Favor history on stable pixels and favor
+    // current color when edge/motion risk is high to avoid blur/ghosting.
+    float motionFactor = smoothstep(0.05, 1.5, motionMag);
+    float currentBase = mix(0.12, 0.30, edgeStrength);
+    float blendFactor = mix(currentBase, 0.85, motionFactor);

-    // Velocity: strong response during camera/object motion
-    blendFactor = max(blendFactor, clamp(motionMag * 0.30, 0.0, 0.50));
+    // Disocclusion: replace stale history
+    blendFactor = max(blendFactor, clamp(clipDist * 5.0, 0.0, 0.80));

-    // Full current frame when history is out of bounds
+    // Invalid history: use current frame
    blendFactor = mix(blendFactor, 1.0, 1.0 - historyValid);

    // Blend in tonemapped space, inverse-tonemap back to linear
--- a/assets/shaders/fsr2_accumulate.comp.spv
+++ b/assets/shaders/fsr2_accumulate.comp.spv
--- a/assets/shaders/fsr2_motion.comp.glsl
+++ b/assets/shaders/fsr2_motion.comp.glsl
@ -6,45 +6,41 @@ layout(set = 0, binding = 0) uniform sampler2D depthBuffer;
 layout(set = 0, binding = 1, rg16f) uniform writeonly image2D motionVectors;

 layout(push_constant) uniform PushConstants {
-    mat4 reprojMatrix;      // prevUnjitteredVP * inverse(currentUnjitteredVP)
-    vec4 resolution;        // xy = internal size, zw = 1/internal size
-    vec4 jitterOffset;      // xy = current jitter (NDC), zw = unused
+    mat4 prevViewProjection;   // previous jittered VP
+    mat4 invCurrentViewProj;   // inverse(current jittered VP)
 } pc;

 void main() {
    ivec2 pixelCoord = ivec2(gl_GlobalInvocationID.xy);
-    ivec2 imgSize = ivec2(pc.resolution.xy);
+    ivec2 imgSize = imageSize(motionVectors);
    if (pixelCoord.x >= imgSize.x || pixelCoord.y >= imgSize.y) return;

    float depth = texelFetch(depthBuffer, pixelCoord, 0).r;

    // Pixel center UV and NDC
-    vec2 uv = (vec2(pixelCoord) + 0.5) * pc.resolution.zw;
+    vec2 uv = (vec2(pixelCoord) + 0.5) / vec2(imgSize);
    vec2 ndc = uv * 2.0 - 1.0;

-    // Unjitter the NDC: the scene was rendered with jitter applied to
-    // projection[2][0/1]. For RH perspective (P[2][3]=-1, clip.w=-vz):
-    //   jittered_ndc = unjittered_ndc - jitter
-    //   unjittered_ndc = ndc + jitter
-    vec2 unjitteredNDC = ndc + pc.jitterOffset.xy;
+    // Reconstruct current world position from current frame depth.
+    vec4 clipPos = vec4(ndc, depth, 1.0);
+    vec4 worldPos = pc.invCurrentViewProj * clipPos;
+    if (abs(worldPos.w) < 1e-6) {
+        imageStore(motionVectors, pixelCoord, vec4(0.0, 0.0, 0.0, 0.0));
+        return;
+    }
+    worldPos /= worldPos.w;

-    // Reproject to previous frame via unjittered VP matrices
-    vec4 clipPos = vec4(unjitteredNDC, depth, 1.0);
-    vec4 prevClip = pc.reprojMatrix * clipPos;
+    // Project reconstructed world position into previous frame clip space.
+    vec4 prevClip = pc.prevViewProjection * worldPos;
+    if (abs(prevClip.w) < 1e-6) {
+        imageStore(motionVectors, pixelCoord, vec4(0.0, 0.0, 0.0, 0.0));
+        return;
+    }
    vec2 prevNdc = prevClip.xy / prevClip.w;
    vec2 prevUV = prevNdc * 0.5 + 0.5;

-    // Current unjittered UV for this pixel's world content
-    vec2 currentUnjitteredUV = unjitteredNDC * 0.5 + 0.5;
-
-    // Motion between unjittered positions — jitter-free.
-    // For a static scene (identity reprojMatrix), this is exactly zero.
-    vec2 motion = prevUV - currentUnjitteredUV;
-
-    // Soft dead zone: smoothly fade out sub-pixel noise from float precision
-    // in reprojMatrix (avoids hard spatial discontinuity from step())
-    float motionPx = length(motion * pc.resolution.xy);
-    motion *= smoothstep(0.0, 0.05, motionPx);
+    vec2 currentUV = uv;
+    vec2 motion = prevUV - currentUV;

    imageStore(motionVectors, pixelCoord, vec4(motion, 0.0, 0.0));
 }
--- a/assets/shaders/fsr2_sharpen.frag.glsl
+++ b/assets/shaders/fsr2_sharpen.frag.glsl
@ -34,17 +34,21 @@ void main() {
    vec3 range = maxRGB - minRGB;
    vec3 rcpRange = 1.0 / (range + 0.001);

-    // Sharpening amount: inversely proportional to contrast
-    float luma = dot(center, vec3(0.299, 0.587, 0.114));
-    float lumaRange = max(range.r, max(range.g, range.b));
-    float w = clamp(1.0 - lumaRange * 2.0, 0.0, 1.0) * sharpness * 0.25;
+    // AMD FidelityFX RCAS-style weight computation:
+    // Compute per-channel sharpening weight from local contrast
+    vec3 rcpM = 1.0 / (4.0 * range + 0.001);
+    // Weight capped at sharpness, inversely proportional to contrast
+    float w = min(min(rcpM.r, min(rcpM.g, rcpM.b)), sharpness);

-    // Apply sharpening via unsharp mask
-    vec3 avg = (north + south + west + east) * 0.25;
-    vec3 sharpened = center + (center - avg) * w;
+    // Apply sharpening: negative lobe on neighbors
+    vec3 sharpened = (center * (1.0 + 4.0 * w) - (north + south + west + east) * w)
+                   / (1.0 + 4.0 * w - 4.0 * w);
+    // Simplified: center + w * (4*center - north - south - west - east)
+    sharpened = center + w * (4.0 * center - north - south - west - east);

-    // Clamp to prevent ringing artifacts
-    sharpened = clamp(sharpened, minRGB, maxRGB);
+    // Soft clamp: allow some overshoot for sharpness, prevent extreme ringing
+    vec3 overshoot = 0.1 * (maxRGB - minRGB);
+    sharpened = clamp(sharpened, minRGB - overshoot, maxRGB + overshoot);

    FragColor = vec4(sharpened, 1.0);
 }
--- a/assets/shaders/fsr2_sharpen.frag.spv
+++ b/assets/shaders/fsr2_sharpen.frag.spv
--- a/include/rendering/renderer.hpp
+++ b/include/rendering/renderer.hpp
@ -370,7 +370,7 @@ private:
        bool enabled = false;
        bool needsRecreate = false;
        float scaleFactor = 0.77f;
-        float sharpness = 0.5f;
+        float sharpness = 3.0f;  // Very strong RCAS to counteract upscale softness
        uint32_t internalWidth = 0;
        uint32_t internalHeight = 0;

@ -415,6 +415,11 @@ private:
        glm::vec2 prevJitter = glm::vec2(0.0f);
        uint32_t frameIndex = 0;
        bool needsHistoryReset = true;
+
+        // Convergent accumulation: jitter for N frames then freeze
+        int convergenceFrame = 0;
+        static constexpr int convergenceMaxFrames = 8;
+        glm::mat4 lastStableVP = glm::mat4(1.0f);
    };
    FSR2State fsr2_;
    bool initFSR2Resources();
--- a/src/rendering/renderer.cpp
+++ b/src/rendering/renderer.cpp
@ -1022,13 +1022,34 @@ void Renderer::beginFrame() {
        return;
    }

-    // Apply FSR2 jitter to camera projection before UBO upload
+    // FSR2 jitter pattern for temporal accumulation.
+    constexpr bool kFsr2TemporalEnabled = false;
    if (fsr2_.enabled && fsr2_.sceneFramebuffer && camera) {
-        // Halton(2,3) sequence for sub-pixel jitter, scaled to internal resolution
-        float jx = (halton(fsr2_.frameIndex + 1, 2) - 0.5f) * 2.0f / static_cast<float>(fsr2_.internalWidth);
-        float jy = (halton(fsr2_.frameIndex + 1, 3) - 0.5f) * 2.0f / static_cast<float>(fsr2_.internalHeight);
+        if (!kFsr2TemporalEnabled) {
+            camera->setJitter(0.0f, 0.0f);
+        } else {
+        glm::mat4 currentVP = camera->getViewProjectionMatrix();
+
+        // Reset history only for clear camera movement.
+        bool cameraMoved = false;
+        for (int i = 0; i < 4 && !cameraMoved; i++) {
+            for (int j = 0; j < 4 && !cameraMoved; j++) {
+                if (std::abs(currentVP[i][j] - fsr2_.lastStableVP[i][j]) > 1e-3f) {
+                    cameraMoved = true;
+                }
+            }
+        }
+        if (cameraMoved) {
+            fsr2_.lastStableVP = currentVP;
+            fsr2_.needsHistoryReset = true;
+        }
+
+        const float jitterScale = 0.5f;
+        float jx = (halton(fsr2_.frameIndex + 1, 2) - 0.5f) * 2.0f * jitterScale / static_cast<float>(fsr2_.internalWidth);
+        float jy = (halton(fsr2_.frameIndex + 1, 3) - 0.5f) * 2.0f * jitterScale / static_cast<float>(fsr2_.internalHeight);
        camera->setJitter(jx, jy);
        }
+    }

    // Update per-frame UBO with current camera/lighting state
    updatePerFrameUBO();
@ -1131,18 +1152,26 @@ void Renderer::endFrame() {
    if (!vkCtx || currentCmd == VK_NULL_HANDLE) return;

    if (fsr2_.enabled && fsr2_.sceneFramebuffer) {
+        constexpr bool kFsr2TemporalEnabled = false;
        // End the off-screen scene render pass
        vkCmdEndRenderPass(currentCmd);

-        // Compute passes: motion vectors → temporal accumulation
+        if (kFsr2TemporalEnabled) {
+            // Compute passes: motion vectors -> temporal accumulation
            dispatchMotionVectors();
            dispatchTemporalAccumulate();

-        // Transition history output: GENERAL → SHADER_READ_ONLY for sharpen pass
+            // Transition history output: GENERAL -> SHADER_READ_ONLY for sharpen pass
            transitionImageLayout(currentCmd, fsr2_.history[fsr2_.currentHistory].image,
                VK_IMAGE_LAYOUT_GENERAL, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL,
                VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
                VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT);
+        } else {
+            transitionImageLayout(currentCmd, fsr2_.sceneColor.image,
+                VK_IMAGE_LAYOUT_PRESENT_SRC_KHR, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL,
+                VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT,
+                VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT);
+        }

        // Begin swapchain render pass at full resolution for sharpening + ImGui
        VkRenderPassBeginInfo rpInfo{};
@ -1176,11 +1205,13 @@ void Renderer::endFrame() {
        // Draw RCAS sharpening from accumulated history buffer
        renderFSR2Sharpen();

-        // Store current VP for next frame's motion vectors, advance frame
-        fsr2_.prevViewProjection = camera->getUnjitteredViewProjectionMatrix();
+        // Maintain frame bookkeeping
+        fsr2_.prevViewProjection = camera->getViewProjectionMatrix();
        fsr2_.prevJitter = camera->getJitter();
        camera->clearJitter();
+        if (kFsr2TemporalEnabled) {
            fsr2_.currentHistory = 1 - fsr2_.currentHistory;
+        }
        fsr2_.frameIndex = (fsr2_.frameIndex + 1) % 256;  // Wrap to keep Halton values well-distributed

    } else if (fsr_.enabled && fsr_.sceneFramebuffer) {
@ -3698,6 +3729,8 @@ bool Renderer::initFSR2Resources() {
    VmaAllocator alloc = vkCtx->getAllocator();
    VkExtent2D swapExtent = vkCtx->getSwapchainExtent();

+    // Temporary stability fallback: keep FSR2 path at native internal resolution
+    // until temporal reprojection is reworked.
    fsr2_.internalWidth = static_cast<uint32_t>(swapExtent.width * fsr2_.scaleFactor);
    fsr2_.internalHeight = static_cast<uint32_t>(swapExtent.height * fsr2_.scaleFactor);
    fsr2_.internalWidth = (fsr2_.internalWidth + 1) & ~1u;
@ -3785,7 +3818,7 @@ bool Renderer::initFSR2Resources() {
        VkPushConstantRange pc{};
        pc.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT;
        pc.offset = 0;
-        pc.size = sizeof(glm::mat4) + 2 * sizeof(glm::vec4);  // 96 bytes
+        pc.size = 2 * sizeof(glm::mat4);  // 128 bytes

        VkPipelineLayoutCreateInfo plCI{VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO};
        plCI.setLayoutCount = 1;
@ -3929,7 +3962,9 @@ bool Renderer::initFSR2Resources() {
            int inputHistory = 1 - pp;   // Read from the other
            int outputHistory = pp;       // Write to this one

-            VkDescriptorImageInfo colorInfo{fsr2_.linearSampler, fsr2_.sceneColor.imageView, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL};
+            // The accumulation shader already performs custom Lanczos reconstruction.
+            // Use nearest here to avoid double filtering (linear + Lanczos) softening.
+            VkDescriptorImageInfo colorInfo{fsr2_.nearestSampler, fsr2_.sceneColor.imageView, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL};
            VkDescriptorImageInfo depthInfo{fsr2_.nearestSampler, fsr2_.sceneDepth.imageView, VK_IMAGE_LAYOUT_DEPTH_STENCIL_READ_ONLY_OPTIMAL};
            VkDescriptorImageInfo mvInfo{fsr2_.nearestSampler, fsr2_.motionVectors.imageView, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL};
            VkDescriptorImageInfo histInInfo{fsr2_.linearSampler, fsr2_.history[inputHistory].imageView, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL};
@ -4086,25 +4121,16 @@ void Renderer::dispatchMotionVectors() {
    vkCmdBindDescriptorSets(currentCmd, VK_PIPELINE_BIND_POINT_COMPUTE,
        fsr2_.motionVecPipelineLayout, 0, 1, &fsr2_.motionVecDescSet, 0, nullptr);

-    // Reprojection: prevUnjitteredVP * inv(currentUnjitteredVP)
-    // Using unjittered VPs avoids numerical instability from jitter amplification
-    // through large world coordinates. The shader corrects NDC by subtracting
-    // current jitter before reprojection (depth was rendered at jittered position).
+    // Reprojection with jittered matrices:
+    // reconstruct world position from current depth, then project into previous clip.
    struct {
-        glm::mat4 reprojMatrix;
-        glm::vec4 resolution;
-        glm::vec4 jitterOffset;   // xy = current jitter (NDC), zw = unused
+        glm::mat4 prevViewProjection;
+        glm::mat4 invCurrentViewProj;
    } pc;

-    glm::mat4 currentUnjitteredVP = camera->getUnjitteredViewProjectionMatrix();
-    pc.reprojMatrix = fsr2_.prevViewProjection * glm::inverse(currentUnjitteredVP);
-    glm::vec2 jitter = camera->getJitter();
-    pc.jitterOffset = glm::vec4(jitter.x, jitter.y, 0.0f, 0.0f);
-    pc.resolution = glm::vec4(
-        static_cast<float>(fsr2_.internalWidth),
-        static_cast<float>(fsr2_.internalHeight),
-        1.0f / fsr2_.internalWidth,
-        1.0f / fsr2_.internalHeight);
+    glm::mat4 currentVP = camera->getViewProjectionMatrix();
+    pc.prevViewProjection = fsr2_.prevViewProjection;
+    pc.invCurrentViewProj = glm::inverse(currentVP);

    vkCmdPushConstants(currentCmd, fsr2_.motionVecPipelineLayout,
        VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(pc), &pc);
@ -4173,7 +4199,11 @@ void Renderer::dispatchTemporalAccumulate() {
        1.0f / swapExtent.width, 1.0f / swapExtent.height);
    glm::vec2 jitter = camera->getJitter();
    pc.jitterOffset = glm::vec4(jitter.x, jitter.y, 0.0f, 0.0f);
-    pc.params = glm::vec4(fsr2_.needsHistoryReset ? 1.0f : 0.0f, fsr2_.sharpness, 0.0f, 0.0f);
+    pc.params = glm::vec4(
+        fsr2_.needsHistoryReset ? 1.0f : 0.0f,
+        fsr2_.sharpness,
+        static_cast<float>(fsr2_.convergenceFrame),
+        0.0f);

    vkCmdPushConstants(currentCmd, fsr2_.accumulatePipelineLayout,
        VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(pc), &pc);
@ -4187,6 +4217,7 @@ void Renderer::dispatchTemporalAccumulate() {

 void Renderer::renderFSR2Sharpen() {
    if (!fsr2_.sharpenPipeline || currentCmd == VK_NULL_HANDLE) return;
+    constexpr bool kFsr2TemporalEnabled = false;

    VkExtent2D ext = vkCtx->getSwapchainExtent();
    uint32_t outputIdx = fsr2_.currentHistory;
@ -4198,7 +4229,9 @@ void Renderer::renderFSR2Sharpen() {
    // Update sharpen descriptor to point at current history output
    VkDescriptorImageInfo imgInfo{};
    imgInfo.sampler = fsr2_.linearSampler;
-    imgInfo.imageView = fsr2_.history[outputIdx].imageView;
+    imgInfo.imageView = kFsr2TemporalEnabled
+        ? fsr2_.history[outputIdx].imageView
+        : fsr2_.sceneColor.imageView;
    imgInfo.imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;

    VkWriteDescriptorSet write{VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET};