mirror of
https://github.com/Kelsidavis/WoWee.git
synced 2026-03-22 23:30:14 +00:00
Stabilize FSR2 path and refine temporal pipeline groundwork
This commit is contained in:
parent
a8500a80b5
commit
e2a2316038
7 changed files with 176 additions and 148 deletions
|
|
@ -12,7 +12,7 @@ layout(push_constant) uniform PushConstants {
|
|||
vec4 internalSize; // xy = internal resolution, zw = 1/internal
|
||||
vec4 displaySize; // xy = display resolution, zw = 1/display
|
||||
vec4 jitterOffset; // xy = current jitter (NDC-space), zw = unused
|
||||
vec4 params; // x = resetHistory (1=reset), y = sharpness, zw = unused
|
||||
vec4 params; // x = resetHistory, y = sharpness, z = convergenceFrame, w = unused
|
||||
} pc;
|
||||
|
||||
vec3 tonemap(vec3 c) {
|
||||
|
|
@ -39,45 +39,45 @@ vec3 yCoCgToRgb(vec3 ycocg) {
|
|||
return vec3(y + co - cg, y + cg, y - co - cg);
|
||||
}
|
||||
|
||||
// Catmull-Rom bicubic (9 bilinear taps) with anti-ringing clamp.
|
||||
vec3 sampleBicubic(sampler2D tex, vec2 uv, vec2 texSize) {
|
||||
// Clips `history` against the axis-aligned box [aabbMin, aabbMax] by pulling it
// toward the box centre.
//   aabbMin, aabbMax : opposite corners of the box.
//   history          : the value to be clipped.
// Returns `history` unchanged when it lies inside the (slightly padded) box.
// Otherwise returns the point on the line from the box centre to `history`
// whose largest per-axis distance, measured in half-extents, is exactly 1.
// Unlike a per-component clamp, the direction from the centre is preserved.
vec3 clipAABB(vec3 aabbMin, vec3 aabbMax, vec3 history) {
|
||||
vec3 center = 0.5 * (aabbMax + aabbMin);
|
||||
// Half-size of the box per axis; the 0.001 pad keeps the division below
// away from zero when aabbMin == aabbMax on some axis.
vec3 extents = 0.5 * (aabbMax - aabbMin) + 0.001;
|
||||
vec3 offset = history - center;
|
||||
// Per-axis distance from the centre, in units of the half-extent
// (a value above 1 means outside the box on that axis).
vec3 absUnits = abs(offset / extents);
|
||||
float maxUnit = max(absUnits.x, max(absUnits.y, absUnits.z));
|
||||
// Outside the box: shrink the offset uniformly so the worst axis lands on the boundary.
if (maxUnit > 1.0)
|
||||
return center + offset / maxUnit;
|
||||
return history;
|
||||
}
|
||||
|
||||
// Lanczos2 kernel: sharper than bicubic, preserves high-frequency detail
|
||||
// Evaluates a windowed-sinc weight for a tap at signed distance `x` (in texels)
// from the sample position: sinc(x) * sinc(x / 2) with sinc(t) = sin(pi*t) / (pi*t).
// Returns 1.0 at (or extremely near) x == 0 and 0.0 for |x| >= 2.
float lanczos2(float x) {
|
||||
// The closed form below is 0/0 at x == 0; return the limit value directly.
if (abs(x) < 1e-6) return 1.0;
|
||||
// Kernel support is limited to |x| < 2.
if (abs(x) >= 2.0) return 0.0;
|
||||
float px = 3.14159265 * x;
|
||||
// sin(px)/px * sin(px/2)/(px/2), folded into a single division.
return sin(px) * sin(px * 0.5) / (px * px * 0.5);
|
||||
}
|
||||
|
||||
// Lanczos2 upsampling: sharper than Catmull-Rom bicubic
|
||||
vec3 sampleLanczos(sampler2D tex, vec2 uv, vec2 texSize) {
|
||||
vec2 invTexSize = 1.0 / texSize;
|
||||
vec2 iTc = uv * texSize;
|
||||
vec2 tc = floor(iTc - 0.5) + 0.5;
|
||||
vec2 f = iTc - tc;
|
||||
vec2 texelPos = uv * texSize - 0.5;
|
||||
ivec2 base = ivec2(floor(texelPos));
|
||||
vec2 f = texelPos - vec2(base);
|
||||
|
||||
vec2 w0 = f * (-0.5 + f * (1.0 - 0.5 * f));
|
||||
vec2 w1 = 1.0 + f * f * (-2.5 + 1.5 * f);
|
||||
vec2 w2 = f * (0.5 + f * (2.0 - 1.5 * f));
|
||||
vec2 w3 = f * f * (-0.5 + 0.5 * f);
|
||||
|
||||
vec2 s12 = w1 + w2;
|
||||
vec2 offset12 = w2 / s12;
|
||||
|
||||
vec2 tc0 = (tc - 1.0) * invTexSize;
|
||||
vec2 tc3 = (tc + 2.0) * invTexSize;
|
||||
vec2 tc12 = (tc + offset12) * invTexSize;
|
||||
|
||||
vec3 result =
|
||||
(texture(tex, vec2(tc0.x, tc0.y)).rgb * w0.x +
|
||||
texture(tex, vec2(tc12.x, tc0.y)).rgb * s12.x +
|
||||
texture(tex, vec2(tc3.x, tc0.y)).rgb * w3.x) * w0.y +
|
||||
(texture(tex, vec2(tc0.x, tc12.y)).rgb * w0.x +
|
||||
texture(tex, vec2(tc12.x, tc12.y)).rgb * s12.x +
|
||||
texture(tex, vec2(tc3.x, tc12.y)).rgb * w3.x) * s12.y +
|
||||
(texture(tex, vec2(tc0.x, tc3.y)).rgb * w0.x +
|
||||
texture(tex, vec2(tc12.x, tc3.y)).rgb * s12.x +
|
||||
texture(tex, vec2(tc3.x, tc3.y)).rgb * w3.x) * w3.y;
|
||||
|
||||
// Anti-ringing: clamp to range of the 4 nearest texels
|
||||
vec2 tcNear = tc * invTexSize;
|
||||
vec3 t00 = texture(tex, tcNear).rgb;
|
||||
vec3 t10 = texture(tex, tcNear + vec2(invTexSize.x, 0.0)).rgb;
|
||||
vec3 t01 = texture(tex, tcNear + vec2(0.0, invTexSize.y)).rgb;
|
||||
vec3 t11 = texture(tex, tcNear + invTexSize).rgb;
|
||||
vec3 minC = min(min(t00, t10), min(t01, t11));
|
||||
vec3 maxC = max(max(t00, t10), max(t01, t11));
|
||||
return clamp(result, minC, maxC);
|
||||
vec3 result = vec3(0.0);
|
||||
float totalWeight = 0.0;
|
||||
for (int y = -1; y <= 2; y++) {
|
||||
for (int x = -1; x <= 2; x++) {
|
||||
vec2 samplePos = (vec2(base + ivec2(x, y)) + 0.5) * invTexSize;
|
||||
float wx = lanczos2(float(x) - f.x);
|
||||
float wy = lanczos2(float(y) - f.y);
|
||||
float w = wx * wy;
|
||||
result += texture(tex, samplePos).rgb * w;
|
||||
totalWeight += w;
|
||||
}
|
||||
}
|
||||
return result / totalWeight;
|
||||
}
|
||||
|
||||
void main() {
|
||||
|
|
@ -87,9 +87,12 @@ void main() {
|
|||
|
||||
vec2 outUV = (vec2(outPixel) + 0.5) * pc.displaySize.zw;
|
||||
|
||||
vec3 currentColor = sampleBicubic(sceneColor, outUV, pc.internalSize.xy);
|
||||
// Lanczos2 upsample: sharper than bicubic, better base image
|
||||
vec3 currentColor = sampleLanczos(sceneColor, outUV, pc.internalSize.xy);
|
||||
|
||||
if (pc.params.x > 0.5) {
|
||||
// Temporal accumulation mode.
|
||||
const bool kUseTemporal = true;
|
||||
if (!kUseTemporal || pc.params.x > 0.5) {
|
||||
imageStore(historyOutput, outPixel, vec4(currentColor, 1.0));
|
||||
return;
|
||||
}
|
||||
|
|
@ -116,65 +119,52 @@ void main() {
|
|||
historyUV.y >= 0.0 && historyUV.y <= 1.0) ? 1.0 : 0.0;
|
||||
vec3 historyColor = texture(historyInput, historyUV).rgb;
|
||||
|
||||
// === Tonemapped accumulation ===
|
||||
// Tonemapped space for blending
|
||||
vec3 tmCurrent = tonemap(currentColor);
|
||||
vec3 tmHistory = tonemap(historyColor);
|
||||
|
||||
// Neighborhood in tonemapped YCoCg
|
||||
// 5-tap cross neighborhood for variance (cheaper than 9-tap, sufficient)
|
||||
vec3 s0 = rgbToYCoCg(tmCurrent);
|
||||
vec3 s1 = rgbToYCoCg(tonemap(texture(sceneColor, outUV + vec2(-texelSize.x, 0.0)).rgb));
|
||||
vec3 s2 = rgbToYCoCg(tonemap(texture(sceneColor, outUV + vec2( texelSize.x, 0.0)).rgb));
|
||||
vec3 s3 = rgbToYCoCg(tonemap(texture(sceneColor, outUV + vec2(0.0, -texelSize.y)).rgb));
|
||||
vec3 s4 = rgbToYCoCg(tonemap(texture(sceneColor, outUV + vec2(0.0, texelSize.y)).rgb));
|
||||
vec3 s5 = rgbToYCoCg(tonemap(texture(sceneColor, outUV + vec2(-texelSize.x, -texelSize.y)).rgb));
|
||||
vec3 s6 = rgbToYCoCg(tonemap(texture(sceneColor, outUV + vec2( texelSize.x, -texelSize.y)).rgb));
|
||||
vec3 s7 = rgbToYCoCg(tonemap(texture(sceneColor, outUV + vec2(-texelSize.x, texelSize.y)).rgb));
|
||||
vec3 s8 = rgbToYCoCg(tonemap(texture(sceneColor, outUV + vec2( texelSize.x, texelSize.y)).rgb));
|
||||
|
||||
vec3 m1 = s0 + s1 + s2 + s3 + s4 + s5 + s6 + s7 + s8;
|
||||
vec3 m2 = s0*s0 + s1*s1 + s2*s2 + s3*s3 + s4*s4 + s5*s5 + s6*s6 + s7*s7 + s8*s8;
|
||||
vec3 mean = m1 / 9.0;
|
||||
vec3 variance = max(m2 / 9.0 - mean * mean, vec3(0.0));
|
||||
vec3 m1 = s0 + s1 + s2 + s3 + s4;
|
||||
vec3 m2 = s0*s0 + s1*s1 + s2*s2 + s3*s3 + s4*s4;
|
||||
vec3 mean = m1 / 5.0;
|
||||
vec3 variance = max(m2 / 5.0 - mean * mean, vec3(0.0));
|
||||
vec3 stddev = sqrt(variance);
|
||||
|
||||
float gamma = 1.5;
|
||||
float gamma = 1.25;
|
||||
vec3 boxMin = mean - gamma * stddev;
|
||||
vec3 boxMax = mean + gamma * stddev;
|
||||
|
||||
// Compute clamped history and measure how far it was from the box
|
||||
// Variance clip history
|
||||
vec3 tmHistYCoCg = rgbToYCoCg(tmHistory);
|
||||
vec3 clampedYCoCg = clamp(tmHistYCoCg, boxMin, boxMax);
|
||||
float clampDist = length(tmHistYCoCg - clampedYCoCg);
|
||||
vec3 clippedYCoCg = clipAABB(boxMin, boxMax, tmHistYCoCg);
|
||||
float clipDist = length(tmHistYCoCg - clippedYCoCg);
|
||||
tmHistory = yCoCgToRgb(clippedYCoCg);
|
||||
|
||||
// SELECTIVE CLAMP: only modify history when there's motion or disocclusion.
|
||||
// For static pixels, history is already well-accumulated — clamping it
|
||||
// each frame causes the clamp box (which shifts with jitter) to drag
|
||||
// the history around, creating visible shimmer. By leaving static history
|
||||
// untouched, accumulated anti-aliasing and detail is preserved.
|
||||
float needsClamp = max(
|
||||
clamp(motionMag * 2.0, 0.0, 1.0), // motion → full clamp
|
||||
clamp(clampDist * 3.0, 0.0, 1.0) // disocclusion → full clamp
|
||||
);
|
||||
tmHistory = yCoCgToRgb(mix(tmHistYCoCg, clampedYCoCg, needsClamp));
|
||||
// --- Blend factor ---
|
||||
// Base: always start from current frame (sharp Lanczos).
|
||||
// Temporal blending only at edges with small fixed weight.
|
||||
// This provides AA without blurring smooth areas.
|
||||
|
||||
// Blend: higher for good jitter samples, lower for poor ones.
|
||||
// Jitter-aware weighting: current frame's sample quality depends on
|
||||
// how close the jittered sample fell to this output pixel.
|
||||
vec2 jitterPx = pc.jitterOffset.xy * 0.5 * pc.internalSize.xy;
|
||||
vec2 internalPos = outUV * pc.internalSize.xy;
|
||||
vec2 subPixelOffset = fract(internalPos) - 0.5;
|
||||
vec2 sampleDelta = subPixelOffset - jitterPx;
|
||||
float dist2 = dot(sampleDelta, sampleDelta);
|
||||
float sampleQuality = exp(-dist2 * 3.0);
|
||||
float blendFactor = mix(0.03, 0.20, sampleQuality);
|
||||
// Edge detection: luminance variance in YCoCg
|
||||
float edgeStrength = smoothstep(0.04, 0.12, stddev.x);
|
||||
|
||||
// Disocclusion: aggressively replace stale history
|
||||
blendFactor = mix(blendFactor, 0.80, clamp(clampDist * 5.0, 0.0, 1.0));
|
||||
// Keep temporal reconstruction active continuously instead of freezing after
|
||||
// a small convergence window. Favor history on stable pixels and favor
|
||||
// current color when edge/motion risk is high to avoid blur/ghosting.
|
||||
float motionFactor = smoothstep(0.05, 1.5, motionMag);
|
||||
float currentBase = mix(0.12, 0.30, edgeStrength);
|
||||
float blendFactor = mix(currentBase, 0.85, motionFactor);
|
||||
|
||||
// Velocity: strong response during camera/object motion
|
||||
blendFactor = max(blendFactor, clamp(motionMag * 0.30, 0.0, 0.50));
|
||||
// Disocclusion: replace stale history
|
||||
blendFactor = max(blendFactor, clamp(clipDist * 5.0, 0.0, 0.80));
|
||||
|
||||
// Full current frame when history is out of bounds
|
||||
// Invalid history: use current frame
|
||||
blendFactor = mix(blendFactor, 1.0, 1.0 - historyValid);
|
||||
|
||||
// Blend in tonemapped space, inverse-tonemap back to linear
|
||||
|
|
|
|||
Binary file not shown.
|
|
@ -6,45 +6,41 @@ layout(set = 0, binding = 0) uniform sampler2D depthBuffer;
|
|||
layout(set = 0, binding = 1, rg16f) uniform writeonly image2D motionVectors;
|
||||
|
||||
layout(push_constant) uniform PushConstants {
|
||||
mat4 reprojMatrix; // prevUnjitteredVP * inverse(currentUnjitteredVP)
|
||||
vec4 resolution; // xy = internal size, zw = 1/internal size
|
||||
vec4 jitterOffset; // xy = current jitter (NDC), zw = unused
|
||||
mat4 prevViewProjection; // previous jittered VP
|
||||
mat4 invCurrentViewProj; // inverse(current jittered VP)
|
||||
} pc;
|
||||
|
||||
void main() {
|
||||
ivec2 pixelCoord = ivec2(gl_GlobalInvocationID.xy);
|
||||
ivec2 imgSize = ivec2(pc.resolution.xy);
|
||||
ivec2 imgSize = imageSize(motionVectors);
|
||||
if (pixelCoord.x >= imgSize.x || pixelCoord.y >= imgSize.y) return;
|
||||
|
||||
float depth = texelFetch(depthBuffer, pixelCoord, 0).r;
|
||||
|
||||
// Pixel center UV and NDC
|
||||
vec2 uv = (vec2(pixelCoord) + 0.5) * pc.resolution.zw;
|
||||
vec2 uv = (vec2(pixelCoord) + 0.5) / vec2(imgSize);
|
||||
vec2 ndc = uv * 2.0 - 1.0;
|
||||
|
||||
// Unjitter the NDC: the scene was rendered with jitter applied to
|
||||
// projection[2][0/1]. For RH perspective (P[2][3]=-1, clip.w=-vz):
|
||||
// jittered_ndc = unjittered_ndc - jitter
|
||||
// unjittered_ndc = ndc + jitter
|
||||
vec2 unjitteredNDC = ndc + pc.jitterOffset.xy;
|
||||
// Reconstruct current world position from current frame depth.
|
||||
vec4 clipPos = vec4(ndc, depth, 1.0);
|
||||
vec4 worldPos = pc.invCurrentViewProj * clipPos;
|
||||
if (abs(worldPos.w) < 1e-6) {
|
||||
imageStore(motionVectors, pixelCoord, vec4(0.0, 0.0, 0.0, 0.0));
|
||||
return;
|
||||
}
|
||||
worldPos /= worldPos.w;
|
||||
|
||||
// Reproject to previous frame via unjittered VP matrices
|
||||
vec4 clipPos = vec4(unjitteredNDC, depth, 1.0);
|
||||
vec4 prevClip = pc.reprojMatrix * clipPos;
|
||||
// Project reconstructed world position into previous frame clip space.
|
||||
vec4 prevClip = pc.prevViewProjection * worldPos;
|
||||
if (abs(prevClip.w) < 1e-6) {
|
||||
imageStore(motionVectors, pixelCoord, vec4(0.0, 0.0, 0.0, 0.0));
|
||||
return;
|
||||
}
|
||||
vec2 prevNdc = prevClip.xy / prevClip.w;
|
||||
vec2 prevUV = prevNdc * 0.5 + 0.5;
|
||||
|
||||
// Current unjittered UV for this pixel's world content
|
||||
vec2 currentUnjitteredUV = unjitteredNDC * 0.5 + 0.5;
|
||||
|
||||
// Motion between unjittered positions — jitter-free.
|
||||
// For a static scene (identity reprojMatrix), this is exactly zero.
|
||||
vec2 motion = prevUV - currentUnjitteredUV;
|
||||
|
||||
// Soft dead zone: smoothly fade out sub-pixel noise from float precision
|
||||
// in reprojMatrix (avoids hard spatial discontinuity from step())
|
||||
float motionPx = length(motion * pc.resolution.xy);
|
||||
motion *= smoothstep(0.0, 0.05, motionPx);
|
||||
vec2 currentUV = uv;
|
||||
vec2 motion = prevUV - currentUV;
|
||||
|
||||
imageStore(motionVectors, pixelCoord, vec4(motion, 0.0, 0.0));
|
||||
}
|
||||
|
|
|
|||
|
|
@ -34,17 +34,21 @@ void main() {
|
|||
vec3 range = maxRGB - minRGB;
|
||||
vec3 rcpRange = 1.0 / (range + 0.001);
|
||||
|
||||
// Sharpening amount: inversely proportional to contrast
|
||||
float luma = dot(center, vec3(0.299, 0.587, 0.114));
|
||||
float lumaRange = max(range.r, max(range.g, range.b));
|
||||
float w = clamp(1.0 - lumaRange * 2.0, 0.0, 1.0) * sharpness * 0.25;
|
||||
// AMD FidelityFX RCAS-style weight computation:
|
||||
// Compute per-channel sharpening weight from local contrast
|
||||
vec3 rcpM = 1.0 / (4.0 * range + 0.001);
|
||||
// Weight capped at sharpness, inversely proportional to contrast
|
||||
float w = min(min(rcpM.r, min(rcpM.g, rcpM.b)), sharpness);
|
||||
|
||||
// Apply sharpening via unsharp mask
|
||||
vec3 avg = (north + south + west + east) * 0.25;
|
||||
vec3 sharpened = center + (center - avg) * w;
|
||||
// Apply sharpening: negative lobe on neighbors
|
||||
vec3 sharpened = (center * (1.0 + 4.0 * w) - (north + south + west + east) * w)
|
||||
/ (1.0 + 4.0 * w - 4.0 * w);
|
||||
// Simplified: center + w * (4*center - north - south - west - east)
|
||||
sharpened = center + w * (4.0 * center - north - south - west - east);
|
||||
|
||||
// Clamp to prevent ringing artifacts
|
||||
sharpened = clamp(sharpened, minRGB, maxRGB);
|
||||
// Soft clamp: allow some overshoot for sharpness, prevent extreme ringing
|
||||
vec3 overshoot = 0.1 * (maxRGB - minRGB);
|
||||
sharpened = clamp(sharpened, minRGB - overshoot, maxRGB + overshoot);
|
||||
|
||||
FragColor = vec4(sharpened, 1.0);
|
||||
}
|
||||
|
|
|
|||
Binary file not shown.
Loading…
Add table
Add a link
Reference in a new issue