mirror of
https://github.com/Kelsidavis/WoWee.git
synced 2026-03-22 23:30:14 +00:00
FSR2: fix motion vector jitter, add bicubic anti-ringing, depth-dilated MVs
- Motion shader: unjitter NDC before reprojection (ndc+jitter, not ndc-jitter), compute motion against unjittered UV so static scenes produce zero motion
- Pass jitter offset to motion shader (push constant 80→96 bytes)
- Accumulate shader: restore Catmull-Rom bicubic with anti-ringing clamp to prevent negative-lobe halos at edges while maintaining sharpness
- Add depth-dilated motion vectors (3x3 nearest-to-camera) to prevent background MVs bleeding over foreground edges
- Widen neighborhood clamp gamma to 3.0, uniform 5% blend with disocclusion/velocity reactive boosting
This commit is contained in:
parent
e94eb7f2d1
commit
c3047c33ba
5 changed files with 108 additions and 18 deletions
|
|
@ -29,20 +29,84 @@ vec3 yCoCgToRgb(vec3 ycocg) {
|
||||||
return vec3(y + co - cg, y + cg, y - co - cg);
|
return vec3(y + co - cg, y + cg, y - co - cg);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Catmull-Rom bicubic lookup built from a 3x3 grid of texture() taps,
// followed by an anti-ringing clamp against the 2x2 texels around the
// sample position.
//
//   tex     - texture to sample
//   uv      - normalized sample position
//   texSize - size in texels used to convert uv <-> texel coordinates
//
// NOTE(review): folding the two middle Catmull-Rom taps into a single fetch
// presumably relies on `tex` being bound with linear filtering, and on
// texSize matching the real texture dimensions — confirm at the call site.
vec3 sampleBicubic(sampler2D tex, vec2 uv, vec2 texSize) {
    vec2 texel = 1.0 / texSize;

    // Position in texel units, split into the texel center at or below it
    // and the fractional remainder.
    vec2 pos  = uv * texSize;
    vec2 base = floor(pos - 0.5) + 0.5;
    vec2 t    = pos - base;

    // Per-axis Catmull-Rom weights for texels base-1, base, base+1, base+2.
    vec2 wOuterLo = t * (-0.5 + t * (1.0 - 0.5 * t));
    vec2 wInnerLo = 1.0 + t * t * (-2.5 + 1.5 * t);
    vec2 wInnerHi = t * (0.5 + t * (2.0 - 1.5 * t));
    vec2 wOuterHi = t * t * (-0.5 + 0.5 * t);

    // The two inner weights are merged into one tap whose position is
    // shifted toward base+1 in proportion to its share of the merged weight.
    vec2 wMid     = wInnerLo + wInnerHi;
    vec2 midShift = wInnerHi / wMid;

    vec2 uvLo  = (base - 1.0) * texel;
    vec2 uvHi  = (base + 2.0) * texel;
    vec2 uvMid = (base + midShift) * texel;

    // Horizontal pass for each of the three tap rows.
    vec3 rowLo =
        texture(tex, uvLo).rgb * wOuterLo.x +
        texture(tex, vec2(uvMid.x, uvLo.y)).rgb * wMid.x +
        texture(tex, vec2(uvHi.x, uvLo.y)).rgb * wOuterHi.x;

    vec3 rowMid =
        texture(tex, vec2(uvLo.x, uvMid.y)).rgb * wOuterLo.x +
        texture(tex, uvMid).rgb * wMid.x +
        texture(tex, vec2(uvHi.x, uvMid.y)).rgb * wOuterHi.x;

    vec3 rowHi =
        texture(tex, vec2(uvLo.x, uvHi.y)).rgb * wOuterLo.x +
        texture(tex, vec2(uvMid.x, uvHi.y)).rgb * wMid.x +
        texture(tex, uvHi).rgb * wOuterHi.x;

    // Vertical pass combines the rows.
    vec3 filtered = rowLo * wOuterLo.y + rowMid * wMid.y + rowHi * wOuterHi.y;

    // Anti-ringing: the outer Catmull-Rom weights can be negative, so the
    // filtered value may overshoot at high-contrast edges. Limit it to the
    // min/max of the four taps at base and base+1 on each axis.
    vec2 uvNear = base * texel;
    vec3 c00 = texture(tex, uvNear).rgb;
    vec3 c10 = texture(tex, uvNear + vec2(texel.x, 0.0)).rgb;
    vec3 c01 = texture(tex, uvNear + vec2(0.0, texel.y)).rgb;
    vec3 c11 = texture(tex, uvNear + texel).rgb;

    vec3 lo = min(min(c00, c10), min(c01, c11));
    vec3 hi = max(max(c00, c10), max(c01, c11));

    return clamp(filtered, lo, hi);
}
|
||||||
|
|
||||||
void main() {
|
void main() {
|
||||||
ivec2 outPixel = ivec2(gl_GlobalInvocationID.xy);
|
ivec2 outPixel = ivec2(gl_GlobalInvocationID.xy);
|
||||||
ivec2 outSize = ivec2(pc.displaySize.xy);
|
ivec2 outSize = ivec2(pc.displaySize.xy);
|
||||||
if (outPixel.x >= outSize.x || outPixel.y >= outSize.y) return;
|
if (outPixel.x >= outSize.x || outPixel.y >= outSize.y) return;
|
||||||
|
|
||||||
vec2 outUV = (vec2(outPixel) + 0.5) * pc.displaySize.zw;
|
vec2 outUV = (vec2(outPixel) + 0.5) * pc.displaySize.zw;
|
||||||
vec3 currentColor = texture(sceneColor, outUV).rgb;
|
|
||||||
|
// Bicubic upsampling with anti-ringing: sharp without edge halos
|
||||||
|
vec3 currentColor = sampleBicubic(sceneColor, outUV, pc.internalSize.xy);
|
||||||
|
|
||||||
if (pc.params.x > 0.5) {
|
if (pc.params.x > 0.5) {
|
||||||
imageStore(historyOutput, outPixel, vec4(currentColor, 1.0));
|
imageStore(historyOutput, outPixel, vec4(currentColor, 1.0));
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
vec2 motion = texture(motionVectors, outUV).rg;
|
// Depth-dilated motion vector: pick the MV from the nearest-to-camera
|
||||||
|
// pixel in a 3x3 neighborhood. Prevents background MVs from bleeding
|
||||||
|
// over foreground edges.
|
||||||
|
vec2 texelSize = pc.internalSize.zw;
|
||||||
|
float closestDepth = texture(depthBuffer, outUV).r;
|
||||||
|
vec2 closestOffset = vec2(0.0);
|
||||||
|
for (int y = -1; y <= 1; y++) {
|
||||||
|
for (int x = -1; x <= 1; x++) {
|
||||||
|
vec2 off = vec2(float(x), float(y)) * texelSize;
|
||||||
|
float d = texture(depthBuffer, outUV + off).r;
|
||||||
|
if (d < closestDepth) {
|
||||||
|
closestDepth = d;
|
||||||
|
closestOffset = off;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
vec2 motion = texture(motionVectors, outUV + closestOffset).rg;
|
||||||
|
|
||||||
vec2 historyUV = outUV + motion;
|
vec2 historyUV = outUV + motion;
|
||||||
|
|
||||||
float historyValid = (historyUV.x >= 0.0 && historyUV.x <= 1.0 &&
|
float historyValid = (historyUV.x >= 0.0 && historyUV.x <= 1.0 &&
|
||||||
|
|
@ -50,8 +114,9 @@ void main() {
|
||||||
|
|
||||||
vec3 historyColor = texture(historyInput, historyUV).rgb;
|
vec3 historyColor = texture(historyInput, historyUV).rgb;
|
||||||
|
|
||||||
// Neighborhood clamping in YCoCg space
|
// Neighborhood clamping in YCoCg space with wide gamma.
|
||||||
vec2 texelSize = pc.internalSize.zw;
|
// Wide gamma (3.0) prevents jitter-chasing: the clamp box only catches
|
||||||
|
// truly stale history (disocclusion), not normal jitter variation.
|
||||||
vec3 s0 = rgbToYCoCg(currentColor);
|
vec3 s0 = rgbToYCoCg(currentColor);
|
||||||
vec3 s1 = rgbToYCoCg(texture(sceneColor, outUV + vec2(-texelSize.x, 0.0)).rgb);
|
vec3 s1 = rgbToYCoCg(texture(sceneColor, outUV + vec2(-texelSize.x, 0.0)).rgb);
|
||||||
vec3 s2 = rgbToYCoCg(texture(sceneColor, outUV + vec2( texelSize.x, 0.0)).rgb);
|
vec3 s2 = rgbToYCoCg(texture(sceneColor, outUV + vec2( texelSize.x, 0.0)).rgb);
|
||||||
|
|
@ -68,7 +133,7 @@ void main() {
|
||||||
vec3 variance = max(m2 / 9.0 - mean * mean, vec3(0.0));
|
vec3 variance = max(m2 / 9.0 - mean * mean, vec3(0.0));
|
||||||
vec3 stddev = sqrt(variance);
|
vec3 stddev = sqrt(variance);
|
||||||
|
|
||||||
float gamma = 1.5;
|
float gamma = 3.0;
|
||||||
vec3 boxMin = mean - gamma * stddev;
|
vec3 boxMin = mean - gamma * stddev;
|
||||||
vec3 boxMax = mean + gamma * stddev;
|
vec3 boxMax = mean + gamma * stddev;
|
||||||
|
|
||||||
|
|
@ -77,7 +142,19 @@ void main() {
|
||||||
historyColor = yCoCgToRgb(clampedHistory);
|
historyColor = yCoCgToRgb(clampedHistory);
|
||||||
|
|
||||||
float clampDist = length(historyYCoCg - clampedHistory);
|
float clampDist = length(historyYCoCg - clampedHistory);
|
||||||
float blendFactor = mix(0.05, 0.30, clamp(clampDist * 2.0, 0.0, 1.0));
|
|
||||||
|
// Uniform 5% blend: ~45 frames for 90% convergence.
|
||||||
|
// Simpler than edge-aware; the anti-ringing bicubic handles edge stability.
|
||||||
|
float blendFactor = 0.05;
|
||||||
|
|
||||||
|
// Disocclusion: large clamp distance → rapidly replace stale history
|
||||||
|
blendFactor = mix(blendFactor, 0.60, clamp(clampDist * 5.0, 0.0, 1.0));
|
||||||
|
|
||||||
|
// Velocity: higher blend during motion reduces ghosting
|
||||||
|
float motionMag = length(motion * pc.displaySize.xy);
|
||||||
|
blendFactor = max(blendFactor, clamp(motionMag * 0.15, 0.0, 0.35));
|
||||||
|
|
||||||
|
// Full current frame when history is out of bounds
|
||||||
blendFactor = mix(blendFactor, 1.0, 1.0 - historyValid);
|
blendFactor = mix(blendFactor, 1.0, 1.0 - historyValid);
|
||||||
|
|
||||||
vec3 result = mix(historyColor, currentColor, blendFactor);
|
vec3 result = mix(historyColor, currentColor, blendFactor);
|
||||||
|
|
|
||||||
Binary file not shown.
|
|
@ -8,6 +8,7 @@ layout(set = 0, binding = 1, rg16f) uniform writeonly image2D motionVectors;
|
||||||
layout(push_constant) uniform PushConstants {
|
layout(push_constant) uniform PushConstants {
|
||||||
mat4 reprojMatrix; // prevUnjitteredVP * inverse(currentUnjitteredVP)
|
mat4 reprojMatrix; // prevUnjitteredVP * inverse(currentUnjitteredVP)
|
||||||
vec4 resolution; // xy = internal size, zw = 1/internal size
|
vec4 resolution; // xy = internal size, zw = 1/internal size
|
||||||
|
vec4 jitterOffset; // xy = current jitter (NDC), zw = unused
|
||||||
} pc;
|
} pc;
|
||||||
|
|
||||||
void main() {
|
void main() {
|
||||||
|
|
@ -15,21 +16,30 @@ void main() {
|
||||||
ivec2 imgSize = ivec2(pc.resolution.xy);
|
ivec2 imgSize = ivec2(pc.resolution.xy);
|
||||||
if (pixelCoord.x >= imgSize.x || pixelCoord.y >= imgSize.y) return;
|
if (pixelCoord.x >= imgSize.x || pixelCoord.y >= imgSize.y) return;
|
||||||
|
|
||||||
// Sample depth (Vulkan: 0 = near, 1 = far)
|
|
||||||
float depth = texelFetch(depthBuffer, pixelCoord, 0).r;
|
float depth = texelFetch(depthBuffer, pixelCoord, 0).r;
|
||||||
|
|
||||||
// Pixel center in UV [0,1] and NDC [-1,1]
|
// Pixel center UV and NDC
|
||||||
vec2 uv = (vec2(pixelCoord) + 0.5) * pc.resolution.zw;
|
vec2 uv = (vec2(pixelCoord) + 0.5) * pc.resolution.zw;
|
||||||
vec2 ndc = uv * 2.0 - 1.0;
|
vec2 ndc = uv * 2.0 - 1.0;
|
||||||
|
|
||||||
// Clip-to-clip reprojection: current unjittered clip → previous unjittered clip
|
// Unjitter the NDC: the scene was rendered with jitter applied to
|
||||||
vec4 clipPos = vec4(ndc, depth, 1.0);
|
// projection[2][0/1]. For RH perspective (P[2][3]=-1, clip.w=-vz):
|
||||||
|
// jittered_ndc = unjittered_ndc - jitter
|
||||||
|
// unjittered_ndc = ndc + jitter
|
||||||
|
vec2 unjitteredNDC = ndc + pc.jitterOffset.xy;
|
||||||
|
|
||||||
|
// Reproject to previous frame via unjittered VP matrices
|
||||||
|
vec4 clipPos = vec4(unjitteredNDC, depth, 1.0);
|
||||||
vec4 prevClip = pc.reprojMatrix * clipPos;
|
vec4 prevClip = pc.reprojMatrix * clipPos;
|
||||||
vec2 prevNdc = prevClip.xy / prevClip.w;
|
vec2 prevNdc = prevClip.xy / prevClip.w;
|
||||||
vec2 prevUV = prevNdc * 0.5 + 0.5;
|
vec2 prevUV = prevNdc * 0.5 + 0.5;
|
||||||
|
|
||||||
// Motion = previous position - current position (both unjittered, in UV space)
|
// Current unjittered UV for this pixel's world content
|
||||||
vec2 motion = prevUV - uv;
|
vec2 currentUnjitteredUV = unjitteredNDC * 0.5 + 0.5;
|
||||||
|
|
||||||
|
// Motion between unjittered positions — jitter-free.
|
||||||
|
// For a static scene (identity reprojMatrix), this is exactly zero.
|
||||||
|
vec2 motion = prevUV - currentUnjitteredUV;
|
||||||
|
|
||||||
imageStore(motionVectors, pixelCoord, vec4(motion, 0.0, 0.0));
|
imageStore(motionVectors, pixelCoord, vec4(motion, 0.0, 0.0));
|
||||||
}
|
}
|
||||||
|
|
|
||||||
Binary file not shown.
|
|
@ -3785,7 +3785,7 @@ bool Renderer::initFSR2Resources() {
|
||||||
VkPushConstantRange pc{};
|
VkPushConstantRange pc{};
|
||||||
pc.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT;
|
pc.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT;
|
||||||
pc.offset = 0;
|
pc.offset = 0;
|
||||||
pc.size = sizeof(glm::mat4) + sizeof(glm::vec4); // 80 bytes
|
pc.size = sizeof(glm::mat4) + 2 * sizeof(glm::vec4); // 96 bytes
|
||||||
|
|
||||||
VkPipelineLayoutCreateInfo plCI{VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO};
|
VkPipelineLayoutCreateInfo plCI{VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO};
|
||||||
plCI.setLayoutCount = 1;
|
plCI.setLayoutCount = 1;
|
||||||
|
|
@ -4086,17 +4086,20 @@ void Renderer::dispatchMotionVectors() {
|
||||||
vkCmdBindDescriptorSets(currentCmd, VK_PIPELINE_BIND_POINT_COMPUTE,
|
vkCmdBindDescriptorSets(currentCmd, VK_PIPELINE_BIND_POINT_COMPUTE,
|
||||||
fsr2_.motionVecPipelineLayout, 0, 1, &fsr2_.motionVecDescSet, 0, nullptr);
|
fsr2_.motionVecPipelineLayout, 0, 1, &fsr2_.motionVecDescSet, 0, nullptr);
|
||||||
|
|
||||||
// Single reprojection matrix: prevUnjitteredVP * inv(currentUnjitteredVP)
|
// Reprojection: prevUnjitteredVP * inv(currentUnjitteredVP)
|
||||||
// Both matrices are unjittered — jitter only affects sub-pixel sampling,
|
// Using unjittered VPs avoids numerical instability from jitter amplification
|
||||||
// not motion vector computation. This avoids numerical instability from
|
// through large world coordinates. The shader un-jitters NDC by adding the
|
||||||
// jitter amplification through large world coordinates.
|
// current jitter before reprojection (depth was rendered at jittered position).
|
||||||
struct {
|
struct {
|
||||||
glm::mat4 reprojMatrix; // prevUnjitteredVP * inv(currentUnjitteredVP)
|
glm::mat4 reprojMatrix;
|
||||||
glm::vec4 resolution;
|
glm::vec4 resolution;
|
||||||
|
glm::vec4 jitterOffset; // xy = current jitter (NDC), zw = unused
|
||||||
} pc;
|
} pc;
|
||||||
|
|
||||||
glm::mat4 currentUnjitteredVP = camera->getUnjitteredViewProjectionMatrix();
|
glm::mat4 currentUnjitteredVP = camera->getUnjitteredViewProjectionMatrix();
|
||||||
pc.reprojMatrix = fsr2_.prevViewProjection * glm::inverse(currentUnjitteredVP);
|
pc.reprojMatrix = fsr2_.prevViewProjection * glm::inverse(currentUnjitteredVP);
|
||||||
|
glm::vec2 jitter = camera->getJitter();
|
||||||
|
pc.jitterOffset = glm::vec4(jitter.x, jitter.y, 0.0f, 0.0f);
|
||||||
pc.resolution = glm::vec4(
|
pc.resolution = glm::vec4(
|
||||||
static_cast<float>(fsr2_.internalWidth),
|
static_cast<float>(fsr2_.internalWidth),
|
||||||
static_cast<float>(fsr2_.internalHeight),
|
static_cast<float>(fsr2_.internalHeight),
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue