From e94eb7f2d1a59fa5cc11e48658a7102ddb68f2a2 Mon Sep 17 00:00:00 2001 From: Kelsi Date: Sun, 8 Mar 2026 01:22:15 -0800 Subject: [PATCH] FSR2 temporal upscaling fixes: unjittered reprojection, sharpen Y-flip, MSAA guard, descriptor double-buffering - Motion vectors: single unjittered reprojection matrix (80 bytes) instead of two jittered matrices (160 bytes), eliminating numerical instability from jitter amplification through large world coordinates - Sharpen pass: fix Y-flip for correct UV sampling, double-buffer descriptor sets to avoid race with in-flight command buffers - MSAA: auto-disable when FSR2 enabled, grey out AA setting in UI - Accumulation: variance-based neighborhood clamping in YCoCg space, correct history layout transitions - Frame index: wrap at 256 for stable Halton sequence --- assets/shaders/fsr2_accumulate.comp.glsl | 84 ++++++++--------------- assets/shaders/fsr2_accumulate.comp.spv | Bin 8436 -> 10592 bytes assets/shaders/fsr2_motion.comp.glsl | 21 ++---- assets/shaders/fsr2_motion.comp.spv | Bin 3668 -> 3096 bytes assets/shaders/fsr2_sharpen.frag.glsl | 14 ++-- assets/shaders/fsr2_sharpen.frag.spv | Bin 3984 -> 4152 bytes include/rendering/renderer.hpp | 2 +- src/rendering/renderer.cpp | 72 +++++++++++-------- src/ui/game_screen.cpp | 8 ++- 9 files changed, 95 insertions(+), 106 deletions(-) diff --git a/assets/shaders/fsr2_accumulate.comp.glsl b/assets/shaders/fsr2_accumulate.comp.glsl index a998b52c..7fb0cb27 100644 --- a/assets/shaders/fsr2_accumulate.comp.glsl +++ b/assets/shaders/fsr2_accumulate.comp.glsl @@ -2,25 +2,19 @@ layout(local_size_x = 8, local_size_y = 8) in; -// Inputs (internal resolution) layout(set = 0, binding = 0) uniform sampler2D sceneColor; layout(set = 0, binding = 1) uniform sampler2D depthBuffer; layout(set = 0, binding = 2) uniform sampler2D motionVectors; - -// History (display resolution) layout(set = 0, binding = 3) uniform sampler2D historyInput; - -// Output (display resolution) layout(set = 0, binding = 4, rgba16f) uniform writeonly image2D historyOutput; layout(push_constant) uniform PushConstants { vec4 internalSize; // xy = internal resolution, zw = 1/internal vec4 displaySize; // xy = display resolution, zw = 1/display - vec4 jitterOffset; // xy = current jitter (pixel-space), zw = unused + vec4 jitterOffset; // xy = current jitter (NDC-space), zw = unused vec4 params; // x = resetHistory (1=reset), y = sharpness, zw = unused } pc; -// RGB <-> YCoCg for neighborhood clamping vec3 rgbToYCoCg(vec3 rgb) { float y = 0.25 * rgb.r + 0.5 * rgb.g + 0.25 * rgb.b; float co = 0.5 * rgb.r - 0.5 * rgb.b; @@ -40,76 +34,52 @@ void main() { ivec2 outSize = ivec2(pc.displaySize.xy); if (outPixel.x >= outSize.x || outPixel.y >= outSize.y) return; - // Output UV in display space vec2 outUV = (vec2(outPixel) + 0.5) * pc.displaySize.zw; + vec3 currentColor = texture(sceneColor, outUV).rgb; - // Map display pixel to internal resolution UV (accounting for jitter) - vec2 internalUV = outUV; - - // Sample current frame color at internal resolution - vec3 currentColor = texture(sceneColor, internalUV).rgb; - - // Sample motion vector at internal resolution - vec2 inUV = outUV; // Approximate — display maps to internal via scale - vec2 motion = texture(motionVectors, inUV).rg; - - // Reproject: where was this pixel in the previous frame's history? - vec2 historyUV = outUV - motion; - - // History reset: on teleport / camera cut, just use current frame if (pc.params.x > 0.5) { imageStore(historyOutput, outPixel, vec4(currentColor, 1.0)); return; } - // Sample reprojected history + vec2 motion = texture(motionVectors, outUV).rg; + vec2 historyUV = outUV + motion; + + float historyValid = (historyUV.x >= 0.0 && historyUV.x <= 1.0 && + historyUV.y >= 0.0 && historyUV.y <= 1.0) ? 1.0 : 0.0; + vec3 historyColor = texture(historyInput, historyUV).rgb; - // Neighborhood clamping in YCoCg space to prevent ghosting - // Sample 3x3 neighborhood from current frame + // Neighborhood clamping in YCoCg space vec2 texelSize = pc.internalSize.zw; - vec3 samples[9]; - int idx = 0; - for (int dy = -1; dy <= 1; dy++) { - for (int dx = -1; dx <= 1; dx++) { - samples[idx] = rgbToYCoCg(texture(sceneColor, internalUV + vec2(dx, dy) * texelSize).rgb); - idx++; - } - } + vec3 s0 = rgbToYCoCg(currentColor); + vec3 s1 = rgbToYCoCg(texture(sceneColor, outUV + vec2(-texelSize.x, 0.0)).rgb); + vec3 s2 = rgbToYCoCg(texture(sceneColor, outUV + vec2( texelSize.x, 0.0)).rgb); + vec3 s3 = rgbToYCoCg(texture(sceneColor, outUV + vec2(0.0, -texelSize.y)).rgb); + vec3 s4 = rgbToYCoCg(texture(sceneColor, outUV + vec2(0.0, texelSize.y)).rgb); + vec3 s5 = rgbToYCoCg(texture(sceneColor, outUV + vec2(-texelSize.x, -texelSize.y)).rgb); + vec3 s6 = rgbToYCoCg(texture(sceneColor, outUV + vec2( texelSize.x, -texelSize.y)).rgb); + vec3 s7 = rgbToYCoCg(texture(sceneColor, outUV + vec2(-texelSize.x, texelSize.y)).rgb); + vec3 s8 = rgbToYCoCg(texture(sceneColor, outUV + vec2( texelSize.x, texelSize.y)).rgb); - // Compute AABB in YCoCg - vec3 boxMin = samples[0]; - vec3 boxMax = samples[0]; - for (int i = 1; i < 9; i++) { - boxMin = min(boxMin, samples[i]); - boxMax = max(boxMax, samples[i]); - } + vec3 m1 = s0 + s1 + s2 + s3 + s4 + s5 + s6 + s7 + s8; + vec3 m2 = s0*s0 + s1*s1 + s2*s2 + s3*s3 + s4*s4 + s5*s5 + s6*s6 + s7*s7 + s8*s8; + vec3 mean = m1 / 9.0; + vec3 variance = max(m2 / 9.0 - mean * mean, vec3(0.0)); + vec3 stddev = sqrt(variance); - // Slightly expand the box to reduce flickering on edges - vec3 boxCenter = (boxMin + boxMax) * 0.5; - vec3 boxExtent = (boxMax - boxMin) * 0.5; - boxMin = boxCenter - boxExtent * 1.25; - boxMax = boxCenter + boxExtent * 1.25; + float gamma = 1.5; + vec3 boxMin = mean - gamma * stddev; + vec3 boxMax = mean + gamma * stddev; - // Clamp history to the neighborhood AABB vec3 historyYCoCg = rgbToYCoCg(historyColor); vec3 clampedHistory = clamp(historyYCoCg, boxMin, boxMax); historyColor = yCoCgToRgb(clampedHistory); - // Check if history UV is valid (within [0,1]) - float historyValid = (historyUV.x >= 0.0 && historyUV.x <= 1.0 && - historyUV.y >= 0.0 && historyUV.y <= 1.0) ? 1.0 : 0.0; - - // Blend factor: use more current frame for disoccluded regions - // Luminance difference between clamped history and original → confidence float clampDist = length(historyYCoCg - clampedHistory); - float blendFactor = mix(0.05, 0.3, clamp(clampDist * 4.0, 0.0, 1.0)); - - // If history is off-screen, use current frame entirely + float blendFactor = mix(0.05, 0.30, clamp(clampDist * 2.0, 0.0, 1.0)); blendFactor = mix(blendFactor, 1.0, 1.0 - historyValid); - // Final blend vec3 result = mix(historyColor, currentColor, blendFactor); - imageStore(historyOutput, outPixel, vec4(result, 1.0)); } diff --git a/assets/shaders/fsr2_accumulate.comp.spv b/assets/shaders/fsr2_accumulate.comp.spv index 4d31fba7cc189364681e035d9dddc1e48c3845d2..47529d75ae3b2f227713851a616bea13ce070b00 100644 GIT binary patch literal 10592 zcmZvh2b>*M6~-so>=GbA=pb!FP*9LwMFJtf5(p*~ks^ebw{JHOclWKkZ({-|BuG&d z8%Pl_f?#3+i4Bnwu!03tQ0$`E3wE*l|IfQ8xz5jF&+mTUcg{Wco-=o5vP;+GnHvl@7RU)(b0wD?PWuag+tAe#=`#QSfk!< zE@>DyHH&MG)Q2Ye^MGz5^7Xq}dt=rZ8(ef+>&!W=IfHvF893l@_SwaXr4H_)j-ptm z`?S_6gNq_&3Y|c&x9Wq8&asAN3D0D#w>tWsVuFmBmBq}?VvboC(_NeeZcVfoG?zAp zfXtg(_G)lwVc$?|QEh1M$dXpQ)^4^&<~q;17Io&F+Pt6v?3ns=F&uNW}w`(Kq zad=O$L8$|a=199SHc}fZCzQU<9DCp293LI3Eh}9y$9r*pv(2RW0|VoYHk5qbWj^on zXl<-E%p%q}Ag_1!wE|2nFF&2peO<*^pBNi!jI`&phFW7#^zPyy=}%G{5kw;SA`)IjTg0&Sd=?;f87mpx(B_msZ!j5 zTk`SoDY^b$xZesjZ3cY9gw^%i$C$Gx++15=MRCcjt>IgOdZk?YWAZ(dzK>K zxnSeX=lnXOd-h)L4Dwv4<>tCRboNTWdF2C1vkvz-h{ZaK!RC#9H^I&~?^dWj*ye39 zT2H6$DaG={oxeRZ-o$7g^X6CZ9jN@xIE*vj)hS5Leab!GUeWKjcWB97_gbtAbN)LS z-KT51{!IzTdeHK+uBh$iq#5g)a-RvWSo7ABU2fR`(4ht-{zeAZO*yh<>1~QzstexuixdI`(4ht z-{qY9J&+`c16G1v23 zV{YGDug26PW(@4SJZ2oOeiwOs=e9BDsjniB+61QlSJr(Equ1Z@sq;^(QuCP>$68Cl z?#FjR>}MHRJz_2ZyPrJfLb!VD=OVE4)c?jBuJ2lof2L>d@3olw^POV6`*5!pGkUpK zV_t`;IbZxQx+_+5kKVsaG56hqdp};Ev;}0BUxsa$Yd75o=Nqs8 zR;%K31{{_voWZe}EY8(Z?_~_vbzO1f!StNZ-dX zHSdu)a^C~4=Kds_`h6+)yT?=-9OF6t6xem_t?{nmKJI7qav#RrhpD*_aqQy( zu>05^vrnFf2Ql@iac$D9(S1Z7$Job1VApYPYcbbwAD?FQav#P#jH$T~aqQzW;Oag; zi>4m^_#9X*`Vn~?V;`RfyN-J^-ZfTJm34gq)9-w1@t&_?RF8A{Mew7EdxpM*xvw~v zUj`eaJ{dDlyaT=h)~|VaE{$=WM;P^6Z*Z}r8=Gt|}uVL!(F8g}Y)Xmd{nd>n| zea`cF{swjxrtitb4`UsD-^4b?)b&4+w5a!6@cf}td`gGOSrl8MNPi~yPm#kO}|0YuX*L?xvtQODcG;p;Ko1m%3oK3-M(U;A@=24HnOb5FkebJZA!R}dKwJ%$u>DRm*jInNi zf6^a)*$Ql5)b;!OmRj^>8*tv2ZQ*%ewu4vuvOSu5^x=6aCO`8#ps7b6b_AA!zDR_n~0tsYl(1 zfn85u)cq2$d(u~}`=x05H7|##+u!r`N8Lw&ty|r?_hVGEZok(@f}daveiYpAnS0Ye z8*Yqx#2gLw`w%g6;Kr#(%rW3+vzTMy#;HflabVw9k*5!CoO;9@5AMMtW-iz`^@uqE z?75DZ6T!x*N6b91_dH@w0vo3uF)st#pNN?cHcs8%`8N}_h(8&e_wJO${r%70oeDQb zJ?5MSw!gt&o_NeT9d3+z?Bf++bLoqDXMnv2!Ou)Q@|*=XMm_SJ4K|m)nD5F;ifxVZ(Yl%mmMQ~%(BTpS{E`2etAME`O-bg(148V<1k3555 zbLoqDi@~0W;LXG%&-rj;)FaQUz~<5y^M=5lo8ZHVN1hS5G3t@01vZzym^TV8pS|?l zy*ly8GX^(CJ@Sl$&85%tw}4SCa!#mY3;1or^Y$9>lFY75=Wr>SvHH!^W>kwj7l6$j zc`gM1zdRSA8>>I&z7}l0;IB(O>be+ijQXW%KbL^brEeNLcke5}{1kg&`j=zwbs3|+ z$p3n<+O$;jWng}aT`+ylxs*{|d^zJ4Sj>L|IQHxBXm5lY6aK5f&c8bC^-W+k<6{4B z2AfBpYh1~w7CGJm&ewP=JYVB&@O+K8!_|z7d;bowdGxu))r@Mf#yi3Oj;HThusq)R z*MWWKtLwjpQ7z`)09FfrBiI^(zYFYo)6(CXZUXbe|EJ(@L;&NBG57V1`eW`bU^RU= zGsgF|O6hT6TF6b*5O)r z!Og86`9BQir!c?s^&1!Y^+%u9fbCuIkHD`=-25MfyM8>+9|Nm}#eMxa8b9Tl<}{D7 zu6Z}3e%H8(m`{LjNo${ZW9G?!KiVegah|q?Z-J@Dyyt!RnD~2G~9HVv%cCu=(_PPpw@o^6mz%*7kff^~n1IuzOUGdAozF`|-W49`jxZ zcAk2i`xk-Dr_cS^d$q{>VsLdoJ`d`VcOURxn0n0H7wo*e*ZaZMW8VH?_oKc%_4)v? z`SeAu1HtCXdwmdGJ#rlkw%6)W+aX}{>9aP^ky_lZL&5ex_+en{5B?IcbK;pj9PAw9 z^!=N2+|kyb`AyNi@-x2)+%vx!W1R6>;QY+buJB_Lj~*NeH&1@%kAkaPoB8Ase>B+m zy#I6H>c+dCJmQZ98=v?1IJmm;?nfTI=_~d1{CK!Im#6zU7i^4r%sByUp140Jf{jy; zn0a7p34RjTIg$5eU}MxHW+sm@OEX!O=?fbp&9_D`DWzOgP&hMQ6Ip@FJ<))=`}4G50k((r@F0uW&8Z$YTx{$jy}wQDM>>bXUvYoG@%=j_1(p?j`NYJ z@N(0TiJ2JRz`yb6tr(5T_DNfNS6fF%Tl=!E-oe4H;p(PRp{rCZ7rOe2LxtXIaYMnl ziMhC9xwkaZmk($nB459W+LNd;G|;oEa!zZdb>M&v{YNgK&ju={Jh+*8B*`Y-t14#> z^h8V}D*@kI=^a2j#yM=t@LY6nrN$pDCdinDxtPVdn5Eldnv%8P%1E`dxUo_I<_S!BY38)D>ggi;Id?26goJ4;QMKnkorE!TD1?A08L%8>K6?)B((#bKCDSuf4Wtz%rrNOf?;N!ia`V|=ck z=H%`%enq(!p60$6zFJ_jvc+@_9)v6QZN%S@Yz6mmFvO3FH;NmRC&0tq>jz7P;naQ# zJ6%V#{-@DBm5rx)w`f0;+1=^*hU8i7Vj9Z{VdIExWep21pmW}##x2`eg-yBUWDmG( z9nupIo#vheKUTXr*$cb3M016{<+Ww2#qCoUxBpna)!n6{$1bhCFvc%sbjG#9^^^+b zzLUG%nzU{Q+)!b7q(qQCUx3|L7_6>aGSc7AOxl>Y&k66-S;%r?(js8*F|XhlnsQTf zoaffKmK@iRaXzHl*c0;^XD!d%-iTM&b82R4d%p61ZT&IcGnMyi>u*G?r9Csnjp=#H z$9leuwZC-U<7)Gcc-JN$uWdXk*_jUPVGg4mFWwc@jBY`8qXQn`sja^S+vlyEG6#FR z49n|x9b?SF*yh>^Ns`SAcfsBn)S}6yUwe9L)okNu)a*s!cP`_QuxEqqHS*5`lmCA6 zpABk3jJN+aXgTBU?;N!K$?HECZ4WWO^T3XkUw~iRvlRI*02^;U$M>S`*}a+#@?6N} z#k#y|(zViWUhV#ja~}3MfJB{jVDm=b-m|W?d8fnrGBhu*i*!DCM?AN#)@BaM|v9DR!KSFyZ!I=7|Rk3ZIaV)&O~U$le0&`m)6-=qjCjZU4h-AzzT?97oIY4*Z>_T*&DrjO@46Uo{-^3}-*tKaGdbIO&*p5$`@YM^ z`>xB|zUT6`@3*||`z>$#UJH9F`F*eD?S*x=@3QdQkMA+<6WKq{Ko{b(rUTaJ%!P=Y z;zf9J39&xAx{(W!g`5QUU3)R7p@_Z+=|=_-bN8ZOkI0*Q9lDfp@?PdHXSvsd^_$x> zq-}23bP%zg&unx1+<5~cA2EJQI4&PEj4gi~d3<(N5y#2jL>{>jME(Wpz7g&9GCuhY zh@59$9JMxr?Z1QkC=fE<^0Qg6)31E#o@Lu6;Q&Ba_jLdpovq`kb44{SKsl zuiu&RvDfdymQ%(f4antL&Ub^&YmEDSDO%1x?c+U&oIaly+WLGyydUx15V0Qs8?%P> z^2~h@F-G3H*B~7{^*r+*Li8(Modbk#>9X)&md|hVS??(}1qKA)x zjgcSgp>_{^98SODWe?Whg4P%HZvb0A>`#D=xhz}5Pl9hi-Hi6~Td41@@M6%*!mSOdo;$iz6-5C?uYxquJwJ0wLOPlL*(Orcp&5C&Ex%G zu6xlBWcWd}IpnvZA3}UCJdAez)z6dv8|X(6Fa6&{>sMRnKaQBovEmr}6R>Nb z?f(4~xjM7mzn>xA>kUJ3fNpRk5|FYLq5j529}>p26K3LJU{y)=F=Z*q9ewQ13T}iH6A-(s{z|u z@-e0nEFZO+!2Bf#Am-B_>#5H*a=o0VI%+lJvsUc?1Z+9iJ@!B@zXz^Y^sybjSSRPa zJ+^Zh&F+y&*v80Tm2s27@-wm;es{}7&MDwn4|DE_Z4KY2+EcNOk&m3y!19sPZ+E%K zxf9qrVebq!CdTgqmh*R8)Y%p6IDN5ia*<;lkmwq*54y?F}4LP7xo-*esA{1c3mfD-&k|8`QtZhk~r3QW6bSu z8U2xKU$C6MebCy`*M8tBGTlqLIi@^D_coMef zC+@AqU}NMbXL*-^`Ae1{`pxTaWcgUrljRZL`S!9LEZ2s3uH{yM`Ag5WetWdn@Sl?T z=ff=pFGCz-y;ijTsCOz@F6?%&b9ElpIt^@Y`N)4dn7_pQj@NHoK{tXJT8wlWIQuR)Xb1;$B__$6s30oaQmsnjL8U*0_b3v%t5awb!6mW3SC@Yn+Yk znE1|L1D5j&?wpL14sLD6`Mngs2hN3ayz%;73x5yTgLU=$d*C?4|HHDr&p5B`kYnlJ z@2c$)&-!Gv|9>?VJqZHk$K~(s zJ+S3t+$^x~9{JJiZ0rd(pT5Yo7ua00k?5fXEFZb%fbF3LiClYw&8N>jb?$PJcOP*5 z-2Cp4kG%VV?NL6)?GLW+$8QMv7m$MD(-*mp0-Gzp)<3fxPT;s;h{1kYv{F&bY+cUo-I?nh)aQ@6M zuCteBcC5i7Z1d#L{7KmI&dq$<5x)d%e182~vE_}oo_5490~?=TbN$r-O}=kC+Z{eoQB}y+oZe fz{beOm@~ooURGioCm;L03T%(o)bD=F`HcHNoe)lz diff --git a/assets/shaders/fsr2_motion.comp.glsl b/assets/shaders/fsr2_motion.comp.glsl index f4f68c2c..b0b39375 100644 --- a/assets/shaders/fsr2_motion.comp.glsl +++ b/assets/shaders/fsr2_motion.comp.glsl @@ -6,10 +6,8 @@ layout(set = 0, binding = 0) uniform sampler2D depthBuffer; layout(set = 0, binding = 1, rg16f) uniform writeonly image2D motionVectors; layout(push_constant) uniform PushConstants { - mat4 invViewProj; // Inverse of current jittered VP - mat4 prevViewProj; // Previous frame unjittered VP + mat4 reprojMatrix; // prevUnjitteredVP * inverse(currentUnjitteredVP) vec4 resolution; // xy = internal size, zw = 1/internal size - vec4 jitterOffset; // xy = current jitter (NDC), zw = previous jitter } pc; void main() { @@ -20,25 +18,18 @@ void main() { // Sample depth (Vulkan: 0 = near, 1 = far) float depth = texelFetch(depthBuffer, pixelCoord, 0).r; - // Pixel center in NDC [-1, 1] + // Pixel center in UV [0,1] and NDC [-1,1] vec2 uv = (vec2(pixelCoord) + 0.5) * pc.resolution.zw; vec2 ndc = uv * 2.0 - 1.0; - // Reconstruct world position from depth + // Clip-to-clip reprojection: current unjittered clip → previous unjittered clip vec4 clipPos = vec4(ndc, depth, 1.0); - vec4 worldPos = pc.invViewProj * clipPos; - worldPos /= worldPos.w; - - // Project into previous frame's clip space (unjittered) - vec4 prevClip = pc.prevViewProj * worldPos; + vec4 prevClip = pc.reprojMatrix * clipPos; vec2 prevNdc = prevClip.xy / prevClip.w; vec2 prevUV = prevNdc * 0.5 + 0.5; - // Remove jitter from current UV to get unjittered position - vec2 unjitteredUV = uv - pc.jitterOffset.xy * 0.5; - - // Motion = previous position - current unjittered position (in UV space) - vec2 motion = prevUV - unjitteredUV; + // Motion = previous position - current position (both unjittered, in UV space) + vec2 motion = prevUV - uv; imageStore(motionVectors, pixelCoord, vec4(motion, 0.0, 0.0)); } diff --git a/assets/shaders/fsr2_motion.comp.spv b/assets/shaders/fsr2_motion.comp.spv index 813c4b9dbf213b51b98f167c1ceceac241558285..faa3d8362634407aa4109d6e924888330bdb0a92 100644 GIT binary patch delta 793 zcmYjPO-lk%6usm4k%<&!kG;E`8Th(l# zWmj&f`0+5N0k+Y!YVB6Vx^mvNq7(5H!YTs`=#la zp3^DcpJfd39Vf1*l1;3Z0geou$d>?aoDtpSQ5EYukNRi8>$=Q31#e2zhA()BOGwax zs05|qeEVWq8;qj?Y{HhD)*K1&8O+x%8}Ui3d%y&+f&+&z_m$Rj4F7994VDMcLf4U; zpZ%Yk!*n3#9c*EP2tozm1UNv_0_LL9sB>sQ8vR+vTn3oSPhmX|XaLl6$YK4N(x^Wv z?Hp_RGPaE-<0f!Y8+$j7HewkWDYLu ZdN+X=lNsb46D4fI4Y^4d_(91A@C#%-RnhyXj{iLAkMfK(Fy($v%*EnIfAl|Jl0f^*YLu2; z6KE9jZTZA!%(&TgXH&VQcVetv;kv5e9DzmEr0hqJ`FukDP&gr6(H_oNXQ_j>V_(869@@^1@_>AdUvzc80 diff --git a/assets/shaders/fsr2_sharpen.frag.glsl b/assets/shaders/fsr2_sharpen.frag.glsl index b4dd928b..2c649d22 100644 --- a/assets/shaders/fsr2_sharpen.frag.glsl +++ b/assets/shaders/fsr2_sharpen.frag.glsl @@ -10,16 +10,20 @@ layout(push_constant) uniform PushConstants { } pc; void main() { + // Undo the vertex shader Y flip (postprocess.vert flips for Vulkan overlay, + // but we need standard UV coords for texture sampling) + vec2 tc = vec2(TexCoord.x, 1.0 - TexCoord.y); + vec2 texelSize = pc.params.xy; float sharpness = pc.params.z; // RCAS: Robust Contrast-Adaptive Sharpening // 5-tap cross pattern - vec3 center = texture(inputImage, TexCoord).rgb; - vec3 north = texture(inputImage, TexCoord + vec2(0.0, -texelSize.y)).rgb; - vec3 south = texture(inputImage, TexCoord + vec2(0.0, texelSize.y)).rgb; - vec3 west = texture(inputImage, TexCoord + vec2(-texelSize.x, 0.0)).rgb; - vec3 east = texture(inputImage, TexCoord + vec2( texelSize.x, 0.0)).rgb; + vec3 center = texture(inputImage, tc).rgb; + vec3 north = texture(inputImage, tc + vec2(0.0, -texelSize.y)).rgb; + vec3 south = texture(inputImage, tc + vec2(0.0, texelSize.y)).rgb; + vec3 west = texture(inputImage, tc + vec2(-texelSize.x, 0.0)).rgb; + vec3 east = texture(inputImage, tc + vec2( texelSize.x, 0.0)).rgb; // Compute local contrast (min/max of neighborhood) vec3 minRGB = min(center, min(min(north, south), min(west, east))); diff --git a/assets/shaders/fsr2_sharpen.frag.spv b/assets/shaders/fsr2_sharpen.frag.spv index 99aba03a78ee43bcb0840210d120f2610bb1845b..f9d2394cd951d6e28aefe6d1a8dc337853427c6d 100644 GIT binary patch literal 4152 zcmZ9N`F2!A5XLWrL{=407I6v29mEX~K>`7BT%rk!8!ll?#xR=9#F+^MT!M&bL#Z>eN|mm-M9Nr+w`srvTRy5C!3f3k+q)r*>sc) zt)n>>dUy7&syBvKtzL7h5f^4{%|K%=$}Y%eFlwX4az)1+>>PG6aq1KRwUIT;{$$dO zY<5=Y9Vqnm6?(S~433Qr)EfszN&_S1N@-xITq_MW%KJ*j%}nCTmBEqmp){bKh_r4y zHD*zxF_>lT{On^_X<}=&S{nj3wZ-~N6Qz-z<%5(!>&TWe_K(;1Zmm}8jbf!yhj-vn z_ZchJild0;UnbAy)@sAjLuhIO6C=e`mLWn!Iv>}B@W}9e{&wB=6dA5U!T~kwJmy{ zd*G`J4zCaYOYozMPIU)=8Gd%#qUPS(%PZJl6Mt`9NoV%?D&`#H^u@drV4lsvKWNg) zZu|TR?3&#-`;?!{wY8jC_sjR?W86O|?Tp6GWOUEnr{I2Hg8L4-67HV*eF(qYZ$Zkt zr|`lQ?zbR~_gj#1?_~iF{U0d3&F;CHzMW|u=-*0IOkH#yQJQaX3a&Ix%74OHfH6Tc|HGEVcr?mbB^y;uFrFR3+5S@TgyL7dwh)k z?BzDh81+6*k7xFFOkMMthI!`9?^)3w`B#I}{A=Lmcm2N0J2BUh?(4c-Ka{_VyWncE zr|ZGqL-p74eck{zx4!88Zm{+BMeiHI>Y7j5yXRVe^u7t4_P!aO_TB@Z+WQtX_2_jg z*k0A6*L%R5F@4eNHn8>dMX&dQ)is~ASMQwu==DBu+Ux!BwATmV_L`pY2jS|m&%I!^ z==~wEn*B%b4};CEFM96-TVG%F-rvOX)|2*LK++$*?*OO0?}Vql?}AV5{Sh?v==D*s zTH5PlaC7O4ULOZrPha%<1Xx}3NqgN6*Ke<${UdZu;HT7xhZubS=-p)3pr4 z)3xk{+mGuqR)6FwgVVJ<4^P+f0zBp#t3Uc3Nqp8YHhiOr&wh<{E&gZdi~3ctTJSNj z>-F1gzb|6Os5`%sYY|_EtM%vq%j4iiVxE8V?8A)JAF=zv^AkIk$4;Q>bB;A@jA~Ky zAlUu?i1-f1LzvpDnDGa4tq1?r;MXvFoQIjiIj>{t5px)9O!_^21Fjx1NAj3!7$eV{ zV0HK1yhpKkU(VMbzGI2c`o`MtB%{8_`4(91NWQ+e!D=V4_|JR??B3~{%m0hD-^HxG z0@LrEHYW1E2kycm@B3iqsE76;Sp5Uc-i|Y>nZx(^5tg3ykKvy6c+a2YG3qhrQ?O?| zVm<>KryenZT=eUR8 zV6Hi8o&}%5)T6g=!Pe5}9-U@Xi@M)|)q;Ny-ktCtz(bht%{~1Qi}&VSWQw5L>_>FGW6FcDk5hA5DjZQdrYQ8SkFGa-(aB0pj z7Q&#!8uq(Wb2R8Be&+$PkdxkJ{j_4FDXUx)Z+&1e|@ zX3h8;W&7gV(1>l(O#e{tv6#~jXszN~HFwDBWCvruO_;&@?V5?Te7l9M9@e)-ar&dd zdU(6cYQ0UE_0&hLdt=O5$$y7t@}cihB=R#<2Rkh0V`-24oD2KO3>7WhVEoSVL#f6aBhP?hfCe-jDv?BePue zKKbaY+hZSVxL-IvdUe07Gv@4dkBq)Nqxz`jUKugyV|p?Ahy48AAm|Xa*)?>o#htFz#QWza$ zxV83*D)G}FuKLOD@;tg%wpaXM(5tidr@ZZ#d`eaTXKBt zPb~E=YF?6A&i94UF2(2jKp1T$vg(;H33GS&TJ-;gy+4$(H{UXTW@0SwW#Lwt<-H=z z8g$b>5=Or&qqb$uXyo7>KbCbl{1ah%-QM|{Ffr)1=FJff#&2tH2$Opv?u}oC(HN%v lCX7Z-(|(V&dd=hN)n?6q$XHK2etLyH*EQq&PZ^HL{s%84D--|# diff --git a/include/rendering/renderer.hpp b/include/rendering/renderer.hpp index 13f77fe2..0058fbdd 100644 --- a/include/rendering/renderer.hpp +++ b/include/rendering/renderer.hpp @@ -408,7 +408,7 @@ private: VkPipelineLayout sharpenPipelineLayout = VK_NULL_HANDLE; VkDescriptorSetLayout sharpenDescSetLayout = VK_NULL_HANDLE; VkDescriptorPool sharpenDescPool = VK_NULL_HANDLE; - VkDescriptorSet sharpenDescSet = VK_NULL_HANDLE; + VkDescriptorSet sharpenDescSets[2] = {}; // Previous frame state for motion vector reprojection glm::mat4 prevViewProjection = glm::mat4(1.0f); diff --git a/src/rendering/renderer.cpp b/src/rendering/renderer.cpp index 81686219..063bae9a 100644 --- a/src/rendering/renderer.cpp +++ b/src/rendering/renderer.cpp @@ -876,6 +876,9 @@ bool Renderer::isWaterRefractionEnabled() const { void Renderer::setMsaaSamples(VkSampleCountFlagBits samples) { if (!vkCtx) return; + // FSR2 requires non-MSAA render pass — block MSAA changes while FSR2 is active + if (fsr2_.enabled && samples > VK_SAMPLE_COUNT_1_BIT) return; + // Clamp to device maximum VkSampleCountFlagBits maxSamples = vkCtx->getMaxUsableSampleCount(); if (samples > maxSamples) samples = maxSamples; @@ -1178,7 +1181,7 @@ void Renderer::endFrame() { fsr2_.prevJitter = camera->getJitter(); camera->clearJitter(); fsr2_.currentHistory = 1 - fsr2_.currentHistory; - fsr2_.frameIndex++; + fsr2_.frameIndex = (fsr2_.frameIndex + 1) % 256; // Wrap to keep Halton values well-distributed } else if (fsr_.enabled && fsr_.sceneFramebuffer) { // End the off-screen scene render pass @@ -3782,7 +3785,7 @@ bool Renderer::initFSR2Resources() { VkPushConstantRange pc{}; pc.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT; pc.offset = 0; - pc.size = 2 * sizeof(glm::mat4) + 2 * sizeof(glm::vec4); // 160 bytes + pc.size = sizeof(glm::mat4) + sizeof(glm::vec4); // 80 bytes VkPipelineLayoutCreateInfo plCI{VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO}; plCI.setLayoutCount = 1; @@ -4005,20 +4008,21 @@ bool Renderer::initFSR2Resources() { return false; } - // Descriptor pool + set for sharpen pass (reads from history output) + // Descriptor pool + sets for sharpen pass (double-buffered to avoid race condition) VkDescriptorPoolSize poolSize{VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, 2}; VkDescriptorPoolCreateInfo poolInfo{VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO}; - poolInfo.maxSets = 1; + poolInfo.maxSets = 2; poolInfo.poolSizeCount = 1; poolInfo.pPoolSizes = &poolSize; vkCreateDescriptorPool(device, &poolInfo, nullptr, &fsr2_.sharpenDescPool); + VkDescriptorSetLayout layouts[2] = {fsr2_.sharpenDescSetLayout, fsr2_.sharpenDescSetLayout}; VkDescriptorSetAllocateInfo dsAI{VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO}; dsAI.descriptorPool = fsr2_.sharpenDescPool; - dsAI.descriptorSetCount = 1; - dsAI.pSetLayouts = &fsr2_.sharpenDescSetLayout; - vkAllocateDescriptorSets(device, &dsAI, &fsr2_.sharpenDescSet); - // Descriptor updated dynamically each frame to point at the correct history buffer + dsAI.descriptorSetCount = 2; + dsAI.pSetLayouts = layouts; + vkAllocateDescriptorSets(device, &dsAI, fsr2_.sharpenDescSets); + // Descriptors updated dynamically each frame to point at the correct history buffer } fsr2_.needsHistoryReset = true; @@ -4036,7 +4040,7 @@ void Renderer::destroyFSR2Resources() { if (fsr2_.sharpenPipeline) { vkDestroyPipeline(device, fsr2_.sharpenPipeline, nullptr); fsr2_.sharpenPipeline = VK_NULL_HANDLE; } if (fsr2_.sharpenPipelineLayout) { vkDestroyPipelineLayout(device, fsr2_.sharpenPipelineLayout, nullptr); fsr2_.sharpenPipelineLayout = VK_NULL_HANDLE; } - if (fsr2_.sharpenDescPool) { vkDestroyDescriptorPool(device, fsr2_.sharpenDescPool, nullptr); fsr2_.sharpenDescPool = VK_NULL_HANDLE; fsr2_.sharpenDescSet = VK_NULL_HANDLE; } + if (fsr2_.sharpenDescPool) { vkDestroyDescriptorPool(device, fsr2_.sharpenDescPool, nullptr); fsr2_.sharpenDescPool = VK_NULL_HANDLE; fsr2_.sharpenDescSets[0] = fsr2_.sharpenDescSets[1] = VK_NULL_HANDLE; } if (fsr2_.sharpenDescSetLayout) { vkDestroyDescriptorSetLayout(device, fsr2_.sharpenDescSetLayout, nullptr); fsr2_.sharpenDescSetLayout = VK_NULL_HANDLE; } if (fsr2_.accumulatePipeline) { vkDestroyPipeline(device, fsr2_.accumulatePipeline, nullptr); fsr2_.accumulatePipeline = VK_NULL_HANDLE; } @@ -4082,24 +4086,22 @@ void Renderer::dispatchMotionVectors() { vkCmdBindDescriptorSets(currentCmd, VK_PIPELINE_BIND_POINT_COMPUTE, fsr2_.motionVecPipelineLayout, 0, 1, &fsr2_.motionVecDescSet, 0, nullptr); - // Push constants: invViewProj, prevViewProj, resolution, jitterOffset + // Single reprojection matrix: prevUnjitteredVP * inv(currentUnjitteredVP) + // Both matrices are unjittered — jitter only affects sub-pixel sampling, + // not motion vector computation. This avoids numerical instability from + // jitter amplification through large world coordinates. struct { - glm::mat4 invViewProj; - glm::mat4 prevViewProj; + glm::mat4 reprojMatrix; // prevUnjitteredVP * inv(currentUnjitteredVP) glm::vec4 resolution; - glm::vec4 jitterOffset; } pc; - glm::mat4 currentVP = camera->getProjectionMatrix() * camera->getViewMatrix(); - pc.invViewProj = glm::inverse(currentVP); - pc.prevViewProj = fsr2_.prevViewProjection; + glm::mat4 currentUnjitteredVP = camera->getUnjitteredViewProjectionMatrix(); + pc.reprojMatrix = fsr2_.prevViewProjection * glm::inverse(currentUnjitteredVP); pc.resolution = glm::vec4( static_cast(fsr2_.internalWidth), static_cast(fsr2_.internalHeight), 1.0f / fsr2_.internalWidth, 1.0f / fsr2_.internalHeight); - glm::vec2 jitter = camera->getJitter(); - pc.jitterOffset = glm::vec4(jitter.x, jitter.y, fsr2_.prevJitter.x, fsr2_.prevJitter.y); vkCmdPushConstants(currentCmd, fsr2_.motionVecPipelineLayout, VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(pc), &pc); @@ -4128,17 +4130,24 @@ void Renderer::dispatchTemporalAccumulate() { VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT); - // Transition history input: GENERAL/UNDEFINED → SHADER_READ_ONLY + // History layout lifecycle: + // First frame: both in UNDEFINED + // Subsequent frames: both in SHADER_READ_ONLY (output was transitioned for sharpen, + // input was left in SHADER_READ_ONLY from its sharpen read) + VkImageLayout historyOldLayout = fsr2_.needsHistoryReset + ? VK_IMAGE_LAYOUT_UNDEFINED + : VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; + + // Transition history input: SHADER_READ_ONLY → SHADER_READ_ONLY (barrier for sync) transitionImageLayout(currentCmd, fsr2_.history[inputIdx].image, - fsr2_.needsHistoryReset ? VK_IMAGE_LAYOUT_UNDEFINED : VK_IMAGE_LAYOUT_GENERAL, - VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, - VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, + historyOldLayout, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, + VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, // sharpen read in previous frame VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT); - // Transition history output: UNDEFINED → GENERAL + // Transition history output: SHADER_READ_ONLY → GENERAL (for compute write) transitionImageLayout(currentCmd, fsr2_.history[outputIdx].image, - VK_IMAGE_LAYOUT_UNDEFINED, VK_IMAGE_LAYOUT_GENERAL, - VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, + historyOldLayout, VK_IMAGE_LAYOUT_GENERAL, + VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT); vkCmdBindPipeline(currentCmd, VK_PIPELINE_BIND_POINT_COMPUTE, fsr2_.accumulatePipeline); @@ -4179,6 +4188,10 @@ void Renderer::renderFSR2Sharpen() { VkExtent2D ext = vkCtx->getSwapchainExtent(); uint32_t outputIdx = fsr2_.currentHistory; + // Use per-frame descriptor set to avoid race with in-flight command buffers + uint32_t frameIdx = vkCtx->getCurrentFrame(); + VkDescriptorSet descSet = fsr2_.sharpenDescSets[frameIdx]; + // Update sharpen descriptor to point at current history output VkDescriptorImageInfo imgInfo{}; imgInfo.sampler = fsr2_.linearSampler; @@ -4186,7 +4199,7 @@ void Renderer::renderFSR2Sharpen() { imgInfo.imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; VkWriteDescriptorSet write{VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET}; - write.dstSet = fsr2_.sharpenDescSet; + write.dstSet = descSet; write.dstBinding = 0; write.descriptorCount = 1; write.descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER; @@ -4195,7 +4208,7 @@ void Renderer::renderFSR2Sharpen() { vkCmdBindPipeline(currentCmd, VK_PIPELINE_BIND_POINT_GRAPHICS, fsr2_.sharpenPipeline); vkCmdBindDescriptorSets(currentCmd, VK_PIPELINE_BIND_POINT_GRAPHICS, - fsr2_.sharpenPipelineLayout, 0, 1, &fsr2_.sharpenDescSet, 0, nullptr); + fsr2_.sharpenPipelineLayout, 0, 1, &descSet, 0, nullptr); glm::vec4 params(1.0f / ext.width, 1.0f / ext.height, fsr2_.sharpness, 0.0f); vkCmdPushConstants(currentCmd, fsr2_.sharpenPipelineLayout, @@ -4214,6 +4227,11 @@ void Renderer::setFSR2Enabled(bool enabled) { fsr_.enabled = false; fsr_.needsRecreate = true; } + // FSR2 requires non-MSAA render pass (its framebuffer has 2 attachments) + if (vkCtx && vkCtx->getMsaaSamples() > VK_SAMPLE_COUNT_1_BIT) { + pendingMsaaSamples_ = VK_SAMPLE_COUNT_1_BIT; + msaaChangePending_ = true; + } // Use FSR1's scale factor and sharpness as defaults fsr2_.scaleFactor = fsr_.scaleFactor; fsr2_.sharpness = fsr_.sharpness; diff --git a/src/ui/game_screen.cpp b/src/ui/game_screen.cpp index 96800895..eab00305 100644 --- a/src/ui/game_screen.cpp +++ b/src/ui/game_screen.cpp @@ -6281,7 +6281,13 @@ void GameScreen::renderSettingsWindow() { } { const char* aaLabels[] = { "Off", "2x MSAA", "4x MSAA", "8x MSAA" }; - if (ImGui::Combo("Anti-Aliasing", &pendingAntiAliasing, aaLabels, 4)) { + bool fsr2Active = renderer && renderer->isFSR2Enabled(); + if (fsr2Active) { + ImGui::BeginDisabled(); + int disabled = 0; + ImGui::Combo("Anti-Aliasing (FSR2)", &disabled, "Off (FSR2 active)\0", 1); + ImGui::EndDisabled(); + } else if (ImGui::Combo("Anti-Aliasing", &pendingAntiAliasing, aaLabels, 4)) { static const VkSampleCountFlagBits aaSamples[] = { VK_SAMPLE_COUNT_1_BIT, VK_SAMPLE_COUNT_2_BIT, VK_SAMPLE_COUNT_4_BIT, VK_SAMPLE_COUNT_8_BIT