Fix WMO wall collision, normal mapping, POM backfill, and M2/WMO rendering performance

- Fix MOPY flag check (0x08 not 0x01) for proper wall collision detection
- Cap MAX_PUSH to PLAYER_RADIUS to prevent gradual clip-through
- Fix WMO doodad quaternion component ordering (X/Y swap)
- Linear normal map strength blend in shader for smooth slider control
- Enable shadow sampling for interior WMO groups (covered outdoor areas)
- Backfill deferred normal/height maps after streaming with descriptor rebind
- M2: prepareRender only iterates animated instances, bone dirty flag
- M2: remove worker thread VMA allocation, skip unready bone instances
- WMO: persistent visibility vectors, sequential culling
- Add FSR EASU/RCAS shaders
This commit is contained in:
Kelsi 2026-03-07 22:03:28 -08:00
parent 16c6c2b6a0
commit a4966e486f
25 changed files with 1467 additions and 352 deletions

View file

@ -0,0 +1,102 @@
#version 450
// FSR 1.0 EASU (Edge Adaptive Spatial Upsampling) — Fragment Shader
// Based on AMD FidelityFX Super Resolution 1.0
// Implements edge-adaptive bilinear upsampling with directional filtering
// Source image for the upscale; fsrFetch() reads it with textureLod at LOD 0.
layout(set = 0, binding = 0) uniform sampler2D uInput;
// Per-pass constants. This shader reads con0.zw, con1.xy, con2.xy and con3.x;
// the remaining components are not referenced in this file.
layout(push_constant) uniform FSRConstants {
vec4 con0; // inputSize.xy, 1/inputSize.xy
vec4 con1; // inputSize.xy / outputSize.xy, 0.5 * inputSize.xy / outputSize.xy
vec4 con2; // outputSize.xy, 1/outputSize.xy
vec4 con3; // sharpness, 0, 0, 0
} fsr;
// Interpolated UV for this fragment; main() flips Y back before sampling.
layout(location = 0) in vec2 TexCoord;
// Upscaled colour, clamped to [0, 1]; alpha is always written as 1.0.
layout(location = 0) out vec4 outColor;
// Read one input texel as RGB.
//   p   - base texel position in input pixels (main() passes a floor()'d value)
//   off - tap offset from p, also in input pixels
// The +0.5 moves to the texel centre and fsr.con0.zw (1/inputSize) converts the
// pixel position to normalized UVs. LOD is forced to 0.
vec3 fsrFetch(vec2 p, vec2 off) {
    vec2 texelCentre = p + off + 0.5;
    vec2 uv = texelCentre * fsr.con0.zw;
    return textureLod(uInput, uv, 0.0).rgb;
}
// EASU pass entry point.
// Maps the output fragment into input-pixel space, fetches a 12-texel
// neighbourhood, blends the 2x2 footprint (f, g, j, k) with bilinear weights
// that are pulled toward the nearest texel where the horizontal and vertical
// luma variation differ, then mixes in a small amount of the 8 surrounding
// texels. Writes the clamped result to outColor with alpha 1.0.
void main() {
    // Undo the vertex shader Y flip (postprocess.vert flips for Vulkan overlay,
    // but we need standard UV coords for texture sampling).
    vec2 tc = vec2(TexCoord.x, 1.0 - TexCoord.y);

    // Map output pixel to input space.
    vec2 pp = tc * fsr.con2.xy;        // output pixel position
    vec2 ip = pp * fsr.con1.xy - 0.5;  // input pixel position (centered)
    vec2 fp = floor(ip);               // top-left texel of the 2x2 footprint
    vec2 ff = ip - fp;                 // fractional position inside the footprint

    // 12 taps laid out around the footprint:
    //      b c
    //    e f g h
    //    i j k l
    //      n o
    vec3 b = fsrFetch(fp, vec2( 0, -1));
    vec3 c = fsrFetch(fp, vec2( 1, -1));
    vec3 e = fsrFetch(fp, vec2(-1,  0));
    vec3 f = fsrFetch(fp, vec2( 0,  0));
    vec3 g = fsrFetch(fp, vec2( 1,  0));
    vec3 h = fsrFetch(fp, vec2( 2,  0));
    vec3 i = fsrFetch(fp, vec2(-1,  1));
    vec3 j = fsrFetch(fp, vec2( 0,  1));
    vec3 k = fsrFetch(fp, vec2( 1,  1));
    vec3 l = fsrFetch(fp, vec2( 2,  1));
    vec3 n = fsrFetch(fp, vec2( 0,  2));
    vec3 o = fsrFetch(fp, vec2( 1,  2));

    // Luma (green channel as a perceptual approximation). Only the taps that
    // feed the variation sums below need a luma value.
    float bL = b.g, cL = c.g, eL = e.g, fL = f.g;
    float gL = g.g, iL = i.g, jL = j.g, kL = k.g;

    // Accumulated luma variation along rows (lenH) and along columns (lenV).
    float lenH = abs(eL - fL) + abs(fL - gL) + abs(iL - jL) + abs(jL - kL);
    float lenV = abs(bL - fL) + abs(fL - jL) + abs(cL - gL) + abs(gL - kL);

    // 0 when both axes vary equally (or the area is flat), approaching 1 when
    // one axis dominates. The epsilon keeps the division defined on flat areas;
    // the ratio is non-negative by construction.
    float edgeStr = abs(lenH - lenV) / (lenH + lenV + 1e-7);

    // Bilinear weights for (f, g, j, k).
    vec4 w = vec4((1.0 - ff.x) * (1.0 - ff.y),
                  ff.x * (1.0 - ff.y),
                  (1.0 - ff.x) * ff.y,
                  ff.x * ff.y);

    // Edge-aware sharpening: pull the weights toward the nearest texel (the one
    // with the largest bilinear weight). The blend factor is clamped to [0, 1]
    // so an out-of-range sharpness constant cannot produce negative weights or
    // a zero normalisation sum.
    float pull = clamp(fsr.con3.x * edgeStr * 0.25, 0.0, 1.0);
    float maxW = max(max(w.x, w.y), max(w.z, w.w));
    vec4 isNearest = vec4(equal(w, vec4(maxW)));
    w = mix(w, isNearest, pull);

    // Normalize (ties at maxW can push the sum above 1).
    float wSum = w.x + w.y + w.z + w.w;
    w /= wSum;

    // Weighted blend of the 2x2 footprint.
    vec3 color = f * w.x + g * w.y + j * w.z + k * w.w;

    // Blend in a small fraction of the surrounding ring for anti-aliasing,
    // scaled by edge strength.
    vec3 ring = (b + c + e + h + i + l + n + o) / 8.0;
    float aa = 0.125 * edgeStr;
    color = mix(color, ring, aa * 0.15);

    outColor = vec4(clamp(color, 0.0, 1.0), 1.0);
}

Binary file not shown.

View file

@ -0,0 +1,43 @@
#version 450
// FSR 1.0 RCAS (Robust Contrast Adaptive Sharpening) — Fragment Shader
// Based on AMD FidelityFX Super Resolution 1.0
// Applies contrast-adaptive sharpening after EASU upscaling
// Image to sharpen (per the header comment, the result of the EASU upscale).
layout(set = 0, binding = 0) uniform sampler2D uInput;
// Per-pass constants. main() reads con0.xy (texel size) and con1.x (sharpness);
// the remaining components are not referenced in this file.
layout(push_constant) uniform RCASConstants {
vec4 con0; // 1/outputSize.xy, outputSize.xy
vec4 con1; // sharpness (x), 0, 0, 0
} rcas;
// Interpolated UV for this fragment; used directly as the sampling coordinate.
layout(location = 0) in vec2 TexCoord;
// Sharpened colour, clamped to [0, 1]; alpha is always written as 1.0.
layout(location = 0) out vec4 outColor;
// RCAS pass entry point.
// Samples the centre texel and its 4-neighbourhood, derives a sharpening
// weight that shrinks as the neighbours' luma range grows, and pushes the
// centre colour away from the neighbour average by that weight. Writes the
// clamped result to outColor with alpha 1.0.
void main() {
    // NOTE(review): TexCoord is used as-is here, whereas the EASU shader undoes
    // a vertex-shader Y flip before sampling - confirm this is intended for the
    // vertex shader bound to this pass.
    vec2 texelSize = rcas.con0.xy;

    // Centre and 4-neighbourhood (one texel step along each axis).
    vec3 c = texture(uInput, TexCoord).rgb;
    vec3 n = texture(uInput, TexCoord + vec2( 0.0, -texelSize.y)).rgb;
    vec3 s = texture(uInput, TexCoord + vec2( 0.0,  texelSize.y)).rgb;
    vec3 w = texture(uInput, TexCoord + vec2(-texelSize.x, 0.0)).rgb;
    vec3 e = texture(uInput, TexCoord + vec2( texelSize.x, 0.0)).rgb;

    // Luma range of the four neighbours (green channel approximation).
    // The centre texel is deliberately not part of the range.
    float minL = min(min(n.g, s.g), min(w.g, e.g));
    float maxL = max(max(n.g, s.g), max(w.g, e.g));
    float contrast = maxL - minL;

    // Contrast-adaptive weight: full sharpness on flat areas, fading to zero
    // once the range reaches 0.3, to avoid ringing on strong edges.
    float sharpness = rcas.con1.x;
    float w0 = sharpness * (1.0 - smoothstep(0.0, 0.3, contrast));

    // Unsharp-mask style step: centre + w0 * (centre - neighbour average).
    vec3 avg = (n + s + w + e) * 0.25;
    vec3 sharpened = c + w0 * (c - avg);

    outColor = vec4(clamp(sharpened, 0.0, 1.0), 1.0);
}

Binary file not shown.

View file

@ -149,21 +149,21 @@ void main() {
vec3 norm = vertexNormal;
if (enableNormalMap != 0 && lodFactor < 0.99 && normalMapStrength > 0.001) {
vec3 mapNormal = texture(uNormalHeightMap, finalUV).rgb * 2.0 - 1.0;
// Scale XY by strength to control effect intensity
mapNormal.xy *= normalMapStrength;
mapNormal = normalize(mapNormal);
vec3 worldNormal = normalize(TBN * mapNormal);
if (!gl_FrontFacing) worldNormal = -worldNormal;
// Blend: strength + LOD both contribute to fade toward vertex normal
float blendFactor = max(lodFactor, 1.0 - normalMapStrength);
norm = normalize(mix(worldNormal, vertexNormal, blendFactor));
// Linear blend: strength controls how much normal map detail shows,
// LOD fades out at distance. Both multiply for smooth falloff.
float blend = clamp(normalMapStrength, 0.0, 1.0) * (1.0 - lodFactor);
norm = normalize(mix(vertexNormal, worldNormal, blend));
}
vec3 result;
// Sample shadow map — skip for interior WMO groups (no sun indoors)
// Sample shadow map for all WMO groups (interior groups with 0x2000 flag
// include covered outdoor areas like archways/streets that should receive shadows)
float shadow = 1.0;
if (shadowParams.x > 0.5 && isInterior == 0) {
if (shadowParams.x > 0.5) {
vec3 ldir = normalize(-lightDir.xyz);
float normalOffset = SHADOW_TEXEL * 2.0 * (1.0 - abs(dot(norm, ldir)));
vec3 biasedPos = FragPos + norm * normalOffset;

Binary file not shown.