diff --git a/BUILD_INSTRUCTIONS.md b/BUILD_INSTRUCTIONS.md index 37fb3b3f..54f2041c 100644 --- a/BUILD_INSTRUCTIONS.md +++ b/BUILD_INSTRUCTIONS.md @@ -29,10 +29,14 @@ sudo apt install -y \ sudo pacman -S --needed \ base-devel cmake pkgconf git \ sdl2 glew glm openssl zlib \ - vulkan-devel vulkan-tools shaderc \ + vulkan-headers vulkan-icd-loader vulkan-tools shaderc \ ffmpeg unicorn stormlib ``` +> **Note:** `vulkan-headers` provides the `vulkan/vulkan.h` development headers required +> at build time. `vulkan-devel` is a group that includes these on some distros but is not +> available by name on Arch — install `vulkan-headers` and `vulkan-icd-loader` explicitly. + --- ## 🐧 Linux (All Distros) diff --git a/PKGBUILD b/PKGBUILD new file mode 100644 index 00000000..013be0a1 --- /dev/null +++ b/PKGBUILD @@ -0,0 +1,107 @@ +# Maintainer: +# Contributor: + +pkgname=wowee-git +pkgver=r.1 +pkgrel=1 +pkgdesc="Open-source World of Warcraft client with Vulkan renderer (WotLK 3.3.5a / TBC / Classic)" +arch=('x86_64') +url="https://github.com/Kelsidavis/WoWee" +license=('MIT') +depends=( + 'sdl2' + 'vulkan-icd-loader' + 'openssl' + 'zlib' + 'ffmpeg' + 'unicorn' + 'glew' + 'libx11' + 'stormlib' # AUR — required at runtime by wowee-extract-assets (libstorm.so) +) +makedepends=( + 'git' + 'cmake' + 'pkgconf' + 'glm' + 'vulkan-headers' + 'shaderc' + 'python' +) +provides=('wowee') +conflicts=('wowee') +source=("${pkgname}::git+https://github.com/Kelsidavis/WoWee.git#branch=main" + "git+https://github.com/ocornut/imgui.git" + "git+https://github.com/charles-lunarg/vk-bootstrap.git") +sha256sums=('SKIP' 'SKIP' 'SKIP') + +pkgver() { + cd "${pkgname}" + printf "r%s.%s" "$(git rev-list --count HEAD)" "$(git rev-parse --short HEAD)" +} + +prepare() { + cd "${pkgname}" + git submodule init + git config submodule.extern/imgui.url "${srcdir}/imgui" + git config submodule.extern/vk-bootstrap.url "${srcdir}/vk-bootstrap" + git -c protocol.file.allow=always submodule update +} + +build() { + 
cmake -S "${pkgname}" -B build \ + -DCMAKE_BUILD_TYPE=Release \ + -DCMAKE_INSTALL_PREFIX=/usr \ + -Wno-dev + cmake --build build --parallel "$(nproc)" +} + +package() { + DESTDIR="${pkgdir}" cmake --install build + + # Relocate real binaries from /usr/bin → /usr/lib/wowee/ + # so wrapper scripts can live at /usr/bin instead. + install -dm755 "${pkgdir}/usr/lib/wowee" + for bin in wowee asset_extract dbc_to_csv auth_probe auth_login_probe blp_convert; do + if [[ -f "${pkgdir}/usr/bin/${bin}" ]]; then + mv "${pkgdir}/usr/bin/${bin}" "${pkgdir}/usr/lib/wowee/${bin}" + fi + done + + # Main launcher: sets WOW_DATA_PATH to the user's XDG data dir. + # The app uses WOW_DATA_PATH to locate Data/manifest.json at runtime. + install -Dm755 /dev/stdin "${pkgdir}/usr/bin/wowee" <<'EOF' +#!/bin/sh +export WOW_DATA_PATH="${XDG_DATA_HOME:-$HOME/.local/share}/wowee/Data" +exec /usr/lib/wowee/wowee "$@" +EOF + + # Asset extraction helper: runs asset_extract and outputs to the XDG data dir. + # Usage: wowee-extract-assets /path/to/WoW/Data [wotlk|tbc|classic] + install -Dm755 /dev/stdin "${pkgdir}/usr/bin/wowee-extract-assets" <<'EOF' +#!/bin/sh +if [ -z "$1" ]; then + echo "Usage: wowee-extract-assets /path/to/WoW/Data [wotlk|tbc|classic]" + exit 1 +fi +OUTPUT="${XDG_DATA_HOME:-$HOME/.local/share}/wowee/Data" +mkdir -p "${OUTPUT}" +exec /usr/lib/wowee/asset_extract --mpq-dir "$1" --output "${OUTPUT}" ${2:+--expansion "$2"} +EOF + + # License + install -Dm644 "${pkgname}/LICENSE" \ + "${pkgdir}/usr/share/licenses/${pkgname}/LICENSE" + + # Post-install instructions (shown by pacman helpers that support it) + install -Dm644 /dev/stdin \ + "${pkgdir}/usr/share/doc/${pkgname}/POST_INSTALL" <<'EOF' +==> WoWee requires game assets extracted from your own WoW client. +==> Run the following once, pointing at your WoW Data/ directory: +==> +==> wowee-extract-assets /path/to/WoW-3.3.5a/Data wotlk +==> +==> Assets are written to ~/.local/share/wowee/Data/ (or $XDG_DATA_HOME/wowee/Data/). 
+==> Then launch the client with: wowee +EOF +} diff --git a/assets/shaders/fsr2_accumulate.comp.glsl b/assets/shaders/fsr2_accumulate.comp.glsl index 7fb0cb27..bcaad6f8 100644 --- a/assets/shaders/fsr2_accumulate.comp.glsl +++ b/assets/shaders/fsr2_accumulate.comp.glsl @@ -15,6 +15,16 @@ layout(push_constant) uniform PushConstants { vec4 params; // x = resetHistory (1=reset), y = sharpness, zw = unused } pc; +vec3 tonemap(vec3 c) { + float luma = max(dot(c, vec3(0.299, 0.587, 0.114)), 0.0); + return c / (1.0 + luma); +} + +vec3 inverseTonemap(vec3 c) { + float luma = max(dot(c, vec3(0.299, 0.587, 0.114)), 0.0); + return c / max(1.0 - luma, 1e-4); +} + vec3 rgbToYCoCg(vec3 rgb) { float y = 0.25 * rgb.r + 0.5 * rgb.g + 0.25 * rgb.b; float co = 0.5 * rgb.r - 0.5 * rgb.b; @@ -29,38 +39,97 @@ vec3 yCoCgToRgb(vec3 ycocg) { return vec3(y + co - cg, y + cg, y - co - cg); } +// Catmull-Rom bicubic (9 bilinear taps) with anti-ringing clamp. +vec3 sampleBicubic(sampler2D tex, vec2 uv, vec2 texSize) { + vec2 invTexSize = 1.0 / texSize; + vec2 iTc = uv * texSize; + vec2 tc = floor(iTc - 0.5) + 0.5; + vec2 f = iTc - tc; + + vec2 w0 = f * (-0.5 + f * (1.0 - 0.5 * f)); + vec2 w1 = 1.0 + f * f * (-2.5 + 1.5 * f); + vec2 w2 = f * (0.5 + f * (2.0 - 1.5 * f)); + vec2 w3 = f * f * (-0.5 + 0.5 * f); + + vec2 s12 = w1 + w2; + vec2 offset12 = w2 / s12; + + vec2 tc0 = (tc - 1.0) * invTexSize; + vec2 tc3 = (tc + 2.0) * invTexSize; + vec2 tc12 = (tc + offset12) * invTexSize; + + vec3 result = + (texture(tex, vec2(tc0.x, tc0.y)).rgb * w0.x + + texture(tex, vec2(tc12.x, tc0.y)).rgb * s12.x + + texture(tex, vec2(tc3.x, tc0.y)).rgb * w3.x) * w0.y + + (texture(tex, vec2(tc0.x, tc12.y)).rgb * w0.x + + texture(tex, vec2(tc12.x, tc12.y)).rgb * s12.x + + texture(tex, vec2(tc3.x, tc12.y)).rgb * w3.x) * s12.y + + (texture(tex, vec2(tc0.x, tc3.y)).rgb * w0.x + + texture(tex, vec2(tc12.x, tc3.y)).rgb * s12.x + + texture(tex, vec2(tc3.x, tc3.y)).rgb * w3.x) * w3.y; + + // Anti-ringing: clamp to range of 
the 4 nearest texels + vec2 tcNear = tc * invTexSize; + vec3 t00 = texture(tex, tcNear).rgb; + vec3 t10 = texture(tex, tcNear + vec2(invTexSize.x, 0.0)).rgb; + vec3 t01 = texture(tex, tcNear + vec2(0.0, invTexSize.y)).rgb; + vec3 t11 = texture(tex, tcNear + invTexSize).rgb; + vec3 minC = min(min(t00, t10), min(t01, t11)); + vec3 maxC = max(max(t00, t10), max(t01, t11)); + return clamp(result, minC, maxC); +} + void main() { ivec2 outPixel = ivec2(gl_GlobalInvocationID.xy); ivec2 outSize = ivec2(pc.displaySize.xy); if (outPixel.x >= outSize.x || outPixel.y >= outSize.y) return; vec2 outUV = (vec2(outPixel) + 0.5) * pc.displaySize.zw; - vec3 currentColor = texture(sceneColor, outUV).rgb; + + vec3 currentColor = sampleBicubic(sceneColor, outUV, pc.internalSize.xy); if (pc.params.x > 0.5) { imageStore(historyOutput, outPixel, vec4(currentColor, 1.0)); return; } - vec2 motion = texture(motionVectors, outUV).rg; - vec2 historyUV = outUV + motion; + // Depth-dilated motion vector (3x3 nearest-to-camera) + vec2 texelSize = pc.internalSize.zw; + float closestDepth = texture(depthBuffer, outUV).r; + vec2 closestOffset = vec2(0.0); + for (int y = -1; y <= 1; y++) { + for (int x = -1; x <= 1; x++) { + vec2 off = vec2(float(x), float(y)) * texelSize; + float d = texture(depthBuffer, outUV + off).r; + if (d < closestDepth) { + closestDepth = d; + closestOffset = off; + } + } + } + vec2 motion = texture(motionVectors, outUV + closestOffset).rg; + float motionMag = length(motion * pc.displaySize.xy); + vec2 historyUV = outUV + motion; float historyValid = (historyUV.x >= 0.0 && historyUV.x <= 1.0 && historyUV.y >= 0.0 && historyUV.y <= 1.0) ? 
1.0 : 0.0; - vec3 historyColor = texture(historyInput, historyUV).rgb; - // Neighborhood clamping in YCoCg space - vec2 texelSize = pc.internalSize.zw; - vec3 s0 = rgbToYCoCg(currentColor); - vec3 s1 = rgbToYCoCg(texture(sceneColor, outUV + vec2(-texelSize.x, 0.0)).rgb); - vec3 s2 = rgbToYCoCg(texture(sceneColor, outUV + vec2( texelSize.x, 0.0)).rgb); - vec3 s3 = rgbToYCoCg(texture(sceneColor, outUV + vec2(0.0, -texelSize.y)).rgb); - vec3 s4 = rgbToYCoCg(texture(sceneColor, outUV + vec2(0.0, texelSize.y)).rgb); - vec3 s5 = rgbToYCoCg(texture(sceneColor, outUV + vec2(-texelSize.x, -texelSize.y)).rgb); - vec3 s6 = rgbToYCoCg(texture(sceneColor, outUV + vec2( texelSize.x, -texelSize.y)).rgb); - vec3 s7 = rgbToYCoCg(texture(sceneColor, outUV + vec2(-texelSize.x, texelSize.y)).rgb); - vec3 s8 = rgbToYCoCg(texture(sceneColor, outUV + vec2( texelSize.x, texelSize.y)).rgb); + // === Tonemapped accumulation === + vec3 tmCurrent = tonemap(currentColor); + vec3 tmHistory = tonemap(historyColor); + + // Neighborhood in tonemapped YCoCg + vec3 s0 = rgbToYCoCg(tmCurrent); + vec3 s1 = rgbToYCoCg(tonemap(texture(sceneColor, outUV + vec2(-texelSize.x, 0.0)).rgb)); + vec3 s2 = rgbToYCoCg(tonemap(texture(sceneColor, outUV + vec2( texelSize.x, 0.0)).rgb)); + vec3 s3 = rgbToYCoCg(tonemap(texture(sceneColor, outUV + vec2(0.0, -texelSize.y)).rgb)); + vec3 s4 = rgbToYCoCg(tonemap(texture(sceneColor, outUV + vec2(0.0, texelSize.y)).rgb)); + vec3 s5 = rgbToYCoCg(tonemap(texture(sceneColor, outUV + vec2(-texelSize.x, -texelSize.y)).rgb)); + vec3 s6 = rgbToYCoCg(tonemap(texture(sceneColor, outUV + vec2( texelSize.x, -texelSize.y)).rgb)); + vec3 s7 = rgbToYCoCg(tonemap(texture(sceneColor, outUV + vec2(-texelSize.x, texelSize.y)).rgb)); + vec3 s8 = rgbToYCoCg(tonemap(texture(sceneColor, outUV + vec2( texelSize.x, texelSize.y)).rgb)); vec3 m1 = s0 + s1 + s2 + s3 + s4 + s5 + s6 + s7 + s8; vec3 m2 = s0*s0 + s1*s1 + s2*s2 + s3*s3 + s4*s4 + s5*s5 + s6*s6 + s7*s7 + s8*s8; @@ -72,14 +141,45 @@ void 
main() { vec3 boxMin = mean - gamma * stddev; vec3 boxMax = mean + gamma * stddev; - vec3 historyYCoCg = rgbToYCoCg(historyColor); - vec3 clampedHistory = clamp(historyYCoCg, boxMin, boxMax); - historyColor = yCoCgToRgb(clampedHistory); + // Compute clamped history and measure how far it was from the box + vec3 tmHistYCoCg = rgbToYCoCg(tmHistory); + vec3 clampedYCoCg = clamp(tmHistYCoCg, boxMin, boxMax); + float clampDist = length(tmHistYCoCg - clampedYCoCg); - float clampDist = length(historyYCoCg - clampedHistory); - float blendFactor = mix(0.05, 0.30, clamp(clampDist * 2.0, 0.0, 1.0)); + // SELECTIVE CLAMP: only modify history when there's motion or disocclusion. + // For static pixels, history is already well-accumulated — clamping it + // each frame causes the clamp box (which shifts with jitter) to drag + // the history around, creating visible shimmer. By leaving static history + // untouched, accumulated anti-aliasing and detail is preserved. + float needsClamp = max( + clamp(motionMag * 2.0, 0.0, 1.0), // motion → full clamp + clamp(clampDist * 3.0, 0.0, 1.0) // disocclusion → full clamp + ); + tmHistory = yCoCgToRgb(mix(tmHistYCoCg, clampedYCoCg, needsClamp)); + + // Blend: higher for good jitter samples, lower for poor ones. + // Jitter-aware weighting: current frame's sample quality depends on + // how close the jittered sample fell to this output pixel. 
+ vec2 jitterPx = pc.jitterOffset.xy * 0.5 * pc.internalSize.xy; + vec2 internalPos = outUV * pc.internalSize.xy; + vec2 subPixelOffset = fract(internalPos) - 0.5; + vec2 sampleDelta = subPixelOffset - jitterPx; + float dist2 = dot(sampleDelta, sampleDelta); + float sampleQuality = exp(-dist2 * 3.0); + float blendFactor = mix(0.03, 0.20, sampleQuality); + + // Disocclusion: aggressively replace stale history + blendFactor = mix(blendFactor, 0.80, clamp(clampDist * 5.0, 0.0, 1.0)); + + // Velocity: strong response during camera/object motion + blendFactor = max(blendFactor, clamp(motionMag * 0.30, 0.0, 0.50)); + + // Full current frame when history is out of bounds blendFactor = mix(blendFactor, 1.0, 1.0 - historyValid); - vec3 result = mix(historyColor, currentColor, blendFactor); + // Blend in tonemapped space, inverse-tonemap back to linear + vec3 tmResult = mix(tmHistory, tmCurrent, blendFactor); + vec3 result = inverseTonemap(tmResult); + imageStore(historyOutput, outPixel, vec4(result, 1.0)); } diff --git a/assets/shaders/fsr2_accumulate.comp.spv b/assets/shaders/fsr2_accumulate.comp.spv index 47529d75..c4590379 100644 Binary files a/assets/shaders/fsr2_accumulate.comp.spv and b/assets/shaders/fsr2_accumulate.comp.spv differ diff --git a/assets/shaders/fsr2_motion.comp.glsl b/assets/shaders/fsr2_motion.comp.glsl index b0b39375..1f86cb89 100644 --- a/assets/shaders/fsr2_motion.comp.glsl +++ b/assets/shaders/fsr2_motion.comp.glsl @@ -8,6 +8,7 @@ layout(set = 0, binding = 1, rg16f) uniform writeonly image2D motionVectors; layout(push_constant) uniform PushConstants { mat4 reprojMatrix; // prevUnjitteredVP * inverse(currentUnjitteredVP) vec4 resolution; // xy = internal size, zw = 1/internal size + vec4 jitterOffset; // xy = current jitter (NDC), zw = unused } pc; void main() { @@ -15,21 +16,35 @@ void main() { ivec2 imgSize = ivec2(pc.resolution.xy); if (pixelCoord.x >= imgSize.x || pixelCoord.y >= imgSize.y) return; - // Sample depth (Vulkan: 0 = near, 1 = far) 
float depth = texelFetch(depthBuffer, pixelCoord, 0).r; - // Pixel center in UV [0,1] and NDC [-1,1] + // Pixel center UV and NDC vec2 uv = (vec2(pixelCoord) + 0.5) * pc.resolution.zw; vec2 ndc = uv * 2.0 - 1.0; - // Clip-to-clip reprojection: current unjittered clip → previous unjittered clip - vec4 clipPos = vec4(ndc, depth, 1.0); + // Unjitter the NDC: the scene was rendered with jitter applied to + // projection[2][0/1]. For RH perspective (P[2][3]=-1, clip.w=-vz): + // jittered_ndc = unjittered_ndc - jitter + // unjittered_ndc = ndc + jitter + vec2 unjitteredNDC = ndc + pc.jitterOffset.xy; + + // Reproject to previous frame via unjittered VP matrices + vec4 clipPos = vec4(unjitteredNDC, depth, 1.0); vec4 prevClip = pc.reprojMatrix * clipPos; vec2 prevNdc = prevClip.xy / prevClip.w; vec2 prevUV = prevNdc * 0.5 + 0.5; - // Motion = previous position - current position (both unjittered, in UV space) - vec2 motion = prevUV - uv; + // Current unjittered UV for this pixel's world content + vec2 currentUnjitteredUV = unjitteredNDC * 0.5 + 0.5; + + // Motion between unjittered positions — jitter-free. + // For a static scene (identity reprojMatrix), this is exactly zero. 
+ vec2 motion = prevUV - currentUnjitteredUV; + + // Soft dead zone: smoothly fade out sub-pixel noise from float precision + // in reprojMatrix (avoids hard spatial discontinuity from step()) + float motionPx = length(motion * pc.resolution.xy); + motion *= smoothstep(0.0, 0.05, motionPx); imageStore(motionVectors, pixelCoord, vec4(motion, 0.0, 0.0)); } diff --git a/assets/shaders/fsr2_motion.comp.spv b/assets/shaders/fsr2_motion.comp.spv index faa3d836..85b6378c 100644 Binary files a/assets/shaders/fsr2_motion.comp.spv and b/assets/shaders/fsr2_motion.comp.spv differ diff --git a/src/core/application.cpp b/src/core/application.cpp index f9ac557c..21c6f533 100644 --- a/src/core/application.cpp +++ b/src/core/application.cpp @@ -4042,7 +4042,7 @@ void Application::loadOnlineWorldTerrain(uint32_t mapId, float x, float y, float // then restore the full radius after entering the game. // This matches WoW's behavior: load quickly, stream the rest in-game. const int savedLoadRadius = 4; - terrainMgr->setLoadRadius(1); + terrainMgr->setLoadRadius(3); // 7x7=49 tiles — prevents hitches on spawn terrainMgr->setUnloadRadius(7); // Trigger tile streaming for surrounding area @@ -4080,11 +4080,9 @@ void Application::loadOnlineWorldTerrain(uint32_t mapId, float x, float y, float // Trigger new streaming — enqueue tiles for background workers terrainMgr->update(*camera, 0.016f); - // Process ALL available ready tiles per iteration — batches GPU - // uploads into a single command buffer + fence wait instead of - // one fence per tile. Loading screen still updates between - // iterations while workers parse more tiles. - terrainMgr->processAllReadyTiles(); + // Process ONE tile per iteration so the progress bar updates + // smoothly between tiles instead of stalling on large batches. 
+ terrainMgr->processOneReadyTile(); int remaining = terrainMgr->getRemainingTileCount(); int loaded = terrainMgr->getLoadedTileCount(); diff --git a/src/rendering/renderer.cpp b/src/rendering/renderer.cpp index 063bae9a..c3449f93 100644 --- a/src/rendering/renderer.cpp +++ b/src/rendering/renderer.cpp @@ -3785,7 +3785,7 @@ bool Renderer::initFSR2Resources() { VkPushConstantRange pc{}; pc.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT; pc.offset = 0; - pc.size = sizeof(glm::mat4) + sizeof(glm::vec4); // 80 bytes + pc.size = sizeof(glm::mat4) + 2 * sizeof(glm::vec4); // 96 bytes VkPipelineLayoutCreateInfo plCI{VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO}; plCI.setLayoutCount = 1; @@ -4086,17 +4086,20 @@ void Renderer::dispatchMotionVectors() { vkCmdBindDescriptorSets(currentCmd, VK_PIPELINE_BIND_POINT_COMPUTE, fsr2_.motionVecPipelineLayout, 0, 1, &fsr2_.motionVecDescSet, 0, nullptr); - // Single reprojection matrix: prevUnjitteredVP * inv(currentUnjitteredVP) - // Both matrices are unjittered — jitter only affects sub-pixel sampling, - // not motion vector computation. This avoids numerical instability from - // jitter amplification through large world coordinates. + // Reprojection: prevUnjitteredVP * inv(currentUnjitteredVP) + // Using unjittered VPs avoids numerical instability from jitter amplification + // through large world coordinates. The shader un-jitters NDC by adding the + // current jitter offset before reprojection (depth was rendered at jittered position). 
struct { - glm::mat4 reprojMatrix; // prevUnjitteredVP * inv(currentUnjitteredVP) + glm::mat4 reprojMatrix; glm::vec4 resolution; + glm::vec4 jitterOffset; // xy = current jitter (NDC), zw = unused } pc; glm::mat4 currentUnjitteredVP = camera->getUnjitteredViewProjectionMatrix(); pc.reprojMatrix = fsr2_.prevViewProjection * glm::inverse(currentUnjitteredVP); + glm::vec2 jitter = camera->getJitter(); + pc.jitterOffset = glm::vec4(jitter.x, jitter.y, 0.0f, 0.0f); pc.resolution = glm::vec4( static_cast(fsr2_.internalWidth), static_cast(fsr2_.internalHeight),