From b54458fe6cf79091f7ee8ac20609d133604e7720 Mon Sep 17 00:00:00 2001 From: k Date: Fri, 3 Apr 2026 23:27:02 -0700 Subject: [PATCH 1/8] fix: chdir to executable directory at startup for relative asset paths The binary assumed it was always launched from its own directory, causing shader/asset loads to fail when run from any other working directory. --- src/main.cpp | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/src/main.cpp b/src/main.cpp index 2bb2f75e..5bcd939f 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -6,6 +6,11 @@ #include #include #include +#ifdef __APPLE__ +#include +#include +#include +#endif #ifdef __linux__ #include #include @@ -97,6 +102,24 @@ int main([[maybe_unused]] int argc, [[maybe_unused]] char* argv[]) { std::signal(SIGTERM, crashHandler); std::signal(SIGINT, crashHandler); #endif + // Change working directory to the executable's directory so relative asset + // paths (assets/shaders/, Data/, etc.) resolve correctly from any launch location. +#ifdef __APPLE__ + { + uint32_t bufSize = 0; + _NSGetExecutablePath(nullptr, &bufSize); + std::string exePath(bufSize, '\0'); + _NSGetExecutablePath(exePath.data(), &bufSize); + chdir(dirname(exePath.data())); + } +#elif defined(__linux__) + { + char buf[4096]; + ssize_t len = readlink("/proc/self/exe", buf, sizeof(buf) - 1); + if (len > 0) { buf[len] = '\0'; chdir(dirname(buf)); } + } +#endif + try { wowee::core::Logger::getInstance().setLogLevel(readLogLevelFromEnv()); LOG_INFO("=== Wowee Native Client ==="); From aeb295e0bbfe10204c9c058de80f05e2e1c1b19e Mon Sep 17 00:00:00 2001 From: Kelsi Date: Fri, 3 Apr 2026 22:49:33 -0700 Subject: [PATCH 2/8] fix(rendering): use separate timer for global sequence bones Global sequence bones (hair, cape, physics) need time values spanning their full duration (up to ~968733ms), but animationTime wraps at the current animation's sequence duration (~2000ms for walk). 
This caused vertex spikes projecting from fingers/neck/ponytail as bones got stuck in the first ~2s of their loop. Add a separate globalSequenceTime accumulator that is not wrapped at the animation duration. --- include/rendering/character_renderer.hpp | 5 +++-- src/rendering/character_renderer.cpp | 24 +++++++++++++++--------- 2 files changed, 18 insertions(+), 11 deletions(-) diff --git a/include/rendering/character_renderer.hpp b/include/rendering/character_renderer.hpp index a667f069..025f9924 100644 --- a/include/rendering/character_renderer.hpp +++ b/include/rendering/character_renderer.hpp @@ -158,6 +158,7 @@ private: uint32_t currentAnimationId = 0; int currentSequenceIndex = -1; // Index into M2Model::sequences float animationTime = 0.0f; + float globalSequenceTime = 0.0f; // Separate timer for global sequences (accumulates without wrapping at sequence duration) bool animationLoop = true; bool isDead = false; // Prevents movement while in death state std::vector boneMatrices; // Current bone transforms @@ -206,8 +207,8 @@ private: void calculateBindPose(M2ModelGPU& gpuModel); void updateAnimation(CharacterInstance& instance, float deltaTime); void calculateBoneMatrices(CharacterInstance& instance); - glm::mat4 getBoneTransform(const pipeline::M2Bone& bone, float time, int sequenceIndex, - const std::vector& globalSeqDurations); + glm::mat4 getBoneTransform(const pipeline::M2Bone& bone, float animTime, float globalSeqTime, + int sequenceIndex, const std::vector& globalSeqDurations); glm::mat4 getModelMatrix(const CharacterInstance& instance) const; void destroyModelGPU(M2ModelGPU& gpuModel, bool defer = false); void destroyInstanceBones(CharacterInstance& inst, bool defer = false); diff --git a/src/rendering/character_renderer.cpp b/src/rendering/character_renderer.cpp index def7d941..79412156 100644 --- a/src/rendering/character_renderer.cpp +++ b/src/rendering/character_renderer.cpp @@ -1690,6 +1690,9 @@ void CharacterRenderer::update(float deltaTime, 
const glm::vec3& cameraPos) { float distSq = glm::distance2(inst.position, cameraPos); if (distSq >= animUpdateRadiusSq) continue; + // Advance global sequence timer (accumulates independently of animation wrapping) + inst.globalSequenceTime += deltaTime * 1000.0f; + // Always advance animation time (cheap) if (inst.cachedModel && !inst.cachedModel->data.sequences.empty()) { if (inst.currentSequenceIndex < 0) { @@ -1852,8 +1855,10 @@ int CharacterRenderer::findKeyframeIndex(const std::vector& timestamps } // Resolve sequence index and time for a track, handling global sequences. +// globalSeqTime is a separate accumulating timer that is NOT wrapped at the +// current animation's sequence duration, so global sequences get full range. static void resolveTrackTime(const pipeline::M2AnimationTrack& track, - int seqIdx, float time, + int seqIdx, float animTime, float globalSeqTime, const std::vector& globalSeqDurations, int& outSeqIdx, float& outTime) { if (track.globalSequence >= 0 && @@ -1861,14 +1866,14 @@ static void resolveTrackTime(const pipeline::M2AnimationTrack& track, outSeqIdx = 0; float dur = static_cast(globalSeqDurations[track.globalSequence]); if (dur > 0.0f) { - outTime = std::fmod(time, dur); + outTime = std::fmod(globalSeqTime, dur); if (outTime < 0.0f) outTime += dur; } else { outTime = 0.0f; } } else { outSeqIdx = seqIdx; - outTime = time; + outTime = animTime; } } @@ -1959,7 +1964,8 @@ void CharacterRenderer::calculateBoneMatrices(CharacterInstance& instance) { // Local transform includes pivot bracket: T(pivot)*T*R*S*T(-pivot) // At rest this is identity, so no separate bind pose is needed - glm::mat4 localTransform = getBoneTransform(bone, instance.animationTime, instance.currentSequenceIndex, gsd); + glm::mat4 localTransform = getBoneTransform(bone, instance.animationTime, instance.globalSequenceTime, + instance.currentSequenceIndex, gsd); // Compose with parent if (bone.parentBone >= 0 && static_cast(bone.parentBone) < numBones) { @@ -1970,16 
+1976,16 @@ void CharacterRenderer::calculateBoneMatrices(CharacterInstance& instance) { } } -glm::mat4 CharacterRenderer::getBoneTransform(const pipeline::M2Bone& bone, float time, int sequenceIndex, - const std::vector& globalSeqDurations) { +glm::mat4 CharacterRenderer::getBoneTransform(const pipeline::M2Bone& bone, float animTime, float globalSeqTime, + int sequenceIndex, const std::vector& globalSeqDurations) { // Resolve global sequences: bones with globalSequence >= 0 use sequence 0 // with time wrapped at the global sequence duration, independent of the // character's current animation. int tSeq, rSeq, sSeq; float tTime, rTime, sTime; - resolveTrackTime(bone.translation, sequenceIndex, time, globalSeqDurations, tSeq, tTime); - resolveTrackTime(bone.rotation, sequenceIndex, time, globalSeqDurations, rSeq, rTime); - resolveTrackTime(bone.scale, sequenceIndex, time, globalSeqDurations, sSeq, sTime); + resolveTrackTime(bone.translation, sequenceIndex, animTime, globalSeqTime, globalSeqDurations, tSeq, tTime); + resolveTrackTime(bone.rotation, sequenceIndex, animTime, globalSeqTime, globalSeqDurations, rSeq, rTime); + resolveTrackTime(bone.scale, sequenceIndex, animTime, globalSeqTime, globalSeqDurations, sSeq, sTime); glm::vec3 translation = interpolateVec3(bone.translation, tSeq, tTime, glm::vec3(0.0f)); glm::quat rotation = interpolateQuat(bone.rotation, rSeq, rTime); From 100394a743007712db5ba2aa35e99294bb540966 Mon Sep 17 00:00:00 2001 From: Kelsi Date: Fri, 3 Apr 2026 23:02:04 -0700 Subject: [PATCH 3/8] fix(rendering,game): init bone SSBO to identity; stop movement before cast Bone SSBO buffers were allocated for MAX_BONES (240) entries but only the first numBones were written. Uninitialized GPU memory in the remaining slots caused vertex spikes when any bone index exceeded the model's actual bone count. Also send MSG_MOVE_STOP before spell casts so the server doesn't reject cast-time spells (e.g. hearthstone) with "can't do that while moving". 
--- src/game/spell_handler.cpp | 8 ++++++++ src/rendering/character_renderer.cpp | 21 +++++++++++++++++++++ 2 files changed, 29 insertions(+) diff --git a/src/game/spell_handler.cpp b/src/game/spell_handler.cpp index 59e0d29f..47346c39 100644 --- a/src/game/spell_handler.cpp +++ b/src/game/spell_handler.cpp @@ -222,6 +222,14 @@ void SpellHandler::castSpell(uint32_t spellId, uint64_t targetGuid) { return; } + // Stop movement before casting — servers reject cast-time spells while moving + const uint32_t moveFlags = owner_.movementInfo.flags; + const bool isMoving = (moveFlags & 0x0Fu) != 0; // FORWARD|BACKWARD|STRAFE_LEFT|STRAFE_RIGHT + if (isMoving) { + owner_.movementInfo.flags &= ~0x0Fu; + owner_.sendMovement(Opcode::MSG_MOVE_STOP); + } + uint64_t target = targetGuid != 0 ? targetGuid : owner_.targetGuid; // Self-targeted spells like hearthstone should not send a target if (spellId == 8690) target = 0; diff --git a/src/rendering/character_renderer.cpp b/src/rendering/character_renderer.cpp index 79412156..2b347ac7 100644 --- a/src/rendering/character_renderer.cpp +++ b/src/rendering/character_renderer.cpp @@ -2024,6 +2024,13 @@ void CharacterRenderer::prepareRender(uint32_t frameIndex) { &instance.boneBuffer[frameIndex], &instance.boneAlloc[frameIndex], &allocInfo); instance.boneMapped[frameIndex] = allocInfo.pMappedData; + // Initialize all bone slots to identity so out-of-range indices + // produce correct (neutral) transforms instead of GPU garbage + if (instance.boneMapped[frameIndex]) { + auto* dst = static_cast(instance.boneMapped[frameIndex]); + for (int j = 0; j < MAX_BONES; j++) dst[j] = glm::mat4(1.0f); + } + VkDescriptorSetAllocateInfo ai{VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO}; ai.descriptorPool = boneDescPool_; ai.descriptorSetCount = 1; @@ -2147,6 +2154,13 @@ void CharacterRenderer::render(VkCommandBuffer cmd, VkDescriptorSet perFrameSet, &instance.boneBuffer[frameIndex], &instance.boneAlloc[frameIndex], &allocInfo); 
instance.boneMapped[frameIndex] = allocInfo.pMappedData; + // Initialize all bone slots to identity so out-of-range indices + // produce correct (neutral) transforms instead of GPU garbage + if (instance.boneMapped[frameIndex]) { + auto* dst = static_cast(instance.boneMapped[frameIndex]); + for (int j = 0; j < MAX_BONES; j++) dst[j] = glm::mat4(1.0f); + } + // Allocate descriptor set for bone SSBO VkDescriptorSetAllocateInfo ai{VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO}; ai.descriptorPool = boneDescPool_; @@ -2787,6 +2801,13 @@ void CharacterRenderer::renderShadow(VkCommandBuffer cmd, const glm::mat4& light &inst.boneBuffer[frameIndex], &inst.boneAlloc[frameIndex], &ai); inst.boneMapped[frameIndex] = ai.pMappedData; + // Initialize all bone slots to identity so out-of-range indices + // produce correct (neutral) transforms instead of GPU garbage + if (inst.boneMapped[frameIndex]) { + auto* dst = static_cast(inst.boneMapped[frameIndex]); + for (int j = 0; j < MAX_BONES; j++) dst[j] = glm::mat4(1.0f); + } + VkDescriptorSetAllocateInfo dsAI{VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO}; dsAI.descriptorPool = boneDescPool_; dsAI.descriptorSetCount = 1; From c85d023329d215152f3e813b80011653cd627775 Mon Sep 17 00:00:00 2001 From: Kelsi Date: Sat, 4 Apr 2026 00:02:05 -0700 Subject: [PATCH 4/8] fix(rendering): correct alpha test on opaque batches and hair transparency MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - alphaTestPipeline_ uses blendDisabled() so surviving pixels are fully opaque (was blendAlpha, causing hair to blend with background) - Remove alphaCutout from alphaTest condition — opaque materials like capes no longer alpha-test just because their texture has an alpha channel - Two-pass batch rendering: opaque (blendMode 0) draws first to establish depth, then alpha-key/blend draws on top --- assets/shaders/character.frag.spv | Bin 16048 -> 17632 bytes src/rendering/character_renderer.cpp | 91 
+++++++++++++++++++++++++-- 2 files changed, 87 insertions(+), 4 deletions(-) diff --git a/assets/shaders/character.frag.spv b/assets/shaders/character.frag.spv index 99c68edf24ddec7f733c836d9232807899737158..f970ac7cfaa372c9f83b94572e6aea8b0bcdf821 100644 GIT binary patch literal 17632 zcmZ{r2bf(|)rJpDCZS8O0tpC0K%{p_2qhqa1QWUvZ)WZ!19ys3LTDO5KrA2%La+b< zk)k4?f{4I?0(PwUTd|@dVnanm|Mxxje8XX$|KHAf_P4&Z_S$RLbIv7pth()#qFAL^ zvsk-$WYwZ9>lLfQ6mXNvdiI?8b9NaX>E30}y>``MouZ@k)8`e%TE!%4rLWQ3uimU4o^$xZuED{D!y{+4nhRUK z{mq5ly+h5ek=~_E{Z^~__4aqQM!V|{op{u9=`2% z&)qa)2Z1Kme7x<&?ub|`(< zEViH?Yz#G8t;VvsU1T)eJJ5ejV{nJ1J-e5E^z3Fg#OWwDr5;@>?kqL~FYRrfd06kz zg88Mrv)G)vl@3a7Qn4lVaG7~K#x(S0rLCjbnfj~>8>1V8v0Z}&&1Hu=1RzXDaguhN zJ6YFL!6VIO&DLSf{^8ycL%>fedZ|z79vC^c)H{n2>PG+KR7GpWwAPIwgtWYHlv@_%6=w;hX@06m)v@AFF${iiw)qGHitMa?PLjy@1*jM3*(vczFXW{c*M{^{|LvWw|{Zl zZ>s(D_BUGPTy+%Npf9Lmct0-SfUyR|>?pQJpHk6Rr|EfIVSzBJ*WEd$Gi`m z?%g}U&F;RG_nfxew(~jIO-}t#uUEo&JI`uq=VGh#p4XB;)I+ZSlrml?e4}qsFLO{? zw6*i{teV%nf#LD^p3k0v#Wmk{zMf6^d0fSP?Q_CiR6xY({6O*#o ze%u?2&|iKxPbzxgyyyF>HcKYxr@u_^Ec(&eG~;<1cK~jbi+N;ps5%Fo<#Tah^rf-8 z&X?oY8ttP+M{zaS%g{V;0edI3^X@F}L+@)0W*g$uSv&!Eri)&&azBLQLhG*{U1h9i z^=l;7q~dw>m(Ite;zjfp&)&>Y2Mgcb+tU+$HS}tZ%6P9pU(#sxxa{V-A-eZ${I^mc ze~wKqUWM*h8m;lGg?63A&hU$R8^g_RSED`d-QilpFX;!OyEhYiplZ|BJBwxLqwUAN z*LRuE1-7ZJ-*xESz2#%0?DuAL_D}VsD~j2S!RP2!2&UplynIeswryAA98*49M+U<9s&B9a!Uh zCdtjLapo#_7&4pvm}j-~<8x>m#PLU;R@1%X>vnbR&W--QCpy1r?;UCm>s5B|N9`Sz z?z^OVd!8qi_XjE~)1DLWCyV)7oTqTkReO%x?R2fTQ?HLbci_Z!=3Z~7ZoB13`jdO9 zT{{QIpx;AC{Id2>)VQquQ#CGY|Hm4awRiFoFKfSUjmz3kg>(IKhdck?f!h$%5`9`* zEp_ij=i8#)`%8bSalP*EJAU2Yb6AhBuD|U!U>BgG`z6rc6B~mZ>r`Yj-rXIey8cQ^ z@D}(xX8op;pJTPiZG!F|x4rSTySMa}pT;V>x3$|&o4L66{WoORrc)=b=eH=9 z?`PZ@psA`9?dmfT^N;^rFel*<^Y=ZZ9kVD7sC4fPh%?^&f6ul^x&)AADFYgM+?sy{VS(iE|)~1--+0^E340BWWzR6hhKNm5U{^wOT zd2_M80P&1PzX9x8>%WrPSoW9PZv_{{%$e@J&w;e9B2_EV-Fx@L>u0>rS3Yv)tNuX6 zxtGSk#!Egkz>gsA&E)qO*b@Eg;F*`{$ALnB$PNMgcTtwUdCywt#d|f}=IX-poGr8n*GP-%_e@bN=L(hD9zwuXS 
z?*ll#vC8iHdEZ%*|JiMQhW;0UotyZpUtDoJ6G{FObkBolUlAv_OR*(ab9^uQB^$38 zKS%FFUwQb(Gt&2!=-*xT+kMTl}s8U;gm(2Ss0je(j2v;@*h9#*^C~ zm^^MncihqMM)zJ!`v=h}Gu7R*kGJ*GuKpEtskDC@rM{>CfbKKRIeQN5n9V=&H1dly~C&0bnSk-#jd{n1l{km*gY?Pqt$i4)1upc!35p!j@a$Lr>5)gH%IK| z?>9$u_pjd^(Y5;>QP=&Jh;BT;C+fQ26LsBhiRi}jJ0iO6pPisTKSB2!BL24b8zQ>x z{f4OPenUjp-*1Sz?l(kT_ZuR*@%(m(?tJ*&5M8_94bhG7cSCgTej`NJ?l(eR_ZvZd zKlY1fc76I-pZZ{SmwQS5AZ+gMjlmlsYgOlU6EJ_p;g!uc?l*bcOo!hb>>YherEdY2 z`#Zi{Qd|CsP5zaLoO@cFezyV}_pMbPTZ84&pZW{5Nn2~%`c5+q?DJn=_r7}Kz6$K~ zBIDT(%wIVkZMN}z$fwN?V4o>zvm=84L$@ce>*qkTzCDo zFN8P^xsaIZZvxv!KIf+a=C3?I+RephhmzUbMbvWodj~8A z=MLxrt0z`3Sby6sp;p)C+MWS+Ewr6Zt-c>?Xv`LP3u@!pPP=|nskOQGSp#ch`W`(9 z9;kHVycukp6B)}G`Vd$jdE@m{%VmELgWZFEI~mjcJpw16{XGhn%lwKPaN<0aPk@N1z@?1_if&4WUATL&ZYEPf<0NdxK)S2UV zg5}fayTEek^D=Py)b?&f&OXJ)8N=Z{VCOhz=nAl$VzKfJUCElb2j35&&sB(dUrsHT z^|=--w-#qo{||uqD|SP!0jxmuvF+8=+SB%fV7aXShrkop|HE+d7ghOO2bQyK+FcJ$ zJNr@BFZLU1_Vm3HPConUCa~O%h zg<3spd^^}RzKvRc&(X)g@>%1LgXLV~qHYcAip8}iH-PGyt(_r~LUp@nt%kw31taJYE0o#vrp}&2Y$7iW6=AqBMh@5$d z?X&#;2m76`?j3bM`cx!$)aSvUL*%oM9{|fM7AyDhWsJ?WcnCtD2a!DI$G}UmMe>|@ z7+!zvuHAjqa+#+`z!T@`3vlx0Y=2(_+iq@kUq1?#Q!G~I=J*~%(Qb_F*)M@@|2UF& zn=ga)wV0Paa%uBbux-AAq|MjBw&AaAqmNwj{5se+c^-ZP%wPFD)Rytf+h%9#Zz8)? zx?}wo*cf>pejBWheByow%wHK-yK$eOmN)KnY~KZMhU8uSdtf>D&iAP;?j89j5jppc zIOF*ruyOOQ{sXXF`cwD3rmeMYy}Nz{UJuDS{}?=Roqqx+?_ROZPr>dV`L$SUxu?MV z6+0lt)W`mRNUbfo{tRrc(SHv1zDPd50NW<}^_O6|hP-+|@w4EjCT;u>jt2KhJQ8i|el z9Q7X&WB-9VWB*gdXYT)8`CNcs=HoAL@;TFg1=~*EF+EEyr~er3{s!(xa!33fET>qk zyd#psKOl^e`F;VMyTST=@^`-f39n6mpG$K80vlhOxjauT=lnV^&Vh5`_)ekz4{{Qc z`5hz1e=9!unw*?su`=K6?NwBetn;d1ef%EQW*#p>$Y+d`z^;AtPIP_pyq}D&k9_v? 
z6tHpK&)WUn&DiB{59k@w>R`uoaut6Jzct|GGp04ca*D;c!sN3S%4v|fi`PcCjeN$r z4%l{%Q@gqNyN7(@tP6Hr(bq%QCu_JqSReVs-2j}p+Kub)C-RB25x72o8>8!!`P&4n zk9_8DQ?PNJKkd$6Y@1iM&EfR(w>#%^CHfX%&q?OfyXBQ|@_8TH5^M~4^Y!-~IkCU( zxNi^PR$58DHM;9_1Nd=j$7IZ@@Y?c>p9YpY8z1l9ZNV1LwYF^#Irp?UbF)3TJ~yvM zukWoL(Djjb&h2|gFx?QD`2pu%oenm4?HTJ%;O!7?j?dq!{0^4-{~bFJiN!|AKd^>GYGg6+rsow+^=U0;j2=_8jmuLIkr{@!#nx^2wMHu}h= z%^Yz0ag6S-W8mZ;AeZUr$AaZN@1CQ1V2kHS+gwD>b0kjeSRzaCEh{wnr-u$=qB zXH4Q)=RJG@*nT{t`rC(joIq_c4}FeDUNXj$;N&yLlfiP1(L55zI(eJ| zwjXoT-#*Oa4b&F%(C1V{&OF4)elSdEOe#}jO`!J6s)E4JMpT&rrd5Duo zFL+`er^Cr-KF$EkWj+$eI(f9f_G51P+lP7dQzs97`Vcwu5a&$48JzpoJ75UCe%}wH z>m#3geFSVy+H$Xtf(H?8c`lv__U!r0)V>t)91c)xOAcp&or~NnXM^Pwi!m~9=fF*z zw{zj-v!3UH?IZcV1+0&J?vuBI?Z*;sdBxdZ?#lDw^wnk@^S^*ve!uGZcOlq+PrZy< zn?74n%X^+O@7BrjZD4)!T)zmM=el=T{4R#qmfvPB0h^!S39f;0^huxE?Q>gd*V@{- z^L_R0VEf2B`xxyng_Fg-lIMHC=6M0}((e^;^2zhPU^&H-e)C@NK6rhMofubw>$zWrE~nIEB=`5j8%Mw7 zdNp`MB)MJ#o|x+jIQisyEm%&e_mf;d0I!d+&u6VXhaUvnzqa^&2rTdX#qYyl=UAJ2 zz;6~g_dvc`tdsBc*vvQYqhs`S1Dt&FT?v*`EVz&F0du<%zW#oH6S{5W^G^5?ukAlr*22$Vix4>)5oZkwzZO-#;V148h>vr(}73*X0+T!=|nxF3%>Ejdd z+S11zVB5<3e&oBsC&BiuE$8u0u;bjRvir>NTsq#Hv1!lT-vu_u+p2!Y@VgsMK6C#m zu$*Esw$MDC`%j~|#>x9L==PKQ>K?FMB)>1*3ui3v4f`;TzS%R{)5mAQ_F>)|<9{Ez z=cfKn`8haweOBUgKiKb_`tO{dhm+4U;sLPjPpR~& z)YB&DJ6F2>>g)Q8_1gw1?_%omE~fq)>$n~D_J~FMtEshHy9WMF<1^D3ca!^cYWJ6% z|INmC3-wna#+k!vwNAgggN>1X_W;YKUv>A8v1}{0 z?P}C}BF<;_(OzKZG|%sRoU>~y)=S*IE`aTluUNUF>jzZ-0&pwlL4P8&y!8&vGIhV1ptnU+* z?hH^kcYOFyGv`^nxq6KrlfA=>P3DPoSU zy>`cX9`#$0bC7cp<9Q#w6_MB9y`di4`Cz$As=Mn#u-pZRXTtVxL+pP#qFtM@mQ!m> ztc$^Nd1hP!mW%!EVEc;wQm~x%$<*&a98bpmPO$y%f*4DGefq0$y$h_5xwxm*)7QJf z%aQbT8CWj%%fYsZ{XJm0RI)*93wK-}MX?tif2HJ4G%rJ_?=~;}$si zywBbSmb(=(#?91n_GxanBgRb*9|P;-fBSKNeH@X?_&!l_@@aDi*f#O`Bv?*y-Rd?$Z^Q{yu{)r)19W0UJk~ecVMYmp(oV-n6pa zORcW$e(DF22P)m%A42rWzYBO6tj|M8-krVxmhV8a|Gx-!KWa0F`>5r_k5WH|_&)MD zwfC8|@4#QC{t9Bz{#9!2*5>G(d<`+io$ROS)ZeJMuTy)Ms(%SFmTkpp`vlmw(Z30H ze($JaeG9CQygrkuzm0f?qJIbMc=CJXcftC|-(1D{9@tpgyz9SDZ81M>Pa<;WCw8pK 
zSKhvy{~sW(U-Tb>T@UA3`;QPg{hV8Ea{9ZTKSt{7`4e<~vYtP!eB`s9Pl4s@>-jTu z*YhbP>nWEx`8oKin*Iy0b9O#?I)}eRHaW-bdOHu+#&eB-g}6?6r~P%s?MR(H zBv;=<&R^!~H(BX9d>sO9u` z-v5Zy=lxIU_K|u2bLAtSdH)MozCQ1NMR(q{C70*H^<4gjuAh8-{tk9MGXMVo>nCr% z&r!?8|DRyT5dB{h^natn}c?5vB$=xzLN2tXOjR$Y|f;#~yd20Xx^aia29-sZFg-LMsFH=Ae$f zk%y5j$YaRwkb?;xBLA+mHI`06UG!9|-HUHhZHL;TC94)KTefJ)>8pB&hE|P?ZfZ4F zwVH#CRejCjM(=2IeZ#mZmAK|$Z)>ct8qiHdwU%yrnnEAF?SY}O(Z-T?-x>AZ(f05m zaNUe>cl1zwxZY~jH!kZX+eouLxU@cW$ol@H=dABPW={W6a|j`}i`r|ovGwBa+HT&Eb_R3V(NP8oHGZ3a+cRC%SKAr;YQnvaxYNyFJ_oEOPFrf0KA}?aAQL#>Pf# zVPkNlISMO3sWuyZUSE6k?85G@orJCru5C3sLn&hB!41?mT1t^?K6=5r`tZ{FXk)lp zZ^=)_Bk$85s`uK(Wd56v?ON7bL+k1*8zZCO$;_L)>(kqAwTI7YZ0c#%doM10ojmS> zvB6fe8t>dU2AU%ytX5CE{e)cI72nF{pv~xKO3}|`@W@c3cdS((UOYIubpld{`=v2h zU(;%w-5wsOw^*XBwY$C@-|}Tki=3S?Z4Vx553J~|x7az`#hU*<0ChzEyimS-K2H$eTwjMOrds%<~NW+P=&r8vL zjn-)W-1CZ<&Ddow7-Oc?uGq#Ey%wWvjTQ4_|5xK5>rC;PnFc7@cGa#&Hw!$eb{l$V zxUt^E)^j^{y`R0dbu8VryWrL|`rE^e4rz_|!fhxLcWS;Ld$=(&)?yImc>ue=-j}Ui zaNmV{;<=h!`##+I#xTdFQ!FO2lZtyMjAv>K{My#4GltuPqa2s!;Mz{yqwxLB!FsEh ztFGFwuveBjyt7tvz*qxvcGZ51-Bb9wi@U6+Ia+$n)m8f=zRua~s{O^bJ!coS?8l)3 zuWAmi$L4%vmv_eb?cr8=2Xxn-0>8X5e8zBnpxmb&j#Ndw+H-0gKg@gkBJaaCo4|c3 zKM}s=wq3!&ZgA>{o7_6v+IdzBKNnV&_q-PT0uQ>*{^&4O?BsT+VQSJg%kh3?^l*{kS(~ zV?XiEom4vl&ii|yY;)8&`!~x@#|o+u#D1tlS&mxV#3-*H4k_1IE>pYf|kF>}_XZ zQtfW+7Vo~y$9>p+&Hnz_U%)QsqsaFF_PTnj-zB!zZ(@6=CjMdVV$M5b`3ZJw#M$lG zzrl8IC4L5*YE84Qn|$51!?4Fz6bZb3i$2bU>uVO*d=YaYHtSu!nL6!!@3+Uc?W0B1 z=DY9!1Xt}$TFyuN%<8DE93>T;`!sR3G0wde&iyUt{T$9Va=y32xv%A>S2*{x+@TfD z{VaE6g|k1oV=J8dSnk9M=lezOlnUp2MQ(nDvsSr<$RhUHe6(Y8etOU|5yu~UM#c7? 
zuKKn0r@h}_&Trap0b7ILE#~u%)8AFvzboy|JWnd_04P?bGk4xGKGy5we1>zbI&<7< zXK1yZc6IEz$Hup__G&wA+igbDpWL=~of;g2arYpJ%i2F!;j;FRRJg3Y<4@bHy^|L% zYrkuS%i2$ebNzA;JHOszGs)=_dq&4BZ0|Pb-$%c9knyOkb~WC!SIwubKk=Ti^mc7qo57 zW2}DT_CilU^vx-4?*X~l*tS0q+566~of!Kd>>VC@`R?twlScoq4EoVttCaPD5MIe8e$o z_aJiCX#4`iIWXR{uTMVy#bEbu##9IU#NPzF_iTR=+V=LJ_)EZ4Fn?d7t_10uAXRx{ z@6W1U1#g`BUR1`&S-m&o#IXac`u)8^AuXUkjdJv2Ov}f9my&9&^lx z=i2`dN}qFW|Jo0ioc7uIB#^98Zt_pO${tG5yb^ zL3>)))_rgY_TG1%bE4<&>0sx^6rD7t+J-%Mvxzaz{D-1_?|2{mZLqax9kspl z<&I|*hPvl`+ev`rM7pnzdyxxZ$DbG_5W&|?e9Q|*Z;?g?HTg-p7^!> z-6yv3{_az?{g#hy|9;2EHs0^}*w*iNd~El$-|?~a`@LSZ{brACKEKB)}-+lQ##5-;-yT!eseJVcp{|^!G+^OYxegsT+wbK!OwsHSFg4o8p z_{Yc=!yf1^#>s%`7L^p}X= z62`ihwUhVP;Kz`R=QrStN1tun@A7H$d$8|~wD|)#ZS>j3Ga;Wge*#;-KF^1Ce18So zN9@0WZ6EvZVB4I-shy1e2VxtaaQ_6$Sx2~kl^n~vmHRi?{62~M4_L0+Rzt4Zb^@HW z_@wQ`lB>4uDmmLOFZanLIP11f>k8iuZ%x)m1eOEh&r-F9?%fF8p_vDUXIlo8ThdY6N+=u$6B699SvAO-8+68QG-}~lv zJ^b#M|5%xO8d%P60rzC`s2}H~+5hff`*EKdZy(9G2iS9WSD9}wY&pLrZZ5gK!RF9s z{rjSQtY6HxH3>e~8WPyW%vk{apvn9Q5c%vL6?Mi61O(?Kj#`_Gg<6Vr-c+UdMXS_?mavAU0U?2O@w-k}HAF+9y>*Zj_ zoBezaSWfX#xu0Ln{J0;^gD~b?#I;$5mdji{8!R`K^I`mR!2HyXLj1sbYsvu)a~2B)3^H%A2d)EaXN@lgyT%uxjdxGC!17t+0kE8FoVo|Gt9NA^ zTdsOn4q@9?pJN$D`#2VTmmqSEMQlyp^`l^G8bPPOF|d5@`t@MB-4Mr}JnEc<4Pg6m zE{wMi>)3?$u?}N4B68LtwomVi=Yy|CYkTj!0K0l$yb#+M`RwEW0?R8tD);gAjLo&! 
z3}MV=NZz@ZgXiFjJ%y@D_Bwn_r79 z=iYfe+Q+>k|2jm@y(3O7H-gQZ@8+Apa_LXoeVevw+j@7s5$xH@I==~f{5roGPTsv@ zo40`7Kk`#qYq__A`KcX(n9~^he*;=yYW;7pwZ?uM*!v>&ydB#%*{|;a%SF6%-D~fJ zG2U||@QCV;H_NO6?$vWQ)Hpbsg^;yT=X!(rsGho*~ z_GiJyQnZJj?#>i*>{s+uYF@O5&;P}2<`u2k}?mLL{>HGM5V9!bB)4S#SaPs*c z`T^J+^49xJw4C^1wEOlnZYA^m5Zm=>g0Dq8CUZUl)|YqukHB&Z5bxd}qkTNr`nDi) z?rE|0{uKQ)#Qcj9b02{IIU=9=`vutb&HL$5u$)i0UzVJ7xL=i=du2KItmox1INKYq z-?91*()Vj@`!>e8_z7ChI`sb*ESG)xJFuMMqp~l@=I;r=hcLeSd%_>E<#S*B5iA$E zmbTsve}Xf=erx#+S}t?)I9M*<3V#O6DL$%vEBpmRziYFc{C+?DwY25j-*SHg8>=s4 z_&eBsT>H%HKfuQNSer3&Y4cC8ZLUMo=3ijj@KdxgMlNms4NgCf(LMGbIQdUfpYM7n zSFR3pk4<#4dE8_ACLnU|F>!Ktfyd{b1SkKAGIuvv&i&y1n>^}#qfQ3f&lYMh-af2j ziad{X7_%KBXC30yu{}8Rk}>W8C!aCy2$pk<){#8w)G-xoKh|cveOSlNXdmk^W+z0> zI>f1C7x4Hxc7>DAeC!66%X}n{I(19~+mE#wZy(mN2Rd~avpXVZ9pconC)hd;BRKQ1 z7o2?NV{for<|BF3sbe3o{aBmv_F*0Sp;L!3`yz7IAx<6pgU8qLBslrZ$CJTwnUCaA zr;Y=__G4|v+lO^bN2d;Bo`T3(hd5_?7I+==5&NmwcVp*0aUixa^7)oL2y9LIa=#x8 zo{8woJL73!QWftL{f8i)UH|T_&p!RPlfx1J-6S;}0d{V3&m4&@r}&sN^LSJlTV2ni z;pENdnLY+=pQ-;?urczvD~yLfMtFLf@0lTW{Ag5?yS^qX(-#qh?MJ2{>KuGW4QwwzMUk=mEQo5#4+ zx)kg;O=>+Gdwi|S;N(;5a&aluep2f>_>3|4<*b$O=x2iMUti)@faRUP#H|E7$NJm@ zcTk(0d*JNoMTv27#ox2tF3Ibgb| z^&tAK?Oe2c_WX0f)-naD?)m4z>&u+40^2s{dNtS>`Q)mD|F2wY;PoZ0w-V=fNBZc4 z*OxvTVB5<3-Q<}1!S<~$=W#9Aan3INb+qTw@m`2ef98H2*cvY?`}O=a;p8*-7lGvz zA9D*c_ZMTh#;Lo7Z9loU2EcNW{7yayXD;sz`!J8O*)#gn$0cCi279Qi%F58}UPJW$&E zmYn^YOFOaqgN=Q#jC~SV&RF}=PV50-+sA%NY3DsX9orcB#LNJjC-%(JPMcZS#>nTL z{Zz2?>)(W?BgR;doaF8keh+xhqO1CfIeWB$#2>dSmPNB-N4&o0Ob%NF^&6Ld zP6pdg_SPw2Ym?Jwf4NItd;N}eA=qrAzh~+k#PMX@&jj1A>trtD zjTtP*wE}F6wYaCX)7MIHwU%?S<>EgNY@5__K3FcbJPSMx$$RetuyZb-`koC=edafZ zajEY)U}I9BcKUxVxZ3~ou;ubjT?Mv3dE0wN<;1Jeb;Mj#(Y?rSNPb`FD>-=|Ys|PB z;LhIa{0+Rn^m|9?b8qGE6>Guz73=cuT8EflU*2&SgWYpYB>l9&^7(tn0NA$jj$sX2 zE;-uZ@i~Uz&0}1~r9U-2ADo&zOD{mG>;A%$OMm}`EvID8F9VxLpM6}4mP;R( zgLf@`o6*|(u0mgfTwU7Mel4;Y$-n8p1Z<3azA0V`mhVEc|6c}nKkBoFE6{S{>(MtL zep|mB?R}>HGc~*t{VK#q|EtmZ)z;{oyaqAH2iarCpkG&VuSI*8YQF+8muk~WH)GKe_&i~sG*Dv;MGWK`4reV`NZ4< 
zc0Drxp9ULu4`RJ{q2&^PAJ{R({>(W0v)HytpPvI8BcDD#50*a=$$RMwU~}uU-+R$= z_TxHy5!qa^zXW!kQpcCUw#j|>6|kI=`}M2f3CQ7y`OIOQ{oRk&pL|~f&qnf2{yNyU zM bitanAccum(vertCount, glm::vec3(0.0f)); // Copy base vertex data + size_t numBones = model.bones.size(); + int outOfRangeCount = 0, ge128Count = 0, nonzeroWeightOOR = 0; for (size_t i = 0; i < vertCount; i++) { const auto& src = model.vertices[i]; auto& dst = gpuVerts[i]; @@ -1490,6 +1492,22 @@ void CharacterRenderer::setupModelBuffers(M2ModelGPU& gpuModel) { dst.normal = src.normal; dst.texCoords = src.texCoords[0]; // Use first UV set dst.tangent = glm::vec4(1.0f, 0.0f, 0.0f, 1.0f); // default + + // Diagnostic: check bone indices + for (int j = 0; j < 4; j++) { + uint8_t bi = src.boneIndices[j]; + uint8_t bw = src.boneWeights[j]; + if (bi >= numBones) { + outOfRangeCount++; + if (bw > 0) nonzeroWeightOOR++; + } + if (bi >= 128) ge128Count++; + } + } + if (outOfRangeCount > 0 || ge128Count > 0) { + LOG_WARNING("VERTEX DIAG: model bones=", numBones, " verts=", vertCount, + " outOfRange=", outOfRangeCount, " (nonzeroWeight=", nonzeroWeightOOR, ")", + " ge128=", ge128Count); } // Accumulate tangent/bitangent per triangle @@ -1959,6 +1977,19 @@ void CharacterRenderer::calculateBoneMatrices(CharacterInstance& instance) { const auto& gsd = model.globalSequenceDurations; + // One-time diagnostic: check bone ordering (parents must precede children) + static bool checkedBoneOrder = false; + if (!checkedBoneOrder) { + checkedBoneOrder = true; + for (size_t i = 0; i < numBones; i++) { + const auto& bone = model.bones[i]; + if (bone.parentBone >= 0 && static_cast(bone.parentBone) >= i) { + LOG_WARNING("Bone ", i, " references parent ", bone.parentBone, + " which comes AFTER it — will use stale matrix!"); + } + } + } + for (size_t i = 0; i < numBones; i++) { const auto& bone = model.bones[i]; @@ -1973,6 +2004,26 @@ void CharacterRenderer::calculateBoneMatrices(CharacterInstance& instance) { } else { instance.boneMatrices[i] = localTransform; 
} + + // Diagnostic: detect bones with extreme translation + float tx = std::abs(instance.boneMatrices[i][3][0]); + float ty = std::abs(instance.boneMatrices[i][3][1]); + float tz = std::abs(instance.boneMatrices[i][3][2]); + static int diagFrames = 0; + if (diagFrames < 3 && (tx > 50.0f || ty > 50.0f || tz > 50.0f)) { + LOG_WARNING("BONE DIAG: bone[", i, "] keyBone=", bone.keyBoneId, + " flags=0x", std::hex, bone.flags, std::dec, + " parent=", bone.parentBone, + " pivot=(", bone.pivot.x, ",", bone.pivot.y, ",", bone.pivot.z, ")", + " mat_t=(", instance.boneMatrices[i][3][0], ",", + instance.boneMatrices[i][3][1], ",", instance.boneMatrices[i][3][2], ")", + " local_t=(", localTransform[3][0], ",", localTransform[3][1], ",", + localTransform[3][2], ")", + " animTime=", instance.animationTime, + " gsTime=", instance.globalSequenceTime, + " seqIdx=", instance.currentSequenceIndex); + } + if (i == numBones - 1) diagFrames++; } } @@ -2297,8 +2348,39 @@ void CharacterRenderer::render(VkCommandBuffer cmd, VkDescriptorSet perFrameSet, return whiteTexture_.get(); }; - // Draw batches (submeshes) with per-batch textures + // One-time batch diagnostic for first character instance + static bool batchDiagDone = false; + if (!batchDiagDone && !instance.hasOverrideModelMatrix) { + batchDiagDone = true; + for (const auto& b : gpuModel.data.batches) { + uint16_t bm = 0, mf = 0; + if (b.materialIndex < gpuModel.data.materials.size()) { + bm = gpuModel.data.materials[b.materialIndex].blendMode; + mf = gpuModel.data.materials[b.materialIndex].flags; + } + uint16_t bg = static_cast(b.submeshId / 100); + bool active = instance.activeGeosets.empty() || + instance.activeGeosets.count(b.submeshId); + LOG_WARNING("BATCH DIAG: submesh=", b.submeshId, " group=", bg, + " blend=", bm, " matFlags=0x", std::hex, mf, std::dec, + " texIdx=", b.textureIndex, " matIdx=", b.materialIndex, + " active=", active); + } + } + + // Draw batches in two passes: opaque (blendMode 0) first, then + // 
alpha-key/blend after. This ensures capes and body parts write + // depth before hair overlay, preventing hair→cape z-fight. + auto getBatchBlendMode = [&](const pipeline::M2Batch& b) -> uint16_t { + if (b.materialIndex < gpuModel.data.materials.size()) + return gpuModel.data.materials[b.materialIndex].blendMode; + return 0; + }; + for (int pass = 0; pass < 2; pass++) { for (const auto& batch : gpuModel.data.batches) { + uint16_t bm = getBatchBlendMode(batch); + if (pass == 0 && bm != 0) continue; // pass 0: opaque only + if (pass == 1 && bm == 0) continue; // pass 1: non-opaque only if (applyGeosetFilter) { if (instance.activeGeosets.find(batch.submeshId) == instance.activeGeosets.end()) { continue; @@ -2449,7 +2531,7 @@ void CharacterRenderer::render(VkCommandBuffer cmd, VkDescriptorSet perFrameSet, // Create per-batch material UBO CharMaterialUBO matData{}; matData.opacity = instance.opacity; - matData.alphaTest = (blendNeedsCutout || alphaCutout) ? 1 : 0; + matData.alphaTest = blendNeedsCutout ? 1 : 0; matData.colorKeyBlack = (blendNeedsCutout || colorKeyBlack) ? 1 : 0; matData.unlit = unlit ? 1 : 0; matData.emissiveBoost = emissiveBoost; @@ -2509,6 +2591,7 @@ void CharacterRenderer::render(VkCommandBuffer cmd, VkDescriptorSet perFrameSet, vkCmdDrawIndexed(cmd, batch.indexCount, 1, batch.indexStart, 0, 0); } + } // end pass loop } else { // Draw entire model with first texture VkTexture* texPtr = !gpuModel.textureIds.empty() ? 
gpuModel.textureIds[0] : whiteTexture_.get(); @@ -3425,7 +3508,7 @@ void CharacterRenderer::recreatePipelines() { " pipelineLayout=", (void*)pipelineLayout_); opaquePipeline_ = buildCharPipeline(PipelineBuilder::blendDisabled(), true); - alphaTestPipeline_ = buildCharPipeline(PipelineBuilder::blendAlpha(), true); + alphaTestPipeline_ = buildCharPipeline(PipelineBuilder::blendDisabled(), true); alphaPipeline_ = buildCharPipeline(PipelineBuilder::blendAlpha(), false); additivePipeline_ = buildCharPipeline(PipelineBuilder::blendAdditive(), false); From 5538655383e8c75429b76d1691a33a45635caa2e Mon Sep 17 00:00:00 2001 From: Kelsi Date: Sat, 4 Apr 2026 00:03:05 -0700 Subject: [PATCH 5/8] fix(chat): resolve /r reply target when name arrives after whisper Whisper sender name may not be in the player name cache when the packet arrives. Store the sender GUID and lazily resolve the name from the cache in getLastWhisperSender(). Also backfill lastWhisperSender_ when the SMSG_NAME_QUERY_RESPONSE arrives. 
--- assets/shaders/character.frag.spv | Bin 17632 -> 17488 bytes include/game/game_handler.hpp | 12 +++++++++++- src/game/chat_handler.cpp | 23 +++++++++++++++-------- src/game/entity_controller.cpp | 5 +++++ 4 files changed, 31 insertions(+), 9 deletions(-) diff --git a/assets/shaders/character.frag.spv b/assets/shaders/character.frag.spv index f970ac7cfaa372c9f83b94572e6aea8b0bcdf821..43789272f26cf2430fef49603a82af4bbb54dbe4 100644 GIT binary patch literal 17488 zcmZ{r2bf(|)rJpDCZS6WO&|e92#E9!384fekYGYr;?2yRWZ+J5N(fB?NKuiZfPx|r zkg6h}fQUd)!7f((tyob(v49E|{`Woie8XX$|KHAf_P4&Z_S$RLbIv7pti08vqFAX| zy;!q&aOI*b>l7=)6mS#Ede-cDvv(LC>E2=2-FDPrt)ip!)8{qC8pQ-^rLWQ3uiplfh&!SKjwt>%JO zZ+~+^ckfWMYovEcQ@>See!cx&ty~=o;OIzm_CWU`jjoY_p>0av z)r(E32OC3;R;#geP8S&s_YU+Q(HPujNzcw@A3ZzS4RJb(ji^VLh&zjo!Ap9ZrykTh zG=E-c?<_Wywx>R=!p7*vU~Jc5esk%84gm<$Q5>fo z=T6r3MDR#+X|r`uvwygE#1QZkieBntx(7y%EcMP}gu2ncsMTx_x%9E`zQ$6!D`VOC zz{QQBBN`*kq25MIej=7KD0pD7(PbAC`JWuiT{K#QiyQNs!^WS;{K~r*x&~SULx(p{ zo7rk~ol@G`aolmE{jJ`*zw6xW>m43uXUrTJcvY;zs=}pm9n3S;Nih$*J!IT zbXfn$*aRdG_hz%dv9Q%Va$u;h(PAmba$jZ4Hh0buWz6=NRs|0Z^v&yPwAiJujA^wt ze+1K2?m)KFSRSi`7dLwsEgqqVV;W3df0vDu=NjPtWH4`JsM)`0WHFp;c?Pw`HCsmA z*V|vMM`v+9djDwOScmS1E8$v=)0#sEH3tpip1vBcYjlXXBeQF!GX6E#<|4MenVR*U z)6+BDbRzBZZtCu4You}XF{RJ_=v6EjeO4(Rc!eu_El1fJE$7GnAHqJ`p5j9@4N$c0 zC?2DBFWT4B)PqCKB?dO1XV4ox?6tA6bQaISEo}A-3^m)N?OudCwG7;jwVs;c;qt7gDo#nkbvv;Jj8mpt2hP{0@JBl4_ zJM+jgm!sNvL2v&O^ohlM^y+Roc3`Mg-2t7&G4OMnLx&7C`l@}}<{Xp$)}B-S_+#D& zPWJ8{;AVGU%6m>*Zrgbr>?WsvsMjlDtet1Iv~#i5dCzOfALt?1e^ME*6TZ>6u$MWg zEZW+6c~;G9?!fR^e9vdkz@nOOJ73Qx{9LZ$zV(@xE3B|MM zubhPm#q;Pbp0k;cm(aU=dwQb3j9$%08E+z+U~!|>;}Vge97@!vpw?3p#O*c9Ed zG+JZV2<72-Hinzs&U|~^+rqVmU(t6#cONEpPt~TaGpFdI?MJ&;b(zm;wyCYv zRp{Nl9D$u5t$`=*sETZW`RxmVh? 
zb8rm$J%Yq9YyU)z%i2Fx;9g_dOUUQ@%M}+-g@i;RP25UwD-hhkYk;KY|Oj6V^p`jk`lZr z{*GC{Ddgu^Epi*8yT@&BeC_TnedVXJitcUgw$o-V?mao%uU}#FwEYIuwwG6*Tx20#$8Ue?-luZfH>$9EMZ3E9 zD}SY~zxQih_kLC12>V08Dry7?Z8KCte2dqfUE^$>(@<^U(i<%C-VM^X2`dv9s_V^yLRnF8#H?7ybK7f4|@MWaZpm zjBXqKw56X*!IwVz!hX>&L%-(gSK?lSzS@&p?VCJqKsWd3x1)QvrTx9=lo{&oyNBC) zX;=3*7^$@X8A^R${s!IWlJoRiuwyp=#9v#2PnYPgZR>mMKMkzvy2ic;Vqo2lv3=2p zzTtgG?diIBK7DRjZ2u{+xvW66tNR?0+nua*bq`tO{t08Q`g;HRP3PRGyEpxwi|#!= zrKW55`z&_#t;gxpYPx5_@38o*`z;pT_VdT-ep|$D|2;Kbf4?hYH~*7sx_i{`in{%R zak}3Su^Z3th`R1~L|ykABD($g{Se*uemm6lXUFN!*L2(a?NImk+o7)e?GRmmza8qj z-wt)%Z-?mS<2OTe=fm%X=-T~Wh;Dqp7ouzT+aS7jzYXfT-v;V?v0ps1>(a-%)cdo$ z+)L{FVRQda25*3@QJv=v!Tc2mS2o+Y-{fsG4So}__wcQizA0GlANX!YZFvcs{A&?8 z_p~_uZVoo?*;O7}faTJk`irzlTWj0;-ZB;JGhSc!zIx)m4(u}_<9R)pzj8dc3|_@=6O<&?M-0&h`uA(_R)6&+vX@vx$Cnt*fy4M zyHuQXxLqrb-NVPxgT`Z}T*- z@cV+z$sFu6{C;5jGrsZUeGc!B7|VA0IG^4}@&_u~oXfyx0)E4vCHV2V2f3dNhzqcUH*ZfMK zS8>kcQDAk)dII%{i0>D^Q`mk5@!txTpTk>__rcr1a*D;ub=P0}0*I54^N6YbcCc;a zbAB3N{>t;C-CTTz$S3bEu=yo!H&~y`tGG?DKJvC-NG+$ocfcZW?tmV!dSdm0^|zhR zB6V%9?I~c_LfgsI>U*(<#%zH%r8b`JwCgv8TAOR1HLy0O@5+PVfl4>dJHWO%ma(ir z9|G$mZ@hkLx$N&@uzS$&BV)S1N8seMzemAx+25yvE$$C(OAtBthuGLY8_=>z71%EXJKpT)i@v0)a;|am{uuf?Nbzsv8g6+q-(BD4H<5ScY^U&u` zM9w_K_UV0bH`woLb?=LN(5E1|FFp;{M?U-bGhlheV&y)*gt566_d)1$FOp~O{op0o zBDu3a3$MR+*X}NAxy;i8;PLbHIXHQ9w!a6#wwqJkou3EGDHbbpb9@h>Xg5ao>?2^? zKaAu(V+B}Wi+SlImo{Gj+vZUuZN3P$4S!`DedLnom%z5kv*|G~f911DTgESMo9(H; zjO-A@s>QzX;aHZ&PjN@jSJB#`rI=YajjJV14qu z{|{In`RwQag83`=vvz;yHug(kZ5flvIi};0tiehqjATqJgXKyd6Rzc$l1~S=lOS^! 
zPe8Yge8$-cww>eDZZ0oF$S2Muu;Yrp3c5a7!&TAskx$&!z=^Bfxc-(QpEzrP>+`oJ zx;~k|wZQtwXZ~ITHm>)9cIPj)bt>B?aQgW>oAbFGeO<8UB=hMRSPxDak|dsS+at9CluT3uiZKEIje1FboYI)&OEg53YN>++YKzI zSnONq*d|l&j-r44TWAk-`P_+ng5@F?(boHKFF50CHy3}Slgpg!4VKIM&^}-}#bPdb zAKDjAyZdi0@qI_yuhQjQE4lsQ^wpLzOb6SK`#Wx#=U9HV1%h^JuNjOmy2o zm2LEqOPd41>Blj;zYcuO@I3^k9pJ<^U&ulh@5$dlgE7U_&kn=lh1q{1D4BtB#w3RI2LR_=BB@W zn8)$d$wQyx5IOS@Cyx`r<}m~3%*To7@|lmfg5@$FiDR8S-UhZGbJO2G%wqwy#re?Z zBt*_U#L46B;PH7h;N&wO3&C=kkHoP~9$jGjF*p6~!#tYQ$wQxRM9w_KIn#^5xnI2l zdeQ6m{mJP1$md=^1#C{*a<8|*ix6#jF7|^xdptqYr*`|? zgxa;XcJ5A~(b-`8$UFNA+Ao8X&-3LRu$*GCFQNJ9e=bV>ef&IhImHtHyi>mm-kADj zOy`3gd-MyyFVQf0UWjg<=MpddUIZtfJl_qLQ!ME>?*;FH*T>k2@m_E}_lwcxlzNQh z{yuo)=$BkC0k4iE*Gs|UbG-~sKDk~FmQ(8eB-i)D>tpP5SS!!r2f+5PEq+&k<(qblb@1 zo$w=I+s#C@o7+{?^4aqr1)Ixsy>D=+}d7oAZ1FSReVsS`PldV%-R@Eq*uE z{CvMiA0LC)mOgF<+g9H9Bi{{f0o%8>oX1Rk*M%)LsoqX=x`@!~S3HRBGGv9Cz zRGhAPul^j0?e*7ge8+PawYL8-;yn@n{ekP}9c}U7TJI*gR(-_GkYy5&7)D1HtK0)?X%b2ml@BIV8@etbS_wK z4)R~xsMpV_|Bl1rzwtDe^{73M8&J(a5(d{n(0g9*m_P z-{ZmhK2iCe0G89&e$?apR5|?JlGn(Z~7Lmicy$HbpEOBHo40^F|e3oqFR6JHMMC zuE}eub4T`qvnKkC)buIfrQ`HvH0r2PJeEk{$fqHy%S%bkBOC@`{ra^oYNUd*6$RseBPg0VApQ~qRlmT zp5@b~4=k7ejyM38>qqSi64dXB2Ed*;}WA&22kG zoBb_8%+a;i?pV*FekXDUawcLt@1wI3dHvlR>am>zmbo)+hO? 
zr~j+K_5MGIE|)v|Lty)pxBZpWa$?Wt=7_OYqy8}B{?2p%BNZoaF~^MST5x-Bwg0=@ zM=QJcoi_JYp7Ga#wJYYeKK1p8@wK_4I|lC*xx}~y zJU+&)aPoPdy&Wuf8)A%`spag`-0nb(n;bq4*2n+$tBA4-fvf|{^=1#C};`1r6 zoZ`CM&s~Uf>Up$JC?#hNcP=lDlYxqi!P^R&hG;oN1J`zO)Zx` zJ`3KcvfWRuuI)kUhmnUW-P|8R^vS;qcoeM9BS_wzz6h4@K(hb81a?1aGlvJL<;0Is zKaTi5@?~o8Gi%?0ze@cz#G?J{)Y`4h(K-1BVvM`kPt&NsRdL^>_AXWb3SunViqrN9 zux+D%8|?hvS;hJeSRZ+PCQ^SF@eD=(9@z2Z_sH*q^^w21it_`ov9x*D|B%{Ze%hWy zLR`P-KL)!V&a?KPAaeRSx7y_NcRhcK)YtQ8==x+me_r{>XFZ<+ z%h%WQ7wE3%Q%KfRE_3or@Rc?FS77Jt9P)Gye~rlLXM1gOj@$Ki9;}V$8vh1yo$^lm z+lt$kI(ta2zK5K@%+v3{jx+l2!Oo-kIB$PI^pQ8-(-oKTJd5rcNB<+(oU_0G1lC91 z_Rmnu>F>P%8L7|vU(oF%^ZwV$M?UlZH?Vwt-k(Qz-nAu{7r^yg{*JDne0=@^c0Drx z{{-tNZ@$k_%fE2?XY0#c{tAKm)%`lLfQ6mXNvdiI?8b9NaX>E30}y>``MouZ@k)8`e%TE!%4rLWQ3uimU4o^$xZuED{D!y{+4nhRUK z{mq5ly+h5ek=~_E{Z^~__4aqQM!V|{op{u9=`2% z&)qa)2Z1Kme7x<&?ub|`(< zEViH?Yz#G8t;VvsU1T)eJJ5ejV{nJ1J-e5E^z3Fg#OWwDr5;@>?kqL~FYRrfd06kz zg88Mrv)G)vl@3a7Qn4lVaG7~K#x(S0rLCjbnfj~>8>1V8v0Z}&&1Hu=1RzXDaguhN zJ6YFL!6VIO&DLSf{^8ycL%>fedZ|z79vC^c)H{n2>PG+KR7GpWwAPIwgtWYHlv@_%6=w;hX@06m)v@AFF${iiw)qGHitMa?PLjy@1*jM3*(vczFXW{c*M{^{|LvWw|{Zl zZ>s(D_BUGPTy+%Npf9Lmct0-SfUyR|>?pQJpHk6Rr|EfIVSzBJ*WEd$Gi`m z?%g}U&F;RG_nfxew(~jIO-}t#uUEo&JI`uq=VGh#p4XB;)I+ZSlrml?e4}qsFLO{? 
zw6*i{teV%nf#LD^p3k0v#Wmk{zMf6^d0fSP?Q_CiR6xY({6O*#o ze%u?2&|iKxPbzxgyyyF>HcKYxr@u_^Ec(&eG~;<1cK~jbi+N;ps5%Fo<#Tah^rf-8 z&X?oY8ttP+M{zaS%g{V;0edI3^X@F}L+@)0W*g$uSv&!Eri)&&azBLQLhG*{U1h9i z^=l;7q~dw>m(Ite;zjfp&)&>Y2Mgcb+tU+$HS}tZ%6P9pU(#sxxa{V-A-eZ${I^mc ze~wKqUWM*h8m;lGg?63A&hU$R8^g_RSED`d-QilpFX;!OyEhYiplZ|BJBwxLqwUAN z*LRuE1-7ZJ-*xESz2#%0?DuAL_D}VsD~j2S!RP2!2&UplynIeswryAA98*49M+U<9s&B9a!Uh zCdtjLapo#_7&4pvm}j-~<8x>m#PLU;R@1%X>vnbR&W--QCpy1r?;UCm>s5B|N9`Sz z?z^OVd!8qi_XjE~)1DLWCyV)7oTqTkReO%x?R2fTQ?HLbci_Z!=3Z~7ZoB13`jdO9 zT{{QIpx;AC{Id2>)VQquQ#CGY|Hm4awRiFoFKfSUjmz3kg>(IKhdck?f!h$%5`9`* zEp_ij=i8#)`%8bSalP*EJAU2Yb6AhBuD|U!U>BgG`z6rc6B~mZ>r`Yj-rXIey8cQ^ z@D}(xX8op;pJTPiZG!F|x4rSTySMa}pT;V>x3$|&o4L66{WoORrc)=b=eH=9 z?`PZ@psA`9?dmfT^N;^rFel*<^Y=ZZ9kVD7sC4fPh%?^&f6ul^x&)AADFYgM+?sy{VS(iE|)~1--+0^E340BWWzR6hhKNm5U{^wOT zd2_M80P&1PzX9x8>%WrPSoW9PZv_{{%$e@J&w;e9B2_EV-Fx@L>u0>rS3Yv)tNuX6 zxtGSk#!Egkz>gsA&E)qO*b@Eg;F*`{$ALnB$PNMgcTtwUdCywt#d|f}=IX-poGr8n*GP-%_e@bN=L(hD9zwuXS z?*ll#vC8iHdEZ%*|JiMQhW;0UotyZpUtDoJ6G{FObkBolUlAv_OR*(ab9^uQB^$38 zKS%FFUwQb(Gt&2!=-*xT+kMTl}s8U;gm(2Ss0je(j2v;@*h9#*^C~ zm^^MncihqMM)zJ!`v=h}Gu7R*kGJ*GuKpEtskDC@rM{>CfbKKRIeQN5n9V=&H1dly~C&0bnSk-#jd{n1l{km*gY?Pqt$i4)1upc!35p!j@a$Lr>5)gH%IK| z?>9$u_pjd^(Y5;>QP=&Jh;BT;C+fQ26LsBhiRi}jJ0iO6pPisTKSB2!BL24b8zQ>x z{f4OPenUjp-*1Sz?l(kT_ZuR*@%(m(?tJ*&5M8_94bhG7cSCgTej`NJ?l(eR_ZvZd zKlY1fc76I-pZZ{SmwQS5AZ+gMjlmlsYgOlU6EJ_p;g!uc?l*bcOo!hb>>YherEdY2 z`#Zi{Qd|CsP5zaLoO@cFezyV}_pMbPTZ84&pZW{5Nn2~%`c5+q?DJn=_r7}Kz6$K~ zBIDT(%wIVkZMN}z$fwN?V4o>zvm=84L$@ce>*qkTzCDo zFN8P^xsaIZZvxv!KIf+a=C3?I+RephhmzUbMbvWodj~8A z=MLxrt0z`3Sby6sp;p)C+MWS+Ewr6Zt-c>?Xv`LP3u@!pPP=|nskOQGSp#ch`W`(9 z9;kHVycukp6B)}G`Vd$jdE@m{%VmELgWZFEI~mjcJpw16{XGhn%lwKPaN<0aPk@N1z@?1_if&4WUATL&ZYEPf<0NdxK)S2UV zg5}fayTEek^D=Py)b?&f&OXJ)8N=Z{VCOhz=nAl$VzKfJUCElb2j35&&sB(dUrsHT z^|=--w-#qo{||uqD|SP!0jxmuvF+8=+SB%fV7aXShrkop|HE+d7ghOO2bQyK+FcJ$ zJNr@BFZLU1_Vm3HPConUCa~O%h 
zg<3spd^^}RzKvRc&(X)g@>%1LgXLV~qHYcAip8}iH-PGyt(_r~LUp@nt%kw31taJYE0o#vrp}&2Y$7iW6=AqBMh@5$d z?X&#;2m76`?j3bM`cx!$)aSvUL*%oM9{|fM7AyDhWsJ?WcnCtD2a!DI$G}UmMe>|@ z7+!zvuHAjqa+#+`z!T@`3vlx0Y=2(_+iq@kUq1?#Q!G~I=J*~%(Qb_F*)M@@|2UF& zn=ga)wV0Paa%uBbux-AAq|MjBw&AaAqmNwj{5se+c^-ZP%wPFD)Rytf+h%9#Zz8)? zx?}wo*cf>pejBWheByow%wHK-yK$eOmN)KnY~KZMhU8uSdtf>D&iAP;?j89j5jppc zIOF*ruyOOQ{sXXF`cwD3rmeMYy}Nz{UJuDS{}?=Roqqx+?_ROZPr>dV`L$SUxu?MV z6+0lt)W`mRNUbfo{tRrc(SHv1zDPd50NW<}^_O6|hP-+|@w4EjCT;u>jt2KhJQ8i|el z9Q7X&WB-9VWB*gdXYT)8`CNcs=HoAL@;TFg1=~*EF+EEyr~er3{s!(xa!33fET>qk zyd#psKOl^e`F;VMyTST=@^`-f39n6mpG$K80vlhOxjauT=lnV^&Vh5`_)ekz4{{Qc z`5hz1e=9!unw*?su`=K6?NwBetn;d1ef%EQW*#p>$Y+d`z^;AtPIP_pyq}D&k9_v? z6tHpK&)WUn&DiB{59k@w>R`uoaut6Jzct|GGp04ca*D;c!sN3S%4v|fi`PcCjeN$r z4%l{%Q@gqNyN7(@tP6Hr(bq%QCu_JqSReVs-2j}p+Kub)C-RB25x72o8>8!!`P&4n zk9_8DQ?PNJKkd$6Y@1iM&EfR(w>#%^CHfX%&q?OfyXBQ|@_8TH5^M~4^Y!-~IkCU( zxNi^PR$58DHM;9_1Nd=j$7IZ@@Y?c>p9YpY8z1l9ZNV1LwYF^#Irp?UbF)3TJ~yvM zukWoL(Djjb&h2|gFx?QD`2pu%oenm4?HTJ%;O!7?j?dq!{0^4-{~bFJiN!|AKd^>GYGg6+rsow+^=U0;j2=_8jmuLIkr{@!#nx^2wMHu}h= z%^Yz0ag6S-W8mZ;AeZUr$AaZN@1CQ1V2kHS+gwD>b0kjeSRzaCEh{wnr-u$=qB zXH4Q)=RJG@*nT{t`rC(joIq_c4}FeDUNXj$;N&yLlfiP1(L55zI(eJ| zwjXoT-#*Oa4b&F%(C1V{&OF4)elSdEOe#}jO`!J6s)E4JMpT&rrd5Duo zFL+`er^Cr-KF$EkWj+$eI(f9f_G51P+lP7dQzs97`Vcwu5a&$48JzpoJ75UCe%}wH z>m#3geFSVy+H$Xtf(H?8c`lv__U!r0)V>t)91c)xOAcp&or~NnXM^Pwi!m~9=fF*z zw{zj-v!3UH?IZcV1+0&J?vuBI?Z*;sdBxdZ?#lDw^wnk@^S^*ve!uGZcOlq+PrZy< zn?74n%X^+O@7BrjZD4)!T)zmM=el=T{4R#qmfvPB0h^!S39f;0^huxE?Q>gd*V@{- z^L_R0VEf2B`xxyng_Fg-lIMHC=6M0}((e^;^2zhPU^&H-e)C@NK6rhMofubw>$zWrE~nIEB=`5j8%Mw7 zdNp`MB)MJ#o|x+jIQisyEm%&e_mf;d0I!d+&u6VXhaUvnzqa^&2rTdX#qYyl=UAJ2 zz;6~g_dvc`tdsBc*vvQYqhs`S1Dt&FT?v*`EVz&F0du<%zW#oH6S{5W^G^5?ukAlr*22$Vix4>)5oZkwzZO-#;V148h>vr(}73*X0+T!=|nxF3%>Ejdd z+S11zVB5<3e&oBsC&BiuE$8u0u;bjRvir>NTsq#Hv1!lT-vu_u+p2!Y@VgsMK6C#m zu$*Esw$MDC`%j~|#>x9L==PKQ>K?FMB)>1*3ui3v4f`;TzS%R{)5mAQ_F>)|<9{Ez z=cfKn`8haweOBUgKiKb_`tO{dhm+4U;sLPjPpR~& 
z)YB&DJ6F2>>g)Q8_1gw1?_%omE~fq)>$n~D_J~FMtEshHy9WMF<1^D3ca!^cYWJ6% z|INmC3-wna#+k!vwNAgggN>1X_W;YKUv>A8v1}{0 z?P}C}BF<;_(OzKZG|%sRoU>~y)=S*IE`aTluUNUF>jzZ-0&pwlL4P8&y!8&vGIhV1ptnU+* z?hH^kcYOFyGv`^nxq6KrlfA=>P3DPoSU zy>`cX9`#$0bC7cp<9Q#w6_MB9y`di4`Cz$As=Mn#u-pZRXTtVxL+pP#qFtM@mQ!m> ztc$^Nd1hP!mW%!EVEc;wQm~x%$<*&a98bpmPO$y%f*4DGefq0$y$h_5xwxm*)7QJf z%aQbT8CWj%%fYsZ{XJm0RI)*93wK-}MX?tif2HJ4G%rJ_?=~;}$si zywBbSmb(=(#?91n_GxanBgRb*9|P;-fBSKNeH@X?_&!l_@@aDi*f#O`Bv?*y-Rd?$Z^Q{yu{)r)19W0UJk~ecVMYmp(oV-n6pa zORcW$e(DF22P)m%A42rWzYBO6tj|M8-krVxmhV8a|Gx-!KWa0F`>5r_k5WH|_&)MD zwfC8|@4#QC{t9Bz{#9!2*5>G(d<`+io$ROS)ZeJMuTy)Ms(%SFmTkpp`vlmw(Z30H ze($JaeG9CQygrkuzm0f?qJIbMc=CJXcftC|-(1D{9@tpgyz9SDZ81M>Pa<;WCw8pK zSKhvy{~sW(U-Tb>T@UA3`;QPg{hV8Ea{9ZTKSt{7`4e<~vYtP!eB`s9Pl4s@>-jTu z*YhbP>nWEx`8oKin*Iy0b9O#?I)}eRHaW-bdOHu+#&eB-g}6?6r~P%s?MR(H zBv;=<&R^!~H(BX9d>sO9u` z-v5Zy=lxIU_K|u2bLAtSdH)MozCQ1NMR(q{C70*H^<4gjuAh8-{tk9MGXMVo>nCr% z&r!?8|DRyT5dB{h^natn}c?second; + } + return ""; + } void setLastWhisperSender(const std::string& name) { lastWhisperSender_ = name; } // Party/Raid management @@ -2436,6 +2445,7 @@ private: std::string afkMessage_; std::string dndMessage_; std::string lastWhisperSender_; + uint64_t lastWhisperSenderGuid_ = 0; // ---- Online item tracking ---- struct OnlineItemInfo { diff --git a/src/game/chat_handler.cpp b/src/game/chat_handler.cpp index ec9d6e46..0c3dcd83 100644 --- a/src/game/chat_handler.cpp +++ b/src/game/chat_handler.cpp @@ -255,15 +255,22 @@ void ChatHandler::handleMessageChat(network::Packet& packet) { } // Track whisper sender for /r command - if (data.type == ChatType::WHISPER && !data.senderName.empty()) { - owner_.lastWhisperSender_ = data.senderName; + if (data.type == ChatType::WHISPER) { + // Always store GUID so getLastWhisperSender() can resolve the name + // from the player name cache even if name wasn't available yet + if (data.senderGuid != 0) + owner_.lastWhisperSenderGuid_ = data.senderGuid; + if (!data.senderName.empty()) + 
owner_.lastWhisperSender_ = data.senderName; - if (owner_.afkStatus_ && !data.senderName.empty()) { - std::string reply = owner_.afkMessage_.empty() ? "Away from Keyboard" : owner_.afkMessage_; - sendChatMessage(ChatType::WHISPER, " " + reply, data.senderName); - } else if (owner_.dndStatus_ && !data.senderName.empty()) { - std::string reply = owner_.dndMessage_.empty() ? "Do Not Disturb" : owner_.dndMessage_; - sendChatMessage(ChatType::WHISPER, " " + reply, data.senderName); + if (!data.senderName.empty()) { + if (owner_.afkStatus_) { + std::string reply = owner_.afkMessage_.empty() ? "Away from Keyboard" : owner_.afkMessage_; + sendChatMessage(ChatType::WHISPER, " " + reply, data.senderName); + } else if (owner_.dndStatus_) { + std::string reply = owner_.dndMessage_.empty() ? "Do Not Disturb" : owner_.dndMessage_; + sendChatMessage(ChatType::WHISPER, " " + reply, data.senderName); + } } } diff --git a/src/game/entity_controller.cpp b/src/game/entity_controller.cpp index 6a9cc826..f788fca3 100644 --- a/src/game/entity_controller.cpp +++ b/src/game/entity_controller.cpp @@ -1988,6 +1988,11 @@ void EntityController::handleNameQueryResponse(network::Packet& packet) { } } + // Backfill whisper reply target if the name arrived after the whisper. 
+ if (owner_.lastWhisperSenderGuid_ == data.guid && owner_.lastWhisperSender_.empty()) { + owner_.lastWhisperSender_ = data.name; + } + // Backfill mail inbox sender names for (auto& mail : owner_.mailInbox_) { if (mail.messageType == 0 && mail.senderGuid == data.guid) { From 84108c44f5a6dc51812c5e379d589a0d269762cb Mon Sep 17 00:00:00 2001 From: Kelsi Date: Sat, 4 Apr 2026 00:21:15 -0700 Subject: [PATCH 6/8] fix(rendering): alpha-to-coverage for hair, skip eye glow geosets, add missing include MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Enable alpha-to-coverage on alphaTestPipeline for smooth hair edges when MSAA is active (both init and recreatePipelines paths) - Shader uses fwidth()-based alpha rescaling for clean coverage - Skip group 17/18 geosets (DK/NE eye glow) when no geoset filter is set — prevents blue eye glow on all NPCs - Add missing include for dirname() on Linux --- assets/shaders/character.frag.glsl | 9 ++++++- assets/shaders/character.frag.spv | Bin 17488 -> 17796 bytes src/main.cpp | 1 + src/rendering/character_renderer.cpp | 34 ++++++++++++++++++++------- 4 files changed, 35 insertions(+), 9 deletions(-) diff --git a/assets/shaders/character.frag.glsl b/assets/shaders/character.frag.glsl index b28fa314..669eb84f 100644 --- a/assets/shaders/character.frag.glsl +++ b/assets/shaders/character.frag.glsl @@ -126,7 +126,14 @@ void main() { vec4 texColor = texture(uTexture, finalUV); - if (alphaTest != 0 && texColor.a < 0.5) discard; + if (alphaTest != 0) { + // Screen-space sharpened alpha for alpha-to-coverage anti-aliasing. + // Rescales alpha so the 0.5 cutoff maps to exactly the texel boundary, + // giving smooth edges when MSAA + alpha-to-coverage is active. 
+ float aGrad = fwidth(texColor.a); + texColor.a = clamp((texColor.a - 0.5) / max(aGrad, 0.001) * 0.5 + 0.5, 0.0, 1.0); + if (texColor.a < 1.0 / 255.0) discard; + } if (colorKeyBlack != 0) { float lum = dot(texColor.rgb, vec3(0.299, 0.587, 0.114)); float ck = smoothstep(0.12, 0.30, lum); diff --git a/assets/shaders/character.frag.spv b/assets/shaders/character.frag.spv index 43789272f26cf2430fef49603a82af4bbb54dbe4..7277d275ce297eb3c61a83248ddb3d65fc8af675 100644 GIT binary patch literal 17796 zcmZ{r3B0Cb`Ntn-&Wt_lFhnzy7?OQw7R!)fMl<#@y>rfcW)AOJI?EW@W=LouMMa1q zOOjBLgh8|^D(T;8H=Digk+dluB=-r%%H% z$R-#kBikUaM@~i_ApZK))t5#=<7lZU9>O-hSgV*lXW{I*b7#*vWMSvPz`~*7(_77j zt)9N-!mggdX6JCv(x!fE)ckt-I$I-Mb%zc->bZ0jYm#G83^jTOTFv=O8eRRT%{%DO z?S|&>y32k`yLa0Un|0RJr?)XcQR?eacJ}uVb`7adEH*6l-hq+f=A8bnLmQpL{e#<; zzH1j-P!2Q(8?9Dj+1ySt8tUoqJF+pb-O}z|%Qm`qu^HlwD>kDXSt{-*CWDvuG*3IY zXK=y%(%w;QPT5KWB{#mlJ3xlzpg9XiH2iXN6%(&us z?KpQZuP1?ro6DN5gPVOrJ;R28A7AuP9^2JFd{n7-6vLE_zQwI(yUV4IZTB{o*<2aR z$_FiJ3?A7SZVvV|TJjUHltID$1C36bn85$!nC_y{8d%a;&>S-U1jbk1wb0q$>K{C! 
zdHSqYqx00#){f(f8|iEH)cu|3W^d2X5G!L=fB!3Db<}JNditzJJ8P8fOaKoJG&@IH zjlskFhDQe=dAK&4eT_w}=288Fy^R)AIhy+#qqcc-k1S)h`?Mx_pucy1XQRa`eYsC- zwfQ3%u5tykoJR9l8@#01vv|ocEgaim==wUXoIKY7_a%e*!-LJf#luVBoXayPeVnu9 zl)XKD)qHdm=cD(H^o};@dbk{})i}L5cyM#TAg<{v;5tVJi90-}W-8-fiESQY-5V*H z@44OGLrn+LHt(eDYPN@jTv?4Zu9$|sy*I}d zJ6U(uQDrVixADTBzNP3BiUsJ^*>YU}V5>R$J_;C;ctmr~2{7 zxc8sp+1t;_?z)tBpSE1K^V!%7+6Uu{WpItz zgAFIiIlc_NHPTz+tH2(F=6D;}^Prt`NAUo9Z(|@!5TB0XDY(;|@QRgt29Cq5ue!F% zSbx^9kyzu4m(X9{8{>=BndBDtZo4gRUG%P=?(XR8qF3Wn#@iTuNu$;6l$+~hbkErM zZ=*hXhfOH9N4GDH*66W9y^dlR_(eU9p=Ose(eC$NaIK-2^_l3d%fue8>a_Ka;wolBuMb5K6oOR^ZuW{}XxhXZyy&>m2K?x(;xe>u|~ zf6v6NiRlx4YFjOJ&q>GIN4w{l{uHBn-QT@fkEgCZ{_e5F+lW|8U8y09NX-jH=w&l<&)oZuzM}~A?U7qb@M$8 zy??{=_KiLV{f2?FrW^kl@G5*9kFD5=o=b8OZU3LxzvJ+A{;X&J)IHDSlFtd~=Ar+I zm2DMz#>?}Kze0O2!2YeO?9QL(olo*VyRA=;ei8bL<;%;x;Cj0lJ?-1>htV&Xbj9f2 z_z3#SLne*ZzXJWKWxqLKN3wLRuSBi-U=l8;~Ub(hLi`g+FV=e2ViqQOagC$tW)jpO~L#Xhg3G}xc=m= zGYx)ou;=v6mA(a7?(g_+N$K+vHu+a0a;|Z4+T99l+;gftUIUg(d+IMzCv`2Y>wC>q zuwM9ltjtj@N=ppI>o~mzUdMU-;-dj>29=M3lut0z_uSbytzH>qoLZcha}7urstRNsd=G-eCD1*P$pbeU}~p_gA`c-VV0Tar9*s`XE>zdE@m_%4K~Ifn9@sKN-{YJq#zG^*sWX z%lbYI?Bn{-wiJ{2B8zY8w;+dp;d#C}1|p4=~llh1yAH(2f>#J%}mN*~8w+j|f>$6ov| zG7`(VW->39w(FU83D`Nmm@;$zez1Jz{8F%7=KO;%XoYYESE9A7R+C<3vxB!8blxKUPY-rb*}@pv=$6LU1HzUsDt&~2Fr?yWZa*n4s z^C;&$W~?o3!}IS`;EfP-@;y~ObNp$rb9_6c{_g8L!19^nJHc|!aq_+keFG$Ue+Dc! 
z2g!Z$S+I4r*_XR1ee8?2&mnU5MQl#qcb^BF(>;`F?_RKc?z=CD5%;M5P8`eJclUv9 z$Fb1gHq7IGN+0vk=ZlD(d5CS(GxI@tN_Ef7Rp?WYoS6@S^^wmyei$sT_*l4(FQ#wK z#g`!Tc?8LO$D?4+gGlbJ$KdtX?%X{2<`Fmh}!}fjfWF+4X zKLE?Qc78e=htBCWWD|dEEn<2b^Lz| z$6v7%Voc-co3Vcm%wIY7`kIU9k-TG(J!om)y@P%aw(q~AG(XR$KY-SKLJpKVEpFRB|*n0Bz>CcpM z`me(OpWr?uXT-n2a*B_IXGC)NH-s@V-v0sTY_MEWohAQ;*QUSsl5=1n<7+dQmnh{N zU&qBUa4hWKiRk0N$5Uo}SK&S$PCof|faMe)+zmOpwkM$IlX;#9*2nL2ZRSz_egRG& z*91HF(bq!PC-?o@V14AXp4R~z*Y`8+zDF6`-$JydPwRp0(+O4lRrtLMPCk8FA1tT% z7+08lHb6NUGH3CI=+==>KQ{tf&wgq*7k`J5Pn=D__AB}%us)f?O~LxeC+=q8#MN$G zf6tLmoXx@Y@!JAjpN!wD!TQK&{I&!e*YVTt_{H{`%C$y?YjWMrWcRG3d@Z{3a~=3`O8aEY*THMc9ltGD?reNKd$$Ao zxUaRn9+7iRi!(Mmfa_zkBYJ&py#ZYxdB@zgcLLK4k&8lm&eb=9&0Tx?x-;0{Vzt>n ze>;;)`@4c&JJEMTPd>J_JGympe(V9(M?ULfPq1;c*@nNz$vG$f_PY<_7+zepa}4Fa z6`ywZL$_Y;;QhgJKH(0iII53w2UeV}^LTGqML8YC`ub~kOuT2c%|N$peazM03gyg0 z`z)|r_TE8YImO4eh4yU{<-sWW*S`l3L6^^&cqmvd;&+&%uOG;)R`+r z>hDeS(5+)$*3m~Ub&eJz^?meabou+q&-a=6U^(}@`{)?3kNZg50z}SzBu?yO#YjE& zap>~*Rk4o;%efxBV-m+Q@8KtaZO1*TzipVuNt8b3q0fnkoOy_o$6LgRyvx^-Qe8o^*!<)cy0M@=DlF^ z^E<&gFpfTHQ@d?$N9kN!I(ELVz7K30d1qfm{fpt`bH7{ymQ#FeOK9HuzaORkK7J{> zoZ=Jzyiosul$@ODkIi=oCa=jK_A7igzuH1*$ zfo)$~{8obH9l!Wp4|a^Txd!}Zk#h~?o5eEuejJez!Y2!h#ZJ75Y{8yp7Z|d)q55dXnvl5?&!G7n|f9HGzPCj?Um%!GO&zbuu*!FzF zJyvn%8}7>$r)%D;zk*_Y{k0q4{yadb?celxZ^VC-;d_r~w2%J|WiR~vcOCx!aK!$1 z`SlRbO#h8${;kHA;E9z!g>vc`eaA|-U45N@v3^@4?!#>;jcfTg=J9ou+af;NUr(vs z(mC*V8t+VF+(quwC|zH2{x>A=boJLF#h-8Cq8M;yWqr>Co97I~_H2I^BA@kl5ZJmt z;SL7N*^lf$f6vfYn=#^hC|KL-$O*Jy8?zC|OkR81J`C(yGH3k`N96R+I+Jq_olob% zGGp~7uw%8H`mUuT5IN^CYedfaStHJ;`nHtDvrN1>VCN?Bjs(jko;Eq_XRW&~)1RZj z_9y4)Jh0qcYqI5rQLOCCqSksTM>7G?5Rl03E-^U`yAm6O? 
z<0{T^FqV3JPXO!tMCE%TSWaKtQIGFiz}Aoc)=JNN^U3J?$j4_P*f`POR_UqJK-Wh; z@6e0Dt}pk#G4(MY`>D-Xu8}4pmwTxjY(4jOC(?!J<9KV!csoX0AU>NSo`sI{W))tW za&m@S`=|`p?#M{hu47KR-r)v8G$!fv=Cpgi6nOa|$kw=?o<7#BA$|kCozlnqLfZf$=lqGY4hO-`w`)VJUB9$50=Av3 zt<%8fwgaNg_Ld^%=-g|!uV+!7jhum;i5SoG=o~~|f7gb3Y%9QW7gT50d0@G75%+}k z--+1%G(@{LV=brDmRRos%jM3v04x{#g<#u?{UWfO_6d~lM(j`e{T{IG?t~aie|`F@ ze!Um0kGZ&})zjAdz{`=ebum~j_DjIliT(Xxx#V&w*u9tc(+_|hbNS@=L2&XjzA^Mm zewTsuNq*{S|3lz<`yWP^%bEQVuvHNo?#8_)leiU(i=ia}n;^ckIG5xw4 z++JJle@AjnW%s<(=Gw{~|1q$3#k@AAycRLOHrL!r%Igu=+;vFWxuN3oZt!vRYY}<- z;F%(q7&n2(#<&?yKJT-i0L$Hi7~@7tIomY1TM^?Xhfjj_@xN=jzHUS0(!WntoP6rs z4z^BwJ`I*roOj!~1942}a|a&j+y200(ey8CR!rM=Ig%PAT2yTQiM zW*c`>%B79ZgEy;e_fV>9yN~h#Zi65nW4Do&B%aopHmc9djmGWzdkM^%qYPU2;$K-Lu7qdVP?D*bZ#rhUlA9;NyP<|V64@LhD*#6}A$fv;i$lqAS`7YR4+C1yO zN9kjJ+MY(_%uj4zldrsOIsV^AoWJNl06QO!v-Te%a{4*8+T`?iK7WMN=kv$t`eZ(T zQu)YdKA!>0*XQ%6=+5UeNaj;6WAZcbDOTU8T~h4$I*Nox8EZA$Q$q3ic5cfhwdCl|2^28 zv%dcT)<@p@&r!o|BCLoYfCOKfa|&Z4P8I^ z`1~F0d}RFp0oG66e4nS3i~mbt`w;z~WAuNaTPOYhH&`F}wDBLXd?%87>AzrOYqQ-K zDdlX(d035uu|7|$qdQK?V;s75a^8(cms4`Sc7XXS?|tJLLqFTopgr-{0QcaVJ9$lX z{>u08KFWzG`dHWZLhZ?QE%0XeL|+@-%89)W*gDB|U9em!)m^Y2I)7!Z#xsU~zT0W@ Hz5jm!!;cd^ literal 17488 zcmZ{r2bf(|)rJpDCZS6WO&|e92#E9!384fekYGYr;?2yRWZ+J5N(fB?NKuiZfPx|r zkg6h}fQUd)!7f((tyob(v49E|{`Woie8XX$|KHAf_P4&Z_S$RLbIv7pti08vqFAX| zy;!q&aOI*b>l7=)6mS#Ede-cDvv(LC>E2=2-FDPrt)ip!)8{qC8pQ-^rLWQ3uiplfh&!SKjwt>%JO zZ+~+^ckfWMYovEcQ@>See!cx&ty~=o;OIzm_CWU`jjoY_p>0av z)r(E32OC3;R;#geP8S&s_YU+Q(HPujNzcw@A3ZzS4RJb(ji^VLh&zjo!Ap9ZrykTh zG=E-c?<_Wywx>R=!p7*vU~Jc5esk%84gm<$Q5>fo z=T6r3MDR#+X|r`uvwygE#1QZkieBntx(7y%EcMP}gu2ncsMTx_x%9E`zQ$6!D`VOC zz{QQBBN`*kq25MIej=7KD0pD7(PbAC`JWuiT{K#QiyQNs!^WS;{K~r*x&~SULx(p{ zo7rk~ol@G`aolmE{jJ`*zw6xW>m43uXUrTJcvY;zs=}pm9n3S;Nih$*J!IT zbXfn$*aRdG_hz%dv9Q%Va$u;h(PAmba$jZ4Hh0buWz6=NRs|0Z^v&yPwAiJujA^wt ze+1K2?m)KFSRSi`7dLwsEgqqVV;W3df0vDu=NjPtWH4`JsM)`0WHFp;c?Pw`HCsmA z*V|vMM`v+9djDwOScmS1E8$v=)0#sEH3tpip1vBcYjlXXBeQF!GX6E#<|4MenVR*U 
z)6+BDbRzBZZtCu4You}XF{RJ_=v6EjeO4(Rc!eu_El1fJE$7GnAHqJ`p5j9@4N$c0 zC?2DBFWT4B)PqCKB?dO1XV4ox?6tA6bQaISEo}A-3^m)N?OudCwG7;jwVs;c;qt7gDo#nkbvv;Jj8mpt2hP{0@JBl4_ zJM+jgm!sNvL2v&O^ohlM^y+Roc3`Mg-2t7&G4OMnLx&7C`l@}}<{Xp$)}B-S_+#D& zPWJ8{;AVGU%6m>*Zrgbr>?WsvsMjlDtet1Iv~#i5dCzOfALt?1e^ME*6TZ>6u$MWg zEZW+6c~;G9?!fR^e9vdkz@nOOJ73Qx{9LZ$zV(@xE3B|MM zubhPm#q;Pbp0k;cm(aU=dwQb3j9$%08E+z+U~!|>;}Vge97@!vpw?3p#O*c9Ed zG+JZV2<72-Hinzs&U|~^+rqVmU(t6#cONEpPt~TaGpFdI?MJ&;b(zm;wyCYv zRp{Nl9D$u5t$`=*sETZW`RxmVh? zb8rm$J%Yq9YyU)z%i2Fx;9g_dOUUQ@%M}+-g@i;RP25UwD-hhkYk;KY|Oj6V^p`jk`lZr z{*GC{Ddgu^Epi*8yT@&BeC_TnedVXJitcUgw$o-V?mao%uU}#FwEYIuwwG6*Tx20#$8Ue?-luZfH>$9EMZ3E9 zD}SY~zxQih_kLC12>V08Dry7?Z8KCte2dqfUE^$>(@<^U(i<%C-VM^X2`dv9s_V^yLRnF8#H?7ybK7f4|@MWaZpm zjBXqKw56X*!IwVz!hX>&L%-(gSK?lSzS@&p?VCJqKsWd3x1)QvrTx9=lo{&oyNBC) zX;=3*7^$@X8A^R${s!IWlJoRiuwyp=#9v#2PnYPgZR>mMKMkzvy2ic;Vqo2lv3=2p zzTtgG?diIBK7DRjZ2u{+xvW66tNR?0+nua*bq`tO{t08Q`g;HRP3PRGyEpxwi|#!= zrKW55`z&_#t;gxpYPx5_@38o*`z;pT_VdT-ep|$D|2;Kbf4?hYH~*7sx_i{`in{%R zak}3Su^Z3th`R1~L|ykABD($g{Se*uemm6lXUFN!*L2(a?NImk+o7)e?GRmmza8qj z-wt)%Z-?mS<2OTe=fm%X=-T~Wh;Dqp7ouzT+aS7jzYXfT-v;V?v0ps1>(a-%)cdo$ z+)L{FVRQda25*3@QJv=v!Tc2mS2o+Y-{fsG4So}__wcQizA0GlANX!YZFvcs{A&?8 z_p~_uZVoo?*;O7}faTJk`irzlTWj0;-ZB;JGhSc!zIx)m4(u}_<9R)pzj8dc3|_@=6O<&?M-0&h`uA(_R)6&+vX@vx$Cnt*fy4M zyHuQXxLqrb-NVPxgT`Z}T*- z@cV+z$sFu6{C;5jGrsZUeGc!B7|VA0IG^4}@&_u~oXfyx0)E4vCHV2V2f3dNhzqcUH*ZfMK zS8>kcQDAk)dII%{i0>D^Q`mk5@!txTpTk>__rcr1a*D;ub=P0}0*I54^N6YbcCc;a zbAB3N{>t;C-CTTz$S3bEu=yo!H&~y`tGG?DKJvC-NG+$ocfcZW?tmV!dSdm0^|zhR zB6V%9?I~c_LfgsI>U*(<#%zH%r8b`JwCgv8TAOR1HLy0O@5+PVfl4>dJHWO%ma(ir z9|G$mZ@hkLx$N&@uzS$&BV)S1N8seMzemAx+25yvE$$C(OAtBthuGLY8_=>z71%EXJKpT)i@v0)a;|am{uuf?Nbzsv8g6+q-(BD4H<5ScY^U&u` zM9w_K_UV0bH`woLb?=LN(5E1|FFp;{M?U-bGhlheV&y)*gt566_d)1$FOp~O{op0o zBDu3a3$MR+*X}NAxy;i8;PLbHIXHQ9w!a6#wwqJkou3EGDHbbpb9@h>Xg5ao>?2^? 
zKaAu(V+B}Wi+SlImo{Gj+vZUuZN3P$4S!`DedLnom%z5kv*|G~f911DTgESMo9(H; zjO-A@s>QzX;aHZ&PjN@jSJB#`rI=YajjJV14qu z{|{In`RwQag83`=vvz;yHug(kZ5flvIi};0tiehqjATqJgXKyd6Rzc$l1~S=lOS^! zPe8Yge8$-cww>eDZZ0oF$S2Muu;Yrp3c5a7!&TAskx$&!z=^Bfxc-(QpEzrP>+`oJ zx;~k|wZQtwXZ~ITHm>)9cIPj)bt>B?aQgW>oAbFGeO<8UB=hMRSPxDak|dsS+at9CluT3uiZKEIje1FboYI)&OEg53YN>++YKzI zSnONq*d|l&j-r44TWAk-`P_+ng5@F?(boHKFF50CHy3}Slgpg!4VKIM&^}-}#bPdb zAKDjAyZdi0@qI_yuhQjQE4lsQ^wpLzOb6SK`#Wx#=U9HV1%h^JuNjOmy2o zm2LEqOPd41>Blj;zYcuO@I3^k9pJ<^U&ulh@5$dlgE7U_&kn=lh1q{1D4BtB#w3RI2LR_=BB@W zn8)$d$wQyx5IOS@Cyx`r<}m~3%*To7@|lmfg5@$FiDR8S-UhZGbJO2G%wqwy#re?Z zBt*_U#L46B;PH7h;N&wO3&C=kkHoP~9$jGjF*p6~!#tYQ$wQxRM9w_KIn#^5xnI2l zdeQ6m{mJP1$md=^1#C{*a<8|*ix6#jF7|^xdptqYr*`|? zgxa;XcJ5A~(b-`8$UFNA+Ao8X&-3LRu$*GCFQNJ9e=bV>ef&IhImHtHyi>mm-kADj zOy`3gd-MyyFVQf0UWjg<=MpddUIZtfJl_qLQ!ME>?*;FH*T>k2@m_E}_lwcxlzNQh z{yuo)=$BkC0k4iE*Gs|UbG-~sKDk~FmQ(8eB-i)D>tpP5SS!!r2f+5PEq+&k<(qblb@1 zo$w=I+s#C@o7+{?^4aqr1)Ixsy>D=+}d7oAZ1FSReVsS`PldV%-R@Eq*uE z{CvMiA0LC)mOgF<+g9H9Bi{{f0o%8>oX1Rk*M%)LsoqX=x`@!~S3HRBGGv9Cz zRGhAPul^j0?e*7ge8+PawYL8-;yn@n{ekP}9c}U7TJI*gR(-_GkYy5&7)D1HtK0)?X%b2ml@BIV8@etbS_wK z4)R~xsMpV_|Bl1rzwtDe^{73M8&J(a5(d{n(0g9*m_P z-{ZmhK2iCe0G89&e$?apR5|?JlGn(Z~7Lmicy$HbpEOBHo40^F|e3oqFR6JHMMC zuE}eub4T`qvnKkC)buIfrQ`HvH0r2PJeEk{$fqHy%S%bkBOC@`{ra^oYNUd*6$RseBPg0VApQ~qRlmT zp5@b~4=k7ejyM38>qqSi64dXB2Ed*;}WA&22kG zoBb_8%+a;i?pV*FekXDUawcLt@1wI3dHvlR>am>zmbo)+hO? 
zr~j+K_5MGIE|)v|Lty)pxBZpWa$?Wt=7_OYqy8}B{?2p%BNZoaF~^MST5x-Bwg0=@ zM=QJcoi_JYp7Ga#wJYYeKK1p8@wK_4I|lC*xx}~y zJU+&)aPoPdy&Wuf8)A%`spag`-0nb(n;bq4*2n+$tBA4-fvf|{^=1#C};`1r6 zoZ`CM&s~Uf>Up$JC?#hNcP=lDlYxqi!P^R&hG;oN1J`zO)Zx` zJ`3KcvfWRuuI)kUhmnUW-P|8R^vS;qcoeM9BS_wzz6h4@K(hb81a?1aGlvJL<;0Is zKaTi5@?~o8Gi%?0ze@cz#G?J{)Y`4h(K-1BVvM`kPt&NsRdL^>_AXWb3SunViqrN9 zux+D%8|?hvS;hJeSRZ+PCQ^SF@eD=(9@z2Z_sH*q^^w21it_`ov9x*D|B%{Ze%hWy zLR`P-KL)!V&a?KPAaeRSx7y_NcRhcK)YtQ8==x+me_r{>XFZ<+ z%h%WQ7wE3%Q%KfRE_3or@Rc?FS77Jt9P)Gye~rlLXM1gOj@$Ki9;}V$8vh1yo$^lm z+lt$kI(ta2zK5K@%+v3{jx+l2!Oo-kIB$PI^pQ8-(-oKTJd5rcNB<+(oU_0G1lC91 z_Rmnu>F>P%8L7|vU(oF%^ZwV$M?UlZH?Vwt-k(Qz-nAu{7r^yg{*JDne0=@^c0Drx z{{-tNZ@$k_%fE2?XY0#c{tAKm)%` #include #include +#include #include // Keep a persistent X11 connection for emergency mouse release in signal handlers. diff --git a/src/rendering/character_renderer.cpp b/src/rendering/character_renderer.cpp index 5cbfecd3..1d15508b 100644 --- a/src/rendering/character_renderer.cpp +++ b/src/rendering/character_renderer.cpp @@ -253,8 +253,9 @@ bool CharacterRenderer::initialize(VkContext* ctx, VkDescriptorSetLayout perFram }; // --- Build pipelines --- - auto buildCharPipeline = [&](VkPipelineColorBlendAttachmentState blendState, bool depthWrite) -> VkPipeline { - return PipelineBuilder() + auto buildCharPipeline = [&](VkPipelineColorBlendAttachmentState blendState, + bool depthWrite, bool alphaToCoverage = false) -> VkPipeline { + auto builder = PipelineBuilder() .setShaders(charVert.stageInfo(VK_SHADER_STAGE_VERTEX_BIT), charFrag.stageInfo(VK_SHADER_STAGE_FRAGMENT_BIT)) .setVertexInput({charBinding}, charAttrs) @@ -262,7 +263,10 @@ bool CharacterRenderer::initialize(VkContext* ctx, VkDescriptorSetLayout perFram .setRasterization(VK_POLYGON_MODE_FILL, VK_CULL_MODE_NONE) .setDepthTest(true, depthWrite, VK_COMPARE_OP_LESS_OR_EQUAL) .setColorBlendAttachment(blendState) - .setMultisample(samples) + .setMultisample(samples); + if (alphaToCoverage) + 
builder.setAlphaToCoverage(true); + return builder .setLayout(pipelineLayout_) .setRenderPass(mainPass) .setDynamicStates({VK_DYNAMIC_STATE_VIEWPORT, VK_DYNAMIC_STATE_SCISSOR}) @@ -270,7 +274,7 @@ bool CharacterRenderer::initialize(VkContext* ctx, VkDescriptorSetLayout perFram }; opaquePipeline_ = buildCharPipeline(PipelineBuilder::blendDisabled(), true); - alphaTestPipeline_ = buildCharPipeline(PipelineBuilder::blendDisabled(), true); + alphaTestPipeline_ = buildCharPipeline(PipelineBuilder::blendDisabled(), true, true); alphaPipeline_ = buildCharPipeline(PipelineBuilder::blendAlpha(), false); additivePipeline_ = buildCharPipeline(PipelineBuilder::blendAdditive(), false); @@ -2385,6 +2389,12 @@ void CharacterRenderer::render(VkCommandBuffer cmd, VkDescriptorSet perFrameSet, if (instance.activeGeosets.find(batch.submeshId) == instance.activeGeosets.end()) { continue; } + } else { + // Even without a geoset filter, skip eye glow (group 17) + // and group 18 unless explicitly opted in. These geosets are + // only for DK/NE eye glow and should be off by default. + uint16_t grp = batch.submeshId / 100; + if (grp == 17 || grp == 18) continue; } // Resolve texture for this batch (prefer hair textures for hair geosets). 
@@ -2950,6 +2960,10 @@ void CharacterRenderer::renderShadow(VkCommandBuffer cmd, const glm::mat4& light if (blendMode >= 2) continue; // skip transparent if (applyGeosetFilter && inst.activeGeosets.find(batch.submeshId) == inst.activeGeosets.end()) continue; + if (!applyGeosetFilter) { + uint16_t grp = batch.submeshId / 100; + if (grp == 17 || grp == 18) continue; + } vkCmdDrawIndexed(cmd, batch.indexCount, 1, batch.indexStart, 0, 0); } } @@ -3487,8 +3501,9 @@ void CharacterRenderer::recreatePipelines() { {5, 0, VK_FORMAT_R32G32B32A32_SFLOAT, static_cast(offsetof(CharVertexGPU, tangent))}, }; - auto buildCharPipeline = [&](VkPipelineColorBlendAttachmentState blendState, bool depthWrite) -> VkPipeline { - return PipelineBuilder() + auto buildCharPipeline = [&](VkPipelineColorBlendAttachmentState blendState, + bool depthWrite, bool alphaToCoverage = false) -> VkPipeline { + auto builder = PipelineBuilder() .setShaders(charVert.stageInfo(VK_SHADER_STAGE_VERTEX_BIT), charFrag.stageInfo(VK_SHADER_STAGE_FRAGMENT_BIT)) .setVertexInput({charBinding}, charAttrs) @@ -3496,7 +3511,10 @@ void CharacterRenderer::recreatePipelines() { .setRasterization(VK_POLYGON_MODE_FILL, VK_CULL_MODE_NONE) .setDepthTest(true, depthWrite, VK_COMPARE_OP_LESS_OR_EQUAL) .setColorBlendAttachment(blendState) - .setMultisample(samples) + .setMultisample(samples); + if (alphaToCoverage) + builder.setAlphaToCoverage(true); + return builder .setLayout(pipelineLayout_) .setRenderPass(mainPass) .setDynamicStates({VK_DYNAMIC_STATE_VIEWPORT, VK_DYNAMIC_STATE_SCISSOR}) @@ -3508,7 +3526,7 @@ void CharacterRenderer::recreatePipelines() { " pipelineLayout=", (void*)pipelineLayout_); opaquePipeline_ = buildCharPipeline(PipelineBuilder::blendDisabled(), true); - alphaTestPipeline_ = buildCharPipeline(PipelineBuilder::blendDisabled(), true); + alphaTestPipeline_ = buildCharPipeline(PipelineBuilder::blendDisabled(), true, true); alphaPipeline_ = buildCharPipeline(PipelineBuilder::blendAlpha(), false); 
additivePipeline_ = buildCharPipeline(PipelineBuilder::blendAdditive(), false); From b3fa8cf5f3110f25c050f43fc232f54500f3bf9f Mon Sep 17 00:00:00 2001 From: k Date: Sat, 4 Apr 2026 00:22:07 -0700 Subject: [PATCH 7/8] fix: warden mmap on macOS, add external listfile support to asset extractor MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Drop PROT_EXEC from warden module mmap when using Unicorn emulation (not needed — module image is copied into emulator address space). Use MAP_JIT on macOS for the native fallback path. Add --listfile option to asset_extract and SFileAddListFileEntries support for resolving unnamed MPQ hash table entries from external listfiles. --- src/game/warden_module.cpp | 18 ++++++- tools/asset_extract/extractor.cpp | 87 +++++++++++++++++++++++++------ tools/asset_extract/extractor.hpp | 1 + tools/asset_extract/main.cpp | 21 ++++++++ 4 files changed, 110 insertions(+), 17 deletions(-) diff --git a/src/game/warden_module.cpp b/src/game/warden_module.cpp index 36be3f58..f3ea6723 100644 --- a/src/game/warden_module.cpp +++ b/src/game/warden_module.cpp @@ -535,11 +535,25 @@ bool WardenModule::parseExecutableFormat(const std::vector& exeData) { return false; } #else + // When using Unicorn emulation the module image is copied into the + // emulator's address space, so we only need read/write access here. + // Native execution paths (non-Unicorn) need PROT_EXEC; on macOS this + // requires MAP_JIT due to hardened-runtime restrictions. 
+ #ifdef HAVE_UNICORN + int mmapProt = PROT_READ | PROT_WRITE; + int mmapFlags = MAP_PRIVATE | MAP_ANONYMOUS; + #elif defined(__APPLE__) + int mmapProt = PROT_READ | PROT_WRITE | PROT_EXEC; + int mmapFlags = MAP_PRIVATE | MAP_ANONYMOUS | MAP_JIT; + #else + int mmapProt = PROT_READ | PROT_WRITE | PROT_EXEC; + int mmapFlags = MAP_PRIVATE | MAP_ANONYMOUS; + #endif moduleMemory_ = mmap( nullptr, finalCodeSize, - PROT_READ | PROT_WRITE | PROT_EXEC, - MAP_PRIVATE | MAP_ANONYMOUS, + mmapProt, + mmapFlags, -1, 0 ); diff --git a/tools/asset_extract/extractor.cpp b/tools/asset_extract/extractor.cpp index 1df2d510..3c61bef3 100644 --- a/tools/asset_extract/extractor.cpp +++ b/tools/asset_extract/extractor.cpp @@ -537,10 +537,56 @@ static std::vector discoverArchives(const std::string& mpqDir, return result; } +// Read a text file into a vector of lines (for external listfile loading) +static std::vector readLines(const std::string& path) { + std::vector lines; + std::ifstream f(path); + if (!f) return lines; + std::string line; + while (std::getline(f, line)) { + // Trim trailing \r + if (!line.empty() && line.back() == '\r') line.pop_back(); + if (!line.empty()) lines.push_back(std::move(line)); + } + return lines; +} + +// Extract the (listfile) from an MPQ archive into a set of filenames +static void extractInternalListfile(HANDLE hMpq, std::set& out) { + HANDLE hFile = nullptr; + if (!SFileOpenFileEx(hMpq, "(listfile)", 0, &hFile)) return; + + DWORD size = SFileGetFileSize(hFile, nullptr); + if (size == SFILE_INVALID_SIZE || size == 0) { + SFileCloseFile(hFile); + return; + } + + std::vector buf(size); + DWORD bytesRead = 0; + if (!SFileReadFile(hFile, buf.data(), size, &bytesRead, nullptr)) { + SFileCloseFile(hFile); + return; + } + SFileCloseFile(hFile); + + // Parse newline/CR-delimited entries + std::string entry; + for (DWORD i = 0; i < bytesRead; ++i) { + if (buf[i] == '\n' || buf[i] == '\r') { + if (!entry.empty()) { + out.insert(std::move(entry)); + entry.clear(); 
+ } + } else { + entry += buf[i]; + } + } + if (!entry.empty()) out.insert(std::move(entry)); +} + bool Extractor::enumerateFiles(const Options& opts, std::vector& outFiles) { - // Open all archives, enumerate files from highest priority to lowest. - // Use a set to deduplicate (highest-priority version wins). auto archives = discoverArchives(opts.mpqDir, opts.expansion, opts.locale); if (archives.empty()) { std::cerr << "No MPQ archives found in: " << opts.mpqDir << "\n"; @@ -549,12 +595,20 @@ bool Extractor::enumerateFiles(const Options& opts, std::cout << "Found " << archives.size() << " MPQ archives\n"; + // Load external listfile into memory once (avoids repeated file I/O) + std::vector externalEntries; + std::vector externalPtrs; + if (!opts.listFile.empty()) { + externalEntries = readLines(opts.listFile); + externalPtrs.reserve(externalEntries.size()); + for (const auto& e : externalEntries) externalPtrs.push_back(e.c_str()); + std::cout << " Loaded external listfile: " << externalEntries.size() << " entries\n"; + } + const auto wantedDbcs = buildWantedDbcSet(opts); + std::set seenNormalized; // Enumerate from highest priority first so first-seen files win - std::set seenNormalized; - std::vector> fileList; // (original name, archive path) - for (auto it = archives.rbegin(); it != archives.rend(); ++it) { HANDLE hMpq = nullptr; if (!SFileOpenArchive(it->path.c_str(), 0, 0, &hMpq)) { @@ -562,6 +616,14 @@ bool Extractor::enumerateFiles(const Options& opts, continue; } + // Inject external listfile entries into archive's in-memory name table. + // SFileAddListFileEntries is fast — it only hashes the names against the + // archive's hash table, no file I/O involved. 
+ if (!externalPtrs.empty()) { + SFileAddListFileEntries(hMpq, externalPtrs.data(), + static_cast(externalPtrs.size())); + } + if (opts.verbose) { std::cout << " Scanning: " << it->path << " (priority " << it->priority << ")\n"; } @@ -571,28 +633,20 @@ bool Extractor::enumerateFiles(const Options& opts, if (hFind) { do { std::string fileName = findData.cFileName; - // Skip internal listfile/attributes if (fileName == "(listfile)" || fileName == "(attributes)" || fileName == "(signature)" || fileName == "(patch_metadata)") { continue; } - if (shouldSkipFile(opts, fileName)) { - continue; - } + if (shouldSkipFile(opts, fileName)) continue; - // Verify file actually exists in this archive's hash table - // (listfiles can reference files from other archives) - if (!SFileHasFile(hMpq, fileName.c_str())) { - continue; - } + if (!SFileHasFile(hMpq, fileName.c_str())) continue; std::string norm = normalizeWowPath(fileName); if (opts.onlyUsedDbcs && !wantedDbcs.empty() && !wantedDbcs.contains(norm)) { continue; } if (seenNormalized.insert(norm).second) { - // First time seeing this file — this is the highest-priority version outFiles.push_back(fileName); } } while (SFileFindNextFile(hFind, &findData)); @@ -674,6 +728,9 @@ bool Extractor::run(const Options& opts) { for (const auto& ad : archives) { HANDLE h = nullptr; if (SFileOpenArchive(ad.path.c_str(), 0, 0, &h)) { + if (!opts.listFile.empty()) { + SFileAddListFile(h, opts.listFile.c_str()); + } sharedHandles.push_back({h, ad.priority, ad.path}); } else { std::cerr << " Failed to open archive: " << ad.path << "\n"; diff --git a/tools/asset_extract/extractor.hpp b/tools/asset_extract/extractor.hpp index e9aa646d..48588273 100644 --- a/tools/asset_extract/extractor.hpp +++ b/tools/asset_extract/extractor.hpp @@ -26,6 +26,7 @@ public: bool onlyUsedDbcs = false; // Extract only the DBC files wowee uses (implies DBFilesClient/*.dbc filter) std::string dbcCsvOutputDir; // When set, write CSVs into this directory instead of 
outputDir/expansions//db std::string referenceManifest; // If set, only extract files NOT in this manifest (delta extraction) + std::string listFile; // External listfile for MPQ enumeration (resolves unnamed hash entries) }; struct Stats { diff --git a/tools/asset_extract/main.cpp b/tools/asset_extract/main.cpp index 6d9c27f5..0add3e99 100644 --- a/tools/asset_extract/main.cpp +++ b/tools/asset_extract/main.cpp @@ -20,6 +20,7 @@ static void printUsage(const char* prog) { << " --skip-dbc Do not extract DBFilesClient/*.dbc (visual assets only)\n" << " --dbc-csv Convert selected DBFilesClient/*.dbc to CSV under\n" << " /expansions//db/*.csv (for committing)\n" + << " --listfile External listfile for MPQ file enumeration (auto-detected)\n" << " --reference-manifest \n" << " Only extract files NOT in this manifest (delta extraction)\n" << " --dbc-csv-out Write CSV DBCs into (overrides default output path)\n" @@ -53,6 +54,8 @@ int main(int argc, char** argv) { opts.generateDbcCsv = true; } else if (std::strcmp(argv[i], "--dbc-csv-out") == 0 && i + 1 < argc) { opts.dbcCsvOutputDir = argv[++i]; + } else if (std::strcmp(argv[i], "--listfile") == 0 && i + 1 < argc) { + opts.listFile = argv[++i]; } else if (std::strcmp(argv[i], "--reference-manifest") == 0 && i + 1 < argc) { opts.referenceManifest = argv[++i]; } else if (std::strcmp(argv[i], "--verify") == 0) { @@ -99,6 +102,24 @@ int main(int argc, char** argv) { } opts.locale = locale; + // Auto-detect external listfile if not specified + if (opts.listFile.empty()) { + // Look next to the binary, then in the source tree + namespace fs = std::filesystem; + std::string binDir = fs::path(argv[0]).parent_path().string(); + for (const auto& candidate : { + binDir + "/listfile.txt", + binDir + "/../../../tools/asset_extract/listfile.txt", + opts.mpqDir + "/listfile.txt", + }) { + if (fs::exists(candidate)) { + opts.listFile = candidate; + std::cout << "Auto-detected listfile: " << candidate << "\n"; + break; + } + } + } + 
std::cout << "=== Wowee Asset Extractor ===\n"; std::cout << "MPQ directory: " << opts.mpqDir << "\n"; std::cout << "Output: " << opts.outputDir << "\n"; From d54e2620486aa1ae264568473291332c1fed1a92 Mon Sep 17 00:00:00 2001 From: Paul Date: Sat, 4 Apr 2026 13:43:16 +0300 Subject: [PATCH 8/8] feat(rendering): GPU architecture + visual quality fixes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit M2 GPU instancing - M2InstanceGPU SSBO (96 B/entry, double-buffered, 16384 max) - Group opaque instances by (modelId, LOD); single vkCmdDrawIndexed per group - boneBase field indexes into mega bone SSBO via gl_InstanceIndex Indirect terrain drawing - 24 MB mega index buffer (6M uint32) + 64 MB mega vertex buffer - CPU builds VkDrawIndexedIndirectCommand per visible chunk - Single VB/IB bind per frame; shadow pass reuses mega buffers - Replaced vkCmdDrawIndexedIndirect with direct vkCmdDrawIndexed to fix host-mapped buffer race condition that caused terrain flickering GPU frustum culling (compute shader) - m2_cull.comp.glsl: 64-thread workgroups, sphere-vs-6-planes + distance cull - CullInstanceGPU SSBO input, uint visibility[] output, double-buffered - dispatchCullCompute() runs before main pass via render graph node Consolidated bone matrix SSBOs - 16 MB double-buffered mega bone SSBO (2048 instances × 128 bones) - Eliminated per-instance descriptor sets; one megaBoneSet_ per frame - prepareRender() packs bone matrices consecutively into current frame slot Render graph / frame graph - RenderGraph: RGResource handles, RGPass nodes, Kahn topological sort - Automatic VkImageMemoryBarrier/VkBufferMemoryBarrier between passes - Passes: minimap_composite, worldmap_composite, preview_composite, shadow_pass, reflection_pass, compute_cull - beginFrame() uses buildFrameGraph() + renderGraph_->execute(cmd) Pipeline derivatives - PipelineBuilder::setFlags/setBasePipeline for VK_PIPELINE_CREATE_DERIVATIVE_BIT - M2 opaque = base; 
alphaTest/alpha/additive are derivatives - Applied to terrain (wireframe) and WMO (alpha-test) renderers Rendering bug fixes: - fix(shadow): compute lightSpaceMatrix before updatePerFrameUBO to eliminate one-frame lag that caused shadow trails and flicker on moving objects - fix(shadow): scale depth bias with shadowDistance_ instead of hardcoded 0.8f to prevent acne at close range and gaps at far range - fix(visibility): WMO group distance threshold 500u → 1200u to match terrain view distance; buildings were disappearing on the horizon - fix(precision): camera near plane 0.05 → 0.5 (ratio 600K:1 → 60K:1), eliminating Z-fighting and improving frustum plane extraction stability - fix(streaming): terrain load radius 4 → 6 tiles (~2133u → ~3200u) to exceed M2 render distance (2800u) and eliminate pop-in when camera turns; unload radius 7 → 9; spawn radius 3 → 4 - fix(visibility): ground-detail M2 distance multiplier 0.75 → 0.9 to reduce early pop of grass and debris --- CMakeLists.txt | 1 + assets/shaders/m2.vert.glsl | 52 +- assets/shaders/m2.vert.spv | Bin 9120 -> 10120 bytes assets/shaders/m2_cull.comp.glsl | 76 ++ assets/shaders/m2_cull.comp.spv | Bin 0 -> 4556 bytes include/rendering/camera.hpp | 2 +- include/rendering/m2_renderer.hpp | 66 +- include/rendering/render_graph.hpp | 117 +++ include/rendering/renderer.hpp | 5 + include/rendering/terrain_manager.hpp | 4 +- include/rendering/terrain_renderer.hpp | 24 + include/rendering/vk_pipeline.hpp | 6 + src/core/world_loader.cpp | 6 +- src/main.cpp | 4 +- src/rendering/m2_renderer.cpp | 1169 ++++++++++++++++-------- src/rendering/render_graph.cpp | 194 ++++ src/rendering/renderer.cpp | 126 ++- src/rendering/terrain_renderer.cpp | 144 ++- src/rendering/vk_context.cpp | 2 +- src/rendering/vk_pipeline.cpp | 14 + src/rendering/wmo_renderer.cpp | 25 +- tools/asset_extract/extractor.cpp | 36 +- 22 files changed, 1579 insertions(+), 494 deletions(-) create mode 100644 assets/shaders/m2_cull.comp.glsl create mode 100644 
assets/shaders/m2_cull.comp.spv create mode 100644 include/rendering/render_graph.hpp create mode 100644 src/rendering/render_graph.cpp diff --git a/CMakeLists.txt b/CMakeLists.txt index f61b4024..88daaa4a 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -605,6 +605,7 @@ set(WOWEE_SOURCES src/rendering/wmo_renderer.cpp src/rendering/m2_renderer.cpp src/rendering/m2_model_classifier.cpp + src/rendering/render_graph.cpp src/rendering/quest_marker_renderer.cpp src/rendering/minimap.cpp src/rendering/world_map.cpp diff --git a/assets/shaders/m2.vert.glsl b/assets/shaders/m2.vert.glsl index 6f4545c8..a5913ca2 100644 --- a/assets/shaders/m2.vert.glsl +++ b/assets/shaders/m2.vert.glsl @@ -13,19 +13,29 @@ layout(set = 0, binding = 0) uniform PerFrame { vec4 shadowParams; }; +// Phase 2.1: Per-draw push constants (batch-level data only) layout(push_constant) uniform Push { - mat4 model; - vec2 uvOffset; - int texCoordSet; - int useBones; - int isFoliage; - float fadeAlpha; + int texCoordSet; // UV set index (0 or 1) + int isFoliage; // Foliage wind animation flag + int instanceDataOffset; // Base index into InstanceSSBO for this draw group } push; layout(set = 2, binding = 0) readonly buffer BoneSSBO { mat4 bones[]; }; +// Phase 2.1: Per-instance data read via gl_InstanceIndex (GPU instancing) +struct InstanceData { + mat4 model; + vec2 uvOffset; + float fadeAlpha; + int useBones; + int boneBase; +}; +layout(set = 3, binding = 0) readonly buffer InstanceSSBO { + InstanceData instanceData[]; +}; + layout(location = 0) in vec3 aPos; layout(location = 1) in vec3 aNormal; layout(location = 2) in vec2 aTexCoord; @@ -41,15 +51,23 @@ layout(location = 4) out float ModelHeight; layout(location = 5) out float vFadeAlpha; void main() { + // Phase 2.1: Fetch per-instance data from SSBO + int instIdx = push.instanceDataOffset + gl_InstanceIndex; + mat4 model = instanceData[instIdx].model; + vec2 uvOff = instanceData[instIdx].uvOffset; + float fade = instanceData[instIdx].fadeAlpha; 
+ int uBones = instanceData[instIdx].useBones; + int bBase = instanceData[instIdx].boneBase; + vec4 pos = vec4(aPos, 1.0); vec4 norm = vec4(aNormal, 0.0); - if (push.useBones != 0) { + if (uBones != 0) { ivec4 bi = ivec4(aBoneIndicesF); - mat4 skinMat = bones[bi.x] * aBoneWeights.x - + bones[bi.y] * aBoneWeights.y - + bones[bi.z] * aBoneWeights.z - + bones[bi.w] * aBoneWeights.w; + mat4 skinMat = bones[bBase + bi.x] * aBoneWeights.x + + bones[bBase + bi.y] * aBoneWeights.y + + bones[bBase + bi.z] * aBoneWeights.z + + bones[bBase + bi.w] * aBoneWeights.w; pos = skinMat * pos; norm = skinMat * norm; } @@ -57,7 +75,7 @@ void main() { // Wind animation for foliage if (push.isFoliage != 0) { float windTime = fogParams.z; - vec3 worldRef = push.model[3].xyz; + vec3 worldRef = model[3].xyz; float heightFactor = clamp(pos.z / 20.0, 0.0, 1.0); heightFactor *= heightFactor; // quadratic — base stays grounded @@ -80,15 +98,15 @@ void main() { pos.y += trunkSwayY + branchSwayY + leafFlutterY; } - vec4 worldPos = push.model * pos; + vec4 worldPos = model * pos; FragPos = worldPos.xyz; - Normal = mat3(push.model) * norm.xyz; + Normal = mat3(model) * norm.xyz; - TexCoord = (push.texCoordSet == 1 ? aTexCoord2 : aTexCoord) + push.uvOffset; + TexCoord = (push.texCoordSet == 1 ? 
aTexCoord2 : aTexCoord) + uvOff; - InstanceOrigin = push.model[3].xyz; + InstanceOrigin = model[3].xyz; ModelHeight = pos.z; - vFadeAlpha = push.fadeAlpha; + vFadeAlpha = fade; gl_Position = projection * view * worldPos; } diff --git a/assets/shaders/m2.vert.spv b/assets/shaders/m2.vert.spv index 8397440f03249e030dbcb99c3f5adbb3e3403381..11364e67a801fc2fcccf69c6e791471f32aa75c4 100644 GIT binary patch literal 10120 zcmZ{o37nNx8HYa@W;8+6P{h=M0Tm4-ZN!yj25<%pWpofFHNC^!nYlX5on`^F4J*pZ zqP;ZR%~IQKFAXUx3tP+ya3KzfjVMgeq4iA_l{M~`^(ekqDP2%5$GruJjL#74LuKFPVS!z0y8%NxcjRh|ydoxJ-x=t$ker43~&AF1SLDdODU z+`ghRf?Ch#{^tDh>R=U%`)IyKq_nZ8QXU#BEZS;yHDR+Zmp$_H&}V(s{=~Odug@5K zSBc5F2$A1sWptp_TiskYk-Z_?0dI5N@1WmY9U1IBr`%WLnzB1l2J$8CC~X+C;+pT> z^kXCALzi_8-S`xs|CwVV)1K!a_jSa zPlp>Um->of;dnj@u48a~Y^*%O$d1#L&3P)%=(X{ez~}Q!&T)Rfjg86g|7l!lL z2*XS}25AuGY&mo}dT z;{&JT1n2qRKk?HOlnX`I-& ztw_Yl?T)MEvS$5uAmJB#{@sK#w|);Koa-+4Si+fK&LSh9=UZ<7g!8P+&4P2iotO2w zzB}k=BeuXTHJD@XqviyAABpL5yZ4be@_84DorC$jhj!Z=^3Q5h3!C?mwuSk&#Emr-16twJ~->q&)mVO%hmEX)&|GtH-HrAy4E^$1u@!l=oPjb_#?m+rk$idhS zq8B@#8T>z;UQTT8gXyhTUjIYr^>^%MEZ$+-f{%F|&N$vZ+MCh6Q^e*xoZfgj>l4TR z#{6c&+4OIr*WYo(N6=f7V}}1+^a+Ueqv`e6E}loPe{J5AvIXec#S!0%?!M`N0=;(U z682NjV@(#Jo6mJ}O_tD`E8>@;TZ_E&6SpC9=F@i>l4V<$hTVHJwV#pO%{wu_%N=Je zoHa!q=b~GOymg;PZw~7+uh{!n&i(N2bq(DQ?@O_9&!cy~a>j|hGvz$PCHh&2&HBYX zIodsrcj^She3e3Xp5f2Ogxnr@8`%7TuS0kJnzJeS-C~^~&gcJ$<9!*sxo$(OU%Uf} z{I1#Ui1(WQU!`~5+#koc&Rs~By{A3!J)paGtzH`V$2s1$IPksT&owtLna?xqIlB+h zM?QQWKyPimWl7+Na@<FBdFpdgM78{TL+jXq$`Z>ofFBM7w!BBjU(&EPCWwfc`8b@@R{> zd$x}&?B?-oi6akRIqP{&KwpSN9&N2i_?}qU&EuIAhwWtasK-2~B9TYiDMeL=}j-qReI>*qhGxA&KI3gc)ZbCOs-ZSCaZ$_-&7TgweeQj43 zewU-`t1bMlKsPqdllfkN7%LaCFGSZ@Tg1Ky-B{0Ea4$wTRxV;+g08Q&@Y{-RY(v6b ziEgZ1#J&`LYmRHr({%KgAJ$nUs9kMO)c_q4h?AcZ5#>vN?y$U_{ zOndCvtI@T!7ia4==Q-h&chbmc65DhG5^=2>#HsN-hggw zx@T_$8=LOgo51>Ni`X}#8=LOgTfoMqd-hhazS<)8ZRo}}B;4E4jg^a9c+g59(05}$XW%g3I*8{Ihh*t7Sb$DV1AJ$o;@w%D`xp}S|1 z-#vRjA|HEpJ-Tu7G5-&sI}clMH=yfli}`;LU0-eC_aSs+(>=QpY;3w`H-Yul7O^*@ 
z8=LOghr!0Cd-f5qzS<)8qv*z_d-gG~vFVmwiUFuA`o`)+#QMfUFRzVx3% zCL?nZ$7rJeJR%?Oxi1vlOnTpQU!u49p40Y4M9$pmp0Ne=JCPm8?MV8}e+8_~`fjBc zThAi;uOTy#RwUNw>jl4tPb*8%zk$dpHqV56YrZ?^wVQ7q{Wp=tNIL$v3jQj5JkNI_ za=VZzj3M`J#JuLveg?7(ag00ZwOgOQ-$CR8f49Kfiu~UL%jp;Ka^e4dbk`yLe}FD` z52BBA{vqPr)^JB0LjR+Jlebyxn~U*&g0B5O#OKT!e~Oq_n>E~Ba8bk0z;aXAJN-`v5?$N@>_5Kr*uJ@k{PTpp1vEF|}*Zw#X z>-_|}d9_)?BlL1n!{5>6;+*~?w>|aq%s=7eWA6V(m-`nI7^Ovuw_V7IhU7NozMZRaEr{m89>nHE{ey^7c z|M}?mlRNOSV9!vj)dH|S^5OF=bmtL1$D!*d-_HHz9eh0co@eUWaPkq?iY{k-#GQa1 zan>u=FUDSo?)?z>MDQdea-M{)k9_!?jPAU{=M;4P$5VB?%9uLWfQu7^5))5zXhox=C*dRe*b0#PDcI*HI0`0 literal 9120 zcmZ{o3!GI|6~+&Yj4#9osHlKnb;g`E;b)qh&5xUV|Dy9n%GU??UJ36me%DhZEY>BCok{p z?_WMRbYZ2myi)EhE$=E1lsbpXYf8rLkc%t#c2|f1iO}Vtz z*14;>hW!J5=egH?oT_^ETHQ6QI8sM{v9mR(hI$m_0UsxR!T^x>&b zwom(WEsLvHmP@@unMYr>u6lgd+l2$cn46x6%#`4ELVj-cuZ8WzF9h zTF2Vrh38bYd-K}4Rc#a6%7J2UXHPZR+$}Y&nyj|XU7+)fv4y{&+SuooDWy^KRfktWxqEdrxGqTsp3I#?tJaiGI98iXqOf z_U)Zg)6((k(>rEK@8A$KF0~DmS9v=*KkuE>d}kM@3!G}+Lu;B}eW{#BE842ca*ph> z(f}{6<9iO&ef0GWmeT|3eyi(B^>Xh}NBN@k?1#^@i*I42+`p(?EsNf&d7fK+!<9inTBgYq3A;_PZcAPdH(!6$qkeqG{X525XTXhh9g<|-g78m(<^6l5{v?@5{C;4^a(u^|L~9?` zx+i=xZ2>cX3e~x8<`}0h%pB|6pVo6>oa4(2n40w&-+;w@re;3%@J|E#OrVPCwAOVf ztue7qGr-oZ?zo51j>hi7`h3Rk1u zu;bg`6#l~XopkIlegDs_=W6_pWuEoQeV1c=$GaBuoEm>!=8OIRFx>tFzrW`HE`0r_xeH>W7=O-=Rb`aTPH9>(d5 zycfXhHx1YNeHq@}eM_y5SKzGoiSEyYQPutNe8~5Kd$!y^_h0T^trpz7TJHT(02?be ze@5nWpNyA#m+QAa&(C4-{CPS8&cBKIvojp|a_@frP5YC3_shK(jsZ*u+n@Qz!FL2l zzV-V(i26>-u;R*=m%{`jYoR9Cck9pfrtifF0J!>#F`1l<6Zk(Cp-hKR;T#NUPxH89m zXE)@y{u@T{TSxGXBlx`|_@)tj%Lu-81b=!2e|`ktmgCOf|Cez7C$b)%zg;lDgJ-Zx zuKli=MpM&v%QRAJ+U}X=dbiV~eeQvoZ@m6-*nD=uHP;to?g^fN#h818)qFym2v*CF zxlg9$$J`gqeB<@|jdo1;RbLbHno4^BR=^rC>+pR#5L3T{nCY|!Wt!j7m+0dVTA!DR z*LN_c=9v=PkMlkpn}zkl52ZaK)2zeuB)5)ZX^+Fy=3!CC@tNMl4xEtc)o-VD=rhiH zW`5?guSRea<~K3=a{o@s^!3D;vk)_{s#j|`8Ey@}dr?C(SUqZJ0sH;727OV(Vz9ob zVF}n8qAzPW1yhe2P6eB%?*6)Ft(dj>gmxO(Sf7itxYNPL>WjEGu(@$I?5`a&S1oeS z02`|>a?b>tJ36OzfX!8l+@)Y+^+oPkU~}to+A^@YYLR<3*jRnO8}7q7m~*iv`!=SD 
zIeI2uhq)i;VzD2u&-B=jH^5^*^u>O>5v{OJV?S2F%~OxL6~Wf% z6WU7f3I(2+TPN69eG%6MHaEW?CHRU=i`;IwvHBu+71&(YKC~Y2icE{#GTc~w5qBQg z+`620K6pi@MQ#OdtiITf)!f;R*(I7GdT7`U+l*KSf710 z(hg$oNA%@>3}t%k$1vPH^_bflur>OGwiaxxPt5HCu(A3g?n1D+`Te*EZmwG7z6ETo zzQ}zm*xdYnybW%yTI9YRY^=V>eFxaw{C=#1o2wSN7lVymhsA!p6YN~9$-a&8{+vy# zFV^>6U^Th#{kyRpv8z~b`6ZY!>hbMSi~RS1^ZDlxlOTosd$9F{SpUnOcTJL__ zCdQje`#!9IU4}WvWZL&*>hWE>BJ^EdM@# zDDzq0m0-C!*VBFkD`1Y*fWyAEu0akk)i@E#~>|Ewyk@qXG zdgMI`R(k@Ax%?Vze@|jDm*0S$i|0UpV6Jvu0F?knpQ2w`5Ra*=Ja=P?2o>f(~DqzFJO`T z4{&}?FTssdcdX}W)gu0%V8@C+{{>ci8H@4%4URR{AF=-d>w5)@{{9Qjk8h%J>W;sS zRxRR3fj1%r9}V{m#ah+DjZu%7?ZD0>V#dIYQ*Yw_T929kGvhh zk!QVf<6`Wc!1w0r-5KsVi9W}|jZu%BUBJ#eV#dLZQ;%_X1v_s3efD>B^~l>DtY&`X z?E#KF>y;blH~b3v^E+#6yuah@i51|}FzefocCSpAZx7!aZlCdY!bG?+>W<<6SF1(* zeqgou-cACmX+AciF?0Q$NPpBb1zf0j%Z|V@!psYd$uP5jh7Sn4=y! z2SU^}ADev6Gz4?hB4;{WUGuTg-_BRB067S=7W+0f*5qLDc;cd;L*QzfkB$Af#@-u; zBG{L4u9Lr?sku&5@y`MmXb+=xjLG1`!RogYGaY^;Sk2$cjG2UX6xiR!f*%d`x3P9| z{9SAwW}bE%b28`HOgGnfYw$O-}gYRxSQ!TL88X_q)OP z?94Ty>x=bj0;_30Hm;X_`dgiT>+)S+1oph1i`id2?a65B#xJCm8^4%#305Gcg*M_( z$#mnJ!E)nIAg&F34QbY>BpQ7uzBh+@1= instanceCount) return; + + CullInstance inst = cullInstances[id]; + + // Flag check: must be valid, not smoke, not invisible trap + uint f = inst.flags; + if ((f & 1u) == 0u || (f & 6u) != 0u) { + visibility[id] = 0u; + return; + } + + // Early distance rejection (loose upper bound) + vec3 toCam = inst.sphere.xyz - cameraPos.xyz; + float distSq = dot(toCam, toCam); + if (distSq > cameraPos.w) { + visibility[id] = 0u; + return; + } + + // Accurate per-instance distance cull + if (distSq > inst.effectiveMaxDistSq) { + visibility[id] = 0u; + return; + } + + // Frustum cull: sphere vs 6 planes + float radius = inst.sphere.w; + if (radius > 0.0) { + for (int i = 0; i < 6; i++) { + float d = dot(frustumPlanes[i].xyz, inst.sphere.xyz) + frustumPlanes[i].w; + if (d < -radius) { + visibility[id] = 0u; + return; + } + } + } + + visibility[id] = 1u; +} diff --git 
a/assets/shaders/m2_cull.comp.spv b/assets/shaders/m2_cull.comp.spv new file mode 100644 index 0000000000000000000000000000000000000000..ef1a08fd5218d3308808990f94a10efe719c9cc9 GIT binary patch literal 4556 zcmaKt>2FkJ6o+q1rywX$b``KJ;sTB^jTLOTJ06)seJ< zB*}!NJ=I;?_iSG}+~{4pa@B2SOiae4iRMfqr-W`p`pUI_0~3(R;9?!f2E<%t4F6iu zzu=U}DAtvTjU!g;H8w8obEtlxt3GhBT;J9|GSE|Q)CT&uZ6T*b|LN$>&3b)rf2}$& z)HjR^IyS|%t3%D?g87l7@$ndO2UbX(=M!CPIvU#A{-$-jp zX+KYVV6fcl1$bijmb=)p0_I(aEk}EvnP``M=cDDjE#E`jUgECd!NZlIirBamlV_z` zt@Jc%Bb6QHqg!gjjXg*HtC4TDUOtqaA?q`mr@f0jz4L`&o)eYwx*mNwUgEAIFR}mQ zWqdR*vC+K5F6C89ZUD=83^ud7ADd#pNl)S4Jg;|qlL40YdUQ3lOUX{OeReh*X(QiV z%r#OQt{tq^YmH-B-R=ygM_aNFyD_l2+=tfhM)&4-u74DNsN7p?Lcl#w;nxuDv*sdQ z3xsdj>fge*kTN_ub0hn1QQPKbsTGOZuA={hf_se|`IVUi)7_`_A^8 z3MxL2YU{P9lN0B|<(zH&Iwh%y&&0n1t!<6*@o4#rug!h;5L=9I?v;qk{HxH$#Anc@ zBgM1cQ?T_tf7DLJHg`6t?`4kITz+$$kHE|+ihIof%g&W?(|3;ItiD?(5?uAY8{^hr zliT*4208~lm)n?+4z9-JoSmaBeH-u09CMww%jM4D?2CvQpNs9B=)3QYXgP>^r~1x{ znD-*jn-Q1!OVGybrSEUP9NahZS75gw##f?^%Q@nAVEd-w--TU#hfVm_%RA;&-p-%C zeRrYdAtrDA-8ts$?8SD^n4O2Pmjj-_hnTYs}`CYBc&owE>*TYzT`VsTDPt1B}`dY;Az*&zR zf8)e|ASavi?Dt>%4|;Z@U4OIRT=aEFC*s{`|C6+&wpjFBjGgVk;^<2fEZp1>DVT+!n+Jlx{|;y&idd0&p%&v+RT3;$s5pQGN{??KEFx4s)KX1?)W zq#N1D)K}4mkl7h`f1 zE53_;g2w=MHxl2G>4{UYKr{u0_+wH#)lUq}9QX)V_*st#{xz@EYPg z$;X&^eoL<-#^O78Bge#DEl4X8-{YIuahK8KEim!u@iw-Y;))*eExZHfKIR4Acd;|R zW7GLRhaY^672oYWeB-C`Z|i+*dy0GSXVGHz?m&Nlh=u +#include +#include +#include +#include + +namespace wowee { +namespace rendering { + +// Phase 2.5: Lightweight Render Graph / Frame Graph +// Converts hardcoded pass sequence (shadow → reflection → compute cull → +// main → post-process → ImGui → present) into declarative graph nodes. +// Graph auto-inserts VkImageMemoryBarrier between passes. + +// Resource handle — identifies a virtual resource (image or buffer) within the graph. 
+struct RGResource { + uint32_t id = UINT32_MAX; + bool valid() const { return id != UINT32_MAX; } +}; + +// Image barrier descriptor for automatic synchronization between passes. +struct RGImageBarrier { + VkImage image; + VkImageLayout oldLayout; + VkImageLayout newLayout; + VkAccessFlags srcAccess; + VkAccessFlags dstAccess; + VkPipelineStageFlags srcStage; + VkPipelineStageFlags dstStage; + VkImageAspectFlags aspectMask; +}; + +// Buffer barrier descriptor for automatic synchronization between passes. +struct RGBufferBarrier { + VkBuffer buffer; + VkDeviceSize offset; + VkDeviceSize size; + VkAccessFlags srcAccess; + VkAccessFlags dstAccess; + VkPipelineStageFlags srcStage; + VkPipelineStageFlags dstStage; +}; + +// Render pass node — wraps an execution callback with declared inputs/outputs. +struct RGPass { + std::string name; + std::vector inputs; + std::vector outputs; + std::function execute; + bool enabled = true; // Can be dynamically disabled per-frame + + // Barriers to insert before this pass executes + std::vector imageBarriers; + std::vector bufferBarriers; +}; + +class RenderGraph { +public: + RenderGraph() = default; + ~RenderGraph() = default; + + // Reset graph for a new frame (clears passes, keeps resource registry). + void reset(); + + // Register a virtual resource (returns handle for input/output declarations). + RGResource registerResource(const std::string& name); + + // Look up a previously registered resource by name. + RGResource findResource(const std::string& name) const; + + // Add a render pass node. + // inputs: resources this pass reads from + // outputs: resources this pass writes to + // execute: callback invoked with the frame's command buffer + void addPass(const std::string& name, + const std::vector& inputs, + const std::vector& outputs, + std::function execute); + + // Enable/disable a pass by name (for dynamic toggling, e.g. shadows off). 
+ void setPassEnabled(const std::string& name, bool enabled); + + // Compile: topological sort by dependency order, insert barriers. + // Must be called after all addPass() calls and before execute(). + void compile(); + + // Execute all enabled passes in compiled order on the given command buffer. + void execute(VkCommandBuffer cmd); + + // Query: get the compiled execution order (pass names, for debug HUD). + const std::vector& getExecutionOrder() const { return executionOrder_; } + const std::vector& getPasses() const { return passes_; } + +private: + // Topological sort helper (Kahn's algorithm). + void topologicalSort(); + + // Resource registry: name → id + struct ResourceEntry { + std::string name; + uint32_t id; + }; + std::vector resources_; + uint32_t nextResourceId_ = 0; + + // Pass storage + std::vector passes_; + + // Compiled execution order (indices into passes_) + std::vector executionOrder_; + bool compiled_ = false; +}; + +} // namespace rendering +} // namespace wowee diff --git a/include/rendering/renderer.hpp b/include/rendering/renderer.hpp index 54372da9..a4d075e9 100644 --- a/include/rendering/renderer.hpp +++ b/include/rendering/renderer.hpp @@ -56,6 +56,7 @@ class AnimationController; class LevelUpEffect; class ChargeEffect; class SwimEffects; +class RenderGraph; class Renderer { public: @@ -433,6 +434,10 @@ private: bool ghostMode_ = false; // set each frame from gameHandler->isPlayerGhost() + // Phase 2.5: Render Graph — declarative pass ordering with automatic barriers + std::unique_ptr renderGraph_; + void buildFrameGraph(game::GameHandler* gameHandler); + // CPU timing stats (last frame/update). 
double lastUpdateMs = 0.0; double lastRenderMs = 0.0; diff --git a/include/rendering/terrain_manager.hpp b/include/rendering/terrain_manager.hpp index 50c09680..59c9c4e2 100644 --- a/include/rendering/terrain_manager.hpp +++ b/include/rendering/terrain_manager.hpp @@ -346,8 +346,8 @@ private: // Streaming parameters bool streamingEnabled = true; - int loadRadius = 4; // Load tiles within this radius (9x9 grid = 81 tiles) - int unloadRadius = 7; // Unload tiles beyond this radius + int loadRadius = 6; // Load tiles within this radius (13x13 grid = 169 tiles) + int unloadRadius = 9; // Unload tiles beyond this radius float updateInterval = 0.033f; // Check streaming every 33ms (~30 fps) float timeSinceLastUpdate = 0.0f; float proactiveStreamTimer_ = 0.0f; diff --git a/include/rendering/terrain_renderer.hpp b/include/rendering/terrain_renderer.hpp index 5bc13252..24fa1955 100644 --- a/include/rendering/terrain_renderer.hpp +++ b/include/rendering/terrain_renderer.hpp @@ -60,6 +60,11 @@ struct TerrainChunkGPU { float boundingSphereRadius = 0.0f; glm::vec3 boundingSphereCenter = glm::vec3(0.0f); + // Phase 2.2: Offsets into mega buffers for indirect drawing (-1 = not in mega buffer) + int32_t megaBaseVertex = -1; + uint32_t megaFirstIndex = 0; + uint32_t vertexCount = 0; + bool isValid() const { return vertexBuffer != VK_NULL_HANDLE && indexBuffer != VK_NULL_HANDLE; } }; @@ -200,6 +205,25 @@ private: bool fogEnabled = true; int renderedChunks = 0; int culledChunks = 0; + + // Phase 2.2: Mega vertex/index buffers for indirect drawing + // All terrain chunks share a single VB + IB, eliminating per-chunk rebinds. + // Indirect draw commands are built CPU-side each frame for visible chunks. 
+ VkBuffer megaVB_ = VK_NULL_HANDLE; + VmaAllocation megaVBAlloc_ = VK_NULL_HANDLE; + void* megaVBMapped_ = nullptr; + VkBuffer megaIB_ = VK_NULL_HANDLE; + VmaAllocation megaIBAlloc_ = VK_NULL_HANDLE; + void* megaIBMapped_ = nullptr; + uint32_t megaVBUsed_ = 0; // vertices used + uint32_t megaIBUsed_ = 0; // indices used + static constexpr uint32_t MEGA_VB_MAX_VERTS = 1536 * 1024; // ~1.5M verts × 44B ≈ 64MB + static constexpr uint32_t MEGA_IB_MAX_INDICES = 6 * 1024 * 1024; // 6M indices × 4B = 24MB + + VkBuffer indirectBuffer_ = VK_NULL_HANDLE; + VmaAllocation indirectAlloc_ = VK_NULL_HANDLE; + void* indirectMapped_ = nullptr; + static constexpr uint32_t MAX_INDIRECT_DRAWS = 8192; }; } // namespace rendering diff --git a/include/rendering/vk_pipeline.hpp b/include/rendering/vk_pipeline.hpp index e95337f8..e53229e3 100644 --- a/include/rendering/vk_pipeline.hpp +++ b/include/rendering/vk_pipeline.hpp @@ -75,6 +75,10 @@ public: // Dynamic state PipelineBuilder& setDynamicStates(const std::vector& states); + // Pipeline derivatives — hint driver to share compiled state between similar pipelines + PipelineBuilder& setFlags(VkPipelineCreateFlags flags); + PipelineBuilder& setBasePipeline(VkPipeline basePipeline); + // Build the pipeline (pass a VkPipelineCache for faster creation) VkPipeline build(VkDevice device, VkPipelineCache cache = VK_NULL_HANDLE) const; @@ -106,6 +110,8 @@ private: VkRenderPass renderPass_ = VK_NULL_HANDLE; uint32_t subpass_ = 0; std::vector dynamicStates_; + VkPipelineCreateFlags flags_ = 0; + VkPipeline basePipelineHandle_ = VK_NULL_HANDLE; }; // Helper to create a pipeline layout from descriptor set layouts and push constant ranges diff --git a/src/core/world_loader.cpp b/src/core/world_loader.cpp index 9e90e747..4e967b18 100644 --- a/src/core/world_loader.cpp +++ b/src/core/world_loader.cpp @@ -734,9 +734,9 @@ void WorldLoader::loadOnlineWorldTerrain(uint32_t mapId, float x, float y, float // Use a small radius for the initial load (just 
immediate tiles), // then restore the full radius after entering the game. // This matches WoW's behavior: load quickly, stream the rest in-game. - const int savedLoadRadius = 4; - terrainMgr->setLoadRadius(3); // 7x7=49 tiles — prevents hitches on spawn - terrainMgr->setUnloadRadius(7); + const int savedLoadRadius = 6; + terrainMgr->setLoadRadius(4); // 9x9=81 tiles — prevents hitches on spawn + terrainMgr->setUnloadRadius(9); // Trigger tile streaming for surrounding area terrainMgr->update(*camera, 1.0f); diff --git a/src/main.cpp b/src/main.cpp index a4481a9f..7ac84715 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -111,13 +111,13 @@ int main([[maybe_unused]] int argc, [[maybe_unused]] char* argv[]) { _NSGetExecutablePath(nullptr, &bufSize); std::string exePath(bufSize, '\0'); _NSGetExecutablePath(exePath.data(), &bufSize); - chdir(dirname(exePath.data())); + if (chdir(dirname(exePath.data())) != 0) {} } #elif defined(__linux__) { char buf[4096]; ssize_t len = readlink("/proc/self/exe", buf, sizeof(buf) - 1); - if (len > 0) { buf[len] = '\0'; chdir(dirname(buf)); } + if (len > 0) { buf[len] = '\0'; if (chdir(dirname(buf)) != 0) {} } } #endif diff --git a/src/rendering/m2_renderer.cpp b/src/rendering/m2_renderer.cpp index d87a6844..8fccc598 100644 --- a/src/rendering/m2_renderer.cpp +++ b/src/rendering/m2_renderer.cpp @@ -349,6 +349,20 @@ bool M2Renderer::initialize(VkContext* ctx, VkDescriptorSetLayout perFrameLayout vkCreateDescriptorSetLayout(device, &ci, nullptr, &boneSetLayout_); } + // Phase 2.1: Instance data set layout (set 3): binding 0 = STORAGE_BUFFER (per-instance data) + { + VkDescriptorSetLayoutBinding binding{}; + binding.binding = 0; + binding.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER; + binding.descriptorCount = 1; + binding.stageFlags = VK_SHADER_STAGE_VERTEX_BIT; + + VkDescriptorSetLayoutCreateInfo ci{VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO}; + ci.bindingCount = 1; + ci.pBindings = &binding; + 
vkCreateDescriptorSetLayout(device, &ci, nullptr, &instanceSetLayout_); + } + // Particle texture set layout (set 1 for particles): binding 0 = sampler2D { VkDescriptorSetLayoutBinding binding{}; @@ -423,19 +437,244 @@ bool M2Renderer::initialize(VkContext* ctx, VkDescriptorSetLayout perFrameLayout } } + // Mega bone SSBO — consolidates all animated instance bones into one buffer per frame. + // Slot 0 = identity matrix (for non-animated instances), slots 1..N = animated instances. + { + const VkDeviceSize megaSize = MEGA_BONE_MAX_INSTANCES * MAX_BONES_PER_INSTANCE * sizeof(glm::mat4); + glm::mat4 identity(1.0f); + for (int i = 0; i < 2; i++) { + VkBufferCreateInfo bci{VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO}; + bci.size = megaSize; + bci.usage = VK_BUFFER_USAGE_STORAGE_BUFFER_BIT; + VmaAllocationCreateInfo aci{}; + aci.usage = VMA_MEMORY_USAGE_CPU_TO_GPU; + aci.flags = VMA_ALLOCATION_CREATE_MAPPED_BIT; + VmaAllocationInfo allocInfo{}; + vmaCreateBuffer(ctx->getAllocator(), &bci, &aci, + &megaBoneBuffer_[i], &megaBoneAlloc_[i], &allocInfo); + megaBoneMapped_[i] = allocInfo.pMappedData; + + // Slot 0: identity matrix (for non-animated instances) + if (megaBoneMapped_[i]) { + memcpy(megaBoneMapped_[i], &identity, sizeof(identity)); + } + + megaBoneSet_[i] = allocateBoneSet(); + if (megaBoneSet_[i]) { + VkDescriptorBufferInfo bufInfo{}; + bufInfo.buffer = megaBoneBuffer_[i]; + bufInfo.offset = 0; + bufInfo.range = megaSize; + VkWriteDescriptorSet write{VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET}; + write.dstSet = megaBoneSet_[i]; + write.dstBinding = 0; + write.descriptorCount = 1; + write.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER; + write.pBufferInfo = &bufInfo; + vkUpdateDescriptorSets(device, 1, &write, 0, nullptr); + } + } + } + + // Phase 2.1: Instance data SSBO — per-frame buffer holding per-instance transforms, fade, bones. + // Shader reads instanceData[push.instanceDataOffset + gl_InstanceIndex]. 
+ { + static_assert(sizeof(M2InstanceGPU) == 96, "M2InstanceGPU must be 96 bytes (std430)"); + const VkDeviceSize instBufSize = MAX_INSTANCE_DATA * sizeof(M2InstanceGPU); + + // Descriptor pool for 2 sets (double-buffered) + VkDescriptorPoolSize poolSize{VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 2}; + VkDescriptorPoolCreateInfo poolCi{VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO}; + poolCi.maxSets = 2; + poolCi.poolSizeCount = 1; + poolCi.pPoolSizes = &poolSize; + vkCreateDescriptorPool(device, &poolCi, nullptr, &instanceDescPool_); + + for (int i = 0; i < 2; i++) { + VkBufferCreateInfo bci{VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO}; + bci.size = instBufSize; + bci.usage = VK_BUFFER_USAGE_STORAGE_BUFFER_BIT; + VmaAllocationCreateInfo aci{}; + aci.usage = VMA_MEMORY_USAGE_CPU_TO_GPU; + aci.flags = VMA_ALLOCATION_CREATE_MAPPED_BIT; + VmaAllocationInfo allocInfo{}; + vmaCreateBuffer(ctx->getAllocator(), &bci, &aci, + &instanceBuffer_[i], &instanceAlloc_[i], &allocInfo); + instanceMapped_[i] = allocInfo.pMappedData; + + VkDescriptorSetAllocateInfo setAi{VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO}; + setAi.descriptorPool = instanceDescPool_; + setAi.descriptorSetCount = 1; + setAi.pSetLayouts = &instanceSetLayout_; + vkAllocateDescriptorSets(device, &setAi, &instanceSet_[i]); + + VkDescriptorBufferInfo bufInfo{}; + bufInfo.buffer = instanceBuffer_[i]; + bufInfo.offset = 0; + bufInfo.range = instBufSize; + VkWriteDescriptorSet write{VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET}; + write.dstSet = instanceSet_[i]; + write.dstBinding = 0; + write.descriptorCount = 1; + write.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER; + write.pBufferInfo = &bufInfo; + vkUpdateDescriptorSets(device, 1, &write, 0, nullptr); + } + } + + // Phase 2.3: GPU frustum culling — compute pipeline, buffers, descriptors. + // Compute shader tests each instance bounding sphere against 6 frustum planes + distance. + // Output: uint visibility[] read back by CPU to skip culled instances in sortedVisible_ build. 
+ { + static_assert(sizeof(CullInstanceGPU) == 32, "CullInstanceGPU must be 32 bytes (std430)"); + static_assert(sizeof(CullUniformsGPU) == 128, "CullUniformsGPU must be 128 bytes (std140)"); + + // Descriptor set layout: binding 0 = UBO (frustum+camera), 1 = SSBO (input), 2 = SSBO (output) + VkDescriptorSetLayoutBinding bindings[3] = {}; + bindings[0].binding = 0; + bindings[0].descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER; + bindings[0].descriptorCount = 1; + bindings[0].stageFlags = VK_SHADER_STAGE_COMPUTE_BIT; + bindings[1].binding = 1; + bindings[1].descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER; + bindings[1].descriptorCount = 1; + bindings[1].stageFlags = VK_SHADER_STAGE_COMPUTE_BIT; + bindings[2].binding = 2; + bindings[2].descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER; + bindings[2].descriptorCount = 1; + bindings[2].stageFlags = VK_SHADER_STAGE_COMPUTE_BIT; + + VkDescriptorSetLayoutCreateInfo layoutCi{VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO}; + layoutCi.bindingCount = 3; + layoutCi.pBindings = bindings; + vkCreateDescriptorSetLayout(device, &layoutCi, nullptr, &cullSetLayout_); + + // Pipeline layout (no push constants — everything via UBO) + VkPipelineLayoutCreateInfo plCi{VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO}; + plCi.setLayoutCount = 1; + plCi.pSetLayouts = &cullSetLayout_; + vkCreatePipelineLayout(device, &plCi, nullptr, &cullPipelineLayout_); + + // Load compute shader + rendering::VkShaderModule cullComp; + if (!cullComp.loadFromFile(device, "assets/shaders/m2_cull.comp.spv")) { + LOG_ERROR("M2Renderer: failed to load m2_cull.comp.spv — GPU culling disabled"); + } else { + VkComputePipelineCreateInfo cpCi{VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO}; + cpCi.stage = cullComp.stageInfo(VK_SHADER_STAGE_COMPUTE_BIT); + cpCi.layout = cullPipelineLayout_; + if (vkCreateComputePipelines(device, VK_NULL_HANDLE, 1, &cpCi, nullptr, &cullPipeline_) != VK_SUCCESS) { + LOG_ERROR("M2Renderer: failed to create cull compute 
pipeline"); + cullPipeline_ = VK_NULL_HANDLE; + } + cullComp.destroy(); + } + + // Descriptor pool: 2 sets × 3 descriptors each (1 UBO + 2 SSBO) + VkDescriptorPoolSize poolSizes[2] = {}; + poolSizes[0] = {VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, 2}; + poolSizes[1] = {VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 4}; // 2 input + 2 output + VkDescriptorPoolCreateInfo poolCi{VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO}; + poolCi.maxSets = 2; + poolCi.poolSizeCount = 2; + poolCi.pPoolSizes = poolSizes; + vkCreateDescriptorPool(device, &poolCi, nullptr, &cullDescPool_); + + const VkDeviceSize uniformSize = sizeof(CullUniformsGPU); + const VkDeviceSize inputSize = MAX_CULL_INSTANCES * sizeof(CullInstanceGPU); + const VkDeviceSize outputSize = MAX_CULL_INSTANCES * sizeof(uint32_t); + + for (int i = 0; i < 2; i++) { + // Uniform buffer (frustum planes + camera) + { + VkBufferCreateInfo bci{VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO}; + bci.size = uniformSize; + bci.usage = VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT; + VmaAllocationCreateInfo aci{}; + aci.usage = VMA_MEMORY_USAGE_CPU_TO_GPU; + aci.flags = VMA_ALLOCATION_CREATE_MAPPED_BIT; + VmaAllocationInfo ai{}; + vmaCreateBuffer(ctx->getAllocator(), &bci, &aci, + &cullUniformBuffer_[i], &cullUniformAlloc_[i], &ai); + cullUniformMapped_[i] = ai.pMappedData; + } + // Input SSBO (per-instance cull data) + { + VkBufferCreateInfo bci{VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO}; + bci.size = inputSize; + bci.usage = VK_BUFFER_USAGE_STORAGE_BUFFER_BIT; + VmaAllocationCreateInfo aci{}; + aci.usage = VMA_MEMORY_USAGE_CPU_TO_GPU; + aci.flags = VMA_ALLOCATION_CREATE_MAPPED_BIT; + VmaAllocationInfo ai{}; + vmaCreateBuffer(ctx->getAllocator(), &bci, &aci, + &cullInputBuffer_[i], &cullInputAlloc_[i], &ai); + cullInputMapped_[i] = ai.pMappedData; + } + // Output SSBO (visibility flags — GPU writes, CPU reads) + { + VkBufferCreateInfo bci{VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO}; + bci.size = outputSize; + bci.usage = VK_BUFFER_USAGE_STORAGE_BUFFER_BIT; + 
VmaAllocationCreateInfo aci{}; + aci.usage = VMA_MEMORY_USAGE_GPU_TO_CPU; + aci.flags = VMA_ALLOCATION_CREATE_MAPPED_BIT; + VmaAllocationInfo ai{}; + vmaCreateBuffer(ctx->getAllocator(), &bci, &aci, + &cullOutputBuffer_[i], &cullOutputAlloc_[i], &ai); + cullOutputMapped_[i] = ai.pMappedData; + } + + // Allocate and write descriptor set + VkDescriptorSetAllocateInfo setAi{VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO}; + setAi.descriptorPool = cullDescPool_; + setAi.descriptorSetCount = 1; + setAi.pSetLayouts = &cullSetLayout_; + vkAllocateDescriptorSets(device, &setAi, &cullSet_[i]); + + VkDescriptorBufferInfo uboInfo{cullUniformBuffer_[i], 0, uniformSize}; + VkDescriptorBufferInfo inputInfo{cullInputBuffer_[i], 0, inputSize}; + VkDescriptorBufferInfo outputInfo{cullOutputBuffer_[i], 0, outputSize}; + + VkWriteDescriptorSet writes[3] = {}; + writes[0] = {VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET}; + writes[0].dstSet = cullSet_[i]; + writes[0].dstBinding = 0; + writes[0].descriptorCount = 1; + writes[0].descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER; + writes[0].pBufferInfo = &uboInfo; + + writes[1] = {VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET}; + writes[1].dstSet = cullSet_[i]; + writes[1].dstBinding = 1; + writes[1].descriptorCount = 1; + writes[1].descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER; + writes[1].pBufferInfo = &inputInfo; + + writes[2] = {VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET}; + writes[2].dstSet = cullSet_[i]; + writes[2].dstBinding = 2; + writes[2].descriptorCount = 1; + writes[2].descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER; + writes[2].pBufferInfo = &outputInfo; + + vkUpdateDescriptorSets(device, 3, writes, 0, nullptr); + } + } + // --- Pipeline layouts --- - // Main M2 pipeline layout: set 0 = perFrame, set 1 = material, set 2 = bones - // Push constant: mat4 model + vec2 uvOffset + int texCoordSet + int useBones = 80 bytes + // Main M2 pipeline layout: set 0 = perFrame, set 1 = material, set 2 = bones, set 3 = instances + // Push 
constant: int texCoordSet + int isFoliage + int instanceDataOffset (12 bytes) { - VkDescriptorSetLayout setLayouts[] = {perFrameLayout, materialSetLayout_, boneSetLayout_}; + VkDescriptorSetLayout setLayouts[] = {perFrameLayout, materialSetLayout_, boneSetLayout_, instanceSetLayout_}; VkPushConstantRange pushRange{}; pushRange.stageFlags = VK_SHADER_STAGE_VERTEX_BIT; pushRange.offset = 0; - pushRange.size = 88; // mat4(64) + vec2(8) + int(4) + int(4) + int(4) + float(4) + pushRange.size = 12; // int texCoordSet + int isFoliage + int instanceDataOffset VkPipelineLayoutCreateInfo ci{VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO}; - ci.setLayoutCount = 3; + ci.setLayoutCount = 4; ci.pSetLayouts = setLayouts; ci.pushConstantRangeCount = 1; ci.pPushConstantRanges = &pushRange; @@ -513,7 +752,9 @@ bool M2Renderer::initialize(VkContext* ctx, VkDescriptorSetLayout perFrameLayout {4, 0, VK_FORMAT_R32G32B32A32_SFLOAT, 14 * sizeof(float)}, // boneIndices (float) }; - auto buildM2Pipeline = [&](VkPipelineColorBlendAttachmentState blendState, bool depthWrite) -> VkPipeline { + // Pipeline derivatives — opaque is the base, others derive from it for shared state optimization + auto buildM2Pipeline = [&](VkPipelineColorBlendAttachmentState blendState, bool depthWrite, + VkPipelineCreateFlags flags = 0, VkPipeline basePipeline = VK_NULL_HANDLE) -> VkPipeline { return PipelineBuilder() .setShaders(m2Vert.stageInfo(VK_SHADER_STAGE_VERTEX_BIT), m2Frag.stageInfo(VK_SHADER_STAGE_FRAGMENT_BIT)) @@ -526,13 +767,19 @@ bool M2Renderer::initialize(VkContext* ctx, VkDescriptorSetLayout perFrameLayout .setLayout(pipelineLayout_) .setRenderPass(mainPass) .setDynamicStates({VK_DYNAMIC_STATE_VIEWPORT, VK_DYNAMIC_STATE_SCISSOR}) + .setFlags(flags) + .setBasePipeline(basePipeline) .build(device, vkCtx_->getPipelineCache()); }; - opaquePipeline_ = buildM2Pipeline(PipelineBuilder::blendDisabled(), true); - alphaTestPipeline_ = buildM2Pipeline(PipelineBuilder::blendAlpha(), true); - alphaPipeline_ = 
buildM2Pipeline(PipelineBuilder::blendAlpha(), false); - additivePipeline_ = buildM2Pipeline(PipelineBuilder::blendAdditive(), false); + opaquePipeline_ = buildM2Pipeline(PipelineBuilder::blendDisabled(), true, + VK_PIPELINE_CREATE_ALLOW_DERIVATIVES_BIT); + alphaTestPipeline_ = buildM2Pipeline(PipelineBuilder::blendAlpha(), true, + VK_PIPELINE_CREATE_DERIVATIVE_BIT, opaquePipeline_); + alphaPipeline_ = buildM2Pipeline(PipelineBuilder::blendAlpha(), false, + VK_PIPELINE_CREATE_DERIVATIVE_BIT, opaquePipeline_); + additivePipeline_ = buildM2Pipeline(PipelineBuilder::blendAdditive(), false, + VK_PIPELINE_CREATE_DERIVATIVE_BIT, opaquePipeline_); // --- Build particle pipelines --- if (particleVert.isValid() && particleFrag.isValid()) { @@ -805,10 +1052,38 @@ void M2Renderer::shutdown() { if (dummyBoneBuffer_) { vmaDestroyBuffer(alloc, dummyBoneBuffer_, dummyBoneAlloc_); dummyBoneBuffer_ = VK_NULL_HANDLE; } // dummyBoneSet_ is freed implicitly when boneDescPool_ is destroyed dummyBoneSet_ = VK_NULL_HANDLE; + // Mega bone SSBO cleanup (sets freed implicitly with boneDescPool_) + for (int i = 0; i < 2; i++) { + if (megaBoneBuffer_[i]) { vmaDestroyBuffer(alloc, megaBoneBuffer_[i], megaBoneAlloc_[i]); megaBoneBuffer_[i] = VK_NULL_HANDLE; } + megaBoneMapped_[i] = nullptr; + megaBoneSet_[i] = VK_NULL_HANDLE; + } if (materialDescPool_) { vkDestroyDescriptorPool(device, materialDescPool_, nullptr); materialDescPool_ = VK_NULL_HANDLE; } if (boneDescPool_) { vkDestroyDescriptorPool(device, boneDescPool_, nullptr); boneDescPool_ = VK_NULL_HANDLE; } + // Phase 2.1: Instance data SSBO cleanup (sets freed with instanceDescPool_) + for (int i = 0; i < 2; i++) { + if (instanceBuffer_[i]) { vmaDestroyBuffer(alloc, instanceBuffer_[i], instanceAlloc_[i]); instanceBuffer_[i] = VK_NULL_HANDLE; } + instanceMapped_[i] = nullptr; + instanceSet_[i] = VK_NULL_HANDLE; + } + if (instanceDescPool_) { vkDestroyDescriptorPool(device, instanceDescPool_, nullptr); instanceDescPool_ = VK_NULL_HANDLE; } + 
+ // Phase 2.3: GPU frustum culling compute pipeline + buffers cleanup + if (cullPipeline_) { vkDestroyPipeline(device, cullPipeline_, nullptr); cullPipeline_ = VK_NULL_HANDLE; } + if (cullPipelineLayout_) { vkDestroyPipelineLayout(device, cullPipelineLayout_, nullptr); cullPipelineLayout_ = VK_NULL_HANDLE; } + for (int i = 0; i < 2; i++) { + if (cullUniformBuffer_[i]) { vmaDestroyBuffer(alloc, cullUniformBuffer_[i], cullUniformAlloc_[i]); cullUniformBuffer_[i] = VK_NULL_HANDLE; } + if (cullInputBuffer_[i]) { vmaDestroyBuffer(alloc, cullInputBuffer_[i], cullInputAlloc_[i]); cullInputBuffer_[i] = VK_NULL_HANDLE; } + if (cullOutputBuffer_[i]) { vmaDestroyBuffer(alloc, cullOutputBuffer_[i], cullOutputAlloc_[i]); cullOutputBuffer_[i] = VK_NULL_HANDLE; } + cullUniformMapped_[i] = cullInputMapped_[i] = cullOutputMapped_[i] = nullptr; + cullSet_[i] = VK_NULL_HANDLE; + } + if (cullDescPool_) { vkDestroyDescriptorPool(device, cullDescPool_, nullptr); cullDescPool_ = VK_NULL_HANDLE; } + if (cullSetLayout_) { vkDestroyDescriptorSetLayout(device, cullSetLayout_, nullptr); cullSetLayout_ = VK_NULL_HANDLE; } + if (materialSetLayout_) { vkDestroyDescriptorSetLayout(device, materialSetLayout_, nullptr); materialSetLayout_ = VK_NULL_HANDLE; } if (boneSetLayout_) { vkDestroyDescriptorSetLayout(device, boneSetLayout_, nullptr); boneSetLayout_ = VK_NULL_HANDLE; } + if (instanceSetLayout_) { vkDestroyDescriptorSetLayout(device, instanceSetLayout_, nullptr); instanceSetLayout_ = VK_NULL_HANDLE; } if (particleTexLayout_) { vkDestroyDescriptorSetLayout(device, particleTexLayout_, nullptr); particleTexLayout_ = VK_NULL_HANDLE; } // Destroy shadow resources @@ -2212,47 +2487,117 @@ void M2Renderer::prepareRender(uint32_t frameIndex, const Camera& camera) { if (!initialized_ || instances.empty()) return; (void)camera; // reserved for future frustum-based culling - // Pre-allocate bone SSBOs + descriptor sets on main thread (pool ops not thread-safe). 
- // Only iterate animated instances — static doodads don't need bone buffers. + // --- Mega bone SSBO: assign slots and upload all animated instance bones --- + // Slot 0 = identity (non-animated), slots 1..N = animated instances. + uint32_t nextSlot = 1; for (size_t idx : animatedInstanceIndices_) { if (idx >= instances.size()) continue; auto& instance = instances[idx]; - if (instance.boneMatrices.empty()) continue; + if (instance.boneMatrices.empty()) { + instance.megaBoneOffset = 0; // Use identity slot + continue; + } - if (!instance.boneBuffer[frameIndex]) { - VkBufferCreateInfo bci{VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO}; - bci.size = 128 * sizeof(glm::mat4); - bci.usage = VK_BUFFER_USAGE_STORAGE_BUFFER_BIT; - VmaAllocationCreateInfo aci{}; - aci.usage = VMA_MEMORY_USAGE_CPU_TO_GPU; - aci.flags = VMA_ALLOCATION_CREATE_MAPPED_BIT; - VmaAllocationInfo allocInfo{}; - vmaCreateBuffer(vkCtx_->getAllocator(), &bci, &aci, - &instance.boneBuffer[frameIndex], &instance.boneAlloc[frameIndex], &allocInfo); - instance.boneMapped[frameIndex] = allocInfo.pMappedData; + if (nextSlot >= MEGA_BONE_MAX_INSTANCES) { + instance.megaBoneOffset = 0; // Overflow — use identity + continue; + } - // Force dirty so current boneMatrices get copied into this - // newly-allocated buffer during render (prevents garbage/zero - // bones when the other frame index already cleared bonesDirty). 
- instance.bonesDirty[frameIndex] = true; + instance.megaBoneOffset = nextSlot * MAX_BONES_PER_INSTANCE; - instance.boneSet[frameIndex] = allocateBoneSet(); - if (instance.boneSet[frameIndex]) { - VkDescriptorBufferInfo bufInfo{}; - bufInfo.buffer = instance.boneBuffer[frameIndex]; - bufInfo.offset = 0; - bufInfo.range = bci.size; - VkWriteDescriptorSet write{VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET}; - write.dstSet = instance.boneSet[frameIndex]; - write.dstBinding = 0; - write.descriptorCount = 1; - write.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER; - write.pBufferInfo = &bufInfo; - vkUpdateDescriptorSets(vkCtx_->getDevice(), 1, &write, 0, nullptr); + // Upload bone matrices to mega buffer + if (megaBoneMapped_[frameIndex]) { + int numBones = std::min(static_cast(instance.boneMatrices.size()), + static_cast(MAX_BONES_PER_INSTANCE)); + auto* dst = static_cast(megaBoneMapped_[frameIndex]) + instance.megaBoneOffset; + memcpy(dst, instance.boneMatrices.data(), numBones * sizeof(glm::mat4)); + } + + nextSlot++; + } +} + +// Phase 2.3: Dispatch GPU frustum culling compute shader. +// Called on the primary command buffer BEFORE the render pass begins so that +// compute dispatch and memory barrier complete before secondary command buffers +// read the visibility output in render(). +void M2Renderer::dispatchCullCompute(VkCommandBuffer cmd, uint32_t frameIndex, const Camera& camera) { + if (!cullPipeline_ || instances.empty()) return; + + const uint32_t numInstances = std::min(static_cast(instances.size()), MAX_CULL_INSTANCES); + + // --- Compute per-instance adaptive distances (same formula as old CPU cull) --- + const float targetRenderDist = (instances.size() > 2000) ? 300.0f + : (instances.size() > 1000) ? 500.0f + : 1000.0f; + const float shrinkRate = 0.005f; + const float growRate = 0.05f; + float blendRate = (targetRenderDist < smoothedRenderDist_) ? 
shrinkRate : growRate; + smoothedRenderDist_ = glm::mix(smoothedRenderDist_, targetRenderDist, blendRate); + const float maxRenderDistance = smoothedRenderDist_; + const float maxRenderDistanceSq = maxRenderDistance * maxRenderDistance; + const float maxPossibleDistSq = maxRenderDistanceSq * 4.0f; // 2x safety margin + + // --- Upload frustum planes + camera (UBO, binding 0) --- + const glm::mat4 vp = camera.getProjectionMatrix() * camera.getViewMatrix(); + Frustum frustum; + frustum.extractFromMatrix(vp); + const glm::vec3 camPos = camera.getPosition(); + + if (cullUniformMapped_[frameIndex]) { + auto* ubo = static_cast(cullUniformMapped_[frameIndex]); + for (int i = 0; i < 6; i++) { + const auto& p = frustum.getPlane(static_cast(i)); + ubo->frustumPlanes[i] = glm::vec4(p.normal, p.distance); + } + ubo->cameraPos = glm::vec4(camPos, maxPossibleDistSq); + ubo->instanceCount = numInstances; + } + + // --- Upload per-instance cull data (SSBO, binding 1) --- + if (cullInputMapped_[frameIndex]) { + auto* input = static_cast(cullInputMapped_[frameIndex]); + for (uint32_t i = 0; i < numInstances; i++) { + const auto& inst = instances[i]; + float worldRadius = inst.cachedBoundRadius * inst.scale; + float cullRadius = worldRadius; + if (inst.cachedDisableAnimation) { + cullRadius = std::max(cullRadius, 3.0f); } + float effectiveMaxDistSq = maxRenderDistanceSq * std::max(1.0f, cullRadius / 12.0f); + if (inst.cachedDisableAnimation) effectiveMaxDistSq *= 2.6f; + if (inst.cachedIsGroundDetail) effectiveMaxDistSq *= 0.9f; + + float paddedRadius = std::max(cullRadius * 1.5f, cullRadius + 3.0f); + + uint32_t flags = 0; + if (inst.cachedIsValid) flags |= 1u; + if (inst.cachedIsSmoke) flags |= 2u; + if (inst.cachedIsInvisibleTrap) flags |= 4u; + + input[i].sphere = glm::vec4(inst.position, paddedRadius); + input[i].effectiveMaxDistSq = effectiveMaxDistSq; + input[i].flags = flags; } } + + // --- Dispatch compute shader --- + vkCmdBindPipeline(cmd, VK_PIPELINE_BIND_POINT_COMPUTE, 
cullPipeline_); + vkCmdBindDescriptorSets(cmd, VK_PIPELINE_BIND_POINT_COMPUTE, + cullPipelineLayout_, 0, 1, &cullSet_[frameIndex], 0, nullptr); + + const uint32_t groupCount = (numInstances + 63) / 64; + vkCmdDispatch(cmd, groupCount, 1, 1); + + // --- Memory barrier: compute writes → host reads --- + VkMemoryBarrier barrier{VK_STRUCTURE_TYPE_MEMORY_BARRIER}; + barrier.srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT; + barrier.dstAccessMask = VK_ACCESS_HOST_READ_BIT; + vkCmdPipelineBarrier(cmd, + VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, + VK_PIPELINE_STAGE_HOST_BIT, + 0, 1, &barrier, 0, nullptr, 0, nullptr); } void M2Renderer::render(VkCommandBuffer cmd, VkDescriptorSet perFrameSet, const Camera& camera) { @@ -2267,71 +2612,86 @@ void M2Renderer::render(VkCommandBuffer cmd, VkDescriptorSet perFrameSet, const LOG_INFO("M2 render: ", instances.size(), " instances, ", models.size(), " models"); } - // Build frustum for culling - const glm::mat4 view = camera.getViewMatrix(); - const glm::mat4 projection = camera.getProjectionMatrix(); - Frustum frustum; - frustum.extractFromMatrix(projection * view); - // Reuse persistent buffers (clear instead of reallocating) glowSprites_.clear(); lastDrawCallCount = 0; - // Adaptive render distance: smoothed to prevent pop-in/pop-out flickering - const float targetRenderDist = (instances.size() > 2000) ? 300.0f - : (instances.size() > 1000) ? 500.0f - : 1000.0f; - // Smooth transitions: shrink slowly (avoid popping out nearby objects) - const float shrinkRate = 0.005f; // very slow decrease - const float growRate = 0.05f; // faster increase - float blendRate = (targetRenderDist < smoothedRenderDist_) ? shrinkRate : growRate; - smoothedRenderDist_ = glm::mix(smoothedRenderDist_, targetRenderDist, blendRate); - const float maxRenderDistance = smoothedRenderDist_; - const float maxRenderDistanceSq = maxRenderDistance * maxRenderDistance; + // Phase 2.3: GPU cull results — dispatchCullCompute() already updated smoothedRenderDist_. 
+ // Use the cached value (set by dispatchCullCompute or fallback below). + const uint32_t frameIndex = vkCtx_->getCurrentFrame(); + const uint32_t numInstances = std::min(static_cast(instances.size()), MAX_CULL_INSTANCES); + const uint32_t* visibility = static_cast(cullOutputMapped_[frameIndex]); + const bool gpuCullAvailable = (cullPipeline_ != VK_NULL_HANDLE && visibility != nullptr); + + // If GPU culling was not dispatched, fallback: compute distances on CPU + float maxRenderDistanceSq; + if (!gpuCullAvailable) { + const float targetRenderDist = (instances.size() > 2000) ? 300.0f + : (instances.size() > 1000) ? 500.0f + : 1000.0f; + const float shrinkRate = 0.005f; + const float growRate = 0.05f; + float blendRate = (targetRenderDist < smoothedRenderDist_) ? shrinkRate : growRate; + smoothedRenderDist_ = glm::mix(smoothedRenderDist_, targetRenderDist, blendRate); + maxRenderDistanceSq = smoothedRenderDist_ * smoothedRenderDist_; + } else { + maxRenderDistanceSq = smoothedRenderDist_ * smoothedRenderDist_; + } + const float fadeStartFraction = 0.75f; const glm::vec3 camPos = camera.getPosition(); - // Build sorted visible instance list: cull then sort by modelId to batch VAO binds - // Reuse persistent vector to avoid allocation + // Build sorted visible instance list sortedVisible_.clear(); - // Reserve based on expected visible count (roughly 30% of total instances in dense areas) const size_t expectedVisible = std::min(instances.size() / 3, size_t(600)); if (sortedVisible_.capacity() < expectedVisible) { sortedVisible_.reserve(expectedVisible); } - // Early distance rejection: max possible render distance (tight but safe upper bound) - const float maxPossibleDistSq = maxRenderDistance * maxRenderDistance * 4.0f; // 2x safety margin (reduced from 4x) + // Phase 2.3: GPU frustum culling — build frustum only for CPU fallback path + Frustum frustum; + if (!gpuCullAvailable) { + const glm::mat4 vp = camera.getProjectionMatrix() * camera.getViewMatrix(); + 
frustum.extractFromMatrix(vp); + } + const float maxPossibleDistSq = maxRenderDistanceSq * 4.0f; - for (uint32_t i = 0; i < static_cast(instances.size()); ++i) { + for (uint32_t i = 0; i < numInstances; ++i) { const auto& instance = instances[i]; - // Use cached model flags — no hash lookup needed - if (!instance.cachedIsValid || instance.cachedIsSmoke || instance.cachedIsInvisibleTrap) continue; + if (gpuCullAvailable) { + // Phase 2.3: GPU already tested flags + distance + frustum + if (!visibility[i]) continue; + } else { + // CPU fallback: same culling logic as before Phase 2.3 + if (!instance.cachedIsValid || instance.cachedIsSmoke || instance.cachedIsInvisibleTrap) continue; + glm::vec3 toCam = instance.position - camPos; + float distSqTest = glm::dot(toCam, toCam); + if (distSqTest > maxPossibleDistSq) continue; + + float worldRadius = instance.cachedBoundRadius * instance.scale; + float cullRadius = worldRadius; + if (instance.cachedDisableAnimation) cullRadius = std::max(cullRadius, 3.0f); + float effDistSq = maxRenderDistanceSq * std::max(1.0f, cullRadius / 12.0f); + if (instance.cachedDisableAnimation) effDistSq *= 2.6f; + if (instance.cachedIsGroundDetail) effDistSq *= 0.9f; + if (distSqTest > effDistSq) continue; + + float paddedRadius = std::max(cullRadius * 1.5f, cullRadius + 3.0f); + if (cullRadius > 0.0f && !frustum.intersectsSphere(instance.position, paddedRadius)) continue; + } + + // Compute distSq + effectiveMaxDistSq for sorting and fade alpha (cheap for visible-only) glm::vec3 toCam = instance.position - camPos; float distSq = glm::dot(toCam, toCam); - if (distSq > maxPossibleDistSq) continue; - float worldRadius = instance.cachedBoundRadius * instance.scale; float cullRadius = worldRadius; - if (instance.cachedDisableAnimation) { - cullRadius = std::max(cullRadius, 3.0f); - } + if (instance.cachedDisableAnimation) cullRadius = std::max(cullRadius, 3.0f); float effectiveMaxDistSq = maxRenderDistanceSq * std::max(1.0f, cullRadius / 12.0f); - 
if (instance.cachedDisableAnimation) { - effectiveMaxDistSq *= 2.6f; - } - if (instance.cachedIsGroundDetail) { - effectiveMaxDistSq *= 0.75f; - } - - if (distSq > effectiveMaxDistSq) continue; - - // Frustum cull with padding - float paddedRadius = std::max(cullRadius * 1.5f, cullRadius + 3.0f); - if (cullRadius > 0.0f && !frustum.intersectsSphere(instance.position, paddedRadius)) continue; + if (instance.cachedDisableAnimation) effectiveMaxDistSq *= 2.6f; + if (instance.cachedIsGroundDetail) effectiveMaxDistSq *= 0.9f; sortedVisible_.push_back({i, instance.modelId, distSq, effectiveMaxDistSq}); } @@ -2351,17 +2711,12 @@ void M2Renderer::render(VkCommandBuffer cmd, VkDescriptorSet perFrameSet, const // State tracking VkPipeline currentPipeline = VK_NULL_HANDLE; VkDescriptorSet currentMaterialSet = VK_NULL_HANDLE; - VkDescriptorSet currentBoneSet = VK_NULL_HANDLE; - uint32_t frameIndex = vkCtx_->getCurrentFrame(); - // Push constants struct matching m2.vert.glsl push_constant block + // Phase 2.1: Push constants now carry per-batch data only; per-instance data is in instance SSBO. struct M2PushConstants { - glm::mat4 model; - glm::vec2 uvOffset; - int texCoordSet; - int useBones; - int isFoliage; - float fadeAlpha; + int32_t texCoordSet; // UV set index (0 or 1) + int32_t isFoliage; // Foliage wind animation flag + int32_t instanceDataOffset; // Base index into instance SSBO for this draw group }; // Validate per-frame descriptor set before any Vulkan commands @@ -2377,311 +2732,338 @@ void M2Renderer::render(VkCommandBuffer cmd, VkDescriptorSet perFrameSet, const // Start with opaque pipeline vkCmdBindPipeline(cmd, VK_PIPELINE_BIND_POINT_GRAPHICS, opaquePipeline_); currentPipeline = opaquePipeline_; - bool opaquePass = true; // Pass 1 = opaque, pass 2 = transparent (set below for second pass) // Bind dummy bone set (set 2) so non-animated draws have a valid binding. - // Animated instances override this with their real bone set per-instance. 
- if (dummyBoneSet_) { + // Phase 2.4: Bind mega bone SSBO instead — all instances index into one buffer via boneBase. + if (megaBoneSet_[frameIndex]) { + vkCmdBindDescriptorSets(cmd, VK_PIPELINE_BIND_POINT_GRAPHICS, + pipelineLayout_, 2, 1, &megaBoneSet_[frameIndex], 0, nullptr); + } else if (dummyBoneSet_) { vkCmdBindDescriptorSets(cmd, VK_PIPELINE_BIND_POINT_GRAPHICS, pipelineLayout_, 2, 1, &dummyBoneSet_, 0, nullptr); } - for (const auto& entry : sortedVisible_) { - if (entry.index >= instances.size()) continue; - auto& instance = instances[entry.index]; + // Phase 2.1: Bind instance data SSBO (set 3) — per-instance transforms, fade, bones + if (instanceSet_[frameIndex]) { + vkCmdBindDescriptorSets(cmd, VK_PIPELINE_BIND_POINT_GRAPHICS, + pipelineLayout_, 3, 1, &instanceSet_[frameIndex], 0, nullptr); + } - // Bind vertex + index buffers once per model group - if (entry.modelId != currentModelId) { - currentModelId = entry.modelId; - currentModelValid = false; - auto mdlIt = models.find(currentModelId); - if (mdlIt == models.end()) continue; - currentModel = &mdlIt->second; - if (!currentModel->vertexBuffer || !currentModel->indexBuffer) continue; - currentModelValid = true; - VkDeviceSize offset = 0; - vkCmdBindVertexBuffers(cmd, 0, 1, ¤tModel->vertexBuffer, &offset); - vkCmdBindIndexBuffer(cmd, currentModel->indexBuffer, 0, VK_INDEX_TYPE_UINT16); - } - if (!currentModelValid) continue; + // Phase 2.1: Reset instance SSBO write cursor for this frame + instanceDataCount_ = 0; + auto* instSSBO = static_cast(instanceMapped_[frameIndex]); - const M2ModelGPU& model = *currentModel; + // ===================================================================== + // Phase 2.1: Opaque pass — instanced draws grouped by (modelId, LOD) + // ===================================================================== + // sortedVisible_ is already sorted by modelId so consecutive entries share + // the same vertex/index buffer. 
Within each model group we sub-group by + // targetLOD to guarantee all instances in one vkCmdDrawIndexed use the + // same batch set. Per-instance data (model matrix, fade, bones) is + // written to the instance SSBO; the shader reads it via gl_InstanceIndex. + { + struct PendingInstance { + uint32_t instanceIdx; + float fadeAlpha; + bool useBones; + uint16_t targetLOD; + }; + std::vector pending; + pending.reserve(128); - // Distance-based fade alpha for smooth pop-in (squared-distance, no sqrt) - float fadeAlpha = 1.0f; - float fadeFrac = model.disableAnimation ? 0.55f : fadeStartFraction; - float fadeStartDistSq = entry.effectiveMaxDistSq * fadeFrac * fadeFrac; - if (entry.distSq > fadeStartDistSq) { - fadeAlpha = std::clamp((entry.effectiveMaxDistSq - entry.distSq) / - (entry.effectiveMaxDistSq - fadeStartDistSq), 0.0f, 1.0f); - } + size_t visStart = 0; + while (visStart < sortedVisible_.size()) { + // Find group of consecutive entries with same modelId + uint32_t groupModelId = sortedVisible_[visStart].modelId; + size_t groupEnd = visStart; + while (groupEnd < sortedVisible_.size() && sortedVisible_[groupEnd].modelId == groupModelId) + groupEnd++; - float instanceFadeAlpha = fadeAlpha; - if (model.isGroundDetail) { - instanceFadeAlpha *= 0.82f; - } - if (model.isInstancePortal) { - // Render mesh at low alpha + emit glow sprite at center - instanceFadeAlpha *= 0.12f; - if (entry.distSq < 400.0f * 400.0f) { - glm::vec3 center = glm::vec3(instance.modelMatrix * glm::vec4(0.0f, 0.0f, 0.0f, 1.0f)); - GlowSprite gs; - gs.worldPos = center; - gs.color = glm::vec4(0.35f, 0.5f, 1.0f, 1.1f); - gs.size = instance.scale * 5.0f; - glowSprites_.push_back(gs); - GlowSprite halo = gs; - halo.color.a *= 0.3f; - halo.size *= 2.2f; - glowSprites_.push_back(halo); - } - } - - // Upload bone matrices to SSBO if model has skeletal animation. 
- // Skip animated instances entirely until bones are computed + buffers allocated - // to prevent bind-pose/T-pose flash on first appearance. - bool modelNeedsAnimation = model.hasAnimation && !model.disableAnimation; - if (modelNeedsAnimation && instance.boneMatrices.empty()) { - continue; // Bones not yet computed — skip to avoid bind-pose flash - } - bool needsBones = modelNeedsAnimation && !instance.boneMatrices.empty(); - if (needsBones && (!instance.boneBuffer[frameIndex] || !instance.boneSet[frameIndex])) { - continue; // Bone buffers not yet allocated — skip to avoid bind-pose flash - } - bool useBones = needsBones; - if (useBones) { - // Upload bone matrices only when recomputed (per-frame-index tracking - // ensures both double-buffered SSBOs get the latest bone data) - if (instance.bonesDirty[frameIndex] && instance.boneMapped[frameIndex]) { - int numBones = std::min(static_cast(instance.boneMatrices.size()), 128); - memcpy(instance.boneMapped[frameIndex], instance.boneMatrices.data(), - numBones * sizeof(glm::mat4)); - instance.bonesDirty[frameIndex] = false; - } - - // Bind bone descriptor set (set 2) — skip if already bound - if (instance.boneSet[frameIndex] && instance.boneSet[frameIndex] != currentBoneSet) { - vkCmdBindDescriptorSets(cmd, VK_PIPELINE_BIND_POINT_GRAPHICS, - pipelineLayout_, 2, 1, &instance.boneSet[frameIndex], 0, nullptr); - currentBoneSet = instance.boneSet[frameIndex]; - } - } - - // LOD selection based on squared distance (avoid sqrt) - uint16_t desiredLOD = 0; - if (entry.distSq > 150.0f * 150.0f) desiredLOD = 3; - else if (entry.distSq > 80.0f * 80.0f) desiredLOD = 2; - else if (entry.distSq > 40.0f * 40.0f) desiredLOD = 1; - - uint16_t targetLOD = desiredLOD; - if (desiredLOD > 0 && !(model.availableLODs & (1u << desiredLOD))) { - targetLOD = 0; - } - - const bool foliageLikeModel = model.isFoliageLike; - // Particle-dominant spell effects: mesh is emission geometry, render dim - const bool particleDominantEffect = 
model.isSpellEffect && - !model.particleEmitters.empty() && model.batches.size() <= 2; - for (const auto& batch : model.batches) { - if (batch.indexCount == 0) continue; - if (!model.isGroundDetail && batch.submeshLevel != targetLOD) continue; - if (batch.batchOpacity < 0.01f) continue; - - // Two-pass gate: pass 1 = opaque/cutout only, pass 2 = transparent/additive only. - // Alpha-test (blendMode==1) and spell effects that force-additive are handled - // by their effective blend mode below; gate on raw blendMode here. - { - const bool rawTransparent = (batch.blendMode >= 2) || model.isSpellEffect; - if (opaquePass && rawTransparent) continue; // skip transparent in opaque pass - if (!opaquePass && !rawTransparent) continue; // skip opaque in transparent pass - } - - const bool koboldFlameCard = batch.colorKeyBlack && model.isKoboldFlame; - const bool smallCardLikeBatch = - (batch.glowSize <= 1.35f) || - (batch.lanternGlowHint && batch.glowSize <= 6.0f); - const bool batchUnlit = (batch.materialFlags & 0x01) != 0; - const bool elvenLikeModel = model.isElvenLike; - const bool lanternLikeModel = model.isLanternLike; - const bool shouldUseGlowSprite = - !koboldFlameCard && - (elvenLikeModel || (lanternLikeModel && batch.lanternGlowHint)) && - !model.isSpellEffect && - smallCardLikeBatch && - (batch.lanternGlowHint || - (batch.blendMode >= 3) || - (batch.colorKeyBlack && batchUnlit && batch.blendMode >= 1)); - if (shouldUseGlowSprite) { - if (entry.distSq < 180.0f * 180.0f) { - glm::vec3 worldPos = glm::vec3(instance.modelMatrix * glm::vec4(batch.center, 1.0f)); - GlowSprite gs; - gs.worldPos = worldPos; - if (batch.glowTint == 1 || elvenLikeModel) { - gs.color = glm::vec4(0.48f, 0.72f, 1.0f, 1.05f); - } else if (batch.glowTint == 2) { - gs.color = glm::vec4(1.0f, 0.28f, 0.22f, 1.10f); - } else { - gs.color = glm::vec4(1.0f, 0.82f, 0.46f, 1.15f); - } - gs.size = batch.glowSize * instance.scale * 1.45f; - glowSprites_.push_back(gs); - GlowSprite halo = gs; - 
halo.color.a *= 0.42f; - halo.size *= 1.8f; - glowSprites_.push_back(halo); - } - const bool cardLikeSkipMesh = - (batch.blendMode >= 3) || - batch.colorKeyBlack || - ((batch.materialFlags & 0x01) != 0); - const bool lanternGlowCardSkip = - lanternLikeModel && - batch.lanternGlowHint && - smallCardLikeBatch && - cardLikeSkipMesh; - if (lanternGlowCardSkip || (cardLikeSkipMesh && !lanternLikeModel)) { - continue; - } - } - - // Compute UV offset for texture animation - glm::vec2 uvOffset(0.0f, 0.0f); - if (batch.textureAnimIndex != 0xFFFF && model.hasTextureAnimation) { - uint16_t lookupIdx = batch.textureAnimIndex; - if (lookupIdx < model.textureTransformLookup.size()) { - uint16_t transformIdx = model.textureTransformLookup[lookupIdx]; - if (transformIdx < model.textureTransforms.size()) { - const auto& tt = model.textureTransforms[transformIdx]; - glm::vec3 trans = interpVec3(tt.translation, - instance.currentSequenceIndex, instance.animTime, - glm::vec3(0.0f), model.globalSequenceDurations); - uvOffset = glm::vec2(trans.x, trans.y); - } - } - } - // Lava M2 models: fallback UV scroll if no texture animation. - // Uses kLavaAnimStart (file-scope) for consistent timing across passes. - if (model.isLavaModel && uvOffset == glm::vec2(0.0f)) { - float t = std::chrono::duration(std::chrono::steady_clock::now() - kLavaAnimStart).count(); - uvOffset = glm::vec2(t * 0.03f, -t * 0.08f); - } - - // Foliage/card-like batches render more stably as cutout (depth-write on) - // instead of alpha-blended sorting. 
- const bool foliageCutout = - foliageLikeModel && - !model.isSpellEffect && - batch.blendMode <= 3; - const bool forceCutout = - !model.isSpellEffect && - (model.isGroundDetail || - foliageCutout || - batch.blendMode == 1 || - (batch.blendMode >= 2 && !batch.hasAlpha) || - batch.colorKeyBlack); - - // Select pipeline based on blend mode - uint8_t effectiveBlendMode = batch.blendMode; - if (model.isSpellEffect) { - // Effect models: force additive blend for opaque/cutout batches - // so the mesh renders as a transparent glow, not a solid object - if (effectiveBlendMode <= 1) { - effectiveBlendMode = 3; // additive - } else if (effectiveBlendMode == 4 || effectiveBlendMode == 5) { - effectiveBlendMode = 3; - } - } - if (forceCutout) { - effectiveBlendMode = 1; - } - - VkPipeline desiredPipeline; - if (forceCutout) { - // Use opaque pipeline + shader discard for stable foliage cards. - desiredPipeline = opaquePipeline_; - } else { - switch (effectiveBlendMode) { - case 0: desiredPipeline = opaquePipeline_; break; - case 1: desiredPipeline = alphaTestPipeline_; break; - case 2: desiredPipeline = alphaPipeline_; break; - default: desiredPipeline = additivePipeline_; break; - } - } - if (desiredPipeline != currentPipeline) { - vkCmdBindPipeline(cmd, VK_PIPELINE_BIND_POINT_GRAPHICS, desiredPipeline); - currentPipeline = desiredPipeline; - } - - // Update material UBO with per-draw dynamic values (interiorDarken, forceCutout overrides) - // Note: fadeAlpha is in push constants (per-draw) to avoid shared-UBO race - if (batch.materialUBOMapped) { - auto* mat = static_cast(batch.materialUBOMapped); - mat->interiorDarken = insideInterior ? 1.0f : 0.0f; - if (batch.colorKeyBlack) { - mat->colorKeyThreshold = (effectiveBlendMode == 4 || effectiveBlendMode == 5) ? 0.7f : 0.08f; - } - if (forceCutout) { - mat->alphaTest = model.isGroundDetail ? 3 : (foliageCutout ? 
2 : 1); - if (model.isGroundDetail) { - mat->unlit = 0; - } - } - } - - // Bind material descriptor set (set 1) — skip batch if missing - // to avoid inheriting a stale descriptor set from a prior renderer - if (!batch.materialSet) continue; - if (batch.materialSet != currentMaterialSet) { - vkCmdBindDescriptorSets(cmd, VK_PIPELINE_BIND_POINT_GRAPHICS, - pipelineLayout_, 1, 1, &batch.materialSet, 0, nullptr); - currentMaterialSet = batch.materialSet; - } - - // Push constants - M2PushConstants pc; - pc.model = instance.modelMatrix; - pc.uvOffset = uvOffset; - pc.texCoordSet = static_cast(batch.textureUnit); - pc.useBones = useBones ? 1 : 0; - pc.isFoliage = model.shadowWindFoliage ? 1 : 0; - pc.fadeAlpha = instanceFadeAlpha; - // Particle-dominant effects: mesh is emission geometry, don't render - if (particleDominantEffect && batch.blendMode <= 1) { + auto mdlIt = models.find(groupModelId); + if (mdlIt == models.end() || !mdlIt->second.vertexBuffer || !mdlIt->second.indexBuffer) { + visStart = groupEnd; continue; } - vkCmdPushConstants(cmd, pipelineLayout_, VK_SHADER_STAGE_VERTEX_BIT, 0, sizeof(pc), &pc); - vkCmdDrawIndexed(cmd, batch.indexCount, 1, batch.indexStart, 0, 0); - lastDrawCallCount++; + const M2ModelGPU& model = mdlIt->second; + + bool modelNeedsAnimation = model.hasAnimation && !model.disableAnimation; + const bool foliageLikeModel = model.isFoliageLike; + const bool particleDominantEffect = model.isSpellEffect && + !model.particleEmitters.empty() && model.batches.size() <= 2; + + // Collect per-instance data for this model group + pending.clear(); + for (size_t vi = visStart; vi < groupEnd; vi++) { + const auto& entry = sortedVisible_[vi]; + if (entry.index >= instances.size()) continue; + auto& instance = instances[entry.index]; + + // Distance-based fade alpha + float fadeFrac = model.disableAnimation ? 
0.55f : fadeStartFraction; + float fadeStartDistSq = entry.effectiveMaxDistSq * fadeFrac * fadeFrac; + float fadeAlpha = 1.0f; + if (entry.distSq > fadeStartDistSq) { + fadeAlpha = std::clamp((entry.effectiveMaxDistSq - entry.distSq) / + (entry.effectiveMaxDistSq - fadeStartDistSq), 0.0f, 1.0f); + } + float instanceFadeAlpha = fadeAlpha; + if (model.isGroundDetail) instanceFadeAlpha *= 0.82f; + if (model.isInstancePortal) { + instanceFadeAlpha *= 0.12f; + if (entry.distSq < 400.0f * 400.0f) { + glm::vec3 center = glm::vec3(instance.modelMatrix * glm::vec4(0.0f, 0.0f, 0.0f, 1.0f)); + GlowSprite gs; + gs.worldPos = center; + gs.color = glm::vec4(0.35f, 0.5f, 1.0f, 1.1f); + gs.size = instance.scale * 5.0f; + glowSprites_.push_back(gs); + GlowSprite halo = gs; + halo.color.a *= 0.3f; + halo.size *= 2.2f; + glowSprites_.push_back(halo); + } + } + + // Bone readiness check + if (modelNeedsAnimation && instance.boneMatrices.empty()) continue; + bool needsBones = modelNeedsAnimation && !instance.boneMatrices.empty(); + if (needsBones && instance.megaBoneOffset == 0) continue; + + // LOD selection + uint16_t desiredLOD = 0; + if (entry.distSq > 150.0f * 150.0f) desiredLOD = 3; + else if (entry.distSq > 80.0f * 80.0f) desiredLOD = 2; + else if (entry.distSq > 40.0f * 40.0f) desiredLOD = 1; + uint16_t targetLOD = desiredLOD; + if (desiredLOD > 0 && !(model.availableLODs & (1u << desiredLOD))) targetLOD = 0; + + pending.push_back({entry.index, instanceFadeAlpha, needsBones, targetLOD}); + } + + if (pending.empty()) { visStart = groupEnd; continue; } + + // Sort by targetLOD so each sub-group occupies a contiguous SSBO range + std::sort(pending.begin(), pending.end(), + [](const PendingInstance& a, const PendingInstance& b) { return a.targetLOD < b.targetLOD; }); + + // Bind vertex/index buffers once per model group + VkDeviceSize vbOffset = 0; + vkCmdBindVertexBuffers(cmd, 0, 1, &model.vertexBuffer, &vbOffset); + vkCmdBindIndexBuffer(cmd, model.indexBuffer, 0, 
VK_INDEX_TYPE_UINT16); + + // Write base instance data to SSBO (uvOffset=0 — overridden for tex-anim batches) + uint32_t baseSSBOOffset = instanceDataCount_; + for (const auto& p : pending) { + if (instanceDataCount_ >= MAX_INSTANCE_DATA) break; + auto& inst = instances[p.instanceIdx]; + auto& e = instSSBO[instanceDataCount_]; + e.model = inst.modelMatrix; + e.uvOffset = glm::vec2(0.0f); + e.fadeAlpha = p.fadeAlpha; + e.useBones = p.useBones ? 1 : 0; + e.boneBase = p.useBones ? static_cast(inst.megaBoneOffset) : 0; + std::memset(e._pad, 0, sizeof(e._pad)); + instanceDataCount_++; + } + + // Process LOD sub-groups within this model group + size_t lodIdx = 0; + while (lodIdx < pending.size()) { + uint16_t lod = pending[lodIdx].targetLOD; + size_t lodEnd = lodIdx + 1; + while (lodEnd < pending.size() && pending[lodEnd].targetLOD == lod) lodEnd++; + uint32_t groupSize = static_cast(lodEnd - lodIdx); + uint32_t groupSSBOOffset = baseSSBOOffset + static_cast(lodIdx); + + for (size_t bi = 0; bi < model.batches.size(); bi++) { + const auto& batch = model.batches[bi]; + if (batch.indexCount == 0) continue; + if (!model.isGroundDetail && batch.submeshLevel != lod) continue; + if (batch.batchOpacity < 0.01f) continue; + + // Opaque gate — skip transparent batches + const bool rawTransparent = (batch.blendMode >= 2) || model.isSpellEffect; + if (rawTransparent) continue; + + // Particle-dominant effects: emission geometry — skip opaque + if (particleDominantEffect && batch.blendMode <= 1) continue; + + // Glow sprite check (per model+batch, sprites generated per instance) + const bool koboldFlameCard = batch.colorKeyBlack && model.isKoboldFlame; + const bool smallCardLikeBatch = + (batch.glowSize <= 1.35f) || + (batch.lanternGlowHint && batch.glowSize <= 6.0f); + const bool batchUnlit = (batch.materialFlags & 0x01) != 0; + const bool shouldUseGlowSprite = + !koboldFlameCard && + (model.isElvenLike || (model.isLanternLike && batch.lanternGlowHint)) && + !model.isSpellEffect && 
+ smallCardLikeBatch && + (batch.lanternGlowHint || + (batch.blendMode >= 3) || + (batch.colorKeyBlack && batchUnlit && batch.blendMode >= 1)); + if (shouldUseGlowSprite) { + // Generate glow sprites for each instance in the group + for (size_t j = lodIdx; j < lodEnd; j++) { + auto& inst = instances[pending[j].instanceIdx]; + float distSq = sortedVisible_[visStart].distSq; // approximate with group + if (distSq < 180.0f * 180.0f) { + glm::vec3 worldPos = glm::vec3(inst.modelMatrix * glm::vec4(batch.center, 1.0f)); + GlowSprite gs; + gs.worldPos = worldPos; + if (batch.glowTint == 1 || model.isElvenLike) + gs.color = glm::vec4(0.48f, 0.72f, 1.0f, 1.05f); + else if (batch.glowTint == 2) + gs.color = glm::vec4(1.0f, 0.28f, 0.22f, 1.10f); + else + gs.color = glm::vec4(1.0f, 0.82f, 0.46f, 1.15f); + gs.size = batch.glowSize * inst.scale * 1.45f; + glowSprites_.push_back(gs); + GlowSprite halo = gs; + halo.color.a *= 0.42f; + halo.size *= 1.8f; + glowSprites_.push_back(halo); + } + } + const bool cardLikeSkipMesh = + (batch.blendMode >= 3) || batch.colorKeyBlack || batchUnlit; + const bool lanternGlowCardSkip = + model.isLanternLike && batch.lanternGlowHint && + smallCardLikeBatch && cardLikeSkipMesh; + if (lanternGlowCardSkip || (cardLikeSkipMesh && !model.isLanternLike)) + continue; + } + + // Handle texture animation: if this batch has per-instance uvOffset, + // write a separate SSBO range with the correct offsets. 
+ bool hasBatchTexAnim = (batch.textureAnimIndex != 0xFFFF && model.hasTextureAnimation) + || model.isLavaModel; + uint32_t drawOffset = groupSSBOOffset; + if (hasBatchTexAnim && instanceDataCount_ + groupSize <= MAX_INSTANCE_DATA) { + drawOffset = instanceDataCount_; + for (size_t j = lodIdx; j < lodEnd; j++) { + auto& inst = instances[pending[j].instanceIdx]; + glm::vec2 uvOffset(0.0f); + if (batch.textureAnimIndex != 0xFFFF && model.hasTextureAnimation) { + uint16_t lookupIdx = batch.textureAnimIndex; + if (lookupIdx < model.textureTransformLookup.size()) { + uint16_t transformIdx = model.textureTransformLookup[lookupIdx]; + if (transformIdx < model.textureTransforms.size()) { + const auto& tt = model.textureTransforms[transformIdx]; + glm::vec3 trans = interpVec3(tt.translation, + inst.currentSequenceIndex, inst.animTime, + glm::vec3(0.0f), model.globalSequenceDurations); + uvOffset = glm::vec2(trans.x, trans.y); + } + } + } + if (model.isLavaModel && uvOffset == glm::vec2(0.0f)) { + float t = std::chrono::duration( + std::chrono::steady_clock::now() - kLavaAnimStart).count(); + uvOffset = glm::vec2(t * 0.03f, -t * 0.08f); + } + // Copy base entry and override uvOffset + instSSBO[instanceDataCount_] = instSSBO[groupSSBOOffset + (j - lodIdx)]; + instSSBO[instanceDataCount_].uvOffset = uvOffset; + instanceDataCount_++; + } + } + + // Pipeline selection (per-model/batch, not per-instance) + const bool foliageCutout = foliageLikeModel && !model.isSpellEffect && batch.blendMode <= 3; + const bool forceCutout = + !model.isSpellEffect && + (model.isGroundDetail || foliageCutout || + batch.blendMode == 1 || + (batch.blendMode >= 2 && !batch.hasAlpha) || + batch.colorKeyBlack); + + uint8_t effectiveBlendMode = batch.blendMode; + if (model.isSpellEffect) { + if (effectiveBlendMode <= 1) effectiveBlendMode = 3; + else if (effectiveBlendMode == 4 || effectiveBlendMode == 5) effectiveBlendMode = 3; + } + if (forceCutout) effectiveBlendMode = 1; + + VkPipeline 
desiredPipeline; + if (forceCutout) { + desiredPipeline = opaquePipeline_; + } else { + switch (effectiveBlendMode) { + case 0: desiredPipeline = opaquePipeline_; break; + case 1: desiredPipeline = alphaTestPipeline_; break; + case 2: desiredPipeline = alphaPipeline_; break; + default: desiredPipeline = additivePipeline_; break; + } + } + if (desiredPipeline != currentPipeline) { + vkCmdBindPipeline(cmd, VK_PIPELINE_BIND_POINT_GRAPHICS, desiredPipeline); + currentPipeline = desiredPipeline; + } + + // Update material UBO + if (batch.materialUBOMapped) { + auto* mat = static_cast(batch.materialUBOMapped); + mat->interiorDarken = insideInterior ? 1.0f : 0.0f; + if (batch.colorKeyBlack) + mat->colorKeyThreshold = (effectiveBlendMode == 4 || effectiveBlendMode == 5) ? 0.7f : 0.08f; + if (forceCutout) { + mat->alphaTest = model.isGroundDetail ? 3 : (foliageCutout ? 2 : 1); + if (model.isGroundDetail) mat->unlit = 0; + } + } + + // Bind material descriptor set (set 1) + if (!batch.materialSet) continue; + if (batch.materialSet != currentMaterialSet) { + vkCmdBindDescriptorSets(cmd, VK_PIPELINE_BIND_POINT_GRAPHICS, + pipelineLayout_, 1, 1, &batch.materialSet, 0, nullptr); + currentMaterialSet = batch.materialSet; + } + + // Push constants + instanced draw + M2PushConstants pc; + pc.texCoordSet = static_cast(batch.textureUnit); + pc.isFoliage = model.shadowWindFoliage ? 1 : 0; + pc.instanceDataOffset = static_cast(drawOffset); + vkCmdPushConstants(cmd, pipelineLayout_, VK_SHADER_STAGE_VERTEX_BIT, 0, sizeof(pc), &pc); + vkCmdDrawIndexed(cmd, batch.indexCount, groupSize, batch.indexStart, 0, 0); + lastDrawCallCount++; + } + + lodIdx = lodEnd; + } + + visStart = groupEnd; } } - // Pass 2: transparent/additive batches — sort back-to-front by distance so - // overlapping transparent geometry composites in the correct painter's order. 
- opaquePass = false; + // ===================================================================== + // Pass 2: Transparent/additive batches — back-to-front per instance + // ===================================================================== + // Transparent geometry must be drawn individually per instance in back-to- + // front order for correct alpha compositing. Each draw writes one + // M2InstanceGPU entry and issues a single-instance indexed draw. std::sort(sortedVisible_.begin(), sortedVisible_.end(), [](const VisibleEntry& a, const VisibleEntry& b) { return a.distSq > b.distSq; }); currentModelId = UINT32_MAX; currentModel = nullptr; currentModelValid = false; - // Reset state so the first transparent bind always sets explicitly currentPipeline = opaquePipeline_; currentMaterialSet = VK_NULL_HANDLE; - currentBoneSet = VK_NULL_HANDLE; for (const auto& entry : sortedVisible_) { if (entry.index >= instances.size()) continue; auto& instance = instances[entry.index]; - // Quick skip: if model has no transparent batches at all, skip it entirely + // Quick skip: if model has no transparent batches at all if (entry.modelId != currentModelId) { auto mdlIt = models.find(entry.modelId); if (mdlIt == models.end()) continue; if (!mdlIt->second.hasTransparentBatches && !mdlIt->second.isSpellEffect) continue; } - // Reuse the same rendering logic as pass 1 (via fallthrough — the batch gate - // `!opaquePass && !rawTransparent → continue` handles opaque skipping) if (entry.modelId != currentModelId) { currentModelId = entry.modelId; currentModelValid = false; @@ -2690,15 +3072,15 @@ void M2Renderer::render(VkCommandBuffer cmd, VkDescriptorSet perFrameSet, const currentModel = &mdlIt->second; if (!currentModel->vertexBuffer || !currentModel->indexBuffer) continue; currentModelValid = true; - VkDeviceSize offset = 0; - vkCmdBindVertexBuffers(cmd, 0, 1, ¤tModel->vertexBuffer, &offset); + VkDeviceSize vbOff = 0; + vkCmdBindVertexBuffers(cmd, 0, 1, ¤tModel->vertexBuffer, 
&vbOff); vkCmdBindIndexBuffer(cmd, currentModel->indexBuffer, 0, VK_INDEX_TYPE_UINT16); } if (!currentModelValid) continue; const M2ModelGPU& model = *currentModel; - // Distance-based fade alpha (same as pass 1) + // Fade alpha float fadeAlpha = 1.0f; float fadeFrac = model.disableAnimation ? 0.55f : fadeStartFraction; float fadeStartDistSq = entry.effectiveMaxDistSq * fadeFrac * fadeFrac; @@ -2713,13 +3095,7 @@ void M2Renderer::render(VkCommandBuffer cmd, VkDescriptorSet perFrameSet, const bool modelNeedsAnimation = model.hasAnimation && !model.disableAnimation; if (modelNeedsAnimation && instance.boneMatrices.empty()) continue; bool needsBones = modelNeedsAnimation && !instance.boneMatrices.empty(); - if (needsBones && (!instance.boneBuffer[frameIndex] || !instance.boneSet[frameIndex])) continue; - bool useBones = needsBones; - if (useBones && instance.boneSet[frameIndex] && instance.boneSet[frameIndex] != currentBoneSet) { - vkCmdBindDescriptorSets(cmd, VK_PIPELINE_BIND_POINT_GRAPHICS, - pipelineLayout_, 2, 1, &instance.boneSet[frameIndex], 0, nullptr); - currentBoneSet = instance.boneSet[frameIndex]; - } + if (needsBones && instance.megaBoneOffset == 0) continue; uint16_t desiredLOD = 0; if (entry.distSq > 150.0f * 150.0f) desiredLOD = 3; @@ -2742,7 +3118,7 @@ void M2Renderer::render(VkCommandBuffer cmd, VkDescriptorSet perFrameSet, const if (!rawTransparent) continue; } - // Skip glow sprites (handled after loop) + // Skip glow sprites (handled in opaque pass) const bool batchUnlit = (batch.materialFlags & 0x01) != 0; const bool koboldFlameCard = batch.colorKeyBlack && model.isKoboldFlame; const bool smallCardLikeBatch = @@ -2766,7 +3142,10 @@ void M2Renderer::render(VkCommandBuffer cmd, VkDescriptorSet perFrameSet, const continue; } - glm::vec2 uvOffset(0.0f, 0.0f); + if (particleDominantEffect) continue; // emission-only mesh + + // Compute UV offset for this instance + batch + glm::vec2 uvOffset(0.0f); if (batch.textureAnimIndex != 0xFFFF && 
model.hasTextureAnimation) { uint16_t lookupIdx = batch.textureAnimIndex; if (lookupIdx < model.textureTransformLookup.size()) { @@ -2785,6 +3164,19 @@ void M2Renderer::render(VkCommandBuffer cmd, VkDescriptorSet perFrameSet, const uvOffset = glm::vec2(t * 0.03f, -t * 0.08f); } + // Write single instance entry to SSBO + if (instanceDataCount_ >= MAX_INSTANCE_DATA) continue; + uint32_t drawOffset = instanceDataCount_; + auto& e = instSSBO[instanceDataCount_]; + e.model = instance.modelMatrix; + e.uvOffset = uvOffset; + e.fadeAlpha = instanceFadeAlpha; + e.useBones = needsBones ? 1 : 0; + e.boneBase = needsBones ? static_cast(instance.megaBoneOffset) : 0; + std::memset(e._pad, 0, sizeof(e._pad)); + instanceDataCount_++; + + // Pipeline selection uint8_t effectiveBlendMode = batch.blendMode; if (model.isSpellEffect) { if (effectiveBlendMode <= 1) effectiveBlendMode = 3; @@ -2815,14 +3207,11 @@ void M2Renderer::render(VkCommandBuffer cmd, VkDescriptorSet perFrameSet, const currentMaterialSet = batch.materialSet; } + // Push constants + single-instance draw M2PushConstants pc; - pc.model = instance.modelMatrix; - pc.uvOffset = uvOffset; - pc.texCoordSet = static_cast(batch.textureUnit); - pc.useBones = useBones ? 1 : 0; + pc.texCoordSet = static_cast(batch.textureUnit); pc.isFoliage = model.shadowWindFoliage ? 
1 : 0; - pc.fadeAlpha = instanceFadeAlpha; - if (particleDominantEffect) continue; // emission-only mesh + pc.instanceDataOffset = static_cast(drawOffset); vkCmdPushConstants(cmd, pipelineLayout_, VK_SHADER_STAGE_VERTEX_BIT, 0, sizeof(pc), &pc); vkCmdDrawIndexed(cmd, batch.indexCount, 1, batch.indexStart, 0, 0); lastDrawCallCount++; @@ -4842,7 +5231,9 @@ void M2Renderer::recreatePipelines() { {4, 0, VK_FORMAT_R32G32B32A32_SFLOAT, 14 * sizeof(float)}, // boneIndices (float) }; - auto buildM2Pipeline = [&](VkPipelineColorBlendAttachmentState blendState, bool depthWrite) -> VkPipeline { + // Pipeline derivatives — opaque is the base, others derive from it for shared state optimization + auto buildM2Pipeline = [&](VkPipelineColorBlendAttachmentState blendState, bool depthWrite, + VkPipelineCreateFlags flags = 0, VkPipeline basePipeline = VK_NULL_HANDLE) -> VkPipeline { return PipelineBuilder() .setShaders(m2Vert.stageInfo(VK_SHADER_STAGE_VERTEX_BIT), m2Frag.stageInfo(VK_SHADER_STAGE_FRAGMENT_BIT)) @@ -4855,13 +5246,19 @@ void M2Renderer::recreatePipelines() { .setLayout(pipelineLayout_) .setRenderPass(mainPass) .setDynamicStates({VK_DYNAMIC_STATE_VIEWPORT, VK_DYNAMIC_STATE_SCISSOR}) + .setFlags(flags) + .setBasePipeline(basePipeline) .build(device, vkCtx_->getPipelineCache()); }; - opaquePipeline_ = buildM2Pipeline(PipelineBuilder::blendDisabled(), true); - alphaTestPipeline_ = buildM2Pipeline(PipelineBuilder::blendAlpha(), true); - alphaPipeline_ = buildM2Pipeline(PipelineBuilder::blendAlpha(), false); - additivePipeline_ = buildM2Pipeline(PipelineBuilder::blendAdditive(), false); + opaquePipeline_ = buildM2Pipeline(PipelineBuilder::blendDisabled(), true, + VK_PIPELINE_CREATE_ALLOW_DERIVATIVES_BIT); + alphaTestPipeline_ = buildM2Pipeline(PipelineBuilder::blendAlpha(), true, + VK_PIPELINE_CREATE_DERIVATIVE_BIT, opaquePipeline_); + alphaPipeline_ = buildM2Pipeline(PipelineBuilder::blendAlpha(), false, + VK_PIPELINE_CREATE_DERIVATIVE_BIT, opaquePipeline_); + 
additivePipeline_ = buildM2Pipeline(PipelineBuilder::blendAdditive(), false, + VK_PIPELINE_CREATE_DERIVATIVE_BIT, opaquePipeline_); // --- Particle pipelines --- if (particleVert.isValid() && particleFrag.isValid()) { diff --git a/src/rendering/render_graph.cpp b/src/rendering/render_graph.cpp new file mode 100644 index 00000000..d36d20bc --- /dev/null +++ b/src/rendering/render_graph.cpp @@ -0,0 +1,194 @@ +#include "rendering/render_graph.hpp" +#include "core/logger.hpp" +#include +#include +#include + +namespace wowee { +namespace rendering { + +void RenderGraph::reset() { + passes_.clear(); + executionOrder_.clear(); + compiled_ = false; + // Keep resource registry — resources are stable across frames +} + +RGResource RenderGraph::registerResource(const std::string& name) { + // Check for duplicate + for (const auto& r : resources_) { + if (r.name == name) return {r.id}; + } + uint32_t id = nextResourceId_++; + resources_.push_back({name, id}); + return {id}; +} + +RGResource RenderGraph::findResource(const std::string& name) const { + for (const auto& r : resources_) { + if (r.name == name) return {r.id}; + } + return {}; // invalid +} + +void RenderGraph::addPass(const std::string& name, + const std::vector& inputs, + const std::vector& outputs, + std::function execute) { + RGPass pass; + pass.name = name; + pass.inputs = inputs; + pass.outputs = outputs; + pass.execute = std::move(execute); + pass.enabled = true; + passes_.push_back(std::move(pass)); +} + +void RenderGraph::setPassEnabled(const std::string& name, bool enabled) { + for (auto& pass : passes_) { + if (pass.name == name) { + pass.enabled = enabled; + return; + } + } +} + +void RenderGraph::compile() { + topologicalSort(); + compiled_ = true; +} + +void RenderGraph::topologicalSort() { + const uint32_t n = static_cast(passes_.size()); + if (n == 0) { executionOrder_.clear(); return; } + + // Build adjacency: if pass A outputs resource R and pass B inputs resource R, + // then A must execute before 
B (edge A → B). + // Map: resource id → index of pass that produces it + std::unordered_map producer; + for (uint32_t i = 0; i < n; ++i) { + for (const auto& out : passes_[i].outputs) { + producer[out.id] = i; + } + } + + // Build in-degree and adjacency list + std::vector inDegree(n, 0); + std::vector> adj(n); + + for (uint32_t i = 0; i < n; ++i) { + for (const auto& inp : passes_[i].inputs) { + auto it = producer.find(inp.id); + if (it != producer.end() && it->second != i) { + adj[it->second].push_back(i); + inDegree[i]++; + } + } + } + + // Kahn's algorithm + std::queue queue; + for (uint32_t i = 0; i < n; ++i) { + if (inDegree[i] == 0) queue.push(i); + } + + executionOrder_.clear(); + executionOrder_.reserve(n); + + while (!queue.empty()) { + uint32_t u = queue.front(); + queue.pop(); + executionOrder_.push_back(u); + for (uint32_t v : adj[u]) { + if (--inDegree[v] == 0) queue.push(v); + } + } + + // If not all passes are in the order, there's a cycle — fall back to insertion order + if (executionOrder_.size() != n) { + LOG_WARNING("RenderGraph: dependency cycle detected, falling back to insertion order"); + executionOrder_.clear(); + for (uint32_t i = 0; i < n; ++i) executionOrder_.push_back(i); + } +} + +void RenderGraph::execute(VkCommandBuffer cmd) { + if (!compiled_) { + LOG_WARNING("RenderGraph::execute called without compile()"); + compile(); + } + + for (uint32_t idx : executionOrder_) { + const auto& pass = passes_[idx]; + if (!pass.enabled) continue; + + // Insert image barriers declared for this pass + if (!pass.imageBarriers.empty()) { + std::vector barriers; + barriers.reserve(pass.imageBarriers.size()); + + VkPipelineStageFlags srcStages = 0; + VkPipelineStageFlags dstStages = 0; + + for (const auto& b : pass.imageBarriers) { + VkImageMemoryBarrier ib{}; + ib.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER; + ib.oldLayout = b.oldLayout; + ib.newLayout = b.newLayout; + ib.srcAccessMask = b.srcAccess; + ib.dstAccessMask = b.dstAccess; + 
ib.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + ib.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + ib.image = b.image; + ib.subresourceRange = {b.aspectMask, 0, 1, 0, 1}; + barriers.push_back(ib); + srcStages |= b.srcStage; + dstStages |= b.dstStage; + } + + vkCmdPipelineBarrier(cmd, + srcStages, dstStages, + 0, + 0, nullptr, + 0, nullptr, + static_cast(barriers.size()), barriers.data()); + } + + // Insert buffer barriers declared for this pass + if (!pass.bufferBarriers.empty()) { + std::vector barriers; + barriers.reserve(pass.bufferBarriers.size()); + + VkPipelineStageFlags srcStages = 0; + VkPipelineStageFlags dstStages = 0; + + for (const auto& b : pass.bufferBarriers) { + VkBufferMemoryBarrier bb{}; + bb.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER; + bb.srcAccessMask = b.srcAccess; + bb.dstAccessMask = b.dstAccess; + bb.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + bb.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + bb.buffer = b.buffer; + bb.offset = b.offset; + bb.size = b.size; + barriers.push_back(bb); + srcStages |= b.srcStage; + dstStages |= b.dstStage; + } + + vkCmdPipelineBarrier(cmd, + srcStages, dstStages, + 0, + 0, nullptr, + static_cast(barriers.size()), barriers.data(), + 0, nullptr); + } + + // Execute the pass + pass.execute(cmd); + } +} + +} // namespace rendering +} // namespace wowee diff --git a/src/rendering/renderer.cpp b/src/rendering/renderer.cpp index 31f4c68c..1daf09cf 100644 --- a/src/rendering/renderer.cpp +++ b/src/rendering/renderer.cpp @@ -61,6 +61,7 @@ #include "rendering/spell_visual_system.hpp" #include "rendering/post_process_pipeline.hpp" #include "rendering/animation_controller.hpp" +#include "rendering/render_graph.hpp" #include #include #include @@ -458,7 +459,9 @@ void Renderer::updatePerFrameUBO() { } currentFrameData.lightSpaceMatrix = lightSpaceMatrix; - currentFrameData.shadowParams = glm::vec4(shadowsEnabled ? 
1.0f : 0.0f, 0.8f, 0.0f, 0.0f); + // Scale shadow bias proportionally to ortho extent to avoid acne at close range / gaps at far range + float shadowBias = 0.8f * (shadowDistance_ / 300.0f); + currentFrameData.shadowParams = glm::vec4(shadowsEnabled ? 1.0f : 0.0f, shadowBias, 0.0f, 0.0f); // Player water ripple data: pack player XY into shadowParams.zw, ripple strength into fogParams.w if (cameraController) { @@ -563,6 +566,15 @@ bool Renderer::initialize(core::Window* win) { postProcessPipeline_ = std::make_unique(); postProcessPipeline_->initialize(vkCtx); + // Phase 2.5: Create render graph and register virtual resources + renderGraph_ = std::make_unique(); + renderGraph_->registerResource("shadow_depth"); + renderGraph_->registerResource("reflection_texture"); + renderGraph_->registerResource("cull_visibility"); + renderGraph_->registerResource("scene_color"); + renderGraph_->registerResource("scene_depth"); + renderGraph_->registerResource("final_image"); + LOG_INFO("Renderer initialized"); return true; } @@ -674,6 +686,10 @@ void Renderer::shutdown() { postProcessPipeline_->shutdown(); postProcessPipeline_.reset(); } + + // Phase 2.5: Destroy render graph + renderGraph_.reset(); + destroyPerFrameResources(); zoneManager.reset(); @@ -839,36 +855,19 @@ void Renderer::beginFrame() { // FSR2 jitter pattern (§4.3 — delegates to PostProcessPipeline) if (postProcessPipeline_ && camera) postProcessPipeline_->applyJitter(camera.get()); + // Compute fresh shadow matrix BEFORE UBO update so shaders get current-frame data. 
+ lightSpaceMatrix = computeLightSpaceMatrix(); + // Update per-frame UBO with current camera/lighting state updatePerFrameUBO(); - // --- Off-screen pre-passes (before main render pass) --- - // Minimap composite (renders 3x3 tile grid into 768x768 render target) - if (minimap && minimap->isEnabled() && camera) { - glm::vec3 minimapCenter = camera->getPosition(); - if (cameraController && cameraController->isThirdPerson()) - minimapCenter = characterPosition; - minimap->compositePass(currentCmd, minimapCenter); + // --- Off-screen pre-passes (Phase 2.5: render graph) --- + // Build frame graph: registers pre-passes as graph nodes with dependencies. + // compile() topologically sorts; execute() runs them with auto barriers. + buildFrameGraph(nullptr); + if (renderGraph_) { + renderGraph_->execute(currentCmd); } - // World map composite (renders zone tiles into 1024x768 render target) - if (worldMap) { - worldMap->compositePass(currentCmd); - } - - // Character preview composite passes - for (auto* preview : activePreviews_) { - if (preview && preview->isModelLoaded()) { - preview->compositePass(currentCmd, vkCtx->getCurrentFrame()); - } - } - - // Shadow pre-pass (before main render pass) - if (shadowsEnabled && shadowDepthImage[0] != VK_NULL_HANDLE) { - renderShadowPass(); - } - - // Water reflection pre-pass (renders scene from mirrored camera into 512x512 texture) - renderReflectionPass(); // --- Begin render pass --- // Select framebuffer: PP off-screen target or swapchain (§4.3 — PostProcessPipeline) @@ -3063,17 +3062,10 @@ void Renderer::renderShadowPass() { // Shadows render every frame — throttling causes visible flicker on player/NPCs - // Compute and store light space matrix; write to per-frame UBO - lightSpaceMatrix = computeLightSpaceMatrix(); + // lightSpaceMatrix was already computed at frame start (before updatePerFrameUBO). // Zero matrix means character position isn't set yet — skip shadow pass entirely. 
if (lightSpaceMatrix == glm::mat4(0.0f)) return; uint32_t frame = vkCtx->getCurrentFrame(); - auto* ubo = reinterpret_cast(perFrameUBOMapped[frame]); - if (ubo) { - ubo->lightSpaceMatrix = lightSpaceMatrix; - ubo->shadowParams.x = shadowsEnabled ? 1.0f : 0.0f; - ubo->shadowParams.y = 0.8f; - } // Barrier 1: transition this frame's shadow map into writable depth layout. VkImageMemoryBarrier b1{}; @@ -3147,5 +3139,69 @@ void Renderer::renderShadowPass() { shadowDepthLayout_[frame] = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; } +// Phase 2.5: Build the per-frame render graph for off-screen pre-passes. +// Declares passes as graph nodes with input/output dependencies. +// compile() performs topological sort; execute() runs them with auto barriers. +void Renderer::buildFrameGraph(game::GameHandler* gameHandler) { + (void)gameHandler; + if (!renderGraph_) return; + + renderGraph_->reset(); + + auto shadowDepth = renderGraph_->findResource("shadow_depth"); + auto reflTex = renderGraph_->findResource("reflection_texture"); + auto cullVis = renderGraph_->findResource("cull_visibility"); + + // Minimap composites (no dependencies — standalone off-screen render target) + renderGraph_->addPass("minimap_composite", {}, {}, + [this](VkCommandBuffer cmd) { + if (minimap && minimap->isEnabled() && camera) { + glm::vec3 minimapCenter = camera->getPosition(); + if (cameraController && cameraController->isThirdPerson()) + minimapCenter = characterPosition; + minimap->compositePass(cmd, minimapCenter); + } + }); + + // World map composite (standalone) + renderGraph_->addPass("worldmap_composite", {}, {}, + [this](VkCommandBuffer cmd) { + if (worldMap) worldMap->compositePass(cmd); + }); + + // Character preview composites (standalone) + renderGraph_->addPass("preview_composite", {}, {}, + [this](VkCommandBuffer cmd) { + uint32_t frame = vkCtx->getCurrentFrame(); + for (auto* preview : activePreviews_) { + if (preview && preview->isModelLoaded()) + preview->compositePass(cmd, frame); + 
} + }); + + // Shadow pre-pass → outputs shadow_depth + renderGraph_->addPass("shadow_pass", {}, {shadowDepth}, + [this](VkCommandBuffer) { + if (shadowsEnabled && shadowDepthImage[0] != VK_NULL_HANDLE) + renderShadowPass(); + }); + renderGraph_->setPassEnabled("shadow_pass", shadowsEnabled && shadowDepthImage[0] != VK_NULL_HANDLE); + + // Reflection pre-pass → outputs reflection_texture (reads scene, so after shadow) + renderGraph_->addPass("reflection_pass", {shadowDepth}, {reflTex}, + [this](VkCommandBuffer) { + renderReflectionPass(); + }); + + // GPU frustum cull compute → outputs cull_visibility + renderGraph_->addPass("compute_cull", {}, {cullVis}, + [this](VkCommandBuffer cmd) { + if (m2Renderer && camera) + m2Renderer->dispatchCullCompute(cmd, vkCtx->getCurrentFrame(), *camera); + }); + + renderGraph_->compile(); +} + } // namespace rendering } // namespace wowee diff --git a/src/rendering/terrain_renderer.cpp b/src/rendering/terrain_renderer.cpp index 0de9698a..458714a5 100644 --- a/src/rendering/terrain_renderer.cpp +++ b/src/rendering/terrain_renderer.cpp @@ -128,7 +128,7 @@ bool TerrainRenderer::initialize(VkContext* ctx, VkDescriptorSetLayout perFrameL vertexAttribs[3] = { 3, 0, VK_FORMAT_R32G32_SFLOAT, static_cast(offsetof(pipeline::TerrainVertex, layerUV)) }; - // --- Build fill pipeline --- + // --- Build fill pipeline (base for derivatives — shared state optimization) --- VkRenderPass mainPass = vkCtx->getImGuiRenderPass(); pipeline = PipelineBuilder() @@ -143,6 +143,7 @@ bool TerrainRenderer::initialize(VkContext* ctx, VkDescriptorSetLayout perFrameL .setLayout(pipelineLayout) .setRenderPass(mainPass) .setDynamicStates({ VK_DYNAMIC_STATE_VIEWPORT, VK_DYNAMIC_STATE_SCISSOR }) + .setFlags(VK_PIPELINE_CREATE_ALLOW_DERIVATIVES_BIT) .build(device, vkCtx->getPipelineCache()); if (!pipeline) { @@ -152,7 +153,7 @@ bool TerrainRenderer::initialize(VkContext* ctx, VkDescriptorSetLayout perFrameL return false; } - // --- Build wireframe pipeline --- + // 
--- Build wireframe pipeline (derivative of fill) --- wireframePipeline = PipelineBuilder() .setShaders(vertShader.stageInfo(VK_SHADER_STAGE_VERTEX_BIT), fragShader.stageInfo(VK_SHADER_STAGE_FRAGMENT_BIT)) @@ -165,6 +166,8 @@ bool TerrainRenderer::initialize(VkContext* ctx, VkDescriptorSetLayout perFrameL .setLayout(pipelineLayout) .setRenderPass(mainPass) .setDynamicStates({ VK_DYNAMIC_STATE_VIEWPORT, VK_DYNAMIC_STATE_SCISSOR }) + .setFlags(VK_PIPELINE_CREATE_DERIVATIVE_BIT) + .setBasePipeline(pipeline) .build(device, vkCtx->getPipelineCache()); if (!wireframePipeline) { @@ -190,6 +193,64 @@ bool TerrainRenderer::initialize(VkContext* ctx, VkDescriptorSetLayout perFrameL envSizeMBOrDefault("WOWEE_TERRAIN_TEX_CACHE_MB", 4096) * 1024ull * 1024ull; LOG_INFO("Terrain texture cache budget: ", textureCacheBudgetBytes_ / (1024 * 1024), " MB"); + // Phase 2.2: Allocate mega vertex/index buffers and indirect draw buffer. + // All terrain chunks share these buffers, eliminating per-chunk VB/IB rebinds. 
+ { + VmaAllocator allocator = vkCtx->getAllocator(); + + // Mega vertex buffer (host-visible for direct write during chunk upload) + VkBufferCreateInfo vbCI{}; + vbCI.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO; + vbCI.size = static_cast(MEGA_VB_MAX_VERTS) * sizeof(pipeline::TerrainVertex); + vbCI.usage = VK_BUFFER_USAGE_VERTEX_BUFFER_BIT; + VmaAllocationCreateInfo vbAllocCI{}; + vbAllocCI.usage = VMA_MEMORY_USAGE_CPU_TO_GPU; + vbAllocCI.flags = VMA_ALLOCATION_CREATE_MAPPED_BIT; + VmaAllocationInfo vbInfo{}; + if (vmaCreateBuffer(allocator, &vbCI, &vbAllocCI, + &megaVB_, &megaVBAlloc_, &vbInfo) == VK_SUCCESS) { + megaVBMapped_ = vbInfo.pMappedData; + } else { + LOG_WARNING("TerrainRenderer: mega VB allocation failed, per-chunk fallback"); + } + + // Mega index buffer + VkBufferCreateInfo ibCI{}; + ibCI.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO; + ibCI.size = static_cast(MEGA_IB_MAX_INDICES) * sizeof(uint32_t); + ibCI.usage = VK_BUFFER_USAGE_INDEX_BUFFER_BIT; + VmaAllocationCreateInfo ibAllocCI{}; + ibAllocCI.usage = VMA_MEMORY_USAGE_CPU_TO_GPU; + ibAllocCI.flags = VMA_ALLOCATION_CREATE_MAPPED_BIT; + VmaAllocationInfo ibInfo{}; + if (vmaCreateBuffer(allocator, &ibCI, &ibAllocCI, + &megaIB_, &megaIBAlloc_, &ibInfo) == VK_SUCCESS) { + megaIBMapped_ = ibInfo.pMappedData; + } else { + LOG_WARNING("TerrainRenderer: mega IB allocation failed, per-chunk fallback"); + } + + // Indirect draw command buffer + VkBufferCreateInfo indCI{}; + indCI.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO; + indCI.size = MAX_INDIRECT_DRAWS * sizeof(VkDrawIndexedIndirectCommand); + indCI.usage = VK_BUFFER_USAGE_INDIRECT_BUFFER_BIT; + VmaAllocationCreateInfo indAllocCI{}; + indAllocCI.usage = VMA_MEMORY_USAGE_CPU_TO_GPU; + indAllocCI.flags = VMA_ALLOCATION_CREATE_MAPPED_BIT; + VmaAllocationInfo indInfo{}; + if (vmaCreateBuffer(allocator, &indCI, &indAllocCI, + &indirectBuffer_, &indirectAlloc_, &indInfo) == VK_SUCCESS) { + indirectMapped_ = indInfo.pMappedData; + } else { + 
LOG_WARNING("TerrainRenderer: indirect buffer allocation failed"); + } + + LOG_INFO("Terrain mega buffers: VB=", vbCI.size / (1024*1024), "MB IB=", + ibCI.size / (1024*1024), "MB indirect=", + indCI.size / 1024, "KB"); + } + LOG_INFO("Terrain renderer initialized (Vulkan)"); return true; } @@ -232,7 +293,7 @@ void TerrainRenderer::recreatePipelines() { VkRenderPass mainPass = vkCtx->getImGuiRenderPass(); - // Rebuild fill pipeline + // Rebuild fill pipeline (base for derivatives — shared state optimization) pipeline = PipelineBuilder() .setShaders(vertShader.stageInfo(VK_SHADER_STAGE_VERTEX_BIT), fragShader.stageInfo(VK_SHADER_STAGE_FRAGMENT_BIT)) @@ -245,13 +306,14 @@ void TerrainRenderer::recreatePipelines() { .setLayout(pipelineLayout) .setRenderPass(mainPass) .setDynamicStates({ VK_DYNAMIC_STATE_VIEWPORT, VK_DYNAMIC_STATE_SCISSOR }) + .setFlags(VK_PIPELINE_CREATE_ALLOW_DERIVATIVES_BIT) .build(device, vkCtx->getPipelineCache()); if (!pipeline) { LOG_ERROR("TerrainRenderer::recreatePipelines: failed to create fill pipeline"); } - // Rebuild wireframe pipeline + // Rebuild wireframe pipeline (derivative of fill) wireframePipeline = PipelineBuilder() .setShaders(vertShader.stageInfo(VK_SHADER_STAGE_VERTEX_BIT), fragShader.stageInfo(VK_SHADER_STAGE_FRAGMENT_BIT)) @@ -264,6 +326,8 @@ void TerrainRenderer::recreatePipelines() { .setLayout(pipelineLayout) .setRenderPass(mainPass) .setDynamicStates({ VK_DYNAMIC_STATE_VIEWPORT, VK_DYNAMIC_STATE_SCISSOR }) + .setFlags(VK_PIPELINE_CREATE_DERIVATIVE_BIT) + .setBasePipeline(pipeline) .build(device, vkCtx->getPipelineCache()); if (!wireframePipeline) { @@ -311,6 +375,13 @@ void TerrainRenderer::shutdown() { if (shadowParamsLayout_) { vkDestroyDescriptorSetLayout(device, shadowParamsLayout_, nullptr); shadowParamsLayout_ = VK_NULL_HANDLE; } if (shadowParamsUBO_) { vmaDestroyBuffer(allocator, shadowParamsUBO_, shadowParamsAlloc_); shadowParamsUBO_ = VK_NULL_HANDLE; shadowParamsAlloc_ = VK_NULL_HANDLE; } + // Phase 2.2: Destroy 
mega buffers and indirect draw buffer + if (megaVB_) { vmaDestroyBuffer(allocator, megaVB_, megaVBAlloc_); megaVB_ = VK_NULL_HANDLE; megaVBAlloc_ = VK_NULL_HANDLE; megaVBMapped_ = nullptr; } + if (megaIB_) { vmaDestroyBuffer(allocator, megaIB_, megaIBAlloc_); megaIB_ = VK_NULL_HANDLE; megaIBAlloc_ = VK_NULL_HANDLE; megaIBMapped_ = nullptr; } + if (indirectBuffer_) { vmaDestroyBuffer(allocator, indirectBuffer_, indirectAlloc_); indirectBuffer_ = VK_NULL_HANDLE; indirectAlloc_ = VK_NULL_HANDLE; indirectMapped_ = nullptr; } + megaVBUsed_ = 0; + megaIBUsed_ = 0; + vkCtx = nullptr; } @@ -537,6 +608,7 @@ TerrainChunkGPU TerrainRenderer::uploadChunk(const pipeline::ChunkMesh& chunk) { gpuChunk.worldY = chunk.worldY; gpuChunk.worldZ = chunk.worldZ; gpuChunk.indexCount = static_cast(chunk.indices.size()); + gpuChunk.vertexCount = static_cast(chunk.vertices.size()); VkDeviceSize vbSize = chunk.vertices.size() * sizeof(pipeline::TerrainVertex); AllocatedBuffer vb = uploadBuffer(*vkCtx, chunk.vertices.data(), vbSize, @@ -550,6 +622,25 @@ TerrainChunkGPU TerrainRenderer::uploadChunk(const pipeline::ChunkMesh& chunk) { gpuChunk.indexBuffer = ib.buffer; gpuChunk.indexAlloc = ib.allocation; + // Phase 2.2: Also copy into mega buffers so the render path can bind once and draw directly (indirect buffer reserved for future GPU-driven draws) + uint32_t vertCount = static_cast(chunk.vertices.size()); + uint32_t idxCount = static_cast(chunk.indices.size()); + if (megaVBMapped_ && megaIBMapped_ && + megaVBUsed_ + vertCount <= MEGA_VB_MAX_VERTS && + megaIBUsed_ + idxCount <= MEGA_IB_MAX_INDICES) { + // Copy vertices + auto* vbDst = static_cast(megaVBMapped_) + megaVBUsed_; + std::memcpy(vbDst, chunk.vertices.data(), vertCount * sizeof(pipeline::TerrainVertex)); + // Copy indices + auto* ibDst = static_cast(megaIBMapped_) + megaIBUsed_; + std::memcpy(ibDst, chunk.indices.data(), idxCount * sizeof(uint32_t)); + + gpuChunk.megaBaseVertex = static_cast(megaVBUsed_); + gpuChunk.megaFirstIndex = megaIBUsed_; + megaVBUsed_ += vertCount; + megaIBUsed_ += idxCount; + } + return 
gpuChunk; } @@ -789,6 +880,15 @@ void TerrainRenderer::render(VkCommandBuffer cmd, VkDescriptorSet perFrameSet, c renderedChunks = 0; culledChunks = 0; + // Phase 2.2: Use mega VB + IB when available. + // Bind mega buffers once, then use direct draws with base vertex/index offsets. + const bool useMegaBuffers = (megaVB_ && megaIB_); + if (useMegaBuffers) { + VkDeviceSize megaOffset = 0; + vkCmdBindVertexBuffers(cmd, 0, 1, &megaVB_, &megaOffset); + vkCmdBindIndexBuffer(cmd, megaIB_, 0, VK_INDEX_TYPE_UINT32); + } + for (const auto& chunk : chunks) { if (!chunk.isValid() || !chunk.materialSet) continue; @@ -808,11 +908,17 @@ void TerrainRenderer::render(VkCommandBuffer cmd, VkDescriptorSet perFrameSet, c vkCmdBindDescriptorSets(cmd, VK_PIPELINE_BIND_POINT_GRAPHICS, pipelineLayout, 1, 1, &chunk.materialSet, 0, nullptr); - VkDeviceSize offset = 0; - vkCmdBindVertexBuffers(cmd, 0, 1, &chunk.vertexBuffer, &offset); - vkCmdBindIndexBuffer(cmd, chunk.indexBuffer, 0, VK_INDEX_TYPE_UINT32); - - vkCmdDrawIndexed(cmd, chunk.indexCount, 1, 0, 0, 0); + if (useMegaBuffers && chunk.megaBaseVertex >= 0) { + // Direct draw from mega buffer — single VB/IB already bound + vkCmdDrawIndexed(cmd, chunk.indexCount, 1, + chunk.megaFirstIndex, chunk.megaBaseVertex, 0); + } else { + // Fallback: per-chunk VB/IB bind + direct draw + VkDeviceSize offset = 0; + vkCmdBindVertexBuffers(cmd, 0, 1, &chunk.vertexBuffer, &offset); + vkCmdBindIndexBuffer(cmd, chunk.indexBuffer, 0, VK_INDEX_TYPE_UINT32); + vkCmdDrawIndexed(cmd, chunk.indexCount, 1, 0, 0, 0); + } renderedChunks++; } @@ -986,6 +1092,14 @@ void TerrainRenderer::renderShadow(VkCommandBuffer cmd, const glm::mat4& lightSp vkCmdPushConstants(cmd, shadowPipelineLayout_, VK_SHADER_STAGE_VERTEX_BIT, 0, 128, &push); + // Phase 2.2: Bind mega buffers once for shadow pass (same as opaque) + const bool useMegaShadow = (megaVB_ && megaIB_); + if (useMegaShadow) { + VkDeviceSize megaOffset = 0; + vkCmdBindVertexBuffers(cmd, 0, 1, &megaVB_, 
&megaOffset); + vkCmdBindIndexBuffer(cmd, megaIB_, 0, VK_INDEX_TYPE_UINT32); + } + for (const auto& chunk : chunks) { if (!chunk.isValid()) continue; @@ -995,10 +1109,14 @@ void TerrainRenderer::renderShadow(VkCommandBuffer cmd, const glm::mat4& lightSp float combinedRadius = shadowRadius + chunk.boundingSphereRadius; if (distSq > combinedRadius * combinedRadius) continue; - VkDeviceSize offset = 0; - vkCmdBindVertexBuffers(cmd, 0, 1, &chunk.vertexBuffer, &offset); - vkCmdBindIndexBuffer(cmd, chunk.indexBuffer, 0, VK_INDEX_TYPE_UINT16); - vkCmdDrawIndexed(cmd, chunk.indexCount, 1, 0, 0, 0); + if (useMegaShadow && chunk.megaBaseVertex >= 0) { + vkCmdDrawIndexed(cmd, chunk.indexCount, 1, chunk.megaFirstIndex, chunk.megaBaseVertex, 0); + } else { + VkDeviceSize offset = 0; + vkCmdBindVertexBuffers(cmd, 0, 1, &chunk.vertexBuffer, &offset); + vkCmdBindIndexBuffer(cmd, chunk.indexBuffer, 0, VK_INDEX_TYPE_UINT32); + vkCmdDrawIndexed(cmd, chunk.indexCount, 1, 0, 0, 0); + } } } diff --git a/src/rendering/vk_context.cpp b/src/rendering/vk_context.cpp index c2a37415..4a5d6366 100644 --- a/src/rendering/vk_context.cpp +++ b/src/rendering/vk_context.cpp @@ -334,7 +334,7 @@ bool VkContext::selectPhysicalDevice() { VkPhysicalDeviceProperties props; vkGetPhysicalDeviceProperties(physicalDevice, &props); - uint32_t apiVersion = props.apiVersion; + (void)props.apiVersion; // Available if needed for version checks gpuVendorId_ = props.vendorID; std::strncpy(gpuName_, props.deviceName, sizeof(gpuName_) - 1); gpuName_[sizeof(gpuName_) - 1] = '\0'; diff --git a/src/rendering/vk_pipeline.cpp b/src/rendering/vk_pipeline.cpp index 2a95bd8b..e5c32e6c 100644 --- a/src/rendering/vk_pipeline.cpp +++ b/src/rendering/vk_pipeline.cpp @@ -111,6 +111,17 @@ PipelineBuilder& PipelineBuilder::setDynamicStates(const std::vector(offsetof(WMOVertexData, tangent)) }; - // --- Build opaque pipeline --- + // --- Build opaque pipeline (base for derivatives — shared state optimization) --- VkRenderPass 
mainPass = vkCtx_->getImGuiRenderPass(); opaquePipeline_ = PipelineBuilder() @@ -184,6 +184,7 @@ bool WMORenderer::initialize(VkContext* ctx, VkDescriptorSetLayout perFrameLayou .setLayout(pipelineLayout_) .setRenderPass(mainPass) .setDynamicStates({ VK_DYNAMIC_STATE_VIEWPORT, VK_DYNAMIC_STATE_SCISSOR }) + .setFlags(VK_PIPELINE_CREATE_ALLOW_DERIVATIVES_BIT) .build(device, vkCtx_->getPipelineCache()); if (!opaquePipeline_) { @@ -193,7 +194,7 @@ bool WMORenderer::initialize(VkContext* ctx, VkDescriptorSetLayout perFrameLayou return false; } - // --- Build transparent pipeline --- + // --- Build transparent pipeline (derivative of opaque) --- transparentPipeline_ = PipelineBuilder() .setShaders(vertShader.stageInfo(VK_SHADER_STAGE_VERTEX_BIT), fragShader.stageInfo(VK_SHADER_STAGE_FRAGMENT_BIT)) @@ -206,13 +207,15 @@ bool WMORenderer::initialize(VkContext* ctx, VkDescriptorSetLayout perFrameLayou .setLayout(pipelineLayout_) .setRenderPass(mainPass) .setDynamicStates({ VK_DYNAMIC_STATE_VIEWPORT, VK_DYNAMIC_STATE_SCISSOR }) + .setFlags(VK_PIPELINE_CREATE_DERIVATIVE_BIT) + .setBasePipeline(opaquePipeline_) .build(device, vkCtx_->getPipelineCache()); if (!transparentPipeline_) { core::Logger::getInstance().warning("WMORenderer: transparent pipeline not available"); } - // --- Build glass pipeline (alpha blend WITH depth write for windows) --- + // --- Build glass pipeline (derivative — alpha blend WITH depth write for windows) --- glassPipeline_ = PipelineBuilder() .setShaders(vertShader.stageInfo(VK_SHADER_STAGE_VERTEX_BIT), fragShader.stageInfo(VK_SHADER_STAGE_FRAGMENT_BIT)) @@ -225,9 +228,11 @@ bool WMORenderer::initialize(VkContext* ctx, VkDescriptorSetLayout perFrameLayou .setLayout(pipelineLayout_) .setRenderPass(mainPass) .setDynamicStates({ VK_DYNAMIC_STATE_VIEWPORT, VK_DYNAMIC_STATE_SCISSOR }) + .setFlags(VK_PIPELINE_CREATE_DERIVATIVE_BIT) + .setBasePipeline(opaquePipeline_) .build(device, vkCtx_->getPipelineCache()); - // --- Build wireframe pipeline --- + // --- 
Build wireframe pipeline (derivative of opaque) --- wireframePipeline_ = PipelineBuilder() .setShaders(vertShader.stageInfo(VK_SHADER_STAGE_VERTEX_BIT), fragShader.stageInfo(VK_SHADER_STAGE_FRAGMENT_BIT)) @@ -240,6 +245,8 @@ bool WMORenderer::initialize(VkContext* ctx, VkDescriptorSetLayout perFrameLayou .setLayout(pipelineLayout_) .setRenderPass(mainPass) .setDynamicStates({ VK_DYNAMIC_STATE_VIEWPORT, VK_DYNAMIC_STATE_SCISSOR }) + .setFlags(VK_PIPELINE_CREATE_DERIVATIVE_BIT) + .setBasePipeline(opaquePipeline_) .build(device, vkCtx_->getPipelineCache()); if (!wireframePipeline_) { @@ -1434,7 +1441,7 @@ void WMORenderer::render(VkCommandBuffer cmd, VkDescriptorSet perFrameSet, const if (doDistanceCull) { glm::vec3 closestPoint = glm::clamp(camPos, gMin, gMax); float distSq = glm::dot(closestPoint - camPos, closestPoint - camPos); - if (distSq > 250000.0f) { + if (distSq > 1440000.0f) { // 1200 units — matches terrain view distance result.distanceCulled++; continue; } @@ -3733,6 +3740,7 @@ void WMORenderer::recreatePipelines() { VkRenderPass mainPass = vkCtx_->getImGuiRenderPass(); + // Pipeline derivatives — opaque is the base, others derive for shared state optimization opaquePipeline_ = PipelineBuilder() .setShaders(vertShader.stageInfo(VK_SHADER_STAGE_VERTEX_BIT), fragShader.stageInfo(VK_SHADER_STAGE_FRAGMENT_BIT)) @@ -3745,6 +3753,7 @@ void WMORenderer::recreatePipelines() { .setLayout(pipelineLayout_) .setRenderPass(mainPass) .setDynamicStates({ VK_DYNAMIC_STATE_VIEWPORT, VK_DYNAMIC_STATE_SCISSOR }) + .setFlags(VK_PIPELINE_CREATE_ALLOW_DERIVATIVES_BIT) .build(device, vkCtx_->getPipelineCache()); transparentPipeline_ = PipelineBuilder() @@ -3759,6 +3768,8 @@ void WMORenderer::recreatePipelines() { .setLayout(pipelineLayout_) .setRenderPass(mainPass) .setDynamicStates({ VK_DYNAMIC_STATE_VIEWPORT, VK_DYNAMIC_STATE_SCISSOR }) + .setFlags(VK_PIPELINE_CREATE_DERIVATIVE_BIT) + .setBasePipeline(opaquePipeline_) .build(device, vkCtx_->getPipelineCache()); glassPipeline_ 
= PipelineBuilder() @@ -3773,6 +3784,8 @@ void WMORenderer::recreatePipelines() { .setLayout(pipelineLayout_) .setRenderPass(mainPass) .setDynamicStates({ VK_DYNAMIC_STATE_VIEWPORT, VK_DYNAMIC_STATE_SCISSOR }) + .setFlags(VK_PIPELINE_CREATE_DERIVATIVE_BIT) + .setBasePipeline(opaquePipeline_) .build(device, vkCtx_->getPipelineCache()); wireframePipeline_ = PipelineBuilder() @@ -3787,6 +3800,8 @@ void WMORenderer::recreatePipelines() { .setLayout(pipelineLayout_) .setRenderPass(mainPass) .setDynamicStates({ VK_DYNAMIC_STATE_VIEWPORT, VK_DYNAMIC_STATE_SCISSOR }) + .setFlags(VK_PIPELINE_CREATE_DERIVATIVE_BIT) + .setBasePipeline(opaquePipeline_) .build(device, vkCtx_->getPipelineCache()); vertShader.destroy(); diff --git a/tools/asset_extract/extractor.cpp b/tools/asset_extract/extractor.cpp index 3c61bef3..d79d4671 100644 --- a/tools/asset_extract/extractor.cpp +++ b/tools/asset_extract/extractor.cpp @@ -537,20 +537,6 @@ static std::vector discoverArchives(const std::string& mpqDir, return result; } -// Read a text file into a vector of lines (for external listfile loading) -static std::vector readLines(const std::string& path) { - std::vector lines; - std::ifstream f(path); - if (!f) return lines; - std::string line; - while (std::getline(f, line)) { - // Trim trailing \r - if (!line.empty() && line.back() == '\r') line.pop_back(); - if (!line.empty()) lines.push_back(std::move(line)); - } - return lines; -} - // Extract the (listfile) from an MPQ archive into a set of filenames static void extractInternalListfile(HANDLE hMpq, std::set& out) { HANDLE hFile = nullptr; @@ -595,14 +581,9 @@ bool Extractor::enumerateFiles(const Options& opts, std::cout << "Found " << archives.size() << " MPQ archives\n"; - // Load external listfile into memory once (avoids repeated file I/O) - std::vector externalEntries; - std::vector externalPtrs; - if (!opts.listFile.empty()) { - externalEntries = readLines(opts.listFile); - externalPtrs.reserve(externalEntries.size()); - for (const 
auto& e : externalEntries) externalPtrs.push_back(e.c_str()); - std::cout << " Loaded external listfile: " << externalEntries.size() << " entries\n"; + const bool haveExternalListFile = !opts.listFile.empty(); + if (haveExternalListFile) { + std::cout << " Using external listfile: " << opts.listFile << "\n"; } const auto wantedDbcs = buildWantedDbcSet(opts); @@ -616,12 +597,11 @@ bool Extractor::enumerateFiles(const Options& opts, continue; } - // Inject external listfile entries into archive's in-memory name table. - // SFileAddListFileEntries is fast — it only hashes the names against the - // archive's hash table, no file I/O involved. - if (!externalPtrs.empty()) { - SFileAddListFileEntries(hMpq, externalPtrs.data(), - static_cast(externalPtrs.size())); + // Inject external listfile into archive's in-memory name table. + // SFileAddListFile reads the file and hashes names against the + // archive's hash table. + if (haveExternalListFile) { + SFileAddListFile(hMpq, opts.listFile.c_str()); } if (opts.verbose) {