Parallelize M2 bone matrix computation across worker threads

Split the M2 animation update loop into three phases: a sequential animation state update, a parallel bone matrix computation via std::async (when there are 32 or more animated instances), and a sequential particle update. Each thread processes a disjoint slice of instances, so no synchronization is needed.
2026-05-11 19:43:52 +00:00 · 2026-02-07 14:28:14 -08:00 · 2026-02-07 14:28:14 -08:00 · 249c4fa842
commit 249c4fa842
parent 5bfe4b61aa
2 changed files with 61 additions and 4 deletions
--- a/include/rendering/m2_renderer.hpp
+++ b/include/rendering/m2_renderer.hpp
@@ -10,6 +10,7 @@
 #include <string>
 #include <optional>
 #include <random>
+#include <future>

 namespace wowee {

@@ -354,6 +355,9 @@ private:
    static constexpr size_t MAX_M2_PARTICLES = 4000;
    std::mt19937 particleRng_{123};

+    // Thread count for parallel bone animation
+    uint32_t numAnimThreads_ = 1;
+
    float interpFloat(const pipeline::M2AnimationTrack& track, float animTime, int seqIdx,
                      const std::vector<pipeline::M2Sequence>& seqs,
                      const std::vector<uint32_t>& globalSeqDurations);