mirror of
https://github.com/Kelsidavis/WoWee.git
synced 2026-05-03 08:03:50 +00:00
Parallelize M2 bone matrix computation across worker threads
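Split the M2 animation update loop into three phases: a sequential animation state update, bone matrix computation that runs in parallel via std::async (only when there are 32 or more animated instances and more than one animation thread is configured; otherwise it runs sequentially), and a sequential particle update. Each worker thread processes a disjoint slice of instances, so no synchronization is needed beyond waiting on the futures before the particle phase begins.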
This commit is contained in:
parent
5bfe4b61aa
commit
249c4fa842
2 changed files with 61 additions and 4 deletions
|
|
@ -10,6 +10,7 @@
|
||||||
#include <string>
|
#include <string>
|
||||||
#include <optional>
|
#include <optional>
|
||||||
#include <random>
|
#include <random>
|
||||||
|
#include <future>
|
||||||
|
|
||||||
namespace wowee {
|
namespace wowee {
|
||||||
|
|
||||||
|
|
@ -354,6 +355,9 @@ private:
|
||||||
static constexpr size_t MAX_M2_PARTICLES = 4000;
|
static constexpr size_t MAX_M2_PARTICLES = 4000;
|
||||||
std::mt19937 particleRng_{123};
|
std::mt19937 particleRng_{123};
|
||||||
|
|
||||||
|
// Thread count for parallel bone animation
|
||||||
|
uint32_t numAnimThreads_ = 1;
|
||||||
|
|
||||||
float interpFloat(const pipeline::M2AnimationTrack& track, float animTime, int seqIdx,
|
float interpFloat(const pipeline::M2AnimationTrack& track, float animTime, int seqIdx,
|
||||||
const std::vector<pipeline::M2Sequence>& seqs,
|
const std::vector<pipeline::M2Sequence>& seqs,
|
||||||
const std::vector<uint32_t>& globalSeqDurations);
|
const std::vector<uint32_t>& globalSeqDurations);
|
||||||
|
|
|
||||||
|
|
@ -15,6 +15,8 @@
|
||||||
#include <algorithm>
|
#include <algorithm>
|
||||||
#include <cmath>
|
#include <cmath>
|
||||||
#include <limits>
|
#include <limits>
|
||||||
|
#include <future>
|
||||||
|
#include <thread>
|
||||||
|
|
||||||
namespace wowee {
|
namespace wowee {
|
||||||
namespace rendering {
|
namespace rendering {
|
||||||
|
|
@ -203,7 +205,8 @@ M2Renderer::~M2Renderer() {
|
||||||
bool M2Renderer::initialize(pipeline::AssetManager* assets) {
|
bool M2Renderer::initialize(pipeline::AssetManager* assets) {
|
||||||
assetManager = assets;
|
assetManager = assets;
|
||||||
|
|
||||||
LOG_INFO("Initializing M2 renderer...");
|
numAnimThreads_ = std::min(4u, std::max(1u, std::thread::hardware_concurrency() - 1));
|
||||||
|
LOG_INFO("Initializing M2 renderer (", numAnimThreads_, " anim threads)...");
|
||||||
|
|
||||||
// Create M2 shader with skeletal animation support
|
// Create M2 shader with skeletal animation support
|
||||||
const char* vertexSrc = R"(
|
const char* vertexSrc = R"(
|
||||||
|
|
@ -1212,7 +1215,13 @@ void M2Renderer::update(float deltaTime) {
|
||||||
}
|
}
|
||||||
|
|
||||||
// --- Normal M2 animation update ---
|
// --- Normal M2 animation update ---
|
||||||
for (auto& instance : instances) {
|
// Phase 1: Update animation state (cheap, sequential)
|
||||||
|
// Collect indices of instances that need bone matrix computation.
|
||||||
|
std::vector<size_t> boneWorkIndices;
|
||||||
|
boneWorkIndices.reserve(instances.size());
|
||||||
|
|
||||||
|
for (size_t idx = 0; idx < instances.size(); ++idx) {
|
||||||
|
auto& instance = instances[idx];
|
||||||
auto it = models.find(instance.modelId);
|
auto it = models.find(instance.modelId);
|
||||||
if (it == models.end()) continue;
|
if (it == models.end()) continue;
|
||||||
const M2ModelGPU& model = it->second;
|
const M2ModelGPU& model = it->second;
|
||||||
|
|
@ -1267,9 +1276,53 @@ void M2Renderer::update(float deltaTime) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
computeBoneMatrices(model, instance);
|
boneWorkIndices.push_back(idx);
|
||||||
|
}
|
||||||
|
|
||||||
// M2 particle emitter update
|
// Phase 2: Compute bone matrices (expensive, parallel if enough work)
|
||||||
|
const size_t animCount = boneWorkIndices.size();
|
||||||
|
if (animCount > 0) {
|
||||||
|
if (animCount < 32 || numAnimThreads_ <= 1) {
|
||||||
|
// Sequential — not enough work to justify thread overhead
|
||||||
|
for (size_t i : boneWorkIndices) {
|
||||||
|
auto& inst = instances[i];
|
||||||
|
const auto& mdl = models.find(inst.modelId)->second;
|
||||||
|
computeBoneMatrices(mdl, inst);
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
// Parallel — dispatch across worker threads
|
||||||
|
const size_t numThreads = std::min(static_cast<size_t>(numAnimThreads_), animCount);
|
||||||
|
const size_t chunkSize = animCount / numThreads;
|
||||||
|
const size_t remainder = animCount % numThreads;
|
||||||
|
|
||||||
|
std::vector<std::future<void>> futures;
|
||||||
|
futures.reserve(numThreads);
|
||||||
|
|
||||||
|
size_t start = 0;
|
||||||
|
for (size_t t = 0; t < numThreads; ++t) {
|
||||||
|
size_t end = start + chunkSize + (t < remainder ? 1 : 0);
|
||||||
|
futures.push_back(std::async(std::launch::async,
|
||||||
|
[this, &boneWorkIndices, start, end]() {
|
||||||
|
for (size_t j = start; j < end; ++j) {
|
||||||
|
size_t idx = boneWorkIndices[j];
|
||||||
|
auto& inst = instances[idx];
|
||||||
|
const auto& mdl = models.find(inst.modelId)->second;
|
||||||
|
computeBoneMatrices(mdl, inst);
|
||||||
|
}
|
||||||
|
}));
|
||||||
|
start = end;
|
||||||
|
}
|
||||||
|
|
||||||
|
for (auto& f : futures) {
|
||||||
|
f.get();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Phase 3: Particle update (sequential — uses RNG, not thread-safe)
|
||||||
|
for (size_t idx : boneWorkIndices) {
|
||||||
|
auto& instance = instances[idx];
|
||||||
|
const auto& model = models.find(instance.modelId)->second;
|
||||||
if (!model.particleEmitters.empty()) {
|
if (!model.particleEmitters.empty()) {
|
||||||
emitParticles(instance, model, deltaTime);
|
emitParticles(instance, model, deltaTime);
|
||||||
updateParticles(instance, deltaTime);
|
updateParticles(instance, deltaTime);
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue