From 3b7ac068d27f2db0bdc6dd55091f7805d7dca651 Mon Sep 17 00:00:00 2001 From: Kelsi Date: Sun, 29 Mar 2026 19:51:17 -0700 Subject: [PATCH] perf: hoist key array read out of per-sequence loop in parseAnimTrackVanilla readArray was called inside the loop on every iteration, re-parsing the entire flat key array via memcpy. For a model with 200 sequences and 10k keys this produced ~24MB of redundant copying. Now reads once before the loop (matching how allTimestamps was already handled). --- src/pipeline/m2_loader.cpp | 32 +++++++++++++++++++------------- 1 file changed, 19 insertions(+), 13 deletions(-) diff --git a/src/pipeline/m2_loader.cpp b/src/pipeline/m2_loader.cpp index 2454fd68..f8af506e 100644 --- a/src/pipeline/m2_loader.cpp +++ b/src/pipeline/m2_loader.cpp @@ -515,6 +515,22 @@ void parseAnimTrackVanilla(const std::vector& data, ranges.push_back({0, disk.nTimestamps}); } + // Read the flat key array ONCE before the per-sequence loop. Previously + // readArray was called inside the loop on every iteration, re-parsing and + // copying the entire array (O(sequences × keys) redundant memcpy). 
+ struct Vec3Disk { float x, y, z; }; + struct C4Quaternion { float x, y, z, w; }; + std::vector<float> allFloatKeys; + std::vector<Vec3Disk> allVec3Keys; + std::vector<C4Quaternion> allQuatKeys; + if (type == TrackType::FLOAT) { + allFloatKeys = readArray<float>(data, disk.ofsKeys, disk.nKeys); + } else if (type == TrackType::VEC3) { + allVec3Keys = readArray<Vec3Disk>(data, disk.ofsKeys, disk.nKeys); + } else { + allQuatKeys = readArray<C4Quaternion>(data, disk.ofsKeys, disk.nKeys); + } + track.sequences.resize(ranges.size()); for (size_t i = 0; i < ranges.size(); i++) { @@ -523,9 +539,6 @@ void parseAnimTrackVanilla(const std::vector& data, if (start >= end || start >= disk.nTimestamps) continue; end = std::min(end, disk.nTimestamps); - // Copy timestamps for this sequence, normalized to start at 0 - // (vanilla stores absolute timestamps in the flat array, but the - // renderer expects 0-relative times matching sequence duration) track.sequences[i].timestamps.assign( allTimestamps.begin() + start, allTimestamps.begin() + end); if (!track.sequences[i].timestamps.empty()) { @@ -535,31 +548,24 @@ void parseAnimTrackVanilla(const std::vector& data, } } - // Copy key values for this sequence if (start >= disk.nKeys) continue; uint32_t keyEnd = std::min(end, disk.nKeys); uint32_t keyCount = keyEnd - start; if (type == TrackType::FLOAT) { - auto allValues = readArray<float>(data, disk.ofsKeys, disk.nKeys); track.sequences[i].floatValues.assign( - allValues.begin() + start, allValues.begin() + start + keyCount); + allFloatKeys.begin() + start, allFloatKeys.begin() + start + keyCount); } else if (type == TrackType::VEC3) { - struct Vec3Disk { float x, y, z; }; - auto allValues = readArray<Vec3Disk>(data, disk.ofsKeys, disk.nKeys); track.sequences[i].vec3Values.reserve(keyCount); for (uint32_t k = start; k < start + keyCount; k++) { track.sequences[i].vec3Values.emplace_back( - allValues[k].x, allValues[k].y, allValues[k].z); + allVec3Keys[k].x, allVec3Keys[k].y, allVec3Keys[k].z); } } else { // Vanilla: C4Quaternion — full float[4] per key (XYZW
on disk) - // NOT compressed int16 like WotLK - struct C4Quaternion { float x, y, z, w; }; - auto allQ = readArray<C4Quaternion>(data, disk.ofsKeys, disk.nKeys); track.sequences[i].quatValues.reserve(keyCount); for (uint32_t k = start; k < start + keyCount; k++) { - const auto& fq = allQ[k]; + const auto& fq = allQuatKeys[k]; // Disk order: XYZW, glm::quat constructor: (w, x, y, z) glm::quat q(fq.w, fq.x, fq.y, fq.z); float len = glm::length(q);