fix: data race on collision query profiling counters

queryTimeMs and queryCallCount on WMORenderer and M2Renderer were plain
mutable doubles/uint32s written by getFloorHeight (dispatched on async
threads from CameraController) and read by the main thread. This is
undefined behavior per C++ — thread sanitizer would flag it. Changed to
std::atomic with relaxed ordering (adequate for diagnostics) and updated
QueryTimer to use atomic fetch_add/compare_exchange.
This commit is contained in:
Kelsi 2026-03-29 21:26:11 -07:00
parent 3dd1128ecf
commit bbb560f93c
3 changed files with 23 additions and 12 deletions

View file

@ -5,6 +5,7 @@
#include <vulkan/vulkan.h> #include <vulkan/vulkan.h>
#include <vk_mem_alloc.h> #include <vk_mem_alloc.h>
#include <glm/glm.hpp> #include <glm/glm.hpp>
#include <atomic>
#include <memory> #include <memory>
#include <unordered_map> #include <unordered_map>
#include <unordered_set> #include <unordered_set>
@ -531,9 +532,10 @@ private:
std::unordered_map<uint32_t, size_t> instanceIndexById; std::unordered_map<uint32_t, size_t> instanceIndexById;
// Collision scratch buffers are thread_local (see m2_renderer.cpp) for thread-safety. // Collision scratch buffers are thread_local (see m2_renderer.cpp) for thread-safety.
// Collision query profiling (per frame). // Collision query profiling — atomic because getFloorHeight is dispatched
mutable double queryTimeMs = 0.0; // on async threads from camera_controller while the main thread reads these.
mutable uint32_t queryCallCount = 0; mutable std::atomic<double> queryTimeMs{0.0};
mutable std::atomic<uint32_t> queryCallCount{0};
// Persistent render buffers (avoid per-frame allocation/deallocation) // Persistent render buffers (avoid per-frame allocation/deallocation)
struct VisibleEntry { struct VisibleEntry {

View file

@ -2,6 +2,7 @@
#include <vulkan/vulkan.h> #include <vulkan/vulkan.h>
#include <glm/glm.hpp> #include <glm/glm.hpp>
#include <atomic>
#include <chrono> #include <chrono>
namespace wowee { namespace wowee {
@ -42,19 +43,25 @@ struct ShadowParamsUBO {
float foliageMotionDamp; float foliageMotionDamp;
}; };
// Timer utility for performance profiling queries // Timer utility for performance profiling queries.
// Uses atomics because floor-height queries are dispatched on async threads
// from CameraController while the main thread may read the counters.
struct QueryTimer { struct QueryTimer {
double* totalMs = nullptr; std::atomic<double>* totalMs = nullptr;
uint32_t* callCount = nullptr; std::atomic<uint32_t>* callCount = nullptr;
std::chrono::steady_clock::time_point start = std::chrono::steady_clock::now(); std::chrono::steady_clock::time_point start = std::chrono::steady_clock::now();
QueryTimer(double* total, uint32_t* calls) : totalMs(total), callCount(calls) {} QueryTimer(std::atomic<double>* total, std::atomic<uint32_t>* calls)
: totalMs(total), callCount(calls) {}
~QueryTimer() { ~QueryTimer() {
if (callCount) { if (callCount) {
(*callCount)++; callCount->fetch_add(1, std::memory_order_relaxed);
} }
if (totalMs) { if (totalMs) {
auto end = std::chrono::steady_clock::now(); auto end = std::chrono::steady_clock::now();
*totalMs += std::chrono::duration<double, std::milli>(end - start).count(); double ms = std::chrono::duration<double, std::milli>(end - start).count();
// Relaxed is fine for diagnostics — exact ordering doesn't matter.
double old = totalMs->load(std::memory_order_relaxed);
while (!totalMs->compare_exchange_weak(old, old + ms, std::memory_order_relaxed)) {}
} }
} }
}; };

View file

@ -4,6 +4,7 @@
#include <vulkan/vulkan.h> #include <vulkan/vulkan.h>
#include <vk_mem_alloc.h> #include <vk_mem_alloc.h>
#include <glm/glm.hpp> #include <glm/glm.hpp>
#include <atomic>
#include <memory> #include <memory>
#include <unordered_map> #include <unordered_map>
#include <unordered_set> #include <unordered_set>
@ -740,9 +741,10 @@ private:
std::vector<uint32_t> portalVisibleGroups_; // reused per frame (portal culling scratch) std::vector<uint32_t> portalVisibleGroups_; // reused per frame (portal culling scratch)
std::unordered_set<uint32_t> portalVisibleGroupSet_; // reused per frame (portal culling scratch) std::unordered_set<uint32_t> portalVisibleGroupSet_; // reused per frame (portal culling scratch)
// Collision query profiling (per frame). // Collision query profiling — atomic because getFloorHeight is dispatched
mutable double queryTimeMs = 0.0; // on async threads from camera_controller while the main thread reads these.
mutable uint32_t queryCallCount = 0; mutable std::atomic<double> queryTimeMs{0.0};
mutable std::atomic<uint32_t> queryCallCount{0};
// Floor height cache - persistent precomputed grid // Floor height cache - persistent precomputed grid
static constexpr float FLOOR_GRID_CELL_SIZE = 2.0f; // 2 unit grid cells static constexpr float FLOOR_GRID_CELL_SIZE = 2.0f; // 2 unit grid cells