mirror of
https://github.com/Kelsidavis/WoWee.git
synced 2026-04-17 09:33:51 +00:00
Fix WMO wall collision, normal mapping, POM backfill, and M2/WMO rendering performance
- Fix MOPY flag check (0x08 not 0x01) for proper wall collision detection - Cap MAX_PUSH to PLAYER_RADIUS to prevent gradual clip-through - Fix WMO doodad quaternion component ordering (X/Y swap) - Linear normal map strength blend in shader for smooth slider control - Enable shadow sampling for interior WMO groups (covered outdoor areas) - Backfill deferred normal/height maps after streaming with descriptor rebind - M2: prepareRender only iterates animated instances, bone dirty flag - M2: remove worker thread VMA allocation, skip unready bone instances - WMO: persistent visibility vectors, sequential culling - Add FSR EASU/RCAS shaders
This commit is contained in:
parent
16c6c2b6a0
commit
a4966e486f
25 changed files with 1467 additions and 352 deletions
|
|
@ -1924,6 +1924,61 @@ glm::mat4 CharacterRenderer::getBoneTransform(const pipeline::M2Bone& bone, floa
|
|||
|
||||
// --- Rendering ---
|
||||
|
||||
/// Pre-allocates per-instance bone SSBOs and their descriptor sets for the
/// given frame-in-flight. Must run on the main thread: VMA allocation and
/// descriptor-pool operations here are not thread-safe.
///
/// @param frameIndex  index of the frame-in-flight whose per-frame bone
///                    buffer/descriptor slot is being prepared.
void CharacterRenderer::prepareRender(uint32_t frameIndex) {
    if (instances.empty() || !opaquePipeline_) return;

    // Pre-allocate bone SSBOs + descriptor sets on main thread (pool ops not thread-safe)
    for (auto& [id, instance] : instances) {
        int numBones = std::min(static_cast<int>(instance.boneMatrices.size()), MAX_BONES);
        if (numBones <= 0) continue;

        // Already allocated for this frame slot — nothing to do.
        if (instance.boneBuffer[frameIndex]) continue;

        // Persistently-mapped CPU->GPU storage buffer sized for the maximum
        // bone count so the buffer never needs reallocation.
        VkBufferCreateInfo bci{VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO};
        bci.size = MAX_BONES * sizeof(glm::mat4);
        bci.usage = VK_BUFFER_USAGE_STORAGE_BUFFER_BIT;
        VmaAllocationCreateInfo aci{};
        aci.usage = VMA_MEMORY_USAGE_CPU_TO_GPU;
        aci.flags = VMA_ALLOCATION_CREATE_MAPPED_BIT;
        VmaAllocationInfo allocInfo{};
        VkResult bufRes = vmaCreateBuffer(vkCtx_->getAllocator(), &bci, &aci,
                                          &instance.boneBuffer[frameIndex],
                                          &instance.boneAlloc[frameIndex], &allocInfo);
        if (bufRes != VK_SUCCESS) {
            // FIX: previously unchecked — on failure boneMapped would have been
            // read from an indeterminate VmaAllocationInfo and a descriptor set
            // bound to a null buffer. Reset the slot and retry next frame.
            LOG_ERROR("CharacterRenderer::prepareRender: bone buffer alloc failed (instance=",
                      id, ", frame=", frameIndex, ", vk=", static_cast<int>(bufRes), ")");
            instance.boneBuffer[frameIndex] = VK_NULL_HANDLE;
            instance.boneAlloc[frameIndex] = VK_NULL_HANDLE;
            instance.boneMapped[frameIndex] = nullptr;
            continue;
        }
        instance.boneMapped[frameIndex] = allocInfo.pMappedData;

        // Allocate the per-instance descriptor set pointing at the new buffer.
        VkDescriptorSetAllocateInfo ai{VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO};
        ai.descriptorPool = boneDescPool_;
        ai.descriptorSetCount = 1;
        ai.pSetLayouts = &boneSetLayout_;
        VkResult dsRes = vkAllocateDescriptorSets(vkCtx_->getDevice(), &ai, &instance.boneSet[frameIndex]);
        if (dsRes != VK_SUCCESS) {
            LOG_ERROR("CharacterRenderer::prepareRender: bone descriptor alloc failed (instance=",
                      id, ", frame=", frameIndex, ", vk=", static_cast<int>(dsRes), ")");
            // Roll back the buffer so the slot stays consistent (either both
            // buffer+set exist, or neither does).
            if (instance.boneBuffer[frameIndex]) {
                vmaDestroyBuffer(vkCtx_->getAllocator(),
                                 instance.boneBuffer[frameIndex], instance.boneAlloc[frameIndex]);
                instance.boneBuffer[frameIndex] = VK_NULL_HANDLE;
                instance.boneAlloc[frameIndex] = VK_NULL_HANDLE;
                instance.boneMapped[frameIndex] = nullptr;
            }
            continue;
        }

        // Point binding 0 of the freshly allocated set at the bone SSBO.
        if (instance.boneSet[frameIndex]) {
            VkDescriptorBufferInfo bufInfo{};
            bufInfo.buffer = instance.boneBuffer[frameIndex];
            bufInfo.offset = 0;
            bufInfo.range = bci.size;
            VkWriteDescriptorSet write{VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET};
            write.dstSet = instance.boneSet[frameIndex];
            write.dstBinding = 0;
            write.descriptorCount = 1;
            write.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
            write.pBufferInfo = &bufInfo;
            vkUpdateDescriptorSets(vkCtx_->getDevice(), 1, &write, 0, nullptr);
        }
    }
}
|
||||
|
||||
void CharacterRenderer::render(VkCommandBuffer cmd, VkDescriptorSet perFrameSet, [[maybe_unused]] const Camera& camera) {
|
||||
if (instances.empty() || !opaquePipeline_) {
|
||||
return;
|
||||
|
|
|
|||
|
|
@ -1602,6 +1602,12 @@ bool M2Renderer::loadModel(const pipeline::M2Model& model, uint32_t modelId) {
|
|||
}
|
||||
}
|
||||
|
||||
// Pre-compute available LOD levels to avoid per-instance batch iteration
|
||||
gpuModel.availableLODs = 0;
|
||||
for (const auto& b : gpuModel.batches) {
|
||||
if (b.submeshLevel < 8) gpuModel.availableLODs |= (1u << b.submeshLevel);
|
||||
}
|
||||
|
||||
models[modelId] = std::move(gpuModel);
|
||||
|
||||
LOG_DEBUG("Loaded M2 model: ", model.name, " (", models[modelId].vertexCount, " vertices, ",
|
||||
|
|
@ -1911,6 +1917,7 @@ static void computeBoneMatrices(const M2ModelGPU& model, M2Instance& instance) {
|
|||
instance.boneMatrices[i] = local;
|
||||
}
|
||||
}
|
||||
instance.bonesDirty = true;
|
||||
}
|
||||
|
||||
void M2Renderer::update(float deltaTime, const glm::vec3& cameraPos, const glm::mat4& viewProjection) {
|
||||
|
|
@ -2172,6 +2179,48 @@ void M2Renderer::update(float deltaTime, const glm::vec3& cameraPos, const glm::
|
|||
|
||||
}
|
||||
|
||||
/// Pre-allocates bone SSBOs + descriptor sets for animated M2 instances on
/// the main thread (VMA/descriptor-pool operations are not thread-safe).
/// Only instances listed in animatedInstanceIndices_ are touched — static
/// doodads never need bone buffers.
///
/// @param frameIndex  frame-in-flight slot whose bone buffer is prepared.
/// @param camera      unused for now; reserved for frustum-based culling.
void M2Renderer::prepareRender(uint32_t frameIndex, const Camera& camera) {
    if (!initialized_ || instances.empty()) return;
    (void)camera; // reserved for future frustum-based culling

    // Shader-side bone array capacity; buffer is sized once for the maximum
    // so it never needs reallocation as animations vary bone counts.
    constexpr VkDeviceSize kMaxBones = 128;

    for (size_t idx : animatedInstanceIndices_) {
        if (idx >= instances.size()) continue;
        auto& instance = instances[idx];

        if (instance.boneMatrices.empty()) continue;
        // Already allocated for this frame slot — nothing to do.
        if (instance.boneBuffer[frameIndex]) continue;

        // Persistently-mapped CPU->GPU storage buffer for bone matrices.
        VkBufferCreateInfo bci{VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO};
        bci.size = kMaxBones * sizeof(glm::mat4);
        bci.usage = VK_BUFFER_USAGE_STORAGE_BUFFER_BIT;
        VmaAllocationCreateInfo aci{};
        aci.usage = VMA_MEMORY_USAGE_CPU_TO_GPU;
        aci.flags = VMA_ALLOCATION_CREATE_MAPPED_BIT;
        VmaAllocationInfo allocInfo{};
        VkResult bufRes = vmaCreateBuffer(vkCtx_->getAllocator(), &bci, &aci,
                                          &instance.boneBuffer[frameIndex],
                                          &instance.boneAlloc[frameIndex], &allocInfo);
        if (bufRes != VK_SUCCESS) {
            // FIX: previously unchecked — on failure boneMapped would have been
            // read from an indeterminate VmaAllocationInfo. Reset the slot so
            // render() skips this instance and we retry next frame.
            LOG_ERROR("M2Renderer::prepareRender: bone buffer alloc failed (frame=",
                      frameIndex, ", vk=", static_cast<int>(bufRes), ")");
            instance.boneBuffer[frameIndex] = VK_NULL_HANDLE;
            instance.boneAlloc[frameIndex] = VK_NULL_HANDLE;
            instance.boneMapped[frameIndex] = nullptr;
            continue;
        }
        instance.boneMapped[frameIndex] = allocInfo.pMappedData;

        // Point binding 0 of the bone descriptor set at the new SSBO.
        // allocateBoneSet() may return null on pool exhaustion; render()
        // skips instances without a set, so that is non-fatal here.
        instance.boneSet[frameIndex] = allocateBoneSet();
        if (instance.boneSet[frameIndex]) {
            VkDescriptorBufferInfo bufInfo{};
            bufInfo.buffer = instance.boneBuffer[frameIndex];
            bufInfo.offset = 0;
            bufInfo.range = bci.size;
            VkWriteDescriptorSet write{VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET};
            write.dstSet = instance.boneSet[frameIndex];
            write.dstBinding = 0;
            write.descriptorCount = 1;
            write.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
            write.pBufferInfo = &bufInfo;
            vkUpdateDescriptorSets(vkCtx_->getDevice(), 1, &write, 0, nullptr);
        }
    }
}
|
||||
|
||||
void M2Renderer::render(VkCommandBuffer cmd, VkDescriptorSet perFrameSet, const Camera& camera) {
|
||||
if (instances.empty() || !opaquePipeline_) {
|
||||
return;
|
||||
|
|
@ -2254,8 +2303,8 @@ void M2Renderer::render(VkCommandBuffer cmd, VkDescriptorSet perFrameSet, const
|
|||
}
|
||||
|
||||
// Sort by modelId to minimize vertex/index buffer rebinds
|
||||
std::stable_sort(sortedVisible_.begin(), sortedVisible_.end(),
|
||||
[](const VisibleEntry& a, const VisibleEntry& b) { return a.modelId < b.modelId; });
|
||||
std::sort(sortedVisible_.begin(), sortedVisible_.end(),
|
||||
[](const VisibleEntry& a, const VisibleEntry& b) { return a.modelId < b.modelId; });
|
||||
|
||||
uint32_t currentModelId = UINT32_MAX;
|
||||
const M2ModelGPU* currentModel = nullptr;
|
||||
|
|
@ -2330,44 +2379,22 @@ void M2Renderer::render(VkCommandBuffer cmd, VkDescriptorSet perFrameSet, const
|
|||
}
|
||||
}
|
||||
|
||||
// Upload bone matrices to SSBO if model has skeletal animation
|
||||
bool useBones = model.hasAnimation && !model.disableAnimation && !instance.boneMatrices.empty();
|
||||
// Upload bone matrices to SSBO if model has skeletal animation.
|
||||
// Bone buffers are pre-allocated by prepareRender() on the main thread.
|
||||
// If not yet allocated (race/timing), skip this instance entirely to avoid
|
||||
// a bind-pose flash — it will render correctly next frame.
|
||||
bool needsBones = model.hasAnimation && !model.disableAnimation && !instance.boneMatrices.empty();
|
||||
if (needsBones && (!instance.boneBuffer[frameIndex] || !instance.boneSet[frameIndex])) {
|
||||
continue;
|
||||
}
|
||||
bool useBones = needsBones;
|
||||
if (useBones) {
|
||||
// Lazy-allocate bone SSBO on first use
|
||||
if (!instance.boneBuffer[frameIndex]) {
|
||||
VkBufferCreateInfo bci{VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO};
|
||||
bci.size = 128 * sizeof(glm::mat4); // max 128 bones
|
||||
bci.usage = VK_BUFFER_USAGE_STORAGE_BUFFER_BIT;
|
||||
VmaAllocationCreateInfo aci{};
|
||||
aci.usage = VMA_MEMORY_USAGE_CPU_TO_GPU;
|
||||
aci.flags = VMA_ALLOCATION_CREATE_MAPPED_BIT;
|
||||
VmaAllocationInfo allocInfo{};
|
||||
vmaCreateBuffer(vkCtx_->getAllocator(), &bci, &aci,
|
||||
&instance.boneBuffer[frameIndex], &instance.boneAlloc[frameIndex], &allocInfo);
|
||||
instance.boneMapped[frameIndex] = allocInfo.pMappedData;
|
||||
|
||||
// Allocate descriptor set for bone SSBO
|
||||
instance.boneSet[frameIndex] = allocateBoneSet();
|
||||
if (instance.boneSet[frameIndex]) {
|
||||
VkDescriptorBufferInfo bufInfo{};
|
||||
bufInfo.buffer = instance.boneBuffer[frameIndex];
|
||||
bufInfo.offset = 0;
|
||||
bufInfo.range = bci.size;
|
||||
VkWriteDescriptorSet write{VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET};
|
||||
write.dstSet = instance.boneSet[frameIndex];
|
||||
write.dstBinding = 0;
|
||||
write.descriptorCount = 1;
|
||||
write.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
|
||||
write.pBufferInfo = &bufInfo;
|
||||
vkUpdateDescriptorSets(vkCtx_->getDevice(), 1, &write, 0, nullptr);
|
||||
}
|
||||
}
|
||||
|
||||
// Upload bone matrices
|
||||
if (instance.boneMapped[frameIndex]) {
|
||||
// Upload bone matrices only when recomputed (skip frame-skipped instances)
|
||||
if (instance.bonesDirty && instance.boneMapped[frameIndex]) {
|
||||
int numBones = std::min(static_cast<int>(instance.boneMatrices.size()), 128);
|
||||
memcpy(instance.boneMapped[frameIndex], instance.boneMatrices.data(),
|
||||
numBones * sizeof(glm::mat4));
|
||||
instance.bonesDirty = false;
|
||||
}
|
||||
|
||||
// Bind bone descriptor set (set 2)
|
||||
|
|
@ -2384,12 +2411,8 @@ void M2Renderer::render(VkCommandBuffer cmd, VkDescriptorSet perFrameSet, const
|
|||
else if (entry.distSq > 40.0f * 40.0f) desiredLOD = 1;
|
||||
|
||||
uint16_t targetLOD = desiredLOD;
|
||||
if (desiredLOD > 0) {
|
||||
bool hasDesiredLOD = false;
|
||||
for (const auto& b : model.batches) {
|
||||
if (b.submeshLevel == desiredLOD) { hasDesiredLOD = true; break; }
|
||||
}
|
||||
if (!hasDesiredLOD) targetLOD = 0;
|
||||
if (desiredLOD > 0 && !(model.availableLODs & (1u << desiredLOD))) {
|
||||
targetLOD = 0;
|
||||
}
|
||||
|
||||
const bool foliageLikeModel = model.isFoliageLike;
|
||||
|
|
|
|||
|
|
@ -1,5 +1,6 @@
|
|||
#include "rendering/performance_hud.hpp"
|
||||
#include "rendering/renderer.hpp"
|
||||
#include "rendering/vk_context.hpp"
|
||||
#include "rendering/terrain_renderer.hpp"
|
||||
#include "rendering/terrain_manager.hpp"
|
||||
#include "rendering/water_renderer.hpp"
|
||||
|
|
@ -187,6 +188,19 @@ void PerformanceHUD::render(const Renderer* renderer, const Camera* camera) {
|
|||
0, nullptr, 0.0f, 33.33f, ImVec2(200, 40));
|
||||
}
|
||||
|
||||
// FSR info
|
||||
if (renderer->isFSREnabled()) {
|
||||
ImGui::TextColored(ImVec4(0.4f, 1.0f, 0.4f, 1.0f), "FSR 1.0: ON");
|
||||
auto* ctx = renderer->getVkContext();
|
||||
if (ctx) {
|
||||
auto ext = ctx->getSwapchainExtent();
|
||||
float sf = renderer->getFSRScaleFactor();
|
||||
uint32_t iw = static_cast<uint32_t>(ext.width * sf) & ~1u;
|
||||
uint32_t ih = static_cast<uint32_t>(ext.height * sf) & ~1u;
|
||||
ImGui::Text(" %ux%u -> %ux%u (%.0f%%)", iw, ih, ext.width, ext.height, sf * 100.0f);
|
||||
}
|
||||
}
|
||||
|
||||
ImGui::Spacing();
|
||||
}
|
||||
|
||||
|
|
|
|||
File diff suppressed because it is too large
Load diff
|
|
@ -911,6 +911,8 @@ bool TerrainManager::advanceFinalization(FinalizingTile& ft) {
|
|||
wmoRenderer->setDeferNormalMaps(false);
|
||||
wmoRenderer->setPredecodedBLPCache(nullptr);
|
||||
if (ft.wmoModelIndex < pending->wmoModels.size()) return false;
|
||||
// All WMO models loaded — backfill normal/height maps that were skipped during streaming
|
||||
wmoRenderer->backfillNormalMaps();
|
||||
}
|
||||
ft.phase = FinalizationPhase::WMO_INSTANCES;
|
||||
return false;
|
||||
|
|
|
|||
|
|
@ -252,14 +252,22 @@ bool VkContext::createAllocator() {
|
|||
bool VkContext::createSwapchain(int width, int height) {
|
||||
vkb::SwapchainBuilder swapchainBuilder{physicalDevice, device, surface};
|
||||
|
||||
auto swapRet = swapchainBuilder
|
||||
auto& builder = swapchainBuilder
|
||||
.set_desired_format({VK_FORMAT_B8G8R8A8_UNORM, VK_COLOR_SPACE_SRGB_NONLINEAR_KHR})
|
||||
.set_desired_present_mode(VK_PRESENT_MODE_FIFO_KHR) // VSync
|
||||
.set_desired_extent(static_cast<uint32_t>(width), static_cast<uint32_t>(height))
|
||||
.set_image_usage_flags(VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT)
|
||||
.set_desired_min_image_count(2)
|
||||
.set_old_swapchain(swapchain) // For recreation
|
||||
.build();
|
||||
.set_old_swapchain(swapchain);
|
||||
|
||||
if (vsync_) {
|
||||
builder.set_desired_present_mode(VK_PRESENT_MODE_FIFO_KHR);
|
||||
} else {
|
||||
builder.set_desired_present_mode(VK_PRESENT_MODE_IMMEDIATE_KHR);
|
||||
builder.add_fallback_present_mode(VK_PRESENT_MODE_MAILBOX_KHR);
|
||||
builder.add_fallback_present_mode(VK_PRESENT_MODE_FIFO_RELAXED_KHR);
|
||||
}
|
||||
|
||||
auto swapRet = builder.build();
|
||||
|
||||
if (!swapRet) {
|
||||
LOG_ERROR("Failed to create Vulkan swapchain: ", swapRet.error().message());
|
||||
|
|
@ -1026,14 +1034,22 @@ bool VkContext::recreateSwapchain(int width, int height) {
|
|||
VkSwapchainKHR oldSwapchain = swapchain;
|
||||
|
||||
vkb::SwapchainBuilder swapchainBuilder{physicalDevice, device, surface};
|
||||
auto swapRet = swapchainBuilder
|
||||
auto& builder = swapchainBuilder
|
||||
.set_desired_format({VK_FORMAT_B8G8R8A8_UNORM, VK_COLOR_SPACE_SRGB_NONLINEAR_KHR})
|
||||
.set_desired_present_mode(VK_PRESENT_MODE_FIFO_KHR)
|
||||
.set_desired_extent(static_cast<uint32_t>(width), static_cast<uint32_t>(height))
|
||||
.set_image_usage_flags(VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT)
|
||||
.set_desired_min_image_count(2)
|
||||
.set_old_swapchain(oldSwapchain)
|
||||
.build();
|
||||
.set_old_swapchain(oldSwapchain);
|
||||
|
||||
if (vsync_) {
|
||||
builder.set_desired_present_mode(VK_PRESENT_MODE_FIFO_KHR);
|
||||
} else {
|
||||
builder.set_desired_present_mode(VK_PRESENT_MODE_IMMEDIATE_KHR);
|
||||
builder.add_fallback_present_mode(VK_PRESENT_MODE_MAILBOX_KHR);
|
||||
builder.add_fallback_present_mode(VK_PRESENT_MODE_FIFO_RELAXED_KHR);
|
||||
}
|
||||
|
||||
auto swapRet = builder.build();
|
||||
|
||||
if (oldSwapchain) {
|
||||
vkDestroySwapchainKHR(device, oldSwapchain, nullptr);
|
||||
|
|
|
|||
|
|
@ -787,8 +787,8 @@ bool WMORenderer::loadModel(const pipeline::WMOModel& model, uint32_t id) {
|
|||
}
|
||||
|
||||
// Build doodad's local transform (WoW coordinates)
|
||||
// WMO doodads use quaternion rotation (X/Y swapped for correct orientation)
|
||||
glm::quat fixedRotation(doodad.rotation.w, doodad.rotation.y, doodad.rotation.x, doodad.rotation.z);
|
||||
// WMO doodads use quaternion rotation
|
||||
glm::quat fixedRotation(doodad.rotation.w, doodad.rotation.x, doodad.rotation.y, doodad.rotation.z);
|
||||
|
||||
glm::mat4 localTransform(1.0f);
|
||||
localTransform = glm::translate(localTransform, doodad.position);
|
||||
|
|
@ -1318,15 +1318,10 @@ void WMORenderer::gatherCandidates(const glm::vec3& queryMin, const glm::vec3& q
|
|||
}
|
||||
}
|
||||
|
||||
void WMORenderer::render(VkCommandBuffer cmd, VkDescriptorSet perFrameSet, const Camera& camera) {
|
||||
void WMORenderer::prepareRender() {
|
||||
++currentFrameId;
|
||||
|
||||
if (!opaquePipeline_ || instances.empty()) {
|
||||
lastDrawCalls = 0;
|
||||
return;
|
||||
}
|
||||
|
||||
// Update material UBOs if settings changed
|
||||
// Update material UBOs if settings changed (mapped memory writes — main thread only)
|
||||
if (materialSettingsDirty_) {
|
||||
materialSettingsDirty_ = false;
|
||||
static const int pomSampleTable[] = { 16, 32, 64 };
|
||||
|
|
@ -1335,7 +1330,6 @@ void WMORenderer::render(VkCommandBuffer cmd, VkDescriptorSet perFrameSet, const
|
|||
for (auto& group : model.groups) {
|
||||
for (auto& mb : group.mergedBatches) {
|
||||
if (!mb.materialUBO) continue;
|
||||
// Read existing UBO data, update normal/POM fields
|
||||
VmaAllocationInfo allocInfo{};
|
||||
vmaGetAllocationInfo(vkCtx_->getAllocator(), mb.materialUBOAlloc, &allocInfo);
|
||||
if (allocInfo.pMappedData) {
|
||||
|
|
@ -1351,6 +1345,13 @@ void WMORenderer::render(VkCommandBuffer cmd, VkDescriptorSet perFrameSet, const
|
|||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void WMORenderer::render(VkCommandBuffer cmd, VkDescriptorSet perFrameSet, const Camera& camera) {
|
||||
if (!opaquePipeline_ || instances.empty()) {
|
||||
lastDrawCalls = 0;
|
||||
return;
|
||||
}
|
||||
|
||||
lastDrawCalls = 0;
|
||||
|
||||
|
|
@ -1362,43 +1363,45 @@ void WMORenderer::render(VkCommandBuffer cmd, VkDescriptorSet perFrameSet, const
|
|||
lastPortalCulledGroups = 0;
|
||||
lastDistanceCulledGroups = 0;
|
||||
|
||||
// ── Phase 1: Parallel visibility culling ──────────────────────────
|
||||
std::vector<size_t> visibleInstances;
|
||||
visibleInstances.reserve(instances.size());
|
||||
// ── Phase 1: Visibility culling ──────────────────────────
|
||||
visibleInstances_.clear();
|
||||
for (size_t i = 0; i < instances.size(); ++i) {
|
||||
const auto& instance = instances[i];
|
||||
if (loadedModels.find(instance.modelId) == loadedModels.end())
|
||||
continue;
|
||||
visibleInstances.push_back(i);
|
||||
if (loadedModels.count(instances[i].modelId))
|
||||
visibleInstances_.push_back(i);
|
||||
}
|
||||
|
||||
glm::vec3 camPos = camera.getPosition();
|
||||
bool doPortalCull = portalCulling;
|
||||
bool doFrustumCull = false; // Temporarily disabled: can over-cull world WMOs
|
||||
bool doDistanceCull = distanceCulling;
|
||||
|
||||
auto cullInstance = [&](size_t instIdx) -> InstanceDrawList {
|
||||
if (instIdx >= instances.size()) return InstanceDrawList{};
|
||||
auto cullInstance = [&](size_t instIdx, InstanceDrawList& result) {
|
||||
if (instIdx >= instances.size()) return;
|
||||
const auto& instance = instances[instIdx];
|
||||
auto mdlIt = loadedModels.find(instance.modelId);
|
||||
if (mdlIt == loadedModels.end()) return InstanceDrawList{};
|
||||
if (mdlIt == loadedModels.end()) return;
|
||||
const ModelData& model = mdlIt->second;
|
||||
|
||||
InstanceDrawList result;
|
||||
result.instanceIndex = instIdx;
|
||||
result.visibleGroups.clear();
|
||||
result.portalCulled = 0;
|
||||
result.distanceCulled = 0;
|
||||
|
||||
// Portal-based visibility
|
||||
std::unordered_set<uint32_t> portalVisibleGroups;
|
||||
// Portal-based visibility — use a flat sorted vector instead of unordered_set
|
||||
std::vector<uint32_t> portalVisibleGroups;
|
||||
bool usePortalCulling = doPortalCull && !model.portals.empty() && !model.portalRefs.empty();
|
||||
if (usePortalCulling) {
|
||||
std::unordered_set<uint32_t> pvgSet;
|
||||
glm::vec4 localCamPos = instance.invModelMatrix * glm::vec4(camPos, 1.0f);
|
||||
getVisibleGroupsViaPortals(model, glm::vec3(localCamPos), frustum,
|
||||
instance.modelMatrix, portalVisibleGroups);
|
||||
instance.modelMatrix, pvgSet);
|
||||
portalVisibleGroups.assign(pvgSet.begin(), pvgSet.end());
|
||||
std::sort(portalVisibleGroups.begin(), portalVisibleGroups.end());
|
||||
}
|
||||
|
||||
for (size_t gi = 0; gi < model.groups.size(); ++gi) {
|
||||
if (usePortalCulling &&
|
||||
portalVisibleGroups.find(static_cast<uint32_t>(gi)) == portalVisibleGroups.end()) {
|
||||
!std::binary_search(portalVisibleGroups.begin(), portalVisibleGroups.end(),
|
||||
static_cast<uint32_t>(gi))) {
|
||||
result.portalCulled++;
|
||||
continue;
|
||||
}
|
||||
|
|
@ -1414,62 +1417,18 @@ void WMORenderer::render(VkCommandBuffer cmd, VkDescriptorSet perFrameSet, const
|
|||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
if (doFrustumCull && !frustum.intersectsAABB(gMin, gMax))
|
||||
continue;
|
||||
}
|
||||
|
||||
result.visibleGroups.push_back(static_cast<uint32_t>(gi));
|
||||
}
|
||||
return result;
|
||||
};
|
||||
|
||||
// Dispatch culling — parallel when enough instances, sequential otherwise.
|
||||
std::vector<InstanceDrawList> drawLists;
|
||||
drawLists.reserve(visibleInstances.size());
|
||||
// Resize drawLists to match (reuses previous capacity)
|
||||
drawLists_.resize(visibleInstances_.size());
|
||||
|
||||
static const size_t minParallelCullInstances = std::max<size_t>(
|
||||
4, envSizeOrDefault("WOWEE_WMO_CULL_MT_MIN", 128));
|
||||
if (visibleInstances.size() >= minParallelCullInstances && numCullThreads_ > 1) {
|
||||
static const size_t minCullWorkPerThread = std::max<size_t>(
|
||||
16, envSizeOrDefault("WOWEE_WMO_CULL_WORK_PER_THREAD", 64));
|
||||
const size_t maxUsefulThreads = std::max<size_t>(
|
||||
1, (visibleInstances.size() + minCullWorkPerThread - 1) / minCullWorkPerThread);
|
||||
const size_t numThreads = std::min(static_cast<size_t>(numCullThreads_), maxUsefulThreads);
|
||||
if (numThreads <= 1) {
|
||||
for (size_t idx : visibleInstances) {
|
||||
drawLists.push_back(cullInstance(idx));
|
||||
}
|
||||
} else {
|
||||
const size_t chunkSize = visibleInstances.size() / numThreads;
|
||||
const size_t remainder = visibleInstances.size() % numThreads;
|
||||
|
||||
drawLists.resize(visibleInstances.size());
|
||||
|
||||
cullFutures_.clear();
|
||||
if (cullFutures_.capacity() < numThreads) {
|
||||
cullFutures_.reserve(numThreads);
|
||||
}
|
||||
|
||||
size_t start = 0;
|
||||
for (size_t t = 0; t < numThreads; ++t) {
|
||||
const size_t end = start + chunkSize + (t < remainder ? 1 : 0);
|
||||
cullFutures_.push_back(std::async(std::launch::async,
|
||||
[&, start, end]() {
|
||||
for (size_t j = start; j < end; ++j) {
|
||||
drawLists[j] = cullInstance(visibleInstances[j]);
|
||||
}
|
||||
}));
|
||||
start = end;
|
||||
}
|
||||
|
||||
for (auto& f : cullFutures_) {
|
||||
f.get();
|
||||
}
|
||||
}
|
||||
} else {
|
||||
for (size_t idx : visibleInstances)
|
||||
drawLists.push_back(cullInstance(idx));
|
||||
// Sequential culling (parallel dispatch overhead > savings for typical instance counts)
|
||||
for (size_t j = 0; j < visibleInstances_.size(); ++j) {
|
||||
cullInstance(visibleInstances_[j], drawLists_[j]);
|
||||
}
|
||||
|
||||
// ── Phase 2: Vulkan draw ────────────────────────────────
|
||||
|
|
@ -1484,7 +1443,7 @@ void WMORenderer::render(VkCommandBuffer cmd, VkDescriptorSet perFrameSet, const
|
|||
// Track which pipeline is currently bound: 0=opaque, 1=transparent, 2=glass
|
||||
int currentPipelineKind = 0;
|
||||
|
||||
for (const auto& dl : drawLists) {
|
||||
for (const auto& dl : drawLists_) {
|
||||
if (dl.instanceIndex >= instances.size()) continue;
|
||||
const auto& instance = instances[dl.instanceIndex];
|
||||
auto modelIt = loadedModels.find(instance.modelId);
|
||||
|
|
@ -2412,6 +2371,69 @@ VkTexture* WMORenderer::loadTexture(const std::string& path) {
|
|||
return rawPtr;
|
||||
}
|
||||
|
||||
void WMORenderer::backfillNormalMaps() {
|
||||
if (!normalMappingEnabled_ && !pomEnabled_) return;
|
||||
|
||||
if (!assetManager) return;
|
||||
|
||||
int generated = 0;
|
||||
for (auto& [key, entry] : textureCache) {
|
||||
if (entry.normalHeightMap) continue; // already has one
|
||||
if (!entry.texture) continue;
|
||||
|
||||
// Re-load the BLP from MPQ to get pixel data for normal map generation
|
||||
pipeline::BLPImage blp = assetManager->loadTexture(key);
|
||||
if (!blp.isValid() || blp.width == 0 || blp.height == 0) continue;
|
||||
|
||||
float variance = 0.0f;
|
||||
auto nhMap = generateNormalHeightMap(blp.data.data(), blp.width, blp.height, variance);
|
||||
if (nhMap) {
|
||||
entry.normalHeightMap = std::move(nhMap);
|
||||
entry.heightMapVariance = variance;
|
||||
generated++;
|
||||
}
|
||||
}
|
||||
|
||||
if (generated > 0) {
|
||||
VkDevice device = vkCtx_->getDevice();
|
||||
int rebound = 0;
|
||||
// Update merged batches: assign normal map pointer and rebind descriptor set
|
||||
for (auto& [modelId, model] : loadedModels) {
|
||||
for (auto& group : model.groups) {
|
||||
for (auto& mb : group.mergedBatches) {
|
||||
if (mb.normalHeightMap) continue; // already set
|
||||
if (!mb.texture) continue;
|
||||
// Find this texture in the cache
|
||||
for (const auto& [cacheKey, cacheEntry] : textureCache) {
|
||||
if (cacheEntry.texture.get() == mb.texture) {
|
||||
if (cacheEntry.normalHeightMap) {
|
||||
mb.normalHeightMap = cacheEntry.normalHeightMap.get();
|
||||
mb.heightMapVariance = cacheEntry.heightMapVariance;
|
||||
// Rebind descriptor set binding 2 to the real normal/height map
|
||||
if (mb.materialSet) {
|
||||
VkDescriptorImageInfo nhImgInfo = mb.normalHeightMap->descriptorInfo();
|
||||
VkWriteDescriptorSet write{};
|
||||
write.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET;
|
||||
write.dstSet = mb.materialSet;
|
||||
write.dstBinding = 2;
|
||||
write.descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER;
|
||||
write.descriptorCount = 1;
|
||||
write.pImageInfo = &nhImgInfo;
|
||||
vkUpdateDescriptorSets(device, 1, &write, 0, nullptr);
|
||||
rebound++;
|
||||
}
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
materialSettingsDirty_ = true;
|
||||
LOG_INFO("Backfilled ", generated, " normal/height maps (", rebound, " descriptor sets rebound) for deferred WMO textures");
|
||||
}
|
||||
}
|
||||
|
||||
// Ray-AABB intersection (slab method)
|
||||
// Returns true if the ray intersects the axis-aligned bounding box
|
||||
static bool rayIntersectsAABB(const glm::vec3& origin, const glm::vec3& dir,
|
||||
|
|
@ -3145,18 +3167,13 @@ bool WMORenderer::checkWallCollision(const glm::vec3& from, const glm::vec3& to,
|
|||
if (triHeight < 1.0f && tb.maxZ <= localFeetZ + 1.2f) continue;
|
||||
|
||||
// Use MOPY flags to filter wall collision.
|
||||
// Collidable triangles (flag 0x01) block the player — including
|
||||
// invisible collision walls (0x01 without 0x20) used in tunnels.
|
||||
// Skip detail/decorative geometry (0x04) and render-only surfaces.
|
||||
// Collide with triangles that have the collision flag (0x08) or no flags at all.
|
||||
// Skip detail/decorative (0x04) and render-only (0x20 without 0x08) surfaces.
|
||||
uint32_t triIdx = triStart / 3;
|
||||
if (!group.triMopyFlags.empty() && triIdx < group.triMopyFlags.size()) {
|
||||
uint8_t mopy = group.triMopyFlags[triIdx];
|
||||
if (mopy != 0) {
|
||||
bool collidable = (mopy & 0x01) != 0;
|
||||
bool detail = (mopy & 0x04) != 0;
|
||||
if (!collidable || detail) {
|
||||
continue;
|
||||
}
|
||||
if ((mopy & 0x04) || !(mopy & 0x08)) continue;
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -3217,8 +3234,8 @@ bool WMORenderer::checkWallCollision(const glm::vec3& from, const glm::vec3& to,
|
|||
if (absNz >= 0.35f) continue;
|
||||
|
||||
const float SKIN = 0.005f; // small separation so we don't re-collide immediately
|
||||
// Stronger push when inside WMO for more responsive indoor collision
|
||||
const float MAX_PUSH = insideWMO ? 0.35f : 0.15f;
|
||||
// Push must cover full penetration to prevent gradual clip-through
|
||||
const float MAX_PUSH = PLAYER_RADIUS;
|
||||
float penetration = (PLAYER_RADIUS - horizDist);
|
||||
float pushDist = glm::clamp(penetration + SKIN, 0.0f, MAX_PUSH);
|
||||
glm::vec2 pushDir2;
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue