From bb6e625caff8b006201040194845bf06cc40e71a Mon Sep 17 00:00:00 2001 From: nyorain Date: Thu, 11 Jul 2024 19:50:31 +0200 Subject: [PATCH] Fix AccelStruct building issues --- src/accelStruct.cpp | 23 +++++++ src/accelStruct.hpp | 4 ++ src/cb.cpp | 5 ++ src/command/commands.cpp | 4 +- src/command/record.cpp | 1 + src/command/record.hpp | 7 +++ src/commandHook/hook.cpp | 15 ++--- src/commandHook/hook.hpp | 2 +- src/commandHook/record.cpp | 52 +++------------- src/commandHook/record.hpp | 19 ++++-- src/commandHook/submission.cpp | 95 +++++++++++----------------- src/device.cpp | 27 ++++---- src/gui/gui.cpp | 5 ++ src/layer.cpp | 17 ++++- src/queue.cpp | 109 +++++++++++++++++++-------------- src/queue.hpp | 1 + src/submit.cpp | 6 +- 17 files changed, 209 insertions(+), 183 deletions(-) diff --git a/src/accelStruct.cpp b/src/accelStruct.cpp index d6834415..96242688 100644 --- a/src/accelStruct.cpp +++ b/src/accelStruct.cpp @@ -29,6 +29,8 @@ AccelStruct& accelStructAt(Device& dev, VkDeviceAddress address) { } AccelStruct* tryAccelStructAtLocked(Device& dev, VkDeviceAddress address) { + ZoneScoped; + assertOwnedOrShared(dev.mutex); auto it = dev.accelStructAddresses.find(address); if(it == dev.accelStructAddresses.end()) { @@ -37,6 +39,22 @@ AccelStruct* tryAccelStructAtLocked(Device& dev, VkDeviceAddress address) { return it->second; } +std::unordered_map captureBLASesLocked(Device& dev) { + ZoneScoped; + + // TODO: could be optimized (a bit) by maintaing such a map. Then we would just have to copy it + assertOwnedOrShared(dev.mutex); + + std::unordered_map ret; + for(auto& as : dev.accelStructs.inner) { + if(as->effectiveType == VK_ACCELERATION_STRUCTURE_TYPE_BOTTOM_LEVEL_KHR && as->pendingState) { + ret.emplace(as->deviceAddress, as->pendingState); + } + } + + return ret; +} + // building /* void writeAABBs(AccelStruct& accelStruct, unsigned id, @@ -189,6 +207,10 @@ IntrusivePtr createState(AccelStruct& accelStruct, const VkAccelerationStructureBuildRangeInfoKHR* buildRangeInfos) { auto& dev = *accelStruct.dev; + dlg_assert(info.type != VK_ACCELERATION_STRUCTURE_TYPE_GENERIC_KHR); + dlg_assert(accelStruct.effectiveType == VK_ACCELERATION_STRUCTURE_TYPE_GENERIC_KHR || + info.type == accelStruct.effectiveType); + accelStruct.effectiveType = info.type; if(info.geometryCount == 0u) { return {}; // TODO: still return state? @@ -331,6 +353,7 @@ VKAPI_ATTR VkResult VKAPI_CALL CreateAccelerationStructureKHR( accelStruct.buf = &buf; accelStruct.handle = *pAccelerationStructure; accelStruct.type = pCreateInfo->type; + accelStruct.effectiveType = accelStruct.type; accelStruct.offset = pCreateInfo->offset; accelStruct.size = pCreateInfo->size; diff --git a/src/accelStruct.hpp b/src/accelStruct.hpp index 755db7d4..b68e6772 100644 --- a/src/accelStruct.hpp +++ b/src/accelStruct.hpp @@ -98,6 +98,10 @@ AccelStruct& accelStructAt(Device& dev, VkDeviceAddress address); AccelStruct& accelStructAtLocked(Device& dev, VkDeviceAddress address); AccelStruct* tryAccelStructAtLocked(Device& dev, VkDeviceAddress address); +// Returns a map of all BLASes (that have been built) and their +// pending states at this point in time. 
+std::unordered_map captureBLASesLocked(Device& dev); + Mat4f toMat4f(const VkTransformMatrixKHR& src); // VK_KHR_acceleration_structure diff --git a/src/cb.cpp b/src/cb.cpp index 8da3b068..b0aa725f 100644 --- a/src/cb.cpp +++ b/src/cb.cpp @@ -2974,6 +2974,7 @@ VKAPI_ATTR void VKAPI_CALL CmdBuildAccelerationStructuresKHR( } buildInfo.pGeometries = dst.data(); + buildInfo.ppGeometries = nullptr; } if(buildInfo.srcAccelerationStructure) { @@ -3073,6 +3074,10 @@ VKAPI_ATTR void VKAPI_CALL CmdCopyAccelerationStructureKHR( useHandle(cb, cmd, *cmd.src); useHandle(cb, cmd, *cmd.dst); + auto& copy = cb.builder().record_->accelStructCopies.emplace_back(); + copy.src = cmd.src; + copy.dst = cmd.dst; + auto fwd = *pInfo; fwd.src = cmd.src->handle; fwd.dst = cmd.dst->handle; diff --git a/src/command/commands.cpp b/src/command/commands.cpp index ace76190..8440be40 100644 --- a/src/command/commands.cpp +++ b/src/command/commands.cpp @@ -1909,8 +1909,8 @@ void CopyAccelStructCmd::record(const Device& dev, VkCommandBuffer cb, u32) cons VkCopyAccelerationStructureInfoKHR info {}; info.sType = VK_STRUCTURE_TYPE_COPY_ACCELERATION_STRUCTURE_INFO_KHR; info.pNext = pNext; - info.dst = src->handle; - info.src = dst->handle; + info.src = src->handle; + info.dst = dst->handle; info.mode = mode; dev.dispatch.CmdCopyAccelerationStructureKHR(cb, &info); } diff --git a/src/command/record.cpp b/src/command/record.cpp index f8ca6cc4..47d81af8 100644 --- a/src/command/record.cpp +++ b/src/command/record.cpp @@ -53,6 +53,7 @@ CommandRecord::CommandRecord(ManualTag, Device* xdev) : queueFamily(0u), // initialize allocators pushLables(alloc), + accelStructCopies(alloc), used(alloc), secondaries(alloc) { ++DebugStats::get().aliveRecords; diff --git a/src/command/record.hpp b/src/command/record.hpp index d551b9b4..3aa0bc8a 100644 --- a/src/command/record.hpp +++ b/src/command/record.hpp @@ -262,6 +262,11 @@ inline auto find(CommandAllocHashSet& use return it; } +struct AccelStructCopy { + AccelStruct* src; + AccelStruct* dst; +}; + // Represents the recorded state of a command buffer. // We represent it as extra, reference-counted object so we can display // old records as well. @@ -306,6 +311,8 @@ struct CommandRecord { u32 numPopLabels {}; CommandAllocList pushLables; + CommandAllocList accelStructCopies; + struct UsedHandles { // NOTE: change this when adding maps here! 
// Important for CmdExecuteCommands impl assert diff --git a/src/commandHook/hook.cpp b/src/commandHook/hook.cpp index ae827732..05facd6f 100644 --- a/src/commandHook/hook.cpp +++ b/src/commandHook/hook.cpp @@ -377,7 +377,7 @@ void CommandHook::hook(QueueSubmitter& subm) { std::lock_guard lock(dev.mutex); // fast early-outs - if(localCaptures_.empty()) { + if(localCaptures_.empty() && !forceHook.load()) { auto hasBuildCmd = false; for(auto [subID, sub] : enumerate(subm.dstBatch->submissions)) { auto& cmdSub = std::get(sub.data); @@ -389,7 +389,7 @@ void CommandHook::hook(QueueSubmitter& subm) { } } - if(!hasBuildCmd && (target_.type == TargetType::none || freeze.load())) { + if(!hasBuildCmd && (target_.type == TargetType::none)) { return; } } @@ -520,7 +520,7 @@ void CommandHook::hook(QueueSubmitter& subm) { } } - if(!hookData && rec.buildsAccelStructs && hookAccelStructBuilds) { + if(!hookData && (forceHook.load() || (rec.buildsAccelStructs && hookAccelStructBuilds))) { dlg_assert(!hooked); hooked = doHook(rec, {}, 0.f, sub, hookData); } @@ -794,12 +794,9 @@ VkCommandBuffer CommandHook::doHook(CommandRecord& record, continue; } - // We want to capture an acceleration structure. - // CommandHookSubmission::{activate, finish} might write into - // the dst state, assumes it wasn't used before. - // PERF: we could make this work in theory. Might need - // more complicated state re-using logic - if(!hookRecord->accelStructCaptures.empty()) { + // Accel Structure stuff re-use is hard due to ordering. + // PERF: we could make some of this work in theory. + if(!hookRecord->accelStructOps.empty()) { continue; } diff --git a/src/commandHook/hook.hpp b/src/commandHook/hook.hpp index 468bf905..932ce6d6 100644 --- a/src/commandHook/hook.hpp +++ b/src/commandHook/hook.hpp @@ -182,7 +182,7 @@ struct CommandHook { // Mainly useful for debugging, should always be true otherwise // as we need it to have accelStruct data. - bool hookAccelStructBuilds {true}; + std::atomic hookAccelStructBuilds {true}; public: CommandHook(Device& dev); diff --git a/src/commandHook/record.cpp b/src/commandHook/record.cpp index 3fde7555..e729ae9e 100644 --- a/src/commandHook/record.cpp +++ b/src/commandHook/record.cpp @@ -523,6 +523,7 @@ void CommandHookRecord::hookRecord(Command* cmd, RecordInfo& info) { while(cmd) { // check if command needs additional, manual hook if(cmd->category() == CommandCategory::buildAccelStruct && hook->hookAccelStructBuilds) { + auto* basCmd = commandCast(cmd); auto* basCmdIndirect = commandCast(cmd); dlg_assert(basCmd || basCmdIndirect); @@ -538,6 +539,10 @@ void CommandHookRecord::hookRecord(Command* cmd, RecordInfo& info) { info.rebindComputeState = true; } + if(auto* cas = commandCast(cmd); cas) { + accelStructOps.push_back(AccelStructCopy{cas->src, cas->dst}); + } + // check if command is on hooking chain if(info.nextHookLevel < hcommand.size() && cmd == hcommand[info.nextHookLevel]) { auto hookDst = (info.nextHookLevel == hcommand.size() - 1); @@ -754,34 +759,9 @@ void CommandHookRecord::copyDs(Command& bcmd, RecordInfo& info, using CapturedAccelStruct = CommandHookState::CapturedAccelStruct; auto& dstCapture = dst.data.emplace(); + (void) dstCapture; - // We only have to store the state now if the tlas has been - // previously built in this record. Otherwise we cannot know - // its state at execution time right now. 
It will get captured - // in CommandHookSubmission::finish - auto tlasBuildPtr = lastAccelStructBuild(elem.accelStruct->deviceAddress); - if(tlasBuildPtr) { - dstCapture.tlas = std::move(tlasBuildPtr); - - // NOTE: only capture the blases built by this record. - // The rest will be captured in CommandHookSubmission::finish. - // For BLASes not built in this record, we cannot tell their - // pending state at execution time for sure. - // We just write everything we build into the map, we cannot - // know which blases are referenced by the tlas at this point on cpu. - // Yes, this might include blas builds *after* the TLAS was build. - // But when a blas was built that is inside the tlas, - // the tlas would be invalid at this point (application error). - for(auto& cmd : accelStructBuilds) { - for(auto& build : cmd.builds) { - dlg_assert_or(build.dst && build.state, continue); - auto blasAddress = build.dst->deviceAddress; - dstCapture.blases.insert({blasAddress, build.state}); - } - } - } - - accelStructCaptures.push_back({dstID, elem.accelStruct}); + accelStructOps.push_back(AccelStructCapture{dstID, elem.accelStruct}); } else if(cat == DescriptorCategory::bufferView) { // TODO: copy as buffer or image? maybe best to copy // as buffer but then create bufferView on our own? @@ -1270,7 +1250,8 @@ void CommandHookRecord::hookBefore(const BuildAccelStructsCmd& cmd) { auto& cmdHook = *dev.commandHook; - auto& build = accelStructBuilds.emplace_back(); + auto& ops = accelStructOps.emplace_back(); + auto& build = ops.emplace(); build.command = &cmd; // TODO 1. Make sure all data has been written via memory barrier. @@ -1425,21 +1406,6 @@ void CommandHookRecord::hookBefore(const BuildAccelStructsIndirectCmd& cmd) { dlg_error("TODO: implement support for copying BuildAccelStructsIndirectCmd data"); } -IntrusivePtr CommandHookRecord::lastAccelStructBuild(u64 accelStructAddress) { - // TODO PERF: could be solved more efficiently with an unordered map - // might be relevant when there are many (thousand) builds - for(auto& cmd : accelStructBuilds) { - for(auto& build : cmd.builds) { - dlg_assert(build.dst); - if(build.dst->deviceAddress == accelStructAddress) { - return build.state; - } - } - } - - return {}; -} - void CommandHookRecord::finish() noexcept { // NOTE: We don't do this since we can assume the record to remain // valid until all submissions are finished. 
We can assume it to diff --git a/src/commandHook/record.hpp b/src/commandHook/record.hpp index fe184da8..a93a67fa 100644 --- a/src/commandHook/record.hpp +++ b/src/commandHook/record.hpp @@ -65,20 +65,31 @@ struct CommandHookRecord { struct Build { AccelStruct* dst; - IntrusivePtr state; + AccelStructStatePtr state; }; std::vector builds; }; - std::vector accelStructBuilds; - struct AccelStructCapture { unsigned id; // index into state->copiedDescriptors AccelStruct* accelStruct; }; - std::vector accelStructCaptures; + struct AccelStructCopy { + AccelStruct* src {}; + AccelStruct* dst {}; + AccelStructStatePtr state {}; // filled in later by CommandHookSubmission + }; + + using AccelStructOp = std::variant< + AccelStructBuild, + AccelStructCapture, + AccelStructCopy + >; + + // Order here is important, ops might depend on each other + std::vector accelStructOps; // Needed for image to buffer sample-copying std::vector descriptorSets; diff --git a/src/commandHook/submission.cpp b/src/commandHook/submission.cpp index 50a7b566..20cbfca7 100644 --- a/src/commandHook/submission.cpp +++ b/src/commandHook/submission.cpp @@ -13,7 +13,6 @@ namespace vil { CommandHookSubmission::CommandHookSubmission(CommandHookRecord& rec, Submission& subm, CommandDescriptorSnapshot descriptors) : record(&rec), descriptorSnapshot(std::move(descriptors)) { - dlg_assert(rec.state || !rec.accelStructBuilds.empty()); dlg_assert(!rec.writer); rec.writer = &subm; } @@ -35,11 +34,30 @@ CommandHookSubmission::~CommandHookSubmission() { } void CommandHookSubmission::activate() { - // set AccelStruct::pendingState for all build accelStructs. - for(auto& buildCmd : record->accelStructBuilds) { - for(auto& build : buildCmd.builds) { - dlg_assert(build.dst); - build.dst->pendingState = build.state; + for(auto& op : record->accelStructOps) { + if(auto* buildOp = std::get_if(&op); buildOp) { + for(auto& build : buildOp->builds) { + dlg_assert(build.dst); + build.dst->pendingState = build.state; + } + } else if(auto* copy = std::get_if(&op); copy) { + dlg_assert(copy->src->pendingState); + copy->state = copy->src->pendingState; + copy->dst->pendingState = copy->src->pendingState; + } else if(auto* capture = std::get_if(&op); capture) { + dlg_assert(record->state); + + auto& dst = record->state->copiedDescriptors[capture->id]; + auto& dstCapture = std::get(dst.data); + + dlg_assert(capture->accelStruct->pendingState); + dstCapture.tlas = capture->accelStruct->pendingState; + + // NOTE: this can be quite expensive, in the case of many BLASes + // This cannot really be optimized though. At this point in time we might + // not know the current TLAS instances and we do not want to capture the BLASes + // at any later time since they might have been invalidated then already. + dstCapture.blases = captureBLASesLocked(*record->hook->dev_); } } } @@ -124,51 +142,6 @@ void CommandHookSubmission::finish(Submission& subm) { } } - // capture pending accelStruct states - // TODO: logically, we should do this in activate(). Here, the blas - // might already have gotten new states. But in activate() we might not - // know the instances on CPU side, as the TLAS might still be building. - // On the other hand, doing this here should be *relatively* safe as - // the tlas is invalidated when a blas is rebuilt. - // We should guarantee that submissions are always finished in order - // (submission order?) for stronger guarantees here. - // Alternatively, we could capture a map of ALL blases to their state - // in activate (meh). 
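
Aside on the change above: the new accelStructOps vector merges the previous accelStructBuilds and accelStructCaptures lists into one ordered sequence of variant ops, because later ops can depend on state written by earlier ones (a copy forwards the pendingState a build installed, a capture reads whatever is pending at that point). The following is a minimal sketch of that ordered-dispatch pattern with simplified stand-in types, not vil's actual AccelStruct/AccelStructState definitions:

#include <memory>
#include <variant>
#include <vector>

// Simplified stand-ins for vil's acceleration-structure state types.
struct State {};
using StatePtr = std::shared_ptr<State>;
struct AccelStruct { StatePtr pendingState; };

struct Build { AccelStruct* dst; StatePtr state; };
struct Copy { AccelStruct* src; AccelStruct* dst; StatePtr state; };
struct Capture { AccelStruct* tlas; StatePtr captured; };

using Op = std::variant<Build, Copy, Capture>;

// Processing in recorded order matters: a Copy forwards the pendingState a
// previous Build installed, and a Capture reads whatever is pending when it runs.
void activate(std::vector<Op>& ops) {
	for(auto& op : ops) {
		if(auto* b = std::get_if<Build>(&op)) {
			b->dst->pendingState = b->state;
		} else if(auto* c = std::get_if<Copy>(&op)) {
			c->state = c->src->pendingState;
			c->dst->pendingState = c->state;
		} else if(auto* cap = std::get_if<Capture>(&op)) {
			cap->captured = cap->tlas->pendingState;
		}
	}
}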
- auto& dev = *record->hook->dev_; - for(auto& capture : record->accelStructCaptures) { - dlg_assert(capture.id < record->state->copiedDescriptors.size()); - dlg_assert(capture.accelStruct); - dlg_assert(capture.accelStruct->effectiveType == - VK_ACCELERATION_STRUCTURE_TYPE_TOP_LEVEL_KHR); - - using CapturedAccelStruct = CommandHookState::CapturedAccelStruct; - - auto& dstDescriptor = record->state->copiedDescriptors[capture.id]; - auto& dstCapture = std::get(dstDescriptor.data); - if(!dstCapture.tlas) { // already set when built during commandBuffer - dstCapture.tlas = capture.accelStruct->pendingState; - } - - auto& tlasState = *dstCapture.tlas; - // TODO: this can probably fail due to missing finish ordering. - dlg_assert(tlasState.built); - - auto& inis = std::get(tlasState.data); - for(auto ini : inis.instances) { - auto address = ini.accelerationStructureReference; - if(address) { - auto* accelStruct = tryAccelStructAtLocked(dev, address); - dlg_assertm(accelStruct, "Invalid BLAS in TLAS"); - if(accelStruct) { - // NOTE: we use insert here by design. When the blas was - // built by the given commandBuffer, it was already - // inserted during record-hook-recording. - dstCapture.blases.insert({address, accelStruct->lastValid}); - } - } - } - } - finishAccelStructBuilds(); CompletedHook* dstCompleted {}; @@ -246,17 +219,21 @@ void CommandHookSubmission::finishAccelStructBuilds() { // Notify all accel struct builds that they have finished. // We are guaranteed by the standard that all accelStructs build // by the submission are still valid at this point. - for(auto& buildCmd : record->accelStructBuilds) { - for(auto& build : buildCmd.builds) { - dlg_assert(build.state); - build.state->built = true; + for(auto& op : record->accelStructOps) { + if(auto* buildOp = std::get_if(&op); buildOp) { + for(auto& build : buildOp->builds) { + dlg_assert(build.state); + build.state->built = true; - // TODO: needed? - build.state->buffer.invalidateMap(); + // TODO: needed? + build.state->buffer.invalidateMap(); - if(build.dst->pendingState == build.state) { - build.dst->lastValid = build.dst->pendingState; + build.dst->lastValid = build.state; } + } else if(auto* copy = std::get_if(&op); copy) { + dlg_assert(copy->dst->pendingState); + dlg_assert(copy->state); + copy->dst->lastValid = copy->state; } } } diff --git a/src/device.cpp b/src/device.cpp index 493f1d6f..e0f7c5fc 100644 --- a/src/device.cpp +++ b/src/device.cpp @@ -820,22 +820,17 @@ VkResult doCreateDevice( // TODO: no idea exactly why this is needed. I guess they should not be // part of the device loader table in the first place? 
 	// Might be related: https://github.com/KhronosGroup/Vulkan-Loader/issues/116
-	dev.dispatch.QueueBeginDebugUtilsLabelEXT = (PFN_vkQueueBeginDebugUtilsLabelEXT)
-		fpGetInstanceProcAddr(ini.handle, "vkQueueBeginDebugUtilsLabelEXT");
-	dev.dispatch.QueueEndDebugUtilsLabelEXT = (PFN_vkQueueEndDebugUtilsLabelEXT)
-		fpGetInstanceProcAddr(ini.handle, "vkQueueEndDebugUtilsLabelEXT");
-	dev.dispatch.CmdInsertDebugUtilsLabelEXT = (PFN_vkCmdInsertDebugUtilsLabelEXT)
-		fpGetInstanceProcAddr(ini.handle, "vkCmdInsertDebugUtilsLabelEXT");
-	dev.dispatch.CmdBeginDebugUtilsLabelEXT = (PFN_vkCmdBeginDebugUtilsLabelEXT)
-		fpGetInstanceProcAddr(ini.handle, "vkCmdBeginDebugUtilsLabelEXT");
-	dev.dispatch.CmdEndDebugUtilsLabelEXT = (PFN_vkCmdEndDebugUtilsLabelEXT)
-		fpGetInstanceProcAddr(ini.handle, "vkCmdEndDebugUtilsLabelEXT");
-	dev.dispatch.SetDebugUtilsObjectNameEXT = (PFN_vkSetDebugUtilsObjectNameEXT)
-		fpGetInstanceProcAddr(ini.handle, "vkSetDebugUtilsObjectNameEXT");
-	dev.dispatch.SetDebugUtilsObjectTagEXT = (PFN_vkSetDebugUtilsObjectTagEXT)
-		fpGetInstanceProcAddr(ini.handle, "vkSetDebugUtilsObjectTagEXT");
-	dev.dispatch.QueueInsertDebugUtilsLabelEXT = (PFN_vkQueueInsertDebugUtilsLabelEXT)
-		fpGetInstanceProcAddr(ini.handle, "vkQueueInsertDebugUtilsLabelEXT");
+#define LOAD_FALLBACK_INI(name) if(!dev.dispatch.name) dev.dispatch.name = (PFN_vk##name) \
+	fpGetInstanceProcAddr(ini.handle, "vk" #name)
+
+	LOAD_FALLBACK_INI(QueueBeginDebugUtilsLabelEXT);
+	LOAD_FALLBACK_INI(QueueEndDebugUtilsLabelEXT);
+	LOAD_FALLBACK_INI(CmdInsertDebugUtilsLabelEXT);
+	LOAD_FALLBACK_INI(CmdBeginDebugUtilsLabelEXT);
+	LOAD_FALLBACK_INI(CmdEndDebugUtilsLabelEXT);
+	LOAD_FALLBACK_INI(SetDebugUtilsObjectNameEXT);
+	LOAD_FALLBACK_INI(SetDebugUtilsObjectTagEXT);
+	LOAD_FALLBACK_INI(QueueInsertDebugUtilsLabelEXT);
 
 	// NOTE: not sure if this is needed actually.
 	// Should do it for all commands that need it for now.
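
For reference, the LOAD_FALLBACK_INI macro introduced above only queries the instance-level loader when the device dispatch-table entry is still null. A sketch of what a single expansion amounts to, written out for one entry point; the surrounding struct definitions are simplified stand-ins, not vil's actual types:

#include <vulkan/vulkan.h>

// Simplified stand-ins for vil's instance/device/dispatch-table types.
struct InstanceData { VkInstance handle; };
struct DispatchTable { PFN_vkSetDebugUtilsObjectNameEXT SetDebugUtilsObjectNameEXT {}; };
struct DeviceData { DispatchTable dispatch; };

// Roughly what LOAD_FALLBACK_INI(SetDebugUtilsObjectNameEXT) expands to:
// fall back to the instance loader only if the device dispatch table entry
// was not already filled in.
void loadFallback(DeviceData& dev, InstanceData& ini,
		PFN_vkGetInstanceProcAddr fpGetInstanceProcAddr) {
	if(!dev.dispatch.SetDebugUtilsObjectNameEXT) {
		dev.dispatch.SetDebugUtilsObjectNameEXT = (PFN_vkSetDebugUtilsObjectNameEXT)
			fpGetInstanceProcAddr(ini.handle, "vkSetDebugUtilsObjectNameEXT");
	}
}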
diff --git a/src/gui/gui.cpp b/src/gui/gui.cpp index e7fa6b59..51a03583 100644 --- a/src/gui/gui.cpp +++ b/src/gui/gui.cpp @@ -1129,6 +1129,11 @@ void Gui::drawOverviewUI(Draw& draw) { if(ImGui::Checkbox("Allow hook record reuse", &allow)) { dev.commandHook->allowReuse.store(allow); } + + auto hookAccel = dev.commandHook->hookAccelStructBuilds.load(); + if(ImGui::Checkbox("Hook AccelerationStructures", &hookAccel)) { + dev.commandHook->hookAccelStructBuilds.store(hookAccel); + } } } diff --git a/src/layer.cpp b/src/layer.cpp index 9b8131db..554a906e 100644 --- a/src/layer.cpp +++ b/src/layer.cpp @@ -45,6 +45,7 @@ namespace vil { // Util static auto dlgWarnErrorCount = 0u; static auto breakOnError = false; +static auto dlgMinLevel = dlg_level_trace; // TODO: doesn't belong here std::mutex ThreadContext::mutex_; @@ -56,6 +57,10 @@ void dlgHandler(const struct dlg_origin* origin, const char* string, void* data) // (void) origin; #ifndef DLG_DISABLE + if(origin->level < dlgMinLevel) { + return; + } + dlg_default_output(origin, string, data); if(origin->level >= dlg_level_warn) { @@ -172,8 +177,16 @@ VkResult doCreateInstance( } #ifndef DLG_DISABLE - if(checkEnvBinary("VIL_DLG_HANDLER", false) || breakOnError) - { + if(checkEnvBinary("VIL_DLG_HANDLER", false) || breakOnError) { + if(const char* minLevel = std::getenv("VIL_MIN_LOG_LEVEL"); minLevel) { + if(std::strcmp(minLevel, "trace")) dlgMinLevel = dlg_level_warn; + else if(std::strcmp(minLevel, "debug")) dlgMinLevel = dlg_level_debug; + else if(std::strcmp(minLevel, "info")) dlgMinLevel = dlg_level_info; + else if(std::strcmp(minLevel, "warn")) dlgMinLevel = dlg_level_warn; + else if(std::strcmp(minLevel, "error")) dlgMinLevel = dlg_level_error; + else dlg_error("Invalid value for VIL_MIN_LOG_LEVEL: {}", minLevel); + } + dlg_set_handler(dlgHandler, nullptr); #ifdef _WIN32 AllocConsole(); diff --git a/src/queue.cpp b/src/queue.cpp index 9107a274..43100cb7 100644 --- a/src/queue.cpp +++ b/src/queue.cpp @@ -19,46 +19,8 @@ namespace vil { -std::optional checkLocked(SubmissionBatch& batch) { - ZoneScoped; - +void finish(SubmissionBatch& batch) { auto& dev = *batch.queue->dev; - assertOwned(dev.mutex); - - // If a submission in the batch isn't even active yet, it can't - // be finished. We don't have to check it, but it's an optimization. - // NOTE: this is important for integration testing though. The - // mock icd always returns VK_SUCCESS for fences. - for(auto& sub : batch.submissions) { - if(!sub.active) { - return std::nullopt; - } - } - - if(batch.appFence) { - auto res = dev.dispatch.GetFenceStatus(dev.handle, batch.appFence->handle); - if(res != VK_SUCCESS) { - if(res == VK_ERROR_DEVICE_LOST) { - onDeviceLost(dev); - } - - return std::nullopt; - } - } else { - dlg_assert(batch.ourFence); - auto res = dev.dispatch.GetFenceStatus(dev.handle, batch.ourFence); - if(res != VK_SUCCESS) { - if(res == VK_ERROR_DEVICE_LOST) { - onDeviceLost(dev); - } - - return std::nullopt; - } - } - // apparently unique_ptr == ptr comparision not supported in stdlibc++ yet? 
- auto finder = [&](auto& ptr){ return ptr.get() == &batch; }; - auto it = std::find_if(dev.pending.begin(), dev.pending.end(), finder); - dlg_assert(it != dev.pending.end()); for(auto& sub : batch.submissions) { dlg_assert(sub.active); @@ -66,14 +28,20 @@ std::optional checkLocked(SubmissionBatch& batch) { // process finished records if(batch.type == SubmissionType::command) { auto& cmdSub = std::get(sub.data); - for(auto& [cb, hookData] : cmdSub.cbs) { - if(hookData) { - hookData->finish(sub); + for(auto& scb : cmdSub.cbs) { + if(scb.hook) { + scb.hook->finish(sub); + } else { + auto& accelStructCopies = scb.cb->lastRecordLocked()->accelStructCopies; + dlg_assert(accelStructCopies.size() == scb.accelStructCopies.size()); + for(auto [i, copy] : enumerate(accelStructCopies)) { + copy.dst->lastValid = scb.accelStructCopies[i]; + } } - auto it2 = std::find(cb->pending.begin(), cb->pending.end(), &sub); - dlg_assert(it2 != cb->pending.end()); - cb->pending.erase(it2); + auto it2 = std::find(scb.cb->pending.begin(), scb.cb->pending.end(), &sub); + dlg_assert(it2 != scb.cb->pending.end()); + scb.cb->pending.erase(it2); } } @@ -135,6 +103,50 @@ std::optional checkLocked(SubmissionBatch& batch) { sem.signals.erase(it); } } +} + +std::optional checkLocked(SubmissionBatch& batch) { + ZoneScoped; + + auto& dev = *batch.queue->dev; + assertOwned(dev.mutex); + + // If a submission in the batch isn't even active yet, it can't + // be finished. We don't have to check it, but it's an optimization. + // NOTE: this is important for integration testing though. The + // mock icd always returns VK_SUCCESS for fences. + for(auto& sub : batch.submissions) { + if(!sub.active) { + return std::nullopt; + } + } + + if(batch.appFence) { + auto res = dev.dispatch.GetFenceStatus(dev.handle, batch.appFence->handle); + if(res != VK_SUCCESS) { + if(res == VK_ERROR_DEVICE_LOST) { + onDeviceLost(dev); + } + + return std::nullopt; + } + } else { + dlg_assert(batch.ourFence); + auto res = dev.dispatch.GetFenceStatus(dev.handle, batch.ourFence); + if(res != VK_SUCCESS) { + if(res == VK_ERROR_DEVICE_LOST) { + onDeviceLost(dev); + } + + return std::nullopt; + } + } + // apparently unique_ptr == ptr comparision not supported in stdlibc++ yet? 
+ auto finder = [&](auto& ptr){ return ptr.get() == &batch; }; + auto it = std::find_if(dev.pending.begin(), dev.pending.end(), finder); + dlg_assert(it != dev.pending.end()); + + finish(batch); if(batch.ourFence) { dev.dispatch.ResetFences(dev.handle, 1, &batch.ourFence); @@ -554,6 +566,13 @@ void activateLocked(CommandSubmission& cmdSub) { // notify hooks that submission was activated if(scb.hook) { scb.hook->activate(); + } else { + dlg_assert(scb.accelStructCopies.empty()); + for(auto& copy : recPtr->accelStructCopies) { + dlg_assert(copy.src->pendingState); + copy.dst->pendingState = copy.src->pendingState; + scb.accelStructCopies.push_back(copy.src->pendingState); + } } // store pending layouts for(auto& ui : recPtr->used.images) { diff --git a/src/queue.hpp b/src/queue.hpp index bf40f7d0..104ee928 100644 --- a/src/queue.hpp +++ b/src/queue.hpp @@ -66,6 +66,7 @@ struct QueueFamily { struct SubmittedCommandBuffer { CommandBuffer* cb {}; std::unique_ptr hook; // optional + std::vector accelStructCopies; // size() == cb->rec->accelStructCopies.size() SubmittedCommandBuffer(); SubmittedCommandBuffer(SubmittedCommandBuffer&&) noexcept = default; diff --git a/src/submit.cpp b/src/submit.cpp index a74249a6..d7fdf631 100644 --- a/src/submit.cpp +++ b/src/submit.cpp @@ -851,7 +851,8 @@ bool potentiallyWritesLocked(const Submission& subm, const Image* img, const Buf if(subm.parent->type == SubmissionType::command) { auto& cmdSub = std::get(subm.data); - for(auto& [cb, _] : cmdSub.cbs) { + for(auto& scb : cmdSub.cbs) { + auto& cb = scb.cb; auto& rec = *cb->lastRecordLocked(); if(buf) { @@ -953,7 +954,8 @@ std::vector needsSyncLocked(const SubmissionBatch& pending, c // writen by the submission if(subm.parent->type == SubmissionType::command) { auto& cmdSub = std::get(subm.data); - for(auto& [_, hookPtr] : cmdSub.cbs) { + for(auto& scb : cmdSub.cbs) { + auto& hookPtr = scb.hook; // TODO(perf): we might not need sync in all cases. Pretty much only // for images and xfb buffers I guess. if(hookPtr && hookPtr->record->state.get() == draw.usedHookState.get()) {
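
One note on the VIL_MIN_LOG_LEVEL handling added in layer.cpp above: std::strcmp returns 0 when its arguments are equal, so as written the chain takes each branch when the strings differ, and the "trace" branch assigns dlg_level_warn. Below is a sketch of the comparison chain with explicit "== 0" checks, assuming the intent is for "trace" to map to dlg_level_trace; the helper name is made up, the dlg_level_* enumerators come from the dlg library vil already uses, and error reporting for unknown values is left out:

#include <cstdlib>
#include <cstring>
#include <dlg/dlg.h>

static auto dlgMinLevel = dlg_level_trace;

void applyMinLogLevelFromEnv() {
	const char* minLevel = std::getenv("VIL_MIN_LOG_LEVEL");
	if(!minLevel) {
		return;
	}

	// std::strcmp returns 0 on equality, hence the explicit "== 0" checks.
	if(std::strcmp(minLevel, "trace") == 0) dlgMinLevel = dlg_level_trace;
	else if(std::strcmp(minLevel, "debug") == 0) dlgMinLevel = dlg_level_debug;
	else if(std::strcmp(minLevel, "info") == 0) dlgMinLevel = dlg_level_info;
	else if(std::strcmp(minLevel, "warn") == 0) dlgMinLevel = dlg_level_warn;
	else if(std::strcmp(minLevel, "error") == 0) dlgMinLevel = dlg_level_error;
	// else: unknown value, leave dlgMinLevel unchanged (the patch reports this
	// case via dlg_error).
}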