Skip to content

Commit

Permalink
Fix AccelStruct building issues
Browse files Browse the repository at this point in the history
  • Loading branch information
nyorain committed Jul 11, 2024
1 parent 1197472 commit bb6e625
Show file tree
Hide file tree
Showing 17 changed files with 209 additions and 183 deletions.
23 changes: 23 additions & 0 deletions src/accelStruct.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,8 @@ AccelStruct& accelStructAt(Device& dev, VkDeviceAddress address) {
}

AccelStruct* tryAccelStructAtLocked(Device& dev, VkDeviceAddress address) {
ZoneScoped;

assertOwnedOrShared(dev.mutex);
auto it = dev.accelStructAddresses.find(address);
if(it == dev.accelStructAddresses.end()) {
Expand All @@ -37,6 +39,22 @@ AccelStruct* tryAccelStructAtLocked(Device& dev, VkDeviceAddress address) {
return it->second;
}

std::unordered_map<VkDeviceAddress, AccelStructStatePtr> captureBLASesLocked(Device& dev) {
ZoneScoped;

// TODO: could be optimized (a bit) by maintaing such a map. Then we would just have to copy it
assertOwnedOrShared(dev.mutex);

std::unordered_map<VkDeviceAddress, AccelStructStatePtr> ret;
for(auto& as : dev.accelStructs.inner) {
if(as->effectiveType == VK_ACCELERATION_STRUCTURE_TYPE_BOTTOM_LEVEL_KHR && as->pendingState) {
ret.emplace(as->deviceAddress, as->pendingState);
}
}

return ret;
}

// building
/*
void writeAABBs(AccelStruct& accelStruct, unsigned id,
Expand Down Expand Up @@ -189,6 +207,10 @@ IntrusivePtr<AccelStructState> createState(AccelStruct& accelStruct,
const VkAccelerationStructureBuildRangeInfoKHR* buildRangeInfos) {
auto& dev = *accelStruct.dev;

dlg_assert(info.type != VK_ACCELERATION_STRUCTURE_TYPE_GENERIC_KHR);
dlg_assert(accelStruct.effectiveType == VK_ACCELERATION_STRUCTURE_TYPE_GENERIC_KHR ||
info.type == accelStruct.effectiveType);

accelStruct.effectiveType = info.type;
if(info.geometryCount == 0u) {
return {}; // TODO: still return state?
Expand Down Expand Up @@ -331,6 +353,7 @@ VKAPI_ATTR VkResult VKAPI_CALL CreateAccelerationStructureKHR(
accelStruct.buf = &buf;
accelStruct.handle = *pAccelerationStructure;
accelStruct.type = pCreateInfo->type;
accelStruct.effectiveType = accelStruct.type;
accelStruct.offset = pCreateInfo->offset;
accelStruct.size = pCreateInfo->size;

Expand Down
4 changes: 4 additions & 0 deletions src/accelStruct.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,10 @@ AccelStruct& accelStructAt(Device& dev, VkDeviceAddress address);
AccelStruct& accelStructAtLocked(Device& dev, VkDeviceAddress address);
AccelStruct* tryAccelStructAtLocked(Device& dev, VkDeviceAddress address);

// Returns a map of all BLASes (that have been built) and their
// pending states at this point in time.
std::unordered_map<VkDeviceAddress, AccelStructStatePtr> captureBLASesLocked(Device& dev);

Mat4f toMat4f(const VkTransformMatrixKHR& src);

// VK_KHR_acceleration_structure
Expand Down
5 changes: 5 additions & 0 deletions src/cb.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2974,6 +2974,7 @@ VKAPI_ATTR void VKAPI_CALL CmdBuildAccelerationStructuresKHR(
}

buildInfo.pGeometries = dst.data();
buildInfo.ppGeometries = nullptr;
}

if(buildInfo.srcAccelerationStructure) {
Expand Down Expand Up @@ -3073,6 +3074,10 @@ VKAPI_ATTR void VKAPI_CALL CmdCopyAccelerationStructureKHR(
useHandle(cb, cmd, *cmd.src);
useHandle(cb, cmd, *cmd.dst);

auto& copy = cb.builder().record_->accelStructCopies.emplace_back();
copy.src = cmd.src;
copy.dst = cmd.dst;

auto fwd = *pInfo;
fwd.src = cmd.src->handle;
fwd.dst = cmd.dst->handle;
Expand Down
4 changes: 2 additions & 2 deletions src/command/commands.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1909,8 +1909,8 @@ void CopyAccelStructCmd::record(const Device& dev, VkCommandBuffer cb, u32) cons
VkCopyAccelerationStructureInfoKHR info {};
info.sType = VK_STRUCTURE_TYPE_COPY_ACCELERATION_STRUCTURE_INFO_KHR;
info.pNext = pNext;
info.dst = src->handle;
info.src = dst->handle;
info.src = src->handle;
info.dst = dst->handle;
info.mode = mode;
dev.dispatch.CmdCopyAccelerationStructureKHR(cb, &info);
}
Expand Down
1 change: 1 addition & 0 deletions src/command/record.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@ CommandRecord::CommandRecord(ManualTag, Device* xdev) :
queueFamily(0u),
// initialize allocators
pushLables(alloc),
accelStructCopies(alloc),
used(alloc),
secondaries(alloc) {
++DebugStats::get().aliveRecords;
Expand Down
7 changes: 7 additions & 0 deletions src/command/record.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -262,6 +262,11 @@ inline auto find(CommandAllocHashSet<UsedDescriptorSet, UsedDescriptorHash>& use
return it;
}

struct AccelStructCopy {
AccelStruct* src;
AccelStruct* dst;
};

// Represents the recorded state of a command buffer.
// We represent it as extra, reference-counted object so we can display
// old records as well.
Expand Down Expand Up @@ -306,6 +311,8 @@ struct CommandRecord {
u32 numPopLabels {};
CommandAllocList<const char*> pushLables;

CommandAllocList<AccelStructCopy> accelStructCopies;

struct UsedHandles {
// NOTE: change this when adding maps here!
// Important for CmdExecuteCommands impl assert
Expand Down
15 changes: 6 additions & 9 deletions src/commandHook/hook.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -377,7 +377,7 @@ void CommandHook::hook(QueueSubmitter& subm) {
std::lock_guard lock(dev.mutex);

// fast early-outs
if(localCaptures_.empty()) {
if(localCaptures_.empty() && !forceHook.load()) {
auto hasBuildCmd = false;
for(auto [subID, sub] : enumerate(subm.dstBatch->submissions)) {
auto& cmdSub = std::get<CommandSubmission>(sub.data);
Expand All @@ -389,7 +389,7 @@ void CommandHook::hook(QueueSubmitter& subm) {
}
}

if(!hasBuildCmd && (target_.type == TargetType::none || freeze.load())) {
if(!hasBuildCmd && (target_.type == TargetType::none)) {
return;
}
}
Expand Down Expand Up @@ -520,7 +520,7 @@ void CommandHook::hook(QueueSubmitter& subm) {
}
}

if(!hookData && rec.buildsAccelStructs && hookAccelStructBuilds) {
if(!hookData && (forceHook.load() || (rec.buildsAccelStructs && hookAccelStructBuilds))) {
dlg_assert(!hooked);
hooked = doHook(rec, {}, 0.f, sub, hookData);
}
Expand Down Expand Up @@ -794,12 +794,9 @@ VkCommandBuffer CommandHook::doHook(CommandRecord& record,
continue;
}

// We want to capture an acceleration structure.
// CommandHookSubmission::{activate, finish} might write into
// the dst state, assumes it wasn't used before.
// PERF: we could make this work in theory. Might need
// more complicated state re-using logic
if(!hookRecord->accelStructCaptures.empty()) {
// Accel Structure stuff re-use is hard due to ordering.
// PERF: we could make some of this work in theory.
if(!hookRecord->accelStructOps.empty()) {
continue;
}

Expand Down
2 changes: 1 addition & 1 deletion src/commandHook/hook.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -182,7 +182,7 @@ struct CommandHook {

// Mainly useful for debugging, should always be true otherwise
// as we need it to have accelStruct data.
bool hookAccelStructBuilds {true};
std::atomic<bool> hookAccelStructBuilds {true};

public:
CommandHook(Device& dev);
Expand Down
52 changes: 9 additions & 43 deletions src/commandHook/record.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -523,6 +523,7 @@ void CommandHookRecord::hookRecord(Command* cmd, RecordInfo& info) {
while(cmd) {
// check if command needs additional, manual hook
if(cmd->category() == CommandCategory::buildAccelStruct && hook->hookAccelStructBuilds) {

auto* basCmd = commandCast<BuildAccelStructsCmd*>(cmd);
auto* basCmdIndirect = commandCast<BuildAccelStructsCmd*>(cmd);
dlg_assert(basCmd || basCmdIndirect);
Expand All @@ -538,6 +539,10 @@ void CommandHookRecord::hookRecord(Command* cmd, RecordInfo& info) {
info.rebindComputeState = true;
}

if(auto* cas = commandCast<CopyAccelStructCmd*>(cmd); cas) {
accelStructOps.push_back(AccelStructCopy{cas->src, cas->dst});
}

// check if command is on hooking chain
if(info.nextHookLevel < hcommand.size() && cmd == hcommand[info.nextHookLevel]) {
auto hookDst = (info.nextHookLevel == hcommand.size() - 1);
Expand Down Expand Up @@ -754,34 +759,9 @@ void CommandHookRecord::copyDs(Command& bcmd, RecordInfo& info,

using CapturedAccelStruct = CommandHookState::CapturedAccelStruct;
auto& dstCapture = dst.data.emplace<CapturedAccelStruct>();
(void) dstCapture;

// We only have to store the state now if the tlas has been
// previously built in this record. Otherwise we cannot know
// its state at execution time right now. It will get captured
// in CommandHookSubmission::finish
auto tlasBuildPtr = lastAccelStructBuild(elem.accelStruct->deviceAddress);
if(tlasBuildPtr) {
dstCapture.tlas = std::move(tlasBuildPtr);

// NOTE: only capture the blases built by this record.
// The rest will be captured in CommandHookSubmission::finish.
// For BLASes not built in this record, we cannot tell their
// pending state at execution time for sure.
// We just write everything we build into the map, we cannot
// know which blases are referenced by the tlas at this point on cpu.
// Yes, this might include blas builds *after* the TLAS was build.
// But when a blas was built that is inside the tlas,
// the tlas would be invalid at this point (application error).
for(auto& cmd : accelStructBuilds) {
for(auto& build : cmd.builds) {
dlg_assert_or(build.dst && build.state, continue);
auto blasAddress = build.dst->deviceAddress;
dstCapture.blases.insert({blasAddress, build.state});
}
}
}

accelStructCaptures.push_back({dstID, elem.accelStruct});
accelStructOps.push_back(AccelStructCapture{dstID, elem.accelStruct});
} else if(cat == DescriptorCategory::bufferView) {
// TODO: copy as buffer or image? maybe best to copy
// as buffer but then create bufferView on our own?
Expand Down Expand Up @@ -1270,7 +1250,8 @@ void CommandHookRecord::hookBefore(const BuildAccelStructsCmd& cmd) {

auto& cmdHook = *dev.commandHook;

auto& build = accelStructBuilds.emplace_back();
auto& ops = accelStructOps.emplace_back();
auto& build = ops.emplace<AccelStructBuild>();
build.command = &cmd;

// TODO 1. Make sure all data has been written via memory barrier.
Expand Down Expand Up @@ -1425,21 +1406,6 @@ void CommandHookRecord::hookBefore(const BuildAccelStructsIndirectCmd& cmd) {
dlg_error("TODO: implement support for copying BuildAccelStructsIndirectCmd data");
}

IntrusivePtr<AccelStructState> CommandHookRecord::lastAccelStructBuild(u64 accelStructAddress) {
// TODO PERF: could be solved more efficiently with an unordered map
// might be relevant when there are many (thousand) builds
for(auto& cmd : accelStructBuilds) {
for(auto& build : cmd.builds) {
dlg_assert(build.dst);
if(build.dst->deviceAddress == accelStructAddress) {
return build.state;
}
}
}

return {};
}

void CommandHookRecord::finish() noexcept {
// NOTE: We don't do this since we can assume the record to remain
// valid until all submissions are finished. We can assume it to
Expand Down
19 changes: 15 additions & 4 deletions src/commandHook/record.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -65,20 +65,31 @@ struct CommandHookRecord {

struct Build {
AccelStruct* dst;
IntrusivePtr<AccelStructState> state;
AccelStructStatePtr state;
};

std::vector<Build> builds;
};

std::vector<AccelStructBuild> accelStructBuilds;

struct AccelStructCapture {
unsigned id; // index into state->copiedDescriptors
AccelStruct* accelStruct;
};

std::vector<AccelStructCapture> accelStructCaptures;
struct AccelStructCopy {
AccelStruct* src {};
AccelStruct* dst {};
AccelStructStatePtr state {}; // filled in later by CommandHookSubmission
};

using AccelStructOp = std::variant<
AccelStructBuild,
AccelStructCapture,
AccelStructCopy
>;

// Order here is important, ops might depend on each other
std::vector<AccelStructOp> accelStructOps;

// Needed for image to buffer sample-copying
std::vector<VkDescriptorSet> descriptorSets;
Expand Down
Loading

0 comments on commit bb6e625

Please sign in to comment.