Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Resync optick with my latest changes submitted to head stream #896

Merged
merged 5 commits into from
Jun 22, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 1 addition & 5 deletions neo/libs/optick/optick_common.h
Original file line number Diff line number Diff line change
Expand Up @@ -135,11 +135,7 @@ static const ProcessID INVALID_PROCESS_ID = (ProcessID)-1;
// Asserts
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
#if defined(OPTICK_MSVC)
#ifdef UNICODE
#define OPTICK_DEBUG_BREAK(description) OutputDebugString(L"Optick ERROR: " description L"\n"); __debugbreak()
#else
#define OPTICK_DEBUG_BREAK(description) OutputDebugString("Optick ERROR: " description "\n"); __debugbreak()
#endif
#define OPTICK_DEBUG_BREAK(description) OutputDebugString(TEXT("Optick ERROR: ") description TEXT("\n")); __debugbreak()
#elif defined(OPTICK_GCC)
#if __has_builtin(__builtin_debugtrap)
#define OPTICK_DEBUG_BREAK(description) std::cerr << "Optick ERROR: " << description << std::endl; __builtin_debugtrap()
Expand Down
133 changes: 75 additions & 58 deletions neo/libs/optick/optick_gpu.vulkan.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -35,9 +35,6 @@ namespace Optick
{
class GPUProfilerVulkan : public GPUProfiler
{
private:
VulkanFunctions vulkanFunctions = {};

protected:
struct Frame
{
Expand All @@ -48,7 +45,7 @@ namespace Optick

struct NodePayload
{
VulkanFunctions* vulkanFunctions;
VulkanFunctions vulkanFunctions;
VkDevice device;
VkPhysicalDevice physicalDevice;
VkQueue queue;
Expand Down Expand Up @@ -109,6 +106,8 @@ namespace Optick

void GPUProfilerVulkan::InitDevice(VkInstance instance, VkDevice* devices, VkPhysicalDevice* physicalDevices, VkQueue* cmdQueues, uint32_t* cmdQueuesFamily, uint32_t nodeCount, const VulkanFunctions* functions)
{
VulkanFunctions vulkanFunctions = {};

if (functions != nullptr)
{
vulkanFunctions = *functions;
Expand All @@ -125,7 +124,7 @@ namespace Optick
(PFN_vkAllocateCommandBuffers_)vkAllocateCommandBuffers,
(PFN_vkCreateFence_)vkCreateFence,
vkCmdResetQueryPool,
vkResetQueryPool,
nullptr, // dynamically define vkResetQueryPool via VK_EXT_host_query_reset extension or Vulkan 1.2 hostQueryReset feature
(PFN_vkCmdWaitEvents_)vkCmdWaitEvents,
(PFN_vkResetEvent_)vkResetEvent,
(PFN_vkSetEvent_)vkSetEvent,
Expand Down Expand Up @@ -196,6 +195,9 @@ namespace Optick
vulkanFunctions.vkCreateFence = (PFN_vkCreateFence_)vkGetDeviceProcAddr_(devices[i], "vkCreateFence");
vulkanFunctions.vkCmdResetQueryPool = (PFN_vkCmdResetQueryPool_)vkGetDeviceProcAddr_(devices[i], "vkCmdResetQueryPool");
vulkanFunctions.vkResetQueryPool = (PFN_vkResetQueryPool_)vkGetDeviceProcAddr_(devices[i], "vkResetQueryPool");
if (!vulkanFunctions.vkResetQueryPool) { // if vkResetQueryPool not defined via Vulkan 1.2, try vkResetQueryPoolEXT
vulkanFunctions.vkResetQueryPool = (PFN_vkResetQueryPool_)vkGetDeviceProcAddr_(devices[i], "vkResetQueryPoolEXT");
}
vulkanFunctions.vkCmdWaitEvents = (PFN_vkCmdWaitEvents_)vkGetDeviceProcAddr_(devices[i], "vkCmdWaitEvents");
vulkanFunctions.vkResetEvent = (PFN_vkResetEvent_)vkGetDeviceProcAddr_(devices[i], "vkResetEvent");
vulkanFunctions.vkSetEvent = (PFN_vkSetEvent_)vkGetDeviceProcAddr_(devices[i], "vkSetEvent");
Expand All @@ -215,19 +217,32 @@ namespace Optick
vulkanFunctions.vkGetPastPresentationTimingGOOGLE = (PFN_vkGetPastPresentationTimingGOOGLE_)vkGetDeviceProcAddr_(devices[i], "vkGetPastPresentationTimingGOOGLE");
}
#if OPTICK_STATIC_VULKAN_FUNCTIONS
else if (!vulkanFunctions.vkGetPastPresentationTimingGOOGLE)
else // this condition can also run if vulkanFunctions are manually-defined via the "functions" parameter and vulkanFunctions.vkGetInstanceProcAddr == nullptr
{
vulkanFunctions.vkGetPastPresentationTimingGOOGLE = (PFN_vkGetPastPresentationTimingGOOGLE_)vkGetDeviceProcAddr(devices[i], "vkGetPastPresentationTimingGOOGLE");
// SRS - First check for nullptr to make sure we don't overwrite any manually-defined function pointers
if (!vulkanFunctions.vkResetQueryPool) {
vulkanFunctions.vkResetQueryPool = (PFN_vkResetQueryPool_)vkGetDeviceProcAddr(devices[i], "vkResetQueryPool");
if (!vulkanFunctions.vkResetQueryPool) { // if vkResetQueryPool not defined via Vulkan 1.2, try vkResetQueryPoolEXT
vulkanFunctions.vkResetQueryPool = (PFN_vkResetQueryPool_)vkGetDeviceProcAddr(devices[i], "vkResetQueryPoolEXT");
}
}

if (!vulkanFunctions.vkGetPastPresentationTimingGOOGLE) {
vulkanFunctions.vkGetPastPresentationTimingGOOGLE = (PFN_vkGetPastPresentationTimingGOOGLE_)vkGetDeviceProcAddr(devices[i], "vkGetPastPresentationTimingGOOGLE");
}
}
#endif
if (!vulkanFunctions.vkResetQueryPool) {
OPTICK_FAILED("vkResetQueryPool must be enabled via VK_EXT_host_query_reset extension or Vulkan 1.2 hostQueryReset feature. Can't initialize GPU Profiler!");
}

VkPhysicalDeviceProperties properties = { 0 };
(*vulkanFunctions.vkGetPhysicalDeviceProperties)(physicalDevices[i], &properties);
GPUProfiler::InitNode(properties.deviceName, i);

NodePayload* nodePayload = Memory::New<NodePayload>();
nodePayloads[i] = nodePayload;
nodePayload->vulkanFunctions = &vulkanFunctions;
nodePayload->vulkanFunctions = vulkanFunctions;
nodePayload->device = devices[i];
nodePayload->physicalDevice = physicalDevices[i];
nodePayload->queue = cmdQueues[i];
Expand Down Expand Up @@ -298,25 +313,27 @@ namespace Optick
{
if (currentState == STATE_RUNNING)
{
uint32_t index = nodes[currentNode]->QueryTimestamp(outCpuTimestamp);
(*vulkanFunctions.vkCmdWriteTimestamp)(commandBuffer, VK_PIPELINE_STAGE_ALL_GRAPHICS_BIT, nodePayloads[currentNode]->queryPool, index);
Node& node = *nodes[currentNode];
NodePayload& payload = *nodePayloads[currentNode];

uint32_t index = node.QueryTimestamp(outCpuTimestamp);
(*payload.vulkanFunctions.vkCmdWriteTimestamp)(commandBuffer, VK_PIPELINE_STAGE_ALL_GRAPHICS_BIT, payload.queryPool, index);
}
}

void GPUProfilerVulkan::ResolveTimestamps(uint32_t nodeIndex, uint32_t startIndex, uint32_t count)
{
if (count)
{
Node* node = nodes[nodeIndex];

NodePayload* payload = nodePayloads[nodeIndex];
Node& node = *nodes[nodeIndex];
NodePayload& payload = *nodePayloads[nodeIndex];

OPTICK_VK_CHECK((VkResult)(*vulkanFunctions.vkGetQueryPoolResults)(payload->device, payload->queryPool, startIndex, count, 8 * (size_t)count, &nodes[nodeIndex]->queryGpuTimestamps[startIndex], 8, VK_QUERY_RESULT_64_BIT));
(*vulkanFunctions.vkResetQueryPool)(payload->device, payload->queryPool, startIndex, count);
OPTICK_VK_CHECK((VkResult)(*payload.vulkanFunctions.vkGetQueryPoolResults)(payload.device, payload.queryPool, startIndex, count, 8 * (size_t)count, &node.queryGpuTimestamps[startIndex], 8, VK_QUERY_RESULT_64_BIT));
(*payload.vulkanFunctions.vkResetQueryPool)(payload.device, payload.queryPool, startIndex, count);

// Convert GPU timestamps => CPU Timestamps
for (uint32_t index = startIndex; index < startIndex + count; ++index)
*node->queryCpuTimestamps[index] = node->clock.GetCPUTimestamp(node->queryGpuTimestamps[index]);
*node.queryCpuTimestamps[index] = node.clock.GetCPUTimestamp(node.queryGpuTimestamps[index]);
}
}

Expand All @@ -328,7 +345,7 @@ namespace Optick
do
{
NodePayload& payload = *nodePayloads[nodeIndex];
r = (*vulkanFunctions.vkWaitForFences)(nodePayloads[nodeIndex]->device, 1, &payload.frames[frameNumberToWait % payload.frames.size()].fence, 1, 1000 * 30);
r = (*payload.vulkanFunctions.vkWaitForFences)(payload.device, 1, &payload.frames[frameNumberToWait % payload.frames.size()].fence, 1, 1000 * 30);
} while (r != VK_SUCCESS);
}

Expand Down Expand Up @@ -357,15 +374,15 @@ namespace Optick
VkDevice device = payload.device;
VkQueue queue = payload.queue;

(*vulkanFunctions.vkWaitForFences)(device, 1, &fence, 1, (uint64_t)-1);
(*vulkanFunctions.vkResetFences)(device, 1, &fence);
(*payload.vulkanFunctions.vkWaitForFences)(device, 1, &fence, 1, (uint64_t)-1);
(*payload.vulkanFunctions.vkResetFences)(device, 1, &fence);

VkCommandBufferBeginInfo commandBufferBeginInfo;
commandBufferBeginInfo.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO;
commandBufferBeginInfo.pNext = 0;
commandBufferBeginInfo.pInheritanceInfo = 0;
commandBufferBeginInfo.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT;
OPTICK_VK_CHECK((VkResult)(*vulkanFunctions.vkBeginCommandBuffer)(commandBuffer, &commandBufferBeginInfo));
OPTICK_VK_CHECK((VkResult)(*payload.vulkanFunctions.vkBeginCommandBuffer)(commandBuffer, &commandBufferBeginInfo));

if (EventData* frameEvent = currentFrame.frameEvent)
QueryTimestamp(commandBuffer, &frameEvent->finish);
Expand All @@ -376,7 +393,7 @@ namespace Optick
QueryTimestamp(commandBuffer, &AddFrameTag().timestamp);
nextFrame.frameEvent = &event;

OPTICK_VK_CHECK((VkResult)(*vulkanFunctions.vkEndCommandBuffer)(commandBuffer));
OPTICK_VK_CHECK((VkResult)(*payload.vulkanFunctions.vkEndCommandBuffer)(commandBuffer));
VkSubmitInfo submitInfo = {};
submitInfo.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO;
submitInfo.pNext = nullptr;
Expand All @@ -386,7 +403,7 @@ namespace Optick
submitInfo.pCommandBuffers = &commandBuffer;
submitInfo.signalSemaphoreCount = 0;
submitInfo.pSignalSemaphores = nullptr;
OPTICK_VK_CHECK((VkResult)(*vulkanFunctions.vkQueueSubmit)(queue, 1, &submitInfo, fence));
OPTICK_VK_CHECK((VkResult)(*payload.vulkanFunctions.vkQueueSubmit)(queue, 1, &submitInfo, fence));

uint32_t queryBegin = currentFrame.queryIndexStart;
uint32_t queryEnd = node.queryIndex;
Expand Down Expand Up @@ -422,16 +439,16 @@ namespace Optick
}

// SRS - Add Vulkan presentation / vsync timing if VK_GOOGLE_display_timing extension available
if (vulkanFunctions.vkGetPastPresentationTimingGOOGLE)
if (payload.vulkanFunctions.vkGetPastPresentationTimingGOOGLE)
{
uint32_t queryPresentTimingCount = 0;
(*vulkanFunctions.vkGetPastPresentationTimingGOOGLE)(device, swapChain, &queryPresentTimingCount, nullptr);
(*payload.vulkanFunctions.vkGetPastPresentationTimingGOOGLE)(device, swapChain, &queryPresentTimingCount, nullptr);
if (queryPresentTimingCount > 0)
{
// Query Presentation Timing / VSync
vector<VkPastPresentationTimingGOOGLE> queryPresentTimings;
queryPresentTimings.resize(queryPresentTimingCount);
(*vulkanFunctions.vkGetPastPresentationTimingGOOGLE)(device, swapChain, &queryPresentTimingCount, &queryPresentTimings[0]);
(*payload.vulkanFunctions.vkGetPastPresentationTimingGOOGLE)(device, swapChain, &queryPresentTimingCount, &queryPresentTimings[0]);
for (uint32_t presentIndex = 0; presentIndex < queryPresentTimingCount; presentIndex++)
{
// Process Presentation Timing / VSync if swap image was actually presented (i.e. not dropped)
Expand Down Expand Up @@ -465,24 +482,24 @@ namespace Optick
{
GPUProfiler::ClockSynchronization clock;

NodePayload& node = *nodePayloads[nodeIndex];
Frame& currentFrame = node.frames[frameNumber % NUM_FRAMES_DELAY];
NodePayload& payload = *nodePayloads[nodeIndex];
Frame& currentFrame = payload.frames[frameNumber % NUM_FRAMES_DELAY];

VkCommandBufferBeginInfo commandBufferBeginInfo;
commandBufferBeginInfo.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO;
commandBufferBeginInfo.pNext = 0;
commandBufferBeginInfo.pInheritanceInfo = 0;
commandBufferBeginInfo.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT;
VkCommandBuffer CB = currentFrame.commandBuffer;
VkDevice Device = node.device;
VkDevice Device = payload.device;
VkFence Fence = currentFrame.fence;

// SRS - Prepare and submit an empty command buffer to wait on app buffer completion
(*vulkanFunctions.vkWaitForFences)(Device, 1, &Fence, 1, (uint64_t)-1);
(*vulkanFunctions.vkResetFences)(Device, 1, &Fence);
(*vulkanFunctions.vkResetCommandBuffer)(CB, VK_COMMAND_BUFFER_RESET_RELEASE_RESOURCES_BIT);
(*vulkanFunctions.vkBeginCommandBuffer)(CB, &commandBufferBeginInfo);
(*vulkanFunctions.vkEndCommandBuffer)(CB);
(*payload.vulkanFunctions.vkWaitForFences)(Device, 1, &Fence, 1, (uint64_t)-1);
(*payload.vulkanFunctions.vkResetFences)(Device, 1, &Fence);
(*payload.vulkanFunctions.vkResetCommandBuffer)(CB, VK_COMMAND_BUFFER_RESET_RELEASE_RESOURCES_BIT);
(*payload.vulkanFunctions.vkBeginCommandBuffer)(CB, &commandBufferBeginInfo);
(*payload.vulkanFunctions.vkEndCommandBuffer)(CB);

VkSubmitInfo submitInfo = {};
submitInfo.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO;
Expand All @@ -493,48 +510,48 @@ namespace Optick
submitInfo.pCommandBuffers = &CB;
submitInfo.signalSemaphoreCount = 0;
submitInfo.pSignalSemaphores = nullptr;
(*vulkanFunctions.vkQueueSubmit)(nodePayloads[nodeIndex]->queue, 1, &submitInfo, Fence);
(*payload.vulkanFunctions.vkQueueSubmit)(payload.queue, 1, &submitInfo, Fence);

// SRS - Prepare and submit the actual command buffer used for clock synchronization
(*vulkanFunctions.vkWaitForFences)(Device, 1, &Fence, 1, (uint64_t)-1);
(*vulkanFunctions.vkResetFences)(Device, 1, &Fence);
(*vulkanFunctions.vkResetEvent)(Device, nodePayloads[nodeIndex]->event);
(*vulkanFunctions.vkBeginCommandBuffer)(CB, &commandBufferBeginInfo);
(*vulkanFunctions.vkCmdResetQueryPool)(CB, nodePayloads[nodeIndex]->queryPool, 0, 1);
(*vulkanFunctions.vkCmdWaitEvents)(CB, 1, &nodePayloads[nodeIndex]->event, VK_PIPELINE_STAGE_HOST_BIT, VK_PIPELINE_STAGE_HOST_BIT | VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, 0, nullptr, 0, nullptr, 0, nullptr);
(*vulkanFunctions.vkCmdWriteTimestamp)(CB, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, nodePayloads[nodeIndex]->queryPool, 0);
(*vulkanFunctions.vkEndCommandBuffer)(CB);
(*vulkanFunctions.vkQueueSubmit)(nodePayloads[nodeIndex]->queue, 1, &submitInfo, Fence);
(*payload.vulkanFunctions.vkWaitForFences)(Device, 1, &Fence, 1, (uint64_t)-1);
(*payload.vulkanFunctions.vkResetFences)(Device, 1, &Fence);
(*payload.vulkanFunctions.vkResetEvent)(Device, payload.event);
(*payload.vulkanFunctions.vkBeginCommandBuffer)(CB, &commandBufferBeginInfo);
(*payload.vulkanFunctions.vkCmdResetQueryPool)(CB, payload.queryPool, 0, 1);
(*payload.vulkanFunctions.vkCmdWaitEvents)(CB, 1, &payload.event, VK_PIPELINE_STAGE_HOST_BIT, VK_PIPELINE_STAGE_HOST_BIT | VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, 0, nullptr, 0, nullptr, 0, nullptr);
(*payload.vulkanFunctions.vkCmdWriteTimestamp)(CB, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, payload.queryPool, 0);
(*payload.vulkanFunctions.vkEndCommandBuffer)(CB);
(*payload.vulkanFunctions.vkQueueSubmit)(payload.queue, 1, &submitInfo, Fence);

// SRS - Improve GPU to CPU clock offset calibration by using Vulkan events
// thanks to cdwfs for concept at https://gist.github.com/cdwfs/4222ca09cb259f8dd50f7f2cf7d09179
(*vulkanFunctions.vkSetEvent)(Device, nodePayloads[nodeIndex]->event);
(*payload.vulkanFunctions.vkSetEvent)(Device, payload.event);
clock.timestampCPU = GetHighPrecisionTime();
(*vulkanFunctions.vkWaitForFences)(Device, 1, &Fence, 1, (uint64_t)-1);
(*vulkanFunctions.vkResetFences)(Device, 1, &Fence);
(*payload.vulkanFunctions.vkWaitForFences)(Device, 1, &Fence, 1, (uint64_t)-1);
(*payload.vulkanFunctions.vkResetFences)(Device, 1, &Fence);
clock.timestampGPU = 0;
(*vulkanFunctions.vkGetQueryPoolResults)(Device, nodePayloads[nodeIndex]->queryPool, 0, 1, 8, &clock.timestampGPU, 8, VK_QUERY_RESULT_64_BIT);
(*payload.vulkanFunctions.vkGetQueryPoolResults)(Device, payload.queryPool, 0, 1, 8, &clock.timestampGPU, 8, VK_QUERY_RESULT_64_BIT);

// SRS - Improve GPU to CPU clock frequency scaling by using floating point doubles
clock.frequencyCPU = GetHighPrecisionFrequency();
VkPhysicalDeviceProperties Properties;
(*vulkanFunctions.vkGetPhysicalDeviceProperties)(nodePayloads[nodeIndex]->physicalDevice, &Properties);
(*payload.vulkanFunctions.vkGetPhysicalDeviceProperties)(payload.physicalDevice, &Properties);
clock.frequencyGPU = (int64_t)(1000000000.0 / (double)Properties.limits.timestampPeriod);

// SRS - Reset entire query pool to clear clock sync query + any leftover queries from previous run
(*vulkanFunctions.vkBeginCommandBuffer)(CB, &commandBufferBeginInfo);
(*vulkanFunctions.vkCmdResetQueryPool)(CB, nodePayloads[nodeIndex]->queryPool, 0, MAX_QUERIES_COUNT);
(*vulkanFunctions.vkEndCommandBuffer)(CB);
(*vulkanFunctions.vkQueueSubmit)(nodePayloads[nodeIndex]->queue, 1, &submitInfo, Fence);
(*payload.vulkanFunctions.vkBeginCommandBuffer)(CB, &commandBufferBeginInfo);
(*payload.vulkanFunctions.vkCmdResetQueryPool)(CB, payload.queryPool, 0, MAX_QUERIES_COUNT);
(*payload.vulkanFunctions.vkEndCommandBuffer)(CB);
(*payload.vulkanFunctions.vkQueueSubmit)(payload.queue, 1, &submitInfo, Fence);

return clock;
}

GPUProfilerVulkan::NodePayload::~NodePayload()
{
(*vulkanFunctions->vkDestroyEvent)(device, event, nullptr);
(*vulkanFunctions->vkDestroyCommandPool)(device, commandPool, nullptr);
(*vulkanFunctions->vkDestroyQueryPool)(device, queryPool, nullptr);
(*vulkanFunctions.vkDestroyEvent)(device, event, nullptr);
(*vulkanFunctions.vkDestroyCommandPool)(device, commandPool, nullptr);
(*vulkanFunctions.vkDestroyQueryPool)(device, queryPool, nullptr);
}

GPUProfilerVulkan::~GPUProfilerVulkan()
Expand All @@ -543,8 +560,8 @@ namespace Optick
{
for (Frame& frame : payload->frames)
{
(*vulkanFunctions.vkDestroyFence)(payload->device, frame.fence, nullptr);
(*vulkanFunctions.vkFreeCommandBuffers)(payload->device, payload->commandPool, 1, &frame.commandBuffer);
(*payload->vulkanFunctions.vkDestroyFence)(payload->device, frame.fence, nullptr);
(*payload->vulkanFunctions.vkFreeCommandBuffers)(payload->device, payload->commandPool, 1, &frame.commandBuffer);
}

Memory::Delete(payload);
Expand All @@ -562,5 +579,5 @@ namespace Optick
OPTICK_FAILED("OPTICK_ENABLE_GPU_VULKAN is disabled! Can't initialize GPU Profiler!");
}
}
#endif //OPTICK_ENABLE_GPU_D3D12
#endif //OPTICK_ENABLE_GPU_VULKAN
#endif //USE_OPTICK