From 34817cf2d270fc42c7af524cab73834919af05d7 Mon Sep 17 00:00:00 2001 From: Martin Grant Date: Fri, 13 Dec 2024 14:20:05 +0000 Subject: [PATCH 1/6] Merge pull request #2461 from yingcong-wu/yc/1213-omp-global-fix [DeviceASAN] Register globals for the program --- .../loader/layers/sanitizer/asan/asan_interceptor.cpp | 10 ++++++++++ .../loader/layers/sanitizer/asan/asan_interceptor.hpp | 1 + 2 files changed, 11 insertions(+) diff --git a/source/loader/layers/sanitizer/asan/asan_interceptor.cpp b/source/loader/layers/sanitizer/asan/asan_interceptor.cpp index 19af8546c2..02dcb3d0d3 100644 --- a/source/loader/layers/sanitizer/asan/asan_interceptor.cpp +++ b/source/loader/layers/sanitizer/asan/asan_interceptor.cpp @@ -431,6 +431,12 @@ ur_result_t AsanInterceptor::unregisterProgram(ur_program_handle_t Program) { auto ProgramInfo = getProgramInfo(Program); assert(ProgramInfo != nullptr && "unregistered program!"); + std::scoped_lock Guard(m_AllocationMapMutex); + for (auto AI : ProgramInfo->AllocInfoForGlobals) { + m_AllocationMap.erase(AI->AllocBegin); + } + ProgramInfo->AllocInfoForGlobals.clear(); + ProgramInfo->InstrumentedKernels.clear(); return UR_RESULT_SUCCESS; @@ -549,6 +555,10 @@ AsanInterceptor::registerDeviceGlobals(ur_program_handle_t Program) { {}}); ContextInfo->insertAllocInfo({Device}, AI); + ProgramInfo->AllocInfoForGlobals.emplace(AI); + + std::scoped_lock Guard(m_AllocationMapMutex); + m_AllocationMap.emplace(AI->AllocBegin, std::move(AI)); } } diff --git a/source/loader/layers/sanitizer/asan/asan_interceptor.hpp b/source/loader/layers/sanitizer/asan/asan_interceptor.hpp index f1e80dae56..2270795969 100644 --- a/source/loader/layers/sanitizer/asan/asan_interceptor.hpp +++ b/source/loader/layers/sanitizer/asan/asan_interceptor.hpp @@ -112,6 +112,7 @@ struct ProgramInfo { std::atomic RefCount = 1; // Program is built only once, so we don't need to lock it + std::unordered_set> AllocInfoForGlobals; std::unordered_set InstrumentedKernels; explicit ProgramInfo(ur_program_handle_t Program) : Handle(Program) { From 2a9ec305879c296ece16bb72708af5cd4ef209ca Mon Sep 17 00:00:00 2001 From: Martin Grant Date: Fri, 13 Dec 2024 14:20:15 +0000 Subject: [PATCH 2/6] Merge pull request #2454 from Bensuo/l0_cmd-buf_multi-device Fix L0 command-buffer consumption of multi-device kernels --- source/adapters/level_zero/command_buffer.cpp | 73 +++++---- .../program_adapter_level_zero_v2.match | 1 + .../urMultiDeviceProgramCreateWithBinary.cpp | 138 ++++++++++++++++++ 3 files changed, 187 insertions(+), 25 deletions(-) diff --git a/source/adapters/level_zero/command_buffer.cpp b/source/adapters/level_zero/command_buffer.cpp index 5ae19092a6..32eff7e141 100644 --- a/source/adapters/level_zero/command_buffer.cpp +++ b/source/adapters/level_zero/command_buffer.cpp @@ -894,28 +894,31 @@ urCommandBufferFinalizeExp(ur_exp_command_buffer_handle_t CommandBuffer) { /** * Sets the kernel arguments for a kernel command that will be appended to the * command buffer. - * @param[in] CommandBuffer The CommandBuffer where the command will be + * @param[in] Device The Device associated with the command-buffer where the + * kernel command will be appended. + * @param[in,out] Arguments stored in the ur_kernel_handle_t object to be set + * on the /p ZeKernel object. + * @param[in] ZeKernel The handle to the Level-Zero kernel that will be * appended. - * @param[in] Kernel The handle to the kernel that will be appended. * @return UR_RESULT_SUCCESS or an error code on failure */ -ur_result_t -setKernelPendingArguments(ur_exp_command_buffer_handle_t CommandBuffer, - ur_kernel_handle_t Kernel) { - +ur_result_t setKernelPendingArguments( + ur_device_handle_t Device, + std::vector &PendingArguments, + ze_kernel_handle_t ZeKernel) { // If there are any pending arguments set them now. - for (auto &Arg : Kernel->PendingArguments) { + for (auto &Arg : PendingArguments) { // The ArgValue may be a NULL pointer in which case a NULL value is used for // the kernel argument declared as a pointer to global or constant memory. char **ZeHandlePtr = nullptr; if (Arg.Value) { - UR_CALL(Arg.Value->getZeHandlePtr(ZeHandlePtr, Arg.AccessMode, - CommandBuffer->Device, nullptr, 0u)); + UR_CALL(Arg.Value->getZeHandlePtr(ZeHandlePtr, Arg.AccessMode, Device, + nullptr, 0u)); } ZE2UR_CALL(zeKernelSetArgumentValue, - (Kernel->ZeKernel, Arg.Index, Arg.Size, ZeHandlePtr)); + (ZeKernel, Arg.Index, Arg.Size, ZeHandlePtr)); } - Kernel->PendingArguments.clear(); + PendingArguments.clear(); return UR_RESULT_SUCCESS; } @@ -951,6 +954,8 @@ createCommandHandle(ur_exp_command_buffer_handle_t CommandBuffer, ZE_MUTABLE_COMMAND_EXP_FLAG_GLOBAL_OFFSET; auto Platform = CommandBuffer->Context->getPlatform(); + auto ZeDevice = CommandBuffer->Device->ZeDevice; + if (NumKernelAlternatives > 0) { ZeMutableCommandDesc.flags |= ZE_MUTABLE_COMMAND_EXP_FLAG_KERNEL_INSTRUCTION; @@ -958,14 +963,20 @@ createCommandHandle(ur_exp_command_buffer_handle_t CommandBuffer, std::vector TranslatedKernelHandles( NumKernelAlternatives + 1, nullptr); + ze_kernel_handle_t ZeMainKernel{}; + UR_CALL(getZeKernel(ZeDevice, Kernel, &ZeMainKernel)); + // Translate main kernel first ZE2UR_CALL(zelLoaderTranslateHandle, - (ZEL_HANDLE_KERNEL, Kernel->ZeKernel, + (ZEL_HANDLE_KERNEL, ZeMainKernel, (void **)&TranslatedKernelHandles[0])); for (size_t i = 0; i < NumKernelAlternatives; i++) { + ze_kernel_handle_t ZeAltKernel{}; + UR_CALL(getZeKernel(ZeDevice, KernelAlternatives[i], &ZeAltKernel)); + ZE2UR_CALL(zelLoaderTranslateHandle, - (ZEL_HANDLE_KERNEL, KernelAlternatives[i]->ZeKernel, + (ZEL_HANDLE_KERNEL, ZeAltKernel, (void **)&TranslatedKernelHandles[i + 1])); } @@ -1022,23 +1033,28 @@ ur_result_t urCommandBufferAppendKernelLaunchExp( std::scoped_lock Lock( Kernel->Mutex, Kernel->Program->Mutex, CommandBuffer->Mutex); + auto Device = CommandBuffer->Device; + ze_kernel_handle_t ZeKernel{}; + UR_CALL(getZeKernel(Device->ZeDevice, Kernel, &ZeKernel)); + if (GlobalWorkOffset != NULL) { - UR_CALL(setKernelGlobalOffset(CommandBuffer->Context, Kernel->ZeKernel, - WorkDim, GlobalWorkOffset)); + UR_CALL(setKernelGlobalOffset(CommandBuffer->Context, ZeKernel, WorkDim, + GlobalWorkOffset)); } // If there are any pending arguments set them now. if (!Kernel->PendingArguments.empty()) { - UR_CALL(setKernelPendingArguments(CommandBuffer, Kernel)); + UR_CALL( + setKernelPendingArguments(Device, Kernel->PendingArguments, ZeKernel)); } ze_group_count_t ZeThreadGroupDimensions{1, 1, 1}; uint32_t WG[3]; - UR_CALL(calculateKernelWorkDimensions(Kernel->ZeKernel, CommandBuffer->Device, + UR_CALL(calculateKernelWorkDimensions(ZeKernel, Device, ZeThreadGroupDimensions, WG, WorkDim, GlobalWorkSize, LocalWorkSize)); - ZE2UR_CALL(zeKernelSetGroupSize, (Kernel->ZeKernel, WG[0], WG[1], WG[2])); + ZE2UR_CALL(zeKernelSetGroupSize, (ZeKernel, WG[0], WG[1], WG[2])); CommandBuffer->KernelsList.push_back(Kernel); for (size_t i = 0; i < NumKernelAlternatives; i++) { @@ -1063,7 +1079,7 @@ ur_result_t urCommandBufferAppendKernelLaunchExp( SyncPointWaitList, false, RetSyncPoint, ZeEventList, ZeLaunchEvent)); ZE2UR_CALL(zeCommandListAppendLaunchKernel, - (CommandBuffer->ZeComputeCommandList, Kernel->ZeKernel, + (CommandBuffer->ZeComputeCommandList, ZeKernel, &ZeThreadGroupDimensions, ZeLaunchEvent, ZeEventList.size(), getPointerFromVector(ZeEventList))); @@ -1836,6 +1852,7 @@ ur_result_t updateKernelCommand( const auto CommandBuffer = Command->CommandBuffer; const void *NextDesc = nullptr; auto Platform = CommandBuffer->Context->getPlatform(); + auto ZeDevice = CommandBuffer->Device->ZeDevice; uint32_t Dim = CommandDesc->newWorkDim; size_t *NewGlobalWorkOffset = CommandDesc->pNewGlobalWorkOffset; @@ -1844,11 +1861,14 @@ ur_result_t updateKernelCommand( // Kernel handle must be updated first for a given CommandId if required ur_kernel_handle_t NewKernel = CommandDesc->hNewKernel; + if (NewKernel && Command->Kernel != NewKernel) { + ze_kernel_handle_t ZeNewKernel{}; + UR_CALL(getZeKernel(ZeDevice, NewKernel, &ZeNewKernel)); + ze_kernel_handle_t ZeKernelTranslated = nullptr; - ZE2UR_CALL( - zelLoaderTranslateHandle, - (ZEL_HANDLE_KERNEL, NewKernel->ZeKernel, (void **)&ZeKernelTranslated)); + ZE2UR_CALL(zelLoaderTranslateHandle, + (ZEL_HANDLE_KERNEL, ZeNewKernel, (void **)&ZeKernelTranslated)); ZE2UR_CALL(Platform->ZeMutableCmdListExt .zexCommandListUpdateMutableCommandKernelsExp, @@ -1905,10 +1925,13 @@ ur_result_t updateKernelCommand( // by the driver for the kernel. bool UpdateWGSize = NewLocalWorkSize == nullptr; + ze_kernel_handle_t ZeKernel{}; + UR_CALL(getZeKernel(ZeDevice, Command->Kernel, &ZeKernel)); + uint32_t WG[3]; - UR_CALL(calculateKernelWorkDimensions( - Command->Kernel->ZeKernel, CommandBuffer->Device, - ZeThreadGroupDimensions, WG, Dim, NewGlobalWorkSize, NewLocalWorkSize)); + UR_CALL(calculateKernelWorkDimensions(ZeKernel, CommandBuffer->Device, + ZeThreadGroupDimensions, WG, Dim, + NewGlobalWorkSize, NewLocalWorkSize)); auto MutableGroupCountDesc = std::make_unique>(); diff --git a/test/conformance/program/program_adapter_level_zero_v2.match b/test/conformance/program/program_adapter_level_zero_v2.match index 97d6869b81..fd359b3653 100644 --- a/test/conformance/program/program_adapter_level_zero_v2.match +++ b/test/conformance/program/program_adapter_level_zero_v2.match @@ -1,3 +1,4 @@ urProgramSetSpecializationConstantsTest.InvalidValueSize/* urProgramSetSpecializationConstantsTest.InvalidValueId/* urProgramSetSpecializationConstantsTest.InvalidValuePtr/* +{{OPT}}urMultiDeviceCommandBufferExpTest.* diff --git a/test/conformance/program/urMultiDeviceProgramCreateWithBinary.cpp b/test/conformance/program/urMultiDeviceProgramCreateWithBinary.cpp index 9ff11d9016..5f99747462 100644 --- a/test/conformance/program/urMultiDeviceProgramCreateWithBinary.cpp +++ b/test/conformance/program/urMultiDeviceProgramCreateWithBinary.cpp @@ -240,3 +240,141 @@ TEST_F(urMultiDeviceProgramCreateWithBinaryTest, CheckProgramGetInfo) { reinterpret_cast(property_value.data()); ASSERT_STRNE(returned_kernel_names, ""); } + +struct urMultiDeviceCommandBufferExpTest + : urMultiDeviceProgramCreateWithBinaryTest { + void SetUp() override { + UUR_RETURN_ON_FATAL_FAILURE( + urMultiDeviceProgramCreateWithBinaryTest::SetUp()); + + auto kernelName = + uur::KernelsEnvironment::instance->GetEntryPointNames("foo")[0]; + + ASSERT_SUCCESS(urProgramBuild(context, binary_program, nullptr)); + ASSERT_SUCCESS( + urKernelCreate(binary_program, kernelName.data(), &kernel)); + } + + void TearDown() override { + if (kernel) { + EXPECT_SUCCESS(urKernelRelease(kernel)); + } + UUR_RETURN_ON_FATAL_FAILURE( + urMultiDeviceProgramCreateWithBinaryTest::TearDown()); + } + + static bool hasCommandBufferSupport(ur_device_handle_t device) { + ur_bool_t cmd_buffer_support = false; + auto res = urDeviceGetInfo( + device, UR_DEVICE_INFO_COMMAND_BUFFER_SUPPORT_EXP, + sizeof(cmd_buffer_support), &cmd_buffer_support, nullptr); + + if (res) { + return false; + } + + return cmd_buffer_support; + } + + static bool hasCommandBufferUpdateSupport(ur_device_handle_t device) { + ur_device_command_buffer_update_capability_flags_t + update_capability_flags; + auto res = urDeviceGetInfo( + device, UR_DEVICE_INFO_COMMAND_BUFFER_UPDATE_CAPABILITIES_EXP, + sizeof(update_capability_flags), &update_capability_flags, nullptr); + + if (res) { + return false; + } + + return (0 != update_capability_flags); + } + + ur_kernel_handle_t kernel = nullptr; + + static constexpr size_t global_offset = 0; + static constexpr size_t n_dimensions = 1; + static constexpr size_t global_size = 64; + static constexpr size_t local_size = 4; +}; + +TEST_F(urMultiDeviceCommandBufferExpTest, Enqueue) { + for (size_t i = 0; i < devices.size(); i++) { + auto device = devices[i]; + if (!hasCommandBufferSupport(device)) { + continue; + } + + // Create command-buffer + uur::raii::CommandBuffer cmd_buf_handle; + ASSERT_SUCCESS(urCommandBufferCreateExp(context, device, nullptr, + cmd_buf_handle.ptr())); + + // Append kernel command to command-buffer and close command-buffer + ASSERT_SUCCESS(urCommandBufferAppendKernelLaunchExp( + cmd_buf_handle, kernel, n_dimensions, &global_offset, &global_size, + &local_size, 0, nullptr, 0, nullptr, 0, nullptr, nullptr, nullptr, + nullptr)); + ASSERT_SUCCESS(urCommandBufferFinalizeExp(cmd_buf_handle)); + + // Verify execution succeeds + ASSERT_SUCCESS(urCommandBufferEnqueueExp(cmd_buf_handle, queues[i], 0, + nullptr, nullptr)); + ASSERT_SUCCESS(urQueueFinish(queues[i])); + } +} + +TEST_F(urMultiDeviceCommandBufferExpTest, Update) { + for (size_t i = 0; i < devices.size(); i++) { + auto device = devices[i]; + if (!(hasCommandBufferSupport(device) && + hasCommandBufferUpdateSupport(device))) { + continue; + } + + // Create a command-buffer with update enabled. + ur_exp_command_buffer_desc_t desc{ + UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_DESC, nullptr, true, false, + false}; + + // Create command-buffer + uur::raii::CommandBuffer cmd_buf_handle; + ASSERT_SUCCESS(urCommandBufferCreateExp(context, device, &desc, + cmd_buf_handle.ptr())); + + // Append kernel command to command-buffer and close command-buffer + uur::raii::CommandBufferCommand command; + ASSERT_SUCCESS(urCommandBufferAppendKernelLaunchExp( + cmd_buf_handle, kernel, n_dimensions, &global_offset, &global_size, + &local_size, 0, nullptr, 0, nullptr, 0, nullptr, nullptr, nullptr, + command.ptr())); + ASSERT_SUCCESS(urCommandBufferFinalizeExp(cmd_buf_handle)); + + // Verify execution succeeds + ASSERT_SUCCESS(urCommandBufferEnqueueExp(cmd_buf_handle, queues[i], 0, + nullptr, nullptr)); + ASSERT_SUCCESS(urQueueFinish(queues[i])); + + // Update kernel and enqueue command-buffer again + ur_exp_command_buffer_update_kernel_launch_desc_t update_desc = { + UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_KERNEL_LAUNCH_DESC, // stype + nullptr, // pNext + kernel, // hNewKernel + 0, // numNewMemObjArgs + 0, // numNewPointerArgs + 0, // numNewValueArgs + n_dimensions, // newWorkDim + nullptr, // pNewMemObjArgList + nullptr, // pNewPointerArgList + nullptr, // pNewValueArgList + nullptr, // pNewGlobalWorkOffset + nullptr, // pNewGlobalWorkSize + nullptr, // pNewLocalWorkSize + }; + ASSERT_SUCCESS( + urCommandBufferUpdateKernelLaunchExp(command, &update_desc)); + ASSERT_SUCCESS(urCommandBufferEnqueueExp(cmd_buf_handle, queues[i], 0, + nullptr, nullptr)); + ASSERT_SUCCESS(urQueueFinish(queues[i])); + } +} From c4b1986d6aad0322d432c5c43cafcab3a29c5a61 Mon Sep 17 00:00:00 2001 From: "Kenneth Benzie (Benie)" Date: Mon, 16 Dec 2024 13:52:14 +0000 Subject: [PATCH 3/6] Merge pull request #2447 from nrspruit/fix_deviceinfo_vector_width [L0] Fix Device Info Reporting for vector width to match spec --- source/adapters/level_zero/device.cpp | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/source/adapters/level_zero/device.cpp b/source/adapters/level_zero/device.cpp index b7422fe2cc..6705c4c659 100644 --- a/source/adapters/level_zero/device.cpp +++ b/source/adapters/level_zero/device.cpp @@ -654,9 +654,15 @@ ur_result_t urDeviceGetInfo( return ReturnValue(Device->ZeDeviceProperties->physicalEUSimdWidth / 4); case UR_DEVICE_INFO_NATIVE_VECTOR_WIDTH_DOUBLE: case UR_DEVICE_INFO_PREFERRED_VECTOR_WIDTH_DOUBLE: + // Must return 0 for *vector_width_double* if the device does not have fp64. + if (!(Device->ZeDeviceModuleProperties->flags & ZE_DEVICE_MODULE_FLAG_FP64)) + return ReturnValue(uint32_t{0}); return ReturnValue(Device->ZeDeviceProperties->physicalEUSimdWidth / 8); case UR_DEVICE_INFO_NATIVE_VECTOR_WIDTH_HALF: case UR_DEVICE_INFO_PREFERRED_VECTOR_WIDTH_HALF: + // Must return 0 for *vector_width_half* if the device does not have fp16. + if (!(Device->ZeDeviceModuleProperties->flags & ZE_DEVICE_MODULE_FLAG_FP16)) + return ReturnValue(uint32_t{0}); return ReturnValue(Device->ZeDeviceProperties->physicalEUSimdWidth / 2); case UR_DEVICE_INFO_MAX_NUM_SUB_GROUPS: { // Max_num_sub_Groups = maxTotalGroupSize/min(set of subGroupSizes); From 389489089efbb34b129154f4d4a68481fb7412b1 Mon Sep 17 00:00:00 2001 From: "Kenneth Benzie (Benie)" Date: Mon, 16 Dec 2024 13:52:47 +0000 Subject: [PATCH 4/6] Merge pull request #2449 from nrspruit/improve_l0_init_checking [L0] Update L0 Init checking to print details in error log --- source/adapters/level_zero/adapter.cpp | 13 +++++++++---- source/adapters/level_zero/common.cpp | 2 +- source/adapters/level_zero/common.hpp | 3 +++ 3 files changed, 13 insertions(+), 5 deletions(-) diff --git a/source/adapters/level_zero/adapter.cpp b/source/adapters/level_zero/adapter.cpp index 7dff6bcf14..68aa852595 100644 --- a/source/adapters/level_zero/adapter.cpp +++ b/source/adapters/level_zero/adapter.cpp @@ -9,6 +9,7 @@ //===----------------------------------------------------------------------===// #include "adapter.hpp" +#include "common.hpp" #include "ur_level_zero.hpp" #include @@ -162,7 +163,7 @@ ur_result_t initPlatforms(PlatformVec &platforms, ZE2UR_CALL(zeDriverGet, (&ZeDriverGetCount, ZeDriverGetHandles.data())); } if (ZeDriverGetCount == 0 && GlobalAdapter->ZeInitDriversCount == 0) { - logger::debug("\nNo Valid L0 Drivers found.\n"); + logger::error("\nNo Valid L0 Drivers found.\n"); return UR_RESULT_SUCCESS; } @@ -376,7 +377,9 @@ ur_adapter_handle_t_::ur_adapter_handle_t_() static_cast(L0InitFlags)); GlobalAdapter->ZeInitResult = ZE_CALL_NOCHECK(zeInit, (L0InitFlags)); if (GlobalAdapter->ZeInitResult != ZE_RESULT_SUCCESS) { - logger::debug("\nzeInit failed with {}\n", GlobalAdapter->ZeInitResult); + const char *ErrorString = "Unknown"; + zeParseError(GlobalAdapter->ZeInitResult, ErrorString); + logger::error("\nzeInit failed with {}\n", ErrorString); } bool useInitDrivers = false; @@ -422,8 +425,9 @@ ur_adapter_handle_t_::ur_adapter_handle_t_() if (GlobalAdapter->ZeInitDriversResult == ZE_RESULT_SUCCESS) { GlobalAdapter->InitDriversSupported = true; } else { - logger::debug("\nzeInitDrivers failed with {}\n", - GlobalAdapter->ZeInitDriversResult); + const char *ErrorString = "Unknown"; + zeParseError(GlobalAdapter->ZeInitDriversResult, ErrorString); + logger::error("\nzeInitDrivers failed with {}\n", ErrorString); } } } @@ -441,6 +445,7 @@ ur_adapter_handle_t_::ur_adapter_handle_t_() // Absorb the ZE_RESULT_ERROR_UNINITIALIZED and just return 0 Platforms. if (*GlobalAdapter->ZeResult == ZE_RESULT_ERROR_UNINITIALIZED) { + logger::error("Level Zero Uninitialized\n"); result = std::move(platforms); return; } diff --git a/source/adapters/level_zero/common.cpp b/source/adapters/level_zero/common.cpp index 3b3f59e055..e13afc179f 100644 --- a/source/adapters/level_zero/common.cpp +++ b/source/adapters/level_zero/common.cpp @@ -88,7 +88,7 @@ ZeUSMImportExtension ZeUSMImport; std::map *ZeCallCount = nullptr; -inline void zeParseError(ze_result_t ZeError, const char *&ErrorString) { +void zeParseError(ze_result_t ZeError, const char *&ErrorString) { switch (ZeError) { #define ZE_ERRCASE(ERR) \ case ERR: \ diff --git a/source/adapters/level_zero/common.hpp b/source/adapters/level_zero/common.hpp index 8a93993752..09d144df82 100644 --- a/source/adapters/level_zero/common.hpp +++ b/source/adapters/level_zero/common.hpp @@ -340,6 +340,9 @@ bool setEnvVar(const char *name, const char *value); // Map Level Zero runtime error code to UR error code. ur_result_t ze2urResult(ze_result_t ZeResult); +// Parse Level Zero error code and return the error string. +void zeParseError(ze_result_t ZeError, const char *&ErrorString); + // Trace a call to Level-Zero RT #define ZE2UR_CALL(ZeName, ZeArgs) \ { \ From b6bf7656b91c6c331d9bcb5fca7ec4ac26100d81 Mon Sep 17 00:00:00 2001 From: "Kenneth Benzie (Benie)" Date: Mon, 16 Dec 2024 13:52:58 +0000 Subject: [PATCH 5/6] Merge pull request #2452 from winstonzhang-intel/urlza-414 [L0] Fixed event leak when outevent is given and is completed --- source/adapters/level_zero/event.cpp | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/source/adapters/level_zero/event.cpp b/source/adapters/level_zero/event.cpp index c1e93483b8..eae16f0c57 100644 --- a/source/adapters/level_zero/event.cpp +++ b/source/adapters/level_zero/event.cpp @@ -145,6 +145,10 @@ ur_result_t urEnqueueEventsWait( std::unique_lock Lock(Queue->Mutex); resetCommandLists(Queue); } + if (OutEvent && (*OutEvent)->Completed) { + UR_CALL(CleanupCompletedEvent((*OutEvent), false, false)); + UR_CALL(urEventReleaseInternal((*OutEvent))); + } return UR_RESULT_SUCCESS; } @@ -955,7 +959,6 @@ ur_result_t urEventCreateWithNativeHandle( UREvent = new ur_event_handle_t_(ZeEvent, nullptr /* ZeEventPool */, Context, UR_EXT_COMMAND_TYPE_USER, Properties->isNativeHandleOwned); - UREvent->RefCountExternal++; } catch (const std::bad_alloc &) { From e8b4896a234c2f2667bca36ad2ca6b64a06202da Mon Sep 17 00:00:00 2001 From: "Kenneth Benzie (Benie)" Date: Mon, 16 Dec 2024 13:53:13 +0000 Subject: [PATCH 6/6] Merge pull request #2467 from nrspruit/fix_external_import_function_call [L0] Fix external semaphore import function calls to match the header --- source/adapters/level_zero/image.cpp | 6 +++--- source/adapters/level_zero/platform.hpp | 12 ++++++------ 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/source/adapters/level_zero/image.cpp b/source/adapters/level_zero/image.cpp index 8437fcff95..09bdc16a64 100644 --- a/source/adapters/level_zero/image.cpp +++ b/source/adapters/level_zero/image.cpp @@ -1237,7 +1237,7 @@ ur_result_t urBindlessImagesImportExternalSemaphoreExp( } ZE2UR_CALL(UrPlatform->ZeExternalSemaphoreExt.zexImportExternalSemaphoreExp, - (hDevice->ZeDevice, &ExtSemaphoreHandle, &SemDesc)); + (hDevice->ZeDevice, &SemDesc, &ExtSemaphoreHandle)); *phExternalSemaphoreHandle = (ur_exp_external_semaphore_handle_t)ExtSemaphoreHandle; @@ -1310,7 +1310,7 @@ ur_result_t urBindlessImagesWaitExternalSemaphoreExp( reinterpret_cast(hSemaphore); ZE2UR_CALL(UrPlatform->ZeExternalSemaphoreExt .zexCommandListAppendWaitExternalSemaphoresExp, - (ZeCommandList, &hExtSemaphore, &WaitParams, 1, ZeEvent, + (ZeCommandList, 1, &hExtSemaphore, &WaitParams, ZeEvent, WaitList.Length, WaitList.ZeEventList)); return UR_RESULT_SUCCESS; @@ -1373,7 +1373,7 @@ ur_result_t urBindlessImagesSignalExternalSemaphoreExp( ZE2UR_CALL(UrPlatform->ZeExternalSemaphoreExt .zexCommandListAppendSignalExternalSemaphoresExp, - (ZeCommandList, &hExtSemaphore, &SignalParams, 1, ZeEvent, + (ZeCommandList, 1, &hExtSemaphore, &SignalParams, ZeEvent, WaitList.Length, WaitList.ZeEventList)); return UR_RESULT_SUCCESS; diff --git a/source/adapters/level_zero/platform.hpp b/source/adapters/level_zero/platform.hpp index 4b613fb1e5..748460158c 100644 --- a/source/adapters/level_zero/platform.hpp +++ b/source/adapters/level_zero/platform.hpp @@ -119,17 +119,17 @@ struct ur_platform_handle_t_ : public _ur_platform { struct ZeExternalSemaphoreExtension { bool Supported = false; ze_result_t (*zexImportExternalSemaphoreExp)( - ze_device_handle_t, ze_intel_external_semaphore_exp_handle_t *, - const ze_intel_external_semaphore_exp_desc_t *); + ze_device_handle_t, const ze_intel_external_semaphore_exp_desc_t *, + ze_intel_external_semaphore_exp_handle_t *); ze_result_t (*zexCommandListAppendWaitExternalSemaphoresExp)( - ze_command_list_handle_t, + ze_command_list_handle_t, unsigned int, const ze_intel_external_semaphore_exp_handle_t *, - const ze_intel_external_semaphore_wait_exp_params_t *, unsigned int, + const ze_intel_external_semaphore_wait_exp_params_t *, ze_event_handle_t, uint32_t, ze_event_handle_t *); ze_result_t (*zexCommandListAppendSignalExternalSemaphoresExp)( - ze_command_list_handle_t, + ze_command_list_handle_t, size_t, const ze_intel_external_semaphore_exp_handle_t *, - const ze_intel_external_semaphore_signal_exp_params_t *, size_t, + const ze_intel_external_semaphore_signal_exp_params_t *, ze_event_handle_t, uint32_t, ze_event_handle_t *); ze_result_t (*zexDeviceReleaseExternalSemaphoreExp)( ze_intel_external_semaphore_exp_handle_t);