Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Candidate for the v0.11.1 release tag #2470

Merged
merged 6 commits into from
Dec 17, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 9 additions & 4 deletions source/adapters/level_zero/adapter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
//===----------------------------------------------------------------------===//

#include "adapter.hpp"
#include "common.hpp"
#include "ur_level_zero.hpp"
#include <iomanip>

Expand Down Expand Up @@ -162,7 +163,7 @@ ur_result_t initPlatforms(PlatformVec &platforms,
ZE2UR_CALL(zeDriverGet, (&ZeDriverGetCount, ZeDriverGetHandles.data()));
}
if (ZeDriverGetCount == 0 && GlobalAdapter->ZeInitDriversCount == 0) {
logger::debug("\nNo Valid L0 Drivers found.\n");
logger::error("\nNo Valid L0 Drivers found.\n");
return UR_RESULT_SUCCESS;
}

Expand Down Expand Up @@ -376,7 +377,9 @@ ur_adapter_handle_t_::ur_adapter_handle_t_()
static_cast<int>(L0InitFlags));
GlobalAdapter->ZeInitResult = ZE_CALL_NOCHECK(zeInit, (L0InitFlags));
if (GlobalAdapter->ZeInitResult != ZE_RESULT_SUCCESS) {
logger::debug("\nzeInit failed with {}\n", GlobalAdapter->ZeInitResult);
const char *ErrorString = "Unknown";
zeParseError(GlobalAdapter->ZeInitResult, ErrorString);
logger::error("\nzeInit failed with {}\n", ErrorString);
}

bool useInitDrivers = false;
Expand Down Expand Up @@ -422,8 +425,9 @@ ur_adapter_handle_t_::ur_adapter_handle_t_()
if (GlobalAdapter->ZeInitDriversResult == ZE_RESULT_SUCCESS) {
GlobalAdapter->InitDriversSupported = true;
} else {
logger::debug("\nzeInitDrivers failed with {}\n",
GlobalAdapter->ZeInitDriversResult);
const char *ErrorString = "Unknown";
zeParseError(GlobalAdapter->ZeInitDriversResult, ErrorString);
logger::error("\nzeInitDrivers failed with {}\n", ErrorString);
}
}
}
Expand All @@ -441,6 +445,7 @@ ur_adapter_handle_t_::ur_adapter_handle_t_()

// Absorb the ZE_RESULT_ERROR_UNINITIALIZED and just return 0 Platforms.
if (*GlobalAdapter->ZeResult == ZE_RESULT_ERROR_UNINITIALIZED) {
logger::error("Level Zero Uninitialized\n");
result = std::move(platforms);
return;
}
Expand Down
73 changes: 48 additions & 25 deletions source/adapters/level_zero/command_buffer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -894,28 +894,31 @@ urCommandBufferFinalizeExp(ur_exp_command_buffer_handle_t CommandBuffer) {
/**
* Sets the kernel arguments for a kernel command that will be appended to the
* command buffer.
* @param[in] CommandBuffer The CommandBuffer where the command will be
* @param[in] Device The Device associated with the command-buffer where the
* kernel command will be appended.
* @param[in,out] PendingArguments Arguments stored in the ur_kernel_handle_t
* object to be set on the \p ZeKernel object; cleared once they are applied.
* @param[in] ZeKernel The handle to the Level-Zero kernel that will be
* appended.
* @param[in] Kernel The handle to the kernel that will be appended.
* @return UR_RESULT_SUCCESS or an error code on failure
*/
ur_result_t
setKernelPendingArguments(ur_exp_command_buffer_handle_t CommandBuffer,
ur_kernel_handle_t Kernel) {

ur_result_t setKernelPendingArguments(
ur_device_handle_t Device,
std::vector<ur_kernel_handle_t_::ArgumentInfo> &PendingArguments,
ze_kernel_handle_t ZeKernel) {
// If there are any pending arguments set them now.
for (auto &Arg : Kernel->PendingArguments) {
for (auto &Arg : PendingArguments) {
// The ArgValue may be a NULL pointer in which case a NULL value is used for
// the kernel argument declared as a pointer to global or constant memory.
char **ZeHandlePtr = nullptr;
if (Arg.Value) {
UR_CALL(Arg.Value->getZeHandlePtr(ZeHandlePtr, Arg.AccessMode,
CommandBuffer->Device, nullptr, 0u));
UR_CALL(Arg.Value->getZeHandlePtr(ZeHandlePtr, Arg.AccessMode, Device,
nullptr, 0u));
}
ZE2UR_CALL(zeKernelSetArgumentValue,
(Kernel->ZeKernel, Arg.Index, Arg.Size, ZeHandlePtr));
(ZeKernel, Arg.Index, Arg.Size, ZeHandlePtr));
}
Kernel->PendingArguments.clear();
PendingArguments.clear();

return UR_RESULT_SUCCESS;
}
Expand Down Expand Up @@ -951,21 +954,29 @@ createCommandHandle(ur_exp_command_buffer_handle_t CommandBuffer,
ZE_MUTABLE_COMMAND_EXP_FLAG_GLOBAL_OFFSET;

auto Platform = CommandBuffer->Context->getPlatform();
auto ZeDevice = CommandBuffer->Device->ZeDevice;

if (NumKernelAlternatives > 0) {
ZeMutableCommandDesc.flags |=
ZE_MUTABLE_COMMAND_EXP_FLAG_KERNEL_INSTRUCTION;

std::vector<ze_kernel_handle_t> TranslatedKernelHandles(
NumKernelAlternatives + 1, nullptr);

ze_kernel_handle_t ZeMainKernel{};
UR_CALL(getZeKernel(ZeDevice, Kernel, &ZeMainKernel));

// Translate main kernel first
ZE2UR_CALL(zelLoaderTranslateHandle,
(ZEL_HANDLE_KERNEL, Kernel->ZeKernel,
(ZEL_HANDLE_KERNEL, ZeMainKernel,
(void **)&TranslatedKernelHandles[0]));

for (size_t i = 0; i < NumKernelAlternatives; i++) {
ze_kernel_handle_t ZeAltKernel{};
UR_CALL(getZeKernel(ZeDevice, KernelAlternatives[i], &ZeAltKernel));

ZE2UR_CALL(zelLoaderTranslateHandle,
(ZEL_HANDLE_KERNEL, KernelAlternatives[i]->ZeKernel,
(ZEL_HANDLE_KERNEL, ZeAltKernel,
(void **)&TranslatedKernelHandles[i + 1]));
}

Expand Down Expand Up @@ -1022,23 +1033,28 @@ ur_result_t urCommandBufferAppendKernelLaunchExp(
std::scoped_lock<ur_shared_mutex, ur_shared_mutex, ur_shared_mutex> Lock(
Kernel->Mutex, Kernel->Program->Mutex, CommandBuffer->Mutex);

auto Device = CommandBuffer->Device;
ze_kernel_handle_t ZeKernel{};
UR_CALL(getZeKernel(Device->ZeDevice, Kernel, &ZeKernel));

if (GlobalWorkOffset != NULL) {
UR_CALL(setKernelGlobalOffset(CommandBuffer->Context, Kernel->ZeKernel,
WorkDim, GlobalWorkOffset));
UR_CALL(setKernelGlobalOffset(CommandBuffer->Context, ZeKernel, WorkDim,
GlobalWorkOffset));
}

// If there are any pending arguments set them now.
if (!Kernel->PendingArguments.empty()) {
UR_CALL(setKernelPendingArguments(CommandBuffer, Kernel));
UR_CALL(
setKernelPendingArguments(Device, Kernel->PendingArguments, ZeKernel));
}

ze_group_count_t ZeThreadGroupDimensions{1, 1, 1};
uint32_t WG[3];
UR_CALL(calculateKernelWorkDimensions(Kernel->ZeKernel, CommandBuffer->Device,
UR_CALL(calculateKernelWorkDimensions(ZeKernel, Device,
ZeThreadGroupDimensions, WG, WorkDim,
GlobalWorkSize, LocalWorkSize));

ZE2UR_CALL(zeKernelSetGroupSize, (Kernel->ZeKernel, WG[0], WG[1], WG[2]));
ZE2UR_CALL(zeKernelSetGroupSize, (ZeKernel, WG[0], WG[1], WG[2]));

CommandBuffer->KernelsList.push_back(Kernel);
for (size_t i = 0; i < NumKernelAlternatives; i++) {
Expand All @@ -1063,7 +1079,7 @@ ur_result_t urCommandBufferAppendKernelLaunchExp(
SyncPointWaitList, false, RetSyncPoint, ZeEventList, ZeLaunchEvent));

ZE2UR_CALL(zeCommandListAppendLaunchKernel,
(CommandBuffer->ZeComputeCommandList, Kernel->ZeKernel,
(CommandBuffer->ZeComputeCommandList, ZeKernel,
&ZeThreadGroupDimensions, ZeLaunchEvent, ZeEventList.size(),
getPointerFromVector(ZeEventList)));

Expand Down Expand Up @@ -1836,6 +1852,7 @@ ur_result_t updateKernelCommand(
const auto CommandBuffer = Command->CommandBuffer;
const void *NextDesc = nullptr;
auto Platform = CommandBuffer->Context->getPlatform();
auto ZeDevice = CommandBuffer->Device->ZeDevice;

uint32_t Dim = CommandDesc->newWorkDim;
size_t *NewGlobalWorkOffset = CommandDesc->pNewGlobalWorkOffset;
Expand All @@ -1844,11 +1861,14 @@ ur_result_t updateKernelCommand(

// Kernel handle must be updated first for a given CommandId if required
ur_kernel_handle_t NewKernel = CommandDesc->hNewKernel;

if (NewKernel && Command->Kernel != NewKernel) {
ze_kernel_handle_t ZeNewKernel{};
UR_CALL(getZeKernel(ZeDevice, NewKernel, &ZeNewKernel));

ze_kernel_handle_t ZeKernelTranslated = nullptr;
ZE2UR_CALL(
zelLoaderTranslateHandle,
(ZEL_HANDLE_KERNEL, NewKernel->ZeKernel, (void **)&ZeKernelTranslated));
ZE2UR_CALL(zelLoaderTranslateHandle,
(ZEL_HANDLE_KERNEL, ZeNewKernel, (void **)&ZeKernelTranslated));

ZE2UR_CALL(Platform->ZeMutableCmdListExt
.zexCommandListUpdateMutableCommandKernelsExp,
Expand Down Expand Up @@ -1905,10 +1925,13 @@ ur_result_t updateKernelCommand(
// by the driver for the kernel.
bool UpdateWGSize = NewLocalWorkSize == nullptr;

ze_kernel_handle_t ZeKernel{};
UR_CALL(getZeKernel(ZeDevice, Command->Kernel, &ZeKernel));

uint32_t WG[3];
UR_CALL(calculateKernelWorkDimensions(
Command->Kernel->ZeKernel, CommandBuffer->Device,
ZeThreadGroupDimensions, WG, Dim, NewGlobalWorkSize, NewLocalWorkSize));
UR_CALL(calculateKernelWorkDimensions(ZeKernel, CommandBuffer->Device,
ZeThreadGroupDimensions, WG, Dim,
NewGlobalWorkSize, NewLocalWorkSize));

auto MutableGroupCountDesc =
std::make_unique<ZeStruct<ze_mutable_group_count_exp_desc_t>>();
Expand Down
2 changes: 1 addition & 1 deletion source/adapters/level_zero/common.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,7 @@ ZeUSMImportExtension ZeUSMImport;

std::map<std::string, int> *ZeCallCount = nullptr;

inline void zeParseError(ze_result_t ZeError, const char *&ErrorString) {
void zeParseError(ze_result_t ZeError, const char *&ErrorString) {
switch (ZeError) {
#define ZE_ERRCASE(ERR) \
case ERR: \
Expand Down
3 changes: 3 additions & 0 deletions source/adapters/level_zero/common.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -340,6 +340,9 @@ bool setEnvVar(const char *name, const char *value);
// Map Level Zero runtime error code to UR error code.
ur_result_t ze2urResult(ze_result_t ZeResult);

// Parse Level Zero error code and store the matching error string in ErrorString.
void zeParseError(ze_result_t ZeError, const char *&ErrorString);

// Trace a call to Level-Zero RT
#define ZE2UR_CALL(ZeName, ZeArgs) \
{ \
Expand Down
6 changes: 6 additions & 0 deletions source/adapters/level_zero/device.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -654,9 +654,15 @@ ur_result_t urDeviceGetInfo(
return ReturnValue(Device->ZeDeviceProperties->physicalEUSimdWidth / 4);
case UR_DEVICE_INFO_NATIVE_VECTOR_WIDTH_DOUBLE:
case UR_DEVICE_INFO_PREFERRED_VECTOR_WIDTH_DOUBLE:
// Must return 0 for *vector_width_double* if the device does not have fp64.
if (!(Device->ZeDeviceModuleProperties->flags & ZE_DEVICE_MODULE_FLAG_FP64))
return ReturnValue(uint32_t{0});
return ReturnValue(Device->ZeDeviceProperties->physicalEUSimdWidth / 8);
case UR_DEVICE_INFO_NATIVE_VECTOR_WIDTH_HALF:
case UR_DEVICE_INFO_PREFERRED_VECTOR_WIDTH_HALF:
// Must return 0 for *vector_width_half* if the device does not have fp16.
if (!(Device->ZeDeviceModuleProperties->flags & ZE_DEVICE_MODULE_FLAG_FP16))
return ReturnValue(uint32_t{0});
return ReturnValue(Device->ZeDeviceProperties->physicalEUSimdWidth / 2);
case UR_DEVICE_INFO_MAX_NUM_SUB_GROUPS: {
// Max_num_sub_Groups = maxTotalGroupSize/min(set of subGroupSizes);
Expand Down
5 changes: 4 additions & 1 deletion source/adapters/level_zero/event.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -145,6 +145,10 @@ ur_result_t urEnqueueEventsWait(
std::unique_lock<ur_shared_mutex> Lock(Queue->Mutex);
resetCommandLists(Queue);
}
if (OutEvent && (*OutEvent)->Completed) {
UR_CALL(CleanupCompletedEvent((*OutEvent), false, false));
UR_CALL(urEventReleaseInternal((*OutEvent)));
}

return UR_RESULT_SUCCESS;
}
Expand Down Expand Up @@ -955,7 +959,6 @@ ur_result_t urEventCreateWithNativeHandle(
UREvent = new ur_event_handle_t_(ZeEvent, nullptr /* ZeEventPool */,
Context, UR_EXT_COMMAND_TYPE_USER,
Properties->isNativeHandleOwned);

UREvent->RefCountExternal++;

} catch (const std::bad_alloc &) {
Expand Down
6 changes: 3 additions & 3 deletions source/adapters/level_zero/image.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1237,7 +1237,7 @@ ur_result_t urBindlessImagesImportExternalSemaphoreExp(
}

ZE2UR_CALL(UrPlatform->ZeExternalSemaphoreExt.zexImportExternalSemaphoreExp,
(hDevice->ZeDevice, &ExtSemaphoreHandle, &SemDesc));
(hDevice->ZeDevice, &SemDesc, &ExtSemaphoreHandle));
*phExternalSemaphoreHandle =
(ur_exp_external_semaphore_handle_t)ExtSemaphoreHandle;

Expand Down Expand Up @@ -1310,7 +1310,7 @@ ur_result_t urBindlessImagesWaitExternalSemaphoreExp(
reinterpret_cast<ze_intel_external_semaphore_exp_handle_t>(hSemaphore);
ZE2UR_CALL(UrPlatform->ZeExternalSemaphoreExt
.zexCommandListAppendWaitExternalSemaphoresExp,
(ZeCommandList, &hExtSemaphore, &WaitParams, 1, ZeEvent,
(ZeCommandList, 1, &hExtSemaphore, &WaitParams, ZeEvent,
WaitList.Length, WaitList.ZeEventList));

return UR_RESULT_SUCCESS;
Expand Down Expand Up @@ -1373,7 +1373,7 @@ ur_result_t urBindlessImagesSignalExternalSemaphoreExp(

ZE2UR_CALL(UrPlatform->ZeExternalSemaphoreExt
.zexCommandListAppendSignalExternalSemaphoresExp,
(ZeCommandList, &hExtSemaphore, &SignalParams, 1, ZeEvent,
(ZeCommandList, 1, &hExtSemaphore, &SignalParams, ZeEvent,
WaitList.Length, WaitList.ZeEventList));

return UR_RESULT_SUCCESS;
Expand Down
12 changes: 6 additions & 6 deletions source/adapters/level_zero/platform.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -119,17 +119,17 @@ struct ur_platform_handle_t_ : public _ur_platform {
struct ZeExternalSemaphoreExtension {
bool Supported = false;
ze_result_t (*zexImportExternalSemaphoreExp)(
ze_device_handle_t, ze_intel_external_semaphore_exp_handle_t *,
const ze_intel_external_semaphore_exp_desc_t *);
ze_device_handle_t, const ze_intel_external_semaphore_exp_desc_t *,
ze_intel_external_semaphore_exp_handle_t *);
ze_result_t (*zexCommandListAppendWaitExternalSemaphoresExp)(
ze_command_list_handle_t,
ze_command_list_handle_t, unsigned int,
const ze_intel_external_semaphore_exp_handle_t *,
const ze_intel_external_semaphore_wait_exp_params_t *, unsigned int,
const ze_intel_external_semaphore_wait_exp_params_t *,
ze_event_handle_t, uint32_t, ze_event_handle_t *);
ze_result_t (*zexCommandListAppendSignalExternalSemaphoresExp)(
ze_command_list_handle_t,
ze_command_list_handle_t, size_t,
const ze_intel_external_semaphore_exp_handle_t *,
const ze_intel_external_semaphore_signal_exp_params_t *, size_t,
const ze_intel_external_semaphore_signal_exp_params_t *,
ze_event_handle_t, uint32_t, ze_event_handle_t *);
ze_result_t (*zexDeviceReleaseExternalSemaphoreExp)(
ze_intel_external_semaphore_exp_handle_t);
Expand Down
10 changes: 10 additions & 0 deletions source/loader/layers/sanitizer/asan/asan_interceptor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -431,6 +431,12 @@ ur_result_t AsanInterceptor::unregisterProgram(ur_program_handle_t Program) {
auto ProgramInfo = getProgramInfo(Program);
assert(ProgramInfo != nullptr && "unregistered program!");

std::scoped_lock<ur_shared_mutex> Guard(m_AllocationMapMutex);
for (auto AI : ProgramInfo->AllocInfoForGlobals) {
m_AllocationMap.erase(AI->AllocBegin);
}
ProgramInfo->AllocInfoForGlobals.clear();

ProgramInfo->InstrumentedKernels.clear();

return UR_RESULT_SUCCESS;
Expand Down Expand Up @@ -549,6 +555,10 @@ AsanInterceptor::registerDeviceGlobals(ur_program_handle_t Program) {
{}});

ContextInfo->insertAllocInfo({Device}, AI);
ProgramInfo->AllocInfoForGlobals.emplace(AI);

std::scoped_lock<ur_shared_mutex> Guard(m_AllocationMapMutex);
m_AllocationMap.emplace(AI->AllocBegin, std::move(AI));
}
}

Expand Down
1 change: 1 addition & 0 deletions source/loader/layers/sanitizer/asan/asan_interceptor.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -112,6 +112,7 @@ struct ProgramInfo {
std::atomic<int32_t> RefCount = 1;

// Program is built only once, so we don't need to lock it
std::unordered_set<std::shared_ptr<AllocInfo>> AllocInfoForGlobals;
std::unordered_set<std::string> InstrumentedKernels;

explicit ProgramInfo(ur_program_handle_t Program) : Handle(Program) {
Expand Down
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
urProgramSetSpecializationConstantsTest.InvalidValueSize/*
urProgramSetSpecializationConstantsTest.InvalidValueId/*
urProgramSetSpecializationConstantsTest.InvalidValuePtr/*
{{OPT}}urMultiDeviceCommandBufferExpTest.*
Loading
Loading