From 4f28de5ad0faddd8c54ed28ef479d1741f59df4a Mon Sep 17 00:00:00 2001 From: "Zhao, Maosu" Date: Sun, 27 Oct 2024 19:43:30 -0700 Subject: [PATCH 1/4] [DeviceASAN] Use device usm to sync asan runtime data instead of shared usm Shared USM has poor performance; changing it to device USM will benefit several benchmarks. --- .../layers/sanitizer/asan_interceptor.cpp | 133 +++++++++--------- .../layers/sanitizer/asan_interceptor.hpp | 66 +++++++-- .../layers/sanitizer/asan_libdevice.hpp | 3 +- source/loader/layers/sanitizer/ur_sanddi.cpp | 7 +- 4 files changed, 127 insertions(+), 82 deletions(-) diff --git a/source/loader/layers/sanitizer/asan_interceptor.cpp b/source/loader/layers/sanitizer/asan_interceptor.cpp index 4a315588fd..e023208b59 100644 --- a/source/loader/layers/sanitizer/asan_interceptor.cpp +++ b/source/loader/layers/sanitizer/asan_interceptor.cpp @@ -243,7 +243,7 @@ ur_result_t SanitizerInterceptor::releaseMemory(ur_context_handle_t Context, ur_result_t SanitizerInterceptor::preLaunchKernel(ur_kernel_handle_t Kernel, ur_queue_handle_t Queue, - USMLaunchInfo &LaunchInfo) { + LaunchInfo &LaunchInfo) { auto Context = GetContext(Queue); auto Device = GetDevice(Queue); auto ContextInfo = getContextInfo(Context); @@ -268,12 +268,14 @@ ur_result_t SanitizerInterceptor::preLaunchKernel(ur_kernel_handle_t Kernel, ur_result_t SanitizerInterceptor::postLaunchKernel(ur_kernel_handle_t Kernel, ur_queue_handle_t Queue, - USMLaunchInfo &LaunchInfo) { + LaunchInfo &LaunchInfo) { // FIXME: We must use block operation here, until we support urEventSetCallback auto Result = getContext()->urDdiTable.Queue.pfnFinish(Queue); + UR_CALL(LaunchInfo.Data.syncFromDevice(Queue)); + if (Result == UR_RESULT_SUCCESS) { - for (const auto &AH : LaunchInfo.Data->SanitizerReport) { + for (const auto &AH : LaunchInfo.Data.Host.SanitizerReport) { if (!AH.Flag) { continue; } @@ -600,7 +602,7 @@ SanitizerInterceptor::getMemBuffer(ur_mem_handle_t MemHandle) { ur_result_t
SanitizerInterceptor::prepareLaunch( std::shared_ptr &ContextInfo, std::shared_ptr &DeviceInfo, ur_queue_handle_t Queue, - ur_kernel_handle_t Kernel, USMLaunchInfo &LaunchInfo) { + ur_kernel_handle_t Kernel, LaunchInfo &LaunchInfo) { do { auto KernelInfo = getKernelInfo(Kernel); @@ -635,27 +637,6 @@ ur_result_t SanitizerInterceptor::prepareLaunch( } } - // Set launch info argument - auto ArgNums = GetKernelNumArgs(Kernel); - if (ArgNums) { - getContext()->logger.debug( - "launch_info {} (numLocalArgs={}, localArgs={})", - (void *)LaunchInfo.Data, LaunchInfo.Data->NumLocalArgs, - (void *)LaunchInfo.Data->LocalArgs); - ur_result_t URes = getContext()->urDdiTable.Kernel.pfnSetArgPointer( - Kernel, ArgNums - 1, nullptr, LaunchInfo.Data); - if (URes != UR_RESULT_SUCCESS) { - getContext()->logger.error("Failed to set launch info: {}", - URes); - return URes; - } - } - - LaunchInfo.Data->GlobalShadowOffset = DeviceInfo->Shadow->ShadowBegin; - LaunchInfo.Data->GlobalShadowOffsetEnd = DeviceInfo->Shadow->ShadowEnd; - LaunchInfo.Data->DeviceTy = DeviceInfo->Type; - LaunchInfo.Data->Debug = getOptions().Debug ? 1 : 0; - if (LaunchInfo.LocalWorkSize.empty()) { LaunchInfo.LocalWorkSize.resize(LaunchInfo.WorkDim); auto URes = @@ -682,6 +663,32 @@ ur_result_t SanitizerInterceptor::prepareLaunch( LocalWorkSize[Dim]; } + // Set launch info argument + auto ArgNums = GetKernelNumArgs(Kernel); + if (ArgNums == 0) { + return UR_RESULT_SUCCESS; + } + + LaunchInfo.Data.Host.GlobalShadowOffset = DeviceInfo->Shadow->ShadowBegin; + LaunchInfo.Data.Host.GlobalShadowOffsetEnd = DeviceInfo->Shadow->ShadowEnd; + LaunchInfo.Data.Host.DeviceTy = DeviceInfo->Type; + LaunchInfo.Data.Host.Debug = getOptions().Debug ? 
1 : 0; + + UR_CALL(getContext()->urDdiTable.USM.pfnDeviceAlloc( + ContextInfo->Handle, DeviceInfo->Handle, nullptr, nullptr, + sizeof(LaunchInfo), (void **)&LaunchInfo.Data.DevicePtr)); + getContext()->logger.debug( + "launch_info {} (numLocalArgs={}, localArgs={})", + (void *)LaunchInfo.Data.DevicePtr, + LaunchInfo.Data.Host.NumLocalArgs, + (void *)LaunchInfo.Data.Host.LocalArgs); + ur_result_t URes = getContext()->urDdiTable.Kernel.pfnSetArgPointer( + Kernel, ArgNums - 1, nullptr, LaunchInfo.Data.DevicePtr); + if (URes != UR_RESULT_SUCCESS) { + getContext()->logger.error("Failed to set launch info: {}", URes); + return URes; + } + auto EnqueueAllocateShadowMemory = [Context = ContextInfo->Handle, Device = DeviceInfo->Handle, Queue](size_t Size, uptr &Ptr) { @@ -730,7 +737,7 @@ ur_result_t SanitizerInterceptor::prepareLaunch( if (EnqueueAllocateShadowMemory( LocalShadowMemorySize, - LaunchInfo.Data->LocalShadowOffset) != + LaunchInfo.Data.Host.LocalShadowOffset) != UR_RESULT_SUCCESS) { getContext()->logger.warning( "Failed to allocate shadow memory for local " @@ -741,8 +748,8 @@ ur_result_t SanitizerInterceptor::prepareLaunch( "Skip checking local memory of kernel <{}>", GetKernelName(Kernel)); } else { - LaunchInfo.Data->LocalShadowOffsetEnd = - LaunchInfo.Data->LocalShadowOffset + + LaunchInfo.Data.Host.LocalShadowOffsetEnd = + LaunchInfo.Data.Host.LocalShadowOffset + LocalShadowMemorySize - 1; ContextInfo->Stats.UpdateShadowMalloced( @@ -750,8 +757,8 @@ ur_result_t SanitizerInterceptor::prepareLaunch( getContext()->logger.info( "ShadowMemory(Local, {} - {})", - (void *)LaunchInfo.Data->LocalShadowOffset, - (void *)LaunchInfo.Data->LocalShadowOffsetEnd); + (void *)LaunchInfo.Data.Host.LocalShadowOffset, + (void *)LaunchInfo.Data.Host.LocalShadowOffsetEnd); } } } @@ -759,7 +766,7 @@ ur_result_t SanitizerInterceptor::prepareLaunch( // Write shadow memory offset for private memory if (getOptions().DetectPrivates) { if (DeviceInfo->Type == DeviceType::CPU) { - 
LaunchInfo.Data->PrivateShadowOffset = + LaunchInfo.Data.Host.PrivateShadowOffset = DeviceInfo->Shadow->ShadowBegin; } else if (DeviceInfo->Type == DeviceType::GPU_PVC || DeviceInfo->Type == DeviceType::GPU_DG2) { @@ -772,7 +779,7 @@ ur_result_t SanitizerInterceptor::prepareLaunch( if (EnqueueAllocateShadowMemory( PrivateShadowMemorySize, - LaunchInfo.Data->PrivateShadowOffset) != + LaunchInfo.Data.Host.PrivateShadowOffset) != UR_RESULT_SUCCESS) { getContext()->logger.warning( "Failed to allocate shadow memory for private " @@ -783,8 +790,8 @@ ur_result_t SanitizerInterceptor::prepareLaunch( "Skip checking private memory of kernel <{}>", GetKernelName(Kernel)); } else { - LaunchInfo.Data->PrivateShadowOffsetEnd = - LaunchInfo.Data->PrivateShadowOffset + + LaunchInfo.Data.Host.PrivateShadowOffsetEnd = + LaunchInfo.Data.Host.PrivateShadowOffset + PrivateShadowMemorySize - 1; ContextInfo->Stats.UpdateShadowMalloced( @@ -792,11 +799,14 @@ ur_result_t SanitizerInterceptor::prepareLaunch( getContext()->logger.info( "ShadowMemory(Private, {} - {})", - (void *)LaunchInfo.Data->PrivateShadowOffset, - (void *)LaunchInfo.Data->PrivateShadowOffsetEnd); + (void *)LaunchInfo.Data.Host.PrivateShadowOffset, + (void *)LaunchInfo.Data.Host.PrivateShadowOffsetEnd); } } } + + // Prepare launch info for device side + UR_CALL(LaunchInfo.Data.syncToDevice(Queue)); } while (false); return UR_RESULT_SUCCESS; @@ -848,61 +858,52 @@ ContextInfo::~ContextInfo() { } } -ur_result_t USMLaunchInfo::initialize() { - UR_CALL(getContext()->urDdiTable.Context.pfnRetain(Context)); - UR_CALL(getContext()->urDdiTable.Device.pfnRetain(Device)); - UR_CALL(getContext()->urDdiTable.USM.pfnSharedAlloc( - Context, Device, nullptr, nullptr, sizeof(LaunchInfo), (void **)&Data)); - *Data = LaunchInfo{}; - return UR_RESULT_SUCCESS; -} - -ur_result_t USMLaunchInfo::updateKernelInfo(const KernelInfo &KI) { - auto NumArgs = KI.LocalArgs.size(); - if (NumArgs) { - Data->NumLocalArgs = NumArgs; - 
UR_CALL(getContext()->urDdiTable.USM.pfnSharedAlloc( - Context, Device, nullptr, nullptr, sizeof(LocalArgsInfo) * NumArgs, - (void **)&Data->LocalArgs)); - uint32_t i = 0; +ur_result_t LaunchInfo::updateKernelInfo(const KernelInfo &KI) { + if (!KI.LocalArgs.empty()) { + std::vector LocalArgsInfo; for (auto [ArgIndex, ArgInfo] : KI.LocalArgs) { - Data->LocalArgs[i++] = ArgInfo; + LocalArgsInfo.push_back(ArgInfo); getContext()->logger.debug( "local_args (argIndex={}, size={}, sizeWithRZ={})", ArgIndex, ArgInfo.Size, ArgInfo.SizeWithRedZone); } + ManagedQueue Queue(Context, Device); + UR_CALL( + Data.importLocalArgsInfo(Context, Device, Queue, LocalArgsInfo)); } return UR_RESULT_SUCCESS; } -USMLaunchInfo::~USMLaunchInfo() { +LaunchInfo::~LaunchInfo() { [[maybe_unused]] ur_result_t Result; - if (Data) { + if (Data.DevicePtr) { auto Type = GetDeviceType(Context, Device); auto ContextInfo = getContext()->interceptor->getContextInfo(Context); if (Type == DeviceType::GPU_PVC || Type == DeviceType::GPU_DG2) { - if (Data->PrivateShadowOffset) { + if (Data.Host.PrivateShadowOffset) { ContextInfo->Stats.UpdateShadowFreed( - Data->PrivateShadowOffsetEnd - Data->PrivateShadowOffset + - 1); + Data.Host.PrivateShadowOffsetEnd - + Data.Host.PrivateShadowOffset + 1); Result = getContext()->urDdiTable.USM.pfnFree( - Context, (void *)Data->PrivateShadowOffset); + Context, (void *)Data.Host.PrivateShadowOffset); assert(Result == UR_RESULT_SUCCESS); } - if (Data->LocalShadowOffset) { + if (Data.Host.LocalShadowOffset) { ContextInfo->Stats.UpdateShadowFreed( - Data->LocalShadowOffsetEnd - Data->LocalShadowOffset + 1); + Data.Host.LocalShadowOffsetEnd - + Data.Host.LocalShadowOffset + 1); Result = getContext()->urDdiTable.USM.pfnFree( - Context, (void *)Data->LocalShadowOffset); + Context, (void *)Data.Host.LocalShadowOffset); assert(Result == UR_RESULT_SUCCESS); } } - if (Data->LocalArgs) { + if (Data.Host.LocalArgs) { Result = getContext()->urDdiTable.USM.pfnFree( - Context, (void 
*)Data->LocalArgs); + Context, (void *)Data.Host.LocalArgs); assert(Result == UR_RESULT_SUCCESS); } - Result = getContext()->urDdiTable.USM.pfnFree(Context, (void *)Data); + Result = getContext()->urDdiTable.USM.pfnFree(Context, + (void *)Data.DevicePtr); assert(Result == UR_RESULT_SUCCESS); } Result = getContext()->urDdiTable.Context.pfnRelease(Context); diff --git a/source/loader/layers/sanitizer/asan_interceptor.hpp b/source/loader/layers/sanitizer/asan_interceptor.hpp index e5429acd56..5d70f09cda 100644 --- a/source/loader/layers/sanitizer/asan_interceptor.hpp +++ b/source/loader/layers/sanitizer/asan_interceptor.hpp @@ -154,8 +154,50 @@ struct ContextInfo { } }; -struct USMLaunchInfo { - LaunchInfo *Data = nullptr; +struct AsanRuntimeDataWrapper { + AsanRuntimeData Host{}; + + AsanRuntimeData *DevicePtr = nullptr; + + ur_result_t syncFromDevice(ur_queue_handle_t Queue) { + UR_CALL(getContext()->urDdiTable.Enqueue.pfnUSMMemcpy( + Queue, true, ur_cast(&Host), DevicePtr, + sizeof(AsanRuntimeData), 0, nullptr, nullptr)); + + return UR_RESULT_SUCCESS; + } + + ur_result_t syncToDevice(ur_queue_handle_t Queue) { + UR_CALL(getContext()->urDdiTable.Enqueue.pfnUSMMemcpy( + Queue, true, DevicePtr, ur_cast(&Host), + sizeof(AsanRuntimeData), 0, nullptr, nullptr)); + + return UR_RESULT_SUCCESS; + } + + ur_result_t + importLocalArgsInfo(ur_context_handle_t Context, ur_device_handle_t Device, + ur_queue_handle_t Queue, + const std::vector &LocalArgs) { + assert(!LocalArgs.empty()); + + Host.NumLocalArgs = LocalArgs.size(); + const size_t LocalArgsInfoSize = + sizeof(LocalArgsInfo) * Host.NumLocalArgs; + UR_CALL(getContext()->urDdiTable.USM.pfnDeviceAlloc( + Context, Device, nullptr, nullptr, LocalArgsInfoSize, + ur_cast(&Host.LocalArgs))); + + UR_CALL(getContext()->urDdiTable.Enqueue.pfnUSMMemcpy( + Queue, true, Host.LocalArgs, &LocalArgs[0], LocalArgsInfoSize, 0, + nullptr, nullptr)); + + return UR_RESULT_SUCCESS; + } +}; + +struct LaunchInfo { + AsanRuntimeDataWrapper 
Data{}; ur_context_handle_t Context = nullptr; ur_device_handle_t Device = nullptr; @@ -164,19 +206,23 @@ struct USMLaunchInfo { std::vector LocalWorkSize; uint32_t WorkDim = 0; - USMLaunchInfo(ur_context_handle_t Context, ur_device_handle_t Device, - const size_t *GlobalWorkSize, const size_t *LocalWorkSize, - const size_t *GlobalWorkOffset, uint32_t WorkDim) + LaunchInfo(ur_context_handle_t Context, ur_device_handle_t Device, + const size_t *GlobalWorkSize, const size_t *LocalWorkSize, + const size_t *GlobalWorkOffset, uint32_t WorkDim) : Context(Context), Device(Device), GlobalWorkSize(GlobalWorkSize), GlobalWorkOffset(GlobalWorkOffset), WorkDim(WorkDim) { if (LocalWorkSize) { this->LocalWorkSize = std::vector(LocalWorkSize, LocalWorkSize + WorkDim); } + [[maybe_unused]] auto Result = + getContext()->urDdiTable.Context.pfnRetain(Context); + assert(Result == UR_RESULT_SUCCESS); + Result = getContext()->urDdiTable.Device.pfnRetain(Device); + assert(Result == UR_RESULT_SUCCESS); } - ~USMLaunchInfo(); + ~LaunchInfo(); - ur_result_t initialize(); ur_result_t updateKernelInfo(const KernelInfo &KI); }; @@ -206,11 +252,11 @@ class SanitizerInterceptor { ur_result_t preLaunchKernel(ur_kernel_handle_t Kernel, ur_queue_handle_t Queue, - USMLaunchInfo &LaunchInfo); + LaunchInfo &LaunchInfo); ur_result_t postLaunchKernel(ur_kernel_handle_t Kernel, ur_queue_handle_t Queue, - USMLaunchInfo &LaunchInfo); + LaunchInfo &LaunchInfo); ur_result_t insertContext(ur_context_handle_t Context, std::shared_ptr &CI); @@ -285,7 +331,7 @@ class SanitizerInterceptor { std::shared_ptr &DeviceInfo, ur_queue_handle_t Queue, ur_kernel_handle_t Kernel, - USMLaunchInfo &LaunchInfo); + LaunchInfo &LaunchInfo); ur_result_t allocShadowMemory(ur_context_handle_t Context, std::shared_ptr &DeviceInfo); diff --git a/source/loader/layers/sanitizer/asan_libdevice.hpp b/source/loader/layers/sanitizer/asan_libdevice.hpp index 8eba929f34..db2df0ff0f 100644 --- 
a/source/loader/layers/sanitizer/asan_libdevice.hpp +++ b/source/loader/layers/sanitizer/asan_libdevice.hpp @@ -71,10 +71,9 @@ struct LocalArgsInfo { constexpr std::size_t ASAN_MAX_NUM_REPORTS = 10; -struct LaunchInfo { +struct AsanRuntimeData { uintptr_t GlobalShadowOffset = 0; uintptr_t GlobalShadowOffsetEnd = 0; - uintptr_t PrivateShadowOffset = 0; uintptr_t PrivateShadowOffsetEnd = 0; diff --git a/source/loader/layers/sanitizer/ur_sanddi.cpp b/source/loader/layers/sanitizer/ur_sanddi.cpp index 95b1649691..d65a51212b 100644 --- a/source/loader/layers/sanitizer/ur_sanddi.cpp +++ b/source/loader/layers/sanitizer/ur_sanddi.cpp @@ -458,10 +458,9 @@ __urdlllocal ur_result_t UR_APICALL urEnqueueKernelLaunch( getContext()->logger.debug("==== urEnqueueKernelLaunch"); - USMLaunchInfo LaunchInfo(GetContext(hQueue), GetDevice(hQueue), - pGlobalWorkSize, pLocalWorkSize, pGlobalWorkOffset, - workDim); - UR_CALL(LaunchInfo.initialize()); + LaunchInfo LaunchInfo(GetContext(hQueue), GetDevice(hQueue), + pGlobalWorkSize, pLocalWorkSize, pGlobalWorkOffset, + workDim); UR_CALL(getContext()->interceptor->preLaunchKernel(hKernel, hQueue, LaunchInfo)); From aca9048e3fb4edea236caf2a7a01a05f16af1e99 Mon Sep 17 00:00:00 2001 From: "Zhao, Maosu" Date: Sun, 27 Oct 2024 20:34:22 -0700 Subject: [PATCH 2/4] format --- source/loader/layers/sanitizer/asan_interceptor.cpp | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/source/loader/layers/sanitizer/asan_interceptor.cpp b/source/loader/layers/sanitizer/asan_interceptor.cpp index e023208b59..fbbd1e2067 100644 --- a/source/loader/layers/sanitizer/asan_interceptor.cpp +++ b/source/loader/layers/sanitizer/asan_interceptor.cpp @@ -669,8 +669,10 @@ ur_result_t SanitizerInterceptor::prepareLaunch( return UR_RESULT_SUCCESS; } - LaunchInfo.Data.Host.GlobalShadowOffset = DeviceInfo->Shadow->ShadowBegin; - LaunchInfo.Data.Host.GlobalShadowOffsetEnd = DeviceInfo->Shadow->ShadowEnd; + LaunchInfo.Data.Host.GlobalShadowOffset = + 
DeviceInfo->Shadow->ShadowBegin; + LaunchInfo.Data.Host.GlobalShadowOffsetEnd = + DeviceInfo->Shadow->ShadowEnd; LaunchInfo.Data.Host.DeviceTy = DeviceInfo->Type; LaunchInfo.Data.Host.Debug = getOptions().Debug ? 1 : 0; From 47e04d5f959a578a511b523f1aa1170e9b887ae1 Mon Sep 17 00:00:00 2001 From: "Zhao, Maosu" Date: Mon, 28 Oct 2024 01:09:27 -0700 Subject: [PATCH 3/4] Refine some code --- .../layers/sanitizer/asan_interceptor.cpp | 113 ++++++++---------- .../layers/sanitizer/asan_interceptor.hpp | 40 +++++-- 2 files changed, 84 insertions(+), 69 deletions(-) diff --git a/source/loader/layers/sanitizer/asan_interceptor.cpp b/source/loader/layers/sanitizer/asan_interceptor.cpp index fbbd1e2067..73d88a4886 100644 --- a/source/loader/layers/sanitizer/asan_interceptor.cpp +++ b/source/loader/layers/sanitizer/asan_interceptor.cpp @@ -248,9 +248,6 @@ ur_result_t SanitizerInterceptor::preLaunchKernel(ur_kernel_handle_t Kernel, auto Device = GetDevice(Queue); auto ContextInfo = getContextInfo(Context); auto DeviceInfo = getDeviceInfo(Device); - auto KernelInfo = getKernelInfo(Kernel); - - UR_CALL(LaunchInfo.updateKernelInfo(*KernelInfo.get())); ManagedQueue InternalQueue(Context, Device); if (!InternalQueue) { @@ -663,12 +660,12 @@ ur_result_t SanitizerInterceptor::prepareLaunch( LocalWorkSize[Dim]; } - // Set launch info argument auto ArgNums = GetKernelNumArgs(Kernel); if (ArgNums == 0) { return UR_RESULT_SUCCESS; } + // Prepare asan runtime data LaunchInfo.Data.Host.GlobalShadowOffset = DeviceInfo->Shadow->ShadowBegin; LaunchInfo.Data.Host.GlobalShadowOffsetEnd = @@ -676,21 +673,6 @@ ur_result_t SanitizerInterceptor::prepareLaunch( LaunchInfo.Data.Host.DeviceTy = DeviceInfo->Type; LaunchInfo.Data.Host.Debug = getOptions().Debug ? 
1 : 0; - UR_CALL(getContext()->urDdiTable.USM.pfnDeviceAlloc( - ContextInfo->Handle, DeviceInfo->Handle, nullptr, nullptr, - sizeof(LaunchInfo), (void **)&LaunchInfo.Data.DevicePtr)); - getContext()->logger.debug( - "launch_info {} (numLocalArgs={}, localArgs={})", - (void *)LaunchInfo.Data.DevicePtr, - LaunchInfo.Data.Host.NumLocalArgs, - (void *)LaunchInfo.Data.Host.LocalArgs); - ur_result_t URes = getContext()->urDdiTable.Kernel.pfnSetArgPointer( - Kernel, ArgNums - 1, nullptr, LaunchInfo.Data.DevicePtr); - if (URes != UR_RESULT_SUCCESS) { - getContext()->logger.error("Failed to set launch info: {}", URes); - return URes; - } - auto EnqueueAllocateShadowMemory = [Context = ContextInfo->Handle, Device = DeviceInfo->Handle, Queue](size_t Size, uptr &Ptr) { @@ -807,8 +789,34 @@ ur_result_t SanitizerInterceptor::prepareLaunch( } } - // Prepare launch info for device side + // Write local arguments info + if (!KernelInfo->LocalArgs.empty()) { + std::vector LocalArgsInfo; + for (auto [ArgIndex, ArgInfo] : KernelInfo->LocalArgs) { + LocalArgsInfo.push_back(ArgInfo); + getContext()->logger.debug( + "local_args (argIndex={}, size={}, sizeWithRZ={})", + ArgIndex, ArgInfo.Size, ArgInfo.SizeWithRedZone); + } + UR_CALL(LaunchInfo.Data.importLocalArgsInfo(Queue, LocalArgsInfo)); + } + + // sync asan runtime data to device side UR_CALL(LaunchInfo.Data.syncToDevice(Queue)); + + // set kernel argument + ur_result_t URes = getContext()->urDdiTable.Kernel.pfnSetArgPointer( + Kernel, ArgNums - 1, nullptr, LaunchInfo.Data.getDevicePtr()); + if (URes != UR_RESULT_SUCCESS) { + getContext()->logger.error("Failed to set launch info: {}", URes); + return URes; + } + + getContext()->logger.debug( + "launch_info {} (numLocalArgs={}, localArgs={})", + (void *)LaunchInfo.Data.getDevicePtr(), + LaunchInfo.Data.Host.NumLocalArgs, + (void *)LaunchInfo.Data.Host.LocalArgs); } while (false); return UR_RESULT_SUCCESS; @@ -860,54 +868,39 @@ ContextInfo::~ContextInfo() { } } -ur_result_t 
LaunchInfo::updateKernelInfo(const KernelInfo &KI) { - if (!KI.LocalArgs.empty()) { - std::vector LocalArgsInfo; - for (auto [ArgIndex, ArgInfo] : KI.LocalArgs) { - LocalArgsInfo.push_back(ArgInfo); - getContext()->logger.debug( - "local_args (argIndex={}, size={}, sizeWithRZ={})", ArgIndex, - ArgInfo.Size, ArgInfo.SizeWithRedZone); - } - ManagedQueue Queue(Context, Device); - UR_CALL( - Data.importLocalArgsInfo(Context, Device, Queue, LocalArgsInfo)); - } - return UR_RESULT_SUCCESS; -} - -LaunchInfo::~LaunchInfo() { +AsanRuntimeDataWrapper::~AsanRuntimeDataWrapper() { [[maybe_unused]] ur_result_t Result; - if (Data.DevicePtr) { - auto Type = GetDeviceType(Context, Device); - auto ContextInfo = getContext()->interceptor->getContextInfo(Context); - if (Type == DeviceType::GPU_PVC || Type == DeviceType::GPU_DG2) { - if (Data.Host.PrivateShadowOffset) { - ContextInfo->Stats.UpdateShadowFreed( - Data.Host.PrivateShadowOffsetEnd - - Data.Host.PrivateShadowOffset + 1); - Result = getContext()->urDdiTable.USM.pfnFree( - Context, (void *)Data.Host.PrivateShadowOffset); - assert(Result == UR_RESULT_SUCCESS); - } - if (Data.Host.LocalShadowOffset) { - ContextInfo->Stats.UpdateShadowFreed( - Data.Host.LocalShadowOffsetEnd - - Data.Host.LocalShadowOffset + 1); - Result = getContext()->urDdiTable.USM.pfnFree( - Context, (void *)Data.Host.LocalShadowOffset); - assert(Result == UR_RESULT_SUCCESS); - } + auto Type = GetDeviceType(Context, Device); + auto ContextInfo = getContext()->interceptor->getContextInfo(Context); + if (Type == DeviceType::GPU_PVC || Type == DeviceType::GPU_DG2) { + if (Host.PrivateShadowOffset) { + ContextInfo->Stats.UpdateShadowFreed(Host.PrivateShadowOffsetEnd - + Host.PrivateShadowOffset + 1); + Result = getContext()->urDdiTable.USM.pfnFree( + Context, (void *)Host.PrivateShadowOffset); + assert(Result == UR_RESULT_SUCCESS); } - if (Data.Host.LocalArgs) { + if (Host.LocalShadowOffset) { + ContextInfo->Stats.UpdateShadowFreed(Host.LocalShadowOffsetEnd - + 
Host.LocalShadowOffset + 1); Result = getContext()->urDdiTable.USM.pfnFree( - Context, (void *)Data.Host.LocalArgs); + Context, (void *)Host.LocalShadowOffset); assert(Result == UR_RESULT_SUCCESS); } + } + if (Host.LocalArgs) { Result = getContext()->urDdiTable.USM.pfnFree(Context, - (void *)Data.DevicePtr); + (void *)Host.LocalArgs); assert(Result == UR_RESULT_SUCCESS); } + if (DevicePtr) { + Result = getContext()->urDdiTable.USM.pfnFree(Context, DevicePtr); + assert(Result == UR_RESULT_SUCCESS); + } +} + +LaunchInfo::~LaunchInfo() { + [[maybe_unused]] ur_result_t Result; Result = getContext()->urDdiTable.Context.pfnRelease(Context); assert(Result == UR_RESULT_SUCCESS); Result = getContext()->urDdiTable.Device.pfnRelease(Device); diff --git a/source/loader/layers/sanitizer/asan_interceptor.hpp b/source/loader/layers/sanitizer/asan_interceptor.hpp index 5d70f09cda..2f2c112eb7 100644 --- a/source/loader/layers/sanitizer/asan_interceptor.hpp +++ b/source/loader/layers/sanitizer/asan_interceptor.hpp @@ -159,9 +159,33 @@ struct AsanRuntimeDataWrapper { AsanRuntimeData *DevicePtr = nullptr; + ur_context_handle_t Context{}; + + ur_device_handle_t Device{}; + + AsanRuntimeDataWrapper(ur_context_handle_t Context, + ur_device_handle_t Device) + : Context(Context), Device(Device) {} + + ~AsanRuntimeDataWrapper(); + + AsanRuntimeData *getDevicePtr() { + if (DevicePtr == nullptr) { + ur_result_t Result = getContext()->urDdiTable.USM.pfnDeviceAlloc( + Context, Device, nullptr, nullptr, sizeof(AsanRuntimeData), + (void **)&DevicePtr); + if (Result != UR_RESULT_SUCCESS) { + getContext()->logger.error( + "Failed to alloc device usm for asan runtime data: {}", + Result); + } + } + return DevicePtr; + } + ur_result_t syncFromDevice(ur_queue_handle_t Queue) { UR_CALL(getContext()->urDdiTable.Enqueue.pfnUSMMemcpy( - Queue, true, ur_cast(&Host), DevicePtr, + Queue, true, ur_cast(&Host), getDevicePtr(), sizeof(AsanRuntimeData), 0, nullptr, nullptr)); return UR_RESULT_SUCCESS; @@ -169,15 
+193,14 @@ struct AsanRuntimeDataWrapper { ur_result_t syncToDevice(ur_queue_handle_t Queue) { UR_CALL(getContext()->urDdiTable.Enqueue.pfnUSMMemcpy( - Queue, true, DevicePtr, ur_cast(&Host), + Queue, true, getDevicePtr(), ur_cast(&Host), sizeof(AsanRuntimeData), 0, nullptr, nullptr)); return UR_RESULT_SUCCESS; } ur_result_t - importLocalArgsInfo(ur_context_handle_t Context, ur_device_handle_t Device, - ur_queue_handle_t Queue, + importLocalArgsInfo(ur_queue_handle_t Queue, const std::vector &LocalArgs) { assert(!LocalArgs.empty()); @@ -197,8 +220,6 @@ struct AsanRuntimeDataWrapper { }; struct LaunchInfo { - AsanRuntimeDataWrapper Data{}; - ur_context_handle_t Context = nullptr; ur_device_handle_t Device = nullptr; const size_t *GlobalWorkSize = nullptr; @@ -206,11 +227,14 @@ struct LaunchInfo { std::vector LocalWorkSize; uint32_t WorkDim = 0; + AsanRuntimeDataWrapper Data; + LaunchInfo(ur_context_handle_t Context, ur_device_handle_t Device, const size_t *GlobalWorkSize, const size_t *LocalWorkSize, const size_t *GlobalWorkOffset, uint32_t WorkDim) : Context(Context), Device(Device), GlobalWorkSize(GlobalWorkSize), - GlobalWorkOffset(GlobalWorkOffset), WorkDim(WorkDim) { + GlobalWorkOffset(GlobalWorkOffset), WorkDim(WorkDim), + Data(Context, Device) { if (LocalWorkSize) { this->LocalWorkSize = std::vector(LocalWorkSize, LocalWorkSize + WorkDim); @@ -222,8 +246,6 @@ struct LaunchInfo { assert(Result == UR_RESULT_SUCCESS); } ~LaunchInfo(); - - ur_result_t updateKernelInfo(const KernelInfo &KI); }; struct DeviceGlobalInfo { From 9564628a274164ef20ebd86b0806305dd333b289 Mon Sep 17 00:00:00 2001 From: Maosu Zhao Date: Tue, 29 Oct 2024 21:10:10 +0800 Subject: [PATCH 4/4] Fix failures on cpu device --- .../layers/sanitizer/asan_interceptor.cpp | 27 ++++++++++--------- 1 file changed, 14 insertions(+), 13 deletions(-) diff --git a/source/loader/layers/sanitizer/asan_interceptor.cpp b/source/loader/layers/sanitizer/asan_interceptor.cpp index 73d88a4886..d16bbfca80 100644 
--- a/source/loader/layers/sanitizer/asan_interceptor.cpp +++ b/source/loader/layers/sanitizer/asan_interceptor.cpp @@ -634,6 +634,20 @@ ur_result_t SanitizerInterceptor::prepareLaunch( } } + auto ArgNums = GetKernelNumArgs(Kernel); + // We must prepare all kernel args before call + // urKernelGetSuggestedLocalWorkSize, otherwise the call will fail on + // CPU device. + if (ArgNums) { + ur_result_t URes = getContext()->urDdiTable.Kernel.pfnSetArgPointer( + Kernel, ArgNums - 1, nullptr, LaunchInfo.Data.getDevicePtr()); + if (URes != UR_RESULT_SUCCESS) { + getContext()->logger.error("Failed to set launch info: {}", + URes); + return URes; + } + } + if (LaunchInfo.LocalWorkSize.empty()) { LaunchInfo.LocalWorkSize.resize(LaunchInfo.WorkDim); auto URes = @@ -660,11 +674,6 @@ ur_result_t SanitizerInterceptor::prepareLaunch( LocalWorkSize[Dim]; } - auto ArgNums = GetKernelNumArgs(Kernel); - if (ArgNums == 0) { - return UR_RESULT_SUCCESS; - } - // Prepare asan runtime data LaunchInfo.Data.Host.GlobalShadowOffset = DeviceInfo->Shadow->ShadowBegin; @@ -804,14 +813,6 @@ ur_result_t SanitizerInterceptor::prepareLaunch( // sync asan runtime data to device side UR_CALL(LaunchInfo.Data.syncToDevice(Queue)); - // set kernel argument - ur_result_t URes = getContext()->urDdiTable.Kernel.pfnSetArgPointer( - Kernel, ArgNums - 1, nullptr, LaunchInfo.Data.getDevicePtr()); - if (URes != UR_RESULT_SUCCESS) { - getContext()->logger.error("Failed to set launch info: {}", URes); - return URes; - } - getContext()->logger.debug( "launch_info {} (numLocalArgs={}, localArgs={})", (void *)LaunchInfo.Data.getDevicePtr(),