Skip to content

Commit

Permalink
Merge pull request #2249 from zhaomaosu/use-device-usm-for-rtl-data
Browse files Browse the repository at this point in the history
[DeviceASAN] Use device usm to sync asan runtime data instead of shared usm
  • Loading branch information
martygrant authored Dec 5, 2024
2 parents 1851eff + cde0d4c commit a172cde
Show file tree
Hide file tree
Showing 4 changed files with 161 additions and 99 deletions.
7 changes: 3 additions & 4 deletions source/loader/layers/sanitizer/asan/asan_ddi.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -471,10 +471,9 @@ __urdlllocal ur_result_t UR_APICALL urEnqueueKernelLaunch(
numEventsInWaitList, phEventWaitList, phEvent);
}

USMLaunchInfo LaunchInfo(GetContext(hKernel), GetDevice(hQueue),
pGlobalWorkSize, pLocalWorkSize, pGlobalWorkOffset,
workDim);
UR_CALL(LaunchInfo.initialize());
LaunchInfo LaunchInfo(GetContext(hQueue), GetDevice(hQueue),
pGlobalWorkSize, pLocalWorkSize, pGlobalWorkOffset,
workDim);

UR_CALL(getAsanInterceptor()->preLaunchKernel(hKernel, hQueue, LaunchInfo));

Expand Down
156 changes: 76 additions & 80 deletions source/loader/layers/sanitizer/asan/asan_interceptor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -249,15 +249,11 @@ ur_result_t AsanInterceptor::releaseMemory(ur_context_handle_t Context,

ur_result_t AsanInterceptor::preLaunchKernel(ur_kernel_handle_t Kernel,
ur_queue_handle_t Queue,
USMLaunchInfo &LaunchInfo) {
LaunchInfo &LaunchInfo) {
auto Context = GetContext(Queue);
auto Device = GetDevice(Queue);
auto ContextInfo = getContextInfo(Context);
auto DeviceInfo = getDeviceInfo(Device);
auto KernelInfo = getKernelInfo(Kernel);
assert(KernelInfo && "Kernel should be instrumented");

UR_CALL(LaunchInfo.updateKernelInfo(*KernelInfo.get()));

ManagedQueue InternalQueue(Context, Device);
if (!InternalQueue) {
Expand All @@ -275,12 +271,14 @@ ur_result_t AsanInterceptor::preLaunchKernel(ur_kernel_handle_t Kernel,

ur_result_t AsanInterceptor::postLaunchKernel(ur_kernel_handle_t Kernel,
ur_queue_handle_t Queue,
USMLaunchInfo &LaunchInfo) {
LaunchInfo &LaunchInfo) {
// FIXME: We must use a blocking operation here until we support urEventSetCallback
auto Result = getContext()->urDdiTable.Queue.pfnFinish(Queue);

UR_CALL(LaunchInfo.Data.syncFromDevice(Queue));

if (Result == UR_RESULT_SUCCESS) {
for (const auto &Report : LaunchInfo.Data->Report) {
for (const auto &Report : LaunchInfo.Data.Host.Report) {
if (!Report.Flag) {
continue;
}
Expand Down Expand Up @@ -685,7 +683,7 @@ AsanInterceptor::getMemBuffer(ur_mem_handle_t MemHandle) {
ur_result_t AsanInterceptor::prepareLaunch(
std::shared_ptr<ContextInfo> &ContextInfo,
std::shared_ptr<DeviceInfo> &DeviceInfo, ur_queue_handle_t Queue,
ur_kernel_handle_t Kernel, USMLaunchInfo &LaunchInfo) {
ur_kernel_handle_t Kernel, LaunchInfo &LaunchInfo) {

do {
auto KernelInfo = getKernelInfo(Kernel);
Expand Down Expand Up @@ -721,27 +719,20 @@ ur_result_t AsanInterceptor::prepareLaunch(
}
}

// Set launch info argument
auto ArgNums = GetKernelNumArgs(Kernel);
// We must prepare all kernel args before call
// urKernelGetSuggestedLocalWorkSize, otherwise the call will fail on
// CPU device.
if (ArgNums) {
getContext()->logger.debug(
"launch_info {} (numLocalArgs={}, localArgs={})",
(void *)LaunchInfo.Data, LaunchInfo.Data->NumLocalArgs,
(void *)LaunchInfo.Data->LocalArgs);
ur_result_t URes = getContext()->urDdiTable.Kernel.pfnSetArgPointer(
Kernel, ArgNums - 1, nullptr, LaunchInfo.Data);
Kernel, ArgNums - 1, nullptr, LaunchInfo.Data.getDevicePtr());
if (URes != UR_RESULT_SUCCESS) {
getContext()->logger.error("Failed to set launch info: {}",
URes);
return URes;
}
}

LaunchInfo.Data->GlobalShadowOffset = DeviceInfo->Shadow->ShadowBegin;
LaunchInfo.Data->GlobalShadowOffsetEnd = DeviceInfo->Shadow->ShadowEnd;
LaunchInfo.Data->DeviceTy = DeviceInfo->Type;
LaunchInfo.Data->Debug = getOptions().Debug ? 1 : 0;

if (LaunchInfo.LocalWorkSize.empty()) {
LaunchInfo.LocalWorkSize.resize(LaunchInfo.WorkDim);
auto URes =
Expand All @@ -768,6 +759,14 @@ ur_result_t AsanInterceptor::prepareLaunch(
LocalWorkSize[Dim];
}

// Prepare asan runtime data
LaunchInfo.Data.Host.GlobalShadowOffset =
DeviceInfo->Shadow->ShadowBegin;
LaunchInfo.Data.Host.GlobalShadowOffsetEnd =
DeviceInfo->Shadow->ShadowEnd;
LaunchInfo.Data.Host.DeviceTy = DeviceInfo->Type;
LaunchInfo.Data.Host.Debug = getOptions().Debug ? 1 : 0;

auto EnqueueAllocateShadowMemory = [Context = ContextInfo->Handle,
Device = DeviceInfo->Handle,
Queue](size_t Size, uptr &Ptr) {
Expand Down Expand Up @@ -816,7 +815,7 @@ ur_result_t AsanInterceptor::prepareLaunch(

if (EnqueueAllocateShadowMemory(
LocalShadowMemorySize,
LaunchInfo.Data->LocalShadowOffset) !=
LaunchInfo.Data.Host.LocalShadowOffset) !=
UR_RESULT_SUCCESS) {
getContext()->logger.warning(
"Failed to allocate shadow memory for local "
Expand All @@ -827,25 +826,25 @@ ur_result_t AsanInterceptor::prepareLaunch(
"Skip checking local memory of kernel <{}>",
GetKernelName(Kernel));
} else {
LaunchInfo.Data->LocalShadowOffsetEnd =
LaunchInfo.Data->LocalShadowOffset +
LaunchInfo.Data.Host.LocalShadowOffsetEnd =
LaunchInfo.Data.Host.LocalShadowOffset +
LocalShadowMemorySize - 1;

ContextInfo->Stats.UpdateShadowMalloced(
LocalShadowMemorySize);

getContext()->logger.info(
"ShadowMemory(Local, {} - {})",
(void *)LaunchInfo.Data->LocalShadowOffset,
(void *)LaunchInfo.Data->LocalShadowOffsetEnd);
(void *)LaunchInfo.Data.Host.LocalShadowOffset,
(void *)LaunchInfo.Data.Host.LocalShadowOffsetEnd);
}
}
}

// Write shadow memory offset for private memory
if (getOptions().DetectPrivates) {
if (DeviceInfo->Type == DeviceType::CPU) {
LaunchInfo.Data->PrivateShadowOffset =
LaunchInfo.Data.Host.PrivateShadowOffset =
DeviceInfo->Shadow->ShadowBegin;
} else if (DeviceInfo->Type == DeviceType::GPU_PVC ||
DeviceInfo->Type == DeviceType::GPU_DG2) {
Expand All @@ -858,7 +857,7 @@ ur_result_t AsanInterceptor::prepareLaunch(

if (EnqueueAllocateShadowMemory(
PrivateShadowMemorySize,
LaunchInfo.Data->PrivateShadowOffset) !=
LaunchInfo.Data.Host.PrivateShadowOffset) !=
UR_RESULT_SUCCESS) {
getContext()->logger.warning(
"Failed to allocate shadow memory for private "
Expand All @@ -869,20 +868,41 @@ ur_result_t AsanInterceptor::prepareLaunch(
"Skip checking private memory of kernel <{}>",
GetKernelName(Kernel));
} else {
LaunchInfo.Data->PrivateShadowOffsetEnd =
LaunchInfo.Data->PrivateShadowOffset +
LaunchInfo.Data.Host.PrivateShadowOffsetEnd =
LaunchInfo.Data.Host.PrivateShadowOffset +
PrivateShadowMemorySize - 1;

ContextInfo->Stats.UpdateShadowMalloced(
PrivateShadowMemorySize);

getContext()->logger.info(
"ShadowMemory(Private, {} - {})",
(void *)LaunchInfo.Data->PrivateShadowOffset,
(void *)LaunchInfo.Data->PrivateShadowOffsetEnd);
(void *)LaunchInfo.Data.Host.PrivateShadowOffset,
(void *)LaunchInfo.Data.Host.PrivateShadowOffsetEnd);
}
}
}

// Write local arguments info
if (!KernelInfo->LocalArgs.empty()) {
std::vector<LocalArgsInfo> LocalArgsInfo;
for (auto [ArgIndex, ArgInfo] : KernelInfo->LocalArgs) {
LocalArgsInfo.push_back(ArgInfo);
getContext()->logger.debug(
"local_args (argIndex={}, size={}, sizeWithRZ={})",
ArgIndex, ArgInfo.Size, ArgInfo.SizeWithRedZone);
}
UR_CALL(LaunchInfo.Data.importLocalArgsInfo(Queue, LocalArgsInfo));
}

// sync asan runtime data to device side
UR_CALL(LaunchInfo.Data.syncToDevice(Queue));

getContext()->logger.debug(
"launch_info {} (numLocalArgs={}, localArgs={})",
(void *)LaunchInfo.Data.getDevicePtr(),
LaunchInfo.Data.Host.NumLocalArgs,
(void *)LaunchInfo.Data.Host.LocalArgs);
} while (false);

return UR_RESULT_SUCCESS;
Expand Down Expand Up @@ -942,63 +962,39 @@ ContextInfo::~ContextInfo() {
}
}

// Prepares this USMLaunchInfo for use: retains the stored Context and Device
// handles, then allocates sizeof(LaunchInfo) bytes through the USM
// pfnSharedAlloc entry point and stores the resulting pointer in Data.
// Returns UR_RESULT_SUCCESS once all three calls and the assignment are done.
// NOTE(review): each call is wrapped in UR_CALL, which presumably returns the
// failing ur_result_t early — confirm against the macro definition.
ur_result_t USMLaunchInfo::initialize() {
UR_CALL(getContext()->urDdiTable.Context.pfnRetain(Context));
UR_CALL(getContext()->urDdiTable.Device.pfnRetain(Device));
UR_CALL(getContext()->urDdiTable.USM.pfnSharedAlloc(
Context, Device, nullptr, nullptr, sizeof(LaunchInfo), (void **)&Data));
// Assign a value-initialized LaunchInfo into the newly allocated memory.
*Data = LaunchInfo{};
return UR_RESULT_SUCCESS;
}

// Copies the local-argument descriptors of a kernel into the launch data.
//
// KI: kernel info whose LocalArgs container supplies (ArgIndex, ArgInfo) pairs.
//
// When KI.LocalArgs is non-empty, the count is written to Data->NumLocalArgs,
// an array of sizeof(LocalArgsInfo) * NumArgs bytes is allocated through the
// USM pfnSharedAlloc entry point into Data->LocalArgs, and each ArgInfo is
// copied into it in the iteration order of KI.LocalArgs. When it is empty,
// Data is not touched. Returns UR_RESULT_SUCCESS at the end.
// NOTE(review): UR_CALL presumably returns early on an allocation failure —
// confirm against the macro definition.
ur_result_t USMLaunchInfo::updateKernelInfo(const KernelInfo &KI) {
auto NumArgs = KI.LocalArgs.size();
if (NumArgs) {
Data->NumLocalArgs = NumArgs;
UR_CALL(getContext()->urDdiTable.USM.pfnSharedAlloc(
Context, Device, nullptr, nullptr, sizeof(LocalArgsInfo) * NumArgs,
(void **)&Data->LocalArgs));
// i is the write position in Data->LocalArgs; ArgIndex is only used for
// the debug log line below, not for placement.
uint32_t i = 0;
for (auto [ArgIndex, ArgInfo] : KI.LocalArgs) {
Data->LocalArgs[i++] = ArgInfo;
getContext()->logger.debug(
"local_args (argIndex={}, size={}, sizeWithRZ={})", ArgIndex,
ArgInfo.Size, ArgInfo.SizeWithRedZone);
}
}
return UR_RESULT_SUCCESS;
}

USMLaunchInfo::~USMLaunchInfo() {
AsanRuntimeDataWrapper::~AsanRuntimeDataWrapper() {
[[maybe_unused]] ur_result_t Result;
if (Data) {
auto Type = GetDeviceType(Context, Device);
auto ContextInfo = getAsanInterceptor()->getContextInfo(Context);
if (Type == DeviceType::GPU_PVC || Type == DeviceType::GPU_DG2) {
if (Data->PrivateShadowOffset) {
ContextInfo->Stats.UpdateShadowFreed(
Data->PrivateShadowOffsetEnd - Data->PrivateShadowOffset +
1);
Result = getContext()->urDdiTable.USM.pfnFree(
Context, (void *)Data->PrivateShadowOffset);
assert(Result == UR_RESULT_SUCCESS);
}
if (Data->LocalShadowOffset) {
ContextInfo->Stats.UpdateShadowFreed(
Data->LocalShadowOffsetEnd - Data->LocalShadowOffset + 1);
Result = getContext()->urDdiTable.USM.pfnFree(
Context, (void *)Data->LocalShadowOffset);
assert(Result == UR_RESULT_SUCCESS);
}
auto Type = GetDeviceType(Context, Device);
auto ContextInfo = getAsanInterceptor()->getContextInfo(Context);
if (Type == DeviceType::GPU_PVC || Type == DeviceType::GPU_DG2) {
if (Host.PrivateShadowOffset) {
ContextInfo->Stats.UpdateShadowFreed(Host.PrivateShadowOffsetEnd -
Host.PrivateShadowOffset + 1);
Result = getContext()->urDdiTable.USM.pfnFree(
Context, (void *)Host.PrivateShadowOffset);
assert(Result == UR_RESULT_SUCCESS);
}
if (Data->LocalArgs) {
if (Host.LocalShadowOffset) {
ContextInfo->Stats.UpdateShadowFreed(Host.LocalShadowOffsetEnd -
Host.LocalShadowOffset + 1);
Result = getContext()->urDdiTable.USM.pfnFree(
Context, (void *)Data->LocalArgs);
Context, (void *)Host.LocalShadowOffset);
assert(Result == UR_RESULT_SUCCESS);
}
Result = getContext()->urDdiTable.USM.pfnFree(Context, (void *)Data);
}
if (Host.LocalArgs) {
Result = getContext()->urDdiTable.USM.pfnFree(Context,
(void *)Host.LocalArgs);
assert(Result == UR_RESULT_SUCCESS);
}
if (DevicePtr) {
Result = getContext()->urDdiTable.USM.pfnFree(Context, DevicePtr);
assert(Result == UR_RESULT_SUCCESS);
}
}

LaunchInfo::~LaunchInfo() {
[[maybe_unused]] ur_result_t Result;
Result = getContext()->urDdiTable.Context.pfnRelease(Context);
assert(Result == UR_RESULT_SUCCESS);
Result = getContext()->urDdiTable.Device.pfnRelease(Device);
Expand Down
Loading

0 comments on commit a172cde

Please sign in to comment.