Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

5.0.x/opal/cuda: Handle stream-ordered allocations and assign primary device #12843

Open
wants to merge 1 commit into
base: v5.0.x
Choose a base branch
from
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
115 changes: 107 additions & 8 deletions opal/mca/accelerator/cuda/accelerator_cuda.c
Original file line number Diff line number Diff line change
Expand Up @@ -159,12 +159,96 @@ static int accelerator_cuda_get_device_id(CUcontext mem_ctx) {
return dev_id;
}

static int accelerator_cuda_check_mpool(CUdeviceptr dbuf, CUmemorytype *mem_type,
int *dev_id)
{
#if OPAL_CUDA_VMM_SUPPORT
static int device_count = -1;
static int mpool_supported = -1;
CUresult result;
CUmemoryPool mpool;
CUmemAccess_flags flags;
CUmemLocation location;

if (mpool_supported <= 0) {
if (mpool_supported == -1) {
if (device_count == -1) {
result = cuDeviceGetCount(&device_count);
if (result != CUDA_SUCCESS || (0 == device_count)) {
mpool_supported = 0; /* never check again */
device_count = 0;
return 0;
}
}

/* assume uniformity of devices */
result = cuDeviceGetAttribute(&mpool_supported,
CU_DEVICE_ATTRIBUTE_MEMORY_POOLS_SUPPORTED, 0);
if (result != CUDA_SUCCESS) {
mpool_supported = 0;
}
}
if (0 == mpool_supported) {
return 0;
}
}

result = cuPointerGetAttribute(&mpool, CU_POINTER_ATTRIBUTE_MEMPOOL_HANDLE,
dbuf);
if (CUDA_SUCCESS != result) {
return 0;
}

/* check if device has access */
for (int i = 0; i < device_count; i++) {
location.type = CU_MEM_LOCATION_TYPE_DEVICE;
location.id = i;
result = cuMemPoolGetAccess(&flags, mpool, &location);
if ((CUDA_SUCCESS == result) &&
(CU_MEM_ACCESS_FLAGS_PROT_READWRITE == flags)) {
*mem_type = CU_MEMORYTYPE_DEVICE;
*dev_id = i;
return 1;
}
}

/* host must have access as device access possibility is exhausted */
*mem_type = CU_MEMORYTYPE_HOST;
*dev_id = MCA_ACCELERATOR_NO_DEVICE_ID;
return 0;
#endif

return 0;
}

static int accelerator_cuda_get_primary_context(CUdevice dev_id, CUcontext *pctx)
{
CUresult result;
unsigned int flags;
int active;

result = cuDevicePrimaryCtxGetState(dev_id, &flags, &active);
if (CUDA_SUCCESS != result) {
return OPAL_ERROR;
}

if (active) {
result = cuDevicePrimaryCtxRetain(pctx, dev_id);
return OPAL_SUCCESS;
}

return OPAL_ERROR;
}

static int accelerator_cuda_check_addr(const void *addr, int *dev_id, uint64_t *flags)
{
CUresult result;
int is_vmm = 0;
int is_mpool_ptr = 0;
int vmm_dev_id = MCA_ACCELERATOR_NO_DEVICE_ID;
int mpool_dev_id = MCA_ACCELERATOR_NO_DEVICE_ID;
CUmemorytype vmm_mem_type = 0;
CUmemorytype mpool_mem_type = 0;
CUmemorytype mem_type = 0;
CUdeviceptr dbuf = (CUdeviceptr) addr;
CUcontext ctx = NULL, mem_ctx = NULL;
Expand All @@ -177,6 +261,7 @@ static int accelerator_cuda_check_addr(const void *addr, int *dev_id, uint64_t *
*flags = 0;

is_vmm = accelerator_cuda_check_vmm(dbuf, &vmm_mem_type, &vmm_dev_id);
is_mpool_ptr = accelerator_cuda_check_mpool(dbuf, &mpool_mem_type, &mpool_dev_id);

#if OPAL_CUDA_GET_ATTRIBUTES
uint32_t is_managed = 0;
Expand Down Expand Up @@ -210,6 +295,9 @@ static int accelerator_cuda_check_addr(const void *addr, int *dev_id, uint64_t *
if (is_vmm && (vmm_mem_type == CU_MEMORYTYPE_DEVICE)) {
mem_type = CU_MEMORYTYPE_DEVICE;
*dev_id = vmm_dev_id;
} else if (is_mpool_ptr && (mpool_mem_type == CU_MEMORYTYPE_DEVICE)) {
mem_type = CU_MEMORYTYPE_DEVICE;
*dev_id = mpool_dev_id;
} else {
/* Host memory, nothing to do here */
return 0;
Expand All @@ -220,6 +308,8 @@ static int accelerator_cuda_check_addr(const void *addr, int *dev_id, uint64_t *
} else {
if (is_vmm) {
*dev_id = vmm_dev_id;
} else if (is_mpool_ptr) {
*dev_id = mpool_dev_id;
} else {
/* query the device from the context */
*dev_id = accelerator_cuda_get_device_id(mem_ctx);
Expand All @@ -238,13 +328,18 @@ static int accelerator_cuda_check_addr(const void *addr, int *dev_id, uint64_t *
if (is_vmm && (vmm_mem_type == CU_MEMORYTYPE_DEVICE)) {
mem_type = CU_MEMORYTYPE_DEVICE;
*dev_id = vmm_dev_id;
} else if (is_mpool_ptr && (mpool_mem_type == CU_MEMORYTYPE_DEVICE)) {
mem_type = CU_MEMORYTYPE_DEVICE;
*dev_id = mpool_dev_id;
} else {
/* Host memory, nothing to do here */
return 0;
}
} else {
if (is_vmm) {
*dev_id = vmm_dev_id;
} else if (is_mpool_ptr) {
*dev_id = mpool_dev_id;
} else {
result = cuPointerGetAttribute(&mem_ctx,
CU_POINTER_ATTRIBUTE_CONTEXT, dbuf);
Expand Down Expand Up @@ -278,14 +373,18 @@ static int accelerator_cuda_check_addr(const void *addr, int *dev_id, uint64_t *
return OPAL_ERROR;
}
#endif /* OPAL_CUDA_GET_ATTRIBUTES */
if (is_vmm) {
/* This function is expected to set context if pointer is device
* accessible but VMM allocations have NULL context associated
* which cannot be set against the calling thread */
opal_output(0,
"CUDA: unable to set context with the given pointer"
"ptr=%p aborting...", addr);
return OPAL_ERROR;
if (is_vmm || is_mpool_ptr) {
if (OPAL_SUCCESS ==
accelerator_cuda_get_primary_context(
is_vmm ? vmm_dev_id : mpool_dev_id, &mem_ctx)) {
/* As VMM/mempool allocations have no context associated
* with them, check if device primary context can be set */
} else {
opal_output(0,
"CUDA: unable to set ctx with the given pointer"
"ptr=%p aborting...", addr);
return OPAL_ERROR;
}
}

result = cuCtxSetCurrent(mem_ctx);
Expand Down
Loading