Skip to content

opal/cuda: Handle CUDA VMM pointers in accelerator check_addr function #12757

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 11 additions & 0 deletions config/opal_check_cuda.m4
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
dnl -*- autoconf -*-
dnl
dnl Copyright (c) 2024 NVIDIA Corporation. All rights reserved.
dnl Copyright (c) 2004-2010 The Trustees of Indiana University and Indiana
dnl University Research and Technology
dnl Corporation. All rights reserved.
Expand Down Expand Up @@ -118,6 +119,12 @@ AS_IF([test "$opal_check_cuda_happy" = "yes"],
[#include <$opal_cuda_incdir/cuda.h>])],
[])

# If we have CUDA support, check whether cuda.h declares
# CU_MEM_LOCATION_TYPE_HOST_NUMA, which is needed to recognise cuMemCreate
# memory placed on a host NUMA node.
#
# The operands of test must be separated from the = operator by spaces.
# Written as one word the expression is a single non-empty string, which
# test always treats as true, so the guard would never skip the probe.
#
# When CUDA is not available there is nothing to probe, so VMM support is
# recorded as 0 to keep OPAL_CUDA_VMM_SUPPORT a well-formed numeric macro.
AS_IF([test "$opal_check_cuda_happy" = "yes"],
[AC_CHECK_DECL([CU_MEM_LOCATION_TYPE_HOST_NUMA], [CUDA_VMM_SUPPORT=1], [CUDA_VMM_SUPPORT=0],
[#include <$opal_cuda_incdir/cuda.h>])],
[CUDA_VMM_SUPPORT=0])

# If we have CUDA support, check to see if we have support for SYNC_MEMOPS
# which was first introduced in CUDA 6.0.
AS_IF([test "$opal_check_cuda_happy" = "yes"],
Expand Down Expand Up @@ -160,6 +167,10 @@ AM_CONDITIONAL([OPAL_cuda_support], [test "x$CUDA_SUPPORT" = "x1"])
AC_DEFINE_UNQUOTED([OPAL_CUDA_SUPPORT],$CUDA_SUPPORT,
[Whether we want cuda device pointer support])

# Publish the result of the CU_MEM_LOCATION_TYPE_HOST_NUMA probe in two forms:
# an Automake conditional that is true when CUDA_VMM_SUPPORT is 1, and a
# config-header macro carrying the raw value of CUDA_VMM_SUPPORT for use in
# preprocessor tests.
AM_CONDITIONAL([OPAL_cuda_vmm_support], [test "x$CUDA_VMM_SUPPORT" = "x1"])
AC_DEFINE_UNQUOTED([OPAL_CUDA_VMM_SUPPORT],$CUDA_VMM_SUPPORT,
[Whether we have CU_MEM_LOCATION_TYPE_HOST_NUMA support available])

AM_CONDITIONAL([OPAL_cuda_sync_memops], [test "x$CUDA_SYNC_MEMOPS" = "x1"])
AC_DEFINE_UNQUOTED([OPAL_CUDA_SYNC_MEMOPS],$CUDA_SYNC_MEMOPS,
[Whether we have CUDA CU_POINTER_ATTRIBUTE_SYNC_MEMOPS support available])
Expand Down
122 changes: 109 additions & 13 deletions opal/mca/accelerator/cuda/accelerator_cuda.c
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
/*
* Copyright (c) 2024 NVIDIA Corporation. All rights reserved.
* Copyright (c) 2014-2015 Intel, Inc. All rights reserved.
* Copyright (c) 2014 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
* Copyright (c) 2014 Mellanox Technologies, Inc.
* All rights reserved.
* Copyright (c) Amazon.com, Inc. or its affiliates.
* All Rights reserved.
* Copyright (c) 2024 NVIDIA Corporation. All rights reserved.
* Copyright (c) 2024 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
Expand Down Expand Up @@ -154,9 +154,75 @@ static int accelerator_cuda_get_device_id(CUcontext mem_ctx) {
return dev_id;
}

/**
 * Determine whether a pointer belongs to an allocation for which the CUDA
 * driver can return an allocation handle and, if so, classify it.
 *
 * @param[in]  dbuf      Address to examine.
 * @param[out] mem_type  Set to CU_MEMORYTYPE_DEVICE when the allocation is
 *                       located on a device, or is located on a host NUMA
 *                       node and some device reports read/write access to
 *                       it; set to CU_MEMORYTYPE_HOST otherwise.
 * @param[out] dev_id    Set to the owning device, or to the first device
 *                       found with read/write access, or to
 *                       MCA_ACCELERATOR_NO_DEVICE_ID for host memory.
 *
 * @return 1 when an allocation handle was obtained for the address and the
 *         outputs were filled in; 0 when the build has no VMM support or any
 *         of the driver queries needed for classification failed.  The
 *         outputs are left untouched whenever 0 is returned.
 */
static int accelerator_cuda_check_vmm(CUdeviceptr dbuf, CUmemorytype *mem_type,
                                      int *dev_id)
{
#if OPAL_CUDA_VMM_SUPPORT
    /* Number of devices, queried on first use and cached afterwards. */
    static int device_count = -1;
    CUmemGenericAllocationHandle handle;
    CUmemAllocationProp props;
    int classified = 0;

    if (-1 == device_count) {
        if (CUDA_SUCCESS != cuDeviceGetCount(&device_count)) {
            return 0;
        }
    }

    /* If the driver cannot return a handle there is nothing to release. */
    if (CUDA_SUCCESS != cuMemRetainAllocationHandle(&handle, (void *) dbuf)) {
        return 0;
    }

    if (CUDA_SUCCESS == cuMemGetAllocationPropertiesFromHandle(&props, handle)) {
        /* Start from the host classification and upgrade it below when a
         * device owns, or can read and write, the allocation. */
        CUmemorytype kind = CU_MEMORYTYPE_HOST;
        int owner = MCA_ACCELERATOR_NO_DEVICE_ID;

        if (CU_MEM_LOCATION_TYPE_DEVICE == props.location.type) {
            kind = CU_MEMORYTYPE_DEVICE;
            owner = props.location.id;
        } else if (CU_MEM_LOCATION_TYPE_HOST_NUMA == props.location.type) {
            /* Host NUMA memory: look for the first device that has been
             * granted read/write access to it. */
            for (int dev = 0; dev < device_count; ++dev) {
                CUmemLocation probe;
                unsigned long long access;

                probe.type = CU_MEM_LOCATION_TYPE_DEVICE;
                probe.id = dev;
                if (CUDA_SUCCESS == cuMemGetAccess(&access, &probe, dbuf)
                    && CU_MEM_ACCESS_FLAGS_PROT_READWRITE == access) {
                    kind = CU_MEMORYTYPE_DEVICE;
                    owner = dev;
                    break;
                }
            }
        }

        *mem_type = kind;
        *dev_id = owner;
        classified = 1;
    }

    /* Single exit for every path that obtained a handle: drop the reference
     * taken by cuMemRetainAllocationHandle. */
    cuMemRelease(handle);
    return classified;
#else
    /* Built without VMM support: never report a VMM pointer. */
    (void) dbuf;
    (void) mem_type;
    (void) dev_id;
    return 0;
#endif
}

static int accelerator_cuda_check_addr(const void *addr, int *dev_id, uint64_t *flags)
{
CUresult result;
int is_vmm = 0;
int vmm_dev_id = MCA_ACCELERATOR_NO_DEVICE_ID;
CUmemorytype vmm_mem_type = 0;
CUmemorytype mem_type = 0;
CUdeviceptr dbuf = (CUdeviceptr) addr;
CUcontext ctx = NULL, mem_ctx = NULL;
Expand All @@ -168,6 +234,8 @@ static int accelerator_cuda_check_addr(const void *addr, int *dev_id, uint64_t *

*flags = 0;

is_vmm = accelerator_cuda_check_vmm(dbuf, &vmm_mem_type, &vmm_dev_id);

#if OPAL_CUDA_GET_ATTRIBUTES
uint32_t is_managed = 0;
/* With CUDA 7.0, we can get multiple attributes with a single call */
Expand Down Expand Up @@ -197,17 +265,24 @@ static int accelerator_cuda_check_addr(const void *addr, int *dev_id, uint64_t *
return OPAL_ERROR;
}
} else if (CU_MEMORYTYPE_HOST == mem_type) {
/* Host memory, nothing to do here */
return 0;
if (is_vmm && (vmm_mem_type == CU_MEMORYTYPE_DEVICE)) {
mem_type = CU_MEMORYTYPE_DEVICE;
*dev_id = vmm_dev_id;
} else {
/* Host memory, nothing to do here */
return 0;
}
} else if (0 == mem_type) {
/* This can happen when CUDA is initialized but dbuf is not valid CUDA pointer */
return 0;
} else {
/* query the device from the context */
*dev_id = accelerator_cuda_get_device_id(mem_ctx);
if (is_vmm) {
*dev_id = vmm_dev_id;
} else {
/* query the device from the context */
*dev_id = accelerator_cuda_get_device_id(mem_ctx);
}
}
/* Must be a device pointer */
assert(CU_MEMORYTYPE_DEVICE == mem_type);
#else /* OPAL_CUDA_GET_ATTRIBUTES */
result = cuPointerGetAttribute(&mem_type, CU_POINTER_ATTRIBUTE_MEMORY_TYPE, dbuf);
if (CUDA_SUCCESS != result) {
Expand All @@ -218,16 +293,27 @@ static int accelerator_cuda_check_addr(const void *addr, int *dev_id, uint64_t *
return OPAL_ERROR;
}
} else if (CU_MEMORYTYPE_HOST == mem_type) {
/* Host memory, nothing to do here */
return 0;
if (is_vmm && (vmm_mem_type == CU_MEMORYTYPE_DEVICE)) {
mem_type = CU_MEMORYTYPE_DEVICE;
*dev_id = vmm_dev_id;
} else {
/* Host memory, nothing to do here */
return 0;
}
} else {
result = cuPointerGetAttribute(&mem_ctx, CU_POINTER_ATTRIBUTE_CONTEXT, dbuf);
/* query the device from the context */
*dev_id = accelerator_cuda_get_device_id(mem_ctx);
if (is_vmm) {
*dev_id = vmm_dev_id;
} else {
result = cuPointerGetAttribute(&mem_ctx,
CU_POINTER_ATTRIBUTE_CONTEXT, dbuf);
/* query the device from the context */
*dev_id = accelerator_cuda_get_device_id(mem_ctx);
}
}
#endif /* OPAL_CUDA_GET_ATTRIBUTES */

/* Must be a device pointer */
assert(CU_MEMORYTYPE_DEVICE == mem_type);
#endif /* OPAL_CUDA_GET_ATTRIBUTES */

/* This piece of code was added in to handle in a case involving
* OMP threads. The user had initialized CUDA and then spawned
Expand All @@ -250,6 +336,16 @@ static int accelerator_cuda_check_addr(const void *addr, int *dev_id, uint64_t *
return OPAL_ERROR;
}
#endif /* OPAL_CUDA_GET_ATTRIBUTES */
if (is_vmm) {
/* This function is expected to set context if pointer is device
* accessible but VMM allocations have NULL context associated
* which cannot be set against the calling thread */
opal_output(0,
"CUDA: unable to set context with the given pointer"
"ptr=%p aborting...", addr);
return OPAL_ERROR;
}

result = cuCtxSetCurrent(mem_ctx);
if (OPAL_UNLIKELY(CUDA_SUCCESS != result)) {
opal_output(0,
Expand Down
Loading