Skip to content

Commit 036c997

Browse files
opal/cuda: avoid direct access to cumem host numa memory
Signed-off-by: Akshay Venkatesh <[email protected]>
1 parent 57f2404 commit 036c997

File tree

2 files changed

+62
-5
lines changed

2 files changed

+62
-5
lines changed

config/opal_check_cuda.m4

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
dnl -*- autoconf -*-
22
dnl
3+
dnl Copyright (c) 2024 NVIDIA Corporation. All rights reserved.
34
dnl Copyright (c) 2004-2010 The Trustees of Indiana University and Indiana
45
dnl University Research and Technology
56
dnl Corporation. All rights reserved.
@@ -118,6 +119,12 @@ AS_IF([test "$opal_check_cuda_happy" = "yes"],
118119
[#include <$opal_cuda_incdir/cuda.h>])],
119120
[])
120121
122+
# If we have CUDA support, check to see if we have support for cuMemCreate memory on host NUMA.
# The spaces around the equals sign are required. Without them the shell test
# receives one non-empty word and always succeeds.
# When CUDA is unavailable the variable is still set to 0 so that the
# OPAL_CUDA_HOST_NUMA_SUPPORT define always expands to a number.
AS_IF([test "$opal_check_cuda_happy" = "yes"],
    [AC_CHECK_DECL([CU_MEM_LOCATION_TYPE_HOST_NUMA], [CUDA_HOST_NUMA_SUPPORT=1], [CUDA_HOST_NUMA_SUPPORT=0],
        [#include <$opal_cuda_incdir/cuda.h>])],
    [CUDA_HOST_NUMA_SUPPORT=0])
127+
121128
# If we have CUDA support, check to see if we have support for SYNC_MEMOPS
122129
# which was first introduced in CUDA 6.0.
123130
AS_IF([test "$opal_check_cuda_happy" = "yes"],
@@ -160,6 +167,10 @@ AM_CONDITIONAL([OPAL_cuda_support], [test "x$CUDA_SUPPORT" = "x1"])
160167
AC_DEFINE_UNQUOTED([OPAL_CUDA_SUPPORT],$CUDA_SUPPORT,
161168
[Whether we want cuda device pointer support])
162169
170+
AM_CONDITIONAL([OPAL_cuda_host_numa_support], [test "x$CUDA_HOST_NUMA_SUPPORT" = "x1"])
171+
AC_DEFINE_UNQUOTED([OPAL_CUDA_HOST_NUMA_SUPPORT],$CUDA_HOST_NUMA_SUPPORT,
172+
[Whether we have CU_MEM_LOCATION_TYPE_HOST_NUMA support available])
173+
163174
AM_CONDITIONAL([OPAL_cuda_sync_memops], [test "x$CUDA_SYNC_MEMOPS" = "x1"])
164175
AC_DEFINE_UNQUOTED([OPAL_CUDA_SYNC_MEMOPS],$CUDA_SYNC_MEMOPS,
165176
[Whether we have CUDA CU_POINTER_ATTRIBUTE_SYNC_MEMOPS support available])

opal/mca/accelerator/cuda/accelerator_cuda.c

Lines changed: 51 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,12 @@
11
/*
2+
* Copyright (c) 2024 NVIDIA Corporation. All rights reserved.
23
* Copyright (c) 2014-2015 Intel, Inc. All rights reserved.
34
* Copyright (c) 2014 Research Organization for Information Science
45
* and Technology (RIST). All rights reserved.
56
* Copyright (c) 2014 Mellanox Technologies, Inc.
67
* All rights reserved.
78
* Copyright (c) Amazon.com, Inc. or its affiliates.
89
* All Rights reserved.
9-
* Copyright (c) 2024 NVIDIA Corporation. All rights reserved.
1010
* Copyright (c) 2024 The University of Tennessee and The University
1111
* of Tennessee Research Foundation. All rights
1212
* reserved.
@@ -154,6 +154,46 @@ static int accelerator_cuda_get_device_id(CUcontext mem_ctx) {
154154
return dev_id;
155155
}
156156

157+
/**
 * Refine the classification of a buffer that the driver reported as
 * CU_MEMORYTYPE_HOST.
 *
 * Memory created through the CUDA virtual memory management (VMM) API can be
 * physically located on the host (plain host, host NUMA node, or current host
 * NUMA node). Such memory is only safe to touch from the CPU when the host
 * location has been granted read-write access; otherwise it must be handled
 * like device memory.
 *
 * @param dbuf  Address to inspect.
 *
 * @return CU_MEMORYTYPE_HOST   when the buffer is not a VMM allocation, when
 *                              its properties cannot be queried, when it is
 *                              not host-located, or when the host location
 *                              has read-write access to it;
 *         CU_MEMORYTYPE_DEVICE when the buffer is a host-located VMM
 *                              allocation for which read-write access by the
 *                              host location could not be confirmed.
 *
 * When OPAL_CUDA_HOST_NUMA_SUPPORT is 0 this always returns
 * CU_MEMORYTYPE_HOST.
 */
static CUmemorytype accelerator_cuda_check_host_numa(CUdeviceptr dbuf)
{
#if OPAL_CUDA_HOST_NUMA_SUPPORT
    CUmemAllocationProp prop;
    CUmemLocation location;
    CUresult result;
    unsigned long long flags = 0;
    CUmemGenericAllocationHandle alloc_handle;

    /* Check if memory is allocated using VMM API and see if host memory needs
     * to be treated as pinned device memory. A failure here means the address
     * does not belong to a VMM allocation. */
    result = cuMemRetainAllocationHandle(&alloc_handle, (void *) dbuf);
    if (CUDA_SUCCESS != result) {
        return CU_MEMORYTYPE_HOST;
    }

    result = cuMemGetAllocationPropertiesFromHandle(&prop, alloc_handle);

    /* cuMemRetainAllocationHandle took a reference on the allocation; drop it
     * as soon as the properties have been read, on success and failure alike,
     * so that repeated address checks do not leak references. */
    (void) cuMemRelease(alloc_handle);

    if (CUDA_SUCCESS != result) {
        return CU_MEMORYTYPE_HOST;
    }

    if ((CU_MEM_LOCATION_TYPE_HOST != prop.location.type) &&
        (CU_MEM_LOCATION_TYPE_HOST_NUMA != prop.location.type) &&
        (CU_MEM_LOCATION_TYPE_HOST_NUMA_CURRENT != prop.location.type)) {
        /* Not a host-located VMM allocation: keep the classification the
         * caller already obtained from the driver. */
        return CU_MEMORYTYPE_HOST;
    }

    /* cuMemGetAccess takes the location as an INPUT: ask for the access flags
     * of the host location that backs this allocation. */
    location = prop.location;
    result = cuMemGetAccess(&flags, &location, dbuf);

    /* If host has read-write access, then range is accessible by CPU */
    if ((CUDA_SUCCESS == result) &&
        (CU_MEM_ACCESS_FLAGS_PROT_READWRITE == flags)) {
        return CU_MEMORYTYPE_HOST;
    }

    return CU_MEMORYTYPE_DEVICE;
#else
    (void) dbuf; /* unused without host NUMA support */
    return CU_MEMORYTYPE_HOST;
#endif
}
196+
157197
static int accelerator_cuda_check_addr(const void *addr, int *dev_id, uint64_t *flags)
158198
{
159199
CUresult result;
@@ -197,8 +237,11 @@ static int accelerator_cuda_check_addr(const void *addr, int *dev_id, uint64_t *
197237
return OPAL_ERROR;
198238
}
199239
} else if (CU_MEMORYTYPE_HOST == mem_type) {
200-
/* Host memory, nothing to do here */
201-
return 0;
240+
mem_type = accelerator_cuda_check_host_numa(dbuf);
241+
if (CU_MEMORYTYPE_HOST == mem_type) {
242+
/* Host memory, nothing to do here */
243+
return 0;
244+
}
202245
} else if (0 == mem_type) {
203246
/* This can happen when CUDA is initialized but dbuf is not valid CUDA pointer */
204247
return 0;
@@ -218,8 +261,11 @@ static int accelerator_cuda_check_addr(const void *addr, int *dev_id, uint64_t *
218261
return OPAL_ERROR;
219262
}
220263
} else if (CU_MEMORYTYPE_HOST == mem_type) {
221-
/* Host memory, nothing to do here */
222-
return 0;
264+
mem_type = accelerator_cuda_check_host_numa(dbuf);
265+
if (CU_MEMORYTYPE_HOST == mem_type) {
266+
/* Host memory, nothing to do here */
267+
return 0;
268+
}
223269
} else {
224270
result = cuPointerGetAttribute(&mem_ctx, CU_POINTER_ATTRIBUTE_CONTEXT, dbuf);
225271
/* query the device from the context */

0 commit comments

Comments
 (0)