/*
 * Copyright (c) 2024      NVIDIA Corporation.  All rights reserved.
 * Copyright (c) 2014-2015 Intel, Inc.  All rights reserved.
 * Copyright (c) 2014      Research Organization for Information Science
 *                         and Technology (RIST). All rights reserved.
 * Copyright (c) 2014      Mellanox Technologies, Inc.
 *                         All rights reserved.
 * Copyright (c) Amazon.com, Inc. or its affiliates.
 *                         All Rights reserved.
 * Copyright (c) 2024      The University of Tennessee and The University
 *                         of Tennessee Research Foundation.  All rights
 *                         reserved.
@@ -154,6 +154,46 @@ static int accelerator_cuda_get_device_id(CUcontext mem_ctx) {
154
154
return dev_id ;
155
155
}
156
156
157
+ static CUmemorytype accelerator_cuda_check_host_numa (CUdeviceptr dbuf )
158
+ {
159
+ #if OPAL_CUDA_HOST_NUMA_SUPPORT
160
+ CUmemAllocationProp prop ;
161
+ CUmemLocation location ;
162
+ CUresult result ;
163
+ unsigned long long flags ;
164
+ CUmemGenericAllocationHandle alloc_handle ;
165
+ /* Check if memory is allocated using VMM API and see if host memory needs
166
+ * to be treated as pinned device memory */
167
+ result = cuMemRetainAllocationHandle (& alloc_handle , (void * )dbuf );
168
+ if (result != CUDA_SUCCESS ) {
169
+ return CU_MEMORYTYPE_HOST ;
170
+ }
171
+
172
+ result = cuMemGetAllocationPropertiesFromHandle (& prop , alloc_handle );
173
+ if (result != CUDA_SUCCESS ) {
174
+ return CU_MEMORYTYPE_HOST ;
175
+ }
176
+
177
+ if ((CU_MEM_LOCATION_TYPE_HOST == prop .location .type ) ||
178
+ (CU_MEM_LOCATION_TYPE_HOST_NUMA == prop .location .type ) ||
179
+ (CU_MEM_LOCATION_TYPE_HOST_NUMA_CURRENT == prop .location .type )) {
180
+ /* If host has read-write access, then range is accessible by CPU */
181
+ result = cuMemGetAccess (& flags , & location , dbuf );
182
+ if ((CUDA_SUCCESS == result ) &&
183
+ ((CU_MEM_LOCATION_TYPE_HOST == location .type ) ||
184
+ (CU_MEM_LOCATION_TYPE_HOST_NUMA == location .type ) ||
185
+ (CU_MEM_LOCATION_TYPE_HOST_NUMA_CURRENT == location .type )) &&
186
+ (CU_MEM_ACCESS_FLAGS_PROT_READWRITE == flags )) {
187
+ return CU_MEMORYTYPE_HOST ;
188
+ } else {
189
+ return CU_MEMORYTYPE_DEVICE ;
190
+ }
191
+ }
192
+ #else
193
+ return CU_MEMORYTYPE_HOST ;
194
+ #endif
195
+ }
196
+
157
197
static int accelerator_cuda_check_addr (const void * addr , int * dev_id , uint64_t * flags )
158
198
{
159
199
CUresult result ;
@@ -197,8 +237,11 @@ static int accelerator_cuda_check_addr(const void *addr, int *dev_id, uint64_t *
197
237
return OPAL_ERROR ;
198
238
}
199
239
} else if (CU_MEMORYTYPE_HOST == mem_type ) {
200
- /* Host memory, nothing to do here */
201
- return 0 ;
240
+ mem_type = accelerator_cuda_check_host_numa (dbuf );
241
+ if (CU_MEMORYTYPE_HOST == mem_type ) {
242
+ /* Host memory, nothing to do here */
243
+ return 0 ;
244
+ }
202
245
} else if (0 == mem_type ) {
203
246
/* This can happen when CUDA is initialized but dbuf is not valid CUDA pointer */
204
247
return 0 ;
@@ -218,8 +261,11 @@ static int accelerator_cuda_check_addr(const void *addr, int *dev_id, uint64_t *
218
261
return OPAL_ERROR ;
219
262
}
220
263
} else if (CU_MEMORYTYPE_HOST == mem_type ) {
221
- /* Host memory, nothing to do here */
222
- return 0 ;
264
+ mem_type = accelerator_cuda_check_host_numa (dbuf );
265
+ if (CU_MEMORYTYPE_HOST == mem_type ) {
266
+ /* Host memory, nothing to do here */
267
+ return 0 ;
268
+ }
223
269
} else {
224
270
result = cuPointerGetAttribute (& mem_ctx , CU_POINTER_ATTRIBUTE_CONTEXT , dbuf );
225
271
/* query the device from the context */
0 commit comments