@@ -195,14 +195,18 @@ static int accelerator_cuda_check_addr(const void *addr, int *dev_id, uint64_t *
195
195
return 0 ;
196
196
}
197
197
}
198
-
198
+ /* First access on a device pointer finalizes CUDA support initialization. */
199
+ opal_accelerator_cuda_delayed_init ();
199
200
return 1 ;
200
201
}
201
202
202
203
static int accelerator_cuda_create_stream (int dev_id , opal_accelerator_stream_t * * stream )
203
204
{
204
205
CUresult result ;
205
-
206
+ result = opal_accelerator_cuda_delayed_init ();
207
+ if (0 != result ) {
208
+ return result ;
209
+ }
206
210
* stream = (opal_accelerator_stream_t * )OBJ_NEW (opal_accelerator_cuda_stream_t );
207
211
if (NULL == * stream ) {
208
212
return OPAL_ERR_OUT_OF_RESOURCE ;
@@ -248,6 +252,10 @@ OBJ_CLASS_INSTANCE(
248
252
static int accelerator_cuda_create_event (int dev_id , opal_accelerator_event_t * * event )
249
253
{
250
254
CUresult result ;
255
+ result = opal_accelerator_cuda_delayed_init ();
256
+ if (0 != result ) {
257
+ return result ;
258
+ }
251
259
252
260
* event = (opal_accelerator_event_t * )OBJ_NEW (opal_accelerator_cuda_event_t );
253
261
if (NULL == * event ) {
@@ -340,6 +348,11 @@ static int accelerator_cuda_memcpy_async(int dest_dev_id, int src_dev_id, void *
340
348
{
341
349
CUresult result ;
342
350
351
+ result = opal_accelerator_cuda_delayed_init ();
352
+ if (0 != result ) {
353
+ return result ;
354
+ }
355
+
343
356
if (NULL == stream || NULL == dest || NULL == src || size <= 0 ) {
344
357
return OPAL_ERR_BAD_PARAM ;
345
358
}
@@ -358,6 +371,11 @@ static int accelerator_cuda_memcpy(int dest_dev_id, int src_dev_id, void *dest,
358
371
{
359
372
CUresult result ;
360
373
374
+ result = opal_accelerator_cuda_delayed_init ();
375
+ if (0 != result ) {
376
+ return result ;
377
+ }
378
+
361
379
if (NULL == dest || NULL == src || size <= 0 ) {
362
380
return OPAL_ERR_BAD_PARAM ;
363
381
}
@@ -391,6 +409,11 @@ static int accelerator_cuda_memmove(int dest_dev_id, int src_dev_id, void *dest,
391
409
CUdeviceptr tmp ;
392
410
CUresult result ;
393
411
412
+ result = opal_accelerator_cuda_delayed_init ();
413
+ if (0 != result ) {
414
+ return result ;
415
+ }
416
+
394
417
if (NULL == dest || NULL == src || size <= 0 ) {
395
418
return OPAL_ERR_BAD_PARAM ;
396
419
}
@@ -425,6 +448,11 @@ static int accelerator_cuda_mem_alloc(int dev_id, void **ptr, size_t size)
425
448
{
426
449
CUresult result ;
427
450
451
+ result = opal_accelerator_cuda_delayed_init ();
452
+ if (0 != result ) {
453
+ return result ;
454
+ }
455
+
428
456
if (NULL == ptr || 0 == size ) {
429
457
return OPAL_ERR_BAD_PARAM ;
430
458
}
@@ -459,6 +487,11 @@ static int accelerator_cuda_get_address_range(int dev_id, const void *ptr, void
459
487
{
460
488
CUresult result ;
461
489
490
+ result = opal_accelerator_cuda_delayed_init ();
491
+ if (0 != result ) {
492
+ return result ;
493
+ }
494
+
462
495
if (NULL == ptr || NULL == base || NULL == size ) {
463
496
return OPAL_ERR_BAD_PARAM ;
464
497
}
@@ -479,6 +512,11 @@ static int accelerator_cuda_get_address_range(int dev_id, const void *ptr, void
479
512
static int accelerator_cuda_host_register (int dev_id , void * ptr , size_t size )
480
513
{
481
514
CUresult result ;
515
+ result = opal_accelerator_cuda_delayed_init ();
516
+ if (0 != result ) {
517
+ return result ;
518
+ }
519
+
482
520
if (NULL == ptr && size > 0 ) {
483
521
return OPAL_ERR_BAD_PARAM ;
484
522
}
@@ -512,6 +550,11 @@ static int accelerator_cuda_get_device(int *dev_id)
512
550
CUdevice cuDev ;
513
551
CUresult result ;
514
552
553
+ result = opal_accelerator_cuda_delayed_init ();
554
+ if (0 != result ) {
555
+ return result ;
556
+ }
557
+
515
558
if (NULL == dev_id ) {
516
559
return OPAL_ERR_BAD_PARAM ;
517
560
}
@@ -530,6 +573,11 @@ static int accelerator_cuda_device_can_access_peer(int *access, int dev1, int de
530
573
{
531
574
CUresult result ;
532
575
576
+ result = opal_accelerator_cuda_delayed_init ();
577
+ if (0 != result ) {
578
+ return result ;
579
+ }
580
+
533
581
if (NULL == access ) {
534
582
return OPAL_ERR_BAD_PARAM ;
535
583
}
@@ -554,6 +602,12 @@ static int accelerator_cuda_get_buffer_id(int dev_id, const void *addr, opal_acc
554
602
{
555
603
CUresult result ;
556
604
int enable = 1 ;
605
+
606
+ result = opal_accelerator_cuda_delayed_init ();
607
+ if (0 != result ) {
608
+ return result ;
609
+ }
610
+
557
611
result = cuPointerGetAttribute ((unsigned long long * )buf_id , CU_POINTER_ATTRIBUTE_BUFFER_ID , (CUdeviceptr ) addr );
558
612
if (OPAL_UNLIKELY (result != CUDA_SUCCESS )) {
559
613
opal_show_help ("help-accelerator-cuda.txt" , "bufferID failed" , true, OPAL_PROC_MY_HOSTNAME ,
0 commit comments