5
5
* Copyright (c) Amazon.com, Inc. or its affiliates.
6
6
* All Rights reserved.
7
7
* Copyright (c) 2023 Advanced Micro Devices, Inc. All Rights reserved.
8
+ * Copyright (c) 2024 The University of Tennessee and The University
9
+ * of Tennessee Research Foundation. All rights
10
+ * reserved.
8
11
*
9
12
* $COPYRIGHT$
10
13
*
@@ -193,7 +196,7 @@ typedef int (*opal_accelerator_base_module_create_stream_fn_t)(
193
196
* @param[IN] dev_id Associated device for the event or
194
197
* MCA_ACCELERATOR_NO_DEVICE_ID
195
198
* @param[OUT] event Event to create
196
- * @param[IN] enable_ipc support inter-process tracking of the event
199
+ * @param[IN] enable_ipc support inter-process tracking of the event
197
200
*
198
201
* @return OPAL_SUCCESS or error status on failure.
199
202
*/
@@ -310,6 +313,31 @@ typedef int (*opal_accelerator_base_module_memmove_fn_t)(
310
313
int dest_dev_id , int src_dev_id , void * dest , const void * src , size_t size ,
311
314
opal_accelerator_transfer_type_t type );
312
315
316
+
317
+ /**
318
+ * Copies memory asynchronously from src to dest. Memory of dest and src
319
+ * may overlap. Optionally can specify the transfer type to
320
+ * avoid pointer detection for performance. The operations will be enqueued
321
+ * into the provided stream but are not guaranteed to be complete upon return.
322
+ *
323
+ * @param[IN] dest_dev_id Associated device to copy to or
324
+ * MCA_ACCELERATOR_NO_DEVICE_ID
325
+ * @param[IN] src_dev_id Associated device to copy from or
326
+ * MCA_ACCELERATOR_NO_DEVICE_ID
327
+ * @param[IN] dest Destination to copy memory to
328
+ * @param[IN] src Source to copy memory from
329
+ * @param[IN] size Size of memory to copy
330
+ * @param[IN] stream Stream to perform asynchronous move on
331
+ * @param[IN] type Transfer type field for performance
332
+ * Can be set to MCA_ACCELERATOR_TRANSFER_UNSPEC
333
+ * if caller is unsure of the transfer direction.
334
+ *
335
+ * @return OPAL_SUCCESS or error status on failure
336
+ */
337
+ typedef int (* opal_accelerator_base_module_memmove_async_fn_t )(
338
+ int dest_dev_id , int src_dev_id , void * dest , const void * src , size_t size ,
339
+ opal_accelerator_stream_t * stream , opal_accelerator_transfer_type_t type );
340
+
313
341
/**
314
342
* Allocates size bytes memory from the device and sets ptr to the
315
343
* pointer of the allocated memory. The memory is not initialized.
@@ -340,6 +368,46 @@ typedef int (*opal_accelerator_base_module_mem_alloc_fn_t)(
340
368
typedef int (* opal_accelerator_base_module_mem_release_fn_t )(
341
369
int dev_id , void * ptr );
342
370
371
+
372
+ /**
373
+ * Allocates size bytes memory from the device and sets ptr to the
374
+ * pointer of the allocated memory. The memory is not initialized.
375
+ * The allocation request is placed into the stream object.
376
+ * Any use of the memory must follow the completion of this
377
+ * operation on the stream.
378
+ *
379
+ * @param[IN] dev_id Associated device for the allocation or
380
+ * MCA_ACCELERATOR_NO_DEVICE_ID
381
+ * @param[OUT] ptr Returns pointer to allocated memory
382
+ * @param[IN] size Size of memory to allocate
383
+ * @param[IN] stream Stream into which to insert the allocation request
384
+ *
385
+ * @return OPAL_SUCCESS or error status on failure
386
+ */
387
+ typedef int (* opal_accelerator_base_module_mem_alloc_stream_fn_t )(
388
+ int dev_id , void * * ptr , size_t size , opal_accelerator_stream_t * stream );
389
+
390
+ /**
391
+ * Frees the memory space pointed to by ptr which has been returned by
392
+ * a previous call to an opal_accelerator_base_module_mem_alloc_stream_fn_t().
393
+ * If the function is called on a ptr that has already been freed,
394
+ * undefined behavior occurs. If ptr is NULL, no operation is performed,
395
+ * and the function returns OPAL_SUCCESS.
396
+ * The release of the memory will be inserted into the stream and occurs after
397
+ * all previous operations have completed.
398
+ *
399
+ * @param[IN] dev_id Associated device for the allocation or
400
+ * MCA_ACCELERATOR_NO_DEVICE_ID
401
+ * @param[IN] ptr Pointer to free
402
+ * @param[IN] stream Stream into which to insert the free operation
403
+ *
404
+ * @return OPAL_SUCCESS or error status on failure
405
+ */
406
+ typedef int (* opal_accelerator_base_module_mem_release_stream_fn_t )(
407
+ int dev_id , void * ptr , opal_accelerator_stream_t * stream );
408
+
409
+
410
+
343
411
/**
344
412
* Retrieves the base address and/or size of a memory allocation of the
345
413
* device.
@@ -557,6 +625,35 @@ typedef int (*opal_accelerator_base_module_device_can_access_peer_fn_t)(
557
625
typedef int (* opal_accelerator_base_module_get_buffer_id_fn_t )(
558
626
int dev_id , const void * addr , opal_accelerator_buffer_id_t * buf_id );
559
627
628
+ /**
629
+ * Wait for the completion of all operations inserted into the stream.
630
+ *
631
+ * @param[IN] stream The stream to wait for.
632
+ *
633
+ * @return OPAL_SUCCESS or error status on failure
634
+ */
635
+ typedef int (* opal_accelerator_base_module_sync_stream_fn_t )(opal_accelerator_stream_t * stream );
636
+
637
+ /**
638
+ * Get the number of devices available.
639
+ *
640
+ * @param[OUT] num_devices Number of devices.
641
+ *
642
+ * @return OPAL_SUCCESS or error status on failure
643
+ */
644
+ typedef int (* opal_accelerator_base_module_get_num_devices_fn_t )(int * num_devices );
645
+
646
+ /**
647
+ * Get the memory bandwidth of the device.
648
+ *
649
+ * @param[IN] device The device to query.
650
+ * @param[OUT] bw The returned bandwidth for the device.
651
+ *
652
+ * @return OPAL_SUCCESS or error status on failure
653
+ */
654
+ typedef int (* opal_accelerator_base_module_get_mem_bw_fn_t )(int device , float * bw );
655
+
656
+
560
657
/*
561
658
* the standard public API data structure
562
659
*/
@@ -572,10 +669,13 @@ typedef struct {
572
669
573
670
opal_accelerator_base_module_memcpy_async_fn_t mem_copy_async ;
574
671
opal_accelerator_base_module_memcpy_fn_t mem_copy ;
672
+ opal_accelerator_base_module_memmove_async_fn_t mem_move_async ;
575
673
opal_accelerator_base_module_memmove_fn_t mem_move ;
576
674
577
675
opal_accelerator_base_module_mem_alloc_fn_t mem_alloc ;
578
676
opal_accelerator_base_module_mem_release_fn_t mem_release ;
677
+ opal_accelerator_base_module_mem_alloc_stream_fn_t mem_alloc_stream ;
678
+ opal_accelerator_base_module_mem_release_stream_fn_t mem_release_stream ;
579
679
opal_accelerator_base_module_get_address_range_fn_t get_address_range ;
580
680
581
681
opal_accelerator_base_module_is_ipc_enabled_fn_t is_ipc_enabled ;
@@ -595,6 +695,10 @@ typedef struct {
595
695
opal_accelerator_base_module_device_can_access_peer_fn_t device_can_access_peer ;
596
696
597
697
opal_accelerator_base_module_get_buffer_id_fn_t get_buffer_id ;
698
+
699
+ opal_accelerator_base_module_sync_stream_fn_t sync_stream ;
700
+ opal_accelerator_base_module_get_num_devices_fn_t num_devices ;
701
+ opal_accelerator_base_module_get_mem_bw_fn_t get_mem_bw ;
598
702
} opal_accelerator_base_module_t ;
599
703
600
704
/**
0 commit comments