Skip to content

Commit 0765a9e

Browse files
devrealPhuong Nguyen
and
Phuong Nguyen
committed
Add stream operations to accelerator components
- Stream-based alloc and free - Stream-based memmove - Wait for stream to complete Also, enable querying for number of devices and memory bandwidth. These operations are needed for operation device offloading. Co-authored-by: Phuong Nguyen <[email protected]> Signed-off-by: Joseph Schuchart <[email protected]>
1 parent 980eb50 commit 0765a9e

9 files changed

+714
-83
lines changed

opal/mca/accelerator/accelerator.h

+105-1
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,9 @@
55
* Copyright (c) Amazon.com, Inc. or its affiliates.
66
* All Rights reserved.
77
* Copyright (c) 2023 Advanced Micro Devices, Inc. All Rights reserved.
8+
* Copyright (c) 2024 The University of Tennessee and The University
9+
* of Tennessee Research Foundation. All rights
10+
* reserved.
811
*
912
* $COPYRIGHT$
1013
*
@@ -193,7 +196,7 @@ typedef int (*opal_accelerator_base_module_create_stream_fn_t)(
193196
* @param[IN] dev_id Associated device for the event or
194197
* MCA_ACCELERATOR_NO_DEVICE_ID
195198
* @param[OUT] event Event to create
196-
* @param[IN] enable_ipc support inter-process tracking of the event
199+
* @param[IN] enable_ipc support inter-process tracking of the event
197200
*
198201
* @return OPAL_SUCCESS or error status on failure.
199202
*/
@@ -310,6 +313,31 @@ typedef int (*opal_accelerator_base_module_memmove_fn_t)(
310313
int dest_dev_id, int src_dev_id, void *dest, const void *src, size_t size,
311314
opal_accelerator_transfer_type_t type);
312315

316+
317+
/**
318+
* Copies memory asynchronously from src to dest. Memory of dest and src
319+
* may overlap. Optionally can specify the transfer type to
320+
* avoid pointer detection for performance. The operations will be enqueued
321+
* into the provided stream but are not guaranteed to be complete upon return.
322+
*
323+
* @param[IN] dest_dev_id Associated device to copy to or
324+
* MCA_ACCELERATOR_NO_DEVICE_ID
325+
* @param[IN] src_dev_id Associated device to copy from or
326+
* MCA_ACCELERATOR_NO_DEVICE_ID
327+
* @param[IN] dest Destination to copy memory to
328+
* @param[IN] src Source to copy memory from
329+
* @param[IN] size Size of memory to copy
330+
* @param[IN] stream Stream to perform asynchronous move on
331+
* @param[IN] type Transfer type field for performance
332+
* Can be set to MCA_ACCELERATOR_TRANSFER_UNSPEC
333+
* if caller is unsure of the transfer direction.
334+
*
335+
* @return OPAL_SUCCESS or error status on failure
336+
*/
337+
typedef int (*opal_accelerator_base_module_memmove_async_fn_t)(
338+
int dest_dev_id, int src_dev_id, void *dest, const void *src, size_t size,
339+
opal_accelerator_stream_t *stream, opal_accelerator_transfer_type_t type);
340+
313341
/**
314342
* Allocates size bytes memory from the device and sets ptr to the
315343
* pointer of the allocated memory. The memory is not initialized.
@@ -340,6 +368,46 @@ typedef int (*opal_accelerator_base_module_mem_alloc_fn_t)(
340368
typedef int (*opal_accelerator_base_module_mem_release_fn_t)(
341369
int dev_id, void *ptr);
342370

371+
372+
/**
373+
* Allocates size bytes memory from the device and sets ptr to the
374+
* pointer of the allocated memory. The memory is not initialized.
375+
* The allocation request is placed into the stream object.
376+
* Any use of the memory must follow the completion of this
377+
* operation on the stream.
378+
*
379+
* @param[IN] dev_id Associated device for the allocation or
380+
* MCA_ACCELERATOR_NO_DEVICE_ID
381+
* @param[OUT] ptr Returns pointer to allocated memory
382+
* @param[IN] size Size of memory to allocate
383+
* @param[IN] stream Stream into which to insert the allocation request
384+
*
385+
* @return OPAL_SUCCESS or error status on failure
386+
*/
387+
typedef int (*opal_accelerator_base_module_mem_alloc_stream_fn_t)(
388+
int dev_id, void **ptr, size_t size, opal_accelerator_stream_t *stream);
389+
390+
/**
391+
* Frees the memory space pointed to by ptr which has been returned by
392+
* a previous call to an opal_accelerator_base_module_mem_alloc_stream_fn_t().
393+
* If the function is called on a ptr that has already been freed,
394+
* undefined behavior occurs. If ptr is NULL, no operation is performed,
395+
* and the function returns OPAL_SUCCESS.
396+
* The release of the memory will be inserted into the stream and occurs after
397+
* all previous operations have completed.
398+
*
399+
* @param[IN] dev_id Associated device for the allocation or
400+
* MCA_ACCELERATOR_NO_DEVICE_ID
401+
* @param[IN] ptr Pointer to free
402+
* @param[IN] stream Stream into which to insert the free operation
403+
*
404+
* @return OPAL_SUCCESS or error status on failure
405+
*/
406+
typedef int (*opal_accelerator_base_module_mem_release_stream_fn_t)(
407+
int dev_id, void *ptr, opal_accelerator_stream_t *stream);
408+
409+
410+
343411
/**
344412
* Retrieves the base address and/or size of a memory allocation of the
345413
* device.
@@ -557,6 +625,35 @@ typedef int (*opal_accelerator_base_module_device_can_access_peer_fn_t)(
557625
typedef int (*opal_accelerator_base_module_get_buffer_id_fn_t)(
558626
int dev_id, const void *addr, opal_accelerator_buffer_id_t *buf_id);
559627

628+
/**
629+
* Wait for the completion of all operations inserted into the stream.
630+
*
631+
* @param[IN] stream The stream to wait for.
632+
*
633+
* @return OPAL_SUCCESS or error status on failure
634+
*/
635+
typedef int (*opal_accelerator_base_module_sync_stream_fn_t)(opal_accelerator_stream_t *stream);
636+
637+
/**
638+
* Get the number of devices available.
639+
*
640+
* @param[OUT] num_devices Number of devices.
641+
*
642+
* @return OPAL_SUCCESS or error status on failure
643+
*/
644+
typedef int (*opal_accelerator_base_module_get_num_devices_fn_t)(int *num_devices);
645+
646+
/**
647+
* Get the memory bandwidth of the device.
648+
*
649+
* @param[IN] device The device to query.
650+
* @param[OUT] bw The returned bandwidth for the device.
651+
*
652+
* @return OPAL_SUCCESS or error status on failure
653+
*/
654+
typedef int (*opal_accelerator_base_module_get_mem_bw_fn_t)(int device, float *bw);
655+
656+
560657
/*
561658
* the standard public API data structure
562659
*/
@@ -572,10 +669,13 @@ typedef struct {
572669

573670
opal_accelerator_base_module_memcpy_async_fn_t mem_copy_async;
574671
opal_accelerator_base_module_memcpy_fn_t mem_copy;
672+
opal_accelerator_base_module_memmove_async_fn_t mem_move_async;
575673
opal_accelerator_base_module_memmove_fn_t mem_move;
576674

577675
opal_accelerator_base_module_mem_alloc_fn_t mem_alloc;
578676
opal_accelerator_base_module_mem_release_fn_t mem_release;
677+
opal_accelerator_base_module_mem_alloc_stream_fn_t mem_alloc_stream;
678+
opal_accelerator_base_module_mem_release_stream_fn_t mem_release_stream;
579679
opal_accelerator_base_module_get_address_range_fn_t get_address_range;
580680

581681
opal_accelerator_base_module_is_ipc_enabled_fn_t is_ipc_enabled;
@@ -595,6 +695,10 @@ typedef struct {
595695
opal_accelerator_base_module_device_can_access_peer_fn_t device_can_access_peer;
596696

597697
opal_accelerator_base_module_get_buffer_id_fn_t get_buffer_id;
698+
699+
opal_accelerator_base_module_sync_stream_fn_t sync_stream;
700+
opal_accelerator_base_module_get_num_devices_fn_t num_devices;
701+
opal_accelerator_base_module_get_mem_bw_fn_t get_mem_bw;
598702
} opal_accelerator_base_module_t;
599703

600704
/**

0 commit comments

Comments
 (0)