|
15 | 15 | * Copyright (c) 2023 Jeffrey M. Squyres. All rights reserved.
|
16 | 16 | * Copyright (c) 2024 Triad National Security, LLC. All rights
|
17 | 17 | * reserved.
|
| 18 | + * Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved. |
18 | 19 | * $COPYRIGHT$
|
19 | 20 | *
|
20 | 21 | * Additional copyrights may follow
|
|
30 | 31 | #include "ompi/mca/fcoll/base/fcoll_base_coll_array.h"
|
31 | 32 | #include "ompi/mca/fcoll/base/base.h"
|
32 | 33 | #include "ompi/mca/common/ompio/common_ompio.h"
|
| 34 | +#include "ompi/mca/common/ompio/common_ompio_request.h" |
| 35 | +#include "ompi/mca/common/ompio/common_ompio_buffer.h" |
33 | 36 | #include "ompi/mca/io/io.h"
|
34 | 37 | #include "math.h"
|
35 | 38 | #include "ompi/mca/pml/pml.h"
|
| 39 | +#include "opal/mca/accelerator/accelerator.h" |
36 | 40 | #include <unistd.h>
|
37 | 41 |
|
38 | 42 | #define DEBUG_ON 0
|
@@ -106,6 +110,9 @@ mca_common_ompio_base_file_read_all (struct ompio_file_t *fh,
|
106 | 110 | int* blocklength_proc = NULL;
|
107 | 111 | ptrdiff_t* displs_proc = NULL;
|
108 | 112 |
|
| 113 | + int is_gpu, is_managed; |
| 114 | + bool use_accelerator_buffer = false; |
| 115 | + |
109 | 116 | #if OMPIO_FCOLL_WANT_TIME_BREAKDOWN
|
110 | 117 | double read_time = 0.0, start_read_time = 0.0, end_read_time = 0.0;
|
111 | 118 | double rcomm_time = 0.0, start_rcomm_time = 0.0, end_rcomm_time = 0.0;
|
@@ -138,6 +145,12 @@ mca_common_ompio_base_file_read_all (struct ompio_file_t *fh,
|
138 | 145 | goto exit;
|
139 | 146 | }
|
140 | 147 |
|
| 148 | + mca_common_ompio_check_gpu_buf (fh, buf, &is_gpu, &is_managed); |
| 149 | + if (is_gpu && !is_managed && NULL != fh->f_fbtl->fbtl_ipreadv && |
| 150 | + fh->f_get_mca_parameter_value ("use_accelerator_buffers", strlen("use_accelerator_buffers"))) { |
| 151 | + use_accelerator_buffer = true; |
| 152 | + } |
| 153 | + |
141 | 154 | ret = mca_common_ompio_set_aggregator_props ((struct ompio_file_t *) fh,
|
142 | 155 | base_num_io_procs,
|
143 | 156 | max_data);
|
@@ -364,11 +377,22 @@ mca_common_ompio_base_file_read_all (struct ompio_file_t *fh,
|
364 | 377 | goto exit;
|
365 | 378 | }
|
366 | 379 |
|
367 |
| - global_buf = (char *) malloc (bytes_per_cycle); |
368 |
| - if (NULL == global_buf){ |
369 |
| - opal_output(1, "OUT OF MEMORY\n"); |
370 |
| - ret = OMPI_ERR_OUT_OF_RESOURCE; |
371 |
| - goto exit; |
| 380 | + if (use_accelerator_buffer) { |
| 381 | + opal_output_verbose(10, ompi_fcoll_base_framework.framework_output, |
| 382 | + "Allocating GPU device buffer for aggregation\n"); |
| 383 | + ret = opal_accelerator.mem_alloc(MCA_ACCELERATOR_NO_DEVICE_ID, (void**)&global_buf, |
| 384 | + bytes_per_cycle); |
| 385 | + if (OPAL_SUCCESS != ret) { |
| 386 | + opal_output(1, "Could not allocate accelerator memory"); |
| 387 | + ret = OMPI_ERR_OUT_OF_RESOURCE; |
| 388 | + goto exit; |
| 389 | + } |
| 390 | + } else {global_buf = (char *) malloc (bytes_per_cycle); |
| 391 | + if (NULL == global_buf){ |
| 392 | + opal_output(1, "OUT OF MEMORY\n"); |
| 393 | + ret = OMPI_ERR_OUT_OF_RESOURCE; |
| 394 | + goto exit; |
| 395 | + } |
372 | 396 | }
|
373 | 397 |
|
374 | 398 | sendtype = (ompi_datatype_t **) malloc (fh->f_procs_per_group * sizeof(ompi_datatype_t *));
|
@@ -686,10 +710,26 @@ mca_common_ompio_base_file_read_all (struct ompio_file_t *fh,
|
686 | 710 | #endif
|
687 | 711 |
|
688 | 712 | if (fh->f_num_of_io_entries) {
|
689 |
| - if ( 0 > fh->f_fbtl->fbtl_preadv (fh)) { |
690 |
| - opal_output (1, "READ FAILED\n"); |
691 |
| - ret = OMPI_ERROR; |
692 |
| - goto exit; |
| 713 | + if (use_accelerator_buffer) { |
| 714 | + mca_ompio_request_t *ompio_req = NULL; |
| 715 | + mca_common_ompio_request_alloc (&ompio_req, MCA_OMPIO_REQUEST_READ); |
| 716 | + |
| 717 | + ret = mca_common_ompio_file_iread_pregen(fh, (ompi_request_t *) ompio_req); |
| 718 | + if(0 > ret) { |
| 719 | + opal_output (1, "common_ompio_file_read_all: mca_common_ompio_iread_pregen failed\n"); |
| 720 | + ompio_req->req_ompi.req_status.MPI_ERROR = ret; |
| 721 | + ompio_req->req_ompi.req_status._ucount = 0; |
| 722 | + } |
| 723 | + ret = ompi_request_wait ((ompi_request_t**)&ompio_req, MPI_STATUS_IGNORE); |
| 724 | + if (OMPI_SUCCESS != ret){ |
| 725 | + goto exit; |
| 726 | + } |
| 727 | + } else { |
| 728 | + if ( 0 > fh->f_fbtl->fbtl_preadv (fh)) { |
| 729 | + opal_output (1, "READ FAILED\n"); |
| 730 | + ret = OMPI_ERROR; |
| 731 | + goto exit; |
| 732 | + } |
693 | 733 | }
|
694 | 734 | }
|
695 | 735 |
|
@@ -881,7 +921,11 @@ mca_common_ompio_base_file_read_all (struct ompio_file_t *fh,
|
881 | 921 |
|
882 | 922 | exit:
|
883 | 923 | if (NULL != global_buf) {
|
884 |
| - free (global_buf); |
| 924 | + if (use_accelerator_buffer) { |
| 925 | + opal_accelerator.mem_release(MCA_ACCELERATOR_NO_DEVICE_ID, global_buf); |
| 926 | + } else { |
| 927 | + free (global_buf); |
| 928 | + } |
885 | 929 | global_buf = NULL;
|
886 | 930 | }
|
887 | 931 | if (NULL != sorted) {
|
|
0 commit comments