Skip to content

Commit 1afb524

Browse files
authored
Merge pull request #12678 from edgargabriel/topic/fcoll-vulcan-accelerator-support
fcoll/vulcan accelerator support
2 parents 72c952d + d30471c commit 1afb524

9 files changed

+268
-37
lines changed

ompi/mca/common/ompio/common_ompio.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -262,6 +262,8 @@ OMPI_DECLSPEC int mca_common_ompio_file_write_at (ompio_file_t *fh, OMPI_MPI_OFF
262262
OMPI_DECLSPEC int mca_common_ompio_file_iwrite (ompio_file_t *fh, const void *buf, size_t count,
263263
struct ompi_datatype_t *datatype, ompi_request_t **request);
264264

265+
/* Request-based write of the io-array that the fcoll component has already
 * generated in fh. Per the definition in common_ompio_file_write.c it is
 * only used when the temporary buffer is a gpu buffer and the fbtl supports
 * the ipwritev operation; the file pointer is updated by the caller. */
OMPI_DECLSPEC int mca_common_ompio_file_iwrite_pregen (ompio_file_t *fh, ompi_request_t *request);
266+
265267
OMPI_DECLSPEC int mca_common_ompio_file_iwrite_at (ompio_file_t *fh, OMPI_MPI_OFFSET_TYPE offset,
266268
const void *buf, size_t count, struct ompi_datatype_t *datatype,
267269
ompi_request_t **request);
@@ -297,6 +299,8 @@ OMPI_DECLSPEC int mca_common_ompio_file_read_at (ompio_file_t *fh, OMPI_MPI_OFFS
297299
OMPI_DECLSPEC int mca_common_ompio_file_iread (ompio_file_t *fh, void *buf, size_t count,
298300
struct ompi_datatype_t *datatype, ompi_request_t **request);
299301

302+
/* Request-based read into the io-array that file_read_all has already
 * generated in fh. Per the definition in common_ompio_file_read.c it is
 * only used when the temporary buffer is a gpu buffer and the fbtl supports
 * the ipreadv operation; the file pointer is updated by the caller. */
OMPI_DECLSPEC int mca_common_ompio_file_iread_pregen (ompio_file_t *fh, ompi_request_t *request);
303+
300304
OMPI_DECLSPEC int mca_common_ompio_file_iread_at (ompio_file_t *fh, OMPI_MPI_OFFSET_TYPE offset,
301305
void *buf, size_t count, struct ompi_datatype_t *datatype,
302306
ompi_request_t **request);

ompi/mca/common/ompio/common_ompio_file_read.c

Lines changed: 63 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@
1212
* Copyright (c) 2008-2019 University of Houston. All rights reserved.
1313
* Copyright (c) 2018 Research Organization for Information Science
1414
* and Technology (RIST). All rights reserved.
15-
* Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved.
15+
* Copyright (c) 2022-2024 Advanced Micro Devices, Inc. All rights reserved.
1616
* Copyright (c) 2024 Triad National Security, LLC. All rights
1717
* reserved.
1818
* $COPYRIGHT$
@@ -530,6 +530,68 @@ int mca_common_ompio_file_iread (ompio_file_t *fh,
530530
return ret;
531531
}
532532

533+
/*
534+
** This routine is invoked from file_read_all.
535+
** It is only used if the temporary buffer is a gpu buffer,
536+
** and the fbtl supports the ipreadv operation.
537+
**
538+
** The io-array has already been generated in file_read_all,
539+
** and we use the pre-computed offsets to created a pseudo fview.
540+
** The position of the file pointer is updated in the file_read_all
541+
** operation, not here.
542+
*/
543+
544+
int mca_common_ompio_file_iread_pregen (ompio_file_t *fh,
545+
ompi_request_t *request)
546+
{
547+
uint32_t i;
548+
size_t max_data;
549+
size_t pipeline_buf_size;
550+
mca_ompio_request_t *ompio_req = (mca_ompio_request_t *) request;
551+
552+
max_data = fh->f_io_array[0].length;
553+
pipeline_buf_size = OMPIO_MCA_GET(fh, pipeline_buffer_size);
554+
555+
mca_common_ompio_register_progress ();
556+
557+
OMPIO_PREPARE_READ_BUF (fh, fh->f_io_array[0].memory_address, max_data, MPI_BYTE,
558+
ompio_req->req_tbuf, &ompio_req->req_convertor, max_data,
559+
pipeline_buf_size, NULL, i);
560+
561+
ompio_req->req_num_subreqs = ceil((double)max_data/pipeline_buf_size);
562+
ompio_req->req_size = pipeline_buf_size;
563+
ompio_req->req_max_data = max_data;
564+
ompio_req->req_post_next_subreq = mca_common_ompio_post_next_read_subreq;
565+
ompio_req->req_fh = fh;
566+
ompio_req->req_ompi.req_status.MPI_ERROR = MPI_SUCCESS;
567+
568+
ompio_req->req_fview = (struct ompio_fview_t *) calloc(1, sizeof(struct ompio_fview_t));
569+
if (NULL == ompio_req->req_fview) {
570+
opal_output(1, "common_ompio: error allocating memory\n");
571+
return OMPI_ERR_OUT_OF_RESOURCE;
572+
}
573+
574+
ompio_req->req_fview->f_decoded_iov = (struct iovec*) malloc (fh->f_num_of_io_entries *
575+
sizeof(struct iovec));
576+
if (NULL == ompio_req->req_fview->f_decoded_iov) {
577+
opal_output(1, "common_ompio_file_iread_pregen: could not allocate memory\n");
578+
return OMPI_ERR_OUT_OF_RESOURCE;
579+
}
580+
581+
ompio_req->req_fview->f_iov_count = fh->f_num_of_io_entries;
582+
for (i=0; i < ompio_req->req_fview->f_iov_count; i++) {
583+
ompio_req->req_fview->f_decoded_iov[i].iov_base = fh->f_io_array[i].offset;
584+
ompio_req->req_fview->f_decoded_iov[i].iov_len = fh->f_io_array[i].length ;
585+
}
586+
587+
fh->f_num_of_io_entries = 0;
588+
free (fh->f_io_array);
589+
fh->f_io_array = NULL;
590+
591+
mca_common_ompio_post_next_read_subreq(ompio_req, 0);
592+
return OMPI_SUCCESS;
593+
}
594+
533595
int mca_common_ompio_file_iread_at (ompio_file_t *fh,
534596
OMPI_MPI_OFFSET_TYPE offset,
535597
void *buf,

ompi/mca/common/ompio/common_ompio_file_read_all.c

Lines changed: 54 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
* Copyright (c) 2023 Jeffrey M. Squyres. All rights reserved.
1616
* Copyright (c) 2024 Triad National Security, LLC. All rights
1717
* reserved.
18+
* Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved.
1819
* $COPYRIGHT$
1920
*
2021
* Additional copyrights may follow
@@ -30,9 +31,12 @@
3031
#include "ompi/mca/fcoll/base/fcoll_base_coll_array.h"
3132
#include "ompi/mca/fcoll/base/base.h"
3233
#include "ompi/mca/common/ompio/common_ompio.h"
34+
#include "ompi/mca/common/ompio/common_ompio_request.h"
35+
#include "ompi/mca/common/ompio/common_ompio_buffer.h"
3336
#include "ompi/mca/io/io.h"
3437
#include "math.h"
3538
#include "ompi/mca/pml/pml.h"
39+
#include "opal/mca/accelerator/accelerator.h"
3640
#include <unistd.h>
3741

3842
#define DEBUG_ON 0
@@ -106,6 +110,9 @@ mca_common_ompio_base_file_read_all (struct ompio_file_t *fh,
106110
int* blocklength_proc = NULL;
107111
ptrdiff_t* displs_proc = NULL;
108112

113+
int is_gpu, is_managed;
114+
bool use_accelerator_buffer = false;
115+
109116
#if OMPIO_FCOLL_WANT_TIME_BREAKDOWN
110117
double read_time = 0.0, start_read_time = 0.0, end_read_time = 0.0;
111118
double rcomm_time = 0.0, start_rcomm_time = 0.0, end_rcomm_time = 0.0;
@@ -138,6 +145,12 @@ mca_common_ompio_base_file_read_all (struct ompio_file_t *fh,
138145
goto exit;
139146
}
140147

148+
mca_common_ompio_check_gpu_buf (fh, buf, &is_gpu, &is_managed);
149+
if (is_gpu && !is_managed && NULL != fh->f_fbtl->fbtl_ipreadv &&
150+
fh->f_get_mca_parameter_value ("use_accelerator_buffers", strlen("use_accelerator_buffers"))) {
151+
use_accelerator_buffer = true;
152+
}
153+
141154
ret = mca_common_ompio_set_aggregator_props ((struct ompio_file_t *) fh,
142155
base_num_io_procs,
143156
max_data);
@@ -364,11 +377,22 @@ mca_common_ompio_base_file_read_all (struct ompio_file_t *fh,
364377
goto exit;
365378
}
366379

367-
global_buf = (char *) malloc (bytes_per_cycle);
368-
if (NULL == global_buf){
369-
opal_output(1, "OUT OF MEMORY\n");
370-
ret = OMPI_ERR_OUT_OF_RESOURCE;
371-
goto exit;
380+
if (use_accelerator_buffer) {
381+
opal_output_verbose(10, ompi_fcoll_base_framework.framework_output,
382+
"Allocating GPU device buffer for aggregation\n");
383+
ret = opal_accelerator.mem_alloc(MCA_ACCELERATOR_NO_DEVICE_ID, (void**)&global_buf,
384+
bytes_per_cycle);
385+
if (OPAL_SUCCESS != ret) {
386+
opal_output(1, "Could not allocate accelerator memory");
387+
ret = OMPI_ERR_OUT_OF_RESOURCE;
388+
goto exit;
389+
}
390+
} else {global_buf = (char *) malloc (bytes_per_cycle);
391+
if (NULL == global_buf){
392+
opal_output(1, "OUT OF MEMORY\n");
393+
ret = OMPI_ERR_OUT_OF_RESOURCE;
394+
goto exit;
395+
}
372396
}
373397

374398
sendtype = (ompi_datatype_t **) malloc (fh->f_procs_per_group * sizeof(ompi_datatype_t *));
@@ -686,10 +710,26 @@ mca_common_ompio_base_file_read_all (struct ompio_file_t *fh,
686710
#endif
687711

688712
if (fh->f_num_of_io_entries) {
689-
if ( 0 > fh->f_fbtl->fbtl_preadv (fh)) {
690-
opal_output (1, "READ FAILED\n");
691-
ret = OMPI_ERROR;
692-
goto exit;
713+
if (use_accelerator_buffer) {
714+
mca_ompio_request_t *ompio_req = NULL;
715+
mca_common_ompio_request_alloc (&ompio_req, MCA_OMPIO_REQUEST_READ);
716+
717+
ret = mca_common_ompio_file_iread_pregen(fh, (ompi_request_t *) ompio_req);
718+
if(0 > ret) {
719+
opal_output (1, "common_ompio_file_read_all: mca_common_ompio_iread_pregen failed\n");
720+
ompio_req->req_ompi.req_status.MPI_ERROR = ret;
721+
ompio_req->req_ompi.req_status._ucount = 0;
722+
}
723+
ret = ompi_request_wait ((ompi_request_t**)&ompio_req, MPI_STATUS_IGNORE);
724+
if (OMPI_SUCCESS != ret){
725+
goto exit;
726+
}
727+
} else {
728+
if ( 0 > fh->f_fbtl->fbtl_preadv (fh)) {
729+
opal_output (1, "READ FAILED\n");
730+
ret = OMPI_ERROR;
731+
goto exit;
732+
}
693733
}
694734
}
695735

@@ -881,7 +921,11 @@ mca_common_ompio_base_file_read_all (struct ompio_file_t *fh,
881921

882922
exit:
883923
if (NULL != global_buf) {
884-
free (global_buf);
924+
if (use_accelerator_buffer) {
925+
opal_accelerator.mem_release(MCA_ACCELERATOR_NO_DEVICE_ID, global_buf);
926+
} else {
927+
free (global_buf);
928+
}
885929
global_buf = NULL;
886930
}
887931
if (NULL != sorted) {

ompi/mca/common/ompio/common_ompio_file_write.c

Lines changed: 68 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@
1212
* Copyright (c) 2008-2019 University of Houston. All rights reserved.
1313
* Copyright (c) 2015-2018 Research Organization for Information Science
1414
* and Technology (RIST). All rights reserved.
15-
* Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved.
15+
* Copyright (c) 2022-2024 Advanced Micro Devices, Inc. All rights reserved.
1616
* Copyright (c) 2024 Triad National Security, LLC. All rights
1717
* reserved.
1818
* $COPYRIGHT$
@@ -329,6 +329,7 @@ static void mca_common_ompio_post_next_write_subreq(struct mca_ompio_request_t *
329329
decoded_iov.iov_base = req->req_tbuf;
330330
decoded_iov.iov_len = req->req_size;
331331
opal_convertor_pack (&req->req_convertor, &decoded_iov, &iov_count, &pos);
332+
332333
mca_common_ompio_build_io_array (req->req_fview, index, req->req_num_subreqs,
333334
bytes_per_cycle, pos,
334335
iov_count, &decoded_iov,
@@ -472,6 +473,72 @@ int mca_common_ompio_file_iwrite (ompio_file_t *fh,
472473
return ret;
473474
}
474475

476+
/*
477+
** This routine is invoked from the fcoll component.
478+
** It is only used if the temporary buffer is a gpu buffer,
479+
** and the fbtl supports the ipwritev operation.
480+
**
481+
** The io-array has already been generated in fcoll/xxx/file_write_all,
482+
** and we use the pre-computed offsets to created a pseudo fview.
483+
** The position of the file pointer is updated in the fcoll
484+
** component, not here.
485+
*/
486+
487+
int mca_common_ompio_file_iwrite_pregen (ompio_file_t *fh,
488+
ompi_request_t *request)
489+
{
490+
uint32_t i;
491+
size_t max_data;
492+
size_t pipeline_buf_size;
493+
mca_ompio_request_t *ompio_req = (mca_ompio_request_t *) request;
494+
495+
if (NULL == fh->f_fbtl->fbtl_ipwritev) {
496+
return MPI_ERR_INTERN;
497+
}
498+
499+
max_data = fh->f_io_array[0].length;
500+
pipeline_buf_size = OMPIO_MCA_GET(fh, pipeline_buffer_size);
501+
502+
mca_common_ompio_register_progress ();
503+
504+
OMPIO_PREPARE_BUF (fh, fh->f_io_array[0].memory_address, max_data, MPI_BYTE,
505+
ompio_req->req_tbuf, &ompio_req->req_convertor, max_data,
506+
pipeline_buf_size, NULL, i);
507+
508+
ompio_req->req_num_subreqs = ceil((double)max_data/pipeline_buf_size);
509+
ompio_req->req_size = pipeline_buf_size;
510+
ompio_req->req_max_data = max_data;
511+
ompio_req->req_post_next_subreq = mca_common_ompio_post_next_write_subreq;
512+
ompio_req->req_fh = fh;
513+
ompio_req->req_ompi.req_status.MPI_ERROR = MPI_SUCCESS;
514+
515+
ompio_req->req_fview = (struct ompio_fview_t *) calloc(1, sizeof(struct ompio_fview_t));
516+
if (NULL == ompio_req->req_fview) {
517+
opal_output(1, "common_ompio: error allocating memory\n");
518+
return OMPI_ERR_OUT_OF_RESOURCE;
519+
}
520+
521+
ompio_req->req_fview->f_decoded_iov = (struct iovec*) malloc ( fh->f_num_of_io_entries *
522+
sizeof(struct iovec));
523+
if (NULL == ompio_req->req_fview->f_decoded_iov) {
524+
opal_output(1, "common_ompio_file_iwrite_pregen: could not allocate memory\n");
525+
return OMPI_ERR_OUT_OF_RESOURCE;
526+
}
527+
528+
ompio_req->req_fview->f_iov_count = fh->f_num_of_io_entries;
529+
for (i=0; i < ompio_req->req_fview->f_iov_count; i++) {
530+
ompio_req->req_fview->f_decoded_iov[i].iov_base = fh->f_io_array[i].offset;
531+
ompio_req->req_fview->f_decoded_iov[i].iov_len = fh->f_io_array[i].length ;
532+
}
533+
534+
fh->f_num_of_io_entries = 0;
535+
free (fh->f_io_array);
536+
fh->f_io_array = NULL;
537+
538+
mca_common_ompio_post_next_write_subreq(ompio_req, 0);
539+
return OMPI_SUCCESS;
540+
}
541+
475542
int mca_common_ompio_file_iwrite_at (ompio_file_t *fh,
476543
OMPI_MPI_OFFSET_TYPE offset,
477544
const void *buf,

ompi/mca/fcoll/vulcan/fcoll_vulcan.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,7 @@ extern int mca_fcoll_vulcan_priority;
4343
extern int mca_fcoll_vulcan_num_groups;
4444
extern int mca_fcoll_vulcan_write_chunksize;
4545
extern int mca_fcoll_vulcan_async_io;
46+
extern int mca_fcoll_vulcan_use_accelerator_buffers;
4647

4748
OMPI_DECLSPEC extern mca_fcoll_base_component_3_0_0_t mca_fcoll_vulcan_component;
4849

0 commit comments

Comments
 (0)