Skip to content

Commit 2d50600

Browse files
authored
Merge pull request #12752 from Akshay-Venkatesh/topic/reduce-local-impl
v4.1.x: ompi/coll/cuda: implement reduce_local
2 parents 1d02355 + c8eff46 commit 2d50600

File tree

3 files changed

+56
-0
lines changed

3 files changed

+56
-0
lines changed

ompi/mca/coll/cuda/coll_cuda.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
/*
2+
* Copyright (c) 2024 NVIDIA Corporation. All rights reserved.
23
* Copyright (c) 2014 The University of Tennessee and The University
34
* of Tennessee Research Foundation. All rights
45
* reserved.
@@ -45,6 +46,11 @@ mca_coll_cuda_allreduce(const void *sbuf, void *rbuf, int count,
4546
struct ompi_communicator_t *comm,
4647
mca_coll_base_module_t *module);
4748

49+
/*
 * Local (communicator-free) reduction entry point of the coll/cuda
 * component: combines `count` elements of `dtype` from sbuf into rbuf
 * using `op`.  Defined in coll_cuda_reduce.c.
 */
int mca_coll_cuda_reduce_local(const void *sbuf, void *rbuf, int count,
50+
struct ompi_datatype_t *dtype,
51+
struct ompi_op_t *op,
52+
mca_coll_base_module_t *module);
53+
4854
int mca_coll_cuda_reduce(const void *sbuf, void *rbuf, int count,
4955
struct ompi_datatype_t *dtype,
5056
struct ompi_op_t *op,

ompi/mca/coll/cuda/coll_cuda_module.c

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
/*
2+
* Copyright (c) 2024 NVIDIA Corporation. All rights reserved.
23
* Copyright (c) 2014-2017 The University of Tennessee and The University
34
* of Tennessee Research Foundation. All rights
45
* reserved.
@@ -104,6 +105,7 @@ mca_coll_cuda_comm_query(struct ompi_communicator_t *comm,
104105
cuda_module->super.coll_gather = NULL;
105106
cuda_module->super.coll_gatherv = NULL;
106107
cuda_module->super.coll_reduce = mca_coll_cuda_reduce;
108+
cuda_module->super.coll_reduce_local = mca_coll_cuda_reduce_local;
107109
cuda_module->super.coll_reduce_scatter = NULL;
108110
cuda_module->super.coll_reduce_scatter_block = mca_coll_cuda_reduce_scatter_block;
109111
cuda_module->super.coll_scan = mca_coll_cuda_scan;

ompi/mca/coll/cuda/coll_cuda_reduce.c

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
/*
2+
* Copyright (c) 2024 NVIDIA Corporation. All rights reserved.
23
* Copyright (c) 2004-2015 The University of Tennessee and The University
34
* of Tennessee Research Foundation. All rights
45
* reserved.
@@ -78,3 +79,50 @@ mca_coll_cuda_reduce(const void *sbuf, void *rbuf, int count,
7879
}
7980
return rc;
8081
}
82+
83+
int
84+
mca_coll_cuda_reduce_local(const void *sbuf, void *rbuf, int count,
85+
struct ompi_datatype_t *dtype,
86+
struct ompi_op_t *op,
87+
mca_coll_base_module_t *module)
88+
{
89+
ptrdiff_t gap;
90+
char *rbuf1 = NULL, *sbuf1 = NULL, *rbuf2 = NULL;
91+
size_t bufsize;
92+
int rc;
93+
94+
bufsize = opal_datatype_span(&dtype->super, count, &gap);
95+
96+
if ((MPI_IN_PLACE != sbuf) && (opal_cuda_check_bufs((char *)sbuf, NULL))) {
97+
sbuf1 = (char*)malloc(bufsize);
98+
if (NULL == sbuf1) {
99+
return OMPI_ERR_OUT_OF_RESOURCE;
100+
}
101+
opal_cuda_memcpy_sync(sbuf1, sbuf, bufsize);
102+
sbuf = sbuf1 - gap;
103+
}
104+
105+
if (opal_cuda_check_bufs((char *)rbuf, NULL)) {
106+
rbuf1 = (char*)malloc(bufsize);
107+
if (NULL == rbuf1) {
108+
if (NULL != sbuf1) free(sbuf1);
109+
return OMPI_ERR_OUT_OF_RESOURCE;
110+
}
111+
opal_cuda_memcpy_sync(rbuf1, rbuf, bufsize);
112+
rbuf2 = rbuf; /* save away original buffer */
113+
rbuf = rbuf1 - gap;
114+
}
115+
116+
ompi_op_reduce(op, (void *)sbuf, rbuf, count, dtype);
117+
rc = OMPI_SUCCESS;
118+
119+
if (NULL != sbuf1) {
120+
free(sbuf1);
121+
}
122+
if (NULL != rbuf1) {
123+
rbuf = rbuf2;
124+
opal_cuda_memcpy_sync(rbuf, rbuf1, bufsize);
125+
free(rbuf1);
126+
}
127+
return rc;
128+
}

0 commit comments

Comments
 (0)