Skip to content

Commit afb0775

Browse files
committed
Merge remote-tracking branch 'upstream/main' into oshmem_base_exchange
2 parents 0cb684d + 285f6b1 commit afb0775

33 files changed

+4073
-2485
lines changed

docs/developers/sphinx.rst

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -113,10 +113,12 @@ permissions to run this command):
113113
# Or: python3 -m pip install -r docs/requirements.txt
114114
115115
This will install Sphinx and some Python modules required for building
116-
the Open MPI documentation in a system-wide location.
116+
the Open MPI documentation in a user-specific location, likely
117+
somewhere under ``$HOME``.
117118

118119
You will likely need to find the location where ``sphinx-build`` was
119-
installed and add it to your ``PATH``.
120+
installed and add it to your ``PATH`` (e.g., on macOS, it might appear
121+
under ``$HOME/Library/Python/PYTHON_VERSION/bin/sphinx-build``).
120122

121123
.. note:: On macOS, look for ``sphinx-build`` under
122124
``$HOME/Library/Python/VERSION/bin`` (where ``VERSION`` is

docs/index.rst

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -82,3 +82,11 @@ Table of contents
8282
history
8383
man-openmpi/index
8484
man-openshmem/index
85+
86+
Contributors
87+
============
88+
89+
A gigantic "thank you!" to all of our contributors:
90+
91+
.. image:: https://contrib.rocks/image?repo=open-mpi/ompi&max=999
92+
:target: https://github.com/open-mpi/ompi/graphs/contributors

examples/Makefile.include

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -56,4 +56,5 @@ EXTRA_DIST += \
5656
examples/oshmem_symmetric_data.c \
5757
examples/Hello.java \
5858
examples/Ring.java \
59-
examples/spc_example.c
59+
examples/spc_example.c \
60+
examples/hello_sessions_c.c

ompi/communicator/comm_cid.c

Lines changed: 13 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@
2424
* Copyright (c) 2017 Mellanox Technologies. All rights reserved.
2525
* Copyright (c) 2018 Amazon.com, Inc. or its affiliates. All Rights reserved.
2626
* Copyright (c) 2021 Nanook Consulting. All rights reserved.
27-
* Copyright (c) 2020-2024 Triad National Security, LLC. All rights
27+
* Copyright (c) 2020-2025 Triad National Security, LLC. All rights
2828
* reserved.
2929
* $COPYRIGHT$
3030
*
@@ -320,6 +320,7 @@ static int ompi_comm_ext_cid_new_block (ompi_communicator_t *newcomm, ompi_commu
320320
pmix_proc_t *procs = NULL;
321321
void *grpinfo = NULL, *list = NULL;
322322
pmix_data_array_t darray;
323+
pmix_info_t tinfo;
323324

324325
switch (mode) {
325326
case OMPI_COMM_CID_GROUP_NEW:
@@ -349,6 +350,13 @@ static int ompi_comm_ext_cid_new_block (ompi_communicator_t *newcomm, ompi_commu
349350
goto fn_exit;
350351
}
351352

353+
rc = PMIx_Info_list_add(grpinfo, PMIX_TIMEOUT, &ompi_pmix_connect_timeout, PMIX_UINT32);
354+
if (PMIX_SUCCESS != rc) {
355+
OPAL_OUTPUT_VERBOSE((10, ompi_comm_output, "PMIx_Info_list_add failed %s %d", PMIx_Error_string(rc), __LINE__));
356+
rc = OMPI_ERR_OUT_OF_RESOURCE;
357+
goto fn_exit;
358+
}
359+
352360
list = PMIx_Info_list_start();
353361

354362
size_t c_index = (size_t)newcomm->c_index;
@@ -450,7 +458,10 @@ static int ompi_comm_ext_cid_new_block (ompi_communicator_t *newcomm, ompi_commu
450458
tag, tproc_count, ninfo, cid_base));
451459

452460
/* destruct the group */
453-
rc = PMIx_Group_destruct (tag, NULL, 0);
461+
PMIX_INFO_CONSTRUCT(&tinfo);
462+
PMIX_INFO_LOAD(&tinfo, PMIX_TIMEOUT, &ompi_pmix_connect_timeout, PMIX_UINT32);
463+
rc = PMIx_Group_destruct (tag, &tinfo, 0);
464+
PMIX_INFO_DESTRUCT(&tinfo);
454465
if(PMIX_SUCCESS != rc) {
455466
OPAL_OUTPUT_VERBOSE((10, ompi_comm_output, "PMIx_Group_destruct failed %s", PMIx_Error_string(rc)));
456467
rc = opal_pmix_convert_status(rc);

ompi/dpm/dpm.c

Lines changed: 0 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -100,18 +100,6 @@ int ompi_dpm_init(void)
100100
return OMPI_SUCCESS;
101101
}
102102

103-
static int compare_pmix_proc(const void *a, const void *b)
104-
{
105-
const pmix_proc_t *proc_a = (pmix_proc_t *)a;
106-
const pmix_proc_t *proc_b = (pmix_proc_t *)b;
107-
108-
int nspace_dif = strncmp(proc_a->nspace, proc_b->nspace, PMIX_MAX_NSLEN);
109-
if (nspace_dif != 0)
110-
return nspace_dif;
111-
112-
return proc_a->rank - proc_b->rank;
113-
}
114-
115103
int ompi_dpm_connect_accept(ompi_communicator_t *comm, int root,
116104
const char *port_string, bool send_first,
117105
ompi_communicator_t **newcomm)
@@ -395,10 +383,6 @@ int ompi_dpm_connect_accept(ompi_communicator_t *comm, int root,
395383
PMIX_INFO_CONSTRUCT(&tinfo);
396384
PMIX_INFO_LOAD(&tinfo, PMIX_TIMEOUT, &ompi_pmix_connect_timeout, PMIX_UINT32);
397385

398-
/*
399-
* sort procs so that all ranks call PMIx_Connect() with the processes in same order
400-
*/
401-
qsort(procs, nprocs, sizeof(pmix_proc_t), compare_pmix_proc);
402386
pret = PMIx_Connect(procs, nprocs, &tinfo, 1);
403387
PMIX_INFO_DESTRUCT(&tinfo);
404388
PMIX_PROC_FREE(procs, nprocs);

ompi/mca/coll/accelerator/coll_accelerator.h

Lines changed: 9 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
* reserved.
66
* Copyright (c) 2014-2024 NVIDIA Corporation. All rights reserved.
77
* Copyright (c) 2024 Triad National Security, LLC. All rights reserved.
8+
* Copyright (c) 2024 Advanced Micro Devices, Inc. All Rights reserved.
89
* $COPYRIGHT$
910
*
1011
* Additional copyrights may follow
@@ -87,22 +88,24 @@ mca_coll_accelerator_reduce_scatter_block(const void *sbuf, void *rbuf, size_t r
8788
* @retval >0 The buffer belongs to a managed buffer in
8889
* device memory.
8990
*/
90-
static inline int mca_coll_accelerator_check_buf(void *addr)
91+
static inline int mca_coll_accelerator_check_buf(void *addr, int *dev_id)
9192
{
9293
uint64_t flags;
93-
int dev_id;
94+
9495
if (OPAL_LIKELY(NULL != addr)) {
95-
return opal_accelerator.check_addr(addr, &dev_id, &flags);
96+
return opal_accelerator.check_addr(addr, dev_id, &flags);
9697
} else {
98+
*dev_id = MCA_ACCELERATOR_NO_DEVICE_ID;
9799
return 0;
98100
}
99101
}
100102

101-
static inline void *mca_coll_accelerator_memcpy(void *dest, const void *src, size_t size)
103+
static inline void *mca_coll_accelerator_memcpy(void *dest, int dest_dev, const void *src, int src_dev, size_t size,
104+
opal_accelerator_transfer_type_t type)
102105
{
103106
int res;
104-
res = opal_accelerator.mem_copy(MCA_ACCELERATOR_NO_DEVICE_ID, MCA_ACCELERATOR_NO_DEVICE_ID,
105-
dest, src, size, MCA_ACCELERATOR_TRANSFER_UNSPEC);
107+
108+
res = opal_accelerator.mem_copy(dest_dev, src_dev, dest, src, size, type);
106109
if (res != 0) {
107110
opal_output(0, "coll/accelerator: Error in mem_copy: res=%d, dest=%p, src=%p, size=%d", res, dest, src,
108111
(int) size);

ompi/mca/coll/accelerator/coll_accelerator_allreduce.c

Lines changed: 10 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
* Copyright (c) 2014-2015 NVIDIA Corporation. All rights reserved.
66
* Copyright (c) 2022 Amazon.com, Inc. or its affiliates. All Rights reserved.
77
* Copyright (c) 2024 Triad National Security, LLC. All rights reserved.
8+
* Copyright (c) 2024 Advanced Micro Devices, Inc. All Rights reserved.
89
* $COPYRIGHT$
910
*
1011
* Additional copyrights may follow
@@ -37,11 +38,12 @@ mca_coll_accelerator_allreduce(const void *sbuf, void *rbuf, size_t count,
3738
mca_coll_accelerator_module_t *s = (mca_coll_accelerator_module_t*) module;
3839
ptrdiff_t gap;
3940
char *rbuf1 = NULL, *sbuf1 = NULL, *rbuf2 = NULL;
41+
int sbuf_dev, rbuf_dev;
4042
size_t bufsize;
4143
int rc;
4244

4345
bufsize = opal_datatype_span(&dtype->super, count, &gap);
44-
rc = mca_coll_accelerator_check_buf((void *)sbuf);
46+
rc = mca_coll_accelerator_check_buf((void *)sbuf, &sbuf_dev);
4547
if (rc < 0) {
4648
return rc;
4749
}
@@ -50,10 +52,11 @@ mca_coll_accelerator_allreduce(const void *sbuf, void *rbuf, size_t count,
5052
if (NULL == sbuf1) {
5153
return OMPI_ERR_OUT_OF_RESOURCE;
5254
}
53-
mca_coll_accelerator_memcpy(sbuf1, sbuf, bufsize);
55+
mca_coll_accelerator_memcpy(sbuf1, MCA_ACCELERATOR_NO_DEVICE_ID, sbuf, sbuf_dev,
56+
bufsize, MCA_ACCELERATOR_TRANSFER_DTOH);
5457
sbuf = sbuf1 - gap;
5558
}
56-
rc = mca_coll_accelerator_check_buf(rbuf);
59+
rc = mca_coll_accelerator_check_buf(rbuf, &rbuf_dev);
5760
if (rc < 0) {
5861
return rc;
5962
}
@@ -63,7 +66,8 @@ mca_coll_accelerator_allreduce(const void *sbuf, void *rbuf, size_t count,
6366
if (NULL != sbuf1) free(sbuf1);
6467
return OMPI_ERR_OUT_OF_RESOURCE;
6568
}
66-
mca_coll_accelerator_memcpy(rbuf1, rbuf, bufsize);
69+
mca_coll_accelerator_memcpy(rbuf1, MCA_ACCELERATOR_NO_DEVICE_ID, rbuf, rbuf_dev,
70+
bufsize, MCA_ACCELERATOR_TRANSFER_DTOH);
6771
rbuf2 = rbuf; /* save away original buffer */
6872
rbuf = rbuf1 - gap;
6973
}
@@ -73,7 +77,8 @@ mca_coll_accelerator_allreduce(const void *sbuf, void *rbuf, size_t count,
7377
}
7478
if (NULL != rbuf1) {
7579
rbuf = rbuf2;
76-
mca_coll_accelerator_memcpy(rbuf, rbuf1, bufsize);
80+
mca_coll_accelerator_memcpy(rbuf, rbuf_dev, rbuf1, MCA_ACCELERATOR_NO_DEVICE_ID, bufsize,
81+
MCA_ACCELERATOR_TRANSFER_HTOD);
7782
free(rbuf1);
7883
}
7984
return rc;

ompi/mca/coll/accelerator/coll_accelerator_exscan.c

Lines changed: 10 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
* Copyright (c) 2014-2015 NVIDIA Corporation. All rights reserved.
66
* Copyright (c) 2022 Amazon.com, Inc. or its affiliates. All Rights reserved.
77
* Copyright (c) 2024 Triad National Security, LLC. All rights reserved.
8+
* Copyright (c) 2024 Advanced Micro Devices, Inc. All Rights reserved.
89
* $COPYRIGHT$
910
*
1011
* Additional copyrights may follow
@@ -29,11 +30,12 @@ int mca_coll_accelerator_exscan(const void *sbuf, void *rbuf, size_t count,
2930
mca_coll_accelerator_module_t *s = (mca_coll_accelerator_module_t*) module;
3031
ptrdiff_t gap;
3132
char *rbuf1 = NULL, *sbuf1 = NULL, *rbuf2 = NULL;
33+
int sbuf_dev, rbuf_dev;
3234
size_t bufsize;
3335
int rc;
3436

3537
bufsize = opal_datatype_span(&dtype->super, count, &gap);
36-
rc = mca_coll_accelerator_check_buf((void *)sbuf);
38+
rc = mca_coll_accelerator_check_buf((void *)sbuf, &sbuf_dev);
3739
if (rc < 0) {
3840
return rc;
3941
}
@@ -43,10 +45,11 @@ int mca_coll_accelerator_exscan(const void *sbuf, void *rbuf, size_t count,
4345
if (NULL == sbuf1) {
4446
return OMPI_ERR_OUT_OF_RESOURCE;
4547
}
46-
mca_coll_accelerator_memcpy(sbuf1, sbuf, bufsize);
48+
mca_coll_accelerator_memcpy(sbuf1, MCA_ACCELERATOR_NO_DEVICE_ID, sbuf, sbuf_dev, bufsize,
49+
MCA_ACCELERATOR_TRANSFER_DTOH);
4750
sbuf = sbuf1 - gap;
4851
}
49-
rc = mca_coll_accelerator_check_buf(rbuf);
52+
rc = mca_coll_accelerator_check_buf(rbuf, &rbuf_dev);
5053
if (rc < 0) {
5154
return rc;
5255
}
@@ -56,7 +59,8 @@ int mca_coll_accelerator_exscan(const void *sbuf, void *rbuf, size_t count,
5659
if (NULL != sbuf1) free(sbuf1);
5760
return OMPI_ERR_OUT_OF_RESOURCE;
5861
}
59-
mca_coll_accelerator_memcpy(rbuf1, rbuf, bufsize);
62+
mca_coll_accelerator_memcpy(rbuf1, MCA_ACCELERATOR_NO_DEVICE_ID, rbuf, rbuf_dev, bufsize,
63+
MCA_ACCELERATOR_TRANSFER_DTOH);
6064
rbuf2 = rbuf; /* save away original buffer */
6165
rbuf = rbuf1 - gap;
6266
}
@@ -68,7 +72,8 @@ int mca_coll_accelerator_exscan(const void *sbuf, void *rbuf, size_t count,
6872
}
6973
if (NULL != rbuf1) {
7074
rbuf = rbuf2;
71-
mca_coll_accelerator_memcpy(rbuf, rbuf1, bufsize);
75+
mca_coll_accelerator_memcpy(rbuf, rbuf_dev, rbuf1, MCA_ACCELERATOR_NO_DEVICE_ID, bufsize,
76+
MCA_ACCELERATOR_TRANSFER_HTOD);
7277
free(rbuf1);
7378
}
7479
return rc;

ompi/mca/coll/accelerator/coll_accelerator_reduce.c

Lines changed: 19 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
* Copyright (c) 2014-2015 NVIDIA Corporation. All rights reserved.
77
* Copyright (c) 2022 Amazon.com, Inc. or its affiliates. All Rights reserved.
88
* Copyright (c) 2024 Triad National Security, LLC. All rights reserved.
9+
* Copyright (c) 2024 Advanced Micro Devices, Inc. All Rights reserved.
910
* $COPYRIGHT$
1011
*
1112
* Additional copyrights may follow
@@ -39,12 +40,13 @@ mca_coll_accelerator_reduce(const void *sbuf, void *rbuf, size_t count,
3940
int rank = ompi_comm_rank(comm);
4041
ptrdiff_t gap;
4142
char *rbuf1 = NULL, *sbuf1 = NULL, *rbuf2 = NULL;
43+
int rbuf_dev, sbuf_dev;
4244
size_t bufsize;
4345
int rc;
4446

4547
bufsize = opal_datatype_span(&dtype->super, count, &gap);
4648

47-
rc = mca_coll_accelerator_check_buf((void *)sbuf);
49+
rc = mca_coll_accelerator_check_buf((void *)sbuf, &sbuf_dev);
4850
if (rc < 0) {
4951
return rc;
5052
}
@@ -53,11 +55,12 @@ mca_coll_accelerator_reduce(const void *sbuf, void *rbuf, size_t count,
5355
if (NULL == sbuf1) {
5456
return OMPI_ERR_OUT_OF_RESOURCE;
5557
}
56-
mca_coll_accelerator_memcpy(sbuf1, sbuf, bufsize);
58+
mca_coll_accelerator_memcpy(sbuf1, MCA_ACCELERATOR_NO_DEVICE_ID, sbuf, sbuf_dev, bufsize,
59+
MCA_ACCELERATOR_TRANSFER_DTOH);
5760
sbuf = sbuf1 - gap;
5861
}
5962

60-
rc = mca_coll_accelerator_check_buf(rbuf);
63+
rc = mca_coll_accelerator_check_buf(rbuf, &rbuf_dev);
6164
if (rc < 0) {
6265
return rc;
6366
}
@@ -67,7 +70,8 @@ mca_coll_accelerator_reduce(const void *sbuf, void *rbuf, size_t count,
6770
if (NULL != sbuf1) free(sbuf1);
6871
return OMPI_ERR_OUT_OF_RESOURCE;
6972
}
70-
mca_coll_accelerator_memcpy(rbuf1, rbuf, bufsize);
73+
mca_coll_accelerator_memcpy(rbuf1, MCA_ACCELERATOR_NO_DEVICE_ID, rbuf, rbuf_dev, bufsize,
74+
MCA_ACCELERATOR_TRANSFER_DTOH);
7175
rbuf2 = rbuf; /* save away original buffer */
7276
rbuf = rbuf1 - gap;
7377
}
@@ -80,7 +84,8 @@ mca_coll_accelerator_reduce(const void *sbuf, void *rbuf, size_t count,
8084
}
8185
if (NULL != rbuf1) {
8286
rbuf = rbuf2;
83-
mca_coll_accelerator_memcpy(rbuf, rbuf1, bufsize);
87+
mca_coll_accelerator_memcpy(rbuf, rbuf_dev, rbuf1, MCA_ACCELERATOR_NO_DEVICE_ID, bufsize,
88+
MCA_ACCELERATOR_TRANSFER_HTOD);
8489
free(rbuf1);
8590
}
8691
return rc;
@@ -94,12 +99,13 @@ mca_coll_accelerator_reduce_local(const void *sbuf, void *rbuf, size_t count,
9499
{
95100
ptrdiff_t gap;
96101
char *rbuf1 = NULL, *sbuf1 = NULL, *rbuf2 = NULL;
102+
int sbuf_dev, rbuf_dev;
97103
size_t bufsize;
98104
int rc;
99105

100106
bufsize = opal_datatype_span(&dtype->super, count, &gap);
101107

102-
rc = mca_coll_accelerator_check_buf((void *)sbuf);
108+
rc = mca_coll_accelerator_check_buf((void *)sbuf, &sbuf_dev);
103109
if (rc < 0) {
104110
return rc;
105111
}
@@ -109,11 +115,12 @@ mca_coll_accelerator_reduce_local(const void *sbuf, void *rbuf, size_t count,
109115
if (NULL == sbuf1) {
110116
return OMPI_ERR_OUT_OF_RESOURCE;
111117
}
112-
mca_coll_accelerator_memcpy(sbuf1, sbuf, bufsize);
118+
mca_coll_accelerator_memcpy(sbuf1, MCA_ACCELERATOR_NO_DEVICE_ID, sbuf, sbuf_dev, bufsize,
119+
MCA_ACCELERATOR_TRANSFER_DTOH);
113120
sbuf = sbuf1 - gap;
114121
}
115122

116-
rc = mca_coll_accelerator_check_buf(rbuf);
123+
rc = mca_coll_accelerator_check_buf(rbuf, &rbuf_dev);
117124
if (rc < 0) {
118125
return rc;
119126
}
@@ -124,7 +131,8 @@ mca_coll_accelerator_reduce_local(const void *sbuf, void *rbuf, size_t count,
124131
if (NULL != sbuf1) free(sbuf1);
125132
return OMPI_ERR_OUT_OF_RESOURCE;
126133
}
127-
mca_coll_accelerator_memcpy(rbuf1, rbuf, bufsize);
134+
mca_coll_accelerator_memcpy(rbuf1, MCA_ACCELERATOR_NO_DEVICE_ID, rbuf, rbuf_dev, bufsize,
135+
MCA_ACCELERATOR_TRANSFER_DTOH);
128136
rbuf2 = rbuf; /* save away original buffer */
129137
rbuf = rbuf1 - gap;
130138
}
@@ -137,7 +145,8 @@ mca_coll_accelerator_reduce_local(const void *sbuf, void *rbuf, size_t count,
137145
}
138146
if (NULL != rbuf1) {
139147
rbuf = rbuf2;
140-
mca_coll_accelerator_memcpy(rbuf, rbuf1, bufsize);
148+
mca_coll_accelerator_memcpy(rbuf, rbuf_dev, rbuf1, MCA_ACCELERATOR_NO_DEVICE_ID, bufsize,
149+
MCA_ACCELERATOR_TRANSFER_HTOD);
141150
free(rbuf1);
142151
}
143152
return rc;

0 commit comments

Comments
 (0)