Skip to content

Commit 470b904

Browse files
Ralph Castain and artpol84
Ralph Castain
authored and committed
Add an experimental ability to skip the RTE barriers at the end of MPI_Init and the beginning of MPI_Finalize
(cherry-picked from 2c086e5)
1 parent be030c3 commit 470b904

File tree

4 files changed

+66
-29
lines changed

4 files changed

+66
-29
lines changed

ompi/runtime/ompi_mpi_finalize.c

Lines changed: 22 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -240,26 +240,28 @@ int ompi_mpi_finalize(void)
240240
del_procs behavior around May of 2014 (see
241241
https://svn.open-mpi.org/trac/ompi/ticket/4669#comment:4 for
242242
more details). */
243-
if (NULL != opal_pmix.fence_nb) {
244-
active = true;
245-
/* Note that use of the non-blocking PMIx fence will
246-
* allow us to lazily cycle calling
247-
* opal_progress(), which will allow any other pending
248-
* communications/actions to complete. See
249-
* https://github.com/open-mpi/ompi/issues/1576 for the
250-
* original bug report. */
251-
opal_pmix.fence_nb(NULL, 0, fence_cbfunc, (void*)&active);
252-
OMPI_LAZY_WAIT_FOR_COMPLETION(active);
253-
} else {
254-
/* However, we cannot guarantee that the provided PMIx has
255-
* fence_nb. If it doesn't, then do the best we can: an MPI
256-
* barrier on COMM_WORLD (which isn't the best because of the
257-
* reasons cited above), followed by a blocking PMIx fence
258-
* (which does not call opal_progress()). */
259-
ompi_communicator_t *comm = &ompi_mpi_comm_world.comm;
260-
comm->c_coll.coll_barrier(comm, comm->c_coll.coll_barrier_module);
261-
262-
opal_pmix.fence(NULL, 0);
243+
if (!ompi_async_mpi_finalize) {
244+
if (NULL != opal_pmix.fence_nb) {
245+
active = true;
246+
/* Note that use of the non-blocking PMIx fence will
247+
* allow us to lazily cycle calling
248+
* opal_progress(), which will allow any other pending
249+
* communications/actions to complete. See
250+
* https://github.com/open-mpi/ompi/issues/1576 for the
251+
* original bug report. */
252+
opal_pmix.fence_nb(NULL, 0, fence_cbfunc, (void*)&active);
253+
OMPI_LAZY_WAIT_FOR_COMPLETION(active);
254+
} else {
255+
/* However, we cannot guarantee that the provided PMIx has
256+
* fence_nb. If it doesn't, then do the best we can: an MPI
257+
* barrier on COMM_WORLD (which isn't the best because of the
258+
* reasons cited above), followed by a blocking PMIx fence
259+
* (which does not call opal_progress()). */
260+
ompi_communicator_t *comm = &ompi_mpi_comm_world.comm;
261+
comm->c_coll.coll_barrier(comm, comm->c_coll.coll_barrier_module);
262+
263+
opal_pmix.fence(NULL, 0);
264+
}
263265
}
264266

265267
/* check for timing request - get stop time and report elapsed

ompi/runtime/ompi_mpi_init.c

Lines changed: 9 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -804,14 +804,15 @@ int ompi_mpi_init(int argc, char **argv, int requested, int *provided)
804804
/* wait for everyone to reach this point - this is a hard
805805
* barrier requirement at this time, though we hope to relax
806806
* it at a later point */
807-
active = true;
808-
opal_pmix.commit();
809-
if (NULL != opal_pmix.fence_nb) {
810-
opal_pmix.fence_nb(NULL, opal_pmix_collect_all_data,
811-
fence_release, (void*)&active);
812-
OMPI_WAIT_FOR_COMPLETION(active);
813-
} else {
814-
opal_pmix.fence(NULL, opal_pmix_collect_all_data);
807+
if (!ompi_async_mpi_init) {
808+
active = true;
809+
if (NULL != opal_pmix.fence_nb) {
810+
opal_pmix.fence_nb(NULL, opal_pmix_collect_all_data,
811+
fence_release, (void*)&active);
812+
OMPI_WAIT_FOR_COMPLETION(active);
813+
} else {
814+
opal_pmix.fence(NULL, opal_pmix_collect_all_data);
815+
}
815816
}
816817

817818
/* check for timing request - get stop time and report elapsed

ompi/runtime/ompi_mpi_params.c

Lines changed: 28 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@
1414
* Copyright (c) 2007-2015 Los Alamos National Security, LLC. All rights
1515
* reserved.
1616
* Copyright (c) 2013 NVIDIA Corporation. All rights reserved.
17-
* Copyright (c) 2013-2014 Intel, Inc. All rights reserved
17+
* Copyright (c) 2013-2016 Intel, Inc. All rights reserved
1818
* Copyright (c) 2015 Mellanox Technologies, Inc.
1919
* All rights reserved.
2020
* $COPYRIGHT$
@@ -65,6 +65,9 @@ char *ompi_mpi_show_mca_params_string = NULL;
6565
bool ompi_mpi_have_sparse_group_storage = !!(OMPI_GROUP_SPARSE);
6666
bool ompi_mpi_preconnect_mpi = false;
6767

68+
bool ompi_async_mpi_init = false;
69+
bool ompi_async_mpi_finalize = false;
70+
6871
#define OMPI_ADD_PROCS_CUTOFF_DEFAULT 0
6972
uint32_t ompi_add_procs_cutoff = OMPI_ADD_PROCS_CUTOFF_DEFAULT;
7073
bool ompi_mpi_dynamics_enabled = true;
@@ -274,6 +277,30 @@ int ompi_mpi_register_params(void)
274277
0, 0, OPAL_INFO_LVL_3, MCA_BASE_VAR_SCOPE_LOCAL,
275278
&ompi_add_procs_cutoff);
276279

280+
ompi_mpi_dynamics_enabled = true;
281+
(void) mca_base_var_register("ompi", "mpi", NULL, "dynamics_enabled",
282+
"Is the MPI dynamic process functionality enabled (e.g., MPI_COMM_SPAWN)? Default is yes, but certain transports and/or environments may disable it.",
283+
MCA_BASE_VAR_TYPE_BOOL, NULL, 0, 0,
284+
OPAL_INFO_LVL_4,
285+
MCA_BASE_VAR_SCOPE_READONLY,
286+
&ompi_mpi_dynamics_enabled);
287+
288+
ompi_async_mpi_init = false;
289+
(void) mca_base_var_register("ompi", "async", "mpi", "init",
290+
"Do not perform a barrier at the end of MPI_Init",
291+
MCA_BASE_VAR_TYPE_BOOL, NULL, 0, 0,
292+
OPAL_INFO_LVL_9,
293+
MCA_BASE_VAR_SCOPE_READONLY,
294+
&ompi_async_mpi_init);
295+
296+
ompi_async_mpi_finalize = false;
297+
(void) mca_base_var_register("ompi", "async", "mpi", "finalize",
298+
"Do not perform a barrier at the beginning of MPI_Finalize",
299+
MCA_BASE_VAR_TYPE_BOOL, NULL, 0, 0,
300+
OPAL_INFO_LVL_9,
301+
MCA_BASE_VAR_SCOPE_READONLY,
302+
&ompi_async_mpi_finalize);
303+
277304
value = mca_base_var_find ("opal", "opal", NULL, "abort_delay");
278305
if (0 <= value) {
279306
(void) mca_base_var_register_synonym(value, "ompi", "mpi", NULL, "abort_delay",

ompi/runtime/params.h

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -134,6 +134,13 @@ OMPI_DECLSPEC extern uint32_t ompi_direct_modex_cutoff;
134134
*/
135135
OMPI_DECLSPEC extern uint32_t ompi_add_procs_cutoff;
136136

137+
/* EXPERIMENTAL: do not perform an RTE barrier at the end of MPI_Init */
138+
OMPI_DECLSPEC extern bool ompi_async_mpi_init;
139+
140+
/* EXPERIMENTAL: do not perform an RTE barrier at the beginning of MPI_Finalize */
141+
OMPI_DECLSPEC extern bool ompi_async_mpi_finalize;
142+
143+
137144
/**
138145
* Register MCA parameters used by the MPI layer.
139146
*

0 commit comments

Comments
 (0)