Add an experimental ability to skip the RTE barriers #2176

Closed
wants to merge 1 commit
42 changes: 22 additions & 20 deletions ompi/runtime/ompi_mpi_finalize.c
@@ -240,26 +240,28 @@ int ompi_mpi_finalize(void)
        del_procs behavior around May of 2014 (see
        https://svn.open-mpi.org/trac/ompi/ticket/4669#comment:4 for
        more details). */
-    if (NULL != opal_pmix.fence_nb) {
-        active = true;
-        /* Note that use of the non-blocking PMIx fence will
-         * allow us to lazily cycle calling
-         * opal_progress(), which will allow any other pending
-         * communications/actions to complete. See
-         * https://github.com/open-mpi/ompi/issues/1576 for the
-         * original bug report. */
-        opal_pmix.fence_nb(NULL, 0, fence_cbfunc, (void*)&active);
-        OMPI_LAZY_WAIT_FOR_COMPLETION(active);
-    } else {
-        /* However, we cannot guarantee that the provided PMIx has
-         * fence_nb. If it doesn't, then do the best we can: an MPI
-         * barrier on COMM_WORLD (which isn't the best because of the
-         * reasons cited above), followed by a blocking PMIx fence
-         * (which does not call opal_progress()). */
-        ompi_communicator_t *comm = &ompi_mpi_comm_world.comm;
-        comm->c_coll.coll_barrier(comm, comm->c_coll.coll_barrier_module);
-
-        opal_pmix.fence(NULL, 0);
+    if (!ompi_async_mpi_finalize) {
+        if (NULL != opal_pmix.fence_nb) {
+            active = true;
+            /* Note that use of the non-blocking PMIx fence will
+             * allow us to lazily cycle calling
+             * opal_progress(), which will allow any other pending
+             * communications/actions to complete. See
+             * https://github.com/open-mpi/ompi/issues/1576 for the
+             * original bug report. */
+            opal_pmix.fence_nb(NULL, 0, fence_cbfunc, (void*)&active);
+            OMPI_LAZY_WAIT_FOR_COMPLETION(active);
+        } else {
+            /* However, we cannot guarantee that the provided PMIx has
+             * fence_nb. If it doesn't, then do the best we can: an MPI
+             * barrier on COMM_WORLD (which isn't the best because of the
+             * reasons cited above), followed by a blocking PMIx fence
+             * (which does not call opal_progress()). */
+            ompi_communicator_t *comm = &ompi_mpi_comm_world.comm;
+            comm->c_coll.coll_barrier(comm, comm->c_coll.coll_barrier_module);
+
+            opal_pmix.fence(NULL, 0);
+        }
     }

     /* check for timing request - get stop time and report elapsed
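For readers following the barrier mechanics above: opal_pmix.fence_nb() posts the fence and returns immediately, the callback flips the `active` flag once every process has arrived, and the wait macro cycles the progress engine until the flag drops. A minimal, self-contained sketch of that completion-flag pattern (the progress stub and the loop body are stand-ins for opal_progress() and OMPI_LAZY_WAIT_FOR_COMPLETION, whose real definitions may differ):

```c
#include <stdbool.h>
#include <stdio.h>
#include <unistd.h>

static volatile bool active;
static int ticks = 0;

/* Completion callback: the PMIx machinery invokes this once all
 * processes have reached the fence; it simply drops the flag. */
static void fence_cbfunc(int status, void *cbdata)
{
    (void)status;
    *(volatile bool *)cbdata = false;
}

/* Stand-in for opal_progress(): pretend the fence completes after a
 * few progress cycles. */
static void fake_progress(void)
{
    if (5 == ++ticks) {
        fence_cbfunc(0, (void *)&active);
    }
}

int main(void)
{
    active = true;   /* armed before the non-blocking fence is posted */
    /* Lazy wait, as in the finalize path: drive the progress engine,
     * then yield briefly so other pending actions can complete. */
    while (active) {
        fake_progress();
        usleep(100);
    }
    printf("fence released after %d progress cycles\n", ticks);
    return 0;
}
```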
17 changes: 9 additions & 8 deletions ompi/runtime/ompi_mpi_init.c
@@ -804,14 +804,15 @@ int ompi_mpi_init(int argc, char **argv, int requested, int *provided)
     /* wait for everyone to reach this point - this is a hard
      * barrier requirement at this time, though we hope to relax
      * it at a later point */
-    active = true;
-    opal_pmix.commit();
-    if (NULL != opal_pmix.fence_nb) {
-        opal_pmix.fence_nb(NULL, opal_pmix_collect_all_data,
-                           fence_release, (void*)&active);
-        OMPI_WAIT_FOR_COMPLETION(active);
-    } else {
-        opal_pmix.fence(NULL, opal_pmix_collect_all_data);
+    if (!ompi_async_mpi_init) {
+        active = true;
+        if (NULL != opal_pmix.fence_nb) {
+            opal_pmix.fence_nb(NULL, opal_pmix_collect_all_data,
+                               fence_release, (void*)&active);
+            OMPI_WAIT_FOR_COMPLETION(active);
+        } else {
+            opal_pmix.fence(NULL, opal_pmix_collect_all_data);
+        }
     }

     /* check for timing request - get stop time and report elapsed
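One practical consequence of skipping this barrier: ranks may now return from MPI_Init at different times, so application code that relied on the implicit start-up synchronization (e.g., timing a parallel region from "everyone has started") has to impose its own. A small illustration using only standard MPI calls:

```c
#include <mpi.h>
#include <stdio.h>

/* With the init barrier skipped, ranks can leave MPI_Init at different
 * times; anything that assumes a synchronized start should impose its
 * own barrier explicitly. */
int main(int argc, char **argv)
{
    int rank;
    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);

    /* Restore start-up synchronization only where the app needs it. */
    MPI_Barrier(MPI_COMM_WORLD);

    double t0 = MPI_Wtime();
    /* ... timed region ... */
    double t1 = MPI_Wtime();
    if (0 == rank) {
        printf("elapsed: %f s\n", t1 - t0);
    }
    MPI_Finalize();
    return 0;
}
```

Run with the new flag enabled, the explicit MPI_Barrier restores exactly the ordering the RTE barrier used to provide, but only where the application asks for it.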
29 changes: 28 additions & 1 deletion ompi/runtime/ompi_mpi_params.c
@@ -14,7 +14,7 @@
  * Copyright (c) 2007-2015 Los Alamos National Security, LLC. All rights
  *                         reserved.
  * Copyright (c) 2013      NVIDIA Corporation. All rights reserved.
- * Copyright (c) 2013-2014 Intel, Inc. All rights reserved
+ * Copyright (c) 2013-2016 Intel, Inc. All rights reserved
  * Copyright (c) 2015      Mellanox Technologies, Inc.
  *                         All rights reserved.
  * $COPYRIGHT$
@@ -65,6 +65,9 @@ char *ompi_mpi_show_mca_params_string = NULL;
 bool ompi_mpi_have_sparse_group_storage = !!(OMPI_GROUP_SPARSE);
 bool ompi_mpi_preconnect_mpi = false;

+bool ompi_async_mpi_init = false;
+bool ompi_async_mpi_finalize = false;
+
 #define OMPI_ADD_PROCS_CUTOFF_DEFAULT 0
 uint32_t ompi_add_procs_cutoff = OMPI_ADD_PROCS_CUTOFF_DEFAULT;
 bool ompi_mpi_dynamics_enabled = true;
@@ -274,6 +277,30 @@ int ompi_mpi_register_params(void)
                                  0, 0, OPAL_INFO_LVL_3, MCA_BASE_VAR_SCOPE_LOCAL,
                                  &ompi_add_procs_cutoff);

+    ompi_mpi_dynamics_enabled = true;
+    (void) mca_base_var_register("ompi", "mpi", NULL, "dynamics_enabled",
+                                 "Is the MPI dynamic process functionality enabled (e.g., MPI_COMM_SPAWN)? Default is yes, but certain transports and/or environments may disable it.",
+                                 MCA_BASE_VAR_TYPE_BOOL, NULL, 0, 0,
+                                 OPAL_INFO_LVL_4,
+                                 MCA_BASE_VAR_SCOPE_READONLY,
+                                 &ompi_mpi_dynamics_enabled);
+
+    ompi_async_mpi_init = false;
+    (void) mca_base_var_register("ompi", "async", "mpi", "init",
+                                 "Do not perform a barrier at the end of MPI_Init",
+                                 MCA_BASE_VAR_TYPE_BOOL, NULL, 0, 0,
+                                 OPAL_INFO_LVL_9,
+                                 MCA_BASE_VAR_SCOPE_READONLY,
+                                 &ompi_async_mpi_init);
+
+    ompi_async_mpi_finalize = false;
+    (void) mca_base_var_register("ompi", "async", "mpi", "finalize",
+                                 "Do not perform a barrier at the beginning of MPI_Finalize",
+                                 MCA_BASE_VAR_TYPE_BOOL, NULL, 0, 0,
+                                 OPAL_INFO_LVL_9,
+                                 MCA_BASE_VAR_SCOPE_READONLY,
+                                 &ompi_async_mpi_finalize);
+
     value = mca_base_var_find ("opal", "opal", NULL, "abort_delay");
     if (0 <= value) {
         (void) mca_base_var_register_synonym(value, "ompi", "mpi", NULL, "abort_delay",
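Assuming the usual project/framework/component/variable naming, the ("ompi", "async", "mpi", "init") and ("ompi", "async", "mpi", "finalize") registrations should surface as async_mpi_init and async_mpi_finalize, settable with e.g. `mpirun --mca async_mpi_init 1 --mca async_mpi_finalize 1 ./app` or `OMPI_MCA_async_mpi_init=1` in the environment. Because they are registered MCA_BASE_VAR_SCOPE_READONLY, they can be inspected (not modified) at run time through the standard MPI_T control-variable interface; a sketch under that naming assumption, also assuming bool cvars read back as an int:

```c
#include <mpi.h>
#include <stdio.h>
#include <string.h>

/* Look up an MCA-backed control variable through MPI_T and print its
 * value. The name "async_mpi_init" is an assumption about how the
 * registration resolves; check `ompi_info --all` for the actual name. */
int main(int argc, char **argv)
{
    int provided, ncvar;
    MPI_T_init_thread(MPI_THREAD_SINGLE, &provided);
    MPI_Init(&argc, &argv);
    MPI_T_cvar_get_num(&ncvar);

    for (int i = 0; i < ncvar; i++) {
        char name[256], desc[256];
        int nlen = (int)sizeof(name), dlen = (int)sizeof(desc);
        int verbosity, binding, scope;
        MPI_Datatype dtype;
        MPI_T_enum etype;
        MPI_T_cvar_get_info(i, name, &nlen, &verbosity, &dtype,
                            &etype, desc, &dlen, &binding, &scope);
        if (0 == strcmp(name, "async_mpi_init")) {
            MPI_T_cvar_handle handle;
            int count, value;
            /* Not bound to an MPI object, so the object handle is NULL. */
            MPI_T_cvar_handle_alloc(i, NULL, &handle, &count);
            MPI_T_cvar_read(handle, &value);
            printf("%s = %d (%s)\n", name, value, desc);
            MPI_T_cvar_handle_free(&handle);
        }
    }

    MPI_Finalize();
    MPI_T_finalize();
    return 0;
}
```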
7 changes: 7 additions & 0 deletions ompi/runtime/params.h
@@ -134,6 +134,13 @@ OMPI_DECLSPEC extern uint32_t ompi_direct_modex_cutoff;
  */
 OMPI_DECLSPEC extern uint32_t ompi_add_procs_cutoff;

+/* EXPERIMENTAL: do not perform an RTE barrier at the end of MPI_Init */
+OMPI_DECLSPEC extern bool ompi_async_mpi_init;
+
+/* EXPERIMENTAL: do not perform an RTE barrier at the beginning of MPI_Finalize */
+OMPI_DECLSPEC extern bool ompi_async_mpi_finalize;
+
+
 /**
  * Register MCA parameters used by the MPI layer.
  *