Add an experimental ability to skip the RTE barriers #2176

Closed
wants to merge 1 commit
42 changes: 22 additions & 20 deletions ompi/runtime/ompi_mpi_finalize.c
@@ -240,26 +240,28 @@ int ompi_mpi_finalize(void)
        del_procs behavior around May of 2014 (see
        https://svn.open-mpi.org/trac/ompi/ticket/4669#comment:4 for
        more details). */
-    if (NULL != opal_pmix.fence_nb) {
-        active = true;
-        /* Note that use of the non-blocking PMIx fence will
-         * allow us to lazily cycle calling
-         * opal_progress(), which will allow any other pending
-         * communications/actions to complete. See
-         * https://github.com/open-mpi/ompi/issues/1576 for the
-         * original bug report. */
-        opal_pmix.fence_nb(NULL, 0, fence_cbfunc, (void*)&active);
-        OMPI_LAZY_WAIT_FOR_COMPLETION(active);
-    } else {
-        /* However, we cannot guarantee that the provided PMIx has
-         * fence_nb. If it doesn't, then do the best we can: an MPI
-         * barrier on COMM_WORLD (which isn't the best because of the
-         * reasons cited above), followed by a blocking PMIx fence
-         * (which does not call opal_progress()). */
-        ompi_communicator_t *comm = &ompi_mpi_comm_world.comm;
-        comm->c_coll.coll_barrier(comm, comm->c_coll.coll_barrier_module);
-
-        opal_pmix.fence(NULL, 0);
+    if (!ompi_async_mpi_finalize) {
+        if (NULL != opal_pmix.fence_nb) {
+            active = true;
+            /* Note that use of the non-blocking PMIx fence will
+             * allow us to lazily cycle calling
+             * opal_progress(), which will allow any other pending
+             * communications/actions to complete. See
+             * https://github.com/open-mpi/ompi/issues/1576 for the
+             * original bug report. */
+            opal_pmix.fence_nb(NULL, 0, fence_cbfunc, (void*)&active);
+            OMPI_LAZY_WAIT_FOR_COMPLETION(active);
+        } else {
+            /* However, we cannot guarantee that the provided PMIx has
+             * fence_nb. If it doesn't, then do the best we can: an MPI
+             * barrier on COMM_WORLD (which isn't the best because of the
+             * reasons cited above), followed by a blocking PMIx fence
+             * (which does not call opal_progress()). */
+            ompi_communicator_t *comm = &ompi_mpi_comm_world.comm;
+            comm->c_coll.coll_barrier(comm, comm->c_coll.coll_barrier_module);
+
+            opal_pmix.fence(NULL, 0);
+        }
     }

     /* check for timing request - get stop time and report elapsed
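For readers following the barrier mechanics above: opal_pmix.fence_nb() posts the fence and returns immediately, the callback flips the `active` flag once every process has arrived, and the wait macro cycles the progress engine until the flag drops. A minimal, self-contained sketch of that completion-flag pattern (the progress stub and the loop body are stand-ins for opal_progress() and OMPI_LAZY_WAIT_FOR_COMPLETION, whose real definitions may differ):

```c
#include <stdbool.h>
#include <stdio.h>
#include <unistd.h>

static volatile bool active;
static int ticks = 0;

/* Completion callback: the PMIx machinery invokes this once all
 * processes have reached the fence; it simply drops the flag. */
static void fence_cbfunc(int status, void *cbdata)
{
    (void)status;
    *(volatile bool *)cbdata = false;
}

/* Stand-in for opal_progress(): pretend the fence completes after a
 * few progress cycles. */
static void fake_progress(void)
{
    if (5 == ++ticks) {
        fence_cbfunc(0, (void *)&active);
    }
}

int main(void)
{
    active = true;   /* armed before the non-blocking fence is posted */
    /* Lazy wait, as in the finalize path: drive the progress engine,
     * then yield briefly so other pending actions can complete. */
    while (active) {
        fake_progress();
        usleep(100);
    }
    printf("fence released after %d progress cycles\n", ticks);
    return 0;
}
```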
17 changes: 9 additions & 8 deletions ompi/runtime/ompi_mpi_init.c
@@ -804,14 +804,15 @@ int ompi_mpi_init(int argc, char **argv, int requested, int *provided)
     /* wait for everyone to reach this point - this is a hard
      * barrier requirement at this time, though we hope to relax
      * it at a later point */
-    active = true;
-    opal_pmix.commit();
-    if (NULL != opal_pmix.fence_nb) {
-        opal_pmix.fence_nb(NULL, opal_pmix_collect_all_data,
-                           fence_release, (void*)&active);
-        OMPI_WAIT_FOR_COMPLETION(active);
-    } else {
-        opal_pmix.fence(NULL, opal_pmix_collect_all_data);
+    if (!ompi_async_mpi_init) {
+        active = true;
+        if (NULL != opal_pmix.fence_nb) {
+            opal_pmix.fence_nb(NULL, opal_pmix_collect_all_data,
+                               fence_release, (void*)&active);
+            OMPI_WAIT_FOR_COMPLETION(active);
+        } else {
+            opal_pmix.fence(NULL, opal_pmix_collect_all_data);
+        }
     }

     /* check for timing request - get stop time and report elapsed
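One practical consequence of skipping this barrier: ranks may now return from MPI_Init at different times, so application code that relied on the implicit start-up synchronization (e.g., timing a parallel region from "everyone has started") has to impose its own. A small illustration using only standard MPI calls:

```c
#include <mpi.h>
#include <stdio.h>

/* With the init barrier skipped, ranks can leave MPI_Init at different
 * times; anything that assumes a synchronized start should impose its
 * own barrier explicitly. */
int main(int argc, char **argv)
{
    int rank;
    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);

    /* Restore start-up synchronization only where the app needs it. */
    MPI_Barrier(MPI_COMM_WORLD);

    double t0 = MPI_Wtime();
    /* ... timed region ... */
    double t1 = MPI_Wtime();
    if (0 == rank) {
        printf("elapsed: %f s\n", t1 - t0);
    }
    MPI_Finalize();
    return 0;
}
```

Run with the new flag enabled, the explicit MPI_Barrier restores exactly the ordering the RTE barrier used to provide, but only where the application asks for it.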
29 changes: 28 additions & 1 deletion ompi/runtime/ompi_mpi_params.c
@@ -14,7 +14,7 @@
  * Copyright (c) 2007-2015 Los Alamos National Security, LLC. All rights
  *                         reserved.
  * Copyright (c) 2013      NVIDIA Corporation. All rights reserved.
- * Copyright (c) 2013-2014 Intel, Inc. All rights reserved
+ * Copyright (c) 2013-2016 Intel, Inc. All rights reserved
  * Copyright (c) 2015      Mellanox Technologies, Inc.
  *                         All rights reserved.
  * $COPYRIGHT$
@@ -65,6 +65,9 @@ char *ompi_mpi_show_mca_params_string = NULL;
 bool ompi_mpi_have_sparse_group_storage = !!(OMPI_GROUP_SPARSE);
 bool ompi_mpi_preconnect_mpi = false;

+bool ompi_async_mpi_init = false;
+bool ompi_async_mpi_finalize = false;
+
 #define OMPI_ADD_PROCS_CUTOFF_DEFAULT 0
 uint32_t ompi_add_procs_cutoff = OMPI_ADD_PROCS_CUTOFF_DEFAULT;
 bool ompi_mpi_dynamics_enabled = true;
@@ -274,6 +277,30 @@ int ompi_mpi_register_params(void)
                                  0, 0, OPAL_INFO_LVL_3, MCA_BASE_VAR_SCOPE_LOCAL,
                                  &ompi_add_procs_cutoff);

+    ompi_mpi_dynamics_enabled = true;
+    (void) mca_base_var_register("ompi", "mpi", NULL, "dynamics_enabled",
+                                 "Is the MPI dynamic process functionality enabled (e.g., MPI_COMM_SPAWN)? Default is yes, but certain transports and/or environments may disable it.",
+                                 MCA_BASE_VAR_TYPE_BOOL, NULL, 0, 0,
+                                 OPAL_INFO_LVL_4,
+                                 MCA_BASE_VAR_SCOPE_READONLY,
+                                 &ompi_mpi_dynamics_enabled);
+
+    ompi_async_mpi_init = false;
+    (void) mca_base_var_register("ompi", "async", "mpi", "init",
+                                 "Do not perform a barrier at the end of MPI_Init",
+                                 MCA_BASE_VAR_TYPE_BOOL, NULL, 0, 0,
+                                 OPAL_INFO_LVL_9,
+                                 MCA_BASE_VAR_SCOPE_READONLY,
+                                 &ompi_async_mpi_init);
+
+    ompi_async_mpi_finalize = false;
+    (void) mca_base_var_register("ompi", "async", "mpi", "finalize",
+                                 "Do not perform a barrier at the beginning of MPI_Finalize",
+                                 MCA_BASE_VAR_TYPE_BOOL, NULL, 0, 0,
+                                 OPAL_INFO_LVL_9,
+                                 MCA_BASE_VAR_SCOPE_READONLY,
+                                 &ompi_async_mpi_finalize);
+
     value = mca_base_var_find ("opal", "opal", NULL, "abort_delay");
     if (0 <= value) {
         (void) mca_base_var_register_synonym(value, "ompi", "mpi", NULL, "abort_delay",
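Assuming the usual project/framework/component/variable naming, the ("ompi", "async", "mpi", "init") and ("ompi", "async", "mpi", "finalize") registrations should surface as async_mpi_init and async_mpi_finalize, settable with e.g. `mpirun --mca async_mpi_init 1 --mca async_mpi_finalize 1 ./app` or `OMPI_MCA_async_mpi_init=1` in the environment. Because they are registered MCA_BASE_VAR_SCOPE_READONLY, they can be inspected (not modified) at run time through the standard MPI_T control-variable interface; a sketch under that naming assumption, also assuming bool cvars read back as an int:

```c
#include <mpi.h>
#include <stdio.h>
#include <string.h>

/* Look up an MCA-backed control variable through MPI_T and print its
 * value. The name "async_mpi_init" is an assumption about how the
 * registration resolves; check `ompi_info --all` for the actual name. */
int main(int argc, char **argv)
{
    int provided, ncvar;
    MPI_T_init_thread(MPI_THREAD_SINGLE, &provided);
    MPI_Init(&argc, &argv);
    MPI_T_cvar_get_num(&ncvar);

    for (int i = 0; i < ncvar; i++) {
        char name[256], desc[256];
        int nlen = (int)sizeof(name), dlen = (int)sizeof(desc);
        int verbosity, binding, scope;
        MPI_Datatype dtype;
        MPI_T_enum etype;
        MPI_T_cvar_get_info(i, name, &nlen, &verbosity, &dtype,
                            &etype, desc, &dlen, &binding, &scope);
        if (0 == strcmp(name, "async_mpi_init")) {
            MPI_T_cvar_handle handle;
            int count, value;
            /* Not bound to an MPI object, so the object handle is NULL. */
            MPI_T_cvar_handle_alloc(i, NULL, &handle, &count);
            MPI_T_cvar_read(handle, &value);
            printf("%s = %d (%s)\n", name, value, desc);
            MPI_T_cvar_handle_free(&handle);
        }
    }

    MPI_Finalize();
    MPI_T_finalize();
    return 0;
}
```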
7 changes: 7 additions & 0 deletions ompi/runtime/params.h
@@ -134,6 +134,13 @@ OMPI_DECLSPEC extern uint32_t ompi_direct_modex_cutoff;
  */
 OMPI_DECLSPEC extern uint32_t ompi_add_procs_cutoff;

+/* EXPERIMENTAL: do not perform an RTE barrier at the end of MPI_Init */
+OMPI_DECLSPEC extern bool ompi_async_mpi_init;
+
+/* EXPERIMENTAL: do not perform an RTE barrier at the beginning of MPI_Finalize */
+OMPI_DECLSPEC extern bool ompi_async_mpi_finalize;
+
+
 /**
  * Register MCA parameters used by the MPI layer.
  *