diff --git a/ompi/instance/instance.c b/ompi/instance/instance.c index 2d669ce8c4e..6c4b2c2dd20 100644 --- a/ompi/instance/instance.c +++ b/ompi/instance/instance.c @@ -331,13 +331,12 @@ static void evhandler_dereg_callbk(pmix_status_t status, /** * @brief Function that starts up the common components needed by all instances */ -static int ompi_mpi_instance_init_common (int argc, char **argv) +static int ompi_mpi_instance_init_common (int argc, char **argv, bool *background_fence) { int ret; ompi_proc_t **procs; size_t nprocs; volatile bool active; - bool background_fence = false; pmix_info_t info[2]; pmix_status_t rc; opal_pmix_lock_t mylock; @@ -402,7 +401,6 @@ static int ompi_mpi_instance_init_common (int argc, char **argv) } OMPI_TIMING_NEXT("rte_init"); - OMPI_TIMING_IMPORT_OPAL("orte_ess_base_app_setup"); OMPI_TIMING_IMPORT_OPAL("rte_init"); ompi_rte_initialized = true; @@ -520,7 +518,7 @@ static int ompi_mpi_instance_init_common (int argc, char **argv) return ret; /* TODO: need to fix this */ } - OMPI_TIMING_NEXT("commit"); + OMPI_TIMING_NEXT("commit"); #if (OPAL_ENABLE_TIMING) if (OMPI_TIMING_ENABLED && !opal_pmix_base_async_modex && opal_pmix_collect_all_data && !opal_process_info.is_singleton) { @@ -548,7 +546,9 @@ static int ompi_mpi_instance_init_common (int argc, char **argv) if (opal_pmix_collect_all_data) { /* execute the fence_nb in the background to collect * the data */ - background_fence = true; + if (NULL != background_fence) { + *background_fence = true; + } active = true; OPAL_POST_OBJECT(&active); PMIX_INFO_LOAD(&info[0], PMIX_COLLECT_DATA, &opal_pmix_collect_all_data, PMIX_BOOL); @@ -709,39 +709,6 @@ static int ompi_mpi_instance_init_common (int argc, char **argv) return ompi_instance_print_error ("ompi_mpi_init: ompi_comm_cid_init failed", ret); } - /* Do we need to wait for a debugger? */ - ompi_rte_wait_for_debugger(); - - /* Next timing measurement */ - OMPI_TIMING_NEXT("modex-barrier"); - - if (!opal_process_info.is_singleton) { - /* if we executed the above fence in the background, then - * we have to wait here for it to complete. However, there - * is no reason to do two barriers! */ - if (background_fence) { - OMPI_LAZY_WAIT_FOR_COMPLETION(active); - } else if (!ompi_async_mpi_init) { - /* wait for everyone to reach this point - this is a hard - * barrier requirement at this time, though we hope to relax - * it at a later point */ - bool flag = false; - active = true; - OPAL_POST_OBJECT(&active); - PMIX_INFO_LOAD(&info[0], PMIX_COLLECT_DATA, &flag, PMIX_BOOL); - if (PMIX_SUCCESS != (rc = PMIx_Fence_nb(NULL, 0, info, 1, - fence_release, (void*)&active))) { - ret = opal_pmix_convert_status(rc); - return ompi_instance_print_error ("PMIx_Fence_nb() failed", ret); - } - OMPI_LAZY_WAIT_FOR_COMPLETION(active); - } - } - - /* check for timing request - get stop time and report elapsed - time if so, then start the clock again */ - OMPI_TIMING_NEXT("barrier"); - #if OPAL_ENABLE_PROGRESS_THREADS == 0 /* Start setting up the event engine for MPI operations. Don't block in the event library, so that communications don't take @@ -794,7 +761,8 @@ static int ompi_mpi_instance_init_common (int argc, char **argv) return OMPI_SUCCESS; } -int ompi_mpi_instance_init (int ts_level, opal_info_t *info, ompi_errhandler_t *errhandler, ompi_instance_t **instance, int argc, char **argv) +int ompi_mpi_instance_init (int ts_level, opal_info_t *info, ompi_errhandler_t *errhandler, ompi_instance_t **instance, int argc, char **argv, + bool *background_fence) { ompi_instance_t *new_instance; int ret; @@ -809,7 +777,7 @@ int ompi_mpi_instance_init (int ts_level, opal_info_t *info, ompi_errhandler_t opal_mutex_lock (&instance_lock); if (0 == opal_atomic_fetch_add_32 (&ompi_instance_count, 1)) { - ret = ompi_mpi_instance_init_common (argc, argv); + ret = ompi_mpi_instance_init_common (argc, argv, background_fence); if (OPAL_UNLIKELY(OPAL_SUCCESS != ret)) { opal_mutex_unlock (&instance_lock); return ret; diff --git a/ompi/instance/instance.h b/ompi/instance/instance.h index f72f19525ae..a9f4ff19c8b 100644 --- a/ompi/instance/instance.h +++ b/ompi/instance/instance.h @@ -1,6 +1,6 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* - * Copyright (c) 2018 Triad National Security, LLC. All rights reserved. + * Copyright (c) 2018-2022 Triad National Security, LLC. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -122,9 +122,14 @@ void ompi_mpi_instance_release (void); * @param[in] ts_level thread support level (see mpi.h) * @param[in] info info object * @param[in] errhander errhandler to set on the instance + * @param[in] argc argument count to application + * @param[in] argv arrray of arguments to application + * @param[out] background_fence set to true if pmix non-blocking fence initiated + * @param[out] instance handler to instance */ OMPI_DECLSPEC int ompi_mpi_instance_init (int ts_level, opal_info_t *info, ompi_errhandler_t *errhandler, - ompi_instance_t **instance, int argc, char **argv); + ompi_instance_t **instance, int argc, char **argv, + bool *background_fence); /** * @brief Destroy an MPI instance and set it to MPI_SESSION_NULL diff --git a/ompi/mpi/c/session_init.c b/ompi/mpi/c/session_init.c index 74c6e6f2cc3..16b08de32f1 100644 --- a/ompi/mpi/c/session_init.c +++ b/ompi/mpi/c/session_init.c @@ -54,7 +54,7 @@ int MPI_Session_init (MPI_Info info, MPI_Errhandler errhandler, MPI_Session *ses } } - rc = ompi_mpi_instance_init (ts_level, &info->super, errhandler, session, 0, NULL); + rc = ompi_mpi_instance_init (ts_level, &info->super, errhandler, session, 0, NULL, NULL); /* if an error occurred raise it on the null session */ OMPI_ERRHANDLER_RETURN (rc, MPI_SESSION_NULL, rc, FUNC_NAME); } diff --git a/ompi/runtime/ompi_mpi_init.c b/ompi/runtime/ompi_mpi_init.c index 19c0999d163..fbe93f27236 100644 --- a/ompi/runtime/ompi_mpi_init.c +++ b/ompi/runtime/ompi_mpi_init.c @@ -1,3 +1,4 @@ + /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* * Copyright (c) 2004-2010 The Trustees of Indiana University and Indiana @@ -299,6 +300,7 @@ static void fence_release(pmix_status_t status, void *cbdata) OPAL_POST_OBJECT(active); } + int ompi_mpi_init(int argc, char **argv, int requested, int *provided, bool reinit_ok) { @@ -356,7 +358,8 @@ int ompi_mpi_init(int argc, char **argv, int requested, int *provided, ompi_mpi_thread_level(requested, provided); - ret = ompi_mpi_instance_init (*provided, &ompi_mpi_info_null.info.super, MPI_ERRORS_ARE_FATAL, &ompi_mpi_instance_default, argc, argv); + ret = ompi_mpi_instance_init (*provided, &ompi_mpi_info_null.info.super, MPI_ERRORS_ARE_FATAL, &ompi_mpi_instance_default, argc, argv, + &background_fence); if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) { error = "ompi_mpi_init: ompi_mpi_instance_init failed"; goto error; @@ -392,69 +395,6 @@ int ompi_mpi_init(int argc, char **argv, int requested, int *provided, free(tmp); } -#if (OPAL_ENABLE_TIMING) - if (OMPI_TIMING_ENABLED && !opal_pmix_base_async_modex && - opal_pmix_collect_all_data && !opal_process_info.is_singleton) { - if (PMIX_SUCCESS != (rc = PMIx_Fence(NULL, 0, NULL, 0))) { - ret = opal_pmix_convert_status(rc); - error = "timing: pmix-barrier-1 failed"; - goto error; - } - OMPI_TIMING_NEXT("pmix-barrier-1"); - if (PMIX_SUCCESS != (rc = PMIx_Fence(NULL, 0, NULL, 0))) { - ret = opal_pmix_convert_status(rc); - error = "timing: pmix-barrier-2 failed"; - goto error; - } - OMPI_TIMING_NEXT("pmix-barrier-2"); - } -#endif - - if (!opal_process_info.is_singleton) { - if (opal_pmix_base_async_modex) { - /* if we are doing an async modex, but we are collecting all - * data, then execute the non-blocking modex in the background. - * All calls to modex_recv will be cached until the background - * modex completes. If collect_all_data is false, then we skip - * the fence completely and retrieve data on-demand from the - * source node. - */ - if (opal_pmix_collect_all_data) { - /* execute the fence_nb in the background to collect - * the data */ - background_fence = true; - active = true; - OPAL_POST_OBJECT(&active); - PMIX_INFO_LOAD(&info[0], PMIX_COLLECT_DATA, &opal_pmix_collect_all_data, PMIX_BOOL); - if( PMIX_SUCCESS != (rc = PMIx_Fence_nb(NULL, 0, NULL, 0, - fence_release, - (void*)&active))) { - ret = opal_pmix_convert_status(rc); - error = "PMIx_Fence_nb() failed"; - goto error; - } - } - } else { - /* we want to do the modex - we block at this point, but we must - * do so in a manner that allows us to call opal_progress so our - * event library can be cycled as we have tied PMIx to that - * event base */ - active = true; - OPAL_POST_OBJECT(&active); - PMIX_INFO_LOAD(&info[0], PMIX_COLLECT_DATA, &opal_pmix_collect_all_data, PMIX_BOOL); - rc = PMIx_Fence_nb(NULL, 0, info, 1, fence_release, (void*)&active); - if( PMIX_SUCCESS != rc) { - ret = opal_pmix_convert_status(rc); - error = "PMIx_Fence() failed"; - goto error; - } - /* cannot just wait on thread as we need to call opal_progress */ - OMPI_LAZY_WAIT_FOR_COMPLETION(active); - } - } - - OMPI_TIMING_NEXT("modex"); - MCA_PML_CALL(add_comm(&ompi_mpi_comm_world.comm)); MCA_PML_CALL(add_comm(&ompi_mpi_comm_self.comm));