Skip to content

remove redundant PMIx fence calls at initialization #11212

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
48 changes: 8 additions & 40 deletions ompi/instance/instance.c
Original file line number Diff line number Diff line change
Expand Up @@ -331,13 +331,12 @@ static void evhandler_dereg_callbk(pmix_status_t status,
/**
* @brief Function that starts up the common components needed by all instances
*/
static int ompi_mpi_instance_init_common (int argc, char **argv)
static int ompi_mpi_instance_init_common (int argc, char **argv, bool *background_fence)
{
int ret;
ompi_proc_t **procs;
size_t nprocs;
volatile bool active;
bool background_fence = false;
pmix_info_t info[2];
pmix_status_t rc;
opal_pmix_lock_t mylock;
Expand Down Expand Up @@ -402,7 +401,6 @@ static int ompi_mpi_instance_init_common (int argc, char **argv)
}

OMPI_TIMING_NEXT("rte_init");
OMPI_TIMING_IMPORT_OPAL("orte_ess_base_app_setup");
OMPI_TIMING_IMPORT_OPAL("rte_init");

ompi_rte_initialized = true;
Expand Down Expand Up @@ -520,7 +518,7 @@ static int ompi_mpi_instance_init_common (int argc, char **argv)
return ret; /* TODO: need to fix this */
}

OMPI_TIMING_NEXT("commit");
OMPI_TIMING_NEXT("commit");
#if (OPAL_ENABLE_TIMING)
if (OMPI_TIMING_ENABLED && !opal_pmix_base_async_modex &&
opal_pmix_collect_all_data && !opal_process_info.is_singleton) {
Expand Down Expand Up @@ -548,7 +546,9 @@ static int ompi_mpi_instance_init_common (int argc, char **argv)
if (opal_pmix_collect_all_data) {
/* execute the fence_nb in the background to collect
* the data */
background_fence = true;
if (NULL != background_fence) {
*background_fence = true;
}
active = true;
OPAL_POST_OBJECT(&active);
PMIX_INFO_LOAD(&info[0], PMIX_COLLECT_DATA, &opal_pmix_collect_all_data, PMIX_BOOL);
Expand Down Expand Up @@ -709,39 +709,6 @@ static int ompi_mpi_instance_init_common (int argc, char **argv)
return ompi_instance_print_error ("ompi_mpi_init: ompi_comm_cid_init failed", ret);
}

/* Do we need to wait for a debugger? */
ompi_rte_wait_for_debugger();

/* Next timing measurement */
OMPI_TIMING_NEXT("modex-barrier");

if (!opal_process_info.is_singleton) {
/* if we executed the above fence in the background, then
* we have to wait here for it to complete. However, there
* is no reason to do two barriers! */
if (background_fence) {
OMPI_LAZY_WAIT_FOR_COMPLETION(active);
} else if (!ompi_async_mpi_init) {
/* wait for everyone to reach this point - this is a hard
* barrier requirement at this time, though we hope to relax
* it at a later point */
bool flag = false;
active = true;
OPAL_POST_OBJECT(&active);
PMIX_INFO_LOAD(&info[0], PMIX_COLLECT_DATA, &flag, PMIX_BOOL);
if (PMIX_SUCCESS != (rc = PMIx_Fence_nb(NULL, 0, info, 1,
fence_release, (void*)&active))) {
ret = opal_pmix_convert_status(rc);
return ompi_instance_print_error ("PMIx_Fence_nb() failed", ret);
}
OMPI_LAZY_WAIT_FOR_COMPLETION(active);
}
}

/* check for timing request - get stop time and report elapsed
time if so, then start the clock again */
OMPI_TIMING_NEXT("barrier");

#if OPAL_ENABLE_PROGRESS_THREADS == 0
/* Start setting up the event engine for MPI operations. Don't
block in the event library, so that communications don't take
Expand Down Expand Up @@ -794,7 +761,8 @@ static int ompi_mpi_instance_init_common (int argc, char **argv)
return OMPI_SUCCESS;
}

int ompi_mpi_instance_init (int ts_level, opal_info_t *info, ompi_errhandler_t *errhandler, ompi_instance_t **instance, int argc, char **argv)
int ompi_mpi_instance_init (int ts_level, opal_info_t *info, ompi_errhandler_t *errhandler, ompi_instance_t **instance, int argc, char **argv,
bool *background_fence)
{
ompi_instance_t *new_instance;
int ret;
Expand All @@ -809,7 +777,7 @@ int ompi_mpi_instance_init (int ts_level, opal_info_t *info, ompi_errhandler_t

opal_mutex_lock (&instance_lock);
if (0 == opal_atomic_fetch_add_32 (&ompi_instance_count, 1)) {
ret = ompi_mpi_instance_init_common (argc, argv);
ret = ompi_mpi_instance_init_common (argc, argv, background_fence);
if (OPAL_UNLIKELY(OPAL_SUCCESS != ret)) {
opal_mutex_unlock (&instance_lock);
return ret;
Expand Down
9 changes: 7 additions & 2 deletions ompi/instance/instance.h
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
* Copyright (c) 2018 Triad National Security, LLC. All rights reserved.
* Copyright (c) 2018-2022 Triad National Security, LLC. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
Expand Down Expand Up @@ -122,9 +122,14 @@ void ompi_mpi_instance_release (void);
* @param[in] ts_level thread support level (see mpi.h)
* @param[in] info info object
* @param[in] errhander errhandler to set on the instance
* @param[in] argc argument count to application
* @param[in] argv arrray of arguments to application
* @param[out] background_fence set to true if pmix non-blocking fence initiated
* @param[out] instance handler to instance
*/
OMPI_DECLSPEC int ompi_mpi_instance_init (int ts_level, opal_info_t *info, ompi_errhandler_t *errhandler,
ompi_instance_t **instance, int argc, char **argv);
ompi_instance_t **instance, int argc, char **argv,
bool *background_fence);

/**
* @brief Destroy an MPI instance and set it to MPI_SESSION_NULL
Expand Down
2 changes: 1 addition & 1 deletion ompi/mpi/c/session_init.c
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ int MPI_Session_init (MPI_Info info, MPI_Errhandler errhandler, MPI_Session *ses
}
}

rc = ompi_mpi_instance_init (ts_level, &info->super, errhandler, session, 0, NULL);
rc = ompi_mpi_instance_init (ts_level, &info->super, errhandler, session, 0, NULL, NULL);
/* if an error occurred raise it on the null session */
OMPI_ERRHANDLER_RETURN (rc, MPI_SESSION_NULL, rc, FUNC_NAME);
}
68 changes: 4 additions & 64 deletions ompi/runtime/ompi_mpi_init.c
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@

/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
* Copyright (c) 2004-2010 The Trustees of Indiana University and Indiana
Expand Down Expand Up @@ -299,6 +300,7 @@ static void fence_release(pmix_status_t status, void *cbdata)
OPAL_POST_OBJECT(active);
}


int ompi_mpi_init(int argc, char **argv, int requested, int *provided,
bool reinit_ok)
{
Expand Down Expand Up @@ -356,7 +358,8 @@ int ompi_mpi_init(int argc, char **argv, int requested, int *provided,

ompi_mpi_thread_level(requested, provided);

ret = ompi_mpi_instance_init (*provided, &ompi_mpi_info_null.info.super, MPI_ERRORS_ARE_FATAL, &ompi_mpi_instance_default, argc, argv);
ret = ompi_mpi_instance_init (*provided, &ompi_mpi_info_null.info.super, MPI_ERRORS_ARE_FATAL, &ompi_mpi_instance_default, argc, argv,
&background_fence);
if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) {
error = "ompi_mpi_init: ompi_mpi_instance_init failed";
goto error;
Expand Down Expand Up @@ -392,69 +395,6 @@ int ompi_mpi_init(int argc, char **argv, int requested, int *provided,
free(tmp);
}

#if (OPAL_ENABLE_TIMING)
if (OMPI_TIMING_ENABLED && !opal_pmix_base_async_modex &&
opal_pmix_collect_all_data && !opal_process_info.is_singleton) {
if (PMIX_SUCCESS != (rc = PMIx_Fence(NULL, 0, NULL, 0))) {
ret = opal_pmix_convert_status(rc);
error = "timing: pmix-barrier-1 failed";
goto error;
}
OMPI_TIMING_NEXT("pmix-barrier-1");
if (PMIX_SUCCESS != (rc = PMIx_Fence(NULL, 0, NULL, 0))) {
ret = opal_pmix_convert_status(rc);
error = "timing: pmix-barrier-2 failed";
goto error;
}
OMPI_TIMING_NEXT("pmix-barrier-2");
}
#endif

if (!opal_process_info.is_singleton) {
if (opal_pmix_base_async_modex) {
/* if we are doing an async modex, but we are collecting all
* data, then execute the non-blocking modex in the background.
* All calls to modex_recv will be cached until the background
* modex completes. If collect_all_data is false, then we skip
* the fence completely and retrieve data on-demand from the
* source node.
*/
if (opal_pmix_collect_all_data) {
/* execute the fence_nb in the background to collect
* the data */
background_fence = true;
active = true;
OPAL_POST_OBJECT(&active);
PMIX_INFO_LOAD(&info[0], PMIX_COLLECT_DATA, &opal_pmix_collect_all_data, PMIX_BOOL);
if( PMIX_SUCCESS != (rc = PMIx_Fence_nb(NULL, 0, NULL, 0,
fence_release,
(void*)&active))) {
ret = opal_pmix_convert_status(rc);
error = "PMIx_Fence_nb() failed";
goto error;
}
}
} else {
/* we want to do the modex - we block at this point, but we must
* do so in a manner that allows us to call opal_progress so our
* event library can be cycled as we have tied PMIx to that
* event base */
active = true;
OPAL_POST_OBJECT(&active);
PMIX_INFO_LOAD(&info[0], PMIX_COLLECT_DATA, &opal_pmix_collect_all_data, PMIX_BOOL);
rc = PMIx_Fence_nb(NULL, 0, info, 1, fence_release, (void*)&active);
if( PMIX_SUCCESS != rc) {
ret = opal_pmix_convert_status(rc);
error = "PMIx_Fence() failed";
goto error;
}
/* cannot just wait on thread as we need to call opal_progress */
OMPI_LAZY_WAIT_FOR_COMPLETION(active);
}
}

OMPI_TIMING_NEXT("modex");

MCA_PML_CALL(add_comm(&ompi_mpi_comm_world.comm));
MCA_PML_CALL(add_comm(&ompi_mpi_comm_self.comm));

Expand Down