Skip to content

PMIx_Connect usage: add optional timeout #8959

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 7 additions & 2 deletions ompi/dpm/dpm.c
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,8 @@
* and Technology (RIST). All rights reserved.
* Copyright (c) 2018 Amazon.com, Inc. or its affiliates. All Rights reserved.
* Copyright (c) 2021 Nanook Consulting. All rights reserved.
* Copyright (c) 2021 Triad National Security, LLC. All rights
* reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
Expand Down Expand Up @@ -104,7 +106,7 @@ int ompi_dpm_connect_accept(ompi_communicator_t *comm, int root,
bool dense, isnew;
opal_process_name_t pname;
opal_list_t ilist, mlist, rlist;
pmix_info_t info;
pmix_info_t info, tinfo;
pmix_value_t pval;
pmix_pdata_t pdat;
pmix_proc_t *procs, pxproc;
Expand Down Expand Up @@ -373,7 +375,10 @@ int ompi_dpm_connect_accept(ompi_communicator_t *comm, int root,
/* tell the host RTE to connect us - this will download
* all known data for the nspace's of participating procs
* so that add_procs will not result in a slew of lookups */
pret = PMIx_Connect(procs, nprocs, NULL, 0);
PMIX_INFO_CONSTRUCT(&tinfo);
PMIX_INFO_LOAD(&tinfo, PMIX_TIMEOUT, &ompi_pmix_connect_timeout, PMIX_UINT32);
pret = PMIx_Connect(procs, nprocs, &tinfo, 1);
PMIX_INFO_DESTRUCT(&tinfo);
PMIX_PROC_FREE(procs, nprocs);
rc = opal_pmix_convert_status(pret);
if (OPAL_SUCCESS != rc) {
Expand Down
10 changes: 10 additions & 0 deletions ompi/runtime/ompi_mpi_params.c
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,8 @@
* All rights reserved.
* Copyright (c) 2016-2019 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
* Copyright (c) 2021 Triad National Security, LLC. All rights
* reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
Expand Down Expand Up @@ -83,6 +85,7 @@ bool ompi_mpi_compat_mpi3 = false;

char *ompi_mpi_spc_attach_string = NULL;
bool ompi_mpi_spc_dump_enabled = false;
/* Timeout, in seconds, applied to calls to PMIx_Connect. Set to 0 (no
 * timeout) and exposed as the "pmix_connect_timeout" MCA parameter in
 * ompi_mpi_register_params(). */
uint32_t ompi_pmix_connect_timeout;

static bool show_default_mca_params = false;
static bool show_file_mca_params = false;
Expand Down Expand Up @@ -391,6 +394,13 @@ int ompi_mpi_register_params(void)
&ompi_mpi_spc_dump_enabled);
#endif // SPC_ENABLE

ompi_pmix_connect_timeout = 0; /* infinite timeout - see PMIx standard */
(void) mca_base_var_register ("ompi", "mpi", NULL, "pmix_connect_timeout",
"Timeout(secs) for calls to PMIx_Connect. Default is no timeout.",
MCA_BASE_VAR_TYPE_UNSIGNED_INT, NULL,
0, 0, OPAL_INFO_LVL_3, MCA_BASE_VAR_SCOPE_LOCAL,
&ompi_pmix_connect_timeout);

return OMPI_SUCCESS;
}

Expand Down
6 changes: 6 additions & 0 deletions ompi/runtime/params.h
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,8 @@
* Copyright (c) 2010-2012 Oak Ridge National Labs. All rights reserved.
* Copyright (c) 2013 NVIDIA Corporation. All rights reserved.
* Copyright (c) 2013 Intel, Inc. All rights reserved
* Copyright (c) 2021 Triad National Security, LLC. All rights
* reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
Expand Down Expand Up @@ -172,6 +174,10 @@ OMPI_DECLSPEC extern char * ompi_mpi_spc_attach_string;
*/
OMPI_DECLSPEC extern bool ompi_mpi_spc_dump_enabled;

/**
* Timeout (in seconds) for calls to PMIx_Connect (default 0, no timeout)
*/
OMPI_DECLSPEC extern uint32_t ompi_pmix_connect_timeout;

/**
* Register MCA parameters used by the MPI layer.
Expand Down