From e30efcc26367b4a0a017526182d13f8219388890 Mon Sep 17 00:00:00 2001 From: Ralph Castain Date: Fri, 6 Sep 2019 08:27:05 -0700 Subject: [PATCH] Be a little less restrictive on interface requirements If both types of interfaces are enabled, don't error out if one of them isn't able to open listener sockets. Only one interface family may be available on some machines, but someone might want to build the code to run more generally. Refs https://github.com/pmix/prrte/pull/249 Signed-off-by: Ralph Castain (cherry picked from commit 06d188ebf3646760f50d4513361b50642af9cec4) --- orte/mca/oob/tcp/help-oob-tcp.txt | 28 ++++++++++++++++++++++++++-- orte/mca/oob/tcp/oob_tcp_listener.c | 20 ++++++++++---------- 2 files changed, 36 insertions(+), 12 deletions(-) diff --git a/orte/mca/oob/tcp/help-oob-tcp.txt b/orte/mca/oob/tcp/help-oob-tcp.txt index fd9dfdfde84..fbf8095add0 100644 --- a/orte/mca/oob/tcp/help-oob-tcp.txt +++ b/orte/mca/oob/tcp/help-oob-tcp.txt @@ -10,8 +10,8 @@ # University of Stuttgart. All rights reserved. # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. -# Copyright (c) 2014-2017 Intel, Inc. All rights reserved. -# Copyright (c) 2015 Cisco Systems, Inc. All rights reserved. +# Copyright (c) 2014-2019 Intel, Inc. All rights reserved. +# Copyright (c) 2015-2018 Cisco Systems, Inc. All rights reserved # $COPYRIGHT$ # # Additional copyrights may follow @@ -113,3 +113,27 @@ The connection was rejected. Static ports were requested while orte_fwd_mpirun_port was set. Both options cannot be simultaneously set. Please either set orte_fwd_mpirun_port=false or remove any static port directives. +# +[version mismatch] +Open MPI detected a mismatch in versions between two processes. This +typically means that you executed "mpirun" (or "mpiexec") from one +version of Open MPI on one node, but your default path on one of the +other nodes upon which you launched found a different version of Open +MPI. 
+ +Open MPI only supports running exactly the same version between all +processes in a single job. + +This will almost certainly cause unpredictable behavior, and may end +up aborting your job. + + Local host: %s + Local process name: %s + Local Open MPI version: %s + Peer host: %s + Peer process name: %s + Peer Open MPI version: %s +# +[no-listeners] +No sockets were able to be opened on the available protocols +(IPv4 and/or IPv6). Please check your network and retry. diff --git a/orte/mca/oob/tcp/oob_tcp_listener.c b/orte/mca/oob/tcp/oob_tcp_listener.c index f452f7b5ef5..d050bae8c0c 100644 --- a/orte/mca/oob/tcp/oob_tcp_listener.c +++ b/orte/mca/oob/tcp/oob_tcp_listener.c @@ -13,7 +13,7 @@ * All rights reserved. * Copyright (c) 2009-2015 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2011 Oak Ridge National Labs. All rights reserved. - * Copyright (c) 2013-2017 Intel, Inc. All rights reserved. + * Copyright (c) 2013-2019 Intel, Inc. All rights reserved. * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ @@ -98,7 +98,7 @@ static void connection_event_handler(int sd, short flags, void* cbdata); */ int orte_oob_tcp_start_listening(void) { - int rc; + int rc = ORTE_SUCCESS, rc2 = ORTE_SUCCESS; mca_oob_tcp_listener_t *listener; /* if we don't have any TCP interfaces, we shouldn't be here */ @@ -112,19 +112,19 @@ int orte_oob_tcp_start_listening(void) } /* create listen socket(s) for incoming connection attempts */ - if (ORTE_SUCCESS != (rc = create_listen())) { - ORTE_ERROR_LOG(rc); - return rc; - } + rc = create_listen(); #if OPAL_ENABLE_IPV6 /* create listen socket(s) for incoming connection attempts */ - if (ORTE_SUCCESS != (rc = create_listen6())) { - ORTE_ERROR_LOG(rc); - return rc; - } + rc2 = create_listen6(); #endif + if (ORTE_SUCCESS != rc && ORTE_SUCCESS != rc2) { + /* we were unable to open any listening sockets */ + opal_show_help("help-oob-tcp.txt", "no-listeners", true); + return ORTE_ERR_FATAL; + } + /* if I am the HNP, start a listening thread so we can * harvest connection requests as rapidly as possible */