Skip to content

Commit 82f3869

Browse files
authored
Merge pull request #6968 from rhc54/cmr31/oob
v3.1.x: Be a little less restrictive on interface requirements
2 parents 76cdb13 + e30efcc commit 82f3869

File tree

2 files changed

+36
-12
lines changed

2 files changed

+36
-12
lines changed

orte/mca/oob/tcp/help-oob-tcp.txt

Lines changed: 26 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10,8 +10,8 @@
1010
# University of Stuttgart. All rights reserved.
1111
# Copyright (c) 2004-2005 The Regents of the University of California.
1212
# All rights reserved.
13-
# Copyright (c) 2014-2017 Intel, Inc. All rights reserved.
14-
# Copyright (c) 2015 Cisco Systems, Inc. All rights reserved.
13+
# Copyright (c) 2014-2019 Intel, Inc. All rights reserved.
14+
# Copyright (c) 2015-2018 Cisco Systems, Inc.  All rights reserved.
1515
# $COPYRIGHT$
1616
#
1717
# Additional copyrights may follow
@@ -113,3 +113,27 @@ The connection was rejected.
113113
Static ports were requested while orte_fwd_mpirun_port was set.
114114
Both options cannot be simultaneously set. Please either set
115115
orte_fwd_mpirun_port=false or remove any static port directives.
116+
#
117+
[version mismatch]
118+
Open MPI detected a mismatch in versions between two processes. This
119+
typically means that you executed "mpirun" (or "mpiexec") from one
120+
version of Open MPI on one node, but your default path on one of the
121+
other nodes upon which you launched found a different version of Open
122+
MPI.
123+
124+
Open MPI only supports running exactly the same version between all
125+
processes in a single job.
126+
127+
This will almost certainly cause unpredictable behavior, and may end
128+
up aborting your job.
129+
130+
Local host: %s
131+
Local process name: %s
132+
Local Open MPI version: %s
133+
Peer host: %s
134+
Peer process name: %s
135+
Peer Open MPI version: %s
136+
#
137+
[no-listeners]
138+
No listening sockets could be opened on any of the available protocols
139+
(IPv4 and/or IPv6). Please check your network and retry.

orte/mca/oob/tcp/oob_tcp_listener.c

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@
1313
* All rights reserved.
1414
* Copyright (c) 2009-2015 Cisco Systems, Inc. All rights reserved.
1515
* Copyright (c) 2011 Oak Ridge National Labs. All rights reserved.
16-
* Copyright (c) 2013-2017 Intel, Inc. All rights reserved.
16+
* Copyright (c) 2013-2019 Intel, Inc. All rights reserved.
1717
* Copyright (c) 2015 Research Organization for Information Science
1818
* and Technology (RIST). All rights reserved.
1919
* $COPYRIGHT$
@@ -98,7 +98,7 @@ static void connection_event_handler(int sd, short flags, void* cbdata);
9898
*/
9999
int orte_oob_tcp_start_listening(void)
100100
{
101-
int rc;
101+
int rc = ORTE_SUCCESS, rc2 = ORTE_SUCCESS;
102102
mca_oob_tcp_listener_t *listener;
103103

104104
/* if we don't have any TCP interfaces, we shouldn't be here */
@@ -112,19 +112,19 @@ int orte_oob_tcp_start_listening(void)
112112
}
113113

114114
/* create listen socket(s) for incoming connection attempts */
115-
if (ORTE_SUCCESS != (rc = create_listen())) {
116-
ORTE_ERROR_LOG(rc);
117-
return rc;
118-
}
115+
rc = create_listen();
119116

120117
#if OPAL_ENABLE_IPV6
121118
/* create listen socket(s) for incoming connection attempts */
122-
if (ORTE_SUCCESS != (rc = create_listen6())) {
123-
ORTE_ERROR_LOG(rc);
124-
return rc;
125-
}
119+
rc2 = create_listen6();
126120
#endif
127121

122+
if (ORTE_SUCCESS != rc && ORTE_SUCCESS != rc2) {
123+
/* we were unable to open any listening sockets */
124+
opal_show_help("help-oob-tcp.txt", "no-listeners", true);
125+
return ORTE_ERR_FATAL;
126+
}
127+
128128
/* if I am the HNP, start a listening thread so we can
129129
* harvest connection requests as rapidly as possible
130130
*/

0 commit comments

Comments
 (0)