Skip to content
This repository was archived by the owner on Sep 30, 2022. It is now read-only.

Commit 61f265a

Browse files
author
rhc54
committed
Merge pull request #1010 from jsquyres/pr/v1.10/usnic-connect-can-fail
v1.10: usnic: allow connect(2) to fail temporarily
2 parents 5efd182 + 6f1d772 commit 61f265a

File tree

1 file changed

+18
-2
lines changed

1 file changed

+18
-2
lines changed

ompi/mca/btl/usnic/btl_usnic_cclient.c

Lines changed: 18 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright (c) 2014-2015 Cisco Systems, Inc. All rights reserved.
2+
* Copyright (c) 2014-2016 Cisco Systems, Inc. All rights reserved.
33
* Copyright (c) 2015 Research Organization for Information Science
44
* and Technology (RIST). All rights reserved.
55
* $COPYRIGHT$
@@ -105,7 +105,23 @@ int opal_btl_usnic_connectivity_client_init(void)
105105
address.sun_family = AF_UNIX;
106106
strncpy(address.sun_path, ipc_filename, sizeof(address.sun_path) - 1);
107107

108-
if (0 != connect(agent_fd, (struct sockaddr*) &address, sizeof(address))) {
108+
int count = 0;
109+
while (1) {
110+
int ret = connect(agent_fd, (struct sockaddr*) &address,
111+
sizeof(address));
112+
if (0 == ret) {
113+
break;
114+
}
115+
116+
// If we get ECONNREFUSED, delay a little and try again
117+
if (ECONNREFUSED == errno) {
118+
if (count < mca_btl_usnic_component.connectivity_num_retries) {
119+
usleep(100);
120+
++count;
121+
continue;
122+
}
123+
}
124+
109125
OPAL_ERROR_LOG(OPAL_ERR_IN_ERRNO);
110126
ABORT("connect() failed");
111127
/* Will not return */

0 commit comments

Comments
 (0)