Skip to content

Fix incorrect TCP connections. #12259

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
29 changes: 26 additions & 3 deletions opal/mca/btl/tcp/btl_tcp_component.c
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
* Copyright (c) 2009 Oak Ridge National Laboratory
* Copyright (c) 2012-2015 Los Alamos National Security, LLC. All rights
* reserved.
* Copyright (c) 2013-2015 NVIDIA Corporation. All rights reserved.
* Copyright (c) 2013-2024 NVIDIA Corporation. All rights reserved.
* Copyright (c) 2014-2019 Intel, Inc. All rights reserved.
* Copyright (c) 2014-2017 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
Expand Down Expand Up @@ -1502,9 +1502,32 @@ static void mca_btl_tcp_component_recv_handler(int sd, short flags, void *user)
/* lookup the corresponding process */
btl_proc = mca_btl_tcp_proc_lookup(&guid);
if (NULL == btl_proc) {
opal_show_help("help-mpi-btl-tcp.txt", "server accept cannot find guid", true,
opal_process_info.nodename, getpid());
const char *peer = opal_fd_get_peer_name(sd);
if( 0 == opal_compare_proc(opal_process_info.my_name, guid) ) {
opal_show_help("help-mpi-btl-tcp.txt", "server cannot accept connection from self", true,
peer, OPAL_NAME_PRINT(guid),
opal_process_info.nodename, getpid());
Comment on lines +1507 to +1509
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This seems like a recoverable case so do we always want to print this message?

We print a message for this when we handle the connection: https://github.com/open-mpi/ompi/pull/12259/files#diff-cc4089aecdcf0cb98dd15472b23f1a3f2af00357a180f986cdba0c89738472dcR462

Copy link
Member Author

@bosilca bosilca Mar 5, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We can handle with this patch, but it is a misconfiguration and the user should fix in future runs. The other side is a verbose output, it will only show when the BTL verbosity is set to 20.

/**
* Special case: we used an interface to send data to a remote peer
* but that interface is only local, and we received our own message.
* If we just close the socket this will confuse the connection, as
* it will not be able to know that the interface should not be
* used. Instead, we can identify ourselves by sending our guid
* back to ourselves, marking the interface as improper for future
* communications.
*/
if (sizeof(hs_msg) != mca_btl_tcp_send_blocking(sd, &hs_msg, sizeof(hs_msg))) {
opal_show_help("help-mpi-btl-tcp.txt", "client handshake fail", true,
opal_process_info.nodename, sizeof(hs_msg),
"connect ACK failed to send magic-id and guid");
}
} else {
opal_show_help("help-mpi-btl-tcp.txt", "server accept cannot find guid", true,
OPAL_NAME_PRINT(opal_process_info.my_name), opal_process_info.nodename,
getpid(), OPAL_NAME_PRINT(guid), peer);
}
CLOSE_THE_SOCKET(sd);
free((char*)peer);
return;
}

Expand Down
13 changes: 10 additions & 3 deletions opal/mca/btl/tcp/btl_tcp_endpoint.c
Original file line number Diff line number Diff line change
Expand Up @@ -724,11 +724,12 @@ void mca_btl_tcp_set_socket_options(int sd)
*/
static int mca_btl_tcp_endpoint_start_connect(mca_btl_base_endpoint_t *btl_endpoint)
{
int rc, flags;
int rc, flags, local_port;
struct sockaddr_storage endpoint_addr;
/* By default consider a IPv4 connection */
uint16_t af_family = AF_INET;
opal_socklen_t addrlen = sizeof(struct sockaddr_in);
char local_ip[16]; /* used for printing the IP and port of the local socket */

#if OPAL_ENABLE_IPV6
if (AF_INET6 == btl_endpoint->endpoint_addr->addr_family) {
Expand Down Expand Up @@ -791,6 +792,8 @@ static int mca_btl_tcp_endpoint_start_connect(mca_btl_base_endpoint_t *btl_endpo
CLOSE_THE_SOCKET(btl_endpoint->endpoint_sd);
return OPAL_ERROR;
}
inet_ntop(AF_INET, &((struct sockaddr_in *)&btl_endpoint->endpoint_btl->tcp_ifaddr)->sin_addr, local_ip, 16);
local_port = htons(((struct sockaddr_in *) &btl_endpoint->endpoint_btl->tcp_ifaddr)->sin_port);
}
#if OPAL_ENABLE_IPV6
if (endpoint_addr.ss_family == AF_INET6) {
Expand All @@ -809,13 +812,17 @@ static int mca_btl_tcp_endpoint_start_connect(mca_btl_base_endpoint_t *btl_endpo
CLOSE_THE_SOCKET(btl_endpoint->endpoint_sd);
return OPAL_ERROR;
}
inet_ntop(AF_INET6, &((struct sockaddr_in *)&btl_endpoint->endpoint_btl->tcp_ifaddr)->sin6_addr, local_ip, 16);
local_port = htons(((struct sockaddr_in6 *) &btl_endpoint->endpoint_btl->tcp_ifaddr)->sin6_port);
}
#endif
opal_output_verbose(10, opal_btl_base_framework.framework_output,
"btl: tcp: attempting to connect() to %s address %s on port %d",
"btl: tcp: %s attempting to connect() to %s address %s on port %d using %s:%d",
OPAL_NAME_PRINT(OPAL_PROC_MY_NAME),
OPAL_NAME_PRINT(btl_endpoint->endpoint_proc->proc_opal->proc_name),
opal_net_get_hostname((struct sockaddr *) &endpoint_addr),
ntohs(btl_endpoint->endpoint_addr->addr_port));
ntohs(btl_endpoint->endpoint_addr->addr_port),
local_ip, local_port);

if (0 == connect(btl_endpoint->endpoint_sd, (struct sockaddr *) &endpoint_addr, addrlen)) {
opal_output_verbose(10, opal_btl_base_framework.framework_output,
Expand Down
63 changes: 62 additions & 1 deletion opal/mca/btl/tcp/btl_tcp_proc.c
Original file line number Diff line number Diff line change
Expand Up @@ -409,7 +409,68 @@ mca_btl_tcp_proc_t *mca_btl_tcp_proc_create(opal_proc_t *proc)
goto cleanup;
}

btl_proc->proc_addr_count = size / sizeof(mca_btl_tcp_modex_addr_t);
/**
* If the peer is physically located on another node, remove all interfaces
* that have an IP address identical to any local interface (this will
* remove all the local and virtual interfaces).
*/
size_t count = size / sizeof(mca_btl_tcp_modex_addr_t);
if (!OPAL_PROC_ON_LOCAL_NODE(proc->proc_flags)) {
opal_list_t *local_ifs = &mca_btl_tcp_component.local_ifs;
opal_if_t *local_iter;
char tmp[2][16];
for (uint32_t i = 0; i < count; /* no automatic progress */) {
OPAL_LIST_FOREACH (local_iter, local_ifs, opal_if_t) {
if (MCA_BTL_TCP_AF_INET == remote_addrs[i].addr_family &&
AF_INET == local_iter->af_family) {
if (0 == memcmp(&((struct sockaddr_in *)&local_iter->if_addr)->sin_addr,
remote_addrs[i].addr,
sizeof(struct in_addr))) {
/* we found a match */
inet_ntop(AF_INET, remote_addrs[i].addr, tmp[0], 16);
inet_ntop(AF_INET, &((struct sockaddr_in *) &local_iter->if_addr)->sin_addr,
tmp[1], 16);
goto match_found;
}
}
#if OPAL_ENABLE_IPV6
else if (MCA_BTL_TCP_AF_INET6 == remote_addrs[i].addr_family &&
AF_INET6 == local_iter->af_family) {
if (0 == memcmp(&((struct sockaddr_in6 *) &local_iter->if_addr)->sin6_addr,
remote_addrs[i].addr,
sizeof(struct in6_addr))) {
/* we found a match */
inet_ntop(AF_INET6, remote_addrs[i].addr, tmp[0], 16);
inet_ntop(AF_INET6, &((struct sockaddr_in6 *) &local_iter->if_addr)->sin6_addr,
tmp[1], 16);
goto match_found;
}
}
#endif /* OPAL_ENABLE_IPV6 */
}
if (MCA_BTL_TCP_AF_INET == remote_addrs[i].addr_family) {
inet_ntop(AF_INET, remote_addrs[i].addr, tmp[0], 16);
} else {
inet_ntop(AF_INET6, remote_addrs[i].addr, tmp[0], 16);
}
opal_output_verbose(20, opal_btl_base_framework.framework_output,
"btl: tcp: Accept IP %s from %s\n", tmp[0],
OPAL_NAME_PRINT(proc->proc_name));
i++; /* go to the next remote interface */
continue;
match_found:
opal_output_verbose(20, opal_btl_base_framework.framework_output,
"btl: tcp: Drop IP %s from %s because it matches "
"the local IP %s (%s))!\n",
tmp[0], OPAL_NAME_PRINT(proc->proc_name), tmp[1],
local_iter->if_name);
count--;
memmove(&remote_addrs[i], &remote_addrs[i + 1],
(count - i) * sizeof(mca_btl_tcp_modex_addr_t));
break;
}
}
btl_proc->proc_addr_count = count;
btl_proc->proc_addrs = malloc(btl_proc->proc_addr_count * sizeof(mca_btl_tcp_addr_t));
if (NULL == btl_proc->proc_addrs) {
rc = OPAL_ERR_OUT_OF_RESOURCE;
Expand Down
20 changes: 18 additions & 2 deletions opal/mca/btl/tcp/help-mpi-btl-tcp.txt
Original file line number Diff line number Diff line change
Expand Up @@ -113,6 +113,19 @@ aborting your job.
Flag: %s
Error: %s (%d)
#
[server cannot accept connection from self]
WARNING: Open MPI accepted a TCP connection from what appears to be
the same Open MPI process. This is not supported, please exclude
(using btl_tcp_if_exclude) the interface corresponding to the
following IP address %s

This attempted connection will be ignored; your MPI job may or may not
continue properly.

Process guid: %s
on host: %s
PID: %d
#
[server accept cannot find guid]
WARNING: Open MPI accepted a TCP connection from what appears to be a
another Open MPI process but cannot find a corresponding process
Expand All @@ -121,8 +134,11 @@ entry for that peer.
This attempted connection will be ignored; your MPI job may or may not
continue properly.

Local host: %s
PID: %d
Process guid: %s
on host: %s
PID: %d
guid: %s
Source IP of socket: %s
#
[server getpeername failed]
WARNING: Open MPI failed to look up the peer IP address information of
Expand Down