diff --git a/opal/mca/btl/tcp/btl_tcp_component.c b/opal/mca/btl/tcp/btl_tcp_component.c
index 131d9c1fe53..be6e65ffc13 100644
--- a/opal/mca/btl/tcp/btl_tcp_component.c
+++ b/opal/mca/btl/tcp/btl_tcp_component.c
@@ -15,7 +15,7 @@
  * Copyright (c) 2009      Oak Ridge National Laboratory
  * Copyright (c) 2012-2015 Los Alamos National Security, LLC. All rights
  *                         reserved.
- * Copyright (c) 2013-2015 NVIDIA Corporation. All rights reserved.
+ * Copyright (c) 2013-2024 NVIDIA Corporation. All rights reserved.
  * Copyright (c) 2014-2019 Intel, Inc. All rights reserved.
  * Copyright (c) 2014-2017 Research Organization for Information Science
  *                         and Technology (RIST). All rights reserved.
@@ -1502,9 +1502,32 @@ static void mca_btl_tcp_component_recv_handler(int sd, short flags, void *user)
     /* lookup the corresponding process */
     btl_proc = mca_btl_tcp_proc_lookup(&guid);
     if (NULL == btl_proc) {
-        opal_show_help("help-mpi-btl-tcp.txt", "server accept cannot find guid", true,
-                       opal_process_info.nodename, getpid());
+        const char *peer = opal_fd_get_peer_name(sd);
+        if (0 == opal_compare_proc(opal_process_info.my_name, guid)) {
+            opal_show_help("help-mpi-btl-tcp.txt", "server cannot accept connection from self", true,
+                           peer, OPAL_NAME_PRINT(guid),
+                           opal_process_info.nodename, getpid());
+            /**
+             * Special case: we used an interface to reach a remote peer, but
+             * the interface is in fact local-only and we received our own
+             * connection attempt. Simply closing the socket would leave the
+             * connecting side unable to tell that this interface must not be
+             * used. Instead, identify ourselves by sending our own guid back,
+             * so that the initiator can mark the interface as unusable for
+             * future communications.
+             */
+            if (sizeof(hs_msg) != mca_btl_tcp_send_blocking(sd, &hs_msg, sizeof(hs_msg))) {
+                opal_show_help("help-mpi-btl-tcp.txt", "client handshake fail", true,
+                               opal_process_info.nodename, (int) sizeof(hs_msg),
+                               "connect ACK failed to send magic-id and guid");
+            }
+        } else {
+            opal_show_help("help-mpi-btl-tcp.txt", "server accept cannot find guid", true,
+                           OPAL_NAME_PRINT(opal_process_info.my_name), opal_process_info.nodename,
+                           getpid(), OPAL_NAME_PRINT(guid), peer);
+        }
         CLOSE_THE_SOCKET(sd);
+        free((char *) peer);
         return;
     }
 
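The hunk above relies on the connect() initiator recognizing its own guid when it comes back on the wire. Below is a minimal, self-contained sketch of that guid-echo technique; it uses a socketpair in place of a real TCP self-connection, and struct hs_msg is a simplified stand-in for the real mca_btl_tcp_endpoint_hs_msg_t (the layout and names are illustrative, not Open MPI's):

/* sketch: detect a self-connection by echoing the handshake guid back */
#include <stdint.h>
#include <stdio.h>
#include <sys/socket.h>
#include <unistd.h>

struct hs_msg { uint64_t magic; uint64_t guid; }; /* simplified handshake */

int main(void)
{
    const uint64_t my_guid = 42;
    int sv[2];
    if (socketpair(AF_UNIX, SOCK_STREAM, 0, sv) < 0) return 1;

    /* "client": send our handshake, as the connect side does */
    struct hs_msg out = { 0xdeadbeefcafe, my_guid };
    if (write(sv[0], &out, sizeof(out)) != (ssize_t) sizeof(out)) return 1;

    /* "server": the accept side sees a guid equal to its own, so instead of
     * silently closing the socket it echoes the handshake back */
    struct hs_msg in;
    if (read(sv[1], &in, sizeof(in)) != (ssize_t) sizeof(in)) return 1;
    if (in.guid == my_guid) {
        if (write(sv[1], &in, sizeof(in)) != (ssize_t) sizeof(in)) return 1;
    }

    /* "client": receiving our own guid back means this interface only loops
     * to ourselves; mark it unusable rather than treating it as a peer */
    struct hs_msg ack;
    if (read(sv[0], &ack, sizeof(ack)) == (ssize_t) sizeof(ack) && ack.guid == my_guid) {
        printf("self-connection detected: exclude this interface\n");
    }

    close(sv[0]);
    close(sv[1]);
    return 0;
}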
diff --git a/opal/mca/btl/tcp/btl_tcp_endpoint.c b/opal/mca/btl/tcp/btl_tcp_endpoint.c
index 28138a6b43f..b08870bd289 100644
--- a/opal/mca/btl/tcp/btl_tcp_endpoint.c
+++ b/opal/mca/btl/tcp/btl_tcp_endpoint.c
@@ -724,11 +724,12 @@ void mca_btl_tcp_set_socket_options(int sd)
  */
 static int mca_btl_tcp_endpoint_start_connect(mca_btl_base_endpoint_t *btl_endpoint)
 {
-    int rc, flags;
+    int rc, flags, local_port;
     struct sockaddr_storage endpoint_addr;
     /* By default consider a IPv4 connection */
     uint16_t af_family = AF_INET;
     opal_socklen_t addrlen = sizeof(struct sockaddr_in);
+    char local_ip[INET6_ADDRSTRLEN]; /* used for printing the IP and port of the local socket */
 
 #if OPAL_ENABLE_IPV6
     if (AF_INET6 == btl_endpoint->endpoint_addr->addr_family) {
@@ -791,6 +792,8 @@ static int mca_btl_tcp_endpoint_start_connect(mca_btl_base_endpoint_t *btl_endpo
             CLOSE_THE_SOCKET(btl_endpoint->endpoint_sd);
             return OPAL_ERROR;
         }
+        inet_ntop(AF_INET, &((struct sockaddr_in *) &btl_endpoint->endpoint_btl->tcp_ifaddr)->sin_addr, local_ip, sizeof(local_ip));
+        local_port = ntohs(((struct sockaddr_in *) &btl_endpoint->endpoint_btl->tcp_ifaddr)->sin_port);
     }
 #if OPAL_ENABLE_IPV6
     if (endpoint_addr.ss_family == AF_INET6) {
@@ -809,13 +812,17 @@ static int mca_btl_tcp_endpoint_start_connect(mca_btl_base_endpo
             CLOSE_THE_SOCKET(btl_endpoint->endpoint_sd);
             return OPAL_ERROR;
         }
+        inet_ntop(AF_INET6, &((struct sockaddr_in6 *) &btl_endpoint->endpoint_btl->tcp_ifaddr_6)->sin6_addr, local_ip, sizeof(local_ip));
+        local_port = ntohs(((struct sockaddr_in6 *) &btl_endpoint->endpoint_btl->tcp_ifaddr_6)->sin6_port);
     }
 #endif
     opal_output_verbose(10, opal_btl_base_framework.framework_output,
-                        "btl: tcp: attempting to connect() to %s address %s on port %d",
+                        "btl: tcp: %s attempting to connect() to %s address %s on port %d using %s:%d",
+                        OPAL_NAME_PRINT(OPAL_PROC_MY_NAME),
                         OPAL_NAME_PRINT(btl_endpoint->endpoint_proc->proc_opal->proc_name),
                         opal_net_get_hostname((struct sockaddr *) &endpoint_addr),
-                        ntohs(btl_endpoint->endpoint_addr->addr_port));
+                        ntohs(btl_endpoint->endpoint_addr->addr_port),
+                        local_ip, local_port);
 
     if (0 == connect(btl_endpoint->endpoint_sd, (struct sockaddr *) &endpoint_addr, addrlen)) {
         opal_output_verbose(10, opal_btl_base_framework.framework_output,
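The logging added above prints the local bind address cached in the module's tcp_ifaddr. As an aside, the same information can be recovered directly from the socket with getsockname(), which is also the way to observe an ephemeral port chosen by the kernel; a minimal sketch using only standard POSIX calls (this is an illustration, not code from the patch):

/* sketch: recover the local IP:port actually bound to a socket */
#include <arpa/inet.h>
#include <netinet/in.h>
#include <stdio.h>
#include <string.h>
#include <sys/socket.h>
#include <unistd.h>

int main(void)
{
    int sd = socket(AF_INET, SOCK_STREAM, 0);
    if (sd < 0) return 1;

    /* bind to any local address with an ephemeral port, as connect() would */
    struct sockaddr_in local;
    memset(&local, 0, sizeof(local));
    local.sin_family = AF_INET;
    local.sin_addr.s_addr = htonl(INADDR_ANY);
    local.sin_port = 0;
    if (bind(sd, (struct sockaddr *) &local, sizeof(local)) < 0) return 1;

    /* read back the bound address, suitable for the kind of log message the
     * patch emits ("... using %s:%d") */
    struct sockaddr_in bound;
    socklen_t len = sizeof(bound);
    if (getsockname(sd, (struct sockaddr *) &bound, &len) < 0) return 1;

    char ip[INET6_ADDRSTRLEN];
    inet_ntop(AF_INET, &bound.sin_addr, ip, sizeof(ip));
    printf("local socket bound to %s:%d\n", ip, ntohs(bound.sin_port));

    close(sd);
    return 0;
}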
diff --git a/opal/mca/btl/tcp/btl_tcp_proc.c b/opal/mca/btl/tcp/btl_tcp_proc.c
index fc038fde92a..ce8430d3b40 100644
--- a/opal/mca/btl/tcp/btl_tcp_proc.c
+++ b/opal/mca/btl/tcp/btl_tcp_proc.c
@@ -409,7 +409,68 @@ mca_btl_tcp_proc_t *mca_btl_tcp_proc_create(opal_proc_t *proc)
         goto cleanup;
     }
 
-    btl_proc->proc_addr_count = size / sizeof(mca_btl_tcp_modex_addr_t);
+    /**
+     * If the peer lives on another node, drop every remote address that is
+     * identical to one of our local addresses: such addresses are local-only
+     * (loopback or virtual interfaces) and cannot reach the remote peer.
+     */
+    size_t count = size / sizeof(mca_btl_tcp_modex_addr_t);
+    if (!OPAL_PROC_ON_LOCAL_NODE(proc->proc_flags)) {
+        opal_list_t *local_ifs = &mca_btl_tcp_component.local_ifs;
+        opal_if_t *local_iter;
+        char tmp[2][INET6_ADDRSTRLEN];
+        for (size_t i = 0; i < count; /* i advances only when no match is found */) {
+            OPAL_LIST_FOREACH (local_iter, local_ifs, opal_if_t) {
+                if (MCA_BTL_TCP_AF_INET == remote_addrs[i].addr_family &&
+                    AF_INET == local_iter->af_family) {
+                    if (0 == memcmp(&((struct sockaddr_in *) &local_iter->if_addr)->sin_addr,
+                                    remote_addrs[i].addr,
+                                    sizeof(struct in_addr))) {
+                        /* we found a match */
+                        inet_ntop(AF_INET, remote_addrs[i].addr, tmp[0], sizeof(tmp[0]));
+                        inet_ntop(AF_INET, &((struct sockaddr_in *) &local_iter->if_addr)->sin_addr,
+                                  tmp[1], sizeof(tmp[1]));
+                        goto match_found;
+                    }
+                }
+#if OPAL_ENABLE_IPV6
+                else if (MCA_BTL_TCP_AF_INET6 == remote_addrs[i].addr_family &&
+                         AF_INET6 == local_iter->af_family) {
+                    if (0 == memcmp(&((struct sockaddr_in6 *) &local_iter->if_addr)->sin6_addr,
+                                    remote_addrs[i].addr,
+                                    sizeof(struct in6_addr))) {
+                        /* we found a match */
+                        inet_ntop(AF_INET6, remote_addrs[i].addr, tmp[0], sizeof(tmp[0]));
+                        inet_ntop(AF_INET6, &((struct sockaddr_in6 *) &local_iter->if_addr)->sin6_addr,
+                                  tmp[1], sizeof(tmp[1]));
+                        goto match_found;
+                    }
+                }
+#endif /* OPAL_ENABLE_IPV6 */
+            }
+            if (MCA_BTL_TCP_AF_INET == remote_addrs[i].addr_family) {
+                inet_ntop(AF_INET, remote_addrs[i].addr, tmp[0], sizeof(tmp[0]));
+            } else {
+                inet_ntop(AF_INET6, remote_addrs[i].addr, tmp[0], sizeof(tmp[0]));
+            }
+            opal_output_verbose(20, opal_btl_base_framework.framework_output,
+                                "btl: tcp: Accept IP %s from %s\n", tmp[0],
+                                OPAL_NAME_PRINT(proc->proc_name));
+            i++; /* go to the next remote interface */
+            continue;
+        match_found:
+            opal_output_verbose(20, opal_btl_base_framework.framework_output,
+                                "btl: tcp: Drop IP %s from %s because it matches "
+                                "the local IP %s (%s)!\n",
+                                tmp[0], OPAL_NAME_PRINT(proc->proc_name), tmp[1],
+                                local_iter->if_name);
+            count--;
+            memmove(&remote_addrs[i], &remote_addrs[i + 1],
+                    (count - i) * sizeof(mca_btl_tcp_modex_addr_t));
+            break;
+        }
+    }
+    btl_proc->proc_addr_count = count;
     btl_proc->proc_addrs = malloc(btl_proc->proc_addr_count * sizeof(mca_btl_tcp_addr_t));
     if (NULL == btl_proc->proc_addrs) {
         rc = OPAL_ERR_OUT_OF_RESOURCE;
diff --git a/opal/mca/btl/tcp/help-mpi-btl-tcp.txt b/opal/mca/btl/tcp/help-mpi-btl-tcp.txt
index 0afedce447f..8e21e54aa42 100644
--- a/opal/mca/btl/tcp/help-mpi-btl-tcp.txt
+++ b/opal/mca/btl/tcp/help-mpi-btl-tcp.txt
@@ -113,6 +113,19 @@ aborting your job.
   Flag: %s
   Error: %s (%d)
 #
+[server cannot accept connection from self]
+WARNING: Open MPI accepted a TCP connection from what appears to be
+the same Open MPI process. This is not supported; please exclude
+the interface corresponding to the following IP address (using
+btl_tcp_if_exclude): %s
+
+This attempted connection will be ignored; your MPI job may or may not
+continue properly.
+
+  Process guid: %s
+  Local host: %s
+  PID: %d
+#
 [server accept cannot find guid]
 WARNING: Open MPI accepted a TCP connection from what appears to be
 a another Open MPI process but cannot find a corresponding process
@@ -121,8 +134,11 @@ entry for that peer.
 This attempted connection will be ignored; your MPI job may or may not
 continue properly.
 
-  Local host: %s
-  PID: %d
+  Process guid: %s
+  Local host: %s
+  PID: %d
+  Peer guid: %s
+  Source IP of socket: %s
 #
 [server getpeername failed]
 WARNING: Open MPI failed to look up the peer IP address information of
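The btl_tcp_proc.c hunk filters the peer's modex addresses in place, compacting the array with memmove() each time a local duplicate is dropped so the surviving entries stay contiguous. A self-contained sketch of that filter on fixed sample data follows; plain in_addr arrays stand in for mca_btl_tcp_modex_addr_t and the opal_if_t interface list:

/* sketch: drop remote addresses that collide with local ones, then compact */
#include <arpa/inet.h>
#include <stdio.h>
#include <string.h>

int main(void)
{
    /* addresses the "peer" advertised via the modex */
    struct in_addr remote[4];
    inet_pton(AF_INET, "192.168.1.5", &remote[0]);
    inet_pton(AF_INET, "127.0.0.1", &remote[1]);  /* local-only: must go */
    inet_pton(AF_INET, "10.0.0.7", &remote[2]);
    inet_pton(AF_INET, "10.0.0.9", &remote[3]);   /* matches a local IP */
    size_t count = 4;

    /* our own interface addresses */
    struct in_addr local[2];
    inet_pton(AF_INET, "127.0.0.1", &local[0]);
    inet_pton(AF_INET, "10.0.0.9", &local[1]);

    for (size_t i = 0; i < count; /* i advances only when no match is found */) {
        int matched = 0;
        for (size_t j = 0; j < sizeof(local) / sizeof(local[0]); j++) {
            if (0 == memcmp(&remote[i], &local[j], sizeof(struct in_addr))) {
                matched = 1;
                break;
            }
        }
        if (matched) {
            /* same compaction as the patch: shift the tail left by one slot */
            count--;
            memmove(&remote[i], &remote[i + 1], (count - i) * sizeof(struct in_addr));
        } else {
            i++;
        }
    }

    char buf[INET_ADDRSTRLEN];
    for (size_t i = 0; i < count; i++) {
        printf("kept %s\n", inet_ntop(AF_INET, &remote[i], buf, sizeof(buf)));
    }
    return 0;
}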