PML/UCX: Use net worker address for remote peers - v4.0.x #6395

Merged: 1 commit, Feb 22, 2019
4 changes: 4 additions & 0 deletions config/ompi_check_ucx.m4
@@ -120,6 +120,10 @@ AC_DEFUN([OMPI_CHECK_UCX],[
                              UCP_ATOMIC_FETCH_OP_FXOR],
                             [], [],
                             [#include <ucp/api/ucp.h>])
              AC_CHECK_DECLS([UCP_WORKER_ATTR_FIELD_ADDRESS_FLAGS],
                             [AC_DEFINE([HAVE_UCP_WORKER_ADDRESS_FLAGS], [1],
                                        [have worker address attribute])], [],
                             [#include <ucp/api/ucp.h>])
              CPPFLAGS=$old_CPPFLAGS

    OPAL_SUMMARY_ADD([[Transports]],[[Open UCX]],[$1],[$ompi_check_ucx_happy])])])
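For reference, this probe only affects the generated configuration header: if the installed UCX headers declare UCP_WORKER_ATTR_FIELD_ADDRESS_FLAGS, the AC_DEFINE above should emit an entry along the lines of the illustrative snippet below, which the pml_ucx.c changes then test with #if / #if !. If the declaration is absent, the macro is simply never defined and the preprocessor tests fall back to the old single global-address path.

    /* have worker address attribute */
    #define HAVE_UCP_WORKER_ADDRESS_FLAGS 1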
65 changes: 61 additions & 4 deletions ompi/mca/pml/ucx/pml_ucx.c
@@ -82,11 +82,46 @@ mca_pml_ucx_module_t ompi_pml_ucx = {
#define PML_UCX_REQ_ALLOCA() \
    ((char *)alloca(ompi_pml_ucx.request_size) + ompi_pml_ucx.request_size);

#if HAVE_UCP_WORKER_ADDRESS_FLAGS
static int mca_pml_ucx_send_worker_address_type(int addr_flags, int modex_scope)
{
    ucs_status_t status;
    ucp_worker_attr_t attrs;
    int rc;

    attrs.field_mask = UCP_WORKER_ATTR_FIELD_ADDRESS |
                       UCP_WORKER_ATTR_FIELD_ADDRESS_FLAGS;
    attrs.address_flags = addr_flags;

    status = ucp_worker_query(ompi_pml_ucx.ucp_worker, &attrs);
    if (UCS_OK != status) {
        PML_UCX_ERROR("Failed to query UCP worker address");
        return OMPI_ERROR;
    }

    OPAL_MODEX_SEND(rc, modex_scope, &mca_pml_ucx_component.pmlm_version,
                    (void*)attrs.address, attrs.address_length);

    ucp_worker_release_address(ompi_pml_ucx.ucp_worker, attrs.address);

    if (OMPI_SUCCESS != rc) {
        return OMPI_ERROR;
    }

    PML_UCX_VERBOSE(2, "Pack %s worker address, size %ld",
                    (modex_scope == OPAL_PMIX_LOCAL) ? "local" : "remote",
                    attrs.address_length);

    return OMPI_SUCCESS;
}
#endif

static int mca_pml_ucx_send_worker_address(void)
{
    ucp_address_t *address;
    ucs_status_t status;

#if !HAVE_UCP_WORKER_ADDRESS_FLAGS
    ucp_address_t *address;
    size_t addrlen;
    int rc;

@@ -96,16 +131,35 @@ static int mca_pml_ucx_send_worker_address(void)
        return OMPI_ERROR;
    }

    PML_UCX_VERBOSE(2, "Pack worker address, size %ld", addrlen);

    OPAL_MODEX_SEND(rc, OPAL_PMIX_GLOBAL,
                    &mca_pml_ucx_component.pmlm_version, (void*)address, addrlen);

    ucp_worker_release_address(ompi_pml_ucx.ucp_worker, address);

    if (OMPI_SUCCESS != rc) {
        PML_UCX_ERROR("Open MPI couldn't distribute EP connection details");
        return OMPI_ERROR;
        goto err;
    }
#else
    /* Pack just network device addresses for remote node peers */
    status = mca_pml_ucx_send_worker_address_type(UCP_WORKER_ADDRESS_FLAG_NET_ONLY,
                                                  OPAL_PMIX_REMOTE);
    if (UCS_OK != status) {
        goto err;
    }

    ucp_worker_release_address(ompi_pml_ucx.ucp_worker, address);
    status = mca_pml_ucx_send_worker_address_type(0, OPAL_PMIX_LOCAL);
    if (UCS_OK != status) {
        goto err;
    }
#endif

    return OMPI_SUCCESS;

err:
    PML_UCX_ERROR("Open MPI couldn't distribute EP connection details");
    return OMPI_ERROR;
}

static int mca_pml_ucx_recv_worker_address(ompi_proc_t *proc,
@@ -121,6 +175,9 @@ static int mca_pml_ucx_recv_worker_address(ompi_proc_t *proc,
PML_UCX_ERROR("Failed to receive UCX worker address: %s (%d)",
opal_strerror(ret), ret);
}

PML_UCX_VERBOSE(2, "Got proc %d address, size %ld",
proc->super.proc_name.vpid, *addrlen_p);
return ret;
}

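Taken together, the pml_ucx.c changes make each process publish two worker addresses through the modex when UCX supports address flags: node-local peers get the full worker address under OPAL_PMIX_LOCAL, while peers on other nodes get only the network-device portion (UCP_WORKER_ADDRESS_FLAG_NET_ONLY) under OPAL_PMIX_REMOTE, so non-network transport details are not shipped across nodes. As a rough illustration of the UCX query this relies on, here is a minimal, hypothetical sketch (not part of this patch) assuming an already-initialized ucp_worker_h and a UCX release that provides UCP_WORKER_ATTR_FIELD_ADDRESS_FLAGS:

#include <stdlib.h>
#include <string.h>
#include <ucp/api/ucp.h>

/* Illustration only: query the worker address, optionally restricted to
 * network-reachable transports, and hand back a private copy. */
static int query_worker_address(ucp_worker_h worker, int net_only,
                                void **addr_p, size_t *len_p)
{
    ucp_worker_attr_t attrs;

    attrs.field_mask    = UCP_WORKER_ATTR_FIELD_ADDRESS |
                          UCP_WORKER_ATTR_FIELD_ADDRESS_FLAGS;
    attrs.address_flags = net_only ? UCP_WORKER_ADDRESS_FLAG_NET_ONLY : 0;

    if (UCS_OK != ucp_worker_query(worker, &attrs)) {
        return -1;
    }

    /* The queried address is owned by UCX; copy it before releasing. */
    *addr_p = malloc(attrs.address_length);
    if (NULL == *addr_p) {
        ucp_worker_release_address(worker, attrs.address);
        return -1;
    }
    memcpy(*addr_p, attrs.address, attrs.address_length);
    *len_p = attrs.address_length;

    ucp_worker_release_address(worker, attrs.address);
    return 0;
}

A caller would publish the net-only blob to remote peers and the unrestricted one to local peers, which is what mca_pml_ucx_send_worker_address does above via OPAL_MODEX_SEND.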