@@ -82,11 +82,46 @@ mca_pml_ucx_module_t ompi_pml_ucx = {
 #define PML_UCX_REQ_ALLOCA() \
     ((char *)alloca(ompi_pml_ucx.request_size) + ompi_pml_ucx.request_size);
 
+#if HAVE_UCP_WORKER_ADDRESS_FLAGS
+static int mca_pml_ucx_send_worker_address_type(int addr_flags, int modex_scope)
+{
+    ucs_status_t status;
+    ucp_worker_attr_t attrs;
+    int rc;
+
+    attrs.field_mask    = UCP_WORKER_ATTR_FIELD_ADDRESS |
+                          UCP_WORKER_ATTR_FIELD_ADDRESS_FLAGS;
+    attrs.address_flags = addr_flags;
+
+    status = ucp_worker_query(ompi_pml_ucx.ucp_worker, &attrs);
+    if (UCS_OK != status) {
+        PML_UCX_ERROR("Failed to query UCP worker address");
+        return OMPI_ERROR;
+    }
+
+    OPAL_MODEX_SEND(rc, modex_scope, &mca_pml_ucx_component.pmlm_version,
+                    (void*)attrs.address, attrs.address_length);
+
+    ucp_worker_release_address(ompi_pml_ucx.ucp_worker, attrs.address);
+
+    if (OMPI_SUCCESS != rc) {
+        return OMPI_ERROR;
+    }
+
+    PML_UCX_VERBOSE(2, "Pack %s worker address, size %ld",
+                    (modex_scope == OPAL_PMIX_LOCAL) ? "local" : "remote",
+                    attrs.address_length);
+
+    return OMPI_SUCCESS;
+}
+#endif
 
 static int mca_pml_ucx_send_worker_address(void)
 {
-    ucp_address_t *address;
     ucs_status_t status;
+
+#if !HAVE_UCP_WORKER_ADDRESS_FLAGS
+    ucp_address_t *address;
     size_t addrlen;
     int rc;
 
@@ -96,16 +131,35 @@ static int mca_pml_ucx_send_worker_address(void)
         return OMPI_ERROR;
     }
 
+    PML_UCX_VERBOSE(2, "Pack worker address, size %ld", addrlen);
+
     OPAL_MODEX_SEND(rc, OPAL_PMIX_GLOBAL,
                     &mca_pml_ucx_component.pmlm_version, (void*)address, addrlen);
+
+    ucp_worker_release_address(ompi_pml_ucx.ucp_worker, address);
+
     if (OMPI_SUCCESS != rc) {
-        PML_UCX_ERROR("Open MPI couldn't distribute EP connection details");
-        return OMPI_ERROR;
+        goto err;
+    }
+#else
+    /* Pack just network device addresses for remote node peers */
+    status = mca_pml_ucx_send_worker_address_type(UCP_WORKER_ADDRESS_FLAG_NET_ONLY,
+                                                  OPAL_PMIX_REMOTE);
+    if (UCS_OK != status) {
+        goto err;
     }
 
-    ucp_worker_release_address(ompi_pml_ucx.ucp_worker, address);
+    status = mca_pml_ucx_send_worker_address_type(0, OPAL_PMIX_LOCAL);
+    if (UCS_OK != status) {
+        goto err;
+    }
+#endif
 
     return OMPI_SUCCESS;
+
+err:
+    PML_UCX_ERROR("Open MPI couldn't distribute EP connection details");
+    return OMPI_ERROR;
 }
 
 static int mca_pml_ucx_recv_worker_address(ompi_proc_t *proc,
@@ -121,6 +175,9 @@ static int mca_pml_ucx_recv_worker_address(ompi_proc_t *proc,
         PML_UCX_ERROR("Failed to receive UCX worker address: %s (%d)",
                       opal_strerror(ret), ret);
     }
+
+    PML_UCX_VERBOSE(2, "Got proc %d address, size %ld",
+                    proc->super.proc_name.vpid, *addrlen_p);
     return ret;
 }
 
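For context, a minimal standalone sketch (not part of the patch) of the query pattern the new mca_pml_ucx_send_worker_address_type() helper builds on: filling a ucp_worker_attr_t with UCP_WORKER_ATTR_FIELD_ADDRESS | UCP_WORKER_ATTR_FIELD_ADDRESS_FLAGS and calling ucp_worker_query() yields a packed worker address that honors the requested flags (e.g. UCP_WORKER_ADDRESS_FLAG_NET_ONLY) and must later be freed with ucp_worker_release_address(). The function name query_worker_address and the assumption of an already-created ucp_worker_h are illustrative only.

/* Illustrative sketch, assuming a UCX build where HAVE_UCP_WORKER_ADDRESS_FLAGS
 * holds and a UCP worker that has already been created. Queries a packed
 * worker address restricted by addr_flags; the caller later frees it with
 * ucp_worker_release_address(worker, *addr_p). */
#include <ucp/api/ucp.h>

static ucs_status_t query_worker_address(ucp_worker_h worker, int addr_flags,
                                         ucp_address_t **addr_p, size_t *len_p)
{
    ucp_worker_attr_t attrs;
    ucs_status_t status;

    attrs.field_mask    = UCP_WORKER_ATTR_FIELD_ADDRESS |
                          UCP_WORKER_ATTR_FIELD_ADDRESS_FLAGS;
    attrs.address_flags = addr_flags;   /* e.g. UCP_WORKER_ADDRESS_FLAG_NET_ONLY, or 0 */

    status = ucp_worker_query(worker, &attrs);
    if (UCS_OK != status) {
        return status;
    }

    *addr_p = attrs.address;
    *len_p  = attrs.address_length;
    return UCS_OK;
}

The patch applies this pattern twice: once with UCP_WORKER_ADDRESS_FLAG_NET_ONLY for the OPAL_PMIX_REMOTE modex scope and once with no flags for OPAL_PMIX_LOCAL, so off-node peers receive only the network-device portion of the worker address while node-local peers still get the full address.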