12
12
* All rights reserved.
13
13
* Copyright (c) 2012 Los Alamos National Security, LLC. All rights
14
14
* reserved.
15
- * Copyright (c) 2013-2014 Intel, Inc. All rights reserved
16
- * Copyright (c) 2015 Cisco Systems, Inc. All rights reserved.
15
+ * Copyright (c) 2013-2020 Intel, Inc. All rights reserved.
16
+ * Copyright (c) 2015-2020 Cisco Systems, Inc. All rights reserved.
17
+ * Copyright (c) 2020 Amazon.com, Inc. or its affiliates. All Rights
18
+ * reserved.
19
+ * Copyright (c) 2021 Nanook Consulting. All rights reserved.
17
20
* $COPYRIGHT$
18
21
*
19
22
* Additional copyrights may follow
@@ -44,8 +47,6 @@ typedef struct opened_component_t {
44
47
mca_pml_base_component_t * om_component ;
45
48
} opened_component_t ;
46
49
47
- static bool modex_reqd = false;
48
-
49
50
/**
50
51
* Function for selecting one component from all those that are
51
52
* available.
@@ -59,7 +60,7 @@ static bool modex_reqd=false;
59
60
int mca_pml_base_select (bool enable_progress_threads ,
60
61
bool enable_mpi_threads )
61
62
{
62
- int i , priority = 0 , best_priority = 0 , num_pml = 0 ;
63
+ int i , priority = 0 , best_priority = 0 , num_pml = 0 , ret = 0 ;
63
64
opal_list_item_t * item = NULL ;
64
65
mca_base_component_list_item_t * cli = NULL ;
65
66
mca_pml_base_component_t * component = NULL , * best_component = NULL ;
@@ -186,12 +187,13 @@ int mca_pml_base_select(bool enable_progress_threads,
186
187
"selected %s best priority %d\n" ,
187
188
best_component -> pmlm_version .mca_component_name , best_priority );
188
189
189
- /* if more than one PML could be considered, then we still need the
190
- * modex since we cannot know which one will be selected on all procs
191
- */
192
- if (1 < num_pml ) {
193
- modex_reqd = true;
194
- }
190
+ /* Save the winner */
191
+
192
+ mca_pml_base_selected_component = * best_component ;
193
+ mca_pml = * best_module ;
194
+ opal_output_verbose ( 10 , ompi_pml_base_framework .framework_output ,
195
+ "select: component %s selected" ,
196
+ mca_pml_base_selected_component .pmlm_version .mca_component_name );
195
197
196
198
/* Finalize all non-selected components */
197
199
@@ -239,14 +241,6 @@ int mca_pml_base_select(bool enable_progress_threads,
239
241
}
240
242
#endif
241
243
242
- /* Save the winner */
243
-
244
- mca_pml_base_selected_component = * best_component ;
245
- mca_pml = * best_module ;
246
- opal_output_verbose ( 10 , ompi_pml_base_framework .framework_output ,
247
- "select: component %s selected" ,
248
- mca_pml_base_selected_component .pmlm_version .mca_component_name );
249
-
250
244
/* This base function closes, unloads, and removes from the
251
245
available list all unselected components. The available list will
252
246
contain only the selected component. */
@@ -287,13 +281,11 @@ int mca_pml_base_select(bool enable_progress_threads,
287
281
}
288
282
289
283
/* register winner in the modex */
290
- if (modex_reqd && 0 == OMPI_PROC_MY_NAME -> vpid ) {
291
- mca_pml_base_pml_selected (best_component -> pmlm_version .mca_component_name );
292
- }
284
+ ret = mca_pml_base_pml_selected (best_component -> pmlm_version .mca_component_name );
293
285
294
286
/* All done */
295
287
296
- return OMPI_SUCCESS ;
288
+ return ret ;
297
289
}
298
290
299
291
/* need a "commonly" named PML structure so everything ends up in the
@@ -307,49 +299,56 @@ static mca_base_component_t pml_base_component = {
307
299
};
308
300
309
301
302
+ /*
303
+ * If direct modex, then publish PML for all procs. If full modex then
304
+ * publish PML for rank 0 only. This information is used during add_procs
305
+ * to perform PML check.
306
+ * During PML check, for direct modex, compare our PML with the peer's
307
+ * PML for all procs in the add_procs call. This does not change the
308
+ * connection complexity of modex transfers, since adding the proc is
309
+ * going to get the peer information in the MTL/PML/BTL anyway.
310
+ * For full modex, compare our PML with rank 0.
311
+ * Direct Modex is performed when collect_all_data is false, as we do
312
+ * not perform a fence operation during MPI_Init if async_modex is true.
313
+ * If async_modex is false and collect_all_data is false then we do a
314
+ * zero-byte barrier and we would still require direct modex during
315
+ * add_procs
316
+ */
310
317
int
311
318
mca_pml_base_pml_selected (const char * name )
312
319
{
313
- int rc ;
320
+ int rc = 0 ;
321
+
322
+ if (!opal_pmix_collect_all_data || 0 == OMPI_PROC_MY_NAME -> vpid ) {
323
+ OPAL_MODEX_SEND (rc , OPAL_PMIX_GLOBAL , & pml_base_component , name ,
324
+ strlen (name ) + 1 );
325
+ }
314
326
315
- OPAL_MODEX_SEND (rc , OPAL_PMIX_GLOBAL , & pml_base_component , name , strlen (name ) + 1 );
316
327
return rc ;
317
328
}
318
329
319
- int
320
- mca_pml_base_pml_check_selected (const char * my_pml ,
321
- ompi_proc_t * * procs ,
322
- size_t nprocs )
330
+ static int
331
+ mca_pml_base_pml_check_selected_impl (const char * my_pml ,
332
+ opal_process_name_t proc_name )
323
333
{
324
334
size_t size ;
325
- int ret ;
335
+ int ret = 0 ;
326
336
char * remote_pml ;
327
337
328
- /* if no modex was required by the PML, then
329
- * we can assume success
330
- */
331
- if (!modex_reqd ) {
338
+ /* if we are proc_name=OMPI_PROC_MY_NAME, then we can also assume success */
339
+ if (0 == opal_compare_proc (ompi_proc_local ()-> super .proc_name , proc_name )) {
332
340
opal_output_verbose ( 10 , ompi_pml_base_framework .framework_output ,
333
- "check:select: modex not reqd " );
341
+ "check:select: PML check not necessary on self " );
334
342
return OMPI_SUCCESS ;
335
343
}
336
-
337
- /* if we are rank=0, then we can also assume success */
338
- if (0 == OMPI_PROC_MY_NAME -> vpid ) {
344
+ OPAL_MODEX_RECV_STRING (ret ,
345
+ mca_base_component_to_string (& pml_base_component ),
346
+ & proc_name , (void * * ) & remote_pml , & size );
347
+ if (OPAL_ERR_NOT_FOUND == ret ) {
339
348
opal_output_verbose ( 10 , ompi_pml_base_framework .framework_output ,
340
- "check:select: rank=0" );
341
- return OMPI_SUCCESS ;
342
- }
343
-
344
- /* get the name of the PML module selected by rank=0 */
345
- OPAL_MODEX_RECV (ret , & pml_base_component ,
346
- & procs [0 ]-> super .proc_name , (void * * ) & remote_pml , & size );
347
-
348
- /* if this key wasn't found, then just assume all is well... */
349
- if (OMPI_SUCCESS != ret ) {
350
- opal_output_verbose ( 10 , ompi_pml_base_framework .framework_output ,
351
- "check:select: modex data not found" );
352
- return OMPI_SUCCESS ;
349
+ "check:select: PML modex for process %s not found" ,
350
+ OMPI_NAME_PRINT (& proc_name ));
351
+ return OMPI_ERR_NOT_FOUND ;
353
352
}
354
353
355
354
/* the remote pml returned should never be NULL if an error
@@ -358,26 +357,68 @@ mca_pml_base_pml_check_selected(const char *my_pml,
358
357
*/
359
358
if (NULL == remote_pml ) {
360
359
opal_output_verbose ( 10 , ompi_pml_base_framework .framework_output ,
361
- "check:select: got a NULL pml from rank=0" );
360
+ "check:select: got a NULL pml from process %s" ,
361
+ OMPI_NAME_PRINT (& proc_name ));
362
362
return OMPI_ERR_UNREACH ;
363
363
}
364
364
365
365
opal_output_verbose ( 10 , ompi_pml_base_framework .framework_output ,
366
- "check:select: checking my pml %s against rank=0 pml %s" ,
367
- my_pml , remote_pml );
366
+ "check:select: checking my pml %s against process %s"
367
+ " pml %s" , my_pml , OMPI_NAME_PRINT (& proc_name ),
368
+ remote_pml );
368
369
369
370
/* if that module doesn't match my own, return an error */
370
371
if ((size != strlen (my_pml ) + 1 ) ||
371
372
(0 != strcmp (my_pml , remote_pml ))) {
373
+ char * errhost = NULL ;
374
+ OPAL_MODEX_RECV_VALUE_OPTIONAL (ret , OPAL_PMIX_HOSTNAME , & proc_name ,
375
+ & (errhost ), OPAL_STRING );
372
376
opal_output (0 , "%s selected pml %s, but peer %s on %s selected pml %s" ,
373
377
OMPI_NAME_PRINT (& ompi_proc_local ()-> super .proc_name ),
374
- my_pml , OMPI_NAME_PRINT (& procs [ 0 ] -> super . proc_name ),
375
- (NULL == procs [ 0 ] -> super . proc_hostname ) ? "unknown" : procs [ 0 ] -> super . proc_hostname ,
378
+ my_pml , OMPI_NAME_PRINT (& proc_name ),
379
+ (NULL == errhost ) ? "unknown" : errhost ,
376
380
remote_pml );
377
- free (remote_pml ); /* cleanup before returning */
381
+ free (remote_pml );
382
+ free (errhost );
383
+ /* cleanup before returning */
378
384
return OMPI_ERR_UNREACH ;
379
385
}
380
386
381
387
free (remote_pml );
382
388
return OMPI_SUCCESS ;
383
389
}
390
+
391
+ int
392
+ mca_pml_base_pml_check_selected (const char * my_pml ,
393
+ ompi_proc_t * * procs ,
394
+ size_t nprocs )
395
+ {
396
+ int ret = 0 ;
397
+ size_t i ;
398
+
399
+ if (!opal_pmix_collect_all_data ) {
400
+ /*
401
+ * If direct modex, then compare our PML with the peer's PML
402
+ * for all procs
403
+ */
404
+ for (i = 0 ; i < nprocs ; i ++ ) {
405
+ ret = mca_pml_base_pml_check_selected_impl (
406
+ my_pml ,
407
+ procs [i ]-> super .proc_name );
408
+ if (ret ) {
409
+ return ret ;
410
+ }
411
+ }
412
+ } else {
413
+ /* else if full modex compare our PML with rank 0 */
414
+ opal_process_name_t proc_name = {
415
+ .jobid = ompi_proc_local ()-> super .proc_name .jobid ,
416
+ .vpid = 0
417
+ };
418
+ ret = mca_pml_base_pml_check_selected_impl (
419
+ my_pml ,
420
+ proc_name );
421
+ }
422
+
423
+ return ret ;
424
+ }
0 commit comments