Skip to content

Commit f96994b

Browse files
authored
Merge pull request #6865 from rhc54/cmr40/locality
Provide locality for all procs on node
2 parents 7b09c15 + e17203b commit f96994b

File tree

1 file changed

+68
-8
lines changed

1 file changed

+68
-8
lines changed

ompi/dpm/dpm.c

Lines changed: 68 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -15,7 +15,7 @@
1515
* Copyright (c) 2009 Sun Microsystems, Inc. All rights reserved.
1616
* Copyright (c) 2011-2015 Los Alamos National Security, LLC. All rights
1717
* reserved.
18-
* Copyright (c) 2013-2017 Intel, Inc. All rights reserved.
18+
* Copyright (c) 2013-2019 Intel, Inc. All rights reserved.
1919
* Copyright (c) 2014-2017 Research Organization for Information Science
2020
* and Technology (RIST). All rights reserved.
2121
* $COPYRIGHT$
@@ -404,9 +404,43 @@ int ompi_dpm_connect_accept(ompi_communicator_t *comm, int root,
404404
goto exit;
405405
}
406406
if (0 < opal_list_get_size(&ilist)) {
407+
uint32_t *peer_ranks = NULL;
408+
int prn, nprn;
409+
char *val, *mycpuset;
410+
uint16_t u16;
411+
opal_process_name_t wildcard_rank;
407412
/* convert the list of new procs to a proc_t array */
408413
new_proc_list = (ompi_proc_t**)calloc(opal_list_get_size(&ilist),
409414
sizeof(ompi_proc_t *));
415+
/* get the list of local peers for the new procs */
416+
cd = (ompi_dpm_proct_caddy_t*)opal_list_get_first(&ilist);
417+
proc = cd->p;
418+
wildcard_rank.jobid = proc->super.proc_name.jobid;
419+
wildcard_rank.vpid = OMPI_NAME_WILDCARD->vpid;
420+
/* retrieve the local peers */
421+
OPAL_MODEX_RECV_VALUE_OPTIONAL(rc, OPAL_PMIX_LOCAL_PEERS,
422+
&wildcard_rank, &val, OPAL_STRING);
423+
if (OPAL_SUCCESS == rc && NULL != val) {
424+
char **peers = opal_argv_split(val, ',');
425+
free(val);
426+
nprn = opal_argv_count(peers);
427+
peer_ranks = (uint32_t*)calloc(nprn, sizeof(uint32_t));
428+
for (prn = 0; NULL != peers[prn]; prn++) {
429+
peer_ranks[prn] = strtoul(peers[prn], NULL, 10);
430+
}
431+
opal_argv_free(peers);
432+
}
433+
434+
/* get my locality string */
435+
val = NULL;
436+
OPAL_MODEX_RECV_VALUE_OPTIONAL(rc, OPAL_PMIX_LOCALITY_STRING,
437+
OMPI_PROC_MY_NAME, &val, OPAL_STRING);
438+
if (OPAL_SUCCESS == rc && NULL != val) {
439+
mycpuset = val;
440+
} else {
441+
mycpuset = NULL;
442+
}
443+
410444
i = 0;
411445
OPAL_LIST_FOREACH(cd, &ilist, ompi_dpm_proct_caddy_t) {
412446
opal_value_t *kv;
@@ -416,15 +450,41 @@ int ompi_dpm_connect_accept(ompi_communicator_t *comm, int root,
416450
* OPAL_PMIX_LOCALITY and OPAL_PMIX_HOSTNAME. since we can live without
417451
* them, we are just fine */
418452
ompi_proc_complete_init_single(proc);
419-
/* save the locality for later */
420-
kv = OBJ_NEW(opal_value_t);
421-
kv->key = strdup(OPAL_PMIX_LOCALITY);
422-
kv->type = OPAL_UINT16;
423-
kv->data.uint16 = proc->super.proc_flags;
424-
opal_pmix.store_local(&proc->super.proc_name, kv);
425-
OBJ_RELEASE(kv); // maintain accounting
453+
/* if this proc is local, then get its locality */
454+
if (NULL != peer_ranks) {
455+
for (prn=0; prn < nprn; prn++) {
456+
if (peer_ranks[prn] == proc->super.proc_name.vpid) {
457+
/* get their locality string */
458+
val = NULL;
459+
OPAL_MODEX_RECV_VALUE_IMMEDIATE(rc, OPAL_PMIX_LOCALITY_STRING,
460+
&proc->super.proc_name, &val, OPAL_STRING);
461+
if (OPAL_SUCCESS == rc && NULL != val) {
462+
u16 = opal_hwloc_compute_relative_locality(mycpuset, val);
463+
free(val);
464+
} else {
465+
/* all we can say is that it shares our node */
466+
u16 = OPAL_PROC_ON_CLUSTER | OPAL_PROC_ON_CU | OPAL_PROC_ON_NODE;
467+
}
468+
proc->super.proc_flags = u16;
469+
/* save the locality for later */
470+
kv = OBJ_NEW(opal_value_t);
471+
kv->key = strdup(OPAL_PMIX_LOCALITY);
472+
kv->type = OPAL_UINT16;
473+
kv->data.uint16 = proc->super.proc_flags;
474+
opal_pmix.store_local(&proc->super.proc_name, kv);
475+
OBJ_RELEASE(kv); // maintain accounting
476+
break;
477+
}
478+
}
479+
}
426480
++i;
427481
}
482+
if (NULL != mycpuset) {
483+
free(mycpuset);
484+
}
485+
if (NULL != peer_ranks) {
486+
free(peer_ranks);
487+
}
428488
/* call add_procs on the new ones */
429489
rc = MCA_PML_CALL(add_procs(new_proc_list, opal_list_get_size(&ilist)));
430490
free(new_proc_list);

0 commit comments

Comments
 (0)