Skip to content

Commit d489030

Browse files
authored
Merge pull request #8199 from rhc54/topic/locality
Fix confusion between cpuset and locality
2 parents 57ccb83 + 2f7f1fe commit d489030

File tree

5 files changed

+18
-23
lines changed

5 files changed

+18
-23
lines changed

ompi/dpm/dpm.c

Lines changed: 3 additions & 16 deletions
Original file line number | Diff line number | Diff line change
@@ -355,7 +355,7 @@ int ompi_dpm_connect_accept(ompi_communicator_t *comm, int root,
355355
if (0 < opal_list_get_size(&ilist)) {
356356
uint32_t *peer_ranks = NULL;
357357
int prn, nprn = 0;
358-
char *val, *mycpuset;
358+
char *val;
359359
uint16_t u16;
360360
opal_process_name_t wildcard_rank;
361361
/* convert the list of new procs to a proc_t array */
@@ -380,16 +380,6 @@ int ompi_dpm_connect_accept(ompi_communicator_t *comm, int root,
380380
opal_argv_free(peers);
381381
}
382382

383-
/* get my locality string */
384-
val = NULL;
385-
OPAL_MODEX_RECV_VALUE_OPTIONAL(rc, PMIX_LOCALITY_STRING,
386-
OMPI_PROC_MY_NAME, &val, PMIX_STRING);
387-
if (OPAL_SUCCESS == rc && NULL != val) {
388-
mycpuset = val;
389-
} else {
390-
mycpuset = NULL;
391-
}
392-
393383
i = 0;
394384
OPAL_LIST_FOREACH(cd, &ilist, ompi_dpm_proct_caddy_t) {
395385
proc = cd->p;
@@ -406,8 +396,8 @@ int ompi_dpm_connect_accept(ompi_communicator_t *comm, int root,
406396
val = NULL;
407397
OPAL_MODEX_RECV_VALUE_IMMEDIATE(rc, PMIX_LOCALITY_STRING,
408398
&proc->super.proc_name, &val, OPAL_STRING);
409-
if (OPAL_SUCCESS == rc && NULL != val) {
410-
u16 = opal_hwloc_compute_relative_locality(mycpuset, val);
399+
if (OPAL_SUCCESS == rc && NULL != ompi_process_info.locality) {
400+
u16 = opal_hwloc_compute_relative_locality(ompi_process_info.locality, val);
411401
free(val);
412402
} else {
413403
/* all we can say is that it shares our node */
@@ -425,9 +415,6 @@ int ompi_dpm_connect_accept(ompi_communicator_t *comm, int root,
425415
}
426416
++i;
427417
}
428-
if (NULL != mycpuset) {
429-
free(mycpuset);
430-
}
431418
if (NULL != peer_ranks) {
432419
free(peer_ranks);
433420
}

ompi/runtime/ompi_rte.c

Lines changed: 11 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -764,7 +764,7 @@ int ompi_rte_init(int *pargc, char ***pargv)
764764

765765
/* identify our location */
766766
val = NULL;
767-
OPAL_MODEX_RECV_VALUE_OPTIONAL(rc, PMIX_LOCALITY_STRING,
767+
OPAL_MODEX_RECV_VALUE_OPTIONAL(rc, PMIX_CPUSET,
768768
&opal_process_info.my_name, &val, PMIX_STRING);
769769
if (PMIX_SUCCESS == rc && NULL != val) {
770770
opal_process_info.cpuset = val;
@@ -774,6 +774,15 @@ int ompi_rte_init(int *pargc, char ***pargv)
774774
opal_process_info.cpuset = NULL;
775775
opal_process_info.proc_is_bound = false;
776776
}
777+
val = NULL;
778+
OPAL_MODEX_RECV_VALUE_OPTIONAL(rc, PMIX_LOCALITY_STRING,
779+
&opal_process_info.my_name, &val, PMIX_STRING);
780+
if (PMIX_SUCCESS == rc && NULL != val) {
781+
opal_process_info.locality = val;
782+
val = NULL; // protect the string
783+
} else {
784+
opal_process_info.locality = NULL;
785+
}
777786

778787
/* retrieve the local peers - defaults to local node */
779788
val = NULL;
@@ -811,7 +820,7 @@ int ompi_rte_init(int *pargc, char ***pargv)
811820
OPAL_MODEX_RECV_VALUE_OPTIONAL(rc, PMIX_LOCALITY_STRING,
812821
&pname, &val, PMIX_STRING);
813822
if (PMIX_SUCCESS == rc && NULL != val) {
814-
u16 = opal_hwloc_compute_relative_locality(opal_process_info.cpuset, val);
823+
u16 = opal_hwloc_compute_relative_locality(opal_process_info.locality, val);
815824
free(val);
816825
} else {
817826
/* all we can say is that it shares our node */
@@ -826,9 +835,6 @@ int ompi_rte_init(int *pargc, char ***pargv)
826835
ret = opal_pmix_convert_status(rc);
827836
error = "local store of locality";
828837
opal_argv_free(peers);
829-
if (NULL != opal_process_info.cpuset) {
830-
free(opal_process_info.cpuset);
831-
}
832838
goto error;
833839
}
834840
}

opal/mca/common/ofi/common_ofi.c

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright (c) 2015 Intel, Inc. All rights reserved.
2+
* Copyright (c) 2015-2020 Intel, Inc. All rights reserved.
33
* Copyright (c) 2017 Los Alamos National Security, LLC. All rights
44
* reserved.
55
* Copyright (c) 2020 Triad National Security, LLC. All rights
@@ -345,7 +345,7 @@ static uint32_t get_package_rank(opal_process_info_t *process_info)
345345
}
346346

347347
// compute relative locality
348-
relative_locality = opal_hwloc_compute_relative_locality(process_info->cpuset, locality_string);
348+
relative_locality = opal_hwloc_compute_relative_locality(process_info->locality, locality_string);
349349
free(locality_string);
350350

351351
if (relative_locality & OPAL_PROC_ON_SOCKET) {

opal/util/proc.c

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -41,6 +41,7 @@ opal_process_info_t opal_process_info = {
4141
.my_local_rank = 0, /* I'm the only process around here */
4242
.my_node_rank = 0,
4343
.cpuset = NULL,
44+
.locality = NULL,
4445
.pid = 0,
4546
.num_procs = 0,
4647
.app_num = 0,

opal/util/proc.h

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -115,6 +115,7 @@ typedef struct opal_process_info_t {
115115
uint16_t my_local_rank; /**< local rank on this node within my job */
116116
uint16_t my_node_rank;
117117
char *cpuset; /**< String-representation of bitmap where we are bound */
118+
char *locality; /**< String-representation of process locality */
118119
pid_t pid;
119120
uint32_t num_procs;
120121
uint32_t app_num;

0 commit comments

Comments (0)