Skip to content

Commit 5111dbd

Browse files
authored
Merge pull request #6493 from rhc54/topic/order
Ensure that nodes are always used in order provided
2 parents ad8c842 + 5aa775c commit 5111dbd

File tree

3 files changed: +124 additions, -92 deletions (lines changed)

orte/mca/rmaps/base/rmaps_base_support_fns.c

Lines changed: 18 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@
1212
* Copyright (c) 2011 Cisco Systems, Inc. All rights reserved.
1313
* Copyright (c) 2011-2012 Los Alamos National Security, LLC.
1414
* All rights reserved.
15-
* Copyright (c) 2014-2018 Intel, Inc. All rights reserved.
15+
* Copyright (c) 2014-2019 Intel, Inc. All rights reserved.
1616
* Copyright (c) 2016 IBM Corporation. All rights reserved.
1717
* $COPYRIGHT$
1818
*
@@ -210,18 +210,17 @@ int orte_rmaps_base_get_target_nodes(opal_list_t *allocated_nodes, orte_std_cntr
210210
return ORTE_ERR_SILENT;
211211
}
212212
/* find the nodes in our node array and assemble them
213-
* in daemon order if the vm was launched
213+
* in list order as that is what the user specified
214214
*/
215-
for (i=0; i < orte_node_pool->size; i++) {
216-
nd = NULL;
217-
if (NULL == (node = (orte_node_t*)opal_pointer_array_get_item(orte_node_pool, i))) {
218-
continue;
219-
}
220-
/* ignore nodes that are non-usable */
221-
if (ORTE_FLAG_TEST(node, ORTE_NODE_NON_USABLE)) {
222-
continue;
223-
}
224-
OPAL_LIST_FOREACH_SAFE(nptr, next, &nodes, orte_node_t) {
215+
OPAL_LIST_FOREACH_SAFE(nptr, next, &nodes, orte_node_t) {
216+
for (i=0; i < orte_node_pool->size; i++) {
217+
if (NULL == (node = (orte_node_t*)opal_pointer_array_get_item(orte_node_pool, i))) {
218+
continue;
219+
}
220+
/* ignore nodes that are non-usable */
221+
if (ORTE_FLAG_TEST(node, ORTE_NODE_NON_USABLE)) {
222+
continue;
223+
}
225224
if (0 != strcmp(node->name, nptr->name)) {
226225
OPAL_OUTPUT_VERBOSE((10, orte_rmaps_base_framework.framework_output,
227226
"NODE %s DOESNT MATCH NODE %s",
@@ -266,37 +265,14 @@ int orte_rmaps_base_get_target_nodes(opal_list_t *allocated_nodes, orte_std_cntr
266265
*/
267266
ORTE_FLAG_UNSET(node, ORTE_NODE_FLAG_MAPPED);
268267
}
269-
if (NULL == nd || NULL == nd->daemon ||
270-
NULL == node->daemon ||
271-
nd->daemon->name.vpid < node->daemon->name.vpid) {
272-
/* just append to end */
273-
opal_list_append(allocated_nodes, &node->super);
274-
nd = node;
275-
} else {
276-
/* starting from end, put this node in daemon-vpid order */
277-
while (node->daemon->name.vpid < nd->daemon->name.vpid) {
278-
if (opal_list_get_begin(allocated_nodes) == opal_list_get_prev(&nd->super)) {
279-
/* insert at beginning */
280-
opal_list_prepend(allocated_nodes, &node->super);
281-
goto moveon1;
282-
}
283-
nd = (orte_node_t*)opal_list_get_prev(&nd->super);
284-
}
285-
item = opal_list_get_next(&nd->super);
286-
if (item == opal_list_get_end(allocated_nodes)) {
287-
/* we are at the end - just append */
288-
opal_list_append(allocated_nodes, &node->super);
289-
} else {
290-
nd = (orte_node_t*)item;
291-
opal_list_insert_pos(allocated_nodes, item, &node->super);
292-
}
293-
moveon1:
294-
/* reset us back to the end for the next node */
295-
nd = (orte_node_t*)opal_list_get_last(allocated_nodes);
296-
}
297-
opal_list_remove_item(&nodes, (opal_list_item_t*)nptr);
298-
OBJ_RELEASE(nptr);
268+
/* the list is ordered as per user direction using -host
269+
* or the listing in -hostfile - preserve that ordering */
270+
opal_list_append(allocated_nodes, &node->super);
271+
break;
299272
}
273+
/* remove the item from the list as we have allocated it */
274+
opal_list_remove_item(&nodes, (opal_list_item_t*)nptr);
275+
OBJ_RELEASE(nptr);
300276
}
301277
OBJ_DESTRUCT(&nodes);
302278
/* now prune for usage and compute total slots */

orte/util/dash_host/dash_host.c

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010
* Copyright (c) 2004-2005 The Regents of the University of California.
1111
* All rights reserved.
1212
* Copyright (c) 2013 Cisco Systems, Inc. All rights reserved.
13-
* Copyright (c) 2014-2018 Intel, Inc. All rights reserved.
13+
* Copyright (c) 2014-2019 Intel, Inc. All rights reserved.
1414
* Copyright (c) 2015 Research Organization for Information Science
1515
* and Technology (RIST). All rights reserved.
1616
* Copyright (c) 2016 IBM Corporation. All rights reserved.
@@ -52,7 +52,8 @@ int orte_util_dash_host_compute_slots(orte_node_t *node, char *hosts)
5252

5353
/* see if this node appears in the list */
5454
for (n=0; NULL != specs[n]; n++) {
55-
if (0 == strncmp(node->name, specs[n], strlen(node->name))) {
55+
if (0 == strncmp(node->name, specs[n], strlen(node->name)) ||
56+
(orte_ifislocal(node->name) && orte_ifislocal(specs[n]))) {
5657
/* check if the #slots was specified */
5758
if (NULL != (cptr = strchr(specs[n], ':'))) {
5859
*cptr = '\0';

orte/util/nidmap.c

Lines changed: 103 additions & 48 deletions
Original file line numberDiff line numberDiff line change
@@ -385,6 +385,7 @@ int orte_util_decode_nidmap(opal_buffer_t *buf)
385385
/* add this name to the pool */
386386
nd = OBJ_NEW(orte_node_t);
387387
nd->name = strdup(names[n]);
388+
nd->index = n;
388389
opal_pointer_array_set_item(orte_node_pool, n, nd);
389390
/* set the topology - always default to homogeneous
390391
* as that is the most common scenario */
@@ -409,7 +410,6 @@ int orte_util_decode_nidmap(opal_buffer_t *buf)
409410
daemons->num_procs++;
410411
opal_pointer_array_set_item(daemons->procs, proc->name.vpid, proc);
411412
}
412-
nd->index = proc->name.vpid;
413413
OBJ_RETAIN(nd);
414414
proc->node = nd;
415415
OBJ_RETAIN(proc);
@@ -945,8 +945,9 @@ int orte_util_parse_node_info(opal_buffer_t *buf)
945945
int orte_util_generate_ppn(orte_job_t *jdata,
946946
opal_buffer_t *buf)
947947
{
948-
uint16_t *ppn=NULL;
949-
size_t nbytes;
948+
uint16_t ppn;
949+
uint8_t *bytes;
950+
int32_t nbytes;
950951
int rc = ORTE_SUCCESS;
951952
orte_app_idx_t i;
952953
int j, k;
@@ -955,40 +956,47 @@ int orte_util_generate_ppn(orte_job_t *jdata,
955956
orte_node_t *nptr;
956957
orte_proc_t *proc;
957958
size_t sz;
959+
opal_buffer_t bucket;
958960

959-
/* make room for the number of procs on each node */
960-
nbytes = sizeof(uint16_t) * orte_node_pool->size;
961-
ppn = (uint16_t*)malloc(nbytes);
961+
OBJ_CONSTRUCT(&bucket, opal_buffer_t);
962962

963963
for (i=0; i < jdata->num_apps; i++) {
964-
/* reset the #procs */
965-
memset(ppn, 0, nbytes);
966-
/* for each app_context, compute the #procs on
967-
* each node of the allocation */
968-
for (j=0; j < orte_node_pool->size; j++) {
969-
if (NULL == (nptr = (orte_node_t*)opal_pointer_array_get_item(orte_node_pool, j))) {
964+
/* for each app_context */
965+
for (j=0; j < jdata->map->nodes->size; j++) {
966+
if (NULL == (nptr = (orte_node_t*)opal_pointer_array_get_item(jdata->map->nodes, j))) {
970967
continue;
971968
}
972969
if (NULL == nptr->daemon) {
973970
continue;
974971
}
972+
ppn = 0;
975973
for (k=0; k < nptr->procs->size; k++) {
976974
if (NULL != (proc = (orte_proc_t*)opal_pointer_array_get_item(nptr->procs, k))) {
977975
if (proc->name.jobid == jdata->jobid) {
978-
++ppn[j];
976+
++ppn;
979977
}
980978
}
981979
}
980+
if (0 < ppn) {
981+
if (ORTE_SUCCESS != (rc = opal_dss.pack(&bucket, &nptr->index, 1, ORTE_STD_CNTR))) {
982+
goto cleanup;
983+
}
984+
if (ORTE_SUCCESS != (rc = opal_dss.pack(&bucket, &ppn, 1, OPAL_UINT16))) {
985+
goto cleanup;
986+
}
987+
}
982988
}
983-
if (opal_compress.compress_block((uint8_t*)ppn, nbytes,
989+
opal_dss.unload(&bucket, (void**)&bytes, &nbytes);
990+
991+
if (opal_compress.compress_block(bytes, (size_t)nbytes,
984992
(uint8_t**)&bo.bytes, &sz)) {
985993
/* mark that this was compressed */
986994
compressed = true;
987995
bo.size = sz;
988996
} else {
989997
/* mark that this was not compressed */
990998
compressed = false;
991-
bo.bytes = (uint8_t*)ppn;
999+
bo.bytes = bytes;
9921000
bo.size = nbytes;
9931001
}
9941002
/* indicate compression */
@@ -1015,21 +1023,31 @@ int orte_util_generate_ppn(orte_job_t *jdata,
10151023
}
10161024

10171025
cleanup:
1018-
free(ppn);
1026+
OBJ_DESTRUCT(&bucket);
10191027
return rc;
10201028
}
10211029

10221030
int orte_util_decode_ppn(orte_job_t *jdata,
10231031
opal_buffer_t *buf)
10241032
{
1033+
orte_std_cntr_t index;
10251034
orte_app_idx_t n;
1026-
int m, cnt, rc;
1035+
int cnt, rc, m;
10271036
opal_byte_object_t *boptr;
10281037
bool compressed;
1038+
uint8_t *bytes;
10291039
size_t sz;
1030-
uint16_t *ppn, k;
1040+
uint16_t ppn, k;
10311041
orte_node_t *node;
10321042
orte_proc_t *proc;
1043+
opal_buffer_t bucket;
1044+
1045+
/* reset any flags */
1046+
for (m=0; m < orte_node_pool->size; m++) {
1047+
if (NULL != (node = (orte_node_t*)opal_pointer_array_get_item(orte_node_pool, m))) {
1048+
ORTE_FLAG_UNSET(node, ORTE_NODE_FLAG_MAPPED);
1049+
}
1050+
}
10331051

10341052
for (n=0; n < jdata->num_apps; n++) {
10351053
/* unpack the compression flag */
@@ -1062,14 +1080,15 @@ int orte_util_decode_ppn(orte_job_t *jdata,
10621080

10631081
/* decompress if required */
10641082
if (compressed) {
1065-
if (!opal_compress.decompress_block((uint8_t**)&ppn, sz,
1083+
if (!opal_compress.decompress_block(&bytes, sz,
10661084
boptr->bytes, boptr->size)) {
10671085
ORTE_ERROR_LOG(ORTE_ERROR);
10681086
OBJ_RELEASE(boptr);
10691087
return ORTE_ERROR;
10701088
}
10711089
} else {
1072-
ppn = (uint16_t*)boptr->bytes;
1090+
bytes = boptr->bytes;
1091+
sz = boptr->size;
10731092
boptr->bytes = NULL;
10741093
boptr->size = 0;
10751094
}
@@ -1078,38 +1097,74 @@ int orte_util_decode_ppn(orte_job_t *jdata,
10781097
}
10791098
free(boptr);
10801099

1081-
/* cycle thru the node pool */
1082-
for (m=0; m < orte_node_pool->size; m++) {
1083-
if (NULL == (node = (orte_node_t*)opal_pointer_array_get_item(orte_node_pool, m))) {
1084-
continue;
1100+
/* setup to unpack */
1101+
OBJ_CONSTRUCT(&bucket, opal_buffer_t);
1102+
opal_dss.load(&bucket, bytes, sz);
1103+
1104+
/* unpack each node and its ppn */
1105+
cnt = 1;
1106+
while (OPAL_SUCCESS == (rc = opal_dss.unpack(&bucket, &index, &cnt, ORTE_STD_CNTR))) {
1107+
/* get the corresponding node object */
1108+
if (NULL == (node = (orte_node_t*)opal_pointer_array_get_item(orte_node_pool, index))) {
1109+
rc = ORTE_ERR_NOT_FOUND;
1110+
ORTE_ERROR_LOG(rc);
1111+
goto error;
10851112
}
1086-
if (0 < ppn[m]) {
1087-
if (!ORTE_FLAG_TEST(node, ORTE_NODE_FLAG_MAPPED)) {
1088-
OBJ_RETAIN(node);
1089-
ORTE_FLAG_SET(node, ORTE_NODE_FLAG_MAPPED);
1090-
opal_pointer_array_add(jdata->map->nodes, node);
1091-
}
1092-
/* create a proc object for each one */
1093-
for (k=0; k < ppn[m]; k++) {
1094-
proc = OBJ_NEW(orte_proc_t);
1095-
proc->name.jobid = jdata->jobid;
1096-
/* leave the vpid undefined as this will be determined
1097-
* later when we do the overall ranking */
1098-
proc->app_idx = n;
1099-
proc->parent = node->daemon->name.vpid;
1100-
OBJ_RETAIN(node);
1101-
proc->node = node;
1102-
/* flag the proc as ready for launch */
1103-
proc->state = ORTE_PROC_STATE_INIT;
1104-
opal_pointer_array_add(node->procs, proc);
1105-
/* we will add the proc to the jdata array when we
1106-
* compute its rank */
1107-
}
1108-
node->num_procs += ppn[m];
1113+
/* add the node to the job map if not already assigned */
1114+
if (!ORTE_FLAG_TEST(node, ORTE_NODE_FLAG_MAPPED)) {
1115+
OBJ_RETAIN(node);
1116+
opal_pointer_array_add(jdata->map->nodes, node);
1117+
ORTE_FLAG_SET(node, ORTE_NODE_FLAG_MAPPED);
1118+
}
1119+
/* get the ppn */
1120+
cnt = 1;
1121+
if (OPAL_SUCCESS != (rc = opal_dss.unpack(&bucket, &ppn, &cnt, OPAL_UINT16))) {
1122+
ORTE_ERROR_LOG(rc);
1123+
goto error;
11091124
}
1125+
/* create a proc object for each one */
1126+
for (k=0; k < ppn; k++) {
1127+
proc = OBJ_NEW(orte_proc_t);
1128+
proc->name.jobid = jdata->jobid;
1129+
/* leave the vpid undefined as this will be determined
1130+
* later when we do the overall ranking */
1131+
proc->app_idx = n;
1132+
proc->parent = node->daemon->name.vpid;
1133+
OBJ_RETAIN(node);
1134+
proc->node = node;
1135+
/* flag the proc as ready for launch */
1136+
proc->state = ORTE_PROC_STATE_INIT;
1137+
opal_pointer_array_add(node->procs, proc);
1138+
node->num_procs++;
1139+
/* we will add the proc to the jdata array when we
1140+
* compute its rank */
1141+
}
1142+
node->num_procs += ppn;
1143+
cnt = 1;
11101144
}
1111-
free(ppn);
1145+
OBJ_DESTRUCT(&bucket);
1146+
}
1147+
if (OPAL_ERR_UNPACK_READ_PAST_END_OF_BUFFER != rc) {
1148+
ORTE_ERROR_LOG(rc);
11121149
}
11131150

1151+
/* reset any flags */
1152+
for (m=0; m < jdata->map->nodes->size; m++) {
1153+
node = (orte_node_t*)opal_pointer_array_get_item(jdata->map->nodes, m);
1154+
if (NULL != node) {
1155+
ORTE_FLAG_UNSET(node, ORTE_NODE_FLAG_MAPPED);
1156+
}
1157+
}
11141158
return ORTE_SUCCESS;
1159+
1160+
error:
1161+
OBJ_DESTRUCT(&bucket);
1162+
/* reset any flags */
1163+
for (m=0; m < jdata->map->nodes->size; m++) {
1164+
node = (orte_node_t*)opal_pointer_array_get_item(jdata->map->nodes, m);
1165+
if (NULL != node) {
1166+
ORTE_FLAG_UNSET(node, ORTE_NODE_FLAG_MAPPED);
1167+
}
1168+
}
1169+
return rc;
11151170
}

0 commit comments

Comments
 (0)