Skip to content

Commit d591211

Browse files
committed
sessions:deworldify behavior of pmix pset lookup
The existing ompi_instance_group_pmix_pset implementation assumes an MPI_COMM_WORLD-style model. This prevents the use of more dynamically generated process sets, possibly defined by an external agent. Switch to using the PMIx pset membership query to find pset membership. Related to #10862. Related to openpmix/prrte#1906. The PRRTE changes in the PR referenced above are necessary for creating groups/communicators from psets defined by the --pset option on the mpirun command line. Signed-off-by: Howard Pritchard <[email protected]>
1 parent 5fa32f7 commit d591211

File tree

3 files changed

+69
-56
lines changed

3 files changed

+69
-56
lines changed

3rd-party/openpmix

ompi/instance/instance.c

Lines changed: 67 additions & 54 deletions
Original file line numberDiff line numberDiff line change
@@ -1091,7 +1091,8 @@ int ompi_instance_get_num_psets (ompi_instance_t *instance, int *npset_names)
10911091

10921092
int ompi_instance_get_nth_pset (ompi_instance_t *instance, int n, int *len, char *pset_name)
10931093
{
1094-
if (NULL == ompi_mpi_instance_pmix_psets && n >= ompi_instance_builtin_count) {
1094+
if (NULL == ompi_mpi_instance_pmix_psets ||
1095+
(size_t) n >= (ompi_instance_builtin_count + ompi_mpi_instance_num_pmix_psets)) {
10951096
ompi_instance_refresh_pmix_psets (PMIX_QUERY_PSET_NAMES);
10961097
}
10971098

@@ -1229,71 +1230,83 @@ static int ompi_instance_group_self (ompi_instance_t *instance, ompi_group_t **g
12291230

12301231
/*
 * ompi_instance_group_pmix_pset -- build an ompi_group_t for the PMIx process
 * set named pset_name and hand it back through group_out.
 *
 * This span is a rendered diff of the function, not plain source: a code line
 * that follows an "NNNN-" marker appears to be removed by the commit, and one
 * that follows an "NNNN+" marker appears to be added (confirm against the raw
 * patch).
 *
 * Removed version: allocates a group sized for ompi_process_info.num_procs,
 * calls PMIx_Get(PMIX_PSET_NAME) for every vpid in this process's jobid, keeps
 * the procs whose pset name equals pset_name, then shrinks the proc array.
 *
 * Added version: issues PMIx_Query_info for PMIX_QUERY_PSET_MEMBERSHIP
 * qualified by PMIX_PSET_NAME = pset_name, retries once with
 * PMIX_QUERY_REFRESH_CACHE when the status is PMIX_ERR_NOT_FOUND, and fills
 * the group from the returned pmix_proc_t array.
 *
 * Returns (added version): ret, which is OMPI_SUCCESS when a group was built,
 * OMPI_ERR_OUT_OF_RESOURCE when ompi_group_allocate fails,
 * OMPI_ERR_NOT_FOUND when no membership entry is present in the results, or
 * the converted PMIx status when the query fails. *group_out is written only
 * when a group was built.
 */
static int ompi_instance_group_pmix_pset (ompi_instance_t *instance, const char *pset_name, ompi_group_t **group_out)
12311232
{
1233+
int ret = OMPI_SUCCESS;
1234+
size_t i,n;
1235+
bool isnew, try_again = false, refresh = true; /* isnew is written by ompi_proc_find_and_add but not read in this function */
12321236
pmix_status_t rc;
1233-
pmix_proc_t p;
1234-
ompi_group_t *group;
1235-
pmix_value_t *pval = NULL;
1236-
char *stmp = NULL;
1237-
size_t size = 0;
1238-
1239-
/* make the group large enough to hold world */
1240-
group = ompi_group_allocate (NULL, ompi_process_info.num_procs);
1241-
if (OPAL_UNLIKELY(NULL == group)) {
1242-
return OMPI_ERR_OUT_OF_RESOURCE;
1243-
}
1237+
ompi_group_t *group = NULL;
1238+
pmix_query_t query;
1239+
pmix_info_t *info = NULL;
1240+
size_t ninfo;
1241+
opal_process_name_t pname;
12441242

1243+
PMIX_QUERY_CONSTRUCT(&query);
1244+
PMIX_ARGV_APPEND(rc, query.keys, PMIX_QUERY_PSET_MEMBERSHIP);
1245+
PMIX_INFO_CREATE(query.qualifiers, 1);
1246+
query.nqual = 1;
1247+
PMIX_INFO_LOAD(&query.qualifiers[0], PMIX_PSET_NAME, pset_name, PMIX_STRING);
12451248

1246-
for (size_t i = 0 ; i < ompi_process_info.num_procs ; ++i) {
1247-
opal_process_name_t name = {.vpid = i, .jobid = OMPI_PROC_MY_NAME->jobid};
1249+
/*
1250+
* First try finding in the local PMIx cache, if not found, try a refresh
1251+
*/
1252+
fn_try_again:
1253+
rc = PMIx_Query_info(&query, 1, &info, &ninfo); /* NOTE(review): the same query is issued again in the condition on the next added line, overwriting rc/info/ninfo from this call -- looks redundant and may leak the first info array; confirm */
1254+
if (PMIX_SUCCESS != (rc = PMIx_Query_info(&query, 1, &info, &ninfo)) || 0 == ninfo) {
1255+
if ((PMIX_ERR_NOT_FOUND == rc) && (false == try_again)) {
1256+
try_again = true; /* allow only one retry */
1257+
PMIX_QUERY_DESTRUCT(&query);
1258+
PMIX_QUERY_CONSTRUCT(&query); /* NOTE(review): query.nqual was set to 1 for the first attempt but is not assigned again after this re-construct; check whether it must be set to 2 for the qualifiers below to take effect */
1259+
PMIX_ARGV_APPEND(rc, query.keys, PMIX_QUERY_PSET_MEMBERSHIP);
1260+
PMIX_INFO_CREATE(query.qualifiers, 2);
1261+
PMIX_INFO_LOAD(&query.qualifiers[0], PMIX_PSET_NAME, pset_name, PMIX_STRING);
1262+
PMIX_INFO_LOAD(&query.qualifiers[1], PMIX_QUERY_REFRESH_CACHE, &refresh, PMIX_BOOL);
1263+
goto fn_try_again;
1264+
}
1265+
ret = opal_pmix_convert_status(rc); /* NOTE(review): this path is also taken when rc is PMIX_SUCCESS but ninfo is 0; verify the function cannot then return success without setting *group_out */
1266+
ompi_instance_print_error ("PMIx_Query_info() failed", ret);
1267+
goto fn_w_query;
1268+
}
12481269

1249-
OPAL_PMIX_CONVERT_NAME(&p, &name);
1250-
rc = PMIx_Get(&p, PMIX_PSET_NAME, NULL, 0, &pval);
1251-
if (OPAL_UNLIKELY(PMIX_SUCCESS != rc)) {
1252-
OBJ_RELEASE(group);
1253-
return opal_pmix_convert_status(rc);
1254-
}
1270+
for(n = 0; n < ninfo; n++){
1271+
if(0 == strcmp(info[n].key, PMIX_QUERY_PSET_MEMBERSHIP)){ /* only the first matching entry is used; see the break below */
1272+
1273+
pmix_data_array_t *data_array = info[n].value.data.darray;
1274+
pmix_proc_t *members_array = (pmix_proc_t*) data_array->array;
12551275

1256-
PMIX_VALUE_UNLOAD(rc,
1257-
pval,
1258-
(void **)&stmp,
1259-
&size);
1260-
if (0 != strcmp (pset_name, stmp)) {
1261-
PMIX_VALUE_RELEASE(pval);
1262-
free(stmp);
1263-
continue;
1264-
}
1265-
PMIX_VALUE_RELEASE(pval);
1266-
free(stmp);
1276+
group = ompi_group_allocate (NULL, data_array->size);
1277+
if (OPAL_UNLIKELY(NULL == group)) {
1278+
ret = OMPI_ERR_OUT_OF_RESOURCE;
1279+
goto fn_w_info;
1280+
}
12671281

1268-
/* look for existing ompi_proc_t that matches this name */
1269-
group->grp_proc_pointers[size] = (ompi_proc_t *) ompi_proc_lookup (name);
1270-
if (NULL == group->grp_proc_pointers[size]) {
1271-
/* set sentinel value */
1272-
group->grp_proc_pointers[size] = (ompi_proc_t *) ompi_proc_name_to_sentinel (name);
1273-
} else {
1274-
OBJ_RETAIN (group->grp_proc_pointers[size]);
1282+
for(i = 0; i < data_array->size; i++){
1283+
OPAL_PMIX_CONVERT_PROCT(ret, &pname, &members_array[i]);
1284+
if (OPAL_SUCCESS == rc) { /* NOTE(review): the conversion status was passed as ret on the previous line, but rc (the PMIx_Query_info status) is tested here -- likely meant ret; confirm */
1285+
group->grp_proc_pointers[i] = ompi_proc_find_and_add(&pname,&isnew);
1286+
} else {
1287+
ompi_instance_print_error ("OPAL_PMIX_CONVERT_PROCT failed %d", ret); /* NOTE(review): the message contains %d; verify ompi_instance_print_error formats it rather than printing it literally */
1288+
ompi_group_free(&group);
1289+
goto fn_w_info;
1290+
}
1291+
}
1292+
break;
12751293
}
1276-
++size;
12771294
}
12781295

1279-
/* shrink the proc array if needed */
1280-
if (size < (size_t) group->grp_proc_count) {
1281-
void *tmp = realloc (group->grp_proc_pointers, size * sizeof (group->grp_proc_pointers[0]));
1282-
if (OPAL_UNLIKELY(NULL == tmp)) {
1283-
OBJ_RELEASE(group);
1284-
return OMPI_ERR_OUT_OF_RESOURCE;
1285-
}
1286-
1287-
group->grp_proc_pointers = (ompi_proc_t **) tmp;
1288-
group->grp_proc_count = (int) size;
1296+
if (NULL != group) {
1297+
ompi_set_group_rank (group, ompi_proc_local());
1298+
group->grp_instance = instance;
1299+
*group_out = group;
1300+
} else {
1301+
ret = OMPI_ERR_NOT_FOUND; /* no PMIX_QUERY_PSET_MEMBERSHIP entry was found in the results */
12891302
}
12901303

1291-
ompi_set_group_rank (group, ompi_proc_local());
1292-
1293-
group->grp_instance = instance;
1304+
fn_w_info:
1305+
PMIX_INFO_DESTRUCT(info); /* NOTE(review): info was returned by PMIx_Query_info with ninfo entries; confirm PMIX_INFO_DESTRUCT (rather than PMIX_INFO_FREE(info, ninfo)) releases the whole array */
1306+
fn_w_query:
1307+
PMIX_QUERY_DESTRUCT(&query);
12941308

1295-
*group_out = group;
1296-
return OMPI_SUCCESS;
1309+
return ret;
12971310
}
12981311

12991312
static int ompi_instance_get_pmix_pset_size (ompi_instance_t *instance, const char *pset_name, size_t *size_out)

0 commit comments

Comments
 (0)