From 45f23ca5c92633f3053569780bea5576eaa1f3a0 Mon Sep 17 00:00:00 2001 From: Ralph Castain Date: Wed, 26 Sep 2018 10:00:09 -0700 Subject: [PATCH] Update mapping system Correctly transfer job-level mapping directives for dynamically spawned jobs to the mapping system. Signed-off-by: Ralph Castain --- orte/mca/rmaps/base/base.h | 3 ++- orte/mca/rmaps/base/rmaps_base_frame.c | 17 +++++++++++++---- orte/orted/orted_submit.c | 2 +- orte/orted/pmix/pmix_server_dyn.c | 2 +- orte/orted/pmix/pmix_server_gen.c | 16 ++++++++-------- 5 files changed, 25 insertions(+), 15 deletions(-) diff --git a/orte/mca/rmaps/base/base.h b/orte/mca/rmaps/base/base.h index b893581b13d..fa0915993e5 100644 --- a/orte/mca/rmaps/base/base.h +++ b/orte/mca/rmaps/base/base.h @@ -123,7 +123,8 @@ ORTE_DECLSPEC int orte_rmaps_base_filter_nodes(orte_app_context_t *app, opal_list_t *nodes, bool remove); -ORTE_DECLSPEC int orte_rmaps_base_set_mapping_policy(orte_mapping_policy_t *policy, +ORTE_DECLSPEC int orte_rmaps_base_set_mapping_policy(orte_job_t *jdata, + orte_mapping_policy_t *policy, char **device, char *spec); ORTE_DECLSPEC int orte_rmaps_base_set_ranking_policy(orte_ranking_policy_t *policy, orte_mapping_policy_t mapping, diff --git a/orte/mca/rmaps/base/rmaps_base_frame.c b/orte/mca/rmaps/base/rmaps_base_frame.c index 9300b338ddd..7f15e075bc9 100644 --- a/orte/mca/rmaps/base/rmaps_base_frame.c +++ b/orte/mca/rmaps/base/rmaps_base_frame.c @@ -296,7 +296,7 @@ static int orte_rmaps_base_open(mca_base_open_flag_t flags) "rmaps_base_cpus_per_proc", "rmaps_base_mapping_policy=:PE=N, default =NUMA"); } - if (ORTE_SUCCESS != (rc = orte_rmaps_base_set_mapping_policy(&orte_rmaps_base.mapping, + if (ORTE_SUCCESS != (rc = orte_rmaps_base_set_mapping_policy(NULL, &orte_rmaps_base.mapping, &orte_rmaps_base.device, rmaps_base_mapping_policy))) { return rc; @@ -593,7 +593,8 @@ static int check_modifiers(char *ck, orte_mapping_policy_t *tmp) return ORTE_ERR_TAKE_NEXT_OPTION; } -int orte_rmaps_base_set_mapping_policy(orte_mapping_policy_t *policy, +int orte_rmaps_base_set_mapping_policy(orte_job_t *jdata, + orte_mapping_policy_t *policy, char **device, char *inspec) { char *ck; @@ -681,7 +682,11 @@ int orte_rmaps_base_set_mapping_policy(orte_mapping_policy_t *policy, } } /* now save the pattern */ - orte_rmaps_base.ppr = strdup(ck); + if (NULL == jdata || NULL == jdata->map) { + orte_rmaps_base.ppr = strdup(ck); + } else { + jdata->map->ppr = strdup(ck); + } ORTE_SET_MAPPING_POLICY(tmp, ORTE_MAPPING_PPR); ORTE_SET_MAPPING_DIRECTIVE(tmp, ORTE_MAPPING_GIVEN); free(spec); @@ -747,7 +752,11 @@ int orte_rmaps_base_set_mapping_policy(orte_mapping_policy_t *policy, } setpolicy: - *policy = tmp; + if (NULL == jdata || NULL == jdata->map) { + *policy = tmp; + } else { + jdata->map->mapping = tmp; + } return ORTE_SUCCESS; } diff --git a/orte/orted/orted_submit.c b/orte/orted/orted_submit.c index f835db4ae2f..5b8b0242c32 100644 --- a/orte/orted/orted_submit.c +++ b/orte/orted/orted_submit.c @@ -876,7 +876,7 @@ int orte_submit_job(char *argv[], int *index, jdata->map = OBJ_NEW(orte_job_map_t); if (NULL != orte_cmd_options.mapping_policy) { - if (ORTE_SUCCESS != (rc = orte_rmaps_base_set_mapping_policy(&jdata->map->mapping, NULL, orte_cmd_options.mapping_policy))) { + if (ORTE_SUCCESS != (rc = orte_rmaps_base_set_mapping_policy(jdata, &jdata->map->mapping, NULL, orte_cmd_options.mapping_policy))) { ORTE_ERROR_LOG(rc); return rc; } diff --git a/orte/orted/pmix/pmix_server_dyn.c b/orte/orted/pmix/pmix_server_dyn.c index 89b4303ba54..a2f18c4532c 100644 --- a/orte/orted/pmix/pmix_server_dyn.c +++ b/orte/orted/pmix/pmix_server_dyn.c @@ -308,7 +308,7 @@ int pmix_server_spawn_fn(opal_process_name_t *requestor, orte_rmaps_base_print_mapping(orte_rmaps_base.mapping)); return ORTE_ERR_BAD_PARAM; } - rc = orte_rmaps_base_set_mapping_policy(&jdata->map->mapping, + rc = orte_rmaps_base_set_mapping_policy(jdata, &jdata->map->mapping, NULL, info->data.string); if (ORTE_SUCCESS != rc) { return rc; diff --git a/orte/orted/pmix/pmix_server_gen.c b/orte/orted/pmix/pmix_server_gen.c index 25244c2a340..648d69557a7 100644 --- a/orte/orted/pmix/pmix_server_gen.c +++ b/orte/orted/pmix/pmix_server_gen.c @@ -489,7 +489,7 @@ static void _query(int sd, short args, void *cbdata) orte_job_t *jdata; orte_proc_t *proct; orte_app_context_t *app; - int rc, i, k, num_replies; + int rc = ORTE_SUCCESS, i, k, num_replies; opal_list_t *results, targets, *array; size_t n; uint32_t key; @@ -716,7 +716,7 @@ static void _query(int sd, short args, void *cbdata) } } if (ORTE_JOBID_INVALID == jobid) { - rc = ORTE_ERR_BAD_PARAM; + rc = ORTE_ERR_NOT_FOUND; goto done; } /* construct a list of values with opal_proc_info_t @@ -810,12 +810,12 @@ static void _query(int sd, short args, void *cbdata) } done: - if (0 == opal_list_get_size(results)) { - rc = ORTE_ERR_NOT_FOUND; - } else if (opal_list_get_size(results) < opal_list_get_size(cd->info)) { - rc = ORTE_ERR_PARTIAL_SUCCESS; - } else { - rc = ORTE_SUCCESS; + if (ORTE_SUCCESS == rc) { + if (0 == opal_list_get_size(results)) { + rc = ORTE_ERR_NOT_FOUND; + } else if (opal_list_get_size(results) < opal_list_get_size(cd->info)) { + rc = ORTE_ERR_PARTIAL_SUCCESS; + } } cd->infocbfunc(rc, results, cd->cbdata, qrel, results); }