-
Notifications
You must be signed in to change notification settings - Fork 902
map-by ppr behavior change between 3.1.2 and later releases #6236
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Labels
Comments
This is a bug - please try the inline patch (which is 90% indentation cleanup). I also added more comments to explain the parsing of the input string:
diff --git a/orte/mca/rmaps/base/rmaps_base_frame.c b/orte/mca/rmaps/base/rmaps_base_frame.c
index 2b8bdeb4dd..718820a9e6 100644
--- a/orte/mca/rmaps/base/rmaps_base_frame.c
+++ b/orte/mca/rmaps/base/rmaps_base_frame.c
@@ -620,137 +620,137 @@ int orte_rmaps_base_set_mapping_policy(orte_job_t *jdata,
if (NULL == inspec) {
ORTE_SET_MAPPING_POLICY(tmp, ORTE_MAPPING_BYSOCKET);
- } else {
- spec = strdup(inspec); // protect the input string
- /* see if a colon was included - if so, then we have a policy + modifier */
- ck = strchr(spec, ':');
- if (NULL != ck) {
- /* if the colon is the first character of the string, then we
- * just have modifiers on the default mapping policy */
- if (ck == spec) {
- ck++;
- opal_output_verbose(5, orte_rmaps_base_framework.framework_output,
- "%s rmaps:base only modifiers %s provided - assuming bysocket mapping",
- ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ck);
- ORTE_SET_MAPPING_POLICY(tmp, ORTE_MAPPING_BYSOCKET);
- if (ORTE_ERR_SILENT == (rc = check_modifiers(ck, &tmp)) &&
- ORTE_ERR_BAD_PARAM != rc) {
- free(spec);
- return ORTE_ERR_SILENT;
- }
+ goto setpolicy;
+ }
+
+ spec = strdup(inspec); // protect the input string
+ /* see if a colon was included - if so, then we have a policy + modifier */
+ ck = strchr(spec, ':');
+ if (NULL != ck) {
+ /* if the colon is the first character of the string, then we
+ * just have modifiers on the default mapping policy */
+ if (ck == spec) {
+ ck++; // step over the colon
+ opal_output_verbose(5, orte_rmaps_base_framework.framework_output,
+ "%s rmaps:base only modifiers %s provided - assuming bysocket mapping",
+ ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ck);
+ ORTE_SET_MAPPING_POLICY(tmp, ORTE_MAPPING_BYSOCKET);
+ if (ORTE_ERR_SILENT == (rc = check_modifiers(ck, &tmp)) &&
+ ORTE_ERR_BAD_PARAM != rc) {
free(spec);
- goto setpolicy;
+ return ORTE_ERR_SILENT;
}
- /* split the string */
- *ck = '\0';
- ck++;
- opal_output_verbose(5, orte_rmaps_base_framework.framework_output,
- "%s rmaps:base policy %s modifiers %s provided",
- ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), spec, ck);
- /* if the policy is "dist", then we set the policy to that value
- * and save the second argument as the device
+ free(spec);
+ goto setpolicy;
+ }
+ *ck = '\0'; // terminate spec where the colon was
+ ck++; // step past the colon
+ opal_output_verbose(5, orte_rmaps_base_framework.framework_output,
+ "%s rmaps:base policy %s modifiers %s provided",
+ ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), spec, ck);
+
+ if (0 == strncasecmp(spec, "ppr", strlen(spec))) {
+ /* at this point, ck points to a string that contains at least
+ * two fields (specifying the #procs/obj and the object we are
+ * to map by). we have to allow additional modifiers here - e.g.,
+ * specifying #pe's/proc or oversubscribe - so check for modifiers. if
+ * they are present, ck will look like "N:obj:mod1,mod2,mod3"
*/
- if (0 == strncasecmp(spec, "ppr", strlen(spec))) {
- /* we have to allow additional modifiers here - e.g., specifying
- * #pe's/proc or oversubscribe - so check for modifiers
+ if (NULL == (ptr = strchr(ck, ':'))) {
+ /* this is an error - there had to be at least one
+ * colon to delimit the number from the object type
*/
- if (NULL == (ptr = strrchr(ck, ':'))) {
- /* this is an error - there had to be at least one
- * colon to delimit the number from the object type
- */
- orte_show_help("help-orte-rmaps-base.txt", "invalid-pattern", true, inspec);
+ orte_show_help("help-orte-rmaps-base.txt", "invalid-pattern", true, inspec);
+ free(spec);
+ return ORTE_ERR_SILENT;
+ }
+ ptr++; // move past the colon
+ /* at this point, ptr is pointing to the beginning of the string that describes
+ * the object plus any modifiers (i.e., "obj:mod1,mod2". We first check to see if there
+ * is another colon indicating that there are modifiers to the request */
+ if (NULL != (cptr = strchr(ptr, ':'))) {
+ /* there are modifiers, so we terminate the object string
+ * at the location of the colon */
+ *cptr = '\0';
+ /* step over that colon */
+ cptr++;
+ /* now check for modifiers - may be none, so
+ * don't emit an error message if the modifier
+ * isn't recognized */
+ if (ORTE_ERR_SILENT == (rc = check_modifiers(cptr, &tmp)) &&
+ ORTE_ERR_BAD_PARAM != rc) {
free(spec);
return ORTE_ERR_SILENT;
}
- ptr++; // move past the colon
- /* at this point, ck is pointing to the number of procs/object
- * and ptr is pointing to the beginning of the string that describes
- * the object plus any modifiers. We first check to see if there
- * is a comma indicating that there are modifiers to the request */
- if (NULL != (cptr = strchr(ptr, ','))) {
- /* there are modifiers, so we terminate the object string
- * at the location of the first comma */
- *cptr = '\0';
- /* step over that comma */
- cptr++;
- /* now check for modifiers - may be none, so
- * don't emit an error message if the modifier
- * isn't recognized */
- if (ORTE_ERR_SILENT == (rc = check_modifiers(cptr, &tmp)) &&
- ORTE_ERR_BAD_PARAM != rc) {
- free(spec);
- return ORTE_ERR_SILENT;
- }
- }
- /* now save the pattern */
- if (NULL == jdata || NULL == jdata->map) {
- orte_rmaps_base.ppr = strdup(ck);
- } else {
- jdata->map->ppr = strdup(ck);
- }
- ORTE_SET_MAPPING_POLICY(tmp, ORTE_MAPPING_PPR);
- ORTE_SET_MAPPING_DIRECTIVE(tmp, ORTE_MAPPING_GIVEN);
- free(spec);
- goto setpolicy;
}
- if (ORTE_SUCCESS != (rc = check_modifiers(ck, &tmp)) &&
- ORTE_ERR_TAKE_NEXT_OPTION != rc) {
- if (ORTE_ERR_BAD_PARAM == rc) {
- orte_show_help("help-orte-rmaps-base.txt", "unrecognized-modifier", true, inspec);
- }
- free(spec);
- return rc;
+ /* now save the pattern */
+ if (NULL == jdata || NULL == jdata->map) {
+ orte_rmaps_base.ppr = strdup(ck);
+ } else {
+ jdata->map->ppr = strdup(ck);
}
+ ORTE_SET_MAPPING_POLICY(tmp, ORTE_MAPPING_PPR);
+ ORTE_SET_MAPPING_DIRECTIVE(tmp, ORTE_MAPPING_GIVEN);
+ free(spec);
+ goto setpolicy;
}
- len = strlen(spec);
- if (0 == strncasecmp(spec, "slot", len)) {
- ORTE_SET_MAPPING_POLICY(tmp, ORTE_MAPPING_BYSLOT);
- } else if (0 == strncasecmp(spec, "node", len)) {
- ORTE_SET_MAPPING_POLICY(tmp, ORTE_MAPPING_BYNODE);
- } else if (0 == strncasecmp(spec, "seq", len)) {
- ORTE_SET_MAPPING_POLICY(tmp, ORTE_MAPPING_SEQ);
- } else if (0 == strncasecmp(spec, "core", len)) {
- ORTE_SET_MAPPING_POLICY(tmp, ORTE_MAPPING_BYCORE);
- } else if (0 == strncasecmp(spec, "l1cache", len)) {
- ORTE_SET_MAPPING_POLICY(tmp, ORTE_MAPPING_BYL1CACHE);
- } else if (0 == strncasecmp(spec, "l2cache", len)) {
- ORTE_SET_MAPPING_POLICY(tmp, ORTE_MAPPING_BYL2CACHE);
- } else if (0 == strncasecmp(spec, "l3cache", len)) {
- ORTE_SET_MAPPING_POLICY(tmp, ORTE_MAPPING_BYL3CACHE);
- } else if (0 == strncasecmp(spec, "socket", len)) {
- ORTE_SET_MAPPING_POLICY(tmp, ORTE_MAPPING_BYSOCKET);
- } else if (0 == strncasecmp(spec, "numa", len)) {
- ORTE_SET_MAPPING_POLICY(tmp, ORTE_MAPPING_BYNUMA);
- } else if (0 == strncasecmp(spec, "board", len)) {
- ORTE_SET_MAPPING_POLICY(tmp, ORTE_MAPPING_BYBOARD);
- } else if (0 == strncasecmp(spec, "hwthread", len)) {
- ORTE_SET_MAPPING_POLICY(tmp, ORTE_MAPPING_BYHWTHREAD);
- /* if we are mapping processes to individual hwthreads, then
- * we need to treat those hwthreads as separate cpus
- */
- opal_hwloc_use_hwthreads_as_cpus = true;
- } else if (0 == strncasecmp(spec, "dist", len)) {
- if (NULL != rmaps_dist_device) {
- if (NULL != (pch = strchr(rmaps_dist_device, ':'))) {
- *pch = '\0';
- }
- if (NULL != device) {
- *device = strdup(rmaps_dist_device);
- }
- ORTE_SET_MAPPING_POLICY(tmp, ORTE_MAPPING_BYDIST);
- } else {
- orte_show_help("help-orte-rmaps-base.txt", "device-not-specified", true);
- free(spec);
- return ORTE_ERR_SILENT;
+ if (ORTE_SUCCESS != (rc = check_modifiers(ck, &tmp)) &&
+ ORTE_ERR_TAKE_NEXT_OPTION != rc) {
+ if (ORTE_ERR_BAD_PARAM == rc) {
+ orte_show_help("help-orte-rmaps-base.txt", "unrecognized-modifier", true, inspec);
+ }
+ free(spec);
+ return rc;
+ }
+ }
+ len = strlen(spec);
+ if (0 == strncasecmp(spec, "slot", len)) {
+ ORTE_SET_MAPPING_POLICY(tmp, ORTE_MAPPING_BYSLOT);
+ } else if (0 == strncasecmp(spec, "node", len)) {
+ ORTE_SET_MAPPING_POLICY(tmp, ORTE_MAPPING_BYNODE);
+ } else if (0 == strncasecmp(spec, "seq", len)) {
+ ORTE_SET_MAPPING_POLICY(tmp, ORTE_MAPPING_SEQ);
+ } else if (0 == strncasecmp(spec, "core", len)) {
+ ORTE_SET_MAPPING_POLICY(tmp, ORTE_MAPPING_BYCORE);
+ } else if (0 == strncasecmp(spec, "l1cache", len)) {
+ ORTE_SET_MAPPING_POLICY(tmp, ORTE_MAPPING_BYL1CACHE);
+ } else if (0 == strncasecmp(spec, "l2cache", len)) {
+ ORTE_SET_MAPPING_POLICY(tmp, ORTE_MAPPING_BYL2CACHE);
+ } else if (0 == strncasecmp(spec, "l3cache", len)) {
+ ORTE_SET_MAPPING_POLICY(tmp, ORTE_MAPPING_BYL3CACHE);
+ } else if (0 == strncasecmp(spec, "socket", len)) {
+ ORTE_SET_MAPPING_POLICY(tmp, ORTE_MAPPING_BYSOCKET);
+ } else if (0 == strncasecmp(spec, "numa", len)) {
+ ORTE_SET_MAPPING_POLICY(tmp, ORTE_MAPPING_BYNUMA);
+ } else if (0 == strncasecmp(spec, "board", len)) {
+ ORTE_SET_MAPPING_POLICY(tmp, ORTE_MAPPING_BYBOARD);
+ } else if (0 == strncasecmp(spec, "hwthread", len)) {
+ ORTE_SET_MAPPING_POLICY(tmp, ORTE_MAPPING_BYHWTHREAD);
+ /* if we are mapping processes to individual hwthreads, then
+ * we need to treat those hwthreads as separate cpus
+ */
+ opal_hwloc_use_hwthreads_as_cpus = true;
+ } else if (0 == strncasecmp(spec, "dist", len)) {
+ if (NULL != rmaps_dist_device) {
+ if (NULL != (pch = strchr(rmaps_dist_device, ':'))) {
+ *pch = '\0';
+ }
+ if (NULL != device) {
+ *device = strdup(rmaps_dist_device);
}
+ ORTE_SET_MAPPING_POLICY(tmp, ORTE_MAPPING_BYDIST);
} else {
- orte_show_help("help-orte-rmaps-base.txt", "unrecognized-policy", true, "mapping", spec);
+ orte_show_help("help-orte-rmaps-base.txt", "device-not-specified", true);
free(spec);
return ORTE_ERR_SILENT;
}
+ } else {
+ orte_show_help("help-orte-rmaps-base.txt", "unrecognized-policy", true, "mapping", spec);
free(spec);
- ORTE_SET_MAPPING_DIRECTIVE(tmp, ORTE_MAPPING_GIVEN);
+ return ORTE_ERR_SILENT;
}
+ free(spec);
+ ORTE_SET_MAPPING_DIRECTIVE(tmp, ORTE_MAPPING_GIVEN);
setpolicy:
if (NULL == jdata || NULL == jdata->map) { |
Actually, I'll just give you a PR to test - easier that way. |
Closing. We're long past the 3.1.x releases. |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
A user is reporting a problem with a ppr mapping specification option that appeared to work prior to the OMPI 3.1.3 release.
Here's what the user was using for placement options (simplified) which works with 3.1.2 and older
versions of Open MPI:
but launching the job using these options with 3.1.3 (or master or 4.0.x) gives:
Now if the user replaces the above command line options with
the command seems to work.
If the user uses the
,
for 3.1.2 or older, however, she gets a similar error message from mpirun. This change in behavior was due to 376d408.
So, the question is, which notation is correct? It's not clear from the mpirun man page why one should use a
,
starting with 3.1.3 but use a:
for 3.1.2 and older releases. The text was updated successfully, but these errors were encountered: