diff --git a/orte/mca/ess/singleton/ess_singleton_module.c b/orte/mca/ess/singleton/ess_singleton_module.c index 9fe0b943228..571d4531854 100644 --- a/orte/mca/ess/singleton/ess_singleton_module.c +++ b/orte/mca/ess/singleton/ess_singleton_module.c @@ -85,9 +85,7 @@ static int rte_init(void) { int rc, ret; char *error = NULL; - char *envar, *ev1, *ev2; - uint64_t unique_key[2]; - char *string_key; + char *ev1, *ev2; opal_value_t *kv; char *val; int u32, *u32ptr; @@ -265,19 +263,7 @@ static int rte_init(void) * we can use the jobfam and stepid as unique keys * because they are unique values assigned by the RM */ - if (NULL == getenv(OPAL_MCA_PREFIX"orte_precondition_transports")) { - unique_key[0] = ORTE_JOB_FAMILY(ORTE_PROC_MY_NAME->jobid); - unique_key[1] = ORTE_LOCAL_JOBID(ORTE_PROC_MY_NAME->jobid); - if (NULL == (string_key = orte_pre_condition_transports_print(unique_key))) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - return ORTE_ERR_OUT_OF_RESOURCE; - } - asprintf(&envar, OPAL_MCA_PREFIX"orte_precondition_transports=%s", string_key); - putenv(envar); - added_transport_keys = true; - /* cannot free the envar as that messes up our environ */ - free(string_key); - } + assert (NULL != getenv(OPAL_MCA_PREFIX"orte_precondition_transports")); /* retrieve our topology */ OPAL_MODEX_RECV_VALUE(ret, OPAL_PMIX_LOCAL_TOPO, @@ -586,6 +572,12 @@ static int fork_hnp(void) memset(orted_uri, 0, buffer_length); while (chunk == (rc = read(p[0], &orted_uri[num_chars_read], chunk))) { + if (rc < 0 && (EAGAIN == errno || EINTR == errno)) { + continue; + } else { + num_chars_read = 0; + break; + } /* we read an entire buffer - better get more */ num_chars_read += chunk; orted_uri = realloc((void*)orted_uri, buffer_length+ORTE_URI_MSG_LGTH); diff --git a/orte/orted/orted_main.c b/orte/orted/orted_main.c index d975e7d9d6c..3c7bc63db26 100644 --- a/orte/orted/orted_main.c +++ b/orte/orted/orted_main.c @@ -17,7 +17,7 @@ * et Automatique. All rights reserved. * Copyright (c) 2010 Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2013-2016 Intel, Inc. All rights reserved. - * Copyright (c) 2015 Research Organization for Information Science + * Copyright (c) 2015-2016 Research Organization for Information Science * and Technology (RIST). All rights reserved. * $COPYRIGHT$ * @@ -60,6 +60,7 @@ #include "opal/util/os_path.h" #include "opal/util/printf.h" #include "opal/util/argv.h" +#include "opal/util/fd.h" #include "opal/runtime/opal.h" #include "opal/mca/base/mca_base_var.h" #include "opal/util/daemon_init.h" @@ -74,6 +75,7 @@ #include "orte/util/nidmap.h" #include "orte/util/parse_options.h" #include "orte/mca/rml/base/rml_contact.h" +#include "orte/util/pre_condition_transports.h" #include "orte/mca/errmgr/errmgr.h" #include "orte/mca/ess/ess.h" @@ -526,7 +528,7 @@ int orte_daemon(int argc, char *argv[]) orte_node_t *node; orte_app_context_t *app; char *tmp, *nptr, *sysinfo; - char **singenv=NULL; + char **singenv=NULL, *string_key, *env_str; /* setup the singleton's job */ jdata = OBJ_NEW(orte_job_t); @@ -587,6 +589,9 @@ int orte_daemon(int argc, char *argv[]) proc->app_idx = 0; ORTE_FLAG_SET(proc, ORTE_PROC_FLAG_LOCAL); + /* set the ORTE_JOB_TRANSPORT_KEY from the environment */ + orte_pre_condition_transports(jdata); + /* register the singleton's nspace with our PMIx server */ if (ORTE_SUCCESS != (ret = orte_pmix_server_register_nspace(jdata))) { ORTE_ERROR_LOG(ret); @@ -594,9 +599,19 @@ int orte_daemon(int argc, char *argv[]) } /* use setup fork to create the envars needed by the singleton */ if (OPAL_SUCCESS != (ret = opal_pmix.server_setup_fork(&proc->name, &singenv))) { - ORTE_ERROR_LOG(ret); - goto DONE; + ORTE_ERROR_LOG(ret); + goto DONE; + } + + /* append the transport key to the envars needed by the singleton */ + if (!orte_get_attribute(&jdata->attributes, ORTE_JOB_TRANSPORT_KEY, (void**)&string_key, OPAL_STRING) || NULL == string_key) { + ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND); + goto DONE; } + asprintf(&env_str, OPAL_MCA_PREFIX"orte_precondition_transports=%s", string_key); + opal_argv_append_nosize(&singenv, env_str); + free(env_str); + nptr = opal_argv_join(singenv, ','); opal_argv_free(singenv); /* create a string that contains our uri + sysinfo + PMIx server URI envars */ @@ -606,7 +621,10 @@ int orte_daemon(int argc, char *argv[]) free(nptr); /* pass that info to the singleton */ - write(orted_globals.uri_pipe, tmp, strlen(tmp)+1); /* need to add 1 to get the NULL */ + if (OPAL_SUCCESS != (ret = opal_fd_write(orted_globals.uri_pipe, strlen(tmp)+1, tmp))) { ; /* need to add 1 to get the NULL */ + ORTE_ERROR_LOG(ret); + goto DONE; + } /* cleanup */ free(tmp);