diff --git a/config/opal_check_singularity.m4 b/config/opal_check_singularity.m4 new file mode 100644 index 00000000000..1b0c38bd457 --- /dev/null +++ b/config/opal_check_singularity.m4 @@ -0,0 +1,64 @@ +# -*- shell-script ; indent-tabs-mode:nil -*- +# +# Copyright (c) 2016 Intel, Inc. All rights reserved. +# $COPYRIGHT$ +# +# Additional copyrights may follow +# +# $HEADER$ +# + +# OPAL_CHECK_SINGULARITY(prefix, [action-if-found], [action-if-not-found]) +# -------------------------------------------------------- +# Honors --with-singularity(=DIR): looks for the "singularity" executable +# in PATH (with DIR/bin prepended when DIR is given), defines +# OPAL_SINGULARITY_PATH to DIR/bin (or to the empty string), and runs +# action-if-found only when the executable was located. +AC_DEFUN([OPAL_CHECK_SINGULARITY],[ + OPAL_VAR_SCOPE_PUSH([spath have_singularity]) + + AC_ARG_WITH([singularity], + [AC_HELP_STRING([--with-singularity(=DIR)], + [Build support for the Singularity container, optionally adding DIR to the search path])]) + spath= + AC_MSG_CHECKING([if Singularity support is to be built]) + AS_IF([test "$with_singularity" = "no"], + [AC_MSG_RESULT([no]) + have_singularity=no], + [AC_MSG_RESULT([yes]) + AS_IF([test -z "$with_singularity" || test "$with_singularity" = "yes"], + [ # look for the singularity command in the default path + AC_CHECK_PROG([SINGULARITY], [singularity], [singularity]) + AS_IF([test "$SINGULARITY" != ""], + [have_singularity=yes], + [AS_IF([test "$with_singularity" = "yes"], + [AC_MSG_WARN([Singularity support requested, but required executable]) + AC_MSG_WARN(["singularity" not found in default locations]) + AC_MSG_ERROR([Cannot continue])]) + have_singularity=no])], + [ AC_MSG_CHECKING([for existence of $with_singularity/bin]) + # look for the singularity command in the bin subdirectory + AS_IF([test ! 
-d "$with_singularity/bin"], + [AC_MSG_RESULT([not found]) + AC_MSG_WARN([Directory $with_singularity/bin not found]) + AC_MSG_ERROR([Cannot continue])], + [AC_MSG_RESULT([found])]) + save_path=$PATH + PATH=$with_singularity/bin:$PATH + AC_CHECK_PROG([SINGULARITY], [singularity], [singularity]) + AS_IF([test "$SINGULARITY" != ""], + [have_singularity=yes + spath=$with_singularity/bin], + [have_singularity=no + AC_MSG_WARN([Singularity support requested, but required executable]) + AC_MSG_WARN(["singularity" not found in either default or specified path]) + AC_MSG_ERROR([Cannot continue])]) + PATH=$save_path + ] + )]) + + AC_DEFINE_UNQUOTED(OPAL_SINGULARITY_PATH, "$spath", [Path to Singularity binaries]) + AS_IF([test "$have_singularity" = "yes"], + [$2], [$3]) + OPAL_VAR_SCOPE_POP +]) diff --git a/orte/mca/ess/base/ess_base_fns.c b/orte/mca/ess/base/ess_base_fns.c index ab121720431..4404f1ba4e4 100644 --- a/orte/mca/ess/base/ess_base_fns.c +++ b/orte/mca/ess/base/ess_base_fns.c @@ -12,7 +12,7 @@ * Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2011-2012 Los Alamos National Security, LLC. * All rights reserved. - * Copyright (c) 2014-2015 Intel, Inc. All rights reserved. + * Copyright (c) 2014-2016 Intel, Inc. All rights reserved. * Copyright (c) 2014 Research Organization for Information Science * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ @@ -76,6 +76,9 @@ int orte_ess_base_proc_binding(void) /* we were not bound at launch */ if (NULL == opal_hwloc_topology) { /* there is nothing we can do, so just return */ + OPAL_OUTPUT_VERBOSE((5, orte_ess_base_framework.framework_output, + "%s NULL topology - Binding not supported", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME))); return ORTE_SUCCESS; } support = (struct hwloc_topology_support*)hwloc_topology_get_support(opal_hwloc_topology); @@ -115,6 +118,9 @@ int orte_ess_base_proc_binding(void) /* the system is capable of doing processor affinity, but it * has not yet been set - see if a slot_list was given */ + OPAL_OUTPUT_VERBOSE((5, orte_ess_base_framework.framework_output, + "%s Binding available - computing", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME))); hwloc_bitmap_zero(cpus); if (OPAL_BIND_TO_CPUSET == OPAL_GET_BINDING_POLICY(opal_hwloc_binding_policy)) { if (OPAL_SUCCESS != (ret = opal_hwloc_base_slot_list_parse(opal_hwloc_base_slot_list, @@ -269,7 +275,7 @@ int orte_ess_base_proc_binding(void) orte_process_info.cpuset = NULL; } if (opal_hwloc_report_bindings || 4 < opal_output_get_verbosity(orte_ess_base_framework.framework_output)) { - opal_output(0, "MCW rank %d is not bound", + opal_output(0, "MCW rank %d is not bound - get_cpubind returned negative", ORTE_PROC_MY_NAME->vpid); } } else { diff --git a/orte/mca/ess/base/ess_base_std_orted.c b/orte/mca/ess/base/ess_base_std_orted.c index b6b5b6352c5..6e5b418a7bb 100644 --- a/orte/mca/ess/base/ess_base_std_orted.c +++ b/orte/mca/ess/base/ess_base_std_orted.c @@ -14,7 +14,7 @@ * Copyright (c) 2011 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2011-2015 Los Alamos National Security, LLC. All rights * reserved. - * Copyright (c) 2013-2015 Intel, Inc. All rights reserved. + * Copyright (c) 2013-2016 Intel, Inc. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -56,7 +56,6 @@ #include "orte/mca/plm/base/base.h" #include "orte/mca/odls/base/base.h" #include "orte/mca/errmgr/errmgr.h" -#include "orte/mca/schizo/base/base.h" #include "orte/mca/filem/base/base.h" #include "orte/util/proc_info.h" #include "orte/util/session_dir.h" @@ -582,18 +581,6 @@ int orte_ess_base_orted_setup(char **hosts) goto error; } - /* setup the SCHIZO framework */ - if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_schizo_base_framework, 0))) { - ORTE_ERROR_LOG(ret); - error = "orte_schizo_base_open"; - goto error; - } - if (ORTE_SUCCESS != (ret = orte_schizo_base_select())) { - ORTE_ERROR_LOG(ret); - error = "orte_schizo_select"; - goto error; - } - return ORTE_SUCCESS; error: @@ -625,7 +612,6 @@ int orte_ess_base_orted_finalize(void) (void) mca_base_framework_close(&opal_pmix_base_framework); /* close frameworks */ - (void) mca_base_framework_close(&orte_schizo_base_framework); (void) mca_base_framework_close(&orte_filem_base_framework); (void) mca_base_framework_close(&orte_grpcomm_base_framework); (void) mca_base_framework_close(&orte_iof_base_framework); diff --git a/orte/mca/ess/base/ess_base_std_tool.c b/orte/mca/ess/base/ess_base_std_tool.c index c48f85a2b72..4615eba2f83 100644 --- a/orte/mca/ess/base/ess_base_std_tool.c +++ b/orte/mca/ess/base/ess_base_std_tool.c @@ -11,7 +11,7 @@ * All rights reserved. * Copyright (c) 2011-2015 Los Alamos National Security, LLC. * All rights reserved. - * Copyright (c) 2013-2015 Intel, Inc. All rights reserved. + * Copyright (c) 2013-2016 Intel, Inc. All rights reserved. * Copyright (c) 2014 Hochschule Esslingen. All rights reserved. * * Copyright (c) 2015 Cisco Systems, Inc. All rights reserved. 
@@ -47,7 +47,6 @@ #include "orte/mca/errmgr/base/base.h" #include "orte/mca/iof/base/base.h" #include "orte/mca/state/base/base.h" -#include "orte/mca/schizo/base/base.h" #include "orte/util/proc_info.h" #include "orte/util/session_dir.h" #include "orte/util/show_help.h" @@ -188,18 +187,6 @@ int orte_ess_base_tool_setup(void) * base proxy functions */ } - /* setup schizo in case we are parsing cmd lines */ - if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_schizo_base_framework, 0))) { - ORTE_ERROR_LOG(ret); - error = "orte_schizo_base_open"; - goto error; - } - if (ORTE_SUCCESS != (ret = orte_schizo_base_select())) { - ORTE_ERROR_LOG(ret); - error = "orte_schizo_base_select"; - goto error; - } - return ORTE_SUCCESS; error: @@ -223,7 +210,6 @@ int orte_ess_base_tool_finalize(void) } (void) mca_base_framework_close(&orte_routed_base_framework); (void) mca_base_framework_close(&orte_rml_base_framework); - (void) mca_base_framework_close(&orte_schizo_base_framework); (void) mca_base_framework_close(&orte_errmgr_base_framework); return ORTE_SUCCESS; diff --git a/orte/mca/ess/hnp/ess_hnp_module.c b/orte/mca/ess/hnp/ess_hnp_module.c index 0fdbbd935a7..ad55b9d82ff 100644 --- a/orte/mca/ess/hnp/ess_hnp_module.c +++ b/orte/mca/ess/hnp/ess_hnp_module.c @@ -13,7 +13,7 @@ * Copyright (c) 2011-2014 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2011-2015 Los Alamos National Security, LLC. All rights * reserved. - * Copyright (c) 2013-2015 Intel, Inc. All rights reserved. + * Copyright (c) 2013-2016 Intel, Inc. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -68,7 +68,6 @@ #include "orte/mca/odls/base/base.h" #include "orte/mca/rmaps/base/base.h" #include "orte/mca/filem/base/base.h" -#include "orte/mca/schizo/base/base.h" #include "orte/mca/state/base/base.h" #include "orte/mca/state/state.h" @@ -694,18 +693,6 @@ static int rte_init(void) goto error; } - /* setup the schizo framework */ - if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_schizo_base_framework, 0))) { - ORTE_ERROR_LOG(ret); - error = "orte_schizo_base_open"; - goto error; - } - if (ORTE_SUCCESS != (ret = orte_schizo_base_select())) { - ORTE_ERROR_LOG(ret); - error = "orte_schizo_select"; - goto error; - } - /* if a tool has launched us and is requesting event reports, * then set its contact info into the comm system */ @@ -774,7 +761,6 @@ static int rte_finalize(void) /* cleanup our data server */ orte_data_server_finalize(); - (void) mca_base_framework_close(&orte_schizo_base_framework); (void) mca_base_framework_close(&orte_dfs_base_framework); (void) mca_base_framework_close(&orte_filem_base_framework); /* output any lingering stdout/err data */ diff --git a/orte/mca/ess/pmi/ess_pmi_component.c b/orte/mca/ess/pmi/ess_pmi_component.c index b877ed36439..adabefe8d32 100644 --- a/orte/mca/ess/pmi/ess_pmi_component.c +++ b/orte/mca/ess/pmi/ess_pmi_component.c @@ -3,7 +3,7 @@ * Copyright (c) 2011 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2011-2015 Los Alamos National Security, LLC. All * rights reserved. - * Copyright (c) 2014 Intel, Inc. All rights reserved. + * Copyright (c) 2014-2016 Intel, Inc. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -26,6 +26,7 @@ #include "orte/util/proc_info.h" #include "orte/mca/errmgr/errmgr.h" +#include "orte/mca/schizo/schizo.h" #include "orte/mca/ess/ess.h" #include "orte/mca/ess/pmi/ess_pmi.h" @@ -67,41 +68,27 @@ static int pmi_component_open(void) static int pmi_component_query(mca_base_module_t **module, int *priority) { - int ret; - - /* all APPS must use pmix */ - if (ORTE_PROC_IS_APP) { - if (NULL == opal_pmix.initialized) { - /* open and setup pmix */ - if (OPAL_SUCCESS != (ret = mca_base_framework_open(&opal_pmix_base_framework, 0))) { - ORTE_ERROR_LOG(ret); - *priority = -1; - *module = NULL; - return ret; - } - if (OPAL_SUCCESS != (ret = opal_pmix_base_select())) { - /* don't error log this as it might not be an error at all */ - *priority = -1; - *module = NULL; - (void) mca_base_framework_close(&opal_pmix_base_framework); - return ret; - } - } - if (!opal_pmix.initialized() && (OPAL_SUCCESS != (ret = opal_pmix.init()))) { - /* we cannot be in a PMI environment */ - *priority = -1; - *module = NULL; - return ORTE_ERROR; - } - *priority = 35; - *module = (mca_base_module_t *)&orte_ess_pmi_module; - return ORTE_SUCCESS; + orte_schizo_launch_environ_t ret; + + if (!ORTE_PROC_IS_APP) { /* this component only serves application procs */ + *module = NULL; + *priority = 0; + return ORTE_ERROR; + } + + /* ask the schizo module how this process was launched */ + ret = orte_schizo.check_launch_environment(); + if (ORTE_SCHIZO_UNMANAGED_SINGLETON == ret || + ORTE_SCHIZO_MANAGED_SINGLETON == ret) { + /* a singleton of either kind - decline, this is not us */ + *module = NULL; + *priority = 0; + return ORTE_ERROR; } - /* we can't run */ - *priority = -1; - *module = NULL; - return ORTE_ERROR; + *priority = 35; + *module = (mca_base_module_t *)&orte_ess_pmi_module; + return ORTE_SUCCESS; } diff --git a/orte/mca/ess/singleton/ess_singleton_component.c b/orte/mca/ess/singleton/ess_singleton_component.c index 
05322bbd3a1..bb20759f138 100644 --- a/orte/mca/ess/singleton/ess_singleton_component.c +++ 
b/orte/mca/ess/singleton/ess_singleton_component.c @@ -12,6 +12,7 @@ * All rights reserved. * Copyright (c) 2015 Los Alamos National Security, LLC. All rights * reserved. + * Copyright (c) 2016 Intel, Inc. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -33,6 +34,7 @@ #include "orte/util/proc_info.h" #include "orte/mca/errmgr/errmgr.h" +#include "orte/mca/schizo/schizo.h" #include "orte/mca/ess/ess.h" #include "orte/mca/ess/singleton/ess_singleton.h" @@ -97,7 +99,7 @@ orte_ess_singleton_component_open(void) int orte_ess_singleton_component_query(mca_base_module_t **module, int *priority) { - int ret; + orte_schizo_launch_environ_t ret; /* if we are an HNP, daemon, or tool, then we * are definitely not a singleton! @@ -109,45 +111,18 @@ int orte_ess_singleton_component_query(mca_base_module_t **module, int *priority return ORTE_ERROR; } - /* okay, we still could be a singleton or - * an application process. If we have been - * given an HNP URI, then we are definitely - * not a singleton - */ - if (NULL != orte_process_info.my_hnp_uri) { - *module = NULL; - return ORTE_ERROR; - } - - /* open and setup pmix */ - if (NULL == opal_pmix.initialized) { - if (OPAL_SUCCESS != (ret = mca_base_framework_open(&opal_pmix_base_framework, 0))) { - /* if PMIx is not available, then we are indeed a singleton */ - goto single; - } - if (OPAL_SUCCESS != (ret = opal_pmix_base_select())) { - /* if PMIx is not available, then we are indeed a singleton */ - (void) mca_base_framework_close(&opal_pmix_base_framework); - goto single; - } - } - if (opal_pmix.initialized()) { - /* we are in a PMI environment and are therefore - * not a singleton */ - *priority = -1; + /* find out what our environment looks like */ + ret = orte_schizo.check_launch_environment(); + if (ORTE_SCHIZO_UNMANAGED_SINGLETON != ret && + ORTE_SCHIZO_MANAGED_SINGLETON != ret) { + /* not us */ *module = NULL; + *priority = 0; return ORTE_ERROR; } - single: - /* okay, we could still be an 
application process, - * but launched in "standalone" mode - i.e., directly - * launched by an environment instead of via mpirun. - * We need to set our priority low so that any enviro - * component will override us. If they don't, then we - * want to be selected as we must be a singleton - */ - *priority = 25; + /* okay, we want to be selected as we must be a singleton */ + *priority = 100; *module = (mca_base_module_t *)&orte_ess_singleton_module; return ORTE_SUCCESS; } diff --git a/orte/mca/ess/singleton/ess_singleton_module.c b/orte/mca/ess/singleton/ess_singleton_module.c index 1d30fb88da2..bec96958c98 100644 --- a/orte/mca/ess/singleton/ess_singleton_module.c +++ b/orte/mca/ess/singleton/ess_singleton_module.c @@ -12,9 +12,11 @@ * All rights reserved. * Copyright (c) 2010 Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2011 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2013-2015 Intel, Inc. All rights reserved. + * Copyright (c) 2013-2016 Intel, Inc. All rights reserved. * Copyright (c) 2015 Los Alamos National Security, LLC. All rights * reserved. + * Copyright (c) 2016 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -40,10 +42,10 @@ #include "opal/util/argv.h" #include "opal/util/opal_environ.h" #include "opal/util/path.h" +#include "opal/runtime/opal_progress_threads.h" #include "opal/mca/installdirs/installdirs.h" #include "opal/mca/pmix/base/base.h" #include "opal/mca/pmix/pmix.h" -#include "opal/runtime/opal_progress_threads.h" #include "orte/util/show_help.h" #include "orte/util/proc_info.h" @@ -74,7 +76,6 @@ static bool added_transport_keys=false; static bool added_num_procs = false; static bool added_app_ctx = false; static bool added_pmix_envs = false; -static char *pmixenvars[4]; static bool progress_thread_running = false; static int fork_hnp(void); @@ -83,13 +84,11 @@ static int rte_init(void) { int rc, ret; char *error = NULL; - char *envar, *ev1, *ev2; - uint64_t unique_key[2]; - char *string_key; opal_value_t *kv; - char *val; + char *val = NULL; int u32, *u32ptr; uint16_t u16, *u16ptr; + orte_process_name_t name; /* run the prolog */ if (ORTE_SUCCESS != (rc = orte_ess_base_std_prolog())) { @@ -156,7 +155,10 @@ static int rte_init(void) ORTE_PROC_MY_NAME->vpid = 0; /* for convenience, push the pubsub version of this param into the environ */ - opal_setenv (OPAL_MCA_PREFIX"pubsub_orte_server", orte_process_info.my_hnp_uri, 1, &environ); + opal_setenv (OPAL_MCA_PREFIX"pubsub_orte_server", orte_process_info.my_hnp_uri, true, &environ); + } else if (NULL != getenv("SINGULARITY_CONTAINER")) { + /* ensure we use the isolated pmix component */ + opal_setenv (OPAL_MCA_PREFIX"pmix", "isolated", true, &environ); } else { /* we want to use PMIX_NAMESPACE that will be sent by the hnp as a jobid */ opal_setenv(OPAL_MCA_PREFIX"orte_launch", "1", true, &environ); @@ -166,6 +168,7 @@ static int rte_init(void) return rc; } /* our name was given to us by the HNP */ + opal_setenv (OPAL_MCA_PREFIX"pmix", "^s1,s2,cray,isolated", true, &environ); } /* get an async event base - we use the opal_async one so @@ -174,19 +177,20 @@ 
static int rte_init(void) progress_thread_running = true; /* open and setup pmix */ - if (NULL == opal_pmix.initialized) { - if (OPAL_SUCCESS != (ret = mca_base_framework_open(&opal_pmix_base_framework, 0))) { - error = "opening pmix"; - goto error; - } - if (OPAL_SUCCESS != (ret = opal_pmix_base_select())) { - error = "select pmix"; - goto error; - } + if (OPAL_SUCCESS != (ret = mca_base_framework_open(&opal_pmix_base_framework, 0))) { + error = "opening pmix"; + goto error; } + if (OPAL_SUCCESS != (ret = opal_pmix_base_select())) { + error = "select pmix"; + goto error; + } + /* set the event base */ + opal_pmix_base_set_evbase(orte_event_base); /* initialize the selected module */ if (!opal_pmix.initialized() && (OPAL_SUCCESS != (ret = opal_pmix.init()))) { - error = "init pmix"; + /* we cannot run */ + error = "pmix init"; goto error; } @@ -194,6 +198,8 @@ static int rte_init(void) * so carry it forward here */ ORTE_PROC_MY_NAME->jobid = OPAL_PROC_MY_NAME.jobid; ORTE_PROC_MY_NAME->vpid = OPAL_PROC_MY_NAME.vpid; + name.jobid = OPAL_PROC_MY_NAME.jobid; + name.vpid = ORTE_VPID_WILDCARD; /* get our local rank from PMI */ OPAL_MODEX_RECV_VALUE(ret, OPAL_PMIX_LOCAL_RANK, @@ -213,25 +219,32 @@ static int rte_init(void) } orte_process_info.my_node_rank = u16; - /* get universe size */ - OPAL_MODEX_RECV_VALUE(ret, OPAL_PMIX_UNIV_SIZE, - ORTE_PROC_MY_NAME, &u32ptr, OPAL_UINT32); + /* get max procs */ + OPAL_MODEX_RECV_VALUE(ret, OPAL_PMIX_MAX_PROCS, + &name, &u32ptr, OPAL_UINT32); if (OPAL_SUCCESS != ret) { - error = "getting univ size"; + error = "getting max procs"; goto error; } - orte_process_info.num_procs = u32; + orte_process_info.max_procs = u32; + + /* we are a singleton, so there is only one proc in the job */ + orte_process_info.num_procs = 1; /* push into the environ for pickup in MPI layer for * MPI-3 required info key */ if (NULL == getenv(OPAL_MCA_PREFIX"orte_ess_num_procs")) { - asprintf(&ev1, OPAL_MCA_PREFIX"orte_ess_num_procs=%d", 
orte_process_info.num_procs); - putenv(ev1); + char * num_procs; + asprintf(&num_procs, "%d", orte_process_info.num_procs); + opal_setenv(OPAL_MCA_PREFIX"orte_ess_num_procs", num_procs, true, &environ); + free(num_procs); added_num_procs = true; } if (NULL == getenv("OMPI_APP_CTX_NUM_PROCS")) { - asprintf(&ev2, "OMPI_APP_CTX_NUM_PROCS=%d", orte_process_info.num_procs); - putenv(ev2); + char * num_procs; + asprintf(&num_procs, "%d", orte_process_info.num_procs); + opal_setenv("OMPI_APP_CTX_NUM_PROCS", num_procs, true, &environ); + free(num_procs); added_app_ctx = true; } @@ -247,27 +260,9 @@ static int rte_init(void) /* set some other standard values */ orte_process_info.num_local_peers = 0; - /* setup transport keys in case the MPI layer needs them - - * we can use the jobfam and stepid as unique keys - * because they are unique values assigned by the RM - */ - if (NULL == getenv(OPAL_MCA_PREFIX"orte_precondition_transports")) { - unique_key[0] = ORTE_JOB_FAMILY(ORTE_PROC_MY_NAME->jobid); - unique_key[1] = ORTE_LOCAL_JOBID(ORTE_PROC_MY_NAME->jobid); - if (NULL == (string_key = orte_pre_condition_transports_print(unique_key))) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - return ORTE_ERR_OUT_OF_RESOURCE; - } - asprintf(&envar, OPAL_MCA_PREFIX"orte_precondition_transports=%s", string_key); - putenv(envar); - added_transport_keys = true; - /* cannot free the envar as that messes up our environ */ - free(string_key); - } - /* retrieve our topology */ OPAL_MODEX_RECV_VALUE(ret, OPAL_PMIX_LOCAL_TOPO, - ORTE_PROC_MY_NAME, &val, OPAL_STRING); + &name, &val, OPAL_STRING); if (OPAL_SUCCESS == ret && NULL != val) { /* load the topology */ if (0 != hwloc_topology_init(&opal_hwloc_topology)) { @@ -374,7 +369,6 @@ static int rte_finalize(void) unsetenv("PMIX_SERVER_URI"); unsetenv("PMIX_SECURITY_MODE"); } - /* use the default procedure to finish */ if (ORTE_SUCCESS != (ret = orte_ess_base_app_finalize())) { ORTE_ERROR_LOG(ret); @@ -391,7 +385,6 @@ static int rte_finalize(void) 
opal_progress_thread_finalize(NULL); progress_thread_running = false; } - return ret; } @@ -498,6 +491,16 @@ static int fork_hnp(void) opal_argv_append(&argc, &argv, "state_novm_select"); opal_argv_append(&argc, &argv, "1"); + /* direct the selection of the ess component */ + opal_argv_append(&argc, &argv, "-"OPAL_MCA_CMD_LINE_ID); + opal_argv_append(&argc, &argv, "ess"); + opal_argv_append(&argc, &argv, "hnp"); + + /* direct the selection of the pmix component */ + opal_argv_append(&argc, &argv, "-"OPAL_MCA_CMD_LINE_ID); + opal_argv_append(&argc, &argv, "pmix"); + opal_argv_append(&argc, &argv, "^s1,s2,cray,isolated"); + /* Fork off the child */ orte_process_info.hnp_pid = fork(); if(orte_process_info.hnp_pid < 0) { @@ -546,6 +549,8 @@ static int fork_hnp(void) exit(1); } else { + int count; + free(cmd); /* I am the parent - wait to hear something back and * report results @@ -561,14 +566,24 @@ static int fork_hnp(void) orted_uri = (char*)malloc(buffer_length); memset(orted_uri, 0, buffer_length); - while (chunk == (rc = read(p[0], &orted_uri[num_chars_read], chunk))) { - /* we read an entire buffer - better get more */ - num_chars_read += chunk; - orted_uri = realloc((void*)orted_uri, buffer_length+ORTE_URI_MSG_LGTH); - memset(&orted_uri[buffer_length], 0, ORTE_URI_MSG_LGTH); - buffer_length += ORTE_URI_MSG_LGTH; + while (0 != (rc = read(p[0], &orted_uri[num_chars_read], chunk))) { + if (rc < 0 && (EAGAIN == errno || EINTR == errno)) { + continue; + } else if (rc < 0) { + num_chars_read = -1; + break; + } + /* we read something - better get more */ + num_chars_read += rc; + chunk -= rc; + if (0 == chunk) { + chunk = ORTE_URI_MSG_LGTH; + orted_uri = realloc((void*)orted_uri, buffer_length+chunk); + memset(&orted_uri[buffer_length], 0, chunk); + buffer_length += chunk; + } } - num_chars_read += rc; + close(p[0]); if (num_chars_read <= 0) { /* we didn't get anything back - this is bad */ @@ -612,14 +627,13 @@ static int fork_hnp(void) /* split the pmix_uri into its 
parts */ argv = opal_argv_split(cptr, ','); - if (4 != opal_argv_count(argv)) { - opal_argv_free(argv); - return ORTE_ERR_BAD_PARAM; - } + count = opal_argv_count(argv); /* push each piece into the environment */ - for (i=0; i < 4; i++) { - pmixenvars[i] = strdup(argv[i]); - putenv(pmixenvars[i]); + for (i=0; i < count; i++) { + char *c = strchr(argv[i], '='); + assert(NULL != c); + *c++ = '\0'; + opal_setenv(argv[i], c, true, &environ); } opal_argv_free(argv); added_pmix_envs = true; diff --git a/orte/mca/schizo/alps/Makefile.am b/orte/mca/schizo/alps/Makefile.am new file mode 100644 index 00000000000..05d47a75815 --- /dev/null +++ b/orte/mca/schizo/alps/Makefile.am @@ -0,0 +1,35 @@ +# +# Copyright (c) 2016 Intel, Inc. All rights reserved. +# $COPYRIGHT$ +# +# Additional copyrights may follow +# +# $HEADER$ +# + +sources = \ + schizo_alps_component.c \ + schizo_alps.h \ + schizo_alps.c + +# Make the output library in this directory, and name it either +# mca_<type>_<name>.la (for DSO builds) or libmca_<type>_<name>.la +# (for static builds). + +if MCA_BUILD_orte_schizo_alps_DSO +component_noinst = +component_install = mca_schizo_alps.la +else +component_noinst = libmca_schizo_alps.la +component_install = +endif + +mcacomponentdir = $(ortelibdir) +mcacomponent_LTLIBRARIES = $(component_install) +mca_schizo_alps_la_SOURCES = $(sources) +mca_schizo_alps_la_LDFLAGS = -module -avoid-version + +noinst_LTLIBRARIES = $(component_noinst) +libmca_schizo_alps_la_SOURCES = $(sources) +libmca_schizo_alps_la_LDFLAGS = -module -avoid-version + diff --git a/orte/mca/schizo/alps/configure.m4 b/orte/mca/schizo/alps/configure.m4 new file mode 100644 index 00000000000..0bcd85a524f --- /dev/null +++ b/orte/mca/schizo/alps/configure.m4 @@ -0,0 +1,47 @@ +# -*- shell-script -*- +# +# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana +# University Research and Technology +# Corporation. All rights reserved. 
+# Copyright (c) 2004-2005 The University of Tennessee and The University +# of Tennessee Research Foundation. All rights +# reserved. +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# University of Stuttgart. All rights reserved. +# Copyright (c) 2004-2005 The Regents of the University of California. +# All rights reserved. +# Copyright (c) 2008 UT-Battelle, LLC +# Copyright (c) 2009-2010 Cisco Systems, Inc. All rights reserved. +# Copyright (c) 2011-2013 Los Alamos National Security, LLC. +# All rights reserved. +# Copyright (c) 2016 Intel, Inc. All rights reserved. +# $COPYRIGHT$ +# +# Additional copyrights may follow +# +# $HEADER$ +# + +# MCA_orte_schizo_alps_CONFIG([action-if-found], [action-if-not-found]) +# ----------------------------------------------------------- +AC_DEFUN([MCA_orte_schizo_alps_CONFIG],[ + AC_CONFIG_FILES([orte/mca/schizo/alps/Makefile]) + + ORTE_CHECK_ALPS([schizo_alps], [schizo_alps_happy="yes"], [schizo_alps_happy="no"]) + + # check for alps/apInfo.h + # save current CPPFLAGS + MCA_orte_schizo_save_CPPFLAGS="$CPPFLAGS" + + # add flags obtained from ORTE_CHECK_ALPS + CPPFLAGS="$CPPFLAGS $schizo_alps_CPPFLAGS" + + AC_CHECK_HEADERS([alps/apInfo.h], [], [schizo_alps_happy="no"]) + + # restore CPPFLAGS + CPPFLAGS="$MCA_orte_schizo_save_CPPFLAGS" + + AC_SUBST([schizo_alps_CPPFLAGS]) + + AS_IF([test "$schizo_alps_happy" = "yes"], [$1], [$2]) +])dnl diff --git a/orte/mca/schizo/alps/owner.txt b/orte/mca/schizo/alps/owner.txt new file mode 100644 index 00000000000..85b4416d206 --- /dev/null +++ b/orte/mca/schizo/alps/owner.txt @@ -0,0 +1,7 @@ +# +# owner/status file +# owner: institution that is responsible for this package +# status: e.g. 
active, maintenance, unmaintained +# +owner: INTEL +status: active diff --git a/orte/mca/schizo/alps/schizo_alps.c b/orte/mca/schizo/alps/schizo_alps.c new file mode 100644 index 00000000000..b20ab9185d4 --- /dev/null +++ b/orte/mca/schizo/alps/schizo_alps.c @@ -0,0 +1,125 @@ +/* + * Copyright (c) 2016 Intel, Inc. All rights reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + * + */ + +#include "orte_config.h" +#include "orte/types.h" +#include "opal/types.h" + +#ifdef HAVE_UNISTD_H +#include +#endif +#include +#include + +#include "opal/util/argv.h" +#include "opal/util/basename.h" +#include "opal/util/opal_environ.h" + +#include "orte/runtime/orte_globals.h" +#include "orte/util/name_fns.h" +#include "orte/mca/schizo/base/base.h" + +#include "schizo_alps.h" + +static orte_schizo_launch_environ_t check_launch_environment(void); +static void finalize(void); + +orte_schizo_base_module_t orte_schizo_alps_module = { + .check_launch_environment = check_launch_environment, + .finalize = finalize +}; + +static char **pushed_envs = NULL; +static char **pushed_vals = NULL; +static orte_schizo_launch_environ_t myenv; +static bool myenvdefined = false; + +/* Classify how this app process was started and cache the result: + * - a daemon URI is set -> NATIVE_LAUNCHED (launched via mpirun) + * - /proc/job cannot be opened -> MANAGED_SINGLETON + * - otherwise -> DIRECT_LAUNCHED + * The matching "ess"/"pmix" MCA selections are pushed into environ. */ +static orte_schizo_launch_environ_t check_launch_environment(void) +{ + int i; + const char proc_job_file[]="/proc/job"; + FILE *fd = NULL, *fd_task_is_app = NULL; + char task_is_app_fname[PATH_MAX]; + + if (myenvdefined) { + return myenv; + } + myenvdefined = true; + + /* we were only selected because we are an app, + * so no need to further check that here. 
Instead, + * see if we were direct launched vs launched via mpirun */ + if (NULL != orte_process_info.my_daemon_uri) { + /* nope */ + myenv = ORTE_SCHIZO_NATIVE_LAUNCHED; + opal_argv_append_nosize(&pushed_envs, OPAL_MCA_PREFIX"ess"); + opal_argv_append_nosize(&pushed_vals, "pmi"); + goto setup; + } + + /* see if we are running in a Cray PAGG container */ + fd = fopen(proc_job_file, "r"); + if (NULL == fd) { + /* we are a singleton */ + myenv = ORTE_SCHIZO_MANAGED_SINGLETON; + opal_argv_append_nosize(&pushed_envs, OPAL_MCA_PREFIX"ess"); + opal_argv_append_nosize(&pushed_vals, "singleton"); + } else { + if (NULL != orte_process_info.my_daemon_uri) { + myenv = ORTE_SCHIZO_NATIVE_LAUNCHED; + } else { + myenv = ORTE_SCHIZO_DIRECT_LAUNCHED; + } + opal_argv_append_nosize(&pushed_envs, OPAL_MCA_PREFIX"ess"); + opal_argv_append_nosize(&pushed_vals, "pmi"); + snprintf(task_is_app_fname,sizeof(task_is_app_fname), + "/proc/self/task/%ld/task_is_app",syscall(SYS_gettid)); + fd_task_is_app = fopen(task_is_app_fname, "r"); + if (fd_task_is_app != NULL) { /* okay we're in a PAGG container, + and we are an app task (not just a process + running on a mom node, for example) */ + opal_argv_append_nosize(&pushed_envs, OPAL_MCA_PREFIX"pmix"); + opal_argv_append_nosize(&pushed_vals, "cray"); + fclose(fd_task_is_app); /* opened only to test for existence - do not leak it */ + } + fclose(fd); + } + + setup: + opal_output_verbose(1, orte_schizo_base_framework.framework_output, + "schizo:alps DECLARED AS %s", orte_schizo_base_print_env(myenv)); + if (NULL != pushed_envs) { + for (i=0; NULL != pushed_envs[i]; i++) { + opal_setenv(pushed_envs[i], pushed_vals[i], true, &environ); + } + } + + return myenv; +} + +/* Remove from environ every variable pushed by check_launch_environment() + * and release the name/value lists. 
*/ +static void finalize(void) +{ + int i; + + if (NULL != pushed_envs) { + for (i=0; NULL != pushed_envs[i]; i++) { + opal_unsetenv(pushed_envs[i], &environ); + } + opal_argv_free(pushed_envs); + opal_argv_free(pushed_vals); + } +} diff --git a/orte/mca/schizo/alps/schizo_alps.h b/orte/mca/schizo/alps/schizo_alps.h new file mode 100644 index 00000000000..2e2d8ffb758 --- 
/dev/null +++ b/orte/mca/schizo/alps/schizo_alps.h @@ -0,0 +1,29 @@ +/* + * Copyright (c) 2016 Intel, Inc. All rights reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#ifndef _MCA_SCHIZO_ALPS_H_ +#define _MCA_SCHIZO_ALPS_H_ + +#include "orte_config.h" + +#include "orte/types.h" + +#include "opal/mca/base/base.h" +#include "orte/mca/schizo/schizo.h" + + +BEGIN_C_DECLS + +ORTE_MODULE_DECLSPEC extern orte_schizo_base_component_t mca_schizo_alps_component; +extern orte_schizo_base_module_t orte_schizo_alps_module; + +END_C_DECLS + +#endif /* _MCA_SCHIZO_ALPS_H_ */ + diff --git a/orte/mca/schizo/alps/schizo_alps_component.c b/orte/mca/schizo/alps/schizo_alps_component.c new file mode 100644 index 00000000000..c454c9d00e1 --- /dev/null +++ b/orte/mca/schizo/alps/schizo_alps_component.c @@ -0,0 +1,53 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ +/* + * Copyright (c) 2016 Intel, Inc. All rights reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include "orte_config.h" +#include "orte/types.h" +#include "opal/types.h" + +#include "opal/util/show_help.h" + +#include "orte/mca/schizo/schizo.h" +#include "schizo_alps.h" + +static int component_query(mca_base_module_t **module, int *priority); + +/* + * Component descriptor for the "alps" schizo component; component_query is its query entry point + */ +orte_schizo_base_component_t mca_schizo_alps_component = { + .base_version = { + MCA_SCHIZO_BASE_VERSION_1_0_0, + .mca_component_name = "alps", + MCA_BASE_MAKE_VERSION(component, ORTE_MAJOR_VERSION, ORTE_MINOR_VERSION, + ORTE_RELEASE_VERSION), + .mca_query_component = component_query, + }, + .base_data = { + /* The component is checkpoint ready */ + MCA_BASE_METADATA_PARAM_CHECKPOINT + }, +}; + +static int component_query(mca_base_module_t **module, int *priority) +{ + /* if we are not an app, then don't bother: offer no module and report ORTE_ERROR */ + if 
(!ORTE_PROC_IS_APP) { + *priority = 0; + *module = NULL; + return ORTE_ERROR; + } + + 
/* since we were built, assume we are on an alps system and offer our module at priority 90 */ + *priority = 90; + *module = (mca_base_module_t *)&orte_schizo_alps_module; + return ORTE_SUCCESS; +} + diff --git a/orte/mca/schizo/base/base.h b/orte/mca/schizo/base/base.h index 2f22ebb8752..3afd17f903a 100644 --- a/orte/mca/schizo/base/base.h +++ b/orte/mca/schizo/base/base.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015 Intel, Inc. All rights reserved. + * Copyright (c) 2015-2016 Intel, Inc. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -60,6 +60,7 @@ typedef struct { OBJ_CLASS_DECLARATION(orte_schizo_base_active_module_t); /* the base stub functions */ +ORTE_DECLSPEC const char* orte_schizo_base_print_env(orte_schizo_launch_environ_t env); ORTE_DECLSPEC int orte_schizo_base_parse_cli(char *personality, int argc, int start, char **argv); ORTE_DECLSPEC int orte_schizo_base_parse_env(char *personality, @@ -72,6 +73,8 @@ ORTE_DECLSPEC int orte_schizo_base_setup_fork(orte_job_t *jdata, ORTE_DECLSPEC int orte_schizo_base_setup_child(orte_job_t *jobdat, orte_proc_t *child, orte_app_context_t *app); +ORTE_DECLSPEC orte_schizo_launch_environ_t orte_schizo_base_check_launch_environment(void); +ORTE_DECLSPEC void orte_schizo_base_finalize(void); END_C_DECLS diff --git a/orte/mca/schizo/base/schizo_base_frame.c b/orte/mca/schizo/base/schizo_base_frame.c index 5bdd4cf3e18..2752ac26d4f 100644 --- a/orte/mca/schizo/base/schizo_base_frame.c +++ b/orte/mca/schizo/base/schizo_base_frame.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015 Intel, Inc. All rights reserved. + * Copyright (c) 2015-2016 Intel, Inc. All rights reserved. * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ @@ -37,10 +37,12 @@ */ orte_schizo_base_t orte_schizo_base = {{{0}}}; orte_schizo_base_module_t orte_schizo = { - orte_schizo_base_parse_cli, - orte_schizo_base_parse_env, - orte_schizo_base_setup_fork, - orte_schizo_base_setup_child + .parse_cli = orte_schizo_base_parse_cli, + .parse_env = orte_schizo_base_parse_env, + .setup_fork = orte_schizo_base_setup_fork, + .setup_child = orte_schizo_base_setup_child, + .check_launch_environment = orte_schizo_base_check_launch_environment, + .finalize = orte_schizo_base_finalize }; static int orte_schizo_base_close(void) diff --git a/orte/mca/schizo/base/schizo_base_stubs.c b/orte/mca/schizo/base/schizo_base_stubs.c index a2e5fe1bf2c..c3e40962545 100644 --- a/orte/mca/schizo/base/schizo_base_stubs.c +++ b/orte/mca/schizo/base/schizo_base_stubs.c @@ -1,6 +1,6 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* - * Copyright (c) 2015 Intel, Inc. All rights reserved. + * Copyright (c) 2015-2016 Intel, Inc. All rights reserved. 
* $COPYRIGHT$
  *
  * Additional copyrights may follow
@@ -15,6 +15,25 @@
 #include "opal/class/opal_list.h"
 #include "orte/mca/schizo/base/base.h"
 
+/* Return a printable name for a launch-environment code. */
+const char* orte_schizo_base_print_env(orte_schizo_launch_environ_t env)
+{
+    switch(env) {
+        case ORTE_SCHIZO_UNDETERMINED:
+            return "UNDETERMINED";
+        case ORTE_SCHIZO_NATIVE_LAUNCHED:
+            return "NATIVE_LAUNCHED";
+        case ORTE_SCHIZO_UNMANAGED_SINGLETON:
+            return "UNMANAGED_SINGLETON";
+        case ORTE_SCHIZO_DIRECT_LAUNCHED:
+            return "DIRECT_LAUNCHED";
+        case ORTE_SCHIZO_MANAGED_SINGLETON:
+            return "MANAGED_SINGLETON";
+        default:
+            return "INVALID_CODE";
+    }
+}
+
 int orte_schizo_base_parse_cli(char *personality,
                                int argc, int start, char **argv)
 {
@@ -91,3 +110,34 @@ int orte_schizo_base_setup_child(orte_job_t *jdata,
     }
     return ORTE_ERR_NOT_SUPPORTED;
 }
+
+/* Ask each active module, in list order, to classify the launch
+ * environment and return the first answer that is not
+ * ORTE_SCHIZO_UNDETERMINED (or UNDETERMINED if nobody claims us). */
+orte_schizo_launch_environ_t orte_schizo_base_check_launch_environment(void)
+{
+    orte_schizo_launch_environ_t rc;
+    orte_schizo_base_active_module_t *mod;
+
+    OPAL_LIST_FOREACH(mod, &orte_schizo_base.active_modules, orte_schizo_base_active_module_t) {
+        if (NULL != mod->module->check_launch_environment) {
+            rc = mod->module->check_launch_environment();
+            if (ORTE_SCHIZO_UNDETERMINED != rc) {
+                return rc;
+            }
+        }
+    }
+    return ORTE_SCHIZO_UNDETERMINED;
+}
+
+/* Give every active module that provides one a chance to finalize. */
+void orte_schizo_base_finalize(void)
+{
+    orte_schizo_base_active_module_t *mod;
+
+    OPAL_LIST_FOREACH(mod, &orte_schizo_base.active_modules, orte_schizo_base_active_module_t) {
+        if (NULL != mod->module->finalize) {
+            mod->module->finalize();
+        }
+    }
+}
diff --git a/orte/mca/schizo/ompi/schizo_ompi.c b/orte/mca/schizo/ompi/schizo_ompi.c
index 27b7f7af6c0..7303f928297 100644
--- a/orte/mca/schizo/ompi/schizo_ompi.c
+++ b/orte/mca/schizo/ompi/schizo_ompi.c
@@ -13,7 +13,7 @@
  * All rights reserved.
  * Copyright (c) 2009-2015 Cisco Systems, Inc. All rights reserved.
  * Copyright (c) 2011 Oak Ridge National Labs. All rights reserved.
- * Copyright (c) 2013-2015 Intel, Inc. All rights reserved.
+ * Copyright (c) 2013-2016 Intel, Inc. All rights reserved.
  * Copyright (c) 2015 Research Organization for Information Science
  * and Technology (RIST). All rights reserved.
  * $COPYRIGHT$
@@ -63,10 +63,10 @@ static int setup_child(orte_job_t *jobdat,
                        orte_app_context_t *app);
 
 orte_schizo_base_module_t orte_schizo_ompi_module = {
-    parse_cli,
-    parse_env,
-    setup_fork,
-    setup_child
+    .parse_cli = parse_cli,
+    .parse_env = parse_env,
+    .setup_fork = setup_fork,
+    .setup_child = setup_child
 };
 
 static int parse_cli(char *personality,
diff --git a/orte/mca/schizo/orte/Makefile.am b/orte/mca/schizo/orte/Makefile.am
new file mode 100644
index 00000000000..606b1ac822b
--- /dev/null
+++ b/orte/mca/schizo/orte/Makefile.am
@@ -0,0 +1,35 @@
+#
+# Copyright (c) 2016 Intel, Inc. All rights reserved.
+# $COPYRIGHT$
+#
+# Additional copyrights may follow
+#
+# $HEADER$
+#
+
+sources = \
+        schizo_orte_component.c \
+        schizo_orte.h \
+        schizo_orte.c
+
+# Make the output library in this directory, and name it either
+# mca_<type>_<name>.la (for DSO builds) or libmca_<type>_<name>.la
+# (for static builds).
+
+if MCA_BUILD_orte_schizo_orte_DSO
+component_noinst =
+component_install = mca_schizo_orte.la
+else
+component_noinst = libmca_schizo_orte.la
+component_install =
+endif
+
+mcacomponentdir = $(ortelibdir)
+mcacomponent_LTLIBRARIES = $(component_install)
+mca_schizo_orte_la_SOURCES = $(sources)
+mca_schizo_orte_la_LDFLAGS = -module -avoid-version
+
+noinst_LTLIBRARIES = $(component_noinst)
+libmca_schizo_orte_la_SOURCES = $(sources)
+libmca_schizo_orte_la_LDFLAGS = -module -avoid-version
+
diff --git a/orte/mca/schizo/orte/schizo_orte.c b/orte/mca/schizo/orte/schizo_orte.c
new file mode 100644
index 00000000000..b3783fe8fb1
--- /dev/null
+++ b/orte/mca/schizo/orte/schizo_orte.c
@@ -0,0 +1,103 @@
+/*
+ * Copyright (c) 2016 Intel, Inc. All rights reserved.
+ * $COPYRIGHT$
+ *
+ * Additional copyrights may follow
+ *
+ * $HEADER$
+ *
+ */
+
+#include "orte_config.h"
+#include "orte/types.h"
+#include "opal/types.h"
+
+#ifdef HAVE_UNISTD_H
+#include <unistd.h>
+#endif
+#include <string.h>  /* NOTE(review): header name was lost in extraction - confirm */
+
+#include "opal/util/argv.h"
+#include "opal/util/basename.h"
+#include "opal/util/opal_environ.h"
+
+#include "orte/runtime/orte_globals.h"
+#include "orte/util/name_fns.h"
+#include "orte/mca/schizo/base/base.h"
+
+#include "schizo_orte.h"
+
+static orte_schizo_launch_environ_t check_launch_environment(void);
+static void finalize(void);
+
+orte_schizo_base_module_t orte_schizo_orte_module = {
+    .check_launch_environment = check_launch_environment,
+    .finalize = finalize
+};
+
+static char **pushed_envs = NULL;
+static char **pushed_vals = NULL;
+static orte_schizo_launch_environ_t myenv;
+static bool myenvdefined = false;
+
+/* Determine how this process was launched, push the matching "ess"
+ * selection into the local environment, and return the result. The
+ * answer is cached, so repeated calls return the first determination
+ * without touching the environment again. */
+static orte_schizo_launch_environ_t check_launch_environment(void)
+{
+    int i;
+
+    if (myenvdefined) {
+        return myenv;
+    }
+    myenvdefined = true;
+
+    /* we were only selected because we are an app,
+     * so no need to further check that here. Instead,
+     * see if we were direct launched vs launched via mpirun */
+    if (NULL != orte_process_info.my_daemon_uri) {
+        /* nope */
+        myenv = ORTE_SCHIZO_NATIVE_LAUNCHED;
+        opal_argv_append_nosize(&pushed_envs, OPAL_MCA_PREFIX"ess");
+        opal_argv_append_nosize(&pushed_vals, "pmi");
+        goto setup;
+    }
+
+    /* if nobody else has laid claim to this process,
+     * then it must be a singleton */
+    myenv = ORTE_SCHIZO_UNMANAGED_SINGLETON;
+    opal_argv_append_nosize(&pushed_envs, OPAL_MCA_PREFIX"ess");
+    opal_argv_append_nosize(&pushed_vals, "singleton");
+
+ setup:
+    opal_output_verbose(1, orte_schizo_base_framework.framework_output,
+                        "schizo:orte DECLARED AS %s", orte_schizo_base_print_env(myenv));
+    if (NULL != pushed_envs) {
+        for (i=0; NULL != pushed_envs[i]; i++) {
+            opal_setenv(pushed_envs[i], pushed_vals[i], true, &environ);
+        }
+    }
+    return myenv;
+}
+
+/* Remove every envar pushed by check_launch_environment() and reset the
+ * module state so that it is safe to finalize twice or to re-initialize. */
+static void finalize(void)
+{
+    int i;
+
+    if (NULL != pushed_envs) {
+        for (i=0; NULL != pushed_envs[i]; i++) {
+            opal_unsetenv(pushed_envs[i], &environ);
+        }
+        opal_argv_free(pushed_envs);
+        opal_argv_free(pushed_vals);
+        /* forget the freed arrays so a repeated finalize (or a later
+         * re-init) cannot double-free or reuse them */
+        pushed_envs = NULL;
+        pushed_vals = NULL;
+    }
+    /* force the launch environment to be re-evaluated on re-init */
+    myenvdefined = false;
+}
diff --git a/orte/mca/schizo/orte/schizo_orte.h b/orte/mca/schizo/orte/schizo_orte.h
new file mode 100644
index 00000000000..ad06724bc39
--- /dev/null
+++ b/orte/mca/schizo/orte/schizo_orte.h
@@ -0,0 +1,29 @@
+/*
+ * Copyright (c) 2016 Intel, Inc. All rights reserved.
+ * $COPYRIGHT$
+ *
+ * Additional copyrights may follow
+ *
+ * $HEADER$
+ */
+
+#ifndef _MCA_SCHIZO_ORTE_H_
+#define _MCA_SCHIZO_ORTE_H_
+
+#include "orte_config.h"
+
+#include "orte/types.h"
+
+#include "opal/mca/base/base.h"
+#include "orte/mca/schizo/schizo.h"
+
+
+BEGIN_C_DECLS
+
+ORTE_MODULE_DECLSPEC extern orte_schizo_base_component_t mca_schizo_orte_component;
+extern orte_schizo_base_module_t orte_schizo_orte_module;
+
+END_C_DECLS
+
+#endif /* _MCA_SCHIZO_ORTE_H_ */
+
diff --git a/orte/mca/schizo/orte/schizo_orte_component.c b/orte/mca/schizo/orte/schizo_orte_component.c
new file mode 100644
index 00000000000..c6a45ec774f
--- /dev/null
+++ b/orte/mca/schizo/orte/schizo_orte_component.c
@@ -0,0 +1,54 @@
+/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
+/*
+ * Copyright (c) 2016 Intel, Inc. All rights reserved.
+ * $COPYRIGHT$
+ *
+ * Additional copyrights may follow
+ *
+ * $HEADER$
+ */
+
+#include "orte_config.h"
+#include "orte/types.h"
+#include "opal/types.h"
+
+#include "opal/util/show_help.h"
+
+#include "orte/mca/schizo/schizo.h"
+#include "schizo_orte.h"
+
+static int component_query(mca_base_module_t **module, int *priority);
+
+/*
+ * Struct of function pointers and all that to let us be initialized
+ */
+orte_schizo_base_component_t mca_schizo_orte_component = {
+    .base_version = {
+        MCA_SCHIZO_BASE_VERSION_1_0_0,
+        .mca_component_name = "orte",
+        MCA_BASE_MAKE_VERSION(component, ORTE_MAJOR_VERSION, ORTE_MINOR_VERSION,
+                              ORTE_RELEASE_VERSION),
+        .mca_query_component = component_query,
+    },
+    .base_data = {
+        /* The component is checkpoint ready */
+        MCA_BASE_METADATA_PARAM_CHECKPOINT
+    },
+};
+
+/* Offer the orte module to application processes only, at priority 1;
+ * every other process type is refused with ORTE_ERROR. */
+static int component_query(mca_base_module_t **module, int *priority)
+{
+    /* disqualify ourselves if we are not an app */
+    if (!ORTE_PROC_IS_APP) {
+        *priority = 0;
+        *module = NULL;
+        return ORTE_ERROR;
+    }
+
+    *module = (mca_base_module_t*)&orte_schizo_orte_module;
+    *priority = 1;
+    return ORTE_SUCCESS;
+}
+
diff --git
a/orte/mca/schizo/schizo.h b/orte/mca/schizo/schizo.h
index 5b685e503de..8ba2628dcb0 100644
--- a/orte/mca/schizo/schizo.h
+++ b/orte/mca/schizo/schizo.h
@@ -1,6 +1,6 @@
 /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
 /*
- * Copyright (c) 2015 Intel, Inc. All rights reserved
+ * Copyright (c) 2015-2016 Intel, Inc. All rights reserved.
  * Copyright (c) 2015 Los Alamos National Security, LLC. All rights
  * reserved.
  * $COPYRIGHT$
@@ -56,14 +56,35 @@ typedef int (*orte_schizo_base_module_setup_child_fn_t)(orte_job_t *jdata,
                                                         orte_proc_t *child,
                                                         orte_app_context_t *app);
 
+typedef enum {
+    ORTE_SCHIZO_UNDETERMINED,
+    ORTE_SCHIZO_NATIVE_LAUNCHED,
+    ORTE_SCHIZO_UNMANAGED_SINGLETON,
+    ORTE_SCHIZO_DIRECT_LAUNCHED,
+    ORTE_SCHIZO_MANAGED_SINGLETON
+} orte_schizo_launch_environ_t;
+
+
+/* check if this process was directly launched by a managed environment, and
+ * do whatever the module wants to do under those conditions. The module
+ * can push any required envars into the local environment, but must remember
+ * to "unset" them during finalize. The module then returns a flag indicating
+ * the launch environment of the process */
+typedef orte_schizo_launch_environ_t (*orte_schizo_base_module_ck_launch_environ_fn_t)(void);
+
+/* give the component a chance to clean up */
+typedef void (*orte_schizo_base_module_finalize_fn_t)(void);
+
 /*
  * schizo module version 1.3.0
  */
 typedef struct {
-    orte_schizo_base_module_parse_cli_fn_t parse_cli;
-    orte_schizo_base_module_parse_env_fn_t parse_env;
-    orte_schizo_base_module_setup_fork_fn_t setup_fork;
-    orte_schizo_base_module_setup_child_fn_t setup_child;
+    orte_schizo_base_module_parse_cli_fn_t          parse_cli;
+    orte_schizo_base_module_parse_env_fn_t          parse_env;
+    orte_schizo_base_module_setup_fork_fn_t         setup_fork;
+    orte_schizo_base_module_setup_child_fn_t        setup_child;
+    orte_schizo_base_module_ck_launch_environ_fn_t  check_launch_environment;
+    orte_schizo_base_module_finalize_fn_t           finalize;
 } orte_schizo_base_module_t;
 
 ORTE_DECLSPEC extern orte_schizo_base_module_t orte_schizo;
diff --git a/orte/mca/schizo/singularity/Makefile.am b/orte/mca/schizo/singularity/Makefile.am
new file mode 100644
index 00000000000..ef7fa7c3555
--- /dev/null
+++ b/orte/mca/schizo/singularity/Makefile.am
@@ -0,0 +1,35 @@
+#
+# Copyright (c) 2016 Intel, Inc. All rights reserved.
+# $COPYRIGHT$
+#
+# Additional copyrights may follow
+#
+# $HEADER$
+#
+
+sources = \
+        schizo_singularity_component.c \
+        schizo_singularity.h \
+        schizo_singularity.c
+
+# Make the output library in this directory, and name it either
+# mca_<type>_<name>.la (for DSO builds) or libmca_<type>_<name>.la
+# (for static builds).
+
+if MCA_BUILD_orte_schizo_singularity_DSO
+component_noinst =
+component_install = mca_schizo_singularity.la
+else
+component_noinst = libmca_schizo_singularity.la
+component_install =
+endif
+
+mcacomponentdir = $(ortelibdir)
+mcacomponent_LTLIBRARIES = $(component_install)
+mca_schizo_singularity_la_SOURCES = $(sources)
+mca_schizo_singularity_la_LDFLAGS = -module -avoid-version
+
+noinst_LTLIBRARIES = $(component_noinst)
+libmca_schizo_singularity_la_SOURCES = $(sources)
+libmca_schizo_singularity_la_LDFLAGS = -module -avoid-version
+
diff --git a/orte/mca/schizo/singularity/configure.m4 b/orte/mca/schizo/singularity/configure.m4
new file mode 100644
index 00000000000..838d693641f
--- /dev/null
+++ b/orte/mca/schizo/singularity/configure.m4
@@ -0,0 +1,19 @@
+# -*- shell-script -*-
+#
+# Copyright (c) 2016 Intel, Inc. All rights reserved.
+# $COPYRIGHT$
+#
+# Additional copyrights may follow
+#
+# $HEADER$
+#
+
+# MCA_orte_schizo_singularity_CONFIG([action-if-found], [action-if-not-found])
+# -----------------------------------------------------------
+AC_DEFUN([MCA_orte_schizo_singularity_CONFIG],[
+    AC_CONFIG_FILES([orte/mca/schizo/singularity/Makefile])
+
+    OPAL_CHECK_SINGULARITY([schizo_singularity], [schizo_singularity_happy="yes"], [schizo_singularity_happy="no"])
+
+    AS_IF([test "$schizo_singularity_happy" = "yes"], [$1], [$2])
+])dnl
diff --git a/orte/mca/schizo/singularity/owner.txt b/orte/mca/schizo/singularity/owner.txt
new file mode 100644
index 00000000000..85b4416d206
--- /dev/null
+++ b/orte/mca/schizo/singularity/owner.txt
@@ -0,0 +1,7 @@
+#
+# owner/status file
+# owner: institution that is responsible for this package
+# status: e.g.
active, maintenance, unmaintained
+#
+owner: INTEL
+status: active
diff --git a/orte/mca/schizo/singularity/schizo_singularity.c b/orte/mca/schizo/singularity/schizo_singularity.c
new file mode 100644
index 00000000000..26d279d5acd
--- /dev/null
+++ b/orte/mca/schizo/singularity/schizo_singularity.c
@@ -0,0 +1,116 @@
+/*
+ * Copyright (c) 2016 Intel, Inc. All rights reserved.
+ * $COPYRIGHT$
+ *
+ * Additional copyrights may follow
+ *
+ * $HEADER$
+ *
+ */
+
+#include "orte_config.h"
+#include "orte/types.h"
+#include "opal/types.h"
+
+#ifdef HAVE_UNISTD_H
+#include <unistd.h>
+#endif
+#include <string.h>  /* NOTE(review): header name was lost in extraction - confirm */
+
+#include "opal/util/argv.h"
+#include "opal/util/basename.h"
+#include "opal/util/opal_environ.h"
+#include "opal/util/os_dirpath.h"
+#include "opal/util/path.h"
+
+#include "orte/runtime/orte_globals.h"
+#include "orte/util/name_fns.h"
+#include "orte/mca/schizo/base/base.h"
+
+#include "schizo_singularity.h"
+
+static int setup_fork(orte_job_t *jdata, orte_app_context_t *app);
+
+orte_schizo_base_module_t orte_schizo_singularity_module = {
+    .setup_fork = setup_fork
+};
+
+/* Prepare the environment of an app that runs inside a Singularity
+ * container: prepend the directory holding the singularity executable
+ * to the app's PATH and, unless ORTE_APP_NO_CACHEDIR is set, point
+ * Singularity's session directory at the OMPI job session directory.
+ * Returns ORTE_ERR_TAKE_NEXT_OPTION when the app is not ours to handle
+ * or singularity cannot be located. */
+static int setup_fork(orte_job_t *jdata, orte_app_context_t *app)
+{
+    int i;
+    bool takeus = false;
+    char *t2, *pth, *newenv;
+
+    if (NULL != orte_schizo_base.personalities && NULL != jdata->personality) {
+        /* see if we are included */
+        for (i=0; NULL != jdata->personality[i]; i++) {
+            if (0 == strcmp(jdata->personality[i], "singularity")) {
+                takeus = true;
+                break;
+            }
+        }
+    }
+    if (!takeus) {
+        /* even if they didn't specify, check to see if
+         * this involves a singularity container */
+        if (NULL == app->argv || NULL == app->argv[0] ||
+            0 != strcmp(app->argv[0], "singularity")) {
+            /* guess not! */
+            return ORTE_ERR_TAKE_NEXT_OPTION;
+        }
+    }
+
+    opal_output_verbose(1, orte_schizo_base_framework.framework_output,
+                        "%s schizo:singularity: configuring app environment %s",
+                        ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), app->argv[0]);
+
+
+    /* Make sure we prepend the path Singularity was called by in case that
+     * path is not defined on the nodes */
+    if (0 < strlen(OPAL_SINGULARITY_PATH)) {
+        if (0 > asprintf(&pth, "%s/singularity", OPAL_SINGULARITY_PATH) ) {
+            /* Something bad happened, let's move on */
+            return ORTE_ERR_TAKE_NEXT_OPTION;
+        }
+    } else {
+        /* since we allow for detecting singularity's presence, it
+         * is possible that we found it in the PATH, but not in a
+         * standard location. Check for that here */
+        pth = opal_path_findv("singularity", X_OK, app->env, NULL);
+        if (NULL == pth) {
+            /* cannot execute */
+            return ORTE_ERR_TAKE_NEXT_OPTION;
+        }
+    }
+    /* find the path and prepend it with the path to Singularity */
+    for (i = 0; NULL != app->env && NULL != app->env[i]; ++i) {
+        /* add to PATH */
+        if (0 == strncmp("PATH=", app->env[i], 5)) {
+            t2 = opal_dirname(pth);
+            if (0 < asprintf(&newenv, "%s:%s", t2, app->env[i] + 5) ) {
+                opal_setenv("PATH", newenv, true, &app->env);
+                free(newenv);
+            }
+            free(t2);
+            break;
+        }
+    }
+    free(pth);
+
+    /* set the singularity cache dir, unless asked not to do so */
+    if (!orte_get_attribute(&app->attributes, ORTE_APP_NO_CACHEDIR, NULL, OPAL_BOOL)) {
+        /* Set the Singularity sessiondir to exist within the OMPI sessiondir */
+        opal_setenv("SINGULARITY_SESSIONDIR", orte_process_info.job_session_dir, true, &app->env);
+        /* No need for Singularity to clean up after itself if OMPI will */
+        opal_setenv("SINGULARITY_NOSESSIONCLEANUP", "1", true, &app->env);
+    }
+
+    return ORTE_SUCCESS;
+}
+
diff --git a/orte/mca/schizo/singularity/schizo_singularity.h b/orte/mca/schizo/singularity/schizo_singularity.h
new file mode 100644
index 00000000000..35bcf63348c
--- /dev/null
+++ b/orte/mca/schizo/singularity/schizo_singularity.h
@@
-0,0 +1,29 @@
+/*
+ * Copyright (c) 2016 Intel, Inc. All rights reserved.
+ * $COPYRIGHT$
+ *
+ * Additional copyrights may follow
+ *
+ * $HEADER$
+ */
+
+#ifndef _MCA_SCHIZO_SINGULARITY_H_
+#define _MCA_SCHIZO_SINGULARITY_H_
+
+#include "orte_config.h"
+
+#include "orte/types.h"
+
+#include "opal/mca/base/base.h"
+#include "orte/mca/schizo/schizo.h"
+
+
+BEGIN_C_DECLS
+
+ORTE_MODULE_DECLSPEC extern orte_schizo_base_component_t mca_schizo_singularity_component;
+extern orte_schizo_base_module_t orte_schizo_singularity_module;
+
+END_C_DECLS
+
+#endif /* _MCA_SCHIZO_SINGULARITY_H_ */
+
diff --git a/orte/mca/schizo/singularity/schizo_singularity_component.c b/orte/mca/schizo/singularity/schizo_singularity_component.c
new file mode 100644
index 00000000000..a3a0f454471
--- /dev/null
+++ b/orte/mca/schizo/singularity/schizo_singularity_component.c
@@ -0,0 +1,55 @@
+/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
+/*
+ * Copyright (c) 2016 Intel, Inc. All rights reserved.
+ * $COPYRIGHT$
+ *
+ * Additional copyrights may follow
+ *
+ * $HEADER$
+ */
+
+#include "orte_config.h"
+#include "orte/types.h"
+#include "opal/types.h"
+
+#include "opal/util/show_help.h"
+
+#include "orte/runtime/orte_globals.h"
+
+#include "orte/mca/schizo/schizo.h"
+#include "schizo_singularity.h"
+
+static int component_query(mca_base_module_t **module, int *priority);
+
+/*
+ * Struct of function pointers and all that to let us be initialized
+ */
+orte_schizo_base_component_t mca_schizo_singularity_component = {
+    .base_version = {
+        MCA_SCHIZO_BASE_VERSION_1_0_0,
+        .mca_component_name = "singularity",
+        MCA_BASE_MAKE_VERSION(component, ORTE_MAJOR_VERSION, ORTE_MINOR_VERSION,
+                              ORTE_RELEASE_VERSION),
+        .mca_query_component = component_query,
+    },
+    .base_data = {
+        /* The component is checkpoint ready */
+        MCA_BASE_METADATA_PARAM_CHECKPOINT
+    },
+};
+
+/* Offer the singularity module to every process type except applications,
+ * at priority 5; application processes are refused with ORTE_ERROR. */
+static int component_query(mca_base_module_t **module, int *priority)
+{
+    /* if we are an app, ignore us */
+    if (ORTE_PROC_IS_APP) {
+        *module = NULL;
+        *priority = 0;
+        return ORTE_ERROR;
+    }
+    *module = (mca_base_module_t*)&orte_schizo_singularity_module;
+    *priority = 5;
+    return ORTE_SUCCESS;
+}
+
diff --git a/orte/mca/schizo/slurm/Makefile.am b/orte/mca/schizo/slurm/Makefile.am
new file mode 100644
index 00000000000..e063ce7220d
--- /dev/null
+++ b/orte/mca/schizo/slurm/Makefile.am
@@ -0,0 +1,35 @@
+#
+# Copyright (c) 2016 Intel, Inc. All rights reserved.
+# $COPYRIGHT$
+#
+# Additional copyrights may follow
+#
+# $HEADER$
+#
+
+sources = \
+        schizo_slurm_component.c \
+        schizo_slurm.h \
+        schizo_slurm.c
+
+# Make the output library in this directory, and name it either
+# mca_<type>_<name>.la (for DSO builds) or libmca_<type>_<name>.la
+# (for static builds).
+
+if MCA_BUILD_orte_schizo_slurm_DSO
+component_noinst =
+component_install = mca_schizo_slurm.la
+else
+component_noinst = libmca_schizo_slurm.la
+component_install =
+endif
+
+mcacomponentdir = $(ortelibdir)
+mcacomponent_LTLIBRARIES = $(component_install)
+mca_schizo_slurm_la_SOURCES = $(sources)
+mca_schizo_slurm_la_LDFLAGS = -module -avoid-version
+
+noinst_LTLIBRARIES = $(component_noinst)
+libmca_schizo_slurm_la_SOURCES = $(sources)
+libmca_schizo_slurm_la_LDFLAGS = -module -avoid-version
+
diff --git a/orte/mca/schizo/slurm/configure.m4 b/orte/mca/schizo/slurm/configure.m4
new file mode 100644
index 00000000000..44f2d46c18d
--- /dev/null
+++ b/orte/mca/schizo/slurm/configure.m4
@@ -0,0 +1,41 @@
+# -*- shell-script -*-
+#
+# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
+# University Research and Technology
+# Corporation. All rights reserved.
+# Copyright (c) 2004-2005 The University of Tennessee and The University
+# of Tennessee Research Foundation. All rights
+# reserved.
+# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
+# University of Stuttgart. All rights reserved.
+# Copyright (c) 2004-2005 The Regents of the University of California.
+# All rights reserved.
+# Copyright (c) 2009-2010 Cisco Systems, Inc. All rights reserved.
+# Copyright (c) 2011-2013 Los Alamos National Security, LLC.
+# All rights reserved.
+# Copyright (c) 2016 Intel, Inc. All rights reserved
+# $COPYRIGHT$
+#
+# Additional copyrights may follow
+#
+# $HEADER$
+#
+
+# MCA_orte_schizo_slurm_CONFIG([action-if-found], [action-if-not-found])
+# -----------------------------------------------------------
+AC_DEFUN([MCA_orte_schizo_slurm_CONFIG],[
+    AC_CONFIG_FILES([orte/mca/schizo/slurm/Makefile])
+
+    ORTE_CHECK_SLURM([schizo_slurm], [schizo_slurm_good=1], [schizo_slurm_good=0])
+
+    # if check worked, set wrapper flags if so.
+    # Evaluate succeed / fail
+    AS_IF([test "$schizo_slurm_good" = "1"],
+          [$1],
+          [$2])
+
+    # set build flags to use in makefile
+    AC_SUBST([schizo_slurm_CPPFLAGS])
+    AC_SUBST([schizo_slurm_LDFLAGS])
+    AC_SUBST([schizo_slurm_LIBS])
+])dnl
diff --git a/orte/mca/schizo/slurm/owner.txt b/orte/mca/schizo/slurm/owner.txt
new file mode 100644
index 00000000000..85b4416d206
--- /dev/null
+++ b/orte/mca/schizo/slurm/owner.txt
@@ -0,0 +1,7 @@
+#
+# owner/status file
+# owner: institution that is responsible for this package
+# status: e.g. active, maintenance, unmaintained
+#
+owner: INTEL
+status: active
diff --git a/orte/mca/schizo/slurm/schizo_slurm.c b/orte/mca/schizo/slurm/schizo_slurm.c
new file mode 100644
index 00000000000..d29141382c6
--- /dev/null
+++ b/orte/mca/schizo/slurm/schizo_slurm.c
@@ -0,0 +1,132 @@
+/*
+ * Copyright (c) 2016 Intel, Inc. All rights reserved.
+ * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved.
+ * $COPYRIGHT$
+ *
+ * Additional copyrights may follow
+ *
+ * $HEADER$
+ *
+ */
+
+#include "orte_config.h"
+#include "orte/types.h"
+#include "opal/types.h"
+
+#ifdef HAVE_UNISTD_H
+#include <unistd.h>
+#endif
+#include <stdlib.h>  /* NOTE(review): header name was lost in extraction - confirm */
+
+#include "opal/util/argv.h"
+#include "opal/util/basename.h"
+#include "opal/util/opal_environ.h"
+
+#include "orte/runtime/orte_globals.h"
+#include "orte/util/name_fns.h"
+#include "orte/mca/schizo/base/base.h"
+
+#include "schizo_slurm.h"
+
+static orte_schizo_launch_environ_t check_launch_environment(void);
+static void finalize(void);
+
+orte_schizo_base_module_t orte_schizo_slurm_module = {
+    .check_launch_environment = check_launch_environment,
+    .finalize = finalize
+};
+
+static char **pushed_envs = NULL;
+static char **pushed_vals = NULL;
+static orte_schizo_launch_environ_t myenv;
+static bool myenvdefined = false;
+
+/* Determine how this process was launched under SLURM and push the
+ * matching envars ("ess" selection and, when SLURM already bound us,
+ * orte_bound_at_launch) into the local environment. The answer is
+ * cached, so repeated calls return the first determination. */
+static orte_schizo_launch_environ_t check_launch_environment(void)
+{
+    char *bind;
+    int i;
+
+    if (myenvdefined) {
+        return myenv;
+    }
+    myenvdefined = true;
+
+    /* we were only selected because SLURM was detected
+     * and we are an app, so no need to further check
+     * that here. Instead, see if we were direct launched
+     * vs launched via mpirun */
+    if (NULL != orte_process_info.my_daemon_uri) {
+        /* nope */
+        myenv = ORTE_SCHIZO_NATIVE_LAUNCHED;
+        opal_argv_append_nosize(&pushed_envs, OPAL_MCA_PREFIX"ess");
+        opal_argv_append_nosize(&pushed_vals, "pmi");
+        goto setup;
+    }
+
+    /* see if we are in a SLURM allocation */
+    if (NULL == getenv("SLURM_NODELIST")) {
+        /* nope */
+        myenv = ORTE_SCHIZO_UNDETERMINED;
+        return myenv;
+    }
+
+    /* we are in an allocation, but were we direct launched
+     * or are we a singleton? */
+    if (NULL == getenv("SLURM_STEP_ID")) {
+        /* not in a job step - ensure we select the
+         * correct things */
+        opal_argv_append_nosize(&pushed_envs, OPAL_MCA_PREFIX"ess");
+        opal_argv_append_nosize(&pushed_vals, "singleton");
+        myenv = ORTE_SCHIZO_MANAGED_SINGLETON;
+        goto setup;
+    }
+    myenv = ORTE_SCHIZO_DIRECT_LAUNCHED;
+    opal_argv_append_nosize(&pushed_envs, OPAL_MCA_PREFIX"ess");
+    opal_argv_append_nosize(&pushed_vals, "pmi");
+
+    /* if we are direct launched by SLURM, then we want
+     * to ensure that we do not override their binding
+     * options, so set that envar */
+    if (NULL != (bind = getenv("SLURM_CPU_BIND_TYPE"))) {
+        opal_output_verbose(1, orte_schizo_base_framework.framework_output,
+                            "schizo:slurm BIND_TYPE %s", bind);
+        /* indicate we are externally bound so we won't try to do it ourselves */
+        opal_argv_append_nosize(&pushed_envs, OPAL_MCA_PREFIX"orte_bound_at_launch");
+        opal_argv_append_nosize(&pushed_vals, "1");
+    }
+
+ setup:
+    opal_output_verbose(1, orte_schizo_base_framework.framework_output,
+                        "schizo:slurm DECLARED AS %s", orte_schizo_base_print_env(myenv));
+    if (NULL != pushed_envs) {
+        for (i=0; NULL != pushed_envs[i]; i++) {
+            opal_setenv(pushed_envs[i], pushed_vals[i], true, &environ);
+        }
+    }
+    return myenv;
+}
+
+/* Remove every envar pushed by check_launch_environment() and reset the
+ * module state so that it is safe to finalize twice or to re-initialize. */
+static void finalize(void)
+{
+    int i;
+
+    if (NULL != pushed_envs) {
+        for (i=0; NULL != pushed_envs[i]; i++) {
+            opal_unsetenv(pushed_envs[i], &environ);
+        }
+        opal_argv_free(pushed_envs);
+        opal_argv_free(pushed_vals);
+        /* forget the freed arrays so a repeated finalize (or a later
+         * re-init) cannot double-free or reuse them */
+        pushed_envs = NULL;
+        pushed_vals = NULL;
+    }
+    /* force the launch environment to be re-evaluated on re-init */
+    myenvdefined = false;
+}
diff --git a/orte/mca/schizo/slurm/schizo_slurm.h b/orte/mca/schizo/slurm/schizo_slurm.h
new file mode 100644
index 00000000000..e9ee000821e
--- /dev/null
+++ b/orte/mca/schizo/slurm/schizo_slurm.h
@@ -0,0 +1,29 @@
+/*
+ * Copyright (c) 2016 Intel, Inc. All rights reserved.
+ * $COPYRIGHT$
+ *
+ * Additional copyrights may follow
+ *
+ * $HEADER$
+ */
+
+#ifndef _MCA_SCHIZO_SLURM_H_
+#define _MCA_SCHIZO_SLURM_H_
+
+#include "orte_config.h"
+
+#include "orte/types.h"
+
+#include "opal/mca/base/base.h"
+#include "orte/mca/schizo/schizo.h"
+
+
+BEGIN_C_DECLS
+
+ORTE_MODULE_DECLSPEC extern orte_schizo_base_component_t mca_schizo_slurm_component;
+extern orte_schizo_base_module_t orte_schizo_slurm_module;
+
+END_C_DECLS
+
+#endif /* _MCA_SCHIZO_SLURM_H_ */
+
diff --git a/orte/mca/schizo/slurm/schizo_slurm_component.c b/orte/mca/schizo/slurm/schizo_slurm_component.c
new file mode 100644
index 00000000000..32d4bfbead9
--- /dev/null
+++ b/orte/mca/schizo/slurm/schizo_slurm_component.c
@@ -0,0 +1,54 @@
+/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
+/*
+ * Copyright (c) 2016 Intel, Inc. All rights reserved.
+ * $COPYRIGHT$
+ *
+ * Additional copyrights may follow
+ *
+ * $HEADER$
+ */
+
+#include "orte_config.h"
+#include "orte/types.h"
+#include "opal/types.h"
+
+#include "opal/util/show_help.h"
+
+#include "orte/mca/schizo/schizo.h"
+#include "schizo_slurm.h"
+
+static int component_query(mca_base_module_t **module, int *priority);
+
+/*
+ * Struct of function pointers and all that to let us be initialized
+ */
+orte_schizo_base_component_t mca_schizo_slurm_component = {
+    .base_version = {
+        MCA_SCHIZO_BASE_VERSION_1_0_0,
+        .mca_component_name = "slurm",
+        MCA_BASE_MAKE_VERSION(component, ORTE_MAJOR_VERSION, ORTE_MINOR_VERSION,
+                              ORTE_RELEASE_VERSION),
+        .mca_query_component = component_query,
+    },
+    .base_data = {
+        /* The component is checkpoint ready */
+        MCA_BASE_METADATA_PARAM_CHECKPOINT
+    },
+};
+
+/* Offer the slurm module to application processes only, at priority 50;
+ * every other process type is refused with ORTE_ERROR. */
+static int component_query(mca_base_module_t **module, int *priority)
+{
+    /* disqualify ourselves if we are not an app */
+    if (!ORTE_PROC_IS_APP) {
+        *priority = 0;
+        *module = NULL;
+        return ORTE_ERROR;
+    }
+
+    *module = (mca_base_module_t*)&orte_schizo_slurm_module;
+    *priority = 50;
+    return ORTE_SUCCESS;
+}
+
diff --git a/orte/orted/orted_main.c b/orte/orted/orted_main.c
index 71d1df2ef33..08f2e06d286 100644
--- a/orte/orted/orted_main.c
+++ b/orte/orted/orted_main.c
@@ -15,7 +15,7 @@
  * Copyright (c) 2009 Institut National de Recherche en Informatique
  * et Automatique. All rights reserved.
  * Copyright (c) 2010 Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2013-2015 Intel, Inc. All rights reserved.
+ * Copyright (c) 2013-2016 Intel, Inc. All rights reserved.
  * Copyright (c) 2015 Research Organization for Information Science
  * and Technology (RIST). All rights reserved.
  * $COPYRIGHT$
@@ -591,11 +591,12 @@ int orte_daemon(int argc, char *argv[])
             /* create a string that contains our uri + sysinfo + PMIx server URI envars */
             orte_util_convert_sysinfo_to_string(&sysinfo, orte_local_cpu_type, orte_local_cpu_model);
             asprintf(&tmp, "%s[%s]%s", orte_process_info.my_daemon_uri, sysinfo, nptr);
-            free(sysinfo);
+            free(sysinfo);
             free(nptr);
 
             /* pass that info to the singleton */
             write(orted_globals.uri_pipe, tmp, strlen(tmp)+1); /* need to add 1 to get the NULL */
+            close(orted_globals.uri_pipe);
 
             /* cleanup */
             free(tmp);
diff --git a/orte/runtime/orte_finalize.c b/orte/runtime/orte_finalize.c
index 4a8b3291cb2..da4f7059998 100644
--- a/orte/runtime/orte_finalize.c
+++ b/orte/runtime/orte_finalize.c
@@ -12,7 +12,7 @@
  * Copyright (c) 2009 Cisco Systems, Inc. All rights reserved.
  * Copyright (c) 2011-2013 Los Alamos National Security, LLC.
  * All rights reserved.
- * Copyright (c) 2014-2015 Intel, Inc. All rights reserved.
+ * Copyright (c) 2014-2016 Intel, Inc. All rights reserved.
* $COPYRIGHT$ * * Additional copyrights may follow @@ -30,6 +30,7 @@ #include "orte/mca/ess/ess.h" #include "orte/mca/ess/base/base.h" +#include "orte/mca/schizo/base/base.h" #include "orte/runtime/orte_globals.h" #include "orte/runtime/runtime.h" #include "orte/runtime/orte_locks.h" @@ -76,6 +77,10 @@ int orte_finalize(void) /* close the ess itself */ (void) mca_base_framework_close(&orte_ess_base_framework); + /* finalize and close schizo */ + orte_schizo.finalize(); + (void) mca_base_framework_close(&orte_schizo_base_framework); + /* cleanup the process info */ orte_proc_info_finalize(); diff --git a/orte/runtime/orte_init.c b/orte/runtime/orte_init.c index d4f56d81060..e2b23d4d4de 100644 --- a/orte/runtime/orte_init.c +++ b/orte/runtime/orte_init.c @@ -13,7 +13,7 @@ * reserved. * Copyright (c) 2007-2012 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2007-2008 Sun Microsystems, Inc. All rights reserved. - * Copyright (c) 2014-2015 Intel, Inc. All rights reserved. + * Copyright (c) 2014-2016 Intel, Inc. All rights reserved. * Copyright (c) 2014-2016 Research Organization for Information Science * and Technology (RIST). All rights reserved. 
* @@ -45,6 +45,7 @@ #include "orte/mca/ess/base/base.h" #include "orte/mca/ess/ess.h" #include "orte/mca/errmgr/errmgr.h" +#include "orte/mca/schizo/base/base.h" #include "orte/util/listener.h" #include "orte/util/name_fns.h" #include "orte/util/proc_info.h" @@ -202,6 +203,22 @@ int orte_init(int* pargc, char*** pargv, orte_proc_type_t flags) pmix_server_register_params(); } + /* open the SCHIZO framework as everyone needs it, and the + * ess will use it to help select its component */ + if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_schizo_base_framework, 0))) { + ORTE_ERROR_LOG(ret); + error = "orte_schizo_base_open"; + goto error; + } + if (ORTE_SUCCESS != (ret = orte_schizo_base_select())) { + error = "orte_schizo_base_select"; + goto error; + } + /* if we are an app, let SCHIZO help us determine our environment */ + if (ORTE_PROC_IS_APP) { + (void)orte_schizo.check_launch_environment(); + } + /* open the ESS and select the correct module for this environment */ if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_ess_base_framework, 0))) { ORTE_ERROR_LOG(ret);