diff --git a/opal/mca/btl/vader/btl_vader.h b/opal/mca/btl/vader/btl_vader.h index 5290a7faa78..f0e8ef678f5 100644 --- a/opal/mca/btl/vader/btl_vader.h +++ b/opal/mca/btl/vader/btl_vader.h @@ -12,7 +12,7 @@ * All rights reserved. * Copyright (c) 2006-2007 Voltaire. All rights reserved. * Copyright (c) 2009-2010 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2010-2015 Los Alamos National Security, LLC. All rights + * Copyright (c) 2010-2017 Los Alamos National Security, LLC. All rights * reserved. * Copyright (c) 2015 Mellanox Technologies. All rights reserved. * @@ -136,6 +136,8 @@ struct mca_btl_vader_component_t { opal_list_t pending_endpoints; /**< list of endpoints with pending fragments */ opal_list_t pending_fragments; /**< fragments pending remote completion */ + char *backing_directory; /**< directory to place shared memory backing files */ + /* knem stuff */ #if OPAL_BTL_VADER_HAVE_KNEM unsigned int knem_dma_min; /**< minimum size to enable DMA for knem transfers (0 disables) */ diff --git a/opal/mca/btl/vader/btl_vader_component.c b/opal/mca/btl/vader/btl_vader_component.c index 38cc5fb987a..ccbc0aa4647 100644 --- a/opal/mca/btl/vader/btl_vader_component.c +++ b/opal/mca/btl/vader/btl_vader_component.c @@ -12,10 +12,10 @@ * All rights reserved. * Copyright (c) 2006-2007 Voltaire. All rights reserved. * Copyright (c) 2009-2010 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2010-2015 Los Alamos National Security, LLC. + * Copyright (c) 2010-2017 Los Alamos National Security, LLC. * All rights reserved. * Copyright (c) 2011 NVIDIA Corporation. All rights reserved. - * Copyright (c) 2014-2015 Intel, Inc. All rights reserved. + * Copyright (c) 2014-2017 Intel, Inc. All rights reserved. * Copyright (c) 2014-2016 Research Organization for Information Science * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ @@ -211,6 +211,19 @@ static int mca_btl_vader_component_register (void) OPAL_INFO_LVL_3, MCA_BASE_VAR_SCOPE_GROUP, &mca_btl_vader_component.single_copy_mechanism); OBJ_RELEASE(new_enum); + if (0 == access ("/dev/shm", W_OK)) { + mca_btl_vader_component.backing_directory = "/dev/shm"; + } else { + mca_btl_vader_component.backing_directory = opal_process_info.proc_session_dir; + } + (void) mca_base_component_var_register (&mca_btl_vader_component.super.btl_version, "backing_directory", + "Directory to place backing files for shared memory communication. " + "This directory should be on a local filesystem such as /tmp or " + "/dev/shm (default: (linux) /dev/shm, (others) session directory)", + MCA_BASE_VAR_TYPE_STRING, NULL, 0, 0, OPAL_INFO_LVL_3, + MCA_BASE_VAR_SCOPE_READONLY, &mca_btl_vader_component.backing_directory); + + #if OPAL_BTL_VADER_HAVE_KNEM /* Currently disabling DMA mode by default; it's not clear that this is useful in all applications and architectures. */ mca_btl_vader_component.knem_dma_min = 0; @@ -491,13 +504,17 @@ static mca_btl_base_module_t **mca_btl_vader_component_init (int *num_btls, if (MCA_BTL_VADER_XPMEM != mca_btl_vader_component.single_copy_mechanism) { char *sm_file; - rc = asprintf(&sm_file, "%s" OPAL_PATH_SEP "vader_segment.%s.%d", opal_process_info.proc_session_dir, + rc = asprintf(&sm_file, "%s" OPAL_PATH_SEP "vader_segment.%s.%d", mca_btl_vader_component.backing_directory, opal_process_info.nodename, MCA_BTL_VADER_LOCAL_RANK); if (0 > rc) { free (btls); return NULL; } rc = opal_shmem_segment_create (&component->seg_ds, sm_file, component->segment_size); + if (OPAL_SUCCESS == rc && NULL != opal_pmix.register_cleanup) { + /* register only once the backing file exists: the pmix3x register_cleanup stat()s the path and returns OPAL_ERR_NOT_FOUND otherwise; registration is best-effort, so its status is deliberately ignored */ + (void) opal_pmix.register_cleanup (sm_file, false, false); + } free (sm_file); if (OPAL_SUCCESS != rc) { diff --git a/opal/mca/pmix/base/base.h b/opal/mca/pmix/base/base.h index d1eeb68e109..e533e026720 100644 --- a/opal/mca/pmix/base/base.h +++ b/opal/mca/pmix/base/base.h @@ -65,6 +65,7 @@ typedef struct { opal_mutex_t mutex; 
opal_pmix_condition_t cond; volatile bool active; + int status; } opal_pmix_lock_t; diff --git a/opal/mca/pmix/pmix.h b/opal/mca/pmix/pmix.h index 53e04571ab5..4e650cf30bf 100644 --- a/opal/mca/pmix/pmix.h +++ b/opal/mca/pmix/pmix.h @@ -867,6 +867,9 @@ typedef int (*opal_pmix_base_process_monitor_fn_t)(opal_list_t *monitor, opal_list_t *directives, opal_pmix_info_cbfunc_t cbfunc, void *cbdata); +/* register cleanup */ +typedef int (*opal_pmix_base_register_cleanup_fn_t)(char *path, bool ignore, bool jobscope); + /* * the standard public API data structure */ @@ -901,6 +904,7 @@ typedef struct { opal_pmix_base_alloc_fn_t allocate; opal_pmix_base_job_control_fn_t job_control; opal_pmix_base_process_monitor_fn_t monitor; + opal_pmix_base_register_cleanup_fn_t register_cleanup; /* server APIs */ opal_pmix_base_module_server_init_fn_t server_init; opal_pmix_base_module_server_finalize_fn_t server_finalize; diff --git a/opal/mca/pmix/pmix3x/pmix/VERSION b/opal/mca/pmix/pmix3x/pmix/VERSION index 3b0f60b307a..93b4afb0c98 100644 --- a/opal/mca/pmix/pmix3x/pmix/VERSION +++ b/opal/mca/pmix/pmix3x/pmix/VERSION @@ -30,7 +30,7 @@ greek= # command, or with the date (if "git describe" fails) in the form of # "date". -repo_rev=gitf56d30e +repo_rev=git5c0b64b # If tarball_version is not empty, it is used as the version string in # the tarball filename, regardless of all other versions listed in @@ -44,7 +44,7 @@ tarball_version= # The date when this release was created -date="Nov 11, 2017" +date="Dec 11, 2017" # The shared library version of each of PMIx's public libraries. 
# These versions are maintained in accordance with the "Library diff --git a/opal/mca/pmix/pmix3x/pmix/include/pmix_common.h.in b/opal/mca/pmix/pmix3x/pmix/include/pmix_common.h.in index 897c5f43a3e..de699b2fcd3 100644 --- a/opal/mca/pmix/pmix3x/pmix/include/pmix_common.h.in +++ b/opal/mca/pmix/pmix3x/pmix/include/pmix_common.h.in @@ -462,6 +462,16 @@ typedef uint32_t pmix_rank_t; #define PMIX_JOB_CTRL_PROVISION_IMAGE "pmix.jctrl.pvnimg" // (char*) name of the image that is to be provisioned #define PMIX_JOB_CTRL_PREEMPTIBLE "pmix.jctrl.preempt" // (bool) job can be pre-empted #define PMIX_JOB_CTRL_TERMINATE "pmix.jctrl.term" // (bool) politely terminate the specified procs +#define PMIX_REGISTER_CLEANUP "pmix.reg.cleanup" // (char*) comma-delimited list of files/directories to + // be removed upon process termination +#define PMIX_CLEANUP_RECURSIVE "pmix.clnup.recurse" // (bool) recursively cleanup all subdirectories under the + // specified one(s) +#define PMIX_CLEANUP_EMPTY "pmix.clnup.empty" // (bool) only remove empty subdirectories +#define PMIX_CLEANUP_IGNORE "pmix.clnup.ignore" // (char*) comma-delimited list of filenames that are not + // to be removed +#define PMIX_CLEANUP_LEAVE_TOPDIR "pmix.clnup.lvtop" // (bool) when recursively cleaning subdirs, do not remove + // the top-level directory (the one given in the + // cleanup request) /* monitoring attributes */ #define PMIX_MONITOR_ID "pmix.monitor.id" // (char*) provide a string identifier for this request @@ -584,6 +594,7 @@ typedef int pmix_status_t; #define PMIX_ERR_NOT_IMPLEMENTED -48 #define PMIX_ERR_COMM_FAILURE -49 #define PMIX_ERR_UNPACK_READ_PAST_END_OF_BUFFER -50 // internal-only +#define PMIX_ERR_CONFLICTING_CLEANUP_DIRECTIVES -51 /* define a starting point for v2.x error values */ #define PMIX_ERR_V2X_BASE -100 diff --git a/opal/mca/pmix/pmix3x/pmix/src/atomics/sys/powerpc/atomic.h b/opal/mca/pmix/pmix3x/pmix/src/atomics/sys/powerpc/atomic.h index 9682b9e62af..4e39a43ee33 100644 --- 
a/opal/mca/pmix/pmix3x/pmix/src/atomics/sys/powerpc/atomic.h +++ b/opal/mca/pmix/pmix3x/pmix/src/atomics/sys/powerpc/atomic.h @@ -84,7 +84,7 @@ void pmix_atomic_rmb(void) static inline void pmix_atomic_wmb(void) { - PMIXRMB(); + PMIXWMB(); } static inline @@ -110,7 +110,7 @@ void pmix_atomic_isync(void) #pragma mc_func pmix_atomic_rmb { "7c2004ac" } /* lwsync */ #pragma reg_killed_by pmix_atomic_rmb /* none */ -#pragma mc_func pmix_atomic_wmb { "7c0006ac" } /* eieio */ +#pragma mc_func pmix_atomic_wmb { "7c2004ac" } /* lwsync */ #pragma reg_killed_by pmix_atomic_wmb /* none */ #endif diff --git a/opal/mca/pmix/pmix3x/pmix/src/include/pmix_globals.c b/opal/mca/pmix/pmix3x/pmix/src/include/pmix_globals.c index 15d56e6268b..9f5487f14f2 100644 --- a/opal/mca/pmix/pmix3x/pmix/src/include/pmix_globals.c +++ b/opal/mca/pmix/pmix3x/pmix/src/include/pmix_globals.c @@ -36,11 +36,28 @@ #endif #include #include PMIX_EVENT_HEADER +#if HAVE_SYS_STAT_H +#include <sys/stat.h> +#endif /* HAVE_SYS_STAT_H */ +#ifdef HAVE_DIRENT_H +#include <dirent.h> +#endif /* HAVE_DIRENT_H */ +#include <errno.h> + +#include #include "src/mca/bfrops/bfrops_types.h" #include "src/class/pmix_hash_table.h" #include "src/class/pmix_list.h" #include "src/threads/threads.h" +#include "src/util/argv.h" +#include "src/util/error.h" +#include "src/util/os_path.h" + +static void cleanup(pmix_epilog_t *epi); +static void dirpath_destroy(char *path, pmix_cleanup_dir_t *cd, + pmix_epilog_t *epi); +static bool dirpath_is_empty(const char *path); PMIX_EXPORT pmix_lock_t pmix_global_lock = { .mutex = PMIX_MUTEX_STATIC_INIT, @@ -52,6 +69,36 @@ PMIX_EXPORT PMIX_CLASS_INSTANCE(pmix_namelist_t, pmix_list_item_t, NULL, NULL); +static void cfcon(pmix_cleanup_file_t *p) +{ + p->path = NULL; +} +static void cfdes(pmix_cleanup_file_t *p) +{ + if (NULL != p->path) { + free(p->path); + } +} +PMIX_EXPORT PMIX_CLASS_INSTANCE(pmix_cleanup_file_t, + pmix_list_item_t, + cfcon, cfdes); + +static void cdcon(pmix_cleanup_dir_t *p) +{ + p->path = NULL; + p->recurse = false; + 
p->leave_topdir = false; +} +static void cddes(pmix_cleanup_dir_t *p) +{ + if (NULL != p->path) { + free(p->path); + } +} +PMIX_EXPORT PMIX_CLASS_INSTANCE(pmix_cleanup_dir_t, + pmix_list_item_t, + cdcon, cddes); + static void nscon(pmix_nspace_t *p) { p->nspace = NULL; @@ -61,6 +108,9 @@ static void nscon(pmix_nspace_t *p) p->ndelivered = 0; PMIX_CONSTRUCT(&p->ranks, pmix_list_t); memset(&p->compat, 0, sizeof(p->compat)); + PMIX_CONSTRUCT(&p->epilog.cleanup_dirs, pmix_list_t); + PMIX_CONSTRUCT(&p->epilog.cleanup_files, pmix_list_t); + PMIX_CONSTRUCT(&p->epilog.ignores, pmix_list_t); } static void nsdes(pmix_nspace_t *p) { @@ -71,6 +121,12 @@ static void nsdes(pmix_nspace_t *p) PMIX_RELEASE(p->jobbkt); } PMIX_LIST_DESTRUCT(&p->ranks); + /* perform any epilog */ + cleanup(&p->epilog); + /* cleanup the epilog */ + PMIX_LIST_DESTRUCT(&p->epilog.cleanup_dirs); + PMIX_LIST_DESTRUCT(&p->epilog.cleanup_files); + PMIX_LIST_DESTRUCT(&p->epilog.ignores); } PMIX_EXPORT PMIX_CLASS_INSTANCE(pmix_nspace_t, pmix_list_item_t, @@ -124,7 +180,11 @@ static void pcon(pmix_peer_t *p) PMIX_CONSTRUCT(&p->send_queue, pmix_list_t); p->send_msg = NULL; p->recv_msg = NULL; + PMIX_CONSTRUCT(&p->epilog.cleanup_dirs, pmix_list_t); + PMIX_CONSTRUCT(&p->epilog.cleanup_files, pmix_list_t); + PMIX_CONSTRUCT(&p->epilog.ignores, pmix_list_t); } + static void pdes(pmix_peer_t *p) { if (0 <= p->sd) { @@ -148,6 +208,12 @@ static void pdes(pmix_peer_t *p) if (NULL != p->recv_msg) { PMIX_RELEASE(p->recv_msg); } + /* perform any epilog */ + cleanup(&p->epilog); + /* cleanup the epilog */ + PMIX_LIST_DESTRUCT(&p->epilog.cleanup_dirs); + PMIX_LIST_DESTRUCT(&p->epilog.cleanup_files); + PMIX_LIST_DESTRUCT(&p->epilog.ignores); } PMIX_EXPORT PMIX_CLASS_INSTANCE(pmix_peer_t, pmix_object_t, @@ -252,3 +318,206 @@ static void qdes(pmix_query_caddy_t *p) PMIX_EXPORT PMIX_CLASS_INSTANCE(pmix_query_caddy_t, pmix_object_t, qcon, qdes); + +/* Execute an epilog: unlink every registered file, then process every registered directory, skipping anything whose uid/gid does not match the epilog's */ +static void cleanup(pmix_epilog_t *epi) +{ + pmix_cleanup_file_t *cf; + 
pmix_cleanup_dir_t *cd; + struct stat statbuf; + int rc; + + /* start with any specified files */ + PMIX_LIST_FOREACH(cf, &epi->cleanup_files, pmix_cleanup_file_t) { + /* check the effective uid/gid of the file and ensure it + * matches that of the peer - we do this to provide at least + * some minimum level of protection */ + rc = stat(cf->path, &statbuf); + if (0 != rc) { + pmix_output_verbose(10, pmix_globals.debug_output, + "File %s failed to stat: %s", cf->path, strerror(errno)); + continue; + } + if (statbuf.st_uid != epi->uid || + statbuf.st_gid != epi->gid) { + pmix_output_verbose(10, pmix_globals.debug_output, + "File %s uid/gid doesn't match: uid %lu(%lu) gid %lu(%lu)", + cf->path, + (unsigned long)statbuf.st_uid, (unsigned long)epi->uid, + (unsigned long)statbuf.st_gid, (unsigned long)epi->gid); + continue; + } + rc = unlink(cf->path); + if (0 != rc) { + pmix_output_verbose(10, pmix_globals.debug_output, + "File %s failed to unlink: %s", cf->path, strerror(errno)); + } + } + + /* now cleanup the directories */ + PMIX_LIST_FOREACH(cd, &epi->cleanup_dirs, pmix_cleanup_dir_t) { + /* check the effective uid/gid of the file and ensure it + * matches that of the peer - we do this to provide at least + * some minimum level of protection */ + rc = stat(cd->path, &statbuf); + if (0 != rc) { + pmix_output_verbose(10, pmix_globals.debug_output, + "Directory %s failed to stat: %s", cd->path, strerror(errno)); + continue; + } + if (statbuf.st_uid != epi->uid || + statbuf.st_gid != epi->gid) { + pmix_output_verbose(10, pmix_globals.debug_output, + "Directory %s uid/gid doesn't match: uid %lu(%lu) gid %lu(%lu)", + cd->path, + (unsigned long)statbuf.st_uid, (unsigned long)epi->uid, + (unsigned long)statbuf.st_gid, (unsigned long)epi->gid); + continue; + } + if ((statbuf.st_mode & S_IRWXU) == S_IRWXU) { + dirpath_destroy(cd->path, cd, epi); + } else { + pmix_output_verbose(10, pmix_globals.debug_output, + "Directory %s lacks permissions", cd->path); + } + } +} + +/* Remove the entries of path (descending into subdirectories only when cd->recurse is set), skipping ignored paths and entries whose uid/gid does not match the epilog's; path itself is removed if it ends up empty, except for the top-level directory when cd->leave_topdir is set */ +static void 
dirpath_destroy(char *path, pmix_cleanup_dir_t *cd, pmix_epilog_t *epi) +{ + int rc; + bool is_dir = false, ignore; + DIR *dp; + struct dirent *ep; + char *filenm; + struct stat buf; + pmix_cleanup_file_t *cf; + + if (NULL == path) { /* protect against error */ + return; + } + + /* if this path is to be ignored, then do so */ + PMIX_LIST_FOREACH(cf, &epi->ignores, pmix_cleanup_file_t) { + if (0 == strcmp(cf->path, path)) { + return; + } + } + + /* Open up the directory */ + dp = opendir(path); + if (NULL == dp) { + return; + } + + while (NULL != (ep = readdir(dp))) { + /* skip: + * - . and .. + */ + if ((0 == strcmp(ep->d_name, ".")) || + (0 == strcmp(ep->d_name, ".."))) { + continue; + } + + /* Create a pathname. This is not always needed, but it makes + * for cleaner code just to create it here. Note that we are + * allocating memory here, so we need to free it later on. + */ + filenm = pmix_os_path(false, path, ep->d_name, NULL); + + /* if this path is to be ignored, then do so - a flag is needed because a continue inside the list walk would only advance that inner loop and leave filenm freed but still in use */ + ignore = false; + PMIX_LIST_FOREACH(cf, &epi->ignores, pmix_cleanup_file_t) { + if (0 == strcmp(cf->path, filenm)) { + ignore = true; + break; + } + } + if (ignore) { + free(filenm); + continue; + } + + /* Check to see if it is a directory */ + is_dir = false; + + rc = stat(filenm, &buf); + if (0 > rc) { + /* Handle a race condition. filenm might have been deleted by + * another process running on the same node. That typically occurs + * when one task is removing the job_session_dir and another task + * is still removing its proc_session_dir. + */ + free(filenm); + continue; + } + /* if the uid/gid don't match, then leave it alone */ + if (buf.st_uid != epi->uid || + buf.st_gid != epi->gid) { + free(filenm); + continue; + } + + if (S_ISDIR(buf.st_mode)) { + is_dir = true; + } + + /* + * If not recursively descending, then skip any directory we find + * since we were not told to remove it. 
+ */ + if (is_dir && !cd->recurse) { + /* continue removing files */ + free(filenm); + continue; + } + + /* Directories are recursively destroyed */ + if (is_dir && cd->recurse && ((buf.st_mode & S_IRWXU) == S_IRWXU)) { + dirpath_destroy(filenm, cd, epi); + free(filenm); + } else { + /* Files are removed right here */ + unlink(filenm); + free(filenm); + } + } + + /* Done with this directory */ + closedir(dp); + + /* If the directory is empty, then remove it unless we + * were told to leave it */ + if (0 == strcmp(path, cd->path) && cd->leave_topdir) { + return; + } + if (dirpath_is_empty(path)) { + rmdir(path); + } +} + +/* Return true when path holds no entries other than . and ..; a NULL path is reported as empty and a path that cannot be opened as non-empty */ +static bool dirpath_is_empty(const char *path ) +{ + DIR *dp; + struct dirent *ep; + + if (NULL != path) { /* protect against error */ + dp = opendir(path); + if (NULL != dp) { + while ((ep = readdir(dp))) { + if ((0 != strcmp(ep->d_name, ".")) && + (0 != strcmp(ep->d_name, ".."))) { + closedir(dp); + return false; + } + } + closedir(dp); + return true; + } + return false; + } + + return true; +} diff --git a/opal/mca/pmix/pmix3x/pmix/src/include/pmix_globals.h b/opal/mca/pmix/pmix3x/pmix/src/include/pmix_globals.h index 34f12a5dfeb..ab43db2cba8 100644 --- a/opal/mca/pmix/pmix3x/pmix/src/include/pmix_globals.h +++ b/opal/mca/pmix/pmix3x/pmix/src/include/pmix_globals.h @@ -119,6 +119,29 @@ typedef struct pmix_personality_t { pmix_gds_base_module_t *gds; } pmix_personality_t; +/* define a set of structs for tracking post-termination cleanup */ +typedef struct pmix_epilog_t { + uid_t uid; + gid_t gid; + pmix_list_t cleanup_dirs; + pmix_list_t cleanup_files; + pmix_list_t ignores; +} pmix_epilog_t; + +typedef struct { + pmix_list_item_t super; + char *path; +} pmix_cleanup_file_t; +PMIX_CLASS_DECLARATION(pmix_cleanup_file_t); + +typedef struct { + pmix_list_item_t super; + char *path; + bool recurse; + bool leave_topdir; +} pmix_cleanup_dir_t; +PMIX_CLASS_DECLARATION(pmix_cleanup_dir_t); + /* objects used by servers for tracking active 
nspaces */ typedef struct { pmix_list_item_t super; @@ -133,6 +156,8 @@ typedef struct { * Since servers may support clients from multiple nspaces, * track their respective compatibility modules here */ pmix_personality_t compat; + pmix_epilog_t epilog; // things to do upon termination of all local clients + // from this nspace } pmix_nspace_t; PMIX_CLASS_DECLARATION(pmix_nspace_t); @@ -156,6 +181,17 @@ typedef struct pmix_rank_info_t { } pmix_rank_info_t; PMIX_CLASS_DECLARATION(pmix_rank_info_t); + +/* define a very simple caddy for dealing with pmix_info_t + * objects when transferring portions of arrays */ +typedef struct { + pmix_list_item_t super; + pmix_info_t *info; + size_t ninfo; +} pmix_info_caddy_t; +PMIX_CLASS_DECLARATION(pmix_info_caddy_t); + + /* object for tracking peers - each peer can have multiple * connections. This can occur if the initial app executes * a fork/exec, and the child initiates its own connection @@ -177,6 +213,8 @@ typedef struct pmix_peer_t { pmix_list_t send_queue; /**< list of messages to send */ pmix_ptl_send_t *send_msg; /**< current send in progress */ pmix_ptl_recv_t *recv_msg; /**< current recv in progress */ + pmix_epilog_t epilog; /**< things to be performed upon + termination of this peer */ } pmix_peer_t; PMIX_CLASS_DECLARATION(pmix_peer_t); @@ -305,14 +343,6 @@ typedef struct { } pmix_cb_t; PMIX_CLASS_DECLARATION(pmix_cb_t); -/* define a very simple caddy for dealing with pmix_info_t - * objects when transferring portions of arrays */ -typedef struct { - pmix_list_item_t super; - pmix_info_t *info; -} pmix_info_caddy_t; -PMIX_CLASS_DECLARATION(pmix_info_caddy_t); - #define PMIX_THREADSHIFT(r, c) \ do { \ pmix_event_assign(&((r)->ev), pmix_globals.evbase, \ diff --git a/opal/mca/pmix/pmix3x/pmix/src/mca/gds/hash/gds_hash.c b/opal/mca/pmix/pmix3x/pmix/src/mca/gds/hash/gds_hash.c index 1f60b49dbcf..4d7a2b8549e 100644 --- a/opal/mca/pmix/pmix3x/pmix/src/mca/gds/hash/gds_hash.c +++ 
b/opal/mca/pmix/pmix3x/pmix/src/mca/gds/hash/gds_hash.c @@ -426,6 +426,7 @@ pmix_status_t hash_cache_job_info(struct pmix_nspace_t *ns, /* an array of data pertaining to a specific proc */ if (PMIX_DATA_ARRAY != info[n].value.type) { PMIX_ERROR_LOG(PMIX_ERR_BAD_PARAM); + rc = PMIX_ERR_TYPE_MISMATCH; goto release; } size = info[n].value.data.darray->size; @@ -433,6 +434,7 @@ pmix_status_t hash_cache_job_info(struct pmix_nspace_t *ns, /* first element of the array must be the rank */ if (0 != strcmp(iptr[0].key, PMIX_RANK) || PMIX_PROC_RANK != iptr[0].value.type) { + rc = PMIX_ERR_TYPE_MISMATCH; PMIX_ERROR_LOG(PMIX_ERR_BAD_PARAM); goto release; } @@ -458,7 +460,7 @@ pmix_status_t hash_cache_job_info(struct pmix_nspace_t *ns, if (NULL == tmp) { PMIX_ERROR_LOG(PMIX_ERR_NOMEM); rc = PMIX_ERR_NOMEM; - return rc; + goto release; } kp2->value->type = PMIX_COMPRESSED_STRING; free(kp2->value->data.string); @@ -493,10 +495,10 @@ pmix_status_t hash_cache_job_info(struct pmix_nspace_t *ns, if (PMIX_STRING_SIZE_CHECK(kp2->value)) { if (pmix_util_compress_string(kp2->value->data.string, &tmp, &len)) { if (NULL == tmp) { - PMIX_ERROR_LOG(PMIX_ERR_NOMEM); - PMIX_RELEASE(kp2); rc = PMIX_ERR_NOMEM; - return rc; + PMIX_ERROR_LOG(rc); + PMIX_RELEASE(kp2); + goto release; } kp2->value->type = PMIX_COMPRESSED_STRING; free(kp2->value->data.string); diff --git a/opal/mca/pmix/pmix3x/pmix/src/mca/ptl/tcp/ptl_tcp_component.c b/opal/mca/pmix/pmix3x/pmix/src/mca/ptl/tcp/ptl_tcp_component.c index 69ae60e55dd..05bb12ef298 100644 --- a/opal/mca/pmix/pmix3x/pmix/src/mca/ptl/tcp/ptl_tcp_component.c +++ b/opal/mca/pmix/pmix3x/pmix/src/mca/ptl/tcp/ptl_tcp_component.c @@ -1161,6 +1161,12 @@ static void connection_handler(int sd, short args, void *cbdata) peer->nptr = nptr; PMIX_RETAIN(info); peer->info = info; + /* update the epilog fields */ + peer->epilog.uid = info->uid; + peer->epilog.gid = info->gid; + /* ensure the nspace epilog is updated too */ + nptr->epilog.uid = info->uid; + 
nptr->epilog.gid = info->gid; info->proc_cnt++; /* increase number of processes on this rank */ peer->sd = pnd->sd; if (0 > (peer->index = pmix_pointer_array_add(&pmix_server_globals.clients, peer))) { @@ -1399,6 +1405,11 @@ static void process_cbfunc(int sd, short args, void *cbdata) peer->nptr = nptr; PMIX_RETAIN(info); peer->info = info; + /* save the uid/gid */ + peer->epilog.uid = info->uid; + peer->epilog.gid = info->gid; + nptr->epilog.uid = info->uid; + nptr->epilog.gid = info->gid; peer->proc_cnt = 1; peer->sd = pnd->sd; diff --git a/opal/mca/pmix/pmix3x/pmix/src/mca/ptl/usock/ptl_usock_component.c b/opal/mca/pmix/pmix3x/pmix/src/mca/ptl/usock/ptl_usock_component.c index f25d66eba7b..f3c63b9b4ca 100644 --- a/opal/mca/pmix/pmix3x/pmix/src/mca/ptl/usock/ptl_usock_component.c +++ b/opal/mca/pmix/pmix3x/pmix/src/mca/ptl/usock/ptl_usock_component.c @@ -601,6 +601,11 @@ static void connection_handler(int sd, short args, void *cbdata) psave->nptr = nptr; PMIX_RETAIN(info); psave->info = info; + /* save the epilog info */ + psave->epilog.uid = info->uid; + psave->epilog.gid = info->gid; + nptr->epilog.uid = info->uid; + nptr->epilog.gid = info->gid; info->proc_cnt++; /* increase number of processes on this rank */ psave->sd = pnd->sd; if (0 > (psave->index = pmix_pointer_array_add(&pmix_server_globals.clients, psave))) { diff --git a/opal/mca/pmix/pmix3x/pmix/src/server/pmix_server_get.c b/opal/mca/pmix/pmix3x/pmix/src/server/pmix_server_get.c index b50c7ae743f..73c993c8df0 100644 --- a/opal/mca/pmix/pmix3x/pmix/src/server/pmix_server_get.c +++ b/opal/mca/pmix/pmix3x/pmix/src/server/pmix_server_get.c @@ -382,6 +382,7 @@ pmix_status_t pmix_server_get(pmix_buffer_t *buf, } if (PMIX_ERR_NOT_FOUND != rc || NULL == lcd) { /* we have a problem - e.g., out of memory */ + cbfunc(PMIX_ERR_NOT_FOUND, NULL, 0, cbdata, NULL, NULL); PMIX_INFO_FREE(info, ninfo); return rc; } diff --git a/opal/mca/pmix/pmix3x/pmix/src/server/pmix_server_ops.c 
b/opal/mca/pmix/pmix3x/pmix/src/server/pmix_server_ops.c index 0f3d8f2f41b..eab1b4137a8 100644 --- a/opal/mca/pmix/pmix3x/pmix/src/server/pmix_server_ops.c +++ b/opal/mca/pmix/pmix3x/pmix/src/server/pmix_server_ops.c @@ -2015,6 +2015,13 @@ pmix_status_t pmix_server_job_ctrl(pmix_peer_t *peer, pmix_status_t rc; pmix_query_caddy_t *cd; pmix_proc_t proc; + size_t n; + bool recurse = false, leave_topdir = false, duplicate; /* flags default to off when the request does not carry them */ + pmix_list_t cachedirs, cachefiles; + pmix_epilog_t *epi = NULL; /* NULL means epilog requests are rejected */ + pmix_cleanup_file_t *cf, *cf2; + pmix_cleanup_dir_t *cdir, *cdir2; + struct stat statbuf; pmix_output_verbose(2, pmix_server_globals.base_output, "recvd job control request from client"); @@ -2045,6 +2052,22 @@ pmix_status_t pmix_server_job_ctrl(pmix_peer_t *peer, goto exit; } } + + /* check targets to find proper place to put any epilog requests - epi stays NULL when the single target lies in a different nspace */ + if (NULL == cd->targets) { + epi = &peer->nptr->epilog; + } else if (1 == cd->ntargets) { + if (0 == strncmp(cd->targets[0].nspace, peer->info->pname.nspace, PMIX_MAX_NSLEN)) { + if (PMIX_RANK_WILDCARD == cd->targets[0].rank) { + epi = &peer->nptr->epilog; + } else { + epi = &peer->epilog; + } + } + } else { + epi = NULL; // do not allow epilog requests + } + /* unpack the number of info objects */ cnt = 1; PMIX_BFROPS_UNPACK(rc, peer, buf, &cd->ninfo, &cnt, PMIX_SIZE); @@ -2063,6 +2086,174 @@ pmix_status_t pmix_server_job_ctrl(pmix_peer_t *peer, } } + /* if this includes a request for post-termination cleanup, we handle + * that request ourselves */ + PMIX_CONSTRUCT(&cachedirs, pmix_list_t); + PMIX_CONSTRUCT(&cachefiles, pmix_list_t); + cnt = 0; // track how many infos are cleanup related + for (n=0; n < cd->ninfo; n++) { + if (0 == strncmp(cd->info[n].key, PMIX_REGISTER_CLEANUP, PMIX_MAX_KEYLEN)) { + ++cnt; + /* see if we allow epilog requests */ + if (NULL == epi) { + /* return an error */ + rc = PMIX_ERR_BAD_PARAM; + goto exit; + } + if (PMIX_STRING != cd->info[n].value.type || + NULL == cd->info[n].value.data.string) { + /* return an error */ + rc = 
PMIX_ERR_BAD_PARAM; + goto exit; + } + if (0 != stat(cd->info[n].value.data.string, &statbuf)) { + /* return an error */ + rc = PMIX_ERR_BAD_PARAM; + goto exit; + } + if (S_ISDIR(statbuf.st_mode)) { + cdir = PMIX_NEW(pmix_cleanup_dir_t); + if (NULL == cdir) { + /* return an error */ + rc = PMIX_ERR_NOMEM; + goto exit; + } + cdir->path = strdup(cd->info[n].value.data.string); + pmix_list_append(&cachedirs, &cdir->super); + } else { + cf = PMIX_NEW(pmix_cleanup_file_t); + if (NULL == cf) { + /* return an error */ + rc = PMIX_ERR_NOMEM; + goto exit; + } + cf->path = strdup(cd->info[n].value.data.string); + pmix_list_append(&cachefiles, &cf->super); + } + } else if (0 == strncmp(cd->info[n].key, PMIX_CLEANUP_RECURSIVE, PMIX_MAX_KEYLEN)) { + /* see if we allow epilog requests */ + if (NULL == epi) { + /* return an error */ + rc = PMIX_ERR_BAD_PARAM; + goto exit; + } + recurse = PMIX_INFO_TRUE(&cd->info[n]); + ++cnt; + } else if (0 == strncmp(cd->info[n].key, PMIX_CLEANUP_IGNORE, PMIX_MAX_KEYLEN)) { + if (PMIX_STRING != cd->info[n].value.type || + NULL == cd->info[n].value.data.string) { + /* return an error */ + rc = PMIX_ERR_BAD_PARAM; + goto exit; + } + /* see if we allow epilog requests */ + if (NULL == epi) { + /* return an error */ + rc = PMIX_ERR_BAD_PARAM; + goto exit; + } + /* scan the list of ignores for any duplicate */ + duplicate = false; + PMIX_LIST_FOREACH(cf, &epi->ignores, pmix_cleanup_file_t) { + if (0 == strcmp(cf->path, cd->info[n].value.data.string)) { + /* we can drop this request */ + duplicate = true; + break; + } + } + if (!duplicate) { + cf = PMIX_NEW(pmix_cleanup_file_t); + if (NULL == cf) { + /* return an error */ + rc = PMIX_ERR_NOMEM; + goto exit; + } + cf->path = strdup(cd->info[n].value.data.string); + pmix_list_append(&epi->ignores, &cf->super); + } + ++cnt; + } else if (0 == strncmp(cd->info[n].key, PMIX_CLEANUP_LEAVE_TOPDIR, PMIX_MAX_KEYLEN)) { + /* see if we allow epilog requests */ + if (NULL == epi) { + /* return an error */ + rc = 
PMIX_ERR_BAD_PARAM; + goto exit; + } + leave_topdir = PMIX_INFO_TRUE(&cd->info[n]); + ++cnt; + } + } + if (0 < cnt) { + while (NULL != (cdir = (pmix_cleanup_dir_t*)pmix_list_remove_first(&cachedirs))) { + /* scan the existing list of directories for any duplicate */ + PMIX_LIST_FOREACH(cdir2, &epi->cleanup_dirs, pmix_cleanup_dir_t) { + if (0 == strcmp(cdir2->path, cdir->path)) { + /* duplicate - merge the newly requested flags into the + * entry that is already registered (cdir2), per RFC precedence rules */ + if (!cdir2->recurse && recurse) { + cdir2->recurse = recurse; + } + if (!cdir2->leave_topdir && leave_topdir) { + cdir2->leave_topdir = leave_topdir; + } + PMIX_RELEASE(cdir); + cdir = NULL; + break; + } + } + if (NULL != cdir) { + /* check for conflict with ignore */ + PMIX_LIST_FOREACH(cf, &epi->ignores, pmix_cleanup_file_t) { + if (0 == strcmp(cf->path, cdir->path)) { + /* return an error */ + rc = PMIX_ERR_CONFLICTING_CLEANUP_DIRECTIVES; + PMIX_RELEASE(cdir); /* already dequeued, so the list destructors below will not free it */ + PMIX_LIST_DESTRUCT(&cachedirs); + PMIX_LIST_DESTRUCT(&cachefiles); + goto exit; + } + } + cdir->recurse = recurse; + cdir->leave_topdir = leave_topdir; + /* just append it to the end of the list */ + pmix_list_append(&epi->cleanup_dirs, &cdir->super); + } + } + PMIX_DESTRUCT(&cachedirs); + while (NULL != (cf = (pmix_cleanup_file_t*)pmix_list_remove_first(&cachefiles))) { + /* scan the existing list of files for any duplicate */ + PMIX_LIST_FOREACH(cf2, &epi->cleanup_files, pmix_cleanup_file_t) { + if (0 == strcmp(cf2->path, cf->path)) { + PMIX_RELEASE(cf); + cf = NULL; + break; + } + } + if (NULL != cf) { + /* check for conflict with ignore */ + PMIX_LIST_FOREACH(cf2, &epi->ignores, pmix_cleanup_file_t) { + if (0 == strcmp(cf->path, cf2->path)) { + /* return an error */ + rc = PMIX_ERR_CONFLICTING_CLEANUP_DIRECTIVES; + PMIX_RELEASE(cf); /* already dequeued; cachedirs was destructed above and must not be destructed again */ + PMIX_LIST_DESTRUCT(&cachefiles); + goto exit; + } + } + /* just append it to the end of the list */ + pmix_list_append(&epi->cleanup_files, &cf->super); + } + } + PMIX_DESTRUCT(&cachefiles); + if (cnt == cd->ninfo) { + /* 
nothing more to do */ + if (NULL != cbfunc) { + cbfunc(PMIX_SUCCESS, NULL, 0, cd, NULL, NULL); + } + return PMIX_SUCCESS; + } + } + /* setup the requesting peer name */ (void)strncpy(proc.nspace, peer->info->pname.nspace, PMIX_MAX_NSLEN); proc.rank = peer->info->pname.rank; diff --git a/opal/mca/pmix/pmix3x/pmix/src/util/error.c b/opal/mca/pmix/pmix3x/pmix/src/util/error.c index ae3851da051..ed2e230a387 100644 --- a/opal/mca/pmix/pmix3x/pmix/src/util/error.c +++ b/opal/mca/pmix/pmix3x/pmix/src/util/error.c @@ -171,6 +171,8 @@ PMIX_EXPORT const char* PMIx_Error_string(pmix_status_t errnum) return "PMIX MODEL DECLARED"; case PMIX_ERR_TEMP_UNAVAILABLE: return "PMIX TEMPORARILY UNAVAILABLE"; + case PMIX_ERR_CONFLICTING_CLEANUP_DIRECTIVES: + return "PMIX CONFLICTING CLEANUP DIRECTIVES"; case PMIX_SUCCESS: return "SUCCESS"; default: diff --git a/opal/mca/pmix/pmix3x/pmix3x.c b/opal/mca/pmix/pmix3x/pmix3x.c index 5499d18d0ab..1127be4a29c 100644 --- a/opal/mca/pmix/pmix3x/pmix3x.c +++ b/opal/mca/pmix/pmix3x/pmix3x.c @@ -25,6 +25,9 @@ #ifdef HAVE_UNISTD_H #include <unistd.h> #endif +#ifdef HAVE_SYS_STAT_H +#include <sys/stat.h> +#endif #include "opal/dss/dss.h" #include "opal/mca/event/event.h" @@ -71,6 +74,8 @@ static void pmix3x_query(opal_list_t *queries, static void pmix3x_log(opal_list_t *info, opal_pmix_op_cbfunc_t cbfunc, void *cbdata); +static int pmix3x_register_cleanup(char *path, bool ignore, bool jobscope); + const opal_pmix_base_module_t opal_pmix_pmix3x_module = { /* client APIs */ .init = pmix3x_client_init, @@ -101,6 +106,7 @@ const opal_pmix_base_module_t opal_pmix_pmix3x_module = { .log = pmix3x_log, .allocate = pmix3x_allocate, .job_control = pmix3x_job_control, + .register_cleanup = pmix3x_register_cleanup, /* server APIs */ .server_init = pmix3x_server_init, .server_finalize = pmix3x_server_finalize, @@ -333,6 +339,82 @@ void pmix3x_event_hdlr(size_t evhdlr_registration_id, return; } +/* Completion callback for a cleanup registration: lets the library release its data, stores the converted status in the caller's lock object and wakes the waiting thread */ +static void cleanup_cbfunc(pmix_status_t status, + pmix_info_t *info, size_t ninfo, + void 
*cbdata, + pmix_release_cbfunc_t release_fn, + void *release_cbdata) +{ + opal_pmix_lock_t *lk = (opal_pmix_lock_t*)cbdata; + + OPAL_POST_OBJECT(lk); + + /* let the library release the data and cleanup from + * the operation */ + if (NULL != release_fn) { + release_fn(release_cbdata); + } + + /* release the block */ + lk->status = pmix3x_convert_rc(status); + OPAL_PMIX_WAKEUP_THREAD(lk); +} + +/* Register path for post-termination cleanup (or, when ignore is set, as a path to be left alone), scoped to the whole job when jobscope is set and to this process otherwise; waits for the job-control callback and returns its converted status, or OPAL_ERR_NOT_FOUND when a path to be cleaned cannot be stat()ed */ +static int pmix3x_register_cleanup(char *path, bool ignore, bool jobscope) +{ + opal_pmix_lock_t lk; + pmix_info_t pinfo[3]; + size_t n, ninfo=0; + pmix_status_t rc; + int ret; + struct stat statbuf; + + OPAL_PMIX_CONSTRUCT_LOCK(&lk); + + if (ignore) { + /* they want this path ignored */ + PMIX_INFO_LOAD(&pinfo[ninfo], PMIX_CLEANUP_IGNORE, path, PMIX_STRING); + ++ninfo; + } else { + /* order cleanup of the provided path */ + PMIX_INFO_LOAD(&pinfo[ninfo], PMIX_REGISTER_CLEANUP, path, PMIX_STRING); + ++ninfo; + /* if the path is a directory, then we need to tell the server + * to recursively clean up */ + if (stat(path, &statbuf) != 0) { + ret = OPAL_ERR_NOT_FOUND; + goto cleanup; /* the lock and the info loaded above must still be released */ + } + if (S_ISDIR(statbuf.st_mode)) { + /* recursively cleanup directories */ + PMIX_INFO_LOAD(&pinfo[ninfo], PMIX_CLEANUP_RECURSIVE, NULL, PMIX_BOOL); + ++ninfo; + } + } + + /* if they want this applied to the job, then indicate so */ + if (jobscope) { + rc = PMIx_Job_control_nb(NULL, 0, pinfo, ninfo, cleanup_cbfunc, (void*)&lk); + } else { + /* only applies to us */ + rc = PMIx_Job_control_nb(&mca_pmix_pmix3x_component.myproc, 1, pinfo, ninfo, cleanup_cbfunc, (void*)&lk); + } + if (PMIX_SUCCESS != rc) { + ret = pmix3x_convert_rc(rc); + } else { + OPAL_PMIX_WAIT_THREAD(&lk); + ret = lk.status; + } + cleanup: + OPAL_PMIX_DESTRUCT_LOCK(&lk); + for (n=0; n < ninfo; n++) { + PMIX_INFO_DESTRUCT(&pinfo[n]); + } + return ret; +} + opal_vpid_t pmix3x_convert_rank(pmix_rank_t rank) { switch(rank) { diff --git a/opal/mca/pmix/pmix3x/pmix3x.h b/opal/mca/pmix/pmix3x/pmix3x.h index a5b67f146ed..9227da1d769 100644 --- 
a/opal/mca/pmix/pmix3x/pmix3x.h +++ b/opal/mca/pmix/pmix3x/pmix3x.h @@ -38,15 +38,16 @@ BEGIN_C_DECLS typedef struct { - opal_pmix_base_component_t super; - opal_list_t jobids; - bool native_launch; - size_t evindex; - opal_list_t events; - int cache_size; - opal_list_t cache; - opal_list_t dmdx; - bool silence_warning; + opal_pmix_base_component_t super; /* embedded base component; first member */ + pmix_proc_t myproc; /* our own PMIx nspace/rank, filled in by PMIx_Init() or PMIx_tool_init() */ + opal_list_t jobids; /* opal_pmix3x_jobid_trkr_t entries pairing a PMIx nspace with an OPAL jobid */ + bool native_launch; /* set true when launched by the OMPI RTE and on the tool-init path */ + size_t evindex; + opal_list_t events; + int cache_size; + opal_list_t cache; + opal_list_t dmdx; + bool silence_warning; } mca_pmix_pmix3x_component_t; OPAL_DECLSPEC extern mca_pmix_pmix3x_component_t mca_pmix_pmix3x_component; diff --git a/opal/mca/pmix/pmix3x/pmix3x_client.c b/opal/mca/pmix/pmix3x/pmix3x_client.c index 58a7e0b39dd..97343c07143 100644 --- a/opal/mca/pmix/pmix3x/pmix3x_client.c +++ b/opal/mca/pmix/pmix3x/pmix3x_client.c @@ -5,7 +5,7 @@ * and Technology (RIST). All rights reserved. * Copyright (c) 2014-2017 Mellanox Technologies, Inc. * All rights reserved. - * Copyright (c) 2016 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2016 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2016 Los Alamos National Security, LLC. All rights * reserved. 
* $COPYRIGHT$ @@ -38,7 +38,6 @@ #include "pmix.h" #include "pmix_tool.h" -static pmix_proc_t my_proc; static char *dbgvalue=NULL; static void errreg_cbfunc (pmix_status_t status, @@ -105,7 +104,7 @@ int pmix3x_client_init(opal_list_t *ilist) } OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); - rc = PMIx_Init(&my_proc, pinfo, ninfo); + rc = PMIx_Init(&mca_pmix_pmix3x_component.myproc, pinfo, ninfo); if (NULL != pinfo) { PMIX_INFO_FREE(pinfo, ninfo); } @@ -127,20 +126,20 @@ int pmix3x_client_init(opal_list_t *ilist) /* if we were launched by the OMPI RTE, then * the jobid is in a special format - so get it */ mca_pmix_pmix3x_component.native_launch = true; - opal_convert_string_to_jobid(&pname.jobid, my_proc.nspace); + opal_convert_string_to_jobid(&pname.jobid, mca_pmix_pmix3x_component.myproc.nspace); } else { /* we were launched by someone else, so make the * jobid just be the hash of the nspace */ - OPAL_HASH_JOBID(my_proc.nspace, pname.jobid); + OPAL_HASH_JOBID(mca_pmix_pmix3x_component.myproc.nspace, pname.jobid); } /* insert this into our list of jobids - it will be the * first, and so we'll check it first */ job = OBJ_NEW(opal_pmix3x_jobid_trkr_t); - (void)strncpy(job->nspace, my_proc.nspace, PMIX_MAX_NSLEN); + (void)strncpy(job->nspace, mca_pmix_pmix3x_component.myproc.nspace, PMIX_MAX_NSLEN); job->jobid = pname.jobid; opal_list_append(&mca_pmix_pmix3x_component.jobids, &job->super); - pname.vpid = pmix3x_convert_rank(my_proc.rank); + pname.vpid = pmix3x_convert_rank(mca_pmix_pmix3x_component.myproc.rank); opal_proc_set_name(&pname); /* release the thread in case the event handler fires when @@ -221,10 +220,10 @@ int pmix3x_tool_init(opal_list_t *info) /* check to see if our name is being given from above */ if (0 == strcmp(val->key, OPAL_PMIX_TOOL_NSPACE)) { opal_convert_string_to_jobid(&pname.jobid, val->data.string); - (void)strncpy(my_proc.nspace, val->data.string, PMIX_MAX_NSLEN); + (void)strncpy(mca_pmix_pmix3x_component.myproc.nspace, val->data.string, 
PMIX_MAX_NSLEN); } else if (0 == strcmp(val->key, OPAL_PMIX_TOOL_RANK)) { pname.vpid = val->data.name.vpid; - my_proc.rank = pname.vpid; + mca_pmix_pmix3x_component.myproc.rank = pname.vpid; } } } else { @@ -236,7 +235,7 @@ int pmix3x_tool_init(opal_list_t *info) mca_pmix_pmix3x_component.native_launch = true; OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); - rc = PMIx_tool_init(&my_proc, pinfo, ninfo); + rc = PMIx_tool_init(&mca_pmix_pmix3x_component.myproc, pinfo, ninfo); if (NULL != pinfo) { PMIX_INFO_FREE(pinfo, ninfo); } @@ -254,13 +253,13 @@ int pmix3x_tool_init(opal_list_t *info) } /* store our jobid and rank */ - opal_convert_string_to_jobid(&pname.jobid, my_proc.nspace); - pname.vpid = pmix3x_convert_rank(my_proc.rank); + opal_convert_string_to_jobid(&pname.jobid, mca_pmix_pmix3x_component.myproc.nspace); + pname.vpid = pmix3x_convert_rank(mca_pmix_pmix3x_component.myproc.rank); /* insert this into our list of jobids - it will be the * first, and so we'll check it first */ job = OBJ_NEW(opal_pmix3x_jobid_trkr_t); - (void)strncpy(job->nspace, my_proc.nspace, PMIX_MAX_NSLEN); + (void)strncpy(job->nspace, mca_pmix_pmix3x_component.myproc.nspace, PMIX_MAX_NSLEN); job->jobid = pname.jobid; opal_list_append(&mca_pmix_pmix3x_component.jobids, &job->super); @@ -399,7 +398,7 @@ int pmix3x_store_local(const opal_process_name_t *proc, opal_value_t *val) p.rank = pmix3x_convert_opalrank(proc->vpid); } else { /* use our name */ - (void)strncpy(p.nspace, my_proc.nspace, PMIX_MAX_NSLEN); + (void)strncpy(p.nspace, mca_pmix_pmix3x_component.myproc.nspace, PMIX_MAX_NSLEN); p.rank = pmix3x_convert_opalrank(OPAL_PROC_MY_NAME.vpid); } @@ -614,7 +613,7 @@ int pmix3x_get(const opal_process_name_t *proc, const char *key, if (0 == strcmp(key, OPAL_PMIX_RANK)) { (*val) = OBJ_NEW(opal_value_t); (*val)->type = OPAL_INT; - (*val)->data.integer = pmix3x_convert_rank(my_proc.rank); + (*val)->data.integer = pmix3x_convert_rank(mca_pmix_pmix3x_component.myproc.rank); 
OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); return OPAL_SUCCESS; } @@ -622,7 +621,7 @@ int pmix3x_get(const opal_process_name_t *proc, const char *key, *val = NULL; if (NULL == proc) { - (void)strncpy(p.nspace, my_proc.nspace, PMIX_MAX_NSLEN); + (void)strncpy(p.nspace, mca_pmix_pmix3x_component.myproc.nspace, PMIX_MAX_NSLEN); p.rank = pmix3x_convert_rank(PMIX_RANK_WILDCARD); } else { if (NULL == (nsptr = pmix3x_convert_jobid(proc->jobid))) { @@ -719,7 +718,7 @@ int pmix3x_getnb(const opal_process_name_t *proc, const char *key, if (NULL != cbfunc) { val = OBJ_NEW(opal_value_t); val->type = OPAL_INT; - val->data.integer = pmix3x_convert_rank(my_proc.rank); + val->data.integer = pmix3x_convert_rank(mca_pmix_pmix3x_component.myproc.rank); cbfunc(OPAL_SUCCESS, val, cbdata); } OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); @@ -733,7 +732,7 @@ int pmix3x_getnb(const opal_process_name_t *proc, const char *key, op->cbdata = cbdata; if (NULL == proc) { - (void)strncpy(op->p.nspace, my_proc.nspace, PMIX_MAX_NSLEN); + (void)strncpy(op->p.nspace, mca_pmix_pmix3x_component.myproc.nspace, PMIX_MAX_NSLEN); op->p.rank = pmix3x_convert_rank(PMIX_RANK_WILDCARD); } else { if (NULL == (nsptr = pmix3x_convert_jobid(proc->jobid))) { diff --git a/opal/mca/pmix/pmix_types.h b/opal/mca/pmix/pmix_types.h index f9c58e7d735..4c18ba7eb48 100644 --- a/opal/mca/pmix/pmix_types.h +++ b/opal/mca/pmix/pmix_types.h @@ -118,6 +118,7 @@ BEGIN_C_DECLS /* information about relative ranks as assigned by the RM */ +#define OPAL_PMIX_CLUSTER_ID "pmix.clid" // (char*) a string name for the cluster this proc is executing on #define OPAL_PMIX_PROCID "pmix.procid" // (opal_process_name_t) process identifier #define OPAL_PMIX_NSPACE "pmix.nspace" // (char*) nspace of a job #define OPAL_PMIX_JOBID "pmix.jobid" // (uint32_t) jobid assigned by scheduler @@ -189,6 +190,7 @@ BEGIN_C_DECLS #define OPAL_PMIX_NOTIFY_COMPLETION "pmix.notecomp" // (bool) notify parent process upon termination of child job #define 
OPAL_PMIX_RANGE "pmix.range" // (int) opal_pmix_data_range_t value for calls to publish/lookup/unpublish #define OPAL_PMIX_PERSISTENCE "pmix.persist" // (int) opal_pmix_persistence_t value for calls to publish +#define OPAL_PMIX_DATA_SCOPE "pmix.scope" // (pmix_scope_t) scope of the data to be found in a PMIx_Get call #define OPAL_PMIX_OPTIONAL "pmix.optional" // (bool) look only in the immediate data store for the requested value - do // not request data from the server if not found #define OPAL_PMIX_EMBED_BARRIER "pmix.embed.barrier" // (bool) execute a blocking fence operation before executing the @@ -364,6 +366,16 @@ BEGIN_C_DECLS #define OPAL_PMIX_JOB_CTRL_PROVISION_IMAGE "pmix.jctrl.pvnimg" // (char*) name of the image that is to be provisioned #define OPAL_PMIX_JOB_CTRL_PREEMPTIBLE "pmix.jctrl.preempt" // (bool) job can be pre-empted #define OPAL_PMIX_JOB_CTRL_TERMINATE "pmix.jctrl.term" // (bool) politely terminate the specified procs +#define OPAL_PMIX_REGISTER_CLEANUP "pmix.reg.cleanup" // (char*) comma-delimited list of files/directories to + // be removed upon process termination +#define OPAL_PMIX_CLEANUP_RECURSIVE "pmix.clnup.recurse" // (bool) recursively cleanup all subdirectories under the + // specified one(s) +#define OPAL_PMIX_CLEANUP_EMPTY "pmix.clnup.empty" // (bool) only remove empty subdirectories +#define OPAL_PMIX_CLEANUP_IGNORE "pmix.clnup.ignore" // (char*) comma-delimited list of filenames that are not + // to be removed +#define OPAL_PMIX_CLEANUP_LEAVE_TOPDIR "pmix.clnup.lvtop" // (bool) when recursively cleaning subdirs, do not remove + // the top-level directory (the one given in the + // cleanup request) /* monitoring attributes */ diff --git a/opal/util/output.c b/opal/util/output.c index f096a88b478..633901ee593 100644 --- a/opal/util/output.c +++ b/opal/util/output.c @@ -16,6 +16,7 @@ * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. 
* Copyright (c) 2017 IBM Corporation. All rights reserved. + * Copyright (c) 2017 Intel, Inc. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -44,6 +45,7 @@ #include "opal/util/output.h" #include "opal/threads/mutex.h" #include "opal/constants.h" +#include "opal/mca/pmix/pmix.h" /* * Private data @@ -505,10 +507,10 @@ void opal_output_finalize(void) output_dir = NULL; if(NULL != temp_str) { - free(temp_str); - temp_str = NULL; - temp_str_len = 0; - } + free(temp_str); + temp_str = NULL; + temp_str_len = 0; + } OBJ_DESTRUCT(&verbose); OBJ_DESTRUCT(&mutex); } @@ -785,18 +787,24 @@ static int open_file(int i) /* Actually open the file */ info[i].ldi_fd = open(filename, flags, 0644); - free(filename); /* release the filename in all cases */ if (-1 == info[i].ldi_fd) { info[i].ldi_used = false; + free(filename); /* release the filename in all cases */ return OPAL_ERR_IN_ERRNO; } /* Make the file be close-on-exec to prevent child inheritance * problems */ if (-1 == fcntl(info[i].ldi_fd, F_SETFD, 1)) { - return OPAL_ERR_IN_ERRNO; + free(filename); /* release the filename in all cases */ + return OPAL_ERR_IN_ERRNO; } + /* register it to be ignored */ + if (NULL != opal_pmix.register_cleanup) { + opal_pmix.register_cleanup(filename, true, false); + } + free(filename); /* release the filename in all cases */ } /* Return successfully even if the session dir did not exist yet; @@ -814,20 +822,20 @@ static void free_descriptor(int output_id) output_desc_t *ldi; if (output_id >= 0 && output_id < OPAL_OUTPUT_MAX_STREAMS && - info[output_id].ldi_used && info[output_id].ldi_enabled) { - ldi = &info[output_id]; + info[output_id].ldi_used && info[output_id].ldi_enabled) { + ldi = &info[output_id]; - if (-1 != ldi->ldi_fd) { - close(ldi->ldi_fd); - } - ldi->ldi_used = false; + if (-1 != ldi->ldi_fd) { + close(ldi->ldi_fd); + } + ldi->ldi_used = false; - /* If we strduped a prefix, suffix, or syslog ident, free it */ + /* If we strduped a prefix, suffix, or 
syslog ident, free it */ - if (NULL != ldi->ldi_prefix) { - free(ldi->ldi_prefix); - } - ldi->ldi_prefix = NULL; + if (NULL != ldi->ldi_prefix) { + free(ldi->ldi_prefix); + } + ldi->ldi_prefix = NULL; if (NULL != ldi->ldi_suffix) { free(ldi->ldi_suffix); @@ -835,14 +843,14 @@ static void free_descriptor(int output_id) ldi->ldi_suffix = NULL; if (NULL != ldi->ldi_file_suffix) { - free(ldi->ldi_file_suffix); - } - ldi->ldi_file_suffix = NULL; - - if (NULL != ldi->ldi_syslog_ident) { - free(ldi->ldi_syslog_ident); - } - ldi->ldi_syslog_ident = NULL; + free(ldi->ldi_file_suffix); + } + ldi->ldi_file_suffix = NULL; + + if (NULL != ldi->ldi_syslog_ident) { + free(ldi->ldi_syslog_ident); + } + ldi->ldi_syslog_ident = NULL; } } diff --git a/orte/mca/ess/base/ess_base_std_app.c b/orte/mca/ess/base/ess_base_std_app.c index 475304a8e23..22817cbcd9c 100644 --- a/orte/mca/ess/base/ess_base_std_app.c +++ b/orte/mca/ess/base/ess_base_std_app.c @@ -129,7 +129,6 @@ int orte_ess_base_app_setup(bool db_restrict_local) error = "orte_errmgr_base_open"; goto error; } - /* setup my session directory */ if (orte_create_session_dirs) { OPAL_OUTPUT_VERBOSE((2, orte_ess_base_framework.framework_output, @@ -147,6 +146,22 @@ int orte_ess_base_app_setup(bool db_restrict_local) proc-specific session directory. 
*/ opal_output_set_output_file_info(orte_process_info.proc_session_dir, "output-", NULL, NULL); + /* register the directory for cleanup */ + if (NULL != opal_pmix.register_cleanup) { + if (orte_standalone_operation) { + if (OPAL_SUCCESS != (ret = opal_pmix.register_cleanup(orte_process_info.top_session_dir, false, true))) { + ORTE_ERROR_LOG(ret); + error = "register cleanup"; + goto error; + } + } else { + if (OPAL_SUCCESS != (ret = opal_pmix.register_cleanup(orte_process_info.jobfam_session_dir, false, false))) { + ORTE_ERROR_LOG(ret); + error = "register cleanup"; + goto error; + } + } + } } /* Setup the communication infrastructure */ /* Routed system */ @@ -357,7 +372,9 @@ int orte_ess_base_app_finalize(void) (void) mca_base_framework_close(&orte_oob_base_framework); (void) mca_base_framework_close(&orte_state_base_framework); - orte_session_dir_finalize(ORTE_PROC_MY_NAME); + if (NULL == opal_pmix.register_cleanup) { + orte_session_dir_finalize(ORTE_PROC_MY_NAME); + } /* cleanup the process info */ orte_proc_info_finalize();