diff --git a/opal/mca/pmix/pmix112/pmix/NEWS b/opal/mca/pmix/pmix112/pmix/NEWS index 16b4ad8f86c..1acf9297d0e 100644 --- a/opal/mca/pmix/pmix112/pmix/NEWS +++ b/opal/mca/pmix/pmix112/pmix/NEWS @@ -1,5 +1,5 @@ Copyright (c) 2015-2016 Intel, Inc. All rights reserved. -Copyright (c) 2016 IBM Corporation. All rights reserved. +Copyright (c) 2016-2017 IBM Corporation. All rights reserved. $COPYRIGHT$ Additional copyrights may follow @@ -20,6 +20,14 @@ other, a single NEWS-worthy item might apply to different series. For example, a bug might be fixed in the master, and then moved to the current release as well as the "stable" bug fix release branch. +1.2.1 +----- +- dstore: Fix data corruption bug in key overwrite cases +- dstore: Performance and scalability fixes +- sm: Use posix_fallocate() before mmap +- pmi1/pmi2: Restore support +- dstore: Fix extension slot size allocation (Issue #280) + 1.2.0 ----- diff --git a/opal/mca/pmix/pmix112/pmix/VERSION b/opal/mca/pmix/pmix112/pmix/VERSION index 1bda44c1099..8f8cad68461 100644 --- a/opal/mca/pmix/pmix112/pmix/VERSION +++ b/opal/mca/pmix/pmix112/pmix/VERSION @@ -4,7 +4,7 @@ # Copyright (c) 2013 Mellanox Technologies, Inc. # All rights reserved. # Copyright (c) 2014-2016 Intel, Inc. All rights reserved. -# Copyright (c) 2016 IBM Corporation. All rights reserved. +# Copyright (c) 2016-2017 IBM Corporation. All rights reserved. # This is the VERSION file for PMIx, describing the precise # version of PMIx in this distribution. The various components of @@ -16,7 +16,7 @@ major=1 minor=2 -release=0 +release=1 # greek is used for alpha or beta release tags. If it is non-empty, # it will be appended to the version number. It does not have to be @@ -31,7 +31,7 @@ greek= # command, or with the date (if "git describe" fails) in the form of # "date". -repo_rev=git33736edb +repo_rev=gitef61cf0a # If tarball_version is not empty, it is used as the version string in # the tarball filename, regardless of all other versions listed in @@ -45,7 +45,7 @@ tarball_version= # The date when this release was created -date="Dec 14, 2016" +date="Feb 21, 2017" # The shared library version of each of PMIx's public libraries. # These versions are maintained in accordance with the "Library diff --git a/opal/mca/pmix/pmix112/pmix/config/Makefile.am b/opal/mca/pmix/pmix112/pmix/config/Makefile.am index e78b92de62a..1c2bb55f7e7 100644 --- a/opal/mca/pmix/pmix112/pmix/config/Makefile.am +++ b/opal/mca/pmix/pmix112/pmix/config/Makefile.am @@ -40,6 +40,7 @@ EXTRA_DIST += \ config/pmix_check_sasl.m4 \ config/pmix_check_vendor.m4 \ config/pmix_check_visibility.m4 \ + config/pmix_check_lock.m4 \ config/pmix_ensure_contains_optflags.m4 \ config/pmix_functions.m4 \ config/pmix.m4 \ @@ -48,6 +49,5 @@ EXTRA_DIST += \ config/pmix_setup_hwloc.m4 \ config/pmix_setup_libevent.m4 - maintainer-clean-local: rm -f config/pmix_get_version.sh diff --git a/opal/mca/pmix/pmix112/pmix/config/pmix.m4 b/opal/mca/pmix/pmix112/pmix/config/pmix.m4 index 87637bfbc4f..44d9210143b 100644 --- a/opal/mca/pmix/pmix112/pmix/config/pmix.m4 +++ b/opal/mca/pmix/pmix112/pmix/config/pmix.m4 @@ -18,7 +18,7 @@ dnl reserved. dnl Copyright (c) 2009-2011 Oak Ridge National Labs. All rights reserved. dnl Copyright (c) 2011-2013 NVIDIA Corporation. All rights reserved. dnl Copyright (c) 2013-2016 Intel, Inc. All rights reserved -dnl Copyright (c) 2015 Research Organization for Information Science +dnl Copyright (c) 2015-2017 Research Organization for Information Science dnl and Technology (RIST). All rights reserved. dnl Copyright (c) 2016 Mellanox Technologies, Inc. dnl All rights reserved. @@ -500,7 +500,7 @@ AC_DEFUN([PMIX_SETUP_CORE],[ # Darwin doesn't need -lm, as it's a symlink to libSystem.dylib PMIX_SEARCH_LIBS_CORE([ceil], [m]) - AC_CHECK_FUNCS([asprintf snprintf vasprintf vsnprintf strsignal socketpair strncpy_s usleep statfs statvfs getpeereid getpeerucred strnlen]) + AC_CHECK_FUNCS([asprintf snprintf vasprintf vsnprintf strsignal socketpair strncpy_s usleep statfs statvfs getpeereid getpeerucred strnlen posix_fallocate]) # On some hosts, htonl is a define, so the AC_CHECK_FUNC will get # confused. On others, it's in the standard library, but stubbed with @@ -594,6 +594,14 @@ AC_DEFUN([PMIX_SETUP_CORE],[ PMIX_MUNGE_CONFIG + ################################## + # Dstore Locking + ################################## + + pmix_show_title "Dstore Locking" + + PMIX_CHECK_DSTOR_LOCK + ############################################################################ # final compiler config ############################################################################ @@ -778,6 +786,22 @@ AC_DEFINE_UNQUOTED([PMIX_ENABLE_DSTORE], [if want shared memory dstore feature]) AM_CONDITIONAL([WANT_DSTORE],[test "x$enable_dstore" != "xno"]) +# +# Use pthread-based locking +# +DSTORE_PTHREAD_LOCK="1" +AC_MSG_CHECKING([if want dstore pthread-based locking]) +AC_ARG_ENABLE([dstore-pthlck], + [AC_HELP_STRING([--disable-dstore-pthlck], + [Disable pthread-based lockig in dstor (default: enabled)])]) +if test "$enable_dstore_pthlck" == "no" ; then + AC_MSG_RESULT([no]) + DSTORE_PTHREAD_LOCK="0" +else + AC_MSG_RESULT([yes]) + DSTORE_PTHREAD_LOCK="1" +fi + # # Ident string # diff --git a/opal/mca/pmix/pmix112/pmix/config/pmix_check_compiler_version.m4 b/opal/mca/pmix/pmix112/pmix/config/pmix_check_compiler_version.m4 index b0a347e27bf..78343b9126d 100644 --- a/opal/mca/pmix/pmix112/pmix/config/pmix_check_compiler_version.m4 +++ b/opal/mca/pmix/pmix112/pmix/config/pmix_check_compiler_version.m4 @@ -1,7 +1,7 @@ dnl -*- shell-script -*- dnl dnl Copyright (c) 2009 Oak Ridge National Labs. All rights reserved. -dnl Copyright (c) 2013 Intel, Inc. All rights reserved +dnl Copyright (c) 2013-2017 Intel, Inc. All rights reserved. dnl dnl $COPYRIGHT$ dnl @@ -35,7 +35,6 @@ AC_DEFUN([PMIX_CHECK_COMPILER], [ AC_TRY_RUN([ #include #include -#include "pmix_portable_platform.h" int main (int argc, char * argv[]) { @@ -68,7 +67,6 @@ AC_DEFUN([PMIX_CHECK_COMPILER_STRINGIFY], [ AC_TRY_RUN([ #include #include -#include "pmix_portable_platform.h" int main (int argc, char * argv[]) { diff --git a/opal/mca/pmix/pmix112/pmix/config/pmix_check_lock.m4 b/opal/mca/pmix/pmix112/pmix/config/pmix_check_lock.m4 new file mode 100644 index 00000000000..2d5c1ba8646 --- /dev/null +++ b/opal/mca/pmix/pmix112/pmix/config/pmix_check_lock.m4 @@ -0,0 +1,58 @@ +dnl -*- shell-script -*- +dnl +dnl Copyright (c) 2017 Mellanox Technologies, Inc. +dnl All rights reserved. +dnl Copyright (c) 2017 IBM Corporation. All rights reserved. +dnl $COPYRIGHT$ +dnl +dnl Additional copyrights may follow +dnl +dnl $HEADER$ +dnl + +AC_DEFUN([PMIX_CHECK_DSTOR_LOCK],[ + orig_libs=$LIBS + LIBS="-lpthread $LIBS" + + _x_ac_pthread_lock_found="0" + _x_ac_fcntl_lock_found="0" + + AC_CHECK_MEMBERS([struct flock.l_type], + [ + AC_DEFINE([HAVE_FCNTL_FLOCK], [1], + [Define to 1 if you have the locking by fcntl.]) + _x_ac_fcntl_lock_found="1" + ], [], [#include ]) + + if test "$DSTORE_PTHREAD_LOCK" == "1"; then + AC_CHECK_FUNC([pthread_rwlockattr_setkind_np], + [AC_EGREP_HEADER([PTHREAD_RWLOCK_PREFER_WRITER_NONRECURSIVE_NP], + [pthread.h],[ + AC_DEFINE([HAVE_PTHREAD_SETKIND], [1], + [Define to 1 if you have the `pthread_rwlockattr_setkind_np` function.])])]) + + AC_CHECK_FUNC([pthread_rwlockattr_setpshared], + [AC_EGREP_HEADER([PTHREAD_PROCESS_SHARED], + [pthread.h],[ + AC_DEFINE([HAVE_PTHREAD_SHARED], [1], + [Define to 1 if you have the `PTHREAD_PROCESS_SHARED` definition. + ]) + _x_ac_pthread_lock_found="1" + ]) + ]) + + if test "$_x_ac_pthread_lock_found" == "0"; then + if test "$_x_ac_fcntl_lock_found" == "1"; then + AC_MSG_WARN([dstore: pthread-based locking not found, will use fcntl-based locking.]) + else + AC_MSG_ERROR([dstore: no available locking mechanisms was found. Can not continue. Try disabling dstore]) + fi + fi + else + if test "$_x_ac_fcntl_lock_found" == "0"; then + AC_MSG_ERROR([dstore: no available locking mechanisms was found. Can not continue. Try disabling dstore]) + fi + fi + + LIBS="$orig_libs" +]) diff --git a/opal/mca/pmix/pmix112/pmix/config/pmix_functions.m4 b/opal/mca/pmix/pmix112/pmix/config/pmix_functions.m4 index df590b4c8ee..5fb6d7a58cd 100644 --- a/opal/mca/pmix/pmix112/pmix/config/pmix_functions.m4 +++ b/opal/mca/pmix/pmix112/pmix/config/pmix_functions.m4 @@ -317,103 +317,6 @@ dnl ####################################################################### dnl ####################################################################### dnl ####################################################################### -# Remove all duplicate -I, -L, and -l flags from the variable named $1 -AC_DEFUN([PMIX_FLAGS_UNIQ],[ - # 1 is the variable name to be uniq-ized - pmix_name=$1 - - # Go through each item in the variable and only keep the unique ones - - pmix_count=0 - for val in ${$1}; do - pmix_done=0 - pmix_i=1 - pmix_found=0 - - # Loop over every token we've seen so far - - pmix_done="`expr $pmix_i \> $pmix_count`" - while test "$pmix_found" = "0" && test "$pmix_done" = "0"; do - - # Have we seen this token already? Prefix the comparison - # with "x" so that "-Lfoo" values won't be cause an error. - - pmix_eval="expr x$val = x\$pmix_array_$pmix_i" - pmix_found=`eval $pmix_eval` - - # Check the ending condition - - pmix_done="`expr $pmix_i \>= $pmix_count`" - - # Increment the counter - - pmix_i="`expr $pmix_i + 1`" - done - - # Check for special cases where we do want to allow repeated - # arguments (per - # http://www.open-mpi.org/community/lists/devel/2012/08/11362.php - # and - # https://github.com/open-mpi/ompi/issues/324). - - case $val in - -Xclang) - pmix_found=0 - pmix_i=`expr $pmix_count + 1` - ;; - -framework) - pmix_found=0 - pmix_i=`expr $pmix_count + 1` - ;; - --param) - pmix_found=0 - pmix_i=`expr $pmix_count + 1` - ;; - esac - - # If we didn't find the token, add it to the "array" - - if test "$pmix_found" = "0"; then - pmix_eval="pmix_array_$pmix_i=$val" - eval $pmix_eval - pmix_count="`expr $pmix_count + 1`" - else - pmix_i="`expr $pmix_i - 1`" - fi - done - - # Take all the items in the "array" and assemble them back into a - # single variable - - pmix_i=1 - pmix_done="`expr $pmix_i \> $pmix_count`" - pmix_newval= - while test "$pmix_done" = "0"; do - pmix_eval="pmix_newval=\"$pmix_newval \$pmix_array_$pmix_i\"" - eval $pmix_eval - - pmix_eval="unset pmix_array_$pmix_i" - eval $pmix_eval - - pmix_done="`expr $pmix_i \>= $pmix_count`" - pmix_i="`expr $pmix_i + 1`" - done - - # Done; do the assignment - - pmix_newval="`echo $pmix_newval`" - pmix_eval="$pmix_name=\"$pmix_newval\"" - eval $pmix_eval - - # Clean up - - unset pmix_name pmix_i pmix_done pmix_newval pmix_eval pmix_count -])dnl - -dnl ####################################################################### -dnl ####################################################################### -dnl ####################################################################### - # PMIX_APPEND_UNIQ(variable, new_argument) # ---------------------------------------- # Append new_argument to variable if not already in variable. This assumes a diff --git a/opal/mca/pmix/pmix112/pmix/config/pmix_setup_cc.m4 b/opal/mca/pmix/pmix112/pmix/config/pmix_setup_cc.m4 index 59b5dedbdd0..5e58631099c 100644 --- a/opal/mca/pmix/pmix112/pmix/config/pmix_setup_cc.m4 +++ b/opal/mca/pmix/pmix112/pmix/config/pmix_setup_cc.m4 @@ -72,7 +72,7 @@ AC_DEFUN([PMIX_SETUP_CC],[ if test "$WANT_DEBUG" = "1" && test "$enable_debug_symbols" != "no" ; then CFLAGS="$CFLAGS -g" - PMIX_FLAGS_UNIQ(CFLAGS) + PMIX_UNIQ(CFLAGS) AC_MSG_WARN([-g has been added to CFLAGS (--enable-debug)]) fi @@ -138,7 +138,7 @@ AC_DEFUN([PMIX_SETUP_CC],[ fi CFLAGS="$CFLAGS_orig $add" - PMIX_FLAGS_UNIQ(CFLAGS) + PMIX_UNIQ(CFLAGS) AC_MSG_WARN([$add has been added to CFLAGS (--enable-picky)]) unset add fi @@ -188,7 +188,7 @@ AC_DEFUN([PMIX_SETUP_CC],[ fi CFLAGS="$CFLAGS_orig$add" - PMIX_FLAGS_UNIQ(CFLAGS) + PMIX_UNIQ(CFLAGS) AC_MSG_WARN([$add has been added to CFLAGS]) unset add fi @@ -217,7 +217,7 @@ AC_DEFUN([PMIX_SETUP_CC],[ fi CFLAGS="${CFLAGS_orig}${add}" - PMIX_FLAGS_UNIQ([CFLAGS]) + PMIX_UNIQ([CFLAGS]) if test "$add" != "" ; then AC_MSG_WARN([$add has been added to CFLAGS]) fi diff --git a/opal/mca/pmix/pmix112/pmix/src/client/pmi1.c b/opal/mca/pmix/pmix112/pmix/src/client/pmi1.c index 2d044c17d5f..c53462c1ee3 100644 --- a/opal/mca/pmix/pmix112/pmix/src/client/pmi1.c +++ b/opal/mca/pmix/pmix112/pmix/src/client/pmi1.c @@ -202,6 +202,30 @@ PMIX_EXPORT int PMI_KVS_Get( const char kvsname[], const char key[], char value[ pmix_output_verbose(2, pmix_globals.debug_output, "PMI_KVS_Get: KVS=%s, key=%s value=%s", kvsname, key, value); + /* PMI-1 expects resource manager to set + * process mapping in ANL notation. */ + if (!strcmp(key, ANL_MAPPING)) { + /* we are looking in the job-data. If there is nothing there + * we don't want to look in rank's data, thus set rank to widcard */ + proc = myproc; + proc.rank = PMIX_RANK_WILDCARD; + if (PMIX_SUCCESS == PMIx_Get(&proc, PMIX_ANL_MAP, NULL, 0, &val) && + (NULL != val) && (PMIX_STRING == val->type)) { + strncpy(value, val->data.string, length); + PMIX_VALUE_FREE(val, 1); + return PMI_SUCCESS; + } else { + /* artpol: + * Some RM's (i.e. SLURM) already have ANL precomputed. The export it + * through PMIX_ANL_MAP variable. + * If we haven't found it we want to have our own packing functionality + * since it's common. + * Somebody else has to write it since I've already done that for + * GPL'ed SLURM :) */ + return PMI_FAIL; + } + } + /* retrieve the data from PMIx - since we don't have a rank, * we indicate that by passing the UNDEF value */ (void)strncpy(proc.nspace, kvsname, PMIX_MAX_NSLEN); @@ -344,6 +368,10 @@ PMIX_EXPORT int PMI_Get_appnum(int *appnum) if (PMIX_SUCCESS == rc) { rc = convert_int(appnum, val); PMIX_VALUE_RELEASE(val); + } else if( PMIX_ERR_NOT_FOUND == rc ){ + /* this is optional value, set to 0 */ + *appnum = 0; + rc = PMIX_SUCCESS; } PMIX_INFO_DESTRUCT(&info[0]); diff --git a/opal/mca/pmix/pmix112/pmix/src/client/pmi2.c b/opal/mca/pmix/pmix112/pmix/src/client/pmi2.c index 347c64da575..2b0bcfba5d1 100644 --- a/opal/mca/pmix/pmix112/pmix/src/client/pmi2.c +++ b/opal/mca/pmix/pmix112/pmix/src/client/pmi2.c @@ -36,7 +36,7 @@ #include "src/util/output.h" #include "src/include/pmix_globals.h" - +#define ANL_MAPPING "PMI_process_mapping" #define PMI2_CHECK() \ do { \ @@ -501,6 +501,33 @@ PMIX_EXPORT int PMI2_Info_GetJobAttr(const char name[], char value[], int valuel PMIX_INFO_CONSTRUCT(&info[0]); PMIX_INFO_LOAD(&info[0], PMIX_OPTIONAL, &val_optinal, PMIX_BOOL); + /* PMI-2 expects resource manager to set + * process mapping in ANL notation. */ + if (!strcmp(name, ANL_MAPPING)) { + /* we are looking in the job-data. If there is nothing there + * we don't want to look in rank's data, thus set rank to widcard */ + proc = myproc; + proc.rank = PMIX_RANK_WILDCARD; + if (PMIX_SUCCESS == PMIx_Get(&proc, PMIX_ANL_MAP, NULL, 0, &val) && + (NULL != val) && (PMIX_STRING == val->type)) { + strncpy(value, val->data.string, valuelen); + PMIX_VALUE_FREE(val, 1); + *found = 1; + return PMI2_SUCCESS; + } else { + /* artpol: + * Some RM's (i.e. SLURM) already have ANL precomputed. The export it + * through PMIX_ANL_MAP variable. + * If we haven't found it we want to have our own packing functionality + * since it's common. + * Somebody else has to write it since I've already done that for + * GPL'ed SLURM :) */ + *found = 1; + return PMI2_FAIL; + } + } + + *found = 0; rc = PMIx_Get(&proc, name, info, 1, &val); if (PMIX_SUCCESS == rc && NULL != val) { diff --git a/opal/mca/pmix/pmix112/pmix/src/common/pmix_jobdata.c b/opal/mca/pmix/pmix112/pmix/src/common/pmix_jobdata.c index 7fc5057bc7d..e587a7890e9 100644 --- a/opal/mca/pmix/pmix112/pmix/src/common/pmix_jobdata.c +++ b/opal/mca/pmix/pmix112/pmix/src/common/pmix_jobdata.c @@ -329,12 +329,9 @@ static inline pmix_status_t _job_data_store(const char *nspace, void *cbdata) #if defined(PMIX_ENABLE_DSTORE) && (PMIX_ENABLE_DSTORE == 1) if (NULL != cb->dstore_fn) { - uint32_t size = (uint32_t)pmix_value_array_get_size(cb->bufs); - for (i = 0; i < size; i++) { - if (PMIX_SUCCESS != (rc = _rank_key_dstore_store(cbdata))) { - PMIX_ERROR_LOG(rc); - goto exit; - } + if (PMIX_SUCCESS != (rc = _rank_key_dstore_store(cbdata))) { + PMIX_ERROR_LOG(rc); + goto exit; } } #endif diff --git a/opal/mca/pmix/pmix112/pmix/src/dstore/pmix_esh.c b/opal/mca/pmix/pmix112/pmix/src/dstore/pmix_esh.c index 119ce797fc8..bff054bf160 100644 --- a/opal/mca/pmix/pmix112/pmix/src/dstore/pmix_esh.c +++ b/opal/mca/pmix/pmix112/pmix/src/dstore/pmix_esh.c @@ -1,8 +1,11 @@ /* - * Copyright (c) 2015-2016 Mellanox Technologies, Inc. + * Copyright (c) 2015-2017 Mellanox Technologies, Inc. * All rights reserved. * Copyright (c) 2016 Research Organization for Information Science * and Technology (RIST). All rights reserved. + * Copyright (c) 2016-2017 Intel, Inc. All rights reserved. + * Copyright (c) 2017 Los Alamos National Security, LLC. All rights + * reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -14,7 +17,6 @@ #include #include #include -#include #include #include #include @@ -35,6 +37,14 @@ #include "pmix_dstore.h" #include "pmix_esh.h" +#ifdef ESH_FCNTL_LOCK +#include +#endif + +#ifdef ESH_PTHREAD_LOCK +#include +#endif + static int _esh_init(pmix_info_t info[], size_t ninfo); static int _esh_finalize(void); static int _esh_store(const char *nspace, int rank, pmix_kval_t *kv); @@ -61,9 +71,107 @@ pmix_dstore_base_module_t pmix_dstore_esh_module = { #define ESH_ENV_NS_DATA_SEG_SIZE "NS_DATA_SEG_SIZE" #define ESH_ENV_LINEAR "SM_USE_LINEAR_SEARCH" -#define EXT_SLOT_SIZE(key) (strlen(key) + 1 + 2*sizeof(size_t)) /* in ext slot new offset will be stored in case if new data were added for the same process during next commit */ -#define KVAL_SIZE(key, size) (strlen(key) + 1 + sizeof(size_t) + size) +#define ESH_KEY_SIZE(key, size) \ +__extension__ ({ \ + size_t len = strlen(key) + 1 + sizeof(size_t) + size; \ + len; \ +}) + +/* in ext slot new offset will be stored in case if + * new data were added for the same process during + * next commit + */ +#define EXT_SLOT_SIZE() \ + (ESH_KEY_SIZE(ESH_REGION_EXTENSION, sizeof(size_t))) + + +#define ESH_KV_SIZE(addr) \ +__extension__ ({ \ + size_t sz; \ + memcpy(&sz, addr + ESH_KNAME_LEN(ESH_KNAME_PTR(addr)), \ + sizeof(size_t)); \ + sz += ESH_KNAME_LEN(ESH_KNAME_PTR(addr)) + \ + sizeof(size_t); \ + sz; \ +}) + +#define ESH_KNAME_PTR(addr) \ +__extension__ ({ \ + char *name_ptr = (char *)addr; \ + name_ptr; \ +}) + +#define ESH_KNAME_LEN(key) \ +__extension__ ({ \ + size_t len = strlen((char*)key) + 1; \ + len; \ +}) + +#define ESH_DATA_PTR(addr) \ +__extension__ ({ \ + uint8_t *data_ptr = \ + addr + \ + sizeof(size_t) + \ + ESH_KNAME_LEN(ESH_KNAME_PTR(addr)); \ + data_ptr; \ +}) + +#define ESH_DATA_SIZE(addr) \ +__extension__ ({ \ + size_t data_size; \ + memcpy(&data_size, \ + addr + ESH_KNAME_LEN(ESH_KNAME_PTR(addr)), \ + sizeof(size_t)); \ + data_size; \ +}) +#define ESH_PUT_KEY(addr, key, buffer, size) \ +__extension__ ({ \ + size_t sz = size; \ + memset(addr, 0, ESH_KNAME_LEN(key)); \ + strncpy((char *)addr, key, ESH_KNAME_LEN(key)); \ + memcpy(addr + ESH_KNAME_LEN(key), &sz, sizeof(size_t)); \ + memcpy(addr + ESH_KNAME_LEN(key) + sizeof(size_t), \ + buffer, size); \ +}) + +#ifdef ESH_PTHREAD_LOCK +#define _ESH_LOCK(rwlock, operation) \ +__extension__ ({ \ + pmix_status_t ret = PMIX_SUCCESS; \ + int rc; \ + switch (operation) { \ + case F_WRLCK: \ + rc = pthread_rwlock_wrlock(rwlock); \ + break; \ + case F_RDLCK: \ + rc = pthread_rwlock_rdlock(rwlock); \ + break; \ + case F_UNLCK: \ + rc = pthread_rwlock_unlock(rwlock); \ + break; \ + default: \ + rc = PMIX_ERR_BAD_PARAM; \ + } \ + if (0 != rc) { \ + switch (errno) { \ + case EINVAL: \ + ret = PMIX_ERR_INIT; \ + break; \ + case EPERM: \ + ret = PMIX_ERR_NO_PERMISSIONS; \ + break; \ + } \ + } \ + if (ret) { \ + pmix_output(0, "%s %d:%s lock failed: %s", \ + __FILE__, __LINE__, __func__, strerror(errno)); \ + } \ + ret; \ +}) +#endif + +#ifdef ESH_FCNTL_LOCK #define _ESH_LOCK(lockfd, operation) \ __extension__ ({ \ pmix_status_t ret = PMIX_SUCCESS; \ @@ -101,10 +209,11 @@ __extension__ ({ \ } \ ret; \ }) +#endif -#define _ESH_WRLOCK(lockfd) _ESH_LOCK(lockfd, F_WRLCK) -#define _ESH_RDLOCK(lockfd) _ESH_LOCK(lockfd, F_RDLCK) -#define _ESH_UNLOCK(lockfd) _ESH_LOCK(lockfd, F_UNLCK) +#define _ESH_WRLOCK(lock) _ESH_LOCK(lock, F_WRLCK) +#define _ESH_RDLOCK(lock) _ESH_LOCK(lock, F_RDLCK) +#define _ESH_UNLOCK(lock) _ESH_LOCK(lock, F_UNLCK) #define ESH_INIT_SESSION_TBL_SIZE 2 #define ESH_INIT_NS_MAP_TBL_SIZE 2 @@ -122,7 +231,7 @@ static void _update_initial_segment_info(const ns_map_data_t *ns_map); static void _set_constants_from_env(void); static void _delete_sm_desc(seg_desc_t *desc); static int _pmix_getpagesize(void); -static inline uint32_t _get_univ_size(const char *nspace); +static inline ssize_t _get_univ_size(const char *nspace); static inline ns_map_data_t * _esh_session_map_search_server(const char *nspace); static inline ns_map_data_t * _esh_session_map_search_client(const char *nspace); @@ -143,6 +252,7 @@ static size_t _max_ns_num; static size_t _meta_segment_size = 0; static size_t _max_meta_elems; static size_t _data_segment_size = 0; +static size_t _lock_segment_size = 0; static uid_t _jobuid; static char _setjobuid = 0; @@ -151,15 +261,27 @@ static pmix_value_array_t *_ns_map_array = NULL; static pmix_value_array_t *_ns_track_array = NULL; ns_map_data_t * (*_esh_session_map_search)(const char *nspace) = NULL; +int (*_esh_lock_init)(size_t idx) = NULL; #define _ESH_SESSION_path(tbl_idx) (PMIX_VALUE_ARRAY_GET_BASE(_session_array, session_t)[tbl_idx].nspace_path) #define _ESH_SESSION_lockfile(tbl_idx) (PMIX_VALUE_ARRAY_GET_BASE(_session_array, session_t)[tbl_idx].lockfile) +#define _ESH_SESSION_setjobuid(tbl_idx) (PMIX_VALUE_ARRAY_GET_BASE(_session_array, session_t)[tbl_idx].setjobuid) #define _ESH_SESSION_jobuid(tbl_idx) (PMIX_VALUE_ARRAY_GET_BASE(_session_array, session_t)[tbl_idx].jobuid) -#define _ESH_SESSION_lockfd(tbl_idx) (PMIX_VALUE_ARRAY_GET_BASE(_session_array, session_t)[tbl_idx].lockfd) #define _ESH_SESSION_sm_seg_first(tbl_idx) (PMIX_VALUE_ARRAY_GET_BASE(_session_array, session_t)[tbl_idx].sm_seg_first) #define _ESH_SESSION_sm_seg_last(tbl_idx) (PMIX_VALUE_ARRAY_GET_BASE(_session_array, session_t)[tbl_idx].sm_seg_last) #define _ESH_SESSION_ns_info(tbl_idx) (PMIX_VALUE_ARRAY_GET_BASE(_session_array, session_t)[tbl_idx].ns_info) +#ifdef ESH_PTHREAD_LOCK +#define _ESH_SESSION_pthread_rwlock(tbl_idx) (PMIX_VALUE_ARRAY_GET_BASE(_session_array, session_t)[tbl_idx].rwlock) +#define _ESH_SESSION_pthread_seg(tbl_idx) (PMIX_VALUE_ARRAY_GET_BASE(_session_array, session_t)[tbl_idx].rwlock_seg) +#define _ESH_SESSION_lock(tbl_idx) _ESH_SESSION_pthread_rwlock(tbl_idx) +#endif + +#ifdef ESH_FCNTL_LOCK +#define _ESH_SESSION_lockfd(tbl_idx) (PMIX_VALUE_ARRAY_GET_BASE(_session_array, session_t)[tbl_idx].lockfd) +#define _ESH_SESSION_lock(tbl_idx) _ESH_SESSION_lockfd(tbl_idx) +#endif + /* If _direct_mode is set, it means that we use linear search * along the array of rank meta info objects inside a meta segment * to find the requested rank. Otherwise, we do a fast lookup @@ -197,6 +319,149 @@ static inline void _esh_session_map_clean(ns_map_t *m) { m->data.track_idx = -1; } +#ifdef ESH_FCNTL_LOCK +static inline int _flock_init(size_t idx) { + pmix_status_t rc = PMIX_SUCCESS; + + if ( _is_server() ) { + _ESH_SESSION_lock(idx) = open(_ESH_SESSION_lockfile(idx), O_CREAT | O_RDWR | O_EXCL, 0600); + + /* if previous launch was crashed, the lockfile might not be deleted and unlocked, + * so we delete it and create a new one. */ + if (_ESH_SESSION_lock(idx) < 0) { + unlink(_ESH_SESSION_lockfile(idx)); + _ESH_SESSION_lock(idx) = open(_ESH_SESSION_lockfile(idx), O_CREAT | O_RDWR, 0600); + if (_ESH_SESSION_lock(idx) < 0) { + rc = PMIX_ERROR; + PMIX_ERROR_LOG(rc); + return rc; + } + } + if (_ESH_SESSION_setjobuid(idx) > 0) { + if (0 > chown(_ESH_SESSION_lockfile(idx), (uid_t) _ESH_SESSION_jobuid(idx), (gid_t) -1)) { + rc = PMIX_ERROR; + PMIX_ERROR_LOG(rc); + return rc; + } + if (0 > chmod(_ESH_SESSION_lockfile(idx), S_IRUSR | S_IWGRP | S_IRGRP)) { + rc = PMIX_ERROR; + PMIX_ERROR_LOG(rc); + return rc; + } + } + } + else { + _ESH_SESSION_lock(idx) = open(_ESH_SESSION_lockfile(idx), O_RDONLY); + if (-1 == _ESH_SESSION_lock(idx)) { + rc = PMIX_ERROR; + PMIX_ERROR_LOG(rc); + return rc; + } + } + return rc; +} +#endif + +#ifdef ESH_PTHREAD_LOCK +static inline int _rwlock_init(size_t idx) { + pmix_status_t rc = PMIX_SUCCESS; + size_t size = _lock_segment_size; + pthread_rwlockattr_t attr; + + if ((NULL != _ESH_SESSION_pthread_seg(idx)) || (NULL != _ESH_SESSION_pthread_rwlock(idx))) { + rc = PMIX_ERR_INIT; + return rc; + } + _ESH_SESSION_pthread_seg(idx) = (pmix_sm_seg_t *)malloc(sizeof(pmix_sm_seg_t)); + if (NULL == _ESH_SESSION_pthread_seg(idx)) { + rc = PMIX_ERR_OUT_OF_RESOURCE; + return rc; + } + + if (_is_server()) { + if (PMIX_SUCCESS != (rc = pmix_sm_segment_create(_ESH_SESSION_pthread_seg(idx), _ESH_SESSION_lockfile(idx), size))) { + return rc; + } + memset(_ESH_SESSION_pthread_seg(idx)->seg_base_addr, 0, size); + if (_ESH_SESSION_setjobuid(idx) > 0) { + if (0 > chown(_ESH_SESSION_lockfile(idx), (uid_t) _ESH_SESSION_jobuid(idx), (gid_t) -1)){ + rc = PMIX_ERROR; + PMIX_ERROR_LOG(rc); + return rc; + } + /* set the mode as required */ + if (0 > chmod(_ESH_SESSION_lockfile(idx), S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP )) { + rc = PMIX_ERROR; + PMIX_ERROR_LOG(rc); + return rc; + } + } + _ESH_SESSION_pthread_rwlock(idx) = (pthread_rwlock_t *)_ESH_SESSION_pthread_seg(idx)->seg_base_addr; + + if (0 != pthread_rwlockattr_init(&attr)) { + rc = PMIX_ERR_INIT; + pmix_sm_segment_detach(_ESH_SESSION_pthread_seg(idx)); + return rc; + } + if (0 != pthread_rwlockattr_setpshared(&attr, PTHREAD_PROCESS_SHARED)) { + rc = PMIX_ERR_INIT; + pmix_sm_segment_detach(_ESH_SESSION_pthread_seg(idx)); + pthread_rwlockattr_destroy(&attr); + return rc; + } +#ifdef HAVE_PTHREAD_SETKIND + if (0 != pthread_rwlockattr_setkind_np(&attr, PTHREAD_RWLOCK_PREFER_WRITER_NONRECURSIVE_NP)) { + rc = PMIX_ERR_INIT; + pmix_sm_segment_detach(_ESH_SESSION_pthread_seg(idx)); + pthread_rwlockattr_destroy(&attr); + return rc; + } +#endif + if (0 != pthread_rwlock_init(_ESH_SESSION_pthread_rwlock(idx), &attr)) { + rc = PMIX_ERR_INIT; + pmix_sm_segment_detach(_ESH_SESSION_pthread_seg(idx)); + pthread_rwlockattr_destroy(&attr); + return rc; + } + if (0 != pthread_rwlockattr_destroy(&attr)) { + rc = PMIX_ERR_INIT; + return rc; + } + + } + else { + _ESH_SESSION_pthread_seg(idx)->seg_size = size; + snprintf(_ESH_SESSION_pthread_seg(idx)->seg_name, PMIX_PATH_MAX, "%s", _ESH_SESSION_lockfile(idx)); + if (PMIX_SUCCESS != (rc = pmix_sm_segment_attach(_ESH_SESSION_pthread_seg(idx), PMIX_SM_RW))) { + return rc; + } + _ESH_SESSION_pthread_rwlock(idx) = (pthread_rwlock_t *)_ESH_SESSION_pthread_seg(idx)->seg_base_addr; + } + + return rc; +} + +static inline void _rwlock_release(session_t *s) { + pmix_status_t rc; + + if (0 != pthread_rwlock_destroy(s->rwlock)) { + rc = PMIX_ERROR; + PMIX_ERROR_LOG(rc); + return; + } + + /* detach & unlink from current desc */ + if (s->rwlock_seg->seg_cpid == getpid()) { + pmix_sm_segment_unlink(s->rwlock_seg); + } + pmix_sm_segment_detach(s->rwlock_seg); + + free(s->rwlock_seg); + s->rwlock_seg = NULL; + s->rwlock = NULL; +} +#endif + static inline const char *_unique_id(void) { static const char *str = NULL; @@ -527,6 +792,7 @@ static inline int _esh_session_init(size_t idx, ns_map_data_t *m, size_t jobuid, return rc; } + s->setjobuid = setjobuid; s->jobuid = jobuid; s->nspace_path = strdup(_base_path); @@ -549,31 +815,8 @@ static inline int _esh_session_init(size_t idx, ns_map_data_t *m, size_t jobuid, return rc; } } - s->lockfd = open(s->lockfile, O_CREAT | O_RDWR | O_EXCL, 0600); - - /* if previous launch was crashed, the lockfile might not be deleted and unlocked, - * so we delete it and create a new one. */ - if (s->lockfd < 0) { - unlink(s->lockfile); - s->lockfd = open(s->lockfile, O_CREAT | O_RDWR, 0600); - if (s->lockfd < 0) { - rc = PMIX_ERROR; - PMIX_ERROR_LOG(rc); - return rc; - } - } - if (setjobuid > 0){ - if (0 > chown(s->nspace_path, (uid_t) jobuid, (gid_t) -1)){ - rc = PMIX_ERROR; - PMIX_ERROR_LOG(rc); - return rc; - } - if (0 > chown(s->lockfile, (uid_t) jobuid, (gid_t) -1)) { - rc = PMIX_ERROR; - PMIX_ERROR_LOG(rc); - return rc; - } - if (0 > chmod(s->lockfile, S_IRUSR | S_IWGRP | S_IRGRP)) { + if (s->setjobuid > 0){ + if (0 > chown(s->nspace_path, (uid_t) s->jobuid, (gid_t) -1)){ rc = PMIX_ERROR; PMIX_ERROR_LOG(rc); return rc; @@ -587,12 +830,6 @@ static inline int _esh_session_init(size_t idx, ns_map_data_t *m, size_t jobuid, } } else { - s->lockfd = open(s->lockfile, O_RDONLY); - if (-1 == s->lockfd) { - rc = PMIX_ERROR; - PMIX_ERROR_LOG(rc); - return rc; - } seg = _attach_new_segment(INITIAL_SEGMENT, m, 0); if( NULL == seg ){ rc = PMIX_ERR_OUT_OF_RESOURCE; @@ -601,6 +838,16 @@ static inline int _esh_session_init(size_t idx, ns_map_data_t *m, size_t jobuid, } } + if (NULL == _esh_lock_init) { + rc = PMIX_ERR_INIT; + PMIX_ERROR_LOG(rc); + return rc; + } + if ( PMIX_SUCCESS != (rc = _esh_lock_init(m->tbl_idx))) { + PMIX_ERROR_LOG(rc); + return rc; + } + s->sm_seg_first = seg; s->sm_seg_last = s->sm_seg_first; return PMIX_SUCCESS; @@ -627,6 +874,9 @@ static inline void _esh_session_release(session_t *s) } free(s->nspace_path); } +#ifdef ESH_PTHREAD_LOCK + _rwlock_release(s); +#endif memset ((char *) s, 0, sizeof(*s)); } @@ -645,6 +895,13 @@ int _esh_init(pmix_info_t info[], size_t ninfo) _jobuid = getuid(); _setjobuid = 0; +#ifdef ESH_PTHREAD_LOCK + _esh_lock_init = _rwlock_init; +#endif +#ifdef ESH_FCNTL_LOCK + _esh_lock_init = _flock_init; +#endif + if (PMIX_SUCCESS != (rc = _esh_tbls_init())) { PMIX_ERROR_LOG(rc); goto err_exit; @@ -830,7 +1087,7 @@ int _esh_store(const char *nspace, int rank, pmix_kval_t *kv) } /* set exclusive lock */ - if (PMIX_SUCCESS != (rc = _ESH_WRLOCK(_ESH_SESSION_lockfd(ns_map->tbl_idx)))) { + if (PMIX_SUCCESS != (rc = _ESH_WRLOCK(_ESH_SESSION_lock(ns_map->tbl_idx)))) { PMIX_ERROR_LOG(rc); return rc; } @@ -880,17 +1137,14 @@ int _esh_store(const char *nspace, int rank, pmix_kval_t *kv) /* Now we know info about meta segment for this namespace. If meta segment * is not empty, then we look for data for the target rank. If they present, replace it. */ - PMIX_CONSTRUCT(&pbkt, pmix_buffer_t); PMIX_CONSTRUCT(&xfer, pmix_buffer_t); PMIX_LOAD_BUFFER(&xfer, kv->value->data.bo.bytes, kv->value->data.bo.size); - pmix_buffer_t *pxfer = &xfer; - pmix_bfrop.pack(&pbkt, &pxfer, 1, PMIX_BUFFER); + + rc = _store_data_for_rank(elem, rank, &xfer); + xfer.base_ptr = NULL; xfer.bytes_used = 0; - - rc = _store_data_for_rank(elem, rank, &pbkt); PMIX_DESTRUCT(&xfer); - PMIX_DESTRUCT(&pbkt); if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); @@ -898,14 +1152,14 @@ int _esh_store(const char *nspace, int rank, pmix_kval_t *kv) } /* unset lock */ - if (PMIX_SUCCESS != (rc = _ESH_UNLOCK(_ESH_SESSION_lockfd(ns_map->tbl_idx)))) { + if (PMIX_SUCCESS != (rc = _ESH_UNLOCK(_ESH_SESSION_lock(ns_map->tbl_idx)))) { PMIX_ERROR_LOG(rc); } return rc; err_exit: /* unset lock */ - if (PMIX_SUCCESS != (tmp_rc = _ESH_UNLOCK(_ESH_SESSION_lockfd(ns_map->tbl_idx)))) { + if (PMIX_SUCCESS != (tmp_rc = _ESH_UNLOCK(_ESH_SESSION_lock(ns_map->tbl_idx)))) { PMIX_ERROR_LOG(tmp_rc); } return rc; @@ -926,7 +1180,7 @@ int _esh_fetch(const char *nspace, int rank, const char *key, pmix_value_t **kvs uint8_t *addr; pmix_buffer_t buffer; pmix_value_t val; - uint32_t nprocs; + size_t nprocs; int cur_rank; ns_map_data_t *ns_map = NULL; bool all_ranks_found = true; @@ -958,7 +1212,12 @@ int _esh_fetch(const char *nspace, int rank, const char *key, pmix_value_t **kvs } if (PMIX_RANK_UNDEF == rank) { - nprocs = _get_univ_size(ns_map->name); + ssize_t _nprocs = _get_univ_size(ns_map->name); + if( 0 > _nprocs ){ + PMIX_ERROR_LOG(rc); + return rc; + } + nprocs = (size_t) _nprocs; cur_rank = -1; } else { nprocs = 1; @@ -966,7 +1225,7 @@ int _esh_fetch(const char *nspace, int rank, const char *key, pmix_value_t **kvs } /* grab shared lock */ - if (PMIX_SUCCESS != (lock_rc = _ESH_RDLOCK(_ESH_SESSION_lockfd(ns_map->tbl_idx)))) { + if (PMIX_SUCCESS != (lock_rc = _ESH_RDLOCK(_ESH_SESSION_lock(ns_map->tbl_idx)))) { /* Something wrong with the lock. The error is fatal */ rc = PMIX_ERROR; PMIX_ERROR_LOG(lock_rc); @@ -1046,8 +1305,8 @@ int _esh_fetch(const char *nspace, int rank, const char *key, pmix_value_t **kvs while (0 < kval_cnt) { /* data is stored in the following format: * key_val_pair { - * char key[PMIX_MAX_KEYLEN+1]; * size_t size; + * char key[KNAME_LEN(addr)]; * byte_t byte[size]; // should be loaded to pmix_buffer_t and unpacked. * }; * segment_format { @@ -1057,18 +1316,16 @@ int _esh_fetch(const char *nspace, int rank, const char *key, pmix_value_t **kvs * EXTENSION slot which has key = EXTENSION_SLOT and a size_t value for offset * to next data address for this process. */ - if (0 == strncmp((const char *)addr, ESH_REGION_INVALIDATED, strlen(ESH_REGION_INVALIDATED)+1)) { + if (0 == strncmp(ESH_KNAME_PTR(addr), ESH_REGION_INVALIDATED, ESH_KNAME_LEN(ESH_REGION_INVALIDATED))) { PMIX_OUTPUT_VERBOSE((10, pmix_globals.debug_output, "%s:%d:%s: for rank %s:%d, skip %s region", __FILE__, __LINE__, __func__, nspace, cur_rank, ESH_REGION_INVALIDATED)); - /*skip it */ - size_t size; - memcpy(&size, addr + strlen(ESH_REGION_INVALIDATED) + 1, sizeof(size_t)); - /* go to next item, updating address */ - addr += KVAL_SIZE(ESH_REGION_INVALIDATED, size); - } else if (0 == strncmp((const char *)addr, ESH_REGION_EXTENSION, strlen(ESH_REGION_EXTENSION)+1)) { + /* skip it + * go to next item, updating address */ + addr += ESH_KV_SIZE(addr); + } else if (0 == strncmp(ESH_KNAME_PTR(addr), ESH_REGION_EXTENSION, ESH_KNAME_LEN(ESH_REGION_EXTENSION))) { size_t offset; - memcpy(&offset, addr + strlen(ESH_REGION_EXTENSION) + 1 + sizeof(size_t), sizeof(size_t)); + memcpy(&offset, ESH_DATA_PTR(addr), sizeof(size_t)); PMIX_OUTPUT_VERBOSE((10, pmix_globals.debug_output, "%s:%d:%s: for rank %s:%d, reached %s with %lu value", __FILE__, __LINE__, __func__, nspace, cur_rank, ESH_REGION_EXTENSION, offset)); @@ -1088,16 +1345,15 @@ int _esh_fetch(const char *nspace, int rank, const char *key, pmix_value_t **kvs __FILE__, __LINE__, __func__, cur_rank, key)); break; } - } else if (0 == strncmp((const char *)addr, key, strlen(key)+1)) { + } else if (0 == strncmp(ESH_KNAME_PTR(addr), key, ESH_KNAME_LEN(key))) { PMIX_OUTPUT_VERBOSE((10, pmix_globals.debug_output, "%s:%d:%s: for rank %s:%d, found target key %s", __FILE__, __LINE__, __func__, nspace, cur_rank, key)); /* target key is found, get value */ - size_t size; - memcpy(&size, addr + strlen(key) + 1, sizeof(size_t)); - addr += strlen(key) + 1 + sizeof(size_t); + uint8_t *data_ptr = ESH_DATA_PTR(addr); + size_t data_size = ESH_DATA_SIZE(addr); PMIX_CONSTRUCT(&buffer, pmix_buffer_t); - PMIX_LOAD_BUFFER(&buffer, addr, size); + PMIX_LOAD_BUFFER(&buffer, data_ptr, data_size); int cnt = 1; /* unpack value for this key from the buffer. */ PMIX_VALUE_CONSTRUCT(&val); @@ -1116,14 +1372,11 @@ int _esh_fetch(const char *nspace, int rank, const char *key, pmix_value_t **kvs key_found = true; goto done; } else { - char ckey[PMIX_MAX_KEYLEN+1] = {0}; - strncpy(ckey, (const char *)addr, strlen((const char *)addr)+1); - size_t size; - memcpy(&size, addr + strlen(ckey) + 1, sizeof(size_t)); PMIX_OUTPUT_VERBOSE((10, pmix_globals.debug_output, - "%s:%d:%s: for rank %s:%d, skip key %s look for key %s", __FILE__, __LINE__, __func__, nspace, cur_rank, ckey, key)); + "%s:%d:%s: for rank %s:%u, skip key %s look for key %s", + __FILE__, __LINE__, __func__, nspace, cur_rank, ESH_KNAME_PTR(addr), key)); /* go to next item, updating address */ - addr += KVAL_SIZE(ckey, size); + addr += ESH_KV_SIZE(addr); kval_cnt--; } } @@ -1131,7 +1384,7 @@ int _esh_fetch(const char *nspace, int rank, const char *key, pmix_value_t **kvs done: /* unset lock */ - if (PMIX_SUCCESS != (lock_rc = _ESH_UNLOCK(_ESH_SESSION_lockfd(ns_map->tbl_idx)))) { + if (PMIX_SUCCESS != (lock_rc = _ESH_UNLOCK(_ESH_SESSION_lock(ns_map->tbl_idx)))) { PMIX_ERROR_LOG(lock_rc); } @@ -1331,6 +1584,7 @@ static void _set_constants_from_env() } } + _lock_segment_size = page_size; _max_ns_num = (_initial_segment_size - sizeof(size_t) * 2) / sizeof(ns_seg_info_t); _max_meta_elems = (_meta_segment_size - sizeof(size_t)) / sizeof(rank_meta_info); @@ -1407,9 +1661,9 @@ static seg_desc_t *_create_new_segment(segment_type type, const ns_map_data_t *n } memset(new_seg->seg_info.seg_base_addr, 0, size); - if (_setjobuid > 0){ + if (_ESH_SESSION_setjobuid(ns_map->tbl_idx) > 0){ rc = PMIX_ERR_NO_PERMISSIONS; - if (0 > chown(file_name, (uid_t) _jobuid, (gid_t) -1)){ + if (0 > chown(file_name, (uid_t) _ESH_SESSION_jobuid(ns_map->tbl_idx), (gid_t) -1)){ PMIX_ERROR_LOG(rc); goto err_exit; } @@ -1897,23 +2151,19 @@ static size_t get_free_offset(seg_desc_t *data_seg) static int put_empty_ext_slot(seg_desc_t *dataseg) { - size_t global_offset, rel_offset, data_ended, sz, val; + size_t global_offset, rel_offset, data_ended, val = 0; uint8_t *addr; global_offset = get_free_offset(dataseg); rel_offset = global_offset % _data_segment_size; - if (rel_offset + EXT_SLOT_SIZE(ESH_REGION_EXTENSION) > _data_segment_size) { + if (rel_offset + EXT_SLOT_SIZE() > _data_segment_size) { PMIX_ERROR_LOG(PMIX_ERROR); return PMIX_ERROR; } addr = _get_data_region_by_offset(dataseg, global_offset); - strncpy((char *)addr, ESH_REGION_EXTENSION, strlen(ESH_REGION_EXTENSION)+1); - val = 0; - sz = sizeof(size_t); - memcpy(addr + strlen(ESH_REGION_EXTENSION) + 1, &sz, sz); - memcpy(addr + strlen(ESH_REGION_EXTENSION) + 1 + sizeof(size_t), &val, sz); + ESH_PUT_KEY(addr, ESH_REGION_EXTENSION, (void*)&val, sizeof(size_t)); /* update offset at the beginning of current segment */ - data_ended = rel_offset + EXT_SLOT_SIZE(ESH_REGION_EXTENSION); + data_ended = rel_offset + EXT_SLOT_SIZE(); addr = (uint8_t*)(addr - rel_offset); memcpy(addr, &data_ended, sizeof(size_t)); return PMIX_SUCCESS; @@ -1921,12 +2171,10 @@ static int put_empty_ext_slot(seg_desc_t *dataseg) static size_t put_data_to_the_end(ns_track_elem_t *ns_info, seg_desc_t *dataseg, char *key, void *buffer, size_t size) { - size_t offset; + size_t offset, id = 0; seg_desc_t *tmp; - int id = 0; size_t global_offset, data_ended; uint8_t *addr; - size_t sz; PMIX_OUTPUT_VERBOSE((2, pmix_globals.debug_output, "%s:%d:%s: key %s", @@ -1940,16 +2188,25 @@ static size_t put_data_to_the_end(ns_track_elem_t *ns_info, seg_desc_t *dataseg, global_offset = get_free_offset(dataseg); offset = global_offset % _data_segment_size; - /* We should provide additional space at the end of segment to place EXTENSION_SLOT to have an ability to enlarge data for this rank.*/ - if (sizeof(size_t) + KVAL_SIZE(key, size) + EXT_SLOT_SIZE(key) > _data_segment_size) { + /* We should provide additional space at the end of segment to + * place EXTENSION_SLOT to have an ability to enlarge data for this rank.*/ + if ((sizeof(size_t) + ESH_KEY_SIZE(key, size) + EXT_SLOT_SIZE()) > _data_segment_size) { /* this is an error case: segment is so small that cannot place evem a single key-value pair. * warn a user about it and fail. */ offset = 0; /* offset cannot be 0 in normal case, so we use this value to indicate a problem. */ pmix_output(0, "PLEASE set NS_DATA_SEG_SIZE to value which is larger when %lu.", - sizeof(size_t) + strlen(key) + 1 + sizeof(size_t) + size + EXT_SLOT_SIZE(key)); + sizeof(size_t) + strlen(key) + 1 + sizeof(size_t) + size + EXT_SLOT_SIZE()); return offset; } - if (offset + KVAL_SIZE(key, size) + EXT_SLOT_SIZE(key) > _data_segment_size) { + + /* check the corner case that was observed at large scales: + * https://github.com/pmix/master/pull/282#issuecomment-277454198 + * + * if last time we stopped exactly on the border of the segment + * new segment wasn't allocated to us but (global_offset % _data_segment_size) == 0 + * so if offset is 0 here - we need to allocate the segment as well + */ + if ( (0 == offset) || ( (offset + ESH_KEY_SIZE(key, size) + EXT_SLOT_SIZE()) > _data_segment_size) ) { id++; /* create a new data segment. */ tmp = extend_segment(tmp, &ns_info->ns_map); @@ -1971,16 +2228,13 @@ static size_t put_data_to_the_end(ns_track_elem_t *ns_info, seg_desc_t *dataseg, } global_offset = offset + id * _data_segment_size; addr = (uint8_t*)(tmp->seg_info.seg_base_addr)+offset; - strncpy((char *)addr, key, strlen(key)+1); - sz = size; - memcpy(addr + strlen(key) + 1, &sz, sizeof(size_t)); - memcpy(addr + strlen(key) + 1 + sizeof(size_t), buffer, size); + ESH_PUT_KEY(addr, key, buffer, size); /* update offset at the beginning of current segment */ - data_ended = offset + KVAL_SIZE(key, size); + data_ended = offset + ESH_KEY_SIZE(key, size); addr = (uint8_t*)(tmp->seg_info.seg_base_addr); memcpy(addr, &data_ended, sizeof(size_t)); - PMIX_OUTPUT_VERBOSE((2, pmix_globals.debug_output, + PMIX_OUTPUT_VERBOSE((1, pmix_globals.debug_output, "%s:%d:%s: key %s, rel start offset %lu, rel end offset %lu, abs shift %lu size %lu", __FILE__, __LINE__, __func__, key, offset, data_ended, id * _data_segment_size, size)); return global_offset; @@ -2029,11 +2283,8 @@ static int pmix_sm_store(ns_track_elem_t *ns_info, int rank, pmix_kval_t *kval, * It should be equal in the normal case. It it's not true, then it means that * segment was extended, and we put data to the next segment, so we now need to * put extension slot at the end of previous segment with a "reference" to a new_offset */ - size_t sz = sizeof(size_t); addr = _get_data_region_by_offset(datadesc, free_offset); - strncpy((char *)addr, ESH_REGION_EXTENSION, strlen(ESH_REGION_EXTENSION)+1); - memcpy(addr + strlen(ESH_REGION_EXTENSION) + 1, &sz, sizeof(size_t)); - memcpy(addr + strlen(ESH_REGION_EXTENSION) + 1 + sizeof(size_t), &offset, sizeof(size_t)); + ESH_PUT_KEY(addr, ESH_REGION_EXTENSION, (void*)&offset, sizeof(size_t)); } if (NULL == *rinfo) { *rinfo = (rank_meta_info*)malloc(sizeof(rank_meta_info)); @@ -2059,15 +2310,15 @@ static int pmix_sm_store(ns_track_elem_t *ns_info, int rank, pmix_kval_t *kval, int add_to_the_end = 1; while (0 < kval_cnt) { /* data is stored in the following format: - * key[PMIX_MAX_KEYLEN+1] * size_t size + * key[ESH_KNAME_LEN(addr)] * byte buffer containing pmix_value, should be loaded to pmix_buffer_t and unpacked. * next kval pair * ..... * extension slot which has key = EXTENSION_SLOT and a size_t value for offset to next data address for this process. */ - if (0 == strncmp((const char *)addr, ESH_REGION_EXTENSION, strlen(ESH_REGION_EXTENSION)+1)) { - memcpy(&offset, addr + strlen(ESH_REGION_EXTENSION) + 1 + sizeof(size_t), sizeof(size_t)); + if (0 == strncmp(ESH_KNAME_PTR(addr), ESH_REGION_EXTENSION, ESH_KNAME_LEN(ESH_REGION_EXTENSION))) { + memcpy(&offset, ESH_DATA_PTR(addr), sizeof(size_t)); if (0 < offset) { PMIX_OUTPUT_VERBOSE((10, pmix_globals.debug_output, "%s:%d:%s: for rank %d, replace flag %d %s is filled with %lu value", @@ -2082,22 +2333,24 @@ static int pmix_sm_store(ns_track_elem_t *ns_info, int rank, pmix_kval_t *kval, } else { /* should not be, we should be out of cycle when this happens */ } - } else if (0 == strncmp((const char *)addr, kval->key, strlen(kval->key)+1)) { + } else if (0 == strncmp(ESH_KNAME_PTR(addr), kval->key, ESH_KNAME_LEN(kval->key))) { PMIX_OUTPUT_VERBOSE((10, pmix_globals.debug_output, "%s:%d:%s: for rank %d, replace flag %d found target key %s", __FILE__, __LINE__, __func__, rank, data_exist, kval->key)); /* target key is found, compare value sizes */ - size_t cur_size; - memcpy(&cur_size, addr + strlen(kval->key) + 1, sizeof(size_t)); - if (cur_size != size) { - //if (1) { /* if we want to test replacing values for existing keys. */ + if (ESH_DATA_SIZE(addr) != size) { + /* changing current key name to INVALIDATED, changing the size of data to retain the offset. */ + size_t old_data_size = ESH_DATA_SIZE(addr); + int diff_size = ESH_KNAME_LEN(addr) - ESH_KNAME_LEN(ESH_REGION_INVALIDATED); + size_t new_size = old_data_size + diff_size; + /* invalidate current value and store another one at the end of data region. */ - strncpy((char *)addr, ESH_REGION_INVALIDATED, strlen(ESH_REGION_INVALIDATED)+1); + ESH_PUT_KEY(addr, ESH_REGION_INVALIDATED, addr, new_size); /* decrementing count, it will be incremented back when we add a new value for this key at the end of region. */ (*rinfo)->count--; kval_cnt--; /* go to next item, updating address */ - addr += KVAL_SIZE(ESH_REGION_INVALIDATED, cur_size); + addr += ESH_KV_SIZE(addr); PMIX_OUTPUT_VERBOSE((10, pmix_globals.debug_output, "%s:%d:%s: for rank %d, replace flag %d mark key %s regions as invalidated. put new data at the end.", __FILE__, __LINE__, __func__, rank, data_exist, kval->key)); @@ -2106,38 +2359,31 @@ static int pmix_sm_store(ns_track_elem_t *ns_info, int rank, pmix_kval_t *kval, "%s:%d:%s: for rank %d, replace flag %d replace data for key %s type %d in place", __FILE__, __LINE__, __func__, rank, data_exist, kval->key, kval->value->type)); /* replace old data with new one. */ - addr += strlen(kval->key) + 1; - memcpy(addr, &size, sizeof(size_t)); - addr += sizeof(size_t); - memset(addr, 0, cur_size); - memcpy(addr, buffer->base_ptr, size); - addr += cur_size; + memset(ESH_DATA_PTR(addr), 0, ESH_DATA_SIZE(addr)); + memcpy(ESH_DATA_PTR(addr), buffer->base_ptr, size); + addr += ESH_KV_SIZE(addr); add_to_the_end = 0; break; } } else { - char ckey[PMIX_MAX_KEYLEN+1] = {0}; - strncpy(ckey, (const char *)addr, strlen(addr)+1); PMIX_OUTPUT_VERBOSE((10, pmix_globals.debug_output, - "%s:%d:%s: for rank %d, replace flag %d skip %s key, look for %s key", - __FILE__, __LINE__, __func__, rank, data_exist, ckey, kval->key)); + "%s:%d:%s: for rank %u, replace flag %d skip %s key, look for %s key", + __FILE__, __LINE__, __func__, rank, data_exist, ESH_KNAME_PTR(addr), kval->key)); /* Skip it: key is "INVALIDATED" or key is valid but different from target one. */ - if (0 != strncmp(ESH_REGION_INVALIDATED, ckey, strlen(ckey)+1)) { + if (0 != strncmp(ESH_REGION_INVALIDATED, ESH_KNAME_PTR(addr), ESH_KNAME_LEN(ESH_KNAME_PTR(addr)))) { /* count only valid items */ kval_cnt--; } - size_t size; - memcpy(&size, addr + strlen(ckey) + 1, sizeof(size_t)); /* go to next item, updating address */ - addr += KVAL_SIZE(ckey, size); + addr += ESH_KV_SIZE(addr); } } if (1 == add_to_the_end) { /* if we get here, it means that we want to add a new item for the target rank, or * we mark existing item with the same key as "invalidated" and want to add new item * for the same key. */ - (*rinfo)->count++; size_t free_offset; + (*rinfo)->count++; free_offset = get_free_offset(datadesc); /* add to the end */ offset = put_data_to_the_end(ns_info, datadesc, kval->key, buffer->base_ptr, size); @@ -2151,11 +2397,11 @@ static int pmix_sm_store(ns_track_elem_t *ns_info, int rank, pmix_kval_t *kval, * data for different ranks, and that's why next element is EXTENSION_SLOT. * We put new data to the end of data region and just update EXTENSION_SLOT value by new offset. */ - if (0 == strncmp((const char *)addr, ESH_REGION_EXTENSION, strlen(ESH_REGION_EXTENSION)+1)) { + if (0 == strncmp(ESH_KNAME_PTR(addr), ESH_REGION_EXTENSION, ESH_KNAME_LEN(ESH_REGION_EXTENSION))) { PMIX_OUTPUT_VERBOSE((10, pmix_globals.debug_output, "%s:%d:%s: for rank %d, replace flag %d %s should be filled with offset %lu value", __FILE__, __LINE__, __func__, rank, data_exist, ESH_REGION_EXTENSION, offset)); - memcpy(addr + strlen(ESH_REGION_EXTENSION) + 1 + sizeof(size_t), &offset, sizeof(size_t)); + memcpy(ESH_DATA_PTR(addr), &offset, sizeof(size_t)); } else { /* (2) - we point to the first free offset, no more data is stored further in this segment. * There is no EXTENSION_SLOT by this addr since we continue pushing data for the same rank, @@ -2165,11 +2411,7 @@ static int pmix_sm_store(ns_track_elem_t *ns_info, int rank, pmix_kval_t *kval, * forcibly and store new offset in its value. */ if (free_offset != offset) { /* segment was extended, need to put extension slot by free_offset indicating new_offset */ - size_t sz = sizeof(size_t); - size_t length = strlen(ESH_REGION_EXTENSION); - strncpy((char *)addr, ESH_REGION_EXTENSION, length + 1); - memcpy(addr + length + 1, &sz, sz); - memcpy(addr + length + 1 + sizeof(size_t), &offset, sz); + ESH_PUT_KEY(addr, ESH_REGION_EXTENSION, (void*)&offset, sizeof(size_t)); } } PMIX_OUTPUT_VERBOSE((10, pmix_globals.debug_output, @@ -2186,9 +2428,7 @@ static int pmix_sm_store(ns_track_elem_t *ns_info, int rank, pmix_kval_t *kval, static int _store_data_for_rank(ns_track_elem_t *ns_info, int rank, pmix_buffer_t *buf) { pmix_status_t rc; - int32_t cnt; - pmix_buffer_t *bptr; pmix_kval_t *kp; seg_desc_t *metadesc, *datadesc; @@ -2223,30 +2463,20 @@ static int _store_data_for_rank(ns_track_elem_t *ns_info, int rank, pmix_buffer_ * so unpack these buffers, and then unpack kvals from each modex buffer, * storing them in the shared memory dstore. */ - cnt = 1; free_offset = get_free_offset(datadesc); - while (PMIX_SUCCESS == (rc = pmix_bfrop.unpack(buf, &bptr, &cnt, PMIX_BUFFER))) { - cnt = 1; - kp = PMIX_NEW(pmix_kval_t); - while (PMIX_SUCCESS == (rc = pmix_bfrop.unpack(bptr, kp, &cnt, PMIX_KVAL))) { - pmix_output_verbose(2, pmix_globals.debug_output, - "pmix: unpacked key %s", kp->key); - if (PMIX_SUCCESS != (rc = pmix_sm_store(ns_info, rank, kp, &rinfo, data_exist))) { - PMIX_ERROR_LOG(rc); - return rc; - } - PMIX_RELEASE(kp); // maintain acctg - hash_store does a retain - cnt = 1; - kp = PMIX_NEW(pmix_kval_t); - } - cnt = 1; - PMIX_RELEASE(kp); - PMIX_RELEASE(bptr); // free's the data region - if (PMIX_ERR_UNPACK_READ_PAST_END_OF_BUFFER != rc) { + kp = PMIX_NEW(pmix_kval_t); + while (PMIX_SUCCESS == (rc = pmix_bfrop.unpack(buf, kp, &(int){1}, PMIX_KVAL))) { + pmix_output_verbose(2, pmix_globals.debug_output, + "pmix: unpacked key %s", kp->key); + if (PMIX_SUCCESS != (rc = pmix_sm_store(ns_info, rank, kp, &rinfo, data_exist))) { PMIX_ERROR_LOG(rc); - break; + return rc; } + PMIX_RELEASE(kp); // maintain acctg - hash_store does a retain + kp = PMIX_NEW(pmix_kval_t); } + PMIX_RELEASE(kp); + if (PMIX_ERR_UNPACK_READ_PAST_END_OF_BUFFER != rc) { PMIX_ERROR_LOG(rc); /* TODO: should we error-exit here? */ @@ -2269,7 +2499,7 @@ static int _store_data_for_rank(ns_track_elem_t *ns_info, int rank, pmix_buffer_ * */ rc = put_empty_ext_slot(ns_info->data_seg); if (PMIX_SUCCESS != rc) { - if (NULL != rinfo) { + if ((0 == data_exist) && NULL != rinfo) { free(rinfo); } PMIX_ERROR_LOG(rc); @@ -2289,26 +2519,23 @@ static int _store_data_for_rank(ns_track_elem_t *ns_info, int rank, pmix_buffer_ return rc; } -static inline uint32_t _get_univ_size(const char *nspace) +static inline ssize_t _get_univ_size(const char *nspace) { - pmix_value_t *val = NULL; - uint32_t nprocs = 0; - pmix_nspace_t *ns, *nptr; - - nptr = NULL; - PMIX_LIST_FOREACH(ns, &pmix_globals.nspaces, pmix_nspace_t) { - if (0 == strcmp(nspace, ns->nspace)) { - nptr = ns; - break; - } - } + ssize_t nprocs = 0; + pmix_value_t *val; + int rc; - if (nptr && (PMIX_SUCCESS == pmix_hash_fetch(&nptr->internal, PMIX_RANK_WILDCARD, PMIX_UNIV_SIZE, &val))) { - if (val->type == PMIX_UINT32) { - nprocs = val->data.uint32; - } - PMIX_VALUE_RELEASE(val); + rc = _esh_fetch(nspace, PMIX_RANK_WILDCARD, PMIX_UNIV_SIZE, &val); + if( PMIX_SUCCESS != rc ) { + PMIX_ERROR_LOG(rc); + return rc; } - + if( val->type != PMIX_UINT32 ){ + rc = PMIX_ERR_BAD_PARAM; + PMIX_ERROR_LOG(rc); + return rc; + } + nprocs = (ssize_t)val->data.uint32; + PMIX_VALUE_RELEASE(val); return nprocs; } diff --git a/opal/mca/pmix/pmix112/pmix/src/dstore/pmix_esh.h b/opal/mca/pmix/pmix112/pmix/src/dstore/pmix_esh.h index 47ad97103c0..64c809d0536 100644 --- a/opal/mca/pmix/pmix112/pmix/src/dstore/pmix_esh.h +++ b/opal/mca/pmix/pmix112/pmix/src/dstore/pmix_esh.h @@ -24,6 +24,14 @@ BEGIN_C_DECLS #define PMIX_DSTORE_ESH_BASE_PATH "PMIX_DSTORE_ESH_BASE_PATH" +#ifdef HAVE_PTHREAD_SHARED +#define ESH_PTHREAD_LOCK +#elif defined HAVE_FCNTL_FLOCK +#define ESH_FCNTL_LOCK +#else +#error No locking mechanism was found +#endif + /* this structs are used to store information about * shared segments addresses locally at each process, * so they are common for different types of segments @@ -51,8 +59,13 @@ typedef struct ns_map_s ns_map_t; struct session_s { int in_use; uid_t jobuid; + char setjobuid; char *nspace_path; char *lockfile; +#ifdef ESH_PTHREAD_LOCK + pmix_sm_seg_t *rwlock_seg; + pthread_rwlock_t *rwlock; +#endif int lockfd; seg_desc_t *sm_seg_first; seg_desc_t *sm_seg_last; diff --git a/opal/mca/pmix/pmix112/pmix/src/server/pmix_server.c b/opal/mca/pmix/pmix112/pmix/src/server/pmix_server.c index e2a3c198120..ace475674a0 100644 --- a/opal/mca/pmix/pmix112/pmix/src/server/pmix_server.c +++ b/opal/mca/pmix/pmix112/pmix/src/server/pmix_server.c @@ -233,7 +233,14 @@ static pmix_status_t initialize_server_base(pmix_server_module_t *module) // may provide the user with a proper help... *Cough*, *Cough* OSX... if ((strlen(tdir) + strlen(pmix_pid) + 1) > sizeof(myaddress.sun_path)-1) { free(pmix_pid); - return PMIX_ERR_INVALID_LENGTH; + /* we don't have show-help in this version, so pretty-print something + * the hard way */ + fprintf(stderr, "PMIx has detected a temporary directory name that results\n"); + fprintf(stderr, "in a path that is too long for the Unix domain socket:\n\n"); + fprintf(stderr, " Temp dir: %s\n\n", tdir); + fprintf(stderr, "Try setting your TMPDIR environmental variable to point to\n"); + fprintf(stderr, "something shorter in length\n"); + return PMIX_ERR_SILENT; // return a silent error so our host knows we printed a message } snprintf(myaddress.sun_path, sizeof(myaddress.sun_path)-1, "%s/%s", tdir, pmix_pid); free(pmix_pid); diff --git a/opal/mca/pmix/pmix112/pmix/src/sm/pmix_mmap.c b/opal/mca/pmix/pmix112/pmix/src/sm/pmix_mmap.c index a3c89afc885..bc58a4d8942 100644 --- a/opal/mca/pmix/pmix112/pmix/src/sm/pmix_mmap.c +++ b/opal/mca/pmix/pmix112/pmix/src/sm/pmix_mmap.c @@ -1,6 +1,8 @@ /* * Copyright (c) 2015-2016 Mellanox Technologies, Inc. * All rights reserved. + * Copyright (c) 2017 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -16,6 +18,7 @@ #include #include #include +#include #include #include "src/include/pmix_globals.h" @@ -58,12 +61,25 @@ int _mmap_segment_create(pmix_sm_seg_t *sm_seg, const char *file_name, size_t si goto out; } /* size backing file - note the use of real_size here */ +#ifdef HAVE_POSIX_FALLOCATE + if (0 != posix_fallocate(sm_seg->seg_id, 0, size)) { + pmix_output_verbose(2, pmix_globals.debug_output, + "sys call posix_fallocate(2) fail\n"); + if (ENOSPC == errno) { + rc = PMIX_ERR_OUT_OF_RESOURCE; + } else { + rc = PMIX_ERROR; + } + goto out; + } +#else if (0 != ftruncate(sm_seg->seg_id, size)) { pmix_output_verbose(2, pmix_globals.debug_output, "sys call ftruncate(2) fail\n"); rc = PMIX_ERROR; goto out; } +#endif if (MAP_FAILED == (seg_addr = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED, sm_seg->seg_id, 0))) { diff --git a/opal/mca/pmix/pmix112/pmix/test/Makefile.am b/opal/mca/pmix/pmix112/pmix/test/Makefile.am index 9dbd63eedeb..5712033ee4d 100644 --- a/opal/mca/pmix/pmix112/pmix/test/Makefile.am +++ b/opal/mca/pmix/pmix112/pmix/test/Makefile.am @@ -25,7 +25,9 @@ if !WANT_HIDDEN SUBDIRS = simple endif -headers = test_common.h cli_stages.h server_callbacks.h utils.h test_fence.h test_publish.h test_spawn.h test_cd.h test_resolve_peers.h test_error.h +headers = test_common.h cli_stages.h server_callbacks.h utils.h test_fence.h \ + test_publish.h test_spawn.h test_cd.h test_resolve_peers.h test_error.h \ + test_replace.h test_internal.h AM_CPPFLAGS = -I$(top_builddir)/src -I$(top_builddir)/src/include -I$(top_builddir)/src/api @@ -54,7 +56,8 @@ pmi2_client_LDADD = \ $(top_builddir)/libpmix.la pmix_client_SOURCES = $(headers) \ - pmix_client.c test_fence.c test_common.c test_publish.c test_spawn.c test_cd.c test_resolve_peers.c test_error.c + pmix_client.c test_fence.c test_common.c test_publish.c test_spawn.c \ + test_cd.c test_resolve_peers.c test_error.c test_replace.c test_internal.c pmix_client_LDFLAGS = $(PMIX_PKG_CONFIG_LDFLAGS) pmix_client_LDADD = \ $(top_builddir)/libpmix.la diff --git a/opal/mca/pmix/pmix112/pmix/test/pmix_client.c b/opal/mca/pmix/pmix112/pmix/test/pmix_client.c index b9c648e1b08..2a18f4d7d78 100644 --- a/opal/mca/pmix/pmix112/pmix/test/pmix_client.c +++ b/opal/mca/pmix/pmix112/pmix/test/pmix_client.c @@ -14,7 +14,7 @@ * Copyright (c) 2009-2012 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2011 Oak Ridge National Labs. All rights reserved. * Copyright (c) 2013-2015 Intel, Inc. All rights reserved. - * Copyright (c) 2015 Mellanox Technologies, Inc. All rights reserved. + * Copyright (c) 2015-2017 Mellanox Technologies, Inc. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -39,7 +39,8 @@ #include "test_cd.h" #include "test_resolve_peers.h" #include "test_error.h" - +#include "test_replace.h" +#include "test_internal.h" static void errhandler(pmix_status_t status, pmix_proc_t procs[], size_t nprocs, @@ -188,6 +189,24 @@ int main(int argc, char **argv) } } + if (NULL != params.key_replace) { + rc = test_replace(myproc.nspace, myproc.rank, params); + if (PMIX_SUCCESS != rc) { + FREE_TEST_PARAMS(params); + TEST_ERROR(("%s:%d error key replace test failed: %d", myproc.nspace, myproc.rank, rc)); + exit(0); + } + } + + if (params.test_internal) { + rc = test_internal(myproc.nspace, myproc.rank, params); + if (PMIX_SUCCESS != rc) { + FREE_TEST_PARAMS(params); + TEST_ERROR(("%s:%d error key store internal test failed: %d", myproc.nspace, myproc.rank, rc)); + exit(0); + } + } + TEST_VERBOSE(("Client ns %s rank %d: PASSED", myproc.nspace, myproc.rank)); PMIx_Deregister_errhandler(1, op_callbk, NULL); diff --git a/opal/mca/pmix/pmix112/pmix/test/test_common.c b/opal/mca/pmix/pmix112/pmix/test/test_common.c index 83197ec2345..d78ff1973db 100644 --- a/opal/mca/pmix/pmix112/pmix/test/test_common.c +++ b/opal/mca/pmix/pmix112/pmix/test/test_common.c @@ -2,7 +2,7 @@ * Copyright (c) 2013-2015 Intel, Inc. All rights reserved. * Copyright (c) 2015 Artem Y. Polyakov . * All rights reserved. - * Copyright (c) 2015 Mellanox Technologies, Inc. + * Copyright (c) 2015-2017 Mellanox Technologies, Inc. * All rights reserved. * $COPYRIGHT$ * @@ -18,6 +18,7 @@ #include "test_common.h" #include #include +#include int pmix_test_verbose = 0; @@ -76,7 +77,9 @@ void parse_cmd(int argc, char **argv, test_params *params) fprintf(stderr, "\t--test-spawn test spawn api.\n"); fprintf(stderr, "\t--test-connect test connect/disconnect api.\n"); fprintf(stderr, "\t--test-resolve-peers test resolve_peers api.\n"); - fprintf(stderr, "t--test-error test error handling api.\n"); + fprintf(stderr, "\t--test-error test error handling api.\n"); + fprintf(stderr, "\t--test-replace N:k0,k1,...,k(N-1) test key replace for N keys, k0,k1,k(N-1) - key indexes to replace \n"); + fprintf(stderr, "\t--test-internal N test store internal key, N - number of internal keys\n"); exit(0); } else if (0 == strcmp(argv[i], "--exec") || 0 == strcmp(argv[i], "-e")) { i++; @@ -169,6 +172,24 @@ void parse_cmd(int argc, char **argv, test_params *params) params->test_resolve_peers = 1; } else if( 0 == strcmp(argv[i], "--test-error") ){ params->test_error = 1; + } else if(0 == strcmp(argv[i], "--test-replace") ) { + i++; + if (NULL != argv[i] && (*argv[i] != '-')) { + params->key_replace = strdup(argv[i]); + if (0 != parse_replace(params->key_replace, 0, NULL)) { + fprintf(stderr, "Incorrect --test-replace option format: %s\n", params->key_replace); + exit(1); + } + } else { + params->key_replace = strdup(TEST_REPLACE_DEFAULT); + } + } else if(0 == strcmp(argv[i], "--test-internal")) { + i++; + if ((NULL != argv[i]) && (*argv[i] != '-')) { + params->test_internal = strtol(argv[i], NULL, 10); + } else { + params->test_internal = 1; + } } else { @@ -252,11 +273,16 @@ PMIX_CLASS_INSTANCE(participant_t, pmix_list_item_t, NULL, NULL); +PMIX_CLASS_INSTANCE(key_replace_t, + pmix_list_item_t, + NULL, NULL); + static int ns_id = -1; static fence_desc_t *fdesc = NULL; pmix_list_t *participants = NULL; pmix_list_t test_fences; pmix_list_t *noise_range = NULL; +pmix_list_t key_replace; #define CHECK_STRTOL_VAL(val, str, store) do { \ if (0 == val) { \ @@ -481,6 +507,67 @@ int parse_noise(char *noise_param, int store) return ret; } +static int is_digit(const char *str) +{ + if (NULL == str) + return 0; + + while (0 != *str) { + if (!isdigit(*str)) { + return 0; + } + else { + str++; + } + } + return 1; +} + +int parse_replace(char *replace_param, int store, int *key_num) { + int ret = 0; + char *tmp = strdup(replace_param); + char tmp_str[32]; + char * pch, *ech; + key_replace_t *item; + int cnt = 0; + + if (NULL == replace_param) { + free(tmp); + return 1; + } + + pch = strchr(tmp, ':'); + snprintf(tmp_str, pch - tmp + 1, "%s", tmp); + cnt = atol(tmp_str); + + if (NULL != key_num) { + *key_num = cnt; + } + + while(NULL != pch) { + pch++; + ech = strchr(pch, ','); + if (NULL != ech || (strlen(pch) > 0)) { + snprintf(tmp_str, ech - pch + 1, "%s", pch); + if ((0 == is_digit(tmp_str)) || ((atoi(tmp_str) + 1) > cnt)) { + ret = 1; + break; + } + pch = ech; + if (store) { + item = PMIX_NEW(key_replace_t); + item->key_idx = atoi(tmp_str); + pmix_list_append(&key_replace, &item->super); + } + } else { + ret = 1; + break; + } + } + free(tmp); + return ret; +} + int get_total_ns_number(test_params params) { int num = 0; diff --git a/opal/mca/pmix/pmix112/pmix/test/test_common.h b/opal/mca/pmix/pmix112/pmix/test/test_common.h index f98f3acbfaa..a55bc90254f 100644 --- a/opal/mca/pmix/pmix112/pmix/test/test_common.h +++ b/opal/mca/pmix/pmix112/pmix/test/test_common.h @@ -4,7 +4,7 @@ * All rights reserved. * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. - * Copyright (c) 2015 Mellanox Technologies, Inc. + * Copyright (c) 2015-2017 Mellanox Technologies, Inc. * All rights reserved. * $COPYRIGHT$ * @@ -25,6 +25,7 @@ #include #include #include +#include #include "src/class/pmix_list.h" #include "src/util/argv.h" @@ -73,6 +74,7 @@ extern FILE *file; #define TEST_DEFAULT_TIMEOUT 10 #define MAX_DIGIT_LEN 10 +#define TEST_REPLACE_DEFAULT "3:1" #define TEST_SET_FILE(prefix, ns_id, rank) { \ char *fname = malloc( strlen(prefix) + MAX_DIGIT_LEN + 2 ); \ @@ -117,6 +119,8 @@ typedef struct { int test_connect; int test_resolve_peers; int test_error; + char *key_replace; + int test_internal; } test_params; #define INIT_TEST_PARAMS(params) do { \ @@ -145,6 +149,8 @@ typedef struct { params.noise = NULL; \ params.ns_dist = NULL; \ params.test_error = 0; \ + params.key_replace = NULL; \ + params.test_internal = 0; \ } while (0) #define FREE_TEST_PARAMS(params) do { \ @@ -174,6 +180,7 @@ typedef struct { void parse_cmd(int argc, char **argv, test_params *params); int parse_fence(char *fence_param, int store); int parse_noise(char *noise_param, int store); +int parse_replace(char *replace_param, int store, int *key_num); typedef struct { pmix_list_item_t super; @@ -189,11 +196,140 @@ typedef struct { } participant_t; PMIX_CLASS_DECLARATION(participant_t); +typedef struct { + pmix_list_item_t super; + int key_idx; +} key_replace_t; +PMIX_CLASS_DECLARATION(key_replace_t); + extern pmix_list_t test_fences; extern pmix_list_t *noise_range; +extern pmix_list_t key_replace; #define NODE_NAME "node1" int get_total_ns_number(test_params params); int get_all_ranks_from_namespace(test_params params, char *nspace, pmix_proc_t **ranks, size_t *nranks); +typedef struct { + int in_progress; + pmix_value_t *kv; + int status; +} get_cbdata; + +#define SET_KEY(key, fence_num, ind, use_same_keys) do { \ + if (use_same_keys) { \ + (void)snprintf(key, sizeof(key)-1, "key-%d", ind); \ + } else { \ + (void)snprintf(key, sizeof(key)-1, "key-f%d:%d", fence_num, ind); \ + } \ +} while (0) + +#define PUT(dtype, data, flag, fence_num, ind, use_same_keys) do { \ + char key[50]; \ + pmix_value_t value; \ + SET_KEY(key, fence_num, ind, use_same_keys); \ + PMIX_VAL_SET(&value, dtype, data); \ + TEST_VERBOSE(("%s:%d put key %s", my_nspace, my_rank, key)); \ + if (PMIX_SUCCESS != (rc = PMIx_Put(flag, key, &value))) { \ + TEST_ERROR(("%s:%d: PMIx_Put key %s failed: %d", my_nspace, my_rank, key, rc)); \ + rc = PMIX_ERROR; \ + } \ + PMIX_VALUE_DESTRUCT(&value); \ +} while (0) + +#define GET(dtype, data, ns, r, fence_num, ind, use_same_keys, blocking, ok_notfnd) do { \ + char key[50]; \ + pmix_value_t *val; \ + get_cbdata cbdata; \ + cbdata.status = PMIX_SUCCESS; \ + pmix_proc_t foobar; \ + SET_KEY(key, fence_num, ind, use_same_keys); \ + (void)strncpy(foobar.nspace, ns, PMIX_MAX_NSLEN); \ + foobar.rank = r; \ + TEST_VERBOSE(("%s:%d want to get from %s:%d key %s", my_nspace, my_rank, ns, r, key)); \ + if (blocking) { \ + if (PMIX_SUCCESS != (rc = PMIx_Get(&foobar, key, NULL, 0, &val))) { \ + if( !( rc == PMIX_ERR_NOT_FOUND && ok_notfnd ) ){ \ + TEST_ERROR(("%s:%d: PMIx_Get failed: %d from %s:%d, key %s", my_nspace, my_rank, rc, ns, r, key)); \ + } \ + rc = PMIX_ERROR; \ + } \ + } else { \ + int count; \ + cbdata.in_progress = 1; \ + PMIX_VALUE_CREATE(val, 1); \ + cbdata.kv = val; \ + if (PMIX_SUCCESS != (rc = PMIx_Get_nb(&foobar, key, NULL, 0, get_cb, (void*)&cbdata))) { \ + TEST_VERBOSE(("%s:%d: PMIx_Get_nb failed: %d from %s:%d, key=%s", my_nspace, my_rank, rc, ns, r, key)); \ + rc = PMIX_ERROR; \ + } else { \ + count = 0; \ + while(cbdata.in_progress){ \ + struct timespec ts; \ + ts.tv_sec = 0; \ + ts.tv_nsec = 100; \ + nanosleep(&ts,NULL); \ + count++; \ + } \ + } \ + } \ + if (PMIX_SUCCESS == rc) { \ + if( PMIX_SUCCESS != cbdata.status ){ \ + if( !( rc == PMIX_ERR_NOT_FOUND && ok_notfnd ) ){ \ + TEST_VERBOSE(("%s:%d: PMIx_Get_nb failed: %d from %s:%d, key=%s", \ + my_nspace, my_rank, rc, my_nspace, r)); \ + } \ + rc = PMIX_ERROR; \ + } else if (NULL == val) { \ + TEST_VERBOSE(("%s:%d: PMIx_Get returned NULL value", my_nspace, my_rank)); \ + rc = PMIX_ERROR; \ + } \ + else if (val->type != PMIX_VAL_TYPE_ ## dtype || PMIX_VAL_CMP(dtype, PMIX_VAL_FIELD_ ## dtype((val)), data)) { \ + TEST_VERBOSE(("%s:%u: from %s:%d Key %s value or type mismatch," \ + " want type %d get type %d", \ + my_nspace, my_rank, ns, r, key, PMIX_VAL_TYPE_ ## dtype, val->type)); \ + rc = PMIX_ERROR; \ + } \ + } \ + if (PMIX_SUCCESS == rc) { \ + TEST_VERBOSE(("%s:%d: GET OF %s from %s:%d SUCCEEDED", my_nspace, my_rank, key, ns, r)); \ + PMIX_VALUE_RELEASE(val); \ + } \ +} while (0) + +#define FENCE(blocking, data_ex, pcs, nprocs) do { \ + if( blocking ){ \ + pmix_info_t *info = NULL; \ + size_t ninfo = 0; \ + if (data_ex) { \ + bool value = 1; \ + PMIX_INFO_CREATE(info, 1); \ + (void)strncpy(info->key, PMIX_COLLECT_DATA, PMIX_MAX_KEYLEN); \ + pmix_value_load(&info->value, &value, PMIX_BOOL); \ + ninfo = 1; \ + } \ + rc = PMIx_Fence(pcs, nprocs, info, ninfo); \ + PMIX_INFO_FREE(info, ninfo); \ + } else { \ + int in_progress = 1, count; \ + rc = PMIx_Fence_nb(pcs, nprocs, NULL, 0, release_cb, &in_progress); \ + if ( PMIX_SUCCESS == rc ) { \ + count = 0; \ + while( in_progress ){ \ + struct timespec ts; \ + ts.tv_sec = 0; \ + ts.tv_nsec = 100; \ + nanosleep(&ts,NULL); \ + count++; \ + } \ + TEST_VERBOSE(("PMIx_Fence_nb(barrier,collect): free time: %lfs", \ + count*100*1E-9)); \ + } \ + } \ + if (PMIX_SUCCESS == rc) { \ + TEST_VERBOSE(("%s:%d: Fence successfully completed", \ + my_nspace, my_rank)); \ + } \ +} while (0) + #endif // TEST_COMMON_H diff --git a/opal/mca/pmix/pmix112/pmix/test/test_fence.c b/opal/mca/pmix/pmix112/pmix/test/test_fence.c index 78a545a651b..37315f99b55 100644 --- a/opal/mca/pmix/pmix112/pmix/test/test_fence.c +++ b/opal/mca/pmix/pmix112/pmix/test/test_fence.c @@ -1,6 +1,6 @@ /* * Copyright (c) 2015 Intel, Inc. All rights reserved. - * Copyright (c) 2015 Mellanox Technologies, Inc. + * Copyright (c) 2015-2017 Mellanox Technologies, Inc. * All rights reserved. * $COPYRIGHT$ * @@ -13,18 +13,6 @@ #include "test_fence.h" #include "src/buffer_ops/buffer_ops.h" -static void release_cb(pmix_status_t status, void *cbdata) -{ - int *ptr = (int*)cbdata; - *ptr = 0; -} - -typedef struct { - int in_progress; - pmix_value_t *kv; - int status; -} get_cbdata; - static void get_cb(pmix_status_t status, pmix_value_t *kv, void *cbdata) { get_cbdata *cb = (get_cbdata*)cbdata; @@ -58,121 +46,11 @@ static void add_noise(char *noise_param, char *my_nspace, int my_rank) } } -#define SET_KEY(key, fence_num, ind, use_same_keys) do { \ - if (use_same_keys) { \ - (void)snprintf(key, sizeof(key)-1, "key-%d", ind); \ - } else { \ - (void)snprintf(key, sizeof(key)-1, "key-f%d:%d", fence_num, ind); \ - } \ -} while (0) - -#define PUT(dtype, data, flag, fence_num, ind, use_same_keys) do { \ - char key[50]; \ - pmix_value_t value; \ - SET_KEY(key, fence_num, ind, use_same_keys); \ - PMIX_VAL_SET(&value, dtype, data); \ - TEST_VERBOSE(("%s:%d put key %s", my_nspace, my_rank, key)); \ - if (PMIX_SUCCESS != (rc = PMIx_Put(flag, key, &value))) { \ - TEST_ERROR(("%s:%d: PMIx_Put key %s failed: %d", my_nspace, my_rank, key, rc)); \ - rc = PMIX_ERROR; \ - } \ - PMIX_VALUE_DESTRUCT(&value); \ -} while (0) - -#define GET(dtype, data, ns, r, fence_num, ind, use_same_keys, blocking, ok_notfnd) do { \ - char key[50]; \ - pmix_value_t *val; \ - get_cbdata cbdata; \ - cbdata.status = PMIX_SUCCESS; \ - pmix_proc_t foobar; \ - SET_KEY(key, fence_num, ind, use_same_keys); \ - (void)strncpy(foobar.nspace, ns, PMIX_MAX_NSLEN); \ - foobar.rank = r; \ - TEST_VERBOSE(("%s:%d want to get from %s:%d key %s", my_nspace, my_rank, ns, r, key)); \ - if (blocking) { \ - if (PMIX_SUCCESS != (rc = PMIx_Get(&foobar, key, NULL, 0, &val))) { \ - if( !( rc == PMIX_ERR_NOT_FOUND && ok_notfnd ) ){ \ - TEST_ERROR(("%s:%d: PMIx_Get failed: %d from %s:%d, key %s", my_nspace, my_rank, rc, ns, r, key)); \ - } \ - rc = PMIX_ERROR; \ - } \ - } else { \ - int count; \ - cbdata.in_progress = 1; \ - PMIX_VALUE_CREATE(val, 1); \ - cbdata.kv = val; \ - if (PMIX_SUCCESS != (rc = PMIx_Get_nb(&foobar, key, NULL, 0, get_cb, (void*)&cbdata))) { \ - TEST_VERBOSE(("%s:%d: PMIx_Get_nb failed: %d from %s:%d, key=%s", my_nspace, my_rank, rc, ns, r, key)); \ - rc = PMIX_ERROR; \ - } else { \ - count = 0; \ - while(cbdata.in_progress){ \ - struct timespec ts; \ - ts.tv_sec = 0; \ - ts.tv_nsec = 100; \ - nanosleep(&ts,NULL); \ - count++; \ - } \ - } \ - } \ - if (PMIX_SUCCESS == rc) { \ - if( PMIX_SUCCESS != cbdata.status ){ \ - if( !( rc == PMIX_ERR_NOT_FOUND && ok_notfnd ) ){ \ - TEST_VERBOSE(("%s:%d: PMIx_Get_nb failed: %d from %s:%d, key=%s", \ - my_nspace, my_rank, rc, my_nspace, r)); \ - } \ - rc = PMIX_ERROR; \ - } else if (NULL == val) { \ - TEST_VERBOSE(("%s:%d: PMIx_Get returned NULL value", my_nspace, my_rank)); \ - rc = PMIX_ERROR; \ - } \ - else if (val->type != PMIX_VAL_TYPE_ ## dtype || PMIX_VAL_CMP(dtype, PMIX_VAL_FIELD_ ## dtype((val)), data)) { \ - TEST_VERBOSE(("%s:%d: from %s:%d Key %s value or type mismatch," \ - " want type %d get type %d", \ - my_nspace, my_rank, ns, r, key, PMIX_VAL_TYPE_ ## dtype, val->type)); \ - rc = PMIX_ERROR; \ - } \ - } \ - if (PMIX_SUCCESS == rc) { \ - TEST_VERBOSE(("%s:%d: GET OF %s from %s:%d SUCCEEDED", my_nspace, my_rank, key, ns, r)); \ - PMIX_VALUE_RELEASE(val); \ - } \ -} while (0) - -#define FENCE(blocking, data_ex, pcs, nprocs) do { \ - if( blocking ){ \ - pmix_info_t *info = NULL; \ - size_t ninfo = 0; \ - if (data_ex) { \ - bool value = 1; \ - PMIX_INFO_CREATE(info, 1); \ - (void)strncpy(info->key, PMIX_COLLECT_DATA, PMIX_MAX_KEYLEN); \ - pmix_value_load(&info->value, &value, PMIX_BOOL); \ - ninfo = 1; \ - } \ - rc = PMIx_Fence(pcs, nprocs, info, ninfo); \ - PMIX_INFO_FREE(info, ninfo); \ - } else { \ - int in_progress = 1, count; \ - rc = PMIx_Fence_nb(pcs, nprocs, NULL, 0, release_cb, &in_progress); \ - if ( PMIX_SUCCESS == rc ) { \ - count = 0; \ - while( in_progress ){ \ - struct timespec ts; \ - ts.tv_sec = 0; \ - ts.tv_nsec = 100; \ - nanosleep(&ts,NULL); \ - count++; \ - } \ - TEST_VERBOSE(("PMIx_Fence_nb(barrier,collect): free time: %lfs", \ - count*100*1E-9)); \ - } \ - } \ - if (PMIX_SUCCESS == rc) { \ - TEST_VERBOSE(("%s:%d: Fence successfully completed", \ - my_nspace, my_rank)); \ - } \ -} while (0) +static void release_cb(pmix_status_t status, void *cbdata) +{ + int *ptr = (int*)cbdata; + *ptr = 0; +} int test_fence(test_params params, char *my_nspace, int my_rank) { @@ -194,6 +72,8 @@ int test_fence(test_params params, char *my_nspace, int my_rank) PMIX_CONSTRUCT(&test_fences, pmix_list_t); parse_fence(params.fences, 1); + TEST_VERBOSE(("fences %s\n", params.fences)); + /* cycle thru all the test fence descriptors to find * those that include my nspace/rank */ PMIX_LIST_FOREACH(desc, &test_fences, fence_desc_t) { diff --git a/opal/mca/pmix/pmix112/pmix/test/test_internal.c b/opal/mca/pmix/pmix112/pmix/test/test_internal.c new file mode 100644 index 00000000000..ce9aa4bc788 --- /dev/null +++ b/opal/mca/pmix/pmix112/pmix/test/test_internal.c @@ -0,0 +1,87 @@ +/* + * Copyright (c) 2017 Mellanox Technologies, Inc. + * All rights reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + * + */ + +#include "test_internal.h" + +static void release_cb(pmix_status_t status, void *cbdata) +{ + int *ptr = (int*)cbdata; + *ptr = 0; +} + +static void get_cb(pmix_status_t status, pmix_value_t *kv, void *cbdata) +{ + get_cbdata *cb = (get_cbdata*)cbdata; + if (PMIX_SUCCESS == status) { + pmix_value_xfer(cb->kv, kv); + } + cb->in_progress = 0; + cb->status = status; +} + +int test_internal(char *my_nspace, int my_rank, test_params params) { + int idx; + char sval[PMIX_MAX_NSLEN]; + char key[PMIX_MAX_KEYLEN]; + pmix_value_t value; + pmix_proc_t proc; + pmix_status_t rc; + + PMIX_PROC_CONSTRUCT(&proc); + (void)strncpy(proc.nspace, my_nspace, PMIX_MAX_NSLEN); + proc.rank = my_rank; + + for (idx = 0; idx < params.test_internal; idx++) { + memset(sval, 0, PMIX_MAX_NSLEN); + sprintf(sval, "test_internal:%s:%d:%d", my_nspace, my_rank, idx); + + SET_KEY(key, 0, idx, 1); + value.type = PMIX_STRING; + value.data.string = sval; + if (PMIX_SUCCESS != (rc = PMIx_Store_internal(&proc, key, &value))) { + TEST_ERROR(("%s:%d: PMIx_Store_internal failed: %d", my_nspace, my_rank, rc)); + PMIX_PROC_DESTRUCT(&proc); + return PMIX_ERROR; + } + } + + /* Submit the data */ + if (PMIX_SUCCESS != (rc = PMIx_Commit())) { + TEST_ERROR(("%s:%d: PMIx_Commit failed: %d", my_nspace, my_rank, rc)); + PMIX_LIST_DESTRUCT(&key_replace); + PMIX_PROC_DESTRUCT(&proc); + return PMIX_ERROR; + } + + proc.rank = PMIX_RANK_WILDCARD; + FENCE(1, 1, (&proc), 1); + if (PMIX_SUCCESS != rc) { + TEST_ERROR(("%s:%d: PMIx_Fence failed: %d", my_nspace, my_rank, rc)); + PMIX_LIST_DESTRUCT(&key_replace); + PMIX_PROC_DESTRUCT(&proc); + return rc; + } + + for (idx = 0; idx < params.test_internal; idx++) { + memset(sval, 0, PMIX_MAX_NSLEN); + sprintf(sval, "test_internal:%s:%d:%d", my_nspace, my_rank, idx); + + GET(string, sval, my_nspace, my_rank, 0, idx, 1, 1, 0); + if (PMIX_SUCCESS != rc) { + TEST_ERROR(("%s:%d: PMIx_Get of remote key on local proc", my_nspace, my_rank)); + PMIX_PROC_DESTRUCT(&proc); + return PMIX_ERROR; + } + } + + PMIX_PROC_DESTRUCT(&proc); + return PMIX_SUCCESS; +} diff --git a/opal/mca/pmix/pmix112/pmix/test/test_internal.h b/opal/mca/pmix/pmix112/pmix/test/test_internal.h new file mode 100644 index 00000000000..25bd146d584 --- /dev/null +++ b/opal/mca/pmix/pmix112/pmix/test/test_internal.h @@ -0,0 +1,17 @@ +/* + * Copyright (c) 2017 Mellanox Technologies, Inc. + * All rights reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + * + */ + +#include +#include + +#include "test_common.h" + +int test_internal(char *my_nspace, int my_rank, test_params params); diff --git a/opal/mca/pmix/pmix112/pmix/test/test_replace.c b/opal/mca/pmix/pmix112/pmix/test/test_replace.c new file mode 100644 index 00000000000..bdede6e76ff --- /dev/null +++ b/opal/mca/pmix/pmix112/pmix/test/test_replace.c @@ -0,0 +1,135 @@ +/* + * Copyright (c) 2017 Mellanox Technologies, Inc. + * All rights reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + * + */ + +#include "test_replace.h" + +static void release_cb(pmix_status_t status, void *cbdata) +{ + int *ptr = (int*)cbdata; + *ptr = 0; +} + +static void get_cb(pmix_status_t status, pmix_value_t *kv, void *cbdata) +{ + get_cbdata *cb = (get_cbdata*)cbdata; + if (PMIX_SUCCESS == status) { + pmix_value_xfer(cb->kv, kv); + } + cb->in_progress = 0; + cb->status = status; +} + +static int key_is_replace(int key_idx) { + key_replace_t *item; + + PMIX_LIST_FOREACH(item, &key_replace, key_replace_t) { + if (item->key_idx == key_idx) + return 1; + } + return 0; +} + +int test_replace(char *my_nspace, int my_rank, test_params params) { + int key_idx = 0; + int key_cnt = 0; + char sval[PMIX_MAX_NSLEN]; + pmix_proc_t proc; + pmix_status_t rc; + key_replace_t *item; + + PMIX_CONSTRUCT(&key_replace, pmix_list_t); + parse_replace(params.key_replace, 1, &key_cnt); + + for (key_idx = 0; key_idx < key_cnt; key_idx++) { + memset(sval, 0, PMIX_MAX_NSLEN); + sprintf(sval, "test_replace:%s:%d:%d", my_nspace, my_rank, key_idx); + + PUT(string, sval, PMIX_GLOBAL, 0, key_idx, 1); + if (PMIX_SUCCESS != rc) { + TEST_ERROR(("%s:%d: PMIx_Put failed: %d", my_nspace, my_rank, rc)); + PMIX_LIST_DESTRUCT(&key_replace); + return rc; + } + } + + PMIX_PROC_CONSTRUCT(&proc); + (void)strncpy(proc.nspace, my_nspace, PMIX_MAX_NSLEN); + proc.rank = PMIX_RANK_WILDCARD; + + /* Submit the data */ + if (PMIX_SUCCESS != (rc = PMIx_Commit())) { + TEST_ERROR(("%s:%d: PMIx_Commit failed: %d", my_nspace, my_rank, rc)); + PMIX_LIST_DESTRUCT(&key_replace); + PMIX_PROC_DESTRUCT(&proc); + return PMIX_ERROR; + } + + FENCE(1, 1, (&proc), 1); + if (PMIX_SUCCESS != rc) { + TEST_ERROR(("%s:%d: PMIx_Fence failed: %d", my_nspace, my_rank, rc)); + PMIX_LIST_DESTRUCT(&key_replace); + PMIX_PROC_DESTRUCT(&proc); + return rc; + } + + PMIX_LIST_FOREACH(item, &key_replace, key_replace_t) { + memset(sval, 0, PMIX_MAX_NSLEN); + sprintf(sval, "test_replace:%s:%d:%d: replaced key", my_nspace, my_rank, item->key_idx); + + PUT(string, sval, PMIX_GLOBAL, 0, item->key_idx, 1); + if (PMIX_SUCCESS != rc) { + TEST_ERROR(("%s:%d: PMIx_Put failed: %d", my_nspace, my_rank, rc)); + PMIX_LIST_DESTRUCT(&key_replace); + PMIX_PROC_DESTRUCT(&proc); + return rc; + } + } + + + /* Submit the data */ + if (PMIX_SUCCESS != (rc = PMIx_Commit())) { + TEST_ERROR(("%s:%d: PMIx_Commit failed: %d", my_nspace, my_rank, rc)); + PMIX_LIST_DESTRUCT(&key_replace); + PMIX_PROC_DESTRUCT(&proc); + return PMIX_ERROR; + } + + FENCE(1, 1, (&proc), 1); + if (PMIX_SUCCESS != rc) { + TEST_ERROR(("%s:%d: PMIx_Fence failed: %d", my_nspace, my_rank, rc)); + PMIX_LIST_DESTRUCT(&key_replace); + PMIX_PROC_DESTRUCT(&proc); + return rc; + } + + for (key_idx = 0; key_idx < key_cnt; key_idx++) { + memset(sval, 0, PMIX_MAX_NSLEN); + + if (key_is_replace(key_idx)) { + sprintf(sval, "test_replace:%s:%d:%d: replaced key", my_nspace, my_rank, key_idx); + } else { + sprintf(sval, "test_replace:%s:%d:%d", my_nspace, my_rank, key_idx); + } + + + GET(string, sval, my_nspace, my_rank, 0, key_idx, 1, 1, 0); + if (PMIX_SUCCESS != rc) { + TEST_ERROR(("%s:%d: PMIx_Get of remote key on local proc", my_nspace, my_rank)); + PMIX_LIST_DESTRUCT(&key_replace); + PMIX_PROC_DESTRUCT(&proc); + return PMIX_ERROR; + } + } + + PMIX_LIST_DESTRUCT(&key_replace); + PMIX_PROC_DESTRUCT(&proc); + return PMIX_SUCCESS; +} diff --git a/opal/mca/pmix/pmix112/pmix/test/test_replace.h b/opal/mca/pmix/pmix112/pmix/test/test_replace.h new file mode 100644 index 00000000000..5af9d2ed5ca --- /dev/null +++ b/opal/mca/pmix/pmix112/pmix/test/test_replace.h @@ -0,0 +1,17 @@ +/* + * Copyright (c) 2017 Mellanox Technologies, Inc. + * All rights reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + * + */ + +#include +#include + +#include "test_common.h" + +int test_replace(char *my_nspace, int my_rank, test_params params); diff --git a/opal/mca/pmix/pmix112/pmix/test/utils.c b/opal/mca/pmix/pmix112/pmix/test/utils.c index 87c7e6ad9f4..22f60d32787 100644 --- a/opal/mca/pmix/pmix112/pmix/test/utils.c +++ b/opal/mca/pmix/pmix112/pmix/test/utils.c @@ -1,6 +1,6 @@ /* * Copyright (c) 2015 Intel, Inc. All rights reserved. - * Copyright (c) 2015 Mellanox Technologies, Inc. + * Copyright (c) 2015-2017 Mellanox Technologies, Inc. * All rights reserved. * $COPYRIGHT$ * @@ -153,6 +153,16 @@ void set_client_argv(test_params *params, char ***argv) if (params->test_error) { pmix_argv_append_nosize(argv, "--test-error"); } + if (params->key_replace) { + pmix_argv_append_nosize(argv, "--test-replace"); + pmix_argv_append_nosize(argv, params->key_replace); + } + if (params->test_internal) { + char tmp[32]; + sprintf(tmp, "%d", params->test_internal); + pmix_argv_append_nosize(argv, "--test-internal"); + pmix_argv_append_nosize(argv, tmp); + } } int launch_clients(int num_procs, char *binary, char *** client_env, char ***base_argv)