diff --git a/.gitignore b/.gitignore index a07b9c927e7..d9de74cae8f 100644 --- a/.gitignore +++ b/.gitignore @@ -199,6 +199,8 @@ ompi/mca/rte/orte/mpirun.1 ompi/mca/sharedfp/addproc/mca_sharedfp_addproc_control +ompi/mca/topo/treematch/config.h + ompi/mpi/c/profile/p*.c ompi/mpi/fortran/configure-fortran-output.h @@ -228,6 +230,52 @@ ompi/mpi/fortran/use-mpi-tkr/mpi_kinds.ompi_module ompi/mpi/fortran/use-mpi-tkr/mpi-tkr-sizeof.f90 ompi/mpi/fortran/use-mpi-tkr/mpi-tkr-sizeof.h +ompi/mpiext/pcollreq/c/profile/pallgather_init.c +ompi/mpiext/pcollreq/c/profile/pallgatherv_init.c +ompi/mpiext/pcollreq/c/profile/pallreduce_init.c +ompi/mpiext/pcollreq/c/profile/palltoall_init.c +ompi/mpiext/pcollreq/c/profile/palltoallv_init.c +ompi/mpiext/pcollreq/c/profile/palltoallw_init.c +ompi/mpiext/pcollreq/c/profile/pbarrier_init.c +ompi/mpiext/pcollreq/c/profile/pbcast_init.c +ompi/mpiext/pcollreq/c/profile/pexscan_init.c +ompi/mpiext/pcollreq/c/profile/pgather_init.c +ompi/mpiext/pcollreq/c/profile/pgatherv_init.c +ompi/mpiext/pcollreq/c/profile/pmpiext_pcollreq_c.h +ompi/mpiext/pcollreq/c/profile/pneighbor_allgather_init.c +ompi/mpiext/pcollreq/c/profile/pneighbor_allgatherv_init.c +ompi/mpiext/pcollreq/c/profile/pneighbor_alltoall_init.c +ompi/mpiext/pcollreq/c/profile/pneighbor_alltoallv_init.c +ompi/mpiext/pcollreq/c/profile/pneighbor_alltoallw_init.c +ompi/mpiext/pcollreq/c/profile/preduce_init.c +ompi/mpiext/pcollreq/c/profile/preduce_scatter_block_init.c +ompi/mpiext/pcollreq/c/profile/preduce_scatter_init.c +ompi/mpiext/pcollreq/c/profile/pscan_init.c +ompi/mpiext/pcollreq/c/profile/pscatter_init.c +ompi/mpiext/pcollreq/c/profile/pscatterv_init.c +ompi/mpiext/pcollreq/mpif-h/profile/pallgather_init_f.c +ompi/mpiext/pcollreq/mpif-h/profile/pallgatherv_init_f.c +ompi/mpiext/pcollreq/mpif-h/profile/pallreduce_init_f.c +ompi/mpiext/pcollreq/mpif-h/profile/palltoall_init_f.c +ompi/mpiext/pcollreq/mpif-h/profile/palltoallv_init_f.c 
+ompi/mpiext/pcollreq/mpif-h/profile/palltoallw_init_f.c +ompi/mpiext/pcollreq/mpif-h/profile/pbarrier_init_f.c +ompi/mpiext/pcollreq/mpif-h/profile/pbcast_init_f.c +ompi/mpiext/pcollreq/mpif-h/profile/pexscan_init_f.c +ompi/mpiext/pcollreq/mpif-h/profile/pgather_init_f.c +ompi/mpiext/pcollreq/mpif-h/profile/pgatherv_init_f.c +ompi/mpiext/pcollreq/mpif-h/profile/pneighbor_allgather_init_f.c +ompi/mpiext/pcollreq/mpif-h/profile/pneighbor_allgatherv_init_f.c +ompi/mpiext/pcollreq/mpif-h/profile/pneighbor_alltoall_init_f.c +ompi/mpiext/pcollreq/mpif-h/profile/pneighbor_alltoallv_init_f.c +ompi/mpiext/pcollreq/mpif-h/profile/pneighbor_alltoallw_init_f.c +ompi/mpiext/pcollreq/mpif-h/profile/preduce_init_f.c +ompi/mpiext/pcollreq/mpif-h/profile/preduce_scatter_block_init_f.c +ompi/mpiext/pcollreq/mpif-h/profile/preduce_scatter_init_f.c +ompi/mpiext/pcollreq/mpif-h/profile/pscan_init_f.c +ompi/mpiext/pcollreq/mpif-h/profile/pscatter_init_f.c +ompi/mpiext/pcollreq/mpif-h/profile/pscatterv_init_f.c + ompi/mpi/java/java/mpi ompi/mpi/java/java/*.jar ompi/mpi/java/java/*.h @@ -319,6 +367,8 @@ opal/mca/hwloc/base/static-components.h.new.struct opal/mca/installdirs/config/install_dirs.h +!opal/mca/pmix/pmix*/pmix/AUTHORS +!opal/mca/pmix/pmix*/pmix/contrib/perf_tools/Makefile opal/mca/pmix/pmix*/pmix/include/pmix/autogen/config.h opal/mca/pmix/pmix*/pmix/include/pmix/autogen/config.h.in opal/mca/pmix/pmix*/pmix/src/include/private/autogen/config.h.in @@ -351,6 +401,24 @@ opal/mca/pmix/ext3x/ext3x_client.c opal/mca/pmix/ext3x/ext3x_component.c opal/mca/pmix/ext3x/ext3x_server_north.c opal/mca/pmix/ext3x/ext3x_server_south.c +opal/mca/pmix/pmix3x/pmix/config/mca_library_paths.txt +opal/mca/pmix/pmix3x/pmix/config/test-driver +opal/mca/pmix/pmix3x/pmix/test/run_tests00.pl +opal/mca/pmix/pmix3x/pmix/test/run_tests01.pl +opal/mca/pmix/pmix3x/pmix/test/run_tests02.pl +opal/mca/pmix/pmix3x/pmix/test/run_tests03.pl +opal/mca/pmix/pmix3x/pmix/test/run_tests04.pl 
+opal/mca/pmix/pmix3x/pmix/test/run_tests05.pl +opal/mca/pmix/pmix3x/pmix/test/run_tests06.pl +opal/mca/pmix/pmix3x/pmix/test/run_tests07.pl +opal/mca/pmix/pmix3x/pmix/test/run_tests08.pl +opal/mca/pmix/pmix3x/pmix/test/run_tests09.pl +opal/mca/pmix/pmix3x/pmix/test/run_tests10.pl +opal/mca/pmix/pmix3x/pmix/test/run_tests11.pl +opal/mca/pmix/pmix3x/pmix/test/run_tests12.pl +opal/mca/pmix/pmix3x/pmix/test/run_tests13.pl +opal/mca/pmix/pmix3x/pmix/test/run_tests14.pl +opal/mca/pmix/pmix3x/pmix/test/run_tests15.pl opal/tools/opal-checkpoint/opal-checkpoint opal/tools/opal-checkpoint/opal-checkpoint.1 diff --git a/LICENSE b/LICENSE index c835765b580..29b02918cee 100644 --- a/LICENSE +++ b/LICENSE @@ -8,24 +8,24 @@ corresponding files. Copyright (c) 2004-2010 The Trustees of Indiana University and Indiana University Research and Technology Corporation. All rights reserved. -Copyright (c) 2004-2017 The University of Tennessee and The University +Copyright (c) 2004-2019 The University of Tennessee and The University of Tennessee Research Foundation. All rights reserved. -Copyright (c) 2004-2010 High Performance Computing Center Stuttgart, +Copyright (c) 2004-2019 High Performance Computing Center Stuttgart, University of Stuttgart. All rights reserved. Copyright (c) 2004-2008 The Regents of the University of California. All rights reserved. -Copyright (c) 2006-2017 Los Alamos National Security, LLC. All rights +Copyright (c) 2006-2018 Los Alamos National Security, LLC. All rights reserved. -Copyright (c) 2006-2017 Cisco Systems, Inc. All rights reserved. +Copyright (c) 2006-2019 Cisco Systems, Inc. All rights reserved. Copyright (c) 2006-2010 Voltaire, Inc. All rights reserved. -Copyright (c) 2006-2017 Sandia National Laboratories. All rights reserved. +Copyright (c) 2006-2018 Sandia National Laboratories. All rights reserved. Copyright (c) 2006-2010 Sun Microsystems, Inc. All rights reserved. Use is subject to license terms. 
-Copyright (c) 2006-2017 The University of Houston. All rights reserved. +Copyright (c) 2006-2019 The University of Houston. All rights reserved. Copyright (c) 2006-2009 Myricom, Inc. All rights reserved. -Copyright (c) 2007-2017 UT-Battelle, LLC. All rights reserved. -Copyright (c) 2007-2017 IBM Corporation. All rights reserved. +Copyright (c) 2007-2018 UT-Battelle, LLC. All rights reserved. +Copyright (c) 2007-2019 IBM Corporation. All rights reserved. Copyright (c) 1998-2005 Forschungszentrum Juelich, Juelich Supercomputing Centre, Federal Republic of Germany Copyright (c) 2005-2008 ZIH, TU Dresden, Federal Republic of Germany @@ -35,7 +35,7 @@ Copyright (c) 2008-2009 Institut National de Recherche en Informatique. All rights reserved. Copyright (c) 2007 Lawrence Livermore National Security, LLC. All rights reserved. -Copyright (c) 2007-2017 Mellanox Technologies. All rights reserved. +Copyright (c) 2007-2019 Mellanox Technologies. All rights reserved. Copyright (c) 2006-2010 QLogic Corporation. All rights reserved. Copyright (c) 2008-2017 Oak Ridge National Labs. All rights reserved. Copyright (c) 2006-2012 Oracle and/or its affiliates. All rights reserved. @@ -45,15 +45,17 @@ Copyright (c) 2016 ARM, Inc. All rights reserved. Copyright (c) 2010-2011 Alex Brick . All rights reserved. Copyright (c) 2012 The University of Wisconsin-La Crosse. All rights reserved. -Copyright (c) 2013-2016 Intel, Inc. All rights reserved. +Copyright (c) 2013-2019 Intel, Inc. All rights reserved. Copyright (c) 2011-2017 NVIDIA Corporation. All rights reserved. Copyright (c) 2016 Broadcom Limited. All rights reserved. -Copyright (c) 2011-2017 Fujitsu Limited. All rights reserved. +Copyright (c) 2011-2019 Fujitsu Limited. All rights reserved. Copyright (c) 2014-2015 Hewlett-Packard Development Company, LP. All rights reserved. -Copyright (c) 2013-2017 Research Organization for Information Science (RIST). +Copyright (c) 2013-2019 Research Organization for Information Science (RIST). 
All rights reserved. -Copyright (c) 2017 Amazon.com, Inc. or its affiliates. All Rights +Copyright (c) 2017-2018 Amazon.com, Inc. or its affiliates. All Rights + reserved. +Copyright (c) 2019 Triad National Security, LLC. All rights reserved. $COPYRIGHT$ diff --git a/Makefile.ompi-rules b/Makefile.ompi-rules index 2a9cb2b7b5b..d7e1041ca9f 100644 --- a/Makefile.ompi-rules +++ b/Makefile.ompi-rules @@ -1,5 +1,5 @@ # -*- makefile -*- -# Copyright (c) 2008-2015 Cisco Systems, Inc. All rights reserved. +# Copyright (c) 2008-2018 Cisco Systems, Inc. All rights reserved. # Copyright (c) 2008 Sun Microsystems, Inc. All rights reserved. # $COPYRIGHT$ # @@ -20,7 +20,7 @@ if ! MAN_PAGE_BUILD_USEMPIF08_BINDINGS endif .1in.1: - $(OMPI_V_GEN) $(top_srcdir)/ompi/mpi/man/make_manpage.pl \ + $(OMPI_V_GEN) $(top_srcdir)/config/make_manpage.pl \ --package-name='@PACKAGE_NAME@' \ --package-version='@PACKAGE_VERSION@' \ --ompi-date='@OMPI_RELEASE_DATE@' \ @@ -30,7 +30,7 @@ endif --output=$@ .3in.3: - $(OMPI_V_GEN) $(top_srcdir)/ompi/mpi/man/make_manpage.pl \ + $(OMPI_V_GEN) $(top_srcdir)/config/make_manpage.pl \ --package-name='@PACKAGE_NAME@' \ --package-version='@PACKAGE_VERSION@' \ --ompi-date='@OMPI_RELEASE_DATE@' \ @@ -41,7 +41,7 @@ endif --output=$@ .7in.7: - $(OMPI_V_GEN) $(top_srcdir)/ompi/mpi/man/make_manpage.pl \ + $(OMPI_V_GEN) $(top_srcdir)/config/make_manpage.pl \ --package-name='@PACKAGE_NAME@' \ --package-version='@PACKAGE_VERSION@' \ --ompi-date='@OMPI_RELEASE_DATE@' \ diff --git a/NEWS b/NEWS index 2d823536c55..b2f4789136a 100644 --- a/NEWS +++ b/NEWS @@ -12,9 +12,9 @@ Copyright (c) 2006-2018 Cisco Systems, Inc. All rights reserved. Copyright (c) 2006 Voltaire, Inc. All rights reserved. Copyright (c) 2006 Sun Microsystems, Inc. All rights reserved. Use is subject to license terms. -Copyright (c) 2006-2017 Los Alamos National Security, LLC. All rights +Copyright (c) 2006-2018 Los Alamos National Security, LLC. All rights reserved. 
-Copyright (c) 2010-2017 IBM Corporation. All rights reserved. +Copyright (c) 2010-2019 IBM Corporation. All rights reserved. Copyright (c) 2012 Oak Ridge National Labs. All rights reserved. Copyright (c) 2012 Sandia National Laboratories. All rights reserved. Copyright (c) 2012 University of Houston. All rights reserved. @@ -22,6 +22,8 @@ Copyright (c) 2013 NVIDIA Corporation. All rights reserved. Copyright (c) 2013-2018 Intel, Inc. All rights reserved. Copyright (c) 2018 Amazon.com, Inc. or its affiliates. All Rights reserved. +Copyright (c) 2019 Triad National Security, LLC. All rights + reserved. $COPYRIGHT$ Additional copyrights may follow @@ -55,30 +57,202 @@ included in the vX.Y.Z section and be denoted as: (** also appeared: A.B.C) -- indicating that this item was previously included in release version vA.B.C. -Master (not on release branches yet) ------------------------------------- - -********************************************************************** -* PRE-DEPRECATION WARNING: MPIR Support -* -* As was announced in summer 2017, Open MPI is deprecating support for -* MPIR-based tools beginning with the future release of OMPI v5.0, with -* full removal of that support tentatively planned for OMPI v6.0. -* -* This serves as a pre-deprecation warning to provide tools developers -* with sufficient time to migrate to PMIx. Support for PMIx-based -* tools will be rolled out during the OMPI v4.x series. No runtime -* deprecation warnings will be output during this time. -* -* Runtime deprecation warnings will be output beginning with the OMPI v5.0 -* release whenever MPIR-based tools connect to Open MPI's mpirun/mpiexec -* launcher. -********************************************************************** +4.0.2 -- September, 2019 +------------------------ +- Update embedded PMIx to 3.1.4 +- Enhance Open MPI to detect when processes are running in + different name spaces on the same node, in which case the + vader CMA single copy mechanism is disabled. 
Thanks + to Adrian Reber for reporting and providing a fix. +- Fix an issue with ORTE job tree launch mechanism. Thanks + to @lanyangyang for reporting. +- Fix an issue with env processing when running as root. + Thanks to Simon Byrne for reporting and providing a fix. +- Fix Fortran MPI_FILE_GET_POSITION return code bug. + Thanks to Wei-Keng Liao for reporting. +- Fix user defined datatypes/ops leak in nonblocking base collective + component. Thanks to Andrey Maslennikov for verifying fix. +- Fixed shared memory not working with spawned processes. + Thanks to @rodarima for reporting. +- Fix data corruption of overlapping datatypes on sends. + Thanks to DKRZ for reporting. +- Fix segfault in oob_tcp component on close with active listeners. + Thanks to Orivej Desh for reporting and providing a fix. +- Fix divide by zero segfault in ompio. + Thanks to @haraldkl for reporting and providing a fix. +- Fix finalize of flux components. + Thanks to Stephen Herbein and Jim Garlick for providing a fix. +- Fix osc_rdma_acc_single_intrinsic regression. + Thanks to Joseph Schuchart for reporting and providing a fix. +- Fix hostnames with large integers. + Thanks to @perrynzhou for reporting and providing a fix. +- Fix Deadlock in MPI_Fetch_and_op when using UCX. + Thanks to Joseph Schuchart for reporting. +- Fix the SLURM plm for mpirun-based launching. + Thanks to Jordon Hayes for reporting and providing a fix. +- Prevent grep failure in rpmbuild from aborting. + Thanks to Daniel Letai for reporting. +- Fix btl/vader finalize sequence. + Thanks to Daniel Vollmer for reporting. +- Fix pml/ob1 local handle sent during PUT control message. + Thanks to @EmmanuelBRELLE for reporting and providing a fix. +- Fix Memory leak with persistent MPI sends and the ob1 "get" protocol. + Thanks to @s-kuberski for reporting.
+- v4.0.x: mpi: mark MPI_COMBINER_{HVECTOR,HINDEXED,STRUCT}_INTEGER + removed unless configured with --enable-mpi1-compatibility +- Fix make-authors.pl when run in a git submodule. + Thanks to Michael Heinz for reporting and providing a fix. +- Fix deadlock with mpi_assert_allow_overtaking in MPI_Issend. + Thanks to Joseph Schuchart and George Bosilca for reporting. +- Add compilation flag to allow unwinding through files that are + present in the stack when attaching with MPIR. + Thanks to James A Clark for reporting and providing a fix. -- Fix rank-by algorithms to properly rank by object and span -- Do not build Open SHMEM layer when there are no SPMLs available. - Currently, this means the Open SHMEM layer will only build if +Known issues: + +- There is a known issue with the OFI libfabric and PSM2 MTLs when trying to send + very long (> 4 GBytes) messages. In this release, these MTLs will catch + this case and abort the transfer. A future release will provide a + better solution to this issue. + +4.0.1 -- March, 2019 +-------------------- + +- Update embedded PMIx to 3.1.2. +- Fix an issue with Vader (shared-memory) transport on OS-X. Thanks + to Daniel Vollmer for reporting. +- Fix a problem with the usNIC BTL Makefile. Thanks to George Marselis + for reporting. +- Fix an issue when using --enable-visibility configure option + and older versions of hwloc. Thanks to Ben Menadue for reporting + and providing a fix. +- Fix an issue with MPI_WIN_CREATE_DYNAMIC and MPI_GET from self. + Thanks to Bart Janssens for reporting. +- Fix an issue of excessive compiler warning messages from mpi.h + when using newer C++ compilers. Thanks to @Shadow-fax for + reporting. +- Fix a problem when building Open MPI using clang 5.0. +- Fix a problem with MPI_WIN_CREATE when using UCX. Thanks + to Adam Simpson for reporting. +- Fix a memory leak encountered for certain MPI datatype + destructor operations. Thanks to Axel Huebl for reporting. 
+- Fix several problems with MPI RMA accumulate operations. + Thanks to Jeff Hammond for reporting. +- Fix possible race condition in closing some file descriptors + during job launch using mpirun. Thanks to Jason Williams + for reporting and providing a fix. +- Fix a problem in OMPIO for large individual write operations. + Thanks to Axel Huebl for reporting. +- Fix a problem with parsing of map-by ppr options to mpirun. + Thanks to David Rich for reporting. +- Fix a problem observed when using the mpool hugepage component. Thanks + to Hunter Easterday for reporting and fixing. +- Fix valgrind warning generated when invoking certain MPI Fortran + data type creation functions. Thanks to @rtoijala for reporting. +- Fix a problem when trying to build with a PMIX 3.1 or newer + release. Thanks to Alastair McKinstry for reporting. +- Fix a problem encountered with building MPI F08 module files. + Thanks to Igor Andriyash and Axel Huebl for reporting. +- Fix two memory leaks encountered for certain MPI-RMA usage patterns. + Thanks to Joseph Schuchart for reporting and fixing. +- Fix a problem with the ORTE rmaps_base_oversubscribe MCA parameter. + Thanks to @iassiour for reporting. +- Fix a problem with UCX PML default error handler for MPI communicators. + Thanks to Marcin Krotkiewski for reporting. +- Fix various issues with OMPIO uncovered by the testmpio test suite. + +4.0.0 -- September, 2018 +------------------------ + +- OSHMEM updated to the OpenSHMEM 1.4 API. +- Do not build OpenSHMEM layer when there are no SPMLs available. + Currently, this means the OpenSHMEM layer will only build if a MXM or UCX library is found. +- A UCX BTL was added for enhanced MPI RMA support using UCX +- With this release, OpenIB BTL now only supports iWarp and RoCE by default. +- Updated internal HWLOC to 2.0.2 +- Updated internal PMIx to 3.0.2 +- Change the priority for selecting external versus internal HWLOC + and PMIx packages to build.
Starting with this release, configure + by default selects available external HWLOC and PMIx packages over + the internal ones. +- Updated internal ROMIO to 3.2.1. +- Removed support for the MXM MTL. +- Removed support for SCIF. +- Improved CUDA support when using UCX. +- Enable use of CUDA allocated buffers for OMPIO. +- Improved support for two phase MPI I/O operations when using OMPIO. +- Added support for Software-based Performance Counters, see + https://github.com/davideberius/ompi/wiki/How-to-Use-Software-Based-Performance-Counters-(SPCs)-in-Open-MPI +- Change MTL OFI from opting-IN on "psm,psm2,gni" to opting-OUT on + "shm,sockets,tcp,udp,rstream" +- Various improvements to MPI RMA performance when using RDMA + capable interconnects. +- Update memkind component to use the memkind 1.6 public API. +- Fix a problem with javadoc builds using OpenJDK 11. Thanks to + Siegmar Gross for reporting. +- Fix a memory leak using UCX. Thanks to Charles Taylor for reporting. +- Fix hangs in MPI_FINALIZE when using UCX. +- Fix a problem with building Open MPI using an external PMIx 2.1.2 + library. Thanks to Marcin Krotkiewski for reporting. +- Fix race conditions in Vader (shared memory) transport. +- Fix problems with use of newer map-by mpirun options. Thanks to + Tony Reina for reporting. +- Fix rank-by algorithms to properly rank by object and span +- Allow for running as root if two environment variables are set. + Requested by Axel Huebl. +- Fix a problem with building the Java bindings when using Java 10. + Thanks to Bryce Glover for reporting. +- Fix a problem with ORTE not reporting error messages if an application + terminated normally but exited with non-zero error code. Thanks to + Emre Brookes for reporting.
+ +3.1.2 -- August, 2018 +------------------------ + +- A subtle race condition bug was discovered in the "vader" BTL + (shared memory communications) that, in rare instances, can cause + MPI processes to crash or incorrectly classify (or effectively drop) + an MPI message sent via shared memory. If you are using the "ob1" + PML with "vader" for shared memory communication (note that vader is + the default for shared memory communication with ob1), you need to + upgrade to v3.1.2 or later to fix this issue. You may also upgrade + to the following versions to fix this issue: + - Open MPI v2.1.5 (expected end of August, 2018) or later in the + v2.1.x series + - Open MPI v3.0.1 (released March, 2018) or later in the v3.0.x + series +- Assorted Portals 4.0 bug fixes. +- Fix for possible data corruption in MPI_BSEND. +- Move shared memory file for vader btl into /dev/shm on Linux. +- Fix for MPI_ISCATTER/MPI_ISCATTERV Fortran interfaces with MPI_IN_PLACE. +- Upgrade PMIx to v2.1.3. +- Numerous One-sided bug fixes. +- Fix for race condition in uGNI BTL. +- Improve handling of large number of interfaces with TCP BTL. +- Numerous UCX bug fixes. + +3.1.1 -- June, 2018 +------------------- + +- Fix potential hang in UCX PML during MPI_FINALIZE +- Update internal PMIx to v2.1.2rc2 to fix forward version compatibility. +- Add new MCA parameter osc_sm_backing_store to allow users to specify + where in the filesystem the backing file for the shared memory + one-sided component should live. Defaults to /dev/shm on Linux. +- Fix potential hang on non-x86 platforms when using builds with + optimization flags turned off. +- Disable osc/pt2pt when using MPI_THREAD_MULTIPLE due to numerous + race conditions in the component. +- Fix dummy variable names for the mpi and mpi_f08 Fortran bindings to + match the MPI standard. This may break applications which use + name-based parameters in Fortran which used our internal names + rather than those documented in the MPI standard. 
+- Revamp Java detection to properly handle new Java versions which do + not provide a javah wrapper. +- Fix RMA function signatures for use-mpi-f08 bindings to have the + asynchronous property on all buffers. +- Improved configure logic for finding the UCX library. 3.1.0 -- May, 2018 ------------------ @@ -246,6 +420,68 @@ Known issues: - MPI_Connect/accept between applications started by different mpirun commands will fail, even if ompi-server is running. +2.1.5 -- August 2018 +-------------------- + +- A subtle race condition bug was discovered in the "vader" BTL + (shared memory communications) that, in rare instances, can cause + MPI processes to crash or incorrectly classify (or effectively drop) + an MPI message sent via shared memory. If you are using the "ob1" + PML with "vader" for shared memory communication (note that vader is + the default for shared memory communication with ob1), you need to + upgrade to v2.1.5 to fix this issue. You may also upgrade to the + following versions to fix this issue: + - Open MPI v3.0.1 (released March, 2018) or later in the v3.0.x + series + - Open MPI v3.1.2 (expected end of August, 2018) or later +- A link issue was fixed when the UCX library was not located in the + linker-default search paths. + +2.1.4 -- August, 2018 +--------------------- + +Bug fixes/minor improvements: +- Disable the POWER 7/BE block in configure. Note that POWER 7/BE is + still not a supported platform, but it is no longer automatically + disabled. See + https://github.com/open-mpi/ompi/issues/4349#issuecomment-374970982 + for more information. +- Fix bug with request-based one-sided MPI operations when using the + "rdma" component. +- Fix issue with large data structure in the TCP BTL causing problems + in some environments. Thanks to @lgarithm for reporting the issue. +- Minor Cygwin build fixes.
+- Minor fixes for the openib BTL: + - Support for the QLogic RoCE HCA + - Support for the Broadcom Cumulus RoCE HCA + - Enable support for HDR link speeds +- Fix MPI_FINALIZED hang if invoked from an attribute destructor + during the MPI_COMM_SELF destruction in MPI_FINALIZE. Thanks to + @AndrewGaspar for reporting the issue. +- Java fixes: + - Modernize Java framework detection, especially on OS X/MacOS. + Thanks to Bryce Glover for reporting and submitting the fixes. + - Prefer "javac -h" to "javah" to support newer Java frameworks. +- Fortran fixes: + - Use conformant dummy parameter names for Fortran bindings. Thanks + to Themos Tsikas for reporting and submitting the fixes. + - Build the MPI_SIZEOF() interfaces in the "TKR"-style "mpi" module + whenever possible. Thanks to Themos Tsikas for reporting the + issue. + - Fix array of argv handling for the Fortran bindings of + MPI_COMM_SPAWN_MULTIPLE (and its associated man page). + - Make NAG Fortran compiler support more robust in configure. +- Disable the "pt2pt" one-sided MPI component when MPI_THREAD_MULTIPLE + is used. This component is simply not safe in MPI_THREAD_MULTIPLE + scenarios, and will not be fixed in the v2.1.x series. +- Make the "external" hwloc component fail gracefully if it tries + to use an hwloc v2.x.y installation. hwloc v2.x.y will not be + supported in the Open MPI v2.1.x series. +- Fix "vader" shared memory support for messages larger than 2GB. + Thanks to Heiko Bauke for the bug report. +- Configure fixes for external PMI directory detection. Thanks to + Davide Vanzo for the report. + 2.1.3 -- March, 2018 -------------------- diff --git a/README b/README index 86e0f13696a..e6b8018f940 100644 --- a/README +++ b/README @@ -8,11 +8,11 @@ Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, University of Stuttgart. All rights reserved. Copyright (c) 2004-2007 The Regents of the University of California. All rights reserved. -Copyright (c) 2006-2017 Cisco Systems, Inc.
All rights reserved. +Copyright (c) 2006-2018 Cisco Systems, Inc. All rights reserved. Copyright (c) 2006-2011 Mellanox Technologies. All rights reserved. Copyright (c) 2006-2012 Oracle and/or its affiliates. All rights reserved. Copyright (c) 2007 Myricom, Inc. All rights reserved. -Copyright (c) 2008-2017 IBM Corporation. All rights reserved. +Copyright (c) 2008-2019 IBM Corporation. All rights reserved. Copyright (c) 2010 Oak Ridge National Labs. All rights reserved. Copyright (c) 2011 University of Houston. All rights reserved. Copyright (c) 2013-2017 Intel, Inc. All rights reserved. @@ -21,6 +21,8 @@ Copyright (c) 2017-2018 Los Alamos National Security, LLC. All rights reserved. Copyright (c) 2017 Research Organization for Information Science and Technology (RIST). All rights reserved. +Copyright (c) 2019 Triad National Security, LLC. All rights + reserved. $COPYRIGHT$ @@ -64,7 +66,7 @@ Much, much more information is also available in the Open MPI FAQ: =========================================================================== The following abbreviated list of release notes applies to this code -base as of this writing (March 2017): +base as of this writing (February 2019): General notes ------------- @@ -115,7 +117,7 @@ General notes - The run-time systems that are currently supported are: - rsh / ssh - PBS Pro, Torque - - Platform LSF (v7.0.2 and later) + - Platform LSF (tested with v9.1.1 and later) - SLURM - Cray XE, XC, and XK - Oracle Grid Engine (OGE) 6.1, 6.2 and open source Grid Engine @@ -479,6 +481,63 @@ MPI Functionality and Features - All MPI-3 functionality is supported. +- Note that starting with Open MPI v4.0.0, prototypes for several + legacy MPI-1 symbols that were deleted in the MPI-3.0 specification + (which was published in 2012) are no longer available by default in + mpi.h. Specifically, several MPI-1 symbols were deprecated in the + 1996 publishing of the MPI-2.0 specification. 
These deprecated + symbols were eventually removed from the MPI-3.0 specification in + 2012. + + The symbols that now no longer appear by default in Open MPI's mpi.h + are: + + - MPI_Address (replaced by MPI_Get_address) + - MPI_Errhandler_create (replaced by MPI_Comm_create_errhandler) + - MPI_Errhandler_get (replaced by MPI_Comm_get_errhandler) + - MPI_Errhandler_set (replaced by MPI_Comm_set_errhandler) + - MPI_Type_extent (replaced by MPI_Type_get_extent) + - MPI_Type_hindexed (replaced by MPI_Type_create_hindexed) + - MPI_Type_hvector (replaced by MPI_Type_create_hvector) + - MPI_Type_lb (replaced by MPI_Type_get_extent) + - MPI_Type_struct (replaced by MPI_Type_create_struct) + - MPI_Type_ub (replaced by MPI_Type_get_extent) + - MPI_LB (replaced by MPI_Type_create_resized) + - MPI_UB (replaced by MPI_Type_create_resized) + - MPI_COMBINER_HINDEXED_INTEGER + - MPI_COMBINER_HVECTOR_INTEGER + - MPI_COMBINER_STRUCT_INTEGER + - MPI_Handler_function (replaced by MPI_Comm_errhandler_function) + + Although these symbols are no longer prototyped in mpi.h, they + are still present in the MPI library in Open MPI v4.0.1 and later + releases of the v4.0.x release stream. This enables legacy MPI + applications to link and run successfully with + Open MPI v4.0.x, even though they will fail to compile. + + *** Future releases of Open MPI beyond the v4.0.x series may + remove these symbols altogether. + + *** The Open MPI team STRONGLY encourages all MPI application + developers to stop using these constructs that were first + deprecated over 20 years ago, and finally removed from the MPI + specification in MPI-3.0 (in 2012). + + *** The Open MPI FAQ (https://www.open-mpi.org/faq/?category=mpi-removed) + contains examples of how to update legacy MPI applications using + these deleted symbols to use the "new" symbols. 
+ + All that being said, if you are unable to immediately update your + application to stop using these legacy MPI-1 symbols, you can + re-enable them in mpi.h by configuring Open MPI with the + --enable-mpi1-compatibility flag. + + NOTE: Open MPI v4.0.0 had an error where these symbols were not + included in the library if configured without --enable-mpi1-compatibility + (see https://github.com/open-mpi/ompi/issues/6114). + This is fixed in v4.0.1, where --enable-mpi1-compatibility + flag only controls what declarations are present in the MPI header. + - Rank reordering support is available using the TreeMatch library. It is activated for the graph and dist_graph topologies. @@ -507,7 +566,6 @@ MPI Functionality and Features - yalla (1) The cm PML and the following MTLs support MPI_THREAD_MULTIPLE: - - MXM - ofi (Libfabric) - portals4 @@ -522,8 +580,7 @@ MPI Functionality and Features - vader (shared memory) The openib BTL's RDMACM based connection setup mechanism is also not - thread safe. The default UDCM method should be used for - applications requiring MPI_THREAD_MULTIPLE support. + thread safe. Currently, MPI File operations are not thread safe even if MPI is initialized for MPI_THREAD_MULTIPLE support. @@ -560,7 +617,7 @@ MPI Functionality and Features OpenSHMEM Functionality and Features ------------------------------------ -- All OpenSHMEM-1.3 functionality is supported. +- All OpenSHMEM-1.4 functionality is supported starting in release v4.0.1. MPI Collectives @@ -592,20 +649,19 @@ OpenSHMEM Collectives Network Support --------------- -- There are four main MPI network models available: "ob1", "cm", - "yalla", and "ucx". "ob1" uses BTL ("Byte Transfer Layer") +- There are several main MPI network models available: "ob1", "cm", + "ucx", and "yalla". "ob1" uses BTL ("Byte Transfer Layer") components for each supported network. "cm" uses MTL ("Matching - Transport Layer") components for each supported network. "yalla" - uses the Mellanox MXM transport. 
"ucx" uses the OpenUCX transport. + Transport Layer") components for each supported network. "ucx" uses + the OpenUCX transport. - "ob1" supports a variety of networks that can be used in combination with each other: - - OpenFabrics: InfiniBand, iWARP, and RoCE + - OpenFabrics: iWARP and RoCE - Loopback (send-to-self) - Shared memory - TCP - - Intel Phi SCIF - SMCUDA - Cisco usNIC - uGNI (Cray Gemini, Aries) @@ -620,42 +676,93 @@ Network Support - OpenFabrics Interfaces ("libfabric" tag matching) - Portals 4 - Open MPI will, by default, choose to use "cm" when one of the - above transports can be used, unless OpenUCX or MXM support is - detected, in which case the "ucx" or "yalla" PML will be used - by default. Otherwise, "ob1" will be used and the corresponding - BTLs will be selected. Users can force the use of ob1 or cm if - desired by setting the "pml" MCA parameter at run-time: + - UCX is the Unified Communication X (UCX) communication library + (http://www.openucx.org/). This is an open-source project + developed in collaboration between industry, laboratories, and + academia to create an open-source production grade communication + framework for data centric and high-performance applications. The + UCX library can be downloaded from repositories (e.g., + Fedora/RedHat yum repositories). The UCX library is also part of + Mellanox OFED and Mellanox HPC-X binary distributions. + + UCX currently supports: + + - OpenFabrics Verbs (including InfiniBand and RoCE) + - Cray's uGNI + - TCP + - Shared memory + - NVIDIA CUDA drivers + + While users can manually select any of the above transports at run + time, Open MPI will select a default transport as follows: + + 1. If InfiniBand devices are available, use the UCX PML. + + 2. If PSM, PSM2, or other tag-matching-supporting Libfabric + transport devices are available (e.g., Cray uGNI), use the "cm" + PML and a single appropriate corresponding "mtl" module. + + 3. 
If MXM/InfiniBand devices are available, use the "yalla" PML + (NOTE: the "yalla"/MXM PML is deprecated -- see below). + + 4. Otherwise, use the ob1 PML and one or more appropriate "btl" + modules. - shell$ mpirun --mca pml ob1 ... + Users can override Open MPI's default selection algorithms and force + the use of a specific transport if desired by setting the "pml" MCA + parameter (and potentially the "btl" and/or "mtl" MCA parameters) at + run-time: + + shell$ mpirun --mca pml ob1 --mca btl [comma-delimited-BTLs] ... + or + shell$ mpirun --mca pml cm --mca mtl [MTL] ... or - shell$ mpirun --mca pml cm ... - -- Similarly, there are two OpenSHMEM network models available: "ucx", - and "ikrit": - - "ucx" interfaces directly with UCX; - - "ikrit" interfaces directly with Mellanox MXM. - -- UCX is the Unified Communication X (UCX) communication library - (http://www.openucx.org/). - This is an open-source project developed in collaboration between - industry, laboratories, and academia to create an open-source - production grade communication framework for data centric and - high-performance applications. - UCX currently supports: - - OFA Verbs; - - Cray's uGNI; - - NVIDIA CUDA drivers. - -- MXM is the Mellanox Messaging Accelerator library utilizing a full - range of IB transports to provide the following messaging services - to the upper level MPI/OpenSHMEM libraries: - - - Usage of all available IB transports - - Native RDMA support - - Progress thread - - Shared memory communication - - Hardware-assisted reliability + shell$ mpirun --mca pml ucx ... + + As alluded to above, there is actually a fourth MPI point-to-point + transport, but it is deprecated and will likely be removed in a + future Open MPI release: + + - "yalla" uses the Mellanox MXM transport library. MXM is the + deprecated Mellanox Messaging Accelerator library, utilizing a + full range of IB transports to provide the following messaging + services to the upper level MPI/OpenSHMEM libraries.
MXM is only + included in this release of Open MPI for backwards compatibility; + the "ucx" PML should be used instead. + +- The main OpenSHMEM network model is "ucx"; it interfaces directly + with UCX. + + The "ikrit" OpenSHMEM network model is also available, but is + deprecated; it uses the deprecated Mellanox Messaging Accelerator + (MXM) library. + +- In prior versions of Open MPI, InfiniBand and RoCE support was + provided through the openib BTL and ob1 PML plugins. Starting with + Open MPI 4.0.0, InfiniBand support through the openib plugin is both + deprecated and superseded by the ucx PML component. + + While the openib BTL depended on libibverbs, the UCX PML depends on + the UCX library. + + Once installed, Open MPI can be built with UCX support by adding + --with-ucx to the Open MPI configure command. Once Open MPI is + configured to use UCX, the runtime will automatically select the UCX + PML if one of the supported networks is detected (e.g., InfiniBand). + It's possible to force using UCX in the mpirun or oshrun command + lines by specifying any or all of the following mca parameters: + "--mca pml ucx" for MPI point-to-point operations, "--mca spml ucx" + for OpenSHMEM support, and "--mca osc ucx" for MPI RMA (one-sided) + operations. + +- Although the ob1 PML+openib BTL is still the default for iWARP and + RoCE devices, it will reject InfiniBand devices (by default) so + that they will use the ucx PML. If using the openib BTL is still + desired, set the following MCA parameters: + + # Note that "vader" is Open MPI's shared memory BTL + $ mpirun --mca pml ob1 --mca btl openib,vader,self \ + --mca btl_openib_allow_ib 1 ... - The usnic BTL is support for Cisco's usNIC device ("userspace NIC") on Cisco UCS servers with the Virtualized Interface Card (VIC).
@@ -717,14 +824,14 @@ Open MPI Extensions - The following extensions are included in this version of Open MPI: - - pcollreq: Provides routines for persistent collective - communication operations and persistent neighborhood collective - communication operations, which are proposed in the MPI Forum as - of June 2018. The function names are prefixed with MPIX_ instead - of MPI_, like MPIX_Barrier_init, because they are not standardized - yet. Future versions of Open MPI will switch to the MPI_ prefix - once the MPI Standard which includes this feature is published. - See their man page for more details. + - pcollreq: Provides routines for persistent collective communication + operations and persistent neighborhood collective communication + operations, which are planned to be included in the next MPI + Standard after MPI-3.1 as of Nov. 2018. The function names are + prefixed with MPIX_ instead of MPI_, like MPIX_Barrier_init, + because they are not standardized yet. Future versions of Open MPI + will switch to the MPI_ prefix once the MPI Standard which includes + this feature is published. See their man page for more details. - affinity: Provides the OMPI_Affinity_str() routine on retrieving a string that contains what resources a process is bound to. See its man page for more details. @@ -768,6 +875,26 @@ Open MPI is unable to find relevant support for , configure will assume that it was unable to provide a feature that was specifically requested and will abort so that a human can resolve out the issue. +Additionally, if a search directory is specified in the form +--with-=, Open MPI will: + +1. Search for 's header files in /include. +2. Search for 's library files: + 2a. If --with--libdir= was specified, search in + . + 2b. Otherwise, search in /lib, and if they are not found + there, search again in /lib64. +3. If both the relevant header files and libraries are found: + 3a. Open MPI will build support for . + 3b. 
If the root path where the libraries are found is neither + "/usr" nor "/usr/local", Open MPI will compile itself with + RPATH flags pointing to the directory where 's libraries + are located. Open MPI does not RPATH /usr/lib[64] and + /usr/local/lib[64] because many systems already search these + directories for run-time libraries by default; adding RPATH for + them could have unintended consequences for the search path + ordering. + INSTALLATION OPTIONS --prefix= @@ -1000,8 +1127,18 @@ NETWORKING SUPPORT / OPTIONS covers most cases. This option is only needed for special configurations. ---with-scif= - Look in directory for Intel SCIF support libraries +--with-ucx= + Specify the directory where the UCX libraries and header files are + located. This option is generally only necessary if the UCX headers + and libraries are not in default compiler/linker search paths. + +--with-ucx-libdir= + Look in directory for the UCX libraries. By default, Open MPI will + look in /lib and /lib64, which covers + most cases. This option is only needed for special configurations. + +--with-usnic + Abort configure if Cisco usNIC support cannot be built. --with-verbs= Specify the directory where the verbs (also known as OpenFabrics @@ -1020,18 +1157,34 @@ NETWORKING SUPPORT / OPTIONS configurations. --with-verbs-usnic + Note that this option is no longer necessary in recent Linux distro + versions. If your Linux distro uses the "rdma-core" package (instead + of a standalone "libibverbs" package), not only do you not need this + option, you shouldn't use it, either. More below. + This option will activate support in Open MPI for disabling a dire-sounding warning message from libibverbs that Cisco usNIC devices are not supported (because Cisco usNIC devices are supported through libfabric, not libibverbs). 
This libibverbs warning can also be suppressed by installing the "no op" libusnic_verbs plugin for libibverbs (see https://github.com/cisco/libusnic_verbs, or - download binaries from cisco.com). This option is disabled by - default because it causes libopen-pal.so to depend on libibverbs.so, - which is undesirable to many downstream packagers. + download binaries from cisco.com). + + This option is disabled by default for two reasons: + + 1. It causes libopen-pal.so to depend on libibverbs.so, which is + undesirable to many downstream packagers. + 2. As mentioned above, recent versions of the libibverbs library + (included in the "rdma-core" package) do not have the bug that + will emit dire-sounding warnings about usnic devices. Indeed, + the --with-verbs-usnic option will enable code in Open MPI that + is actually incompatible with rdma-core (i.e., cause Open MPI to + fail to compile). + + If you enable --with-verbs-usnic and your system uses the rdma-core + package, configure will safely abort with a helpful message telling + you that you should not use --with-verbs-usnic. ---with-usnic - Abort configure if Cisco usNIC support cannot be built. RUN-TIME SYSTEM SUPPORT @@ -1062,13 +1215,6 @@ RUN-TIME SYSTEM SUPPORT LSF is a resource manager system, frequently used as a batch scheduler in HPC systems. - NOTE: If you are using LSF version 7.0.5, you will need to add - "LIBS=-ldl" to the configure command line. For example: - - ./configure LIBS=-ldl --with-lsf ... - - This workaround should *only* be needed for LSF 7.0.5. - --with-lsf-libdir= Look in directory for the LSF libraries. By default, Open MPI will look in /lib and /lib64, which covers @@ -1104,36 +1250,6 @@ RUN-TIME SYSTEM SUPPORT MISCELLANEOUS SUPPORT LIBRARIES ---with-blcr= - Specify the directory where the Berkeley Labs Checkpoint / Restart - (BLCR) libraries and header files are located. 
This option is - generally only necessary if the BLCR headers and libraries are not - in default compiler/linker search paths. - - This option is only meaningful if the --with-ft option is also used - to active Open MPI's fault tolerance behavior. - ---with-blcr-libdir= - Look in directory for the BLCR libraries. By default, Open MPI will - look in /lib and /lib64, which - covers most cases. This option is only needed for special - configurations. - ---with-dmtcp= - Specify the directory where the Distributed MultiThreaded - Checkpointing (DMTCP) libraries and header files are located. This - option is generally only necessary if the DMTCP headers and - libraries are not in default compiler/linker search paths. - - This option is only meaningful if the --with-ft option is also used - to active Open MPI's fault tolerance behavior. - ---with-dmtcp-libdir= - Look in directory for the DMTCP libraries. By default, Open MPI - will look in /lib and /lib64, - which covers most cases. This option is only needed for special - configurations. - --with-libevent(=value) This option specifies where to find the libevent support headers and library. The following VALUEs are permitted: @@ -1342,11 +1458,6 @@ MISCELLANEOUS FUNCTIONALITY However, it may be necessary to disable the memory manager in order to build Open MPI statically. ---with-ft=TYPE - Specify the type of fault tolerance to enable. Options: LAM - (LAM/MPI-like), cr (Checkpoint/Restart). Fault tolerance support is - disabled unless this option is specified. - --enable-peruse Enable the PERUSE MPI data analysis interface. @@ -1355,6 +1466,9 @@ MISCELLANEOUS FUNCTIONALITY with different endian representations). Heterogeneous support is disabled by default because it imposes a minor performance penalty. + --enable-spc + Enable software-based performance counters capability. 
+ *** THIS FUNCTIONALITY IS CURRENTLY BROKEN - DO NOT USE *** --with-wrapper-cflags= @@ -1999,7 +2113,7 @@ timer - High-resolution timers Each framework typically has one or more components that are used at run-time. For example, the btl framework is used by the MPI layer to send bytes across different types underlying networks. The tcp btl, -for example, sends messages across TCP-based networks; the openib btl +for example, sends messages across TCP-based networks; the UCX PML sends messages across OpenFabrics-based networks. Each component typically has some tunable parameters that can be diff --git a/VERSION b/VERSION index 6fadf030121..ce7c45397b2 100644 --- a/VERSION +++ b/VERSION @@ -1,11 +1,15 @@ # Copyright (c) 2008 Sun Microsystems, Inc. All rights reserved. -# Copyright (c) 2008-2011 Cisco Systems, Inc. All rights reserved. +# Copyright (c) 2008-2019 Cisco Systems, Inc. All rights reserved. # Copyright (c) 2011 NVIDIA Corporation. All rights reserved. # Copyright (c) 2013 Mellanox Technologies, Inc. # All rights reserved. -# Copyright (c) 2016 IBM Corporation. All rights reserved. # Copyright (c) 2017 Los Alamos National Security, LLC. All rights # reserved. +# Copyright (c) 2019 Triad National Security, LLC. All rights +# reserved. +# Copyright (c) 2018-2019 IBM Corporation. All rights reserved. +# Copyright (c) 2018 Research Organization for Information Science +# and Technology (RIST). All rights reserved. # This is the VERSION file for Open MPI, describing the precise # version of Open MPI in this distribution. The various components of @@ -17,7 +21,7 @@ major=4 minor=0 -release=0 +release=2 # greek is generally used for alpha or beta release tags. If it is # non-empty, it will be appended to the version number. It does not @@ -26,7 +30,7 @@ release=0 # requirement is that it must be entirely printable ASCII characters # and have no white space. 
-greek=a1 +greek=rc3 # If repo_rev is empty, then the repository version number will be # obtained during "make dist" via the "git describe --tags --always" @@ -52,7 +56,7 @@ date="Unreleased developer copy" # The shared library version of each of Open MPI's public libraries. # These versions are maintained in accordance with the "Library # Interface Versions" chapter from the GNU Libtool documentation. The -# first Open MPI release to programmatically specify these versions was +# first Open MPI release to programatically specify these versions was # v1.3.4 (note that Libtool defaulted all prior releases to 0:0:0). # All changes in these version numbers are dictated by the Open MPI # release managers (not individual developers). Notes: @@ -84,17 +88,17 @@ date="Unreleased developer copy" # Version numbers are described in the Libtool current:revision:age # format. -libmpi_so_version=0:0:0 -libmpi_cxx_so_version=0:0:0 -libmpi_mpifh_so_version=0:0:0 -libmpi_usempi_tkr_so_version=0:0:0 -libmpi_usempi_ignore_tkr_so_version=0:0:0 -libmpi_usempif08_so_version=0:0:0 -libopen_rte_so_version=0:0:0 -libopen_pal_so_version=0:0:0 -libmpi_java_so_version=0:0:0 -liboshmem_so_version=0:0:0 -libompitrace_so_version=0:0:0 +libmpi_so_version=60:2:20 +libmpi_cxx_so_version=60:1:20 +libmpi_mpifh_so_version=60:2:20 +libmpi_usempi_tkr_so_version=60:0:20 +libmpi_usempi_ignore_tkr_so_version=60:0:20 +libmpi_usempif08_so_version=60:1:20 +libopen_rte_so_version=60:2:20 +libopen_pal_so_version=60:2:20 +libmpi_java_so_version=60:0:20 +liboshmem_so_version=62:0:22 +libompitrace_so_version=60:0:20 # "Common" components install standalone libraries that are run-time # linked by one or more components. So they need to be versioned as @@ -102,16 +106,15 @@ libompitrace_so_version=0:0:0 # components-don't-affect-the-build-system abstraction. 
# OMPI layer -libmca_ompi_common_ompio_so_version=0:0:0 -libmca_ompi_common_monitoring_so_version=0:0:0 +libmca_ompi_common_ompio_so_version=60:2:19 +libmca_ompi_common_monitoring_so_version=60:0:10 # ORTE layer -libmca_orte_common_alps_so_version=0:0:0 +libmca_orte_common_alps_so_version=60:0:20 # OPAL layer -libmca_opal_common_cuda_so_version=0:0:0 -libmca_opal_common_ofi_so_version=0:0:0 -libmca_opal_common_sm_so_version=0:0:0 -libmca_opal_common_ucx_so_version=0:0:0 -libmca_opal_common_ugni_so_version=0:0:0 -libmca_opal_common_verbs_so_version=0:0:0 +libmca_opal_common_cuda_so_version=60:0:20 +libmca_opal_common_sm_so_version=60:0:20 +libmca_opal_common_ucx_so_version=60:1:20 +libmca_opal_common_ugni_so_version=60:0:20 +libmca_opal_common_verbs_so_version=60:0:20 diff --git a/config/Makefile.am b/config/Makefile.am index 85a222a7c91..4379498100a 100644 --- a/config/Makefile.am +++ b/config/Makefile.am @@ -9,7 +9,7 @@ # University of Stuttgart. All rights reserved. # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. -# Copyright (c) 2006-2015 Cisco Systems, Inc. All rights reserved. +# Copyright (c) 2006-2018 Cisco Systems, Inc. All rights reserved. # Copyright (c) 2010 Oracle and/or its affiliates. All rights # reserved. # Copyright (c) 2014-2015 Intel, Inc. All rights reserved. 
@@ -28,7 +28,8 @@ EXTRA_DIST = \ ltmain_nag_pthread.diff \ ltmain_pgi_tp.diff \ opal_mca_priority_sort.pl \ - find_common_syms + find_common_syms \ + make_manpage.pl maintainer-clean-local: rm -f opal_get_version.sh diff --git a/ompi/mpi/man/make_manpage.pl b/config/make_manpage.pl similarity index 100% rename from ompi/mpi/man/make_manpage.pl rename to config/make_manpage.pl diff --git a/config/ompi_check_ucx.m4 b/config/ompi_check_ucx.m4 index 8b5332faac6..7f04ba3a52c 100644 --- a/config/ompi_check_ucx.m4 +++ b/config/ompi_check_ucx.m4 @@ -46,7 +46,7 @@ AC_DEFUN([OMPI_CHECK_UCX],[ [ucp/api/ucp.h], [ucp], [ucp_cleanup], - [], + [-luct -lucm -lucs], [], [], [ompi_check_ucx_happy="yes"], @@ -78,7 +78,7 @@ AC_DEFUN([OMPI_CHECK_UCX],[ [ucp/api/ucp.h], [ucp], [ucp_cleanup], - [], + [-luct -lucm -lucs], [$ompi_check_ucx_dir], [$ompi_check_ucx_libdir], [ompi_check_ucx_happy="yes"], @@ -112,14 +112,22 @@ AC_DEFUN([OMPI_CHECK_UCX],[ ucp_request_check_status, ucp_put_nb, ucp_get_nb], [], [], [#include ]) + AC_CHECK_DECLS([ucm_test_events], + [], [], + [#include ]) AC_CHECK_DECLS([UCP_ATOMIC_POST_OP_AND, UCP_ATOMIC_POST_OP_OR, UCP_ATOMIC_POST_OP_XOR, UCP_ATOMIC_FETCH_OP_FAND, UCP_ATOMIC_FETCH_OP_FOR, - UCP_ATOMIC_FETCH_OP_FXOR], + UCP_ATOMIC_FETCH_OP_FXOR, + UCP_PARAM_FIELD_ESTIMATED_NUM_PPN], [], [], [#include ]) + AC_CHECK_DECLS([UCP_WORKER_ATTR_FIELD_ADDRESS_FLAGS], + [AC_DEFINE([HAVE_UCP_WORKER_ADDRESS_FLAGS], [1], + [have worker address attribute])], [], + [#include ]) CPPFLAGS=$old_CPPFLAGS OPAL_SUMMARY_ADD([[Transports]],[[Open UCX]],[$1],[$ompi_check_ucx_happy])])]) @@ -128,9 +136,11 @@ AC_DEFUN([OMPI_CHECK_UCX],[ [$1_CPPFLAGS="[$]$1_CPPFLAGS $ompi_check_ucx_CPPFLAGS" $1_LDFLAGS="[$]$1_LDFLAGS $ompi_check_ucx_LDFLAGS" $1_LIBS="[$]$1_LIBS $ompi_check_ucx_LIBS" + AC_DEFINE([HAVE_UCX], [1], [have ucx]) $2], [AS_IF([test ! -z "$with_ucx" && test "$with_ucx" != "no"], [AC_MSG_ERROR([UCX support requested but not found. 
Aborting])]) + AC_DEFINE([HAVE_UCX], [0], [have ucx]) $3]) OPAL_VAR_SCOPE_POP diff --git a/config/ompi_config_files.m4 b/config/ompi_config_files.m4 index 160a5d1c528..274b404d75d 100644 --- a/config/ompi_config_files.m4 +++ b/config/ompi_config_files.m4 @@ -1,7 +1,7 @@ # -*- shell-script -*- # # Copyright (c) 2009-2017 Cisco Systems, Inc. All rights reserved -# Copyright (c) 2017 Research Organization for Information Science +# Copyright (c) 2017-2018 Research Organization for Information Science # and Technology (RIST). All rights reserved. # Copyright (c) 2018 Los Alamos National Security, LLC. All rights # reserved. @@ -38,6 +38,7 @@ AC_DEFUN([OMPI_CONFIG_FILES],[ ompi/mpi/fortran/use-mpi-ignore-tkr/mpi-ignore-tkr-file-interfaces.h ompi/mpi/fortran/use-mpi-ignore-tkr/mpi-ignore-tkr-removed-interfaces.h ompi/mpi/fortran/use-mpi-f08/Makefile + ompi/mpi/fortran/use-mpi-f08/bindings/Makefile ompi/mpi/fortran/use-mpi-f08/mod/Makefile ompi/mpi/fortran/mpiext-use-mpi/Makefile ompi/mpi/fortran/mpiext-use-mpi-f08/Makefile diff --git a/config/ompi_ext.m4 b/config/ompi_ext.m4 index ab3920d2fbc..827658e6db3 100644 --- a/config/ompi_ext.m4 +++ b/config/ompi_ext.m4 @@ -5,8 +5,8 @@ dnl University Research and Technology dnl Corporation. All rights reserved. dnl Copyright (c) 2009-2017 Cisco Systems, Inc. All rights reserved dnl Copyright (c) 2011-2012 Oak Ridge National Labs. All rights reserved. -dnl Copyright (c) 2015-2017 Research Organization for Information Science -dnl and Technology (RIST). All rights reserved. +dnl Copyright (c) 2015-2018 Research Organization for Information Science +dnl and Technology (RIST). All rights reserved. dnl Copyright (c) 2017 The University of Tennessee and The University dnl of Tennessee Research Foundation. All rights dnl reserved. 
@@ -405,11 +405,15 @@ AC_DEFUN([EXT_CONFIGURE_M4_CONFIG_COMPONENT],[ #-------------------- # # C: -# - c/mpiext__c.h: is included in mpi_ext.h +# - c/mpiext__c.h: is installed to +# /openmpi/mpiext/mpiext__c.h and is included in +# mpi_ext.h # - c/libmpiext_.la: convneience library slurped into libmpi.la # # mpi.f.h: -# - mpif-h/mpiext__mpifh.h: is included mpi mpif_ext.h +# - mpif-h/mpiext__mpifh.h: is installed to +# openmpi/mpiext/mpiext__mpifh.h and is included mpi +# mpif_ext.h # - mpif-h/libmpiext__mpifh.la: convenience library slurped # into libmpi_mpifh.la # @@ -461,7 +465,7 @@ AC_DEFUN([EXT_PROCESS_COMPONENT],[ ############### # C Bindings ############### - test_header="${srcdir}/ompi/mpiext/$component/c/mpiext_${component}_c.h" + test_header="${srcdir}/ompi/mpiext/${component}/c/mpiext_${component}_c.h" AC_MSG_CHECKING([if MPI Extension $component has C bindings]) @@ -479,14 +483,14 @@ AC_DEFUN([EXT_PROCESS_COMPONENT],[ $3="$$3 $component" # JMS Where is this needed? - EXT_C_HEADERS="$EXT_C_HEADERS mpiext/$component/c/mpiext_${component}_c.h" + EXT_C_HEADERS="$EXT_C_HEADERS mpiext/c/mpiext_${component}_c.h" component_header="mpiext_${component}_c.h" cat >> $mpi_ext_h < 26 characters. But just in case, put a check here + # to make sure: error out if the MPI extension name is > 26 + # characters (because otherwise it'll just be a really weird / + # hard to diagnose compile error when a user tries to compile + # a Fortran MPI application that includes `mpif-ext.h`). 
+ len=`echo $component | wc -c` + result=`expr $len \> 26` + AS_IF([test $result -eq 1], + [AC_MSG_WARN([MPI extension name too long: $component]) + AC_MSG_WARN([For esoteric reasons, MPI Extensions with mpif.h bindings must have a name that is <= 26 characters]) + AC_MSG_ERROR([Cannot continue])]) + component_header="mpiext_${component}_mpifh.h" cat >> $mpif_ext_h <> $mpiusempi_ext_h <> $mpiusempif08_ext_h < /dev/null | wc -l` + AS_IF([test "$files" -gt 0], + [AC_MSG_RESULT([found]) + opal_external_pmix_header_happy=yes], + [AC_MSG_RESULT([not found]) + AC_MSG_CHECKING([for pmix.h in $1/include]) + files=`ls $1/include/pmix.h 2> /dev/null | wc -l` + AS_IF([test "$files" -gt 0], + [AC_MSG_RESULT([found]) + opal_external_pmix_header_happy=yes], + [AC_MSG_RESULT([not found]) + opal_external_pmix_header_happy=no])]) + + AS_IF([test "$opal_external_pmix_header_happy" = "yes"], + [AS_IF([test -n "$2"], + [AC_MSG_CHECKING([libpmix.* in $2]) + files=`ls $2/libpmix.* 2> /dev/null | wc -l` + AS_IF([test "$files" -gt 0], + [AC_MSG_RESULT([found]) + pmix_ext_install_libdir=$2], + [AC_MSG_RESULT([not found]) + AC_MSG_CHECKING([libpmix.* in $2/lib64]) + files=`ls $2/lib64/libpmix.* 2> /dev/null | wc -l` + AS_IF([test "$files" -gt 0], + [AC_MSG_RESULT([found]) + pmix_ext_install_libdir=$2/lib64], + [AC_MSG_RESULT([not found]) + AC_MSG_CHECKING([libpmix.* in $2/lib]) + files=`ls $2/lib/libpmix.* 2> /dev/null | wc -l` + AS_IF([test "$files" -gt 0], + [AC_MSG_RESULT([found]) + pmix_ext_install_libdir=$2/lib], + [AC_MSG_RESULT([not found]) + AC_MSG_ERROR([Cannot continue])])])])], + [# check for presence of lib64 directory - if found, see if the + # desired library is present and matches our build requirements + AC_MSG_CHECKING([libpmix.* in $1/lib64]) + files=`ls $1/lib64/libpmix.* 2> /dev/null | wc -l` + AS_IF([test "$files" -gt 0], + [AC_MSG_RESULT([found]) + pmix_ext_install_libdir=$1/lib64], + [AC_MSG_RESULT([not found]) + AC_MSG_CHECKING([libpmix.* in $1/lib]) + files=`ls 
$1/lib/libpmix.* 2> /dev/null | wc -l` + AS_IF([test "$files" -gt 0], + [AC_MSG_RESULT([found]) + pmix_ext_install_libdir=$1/lib], + [AC_MSG_RESULT([not found]) + AC_MSG_ERROR([Cannot continue])])])]) + + # check the version + opal_external_pmix_save_CPPFLAGS=$CPPFLAGS + opal_external_pmix_save_LDFLAGS=$LDFLAGS + opal_external_pmix_save_LIBS=$LIBS + + # if the pmix_version.h file does not exist, then + # this must be from a pre-1.1.5 version OMPI does + # NOT support anything older than v1.2.5 + AC_MSG_CHECKING([PMIx version]) + CPPFLAGS="-I$1/include $CPPFLAGS" + AS_IF([test "x`ls $1/include/pmix_version.h 2> /dev/null`" = "x"], + [AC_MSG_RESULT([version file not found - assuming v1.1.4]) + opal_external_pmix_version_found=1 + opal_external_pmix_happy=no + opal_external_pmix_version=internal], + [AC_MSG_RESULT([version file found]) + opal_external_pmix_version_found=0]) + + # if it does exist, then we need to parse it to find + # the actual release series + AS_IF([test "$opal_external_pmix_version_found" = "0"], + [AC_MSG_CHECKING([version 4x]) + AC_PREPROC_IFELSE([AC_LANG_PROGRAM([ + #include + #if (PMIX_VERSION_MAJOR < 4L) + #error "not version 4 or above" + #endif + ], [])], + [AC_MSG_RESULT([found]) + opal_external_pmix_version=4x + opal_external_pmix_version_found=1 + opal_external_pmix_happy=yes], + [AC_MSG_RESULT([not found])])]) + + AS_IF([test "$opal_external_pmix_version_found" = "0"], + [AC_MSG_CHECKING([version 3x or above]) + AC_PREPROC_IFELSE([AC_LANG_PROGRAM([ + #include + #if (PMIX_VERSION_MAJOR != 3L) + #error "not version 3" + #endif + ], [])], + [AC_MSG_RESULT([found]) + opal_external_pmix_version=3x + opal_external_pmix_version_found=1 + opal_external_pmix_happy=yes], + [AC_MSG_RESULT([not found])])]) + + AS_IF([test "$opal_external_pmix_version_found" = "0"], + [AC_MSG_CHECKING([version 2x]) + AC_PREPROC_IFELSE([AC_LANG_PROGRAM([ + #include + #if (PMIX_VERSION_MAJOR != 2L) + #error "not version 2" + #endif + ], [])], + [AC_MSG_RESULT([found]) 
+ opal_external_pmix_version=2x + opal_external_pmix_version_found=1 + opal_external_pmix_happy=yes], + [AC_MSG_RESULT([not found])])]) + + AS_IF([test "$opal_external_pmix_version_found" = "0"], + [AC_MSG_CHECKING([version 1x]) + AC_PREPROC_IFELSE([AC_LANG_PROGRAM([ + #include + #if (PMIX_VERSION_MAJOR != 1L && PMIX_VERSION_MINOR != 2L) + #error "not version 1.2.x" + #endif + ], [])], + [AC_MSG_RESULT([found]) + opal_external_pmix_version=1x + opal_external_pmix_version_found=1 + opal_external_have_pmix1=1 + opal_external_pmix_happy=yes], + [AC_MSG_RESULT([not found])])]) + + AS_IF([test "x$opal_external_pmix_version" = "x"], + [AC_MSG_WARN([External PMIx support detected, but version]) + AC_MSG_WARN([information of the external lib could not]) + AC_MSG_WARN([be detected]) + opal_external_pmix_happy=no]) + + CPPFLAGS=$opal_external_pmix_save_CPPFLAGS + LDFLAGS=$opal_external_pmix_save_LDFLAGS + LIBS=$opal_external_pmix_save_LIBS + ]) + AS_IF([test "$opal_external_pmix_happy" = "yes"], + [$3], [$4]) + + OPAL_VAR_SCOPE_POP +]) + + +AC_DEFUN([OPAL_CHECK_PMIX],[ AC_ARG_WITH([pmix], [AC_HELP_STRING([--with-pmix(=DIR)], @@ -247,177 +408,66 @@ AC_DEFUN([OPAL_CHECK_PMIX],[ AC_MSG_WARN([an external copy that you supply.]) AC_MSG_ERROR([Cannot continue])]) - AC_MSG_CHECKING([if user requested internal PMIx support($with_pmix)]) - opal_prun_happy=no - opal_external_pmix_happy=no opal_external_have_pmix1=0 - - AS_IF([test "$with_pmix" = "internal"], - [AC_MSG_RESULT([yes]) + AS_IF([test "$opal_enable_pmix" = "no"], + [AC_MSG_CHECKING([if user requested internal PMIx support($with_pmix)]) opal_external_pmix_happy=no - opal_prun_happy=yes - opal_external_pmix_version=internal], + pmix_ext_install_libdir= + pmix_ext_install_dir= - [AC_MSG_RESULT([no]) - # check for external pmix lib */ - AS_IF([test -z "$with_pmix" || test "$with_pmix" = "yes" || test "$with_pmix" = "external"], - [pmix_ext_install_dir=/usr], - [pmix_ext_install_dir=$with_pmix]) - - # Make sure we have the 
headers and libs in the correct location - AC_MSG_CHECKING([for pmix.h in $pmix_ext_install_dir]) - files=`ls $pmix_ext_install_dir/pmix.h 2> /dev/null | wc -l` - AS_IF([test "$files" -gt 0], - [AC_MSG_RESULT([found]) - opal_external_pmix_header_happy=yes], - [AC_MSG_RESULT([not found]) - AC_MSG_CHECKING([for pmix.h in $pmix_ext_install_dir/include]) - files=`ls $pmix_ext_install_dir/include/pmix.h 2> /dev/null | wc -l` - AS_IF([test "$files" -gt 0], - [AC_MSG_RESULT([found]) - opal_external_pmix_header_happy=yes], - [AC_MSG_RESULT([not found]) - opal_external_pmix_header_happy=no - opal_external_pmix_version=internal])]) - - AS_IF([test "$opal_external_pmix_header_happy" = "yes"], - [AS_IF([test -n "$with_pmix_libdir"], - [AC_MSG_CHECKING([libpmix.* in $with_pmix_libdir]) - files=`ls $with_pmix_libdir/libpmix.* 2> /dev/null | wc -l` - AS_IF([test "$files" -gt 0], - [AC_MSG_RESULT([found]) - pmix_ext_install_libdir=$with_pmix_libdir], - [AC_MSG_RESULT([not found]) - AC_MSG_CHECKING([libpmix.* in $with_pmix_libdir/lib64]) - files=`ls $with_pmix_libdir/lib64/libpmix.* 2> /dev/null | wc -l` - AS_IF([test "$files" -gt 0], - [AC_MSG_RESULT([found]) - pmix_ext_install_libdir=$with_pmix_libdir/lib64], - [AC_MSG_RESULT([not found]) - AC_MSG_CHECKING([libpmix.* in $with_pmix_libdir/lib]) - files=`ls $with_pmix_libdir/lib/libpmix.* 2> /dev/null | wc -l` - AS_IF([test "$files" -gt 0], - [AC_MSG_RESULT([found]) - pmix_ext_install_libdir=$with_pmix_libdir/lib], - [AC_MSG_RESULT([not found]) - AC_MSG_ERROR([Cannot continue])])])])], - [# check for presence of lib64 directory - if found, see if the - # desired library is present and matches our build requirements - AC_MSG_CHECKING([libpmix.* in $pmix_ext_install_dir/lib64]) - files=`ls $pmix_ext_install_dir/lib64/libpmix.* 2> /dev/null | wc -l` - AS_IF([test "$files" -gt 0], - [AC_MSG_RESULT([found]) - pmix_ext_install_libdir=$pmix_ext_install_dir/lib64], - [AC_MSG_RESULT([not found]) - AC_MSG_CHECKING([libpmix.* in 
$pmix_ext_install_dir/lib]) - files=`ls $pmix_ext_install_dir/lib/libpmix.* 2> /dev/null | wc -l` - AS_IF([test "$files" -gt 0], - [AC_MSG_RESULT([found]) - pmix_ext_install_libdir=$pmix_ext_install_dir/lib], - [AC_MSG_RESULT([not found]) - AC_MSG_ERROR([Cannot continue])])])]) - - # check the version - opal_external_pmix_save_CPPFLAGS=$CPPFLAGS - opal_external_pmix_save_LDFLAGS=$LDFLAGS - opal_external_pmix_save_LIBS=$LIBS - - # if the pmix_version.h file does not exist, then - # this must be from a pre-1.1.5 version OMPI does - # NOT support anything older than v1.2.5 - AC_MSG_CHECKING([PMIx version]) - CPPFLAGS="-I$pmix_ext_install_dir/include $CPPFLAGS" - AS_IF([test "x`ls $pmix_ext_install_dir/include/pmix_version.h 2> /dev/null`" = "x"], - [AC_MSG_RESULT([version file not found - assuming v1.1.4]) - opal_external_pmix_version_found=1 - opal_external_pmix_happy=no - opal_external_pmix_version=internal], - [AC_MSG_RESULT([version file found]) - opal_external_pmix_version_found=0]) - - # if it does exist, then we need to parse it to find - # the actual release series - AS_IF([test "$opal_external_pmix_version_found" = "0"], - [AC_MSG_CHECKING([version 3x or above]) - AC_PREPROC_IFELSE([AC_LANG_PROGRAM([ - #include - #if (PMIX_VERSION_MAJOR < 3L) - #error "not version 3 or above" - #endif - ], [])], - [AC_MSG_RESULT([found]) - opal_external_pmix_version=3x - opal_external_pmix_version_found=1 - opal_external_pmix_happy=yes], - [AC_MSG_RESULT([not found])])]) - - AS_IF([test "$opal_external_pmix_version_found" = "0"], - [AC_MSG_CHECKING([version 2x]) - AC_PREPROC_IFELSE([AC_LANG_PROGRAM([ - #include - #if (PMIX_VERSION_MAJOR != 2L) - #error "not version 2" - #endif - ], [])], - [AC_MSG_RESULT([found]) - opal_external_pmix_version=2x - opal_external_pmix_version_found=1 - opal_prun_happy=yes - opal_external_pmix_happy=yes], - [AC_MSG_RESULT([not found])])]) - - AS_IF([test "$opal_external_pmix_version_found" = "0"], - [AC_MSG_CHECKING([version 1x]) - 
AC_PREPROC_IFELSE([AC_LANG_PROGRAM([ - #include - #if (PMIX_VERSION_MAJOR != 1L && PMIX_VERSION_MINOR != 2L) - #error "not version 1.2.x" - #endif - ], [])], - [AC_MSG_RESULT([found]) - opal_external_pmix_version=1.2.x - opal_external_pmix_version_found=1 - opal_external_have_pmix1=1 - opal_external_pmix_happy=yes], - [AC_MSG_RESULT([not found])])]) - - AS_IF([test "x$opal_external_pmix_version" = "x"], - [AC_MSG_WARN([External PMIx support detected, but version]) - AC_MSG_WARN([information of the external lib could not]) - AC_MSG_WARN([be detected]) - AC_MSG_WARN([Internal version will be used]) - opal_external_pmix_happy=no]) - - CPPFLAGS=$opal_external_pmix_save_CPPFLAGS - LDFLAGS=$opal_external_pmix_save_LDFLAGS - LIBS=$opal_external_pmix_save_LIBS - ]) - ]) - - # Final check - if they explicitly pointed us at an external - # installation that wasn't acceptable, then error out - AS_IF([test -n "$with_pmix" && test "$with_pmix" != "yes" && test "$with_pmix" != "external" && test "$with_pmix" != "internal" && test "$opal_external_pmix_happy" = "no"], - [AC_MSG_WARN([External PMIx support requested, but either the version]) - AC_MSG_WARN([of the external lib was not supported or the required]) - AC_MSG_WARN([header/library files were not found]) - AC_MSG_ERROR([Cannot continue])]) + AS_IF([test "$with_pmix" = "internal"], + [AC_MSG_RESULT([yes]) + opal_external_pmix_happy=no + opal_external_pmix_version=internal + opal_enable_pmix=yes], - AC_MSG_CHECKING([PMIx version to be used]) - AS_IF([test "$opal_external_pmix_happy" = "yes"], - [AC_MSG_RESULT([external($opal_external_pmix_version)]) - AS_IF([test "$pmix_ext_install_dir" != "/usr"], - [opal_external_pmix_CPPFLAGS="-I$pmix_ext_install_dir/include" - opal_external_pmix_LDFLAGS=-L$pmix_ext_install_libdir]) - opal_external_pmix_LIBS=-lpmix], - [AC_MSG_RESULT([internal])]) + [AC_MSG_RESULT([no]) + # check for external pmix lib */ + AS_IF([test -z "$with_pmix" || test "$with_pmix" = "yes" || test "$with_pmix" = 
"external"], + [pmix_ext_install_dir=/usr], + [pmix_ext_install_dir=$with_pmix]) + AS_IF([test -n "$with_pmix_libdir"], + [pmix_ext_install_libdir=$with_pmix_libdir]) + OPAL_CHECK_PMIX_LIB([$pmix_ext_install_dir], + [$pmix_ext_install_libdir], + [opal_external_pmix_happy=yes + opal_enable_pmix=yes], + [opal_external_pmix_happy=no])]) + + # Final check - if they explicitly pointed us at an external + # installation that wasn't acceptable, then error out + AS_IF([test -n "$with_pmix" && test "$with_pmix" != "yes" && test "$with_pmix" != "external" && test "$with_pmix" != "internal" && test "$opal_external_pmix_happy" = "no"], + [AC_MSG_WARN([External PMIx support requested, but either the version]) + AC_MSG_WARN([of the external lib was not supported or the required]) + AC_MSG_WARN([header/library files were not found]) + AC_MSG_ERROR([Cannot continue])]) + + # Final check - if they didn't point us explicitly at an external version + # but we found one anyway, use the internal version if it is higher + AS_IF([test "$opal_external_pmix_version" != "internal" && (test -z "$with_pmix" || test "$with_pmix" = "yes")], + [AS_IF([test "$opal_external_pmix_version" != "3x"], + [AC_MSG_WARN([discovered external PMIx version is less than internal version 3.x]) + AC_MSG_WARN([using internal PMIx]) + opal_external_pmix_version=internal + opal_external_pmix_happy=no])]) + ]) + + AS_IF([test "$opal_enable_pmix" = "yes"], + [AC_MSG_CHECKING([PMIx version to be used]) + AS_IF([test "$opal_external_pmix_happy" = "yes"], + [AC_MSG_RESULT([external($opal_external_pmix_version)]) + AS_IF([test "$pmix_ext_install_dir" != "/usr"], + [opal_external_pmix_CPPFLAGS="-I$pmix_ext_install_dir/include" + opal_external_pmix_LDFLAGS=-L$pmix_ext_install_libdir]) + opal_external_pmix_LIBS=-lpmix], + [AC_MSG_RESULT([internal])])]) AC_DEFINE_UNQUOTED([OPAL_PMIX_V1],[$opal_external_have_pmix1], [Whether the external PMIx library is v1]) - AM_CONDITIONAL([OPAL_WANT_PRUN], [test "$opal_prun_happy" = 
"yes"]) - - AS_IF([test "$opal_external_pmix_version" = "1.2.x"], - [OPAL_SUMMARY_ADD([[Miscellaneous]],[[PMIx support]], [opal_pmix], [$opal_external_pmix_version: WARNING - DYNAMIC OPS NOT SUPPORTED])], - [OPAL_SUMMARY_ADD([[Miscellaneous]],[[PMIx support]], [opal_pmix], [$opal_external_pmix_version])]) - OPAL_VAR_SCOPE_POP + AS_IF([test "$opal_external_pmix_happy" = "yes"], + [AS_IF([test "$opal_external_pmix_version" = "1x"], + [OPAL_SUMMARY_ADD([[Miscellaneous]],[[PMIx support]], [opal_pmix], [External (1.2.5) WARNING - DYNAMIC OPS NOT SUPPORTED])], + [OPAL_SUMMARY_ADD([[Miscellaneous]],[[PMIx support]], [opal_pmix], [External ($opal_external_pmix_version)])])], + [OPAL_SUMMARY_ADD([[Miscellaneous]], [[PMIx support]], [opal_pmix], [Internal])]) ]) diff --git a/config/opal_config_asm.m4 b/config/opal_config_asm.m4 index db120d409e7..65675d16b58 100644 --- a/config/opal_config_asm.m4 +++ b/config/opal_config_asm.m4 @@ -2,14 +2,14 @@ dnl dnl Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana dnl University Research and Technology dnl Corporation. All rights reserved. -dnl Copyright (c) 2004-2015 The University of Tennessee and The University +dnl Copyright (c) 2004-2018 The University of Tennessee and The University dnl of Tennessee Research Foundation. All rights dnl reserved. dnl Copyright (c) 2004-2006 High Performance Computing Center Stuttgart, dnl University of Stuttgart. All rights reserved. dnl Copyright (c) 2004-2005 The Regents of the University of California. dnl All rights reserved. -dnl Copyright (c) 2008-2015 Cisco Systems, Inc. All rights reserved. +dnl Copyright (c) 2008-2018 Cisco Systems, Inc. All rights reserved. dnl Copyright (c) 2010 Oracle and/or its affiliates. All rights reserved. dnl Copyright (c) 2015-2017 Research Organization for Information Science dnl and Technology (RIST). All rights reserved. 
@@ -24,65 +24,211 @@ dnl dnl $HEADER$ dnl +dnl This is a C test to see if 128-bit __atomic_compare_exchange_n() +dnl actually works (e.g., it compiles and links successfully on +dnl ARM64+clang, but returns incorrect answers as of August 2018). +AC_DEFUN([OPAL_ATOMIC_COMPARE_EXCHANGE_N_TEST_SOURCE],[[ +#include +#include +#include -AC_DEFUN([OPAL_CHECK_SYNC_BUILTIN_CSWAP_INT128], [ +typedef union { + uint64_t fake@<:@2@:>@; + __int128 real; +} ompi128; + +static void test1(void) +{ + // As of Aug 2018, we could not figure out a way to assign 128-bit + // constants -- the compilers would not accept it. So use a fake + // union to assign 2 uint64_t's to make a single __int128. + ompi128 ptr = { .fake = { 0xFFEEDDCCBBAA0099, 0x8877665544332211 }}; + ompi128 expected = { .fake = { 0x11EEDDCCBBAA0099, 0x88776655443322FF }}; + ompi128 desired = { .fake = { 0x1122DDCCBBAA0099, 0x887766554433EEFF }}; + bool r = __atomic_compare_exchange_n(&ptr.real, &expected.real, + desired.real, true, + __ATOMIC_RELAXED, __ATOMIC_RELAXED); + if ( !(r == false && ptr.real == expected.real)) { + exit(1); + } +} + +static void test2(void) +{ + ompi128 ptr = { .fake = { 0xFFEEDDCCBBAA0099, 0x8877665544332211 }}; + ompi128 expected = ptr; + ompi128 desired = { .fake = { 0x1122DDCCBBAA0099, 0x887766554433EEFF }}; + bool r = __atomic_compare_exchange_n(&ptr.real, &expected.real, + desired.real, true, + __ATOMIC_RELAXED, __ATOMIC_RELAXED); + if (!(r == true && ptr.real == desired.real)) { + exit(2); + } +} + +int main(int argc, char** argv) +{ + test1(); + test2(); + return 0; +} +]]) + +dnl ------------------------------------------------------------------ + +dnl This is a C test to see if 128-bit __sync_bool_compare_and_swap() +dnl actually works (e.g., it compiles and links successfully on +dnl ARM64+clang, but returns incorrect answers as of August 2018).
+AC_DEFUN([OPAL_SYNC_BOOL_COMPARE_AND_SWAP_TEST_SOURCE],[[ +#include +#include +#include - OPAL_VAR_SCOPE_PUSH([sync_bool_compare_and_swap_128_result CFLAGS_save]) +typedef union { + uint64_t fake@<:@2@:>@; + __int128 real; +} ompi128; - AC_ARG_ENABLE([cross-cmpset128],[AC_HELP_STRING([--enable-cross-cmpset128], - [enable the use of the __sync builtin atomic compare-and-swap 128 when cross compiling])]) +static void test1(void) +{ + // As of Aug 2018, we could not figure out a way to assign 128-bit + // constants -- the compilers would not accept it. So use a fake + // union to assign 2 uint64_t's to make a single __int128. + ompi128 ptr = { .fake = { 0xFFEEDDCCBBAA0099, 0x8877665544332211 }}; + ompi128 oldval = { .fake = { 0x11EEDDCCBBAA0099, 0x88776655443322FF }}; + ompi128 newval = { .fake = { 0x1122DDCCBBAA0099, 0x887766554433EEFF }}; + bool r = __sync_bool_compare_and_swap(&ptr.real, oldval.real, newval.real); + if (!(r == false && ptr.real != newval.real)) { + exit(1); + } +} - sync_bool_compare_and_swap_128_result=0 +static void test2(void) +{ + ompi128 ptr = { .fake = { 0xFFEEDDCCBBAA0099, 0x8877665544332211 }}; + ompi128 oldval = ptr; + ompi128 newval = { .fake = { 0x1122DDCCBBAA0099, 0x887766554433EEFF }}; + bool r = __sync_bool_compare_and_swap(&ptr.real, oldval.real, newval.real); + if (!(r == true && ptr.real == newval.real)) { + exit(2); + } +} - if test !
"$enable_cross_cmpset128" = "yes" ; then - AC_MSG_CHECKING([for processor support of __sync builtin atomic compare-and-swap on 128-bit values]) +int main(int argc, char** argv) +{ + test1(); + test2(); + return 0; +} +]]) - AC_RUN_IFELSE([AC_LANG_PROGRAM([], [__int128 x = 0; __sync_bool_compare_and_swap (&x, 0, 1);])], - [AC_MSG_RESULT([yes]) - sync_bool_compare_and_swap_128_result=1], - [AC_MSG_RESULT([no])], - [AC_MSG_RESULT([no (cross compiling)])]) +dnl ------------------------------------------------------------------ - if test $sync_bool_compare_and_swap_128_result = 0 ; then - CFLAGS_save=$CFLAGS - CFLAGS="$CFLAGS -mcx16" +dnl +dnl Check to see if a specific function is linkable. +dnl +dnl Check with: +dnl 1. No compiler/linker flags. +dnl 2. CFLAGS += -mcx16 +dnl 3. LIBS += -latomic +dnl 4. Finally, if it links ok with any of #1, #2, or #3, actually try +dnl to run the test code (if we're not cross-compiling) and verify +dnl that it actually gives us the correct result. +dnl +dnl Note that we unfortunately can't use AC SEARCH_LIBS because its +dnl check incorrectly fails (because these functions are special compiler +dnl intrinsics -- SEARCH_LIBS tries with "check FUNC()", which the +dnl compiler complains doesn't match the internal prototype). So we have +dnl to use our own LINK_IFELSE tests. Indeed, since these functions are +dnl so special, we actually need a valid source code that calls the +dnl functions with correct arguments, etc. It's not enough, for example, +dnl to do the usual "try to set a function pointer to the symbol" trick to +dnl determine if these functions are available, because the compiler may +dnl not implement these as actual symbols. So just try to link a real +dnl test code. 
+dnl +dnl $1: function name to print +dnl $2: program to test +dnl $3: action if any of 1, 2, or 3 succeeds +dnl $4: action if all of 1, 2, and 3 fail +dnl +AC_DEFUN([OPAL_ASM_CHECK_ATOMIC_FUNC],[ + OPAL_VAR_SCOPE_PUSH([opal_asm_check_func_happy opal_asm_check_func_CFLAGS_save opal_asm_check_func_LIBS_save]) + + opal_asm_check_func_CFLAGS_save=$CFLAGS + opal_asm_check_func_LIBS_save=$LIBS + + dnl Check with no compiler/linker flags + AC_MSG_CHECKING([for $1]) + AC_LINK_IFELSE([$2], + [opal_asm_check_func_happy=1 + AC_MSG_RESULT([yes])], + [opal_asm_check_func_happy=0 + AC_MSG_RESULT([no])]) + + dnl If that didn't work, try again with CFLAGS+=-mcx16 + AS_IF([test $opal_asm_check_func_happy -eq 0], + [AC_MSG_CHECKING([for $1 with -mcx16]) + CFLAGS="$CFLAGS -mcx16" + AC_LINK_IFELSE([$2], + [opal_asm_check_func_happy=1 + AC_MSG_RESULT([yes])], + [opal_asm_check_func_happy=0 + CFLAGS=$opal_asm_check_func_CFLAGS_save + AC_MSG_RESULT([no])]) + ]) + + dnl If that didn't work, try again with LIBS+=-latomic + AS_IF([test $opal_asm_check_func_happy -eq 0], + [AC_MSG_CHECKING([for $1 with -latomic]) + LIBS="$LIBS -latomic" + AC_LINK_IFELSE([$2], + [opal_asm_check_func_happy=1 + AC_MSG_RESULT([yes])], + [opal_asm_check_func_happy=0 + LIBS=$opal_asm_check_func_LIBS_save + AC_MSG_RESULT([no])]) + ]) + + dnl If we have it, try it and make sure it gives a correct result. + dnl As of Aug 2018, we know that it links but does *not* work on clang + dnl 6 on ARM64.
+ AS_IF([test $opal_asm_check_func_happy -eq 1], + [AC_MSG_CHECKING([if $1() gives correct results]) + AC_RUN_IFELSE([$2], + [AC_MSG_RESULT([yes])], + [opal_asm_check_func_happy=0 + AC_MSG_RESULT([no])], + [AC_MSG_RESULT([cannot test -- assume yes (cross compiling)])]) + ]) - AC_MSG_CHECKING([for __sync builtin atomic compare-and-swap on 128-bit values with -mcx16 flag]) - AC_RUN_IFELSE([AC_LANG_PROGRAM([], [__int128 x = 0; __sync_bool_compare_and_swap (&x, 0, 1);])], - [AC_MSG_RESULT([yes]) - sync_bool_compare_and_swap_128_result=1 - CFLAGS_save="$CFLAGS"], - [AC_MSG_RESULT([no])], - [AC_MSG_RESULT([no (cross compiling)])]) + dnl If we were unsuccessful, restore CFLAGS/LIBS + AS_IF([test $opal_asm_check_func_happy -eq 0], + [CFLAGS=$opal_asm_check_func_CFLAGS_save + LIBS=$opal_asm_check_func_LIBS_save]) - CFLAGS=$CFLAGS_save - fi - else - AC_MSG_CHECKING([for compiler support of __sync builtin atomic compare-and-swap on 128-bit values]) + dnl Run the user actions + AS_IF([test $opal_asm_check_func_happy -eq 1], [$3], [$4]) - # Check if the compiler supports the __sync builtin - AC_TRY_LINK([], [__int128 x = 0; __sync_bool_compare_and_swap (&x, 0, 1);], - [AC_MSG_RESULT([yes]) - sync_bool_compare_and_swap_128_result=1], - [AC_MSG_RESULT([no])]) + OPAL_VAR_SCOPE_POP +]) - if test $sync_bool_compare_and_swap_128_result = 0 ; then - CFLAGS_save=$CFLAGS - CFLAGS="$CFLAGS -mcx16" +dnl ------------------------------------------------------------------ - AC_MSG_CHECKING([for __sync builtin atomic compare-and-swap on 128-bit values with -mcx16 flag]) - AC_TRY_LINK([], [__int128 x = 0; __sync_bool_compare_and_swap (&x, 0, 1);], - [AC_MSG_RESULT([yes]) - sync_bool_compare_and_swap_128_result=1 - CFLAGS_save="$CFLAGS"], - [AC_MSG_RESULT([no])]) +AC_DEFUN([OPAL_CHECK_SYNC_BUILTIN_CSWAP_INT128], [ + OPAL_VAR_SCOPE_PUSH([sync_bool_compare_and_swap_128_result]) - CFLAGS=$CFLAGS_save - fi - fi + # Do we have __sync_bool_compare_and_swap? 
+ # Use a special macro because we need to check with a few different + # CFLAGS/LIBS. + OPAL_ASM_CHECK_ATOMIC_FUNC([__sync_bool_compare_and_swap], + [AC_LANG_SOURCE(OPAL_SYNC_BOOL_COMPARE_AND_SWAP_TEST_SOURCE)], + [sync_bool_compare_and_swap_128_result=1], + [sync_bool_compare_and_swap_128_result=0]) - AC_DEFINE_UNQUOTED([OPAL_HAVE_SYNC_BUILTIN_CSWAP_INT128], [$sync_bool_compare_and_swap_128_result], - [Whether the __sync builtin atomic compare and swap supports 128-bit values]) + AC_DEFINE_UNQUOTED([OPAL_HAVE_SYNC_BUILTIN_CSWAP_INT128], + [$sync_bool_compare_and_swap_128_result], + [Whether the __sync builtin atomic compare and swap supports 128-bit values]) OPAL_VAR_SCOPE_POP ]) @@ -111,7 +257,7 @@ __sync_add_and_fetch(&tmp, 1);], opal_asm_sync_have_64bit=0]) AC_DEFINE_UNQUOTED([OPAL_ASM_SYNC_HAVE_64BIT],[$opal_asm_sync_have_64bit], - [Whether 64-bit is supported by the __sync builtin atomics]) + [Whether 64-bit is supported by the __sync builtin atomics]) # Check for 128-bit support OPAL_CHECK_SYNC_BUILTIN_CSWAP_INT128 @@ -119,73 +265,45 @@ __sync_add_and_fetch(&tmp, 1);], AC_DEFUN([OPAL_CHECK_GCC_BUILTIN_CSWAP_INT128], [ - - OPAL_VAR_SCOPE_PUSH([atomic_compare_exchange_n_128_result CFLAGS_save]) - - AC_ARG_ENABLE([cross-cmpset128],[AC_HELP_STRING([--enable-cross-cmpset128], - [enable the use of the __sync builtin atomic compare-and-swap 128 when cross compiling])]) - - atomic_compare_exchange_n_128_result=0 - - if test ! 
"$enable_cross_cmpset128" = "yes" ; then - AC_MSG_CHECKING([for processor support of __atomic builtin atomic compare-and-swap on 128-bit values]) - - AC_RUN_IFELSE([AC_LANG_PROGRAM([], [__int128 x = 0, y = 0; __atomic_compare_exchange_n (&x, &y, 1, 0, __ATOMIC_RELAXED, __ATOMIC_RELAXED);])], - [AC_MSG_RESULT([yes]) - atomic_compare_exchange_n_128_result=1], - [AC_MSG_RESULT([no])], - [AC_MSG_RESULT([no (cross compiling)])]) - - if test $atomic_compare_exchange_n_128_result = 0 ; then - CFLAGS_save=$CFLAGS - CFLAGS="$CFLAGS -mcx16" - - AC_MSG_CHECKING([for __atomic builtin atomic compare-and-swap on 128-bit values with -mcx16 flag]) - AC_RUN_IFELSE([AC_LANG_PROGRAM([], [__int128 x = 0, y = 0; __atomic_compare_exchange_n (&x, &y, 1, 0, __ATOMIC_RELAXED, __ATOMIC_RELAXED);])], - [AC_MSG_RESULT([yes]) - atomic_compare_exchange_n_128_result=1 - CFLAGS_save="$CFLAGS"], - [AC_MSG_RESULT([no])], - [AC_MSG_RESULT([no (cross compiling)])]) - - CFLAGS=$CFLAGS_save - fi - - if test $atomic_compare_exchange_n_128_result = 1 ; then - AC_MSG_CHECKING([if __int128 atomic compare-and-swap is always lock-free]) - AC_RUN_IFELSE([AC_LANG_PROGRAM([], [if (!__atomic_always_lock_free(16, 0)) { return 1; }])], + OPAL_VAR_SCOPE_PUSH([atomic_compare_exchange_n_128_result atomic_compare_exchange_n_128_CFLAGS_save atomic_compare_exchange_n_128_LIBS_save]) + + atomic_compare_exchange_n_128_CFLAGS_save=$CFLAGS + atomic_compare_exchange_n_128_LIBS_save=$LIBS + + # Do we have __atomic_compare_exchange_n? + # Use a special macro because we need to check with a few different + # CFLAGS/LIBS. + OPAL_ASM_CHECK_ATOMIC_FUNC([__atomic_compare_exchange_n], + [AC_LANG_SOURCE(OPAL_ATOMIC_COMPARE_EXCHANGE_N_TEST_SOURCE)], + [atomic_compare_exchange_n_128_result=1], + [atomic_compare_exchange_n_128_result=0]) + + # If we have it and it works, check to make sure it is always lock + # free.
+ AS_IF([test $atomic_compare_exchange_n_128_result -eq 1], + [AC_MSG_CHECKING([if __int128 atomic compare-and-swap is always lock-free]) + AC_RUN_IFELSE([AC_LANG_PROGRAM([], [if (!__atomic_always_lock_free(16, 0)) { return 1; }])], [AC_MSG_RESULT([yes])], - [AC_MSG_RESULT([no]) - OPAL_CHECK_SYNC_BUILTIN_CSWAP_INT128 - atomic_compare_exchange_n_128_result=0], - [AC_MSG_RESULT([no (cross compiling)])]) - fi - else - AC_MSG_CHECKING([for compiler support of __atomic builtin atomic compare-and-swap on 128-bit values]) - - # Check if the compiler supports the __atomic builtin - AC_TRY_LINK([], [__int128 x = 0, y = 0; __atomic_compare_exchange_n (&x, &y, 1, 0, __ATOMIC_RELAXED, __ATOMIC_RELAXED);], - [AC_MSG_RESULT([yes]) - atomic_compare_exchange_n_128_result=1], - [AC_MSG_RESULT([no])]) - - if test $atomic_compare_exchange_n_128_result = 0 ; then - CFLAGS_save=$CFLAGS - CFLAGS="$CFLAGS -mcx16" - - AC_MSG_CHECKING([for __atomic builtin atomic compare-and-swap on 128-bit values with -mcx16 flag]) - AC_TRY_LINK([], [__int128 x = 0, y = 0; __atomic_compare_exchange_n (&x, &y, 1, 0, __ATOMIC_RELAXED, __ATOMIC_RELAXED);], - [AC_MSG_RESULT([yes]) - atomic_compare_exchange_n_128_result=1 - CFLAGS_save="$CFLAGS"], - [AC_MSG_RESULT([no])]) - - CFLAGS=$CFLAGS_save - fi - fi - - AC_DEFINE_UNQUOTED([OPAL_HAVE_GCC_BUILTIN_CSWAP_INT128], [$atomic_compare_exchange_n_128_result], - [Whether the __atomic builtin atomic compare and swap is lock-free on 128-bit values]) + [atomic_compare_exchange_n_128_result=0 + # If this test fails, need to reset CFLAGS/LIBS (the + # above tests atomically set CFLAGS/LIBS or not; this + # test is running after the fact, so we have to undo + # the side-effects of setting CFLAGS/LIBS if the above + # tests passed). 
+ CFLAGS=$atomic_compare_exchange_n_128_CFLAGS_save + LIBS=$atomic_compare_exchange_n_128_LIBS_save + AC_MSG_RESULT([no])], + [AC_MSG_RESULT([cannot test -- assume yes (cross compiling)])]) + ]) + + AC_DEFINE_UNQUOTED([OPAL_HAVE_GCC_BUILTIN_CSWAP_INT128], + [$atomic_compare_exchange_n_128_result], + [Whether the __atomic builtin atomic compare swap is both supported and lock-free on 128-bit values]) + + dnl If we could not find decent support for 128-bits __atomic let's + dnl try the GCC _sync + AS_IF([test $atomic_compare_exchange_n_128_result -eq 0], + [OPAL_CHECK_SYNC_BUILTIN_CSWAP_INT128]) OPAL_VAR_SCOPE_POP ]) @@ -726,7 +844,7 @@ AC_DEFUN([OPAL_CHECK_SPARCV8PLUS],[ AC_MSG_CHECKING([if have Sparc v8+/v9 support]) sparc_result=0 OPAL_TRY_ASSEMBLE([$opal_cv_asm_text - casa [%o0] 0x80, %o1, %o2], + casa [%o0] 0x80, %o1, %o2], [sparc_result=1], [sparc_result=0]) if test "$sparc_result" = "1" ; then @@ -745,35 +863,8 @@ dnl dnl OPAL_CHECK_CMPXCHG16B dnl dnl ################################################################# -AC_DEFUN([OPAL_CHECK_CMPXCHG16B],[ - OPAL_VAR_SCOPE_PUSH([cmpxchg16b_result]) - - AC_ARG_ENABLE([cross-cmpxchg16b],[AC_HELP_STRING([--enable-cross-cmpxchg16b], - [enable the use of the cmpxchg16b instruction when cross compiling])]) - - if test ! 
"$enable_cross_cmpxchg16b" = "yes" ; then - AC_MSG_CHECKING([if processor supports x86_64 16-byte compare-and-exchange]) - AC_RUN_IFELSE([AC_LANG_PROGRAM([[unsigned char tmp[16];]],[[ - __asm__ __volatile__ ("lock cmpxchg16b (%%rsi)" : : "S" (tmp) : "memory", "cc");]])], - [AC_MSG_RESULT([yes]) - cmpxchg16b_result=1], - [AC_MSG_RESULT([no]) - cmpxchg16b_result=0], - [AC_MSG_RESULT([no (cross-compiling)]) - cmpxchg16b_result=0]) - else - AC_MSG_CHECKING([if assembler supports x86_64 16-byte compare-and-exchange]) - - OPAL_TRY_ASSEMBLE([$opal_cv_asm_text - cmpxchg16b 0], - [AC_MSG_RESULT([yes]) - cmpxchg16b_result=1], - [AC_MSG_RESULT([no]) - cmpxchg16b_result=0]) - fi - if test "$cmpxchg16b_result" = 1; then - AC_MSG_CHECKING([if compiler correctly handles volatile 128bits]) - AC_RUN_IFELSE([AC_LANG_PROGRAM([#include +AC_DEFUN([OPAL_CMPXCHG16B_TEST_SOURCE],[[ +#include #include union opal_counted_pointer_t { @@ -787,8 +878,10 @@ union opal_counted_pointer_t { int128_t value; #endif }; -typedef union opal_counted_pointer_t opal_counted_pointer_t;], - [volatile opal_counted_pointer_t a; +typedef union opal_counted_pointer_t opal_counted_pointer_t; + +int main(int argc, char* argv) { + volatile opal_counted_pointer_t a; opal_counted_pointer_t b; a.data.counter = 0; @@ -813,12 +906,28 @@ typedef union opal_counted_pointer_t opal_counted_pointer_t;], return (a.value != b.value); #else return 0; -#endif])], - [AC_MSG_RESULT([yes])], - [AC_MSG_RESULT([no]) - cmpxchg16b_result=0], - [AC_MSG_RESULT([untested, assuming ok])]) - fi +#endif +} +]]) + +AC_DEFUN([OPAL_CHECK_CMPXCHG16B],[ + OPAL_VAR_SCOPE_PUSH([cmpxchg16b_result]) + + OPAL_ASM_CHECK_ATOMIC_FUNC([cmpxchg16b], + [AC_LANG_PROGRAM([[unsigned char tmp[16];]], + [[__asm__ __volatile__ ("lock cmpxchg16b (%%rsi)" : : "S" (tmp) : "memory", "cc");]])], + [cmpxchg16b_result=1], + [cmpxchg16b_result=0]) + # If we have it, make sure it works. 
+ AS_IF([test $cmpxchg16b_result -eq 1], + [AC_MSG_CHECKING([if cmpxchg16b_result works]) + AC_RUN_IFELSE([AC_LANG_SOURCE(OPAL_CMPXCHG16B_TEST_SOURCE)], + [AC_MSG_RESULT([yes])], + [cmpxchg16b_result=0 + AC_MSG_RESULT([no])], + [AC_MSG_RESULT([cannot test -- assume yes (cross compiling)])]) + ]) + AC_DEFINE_UNQUOTED([OPAL_HAVE_CMPXCHG16B], [$cmpxchg16b_result], [Whether the processor supports the cmpxchg16b instruction]) OPAL_VAR_SCOPE_POP @@ -884,7 +993,7 @@ return ret; if test "$asm_result" = "yes" ; then OPAL_C_GCC_INLINE_ASSEMBLY=1 - opal_cv_asm_inline_supported="yes" + opal_cv_asm_inline_supported="yes" else OPAL_C_GCC_INLINE_ASSEMBLY=0 fi @@ -960,7 +1069,7 @@ AC_DEFUN([OPAL_CONFIG_ASM],[ OPAL_CHECK_SYNC_BUILTINS([opal_cv_asm_builtin="BUILTIN_SYNC"], [AC_MSG_ERROR([No atomic primitives available for $host])]) ;; - aarch64*) + aarch64*) opal_cv_asm_arch="ARM64" OPAL_ASM_SUPPORT_64BIT=1 OPAL_ASM_ARM_VERSION=8 @@ -1068,11 +1177,11 @@ AC_MSG_ERROR([Can not continue.]) ;; esac - if test "x$OPAL_ASM_SUPPORT_64BIT" = "x1" && test "$opal_cv_asm_builtin" = "BUILTIN_SYNC" && - test "$opal_asm_sync_have_64bit" = "0" ; then - # __sync builtins exist but do not implement 64-bit support. Fall back on inline asm. - opal_cv_asm_builtin="BUILTIN_NO" - fi + if test "x$OPAL_ASM_SUPPORT_64BIT" = "x1" && test "$opal_cv_asm_builtin" = "BUILTIN_SYNC" && + test "$opal_asm_sync_have_64bit" = "0" ; then + # __sync builtins exist but do not implement 64-bit support. Fall back on inline asm. 
+ opal_cv_asm_builtin="BUILTIN_NO" + fi if test "$opal_cv_asm_builtin" = "BUILTIN_SYNC" || test "$opal_cv_asm_builtin" = "BUILTIN_GCC" ; then AC_DEFINE([OPAL_C_GCC_INLINE_ASSEMBLY], [1], @@ -1095,7 +1204,7 @@ AC_MSG_ERROR([Can not continue.]) ;; esac - opal_cv_asm_inline_supported="no" + opal_cv_asm_inline_supported="no" # now that we know our architecture, try to inline assemble OPAL_CHECK_INLINE_C_GCC([$OPAL_GCC_INLINE_ASSIGN]) diff --git a/config/opal_config_subdir_args.m4 b/config/opal_config_subdir_args.m4 index 3b7a35580f9..0d5f8febabc 100644 --- a/config/opal_config_subdir_args.m4 +++ b/config/opal_config_subdir_args.m4 @@ -10,7 +10,7 @@ dnl Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, dnl University of Stuttgart. All rights reserved. dnl Copyright (c) 2004-2005 The Regents of the University of California. dnl All rights reserved. -dnl Copyright (c) 2014-2015 Intel, Inc. All rights reserved. +dnl Copyright (c) 2014-2018 Intel, Inc. All rights reserved. dnl Copyright (c) 2015 Cisco Systems, Inc. All rights reserved. dnl $COPYRIGHT$ dnl @@ -60,6 +60,10 @@ do ;; -with-platform=* | --with-platform=*) ;; + --with*=internal) + ;; + --with*=external) + ;; *) case $subdir_arg in *\'*) subdir_arg=`echo "$subdir_arg" | sed "s/'/'\\\\\\\\''/g"` ;; diff --git a/config/opal_functions.m4 b/config/opal_functions.m4 index 34c965df31f..7a85dbf8766 100644 --- a/config/opal_functions.m4 +++ b/config/opal_functions.m4 @@ -3,7 +3,7 @@ dnl dnl Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana dnl University Research and Technology dnl Corporation. All rights reserved. -dnl Copyright (c) 2004-2005 The University of Tennessee and The University +dnl Copyright (c) 2004-2018 The University of Tennessee and The University dnl of Tennessee Research Foundation. All rights dnl reserved. 
dnl Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, @@ -506,7 +506,7 @@ dnl ####################################################################### dnl ####################################################################### dnl ####################################################################### -# Declare some variables; use OPAL_VAR_SCOPE_END to ensure that they +# Declare some variables; use OPAL_VAR_SCOPE_POP to ensure that they # are cleaned up / undefined. AC_DEFUN([OPAL_VAR_SCOPE_PUSH],[ @@ -524,7 +524,7 @@ AC_DEFUN([OPAL_VAR_SCOPE_PUSH],[ eval $opal_str if test "x$opal_str" != "x"; then - AC_MSG_WARN([Found configure shell variable clash!]) + AC_MSG_WARN([Found configure shell variable clash at line $LINENO!]) AC_MSG_WARN([[OPAL_VAR_SCOPE_PUSH] called on "$opal_var",]) AC_MSG_WARN([but it is already defined with value "$opal_str"]) AC_MSG_WARN([This usually indicates an error in configure.]) diff --git a/config/opal_setup_cc.m4 b/config/opal_setup_cc.m4 index e6cb81c1387..1382e218ce5 100644 --- a/config/opal_setup_cc.m4 +++ b/config/opal_setup_cc.m4 @@ -14,7 +14,7 @@ dnl Copyright (c) 2007-2009 Sun Microsystems, Inc. All rights reserved. dnl Copyright (c) 2008-2015 Cisco Systems, Inc. All rights reserved. dnl Copyright (c) 2012-2017 Los Alamos National Security, LLC. All rights dnl reserved. -dnl Copyright (c) 2015-2018 Research Organization for Information Science +dnl Copyright (c) 2015-2019 Research Organization for Information Science dnl and Technology (RIST). All rights reserved. 
dnl $COPYRIGHT$ dnl @@ -59,7 +59,11 @@ AC_DEFUN([OPAL_PROG_CC_C11_HELPER],[ OPAL_CC_HELPER([if $CC $1 supports C11 _Static_assert], [opal_prog_cc_c11_helper__static_assert_available], [[#include ]],[[_Static_assert(sizeof(int64_t) == 8, "WTH");]]) - AS_IF([test $opal_prog_cc_c11_helper__Thread_local_available -eq 1 && test $opal_prog_cc_c11_helper_atomic_var_available -eq 1], + OPAL_CC_HELPER([if $CC $1 supports C11 atomic_fetch_xor_explicit], [opal_prog_cc_c11_helper_atomic_fetch_xor_explicit_available], + [[#include +#include ]],[[_Atomic uint32_t a; uint32_t b; atomic_fetch_xor_explicit(&a, b, memory_order_relaxed);]]) + + AS_IF([test $opal_prog_cc_c11_helper__Thread_local_available -eq 1 && test $opal_prog_cc_c11_helper_atomic_var_available -eq 1 && test $opal_prog_cc_c11_helper_atomic_fetch_xor_explicit_available -eq 1], [$2], [$3]) @@ -127,7 +131,7 @@ AC_DEFUN([OPAL_SETUP_CC],[ AC_REQUIRE([_OPAL_PROG_CC]) AC_REQUIRE([AM_PROG_CC_C_O]) - OPAL_VAR_SCOPE_PUSH([opal_prog_cc_c11_helper__Thread_local_available opal_prog_cc_c11_helper_atomic_var_available opal_prog_cc_c11_helper__Atomic_available opal_prog_cc_c11_helper__static_assert_available opal_prog_cc_c11_helper__Generic_available opal_prog_cc__thread_available]) + OPAL_VAR_SCOPE_PUSH([opal_prog_cc_c11_helper__Thread_local_available opal_prog_cc_c11_helper_atomic_var_available opal_prog_cc_c11_helper__Atomic_available opal_prog_cc_c11_helper__static_assert_available opal_prog_cc_c11_helper__Generic_available opal_prog_cc__thread_available opal_prog_cc_c11_helper_atomic_fetch_xor_explicit_available]) # AC_PROG_CC_C99 changes CC (instead of CFLAGS) so save CC (without c99 # flags) for use in our wrappers. 
diff --git a/config/orte_config_files.m4 b/config/orte_config_files.m4 index 82a7f7f8246..191d280131c 100644 --- a/config/orte_config_files.m4 +++ b/config/orte_config_files.m4 @@ -25,12 +25,8 @@ AC_DEFUN([ORTE_CONFIG_FILES],[ orte/tools/wrappers/Makefile orte/tools/wrappers/ortecc-wrapper-data.txt orte/tools/wrappers/orte.pc - orte/tools/orte-ps/Makefile orte/tools/orte-clean/Makefile - orte/tools/orte-top/Makefile orte/tools/orte-info/Makefile orte/tools/orte-server/Makefile - orte/tools/orte-dvm/Makefile - orte/tools/ompi-prun/Makefile ]) ]) diff --git a/config/orte_setup_debugger_flags.m4 b/config/orte_setup_debugger_flags.m4 index 39ac77defef..5bd970bf7d8 100644 --- a/config/orte_setup_debugger_flags.m4 +++ b/config/orte_setup_debugger_flags.m4 @@ -10,7 +10,7 @@ dnl Copyright (c) 2004-2007 High Performance Computing Center Stuttgart, dnl University of Stuttgart. All rights reserved. dnl Copyright (c) 2004-2005 The Regents of the University of California. dnl All rights reserved. -dnl Copyright (c) 2006-2009 Cisco Systems, Inc. All rights reserved. +dnl Copyright (c) 2006-2019 Cisco Systems, Inc. All rights reserved. dnl Copyright (c) 2006-2009 Sun Microsystems, Inc. All rights reserved. dnl Copyright (c) 2006-2007 Los Alamos National Security, LLC. All rights dnl reserved. 
@@ -24,6 +24,28 @@ dnl dnl $HEADER$ dnl +dnl Check to see if specific CFLAGS work +dnl $1: compiler flags to check +dnl $2: Action if the flags work +dnl $3: Action if the flags do not work +AC_DEFUN([_ORTE_SETUP_DEBUGGER_FLAGS_TRY_CFLAGS],[ + OPAL_VAR_SCOPE_PUSH([ORTE_SETUP_DEBUGGER_FLAGS_CFLAGS_save]) + + ORTE_SETUP_DEBUGGER_FLAGS_CFLAGS_save=$CFLAGS + AC_MSG_CHECKING([if $1 compiler flag works]) + CFLAGS="$CFLAGS $1" + AC_COMPILE_IFELSE([AC_LANG_PROGRAM([],[int i = 3;])], + [ORTE_SETUP_DEBUGGER_FLAGS_HAPPY=yes], + [ORTE_SETUP_DEBUGGER_FLAGS_HAPPY=no]) + AC_MSG_RESULT([$ORTE_SETUP_DEBUGGER_FLAGS_HAPPY]) + CFLAGS=$ORTE_SETUP_DEBUGGER_FLAGS_CFLAGS_save + + OPAL_VAR_SCOPE_POP + + AS_IF([test $ORTE_SETUP_DEBUGGER_FLAGS_HAPPY = yes], + [$2], [$3]) +]) + AC_DEFUN([ORTE_SETUP_DEBUGGER_FLAGS],[ # # Do a final process of the CFLAGS to make a WITHOUT_OPTFLAGS @@ -53,4 +75,22 @@ AC_DEFUN([ORTE_SETUP_DEBUGGER_FLAGS],[ AC_SUBST(CFLAGS_WITHOUT_OPTFLAGS) AC_SUBST(DEBUGGER_CFLAGS) + + # Check for compiler specific flag to add in unwind information. + # This is needed when attaching using MPIR to unwind back to the + # user's main function. Certain optimisations can prevent GDB from + # producing a stack when explicit unwind information is unavailable. + # This is implied by -g, but we want to save space and don't need + # full debug symbols. + _ORTE_SETUP_DEBUGGER_FLAGS_TRY_CFLAGS([-fasynchronous-unwind-tables], + [MPIR_UNWIND_CFLAGS="-fasynchronous-unwind-tables"], + [_ORTE_SETUP_DEBUGGER_FLAGS_TRY_CFLAGS([-Meh_frame -Mframe], + [MPIR_UNWIND_CFLAGS="-Meh_frame -Mframe"], + [MPIR_UNWIND_CFLAGS=-g]) + ]) + + AC_MSG_CHECKING([for final compiler unwind flags]) + AC_MSG_RESULT([$MPIR_UNWIND_CFLAGS]) + + AC_SUBST(MPIR_UNWIND_CFLAGS) ]) diff --git a/configure.ac b/configure.ac index 92d661c305f..e696df3f7c0 100644 --- a/configure.ac +++ b/configure.ac @@ -10,7 +10,7 @@ # University of Stuttgart. All rights reserved. 
# Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. -# Copyright (c) 2006-2018 Cisco Systems, Inc. All rights reserved +# Copyright (c) 2006-2019 Cisco Systems, Inc. All rights reserved # Copyright (c) 2006-2008 Sun Microsystems, Inc. All rights reserved. # Copyright (c) 2006-2017 Los Alamos National Security, LLC. All rights # reserved. @@ -19,7 +19,7 @@ # Copyright (c) 2012 Oracle and/or its affiliates. All rights reserved. # Copyright (c) 2013 Mellanox Technologies, Inc. # All rights reserved. -# Copyright (c) 2013-2017 Intel, Inc. All rights reserved. +# Copyright (c) 2013-2019 Intel, Inc. All rights reserved. # Copyright (c) 2014-2017 Research Organization for Information Science # and Technology (RIST). All rights reserved. # Copyright (c) 2016-2017 IBM Corporation. All rights reserved. @@ -154,7 +154,6 @@ AC_SUBST(libopen_pal_so_version) # transparently by adding some intelligence in autogen.pl # and/or opal_mca.m4, but I don't have the cycles to do this # right now. 
-AC_SUBST(libmca_opal_common_ofi_so_version) AC_SUBST(libmca_opal_common_cuda_so_version) AC_SUBST(libmca_opal_common_sm_so_version) AC_SUBST(libmca_opal_common_ugni_so_version) @@ -261,6 +260,7 @@ m4_ifdef([project_oshmem], OPAL_CONFIGURE_OPTIONS OPAL_CHECK_OS_FLAVORS OPAL_CHECK_CUDA +OPAL_CHECK_PMI OPAL_CHECK_PMIX m4_ifdef([project_orte], [ORTE_CONFIGURE_OPTIONS]) m4_ifdef([project_ompi], [OMPI_CONFIGURE_OPTIONS]) @@ -858,7 +858,7 @@ OPAL_SEARCH_LIBS_CORE([ceil], [m]) # -lrt might be needed for clock_gettime OPAL_SEARCH_LIBS_CORE([clock_gettime], [rt]) -AC_CHECK_FUNCS([asprintf snprintf vasprintf vsnprintf openpty isatty getpwuid fork waitpid execve pipe ptsname setsid mmap tcgetpgrp posix_memalign strsignal sysconf syslog vsyslog regcmp regexec regfree _NSGetEnviron socketpair strncpy_s usleep mkfifo dbopen dbm_open statfs statvfs setpgid setenv __malloc_initialize_hook]) +AC_CHECK_FUNCS([asprintf snprintf vasprintf vsnprintf openpty isatty getpwuid fork waitpid execve pipe ptsname setsid mmap tcgetpgrp posix_memalign strsignal sysconf syslog vsyslog regcmp regexec regfree _NSGetEnviron socketpair strncpy_s usleep mkfifo dbopen dbm_open statfs statvfs setpgid setenv __malloc_initialize_hook __clear_cache]) # Sanity check: ensure that we got at least one of statfs or statvfs. 
if test $ac_cv_func_statfs = no && test $ac_cv_func_statvfs = no; then diff --git a/contrib/Makefile.am b/contrib/Makefile.am index bf78f975ad5..029fea39def 100644 --- a/contrib/Makefile.am +++ b/contrib/Makefile.am @@ -88,12 +88,8 @@ EXTRA_DIST = \ platform/lanl/darwin/mic-common \ platform/lanl/darwin/debug \ platform/lanl/darwin/debug.conf \ - platform/lanl/darwin/debug-mic \ - platform/lanl/darwin/debug-mic.conf \ platform/lanl/darwin/optimized \ platform/lanl/darwin/optimized.conf \ - platform/lanl/darwin/optimized-mic \ - platform/lanl/darwin/optimized-mic.conf \ platform/snl/portals4-m5 \ platform/snl/portals4-orte \ platform/ibm/debug-ppc32-gcc \ diff --git a/contrib/dist/linux/README b/contrib/dist/linux/README index 045b3734b7f..f9a3aa8841c 100644 --- a/contrib/dist/linux/README +++ b/contrib/dist/linux/README @@ -86,6 +86,9 @@ Please, do NOT set the same settings with parameters and config vars. file from the tarball specified on the command line. By default, the script will look for the specfile in the current directory. +-R directory + Specifies the top level RPM build directory. + -h Prints script usage information. diff --git a/contrib/dist/linux/buildrpm.sh b/contrib/dist/linux/buildrpm.sh index 84a9045b39c..3b609ee574c 100755 --- a/contrib/dist/linux/buildrpm.sh +++ b/contrib/dist/linux/buildrpm.sh @@ -58,6 +58,9 @@ # file from the tarball specified on the command line. By default, # the script will look for the specfile in the current directory. # +# -R directory +# Specifies the top level RPM build directory. +# # -h # Prints script usage information. 
# @@ -107,7 +110,7 @@ orig_param="$@" # # usage information # -usage="Usage: $0 [-b][-o][-m][-d][-u][-s][-h] [-n name][-f lf_location][-t tm_location] tarball +usage="Usage: $0 [-b][-o][-m][-d][-u][-s][-h] [-n name][-f lf_location][-t tm_location][-R directory] tarball -b build all-in-one binary RPM only (required for all other flags to work) @@ -146,6 +149,9 @@ usage="Usage: $0 [-b][-o][-m][-d][-u][-s][-h] [-n name][-f lf_location][-t tm_lo -r parameter add custom RPM build parameter + -R directory + Specifies the top level RPM build direcotry. + -h print this message and exit tarball path to Open MPI source tarball @@ -155,8 +161,9 @@ usage="Usage: $0 [-b][-o][-m][-d][-u][-s][-h] [-n name][-f lf_location][-t tm_lo # parse args # libfabric_path="" +rpmtopdir= -while getopts bn:omif:t:dc:r:sh flag; do +while getopts bn:omif:t:dc:r:sR:h flag; do case "$flag" in b) build_srpm="no" build_single="yes" @@ -180,6 +187,8 @@ while getopts bn:omif:t:dc:r:sh flag; do ;; r) configure_options="$rpmbuild_options $OPTARG" ;; + R) rpmtopdir="$OPTARG" + ;; s) unpack_spec="1" ;; h) echo "$usage" 1>&2 @@ -267,25 +276,30 @@ fi # Find where the top RPM-building directory is # -rpmtopdir= -file=~/.rpmmacros -if test -r $file; then - rpmtopdir=${rpmtopdir:-"`grep %_topdir $file | awk '{ print $2 }'`"} +# if the user did not specify an $rpmtopdir, check for an .rpmmacros file. +if test "$rpmtopdir" == ""; then + file=~/.rpmmacros + if test -r $file; then + rpmtopdir=${rpmtopdir:-"`grep %_topdir $file | awk '{ print $2 }'`"} + fi fi + +# If needed, initialize the $rpmtopdir directory. If no $rpmtopdir was +# specified, try various system-level defaults. if test "$rpmtopdir" != ""; then - rpmbuild_options="$rpmbuild_options --define '_topdir $rpmtopdir'" + rpmbuild_options="$rpmbuild_options --define '_topdir $rpmtopdir'" if test ! 
-d "$rpmtopdir"; then - mkdir -p "$rpmtopdir" - mkdir -p "$rpmtopdir/BUILD" - mkdir -p "$rpmtopdir/RPMS" - mkdir -p "$rpmtopdir/RPMS/i386" - mkdir -p "$rpmtopdir/RPMS/i586" - mkdir -p "$rpmtopdir/RPMS/i686" - mkdir -p "$rpmtopdir/RPMS/noarch" - mkdir -p "$rpmtopdir/RPMS/athlon" - mkdir -p "$rpmtopdir/SOURCES" - mkdir -p "$rpmtopdir/SPECS" - mkdir -p "$rpmtopdir/SRPMS" + mkdir -p "$rpmtopdir" + mkdir -p "$rpmtopdir/BUILD" + mkdir -p "$rpmtopdir/RPMS" + mkdir -p "$rpmtopdir/RPMS/i386" + mkdir -p "$rpmtopdir/RPMS/i586" + mkdir -p "$rpmtopdir/RPMS/i686" + mkdir -p "$rpmtopdir/RPMS/noarch" + mkdir -p "$rpmtopdir/RPMS/athlon" + mkdir -p "$rpmtopdir/SOURCES" + mkdir -p "$rpmtopdir/SPECS" + mkdir -p "$rpmtopdir/SRPMS" fi need_root=0 elif test -d /usr/src/RPM; then diff --git a/contrib/dist/linux/openmpi.spec b/contrib/dist/linux/openmpi.spec index 2a80af296b8..01a7b1b4d0f 100644 --- a/contrib/dist/linux/openmpi.spec +++ b/contrib/dist/linux/openmpi.spec @@ -600,18 +600,18 @@ grep -v -f devel.files remaining.files > docs.files # runtime sub package %if !%{sysconfdir_in_prefix} -grep -v %{_sysconfdir} runtime.files > tmp.files +grep -v %{_sysconfdir} runtime.files > tmp.files | /bin/true mv tmp.files runtime.files %endif -grep -v %{_pkgdatadir} runtime.files > tmp.files +grep -v %{_pkgdatadir} runtime.files > tmp.files | /bin/true mv tmp.files runtime.files # devel sub package -grep -v %{_includedir} devel.files > tmp.files +grep -v %{_includedir} devel.files > tmp.files | /bin/true mv tmp.files devel.files # docs sub package -grep -v %{_mandir} docs.files > tmp.files +grep -v %{_mandir} docs.files > tmp.files | /bin/true mv tmp.files docs.files %endif diff --git a/contrib/dist/make-authors.pl b/contrib/dist/make-authors.pl index 92df0a4b230..fc9f7c161d5 100755 --- a/contrib/dist/make-authors.pl +++ b/contrib/dist/make-authors.pl @@ -29,7 +29,7 @@ # directory and make life easier. chdir($srcdir); -if (! -d ".git") { +if (! 
-e ".git") { if ($skip_ok == 0) { print STDERR "I don't seem to be in a git repo :(\n"; exit(1); diff --git a/contrib/platform/lanl/darwin/debug-mic.conf b/contrib/platform/lanl/darwin/debug-mic.conf deleted file mode 100644 index 20e183856f9..00000000000 --- a/contrib/platform/lanl/darwin/debug-mic.conf +++ /dev/null @@ -1,100 +0,0 @@ -# -# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana -# University Research and Technology -# Corporation. All rights reserved. -# Copyright (c) 2004-2005 The University of Tennessee and The University -# of Tennessee Research Foundation. All rights -# reserved. -# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, -# University of Stuttgart. All rights reserved. -# Copyright (c) 2004-2005 The Regents of the University of California. -# All rights reserved. -# Copyright (c) 2006 Cisco Systems, Inc. All rights reserved. -# Copyright (c) 2011-2013 Los Alamos National Security, LLC. -# All rights reserved. -# $COPYRIGHT$ -# -# Additional copyrights may follow -# -# $HEADER$ -# - -# This is the default system-wide MCA parameters defaults file. -# Specifically, the MCA parameter "mca_param_files" defaults to a -# value of -# "$HOME/.openmpi/mca-params.conf:$sysconf/openmpi-mca-params.conf" -# (this file is the latter of the two). So if the default value of -# mca_param_files is not changed, this file is used to set system-wide -# MCA parameters. This file can therefore be used to set system-wide -# default MCA parameters for all users. Of course, users can override -# these values if they want, but this file is an excellent location -# for setting system-specific MCA parameters for those users who don't -# know / care enough to investigate the proper values for them. - -# Note that this file is only applicable where it is visible (in a -# filesystem sense). Specifically, MPI processes each read this file -# during their startup to determine what default values for MCA -# parameters should be used. 
mpirun does not bundle up the values in -# this file from the node where it was run and send them to all nodes; -# the default value decisions are effectively distributed. Hence, -# these values are only applicable on nodes that "see" this file. If -# $sysconf is a directory on a local disk, it is likely that changes -# to this file will need to be propagated to other nodes. If $sysconf -# is a directory that is shared via a networked filesystem, changes to -# this file will be visible to all nodes that share this $sysconf. - -# The format is straightforward: one per line, mca_param_name = -# rvalue. Quoting is ignored (so if you use quotes or escape -# characters, they'll be included as part of the value). For example: - -# Disable run-time MPI parameter checking -# mpi_param_check = 0 - -# Note that the value "~/" will be expanded to the current user's home -# directory. For example: - -# Change component loading path -# component_path = /usr/local/lib/openmpi:~/my_openmpi_components - -# See "ompi_info --param all all" for a full listing of Open MPI MCA -# parameters available and their default values. 
-# - -# Basic behavior to smooth startup -mca_base_component_show_load_errors = 0 -opal_set_max_sys_limits = 1 -orte_report_launch_progress = 1 - -# Define timeout for daemons to report back during launch -orte_startup_timeout = 10000 - -## Protect the shared file systems -orte_no_session_dirs = /panfs,/scratch,/users,/usr/projects -orte_tmpdir_base = /tmp - -## Require an allocation to run - protects the frontend -## from inadvertent job executions -orte_allocation_required = 1 - -## Add the interface for out-of-band communication -## and set it up -oob_tcp_if_include=mic0 -oob_tcp_peer_retries = 1000 -oob_tcp_sndbuf = 32768 -oob_tcp_rcvbuf = 32768 - -## Define the MPI interconnects -btl = sm,scif,openib,self - -## Setup OpenIB - just in case -btl_openib_want_fork_support = 0 -btl_openib_receive_queues = S,4096,1024:S,12288,512:S,65536,512 - -## Enable cpu affinity -hwloc_base_binding_policy = core - -## Setup MPI options -mpi_show_handle_leaks = 1 -mpi_warn_on_fork = 1 -#mpi_abort_print_stack = 1 - diff --git a/contrib/platform/lanl/darwin/debug.conf b/contrib/platform/lanl/darwin/debug.conf index 35b4fc9c579..a28026cc4ba 100644 --- a/contrib/platform/lanl/darwin/debug.conf +++ b/contrib/platform/lanl/darwin/debug.conf @@ -10,7 +10,7 @@ # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. # Copyright (c) 2006 Cisco Systems, Inc. All rights reserved. -# Copyright (c) 2011-2013 Los Alamos National Security, LLC. +# Copyright (c) 2011-2018 Los Alamos National Security, LLC. # All rights reserved. 
# $COPYRIGHT$ # @@ -84,7 +84,7 @@ oob_tcp_sndbuf = 32768 oob_tcp_rcvbuf = 32768 ## Define the MPI interconnects -btl = sm,scif,openib,self +btl = sm,openib,self ## Setup OpenIB - just in case btl_openib_want_fork_support = 0 diff --git a/contrib/platform/lanl/darwin/optimized-mic.conf b/contrib/platform/lanl/darwin/optimized-mic.conf deleted file mode 100644 index c59f2c233cf..00000000000 --- a/contrib/platform/lanl/darwin/optimized-mic.conf +++ /dev/null @@ -1,100 +0,0 @@ -# -# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana -# University Research and Technology -# Corporation. All rights reserved. -# Copyright (c) 2004-2005 The University of Tennessee and The University -# of Tennessee Research Foundation. All rights -# reserved. -# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, -# University of Stuttgart. All rights reserved. -# Copyright (c) 2004-2005 The Regents of the University of California. -# All rights reserved. -# Copyright (c) 2006 Cisco Systems, Inc. All rights reserved. -# Copyright (c) 2011-2013 Los Alamos National Security, LLC. All rights -# reserved. -# $COPYRIGHT$ -# -# Additional copyrights may follow -# -# $HEADER$ -# - -# This is the default system-wide MCA parameters defaults file. -# Specifically, the MCA parameter "mca_param_files" defaults to a -# value of -# "$HOME/.openmpi/mca-params.conf:$sysconf/openmpi-mca-params.conf" -# (this file is the latter of the two). So if the default value of -# mca_param_files is not changed, this file is used to set system-wide -# MCA parameters. This file can therefore be used to set system-wide -# default MCA parameters for all users. Of course, users can override -# these values if they want, but this file is an excellent location -# for setting system-specific MCA parameters for those users who don't -# know / care enough to investigate the proper values for them. - -# Note that this file is only applicable where it is visible (in a -# filesystem sense). 
Specifically, MPI processes each read this file -# during their startup to determine what default values for MCA -# parameters should be used. mpirun does not bundle up the values in -# this file from the node where it was run and send them to all nodes; -# the default value decisions are effectively distributed. Hence, -# these values are only applicable on nodes that "see" this file. If -# $sysconf is a directory on a local disk, it is likely that changes -# to this file will need to be propagated to other nodes. If $sysconf -# is a directory that is shared via a networked filesystem, changes to -# this file will be visible to all nodes that share this $sysconf. - -# The format is straightforward: one per line, mca_param_name = -# rvalue. Quoting is ignored (so if you use quotes or escape -# characters, they'll be included as part of the value). For example: - -# Disable run-time MPI parameter checking -# mpi_param_check = 0 - -# Note that the value "~/" will be expanded to the current user's home -# directory. For example: - -# Change component loading path -# component_path = /usr/local/lib/openmpi:~/my_openmpi_components - -# See "ompi_info --param all all" for a full listing of Open MPI MCA -# parameters available and their default values. 
-# - -# Basic behavior to smooth startup -mca_base_component_show_load_errors = 0 -opal_set_max_sys_limits = 1 -orte_report_launch_progress = 1 - -# Define timeout for daemons to report back during launch -orte_startup_timeout = 10000 - -## Protect the shared file systems -orte_no_session_dirs = /panfs,/scratch,/users,/usr/projects -orte_tmpdir_base = /tmp - -## Require an allocation to run - protects the frontend -## from inadvertent job executions -orte_allocation_required = 1 - -## Add the interface for out-of-band communication -## and set it up -oob_tcp_if_include = mic0 -oob_tcp_peer_retries = 1000 -oob_tcp_sndbuf = 32768 -oob_tcp_rcvbuf = 32768 - -## Define the MPI interconnects -btl = sm,scif,openib,self - -## Setup OpenIB - just in case -btl_openib_want_fork_support = 0 -btl_openib_receive_queues = S,4096,1024:S,12288,512:S,65536,512 - -## Enable cpu affinity -hwloc_base_binding_policy = core - -## Setup MPI options -mpi_show_handle_leaks = 0 -mpi_warn_on_fork = 1 -#mpi_abort_print_stack = 0 - diff --git a/contrib/platform/lanl/darwin/optimized.conf b/contrib/platform/lanl/darwin/optimized.conf index 6313a49a8dd..1012072a433 100644 --- a/contrib/platform/lanl/darwin/optimized.conf +++ b/contrib/platform/lanl/darwin/optimized.conf @@ -10,7 +10,7 @@ # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. # Copyright (c) 2006 Cisco Systems, Inc. All rights reserved. -# Copyright (c) 2011-2013 Los Alamos National Security, LLC. All rights +# Copyright (c) 2011-2018 Los Alamos National Security, LLC. All rights # reserved. 
# $COPYRIGHT$ # @@ -84,7 +84,7 @@ oob_tcp_sndbuf = 32768 oob_tcp_rcvbuf = 32768 ## Define the MPI interconnects -btl = sm,scif,openib,self +btl = sm,openib,self ## Setup OpenIB - just in case btl_openib_want_fork_support = 0 diff --git a/contrib/platform/mellanox/optimized b/contrib/platform/mellanox/optimized index ae89528ce5f..f49a0576c64 100644 --- a/contrib/platform/mellanox/optimized +++ b/contrib/platform/mellanox/optimized @@ -1,7 +1,7 @@ -enable_mca_no_build=coll-ml +enable_mca_no_build=coll-ml,btl-uct enable_debug_symbols=yes enable_orterun_prefix_by_default=yes -with_verbs=yes +with_verbs=no with_devel_headers=yes enable_oshmem=yes enable_oshmem_fortran=yes @@ -23,26 +23,11 @@ if [ "$mellanox_autodetect" == "yes" ]; then with_ucx=$ucx_dir fi - mxm_dir=${mxm_dir:="$(pkg-config --variable=prefix mxm)"} - if [ -d $mxm_dir ]; then - with_mxm=$mxm_dir - fi - - fca_dir=${fca_dir:="$(pkg-config --variable=prefix fca)"} - if [ -d $fca_dir ]; then - with_fca=$fca_dir - fi - hcoll_dir=${hcoll_dir:="$(pkg-config --variable=prefix hcoll)"} if [ -d $hcoll_dir ]; then with_hcoll=$hcoll_dir fi - knem_dir=${knem_dir:="$(pkg-config --variable=prefix knem)"} - if [ -d $knem_dir ]; then - with_knem=$knem_dir - fi - slurm_dir=${slurm_dir:="/usr"} if [ -f $slurm_dir/include/slurm/slurm.h ]; then with_slurm=$slurm_dir diff --git a/contrib/platform/mellanox/optimized.conf b/contrib/platform/mellanox/optimized.conf index d4fe40d513c..048d85f3427 100644 --- a/contrib/platform/mellanox/optimized.conf +++ b/contrib/platform/mellanox/optimized.conf @@ -56,12 +56,10 @@ # See "ompi_info --param all all" for a full listing of Open MPI MCA # parameters available and their default values. 
-coll_fca_enable = 0 -scoll_fca_enable = 0 #rmaps_base_mapping_policy = dist:auto coll = ^ml hwloc_base_binding_policy = core -btl = vader,openib,self +btl = self # Basic behavior to smooth startup mca_base_component_show_load_errors = 0 orte_abort_timeout = 10 @@ -77,3 +75,13 @@ oob_tcp_sndbuf = 32768 oob_tcp_rcvbuf = 32768 opal_event_include=epoll + +opal_warn_on_missing_libcuda = 0 + +bml_r2_show_unreach_errors = 0 + +# alltoall algorithm selection settings for tuned coll mca +coll_tuned_alltoall_large_msg = 250000 +coll_tuned_alltoall_min_procs = 2048 +coll_tuned_alltoall_algorithm_max_requests = 8 + diff --git a/ompi/Makefile.am b/ompi/Makefile.am index cd5ff0f5281..5d1ce31ea88 100644 --- a/ompi/Makefile.am +++ b/ompi/Makefile.am @@ -15,7 +15,7 @@ # Copyright (c) 2013-2015 Los Alamos National Security, LLC. All rights # reserved. # Copyright (c) 2015-2017 Intel, Inc. All rights reserved. -# Copyright (c) 2015-2017 Research Organization for Information Science +# Copyright (c) 2015-2018 Research Organization for Information Science # and Technology (RIST). All rights reserved. # Copyright (c) 2016 IBM Corporation. All rights reserved. # Copyright (c) 2018 FUJITSU LIMITED. All rights reserved. 
@@ -93,6 +93,7 @@ SUBDIRS = \ $(OMPI_FORTRAN_USEMPI_DIR) \ mpi/fortran/mpiext-use-mpi \ mpi/fortran/use-mpi-f08/mod \ + mpi/fortran/use-mpi-f08/bindings \ $(OMPI_MPIEXT_USEMPIF08_DIRS) \ mpi/fortran/use-mpi-f08 \ mpi/fortran/mpiext-use-mpi-f08 \ @@ -124,6 +125,7 @@ DIST_SUBDIRS = \ mpi/fortran/mpiext-use-mpi \ mpi/fortran/use-mpi-f08 \ mpi/fortran/use-mpi-f08/mod \ + mpi/fortran/use-mpi-f08/bindings \ mpi/fortran/mpiext-use-mpi-f08 \ mpi/java \ $(OMPI_MPIEXT_ALL_SUBDIRS) \ diff --git a/ompi/communicator/comm.c b/ompi/communicator/comm.c index 228abae7ab7..f0ad19e4f8d 100644 --- a/ompi/communicator/comm.c +++ b/ompi/communicator/comm.c @@ -1884,6 +1884,10 @@ int ompi_comm_enable(ompi_communicator_t *old_comm, { int ret = OMPI_SUCCESS; + /* set the rank information before calling nextcid */ + new_comm->c_local_group->grp_my_rank = new_rank; + new_comm->c_my_rank = new_rank; + /* Determine context id. It is identical to f_2_c_handle */ ret = ompi_comm_nextcid (new_comm, old_comm, NULL, NULL, NULL, false, OMPI_COMM_CID_INTRA); diff --git a/ompi/datatype/ompi_datatype.h b/ompi/datatype/ompi_datatype.h index 8b48bc30973..f589c874b64 100644 --- a/ompi/datatype/ompi_datatype.h +++ b/ompi/datatype/ompi_datatype.h @@ -1,6 +1,6 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* - * Copyright (c) 2009-2013 The University of Tennessee and The University + * Copyright (c) 2009-2019 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. * Copyright (c) 2009 Oak Ridge National Labs. All rights reserved. diff --git a/ompi/datatype/ompi_datatype_args.c b/ompi/datatype/ompi_datatype_args.c index 737d3e51827..cc50bc968e1 100644 --- a/ompi/datatype/ompi_datatype_args.c +++ b/ompi/datatype/ompi_datatype_args.c @@ -13,8 +13,8 @@ * Copyright (c) 2009 Oak Ridge National Labs. All rights reserved. * Copyright (c) 2013-2017 Los Alamos National Security, LLC. All rights * reserved. 
- * Copyright (c) 2015-2017 Research Organization for Information Science - * and Technology (RIST). All rights reserved. + * Copyright (c) 2015-2019 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * Copyright (c) 2017 IBM Corporation. All rights reserved. * $COPYRIGHT$ * @@ -237,6 +237,8 @@ int32_t ompi_datatype_set_args( ompi_datatype_t* pData, */ OBJ_RETAIN( d[pos] ); pArgs->total_pack_size += ((ompi_datatype_args_t*)d[pos]->args)->total_pack_size; + } else { + pArgs->total_pack_size += sizeof(int); /* _NAMED */ } pArgs->total_pack_size += sizeof(int); /* each data has an ID */ } diff --git a/ompi/datatype/ompi_datatype_create_contiguous.c b/ompi/datatype/ompi_datatype_create_contiguous.c index fb44673ef5c..6a287caa41c 100644 --- a/ompi/datatype/ompi_datatype_create_contiguous.c +++ b/ompi/datatype/ompi_datatype_create_contiguous.c @@ -3,7 +3,7 @@ * Copyright (c) 2004-2006 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. - * Copyright (c) 2004-2013 The University of Tennessee and The University + * Copyright (c) 2004-2019 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. 
* Copyright (c) 2004-2006 High Performance Computing Center Stuttgart, @@ -29,13 +29,12 @@ int32_t ompi_datatype_create_contiguous( int count, const ompi_datatype_t* oldTy { ompi_datatype_t* pdt; - if( 0 == count ) { - pdt = ompi_datatype_create( 0 ); - ompi_datatype_add( pdt, &ompi_mpi_datatype_null.dt, 0, 0, 0 ); - } else { - pdt = ompi_datatype_create( oldType->super.desc.used + 2 ); - opal_datatype_add( &(pdt->super), &(oldType->super), count, 0, (oldType->super.ub - oldType->super.lb) ); + if( (0 == count) || (0 == oldType->super.size) ) { + return ompi_datatype_duplicate( &ompi_mpi_datatype_null.dt, newType); } + + pdt = ompi_datatype_create( oldType->super.desc.used + 2 ); + opal_datatype_add( &(pdt->super), &(oldType->super), count, 0, (oldType->super.ub - oldType->super.lb) ); *newType = pdt; return OMPI_SUCCESS; } diff --git a/ompi/datatype/ompi_datatype_create_darray.c b/ompi/datatype/ompi_datatype_create_darray.c index a245dcebce4..e0292755c4b 100644 --- a/ompi/datatype/ompi_datatype_create_darray.c +++ b/ompi/datatype/ompi_datatype_create_darray.c @@ -192,9 +192,7 @@ int32_t ompi_datatype_create_darray(int size, if (ndims < 1) { /* Don't just return MPI_DATATYPE_NULL as that can't be MPI_TYPE_FREE()ed, and that seems bad */ - *newtype = ompi_datatype_create(0); - ompi_datatype_add(*newtype, &ompi_mpi_datatype_null.dt, 0, 0, 0); - return MPI_SUCCESS; + return ompi_datatype_duplicate( &ompi_mpi_datatype_null.dt, newtype); } rc = ompi_datatype_type_extent(oldtype, &orig_extent); diff --git a/ompi/datatype/ompi_datatype_create_indexed.c b/ompi/datatype/ompi_datatype_create_indexed.c index 50c521b7bf9..2684d9d7df0 100644 --- a/ompi/datatype/ompi_datatype_create_indexed.c +++ b/ompi/datatype/ompi_datatype_create_indexed.c @@ -3,7 +3,7 @@ * Copyright (c) 2004-2006 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. 
- * Copyright (c) 2004-2013 The University of Tennessee and The University + * Copyright (c) 2004-2019 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. * Copyright (c) 2004-2006 High Performance Computing Center Stuttgart, @@ -15,6 +15,7 @@ * Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2015-2017 Research Organization for Information Science * and Technology (RIST). All rights reserved. + * Copyright (c) 2019 IBM Corporation. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -33,23 +34,28 @@ int32_t ompi_datatype_create_indexed( int count, const int* pBlockLength, const int* pDisp, const ompi_datatype_t* oldType, ompi_datatype_t** newType ) { + ptrdiff_t extent, disp, endat; ompi_datatype_t* pdt; - int i, dLength, endat, disp; - ptrdiff_t extent; + size_t dLength; + int i; - if( 0 == count ) { + /* ignore all cases that lead to an empty type */ + ompi_datatype_type_size(oldType, &dLength); + for( i = 0; (i < count) && (0 == pBlockLength[i]); i++ ); /* find first non zero */ + if( (i == count) || (0 == dLength) ) { return ompi_datatype_duplicate( &ompi_mpi_datatype_null.dt, newType); } - disp = pDisp[0]; - dLength = pBlockLength[0]; + disp = pDisp[i]; + dLength = pBlockLength[i]; endat = disp + dLength; ompi_datatype_type_extent( oldType, &extent ); - pdt = ompi_datatype_create( count * (2 + oldType->super.desc.used) ); - for( i = 1; i < count; i++ ) { - if( endat == pDisp[i] ) { - /* contiguous with the previsious */ + pdt = ompi_datatype_create( (count - i) * (2 + oldType->super.desc.used) ); + for( i += 1; i < count; i++ ) { + if( 0 == pBlockLength[i] ) /* ignore empty length */ + continue; + if( endat == pDisp[i] ) { /* contiguous with the previsious */ dLength += pBlockLength[i]; endat += pBlockLength[i]; } else { @@ -69,25 +75,28 @@ int32_t ompi_datatype_create_indexed( int count, const int* pBlockLength, const int32_t ompi_datatype_create_hindexed( 
int count, const int* pBlockLength, const ptrdiff_t* pDisp, const ompi_datatype_t* oldType, ompi_datatype_t** newType ) { - ompi_datatype_t* pdt; - int i, dLength; ptrdiff_t extent, disp, endat; + ompi_datatype_t* pdt; + size_t dLength; + int i; - if( 0 == count ) { - *newType = ompi_datatype_create( 0 ); - ompi_datatype_add( *newType, &ompi_mpi_datatype_null.dt, 0, 0, 0); - return OMPI_SUCCESS; + /* ignore all cases that lead to an empty type */ + ompi_datatype_type_size(oldType, &dLength); + for( i = 0; (i < count) && (0 == pBlockLength[i]); i++ ); /* find first non zero */ + if( (i == count) || (0 == dLength) ) { + return ompi_datatype_duplicate( &ompi_mpi_datatype_null.dt, newType); } ompi_datatype_type_extent( oldType, &extent ); - pdt = ompi_datatype_create( count * (2 + oldType->super.desc.used) ); - disp = pDisp[0]; - dLength = pBlockLength[0]; + disp = pDisp[i]; + dLength = pBlockLength[i]; endat = disp + dLength * extent; - for( i = 1; i < count; i++ ) { - if( endat == pDisp[i] ) { - /* contiguous with the previsious */ + pdt = ompi_datatype_create( (count - i) * (2 + oldType->super.desc.used) ); + for( i += 1; i < count; i++ ) { + if( 0 == pBlockLength[i] ) /* ignore empty length */ + continue; + if( endat == pDisp[i] ) { /* contiguous with the previsious */ dLength += pBlockLength[i]; endat += pBlockLength[i] * extent; } else { @@ -107,20 +116,15 @@ int32_t ompi_datatype_create_hindexed( int count, const int* pBlockLength, const int32_t ompi_datatype_create_indexed_block( int count, int bLength, const int* pDisp, const ompi_datatype_t* oldType, ompi_datatype_t** newType ) { + ptrdiff_t extent, disp, endat; ompi_datatype_t* pdt; - int i, dLength, endat, disp; - ptrdiff_t extent; + size_t dLength; + int i; - ompi_datatype_type_extent( oldType, &extent ); if( (count == 0) || (bLength == 0) ) { - if( 0 == count ) { - return ompi_datatype_duplicate(&ompi_mpi_datatype_null.dt, newType); - } else { - *newType = ompi_datatype_create(1); - ompi_datatype_add( 
*newType, oldType, 0, pDisp[0] * extent, extent ); - return OMPI_SUCCESS; - } + return ompi_datatype_duplicate(&ompi_mpi_datatype_null.dt, newType); } + ompi_datatype_type_extent( oldType, &extent ); pdt = ompi_datatype_create( count * (2 + oldType->super.desc.used) ); disp = pDisp[0]; dLength = bLength; @@ -146,33 +150,29 @@ int32_t ompi_datatype_create_indexed_block( int count, int bLength, const int* p int32_t ompi_datatype_create_hindexed_block( int count, int bLength, const ptrdiff_t* pDisp, const ompi_datatype_t* oldType, ompi_datatype_t** newType ) { - ompi_datatype_t* pdt; - int i, dLength; ptrdiff_t extent, disp, endat; + ompi_datatype_t* pdt; + size_t dLength; + int i; - ompi_datatype_type_extent( oldType, &extent ); if( (count == 0) || (bLength == 0) ) { - *newType = ompi_datatype_create(1); - if( 0 == count ) - ompi_datatype_add( *newType, &ompi_mpi_datatype_null.dt, 0, 0, 0 ); - else - ompi_datatype_add( *newType, oldType, 0, pDisp[0] * extent, extent ); - return OMPI_SUCCESS; + return ompi_datatype_duplicate(&ompi_mpi_datatype_null.dt, newType); } + ompi_datatype_type_extent( oldType, &extent ); pdt = ompi_datatype_create( count * (2 + oldType->super.desc.used) ); disp = pDisp[0]; dLength = bLength; - endat = disp + dLength; + endat = disp + dLength * extent; for( i = 1; i < count; i++ ) { if( endat == pDisp[i] ) { /* contiguous with the previsious */ dLength += bLength; - endat += bLength; + endat += bLength * extent; } else { ompi_datatype_add( pdt, oldType, dLength, disp, extent ); disp = pDisp[i]; dLength = bLength; - endat = disp + bLength; + endat = disp + bLength * extent; } } ompi_datatype_add( pdt, oldType, dLength, disp, extent ); diff --git a/ompi/datatype/ompi_datatype_create_struct.c b/ompi/datatype/ompi_datatype_create_struct.c index 98daa8bacbb..9c78f53fee3 100644 --- a/ompi/datatype/ompi_datatype_create_struct.c +++ b/ompi/datatype/ompi_datatype_create_struct.c @@ -3,7 +3,7 @@ * Copyright (c) 2004-2006 The Trustees of Indiana 
University and Indiana * University Research and Technology * Corporation. All rights reserved. - * Copyright (c) 2004-2013 The University of Tennessee and The University + * Copyright (c) 2004-2019 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. * Copyright (c) 2004-2006 High Performance Computing Center Stuttgart, @@ -31,27 +31,27 @@ int32_t ompi_datatype_create_struct( int count, const int* pBlockLength, const ptrdiff_t* pDisp, ompi_datatype_t* const * pTypes, ompi_datatype_t** newType ) { - int i; ptrdiff_t disp = 0, endto, lastExtent, lastDisp; - int lastBlock; ompi_datatype_t *pdt, *lastType; + int lastBlock; + int i, start_from; - if( 0 == count ) { - *newType = ompi_datatype_create( 0 ); - ompi_datatype_add( *newType, &ompi_mpi_datatype_null.dt, 0, 0, 0); - return OMPI_SUCCESS; + /* Find first non-zero length element */ + for( i = 0; (i < count) && (0 == pBlockLength[i]); i++ ); + if( i == count ) { /* either nothing or nothing relevant */ + return ompi_datatype_duplicate( &ompi_mpi_datatype_null.dt, newType); } - - /* if we compute the total number of elements before we can + /* compute the total number of elements before we can * avoid increasing the size of the desc array often. 
*/ - lastType = (ompi_datatype_t*)pTypes[0]; - lastBlock = pBlockLength[0]; + start_from = i; + lastType = (ompi_datatype_t*)pTypes[start_from]; + lastBlock = pBlockLength[start_from]; lastExtent = lastType->super.ub - lastType->super.lb; - lastDisp = pDisp[0]; - endto = pDisp[0] + lastExtent * lastBlock; + lastDisp = pDisp[start_from]; + endto = pDisp[start_from] + lastExtent * lastBlock; - for( i = 1; i < count; i++ ) { + for( i = (start_from + 1); i < count; i++ ) { if( (pTypes[i] == lastType) && (pDisp[i] == endto) ) { lastBlock += pBlockLength[i]; endto = lastDisp + lastBlock * lastExtent; @@ -68,16 +68,16 @@ int32_t ompi_datatype_create_struct( int count, const int* pBlockLength, const p disp += lastType->super.desc.used; if( lastBlock != 1 ) disp += 2; - lastType = (ompi_datatype_t*)pTypes[0]; - lastBlock = pBlockLength[0]; + lastType = (ompi_datatype_t*)pTypes[start_from]; + lastBlock = pBlockLength[start_from]; lastExtent = lastType->super.ub - lastType->super.lb; - lastDisp = pDisp[0]; - endto = pDisp[0] + lastExtent * lastBlock; + lastDisp = pDisp[start_from]; + endto = pDisp[start_from] + lastExtent * lastBlock; pdt = ompi_datatype_create( (int32_t)disp ); /* Do again the same loop but now add the elements */ - for( i = 1; i < count; i++ ) { + for( i = (start_from + 1); i < count; i++ ) { if( (pTypes[i] == lastType) && (pDisp[i] == endto) ) { lastBlock += pBlockLength[i]; endto = lastDisp + lastBlock * lastExtent; diff --git a/ompi/datatype/ompi_datatype_create_vector.c b/ompi/datatype/ompi_datatype_create_vector.c index 1de8df4d2d2..c4829a4b54c 100644 --- a/ompi/datatype/ompi_datatype_create_vector.c +++ b/ompi/datatype/ompi_datatype_create_vector.c @@ -3,7 +3,7 @@ * Copyright (c) 2004-2006 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. 
- * Copyright (c) 2004-2013 The University of Tennessee and The University + * Copyright (c) 2004-2019 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. * Copyright (c) 2004-2006 High Performance Computing Center Stuttgart, @@ -28,23 +28,14 @@ #include "ompi/datatype/ompi_datatype.h" -/* Open questions ... - * - how to improuve the handling of these vectors (creating a temporary datatype - * can be ONLY a initial solution. - * - */ - int32_t ompi_datatype_create_vector( int count, int bLength, int stride, const ompi_datatype_t* oldType, ompi_datatype_t** newType ) { ompi_datatype_t *pTempData, *pData; ptrdiff_t extent = oldType->super.ub - oldType->super.lb; - - if( 0 == count ) { - *newType = ompi_datatype_create( 0 ); - ompi_datatype_add( *newType, &ompi_mpi_datatype_null.dt, 0, 0, 0); - return OMPI_SUCCESS; + if( (0 == count) || (0 == bLength) ) { + return ompi_datatype_duplicate( &ompi_mpi_datatype_null.dt, newType); } pData = ompi_datatype_create( oldType->super.desc.used + 2 ); @@ -72,10 +63,8 @@ int32_t ompi_datatype_create_hvector( int count, int bLength, ptrdiff_t stride, ompi_datatype_t *pTempData, *pData; ptrdiff_t extent = oldType->super.ub - oldType->super.lb; - if( 0 == count ) { - *newType = ompi_datatype_create( 0 ); - ompi_datatype_add( *newType, &ompi_mpi_datatype_null.dt, 0, 0, 0); - return OMPI_SUCCESS; + if( (0 == count) || (0 == bLength) ) { + return ompi_datatype_duplicate( &ompi_mpi_datatype_null.dt, newType); } pTempData = ompi_datatype_create( oldType->super.desc.used + 2 ); diff --git a/ompi/datatype/ompi_datatype_external.c b/ompi/datatype/ompi_datatype_external.c index d47531ef29e..53b907218cf 100644 --- a/ompi/datatype/ompi_datatype_external.c +++ b/ompi/datatype/ompi_datatype_external.c @@ -3,7 +3,7 @@ * Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. 
- * Copyright (c) 2004-2016 The University of Tennessee and The University + * Copyright (c) 2004-2019 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, @@ -26,7 +26,6 @@ #include #include "ompi/runtime/params.h" -#include "ompi/communicator/communicator.h" #include "ompi/datatype/ompi_datatype.h" #include "opal/datatype/opal_convertor.h" diff --git a/ompi/datatype/ompi_datatype_module.c b/ompi/datatype/ompi_datatype_module.c index 3ee09173cd8..33e8d9b9e92 100644 --- a/ompi/datatype/ompi_datatype_module.c +++ b/ompi/datatype/ompi_datatype_module.c @@ -3,7 +3,7 @@ * Copyright (c) 2004-2006 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. - * Copyright (c) 2004-2017 The University of Tennessee and The University + * Copyright (c) 2004-2019 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. 
* Copyright (c) 2004-2006 High Performance Computing Center Stuttgart, @@ -736,14 +736,14 @@ void ompi_datatype_dump( const ompi_datatype_t* pData ) length = length * 100 + 500; buffer = (char*)malloc( length ); index += snprintf( buffer, length - index, - "Datatype %p[%s] id %d size %ld align %d opal_id %d length %d used %d\n" - "true_lb %ld true_ub %ld (true_extent %ld) lb %ld ub %ld (extent %ld)\n" - "nbElems %d loops %d flags %X (", - (void*)pData, pData->name, pData->id, - (long)pData->super.size, (int)pData->super.align, pData->super.id, (int)pData->super.desc.length, (int)pData->super.desc.used, - (long)pData->super.true_lb, (long)pData->super.true_ub, (long)(pData->super.true_ub - pData->super.true_lb), - (long)pData->super.lb, (long)pData->super.ub, (long)(pData->super.ub - pData->super.lb), - (int)pData->super.nbElems, (int)pData->super.loops, (int)pData->super.flags ); + "Datatype %p[%s] id %d size %" PRIsize_t " align %u opal_id %u length %" PRIsize_t " used %" PRIsize_t "\n" + "true_lb %td true_ub %td (true_extent %td) lb %td ub %td (extent %td)\n" + "nbElems %" PRIsize_t " loops %u flags %X (", + (void*)pData, pData->name, pData->id, + pData->super.size, pData->super.align, (uint32_t)pData->super.id, pData->super.desc.length, pData->super.desc.used, + pData->super.true_lb, pData->super.true_ub, pData->super.true_ub - pData->super.true_lb, + pData->super.lb, pData->super.ub, pData->super.ub - pData->super.lb, + pData->super.nbElems, pData->super.loops, (int)pData->super.flags ); /* dump the flags */ if( ompi_datatype_is_predefined(pData) ) { index += snprintf( buffer + index, length - index, "predefined " ); diff --git a/ompi/dpm/dpm.c b/ompi/dpm/dpm.c index 14810f6b028..9fb42dcce08 100644 --- a/ompi/dpm/dpm.c +++ b/ompi/dpm/dpm.c @@ -15,7 +15,7 @@ * Copyright (c) 2009 Sun Microsystems, Inc. All rights reserved. * Copyright (c) 2011-2015 Los Alamos National Security, LLC. All rights * reserved. - * Copyright (c) 2013-2017 Intel, Inc. 
All rights reserved. + * Copyright (c) 2013-2019 Intel, Inc. All rights reserved. * Copyright (c) 2014-2017 Research Organization for Information Science * and Technology (RIST). All rights reserved. * $COPYRIGHT$ @@ -404,9 +404,43 @@ int ompi_dpm_connect_accept(ompi_communicator_t *comm, int root, goto exit; } if (0 < opal_list_get_size(&ilist)) { + uint32_t *peer_ranks = NULL; + int prn, nprn; + char *val, *mycpuset; + uint16_t u16; + opal_process_name_t wildcard_rank; /* convert the list of new procs to a proc_t array */ new_proc_list = (ompi_proc_t**)calloc(opal_list_get_size(&ilist), sizeof(ompi_proc_t *)); + /* get the list of local peers for the new procs */ + cd = (ompi_dpm_proct_caddy_t*)opal_list_get_first(&ilist); + proc = cd->p; + wildcard_rank.jobid = proc->super.proc_name.jobid; + wildcard_rank.vpid = OMPI_NAME_WILDCARD->vpid; + /* retrieve the local peers */ + OPAL_MODEX_RECV_VALUE_OPTIONAL(rc, OPAL_PMIX_LOCAL_PEERS, + &wildcard_rank, &val, OPAL_STRING); + if (OPAL_SUCCESS == rc && NULL != val) { + char **peers = opal_argv_split(val, ','); + free(val); + nprn = opal_argv_count(peers); + peer_ranks = (uint32_t*)calloc(nprn, sizeof(uint32_t)); + for (prn = 0; NULL != peers[prn]; prn++) { + peer_ranks[prn] = strtoul(peers[prn], NULL, 10); + } + opal_argv_free(peers); + } + + /* get my locality string */ + val = NULL; + OPAL_MODEX_RECV_VALUE_OPTIONAL(rc, OPAL_PMIX_LOCALITY_STRING, + OMPI_PROC_MY_NAME, &val, OPAL_STRING); + if (OPAL_SUCCESS == rc && NULL != val) { + mycpuset = val; + } else { + mycpuset = NULL; + } + i = 0; OPAL_LIST_FOREACH(cd, &ilist, ompi_dpm_proct_caddy_t) { opal_value_t *kv; @@ -416,15 +450,41 @@ int ompi_dpm_connect_accept(ompi_communicator_t *comm, int root, * OPAL_PMIX_LOCALITY and OPAL_PMIX_HOSTNAME. 
since we can live without * them, we are just fine */ ompi_proc_complete_init_single(proc); - /* save the locality for later */ - kv = OBJ_NEW(opal_value_t); - kv->key = strdup(OPAL_PMIX_LOCALITY); - kv->type = OPAL_UINT16; - kv->data.uint16 = proc->super.proc_flags; - opal_pmix.store_local(&proc->super.proc_name, kv); - OBJ_RELEASE(kv); // maintain accounting + /* if this proc is local, then get its locality */ + if (NULL != peer_ranks) { + for (prn=0; prn < nprn; prn++) { + if (peer_ranks[prn] == proc->super.proc_name.vpid) { + /* get their locality string */ + val = NULL; + OPAL_MODEX_RECV_VALUE_IMMEDIATE(rc, OPAL_PMIX_LOCALITY_STRING, + &proc->super.proc_name, &val, OPAL_STRING); + if (OPAL_SUCCESS == rc && NULL != val) { + u16 = opal_hwloc_compute_relative_locality(mycpuset, val); + free(val); + } else { + /* all we can say is that it shares our node */ + u16 = OPAL_PROC_ON_CLUSTER | OPAL_PROC_ON_CU | OPAL_PROC_ON_NODE; + } + proc->super.proc_flags = u16; + /* save the locality for later */ + kv = OBJ_NEW(opal_value_t); + kv->key = strdup(OPAL_PMIX_LOCALITY); + kv->type = OPAL_UINT16; + kv->data.uint16 = proc->super.proc_flags; + opal_pmix.store_local(&proc->super.proc_name, kv); + OBJ_RELEASE(kv); // maintain accounting + break; + } + } + } ++i; } + if (NULL != mycpuset) { + free(mycpuset); + } + if (NULL != peer_ranks) { + free(peer_ranks); + } /* call add_procs on the new ones */ rc = MCA_PML_CALL(add_procs(new_proc_list, opal_list_get_size(&ilist))); free(new_proc_list); @@ -589,7 +649,11 @@ int ompi_dpm_disconnect(ompi_communicator_t *comm) /* ensure we tell the host RM to disconnect us - this * is a blocking operation so just use a fence */ - ret = opal_pmix.fence(&coll, false); + if (OMPI_SUCCESS != (ret = opal_pmix.fence(&coll, false))) { + OMPI_ERROR_LOG(ret); + OPAL_LIST_DESTRUCT(&coll); + return ret; + } OPAL_LIST_DESTRUCT(&coll); return ret; diff --git a/ompi/include/mpi.h.in b/ompi/include/mpi.h.in index df101a1c858..9a8c4877f63 100644 --- 
a/ompi/include/mpi.h.in +++ b/ompi/include/mpi.h.in @@ -9,7 +9,7 @@ * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. - * Copyright (c) 2007-2018 Cisco Systems, Inc. All rights reserved + * Copyright (c) 2007-2019 Cisco Systems, Inc. All rights reserved * Copyright (c) 2008-2009 Sun Microsystems, Inc. All rights reserved. * Copyright (c) 2009-2012 Oak Rigde National Laboratory. All rights reserved. * Copyright (c) 2011 Sandia National Laboratories. All rights reserved. @@ -17,9 +17,10 @@ * reserved. * Copyright (c) 2011-2013 INRIA. All rights reserved. * Copyright (c) 2015 University of Houston. All rights reserved. - * Copyright (c) 2015-2018 Research Organization for Information Science - * and Technology (RIST). All rights reserved. - * Copyright (c) 2017 IBM Corporation. All rights reserved. + * Copyright (c) 2015-2019 Research Organization for Information Science + * and Technology (RIST). All rights reserved. + * Copyright (c) 2017-2019 IBM Corporation. All rights reserved. + * Copyright (c) 2018 FUJITSU LIMITED. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -279,10 +280,52 @@ # define __mpi_interface_deprecated__(msg) __attribute__((__deprecated__)) # endif # endif -# if OMPI_ENABLE_MPI1_COMPAT -# define __mpi_interface_removed__(msg) __mpi_interface_deprecated__(msg) -# define OMPI_OMIT_MPI1_COMPAT_DECLS 0 -# endif +# endif + + /* For MPI removed APIs, there is no generally portable way to cause + * the C compiler to error with a nice message, on the _usage_ of + * one of these symbols. We've gone with a tiered approach: + * + * If the user configured with --enable-mpi1-compatibility, + * just emit a compile-time warning (via the deprecation function + * attribute) that they're using an MPI1 removed function. + * + * Otherwise, we'd like to issue a fatal error directing the user + * that they've used an MPI1 removed function. 
If the user's + * compiler supports the C11 _Static_assert feature, we #define + * the MPI routines to instead be a call to _Static_assert + * with an appropriate message suggesting the new MPI3 equivalent. + * + * Otherwise, if the user's compiler supports the error function + * attribute, define the MPI routines with that error attribute. + * This is supported by most modern GNU compilers. + * + * Finally if the compiler doesn't support any of those, just + * don't declare those MPI routines at all in mpi.h + * + * Don't do MACRO magic for building Profiling library as it + * interferes with the above. + */ +# if defined(OMPI_OMIT_MPI1_COMPAT_DECLS) + /* The user set OMPI_OMIT_MPI1_COMPAT_DECLS, do what he commands */ +# elif (OMPI_ENABLE_MPI1_COMPAT || OMPI_BUILDING) +# define OMPI_OMIT_MPI1_COMPAT_DECLS 0 +# define OMPI_REMOVED_USE_STATIC_ASSERT 0 +# define __mpi_interface_removed__(func, newfunc) __mpi_interface_deprecated__(#func " was removed in MPI-3.0. Use " #newfunc " instead. continuing...") +# elif (__STDC_VERSION__ >= 201112L) +# define OMPI_OMIT_MPI1_COMPAT_DECLS 1 +# define OMPI_REMOVED_USE_STATIC_ASSERT 1 +// This macro definition may show up in compiler output. So we both +// outdent it back to column 0 and give it a user-friendly name to +// help users grok what we are trying to tell them here. +#define THIS_SYMBOL_WAS_REMOVED_IN_MPI30(func, newfunc) _Static_assert(0, #func " was removed in MPI-3.0. Use " #newfunc " instead.") +# elif OPAL_HAVE_ATTRIBUTE_ERROR +# define OMPI_OMIT_MPI1_COMPAT_DECLS 0 +# define OMPI_REMOVED_USE_STATIC_ASSERT 0 +# define __mpi_interface_removed__(func, newfunc) __attribute__((__error__(#func " was removed in MPI-3.0. 
Use " #newfunc " instead."))) +# else +# define OMPI_OMIT_MPI1_COMPAT_DECLS 1 +# define OMPI_REMOVED_USE_STATIC_ASSERT 0 # endif # endif #endif @@ -297,7 +340,15 @@ #endif #if !defined(__mpi_interface_removed__) -# define __mpi_interface_removed__(msg) +# define __mpi_interface_removed__(A,B) +#endif + +#if !defined(THIS_SYMBOL_WAS_REMOVED_IN_MPI30) +# define THIS_SYMBOL_WAS_REMOVED_IN_MPI30(func, newfunc) +#endif + +#if !defined(OMPI_REMOVED_USE_STATIC_ASSERT) +# define OMPI_REMOVED_USE_STATIC_ASSERT 0 #endif #if !defined(OMPI_OMIT_MPI1_COMPAT_DECLS) @@ -325,7 +376,11 @@ * when building OMPI). */ #if !OMPI_BUILDING +#if defined(c_plusplus) || defined(__cplusplus) +#define OMPI_PREDEFINED_GLOBAL(type, global) (static_cast<type> (static_cast<void *> (&(global)))) +#else #define OMPI_PREDEFINED_GLOBAL(type, global) ((type) ((void *) &(global))) +#endif #else #define OMPI_PREDEFINED_GLOBAL(type, global) ((type) &(global)) #endif @@ -377,7 +432,6 @@ typedef struct ompi_status_public_t ompi_status_public_t; /* * User typedefs - * */ typedef int (MPI_Datarep_extent_function)(MPI_Datatype, MPI_Aint *, void *); typedef int (MPI_Datarep_conversion_function)(void *, MPI_Datatype, @@ -389,7 +443,6 @@ typedef void (MPI_Comm_errhandler_function)(MPI_Comm *, int *, ...); allows us to maintain a stable ABI within OMPI, at least for apps that don't use MPI I/O. */ typedef void (ompi_file_errhandler_fn)(MPI_File *, int *, ...); -typedef ompi_file_errhandler_fn MPI_File_errhandler_function; typedef void (MPI_Win_errhandler_function)(MPI_Win *, int *, ...); typedef void (MPI_User_function)(void *, void *, int *, MPI_Datatype *); typedef int (MPI_Comm_copy_attr_function)(MPI_Comm, int, void *, @@ -406,26 +459,17 @@ typedef int (MPI_Grequest_query_function)(void *, MPI_Status *); typedef int (MPI_Grequest_free_function)(void *); typedef int (MPI_Grequest_cancel_function)(void *, int); -#if !OMPI_OMIT_MPI1_COMPAT_DECLS /* - * Removed typedefs. These will be deleted in a future Open MPI release. 
+ * Deprecated typedefs. Usage is discouraged, as these may be deleted + * in future versions of the MPI Standard. */ typedef MPI_Comm_errhandler_function MPI_Comm_errhandler_fn - __mpi_interface_removed__("MPI_Comm_errhandler_fn was removed in MPI-3.0; use MPI_Comm_errhandler_function instead"); + __mpi_interface_deprecated__("MPI_Comm_errhandler_fn was deprecated in MPI-2.2; use MPI_Comm_errhandler_function instead"); typedef ompi_file_errhandler_fn MPI_File_errhandler_fn - __mpi_interface_removed__("MPI_File_errhandler_fn was removed in MPI-3.0; use MPI_File_errhandler_function instead"); + __mpi_interface_deprecated__("MPI_File_errhandler_fn was deprecated in MPI-2.2; use MPI_File_errhandler_function instead"); +typedef ompi_file_errhandler_fn MPI_File_errhandler_function; typedef MPI_Win_errhandler_function MPI_Win_errhandler_fn - __mpi_interface_removed__("MPI_Win_errhandler_fn was removed in MPI-3.0; use MPI_Win_errhandler_function instead"); - -// NOTE: We intentionally do *not* mark the following as -// deprecated/removed because they are used below in function -// prototypes (and would therefore emit warnings, just for #including -// ). 
-typedef void (MPI_Handler_function)(MPI_Comm *, int *, ...); -typedef int (MPI_Copy_function)(MPI_Comm, int, void *, - void *, void *, int *); -typedef int (MPI_Delete_function)(MPI_Comm, int, void *, void *); -#endif /* !OMPI_OMIT_MPI1_COMPAT_DECLS */ + __mpi_interface_deprecated__("MPI_Win_errhandler_fn was deprecated in MPI-2.2; use MPI_Win_errhandler_function instead"); /* @@ -658,13 +702,25 @@ enum { MPI_COMBINER_DUP, MPI_COMBINER_CONTIGUOUS, MPI_COMBINER_VECTOR, +#if (OMPI_ENABLE_MPI1_COMPAT || OMPI_BUILDING) MPI_COMBINER_HVECTOR_INTEGER, +#else + OMPI_WAS_MPI_COMBINER_HVECTOR_INTEGER, /* preserve ABI compatibility */ +#endif MPI_COMBINER_HVECTOR, MPI_COMBINER_INDEXED, +#if (OMPI_ENABLE_MPI1_COMPAT || OMPI_BUILDING) MPI_COMBINER_HINDEXED_INTEGER, +#else + OMPI_WAS_MPI_COMBINER_HINDEXED_INTEGER, /* preserve ABI compatibility */ +#endif MPI_COMBINER_HINDEXED, MPI_COMBINER_INDEXED_BLOCK, +#if (OMPI_ENABLE_MPI1_COMPAT || OMPI_BUILDING) MPI_COMBINER_STRUCT_INTEGER, +#else + OMPI_WAS_MPI_COMBINER_STRUCT_INTEGER, /* preserve ABI compatibility */ +#endif MPI_COMBINER_STRUCT, MPI_COMBINER_SUBARRAY, MPI_COMBINER_DARRAY, @@ -675,6 +731,20 @@ enum { MPI_COMBINER_HINDEXED_BLOCK }; +#if (OMPI_ENABLE_MPI1_COMPAT || OMPI_BUILDING) +#else +/* If not building or configured --enable-mpi1-compatibility, then + * we don't want these combiners, instead we define MPI_COMBINER_*_INTEGER + * to our Static Assert message if the compiler supports + * static assertions with a nice message. 
+ */ +# if (OMPI_REMOVED_USE_STATIC_ASSERT) +# define MPI_COMBINER_HVECTOR_INTEGER THIS_SYMBOL_WAS_REMOVED_IN_MPI30(MPI_COMBINER_HVECTOR_INTEGER, MPI_COMBINER_HVECTOR); +# define MPI_COMBINER_HINDEXED_INTEGER THIS_SYMBOL_WAS_REMOVED_IN_MPI30(MPI_COMBINER_HINDEXED_INTEGER, MPI_COMBINER_HINDEXED); +# define MPI_COMBINER_STRUCT_INTEGER THIS_SYMBOL_WAS_REMOVED_IN_MPI30(MPI_COMBINER_STRUCT_INTEGER, MPI_COMBINER_STRUCT); +# endif /* OMPI_REMOVED_USE_STATIC_ASSERT */ +#endif /* Removed datatypes */ + /* * Communicator split type constants. * Do not change the order of these without also modifying mpif.h.in @@ -776,8 +846,13 @@ enum { */ #define MPI_INFO_ENV OMPI_PREDEFINED_GLOBAL(MPI_Info, ompi_mpi_info_env) +#if defined(c_plusplus) || defined(__cplusplus) +#define MPI_STATUS_IGNORE (static_cast<MPI_Status *> (0)) +#define MPI_STATUSES_IGNORE (static_cast<MPI_Status *> (0)) +#else #define MPI_STATUS_IGNORE ((MPI_Status *) 0) #define MPI_STATUSES_IGNORE ((MPI_Status *) 0) +#endif /* * Special MPI_T handles @@ -804,9 +879,6 @@ enum { protection for this case. */ #if !defined(OMPI_COMPILING_FORTRAN_WRAPPERS) -#define MPI_NULL_DELETE_FN OMPI_C_MPI_NULL_DELETE_FN -#define MPI_NULL_COPY_FN OMPI_C_MPI_NULL_COPY_FN -#define MPI_DUP_FN OMPI_C_MPI_DUP_FN #define MPI_TYPE_NULL_DELETE_FN OMPI_C_MPI_TYPE_NULL_DELETE_FN #define MPI_TYPE_NULL_COPY_FN OMPI_C_MPI_TYPE_NULL_COPY_FN @@ -875,24 +947,6 @@ OMPI_DECLSPEC int OMPI_C_MPI_WIN_DUP_FN( MPI_Win window, int win_keyval, void* attribute_val_in, void* attribute_val_out, int* flag ); -#if !OMPI_OMIT_MPI1_COMPAT_DECLS -/* - * Removed in MPI-3.0. Will be deleted in a future Open MPI release. 
- */ -OMPI_DECLSPEC int OMPI_C_MPI_NULL_DELETE_FN( MPI_Comm comm, int comm_keyval, - void* attribute_val_out, - void* extra_state ); -OMPI_DECLSPEC int OMPI_C_MPI_NULL_COPY_FN( MPI_Comm comm, int comm_keyval, - void* extra_state, - void* attribute_val_in, - void* attribute_val_out, - int* flag ); -OMPI_DECLSPEC int OMPI_C_MPI_DUP_FN( MPI_Comm comm, int comm_keyval, - void* extra_state, - void* attribute_val_in, - void* attribute_val_out, - int* flag ); -#endif /* !OMPI_OMIT_MPI1_COMPAT_DECLS */ /* * External variables @@ -1033,13 +1087,32 @@ OMPI_DECLSPEC extern struct ompi_predefined_info_t ompi_mpi_info_env; OMPI_DECLSPEC extern MPI_Fint *MPI_F_STATUS_IGNORE; OMPI_DECLSPEC extern MPI_Fint *MPI_F_STATUSES_IGNORE; -#if !OMPI_OMIT_MPI1_COMPAT_DECLS /* - * Removed datatypes + * Removed datatypes. These datatypes are only available if Open MPI + * was configured with --enable-mpi1-compatibility. + * + * These datatypes were formally removed from the MPI specification + * and should no longer be used in MPI applications. */ -OMPI_DECLSPEC extern struct ompi_predefined_datatype_t ompi_mpi_lb __mpi_interface_removed__("MPI_LB was removed in MPI-3.0"); -OMPI_DECLSPEC extern struct ompi_predefined_datatype_t ompi_mpi_ub __mpi_interface_removed__("MPI_UB was removed in MPI-3.0"); -#endif /* !OMPI_OMIT_MPI1_COMPAT_DECLS */ +#if (OMPI_ENABLE_MPI1_COMPAT || OMPI_BUILDING) +# define MPI_UB OMPI_PREDEFINED_GLOBAL(MPI_Datatype, ompi_mpi_ub) +# define MPI_LB OMPI_PREDEFINED_GLOBAL(MPI_Datatype, ompi_mpi_lb) + +OMPI_DECLSPEC extern struct ompi_predefined_datatype_t ompi_mpi_lb; +OMPI_DECLSPEC extern struct ompi_predefined_datatype_t ompi_mpi_ub; + +#else +/* If not building or configured --enable-mpi1-compatibility, then + * we don't want these datatypes, instead we define MPI_UB and + * MPI_LB to our Static Assert message if the compiler supports + * static assertions with a nice message. 
+ */ +# if (OMPI_REMOVED_USE_STATIC_ASSERT) +# define MPI_UB THIS_SYMBOL_WAS_REMOVED_IN_MPI30(MPI_UB, MPI_Type_create_resized); +# define MPI_LB THIS_SYMBOL_WAS_REMOVED_IN_MPI30(MPI_LB, MPI_Type_create_resized); +# endif /* OMPI_REMOVED_USE_STATIC_ASSERT */ +#endif /* Removed datatypes */ + /* * MPI predefined handles @@ -1088,8 +1161,7 @@ OMPI_DECLSPEC extern struct ompi_predefined_datatype_t ompi_mpi_ub __mpi_interfa #define MPI_LONG_INT OMPI_PREDEFINED_GLOBAL(MPI_Datatype, ompi_mpi_long_int) #define MPI_SHORT_INT OMPI_PREDEFINED_GLOBAL(MPI_Datatype, ompi_mpi_short_int) #define MPI_2INT OMPI_PREDEFINED_GLOBAL(MPI_Datatype, ompi_mpi_2int) -#define MPI_UB OMPI_PREDEFINED_GLOBAL(MPI_Datatype, ompi_mpi_ub) -#define MPI_LB OMPI_PREDEFINED_GLOBAL(MPI_Datatype, ompi_mpi_lb) + #define MPI_WCHAR OMPI_PREDEFINED_GLOBAL(MPI_Datatype, ompi_mpi_wchar) #if OPAL_HAVE_LONG_LONG #define MPI_LONG_LONG_INT OMPI_PREDEFINED_GLOBAL(MPI_Datatype, ompi_mpi_long_long_int) @@ -2617,91 +2689,167 @@ OMPI_DECLSPEC int MPI_T_pvar_readreset(MPI_T_pvar_session session, MPI_T_pvar_h OMPI_DECLSPEC int MPI_T_enum_get_info(MPI_T_enum enumtype, int *num, char *name, int *name_len); OMPI_DECLSPEC int MPI_T_enum_get_item(MPI_T_enum enumtype, int index, int *value, char *name, int *name_len); - -#if !OMPI_OMIT_MPI1_COMPAT_DECLS /* - * Removed APIs + * Deprecated prototypes. Usage is discouraged, as these may be + * deleted in future versions of the MPI Standard. 
*/ -OMPI_DECLSPEC int MPI_Address(void *location, MPI_Aint *address) - __mpi_interface_removed__("MPI_Address is superseded by MPI_Get_address in MPI-2.0"); OMPI_DECLSPEC int MPI_Attr_delete(MPI_Comm comm, int keyval) - __mpi_interface_deprecated__("MPI_Attr_delete is superseded by MPI_Comm_delete_attr in MPI-2.0"); + __mpi_interface_deprecated__("MPI_Attr_delete was deprecated in MPI-2.0; use MPI_Comm_delete_attr instead"); +OMPI_DECLSPEC int PMPI_Attr_delete(MPI_Comm comm, int keyval) + __mpi_interface_deprecated__("PMPI_Attr_delete was deprecated in MPI-2.0; use PMPI_Comm_delete_attr instead"); OMPI_DECLSPEC int MPI_Attr_get(MPI_Comm comm, int keyval, void *attribute_val, int *flag) - __mpi_interface_deprecated__("MPI_Attr_get is superseded by MPI_Comm_get_attr in MPI-2.0"); + __mpi_interface_deprecated__("MPI_Attr_get was deprecated in MPI-2.0; use MPI_Comm_get_attr instead"); +OMPI_DECLSPEC int PMPI_Attr_get(MPI_Comm comm, int keyval, void *attribute_val, int *flag) + __mpi_interface_deprecated__("PMPI_Attr_get was deprecated in MPI-2.0; use PMPI_Comm_get_attr instead"); OMPI_DECLSPEC int MPI_Attr_put(MPI_Comm comm, int keyval, void *attribute_val) - __mpi_interface_deprecated__("MPI_Attr_put is superseded by MPI_Comm_set_attr in MPI-2.0"); -OMPI_DECLSPEC int MPI_Errhandler_create(MPI_Handler_function *function, - MPI_Errhandler *errhandler) - __mpi_interface_removed__("MPI_Errhandler_create is superseded by MPI_Comm_create_errhandler in MPI-2.0"); -OMPI_DECLSPEC int MPI_Errhandler_get(MPI_Comm comm, MPI_Errhandler *errhandler) - __mpi_interface_removed__("MPI_Errhandler_get is superseded by MPI_Comm_get_errhandler in MPI-2.0"); -OMPI_DECLSPEC int MPI_Errhandler_set(MPI_Comm comm, MPI_Errhandler errhandler) - __mpi_interface_removed__("MPI_Errhandler_set is superseded by MPI_Comm_set_errhandler in MPI-2.0"); + __mpi_interface_deprecated__("MPI_Attr_put was deprecated in MPI-2.0; use MPI_Comm_set_attr instead"); +OMPI_DECLSPEC int PMPI_Attr_put(MPI_Comm comm, 
int keyval, void *attribute_val) + __mpi_interface_deprecated__("PMPI_Attr_put was deprecated in MPI-2.0; use PMPI_Comm_set_attr instead"); + +/* + * Even though MPI_Copy_function and MPI_Delete_function are + * deprecated, we do not use the attributes marking them as such, + * because otherwise the compiler will warn for all the functions that + * are declared using them (e.g., MPI_Keyval_create). + */ +typedef int (MPI_Copy_function)(MPI_Comm, int, void *, + void *, void *, int *); +/* MPI_Copy_function was deprecated in MPI-2.0; use MPI_Comm_copy_attr_function instead. */ +typedef int (MPI_Delete_function)(MPI_Comm, int, void *, void *); +/* MPI_Delete_function was deprecated in MPI-2.0; use MPI_Comm_delete_attr_function instead. */ OMPI_DECLSPEC int MPI_Keyval_create(MPI_Copy_function *copy_fn, MPI_Delete_function *delete_fn, int *keyval, void *extra_state) - __mpi_interface_deprecated__("MPI_Keyval_create is superseded by MPI_Comm_create_keyval in MPI-2.0"); + __mpi_interface_deprecated__("MPI_Keyval_create was deprecated in MPI-2.0; use MPI_Comm_create_keyval instead."); +OMPI_DECLSPEC int PMPI_Keyval_create(MPI_Copy_function *copy_fn, + MPI_Delete_function *delete_fn, + int *keyval, void *extra_state) + __mpi_interface_deprecated__("PMPI_Keyval_create was deprecated in MPI-2.0; use PMPI_Comm_create_keyval instead."); OMPI_DECLSPEC int MPI_Keyval_free(int *keyval) - __mpi_interface_deprecated__("MPI_Keyval_free is superseded by MPI_Comm_free_keyval in MPI-2.0"); -OMPI_DECLSPEC int MPI_Type_extent(MPI_Datatype type, MPI_Aint *extent) - __mpi_interface_removed__("MPI_Type_extent is superseded by MPI_Type_get_extent in MPI-2.0"); -OMPI_DECLSPEC int MPI_Type_hindexed(int count, int array_of_blocklengths[], - MPI_Aint array_of_displacements[], - MPI_Datatype oldtype, MPI_Datatype *newtype) - __mpi_interface_removed__("MPI_Type_hindexed is superseded by MPI_Type_create_hindexed in MPI-2.0"); -OMPI_DECLSPEC int MPI_Type_hvector(int count, int blocklength, MPI_Aint 
stride, - MPI_Datatype oldtype, MPI_Datatype *newtype) - __mpi_interface_removed__("MPI_Type_hvector is superseded by MPI_Type_create_hvector in MPI-2.0"); -OMPI_DECLSPEC int MPI_Type_lb(MPI_Datatype type, MPI_Aint *lb) - __mpi_interface_removed__("MPI_Type_lb has been removed, use MPI_Type_get_extent in MPI-2.0"); -OMPI_DECLSPEC int MPI_Type_struct(int count, int array_of_blocklengths[], - MPI_Aint array_of_displacements[], - MPI_Datatype array_of_types[], - MPI_Datatype *newtype) - __mpi_interface_removed__("MPI_Type_struct is superseded by MPI_Type_create_struct in MPI-2.0"); -OMPI_DECLSPEC int MPI_Type_ub(MPI_Datatype mtype, MPI_Aint *ub) - __mpi_interface_removed__("MPI_Type_ub has been removed, use MPI_Type_get_extent in MPI-2.0"); + __mpi_interface_deprecated__("MPI_Keyval_free was deprecated in MPI-2.0; MPI_Comm_free_keyval instead."); +OMPI_DECLSPEC int PMPI_Keyval_free(int *keyval) + __mpi_interface_deprecated__("PMPI_Keyval_free was deprecated in MPI-2.0; PMPI_Comm_free_keyval instead."); + +#if !defined(OMPI_COMPILING_FORTRAN_WRAPPERS) +#define MPI_DUP_FN OMPI_C_MPI_DUP_FN +#endif +OMPI_DECLSPEC int OMPI_C_MPI_DUP_FN( MPI_Comm comm, int comm_keyval, + void* extra_state, + void* attribute_val_in, + void* attribute_val_out, + int* flag ) + __mpi_interface_deprecated__("MPI_DUP_FN was deprecated in MPI-2.0; use MPI_COMM_DUP_FN instead."); + +#if !defined(OMPI_COMPILING_FORTRAN_WRAPPERS) +#define MPI_NULL_COPY_FN OMPI_C_MPI_NULL_COPY_FN +#endif +OMPI_DECLSPEC int OMPI_C_MPI_NULL_COPY_FN( MPI_Comm comm, int comm_keyval, + void* extra_state, + void* attribute_val_in, + void* attribute_val_out, + int* flag ) + __mpi_interface_deprecated__("MPI_NULL_COPY_FN was deprecated in MPI-2.0; use MPI_COMM_NULL_COPY_FN instead."); + +#if !defined(OMPI_COMPILING_FORTRAN_WRAPPERS) +#define MPI_NULL_DELETE_FN OMPI_C_MPI_NULL_DELETE_FN +#endif +OMPI_DECLSPEC int OMPI_C_MPI_NULL_DELETE_FN( MPI_Comm comm, int comm_keyval, + void* attribute_val_out, + void* extra_state ) + 
__mpi_interface_deprecated__("MPI_NULL_DELETE_FN was deprecated in MPI-2.0; use MPI_COMM_NULL_DELETE_FN instead."); + +#if (!OMPI_OMIT_MPI1_COMPAT_DECLS || OMPI_BUILDING) +/* + * Removed typedefs. These typedefs are only available if Open MPI + * was configured with --enable-mpi1-compatibility. + * + * These typedefs were formally removed from the MPI specification + * and should no longer be used in MPI applications. + * + * Even though MPI_Handler_function is removed, we do not use the + * attributes marking it as such, because otherwise the compiler + * will warn for all the functions that are declared using it + * (e.g., MPI_Errhandler_create). + */ +typedef void (MPI_Handler_function)(MPI_Comm *, int *, ...); +/* MPI_Handler_function was removed in MPI-3.0; use MPI_Comm_errhandler_function instead. */ + +/* + * Removed prototypes. These prototypes are only available if Open + * MPI was configured with --enable-mpi1-compatibility. + * + * These functions were formally removed from the MPI specification + * and should no longer be used in MPI applications. 
+ */ +OMPI_DECLSPEC int MPI_Address(void *location, MPI_Aint *address) + __mpi_interface_removed__(MPI_Address, MPI_Get_address); OMPI_DECLSPEC int PMPI_Address(void *location, MPI_Aint *address) - __mpi_interface_removed__("MPI_Address is superseded by MPI_Get_address in MPI-2.0"); -OMPI_DECLSPEC int PMPI_Attr_delete(MPI_Comm comm, int keyval) - __mpi_interface_deprecated__("MPI_Attr_delete is superseded by MPI_Comm_delete_attr in MPI-2.0"); -OMPI_DECLSPEC int PMPI_Attr_get(MPI_Comm comm, int keyval, void *attribute_val, int *flag) - __mpi_interface_deprecated__("MPI_Attr_get is superseded by MPI_Comm_get_attr in MPI-2.0"); -OMPI_DECLSPEC int PMPI_Attr_put(MPI_Comm comm, int keyval, void *attribute_val) - __mpi_interface_deprecated__("MPI_Attr_put is superseded by MPI_Comm_set_attr in MPI-2.0"); + __mpi_interface_removed__(PMPI_Address, PMPI_Get_address); +OMPI_DECLSPEC int MPI_Errhandler_create(MPI_Handler_function *function, + MPI_Errhandler *errhandler) + __mpi_interface_removed__(MPI_Errhandler_create, MPI_Comm_create_errhandler); OMPI_DECLSPEC int PMPI_Errhandler_create(MPI_Handler_function *function, MPI_Errhandler *errhandler) - __mpi_interface_removed__("MPI_Errhandler_create is superseded by MPI_Comm_create_errhandler in MPI-2.0"); + __mpi_interface_removed__(PMPI_Errhandler_create, PMPI_Comm_create_errhandler); +OMPI_DECLSPEC int MPI_Errhandler_get(MPI_Comm comm, MPI_Errhandler *errhandler) + __mpi_interface_removed__(MPI_Errhandler_get, MPI_Comm_get_errhandler); OMPI_DECLSPEC int PMPI_Errhandler_get(MPI_Comm comm, MPI_Errhandler *errhandler) - __mpi_interface_removed__("MPI_Errhandler_get is superseded by MPI_Comm_get_errhandler in MPI-2.0"); + __mpi_interface_removed__(PMPI_Errhandler_get, PMPI_Comm_get_errhandler); +OMPI_DECLSPEC int MPI_Errhandler_set(MPI_Comm comm, MPI_Errhandler errhandler) + __mpi_interface_removed__(MPI_Errhandler_set, MPI_Comm_set_errhandler); OMPI_DECLSPEC int PMPI_Errhandler_set(MPI_Comm comm, MPI_Errhandler errhandler) - 
__mpi_interface_removed__("MPI_Errhandler_set is superseded by MPI_Comm_set_errhandler in MPI-2.0"); -OMPI_DECLSPEC int PMPI_Keyval_create(MPI_Copy_function *copy_fn, - MPI_Delete_function *delete_fn, - int *keyval, void *extra_state) - __mpi_interface_deprecated__("MPI_Keyval_create is superseded by MPI_Comm_create_keyval in MPI-2.0"); -OMPI_DECLSPEC int PMPI_Keyval_free(int *keyval) - __mpi_interface_deprecated__("MPI_Keyval_free is superseded by MPI_Comm_free_keyval in MPI-2.0"); + __mpi_interface_removed__(PMPI_Errhandler_set, PMPI_Comm_set_errhandler); +OMPI_DECLSPEC int MPI_Type_extent(MPI_Datatype type, MPI_Aint *extent) + __mpi_interface_removed__(MPI_Type_extent, MPI_Type_get_extent); OMPI_DECLSPEC int PMPI_Type_extent(MPI_Datatype type, MPI_Aint *extent) - __mpi_interface_removed__("MPI_Type_extent is superseded by MPI_Type_get_extent in MPI-2.0"); + __mpi_interface_removed__(PMPI_Type_extent, PMPI_Type_get_extent); +OMPI_DECLSPEC int MPI_Type_hindexed(int count, int array_of_blocklengths[], + MPI_Aint array_of_displacements[], + MPI_Datatype oldtype, MPI_Datatype *newtype) + __mpi_interface_removed__(MPI_Type_hindexed, MPI_Type_create_hindexed); OMPI_DECLSPEC int PMPI_Type_hindexed(int count, int array_of_blocklengths[], MPI_Aint array_of_displacements[], MPI_Datatype oldtype, MPI_Datatype *newtype) - __mpi_interface_removed__("MPI_Type_hindexed is superseded by MPI_Type_create_hindexed in MPI-2.0"); + __mpi_interface_removed__(PMPI_Type_hindexed, PMPI_Type_create_hindexed); +OMPI_DECLSPEC int MPI_Type_hvector(int count, int blocklength, MPI_Aint stride, + MPI_Datatype oldtype, MPI_Datatype *newtype) + __mpi_interface_removed__(MPI_Type_hvector, MPI_Type_create_hvector); OMPI_DECLSPEC int PMPI_Type_hvector(int count, int blocklength, MPI_Aint stride, MPI_Datatype oldtype, MPI_Datatype *newtype) - __mpi_interface_removed__("MPI_Type_hvector is superseded by MPI_Type_create_hvector in MPI-2.0"); + __mpi_interface_removed__(PMPI_Type_hvector, 
PMPI_Type_create_hvector); +OMPI_DECLSPEC int MPI_Type_lb(MPI_Datatype type, MPI_Aint *lb) + __mpi_interface_removed__(MPI_Type_lb, MPI_Type_get_extent); OMPI_DECLSPEC int PMPI_Type_lb(MPI_Datatype type, MPI_Aint *lb) - __mpi_interface_removed__("MPI_Type_lb has been removed, use MPI_Type_get_extent in MPI-2.0"); + __mpi_interface_removed__(PMPI_Type_lb, PMPI_Type_get_extent); +OMPI_DECLSPEC int MPI_Type_struct(int count, int array_of_blocklengths[], + MPI_Aint array_of_displacements[], + MPI_Datatype array_of_types[], + MPI_Datatype *newtype) + __mpi_interface_removed__(MPI_Type_struct, MPI_Type_create_struct); OMPI_DECLSPEC int PMPI_Type_struct(int count, int array_of_blocklengths[], MPI_Aint array_of_displacements[], MPI_Datatype array_of_types[], MPI_Datatype *newtype) - __mpi_interface_removed__("MPI_Type_struct is superseded by MPI_Type_create_struct in MPI-2.0"); + __mpi_interface_removed__(PMPI_Type_struct, PMPI_Type_create_struct); +OMPI_DECLSPEC int MPI_Type_ub(MPI_Datatype mtype, MPI_Aint *ub) + __mpi_interface_removed__(MPI_Type_ub, MPI_Type_get_extent); OMPI_DECLSPEC int PMPI_Type_ub(MPI_Datatype mtype, MPI_Aint *ub) - __mpi_interface_removed__("MPI_Type_ub has been removed, use MPI_Type_get_extent in MPI-2.0"); + __mpi_interface_removed__(PMPI_Type_ub, PMPI_Type_get_extent); #endif /* !OMPI_OMIT_MPI1_COMPAT_DECLS */ +#if OMPI_REMOVED_USE_STATIC_ASSERT +#define MPI_Address(...) THIS_SYMBOL_WAS_REMOVED_IN_MPI30(MPI_Address, MPI_Get_address) +#define MPI_Errhandler_create(...) THIS_SYMBOL_WAS_REMOVED_IN_MPI30(MPI_Errhandler_create, MPI_Comm_create_errhandler) +#define MPI_Errhandler_get(...) THIS_SYMBOL_WAS_REMOVED_IN_MPI30(MPI_Errhandler_get, MPI_Comm_get_errhandler) +#define MPI_Errhandler_set(...) THIS_SYMBOL_WAS_REMOVED_IN_MPI30(MPI_Errhandler_set, MPI_Comm_set_errhandler) +#define MPI_Type_extent(...) THIS_SYMBOL_WAS_REMOVED_IN_MPI30(MPI_Type_extent, MPI_Type_get_extent) +#define MPI_Type_hindexed(...) 
THIS_SYMBOL_WAS_REMOVED_IN_MPI30(MPI_Type_hindexed, MPI_Type_create_hindexed) +#define MPI_Type_hvector(...) THIS_SYMBOL_WAS_REMOVED_IN_MPI30(MPI_Type_hvector, MPI_Type_create_hvector) +#define MPI_Type_lb(...) THIS_SYMBOL_WAS_REMOVED_IN_MPI30(MPI_Type_lb, MPI_Type_get_extent) +#define MPI_Type_struct(...) THIS_SYMBOL_WAS_REMOVED_IN_MPI30(MPI_Type_struct, MPI_Type_create_struct) +#define MPI_Type_ub(...) THIS_SYMBOL_WAS_REMOVED_IN_MPI30(MPI_Type_ub, MPI_Type_get_extent) +#endif + #if defined(c_plusplus) || defined(__cplusplus) } #endif diff --git a/ompi/info/info.c b/ompi/info/info.c index f209ca00574..03904b07b01 100644 --- a/ompi/info/info.c +++ b/ompi/info/info.c @@ -14,7 +14,7 @@ * Copyright (c) 2009 Sun Microsystems, Inc. All rights reserved. * Copyright (c) 2012-2015 Los Alamos National Security, LLC. All rights * reserved. - * Copyright (c) 2015 Research Organization for Information Science + * Copyright (c) 2015-2018 Research Organization for Information Science * and Technology (RIST). All rights reserved. * Copyright (c) 2016-2017 IBM Corporation. All rights reserved. * $COPYRIGHT$ @@ -259,6 +259,9 @@ int ompi_mpiinfo_finalize(void) opal_info_entry_t *entry; bool found = false; + OBJ_DESTRUCT(&ompi_mpi_info_null); + OBJ_DESTRUCT(&ompi_mpi_info_env); + /* Go through the f2c table and see if anything is left. Free them all. */ diff --git a/ompi/mca/bml/r2/bml_r2_ft.c b/ompi/mca/bml/r2/bml_r2_ft.c index 95fc6ade66b..8dc45d4f1e3 100644 --- a/ompi/mca/bml/r2/bml_r2_ft.c +++ b/ompi/mca/bml/r2/bml_r2_ft.c @@ -155,7 +155,10 @@ int mca_bml_r2_ft_event(int state) * Barrier to make all processes have been successfully restarted before * we try to remove some restart only files. */ - opal_pmix.fence(NULL, 0); + if( OMPI_SUCCESS != (ret = opal_pmix.fence(NULL, 0))) { + opal_output(0, "bml:r2: ft_event(Restart): Failed to fence complete\n"); + return ret; + } /* * Re-open the BTL framework to get the full list of components. 
@@ -224,7 +227,10 @@ int mca_bml_r2_ft_event(int state) * Barrier to make all processes have been successfully restarted before * we try to remove some restart only files. */ - opal_pmix.fence(NULL, 0); + if( OMPI_SUCCESS != (ret = opal_pmix.fence(NULL, 0))) { + opal_output(0, "bml:r2: ft_event(Restart): Failed to fence complete\n"); + return ret; + } /* * Re-open the BTL framework to get the full list of components. diff --git a/ompi/mca/coll/base/coll_base_allgather.c b/ompi/mca/coll/base/coll_base_allgather.c index c774b3cd41d..f3d3fd1d0a7 100644 --- a/ompi/mca/coll/base/coll_base_allgather.c +++ b/ompi/mca/coll/base/coll_base_allgather.c @@ -90,7 +90,7 @@ int ompi_coll_base_allgather_intra_bruck(const void *sbuf, int scount, mca_coll_base_module_t *module) { int line = -1, rank, size, sendto, recvfrom, distance, blockcount, err = 0; - ptrdiff_t slb, rlb, sext, rext; + ptrdiff_t rlb, rext; char *tmpsend = NULL, *tmprecv = NULL; size = ompi_comm_size(comm); @@ -99,9 +99,6 @@ int ompi_coll_base_allgather_intra_bruck(const void *sbuf, int scount, OPAL_OUTPUT((ompi_coll_base_framework.framework_output, "coll:base:allgather_intra_bruck rank %d", rank)); - err = ompi_datatype_get_extent (sdtype, &slb, &sext); - if (MPI_SUCCESS != err) { line = __LINE__; goto err_hndl; } - err = ompi_datatype_get_extent (rdtype, &rlb, &rext); if (MPI_SUCCESS != err) { line = __LINE__; goto err_hndl; } @@ -262,7 +259,7 @@ ompi_coll_base_allgather_intra_recursivedoubling(const void *sbuf, int scount, { int line = -1, rank, size, pow2size, err; int remote, distance, sendblocklocation; - ptrdiff_t slb, rlb, sext, rext; + ptrdiff_t rlb, rext; char *tmpsend = NULL, *tmprecv = NULL; size = ompi_comm_size(comm); @@ -289,9 +286,6 @@ ompi_coll_base_allgather_intra_recursivedoubling(const void *sbuf, int scount, "coll:base:allgather_intra_recursivedoubling rank %d, size %d", rank, size)); - err = ompi_datatype_get_extent (sdtype, &slb, &sext); - if (MPI_SUCCESS != err) { line = __LINE__; goto 
err_hndl; } - err = ompi_datatype_get_extent (rdtype, &rlb, &rext); if (MPI_SUCCESS != err) { line = __LINE__; goto err_hndl; } @@ -369,7 +363,7 @@ int ompi_coll_base_allgather_intra_ring(const void *sbuf, int scount, mca_coll_base_module_t *module) { int line = -1, rank, size, err, sendto, recvfrom, i, recvdatafrom, senddatafrom; - ptrdiff_t slb, rlb, sext, rext; + ptrdiff_t rlb, rext; char *tmpsend = NULL, *tmprecv = NULL; size = ompi_comm_size(comm); @@ -378,9 +372,6 @@ int ompi_coll_base_allgather_intra_ring(const void *sbuf, int scount, OPAL_OUTPUT((ompi_coll_base_framework.framework_output, "coll:base:allgather_intra_ring rank %d", rank)); - err = ompi_datatype_get_extent (sdtype, &slb, &sext); - if (MPI_SUCCESS != err) { line = __LINE__; goto err_hndl; } - err = ompi_datatype_get_extent (rdtype, &rlb, &rext); if (MPI_SUCCESS != err) { line = __LINE__; goto err_hndl; } @@ -499,7 +490,7 @@ ompi_coll_base_allgather_intra_neighborexchange(const void *sbuf, int scount, { int line = -1, rank, size, i, even_rank, err; int neighbor[2], offset_at_step[2], recv_data_from[2], send_data_from; - ptrdiff_t slb, rlb, sext, rext; + ptrdiff_t rlb, rext; char *tmpsend = NULL, *tmprecv = NULL; size = ompi_comm_size(comm); @@ -517,9 +508,6 @@ ompi_coll_base_allgather_intra_neighborexchange(const void *sbuf, int scount, OPAL_OUTPUT((ompi_coll_base_framework.framework_output, "coll:base:allgather_intra_neighborexchange rank %d", rank)); - err = ompi_datatype_get_extent (sdtype, &slb, &sext); - if (MPI_SUCCESS != err) { line = __LINE__; goto err_hndl; } - err = ompi_datatype_get_extent (rdtype, &rlb, &rext); if (MPI_SUCCESS != err) { line = __LINE__; goto err_hndl; } @@ -616,7 +604,7 @@ int ompi_coll_base_allgather_intra_two_procs(const void *sbuf, int scount, { int line = -1, err, rank, remote; char *tmpsend = NULL, *tmprecv = NULL; - ptrdiff_t sext, rext, lb; + ptrdiff_t rext, lb; rank = ompi_comm_rank(comm); @@ -627,9 +615,6 @@ int ompi_coll_base_allgather_intra_two_procs(const 
void *sbuf, int scount, return MPI_ERR_UNSUPPORTED_OPERATION; } - err = ompi_datatype_get_extent (sdtype, &lb, &sext); - if (MPI_SUCCESS != err) { line = __LINE__; goto err_hndl; } - err = ompi_datatype_get_extent (rdtype, &lb, &rext); if (MPI_SUCCESS != err) { line = __LINE__; goto err_hndl; } diff --git a/ompi/mca/coll/base/coll_base_alltoall.c b/ompi/mca/coll/base/coll_base_alltoall.c index 3509ed36414..3f1bdc5fb58 100644 --- a/ompi/mca/coll/base/coll_base_alltoall.c +++ b/ompi/mca/coll/base/coll_base_alltoall.c @@ -398,22 +398,22 @@ int ompi_coll_base_alltoall_intra_linear_sync(const void *sbuf, int scount, prcv = (char *) rbuf; psnd = (char *) sbuf; - /* Post first batch or ireceive and isend requests */ + /* Post first batch of irecv and isend requests */ for (nreqs = 0, nrreqs = 0, ri = (rank + 1) % size; nreqs < total_reqs; ri = (ri + 1) % size, ++nrreqs) { - nreqs++; error = MCA_PML_CALL(irecv (prcv + (ptrdiff_t)ri * rext, rcount, rdtype, ri, MCA_COLL_BASE_TAG_ALLTOALL, comm, &reqs[nreqs])); + nreqs++; if (MPI_SUCCESS != error) { line = __LINE__; goto error_hndl; } } for (nsreqs = 0, si = (rank + size - 1) % size; nreqs < 2 * total_reqs; - si = (si + size - 1) % size, ++nsreqs) { - nreqs++; + si = (si + size - 1) % size, ++nsreqs) { error = MCA_PML_CALL(isend (psnd + (ptrdiff_t)si * sext, scount, sdtype, si, MCA_COLL_BASE_TAG_ALLTOALL, MCA_PML_BASE_SEND_STANDARD, comm, &reqs[nreqs])); + nreqs++; if (MPI_SUCCESS != error) { line = __LINE__; goto error_hndl; } } diff --git a/ompi/mca/coll/base/coll_base_util.c b/ompi/mca/coll/base/coll_base_util.c index 6187098598f..5736c0946ff 100644 --- a/ompi/mca/coll/base/coll_base_util.c +++ b/ompi/mca/coll/base/coll_base_util.c @@ -9,8 +9,8 @@ * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. - * Copyright (c) 2014-2017 Research Organization for Information Science - * and Technology (RIST). All rights reserved. 
+ * Copyright (c) 2014-2019 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -26,6 +26,7 @@ #include "ompi/communicator/communicator.h" #include "ompi/mca/coll/base/coll_tags.h" #include "ompi/mca/coll/base/coll_base_functions.h" +#include "ompi/mca/topo/base/base.h" #include "ompi/mca/pml/pml.h" #include "coll_base_util.h" @@ -103,3 +104,204 @@ int ompi_rounddown(int num, int factor) num /= factor; return num * factor; /* floor(num / factor) * factor */ } + +static void release_objs_callback(struct ompi_coll_base_nbc_request_t *request) { + if (NULL != request->data.objs.objs[0]) { + OBJ_RELEASE(request->data.objs.objs[0]); + request->data.objs.objs[0] = NULL; + } + if (NULL != request->data.objs.objs[1]) { + OBJ_RELEASE(request->data.objs.objs[1]); + request->data.objs.objs[1] = NULL; + } +} + +static int complete_objs_callback(struct ompi_request_t *req) { + struct ompi_coll_base_nbc_request_t *request = (ompi_coll_base_nbc_request_t *)req; + int rc = OMPI_SUCCESS; + assert (NULL != request); + if (NULL != request->cb.req_complete_cb) { + rc = request->cb.req_complete_cb(request->req_complete_cb_data); + } + release_objs_callback(request); + return rc; +} + +static int free_objs_callback(struct ompi_request_t **rptr) { + struct ompi_coll_base_nbc_request_t *request = *(ompi_coll_base_nbc_request_t **)rptr; + int rc = OMPI_SUCCESS; + if (NULL != request->cb.req_free) { + rc = request->cb.req_free(rptr); + } + release_objs_callback(request); + return rc; +} + +int ompi_coll_base_retain_op( ompi_request_t *req, ompi_op_t *op, + ompi_datatype_t *type) { + ompi_coll_base_nbc_request_t *request = (ompi_coll_base_nbc_request_t *)req; + bool retain = false; + if (REQUEST_COMPLETE(req)) { + return OMPI_SUCCESS; + } + if (!ompi_op_is_intrinsic(op)) { + OBJ_RETAIN(op); + request->data.op.op = op; + retain = true; + } + if (!ompi_datatype_is_predefined(type)) { + 
OBJ_RETAIN(type); + request->data.op.datatype = type; + retain = true; + } + if (OPAL_UNLIKELY(retain)) { + /* We need to consider two cases : + * - non blocking collectives: + * the objects can be released when MPI_Wait() completes + * and we use the req_complete_cb callback + * - persistent non blocking collectives: + * the objects can only be released when the request is freed + * (e.g. MPI_Request_free() completes) and we use req_free callback + */ + if (req->req_persistent) { + request->cb.req_free = req->req_free; + req->req_free = free_objs_callback; + } else { + request->cb.req_complete_cb = req->req_complete_cb; + request->req_complete_cb_data = req->req_complete_cb_data; + req->req_complete_cb = complete_objs_callback; + req->req_complete_cb_data = request; + } + } + return OMPI_SUCCESS; +} + +int ompi_coll_base_retain_datatypes( ompi_request_t *req, ompi_datatype_t *stype, + ompi_datatype_t *rtype) { + ompi_coll_base_nbc_request_t *request = (ompi_coll_base_nbc_request_t *)req; + bool retain = false; + if (REQUEST_COMPLETE(req)) { + return OMPI_SUCCESS; + } + if (NULL != stype && !ompi_datatype_is_predefined(stype)) { + OBJ_RETAIN(stype); + request->data.types.stype = stype; + retain = true; + } + if (NULL != rtype && !ompi_datatype_is_predefined(rtype)) { + OBJ_RETAIN(rtype); + request->data.types.rtype = rtype; + retain = true; + } + if (OPAL_UNLIKELY(retain)) { + if (req->req_persistent) { + request->cb.req_free = req->req_free; + req->req_free = free_objs_callback; + } else { + request->cb.req_complete_cb = req->req_complete_cb; + request->req_complete_cb_data = req->req_complete_cb_data; + req->req_complete_cb = complete_objs_callback; + req->req_complete_cb_data = request; + } + } + return OMPI_SUCCESS; +} + +static void release_vecs_callback(ompi_coll_base_nbc_request_t *request) { + ompi_communicator_t *comm = request->super.req_mpi_object.comm; + int scount, rcount; + if (OMPI_COMM_IS_TOPO(comm)) { + (void)mca_topo_base_neighbor_count (comm, 
&rcount, &scount); + } else { + scount = rcount = OMPI_COMM_IS_INTER(comm)?ompi_comm_remote_size(comm):ompi_comm_size(comm); + } + if (NULL != request->data.vecs.stypes) { + for (int i=0; i<scount; i++) { + if (NULL != request->data.vecs.stypes[i]) { + OMPI_DATATYPE_RELEASE(request->data.vecs.stypes[i]); + } + } + request->data.vecs.stypes = NULL; + } + if (NULL != request->data.vecs.rtypes) { + for (int i=0; i<rcount; i++) { + if (NULL != request->data.vecs.rtypes[i]) { + OMPI_DATATYPE_RELEASE(request->data.vecs.rtypes[i]); + } + } + request->data.vecs.rtypes = NULL; + } +} + +static int complete_vecs_callback(struct ompi_request_t *req) { + ompi_coll_base_nbc_request_t *request = (ompi_coll_base_nbc_request_t *)req; + int rc = OMPI_SUCCESS; + assert (NULL != request); + if (NULL != request->cb.req_complete_cb) { + rc = request->cb.req_complete_cb(request->req_complete_cb_data); + } + release_vecs_callback(request); + return rc; +} + +static int free_vecs_callback(struct ompi_request_t **rptr) { + struct ompi_coll_base_nbc_request_t *request = *(ompi_coll_base_nbc_request_t **)rptr; + int rc = OMPI_SUCCESS; + if (NULL != request->cb.req_free) { + rc = request->cb.req_free(rptr); + } + release_vecs_callback(request); + return rc; +} + +int ompi_coll_base_retain_datatypes_w( ompi_request_t *req, + ompi_datatype_t *stypes[], ompi_datatype_t *rtypes[]) { + ompi_coll_base_nbc_request_t *request = (ompi_coll_base_nbc_request_t *)req; + bool retain = false; + ompi_communicator_t *comm = request->super.req_mpi_object.comm; + int scount, rcount; + if (REQUEST_COMPLETE(req)) { + return OMPI_SUCCESS; + } + if (OMPI_COMM_IS_TOPO(comm)) { + (void)mca_topo_base_neighbor_count (comm, &rcount, &scount); + } else { + scount = rcount = OMPI_COMM_IS_INTER(comm)?ompi_comm_remote_size(comm):ompi_comm_size(comm); + } + + for (int i=0; i<scount; i++) { + if (NULL != stypes && NULL != stypes[i] && !ompi_datatype_is_predefined(stypes[i])) { + OBJ_RETAIN(stypes[i]); + retain = true; + } + } + for (int i=0; i<rcount; i++) { + if (NULL != rtypes && NULL != rtypes[i] && !ompi_datatype_is_predefined(rtypes[i])) { + OBJ_RETAIN(rtypes[i]); + retain = true; + } + } + if (OPAL_UNLIKELY(retain)) { + request->data.vecs.stypes = stypes; + request->data.vecs.rtypes = rtypes; + if (req->req_persistent) { + request->cb.req_free = req->req_free; + req->req_free = free_vecs_callback; + } else { + request->cb.req_complete_cb = 
req->req_complete_cb; + request->req_complete_cb_data = req->req_complete_cb_data; + req->req_complete_cb = complete_vecs_callback; + req->req_complete_cb_data = request; + } + } + return OMPI_SUCCESS; +} + +static void nbc_req_cons(ompi_coll_base_nbc_request_t *req) { + req->cb.req_complete_cb = NULL; + req->req_complete_cb_data = NULL; + req->data.objs.objs[0] = NULL; + req->data.objs.objs[1] = NULL; +} + +OBJ_CLASS_INSTANCE(ompi_coll_base_nbc_request_t, ompi_request_t, nbc_req_cons, NULL); diff --git a/ompi/mca/coll/base/coll_base_util.h b/ompi/mca/coll/base/coll_base_util.h index 8306b8fe83d..a5b80161240 100644 --- a/ompi/mca/coll/base/coll_base_util.h +++ b/ompi/mca/coll/base/coll_base_util.h @@ -9,8 +9,8 @@ * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. - * Copyright (c) 2014-2017 Research Organization for Information Science - * and Technology (RIST). All rights reserved. + * Copyright (c) 2014-2019 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -27,10 +27,41 @@ #include "ompi/mca/mca.h" #include "ompi/datatype/ompi_datatype.h" #include "ompi/request/request.h" +#include "ompi/op/op.h" #include "ompi/mca/pml/pml.h" BEGIN_C_DECLS +struct ompi_coll_base_nbc_request_t { + ompi_request_t super; + union { + ompi_request_complete_fn_t req_complete_cb; + ompi_request_free_fn_t req_free; + } cb; + void *req_complete_cb_data; + union { + struct { + ompi_op_t *op; + ompi_datatype_t *datatype; + } op; + struct { + ompi_datatype_t *stype; + ompi_datatype_t *rtype; + } types; + struct { + opal_object_t *objs[2]; + } objs; + struct { + ompi_datatype_t **stypes; + ompi_datatype_t **rtypes; + } vecs; + } data; +}; + +OMPI_DECLSPEC OBJ_CLASS_DECLARATION(ompi_coll_base_nbc_request_t); + +typedef struct ompi_coll_base_nbc_request_t ompi_coll_base_nbc_request_t; + /** * A MPI_like function doing a send and a receive simultaneously. * If one of the communications results in a zero-byte message the @@ -84,5 +115,17 @@ unsigned int ompi_mirror_perm(unsigned int x, int nbits); */ int ompi_rounddown(int num, int factor); +int ompi_coll_base_retain_op( ompi_request_t *request, + ompi_op_t *op, + ompi_datatype_t *type); + +int ompi_coll_base_retain_datatypes( ompi_request_t *request, + ompi_datatype_t *stype, + ompi_datatype_t *rtype); + +int ompi_coll_base_retain_datatypes_w( ompi_request_t *request, + ompi_datatype_t *stypes[], + ompi_datatype_t *rtypes[]); + END_C_DECLS #endif /* MCA_COLL_BASE_UTIL_EXPORT_H */ diff --git a/ompi/mca/coll/hcoll/coll_hcoll_component.c b/ompi/mca/coll/hcoll/coll_hcoll_component.c index 29ea5689c73..a7a79286a3f 100644 --- a/ompi/mca/coll/hcoll/coll_hcoll_component.c +++ b/ompi/mca/coll/hcoll/coll_hcoll_component.c @@ -209,29 +209,10 @@ static int hcoll_open(void) { mca_coll_hcoll_component_t *cm; cm = &mca_coll_hcoll_component; - mca_coll_hcoll_output = opal_output_open(NULL); opal_output_set_verbosity(mca_coll_hcoll_output, cm->hcoll_verbose); - 
hcoll_rte_fns_setup(); - cm->libhcoll_initialized = false; - - (void)mca_base_framework_open(&opal_memory_base_framework, 0); - - /* Register memory hooks */ - if ((OPAL_MEMORY_FREE_SUPPORT | OPAL_MEMORY_MUNMAP_SUPPORT) == - ((OPAL_MEMORY_FREE_SUPPORT | OPAL_MEMORY_MUNMAP_SUPPORT) & - opal_mem_hooks_support_level())) - { - setenv("MXM_HCOLL_MEM_ON_DEMAND_MAP", "y", 0); - HCOL_VERBOSE(1, "Enabling on-demand memory mapping"); - cm->using_mem_hooks = 1; - } else { - HCOL_VERBOSE(1, "Disabling on-demand memory mapping"); - cm->using_mem_hooks = 0; - } - return OMPI_SUCCESS; } diff --git a/ompi/mca/coll/hcoll/coll_hcoll_module.c b/ompi/mca/coll/hcoll/coll_hcoll_module.c index 6e2fbdda310..aa262c98492 100644 --- a/ompi/mca/coll/hcoll/coll_hcoll_module.c +++ b/ompi/mca/coll/hcoll/coll_hcoll_module.c @@ -301,17 +301,28 @@ mca_coll_hcoll_comm_query(struct ompi_communicator_t *comm, int *priority) HCOL_ERROR("Hcol library init failed"); return NULL; } - #if HCOLL_API >= HCOLL_VERSION(3,2) - if (cm->using_mem_hooks && cm->init_opts->mem_hook_needed) { + if (cm->init_opts->mem_hook_needed) { #else - if (cm->using_mem_hooks && hcoll_check_mem_release_cb_needed()) { + if (hcoll_check_mem_release_cb_needed()) { #endif - opal_mem_hooks_register_release(mca_coll_hcoll_mem_release_cb, NULL); + rc = mca_base_framework_open(&opal_memory_base_framework, 0); + if (OPAL_SUCCESS != rc) { + HCOL_VERBOSE(1, "failed to initialize memory base framework: %d, " + "memory hooks will not be used", rc); + } else { + if ((OPAL_MEMORY_FREE_SUPPORT | OPAL_MEMORY_MUNMAP_SUPPORT) == + ((OPAL_MEMORY_FREE_SUPPORT | OPAL_MEMORY_MUNMAP_SUPPORT) & + opal_mem_hooks_support_level())) { + HCOL_VERBOSE(1, "using OPAL memory hooks as external events"); + cm->using_mem_hooks = 1; + opal_mem_hooks_register_release(mca_coll_hcoll_mem_release_cb, NULL); + setenv("MXM_HCOLL_MEM_ON_DEMAND_MAP", "y", 0); + } + } } else { cm->using_mem_hooks = 0; } - copy_fn.attr_communicator_copy_fn = 
(MPI_Comm_internal_copy_attr_function*) MPI_COMM_NULL_COPY_FN; del_fn.attr_communicator_delete_fn = hcoll_comm_attr_del_fn; err = ompi_attr_create_keyval(COMM_ATTR, copy_fn, del_fn, &hcoll_comm_attr_keyval, NULL ,0, NULL); diff --git a/ompi/mca/coll/hcoll/coll_hcoll_rte.c b/ompi/mca/coll/hcoll/coll_hcoll_rte.c index 6df2dde7e90..b7b87d0f41a 100644 --- a/ompi/mca/coll/hcoll/coll_hcoll_rte.c +++ b/ompi/mca/coll/hcoll/coll_hcoll_rte.c @@ -39,7 +39,7 @@ #include "ompi/datatype/ompi_datatype.h" #include "ompi/datatype/ompi_datatype_internal.h" #include "ompi/mca/pml/pml.h" - +#include "ompi/mca/coll/base/coll_base_util.h" #include "hcoll/api/hcoll_dte.h" #include "hcoll/api/hcoll_api.h" @@ -151,25 +151,13 @@ void hcoll_rte_fns_setup(void) { init_module_fns(); OBJ_CONSTRUCT(&mca_coll_hcoll_component.requests, opal_free_list_t); - opal_free_list_init( - &(mca_coll_hcoll_component.requests), - sizeof(ompi_request_t), - /* no special alignment needed */ - 8, - OBJ_CLASS(ompi_request_t), - /* no payload data */ - 0, 0, - /* NOTE: hack - need to parametrize this */ - 10, - -1, - 10, - /* No Mpool or init function */ - NULL, - 0, - NULL, - NULL, - NULL - ); + opal_free_list_init(&(mca_coll_hcoll_component.requests), + sizeof(ompi_coll_base_nbc_request_t), + opal_cache_line_size, OBJ_CLASS(ompi_coll_base_nbc_request_t), + /* no payload data */ + 0, 0, 10, -1, 10, + /* No Mpool or init function */ + NULL, 0, NULL, NULL, NULL); } static int recv_nb(struct dte_data_representation_t data, @@ -349,20 +337,23 @@ request_free(struct ompi_request_t **ompi_req) static void* get_coll_handle(void) { - ompi_request_t *ompi_req; + ompi_coll_base_nbc_request_t *ompi_req; opal_free_list_item_t *item; item = opal_free_list_wait (&(mca_coll_hcoll_component.requests)); if (OPAL_UNLIKELY(NULL == item)) { HCOL_ERROR("Wait for free list failed.\n"); return NULL; } - ompi_req = (ompi_request_t *)item; - OMPI_REQUEST_INIT(ompi_req,false); - ompi_req->req_complete_cb = NULL; - 
ompi_req->req_status.MPI_ERROR = MPI_SUCCESS; - ompi_req->req_state = OMPI_REQUEST_ACTIVE; - ompi_req->req_free = request_free; - ompi_req->req_type = OMPI_REQUEST_COLL; + ompi_req = (ompi_coll_base_nbc_request_t *)item; + OMPI_REQUEST_INIT(&ompi_req->super,false); + ompi_req->super.req_complete_cb = NULL; + ompi_req->super.req_complete_cb_data = NULL; + ompi_req->super.req_status.MPI_ERROR = MPI_SUCCESS; + ompi_req->super.req_state = OMPI_REQUEST_ACTIVE; + ompi_req->super.req_free = request_free; + ompi_req->super.req_type = OMPI_REQUEST_COLL; + ompi_req->data.objs.objs[0] = NULL; + ompi_req->data.objs.objs[1] = NULL; return (void *)ompi_req; } diff --git a/ompi/mca/coll/libnbc/coll_libnbc.h b/ompi/mca/coll/libnbc/coll_libnbc.h index 967a7794257..17abf86f2ab 100644 --- a/ompi/mca/coll/libnbc/coll_libnbc.h +++ b/ompi/mca/coll/libnbc/coll_libnbc.h @@ -13,8 +13,8 @@ * Copyright (c) 2008 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2013-2015 Los Alamos National Security, LLC. All rights * reserved. - * Copyright (c) 2014-2017 Research Organization for Information Science - * and Technology (RIST). All rights reserved. + * Copyright (c) 2014-2019 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * Copyright (c) 2016-2017 IBM Corporation. All rights reserved. * Copyright (c) 2018 FUJITSU LIMITED. All rights reserved. 
* $COPYRIGHT$ @@ -28,7 +28,7 @@ #define MCA_COLL_LIBNBC_EXPORT_H #include "ompi/mca/coll/coll.h" -#include "ompi/request/request.h" +#include "ompi/mca/coll/base/coll_base_util.h" #include "opal/sys/atomic.h" BEGIN_C_DECLS @@ -114,7 +114,7 @@ typedef struct NBC_Schedule NBC_Schedule; OBJ_CLASS_DECLARATION(NBC_Schedule); struct ompi_coll_libnbc_request_t { - ompi_request_t super; + ompi_coll_base_nbc_request_t super; MPI_Comm comm; long row_offset; bool nbc_complete; /* status in libnbc level */ @@ -138,13 +138,13 @@ typedef ompi_coll_libnbc_request_t NBC_Handle; opal_free_list_item_t *item; \ item = opal_free_list_wait (&mca_coll_libnbc_component.requests); \ req = (ompi_coll_libnbc_request_t*) item; \ - OMPI_REQUEST_INIT(&req->super, persistent); \ - req->super.req_mpi_object.comm = comm; \ + OMPI_REQUEST_INIT(&req->super.super, persistent); \ + req->super.super.req_mpi_object.comm = comm; \ } while (0) #define OMPI_COLL_LIBNBC_REQUEST_RETURN(req) \ do { \ - OMPI_REQUEST_FINI(&(req)->super); \ + OMPI_REQUEST_FINI(&(req)->super.super); \ opal_free_list_return (&mca_coll_libnbc_component.requests, \ (opal_free_list_item_t*) (req)); \ } while (0) diff --git a/ompi/mca/coll/libnbc/coll_libnbc_component.c b/ompi/mca/coll/libnbc/coll_libnbc_component.c index bf4960d9235..6598972773d 100644 --- a/ompi/mca/coll/libnbc/coll_libnbc_component.c +++ b/ompi/mca/coll/libnbc/coll_libnbc_component.c @@ -13,8 +13,8 @@ * Copyright (c) 2008 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2013-2015 Los Alamos National Security, LLC. All rights * reserved. - * Copyright (c) 2016-2017 Research Organization for Information Science - * and Technology (RIST). All rights reserved. + * Copyright (c) 2016-2019 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * Copyright (c) 2016 IBM Corporation. All rights reserved. * Copyright (c) 2017 Ian Bradley Morgan and Anthony Skjellum. All * rights reserved. 
@@ -328,21 +328,21 @@ ompi_coll_libnbc_progress(void) /* done, remove and complete */ OPAL_THREAD_LOCK(&mca_coll_libnbc_component.lock); opal_list_remove_item(&mca_coll_libnbc_component.active_requests, - &request->super.super.super); + &request->super.super.super.super); OPAL_THREAD_UNLOCK(&mca_coll_libnbc_component.lock); if( OMPI_SUCCESS == res || NBC_OK == res || NBC_SUCCESS == res ) { - request->super.req_status.MPI_ERROR = OMPI_SUCCESS; + request->super.super.req_status.MPI_ERROR = OMPI_SUCCESS; } else { - request->super.req_status.MPI_ERROR = res; + request->super.super.req_status.MPI_ERROR = res; } - if(request->super.req_persistent) { + if(request->super.super.req_persistent) { /* reset for the next communication */ request->row_offset = 0; } - if(!request->super.req_persistent || !REQUEST_COMPLETE(&request->super)) { - ompi_request_complete(&request->super, true); + if(!request->super.super.req_persistent || !REQUEST_COMPLETE(&request->super.super)) { + ompi_request_complete(&request->super.super, true); } } OPAL_THREAD_LOCK(&mca_coll_libnbc_component.lock); @@ -407,7 +407,7 @@ request_start(size_t count, ompi_request_t ** requests) NBC_DEBUG(5, "tmpbuf address=%p size=%u\n", handle->tmpbuf, sizeof(handle->tmpbuf)); NBC_DEBUG(5, "--------------------------------\n"); - handle->super.req_complete = REQUEST_PENDING; + handle->super.super.req_complete = REQUEST_PENDING; handle->nbc_complete = false; res = NBC_Start(handle); @@ -437,7 +437,7 @@ request_free(struct ompi_request_t **ompi_req) ompi_coll_libnbc_request_t *request = (ompi_coll_libnbc_request_t*) *ompi_req; - if( !REQUEST_COMPLETE(&request->super) ) { + if( !REQUEST_COMPLETE(&request->super.super) ) { return MPI_ERR_REQUEST; } @@ -451,15 +451,15 @@ request_free(struct ompi_request_t **ompi_req) static void request_construct(ompi_coll_libnbc_request_t *request) { - request->super.req_type = OMPI_REQUEST_COLL; - request->super.req_status._cancelled = 0; - request->super.req_start = request_start; - 
request->super.req_free = request_free; - request->super.req_cancel = request_cancel; + request->super.super.req_type = OMPI_REQUEST_COLL; + request->super.super.req_status._cancelled = 0; + request->super.super.req_start = request_start; + request->super.super.req_free = request_free; + request->super.super.req_cancel = request_cancel; } OBJ_CLASS_INSTANCE(ompi_coll_libnbc_request_t, - ompi_request_t, + ompi_coll_base_nbc_request_t, request_construct, NULL); diff --git a/ompi/mca/coll/libnbc/nbc.c b/ompi/mca/coll/libnbc/nbc.c index 5b48d6b9315..171f5a37e9c 100644 --- a/ompi/mca/coll/libnbc/nbc.c +++ b/ompi/mca/coll/libnbc/nbc.c @@ -3,15 +3,15 @@ * Copyright (c) 2006 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. - * Copyright (c) 2013 The University of Tennessee and The University + * Copyright (c) 2013-2018 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. * Copyright (c) 2006 The Technical University of Chemnitz. All * rights reserved. * Copyright (c) 2015 Los Alamos National Security, LLC. All rights * reserved. - * Copyright (c) 2015-2018 Research Organization for Information Science - * and Technology (RIST). All rights reserved. + * Copyright (c) 2015-2019 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* * Author(s): Torsten Hoefler * @@ -319,8 +319,6 @@ int NBC_Progress(NBC_Handle *handle) { bool flag; unsigned long size = 0; char *delim; - int i; - ompi_status_public_t status; if (handle->nbc_complete) { return NBC_OK; @@ -337,8 +335,14 @@ int NBC_Progress(NBC_Handle *handle) { while (handle->req_count) { ompi_request_t *subreq = handle->req_array[handle->req_count - 1]; if (REQUEST_COMPLETE(subreq)) { - ompi_request_free(&subreq); + if(OPAL_UNLIKELY( OMPI_SUCCESS != subreq->req_status.MPI_ERROR )) { + NBC_Error ("MPI Error in NBC subrequest %p : %d", subreq, subreq->req_status.MPI_ERROR); + /* copy the error code from the underlying request and let the + * round finish */ + handle->super.super.req_status.MPI_ERROR = subreq->req_status.MPI_ERROR; + } handle->req_count--; + ompi_request_free(&subreq); } else { flag = false; break; @@ -351,6 +355,26 @@ int NBC_Progress(NBC_Handle *handle) { /* a round is finished */ if (flag) { + /* reset handle for next round */ + if (NULL != handle->req_array) { + /* free request array */ + free (handle->req_array); + handle->req_array = NULL; + } + + handle->req_count = 0; + + /* previous round had an error */ + if (OPAL_UNLIKELY(OMPI_SUCCESS != handle->super.super.req_status.MPI_ERROR)) { + res = handle->super.super.req_status.MPI_ERROR; + NBC_Error("NBC_Progress: an error %d was found during schedule %p at row-offset %li - aborting the schedule\n", res, handle->schedule, handle->row_offset); + handle->nbc_complete = true; + if (!handle->super.super.req_persistent) { + NBC_Free(handle); + } + return res; + } + /* adjust delim to start of current round */ NBC_DEBUG(5, "NBC_Progress: going in schedule %p to row-offset: %li\n", handle->schedule, handle->row_offset); delim = handle->schedule->data + handle->row_offset; @@ -360,20 +384,12 @@ int NBC_Progress(NBC_Handle *handle) { /* adjust delim to end of current round -> delimiter */ delim = delim + size; - if (NULL != handle->req_array) { - /* free request array */ - free 
(handle->req_array); - handle->req_array = NULL; - } - - handle->req_count = 0; - if (*delim == 0) { /* this was the last round - we're done */ NBC_DEBUG(5, "NBC_Progress last round finished - we're done\n"); handle->nbc_complete = true; - if (!handle->super.req_persistent) { + if (!handle->super.super.req_persistent) { NBC_Free(handle); } @@ -639,14 +655,15 @@ int NBC_Start(NBC_Handle *handle) { } /* kick off first round */ - handle->super.req_state = OMPI_REQUEST_ACTIVE; + handle->super.super.req_state = OMPI_REQUEST_ACTIVE; + handle->super.super.req_status.MPI_ERROR = OMPI_SUCCESS; res = NBC_Start_round(handle); if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { return res; } OPAL_THREAD_LOCK(&mca_coll_libnbc_component.lock); - opal_list_append(&mca_coll_libnbc_component.active_requests, &(handle->super.super.super)); + opal_list_append(&mca_coll_libnbc_component.active_requests, (opal_list_item_t *)handle); OPAL_THREAD_UNLOCK(&mca_coll_libnbc_component.lock); return OMPI_SUCCESS; diff --git a/ompi/mca/coll/libnbc/nbc_iallgatherv.c b/ompi/mca/coll/libnbc/nbc_iallgatherv.c index 500a29dd6cd..b2046ab50e6 100644 --- a/ompi/mca/coll/libnbc/nbc_iallgatherv.c +++ b/ompi/mca/coll/libnbc/nbc_iallgatherv.c @@ -11,8 +11,8 @@ * Copyright (c) 2012 Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2013-2015 Los Alamos National Security, LLC. All rights * reserved. - * Copyright (c) 2014-2017 Research Organization for Information Science - * and Technology (RIST). All rights reserved. + * Copyright (c) 2014-2018 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * Copyright (c) 2017 IBM Corporation. All rights reserved. * Copyright (c) 2018 FUJITSU LIMITED. All rights reserved. 
* $COPYRIGHT$ @@ -130,7 +130,7 @@ int ompi_coll_libnbc_iallgatherv(const void* sendbuf, int sendcount, MPI_Datatyp res = NBC_Start(*(ompi_coll_libnbc_request_t **)request); if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { - NBC_Return_handle ((ompi_coll_libnbc_request_t *)request); + NBC_Return_handle (*(ompi_coll_libnbc_request_t **)request); *request = &ompi_request_null.request; return res; } @@ -209,7 +209,7 @@ int ompi_coll_libnbc_iallgatherv_inter(const void* sendbuf, int sendcount, MPI_D res = NBC_Start(*(ompi_coll_libnbc_request_t **)request); if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { - NBC_Return_handle ((ompi_coll_libnbc_request_t *)request); + NBC_Return_handle (*(ompi_coll_libnbc_request_t **)request); *request = &ompi_request_null.request; return res; } diff --git a/ompi/mca/coll/libnbc/nbc_iallreduce.c b/ompi/mca/coll/libnbc/nbc_iallreduce.c index f61b5fbb93f..57aa0d77e0d 100644 --- a/ompi/mca/coll/libnbc/nbc_iallreduce.c +++ b/ompi/mca/coll/libnbc/nbc_iallreduce.c @@ -7,8 +7,8 @@ * rights reserved. * Copyright (c) 2013-2017 Los Alamos National Security, LLC. All rights * reserved. - * Copyright (c) 2014-2017 Research Organization for Information Science - * and Technology (RIST). All rights reserved. + * Copyright (c) 2014-2018 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * Copyright (c) 2017 IBM Corporation. All rights reserved. * Copyright (c) 2018 FUJITSU LIMITED. All rights reserved. 
* $COPYRIGHT$ @@ -206,7 +206,7 @@ int ompi_coll_libnbc_iallreduce(const void* sendbuf, void* recvbuf, int count, M res = NBC_Start(*(ompi_coll_libnbc_request_t **)request); if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { - NBC_Return_handle ((ompi_coll_libnbc_request_t *)request); + NBC_Return_handle (*(ompi_coll_libnbc_request_t **)request); *request = &ompi_request_null.request; return res; } @@ -289,7 +289,7 @@ int ompi_coll_libnbc_iallreduce_inter(const void* sendbuf, void* recvbuf, int co res = NBC_Start(*(ompi_coll_libnbc_request_t **)request); if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { - NBC_Return_handle ((ompi_coll_libnbc_request_t *)request); + NBC_Return_handle (*(ompi_coll_libnbc_request_t **)request); *request = &ompi_request_null.request; return res; } diff --git a/ompi/mca/coll/libnbc/nbc_ialltoall.c b/ompi/mca/coll/libnbc/nbc_ialltoall.c index 0b93af0530b..e2731e1a1d3 100644 --- a/ompi/mca/coll/libnbc/nbc_ialltoall.c +++ b/ompi/mca/coll/libnbc/nbc_ialltoall.c @@ -292,7 +292,7 @@ int ompi_coll_libnbc_ialltoall(const void* sendbuf, int sendcount, MPI_Datatype res = NBC_Start(*(ompi_coll_libnbc_request_t **)request); if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { - NBC_Return_handle ((ompi_coll_libnbc_request_t *)request); + NBC_Return_handle (*(ompi_coll_libnbc_request_t **)request); *request = &ompi_request_null.request; return res; } @@ -376,7 +376,7 @@ int ompi_coll_libnbc_ialltoall_inter (const void* sendbuf, int sendcount, MPI_Da res = NBC_Start(*(ompi_coll_libnbc_request_t **)request); if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { - NBC_Return_handle ((ompi_coll_libnbc_request_t *)request); + NBC_Return_handle (*(ompi_coll_libnbc_request_t **)request); *request = &ompi_request_null.request; return res; } diff --git a/ompi/mca/coll/libnbc/nbc_ialltoallv.c b/ompi/mca/coll/libnbc/nbc_ialltoallv.c index 93f54aa20fd..5d13d524ea4 100644 --- a/ompi/mca/coll/libnbc/nbc_ialltoallv.c +++ b/ompi/mca/coll/libnbc/nbc_ialltoallv.c @@ -5,8 +5,8 @@ * Corporation. 
All rights reserved. * Copyright (c) 2006 The Technical University of Chemnitz. All * rights reserved. - * Copyright (c) 2014-2017 Research Organization for Information Science - * and Technology (RIST). All rights reserved. + * Copyright (c) 2014-2018 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * Copyright (c) 2015-2017 Los Alamos National Security, LLC. All rights * reserved. * Copyright (c) 2017 IBM Corporation. All rights reserved. @@ -153,7 +153,7 @@ int ompi_coll_libnbc_ialltoallv(const void* sendbuf, const int *sendcounts, cons res = NBC_Start(*(ompi_coll_libnbc_request_t **)request); if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { - NBC_Return_handle ((ompi_coll_libnbc_request_t *)request); + NBC_Return_handle (*(ompi_coll_libnbc_request_t **)request); *request = &ompi_request_null.request; return res; } @@ -241,7 +241,7 @@ int ompi_coll_libnbc_ialltoallv_inter (const void* sendbuf, const int *sendcount res = NBC_Start(*(ompi_coll_libnbc_request_t **)request); if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { - NBC_Return_handle ((ompi_coll_libnbc_request_t *)request); + NBC_Return_handle (*(ompi_coll_libnbc_request_t **)request); *request = &ompi_request_null.request; return res; } diff --git a/ompi/mca/coll/libnbc/nbc_ialltoallw.c b/ompi/mca/coll/libnbc/nbc_ialltoallw.c index 52d53b121ec..ae293697c7c 100644 --- a/ompi/mca/coll/libnbc/nbc_ialltoallw.c +++ b/ompi/mca/coll/libnbc/nbc_ialltoallw.c @@ -5,8 +5,8 @@ * Corporation. All rights reserved. * Copyright (c) 2006 The Technical University of Chemnitz. All * rights reserved. - * Copyright (c) 2014-2017 Research Organization for Information Science - * and Technology (RIST). All rights reserved. + * Copyright (c) 2014-2018 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * Copyright (c) 2015-2017 Los Alamos National Security, LLC. All rights * reserved. * Copyright (c) 2017 IBM Corporation. All rights reserved. 
@@ -139,7 +139,7 @@ int ompi_coll_libnbc_ialltoallw(const void* sendbuf, const int *sendcounts, cons res = NBC_Start(*(ompi_coll_libnbc_request_t **)request); if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { - NBC_Return_handle ((ompi_coll_libnbc_request_t *)request); + NBC_Return_handle (*(ompi_coll_libnbc_request_t **)request); *request = &ompi_request_null.request; return res; } @@ -214,7 +214,7 @@ int ompi_coll_libnbc_ialltoallw_inter(const void* sendbuf, const int *sendcounts res = NBC_Start(*(ompi_coll_libnbc_request_t **)request); if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { - NBC_Return_handle ((ompi_coll_libnbc_request_t *)request); + NBC_Return_handle (*(ompi_coll_libnbc_request_t **)request); *request = &ompi_request_null.request; return res; } diff --git a/ompi/mca/coll/libnbc/nbc_ibarrier.c b/ompi/mca/coll/libnbc/nbc_ibarrier.c index bed454b859c..05da51de34a 100644 --- a/ompi/mca/coll/libnbc/nbc_ibarrier.c +++ b/ompi/mca/coll/libnbc/nbc_ibarrier.c @@ -7,8 +7,8 @@ * rights reserved. * Copyright (c) 2013-2015 Los Alamos National Security, LLC. All rights * reserved. - * Copyright (c) 2014-2017 Research Organization for Information Science - * and Technology (RIST). All rights reserved. + * Copyright (c) 2014-2018 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * Copyright (c) 2015 Mellanox Technologies. All rights reserved. * Copyright (c) 2017 IBM Corporation. All rights reserved. * Copyright (c) 2018 FUJITSU LIMITED. All rights reserved. 
@@ -108,7 +108,7 @@ int ompi_coll_libnbc_ibarrier(struct ompi_communicator_t *comm, ompi_request_t * res = NBC_Start(*(ompi_coll_libnbc_request_t **)request); if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { - NBC_Return_handle ((ompi_coll_libnbc_request_t *)request); + NBC_Return_handle (*(ompi_coll_libnbc_request_t **)request); *request = &ompi_request_null.request; return res; } @@ -195,7 +195,7 @@ int ompi_coll_libnbc_ibarrier_inter(struct ompi_communicator_t *comm, ompi_reque res = NBC_Start(*(ompi_coll_libnbc_request_t **)request); if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { - NBC_Return_handle ((ompi_coll_libnbc_request_t *)request); + NBC_Return_handle (*(ompi_coll_libnbc_request_t **)request); *request = &ompi_request_null.request; return res; } diff --git a/ompi/mca/coll/libnbc/nbc_ibcast.c b/ompi/mca/coll/libnbc/nbc_ibcast.c index 8f2ecd64c12..3cd1ca7b267 100644 --- a/ompi/mca/coll/libnbc/nbc_ibcast.c +++ b/ompi/mca/coll/libnbc/nbc_ibcast.c @@ -5,8 +5,8 @@ * Corporation. All rights reserved. * Copyright (c) 2006 The Technical University of Chemnitz. All * rights reserved. - * Copyright (c) 2014-2017 Research Organization for Information Science - * and Technology (RIST). All rights reserved. + * Copyright (c) 2014-2018 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * Copyright (c) 2015 Los Alamos National Security, LLC. All rights * reserved. * Copyright (c) 2016-2017 IBM Corporation. All rights reserved. 
@@ -182,7 +182,7 @@ int ompi_coll_libnbc_ibcast(void *buffer, int count, MPI_Datatype datatype, int } res = NBC_Start(*(ompi_coll_libnbc_request_t **)request); if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { - NBC_Return_handle ((ompi_coll_libnbc_request_t *)request); + NBC_Return_handle (*(ompi_coll_libnbc_request_t **)request); *request = &ompi_request_null.request; return res; } @@ -405,7 +405,7 @@ int ompi_coll_libnbc_ibcast_inter(void *buffer, int count, MPI_Datatype datatype res = NBC_Start(*(ompi_coll_libnbc_request_t **)request); if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { - NBC_Return_handle ((ompi_coll_libnbc_request_t *)request); + NBC_Return_handle (*(ompi_coll_libnbc_request_t **)request); *request = &ompi_request_null.request; return res; } diff --git a/ompi/mca/coll/libnbc/nbc_iexscan.c b/ompi/mca/coll/libnbc/nbc_iexscan.c index 7cc8c7f4e66..90a6b6bf27f 100644 --- a/ompi/mca/coll/libnbc/nbc_iexscan.c +++ b/ompi/mca/coll/libnbc/nbc_iexscan.c @@ -7,8 +7,8 @@ * rights reserved. * Copyright (c) 2013-2015 Los Alamos National Security, LLC. All rights * reserved. - * Copyright (c) 2014-2017 Research Organization for Information Science - * and Technology (RIST). All rights reserved. + * Copyright (c) 2014-2018 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * Copyright (c) 2017 IBM Corporation. All rights reserved. * Copyright (c) 2018 FUJITSU LIMITED. All rights reserved. 
* $COPYRIGHT$ @@ -205,7 +205,7 @@ int ompi_coll_libnbc_iexscan(const void* sendbuf, void* recvbuf, int count, MPI_ res = NBC_Start(*(ompi_coll_libnbc_request_t **)request); if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { - NBC_Return_handle ((ompi_coll_libnbc_request_t *)request); + NBC_Return_handle (*(ompi_coll_libnbc_request_t **)request); *request = &ompi_request_null.request; return res; } diff --git a/ompi/mca/coll/libnbc/nbc_igather.c b/ompi/mca/coll/libnbc/nbc_igather.c index 521d583af8b..47203d0a467 100644 --- a/ompi/mca/coll/libnbc/nbc_igather.c +++ b/ompi/mca/coll/libnbc/nbc_igather.c @@ -8,8 +8,8 @@ * Copyright (c) 2013 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2014-2017 Research Organization for Information Science - * and Technology (RIST). All rights reserved. + * Copyright (c) 2014-2018 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * Copyright (c) 2015 Los Alamos National Security, LLC. All rights * reserved. * Copyright (c) 2017 IBM Corporation. All rights reserved. 
@@ -185,7 +185,7 @@ int ompi_coll_libnbc_igather(const void* sendbuf, int sendcount, MPI_Datatype se res = NBC_Start(*(ompi_coll_libnbc_request_t **)request); if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { - NBC_Return_handle ((ompi_coll_libnbc_request_t *)request); + NBC_Return_handle (*(ompi_coll_libnbc_request_t **)request); *request = &ompi_request_null.request; return res; } @@ -265,7 +265,7 @@ int ompi_coll_libnbc_igather_inter(const void* sendbuf, int sendcount, MPI_Datat res = NBC_Start(*(ompi_coll_libnbc_request_t **)request); if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { - NBC_Return_handle ((ompi_coll_libnbc_request_t *)request); + NBC_Return_handle (*(ompi_coll_libnbc_request_t **)request); *request = &ompi_request_null.request; return res; } diff --git a/ompi/mca/coll/libnbc/nbc_igatherv.c b/ompi/mca/coll/libnbc/nbc_igatherv.c index 0fe0fbfd803..387a668e678 100644 --- a/ompi/mca/coll/libnbc/nbc_igatherv.c +++ b/ompi/mca/coll/libnbc/nbc_igatherv.c @@ -8,8 +8,8 @@ * Copyright (c) 2013 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2014-2017 Research Organization for Information Science - * and Technology (RIST). All rights reserved. + * Copyright (c) 2014-2018 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * Copyright (c) 2015 Los Alamos National Security, LLC. All rights * reserved. * Copyright (c) 2015 Mellanox Technologies. All rights reserved. 
@@ -117,7 +117,7 @@ int ompi_coll_libnbc_igatherv(const void* sendbuf, int sendcount, MPI_Datatype s res = NBC_Start(*(ompi_coll_libnbc_request_t **)request); if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { - NBC_Return_handle ((ompi_coll_libnbc_request_t *)request); + NBC_Return_handle (*(ompi_coll_libnbc_request_t **)request); *request = &ompi_request_null.request; return res; } @@ -197,7 +197,7 @@ int ompi_coll_libnbc_igatherv_inter(const void* sendbuf, int sendcount, MPI_Data res = NBC_Start(*(ompi_coll_libnbc_request_t **)request); if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { - NBC_Return_handle ((ompi_coll_libnbc_request_t *)request); + NBC_Return_handle (*(ompi_coll_libnbc_request_t **)request); *request = &ompi_request_null.request; return res; } diff --git a/ompi/mca/coll/libnbc/nbc_ineighbor_allgather.c b/ompi/mca/coll/libnbc/nbc_ineighbor_allgather.c index 9835b0f5a0d..e15ddf33269 100644 --- a/ompi/mca/coll/libnbc/nbc_ineighbor_allgather.c +++ b/ompi/mca/coll/libnbc/nbc_ineighbor_allgather.c @@ -5,8 +5,8 @@ * Corporation. All rights reserved. * Copyright (c) 2006 The Technical University of Chemnitz. All * rights reserved. - * Copyright (c) 2014-2017 Research Organization for Information Science - * and Technology (RIST). All rights reserved. + * Copyright (c) 2014-2018 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * Copyright (c) 2015 Los Alamos National Security, LLC. All rights * reserved. * Copyright (c) 2017 IBM Corporation. All rights reserved. 
@@ -173,7 +173,7 @@ int ompi_coll_libnbc_ineighbor_allgather(const void *sbuf, int scount, MPI_Datat } res = NBC_Start(*(ompi_coll_libnbc_request_t **)request); if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { - NBC_Return_handle ((ompi_coll_libnbc_request_t *)request); + NBC_Return_handle (*(ompi_coll_libnbc_request_t **)request); *request = &ompi_request_null.request; return res; } diff --git a/ompi/mca/coll/libnbc/nbc_ineighbor_allgatherv.c b/ompi/mca/coll/libnbc/nbc_ineighbor_allgatherv.c index 649349b751c..9871111326b 100644 --- a/ompi/mca/coll/libnbc/nbc_ineighbor_allgatherv.c +++ b/ompi/mca/coll/libnbc/nbc_ineighbor_allgatherv.c @@ -5,8 +5,8 @@ * Corporation. All rights reserved. * Copyright (c) 2006 The Technical University of Chemnitz. All * rights reserved. - * Copyright (c) 2014-2017 Research Organization for Information Science - * and Technology (RIST). All rights reserved. + * Copyright (c) 2014-2018 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * Copyright (c) 2015 Los Alamos National Security, LLC. All rights * reserved. * Copyright (c) 2017 IBM Corporation. All rights reserved. @@ -175,7 +175,7 @@ int ompi_coll_libnbc_ineighbor_allgatherv(const void *sbuf, int scount, MPI_Data } res = NBC_Start(*(ompi_coll_libnbc_request_t **)request); if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { - NBC_Return_handle ((ompi_coll_libnbc_request_t *)request); + NBC_Return_handle (*(ompi_coll_libnbc_request_t **)request); *request = &ompi_request_null.request; return res; } diff --git a/ompi/mca/coll/libnbc/nbc_ineighbor_alltoall.c b/ompi/mca/coll/libnbc/nbc_ineighbor_alltoall.c index c24a8781bd1..c2aa09b5e02 100644 --- a/ompi/mca/coll/libnbc/nbc_ineighbor_alltoall.c +++ b/ompi/mca/coll/libnbc/nbc_ineighbor_alltoall.c @@ -5,8 +5,8 @@ * Corporation. All rights reserved. * Copyright (c) 2006 The Technical University of Chemnitz. All * rights reserved. 
- * Copyright (c) 2014-2017 Research Organization for Information Science - * and Technology (RIST). All rights reserved. + * Copyright (c) 2014-2018 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * Copyright (c) 2015 Los Alamos National Security, LLC. All rights * reserved. * Copyright (c) 2017 IBM Corporation. All rights reserved. @@ -177,7 +177,7 @@ int ompi_coll_libnbc_ineighbor_alltoall(const void *sbuf, int scount, MPI_Dataty } res = NBC_Start(*(ompi_coll_libnbc_request_t **)request); if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { - NBC_Return_handle ((ompi_coll_libnbc_request_t *)request); + NBC_Return_handle (*(ompi_coll_libnbc_request_t **)request); *request = &ompi_request_null.request; return res; } diff --git a/ompi/mca/coll/libnbc/nbc_ineighbor_alltoallv.c b/ompi/mca/coll/libnbc/nbc_ineighbor_alltoallv.c index 58307dbd436..054b047cb8e 100644 --- a/ompi/mca/coll/libnbc/nbc_ineighbor_alltoallv.c +++ b/ompi/mca/coll/libnbc/nbc_ineighbor_alltoallv.c @@ -5,8 +5,8 @@ * Corporation. All rights reserved. * Copyright (c) 2006 The Technical University of Chemnitz. All * rights reserved. - * Copyright (c) 2014-2017 Research Organization for Information Science - * and Technology (RIST). All rights reserved. + * Copyright (c) 2014-2018 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * Copyright (c) 2015 Los Alamos National Security, LLC. All rights * reserved. * Copyright (c) 2017 IBM Corporation. All rights reserved. 
@@ -182,7 +182,7 @@ int ompi_coll_libnbc_ineighbor_alltoallv(const void *sbuf, const int *scounts, c } res = NBC_Start(*(ompi_coll_libnbc_request_t **)request); if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { - NBC_Return_handle ((ompi_coll_libnbc_request_t *)request); + NBC_Return_handle (*(ompi_coll_libnbc_request_t **)request); *request = &ompi_request_null.request; return res; } diff --git a/ompi/mca/coll/libnbc/nbc_ineighbor_alltoallw.c b/ompi/mca/coll/libnbc/nbc_ineighbor_alltoallw.c index adc0ba3ae52..270a0b20b34 100644 --- a/ompi/mca/coll/libnbc/nbc_ineighbor_alltoallw.c +++ b/ompi/mca/coll/libnbc/nbc_ineighbor_alltoallw.c @@ -5,8 +5,8 @@ * Corporation. All rights reserved. * Copyright (c) 2006 The Technical University of Chemnitz. All * rights reserved. - * Copyright (c) 2014-2017 Research Organization for Information Science - * and Technology (RIST). All rights reserved. + * Copyright (c) 2014-2018 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * Copyright (c) 2015 Los Alamos National Security, LLC. All rights * reserved. * Copyright (c) 2017 IBM Corporation. All rights reserved. @@ -167,7 +167,7 @@ int ompi_coll_libnbc_ineighbor_alltoallw(const void *sbuf, const int *scounts, c } res = NBC_Start(*(ompi_coll_libnbc_request_t **)request); if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { - NBC_Return_handle ((ompi_coll_libnbc_request_t *)request); + NBC_Return_handle (*(ompi_coll_libnbc_request_t **)request); *request = &ompi_request_null.request; return res; } diff --git a/ompi/mca/coll/libnbc/nbc_ireduce.c b/ompi/mca/coll/libnbc/nbc_ireduce.c index d4bcb62e06f..c222fa3a7f0 100644 --- a/ompi/mca/coll/libnbc/nbc_ireduce.c +++ b/ompi/mca/coll/libnbc/nbc_ireduce.c @@ -7,8 +7,8 @@ * rights reserved. * Copyright (c) 2013-2015 Los Alamos National Security, LLC. All rights * reserved. - * Copyright (c) 2014-2017 Research Organization for Information Science - * and Technology (RIST). All rights reserved. 
+ * Copyright (c) 2014-2018 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * Copyright (c) 2017 IBM Corporation. All rights reserved. * Copyright (c) 2018 FUJITSU LIMITED. All rights reserved. * $COPYRIGHT$ @@ -218,7 +218,7 @@ int ompi_coll_libnbc_ireduce(const void* sendbuf, void* recvbuf, int count, MPI_ } res = NBC_Start(*(ompi_coll_libnbc_request_t **)request); if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { - NBC_Return_handle ((ompi_coll_libnbc_request_t *)request); + NBC_Return_handle (*(ompi_coll_libnbc_request_t **)request); *request = &ompi_request_null.request; return res; } @@ -284,7 +284,7 @@ int ompi_coll_libnbc_ireduce_inter(const void* sendbuf, void* recvbuf, int count } res = NBC_Start(*(ompi_coll_libnbc_request_t **)request); if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { - NBC_Return_handle ((ompi_coll_libnbc_request_t *)request); + NBC_Return_handle (*(ompi_coll_libnbc_request_t **)request); *request = &ompi_request_null.request; return res; } diff --git a/ompi/mca/coll/libnbc/nbc_ireduce_scatter.c b/ompi/mca/coll/libnbc/nbc_ireduce_scatter.c index 3b318ea1d36..230bcaa0101 100644 --- a/ompi/mca/coll/libnbc/nbc_ireduce_scatter.c +++ b/ompi/mca/coll/libnbc/nbc_ireduce_scatter.c @@ -7,8 +7,8 @@ * rights reserved. * Copyright (c) 2013-2015 Los Alamos National Security, LLC. All rights * reserved. - * Copyright (c) 2014-2017 Research Organization for Information Science - * and Technology (RIST). All rights reserved. + * Copyright (c) 2014-2018 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * Copyright (c) 2015 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. 
@@ -219,7 +219,7 @@ int ompi_coll_libnbc_ireduce_scatter (const void* sendbuf, void* recvbuf, const } res = NBC_Start(*(ompi_coll_libnbc_request_t **)request); if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { - NBC_Return_handle ((ompi_coll_libnbc_request_t *)request); + NBC_Return_handle (*(ompi_coll_libnbc_request_t **)request); *request = &ompi_request_null.request; return res; } @@ -361,7 +361,7 @@ int ompi_coll_libnbc_ireduce_scatter_inter (const void* sendbuf, void* recvbuf, } res = NBC_Start(*(ompi_coll_libnbc_request_t **)request); if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { - NBC_Return_handle ((ompi_coll_libnbc_request_t *)request); + NBC_Return_handle (*(ompi_coll_libnbc_request_t **)request); *request = &ompi_request_null.request; return res; } diff --git a/ompi/mca/coll/libnbc/nbc_ireduce_scatter_block.c b/ompi/mca/coll/libnbc/nbc_ireduce_scatter_block.c index 6553b44d63b..6dadd1eafa8 100644 --- a/ompi/mca/coll/libnbc/nbc_ireduce_scatter_block.c +++ b/ompi/mca/coll/libnbc/nbc_ireduce_scatter_block.c @@ -8,8 +8,8 @@ * Copyright (c) 2012 Sandia National Laboratories. All rights reserved. * Copyright (c) 2013-2015 Los Alamos National Security, LLC. All rights * reserved. - * Copyright (c) 2014-2017 Research Organization for Information Science - * and Technology (RIST). All rights reserved. + * Copyright (c) 2014-2018 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * Copyright (c) 2017 IBM Corporation. All rights reserved. * Copyright (c) 2018 FUJITSU LIMITED. All rights reserved. 
* $COPYRIGHT$ @@ -217,7 +217,7 @@ int ompi_coll_libnbc_ireduce_scatter_block(const void* sendbuf, void* recvbuf, i } res = NBC_Start(*(ompi_coll_libnbc_request_t **)request); if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { - NBC_Return_handle ((ompi_coll_libnbc_request_t *)request); + NBC_Return_handle (*(ompi_coll_libnbc_request_t **)request); *request = &ompi_request_null.request; return res; } @@ -356,7 +356,7 @@ int ompi_coll_libnbc_ireduce_scatter_block_inter(const void* sendbuf, void* recv } res = NBC_Start(*(ompi_coll_libnbc_request_t **)request); if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { - NBC_Return_handle ((ompi_coll_libnbc_request_t *)request); + NBC_Return_handle (*(ompi_coll_libnbc_request_t **)request); *request = &ompi_request_null.request; return res; } diff --git a/ompi/mca/coll/libnbc/nbc_iscan.c b/ompi/mca/coll/libnbc/nbc_iscan.c index 4d4f3677b7c..33374ede7ae 100644 --- a/ompi/mca/coll/libnbc/nbc_iscan.c +++ b/ompi/mca/coll/libnbc/nbc_iscan.c @@ -5,8 +5,8 @@ * Corporation. All rights reserved. * Copyright (c) 2006 The Technical University of Chemnitz. All * rights reserved. - * Copyright (c) 2014-2017 Research Organization for Information Science - * and Technology (RIST). All rights reserved. + * Copyright (c) 2014-2018 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * Copyright (c) 2015 Los Alamos National Security, LLC. All rights * reserved. * Copyright (c) 2017 IBM Corporation. All rights reserved. 
@@ -182,7 +182,7 @@ int ompi_coll_libnbc_iscan(const void* sendbuf, void* recvbuf, int count, MPI_Da } res = NBC_Start(*(ompi_coll_libnbc_request_t **)request); if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { - NBC_Return_handle ((ompi_coll_libnbc_request_t *)request); + NBC_Return_handle (*(ompi_coll_libnbc_request_t **)request); *request = &ompi_request_null.request; return res; } diff --git a/ompi/mca/coll/libnbc/nbc_iscatter.c b/ompi/mca/coll/libnbc/nbc_iscatter.c index a7bbb42b66c..c1b2f2ac552 100644 --- a/ompi/mca/coll/libnbc/nbc_iscatter.c +++ b/ompi/mca/coll/libnbc/nbc_iscatter.c @@ -10,8 +10,8 @@ * Copyright (c) 2013 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2014-2017 Research Organization for Information Science - * and Technology (RIST). All rights reserved. + * Copyright (c) 2014-2018 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * Copyright (c) 2017 IBM Corporation. All rights reserved. * Copyright (c) 2018 FUJITSU LIMITED. All rights reserved. 
* $COPYRIGHT$ @@ -179,7 +179,7 @@ int ompi_coll_libnbc_iscatter (const void* sendbuf, int sendcount, MPI_Datatype } res = NBC_Start(*(ompi_coll_libnbc_request_t **)request); if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { - NBC_Return_handle ((ompi_coll_libnbc_request_t *)request); + NBC_Return_handle (*(ompi_coll_libnbc_request_t **)request); *request = &ompi_request_null.request; return res; } @@ -258,7 +258,7 @@ int ompi_coll_libnbc_iscatter_inter (const void* sendbuf, int sendcount, MPI_Dat } res = NBC_Start(*(ompi_coll_libnbc_request_t **)request); if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { - NBC_Return_handle ((ompi_coll_libnbc_request_t *)request); + NBC_Return_handle (*(ompi_coll_libnbc_request_t **)request); *request = &ompi_request_null.request; return res; } diff --git a/ompi/mca/coll/libnbc/nbc_iscatterv.c b/ompi/mca/coll/libnbc/nbc_iscatterv.c index e9f8ba7394c..8badee86554 100644 --- a/ompi/mca/coll/libnbc/nbc_iscatterv.c +++ b/ompi/mca/coll/libnbc/nbc_iscatterv.c @@ -10,8 +10,8 @@ * Copyright (c) 2013 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2014-2017 Research Organization for Information Science - * and Technology (RIST). All rights reserved. + * Copyright (c) 2014-2018 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * Copyright (c) 2017 IBM Corporation. All rights reserved. * Copyright (c) 2018 FUJITSU LIMITED. All rights reserved. 
* $COPYRIGHT$ @@ -114,7 +114,7 @@ int ompi_coll_libnbc_iscatterv(const void* sendbuf, const int *sendcounts, const } res = NBC_Start(*(ompi_coll_libnbc_request_t **)request); if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { - NBC_Return_handle ((ompi_coll_libnbc_request_t *)request); + NBC_Return_handle (*(ompi_coll_libnbc_request_t **)request); *request = &ompi_request_null.request; return res; } @@ -192,7 +192,7 @@ int ompi_coll_libnbc_iscatterv_inter(const void* sendbuf, const int *sendcounts, } res = NBC_Start(*(ompi_coll_libnbc_request_t **)request); if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { - NBC_Return_handle ((ompi_coll_libnbc_request_t *)request); + NBC_Return_handle (*(ompi_coll_libnbc_request_t **)request); *request = &ompi_request_null.request; return res; } diff --git a/ompi/mca/coll/portals4/coll_portals4_allreduce.c b/ompi/mca/coll/portals4/coll_portals4_allreduce.c index fe0250defd6..6a5c781b1b3 100644 --- a/ompi/mca/coll/portals4/coll_portals4_allreduce.c +++ b/ompi/mca/coll/portals4/coll_portals4_allreduce.c @@ -343,15 +343,38 @@ allreduce_kary_tree_top(const void *sendbuf, void *recvbuf, int count, static int allreduce_kary_tree_bottom(ompi_coll_portals4_request_t *request) { + int ret; + if (request->u.allreduce.is_optim) { PtlAtomicSync(); if (request->u.allreduce.child_nb) { - PtlCTFree(request->u.allreduce.ack_ct_h); + ret = PtlCTFree(request->u.allreduce.ack_ct_h); + if (PTL_OK != ret) { + opal_output_verbose(1, ompi_coll_base_framework.framework_output, + "%s:%d: PtlCTFree failed: %d\n", + __FILE__, __LINE__, ret); + return OMPI_ERROR; + } } - PtlMEUnlink(request->u.allreduce.data_me_h); - PtlCTFree(request->u.allreduce.trig_ct_h); + do { + ret = PtlMEUnlink(request->u.allreduce.data_me_h); + } while (PTL_IN_USE == ret); + if (PTL_OK != ret) { + opal_output_verbose(1, ompi_coll_base_framework.framework_output, + "%s:%d: PtlMEUnlink failed: %d\n", + __FILE__, __LINE__, ret); + return OMPI_ERROR; + } + + ret = 
PtlCTFree(request->u.allreduce.trig_ct_h); + if (PTL_OK != ret) { + opal_output_verbose(1, ompi_coll_base_framework.framework_output, + "%s:%d: PtlCTFree failed: %d\n", + __FILE__, __LINE__, ret); + return OMPI_ERROR; + } } return (OMPI_SUCCESS); diff --git a/ompi/mca/coll/portals4/coll_portals4_barrier.c b/ompi/mca/coll/portals4/coll_portals4_barrier.c index b40c1f9256d..8021b60e378 100644 --- a/ompi/mca/coll/portals4/coll_portals4_barrier.c +++ b/ompi/mca/coll/portals4/coll_portals4_barrier.c @@ -206,7 +206,9 @@ barrier_hypercube_bottom(ompi_coll_portals4_request_t *request) int ret; /* cleanup */ - ret = PtlMEUnlink(request->u.barrier.data_me_h); + do { + ret = PtlMEUnlink(request->u.barrier.data_me_h); + } while (PTL_IN_USE == ret); if (PTL_OK != ret) { opal_output_verbose(1, ompi_coll_base_framework.framework_output, "%s:%d: PtlMEUnlink failed: %d\n", diff --git a/ompi/mca/coll/portals4/coll_portals4_component.c b/ompi/mca/coll/portals4/coll_portals4_component.c index 1be495861c0..d632340ee26 100644 --- a/ompi/mca/coll/portals4/coll_portals4_component.c +++ b/ompi/mca/coll/portals4/coll_portals4_component.c @@ -285,7 +285,9 @@ portals4_close(void) mca_coll_portals4_component.data_md_h = PTL_INVALID_HANDLE; if (!PtlHandleIsEqual(mca_coll_portals4_component.finish_me_h, PTL_INVALID_HANDLE)) { - ret = PtlMEUnlink(mca_coll_portals4_component.finish_me_h); + do { + ret = PtlMEUnlink(mca_coll_portals4_component.finish_me_h); + } while (PTL_IN_USE == ret); if (PTL_OK != ret) { opal_output_verbose(1, ompi_coll_base_framework.framework_output, "%s:%d: PtlMEUnlink failed: %d\n", @@ -293,7 +295,9 @@ portals4_close(void) } } if (!PtlHandleIsEqual(mca_coll_portals4_component.unex_me_h, PTL_INVALID_HANDLE)) { - ret = PtlMEUnlink(mca_coll_portals4_component.unex_me_h); + do { + ret = PtlMEUnlink(mca_coll_portals4_component.unex_me_h); + } while (PTL_IN_USE == ret); if (PTL_OK != ret) { opal_output_verbose(1, ompi_coll_base_framework.framework_output, "%s:%d: PtlMEUnlink 
failed: %d\n", diff --git a/ompi/mca/coll/portals4/coll_portals4_gather.c b/ompi/mca/coll/portals4/coll_portals4_gather.c index 7e38e27c009..274e9d4ee89 100644 --- a/ompi/mca/coll/portals4/coll_portals4_gather.c +++ b/ompi/mca/coll/portals4/coll_portals4_gather.c @@ -460,7 +460,9 @@ cleanup_gather_handles(ompi_coll_portals4_request_t *request) /**********************************/ /* Cleanup Gather Handles */ /**********************************/ - ret = PtlMEUnlink(request->u.gather.gather_meh); + do { + ret = PtlMEUnlink(request->u.gather.gather_meh); + } while (PTL_IN_USE == ret); if (PTL_OK != ret) { ret = OMPI_ERROR; line = __LINE__; goto err_hdlr; } ret = PtlCTFree(request->u.gather.gather_cth); @@ -484,7 +486,9 @@ cleanup_sync_handles(ompi_coll_portals4_request_t *request) /**********************************/ /* Cleanup Sync Handles */ /**********************************/ - ret = PtlMEUnlink(request->u.gather.sync_meh); + do { + ret = PtlMEUnlink(request->u.gather.sync_meh); + } while (PTL_IN_USE == ret); if (PTL_OK != ret) { ret = OMPI_ERROR; line = __LINE__; goto err_hdlr; } ret = PtlCTFree(request->u.gather.sync_cth); diff --git a/ompi/mca/coll/portals4/coll_portals4_reduce.c b/ompi/mca/coll/portals4/coll_portals4_reduce.c index 798a1fa17e4..2a60ef57dc8 100644 --- a/ompi/mca/coll/portals4/coll_portals4_reduce.c +++ b/ompi/mca/coll/portals4/coll_portals4_reduce.c @@ -340,24 +340,38 @@ reduce_kary_tree_top(const void *sendbuf, void *recvbuf, int count, static int reduce_kary_tree_bottom(ompi_coll_portals4_request_t *request) { + int ret, line; + if (request->u.reduce.is_optim) { PtlAtomicSync(); if (request->u.reduce.use_ack_ct_h) { - PtlCTFree(request->u.reduce.ack_ct_h); + ret = PtlCTFree(request->u.reduce.ack_ct_h); + if (PTL_OK != ret) { ret = OMPI_ERROR; line = __LINE__; goto err_hdlr; } } if (request->u.reduce.child_nb) { - PtlMEUnlink(request->u.reduce.data_me_h); + do { + ret = PtlMEUnlink(request->u.reduce.data_me_h); + } while (PTL_IN_USE == ret); + 
if (PTL_OK != ret) { ret = OMPI_ERROR; line = __LINE__; goto err_hdlr; } } - PtlCTFree(request->u.reduce.trig_ct_h); + ret = PtlCTFree(request->u.reduce.trig_ct_h); + if (PTL_OK != ret) { ret = OMPI_ERROR; line = __LINE__; goto err_hdlr; } if (request->u.reduce.free_buffer) { free(request->u.reduce.free_buffer); } } return (OMPI_SUCCESS); + +err_hdlr: + opal_output(ompi_coll_base_framework.framework_output, + "%s:%4d:%4d\tError occurred ret=%d", + __FILE__, __LINE__, line, ret); + + return ret; } diff --git a/ompi/mca/coll/portals4/coll_portals4_scatter.c b/ompi/mca/coll/portals4/coll_portals4_scatter.c index 4f3351ac784..0049a61d001 100644 --- a/ompi/mca/coll/portals4/coll_portals4_scatter.c +++ b/ompi/mca/coll/portals4/coll_portals4_scatter.c @@ -253,14 +253,8 @@ cleanup_scatter_handles(ompi_coll_portals4_request_t *request) /**********************************/ do { ret = PtlMEUnlink(request->u.scatter.scatter_meh); - if (PTL_IN_USE == ret) { - opal_output(ompi_coll_base_framework.framework_output, - "%s:%4d: scatter_meh still in use (ret=%d, rank %2d)", - __FILE__, __LINE__, ret, request->u.scatter.my_rank); - continue; - } - if (PTL_OK != ret) { ret = OMPI_ERROR; line = __LINE__; goto err_hdlr; } - } while (ret == PTL_IN_USE); + } while (PTL_IN_USE == ret); + if (PTL_OK != ret) { ret = OMPI_ERROR; line = __LINE__; goto err_hdlr; } ret = PtlCTFree(request->u.scatter.scatter_cth); if (PTL_OK != ret) { ret = OMPI_ERROR; line = __LINE__; goto err_hdlr; } @@ -292,14 +286,8 @@ cleanup_sync_handles(ompi_coll_portals4_request_t *request) /**********************************/ do { ret = PtlMEUnlink(request->u.scatter.sync_meh); - if (PTL_IN_USE == ret) { - opal_output(ompi_coll_base_framework.framework_output, - "%s:%4d: sync_meh still in use (ret=%d, rank %2d)", - __FILE__, __LINE__, ret, request->u.scatter.my_rank); - continue; - } - if (PTL_OK != ret) { ret = OMPI_ERROR; line = __LINE__; goto err_hdlr; } - } while (ret == PTL_IN_USE); + } while (PTL_IN_USE == ret); + 
if (PTL_OK != ret) { ptl_ret = ret; ret = OMPI_ERROR; line = __LINE__; goto err_hdlr; } ret = PtlCTFree(request->u.scatter.sync_cth); if (PTL_OK != ret) { ptl_ret = ret; ret = OMPI_ERROR; line = __LINE__; goto err_hdlr; } diff --git a/ompi/mca/coll/tuned/coll_tuned.h b/ompi/mca/coll/tuned/coll_tuned.h index d4b201bc7a3..d6fc4b89bde 100644 --- a/ompi/mca/coll/tuned/coll_tuned.h +++ b/ompi/mca/coll/tuned/coll_tuned.h @@ -38,6 +38,9 @@ extern int ompi_coll_tuned_init_chain_fanout; extern int ompi_coll_tuned_init_max_requests; extern int ompi_coll_tuned_alltoall_small_msg; extern int ompi_coll_tuned_alltoall_intermediate_msg; +extern int ompi_coll_tuned_alltoall_large_msg; +extern int ompi_coll_tuned_alltoall_min_procs; +extern int ompi_coll_tuned_alltoall_max_requests; /* forced algorithm choices */ /* this structure is for storing the indexes to the forced algorithm mca params... */ diff --git a/ompi/mca/coll/tuned/coll_tuned_alltoall_decision.c b/ompi/mca/coll/tuned/coll_tuned_alltoall_decision.c index 2ef1e6b9038..b63037e1237 100644 --- a/ompi/mca/coll/tuned/coll_tuned_alltoall_decision.c +++ b/ompi/mca/coll/tuned/coll_tuned_alltoall_decision.c @@ -28,7 +28,6 @@ /* alltoall algorithm variables */ static int coll_tuned_alltoall_forced_algorithm = 0; static int coll_tuned_alltoall_segment_size = 0; -static int coll_tuned_alltoall_max_requests; static int coll_tuned_alltoall_tree_fanout; static int coll_tuned_alltoall_chain_fanout; @@ -115,7 +114,22 @@ int ompi_coll_tuned_alltoall_intra_check_forced_init (coll_tuned_force_algorithm MCA_BASE_VAR_SCOPE_ALL, &coll_tuned_alltoall_chain_fanout); - coll_tuned_alltoall_max_requests = 0; /* no limit for alltoall by default */ + (void) mca_base_component_var_register(&mca_coll_tuned_component.super.collm_version, + "alltoall_large_msg", + "use pairwise exchange algorithm for messages larger than this value", + MCA_BASE_VAR_TYPE_INT, NULL, 0, 0, + OPAL_INFO_LVL_6, + MCA_BASE_VAR_SCOPE_READONLY, + 
&ompi_coll_tuned_alltoall_large_msg); + + (void) mca_base_component_var_register(&mca_coll_tuned_component.super.collm_version, + "alltoall_min_procs", + "use pairwise exchange algorithm for communicators larger than this value", + MCA_BASE_VAR_TYPE_INT, NULL, 0, 0, + OPAL_INFO_LVL_6, + MCA_BASE_VAR_SCOPE_READONLY, + &ompi_coll_tuned_alltoall_min_procs); + mca_param_indices->max_requests_param_index = mca_base_component_var_register(&mca_coll_tuned_component.super.collm_version, "alltoall_algorithm_max_requests", @@ -123,17 +137,16 @@ int ompi_coll_tuned_alltoall_intra_check_forced_init (coll_tuned_force_algorithm MCA_BASE_VAR_TYPE_INT, NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE, OPAL_INFO_LVL_5, MCA_BASE_VAR_SCOPE_ALL, - &coll_tuned_alltoall_max_requests); + &ompi_coll_tuned_alltoall_max_requests); if (mca_param_indices->max_requests_param_index < 0) { return mca_param_indices->max_requests_param_index; } - if (coll_tuned_alltoall_max_requests < 0) { + if (ompi_coll_tuned_alltoall_max_requests < 0) { if( 0 == ompi_comm_rank( MPI_COMM_WORLD ) ) { - opal_output( 0, "Maximum outstanding requests must be positive number greater than 1. Switching to system level default %d \n", - ompi_coll_tuned_init_max_requests ); + opal_output( 0, "Maximum outstanding requests must be positive number greater than 1. Switching to 0 \n"); } - coll_tuned_alltoall_max_requests = 0; + ompi_coll_tuned_alltoall_max_requests = 0; } return (MPI_SUCCESS); diff --git a/ompi/mca/coll/tuned/coll_tuned_component.c b/ompi/mca/coll/tuned/coll_tuned_component.c index be0d14a988f..25e9bc77a0d 100644 --- a/ompi/mca/coll/tuned/coll_tuned_component.c +++ b/ompi/mca/coll/tuned/coll_tuned_component.c @@ -57,6 +57,13 @@ int ompi_coll_tuned_init_max_requests = 128; int ompi_coll_tuned_alltoall_small_msg = 200; int ompi_coll_tuned_alltoall_intermediate_msg = 3000; +/* Set it to the same value as intermediate msg by default, so it does not affect + * default algorithm selection. 
Changing this value will force using linear with + * sync algorithm on certain message sizes. */ +int ompi_coll_tuned_alltoall_large_msg = 3000; +int ompi_coll_tuned_alltoall_min_procs = 0; /* disable by default */ +int ompi_coll_tuned_alltoall_max_requests = 0; /* no limit for alltoall by default */ + /* forced alogrithm variables */ /* indices for the MCA parameters */ coll_tuned_force_algorithm_mca_param_indices_t ompi_coll_tuned_forced_params[COLLCOUNT] = {{0}}; diff --git a/ompi/mca/coll/tuned/coll_tuned_decision_fixed.c b/ompi/mca/coll/tuned/coll_tuned_decision_fixed.c index 102e4ee11f3..97560c5c089 100644 --- a/ompi/mca/coll/tuned/coll_tuned_decision_fixed.c +++ b/ompi/mca/coll/tuned/coll_tuned_decision_fixed.c @@ -119,7 +119,11 @@ int ompi_coll_tuned_alltoall_intra_dec_fixed(const void *sbuf, int scount, the University of Tennessee (2GB MX) up to 64 nodes. Has better performance for messages of intermediate sizes than the old one */ /* determine block size */ - ompi_datatype_type_size(sdtype, &dsize); + if (MPI_IN_PLACE != sbuf) { + ompi_datatype_type_size(sdtype, &dsize); + } else { + ompi_datatype_type_size(rdtype, &dsize); + } block_dsize = dsize * (ptrdiff_t)scount; if ((block_dsize < (size_t) ompi_coll_tuned_alltoall_small_msg) @@ -132,6 +136,12 @@ int ompi_coll_tuned_alltoall_intra_dec_fixed(const void *sbuf, int scount, return ompi_coll_base_alltoall_intra_basic_linear(sbuf, scount, sdtype, rbuf, rcount, rdtype, comm, module); + } else if ((block_dsize < (size_t) ompi_coll_tuned_alltoall_large_msg) && + (communicator_size <= ompi_coll_tuned_alltoall_min_procs)) { + return ompi_coll_base_alltoall_intra_linear_sync(sbuf, scount, sdtype, + rbuf, rcount, rdtype, + comm, module, + ompi_coll_tuned_alltoall_max_requests); } return ompi_coll_base_alltoall_intra_pairwise(sbuf, scount, sdtype, @@ -549,7 +559,11 @@ int ompi_coll_tuned_allgather_intra_dec_fixed(const void *sbuf, int scount, } /* Determine complete data size */ - ompi_datatype_type_size(sdtype, 
&dsize); + if (MPI_IN_PLACE != sbuf) { + ompi_datatype_type_size(sdtype, &dsize); + } else { + ompi_datatype_type_size(rdtype, &dsize); + } total_dsize = dsize * (ptrdiff_t)scount * (ptrdiff_t)communicator_size; OPAL_OUTPUT((ompi_coll_tuned_stream, "ompi_coll_tuned_allgather_intra_dec_fixed" @@ -644,7 +658,12 @@ int ompi_coll_tuned_allgatherv_intra_dec_fixed(const void *sbuf, int scount, } /* Determine complete data size */ - ompi_datatype_type_size(sdtype, &dsize); + if (MPI_IN_PLACE != sbuf) { + ompi_datatype_type_size(sdtype, &dsize); + } else { + ompi_datatype_type_size(rdtype, &dsize); + } + total_dsize = 0; for (i = 0; i < communicator_size; i++) { total_dsize += dsize * (ptrdiff_t)rcounts[i]; diff --git a/ompi/mca/common/monitoring/common_monitoring.c b/ompi/mca/common/monitoring/common_monitoring.c index e521ca56417..ff252bf944f 100644 --- a/ompi/mca/common/monitoring/common_monitoring.c +++ b/ompi/mca/common/monitoring/common_monitoring.c @@ -268,7 +268,7 @@ void mca_common_monitoring_register(void*pml_monitoring_component) &mca_common_monitoring_enabled); mca_common_monitoring_current_state = mca_common_monitoring_enabled; - + (void)mca_base_var_register("ompi", "pml", "monitoring", "enable_output", "Enable the PML monitoring textual output at MPI_Finalize " "(it will be automatically turned off when MPIT is used to " @@ -278,7 +278,7 @@ void mca_common_monitoring_register(void*pml_monitoring_component) MCA_BASE_VAR_FLAG_DWG, OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_READONLY, &mca_common_monitoring_output_enabled); - + (void)mca_base_var_register("ompi", "pml", "monitoring", "filename", /*&mca_common_monitoring_component.pmlm_version, "filename",*/ "The name of the file where the monitoring information " @@ -292,7 +292,7 @@ void mca_common_monitoring_register(void*pml_monitoring_component) /* Now that the MCA variables are automatically unregistered when * their component close, we need to keep a safe copy of the - * filename. + * filename. 
* Keep the copy completely separated in order to let the initial * filename to be handled by the framework. It's easier to deal * with the string lifetime. diff --git a/ompi/mca/common/ompio/common_ompio.h b/ompi/mca/common/ompio/common_ompio.h index e1d7fe21280..a1c195de08c 100644 --- a/ompi/mca/common/ompio/common_ompio.h +++ b/ompi/mca/common/ompio/common_ompio.h @@ -261,7 +261,7 @@ OMPI_DECLSPEC int mca_common_ompio_file_iwrite_at_all (ompio_file_t *fp, OMPI_MP int count, struct ompi_datatype_t *datatype, ompi_request_t **request); OMPI_DECLSPEC int mca_common_ompio_build_io_array ( ompio_file_t *fh, int index, int cycles, - size_t bytes_per_cycle, int max_data, uint32_t iov_count, + size_t bytes_per_cycle, size_t max_data, uint32_t iov_count, struct iovec *decoded_iov, int *ii, int *jj, size_t *tbw, size_t *spc ); diff --git a/ompi/mca/common/ompio/common_ompio_aggregators.c b/ompi/mca/common/ompio/common_ompio_aggregators.c index aa751cd4a1d..5a570d8e005 100644 --- a/ompi/mca/common/ompio/common_ompio_aggregators.c +++ b/ompi/mca/common/ompio/common_ompio_aggregators.c @@ -896,11 +896,14 @@ int mca_common_ompio_split_initial_groups(ompio_file_t *fh, int size_smallest_group = 0; int num_groups = 0; int ret = OMPI_SUCCESS; + OMPI_MPI_COUNT_TYPE bytes_per_agg_group = 0; OMPI_MPI_OFFSET_TYPE max_cci = 0; OMPI_MPI_OFFSET_TYPE min_cci = 0; - size_new_group = ceil ((float)OMPIO_MCA_GET(fh, bytes_per_agg) * fh->f_init_procs_per_group/ bytes_per_group); + bytes_per_agg_group = (OMPI_MPI_COUNT_TYPE)OMPIO_MCA_GET(fh, bytes_per_agg); + // integer round up + size_new_group = (int)(bytes_per_agg_group / bytes_per_group + (bytes_per_agg_group % bytes_per_group ? 
1u : 0u)); size_old_group = fh->f_init_procs_per_group; ret = mca_common_ompio_split_a_group(fh, @@ -948,7 +951,7 @@ int mca_common_ompio_split_initial_groups(ompio_file_t *fh, if((max_cci < OMPIO_CONTG_THRESHOLD) && (size_new_group < size_old_group)){ - size_new_group = floor( (float) (size_new_group + size_old_group ) / 2 ); + size_new_group = (size_new_group + size_old_group ) / 2; ret = mca_common_ompio_split_a_group(fh, start_offsets_lens, end_offsets, @@ -976,7 +979,9 @@ int mca_common_ompio_split_initial_groups(ompio_file_t *fh, (size_new_group < size_old_group)){ //can be a better condition //monitor the previous iteration //break if it has not changed. - size_new_group = ceil( (float) (size_new_group + size_old_group ) / 2 ); + size_new_group = size_new_group + size_old_group; + // integer round up + size_new_group = size_new_group / 2 + (size_new_group % 2 ? 1 : 0); ret = mca_common_ompio_split_a_group(fh, start_offsets_lens, end_offsets, @@ -1486,13 +1491,12 @@ static double cost_calc (int P, int P_a, size_t d_p, size_t b_c, int dim ) } case DIM2: { - int P_x, P_y, c; + int P_x, P_y; P_x = P_y = (int) sqrt(P); - c = (float) P_a / (float)P_x; + n_as = (float) P_a / (float)P_x; n_ar = (float) P_y; - n_as = (float) c; if ( d_p > (P_a*b_c/P )) { m_s = fmin(b_c / P_y, d_p); } diff --git a/ompi/mca/common/ompio/common_ompio_file_open.c b/ompi/mca/common/ompio/common_ompio_file_open.c index cb0d7d1987f..cf701d3e63a 100644 --- a/ompi/mca/common/ompio/common_ompio_file_open.c +++ b/ompi/mca/common/ompio/common_ompio_file_open.c @@ -9,7 +9,7 @@ * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. - * Copyright (c) 2008-2017 University of Houston. All rights reserved. + * Copyright (c) 2008-2019 University of Houston. All rights reserved. * Copyright (c) 2015-2018 Research Organization for Information Science * and Technology (RIST). All rights reserved. 
* Copyright (c) 2016 Cisco Systems, Inc. All rights reserved. @@ -164,15 +164,6 @@ int mca_common_ompio_file_open (ompi_communicator_t *comm, goto fn_fail; } - /* Set default file view */ - mca_common_ompio_set_view(ompio_fh, - 0, - &ompi_mpi_byte.dt, - &ompi_mpi_byte.dt, - "native", - info); - - if ( true == use_sharedfp ) { /* open the file once more for the shared file pointer if required. ** Can be disabled by the user if no shared file pointer operations @@ -191,6 +182,15 @@ int mca_common_ompio_file_open (ompi_communicator_t *comm, } } + /* Set default file view */ + mca_common_ompio_set_view(ompio_fh, + 0, + &ompi_mpi_byte.dt, + &ompi_mpi_byte.dt, + "native", + info); + + /* If file has been opened in the append mode, move the internal file pointer of OMPIO to the very end of the file. */ @@ -384,6 +384,13 @@ int mca_common_ompio_file_get_position (ompio_file_t *fh, { OMPI_MPI_OFFSET_TYPE off; + if ( 0 == fh->f_view_extent || + 0 == fh->f_view_size || + 0 == fh->f_etype_size ) { + *offset = 0; + return OMPI_SUCCESS; + } + /* No. of copies of the entire file view */ off = (fh->f_offset - fh->f_disp)/fh->f_view_extent; @@ -506,7 +513,7 @@ int mca_common_ompio_file_delete (const char *filename, return ret; } - ret = fh->f_fs->fs_file_delete (filename, NULL); + ret = fh->f_fs->fs_file_delete ( (char *)filename, NULL); free(fh); if (OMPI_SUCCESS != ret) { diff --git a/ompi/mca/common/ompio/common_ompio_file_read.c b/ompi/mca/common/ompio/common_ompio_file_read.c index 6d6d112eb30..eafd1c44319 100644 --- a/ompi/mca/common/ompio/common_ompio_file_read.c +++ b/ompi/mca/common/ompio/common_ompio_file_read.c @@ -9,7 +9,7 @@ * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. - * Copyright (c) 2008-2018 University of Houston. All rights reserved. + * Copyright (c) 2008-2019 University of Houston. All rights reserved. 
* Copyright (c) 2018 Research Organization for Information Science * and Technology (RIST). All rights reserved. * $COPYRIGHT$ @@ -33,8 +33,8 @@ #include "common_ompio.h" #include "common_ompio_request.h" -#include "math.h" #include +#include #if OPAL_CUDA_SUPPORT #include "common_ompio_cuda.h" @@ -77,6 +77,12 @@ int mca_common_ompio_file_read (ompio_file_t *fh, int i = 0; /* index into the decoded iovec of the buffer */ int j = 0; /* index into the file vie iovec */ + if (fh->f_amode & MPI_MODE_WRONLY){ +// opal_output(10, "Improper use of FILE Mode, Using WRONLY for Read!\n"); + ret = MPI_ERR_ACCESS; + return ret; + } + if ( 0 == count ) { if ( MPI_STATUS_IGNORE != status ) { status->_ucount = 0; @@ -84,11 +90,6 @@ int mca_common_ompio_file_read (ompio_file_t *fh, return ret; } - if (fh->f_amode & MPI_MODE_WRONLY){ - printf("Improper use of FILE Mode, Using WRONLY for Read!\n"); - ret = OMPI_ERROR; - return ret; - } #if OPAL_CUDA_SUPPORT int is_gpu, is_managed; @@ -131,8 +132,8 @@ int mca_common_ompio_file_read (ompio_file_t *fh, else { bytes_per_cycle = OMPIO_MCA_GET(fh, cycle_buffer_size); } - cycles = ceil((float)max_data/bytes_per_cycle); - + cycles = ceil((double)max_data/bytes_per_cycle); + #if 0 printf ("Bytes per Cycle: %d Cycles: %d max_data:%d \n",bytes_per_cycle, cycles, max_data); #endif @@ -226,6 +227,12 @@ int mca_common_ompio_file_iread (ompio_file_t *fh, mca_ompio_request_t *ompio_req=NULL; size_t spc=0; + if (fh->f_amode & MPI_MODE_WRONLY){ +// opal_output(10, "Improper use of FILE Mode, Using WRONLY for Read!\n"); + ret = MPI_ERR_ACCESS; + return ret; + } + mca_common_ompio_request_alloc ( &ompio_req, MCA_OMPIO_REQUEST_READ); if ( 0 == count ) { @@ -422,8 +429,8 @@ int mca_common_ompio_file_iread_at_all (ompio_file_t *fp, int mca_common_ompio_set_explicit_offset (ompio_file_t *fh, OMPI_MPI_OFFSET_TYPE offset) { - int i = 0; - int k = 0; + size_t i = 0; + size_t k = 0; if ( fh->f_view_size > 0 ) { /* starting offset of the current copy of the 
filew view */ diff --git a/ompi/mca/common/ompio/common_ompio_file_view.c b/ompi/mca/common/ompio/common_ompio_file_view.c index 71ba14ba02a..bf8a25345b8 100644 --- a/ompi/mca/common/ompio/common_ompio_file_view.c +++ b/ompi/mca/common/ompio/common_ompio_file_view.c @@ -141,6 +141,10 @@ int mca_common_ompio_set_view (ompio_file_t *fh, // in orig_file type, No need to set args on this one. ompi_datatype_duplicate (newfiletype, &fh->f_filetype); + if ( (fh->f_view_size % fh->f_etype_size) ) { + // File view is not a multiple of the etype. + return MPI_ERR_ARG; + } if( SIMPLE_PLUS == OMPIO_MCA_GET(fh, grouping_option) ) { fh->f_cc_size = get_contiguous_chunk_size (fh, 1); diff --git a/ompi/mca/common/ompio/common_ompio_file_write.c b/ompi/mca/common/ompio/common_ompio_file_write.c index fb62edf2d91..62f728e66a3 100644 --- a/ompi/mca/common/ompio/common_ompio_file_write.c +++ b/ompi/mca/common/ompio/common_ompio_file_write.c @@ -9,7 +9,7 @@ * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. - * Copyright (c) 2008-2018 University of Houston. All rights reserved. + * Copyright (c) 2008-2019 University of Houston. All rights reserved. * Copyright (c) 2015-2018 Research Organization for Information Science * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ @@ -31,8 +31,8 @@ #include "common_ompio.h" #include "common_ompio_request.h" -#include "math.h" #include +#include #if OPAL_CUDA_SUPPORT #include "common_ompio_cuda.h" @@ -58,6 +58,13 @@ int mca_common_ompio_file_write (ompio_file_t *fh, int i = 0; /* index into the decoded iovec of the buffer */ int j = 0; /* index into the file view iovec */ + if (fh->f_amode & MPI_MODE_RDONLY){ +// opal_output(10, "Improper use of FILE Mode, Using RDONLY for write!\n"); + ret = MPI_ERR_READ_ONLY; + return ret; + } + + if ( 0 == count ) { if ( MPI_STATUS_IGNORE != status ) { status->_ucount = 0; @@ -109,7 +116,7 @@ int mca_common_ompio_file_write (ompio_file_t *fh, else { bytes_per_cycle = OMPIO_MCA_GET(fh, cycle_buffer_size); } - cycles = ceil((float)max_data/bytes_per_cycle); + cycles = ceil((double)max_data/bytes_per_cycle); #if 0 printf ("Bytes per Cycle: %d Cycles: %d\n", bytes_per_cycle, cycles); @@ -194,6 +201,12 @@ int mca_common_ompio_file_iwrite (ompio_file_t *fh, mca_ompio_request_t *ompio_req=NULL; size_t spc=0; + if (fh->f_amode & MPI_MODE_RDONLY){ +// opal_output(10, "Improper use of FILE Mode, Using RDONLY for write!\n"); + ret = MPI_ERR_READ_ONLY; + return ret; + } + mca_common_ompio_request_alloc ( &ompio_req, MCA_OMPIO_REQUEST_WRITE); if ( 0 == count ) { @@ -396,7 +409,7 @@ int mca_common_ompio_file_iwrite_at_all (ompio_file_t *fp, /**************************************************************/ int mca_common_ompio_build_io_array ( ompio_file_t *fh, int index, int cycles, - size_t bytes_per_cycle, int max_data, uint32_t iov_count, + size_t bytes_per_cycle, size_t max_data, uint32_t iov_count, struct iovec *decoded_iov, int *ii, int *jj, size_t *tbw, size_t *spc) { diff --git a/ompi/mca/crcp/bkmrk/crcp_bkmrk_pml.c b/ompi/mca/crcp/bkmrk/crcp_bkmrk_pml.c index 7d5e480095a..3276df26d0a 100644 --- a/ompi/mca/crcp/bkmrk/crcp_bkmrk_pml.c +++ b/ompi/mca/crcp/bkmrk/crcp_bkmrk_pml.c @@ -3028,7 +3028,10 @@ ompi_crcp_base_pml_state_t* 
ompi_crcp_bkmrk_pml_ft_event( if( opal_cr_timing_barrier_enabled ) { OPAL_CR_SET_TIMER(OPAL_CR_TIMER_CRCPBR0); - opal_pmix.fence(NULL, 0); + if( OMPI_SUCCESS != (ret = opal_pmix.fence(NULL, 0))) { + exit_status = ret; + goto DONE; + } } OPAL_CR_SET_TIMER(OPAL_CR_TIMER_CRCP0); @@ -3096,7 +3099,10 @@ ompi_crcp_base_pml_state_t* ompi_crcp_bkmrk_pml_ft_event( if( opal_cr_timing_barrier_enabled ) { OPAL_CR_SET_TIMER(OPAL_CR_TIMER_COREBR1); - opal_pmix.fence(NULL, 0); + if( OMPI_SUCCESS != (ret = opal_pmix.fence(NULL, 0))) { + exit_status = ret; + goto DONE; + } } OPAL_CR_SET_TIMER(OPAL_CR_TIMER_CORE2); } @@ -6207,14 +6213,16 @@ static void clear_timers(void) { static void display_all_timers(int state) { bool report_ready = false; double barrier_start, barrier_stop; - int i; + int i, ret; if( 0 != OMPI_PROC_MY_NAME->vpid ) { if( 2 > timing_enabled ) { return; } else if( 2 == timing_enabled ) { - opal_pmix.fence(NULL, 0); + if( OPAL_SUCCESS != (ret = opal_pmix.fence(NULL, 0))) { + OPAL_ERROR_LOG(ret); + } return; } } @@ -6235,7 +6243,9 @@ static void display_all_timers(int state) { if( timing_enabled >= 2) { barrier_start = get_time(); - opal_pmix.fence(NULL, 0); + if( OPAL_SUCCESS != (ret = opal_pmix.fence(NULL, 0))) { + OPAL_ERROR_LOG(ret); + } barrier_stop = get_time(); opal_output(0, "crcp:bkmrk: timing(%20s): %20s = %10.2f s\n", diff --git a/ompi/mca/hook/base/hook_base.c b/ompi/mca/hook/base/hook_base.c index 6a5723a7a42..8017d749415 100644 --- a/ompi/mca/hook/base/hook_base.c +++ b/ompi/mca/hook/base/hook_base.c @@ -1,5 +1,7 @@ /* * Copyright (c) 2017 IBM Corporation. All rights reserved. + * Copyright (c) 2018 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -119,6 +121,7 @@ static int ompi_hook_base_close( void ) if( OMPI_SUCCESS != ret ) { return ret; } + OBJ_RELEASE(additional_callback_components); ompi_hook_is_framework_open = false; return OMPI_SUCCESS; diff --git a/ompi/mca/io/base/io_base_delete.c b/ompi/mca/io/base/io_base_delete.c index 48265b23478..b5926aad5b8 100644 --- a/ompi/mca/io/base/io_base_delete.c +++ b/ompi/mca/io/base/io_base_delete.c @@ -13,6 +13,7 @@ * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. * Copyright (c) 2016-2017 IBM Corporation. All rights reserved. + * Copyright (c) 2008-2018 University of Houston. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -35,6 +36,8 @@ #include "opal/mca/base/base.h" #include "ompi/mca/io/io.h" #include "ompi/mca/io/base/base.h" +#include "ompi/mca/fs/fs.h" +#include "ompi/mca/fs/base/base.h" /* * Local types @@ -68,6 +71,8 @@ static void unquery(avail_io_t *avail, const char *filename, struct opal_info_t static int delete_file(avail_io_t *avail, const char *filename, struct opal_info_t *info); +extern opal_mutex_t ompi_mpi_ompio_bootstrap_mutex; + /* * Stuff for the OBJ interface @@ -142,8 +147,25 @@ int mca_io_base_delete(const char *filename, struct opal_info_t *info) } OBJ_RELEASE(selectable); - /* Finally -- delete the file with the selected component */ + if (!strcmp (selected.ai_component.v2_0_0.io_version.mca_component_name, + "ompio")) { + int ret; + + opal_mutex_lock(&ompi_mpi_ompio_bootstrap_mutex); + if (OMPI_SUCCESS != (ret = mca_base_framework_open(&ompi_fs_base_framework, 0))) { + opal_mutex_unlock(&ompi_mpi_ompio_bootstrap_mutex); + return err; + } + opal_mutex_unlock(&ompi_mpi_ompio_bootstrap_mutex); + if (OMPI_SUCCESS != + (ret = mca_fs_base_find_available(OPAL_ENABLE_PROGRESS_THREADS, 1))) { + return err; + } + } + + + /* Finally -- delete the file with the selected component */ if (OMPI_SUCCESS != 
(err = delete_file(&selected, filename, info))) { return err; } diff --git a/ompi/mca/io/ompio/io_ompio_file_open.c b/ompi/mca/io/ompio/io_ompio_file_open.c index 37f7b308b72..dbb62d718cc 100644 --- a/ompi/mca/io/ompio/io_ompio_file_open.c +++ b/ompi/mca/io/ompio/io_ompio_file_open.c @@ -9,7 +9,7 @@ * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. - * Copyright (c) 2008-2016 University of Houston. All rights reserved. + * Copyright (c) 2008-2018 University of Houston. All rights reserved. * Copyright (c) 2015-2018 Research Organization for Information Science * and Technology (RIST). All rights reserved. * Copyright (c) 2016 Cisco Systems, Inc. All rights reserved. @@ -372,12 +372,58 @@ int mca_io_ompio_file_sync (ompi_file_t *fh) OPAL_THREAD_UNLOCK(&fh->f_lock); return MPI_ERR_ACCESS; } + // Make sure all processes reach this point before syncing the file. + ret = data->ompio_fh.f_comm->c_coll->coll_barrier (data->ompio_fh.f_comm, + data->ompio_fh.f_comm->c_coll->coll_barrier_module); + if ( MPI_SUCCESS != ret ) { + OPAL_THREAD_UNLOCK(&fh->f_lock); + return ret; + } ret = data->ompio_fh.f_fs->fs_file_sync (&data->ompio_fh); OPAL_THREAD_UNLOCK(&fh->f_lock); return ret; } +static void mca_io_ompio_file_get_eof_offset (ompio_file_t *fh, + OMPI_MPI_OFFSET_TYPE in_offset, + OMPI_MPI_OFFSET_TYPE *out_offset) +{ + /* a file_seek with SEEK_END might require an actual offset that is + not lined up with the end of the file, depending on the file view. + This routine determines the closest (smaller or equal) offset to + the provided in_offset value, avoiding gaps in the file view and avoiding to + break up an etype. 
+ */ + OMPI_MPI_OFFSET_TYPE offset=0, prev_offset=0, start_offset=0; + size_t k=0, blocklen=0; + size_t index_in_file_view=0; + + in_offset -= fh->f_disp; + if ( fh->f_view_size > 0 ) { + /* starting offset of the current copy of the filew view */ + start_offset = in_offset / fh->f_view_extent; + + index_in_file_view = 0; + /* determine block id that the offset is located in and + the starting offset of that block */ + while ( offset <= in_offset && index_in_file_view < fh->f_iov_count) { + prev_offset = offset; + offset = start_offset + (OMPI_MPI_OFFSET_TYPE)(intptr_t) fh->f_decoded_iov[index_in_file_view++].iov_base; + } + + offset = prev_offset; + blocklen = fh->f_decoded_iov[index_in_file_view-1].iov_len; + while ( offset <= in_offset && k <= blocklen ) { + prev_offset = offset; + offset += fh->f_etype_size; + k += fh->f_etype_size; + } + + *out_offset = prev_offset; + } + return; +} int mca_io_ompio_file_seek (ompi_file_t *fh, OMPI_MPI_OFFSET_TYPE off, @@ -385,7 +431,7 @@ int mca_io_ompio_file_seek (ompi_file_t *fh, { int ret = OMPI_SUCCESS; mca_common_ompio_data_t *data; - OMPI_MPI_OFFSET_TYPE offset, temp_offset; + OMPI_MPI_OFFSET_TYPE offset, temp_offset, temp_offset2; data = (mca_common_ompio_data_t *) fh->f_io_selected_data; @@ -400,8 +446,9 @@ int mca_io_ompio_file_seek (ompi_file_t *fh, } break; case MPI_SEEK_CUR: - offset += data->ompio_fh.f_position_in_file_view; - offset += data->ompio_fh.f_disp; + ret = mca_common_ompio_file_get_position (&data->ompio_fh, + &temp_offset); + offset += temp_offset * data->ompio_fh.f_etype_size; if (offset < 0) { OPAL_THREAD_UNLOCK(&fh->f_lock); return OMPI_ERROR; @@ -409,7 +456,9 @@ int mca_io_ompio_file_seek (ompi_file_t *fh, break; case MPI_SEEK_END: ret = data->ompio_fh.f_fs->fs_file_get_size (&data->ompio_fh, - &temp_offset); + &temp_offset2); + mca_io_ompio_file_get_eof_offset (&data->ompio_fh, + temp_offset2, &temp_offset); offset += temp_offset; if (offset < 0 || OMPI_SUCCESS != ret) { 
OPAL_THREAD_UNLOCK(&fh->f_lock); @@ -428,6 +477,7 @@ int mca_io_ompio_file_seek (ompi_file_t *fh, return ret; } + int mca_io_ompio_file_get_position (ompi_file_t *fd, OMPI_MPI_OFFSET_TYPE *offset) { diff --git a/ompi/mca/io/ompio/io_ompio_file_set_view.c b/ompi/mca/io/ompio/io_ompio_file_set_view.c index ba18db8fe14..72671c3410a 100644 --- a/ompi/mca/io/ompio/io_ompio_file_set_view.c +++ b/ompi/mca/io/ompio/io_ompio_file_set_view.c @@ -9,7 +9,7 @@ * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. - * Copyright (c) 2008-2016 University of Houston. All rights reserved. + * Copyright (c) 2008-2018 University of Houston. All rights reserved. * Copyright (c) 2015-2018 Research Organization for Information Science * and Technology (RIST). All rights reserved. * Copyright (c) 2016-2017 IBM Corporation. All rights reserved. @@ -66,13 +66,17 @@ int mca_io_ompio_file_set_view (ompi_file_t *fp, mca_common_ompio_data_t *data; ompio_file_t *fh; + if ( (strcmp(datarep, "native") && strcmp(datarep, "NATIVE"))) { + return MPI_ERR_UNSUPPORTED_DATAREP; + } + data = (mca_common_ompio_data_t *) fp->f_io_selected_data; /* we need to call the internal file set view twice: once for the individual file pointer, once for the shared file pointer (if it is existent) */ fh = &data->ompio_fh; - + OPAL_THREAD_LOCK(&fp->f_lock); ret = mca_common_ompio_set_view(fh, disp, etype, filetype, datarep, info); OPAL_THREAD_UNLOCK(&fp->f_lock); diff --git a/ompi/mca/io/romio321/romio/Makefile.am b/ompi/mca/io/romio321/romio/Makefile.am index 69d70142df8..71d82a9fa73 100644 --- a/ompi/mca/io/romio321/romio/Makefile.am +++ b/ompi/mca/io/romio321/romio/Makefile.am @@ -68,6 +68,9 @@ AM_CPPFLAGS += -I$(top_builddir)/include -I$(top_srcdir)/include noinst_HEADERS += include/mpio.h noinst_HEADERS += include/io_romio_conv.h +# Included for Open MPI's --enable-grequest-extensions feature. 
+noinst_HEADERS += include/ompi_grequestx.h + # ------------------------------------------------------------------------ SUBDIRS = diff --git a/ompi/mca/io/romio321/romio/adio/ad_gpfs/ad_gpfs_aggrs.c b/ompi/mca/io/romio321/romio/adio/ad_gpfs/ad_gpfs_aggrs.c index 3eb3d84969a..f6df24748f0 100644 --- a/ompi/mca/io/romio321/romio/adio/ad_gpfs/ad_gpfs_aggrs.c +++ b/ompi/mca/io/romio321/romio/adio/ad_gpfs/ad_gpfs_aggrs.c @@ -1,5 +1,5 @@ /* ---------------------------------------------------------------- */ -/* (C)Copyright IBM Corp. 2007, 2008 */ +/* (C)Copyright IBM Corp. 2007, 2008, 2019 */ /* ---------------------------------------------------------------- */ /** * \file ad_gpfs_aggrs.c @@ -663,16 +663,6 @@ void ADIOI_GPFS_Calc_others_req(ADIO_File fd, int count_my_req_procs, /* Parameters for MPI_Alltoallv */ int *scounts, *sdispls, *rcounts, *rdispls; - /* Parameters for MPI_Alltoallv. These are the buffers, which - * are later computed to be the lowest address of all buffers - * to be sent/received for offsets and lengths. Initialize to - * the highest possible address which is the current minimum. 
- */ - void *sendBufForOffsets=(void*)0xFFFFFFFFFFFFFFFF, - *sendBufForLens =(void*)0xFFFFFFFFFFFFFFFF, - *recvBufForOffsets=(void*)0xFFFFFFFFFFFFFFFF, - *recvBufForLens =(void*)0xFFFFFFFFFFFFFFFF; - /* first find out how much to send/recv and from/to whom */ #ifdef AGGREGATION_PROFILE MPE_Log_event (5026, 0, NULL); @@ -719,11 +709,6 @@ void ADIOI_GPFS_Calc_others_req(ADIO_File fd, int count_my_req_procs, others_req[i].lens = ADIOI_Malloc(count_others_req_per_proc[i]*sizeof(ADIO_Offset)); - if ( (MPIU_Upint)others_req[i].offsets < (MPIU_Upint)recvBufForOffsets ) - recvBufForOffsets = others_req[i].offsets; - if ( (MPIU_Upint)others_req[i].lens < (MPIU_Upint)recvBufForLens ) - recvBufForLens = others_req[i].lens; - others_req[i].mem_ptrs = (MPI_Aint *) ADIOI_Malloc(count_others_req_per_proc[i]*sizeof(MPI_Aint)); @@ -736,9 +721,6 @@ void ADIOI_GPFS_Calc_others_req(ADIO_File fd, int count_my_req_procs, others_req[i].lens = NULL; } } - /* If no recv buffer was allocated in the loop above, make it NULL */ - if ( recvBufForOffsets == (void*)0xFFFFFFFFFFFFFFFF) recvBufForOffsets = NULL; - if ( recvBufForLens == (void*)0xFFFFFFFFFFFFFFFF) recvBufForLens = NULL; /* Now send the calculated offsets and lengths to respective processes */ @@ -746,56 +728,53 @@ void ADIOI_GPFS_Calc_others_req(ADIO_File fd, int count_my_req_procs, /* Exchange the offsets */ /************************/ - /* Determine the lowest sendBufForOffsets/Lens */ - for (i=0; icomm); + for (i=0; icomm); + for (i=0; i 0) { count+= ret; events_read = ret; for (i = 0; i < events_read; i++) { - if (NULL != ompi_mtl_ofi.progress_entries[i].op_context) { - ofi_req = TO_OFI_REQ(ompi_mtl_ofi.progress_entries[i].op_context); + if (NULL != wc[i].op_context) { + ofi_req = TO_OFI_REQ(wc[i].op_context); assert(ofi_req); - ret = ofi_req->event_callback(&ompi_mtl_ofi.progress_entries[i], ofi_req); + ret = ofi_req->event_callback(&wc[i], ofi_req); if (OMPI_SUCCESS != ret) { opal_output(0, "%s:%d: Error returned by request 
event callback: %zd.\n" "*** The Open MPI OFI MTL is aborting the MPI job (via exit(3)).\n", @@ -134,6 +129,24 @@ ompi_mtl_ofi_progress(void) return count; } +/** + * When attempting to execute an OFI operation we need to handle + * resource overrun cases. When a call to an OFI OP fails with -FI_EAGAIN + * the OFI mtl will attempt to progress any pending Completion Queue + * events that may prevent additional operations to be enqueued. + * If the call to ofi progress is successful, then the function call + * will be retried. + */ +#define MTL_OFI_RETRY_UNTIL_DONE(FUNC, RETURN) \ + do { \ + do { \ + RETURN = FUNC; \ + if (OPAL_LIKELY(0 == RETURN)) {break;} \ + if (OPAL_LIKELY(RETURN == -FI_EAGAIN)) { \ + ompi_mtl_ofi_progress(); \ + } \ + } while (OPAL_LIKELY(-FI_EAGAIN == RETURN)); \ + } while (0); /* MTL interface functions */ int ompi_mtl_ofi_finalize(struct mca_mtl_base_module_t *mtl); @@ -227,34 +240,91 @@ ompi_mtl_ofi_isend_callback(struct fi_cq_tagged_entry *wc, } __opal_attribute_always_inline__ static inline int -ompi_mtl_ofi_send_start(struct mca_mtl_base_module_t *mtl, - struct ompi_communicator_t *comm, - int dest, - int tag, - struct opal_convertor_t *convertor, - mca_pml_base_send_mode_t mode, - ompi_mtl_ofi_request_t *ofi_req) +ompi_mtl_ofi_ssend_recv(ompi_mtl_ofi_request_t *ack_req, + struct ompi_communicator_t *comm, + fi_addr_t *src_addr, + ompi_mtl_ofi_request_t *ofi_req, + mca_mtl_ofi_endpoint_t *endpoint, + uint64_t *match_bits, + int tag) { + ssize_t ret = OMPI_SUCCESS; + ack_req = malloc(sizeof(ompi_mtl_ofi_request_t)); + + assert(ack_req); + + ack_req->parent = ofi_req; + ack_req->event_callback = ompi_mtl_ofi_send_ack_callback; + ack_req->error_callback = ompi_mtl_ofi_send_ack_error_callback; + + ofi_req->completion_count += 1; + + MTL_OFI_RETRY_UNTIL_DONE(fi_trecv(ompi_mtl_ofi.ep, + NULL, + 0, + NULL, + *src_addr, + *match_bits | ompi_mtl_ofi.sync_send_ack, + 0, /* Exact match, no ignore bits */ + (void *) &ack_req->ctx), ret); + if 
(OPAL_UNLIKELY(0 > ret)) { + opal_output_verbose(1, ompi_mtl_base_framework.framework_output, + "%s:%d: fi_trecv failed: %s(%zd)", + __FILE__, __LINE__, fi_strerror(-ret), ret); + free(ack_req); + return ompi_mtl_ofi_get_error(ret); + } + + /* The SYNC_SEND tag bit is set for the send operation only.*/ + MTL_OFI_SET_SYNC_SEND(*match_bits); + return OMPI_SUCCESS; +} + +__opal_attribute_always_inline__ static inline int +ompi_mtl_ofi_send(struct mca_mtl_base_module_t *mtl, + struct ompi_communicator_t *comm, + int dest, + int tag, + struct opal_convertor_t *convertor, + mca_pml_base_send_mode_t mode) +{ + ssize_t ret = OMPI_SUCCESS; + ompi_mtl_ofi_request_t ofi_req; int ompi_ret; void *start; - size_t length; - ssize_t ret; bool free_after; + size_t length; uint64_t match_bits; ompi_proc_t *ompi_proc = NULL; mca_mtl_ofi_endpoint_t *endpoint = NULL; ompi_mtl_ofi_request_t *ack_req = NULL; /* For synchronous send */ fi_addr_t src_addr = 0; + /** + * Create a send request, start it and wait until it completes. + */ + ofi_req.event_callback = ompi_mtl_ofi_send_callback; + ofi_req.error_callback = ompi_mtl_ofi_send_error_callback; + ompi_proc = ompi_comm_peer_lookup(comm, dest); endpoint = ompi_mtl_ofi_get_endpoint(mtl, ompi_proc); ompi_ret = ompi_mtl_datatype_pack(convertor, &start, &length, &free_after); - if (OMPI_SUCCESS != ompi_ret) return ompi_ret; + if (OPAL_UNLIKELY(OMPI_SUCCESS != ompi_ret)) { + return ompi_ret; + } - ofi_req->buffer = (free_after) ? start : NULL; - ofi_req->length = length; - ofi_req->status.MPI_ERROR = OMPI_SUCCESS; + ofi_req.buffer = (free_after) ? 
start : NULL; + ofi_req.length = length; + ofi_req.status.MPI_ERROR = OMPI_SUCCESS; + ofi_req.completion_count = 0; + + if (OPAL_UNLIKELY(length > endpoint->mtl_ofi_module->max_msg_size)) { + opal_show_help("help-mtl-ofi.txt", + "message too big", false, + length, endpoint->mtl_ofi_module->max_msg_size); + return OMPI_ERROR; + } if (ompi_mtl_ofi.fi_cq_data) { match_bits = mtl_ofi_create_send_tag_CQD(comm->c_contextid, tag); @@ -266,33 +336,11 @@ ompi_mtl_ofi_send_start(struct mca_mtl_base_module_t *mtl, } if (OPAL_UNLIKELY(MCA_PML_BASE_SEND_SYNCHRONOUS == mode)) { - ack_req = malloc(sizeof(ompi_mtl_ofi_request_t)); - assert(ack_req); - ack_req->parent = ofi_req; - ack_req->event_callback = ompi_mtl_ofi_send_ack_callback; - ack_req->error_callback = ompi_mtl_ofi_send_ack_error_callback; - - ofi_req->completion_count = 2; - - MTL_OFI_RETRY_UNTIL_DONE(fi_trecv(ompi_mtl_ofi.ep, - NULL, - 0, - NULL, - src_addr, - match_bits | ompi_mtl_ofi.sync_send_ack, - 0, /* Exact match, no ignore bits */ - (void *) &ack_req->ctx)); - if (OPAL_UNLIKELY(0 > ret)) { - opal_output_verbose(1, ompi_mtl_base_framework.framework_output, - "%s:%d: fi_trecv failed: %s(%zd)", - __FILE__, __LINE__, fi_strerror(-ret), ret); - free(ack_req); - return ompi_mtl_ofi_get_error(ret); - } - /* The SYNC_SEND tag bit is set for the send operation only.*/ - MTL_OFI_SET_SYNC_SEND(match_bits); - } else { - ofi_req->completion_count = 1; + ofi_req.status.MPI_ERROR = ompi_mtl_ofi_ssend_recv(ack_req, comm, &src_addr, + &ofi_req, endpoint, + &match_bits, tag); + if (OPAL_UNLIKELY(ofi_req.status.MPI_ERROR != OMPI_SUCCESS)) + goto free_request_buffer; } if (ompi_mtl_ofi.max_inject_size >= length) { @@ -302,15 +350,14 @@ ompi_mtl_ofi_send_start(struct mca_mtl_base_module_t *mtl, length, comm->c_my_rank, endpoint->peer_fiaddr, - match_bits)); + match_bits), ret); } else { MTL_OFI_RETRY_UNTIL_DONE(fi_tinject(ompi_mtl_ofi.ep, start, length, endpoint->peer_fiaddr, - match_bits)); + match_bits), ret); } - if 
(OPAL_UNLIKELY(0 > ret)) { char *fi_api = ompi_mtl_ofi.fi_cq_data ? "fi_tinjectddata" : "fi_tinject"; opal_output_verbose(1, ompi_mtl_base_framework.framework_output, @@ -321,11 +368,12 @@ ompi_mtl_ofi_send_start(struct mca_mtl_base_module_t *mtl, fi_cancel((fid_t)ompi_mtl_ofi.ep, &ack_req->ctx); free(ack_req); } - return ompi_mtl_ofi_get_error(ret); - } - ofi_req->event_callback(NULL,ofi_req); + ofi_req.status.MPI_ERROR = ompi_mtl_ofi_get_error(ret); + goto free_request_buffer; + } } else { + ofi_req.completion_count += 1; if (ompi_mtl_ofi.fi_cq_data) { MTL_OFI_RETRY_UNTIL_DONE(fi_tsenddata(ompi_mtl_ofi.ep, start, @@ -334,7 +382,7 @@ ompi_mtl_ofi_send_start(struct mca_mtl_base_module_t *mtl, comm->c_my_rank, endpoint->peer_fiaddr, match_bits, - (void *) &ofi_req->ctx)); + (void *) &ofi_req.ctx), ret); } else { MTL_OFI_RETRY_UNTIL_DONE(fi_tsend(ompi_mtl_ofi.ep, start, @@ -342,44 +390,18 @@ ompi_mtl_ofi_send_start(struct mca_mtl_base_module_t *mtl, NULL, endpoint->peer_fiaddr, match_bits, - (void *) &ofi_req->ctx)); + (void *) &ofi_req.ctx), ret); } if (OPAL_UNLIKELY(0 > ret)) { char *fi_api = ompi_mtl_ofi.fi_cq_data ? "fi_tsendddata" : "fi_send"; opal_output_verbose(1, ompi_mtl_base_framework.framework_output, "%s:%d: %s failed: %s(%zd)", __FILE__, __LINE__,fi_api, fi_strerror(-ret), ret); - return ompi_mtl_ofi_get_error(ret); - } - } - - return OMPI_SUCCESS; -} - -__opal_attribute_always_inline__ static inline int -ompi_mtl_ofi_send(struct mca_mtl_base_module_t *mtl, - struct ompi_communicator_t *comm, - int dest, - int tag, - struct opal_convertor_t *convertor, - mca_pml_base_send_mode_t mode) -{ - int ret = OMPI_SUCCESS; - ompi_mtl_ofi_request_t ofi_req; + free(fi_api); - /** - * Create a send request, start it and wait until it completes. 
- */ - ofi_req.event_callback = ompi_mtl_ofi_send_callback; - ofi_req.error_callback = ompi_mtl_ofi_send_error_callback; - - ret = ompi_mtl_ofi_send_start(mtl, comm, dest, tag, - convertor, mode, &ofi_req); - if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) { - if (NULL != ofi_req.buffer) { - free(ofi_req.buffer); + ofi_req.status.MPI_ERROR = ompi_mtl_ofi_get_error(ret); + goto free_request_buffer; } - return ret; } /** @@ -390,6 +412,7 @@ ompi_mtl_ofi_send(struct mca_mtl_base_module_t *mtl, ompi_mtl_ofi_progress(); } +free_request_buffer: if (OPAL_UNLIKELY(NULL != ofi_req.buffer)) { free(ofi_req.buffer); } @@ -407,20 +430,96 @@ ompi_mtl_ofi_isend(struct mca_mtl_base_module_t *mtl, bool blocking, mca_mtl_request_t *mtl_request) { - int ret = OMPI_SUCCESS; - ompi_mtl_ofi_request_t *ofi_req = (ompi_mtl_ofi_request_t*) mtl_request; + ssize_t ret = OMPI_SUCCESS; + ompi_mtl_ofi_request_t *ofi_req = (ompi_mtl_ofi_request_t *) mtl_request; + int ompi_ret; + void *start; + size_t length; + bool free_after; + uint64_t match_bits; + ompi_proc_t *ompi_proc = NULL; + mca_mtl_ofi_endpoint_t *endpoint = NULL; + ompi_mtl_ofi_request_t *ack_req = NULL; /* For synchronous send */ + fi_addr_t src_addr = 0; ofi_req->event_callback = ompi_mtl_ofi_isend_callback; ofi_req->error_callback = ompi_mtl_ofi_send_error_callback; - ret = ompi_mtl_ofi_send_start(mtl, comm, dest, tag, - convertor, mode, ofi_req); + ompi_proc = ompi_comm_peer_lookup(comm, dest); + endpoint = ompi_mtl_ofi_get_endpoint(mtl, ompi_proc); + + ompi_ret = ompi_mtl_datatype_pack(convertor, &start, &length, &free_after); + if (OPAL_UNLIKELY(OMPI_SUCCESS != ompi_ret)) return ompi_ret; + + ofi_req->buffer = (free_after) ? 
start : NULL; + ofi_req->length = length; + ofi_req->status.MPI_ERROR = OMPI_SUCCESS; + ofi_req->completion_count = 1; + + if (OPAL_UNLIKELY(length > endpoint->mtl_ofi_module->max_msg_size)) { + opal_show_help("help-mtl-ofi.txt", + "message too big", false, + length, endpoint->mtl_ofi_module->max_msg_size); + return OMPI_ERROR; + } + + if (ompi_mtl_ofi.fi_cq_data) { + match_bits = mtl_ofi_create_send_tag_CQD(comm->c_contextid, tag); + src_addr = endpoint->peer_fiaddr; + } else { + match_bits = mtl_ofi_create_send_tag(comm->c_contextid, + comm->c_my_rank, tag); + /* src_addr is ignored when FI_DIRECTED_RECV is not supported */ + } + + if (OPAL_UNLIKELY(MCA_PML_BASE_SEND_SYNCHRONOUS == mode)) { + ofi_req->status.MPI_ERROR = ompi_mtl_ofi_ssend_recv(ack_req, comm, &src_addr, + ofi_req, endpoint, + &match_bits, tag); + if (OPAL_UNLIKELY(ofi_req->status.MPI_ERROR != OMPI_SUCCESS)) + goto free_request_buffer; + } - if (OPAL_UNLIKELY(OMPI_SUCCESS != ret && NULL != ofi_req->buffer)) { + if (ompi_mtl_ofi.fi_cq_data) { + MTL_OFI_RETRY_UNTIL_DONE(fi_tsenddata(ompi_mtl_ofi.ep, + start, + length, + NULL, + comm->c_my_rank, + endpoint->peer_fiaddr, + match_bits, + (void *) &ofi_req->ctx), ret); + } else { + MTL_OFI_RETRY_UNTIL_DONE(fi_tsend(ompi_mtl_ofi.ep, + start, + length, + NULL, + endpoint->peer_fiaddr, + match_bits, + (void *) &ofi_req->ctx), ret); + } + if (OPAL_UNLIKELY(0 > ret)) { + char *fi_api; + if (ompi_mtl_ofi.fi_cq_data) { + asprintf( &fi_api, "fi_tsendddata") ; + } + else { + asprintf( &fi_api, "fi_send") ; + } + opal_output_verbose(1, ompi_mtl_base_framework.framework_output, + "%s:%d: %s failed: %s(%zd)", + __FILE__, __LINE__,fi_api, fi_strerror(-ret), ret); + free(fi_api); + ofi_req->status.MPI_ERROR = ompi_mtl_ofi_get_error(ret); + } + +free_request_buffer: + if (OPAL_UNLIKELY(OMPI_SUCCESS != ofi_req->status.MPI_ERROR + && NULL != ofi_req->buffer)) { free(ofi_req->buffer); } - return ret; + return ofi_req->status.MPI_ERROR; } /** @@ -517,7 +616,7 @@ 
ompi_mtl_ofi_recv_callback(struct fi_cq_tagged_entry *wc, tagged_msg.data = 0; MTL_OFI_RETRY_UNTIL_DONE(fi_tsendmsg(ompi_mtl_ofi.ep, - &tagged_msg, 0)); + &tagged_msg, 0), ret); if (OPAL_UNLIKELY(0 > ret)) { opal_output_verbose(1, ompi_mtl_base_framework.framework_output, "%s:%d: fi_tsendmsg failed: %s(%zd)", @@ -621,7 +720,7 @@ ompi_mtl_ofi_irecv(struct mca_mtl_base_module_t *mtl, remote_addr, match_bits, mask_bits, - (void *)&ofi_req->ctx)); + (void *)&ofi_req->ctx), ret); if (OPAL_UNLIKELY(0 > ret)) { if (NULL != ofi_req->buffer) { free(ofi_req->buffer); @@ -734,7 +833,7 @@ ompi_mtl_ofi_imrecv(struct mca_mtl_base_module_t *mtl, msg.context = (void *)&ofi_req->ctx; msg.data = 0; - MTL_OFI_RETRY_UNTIL_DONE(fi_trecvmsg(ompi_mtl_ofi.ep, &msg, msgflags)); + MTL_OFI_RETRY_UNTIL_DONE(fi_trecvmsg(ompi_mtl_ofi.ep, &msg, msgflags), ret); if (OPAL_UNLIKELY(0 > ret)) { opal_output_verbose(1, ompi_mtl_base_framework.framework_output, "%s:%d: fi_trecvmsg failed: %s(%zd)", @@ -833,7 +932,7 @@ ompi_mtl_ofi_iprobe(struct mca_mtl_base_module_t *mtl, ofi_req.completion_count = 1; ofi_req.match_state = 0; - MTL_OFI_RETRY_UNTIL_DONE(fi_trecvmsg(ompi_mtl_ofi.ep, &msg, msgflags)); + MTL_OFI_RETRY_UNTIL_DONE(fi_trecvmsg(ompi_mtl_ofi.ep, &msg, msgflags), ret); if (-FI_ENOMSG == ret) { /** * The search request completed but no matching message was found. @@ -928,7 +1027,7 @@ ompi_mtl_ofi_improbe(struct mca_mtl_base_module_t *mtl, ofi_req->match_state = 0; ofi_req->mask_bits = mask_bits; - MTL_OFI_RETRY_UNTIL_DONE(fi_trecvmsg(ompi_mtl_ofi.ep, &msg, msgflags)); + MTL_OFI_RETRY_UNTIL_DONE(fi_trecvmsg(ompi_mtl_ofi.ep, &msg, msgflags), ret); if (-FI_ENOMSG == ret) { /** * The search request completed but no matching message was found. 
@@ -1003,8 +1102,11 @@ ompi_mtl_ofi_cancel(struct mca_mtl_base_module_t *mtl, */ while (!ofi_req->super.ompi_req->req_status._cancelled) { opal_progress(); + if (ofi_req->req_started) + goto ofi_cancel_not_possible; } } else { +ofi_cancel_not_possible: /** * Could not cancel the request. */ diff --git a/ompi/mca/mtl/ofi/mtl_ofi_component.c b/ompi/mca/mtl/ofi/mtl_ofi_component.c index a7c076b61fb..1da8f2b0451 100644 --- a/ompi/mca/mtl/ofi/mtl_ofi_component.c +++ b/ompi/mca/mtl/ofi/mtl_ofi_component.c @@ -124,7 +124,7 @@ ompi_mtl_ofi_component_register(void) MCA_BASE_VAR_SCOPE_READONLY, ¶m_priority); - prov_include = "psm,psm2,gni"; + prov_include = NULL; mca_base_component_var_register(&mca_mtl_ofi_component.super.mtl_version, "provider_include", "Comma-delimited list of OFI providers that are considered for use (e.g., \"psm,psm2\"; an empty value means that all providers will be considered). Mutually exclusive with mtl_ofi_provider_exclude.", @@ -133,7 +133,7 @@ ompi_mtl_ofi_component_register(void) MCA_BASE_VAR_SCOPE_READONLY, &prov_include); - prov_exclude = NULL; + prov_exclude = "shm,sockets,tcp,udp,rstream"; mca_base_component_var_register(&mca_mtl_ofi_component.super.mtl_version, "provider_exclude", "Comma-delimited list of OFI providers that are not considered for use (default: \"sockets,mxm\"; empty value means that all providers will be considered). Mutually exclusive with mtl_ofi_provider_include.", @@ -630,9 +630,10 @@ ompi_mtl_ofi_component_init(bool enable_progress_threads, } /** - * Save the maximum inject size. + * Save the maximum sizes. */ ompi_mtl_ofi.max_inject_size = prov->tx_attr->inject_size; + ompi_mtl_ofi.max_msg_size = prov->ep_attr->max_msg_size; /** * Create the objects that will be bound to the endpoint. @@ -663,21 +664,6 @@ ompi_mtl_ofi_component_init(bool enable_progress_threads, goto error; } - /** - * Allocate memory for storing the CQ events read in OFI progress. 
- */ - ompi_mtl_ofi.progress_entries = calloc(ompi_mtl_ofi.ofi_progress_event_count, sizeof(struct fi_cq_tagged_entry)); - if (NULL == ompi_mtl_ofi.progress_entries) { - opal_output_verbose(1, ompi_mtl_base_framework.framework_output, - "%s:%d: alloc of CQ event storage failed: %s\n", - __FILE__, __LINE__, strerror(errno)); - goto error; - } - - /** - * The remote fi_addr will be stored in the ofi_endpoint struct. - */ - av_attr.type = (MTL_OFI_AV_TABLE == av_type) ? FI_AV_TABLE: FI_AV_MAP; ret = fi_av_open(ompi_mtl_ofi.domain, &av_attr, &ompi_mtl_ofi.av, NULL); @@ -799,9 +785,6 @@ ompi_mtl_ofi_component_init(bool enable_progress_threads, if (ompi_mtl_ofi.fabric) { (void) fi_close((fid_t)ompi_mtl_ofi.fabric); } - if (ompi_mtl_ofi.progress_entries) { - free(ompi_mtl_ofi.progress_entries); - } return NULL; } @@ -834,8 +817,6 @@ ompi_mtl_ofi_finalize(struct mca_mtl_base_module_t *mtl) goto finalize_err; } - free(ompi_mtl_ofi.progress_entries); - return OMPI_SUCCESS; finalize_err: diff --git a/ompi/mca/mtl/ofi/mtl_ofi_types.h b/ompi/mca/mtl/ofi/mtl_ofi_types.h index ee414734e0d..ec82cde5894 100644 --- a/ompi/mca/mtl/ofi/mtl_ofi_types.h +++ b/ompi/mca/mtl/ofi/mtl_ofi_types.h @@ -49,12 +49,12 @@ typedef struct mca_mtl_ofi_module_t { /** Maximum inject size */ size_t max_inject_size; + /** Largest message that can be sent in a single send. */ + size_t max_msg_size; + /** Maximum number of CQ events to read in OFI Progress */ int ofi_progress_event_count; - /** CQ event storage */ - struct fi_cq_tagged_entry *progress_entries; - /** Use FI_REMOTE_CQ_DATA*/ bool fi_cq_data; diff --git a/ompi/mca/mtl/psm2/mtl_psm2_component.c b/ompi/mca/mtl/psm2/mtl_psm2_component.c index 0785193b401..30cb0ed5e77 100644 --- a/ompi/mca/mtl/psm2/mtl_psm2_component.c +++ b/ompi/mca/mtl/psm2/mtl_psm2_component.c @@ -14,7 +14,7 @@ * Copyright (c) 2012-2017 Los Alamos National Security, LLC. All rights * reserved. * Copyright (c) 2013-2017 Intel, Inc. 
All rights reserved - * Copyright (c) 2017 Research Organization for Information Science + * Copyright (c) 2018 Research Organization for Information Science * and Technology (RIST). All rights reserved. * $COPYRIGHT$ * @@ -84,129 +84,6 @@ mca_mtl_psm2_component_t mca_mtl_psm2_component = { } }; -struct ompi_mtl_psm2_shadow_variable { - int variable_type; - void *storage; - mca_base_var_storage_t default_value; - const char *env_name; - mca_base_var_info_lvl_t info_level; - const char *mca_name; - const char *description; - mca_base_var_flag_t flags; -}; - -struct ompi_mtl_psm2_shadow_variable ompi_mtl_psm2_shadow_variables[] = { - {MCA_BASE_VAR_TYPE_STRING, &ompi_mtl_psm2.psm2_devices, {.stringval = "self,shm,hfi"}, "PSM2_DEVICES", OPAL_INFO_LVL_3, - "devices", - "Comma-delimited list of PSM2 devices. Valid values: self, shm, hfi (default: self,shm,hfi. Reduced to self,shm in single node jobs)",0}, - {MCA_BASE_VAR_TYPE_STRING, &ompi_mtl_psm2.psm2_memory, {.stringval = "normal"}, "PSM2_MEMORY", OPAL_INFO_LVL_9, - "memory_model", "PSM2 memory usage mode. 
Valid values: min, normal, large (default: normal)", 0}, - {MCA_BASE_VAR_TYPE_UNSIGNED_LONG, &ompi_mtl_psm2.psm2_mq_sendreqs_max, {.ulval = 0}, "PSM2_MQ_SENDREQS_MAX", OPAL_INFO_LVL_3, - "mq_sendreqs_max", "PSM2 maximum number of isend requests in flight (default: unset, let libpsm2 use its default)", MCA_BASE_VAR_FLAG_DEF_UNSET}, - {MCA_BASE_VAR_TYPE_UNSIGNED_LONG, &ompi_mtl_psm2.psm2_mq_recvreqs_max, {.ulval = 0}, "PSM2_MQ_RECVREQS_MAX", OPAL_INFO_LVL_3, - "mq_recvreqs_max", "PSM2 maximum number of irecv requests in flight (default: unset, let libpsm2 use its default)", MCA_BASE_VAR_FLAG_DEF_UNSET}, - {MCA_BASE_VAR_TYPE_UNSIGNED_LONG, &ompi_mtl_psm2.psm2_mq_rndv_hfi_threshold, {.ulval = 0}, "PSM2_MQ_RNDV_HFI_THRESH", OPAL_INFO_LVL_3, - "hfi_eager_limit", "PSM2 eager to rendezvous threshold (default: unset, let libpsm2 use its defaults)", MCA_BASE_VAR_FLAG_DEF_UNSET}, - {MCA_BASE_VAR_TYPE_UNSIGNED_LONG, &ompi_mtl_psm2.psm2_mq_rndv_shm_threshold, {.ulval = 0}, "PSM2_MQ_RNDV_SHM_THRESH", OPAL_INFO_LVL_3, - "shm_eager_limit", "PSM2 shared memory eager to rendezvous threshold (default: unset, let libpsm2 use its default)", MCA_BASE_VAR_FLAG_DEF_UNSET}, - {MCA_BASE_VAR_TYPE_BOOL, &ompi_mtl_psm2.psm2_recvthread, {.boolval = true}, "PSM2_RCVTHREAD", OPAL_INFO_LVL_3, - "use_receive_thread", "Use PSM2 progress thread (default: true)"}, - {MCA_BASE_VAR_TYPE_BOOL, &ompi_mtl_psm2.psm2_shared_contexts, {.boolval = true}, "PSM2_SHAREDCONTEXTS", OPAL_INFO_LVL_6, - "use_shared_contexts", "Share PSM contexts between MPI processes (default: true)"}, - {MCA_BASE_VAR_TYPE_UNSIGNED_LONG, &ompi_mtl_psm2.psm2_max_contexts_per_job, {.ulval = 0}, "PSM2_MAX_CONTEXTS_PER_JOB", OPAL_INFO_LVL_9, - "max_contexts_per_job", "Maximum number of contexts available on a node (default: unset, let libpsm2 use its default)", MCA_BASE_VAR_FLAG_DEF_UNSET}, - {MCA_BASE_VAR_TYPE_UNSIGNED_LONG, &ompi_mtl_psm2.psm2_tracemask, {.ulval = 1}, "PSM2_TRACEMASK", OPAL_INFO_LVL_9, - "trace_mask", "PSM2 tracemask 
value. See PSM2 documentation for accepted values in 0x (default: 1)"}, - {MCA_BASE_VAR_TYPE_UNSIGNED_LONG, &ompi_mtl_psm2.psm2_opa_sl, {.ulval = 0}, "HFI_SL", OPAL_INFO_LVL_9, - "opa_service_level", "HFI Service Level (default: unset, let libpsm2 use its defaults)", MCA_BASE_VAR_FLAG_DEF_UNSET}, - {-1}, -}; - -static void ompi_mtl_psm2_set_shadow_env (struct ompi_mtl_psm2_shadow_variable *variable) -{ - mca_base_var_storage_t *storage = variable->storage; - char *env_value; - int ret = 0; - int var_index = 0; - const mca_base_var_t *mca_base_var; - - var_index = mca_base_var_find("ompi", "mtl", "psm2", variable->mca_name); - ret = mca_base_var_get (var_index,&mca_base_var); - /* Something is fundamentally broken if registered variables are - * not found */ - if (OPAL_SUCCESS != ret) { - fprintf (stderr, "ERROR setting PSM2 environment variable: %s\n", variable->env_name); - return; - } - - /** Skip setting variables for which the default behavior is "unset" */ - if ((mca_base_var->mbv_flags & MCA_BASE_VAR_FLAG_DEF_UNSET) && - (MCA_BASE_VAR_SOURCE_DEFAULT == mca_base_var->mbv_source)){ - return ; - } - - switch (variable->variable_type) { - case MCA_BASE_VAR_TYPE_BOOL: - ret = asprintf (&env_value, "%s=%d", variable->env_name, storage->boolval ? 1 : 0); - break; - case MCA_BASE_VAR_TYPE_UNSIGNED_LONG: - if (0 == strcmp (variable->env_name, "PSM2_TRACEMASK")) { - /* PSM2 documentation shows the tracemask as a hexidecimal number. to be consitent - * use hexidecimal here. 
*/ - ret = asprintf (&env_value, "%s=0x%lx", variable->env_name, storage->ulval); - } else { - ret = asprintf (&env_value, "%s=%lu", variable->env_name, storage->ulval); - } - break; - case MCA_BASE_VAR_TYPE_STRING: - ret = asprintf (&env_value, "%s=%s", variable->env_name, storage->stringval); - break; - } - - if (0 > ret) { - fprintf (stderr, "ERROR setting PSM2 environment variable: %s\n", variable->env_name); - } else { - putenv (env_value); - } -} - -static void ompi_mtl_psm2_register_shadow_env (struct ompi_mtl_psm2_shadow_variable *variable) -{ - mca_base_var_storage_t *storage = variable->storage; - char *env_value; - - env_value = getenv (variable->env_name); - switch (variable->variable_type) { - case MCA_BASE_VAR_TYPE_BOOL: - if (env_value) { - int tmp; - (void) mca_base_var_enum_bool.value_from_string (&mca_base_var_enum_bool, env_value, &tmp); - storage->boolval = !!tmp; - } else { - storage->boolval = variable->default_value.boolval; - } - break; - case MCA_BASE_VAR_TYPE_UNSIGNED_LONG: - if (env_value) { - storage->ulval = strtol (env_value, NULL, 0); - } else { - storage->ulval = variable->default_value.ulval; - } - break; - case MCA_BASE_VAR_TYPE_STRING: - if (env_value) { - storage->stringval = env_value; - } else { - storage->stringval = variable->default_value.stringval; - } - break; - } - - (void) mca_base_component_var_register (&mca_mtl_psm2_component.super.mtl_version, variable->mca_name, variable->description, - variable->variable_type, NULL, 0, variable->flags, variable->info_level, MCA_BASE_VAR_SCOPE_READONLY, - variable->storage); -} - static int get_num_total_procs(int *out_ntp) { @@ -247,12 +124,9 @@ ompi_mtl_psm2_component_register(void) * process assume it is ompi_info or this is most likely going to spawn, for * which all PSM2 devices are needed */ setenv("PSM2_DEVICES", "self,shm", 0); - /* ob1 is much faster than psm2 with shared memory */ - param_priority = 10; - } else { - param_priority = 40; } + param_priority = 40; (void) 
mca_base_component_var_register (&mca_mtl_psm2_component.super.mtl_version, "priority", "Priority of the PSM2 MTL component", MCA_BASE_VAR_TYPE_INT, NULL, 0, 0, @@ -260,10 +134,6 @@ ompi_mtl_psm2_component_register(void) MCA_BASE_VAR_SCOPE_READONLY, ¶m_priority); - for (int i = 0 ; ompi_mtl_psm2_shadow_variables[i].variable_type >= 0 ; ++i) { - ompi_mtl_psm2_register_shadow_env (ompi_mtl_psm2_shadow_variables + i); - } - ompi_mtl_psm2_register_pvars(); return OMPI_SUCCESS; @@ -277,7 +147,7 @@ ompi_mtl_psm2_component_open(void) /* Component available only if Omni-Path hardware is present */ res = glob("/dev/hfi1_[0-9]", GLOB_DOOFFS, NULL, &globbuf); - if (globbuf.gl_pathc > 0) { + if (globbuf.gl_pathc > 0 || GLOB_NOMATCH==res) { globfree(&globbuf); } if (0 != res) { @@ -395,10 +265,6 @@ ompi_mtl_psm2_component_init(bool enable_progress_threads, return NULL; } - for (int i = 0 ; ompi_mtl_psm2_shadow_variables[i].variable_type >= 0 ; ++i) { - ompi_mtl_psm2_set_shadow_env (ompi_mtl_psm2_shadow_variables + i); - } - #if OPAL_CUDA_SUPPORT /* * If using CUDA enabled Open MPI, the user likely intends to diff --git a/ompi/mca/mtl/psm2/mtl_psm2_types.h b/ompi/mca/mtl/psm2/mtl_psm2_types.h index 20c404129f4..12dd9f9b064 100644 --- a/ompi/mca/mtl/psm2/mtl_psm2_types.h +++ b/ompi/mca/mtl/psm2/mtl_psm2_types.h @@ -50,17 +50,6 @@ struct mca_mtl_psm2_module_t { psm2_mq_t mq; psm2_epid_t epid; psm2_epaddr_t epaddr; - char *psm2_devices; - char *psm2_memory; - unsigned long psm2_mq_sendreqs_max; - unsigned long psm2_mq_recvreqs_max; - unsigned long psm2_mq_rndv_hfi_threshold; - unsigned long psm2_mq_rndv_shm_threshold; - unsigned long psm2_max_contexts_per_job; - unsigned long psm2_tracemask; - bool psm2_recvthread; - bool psm2_shared_contexts; - unsigned long psm2_opa_sl; }; typedef struct mca_mtl_psm2_module_t mca_mtl_psm2_module_t; diff --git a/ompi/mca/osc/rdma/osc_rdma.h b/ompi/mca/osc/rdma/osc_rdma.h index 277be1a1e90..6aed111ddf0 100644 --- a/ompi/mca/osc/rdma/osc_rdma.h +++ 
b/ompi/mca/osc/rdma/osc_rdma.h @@ -77,9 +77,6 @@ struct ompi_osc_rdma_component_t { /** RDMA component buffer size */ unsigned int buffer_size; - /** aggregation limit */ - unsigned int aggregation_limit; - /** List of requests that need to be freed */ opal_list_t request_gc; @@ -104,9 +101,6 @@ struct ompi_osc_rdma_component_t { /** Priority of the osc/rdma component */ unsigned int priority; - /** aggregation free list */ - opal_free_list_t aggregate; - /** directory where to place backing files */ char *backing_directory; }; @@ -151,6 +145,9 @@ struct ompi_osc_rdma_module_t { bool acc_use_amo; + /** whether the group is located on a single node */ + bool single_node; + /** flavor of this window */ int flavor; @@ -569,16 +566,6 @@ static inline void ompi_osc_rdma_sync_rdma_dec (ompi_osc_rdma_sync_t *rdma_sync) */ static inline void ompi_osc_rdma_sync_rdma_complete (ompi_osc_rdma_sync_t *sync) { - if (opal_list_get_size (&sync->aggregations)) { - ompi_osc_rdma_aggregation_t *aggregation, *next; - - OPAL_THREAD_SCOPED_LOCK(&sync->lock, - OPAL_LIST_FOREACH_SAFE(aggregation, next, &sync->aggregations, ompi_osc_rdma_aggregation_t) { - fprintf (stderr, "Flushing aggregation %p, peer %p\n", (void*)aggregation, (void*)aggregation->peer); - ompi_osc_rdma_peer_aggregate_flush (aggregation->peer); - }); - } - #if !defined(BTL_VERSION) || (BTL_VERSION < 310) do { opal_progress (); @@ -611,16 +598,6 @@ static inline bool ompi_osc_rdma_access_epoch_active (ompi_osc_rdma_module_t *mo return (module->all_sync.epoch_active || ompi_osc_rdma_in_passive_epoch (module)); } -static inline void ompi_osc_rdma_aggregation_return (ompi_osc_rdma_aggregation_t *aggregation) -{ - if (aggregation->sync) { - opal_list_remove_item (&aggregation->sync->aggregations, (opal_list_item_t *) aggregation); - } - - opal_free_list_return(&mca_osc_rdma_component.aggregate, (opal_free_list_item_t *) aggregation); -} - - __opal_attribute_always_inline__ static inline bool ompi_osc_rdma_oor (int rc) { diff 
--git a/ompi/mca/osc/rdma/osc_rdma_accumulate.c b/ompi/mca/osc/rdma/osc_rdma_accumulate.c index 31c3fc29bef..7fa896e96c6 100644 --- a/ompi/mca/osc/rdma/osc_rdma_accumulate.c +++ b/ompi/mca/osc/rdma/osc_rdma_accumulate.c @@ -889,10 +889,19 @@ int ompi_osc_rdma_rget_accumulate_internal (ompi_osc_rdma_sync_t *sync, const vo (void) ompi_osc_rdma_lock_acquire_exclusive (module, peer, offsetof (ompi_osc_rdma_state_t, accumulate_lock)); } + /* accumulate in (shared) memory if there is only a single node + * OR if we have an exclusive lock + * OR if other processes won't try to use the network either */ + bool use_shared_mem = module->single_node || + (ompi_osc_rdma_peer_local_base (peer) && + (ompi_osc_rdma_peer_is_exclusive (peer) || + !module->acc_single_intrinsic)); + /* if the datatype is small enough (and the count is 1) then try to directly use the hardware to execute * the atomic operation. this should be safe in all cases as either 1) the user has assured us they will - * never use atomics with count > 1, 2) we have the accumulate lock, or 3) we have an exclusive lock */ - if (origin_extent <= 8 && 1 == origin_count && !ompi_osc_rdma_peer_local_base (peer)) { + * never use atomics with count > 1, 2) we have the accumulate lock, or 3) we have an exclusive lock. + * avoid using the NIC if the operation can be done directly in shared memory. 
*/ + if (origin_extent <= 8 && 1 == origin_count && !use_shared_mem) { if (module->acc_use_amo && ompi_datatype_is_predefined (origin_datatype)) { if (NULL == result_addr) { ret = ompi_osc_rdma_acc_single_atomic (sync, origin_addr, origin_datatype, origin_extent, peer, target_address, diff --git a/ompi/mca/osc/rdma/osc_rdma_comm.c b/ompi/mca/osc/rdma/osc_rdma_comm.c index 4e3736d9515..1c166767783 100644 --- a/ompi/mca/osc/rdma/osc_rdma_comm.c +++ b/ompi/mca/osc/rdma/osc_rdma_comm.c @@ -434,28 +434,6 @@ static void ompi_osc_rdma_put_complete_flush (struct mca_btl_base_module_t *btl, } } -static void ompi_osc_rdma_aggregate_put_complete (struct mca_btl_base_module_t *btl, struct mca_btl_base_endpoint_t *endpoint, - void *local_address, mca_btl_base_registration_handle_t *local_handle, - void *context, void *data, int status) -{ - ompi_osc_rdma_aggregation_t *aggregation = (ompi_osc_rdma_aggregation_t *) context; - ompi_osc_rdma_sync_t *sync = aggregation->sync; - ompi_osc_rdma_frag_t *frag = aggregation->frag; - - assert (OPAL_SUCCESS == status); - - OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_TRACE, "aggregate put complete %p on sync %p. local address %p. 
status %d", - (void *) aggregation, (void *) sync, local_address, status); - - ompi_osc_rdma_frag_complete (frag); - ompi_osc_rdma_aggregation_return (aggregation); - - /* make sure the aggregation is returned before marking the operation as complete */ - opal_atomic_wmb (); - - ompi_osc_rdma_sync_rdma_dec (sync); -} - static int ompi_osc_rdma_put_real (ompi_osc_rdma_sync_t *sync, ompi_osc_rdma_peer_t *peer, uint64_t target_address, mca_btl_base_registration_handle_t *target_handle, void *ptr, mca_btl_base_registration_handle_t *local_handle, size_t size, @@ -492,75 +470,11 @@ static int ompi_osc_rdma_put_real (ompi_osc_rdma_sync_t *sync, ompi_osc_rdma_pee return ret; } -#if 0 -static void ompi_osc_rdma_aggregate_append (ompi_osc_rdma_aggregation_t *aggregation, ompi_osc_rdma_request_t *request, - void *source_buffer, size_t size) -{ - size_t offset = aggregation->buffer_used; - - OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_TRACE, "appending %lu bytes of data from %p to aggregate fragment %p with start " - "address 0x%lx", (unsigned long) size, source_buffer, (void *) aggregation, - (unsigned long) aggregation->target_address); - - memcpy (aggregation->buffer + offset, source_buffer, size); - - aggregation->buffer_used += size; - - if (request) { - /* the local buffer is now available */ - ompi_osc_rdma_request_complete (request, 0); - } -} - -static int ompi_osc_rdma_aggregate_alloc (ompi_osc_rdma_sync_t *sync, ompi_osc_rdma_peer_t *peer, uint64_t target_address, - mca_btl_base_registration_handle_t *target_handle, void *source_buffer, size_t size, - ompi_osc_rdma_request_t *request, int type) -{ - ompi_osc_rdma_module_t *module = sync->module; - ompi_osc_rdma_aggregation_t *aggregation; - int ret; - - aggregation = (ompi_osc_rdma_aggregation_t *) opal_free_list_get (&mca_osc_rdma_component.aggregate); - if (OPAL_UNLIKELY(NULL == aggregation)) { - return OPAL_ERR_OUT_OF_RESOURCE; - } - - ret = ompi_osc_rdma_frag_alloc (module, mca_osc_rdma_component.aggregation_limit, 
&aggregation->frag, - &aggregation->buffer); - if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) { - opal_free_list_return(&mca_osc_rdma_component.aggregate, (opal_free_list_item_t *) aggregation); - return ret; - } - - OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_TRACE, "allocated new aggregate fragment %p for target %d", (void *) aggregation, - peer->rank); - - peer->aggregate = aggregation; - - aggregation->target_address = target_address; - aggregation->target_handle = target_handle; - aggregation->buffer_size = mca_osc_rdma_component.aggregation_limit; - aggregation->sync = sync; - aggregation->peer = peer; - aggregation->type = type; - aggregation->buffer_used = 0; - - ompi_osc_rdma_aggregate_append (aggregation, request, source_buffer, size); - - opal_list_append (&sync->aggregations, (opal_list_item_t *) aggregation); - - return OMPI_SUCCESS; -} -#endif - int ompi_osc_rdma_put_contig (ompi_osc_rdma_sync_t *sync, ompi_osc_rdma_peer_t *peer, uint64_t target_address, mca_btl_base_registration_handle_t *target_handle, void *source_buffer, size_t size, ompi_osc_rdma_request_t *request) { ompi_osc_rdma_module_t *module = sync->module; -#if 0 - ompi_osc_rdma_aggregation_t *aggregation = peer->aggregate; -#endif mca_btl_base_registration_handle_t *local_handle = NULL; mca_btl_base_rdma_completion_fn_t cbfunc = NULL; ompi_osc_rdma_frag_t *frag = NULL; @@ -568,34 +482,6 @@ int ompi_osc_rdma_put_contig (ompi_osc_rdma_sync_t *sync, ompi_osc_rdma_peer_t * void *cbcontext; int ret; -#if 0 - if (aggregation) { - if (size <= (aggregation->buffer_size - aggregation->buffer_used) && (target_handle == aggregation->target_handle) && - (target_address == aggregation->target_address + aggregation->buffer_used)) { - assert (OMPI_OSC_RDMA_TYPE_PUT == aggregation->type); - ompi_osc_rdma_aggregate_append (aggregation, request, source_buffer, size); - return OMPI_SUCCESS; - } - - /* can't aggregate this operation. 
flush the previous segment */ - ret = ompi_osc_rdma_peer_aggregate_flush (peer); - if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) { - return ret; - } - } - - if (size <= (mca_osc_rdma_component.aggregation_limit >> 2)) { - ret = ompi_osc_rdma_aggregate_alloc (sync, peer, target_address, target_handle, source_buffer, size, request, - OMPI_OSC_RDMA_TYPE_PUT); - if (OPAL_LIKELY(OMPI_SUCCESS == ret)) { - if (request) { - - } - return ret; - } - } -#endif - if (module->selected_btl->btl_register_mem && size > module->selected_btl->btl_put_local_registration_threshold) { ret = ompi_osc_rdma_frag_alloc (module, size, &frag, &ptr); if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) { @@ -680,37 +566,6 @@ static void ompi_osc_rdma_get_complete (struct mca_btl_base_module_t *btl, struc ompi_osc_rdma_request_complete (request, status); } -int ompi_osc_rdma_peer_aggregate_flush (ompi_osc_rdma_peer_t *peer) -{ - ompi_osc_rdma_aggregation_t *aggregation = peer->aggregate; - int ret; - - if (NULL == aggregation) { - return OMPI_SUCCESS; - } - - OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_TRACE, "flusing aggregate fragment %p", (void *) aggregation); - - assert (OMPI_OSC_RDMA_TYPE_PUT == aggregation->type); - - ret = ompi_osc_rdma_put_real (aggregation->sync, peer, aggregation->target_address, aggregation->target_handle, - aggregation->buffer, aggregation->frag->handle, aggregation->buffer_used, - ompi_osc_rdma_aggregate_put_complete, (void *) aggregation, NULL); - - peer->aggregate = NULL; - - if (OPAL_UNLIKELY(OMPI_SUCCESS == ret)) { - return OMPI_SUCCESS; - } - - ompi_osc_rdma_cleanup_rdma (aggregation->sync, false, aggregation->frag, NULL, NULL); - - ompi_osc_rdma_aggregation_return (aggregation); - - return ret; - -} - static int ompi_osc_rdma_get_partial (ompi_osc_rdma_sync_t *sync, ompi_osc_rdma_peer_t *peer, uint64_t source_address, mca_btl_base_registration_handle_t *source_handle, void *target_buffer, size_t size, ompi_osc_rdma_request_t *request) { diff --git 
a/ompi/mca/osc/rdma/osc_rdma_component.c b/ompi/mca/osc/rdma/osc_rdma_component.c index bf6c1a84bb3..d1e99b98dd1 100644 --- a/ompi/mca/osc/rdma/osc_rdma_component.c +++ b/ompi/mca/osc/rdma/osc_rdma_component.c @@ -18,6 +18,8 @@ * Copyright (c) 2015-2017 Intel, Inc. All rights reserved. * Copyright (c) 2016-2017 IBM Corporation. All rights reserved. * Copyright (c) 2018 Cisco Systems, Inc. All rights reserved + * Copyright (c) 2019 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -41,6 +43,7 @@ #include "opal/threads/mutex.h" #include "opal/util/arch.h" #include "opal/util/argv.h" +#include "opal/util/printf.h" #include "opal/align.h" #if OPAL_CUDA_SUPPORT #include "opal/datatype/opal_datatype_cuda.h" @@ -223,16 +226,6 @@ static int ompi_osc_rdma_component_register (void) MCA_BASE_VAR_SCOPE_GROUP, &mca_osc_rdma_component.max_attach); free(description_str); - mca_osc_rdma_component.aggregation_limit = 1024; - asprintf(&description_str, "Maximum size of an aggregated put/get. Messages are aggregated for consecutive" - "put and get operations. In some cases this may lead to higher latency but " - "should also lead to higher bandwidth utilization. 
Set to 0 to disable (default: %d)", - mca_osc_rdma_component.aggregation_limit); - (void) mca_base_component_var_register (&mca_osc_rdma_component.super.osc_version, "aggregation_limit", - description_str, MCA_BASE_VAR_TYPE_UNSIGNED_INT, NULL, 0, 0, OPAL_INFO_LVL_3, - MCA_BASE_VAR_SCOPE_GROUP, &mca_osc_rdma_component.aggregation_limit); - free(description_str); - mca_osc_rdma_component.priority = 101; asprintf(&description_str, "Priority of the osc/rdma component (default: %d)", mca_osc_rdma_component.priority); @@ -336,24 +329,6 @@ static int ompi_osc_rdma_component_init (bool enable_progress_threads, __FILE__, __LINE__, ret); } - OBJ_CONSTRUCT(&mca_osc_rdma_component.aggregate, opal_free_list_t); - - if (!enable_mpi_threads && mca_osc_rdma_component.aggregation_limit) { - ret = opal_free_list_init (&mca_osc_rdma_component.aggregate, - sizeof(ompi_osc_rdma_aggregation_t), 8, - OBJ_CLASS(ompi_osc_rdma_aggregation_t), 0, 0, - 32, 128, 32, NULL, 0, NULL, NULL, NULL); - - if (OPAL_SUCCESS != ret) { - opal_output_verbose(1, ompi_osc_base_framework.framework_output, - "%s:%d: opal_free_list_init failed: %d\n", - __FILE__, __LINE__, ret); - } - } else { - /* only enable put aggregation when not using threads */ - mca_osc_rdma_component.aggregation_limit = 0; - } - return ret; } @@ -373,7 +348,6 @@ int ompi_osc_rdma_component_finalize (void) OBJ_DESTRUCT(&mca_osc_rdma_component.requests); OBJ_DESTRUCT(&mca_osc_rdma_component.request_gc); OBJ_DESTRUCT(&mca_osc_rdma_component.buffer_gc); - OBJ_DESTRUCT(&mca_osc_rdma_component.aggregate); return OMPI_SUCCESS; } @@ -550,6 +524,19 @@ struct _local_data { size_t size; }; +static int synchronize_errorcode(int errorcode, ompi_communicator_t *comm) +{ + int ret; + int err = errorcode; + /* This assumes that error codes are negative integers */ + ret = comm->c_coll->coll_allreduce (MPI_IN_PLACE, &err, 1, MPI_INT, MPI_MIN, + comm, comm->c_coll->coll_allreduce_module); + if (OPAL_UNLIKELY (OMPI_SUCCESS != ret)) { + err = ret; + } + 
return err; +} + static int allocate_state_shared (ompi_osc_rdma_module_t *module, void **base, size_t size) { ompi_communicator_t *shared_comm; @@ -569,7 +556,8 @@ static int allocate_state_shared (ompi_osc_rdma_module_t *module, void **base, s local_size = ompi_comm_size (shared_comm); /* CPU atomics can be used if every process is on the same node or the NIC allows mixing CPU and NIC atomics */ - module->use_cpu_atomics = local_size == global_size || (module->selected_btl->btl_flags & MCA_BTL_ATOMIC_SUPPORTS_GLOB); + module->single_node = local_size == global_size; + module->use_cpu_atomics = module->single_node || (module->selected_btl->btl_flags & MCA_BTL_ATOMIC_SUPPORTS_GLOB); if (1 == local_size) { /* no point using a shared segment if there are no other processes on this node */ @@ -613,28 +601,31 @@ static int allocate_state_shared (ompi_osc_rdma_module_t *module, void **base, s } } - /* allocate the shared memory segment */ - ret = asprintf (&data_file, "%s" OPAL_PATH_SEP "osc_rdma.%s.%x.%d", - mca_osc_rdma_component.backing_directory, ompi_process_info.nodename, - OMPI_PROC_MY_NAME->jobid, ompi_comm_get_cid(module->comm)); - if (0 > ret) { - ret = OMPI_ERR_OUT_OF_RESOURCE; - break; - } - if (0 == local_rank) { - /* allocate enough space for the state + data for all local ranks */ - ret = opal_shmem_segment_create (&module->seg_ds, data_file, total_size); - free (data_file); - if (OPAL_SUCCESS != ret) { - OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_ERROR, "failed to create shared memory segment"); - break; + /* allocate the shared memory segment */ + ret = opal_asprintf (&data_file, "%s" OPAL_PATH_SEP "osc_rdma.%s.%x.%d", + mca_osc_rdma_component.backing_directory, ompi_process_info.nodename, + OMPI_PROC_MY_NAME->jobid, ompi_comm_get_cid(module->comm)); + if (0 > ret) { + ret = OMPI_ERR_OUT_OF_RESOURCE; + } else { + /* allocate enough space for the state + data for all local ranks */ + ret = opal_shmem_segment_create (&module->seg_ds, data_file, total_size); + free 
(data_file); + if (OPAL_SUCCESS != ret) { + OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_ERROR, "failed to create shared memory segment"); + } } } - ret = module->comm->c_coll->coll_bcast (&module->seg_ds, sizeof (module->seg_ds), MPI_BYTE, 0, + ret = synchronize_errorcode(ret, shared_comm); + if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) { + break; + } + + ret = shared_comm->c_coll->coll_bcast (&module->seg_ds, sizeof (module->seg_ds), MPI_BYTE, 0, shared_comm, shared_comm->c_coll->coll_bcast_module); - if (OMPI_SUCCESS != ret) { + if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) { break; } @@ -642,6 +633,10 @@ static int allocate_state_shared (ompi_osc_rdma_module_t *module, void **base, s if (NULL == module->segment_base) { OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_ERROR, "failed to attach to the shared memory segment"); ret = OPAL_ERROR; + } + + ret = synchronize_errorcode(ret, shared_comm); + if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) { break; } @@ -661,35 +656,28 @@ static int allocate_state_shared (ompi_osc_rdma_module_t *module, void **base, s memset (module->state, 0, module->state_size); if (0 == local_rank) { + /* unlink the shared memory backing file */ + opal_shmem_unlink (&module->seg_ds); /* just go ahead and register the whole segment */ ret = ompi_osc_rdma_register (module, MCA_BTL_ENDPOINT_ANY, module->segment_base, total_size, MCA_BTL_REG_FLAG_ACCESS_ANY, &module->state_handle); - if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) { - break; + if (OPAL_LIKELY(OMPI_SUCCESS == ret)) { + state_region->base = (intptr_t) module->segment_base; + if (module->state_handle) { + memcpy (state_region->btl_handle_data, module->state_handle, module->selected_btl->btl_registration_handle_size); + } } + } - state_region->base = (intptr_t) module->segment_base; - if (module->state_handle) { - memcpy (state_region->btl_handle_data, module->state_handle, module->selected_btl->btl_registration_handle_size); - } + /* synchronization to make sure memory is registered */ + ret = synchronize_errorcode(ret, 
shared_comm); + if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) { + break; } if (MPI_WIN_FLAVOR_CREATE == module->flavor) { ret = ompi_osc_rdma_initialize_region (module, base, size); - if (OMPI_SUCCESS != ret) { - break; - } - } - - /* barrier to make sure all ranks have attached */ - shared_comm->c_coll->coll_barrier(shared_comm, shared_comm->c_coll->coll_barrier_module); - - /* unlink the shared memory backing file */ - if (0 == local_rank) { - opal_shmem_unlink (&module->seg_ds); - } - - if (MPI_WIN_FLAVOR_ALLOCATE == module->flavor) { + } else if (MPI_WIN_FLAVOR_ALLOCATE == module->flavor) { ompi_osc_rdma_region_t *region = (ompi_osc_rdma_region_t *) module->state->regions; module->state->disp_unit = module->disp_unit; module->state->region_count = 1; @@ -700,8 +688,11 @@ static int allocate_state_shared (ompi_osc_rdma_module_t *module, void **base, s } } - /* barrier to make sure all ranks have set up their region data */ - shared_comm->c_coll->coll_barrier(shared_comm, shared_comm->c_coll->coll_barrier_module); + /* synchronization to make sure all ranks have set up their region data */ + ret = synchronize_errorcode(ret, shared_comm); + if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) { + break; + } offset = data_base; for (int i = 0 ; i < local_size ; ++i) { @@ -748,7 +739,13 @@ static int allocate_state_shared (ompi_osc_rdma_module_t *module, void **base, s ompi_osc_module_add_peer (module, peer); - if (MPI_WIN_FLAVOR_DYNAMIC == module->flavor || 0 == temp[i].size) { + if (MPI_WIN_FLAVOR_DYNAMIC == module->flavor) { + if (module->use_cpu_atomics && peer_rank == my_rank) { + peer->flags |= OMPI_OSC_RDMA_PEER_LOCAL_BASE; + } + /* nothing more to do */ + continue; + } else if (0 == temp[i].size) { /* nothing more to do */ continue; } @@ -1014,13 +1011,7 @@ static int ompi_osc_rdma_share_data (ompi_osc_rdma_module_t *module) free (temp); } while (0); - - ret = module->comm->c_coll->coll_allreduce (&ret, &global_result, 1, MPI_INT, MPI_MIN, module->comm, - 
module->comm->c_coll->coll_allreduce_module); - - if (OMPI_SUCCESS != ret) { - global_result = ret; - } + global_result = synchronize_errorcode(ret, module->comm); /* none of these communicators are needed anymore so free them now*/ if (MPI_COMM_NULL != module->local_leaders) { @@ -1255,6 +1246,9 @@ static int ompi_osc_rdma_component_select (struct ompi_win_t *win, void **base, /* fill in our part */ ret = allocate_state_shared (module, base, size); + + /* notify all others if something went wrong */ + ret = synchronize_errorcode(ret, module->comm); if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) { OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_ERROR, "failed to allocate internal state"); ompi_osc_rdma_free (win); @@ -1355,53 +1349,3 @@ static char* ompi_osc_rdma_set_no_lock_info(opal_infosubscriber_t *obj, char *ke */ return module->no_locks ? "true" : "false"; } - -#if 0 // stale code? -static int ompi_osc_rdma_set_info (struct ompi_win_t *win, struct opal_info_t *info) -{ - ompi_osc_rdma_module_t *module = GET_MODULE(win); - bool temp; - - temp = check_config_value_bool ("no_locks", info); - if (temp && !module->no_locks) { - /* clean up the lock hash. it is up to the user to ensure no lock is - * outstanding from this process when setting the info key */ - OBJ_DESTRUCT(&module->outstanding_locks); - OBJ_CONSTRUCT(&module->outstanding_locks, opal_hash_table_t); - - module->no_locks = true; - win->w_flags |= OMPI_WIN_NO_LOCKS; - } else if (!temp && module->no_locks) { - int world_size = ompi_comm_size (module->comm); - int init_limit = world_size > 256 ? 256 : world_size; - int ret; - - ret = opal_hash_table_init (&module->outstanding_locks, init_limit); - if (OPAL_SUCCESS != ret) { - return ret; - } - - module->no_locks = false; - win->w_flags &= ~OMPI_WIN_NO_LOCKS; - } - - /* enforce collectiveness... 
*/ - return module->comm->c_coll->coll_barrier(module->comm, - module->comm->c_coll->coll_barrier_module); -} - - -static int ompi_osc_rdma_get_info (struct ompi_win_t *win, struct opal_info_t **info_used) -{ - opal_info_t *info = OBJ_NEW(opal_info_t); - - if (NULL == info) { - return OMPI_ERR_TEMP_OUT_OF_RESOURCE; - } - - *info_used = info; - - return OMPI_SUCCESS; -} -#endif -OBJ_CLASS_INSTANCE(ompi_osc_rdma_aggregation_t, opal_list_item_t, NULL, NULL); diff --git a/ompi/mca/osc/rdma/osc_rdma_lock.h b/ompi/mca/osc/rdma/osc_rdma_lock.h index 70f09908798..7af4d703f6f 100644 --- a/ompi/mca/osc/rdma/osc_rdma_lock.h +++ b/ompi/mca/osc/rdma/osc_rdma_lock.h @@ -40,7 +40,7 @@ static inline int ompi_osc_rdma_btl_fop (ompi_osc_rdma_module_t *module, struct ompi_osc_rdma_pending_op_cb_fn_t cbfunc, void *cbdata, void *cbcontext) { ompi_osc_rdma_pending_op_t *pending_op; - int ret; + int ret = OPAL_ERROR; pending_op = OBJ_NEW(ompi_osc_rdma_pending_op_t); assert (NULL != pending_op); diff --git a/ompi/mca/osc/rdma/osc_rdma_module.c b/ompi/mca/osc/rdma/osc_rdma_module.c index e7d04fb96fe..40765cb2d23 100644 --- a/ompi/mca/osc/rdma/osc_rdma_module.c +++ b/ompi/mca/osc/rdma/osc_rdma_module.c @@ -111,7 +111,7 @@ int ompi_osc_rdma_free(ompi_win_t *win) OBJ_DESTRUCT(&module->peer_hash); } else if (NULL != module->comm) { - for (int i = 0 ; i < ompi_comm_rank (module->comm) ; ++i) { + for (int i = 0 ; i < ompi_comm_size (module->comm) ; ++i) { if (NULL != module->peer_array[i]) { OBJ_RELEASE(module->peer_array[i]); } diff --git a/ompi/mca/osc/rdma/osc_rdma_peer.h b/ompi/mca/osc/rdma/osc_rdma_peer.h index 0e46ec6dfc4..a0db4c4a7f8 100644 --- a/ompi/mca/osc/rdma/osc_rdma_peer.h +++ b/ompi/mca/osc/rdma/osc_rdma_peer.h @@ -1,6 +1,6 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* - * Copyright (c) 2014-2017 Los Alamos National Security, LLC. All rights + * Copyright (c) 2014-2018 Los Alamos National Security, LLC. All rights * reserved. 
* $COPYRIGHT$ * @@ -44,9 +44,6 @@ struct ompi_osc_rdma_peer_t { /** peer flags */ volatile int32_t flags; - - /** aggregation support */ - ompi_osc_rdma_aggregation_t *aggregate; }; typedef struct ompi_osc_rdma_peer_t ompi_osc_rdma_peer_t; @@ -164,13 +161,6 @@ int ompi_osc_rdma_new_peer (struct ompi_osc_rdma_module_t *module, int peer_id, */ struct ompi_osc_rdma_peer_t *ompi_osc_rdma_peer_lookup (struct ompi_osc_rdma_module_t *module, int peer_id); -/** - * @brief flush queued aggregated operation - * - * @param[in] peer osc rdma peer - */ -int ompi_osc_rdma_peer_aggregate_flush (ompi_osc_rdma_peer_t *peer); - /** * @brief lookup the btl endpoint for a peer * diff --git a/ompi/mca/osc/rdma/osc_rdma_sync.c b/ompi/mca/osc/rdma/osc_rdma_sync.c index f07ea4f7839..49aae970add 100644 --- a/ompi/mca/osc/rdma/osc_rdma_sync.c +++ b/ompi/mca/osc/rdma/osc_rdma_sync.c @@ -17,14 +17,12 @@ static void ompi_osc_rdma_sync_constructor (ompi_osc_rdma_sync_t *rdma_sync) rdma_sync->type = OMPI_OSC_RDMA_SYNC_TYPE_NONE; rdma_sync->epoch_active = false; rdma_sync->outstanding_rdma.counter = 0; - OBJ_CONSTRUCT(&rdma_sync->aggregations, opal_list_t); OBJ_CONSTRUCT(&rdma_sync->lock, opal_mutex_t); OBJ_CONSTRUCT(&rdma_sync->demand_locked_peers, opal_list_t); } static void ompi_osc_rdma_sync_destructor (ompi_osc_rdma_sync_t *rdma_sync) { - OBJ_DESTRUCT(&rdma_sync->aggregations); OBJ_DESTRUCT(&rdma_sync->lock); OBJ_DESTRUCT(&rdma_sync->demand_locked_peers); } diff --git a/ompi/mca/osc/rdma/osc_rdma_sync.h b/ompi/mca/osc/rdma/osc_rdma_sync.h index e33b32d4371..202bf792656 100644 --- a/ompi/mca/osc/rdma/osc_rdma_sync.h +++ b/ompi/mca/osc/rdma/osc_rdma_sync.h @@ -97,9 +97,6 @@ struct ompi_osc_rdma_sync_t { /** outstanding rdma operations on epoch */ ompi_osc_rdma_sync_aligned_counter_t outstanding_rdma __opal_attribute_aligned__(64); - /** aggregated operations in this epoch */ - opal_list_t aggregations; - /** lock to protect sync structure members */ opal_mutex_t lock; }; diff --git 
a/ompi/mca/osc/rdma/osc_rdma_types.h b/ompi/mca/osc/rdma/osc_rdma_types.h index 790b8802cb2..4acb40154de 100644 --- a/ompi/mca/osc/rdma/osc_rdma_types.h +++ b/ompi/mca/osc/rdma/osc_rdma_types.h @@ -1,6 +1,6 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* - * Copyright (c) 2014-2017 Los Alamos National Security, LLC. All rights + * Copyright (c) 2014-2018 Los Alamos National Security, LLC. All rights * reserved. * $COPYRIGHT$ * @@ -171,40 +171,6 @@ struct ompi_osc_rdma_state_t { }; typedef struct ompi_osc_rdma_state_t ompi_osc_rdma_state_t; -struct ompi_osc_rdma_aggregation_t { - opal_list_item_t super; - - /** associated peer */ - struct ompi_osc_rdma_peer_t *peer; - - /** aggregation buffer frag */ - struct ompi_osc_rdma_frag_t *frag; - - /** synchronization object */ - struct ompi_osc_rdma_sync_t *sync; - - /** aggregation buffer */ - char *buffer; - - /** target for the operation */ - osc_rdma_base_t target_address; - - /** handle for target memory address */ - mca_btl_base_registration_handle_t *target_handle; - - /** buffer size */ - size_t buffer_size; - - /** buffer used */ - size_t buffer_used; - - /** type */ - int type; -}; -typedef struct ompi_osc_rdma_aggregation_t ompi_osc_rdma_aggregation_t; - -OBJ_CLASS_DECLARATION(ompi_osc_rdma_aggregation_t); - typedef void (*ompi_osc_rdma_pending_op_cb_fn_t) (void *, void *, int); struct ompi_osc_rdma_pending_op_t { diff --git a/ompi/mca/osc/sm/osc_sm_active_target.c b/ompi/mca/osc/sm/osc_sm_active_target.c index ab0f73f87c6..e34389be6c7 100644 --- a/ompi/mca/osc/sm/osc_sm_active_target.c +++ b/ompi/mca/osc/sm/osc_sm_active_target.c @@ -151,7 +151,7 @@ ompi_osc_sm_start(struct ompi_group_t *group, for (int i = 0 ; i < size ; ++i) { int rank_byte = ranks[i] >> OSC_SM_POST_BITS; - osc_sm_post_type_t rank_bit = ((osc_sm_post_type_t) 1) << (ranks[i] & 0x3f); + osc_sm_post_type_t rank_bit = ((osc_sm_post_type_t) 1) << (ranks[i] & OSC_SM_POST_MASK); /* wait for rank to post */ while 
(!(module->posts[my_rank][rank_byte] & rank_bit)) { @@ -221,8 +221,8 @@ ompi_osc_sm_post(struct ompi_group_t *group, ompi_osc_sm_module_t *module = (ompi_osc_sm_module_t*) win->w_osc_module; int my_rank = ompi_comm_rank (module->comm); - int my_byte = my_rank >> 6; - uint64_t my_bit = ((uint64_t) 1) << (my_rank & 0x3f); + int my_byte = my_rank >> OSC_SM_POST_BITS; + osc_sm_post_type_t my_bit = ((osc_sm_post_type_t) 1) << (my_rank & OSC_SM_POST_MASK); int gsize; OPAL_THREAD_LOCK(&module->lock); diff --git a/ompi/mca/osc/sm/osc_sm_component.c b/ompi/mca/osc/sm/osc_sm_component.c index de891e71a11..0475e65266b 100644 --- a/ompi/mca/osc/sm/osc_sm_component.c +++ b/ompi/mca/osc/sm/osc_sm_component.c @@ -242,7 +242,7 @@ component_select(struct ompi_win_t *win, void **base, size_t size, int disp_unit int i, flag; size_t pagesize; size_t state_size; - size_t posts_size, post_size = (comm_size + 63) / 64; + size_t posts_size, post_size = (comm_size + OSC_SM_POST_MASK) / (OSC_SM_POST_MASK + 1); OPAL_OUTPUT_VERBOSE((1, ompi_osc_base_framework.framework_output, "allocating shared memory region of size %ld\n", (long) size)); diff --git a/ompi/mca/osc/ucx/osc_ucx.h b/ompi/mca/osc/ucx/osc_ucx.h index 095de34c272..44dff95a845 100644 --- a/ompi/mca/osc/ucx/osc_ucx.h +++ b/ompi/mca/osc/ucx/osc_ucx.h @@ -38,6 +38,7 @@ typedef struct ompi_osc_ucx_component { opal_free_list_t requests; /* request free list for the r* communication variants */ bool env_initialized; /* UCX environment is initialized or not */ int num_incomplete_req_ops; + int num_modules; unsigned int priority; } ompi_osc_ucx_component_t; diff --git a/ompi/mca/osc/ucx/osc_ucx_active_target.c b/ompi/mca/osc/ucx/osc_ucx_active_target.c index 102cecabf65..3ee2ba6896d 100644 --- a/ompi/mca/osc/ucx/osc_ucx_active_target.c +++ b/ompi/mca/osc/ucx/osc_ucx_active_target.c @@ -193,7 +193,10 @@ int ompi_osc_ucx_complete(struct ompi_win_t *win) { OSC_UCX_VERBOSE(1, "ucp_atomic_post failed: %d", status); } - 
opal_common_ucx_ep_flush(ep, mca_osc_ucx_component.ucp_worker); + ret = opal_common_ucx_ep_flush(ep, mca_osc_ucx_component.ucp_worker); + if (OMPI_SUCCESS != ret) { + OSC_UCX_VERBOSE(1, "opal_common_ucx_ep_flush failed: %d", ret); + } } OBJ_RELEASE(module->start_group); @@ -273,6 +276,7 @@ int ompi_osc_ucx_post(struct ompi_group_t *group, int assert, struct ompi_win_t ompi_osc_ucx_handle_incoming_post(module, &(module->state.post_state[j]), NULL, 0); } + ucp_worker_progress(mca_osc_ucx_component.ucp_worker); usleep(100); } while (1); } diff --git a/ompi/mca/osc/ucx/osc_ucx_comm.c b/ompi/mca/osc/ucx/osc_ucx_comm.c index 9211f20e798..adedae5c3ec 100644 --- a/ompi/mca/osc/ucx/osc_ucx_comm.c +++ b/ompi/mca/osc/ucx/osc_ucx_comm.c @@ -17,6 +17,13 @@ #include "osc_ucx.h" #include "osc_ucx_request.h" + +#define CHECK_VALID_RKEY(_module, _target, _count) \ + if (!((_module)->win_info_array[_target]).rkey_init && ((_count) > 0)) { \ + OSC_UCX_VERBOSE(1, "window with non-zero length does not have an rkey"); \ + return OMPI_ERROR; \ + } + typedef struct ucx_iovec { void *addr; size_t len; @@ -274,7 +281,7 @@ static inline int start_atomicity(ompi_osc_ucx_module_t *module, ucp_ep_h ep, in uint64_t remote_addr = (module->state_info_array)[target].addr + OSC_UCX_STATE_ACC_LOCK_OFFSET; ucs_status_t status; - while (result_value != TARGET_LOCK_UNLOCKED) { + for (;;) { status = opal_common_ucx_atomic_cswap(ep, TARGET_LOCK_UNLOCKED, TARGET_LOCK_EXCLUSIVE, &result_value, sizeof(result_value), remote_addr, rkey, @@ -283,9 +290,13 @@ static inline int start_atomicity(ompi_osc_ucx_module_t *module, ucp_ep_h ep, in OSC_UCX_VERBOSE(1, "ucp_atomic_cswap64 failed: %d", status); return OMPI_ERROR; } + if (result_value == TARGET_LOCK_UNLOCKED) { + return OMPI_SUCCESS; + } + + ucp_worker_progress(mca_osc_ucx_component.ucp_worker); } - return OMPI_SUCCESS; } static inline int end_atomicity(ompi_osc_ucx_module_t *module, ucp_ep_h ep, int target) { @@ -380,6 +391,12 @@ int ompi_osc_ucx_put(const 
void *origin_addr, int origin_count, struct ompi_data } } + CHECK_VALID_RKEY(module, target, target_count); + + if (!target_count) { + return OMPI_SUCCESS; + } + rkey = (module->win_info_array[target]).rkey; ompi_datatype_get_true_extent(origin_dt, &origin_lb, &origin_extent); @@ -434,6 +451,12 @@ int ompi_osc_ucx_get(void *origin_addr, int origin_count, } } + CHECK_VALID_RKEY(module, target, target_count); + + if (!target_count) { + return OMPI_SUCCESS; + } + rkey = (module->win_info_array[target]).rkey; ompi_datatype_get_true_extent(origin_dt, &origin_lb, &origin_extent); @@ -543,12 +566,13 @@ int ompi_osc_ucx_accumulate(const void *origin_addr, int origin_count, if ((op != &ompi_mpi_op_maxloc.op && op != &ompi_mpi_op_minloc.op) || ompi_datatype_is_contiguous_memory_layout(temp_dt, temp_count)) { size_t temp_size; + char *curr_temp_addr = (char *)temp_addr; ompi_datatype_type_size(temp_dt, &temp_size); while (origin_ucx_iov_idx < origin_ucx_iov_count) { int curr_count = origin_ucx_iov[origin_ucx_iov_idx].len / temp_size; ompi_op_reduce(op, origin_ucx_iov[origin_ucx_iov_idx].addr, - temp_addr, curr_count, temp_dt); - temp_addr = (void *)((char *)temp_addr + curr_count * temp_size); + curr_temp_addr, curr_count, temp_dt); + curr_temp_addr += curr_count * temp_size; origin_ucx_iov_idx++; } } else { @@ -654,7 +678,7 @@ int ompi_osc_ucx_fetch_and_op(const void *origin_addr, void *result_addr, ucp_ep_h ep = OSC_UCX_GET_EP(module->comm, target); uint64_t remote_addr = (module->win_info_array[target]).addr + target_disp * OSC_UCX_GET_DISP(module, target); ucp_rkey_h rkey; - uint64_t value = *(uint64_t *)origin_addr; + uint64_t value = origin_addr ? 
*(uint64_t *)origin_addr : 0; ucp_atomic_fetch_op_t opcode; size_t dt_bytes; ompi_osc_ucx_internal_request_t *req = NULL; @@ -788,12 +812,13 @@ int ompi_osc_ucx_get_accumulate(const void *origin_addr, int origin_count, if ((op != &ompi_mpi_op_maxloc.op && op != &ompi_mpi_op_minloc.op) || ompi_datatype_is_contiguous_memory_layout(temp_dt, temp_count)) { size_t temp_size; + char *curr_temp_addr = (char *)temp_addr; ompi_datatype_type_size(temp_dt, &temp_size); while (origin_ucx_iov_idx < origin_ucx_iov_count) { int curr_count = origin_ucx_iov[origin_ucx_iov_idx].len / temp_size; ompi_op_reduce(op, origin_ucx_iov[origin_ucx_iov_idx].addr, - temp_addr, curr_count, temp_dt); - temp_addr = (void *)((char *)temp_addr + curr_count * temp_size); + curr_temp_addr, curr_count, temp_dt); + curr_temp_addr += curr_count * temp_size; origin_ucx_iov_idx++; } } else { @@ -860,7 +885,9 @@ int ompi_osc_ucx_rput(const void *origin_addr, int origin_count, } } - rkey = (module->win_info_array[target]).rkey; + CHECK_VALID_RKEY(module, target, target_count); + + rkey = (module->state_info_array[target]).rkey; OMPI_OSC_UCX_REQUEST_ALLOC(win, ucx_req); assert(NULL != ucx_req); @@ -919,7 +946,9 @@ int ompi_osc_ucx_rget(void *origin_addr, int origin_count, } } - rkey = (module->win_info_array[target]).rkey; + CHECK_VALID_RKEY(module, target, target_count); + + rkey = (module->state_info_array[target]).rkey; OMPI_OSC_UCX_REQUEST_ALLOC(win, ucx_req); assert(NULL != ucx_req); diff --git a/ompi/mca/osc/ucx/osc_ucx_component.c b/ompi/mca/osc/ucx/osc_ucx_component.c index dc6c5f2e44c..f4032a67151 100644 --- a/ompi/mca/osc/ucx/osc_ucx_component.c +++ b/ompi/mca/osc/ucx/osc_ucx_component.c @@ -17,6 +17,24 @@ #include "osc_ucx.h" #include "osc_ucx_request.h" +#define memcpy_off(_dst, _src, _len, _off) \ + memcpy(((char*)(_dst)) + (_off), _src, _len); \ + (_off) += (_len); + +opal_mutex_t mca_osc_service_mutex = OPAL_MUTEX_STATIC_INIT; +static void _osc_ucx_init_lock(void) +{ + 
if(mca_osc_ucx_component.enable_mpi_threads) { + opal_mutex_lock(&mca_osc_service_mutex); + } +} +static void _osc_ucx_init_unlock(void) +{ + if(mca_osc_ucx_component.enable_mpi_threads) { + opal_mutex_unlock(&mca_osc_service_mutex); + } +} + static int component_open(void); static int component_register(void); static int component_init(bool enable_progress_threads, bool enable_mpi_threads); @@ -26,6 +44,7 @@ static int component_query(struct ompi_win_t *win, void **base, size_t size, int static int component_select(struct ompi_win_t *win, void **base, size_t size, int disp_unit, struct ompi_communicator_t *comm, struct opal_info_t *info, int flavor, int *model); +static void ompi_osc_ucx_unregister_progress(void); ompi_osc_ucx_component_t mca_osc_ucx_component = { { /* ompi_osc_base_component_t */ @@ -45,7 +64,12 @@ ompi_osc_ucx_component_t mca_osc_ucx_component = { .osc_query = component_query, .osc_select = component_select, .osc_finalize = component_finalize, - } + }, + .ucp_context = NULL, + .ucp_worker = NULL, + .env_initialized = false, + .num_incomplete_req_ops = 0, + .num_modules = 0 }; ompi_osc_ucx_module_t ompi_osc_ucx_module_template = { @@ -92,8 +116,15 @@ static int component_open(void) { } static int component_register(void) { + unsigned major = 0; + unsigned minor = 0; + unsigned release_number = 0; char *description_str; - mca_osc_ucx_component.priority = 0; + + ucp_get_version(&major, &minor, &release_number); + + mca_osc_ucx_component.priority = UCX_VERSION(major, minor, release_number) >= UCX_VERSION(1, 5, 0) ? 
60 : 0; + asprintf(&description_str, "Priority of the osc/ucx component (default: %d)", mca_osc_ucx_component.priority); (void) mca_base_component_var_register(&mca_osc_ucx_component.super.osc_version, "priority", description_str, @@ -101,28 +132,21 @@ static int component_register(void) { MCA_BASE_VAR_SCOPE_GROUP, &mca_osc_ucx_component.priority); free(description_str); + opal_common_ucx_mca_var_register(&mca_osc_ucx_component.super.osc_version); + return OMPI_SUCCESS; } static int progress_callback(void) { - if (mca_osc_ucx_component.ucp_worker != NULL && - mca_osc_ucx_component.num_incomplete_req_ops > 0) { - ucp_worker_progress(mca_osc_ucx_component.ucp_worker); - } + ucp_worker_progress(mca_osc_ucx_component.ucp_worker); return 0; } static int component_init(bool enable_progress_threads, bool enable_mpi_threads) { - int ret = OMPI_SUCCESS; - - mca_osc_ucx_component.ucp_context = NULL; - mca_osc_ucx_component.ucp_worker = NULL; mca_osc_ucx_component.enable_mpi_threads = enable_mpi_threads; - mca_osc_ucx_component.env_initialized = false; - mca_osc_ucx_component.num_incomplete_req_ops = 0; opal_common_ucx_mca_register(); - return ret; + return OMPI_SUCCESS; } static int component_finalize(void) { @@ -141,7 +165,6 @@ static int component_finalize(void) { assert(mca_osc_ucx_component.num_incomplete_req_ops == 0); if (mca_osc_ucx_component.env_initialized == true) { OBJ_DESTRUCT(&mca_osc_ucx_component.requests); - opal_progress_unregister(progress_callback); ucp_cleanup(mca_osc_ucx_component.ucp_context); mca_osc_ucx_component.env_initialized = false; } @@ -241,6 +264,25 @@ static inline int mem_map(void **base, size_t size, ucp_mem_h *memh_ptr, return ret; } +static void ompi_osc_ucx_unregister_progress() +{ + int ret; + + /* May be called concurrently - protect */ + _osc_ucx_init_lock(); + + mca_osc_ucx_component.num_modules--; + OSC_UCX_ASSERT(mca_osc_ucx_component.num_modules >= 0); + if (0 == mca_osc_ucx_component.num_modules) { + ret = 
opal_progress_unregister(progress_callback); + if (OMPI_SUCCESS != ret) { + OSC_UCX_VERBOSE(1, "opal_progress_unregister failed: %d", ret); + } + } + + _osc_ucx_init_unlock(); +} + static int component_select(struct ompi_win_t *win, void **base, size_t size, int disp_unit, struct ompi_communicator_t *comm, struct opal_info_t *info, int flavor, int *model) { @@ -251,7 +293,7 @@ static int component_select(struct ompi_win_t *win, void **base, size_t size, in ucs_status_t status; int i, comm_size = ompi_comm_size(comm); int is_eps_ready; - bool progress_registered = false, eps_created = false, env_initialized = false; + bool eps_created = false, env_initialized = false; ucp_address_t *my_addr = NULL; size_t my_addr_len; char *recv_buf = NULL; @@ -263,6 +305,8 @@ static int component_select(struct ompi_win_t *win, void **base, size_t size, in int disps[comm_size]; int rkey_sizes[comm_size]; uint64_t zero = 0; + size_t info_offset; + uint64_t size_u64; /* the osc/sm component is the exclusive provider for support for * shared memory windows */ @@ -270,6 +314,8 @@ static int component_select(struct ompi_win_t *win, void **base, size_t size, in return OMPI_ERR_NOT_SUPPORTED; } + _osc_ucx_init_lock(); + if (mca_osc_ucx_component.env_initialized == false) { ucp_config_t *config = NULL; ucp_params_t context_params; @@ -279,7 +325,8 @@ static int component_select(struct ompi_win_t *win, void **base, size_t size, in status = ucp_config_read("MPI", NULL, &config); if (UCS_OK != status) { OSC_UCX_VERBOSE(1, "ucp_config_read failed: %d", status); - return OMPI_ERROR; + ret = OMPI_ERROR; + goto select_unlock; } OBJ_CONSTRUCT(&mca_osc_ucx_component.requests, opal_free_list_t); @@ -290,7 +337,7 @@ static int component_select(struct ompi_win_t *win, void **base, size_t size, in 0, 0, 8, 0, 8, NULL, 0, NULL, NULL, NULL); if (OMPI_SUCCESS != ret) { OSC_UCX_VERBOSE(1, "opal_free_list_init failed: %d", ret); - goto error; + goto select_unlock; } /* initialize UCP context */ @@ -312,7 
+359,7 @@ static int component_select(struct ompi_win_t *win, void **base, size_t size, in if (UCS_OK != status) { OSC_UCX_VERBOSE(1, "ucp_init failed: %d", status); ret = OMPI_ERROR; - goto error; + goto select_unlock; } assert(mca_osc_ucx_component.ucp_worker == NULL); @@ -324,15 +371,8 @@ static int component_select(struct ompi_win_t *win, void **base, size_t size, in &(mca_osc_ucx_component.ucp_worker)); if (UCS_OK != status) { OSC_UCX_VERBOSE(1, "ucp_worker_create failed: %d", status); - ret = OMPI_ERROR; - goto error_nomem; - } - - ret = opal_progress_register(progress_callback); - progress_registered = true; - if (OMPI_SUCCESS != ret) { - OSC_UCX_VERBOSE(1, "opal_progress_register failed: %d", ret); - goto error; + ret = OMPI_ERR_TEMP_OUT_OF_RESOURCE; + goto select_unlock; } /* query UCP worker attributes */ @@ -340,20 +380,44 @@ static int component_select(struct ompi_win_t *win, void **base, size_t size, in status = ucp_worker_query(mca_osc_ucx_component.ucp_worker, &worker_attr); if (UCS_OK != status) { OSC_UCX_VERBOSE(1, "ucp_worker_query failed: %d", status); - ret = OMPI_ERROR; - goto error_nomem; + ret = OMPI_ERR_TEMP_OUT_OF_RESOURCE; + goto select_unlock; } if (mca_osc_ucx_component.enable_mpi_threads == true && worker_attr.thread_mode != UCS_THREAD_MODE_MULTI) { OSC_UCX_VERBOSE(1, "ucx does not support multithreading"); - ret = OMPI_ERROR; - goto error_nomem; + ret = OMPI_ERR_TEMP_OUT_OF_RESOURCE; + goto select_unlock; } mca_osc_ucx_component.env_initialized = true; env_initialized = true; } + + mca_osc_ucx_component.num_modules++; + + OSC_UCX_ASSERT(mca_osc_ucx_component.num_modules > 0); + if (1 == mca_osc_ucx_component.num_modules) { + ret = opal_progress_register(progress_callback); + if (OMPI_SUCCESS != ret) { + OSC_UCX_VERBOSE(1, "opal_progress_register failed: %d", ret); + goto select_unlock; + } + } + +select_unlock: + _osc_ucx_init_unlock(); + switch(ret) { + case OMPI_SUCCESS: + break; + case OMPI_ERROR: + goto error; + case 
OMPI_ERR_TEMP_OUT_OF_RESOURCE: + goto error_nomem; + default: + goto error; + } /* create module structure */ module = (ompi_osc_ucx_module_t *)calloc(1, sizeof(ompi_osc_ucx_module_t)); @@ -362,6 +426,7 @@ static int component_select(struct ompi_win_t *win, void **base, size_t size, in goto error_nomem; } + /* fill in the function pointer part */ memcpy(module, &ompi_osc_ucx_module_template, sizeof(ompi_osc_base_module_t)); @@ -511,22 +576,27 @@ static int component_select(struct ompi_win_t *win, void **base, size_t size, in goto error; } - my_info_len = 2 * sizeof(uint64_t) + rkey_buffer_size + state_rkey_buffer_size; + size_u64 = (uint64_t)size; + my_info_len = 3 * sizeof(uint64_t) + rkey_buffer_size + state_rkey_buffer_size; my_info = malloc(my_info_len); if (my_info == NULL) { ret = OMPI_ERR_TEMP_OUT_OF_RESOURCE; goto error; } + info_offset = 0; + if (flavor == MPI_WIN_FLAVOR_ALLOCATE || flavor == MPI_WIN_FLAVOR_CREATE) { - memcpy(my_info, base, sizeof(uint64_t)); + memcpy_off(my_info, base, sizeof(uint64_t), info_offset); } else { - memcpy(my_info, &zero, sizeof(uint64_t)); + memcpy_off(my_info, &zero, sizeof(uint64_t), info_offset); } - memcpy((void *)((char *)my_info + sizeof(uint64_t)), &state_base, sizeof(uint64_t)); - memcpy((void *)((char *)my_info + 2 * sizeof(uint64_t)), rkey_buffer, rkey_buffer_size); - memcpy((void *)((char *)my_info + 2 * sizeof(uint64_t) + rkey_buffer_size), - state_rkey_buffer, state_rkey_buffer_size); + memcpy_off(my_info, &state_base, sizeof(uint64_t), info_offset); + memcpy_off(my_info, &size_u64, sizeof(uint64_t), info_offset); + memcpy_off(my_info, rkey_buffer, rkey_buffer_size, info_offset); + memcpy_off(my_info, state_rkey_buffer, state_rkey_buffer_size, info_offset); + + assert(my_info_len == info_offset); ret = allgather_len_and_info(my_info, (int)my_info_len, &recv_buf, disps, module->comm); if (ret != OMPI_SUCCESS) { @@ -542,25 +612,32 @@ static int component_select(struct ompi_win_t *win, void **base, size_t size, in 
for (i = 0; i < comm_size; i++) { ucp_ep_h ep = OSC_UCX_GET_EP(module->comm, i); + uint64_t dest_size; assert(ep != NULL); - memcpy(&(module->win_info_array[i]).addr, &recv_buf[disps[i]], sizeof(uint64_t)); - memcpy(&(module->state_info_array[i]).addr, &recv_buf[disps[i] + sizeof(uint64_t)], - sizeof(uint64_t)); + info_offset = disps[i]; + + memcpy(&(module->win_info_array[i]).addr, &recv_buf[info_offset], sizeof(uint64_t)); + info_offset += sizeof(uint64_t); + memcpy(&(module->state_info_array[i]).addr, &recv_buf[info_offset], sizeof(uint64_t)); + info_offset += sizeof(uint64_t); + memcpy(&dest_size, &recv_buf[info_offset], sizeof(uint64_t)); + info_offset += sizeof(uint64_t); (module->win_info_array[i]).rkey_init = false; - if (size > 0 && (flavor == MPI_WIN_FLAVOR_ALLOCATE || flavor == MPI_WIN_FLAVOR_CREATE)) { - status = ucp_ep_rkey_unpack(ep, &(recv_buf[disps[i] + 2 * sizeof(uint64_t)]), + if (dest_size > 0 && (flavor == MPI_WIN_FLAVOR_ALLOCATE || flavor == MPI_WIN_FLAVOR_CREATE)) { + status = ucp_ep_rkey_unpack(ep, &recv_buf[info_offset], &((module->win_info_array[i]).rkey)); if (status != UCS_OK) { OSC_UCX_VERBOSE(1, "ucp_ep_rkey_unpack failed: %d", status); ret = OMPI_ERROR; goto error; } + info_offset += rkey_sizes[i]; (module->win_info_array[i]).rkey_init = true; } - status = ucp_ep_rkey_unpack(ep, &(recv_buf[disps[i] + 2 * sizeof(uint64_t) + rkey_sizes[i]]), + status = ucp_ep_rkey_unpack(ep, &recv_buf[info_offset], &((module->state_info_array[i]).rkey)); if (status != UCS_OK) { OSC_UCX_VERBOSE(1, "ucp_ep_rkey_unpack failed: %d", status); @@ -643,8 +720,10 @@ static int component_select(struct ompi_win_t *win, void **base, size_t size, in ucp_ep_destroy(ep); } } - if (progress_registered) opal_progress_unregister(progress_callback); - if (module) free(module); + if (module) { + free(module); + ompi_osc_ucx_unregister_progress(); + } error_nomem: if (env_initialized == true) { @@ -787,6 +866,11 @@ int ompi_osc_ucx_free(struct ompi_win_t *win) { 
ucp_worker_progress(mca_osc_ucx_component.ucp_worker); } + ret = opal_common_ucx_worker_flush(mca_osc_ucx_component.ucp_worker); + if (OMPI_SUCCESS != ret) { + OSC_UCX_VERBOSE(1, "opal_common_ucx_worker_flush failed: %d", ret); + } + ret = module->comm->c_coll->coll_barrier(module->comm, module->comm->c_coll->coll_barrier_module); @@ -812,6 +896,7 @@ int ompi_osc_ucx_free(struct ompi_win_t *win) { ompi_comm_free(&module->comm); free(module); + ompi_osc_ucx_unregister_progress(); return ret; } diff --git a/ompi/mca/osc/ucx/osc_ucx_passive_target.c b/ompi/mca/osc/ucx/osc_ucx_passive_target.c index 3a7ad3e9e24..89920a29918 100644 --- a/ompi/mca/osc/ucx/osc_ucx_passive_target.c +++ b/ompi/mca/osc/ucx/osc_ucx_passive_target.c @@ -44,6 +44,7 @@ static inline int start_shared(ompi_osc_ucx_module_t *module, int target) { } else { break; } + ucp_worker_progress(mca_osc_ucx_component.ucp_worker); } return OMPI_SUCCESS; @@ -72,7 +73,7 @@ static inline int start_exclusive(ompi_osc_ucx_module_t *module, int target) { uint64_t remote_addr = (module->state_info_array)[target].addr + OSC_UCX_STATE_LOCK_OFFSET; ucs_status_t status; - while (result_value != TARGET_LOCK_UNLOCKED) { + for (;;) { status = opal_common_ucx_atomic_cswap(ep, TARGET_LOCK_UNLOCKED, TARGET_LOCK_EXCLUSIVE, &result_value, sizeof(result_value), remote_addr, rkey, @@ -80,27 +81,28 @@ static inline int start_exclusive(ompi_osc_ucx_module_t *module, int target) { if (status != UCS_OK) { return OMPI_ERROR; } - } + if (result_value == TARGET_LOCK_UNLOCKED) { + return OMPI_SUCCESS; + } - return OMPI_SUCCESS; + ucp_worker_progress(mca_osc_ucx_component.ucp_worker); + } } static inline int end_exclusive(ompi_osc_ucx_module_t *module, int target) { - uint64_t result_value = 0; ucp_ep_h ep = OSC_UCX_GET_EP(module->comm, target); ucp_rkey_h rkey = (module->state_info_array)[target].rkey; uint64_t remote_addr = (module->state_info_array)[target].addr + OSC_UCX_STATE_LOCK_OFFSET; - int ret; + ucs_status_t status; - ret = 
opal_common_ucx_atomic_fetch(ep, UCP_ATOMIC_FETCH_OP_SWAP, TARGET_LOCK_UNLOCKED, - &result_value, sizeof(result_value), - remote_addr, rkey, mca_osc_ucx_component.ucp_worker); - if (OMPI_SUCCESS != ret) { - return ret; + status = ucp_atomic_post(ep, UCP_ATOMIC_POST_OP_ADD, + -((int64_t)TARGET_LOCK_EXCLUSIVE), sizeof(uint64_t), + remote_addr, rkey); + if (UCS_OK != status) { + OSC_UCX_VERBOSE(1, "ucp_atomic_post(OP_ADD) failed: %d", status); + return OMPI_ERROR; } - assert(result_value >= TARGET_LOCK_EXCLUSIVE); - return OMPI_SUCCESS; } diff --git a/ompi/mca/osc/ucx/osc_ucx_request.c b/ompi/mca/osc/ucx/osc_ucx_request.c index efbd9c38cc6..4be050e3dcc 100644 --- a/ompi/mca/osc/ucx/osc_ucx_request.c +++ b/ompi/mca/osc/ucx/osc_ucx_request.c @@ -55,6 +55,7 @@ void req_completion(void *request, ucs_status_t status) { if(req->external_req != NULL) { ompi_request_complete(&(req->external_req->super), true); + req->external_req = NULL; ucp_request_release(req); mca_osc_ucx_component.num_incomplete_req_ops--; assert(mca_osc_ucx_component.num_incomplete_req_ops >= 0); diff --git a/ompi/mca/pml/bfo/.opal_ignore b/ompi/mca/pml/bfo/.opal_ignore deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/ompi/mca/pml/bfo/Makefile.am b/ompi/mca/pml/bfo/Makefile.am deleted file mode 100644 index 7565d84c13e..00000000000 --- a/ompi/mca/pml/bfo/Makefile.am +++ /dev/null @@ -1,78 +0,0 @@ -# -# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana -# University Research and Technology -# Corporation. All rights reserved. -# Copyright (c) 2004-2005 The University of Tennessee and The University -# of Tennessee Research Foundation. All rights -# reserved. -# Copyright (c) 2004-2009 High Performance Computing Center Stuttgart, -# University of Stuttgart. All rights reserved. -# Copyright (c) 2004-2005 The Regents of the University of California. -# All rights reserved. -# Copyright (c) 2009-2010 Oracle and/or its affiliates. All rights reserved. 
-# Copyright (c) 2009-2010 Cisco Systems, Inc. All rights reserved. -# -# Copyright (c) 2017 IBM Corporation. All rights reserved. -# $COPYRIGHT$ -# -# Additional copyrights may follow -# -# $HEADER$ -# - -AM_CPPFLAGS = -DPML_BFO=1 - -dist_ompidata_DATA = \ - help-mpi-pml-bfo.txt - -EXTRA_DIST = post_configure.sh - -bfo_sources = \ - pml_bfo.c \ - pml_bfo.h \ - pml_bfo_comm.c \ - pml_bfo_comm.h \ - pml_bfo_component.c \ - pml_bfo_component.h \ - pml_bfo_failover.c \ - pml_bfo_failover.h \ - pml_bfo_hdr.h \ - pml_bfo_iprobe.c \ - pml_bfo_irecv.c \ - pml_bfo_isend.c \ - pml_bfo_progress.c \ - pml_bfo_rdma.c \ - pml_bfo_rdma.h \ - pml_bfo_rdmafrag.c \ - pml_bfo_rdmafrag.h \ - pml_bfo_recvfrag.c \ - pml_bfo_recvfrag.h \ - pml_bfo_recvreq.c \ - pml_bfo_recvreq.h \ - pml_bfo_sendreq.c \ - pml_bfo_sendreq.h \ - pml_bfo_start.c - -# If we have CUDA support requested, build the CUDA file also -if OPAL_cuda_support -bfo_sources += \ - pml_bfo_cuda.c -endif - -if MCA_BUILD_ompi_pml_bfo_DSO -component_noinst = -component_install = mca_pml_bfo.la -else -component_noinst = libmca_pml_bfo.la -component_install = -endif - -mcacomponentdir = $(ompilibdir) -mcacomponent_LTLIBRARIES = $(component_install) -mca_pml_bfo_la_SOURCES = $(bfo_sources) -mca_pml_bfo_la_LDFLAGS = -module -avoid-version -mca_pml_bfo_la_LIBADD = $(top_builddir)/ompi/lib@OMPI_LIBMPI_NAME@.la - -noinst_LTLIBRARIES = $(component_noinst) -libmca_pml_bfo_la_SOURCES = $(bfo_sources) -libmca_pml_bfo_la_LDFLAGS = -module -avoid-version diff --git a/ompi/mca/pml/bfo/README b/ompi/mca/pml/bfo/README deleted file mode 100644 index 88c3b1a70f1..00000000000 --- a/ompi/mca/pml/bfo/README +++ /dev/null @@ -1,340 +0,0 @@ -Copyright (c) 2010 Oracle and/or its affiliates. All rights reserved. - -BFO DESIGN DOCUMENT -This document describes the use and design of the bfo. In addition, -there is a section at the end explaining why this functionality was -not merged into the ob1 PML. - -1. 
GENERAL USAGE -First, one has to configure the failover code into the openib BTL so -that bfo will work correctly. To do this: -configure --enable-btl-openib-failover. - -Then, when running one needs to select the bfo PML explicitly. -mpirun --mca pml bfo - -Note that one needs to both configure with --enable-btl-openib-failover -and run with --mca pml bfo to get the failover support. If one of -these two steps is skipped, then the MPI job will just abort in the -case of an error like it normally does with the ob1 PML. - -2. GENERAL FUNCTION -The bfo failover feature requires two or more openib BTLs in use. In -normal operation, it will stripe the communication over the multiple -BTLs. When an error is detected, it will stop using the BTL that -incurred the error and continue the communication over the remaining -BTL. Once a BTL has been mapped out, it cannot be used by the job -again, even if the underlying fabric becomes functional again. Only -new jobs started after the fabric comes back up will use both BTLs. - -The bfo works in conjunction with changes that were made in the openib -BTL. As noted above, those changes need to be configured into the -BTL for everything to work properly. - -The bfo only fails over between openib BTLs. It cannot failover from -an openib BTL to TCP, for example. - -3. GENERAL DESIGN -The bfo (Btl FailOver) PML was designed to work in clusters that have -multiple openib BTLs. It was designed to be lightweight so as to -avoid any adverse effects on latency. To that end, there is no -tracking of fragments or messages in the bfo PML. Rather, it depends -on the underlying BTL to notify it of each fragment that has an error. -The bfo then decides what needs to be done based on the type of -fragment that gets an error. - -No additional sequence numbers were introduced in the bfo. Instead, -it makes use of the sequence numbers that exist in the MATCH, RNDV and -RGET fragment header. 
In that way, duplicate fragments that have -MATCH information in them can be detected. Other fragments, like PUT -and ACK, are never retransmitted so it does not matter that they do -not have sequence numbers. The FIN header was a special case in that -it was changed to include the MATCH header so that the tag, source, -and context fields could be used to check for duplicate FINs. - -Note that the assumption is that the underlying BTL will always issue -a callback with an error flag when it thinks a fragment has an error. -This means that even after an error is detected on a BTL, the BTL -continues to be checked for any other messages that may also complete -with an error. This is potentially a unique characteristic of the -openib BTL when running over RC connections that allows the BFO to -work properly. - -One scenario that is particularly difficult to handle is the case -where a fragment has an error but the message actually makes it to the -other side. It is because of this that all fragments need to be -checked to make sure they are not a duplicate. This scenario also -complicates some of the rendezvous protocols as the two sides may not -agree where the problem occurred. For example, one can imagine a -sender getting an error on a final FIN message, but the FIN message -actually arrives at the other side. The receiver thinks the -communication is done and moves on. The sender thinks there was a -problem, and that the communication needs to restart. - -It is also important to note that a message cannot signal a successful -completion and *not* make it to the receiver. This would probably cause -the bfo to hang. - -4. ERRORS -Errors are detected in the openib BTL layer and propagated to the PML -layer. Typically, the errors occur while polling the completion -queue, but can happen in other areas as well. When an error occurs, -an additional callback is called so the PML can map out the connection -for future sending. 
Then the callback associated with the fragment is -called, but with the error field set to OMPI_ERROR. This way, the PML -knows that this fragment may not have made it to the remote side. - -The first callback into the PML is via the mca_pml_bfo_error_handler() -callback and the PML uses this to remove a connection for future -sending. If the error_proc_t field is NULL, then the entire BTL is -removed for any future communication. If the error_proc_t is not -NULL, then the BTL is only removed for the connection associated with -the error_proc_t. - -The second callback is the standard one for a completion event, and -this can trigger various activities in the PML. The regular callback -function is called but the status is set to OMPI_ERROR. The PML layer -detects this and calls some failover specific routines depending on -the type of fragment that got the error. - - -5. RECOVERY OF MATCH FRAGMENTS -Note: For a general description of how the various fragments interact, -see Appendix 1 at the end of this document. - -In the case of a MATCH fragment, the fragment is simply resent. Care -has to be taken with a MATCH fragment that is sent via the standard -interface and one that is sent via the sendi interface. In the -standard send, the send request is still available and is therefore -reset and reused to send the MATCH fragment. In the case of the sendi -fragment, the send request is gone, so the fragment is regenerated -from the information contained within the fragment. - -6. RECOVERY OF RNDV or LARGE MESSAGE RDMA -In the case of a large message RDMA transfer or a RNDV transfer where -the message consists of several fragments, the restart is a little -more complicated. This includes fragments like RNDV, PUT, RGET, FRAG, -FIN, and RDMA write and RDMA read completions. In most cases, the -requests associated with these fragments are reset and restarted. - -First, it should be pointed out that a new variable was added to the -send and receive requests.
This variable tracks outstanding send -events that have not yet received their completion events. This new -variable is used so that a request is not restarted until all the -outstanding events have completed. If one does not wait for the -outstanding events to complete, then one may restart a request and -then a completion event will happen on the wrong request. - -There is a second variable added to each request and that is one that -shows whether the request is already in an error state. When a request -reaches the state that it has an error flagged on it and the outstanding -completion events are down to zero, it can start the restart dance -as described below. - -7. SPECIAL CASE FOR FIN FRAGMENT -Like the MATCH fragment, the FIN message is also simply resent. Like -the sendi MATCH fragment, there may be no request associated with the -FIN message when it gets an error, so the fragment is recreated from -the information in the fragment. The FIN fragment was modified to -have additional information like what is in a MATCH fragment including -the context, source, and tag. In this way, we can figure out if the -FIN message is a duplicate on the receiving side. - -8. RESTART DANCE -When the bfo determines that there are no outstanding completion events, -a restart dance is initiated. There are four new PML message types that -have been created to participate in the dance. - 1. RNDVRESTARTNOTIFY - 2. RECVERRNOTIFY - 3. RNDVRESTARTACK - 4. RNDVRESTARTNACK - -When the send request is in an error state and the outstanding -completion events is zero, RNDVRESTARTNOTIFY is sent from the sender -to the receiver to let it know that the communication needs to be -restarted. Upon receipt of the RNDVRESTARTNOTIFY, the receiver first -checks to make sure that it is still pointing to a valid receiver -request. If so, it marks the receive request in error. It then -checks to see if there are any outstanding completion events on the -receiver. 
If there are no outstanding completion events, the receiver -sends the RNDVRESTARTACK. If there are outstanding completion events, -then the RNDVRESTARTACK gets sent later when a completion event occurs -that brings the outstanding event count to zero. - -In the case that the receiver determines that it is no longer looking -at a valid receive request, which means the request is complete, the -receiver responds with a RNDVRESTARTNACK. While rare, this case can -happen, for example, when a final FRAG message triggers an error on the -sender, but actually makes it to the receiver. - -The RECVERRNOTIFY fragment is used so the receiver can let the sender -know that it had an error. The sender then waits for all of -its completion events, and then sends a RNDVRESTARTNOTIFY. - -All the handling of these new messages is contained in the -pml_bfo_failover files. - -9. BTL SUPPORT -The openib BTL also supplies a lot of support for the bfo PML. First, -fragments can be stored in the BTL during normal operation if -resources become scarce. This means that when an error is detected in -the BTL, it needs to scour its internal queues for fragments that are -destined for the BTL and error them out. The function -error_out_all_pending_frags() takes care of this functionality. And -some of the fragments stored can be coalesced, so care has to be taken -to tease out each message from a coalesced fragment. - -There is also some special code in the BTL to handle some strange -occurrences that were observed in the BTL. First, there are times -where only one half of the connection gets an error. This can result -in a mismatch between what the PML thinks is available to it and can -cause hangs. Therefore, when a BTL detects an error, it sends a -special message down the working BTL connection to tell the remote -side that it needs to be brought down as well. - -Secondly, it has been observed that a message can get stuck in the -eager RDMA connection between two BTLs.
In this case, an error is -detected on one side, but the other side never sees the message. -Therefore, a special message is sent to the other side telling it to -move along in the eager RDMA connection. This is all somewhat -confusing. See the code in the btl_openib_failover.c file for the -details. - -10. MERGING -Every effort was made to try and merge the bfo PML into the ob1 PML. -The idea was that any upgrades to the ob1 PML would automatically make -it into the bfo PML and this would enhance maintainability of all the -code. However, it was deemed that this merging would cause more -problems than it would solve. What was attempted and why the -conclusion was made are documented here. - -One can look at the bfo and easily see the differences between it and -ob1. All the bfo specific code is surrounded by #if PML_BFO. In -addition, there are two additional files in the bfo, -pml_bfo_failover.c and pml_bfo_failover.h. - -To merge them, the following was attempted. First, add all the code -in #if regions into the ob1 PML. As of this writing, there are 73 -#ifs that would have to be added into ob1. - -Secondly, remove almost all the pml_bfo files and replace them with -links to the ob1 files. - -Third, create a new header file that did name shifting of all the -functions so that ob1 and bfo could live together. This also included -having to create macros for the names of header files as well. To -help illustrate the name shifting issue, here is what the file might -look like in the bfo directory. 
- -/* Need macros for the header files as they are different in the - * different PMLs */ -#define PML "bfo" -#define PML_OB1_H "pml_bfo.h" -#define PML_OB1_COMM_H "pml_bfo_comm.h" -#define PML_OB1_COMPONENT_H "pml_bfo_component.h" -#define PML_OB1_HDR_H "pml_bfo_hdr.h" -#define PML_OB1_RDMA_H "pml_bfo_rdma.h" -#define PML_OB1_RDMAFRAG_H "pml_bfo_rdmafrag.h" -#define PML_OB1_RECVFRAG_H "pml_bfo_recvfrag.h" -#define PML_OB1_RECVREQ_H "pml_bfo_recvreq.h" -#define PML_OB1_SENDREQ_H "pml_bfo_sendreq.h" - -/* Name shifting of functions from ob1 to bfo (incomplete list) */ -#define mca_pml_ob1 mca_pml_bfo -#define mca_pml_ob1_t mca_pml_bfo_t -#define mca_pml_ob1_component mca_pml_bfo_component -#define mca_pml_ob1_add_procs mca_pml_bfo_add_procs -#define mca_pml_ob1_del_procs mca_pml_bfo_del_procs -#define mca_pml_ob1_enable mca_pml_bfo_enable -#define mca_pml_ob1_progress mca_pml_bfo_progress -#define mca_pml_ob1_add_comm mca_pml_bfo_add_comm -#define mca_pml_ob1_del_comm mca_pml_bfo_del_comm -#define mca_pml_ob1_irecv_init mca_pml_bfo_irecv_init -#define mca_pml_ob1_irecv mca_pml_bfo_irecv -#define mca_pml_ob1_recv mca_pml_bfo_recv -#define mca_pml_ob1_isend_init mca_pml_bfo_isend_init -#define mca_pml_ob1_isend mca_pml_bfo_isend -#define mca_pml_ob1_send mca_pml_bfo_send -#define mca_pml_ob1_iprobe mca_pml_bfo_iprobe -[...and much more ...] - -The pml_bfo_hdr.h file was not a link because the changes in it were -so extensive. Also the Makefile was kept separate so it could include -the additional failover files as well as add a compile directive that -would force the files to be compiled as bfo instead of ob1. - -After these changes were made, several independent developers reviewed -the results and concluded that making these changes would have too -much of a negative impact on ob1 maintenance. First, the code became -much harder to read with all the additional #ifdefs. 
Secondly, the -possibility of adding other features, like csum, to ob1 would only -make this issue even worse. Therefore, it was decided to keep the bfo -PML separate from ob1. - -11. UTILITIES -In an ideal world, any bug fixes that are made in the ob1 PML would -also be made in the csum and the bfo PMLs. However, that does not -always happen. Therefore, there are two new utilities added to the -contrib directory. - -check-ob1-revision.pl -check-ob1-pml-diffs.pl - -The first one can be run to see if ob1 has changed from its last known -state. Here is an example. - - machine =>check-ob1-revision.pl -Running svn diff -r24138 ../ompi/mca/pml/ob1 -No new changes detected in ob1. Everything is fine. - -If there are differences, then one needs to review them and potentially -add them to the bfo (and csum also if one feels like it). -After that, bump up the value in the script to the latest value. - -The second script allows one to see the differences between the ob1 -and bfo PML. Here is an example. - - machine =>check-ob1-pml-diffs.pl - -Starting script to check differences between bfo and ob1... -Files Compared: pml_ob1.c and pml_bfo.c -No differences encountered -Files Compared: pml_ob1.h and pml_bfo.h -[...snip...] -Files Compared: pml_ob1_start.c and pml_bfo_start.c -No differences encountered - -There is a lot more in the script that tells how it is used. - - -Appendix 1: SIMPLE OVERVIEW OF COMMUNICATION PROTOCOLS -The drawings below attempt to describe some of the general flow of -fragments in the various protocols that are supported in the PMLs. -The "read" and "write" are actual RDMA actions and do not pertain to -fragments that are sent. As can be inferred, they use FIN messages to -indicate their completion. 
- - -MATCH PROTOCOL -sender >->->-> MATCH >->->-> receiver - -SEND WITH MULTIPLE FRAGMENTS -sender >->->-> RNDV >->->-> receiver - <-<-<-< ACK <-<-<-< - >->->-> FRAG >->->-> - >->->-> FRAG >->->-> - >->->-> FRAG >->->-> - -RDMA PUT -sender >->->-> RNDV >->->-> receiver - <-<-<-< PUT <-<-<-< - <-<-<-< PUT <-<-<-< - >->->-> write >->->-> - >->->-> FIN >->->-> - >->->-> write >->->-> - >->->-> FIN >->->-> - -RMA GET -sender >->->-> RGET >->->-> receiver - <-<-<-< read <-<-<-< - <-<-<-< FIN <-<-<-< diff --git a/ompi/mca/pml/bfo/configure.m4 b/ompi/mca/pml/bfo/configure.m4 deleted file mode 100644 index 4001c94d650..00000000000 --- a/ompi/mca/pml/bfo/configure.m4 +++ /dev/null @@ -1,27 +0,0 @@ -# -*- shell-script -*- -# -# Copyright (c) 2013 Sandia National Laboratories. All rights reserved. -# $COPYRIGHT$ -# -# Additional copyrights may follow -# -# $HEADER$ -# - -# MCA_ompi_pml_bfo_POST_CONFIG(will_build) -# ---------------------------------------- -# The BFO PML requires a BML endpoint tag to compile, so require it. -# Require in POST_CONFIG instead of CONFIG so that we only require it -# if we're not disabled. -AC_DEFUN([MCA_ompi_pml_bfo_POST_CONFIG], [ - AS_IF([test "$1" = "1"], [OMPI_REQUIRE_ENDPOINT_TAG([BML])]) -])dnl - -# MCA_ompi_pml_bfo_CONFIG(action-if-can-compile, -# [action-if-cant-compile]) -# ------------------------------------------------ -# We can always build, unless we were explicitly disabled. -AC_DEFUN([MCA_ompi_pml_bfo_CONFIG],[ - AC_CONFIG_FILES([ompi/mca/pml/bfo/Makefile]) - [$1] -])dnl diff --git a/ompi/mca/pml/bfo/help-mpi-pml-bfo.txt b/ompi/mca/pml/bfo/help-mpi-pml-bfo.txt deleted file mode 100644 index 4bbff8ff6d7..00000000000 --- a/ompi/mca/pml/bfo/help-mpi-pml-bfo.txt +++ /dev/null @@ -1,20 +0,0 @@ -# -*- text -*- -# -# Copyright (c) 2009-2010 Oracle and/or its affiliates. All rights reserved. 
-# $COPYRIGHT$ -# -# Additional copyrights may follow -# -# $HEADER$ -# -[eager_limit_too_small] -The "eager limit" MCA parameter in the %s BTL was set to a value which -is too low for Open MPI to function properly. Please re-run your job -with a higher eager limit value for this BTL; the exact MCA parameter -name and its corresponding minimum value is shown below. - - Local host: %s - BTL name: %s - BTL eager limit value: %d (set via btl_%s_eager_limit) - BTL eager limit minimum: %d - MCA parameter name: btl_%s_eager_limit diff --git a/ompi/mca/pml/bfo/owner.txt b/ompi/mca/pml/bfo/owner.txt deleted file mode 100644 index f1dfe8edb40..00000000000 --- a/ompi/mca/pml/bfo/owner.txt +++ /dev/null @@ -1,7 +0,0 @@ -# -# owner/status file -# owner: institution that is responsible for this package -# status: e.g. active, maintenance, unmaintained -# -owner: NVIDIA -status: unmaintained diff --git a/ompi/mca/pml/bfo/pml_bfo.c b/ompi/mca/pml/bfo/pml_bfo.c deleted file mode 100644 index e3a1beb447a..00000000000 --- a/ompi/mca/pml/bfo/pml_bfo.c +++ /dev/null @@ -1,873 +0,0 @@ -/* -*- Mode: C; c-basic-offset:4 ; -*- */ -/* - * Copyright (c) 2004-2010 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2009 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * Copyright (c) 2008 UT-Battelle, LLC. All rights reserved. - * Copyright (c) 2006-2008 University of Houston. All rights reserved. - * Copyright (c) 2009-2010 Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2011 Sandia National Laboratories. All rights reserved. - * Copyright (c) 2011-2012 Los Alamos National Security, LLC. 
- * All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "ompi_config.h" - -#include -#include - -#include "opal/class/opal_bitmap.h" -#include "opal/util/output.h" -#include "opal/util/show_help.h" -#include "opal/mca/btl/btl.h" -#include "opal/mca/btl/base/base.h" -#include "opal/mca/pmix/pmix.h" - -#include "ompi/mca/pml/pml.h" -#include "ompi/mca/pml/base/base.h" -#include "ompi/mca/pml/base/base.h" -#include "ompi/mca/bml/base/base.h" -#include "ompi/runtime/ompi_cr.h" - -#include "pml_bfo.h" -#include "pml_bfo_component.h" -#include "pml_bfo_comm.h" -#include "pml_bfo_hdr.h" -#include "pml_bfo_recvfrag.h" -#include "pml_bfo_sendreq.h" -#include "pml_bfo_recvreq.h" -#include "pml_bfo_rdmafrag.h" -#if PML_BFO -#include "pml_bfo_failover.h" -#endif /* PML_BFO */ - -mca_pml_bfo_t mca_pml_bfo = { - { - mca_pml_bfo_add_procs, - mca_pml_bfo_del_procs, - mca_pml_bfo_enable, - mca_pml_bfo_progress, - mca_pml_bfo_add_comm, - mca_pml_bfo_del_comm, - mca_pml_bfo_irecv_init, - mca_pml_bfo_irecv, - mca_pml_bfo_recv, - mca_pml_bfo_isend_init, - mca_pml_bfo_isend, - mca_pml_bfo_send, - mca_pml_bfo_iprobe, - mca_pml_bfo_probe, - mca_pml_bfo_start, - mca_pml_bfo_improbe, - mca_pml_bfo_mprobe, - mca_pml_bfo_imrecv, - mca_pml_bfo_mrecv, - mca_pml_bfo_dump, - mca_pml_bfo_ft_event, - 65535, - INT_MAX - } -}; - - -void mca_pml_bfo_error_handler( struct mca_btl_base_module_t* btl, - int32_t flags, ompi_proc_t* errproc, - char* btlinfo ); - -int mca_pml_bfo_enable(bool enable) -{ - if( false == enable ) { - return OMPI_SUCCESS; - } - - OBJ_CONSTRUCT(&mca_pml_bfo.lock, opal_mutex_t); - - /* fragments */ - OBJ_CONSTRUCT(&mca_pml_bfo.rdma_frags, opal_free_list_t); - opal_free_list_init( &mca_pml_bfo.rdma_frags, - sizeof(mca_pml_bfo_rdma_frag_t), - opal_cache_line_size, - OBJ_CLASS(mca_pml_bfo_rdma_frag_t), - 0,opal_cache_line_size, - mca_pml_bfo.free_list_num, - mca_pml_bfo.free_list_max, - mca_pml_bfo.free_list_inc, - NULL, 0, 
NULL, NULL, NULL ); - - OBJ_CONSTRUCT(&mca_pml_bfo.recv_frags, opal_free_list_t); - opal_free_list_init( &mca_pml_bfo.recv_frags, - sizeof(mca_pml_bfo_recv_frag_t) + mca_pml_bfo.unexpected_limit, - opal_cache_line_size, - OBJ_CLASS(mca_pml_bfo_recv_frag_t), - 0,opal_cache_line_size, - mca_pml_bfo.free_list_num, - mca_pml_bfo.free_list_max, - mca_pml_bfo.free_list_inc, - NULL, 0, NULL, NULL, NULL ); - - OBJ_CONSTRUCT(&mca_pml_bfo.pending_pckts, opal_free_list_t); - opal_free_list_init( &mca_pml_bfo.pending_pckts, - sizeof(mca_pml_bfo_pckt_pending_t), - opal_cache_line_size, - OBJ_CLASS(mca_pml_bfo_pckt_pending_t), - 0,opal_cache_line_size, - mca_pml_bfo.free_list_num, - mca_pml_bfo.free_list_max, - mca_pml_bfo.free_list_inc, - NULL, 0, NULL, NULL, NULL ); - - OBJ_CONSTRUCT(&mca_pml_bfo.buffers, opal_free_list_t); - OBJ_CONSTRUCT(&mca_pml_bfo.send_ranges, opal_free_list_t); - opal_free_list_init( &mca_pml_bfo.send_ranges, - sizeof(mca_pml_bfo_send_range_t) + - (mca_pml_bfo.max_send_per_range - 1) * sizeof(mca_pml_bfo_com_btl_t), - opal_cache_line_size, - OBJ_CLASS(mca_pml_bfo_send_range_t), - 0,opal_cache_line_size, - mca_pml_bfo.free_list_num, - mca_pml_bfo.free_list_max, - mca_pml_bfo.free_list_inc, - NULL, 0, NULL, NULL, NULL ); - - /* pending operations */ - OBJ_CONSTRUCT(&mca_pml_bfo.send_pending, opal_list_t); - OBJ_CONSTRUCT(&mca_pml_bfo.recv_pending, opal_list_t); - OBJ_CONSTRUCT(&mca_pml_bfo.pckt_pending, opal_list_t); - OBJ_CONSTRUCT(&mca_pml_bfo.rdma_pending, opal_list_t); - /* missing communicator pending list */ - OBJ_CONSTRUCT(&mca_pml_bfo.non_existing_communicator_pending, opal_list_t); - - /** - * If we get here this is the PML who get selected for the run. We - * should get ownership for the send and receive requests list, and - * initialize them with the size of our own requests. 
- */ - opal_free_list_init( &mca_pml_base_send_requests, - sizeof(mca_pml_bfo_send_request_t) + - (mca_pml_bfo.max_rdma_per_request - 1) * - sizeof(mca_pml_bfo_com_btl_t), - opal_cache_line_size, - OBJ_CLASS(mca_pml_bfo_send_request_t), - 0,opal_cache_line_size, - mca_pml_bfo.free_list_num, - mca_pml_bfo.free_list_max, - mca_pml_bfo.free_list_inc, - NULL, 0, NULL, NULL, NULL ); - - opal_free_list_init( &mca_pml_base_recv_requests, - sizeof(mca_pml_bfo_recv_request_t) + - (mca_pml_bfo.max_rdma_per_request - 1) * - sizeof(mca_pml_bfo_com_btl_t), - opal_cache_line_size, - OBJ_CLASS(mca_pml_bfo_recv_request_t), - 0,opal_cache_line_size, - mca_pml_bfo.free_list_num, - mca_pml_bfo.free_list_max, - mca_pml_bfo.free_list_inc, - NULL, 0, NULL, NULL, NULL ); - - mca_pml_bfo.enabled = true; - return OMPI_SUCCESS; -} - -int mca_pml_bfo_add_comm(ompi_communicator_t* comm) -{ - /* allocate pml specific comm data */ - mca_pml_bfo_comm_t* pml_comm = OBJ_NEW(mca_pml_bfo_comm_t); - opal_list_item_t *item, *next_item; - mca_pml_bfo_recv_frag_t* frag; - mca_pml_bfo_comm_proc_t* pml_proc; - mca_pml_bfo_match_hdr_t* hdr; - int i; - - if (NULL == pml_comm) { - return OMPI_ERR_OUT_OF_RESOURCE; - } - - /* should never happen, but it was, so check */ - if (comm->c_contextid > mca_pml_bfo.super.pml_max_contextid) { - OBJ_RELEASE(pml_comm); - return OMPI_ERR_OUT_OF_RESOURCE; - } - - mca_pml_bfo_comm_init_size(pml_comm, comm->c_remote_group->grp_proc_count); - comm->c_pml_comm = pml_comm; - - for( i = 0; i < comm->c_remote_group->grp_proc_count; i++ ) { - pml_comm->procs[i].ompi_proc = ompi_group_peer_lookup(comm->c_remote_group,i); - OBJ_RETAIN(pml_comm->procs[i].ompi_proc); - } - /* Grab all related messages from the non_existing_communicator pending queue */ - for( item = opal_list_get_first(&mca_pml_bfo.non_existing_communicator_pending); - item != opal_list_get_end(&mca_pml_bfo.non_existing_communicator_pending); - item = next_item ) { - frag = (mca_pml_bfo_recv_frag_t*)item; - next_item 
= opal_list_get_next(item); - hdr = &frag->hdr.hdr_match; - - /* Is this fragment for the current communicator ? */ - if( frag->hdr.hdr_match.hdr_ctx != comm->c_contextid ) - continue; - - /* As we now know we work on a fragment for this communicator - * we should remove it from the - * non_existing_communicator_pending list. */ - opal_list_remove_item( &mca_pml_bfo.non_existing_communicator_pending, - item ); - - add_fragment_to_unexpected: - - /* We generate the MSG_ARRIVED event as soon as the PML is aware - * of a matching fragment arrival. Independing if it is received - * on the correct order or not. This will allow the tools to - * figure out if the messages are not received in the correct - * order (if multiple network interfaces). - */ - PERUSE_TRACE_MSG_EVENT(PERUSE_COMM_MSG_ARRIVED, comm, - hdr->hdr_src, hdr->hdr_tag, PERUSE_RECV); - - /* There is no matching to be done, and no lock to be held on the communicator as - * we know at this point that the communicator has not yet been returned to the user. - * The only required protection is around the non_existing_communicator_pending queue. - * We just have to push the fragment into the unexpected list of the corresponding - * proc, or into the out-of-order (cant_match) list. - */ - pml_proc = &(pml_comm->procs[hdr->hdr_src]); - - if( ((uint16_t)hdr->hdr_seq) == ((uint16_t)pml_proc->expected_sequence) ) { - /* We're now expecting the next sequence number. */ - pml_proc->expected_sequence++; - opal_list_append( &pml_proc->unexpected_frags, (opal_list_item_t*)frag ); - PERUSE_TRACE_MSG_EVENT(PERUSE_COMM_MSG_INSERT_IN_UNEX_Q, comm, - hdr->hdr_src, hdr->hdr_tag, PERUSE_RECV); - /* And now the ugly part. As some fragments can be inserted in the cant_match list, - * every time we succesfully add a fragment in the unexpected list we have to make - * sure the next one is not in the cant_match. 
Otherwise, we will endup in a deadlock - * situation as the cant_match is only checked when a new fragment is received from - * the network. - */ - for(frag = (mca_pml_bfo_recv_frag_t *)opal_list_get_first(&pml_proc->frags_cant_match); - frag != (mca_pml_bfo_recv_frag_t *)opal_list_get_end(&pml_proc->frags_cant_match); - frag = (mca_pml_bfo_recv_frag_t *)opal_list_get_next(frag)) { - hdr = &frag->hdr.hdr_match; - /* If the message has the next expected seq from that proc... */ - if(hdr->hdr_seq != pml_proc->expected_sequence) - continue; - - opal_list_remove_item(&pml_proc->frags_cant_match, (opal_list_item_t*)frag); - goto add_fragment_to_unexpected; - } - } else { - opal_list_append( &pml_proc->frags_cant_match, (opal_list_item_t*)frag ); - } - } - return OMPI_SUCCESS; -} - -int mca_pml_bfo_del_comm(ompi_communicator_t* comm) -{ - mca_pml_bfo_comm_t* pml_comm = comm->c_pml_comm; - int i; - - for( i = 0; i < comm->c_remote_group->grp_proc_count; i++ ) { - OBJ_RELEASE(pml_comm->procs[i].ompi_proc); - } - OBJ_RELEASE(comm->c_pml_comm); - comm->c_pml_comm = NULL; - return OMPI_SUCCESS; -} - - -/* - * For each proc setup a datastructure that indicates the BTLs - * that can be used to reach the destination. - * - */ - -int mca_pml_bfo_add_procs(ompi_proc_t** procs, size_t nprocs) -{ - opal_bitmap_t reachable; - int rc; - opal_list_item_t *item; - - if(nprocs == 0) - return OMPI_SUCCESS; - - OBJ_CONSTRUCT(&reachable, opal_bitmap_t); - rc = opal_bitmap_init(&reachable, (int)nprocs); - if(OMPI_SUCCESS != rc) - return rc; - - /* - * JJH: Disable this in FT enabled builds since - * we use a wrapper PML. It will cause this check to - * return failure as all processes will return the wrapper PML - * component in use instead of the wrapped PML component underneath. 
- */ -#if OPAL_ENABLE_FT_CR == 0 - /* make sure remote procs are using the same PML as us */ - if (OMPI_SUCCESS != (rc = mca_pml_base_pml_check_selected("bfo", - procs, - nprocs))) { - return rc; - } -#endif - - rc = mca_bml.bml_add_procs( nprocs, - procs, - &reachable ); - if(OMPI_SUCCESS != rc) - goto cleanup_and_return; - - /* Check that values supplied by all initialized btls will work - for us. Note that this is the list of all initialized BTLs, - not the ones used for the just added procs. This is a little - overkill and inaccurate, as we may end up not using the BTL in - question and all add_procs calls after the first one are - duplicating an already completed check. But the final - initialization of the PML occurs before the final - initialization of the BTLs, and iterating through the in-use - BTLs requires iterating over the procs, as the BML does not - expose all currently in use btls. */ - - for (item = opal_list_get_first(&mca_btl_base_modules_initialized) ; - item != opal_list_get_end(&mca_btl_base_modules_initialized) ; - item = opal_list_get_next(item)) { - mca_btl_base_selected_module_t *sm = - (mca_btl_base_selected_module_t*) item; - if (sm->btl_module->btl_eager_limit < sizeof(mca_pml_bfo_hdr_t)) { - opal_show_help("help-mpi-pml-bfo.txt", "eager_limit_too_small", - true, - sm->btl_component->btl_version.mca_component_name, - ompi_process_info.nodename, - sm->btl_component->btl_version.mca_component_name, - sm->btl_module->btl_eager_limit, - sm->btl_component->btl_version.mca_component_name, - sizeof(mca_pml_bfo_hdr_t), - sm->btl_component->btl_version.mca_component_name); - rc = OMPI_ERR_BAD_PARAM; - goto cleanup_and_return; - } - } - - - /* TODO: Move these callback registration to another place */ - rc = mca_bml.bml_register( MCA_PML_BFO_HDR_TYPE_MATCH, - mca_pml_bfo_recv_frag_callback_match, - NULL ); - if(OMPI_SUCCESS != rc) - goto cleanup_and_return; - - rc = mca_bml.bml_register( MCA_PML_BFO_HDR_TYPE_RNDV, - 
mca_pml_bfo_recv_frag_callback_rndv, - NULL ); - if(OMPI_SUCCESS != rc) - goto cleanup_and_return; - - rc = mca_bml.bml_register( MCA_PML_BFO_HDR_TYPE_RGET, - mca_pml_bfo_recv_frag_callback_rget, - NULL ); - if(OMPI_SUCCESS != rc) - goto cleanup_and_return; - - rc = mca_bml.bml_register( MCA_PML_BFO_HDR_TYPE_ACK, - mca_pml_bfo_recv_frag_callback_ack, - NULL ); - if(OMPI_SUCCESS != rc) - goto cleanup_and_return; - - rc = mca_bml.bml_register( MCA_PML_BFO_HDR_TYPE_FRAG, - mca_pml_bfo_recv_frag_callback_frag, - NULL ); - if(OMPI_SUCCESS != rc) - goto cleanup_and_return; - - rc = mca_bml.bml_register( MCA_PML_BFO_HDR_TYPE_PUT, - mca_pml_bfo_recv_frag_callback_put, - NULL ); - if(OMPI_SUCCESS != rc) - goto cleanup_and_return; - - rc = mca_bml.bml_register( MCA_PML_BFO_HDR_TYPE_FIN, - mca_pml_bfo_recv_frag_callback_fin, - NULL ); - if(OMPI_SUCCESS != rc) - goto cleanup_and_return; - -#if PML_BFO - rc = mca_pml_bfo_register_callbacks(); - if(OMPI_SUCCESS != rc) - goto cleanup_and_return; -#endif /* PML_BFO */ - /* register error handlers */ - rc = mca_bml.bml_register_error((mca_btl_base_module_error_cb_fn_t)mca_pml_bfo_error_handler); - if(OMPI_SUCCESS != rc) - goto cleanup_and_return; - - cleanup_and_return: - OBJ_DESTRUCT(&reachable); - - return rc; -} - -/* - * iterate through each proc and notify any PTLs associated - * with the proc that it is/has gone away - */ - -int mca_pml_bfo_del_procs(ompi_proc_t** procs, size_t nprocs) -{ - return mca_bml.bml_del_procs(nprocs, procs); -} - -/* - * diagnostics - */ - -int mca_pml_bfo_dump(struct ompi_communicator_t* comm, int verbose) -{ - struct mca_pml_comm_t* pml_comm = comm->c_pml_comm; - int i; - - /* iterate through all procs on communicator */ - for( i = 0; i < (int)pml_comm->num_procs; i++ ) { - mca_pml_bfo_comm_proc_t* proc = &pml_comm->procs[i]; - mca_bml_base_endpoint_t* ep = (mca_bml_base_endpoint_t*)proc->ompi_proc->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_BML]; - size_t n; - - opal_output(0, "[Rank %d]\n", i); - /* 
dump all receive queues */ - - /* dump all btls */ - for(n=0; nbtl_eager.arr_size; n++) { - mca_bml_base_btl_t* bml_btl = &ep->btl_eager.bml_btls[n]; - bml_btl->btl->btl_dump(bml_btl->btl, bml_btl->btl_endpoint, verbose); - } - } - return OMPI_SUCCESS; -} - -static void mca_pml_bfo_fin_completion( mca_btl_base_module_t* btl, - struct mca_btl_base_endpoint_t* ep, - struct mca_btl_base_descriptor_t* des, - int status ) -{ - - mca_bml_base_btl_t* bml_btl = (mca_bml_base_btl_t*) des->des_context; - -#if PML_BFO - if( OPAL_UNLIKELY(OMPI_SUCCESS != status) ) { - mca_pml_bfo_repost_fin(des); - return; - } - MCA_PML_BFO_CHECK_EAGER_BML_BTL_ON_FIN_COMPLETION(bml_btl, btl, des); -#endif /* PML_BFO */ - /* check for pending requests */ - MCA_PML_BFO_PROGRESS_PENDING(bml_btl); -} - -/** - * Send an FIN to the peer. If we fail to send this ack (no more available - * fragments or the send failed) this function automatically add the FIN - * to the list of pending FIN, Which guarantee that the FIN will be sent - * later. 
- */ -int mca_pml_bfo_send_fin( ompi_proc_t* proc, - mca_bml_base_btl_t* bml_btl, - opal_ptr_t hdr_des, - uint8_t order, -#if PML_BFO - uint32_t status, - uint16_t seq, - uint8_t restartseq, - uint16_t ctx, uint32_t src) -#else /* PML_BFO */ - uint32_t status ) -#endif /* PML_BFO */ -{ - mca_btl_base_descriptor_t* fin; - mca_pml_bfo_fin_hdr_t* hdr; - int rc; - - mca_bml_base_alloc(bml_btl, &fin, order, sizeof(mca_pml_bfo_fin_hdr_t), - MCA_BTL_DES_FLAGS_PRIORITY | MCA_BTL_DES_FLAGS_BTL_OWNERSHIP); - - if(NULL == fin) { - MCA_PML_BFO_ADD_FIN_TO_PENDING(proc, hdr_des, bml_btl, order, status); - return OMPI_ERR_OUT_OF_RESOURCE; - } - fin->des_cbfunc = mca_pml_bfo_fin_completion; - fin->des_cbdata = NULL; - - /* fill in header */ - hdr = (mca_pml_bfo_fin_hdr_t*)fin->des_local->seg_addr.pval; - hdr->hdr_common.hdr_flags = 0; - hdr->hdr_common.hdr_type = MCA_PML_BFO_HDR_TYPE_FIN; - hdr->hdr_des = hdr_des; - hdr->hdr_fail = status; -#if PML_BFO - fin->des_cbdata = proc; - hdr->hdr_match.hdr_seq = seq; - hdr->hdr_match.hdr_ctx = ctx; - hdr->hdr_match.hdr_src = src; - hdr->hdr_match.hdr_common.hdr_flags = restartseq; /* use unused hdr_flags field */ -#endif /* PML_BFO */ - - bfo_hdr_hton(hdr, MCA_PML_BFO_HDR_TYPE_FIN, proc); - - /* queue request */ - rc = mca_bml_base_send( bml_btl, - fin, - MCA_PML_BFO_HDR_TYPE_FIN ); - if( OPAL_LIKELY( rc >= 0 ) ) { - if( OPAL_LIKELY( 1 == rc ) ) { - MCA_PML_BFO_PROGRESS_PENDING(bml_btl); - } - return OMPI_SUCCESS; - } - mca_bml_base_free(bml_btl, fin); - MCA_PML_BFO_ADD_FIN_TO_PENDING(proc, hdr_des, bml_btl, order, status); - return OMPI_ERR_OUT_OF_RESOURCE; -} - -void mca_pml_bfo_process_pending_packets(mca_bml_base_btl_t* bml_btl) -{ - mca_pml_bfo_pckt_pending_t *pckt; - int32_t i, rc, s = (int32_t)opal_list_get_size(&mca_pml_bfo.pckt_pending); - - for(i = 0; i < s; i++) { - mca_bml_base_btl_t *send_dst = NULL; - OPAL_THREAD_LOCK(&mca_pml_bfo.lock); - pckt = (mca_pml_bfo_pckt_pending_t*) - 
opal_list_remove_first(&mca_pml_bfo.pckt_pending); - OPAL_THREAD_UNLOCK(&mca_pml_bfo.lock); - if(NULL == pckt) - break; - if(pckt->bml_btl != NULL && - pckt->bml_btl->btl == bml_btl->btl) { - send_dst = pckt->bml_btl; - } else { - mca_bml_base_endpoint_t* endpoint = - (mca_bml_base_endpoint_t*) pckt->proc->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_BML]; - send_dst = mca_bml_base_btl_array_find( - &endpoint->btl_eager, bml_btl->btl); - } - if(NULL == send_dst) { - OPAL_THREAD_LOCK(&mca_pml_bfo.lock); - opal_list_append(&mca_pml_bfo.pckt_pending, - (opal_list_item_t*)pckt); - OPAL_THREAD_UNLOCK(&mca_pml_bfo.lock); - continue; - } - - switch(pckt->hdr.hdr_common.hdr_type) { - case MCA_PML_BFO_HDR_TYPE_ACK: - rc = mca_pml_bfo_recv_request_ack_send_btl(pckt->proc, - send_dst, - pckt->hdr.hdr_ack.hdr_src_req.lval, - pckt->hdr.hdr_ack.hdr_dst_req.pval, - pckt->hdr.hdr_ack.hdr_send_offset, - pckt->hdr.hdr_common.hdr_flags & MCA_PML_BFO_HDR_FLAGS_NORDMA); - if( OPAL_UNLIKELY(OMPI_ERR_OUT_OF_RESOURCE == rc) ) { - OPAL_THREAD_LOCK(&mca_pml_bfo.lock); - opal_list_append(&mca_pml_bfo.pckt_pending, - (opal_list_item_t*)pckt); - OPAL_THREAD_UNLOCK(&mca_pml_bfo.lock); - return; - } - break; - case MCA_PML_BFO_HDR_TYPE_FIN: - rc = mca_pml_bfo_send_fin(pckt->proc, send_dst, - pckt->hdr.hdr_fin.hdr_des, - pckt->order, -#if PML_BFO - pckt->hdr.hdr_fin.hdr_fail, - pckt->hdr.hdr_fin.hdr_match.hdr_seq, - pckt->hdr.hdr_fin.hdr_match.hdr_common.hdr_flags, - pckt->hdr.hdr_fin.hdr_match.hdr_ctx, - pckt->hdr.hdr_fin.hdr_match.hdr_src); -#else /* PML_BFO */ - pckt->hdr.hdr_fin.hdr_fail); -#endif /* PML_BFO */ - if( OPAL_UNLIKELY(OMPI_ERR_OUT_OF_RESOURCE == rc) ) { - return; - } - break; - default: - opal_output(0, "[%s:%d] wrong header type\n", - __FILE__, __LINE__); - break; - } - /* We're done with this packet, return it back to the free list */ - MCA_PML_BFO_PCKT_PENDING_RETURN(pckt); - } -} - -void mca_pml_bfo_process_pending_rdma(void) -{ - mca_pml_bfo_rdma_frag_t* frag; - int32_t i, rc, s = 
(int32_t)opal_list_get_size(&mca_pml_bfo.rdma_pending); - - for(i = 0; i < s; i++) { - OPAL_THREAD_LOCK(&mca_pml_bfo.lock); - frag = (mca_pml_bfo_rdma_frag_t*) - opal_list_remove_first(&mca_pml_bfo.rdma_pending); - OPAL_THREAD_UNLOCK(&mca_pml_bfo.lock); - if(NULL == frag) - break; - if(frag->rdma_state == MCA_PML_BFO_RDMA_PUT) { - frag->retries++; - rc = mca_pml_bfo_send_request_put_frag(frag); - } else { - rc = mca_pml_bfo_recv_request_get_frag(frag); - } - if(OMPI_ERR_OUT_OF_RESOURCE == rc) - break; - } -} - - -void mca_pml_bfo_error_handler( - struct mca_btl_base_module_t* btl, int32_t flags, - ompi_proc_t* errproc, char* btlinfo ) { -#if PML_BFO - if (flags & MCA_BTL_ERROR_FLAGS_NONFATAL) { - mca_pml_bfo_failover_error_handler(btl, flags, errproc, btlinfo); - return; - } -#endif /* PML_BFO */ - ompi_rte_abort(-1, NULL); -} - -#if OPAL_ENABLE_FT_CR == 0 -int mca_pml_bfo_ft_event( int state ) { - return OMPI_SUCCESS; -} -#else -int mca_pml_bfo_ft_event( int state ) -{ - static bool first_continue_pass = false; - ompi_proc_t** procs = NULL; - size_t num_procs; - int ret, p; - - if(OPAL_CRS_CHECKPOINT == state) { - if( opal_cr_timing_barrier_enabled ) { - OPAL_CR_SET_TIMER(OPAL_CR_TIMER_CRCPBR1); - opal_pmix.fence(NULL, 0); - } - - OPAL_CR_SET_TIMER(OPAL_CR_TIMER_P2P0); - } - else if(OPAL_CRS_CONTINUE == state) { - first_continue_pass = !first_continue_pass; - - if( !first_continue_pass ) { - if( opal_cr_timing_barrier_enabled ) { - OPAL_CR_SET_TIMER(OPAL_CR_TIMER_COREBR0); - opal_pmix.fence(NULL, 0); - } - OPAL_CR_SET_TIMER(OPAL_CR_TIMER_P2P2); - } - - if (opal_cr_continue_like_restart && !first_continue_pass) { - /* - * Get a list of processes - */ - procs = ompi_proc_all(&num_procs); - if(NULL == procs) { - return OMPI_ERR_OUT_OF_RESOURCE; - } - - /* - * Refresh the proc structure, and publish our proc info in the modex. - * NOTE: Do *not* call ompi_proc_finalize as there are many places in - * the code that point to indv. procs in this strucutre. 
For our - * needs here we only need to fix up the modex, bml and pml - * references. - */ - if (OMPI_SUCCESS != (ret = ompi_proc_refresh())) { - opal_output(0, - "pml:bfo: ft_event(Restart): proc_refresh Failed %d", - ret); - for(p = 0; p < (int)num_procs; ++p) { - OBJ_RELEASE(procs[p]); - } - free (procs); - return ret; - } - } - } - else if(OPAL_CRS_RESTART_PRE == state ) { - /* Nothing here */ - } - else if(OPAL_CRS_RESTART == state ) { - /* - * Get a list of processes - */ - procs = ompi_proc_all(&num_procs); - if(NULL == procs) { - return OMPI_ERR_OUT_OF_RESOURCE; - } - - /* - * Clean out the modex information since it is invalid now. - * ompi_rte_purge_proc_attrs(); - * This happens at the ORTE level, so doing it again here will cause - * some issues with socket caching. - */ - - - /* - * Refresh the proc structure, and publish our proc info in the modex. - * NOTE: Do *not* call ompi_proc_finalize as there are many places in - * the code that point to indv. procs in this strucutre. For our - * needs here we only need to fix up the modex, bml and pml - * references. 
- */ - if (OMPI_SUCCESS != (ret = ompi_proc_refresh())) { - opal_output(0, - "pml:bfo: ft_event(Restart): proc_refresh Failed %d", - ret); - for(p = 0; p < (int)num_procs; ++p) { - OBJ_RELEASE(procs[p]); - } - free (procs); - return ret; - } - } - else if(OPAL_CRS_TERM == state ) { - ; - } - else { - ; - } - - /* Call the BML - * BML is expected to call ft_event in - * - BTL(s) - * - MPool(s) - */ - if( OMPI_SUCCESS != (ret = mca_bml.bml_ft_event(state))) { - opal_output(0, "pml:base: ft_event: BML ft_event function failed: %d\n", - ret); - } - - if(OPAL_CRS_CHECKPOINT == state) { - OPAL_CR_SET_TIMER(OPAL_CR_TIMER_P2P1); - - if( opal_cr_timing_barrier_enabled ) { - OPAL_CR_SET_TIMER(OPAL_CR_TIMER_P2PBR0); - /* JJH Cannot barrier here due to progress engine -- ompi_rte_barrier();*/ - } - } - else if(OPAL_CRS_CONTINUE == state) { - if( !first_continue_pass ) { - if( opal_cr_timing_barrier_enabled ) { - OPAL_CR_SET_TIMER(OPAL_CR_TIMER_P2PBR1); - opal_pmix.fence(NULL, 0); - } - OPAL_CR_SET_TIMER(OPAL_CR_TIMER_P2P3); - } - - if (opal_cr_continue_like_restart && !first_continue_pass) { - /* - * Exchange the modex information once again. - * BTLs will have republished their modex information. - */ - opal_pmix.fence(NULL, 0); - - /* - * Startup the PML stack now that the modex is running again - * Add the new procs (BTLs redo modex recv's) - */ - if( OMPI_SUCCESS != (ret = mca_pml_bfo_add_procs(procs, num_procs) ) ) { - opal_output(0, "pml:bfo: ft_event(Restart): Failed in add_procs (%d)", ret); - return ret; - } - - /* Is this barrier necessary ? 
JJH */ - opal_pmix.fence(NULL, 0); - - if( NULL != procs ) { - for(p = 0; p < (int)num_procs; ++p) { - OBJ_RELEASE(procs[p]); - } - free(procs); - procs = NULL; - } - } - if( !first_continue_pass ) { - if( opal_cr_timing_barrier_enabled ) { - OPAL_CR_SET_TIMER(OPAL_CR_TIMER_P2PBR2); - opal_pmix.fence(NULL, 0); - } - OPAL_CR_SET_TIMER(OPAL_CR_TIMER_CRCP1); - } - } - else if(OPAL_CRS_RESTART_PRE == state ) { - /* Nothing here */ - } - else if(OPAL_CRS_RESTART == state ) { - /* - * Exchange the modex information once again. - * BTLs will have republished their modex information. - */ - opal_pmix.fence(NULL, 0); - - /* - * Startup the PML stack now that the modex is running again - * Add the new procs (BTLs redo modex recv's) - */ - if( OMPI_SUCCESS != (ret = mca_pml_bfo_add_procs(procs, num_procs) ) ) { - opal_output(0, "pml:bfo: ft_event(Restart): Failed in add_procs (%d)", ret); - return ret; - } - - /* Is this barrier necessary ? JJH */ - opal_pmix.fence(NULL, 0); - - if( NULL != procs ) { - for(p = 0; p < (int)num_procs; ++p) { - OBJ_RELEASE(procs[p]); - } - free(procs); - procs = NULL; - } - } - else if(OPAL_CRS_TERM == state ) { - ; - } - else { - ; - } - - return OMPI_SUCCESS; -} -#endif /* OPAL_ENABLE_FT_CR */ - -int mca_pml_bfo_com_btl_comp(const void *v1, const void *v2) -{ - const mca_pml_bfo_com_btl_t *b1 = (const mca_pml_bfo_com_btl_t *) v1; - const mca_pml_bfo_com_btl_t *b2 = (const mca_pml_bfo_com_btl_t *) v2; - - if(b1->bml_btl->btl_weight < b2->bml_btl->btl_weight) - return 1; - if(b1->bml_btl->btl_weight > b2->bml_btl->btl_weight) - return -1; - - return 0; -} - diff --git a/ompi/mca/pml/bfo/pml_bfo.h b/ompi/mca/pml/bfo/pml_bfo.h deleted file mode 100644 index ef606f2669a..00000000000 --- a/ompi/mca/pml/bfo/pml_bfo.h +++ /dev/null @@ -1,362 +0,0 @@ -/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. 
All rights reserved. - * Copyright (c) 2004-2013 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * Copyright (c) 2010 Oracle and/or its affiliates. All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ -/** - * @file - */ - -#ifndef MCA_PML_BFO_H -#define MCA_PML_BFO_H - -#include "ompi_config.h" -#include "opal/class/opal_free_list.h" -#include "ompi/request/request.h" -#include "ompi/mca/pml/pml.h" -#include "ompi/mca/pml/base/pml_base_request.h" -#include "ompi/mca/pml/base/pml_base_bsend.h" -#include "ompi/mca/pml/base/pml_base_sendreq.h" -#include "ompi/datatype/ompi_datatype.h" -#include "pml_bfo_hdr.h" -#include "ompi/mca/bml/base/base.h" -#include "ompi/proc/proc.h" -#include "opal/mca/allocator/base/base.h" - -BEGIN_C_DECLS - -/** - * BFO PML module - */ - -struct mca_pml_bfo_t { - mca_pml_base_module_t super; - - int priority; - int free_list_num; /* initial size of free list */ - int free_list_max; /* maximum size of free list */ - int free_list_inc; /* number of elements to grow free list */ - unsigned int send_pipeline_depth; - unsigned int recv_pipeline_depth; - unsigned int rdma_put_retries_limit; - int max_rdma_per_request; - int max_send_per_range; - bool leave_pinned; - int leave_pinned_pipeline; - - /* lock queue access */ - opal_mutex_t lock; - - /* free lists */ - opal_free_list_t rdma_frags; - opal_free_list_t recv_frags; - opal_free_list_t pending_pckts; - opal_free_list_t buffers; - opal_free_list_t send_ranges; - - /* list of pending operations */ - opal_list_t pckt_pending; - opal_list_t send_pending; - opal_list_t recv_pending; - opal_list_t rdma_pending; - /* List of pending fragments without a matching 
communicator */ - opal_list_t non_existing_communicator_pending; - bool enabled; - char* allocator_name; - mca_allocator_base_module_t* allocator; - unsigned int unexpected_limit; -}; -typedef struct mca_pml_bfo_t mca_pml_bfo_t; - -extern mca_pml_bfo_t mca_pml_bfo; -extern int mca_pml_bfo_output; - -/* - * PML interface functions. - */ - -extern int mca_pml_bfo_add_comm( - struct ompi_communicator_t* comm -); - -extern int mca_pml_bfo_del_comm( - struct ompi_communicator_t* comm -); - -extern int mca_pml_bfo_add_procs( - struct ompi_proc_t **procs, - size_t nprocs -); - -extern int mca_pml_bfo_del_procs( - struct ompi_proc_t **procs, - size_t nprocs -); - -extern int mca_pml_bfo_enable( bool enable ); - -extern int mca_pml_bfo_progress(void); - -extern int mca_pml_bfo_iprobe( int dst, - int tag, - struct ompi_communicator_t* comm, - int *matched, - ompi_status_public_t* status ); - -extern int mca_pml_bfo_probe( int dst, - int tag, - struct ompi_communicator_t* comm, - ompi_status_public_t* status ); - -extern int mca_pml_bfo_improbe( int dst, - int tag, - struct ompi_communicator_t* comm, - int *matched, - struct ompi_message_t **message, - ompi_status_public_t* status ); - -extern int mca_pml_bfo_mprobe( int dst, - int tag, - struct ompi_communicator_t* comm, - struct ompi_message_t **message, - ompi_status_public_t* status ); - -extern int mca_pml_bfo_isend_init( void *buf, - size_t count, - ompi_datatype_t *datatype, - int dst, - int tag, - mca_pml_base_send_mode_t mode, - struct ompi_communicator_t* comm, - struct ompi_request_t **request ); - -extern int mca_pml_bfo_isend( void *buf, - size_t count, - ompi_datatype_t *datatype, - int dst, - int tag, - mca_pml_base_send_mode_t mode, - struct ompi_communicator_t* comm, - struct ompi_request_t **request ); - -extern int mca_pml_bfo_send( void *buf, - size_t count, - ompi_datatype_t *datatype, - int dst, - int tag, - mca_pml_base_send_mode_t mode, - struct ompi_communicator_t* comm ); - -extern int 
mca_pml_bfo_irecv_init( void *buf, - size_t count, - ompi_datatype_t *datatype, - int src, - int tag, - struct ompi_communicator_t* comm, - struct ompi_request_t **request ); - -extern int mca_pml_bfo_irecv( void *buf, - size_t count, - ompi_datatype_t *datatype, - int src, - int tag, - struct ompi_communicator_t* comm, - struct ompi_request_t **request ); - -extern int mca_pml_bfo_recv( void *buf, - size_t count, - ompi_datatype_t *datatype, - int src, - int tag, - struct ompi_communicator_t* comm, - ompi_status_public_t* status ); - -extern int mca_pml_bfo_imrecv( void *buf, - size_t count, - ompi_datatype_t *datatype, - struct ompi_message_t **message, - struct ompi_request_t **request ); - -extern int mca_pml_bfo_mrecv( void *buf, - size_t count, - ompi_datatype_t *datatype, - struct ompi_message_t **message, - ompi_status_public_t* status ); - -extern int mca_pml_bfo_dump( struct ompi_communicator_t* comm, - int verbose ); - -extern int mca_pml_bfo_start( size_t count, - ompi_request_t** requests ); - -extern int mca_pml_bfo_ft_event( int state ); - -END_C_DECLS - -struct mca_pml_bfo_pckt_pending_t { - opal_free_list_item_t super; - ompi_proc_t* proc; - mca_pml_bfo_hdr_t hdr; - struct mca_bml_base_btl_t *bml_btl; - uint8_t order; -}; -typedef struct mca_pml_bfo_pckt_pending_t mca_pml_bfo_pckt_pending_t; -OBJ_CLASS_DECLARATION(mca_pml_bfo_pckt_pending_t); - -#define MCA_PML_BFO_PCKT_PENDING_ALLOC(pckt) \ -do { \ - opal_free_list_item_t* item; \ - OPAL_FREE_LIST_WAIT(&mca_pml_bfo.pending_pckts, item); \ - pckt = (mca_pml_bfo_pckt_pending_t*)item; \ -} while (0) - -#define MCA_PML_BFO_PCKT_PENDING_RETURN(pckt) \ -do { \ - /* return packet */ \ - OPAL_FREE_LIST_RETURN(&mca_pml_bfo.pending_pckts, \ - (opal_free_list_item_t*)pckt); \ -} while(0) - -#define MCA_PML_BFO_ADD_FIN_TO_PENDING(P, D, B, O, S) \ - do { \ - mca_pml_bfo_pckt_pending_t *_pckt; \ - \ - MCA_PML_BFO_PCKT_PENDING_ALLOC(_pckt); \ - _pckt->hdr.hdr_common.hdr_type = MCA_PML_BFO_HDR_TYPE_FIN; \ - 
_pckt->hdr.hdr_fin.hdr_des = (D); \ - _pckt->hdr.hdr_fin.hdr_fail = (S); \ - _pckt->proc = (P); \ - _pckt->bml_btl = (B); \ - _pckt->order = (O); \ - OPAL_THREAD_LOCK(&mca_pml_bfo.lock); \ - opal_list_append(&mca_pml_bfo.pckt_pending, \ - (opal_list_item_t*)_pckt); \ - OPAL_THREAD_UNLOCK(&mca_pml_bfo.lock); \ - } while(0) - - -int mca_pml_bfo_send_fin(ompi_proc_t* proc, mca_bml_base_btl_t* bml_btl, -#if PML_BFO - opal_ptr_t hdr_des, uint8_t order, uint32_t status, - uint16_t seq, uint8_t reqseq, uint16_t ctx, uint32_t src); -#else /* PML_BFO */ - opal_ptr_t hdr_des, uint8_t order, uint32_t status); -#endif /* PML_BFO */ - -/* This function tries to resend FIN/ACK packets from pckt_pending queue. - * Packets are added to the queue when sending of FIN or ACK is failed due to - * resource unavailability. bml_btl passed to the function doesn't represents - * packet's destination, it represents BTL on which resource was freed, so only - * this BTL should be considered for resending packets */ -void mca_pml_bfo_process_pending_packets(mca_bml_base_btl_t* bml_btl); - -/* This function retries failed PUT/GET operations on frag. When RDMA operation - * cannot be accomplished for some reason, frag is put on the rdma_pending list. - * Later the operation is retried. 
The destination of RDMA operation is stored - * inside the frag structure */ -void mca_pml_bfo_process_pending_rdma(void); - -#define MCA_PML_BFO_PROGRESS_PENDING(bml_btl) \ - do { \ - if(opal_list_get_size(&mca_pml_bfo.pckt_pending)) \ - mca_pml_bfo_process_pending_packets(bml_btl); \ - if(opal_list_get_size(&mca_pml_bfo.recv_pending)) \ - mca_pml_bfo_recv_request_process_pending(); \ - if(opal_list_get_size(&mca_pml_bfo.send_pending)) \ - mca_pml_bfo_send_request_process_pending(bml_btl); \ - if(opal_list_get_size(&mca_pml_bfo.rdma_pending)) \ - mca_pml_bfo_process_pending_rdma(); \ - } while (0) - -/* - * Compute the total number of bytes on supplied descriptor - */ -static inline int mca_pml_bfo_compute_segment_length (size_t seg_size, void *segments, size_t count, - size_t hdrlen) { - size_t i, length; - - for (i = 0, length = -hdrlen ; i < count ; ++i) { - mca_btl_base_segment_t *segment = - (mca_btl_base_segment_t *)((char *) segments + i * seg_size); - - length += segment->seg_len; - } - - return length; -} - -static inline int mca_pml_bfo_compute_segment_length_base (mca_btl_base_segment_t *segments, - size_t count, size_t hdrlen) { - size_t i, length; - - for (i = 0, length = -hdrlen ; i < count ; ++i) { - length += segments[i].seg_len; - } - - return length; -} - -/* represent BTL chosen for sending request */ -struct mca_pml_bfo_com_btl_t { - mca_bml_base_btl_t *bml_btl; - struct mca_mpool_base_registration_t* btl_reg; - size_t length; -}; -typedef struct mca_pml_bfo_com_btl_t mca_pml_bfo_com_btl_t; - -int mca_pml_bfo_com_btl_comp(const void *v1, const void *v2); - -/* Calculate what percentage of a message to send through each BTL according to - * relative weight */ -static inline void -mca_pml_bfo_calc_weighted_length( mca_pml_bfo_com_btl_t *btls, int num_btls, size_t size, - double weight_total ) -{ - int i; - size_t length_left; - - /* shortcut for common case for only one BTL */ - if( OPAL_LIKELY(1 == num_btls) ) { - btls[0].length = size; - 
return; - } - - /* sort BTLs according of their weights so BTLs with smaller weight will - * not hijack all of the traffic */ - qsort( btls, num_btls, sizeof(mca_pml_bfo_com_btl_t), - mca_pml_bfo_com_btl_comp ); - - for(length_left = size, i = 0; i < num_btls; i++) { - mca_bml_base_btl_t* bml_btl = btls[i].bml_btl; - size_t length = 0; - if( OPAL_UNLIKELY(0 != length_left) ) { - length = (length_left > bml_btl->btl->btl_eager_limit)? - ((size_t)(size * (bml_btl->btl_weight / weight_total))) : - length_left; - - if(length > length_left) - length = length_left; - length_left -= length; - } - btls[i].length = length; - } - - /* account for rounding errors */ - btls[0].length += length_left; -} - -#endif diff --git a/ompi/mca/pml/bfo/pml_bfo_comm.c b/ompi/mca/pml/bfo/pml_bfo_comm.c deleted file mode 100644 index 997f1911492..00000000000 --- a/ompi/mca/pml/bfo/pml_bfo_comm.c +++ /dev/null @@ -1,100 +0,0 @@ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2006 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * Copyright (c) 2010-2012 Oracle and/or its affiliates. All rights reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "ompi_config.h" -#include - -#include "pml_bfo.h" -#include "pml_bfo_comm.h" - - - -static void mca_pml_bfo_comm_proc_construct(mca_pml_bfo_comm_proc_t* proc) -{ - proc->expected_sequence = 1; - proc->ompi_proc = NULL; - proc->send_sequence = 0; - OBJ_CONSTRUCT(&proc->frags_cant_match, opal_list_t); - OBJ_CONSTRUCT(&proc->specific_receives, opal_list_t); - OBJ_CONSTRUCT(&proc->unexpected_frags, opal_list_t); -} - - -static void mca_pml_bfo_comm_proc_destruct(mca_pml_bfo_comm_proc_t* proc) -{ - OBJ_DESTRUCT(&proc->frags_cant_match); - OBJ_DESTRUCT(&proc->specific_receives); - OBJ_DESTRUCT(&proc->unexpected_frags); -} - - -static OBJ_CLASS_INSTANCE( - mca_pml_bfo_comm_proc_t, - opal_object_t, - mca_pml_bfo_comm_proc_construct, - mca_pml_bfo_comm_proc_destruct); - - -static void mca_pml_bfo_comm_construct(mca_pml_bfo_comm_t* comm) -{ - OBJ_CONSTRUCT(&comm->wild_receives, opal_list_t); - OBJ_CONSTRUCT(&comm->matching_lock, opal_mutex_t); - comm->recv_sequence = 0; - comm->procs = NULL; - comm->last_probed = 0; - comm->num_procs = 0; -} - - -static void mca_pml_bfo_comm_destruct(mca_pml_bfo_comm_t* comm) -{ - size_t i; - for(i=0; inum_procs; i++) - OBJ_DESTRUCT((&comm->procs[i])); - if(NULL != comm->procs) - free(comm->procs); - OBJ_DESTRUCT(&comm->wild_receives); - OBJ_DESTRUCT(&comm->matching_lock); -} - - -OBJ_CLASS_INSTANCE( - mca_pml_bfo_comm_t, - opal_object_t, - mca_pml_bfo_comm_construct, - mca_pml_bfo_comm_destruct); - - -int mca_pml_bfo_comm_init_size(mca_pml_bfo_comm_t* comm, size_t size) -{ - size_t i; - - /* send message sequence-number support - sender side */ - comm->procs = (mca_pml_bfo_comm_proc_t*)malloc(sizeof(mca_pml_bfo_comm_proc_t)*size); - if(NULL == comm->procs) { - return OMPI_ERR_OUT_OF_RESOURCE; - } - for(i=0; iprocs+i, mca_pml_bfo_comm_proc_t); - } - comm->num_procs = size; - return OMPI_SUCCESS; -} - - diff --git a/ompi/mca/pml/bfo/pml_bfo_comm.h 
b/ompi/mca/pml/bfo/pml_bfo_comm.h deleted file mode 100644 index c70b4514d34..00000000000 --- a/ompi/mca/pml/bfo/pml_bfo_comm.h +++ /dev/null @@ -1,81 +0,0 @@ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2006 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * Copyright (c) 2010-2012 Oracle and/or its affiliates. All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ -/** - * @file - */ -#ifndef MCA_PML_BFO_COMM_H -#define MCA_PML_BFO_COMM_H - -#include "opal/threads/mutex.h" -#include "opal/class/opal_list.h" -#include "ompi/proc/proc.h" -BEGIN_C_DECLS - - -struct mca_pml_bfo_comm_proc_t { - opal_object_t super; - uint16_t expected_sequence; /**< send message sequence number - receiver side */ - struct ompi_proc_t* ompi_proc; -#if OPAL_ENABLE_MULTI_THREADS - volatile int32_t send_sequence; /**< send side sequence number */ -#else - int32_t send_sequence; /**< send side sequence number */ -#endif - opal_list_t frags_cant_match; /**< out-of-order fragment queues */ - opal_list_t specific_receives; /**< queues of unmatched specific receives */ - opal_list_t unexpected_frags; /**< unexpected fragment queues */ -}; -typedef struct mca_pml_bfo_comm_proc_t mca_pml_bfo_comm_proc_t; - - -/** - * Cached on ompi_communicator_t to hold queues/state - * used by the PML<->PTL interface for matching logic. 
- */ -struct mca_pml_comm_t { - opal_object_t super; -#if OPAL_ENABLE_MULTI_THREADS - volatile uint32_t recv_sequence; /**< recv request sequence number - receiver side */ -#else - uint32_t recv_sequence; /**< recv request sequence number - receiver side */ -#endif - opal_mutex_t matching_lock; /**< matching lock */ - opal_list_t wild_receives; /**< queue of unmatched wild (source process not specified) receives */ - mca_pml_bfo_comm_proc_t* procs; - size_t num_procs; - size_t last_probed; -}; -typedef struct mca_pml_comm_t mca_pml_bfo_comm_t; - -OBJ_CLASS_DECLARATION(mca_pml_bfo_comm_t); - - -/** - * Initialize an instance of mca_pml_bfo_comm_t based on the communicator size. - * - * @param comm Instance of mca_pml_bfo_comm_t - * @param size Size of communicator - * @return OMPI_SUCCESS or error status on failure. - */ - -extern int mca_pml_bfo_comm_init_size(mca_pml_bfo_comm_t* comm, size_t size); - -END_C_DECLS -#endif - diff --git a/ompi/mca/pml/bfo/pml_bfo_component.c b/ompi/mca/pml/bfo/pml_bfo_component.c deleted file mode 100644 index 67e59272613..00000000000 --- a/ompi/mca/pml/bfo/pml_bfo_component.c +++ /dev/null @@ -1,274 +0,0 @@ -/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ -/* - * Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2009 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * Copyright (c) 2007-2010 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2010 Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2014 Research Organization for Information Science - * and Technology (RIST). All rights reserved. 
- * Copyright (c) 2015 Los Alamos National Security, LLC. All rights - * reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "ompi_config.h" -#include "opal/mca/event/event.h" -#include "mpi.h" -#include "ompi/runtime/params.h" -#include "ompi/mca/pml/pml.h" -#include "ompi/mca/pml/base/pml_base_bsend.h" -#include "pml_bfo.h" -#include "pml_bfo_hdr.h" -#include "pml_bfo_sendreq.h" -#include "pml_bfo_recvreq.h" -#include "pml_bfo_rdmafrag.h" -#include "pml_bfo_recvfrag.h" -#include "ompi/mca/bml/base/base.h" -#include "pml_bfo_component.h" -#include "opal/mca/allocator/base/base.h" -#include "opal/runtime/opal_params.h" - -OBJ_CLASS_INSTANCE( mca_pml_bfo_pckt_pending_t, - ompi_free_list_item_t, - NULL, - NULL ); - -static int mca_pml_bfo_component_register(void); -static int mca_pml_bfo_component_open(void); -static int mca_pml_bfo_component_close(void); -static mca_pml_base_module_t* -mca_pml_bfo_component_init( int* priority, bool enable_progress_threads, - bool enable_mpi_threads ); -static int mca_pml_bfo_component_fini(void); -int mca_pml_bfo_output = 0; -static int mca_pml_bfo_verbose = 0; - -mca_pml_base_component_2_0_0_t mca_pml_bfo_component = { - - /* First, the mca_base_component_t struct containing meta - information about the component itself */ - - .pmlm_version = { - MCA_PML_BASE_VERSION_2_0_0, - - .mca_component_name = "bfo", - MCA_BASE_MAKE_VERSION(component, OMPI_MAJOR_VERSION, OMPI_MINOR_VERSION, - OMPI_RELEASE_VERSION), - .mca_open_component = mca_pml_bfo_component_open, - .mca_close_component = mca_pml_bfo_component_close, - .mca_register_component_params = mca_pml_bfo_component_register, - }, - .pmlm_data = { - /* The component is checkpoint ready */ - MCA_BASE_METADATA_PARAM_CHECKPOINT - }, - - .pmlm_init = mca_pml_bfo_component_init, - .pmlm_finalize = mca_pml_bfo_component_fini, -}; - -void *mca_pml_bfo_seg_alloc( struct mca_mpool_base_module_t* mpool, - size_t* size, - 
mca_mpool_base_registration_t** registration); - -void mca_pml_bfo_seg_free( struct mca_mpool_base_module_t* mpool, - void* segment ); - -static inline int mca_pml_bfo_param_register_int( - const char* param_name, - int default_value, - int *storage) -{ - *storage = default_value; - (void) mca_base_component_var_register(&mca_pml_bfo_component.pmlm_version, param_name, - NULL, MCA_BASE_VAR_TYPE_INT, NULL, 0, 0, - OPAL_INFO_LVL_9, - MCA_BASE_VAR_SCOPE_READONLY, storage); - - return *storage; -} - -static inline unsigned int mca_pml_bfo_param_register_uint( - const char* param_name, - unsigned int default_value, - unsigned int *storage) -{ - *storage = default_value; - (void) mca_base_component_var_register(&mca_pml_bfo_component.pmlm_version, param_name, - NULL, MCA_BASE_VAR_TYPE_UNSIGNED_INT, NULL, 0, 0, - OPAL_INFO_LVL_9, - MCA_BASE_VAR_SCOPE_READONLY, storage); - - return *storage; -} - -static int mca_pml_bfo_component_register(void) -{ - int default_priority; - -#if PML_BFO - default_priority = 5; -#else /* PML_BFO */ - default_priority = 20; - mca_pml_bfo_param_register_int("priority", 20); -#endif /* PML_BFO */ - - (void) mca_pml_bfo_param_register_int("verbose", 0, &mca_pml_bfo_verbose); - (void) mca_pml_bfo_param_register_int("free_list_num", 4, &mca_pml_bfo.free_list_num); - (void) mca_pml_bfo_param_register_int("free_list_max", -1, &mca_pml_bfo.free_list_max); - (void) mca_pml_bfo_param_register_int("free_list_inc", 64, &mca_pml_bfo.free_list_inc); - (void) mca_pml_bfo_param_register_int("priority", default_priority, &mca_pml_bfo.priority); - (void) mca_pml_bfo_param_register_uint("send_pipeline_depth", 3, &mca_pml_bfo.send_pipeline_depth); - (void) mca_pml_bfo_param_register_uint("recv_pipeline_depth", 4, &mca_pml_bfo.recv_pipeline_depth); - (void) mca_pml_bfo_param_register_uint("rdma_put_retries_limit", 5, &mca_pml_bfo.rdma_put_retries_limit); - (void) mca_pml_bfo_param_register_int("max_rdma_per_request", 4, &mca_pml_bfo.max_rdma_per_request); - 
(void) mca_pml_bfo_param_register_int("max_send_per_range", 4, &mca_pml_bfo.max_send_per_range); - (void) mca_pml_bfo_param_register_uint("unexpected_limit", 128, &mca_pml_bfo.unexpected_limit); - - mca_pml_bfo.allocator_name = "bucket"; - (void) mca_base_component_var_register(&mca_pml_bfo_component.pmlm_version, - "allocator", - "Name of allocator component for unexpected messages", - MCA_BASE_VAR_TYPE_STRING, NULL, 0, 0, - OPAL_INFO_LVL_9, - MCA_BASE_VAR_SCOPE_READONLY, - &mca_pml_bfo.allocator_name); - - return OMPI_SUCCESS; -} - -static int mca_pml_bfo_component_open(void) -{ - mca_pml_bfo_output = opal_output_open(NULL); - opal_output_set_verbosity(mca_pml_bfo_output, mca_pml_bfo_verbose); - - mca_pml_bfo.enabled = false; - return mca_base_framework_open(&ompi_bml_base_framework, 0); -} - - -static int mca_pml_bfo_component_close(void) -{ - int rc; - - if (OMPI_SUCCESS != (rc = mca_base_framework_close(&ompi_bml_base_framework))) { - return rc; - } - opal_output_close(mca_pml_bfo_output); - - return OMPI_SUCCESS; -} - - -static mca_pml_base_module_t* -mca_pml_bfo_component_init( int* priority, - bool enable_progress_threads, - bool enable_mpi_threads ) -{ - mca_allocator_base_component_t* allocator_component; - - opal_output_verbose( 10, mca_pml_bfo_output, - "in bfo, my priority is %d\n", mca_pml_bfo.priority); - - if((*priority) > mca_pml_bfo.priority) { - *priority = mca_pml_bfo.priority; - return NULL; - } - *priority = mca_pml_bfo.priority; - - allocator_component = mca_allocator_component_lookup( mca_pml_bfo.allocator_name ); - if(NULL == allocator_component) { - opal_output(0, "mca_pml_bfo_component_init: can't find allocator: %s\n", mca_pml_bfo.allocator_name); - return NULL; - } - - mca_pml_bfo.allocator = allocator_component->allocator_init(true, - mca_pml_bfo_seg_alloc, - mca_pml_bfo_seg_free, NULL); - if(NULL == mca_pml_bfo.allocator) { - opal_output(0, "mca_pml_bfo_component_init: unable to initialize allocator\n"); - return NULL; - } - - - 
if(OMPI_SUCCESS != mca_bml_base_init( enable_progress_threads, - enable_mpi_threads)) { - return NULL; - } - - /* Set this here (vs in component_open()) because - opal_leave_pinned* may have been set after MCA params were - read (e.g., by the openib btl) */ - mca_pml_bfo.leave_pinned = (1 == opal_leave_pinned); - mca_pml_bfo.leave_pinned_pipeline = (int) opal_leave_pinned_pipeline; - - return &mca_pml_bfo.super; -} - -int mca_pml_bfo_component_fini(void) -{ - int rc; - - /* Shutdown BML */ - if(OMPI_SUCCESS != (rc = mca_bml.bml_finalize())) - return rc; - - if(!mca_pml_bfo.enabled) - return OMPI_SUCCESS; /* never selected.. return success.. */ - mca_pml_bfo.enabled = false; /* not anymore */ - - OBJ_DESTRUCT(&mca_pml_bfo.rdma_pending); - OBJ_DESTRUCT(&mca_pml_bfo.pckt_pending); - OBJ_DESTRUCT(&mca_pml_bfo.recv_pending); - OBJ_DESTRUCT(&mca_pml_bfo.send_pending); - OBJ_DESTRUCT(&mca_pml_bfo.non_existing_communicator_pending); - OBJ_DESTRUCT(&mca_pml_bfo.buffers); - OBJ_DESTRUCT(&mca_pml_bfo.pending_pckts); - OBJ_DESTRUCT(&mca_pml_bfo.recv_frags); - OBJ_DESTRUCT(&mca_pml_bfo.rdma_frags); - OBJ_DESTRUCT(&mca_pml_bfo.lock); - - if(OMPI_SUCCESS != (rc = mca_pml_bfo.allocator->alc_finalize(mca_pml_bfo.allocator))) { - return rc; - } - -#if 0 - if (mca_pml_base_send_requests.fl_num_allocated != - mca_pml_base_send_requests.super.opal_list_length) { - opal_output(0, "bfo send requests: %d allocated %d returned\n", - mca_pml_base_send_requests.fl_num_allocated, - mca_pml_base_send_requests.super.opal_list_length); - } - if (mca_pml_base_recv_requests.fl_num_allocated != - mca_pml_base_recv_requests.super.opal_list_length) { - opal_output(0, "bfo recv requests: %d allocated %d returned\n", - mca_pml_base_recv_requests.fl_num_allocated, - mca_pml_base_recv_requests.super.opal_list_length); - } -#endif - - return OMPI_SUCCESS; -} - -void *mca_pml_bfo_seg_alloc( struct mca_mpool_base_module_t* mpool, - size_t* size, - mca_mpool_base_registration_t** registration) { - return 
malloc(*size); -} - -void mca_pml_bfo_seg_free( struct mca_mpool_base_module_t* mpool, - void* segment ) { - free(segment); -} diff --git a/ompi/mca/pml/bfo/pml_bfo_component.h b/ompi/mca/pml/bfo/pml_bfo_component.h deleted file mode 100644 index 60a9828a54c..00000000000 --- a/ompi/mca/pml/bfo/pml_bfo_component.h +++ /dev/null @@ -1,33 +0,0 @@ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2006 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2010 Oracle and/or its affiliates. All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ -/** - * @file - */ - -#ifndef MCA_PML_BFO_COMPONENT_H -#define MCA_PML_BFO_COMPONENT_H - -BEGIN_C_DECLS - -/* - * PML module functions. - */ -OMPI_MODULE_DECLSPEC extern mca_pml_base_component_2_0_0_t mca_pml_bfo_component; - -END_C_DECLS - -#endif diff --git a/ompi/mca/pml/bfo/pml_bfo_cuda.c b/ompi/mca/pml/bfo/pml_bfo_cuda.c deleted file mode 100644 index eb35b226e0e..00000000000 --- a/ompi/mca/pml/bfo/pml_bfo_cuda.c +++ /dev/null @@ -1,157 +0,0 @@ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2008 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * Copyright (c) 2008 UT-Battelle, LLC. All rights reserved. 
- * Copyright (c) 2010-2012 Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2012-2015 NVIDIA Corporation. All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - - -#include "ompi_config.h" -#include "opal/prefetch.h" -#include "opal/mca/btl/btl.h" -#include "opal/mca/mpool/mpool.h" -#include "ompi/constants.h" -#include "ompi/mca/pml/pml.h" -#include "pml_bfo.h" -#include "pml_bfo_hdr.h" -#include "pml_bfo_rdmafrag.h" -#include "pml_bfo_recvreq.h" -#include "pml_bfo_sendreq.h" -#include "ompi/mca/bml/base/base.h" -#include "ompi/memchecker.h" - -size_t mca_pml_bfo_rdma_cuda_btls( - mca_bml_base_endpoint_t* bml_endpoint, - unsigned char* base, - size_t size, - mca_pml_bfo_com_btl_t* rdma_btls); - -int mca_pml_bfo_cuda_need_buffers(void * rreq, - mca_btl_base_module_t* btl); - -/** - * Handle the CUDA buffer. - */ -int mca_pml_bfo_send_request_start_cuda(mca_pml_bfo_send_request_t* sendreq, - mca_bml_base_btl_t* bml_btl, - size_t size) { - int rc; - sendreq->req_send.req_base.req_convertor.flags &= ~CONVERTOR_CUDA; - if (opal_convertor_need_buffers(&sendreq->req_send.req_base.req_convertor) == false) { - unsigned char *base; - opal_convertor_get_current_pointer( &sendreq->req_send.req_base.req_convertor, (void**)&base ); - /* Set flag back */ - sendreq->req_send.req_base.req_convertor.flags |= CONVERTOR_CUDA; - if( 0 != (sendreq->req_rdma_cnt = (uint32_t)mca_pml_bfo_rdma_cuda_btls( - sendreq->req_endpoint, - base, - sendreq->req_send.req_bytes_packed, - sendreq->req_rdma))) { - rc = mca_pml_bfo_send_request_start_rdma(sendreq, bml_btl, - sendreq->req_send.req_bytes_packed); - if( OPAL_UNLIKELY(OMPI_SUCCESS != rc) ) { - mca_pml_bfo_free_rdma_resources(sendreq); - } - } else { - if (bml_btl->btl_flags & MCA_BTL_FLAGS_CUDA_PUT) { - rc = mca_pml_bfo_send_request_start_rndv(sendreq, bml_btl, size, - MCA_PML_BFO_HDR_FLAGS_CONTIG); - } else { - rc = mca_pml_bfo_send_request_start_rndv(sendreq, bml_btl, 
size, 0); - } - } - } else { - /* Do not send anything with first rendezvous message as copying GPU - * memory into RNDV message is expensive. */ - sendreq->req_send.req_base.req_convertor.flags |= CONVERTOR_CUDA; - rc = mca_pml_bfo_send_request_start_rndv(sendreq, bml_btl, 0, 0); - } - return rc; -} - - - -size_t mca_pml_bfo_rdma_cuda_btls( - mca_bml_base_endpoint_t* bml_endpoint, - unsigned char* base, - size_t size, - mca_pml_bfo_com_btl_t* rdma_btls) -{ - int num_btls = mca_bml_base_btl_array_get_size(&bml_endpoint->btl_send); - double weight_total = 0; - int num_btls_used = 0, n; - - /* shortcut when there are no rdma capable btls */ - if(num_btls == 0) { - return 0; - } - - /* check to see if memory is registered */ - for(n = 0; n < num_btls && num_btls_used < mca_pml_bfo.max_rdma_per_request; - n++) { - mca_bml_base_btl_t* bml_btl = - mca_bml_base_btl_array_get_index(&bml_endpoint->btl_send, n); - - if (bml_btl->btl_flags & MCA_BTL_FLAGS_CUDA_GET) { - mca_mpool_base_registration_t* reg = NULL; - mca_mpool_base_module_t *btl_mpool = bml_btl->btl->btl_mpool; - - if( NULL != btl_mpool ) { - /* register the memory */ - btl_mpool->mpool_register(btl_mpool, base, size, 0, ®); - } - - if(NULL == reg) - continue; - - rdma_btls[num_btls_used].bml_btl = bml_btl; - rdma_btls[num_btls_used].btl_reg = reg; - weight_total += bml_btl->btl_weight; - num_btls_used++; - } - } - - /* if we don't use leave_pinned and all BTLs that already have this memory - * registered amount to less then half of available bandwidth - fall back to - * pipeline protocol */ - if(0 == num_btls_used || (!mca_pml_bfo.leave_pinned && weight_total < 0.5)) - return 0; - - mca_pml_bfo_calc_weighted_length(rdma_btls, num_btls_used, size, - weight_total); - - return num_btls_used; -} - -int mca_pml_bfo_cuda_need_buffers(void * rreq, - mca_btl_base_module_t* btl) -{ - mca_pml_bfo_recv_request_t* recvreq = (mca_pml_bfo_recv_request_t*)rreq; - if ((recvreq->req_recv.req_base.req_convertor.flags & 
CONVERTOR_CUDA) && - (btl->btl_flags & MCA_BTL_FLAGS_CUDA_GET)) { - recvreq->req_recv.req_base.req_convertor.flags &= ~CONVERTOR_CUDA; - if(opal_convertor_need_buffers(&recvreq->req_recv.req_base.req_convertor) == true) { - recvreq->req_recv.req_base.req_convertor.flags |= CONVERTOR_CUDA; - return true; - } else { - recvreq->req_recv.req_base.req_convertor.flags |= CONVERTOR_CUDA; - return false; - } - } - return true; -} - diff --git a/ompi/mca/pml/bfo/pml_bfo_failover.c b/ompi/mca/pml/bfo/pml_bfo_failover.c deleted file mode 100644 index bd8e3a54527..00000000000 --- a/ompi/mca/pml/bfo/pml_bfo_failover.c +++ /dev/null @@ -1,2187 +0,0 @@ -/* - * Copyright (c) 2010 Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2011-2012 Los Alamos National Security, LLC. - * All rights reserved. - * Copyright (c) 2013 Intel, Inc. All rights reserved - * Copyright (c) 2014 Research Organization for Information Science - * and Technology (RIST). All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -/** - * @file - * Functions that implement failover capabilities. To utilize the - * failover feature, one needs to configure the library with - * --enable-openib-failover. Then the system that is being used - * must have two or more openib BTLs in use. When an error occurs, - * the BTL will call into this PML to map out the offending BTL and - * continue using the one that is still working. - * Most of the differences between the ob1 PML and the bfo PML are - * contained in this file. 
- */ - -#include "ompi_config.h" - -#include -#include - -#include "opal/class/opal_bitmap.h" -#include "opal/mca/btl/btl.h" -#include "opal/mca/btl/base/base.h" -#include "ompi/mca/pml/pml.h" -#include "ompi/mca/pml/base/base.h" -#include "ompi/mca/pml/base/base.h" -#include "pml_bfo.h" -#include "pml_bfo_component.h" -#include "pml_bfo_comm.h" -#include "pml_bfo_hdr.h" -#include "pml_bfo_recvfrag.h" -#include "pml_bfo_sendreq.h" -#include "pml_bfo_recvreq.h" -#include "pml_bfo_rdmafrag.h" -#include "pml_bfo_failover.h" -#include "ompi/mca/bml/base/base.h" - -#include "ompi/runtime/ompi_cr.h" - -static void mca_pml_bfo_error_pending_packets(mca_btl_base_module_t* btl, - mca_bml_base_endpoint_t* ep); - -/** - * When running with failover enabled, check the PML sequence numbers - * to see if we have received a duplicate message. This check is done - * for for all MATCH fragments. It is also done for RNDV and RGET - * fragments that do not have the MCA_PML_BFO_HDR_FLAGS_RESTART flag - * set. - * We set the window size to half the total range of sequence numbers. - * We only enter this code when the seq_num is not the expected one. - * A few more notes on the algorithm used here. In normal operation, - * the expected value will either be equal to or less than the - * sequence number of the header. This is because we are using this - * sequence number to detect packets arriving prior to them being - * expected. If we determine that expected is less than header, then - * make sure this is not a rollover case. We do that by adding the - * maxnum to the expected. 
- * @param proc Pointer to proc from where message came - * @param hdr Pointer to header of message - */ -bool mca_pml_bfo_is_duplicate_msg(mca_pml_bfo_comm_proc_t* proc, - mca_pml_bfo_match_hdr_t *hdr) -{ - const int window = 32768; - const int maxnum = 65536; - mca_pml_bfo_recv_frag_t *frag; - -#if 0 - opal_output(0, "checking dup, exp=%d, act=%d, type=%d, cant_match=%d\n", - (uint16_t)proc->expected_sequence, - hdr->hdr_seq, hdr->hdr_common.hdr_type, - opal_list_get_size(&proc->frags_cant_match)); -#endif - - /* Few cases near end of values where expected may equal 65535 and - * an out of order shows up that may equal something like 1. */ - if (OPAL_UNLIKELY((uint16_t)proc->expected_sequence > hdr->hdr_seq)) { - if (((uint16_t)proc->expected_sequence - hdr->hdr_seq) < window) { - opal_output_verbose(20, mca_pml_bfo_output, - "%s:%d: frag duplicated, exp=%d, act=%d, type=%d\n", - __FILE__, __LINE__, (uint16_t)proc->expected_sequence, - hdr->hdr_seq, hdr->hdr_common.hdr_type); - return true; - } - } else { - /* This is the normal flow through this code. We also need to - * use the maxnum to ensure that we handle cases where the - * expected number has rolled over but then a duplicate message - * shows up that is greater than it. */ - if ((((uint16_t)proc->expected_sequence + maxnum) - hdr->hdr_seq) < window) { - opal_output_verbose(20, mca_pml_bfo_output, - "%s:%d: frag duplicated, exp=%d, act=%d, type=%d\n", - __FILE__, __LINE__, (uint16_t)proc->expected_sequence, - hdr->hdr_seq, hdr->hdr_common.hdr_type); - return true; - } - } - - /* Need to explicitly check against any out of order fragments. Unfortunately, we - * always have to do this since we can get a duplicate out of order fragment. 
*/ - if(OPAL_UNLIKELY(opal_list_get_size(&proc->frags_cant_match) > 0)) { - for(frag = (mca_pml_bfo_recv_frag_t*)opal_list_get_first(&proc->frags_cant_match); - frag != (mca_pml_bfo_recv_frag_t*)opal_list_get_end(&proc->frags_cant_match); - frag = (mca_pml_bfo_recv_frag_t*)opal_list_get_next(frag)) - { - mca_pml_bfo_match_hdr_t* mhdr = &frag->hdr.hdr_match; - - if(mhdr->hdr_seq == hdr->hdr_seq) { - opal_output_verbose(20, mca_pml_bfo_output, - "%s:%d: frag duplicated on frags_cant_match list, seq=%d, type=%d\n", - __FILE__, __LINE__, hdr->hdr_seq, hdr->hdr_common.hdr_type); - return true; - } - } - } - - return false; -} - -/** - * This function checks to see if we have received a duplicate FIN - * message. This is done by first pulling the pointer of the request - * that the FIN message is pointing to from the message. We then - * check the various fields in the request to the fields in the header - * and make sure they match. If they do not, then the request must - * have been recycled already and this is a duplicate FIN message. We - * have to do this check on every FIN message that we receive. - */ -bool mca_pml_bfo_is_duplicate_fin(mca_pml_bfo_hdr_t* hdr, mca_btl_base_descriptor_t* rdma, - mca_btl_base_module_t* btl) -{ - mca_pml_base_request_t* basereq; - /* When running with failover enabled, need to ensure that this - * is not a duplicate FIN message. */ - if (btl->btl_flags & MCA_BTL_FLAGS_FAILOVER_SUPPORT) { - /* The first check is to make sure the descriptor is pointing - * to a valid request. The descriptor may be pointing to NULL - * if it was freed and not reused yet. 
*/ - if (NULL == rdma->des_cbdata) { - opal_output_verbose(20, mca_pml_bfo_output, - "FIN: received: dropping because not pointing to valid descriptor " - "PML=%d CTX=%d SRC=%d RQS=%d", - hdr->hdr_fin.hdr_match.hdr_seq, - hdr->hdr_fin.hdr_match.hdr_ctx, - hdr->hdr_fin.hdr_match.hdr_src, - hdr->hdr_fin.hdr_match.hdr_common.hdr_flags); - return true; - } - - basereq = (mca_pml_base_request_t*)rdma->des_cbdata; - /* Now we know the descriptor is pointing to a non-null request. - * Does it match what we expect? To make sure the receiver request - * matches the FIN message, check the context number, source of the - * message, and MPI sequence number. Then make sure that it also - * matches the internal sequencing number of the requests. We need - * to look at the type of request we are pointing at to figure out - * what fields to access. */ - if (basereq->req_type == MCA_PML_REQUEST_RECV) { - mca_pml_bfo_recv_request_t* recvreq = (mca_pml_bfo_recv_request_t*)basereq; - if ((hdr->hdr_fin.hdr_match.hdr_ctx != - recvreq->req_recv.req_base.req_comm->c_contextid) || - (hdr->hdr_fin.hdr_match.hdr_src != - recvreq->req_recv.req_base.req_ompi.req_status.MPI_SOURCE) || - (hdr->hdr_fin.hdr_match.hdr_seq != (uint16_t)recvreq->req_msgseq)) { - opal_output_verbose(5, mca_pml_bfo_output, - "FIN: received on receiver: dropping because no match " - "PML:exp=%d,act=%d CTX:exp=%d,act=%d SRC:exp=%d,act=%d " - "RQS:exp=%d,act=%d, dst_req=%p", - (uint16_t)recvreq->req_msgseq, hdr->hdr_fin.hdr_match.hdr_seq, - recvreq->req_recv.req_base.req_comm->c_contextid, - hdr->hdr_fin.hdr_match.hdr_ctx, - recvreq->req_recv.req_base.req_ompi.req_status.MPI_SOURCE, - hdr->hdr_fin.hdr_match.hdr_src, - recvreq->req_restartseq, - hdr->hdr_fin.hdr_match.hdr_common.hdr_flags, - (void *)recvreq); - return true; - } - if (hdr->hdr_fin.hdr_match.hdr_common.hdr_flags != recvreq->req_restartseq) { - opal_output_verbose(5, mca_pml_bfo_output, - "FIN: received on receiver: dropping because old " - "PML:exp=%d,act=%d 
CTX:exp=%d,act=%d SRC:exp=%d,act=%d " - "RQS:exp=%d,act=%d, dst_req=%p", - (uint16_t)recvreq->req_msgseq, hdr->hdr_fin.hdr_match.hdr_seq, - recvreq->req_recv.req_base.req_comm->c_contextid, - hdr->hdr_fin.hdr_match.hdr_ctx, - recvreq->req_recv.req_base.req_ompi.req_status.MPI_SOURCE, - hdr->hdr_fin.hdr_match.hdr_src, - recvreq->req_restartseq, - hdr->hdr_fin.hdr_match.hdr_common.hdr_flags, - (void *)recvreq); - return true; - } - } else if (basereq->req_type == MCA_PML_REQUEST_SEND) { - mca_pml_bfo_send_request_t* sendreq = (mca_pml_bfo_send_request_t*)basereq; - if ((hdr->hdr_fin.hdr_match.hdr_ctx != - sendreq->req_send.req_base.req_comm->c_contextid) || - (hdr->hdr_fin.hdr_match.hdr_src != - sendreq->req_send.req_base.req_peer) || - (hdr->hdr_fin.hdr_match.hdr_seq != - (uint16_t)sendreq->req_send.req_base.req_sequence)) { - uint16_t seq = (uint16_t)sendreq->req_send.req_base.req_sequence; - opal_output_verbose(5, mca_pml_bfo_output, - "FIN: received on sender: dropping because no match " - "PML:exp=%d,act=%d CTX:exp=%d,act=%d SRC:exp=%d,act=%d " - "RQS:exp=%d,act=%d, dst_req=%p", - seq, hdr->hdr_fin.hdr_match.hdr_seq, - sendreq->req_send.req_base.req_comm->c_contextid, - hdr->hdr_fin.hdr_match.hdr_ctx, - sendreq->req_send.req_base.req_peer, - hdr->hdr_fin.hdr_match.hdr_src, - sendreq->req_restartseq, - hdr->hdr_fin.hdr_match.hdr_common.hdr_flags, - (void *)sendreq); - return true; - } - if (hdr->hdr_fin.hdr_match.hdr_common.hdr_flags != sendreq->req_restartseq) { - uint16_t seq = (uint16_t)sendreq->req_send.req_base.req_sequence; - opal_output_verbose(5, mca_pml_bfo_output, - "FIN: received on sender: dropping because old " - "PML:exp=%d,act=%d CTX:exp=%d,act=%d SRC:exp=%d,act=%d " - "RQS:exp=%d,act=%d, dst_req=%p", - seq, hdr->hdr_fin.hdr_match.hdr_seq, - sendreq->req_send.req_base.req_comm->c_contextid, - hdr->hdr_fin.hdr_match.hdr_ctx, - sendreq->req_send.req_base.req_peer, - hdr->hdr_fin.hdr_match.hdr_src, - sendreq->req_restartseq, - 
hdr->hdr_fin.hdr_match.hdr_common.hdr_flags, - (void *)sendreq); - return true; - } - } else { - /* We can get here if the descriptor has been reused, but - * not as an RDMA descriptor. In that case, the callback - * function has been set to something else. Clearly the - * descriptor we are interested is gone, so just drop the - * FIN message. */ - opal_output_verbose(5, mca_pml_bfo_output, - "FIN: received: dropping because descriptor has been reused " - "PML=%d CTX=%d SRC=%d RQS=%d rdma->des_flags=%d", - hdr->hdr_fin.hdr_match.hdr_seq, hdr->hdr_fin.hdr_match.hdr_ctx, - hdr->hdr_fin.hdr_match.hdr_src, hdr->hdr_fin.hdr_match.hdr_common.hdr_flags, - rdma->des_flags); - return true; - } - } - return false; -} - -/** - * Repost a FIN message if we get an error on the completion event. - */ -void mca_pml_bfo_repost_fin(struct mca_btl_base_descriptor_t* des) { - /* In the error case, we will repost the FIN message. I had - * considered restarting the request. The problem is that the - * request may be already complete when we detect that a FIN - * message got an error on its completion event. For example, with - * the PUT protocol, if the RDMA writes succeed and all the data - * has been sent, then the request is marked as complete and can be - * freed. Therefore, an error on the FIN message has no request to - * refer back to. So, we will just repost it. However, we are also - * faced with the case where the FIN message has an error but it - * actually makes it to the other side. In that case we are now - * sending a FIN message to a non-existent request on the receiver - * side. To handle that, we have added the match information to - * the FIN message. That way, we can check on the receiving side - * to ensure that it is pointing to a valid request. 
*/ - mca_pml_bfo_fin_hdr_t* hdr; - mca_bml_base_endpoint_t* bml_endpoint; - ompi_proc_t *proc; - mca_bml_base_btl_t* bml_btl; - - proc = (ompi_proc_t*) des->des_cbdata; - bml_endpoint = (mca_bml_base_endpoint_t*) proc->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_BML]; - hdr = (mca_pml_bfo_fin_hdr_t*)des->des_local->seg_addr.pval; - - opal_output_verbose(20, mca_pml_bfo_output, - "REPOST: BFO_HDR_TYPE_FIN: seq=%d,myrank=%d,peer=%d,hdr->hdr_fail=%d,src=%d", - hdr->hdr_match.hdr_seq, OMPI_PROC_MY_NAME->vpid, OMPI_CAST_RTE_NAME(&proc->super.proc_name)->vpid, - hdr->hdr_fail, hdr->hdr_match.hdr_src); - - bml_btl = mca_bml_base_btl_array_get_next(&bml_endpoint->btl_eager); - - /* Reconstruct the fin for sending on the other BTL */ - mca_pml_bfo_send_fin(proc, bml_btl, - hdr->hdr_des, MCA_BTL_NO_ORDER, - hdr->hdr_fail, hdr->hdr_match.hdr_seq, - hdr->hdr_match.hdr_common.hdr_flags, - hdr->hdr_match.hdr_ctx, hdr->hdr_match.hdr_src); - return; -} - -/** - * This function is called when a RNDV or RGET is received with the - * FLAGS_RESTART flag set. This means this message already has a - * receive request already associated with it. - */ -mca_pml_bfo_recv_request_t* mca_pml_bfo_get_request(mca_pml_bfo_match_hdr_t *hdr) { - mca_pml_bfo_recv_request_t *match = NULL; - mca_pml_bfo_rendezvous_hdr_t * rhdr = (mca_pml_bfo_rendezvous_hdr_t *) hdr; - match = (mca_pml_bfo_recv_request_t *) rhdr->hdr_dst_req.pval; - - /* Check to see if we have received a duplicate RNDV (or RGET). This can - * occur because we got an error when we reposted the RNDV. Therefore, - * we make sure that the request has not completed from underneath us - * and been recycled. Secondly, make sure we are not getting it a - * second time for the same request. 
*/ - if ((rhdr->hdr_match.hdr_ctx != match->req_recv.req_base.req_comm->c_contextid) || - (rhdr->hdr_match.hdr_src != match->req_recv.req_base.req_ompi.req_status.MPI_SOURCE) || - (rhdr->hdr_match.hdr_seq != (uint16_t)match->req_msgseq) || - (rhdr->hdr_restartseq == match->req_restartseq)) { - if (hdr->hdr_common.hdr_type == MCA_PML_BFO_HDR_TYPE_RNDV) { - opal_output_verbose(20, mca_pml_bfo_output, - "RNDV: received with RESTART flag: duplicate, dropping " - "PML:exp=%d,act=%d RQS=%d, src_req=%p, dst_req=%p, peer=%d", - match->req_msgseq, rhdr->hdr_match.hdr_seq, match->req_restartseq, - match->remote_req_send.pval, (void *)match, - match->req_recv.req_base.req_ompi.req_status.MPI_SOURCE); - } else { - opal_output_verbose(20, mca_pml_bfo_output, - "RGET: received with RESTART flag: duplicate, dropping " - "PML:exp=%d,act=%d RQS=%d, src_req=%p, dst_req=%p, peer=%d", - match->req_msgseq, rhdr->hdr_match.hdr_seq, match->req_restartseq, - match->remote_req_send.pval, (void *)match, - match->req_recv.req_base.req_ompi.req_status.MPI_SOURCE); - } - return NULL; - } - - mca_pml_bfo_recv_request_reset(match); - if (hdr->hdr_common.hdr_type == MCA_PML_BFO_HDR_TYPE_RNDV) { - opal_output_verbose(30, mca_pml_bfo_output, - "RNDV: received with RESTART flag: restarting recv, " - "PML:exp=%d,act=%d RQS(new)=%d, src_req=%p, dst_req=%p, peer=%d", - match->req_msgseq, rhdr->hdr_match.hdr_seq, match->req_restartseq, - match->remote_req_send.pval, (void *)match, - match->req_recv.req_base.req_ompi.req_status.MPI_SOURCE); - } else { - opal_output_verbose(30, mca_pml_bfo_output, - "RGET: received with RESTART flag: restarting recv, " - "PML:exp=%d,act=%d RQS(new)=%d, src_req=%p, dst_req=%p, peer=%d", - match->req_msgseq, rhdr->hdr_match.hdr_seq, match->req_restartseq, - match->remote_req_send.pval, (void *)match, - match->req_recv.req_base.req_ompi.req_status.MPI_SOURCE); - } - return match; -} - -/** - * Callback for when a RNDVRESTARTNOTIFY message is received. 
A - * RNDVRESTARTNOTIFY message is sent from the sender to the receiver - * telling the receiver that the message is going to be started over. - * The receiver first makes sure that the request being pointed to is - * still valid. If it is not, that means the receiver must have - * completed the request and therefore we need to send a NACK back to - * the sender. The receiver then makes sure this is not a duplicate - * message. If it is a duplicate, it will just drop it. Otherwise, - * it will then send a RNDVRESTARTACK message if there are no - * outstanding events on the receiver. Otherwise, it will just change - * the state of the request and wait for another event to send the - * RNDVRESTARTACK to the sender. - */ -void mca_pml_bfo_recv_frag_callback_rndvrestartnotify(mca_btl_base_module_t* btl, - mca_btl_base_tag_t tag, - mca_btl_base_descriptor_t* des, - void* cbdata ) { - mca_btl_base_segment_t* segments = des->des_local; - mca_pml_bfo_hdr_t* hdr = (mca_pml_bfo_hdr_t*)segments->seg_addr.pval; - mca_pml_bfo_recv_request_t* recvreq; - ompi_proc_t* ompi_proc; - ompi_process_name_t orte_proc; - - bfo_hdr_ntoh(hdr, MCA_PML_BFO_HDR_TYPE_RNDVRESTARTNOTIFY); - recvreq = (mca_pml_bfo_recv_request_t*)hdr->hdr_restart.hdr_dst_req.pval; - - /* Check to see if the receive request is still valid. If the - * request is recycled, that means the original request must have - * completed and we therefore need to send a NACK back to the sender. - * Note that when the request is gone, we need to pull some information - * off the header so that we can figure out where to send the NACK - * message back to. 
*/ - if ((hdr->hdr_match.hdr_ctx != recvreq->req_recv.req_base.req_comm->c_contextid) || - (hdr->hdr_match.hdr_src != recvreq->req_recv.req_base.req_ompi.req_status.MPI_SOURCE) || - (hdr->hdr_match.hdr_seq != (uint16_t)recvreq->req_msgseq)) { - orte_proc.jobid = hdr->hdr_restart.hdr_jobid; - orte_proc.vpid = hdr->hdr_restart.hdr_vpid; - - ompi_proc = ompi_proc_find(&orte_proc); - opal_output_verbose(20, mca_pml_bfo_output, - "RNDVRESTARTNOTIFY: received: does not match request, sending NACK back " - "PML:req=%d,hdr=%d CTX:req=%d,hdr=%d SRC:req=%d,hdr=%d " - "RQS:req=%d,hdr=%d src_req=%p, dst_req=%p, peer=%d, hdr->hdr_jobid=%d, " - "hdr->hdr_vpid=%d, proc_hostname=%s", - (uint16_t)recvreq->req_msgseq, hdr->hdr_match.hdr_seq, - recvreq->req_recv.req_base.req_comm->c_contextid, hdr->hdr_match.hdr_ctx, - recvreq->req_recv.req_base.req_ompi.req_status.MPI_SOURCE, - hdr->hdr_match.hdr_src, recvreq->req_restartseq, - hdr->hdr_restart.hdr_restartseq, - recvreq->remote_req_send.pval, (void *)recvreq, - recvreq->req_recv.req_base.req_ompi.req_status.MPI_SOURCE, - hdr->hdr_restart.hdr_jobid, hdr->hdr_restart.hdr_vpid, - (NULL == ompi_proc->super.proc_hostname) ? "unknown" : ompi_proc->super.proc_hostname); - mca_pml_bfo_recv_request_rndvrestartnack(des, ompi_proc, false); - return; - } - - /* We know that we have the correct receive request. Make sure this is not - * a duplicate RNDVRESTARTNOTIFY on this request. 
*/ - if (hdr->hdr_restart.hdr_restartseq == recvreq->req_restartseq) { - opal_output_verbose(20, mca_pml_bfo_output, - "RNDVRESTARTNOTIFY: received duplicate: dropping RNDVRESTARTNOTIFY " - "message PML:req=%d,hdr=%d CTX:req=%d,hdr=%d SRC:req=%d,hdr=%d " - "RQS:req=%d,hdr=%d src_req=%p, dst_req=%p, peer=%d", - (uint16_t)recvreq->req_msgseq, hdr->hdr_match.hdr_seq, - recvreq->req_recv.req_base.req_comm->c_contextid, hdr->hdr_match.hdr_ctx, - recvreq->req_recv.req_base.req_ompi.req_status.MPI_SOURCE, - hdr->hdr_match.hdr_src, recvreq->req_restartseq, - hdr->hdr_restart.hdr_restartseq, - recvreq->remote_req_send.pval, (void *)recvreq, - recvreq->req_recv.req_base.req_ompi.req_status.MPI_SOURCE); - return; - } - - /* Increment restart number. */ - recvreq->req_restartseq++; - recvreq->req_errstate |= RECVREQ_RNDVRESTART_RECVED; - opal_output_verbose(30, mca_pml_bfo_output, - "RNDVRESTARTNOTIFY: received: outstanding receive events=%d, " - "PML=%d, RQS=%d, src_req=%p, dst_req=%p, peer=%d", - recvreq->req_events, recvreq->req_msgseq, recvreq->req_restartseq, - recvreq->remote_req_send.pval, (void *)recvreq, - recvreq->req_recv.req_base.req_ompi.req_status.MPI_SOURCE); - - if (0 == recvreq->req_events) { - mca_pml_bfo_recv_request_rndvrestartack(recvreq, MCA_PML_BFO_HDR_TYPE_RNDVRESTARTNOTIFY, - OMPI_SUCCESS, btl); - } - - return; -} - -/** - * Callback for when a RNDVRESTARTACK message is received. This - * message is sent from the receiver to the sender to acknowledge - * the receipt of the RNDVRESTARTNOTIFY message. At this point, - * the sender can reset the send request and restart the message. 
- */ -void mca_pml_bfo_recv_frag_callback_rndvrestartack(mca_btl_base_module_t* btl, - mca_btl_base_tag_t tag, - mca_btl_base_descriptor_t* des, - void* cbdata ) { - mca_btl_base_segment_t* segments = des->des_local; - mca_pml_bfo_hdr_t* hdr = (mca_pml_bfo_hdr_t*)segments->seg_addr.pval; - mca_pml_bfo_send_request_t* sendreq; - - bfo_hdr_ntoh(hdr, MCA_PML_BFO_HDR_TYPE_RNDVRESTARTACK); - sendreq = (mca_pml_bfo_send_request_t*)hdr->hdr_restart.hdr_src_req.pval; - - /* Check to see if we have received a duplicate message. The - * first three comparisons make sure that we are not looking at a - * recycled request. The last check makes sure we are not getting - * a duplicate message for this specific request. All of this is - * needed because the receiver might get an error and repost the - * RNDVRESTARTACK message, but the RNDVRESTARTACK was actually received. */ - if ((hdr->hdr_match.hdr_ctx != sendreq->req_send.req_base.req_comm->c_contextid) || - (hdr->hdr_match.hdr_src != sendreq->req_send.req_base.req_peer) || - (hdr->hdr_match.hdr_seq != (uint16_t)sendreq->req_send.req_base.req_sequence) || - (hdr->hdr_restart.hdr_restartseq != sendreq->req_restartseq)) { - opal_output_verbose(20, mca_pml_bfo_output, - "RNDVRESTARTACK: received: does not match request, dropping " - "PML:exp=%d,act=%d CTX:exp=%d,act=%d SRC:exp=%d,act=%d EXP:exp=%d,act=%d " - "src_req=%p, dst_req=%p, peer=%d", - (uint16_t)sendreq->req_send.req_base.req_sequence, hdr->hdr_match.hdr_seq, - sendreq->req_send.req_base.req_comm->c_contextid, hdr->hdr_match.hdr_ctx, - sendreq->req_send.req_base.req_peer, hdr->hdr_match.hdr_src, - sendreq->req_restartseq, hdr->hdr_restart.hdr_restartseq, - (void *)sendreq, sendreq->req_recv.pval, - sendreq->req_send.req_base.req_peer); - return; - } - - sendreq->req_restart++; - if (2 == sendreq->req_restart) { - opal_output_verbose(30, mca_pml_bfo_output, - "RNDVRESTARTACK: received: restarting send " - "PML=%d, RQS=%d, src_req=%p, dst_req=%p, peer=%d", - 
hdr->hdr_match.hdr_seq, hdr->hdr_restart.hdr_restartseq, - (void *)sendreq, sendreq->req_recv.pval, - sendreq->req_send.req_base.req_peer); - mca_pml_bfo_send_request_restart(sendreq, false, 0); - } else { - opal_output_verbose(30, mca_pml_bfo_output, - "RNDVRESTARTACK received: waiting for RNDVRESTARTNOTIFY completion " - "PML=%d, RQS=%d, src_req=%p, dst_req=%p, peer=%d", - hdr->hdr_match.hdr_seq, hdr->hdr_restart.hdr_restartseq, - (void *)sendreq, sendreq->req_recv.pval, - sendreq->req_send.req_base.req_peer); - } - return; -} - - -/** - * Callback for when a RECVERRNOTIFY message is received. This message - * is sent from the receiver to the sender and tells the sender that - * the receiver has seen an error. This will trigger the sender - * to start the request restart sequence. - */ -void mca_pml_bfo_recv_frag_callback_recverrnotify(mca_btl_base_module_t* btl, - mca_btl_base_tag_t tag, - mca_btl_base_descriptor_t* des, - void* cbdata ) { - mca_btl_base_segment_t* segments = des->des_local; - mca_pml_bfo_hdr_t* hdr = (mca_pml_bfo_hdr_t*)segments->seg_addr.pval; - mca_pml_bfo_send_request_t* sendreq; - - bfo_hdr_ntoh(hdr, MCA_PML_BFO_HDR_TYPE_RECVERRNOTIFY); - sendreq = (mca_pml_bfo_send_request_t*)hdr->hdr_restart.hdr_src_req.pval; - - /* First make sure that this message is pointing to a valid request. - * This can be determined if the communicator context, the source of - * the message, and the MPI sequence number all match. 
*/ - if ((hdr->hdr_match.hdr_ctx != sendreq->req_send.req_base.req_comm->c_contextid) || - (hdr->hdr_match.hdr_src != sendreq->req_send.req_base.req_peer) || - (hdr->hdr_match.hdr_seq != (uint16_t)sendreq->req_send.req_base.req_sequence)) { - opal_output_verbose(20, mca_pml_bfo_output, - "RECVERRNOTIFY: received: does not match request, dropping " - "PML:exp=%d,act=%d CTX:exp=%d,act=%d SRC:exp=%d,act=%d RQS:exp=%d,act=%d " - "src_req=%p, dst_req=%p, peer=%d", - (uint16_t)sendreq->req_send.req_base.req_sequence, hdr->hdr_match.hdr_seq, - sendreq->req_send.req_base.req_comm->c_contextid, hdr->hdr_match.hdr_ctx, - sendreq->req_send.req_base.req_peer, hdr->hdr_match.hdr_src, - sendreq->req_restartseq, hdr->hdr_restart.hdr_restartseq, - (void *)sendreq, sendreq->req_recv.pval, - sendreq->req_send.req_base.req_peer); - return; - } - - /* If a good ACK was never received, then the first ACK received - * might be a RECVERRNOTIFY message. In that case, the sendreq does not - * have a valid req_recv pointer in it. Therefore, check for that - * case and update the field in the sendreq if necessary. */ - if (NULL == sendreq->req_recv.pval) { - sendreq->req_recv = hdr->hdr_restart.hdr_dst_req; - } - - /* Now check to see a restart needs to be issued. The request - * sequence number in the header is compared against the current - * request sequence number in the send request. If the header - * sequence number is greater than or equal to the send request - * number, then a rndvrestartnotify is issued. There are some cases - * where a few extra rndvrestartnotifys are issued. That is OK as - * it will all work itself out. The idea is to prevent many - * restarts unnecessarily. This still allows multiple restarts to - * happen. It could be that sometime later another error occurs - * which initiates a restart. That is OK as it will have the new - * sequence number and all is well. 
*/ - if (hdr->hdr_restart.hdr_restartseq >= sendreq->req_restartseq) { - assert(sendreq->req_send.req_base.req_ompi.req_state == OMPI_REQUEST_ACTIVE); - sendreq->req_error++; - opal_output_verbose(30, mca_pml_bfo_output, - "RECVERRNOTIFY: received: sendreq has error, outstanding events=%d, " - "PML=%d, RQS=%d, src_req=%p, dst_req=%p, peer=%d", - sendreq->req_events, (uint16_t)sendreq->req_send.req_base.req_sequence, - sendreq->req_restartseq, (void *)sendreq, - sendreq->req_recv.pval, - sendreq->req_send.req_base.req_peer); - - if (0 == sendreq->req_events) { - mca_pml_bfo_send_request_rndvrestartnotify(sendreq, false, - MCA_PML_BFO_HDR_TYPE_RECVERRNOTIFY, - OMPI_SUCCESS, btl); - } - } else { - opal_output_verbose(30, mca_pml_bfo_output, - "RECVERRNOTIFY: received: error has already been noted, ignoring " - "PML:exp=%d,act=%d RQS:exp=%d,act=%d src_req=%p, dst_req=%p, peer=%d", - sendreq->req_restartseq, hdr->hdr_restart.hdr_restartseq, - (uint16_t)sendreq->req_send.req_base.req_sequence, hdr->hdr_match.hdr_seq, - (void *)sendreq, sendreq->req_recv.pval, - sendreq->req_send.req_base.req_peer); - } - return; -} - -/** - * Callback for when a RNDVRESTARTNACK message is received. This message - * is sent from the receiver to the sender and tells the sender that - * the receiver has already completed the message and there is nothing - * else to be done. The sender should then just make the send request - * complete. 
- */ -void mca_pml_bfo_recv_frag_callback_rndvrestartnack(mca_btl_base_module_t* btl, - mca_btl_base_tag_t tag, - mca_btl_base_descriptor_t* des, - void* cbdata ) { - - mca_btl_base_segment_t* segments = des->des_local; - mca_pml_bfo_hdr_t* hdr = (mca_pml_bfo_hdr_t*)segments->seg_addr.pval; - mca_pml_bfo_send_request_t* sendreq; - - bfo_hdr_ntoh(hdr, MCA_PML_BFO_HDR_TYPE_RNDVRESTARTNACK); - sendreq = (mca_pml_bfo_send_request_t*)hdr->hdr_restart.hdr_src_req.pval; - - /* Not convinced a RNDVRESTARTNACK that does not match a request can - * happen, but have the check in here anyways for now */ - if ((hdr->hdr_match.hdr_ctx != sendreq->req_send.req_base.req_comm->c_contextid) || - (hdr->hdr_match.hdr_src != sendreq->req_send.req_base.req_peer) || - (hdr->hdr_match.hdr_seq != (uint16_t)sendreq->req_send.req_base.req_sequence) || - (hdr->hdr_restart.hdr_restartseq != sendreq->req_restartseq)) { - opal_output_verbose(20, mca_pml_bfo_output, - "RNDVRESTARTNACK: received: does not match request, dropping " - "PML:exp=%d,act=%d CTX:exp=%d,act=%d SRC:exp=%d,act=%d EXP:exp=%d,act=%d " - "src_req=%p, dst_req=%p, peer=%d", - (uint16_t)sendreq->req_send.req_base.req_sequence, hdr->hdr_match.hdr_seq, - sendreq->req_send.req_base.req_comm->c_contextid, hdr->hdr_match.hdr_ctx, - sendreq->req_send.req_base.req_peer, hdr->hdr_match.hdr_src, - sendreq->req_restartseq, hdr->hdr_restart.hdr_restartseq, - (void *)sendreq, sendreq->req_recv.pval, - sendreq->req_send.req_base.req_peer); - return; - } - - opal_output_verbose(20, mca_pml_bfo_output, - "RNDVRESTARTNACK: received: marking send request as complete " - "PML=%d CTX=%d SRC=%d EXP=%d " - "src_req=%p, dst_req=%p, peer=%d", - (uint16_t)sendreq->req_send.req_base.req_sequence, - sendreq->req_send.req_base.req_comm->c_contextid, - sendreq->req_send.req_base.req_peer, sendreq->req_restartseq, - (void *)sendreq, sendreq->req_recv.pval, - sendreq->req_send.req_base.req_peer); - /* Mark the sender complete. This data exchange is over. 
*/ - send_request_pml_complete(sendreq); - return; -} - - -/** - * This function gets called when failover is enabled and an error - * occurs during the rendezvous protocol. A message is sent to the - * receiving side notifying the request that the communication is - * going to be starting over. However, none of the information in the - * send request is reset yet, so that any in flight fragments can - * still find a home. Information in the send request gets reset when - * the completion event for this send occurs AND an ACK has been - * received back from the receiver. - */ -void mca_pml_bfo_send_request_rndvrestartnotify(mca_pml_bfo_send_request_t* sendreq, - bool repost, mca_btl_base_tag_t tag, - int status, mca_btl_base_module_t* btl) -{ - mca_btl_base_descriptor_t* des; - mca_pml_bfo_restart_hdr_t* restart; - int rc; - mca_bml_base_btl_t* bml_btl; - ompi_proc_t* proc = (ompi_proc_t*)sendreq->req_send.req_base.req_proc; - mca_bml_base_endpoint_t* bml_endpoint = (mca_bml_base_endpoint_t*) proc->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_BML]; - - /* If this message is not a repost, then update the sequence number. */ - if (!repost) { - /* Bump up the rendezvous request sequence number. */ - sendreq->req_restartseq++; - } - - assert(0 == sendreq->req_events); - assert(0 != bml_endpoint->btl_eager.arr_size); - - /* In the case that this is started because the receiver has - * sent us a message, then attempt to use a different BTL than the - * error message was received on. This may potentially tickle the - * error sooner if this side has not seen it yet. */ - bml_btl = mca_bml_base_btl_array_get_next(&bml_endpoint->btl_eager); - if (bml_btl->btl == btl) { - /* If there is more than one BTL left, then we will get a - * different one. If there is only one, we will just get - * the same one back again. That is OK. 
*/ - bml_btl = mca_bml_base_btl_array_get_next(&bml_endpoint->btl_eager); - } - - /* allocate descriptor */ - mca_bml_base_alloc(bml_btl, &des, MCA_BTL_NO_ORDER, - sizeof(mca_pml_bfo_restart_hdr_t), - MCA_BTL_DES_FLAGS_PRIORITY | MCA_BTL_DES_FLAGS_BTL_OWNERSHIP | - MCA_BTL_DES_SEND_ALWAYS_CALLBACK); - if( OPAL_UNLIKELY(NULL == des) ) { - opal_output(0, "%s:%d Our of resources, cannot proceed", __FILE__, __LINE__); - ompi_rte_abort(-1, NULL); - } - - /* fill out header */ - restart = (mca_pml_bfo_restart_hdr_t*)des->des_local->seg_addr.pval; - restart->hdr_match.hdr_common.hdr_flags = 0; - restart->hdr_match.hdr_common.hdr_type = MCA_PML_BFO_HDR_TYPE_RNDVRESTARTNOTIFY; - restart->hdr_match.hdr_ctx = sendreq->req_send.req_base.req_comm->c_contextid; - restart->hdr_match.hdr_src = sendreq->req_send.req_base.req_comm->c_my_rank; - restart->hdr_match.hdr_seq = (uint16_t)sendreq->req_send.req_base.req_sequence; - restart->hdr_restartseq = sendreq->req_restartseq; - restart->hdr_src_req.pval = sendreq; - restart->hdr_dst_req = sendreq->req_recv; - restart->hdr_dst_rank = sendreq->req_send.req_base.req_peer; /* Needed for NACKs */ - restart->hdr_jobid = OMPI_PROC_MY_NAME->jobid; - restart->hdr_vpid = OMPI_PROC_MY_NAME->vpid; - - bfo_hdr_hton(restart, MCA_PML_BFO_HDR_TYPE_RNDVRESTARTNOTIFY, proc); - - /* initialize descriptor */ - des->des_cbfunc = mca_pml_bfo_rndvrestartnotify_completion; - - opal_output_verbose(30, mca_pml_bfo_output, - "RNDVRESTARTNOTIFY: sent: PML=%d, RQS(new)=%d, CTX=%d, SRC=%d, " - "src_req=%p, dst_req=%p, peer=%d", - (uint16_t)sendreq->req_send.req_base.req_sequence, sendreq->req_restartseq, - restart->hdr_match.hdr_ctx, restart->hdr_match.hdr_src, - (void *)sendreq, sendreq->req_recv.pval, - sendreq->req_send.req_base.req_peer); - - rc = mca_bml_base_send(bml_btl, des, MCA_PML_BFO_HDR_TYPE_RNDVRESTARTNOTIFY); - if( OPAL_UNLIKELY( rc < 0 ) ) { - opal_output(0, "[%s:%d] Cannot send rndvrestartnotify message", __FILE__, __LINE__); - ompi_rte_abort(-1, 
NULL); - } - -} - -/** - * This function restarts a RNDV send request. When this is called, - * all the fields in the send request are reset and the send is - * started over. The sendreq->req_restartseq will be non-zero which will - * trigger a special flag in the RNDV header which indicates the match - * has already happened on the receiving side. - */ -void mca_pml_bfo_send_request_restart(mca_pml_bfo_send_request_t* sendreq, - bool repost, mca_btl_base_tag_t tag) -{ - size_t offset = 0; - opal_list_item_t *first_item; - opal_list_item_t *last_item; - mca_bml_base_endpoint_t* endpoint; - size_t i; - - /* If the tag is something valid, it was a repost. We could also - * check the repost field as well. Maybe I can drop the - * repost and have the tag double as it. */ - switch (tag) { - case MCA_PML_BFO_HDR_TYPE_RNDV: - opal_output_verbose(30, mca_pml_bfo_output, - "RNDV: completion failed, reset and repost: PML=%d, RQS=%d, " - "CTX=%d, SRC=%d, src_req=%p, peer=%d", - (uint16_t)sendreq->req_send.req_base.req_sequence, sendreq->req_restartseq, - sendreq->req_send.req_base.req_comm->c_contextid, - sendreq->req_send.req_base.req_comm->c_my_rank, (void *)sendreq, - sendreq->req_send.req_base.req_peer); - break; - case MCA_PML_BFO_HDR_TYPE_RGET: - opal_output_verbose(30, mca_pml_bfo_output, - "RGET: completion failed, reset and repost: PML=%d, RQS=%d, " - "CTX=%d, SRC=%d, src_req=%p, peer=%d", - (uint16_t)sendreq->req_send.req_base.req_sequence, sendreq->req_restartseq, - sendreq->req_send.req_base.req_comm->c_contextid, - sendreq->req_send.req_base.req_comm->c_my_rank, (void *)sendreq, - sendreq->req_send.req_base.req_peer); - break; - default: - break; - } - - /* Return mpool resources, they get reacquired when request starts over. 
*/ - mca_pml_bfo_free_rdma_resources(sendreq); - - /* Release any memory in use if this is a buffered send */ - if (sendreq->req_send.req_send_mode == MCA_PML_BASE_SEND_BUFFERED && - sendreq->req_send.req_addr != sendreq->req_send.req_base.req_addr) { - mca_pml_base_bsend_request_fini((ompi_request_t*)sendreq); - } - - /* Clear out any unsent send ranges. Recreate the functionality - * from the get_send_range() and get_next_send_range() functions. */ - OPAL_THREAD_LOCK(&sendreq->req_send_range_lock); - first_item = opal_list_get_begin(&sendreq->req_send_ranges); - last_item = opal_list_get_last(&sendreq->req_send_ranges); - while (first_item != last_item) { - opal_list_remove_item(&sendreq->req_send_ranges, last_item); - OMPI_FREE_LIST_RETURN_MT(&mca_pml_bfo.send_ranges, (ompi_free_list_item_t *)last_item); - last_item = opal_list_get_last(&sendreq->req_send_ranges); - } - OPAL_THREAD_UNLOCK(&sendreq->req_send_range_lock); - - /* Reset the converter to the beginning. */ - opal_convertor_set_position(&sendreq->req_send.req_base.req_convertor, - &offset); - - /* Bump up internal sequence number to handle possible duplicate - * RNDV messages. In the case of reposting a RNDV message, do not - * increment the value. That way, a duplicate message can be - * detected. */ - if (!repost) { - sendreq->req_restartseq++; - } - - /* This code here is essentially the same is mca_pml_bfo_send_request_start() - * but with a few modifications since we are restarting the request, not - * starting entirely from scratch. */ - endpoint = (mca_bml_base_endpoint_t*)sendreq->req_send.req_base.req_proc->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_BML]; - sendreq->req_endpoint = endpoint; - sendreq->req_state = 0; - sendreq->req_lock = 0; - sendreq->req_pipeline_depth = 0; - sendreq->req_bytes_delivered = 0; - sendreq->req_pending = MCA_PML_BFO_SEND_PENDING_NONE; - - /* Note that we do not reset the following three items. - * They stay with their original values. 
- * sendreq->req_send.req_base.req_sequence - * sendreq->req_restartseq - * sendreq->req_recv.pval - */ - sendreq->req_restart = 0; /* reset in case we restart again */ - sendreq->req_error = 0; /* clear error state */ - sendreq->req_events = 0; /* clear events, probably 0 anyways */ - - MCA_PML_BASE_SEND_START( &sendreq->req_send.req_base ); - - for(i = 0; i < mca_bml_base_btl_array_get_size(&endpoint->btl_eager); i++) { - mca_bml_base_btl_t* bml_btl; - int rc; - - /* select a btl */ - bml_btl = mca_bml_base_btl_array_get_next(&endpoint->btl_eager); - rc = mca_pml_bfo_send_request_start_btl(sendreq, bml_btl); - if(OPAL_LIKELY(OMPI_ERR_OUT_OF_RESOURCE != rc)) - return; - } - add_request_to_send_pending(sendreq, MCA_PML_BFO_SEND_PENDING_START, true); -} - -/** - * This function will repost a match fragment. This function has to - * handle the case where there may not be a request associated with - * the fragment and just use the information in the fragment to - * repost the send. - */ -void mca_pml_bfo_repost_match_fragment(struct mca_btl_base_descriptor_t* des) -{ - mca_pml_bfo_send_request_t* sendreq = (mca_pml_bfo_send_request_t*)des->des_cbdata; - mca_bml_base_btl_t* bml_btl = (mca_bml_base_btl_t*) des->des_context; - struct mca_bml_base_endpoint_t* endpoint; - int rc; - size_t offset = 0; - - /* At this point a determination has to be made whether the - * BFO_HDR_TYPE_MATCH fragment was sent via the sendi interface or - * via the regular send interface. This is important because if it - * was sent via the sendi interface, then the request associated - * with it has already been completed and released. This can be - * determined by looking at the des->des_flags field of the - * descriptor. If the ALWAYS_CALLBACK flag is set then it is known - * that there is a valid send request associated with the fragment - * and it can be used to extricate information. 
If ALWAYS_CALLBACK - * is not set, then the endpoint information is in the callback - * data field and where to resend the fragment can be determined - * from the fragment. */ - if (des->des_flags & MCA_BTL_DES_SEND_ALWAYS_CALLBACK) { - endpoint = sendreq->req_endpoint; - opal_output_verbose(30, mca_pml_bfo_output, - "MATCH: repost: src_req=%p", - (void *)sendreq); - } else { - endpoint = des->des_cbdata; - opal_output_verbose(30, mca_pml_bfo_output, - "MATCH: repost: des=%p (sendi fragment)", - (void *)des); - } - - assert(0 != endpoint->btl_eager.arr_size); - bml_btl = mca_bml_base_btl_array_get_next(&endpoint->btl_eager); - - if (des->des_flags & MCA_BTL_DES_SEND_ALWAYS_CALLBACK) { - /* Reset the converter to the beginning if the message is - * not a zero-length message. In the case of zero-length - * message, the convertor is not being used. */ - if (0 != sendreq->req_send.req_bytes_packed) { - opal_convertor_set_position(&sendreq->req_send.req_base.req_convertor, - &offset); - } - rc = mca_pml_bfo_send_request_start_btl(sendreq, bml_btl); - if (OMPI_SUCCESS == rc) { - return; - } else if (OMPI_ERR_OUT_OF_RESOURCE == rc) { - opal_output_verbose(30, mca_pml_bfo_output, - "Warning: delaying reposting of BFO_HDR_TYPE_MATCH, btls=%d", - (int)sendreq->req_endpoint->btl_eager.arr_size); - add_request_to_send_pending(sendreq, MCA_PML_BFO_SEND_PENDING_START, true); - return; - } else { - opal_output(0, "%s:%d FATAL ERROR, cannot repost BFO_HDR_TYPE_MATCH", - __FILE__, __LINE__); - ompi_rte_abort(-1, NULL); - } - } else { - /* No send request available so alloc and repost explicitly */ - mca_btl_base_descriptor_t* newdes = NULL; - mca_btl_base_segment_t* oldseg; - mca_btl_base_segment_t* newseg; - - oldseg = des->des_local; - /* The alloc routine must be called with the MCA_BTL_NO_ORDER - * flag so that the allocation routine works. The allocation - * will fill in the order flag in the descriptor. 
*/ - mca_bml_base_alloc( bml_btl, &newdes, - MCA_BTL_NO_ORDER, - oldseg->seg_len, - MCA_BTL_DES_FLAGS_PRIORITY | MCA_BTL_DES_FLAGS_BTL_OWNERSHIP); - if (OPAL_UNLIKELY(NULL == newdes)) { - opal_output(0, "%s:%d FATAL ERROR, cannot repost BFO_HDR_TYPE_MATCH", - __FILE__, __LINE__); - ompi_rte_abort(-1, NULL); - } - newseg = newdes->des_local; - /* Copy over all the data that is actually sent over the wire */ - memcpy(newseg->seg_addr.pval, oldseg->seg_addr.pval, oldseg->seg_len); - newseg->seg_len = oldseg->seg_len; - - /* This call will either return OMPI_SUCCESS or OMPI_ERROR. The - * OMPI_SUCCESS only says that the send request can be freed. - * It may be that the message was queued up in the BTL. */ - rc = mca_bml_base_send(bml_btl, newdes, MCA_PML_BFO_HDR_TYPE_MATCH); - - /* Some BTLs will set the CALLBACK flag but we do not want that - * as there is no longer a request associated with this descriptor. - * Therefore, always make sure it is cleared. */ - newdes->des_flags &= ~MCA_BTL_DES_SEND_ALWAYS_CALLBACK; - - if( OPAL_LIKELY( rc >= 0 )) { - /* Just let the normal flow of data free whatever needs - * to be freed */ - return; - } else { - opal_output(0, "%s:%d FATAL ERROR, cannot repost BFO_HDR_TYPE_MATCH", - __FILE__, __LINE__); - ompi_rte_abort(-1, NULL); - } - } - /* No need to free any descriptors. The BTLs take care of it since - * we originally allocated with MCA_BTL_DES_FLAGS_BTL_OWNERSHIP. */ -} - -/** - * Completion callback for rndvrestartnotify completion event. If the - * RNDVRESTARTACK has already been received, then reset and restart. - * Otherwise, just update the state and let the RNDVRESTARTACK trigger - * the reset and restart. 
- */ -void -mca_pml_bfo_rndvrestartnotify_completion(mca_btl_base_module_t* btl, - struct mca_btl_base_endpoint_t* ep, - struct mca_btl_base_descriptor_t* des, - int status) -{ - mca_pml_bfo_restart_hdr_t* restart; - mca_pml_bfo_send_request_t* sendreq; - - restart = (mca_pml_bfo_restart_hdr_t*)des->des_local->seg_addr.pval; - sendreq = (mca_pml_bfo_send_request_t*) restart->hdr_src_req.pval; - - /* Need to resend this message in the case that it fails */ - if( OPAL_UNLIKELY((OMPI_SUCCESS != status))) { - opal_output_verbose(30, mca_pml_bfo_output, - "RNDVRESTARTNOTIFY: completion failed: repost " - "PML=%d, RQS=%d, src_req=%p, dst_req=%p, peer=%d", - (uint16_t)sendreq->req_send.req_base.req_sequence, - sendreq->req_restartseq, - (void *)sendreq, sendreq->req_recv.pval, - sendreq->req_send.req_base.req_peer); - /* Repost the message and indicate it is a repost, not a new one. No need - * to check the req_events as this is the only possible outstanding send - * event when we have posted this message. We also know the sendreq is still - * available because nothing can proceed until this completion event happens - * successfully as we track the req_restart value. */ - mca_pml_bfo_send_request_rndvrestartnotify(sendreq, true, - MCA_PML_BFO_HDR_TYPE_RNDVRESTARTNOTIFY, - status, btl); - return; - } - - /* The req_restart value is incremented to indicate completion of - * the RNDVRESTARTNOTIFY message. Then (typically) the arrival of the - * ACK message will cause the request to reset and restart. Need to - * make sure that RNDVRESTARTNOTIFY callback has been called as well as - * the ACK back from the receiver prior to resetting and restarting - * the request. This is needed in case we get an error on the - * RNDVRESTARTNOTIFY message, but it actually makes it over. We want - * to make sure the send request has not restarted yet. So, keep a - * counter that counts to 2. 
*/ - sendreq->req_restart++; - if (2 == sendreq->req_restart) { - opal_output_verbose(30, mca_pml_bfo_output, - "RNDVRESTARTNOTIFY: completion: restarting request " - "PML=%d, RQS=%d, CTX=%d, src_req=%p, dst_req=%p, peer=%d", - (uint16_t)sendreq->req_send.req_base.req_sequence, - sendreq->req_restartseq, - sendreq->req_send.req_base.req_comm->c_contextid, - sendreq->req_recv.pval, (void *)sendreq, - sendreq->req_send.req_base.req_peer); - mca_pml_bfo_send_request_restart(sendreq, false, 0); - } else { - opal_output_verbose(30, mca_pml_bfo_output, - "RNDVRESTARTNOTIFY: completion: waiting for ack " - "PML=%d, RQS=%d, CTX=%d, src_req=%p, dst_req=%p, peer=%d", - (uint16_t)sendreq->req_send.req_base.req_sequence, - sendreq->req_restartseq, - sendreq->req_send.req_base.req_comm->c_contextid, - sendreq->req_recv.pval, (void *)sendreq, - sendreq->req_send.req_base.req_peer); - } -} - -/** - * This function is called when an error is detected on a completion - * event on the receiving side. This can come from a ACK, PUT, RDMA - * read (GET) or RECVERRNOTIFY completion event. When this happens, check - * the state of the request and decide if the sender needs be notified - * that a problem was seen. If no RECVERRNOTIFY message has been sent and - * no RNDVRESTARTNOTIFY has been received from the sender, then send a - * message telling the sender an error was seen. 
- */ -void mca_pml_bfo_recv_request_recverrnotify(mca_pml_bfo_recv_request_t* recvreq, - mca_btl_base_tag_t tag, int status) -{ - mca_btl_base_descriptor_t* des; - mca_pml_bfo_restart_hdr_t* restart; - ompi_proc_t* proc = (ompi_proc_t*)recvreq->req_recv.req_base.req_proc; - mca_bml_base_endpoint_t* bml_endpoint = (mca_bml_base_endpoint_t*) proc->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_BML]; - mca_bml_base_btl_t* bml_btl; - int rc; - - assert(0 != bml_endpoint->btl_eager.arr_size); - - bml_btl = mca_bml_base_btl_array_get_next(&bml_endpoint->btl_eager); - - /* allocate descriptor */ - mca_bml_base_alloc(bml_btl, &des, MCA_BTL_NO_ORDER, - sizeof(mca_pml_bfo_restart_hdr_t), - MCA_BTL_DES_FLAGS_PRIORITY | MCA_BTL_DES_FLAGS_BTL_OWNERSHIP | - MCA_BTL_DES_SEND_ALWAYS_CALLBACK); - if( OPAL_UNLIKELY(NULL == des) ) { - opal_output(0, "%s:%d Out of resources, cannot proceed", __FILE__, __LINE__); - ompi_rte_abort(-1, NULL); - } - - /* fill out header */ - restart = (mca_pml_bfo_restart_hdr_t*)des->des_local->seg_addr.pval; - restart->hdr_match.hdr_common.hdr_flags = 0; - restart->hdr_match.hdr_common.hdr_type = MCA_PML_BFO_HDR_TYPE_RECVERRNOTIFY; - restart->hdr_match.hdr_ctx = recvreq->req_recv.req_base.req_comm->c_contextid; - restart->hdr_match.hdr_src = recvreq->req_recv.req_base.req_comm->c_my_rank; - restart->hdr_match.hdr_seq = (uint16_t)recvreq->req_msgseq; - restart->hdr_restartseq = recvreq->req_restartseq; - restart->hdr_src_req = recvreq->remote_req_send; - restart->hdr_dst_req.pval = recvreq; - - bfo_hdr_hton(restart, MCA_PML_BFO_HDR_TYPE_RECVERRNOTIFY, proc); - - /* initialize descriptor */ - des->des_cbfunc = mca_pml_bfo_recv_restart_completion; - - opal_output_verbose(30, mca_pml_bfo_output, - "RECVERRNOTIFY: sending to sender, " - "PML=%d, RQS=%d, src_req=%p, dst_req=%p, peer=%d, btl=%p", - recvreq->req_msgseq, recvreq->req_restartseq, - recvreq->remote_req_send.pval, - (void *)recvreq, - recvreq->req_recv.req_base.req_ompi.req_status.MPI_SOURCE, - (void 
*)bml_btl->btl); - - rc = mca_bml_base_send(bml_btl, des, MCA_PML_BFO_HDR_TYPE_RECVERRNOTIFY); - if( OPAL_UNLIKELY( rc < 0 ) ) { - opal_output(0, "[%s:%d] Cannot send recverrnotify message", __FILE__, __LINE__); - ompi_rte_abort(-1, NULL); - } - /* Prevent future error messages on this request */ - recvreq->req_errstate |= RECVREQ_RECVERRSENT; -} - -/** - * This function is called when it may be time to send a RNDVRESTARTACK - * message back to the sending side. This can happen because we - * received a RNDVRESTARTNOTIFY message from the sender. This can - * also happen if we have noticed that the request has received the - * RNDVRESTARTNOTIFY message, but has not yet sent out the RNDVRESTARTACK - * because there were still some pending receive events on the request. - * That means we can enter this routine from a completion event on a ACK, - * PUT, or RDMA read as well as from the receipt of a RNDVRESTARTNOTIFY - * message. If all is good, we sent the RNDVRESTARTACK message back to - * the sender. Then sometime later a message will arrive telling us - * to reset and restart the receive request. - */ -void mca_pml_bfo_recv_request_rndvrestartack(mca_pml_bfo_recv_request_t* recvreq, - mca_btl_base_tag_t tag, int status, - mca_btl_base_module_t* btl) -{ - mca_btl_base_descriptor_t* des; - mca_pml_bfo_restart_hdr_t* restart; - ompi_proc_t* proc = (ompi_proc_t*)recvreq->req_recv.req_base.req_proc; - mca_bml_base_endpoint_t* bml_endpoint = (mca_bml_base_endpoint_t*) proc->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_BML]; - mca_bml_base_btl_t* bml_btl; - int rc; - - assert((recvreq->req_errstate & RECVREQ_RNDVRESTART_RECVED) == RECVREQ_RNDVRESTART_RECVED); - assert((recvreq->req_errstate & RECVREQ_RNDVRESTART_ACKED) == 0); - assert(0 != bml_endpoint->btl_eager.arr_size); - - bml_btl = mca_bml_base_btl_array_get_next(&bml_endpoint->btl_eager); - - /* Attempt to use a different BTL than the error message was - * received on. 
This may potentially tickle the error sooner if - * this side has not seen it yet. */ - if (bml_btl->btl == btl) { - /* If there is more than one BTL left, then we will get a - * different one. If there is only one, we will just get - * the same one back again. That is OK. */ - bml_btl = mca_bml_base_btl_array_get_next(&bml_endpoint->btl_eager); - } - - /* allocate descriptor */ - mca_bml_base_alloc(bml_btl, &des, MCA_BTL_NO_ORDER, - sizeof(mca_pml_bfo_restart_hdr_t), - MCA_BTL_DES_FLAGS_PRIORITY | MCA_BTL_DES_FLAGS_BTL_OWNERSHIP | - MCA_BTL_DES_SEND_ALWAYS_CALLBACK); - if( OPAL_UNLIKELY(NULL == des) ) { - opal_output(0, "%s:%d Out of resources, cannot proceed", __FILE__, __LINE__); - ompi_rte_abort(-1, NULL); - } - - /* fill out header */ - restart = (mca_pml_bfo_restart_hdr_t*)des->des_local->seg_addr.pval; - restart->hdr_match.hdr_common.hdr_flags = 0; - restart->hdr_match.hdr_common.hdr_type = MCA_PML_BFO_HDR_TYPE_RNDVRESTARTACK; - restart->hdr_match.hdr_ctx = recvreq->req_recv.req_base.req_comm->c_contextid; - restart->hdr_match.hdr_src = recvreq->req_recv.req_base.req_comm->c_my_rank; - restart->hdr_match.hdr_seq = (uint16_t)recvreq->req_msgseq; - restart->hdr_restartseq = recvreq->req_restartseq; - restart->hdr_src_req = recvreq->remote_req_send; - restart->hdr_dst_req.pval = recvreq; - - bfo_hdr_hton(restart, MCA_PML_BFO_HDR_TYPE_RNDVRESTARTACK, proc); - - /* initialize descriptor */ - des->des_cbfunc = mca_pml_bfo_recv_restart_completion; - des->des_cbdata = (void *)proc; - - opal_output_verbose(30, mca_pml_bfo_output, - "RNDVRESTARTACK: due to PML tag=%d completion, sending to " - "sender, PML=%d, RQS=%d, src_req=%p, dst_req=%p, status=%d, " - "peer=%d, btl=%p", - tag, recvreq->req_msgseq, recvreq->req_restartseq, - recvreq->remote_req_send.pval, (void *)recvreq, status, - recvreq->req_recv.req_base.req_ompi.req_status.MPI_SOURCE, - (void *)bml_btl->btl); - - rc = mca_bml_base_send(bml_btl, des, MCA_PML_BFO_HDR_TYPE_RNDVRESTARTACK); - if( OPAL_UNLIKELY( 
rc < 0 ) ) { - opal_output(0, "[%s:%d] Cannot send rndvrestartack message", __FILE__, __LINE__); - ompi_rte_abort(-1, NULL); - } - /* Move to the next state so we do not send anymore ACKs */ - recvreq->req_errstate &= ~RECVREQ_RNDVRESTART_RECVED; - recvreq->req_errstate |= RECVREQ_RNDVRESTART_ACKED; -} - -/** - * Called after the receipt of a RNDVRESTARTNOTIFY message to a request - * that no longer matches. This can happen if the sender detected an - * error, but the receiver actually received all the data. Therefore - * send a NACK back instead of the ACK so that the sender can complete - * its request. This happens very rarely. Note that we need to make - * use of the hdr_dst_rank that we received from the notify message. - * This is so the sending side make sure the message matches a valid - * request on the sending side. - */ -void mca_pml_bfo_recv_request_rndvrestartnack(mca_btl_base_descriptor_t* olddes, - ompi_proc_t* ompi_proc, bool repost) -{ - mca_btl_base_segment_t* segments; - mca_pml_bfo_restart_hdr_t* hdr; /* hdr of NOTIFY message */ - mca_pml_bfo_restart_hdr_t* nack; /* hdr of NACK message */ - mca_btl_base_descriptor_t* des; - mca_bml_base_endpoint_t* bml_endpoint; - mca_bml_base_btl_t* bml_btl; - int rc; - - if (repost) { - /* In the case where we are reposting the NACK, the information - * is in the src area, since we are reposting a send. In addition, - * we get the ompi_proc from the old descriptor. 
*/ - ompi_proc = olddes->des_cbdata; - } - - segments = olddes->des_local; - hdr = (mca_pml_bfo_restart_hdr_t*)segments->seg_addr.pval; - - bml_endpoint = ompi_proc->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_BML]; - assert(0 != bml_endpoint->btl_eager.arr_size); - bml_btl = mca_bml_base_btl_array_get_next(&bml_endpoint->btl_eager); - - /* allocate descriptor */ - mca_bml_base_alloc(bml_btl, &des, MCA_BTL_NO_ORDER, - sizeof(mca_pml_bfo_restart_hdr_t), - MCA_BTL_DES_FLAGS_PRIORITY | MCA_BTL_DES_FLAGS_BTL_OWNERSHIP | - MCA_BTL_DES_SEND_ALWAYS_CALLBACK); - if( OPAL_UNLIKELY(NULL == des) ) { - opal_output(0, "%s:%d Out of resources, cannot proceed", __FILE__, __LINE__); - ompi_rte_abort(-1, NULL); - } - - /* fill out header */ - nack = (mca_pml_bfo_restart_hdr_t*)des->des_local->seg_addr.pval; - nack->hdr_match.hdr_common.hdr_flags = 0; - nack->hdr_match.hdr_common.hdr_type = MCA_PML_BFO_HDR_TYPE_RNDVRESTARTNACK; - nack->hdr_match.hdr_ctx = hdr->hdr_match.hdr_ctx; - nack->hdr_match.hdr_src = hdr->hdr_dst_rank; /* Receiver rank */ - nack->hdr_match.hdr_seq = hdr->hdr_match.hdr_seq; - nack->hdr_restartseq = hdr->hdr_restartseq; - nack->hdr_src_req = hdr->hdr_src_req; - nack->hdr_dst_req.pval = 0; - - bfo_hdr_hton(nack, MCA_PML_BFO_HDR_TYPE_RNDVRESTARTNACK, ompi_proc); - - /* Initialize descriptor. Save away ompi_proc in case we need - * to respost this fragmnet. 
*/ - des->des_cbfunc = mca_pml_bfo_recv_restart_completion; - des->des_cbdata = ompi_proc; - - opal_output_verbose(30, mca_pml_bfo_output, - "RNDVRESTARTNACK: sending to sender, " - "PML=%d, RQS=%d, CTX=%d, SRC=%d, peer=%d", - nack->hdr_match.hdr_seq, nack->hdr_restartseq, - nack->hdr_match.hdr_ctx, nack->hdr_match.hdr_src, - OMPI_CAST_RTE_NAME(&ompi_proc->super.proc_name)->vpid); - - rc = mca_bml_base_send(bml_btl, des, MCA_PML_BFO_HDR_TYPE_RNDVRESTARTNACK); - if( OPAL_UNLIKELY( rc < 0 ) ) { - opal_output(0, "[%s:%d] Cannot send rndvrestartnack message", __FILE__, __LINE__); - ompi_rte_abort(-1, NULL); - } -} - - -/** - * Reset all the receive request fields to match what a request - * looks like when it is first started. This gets called when - * the rendezvous/rget message is being restarted. - */ -void mca_pml_bfo_recv_request_reset(mca_pml_bfo_recv_request_t* match) { - int i; - - assert(true != match->req_recv.req_base.req_pml_complete); - - /* Free up any resources that were reserved for this receive. This - * was copied from the receive completion code. */ - for(i = 0; i < (int)match->req_rdma_cnt; i++) { - mca_mpool_base_registration_t* btl_reg = match->req_rdma[i].btl_reg; - if( NULL != btl_reg && btl_reg->mpool != NULL) { - btl_reg->mpool->mpool_deregister( btl_reg->mpool, btl_reg ); - } - } - match->req_rdma_cnt = 0; - - /* This code is mostly copied from mca_pml_bfo_recv_req_start. - * Note 1: Leave req_bytes_expected as the original value. No - * need to adjust this as it is set when convertor is created. - * Note 2: Leave req_bytes_delivered as the original value. - * This is created when the convertor is created and represents - * the expected bytes from the user. 
*/ - assert(0 == match->req_events); - match->req_errstate = 0; - match->req_lock = 0; - match->req_pipeline_depth = 0; - match->req_bytes_received = 0; - match->req_rdma_idx = 0; - match->req_rdma_offset = 0; - match->req_send_offset = 0; - match->req_pending = false; - match->req_ack_sent = false; - match->req_restartseq++; - - /* These really should not need to be set, but this matches some - * of the initialization within MCA_PML_BASE_RECV_START. */ - match->req_recv.req_base.req_pml_complete = false; - match->req_recv.req_base.req_ompi.req_complete = false; - match->req_recv.req_base.req_ompi.req_state = OMPI_REQUEST_ACTIVE; - - /* Reset the convertor */ - opal_convertor_set_position(&match->req_recv.req_base.req_convertor, - &match->req_rdma_offset); - return; -} - -/* - * Completion callback for RNDVRESTARTACK, RNDVRESTARTNACK and RECVERRNOTIFY. - */ -void mca_pml_bfo_recv_restart_completion( mca_btl_base_module_t* btl, - struct mca_btl_base_endpoint_t* ep, - struct mca_btl_base_descriptor_t* des, - int status ) -{ - if(OPAL_UNLIKELY(OMPI_SUCCESS != status)) { - mca_pml_bfo_common_hdr_t* common = des->des_local->seg_addr.pval; - mca_pml_bfo_restart_hdr_t* restart; /* RESTART header */ - mca_pml_bfo_recv_request_t* recvreq; - - switch (common->hdr_type) { - case MCA_PML_BFO_HDR_TYPE_RNDVRESTARTACK: - restart = (mca_pml_bfo_restart_hdr_t*)des->des_local->seg_addr.pval; - recvreq = (mca_pml_bfo_recv_request_t*) restart->hdr_dst_req.pval; - opal_output_verbose(30, mca_pml_bfo_output, - "RNDVRESTARTACK: completion failed: try again " - "PML:req=%d,hdr=%d RQS:req=%d,hdr=%d CTX:req=%d,hdr=%d " - "src_req=%p, dst_req=%p, peer=%d", - recvreq->req_msgseq, restart->hdr_match.hdr_seq, - recvreq->req_restartseq, restart->hdr_restartseq, - recvreq->req_recv.req_base.req_comm->c_contextid, - restart->hdr_match.hdr_ctx, - recvreq->remote_req_send.pval, - (void *)recvreq, - recvreq->req_recv.req_base.req_ompi.req_status.MPI_SOURCE); - - /* Adjust the states back to avoid 
assert errors */ - recvreq->req_errstate &= ~RECVREQ_RNDVRESTART_ACKED; - recvreq->req_errstate |= RECVREQ_RNDVRESTART_RECVED; - mca_pml_bfo_recv_request_rndvrestartack(recvreq, MCA_PML_BFO_HDR_TYPE_RNDVRESTARTACK, - status, btl); - break; - case MCA_PML_BFO_HDR_TYPE_RNDVRESTARTNACK: - opal_output_verbose(30, mca_pml_bfo_output, - "RNDVRESTARTNACK: completion failed: try again " - "des=%p ", (void *)des); - /* Just blast it again. No request associated with it. */ - mca_pml_bfo_recv_request_rndvrestartnack(des, NULL, true); - break; - case MCA_PML_BFO_HDR_TYPE_RECVERRNOTIFY: - restart = (mca_pml_bfo_restart_hdr_t*)des->des_local->seg_addr.pval; - recvreq = (mca_pml_bfo_recv_request_t*) restart->hdr_dst_req.pval; - /* With just two BTLs, this should never happen as we are - * typically sending the RECVERRNOTIFY message on the - * working BTL. But, just in case, if we get an error, - * send it again. */ - opal_output_verbose(30, mca_pml_bfo_output, - "RECVERRNOTIFY: completion failed: try again, " - "PML=%d, RQS=%d, src_req=%p, dst_req=%p, peer=%d", - recvreq->req_msgseq, recvreq->req_restartseq, - recvreq->remote_req_send.pval, - (void *)recvreq, - recvreq->req_recv.req_base.req_ompi.req_status.MPI_SOURCE); - mca_pml_bfo_recv_request_recverrnotify(recvreq, MCA_PML_BFO_HDR_TYPE_RECVERRNOTIFY, - status); - break; - default: - opal_output(0, "[%s:%d] Unknown callback error", __FILE__, __LINE__); - ompi_rte_abort(-1, NULL); - } - } -} - -/* - * Remove a btl for future communication on an endpoint. - */ -void mca_pml_bfo_map_out_btl(struct mca_btl_base_module_t* btl, - ompi_proc_t *errproc, char *btlname) -{ - mca_bml_base_endpoint_t* ep; - bool remove = false; - int i; - - ep = (mca_bml_base_endpoint_t*)errproc->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_BML]; - - /* The bml_del_proc_btl function does not indicate if it - * actually removed a btl, so let me check up front. This is - * done so that we can only print out messages when a btl is - * actually going to be removed. 
These arrays are small so it - * is OK to walk through all of them even though it may be - * redundant. */ - for( i = 0; i < (int)ep->btl_eager.arr_size; i++ ) { - if( ep->btl_eager.bml_btls[i].btl == btl ) { - remove = true; - } - } - for( i = 0; i < (int)ep->btl_send.arr_size; i++ ) { - if( ep->btl_send.bml_btls[i].btl == btl ) { - remove = true; - } - } - for( i = 0; i < (int)ep->btl_rdma.arr_size; i++ ) { - if( ep->btl_rdma.bml_btls[i].btl == btl ) { - remove = true; - } - } - - if (true == remove) { - mca_bml.bml_del_proc_btl(errproc, btl); - - opal_output_verbose(10, mca_pml_bfo_output, - "BTL %s error: rank=%d mapping out %s " - "to rank=%d on node=%s \n", - btl->btl_component->btl_version.mca_component_name, - OMPI_PROC_MY_NAME->vpid, - btlname, OMPI_CAST_RTE_NAME(&errproc->super.proc_name)->vpid, - (NULL == errproc->super.proc_hostname) ? "unknown" : errproc->super.proc_hostname); - - /* Need to search for any pending packets associated - * with this endpoint and remove them. We may also - * have to restarts depending on the state of the - * requests. */ - mca_pml_bfo_error_pending_packets(btl, ep); - - if ((ep->btl_eager.arr_size == 0) && - (ep->btl_send.arr_size == 0) && - (ep->btl_rdma.arr_size == 0)) { - opal_output(0, "%s:%d: No more interfaces, aborting", - __FILE__, __LINE__); - ompi_rte_abort(-1, NULL); - } - } -} - -void mca_pml_bfo_failover_error_handler(struct mca_btl_base_module_t* btl, - int32_t flags, ompi_proc_t *errproc, char *btlname) -{ - ompi_proc_t** procs; - size_t p, num_procs; - - /* If we are in here, we know that the we were called - * with the flags == MCA_BTL_ERROR_FLAGS_NONFATAL so no - * need to check it in here. 
*/ - assert(flags & MCA_BTL_ERROR_FLAGS_NONFATAL); - - procs = ompi_proc_all(&num_procs); - - if(NULL == procs) { - opal_output(0, "%s:%d: Out of memory, giving up.", - __FILE__, __LINE__); - ompi_rte_abort(-1, NULL); - } - - if (NULL == btlname) { - btlname = "unknown"; - } - - /* If the process to map out is not specified then map out the - * entire BTL. Otherwise, only map out the BTL for the specific - * remote process. */ - if (NULL == errproc) { - for( p = 0; p < num_procs; p++ ) { - mca_pml_bfo_map_out_btl(btl, procs[p], btlname); - } - } else { - mca_pml_bfo_map_out_btl(btl, errproc, btlname); - } - free(procs); -} - -/** - * This function is called since when we are mapping out a BML. This - * will walk through the four PML lists and dispatch with the - * fragments/requests. There are four different lists and each one is - * handled slighty differently. In all cases, we first see if the - * message is associated with the endpoint that is being mapped out. - * If not, then just leave it alone and put it back on the list. If - * it is associated with the endpoint, then a each list handles it - * slighlty differently. Also, in some cases, we actually adjust the - * pointers to the BMLs in the messages as they may have changed when - * the BML is mapped out. That is because this is called after we - * have mapped out the offending BML and adjusted the array of - * available BMLs. - */ -static void mca_pml_bfo_error_pending_packets(mca_btl_base_module_t* btl, - mca_bml_base_endpoint_t* ep) { - int32_t i, s; - - /* The pckt_pending list contains both ACK and FIN messages. - * ACKs can be sent over any BTL associated with the endpoint. - * Therefore, the bml_btl entry for ACKS is NULL and they do - * not need to be adjusted. It is also worth noting that - * the ACK will be the only outstanding message associated - * with a request so we can just let nature takes it course. 
- * - * FIN messages do have a BML associated with them, but they - * can also be sent over any BTL. Therefore, adjust the bml - * pointer in the pckt to ensure it points at a valid BML. - */ - - s = (int32_t)opal_list_get_size(&mca_pml_bfo.pckt_pending); - for(i = 0; i < s; i++) { - mca_pml_bfo_pckt_pending_t *pckt; - opal_output_verbose(0, mca_pml_bfo_output, - "INFO: pckt_pending list has %d entries", s); -#if 1 - /* TODO: Error out until code is tested */ - opal_output_verbose(0, mca_pml_bfo_output, - "%s:%d: Support not implemented, aborting", - __FILE__, __LINE__); - ompi_rte_abort(-1, NULL); -#endif - OPAL_THREAD_LOCK(&mca_pml_bfo.lock); - pckt = (mca_pml_bfo_pckt_pending_t*) - opal_list_remove_first(&mca_pml_bfo.pckt_pending); - OPAL_THREAD_UNLOCK(&mca_pml_bfo.lock); - - /* My guess is that this can happen in the threaded - * case where the other thread removed some packets - * after we determined the size of the list. */ - if(NULL == pckt) - break; - - /* If there is no bml stored on the packet, then just - * put it back on the list as there is nothing to adjust. - * This appears to be true with ACK packets. */ - if (NULL == pckt->bml_btl) { - OPAL_THREAD_LOCK(&mca_pml_bfo.lock); - opal_list_append(&mca_pml_bfo.pckt_pending, - (opal_list_item_t*)pckt); - OPAL_THREAD_UNLOCK(&mca_pml_bfo.lock); - continue; - } - - /* Now see if this endpoint matches the one we are mapping - * out. If so, adjust the bml entry so to ensure it is - * not pointing at a stale bml. We do not really care - * which BML it is pointing at as long as it is valid. - * In either case, then put entry back on the list. 
*/ - if (pckt->proc->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_BML] == ep) { - opal_output_verbose(15, mca_pml_bfo_output, - "INFO: Found matching pckt on pckt_pending list, adjusting bml"); - pckt->bml_btl = mca_bml_base_btl_array_get_next(&ep->btl_eager); - } - OPAL_THREAD_LOCK(&mca_pml_bfo.lock); - opal_list_append(&mca_pml_bfo.pckt_pending, - (opal_list_item_t*)pckt); - OPAL_THREAD_UNLOCK(&mca_pml_bfo.lock); - - } - - /* This next list holds rdma fragments. We need to walk through - * the list and see if any are associated with the endpoint - * we are mapping out. If not, then just put back on the - * list. If they are, then we need to error them out. One issue - * is that we need to deal with the case where there may be more - * then one pending rdma fragment for a request. */ - s = (int32_t)opal_list_get_size(&mca_pml_bfo.rdma_pending); - for(i = 0; i < s; i++) { - mca_pml_bfo_rdma_frag_t* frag; - mca_pml_bfo_send_request_t* sendreq; - mca_pml_bfo_recv_request_t* recvreq; - opal_output_verbose(0, mca_pml_bfo_output, - "INFO: rdma_pending list has %d entries", s); -#if 1 - /* TODO: Error out until code is tested */ - opal_output_verbose(0, mca_pml_bfo_output, - "%s:%d: Support not implemented, aborting", - __FILE__, __LINE__); - ompi_rte_abort(-1, NULL); -#endif - OPAL_THREAD_LOCK(&mca_pml_bfo.lock); - frag = (mca_pml_bfo_rdma_frag_t*) - opal_list_remove_first(&mca_pml_bfo.rdma_pending); - OPAL_THREAD_UNLOCK(&mca_pml_bfo.lock); - - /* My guess is that this can happen in the threaded - * case where the other thread removed some packets - * after we determined the size of the list. */ - if(NULL == frag) - break; - - /* Check to see if it matches our endpoint. If it does, - * then check if it matches the BTL that is being mapped - * out. If it does not, then just readjust the BML pointer. - * If it does, then we need to do something with it. 
*/ - if (frag->rdma_ep != ep) { - OPAL_THREAD_LOCK(&mca_pml_bfo.lock); - opal_list_append(&mca_pml_bfo.rdma_pending, - (opal_list_item_t*)frag); - OPAL_THREAD_UNLOCK(&mca_pml_bfo.lock); - continue; - } - - /* If we are here, then we know we are working on the same - * endpoint. Now check the BTL. */ - if (frag->rdma_btl != btl) { - opal_output_verbose(15, mca_pml_bfo_output, - "INFO: Found matching frag on rdma_pending list, adjusting bml"); - /* The BTL this RDMA is associated with is not the - * one that is getting mapped out, so just adjust the - * BML pointer and put back on the list. */ - frag->rdma_bml = mca_bml_base_btl_array_find(&ep->btl_rdma, frag->rdma_btl); - OPAL_THREAD_LOCK(&mca_pml_bfo.lock); - opal_list_append(&mca_pml_bfo.rdma_pending, - (opal_list_item_t*)frag); - OPAL_THREAD_UNLOCK(&mca_pml_bfo.lock); - continue; - } - - /* Now we call the restart routine. This is just like if we got - * a completion event after calling an RDMA write. This will - * take care of figuring out if we need to restart the request - * or wait for any outstanding events to complete. 
*/ - if(frag->rdma_state == MCA_PML_BFO_RDMA_PUT) { - opal_output_verbose(15, mca_pml_bfo_output, - "INFO: Found matching PUT frag on rdma_pending list, restarting"); - sendreq = frag->rdma_req; - mca_pml_bfo_send_request_rndvrestartnotify(sendreq, false, - MCA_PML_BFO_HDR_TYPE_PUT, 2, btl); - MCA_PML_BFO_RDMA_FRAG_RETURN(frag); - } else { - opal_output_verbose(15, mca_pml_bfo_output, - "INFO: Found matching RGET frag on rdma_pending list, sending reqerror"); - /* This is just like what we do on an rget completion event */ - recvreq = (mca_pml_bfo_recv_request_t*)frag->rdma_req; - mca_pml_bfo_recv_request_recverrnotify(recvreq, MCA_PML_BFO_HDR_TYPE_RGET, 2); - - /* See if the request has received a RNDVRESTARTNOTIFY */ - if( OPAL_UNLIKELY(recvreq->req_errstate)) { - if (recvreq->req_errstate & RECVREQ_RNDVRESTART_RECVED) { - mca_pml_bfo_recv_request_rndvrestartack(recvreq, - MCA_PML_BFO_HDR_TYPE_RGET, - 2, btl); - } - } - MCA_PML_BFO_RDMA_FRAG_RETURN(frag); - } - } - - s = opal_list_get_size(&mca_pml_bfo.send_pending); - /* Look for pending events on our endpoint */ - for(i = 0; i < s; i++) { - mca_pml_bfo_send_request_t* sendreq; - ompi_proc_t* proc; - mca_bml_base_endpoint_t* bml_endpoint; - opal_output_verbose(0, mca_pml_bfo_output, - "INFO: send_pending list has %d entries", s); -#if 1 - /* TODO: Error out until code is tested */ - opal_output_verbose(0, mca_pml_bfo_output, - "%s:%d: Support not implemented, aborting", - __FILE__, __LINE__); - ompi_rte_abort(-1, NULL); -#endif - OPAL_THREAD_LOCK(&mca_pml_bfo.lock); - sendreq = (mca_pml_bfo_send_request_t*) - opal_list_remove_first(&mca_pml_bfo.send_pending); - OPAL_THREAD_UNLOCK(&mca_pml_bfo.lock); - - /* My guess is that this can happen in the threaded - * case where the other thread removed some packets - * after we determined the size of the list. 
*/ - if(NULL == sendreq) - break; - - proc = (ompi_proc_t*)sendreq->req_send.req_base.req_proc; - bml_endpoint = (mca_bml_base_endpoint_t*) proc->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_BML]; - - /* Check to see if it matches our endpoint. If it does not, - * then just put it back on the list as there is nothing - * we need to do with it. */ - if (bml_endpoint != ep) { - OPAL_THREAD_LOCK(&mca_pml_bfo.lock); - opal_list_append(&mca_pml_bfo.send_pending, - (opal_list_item_t*)sendreq); - OPAL_THREAD_UNLOCK(&mca_pml_bfo.lock); - continue; - } - - switch(sendreq->req_pending) { - case MCA_PML_BFO_SEND_PENDING_SCHEDULE: - /* If this send request is using the endpoint that received - * the error, then let us error it out. In the case - * where there is only one fragment left to be scheduled - * and it would have gone over the good BTL, this is - * not necessary. But, we will use simplicity here - * and assume that some of the fragments are still - * scheduled to go over the broken BTL. */ - sendreq->req_error++; - mca_pml_bfo_send_request_rndvrestartnotify(sendreq, false, - MCA_PML_BFO_HDR_TYPE_FRAG, 2, btl); - break; - case MCA_PML_BFO_SEND_PENDING_START: - /* If the request has not even started, then just put it back - * on the list. Nothing else to do with it. 
*/ - OPAL_THREAD_LOCK(&mca_pml_bfo.lock); - opal_list_append(&mca_pml_bfo.send_pending, - (opal_list_item_t*)sendreq); - OPAL_THREAD_UNLOCK(&mca_pml_bfo.lock); - break; - default: - opal_output(0, "[%s:%d] wrong send request type\n", - __FILE__, __LINE__); - break; - } - } - - s = (int)opal_list_get_size(&mca_pml_bfo.recv_pending); - for(i = 0; i < s; i++) { - mca_pml_bfo_recv_request_t* recvreq; - ompi_proc_t* proc; - mca_bml_base_endpoint_t* bml_endpoint; - opal_output_verbose(0, mca_pml_bfo_output, - "INFO: recv_pending list has %d entries", s); -#if 1 - /* TODO: Error out until code is tested */ - opal_output_verbose(0, mca_pml_bfo_output, - "%s:%d: Support not implemented, aborting", - __FILE__, __LINE__); - ompi_rte_abort(-1, NULL); -#endif - OPAL_THREAD_LOCK(&mca_pml_bfo.lock); - recvreq = (mca_pml_bfo_recv_request_t*) - opal_list_remove_first(&mca_pml_bfo.recv_pending); - OPAL_THREAD_UNLOCK(&mca_pml_bfo.lock); - - /* My guess is that this can happen in the threaded - * case where the other thread removed some packets - * after we determined the size of the list. */ - if(NULL == recvreq) - break; - - proc = (ompi_proc_t*)recvreq->req_recv.req_base.req_proc; - bml_endpoint = (mca_bml_base_endpoint_t*) proc->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_BML]; - - if (bml_endpoint != ep) { - OPAL_THREAD_LOCK(&mca_pml_bfo.lock); - opal_list_append(&mca_pml_bfo.recv_pending, - (opal_list_item_t*)recvreq); - OPAL_THREAD_UNLOCK(&mca_pml_bfo.lock); - continue; - } - - mca_pml_bfo_recv_request_recverrnotify(recvreq, MCA_PML_BFO_HDR_TYPE_PUT, 2); - } -} - -/** - * Call each time we get a completion event on ACK or PUT message. - * These types of messages are receive control type messages. This - * function is only called if the underlying BTL supports failover. - * Otherwise, there is no need for this check. 
- */ -void mca_pml_bfo_check_recv_ctl_completion_status(mca_btl_base_module_t* btl, - struct mca_btl_base_descriptor_t* des, - int status) -{ - mca_pml_bfo_common_hdr_t * common = des->des_local->seg_addr.pval; - mca_pml_bfo_rdma_hdr_t* hdr; /* PUT header */ - struct mca_btl_base_descriptor_t* rdma_des; - mca_pml_bfo_recv_request_t* recvreq; - - if(OPAL_UNLIKELY(OMPI_SUCCESS != status)) { - switch (common->hdr_type) { - case MCA_PML_BFO_HDR_TYPE_ACK: - recvreq = des->des_cbdata; - - /* Record the error. Send RECVERRNOTIFY if necessary. */ - if (recvreq->req_errstate) { - opal_output_verbose(30, mca_pml_bfo_output, - "ACK: completion failed, error already seen, " - "PML=%d, RQS=%d, src_req=%p, dst_req=%p, peer=%d", - recvreq->req_msgseq, recvreq->req_restartseq, - recvreq->remote_req_send.pval, (void *)recvreq, - recvreq->req_recv.req_base.req_ompi.req_status.MPI_SOURCE); - } else { - opal_output_verbose(30, mca_pml_bfo_output, - "ACK: completion failed, sending RECVERRNOTIFY to sender, " - "PML=%d, RQS=%d, src_req=%p, dst_req=%p, peer=%d", - recvreq->req_msgseq, recvreq->req_restartseq, - recvreq->remote_req_send.pval, (void *)recvreq, - recvreq->req_recv.req_base.req_ompi.req_status.MPI_SOURCE); - mca_pml_bfo_recv_request_recverrnotify(recvreq, MCA_PML_BFO_HDR_TYPE_ACK, status); - } - break; - - case MCA_PML_BFO_HDR_TYPE_PUT: - hdr = (mca_pml_bfo_rdma_hdr_t*)des->des_local->seg_addr.pval; - rdma_des = hdr->hdr_des.pval; - recvreq = des->des_cbdata; - if ((NULL != rdma_des->des_cbdata) && (recvreq == rdma_des->des_cbdata)) { - /* We now record the error, send the RECVERRNOTIFY if - * necessary, and free the descriptor. Prior to this, - * we want to ensure that we have not reached the case - * where the PUT message actually made it over and we - * have already received a FIN back. We first check to - * see if the RDMA descriptor cbdata is pointing to - * NULL. 
If it is, this means that the PUT message must - * have made it over and a corresponding FIN already - * made it back and freed the RDMA descriptor. Second, - * if it is non-null, we make sure that it is pointing - * to the same request as the PUT descriptor is. If - * it is not, again we assume that the FIN came back - * and freed it. And we can count on the fact that the - * recvreq has not been freed or reused as it is held - * until this very completion event occurs. */ - if (recvreq->req_errstate) { - opal_output_verbose(30, mca_pml_bfo_output, - "PUT: completion failed, error already seen, " - "PML=%d, RQS=%d, src_req=%p, dst_req=%p, peer=%d", - recvreq->req_msgseq, recvreq->req_restartseq, - recvreq->remote_req_send.pval, (void *)recvreq, - recvreq->req_recv.req_base.req_ompi.req_status.MPI_SOURCE); - } else { - opal_output_verbose(30, mca_pml_bfo_output, - "PUT: completion failed, sending RECVERRNOTIFY to sender, " - "PML=%d, RQS=%d, src_req=%p, dst_req=%p, peer=%d", - recvreq->req_msgseq, recvreq->req_restartseq, - recvreq->remote_req_send.pval, (void *)recvreq, - recvreq->req_recv.req_base.req_ompi.req_status.MPI_SOURCE); - mca_pml_bfo_recv_request_recverrnotify(recvreq, MCA_PML_BFO_HDR_TYPE_PUT, status); - } -#if 0 - /* TODO: Add descriptor to receive request so it can - * be freed only when receive request is freed and - * only if needed. 
*/ - btl->btl_free(btl, rdma_des); -#endif - } - break; - default: - ompi_rte_abort(-1, NULL); - } - } - - switch (common->hdr_type) { - case MCA_PML_BFO_HDR_TYPE_ACK: - recvreq = des->des_cbdata; - recvreq->req_events--; - assert(recvreq->req_events >= 0); - if(OPAL_UNLIKELY (recvreq->req_errstate & RECVREQ_RNDVRESTART_RECVED)) { - opal_output_verbose(30, mca_pml_bfo_output, - "ACK: completion: recvreq in error, outstanding events=%d " - "PML=%d, RQS=%d, src_req=%p, dst_req=%p, status=%d, peer=%d", - recvreq->req_events, recvreq->req_msgseq, recvreq->req_restartseq, - recvreq->remote_req_send.pval, (void *)recvreq, status, - recvreq->req_recv.req_base.req_ompi.req_status.MPI_SOURCE); - if (0 == recvreq->req_events) { - mca_pml_bfo_recv_request_rndvrestartack(recvreq, MCA_PML_BFO_HDR_TYPE_ACK, - status, btl); - } - return; - } - recv_request_pml_complete_check(recvreq); - break; - case MCA_PML_BFO_HDR_TYPE_PUT: - recvreq = des->des_cbdata; - recvreq->req_events--; - assert(recvreq->req_events >= 0); - if(OPAL_UNLIKELY(recvreq->req_errstate & RECVREQ_RNDVRESTART_RECVED)) { - opal_output_verbose(30, mca_pml_bfo_output, - "PUT: completion: recvreq in error, outstanding events=%d " - "PML=%d, RQS=%d, src_req=%p, dst_req=%p, status=%d, peer=%d", - recvreq->req_events, recvreq->req_msgseq, recvreq->req_restartseq, - recvreq->remote_req_send.pval, (void *)recvreq, status, - recvreq->req_recv.req_base.req_ompi.req_status.MPI_SOURCE); - if (0 == recvreq->req_events) { - mca_pml_bfo_recv_request_rndvrestartack(recvreq, MCA_PML_BFO_HDR_TYPE_PUT, - status, btl); - } - return; - } - recv_request_pml_complete_check(recvreq); - break; - } -} - -/** - * Register four functions to handle extra PML message types that - * are utilized when a failover occurs. - */ -int mca_pml_bfo_register_callbacks(void) { - int rc; - /* The following four functions are utilized when failover - * support for openib is enabled. 
*/ - rc = mca_bml.bml_register( MCA_PML_BFO_HDR_TYPE_RNDVRESTARTNOTIFY, - mca_pml_bfo_recv_frag_callback_rndvrestartnotify, - NULL ); - if(OMPI_SUCCESS != rc) - return rc; - - rc = mca_bml.bml_register( MCA_PML_BFO_HDR_TYPE_RNDVRESTARTACK, - mca_pml_bfo_recv_frag_callback_rndvrestartack, - NULL ); - if(OMPI_SUCCESS != rc) - return rc; - - rc = mca_bml.bml_register( MCA_PML_BFO_HDR_TYPE_RNDVRESTARTNACK, - mca_pml_bfo_recv_frag_callback_rndvrestartnack, - NULL ); - if(OMPI_SUCCESS != rc) - return rc; - - rc = mca_bml.bml_register( MCA_PML_BFO_HDR_TYPE_RECVERRNOTIFY, - mca_pml_bfo_recv_frag_callback_recverrnotify, - NULL ); - if(OMPI_SUCCESS != rc) - return rc; - - return rc; -} - -/** - * Update a few fields when we are restarting either a RNDV or - * RGET type message. - */ -void mca_pml_bfo_update_rndv_fields(mca_pml_bfo_hdr_t* hdr, - mca_pml_bfo_send_request_t* sendreq, char *type) -{ - hdr->hdr_common.hdr_flags |= MCA_PML_BFO_HDR_FLAGS_RESTART; - hdr->hdr_rndv.hdr_dst_req = sendreq->req_recv; - hdr->hdr_rndv.hdr_restartseq = sendreq->req_restartseq; - opal_output_verbose(30, mca_pml_bfo_output, - "%s: restarting: PML=%d, RQS=%d, CTX=%d, SRC=%d, " - "src_req=%p, dst_req=%p, peer=%d", - type, (uint16_t)sendreq->req_send.req_base.req_sequence, - sendreq->req_restartseq, - sendreq->req_send.req_base.req_comm->c_contextid, - sendreq->req_send.req_base.req_comm->c_my_rank, (void *)sendreq, - sendreq->req_recv.pval, sendreq->req_send.req_base.req_peer); -} - -/** - * The following set of functions are all called when it is determined - * that the cached bml_btl->btl does not match the btl handed back - * by the callback function. This means that the bml_btl array has - * been shuffled and the bml_btl matching the btl has to be found - * back. If it cannot be found, then just find a different one to - * use. 
- */ -void mca_pml_bfo_update_eager_bml_btl_recv_ctl(mca_bml_base_btl_t** bml_btl, - mca_btl_base_module_t* btl, - struct mca_btl_base_descriptor_t* des) -{ - if ((*bml_btl)->btl != btl) { - mca_pml_bfo_common_hdr_t * common = des->des_local->seg_addr.pval; - mca_pml_bfo_ack_hdr_t* ack; /* ACK header */ - mca_pml_bfo_recv_request_t* recvreq = NULL; - char *type = NULL; - - switch (common->hdr_type) { - case MCA_PML_BFO_HDR_TYPE_ACK: - ack = (mca_pml_bfo_ack_hdr_t*)des->des_local->seg_addr.pval; - recvreq = (mca_pml_bfo_recv_request_t*) ack->hdr_dst_req.pval; - type = "ACK"; - break; - case MCA_PML_BFO_HDR_TYPE_PUT: - recvreq = des->des_cbdata; - type = "PUT"; - break; - default: - /* In theory, this can never happen. */ - opal_output(0, "%s:%d FATAL ERROR, unknown header (hdr=%d)", - __FILE__, __LINE__, common->hdr_type); - ompi_rte_abort(-1, NULL); - } - - mca_pml_bfo_find_recvreq_eager_bml_btl(bml_btl, btl, recvreq, type); - } -} - -void mca_pml_bfo_find_sendreq_eager_bml_btl(mca_bml_base_btl_t** bml_btl, - mca_btl_base_module_t* btl, - mca_pml_bfo_send_request_t* sendreq, - char* type) -{ - if ((*bml_btl)->btl != btl) { - opal_output_verbose(25, mca_pml_bfo_output, - "%s completion: BML does not match BTL, find it back, " - "PML=%d, RQS=%d, src_req=%p, dst_req=%p, peer=%d", - type, (uint16_t)sendreq->req_send.req_base.req_sequence, - sendreq->req_restartseq, (void *)sendreq, - sendreq->req_recv.pval, - sendreq->req_send.req_base.req_peer); - *bml_btl = mca_bml_base_btl_array_find(&sendreq->req_endpoint->btl_eager, btl); - if (NULL == *bml_btl) { - opal_output_verbose(25, mca_pml_bfo_output, - "%s completion: BML is gone, find another one, " - "PML=%d, RQS=%d, src_req=%p, dst_req=%p, peer=%d", - type, (uint16_t)sendreq->req_send.req_base.req_sequence, - sendreq->req_restartseq, (void *)sendreq, - sendreq->req_recv.pval, - sendreq->req_send.req_base.req_peer); - *bml_btl = mca_bml_base_btl_array_get_next(&sendreq->req_endpoint->btl_eager); - } - } -} - -void 
mca_pml_bfo_find_sendreq_rdma_bml_btl(mca_bml_base_btl_t** bml_btl, - mca_btl_base_module_t* btl, - mca_pml_bfo_send_request_t* sendreq, - char* type) -{ - if ((*bml_btl)->btl != btl) { - opal_output_verbose(25, mca_pml_bfo_output, - "%s completion: BML does not match BTL, find it back, " - "PML=%d, RQS=%d, src_req=%p, dst_req=%p, peer=%d", - type, (uint16_t)sendreq->req_send.req_base.req_sequence, - sendreq->req_restartseq, (void *)sendreq, - sendreq->req_recv.pval, - sendreq->req_send.req_base.req_peer); - *bml_btl = mca_bml_base_btl_array_find(&sendreq->req_endpoint->btl_rdma, btl); - if (NULL == *bml_btl) { - opal_output_verbose(25, mca_pml_bfo_output, - "%s completion: BML is gone, find another one, " - "PML=%d, RQS=%d, src_req=%p, dst_req=%p, peer=%d", - type, (uint16_t)sendreq->req_send.req_base.req_sequence, - sendreq->req_restartseq, (void *)sendreq, - sendreq->req_recv.pval, - sendreq->req_send.req_base.req_peer); - *bml_btl = mca_bml_base_btl_array_get_next(&sendreq->req_endpoint->btl_rdma); - } - } -} - -void mca_pml_bfo_find_recvreq_eager_bml_btl(mca_bml_base_btl_t** bml_btl, - mca_btl_base_module_t* btl, - mca_pml_bfo_recv_request_t* recvreq, - char* type) -{ - if ((*bml_btl)->btl != btl) { - ompi_proc_t *proc = (ompi_proc_t*)recvreq->req_recv.req_base.req_proc; - mca_bml_base_endpoint_t* bml_endpoint = (mca_bml_base_endpoint_t*) proc->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_BML]; - - opal_output_verbose(25, mca_pml_bfo_output, - "%s completion: BML does not match BTL, find it back, " - "PML=%d, RQS=%d, src_req=%p, dst_req=%p, peer=%d", - type, recvreq->req_msgseq, recvreq->req_restartseq, - recvreq->remote_req_send.pval, (void *)recvreq, - recvreq->req_recv.req_base.req_ompi.req_status.MPI_SOURCE); - - *bml_btl = mca_bml_base_btl_array_find(&bml_endpoint->btl_eager, btl); - if (NULL == *bml_btl) { - opal_output_verbose(25, mca_pml_bfo_output, - "%s completion: BML is gone, find another one, " - "PML=%d, RQS=%d, src_req=%p, dst_req=%p, peer=%d", - type, 
recvreq->req_msgseq, recvreq->req_restartseq, - recvreq->remote_req_send.pval, (void *)recvreq, - recvreq->req_recv.req_base.req_ompi.req_status.MPI_SOURCE); - - *bml_btl = mca_bml_base_btl_array_get_next(&bml_endpoint->btl_eager); - } - } -} - -void mca_pml_bfo_find_recvreq_rdma_bml_btl(mca_bml_base_btl_t** bml_btl, - mca_btl_base_module_t* btl, - mca_pml_bfo_recv_request_t* recvreq, - char* type) -{ - if ((*bml_btl)->btl != btl) { - ompi_proc_t *proc = (ompi_proc_t*)recvreq->req_recv.req_base.req_proc; - mca_bml_base_endpoint_t* bml_endpoint = (mca_bml_base_endpoint_t*) proc->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_BML]; - - opal_output_verbose(25, mca_pml_bfo_output, - "%s completion: BML does not match BTL, find it back, " - "PML=%d, RQS=%d, src_req=%p, dst_req=%p, peer=%d", - type, recvreq->req_msgseq, recvreq->req_restartseq, - recvreq->remote_req_send.pval, (void *)recvreq, - recvreq->req_recv.req_base.req_ompi.req_status.MPI_SOURCE); - - *bml_btl = mca_bml_base_btl_array_find(&bml_endpoint->btl_rdma, btl); - if (NULL == *bml_btl) { - opal_output_verbose(25, mca_pml_bfo_output, - "%s completion: BML is gone, find another one, " - "PML=%d, RQS=%d, src_req=%p, dst_req=%p, peer=%d", - type, recvreq->req_msgseq, recvreq->req_restartseq, - recvreq->remote_req_send.pval, (void *)recvreq, - recvreq->req_recv.req_base.req_ompi.req_status.MPI_SOURCE); - - *bml_btl = mca_bml_base_btl_array_get_next(&bml_endpoint->btl_rdma); - } - } -} - -/** - * The completion event for the RNDV message has returned with an - * error. We know that the send request we are looking at is valid - * because it cannot be completed until the sendreq->req_state value - * reaches 0. And for the sendreq->req_state to reach 0, the - * completion event on the RNDV message must occur. So, we do not - * bother checking whether the send request is valid, because we know - * it is, but we put a few asserts in for good measure. We then check - * a few fields in the request to decide what to do. 
If the - * sendreq->req_error is set, that means that something has happend - * already to the request and we do not want to restart it. - * Presumably, we may have received a RECVERRNOTIFY message from the - * receiver. We also check the sendreq->req_acked field to see if it - * has been acked. If it has, then again we do not restart everything - * because obviously the RNDV message has made it to the other side. - */ -bool mca_pml_bfo_rndv_completion_status_error(struct mca_btl_base_descriptor_t* des, - mca_pml_bfo_send_request_t* sendreq) -{ - assert(((mca_pml_bfo_hdr_t*)((des)->des_local->seg_addr.pval))->hdr_match.hdr_ctx == - (sendreq)->req_send.req_base.req_comm->c_contextid); - assert(((mca_pml_bfo_hdr_t*)((des)->des_local->seg_addr.pval))->hdr_match.hdr_src == - (sendreq)->req_send.req_base.req_comm->c_my_rank); - assert(((mca_pml_bfo_hdr_t*)((des)->des_local->seg_addr.pval))->hdr_match.hdr_seq == - (uint16_t)(sendreq)->req_send.req_base.req_sequence); - if ((!(sendreq)->req_error) && (NULL == (sendreq)->req_recv.pval)) { - (sendreq)->req_events--; - /* Assume RNDV did not make it, so restart from the beginning. */ - mca_pml_bfo_send_request_restart(sendreq, true, MCA_PML_BFO_HDR_TYPE_RNDV); - return true; - } - return false; -} - -/** - * Check to see if an error has occurred on this send request. If it has - * and there are no outstanding events, then we can start the restart dance. 
- */ -void mca_pml_bfo_completion_sendreq_has_error(mca_pml_bfo_send_request_t* sendreq, - int status, - mca_btl_base_module_t* btl, - int type, - char *description) -{ - opal_output_verbose(30, mca_pml_bfo_output, - "%s: completion: sendreq has error, outstanding events=%d, " - "PML=%d, RQS=%d, src_req=%p, dst_req=%p, status=%d, peer=%d", - description, - sendreq->req_events, (uint16_t)sendreq->req_send.req_base.req_sequence, - sendreq->req_restartseq, (void *)sendreq, - sendreq->req_recv.pval, - status, sendreq->req_send.req_base.req_peer); - if (0 == sendreq->req_events) { - mca_pml_bfo_send_request_rndvrestartnotify(sendreq, false, - type, status, btl); - } -} - -/* If we get an error on the RGET message, then first make sure that - * header matches the send request that we are pointing to. This is - * necessary, because even though the sending side got an error, the - * RGET may have made it to the receiving side and the message transfer - * may have completed. This would then mean the send request has been - * completed and perhaps in use by another communication. So there is - * no need to restart this request. Therefore, ensure that we are - * looking at the same request that the header thinks we are looking - * at. If not, then there is nothing else to be done. 
*/ -void mca_pml_bfo_send_ctl_completion_status_error(struct mca_btl_base_descriptor_t* des) -{ - mca_pml_bfo_send_request_t* sendreq = (mca_pml_bfo_send_request_t*)des->des_cbdata; - mca_pml_bfo_hdr_t* hdr = des->des_local->seg_addr.pval; - switch (hdr->hdr_common.hdr_type) { - case MCA_PML_BFO_HDR_TYPE_RGET: - if ((hdr->hdr_match.hdr_ctx != sendreq->req_send.req_base.req_comm->c_contextid) || - (hdr->hdr_match.hdr_src != sendreq->req_send.req_base.req_comm->c_my_rank) || - (hdr->hdr_match.hdr_seq != (uint16_t)sendreq->req_send.req_base.req_sequence)) { - opal_output_verbose(30, mca_pml_bfo_output, - "RGET: completion event: dropping because no valid request " - "PML:exp=%d,act=%d CTX:exp=%d,act=%d SRC:exp=%d,act=%d " - "RQS:exp=%d,act=%d, dst_req=%p", - (uint16_t)sendreq->req_send.req_base.req_sequence, - hdr->hdr_match.hdr_seq, - sendreq->req_send.req_base.req_comm->c_contextid, - hdr->hdr_match.hdr_ctx, - sendreq->req_send.req_base.req_comm->c_my_rank, - hdr->hdr_match.hdr_src, - sendreq->req_restartseq, hdr->hdr_rndv.hdr_restartseq, - (void *)sendreq); - return; - } - mca_pml_bfo_send_request_restart(sendreq, true, MCA_PML_BFO_HDR_TYPE_RGET); - return; - default: - opal_output(0, "%s:%d FATAL ERROR, unknown header (hdr=%d)", - __FILE__, __LINE__, hdr->hdr_common.hdr_type); - ompi_rte_abort(-1, NULL); - } -} diff --git a/ompi/mca/pml/bfo/pml_bfo_failover.h b/ompi/mca/pml/bfo/pml_bfo_failover.h deleted file mode 100644 index ea4f70fdc48..00000000000 --- a/ompi/mca/pml/bfo/pml_bfo_failover.h +++ /dev/null @@ -1,398 +0,0 @@ -/* - * Copyright (c) 2010 Oracle and/or its affiliates. All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -/** - * @file - * Functions that implement failover capabilities. 
- */ - -#ifndef MCA_PML_BFO_FAILOVER_H -#define MCA_PML_BFO_FAILOVER_H - -#include "opal/mca/btl/btl.h" -#include "pml_bfo_hdr.h" - -BEGIN_C_DECLS - -bool mca_pml_bfo_is_duplicate_msg(mca_pml_bfo_comm_proc_t* proc, - mca_pml_bfo_match_hdr_t *hdr); -bool mca_pml_bfo_is_duplicate_fin(mca_pml_bfo_hdr_t* hdr, mca_btl_base_descriptor_t* rdma, - mca_btl_base_module_t* btl); - -mca_pml_bfo_recv_request_t* mca_pml_bfo_get_request(mca_pml_bfo_match_hdr_t *hdr); - -void mca_pml_bfo_send_request_restart(mca_pml_bfo_send_request_t* sendreq, - bool repost, mca_btl_base_tag_t tag); -void mca_pml_bfo_send_request_rndvrestartnotify(mca_pml_bfo_send_request_t* sendreq, - bool repost, mca_btl_base_tag_t tag, int status, - mca_btl_base_module_t* btl); - -void -mca_pml_bfo_rndvrestartnotify_completion(mca_btl_base_module_t* btl, - struct mca_btl_base_endpoint_t* ep, - struct mca_btl_base_descriptor_t* des, - int status); -void -mca_pml_bfo_check_recv_ctl_completion_status(mca_btl_base_module_t* btl, - struct mca_btl_base_descriptor_t* des, - int status); - -/* Reset a receive request to the beginning */ -void mca_pml_bfo_recv_request_reset(mca_pml_bfo_recv_request_t* recvreq); -/* Notify sender that receiver detected an error */ -void mca_pml_bfo_recv_request_recverrnotify(mca_pml_bfo_recv_request_t* recvreq, - mca_btl_base_tag_t tag, int status); -/* Ack the RNDVRESTARTNOTIFY message */ -void mca_pml_bfo_recv_request_rndvrestartack(mca_pml_bfo_recv_request_t* recvreq, - mca_btl_base_tag_t tag, int status, - mca_btl_base_module_t* btl); -/* Nack the RNDVRESTARTNOTIFY message */ -void mca_pml_bfo_recv_request_rndvrestartnack(mca_btl_base_descriptor_t* olddes, - ompi_proc_t* ompi_proc, bool repost); - -void mca_pml_bfo_recv_restart_completion(mca_btl_base_module_t* btl, - struct mca_btl_base_endpoint_t* ep, - struct mca_btl_base_descriptor_t* des, - int status); -void mca_pml_bfo_failover_error_handler(struct mca_btl_base_module_t* btl, - int32_t flags, ompi_proc_t *errproc, char 
*btlname); -void mca_pml_bfo_repost_match_fragment(struct mca_btl_base_descriptor_t* des); -void mca_pml_bfo_repost_fin(struct mca_btl_base_descriptor_t* des); - -void mca_pml_bfo_map_out_btl(struct mca_btl_base_module_t* btl, - ompi_proc_t *errproc, char *btlname); - -extern void mca_pml_bfo_map_out( mca_btl_base_module_t *btl, - mca_btl_base_tag_t tag, - mca_btl_base_descriptor_t* descriptor, - void* cbdata ); - -int mca_pml_bfo_register_callbacks(void); - -void mca_pml_bfo_update_rndv_fields(mca_pml_bfo_hdr_t* hdr, - mca_pml_bfo_send_request_t*, char *type); - -void mca_pml_bfo_update_bml_btl(mca_bml_base_btl_t** bml_btl, mca_btl_base_module_t* btl, - struct mca_btl_base_descriptor_t* des); - -void mca_pml_bfo_find_recvreq_eager_bml_btl(mca_bml_base_btl_t** bml_btl, - mca_btl_base_module_t* btl, - mca_pml_bfo_recv_request_t* recvreq, - char* type); - -void mca_pml_bfo_find_sendreq_eager_bml_btl(mca_bml_base_btl_t** bml_btl, - mca_btl_base_module_t* btl, - mca_pml_bfo_send_request_t* sendreq, - char* type); - -void mca_pml_bfo_find_sendreq_rdma_bml_btl(mca_bml_base_btl_t** bml_btl, - mca_btl_base_module_t* btl, - mca_pml_bfo_send_request_t* sendreq, - char* type); - -void mca_pml_bfo_update_eager_bml_btl_recv_ctl(mca_bml_base_btl_t** bml_btl, - mca_btl_base_module_t* btl, - struct mca_btl_base_descriptor_t* des); -void mca_pml_bfo_find_recvreq_rdma_bml_btl(mca_bml_base_btl_t** bml_btl, - mca_btl_base_module_t* btl, - mca_pml_bfo_recv_request_t* recvreq, - char* type); - -bool mca_pml_bfo_rndv_completion_status_error(struct mca_btl_base_descriptor_t* des, - mca_pml_bfo_send_request_t* sendreq); -void mca_pml_bfo_send_ctl_completion_status_error(struct mca_btl_base_descriptor_t* des); - - -void mca_pml_bfo_completion_sendreq_has_error(mca_pml_bfo_send_request_t* sendreq, - int status, - mca_btl_base_module_t* btl, - int type, - char *description); -/** - * Four new callbacks for the four new message types. 
- */ -extern void mca_pml_bfo_recv_frag_callback_rndvrestartnotify( mca_btl_base_module_t *btl, - mca_btl_base_tag_t tag, - mca_btl_base_descriptor_t* descriptor, - void* cbdata ); - -extern void mca_pml_bfo_recv_frag_callback_rndvrestartack( mca_btl_base_module_t *btl, - mca_btl_base_tag_t tag, - mca_btl_base_descriptor_t* descriptor, - void* cbdata ); - -extern void mca_pml_bfo_recv_frag_callback_rndvrestartnack( mca_btl_base_module_t *btl, - mca_btl_base_tag_t tag, - mca_btl_base_descriptor_t* descriptor, - void* cbdata ); - -extern void mca_pml_bfo_recv_frag_callback_recverrnotify( mca_btl_base_module_t *btl, - mca_btl_base_tag_t tag, - mca_btl_base_descriptor_t* descriptor, - void* cbdata ); - -/** - * A bunch of macros to help isolate failover code from regular ob1 code. - */ - -/* Drop any ACK fragments if request is in error state. Do not want - * to initiate any more activity. */ -#define MCA_PML_BFO_ERROR_CHECK_ON_ACK_CALLBACK(sendreq) \ - if( OPAL_UNLIKELY((sendreq)->req_error)) { \ - opal_output_verbose(20, mca_pml_bfo_output, \ - "ACK: received: dropping because request in error, " \ - "PML=%d, RQS=%d, src_req=%p, dst_req=%p, peer=%d", \ - (uint16_t)(sendreq)->req_send.req_base.req_sequence, \ - (sendreq)->req_restartseq, \ - (void *)(sendreq), (sendreq)->req_recv.pval, \ - (sendreq)->req_send.req_base.req_peer); \ - return; \ - } - -/* Drop any FRAG fragments if request is in error state. Do not want - * to initiate any more activity. 
*/ -#define MCA_PML_BFO_ERROR_CHECK_ON_FRAG_CALLBACK(recvreq) \ - if( OPAL_UNLIKELY((recvreq)->req_errstate)) { \ - opal_output_verbose(20, mca_pml_bfo_output, \ - "FRAG: received: dropping because request in error, " \ - "PML=%d, src_req=%p, dst_req=%p, peer=%d, offset=%d", \ - (uint16_t)(recvreq)->req_msgseq, \ - (recvreq)->remote_req_send.pval, \ - (void *)(recvreq), \ - (recvreq)->req_recv.req_base.req_ompi.req_status.MPI_SOURCE, \ - (int)hdr->hdr_frag.hdr_frag_offset); \ - return; \ - } - -/* Drop any PUT fragments if request is in error state. Do not want - * to initiate any more activity. */ -#define MCA_PML_BFO_ERROR_CHECK_ON_PUT_CALLBACK(sendreq) \ - if( OPAL_UNLIKELY((sendreq)->req_error)) { \ - opal_output_verbose(20, mca_pml_bfo_output, \ - "PUT: received: dropping because request in error, " \ - "PML=%d, src_req=%p, dst_req=%p, peer=%d", \ - (uint16_t)(sendreq)->req_send.req_base.req_sequence, \ - (void *)(sendreq), (sendreq)->req_recv.pval, \ - (sendreq)->req_send.req_base.req_peer); \ - return; \ - } - -/** - * Macros for pml_bfo_recvreq.c file. - */ - -/* This can happen if a FIN message arrives after the request was - * marked in error. So, just drop the message. Note that the status - * field is not being checked. That is because the status field is the - * value returned in the FIN hdr.hdr_fail field and may be used for - * other things. Note that we allow the various fields to be updated - * in case this actually completes the request and the sending side - * thinks it is done. 
*/ -#define MCA_PML_BFO_ERROR_CHECK_ON_FIN_FOR_PUT(recvreq) \ - if( OPAL_UNLIKELY((recvreq)->req_errstate)) { \ - opal_output_verbose(20, mca_pml_bfo_output, \ - "FIN: received on broken request, skipping, " \ - "PML=%d, RQS=%d, src_req=%p, dst_req=%p, peer=%d", \ - (recvreq)->req_msgseq, (recvreq)->req_restartseq, \ - (recvreq)->remote_req_send.pval, (void *)(recvreq), \ - (recvreq)->req_recv.req_base.req_ompi.req_status.MPI_SOURCE); \ - /* Even though in error, it still might complete. */ \ - recv_request_pml_complete_check(recvreq); \ - return; \ - } - -#define MCA_PML_BFO_ERROR_CHECK_ON_RDMA_READ_COMPLETION(recvreq) \ - if ((recvreq)->req_errstate) { \ - opal_output_verbose(30, mca_pml_bfo_output, \ - "RDMA read: completion failed, error already seen, " \ - "PML=%d, RQS=%d, src_req=%lx, dst_req=%lx, peer=%d", \ - (recvreq)->req_msgseq, (recvreq)->req_restartseq, \ - (unsigned long)(recvreq)->remote_req_send.pval, \ - (unsigned long)(recvreq), \ - (recvreq)->req_recv.req_base.req_ompi.req_status.MPI_SOURCE); \ - return; \ - } else { \ - opal_output_verbose(30, mca_pml_bfo_output, \ - "RDMA read: completion failed, sending RECVERRNOTIFY to " \ - "sender, PML=%d, RQS=%d, src_req=%lx, dst_req=%lx, peer=%d", \ - (recvreq)->req_msgseq, (recvreq)->req_restartseq, \ - (unsigned long)(recvreq)->remote_req_send.pval, \ - (unsigned long)(recvreq), \ - (recvreq)->req_recv.req_base.req_ompi.req_status.MPI_SOURCE); \ - mca_pml_bfo_recv_request_recverrnotify(recvreq, MCA_PML_BFO_HDR_TYPE_RGET, status); \ - } - -#define MCA_PML_BFO_SECOND_ERROR_CHECK_ON_RDMA_READ_COMPLETION(recvreq, status, btl) \ - /* See if the request has received a RNDVRESTARTNOTIFY */ \ - if( OPAL_UNLIKELY(recvreq->req_errstate)) { \ - if (recvreq->req_errstate & RECVREQ_RNDVRESTART_RECVED) { \ - opal_output_verbose(30, mca_pml_bfo_output, \ - "RDMA read: completion: recvreq has error, outstanding events=%d " \ - "PML=%d, RQS=%d, src_req=%lx, dst_req=%lx, status=%d, peer=%d", \ - recvreq->req_events, 
recvreq->req_msgseq, recvreq->req_restartseq, \ - (unsigned long)recvreq->remote_req_send.pval, \ - (unsigned long)recvreq, status, \ - recvreq->req_recv.req_base.req_ompi.req_status.MPI_SOURCE); \ - if (0 == recvreq->req_events) { \ - mca_pml_bfo_recv_request_rndvrestartack(recvreq, MCA_PML_BFO_HDR_TYPE_RGET, \ - status, btl); \ - } \ - } \ - MCA_PML_BFO_RDMA_FRAG_RETURN(frag); \ - return; \ - } - -/** - * Macros for pml_bfo_sendreq.c file. - */ - -/* This macro is called on the sending side after receiving - * a PUT message. There is a chance that this PUT message - * has shown up and is attempting to modify the state of - * the req_state, but the req_state is no longer being tracked - * because the RNDV message has turned into a RGET message - * because it got an error on the RNDV completion. - */ -#define MCA_PML_BFO_VERIFY_SENDREQ_REQ_STATE_VALUE(sendreq) \ - if (sendreq->req_state == -1) { \ - OPAL_THREAD_ADD_FETCH32(&sendreq->req_state, 1); \ - } - -/* Now check the error state. This request can be in error if the - * RNDV message made it over, but the receiver got an error trying to - * send the ACK back and therefore sent a RECVERRNOTIFY message. In - * that case, we want to start the restart dance as the receiver has - * matched this message already. Only restart if there are no - * outstanding events on send request. */ -#define MCA_PML_BFO_RNDV_COMPLETION_SENDREQ_ERROR_CHECK(sendreq, status, btl, type, description) \ - if( OPAL_UNLIKELY ((sendreq)->req_error)) { \ - mca_pml_bfo_completion_sendreq_has_error(sendreq, status, \ - btl, type, description); \ - return; \ - } - -/** - * This macro is called within the frag completion function in two - * places. It is called to see if any errors occur prior to the - * completion event on the frag. 
It is then called a second time - * after the scheduling routine is called as the scheduling routine - * may have detected that a BTL that was cached on the request had - * been removed and therefore marked the request in error. In that - * case, the scheduling of fragments can no longer proceed properly, - * and if there are no outstanding events, iniated the restart dance. - */ -#define MCA_PML_BFO_FRAG_COMPLETION_SENDREQ_ERROR_CHECK(sendreq, status, btl, type, description) \ - if( OPAL_UNLIKELY((sendreq)->req_error)) { \ - mca_pml_bfo_completion_sendreq_has_error(sendreq, status, \ - btl, type, description); \ - return; \ - } - -/* This can happen if a FIN message arrives after the request was - * marked in error. So, just drop the message. Note that the status - * field is not checked here. That is because that is the value - * returned in the FIN hdr.hdr_fail field and may be used for other - * things. */ -#define MCA_PML_BFO_RGET_COMPLETION_SENDREQ_ERROR_CHECK(sendreq, btl, des) \ - if( OPAL_UNLIKELY(sendreq->req_error)) { \ - opal_output_verbose(30, mca_pml_bfo_output, \ - "FIN: received on broken request, skipping, " \ - "PML=%d, src_req=%lx, dst_req=%lx, peer=%d", \ - (uint16_t)sendreq->req_send.req_base.req_sequence, \ - (unsigned long)sendreq, (unsigned long)sendreq->req_recv.pval, \ - sendreq->req_send.req_base.req_peer); \ - btl->btl_free(btl, des); \ - return; \ - } - - -/* Check if there has been an error on the send request when we get - * a completion event on the RDMA write. 
*/ -#define MCA_PML_BFO_PUT_COMPLETION_SENDREQ_ERROR_CHECK(sendreq, status, btl) \ - if ( OPAL_UNLIKELY(sendreq->req_error)) { \ - mca_pml_bfo_completion_sendreq_has_error(sendreq, status, btl, \ - MCA_PML_BFO_HDR_TYPE_PUT, "RDMA write"); \ - MCA_PML_BFO_RDMA_FRAG_RETURN(frag); \ - return; \ - } - -#define MCA_PML_BFO_CHECK_FOR_RNDV_RESTART(hdr, sendreq, type) \ - if (0 < sendreq->req_restartseq) { \ - mca_pml_bfo_update_rndv_fields(hdr, sendreq, type); \ - } - -/* If a bml_btl gets mapped out, then we need to adjust it based - * on the btl from the callback function. These macros are called on - * every callback to make sure things are copacetic. - */ -#define MCA_PML_BFO_CHECK_EAGER_BML_BTL_ON_FIN_COMPLETION(bml_btl, btl, des) \ - if (bml_btl->btl != btl) { \ - ompi_proc_t *proc = (ompi_proc_t*) des->des_cbdata; \ - mca_bml_base_endpoint_t* bml_endpoint = (mca_bml_base_endpoint_t*) proc->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_BML]; \ - bml_btl = mca_bml_base_btl_array_find(&bml_endpoint->btl_eager, btl); \ - } -#define MCA_PML_BFO_CHECK_SENDREQ_EAGER_BML_BTL(bml_btl, btl, sendreq, type) \ - if (bml_btl->btl != btl) { \ - mca_pml_bfo_find_sendreq_eager_bml_btl(&bml_btl, btl, sendreq, type); \ - } -#define MCA_PML_BFO_CHECK_SENDREQ_RDMA_BML_BTL(bml_btl, btl, sendreq, type) \ - if (bml_btl->btl != btl) { \ - mca_pml_bfo_find_sendreq_rdma_bml_btl(&bml_btl, btl, sendreq, type); \ - } - -#define MCA_PML_BFO_CHECK_RECVREQ_EAGER_BML_BTL(bml_btl, btl, recvreq, type) \ - if (bml_btl->btl != btl) { \ - mca_pml_bfo_find_recvreq_eager_bml_btl(&bml_btl, btl, recvreq, type); \ - } - -#define MCA_PML_BFO_CHECK_RECVREQ_RDMA_BML_BTL(bml_btl, btl, recvreq, type) \ - if (bml_btl->btl != btl) { \ - mca_pml_bfo_find_recvreq_rdma_bml_btl(&bml_btl, btl, recvreq, type); \ - } - -#define MCA_PML_BFO_CHECK_RECVREQ_EAGER_BML_BTL_RECV_CTL(bml_btl, btl, des) \ - if (bml_btl->btl != btl) { \ - mca_pml_bfo_update_eager_bml_btl_recv_ctl(&bml_btl, btl, des); \ - } - -#define 
MCA_PML_BFO_CHECK_FOR_REMOVED_BML(sendreq, frag, btl) \ - if( OPAL_UNLIKELY(NULL == frag->rdma_bml) ) { \ - opal_output_verbose(30, mca_pml_bfo_output, \ - "PUT received: no matching BTL to RDMA write to, oustanding " \ - "events=%d, PML=%d, RQS=%d, src_req=%p, dst_req=%p, peer=%d", \ - sendreq->req_events, \ - (uint16_t)sendreq->req_send.req_base.req_sequence, \ - sendreq->req_restartseq, (void *)sendreq, \ - sendreq->req_recv.pval, sendreq->req_send.req_base.req_peer); \ - MCA_PML_BFO_RDMA_FRAG_RETURN(frag); \ - sendreq->req_error++; \ - if (0 == sendreq->req_events) { \ - mca_pml_bfo_send_request_rndvrestartnotify(sendreq, false, \ - MCA_PML_BFO_HDR_TYPE_PUT, \ - OMPI_ERROR, btl); \ - } \ - return; \ - } - -/* This macro checks to see if the cached number of BTLs in the - * send request still matches the value from the endpoint. - * If it does not, this means that a BTL was removed from the - * available list. In this case, start the request over. - */ -#define MCA_PML_BFO_CHECK_FOR_REMOVED_BTL(sendreq, range) \ - if ((int)mca_bml_base_btl_array_get_size(&sendreq->req_endpoint->btl_send) \ - != range->range_btl_cnt) { \ - sendreq->req_error++; \ - return OMPI_ERROR; \ - } - - -END_C_DECLS - -#endif diff --git a/ompi/mca/pml/bfo/pml_bfo_hdr.h b/ompi/mca/pml/bfo/pml_bfo_hdr.h deleted file mode 100644 index 90a6f2326d7..00000000000 --- a/ompi/mca/pml/bfo/pml_bfo_hdr.h +++ /dev/null @@ -1,539 +0,0 @@ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * Copyright (c) 2009 IBM Corporation. All rights reserved. - * Copyright (c) 2010 Oracle and/or its affiliates. All rights reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ -/** - * @file - */ -#ifndef MCA_PML_BFO_HEADER_H -#define MCA_PML_BFO_HEADER_H - -#include "ompi_config.h" -#ifdef HAVE_SYS_TYPES_H -#include -#endif -#ifdef HAVE_NETINET_IN_H -#include -#endif - -#include "opal/types.h" -#include "opal/util/arch.h" -#include "opal/mca/btl/btl.h" -#include "ompi/proc/proc.h" - -#define MCA_PML_BFO_HDR_TYPE_MATCH (MCA_BTL_TAG_PML + 1) -#define MCA_PML_BFO_HDR_TYPE_RNDV (MCA_BTL_TAG_PML + 2) -#define MCA_PML_BFO_HDR_TYPE_RGET (MCA_BTL_TAG_PML + 3) -#define MCA_PML_BFO_HDR_TYPE_ACK (MCA_BTL_TAG_PML + 4) -#define MCA_PML_BFO_HDR_TYPE_NACK (MCA_BTL_TAG_PML + 5) -#define MCA_PML_BFO_HDR_TYPE_FRAG (MCA_BTL_TAG_PML + 6) -#define MCA_PML_BFO_HDR_TYPE_GET (MCA_BTL_TAG_PML + 7) -#define MCA_PML_BFO_HDR_TYPE_PUT (MCA_BTL_TAG_PML + 8) -#define MCA_PML_BFO_HDR_TYPE_FIN (MCA_BTL_TAG_PML + 9) -#if PML_BFO -#define MCA_PML_BFO_HDR_TYPE_RNDVRESTARTNOTIFY (MCA_BTL_TAG_PML + 10) -#define MCA_PML_BFO_HDR_TYPE_RNDVRESTARTACK (MCA_BTL_TAG_PML + 11) -#define MCA_PML_BFO_HDR_TYPE_RNDVRESTARTNACK (MCA_BTL_TAG_PML + 12) -#define MCA_PML_BFO_HDR_TYPE_RECVERRNOTIFY (MCA_BTL_TAG_PML + 13) -#endif /* PML_BFO */ - -#define MCA_PML_BFO_HDR_FLAGS_ACK 1 /* is an ack required */ -#define MCA_PML_BFO_HDR_FLAGS_NBO 2 /* is the hdr in network byte order */ -#define MCA_PML_BFO_HDR_FLAGS_PIN 4 /* is user buffer pinned */ -#define MCA_PML_BFO_HDR_FLAGS_CONTIG 8 /* is user buffer contiguous */ -#define MCA_PML_BFO_HDR_FLAGS_NORDMA 16 /* rest will be send by copy-in-out */ -#if PML_BFO -#define MCA_PML_BFO_HDR_FLAGS_RESTART 32 /* restart RNDV because of error */ -#endif /* PML_BFO */ - -/** - * Common hdr attributes - must be first element in each hdr type - */ -struct mca_pml_bfo_common_hdr_t { - uint8_t hdr_type; /**< type of envelope */ - uint8_t hdr_flags; /**< flags indicating how fragment should be processed */ -}; -typedef struct mca_pml_bfo_common_hdr_t mca_pml_bfo_common_hdr_t; - 
-#define MCA_PML_BFO_COMMON_HDR_NTOH(h) -#define MCA_PML_BFO_COMMON_HDR_HTON(h) - -/** - * Header definition for the first fragment, contains the - * attributes required to match the corresponding posted receive. - */ -struct mca_pml_bfo_match_hdr_t { - mca_pml_bfo_common_hdr_t hdr_common; /**< common attributes */ - uint16_t hdr_ctx; /**< communicator index */ - int32_t hdr_src; /**< source rank */ - int32_t hdr_tag; /**< user tag */ - uint16_t hdr_seq; /**< message sequence number */ -#if OPAL_ENABLE_HETEROGENEOUS_SUPPORT - uint8_t hdr_padding[2]; /**< explicitly pad to 16 bytes. Compilers seem to already prefer to do this, but make it explicit just in case */ -#endif -}; -#if OPAL_ENABLE_HETEROGENEOUS_SUPPORT -#define OMPI_PML_BFO_MATCH_HDR_LEN 16 -#else -#define OMPI_PML_BFO_MATCH_HDR_LEN 14 -#endif - -typedef struct mca_pml_bfo_match_hdr_t mca_pml_bfo_match_hdr_t; - -#if OPAL_ENABLE_HETEROGENEOUS_SUPPORT && OPAL_ENABLE_DEBUG -#define MCA_PML_BFO_MATCH_HDR_FILL(h) \ -do { \ - (h).hdr_padding[0] = 0; \ - (h).hdr_padding[1] = 0; \ -} while(0) -#else -#define MCA_PML_BFO_MATCH_HDR_FILL(h) -#endif /* OPAL_ENABLE_HETEROGENEOUS_SUPPORT && OPAL_ENABLE_DEBUG */ - -#define MCA_PML_BFO_MATCH_HDR_NTOH(h) \ -do { \ - MCA_PML_BFO_COMMON_HDR_NTOH((h).hdr_common); \ - (h).hdr_ctx = ntohs((h).hdr_ctx); \ - (h).hdr_src = ntohl((h).hdr_src); \ - (h).hdr_tag = ntohl((h).hdr_tag); \ - (h).hdr_seq = ntohs((h).hdr_seq); \ -} while (0) - -#define MCA_PML_BFO_MATCH_HDR_HTON(h) \ -do { \ - MCA_PML_BFO_COMMON_HDR_HTON((h).hdr_common); \ - MCA_PML_BFO_MATCH_HDR_FILL(h); \ - (h).hdr_ctx = htons((h).hdr_ctx); \ - (h).hdr_src = htonl((h).hdr_src); \ - (h).hdr_tag = htonl((h).hdr_tag); \ - (h).hdr_seq = htons((h).hdr_seq); \ -} while (0) - -/** - * Header definition for the first fragment when an acknowledgment - * is required. This could be the first fragment of a large message - * or a short message that requires an ack (synchronous). 
- */ -struct mca_pml_bfo_rendezvous_hdr_t { - mca_pml_bfo_match_hdr_t hdr_match; - uint64_t hdr_msg_length; /**< message length */ - opal_ptr_t hdr_src_req; /**< pointer to source request - returned in ack */ -#if PML_BFO - opal_ptr_t hdr_dst_req; /**< pointer to dst req */ - uint8_t hdr_restartseq; /**< restart sequence */ -#endif /* PML_BFO */ -}; -typedef struct mca_pml_bfo_rendezvous_hdr_t mca_pml_bfo_rendezvous_hdr_t; - -#if OPAL_ENABLE_HETEROGENEOUS_SUPPORT && OPAL_ENABLE_DEBUG -#define MCA_PML_BFO_RNDV_HDR_FILL(h) \ - MCA_PML_BFO_MATCH_HDR_FILL((h).hdr_match) -#else -#define MCA_PML_BFO_RNDV_HDR_FILL(h) -#endif /* OPAL_ENABLE_HETEROGENEOUS_SUPPORT && OPAL_ENABLE_DEBUG */ - -/* Note that hdr_src_req is not put in network byte order because it - is never processed by the receiver, other than being copied into - the ack header */ -#define MCA_PML_BFO_RNDV_HDR_NTOH(h) \ - do { \ - MCA_PML_BFO_MATCH_HDR_NTOH((h).hdr_match); \ - (h).hdr_msg_length = ntoh64((h).hdr_msg_length); \ - } while (0) - -#define MCA_PML_BFO_RNDV_HDR_HTON(h) \ - do { \ - MCA_PML_BFO_MATCH_HDR_HTON((h).hdr_match); \ - MCA_PML_BFO_RNDV_HDR_FILL(h); \ - (h).hdr_msg_length = hton64((h).hdr_msg_length); \ - } while (0) - -/** - * Header definition for a combined rdma rendezvous/get - */ -struct mca_pml_bfo_rget_hdr_t { - mca_pml_bfo_rendezvous_hdr_t hdr_rndv; - uint32_t hdr_seg_cnt; /**< number of segments for rdma */ -#if OPAL_ENABLE_HETEROGENEOUS_SUPPORT - uint8_t hdr_padding[4]; -#endif - opal_ptr_t hdr_des; /**< source descriptor */ -}; -typedef struct mca_pml_bfo_rget_hdr_t mca_pml_bfo_rget_hdr_t; - -#if OPAL_ENABLE_HETEROGENEOUS_SUPPORT && OPAL_ENABLE_DEBUG -#define MCA_PML_BFO_RGET_HDR_FILL(h) \ -do { \ - MCA_PML_BFO_RNDV_HDR_FILL((h).hdr_rndv); \ - (h).hdr_padding[0] = 0; \ - (h).hdr_padding[1] = 0; \ - (h).hdr_padding[2] = 0; \ - (h).hdr_padding[3] = 0; \ -} while(0) -#else -#define MCA_PML_BFO_RGET_HDR_FILL(h) -#endif /* OPAL_ENABLE_HETEROGENEOUS_SUPPORT && OPAL_ENABLE_DEBUG */ - 
-#define MCA_PML_BFO_RGET_HDR_NTOH(h) \ - do { \ - MCA_PML_BFO_RNDV_HDR_NTOH((h).hdr_rndv); \ - (h).hdr_seg_cnt = ntohl((h).hdr_seg_cnt); \ - } while (0) - -#define MCA_PML_BFO_RGET_HDR_HTON(h) \ - do { \ - MCA_PML_BFO_RNDV_HDR_HTON((h).hdr_rndv); \ - MCA_PML_BFO_RGET_HDR_FILL(h); \ - (h).hdr_seg_cnt = htonl((h).hdr_seg_cnt); \ - } while (0) - -/** - * Header for subsequent fragments. - */ -struct mca_pml_bfo_frag_hdr_t { - mca_pml_bfo_common_hdr_t hdr_common; /**< common attributes */ -#if OPAL_ENABLE_HETEROGENEOUS_SUPPORT - uint8_t hdr_padding[6]; -#endif - uint64_t hdr_frag_offset; /**< offset into message */ - opal_ptr_t hdr_src_req; /**< pointer to source request */ - opal_ptr_t hdr_dst_req; /**< pointer to matched receive */ -}; -typedef struct mca_pml_bfo_frag_hdr_t mca_pml_bfo_frag_hdr_t; - -#if OPAL_ENABLE_HETEROGENEOUS_SUPPORT && OPAL_ENABLE_DEBUG -#define MCA_PML_BFO_FRAG_HDR_FILL(h) \ -do { \ - (h).hdr_padding[0] = 0; \ - (h).hdr_padding[1] = 0; \ - (h).hdr_padding[2] = 0; \ - (h).hdr_padding[3] = 0; \ - (h).hdr_padding[4] = 0; \ - (h).hdr_padding[5] = 0; \ -} while(0) -#else -#define MCA_PML_BFO_FRAG_HDR_FILL(h) -#endif /* OPAL_ENABLE_HETEROGENEOUS_SUPPORT && OPAL_ENABLE_DEBUG */ - -#define MCA_PML_BFO_FRAG_HDR_NTOH(h) \ - do { \ - MCA_PML_BFO_COMMON_HDR_NTOH((h).hdr_common); \ - (h).hdr_frag_offset = ntoh64((h).hdr_frag_offset); \ - } while (0) - -#define MCA_PML_BFO_FRAG_HDR_HTON(h) \ - do { \ - MCA_PML_BFO_COMMON_HDR_HTON((h).hdr_common); \ - MCA_PML_BFO_FRAG_HDR_FILL(h); \ - (h).hdr_frag_offset = hton64((h).hdr_frag_offset); \ - } while (0) - -/** - * Header used to acknowledgment outstanding fragment(s). 
- */ - -struct mca_pml_bfo_ack_hdr_t { - mca_pml_bfo_common_hdr_t hdr_common; /**< common attributes */ -#if OPAL_ENABLE_HETEROGENEOUS_SUPPORT - uint8_t hdr_padding[6]; -#endif - opal_ptr_t hdr_src_req; /**< source request */ - opal_ptr_t hdr_dst_req; /**< matched receive request */ - uint64_t hdr_send_offset; /**< starting point of copy in/out */ -}; -typedef struct mca_pml_bfo_ack_hdr_t mca_pml_bfo_ack_hdr_t; - -#if OPAL_ENABLE_HETEROGENEOUS_SUPPORT && OPAL_ENABLE_DEBUG -#define MCA_PML_BFO_ACK_HDR_FILL(h) \ -do { \ - (h).hdr_padding[0] = 0; \ - (h).hdr_padding[1] = 0; \ - (h).hdr_padding[2] = 0; \ - (h).hdr_padding[3] = 0; \ - (h).hdr_padding[4] = 0; \ - (h).hdr_padding[5] = 0; \ -} while (0) -#else -#define MCA_PML_BFO_ACK_HDR_FILL(h) -#endif /* OPAL_ENABLE_HETEROGENEOUS_SUPPORT && OPAL_ENABLE_DEBUG */ - -/* Note that the request headers are not put in NBO because the - src_req is already in receiver's byte order and the dst_req is not - used by the receiver for anything other than backpointers in return - headers */ -#define MCA_PML_BFO_ACK_HDR_NTOH(h) \ - do { \ - MCA_PML_BFO_COMMON_HDR_NTOH((h).hdr_common); \ - (h).hdr_send_offset = ntoh64((h).hdr_send_offset); \ - } while (0) - -#define MCA_PML_BFO_ACK_HDR_HTON(h) \ - do { \ - MCA_PML_BFO_COMMON_HDR_HTON((h).hdr_common); \ - MCA_PML_BFO_ACK_HDR_FILL(h); \ - (h).hdr_send_offset = hton64((h).hdr_send_offset); \ - } while (0) - -/** - * Header used to initiate an RDMA operation. - */ - -struct mca_pml_bfo_rdma_hdr_t { - mca_pml_bfo_common_hdr_t hdr_common; /**< common attributes */ -#if OPAL_ENABLE_HETEROGENEOUS_SUPPORT - uint8_t hdr_padding[2]; /** two to pad out the hdr to a 4 byte alignment. 
hdr_req will then be 8 byte aligned after 4 for hdr_seg_cnt */ -#endif - uint32_t hdr_seg_cnt; /**< number of segments for rdma */ - opal_ptr_t hdr_req; /**< destination request */ -#if PML_BFO - opal_ptr_t hdr_dst_req; /**< pointer to destination request */ -#endif /* PML_BFO */ - opal_ptr_t hdr_des; /**< source descriptor */ - uint64_t hdr_rdma_offset; /**< current offset into user buffer */ - mca_btl_base_segment_t hdr_segs[1]; /**< list of segments for rdma */ -}; -typedef struct mca_pml_bfo_rdma_hdr_t mca_pml_bfo_rdma_hdr_t; - -#if OPAL_ENABLE_HETEROGENEOUS_SUPPORT && OPAL_ENABLE_DEBUG -#define MCA_PML_BFO_RDMA_HDR_FILL(h) \ -do { \ - (h).hdr_padding[0] = 0; \ - (h).hdr_padding[1] = 0; \ -} while(0) -#else -#define MCA_PML_BFO_RDMA_HDR_FILL(h) -#endif /* OPAL_ENABLE_HETEROGENEOUS_SUPPORT && OPAL_ENABLE_DEBUG */ - -#define MCA_PML_BFO_RDMA_HDR_NTOH(h) \ - do { \ - MCA_PML_BFO_COMMON_HDR_NTOH((h).hdr_common); \ - (h).hdr_seg_cnt = ntohl((h).hdr_seg_cnt); \ - (h).hdr_rdma_offset = ntoh64((h).hdr_rdma_offset); \ - } while (0) - -#define MCA_PML_BFO_RDMA_HDR_HTON(h) \ - do { \ - MCA_PML_BFO_COMMON_HDR_HTON((h).hdr_common); \ - MCA_PML_BFO_RDMA_HDR_FILL(h); \ - (h).hdr_seg_cnt = htonl((h).hdr_seg_cnt); \ - (h).hdr_rdma_offset = hton64((h).hdr_rdma_offset); \ - } while (0) - -/** - * Header used to complete an RDMA operation. - */ - -struct mca_pml_bfo_fin_hdr_t { - mca_pml_bfo_common_hdr_t hdr_common; /**< common attributes */ -#if OPAL_ENABLE_HETEROGENEOUS_SUPPORT - uint8_t hdr_padding[2]; -#endif -#if PML_BFO - /* Match info is needed to check for duplicate FIN messages. 
*/ - mca_pml_bfo_match_hdr_t hdr_match; -#endif /* PML_BFO */ - uint32_t hdr_fail; /**< RDMA operation failed */ - opal_ptr_t hdr_des; /**< completed descriptor */ -}; -typedef struct mca_pml_bfo_fin_hdr_t mca_pml_bfo_fin_hdr_t; - -#if PML_BFO -#if OPAL_ENABLE_HETEROGENEOUS_SUPPORT && OPAL_ENABLE_DEBUG -#define MCA_PML_BFO_FIN_HDR_FILL(h) \ -do { \ - (h).hdr_padding[0] = 0; \ - (h).hdr_padding[1] = 0; \ - MCA_PML_BFO_MATCH_HDR_FILL((h).hdr_match); \ -} while (0) -#else -#define MCA_PML_BFO_FIN_HDR_FILL(h) -#endif /* OPAL_ENABLE_HETEROGENEOUS_SUPPORT && OPAL_ENABLE_DEBUG */ - -#define MCA_PML_BFO_FIN_HDR_NTOH(h) \ - do { \ - MCA_PML_BFO_COMMON_HDR_NTOH((h).hdr_common); \ - MCA_PML_BFO_MATCH_HDR_NTOH((h).hdr_match); \ - } while (0) - -#define MCA_PML_BFO_FIN_HDR_HTON(h) \ - do { \ - MCA_PML_BFO_COMMON_HDR_HTON((h).hdr_common); \ - MCA_PML_BFO_MATCH_HDR_HTON((h).hdr_match); \ - MCA_PML_BFO_FIN_HDR_FILL(h); \ - } while (0) -#else /* PML_BFO */ -#if OPAL_ENABLE_HETEROGENEOUS_SUPPORT && OPAL_ENABLE_DEBUG -#define MCA_PML_BFO_FIN_HDR_FILL(h) \ -do { \ - (h).hdr_padding[0] = 0; \ - (h).hdr_padding[1] = 0; \ -} while (0) -#else -#define MCA_PML_BFO_FIN_HDR_FILL(h) -#endif /* OPAL_ENABLE_HETEROGENEOUS_SUPPORT && OPAL_ENABLE_DEBUG */ - -#define MCA_PML_BFO_FIN_HDR_NTOH(h) \ - do { \ - MCA_PML_BFO_COMMON_HDR_NTOH((h).hdr_common); \ - } while (0) - -#define MCA_PML_BFO_FIN_HDR_HTON(h) \ - do { \ - MCA_PML_BFO_COMMON_HDR_HTON((h).hdr_common); \ - MCA_PML_BFO_FIN_HDR_FILL(h); \ - } while (0) -#endif /* PML_BFO */ - -#if PML_BFO -/** - * Header used to restart a rendezvous request. 
- */ -struct mca_pml_bfo_restart_hdr_t { - mca_pml_bfo_match_hdr_t hdr_match; /**< needed to avoid duplicate messages */ - uint8_t hdr_restartseq; /**< restart sequence */ -#if OPAL_ENABLE_HETEROGENEOUS_SUPPORT - uint8_t hdr_padding[3]; -#endif - opal_ptr_t hdr_src_req; /**< source request */ - opal_ptr_t hdr_dst_req; /**< matched receive request */ - int32_t hdr_dst_rank; /**< needed to send NACK */ - uint32_t hdr_jobid; /**< needed to send NACK */ - uint32_t hdr_vpid; /**< needed to send NACK */ -}; -typedef struct mca_pml_bfo_restart_hdr_t mca_pml_bfo_restart_hdr_t; - -/* Only need to put parts of the restart header in NBO. No need - to do hdr_src_req and hdr_dst_req as they are only used on the - by the process that originated them. */ -#define MCA_PML_BFO_RESTART_HDR_NTOH(h) \ - do { \ - MCA_PML_BFO_MATCH_HDR_NTOH((h).hdr_match); \ - (h).hdr_dst_rank = ntohl((h).hdr_dst_rank); \ - (h).hdr_jobid = ntohl((h).hdr_jobid); \ - (h).hdr_vpid = ntohl((h).hdr_vpid); \ - } while (0) - -#define MCA_PML_BFO_RESTART_HDR_HTON(h) \ - do { \ - MCA_PML_BFO_MATCH_HDR_HTON((h).hdr_match); \ - (h).hdr_dst_rank = htonl((h).hdr_dst_rank); \ - (h).hdr_jobid = htonl((h).hdr_jobid); \ - (h).hdr_vpid = htonl((h).hdr_vpid); \ - } while (0) - -#endif /* PML_BFO */ -/** - * Union of defined hdr types. 
- */ -union mca_pml_bfo_hdr_t { - mca_pml_bfo_common_hdr_t hdr_common; - mca_pml_bfo_match_hdr_t hdr_match; - mca_pml_bfo_rendezvous_hdr_t hdr_rndv; - mca_pml_bfo_rget_hdr_t hdr_rget; - mca_pml_bfo_frag_hdr_t hdr_frag; - mca_pml_bfo_ack_hdr_t hdr_ack; - mca_pml_bfo_rdma_hdr_t hdr_rdma; - mca_pml_bfo_fin_hdr_t hdr_fin; -#if PML_BFO - mca_pml_bfo_restart_hdr_t hdr_restart; -#endif /* PML_BFO */ -}; -typedef union mca_pml_bfo_hdr_t mca_pml_bfo_hdr_t; - -#if !defined(WORDS_BIGENDIAN) && OPAL_ENABLE_HETEROGENEOUS_SUPPORT -static inline __opal_attribute_always_inline__ void -bfo_hdr_ntoh(mca_pml_bfo_hdr_t *hdr, const uint8_t hdr_type) -{ - if(!(hdr->hdr_common.hdr_flags & MCA_PML_BFO_HDR_FLAGS_NBO)) - return; - - switch(hdr_type) { - case MCA_PML_BFO_HDR_TYPE_MATCH: - MCA_PML_BFO_MATCH_HDR_NTOH(hdr->hdr_match); - break; - case MCA_PML_BFO_HDR_TYPE_RNDV: - MCA_PML_BFO_RNDV_HDR_NTOH(hdr->hdr_rndv); - break; - case MCA_PML_BFO_HDR_TYPE_RGET: - MCA_PML_BFO_RGET_HDR_NTOH(hdr->hdr_rget); - break; - case MCA_PML_BFO_HDR_TYPE_ACK: - MCA_PML_BFO_ACK_HDR_NTOH(hdr->hdr_ack); - break; - case MCA_PML_BFO_HDR_TYPE_FRAG: - MCA_PML_BFO_FRAG_HDR_NTOH(hdr->hdr_frag); - break; - case MCA_PML_BFO_HDR_TYPE_PUT: - MCA_PML_BFO_RDMA_HDR_NTOH(hdr->hdr_rdma); - break; - case MCA_PML_BFO_HDR_TYPE_FIN: - MCA_PML_BFO_FIN_HDR_NTOH(hdr->hdr_fin); - break; - default: - assert(0); - break; - } -} -#else -#define bfo_hdr_ntoh(h, t) do{}while(0) -#endif - -#if OPAL_ENABLE_HETEROGENEOUS_SUPPORT -#define bfo_hdr_hton(h, t, p) \ - bfo_hdr_hton_intr((mca_pml_bfo_hdr_t*)h, t, p) -static inline __opal_attribute_always_inline__ void -bfo_hdr_hton_intr(mca_pml_bfo_hdr_t *hdr, const uint8_t hdr_type, - const ompi_proc_t *proc) -{ -#ifdef WORDS_BIGENDIAN - hdr->hdr_common.hdr_flags |= MCA_PML_BFO_HDR_FLAGS_NBO; -#else - - if(!(proc->super.proc_arch & OPAL_ARCH_ISBIGENDIAN)) - return; - - hdr->hdr_common.hdr_flags |= MCA_PML_BFO_HDR_FLAGS_NBO; - switch(hdr_type) { - case MCA_PML_BFO_HDR_TYPE_MATCH: - 
MCA_PML_BFO_MATCH_HDR_HTON(hdr->hdr_match); - break; - case MCA_PML_BFO_HDR_TYPE_RNDV: - MCA_PML_BFO_RNDV_HDR_HTON(hdr->hdr_rndv); - break; - case MCA_PML_BFO_HDR_TYPE_RGET: - MCA_PML_BFO_RGET_HDR_HTON(hdr->hdr_rget); - break; - case MCA_PML_BFO_HDR_TYPE_ACK: - MCA_PML_BFO_ACK_HDR_HTON(hdr->hdr_ack); - break; - case MCA_PML_BFO_HDR_TYPE_FRAG: - MCA_PML_BFO_FRAG_HDR_HTON(hdr->hdr_frag); - break; - case MCA_PML_BFO_HDR_TYPE_PUT: - MCA_PML_BFO_RDMA_HDR_HTON(hdr->hdr_rdma); - break; - case MCA_PML_BFO_HDR_TYPE_FIN: - MCA_PML_BFO_FIN_HDR_HTON(hdr->hdr_fin); - break; - default: - assert(0); - break; - } -#endif -} -#else -#define bfo_hdr_hton(h, t, p) do{}while(0) -#endif -#endif diff --git a/ompi/mca/pml/bfo/pml_bfo_iprobe.c b/ompi/mca/pml/bfo/pml_bfo_iprobe.c deleted file mode 100644 index c3d432367af..00000000000 --- a/ompi/mca/pml/bfo/pml_bfo_iprobe.c +++ /dev/null @@ -1,171 +0,0 @@ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2013 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * Copyright (c) 2009-2012 Oracle and/or its affiliates. All rights reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "ompi_config.h" -#include "ompi/request/request.h" -#include "ompi/message/message.h" -#include "pml_bfo_recvreq.h" - - -int mca_pml_bfo_iprobe(int src, - int tag, - struct ompi_communicator_t *comm, - int *matched, ompi_status_public_t * status) -{ - int rc = OMPI_SUCCESS; - mca_pml_bfo_recv_request_t recvreq; - - OBJ_CONSTRUCT( &recvreq, mca_pml_bfo_recv_request_t ); - recvreq.req_recv.req_base.req_ompi.req_type = OMPI_REQUEST_PML; - recvreq.req_recv.req_base.req_type = MCA_PML_REQUEST_IPROBE; - - MCA_PML_BFO_RECV_REQUEST_INIT(&recvreq, NULL, 0, &ompi_mpi_char.dt, src, tag, comm, false); - MCA_PML_BFO_RECV_REQUEST_START(&recvreq); - - if( recvreq.req_recv.req_base.req_ompi.req_complete == true ) { - if( NULL != status ) { - *status = recvreq.req_recv.req_base.req_ompi.req_status; - } - rc = recvreq.req_recv.req_base.req_ompi.req_status.MPI_ERROR; - *matched = 1; - } else { - *matched = 0; - opal_progress(); - } - MCA_PML_BASE_RECV_REQUEST_FINI( &recvreq.req_recv ); - return rc; -} - - -int mca_pml_bfo_probe(int src, - int tag, - struct ompi_communicator_t *comm, - ompi_status_public_t * status) -{ - int rc = OMPI_SUCCESS; - mca_pml_bfo_recv_request_t recvreq; - - OBJ_CONSTRUCT( &recvreq, mca_pml_bfo_recv_request_t ); - recvreq.req_recv.req_base.req_ompi.req_type = OMPI_REQUEST_PML; - recvreq.req_recv.req_base.req_type = MCA_PML_REQUEST_PROBE; - - MCA_PML_BFO_RECV_REQUEST_INIT(&recvreq, NULL, 0, &ompi_mpi_char.dt, src, tag, comm, false); - MCA_PML_BFO_RECV_REQUEST_START(&recvreq); - - ompi_request_wait_completion(&recvreq.req_recv.req_base.req_ompi); - rc = recvreq.req_recv.req_base.req_ompi.req_status.MPI_ERROR; - if (NULL != status) { - *status = recvreq.req_recv.req_base.req_ompi.req_status; - } - - MCA_PML_BASE_RECV_REQUEST_FINI( &recvreq.req_recv ); - return rc; -} - - -int -mca_pml_bfo_improbe(int src, - int tag, - struct ompi_communicator_t *comm, - int *matched, - 
struct ompi_message_t **message, - ompi_status_public_t * status) -{ - int rc = OMPI_SUCCESS; - mca_pml_bfo_recv_request_t *recvreq; - - *message = ompi_message_alloc(); - if (NULL == *message) return OMPI_ERR_TEMP_OUT_OF_RESOURCE; - - MCA_PML_BFO_RECV_REQUEST_ALLOC(recvreq); - if (NULL == recvreq) - return OMPI_ERR_OUT_OF_RESOURCE; - recvreq->req_recv.req_base.req_type = MCA_PML_REQUEST_IMPROBE; - - /* initialize the request enough to probe and get the status */ - MCA_PML_BFO_RECV_REQUEST_INIT(recvreq, NULL, 0, &ompi_mpi_char.dt, - src, tag, comm, false); - MCA_PML_BFO_RECV_REQUEST_START(recvreq); - - if( recvreq->req_recv.req_base.req_ompi.req_complete == true ) { - if( NULL != status ) { - *status = recvreq->req_recv.req_base.req_ompi.req_status; - } - *matched = 1; - - (*message)->comm = comm; - (*message)->req_ptr = recvreq; - (*message)->peer = recvreq->req_recv.req_base.req_ompi.req_status.MPI_SOURCE; - (*message)->count = recvreq->req_recv.req_base.req_ompi.req_status._ucount; - - rc = recvreq->req_recv.req_base.req_ompi.req_status.MPI_ERROR; - } else { - *matched = 0; - - /* we only free if we didn't match, because we're going to - translate the request into a receive request later on if it - was matched */ - MCA_PML_BFO_RECV_REQUEST_RETURN( recvreq ); - ompi_message_return(*message); - *message = MPI_MESSAGE_NULL; - - opal_progress(); - } - - return rc; -} - - -int -mca_pml_bfo_mprobe(int src, - int tag, - struct ompi_communicator_t *comm, - struct ompi_message_t **message, - ompi_status_public_t * status) -{ - int rc = OMPI_SUCCESS; - mca_pml_bfo_recv_request_t *recvreq; - - *message = ompi_message_alloc(); - if (NULL == *message) return OMPI_ERR_TEMP_OUT_OF_RESOURCE; - - MCA_PML_BFO_RECV_REQUEST_ALLOC(recvreq); - if (NULL == recvreq) - return OMPI_ERR_TEMP_OUT_OF_RESOURCE; - recvreq->req_recv.req_base.req_type = MCA_PML_REQUEST_MPROBE; - - /* initialize the request enough to probe and get the status */ - MCA_PML_BFO_RECV_REQUEST_INIT(recvreq, NULL, 0, 
&ompi_mpi_char.dt, - src, tag, comm, false); - MCA_PML_BFO_RECV_REQUEST_START(recvreq); - - ompi_request_wait_completion(&recvreq->req_recv.req_base.req_ompi); - rc = recvreq->req_recv.req_base.req_ompi.req_status.MPI_ERROR; - - if( NULL != status ) { - *status = recvreq->req_recv.req_base.req_ompi.req_status; - } - - (*message)->comm = comm; - (*message)->req_ptr = recvreq; - (*message)->peer = recvreq->req_recv.req_base.req_ompi.req_status.MPI_SOURCE; - (*message)->count = recvreq->req_recv.req_base.req_ompi.req_status._ucount; - - return rc; -} diff --git a/ompi/mca/pml/bfo/pml_bfo_irecv.c b/ompi/mca/pml/bfo/pml_bfo_irecv.c deleted file mode 100644 index b4b2cb352b2..00000000000 --- a/ompi/mca/pml/bfo/pml_bfo_irecv.c +++ /dev/null @@ -1,308 +0,0 @@ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2013 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * Copyright (c) 2007 Los Alamos National Security, LLC. All rights - * reserved. - * Copyright (c) 2010-2012 Oracle and/or its affiliates. All rights reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "ompi_config.h" -#include "ompi/request/request.h" -#include "pml_bfo_recvreq.h" -#include "pml_bfo_recvfrag.h" -#include "ompi/peruse/peruse-internal.h" -#include "ompi/message/message.h" - -int mca_pml_bfo_irecv_init(void *addr, - size_t count, - ompi_datatype_t * datatype, - int src, - int tag, - struct ompi_communicator_t *comm, - struct ompi_request_t **request) -{ - mca_pml_bfo_recv_request_t *recvreq; - MCA_PML_BFO_RECV_REQUEST_ALLOC(recvreq); - if (NULL == recvreq) - return OMPI_ERR_OUT_OF_RESOURCE; - - MCA_PML_BFO_RECV_REQUEST_INIT(recvreq, - addr, - count, datatype, src, tag, comm, true); - - PERUSE_TRACE_COMM_EVENT (PERUSE_COMM_REQ_ACTIVATE, - &((recvreq)->req_recv.req_base), - PERUSE_RECV); - - *request = (ompi_request_t *) recvreq; - return OMPI_SUCCESS; -} - -int mca_pml_bfo_irecv(void *addr, - size_t count, - ompi_datatype_t * datatype, - int src, - int tag, - struct ompi_communicator_t *comm, - struct ompi_request_t **request) -{ - mca_pml_bfo_recv_request_t *recvreq; - MCA_PML_BFO_RECV_REQUEST_ALLOC(recvreq); - if (NULL == recvreq) - return OMPI_ERR_OUT_OF_RESOURCE; - - MCA_PML_BFO_RECV_REQUEST_INIT(recvreq, - addr, - count, datatype, src, tag, comm, false); - - PERUSE_TRACE_COMM_EVENT (PERUSE_COMM_REQ_ACTIVATE, - &((recvreq)->req_recv.req_base), - PERUSE_RECV); - - MCA_PML_BFO_RECV_REQUEST_START(recvreq); - *request = (ompi_request_t *) recvreq; - return OMPI_SUCCESS; -} - - -int mca_pml_bfo_recv(void *addr, - size_t count, - ompi_datatype_t * datatype, - int src, - int tag, - struct ompi_communicator_t *comm, - ompi_status_public_t * status) -{ - int rc; - mca_pml_bfo_recv_request_t *recvreq; - MCA_PML_BFO_RECV_REQUEST_ALLOC(recvreq); - if (NULL == recvreq) - return OMPI_ERR_OUT_OF_RESOURCE; - - MCA_PML_BFO_RECV_REQUEST_INIT(recvreq, - addr, - count, datatype, src, tag, comm, false); - - PERUSE_TRACE_COMM_EVENT (PERUSE_COMM_REQ_ACTIVATE, - 
&((recvreq)->req_recv.req_base), - PERUSE_RECV); - - MCA_PML_BFO_RECV_REQUEST_START(recvreq); - ompi_request_wait_completion(&recvreq->req_recv.req_base.req_ompi); - - if (NULL != status) { /* return status */ - *status = recvreq->req_recv.req_base.req_ompi.req_status; - } - rc = recvreq->req_recv.req_base.req_ompi.req_status.MPI_ERROR; - ompi_request_free( (ompi_request_t**)&recvreq ); - return rc; -} - - -int -mca_pml_bfo_imrecv( void *buf, - size_t count, - ompi_datatype_t *datatype, - struct ompi_message_t **message, - struct ompi_request_t **request ) -{ - mca_pml_bfo_recv_frag_t* frag; - mca_pml_bfo_recv_request_t *recvreq; - mca_pml_bfo_hdr_t *hdr; - int src, tag; - ompi_communicator_t *comm; - mca_pml_bfo_comm_proc_t* proc; - mca_pml_bfo_comm_t* bfo_comm; - uint64_t seq; - - /* get the request from the message and the frag from the request - before we overwrite everything */ - recvreq = (mca_pml_bfo_recv_request_t*) (*message)->req_ptr; - frag = (mca_pml_bfo_recv_frag_t*) recvreq->req_recv.req_base.req_addr; - src = recvreq->req_recv.req_base.req_ompi.req_status.MPI_SOURCE; - tag = recvreq->req_recv.req_base.req_ompi.req_status.MPI_TAG; - comm = (*message)->comm; - bfo_comm = recvreq->req_recv.req_base.req_comm->c_pml_comm; - seq = recvreq->req_recv.req_base.req_sequence; - - /* make the request a recv request again */ - /* The old request kept pointers to comm and the char datatype. - We're about to release those, but need to make sure comm - doesn't go out of scope (we don't care about the char datatype - anymore). So retain comm, then release the frag, then reinit - the frag (which will retain comm), then release comm (but the - frag still has it's ref, so it'll stay in scope). Make - sense? 
*/ - OBJ_RETAIN(comm); - MCA_PML_BASE_RECV_REQUEST_FINI(&recvreq->req_recv); - recvreq->req_recv.req_base.req_type = MCA_PML_REQUEST_RECV; - MCA_PML_BFO_RECV_REQUEST_INIT(recvreq, - buf, - count, datatype, - src, tag, comm, false); - OBJ_RELEASE(comm); - - PERUSE_TRACE_COMM_EVENT (PERUSE_COMM_REQ_ACTIVATE, - &((recvreq)->req_recv.req_base), - PERUSE_RECV); - - /* init/re-init the request */ - recvreq->req_lock = 0; - recvreq->req_pipeline_depth = 0; - recvreq->req_bytes_received = 0; - /* What about req_rdma_cnt ? */ - recvreq->req_rdma_idx = 0; - recvreq->req_pending = false; - recvreq->req_ack_sent = false; - - MCA_PML_BASE_RECV_START(&recvreq->req_recv.req_base); - - /* Note - sequence number already assigned */ - recvreq->req_recv.req_base.req_sequence = seq; - - proc = &bfo_comm->procs[recvreq->req_recv.req_base.req_peer]; - recvreq->req_recv.req_base.req_proc = proc->ompi_proc; - prepare_recv_req_converter(recvreq); - - /* we can't go through the match, since we already have the match. 
- Cheat and do what REQUEST_START does, but without the frag - search */ - hdr = (mca_pml_bfo_hdr_t*)frag->segments->seg_addr.pval; - switch(hdr->hdr_common.hdr_type) { - case MCA_PML_BFO_HDR_TYPE_MATCH: - mca_pml_bfo_recv_request_progress_match(recvreq, frag->btl, frag->segments, - frag->num_segments); - break; - case MCA_PML_BFO_HDR_TYPE_RNDV: - mca_pml_bfo_recv_request_progress_rndv(recvreq, frag->btl, frag->segments, - frag->num_segments); - break; - case MCA_PML_BFO_HDR_TYPE_RGET: - mca_pml_bfo_recv_request_progress_rget(recvreq, frag->btl, frag->segments, - frag->num_segments); - break; - default: - assert(0); - } - MCA_PML_BFO_RECV_FRAG_RETURN(frag); - - ompi_message_return(*message); - *message = MPI_MESSAGE_NULL; - *request = (ompi_request_t *) recvreq; - - return OMPI_SUCCESS; -} - - -int -mca_pml_bfo_mrecv( void *buf, - size_t count, - ompi_datatype_t *datatype, - struct ompi_message_t **message, - ompi_status_public_t* status ) -{ - mca_pml_bfo_recv_frag_t* frag; - mca_pml_bfo_recv_request_t *recvreq; - mca_pml_bfo_hdr_t *hdr; - int src, tag, rc; - ompi_communicator_t *comm; - mca_pml_bfo_comm_proc_t* proc; - mca_pml_bfo_comm_t* bfo_comm; - uint64_t seq; - - /* get the request from the message and the frag from the request - before we overwrite everything */ - comm = (*message)->comm; - recvreq = (mca_pml_bfo_recv_request_t*) (*message)->req_ptr; - frag = (mca_pml_bfo_recv_frag_t*) recvreq->req_recv.req_base.req_addr; - src = recvreq->req_recv.req_base.req_ompi.req_status.MPI_SOURCE; - tag = recvreq->req_recv.req_base.req_ompi.req_status.MPI_TAG; - seq = recvreq->req_recv.req_base.req_sequence; - bfo_comm = recvreq->req_recv.req_base.req_comm->c_pml_comm; - - /* make the request a recv request again */ - /* The old request kept pointers to comm and the char datatype. - We're about to release those, but need to make sure comm - doesn't go out of scope (we don't care about the char datatype - anymore). 
So retain comm, then release the frag, then reinit - the frag (which will retain comm), then release comm (but the - frag still has it's ref, so it'll stay in scope). Make - sense? */ - OBJ_RETAIN(comm); - MCA_PML_BASE_RECV_REQUEST_FINI(&recvreq->req_recv); - recvreq->req_recv.req_base.req_type = MCA_PML_REQUEST_RECV; - MCA_PML_BFO_RECV_REQUEST_INIT(recvreq, - buf, - count, datatype, - src, tag, comm, false); - OBJ_RELEASE(comm); - - PERUSE_TRACE_COMM_EVENT (PERUSE_COMM_REQ_ACTIVATE, - &((recvreq)->req_recv.req_base), - PERUSE_RECV); - - /* init/re-init the request */ - recvreq->req_lock = 0; - recvreq->req_pipeline_depth = 0; - recvreq->req_bytes_received = 0; - recvreq->req_rdma_cnt = 0; - recvreq->req_rdma_idx = 0; - recvreq->req_pending = false; - - MCA_PML_BASE_RECV_START(&recvreq->req_recv.req_base); - - /* Note - sequence number already assigned */ - recvreq->req_recv.req_base.req_sequence = seq; - - proc = &bfo_comm->procs[recvreq->req_recv.req_base.req_peer]; - recvreq->req_recv.req_base.req_proc = proc->ompi_proc; - prepare_recv_req_converter(recvreq); - - /* we can't go through the match, since we already have the match. 
- Cheat and do what REQUEST_START does, but without the frag - search */ - hdr = (mca_pml_bfo_hdr_t*)frag->segments->seg_addr.pval; - switch(hdr->hdr_common.hdr_type) { - case MCA_PML_BFO_HDR_TYPE_MATCH: - mca_pml_bfo_recv_request_progress_match(recvreq, frag->btl, frag->segments, - frag->num_segments); - break; - case MCA_PML_BFO_HDR_TYPE_RNDV: - mca_pml_bfo_recv_request_progress_rndv(recvreq, frag->btl, frag->segments, - frag->num_segments); - break; - case MCA_PML_BFO_HDR_TYPE_RGET: - mca_pml_bfo_recv_request_progress_rget(recvreq, frag->btl, frag->segments, - frag->num_segments); - break; - default: - assert(0); - } - - ompi_message_return(*message); - *message = MPI_MESSAGE_NULL; - ompi_request_wait_completion(&(recvreq->req_recv.req_base.req_ompi)); - - MCA_PML_BFO_RECV_FRAG_RETURN(frag); - - if (NULL != status) { /* return status */ - *status = recvreq->req_recv.req_base.req_ompi.req_status; - } - rc = recvreq->req_recv.req_base.req_ompi.req_status.MPI_ERROR; - ompi_request_free( (ompi_request_t**)&recvreq ); - return rc; -} - diff --git a/ompi/mca/pml/bfo/pml_bfo_isend.c b/ompi/mca/pml/bfo/pml_bfo_isend.c deleted file mode 100644 index 599d1afc0a4..00000000000 --- a/ompi/mca/pml/bfo/pml_bfo_isend.c +++ /dev/null @@ -1,129 +0,0 @@ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2013 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * Copyright (c) 2007 Los Alamos National Security, LLC. All rights - * reserved. - * Copyright (c) 2010 Oracle and/or its affiliates. All rights reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "ompi_config.h" - -#include "pml_bfo.h" -#include "pml_bfo_sendreq.h" -#include "pml_bfo_recvreq.h" -#include "ompi/peruse/peruse-internal.h" - -int mca_pml_bfo_isend_init(void *buf, - size_t count, - ompi_datatype_t * datatype, - int dst, - int tag, - mca_pml_base_send_mode_t sendmode, - ompi_communicator_t * comm, - ompi_request_t ** request) -{ - mca_pml_bfo_send_request_t *sendreq = NULL; - MCA_PML_BFO_SEND_REQUEST_ALLOC(comm, dst, sendreq); - if (NULL == sendreq) - return OMPI_ERR_OUT_OF_RESOURCE; - - MCA_PML_BFO_SEND_REQUEST_INIT(sendreq, - buf, - count, - datatype, - dst, tag, - comm, sendmode, true); - - PERUSE_TRACE_COMM_EVENT (PERUSE_COMM_REQ_ACTIVATE, - &(sendreq)->req_send.req_base, - PERUSE_SEND); - - *request = (ompi_request_t *) sendreq; - return OMPI_SUCCESS; -} - - -int mca_pml_bfo_isend(void *buf, - size_t count, - ompi_datatype_t * datatype, - int dst, - int tag, - mca_pml_base_send_mode_t sendmode, - ompi_communicator_t * comm, - ompi_request_t ** request) -{ - int rc; - mca_pml_bfo_send_request_t *sendreq = NULL; - - MCA_PML_BFO_SEND_REQUEST_ALLOC(comm, dst, sendreq); - if (NULL == sendreq) - return OMPI_ERR_OUT_OF_RESOURCE; - - MCA_PML_BFO_SEND_REQUEST_INIT(sendreq, - buf, - count, - datatype, - dst, tag, - comm, sendmode, false); - - PERUSE_TRACE_COMM_EVENT (PERUSE_COMM_REQ_ACTIVATE, - &(sendreq)->req_send.req_base, - PERUSE_SEND); - - MCA_PML_BFO_SEND_REQUEST_START(sendreq, rc); - *request = (ompi_request_t *) sendreq; - return rc; -} - - -int mca_pml_bfo_send(void *buf, - size_t count, - ompi_datatype_t * datatype, - int dst, - int tag, - mca_pml_base_send_mode_t sendmode, - ompi_communicator_t * comm) -{ - int rc; - mca_pml_bfo_send_request_t *sendreq; - - MCA_PML_BFO_SEND_REQUEST_ALLOC(comm, dst, sendreq); - if (NULL == sendreq) - return OMPI_ERR_OUT_OF_RESOURCE; - - MCA_PML_BFO_SEND_REQUEST_INIT(sendreq, - buf, - count, - datatype, - dst, tag, - 
comm, sendmode, false); - - PERUSE_TRACE_COMM_EVENT (PERUSE_COMM_REQ_ACTIVATE, - &(sendreq)->req_send.req_base, - PERUSE_SEND); - - MCA_PML_BFO_SEND_REQUEST_START(sendreq, rc); - if (rc != OMPI_SUCCESS) { - MCA_PML_BFO_SEND_REQUEST_RETURN( sendreq ); - return rc; - } - - ompi_request_wait_completion(&sendreq->req_send.req_base.req_ompi); - - rc = sendreq->req_send.req_base.req_ompi.req_status.MPI_ERROR; - ompi_request_free( (ompi_request_t**)&sendreq ); - return rc; -} diff --git a/ompi/mca/pml/bfo/pml_bfo_progress.c b/ompi/mca/pml/bfo/pml_bfo_progress.c deleted file mode 100644 index 1ab217357b6..00000000000 --- a/ompi/mca/pml/bfo/pml_bfo_progress.c +++ /dev/null @@ -1,78 +0,0 @@ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2008 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * Copyright (c) 2010 Oracle and/or its affiliates. All rights reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "ompi_config.h" - -#include "pml_bfo.h" -#include "pml_bfo_sendreq.h" -#include "ompi/mca/bml/base/base.h" - -int mca_pml_bfo_progress(void) -{ - int i, queue_length = opal_list_get_size(&mca_pml_bfo.send_pending); - int j, completed_requests = 0; - bool send_succedded; - - if( OPAL_LIKELY(0 == queue_length) ) - return 0; - - for( i = 0; i < queue_length; i++ ) { - mca_pml_bfo_send_pending_t pending_type = MCA_PML_BFO_SEND_PENDING_NONE; - mca_pml_bfo_send_request_t* sendreq; - mca_bml_base_endpoint_t* endpoint; - - sendreq = get_request_from_send_pending(&pending_type); - if(OPAL_UNLIKELY(NULL == sendreq)) - break; - - switch(pending_type) { - case MCA_PML_BFO_SEND_PENDING_NONE: - assert(0); - return 0; - case MCA_PML_BFO_SEND_PENDING_SCHEDULE: - if( mca_pml_bfo_send_request_schedule_exclusive(sendreq) == - OMPI_ERR_OUT_OF_RESOURCE ) { - return 0; - } - completed_requests++; - break; - case MCA_PML_BFO_SEND_PENDING_START: - endpoint = sendreq->req_endpoint; - send_succedded = false; - for(j = 0; j < (int)mca_bml_base_btl_array_get_size(&endpoint->btl_eager); j++) { - mca_bml_base_btl_t* bml_btl; - int rc; - - /* select a btl */ - bml_btl = mca_bml_base_btl_array_get_next(&endpoint->btl_eager); - rc = mca_pml_bfo_send_request_start_btl(sendreq, bml_btl); - if( OPAL_LIKELY(OMPI_SUCCESS == rc) ) { - send_succedded = true; - completed_requests++; - break; - } - } - if( false == send_succedded ) { - add_request_to_send_pending(sendreq, MCA_PML_BFO_SEND_PENDING_START, true); - } - } - } - return completed_requests; -} - diff --git a/ompi/mca/pml/bfo/pml_bfo_rdma.c b/ompi/mca/pml/bfo/pml_bfo_rdma.c deleted file mode 100644 index 0bd99849073..00000000000 --- a/ompi/mca/pml/bfo/pml_bfo_rdma.c +++ /dev/null @@ -1,118 +0,0 @@ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. 
- * Copyright (c) 2004-2006 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * Copyright (c) 2010 Oracle and/or its affiliates. All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - - -/*%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%*/ - -#include "ompi_config.h" -#include "ompi/constants.h" -#include "ompi/mca/pml/pml.h" -#include "ompi/mca/bml/bml.h" -#include "opal/mca/mpool/mpool.h" -#include "pml_bfo.h" -#include "pml_bfo_rdma.h" - -/* Use this registration if no registration needed for a BTL instead of NULL. - * This will help other code to distinguish case when memory is not registered - * from case when registration is not needed */ -static mca_mpool_base_registration_t pml_bfo_dummy_reg; - -/* - * Check to see if memory is registered or can be registered. Build a - * set of registrations on the request. 
- */ - -size_t mca_pml_bfo_rdma_btls( - mca_bml_base_endpoint_t* bml_endpoint, - unsigned char* base, - size_t size, - mca_pml_bfo_com_btl_t* rdma_btls) -{ - int num_btls = mca_bml_base_btl_array_get_size(&bml_endpoint->btl_rdma); - double weight_total = 0; - int num_btls_used = 0, n; - - /* shortcut when there are no rdma capable btls */ - if(num_btls == 0) { - return 0; - } - - /* check to see if memory is registered */ - for(n = 0; n < num_btls && num_btls_used < mca_pml_bfo.max_rdma_per_request; - n++) { - mca_bml_base_btl_t* bml_btl = - mca_bml_base_btl_array_get_index(&bml_endpoint->btl_rdma, - (bml_endpoint->btl_rdma_index + n) % num_btls); - mca_mpool_base_registration_t* reg = &pml_bfo_dummy_reg; - mca_mpool_base_module_t *btl_mpool = bml_btl->btl->btl_mpool; - - if( NULL != btl_mpool ) { - if(!mca_pml_bfo.leave_pinned) { - /* look through existing registrations */ - btl_mpool->mpool_find(btl_mpool, base, size, ®); - } else { - /* register the memory */ - btl_mpool->mpool_register(btl_mpool, base, size, 0, ®); - } - - if(NULL == reg) - continue; - } - - rdma_btls[num_btls_used].bml_btl = bml_btl; - rdma_btls[num_btls_used].btl_reg = reg; - weight_total += bml_btl->btl_weight; - num_btls_used++; - } - - /* if we don't use leave_pinned and all BTLs that already have this memory - * registered amount to less then half of available bandwidth - fall back to - * pipeline protocol */ - if(0 == num_btls_used || (!mca_pml_bfo.leave_pinned && weight_total < 0.5)) - return 0; - - mca_pml_bfo_calc_weighted_length(rdma_btls, num_btls_used, size, - weight_total); - - bml_endpoint->btl_rdma_index = (bml_endpoint->btl_rdma_index + 1) % num_btls; - return num_btls_used; -} - -size_t mca_pml_bfo_rdma_pipeline_btls( mca_bml_base_endpoint_t* bml_endpoint, - size_t size, - mca_pml_bfo_com_btl_t* rdma_btls ) -{ - int i, num_btls = mca_bml_base_btl_array_get_size(&bml_endpoint->btl_rdma); - double weight_total = 0; - - for(i = 0; i < num_btls && i < 
mca_pml_bfo.max_rdma_per_request; i++) { - rdma_btls[i].bml_btl = - mca_bml_base_btl_array_get_next(&bml_endpoint->btl_rdma); - if(NULL != rdma_btls[i].bml_btl->btl->btl_mpool) - rdma_btls[i].btl_reg = NULL; - else - rdma_btls[i].btl_reg = &pml_bfo_dummy_reg; - - weight_total += rdma_btls[i].bml_btl->btl_weight; - } - - mca_pml_bfo_calc_weighted_length(rdma_btls, i, size, weight_total); - - return i; -} diff --git a/ompi/mca/pml/bfo/pml_bfo_rdma.h b/ompi/mca/pml/bfo/pml_bfo_rdma.h deleted file mode 100644 index abeb65a0f8b..00000000000 --- a/ompi/mca/pml/bfo/pml_bfo_rdma.h +++ /dev/null @@ -1,42 +0,0 @@ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2005 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * Copyright (c) 2010 Oracle and/or its affiliates. All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ -/** - * @file - */ - -#ifndef MCA_PML_BFO_RDMA_H -#define MCA_PML_BFO_RDMA_H - -struct mca_bml_base_endpoint_t; - -/* - * Of the set of available btls that support RDMA, - * find those that already have registrations - or - * register if required (for leave_pinned option) - */ -size_t mca_pml_bfo_rdma_btls(struct mca_bml_base_endpoint_t* endpoint, - unsigned char* base, size_t size, struct mca_pml_bfo_com_btl_t* btls); - -/* Choose RDMA BTLs to use for sending of a request by pipeline protocol. 
- * Calculate number of bytes to send through each BTL according to available - * bandwidth */ -size_t mca_pml_bfo_rdma_pipeline_btls(struct mca_bml_base_endpoint_t* endpoint, - size_t size, mca_pml_bfo_com_btl_t* rdma_btls); -#endif - diff --git a/ompi/mca/pml/bfo/pml_bfo_rdmafrag.c b/ompi/mca/pml/bfo/pml_bfo_rdmafrag.c deleted file mode 100644 index b99e30a8de5..00000000000 --- a/ompi/mca/pml/bfo/pml_bfo_rdmafrag.c +++ /dev/null @@ -1,30 +0,0 @@ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2005 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * Copyright (c) 2010 Oracle and/or its affiliates. All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "ompi_config.h" - -#include "pml_bfo.h" -#include "pml_bfo_rdmafrag.h" - - -OBJ_CLASS_INSTANCE( - mca_pml_bfo_rdma_frag_t, - ompi_free_list_item_t, - NULL, - NULL); diff --git a/ompi/mca/pml/bfo/pml_bfo_rdmafrag.h b/ompi/mca/pml/bfo/pml_bfo_rdmafrag.h deleted file mode 100644 index 883c16baa1f..00000000000 --- a/ompi/mca/pml/bfo/pml_bfo_rdmafrag.h +++ /dev/null @@ -1,75 +0,0 @@ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2013 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. 
- * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * Copyright (c) 2010 Oracle and/or its affiliates. All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ -/** - * @file - */ - -#ifndef MCA_PML_BFO_RDMAFRAG_H -#define MCA_PML_BFO_RDMAFRAG_H - -#include "pml_bfo_hdr.h" -#include "opal/mca/mpool/base/base.h" - -BEGIN_C_DECLS - -typedef enum { - MCA_PML_BFO_RDMA_PUT, - MCA_PML_BFO_RDMA_GET -} mca_pml_bfo_rdma_state_t; - -struct mca_pml_bfo_rdma_frag_t { - opal_free_list_item_t super; - mca_bml_base_btl_t* rdma_bml; -#if PML_BFO - mca_btl_base_module_t* rdma_btl; -#endif /* PML_BFO */ - mca_pml_bfo_hdr_t rdma_hdr; - mca_pml_bfo_rdma_state_t rdma_state; - size_t rdma_length; - uint8_t rdma_segs[MCA_BTL_SEG_MAX_SIZE * MCA_BTL_DES_MAX_SEGMENTS]; - void *rdma_req; - struct mca_bml_base_endpoint_t* rdma_ep; - opal_convertor_t convertor; - struct mca_mpool_base_registration_t* reg; - uint32_t retries; -}; -typedef struct mca_pml_bfo_rdma_frag_t mca_pml_bfo_rdma_frag_t; - -OBJ_CLASS_DECLARATION(mca_pml_bfo_rdma_frag_t); - - -#define MCA_PML_BFO_RDMA_FRAG_ALLOC(frag) \ -do { \ - opal_free_list_item_t* item; \ - OPAL_FREE_LIST_WAIT_MT(&mca_pml_bfo.rdma_frags, item); \ - frag = (mca_pml_bfo_rdma_frag_t*)item; \ -} while(0) - -#define MCA_PML_BFO_RDMA_FRAG_RETURN(frag) \ -do { \ - /* return fragment */ \ - OPAL_FREE_LIST_RETURN_MT(&mca_pml_bfo.rdma_frags, \ - (opal_free_list_item_t*)frag); \ -} while(0) - - -END_C_DECLS - -#endif - diff --git a/ompi/mca/pml/bfo/pml_bfo_recvfrag.c b/ompi/mca/pml/bfo/pml_bfo_recvfrag.c deleted file mode 100644 index c7216c0d538..00000000000 --- a/ompi/mca/pml/bfo/pml_bfo_recvfrag.c +++ /dev/null @@ -1,743 +0,0 @@ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. 
- * Copyright (c) 2004-2013 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2007 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * Copyright (c) 2008 UT-Battelle, LLC. All rights reserved. - * Copyright (c) 2006-2008 University of Houston. All rights reserved. - * Copyright (c) 2009-2012 Oracle and/or its affiliates. All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -/** - * @file - */ - -#include "ompi_config.h" - -#include "opal/class/opal_list.h" -#include "opal/threads/mutex.h" -#include "opal/prefetch.h" - -#include "ompi/constants.h" -#include "ompi/communicator/communicator.h" -#include "ompi/mca/pml/pml.h" -#include "ompi/peruse/peruse-internal.h" -#include "ompi/memchecker.h" - -#include "pml_bfo.h" -#include "pml_bfo_comm.h" -#include "pml_bfo_recvfrag.h" -#include "pml_bfo_recvreq.h" -#include "pml_bfo_sendreq.h" -#include "pml_bfo_hdr.h" -#if PML_BFO -#include "pml_bfo_failover.h" -#endif /* PML_BFO */ - -OBJ_CLASS_INSTANCE( mca_pml_bfo_buffer_t, - ompi_free_list_item_t, - NULL, - NULL ); - -OBJ_CLASS_INSTANCE( mca_pml_bfo_recv_frag_t, - opal_list_item_t, - NULL, - NULL ); - -/** - * Static functions. - */ - -/** - * Append a unexpected descriptor to a queue. This function will allocate and - * initialize the fragment (if necessary) and then will add it to the specified - * queue. The allocated fragment is not returned to the caller. 
- */ -static void -append_frag_to_list(opal_list_t *queue, mca_btl_base_module_t *btl, - mca_pml_bfo_match_hdr_t *hdr, mca_btl_base_segment_t* segments, - size_t num_segments, mca_pml_bfo_recv_frag_t* frag) -{ - if(NULL == frag) { - MCA_PML_BFO_RECV_FRAG_ALLOC(frag); - MCA_PML_BFO_RECV_FRAG_INIT(frag, hdr, segments, num_segments, btl); - } - opal_list_append(queue, (opal_list_item_t*)frag); -} - -/** - * Match incoming recv_frags against posted receives. - * Supports out of order delivery. - * - * @param frag_header (IN) Header of received recv_frag. - * @param frag_desc (IN) Received recv_frag descriptor. - * @param match_made (OUT) Flag indicating wether a match was made. - * @param additional_matches (OUT) List of additional matches - * @return OMPI_SUCCESS or error status on failure. - */ -static int mca_pml_bfo_recv_frag_match( mca_btl_base_module_t *btl, - mca_pml_bfo_match_hdr_t *hdr, - mca_btl_base_segment_t* segments, - size_t num_segments, - int type); - -static mca_pml_bfo_recv_request_t* -match_one(mca_btl_base_module_t *btl, - mca_pml_bfo_match_hdr_t *hdr, mca_btl_base_segment_t* segments, - size_t num_segments, ompi_communicator_t *comm_ptr, - mca_pml_bfo_comm_proc_t *proc, - mca_pml_bfo_recv_frag_t* frag); - -void mca_pml_bfo_recv_frag_callback_match(mca_btl_base_module_t* btl, - mca_btl_base_tag_t tag, - mca_btl_base_descriptor_t* des, - void* cbdata ) -{ - mca_btl_base_segment_t* segments = des->des_local; - mca_pml_bfo_match_hdr_t* hdr = (mca_pml_bfo_match_hdr_t*)segments->seg_addr.pval; - ompi_communicator_t *comm_ptr; - mca_pml_bfo_recv_request_t *match = NULL; - mca_pml_bfo_comm_t *comm; - mca_pml_bfo_comm_proc_t *proc; - size_t num_segments = des->des_local_count; - size_t bytes_received = 0; - - assert(num_segments <= MCA_BTL_DES_MAX_SEGMENTS); - - if( OPAL_UNLIKELY(segments->seg_len < OMPI_PML_BFO_MATCH_HDR_LEN) ) { - return; - } - bfo_hdr_ntoh(((mca_pml_bfo_hdr_t*) hdr), MCA_PML_BFO_HDR_TYPE_MATCH); - - /* communicator pointer */ - comm_ptr 
= ompi_comm_lookup(hdr->hdr_ctx); - if(OPAL_UNLIKELY(NULL == comm_ptr)) { - /* This is a special case. A message for a not yet existing - * communicator can happens. Instead of doing a matching we - * will temporarily add it the a pending queue in the PML. - * Later on, when the communicator is completely instantiated, - * this pending queue will be searched and all matching fragments - * moved to the right communicator. - */ - append_frag_to_list( &mca_pml_bfo.non_existing_communicator_pending, - btl, hdr, segments, num_segments, NULL ); - return; - } - comm = (mca_pml_bfo_comm_t *)comm_ptr->c_pml_comm; - - /* source sequence number */ - proc = &comm->procs[hdr->hdr_src]; - - /* We generate the MSG_ARRIVED event as soon as the PML is aware - * of a matching fragment arrival. Independing if it is received - * on the correct order or not. This will allow the tools to - * figure out if the messages are not received in the correct - * order (if multiple network interfaces). - */ - PERUSE_TRACE_MSG_EVENT(PERUSE_COMM_MSG_ARRIVED, comm_ptr, - hdr->hdr_src, hdr->hdr_tag, PERUSE_RECV); - - /* get next expected message sequence number - if threaded - * run, lock to make sure that if another thread is processing - * a frag from the same message a match is made only once. - * Also, this prevents other posted receives (for a pair of - * end points) from being processed, and potentially "loosing" - * the fragment. - */ - OPAL_THREAD_LOCK(&comm->matching_lock); - - /* get sequence number of next message that can be processed */ - if(OPAL_UNLIKELY((((uint16_t) hdr->hdr_seq) != ((uint16_t) proc->expected_sequence)) || - (opal_list_get_size(&proc->frags_cant_match) > 0 ))) { - goto slow_path; - } - - /* This is the sequence number we were expecting, so we can try - * matching it to already posted receives. - */ - - /* We're now expecting the next sequence number. 
*/ - proc->expected_sequence++; - - /* We generate the SEARCH_POSTED_QUEUE only when the message is - * received in the correct sequence. Otherwise, we delay the event - * generation until we reach the correct sequence number. - */ - PERUSE_TRACE_MSG_EVENT(PERUSE_COMM_SEARCH_POSTED_Q_BEGIN, comm_ptr, - hdr->hdr_src, hdr->hdr_tag, PERUSE_RECV); - - match = match_one(btl, hdr, segments, num_segments, comm_ptr, proc, NULL); - - /* The match is over. We generate the SEARCH_POSTED_Q_END here, - * before going into the mca_pml_bfo_check_cantmatch_for_match so - * we can make a difference for the searching time for all - * messages. - */ - PERUSE_TRACE_MSG_EVENT(PERUSE_COMM_SEARCH_POSTED_Q_END, comm_ptr, - hdr->hdr_src, hdr->hdr_tag, PERUSE_RECV); - - /* release matching lock before processing fragment */ - OPAL_THREAD_UNLOCK(&comm->matching_lock); - - if(OPAL_LIKELY(match)) { - bytes_received = segments->seg_len - OMPI_PML_BFO_MATCH_HDR_LEN; - match->req_recv.req_bytes_packed = bytes_received; - - MCA_PML_BFO_RECV_REQUEST_MATCHED(match, hdr); - if(match->req_bytes_expected > 0) { - struct iovec iov[MCA_BTL_DES_MAX_SEGMENTS]; - uint32_t iov_count = 1; - - /* - * Make user buffer accessable(defined) before unpacking. 
- */ - MEMCHECKER( - memchecker_call(&opal_memchecker_base_mem_defined, - match->req_recv.req_base.req_addr, - match->req_recv.req_base.req_count, - match->req_recv.req_base.req_datatype); - ); - - iov[0].iov_len = bytes_received; - iov[0].iov_base = (IOVBASE_TYPE*)((unsigned char*)segments->seg_addr.pval + - OMPI_PML_BFO_MATCH_HDR_LEN); - while (iov_count < num_segments) { - bytes_received += segments[iov_count].seg_len; - iov[iov_count].iov_len = segments[iov_count].seg_len; - iov[iov_count].iov_base = (IOVBASE_TYPE*)((unsigned char*)segments[iov_count].seg_addr.pval); - iov_count++; - } - opal_convertor_unpack( &match->req_recv.req_base.req_convertor, - iov, - &iov_count, - &bytes_received ); - match->req_bytes_received = bytes_received; - /* - * Unpacking finished, make the user buffer unaccessable again. - */ - MEMCHECKER( - memchecker_call(&opal_memchecker_base_mem_noaccess, - match->req_recv.req_base.req_addr, - match->req_recv.req_base.req_count, - match->req_recv.req_base.req_datatype); - ); - } - - /* no need to check if complete we know we are.. 
*/ - /* don't need a rmb as that is for checking */ - recv_request_pml_complete(match); - } - return; - - slow_path: - OPAL_THREAD_UNLOCK(&comm->matching_lock); -#if PML_BFO - if (true == mca_pml_bfo_is_duplicate_msg(proc, hdr)) { - return; - } -#endif /* PML_BFO */ - mca_pml_bfo_recv_frag_match(btl, hdr, segments, - num_segments, MCA_PML_BFO_HDR_TYPE_MATCH); -} - - -void mca_pml_bfo_recv_frag_callback_rndv(mca_btl_base_module_t* btl, - mca_btl_base_tag_t tag, - mca_btl_base_descriptor_t* des, - void* cbdata ) -{ - mca_btl_base_segment_t* segments = des->des_local; - mca_pml_bfo_hdr_t* hdr = (mca_pml_bfo_hdr_t*)segments->seg_addr.pval; - - if( OPAL_UNLIKELY(segments->seg_len < sizeof(mca_pml_bfo_common_hdr_t)) ) { - return; - } - bfo_hdr_ntoh(hdr, MCA_PML_BFO_HDR_TYPE_RNDV); - mca_pml_bfo_recv_frag_match(btl, &hdr->hdr_match, segments, - des->des_local_count, MCA_PML_BFO_HDR_TYPE_RNDV); - return; -} - -void mca_pml_bfo_recv_frag_callback_rget(mca_btl_base_module_t* btl, - mca_btl_base_tag_t tag, - mca_btl_base_descriptor_t* des, - void* cbdata ) -{ - mca_btl_base_segment_t* segments = des->des_local; - mca_pml_bfo_hdr_t* hdr = (mca_pml_bfo_hdr_t*)segments->seg_addr.pval; - - if( OPAL_UNLIKELY(segments->seg_len < sizeof(mca_pml_bfo_common_hdr_t)) ) { - return; - } - bfo_hdr_ntoh(hdr, MCA_PML_BFO_HDR_TYPE_RGET); - mca_pml_bfo_recv_frag_match(btl, &hdr->hdr_match, segments, - des->des_local_count, MCA_PML_BFO_HDR_TYPE_RGET); - return; -} - - - -void mca_pml_bfo_recv_frag_callback_ack(mca_btl_base_module_t* btl, - mca_btl_base_tag_t tag, - mca_btl_base_descriptor_t* des, - void* cbdata ) -{ - mca_btl_base_segment_t* segments = des->des_local; - mca_pml_bfo_hdr_t* hdr = (mca_pml_bfo_hdr_t*)segments->seg_addr.pval; - mca_pml_bfo_send_request_t* sendreq; - - if( OPAL_UNLIKELY(segments->seg_len < sizeof(mca_pml_bfo_common_hdr_t)) ) { - return; - } - - bfo_hdr_ntoh(hdr, MCA_PML_BFO_HDR_TYPE_ACK); - sendreq = (mca_pml_bfo_send_request_t*)hdr->hdr_ack.hdr_src_req.pval; - 
sendreq->req_recv = hdr->hdr_ack.hdr_dst_req; -#if PML_BFO - MCA_PML_BFO_ERROR_CHECK_ON_ACK_CALLBACK(sendreq); -#endif /* PML_BFO */ - - /* if the request should be delivered entirely by copy in/out - * then throttle sends */ - if(hdr->hdr_common.hdr_flags & MCA_PML_BFO_HDR_FLAGS_NORDMA) - sendreq->req_throttle_sends = true; - - mca_pml_bfo_send_request_copy_in_out(sendreq, - hdr->hdr_ack.hdr_send_offset, - sendreq->req_send.req_bytes_packed - - hdr->hdr_ack.hdr_send_offset); - - if (sendreq->req_state != 0) { - /* Typical receipt of an ACK message causes req_state to be - * decremented. However, a send request that started as an - * RGET request can become a RNDV. For example, when the - * receiver determines that its receive buffer is not - * contiguous and therefore cannot support the RGET - * protocol. A send request that started with the RGET - * protocol has req_state == 0 and as such should not be - * decremented. - */ - OPAL_THREAD_ADD_FETCH32(&sendreq->req_state, -1); - } - - if(send_request_pml_complete_check(sendreq) == false) - mca_pml_bfo_send_request_schedule(sendreq); - - return; -} - -void mca_pml_bfo_recv_frag_callback_frag(mca_btl_base_module_t* btl, - mca_btl_base_tag_t tag, - mca_btl_base_descriptor_t* des, - void* cbdata ) { - mca_btl_base_segment_t* segments = des->des_local; - mca_pml_bfo_hdr_t* hdr = (mca_pml_bfo_hdr_t*)segments->seg_addr.pval; - mca_pml_bfo_recv_request_t* recvreq; - - if( OPAL_UNLIKELY(segments->seg_len < sizeof(mca_pml_bfo_common_hdr_t)) ) { - return; - } - bfo_hdr_ntoh(hdr, MCA_PML_BFO_HDR_TYPE_FRAG); - recvreq = (mca_pml_bfo_recv_request_t*)hdr->hdr_frag.hdr_dst_req.pval; -#if PML_BFO - MCA_PML_BFO_ERROR_CHECK_ON_FRAG_CALLBACK(recvreq); -#endif /* PML_BFO */ - mca_pml_bfo_recv_request_progress_frag(recvreq,btl,segments,des->des_local_count); - - return; -} - - -void mca_pml_bfo_recv_frag_callback_put(mca_btl_base_module_t* btl, - mca_btl_base_tag_t tag, - mca_btl_base_descriptor_t* des, - void* cbdata ) { - 
mca_btl_base_segment_t* segments = des->des_local; - mca_pml_bfo_hdr_t* hdr = (mca_pml_bfo_hdr_t*)segments->seg_addr.pval; - mca_pml_bfo_send_request_t* sendreq; - - if( OPAL_UNLIKELY(segments->seg_len < sizeof(mca_pml_bfo_common_hdr_t)) ) { - return; - } - - bfo_hdr_ntoh(hdr, MCA_PML_BFO_HDR_TYPE_PUT); - sendreq = (mca_pml_bfo_send_request_t*)hdr->hdr_rdma.hdr_req.pval; -#if PML_BFO - MCA_PML_BFO_ERROR_CHECK_ON_PUT_CALLBACK(sendreq); -#endif /* PML_BFO */ - mca_pml_bfo_send_request_put(sendreq,btl,&hdr->hdr_rdma); - - return; -} - - -void mca_pml_bfo_recv_frag_callback_fin(mca_btl_base_module_t* btl, - mca_btl_base_tag_t tag, - mca_btl_base_descriptor_t* des, - void* cbdata ) { - mca_btl_base_segment_t* segments = des->des_local; - mca_pml_bfo_hdr_t* hdr = (mca_pml_bfo_hdr_t*)segments->seg_addr.pval; - mca_btl_base_descriptor_t* rdma; - - if( OPAL_UNLIKELY(segments->seg_len < sizeof(mca_pml_bfo_common_hdr_t)) ) { - return; - } - - bfo_hdr_ntoh(hdr, MCA_PML_BFO_HDR_TYPE_FIN); - rdma = (mca_btl_base_descriptor_t*)hdr->hdr_fin.hdr_des.pval; -#if PML_BFO - if (true == mca_pml_bfo_is_duplicate_fin(hdr, rdma, btl)) { - return; - } -#endif /* PML_BFO */ - rdma->des_cbfunc(btl, NULL, rdma, - hdr->hdr_fin.hdr_fail ? 
OMPI_ERROR : OMPI_SUCCESS); - - return; -} - - - -#define PML_MAX_SEQ ~((mca_pml_sequence_t)0); - -static inline mca_pml_bfo_recv_request_t* get_posted_recv(opal_list_t *queue) -{ - if(opal_list_get_size(queue) == 0) - return NULL; - - return (mca_pml_bfo_recv_request_t*)opal_list_get_first(queue); -} - -static inline mca_pml_bfo_recv_request_t* get_next_posted_recv( - opal_list_t *queue, - mca_pml_bfo_recv_request_t* req) -{ - opal_list_item_t *i = opal_list_get_next((opal_list_item_t*)req); - - if(opal_list_get_end(queue) == i) - return NULL; - - return (mca_pml_bfo_recv_request_t*)i; -} - -static mca_pml_bfo_recv_request_t *match_incomming( - mca_pml_bfo_match_hdr_t *hdr, mca_pml_bfo_comm_t *comm, - mca_pml_bfo_comm_proc_t *proc) -{ - mca_pml_bfo_recv_request_t *specific_recv, *wild_recv; - mca_pml_sequence_t wild_recv_seq, specific_recv_seq; - int tag = hdr->hdr_tag; - - specific_recv = get_posted_recv(&proc->specific_receives); - wild_recv = get_posted_recv(&comm->wild_receives); - - wild_recv_seq = wild_recv ? - wild_recv->req_recv.req_base.req_sequence : PML_MAX_SEQ; - specific_recv_seq = specific_recv ? 
- specific_recv->req_recv.req_base.req_sequence : PML_MAX_SEQ; - - /* they are equal only if both are PML_MAX_SEQ */ - while(wild_recv_seq != specific_recv_seq) { - mca_pml_bfo_recv_request_t **match; - opal_list_t *queue; - int req_tag; - mca_pml_sequence_t *seq; - - if (OPAL_UNLIKELY(wild_recv_seq < specific_recv_seq)) { - match = &wild_recv; - queue = &comm->wild_receives; - seq = &wild_recv_seq; - } else { - match = &specific_recv; - queue = &proc->specific_receives; - seq = &specific_recv_seq; - } - - req_tag = (*match)->req_recv.req_base.req_tag; - if(req_tag == tag || (req_tag == OMPI_ANY_TAG && tag >= 0)) { - opal_list_remove_item(queue, (opal_list_item_t*)(*match)); - PERUSE_TRACE_COMM_EVENT(PERUSE_COMM_REQ_REMOVE_FROM_POSTED_Q, - &((*match)->req_recv.req_base), PERUSE_RECV); - return *match; - } - - *match = get_next_posted_recv(queue, *match); - *seq = (*match) ? (*match)->req_recv.req_base.req_sequence : PML_MAX_SEQ; - } - - return NULL; -} - -static mca_pml_bfo_recv_request_t* -match_one(mca_btl_base_module_t *btl, - mca_pml_bfo_match_hdr_t *hdr, mca_btl_base_segment_t* segments, - size_t num_segments, ompi_communicator_t *comm_ptr, - mca_pml_bfo_comm_proc_t *proc, - mca_pml_bfo_recv_frag_t* frag) -{ - mca_pml_bfo_recv_request_t *match; - mca_pml_bfo_comm_t *comm = (mca_pml_bfo_comm_t *)comm_ptr->c_pml_comm; - - do { - match = match_incomming(hdr, comm, proc); - - /* if match found, process data */ - if(OPAL_LIKELY(NULL != match)) { - match->req_recv.req_base.req_proc = proc->ompi_proc; - - if(OPAL_UNLIKELY(MCA_PML_REQUEST_PROBE == match->req_recv.req_base.req_type)) { - /* complete the probe */ - mca_pml_bfo_recv_request_matched_probe(match, btl, segments, - num_segments); - /* attempt to match actual request */ - continue; - } else if (MCA_PML_REQUEST_MPROBE == match->req_recv.req_base.req_type) { - /* create a receive frag and associate it with the - request, which is then completed so that it can be - restarted later during mrecv */ - 
mca_pml_bfo_recv_frag_t *tmp; - if(NULL == frag) { - MCA_PML_BFO_RECV_FRAG_ALLOC(tmp); - MCA_PML_BFO_RECV_FRAG_INIT(tmp, hdr, segments, num_segments, btl); - } else { - tmp = frag; - } - - match->req_recv.req_base.req_addr = tmp; - mca_pml_bfo_recv_request_matched_probe(match, btl, segments, - num_segments); - /* this frag is already processed, so we want to break out - of the loop and not end up back on the unexpected queue. */ - return NULL; - } - - PERUSE_TRACE_COMM_EVENT(PERUSE_COMM_MSG_MATCH_POSTED_REQ, - &(match->req_recv.req_base), PERUSE_RECV); - return match; - } - - /* if no match found, place on unexpected queue */ - append_frag_to_list(&proc->unexpected_frags, btl, hdr, segments, - num_segments, frag); - PERUSE_TRACE_MSG_EVENT(PERUSE_COMM_MSG_INSERT_IN_UNEX_Q, comm_ptr, - hdr->hdr_src, hdr->hdr_tag, PERUSE_RECV); - return NULL; - } while(true); -} - -static mca_pml_bfo_recv_frag_t* check_cantmatch_for_match(mca_pml_bfo_comm_proc_t *proc) -{ - mca_pml_bfo_recv_frag_t *frag; - - /* search the list for a fragment from the send with sequence - * number next_msg_seq_expected - */ - for(frag = (mca_pml_bfo_recv_frag_t*)opal_list_get_first(&proc->frags_cant_match); - frag != (mca_pml_bfo_recv_frag_t*)opal_list_get_end(&proc->frags_cant_match); - frag = (mca_pml_bfo_recv_frag_t*)opal_list_get_next(frag)) - { - mca_pml_bfo_match_hdr_t* hdr = &frag->hdr.hdr_match; - /* - * If the message has the next expected seq from that proc... - */ - if(hdr->hdr_seq != proc->expected_sequence) - continue; - - opal_list_remove_item(&proc->frags_cant_match, (opal_list_item_t*)frag); - return frag; - } - - return NULL; -} - -/** - * RCS/CTS receive side matching - * - * @param hdr list of parameters needed for matching - * This list is also embeded in frag, - * but this allows to save a memory copy when - * a match is made in this routine. (IN) - * @param frag pointer to receive fragment which we want - * to match (IN/OUT). If a match is not made, - * hdr is copied to frag. 
- * @param match_made parameter indicating if we matched frag/ - * hdr (OUT) - * @param additional_matches if a match is made with frag, we - * may be able to match fragments that previously - * have arrived out-of-order. If this is the - * case, the associated fragment descriptors are - * put on this list for further processing. (OUT) - * - * @return OMPI error code - * - * This routine is used to try and match a newly arrived message fragment - * to pre-posted receives. The following assumptions are made - * - fragments are received out of order - * - for long messages, e.g. more than one fragment, a RTS/CTS algorithm - * is used. - * - 2nd and greater fragments include a receive descriptor pointer - * - fragments may be dropped - * - fragments may be corrupt - * - this routine may be called simultaneously by more than one thread - */ -static int mca_pml_bfo_recv_frag_match( mca_btl_base_module_t *btl, - mca_pml_bfo_match_hdr_t *hdr, - mca_btl_base_segment_t* segments, - size_t num_segments, - int type) -{ - /* local variables */ - uint16_t next_msg_seq_expected, frag_msg_seq; - ompi_communicator_t *comm_ptr; - mca_pml_bfo_recv_request_t *match = NULL; - mca_pml_bfo_comm_t *comm; - mca_pml_bfo_comm_proc_t *proc; - mca_pml_bfo_recv_frag_t* frag = NULL; - - /* communicator pointer */ - comm_ptr = ompi_comm_lookup(hdr->hdr_ctx); - if(OPAL_UNLIKELY(NULL == comm_ptr)) { - /* This is a special case. A message for a not yet existing - * communicator can happens. Instead of doing a matching we - * will temporarily add it the a pending queue in the PML. - * Later on, when the communicator is completely instantiated, - * this pending queue will be searched and all matching fragments - * moved to the right communicator. 
- */ - append_frag_to_list( &mca_pml_bfo.non_existing_communicator_pending, - btl, hdr, segments, num_segments, NULL ); - return OMPI_SUCCESS; - } - comm = (mca_pml_bfo_comm_t *)comm_ptr->c_pml_comm; - - /* source sequence number */ - frag_msg_seq = hdr->hdr_seq; - proc = &comm->procs[hdr->hdr_src]; - - /** - * We generate the MSG_ARRIVED event as soon as the PML is aware of a matching - * fragment arrival. Independing if it is received on the correct order or not. - * This will allow the tools to figure out if the messages are not received in the - * correct order (if multiple network interfaces). - */ - PERUSE_TRACE_MSG_EVENT(PERUSE_COMM_MSG_ARRIVED, comm_ptr, - hdr->hdr_src, hdr->hdr_tag, PERUSE_RECV); - - /* get next expected message sequence number - if threaded - * run, lock to make sure that if another thread is processing - * a frag from the same message a match is made only once. - * Also, this prevents other posted receives (for a pair of - * end points) from being processed, and potentially "loosing" - * the fragment. - */ - OPAL_THREAD_LOCK(&comm->matching_lock); - -#if PML_BFO - if(OPAL_UNLIKELY(hdr->hdr_common.hdr_flags & MCA_PML_BFO_HDR_FLAGS_RESTART)) { - if (NULL == (match = mca_pml_bfo_get_request(hdr))) { - return OMPI_SUCCESS; - } - } else { -#endif /* PML_BFO */ - /* get sequence number of next message that can be processed */ - next_msg_seq_expected = (uint16_t)proc->expected_sequence; - if(OPAL_UNLIKELY(frag_msg_seq != next_msg_seq_expected)) - goto wrong_seq; - - /* - * This is the sequence number we were expecting, - * so we can try matching it to already posted - * receives. - */ - -out_of_order_match: - /* We're now expecting the next sequence number. */ - proc->expected_sequence++; - - /** - * We generate the SEARCH_POSTED_QUEUE only when the message is received - * in the correct sequence. Otherwise, we delay the event generation until - * we reach the correct sequence number. 
- */ - PERUSE_TRACE_MSG_EVENT(PERUSE_COMM_SEARCH_POSTED_Q_BEGIN, comm_ptr, - hdr->hdr_src, hdr->hdr_tag, PERUSE_RECV); - - match = match_one(btl, hdr, segments, num_segments, comm_ptr, proc, frag); - - /** - * The match is over. We generate the SEARCH_POSTED_Q_END here, before going - * into the mca_pml_bfo_check_cantmatch_for_match so we can make a difference - * for the searching time for all messages. - */ - PERUSE_TRACE_MSG_EVENT(PERUSE_COMM_SEARCH_POSTED_Q_END, comm_ptr, - hdr->hdr_src, hdr->hdr_tag, PERUSE_RECV); - - /* release matching lock before processing fragment */ - OPAL_THREAD_UNLOCK(&comm->matching_lock); - -#if PML_BFO - } -#endif /* PML_BFO */ - if(OPAL_LIKELY(match)) { - switch(type) { - case MCA_PML_BFO_HDR_TYPE_MATCH: - mca_pml_bfo_recv_request_progress_match(match, btl, segments, num_segments); - break; - case MCA_PML_BFO_HDR_TYPE_RNDV: - mca_pml_bfo_recv_request_progress_rndv(match, btl, segments, num_segments); - break; - case MCA_PML_BFO_HDR_TYPE_RGET: - mca_pml_bfo_recv_request_progress_rget(match, btl, segments, num_segments); - break; - } - - if(OPAL_UNLIKELY(frag)) - MCA_PML_BFO_RECV_FRAG_RETURN(frag); - } - - /* - * Now that new message has arrived, check to see if - * any fragments on the c_c_frags_cant_match list - * may now be used to form new matchs - */ - if(OPAL_UNLIKELY(opal_list_get_size(&proc->frags_cant_match) > 0)) { - OPAL_THREAD_LOCK(&comm->matching_lock); - if((frag = check_cantmatch_for_match(proc))) { - hdr = &frag->hdr.hdr_match; - segments = frag->segments; - num_segments = frag->num_segments; - btl = frag->btl; - type = hdr->hdr_common.hdr_type; - goto out_of_order_match; - } - OPAL_THREAD_UNLOCK(&comm->matching_lock); - } - - return OMPI_SUCCESS; -wrong_seq: - /* - * This message comes after the next expected, so it - * is ahead of sequence. Save it for later. 
- */ -#if PML_BFO - if (true == mca_pml_bfo_is_duplicate_msg(proc, hdr)) { - return OMPI_SUCCESS; - } -#endif /* PML_BFO */ - append_frag_to_list(&proc->frags_cant_match, btl, hdr, segments, - num_segments, NULL); - OPAL_THREAD_UNLOCK(&comm->matching_lock); - return OMPI_SUCCESS; -} - diff --git a/ompi/mca/pml/bfo/pml_bfo_recvfrag.h b/ompi/mca/pml/bfo/pml_bfo_recvfrag.h deleted file mode 100644 index 72c557daac1..00000000000 --- a/ompi/mca/pml/bfo/pml_bfo_recvfrag.h +++ /dev/null @@ -1,172 +0,0 @@ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2013 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * Copyright (c) 2008 UT-Battelle, LLC. All rights reserved. - * Copyright (c) 2010 Oracle and/or its affiliates. All rights reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ -/** - * @file - */ - -#ifndef MCA_PML_BFO_RECVFRAG_H -#define MCA_PML_BFO_RECVFRAG_H - -#include "pml_bfo_hdr.h" - -BEGIN_C_DECLS - -struct mca_pml_bfo_buffer_t { - size_t len; - void * addr; -}; -typedef struct mca_pml_bfo_buffer_t mca_pml_bfo_buffer_t; - - -struct mca_pml_bfo_recv_frag_t { - opal_free_list_item_t super; - mca_pml_bfo_hdr_t hdr; - size_t num_segments; - mca_btl_base_module_t* btl; - mca_btl_base_segment_t segments[MCA_BTL_DES_MAX_SEGMENTS]; - mca_pml_bfo_buffer_t buffers[MCA_BTL_DES_MAX_SEGMENTS]; - unsigned char addr[1]; -}; -typedef struct mca_pml_bfo_recv_frag_t mca_pml_bfo_recv_frag_t; - -OBJ_CLASS_DECLARATION(mca_pml_bfo_recv_frag_t); - - -#define MCA_PML_BFO_RECV_FRAG_ALLOC(frag) \ -do { \ - opal_free_list_item_t* item; \ - OPAL_FREE_LIST_WAIT_MT(&mca_pml_bfo.recv_frags, item); \ - frag = (mca_pml_bfo_recv_frag_t*)item; \ -} while(0) - - -#define MCA_PML_BFO_RECV_FRAG_INIT(frag, hdr, segs, cnt, btl ) \ -do { \ - size_t i, _size; \ - mca_btl_base_segment_t* macro_segments = frag->segments; \ - mca_pml_bfo_buffer_t* buffers = frag->buffers; \ - unsigned char* _ptr = (unsigned char*)frag->addr; \ - /* init recv_frag */ \ - frag->btl = btl; \ - frag->hdr = *(mca_pml_bfo_hdr_t*)hdr; \ - frag->num_segments = 1; \ - _size = segs[0].seg_len; \ - for( i = 1; i < cnt; i++ ) { \ - _size += segs[i].seg_len; \ - } \ - /* copy over data */ \ - if(_size <= mca_pml_bfo.unexpected_limit ) { \ - macro_segments[0].seg_addr.pval = frag->addr; \ - } else { \ - buffers[0].len = _size; \ - buffers[0].addr = (char*) \ - mca_pml_bfo.allocator->alc_alloc( mca_pml_bfo.allocator, \ - buffers[0].len, \ - 0, NULL); \ - _ptr = (unsigned char*)(buffers[0].addr); \ - macro_segments[0].seg_addr.pval = buffers[0].addr; \ - } \ - macro_segments[0].seg_len = _size; \ - for( i = 0; i < cnt; i++ ) { \ - memcpy( _ptr, segs[i].seg_addr.pval, segs[i].seg_len); \ - _ptr += segs[i].seg_len; \ - } \ - } 
while(0) - - -#define MCA_PML_BFO_RECV_FRAG_RETURN(frag) \ -do { \ - if( frag->segments[0].seg_len > mca_pml_bfo.unexpected_limit ) { \ - /* return buffers */ \ - mca_pml_bfo.allocator->alc_free( mca_pml_bfo.allocator, \ - frag->buffers[0].addr ); \ - } \ - frag->num_segments = 0; \ - \ - /* return recv_frag */ \ - OPAL_FREE_LIST_RETURN(&mca_pml_bfo.recv_frags, \ - (opal_free_list_item_t*)frag); \ - } while(0) - - -/** - * Callback from BTL on receipt of a recv_frag (match). - */ - -extern void mca_pml_bfo_recv_frag_callback_match( mca_btl_base_module_t *btl, - mca_btl_base_tag_t tag, - mca_btl_base_descriptor_t* descriptor, - void* cbdata ); - -/** - * Callback from BTL on receipt of a recv_frag (rndv). - */ - -extern void mca_pml_bfo_recv_frag_callback_rndv( mca_btl_base_module_t *btl, - mca_btl_base_tag_t tag, - mca_btl_base_descriptor_t* descriptor, - void* cbdata ); -/** - * Callback from BTL on receipt of a recv_frag (rget). - */ - -extern void mca_pml_bfo_recv_frag_callback_rget( mca_btl_base_module_t *btl, - mca_btl_base_tag_t tag, - mca_btl_base_descriptor_t* descriptor, - void* cbdata ); - -/** - * Callback from BTL on receipt of a recv_frag (ack). - */ - -extern void mca_pml_bfo_recv_frag_callback_ack( mca_btl_base_module_t *btl, - mca_btl_base_tag_t tag, - mca_btl_base_descriptor_t* descriptor, - void* cbdata ); -/** - * Callback from BTL on receipt of a recv_frag (frag). - */ - -extern void mca_pml_bfo_recv_frag_callback_frag( mca_btl_base_module_t *btl, - mca_btl_base_tag_t tag, - mca_btl_base_descriptor_t* descriptor, - void* cbdata ); -/** - * Callback from BTL on receipt of a recv_frag (put). - */ - -extern void mca_pml_bfo_recv_frag_callback_put( mca_btl_base_module_t *btl, - mca_btl_base_tag_t tag, - mca_btl_base_descriptor_t* descriptor, - void* cbdata ); -/** - * Callback from BTL on receipt of a recv_frag (fin). 
- */ - -extern void mca_pml_bfo_recv_frag_callback_fin( mca_btl_base_module_t *btl, - mca_btl_base_tag_t tag, - mca_btl_base_descriptor_t* descriptor, - void* cbdata ); - - -END_C_DECLS - -#endif - diff --git a/ompi/mca/pml/bfo/pml_bfo_recvreq.c b/ompi/mca/pml/bfo/pml_bfo_recvreq.c deleted file mode 100644 index c0658f10ef3..00000000000 --- a/ompi/mca/pml/bfo/pml_bfo_recvreq.c +++ /dev/null @@ -1,1165 +0,0 @@ -/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2016 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * Copyright (c) 2008 UT-Battelle, LLC. All rights reserved. - * Copyright (c) 2010-2012 Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2011-2012 Los Alamos National Security, LLC. - * All rights reserved. - * Copyright (c) 2016 Research Organization for Information Science - * and Technology (RIST). All rights reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "ompi_config.h" - -#include "opal/mca/mpool/mpool.h" -#include "opal/util/arch.h" -#include "ompi/mca/pml/pml.h" -#include "ompi/mca/bml/bml.h" -#include "pml_bfo_comm.h" -#include "pml_bfo_recvreq.h" -#include "pml_bfo_recvfrag.h" -#include "pml_bfo_sendreq.h" -#include "pml_bfo_rdmafrag.h" -#include "ompi/mca/bml/base/base.h" -#include "ompi/memchecker.h" -#if OPAL_CUDA_SUPPORT -#include "opal/datatype/opal_datatype_cuda.h" -#include "opal/mca/common/cuda/common_cuda.h" -#endif /* OPAL_CUDA_SUPPORT */ - -#if OPAL_CUDA_SUPPORT -int mca_pml_bfo_cuda_need_buffers(mca_pml_bfo_recv_request_t* recvreq, - mca_btl_base_module_t* btl); -#endif /* OPAL_CUDA_SUPPORT */ -#if PML_BFO -#include "pml_bfo_failover.h" -#endif /* PML_BFO */ - -void mca_pml_bfo_recv_request_process_pending(void) -{ - mca_pml_bfo_recv_request_t* recvreq; - int rc, i, s = (int)opal_list_get_size(&mca_pml_bfo.recv_pending); - - for(i = 0; i < s; i++) { - OPAL_THREAD_LOCK(&mca_pml_bfo.lock); - recvreq = (mca_pml_bfo_recv_request_t*) - opal_list_remove_first(&mca_pml_bfo.recv_pending); - OPAL_THREAD_UNLOCK(&mca_pml_bfo.lock); - if( OPAL_UNLIKELY(NULL == recvreq) ) - break; - recvreq->req_pending = false; - rc = mca_pml_bfo_recv_request_schedule_exclusive(recvreq, NULL); - if(OMPI_ERR_OUT_OF_RESOURCE == rc) - break; - } -} - -static int mca_pml_bfo_recv_request_free(struct ompi_request_t** request) -{ - mca_pml_bfo_recv_request_t* recvreq = *(mca_pml_bfo_recv_request_t**)request; - - if(false == recvreq->req_recv.req_base.req_free_called) { - - recvreq->req_recv.req_base.req_free_called = true; - PERUSE_TRACE_COMM_EVENT( PERUSE_COMM_REQ_NOTIFY, - &(recvreq->req_recv.req_base), PERUSE_RECV ); - - if( true == recvreq->req_recv.req_base.req_pml_complete ) { - /* make buffer defined when the request is compeleted, - and before releasing the objects. 
*/ - MEMCHECKER( - memchecker_call(&opal_memchecker_base_mem_defined, - recvreq->req_recv.req_base.req_addr, - recvreq->req_recv.req_base.req_count, - recvreq->req_recv.req_base.req_datatype); - ); - - MCA_PML_BFO_RECV_REQUEST_RETURN( recvreq ); - } - - } - *request = MPI_REQUEST_NULL; - return OMPI_SUCCESS; -} - -static int mca_pml_bfo_recv_request_cancel(struct ompi_request_t* ompi_request, int complete) -{ - mca_pml_bfo_recv_request_t* request = (mca_pml_bfo_recv_request_t*)ompi_request; - mca_pml_bfo_comm_t* comm = request->req_recv.req_base.req_comm->c_pml_comm; - - if( true == ompi_request->req_complete ) { /* way to late to cancel this one */ - /* - * Receive request completed, make user buffer accessable. - */ - MEMCHECKER( - memchecker_call(&opal_memchecker_base_mem_defined, - request->req_recv.req_base.req_addr, - request->req_recv.req_base.req_count, - request->req_recv.req_base.req_datatype); - ); - return OMPI_SUCCESS; - } - - /* The rest should be protected behind the match logic lock */ - OPAL_THREAD_LOCK(&comm->matching_lock); - if( OMPI_ANY_TAG == ompi_request->req_status.MPI_TAG ) { /* the match has not been already done */ - if( request->req_recv.req_base.req_peer == OMPI_ANY_SOURCE ) { - opal_list_remove_item( &comm->wild_receives, (opal_list_item_t*)request ); - } else { - mca_pml_bfo_comm_proc_t* proc = comm->procs + request->req_recv.req_base.req_peer; - opal_list_remove_item(&proc->specific_receives, (opal_list_item_t*)request); - } - PERUSE_TRACE_COMM_EVENT( PERUSE_COMM_REQ_REMOVE_FROM_POSTED_Q, - &(request->req_recv.req_base), PERUSE_RECV ); - /** - * As now the PML is done with this request we have to force the pml_complete - * to true. Otherwise, the request will never be freed. 
- */ - request->req_recv.req_base.req_pml_complete = true; - } - OPAL_THREAD_UNLOCK(&comm->matching_lock); - - ompi_request->req_status._cancelled = true; - /* This macro will set the req_complete to true so the MPI Test/Wait* functions - * on this request will be able to complete. As the status is marked as - * cancelled the cancel state will be detected. - */ - MCA_PML_BFO_RECV_REQUEST_MPI_COMPLETE(request); - /* - * Receive request cancelled, make user buffer accessable. - */ - MEMCHECKER( - memchecker_call(&opal_memchecker_base_mem_defined, - request->req_recv.req_base.req_addr, - request->req_recv.req_base.req_count, - request->req_recv.req_base.req_datatype); - ); - return OMPI_SUCCESS; -} - -static void mca_pml_bfo_recv_request_construct(mca_pml_bfo_recv_request_t* request) -{ - request->req_recv.req_base.req_type = MCA_PML_REQUEST_RECV; - request->req_recv.req_base.req_ompi.req_start = mca_pml_bfo_start; - request->req_recv.req_base.req_ompi.req_free = mca_pml_bfo_recv_request_free; - request->req_recv.req_base.req_ompi.req_cancel = mca_pml_bfo_recv_request_cancel; - request->req_rdma_cnt = 0; - OBJ_CONSTRUCT(&request->lock, opal_mutex_t); -} - -OBJ_CLASS_INSTANCE( - mca_pml_bfo_recv_request_t, - mca_pml_base_recv_request_t, - mca_pml_bfo_recv_request_construct, - NULL); - - -/* - * Release resources. 
- */ - -static void mca_pml_bfo_recv_ctl_completion( mca_btl_base_module_t* btl, - struct mca_btl_base_endpoint_t* ep, - struct mca_btl_base_descriptor_t* des, - int status ) -{ - mca_bml_base_btl_t* bml_btl = (mca_bml_base_btl_t*)des->des_context; - -#if PML_BFO - if (btl->btl_flags & MCA_BTL_FLAGS_FAILOVER_SUPPORT) { - mca_pml_bfo_check_recv_ctl_completion_status(btl, des, status); - } - MCA_PML_BFO_CHECK_RECVREQ_EAGER_BML_BTL_RECV_CTL(bml_btl, btl, des); -#endif /* PML_BFO */ - MCA_PML_BFO_PROGRESS_PENDING(bml_btl); -} - -/* - * Put operation has completed remotely - update request status - */ - -static void mca_pml_bfo_put_completion( mca_btl_base_module_t* btl, - struct mca_btl_base_endpoint_t* ep, - struct mca_btl_base_descriptor_t* des, - int status ) -{ - mca_bml_base_btl_t* bml_btl = (mca_bml_base_btl_t*)des->des_context; - mca_pml_bfo_recv_request_t* recvreq = (mca_pml_bfo_recv_request_t*)des->des_cbdata; - size_t bytes_received = 0; - - if( OPAL_LIKELY(status == OMPI_SUCCESS) ) { - bytes_received = mca_pml_bfo_compute_segment_length (btl->btl_seg_size, - (void *) des->des_remote, - des->des_remote_count, 0); - } - OPAL_THREAD_SUB_FETCH_SIZE_T(&recvreq->req_pipeline_depth, 1); - -#if PML_BFO - btl->btl_free(btl, des); - MCA_PML_BFO_ERROR_CHECK_ON_FIN_FOR_PUT(recvreq); - MCA_PML_BFO_CHECK_RECVREQ_EAGER_BML_BTL(bml_btl, btl, recvreq, "PUT"); -#else /* PML_BFO */ - mca_bml_base_free(bml_btl, des); -#endif /* PML_BFO */ - - /* check completion status */ - OPAL_THREAD_ADD_FETCH_SIZE_T(&recvreq->req_bytes_received, bytes_received); - if(recv_request_pml_complete_check(recvreq) == false && - recvreq->req_rdma_offset < recvreq->req_send_offset) { - /* schedule additional rdma operations */ - mca_pml_bfo_recv_request_schedule(recvreq, bml_btl); - } - MCA_PML_BFO_PROGRESS_PENDING(bml_btl); -} - -/* - * - */ - -int mca_pml_bfo_recv_request_ack_send_btl( - ompi_proc_t* proc, mca_bml_base_btl_t* bml_btl, - uint64_t hdr_src_req, void *hdr_dst_req, uint64_t 
hdr_send_offset, - bool nordma) -{ - mca_btl_base_descriptor_t* des; - mca_pml_bfo_ack_hdr_t* ack; - int rc; - - /* allocate descriptor */ - mca_bml_base_alloc(bml_btl, &des, MCA_BTL_NO_ORDER, - sizeof(mca_pml_bfo_ack_hdr_t), - MCA_BTL_DES_FLAGS_PRIORITY | MCA_BTL_DES_FLAGS_BTL_OWNERSHIP | MCA_BTL_DES_SEND_ALWAYS_CALLBACK); - if( OPAL_UNLIKELY(NULL == des) ) { - return OMPI_ERR_OUT_OF_RESOURCE; - } - - /* fill out header */ - ack = (mca_pml_bfo_ack_hdr_t*)des->des_local->seg_addr.pval; - ack->hdr_common.hdr_type = MCA_PML_BFO_HDR_TYPE_ACK; - ack->hdr_common.hdr_flags = nordma ? MCA_PML_BFO_HDR_FLAGS_NORDMA : 0; - ack->hdr_src_req.lval = hdr_src_req; - ack->hdr_dst_req.pval = hdr_dst_req; - ack->hdr_send_offset = hdr_send_offset; - - bfo_hdr_hton(ack, MCA_PML_BFO_HDR_TYPE_ACK, proc); - - /* initialize descriptor */ - des->des_cbfunc = mca_pml_bfo_recv_ctl_completion; -#if PML_BFO - des->des_cbdata = hdr_dst_req; -#endif /* PML_BFO */ - - rc = mca_bml_base_send(bml_btl, des, MCA_PML_BFO_HDR_TYPE_ACK); - if( OPAL_LIKELY( rc >= 0 ) ) { -#if PML_BFO - if ((bml_btl->btl_flags & MCA_BTL_FLAGS_FAILOVER_SUPPORT) && - (des->des_flags & MCA_BTL_DES_SEND_ALWAYS_CALLBACK)) { - ((mca_pml_bfo_recv_request_t *)hdr_dst_req)->req_events++; - } -#endif /* PML_BFO */ - return OMPI_SUCCESS; - } - mca_bml_base_free(bml_btl, des); - return OMPI_ERR_OUT_OF_RESOURCE; -} - -static int mca_pml_bfo_recv_request_ack( - mca_pml_bfo_recv_request_t* recvreq, - mca_pml_bfo_rendezvous_hdr_t* hdr, - size_t bytes_received) -{ - ompi_proc_t* proc = (ompi_proc_t*)recvreq->req_recv.req_base.req_proc; - mca_bml_base_endpoint_t* bml_endpoint = NULL; - - bml_endpoint = (mca_bml_base_endpoint_t*) proc->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_BML]; - - /* by default copy everything */ - recvreq->req_send_offset = bytes_received; - if(hdr->hdr_msg_length > bytes_received) { - size_t rdma_num = mca_bml_base_btl_array_get_size(&bml_endpoint->btl_rdma); - /* - * lookup request buffer to determine if memory is 
already - * registered. - */ - - if(opal_convertor_need_buffers(&recvreq->req_recv.req_base.req_convertor) == 0 && - hdr->hdr_match.hdr_common.hdr_flags & MCA_PML_BFO_HDR_FLAGS_CONTIG && - rdma_num != 0) { - unsigned char *base; - opal_convertor_get_current_pointer( &recvreq->req_recv.req_base.req_convertor, (void**)&(base) ); - - if(hdr->hdr_match.hdr_common.hdr_flags & MCA_PML_BFO_HDR_FLAGS_PIN) - recvreq->req_rdma_cnt = mca_pml_bfo_rdma_btls(bml_endpoint, - base, recvreq->req_recv.req_bytes_packed, - recvreq->req_rdma ); - else - recvreq->req_rdma_cnt = 0; - - /* memory is already registered on both sides */ - if (recvreq->req_rdma_cnt != 0) { - recvreq->req_send_offset = hdr->hdr_msg_length; - /* are rdma devices available for long rdma protocol */ - } else if(bml_endpoint->btl_send_limit < hdr->hdr_msg_length) { - /* use convertor to figure out the rdma offset for this request */ - recvreq->req_send_offset = hdr->hdr_msg_length - - bml_endpoint->btl_pipeline_send_length; - - if(recvreq->req_send_offset < bytes_received) - recvreq->req_send_offset = bytes_received; - - /* use converter to figure out the rdma offset for this - * request */ - opal_convertor_set_position(&recvreq->req_recv.req_base.req_convertor, - &recvreq->req_send_offset); - - recvreq->req_rdma_cnt = - mca_pml_bfo_rdma_pipeline_btls(bml_endpoint, - recvreq->req_send_offset - bytes_received, - recvreq->req_rdma); - } - } - /* nothing to send by copy in/out - no need to ack */ - if(recvreq->req_send_offset == hdr->hdr_msg_length) - return OMPI_SUCCESS; - } - /* let know to shedule function there is no need to put ACK flag */ - recvreq->req_ack_sent = true; - return mca_pml_bfo_recv_request_ack_send(proc, hdr->hdr_src_req.lval, - recvreq, recvreq->req_send_offset, - recvreq->req_send_offset == bytes_received); -} - -/** - * Return resources used by the RDMA - */ - -static void mca_pml_bfo_rget_completion( mca_btl_base_module_t* btl, - struct mca_btl_base_endpoint_t* ep, - struct 
mca_btl_base_descriptor_t* des, - int status ) -{ - mca_bml_base_btl_t* bml_btl = (mca_bml_base_btl_t*)des->des_context; - mca_pml_bfo_rdma_frag_t* frag = (mca_pml_bfo_rdma_frag_t*)des->des_cbdata; - mca_pml_bfo_recv_request_t* recvreq = (mca_pml_bfo_recv_request_t*)frag->rdma_req; - -#if PML_BFO - if (btl->btl_flags & MCA_BTL_FLAGS_FAILOVER_SUPPORT) { - recvreq->req_events--; - } -#endif /* PML_BFO */ - /* check completion status */ - if( OPAL_UNLIKELY(OMPI_SUCCESS != status) ) { -#if PML_BFO - MCA_PML_BFO_ERROR_CHECK_ON_RDMA_READ_COMPLETION(recvreq); -#else /* PML_BFO */ - /* TSW - FIX */ - OMPI_ERROR_LOG(status); - ompi_rte_abort(-1, NULL); -#endif /* PML_BFO */ - } -#if PML_BFO - MCA_PML_BFO_SECOND_ERROR_CHECK_ON_RDMA_READ_COMPLETION(recvreq, status, btl); - MCA_PML_BFO_CHECK_RECVREQ_RDMA_BML_BTL(bml_btl, btl, recvreq, "RDMA write"); -#endif /* PML_BFO */ - - mca_pml_bfo_send_fin(recvreq->req_recv.req_base.req_proc, - bml_btl, - frag->rdma_hdr.hdr_rget.hdr_des, -#if PML_BFO - des->order, 0, (uint16_t)recvreq->req_msgseq, recvreq->req_restartseq, - recvreq->req_recv.req_base.req_comm->c_contextid, - recvreq->req_recv.req_base.req_comm->c_my_rank); -#else /* PML_BFO */ - des->order, 0); -#endif /* PML_BFO */ - - /* is receive request complete */ - OPAL_THREAD_ADD_FETCH_SIZE_T(&recvreq->req_bytes_received, frag->rdma_length); - recv_request_pml_complete_check(recvreq); - - MCA_PML_BFO_RDMA_FRAG_RETURN(frag); - - MCA_PML_BFO_PROGRESS_PENDING(bml_btl); -} - - -/* - * - */ -int mca_pml_bfo_recv_request_get_frag( mca_pml_bfo_rdma_frag_t* frag ) -{ - mca_pml_bfo_recv_request_t* recvreq = (mca_pml_bfo_recv_request_t*)frag->rdma_req; - mca_bml_base_btl_t* bml_btl = frag->rdma_bml; - mca_btl_base_descriptor_t* descriptor; - size_t save_size = frag->rdma_length; - int rc; - - /* prepare descriptor */ - mca_bml_base_prepare_dst( bml_btl, - NULL, - &recvreq->req_recv.req_base.req_convertor, - MCA_BTL_NO_ORDER, - 0, - &frag->rdma_length, - MCA_BTL_DES_FLAGS_BTL_OWNERSHIP | 
MCA_BTL_DES_SEND_ALWAYS_CALLBACK | - MCA_BTL_DES_FLAGS_GET, - &descriptor ); - if( OPAL_UNLIKELY(NULL == descriptor) ) { - frag->rdma_length = save_size; - OPAL_THREAD_LOCK(&mca_pml_bfo.lock); - opal_list_append(&mca_pml_bfo.rdma_pending, (opal_list_item_t*)frag); - OPAL_THREAD_UNLOCK(&mca_pml_bfo.lock); - return OMPI_ERR_OUT_OF_RESOURCE; - } - - descriptor->des_remote = (mca_btl_base_segment_t *) frag->rdma_segs; - descriptor->des_remote_count = frag->rdma_hdr.hdr_rdma.hdr_seg_cnt; - descriptor->des_cbfunc = mca_pml_bfo_rget_completion; - descriptor->des_cbdata = frag; - - PERUSE_TRACE_COMM_OMPI_EVENT(PERUSE_COMM_REQ_XFER_CONTINUE, - &(recvreq->req_recv.req_base), - frag->rdma_length, PERUSE_RECV); - - /* queue up get request */ - rc = mca_bml_base_get(bml_btl,descriptor); - if( OPAL_UNLIKELY(OMPI_SUCCESS != rc) ) { - if(OMPI_ERR_OUT_OF_RESOURCE == rc) { - mca_bml_base_free(bml_btl, descriptor); - OPAL_THREAD_LOCK(&mca_pml_bfo.lock); - opal_list_append(&mca_pml_bfo.rdma_pending, - (opal_list_item_t*)frag); - OPAL_THREAD_UNLOCK(&mca_pml_bfo.lock); - return OMPI_ERR_OUT_OF_RESOURCE; - } else { - OMPI_ERROR_LOG(rc); - ompi_rte_abort(-1, NULL); - } - } -#if PML_BFO - if ((bml_btl->btl_flags & MCA_BTL_FLAGS_FAILOVER_SUPPORT) && - (descriptor->des_flags & MCA_BTL_DES_SEND_ALWAYS_CALLBACK)) { - recvreq->req_events++; - } -#endif /* PML_BFO */ - - return OMPI_SUCCESS; -} - - - - -/* - * Update the recv request status to reflect the number of bytes - * received and actually delivered to the application. 
- */ - -void mca_pml_bfo_recv_request_progress_frag( mca_pml_bfo_recv_request_t* recvreq, - mca_btl_base_module_t* btl, - mca_btl_base_segment_t* segments, - size_t num_segments ) -{ - size_t bytes_received, data_offset = 0; - size_t bytes_delivered __opal_attribute_unused__; /* is being set to zero in MCA_PML_BFO_RECV_REQUEST_UNPACK */ - mca_pml_bfo_hdr_t* hdr = (mca_pml_bfo_hdr_t*)segments->seg_addr.pval; - - bytes_received = mca_pml_bfo_compute_segment_length_base (segments, num_segments, - sizeof(mca_pml_bfo_frag_hdr_t)); - data_offset = hdr->hdr_frag.hdr_frag_offset; - /* - * Make user buffer accessable(defined) before unpacking. - */ - MEMCHECKER( - memchecker_call(&opal_memchecker_base_mem_defined, - recvreq->req_recv.req_base.req_addr, - recvreq->req_recv.req_base.req_count, - recvreq->req_recv.req_base.req_datatype); - ); - MCA_PML_BFO_RECV_REQUEST_UNPACK( recvreq, - segments, - num_segments, - sizeof(mca_pml_bfo_frag_hdr_t), - data_offset, - bytes_received, - bytes_delivered ); - /* - * Unpacking finished, make the user buffer unaccessable again. - */ - MEMCHECKER( - memchecker_call(&opal_memchecker_base_mem_noaccess, - recvreq->req_recv.req_base.req_addr, - recvreq->req_recv.req_base.req_count, - recvreq->req_recv.req_base.req_datatype); - ); - - OPAL_THREAD_ADD_FETCH_SIZE_T(&recvreq->req_bytes_received, bytes_received); - /* check completion status */ - if(recv_request_pml_complete_check(recvreq) == false && - recvreq->req_rdma_offset < recvreq->req_send_offset) { - /* schedule additional rdma operations */ - mca_pml_bfo_recv_request_schedule(recvreq, NULL); - } -} - -/* - * Update the recv request status to reflect the number of bytes - * received and actually delivered to the application. 
- */ - -void mca_pml_bfo_recv_request_progress_rget( mca_pml_bfo_recv_request_t* recvreq, - mca_btl_base_module_t* btl, - mca_btl_base_segment_t* segments, - size_t num_segments ) -{ - mca_pml_bfo_rget_hdr_t* hdr = (mca_pml_bfo_rget_hdr_t*)segments->seg_addr.pval; - mca_bml_base_endpoint_t* bml_endpoint = NULL; - mca_pml_bfo_rdma_frag_t* frag; - size_t i, size = 0; - - recvreq->req_recv.req_bytes_packed = hdr->hdr_rndv.hdr_msg_length; - -#if PML_BFO - recvreq->remote_req_send = hdr->hdr_rndv.hdr_src_req; -#endif /* PML_BFO */ - MCA_PML_BFO_RECV_REQUEST_MATCHED(recvreq, &hdr->hdr_rndv.hdr_match); - - /* if receive buffer is not contiguous we can't just RDMA read into it, so - * fall back to copy in/out protocol. It is a pity because buffer on the - * sender side is already registered. We need to be smarter here, perhaps - * do couple of RDMA reads */ - if(opal_convertor_need_buffers(&recvreq->req_recv.req_base.req_convertor) == true) { -#if OPAL_CUDA_SUPPORT - if (mca_pml_bfo_cuda_need_buffers(recvreq, btl)) { - mca_pml_bfo_recv_request_ack(recvreq, &hdr->hdr_rndv, 0); - return; - } -#else /* OPAL_CUDA_SUPPORT */ - mca_pml_bfo_recv_request_ack(recvreq, &hdr->hdr_rndv, 0); - return; -#endif /* OPAL_CUDA_SUPPORT */ - } - - MCA_PML_BFO_RDMA_FRAG_ALLOC(frag); - if( OPAL_UNLIKELY(NULL == frag) ) { - /* GLB - FIX */ - OMPI_ERROR_LOG(OMPI_ERR_OUT_OF_RESOURCE); - ompi_rte_abort(-1, NULL); - } - - /* lookup bml datastructures */ - bml_endpoint = (mca_bml_base_endpoint_t*)recvreq->req_recv.req_base.req_proc->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_BML]; - - assert (btl->btl_seg_size * hdr->hdr_seg_cnt <= sizeof (frag->rdma_segs)); - - /* allocate/initialize a fragment */ - memmove (frag->rdma_segs, hdr + 1, btl->btl_seg_size * hdr->hdr_seg_cnt); - - for(i = 0; i < hdr->hdr_seg_cnt; i++) { - mca_btl_base_segment_t *seg = (mca_btl_base_segment_t *)(frag->rdma_segs + i * btl->btl_seg_size); - -#if OPAL_ENABLE_HETEROGENEOUS_SUPPORT - if 
((recvreq->req_recv.req_base.req_proc->super.proc_arch & OPAL_ARCH_ISBIGENDIAN) != - (ompi_proc_local()->super.proc_arch & OPAL_ARCH_ISBIGENDIAN)) { - size += opal_swap_bytes4(seg->seg_len); - } else -#endif - { - size += seg->seg_len; - } - } -#if PML_BFO - frag->rdma_btl = btl; -#endif /* PML_BFO */ - frag->rdma_bml = mca_bml_base_btl_array_find(&bml_endpoint->btl_rdma, btl); -#if OPAL_CUDA_SUPPORT - if( OPAL_UNLIKELY(NULL == frag->rdma_bml) ) { - if (recvreq->req_recv.req_base.req_convertor.flags & CONVERTOR_CUDA) { - /* Check to see if this is a CUDA get */ - if (btl->btl_flags & MCA_BTL_FLAGS_CUDA_GET) { - frag->rdma_bml = mca_bml_base_btl_array_find(&bml_endpoint->btl_send, btl); - } - if( OPAL_UNLIKELY(NULL == frag->rdma_bml) ) { - opal_output(0, "[%s:%d] invalid bml for rdma get", __FILE__, __LINE__); - ompi_rte_abort(-1, NULL); - } - } else { - /* Just default back to send and receive. Must be mix of GPU and HOST memory. */ - mca_pml_bfo_recv_request_ack(recvreq, &hdr->hdr_rndv, 0); - return; - } - } -#else /* OPAL_CUDA_SUPPORT */ - if( OPAL_UNLIKELY(NULL == frag->rdma_bml) ) { - opal_output(0, "[%s:%d] invalid bml for rdma get", __FILE__, __LINE__); - ompi_rte_abort(-1, NULL); - } -#endif /* OPAL_CUDA_SUPPORT */ - frag->rdma_hdr.hdr_rget = *hdr; - frag->rdma_req = recvreq; - frag->rdma_ep = bml_endpoint; - frag->rdma_length = size; - frag->rdma_state = MCA_PML_BFO_RDMA_GET; - frag->reg = NULL; - - mca_pml_bfo_recv_request_get_frag(frag); - return; -} - -/* - * Update the recv request status to reflect the number of bytes - * received and actually delivered to the application. 
- */ - -void mca_pml_bfo_recv_request_progress_rndv( mca_pml_bfo_recv_request_t* recvreq, - mca_btl_base_module_t* btl, - mca_btl_base_segment_t* segments, - size_t num_segments ) -{ - size_t bytes_received; - size_t bytes_delivered __opal_attribute_unused__; /* is being set to zero in MCA_PML_BFO_RECV_REQUEST_UNPACK */ - size_t data_offset = 0; - mca_pml_bfo_hdr_t* hdr = (mca_pml_bfo_hdr_t*)segments->seg_addr.pval; - - bytes_received = mca_pml_bfo_compute_segment_length_base (segments, num_segments, - sizeof(mca_pml_bfo_rendezvous_hdr_t)); - - recvreq->req_recv.req_bytes_packed = hdr->hdr_rndv.hdr_msg_length; - recvreq->remote_req_send = hdr->hdr_rndv.hdr_src_req; - recvreq->req_rdma_offset = bytes_received; - MCA_PML_BFO_RECV_REQUEST_MATCHED(recvreq, &hdr->hdr_match); - mca_pml_bfo_recv_request_ack(recvreq, &hdr->hdr_rndv, bytes_received); - /** - * The PUT protocol do not attach any data to the original request. - * Therefore, we might want to avoid unpacking if there is nothing to - * unpack. 
- */ - if( 0 < bytes_received ) { - MEMCHECKER( - memchecker_call(&opal_memchecker_base_mem_defined, - recvreq->req_recv.req_base.req_addr, - recvreq->req_recv.req_base.req_count, - recvreq->req_recv.req_base.req_datatype); - ); - MCA_PML_BFO_RECV_REQUEST_UNPACK( recvreq, - segments, - num_segments, - sizeof(mca_pml_bfo_rendezvous_hdr_t), - data_offset, - bytes_received, - bytes_delivered ); - MEMCHECKER( - memchecker_call(&opal_memchecker_base_mem_noaccess, - recvreq->req_recv.req_base.req_addr, - recvreq->req_recv.req_base.req_count, - recvreq->req_recv.req_base.req_datatype); - ); - } - OPAL_THREAD_ADD_FETCH_SIZE_T(&recvreq->req_bytes_received, bytes_received); - /* check completion status */ - if(recv_request_pml_complete_check(recvreq) == false && - recvreq->req_rdma_offset < recvreq->req_send_offset) { - /* schedule additional rdma operations */ - mca_pml_bfo_recv_request_schedule(recvreq, NULL); - } -} - -/* - * Update the recv request status to reflect the number of bytes - * received and actually delivered to the application. - */ -void mca_pml_bfo_recv_request_progress_match( mca_pml_bfo_recv_request_t* recvreq, - mca_btl_base_module_t* btl, - mca_btl_base_segment_t* segments, - size_t num_segments ) -{ - size_t bytes_received, data_offset = 0; - size_t bytes_delivered __opal_attribute_unused__; /* is being set to zero in MCA_PML_BFO_RECV_REQUEST_UNPACK */ - mca_pml_bfo_hdr_t* hdr = (mca_pml_bfo_hdr_t*)segments->seg_addr.pval; - - bytes_received = mca_pml_bfo_compute_segment_length_base (segments, num_segments, - OMPI_PML_BFO_MATCH_HDR_LEN); - - recvreq->req_recv.req_bytes_packed = bytes_received; - - MCA_PML_BFO_RECV_REQUEST_MATCHED(recvreq, &hdr->hdr_match); - /* - * Make user buffer accessable(defined) before unpacking. 
- */ - MEMCHECKER( - memchecker_call(&opal_memchecker_base_mem_defined, - recvreq->req_recv.req_base.req_addr, - recvreq->req_recv.req_base.req_count, - recvreq->req_recv.req_base.req_datatype); - ); - MCA_PML_BFO_RECV_REQUEST_UNPACK( recvreq, - segments, - num_segments, - OMPI_PML_BFO_MATCH_HDR_LEN, - data_offset, - bytes_received, - bytes_delivered); - /* - * Unpacking finished, make the user buffer unaccessable again. - */ - MEMCHECKER( - memchecker_call(&opal_memchecker_base_mem_noaccess, - recvreq->req_recv.req_base.req_addr, - recvreq->req_recv.req_base.req_count, - recvreq->req_recv.req_base.req_datatype); - ); - - /* - * No need for atomic here, as we know there is only one fragment - * for this request. - */ - recvreq->req_bytes_received += bytes_received; - recv_request_pml_complete(recvreq); -} - - -/** - * Handle completion of a probe request - */ - -void mca_pml_bfo_recv_request_matched_probe( mca_pml_bfo_recv_request_t* recvreq, - mca_btl_base_module_t* btl, - mca_btl_base_segment_t* segments, - size_t num_segments ) -{ - size_t bytes_packed = 0; - mca_pml_bfo_hdr_t* hdr = (mca_pml_bfo_hdr_t*)segments->seg_addr.pval; - - switch(hdr->hdr_common.hdr_type) { - case MCA_PML_BFO_HDR_TYPE_MATCH: - bytes_packed = mca_pml_bfo_compute_segment_length_base (segments, num_segments, - OMPI_PML_BFO_MATCH_HDR_LEN); - break; - case MCA_PML_BFO_HDR_TYPE_RNDV: - case MCA_PML_BFO_HDR_TYPE_RGET: - bytes_packed = hdr->hdr_rndv.hdr_msg_length; - break; - } - - /* set completion status */ - recvreq->req_recv.req_base.req_ompi.req_status.MPI_TAG = hdr->hdr_match.hdr_tag; - recvreq->req_recv.req_base.req_ompi.req_status.MPI_SOURCE = hdr->hdr_match.hdr_src; - recvreq->req_bytes_received = bytes_packed; - recvreq->req_bytes_expected = bytes_packed; - - recv_request_pml_complete(recvreq); -} - - -/* - * Schedule RDMA protocol. 
- * -*/ - -int mca_pml_bfo_recv_request_schedule_once( mca_pml_bfo_recv_request_t* recvreq, - mca_bml_base_btl_t *start_bml_btl ) -{ - mca_bml_base_btl_t* bml_btl; - int num_tries = recvreq->req_rdma_cnt, num_fail = 0; - size_t i, prev_bytes_remaining = 0; - size_t bytes_remaining = recvreq->req_send_offset - - recvreq->req_rdma_offset; - - /* if starting bml_btl is provided schedule next fragment on it first */ - if(start_bml_btl != NULL) { - for(i = 0; i < recvreq->req_rdma_cnt; i++) { - if(recvreq->req_rdma[i].bml_btl != start_bml_btl) - continue; - /* something left to be send? */ - if( OPAL_LIKELY(recvreq->req_rdma[i].length) ) - recvreq->req_rdma_idx = i; - break; - } - } - - while(bytes_remaining > 0 && - recvreq->req_pipeline_depth < mca_pml_bfo.recv_pipeline_depth) { - size_t size, seg_size; - mca_pml_bfo_rdma_hdr_t* hdr; - mca_btl_base_descriptor_t* dst; - mca_btl_base_descriptor_t* ctl; - mca_mpool_base_registration_t * reg = NULL; - mca_btl_base_module_t* btl; - int rc, rdma_idx; - - if(prev_bytes_remaining == bytes_remaining) { - if(++num_fail == num_tries) { - OPAL_THREAD_LOCK(&mca_pml_bfo.lock); - if(false == recvreq->req_pending) { - opal_list_append(&mca_pml_bfo.recv_pending, - (opal_list_item_t*)recvreq); - recvreq->req_pending = true; - } - OPAL_THREAD_UNLOCK(&mca_pml_bfo.lock); - return OMPI_ERR_OUT_OF_RESOURCE; - } - } else { - num_fail = 0; - prev_bytes_remaining = bytes_remaining; - } - - do { - rdma_idx = recvreq->req_rdma_idx; - bml_btl = recvreq->req_rdma[rdma_idx].bml_btl; - reg = recvreq->req_rdma[rdma_idx].btl_reg; - size = recvreq->req_rdma[rdma_idx].length; - if(++recvreq->req_rdma_idx >= recvreq->req_rdma_cnt) - recvreq->req_rdma_idx = 0; - } while(!size); - btl = bml_btl->btl; - - /* makes sure that we don't exceed BTL max rdma size - * if memory is not pinned already */ - if( (NULL == reg) && (btl->btl_rdma_pipeline_frag_size != 0) && - (size > btl->btl_rdma_pipeline_frag_size)) { - size = btl->btl_rdma_pipeline_frag_size; - } - - 
/* take lock to protect converter against concurrent access - * from unpack */ - OPAL_THREAD_LOCK(&recvreq->lock); - opal_convertor_set_position( &recvreq->req_recv.req_base.req_convertor, - &recvreq->req_rdma_offset ); - - /* prepare a descriptor for RDMA */ - mca_bml_base_prepare_dst(bml_btl, reg, - &recvreq->req_recv.req_base.req_convertor, - MCA_BTL_NO_ORDER, 0, &size, MCA_BTL_DES_FLAGS_BTL_OWNERSHIP | - MCA_BTL_DES_FLAGS_PUT, &dst); - OPAL_THREAD_UNLOCK(&recvreq->lock); - - if(OPAL_UNLIKELY(dst == NULL)) { - continue; - } - - dst->des_cbfunc = mca_pml_bfo_put_completion; - dst->des_cbdata = recvreq; - - seg_size = btl->btl_seg_size * dst->des_local_count; - - /* prepare a descriptor for rdma control message */ - mca_bml_base_alloc(bml_btl, &ctl, MCA_BTL_NO_ORDER, sizeof(mca_pml_bfo_rdma_hdr_t) + seg_size, - MCA_BTL_DES_FLAGS_PRIORITY | MCA_BTL_DES_FLAGS_BTL_OWNERSHIP | MCA_BTL_DES_SEND_ALWAYS_CALLBACK); - - if( OPAL_UNLIKELY(NULL == ctl) ) { - mca_bml_base_free(bml_btl,dst); - continue; - } - ctl->des_cbfunc = mca_pml_bfo_recv_ctl_completion; -#if PML_BFO - ctl->des_cbdata = recvreq; -#endif /* PML_BFO */ - - /* fill in rdma header */ - hdr = (mca_pml_bfo_rdma_hdr_t*)ctl->des_local->seg_addr.pval; - hdr->hdr_common.hdr_type = MCA_PML_BFO_HDR_TYPE_PUT; - hdr->hdr_common.hdr_flags = - (!recvreq->req_ack_sent) ? 
MCA_PML_BFO_HDR_TYPE_ACK : 0; - hdr->hdr_req = recvreq->remote_req_send; -#if PML_BFO - hdr->hdr_dst_req.pval = recvreq; /* only needed in the first put message */ -#endif /* PML_BFO */ - hdr->hdr_des.pval = dst; - hdr->hdr_rdma_offset = recvreq->req_rdma_offset; - hdr->hdr_seg_cnt = dst->des_local_count; - - /* copy segments */ - memmove (hdr + 1, dst->des_local, seg_size); - - if(!recvreq->req_ack_sent) - recvreq->req_ack_sent = true; - bfo_hdr_hton(hdr, MCA_PML_BFO_HDR_TYPE_PUT, recvreq->req_recv.req_base.req_proc); - - PERUSE_TRACE_COMM_OMPI_EVENT( PERUSE_COMM_REQ_XFER_CONTINUE, - &(recvreq->req_recv.req_base), size, - PERUSE_RECV); - - /* send rdma request to peer */ - rc = mca_bml_base_send(bml_btl, ctl, MCA_PML_BFO_HDR_TYPE_PUT); - if( OPAL_LIKELY( rc >= 0 ) ) { -#if PML_BFO - if ((btl->btl_flags & MCA_BTL_FLAGS_FAILOVER_SUPPORT) && - (ctl->des_flags & MCA_BTL_DES_SEND_ALWAYS_CALLBACK)) { - recvreq->req_events++; - } -#endif /* PML_BFO */ - /* update request state */ - recvreq->req_rdma_offset += size; - OPAL_THREAD_ADD_FETCH_SIZE_T(&recvreq->req_pipeline_depth, 1); - recvreq->req_rdma[rdma_idx].length -= size; - bytes_remaining -= size; - } else { - mca_bml_base_free(bml_btl,ctl); - mca_bml_base_free(bml_btl,dst); - } - } - - return OMPI_SUCCESS; -} - -#define IS_PROB_REQ(R) \ - ((MCA_PML_REQUEST_IPROBE == (R)->req_recv.req_base.req_type) || \ - (MCA_PML_REQUEST_PROBE == (R)->req_recv.req_base.req_type) || \ - (MCA_PML_REQUEST_IMPROBE == (R)->req_recv.req_base.req_type) || \ - (MCA_PML_REQUEST_MPROBE == (R)->req_recv.req_base.req_type)) -#define IS_MPROB_REQ(R) \ - ((MCA_PML_REQUEST_IMPROBE == (R)->req_recv.req_base.req_type) || \ - (MCA_PML_REQUEST_MPROBE == (R)->req_recv.req_base.req_type)) - -static inline void append_recv_req_to_queue(opal_list_t *queue, - mca_pml_bfo_recv_request_t *req) -{ - if(OPAL_UNLIKELY(req->req_recv.req_base.req_type == MCA_PML_REQUEST_IPROBE || - req->req_recv.req_base.req_type == MCA_PML_REQUEST_IMPROBE)) - return; - - 
opal_list_append(queue, (opal_list_item_t*)req); - - /** - * We don't want to generate this kind of event for MPI_Probe. Hopefully, - * the compiler will optimize out the empty if loop in the case where PERUSE - * support is not required by the user. - */ - if(req->req_recv.req_base.req_type != MCA_PML_REQUEST_PROBE || - req->req_recv.req_base.req_type != MCA_PML_REQUEST_MPROBE) { - PERUSE_TRACE_COMM_EVENT(PERUSE_COMM_REQ_INSERT_IN_POSTED_Q, - &(req->req_recv.req_base), PERUSE_RECV); - } -} - -/* - * this routine tries to match a posted receive. If a match is found, - * it places the request in the appropriate matched receive list. This - * function has to be called with the communicator matching lock held. -*/ -static mca_pml_bfo_recv_frag_t* -recv_req_match_specific_proc( const mca_pml_bfo_recv_request_t *req, - mca_pml_bfo_comm_proc_t *proc ) -{ - opal_list_t* unexpected_frags = &proc->unexpected_frags; - opal_list_item_t *i; - mca_pml_bfo_recv_frag_t* frag; - int tag = req->req_recv.req_base.req_tag; - - if(opal_list_get_size(unexpected_frags) == 0) - return NULL; - - if( OMPI_ANY_TAG == tag ) { - for (i = opal_list_get_first(unexpected_frags); - i != opal_list_get_end(unexpected_frags); - i = opal_list_get_next(i)) { - frag = (mca_pml_bfo_recv_frag_t*)i; - - if( frag->hdr.hdr_match.hdr_tag >= 0 ) - return frag; - } - } else { - for (i = opal_list_get_first(unexpected_frags); - i != opal_list_get_end(unexpected_frags); - i = opal_list_get_next(i)) { - frag = (mca_pml_bfo_recv_frag_t*)i; - - if( frag->hdr.hdr_match.hdr_tag == tag ) - return frag; - } - } - return NULL; -} - -/* - * this routine is used to try and match a wild posted receive - where - * wild is determined by the value assigned to the source process -*/ -static mca_pml_bfo_recv_frag_t* -recv_req_match_wild( mca_pml_bfo_recv_request_t* req, - mca_pml_bfo_comm_proc_t **p) -{ - mca_pml_bfo_comm_t* comm = req->req_recv.req_base.req_comm->c_pml_comm; - mca_pml_bfo_comm_proc_t* proc = comm->procs; - 
size_t i; - - /* - * Loop over all the outstanding messages to find one that matches. - * There is an outer loop over lists of messages from each - * process, then an inner loop over the messages from the - * process. - * - * In order to avoid starvation do this in a round-robin fashion. - */ - for (i = comm->last_probed + 1; i < comm->num_procs; i++) { - mca_pml_bfo_recv_frag_t* frag; - - /* loop over messages from the current proc */ - if((frag = recv_req_match_specific_proc(req, &proc[i]))) { - *p = &proc[i]; - comm->last_probed = i; - req->req_recv.req_base.req_proc = proc[i].ompi_proc; - prepare_recv_req_converter(req); - return frag; /* match found */ - } - } - for (i = 0; i <= comm->last_probed; i++) { - mca_pml_bfo_recv_frag_t* frag; - - /* loop over messages from the current proc */ - if((frag = recv_req_match_specific_proc(req, &proc[i]))) { - *p = &proc[i]; - comm->last_probed = i; - req->req_recv.req_base.req_proc = proc[i].ompi_proc; - prepare_recv_req_converter(req); - return frag; /* match found */ - } - } - - *p = NULL; - return NULL; -} - - -void mca_pml_bfo_recv_req_start(mca_pml_bfo_recv_request_t *req) -{ - mca_pml_bfo_comm_t* comm = req->req_recv.req_base.req_comm->c_pml_comm; - mca_pml_bfo_comm_proc_t* proc; - mca_pml_bfo_recv_frag_t* frag; - opal_list_t *queue; - mca_pml_bfo_hdr_t* hdr; - - /* init/re-init the request */ - req->req_lock = 0; - req->req_pipeline_depth = 0; - req->req_bytes_received = 0; - req->req_bytes_expected = 0; - /* What about req_rdma_cnt ? */ -#if PML_BFO - req->req_rdma_cnt = 0; - req->req_events = 0; - req->req_restartseq = 0; - req->req_errstate = 0; -#endif /* PML_BFO */ - req->req_rdma_idx = 0; - req->req_pending = false; - req->req_ack_sent = false; - - MCA_PML_BASE_RECV_START(&req->req_recv.req_base); - - OPAL_THREAD_LOCK(&comm->matching_lock); - /** - * The laps of time between the ACTIVATE event and the SEARCH_UNEX one include - * the cost of the request lock. 
- */ - PERUSE_TRACE_COMM_EVENT(PERUSE_COMM_SEARCH_UNEX_Q_BEGIN, - &(req->req_recv.req_base), PERUSE_RECV); - - /* assign sequence number */ - req->req_recv.req_base.req_sequence = comm->recv_sequence++; - - /* attempt to match posted recv */ - if(req->req_recv.req_base.req_peer == OMPI_ANY_SOURCE) { - frag = recv_req_match_wild(req, &proc); - queue = &comm->wild_receives; -#if !OPAL_ENABLE_HETEROGENEOUS_SUPPORT - /* As we are in a homogeneous environment we know that all remote - * architectures are exactly the same as the local one. Therefore, - * we can safely construct the convertor based on the proc - * information of rank 0. - */ - if( NULL == frag ) { - req->req_recv.req_base.req_proc = ompi_proc_local_proc; - prepare_recv_req_converter(req); - } -#endif /* !OPAL_ENABLE_HETEROGENEOUS_SUPPORT */ - } else { - proc = &comm->procs[req->req_recv.req_base.req_peer]; - req->req_recv.req_base.req_proc = proc->ompi_proc; - frag = recv_req_match_specific_proc(req, proc); - queue = &proc->specific_receives; - /* wild cardrecv will be prepared on match */ - prepare_recv_req_converter(req); - } - - if(OPAL_UNLIKELY(NULL == frag)) { - PERUSE_TRACE_COMM_EVENT(PERUSE_COMM_SEARCH_UNEX_Q_END, - &(req->req_recv.req_base), PERUSE_RECV); - /* We didn't find any matches. Record this irecv so we can match - it when the message comes in. 
*/ - append_recv_req_to_queue(queue, req); - req->req_match_received = false; - OPAL_THREAD_UNLOCK(&comm->matching_lock); - } else { - if(OPAL_LIKELY(!IS_PROB_REQ(req))) { - PERUSE_TRACE_COMM_EVENT(PERUSE_COMM_REQ_MATCH_UNEX, - &(req->req_recv.req_base), PERUSE_RECV); - - hdr = (mca_pml_bfo_hdr_t*)frag->segments->seg_addr.pval; - PERUSE_TRACE_MSG_EVENT(PERUSE_COMM_MSG_REMOVE_FROM_UNEX_Q, - req->req_recv.req_base.req_comm, - hdr->hdr_match.hdr_src, - hdr->hdr_match.hdr_tag, - PERUSE_RECV); - - PERUSE_TRACE_COMM_EVENT(PERUSE_COMM_SEARCH_UNEX_Q_END, - &(req->req_recv.req_base), PERUSE_RECV); - - opal_list_remove_item(&proc->unexpected_frags, - (opal_list_item_t*)frag); - OPAL_THREAD_UNLOCK(&comm->matching_lock); - - switch(hdr->hdr_common.hdr_type) { - case MCA_PML_BFO_HDR_TYPE_MATCH: - mca_pml_bfo_recv_request_progress_match(req, frag->btl, frag->segments, - frag->num_segments); - break; - case MCA_PML_BFO_HDR_TYPE_RNDV: - mca_pml_bfo_recv_request_progress_rndv(req, frag->btl, frag->segments, - frag->num_segments); - break; - case MCA_PML_BFO_HDR_TYPE_RGET: - mca_pml_bfo_recv_request_progress_rget(req, frag->btl, frag->segments, - frag->num_segments); - break; - default: - assert(0); - } - - MCA_PML_BFO_RECV_FRAG_RETURN(frag); - - } else if (OPAL_UNLIKELY(IS_MPROB_REQ(req))) { - /* Remove the fragment from the match list, as it's now - matched. Stash it somewhere in the request (which, - yes, is a complete hack), where it will be plucked out - during the end of mprobe. 
The request will then be - "recreated" as a receive request, and the frag will be - restarted with this request during mrecv */ - opal_list_remove_item(&proc->unexpected_frags, - (opal_list_item_t*)frag); - OPAL_THREAD_UNLOCK(&comm->matching_lock); - - req->req_recv.req_base.req_addr = frag; - mca_pml_bfo_recv_request_matched_probe(req, frag->btl, - frag->segments, frag->num_segments); - - } else { - OPAL_THREAD_UNLOCK(&comm->matching_lock); - mca_pml_bfo_recv_request_matched_probe(req, frag->btl, - frag->segments, frag->num_segments); - } - } -} diff --git a/ompi/mca/pml/bfo/pml_bfo_recvreq.h b/ompi/mca/pml/bfo/pml_bfo_recvreq.h deleted file mode 100644 index 7b3a6db6271..00000000000 --- a/ompi/mca/pml/bfo/pml_bfo_recvreq.h +++ /dev/null @@ -1,449 +0,0 @@ -/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2016 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2007 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * Copyright (c) 2008 UT-Battelle, LLC. All rights reserved. - * Copyright (c) 2010 Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2011-2012 Los Alamos National Security, LLC. - * All rights reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ -/** - * @file - */ -#ifndef OMPI_PML_BFO_RECV_REQUEST_H -#define OMPI_PML_BFO_RECV_REQUEST_H - -#include "pml_bfo.h" -#include "pml_bfo_rdma.h" -#include "pml_bfo_rdmafrag.h" -#include "ompi/proc/proc.h" -#include "ompi/mca/pml/bfo/pml_bfo_comm.h" -#include "opal/mca/mpool/base/base.h" -#include "ompi/mca/pml/base/pml_base_recvreq.h" -#if PML_BFO -#define RECVREQ_RECVERRSENT 0x01 -#define RECVREQ_RNDVRESTART_RECVED 0x02 -#define RECVREQ_RNDVRESTART_ACKED 0x04 -#endif /* PML_BFO */ - -BEGIN_C_DECLS - -struct mca_pml_bfo_recv_request_t { - mca_pml_base_recv_request_t req_recv; - opal_ptr_t remote_req_send; -#if PML_BFO - int32_t req_msgseq; /* PML sequence number */ - int32_t req_events; /* number of outstanding events on request */ - int32_t req_restartseq; /* sequence number of restarted request */ - int32_t req_errstate; /* state of request if in error */ -#endif /* PML_BFO */ - int32_t req_lock; - size_t req_pipeline_depth; - size_t req_bytes_received; /**< amount of data transferred into the user buffer */ - size_t req_bytes_expected; /**< local size of the data as suggested by the user */ - size_t req_rdma_offset; - size_t req_send_offset; - uint32_t req_rdma_cnt; - uint32_t req_rdma_idx; - bool req_pending; - bool req_ack_sent; /**< whether ack was sent to the sender */ - bool req_match_received; /**< Prevent request to be completed prematurely */ - opal_mutex_t lock; - mca_pml_bfo_com_btl_t req_rdma[1]; -}; -typedef struct mca_pml_bfo_recv_request_t mca_pml_bfo_recv_request_t; - -OBJ_CLASS_DECLARATION(mca_pml_bfo_recv_request_t); - -static inline bool lock_recv_request(mca_pml_bfo_recv_request_t *recvreq) -{ - return OPAL_THREAD_ADD_FETCH32(&recvreq->req_lock, 1) == 1; -} - -static inline bool unlock_recv_request(mca_pml_bfo_recv_request_t *recvreq) -{ - return OPAL_THREAD_ADD_FETCH32(&recvreq->req_lock, -1) == 0; -} - -/** - * Allocate a recv request from the modules free list. 
- * - * @param rc (OUT) OMPI_SUCCESS or error status on failure. - * @return Receive request. - */ -#define MCA_PML_BFO_RECV_REQUEST_ALLOC(recvreq) \ -do { \ - ompi_free_list_item_t* item; \ - OMPI_FREE_LIST_GET_MT(&mca_pml_base_recv_requests, item); \ - recvreq = (mca_pml_bfo_recv_request_t*)item; \ -} while(0) - - -/** - * Initialize a receive request with call parameters. - * - * @param request (IN) Receive request. - * @param addr (IN) User buffer. - * @param count (IN) Number of elements of indicated datatype. - * @param datatype (IN) User defined datatype. - * @param src (IN) Source rank w/in the communicator. - * @param tag (IN) User defined tag. - * @param comm (IN) Communicator. - * @param persistent (IN) Is this a ersistent request. - */ -#define MCA_PML_BFO_RECV_REQUEST_INIT( request, \ - addr, \ - count, \ - datatype, \ - src, \ - tag, \ - comm, \ - persistent) \ -do { \ - MCA_PML_BASE_RECV_REQUEST_INIT( &(request)->req_recv, \ - addr, \ - count, \ - datatype, \ - src, \ - tag, \ - comm, \ - persistent); \ -} while(0) - -/** - * Mark the request as completed at MPI level for internal purposes. - * - * @param recvreq (IN) Receive request. - */ -#define MCA_PML_BFO_RECV_REQUEST_MPI_COMPLETE( recvreq ) \ - do { \ - PERUSE_TRACE_COMM_EVENT( PERUSE_COMM_REQ_COMPLETE, \ - &(recvreq->req_recv.req_base), PERUSE_RECV ); \ - ompi_request_complete( &(recvreq->req_recv.req_base.req_ompi), true ); \ - } while (0) - -/* - * Free the PML receive request - */ -#define MCA_PML_BFO_RECV_REQUEST_RETURN(recvreq) \ - { \ - MCA_PML_BASE_RECV_REQUEST_FINI(&(recvreq)->req_recv); \ - OPAL_FREE_LIST_RETURN( &mca_pml_base_recv_requests, \ - (opal_free_list_item_t*)(recvreq)); \ - } - -/** - * Complete receive request. Request structure cannot be accessed after calling - * this function any more. - * - * @param recvreq (IN) Receive request. 
- */ -static inline void -recv_request_pml_complete(mca_pml_bfo_recv_request_t *recvreq) -{ - size_t i; - - if(false == recvreq->req_recv.req_base.req_pml_complete) { - - if(recvreq->req_recv.req_bytes_packed > 0) { - PERUSE_TRACE_COMM_EVENT( PERUSE_COMM_REQ_XFER_END, - &recvreq->req_recv.req_base, PERUSE_RECV ); - } - - for(i = 0; i < recvreq->req_rdma_cnt; i++) { - mca_mpool_base_registration_t* btl_reg = recvreq->req_rdma[i].btl_reg; - if( NULL != btl_reg && btl_reg->mpool != NULL) { - btl_reg->mpool->mpool_deregister( btl_reg->mpool, btl_reg ); - } - } - recvreq->req_rdma_cnt = 0; -#if PML_BFO - recvreq->req_msgseq -= 100; -#endif /* PML_BFO */ - - if(true == recvreq->req_recv.req_base.req_free_called) { - if( MPI_SUCCESS != recvreq->req_recv.req_base.req_ompi.req_status.MPI_ERROR ) { - ompi_mpi_abort(&ompi_mpi_comm_world.comm, MPI_ERR_REQUEST); - } - MCA_PML_BFO_RECV_REQUEST_RETURN(recvreq); - } else { - /* initialize request status */ - recvreq->req_recv.req_base.req_pml_complete = true; - recvreq->req_recv.req_base.req_ompi.req_status._ucount = - recvreq->req_bytes_received; - if (recvreq->req_recv.req_bytes_packed > recvreq->req_bytes_expected) { - recvreq->req_recv.req_base.req_ompi.req_status._ucount = - recvreq->req_recv.req_bytes_packed; - recvreq->req_recv.req_base.req_ompi.req_status.MPI_ERROR = - MPI_ERR_TRUNCATE; - } - MCA_PML_BFO_RECV_REQUEST_MPI_COMPLETE(recvreq); - } - } -} - -static inline bool -recv_request_pml_complete_check(mca_pml_bfo_recv_request_t *recvreq) -{ -#if OPAL_ENABLE_MULTI_THREADS - opal_atomic_rmb(); -#endif - if(recvreq->req_match_received && - recvreq->req_bytes_received >= recvreq->req_recv.req_bytes_packed && -#if PML_BFO - (0 == recvreq->req_events) && lock_recv_request(recvreq)) { -#else /* PML_BFO */ - lock_recv_request(recvreq)) { -#endif /* PML_BFO */ - recv_request_pml_complete(recvreq); - return true; - } - - return false; -} - -extern void mca_pml_bfo_recv_req_start(mca_pml_bfo_recv_request_t *req); -#define 
MCA_PML_BFO_RECV_REQUEST_START(r) mca_pml_bfo_recv_req_start(r) - -static inline void prepare_recv_req_converter(mca_pml_bfo_recv_request_t *req) -{ - if( req->req_recv.req_base.req_datatype->super.size | req->req_recv.req_base.req_count ) { - opal_convertor_copy_and_prepare_for_recv( - req->req_recv.req_base.req_proc->super.proc_convertor, - &(req->req_recv.req_base.req_datatype->super), - req->req_recv.req_base.req_count, - req->req_recv.req_base.req_addr, - 0, - &req->req_recv.req_base.req_convertor); - opal_convertor_get_unpacked_size(&req->req_recv.req_base.req_convertor, - &req->req_bytes_expected); - } -} - -#define MCA_PML_BFO_RECV_REQUEST_MATCHED(request, hdr) \ - recv_req_matched(request, hdr) - -static inline void recv_req_matched(mca_pml_bfo_recv_request_t *req, - mca_pml_bfo_match_hdr_t *hdr) -{ - req->req_recv.req_base.req_ompi.req_status.MPI_SOURCE = hdr->hdr_src; - req->req_recv.req_base.req_ompi.req_status.MPI_TAG = hdr->hdr_tag; - req->req_match_received = true; -#if PML_BFO - req->req_msgseq = hdr->hdr_seq; -#endif /* PML_BFO */ -#if OPAL_ENABLE_MULTI_THREADS - opal_atomic_wmb(); -#endif - if(req->req_recv.req_bytes_packed > 0) { -#if OPAL_ENABLE_HETEROGENEOUS_SUPPORT - if(MPI_ANY_SOURCE == req->req_recv.req_base.req_peer) { - /* non wildcard prepared during post recv */ - prepare_recv_req_converter(req); - } -#endif /* OPAL_ENABLE_HETEROGENEOUS_SUPPORT */ - PERUSE_TRACE_COMM_EVENT(PERUSE_COMM_REQ_XFER_BEGIN, - &req->req_recv.req_base, PERUSE_RECV); - } -} - - -/** - * - */ - -#define MCA_PML_BFO_RECV_REQUEST_UNPACK( request, \ - segments, \ - num_segments, \ - seg_offset, \ - data_offset, \ - bytes_received, \ - bytes_delivered) \ -do { \ - bytes_delivered = 0; \ - if(request->req_recv.req_bytes_packed > 0) { \ - struct iovec iov[MCA_BTL_DES_MAX_SEGMENTS]; \ - uint32_t iov_count = 0; \ - size_t max_data = bytes_received; \ - size_t n, offset = seg_offset; \ - mca_btl_base_segment_t* segment = segments; \ - \ - OPAL_THREAD_LOCK(&request->lock); \ 
- for( n = 0; n < num_segments; n++, segment++ ) { \ - if(offset >= segment->seg_len) { \ - offset -= segment->seg_len; \ - } else { \ - iov[iov_count].iov_len = segment->seg_len - offset; \ - iov[iov_count].iov_base = (IOVBASE_TYPE*) \ - ((unsigned char*)segment->seg_addr.pval + offset); \ - iov_count++; \ - offset = 0; \ - } \ - } \ - PERUSE_TRACE_COMM_OMPI_EVENT (PERUSE_COMM_REQ_XFER_CONTINUE, \ - &(recvreq->req_recv.req_base), max_data, \ - PERUSE_RECV); \ - opal_convertor_set_position( &(request->req_recv.req_base.req_convertor), \ - &data_offset ); \ - opal_convertor_unpack( &(request)->req_recv.req_base.req_convertor, \ - iov, \ - &iov_count, \ - &max_data ); \ - bytes_delivered = max_data; \ - OPAL_THREAD_UNLOCK(&request->lock); \ - } \ -} while (0) - - -/** - * - */ - -void mca_pml_bfo_recv_request_progress_match( - mca_pml_bfo_recv_request_t* req, - struct mca_btl_base_module_t* btl, - mca_btl_base_segment_t* segments, - size_t num_segments); - -/** - * - */ - -void mca_pml_bfo_recv_request_progress_frag( - mca_pml_bfo_recv_request_t* req, - struct mca_btl_base_module_t* btl, - mca_btl_base_segment_t* segments, - size_t num_segments); - -/** - * - */ - -void mca_pml_bfo_recv_request_progress_rndv( - mca_pml_bfo_recv_request_t* req, - struct mca_btl_base_module_t* btl, - mca_btl_base_segment_t* segments, - size_t num_segments); - -/** - * - */ - -void mca_pml_bfo_recv_request_progress_rget( - mca_pml_bfo_recv_request_t* req, - struct mca_btl_base_module_t* btl, - mca_btl_base_segment_t* segments, - size_t num_segments); - -/** - * - */ - -void mca_pml_bfo_recv_request_matched_probe( - mca_pml_bfo_recv_request_t* req, - struct mca_btl_base_module_t* btl, - mca_btl_base_segment_t* segments, - size_t num_segments); - -/** - * - */ - -int mca_pml_bfo_recv_request_schedule_once( - mca_pml_bfo_recv_request_t* req, mca_bml_base_btl_t* start_bml_btl); - -static inline int mca_pml_bfo_recv_request_schedule_exclusive( - mca_pml_bfo_recv_request_t* req, - 
mca_bml_base_btl_t* start_bml_btl) -{ - int rc; - - do { - rc = mca_pml_bfo_recv_request_schedule_once(req, start_bml_btl); - if(rc == OMPI_ERR_OUT_OF_RESOURCE) - break; - } while(!unlock_recv_request(req)); - - if(OMPI_SUCCESS == rc) - recv_request_pml_complete_check(req); - - return rc; -} - -static inline void mca_pml_bfo_recv_request_schedule( - mca_pml_bfo_recv_request_t* req, - mca_bml_base_btl_t* start_bml_btl) -{ - if(!lock_recv_request(req)) - return; - - (void)mca_pml_bfo_recv_request_schedule_exclusive(req, start_bml_btl); -} - -#define MCA_PML_BFO_ADD_ACK_TO_PENDING(P, S, D, O) \ - do { \ - mca_pml_bfo_pckt_pending_t *_pckt; \ - \ - MCA_PML_BFO_PCKT_PENDING_ALLOC(_pckt); \ - _pckt->hdr.hdr_common.hdr_type = MCA_PML_BFO_HDR_TYPE_ACK; \ - _pckt->hdr.hdr_ack.hdr_src_req.lval = (S); \ - _pckt->hdr.hdr_ack.hdr_dst_req.pval = (D); \ - _pckt->hdr.hdr_ack.hdr_send_offset = (O); \ - _pckt->proc = (P); \ - _pckt->bml_btl = NULL; \ - OPAL_THREAD_LOCK(&mca_pml_bfo.lock); \ - opal_list_append(&mca_pml_bfo.pckt_pending, \ - (opal_list_item_t*)_pckt); \ - OPAL_THREAD_UNLOCK(&mca_pml_bfo.lock); \ - } while(0) - -int mca_pml_bfo_recv_request_ack_send_btl(ompi_proc_t* proc, - mca_bml_base_btl_t* bml_btl, uint64_t hdr_src_req, void *hdr_dst_req, - uint64_t hdr_rdma_offset, bool nordma); - -static inline int mca_pml_bfo_recv_request_ack_send(ompi_proc_t* proc, - uint64_t hdr_src_req, void *hdr_dst_req, uint64_t hdr_send_offset, - bool nordma) -{ - size_t i; - mca_bml_base_btl_t* bml_btl; - mca_bml_base_endpoint_t* endpoint = - (mca_bml_base_endpoint_t*)proc->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_BML]; - - for(i = 0; i < mca_bml_base_btl_array_get_size(&endpoint->btl_eager); i++) { - bml_btl = mca_bml_base_btl_array_get_next(&endpoint->btl_eager); - if(mca_pml_bfo_recv_request_ack_send_btl(proc, bml_btl, hdr_src_req, - hdr_dst_req, hdr_send_offset, nordma) == OMPI_SUCCESS) - return OMPI_SUCCESS; - } - - MCA_PML_BFO_ADD_ACK_TO_PENDING(proc, hdr_src_req, hdr_dst_req, - 
hdr_send_offset); - - return OMPI_ERR_OUT_OF_RESOURCE; -} - -int mca_pml_bfo_recv_request_get_frag(mca_pml_bfo_rdma_frag_t* frag); - -/* This function tries to continue recvreq that stuck due to resource - * unavailability. Recvreq is added to recv_pending list if scheduling of put - * operation cannot be accomplished for some reason. */ -void mca_pml_bfo_recv_request_process_pending(void); - -END_C_DECLS - -#endif - diff --git a/ompi/mca/pml/bfo/pml_bfo_sendreq.c b/ompi/mca/pml/bfo/pml_bfo_sendreq.c deleted file mode 100644 index 176eadf4f6e..00000000000 --- a/ompi/mca/pml/bfo/pml_bfo_sendreq.c +++ /dev/null @@ -1,1401 +0,0 @@ -/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2016 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * Copyright (c) 2008 UT-Battelle, LLC. All rights reserved. - * Copyright (c) 2010-2012 Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2011-2012 Los Alamos National Security, LLC. - * All rights reserved. - * Copyright (c) 2016 Research Organization for Information Science - * and Technology (RIST). All rights reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - - -#include "ompi_config.h" -#include "opal/prefetch.h" -#include "opal/mca/mpool/mpool.h" -#include "ompi/constants.h" -#include "ompi/mca/pml/pml.h" -#include "pml_bfo.h" -#include "pml_bfo_hdr.h" -#include "pml_bfo_sendreq.h" -#include "pml_bfo_rdmafrag.h" -#include "pml_bfo_recvreq.h" -#if PML_BFO -#include "pml_bfo_failover.h" -#endif /* PML_BFO */ -#include "ompi/mca/bml/base/base.h" -#include "ompi/memchecker.h" - -OBJ_CLASS_INSTANCE(mca_pml_bfo_send_range_t, ompi_free_list_item_t, - NULL, NULL); - -void mca_pml_bfo_send_request_process_pending(mca_bml_base_btl_t *bml_btl) -{ - int rc, i, s = opal_list_get_size(&mca_pml_bfo.send_pending); - - /* advance pending requests */ - for(i = 0; i < s; i++) { - mca_pml_bfo_send_pending_t pending_type = MCA_PML_BFO_SEND_PENDING_NONE; - mca_pml_bfo_send_request_t* sendreq; - mca_bml_base_btl_t *send_dst; - - sendreq = get_request_from_send_pending(&pending_type); - if(OPAL_UNLIKELY(NULL == sendreq)) - break; - - switch(pending_type) { - case MCA_PML_BFO_SEND_PENDING_SCHEDULE: - rc = mca_pml_bfo_send_request_schedule_exclusive(sendreq); - if(OMPI_ERR_OUT_OF_RESOURCE == rc) { - return; - } - break; - case MCA_PML_BFO_SEND_PENDING_START: - send_dst = mca_bml_base_btl_array_find( - &sendreq->req_endpoint->btl_eager, bml_btl->btl); - if (NULL == send_dst) { - /* Put request back onto pending list and try next one. */ - add_request_to_send_pending(sendreq, - MCA_PML_BFO_SEND_PENDING_START, true); - } else { - rc = mca_pml_bfo_send_request_start_btl(sendreq, send_dst); - if (OMPI_ERR_OUT_OF_RESOURCE == rc) { - /* No more resources on this btl so prepend to the pending - * list to minimize reordering and give up for now. 
*/ - add_request_to_send_pending(sendreq, - MCA_PML_BFO_SEND_PENDING_START, false); - return; - } - } - break; - default: - opal_output(0, "[%s:%d] wrong send request type\n", - __FILE__, __LINE__); - break; - } - } -} - -/* - * The free call mark the final stage in a request life-cycle. Starting from this - * point the request is completed at both PML and user level, and can be used - * for others p2p communications. Therefore, in the case of the BFO PML it should - * be added to the free request list. - */ -static int mca_pml_bfo_send_request_free(struct ompi_request_t** request) -{ - mca_pml_bfo_send_request_t* sendreq = *(mca_pml_bfo_send_request_t**)request; - if( false == sendreq->req_send.req_base.req_free_called ) { - - sendreq->req_send.req_base.req_free_called = true; - PERUSE_TRACE_COMM_EVENT( PERUSE_COMM_REQ_NOTIFY, - &(sendreq->req_send.req_base), PERUSE_SEND ); - - if( true == sendreq->req_send.req_base.req_pml_complete ) { - /* make buffer defined when the request is compeleted, - and before releasing the objects. 
*/ - MEMCHECKER( - memchecker_call(&opal_memchecker_base_mem_defined, - sendreq->req_send.req_base.req_addr, - sendreq->req_send.req_base.req_count, - sendreq->req_send.req_base.req_datatype); - ); - - MCA_PML_BFO_SEND_REQUEST_RETURN( sendreq ); - } - *request = MPI_REQUEST_NULL; - } - return OMPI_SUCCESS; -} - -static int mca_pml_bfo_send_request_cancel(struct ompi_request_t* request, int complete) -{ - /* we dont cancel send requests by now */ - return OMPI_SUCCESS; -} - -static void mca_pml_bfo_send_request_construct(mca_pml_bfo_send_request_t* req) -{ - req->req_send.req_base.req_type = MCA_PML_REQUEST_SEND; - req->req_send.req_base.req_ompi.req_start = mca_pml_bfo_start; - req->req_send.req_base.req_ompi.req_free = mca_pml_bfo_send_request_free; - req->req_send.req_base.req_ompi.req_cancel = mca_pml_bfo_send_request_cancel; - req->req_rdma_cnt = 0; - req->req_throttle_sends = false; - OBJ_CONSTRUCT(&req->req_send_ranges, opal_list_t); - OBJ_CONSTRUCT(&req->req_send_range_lock, opal_mutex_t); -} - -static void mca_pml_bfo_send_request_destruct(mca_pml_bfo_send_request_t* req) -{ - OBJ_DESTRUCT(&req->req_send_ranges); - OBJ_DESTRUCT(&req->req_send_range_lock); -} - -OBJ_CLASS_INSTANCE( mca_pml_bfo_send_request_t, - mca_pml_base_send_request_t, - mca_pml_bfo_send_request_construct, - mca_pml_bfo_send_request_destruct ); - -/** - * Completion of a short message - nothing left to schedule. 
- */ - -static inline void -mca_pml_bfo_match_completion_free_request( mca_bml_base_btl_t* bml_btl, - mca_pml_bfo_send_request_t* sendreq ) -{ - if( sendreq->req_send.req_bytes_packed > 0 ) { - PERUSE_TRACE_COMM_EVENT( PERUSE_COMM_REQ_XFER_BEGIN, - &(sendreq->req_send.req_base), PERUSE_SEND ); - } - - /* signal request completion */ - send_request_pml_complete(sendreq); - - /* check for pending requests */ - MCA_PML_BFO_PROGRESS_PENDING(bml_btl); -} - -static void -mca_pml_bfo_match_completion_free( struct mca_btl_base_module_t* btl, - struct mca_btl_base_endpoint_t* ep, - struct mca_btl_base_descriptor_t* des, - int status ) -{ - mca_pml_bfo_send_request_t* sendreq = (mca_pml_bfo_send_request_t*)des->des_cbdata; - mca_bml_base_btl_t* bml_btl = (mca_bml_base_btl_t*) des->des_context; - - /* check completion status */ - if( OPAL_UNLIKELY(OMPI_SUCCESS != status) ) { -#if PML_BFO - mca_pml_bfo_repost_match_fragment(des); - return; -#else /* PML_BFO */ - /* TSW - FIX */ - opal_output(0, "%s:%d FATAL", __FILE__, __LINE__); - ompi_rte_abort(-1, NULL); -#endif /* PML_BFO */ - } -#if PML_BFO - MCA_PML_BFO_CHECK_SENDREQ_EAGER_BML_BTL(bml_btl, btl, sendreq, "MATCH"); -#endif /* PML_BFO */ - mca_pml_bfo_match_completion_free_request( bml_btl, sendreq ); -} - -static inline void -mca_pml_bfo_rndv_completion_request( mca_bml_base_btl_t* bml_btl, - mca_pml_bfo_send_request_t* sendreq, - size_t req_bytes_delivered ) -{ - if( sendreq->req_send.req_bytes_packed > 0 ) { - PERUSE_TRACE_COMM_EVENT( PERUSE_COMM_REQ_XFER_BEGIN, - &(sendreq->req_send.req_base), PERUSE_SEND ); - } - - OPAL_THREAD_ADD_FETCH_SIZE_T(&sendreq->req_bytes_delivered, req_bytes_delivered); - - /* advance the request */ - OPAL_THREAD_ADD_FETCH32(&sendreq->req_state, -1); - - send_request_pml_complete_check(sendreq); - - /* check for pending requests */ - MCA_PML_BFO_PROGRESS_PENDING(bml_btl); -} - -/* - * Completion of the first fragment of a long message that - * requires an acknowledgement - */ -static void 
-mca_pml_bfo_rndv_completion( mca_btl_base_module_t* btl, - struct mca_btl_base_endpoint_t* ep, - struct mca_btl_base_descriptor_t* des, - int status ) -{ - mca_pml_bfo_send_request_t* sendreq = (mca_pml_bfo_send_request_t*)des->des_cbdata; - mca_bml_base_btl_t* bml_btl = (mca_bml_base_btl_t*)des->des_context; - size_t req_bytes_delivered; - - /* check completion status */ - if( OPAL_UNLIKELY(OMPI_SUCCESS != status) ) { -#if PML_BFO - if (true == mca_pml_bfo_rndv_completion_status_error(des, sendreq)) - return; -#else /* PML_BFO */ - /* TSW - FIX */ - opal_output(0, "%s:%d FATAL", __FILE__, __LINE__); - ompi_rte_abort(-1, NULL); -#endif /* PML_BFO */ - } -#if PML_BFO - sendreq->req_events--; - MCA_PML_BFO_RNDV_COMPLETION_SENDREQ_ERROR_CHECK(sendreq, status, btl, - MCA_PML_BFO_HDR_TYPE_RNDV, "RNDV"); -#endif /* PML_BFO */ - - /* count bytes of user data actually delivered. As the rndv completion only - * happens in one thread, the increase of the req_bytes_delivered does not - * have to be atomic. - */ - req_bytes_delivered = mca_pml_bfo_compute_segment_length (btl->btl_seg_size, - (void *) des->des_local, - des->des_local_count, - sizeof(mca_pml_bfo_rendezvous_hdr_t)); - -#if PML_BFO - MCA_PML_BFO_CHECK_SENDREQ_EAGER_BML_BTL(bml_btl, btl, sendreq, "RNDV"); -#endif /* PML_BFO */ - mca_pml_bfo_rndv_completion_request( bml_btl, sendreq, req_bytes_delivered ); -} - - -/** - * Completion of a get request. 
- */ - -static void -mca_pml_bfo_rget_completion( mca_btl_base_module_t* btl, - struct mca_btl_base_endpoint_t* ep, - struct mca_btl_base_descriptor_t* des, - int status ) -{ - mca_pml_bfo_send_request_t* sendreq = (mca_pml_bfo_send_request_t*)des->des_cbdata; - mca_bml_base_btl_t* bml_btl = (mca_bml_base_btl_t*)des->des_context; - size_t req_bytes_delivered; -#if PML_BFO - MCA_PML_BFO_RGET_COMPLETION_SENDREQ_ERROR_CHECK(sendreq, btl, des); -#endif /* PML_BFO */ - - /* count bytes of user data actually delivered and check for request completion */ - req_bytes_delivered = mca_pml_bfo_compute_segment_length (btl->btl_seg_size, - (void *) des->des_local, - des->des_local_count, 0); - if (OPAL_LIKELY(0 < req_bytes_delivered)) { - OPAL_THREAD_ADD_FETCH_SIZE_T(&sendreq->req_bytes_delivered, req_bytes_delivered); - } - - send_request_pml_complete_check(sendreq); - /* free the descriptor */ -#if PML_BFO - btl->btl_free(btl, des); - MCA_PML_BFO_CHECK_SENDREQ_RDMA_BML_BTL(bml_btl, btl, sendreq, "RGET"); -#else /* PML_BFO */ - mca_bml_base_free(bml_btl, des); -#endif /* PML_BFO */ - MCA_PML_BFO_PROGRESS_PENDING(bml_btl); -} - - -/** - * Completion of a control message - return resources. - */ - -static void -mca_pml_bfo_send_ctl_completion( mca_btl_base_module_t* btl, - struct mca_btl_base_endpoint_t* ep, - struct mca_btl_base_descriptor_t* des, - int status ) -{ - mca_bml_base_btl_t* bml_btl = (mca_bml_base_btl_t*) des->des_context; - -#if PML_BFO - if(OPAL_UNLIKELY(OMPI_SUCCESS != status)) { - mca_pml_bfo_send_ctl_completion_status_error(des); - return; - } - MCA_PML_BFO_CHECK_SENDREQ_EAGER_BML_BTL(bml_btl, btl, des->des_cbdata, "RGET"); -#endif /* PML_BFO */ - /* check for pending requests */ - MCA_PML_BFO_PROGRESS_PENDING(bml_btl); -} - -/** - * Completion of additional fragments of a large message - may need - * to schedule additional fragments. 
- */ - -static void -mca_pml_bfo_frag_completion( mca_btl_base_module_t* btl, - struct mca_btl_base_endpoint_t* ep, - struct mca_btl_base_descriptor_t* des, - int status ) -{ - mca_pml_bfo_send_request_t* sendreq = (mca_pml_bfo_send_request_t*)des->des_cbdata; - mca_bml_base_btl_t* bml_btl = (mca_bml_base_btl_t*) des->des_context; - size_t req_bytes_delivered; -#if PML_BFO - sendreq->req_events--; -#endif /* PML_BFO */ - - /* check completion status */ - if( OPAL_UNLIKELY(OMPI_SUCCESS != status) ) { -#if PML_BFO - sendreq->req_error++; -#else /* PML_BFO */ - /* TSW - FIX */ - opal_output(0, "%s:%d FATAL", __FILE__, __LINE__); - ompi_rte_abort(-1, NULL); -#endif /* PML_BFO */ - } - - /* count bytes of user data actually delivered */ - req_bytes_delivered = mca_pml_bfo_compute_segment_length (btl->btl_seg_size, - (void *) des->des_local, - des->des_local_count, - sizeof(mca_pml_bfo_frag_hdr_t)); - - OPAL_THREAD_SUB_FETCH_SIZE_T(&sendreq->req_pipeline_depth, 1); - OPAL_THREAD_ADD_FETCH_SIZE_T(&sendreq->req_bytes_delivered, req_bytes_delivered); - -#if PML_BFO - MCA_PML_BFO_FRAG_COMPLETION_SENDREQ_ERROR_CHECK(sendreq, status, btl, - MCA_PML_BFO_HDR_TYPE_FRAG, "FRAG"); -#endif /* PML_BFO */ - if(send_request_pml_complete_check(sendreq) == false) { - mca_pml_bfo_send_request_schedule(sendreq); -#if PML_BFO - MCA_PML_BFO_FRAG_COMPLETION_SENDREQ_ERROR_CHECK(sendreq, status, btl, - MCA_PML_BFO_HDR_TYPE_FRAG, - "FRAG (BTL removal)"); -#endif /* PML_BFO */ - } - - /* check for pending requests */ -#if PML_BFO - MCA_PML_BFO_CHECK_SENDREQ_EAGER_BML_BTL(bml_btl, btl, sendreq, "FRAG"); -#endif /* PML_BFO */ - MCA_PML_BFO_PROGRESS_PENDING(bml_btl); -} - -/** - * Buffer the entire message and mark as complete. 
- */ - -int mca_pml_bfo_send_request_start_buffered( - mca_pml_bfo_send_request_t* sendreq, - mca_bml_base_btl_t* bml_btl, - size_t size) -{ - mca_btl_base_descriptor_t* des; - mca_btl_base_segment_t* segment; - mca_pml_bfo_hdr_t* hdr; - struct iovec iov; - unsigned int iov_count; - size_t max_data, req_bytes_delivered; - int rc; - - /* allocate descriptor */ - mca_bml_base_alloc(bml_btl, &des, - MCA_BTL_NO_ORDER, - sizeof(mca_pml_bfo_rendezvous_hdr_t) + size, - MCA_BTL_DES_FLAGS_PRIORITY | MCA_BTL_DES_FLAGS_BTL_OWNERSHIP); - if( OPAL_UNLIKELY(NULL == des) ) { - return OMPI_ERR_OUT_OF_RESOURCE; - } - segment = des->des_local; - - /* pack the data into the BTL supplied buffer */ - iov.iov_base = (IOVBASE_TYPE*)((unsigned char*)segment->seg_addr.pval + - sizeof(mca_pml_bfo_rendezvous_hdr_t)); - iov.iov_len = size; - iov_count = 1; - max_data = size; - if((rc = opal_convertor_pack( &sendreq->req_send.req_base.req_convertor, - &iov, - &iov_count, - &max_data)) < 0) { - mca_bml_base_free(bml_btl, des); - return rc; - } - req_bytes_delivered = max_data; - - /* build rendezvous header */ - hdr = (mca_pml_bfo_hdr_t*)segment->seg_addr.pval; - hdr->hdr_common.hdr_flags = 0; - hdr->hdr_common.hdr_type = MCA_PML_BFO_HDR_TYPE_RNDV; - hdr->hdr_match.hdr_ctx = sendreq->req_send.req_base.req_comm->c_contextid; - hdr->hdr_match.hdr_src = sendreq->req_send.req_base.req_comm->c_my_rank; - hdr->hdr_match.hdr_tag = sendreq->req_send.req_base.req_tag; - hdr->hdr_match.hdr_seq = (uint16_t)sendreq->req_send.req_base.req_sequence; - hdr->hdr_rndv.hdr_msg_length = sendreq->req_send.req_bytes_packed; - hdr->hdr_rndv.hdr_src_req.pval = sendreq; -#if PML_BFO - MCA_PML_BFO_CHECK_FOR_RNDV_RESTART(hdr, sendreq, "RNDV(buffered)"); -#endif /* PML_BFO */ - - bfo_hdr_hton(hdr, MCA_PML_BFO_HDR_TYPE_RNDV, - sendreq->req_send.req_base.req_proc); - - /* update lengths */ - segment->seg_len = sizeof(mca_pml_bfo_rendezvous_hdr_t) + max_data; - - des->des_cbfunc = mca_pml_bfo_rndv_completion; - 
des->des_cbdata = sendreq; - - /* buffer the remainder of the message */ - rc = mca_pml_base_bsend_request_alloc((ompi_request_t*)sendreq); - if( OPAL_UNLIKELY(OMPI_SUCCESS != rc) ) { - mca_bml_base_free(bml_btl, des); - return rc; - } - - iov.iov_base = (IOVBASE_TYPE*)(((unsigned char*)sendreq->req_send.req_addr) + max_data); - iov.iov_len = max_data = sendreq->req_send.req_bytes_packed - max_data; - - if((rc = opal_convertor_pack( &sendreq->req_send.req_base.req_convertor, - &iov, - &iov_count, - &max_data)) < 0) { - mca_bml_base_free(bml_btl, des); - return rc; - } - - /* re-init convertor for packed data */ - opal_convertor_prepare_for_send( &sendreq->req_send.req_base.req_convertor, - &(ompi_mpi_byte.dt.super), - sendreq->req_send.req_bytes_packed, - sendreq->req_send.req_addr ); - - /* wait for ack and completion */ - sendreq->req_state = 2; - - /* request is complete at mpi level */ - MCA_PML_BFO_SEND_REQUEST_MPI_COMPLETE(sendreq, true); - - /* send */ - rc = mca_bml_base_send(bml_btl, des, MCA_PML_BFO_HDR_TYPE_RNDV); - if( OPAL_LIKELY( rc >= 0 ) ) { - if( OPAL_LIKELY( 1 == rc ) ) { - mca_pml_bfo_rndv_completion_request( bml_btl, sendreq, req_bytes_delivered); - } -#if PML_BFO - if (des->des_flags & MCA_BTL_DES_SEND_ALWAYS_CALLBACK) { - sendreq->req_events++; - } -#endif /* PML_BFO */ - return OMPI_SUCCESS; - } - mca_bml_base_free(bml_btl, des ); - return rc; -} - - -/** - * We work on a buffered request with a size smaller than the eager size - * or the BTL is not able to send the data IN_PLACE. Request a segment - * that is used for initial hdr and any eager data. This is used only - * from the _START macro. 
- */ -int mca_pml_bfo_send_request_start_copy( mca_pml_bfo_send_request_t* sendreq, - mca_bml_base_btl_t* bml_btl, - size_t size ) -{ - mca_btl_base_descriptor_t* des = NULL; - mca_btl_base_segment_t* segment; - mca_pml_bfo_hdr_t* hdr; - struct iovec iov; - unsigned int iov_count; - size_t max_data = size; - int rc; - - if(NULL != bml_btl->btl->btl_sendi) { - mca_pml_bfo_match_hdr_t match; - match.hdr_common.hdr_flags = 0; - match.hdr_common.hdr_type = MCA_PML_BFO_HDR_TYPE_MATCH; - match.hdr_ctx = sendreq->req_send.req_base.req_comm->c_contextid; - match.hdr_src = sendreq->req_send.req_base.req_comm->c_my_rank; - match.hdr_tag = sendreq->req_send.req_base.req_tag; - match.hdr_seq = (uint16_t)sendreq->req_send.req_base.req_sequence; - - bfo_hdr_hton(&match, MCA_PML_BFO_HDR_TYPE_MATCH, - sendreq->req_send.req_base.req_proc); - - /* try to send immediately */ - rc = mca_bml_base_sendi( bml_btl, &sendreq->req_send.req_base.req_convertor, - &match, OMPI_PML_BFO_MATCH_HDR_LEN, - size, MCA_BTL_NO_ORDER, - MCA_BTL_DES_FLAGS_PRIORITY | MCA_BTL_DES_FLAGS_BTL_OWNERSHIP, - MCA_PML_BFO_HDR_TYPE_MATCH, - &des); - if( OPAL_LIKELY(OMPI_SUCCESS == rc) ) { -#if PML_BFO - /* Needed in case of failover */ - if (NULL != des) { - des->des_cbfunc = mca_pml_bfo_match_completion_free; - des->des_cbdata = sendreq->req_endpoint; - } -#endif /* PML_BFO */ - /* signal request completion */ - send_request_pml_complete(sendreq); - - /* check for pending requests */ - MCA_PML_BFO_PROGRESS_PENDING(bml_btl); - return OMPI_SUCCESS; - } - } else { - /* allocate descriptor */ - mca_bml_base_alloc( bml_btl, &des, - MCA_BTL_NO_ORDER, - OMPI_PML_BFO_MATCH_HDR_LEN + size, - MCA_BTL_DES_FLAGS_PRIORITY | MCA_BTL_DES_FLAGS_BTL_OWNERSHIP); - } - if( OPAL_UNLIKELY(NULL == des) ) { - return OMPI_ERR_OUT_OF_RESOURCE; - } - - segment = des->des_local; - - if(size > 0) { - /* pack the data into the supplied buffer */ - iov.iov_base = (IOVBASE_TYPE*)((unsigned char*)segment->seg_addr.pval + - 
OMPI_PML_BFO_MATCH_HDR_LEN); - iov.iov_len = size; - iov_count = 1; - /* - * Before copy the user buffer, make the target part - * accessible. - */ - MEMCHECKER( - memchecker_call(&opal_memchecker_base_mem_defined, - sendreq->req_send.req_base.req_addr, - sendreq->req_send.req_base.req_count, - sendreq->req_send.req_base.req_datatype); - ); - (void)opal_convertor_pack( &sendreq->req_send.req_base.req_convertor, - &iov, &iov_count, &max_data ); - /* - * Packing finished, make the user buffer unaccessable. - */ - MEMCHECKER( - memchecker_call(&opal_memchecker_base_mem_noaccess, - sendreq->req_send.req_base.req_addr, - sendreq->req_send.req_base.req_count, - sendreq->req_send.req_base.req_datatype); - ); - } - - - /* build match header */ - hdr = (mca_pml_bfo_hdr_t*)segment->seg_addr.pval; - hdr->hdr_common.hdr_flags = 0; - hdr->hdr_common.hdr_type = MCA_PML_BFO_HDR_TYPE_MATCH; - hdr->hdr_match.hdr_ctx = sendreq->req_send.req_base.req_comm->c_contextid; - hdr->hdr_match.hdr_src = sendreq->req_send.req_base.req_comm->c_my_rank; - hdr->hdr_match.hdr_tag = sendreq->req_send.req_base.req_tag; - hdr->hdr_match.hdr_seq = (uint16_t)sendreq->req_send.req_base.req_sequence; - - bfo_hdr_hton(hdr, MCA_PML_BFO_HDR_TYPE_MATCH, - sendreq->req_send.req_base.req_proc); - - /* update lengths */ - segment->seg_len = OMPI_PML_BFO_MATCH_HDR_LEN + max_data; - - /* short message */ - des->des_cbdata = sendreq; - des->des_cbfunc = mca_pml_bfo_match_completion_free; - - /* send */ - rc = mca_bml_base_send_status(bml_btl, des, MCA_PML_BFO_HDR_TYPE_MATCH); - if( OPAL_LIKELY( rc >= OMPI_SUCCESS ) ) { - if( OPAL_LIKELY( 1 == rc ) ) { - mca_pml_bfo_match_completion_free_request( bml_btl, sendreq ); - } - return OMPI_SUCCESS; - } - if (OMPI_ERR_RESOURCE_BUSY == rc) { - /* No more resources. 
Allow the upper level to queue the send */ - rc = OMPI_ERR_OUT_OF_RESOURCE; - } - - mca_bml_base_free (bml_btl, des); - - return rc; -} - -/** - * BTL can send directly from user buffer so allow the BTL - * to prepare the segment list. Start sending a small message. - */ - -int mca_pml_bfo_send_request_start_prepare( mca_pml_bfo_send_request_t* sendreq, - mca_bml_base_btl_t* bml_btl, - size_t size ) -{ - mca_btl_base_descriptor_t* des; - mca_btl_base_segment_t* segment; - mca_pml_bfo_hdr_t* hdr; - int rc; - - /* prepare descriptor */ - mca_bml_base_prepare_src( bml_btl, - NULL, - &sendreq->req_send.req_base.req_convertor, - MCA_BTL_NO_ORDER, - OMPI_PML_BFO_MATCH_HDR_LEN, - &size, - MCA_BTL_DES_FLAGS_PRIORITY | MCA_BTL_DES_FLAGS_BTL_OWNERSHIP, - &des ); - if( OPAL_UNLIKELY(NULL == des) ) { - return OMPI_ERR_OUT_OF_RESOURCE; - } - segment = des->des_local; - - /* build match header */ - hdr = (mca_pml_bfo_hdr_t*)segment->seg_addr.pval; - hdr->hdr_common.hdr_flags = 0; - hdr->hdr_common.hdr_type = MCA_PML_BFO_HDR_TYPE_MATCH; - hdr->hdr_match.hdr_ctx = sendreq->req_send.req_base.req_comm->c_contextid; - hdr->hdr_match.hdr_src = sendreq->req_send.req_base.req_comm->c_my_rank; - hdr->hdr_match.hdr_tag = sendreq->req_send.req_base.req_tag; - hdr->hdr_match.hdr_seq = (uint16_t)sendreq->req_send.req_base.req_sequence; - - bfo_hdr_hton(hdr, MCA_PML_BFO_HDR_TYPE_MATCH, - sendreq->req_send.req_base.req_proc); - - /* short message */ - des->des_cbfunc = mca_pml_bfo_match_completion_free; - des->des_cbdata = sendreq; - - /* send */ - rc = mca_bml_base_send(bml_btl, des, MCA_PML_BFO_HDR_TYPE_MATCH); - if( OPAL_LIKELY( rc >= 0 ) ) { - if( OPAL_LIKELY( 1 == rc ) ) { - mca_pml_bfo_match_completion_free_request( bml_btl, sendreq ); - } - return OMPI_SUCCESS; - } - mca_bml_base_free(bml_btl, des ); - return rc; -} - - -/** - * We have contigous data that is registered - schedule across - * available nics. 
- */ - -int mca_pml_bfo_send_request_start_rdma( mca_pml_bfo_send_request_t* sendreq, - mca_bml_base_btl_t* bml_btl, - size_t size ) -{ - /* - * When req_rdma array is constructed the first element of the array always - * assigned different btl in round robin fashion (if there are more than - * one RDMA capable BTLs). This way round robin distribution of RDMA - * operation is achieved. - */ - - mca_btl_base_descriptor_t* des, *src = NULL; - mca_btl_base_segment_t* segment; - mca_pml_bfo_hdr_t* hdr; - bool need_local_cb = false; - int rc; - - bml_btl = sendreq->req_rdma[0].bml_btl; - if((sendreq->req_rdma_cnt == 1) && (bml_btl->btl_flags & (MCA_BTL_FLAGS_GET | MCA_BTL_FLAGS_CUDA_GET))) { - mca_mpool_base_registration_t* reg = sendreq->req_rdma[0].btl_reg; - size_t seg_size; - size_t old_position = sendreq->req_send.req_base.req_convertor.bConverted; - - MEMCHECKER( - memchecker_call(&opal_memchecker_base_mem_defined, - sendreq->req_send.req_base.req_addr, - sendreq->req_send.req_base.req_count, - sendreq->req_send.req_base.req_datatype); - ); - /* prepare source descriptor/segment(s) */ - /* PML owns this descriptor and will free it in */ - /* get_completion */ - mca_bml_base_prepare_src( bml_btl, - reg, - &sendreq->req_send.req_base.req_convertor, - MCA_BTL_NO_ORDER, - 0, - &size, - MCA_BTL_DES_FLAGS_GET, - &src ); - MEMCHECKER( - memchecker_call(&opal_memchecker_base_mem_noaccess, - sendreq->req_send.req_base.req_addr, - sendreq->req_send.req_base.req_count, - sendreq->req_send.req_base.req_datatype); - ); - if( OPAL_UNLIKELY(NULL == src) ) { - opal_convertor_set_position(&sendreq->req_send.req_base.req_convertor, - &old_position); - return OMPI_ERR_OUT_OF_RESOURCE; - } - src->des_cbfunc = mca_pml_bfo_rget_completion; - src->des_cbdata = sendreq; - - seg_size = bml_btl->btl->btl_seg_size * src->des_local_count; - - /* allocate space for get hdr + segment list */ - mca_bml_base_alloc(bml_btl, &des, MCA_BTL_NO_ORDER, - sizeof(mca_pml_bfo_rget_hdr_t) + seg_size, - 
MCA_BTL_DES_FLAGS_PRIORITY | MCA_BTL_DES_FLAGS_BTL_OWNERSHIP); - if( OPAL_UNLIKELY(NULL == des) ) { - opal_convertor_set_position( &sendreq->req_send.req_base.req_convertor, - &old_position ); - mca_bml_base_free(bml_btl, src); - return OMPI_ERR_OUT_OF_RESOURCE; - } - segment = des->des_local; - - /* build match header */ - hdr = (mca_pml_bfo_hdr_t*)segment->seg_addr.pval; - hdr->hdr_common.hdr_flags = MCA_PML_BFO_HDR_FLAGS_CONTIG|MCA_PML_BFO_HDR_FLAGS_PIN; - hdr->hdr_common.hdr_type = MCA_PML_BFO_HDR_TYPE_RGET; - hdr->hdr_match.hdr_ctx = sendreq->req_send.req_base.req_comm->c_contextid; - hdr->hdr_match.hdr_src = sendreq->req_send.req_base.req_comm->c_my_rank; - hdr->hdr_match.hdr_tag = sendreq->req_send.req_base.req_tag; - hdr->hdr_match.hdr_seq = (uint16_t)sendreq->req_send.req_base.req_sequence; - hdr->hdr_rndv.hdr_msg_length = sendreq->req_send.req_bytes_packed; - hdr->hdr_rndv.hdr_src_req.pval = sendreq; -#if PML_BFO - MCA_PML_BFO_CHECK_FOR_RNDV_RESTART(hdr, sendreq, "RGET"); -#endif /* PML_BFO */ - hdr->hdr_rget.hdr_des.pval = src; - hdr->hdr_rget.hdr_seg_cnt = src->des_local_count; - - bfo_hdr_hton(hdr, MCA_PML_BFO_HDR_TYPE_RGET, - sendreq->req_send.req_base.req_proc); - - /* copy segment data */ - memmove (&hdr->hdr_rget + 1, src->des_local, seg_size); - - des->des_cbfunc = mca_pml_bfo_send_ctl_completion; - - /** - * Well, it's a get so we will not know when the peer get the data anyway. - * If we generate the PERUSE event here, at least we will know when do we - * sent the GET message ... 
- */ - if( sendreq->req_send.req_bytes_packed > 0 ) { - PERUSE_TRACE_COMM_EVENT( PERUSE_COMM_REQ_XFER_BEGIN, - &(sendreq->req_send.req_base), PERUSE_SEND ); - } - - } else { - - /* allocate a rendezvous header - dont eager send any data - * receiver will schedule rdma put(s) of the entire message - */ - - mca_bml_base_alloc(bml_btl, &des, - MCA_BTL_NO_ORDER, - sizeof(mca_pml_bfo_rendezvous_hdr_t), - MCA_BTL_DES_FLAGS_PRIORITY | MCA_BTL_DES_FLAGS_BTL_OWNERSHIP); - if( OPAL_UNLIKELY(NULL == des)) { - return OMPI_ERR_OUT_OF_RESOURCE; - } - segment = des->des_local; - - /* build hdr */ - hdr = (mca_pml_bfo_hdr_t*)segment->seg_addr.pval; - hdr->hdr_common.hdr_flags = MCA_PML_BFO_HDR_FLAGS_CONTIG|MCA_PML_BFO_HDR_FLAGS_PIN; - hdr->hdr_common.hdr_type = MCA_PML_BFO_HDR_TYPE_RNDV; - hdr->hdr_match.hdr_ctx = sendreq->req_send.req_base.req_comm->c_contextid; - hdr->hdr_match.hdr_src = sendreq->req_send.req_base.req_comm->c_my_rank; - hdr->hdr_match.hdr_tag = sendreq->req_send.req_base.req_tag; - hdr->hdr_match.hdr_seq = (uint16_t)sendreq->req_send.req_base.req_sequence; - hdr->hdr_rndv.hdr_msg_length = sendreq->req_send.req_bytes_packed; - hdr->hdr_rndv.hdr_src_req.pval = sendreq; -#if PML_BFO - MCA_PML_BFO_CHECK_FOR_RNDV_RESTART(hdr, sendreq, "RNDV"); -#endif /* PML_BFO */ - - bfo_hdr_hton(hdr, MCA_PML_BFO_HDR_TYPE_RNDV, - sendreq->req_send.req_base.req_proc); - - /* update lengths with number of bytes actually packed */ - segment->seg_len = sizeof(mca_pml_bfo_rendezvous_hdr_t); - - /* first fragment of a long message */ - des->des_cbfunc = mca_pml_bfo_rndv_completion; - need_local_cb = true; - - /* wait for ack and completion */ - sendreq->req_state = 2; - } - - des->des_cbdata = sendreq; - - /* send */ - rc = mca_bml_base_send(bml_btl, des, hdr->hdr_common.hdr_type); - if( OPAL_LIKELY( rc >= 0 ) ) { - if( OPAL_LIKELY( 1 == rc ) && (true == need_local_cb)) { - mca_pml_bfo_rndv_completion_request( bml_btl, sendreq, 0 ); - } -#if PML_BFO - if (MCA_PML_BFO_HDR_TYPE_RNDV == 
hdr->hdr_common.hdr_type) { - if (des->des_flags & MCA_BTL_DES_SEND_ALWAYS_CALLBACK) { - sendreq->req_events++; - } - } -#endif /* PML_BFO */ - return OMPI_SUCCESS; - } - mca_bml_base_free(bml_btl, des); - if (NULL != src) { - mca_bml_base_free (bml_btl, src); - } - - return rc; -} - - -/** - * Rendezvous is required. Not doing rdma so eager send up to - * the btls eager limit. - */ - -int mca_pml_bfo_send_request_start_rndv( mca_pml_bfo_send_request_t* sendreq, - mca_bml_base_btl_t* bml_btl, - size_t size, - int flags ) -{ - mca_btl_base_descriptor_t* des; - mca_btl_base_segment_t* segment; - mca_pml_bfo_hdr_t* hdr; - int rc; - - /* prepare descriptor */ - if(size == 0) { - mca_bml_base_alloc( bml_btl, - &des, - MCA_BTL_NO_ORDER, - sizeof(mca_pml_bfo_rendezvous_hdr_t), - MCA_BTL_DES_FLAGS_PRIORITY | MCA_BTL_DES_FLAGS_BTL_OWNERSHIP ); - } else { - MEMCHECKER( - memchecker_call(&opal_memchecker_base_mem_defined, - sendreq->req_send.req_base.req_addr, - sendreq->req_send.req_base.req_count, - sendreq->req_send.req_base.req_datatype); - ); - mca_bml_base_prepare_src( bml_btl, - NULL, - &sendreq->req_send.req_base.req_convertor, - MCA_BTL_NO_ORDER, - sizeof(mca_pml_bfo_rendezvous_hdr_t), - &size, - MCA_BTL_DES_FLAGS_PRIORITY | MCA_BTL_DES_FLAGS_BTL_OWNERSHIP, - &des ); - MEMCHECKER( - memchecker_call(&opal_memchecker_base_mem_noaccess, - sendreq->req_send.req_base.req_addr, - sendreq->req_send.req_base.req_count, - sendreq->req_send.req_base.req_datatype); - ); - } - - if( OPAL_UNLIKELY(NULL == des) ) { - return OMPI_ERR_OUT_OF_RESOURCE; - } - segment = des->des_local; - - /* build hdr */ - hdr = (mca_pml_bfo_hdr_t*)segment->seg_addr.pval; - hdr->hdr_common.hdr_flags = flags; - hdr->hdr_common.hdr_type = MCA_PML_BFO_HDR_TYPE_RNDV; - hdr->hdr_match.hdr_ctx = sendreq->req_send.req_base.req_comm->c_contextid; - hdr->hdr_match.hdr_src = sendreq->req_send.req_base.req_comm->c_my_rank; - hdr->hdr_match.hdr_tag = sendreq->req_send.req_base.req_tag; - hdr->hdr_match.hdr_seq = 
(uint16_t)sendreq->req_send.req_base.req_sequence; - hdr->hdr_rndv.hdr_msg_length = sendreq->req_send.req_bytes_packed; - hdr->hdr_rndv.hdr_src_req.pval = sendreq; -#if PML_BFO - MCA_PML_BFO_CHECK_FOR_RNDV_RESTART(hdr, sendreq, "RNDV"); -#endif /* PML_BFO */ - - bfo_hdr_hton(hdr, MCA_PML_BFO_HDR_TYPE_RNDV, - sendreq->req_send.req_base.req_proc); - - /* first fragment of a long message */ - des->des_cbdata = sendreq; - des->des_cbfunc = mca_pml_bfo_rndv_completion; - - /* wait for ack and completion */ - sendreq->req_state = 2; - - /* send */ - rc = mca_bml_base_send(bml_btl, des, MCA_PML_BFO_HDR_TYPE_RNDV); - if( OPAL_LIKELY( rc >= 0 ) ) { - if( OPAL_LIKELY( 1 == rc ) ) { - mca_pml_bfo_rndv_completion_request( bml_btl, sendreq, size ); - } -#if PML_BFO - if (des->des_flags & MCA_BTL_DES_SEND_ALWAYS_CALLBACK) { - sendreq->req_events++; - } -#endif /* PML_BFO */ - return OMPI_SUCCESS; - } - mca_bml_base_free(bml_btl, des ); - return rc; -} - -void mca_pml_bfo_send_request_copy_in_out( mca_pml_bfo_send_request_t *sendreq, - uint64_t send_offset, - uint64_t send_length ) -{ - mca_pml_bfo_send_range_t *sr; - ompi_free_list_item_t *i; - mca_bml_base_endpoint_t* bml_endpoint = sendreq->req_endpoint; - int num_btls = mca_bml_base_btl_array_get_size(&bml_endpoint->btl_send); - int n; - double weight_total = 0; - - if( OPAL_UNLIKELY(0 == send_length) ) - return; - - OMPI_FREE_LIST_WAIT_MT(&mca_pml_bfo.send_ranges, i); - - sr = (mca_pml_bfo_send_range_t*)i; - - sr->range_send_offset = send_offset; - sr->range_send_length = send_length; - sr->range_btl_idx = 0; - - for(n = 0; n < num_btls && n < mca_pml_bfo.max_send_per_range; n++) { - sr->range_btls[n].bml_btl = - mca_bml_base_btl_array_get_next(&bml_endpoint->btl_send); - weight_total += sr->range_btls[n].bml_btl->btl_weight; - } - - sr->range_btl_cnt = n; - mca_pml_bfo_calc_weighted_length(sr->range_btls, n, send_length, - weight_total); - - OPAL_THREAD_LOCK(&sendreq->req_send_range_lock); - 
opal_list_append(&sendreq->req_send_ranges, (opal_list_item_t*)sr); - OPAL_THREAD_UNLOCK(&sendreq->req_send_range_lock); -} - -static inline mca_pml_bfo_send_range_t * -get_send_range_nolock(mca_pml_bfo_send_request_t* sendreq) -{ - opal_list_item_t *item; - - item = opal_list_get_first(&sendreq->req_send_ranges); - - if(opal_list_get_end(&sendreq->req_send_ranges) == item) - return NULL; - - return (mca_pml_bfo_send_range_t*)item; -} - -static inline mca_pml_bfo_send_range_t * -get_send_range(mca_pml_bfo_send_request_t* sendreq) -{ - mca_pml_bfo_send_range_t *range; - - OPAL_THREAD_LOCK(&sendreq->req_send_range_lock); - range = get_send_range_nolock(sendreq); - OPAL_THREAD_UNLOCK(&sendreq->req_send_range_lock); - - return range; -} - -static inline mca_pml_bfo_send_range_t * -get_next_send_range(mca_pml_bfo_send_request_t* sendreq, - mca_pml_bfo_send_range_t *range) -{ - OPAL_THREAD_LOCK(&sendreq->req_send_range_lock); - opal_list_remove_item(&sendreq->req_send_ranges, (opal_list_item_t *)range); - OMPI_FREE_LIST_RETURN_MT(&mca_pml_bfo.send_ranges, &range->base); - range = get_send_range_nolock(sendreq); - OPAL_THREAD_UNLOCK(&sendreq->req_send_range_lock); - - return range; -} - -/** - * Schedule pipeline of send descriptors for the given request. - * Up to the rdma threshold. If this is a send based protocol, - * the rdma threshold is the end of the message. Otherwise, schedule - * fragments up to the threshold to overlap initial registration/setup - * costs of the rdma. Only one thread can be inside this function. 
- */ - -int -mca_pml_bfo_send_request_schedule_once(mca_pml_bfo_send_request_t* sendreq) -{ - size_t prev_bytes_remaining = 0; - mca_pml_bfo_send_range_t *range; - int num_fail = 0; - - /* check pipeline_depth here before attempting to get any locks */ - if(true == sendreq->req_throttle_sends && - sendreq->req_pipeline_depth >= mca_pml_bfo.send_pipeline_depth) - return OMPI_SUCCESS; - - range = get_send_range(sendreq); - - while(range && (false == sendreq->req_throttle_sends || - sendreq->req_pipeline_depth < mca_pml_bfo.send_pipeline_depth)) { - mca_pml_bfo_frag_hdr_t* hdr; - mca_btl_base_descriptor_t* des; - int rc, btl_idx; - size_t size, offset, data_remaining = 0; - mca_bml_base_btl_t* bml_btl; - - assert(range->range_send_length != 0); -#if PML_BFO - MCA_PML_BFO_CHECK_FOR_REMOVED_BTL(sendreq, range); -#endif /* PML_BFO */ - - if(prev_bytes_remaining == range->range_send_length) - num_fail++; - else - num_fail = 0; - - prev_bytes_remaining = range->range_send_length; - - if( OPAL_UNLIKELY(num_fail == range->range_btl_cnt) ) { - assert(sendreq->req_pending == MCA_PML_BFO_SEND_PENDING_NONE); - add_request_to_send_pending(sendreq, - MCA_PML_BFO_SEND_PENDING_SCHEDULE, true); - /* Note that request remains locked. 
send_request_process_pending() - * function will call shedule_exclusive() directly without taking - * the lock */ - return OMPI_ERR_OUT_OF_RESOURCE; - } - -cannot_pack: - do { - btl_idx = range->range_btl_idx; - if(++range->range_btl_idx == range->range_btl_cnt) - range->range_btl_idx = 0; - } while(!range->range_btls[btl_idx].length); - - bml_btl = range->range_btls[btl_idx].bml_btl; - /* If there is a remaining data from another BTL that was too small - * for converter to pack then send it through another BTL */ - range->range_btls[btl_idx].length += data_remaining; - size = range->range_btls[btl_idx].length; - - /* makes sure that we don't exceed BTL max send size */ - if(bml_btl->btl->btl_max_send_size != 0) { - size_t max_send_size = bml_btl->btl->btl_max_send_size - - sizeof(mca_pml_bfo_frag_hdr_t); - - if (size > max_send_size) { - size = max_send_size; - } - } - - /* pack into a descriptor */ - offset = (size_t)range->range_send_offset; - opal_convertor_set_position(&sendreq->req_send.req_base.req_convertor, - &offset); - range->range_send_offset = (uint64_t)offset; - - data_remaining = size; - MEMCHECKER( - memchecker_call(&opal_memchecker_base_mem_defined, - sendreq->req_send.req_base.req_addr, - sendreq->req_send.req_base.req_count, - sendreq->req_send.req_base.req_datatype); - ); - mca_bml_base_prepare_src(bml_btl, NULL, - &sendreq->req_send.req_base.req_convertor, - MCA_BTL_NO_ORDER, - sizeof(mca_pml_bfo_frag_hdr_t), - &size, MCA_BTL_DES_FLAGS_BTL_OWNERSHIP | MCA_BTL_DES_SEND_ALWAYS_CALLBACK, &des); - MEMCHECKER( - memchecker_call(&opal_memchecker_base_mem_noaccess, - sendreq->req_send.req_base.req_addr, - sendreq->req_send.req_base.req_count, - sendreq->req_send.req_base.req_datatype); - ); - - if( OPAL_UNLIKELY(des == NULL || size == 0) ) { - if(des) { - /* Converter can't pack this chunk. 
Append to another chunk - * from other BTL */ - mca_bml_base_free(bml_btl, des); - range->range_btls[btl_idx].length -= data_remaining; - goto cannot_pack; - } - continue; - } - - des->des_cbfunc = mca_pml_bfo_frag_completion; - des->des_cbdata = sendreq; - - /* setup header */ - hdr = (mca_pml_bfo_frag_hdr_t*)des->des_local->seg_addr.pval; - hdr->hdr_common.hdr_flags = 0; - hdr->hdr_common.hdr_type = MCA_PML_BFO_HDR_TYPE_FRAG; - hdr->hdr_frag_offset = range->range_send_offset; - hdr->hdr_src_req.pval = sendreq; - hdr->hdr_dst_req = sendreq->req_recv; - - bfo_hdr_hton(hdr, MCA_PML_BFO_HDR_TYPE_FRAG, - sendreq->req_send.req_base.req_proc); - -#if OMPI_WANT_PERUSE - PERUSE_TRACE_COMM_OMPI_EVENT(PERUSE_COMM_REQ_XFER_CONTINUE, - &(sendreq->req_send.req_base), size, PERUSE_SEND); -#endif /* OMPI_WANT_PERUSE */ - - /* initiate send - note that this may complete before the call returns */ - rc = mca_bml_base_send(bml_btl, des, MCA_PML_BFO_HDR_TYPE_FRAG); - if( OPAL_LIKELY(rc >= 0) ) { - /* update state */ - range->range_btls[btl_idx].length -= size; - range->range_send_length -= size; - range->range_send_offset += size; - OPAL_THREAD_ADD_FETCH_SIZE_T(&sendreq->req_pipeline_depth, 1); - if(range->range_send_length == 0) { - range = get_next_send_range(sendreq, range); - prev_bytes_remaining = 0; - } -#if PML_BFO - if (des->des_flags & MCA_BTL_DES_SEND_ALWAYS_CALLBACK) { - sendreq->req_events++; - } -#endif /* PML_BFO */ - } else { - mca_bml_base_free(bml_btl,des); - } - } - - return OMPI_SUCCESS; -} - - -/** - * An RDMA put operation has completed: - * (1) Update request status and if required set completed - * (2) Send FIN control message to the destination - */ - -static void mca_pml_bfo_put_completion( mca_btl_base_module_t* btl, - struct mca_btl_base_endpoint_t* ep, - struct mca_btl_base_descriptor_t* des, - int status ) -{ - mca_pml_bfo_rdma_frag_t* frag = (mca_pml_bfo_rdma_frag_t*)des->des_cbdata; - mca_pml_bfo_send_request_t* sendreq = 
(mca_pml_bfo_send_request_t*)frag->rdma_req; - mca_bml_base_btl_t* bml_btl = (mca_bml_base_btl_t*) des->des_context; - - /* check completion status */ - if( OPAL_UNLIKELY(OMPI_SUCCESS != status) ) { -#if PML_BFO - sendreq->req_error++; -#else /* PML_BFO */ - /* TSW - FIX */ - OMPI_ERROR_LOG(status); - ompi_rte_abort(-1, NULL); -#endif /* PML_BFO */ - } -#if PML_BFO - sendreq->req_events--; - MCA_PML_BFO_PUT_COMPLETION_SENDREQ_ERROR_CHECK(sendreq, status, btl); - MCA_PML_BFO_CHECK_SENDREQ_EAGER_BML_BTL(bml_btl, btl, sendreq, "RDMA write"); -#endif /* PML_BFO */ - - mca_pml_bfo_send_fin(sendreq->req_send.req_base.req_proc, - bml_btl, - frag->rdma_hdr.hdr_rdma.hdr_des, -#if PML_BFO - des->order, 0, (uint16_t)sendreq->req_send.req_base.req_sequence, - sendreq->req_restartseq, sendreq->req_send.req_base.req_comm->c_contextid, - sendreq->req_send.req_base.req_comm->c_my_rank); -#else /* PML_BFO */ - des->order, 0); -#endif /* PML_BFO */ - - /* check for request completion */ - OPAL_THREAD_ADD_FETCH_SIZE_T(&sendreq->req_bytes_delivered, frag->rdma_length); - - send_request_pml_complete_check(sendreq); - - MCA_PML_BFO_RDMA_FRAG_RETURN(frag); - - MCA_PML_BFO_PROGRESS_PENDING(bml_btl); -} - -int mca_pml_bfo_send_request_put_frag( mca_pml_bfo_rdma_frag_t* frag ) -{ - mca_mpool_base_registration_t* reg = NULL; - mca_bml_base_btl_t* bml_btl = frag->rdma_bml; - mca_btl_base_descriptor_t* des; - size_t save_size = frag->rdma_length; - int rc; - - /* setup descriptor */ - mca_bml_base_prepare_src( bml_btl, - reg, - &frag->convertor, - MCA_BTL_NO_ORDER, - 0, - &frag->rdma_length, - MCA_BTL_DES_FLAGS_BTL_OWNERSHIP | - MCA_BTL_DES_FLAGS_PUT, - &des ); - - if( OPAL_UNLIKELY(NULL == des) ) { - if(frag->retries < mca_pml_bfo.rdma_put_retries_limit) { - size_t offset = (size_t)frag->rdma_hdr.hdr_rdma.hdr_rdma_offset; - frag->rdma_length = save_size; - opal_convertor_set_position(&frag->convertor, &offset); - OPAL_THREAD_LOCK(&mca_pml_bfo.lock); - 
opal_list_append(&mca_pml_bfo.rdma_pending, (opal_list_item_t*)frag); - OPAL_THREAD_UNLOCK(&mca_pml_bfo.lock); - } else { - mca_pml_bfo_send_request_t *sendreq = - (mca_pml_bfo_send_request_t*)frag->rdma_req; - - /* tell receiver to unregister memory */ - mca_pml_bfo_send_fin(sendreq->req_send.req_base.req_proc, - bml_btl, frag->rdma_hdr.hdr_rdma.hdr_des, -#if PML_BFO - MCA_BTL_NO_ORDER, 1, (uint16_t)sendreq->req_send.req_base.req_sequence, - sendreq->req_restartseq, sendreq->req_send.req_base.req_comm->c_contextid, - sendreq->req_send.req_base.req_comm->c_my_rank); -#else /* PML_BFO */ - MCA_BTL_NO_ORDER, 1); -#endif /* PML_BFO */ - - /* send fragment by copy in/out */ - mca_pml_bfo_send_request_copy_in_out(sendreq, - frag->rdma_hdr.hdr_rdma.hdr_rdma_offset, frag->rdma_length); - /* if a pointer to a receive request is not set it means that - * ACK was not yet received. Don't schedule sends before ACK */ - if(NULL != sendreq->req_recv.pval) - mca_pml_bfo_send_request_schedule(sendreq); - } - return OMPI_ERR_OUT_OF_RESOURCE; - } - - des->des_remote = (mca_btl_base_segment_t *) frag->rdma_segs; - des->des_remote_count = frag->rdma_hdr.hdr_rdma.hdr_seg_cnt; - des->des_cbfunc = mca_pml_bfo_put_completion; - des->des_cbdata = frag; - - PERUSE_TRACE_COMM_OMPI_EVENT( PERUSE_COMM_REQ_XFER_CONTINUE, - &(((mca_pml_bfo_send_request_t*)frag->rdma_req)->req_send.req_base), save_size, PERUSE_SEND ); - - rc = mca_bml_base_put(bml_btl, des); - if( OPAL_UNLIKELY(OMPI_SUCCESS != rc) ) { - mca_bml_base_free(bml_btl, des); - frag->rdma_length = save_size; - if(OMPI_ERR_OUT_OF_RESOURCE == rc) { - OPAL_THREAD_LOCK(&mca_pml_bfo.lock); - opal_list_append(&mca_pml_bfo.rdma_pending, (opal_list_item_t*)frag); - OPAL_THREAD_UNLOCK(&mca_pml_bfo.lock); - return OMPI_ERR_OUT_OF_RESOURCE; - } else { - /* TSW - FIX */ - OMPI_ERROR_LOG(rc); - ompi_rte_abort(-1, NULL); - } - } -#if PML_BFO - if (des->des_flags & MCA_BTL_DES_SEND_ALWAYS_CALLBACK) { - 
((mca_pml_bfo_send_request_t*)frag->rdma_req)->req_events++; - } -#endif /* PML_BFO */ - return OMPI_SUCCESS; -} - -/** - * Receiver has scheduled an RDMA operation: - * (1) Allocate an RDMA fragment to maintain the state of the operation - * (2) Call BTL prepare_src to pin/prepare source buffers - * (3) Queue the RDMA put - */ - -void mca_pml_bfo_send_request_put( mca_pml_bfo_send_request_t* sendreq, - mca_btl_base_module_t* btl, - mca_pml_bfo_rdma_hdr_t* hdr ) -{ - mca_bml_base_endpoint_t *bml_endpoint = sendreq->req_endpoint; - mca_pml_bfo_rdma_frag_t* frag; - size_t i, size = 0; - - if(hdr->hdr_common.hdr_flags & MCA_PML_BFO_HDR_TYPE_ACK) { - OPAL_THREAD_ADD_FETCH32(&sendreq->req_state, -1); - } -#if PML_BFO - MCA_PML_BFO_VERIFY_SENDREQ_REQ_STATE_VALUE(sendreq); - sendreq->req_recv = hdr->hdr_dst_req; /* only needed once, but it is OK */ -#endif /* PML_BFO */ - - MCA_PML_BFO_RDMA_FRAG_ALLOC(frag); - if( OPAL_UNLIKELY(NULL == frag) ) { - /* TSW - FIX */ - OMPI_ERROR_LOG(OMPI_ERR_OUT_OF_RESOURCE); - ompi_rte_abort(-1, NULL); - } - - assert (btl->btl_seg_size * hdr->hdr_seg_cnt <= sizeof (frag->rdma_segs)); - - /* setup fragment */ - memmove (frag->rdma_segs, hdr + 1, btl->btl_seg_size * hdr->hdr_seg_cnt); - - for( i = 0; i < hdr->hdr_seg_cnt; i++ ) { - mca_btl_base_segment_t *seg = (mca_btl_base_segment_t *) ((uintptr_t)(frag->rdma_segs) + i * btl->btl_seg_size); - -#if OPAL_ENABLE_HETEROGENEOUS_SUPPORT - if ((sendreq->req_send.req_base.req_proc->super.proc_arch & OPAL_ARCH_ISBIGENDIAN) != - (ompi_proc_local()->super.proc_arch & OPAL_ARCH_ISBIGENDIAN)) { - size += opal_swap_bytes4(seg->seg_len); - } else -#endif - { - size += seg->seg_len; - } - } - - frag->rdma_bml = mca_bml_base_btl_array_find(&bml_endpoint->btl_rdma, btl); -#if PML_BFO - MCA_PML_BFO_CHECK_FOR_REMOVED_BML(sendreq, frag, btl); - frag->rdma_btl = btl; /* in case frag ends up on pending */ -#endif /* PML_BFO */ - frag->rdma_hdr.hdr_rdma = *hdr; - frag->rdma_req = sendreq; - frag->rdma_ep = 
bml_endpoint; - frag->rdma_length = size; - frag->rdma_state = MCA_PML_BFO_RDMA_PUT; - frag->reg = NULL; - frag->retries = 0; - - /* lookup the corresponding registration */ - for(i=0; ireq_rdma_cnt; i++) { - if(sendreq->req_rdma[i].bml_btl == frag->rdma_bml) { - frag->reg = sendreq->req_rdma[i].btl_reg; - break; - } - } - - /* RDMA writes may proceed in parallel to send and to each other, so - * create clone of the convertor for each RDMA fragment - */ - size = hdr->hdr_rdma_offset; - opal_convertor_clone_with_position(&sendreq->req_send.req_base.req_convertor, - &frag->convertor, 0, &size); - - mca_pml_bfo_send_request_put_frag(frag); -} - diff --git a/ompi/mca/pml/bfo/pml_bfo_sendreq.h b/ompi/mca/pml/bfo/pml_bfo_sendreq.h deleted file mode 100644 index 170512ffe3e..00000000000 --- a/ompi/mca/pml/bfo/pml_bfo_sendreq.h +++ /dev/null @@ -1,499 +0,0 @@ -/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2016 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * Copyright (c) 2009-2012 Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2011-2012 Los Alamos National Security, LLC. - * All rights reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#ifndef OMPI_PML_BFO_SEND_REQUEST_H -#define OMPI_PML_BFO_SEND_REQUEST_H - -#include "opal/mca/btl/btl.h" -#include "opal/mca/mpool/base/base.h" -#include "ompi/mca/pml/base/pml_base_sendreq.h" -#include "pml_bfo_comm.h" -#include "pml_bfo_hdr.h" -#include "pml_bfo_rdma.h" -#include "pml_bfo_rdmafrag.h" -#include "ompi/mca/bml/bml.h" - -BEGIN_C_DECLS - -typedef enum { - MCA_PML_BFO_SEND_PENDING_NONE, - MCA_PML_BFO_SEND_PENDING_SCHEDULE, - MCA_PML_BFO_SEND_PENDING_START -} mca_pml_bfo_send_pending_t; - -struct mca_pml_bfo_send_request_t { - mca_pml_base_send_request_t req_send; - mca_bml_base_endpoint_t* req_endpoint; - opal_ptr_t req_recv; -#if PML_BFO - int32_t req_events; /* number of outstanding events on request */ - int32_t req_restartseq; /* sequence number of restarted request */ - int32_t req_restart; /* state of restarted request */ - int32_t req_error; /* non-zero when error has occurred on request */ -#endif /* PML_BFO */ - int32_t req_state; - int32_t req_lock; - bool req_throttle_sends; - size_t req_pipeline_depth; - size_t req_bytes_delivered; - uint32_t req_rdma_cnt; - mca_pml_bfo_send_pending_t req_pending; - opal_mutex_t req_send_range_lock; - opal_list_t req_send_ranges; - mca_pml_bfo_com_btl_t req_rdma[1]; -}; -typedef struct mca_pml_bfo_send_request_t mca_pml_bfo_send_request_t; - -OBJ_CLASS_DECLARATION(mca_pml_bfo_send_request_t); - -struct mca_pml_bfo_send_range_t { - opal_free_list_item_t base; - uint64_t range_send_offset; - uint64_t range_send_length; - int range_btl_idx; - int range_btl_cnt; - mca_pml_bfo_com_btl_t range_btls[1]; -}; -typedef struct mca_pml_bfo_send_range_t mca_pml_bfo_send_range_t; -OBJ_CLASS_DECLARATION(mca_pml_bfo_send_range_t); - -static inline bool lock_send_request(mca_pml_bfo_send_request_t *sendreq) -{ - return OPAL_THREAD_ADD_FETCH32(&sendreq->req_lock, 1) == 1; -} - -static inline bool unlock_send_request(mca_pml_bfo_send_request_t 
*sendreq) -{ - return OPAL_THREAD_ADD_FETCH32(&sendreq->req_lock, -1) == 0; -} - -static inline void -add_request_to_send_pending(mca_pml_bfo_send_request_t* sendreq, - const mca_pml_bfo_send_pending_t type, - const bool append) -{ - opal_list_item_t *item = (opal_list_item_t*)sendreq; - - OPAL_THREAD_LOCK(&mca_pml_bfo.lock); - sendreq->req_pending = type; - if(append) - opal_list_append(&mca_pml_bfo.send_pending, item); - else - opal_list_prepend(&mca_pml_bfo.send_pending, item); - - OPAL_THREAD_UNLOCK(&mca_pml_bfo.lock); -} - -static inline mca_pml_bfo_send_request_t* -get_request_from_send_pending(mca_pml_bfo_send_pending_t *type) -{ - mca_pml_bfo_send_request_t *sendreq; - - OPAL_THREAD_LOCK(&mca_pml_bfo.lock); - sendreq = (mca_pml_bfo_send_request_t*) - opal_list_remove_first(&mca_pml_bfo.send_pending); - if(sendreq) { - *type = sendreq->req_pending; - sendreq->req_pending = MCA_PML_BFO_SEND_PENDING_NONE; - } - OPAL_THREAD_UNLOCK(&mca_pml_bfo.lock); - - return sendreq; -} - -#define MCA_PML_BFO_SEND_REQUEST_ALLOC( comm, \ - dst, \ - sendreq) \ - { \ - ompi_proc_t *proc = ompi_comm_peer_lookup( comm, dst ); \ - opal_free_list_item_t* item; \ - \ - sendreq = NULL; \ - if( OPAL_LIKELY(NULL != proc) ) { \ - OPAL_FREE_LIST_WAIT_MT(&mca_pml_base_send_requests, item); \ - sendreq = (mca_pml_bfo_send_request_t*)item; \ - sendreq->req_send.req_base.req_proc = proc; \ - } \ - } - - -#define MCA_PML_BFO_SEND_REQUEST_INIT( sendreq, \ - buf, \ - count, \ - datatype, \ - dst, \ - tag, \ - comm, \ - sendmode, \ - persistent) \ - { \ - MCA_PML_BASE_SEND_REQUEST_INIT(&sendreq->req_send, \ - buf, \ - count, \ - datatype, \ - dst, \ - tag, \ - comm, \ - sendmode, \ - persistent, \ - 0); /* convertor_flags */ \ - (sendreq)->req_recv.pval = NULL; \ - } - - -static inline void mca_pml_bfo_free_rdma_resources(mca_pml_bfo_send_request_t* sendreq) -{ - size_t r; - - /* return mpool resources */ - for(r = 0; r < sendreq->req_rdma_cnt; r++) { - struct mca_btl_base_registration_handle_t* 
handle = sendreq->req_rdma[r].btl_reg; - mca_bml_base_btl_t *bml_btl = sendreq->req_rdma[r].bml_btl; - - if( NULL != handle ) { - mca_bml_base_deregister_mem (bml_btl, handle); - sendreq->req_rdma[r].btl_reg = NULL; - } - } - sendreq->req_rdma_cnt = 0; -} - - -/** - * Start a send request. - */ - -#define MCA_PML_BFO_SEND_REQUEST_START(sendreq, rc) \ - do { \ - rc = mca_pml_bfo_send_request_start(sendreq); \ - } while (0) - - -/* - * Mark a send request as completed at the MPI level. - */ - -#define MCA_PML_BFO_SEND_REQUEST_MPI_COMPLETE(sendreq, with_signal) \ -do { \ - (sendreq)->req_send.req_base.req_ompi.req_status.MPI_SOURCE = \ - (sendreq)->req_send.req_base.req_comm->c_my_rank; \ - (sendreq)->req_send.req_base.req_ompi.req_status.MPI_TAG = \ - (sendreq)->req_send.req_base.req_tag; \ - (sendreq)->req_send.req_base.req_ompi.req_status.MPI_ERROR = OMPI_SUCCESS; \ - (sendreq)->req_send.req_base.req_ompi.req_status._ucount = \ - (sendreq)->req_send.req_bytes_packed; \ - PERUSE_TRACE_COMM_EVENT( PERUSE_COMM_REQ_COMPLETE, \ - &(sendreq->req_send.req_base), PERUSE_SEND); \ - \ - ompi_request_complete( &((sendreq)->req_send.req_base.req_ompi), (with_signal) ); \ -} while(0) - -/* - * Release resources associated with a request - */ - -#define MCA_PML_BFO_SEND_REQUEST_RETURN(sendreq) \ - do { \ - MCA_PML_BASE_SEND_REQUEST_FINI((&(sendreq)->req_send)); \ - OPAL_FREE_LIST_RETURN( &mca_pml_base_send_requests, \ - (opal_free_list_item_t*)sendreq); \ - } while(0) - - -/* - * The PML has completed a send request. Note that this request - * may have been orphaned by the user or have already completed - * at the MPI level. - * This function will never be called directly from the upper level, as it - * should only be an internal call to the PML. 
- * - */ -static inline void -send_request_pml_complete(mca_pml_bfo_send_request_t *sendreq) -{ - if(false == sendreq->req_send.req_base.req_pml_complete) { - if(sendreq->req_send.req_bytes_packed > 0) { - PERUSE_TRACE_COMM_EVENT( PERUSE_COMM_REQ_XFER_END, - &(sendreq->req_send.req_base), PERUSE_SEND); - } - - /* return mpool resources */ - mca_pml_bfo_free_rdma_resources(sendreq); - - if (sendreq->req_send.req_send_mode == MCA_PML_BASE_SEND_BUFFERED && - sendreq->req_send.req_addr != sendreq->req_send.req_base.req_addr) { - mca_pml_base_bsend_request_fini((ompi_request_t*)sendreq); - } - - sendreq->req_send.req_base.req_pml_complete = true; - - if( !REQUEST_COMPLETE( &((sendreq->req_send).req_base.req_ompi)) ) { - /* Should only be called for long messages (maybe synchronous) */ - MCA_PML_BFO_SEND_REQUEST_MPI_COMPLETE(sendreq, true); - } else { - if( MPI_SUCCESS != sendreq->req_send.req_base.req_ompi.req_status.MPI_ERROR ) { - ompi_mpi_abort(&ompi_mpi_comm_world.comm, MPI_ERR_REQUEST); - } - } -#if PML_BFO - sendreq->req_send.req_base.req_sequence -= 100; -#endif /* PML_BFO */ - - if(true == sendreq->req_send.req_base.req_free_called) { - MCA_PML_BFO_SEND_REQUEST_RETURN(sendreq); - } - } -} - -/* returns true if request was completed on PML level */ -static inline bool -send_request_pml_complete_check(mca_pml_bfo_send_request_t *sendreq) -{ -#if OPAL_ENABLE_MULTI_THREADS - opal_atomic_rmb(); -#endif - /* if no more events are expected for the request and the whole message is - * already sent and send fragment scheduling isn't running in another - * thread then complete the request on PML level. 
From now on, if user - * called free on this request, the request structure can be reused for - * another request or if the request is persistent it can be restarted */ - if(sendreq->req_state == 0 && - sendreq->req_bytes_delivered >= sendreq->req_send.req_bytes_packed - && lock_send_request(sendreq)) { - send_request_pml_complete(sendreq); - return true; - } - - return false; -} - -/** - * Schedule additional fragments - */ -int -mca_pml_bfo_send_request_schedule_once(mca_pml_bfo_send_request_t*); - -static inline int -mca_pml_bfo_send_request_schedule_exclusive(mca_pml_bfo_send_request_t* sendreq) -{ - int rc; - do { - rc = mca_pml_bfo_send_request_schedule_once(sendreq); - if(rc == OMPI_ERR_OUT_OF_RESOURCE) - break; - } while(!unlock_send_request(sendreq)); - - if(OMPI_SUCCESS == rc) - send_request_pml_complete_check(sendreq); - - return rc; -} - -static inline void -mca_pml_bfo_send_request_schedule(mca_pml_bfo_send_request_t* sendreq) -{ - /* - * Only allow one thread in this routine for a given request. - * However, we cannot block callers on a mutex, so simply keep track - * of the number of times the routine has been called and run through - * the scheduling logic once for every call. 
- */ - - if(!lock_send_request(sendreq)) - return; - - mca_pml_bfo_send_request_schedule_exclusive(sendreq); -} - -#if OPAL_CUDA_SUPPORT -int mca_pml_bfo_send_request_start_cuda( - mca_pml_bfo_send_request_t* sendreq, - mca_bml_base_btl_t* bml_btl, - size_t size); -#endif /* OPAL_CUDA_SUPPORT */ - -/** - * Start the specified request - */ - -int mca_pml_bfo_send_request_start_buffered( - mca_pml_bfo_send_request_t* sendreq, - mca_bml_base_btl_t* bml_btl, - size_t size); - -int mca_pml_bfo_send_request_start_copy( - mca_pml_bfo_send_request_t* sendreq, - mca_bml_base_btl_t* bml_btl, - size_t size); - -int mca_pml_bfo_send_request_start_prepare( - mca_pml_bfo_send_request_t* sendreq, - mca_bml_base_btl_t* bml_btl, - size_t size); - -int mca_pml_bfo_send_request_start_rdma( - mca_pml_bfo_send_request_t* sendreq, - mca_bml_base_btl_t* bml_btl, - size_t size); - -int mca_pml_bfo_send_request_start_rndv( - mca_pml_bfo_send_request_t* sendreq, - mca_bml_base_btl_t* bml_btl, - size_t size, - int flags); - -static inline int -mca_pml_bfo_send_request_start_btl( mca_pml_bfo_send_request_t* sendreq, - mca_bml_base_btl_t* bml_btl ) -{ - size_t size = sendreq->req_send.req_bytes_packed; - mca_btl_base_module_t* btl = bml_btl->btl; - size_t eager_limit = btl->btl_eager_limit - sizeof(mca_pml_bfo_hdr_t); - int rc; - - if( OPAL_LIKELY(size <= eager_limit) ) { - switch(sendreq->req_send.req_send_mode) { - case MCA_PML_BASE_SEND_SYNCHRONOUS: - rc = mca_pml_bfo_send_request_start_rndv(sendreq, bml_btl, size, 0); - break; - case MCA_PML_BASE_SEND_BUFFERED: - rc = mca_pml_bfo_send_request_start_copy(sendreq, bml_btl, size); - break; - case MCA_PML_BASE_SEND_COMPLETE: - rc = mca_pml_bfo_send_request_start_prepare(sendreq, bml_btl, size); - break; - default: - if (size != 0 && bml_btl->btl_flags & MCA_BTL_FLAGS_SEND_INPLACE) { - rc = mca_pml_bfo_send_request_start_prepare(sendreq, bml_btl, size); - } else { - rc = mca_pml_bfo_send_request_start_copy(sendreq, bml_btl, size); - } - break; 
- } - } else { - size = eager_limit; - if(OPAL_UNLIKELY(btl->btl_rndv_eager_limit < eager_limit)) - size = btl->btl_rndv_eager_limit; - if(sendreq->req_send.req_send_mode == MCA_PML_BASE_SEND_BUFFERED) { - rc = mca_pml_bfo_send_request_start_buffered(sendreq, bml_btl, size); - } else if - (opal_convertor_need_buffers(&sendreq->req_send.req_base.req_convertor) == false) { - unsigned char *base; - opal_convertor_get_current_pointer( &sendreq->req_send.req_base.req_convertor, (void**)&base ); - - if( 0 != (sendreq->req_rdma_cnt = (uint32_t)mca_pml_bfo_rdma_btls( - sendreq->req_endpoint, - base, - sendreq->req_send.req_bytes_packed, - sendreq->req_rdma))) { - rc = mca_pml_bfo_send_request_start_rdma(sendreq, bml_btl, - sendreq->req_send.req_bytes_packed); - if( OPAL_UNLIKELY(OMPI_SUCCESS != rc) ) { - mca_pml_bfo_free_rdma_resources(sendreq); - } - } else { - rc = mca_pml_bfo_send_request_start_rndv(sendreq, bml_btl, size, - MCA_PML_BFO_HDR_FLAGS_CONTIG); - } - } else { -#if OPAL_CUDA_SUPPORT - if (sendreq->req_send.req_base.req_convertor.flags & CONVERTOR_CUDA) { - return mca_pml_bfo_send_request_start_cuda(sendreq, bml_btl, size); - } -#endif /* OPAL_CUDA_SUPPORT */ - rc = mca_pml_bfo_send_request_start_rndv(sendreq, bml_btl, size, 0); - } - } - - return rc; -} - -static inline int -mca_pml_bfo_send_request_start( mca_pml_bfo_send_request_t* sendreq ) -{ - mca_pml_bfo_comm_t* comm = sendreq->req_send.req_base.req_comm->c_pml_comm; - mca_bml_base_endpoint_t* endpoint = (mca_bml_base_endpoint_t*) - sendreq->req_send.req_base.req_proc->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_BML]; - size_t i; - - if( OPAL_UNLIKELY(endpoint == NULL) ) { - return OMPI_ERR_UNREACH; - } - - sendreq->req_endpoint = endpoint; - sendreq->req_state = 0; - sendreq->req_lock = 0; - sendreq->req_pipeline_depth = 0; - sendreq->req_bytes_delivered = 0; - sendreq->req_pending = MCA_PML_BFO_SEND_PENDING_NONE; - sendreq->req_send.req_base.req_sequence = OPAL_THREAD_ADD_FETCH32( - 
&comm->procs[sendreq->req_send.req_base.req_peer].send_sequence,1); -#if PML_BFO - sendreq->req_restartseq = 0; /* counts up restarts */ - sendreq->req_restart = 0; /* reset in case we restart again */ - sendreq->req_error = 0; /* clear error state */ - sendreq->req_events = 0; /* clear events, probably 0 anyways */ -#endif /* PML_BFO */ - - MCA_PML_BASE_SEND_START( &sendreq->req_send.req_base ); - - for(i = 0; i < mca_bml_base_btl_array_get_size(&endpoint->btl_eager); i++) { - mca_bml_base_btl_t* bml_btl; - int rc; - - /* select a btl */ - bml_btl = mca_bml_base_btl_array_get_next(&endpoint->btl_eager); - rc = mca_pml_bfo_send_request_start_btl(sendreq, bml_btl); - if( OPAL_LIKELY(OMPI_ERR_OUT_OF_RESOURCE != rc) ) - return rc; - } - add_request_to_send_pending(sendreq, MCA_PML_BFO_SEND_PENDING_START, true); - - return OMPI_SUCCESS; -} - -/** - * Initiate a put scheduled by the receiver. - */ - -void mca_pml_bfo_send_request_put( mca_pml_bfo_send_request_t* sendreq, - mca_btl_base_module_t* btl, - mca_pml_bfo_rdma_hdr_t* hdr ); - -int mca_pml_bfo_send_request_put_frag(mca_pml_bfo_rdma_frag_t* frag); - -/* This function tries to continue sendreq that was stuck because of resource - * unavailability. A sendreq may be added to send_pending list if there is no - * resource to send initial packet or there is not resource to schedule data - * for sending. The reason the sendreq was added to the list is stored inside - * sendreq struct and appropriate operation is retried when resource became - * available. 
bml_btl passed to the function doesn't represents sendreq - * destination, it represents BTL on which resource was freed, so only this BTL - * should be considered for sending packets */ -void mca_pml_bfo_send_request_process_pending(mca_bml_base_btl_t *bml_btl); - -void mca_pml_bfo_send_request_copy_in_out(mca_pml_bfo_send_request_t *sendreq, - uint64_t send_offset, uint64_t send_length); - -END_C_DECLS - -#endif /* OMPI_PML_BFO_SEND_REQUEST_H */ diff --git a/ompi/mca/pml/bfo/pml_bfo_start.c b/ompi/mca/pml/bfo/pml_bfo_start.c deleted file mode 100644 index 93e6216c92d..00000000000 --- a/ompi/mca/pml/bfo/pml_bfo_start.c +++ /dev/null @@ -1,148 +0,0 @@ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2016 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * Copyright (c) 2006 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2010 Oracle and/or its affiliates. All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "ompi_config.h" - -#include "pml_bfo.h" -#include "pml_bfo_recvreq.h" -#include "pml_bfo_sendreq.h" -#include "ompi/memchecker.h" - - -int mca_pml_bfo_start(size_t count, ompi_request_t** requests) -{ - int rc; - size_t i; - bool reuse_old_request = true; - - for(i=0; ireq_type) { - continue; - } - - /* If the persistent request is currently active - obtain the - * request lock and verify the status is incomplete. if the - * pml layer has not completed the request - mark the request - * as free called - so that it will be freed when the request - * completes - and create a new request. 
- */ - -#if OPAL_ENABLE_MULTI_THREADS - opal_atomic_rmb(); -#endif - reuse_old_request = true; - switch(pml_request->req_ompi.req_state) { - case OMPI_REQUEST_INACTIVE: - if(pml_request->req_pml_complete == true) - break; - /* otherwise fall through */ - case OMPI_REQUEST_ACTIVE: { - - ompi_request_t *request; - if (pml_request->req_pml_complete == false) { - /* free request after it completes */ - pml_request->req_free_called = true; - } else { - /* can reuse the existing request */ - break; - } - - reuse_old_request = false; - /* allocate a new request */ - switch(pml_request->req_type) { - case MCA_PML_REQUEST_SEND: { - mca_pml_base_send_mode_t sendmode = - ((mca_pml_base_send_request_t*)pml_request)->req_send_mode; - rc = mca_pml_bfo_isend_init( - pml_request->req_addr, - pml_request->req_count, - pml_request->req_datatype, - pml_request->req_peer, - pml_request->req_tag, - sendmode, - pml_request->req_comm, - &request); - break; - } - case MCA_PML_REQUEST_RECV: - rc = mca_pml_bfo_irecv_init( - pml_request->req_addr, - pml_request->req_count, - pml_request->req_datatype, - pml_request->req_peer, - pml_request->req_tag, - pml_request->req_comm, - &request); - break; - default: - rc = OMPI_ERR_REQUEST; - break; - } - if(OMPI_SUCCESS != rc) - return rc; - pml_request = (mca_pml_base_request_t*)request; - requests[i] = request; - break; - } - default: - return OMPI_ERR_REQUEST; - } - - /* start the request */ - switch(pml_request->req_type) { - case MCA_PML_REQUEST_SEND: - { - mca_pml_bfo_send_request_t* sendreq = (mca_pml_bfo_send_request_t*)pml_request; - MEMCHECKER( - memchecker_call(&opal_memchecker_base_isdefined, - pml_request->req_addr, pml_request->req_count, - pml_request->req_datatype); - ); - if( reuse_old_request && (sendreq->req_send.req_bytes_packed != 0) ) { - size_t offset = 0; - /** - * Reset the convertor in case we're dealing with the original - * request, which when completed do not reset the convertor. 
- */ - opal_convertor_set_position( &sendreq->req_send.req_base.req_convertor, - &offset ); - } - MCA_PML_BFO_SEND_REQUEST_START(sendreq, rc); - if(rc != OMPI_SUCCESS) - return rc; - break; - } - case MCA_PML_REQUEST_RECV: - { - mca_pml_bfo_recv_request_t* recvreq = (mca_pml_bfo_recv_request_t*)pml_request; - MCA_PML_BFO_RECV_REQUEST_START(recvreq); - break; - } - default: - return OMPI_ERR_REQUEST; - } - } - return OMPI_SUCCESS; -} - diff --git a/ompi/mca/pml/bfo/post_configure.sh b/ompi/mca/pml/bfo/post_configure.sh deleted file mode 100644 index 77a7d52608a..00000000000 --- a/ompi/mca/pml/bfo/post_configure.sh +++ /dev/null @@ -1 +0,0 @@ -DIRECT_CALL_HEADER="ompi/mca/pml/bfo/pml_bfo.h" diff --git a/ompi/mca/pml/ob1/pml_ob1.c b/ompi/mca/pml/ob1/pml_ob1.c index 5adf19028a8..f4cc24c8ba0 100644 --- a/ompi/mca/pml/ob1/pml_ob1.c +++ b/ompi/mca/pml/ob1/pml_ob1.c @@ -807,7 +807,10 @@ int mca_pml_ob1_ft_event( int state ) if(OPAL_CRS_CHECKPOINT == state) { if( opal_cr_timing_barrier_enabled ) { OPAL_CR_SET_TIMER(OPAL_CR_TIMER_CRCPBR1); - opal_pmix.fence(NULL, 0); + if (OMPI_SUCCESS != (ret = opal_pmix.fence(NULL, 0))) { + opal_output(0, "pml:ob1: ft_event(Restart): Failed to fence complete"); + return ret; + } } OPAL_CR_SET_TIMER(OPAL_CR_TIMER_P2P0); @@ -818,7 +821,10 @@ int mca_pml_ob1_ft_event( int state ) if( !first_continue_pass ) { if( opal_cr_timing_barrier_enabled ) { OPAL_CR_SET_TIMER(OPAL_CR_TIMER_COREBR0); - opal_pmix.fence(NULL, 0); + if (OMPI_SUCCESS != (ret = opal_pmix.fence(NULL, 0))) { + opal_output(0, "pml:ob1: ft_event(Restart): Failed to fence complete"); + return ret; + } } OPAL_CR_SET_TIMER(OPAL_CR_TIMER_P2P2); } @@ -918,13 +924,19 @@ int mca_pml_ob1_ft_event( int state ) if( !first_continue_pass ) { if( opal_cr_timing_barrier_enabled ) { OPAL_CR_SET_TIMER(OPAL_CR_TIMER_P2PBR1); - opal_pmix.fence(NULL, 0); + if (OMPI_SUCCESS != (ret = opal_pmix.fence(NULL, 0))) { + opal_output(0, "pml:ob1: ft_event(Restart): Failed to fence complete"); + return ret; + 
} } OPAL_CR_SET_TIMER(OPAL_CR_TIMER_P2P3); } if (opal_cr_continue_like_restart && !first_continue_pass) { - opal_pmix.fence(NULL, 0); + if (OMPI_SUCCESS != (ret = opal_pmix.fence(NULL, 0))) { + opal_output(0, "pml:ob1: ft_event(Restart): Failed to fence complete"); + return ret; + } /* * Startup the PML stack now that the modex is running again @@ -936,7 +948,10 @@ int mca_pml_ob1_ft_event( int state ) } /* Is this barrier necessary ? JJH */ - opal_pmix.fence(NULL, 0); + if (OMPI_SUCCESS != (ret = opal_pmix.fence(NULL, 0))) { + opal_output(0, "pml:ob1: ft_event(Restart): Failed to fence complete"); + return ret; + } if( NULL != procs ) { for(p = 0; p < (int)num_procs; ++p) { @@ -949,7 +964,10 @@ int mca_pml_ob1_ft_event( int state ) if( !first_continue_pass ) { if( opal_cr_timing_barrier_enabled ) { OPAL_CR_SET_TIMER(OPAL_CR_TIMER_P2PBR2); - opal_pmix.fence(NULL, 0); + if (OMPI_SUCCESS != (ret = opal_pmix.fence(NULL, 0))) { + opal_output(0, "pml:ob1: ft_event(Restart): Failed to fence complete"); + return ret; + } } OPAL_CR_SET_TIMER(OPAL_CR_TIMER_CRCP1); } @@ -962,7 +980,10 @@ int mca_pml_ob1_ft_event( int state ) * Exchange the modex information once again. * BTLs will have republished their modex information. */ - opal_pmix.fence(NULL, 0); + if (OMPI_SUCCESS != (ret = opal_pmix.fence(NULL, 0))) { + opal_output(0, "pml:ob1: ft_event(Restart): Failed to fence complete"); + return ret; + } /* * Startup the PML stack now that the modex is running again @@ -974,7 +995,10 @@ int mca_pml_ob1_ft_event( int state ) } /* Is this barrier necessary ? 
JJH */ - opal_pmix.fence(NULL, 0); + if (OMPI_SUCCESS != (ret = opal_pmix.fence(NULL, 0))) { + opal_output(0, "pml:ob1: ft_event(Restart): Failed to fence complete"); + return ret; + } if( NULL != procs ) { for(p = 0; p < (int)num_procs; ++p) { diff --git a/ompi/mca/pml/ob1/pml_ob1_component.c b/ompi/mca/pml/ob1/pml_ob1_component.c index 60345cab68c..26670f5dfc7 100644 --- a/ompi/mca/pml/ob1/pml_ob1_component.c +++ b/ompi/mca/pml/ob1/pml_ob1_component.c @@ -14,6 +14,8 @@ * Copyright (c) 2010 Oracle and/or its affiliates. All rights reserved * Copyright (c) 2013-2017 Los Alamos National Security, LLC. All rights * reserved. + * Copyright (c) 2018 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -309,8 +311,14 @@ int mca_pml_ob1_component_fini(void) if(OMPI_SUCCESS != (rc = mca_bml.bml_finalize())) return rc; - if(!mca_pml_ob1.enabled) + if(!mca_pml_ob1.enabled) { + if( NULL != mca_pml_ob1.allocator ) { + (void)mca_pml_ob1.allocator->alc_finalize(mca_pml_ob1.allocator); + mca_pml_ob1.allocator = NULL; + } + return OMPI_SUCCESS; /* never selected.. return success.. */ + } mca_pml_ob1.enabled = false; /* not anymore */ /* return the static receive/send requests to the respective free list and diff --git a/ompi/mca/pml/ob1/pml_ob1_rdmafrag.h b/ompi/mca/pml/ob1/pml_ob1_rdmafrag.h index 70a390d8073..176c830974c 100644 --- a/ompi/mca/pml/ob1/pml_ob1_rdmafrag.h +++ b/ompi/mca/pml/ob1/pml_ob1_rdmafrag.h @@ -3,7 +3,7 @@ * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. - * Copyright (c) 2004-2013 The University of Tennessee and The University + * Copyright (c) 2004-2019 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. 
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, @@ -46,7 +46,8 @@ struct mca_pml_ob1_rdma_frag_t { mca_bml_base_btl_t *rdma_bml; mca_pml_ob1_hdr_t rdma_hdr; mca_pml_ob1_rdma_state_t rdma_state; - size_t rdma_length; + size_t rdma_length; /* how much the fragment will transfer */ + opal_atomic_size_t rdma_bytes_remaining; /* how much is left to be transferred */ void *rdma_req; uint32_t retries; mca_pml_ob1_rdma_frag_callback_t cbfunc; @@ -71,7 +72,6 @@ OBJ_CLASS_DECLARATION(mca_pml_ob1_rdma_frag_t); #define MCA_PML_OB1_RDMA_FRAG_RETURN(frag) \ do { \ - /* return fragment */ \ if (frag->local_handle) { \ mca_bml_base_deregister_mem (frag->rdma_bml, frag->local_handle); \ frag->local_handle = NULL; \ diff --git a/ompi/mca/pml/ob1/pml_ob1_recvfrag.c b/ompi/mca/pml/ob1/pml_ob1_recvfrag.c index 5e62cea1b51..66482b4bc62 100644 --- a/ompi/mca/pml/ob1/pml_ob1_recvfrag.c +++ b/ompi/mca/pml/ob1/pml_ob1_recvfrag.c @@ -3,7 +3,7 @@ * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. - * Copyright (c) 2004-2018 The University of Tennessee and The University + * Copyright (c) 2004-2019 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. 
* Copyright (c) 2004-2007 High Performance Computing Center Stuttgart, @@ -558,10 +558,6 @@ void mca_pml_ob1_recv_frag_callback_ack(mca_btl_base_module_t* btl, * then throttle sends */ if(hdr->hdr_common.hdr_flags & MCA_PML_OB1_HDR_FLAGS_NORDMA) { if (NULL != sendreq->rdma_frag) { - if (NULL != sendreq->rdma_frag->local_handle) { - mca_bml_base_deregister_mem (sendreq->req_rdma[0].bml_btl, sendreq->rdma_frag->local_handle); - sendreq->rdma_frag->local_handle = NULL; - } MCA_PML_OB1_RDMA_FRAG_RETURN(sendreq->rdma_frag); sendreq->rdma_frag = NULL; } @@ -929,19 +925,21 @@ static int mca_pml_ob1_recv_frag_match( mca_btl_base_module_t *btl, frag_msg_seq = hdr->hdr_seq; next_msg_seq_expected = (uint16_t)proc->expected_sequence; - /* If the sequence number is wrong, queue it up for later. */ - if(OPAL_UNLIKELY(frag_msg_seq != next_msg_seq_expected)) { - mca_pml_ob1_recv_frag_t* frag; - MCA_PML_OB1_RECV_FRAG_ALLOC(frag); - MCA_PML_OB1_RECV_FRAG_INIT(frag, hdr, segments, num_segments, btl); - append_frag_to_ordered_list(&proc->frags_cant_match, frag, next_msg_seq_expected); + if (!OMPI_COMM_CHECK_ASSERT_ALLOW_OVERTAKE(comm_ptr)) { + /* If the sequence number is wrong, queue it up for later. 
*/ + if(OPAL_UNLIKELY(frag_msg_seq != next_msg_seq_expected)) { + mca_pml_ob1_recv_frag_t* frag; + MCA_PML_OB1_RECV_FRAG_ALLOC(frag); + MCA_PML_OB1_RECV_FRAG_INIT(frag, hdr, segments, num_segments, btl); + append_frag_to_ordered_list(&proc->frags_cant_match, frag, next_msg_seq_expected); - SPC_RECORD(OMPI_SPC_OUT_OF_SEQUENCE, 1); - SPC_RECORD(OMPI_SPC_OOS_IN_QUEUE, 1); - SPC_UPDATE_WATERMARK(OMPI_SPC_MAX_OOS_IN_QUEUE, OMPI_SPC_OOS_IN_QUEUE); + SPC_RECORD(OMPI_SPC_OUT_OF_SEQUENCE, 1); + SPC_RECORD(OMPI_SPC_OOS_IN_QUEUE, 1); + SPC_UPDATE_WATERMARK(OMPI_SPC_MAX_OOS_IN_QUEUE, OMPI_SPC_OOS_IN_QUEUE); - OB1_MATCHING_UNLOCK(&comm->matching_lock); - return OMPI_SUCCESS; + OB1_MATCHING_UNLOCK(&comm->matching_lock); + return OMPI_SUCCESS; + } } /* mca_pml_ob1_recv_frag_match_proc() will release the lock. */ @@ -977,6 +975,10 @@ mca_pml_ob1_recv_frag_match_proc( mca_btl_base_module_t *btl, match_this_frag: /* We're now expecting the next sequence number. */ + /* NOTE: We should have checked for the ALLOW_OVERTAKE comm flag here + * but adding a branch in this critical path is not ideal for performance. + * We decided to let the sequence number keep advancing even though we are not doing + * anything with it. */ proc->expected_sequence++; /* We generate the SEARCH_POSTED_QUEUE only when the message is diff --git a/ompi/mca/pml/ob1/pml_ob1_recvreq.c b/ompi/mca/pml/ob1/pml_ob1_recvreq.c index 06b8e84f6cb..70969415c49 100644 --- a/ompi/mca/pml/ob1/pml_ob1_recvreq.c +++ b/ompi/mca/pml/ob1/pml_ob1_recvreq.c @@ -3,7 +3,7 @@ * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. - * Copyright (c) 2004-2018 The University of Tennessee and The University + * Copyright (c) 2004-2019 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. 
* Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, @@ -313,7 +313,12 @@ static int mca_pml_ob1_recv_request_ack( return OMPI_SUCCESS; } - /* let know to shedule function there is no need to put ACK flag */ + /* let the schedule function know there is no need to put the ACK flag. If not all of the message went over + * RDMA then we cancel the GET protocol in order to switch back to send/recv. In this case send + * back the remote send request, the peer kept a pointer to the frag locally. In the future we + * might want to cancel the fragment itself, in which case we will have to send back the remote + * fragment instead of the remote request. + */ recvreq->req_ack_sent = true; return mca_pml_ob1_recv_request_ack_send(proc, hdr->hdr_src_req.lval, recvreq, recvreq->req_send_offset, 0, @@ -330,7 +335,9 @@ static int mca_pml_ob1_recv_request_get_frag_failed (mca_pml_ob1_rdma_frag_t *fr if (OMPI_ERR_NOT_AVAILABLE == rc) { /* get isn't supported for this transfer. tell peer to fallback on put */ rc = mca_pml_ob1_recv_request_put_frag (frag); - if (OMPI_ERR_OUT_OF_RESOURCE == rc) { + if (OMPI_SUCCESS == rc){ + return OMPI_SUCCESS; + } else if (OMPI_ERR_OUT_OF_RESOURCE == rc) { OPAL_THREAD_LOCK(&mca_pml_ob1.lock); opal_list_append (&mca_pml_ob1.rdma_pending, (opal_list_item_t*)frag); OPAL_THREAD_UNLOCK(&mca_pml_ob1.lock); @@ -400,6 +407,7 @@ static int mca_pml_ob1_recv_request_put_frag (mca_pml_ob1_rdma_frag_t *frag) #if OPAL_ENABLE_HETEROGENEOUS_SUPPORT ompi_proc_t* proc = (ompi_proc_t*)recvreq->req_recv.req_base.req_proc; #endif + mca_btl_base_registration_handle_t *local_handle = NULL; mca_bml_base_btl_t *bml_btl = frag->rdma_bml; mca_btl_base_descriptor_t *ctl; mca_pml_ob1_rdma_hdr_t *hdr; @@ -408,6 +416,12 @@ static int mca_pml_ob1_recv_request_put_frag (mca_pml_ob1_rdma_frag_t *frag) reg_size = bml_btl->btl->btl_registration_handle_size; + if (frag->local_handle) { + local_handle = frag->local_handle; + } else if (recvreq->local_handle) { + local_handle = 
recvreq->local_handle; + } + /* prepare a descriptor for rdma control message */ mca_bml_base_alloc (bml_btl, &ctl, MCA_BTL_NO_ORDER, sizeof (mca_pml_ob1_rdma_hdr_t) + reg_size, MCA_BTL_DES_FLAGS_PRIORITY | MCA_BTL_DES_FLAGS_BTL_OWNERSHIP | @@ -421,7 +435,7 @@ static int mca_pml_ob1_recv_request_put_frag (mca_pml_ob1_rdma_frag_t *frag) hdr = (mca_pml_ob1_rdma_hdr_t *) ctl->des_segments->seg_addr.pval; mca_pml_ob1_rdma_hdr_prepare (hdr, (!recvreq->req_ack_sent) ? MCA_PML_OB1_HDR_TYPE_ACK : 0, recvreq->remote_req_send.lval, frag, recvreq, frag->rdma_offset, - frag->local_address, frag->rdma_length, frag->local_handle, + frag->local_address, frag->rdma_length, local_handle, reg_size); ob1_hdr_hton(hdr, MCA_PML_OB1_HDR_TYPE_PUT, proc); @@ -643,7 +657,6 @@ void mca_pml_ob1_recv_request_progress_rget( mca_pml_ob1_recv_request_t* recvreq int rc; prev_sent = offset = 0; - bytes_remaining = hdr->hdr_rndv.hdr_msg_length; recvreq->req_recv.req_bytes_packed = hdr->hdr_rndv.hdr_msg_length; recvreq->req_send_offset = 0; recvreq->req_rdma_offset = 0; diff --git a/ompi/mca/pml/ob1/pml_ob1_sendreq.c b/ompi/mca/pml/ob1/pml_ob1_sendreq.c index 1626e13e353..2474374572d 100644 --- a/ompi/mca/pml/ob1/pml_ob1_sendreq.c +++ b/ompi/mca/pml/ob1/pml_ob1_sendreq.c @@ -3,7 +3,7 @@ * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. - * Copyright (c) 2004-2018 The University of Tennessee and The University + * Copyright (c) 2004-2019 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. 
* Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, @@ -41,7 +41,6 @@ #include "ompi/mca/bml/base/base.h" #include "ompi/memchecker.h" - OBJ_CLASS_INSTANCE(mca_pml_ob1_send_range_t, opal_free_list_item_t, NULL, NULL); @@ -148,10 +147,7 @@ static void mca_pml_ob1_send_request_destruct(mca_pml_ob1_send_request_t* req) { OBJ_DESTRUCT(&req->req_send_ranges); OBJ_DESTRUCT(&req->req_send_range_lock); - if (req->rdma_frag) { - MCA_PML_OB1_RDMA_FRAG_RETURN(req->rdma_frag); - req->rdma_frag = NULL; - } + assert( NULL == req->rdma_frag ); } OBJ_CLASS_INSTANCE( mca_pml_ob1_send_request_t, @@ -262,12 +258,20 @@ mca_pml_ob1_rget_completion (mca_pml_ob1_rdma_frag_t *frag, int64_t rdma_length) { mca_pml_ob1_send_request_t *sendreq = (mca_pml_ob1_send_request_t *) frag->rdma_req; mca_bml_base_btl_t *bml_btl = frag->rdma_bml; + size_t frag_remaining; /* count bytes of user data actually delivered and check for request completion */ if (OPAL_LIKELY(0 < rdma_length)) { - OPAL_THREAD_ADD_FETCH_SIZE_T(&sendreq->req_bytes_delivered, (size_t) rdma_length); + frag_remaining = OPAL_THREAD_SUB_FETCH_SIZE_T(&frag->rdma_bytes_remaining, (size_t)rdma_length); SPC_USER_OR_MPI(sendreq->req_send.req_base.req_ompi.req_status.MPI_TAG, (ompi_spc_value_t)rdma_length, OMPI_SPC_BYTES_SENT_USER, OMPI_SPC_BYTES_SENT_MPI); + + if( 0 == frag_remaining ) { /* this frag is now completed. Update the request and be done */ + OPAL_THREAD_ADD_FETCH_SIZE_T(&sendreq->req_bytes_delivered, frag->rdma_length); + if( sendreq->rdma_frag == frag ) + sendreq->rdma_frag = NULL; + MCA_PML_OB1_RDMA_FRAG_RETURN(frag); + } } send_request_pml_complete_check(sendreq); @@ -701,6 +705,7 @@ int mca_pml_ob1_send_request_start_rdma( mca_pml_ob1_send_request_t* sendreq, frag->rdma_req = sendreq; frag->rdma_bml = bml_btl; frag->rdma_length = size; + frag->rdma_bytes_remaining = size; frag->cbfunc = mca_pml_ob1_rget_completion; /* do not store the local handle in the fragment. 
it will be released by mca_pml_ob1_free_rdma_resources */ diff --git a/ompi/mca/pml/ob1/pml_ob1_sendreq.h b/ompi/mca/pml/ob1/pml_ob1_sendreq.h index 06e4abb4672..ae8f5afe2c5 100644 --- a/ompi/mca/pml/ob1/pml_ob1_sendreq.h +++ b/ompi/mca/pml/ob1/pml_ob1_sendreq.h @@ -216,10 +216,7 @@ static inline void mca_pml_ob1_send_request_fini (mca_pml_ob1_send_request_t *se { /* Let the base handle the reference counts */ MCA_PML_BASE_SEND_REQUEST_FINI((&(sendreq)->req_send)); - if (sendreq->rdma_frag) { - MCA_PML_OB1_RDMA_FRAG_RETURN (sendreq->rdma_frag); - sendreq->rdma_frag = NULL; - } + assert( NULL == sendreq->rdma_frag ); } /* diff --git a/ompi/mca/pml/ucx/pml_ucx.c b/ompi/mca/pml/ucx/pml_ucx.c index 10c66396db7..fb7b7f84615 100644 --- a/ompi/mca/pml/ucx/pml_ucx.c +++ b/ompi/mca/pml/ucx/pml_ucx.c @@ -16,6 +16,7 @@ #include "opal/runtime/opal.h" #include "opal/mca/pmix/pmix.h" +#include "ompi/attribute/attribute.h" #include "ompi/message/message.h" #include "ompi/mca/pml/base/pml_base_bsend.h" #include "opal/mca/common/ucx/common_ucx.h" @@ -49,43 +50,78 @@ #define MODEX_KEY "pml-ucx" mca_pml_ucx_module_t ompi_pml_ucx = { - { - mca_pml_ucx_add_procs, - mca_pml_ucx_del_procs, - mca_pml_ucx_enable, - NULL, - mca_pml_ucx_add_comm, - mca_pml_ucx_del_comm, - mca_pml_ucx_irecv_init, - mca_pml_ucx_irecv, - mca_pml_ucx_recv, - mca_pml_ucx_isend_init, - mca_pml_ucx_isend, - mca_pml_ucx_send, - mca_pml_ucx_iprobe, - mca_pml_ucx_probe, - mca_pml_ucx_start, - mca_pml_ucx_improbe, - mca_pml_ucx_mprobe, - mca_pml_ucx_imrecv, - mca_pml_ucx_mrecv, - mca_pml_ucx_dump, - NULL, /* FT */ - 1ul << (PML_UCX_CONTEXT_BITS), - 1ul << (PML_UCX_TAG_BITS - 1), + .super = { + .pml_add_procs = mca_pml_ucx_add_procs, + .pml_del_procs = mca_pml_ucx_del_procs, + .pml_enable = mca_pml_ucx_enable, + .pml_progress = NULL, + .pml_add_comm = mca_pml_ucx_add_comm, + .pml_del_comm = mca_pml_ucx_del_comm, + .pml_irecv_init = mca_pml_ucx_irecv_init, + .pml_irecv = mca_pml_ucx_irecv, + .pml_recv = mca_pml_ucx_recv, 
+ .pml_isend_init = mca_pml_ucx_isend_init, + .pml_isend = mca_pml_ucx_isend, + .pml_send = mca_pml_ucx_send, + .pml_iprobe = mca_pml_ucx_iprobe, + .pml_probe = mca_pml_ucx_probe, + .pml_start = mca_pml_ucx_start, + .pml_improbe = mca_pml_ucx_improbe, + .pml_mprobe = mca_pml_ucx_mprobe, + .pml_imrecv = mca_pml_ucx_imrecv, + .pml_mrecv = mca_pml_ucx_mrecv, + .pml_dump = mca_pml_ucx_dump, + .pml_ft_event = NULL, + .pml_max_contextid = (1ul << (PML_UCX_CONTEXT_BITS)) - 1, + .pml_max_tag = (1ul << (PML_UCX_TAG_BITS - 1)) - 1 }, - NULL, /* ucp_context */ - NULL /* ucp_worker */ + .ucp_context = NULL, + .ucp_worker = NULL }; #define PML_UCX_REQ_ALLOCA() \ ((char *)alloca(ompi_pml_ucx.request_size) + ompi_pml_ucx.request_size); +#if HAVE_UCP_WORKER_ADDRESS_FLAGS +static int mca_pml_ucx_send_worker_address_type(int addr_flags, int modex_scope) +{ + ucs_status_t status; + ucp_worker_attr_t attrs; + int rc; + + attrs.field_mask = UCP_WORKER_ATTR_FIELD_ADDRESS | + UCP_WORKER_ATTR_FIELD_ADDRESS_FLAGS; + attrs.address_flags = addr_flags; + + status = ucp_worker_query(ompi_pml_ucx.ucp_worker, &attrs); + if (UCS_OK != status) { + PML_UCX_ERROR("Failed to query UCP worker address"); + return OMPI_ERROR; + } + + OPAL_MODEX_SEND(rc, modex_scope, &mca_pml_ucx_component.pmlm_version, + (void*)attrs.address, attrs.address_length); + + ucp_worker_release_address(ompi_pml_ucx.ucp_worker, attrs.address); + + if (OMPI_SUCCESS != rc) { + return OMPI_ERROR; + } + + PML_UCX_VERBOSE(2, "Pack %s worker address, size %ld", + (modex_scope == OPAL_PMIX_LOCAL) ? 
"local" : "remote", + attrs.address_length); + + return OMPI_SUCCESS; +} +#endif static int mca_pml_ucx_send_worker_address(void) { - ucp_address_t *address; ucs_status_t status; + +#if !HAVE_UCP_WORKER_ADDRESS_FLAGS + ucp_address_t *address; size_t addrlen; int rc; @@ -95,16 +131,35 @@ static int mca_pml_ucx_send_worker_address(void) return OMPI_ERROR; } + PML_UCX_VERBOSE(2, "Pack worker address, size %ld", addrlen); + OPAL_MODEX_SEND(rc, OPAL_PMIX_GLOBAL, &mca_pml_ucx_component.pmlm_version, (void*)address, addrlen); + + ucp_worker_release_address(ompi_pml_ucx.ucp_worker, address); + if (OMPI_SUCCESS != rc) { - PML_UCX_ERROR("Open MPI couldn't distribute EP connection details"); - return OMPI_ERROR; + goto err; + } +#else + /* Pack just network device addresses for remote node peers */ + status = mca_pml_ucx_send_worker_address_type(UCP_WORKER_ADDRESS_FLAG_NET_ONLY, + OPAL_PMIX_REMOTE); + if (UCS_OK != status) { + goto err; } - ucp_worker_release_address(ompi_pml_ucx.ucp_worker, address); + status = mca_pml_ucx_send_worker_address_type(0, OPAL_PMIX_LOCAL); + if (UCS_OK != status) { + goto err; + } +#endif return OMPI_SUCCESS; + +err: + PML_UCX_ERROR("Open MPI couldn't distribute EP connection details"); + return OMPI_ERROR; } static int mca_pml_ucx_recv_worker_address(ompi_proc_t *proc, @@ -120,6 +175,9 @@ static int mca_pml_ucx_recv_worker_address(ompi_proc_t *proc, PML_UCX_ERROR("Failed to receive UCX worker address: %s (%d)", opal_strerror(ret), ret); } + + PML_UCX_VERBOSE(2, "Got proc %d address, size %ld", + proc->super.proc_name.vpid, *addrlen_p); return ret; } @@ -139,22 +197,27 @@ int mca_pml_ucx_open(void) } /* Initialize UCX context */ - params.field_mask = UCP_PARAM_FIELD_FEATURES | - UCP_PARAM_FIELD_REQUEST_SIZE | - UCP_PARAM_FIELD_REQUEST_INIT | - UCP_PARAM_FIELD_REQUEST_CLEANUP | - UCP_PARAM_FIELD_TAG_SENDER_MASK | - UCP_PARAM_FIELD_MT_WORKERS_SHARED | - UCP_PARAM_FIELD_ESTIMATED_NUM_EPS; - params.features = UCP_FEATURE_TAG; - params.request_size = 
sizeof(ompi_request_t); - params.request_init = mca_pml_ucx_request_init; - params.request_cleanup = mca_pml_ucx_request_cleanup; - params.tag_sender_mask = PML_UCX_SPECIFIC_SOURCE_MASK; + params.field_mask = UCP_PARAM_FIELD_FEATURES | + UCP_PARAM_FIELD_REQUEST_SIZE | + UCP_PARAM_FIELD_REQUEST_INIT | + UCP_PARAM_FIELD_REQUEST_CLEANUP | + UCP_PARAM_FIELD_TAG_SENDER_MASK | + UCP_PARAM_FIELD_MT_WORKERS_SHARED | + UCP_PARAM_FIELD_ESTIMATED_NUM_EPS; + params.features = UCP_FEATURE_TAG; + params.request_size = sizeof(ompi_request_t); + params.request_init = mca_pml_ucx_request_init; + params.request_cleanup = mca_pml_ucx_request_cleanup; + params.tag_sender_mask = PML_UCX_SPECIFIC_SOURCE_MASK; params.mt_workers_shared = 0; /* we do not need mt support for context since it will be protected by worker */ params.estimated_num_eps = ompi_proc_world_size(); +#if HAVE_DECL_UCP_PARAM_FIELD_ESTIMATED_NUM_PPN + params.estimated_num_ppn = opal_process_info.num_local_peers + 1; + params.field_mask |= UCP_PARAM_FIELD_ESTIMATED_NUM_PPN; +#endif + status = ucp_init(¶ms, config, &ompi_pml_ucx.ucp_context); ucp_config_release(config); @@ -187,19 +250,18 @@ int mca_pml_ucx_close(void) return OMPI_SUCCESS; } -int mca_pml_ucx_init(void) +int mca_pml_ucx_init(int enable_mpi_threads) { ucp_worker_params_t params; - ucs_status_t status; ucp_worker_attr_t attr; - int rc; + ucs_status_t status; + int i, rc; PML_UCX_VERBOSE(1, "mca_pml_ucx_init"); /* TODO check MPI thread mode */ params.field_mask = UCP_WORKER_PARAM_FIELD_THREAD_MODE; - params.thread_mode = UCS_THREAD_MODE_SINGLE; - if (ompi_mpi_thread_multiple) { + if (enable_mpi_threads) { params.thread_mode = UCS_THREAD_MODE_MULTI; } else { params.thread_mode = UCS_THREAD_MODE_SINGLE; @@ -209,30 +271,35 @@ int mca_pml_ucx_init(void) &ompi_pml_ucx.ucp_worker); if (UCS_OK != status) { PML_UCX_ERROR("Failed to create UCP worker"); - return OMPI_ERROR; + rc = OMPI_ERROR; + goto err; } attr.field_mask = UCP_WORKER_ATTR_FIELD_THREAD_MODE; status = 
ucp_worker_query(ompi_pml_ucx.ucp_worker, &attr); if (UCS_OK != status) { - ucp_worker_destroy(ompi_pml_ucx.ucp_worker); - ompi_pml_ucx.ucp_worker = NULL; PML_UCX_ERROR("Failed to query UCP worker thread level"); - return OMPI_ERROR; + rc = OMPI_ERROR; + goto err_destroy_worker; } - if (ompi_mpi_thread_multiple && attr.thread_mode != UCS_THREAD_MODE_MULTI) { + if (enable_mpi_threads && (attr.thread_mode != UCS_THREAD_MODE_MULTI)) { /* UCX does not support multithreading, disqualify current PML for now */ /* TODO: we should let OMPI to fallback to THREAD_SINGLE mode */ - ucp_worker_destroy(ompi_pml_ucx.ucp_worker); - ompi_pml_ucx.ucp_worker = NULL; - PML_UCX_ERROR("UCP worker does not support MPI_THREAD_MULTIPLE"); - return OMPI_ERROR; + PML_UCX_VERBOSE(1, "UCP worker does not support MPI_THREAD_MULTIPLE. " + "PML UCX could not be selected"); + rc = OMPI_ERR_NOT_SUPPORTED; + goto err_destroy_worker; } rc = mca_pml_ucx_send_worker_address(); if (rc < 0) { - return rc; + goto err_destroy_worker; + } + + ompi_pml_ucx.datatype_attr_keyval = MPI_KEYVAL_INVALID; + for (i = 0; i < OMPI_DATATYPE_MAX_PREDEFINED; ++i) { + ompi_pml_ucx.predefined_types[i] = PML_UCX_DATATYPE_INVALID; } /* Initialize the free lists */ @@ -248,15 +315,34 @@ int mca_pml_ucx_init(void) PML_UCX_VERBOSE(2, "created ucp context %p, worker %p", (void *)ompi_pml_ucx.ucp_context, (void *)ompi_pml_ucx.ucp_worker); - return OMPI_SUCCESS; + return rc; + +err_destroy_worker: + ucp_worker_destroy(ompi_pml_ucx.ucp_worker); + ompi_pml_ucx.ucp_worker = NULL; +err: + return OMPI_ERROR; } int mca_pml_ucx_cleanup(void) { + int i; + PML_UCX_VERBOSE(1, "mca_pml_ucx_cleanup"); opal_progress_unregister(mca_pml_ucx_progress); + if (ompi_pml_ucx.datatype_attr_keyval != MPI_KEYVAL_INVALID) { + ompi_attr_free_keyval(TYPE_ATTR, &ompi_pml_ucx.datatype_attr_keyval, false); + } + + for (i = 0; i < OMPI_DATATYPE_MAX_PREDEFINED; ++i) { + if (ompi_pml_ucx.predefined_types[i] != PML_UCX_DATATYPE_INVALID) { + 
ucp_dt_destroy(ompi_pml_ucx.predefined_types[i]); + ompi_pml_ucx.predefined_types[i] = PML_UCX_DATATYPE_INVALID; + } + } + ompi_pml_ucx.completed_send_req.req_state = OMPI_REQUEST_INVALID; OMPI_REQUEST_FINI(&ompi_pml_ucx.completed_send_req); OBJ_DESTRUCT(&ompi_pml_ucx.completed_send_req); @@ -341,6 +427,7 @@ int mca_pml_ucx_add_procs(struct ompi_proc_t **procs, size_t nprocs) } } + opal_common_ucx_mca_proc_added(); return OMPI_SUCCESS; } @@ -368,78 +455,52 @@ static inline ucp_ep_h mca_pml_ucx_get_ep(ompi_communicator_t *comm, int rank) return NULL; } -static void mca_pml_ucx_waitall(void **reqs, int *count_p) -{ - int i; - - PML_UCX_VERBOSE(2, "waiting for %d disconnect requests", *count_p); - for (i = 0; i < *count_p; ++i) { - opal_common_ucx_wait_request(reqs[i], ompi_pml_ucx.ucp_worker, "ucp_disconnect_nb"); - reqs[i] = NULL; - } - - *count_p = 0; -} - int mca_pml_ucx_del_procs(struct ompi_proc_t **procs, size_t nprocs) { ompi_proc_t *proc; - int num_reqs; - size_t max_reqs; - void *dreq, **dreqs; - ucp_ep_h ep; + opal_common_ucx_del_proc_t *del_procs; size_t i; + int ret; - max_reqs = ompi_pml_ucx.num_disconnect; - if (max_reqs > nprocs) { - max_reqs = nprocs; - } - - dreqs = malloc(sizeof(*dreqs) * max_reqs); - if (dreqs == NULL) { + del_procs = malloc(sizeof(*del_procs) * nprocs); + if (del_procs == NULL) { return OMPI_ERR_OUT_OF_RESOURCE; } - num_reqs = 0; - for (i = 0; i < nprocs; ++i) { - proc = procs[(i + OMPI_PROC_MY_NAME->vpid) % nprocs]; - ep = proc->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_PML]; - if (ep == NULL) { - continue; - } + proc = procs[i]; + del_procs[i].ep = proc->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_PML]; + del_procs[i].vpid = proc->super.proc_name.vpid; + /* mark peer as disconnected */ proc->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_PML] = NULL; - - PML_UCX_VERBOSE(2, "disconnecting from rank %d", proc->super.proc_name.vpid); - dreq = ucp_disconnect_nb(ep); - if (dreq != NULL) { - if (UCS_PTR_IS_ERR(dreq)) { - 
PML_UCX_ERROR("ucp_disconnect_nb(%d) failed: %s", - proc->super.proc_name.vpid, - ucs_status_string(UCS_PTR_STATUS(dreq))); - continue; - } else { - dreqs[num_reqs++] = dreq; - if (num_reqs >= ompi_pml_ucx.num_disconnect) { - mca_pml_ucx_waitall(dreqs, &num_reqs); - } - } - } } - /* num_reqs == 0 is processed by mca_pml_ucx_waitall routine, - * so suppress coverity warning */ - /* coverity[uninit_use_in_call] */ - mca_pml_ucx_waitall(dreqs, &num_reqs); - free(dreqs); - opal_common_ucx_mca_pmix_fence(ompi_pml_ucx.ucp_worker); + ret = opal_common_ucx_del_procs(del_procs, nprocs, OMPI_PROC_MY_NAME->vpid, + ompi_pml_ucx.num_disconnect, ompi_pml_ucx.ucp_worker); + free(del_procs); - return OMPI_SUCCESS; + return ret; } int mca_pml_ucx_enable(bool enable) { + ompi_attribute_fn_ptr_union_t copy_fn; + ompi_attribute_fn_ptr_union_t del_fn; + int ret; + + /* Create a key for adding custom attributes to datatypes */ + copy_fn.attr_datatype_copy_fn = + (MPI_Type_internal_copy_attr_function*)MPI_TYPE_NULL_COPY_FN; + del_fn.attr_datatype_delete_fn = mca_pml_ucx_datatype_attr_del_fn; + ret = ompi_attr_create_keyval(TYPE_ATTR, copy_fn, del_fn, + &ompi_pml_ucx.datatype_attr_keyval, NULL, 0, + NULL); + if (ret != OMPI_SUCCESS) { + PML_UCX_ERROR("Failed to create keyval for UCX datatypes: %d", ret); + return ret; + } + PML_UCX_FREELIST_INIT(&ompi_pml_ucx.persistent_reqs, mca_pml_ucx_persistent_request_t, 128, -1, 128); @@ -479,11 +540,12 @@ int mca_pml_ucx_irecv_init(void *buf, size_t count, ompi_datatype_t *datatype, PML_UCX_TRACE_RECV("irecv_init request *%p=%p", buf, count, datatype, src, tag, comm, (void*)request, (void*)req); - req->ompi.req_state = OMPI_REQUEST_INACTIVE; - req->flags = 0; - req->buffer = buf; - req->count = count; - req->datatype = mca_pml_ucx_get_datatype(datatype); + req->ompi.req_state = OMPI_REQUEST_INACTIVE; + req->ompi.req_mpi_object.comm = comm; + req->flags = 0; + req->buffer = buf; + req->count = count; + req->datatype.datatype = 
mca_pml_ucx_get_datatype(datatype); PML_UCX_MAKE_RECV_TAG(req->tag, req->recv.tag_mask, tag, src, comm); @@ -512,7 +574,8 @@ int mca_pml_ucx_irecv(void *buf, size_t count, ompi_datatype_t *datatype, } PML_UCX_VERBOSE(8, "got request %p", (void*)req); - *request = req; + req->req_mpi_object.comm = comm; + *request = req; return OMPI_SUCCESS; } @@ -534,13 +597,12 @@ int mca_pml_ucx_recv(void *buf, size_t count, ompi_datatype_t *datatype, int src mca_pml_ucx_get_datatype(datatype), ucp_tag, ucp_tag_mask, req); - for (;;) { + MCA_COMMON_UCX_PROGRESS_LOOP(ompi_pml_ucx.ucp_worker) { status = ucp_request_test(req, &info); if (status != UCS_INPROGRESS) { mca_pml_ucx_set_recv_status_safe(mpi_status, status, &info); return OMPI_SUCCESS; } - opal_progress(); } } @@ -585,18 +647,20 @@ int mca_pml_ucx_isend_init(const void *buf, size_t count, ompi_datatype_t *datat return OMPI_ERROR; } - req->ompi.req_state = OMPI_REQUEST_INACTIVE; - req->flags = MCA_PML_UCX_REQUEST_FLAG_SEND; - req->buffer = (void *)buf; - req->count = count; - req->tag = PML_UCX_MAKE_SEND_TAG(tag, comm); - req->send.mode = mode; - req->send.ep = ep; + req->ompi.req_state = OMPI_REQUEST_INACTIVE; + req->ompi.req_mpi_object.comm = comm; + req->flags = MCA_PML_UCX_REQUEST_FLAG_SEND; + req->buffer = (void *)buf; + req->count = count; + req->tag = PML_UCX_MAKE_SEND_TAG(tag, comm); + req->send.mode = mode; + req->send.ep = ep; + if (MCA_PML_BASE_SEND_BUFFERED == mode) { - req->ompi_datatype = datatype; + req->datatype.ompi_datatype = datatype; OBJ_RETAIN(datatype); } else { - req->datatype = mca_pml_ucx_get_datatype(datatype); + req->datatype.datatype = mca_pml_ucx_get_datatype(datatype); } *request = &req->ompi; @@ -709,7 +773,8 @@ int mca_pml_ucx_isend(const void *buf, size_t count, ompi_datatype_t *datatype, return OMPI_SUCCESS; } else if (!UCS_PTR_IS_ERR(req)) { PML_UCX_VERBOSE(8, "got request %p", (void*)req); - *request = req; + req->req_mpi_object.comm = comm; + *request = req; return OMPI_SUCCESS; } else { 
PML_UCX_ERROR("ucx send failed: %s", ucs_status_string(UCS_PTR_STATUS(req))); @@ -727,16 +792,12 @@ mca_pml_ucx_send_nb(ucp_ep_h ep, const void *buf, size_t count, req = (ompi_request_t*)mca_pml_ucx_common_send(ep, buf, count, datatype, mca_pml_ucx_get_datatype(datatype), - tag, mode, - mca_pml_ucx_send_completion); - + tag, mode, cb); if (OPAL_LIKELY(req == NULL)) { return OMPI_SUCCESS; } else if (!UCS_PTR_IS_ERR(req)) { PML_UCX_VERBOSE(8, "got request %p", (void*)req); - ucp_worker_progress(ompi_pml_ucx.ucp_worker); - ompi_request_wait(&req, MPI_STATUS_IGNORE); - return OMPI_SUCCESS; + MCA_COMMON_UCX_WAIT_LOOP(req, ompi_pml_ucx.ucp_worker, "ucx send", ompi_request_free(&req)); } else { PML_UCX_ERROR("ucx send failed: %s", ucs_status_string(UCS_PTR_STATUS(req))); return OMPI_ERROR; @@ -749,7 +810,7 @@ mca_pml_ucx_send_nbr(ucp_ep_h ep, const void *buf, size_t count, ucp_datatype_t ucx_datatype, ucp_tag_t tag) { - void *req; + ucs_status_ptr_t req; ucs_status_t status; /* coverity[bad_alloc_arithmetic] */ @@ -759,12 +820,7 @@ mca_pml_ucx_send_nbr(ucp_ep_h ep, const void *buf, size_t count, return OMPI_SUCCESS; } - ucp_worker_progress(ompi_pml_ucx.ucp_worker); - while ((status = ucp_request_check_status(req)) == UCS_INPROGRESS) { - opal_progress(); - } - - return OPAL_LIKELY(UCS_OK == status) ? 
OMPI_SUCCESS : OMPI_ERROR; + MCA_COMMON_UCX_WAIT_LOOP(req, ompi_pml_ucx.ucp_worker, "ucx send", (void)0); } #endif @@ -798,8 +854,10 @@ int mca_pml_ucx_send(const void *buf, size_t count, ompi_datatype_t *datatype, i } int mca_pml_ucx_iprobe(int src, int tag, struct ompi_communicator_t* comm, - int *matched, ompi_status_public_t* mpi_status) + int *matched, ompi_status_public_t* mpi_status) { + static unsigned progress_count = 0; + ucp_tag_t ucp_tag, ucp_tag_mask; ucp_tag_recv_info_t info; ucp_tag_message_h ucp_msg; @@ -812,15 +870,16 @@ int mca_pml_ucx_iprobe(int src, int tag, struct ompi_communicator_t* comm, if (ucp_msg != NULL) { *matched = 1; mca_pml_ucx_set_recv_status_safe(mpi_status, UCS_OK, &info); - } else { - opal_progress(); + } else { + (++progress_count % opal_common_ucx.progress_iterations) ? + (void)ucp_worker_progress(ompi_pml_ucx.ucp_worker) : opal_progress(); *matched = 0; } return OMPI_SUCCESS; } int mca_pml_ucx_probe(int src, int tag, struct ompi_communicator_t* comm, - ompi_status_public_t* mpi_status) + ompi_status_public_t* mpi_status) { ucp_tag_t ucp_tag, ucp_tag_mask; ucp_tag_recv_info_t info; @@ -829,22 +888,23 @@ int mca_pml_ucx_probe(int src, int tag, struct ompi_communicator_t* comm, PML_UCX_TRACE_PROBE("probe", src, tag, comm); PML_UCX_MAKE_RECV_TAG(ucp_tag, ucp_tag_mask, tag, src, comm); - for (;;) { - ucp_msg = ucp_tag_probe_nb(ompi_pml_ucx.ucp_worker, ucp_tag, ucp_tag_mask, - 0, &info); + + MCA_COMMON_UCX_PROGRESS_LOOP(ompi_pml_ucx.ucp_worker) { + ucp_msg = ucp_tag_probe_nb(ompi_pml_ucx.ucp_worker, ucp_tag, + ucp_tag_mask, 0, &info); if (ucp_msg != NULL) { mca_pml_ucx_set_recv_status_safe(mpi_status, UCS_OK, &info); return OMPI_SUCCESS; } - - opal_progress(); } } int mca_pml_ucx_improbe(int src, int tag, struct ompi_communicator_t* comm, - int *matched, struct ompi_message_t **message, - ompi_status_public_t* mpi_status) + int *matched, struct ompi_message_t **message, + ompi_status_public_t* mpi_status) { + static unsigned 
progress_count = 0; + ucp_tag_t ucp_tag, ucp_tag_mask; ucp_tag_recv_info_t info; ucp_tag_message_h ucp_msg; @@ -860,7 +920,8 @@ int mca_pml_ucx_improbe(int src, int tag, struct ompi_communicator_t* comm, *matched = 1; mca_pml_ucx_set_recv_status_safe(mpi_status, UCS_OK, &info); } else { - opal_progress(); + (++progress_count % opal_common_ucx.progress_iterations) ? + (void)ucp_worker_progress(ompi_pml_ucx.ucp_worker) : opal_progress(); *matched = 0; } return OMPI_SUCCESS; @@ -877,7 +938,7 @@ int mca_pml_ucx_mprobe(int src, int tag, struct ompi_communicator_t* comm, PML_UCX_TRACE_PROBE("mprobe", src, tag, comm); PML_UCX_MAKE_RECV_TAG(ucp_tag, ucp_tag_mask, tag, src, comm); - for (;;) { + MCA_COMMON_UCX_PROGRESS_LOOP(ompi_pml_ucx.ucp_worker) { ucp_msg = ucp_tag_probe_nb(ompi_pml_ucx.ucp_worker, ucp_tag, ucp_tag_mask, 1, &info); if (ucp_msg != NULL) { @@ -886,8 +947,6 @@ int mca_pml_ucx_mprobe(int src, int tag, struct ompi_communicator_t* comm, mca_pml_ucx_set_recv_status_safe(mpi_status, UCS_OK, &info); return OMPI_SUCCESS; } - - opal_progress(); } } @@ -959,8 +1018,8 @@ int mca_pml_ucx_start(size_t count, ompi_request_t** requests) tmp_req = (ompi_request_t*)mca_pml_ucx_common_send(preq->send.ep, preq->buffer, preq->count, - preq->ompi_datatype, - preq->datatype, + preq->datatype.ompi_datatype, + preq->datatype.datatype, preq->tag, preq->send.mode, mca_pml_ucx_psend_completion); @@ -968,7 +1027,8 @@ int mca_pml_ucx_start(size_t count, ompi_request_t** requests) PML_UCX_VERBOSE(8, "start recv request %p", (void*)preq); tmp_req = (ompi_request_t*)ucp_tag_recv_nb(ompi_pml_ucx.ucp_worker, preq->buffer, preq->count, - preq->datatype, preq->tag, + preq->datatype.datatype, + preq->tag, preq->recv.tag_mask, mca_pml_ucx_precv_completion); } diff --git a/ompi/mca/pml/ucx/pml_ucx.h b/ompi/mca/pml/ucx/pml_ucx.h index da1b3ef0c57..f073b56a549 100644 --- a/ompi/mca/pml/ucx/pml_ucx.h +++ b/ompi/mca/pml/ucx/pml_ucx.h @@ -15,6 +15,7 @@ #include "ompi/mca/pml/pml.h" #include 
"ompi/mca/pml/base/base.h" #include "ompi/datatype/ompi_datatype.h" +#include "ompi/datatype/ompi_datatype_internal.h" #include "ompi/communicator/communicator.h" #include "ompi/request/request.h" #include "opal/mca/common/ucx/common_ucx.h" @@ -42,6 +43,10 @@ struct mca_pml_ucx_module { ucp_context_h ucp_context; ucp_worker_h ucp_worker; + /* Datatypes */ + int datatype_attr_keyval; + ucp_datatype_t predefined_types[OMPI_DATATYPE_MPI_MAX_PREDEFINED]; + /* Requests */ mca_pml_ucx_freelist_t persistent_reqs; ompi_request_t completed_send_req; @@ -59,7 +64,7 @@ extern mca_pml_ucx_module_t ompi_pml_ucx; int mca_pml_ucx_open(void); int mca_pml_ucx_close(void); -int mca_pml_ucx_init(void); +int mca_pml_ucx_init(int enable_mpi_threads); int mca_pml_ucx_cleanup(void); int mca_pml_ucx_add_procs(struct ompi_proc_t **procs, size_t nprocs); diff --git a/ompi/mca/pml/ucx/pml_ucx_component.c b/ompi/mca/pml/ucx/pml_ucx_component.c index 27e89977314..28b00fee4f1 100644 --- a/ompi/mca/pml/ucx/pml_ucx_component.c +++ b/ompi/mca/pml/ucx/pml_ucx_component.c @@ -26,25 +26,25 @@ mca_pml_base_component_2_0_0_t mca_pml_ucx_component = { /* First, the mca_base_component_t struct containing meta * information about the component itself */ - { + .pmlm_version = { MCA_PML_BASE_VERSION_2_0_0, - "ucx", /* MCA component name */ - OMPI_MAJOR_VERSION, /* MCA component major version */ - OMPI_MINOR_VERSION, /* MCA component minor version */ - OMPI_RELEASE_VERSION, /* MCA component release version */ - mca_pml_ucx_component_open, /* component open */ - mca_pml_ucx_component_close, /* component close */ - NULL, - mca_pml_ucx_component_register, + .mca_component_name = "ucx", + .mca_component_major_version = OMPI_MAJOR_VERSION, + .mca_component_minor_version = OMPI_MINOR_VERSION, + .mca_component_release_version = OMPI_RELEASE_VERSION, + .mca_open_component = mca_pml_ucx_component_open, + .mca_close_component = mca_pml_ucx_component_close, + .mca_query_component = NULL, + 
.mca_register_component_params = mca_pml_ucx_component_register, }, - { + .pmlm_data = { /* This component is not checkpoint ready */ - MCA_BASE_METADATA_PARAM_NONE + .param_field = MCA_BASE_METADATA_PARAM_NONE }, - mca_pml_ucx_component_init, /* component init */ - mca_pml_ucx_component_fini /* component finalize */ + .pmlm_init = mca_pml_ucx_component_init, + .pmlm_finalize = mca_pml_ucx_component_fini }; static int mca_pml_ucx_component_register(void) @@ -64,6 +64,7 @@ static int mca_pml_ucx_component_register(void) OPAL_INFO_LVL_3, MCA_BASE_VAR_SCOPE_LOCAL, &ompi_pml_ucx.num_disconnect); + opal_common_ucx_mca_var_register(&mca_pml_ucx_component.pmlm_version); return 0; } @@ -89,11 +90,11 @@ static int mca_pml_ucx_component_close(void) static mca_pml_base_module_t* mca_pml_ucx_component_init(int* priority, bool enable_progress_threads, - bool enable_mpi_threads) + bool enable_mpi_threads) { int ret; - if ( (ret = mca_pml_ucx_init()) != 0) { + if ( (ret = mca_pml_ucx_init(enable_mpi_threads)) != 0) { return NULL; } diff --git a/ompi/mca/pml/ucx/pml_ucx_datatype.c b/ompi/mca/pml/ucx/pml_ucx_datatype.c index 98b7b190df7..95f9da44cc8 100644 --- a/ompi/mca/pml/ucx/pml_ucx_datatype.c +++ b/ompi/mca/pml/ucx/pml_ucx_datatype.c @@ -10,6 +10,7 @@ #include "pml_ucx_datatype.h" #include "ompi/runtime/mpiruntime.h" +#include "ompi/attribute/attribute.h" #include @@ -127,12 +128,25 @@ static ucp_generic_dt_ops_t pml_ucx_generic_datatype_ops = { .finish = pml_ucx_generic_datatype_finish }; +int mca_pml_ucx_datatype_attr_del_fn(ompi_datatype_t* datatype, int keyval, + void *attr_val, void *extra) +{ + ucp_datatype_t ucp_datatype = (ucp_datatype_t)attr_val; + + PML_UCX_ASSERT((uint64_t)ucp_datatype == datatype->pml_data); + + ucp_dt_destroy(ucp_datatype); + datatype->pml_data = PML_UCX_DATATYPE_INVALID; + return OMPI_SUCCESS; +} + ucp_datatype_t mca_pml_ucx_init_datatype(ompi_datatype_t *datatype) { ucp_datatype_t ucp_datatype; ucs_status_t status; ptrdiff_t lb; size_t size; + 
int ret; ompi_datatype_type_lb(datatype, &lb); @@ -147,16 +161,33 @@ ucp_datatype_t mca_pml_ucx_init_datatype(ompi_datatype_t *datatype) } status = ucp_dt_create_generic(&pml_ucx_generic_datatype_ops, - datatype, &ucp_datatype); + datatype, &ucp_datatype); if (status != UCS_OK) { PML_UCX_ERROR("Failed to create UCX datatype for %s", datatype->name); ompi_mpi_abort(&ompi_mpi_comm_world.comm, 1); } + datatype->pml_data = ucp_datatype; + + /* Add custom attribute, to clean up UCX resources when OMPI datatype is + * released. + */ + if (ompi_datatype_is_predefined(datatype)) { + PML_UCX_ASSERT(datatype->id < OMPI_DATATYPE_MAX_PREDEFINED); + ompi_pml_ucx.predefined_types[datatype->id] = ucp_datatype; + } else { + ret = ompi_attr_set_c(TYPE_ATTR, datatype, &datatype->d_keyhash, + ompi_pml_ucx.datatype_attr_keyval, + (void*)ucp_datatype, false); + if (ret != OMPI_SUCCESS) { + PML_UCX_ERROR("Failed to add UCX datatype attribute for %s: %d", + datatype->name, ret); + ompi_mpi_abort(&ompi_mpi_comm_world.comm, 1); + } + } + PML_UCX_VERBOSE(7, "created generic UCX datatype 0x%"PRIx64, ucp_datatype) - // TODO put this on a list to be destroyed later - datatype->pml_data = ucp_datatype; return ucp_datatype; } diff --git a/ompi/mca/pml/ucx/pml_ucx_datatype.h b/ompi/mca/pml/ucx/pml_ucx_datatype.h index 26b1835a153..f5207cecc75 100644 --- a/ompi/mca/pml/ucx/pml_ucx_datatype.h +++ b/ompi/mca/pml/ucx/pml_ucx_datatype.h @@ -13,6 +13,8 @@ #include "pml_ucx.h" +#define PML_UCX_DATATYPE_INVALID 0 + struct pml_ucx_convertor { opal_free_list_item_t super; ompi_datatype_t *datatype; @@ -23,6 +25,9 @@ struct pml_ucx_convertor { ucp_datatype_t mca_pml_ucx_init_datatype(ompi_datatype_t *datatype); +int mca_pml_ucx_datatype_attr_del_fn(ompi_datatype_t* datatype, int keyval, + void *attr_val, void *extra); + OBJ_CLASS_DECLARATION(mca_pml_ucx_convertor_t); @@ -30,7 +35,7 @@ static inline ucp_datatype_t mca_pml_ucx_get_datatype(ompi_datatype_t *datatype) { ucp_datatype_t ucp_type = 
datatype->pml_data; - if (OPAL_LIKELY(ucp_type != 0)) { + if (OPAL_LIKELY(ucp_type != PML_UCX_DATATYPE_INVALID)) { return ucp_type; } diff --git a/ompi/mca/pml/ucx/pml_ucx_request.c b/ompi/mca/pml/ucx/pml_ucx_request.c index 05533914a4c..536ac95e79b 100644 --- a/ompi/mca/pml/ucx/pml_ucx_request.c +++ b/ompi/mca/pml/ucx/pml_ucx_request.c @@ -85,10 +85,10 @@ mca_pml_ucx_persistent_request_complete(mca_pml_ucx_persistent_request_t *preq, ompi_request_t *tmp_req) { preq->ompi.req_status = tmp_req->req_status; - ompi_request_complete(&preq->ompi, true); - mca_pml_ucx_persistent_request_detach(preq, tmp_req); mca_pml_ucx_request_reset(tmp_req); + mca_pml_ucx_persistent_request_detach(preq, tmp_req); ucp_request_free(tmp_req); + ompi_request_complete(&preq->ompi, true); } static inline void mca_pml_ucx_preq_completion(ompi_request_t *tmp_req) @@ -176,7 +176,7 @@ static int mca_pml_ucx_persistent_request_free(ompi_request_t **rptr) } if ((preq->flags & MCA_PML_UCX_REQUEST_FLAG_SEND) && (MCA_PML_BASE_SEND_BUFFERED == preq->send.mode)) { - OBJ_RELEASE(preq->ompi_datatype); + OBJ_RELEASE(preq->datatype.ompi_datatype); } PML_UCX_FREELIST_RETURN(&ompi_pml_ucx.persistent_reqs, &preq->ompi.super); *rptr = MPI_REQUEST_NULL; @@ -228,7 +228,7 @@ void mca_pml_ucx_completed_request_init(ompi_request_t *ompi_req) mca_pml_ucx_request_init_common(ompi_req, false, OMPI_REQUEST_ACTIVE, mca_pml_completed_request_free, mca_pml_completed_request_cancel); + ompi_req->req_mpi_object.comm = &ompi_mpi_comm_world.comm; ompi_request_complete(ompi_req, false); - } diff --git a/ompi/mca/pml/ucx/pml_ucx_request.h b/ompi/mca/pml/ucx/pml_ucx_request.h index 9166f042ae9..cb53d30bcee 100644 --- a/ompi/mca/pml/ucx/pml_ucx_request.h +++ b/ompi/mca/pml/ucx/pml_ucx_request.h @@ -102,7 +102,7 @@ struct pml_ucx_persistent_request { union { ucp_datatype_t datatype; ompi_datatype_t *ompi_datatype; - }; + } datatype; ucp_tag_t tag; struct { mca_pml_base_send_mode_t mode; diff --git a/ompi/mca/pml/yalla/pml_yalla.c 
b/ompi/mca/pml/yalla/pml_yalla.c index 7890293c330..03bb65d420d 100644 --- a/ompi/mca/pml/yalla/pml_yalla.c +++ b/ompi/mca/pml/yalla/pml_yalla.c @@ -265,6 +265,7 @@ int mca_pml_yalla_add_procs(struct ompi_proc_t **procs, size_t nprocs) int mca_pml_yalla_del_procs(struct ompi_proc_t **procs, size_t nprocs) { size_t i; + int ret; if (ompi_mpi_state >= OMPI_MPI_STATE_FINALIZE_STARTED) { PML_YALLA_VERBOSE(3, "%s", "using bulk powerdown"); @@ -276,7 +277,9 @@ int mca_pml_yalla_del_procs(struct ompi_proc_t **procs, size_t nprocs) PML_YALLA_VERBOSE(2, "disconnected from rank %s", OPAL_NAME_PRINT(procs[i]->super.proc_name)); procs[i]->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_PML] = NULL; } - opal_pmix.fence(NULL, 0); + if (OMPI_SUCCESS != (ret = opal_pmix.fence(NULL, 0))) { + return ret; + } return OMPI_SUCCESS; } diff --git a/ompi/mca/rte/orte/Makefile.am b/ompi/mca/rte/orte/Makefile.am index 451436373b3..34051dcea6c 100644 --- a/ompi/mca/rte/orte/Makefile.am +++ b/ompi/mca/rte/orte/Makefile.am @@ -2,7 +2,7 @@ # Copyright (c) 2012 Los Alamos National Security, LLC. # All rights reserved. # Copyright (c) 2014 Cisco Systems, Inc. All rights reserved. -# Copyright (c) 2016-2017 Intel, Inc. All rights reserved. +# Copyright (c) 2016-2018 Intel, Inc. All rights reserved. 
# $COPYRIGHT$ # # Additional copyrights may follow @@ -28,13 +28,7 @@ libmca_rte_orte_la_SOURCES =$(sources) $(headers) libmca_rte_orte_la_LDFLAGS = -module -avoid-version libmca_rte_orte_la_LIBADD = $(top_builddir)/orte/lib@ORTE_LIB_PREFIX@open-rte.la -man_pages = mpirun.1 mpiexec.1 ompi-ps.1 ompi-clean.1 ompi-top.1 ompi-server.1 - -if OPAL_WANT_PRUN -if WANT_INSTALL_HEADERS -man_pages += ompi-dvm.1 -endif -endif +man_pages = mpirun.1 mpiexec.1 ompi-clean.1 ompi-server.1 if OPAL_INSTALL_BINARIES nodist_man_MANS = $(man_pages) @@ -42,24 +36,14 @@ nodist_man_MANS = $(man_pages) install-exec-hook: (cd $(DESTDIR)$(bindir); rm -f mpirun$(EXEEXT); $(LN_S) orterun$(EXEEXT) mpirun$(EXEEXT)) (cd $(DESTDIR)$(bindir); rm -f mpiexec$(EXEEXT); $(LN_S) orterun$(EXEEXT) mpiexec$(EXEEXT)) - (cd $(DESTDIR)$(bindir); rm -f ompi-ps$(EXEEXT); $(LN_S) orte-ps$(EXEEXT) ompi-ps$(EXEEXT)) (cd $(DESTDIR)$(bindir); rm -f ompi-clean$(EXEEXT); $(LN_S) orte-clean$(EXEEXT) ompi-clean$(EXEEXT)) - (cd $(DESTDIR)$(bindir); rm -f ompi-top$(EXEEXT); $(LN_S) orte-top$(EXEEXT) ompi-top$(EXEEXT)) (cd $(DESTDIR)$(bindir); rm -f ompi-server$(EXEEXT); $(LN_S) orte-server$(EXEEXT) ompi-server$(EXEEXT)) -if OPAL_WANT_PRUN - (cd $(DESTDIR)$(bindir); rm -f ompi-dvm$(EXEEXT); $(LN_S) orte-dvm$(EXEEXT) ompi-dvm$(EXEEXT)) -endif uninstall-local: rm -f $(DESTDIR)$(bindir)/mpirun$(EXEEXT) \ $(DESTDIR)$(bindir)/mpiexec$(EXEEXT) \ - $(DESTDIR)$(bindir)/ompi-ps$(EXEEXT) \ $(DESTDIR)$(bindir)/ompi-clean$(EXEEXT) \ - $(DESTDIR)$(bindir)/ompi-top$(EXEEXT) \ $(DESTDIR)$(bindir)/ompi-server$(EXEEXT) -if OPAL_WANT_PRUN - rm -f $(DESTDIR)$(bindir)/ompi-dvm$(EXEEXT) -endif endif # OPAL_INSTALL_BINARIES @@ -72,34 +56,17 @@ mpirun.1: $(top_builddir)/orte/tools/orterun/orterun.1 mpiexec.1: $(top_builddir)/orte/tools/orterun/orterun.1 cp -f $(top_builddir)/orte/tools/orterun/orterun.1 mpiexec.1 -$(top_builddir)/orte/tools/orte-ps/orte-ps.1: - (cd $(top_builddir)/orte/tools/orte-ps && $(MAKE) $(AM_MAKEFLAGS) orte-ps.1) - 
-ompi-ps.1: $(top_builddir)/orte/tools/orte-ps/orte-ps.1 - cp -f $(top_builddir)/orte/tools/orte-ps/orte-ps.1 ompi-ps.1 - $(top_builddir)/orte/tools/orte-clean/orte-clean.1: (cd $(top_builddir)/orte/tools/orte-clean && $(MAKE) $(AM_MAKEFLAGS) orte-clean.1) ompi-clean.1: $(top_builddir)/orte/tools/orte-clean/orte-clean.1 cp -f $(top_builddir)/orte/tools/orte-clean/orte-clean.1 ompi-clean.1 -$(top_builddir)/orte/tools/orte-top/orte-top.1: - (cd $(top_builddir)/orte/tools/orte-top && $(MAKE) $(AM_MAKEFLAGS) orte-top.1) - -ompi-top.1: $(top_builddir)/orte/tools/orte-top/orte-top.1 - cp -f $(top_builddir)/orte/tools/orte-top/orte-top.1 ompi-top.1 - $(top_builddir)/orte/tools/orte-server/orte-server.1: (cd $(top_builddir)/orte/tools/orte-server && $(MAKE) $(AM_MAKEFLAGS) orte-server.1) ompi-server.1: $(top_builddir)/orte/tools/orte-server/orte-server.1 cp -f $(top_builddir)/orte/tools/orte-server/orte-server.1 ompi-server.1 -if OPAL_WANT_PRUN -ompi-dvm.1: $(top_builddir)/orte/tools/orte-dvm/orte-dvm.1 - cp -f $(top_builddir)/orte/tools/orte-dvm/orte-dvm.1 ompi-dvm.1 -endif - clean-local: rm -f $(man_pages) diff --git a/ompi/mca/sharedfp/lockedfile/sharedfp_lockedfile_file_open.c b/ompi/mca/sharedfp/lockedfile/sharedfp_lockedfile_file_open.c index 66c17ee22df..b82be1f8932 100644 --- a/ompi/mca/sharedfp/lockedfile/sharedfp_lockedfile_file_open.c +++ b/ompi/mca/sharedfp/lockedfile/sharedfp_lockedfile_file_open.c @@ -35,6 +35,7 @@ #include #endif #include +#include int mca_sharedfp_lockedfile_file_open (struct ompi_communicator_t *comm, const char* filename, @@ -47,7 +48,9 @@ int mca_sharedfp_lockedfile_file_open (struct ompi_communicator_t *comm, int handle; struct mca_sharedfp_lockedfile_data * module_data = NULL; struct mca_sharedfp_base_data_t* sh; - + pid_t my_pid; + int int_pid; + /*Memory is allocated here for the sh structure*/ sh = (struct mca_sharedfp_base_data_t*)malloc(sizeof(struct mca_sharedfp_base_data_t)); if ( NULL == sh){ @@ -87,16 +90,28 @@ int 
mca_sharedfp_lockedfile_file_open (struct ompi_communicator_t *comm, return err; } - size_t filenamelen = strlen(filename) + 16; + if ( 0 == fh->f_rank ) { + my_pid = getpid(); + int_pid = (int) my_pid; + } + err = comm->c_coll->coll_bcast (&int_pid, 1, MPI_INT, 0, comm, comm->c_coll->coll_bcast_module ); + if ( OMPI_SUCCESS != err ) { + opal_output(0, "[%d]mca_sharedfp_lockedfile_file_open: Error in bcast operation\n", fh->f_rank); + free (sh); + free(module_data); + return err; + } + + size_t filenamelen = strlen(filename) + 24; lockedfilename = (char*)malloc(sizeof(char) * filenamelen); if ( NULL == lockedfilename ) { free (sh); free (module_data); return OMPI_ERR_OUT_OF_RESOURCE; } - snprintf(lockedfilename, filenamelen, "%s-%u%s",filename,masterjobid,".lock"); + snprintf(lockedfilename, filenamelen, "%s-%u-%d%s",filename,masterjobid,int_pid,".lock"); module_data->filename = lockedfilename; - + /*-------------------------------------------------*/ /*Open the lockedfile without shared file pointer */ /*-------------------------------------------------*/ diff --git a/ompi/mca/sharedfp/sm/sharedfp_sm_file_open.c b/ompi/mca/sharedfp/sm/sharedfp_sm_file_open.c index dc7a9f7162a..6526ee52480 100644 --- a/ompi/mca/sharedfp/sm/sharedfp_sm_file_open.c +++ b/ompi/mca/sharedfp/sm/sharedfp_sm_file_open.c @@ -44,7 +44,7 @@ #include #include #include - +#include int mca_sharedfp_sm_file_open (struct ompi_communicator_t *comm, const char* filename, @@ -62,6 +62,8 @@ int mca_sharedfp_sm_file_open (struct ompi_communicator_t *comm, struct mca_sharedfp_sm_offset sm_offset; int sm_fd; uint32_t comm_cid; + int int_pid; + pid_t my_pid; /*Memory is allocated here for the sh structure*/ if ( mca_sharedfp_sm_verbose ) { @@ -100,7 +102,7 @@ int mca_sharedfp_sm_file_open (struct ompi_communicator_t *comm, ** For sharedfp we also want to put the file backed shared memory into the tmp directory */ filename_basename = basename((char*)filename); - /* format is "%s/%s_cid-%d.sm", see below 
*/ + /* format is "%s/%s_cid-%d-%d.sm", see below */ sm_filename_length = strlen(ompi_process_info.job_session_dir) + 1 + strlen(filename_basename) + 5 + (3*sizeof(uint32_t)+1) + 4; sm_filename = (char*) malloc( sizeof(char) * sm_filename_length); if (NULL == sm_filename) { @@ -111,7 +113,21 @@ int mca_sharedfp_sm_file_open (struct ompi_communicator_t *comm, } comm_cid = ompi_comm_get_cid(comm); - sprintf(sm_filename, "%s/%s_cid-%d.sm", ompi_process_info.job_session_dir, filename_basename, comm_cid); + if ( 0 == fh->f_rank ) { + my_pid = getpid(); + int_pid = (int) my_pid; + } + err = comm->c_coll->coll_bcast (&int_pid, 1, MPI_INT, 0, comm, comm->c_coll->coll_bcast_module ); + if ( OMPI_SUCCESS != err ) { + opal_output(0,"mca_sharedfp_sm_file_open: Error in bcast operation \n"); + free(sm_filename); + free(sm_data); + free(sh); + return err; + } + + snprintf(sm_filename, sm_filename_length, "%s/%s_cid-%d-%d.sm", ompi_process_info.job_session_dir, + filename_basename, comm_cid, int_pid); /* open shared memory file, initialize to 0, map into memory */ sm_fd = open(sm_filename, O_RDWR | O_CREAT, S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH); diff --git a/ompi/mca/topo/treematch/Makefile.am b/ompi/mca/topo/treematch/Makefile.am index 27d07bc64fe..79cda832658 100644 --- a/ompi/mca/topo/treematch/Makefile.am +++ b/ompi/mca/topo/treematch/Makefile.am @@ -5,6 +5,8 @@ # Copyright (c) 2011-2015 INRIA. All rights reserved. # Copyright (c) 2011-2015 Université Bordeaux 1 # Copyright (c) 2017 IBM Corporation. All rights reserved. +# Copyright (c) 2019 Research Organization for Information Science +# and Technology (RIST). All rights reserved. 
# $COPYRIGHT$ # # Additional copyrights may follow @@ -67,3 +69,5 @@ noinst_LTLIBRARIES = $(lib) libmca_topo_treematch_la_SOURCES = $(lib_sources) libmca_topo_treematch_la_LDFLAGS = -module -avoid-version +distclean-local: + rm -f config.h diff --git a/ompi/mca/topo/treematch/configure.m4 b/ompi/mca/topo/treematch/configure.m4 index c937df36114..81a5ad56e4e 100644 --- a/ompi/mca/topo/treematch/configure.m4 +++ b/ompi/mca/topo/treematch/configure.m4 @@ -6,7 +6,9 @@ # Copyright (c) 2011-2015 INRIA. All rights reserved. # Copyright (c) 2011-2015 Universite Bordeaux 1 # Copyright (c) 2015 Cisco Systems, Inc. All rights reserved. -# Copyright (c) 2015 Intel, Inc. All rights reserved. +# Copyright (c) 2015 Intel, Inc. All rights reserved. +# Copyright (c) 2019 Research Organization for Information Science +# and Technology (RIST). All rights reserved. # $COPYRIGHT$ # # Additional copyrights may follow @@ -77,7 +79,21 @@ AC_DEFUN([MCA_ompi_topo_treematch_CONFIG], [ [ompi_topo_treematch_happy=1])])]) AS_IF([test $ompi_topo_treematch_happy -eq 1], - [$1], + [AS_IF([test "x$treematch_files_local" = "xyes"], + [AS_IF([! test -d $OMPI_TOP_BUILDDIR/ompi/mca/topo/treematch], + [mkdir -p $OMPI_TOP_BUILDDIR/ompi/mca/topo/treematch]) + cat > $OMPI_TOP_BUILDDIR/ompi/mca/topo/treematch/config.h << EOF +/* + * This file is automatically generated by configure. Edits will be lost + * + * This is an dummy config.h in order to prevent the embedded treematch from using + * the config.h from the embedded hwloc + * + * see https://github.com/open-mpi/ompi/pull/6185#issuecomment-458807930 + */ +EOF + ]) + $1], [AS_IF([test ! -z "$with_treematch" && test "$with_treematch" != "no"], [AC_MSG_ERROR([TreeMatch support requested but not found. 
Aborting])]) $2]) diff --git a/ompi/mca/topo/treematch/topo_treematch_dist_graph_create.c b/ompi/mca/topo/treematch/topo_treematch_dist_graph_create.c index 891a5b041be..a2b53be0667 100644 --- a/ompi/mca/topo/treematch/topo_treematch_dist_graph_create.c +++ b/ompi/mca/topo/treematch/topo_treematch_dist_graph_create.c @@ -135,7 +135,7 @@ int mca_topo_treematch_dist_graph_create(mca_topo_base_module_t* topo_module, int *lindex_to_grank = NULL; int *nodes_roots = NULL, *k = NULL; int *localrank_to_objnum = NULL; - int depth, effective_depth = 0, obj_rank = -1; + int depth = 0, effective_depth = 0, obj_rank = -1; int num_objs_in_node = 0, num_pus_in_node = 0; int numlevels = 0, num_nodes = 0, num_procs_in_node = 0; int rank, size, newrank = -1, hwloc_err, i, j, idx; diff --git a/ompi/mca/topo/treematch/treematch/tm_topology.c b/ompi/mca/topo/treematch/treematch/tm_topology.c index 4445b45634c..1ecf51657b8 100644 --- a/ompi/mca/topo/treematch/treematch/tm_topology.c +++ b/ompi/mca/topo/treematch/treematch/tm_topology.c @@ -141,7 +141,14 @@ double ** topology_to_arch(hwloc_topology_t topology) double **arch = NULL; nb_proc = hwloc_get_nbobjs_by_type(topology, HWLOC_OBJ_PU); + if( nb_proc <= 0 ) { /* if multiple levels with PUs */ + return NULL; + } arch = (double**)MALLOC(sizeof(double*)*nb_proc); + if( NULL == arch ) { + return NULL; + } + for( i = 0 ; i < nb_proc ; i++ ){ obj_proc1 = hwloc_get_obj_by_type(topology,HWLOC_OBJ_PU,i); arch[obj_proc1->os_index] = (double*)MALLOC(sizeof(double)*nb_proc); diff --git a/ompi/mca/topo/treematch/treematch/tm_tree.c b/ompi/mca/topo/treematch/treematch/tm_tree.c index ffac4e7615b..35fc2aa2fef 100644 --- a/ompi/mca/topo/treematch/treematch/tm_tree.c +++ b/ompi/mca/topo/treematch/treematch/tm_tree.c @@ -918,7 +918,7 @@ static void partial_exhaustive_search(int nb_args, void **args, int thread_id){ work_unit_t *work = (work_unit_t *) args[7]; pthread_mutex_t *lock = (pthread_mutex_t *) args[8]; int *tab_i; - int id, id1, id2; + int id 
= 0, id1, id2; int total_work = work->nb_work; int cur_work = 0; @@ -1750,7 +1750,6 @@ void group_nodes(tm_affinity_mat_t *aff_mat, tm_tree_t *tab_node, tm_tree_t *new int mat_order = aff_mat -> order; tm_tree_t **cur_group = NULL; int j, l; - unsigned long int list_size; unsigned long int i; group_list_t list, **best_selection = NULL, **tab_group = NULL; double best_val, last_best; @@ -1810,8 +1809,7 @@ void group_nodes(tm_affinity_mat_t *aff_mat, tm_tree_t *tab_node, tm_tree_t *new best_selection = (group_list_t **)MALLOC(sizeof(group_list_t*)*solution_size); list_all_possible_groups(cost_mat, tab_node, 0, arity, 0, cur_group, &list); - list_size = (int)list.val; - assert( list_size == nb_groups); + assert( nb_groups == (unsigned long int)list.val ); tab_group = (group_list_t**)MALLOC(sizeof(group_list_t*)*nb_groups); list_to_tab(list.next, tab_group, nb_groups); if(verbose_level>=INFO) diff --git a/ompi/mpi/Makefile.am b/ompi/mpi/Makefile.am index f4d2970b6c5..388076307c9 100644 --- a/ompi/mpi/Makefile.am +++ b/ompi/mpi/Makefile.am @@ -9,7 +9,7 @@ # University of Stuttgart. All rights reserved. # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. -# Copyright (c) 2006-2012 Cisco Systems, Inc. All rights reserved. +# Copyright (c) 2006-2018 Cisco Systems, Inc. All rights reserved. # Copyright (c) 2015 Research Organization for Information Science # and Technology (RIST). All rights reserved. 
# $COPYRIGHT$ @@ -20,7 +20,6 @@ # EXTRA_DIST = \ - mpi/fortran/configure-fortran-output-bottom.h \ - mpi/man/make_manpage.pl + mpi/fortran/configure-fortran-output-bottom.h dist_ompidata_DATA += mpi/help-mpi-api.txt diff --git a/ompi/mpi/c/Makefile.am b/ompi/mpi/c/Makefile.am index e12a5483889..25b32809aeb 100644 --- a/ompi/mpi/c/Makefile.am +++ b/ompi/mpi/c/Makefile.am @@ -78,6 +78,9 @@ libmpi_c_mpi_la_SOURCES = \ ialltoallv.c \ alltoallw.c \ ialltoallw.c \ + attr_delete.c \ + attr_get.c \ + attr_put.c \ barrier.c \ ibarrier.c \ bcast.c \ @@ -285,6 +288,8 @@ libmpi_c_mpi_la_SOURCES = \ ineighbor_alltoallv.c \ neighbor_alltoallw.c \ ineighbor_alltoallw.c \ + keyval_create.c \ + keyval_free.c \ op_c2f.c \ op_commutative.c \ op_create.c \ @@ -431,24 +436,20 @@ libmpi_c_mpi_la_SOURCES = \ win_unlock_all.c \ win_wait.c -if OMPI_ENABLE_MPI1_COMPAT + +# include all of the removed MPI functions in library +# for v4.0.x regardless of state of OMPI_ENABLE_MPI1_COMPAT libmpi_c_mpi_la_SOURCES += \ address.c \ - attr_delete.c \ - attr_get.c \ - attr_put.c \ errhandler_create.c \ errhandler_get.c \ errhandler_set.c \ - keyval_create.c \ - keyval_free.c \ type_extent.c \ type_hindexed.c \ type_hvector.c \ type_lb.c \ type_struct.c \ type_ub.c -endif # Conditionally install the header files diff --git a/ompi/mpi/c/address.c b/ompi/mpi/c/address.c index 67b27bef58e..0eead1faae8 100644 --- a/ompi/mpi/c/address.c +++ b/ompi/mpi/c/address.c @@ -11,6 +11,7 @@ * All rights reserved. * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. + * Copyright (c) 2019 IBM Corporation. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -21,6 +22,12 @@ #include "ompi_config.h" #include +/* This implementation has been removed from the MPI 3.0 standard. 
+ * Open MPI v4.0.x is keeping the implementation in the library, but + * removing the prototypes from the headers, unless the user configures + * with --enable-mpi1-compatibility. + */ + #include "ompi/mpi/c/bindings.h" #include "ompi/runtime/params.h" #include "ompi/communicator/communicator.h" @@ -30,6 +37,10 @@ #if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_Address = PMPI_Address #endif +/* undef before defining, to prevent possible redefinition when + * using _Static_assert to error on usage of removed functions. + */ +#undef MPI_Address #define MPI_Address PMPI_Address #endif diff --git a/ompi/mpi/c/attr_fn.c b/ompi/mpi/c/attr_fn.c index d6ab52bc704..5667f242420 100644 --- a/ompi/mpi/c/attr_fn.c +++ b/ompi/mpi/c/attr_fn.c @@ -9,6 +9,8 @@ * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. + * Copyright (c) 2018 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -131,11 +133,6 @@ int OMPI_C_MPI_COMM_DUP_FN( MPI_Comm comm, int comm_keyval, void* extra_state, return MPI_SUCCESS; } -#if !defined(OMPI_ENABLE_MPI1_COMPAT) - -#error "Need to delete the code below now that the removed functions are no longer shipping" - -#elif OMPI_ENABLE_MPI1_COMPAT int OMPI_C_MPI_NULL_DELETE_FN( MPI_Comm comm, int comm_keyval, void* attribute_val_out, void* extra_state ) @@ -159,4 +156,3 @@ int OMPI_C_MPI_DUP_FN( MPI_Comm comm, int comm_keyval, void* extra_state, *(void**)attribute_val_out = attribute_val_in; return MPI_SUCCESS; } -#endif diff --git a/ompi/mpi/c/errhandler_create.c b/ompi/mpi/c/errhandler_create.c index cae93f98f40..1edf3f64fe4 100644 --- a/ompi/mpi/c/errhandler_create.c +++ b/ompi/mpi/c/errhandler_create.c @@ -11,6 +11,7 @@ * All rights reserved. * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. 
+ * Copyright (c) 2019 IBM Corporation. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -20,6 +21,12 @@ #include "ompi_config.h" +/* This implementation has been removed from the MPI 3.0 standard. + * Open MPI v4.0.x is keeping the implementation in the library, but + * removing the prototypes from the headers, unless the user configures + * with --enable-mpi1-compatibility. + */ + #include "ompi/mpi/c/bindings.h" #include "ompi/communicator/communicator.h" #include "ompi/errhandler/errhandler.h" @@ -28,10 +35,14 @@ #if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_Errhandler_create = PMPI_Errhandler_create #endif +/* undef before defining, to prevent possible redefinition when + * using _Static_assert to error on usage of removed functions. + */ +#undef MPI_Errhandler_create #define MPI_Errhandler_create PMPI_Errhandler_create #endif -int MPI_Errhandler_create(MPI_Handler_function *function, +int MPI_Errhandler_create(MPI_Comm_errhandler_function *function, MPI_Errhandler *errhandler) { diff --git a/ompi/mpi/c/errhandler_get.c b/ompi/mpi/c/errhandler_get.c index 7125506b7ed..a8f0ed69495 100644 --- a/ompi/mpi/c/errhandler_get.c +++ b/ompi/mpi/c/errhandler_get.c @@ -11,6 +11,7 @@ * All rights reserved. * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. + * Copyright (c) 2019 IBM Corporation. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -20,6 +21,12 @@ #include "ompi_config.h" +/* This implementation has been removed from the MPI 3.0 standard. + * Open MPI v4.0.x is keeping the implementation in the library, but + * removing the prototypes from the headers, unless the user configures + * with --enable-mpi1-compatibility. 
+ */ + #include "ompi/mpi/c/bindings.h" #include "ompi/runtime/params.h" #include "ompi/communicator/communicator.h" @@ -30,6 +37,10 @@ #if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_Errhandler_get = PMPI_Errhandler_get #endif +/* undef before defining, to prevent possible redefinition when + * using _Static_assert to error on usage of removed functions. + */ +#undef MPI_Errhandler_get #define MPI_Errhandler_get PMPI_Errhandler_get #endif diff --git a/ompi/mpi/c/errhandler_set.c b/ompi/mpi/c/errhandler_set.c index c861b058903..71501fc1238 100644 --- a/ompi/mpi/c/errhandler_set.c +++ b/ompi/mpi/c/errhandler_set.c @@ -11,6 +11,7 @@ * All rights reserved. * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. + * Copyright (c) 2019 IBM Corporation. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -20,6 +21,12 @@ #include "ompi_config.h" +/* This implementation has been removed from the MPI 3.0 standard. + * Open MPI v4.0.x is keeping the implementation in the library, but + * removing the prototypes from the headers, unless the user configures + * with --enable-mpi1-compatibility. + */ + #include "ompi/mpi/c/bindings.h" #include "ompi/runtime/params.h" #include "ompi/communicator/communicator.h" @@ -30,6 +37,10 @@ #if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_Errhandler_set = PMPI_Errhandler_set #endif +/* undef before defining, to prevent possible redefinition when + * using _Static_assert to error on usage of removed functions. 
+ */ +#undef MPI_Errhandler_set #define MPI_Errhandler_set PMPI_Errhandler_set #endif diff --git a/ompi/mpi/c/file_set_view.c b/ompi/mpi/c/file_set_view.c index a49a80f29aa..c62df489aa6 100644 --- a/ompi/mpi/c/file_set_view.c +++ b/ompi/mpi/c/file_set_view.c @@ -64,6 +64,10 @@ int MPI_File_set_view(MPI_File fh, MPI_Offset disp, MPI_Datatype etype, OMPI_CHECK_DATATYPE_FOR_VIEW(rc, filetype, 0); } } + if ( NULL == datarep) { + rc = MPI_ERR_UNSUPPORTED_DATAREP; + fh = MPI_FILE_NULL; + } OMPI_ERRHANDLER_CHECK(rc, fh, rc, FUNC_NAME); } diff --git a/ompi/mpi/c/iallgather.c b/ompi/mpi/c/iallgather.c index 7d2740b6512..8e0abe3fe8c 100644 --- a/ompi/mpi/c/iallgather.c +++ b/ompi/mpi/c/iallgather.c @@ -14,8 +14,8 @@ * Copyright (c) 2012 Oak Ridge National Laboratory. All rights reserved. * Copyright (c) 2013 Los Alamos National Security, LLC. All rights * reserved. - * Copyright (c) 2015 Research Organization for Information Science - * and Technology (RIST). All rights reserved. + * Copyright (c) 2015-2019 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -31,6 +31,7 @@ #include "ompi/communicator/communicator.h" #include "ompi/errhandler/errhandler.h" #include "ompi/datatype/ompi_datatype.h" +#include "ompi/mca/coll/base/coll_base_util.h" #include "ompi/memchecker.h" #include "ompi/runtime/ompi_spc.h" @@ -102,6 +103,9 @@ int MPI_Iallgather(const void *sendbuf, int sendcount, MPI_Datatype sendtype, err = comm->c_coll->coll_iallgather(sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype, comm, request, comm->c_coll->coll_iallgather_module); + if (OPAL_LIKELY(OMPI_SUCCESS == err)) { + ompi_coll_base_retain_datatypes(*request, (MPI_IN_PLACE==sendbuf)?NULL:sendtype, recvtype); + } OMPI_ERRHANDLER_RETURN(err, comm, err, FUNC_NAME); } diff --git a/ompi/mpi/c/iallgatherv.c b/ompi/mpi/c/iallgatherv.c index 0373a15b1d3..e743cb9b06f 100644 --- a/ompi/mpi/c/iallgatherv.c +++ b/ompi/mpi/c/iallgatherv.c @@ -14,8 +14,8 @@ * Copyright (c) 2012 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2012-2013 Los Alamos National Security, LLC. All rights * reserved. - * Copyright (c) 2015 Research Organization for Information Science - * and Technology (RIST). All rights reserved. + * Copyright (c) 2015-2019 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -31,6 +31,7 @@ #include "ompi/communicator/communicator.h" #include "ompi/errhandler/errhandler.h" #include "ompi/datatype/ompi_datatype.h" +#include "ompi/mca/coll/base/coll_base_util.h" #include "ompi/memchecker.h" #include "ompi/runtime/ompi_spc.h" @@ -126,6 +127,9 @@ int MPI_Iallgatherv(const void *sendbuf, int sendcount, MPI_Datatype sendtype, recvbuf, recvcounts, displs, recvtype, comm, request, comm->c_coll->coll_iallgatherv_module); + if (OPAL_LIKELY(OMPI_SUCCESS == err)) { + ompi_coll_base_retain_datatypes(*request, (MPI_IN_PLACE==sendbuf)?NULL:sendtype, recvtype); + } OMPI_ERRHANDLER_RETURN(err, comm, err, FUNC_NAME); } diff --git a/ompi/mpi/c/iallreduce.c b/ompi/mpi/c/iallreduce.c index d0ea511cf84..bfa968c55b4 100644 --- a/ompi/mpi/c/iallreduce.c +++ b/ompi/mpi/c/iallreduce.c @@ -12,8 +12,8 @@ * All rights reserved. * Copyright (c) 2013 Los Alamos National Security, LLC. All rights * reserved. - * Copyright (c) 2015 Research Organization for Information Science - * and Technology (RIST). All rights reserved. + * Copyright (c) 2015-2019 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * Copyright (c) 2016 IBM Corporation. All rights reserved. 
* $COPYRIGHT$ * @@ -31,6 +31,7 @@ #include "ompi/errhandler/errhandler.h" #include "ompi/datatype/ompi_datatype.h" #include "ompi/op/op.h" +#include "ompi/mca/coll/base/coll_base_util.h" #include "ompi/memchecker.h" #include "ompi/runtime/ompi_spc.h" @@ -112,10 +113,11 @@ int MPI_Iallreduce(const void *sendbuf, void *recvbuf, int count, /* Invoke the coll component to perform the back-end operation */ - OBJ_RETAIN(op); err = comm->c_coll->coll_iallreduce(sendbuf, recvbuf, count, datatype, op, comm, request, comm->c_coll->coll_iallreduce_module); - OBJ_RELEASE(op); + if (OPAL_LIKELY(OMPI_SUCCESS == err)) { + ompi_coll_base_retain_op(*request, op, datatype); + } OMPI_ERRHANDLER_RETURN(err, comm, err, FUNC_NAME); } diff --git a/ompi/mpi/c/ialltoall.c b/ompi/mpi/c/ialltoall.c index 2d46b76f38f..0637f29f396 100644 --- a/ompi/mpi/c/ialltoall.c +++ b/ompi/mpi/c/ialltoall.c @@ -14,8 +14,8 @@ * Copyright (c) 2012 Oak Ridge National Laboratory. All rights reserved. * Copyright (c) 2013 Los Alamos National Security, LLC. All rights * reserved. - * Copyright (c) 2014-2016 Research Organization for Information Science - * and Technology (RIST). All rights reserved. + * Copyright (c) 2014-2019 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -31,6 +31,7 @@ #include "ompi/communicator/communicator.h" #include "ompi/errhandler/errhandler.h" #include "ompi/datatype/ompi_datatype.h" +#include "ompi/mca/coll/base/coll_base_util.h" #include "ompi/memchecker.h" #include "ompi/runtime/ompi_spc.h" @@ -101,5 +102,8 @@ int MPI_Ialltoall(const void *sendbuf, int sendcount, MPI_Datatype sendtype, err = comm->c_coll->coll_ialltoall(sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype, comm, request, comm->c_coll->coll_ialltoall_module); + if (OPAL_LIKELY(OMPI_SUCCESS == err)) { + ompi_coll_base_retain_datatypes(*request, (MPI_IN_PLACE==sendbuf)?NULL:sendtype, recvtype); + } OMPI_ERRHANDLER_RETURN(err, comm, err, FUNC_NAME); } diff --git a/ompi/mpi/c/ialltoallv.c b/ompi/mpi/c/ialltoallv.c index 577b3828949..cef857cdf78 100644 --- a/ompi/mpi/c/ialltoallv.c +++ b/ompi/mpi/c/ialltoallv.c @@ -13,8 +13,8 @@ * Copyright (c) 2007 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2012-2013 Los Alamos National Security, LLC. All rights * reserved. - * Copyright (c) 2014-2016 Research Organization for Information Science - * and Technology (RIST). All rights reserved. + * Copyright (c) 2014-2019 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -30,6 +30,7 @@ #include "ompi/communicator/communicator.h" #include "ompi/errhandler/errhandler.h" #include "ompi/datatype/ompi_datatype.h" +#include "ompi/mca/coll/base/coll_base_util.h" #include "ompi/memchecker.h" #include "ompi/runtime/ompi_spc.h" @@ -130,6 +131,9 @@ int MPI_Ialltoallv(const void *sendbuf, const int sendcounts[], const int sdispl err = comm->c_coll->coll_ialltoallv(sendbuf, sendcounts, sdispls, sendtype, recvbuf, recvcounts, rdispls, recvtype, comm, request, comm->c_coll->coll_ialltoallv_module); + if (OPAL_LIKELY(OMPI_SUCCESS == err)) { + ompi_coll_base_retain_datatypes(*request, (MPI_IN_PLACE==sendbuf)?NULL:sendtype, recvtype); + } OMPI_ERRHANDLER_RETURN(err, comm, err, FUNC_NAME); } diff --git a/ompi/mpi/c/ialltoallw.c b/ompi/mpi/c/ialltoallw.c index b7bc86eaa7d..6dc4af8854a 100644 --- a/ompi/mpi/c/ialltoallw.c +++ b/ompi/mpi/c/ialltoallw.c @@ -13,8 +13,8 @@ * Copyright (c) 2007 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2012-2013 Los Alamos National Security, LLC. All rights * reserved. - * Copyright (c) 2014-2016 Research Organization for Information Science - * and Technology (RIST). All rights reserved. + * Copyright (c) 2014-2019 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -30,6 +30,7 @@ #include "ompi/communicator/communicator.h" #include "ompi/errhandler/errhandler.h" #include "ompi/datatype/ompi_datatype.h" +#include "ompi/mca/coll/base/coll_base_util.h" #include "ompi/memchecker.h" #include "ompi/runtime/ompi_spc.h" @@ -127,6 +128,9 @@ int MPI_Ialltoallw(const void *sendbuf, const int sendcounts[], const int sdispl sendtypes, recvbuf, recvcounts, rdispls, recvtypes, comm, request, comm->c_coll->coll_ialltoallw_module); + if (OPAL_LIKELY(OMPI_SUCCESS == err)) { + ompi_coll_base_retain_datatypes_w(*request, (MPI_IN_PLACE==sendbuf)?NULL:sendtypes, recvtypes); + } OMPI_ERRHANDLER_RETURN(err, comm, err, FUNC_NAME); } diff --git a/ompi/mpi/c/ibcast.c b/ompi/mpi/c/ibcast.c index 1f049b4c6de..2dcdbb9633d 100644 --- a/ompi/mpi/c/ibcast.c +++ b/ompi/mpi/c/ibcast.c @@ -1,7 +1,7 @@ /* * Copyright (c) 2012 Oak Rigde National Laboratory. All rights reserved. - * Copyright (c) 2015 Research Organization for Information Science - * and Technology (RIST). All rights reserved. + * Copyright (c) 2015-2019 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * Copyright (c) 2017-2018 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. 
@@ -19,6 +19,7 @@ #include "ompi/communicator/communicator.h" #include "ompi/errhandler/errhandler.h" #include "ompi/datatype/ompi_datatype.h" +#include "ompi/mca/coll/base/coll_base_util.h" #include "ompi/memchecker.h" #include "ompi/runtime/ompi_spc.h" @@ -86,5 +87,13 @@ int MPI_Ibcast(void *buffer, int count, MPI_Datatype datatype, err = comm->c_coll->coll_ibcast(buffer, count, datatype, root, comm, request, comm->c_coll->coll_ibcast_module); + if (OPAL_LIKELY(OMPI_SUCCESS == err)) { + if (!OMPI_COMM_IS_INTRA(comm)) { + if (MPI_PROC_NULL == root) { + datatype = NULL; + } + } + ompi_coll_base_retain_datatypes(*request, datatype, NULL); + } OMPI_ERRHANDLER_RETURN(err, comm, err, FUNC_NAME); } diff --git a/ompi/mpi/c/iexscan.c b/ompi/mpi/c/iexscan.c index 14cf23c590b..4c56e08f1e4 100644 --- a/ompi/mpi/c/iexscan.c +++ b/ompi/mpi/c/iexscan.c @@ -12,8 +12,8 @@ * All rights reserved. * Copyright (c) 2013 Los Alamos National Security, LLC. All rights * reserved. - * Copyright (c) 2015-2018 Research Organization for Information Science - * and Technology (RIST). All rights reserved. + * Copyright (c) 2015-2019 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -29,6 +29,7 @@ #include "ompi/errhandler/errhandler.h" #include "ompi/datatype/ompi_datatype.h" #include "ompi/op/op.h" +#include "ompi/mca/coll/base/coll_base_util.h" #include "ompi/memchecker.h" #include "ompi/runtime/ompi_spc.h" @@ -82,10 +83,11 @@ int MPI_Iexscan(const void *sendbuf, void *recvbuf, int count, /* Invoke the coll component to perform the back-end operation */ - OBJ_RETAIN(op); err = comm->c_coll->coll_iexscan(sendbuf, recvbuf, count, datatype, op, comm, request, comm->c_coll->coll_iexscan_module); - OBJ_RELEASE(op); + if (OPAL_LIKELY(OMPI_SUCCESS == err)) { + ompi_coll_base_retain_op(*request, op, datatype); + } OMPI_ERRHANDLER_RETURN(err, comm, err, FUNC_NAME); } diff --git a/ompi/mpi/c/igather.c b/ompi/mpi/c/igather.c index 3fcda7e8069..c876daa7ec7 100644 --- a/ompi/mpi/c/igather.c +++ b/ompi/mpi/c/igather.c @@ -15,8 +15,8 @@ * Copyright (c) 2008 Sun Microsystems, Inc. All rights reserved. * Copyright (c) 2013 Los Alamos National Security, LLC. All rights * reserved. - * Copyright (c) 2015 Research Organization for Information Science - * and Technology (RIST). All rights reserved. + * Copyright (c) 2015-2019 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -31,6 +31,7 @@ #include "ompi/communicator/communicator.h" #include "ompi/errhandler/errhandler.h" #include "ompi/datatype/ompi_datatype.h" +#include "ompi/mca/coll/base/coll_base_util.h" #include "ompi/memchecker.h" #include "ompi/runtime/ompi_spc.h" @@ -173,5 +174,24 @@ int MPI_Igather(const void *sendbuf, int sendcount, MPI_Datatype sendtype, err = comm->c_coll->coll_igather(sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype, root, comm, request, comm->c_coll->coll_igather_module); + if (OPAL_LIKELY(OMPI_SUCCESS == err)) { + if (OMPI_COMM_IS_INTRA(comm)) { + if (MPI_IN_PLACE == sendbuf) { + sendtype = NULL; + } else if (ompi_comm_rank(comm) != root) { + recvtype = NULL; + } + } else { + if (MPI_ROOT == root) { + sendtype = NULL; + } else if (MPI_PROC_NULL == root) { + sendtype = NULL; + recvtype = NULL; + } else { + recvtype = NULL; + } + } + ompi_coll_base_retain_datatypes(*request, sendtype, recvtype); + } OMPI_ERRHANDLER_RETURN(err, comm, err, FUNC_NAME); } diff --git a/ompi/mpi/c/igatherv.c b/ompi/mpi/c/igatherv.c index e2deab3cc9f..1d575dce4cc 100644 --- a/ompi/mpi/c/igatherv.c +++ b/ompi/mpi/c/igatherv.c @@ -13,7 +13,7 @@ * Copyright (c) 2006-2012 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2012-2013 Los Alamos National Security, LLC. All rights * reserved. - * Copyright (c) 2015-2018 Research Organization for Information Science + * Copyright (c) 2015-2019 Research Organization for Information Science * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ * @@ -29,6 +29,7 @@ #include "ompi/communicator/communicator.h" #include "ompi/errhandler/errhandler.h" #include "ompi/datatype/ompi_datatype.h" +#include "ompi/mca/coll/base/coll_base_util.h" #include "ompi/memchecker.h" #include "ompi/runtime/ompi_spc.h" @@ -196,5 +197,24 @@ int MPI_Igatherv(const void *sendbuf, int sendcount, MPI_Datatype sendtype, err = comm->c_coll->coll_igatherv(sendbuf, sendcount, sendtype, recvbuf, recvcounts, displs, recvtype, root, comm, request, comm->c_coll->coll_igatherv_module); + if (OPAL_LIKELY(OMPI_SUCCESS == err)) { + if (OMPI_COMM_IS_INTRA(comm)) { + if (MPI_IN_PLACE == sendbuf) { + sendtype = NULL; + } else if (ompi_comm_rank(comm) != root) { + recvtype = NULL; + } + } else { + if (MPI_ROOT == root) { + sendtype = NULL; + } else if (MPI_PROC_NULL == root) { + sendtype = NULL; + recvtype = NULL; + } else { + recvtype = NULL; + } + } + ompi_coll_base_retain_datatypes(*request, sendtype, recvtype); + } OMPI_ERRHANDLER_RETURN(err, comm, err, FUNC_NAME); } diff --git a/ompi/mpi/c/ineighbor_allgather.c b/ompi/mpi/c/ineighbor_allgather.c index 2706ea44d4a..cba5b5d4e36 100644 --- a/ompi/mpi/c/ineighbor_allgather.c +++ b/ompi/mpi/c/ineighbor_allgather.c @@ -14,8 +14,8 @@ * Copyright (c) 2012 Oak Rigde National Laboratory. All rights reserved. * Copyright (c) 2013 Los Alamos National Security, LLC. All rights * reserved. - * Copyright (c) 2015-2018 Research Organization for Information Science - * and Technology (RIST). All rights reserved. + * Copyright (c) 2015-2019 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * Copyright (c) 2017 IBM Corporation. All rights reserved. 
* $COPYRIGHT$ * @@ -32,6 +32,7 @@ #include "ompi/communicator/communicator.h" #include "ompi/errhandler/errhandler.h" #include "ompi/datatype/ompi_datatype.h" +#include "ompi/mca/coll/base/coll_base_util.h" #include "ompi/memchecker.h" #include "ompi/mca/topo/topo.h" #include "ompi/mca/topo/base/base.h" @@ -124,6 +125,9 @@ int MPI_Ineighbor_allgather(const void *sendbuf, int sendcount, MPI_Datatype sen err = comm->c_coll->coll_ineighbor_allgather(sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype, comm, request, comm->c_coll->coll_ineighbor_allgather_module); + if (OPAL_LIKELY(OMPI_SUCCESS == err)) { + ompi_coll_base_retain_datatypes(*request, sendtype, recvtype); + } OMPI_ERRHANDLER_RETURN(err, comm, err, FUNC_NAME); } diff --git a/ompi/mpi/c/ineighbor_allgatherv.c b/ompi/mpi/c/ineighbor_allgatherv.c index 2f3c244064c..58dedb61057 100644 --- a/ompi/mpi/c/ineighbor_allgatherv.c +++ b/ompi/mpi/c/ineighbor_allgatherv.c @@ -14,8 +14,8 @@ * Copyright (c) 2012 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2012-2013 Los Alamos National Security, LLC. All rights * reserved. - * Copyright (c) 2015-2018 Research Organization for Information Science - * and Technology (RIST). All rights reserved. + * Copyright (c) 2015-2019 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * Copyright (c) 2017 IBM Corporation. All rights reserved. 
* $COPYRIGHT$ * @@ -32,6 +32,7 @@ #include "ompi/communicator/communicator.h" #include "ompi/errhandler/errhandler.h" #include "ompi/datatype/ompi_datatype.h" +#include "ompi/mca/coll/base/coll_base_util.h" #include "ompi/memchecker.h" #include "ompi/mca/topo/topo.h" #include "ompi/mca/topo/base/base.h" @@ -147,6 +148,9 @@ int MPI_Ineighbor_allgatherv(const void *sendbuf, int sendcount, MPI_Datatype se recvbuf, (int *) recvcounts, (int *) displs, recvtype, comm, request, comm->c_coll->coll_ineighbor_allgatherv_module); + if (OPAL_LIKELY(OMPI_SUCCESS == err)) { + ompi_coll_base_retain_datatypes(*request, sendtype, recvtype); + } OMPI_ERRHANDLER_RETURN(err, comm, err, FUNC_NAME); } diff --git a/ompi/mpi/c/ineighbor_alltoall.c b/ompi/mpi/c/ineighbor_alltoall.c index b3d0846421e..b03b7cc50fa 100644 --- a/ompi/mpi/c/ineighbor_alltoall.c +++ b/ompi/mpi/c/ineighbor_alltoall.c @@ -14,8 +14,8 @@ * Copyright (c) 2012 Oak Ridge National Laboratory. All rights reserved. * Copyright (c) 2013 Los Alamos National Security, LLC. All rights * reserved. - * Copyright (c) 2014-2018 Research Organization for Information Science - * and Technology (RIST). All rights reserved. + * Copyright (c) 2014-2019 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * Copyright (c) 2017 IBM Corporation. All rights reserved. 
* $COPYRIGHT$ * @@ -32,6 +32,7 @@ #include "ompi/communicator/communicator.h" #include "ompi/errhandler/errhandler.h" #include "ompi/datatype/ompi_datatype.h" +#include "ompi/mca/coll/base/coll_base_util.h" #include "ompi/memchecker.h" #include "ompi/mca/topo/topo.h" #include "ompi/mca/topo/base/base.h" @@ -124,5 +125,8 @@ int MPI_Ineighbor_alltoall(const void *sendbuf, int sendcount, MPI_Datatype send err = comm->c_coll->coll_ineighbor_alltoall(sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype, comm, request, comm->c_coll->coll_ineighbor_alltoall_module); + if (OPAL_LIKELY(OMPI_SUCCESS == err)) { + ompi_coll_base_retain_datatypes(*request, sendtype, recvtype); + } OMPI_ERRHANDLER_RETURN(err, comm, err, FUNC_NAME); } diff --git a/ompi/mpi/c/ineighbor_alltoallv.c b/ompi/mpi/c/ineighbor_alltoallv.c index 9645e15b05d..a44d081e10a 100644 --- a/ompi/mpi/c/ineighbor_alltoallv.c +++ b/ompi/mpi/c/ineighbor_alltoallv.c @@ -31,6 +31,7 @@ #include "ompi/communicator/communicator.h" #include "ompi/errhandler/errhandler.h" #include "ompi/datatype/ompi_datatype.h" +#include "ompi/mca/coll/base/coll_base_util.h" #include "ompi/memchecker.h" #include "ompi/mca/topo/topo.h" #include "ompi/mca/topo/base/base.h" @@ -147,6 +148,9 @@ int MPI_Ineighbor_alltoallv(const void *sendbuf, const int sendcounts[], const i err = comm->c_coll->coll_ineighbor_alltoallv(sendbuf, sendcounts, sdispls, sendtype, recvbuf, recvcounts, rdispls, recvtype, comm, request, comm->c_coll->coll_ineighbor_alltoallv_module); + if (OPAL_LIKELY(OMPI_SUCCESS == err)) { + ompi_coll_base_retain_datatypes(*request, sendtype, recvtype); + } OMPI_ERRHANDLER_RETURN(err, comm, err, FUNC_NAME); } diff --git a/ompi/mpi/c/ineighbor_alltoallw.c b/ompi/mpi/c/ineighbor_alltoallw.c index 150f28d7173..efb4d24f5f7 100644 --- a/ompi/mpi/c/ineighbor_alltoallw.c +++ b/ompi/mpi/c/ineighbor_alltoallw.c @@ -31,6 +31,7 @@ #include "ompi/communicator/communicator.h" #include "ompi/errhandler/errhandler.h" #include 
"ompi/datatype/ompi_datatype.h" +#include "ompi/mca/coll/base/coll_base_util.h" #include "ompi/memchecker.h" #include "ompi/mca/topo/topo.h" #include "ompi/mca/topo/base/base.h" @@ -147,6 +148,9 @@ int MPI_Ineighbor_alltoallw(const void *sendbuf, const int sendcounts[], const M err = comm->c_coll->coll_ineighbor_alltoallw(sendbuf, sendcounts, sdispls, sendtypes, recvbuf, recvcounts, rdispls, recvtypes, comm, request, comm->c_coll->coll_ineighbor_alltoallw_module); + if (OPAL_LIKELY(OMPI_SUCCESS == err)) { + ompi_coll_base_retain_datatypes_w(*request, sendtypes, recvtypes); + } OMPI_ERRHANDLER_RETURN(err, comm, err, FUNC_NAME); } diff --git a/ompi/mpi/c/ireduce.c b/ompi/mpi/c/ireduce.c index 47948887824..be552250fce 100644 --- a/ompi/mpi/c/ireduce.c +++ b/ompi/mpi/c/ireduce.c @@ -13,8 +13,8 @@ * Copyright (c) 2006 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2013 Los Alamos National Security, LLC. All rights * reserved. - * Copyright (c) 2015 Research Organization for Information Science - * and Technology (RIST). All rights reserved. + * Copyright (c) 2015-2019 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * Copyright (c) 2016 IBM Corporation. All rights reserved. 
* $COPYRIGHT$ * @@ -31,6 +31,7 @@ #include "ompi/errhandler/errhandler.h" #include "ompi/datatype/ompi_datatype.h" #include "ompi/op/op.h" +#include "ompi/mca/coll/base/coll_base_util.h" #include "ompi/memchecker.h" #include "ompi/runtime/ompi_spc.h" @@ -136,10 +137,11 @@ int MPI_Ireduce(const void *sendbuf, void *recvbuf, int count, OPAL_CR_ENTER_LIBRARY(); /* Invoke the coll component to perform the back-end operation */ - OBJ_RETAIN(op); err = comm->c_coll->coll_ireduce(sendbuf, recvbuf, count, datatype, op, root, comm, request, comm->c_coll->coll_ireduce_module); - OBJ_RELEASE(op); + if (OPAL_LIKELY(OMPI_SUCCESS == err)) { + ompi_coll_base_retain_op(*request, op, datatype); + } OMPI_ERRHANDLER_RETURN(err, comm, err, FUNC_NAME); } diff --git a/ompi/mpi/c/ireduce_scatter.c b/ompi/mpi/c/ireduce_scatter.c index 211b217971e..56525fa19f7 100644 --- a/ompi/mpi/c/ireduce_scatter.c +++ b/ompi/mpi/c/ireduce_scatter.c @@ -13,8 +13,8 @@ * Copyright (c) 2006-2012 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2012-2013 Los Alamos National Security, LLC. All rights * reserved. - * Copyright (c) 2015 Research Organization for Information Science - * and Technology (RIST). All rights reserved. + * Copyright (c) 2015-2019 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * Copyright (c) 2016 IBM Corporation. All rights reserved. 
* $COPYRIGHT$ * @@ -31,6 +31,7 @@ #include "ompi/errhandler/errhandler.h" #include "ompi/datatype/ompi_datatype.h" #include "ompi/op/op.h" +#include "ompi/mca/coll/base/coll_base_util.h" #include "ompi/memchecker.h" #include "ompi/runtime/ompi_spc.h" @@ -133,10 +134,11 @@ int MPI_Ireduce_scatter(const void *sendbuf, void *recvbuf, const int recvcounts /* Invoke the coll component to perform the back-end operation */ - OBJ_RETAIN(op); err = comm->c_coll->coll_ireduce_scatter(sendbuf, recvbuf, recvcounts, datatype, op, comm, request, comm->c_coll->coll_ireduce_scatter_module); - OBJ_RELEASE(op); + if (OPAL_LIKELY(OMPI_SUCCESS == err)) { + ompi_coll_base_retain_op(*request, op, datatype); + } OMPI_ERRHANDLER_RETURN(err, comm, err, FUNC_NAME); } diff --git a/ompi/mpi/c/ireduce_scatter_block.c b/ompi/mpi/c/ireduce_scatter_block.c index ded4abf2232..ce43ab3cd4f 100644 --- a/ompi/mpi/c/ireduce_scatter_block.c +++ b/ompi/mpi/c/ireduce_scatter_block.c @@ -14,8 +14,8 @@ * Copyright (c) 2012 Oak Ridge National Labs. All rights reserved. * Copyright (c) 2013 Los Alamos National Security, LLC. All rights * reserved. - * Copyright (c) 2015 Research Organization for Information Science - * and Technology (RIST). All rights reserved. + * Copyright (c) 2015-2019 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -31,6 +31,7 @@ #include "ompi/errhandler/errhandler.h" #include "ompi/datatype/ompi_datatype.h" #include "ompi/op/op.h" +#include "ompi/mca/coll/base/coll_base_util.h" #include "ompi/memchecker.h" #include "ompi/runtime/ompi_spc.h" @@ -100,10 +101,11 @@ int MPI_Ireduce_scatter_block(const void *sendbuf, void *recvbuf, int recvcount, /* Invoke the coll component to perform the back-end operation */ - OBJ_RETAIN(op); err = comm->c_coll->coll_ireduce_scatter_block(sendbuf, recvbuf, recvcount, datatype, op, comm, request, comm->c_coll->coll_ireduce_scatter_block_module); - OBJ_RELEASE(op); + if (OPAL_LIKELY(OMPI_SUCCESS == err)) { + ompi_coll_base_retain_op(*request, op, datatype); + } OMPI_ERRHANDLER_RETURN(err, comm, err, FUNC_NAME); } diff --git a/ompi/mpi/c/iscan.c b/ompi/mpi/c/iscan.c index 34502b8e366..cfae0ff409a 100644 --- a/ompi/mpi/c/iscan.c +++ b/ompi/mpi/c/iscan.c @@ -13,8 +13,8 @@ * Copyright (c) 2006 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2013 Los Alamos National Security, LLC. All rights * reserved. - * Copyright (c) 2015 Research Organization for Information Science - * and Technology (RIST). All rights reserved. + * Copyright (c) 2015-2019 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -30,6 +30,7 @@ #include "ompi/errhandler/errhandler.h" #include "ompi/datatype/ompi_datatype.h" #include "ompi/op/op.h" +#include "ompi/mca/coll/base/coll_base_util.h" #include "ompi/memchecker.h" #include "ompi/runtime/ompi_spc.h" @@ -96,11 +97,12 @@ int MPI_Iscan(const void *sendbuf, void *recvbuf, int count, /* Call the coll component to actually perform the allgather */ - OBJ_RETAIN(op); err = comm->c_coll->coll_iscan(sendbuf, recvbuf, count, datatype, op, comm, request, comm->c_coll->coll_iscan_module); - OBJ_RELEASE(op); + if (OPAL_LIKELY(OMPI_SUCCESS == err)) { + ompi_coll_base_retain_op(*request, op, datatype); + } OMPI_ERRHANDLER_RETURN(err, comm, err, FUNC_NAME); } diff --git a/ompi/mpi/c/iscatter.c b/ompi/mpi/c/iscatter.c index 79a22d57a52..3357ad21158 100644 --- a/ompi/mpi/c/iscatter.c +++ b/ompi/mpi/c/iscatter.c @@ -15,8 +15,8 @@ * Copyright (c) 2008 Sun Microsystems, Inc. All rights reserved. * Copyright (c) 2013 Los Alamos National Security, LLC. All rights * reserved. - * Copyright (c) 2015 Research Organization for Information Science - * and Technology (RIST). All rights reserved. + * Copyright (c) 2015-2019 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -31,6 +31,7 @@ #include "ompi/communicator/communicator.h" #include "ompi/errhandler/errhandler.h" #include "ompi/datatype/ompi_datatype.h" +#include "ompi/mca/coll/base/coll_base_util.h" #include "ompi/memchecker.h" #include "ompi/runtime/ompi_spc.h" @@ -156,5 +157,24 @@ int MPI_Iscatter(const void *sendbuf, int sendcount, MPI_Datatype sendtype, err = comm->c_coll->coll_iscatter(sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype, root, comm, request, comm->c_coll->coll_iscatter_module); + if (OPAL_LIKELY(OMPI_SUCCESS == err)) { + if (OMPI_COMM_IS_INTRA(comm)) { + if (MPI_IN_PLACE == recvbuf) { + recvtype = NULL; + } else if (ompi_comm_rank(comm) != root) { + sendtype = NULL; + } + } else { + if (MPI_ROOT == root) { + recvtype = NULL; + } else if (MPI_PROC_NULL == root) { + sendtype = NULL; + recvtype = NULL; + } else { + sendtype = NULL; + } + } + ompi_coll_base_retain_datatypes(*request, sendtype, recvtype); + } OMPI_ERRHANDLER_RETURN(err, comm, err, FUNC_NAME); } diff --git a/ompi/mpi/c/iscatterv.c b/ompi/mpi/c/iscatterv.c index 66ae9003caa..2d164662f4a 100644 --- a/ompi/mpi/c/iscatterv.c +++ b/ompi/mpi/c/iscatterv.c @@ -13,8 +13,8 @@ * Copyright (c) 2006-2012 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2012-2013 Los Alamos National Security, LLC. All rights * reserved. - * Copyright (c) 2015 Research Organization for Information Science - * and Technology (RIST). All rights reserved. + * Copyright (c) 2015-2019 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -29,6 +29,7 @@ #include "ompi/communicator/communicator.h" #include "ompi/errhandler/errhandler.h" #include "ompi/datatype/ompi_datatype.h" +#include "ompi/mca/coll/base/coll_base_util.h" #include "ompi/memchecker.h" #include "ompi/runtime/ompi_spc.h" @@ -196,5 +197,24 @@ int MPI_Iscatterv(const void *sendbuf, const int sendcounts[], const int displs[ err = comm->c_coll->coll_iscatterv(sendbuf, sendcounts, displs, sendtype, recvbuf, recvcount, recvtype, root, comm, request, comm->c_coll->coll_iscatterv_module); + if (OPAL_LIKELY(OMPI_SUCCESS == err)) { + if (OMPI_COMM_IS_INTRA(comm)) { + if (MPI_IN_PLACE == recvbuf) { + recvtype = NULL; + } else if (ompi_comm_rank(comm) != root) { + sendtype = NULL; + } + } else { + if (MPI_ROOT == root) { + recvtype = NULL; + } else if (MPI_PROC_NULL == root) { + sendtype = NULL; + recvtype = NULL; + } else { + sendtype = NULL; + } + } + ompi_coll_base_retain_datatypes(*request, sendtype, recvtype); + } OMPI_ERRHANDLER_RETURN(err, comm, err, FUNC_NAME); } diff --git a/ompi/mpi/c/profile/Makefile.am b/ompi/mpi/c/profile/Makefile.am index 698c5c61abd..4a239d7e2ab 100644 --- a/ompi/mpi/c/profile/Makefile.am +++ b/ompi/mpi/c/profile/Makefile.am @@ -58,6 +58,9 @@ nodist_libmpi_c_pmpi_la_SOURCES = \ pialltoallv.c \ palltoallw.c \ pialltoallw.c \ + pattr_delete.c \ + pattr_get.c \ + pattr_put.c \ pbarrier.c \ pibarrier.c \ pbcast.c \ @@ -265,6 +268,8 @@ nodist_libmpi_c_pmpi_la_SOURCES = \ pineighbor_alltoallv.c \ pneighbor_alltoallw.c \ pineighbor_alltoallw.c \ + pkeyval_create.c \ + pkeyval_free.c \ pop_c2f.c \ pop_create.c \ pop_commutative.c \ @@ -411,24 +416,19 @@ nodist_libmpi_c_pmpi_la_SOURCES = \ pwin_unlock_all.c \ pwin_wait.c -if OMPI_ENABLE_MPI1_COMPAT +# include all of the removed MPI functions in library +# for v4.0.x regardless of state of OMPI_ENABLE_MPI1_COMPAT nodist_libmpi_c_pmpi_la_SOURCES += \ paddress.c \ - pattr_delete.c \ - pattr_get.c \ - pattr_put.c \ 
perrhandler_create.c \ perrhandler_get.c \ perrhandler_set.c \ - pkeyval_create.c \ - pkeyval_free.c \ ptype_extent.c \ ptype_hindexed.c \ ptype_hvector.c \ ptype_lb.c \ ptype_struct.c \ ptype_ub.c -endif # # Sym link in the sources from the real MPI directory diff --git a/ompi/mpi/c/type_create_f90_complex.c b/ompi/mpi/c/type_create_f90_complex.c index e8ec6d6f9ab..6d82c571429 100644 --- a/ompi/mpi/c/type_create_f90_complex.c +++ b/ompi/mpi/c/type_create_f90_complex.c @@ -17,6 +17,7 @@ * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. * Copyright (c) 2017 IBM Corporation. All rights reserved. + * Copyright (c) 2018 FUJITSU LIMITED. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -80,10 +81,10 @@ int MPI_Type_create_f90_complex(int p, int r, MPI_Datatype *newtype) * cache. */ - if( (LDBL_DIG < p) || (LDBL_MAX_10_EXP < r) ) *newtype = &ompi_mpi_datatype_null.dt; - else if( (DBL_DIG < p) || (DBL_MAX_10_EXP < r) ) *newtype = &ompi_mpi_ldblcplex.dt; - else if( (FLT_DIG < p) || (FLT_MAX_10_EXP < r) ) *newtype = &ompi_mpi_dblcplex.dt; - else *newtype = &ompi_mpi_cplex.dt; + if ( (LDBL_DIG < p) || (LDBL_MAX_10_EXP < r) || (-LDBL_MIN_10_EXP < r) ) *newtype = &ompi_mpi_datatype_null.dt; + else if( (DBL_DIG < p) || (DBL_MAX_10_EXP < r) || (-DBL_MIN_10_EXP < r) ) *newtype = &ompi_mpi_ldblcplex.dt; + else if( (FLT_DIG < p) || (FLT_MAX_10_EXP < r) || (-FLT_MIN_10_EXP < r) ) *newtype = &ompi_mpi_dblcplex.dt; + else *newtype = &ompi_mpi_cplex.dt; if( *newtype != &ompi_mpi_datatype_null.dt ) { ompi_datatype_t* datatype; diff --git a/ompi/mpi/c/type_create_f90_real.c b/ompi/mpi/c/type_create_f90_real.c index de2ee83fac4..d131fc2a4f8 100644 --- a/ompi/mpi/c/type_create_f90_real.c +++ b/ompi/mpi/c/type_create_f90_real.c @@ -17,6 +17,7 @@ * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. * Copyright (c) 2017 IBM Corporation. 
All rights reserved. + * Copyright (c) 2018 FUJITSU LIMITED. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -80,10 +81,10 @@ int MPI_Type_create_f90_real(int p, int r, MPI_Datatype *newtype) * cache. */ - if( (LDBL_DIG < p) || (LDBL_MAX_10_EXP < r) ) *newtype = &ompi_mpi_datatype_null.dt; - else if( (DBL_DIG < p) || (DBL_MAX_10_EXP < r) ) *newtype = &ompi_mpi_long_double.dt; - else if( (FLT_DIG < p) || (FLT_MAX_10_EXP < r) ) *newtype = &ompi_mpi_double.dt; - else *newtype = &ompi_mpi_float.dt; + if ( (LDBL_DIG < p) || (LDBL_MAX_10_EXP < r) || (-LDBL_MIN_10_EXP < r) ) *newtype = &ompi_mpi_datatype_null.dt; + else if( (DBL_DIG < p) || (DBL_MAX_10_EXP < r) || (-DBL_MIN_10_EXP < r) ) *newtype = &ompi_mpi_long_double.dt; + else if( (FLT_DIG < p) || (FLT_MAX_10_EXP < r) || (-FLT_MIN_10_EXP < r) ) *newtype = &ompi_mpi_double.dt; + else *newtype = &ompi_mpi_float.dt; if( *newtype != &ompi_mpi_datatype_null.dt ) { ompi_datatype_t* datatype; diff --git a/ompi/mpi/c/type_extent.c b/ompi/mpi/c/type_extent.c index 4c4a4a5f59e..ecf86f14175 100644 --- a/ompi/mpi/c/type_extent.c +++ b/ompi/mpi/c/type_extent.c @@ -11,6 +11,7 @@ * All rights reserved. * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. + * Copyright (c) 2019 IBM Corporation. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -20,6 +21,12 @@ #include "ompi_config.h" +/* This implementation has been removed from the MPI 3.0 standard. + * Open MPI v4.0.x is keeping the implementation in the library, but + * removing the prototypes from the headers, unless the user configures + * with --enable-mpi1-compatibility. 
+ */ + #include "ompi/mpi/c/bindings.h" #include "ompi/runtime/params.h" #include "ompi/communicator/communicator.h" @@ -31,6 +38,10 @@ #if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_Type_extent = PMPI_Type_extent #endif +/* undef before defining, to prevent possible redefinition when + * using _Static_assert to error on usage of removed functions. + */ +#undef MPI_Type_extent #define MPI_Type_extent PMPI_Type_extent #endif diff --git a/ompi/mpi/c/type_hindexed.c b/ompi/mpi/c/type_hindexed.c index 89d3b46bdd3..ca12f4bb329 100644 --- a/ompi/mpi/c/type_hindexed.c +++ b/ompi/mpi/c/type_hindexed.c @@ -11,6 +11,7 @@ * All rights reserved. * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. + * Copyright (c) 2019 IBM Corporation. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -20,6 +21,12 @@ #include "ompi_config.h" +/* This implementation has been removed from the MPI 3.0 standard. + * Open MPI v4.0.x is keeping the implementation in the library, but + * removing the prototypes from the headers, unless the user configures + * with --enable-mpi1-compatibility. + */ + #include "ompi/mpi/c/bindings.h" #include "ompi/runtime/params.h" #include "ompi/communicator/communicator.h" @@ -30,6 +37,10 @@ #if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_Type_hindexed = PMPI_Type_hindexed #endif +/* undef before defining, to prevent possible redefinition when + * using _Static_assert to error on usage of removed functions. + */ +#undef MPI_Type_hindexed #define MPI_Type_hindexed PMPI_Type_hindexed #endif diff --git a/ompi/mpi/c/type_hvector.c b/ompi/mpi/c/type_hvector.c index 2c1517b565a..4117a64cc75 100644 --- a/ompi/mpi/c/type_hvector.c +++ b/ompi/mpi/c/type_hvector.c @@ -11,6 +11,7 @@ * All rights reserved. * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. + * Copyright (c) 2019 IBM Corporation. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -20,6 +21,12 @@ #include "ompi_config.h" +/* This implementation has been removed from the MPI 3.0 standard. + * Open MPI v4.0.x is keeping the implementation in the library, but + * removing the prototypes from the headers, unless the user configures + * with --enable-mpi1-compatibility. + */ + #include "ompi/mpi/c/bindings.h" #include "ompi/runtime/params.h" #include "ompi/communicator/communicator.h" @@ -30,6 +37,10 @@ #if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_Type_hvector = PMPI_Type_hvector #endif +/* undef before defining, to prevent possible redefinition when + * using _Static_assert to error on usage of removed functions. + */ +#undef MPI_Type_hvector #define MPI_Type_hvector PMPI_Type_hvector #endif diff --git a/ompi/mpi/c/type_lb.c b/ompi/mpi/c/type_lb.c index 269f7bd245a..07b8385d0dd 100644 --- a/ompi/mpi/c/type_lb.c +++ b/ompi/mpi/c/type_lb.c @@ -11,6 +11,7 @@ * All rights reserved. * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. + * Copyright (c) 2019 IBM Corporation. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -20,6 +21,12 @@ #include "ompi_config.h" +/* This implementation has been removed from the MPI 3.0 standard. + * Open MPI v4.0.x is keeping the implementation in the library, but + * removing the prototypes from the headers, unless the user configures + * with --enable-mpi1-compatibility. + */ + #include "ompi/mpi/c/bindings.h" #include "ompi/runtime/params.h" #include "ompi/communicator/communicator.h" @@ -31,6 +38,10 @@ #if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_Type_lb = PMPI_Type_lb #endif +/* undef before defining, to prevent possible redefinition when + * using _Static_assert to error on usage of removed functions. 
+ */ +#undef MPI_Type_lb #define MPI_Type_lb PMPI_Type_lb #endif diff --git a/ompi/mpi/c/type_struct.c b/ompi/mpi/c/type_struct.c index 575e26453f7..0151b99ac49 100644 --- a/ompi/mpi/c/type_struct.c +++ b/ompi/mpi/c/type_struct.c @@ -11,6 +11,7 @@ * All rights reserved. * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. + * Copyright (c) 2019 IBM Corporation. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -20,12 +21,22 @@ #include "ompi_config.h" +/* This implementation has been removed from the MPI 3.0 standard. + * Open MPI v4.0.x is keeping the implementation in the library, but + * removing the prototypes from the headers, unless the user configures + * with --enable-mpi1-compatibility. + */ + #include "ompi/mpi/c/bindings.h" #if OMPI_BUILD_MPI_PROFILING #if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_Type_struct = PMPI_Type_struct #endif +/* undef before defining, to prevent possible redefinition when + * using _Static_assert to error on usage of removed functions. + */ +#undef MPI_Type_struct #define MPI_Type_struct PMPI_Type_struct #endif diff --git a/ompi/mpi/c/type_ub.c b/ompi/mpi/c/type_ub.c index a7d16909d66..90755774d93 100644 --- a/ompi/mpi/c/type_ub.c +++ b/ompi/mpi/c/type_ub.c @@ -11,6 +11,7 @@ * All rights reserved. * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. + * Copyright (c) 2019 IBM Corporation. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -20,6 +21,12 @@ #include "ompi_config.h" +/* This implementation has been removed from the MPI 3.0 standard. + * Open MPI v4.0.x is keeping the implementation in the library, but + * removing the prototypes from the headers, unless the user configures + * with --enable-mpi1-compatibility. 
+ */ + #include "ompi/mpi/c/bindings.h" #include "ompi/runtime/params.h" #include "ompi/communicator/communicator.h" @@ -31,6 +38,10 @@ #if OPAL_HAVE_WEAK_SYMBOLS #pragma weak MPI_Type_ub = PMPI_Type_ub #endif +/* undef before defining, to prevent possible redefinition when + * using _Static_assert to error on usage of removed functions. + */ +#undef MPI_Type_ub #define MPI_Type_ub PMPI_Type_ub #endif diff --git a/ompi/mpi/cxx/constants.h b/ompi/mpi/cxx/constants.h index 255853e7d28..f841ddc71e9 100644 --- a/ompi/mpi/cxx/constants.h +++ b/ompi/mpi/cxx/constants.h @@ -180,13 +180,25 @@ static const int COMBINER_NAMED = MPI_COMBINER_NAMED; static const int COMBINER_DUP = MPI_COMBINER_DUP; static const int COMBINER_CONTIGUOUS = MPI_COMBINER_CONTIGUOUS; static const int COMBINER_VECTOR = MPI_COMBINER_VECTOR; +#if (OMPI_ENABLE_MPI1_COMPAT || OMPI_BUILDING) static const int COMBINER_HVECTOR_INTEGER = MPI_COMBINER_HVECTOR_INTEGER; +#else +static const int COMBINER_HVECTOR_INTEGER = OMPI_WAS_MPI_COMBINER_HVECTOR_INTEGER; +#endif static const int COMBINER_HVECTOR = MPI_COMBINER_HVECTOR; static const int COMBINER_INDEXED = MPI_COMBINER_INDEXED; +#if (OMPI_ENABLE_MPI1_COMPAT || OMPI_BUILDING) static const int COMBINER_HINDEXED_INTEGER = MPI_COMBINER_HINDEXED_INTEGER; +#else +static const int COMBINER_HINDEXED_INTEGER = OMPI_WAS_MPI_COMBINER_HINDEXED_INTEGER; +#endif static const int COMBINER_HINDEXED = MPI_COMBINER_HINDEXED; static const int COMBINER_INDEXED_BLOCK = MPI_COMBINER_INDEXED_BLOCK; +#if (OMPI_ENABLE_MPI1_COMPAT || OMPI_BUILDING) static const int COMBINER_STRUCT_INTEGER = MPI_COMBINER_STRUCT_INTEGER; +#else +static const int COMBINER_STRUCT_INTEGER = OMPI_WAS_MPI_COMBINER_STRUCT_INTEGER; +#endif static const int COMBINER_STRUCT = MPI_COMBINER_STRUCT; static const int COMBINER_SUBARRAY = MPI_COMBINER_SUBARRAY; static const int COMBINER_DARRAY = MPI_COMBINER_DARRAY; diff --git a/ompi/mpi/cxx/mpicxx.h b/ompi/mpi/cxx/mpicxx.h index 551e823b6a7..5d193d3d7ff 100644 --- 
a/ompi/mpi/cxx/mpicxx.h +++ b/ompi/mpi/cxx/mpicxx.h @@ -44,7 +44,7 @@ #include -#if !defined(OMPI_IGNORE_CXX_SEEK) & OMPI_WANT_MPI_CXX_SEEK +#if !defined(OMPI_IGNORE_CXX_SEEK) && OMPI_WANT_MPI_CXX_SEEK // We need to include the header files that define SEEK_* or use them // in ways that require them to be #defines so that if the user // includes them later, the double inclusion logic in the headers will diff --git a/ompi/mpi/fortran/base/fint_2_int.h b/ompi/mpi/fortran/base/fint_2_int.h index 5971694eb9b..44ce1289567 100644 --- a/ompi/mpi/fortran/base/fint_2_int.h +++ b/ompi/mpi/fortran/base/fint_2_int.h @@ -11,8 +11,8 @@ * All rights reserved. * Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2012 Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2014 Research Organization for Information Science - * and Technology (RIST). All rights reserved. + * Copyright (c) 2014-2019 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -161,9 +161,24 @@ /* * Define MACROS to take account of different size of logical from int + * + * There used to be an in-place option for the below conversions of + * logical arrays. So if mpi_cart_create(..., periods, ...) took an + * input array of Fortran logicals, it would walk the array converting + * the elements to C-logical values, then at the end it would restore + * the values back to Fortran logicals. + * + * The problem with that is periods is an INPUT argument and some + * Fortran compilers even put it in read-only memory because of that. + * So writing to it wasn't generally okay, even though we were restoring it + * before returning. + * + * The in-place option is hence only valid if no conversion is ever needed + * (e.g. Fortran logical and C int have the same size *and* Fortran logical + * .TRUE. value is 1 in C).
*/ -#if OMPI_SIZEOF_FORTRAN_LOGICAL == SIZEOF_INT +#if (OMPI_SIZEOF_FORTRAN_LOGICAL == SIZEOF_INT) && (OMPI_FORTRAN_VALUE_TRUE == 1) # define OMPI_LOGICAL_NAME_DECL(in) /* Not needed for int==logical */ # define OMPI_LOGICAL_NAME_CONVERT(in) in /* Not needed for int==logical */ # define OMPI_LOGICAL_SINGLE_NAME_CONVERT(in) in /* Not needed for int==logical */ @@ -172,37 +187,15 @@ # define OMPI_ARRAY_LOGICAL_2_INT_ALLOC(in,n) /* Not needed for int==logical */ # define OMPI_ARRAY_LOGICAL_2_INT_CLEANUP(in) /* Not needed for int==logical */ -# if OMPI_FORTRAN_VALUE_TRUE == 1 -# define OMPI_FORTRAN_MUST_CONVERT_LOGICAL_2_INT 0 -# define OMPI_LOGICAL_2_INT(a) a -# define OMPI_INT_2_LOGICAL(a) a -# define OMPI_ARRAY_LOGICAL_2_INT(in, n) -# define OMPI_ARRAY_INT_2_LOGICAL(in, n) -# define OMPI_SINGLE_INT_2_LOGICAL(a) /* Single-OUT variable -- Not needed for int==logical, true=1 */ -# else -# define OMPI_FORTRAN_MUST_CONVERT_LOGICAL_2_INT 1 -# define OMPI_LOGICAL_2_INT(a) ((a)==0? 0 : 1) -# define OMPI_INT_2_LOGICAL(a) ((a)==0? 
0 : OMPI_FORTRAN_VALUE_TRUE) -# define OMPI_SINGLE_INT_2_LOGICAL(a) *a=OMPI_INT_2_LOGICAL(OMPI_LOGICAL_NAME_CONVERT(*a)) -# define OMPI_ARRAY_LOGICAL_2_INT(in, n) do { \ - int converted_n = (int)(n); \ - OMPI_ARRAY_LOGICAL_2_INT_ALLOC(in, converted_n + 1); \ - while (--converted_n >= 0) { \ - OMPI_LOGICAL_ARRAY_NAME_CONVERT(in)[converted_n]=OMPI_LOGICAL_2_INT(in[converted_n]); \ - } \ - } while (0) -# define OMPI_ARRAY_INT_2_LOGICAL(in, n) do { \ - int converted_n = (int)(n); \ - while (--converted_n >= 0) { \ - in[converted_n]=OMPI_INT_2_LOGICAL(OMPI_LOGICAL_ARRAY_NAME_CONVERT(in)[converted_n]); \ - } \ - OMPI_ARRAY_LOGICAL_2_INT_CLEANUP(in); \ - } while (0) - -# endif +# define OMPI_FORTRAN_MUST_CONVERT_LOGICAL_2_INT 0 +# define OMPI_LOGICAL_2_INT(a) a +# define OMPI_INT_2_LOGICAL(a) a +# define OMPI_ARRAY_LOGICAL_2_INT(in, n) +# define OMPI_ARRAY_INT_2_LOGICAL(in, n) +# define OMPI_SINGLE_INT_2_LOGICAL(a) /* Single-OUT variable -- Not needed for int==logical, true=1 */ #else /* - * For anything other than Fortran-logical == C-int, we have to convert + * For anything other than Fortran-logical == C-int or some .TRUE. is not 1 in C, we have to convert */ # define OMPI_FORTRAN_MUST_CONVERT_LOGICAL_2_INT 1 # define OMPI_LOGICAL_NAME_DECL(in) int c_##in @@ -238,7 +231,7 @@ } \ OMPI_ARRAY_LOGICAL_2_INT_CLEANUP(in); \ } while (0) -#endif /* OMPI_SIZEOF_FORTRAN_LOGICAL */ +#endif /* OMPI_SIZEOF_FORTRAN_LOGICAL && OMPI_FORTRAN_VALUE_TRUE */ #endif /* OMPI_FORTRAN_BASE_FINT_2_INT_H */ diff --git a/ompi/mpi/fortran/base/gen-mpi-sizeof.pl b/ompi/mpi/fortran/base/gen-mpi-sizeof.pl index 5ea3dca3a47..baa9cf1da59 100755 --- a/ompi/mpi/fortran/base/gen-mpi-sizeof.pl +++ b/ompi/mpi/fortran/base/gen-mpi-sizeof.pl @@ -1,8 +1,8 @@ #!/usr/bin/env perl # # Copyright (c) 2014-2015 Cisco Systems, Inc. All rights reserved. -# Copyright (c) 2015 Research Organization for Information Science -# and Technology (RIST). All rights reserved. 
+# Copyright (c) 2015-2018 Research Organization for Information Science +# and Technology (RIST). All rights reserved. # $COPYRIGHT$ # # Script to generate the overloaded MPI_SIZEOF interfaces and @@ -156,6 +156,8 @@ sub generate { queue_sub("complex(real${size})", "complex${size}", "real${size}"); } } +queue_sub("character", "character"); +queue_sub("logical", "logical"); ####################################################### diff --git a/ompi/mpi/fortran/mpif-h/Makefile.am b/ompi/mpi/fortran/mpif-h/Makefile.am index db54c677143..2c0a6c050f0 100644 --- a/ompi/mpi/fortran/mpif-h/Makefile.am +++ b/ompi/mpi/fortran/mpif-h/Makefile.am @@ -138,6 +138,9 @@ lib@OMPI_LIBMPI_NAME@_mpifh_la_SOURCES += \ alltoall_f.c \ alltoallv_f.c \ alltoallw_f.c \ + attr_delete_f.c \ + attr_get_f.c \ + attr_put_f.c \ barrier_f.c \ bcast_f.c \ bsend_f.c \ @@ -335,6 +338,8 @@ lib@OMPI_LIBMPI_NAME@_mpifh_la_SOURCES += \ iscatterv_f.c \ issend_f.c \ is_thread_main_f.c \ + keyval_create_f.c \ + keyval_free_f.c \ lookup_name_f.c \ mprobe_f.c \ mrecv_f.c \ @@ -476,18 +481,13 @@ lib@OMPI_LIBMPI_NAME@_mpifh_la_SOURCES += \ win_flush_local_f.c \ win_flush_local_all_f.c - -if OMPI_ENABLE_MPI1_COMPAT +# include all of the removed MPI functions in library +# for v4.0.x regardless of state of OMPI_ENABLE_MPI1_COMPAT lib@OMPI_LIBMPI_NAME@_mpifh_la_SOURCES += \ address_f.c \ - attr_delete_f.c \ - attr_get_f.c \ - attr_put_f.c \ errhandler_create_f.c \ errhandler_get_f.c \ errhandler_set_f.c \ - keyval_create_f.c \ - keyval_free_f.c \ type_extent_f.c \ type_hindexed_f.c \ type_hvector_f.c \ @@ -495,7 +495,6 @@ lib@OMPI_LIBMPI_NAME@_mpifh_la_SOURCES += \ type_struct_f.c \ type_ub_f.c endif -endif # # Conditionally install the header files diff --git a/ompi/mpi/fortran/mpif-h/address_f.c b/ompi/mpi/fortran/mpif-h/address_f.c index 85d1369ae2d..1abdf8cfb18 100644 --- a/ompi/mpi/fortran/mpif-h/address_f.c +++ b/ompi/mpi/fortran/mpif-h/address_f.c @@ -12,6 +12,7 @@ * Copyright (c) 2011-2012 Cisco Systems, 
Inc. All rights reserved. * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. + * Copyright (c) 2018 IBM Corporation. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow diff --git a/ompi/mpi/fortran/mpif-h/alltoallw_f.c b/ompi/mpi/fortran/mpif-h/alltoallw_f.c index cb2328cf972..55b782a7928 100644 --- a/ompi/mpi/fortran/mpif-h/alltoallw_f.c +++ b/ompi/mpi/fortran/mpif-h/alltoallw_f.c @@ -10,7 +10,7 @@ * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2015 Research Organization for Information Science + * Copyright (c) 2015-2019 Research Organization for Information Science * and Technology (RIST). All rights reserved. * $COPYRIGHT$ * @@ -23,6 +23,7 @@ #include "ompi/mpi/fortran/mpif-h/bindings.h" #include "ompi/mpi/fortran/base/constants.h" +#include "ompi/communicator/communicator.h" #if OMPI_BUILD_MPI_PROFILING #if OPAL_HAVE_WEAK_SYMBOLS @@ -74,7 +75,7 @@ void ompi_alltoallw_f(char *sendbuf, MPI_Fint *sendcounts, MPI_Fint *comm, MPI_Fint *ierr) { MPI_Comm c_comm; - MPI_Datatype *c_sendtypes, *c_recvtypes; + MPI_Datatype *c_sendtypes = NULL, *c_recvtypes; int size, c_ierr; OMPI_ARRAY_NAME_DECL(sendcounts); OMPI_ARRAY_NAME_DECL(sdispls); @@ -82,20 +83,22 @@ void ompi_alltoallw_f(char *sendbuf, MPI_Fint *sendcounts, OMPI_ARRAY_NAME_DECL(rdispls); c_comm = PMPI_Comm_f2c(*comm); - PMPI_Comm_size(c_comm, &size); + size = OMPI_COMM_IS_INTER(c_comm)?ompi_comm_remote_size(c_comm):ompi_comm_size(c_comm); - c_sendtypes = (MPI_Datatype *) malloc(size * sizeof(MPI_Datatype)); - c_recvtypes = (MPI_Datatype *) malloc(size * sizeof(MPI_Datatype)); + if (!OMPI_IS_FORTRAN_IN_PLACE(sendbuf)) { + c_sendtypes = (MPI_Datatype *) malloc(size * sizeof(MPI_Datatype)); + OMPI_ARRAY_FINT_2_INT(sendcounts, size); + OMPI_ARRAY_FINT_2_INT(sdispls, size); + for (int i=0; i 0) { - 
c_sendtypes[size - 1] = PMPI_Type_f2c(sendtypes[size - 1]); - c_recvtypes[size - 1] = PMPI_Type_f2c(recvtypes[size - 1]); - --size; + for (int i=0; i 0) { - c_sendtypes[size - 1] = PMPI_Type_f2c(sendtypes[size - 1]); - c_recvtypes[size - 1] = PMPI_Type_f2c(recvtypes[size - 1]); - --size; + for (int i=0; iJava binding of the MPI operation MPI_IBSEND. + *

Java binding of the MPI operation {@code MPI_IBSEND}. * @param buf send buffer * @param count number of items to send * @param type datatype of each item in send buffer diff --git a/ompi/mpi/man/man3/MPI_Neighbor_allgather.3in b/ompi/mpi/man/man3/MPI_Neighbor_allgather.3in index 74aee8799e7..e152d747ce8 100644 --- a/ompi/mpi/man/man3/MPI_Neighbor_allgather.3in +++ b/ompi/mpi/man/man3/MPI_Neighbor_allgather.3in @@ -33,13 +33,14 @@ MPI_NEIGHBOR_ALLGATHER(\fISENDBUF\fP,\fI SENDCOUNT\fP,\fI SENDTYPE\fP,\fI RECVBU INTEGER \fIIERROR\fP MPI_INEIGHBOR_ALLGATHER(\fISENDBUF\fP,\fI SENDCOUNT\fP,\fI SENDTYPE\fP,\fI RECVBUF\fP,\fI RECVCOUNT\fP,\fI - RECVTYPE\fP,\fI COMM\fP, \fPREQUEST\fI,\fI IERROR\fP) + RECVTYPE\fP,\fI COMM\fP, \fIREQUEST\fP,\fI IERROR\fP) \fISENDBUF\fP (*), \fIRECVBUF\fP (*) INTEGER \fISENDCOUNT\fP,\fI SENDTYPE\fP,\fI RECVCOUNT\fP,\fI RECVTYPE\fP,\fI COMM\fP, INTEGER \fIREQUEST, IERROR\fP .fi .SH Fortran 2008 Syntax +.nf USE mpi_f08 MPI_Neighbor_allgather(\fIsendbuf\fP, \fIsendcount\fP, \fIsendtype\fP, \fIrecvbuf\fP, \fIrecvcount\fP, \fIrecvtype\fP, \fIcomm\fP, \fIierror\fP) diff --git a/ompi/mpi/man/man3/MPI_T_cvar_handle_alloc.3in b/ompi/mpi/man/man3/MPI_T_cvar_handle_alloc.3in index 87e5f9f2efe..fa393b5cf66 100644 --- a/ompi/mpi/man/man3/MPI_T_cvar_handle_alloc.3in +++ b/ompi/mpi/man/man3/MPI_T_cvar_handle_alloc.3in @@ -20,6 +20,7 @@ int MPI_T_cvar_handle_alloc(int \fIcvar_index\fP, void *\fIobj_handle\fP, int MPI_T_cvar_handle_free(MPI_T_cvar_handle *\fIhandle\fP) +.fi .SH DESCRIPTION .ft R MPI_T_cvar_handle_alloc binds the control variable specified in \fIcvar_index\fP to the MPI diff --git a/ompi/mpi/man/man3/MPI_T_finalize.3in b/ompi/mpi/man/man3/MPI_T_finalize.3in index ef7ec71824c..7cb2b7dce2c 100644 --- a/ompi/mpi/man/man3/MPI_T_finalize.3in +++ b/ompi/mpi/man/man3/MPI_T_finalize.3in @@ -15,6 +15,7 @@ #include int MPI_T_finalize(void) +.fi .SH DESCRIPTION .ft R MPI_T_finalize() finalizes the MPI tool information interface and must be called the same 
diff --git a/ompi/mpi/man/man3/MPI_T_pvar_handle_alloc.3in b/ompi/mpi/man/man3/MPI_T_pvar_handle_alloc.3in index 1c9c844f148..dfcd4d19f55 100644 --- a/ompi/mpi/man/man3/MPI_T_pvar_handle_alloc.3in +++ b/ompi/mpi/man/man3/MPI_T_pvar_handle_alloc.3in @@ -15,17 +15,28 @@ .SH C Syntax .nf #include -int MPI_T_pvar_handle_alloc(int \fIpvar_index\fP, void *\fIobj_handle\fP, +int MPI_T_pvar_handle_alloc(MPI_T_pvar_session \fIsession\fP, int \fIpvar_index\fP, void *\fIobj_handle\fP, MPI_T_pvar_handle *\fIhandle\fP, int *\fIcount\fP) -int MPI_T_pvar_handle_free(MPI_T_pvar_handle *\fIhandle\fP) +int MPI_T_pvar_handle_free(MPI_T_pvar_session \fIsession\fP, MPI_T_pvar_handle *\fIhandle\fP) .SH DESCRIPTION .ft R MPI_T_pvar_handle_alloc binds the performance variable specified in \fIpvar_index\fP to the MPI -object specified in \fIobj_handle\fP. If MPI_T_pvar_get_info returns MPI_T_BIND_NO_OBJECT -as the binding for the variable the \fIobj_handle\fP argument is ignored. The number of -values represented by this performance variable is returned in the \fIcount\fP parameter. +object specified in \fIobj_handle\fP in the session identified by the parameter +\fIsession\fP. The object is passed in the argument \fIobj_handle\fP as an +address to a local variable that stores the object's handle. If +MPI_T_pvar_get_info returns MPI_T_BIND_NO_OBJECT as the binding +for the variable the \fIobj_handle\fP argument is ignored. The handle +allocated to reference the variable is returned in the argument \fIhandle\fP. Upon successful +return, \fIcount\fP contains the number of elements (of the datatype returned by a previous +MPI_T_PVAR_GET_INFO call) used to represent this variable. + +The value of \fIpvar_index\fP should be in the range 0 to \fInum_pvar - 1\fP, +where \fInum_pvar\fP is the number of available performance variables as +determined from a prior call to \fIMPI_T_PVAR_GET_NUM\fP.
The type of the +MPI object it references must be consistent with the type returned in the +bind argument in a prior call to \fIMPI_T_PVAR_GET_INFO\fP. MPI_T_pvar_handle_free frees a handle allocated by MPI_T_pvar_handle_alloc and sets the \fIhandle\fP argument to MPI_T_PVAR_HANDLE_NULL. @@ -50,11 +61,12 @@ MPI_T_pvar_handle_free() will fail if: The MPI Tools interface not initialized .TP 1i [MPI_T_ERR_INVALID_HANDLE] -The handle is invalid +The handle is invalid or the handle argument passed in is not associated with the session argument .SH SEE ALSO .ft R .nf MPI_T_pvar_get_info +MPI_T_pvar_get_num diff --git a/ompi/mpi/man/man3/MPI_T_pvar_session_create.3in b/ompi/mpi/man/man3/MPI_T_pvar_session_create.3in index 52a91c6617e..dd46817f314 100644 --- a/ompi/mpi/man/man3/MPI_T_pvar_session_create.3in +++ b/ompi/mpi/man/man3/MPI_T_pvar_session_create.3in @@ -19,6 +19,7 @@ int MPI_T_pvar_session_create(MPI_T_pvar_session *\fIsession\fP) int MPI_T_pvar_session_free(MPI_T_pvar_session *\fIsession\fP) +.fi .SH DESCRIPTION .ft R MPI_T_pvar_session_create creates a session for accessing performance variables. The diff --git a/ompi/mpi/man/man3/MPI_T_pvar_start.3in b/ompi/mpi/man/man3/MPI_T_pvar_start.3in index 450638149aa..2b1c9830d9b 100644 --- a/ompi/mpi/man/man3/MPI_T_pvar_start.3in +++ b/ompi/mpi/man/man3/MPI_T_pvar_start.3in @@ -19,6 +19,7 @@ int MPI_T_pvar_start(MPI_T_pvar_session \fIsession\fP, MPI_T_pvar_handle \fIhand int MPI_T_pvar_stop(MPI_T_pvar_session \fIsession\fP, MPI_T_pvar_handle \fIhandle\fP) +.fi .SH INPUT PARAMETERS .ft R .TP 1i diff --git a/ompi/mpi/man/man3/MPI_T_pvar_write.3in b/ompi/mpi/man/man3/MPI_T_pvar_write.3in index daaf28c0ac8..944a93c8e2f 100644 --- a/ompi/mpi/man/man3/MPI_T_pvar_write.3in +++ b/ompi/mpi/man/man3/MPI_T_pvar_write.3in @@ -33,7 +33,7 @@ Initial address of storage location for variable value. 
.SH DESCRIPTION .ft R MPI_T_pvar_write attempts to set the value of the performance variable identified by -the handle specified in \fIhandle\fP in the session specified in \fPsession\fI. The +the handle specified in \fIhandle\fP in the session specified in \fIsession\fP. The value to be written is specified in \fIbuf\fP. The caller must ensure that the buffer specified in \fIbuf\fP is large enough to hold the entire value of the performance variable. diff --git a/ompi/mpi/man/man3/MPI_Type_get_envelope.3in b/ompi/mpi/man/man3/MPI_Type_get_envelope.3in index 5832e9094ea..e624a291c04 100644 --- a/ompi/mpi/man/man3/MPI_Type_get_envelope.3in +++ b/ompi/mpi/man/man3/MPI_Type_get_envelope.3in @@ -84,20 +84,11 @@ MPI_COMBINER_NAMED a named predefined data type MPI_COMBINER_DUP MPI_Type_dup MPI_COMBINER_CONTIGUOUS MPI_Type_contiguous MPI_COMBINER_VECTOR MPI_Type_vector -MPI_COMBINER_HVECTOR_INTEGER MPI_Type_hvector from Fortran -MPI_COMBINER_HVECTOR MPI_Type_hvector from C or C++ - and MPI_Type_create for - all languages +MPI_COMBINER_HVECTOR MPI_Type_hvector MPI_COMBINER_INDEXED MPI_Type_indexed -MPI_COMBINER_HINDEXED_INTEGER MPI_Type_hindexed from Fortran -MPI_COMBINER_HINDEXED MPI_Type_hindexed from C or C++ - and MPI_Type_create_hindexed - for all languages +MPI_COMBINER_HINDEXED MPI_Type_hindexed MPI_COMBINER_INDEXED_BLOCK MPI_Type_create_indexed_block -MPI_COMBINER_STRUCT_INTEGER MPI_Type_struct from Fortran -MPI_COMBINER_STRUCT MPI_Type_struct from C or C++ - and MPI_Type_create_struct - for all languages +MPI_COMBINER_STRUCT MPI_Type_struct MPI_COMBINER_SUBARRAY MPI_Type_create_subarray MPI_COMBINER_DARRAY MPI_Type_create_darray MPI_COMBINER_F90_REAL MPI_Type_create_f90_real diff --git a/ompi/mpi/man/man3/MPI_Win_attach.3in b/ompi/mpi/man/man3/MPI_Win_attach.3in index d461e746d43..183141f7531 100644 --- a/ompi/mpi/man/man3/MPI_Win_attach.3in +++ b/ompi/mpi/man/man3/MPI_Win_attach.3in @@ -1,19 +1,20 @@ .\" -*- nroff -*- -.\" Copyright (c) 2015 Research Organization 
for Information Science -.\" and Technology (RIST). All rights reserved. +.\" Copyright (c) 2015-2019 Research Organization for Information Science +.\" and Technology (RIST). All rights reserved. +.\" Copyright (c) 2019 FUJITSU LIMITED. All rights reserved. .\" $COPYRIGHT$ .TH MPI_Win_attach 3 "#OMPI_DATE#" "#PACKAGE_VERSION#" "#PACKAGE_NAME#" .SH NAME -\fBMPI_Win_create, MPI_Win_detach\fP \- One-sided MPI call that attach / detach a window object for RMA operations. +\fBMPI_Win_attach, MPI_Win_detach\fP \- One-sided MPI call that attach / detach a window object for RMA operations. .SH SYNTAX .ft R .SH C Syntax .nf #include -MPI_Win_attach(MPI_Win *\fIwin\fP, void *\fIbase\fP, MPI_Aint \fIsize\fP) +MPI_Win_attach(MPI_Win \fIwin\fP, void *\fIbase\fP, MPI_Aint \fIsize\fP) -MPI_Win_detach(MPI_Win *\fIwin\fP, void *\fIbase\fP) +MPI_Win_detach(MPI_Win \fIwin\fP, void *\fIbase\fP) .fi .SH Fortran Syntax .nf @@ -62,7 +63,7 @@ Fortran only: Error status (integer). .ft R MPI_Win_attach is a one-sided MPI communication collective call executed by all processes in the group of \fIcomm\fP. It returns a window object that can be used by these processes to perform RMA operations. Each process specifies a window of existing memory that it exposes to RMA accesses by the processes in the group of \fIcomm\fP. The window consists of \fIsize\fP bytes, starting at address \fIbase\fP. A process may elect to expose no memory by specifying \fIsize\fP = 0. .sp -If the \fIbase\fP value used by MPI_Win_create was allocated by MPI_Alloc_mem, the size of the window can be no larger than the value set by the MPI_ALLOC_MEM function. +If the \fIbase\fP value used by MPI_Win_attach was allocated by MPI_Alloc_mem, the size of the window can be no larger than the value set by the MPI_ALLOC_MEM function. 
.sp .SH NOTES diff --git a/ompi/mpi/man/man3/MPI_Win_detach.3in b/ompi/mpi/man/man3/MPI_Win_detach.3in index ff60c711116..42a7c2b2dfb 100644 --- a/ompi/mpi/man/man3/MPI_Win_detach.3in +++ b/ompi/mpi/man/man3/MPI_Win_detach.3in @@ -1 +1 @@ -.so man3/MPI_Win_attach +.so man3/MPI_Win_attach.3 diff --git a/ompi/mpiext/affinity/c/Makefile.am b/ompi/mpiext/affinity/c/Makefile.am index 46573f7a461..e42dfebdafc 100644 --- a/ompi/mpiext/affinity/c/Makefile.am +++ b/ompi/mpiext/affinity/c/Makefile.am @@ -3,6 +3,8 @@ # University Research and Technology # Corporation. All rights reserved. # Copyright (c) 2010-2014 Cisco Systems, Inc. All rights reserved. +# Copyright (c) 2018 Research Organization for Information Science +# and Technology (RIST). All rights reserved. # $COPYRIGHT$ # # Additional copyrights may follow @@ -30,7 +32,7 @@ noinst_LTLIBRARIES = libmpiext_affinity_c.la # This is where the top-level header file (that is included in # ) must be installed. -ompidir = $(ompiincludedir)/ompi/mpiext/affinity/c +ompidir = $(ompiincludedir)/mpiext/ # This is the header file that is installed. ompi_HEADERS = mpiext_affinity_c.h diff --git a/ompi/mpiext/cr/c/Makefile.am b/ompi/mpiext/cr/c/Makefile.am index fe54fe557b1..9a35c553814 100644 --- a/ompi/mpiext/cr/c/Makefile.am +++ b/ompi/mpiext/cr/c/Makefile.am @@ -3,6 +3,8 @@ # University Research and Technology # Corporation. All rights reserved. # Copyright (c) 2012 Cisco Systems, Inc. All rights reserved. +# Copyright (c) 2018 Research Organization for Information Science +# and Technology (RIST). All rights reserved. # $COPYRIGHT$ # # Additional copyrights may follow @@ -22,7 +24,7 @@ noinst_LTLIBRARIES = libmpiext_cr_c.la # This is where the top-level header file (that is included in # ) must be installed. -ompidir = $(ompiincludedir)/ompi/mpiext/cr/c +ompidir = $(ompiincludedir)/mpiext # This is the header file that is installed. 
ompi_HEADERS = mpiext_cr_c.h diff --git a/ompi/mpiext/cuda/c/Makefile.am b/ompi/mpiext/cuda/c/Makefile.am index 41f0ab5fd52..f303cc70824 100644 --- a/ompi/mpiext/cuda/c/Makefile.am +++ b/ompi/mpiext/cuda/c/Makefile.am @@ -4,6 +4,8 @@ # Corporation. All rights reserved. # Copyright (c) 2010-2014 Cisco Systems, Inc. All rights reserved. # Copyright (c) 2015 NVIDIA, Inc. All rights reserved. +# Copyright (c) 2018 Research Organization for Information Science +# and Technology (RIST). All rights reserved. # $COPYRIGHT$ # # Additional copyrights may follow @@ -25,10 +27,10 @@ noinst_LTLIBRARIES = libmpiext_cuda_c.la # This is where the top-level header file (that is included in # ) must be installed. -ompidir = $(ompiincludedir)/ompi/mpiext/cuda/c +ompidir = $(ompiincludedir)/mpiext # This is the header file that is installed. -ompi_HEADERS = mpiext_cuda_c.h +nodist_ompi_HEADERS = mpiext_cuda_c.h # Sources for the convenience libtool library. Other than the one # header file, all source files in the extension have no file naming diff --git a/ompi/mpiext/example/c/Makefile.am b/ompi/mpiext/example/c/Makefile.am index 7f9e74df6c8..ac8996b9862 100644 --- a/ompi/mpiext/example/c/Makefile.am +++ b/ompi/mpiext/example/c/Makefile.am @@ -4,6 +4,8 @@ # Corporation. All rights reserved. # Copyright (c) 2011 Oak Ridge National Labs. All rights reserved. # Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved. +# Copyright (c) 2018 Research Organization for Information Science +# and Technology (RIST). All rights reserved. # $COPYRIGHT$ # # Additional copyrights may follow @@ -23,7 +25,7 @@ noinst_LTLIBRARIES = libmpiext_example_c.la # This is where the top-level header file (that is included in # ) must be installed. -ompidir = $(ompiincludedir)/ompi/mpiext/example/c +ompidir = $(ompiincludedir)/mpiext # This is the header file that is installed. 
ompi_HEADERS = mpiext_example_c.h diff --git a/ompi/mpiext/example/mpif-h/Makefile.am b/ompi/mpiext/example/mpif-h/Makefile.am index fdd1c2a257a..e73e4d7710f 100644 --- a/ompi/mpiext/example/mpif-h/Makefile.am +++ b/ompi/mpiext/example/mpif-h/Makefile.am @@ -4,6 +4,8 @@ # Corporation. All rights reserved. # Copyright (c) 2011 Oak Ridge National Labs. All rights reserved. # Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved. +# Copyright (c) 2018 Research Organization for Information Science +# and Technology (RIST). All rights reserved. # $COPYRIGHT$ # # Additional copyrights may follow @@ -26,7 +28,7 @@ noinst_LTLIBRARIES = # Directory where the header file to be included in mpif-ext.h must be # installed. -ompidir = $(ompiincludedir)/ompi/mpiext/example/mpif-h +ompidir = $(ompiincludedir)/mpiext # Just like noinst_LTLIBRARIES, set this macro to empty and # conditionally add to it later. diff --git a/ompi/mpiext/pcollreq/README.txt b/ompi/mpiext/pcollreq/README.txt index 0b30de9fe89..7dd491f81df 100644 --- a/ompi/mpiext/pcollreq/README.txt +++ b/ompi/mpiext/pcollreq/README.txt @@ -4,7 +4,8 @@ $COPYRIGHT$ This extension provides the feature of persistent collective communication operations and persistent neighborhood collective communication operations, -which is proposed in the MPI Forum as of June 2018. +which is planned to be included in the next MPI Standard after MPI-3.1 as +of Nov. 2018. See MPIX_Barrier_init(3) for more details. diff --git a/ompi/mpiext/pcollreq/c/MPIX_Barrier_init.3in b/ompi/mpiext/pcollreq/c/MPIX_Barrier_init.3in index 5977e6e0041..db6cccc9c54 100644 --- a/ompi/mpiext/pcollreq/c/MPIX_Barrier_init.3in +++ b/ompi/mpiext/pcollreq/c/MPIX_Barrier_init.3in @@ -115,27 +115,435 @@ int MPIX_Neighbor_alltoallw_init(const void *\fIsendbuf\fP, const MPI_Datatype \fIrecvtypes\fP[], MPI_Comm \fIcomm\fP, MPI_Info \fIinfo\fP, MPI_Request *\fIrequest\fP) +.fi +.SH Fortran Syntax +.nf +USE MPI +USE MPI_EXT +! 
or the older form: INCLUDE 'mpif.h'; INCLUDE 'mpif-ext.h' +MPIX_ALLGATHER_INIT(\fISENDBUF, SENDCOUNT, SENDTYPE, RECVBUF, RECVCOUNT, + RECVTYPE, COMM, INFO, REQUEST, IERROR\fP) + \fISENDBUF\fP(*)\fI, RECVBUF\fP(*) + INTEGER \fISENDCOUNT, SENDTYPE, RECVCOUNT, RECVTYPE, COMM, INFO\fP + INTEGER \fIREQUEST, IERROR\fP + +MPIX_ALLGATHERV_INIT(\fISENDBUF, SENDCOUNT, SENDTYPE, RECVBUF, + RECVCOUNT, DISPLS, RECVTYPE, COMM, INFO, REQUEST, IERROR\fP) + \fISENDBUF\fP(*)\fI, RECVBUF\fP(*) + INTEGER \fISENDCOUNT, SENDTYPE, RECVCOUNT\fP(*) + INTEGER \fIDISPLS\fP(*)\fI, RECVTYPE, COMM, INFO, REQUEST, IERROR\fP + +MPIX_ALLREDUCE_INIT(\fISENDBUF, RECVBUF, COUNT, DATATYPE, OP, COMM, INFO, + REQUEST, IERROR\fP) + \fISENDBUF\fP(*)\fI, RECVBUF\fP(*) + INTEGER \fICOUNT, DATATYPE, OP, COMM, INFO, REQUEST, IERROR\fP + +MPIX_ALLTOALL_INIT(\fISENDBUF, SENDCOUNT, SENDTYPE, RECVBUF, RECVCOUNT, + RECVTYPE, COMM, INFO, REQUEST, IERROR\fP) + \fISENDBUF(*), RECVBUF(*)\fP + INTEGER \fISENDCOUNT, SENDTYPE, RECVCOUNT, RECVTYPE\fP + INTEGER \fICOMM, INFO, REQUEST, IERROR\fP + +MPIX_ALLTOALLV_INIT(\fISENDBUF, SENDCOUNTS, SDISPLS, SENDTYPE, + RECVBUF, RECVCOUNTS, RDISPLS, RECVTYPE, COMM, INFO, REQUEST, + IERROR\fP) + \fISENDBUF(*), RECVBUF(*)\fP + INTEGER \fISENDCOUNTS(*), SDISPLS(*), SENDTYPE\fP + INTEGER \fIRECVCOUNTS(*), RDISPLS(*), RECVTYPE\fP + INTEGER \fICOMM, INFO, REQUEST, IERROR\fP + +MPIX_ALLTOALLW_INIT(\fISENDBUF, SENDCOUNTS, SDISPLS, SENDTYPES, + RECVBUF, RECVCOUNTS, RDISPLS, RECVTYPES, COMM, INFO, REQUEST, + IERROR\fP) + \fISENDBUF(*), RECVBUF(*)\fP + INTEGER \fISENDCOUNTS(*), SDISPLS(*), SENDTYPES(*)\fP + INTEGER \fIRECVCOUNTS(*), RDISPLS(*), RECVTYPES(*)\fP + INTEGER \fICOMM, INFO, REQUEST, IERROR\fP + +MPIX_BARRIER_INIT(\fICOMM\fP, \fIINFO\fP, \fIREQUEST\fP, \fIIERROR\fP) + INTEGER \fICOMM\fP, \fIINFO\fP, \fIREQUEST\fP, \fIIERROR\fP + +MPIX_BCAST_INIT(\fIBUFFER\fP, \fICOUNT\fP, \fIDATATYPE\fP, \fIROOT\fP, \fICOMM\fP, \fIINFO\fP, \fIREQUEST\fP, + \fIIERROR\fP) + \fIBUFFER\fP(*) + INTEGER 
\fICOUNT\fP, \fIDATATYPE\fP, \fIROOT\fP, \fICOMM\fP, \fIINFO\fP, \fIREQUEST\fP, \fIIERROR\fP + +MPIX_EXSCAN_INIT(\fISENDBUF, RECVBUF, COUNT, DATATYPE, OP, COMM, INFO, + REQUEST, IERROR\fP) + \fISENDBUF(*), RECVBUF(*)\fP + INTEGER \fICOUNT, DATATYPE, OP, COMM, INFO, REQUEST, IERROR\fP + +MPIX_GATHER_INIT(\fISENDBUF, SENDCOUNT, SENDTYPE, RECVBUF, RECVCOUNT, + RECVTYPE, ROOT, COMM, INFO, REQUEST, IERROR\fP) + \fISENDBUF(*), RECVBUF(*)\fP + INTEGER \fISENDCOUNT, SENDTYPE, RECVCOUNT, RECVTYPE, ROOT\fP + INTEGER \fICOMM, INFO, REQUEST, IERROR\fP + +MPIX_GATHERV_INIT(\fISENDBUF, SENDCOUNT, SENDTYPE, RECVBUF, RECVCOUNTS, + DISPLS, RECVTYPE, ROOT, COMM, INFO, REQUEST, IERROR\fP) + \fISENDBUF(*), RECVBUF(*)\fP + INTEGER \fISENDCOUNT, SENDTYPE, RECVCOUNTS(*), DISPLS(*)\fP + INTEGER \fIRECVTYPE, ROOT, COMM, INFO, REQUEST, IERROR\fP + +MPIX_REDUCE_INIT(\fISENDBUF, RECVBUF, COUNT, DATATYPE, OP, ROOT, COMM, + INFO, REQUEST, IERROR\fP) + \fISENDBUF(*), RECVBUF(*)\fP + INTEGER \fICOUNT, DATATYPE, OP, ROOT, COMM, INFO, REQUEST, IERROR\fP + +MPIX_REDUCE_SCATTER_INIT(\fISENDBUF, RECVBUF, RECVCOUNTS, DATATYPE, OP, + COMM, INFO, REQUEST, IERROR\fP) + \fISENDBUF(*), RECVBUF(*)\fP + INTEGER \fIRECVCOUNTS(*), DATATYPE, OP, COMM, INFO, REQUEST, IERROR \fP + +MPIX_REDUCE_SCATTER_BLOCK_INIT(\fISENDBUF, RECVBUF, RECVCOUNT, DATATYPE, + OP, COMM, INFO, REQUEST, IERROR\fP) + \fISENDBUF(*), RECVBUF(*)\fP + INTEGER \fIRECVCOUNT, DATATYPE, OP, COMM, INFO, REQUEST, IERROR \fP + +MPIX_SCAN_INIT(\fISENDBUF, RECVBUF, COUNT, DATATYPE, OP, COMM, INFO, + REQUEST, IERROR\fP) + \fISENDBUF(*), RECVBUF(*)\fP + INTEGER \fICOUNT, DATATYPE, OP, COMM, INFO, REQUEST, IERROR\fP + +MPIX_SCATTER_INIT(\fISENDBUF, SENDCOUNT, SENDTYPE, RECVBUF, RECVCOUNT, + RECVTYPE, ROOT, COMM, INFO, REQUEST, IERROR\fP) + \fISENDBUF(*), RECVBUF(*)\fP + INTEGER \fISENDCOUNT, SENDTYPE, RECVCOUNT, RECVTYPE, ROOT\fP + INTEGER \fICOMM, INFO, REQUEST, IERROR\fP + +MPIX_SCATTERV_INIT(\fISENDBUF, SENDCOUNTS, DISPLS, SENDTYPE, RECVBUF, + 
RECVCOUNT, RECVTYPE, ROOT, COMM, INFO, REQUEST, IERROR\fP) + \fISENDBUF(*), RECVBUF(*)\fP + INTEGER \fISENDCOUNTS(*), DISPLS(*), SENDTYPE\fP + INTEGER \fIRECVCOUNT, RECVTYPE, ROOT, COMM, INFO, REQUEST, IERROR\fP + +MPIX_NEIGHBOR_ALLGATHER_INIT(\fISENDBUF\fP, \fISENDCOUNT\fP, \fISENDTYPE\fP, \fIRECVBUF\fP, + \fIRECVCOUNT\fP, \fIRECVTYPE\fP, \fICOMM\fP, \fIINFO\fP, \fIREQUEST\fP, \fIIERROR\fP) + \fISENDBUF\fP(*), \fIRECVBUF\fP(*) + INTEGER \fISENDCOUNT\fP, \fISENDTYPE\fP, \fIRECVCOUNT\fP, \fIRECVTYPE\fP, \fICOMM\fP + INTEGER \fIINFO, REQUEST, IERROR\fP + +MPIX_NEIGHBOR_ALLGATHERV_INIT(\fISENDBUF\fP, \fISENDCOUNT\fP, \fISENDTYPE\fP, \fIRECVBUF\fP, + \fIRECVCOUNT\fP, \fIDISPLS\fP, \fIRECVTYPE\fP, \fICOMM\fP, \fIINFO\fP, \fIREQUEST\fP, \fIIERROR\fP) + \fISENDBUF\fP(*), \fIRECVBUF\fP(*) + INTEGER \fISENDCOUNT\fP, \fISENDTYPE\fP, \fIRECVCOUNT\fP(*) + INTEGER \fIDISPLS\fP(*), \fIRECVTYPE\fP, \fICOMM\fP, \fIINFO\fP, \fIREQUEST\fP, \fIIERROR\fP + +MPIX_NEIGHBOR_ALLTOALL_INIT(\fISENDBUF, SENDCOUNT, SENDTYPE, RECVBUF, + RECVCOUNT, RECVTYPE, COMM, INFO, REQUEST, IERROR\fP) + \fISENDBUF(*), RECVBUF(*)\fP + INTEGER \fISENDCOUNT, SENDTYPE, RECVCOUNT, RECVTYPE\fP + INTEGER \fICOMM, INFO, REQUEST, IERROR\fP + +MPIX_NEIGHBOR_ALLTOALLV_INIT(\fISENDBUF, SENDCOUNTS, SDISPLS, SENDTYPE, + RECVBUF, RECVCOUNTS, RDISPLS, RECVTYPE, COMM, INFO, REQUEST, + IERROR\fP) + \fISENDBUF(*), RECVBUF(*)\fP + INTEGER \fISENDCOUNTS(*), SDISPLS(*), SENDTYPE\fP + INTEGER \fIRECVCOUNTS(*), RDISPLS(*), RECVTYPE\fP + INTEGER \fICOMM, INFO, REQUEST, IERROR\fP + +MPIX_NEIGHBOR_ALLTOALLW_INIT(\fISENDBUF, SENDCOUNTS, SDISPLS, SENDTYPES, + RECVBUF, RECVCOUNTS, RDISPLS, RECVTYPES, COMM, INFO, REQUEST, + IERROR\fP) + \fISENDBUF(*), RECVBUF(*)\fP + INTEGER \fISENDCOUNTS(*), SENDTYPES(*)\fP + INTEGER \fIRECVCOUNTS(*), RECVTYPES(*)\fP + INTEGER(KIND=MPI_ADDRESS_KIND) \fISDISPLS(*), RDISPLS(*)\fP + INTEGER \fICOMM, INFO, REQUEST, IERROR\fP + +.fi +.SH Fortran 2008 Syntax +.nf +USE mpi_f08 +USE mpi_f08_ext 
+MPIX_Allgather_init(\fIsendbuf\fP, \fIsendcount\fP, \fIsendtype\fP, \fIrecvbuf\fP, \fIrecvcount\fP, + \fIrecvtype\fP, \fIcomm\fP, \fIinfo\fP, \fIrequest\fP, \fIierror\fP) + TYPE(*), DIMENSION(..), INTENT(IN), ASYNCHRONOUS :: \fIsendbuf\fP + TYPE(*), DIMENSION(..), ASYNCHRONOUS :: \fIrecvbuf\fP + INTEGER, INTENT(IN) :: \fIsendcount\fP, \fIrecvcount\fP + TYPE(MPI_Datatype), INTENT(IN) :: \fIsendtype\fP, \fIrecvtype\fP + TYPE(MPI_Comm), INTENT(IN) :: \fIcomm\fP + TYPE(MPI_Info), INTENT(IN) :: \fIinfo\fP + TYPE(MPI_Request), INTENT(OUT) :: \fIrequest\fP + INTEGER, OPTIONAL, INTENT(OUT) :: \fIierror\fP + +MPIX_Allgatherv_init(\fIsendbuf\fP, \fIsendcount\fP, \fIsendtype\fP, \fIrecvbuf\fP, \fIrecvcounts\fP, + \fIdispls\fP, \fIrecvtype\fP, \fIcomm\fP, \fIinfo\fP, \fIrequest\fP, \fIierror\fP) + TYPE(*), DIMENSION(..), INTENT(IN), ASYNCHRONOUS :: \fIsendbuf\fP + TYPE(*), DIMENSION(..), ASYNCHRONOUS :: \fIrecvbuf\fP + INTEGER, INTENT(IN) :: \fIsendcount\fP + INTEGER, INTENT(IN), ASYNCHRONOUS :: \fIrecvcounts(*)\fP, \fIdispls(*)\fP + TYPE(MPI_Datatype), INTENT(IN) :: \fIsendtype\fP, \fIrecvtype\fP + TYPE(MPI_Comm), INTENT(IN) :: \fIcomm\fP + TYPE(MPI_Info), INTENT(IN) :: \fIinfo\fP + TYPE(MPI_Request), INTENT(OUT) :: \fIrequest\fP + INTEGER, OPTIONAL, INTENT(OUT) :: \fIierror\fP + +MPIX_Allreduce_init(\fIsendbuf\fP, \fIrecvbuf\fP, \fIcount\fP, \fIdatatype\fP, \fIop\fP, \fIcomm\fP, \fIinfo\fP, + \fIrequest\fP, \fIierror\fP) + TYPE(*), DIMENSION(..), INTENT(IN), ASYNCHRONOUS :: \fIsendbuf\fP + TYPE(*), DIMENSION(..), ASYNCHRONOUS :: \fIrecvbuf\fP + INTEGER, INTENT(IN) :: \fIcount\fP + TYPE(MPI_Datatype), INTENT(IN) :: \fIdatatype\fP + TYPE(MPI_Op), INTENT(IN) :: \fIop\fP + TYPE(MPI_Comm), INTENT(IN) :: \fIcomm\fP + TYPE(MPI_Info), INTENT(IN) :: \fIinfo\fP + TYPE(MPI_Request), INTENT(OUT) :: \fIrequest\fP + INTEGER, OPTIONAL, INTENT(OUT) :: \fIierror\fP + +MPIX_Alltoall_init(\fIsendbuf\fP, \fIsendcount\fP, \fIsendtype\fP, \fIrecvbuf\fP, \fIrecvcount\fP, + \fIrecvtype\fP, 
\fIcomm\fP, \fIinfo\fP, \fIrequest\fP, \fIierror\fP) + TYPE(*), DIMENSION(..), INTENT(IN), ASYNCHRONOUS :: \fIsendbuf\fP + TYPE(*), DIMENSION(..), ASYNCHRONOUS :: \fIrecvbuf\fP + INTEGER, INTENT(IN) :: \fIsendcount\fP, \fIrecvcount\fP + TYPE(MPI_Datatype), INTENT(IN) :: \fIsendtype\fP, \fIrecvtype\fP + TYPE(MPI_Comm), INTENT(IN) :: \fIcomm\fP + TYPE(MPI_Info), INTENT(IN) :: \fIinfo\fP + TYPE(MPI_Request), INTENT(OUT) :: \fIrequest\fP + INTEGER, OPTIONAL, INTENT(OUT) :: \fIierror\fP + +MPIX_Alltoallv_init(\fIsendbuf\fP, \fIsendcounts\fP, \fIsdispls\fP, \fIsendtype\fP, \fIrecvbuf\fP, + \fIrecvcounts\fP, \fIrdispls\fP, \fIrecvtype\fP, \fIcomm\fP, \fIinfo\fP, \fIrequest\fP, \fIierror\fP) + TYPE(*), DIMENSION(..), INTENT(IN), ASYNCHRONOUS :: \fIsendbuf\fP + TYPE(*), DIMENSION(..), ASYNCHRONOUS :: \fIrecvbuf\fP + INTEGER, INTENT(IN), ASYNCHRONOUS :: \fIsendcounts(*)\fP, \fIsdispls(*)\fP, + \fIrecvcounts(*)\fP, \fIrdispls(*)\fP + TYPE(MPI_Datatype), INTENT(IN) :: \fIsendtype\fP, \fIrecvtype\fP + TYPE(MPI_Comm), INTENT(IN) :: \fIcomm\fP + TYPE(MPI_Info), INTENT(IN) :: \fIinfo\fP + TYPE(MPI_Request), INTENT(OUT) :: \fIrequest\fP + INTEGER, OPTIONAL, INTENT(OUT) :: \fIierror\fP + +MPIX_Alltoallw_init(\fIsendbuf\fP, \fIsendcounts\fP, \fIsdispls\fP, \fIsendtypes\fP, \fIrecvbuf\fP, + \fIrecvcounts\fP, \fIrdispls\fP, \fIrecvtypes\fP, \fIcomm\fP, \fIinfo\fP, \fIrequest\fP, \fIierror\fP) + TYPE(*), DIMENSION(..), INTENT(IN), ASYNCHRONOUS :: \fIsendbuf\fP + TYPE(*), DIMENSION(..), ASYNCHRONOUS :: \fIrecvbuf\fP + INTEGER, INTENT(IN), ASYNCHRONOUS :: \fIsendcounts(*)\fP, \fIsdispls(*)\fP, + \fIrecvcounts(*)\fP, \fIrdispls(*)\fP + TYPE(MPI_Datatype), INTENT(IN), ASYNCHRONOUS :: \fIsendtypes(*)\fP, + \fIrecvtypes(*)\fP + TYPE(MPI_Comm), INTENT(IN) :: \fIcomm\fP + TYPE(MPI_Info), INTENT(IN) :: \fIinfo\fP + TYPE(MPI_Request), INTENT(OUT) :: \fIrequest\fP + INTEGER, OPTIONAL, INTENT(OUT) :: \fIierror\fP + +MPIX_Barrier_init(\fIcomm\fP, \fIinfo\fP, \fIrequest\fP, \fIierror\fP) + 
TYPE(MPI_Comm), INTENT(IN) :: \fIcomm\fP + TYPE(MPI_Info), INTENT(IN) :: \fIinfo\fP + TYPE(MPI_Request), INTENT(OUT) :: \fIrequest\fP + INTEGER, OPTIONAL, INTENT(OUT) :: \fIierror\fP + +MPIX_Bcast_init(\fIbuffer\fP, \fIcount\fP, \fIdatatype\fP, \fIroot\fP, \fIcomm\fP, \fIinfo\fP, \fIrequest\fP, + \fIierror\fP) + TYPE(*), DIMENSION(..), ASYNCHRONOUS :: \fIbuffer\fP + INTEGER, INTENT(IN) :: \fIcount\fP, \fIroot\fP + TYPE(MPI_Datatype), INTENT(IN) :: \fIdatatype\fP + TYPE(MPI_Comm), INTENT(IN) :: \fIcomm\fP + TYPE(MPI_Info), INTENT(IN) :: \fIinfo\fP + TYPE(MPI_Request), INTENT(OUT) :: \fIrequest\fP + INTEGER, OPTIONAL, INTENT(OUT) :: \fIierror\fP + +MPIX_Exscan_init(\fIsendbuf\fP, \fIrecvbuf\fP, \fIcount\fP, \fIdatatype\fP, \fIop\fP, \fIcomm\fP, \fIinfo\fP, + \fIrequest\fP, \fIierror\fP) + TYPE(*), DIMENSION(..), INTENT(IN), ASYNCHRONOUS :: \fIsendbuf\fP + TYPE(*), DIMENSION(..), ASYNCHRONOUS :: \fIrecvbuf\fP + INTEGER, INTENT(IN) :: \fIcount\fP + TYPE(MPI_Datatype), INTENT(IN) :: \fIdatatype\fP + TYPE(MPI_Op), INTENT(IN) :: \fIop\fP + TYPE(MPI_Comm), INTENT(IN) :: \fIcomm\fP + TYPE(MPI_Info), INTENT(IN) :: \fIinfo\fP + TYPE(MPI_Request), INTENT(OUT) :: \fIrequest\fP + INTEGER, OPTIONAL, INTENT(OUT) :: \fIierror\fP + +MPIX_Gather_init(\fIsendbuf\fP, \fIsendcount\fP, \fIsendtype\fP, \fIrecvbuf\fP, + \fIrecvcount\fP, \fIrecvtype\fP, \fIroot\fP, \fIcomm\fP, \fIinfo\fP, \fIrequest\fP, \fIierror\fP) + TYPE(*), DIMENSION(..), INTENT(IN), ASYNCHRONOUS :: \fIsendbuf\fP + TYPE(*), DIMENSION(..), ASYNCHRONOUS :: \fIrecvbuf\fP + INTEGER, INTENT(IN) :: \fIsendcount\fP, \fIrecvcount\fP, \fIroot\fP + TYPE(MPI_Datatype), INTENT(IN) :: \fIsendtype\fP, \fIrecvtype\fP + TYPE(MPI_Comm), INTENT(IN) :: \fIcomm\fP + TYPE(MPI_Info), INTENT(IN) :: \fIinfo\fP + TYPE(MPI_Request), INTENT(OUT) :: \fIrequest\fP + INTEGER, OPTIONAL, INTENT(OUT) :: \fIierror\fP + +MPIX_Gatherv_init(\fIsendbuf\fP, \fIsendcount\fP, \fIsendtype\fP, \fIrecvbuf\fP, \fIrecvcounts\fP, + \fIdispls\fP, \fIrecvtype\fP, 
\fIroot\fP, \fIcomm\fP, \fIinfo\fP, \fIrequest\fP, \fIierror\fP) + TYPE(*), DIMENSION(..), INTENT(IN), ASYNCHRONOUS :: \fIsendbuf\fP + TYPE(*), DIMENSION(..), ASYNCHRONOUS :: \fIrecvbuf\fP + INTEGER, INTENT(IN) :: \fIsendcount\fP, \fIroot\fP + INTEGER, INTENT(IN), ASYNCHRONOUS :: \fIrecvcounts(*)\fP, \fIdispls(*)\fP + TYPE(MPI_Datatype), INTENT(IN) :: \fIsendtype\fP, \fIrecvtype\fP + TYPE(MPI_Comm), INTENT(IN) :: \fIcomm\fP + TYPE(MPI_Info), INTENT(IN) :: \fIinfo\fP + TYPE(MPI_Request), INTENT(OUT) :: \fIrequest\fP + INTEGER, OPTIONAL, INTENT(OUT) :: \fIierror\fP + +MPIX_Reduce_init(\fIsendbuf\fP, \fIrecvbuf\fP, \fIcount\fP, \fIdatatype\fP, \fIop\fP, \fIroot\fP, \fIcomm\fP, + \fIinfo\fP, \fIrequest\fP, \fIierror\fP) + TYPE(*), DIMENSION(..), INTENT(IN), ASYNCHRONOUS :: \fIsendbuf\fP + TYPE(*), DIMENSION(..), ASYNCHRONOUS :: \fIrecvbuf\fP + INTEGER, INTENT(IN) :: \fIcount\fP, \fIroot\fP + TYPE(MPI_Datatype), INTENT(IN) :: \fIdatatype\fP + TYPE(MPI_Op), INTENT(IN) :: \fIop\fP + TYPE(MPI_Comm), INTENT(IN) :: \fIcomm\fP + TYPE(MPI_Info), INTENT(IN) :: \fIinfo\fP + TYPE(MPI_Request), INTENT(OUT) :: \fIrequest\fP + INTEGER, OPTIONAL, INTENT(OUT) :: \fIierror\fP + +MPIX_Reduce_scatter_init(\fIsendbuf\fP, \fIrecvbuf\fP, \fIrecvcounts\fP, \fIdatatype\fP, \fIop\fP, + \fIcomm\fP, \fIinfo\fP, \fIrequest\fP, \fIierror\fP) + TYPE(*), DIMENSION(..), INTENT(IN), ASYNCHRONOUS :: \fIsendbuf\fP + TYPE(*), DIMENSION(..), ASYNCHRONOUS :: \fIrecvbuf\fP + INTEGER, INTENT(IN), ASYNCHRONOUS :: \fIrecvcounts(*)\fP + TYPE(MPI_Datatype), INTENT(IN) :: \fIdatatype\fP + TYPE(MPI_Op), INTENT(IN) :: \fIop\fP + TYPE(MPI_Comm), INTENT(IN) :: \fIcomm\fP + TYPE(MPI_Info), INTENT(IN) :: \fIinfo\fP + TYPE(MPI_Request), INTENT(OUT) :: \fIrequest\fP + INTEGER, OPTIONAL, INTENT(OUT) :: \fIierror\fP + +MPIX_Reduce_scatter_block_init(\fIsendbuf\fP, \fIrecvbuf\fP, \fIrecvcount\fP, \fIdatatype\fP, + \fIop\fP, \fIcomm\fP, \fIierror\fP) + TYPE(*), DIMENSION(..), INTENT(IN), ASYNCHRONOUS :: \fIsendbuf\fP + 
TYPE(*), DIMENSION(..), ASYNCHRONOUS :: \fIrecvbuf\fP + INTEGER, INTENT(IN) :: \fIrecvcount\fP + TYPE(MPI_Datatype), INTENT(IN) :: \fIdatatype\fP + TYPE(MPI_Op), INTENT(IN) :: \fIop\fP + TYPE(MPI_Comm), INTENT(IN) :: \fIcomm\fP + TYPE(MPI_Info), INTENT(IN) :: \fIinfo\fP + INTEGER, OPTIONAL, INTENT(OUT) :: \fIierror\fP + +MPIX_Reduce_scatter_block_init(\fIsendbuf\fP, \fIrecvbuf\fP, \fIrecvcount\fP, \fIdatatype\fP, \fIop\fP, + \fIcomm\fP, \fIinfo\fP, \fIrequest\fP, \fIierror\fP) + TYPE(*), DIMENSION(..), INTENT(IN), ASYNCHRONOUS :: \fIsendbuf\fP + TYPE(*), DIMENSION(..), ASYNCHRONOUS :: \fIrecvbuf\fP + INTEGER, INTENT(IN) :: \fIrecvcount\fP + TYPE(MPI_Datatype), INTENT(IN) :: \fIdatatype\fP + TYPE(MPI_Op), INTENT(IN) :: \fIop\fP + TYPE(MPI_Comm), INTENT(IN) :: \fIcomm\fP + TYPE(MPI_Info), INTENT(IN) :: \fIinfo\fP + TYPE(MPI_Request), INTENT(OUT) :: \fIrequest\fP + INTEGER, OPTIONAL, INTENT(OUT) :: \fIierror\fP + +MPIX_Scan_init(\fIsendbuf\fP, \fIrecvbuf\fP, \fIcount\fP, \fIdatatype\fP, \fIop\fP, \fIcomm\fP, \fIinfo\fP, + \fIrequest\fP, \fIierror\fP) + TYPE(*), DIMENSION(..), INTENT(IN), ASYNCHRONOUS :: \fIsendbuf\fP + TYPE(*), DIMENSION(..), ASYNCHRONOUS :: \fIrecvbuf\fP + INTEGER, INTENT(IN) :: \fIcount\fP + TYPE(MPI_Datatype), INTENT(IN) :: \fIdatatype\fP + TYPE(MPI_Op), INTENT(IN) :: \fIop\fP + TYPE(MPI_Comm), INTENT(IN) :: \fIcomm\fP + TYPE(MPI_Info), INTENT(IN) :: \fIinfo\fP + TYPE(MPI_Request), INTENT(OUT) :: \fIrequest\fP + INTEGER, OPTIONAL, INTENT(OUT) :: \fIierror\fP + +MPIX_Scatter_init(\fIsendbuf\fP, \fIsendcount\fP, \fIsendtype\fP, \fIrecvbuf\fP, \fIrecvcount\fP, + \fIrecvtype\fP, \fIroot\fP, \fIcomm\fP, \fIinfo\fP, \fIrequest\fP, \fIierror\fP) + TYPE(*), DIMENSION(..), INTENT(IN), ASYNCHRONOUS :: \fIsendbuf\fP + TYPE(*), DIMENSION(..), ASYNCHRONOUS :: \fIrecvbuf\fP + INTEGER, INTENT(IN) :: \fIsendcount\fP, \fIrecvcount\fP, \fIroot\fP + TYPE(MPI_Datatype), INTENT(IN) :: \fIsendtype\fP, \fIrecvtype\fP + TYPE(MPI_Comm), INTENT(IN) :: \fIcomm\fP + 
TYPE(MPI_Info), INTENT(IN) :: \fIinfo\fP + TYPE(MPI_Request), INTENT(OUT) :: \fIrequest\fP + INTEGER, OPTIONAL, INTENT(OUT) :: \fIierror\fP + +MPIX_Scatterv_init(\fIsendbuf\fP, \fIsendcounts\fP, \fIdispls\fP, \fIsendtype\fP, \fIrecvbuf\fP, + \fIrecvcount\fP, \fIrecvtype\fP, \fIroot\fP, \fIcomm\fP, \fIinfo\fP, \fIrequest\fP, \fIierror\fP) + TYPE(*), DIMENSION(..), INTENT(IN), ASYNCHRONOUS :: \fIsendbuf\fP + TYPE(*), DIMENSION(..), ASYNCHRONOUS :: \fIrecvbuf\fP + INTEGER, INTENT(IN), ASYNCHRONOUS :: \fIsendcounts(*)\fP, \fIdispls(*)\fP + INTEGER, INTENT(IN) :: \fIrecvcount\fP, \fIroot\fP + TYPE(MPI_Datatype), INTENT(IN) :: \fIsendtype\fP, \fIrecvtype\fP + TYPE(MPI_Comm), INTENT(IN) :: \fIcomm\fP + TYPE(MPI_Info), INTENT(IN) :: \fIinfo\fP + TYPE(MPI_Request), INTENT(OUT) :: \fIrequest\fP + INTEGER, OPTIONAL, INTENT(OUT) :: \fIierror\fP + +MPIX_Neighbor_allgather_init(\fIsendbuf\fP, \fIsendcount\fP, \fIsendtype\fP, \fIrecvbuf\fP, + \fIrecvcount\fP, \fIrecvtype\fP, \fIcomm\fP, \fIinfo\fP, \fIrequest\fP, \fIierror\fP) + TYPE(*), DIMENSION(..), INTENT(IN), ASYNCHRONOUS :: \fIsendbuf\fP + TYPE(*), DIMENSION(..), ASYNCHRONOUS :: \fIrecvbuf\fP + INTEGER, INTENT(IN) :: \fIsendcount\fP, \fIrecvcount\fP + TYPE(MPI_Datatype), INTENT(IN) :: \fIsendtype\fP, \fIrecvtype\fP + TYPE(MPI_Comm), INTENT(IN) :: \fIcomm\fP + TYPE(MPI_Info), INTENT(IN) :: \fIinfo\fP + TYPE(MPI_Request), INTENT(OUT) :: \fIrequest\fP + INTEGER, OPTIONAL, INTENT(OUT) :: \fIierror\fP + +MPIX_Neighbor_allgatherv_init(\fIsendbuf\fP, \fIsendcount\fP, \fIsendtype\fP, \fIrecvbuf\fP, + \fIrecvcounts\fP, \fIdispls\fP, \fIrecvtype\fP, \fIcomm\fP, \fIinfo\fP, \fIrequest\fP, \fIierror\fP) + TYPE(*), DIMENSION(..), INTENT(IN), ASYNCHRONOUS :: \fIsendbuf\fP + TYPE(*), DIMENSION(..), ASYNCHRONOUS :: \fIrecvbuf\fP + INTEGER, INTENT(IN) :: \fIsendcount\fP + INTEGER, INTENT(IN), ASYNCHRONOUS :: \fIrecvcounts(*)\fP, \fIdispls(*)\fP + TYPE(MPI_Datatype), INTENT(IN) :: \fIsendtype\fP, \fIrecvtype\fP + TYPE(MPI_Comm), INTENT(IN) 
:: \fIcomm\fP + TYPE(MPI_Info), INTENT(IN) :: \fIinfo\fP + TYPE(MPI_Request), INTENT(OUT) :: \fIrequest\fP + INTEGER, OPTIONAL, INTENT(OUT) :: \fIierror\fP + +MPIX_Neighbor_alltoall_init(\fIsendbuf\fP, \fIsendcount\fP, \fIsendtype\fP, \fIrecvbuf\fP, + \fIrecvcount\fP, \fIrecvtype\fP, \fIcomm\fP, \fIinfo\fP, \fIrequest\fP, \fIierror\fP) + TYPE(*), DIMENSION(..), INTENT(IN), ASYNCHRONOUS :: \fIsendbuf\fP + TYPE(*), DIMENSION(..), ASYNCHRONOUS :: \fIrecvbuf\fP + INTEGER, INTENT(IN) :: \fIsendcount\fP, \fIrecvcount\fP + TYPE(MPI_Datatype), INTENT(IN) :: \fIsendtype\fP, \fIrecvtype\fP + TYPE(MPI_Comm), INTENT(IN) :: \fIcomm\fP + TYPE(MPI_Info), INTENT(IN) :: \fIinfo\fP + TYPE(MPI_Request), INTENT(OUT) :: \fIrequest\fP + INTEGER, OPTIONAL, INTENT(OUT) :: \fIierror\fP + +MPIX_Neighbor_alltoallv_init(\fIsendbuf\fP, \fIsendcounts\fP, \fIsdispls\fP, \fIsendtype\fP, + \fIrecvbuf\fP, \fIrecvcounts\fP, \fIrdispls\fP, \fIrecvtype\fP, \fIcomm\fP, \fIinfo\fP, \fIrequest\fP, + \fIierror\fP) + TYPE(*), DIMENSION(..), INTENT(IN), ASYNCHRONOUS :: \fIsendbuf\fP + TYPE(*), DIMENSION(..), ASYNCHRONOUS :: \fIrecvbuf\fP + INTEGER, INTENT(IN), ASYNCHRONOUS :: \fIsendcounts(*)\fP, \fIsdispls(*)\fP, + \fIrecvcounts(*)\fP, \fIrdispls(*)\fP + TYPE(MPI_Datatype), INTENT(IN) :: \fIsendtype\fP, \fIrecvtype\fP + TYPE(MPI_Comm), INTENT(IN) :: \fIcomm\fP + TYPE(MPI_Info), INTENT(IN) :: \fIinfo\fP + TYPE(MPI_Request), INTENT(OUT) :: \fIrequest\fP + INTEGER, OPTIONAL, INTENT(OUT) :: \fIierror\fP + +MPIX_Neighbor_alltoallw_init(\fIsendbuf\fP, \fIsendcounts\fP, \fIsdispls\fP, \fIsendtypes\fP, + \fIrecvbuf\fP, \fIrecvcounts\fP, \fIrdispls\fP, \fIrecvtypes\fP, \fIcomm\fP, \fIinfo\fP, \fIrequest\fP, + \fIierror\fP) + TYPE(*), DIMENSION(..), INTENT(IN), ASYNCHRONOUS :: \fIsendbuf\fP + TYPE(*), DIMENSION(..), ASYNCHRONOUS :: \fIrecvbuf\fP + INTEGER, INTENT(IN), ASYNCHRONOUS :: \fIsendcounts(*)\fP, \fIrecvcounts(*)\fP + INTEGER(KIND=MPI_ADDRESS_KIND), INTENT(IN), ASYNCHRONOUS :: + \fIsdispls(*)\fP, 
\fIrdispls(*)\fP + TYPE(MPI_Datatype), INTENT(IN), ASYNCHRONOUS :: \fIsendtypes(*)\fP, + \fIrecvtypes(*)\fP + TYPE(MPI_Comm), INTENT(IN) :: \fIcomm\fP + TYPE(MPI_Info), INTENT(IN) :: \fIinfo\fP + TYPE(MPI_Request), INTENT(OUT) :: \fIrequest\fP + INTEGER, OPTIONAL, INTENT(OUT) :: \fIierror\fP + .fi .SH DESCRIPTION .ft R Creates a persistent communication request for a collective operation or neighborhood collective operation. -As of June 2018, the feature of persistent collective communication operations and persistent collective neighborhood communication operations is proposed in the MPI Forum. +As of Nov. 2018, the feature of persistent collective communication operations and persistent collective neighborhood communication operations is planned to be included in the next MPI Standard after MPI-3.1. .nf https://github.com/mpi-forum/mpi-issues/issues/25 .fi -Open MPI implements its draft version shown in the following URL. +Open MPI implements the 2018 Draft Specification of the MPI standard shown at the following URL. .nf - https://github.com/mpi-forum/mpi-issues/files/2078076/mpi32-report-ticket25-austin-vote-june2018.pdf + https://www.mpi-forum.org/docs/drafts/mpi-2018-draft-report.pdf .fi -Because it is still in a draft stage, the interface may change in the standard. Therefore the prefix \fIMPIX_\fP is used instead of \fIMPI_\fP for these request creation functions. To start, complete, and free the created request, usual MPI functions (\fIMPI_Start\fP etc.) can be used. Only C bindings are available currently. +The interface may still change in the standard. Therefore the prefix \fIMPIX_\fP is used instead of \fIMPI_\fP for these request creation routines. To start, complete, and free the created request, the usual MPI routines (\fIMPI_Start\fP, etc.) can be used. -Future versions of Open MPI will switch to the \fIMPI_\fP prefix and will not require the header file \fImpi-ext.h\fP once the MPI Standard which includes this feature is published. 
+Future versions of Open MPI will switch to the \fIMPI_\fP prefix and will not require the C header file \fImpi-ext.h\fP, the Fortran modules \fImpi_ext\fP and \fImpi_f08_ext\fP, and the Fortran header file \fImpif-ext.h\fP once the MPI Standard which includes this feature is published. .SH EXAMPLE .nf diff --git a/ompi/mpiext/pcollreq/c/Makefile.am b/ompi/mpiext/pcollreq/c/Makefile.am index 7b5f2eba83b..d4aeb8ef362 100644 --- a/ompi/mpiext/pcollreq/c/Makefile.am +++ b/ompi/mpiext/pcollreq/c/Makefile.am @@ -23,7 +23,7 @@ noinst_LTLIBRARIES = libmpiext_pcollreq_c.la # This is where the top-level header file (that is included in # ) must be installed. -ompidir = $(ompiincludedir)/ompi/mpiext/pcollreq/c +ompidir = $(ompiincludedir)/mpiext # This is the header file that is installed. ompi_HEADERS = mpiext_pcollreq_c.h diff --git a/ompi/mpiext/pcollreq/c/allgather_init.c b/ompi/mpiext/pcollreq/c/allgather_init.c index 46a568bc65d..4b699f91a16 100644 --- a/ompi/mpiext/pcollreq/c/allgather_init.c +++ b/ompi/mpiext/pcollreq/c/allgather_init.c @@ -14,8 +14,8 @@ * Copyright (c) 2012 Oak Ridge National Laboratory. All rights reserved. * Copyright (c) 2013 Los Alamos National Security, LLC. All rights * reserved. - * Copyright (c) 2015-2018 Research Organization for Information Science - * and Technology (RIST). All rights reserved. + * Copyright (c) 2015-2019 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -31,6 +31,7 @@ #include "ompi/communicator/communicator.h" #include "ompi/errhandler/errhandler.h" #include "ompi/datatype/ompi_datatype.h" +#include "ompi/mca/coll/base/coll_base_util.h" #include "ompi/memchecker.h" #include "ompi/mpiext/pcollreq/c/mpiext_pcollreq_c.h" #include "ompi/runtime/ompi_spc.h" @@ -103,6 +104,9 @@ int MPIX_Allgather_init(const void *sendbuf, int sendcount, MPI_Datatype sendtyp err = comm->c_coll->coll_allgather_init(sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype, comm, info, request, comm->c_coll->coll_allgather_init_module); + if (OPAL_LIKELY(OMPI_SUCCESS == err)) { + ompi_coll_base_retain_datatypes(*request, (MPI_IN_PLACE==sendbuf)?NULL:sendtype, recvtype); + } OMPI_ERRHANDLER_RETURN(err, comm, err, FUNC_NAME); } diff --git a/ompi/mpiext/pcollreq/c/allgatherv_init.c b/ompi/mpiext/pcollreq/c/allgatherv_init.c index d4b3c7368ab..2021ab9668e 100644 --- a/ompi/mpiext/pcollreq/c/allgatherv_init.c +++ b/ompi/mpiext/pcollreq/c/allgatherv_init.c @@ -14,8 +14,8 @@ * Copyright (c) 2012 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2012-2013 Los Alamos National Security, LLC. All rights * reserved. - * Copyright (c) 2015-2018 Research Organization for Information Science - * and Technology (RIST). All rights reserved. + * Copyright (c) 2015-2019 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -31,6 +31,7 @@ #include "ompi/communicator/communicator.h" #include "ompi/errhandler/errhandler.h" #include "ompi/datatype/ompi_datatype.h" +#include "ompi/mca/coll/base/coll_base_util.h" #include "ompi/memchecker.h" #include "ompi/mpiext/pcollreq/c/mpiext_pcollreq_c.h" #include "ompi/runtime/ompi_spc.h" @@ -128,6 +129,9 @@ int MPIX_Allgatherv_init(const void *sendbuf, int sendcount, MPI_Datatype sendty recvbuf, recvcounts, displs, recvtype, comm, info, request, comm->c_coll->coll_allgatherv_init_module); + if (OPAL_LIKELY(OMPI_SUCCESS == err)) { + ompi_coll_base_retain_datatypes(*request, (MPI_IN_PLACE==sendbuf)?NULL:sendtype, recvtype); + } OMPI_ERRHANDLER_RETURN(err, comm, err, FUNC_NAME); } diff --git a/ompi/mpiext/pcollreq/c/allreduce_init.c b/ompi/mpiext/pcollreq/c/allreduce_init.c index e3a96672fa6..1213395f3ec 100644 --- a/ompi/mpiext/pcollreq/c/allreduce_init.c +++ b/ompi/mpiext/pcollreq/c/allreduce_init.c @@ -12,9 +12,10 @@ * All rights reserved. * Copyright (c) 2013 Los Alamos National Security, LLC. All rights * reserved. - * Copyright (c) 2015-2018 Research Organization for Information Science - * and Technology (RIST). All rights reserved. + * Copyright (c) 2015-2019 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * Copyright (c) 2016 IBM Corporation. All rights reserved. + * Copyright (c) 2018 FUJITSU LIMITED. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -31,6 +32,7 @@ #include "ompi/errhandler/errhandler.h" #include "ompi/datatype/ompi_datatype.h" #include "ompi/op/op.h" +#include "ompi/mca/coll/base/coll_base_util.h" #include "ompi/memchecker.h" #include "ompi/mpiext/pcollreq/c/mpiext_pcollreq_c.h" #include "ompi/runtime/ompi_spc.h" @@ -106,17 +108,18 @@ int MPIX_Allreduce_init(const void *sendbuf, void *recvbuf, int count, * So handle that case. 
*/ if (0 == count) { - *request = &ompi_request_empty; - return MPI_SUCCESS; + err = ompi_request_persistent_noop_create(request); + OMPI_ERRHANDLER_RETURN(err, comm, err, FUNC_NAME); } OPAL_CR_ENTER_LIBRARY(); /* Invoke the coll component to perform the back-end operation */ - OBJ_RETAIN(op); err = comm->c_coll->coll_allreduce_init(sendbuf, recvbuf, count, datatype, op, comm, info, request, comm->c_coll->coll_allreduce_init_module); - OBJ_RELEASE(op); + if (OPAL_LIKELY(OMPI_SUCCESS == err)) { + ompi_coll_base_retain_op(*request, op, datatype); + } OMPI_ERRHANDLER_RETURN(err, comm, err, FUNC_NAME); } diff --git a/ompi/mpiext/pcollreq/c/alltoall_init.c b/ompi/mpiext/pcollreq/c/alltoall_init.c index b176f63c753..7cb36216474 100644 --- a/ompi/mpiext/pcollreq/c/alltoall_init.c +++ b/ompi/mpiext/pcollreq/c/alltoall_init.c @@ -14,8 +14,8 @@ * Copyright (c) 2012 Oak Ridge National Laboratory. All rights reserved. * Copyright (c) 2013 Los Alamos National Security, LLC. All rights * reserved. - * Copyright (c) 2014-2018 Research Organization for Information Science - * and Technology (RIST). All rights reserved. + * Copyright (c) 2014-2019 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -31,6 +31,7 @@ #include "ompi/communicator/communicator.h" #include "ompi/errhandler/errhandler.h" #include "ompi/datatype/ompi_datatype.h" +#include "ompi/mca/coll/base/coll_base_util.h" #include "ompi/memchecker.h" #include "ompi/mpiext/pcollreq/c/mpiext_pcollreq_c.h" #include "ompi/runtime/ompi_spc.h" @@ -102,5 +103,8 @@ int MPIX_Alltoall_init(const void *sendbuf, int sendcount, MPI_Datatype sendtype err = comm->c_coll->coll_alltoall_init(sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype, comm, info, request, comm->c_coll->coll_alltoall_init_module); + if (OPAL_LIKELY(OMPI_SUCCESS == err)) { + ompi_coll_base_retain_datatypes(*request, (MPI_IN_PLACE==sendbuf)?NULL:sendtype, recvtype); + } OMPI_ERRHANDLER_RETURN(err, comm, err, FUNC_NAME); } diff --git a/ompi/mpiext/pcollreq/c/alltoallv_init.c b/ompi/mpiext/pcollreq/c/alltoallv_init.c index 06d5922b2ac..3d34536fb01 100644 --- a/ompi/mpiext/pcollreq/c/alltoallv_init.c +++ b/ompi/mpiext/pcollreq/c/alltoallv_init.c @@ -13,8 +13,8 @@ * Copyright (c) 2007 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2012-2013 Los Alamos National Security, LLC. All rights * reserved. - * Copyright (c) 2014-2018 Research Organization for Information Science - * and Technology (RIST). All rights reserved. + * Copyright (c) 2014-2019 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -30,6 +30,7 @@ #include "ompi/communicator/communicator.h" #include "ompi/errhandler/errhandler.h" #include "ompi/datatype/ompi_datatype.h" +#include "ompi/mca/coll/base/coll_base_util.h" #include "ompi/memchecker.h" #include "ompi/mpiext/pcollreq/c/mpiext_pcollreq_c.h" #include "ompi/runtime/ompi_spc.h" @@ -131,6 +132,9 @@ int MPIX_Alltoallv_init(const void *sendbuf, const int sendcounts[], const int s err = comm->c_coll->coll_alltoallv_init(sendbuf, sendcounts, sdispls, sendtype, recvbuf, recvcounts, rdispls, recvtype, comm, info, request, comm->c_coll->coll_alltoallv_init_module); + if (OPAL_LIKELY(OMPI_SUCCESS == err)) { + ompi_coll_base_retain_datatypes(*request, (MPI_IN_PLACE==sendbuf)?NULL:sendtype, recvtype); + } OMPI_ERRHANDLER_RETURN(err, comm, err, FUNC_NAME); } diff --git a/ompi/mpiext/pcollreq/c/alltoallw_init.c b/ompi/mpiext/pcollreq/c/alltoallw_init.c index 405cc4c4f82..50902f1f639 100644 --- a/ompi/mpiext/pcollreq/c/alltoallw_init.c +++ b/ompi/mpiext/pcollreq/c/alltoallw_init.c @@ -13,8 +13,8 @@ * Copyright (c) 2007 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2012-2013 Los Alamos National Security, LLC. All rights * reserved. - * Copyright (c) 2014-2018 Research Organization for Information Science - * and Technology (RIST). All rights reserved. + * Copyright (c) 2014-2019 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -30,6 +30,7 @@ #include "ompi/communicator/communicator.h" #include "ompi/errhandler/errhandler.h" #include "ompi/datatype/ompi_datatype.h" +#include "ompi/mca/coll/base/coll_base_util.h" #include "ompi/memchecker.h" #include "ompi/mpiext/pcollreq/c/mpiext_pcollreq_c.h" #include "ompi/runtime/ompi_spc.h" @@ -128,6 +129,9 @@ int MPIX_Alltoallw_init(const void *sendbuf, const int sendcounts[], const int s sendtypes, recvbuf, recvcounts, rdispls, recvtypes, comm, info, request, comm->c_coll->coll_alltoallw_init_module); + if (OPAL_LIKELY(OMPI_SUCCESS == err)) { + ompi_coll_base_retain_datatypes_w(*request, (MPI_IN_PLACE==sendbuf)?NULL:sendtypes, recvtypes); + } OMPI_ERRHANDLER_RETURN(err, comm, err, FUNC_NAME); } diff --git a/ompi/mpiext/pcollreq/c/bcast_init.c b/ompi/mpiext/pcollreq/c/bcast_init.c index 6a2798a9700..9cf71a7a671 100644 --- a/ompi/mpiext/pcollreq/c/bcast_init.c +++ b/ompi/mpiext/pcollreq/c/bcast_init.c @@ -1,7 +1,7 @@ /* * Copyright (c) 2012 Oak Rigde National Laboratory. All rights reserved. - * Copyright (c) 2015-2018 Research Organization for Information Science - * and Technology (RIST). All rights reserved. + * Copyright (c) 2015-2019 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * Copyright (c) 2017-2018 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. 
@@ -19,6 +19,7 @@ #include "ompi/communicator/communicator.h" #include "ompi/errhandler/errhandler.h" #include "ompi/datatype/ompi_datatype.h" +#include "ompi/mca/coll/base/coll_base_util.h" #include "ompi/memchecker.h" #include "ompi/mpiext/pcollreq/c/mpiext_pcollreq_c.h" #include "ompi/runtime/ompi_spc.h" @@ -87,5 +88,13 @@ int MPIX_Bcast_init(void *buffer, int count, MPI_Datatype datatype, err = comm->c_coll->coll_bcast_init(buffer, count, datatype, root, comm, info, request, comm->c_coll->coll_bcast_init_module); + if (OPAL_LIKELY(OMPI_SUCCESS == err)) { + if (!OMPI_COMM_IS_INTRA(comm)) { + if (MPI_PROC_NULL == root) { + datatype = NULL; + } + } + ompi_coll_base_retain_datatypes(*request, datatype, NULL); + } OMPI_ERRHANDLER_RETURN(err, comm, err, FUNC_NAME); } diff --git a/ompi/mpiext/pcollreq/c/exscan_init.c b/ompi/mpiext/pcollreq/c/exscan_init.c index 23f155429cd..f8e34ced68a 100644 --- a/ompi/mpiext/pcollreq/c/exscan_init.c +++ b/ompi/mpiext/pcollreq/c/exscan_init.c @@ -12,8 +12,8 @@ * All rights reserved. * Copyright (c) 2013 Los Alamos National Security, LLC. All rights * reserved. - * Copyright (c) 2015-2018 Research Organization for Information Science - * and Technology (RIST). All rights reserved. + * Copyright (c) 2015-2019 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -29,6 +29,7 @@ #include "ompi/errhandler/errhandler.h" #include "ompi/datatype/ompi_datatype.h" #include "ompi/op/op.h" +#include "ompi/mca/coll/base/coll_base_util.h" #include "ompi/mpiext/pcollreq/c/mpiext_pcollreq_c.h" #include "ompi/memchecker.h" #include "ompi/runtime/ompi_spc.h" @@ -84,10 +85,11 @@ int MPIX_Exscan_init(const void *sendbuf, void *recvbuf, int count, /* Invoke the coll component to perform the back-end operation */ - OBJ_RETAIN(op); err = comm->c_coll->coll_exscan_init(sendbuf, recvbuf, count, datatype, op, comm, info, request, comm->c_coll->coll_exscan_init_module); - OBJ_RELEASE(op); + if (OPAL_LIKELY(OMPI_SUCCESS == err)) { + ompi_coll_base_retain_op(*request, op, datatype); + } OMPI_ERRHANDLER_RETURN(err, comm, err, FUNC_NAME); } diff --git a/ompi/mpiext/pcollreq/c/gather_init.c b/ompi/mpiext/pcollreq/c/gather_init.c index f62dd9b54dd..051a0eaa133 100644 --- a/ompi/mpiext/pcollreq/c/gather_init.c +++ b/ompi/mpiext/pcollreq/c/gather_init.c @@ -15,8 +15,8 @@ * Copyright (c) 2008 Sun Microsystems, Inc. All rights reserved. * Copyright (c) 2013 Los Alamos National Security, LLC. All rights * reserved. - * Copyright (c) 2015-2018 Research Organization for Information Science - * and Technology (RIST). All rights reserved. + * Copyright (c) 2015-2019 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -31,6 +31,7 @@ #include "ompi/communicator/communicator.h" #include "ompi/errhandler/errhandler.h" #include "ompi/datatype/ompi_datatype.h" +#include "ompi/mca/coll/base/coll_base_util.h" #include "ompi/memchecker.h" #include "ompi/mpiext/pcollreq/c/mpiext_pcollreq_c.h" #include "ompi/runtime/ompi_spc.h" @@ -174,5 +175,24 @@ int MPIX_Gather_init(const void *sendbuf, int sendcount, MPI_Datatype sendtype, err = comm->c_coll->coll_gather_init(sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype, root, comm, info, request, comm->c_coll->coll_gather_init_module); + if (OPAL_LIKELY(OMPI_SUCCESS == err)) { + if (OMPI_COMM_IS_INTRA(comm)) { + if (MPI_IN_PLACE == sendbuf) { + sendtype = NULL; + } else if (ompi_comm_rank(comm) != root) { + recvtype = NULL; + } + } else { + if (MPI_ROOT == root) { + sendtype = NULL; + } else if (MPI_PROC_NULL == root) { + sendtype = NULL; + recvtype = NULL; + } else { + recvtype = NULL; + } + } + ompi_coll_base_retain_datatypes(*request, sendtype, recvtype); + } OMPI_ERRHANDLER_RETURN(err, comm, err, FUNC_NAME); } diff --git a/ompi/mpiext/pcollreq/c/gatherv_init.c b/ompi/mpiext/pcollreq/c/gatherv_init.c index fbbd346008c..bd875a051c7 100644 --- a/ompi/mpiext/pcollreq/c/gatherv_init.c +++ b/ompi/mpiext/pcollreq/c/gatherv_init.c @@ -13,8 +13,8 @@ * Copyright (c) 2006-2012 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2012-2013 Los Alamos National Security, LLC. All rights * reserved. - * Copyright (c) 2015-2018 Research Organization for Information Science - * and Technology (RIST). All rights reserved. + * Copyright (c) 2015-2019 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -29,6 +29,7 @@ #include "ompi/communicator/communicator.h" #include "ompi/errhandler/errhandler.h" #include "ompi/datatype/ompi_datatype.h" +#include "ompi/mca/coll/base/coll_base_util.h" #include "ompi/mpiext/pcollreq/c/mpiext_pcollreq_c.h" #include "ompi/memchecker.h" #include "ompi/runtime/ompi_spc.h" @@ -199,5 +200,24 @@ int MPIX_Gatherv_init(const void *sendbuf, int sendcount, MPI_Datatype sendtype, recvcounts, displs, recvtype, root, comm, info, request, comm->c_coll->coll_gatherv_init_module); + if (OPAL_LIKELY(OMPI_SUCCESS == err)) { + if (OMPI_COMM_IS_INTRA(comm)) { + if (MPI_IN_PLACE == sendbuf) { + sendtype = NULL; + } else if (ompi_comm_rank(comm) != root) { + recvtype = NULL; + } + } else { + if (MPI_ROOT == root) { + sendtype = NULL; + } else if (MPI_PROC_NULL == root) { + sendtype = NULL; + recvtype = NULL; + } else { + recvtype = NULL; + } + } + ompi_coll_base_retain_datatypes(*request, sendtype, recvtype); + } OMPI_ERRHANDLER_RETURN(err, comm, err, FUNC_NAME); } diff --git a/ompi/mpiext/pcollreq/c/neighbor_allgather_init.c b/ompi/mpiext/pcollreq/c/neighbor_allgather_init.c index 4494b507b72..cd3037d0bda 100644 --- a/ompi/mpiext/pcollreq/c/neighbor_allgather_init.c +++ b/ompi/mpiext/pcollreq/c/neighbor_allgather_init.c @@ -14,8 +14,8 @@ * Copyright (c) 2012 Oak Rigde National Laboratory. All rights reserved. * Copyright (c) 2013 Los Alamos National Security, LLC. All rights * reserved. - * Copyright (c) 2015-2018 Research Organization for Information Science - * and Technology (RIST). All rights reserved. + * Copyright (c) 2015-2019 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * Copyright (c) 2017 IBM Corporation. All rights reserved. 
* $COPYRIGHT$ * @@ -32,6 +32,7 @@ #include "ompi/communicator/communicator.h" #include "ompi/errhandler/errhandler.h" #include "ompi/datatype/ompi_datatype.h" +#include "ompi/mca/coll/base/coll_base_util.h" #include "ompi/memchecker.h" #include "ompi/mca/topo/topo.h" #include "ompi/mca/topo/base/base.h" @@ -125,6 +126,9 @@ int MPIX_Neighbor_allgather_init(const void *sendbuf, int sendcount, MPI_Datatyp err = comm->c_coll->coll_neighbor_allgather_init(sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype, comm, info, request, comm->c_coll->coll_neighbor_allgather_init_module); + if (OPAL_LIKELY(OMPI_SUCCESS == err)) { + ompi_coll_base_retain_datatypes(*request, sendtype, recvtype); + } OMPI_ERRHANDLER_RETURN(err, comm, err, FUNC_NAME); } diff --git a/ompi/mpiext/pcollreq/c/neighbor_allgatherv_init.c b/ompi/mpiext/pcollreq/c/neighbor_allgatherv_init.c index 66fa0487c57..3e53b846312 100644 --- a/ompi/mpiext/pcollreq/c/neighbor_allgatherv_init.c +++ b/ompi/mpiext/pcollreq/c/neighbor_allgatherv_init.c @@ -14,8 +14,8 @@ * Copyright (c) 2012 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2012-2013 Los Alamos National Security, LLC. All rights * reserved. - * Copyright (c) 2015-2018 Research Organization for Information Science - * and Technology (RIST). All rights reserved. + * Copyright (c) 2015-2019 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * Copyright (c) 2017 IBM Corporation. All rights reserved. 
* $COPYRIGHT$ * @@ -32,6 +32,7 @@ #include "ompi/communicator/communicator.h" #include "ompi/errhandler/errhandler.h" #include "ompi/datatype/ompi_datatype.h" +#include "ompi/mca/coll/base/coll_base_util.h" #include "ompi/memchecker.h" #include "ompi/mca/topo/topo.h" #include "ompi/mca/topo/base/base.h" @@ -149,6 +150,9 @@ int MPIX_Neighbor_allgatherv_init(const void *sendbuf, int sendcount, MPI_Dataty recvbuf, (int *) recvcounts, (int *) displs, recvtype, comm, info, request, comm->c_coll->coll_neighbor_allgatherv_init_module); + if (OPAL_LIKELY(OMPI_SUCCESS == err)) { + ompi_coll_base_retain_datatypes(*request, sendtype, recvtype); + } OMPI_ERRHANDLER_RETURN(err, comm, err, FUNC_NAME); } diff --git a/ompi/mpiext/pcollreq/c/neighbor_alltoall_init.c b/ompi/mpiext/pcollreq/c/neighbor_alltoall_init.c index c564ee7e9e5..c2b0ac3c19b 100644 --- a/ompi/mpiext/pcollreq/c/neighbor_alltoall_init.c +++ b/ompi/mpiext/pcollreq/c/neighbor_alltoall_init.c @@ -14,8 +14,8 @@ * Copyright (c) 2012 Oak Ridge National Laboratory. All rights reserved. * Copyright (c) 2013 Los Alamos National Security, LLC. All rights * reserved. - * Copyright (c) 2014-2018 Research Organization for Information Science - * and Technology (RIST). All rights reserved. + * Copyright (c) 2014-2019 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * Copyright (c) 2017 IBM Corporation. All rights reserved. 
* $COPYRIGHT$ * @@ -32,6 +32,7 @@ #include "ompi/communicator/communicator.h" #include "ompi/errhandler/errhandler.h" #include "ompi/datatype/ompi_datatype.h" +#include "ompi/mca/coll/base/coll_base_util.h" #include "ompi/memchecker.h" #include "ompi/mca/topo/topo.h" #include "ompi/mca/topo/base/base.h" @@ -126,5 +127,8 @@ int MPIX_Neighbor_alltoall_init(const void *sendbuf, int sendcount, MPI_Datatype recvbuf, recvcount, recvtype, comm, info, request, comm->c_coll->coll_neighbor_alltoall_init_module); + if (OPAL_LIKELY(OMPI_SUCCESS == err)) { + ompi_coll_base_retain_datatypes(*request, sendtype, recvtype); + } OMPI_ERRHANDLER_RETURN(err, comm, err, FUNC_NAME); } diff --git a/ompi/mpiext/pcollreq/c/neighbor_alltoallv_init.c b/ompi/mpiext/pcollreq/c/neighbor_alltoallv_init.c index 8d3503bf57b..f86e256d815 100644 --- a/ompi/mpiext/pcollreq/c/neighbor_alltoallv_init.c +++ b/ompi/mpiext/pcollreq/c/neighbor_alltoallv_init.c @@ -13,8 +13,8 @@ * Copyright (c) 2007 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2012-2017 Los Alamos National Security, LLC. All rights * reserved. - * Copyright (c) 2014-2018 Research Organization for Information Science - * and Technology (RIST). All rights reserved. + * Copyright (c) 2014-2019 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * Copyright (c) 2017 IBM Corporation. All rights reserved. 
* $COPYRIGHT$ * @@ -31,6 +31,7 @@ #include "ompi/communicator/communicator.h" #include "ompi/errhandler/errhandler.h" #include "ompi/datatype/ompi_datatype.h" +#include "ompi/mca/coll/base/coll_base_util.h" #include "ompi/memchecker.h" #include "ompi/mca/topo/topo.h" #include "ompi/mca/topo/base/base.h" @@ -149,6 +150,9 @@ int MPIX_Neighbor_alltoallv_init(const void *sendbuf, const int sendcounts[], co sendtype, recvbuf, recvcounts, rdispls, recvtype, comm, info, request, comm->c_coll->coll_neighbor_alltoallv_init_module); + if (OPAL_LIKELY(OMPI_SUCCESS == err)) { + ompi_coll_base_retain_datatypes(*request, sendtype, recvtype); + } OMPI_ERRHANDLER_RETURN(err, comm, err, FUNC_NAME); } diff --git a/ompi/mpiext/pcollreq/c/neighbor_alltoallw_init.c b/ompi/mpiext/pcollreq/c/neighbor_alltoallw_init.c index 68e2b2cad22..1143ccbb3cf 100644 --- a/ompi/mpiext/pcollreq/c/neighbor_alltoallw_init.c +++ b/ompi/mpiext/pcollreq/c/neighbor_alltoallw_init.c @@ -13,8 +13,8 @@ * Copyright (c) 2007 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2012-2017 Los Alamos National Security, LLC. All rights * reserved. - * Copyright (c) 2014-2018 Research Organization for Information Science - * and Technology (RIST). All rights reserved. + * Copyright (c) 2014-2019 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * Copyright (c) 2017 IBM Corporation. All rights reserved. 
* $COPYRIGHT$ * @@ -31,6 +31,7 @@ #include "ompi/communicator/communicator.h" #include "ompi/errhandler/errhandler.h" #include "ompi/datatype/ompi_datatype.h" +#include "ompi/mca/coll/base/coll_base_util.h" #include "ompi/memchecker.h" #include "ompi/mca/topo/topo.h" #include "ompi/mca/topo/base/base.h" @@ -149,6 +150,9 @@ int MPIX_Neighbor_alltoallw_init(const void *sendbuf, const int sendcounts[], co recvbuf, recvcounts, rdispls, recvtypes, comm, info, request, comm->c_coll->coll_neighbor_alltoallw_init_module); + if (OPAL_LIKELY(OMPI_SUCCESS == err)) { + ompi_coll_base_retain_datatypes_w(*request, sendtypes, recvtypes); + } OMPI_ERRHANDLER_RETURN(err, comm, err, FUNC_NAME); } diff --git a/ompi/mpiext/pcollreq/c/profile/Makefile.am b/ompi/mpiext/pcollreq/c/profile/Makefile.am index 9fee858081c..95595743229 100644 --- a/ompi/mpiext/pcollreq/c/profile/Makefile.am +++ b/ompi/mpiext/pcollreq/c/profile/Makefile.am @@ -22,7 +22,7 @@ noinst_LTLIBRARIES = libpmpiext_pcollreq_c.la # This is where the top-level header file (that is included in # ) must be installed. -ompidir = $(ompiincludedir)/ompi/mpiext/pcollreq/c +ompidir = $(ompiincludedir)/mpiext # This is the header file that is installed. ompi_HEADERS = pmpiext_pcollreq_c.h diff --git a/ompi/mpiext/pcollreq/c/reduce_init.c b/ompi/mpiext/pcollreq/c/reduce_init.c index 6bef3b5dded..d3b50747bfe 100644 --- a/ompi/mpiext/pcollreq/c/reduce_init.c +++ b/ompi/mpiext/pcollreq/c/reduce_init.c @@ -13,9 +13,10 @@ * Copyright (c) 2006 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2013 Los Alamos National Security, LLC. All rights * reserved. - * Copyright (c) 2015-2018 Research Organization for Information Science - * and Technology (RIST). All rights reserved. + * Copyright (c) 2015-2019 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * Copyright (c) 2016 IBM Corporation. All rights reserved. + * Copyright (c) 2018 FUJITSU LIMITED. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -31,6 +32,7 @@ #include "ompi/errhandler/errhandler.h" #include "ompi/datatype/ompi_datatype.h" #include "ompi/op/op.h" +#include "ompi/mca/coll/base/coll_base_util.h" #include "ompi/memchecker.h" #include "ompi/mpiext/pcollreq/c/mpiext_pcollreq_c.h" #include "ompi/runtime/ompi_spc.h" @@ -131,17 +133,18 @@ int MPIX_Reduce_init(const void *sendbuf, void *recvbuf, int count, * So handle that case. */ if (0 == count) { - *request = &ompi_request_empty; - return MPI_SUCCESS; + err = ompi_request_persistent_noop_create(request); + OMPI_ERRHANDLER_RETURN(err, comm, err, FUNC_NAME); } OPAL_CR_ENTER_LIBRARY(); /* Invoke the coll component to perform the back-end operation */ - OBJ_RETAIN(op); err = comm->c_coll->coll_reduce_init(sendbuf, recvbuf, count, datatype, op, root, comm, info, request, comm->c_coll->coll_reduce_init_module); - OBJ_RELEASE(op); + if (OPAL_LIKELY(OMPI_SUCCESS == err)) { + ompi_coll_base_retain_op(*request, op, datatype); + } OMPI_ERRHANDLER_RETURN(err, comm, err, FUNC_NAME); } diff --git a/ompi/mpiext/pcollreq/c/reduce_scatter_block_init.c b/ompi/mpiext/pcollreq/c/reduce_scatter_block_init.c index ef000ae6e16..c0b8c344e62 100644 --- a/ompi/mpiext/pcollreq/c/reduce_scatter_block_init.c +++ b/ompi/mpiext/pcollreq/c/reduce_scatter_block_init.c @@ -14,8 +14,8 @@ * Copyright (c) 2012 Oak Ridge National Labs. All rights reserved. * Copyright (c) 2013 Los Alamos National Security, LLC. All rights * reserved. - * Copyright (c) 2015-2018 Research Organization for Information Science - * and Technology (RIST). All rights reserved. + * Copyright (c) 2015-2019 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -31,6 +31,7 @@ #include "ompi/errhandler/errhandler.h" #include "ompi/datatype/ompi_datatype.h" #include "ompi/op/op.h" +#include "ompi/mca/coll/base/coll_base_util.h" #include "ompi/memchecker.h" #include "ompi/mpiext/pcollreq/c/mpiext_pcollreq_c.h" #include "ompi/runtime/ompi_spc.h" @@ -101,10 +102,11 @@ int MPIX_Reduce_scatter_block_init(const void *sendbuf, void *recvbuf, int recvc /* Invoke the coll component to perform the back-end operation */ - OBJ_RETAIN(op); err = comm->c_coll->coll_reduce_scatter_block_init(sendbuf, recvbuf, recvcount, datatype, op, comm, info, request, comm->c_coll->coll_reduce_scatter_block_init_module); - OBJ_RELEASE(op); + if (OPAL_LIKELY(OMPI_SUCCESS == err)) { + ompi_coll_base_retain_op(*request, op, datatype); + } OMPI_ERRHANDLER_RETURN(err, comm, err, FUNC_NAME); } diff --git a/ompi/mpiext/pcollreq/c/reduce_scatter_init.c b/ompi/mpiext/pcollreq/c/reduce_scatter_init.c index f1748b771e6..5bf5712e3e6 100644 --- a/ompi/mpiext/pcollreq/c/reduce_scatter_init.c +++ b/ompi/mpiext/pcollreq/c/reduce_scatter_init.c @@ -13,9 +13,10 @@ * Copyright (c) 2006-2012 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2012-2013 Los Alamos National Security, LLC. All rights * reserved. - * Copyright (c) 2015-2018 Research Organization for Information Science - * and Technology (RIST). All rights reserved. + * Copyright (c) 2015-2019 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * Copyright (c) 2016 IBM Corporation. All rights reserved. + * Copyright (c) 2018 FUJITSU LIMITED. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -31,6 +32,7 @@ #include "ompi/errhandler/errhandler.h" #include "ompi/datatype/ompi_datatype.h" #include "ompi/op/op.h" +#include "ompi/mca/coll/base/coll_base_util.h" #include "ompi/memchecker.h" #include "ompi/mpiext/pcollreq/c/mpiext_pcollreq_c.h" #include "ompi/runtime/ompi_spc.h" @@ -126,18 +128,19 @@ int MPIX_Reduce_scatter_init(const void *sendbuf, void *recvbuf, const int recvc } } if (size == count) { - *request = &ompi_request_empty; - return MPI_SUCCESS; + err = ompi_request_persistent_noop_create(request); + OMPI_ERRHANDLER_RETURN(err, comm, err, FUNC_NAME); } OPAL_CR_ENTER_LIBRARY(); /* Invoke the coll component to perform the back-end operation */ - OBJ_RETAIN(op); err = comm->c_coll->coll_reduce_scatter_init(sendbuf, recvbuf, recvcounts, datatype, op, comm, info, request, comm->c_coll->coll_reduce_scatter_init_module); - OBJ_RELEASE(op); + if (OPAL_LIKELY(OMPI_SUCCESS == err)) { + ompi_coll_base_retain_op(*request, op, datatype); + } OMPI_ERRHANDLER_RETURN(err, comm, err, FUNC_NAME); } diff --git a/ompi/mpiext/pcollreq/c/scan_init.c b/ompi/mpiext/pcollreq/c/scan_init.c index 8ff34dd5f5f..35540c1a102 100644 --- a/ompi/mpiext/pcollreq/c/scan_init.c +++ b/ompi/mpiext/pcollreq/c/scan_init.c @@ -13,8 +13,8 @@ * Copyright (c) 2006 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2013 Los Alamos National Security, LLC. All rights * reserved. - * Copyright (c) 2015-2018 Research Organization for Information Science - * and Technology (RIST). All rights reserved. + * Copyright (c) 2015-2019 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -30,6 +30,7 @@ #include "ompi/errhandler/errhandler.h" #include "ompi/datatype/ompi_datatype.h" #include "ompi/op/op.h" +#include "ompi/mca/coll/base/coll_base_util.h" #include "ompi/memchecker.h" #include "ompi/mpiext/pcollreq/c/mpiext_pcollreq_c.h" #include "ompi/runtime/ompi_spc.h" @@ -98,11 +99,12 @@ int MPIX_Scan_init(const void *sendbuf, void *recvbuf, int count, /* Call the coll component to actually perform the allgather */ - OBJ_RETAIN(op); err = comm->c_coll->coll_scan_init(sendbuf, recvbuf, count, datatype, op, comm, info, request, comm->c_coll->coll_scan_init_module); - OBJ_RELEASE(op); + if (OPAL_LIKELY(OMPI_SUCCESS == err)) { + ompi_coll_base_retain_op(*request, op, datatype); + } OMPI_ERRHANDLER_RETURN(err, comm, err, FUNC_NAME); } diff --git a/ompi/mpiext/pcollreq/c/scatter_init.c b/ompi/mpiext/pcollreq/c/scatter_init.c index 30ee31f88d4..7ab7700c62a 100644 --- a/ompi/mpiext/pcollreq/c/scatter_init.c +++ b/ompi/mpiext/pcollreq/c/scatter_init.c @@ -15,8 +15,8 @@ * Copyright (c) 2008 Sun Microsystems, Inc. All rights reserved. * Copyright (c) 2013 Los Alamos National Security, LLC. All rights * reserved. - * Copyright (c) 2015-2018 Research Organization for Information Science - * and Technology (RIST). All rights reserved. + * Copyright (c) 2015-2019 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -31,6 +31,7 @@ #include "ompi/communicator/communicator.h" #include "ompi/errhandler/errhandler.h" #include "ompi/datatype/ompi_datatype.h" +#include "ompi/mca/coll/base/coll_base_util.h" #include "ompi/memchecker.h" #include "ompi/mpiext/pcollreq/c/mpiext_pcollreq_c.h" #include "ompi/runtime/ompi_spc.h" @@ -157,5 +158,24 @@ int MPIX_Scatter_init(const void *sendbuf, int sendcount, MPI_Datatype sendtype, err = comm->c_coll->coll_scatter_init(sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype, root, comm, info, request, comm->c_coll->coll_scatter_init_module); + if (OPAL_LIKELY(OMPI_SUCCESS == err)) { + if (OMPI_COMM_IS_INTRA(comm)) { + if (MPI_IN_PLACE == recvbuf) { + recvtype = NULL; + } else if (ompi_comm_rank(comm) != root) { + sendtype = NULL; + } + } else { + if (MPI_ROOT == root) { + recvtype = NULL; + } else if (MPI_PROC_NULL == root) { + sendtype = NULL; + recvtype = NULL; + } else { + sendtype = NULL; + } + } + ompi_coll_base_retain_datatypes(*request, sendtype, recvtype); + } OMPI_ERRHANDLER_RETURN(err, comm, err, FUNC_NAME); } diff --git a/ompi/mpiext/pcollreq/c/scatterv_init.c b/ompi/mpiext/pcollreq/c/scatterv_init.c index fef368caf7b..d2d53c7fd95 100644 --- a/ompi/mpiext/pcollreq/c/scatterv_init.c +++ b/ompi/mpiext/pcollreq/c/scatterv_init.c @@ -13,8 +13,8 @@ * Copyright (c) 2006-2012 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2012-2013 Los Alamos National Security, LLC. All rights * reserved. - * Copyright (c) 2015-2018 Research Organization for Information Science - * and Technology (RIST). All rights reserved. + * Copyright (c) 2015-2019 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -29,6 +29,7 @@ #include "ompi/communicator/communicator.h" #include "ompi/errhandler/errhandler.h" #include "ompi/datatype/ompi_datatype.h" +#include "ompi/mca/coll/base/coll_base_util.h" #include "ompi/memchecker.h" #include "ompi/mpiext/pcollreq/c/mpiext_pcollreq_c.h" #include "ompi/runtime/ompi_spc.h" @@ -197,5 +198,24 @@ int MPIX_Scatterv_init(const void *sendbuf, const int sendcounts[], const int di err = comm->c_coll->coll_scatterv_init(sendbuf, sendcounts, displs, sendtype, recvbuf, recvcount, recvtype, root, comm, info, request, comm->c_coll->coll_scatterv_init_module); + if (OPAL_LIKELY(OMPI_SUCCESS == err)) { + if (OMPI_COMM_IS_INTRA(comm)) { + if (MPI_IN_PLACE == recvbuf) { + recvtype = NULL; + } else if (ompi_comm_rank(comm) != root) { + sendtype = NULL; + } + } else { + if (MPI_ROOT == root) { + recvtype = NULL; + } else if (MPI_PROC_NULL == root) { + sendtype = NULL; + recvtype = NULL; + } else { + sendtype = NULL; + } + } + ompi_coll_base_retain_datatypes(*request, sendtype, recvtype); + } OMPI_ERRHANDLER_RETURN(err, comm, err, FUNC_NAME); } diff --git a/ompi/mpiext/pcollreq/mpif-h/Makefile.am b/ompi/mpiext/pcollreq/mpif-h/Makefile.am index e08b846df1f..7282ac086d7 100644 --- a/ompi/mpiext/pcollreq/mpif-h/Makefile.am +++ b/ompi/mpiext/pcollreq/mpif-h/Makefile.am @@ -31,7 +31,7 @@ noinst_LTLIBRARIES = # Directory where the header file to be included in mpif-ext.h must be # installed. -ompidir = $(ompiincludedir)/ompi/mpiext/pcollreq/mpif-h +ompidir = $(ompiincludedir)/mpiext # Just like noinst_LTLIBRARIES, set this macro to empty and # conditionally add to it later. 
diff --git a/ompi/mpiext/pcollreq/mpif-h/alltoallw_init_f.c b/ompi/mpiext/pcollreq/mpif-h/alltoallw_init_f.c index 183d739f797..0fae1e194db 100644 --- a/ompi/mpiext/pcollreq/mpif-h/alltoallw_init_f.c +++ b/ompi/mpiext/pcollreq/mpif-h/alltoallw_init_f.c @@ -10,8 +10,8 @@ * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2015-2018 Research Organization for Information Science - * and Technology (RIST). All rights reserved. + * Copyright (c) 2015-2019 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -23,6 +23,7 @@ #include "ompi/mpi/fortran/mpif-h/bindings.h" #include "ompi/mpi/fortran/base/constants.h" +#include "ompi/communicator/communicator.h" #include "ompi/mpiext/pcollreq/mpif-h/mpiext_pcollreq_prototypes.h" #if OMPI_BUILD_MPI_PROFILING @@ -75,7 +76,7 @@ void ompix_alltoallw_init_f(char *sendbuf, MPI_Fint *sendcounts, MPI_Fint *comm, MPI_Fint *info, MPI_Fint *request, MPI_Fint *ierr) { MPI_Comm c_comm; - MPI_Datatype *c_sendtypes, *c_recvtypes; + MPI_Datatype *c_sendtypes = NULL, *c_recvtypes; MPI_Info c_info; MPI_Request c_request; int size, c_ierr; @@ -85,22 +86,23 @@ void ompix_alltoallw_init_f(char *sendbuf, MPI_Fint *sendcounts, OMPI_ARRAY_NAME_DECL(rdispls); c_comm = PMPI_Comm_f2c(*comm); - PMPI_Comm_size(c_comm, &size); - - c_sendtypes = (MPI_Datatype *) malloc(size * sizeof(MPI_Datatype)); - c_recvtypes = (MPI_Datatype *) malloc(size * sizeof(MPI_Datatype)); - c_info = PMPI_Info_f2c(*info); + size = OMPI_COMM_IS_INTER(c_comm)?ompi_comm_remote_size(c_comm):ompi_comm_size(c_comm); + + if (!OMPI_IS_FORTRAN_IN_PLACE(sendbuf)) { + c_sendtypes = (MPI_Datatype *) malloc(size * sizeof(MPI_Datatype)); + OMPI_ARRAY_FINT_2_INT(sendcounts, size); + OMPI_ARRAY_FINT_2_INT(sdispls, size); + for (int i=0; i 0) { - c_sendtypes[size - 1] = 
PMPI_Type_f2c(sendtypes[size - 1]); - c_recvtypes[size - 1] = PMPI_Type_f2c(recvtypes[size - 1]); - --size; + for (int i=0; io_flags & OMPI_OP_FLAGS_INTRINSIC)) { - op->o_func.intrinsic.fns[ompi_op_ddt_map[dtype->id]](source, target, - &count, &dtype, - op->o_func.intrinsic.modules[ompi_op_ddt_map[dtype->id]]); + int dtype_id; + if (!ompi_datatype_is_predefined(dtype)) { + ompi_datatype_t *dt = ompi_datatype_get_single_predefined_type_from_args(dtype); + dtype_id = ompi_op_ddt_map[dt->id]; + } else { + dtype_id = ompi_op_ddt_map[dtype->id]; + } + op->o_func.intrinsic.fns[dtype_id](source, target, + &count, &dtype, + op->o_func.intrinsic.modules[dtype_id]); return; } diff --git a/ompi/request/grequestx.c b/ompi/request/grequestx.c index acd688eacf1..1cc069c2432 100644 --- a/ompi/request/grequestx.c +++ b/ompi/request/grequestx.c @@ -50,6 +50,7 @@ static int grequestx_progress(void) { } OPAL_THREAD_LOCK(&lock); } + in_progress = false; } OPAL_THREAD_UNLOCK(&lock); diff --git a/ompi/request/req_wait.c b/ompi/request/req_wait.c index e4d4d5e68a6..d8eb64984c4 100644 --- a/ompi/request/req_wait.c +++ b/ompi/request/req_wait.c @@ -3,7 +3,7 @@ * Copyright (c) 2004-2010 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. - * Copyright (c) 2004-2016 The University of Tennessee and The University + * Copyright (c) 2004-2018 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. 
* Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, @@ -388,13 +388,13 @@ int ompi_request_default_wait_some(size_t count, int * indices, ompi_status_public_t * statuses) { - size_t num_requests_null_inactive=0, num_requests_done=0; + size_t num_requests_null_inactive, num_requests_done, num_active_reqs; int rc = MPI_SUCCESS; ompi_request_t **rptr = NULL; ompi_request_t *request = NULL; ompi_wait_sync_t sync; size_t sync_sets = 0, sync_unsets = 0; - + if (OPAL_UNLIKELY(0 == count)) { *outcount = MPI_UNDEFINED; return OMPI_SUCCESS; @@ -407,6 +407,7 @@ int ompi_request_default_wait_some(size_t count, rptr = requests; num_requests_null_inactive = 0; num_requests_done = 0; + num_active_reqs = 0; for (size_t i = 0; i < count; i++, rptr++) { void *_tmp_ptr = REQUEST_PENDING; @@ -419,14 +420,14 @@ int ompi_request_default_wait_some(size_t count, num_requests_null_inactive++; continue; } - indices[i] = OPAL_ATOMIC_COMPARE_EXCHANGE_STRONG_PTR(&request->req_complete, &_tmp_ptr, &sync); - if( !indices[i] ) { + indices[num_active_reqs] = OPAL_ATOMIC_COMPARE_EXCHANGE_STRONG_PTR(&request->req_complete, &_tmp_ptr, &sync); + if( !indices[num_active_reqs] ) { /* If the request is completed go ahead and mark it as such */ assert( REQUEST_COMPLETE(request) ); num_requests_done++; } + num_active_reqs++; } - sync_sets = count - num_requests_null_inactive - num_requests_done; if(num_requests_null_inactive == count) { *outcount = MPI_UNDEFINED; @@ -435,6 +436,7 @@ int ompi_request_default_wait_some(size_t count, return rc; } + sync_sets = num_active_reqs - num_requests_done; if( 0 == num_requests_done ) { /* One completed request is enough to satisfy the some condition */ SYNC_WAIT(&sync); @@ -445,6 +447,7 @@ int ompi_request_default_wait_some(size_t count, rptr = requests; num_requests_done = 0; + num_active_reqs = 0; for (size_t i = 0; i < count; i++, rptr++) { void *_tmp_ptr = &sync; @@ -466,13 +469,14 @@ int ompi_request_default_wait_some(size_t count, * either 
slowly (in case of partial completion) * OR in parallel with `i` (in case of full set completion) */ - if( !indices[i] ){ + if( !indices[num_active_reqs] ) { indices[num_requests_done++] = i; } else if( !OPAL_ATOMIC_COMPARE_EXCHANGE_STRONG_PTR(&request->req_complete, &_tmp_ptr, REQUEST_PENDING) ) { indices[num_requests_done++] = i; } + num_active_reqs++; } - sync_unsets = count - num_requests_null_inactive - num_requests_done; + sync_unsets = num_active_reqs - num_requests_done; if( sync_sets == sync_unsets ){ /* nobody knows about us, diff --git a/ompi/runtime/Makefile.am b/ompi/runtime/Makefile.am index 98cc400a83a..71b32e2139a 100644 --- a/ompi/runtime/Makefile.am +++ b/ompi/runtime/Makefile.am @@ -33,10 +33,19 @@ headers += \ lib@OMPI_LIBMPI_NAME@_la_SOURCES += \ runtime/ompi_mpi_abort.c \ runtime/ompi_mpi_dynamics.c \ - runtime/ompi_mpi_init.c \ runtime/ompi_mpi_finalize.c \ runtime/ompi_mpi_params.c \ runtime/ompi_mpi_preconnect.c \ runtime/ompi_cr.c \ runtime/ompi_info_support.c \ runtime/ompi_spc.c + +# The MPIR portion of the library must be built with flags to +# enable stepping out of MPI_INIT into main. +# Use an intermediate library to isolate the debug object. +noinst_LTLIBRARIES += libompi_mpir.la +libompi_mpir_la_SOURCES = \ + runtime/ompi_mpi_init.c +libompi_mpir_la_CFLAGS = $(MPIR_UNWIND_CFLAGS) + +lib@OMPI_LIBMPI_NAME@_la_LIBADD += libompi_mpir.la diff --git a/ompi/runtime/ompi_mpi_finalize.c b/ompi/runtime/ompi_mpi_finalize.c index a235f6ba2d2..b636ddfbaab 100644 --- a/ompi/runtime/ompi_mpi_finalize.c +++ b/ompi/runtime/ompi_mpi_finalize.c @@ -257,7 +257,13 @@ int ompi_mpi_finalize(void) * communications/actions to complete. See * https://github.com/open-mpi/ompi/issues/1576 for the * original bug report. 
*/ - opal_pmix.fence_nb(NULL, 0, fence_cbfunc, (void*)&active); + if (OMPI_SUCCESS != (ret = opal_pmix.fence_nb(NULL, 0, fence_cbfunc, + (void*)&active))) { + OMPI_ERROR_LOG(ret); + /* Reset the active flag to false, to avoid waiting for + * completion when the fence was failed. */ + active = false; + } OMPI_LAZY_WAIT_FOR_COMPLETION(active); } else { /* However, we cannot guarantee that the provided PMIx has @@ -268,7 +274,9 @@ int ompi_mpi_finalize(void) ompi_communicator_t *comm = &ompi_mpi_comm_world.comm; comm->c_coll->coll_barrier(comm, comm->c_coll->coll_barrier_module); - opal_pmix.fence(NULL, 0); + if (OMPI_SUCCESS != (ret = opal_pmix.fence(NULL, 0))) { + OMPI_ERROR_LOG(ret); + } } } diff --git a/ompi/runtime/ompi_mpi_init.c b/ompi/runtime/ompi_mpi_init.c index 00e450c923a..b35c491b735 100644 --- a/ompi/runtime/ompi_mpi_init.c +++ b/ompi/runtime/ompi_mpi_init.c @@ -662,9 +662,15 @@ int ompi_mpi_init(int argc, char **argv, int requested, int *provided, #if (OPAL_ENABLE_TIMING) if (OMPI_TIMING_ENABLED && !opal_pmix_base_async_modex && opal_pmix_collect_all_data) { - opal_pmix.fence(NULL, 0); + if (OMPI_SUCCESS != (ret = opal_pmix.fence(NULL, 0))) { + error = "timing: pmix-barrier-1 failed"; + goto error; + } OMPI_TIMING_NEXT("pmix-barrier-1"); - opal_pmix.fence(NULL, 0); + if (OMPI_SUCCESS != (ret = opal_pmix.fence(NULL, 0))) { + error = "timing: pmix-barrier-2 failed"; + goto error; + } OMPI_TIMING_NEXT("pmix-barrier-2"); } #endif @@ -687,19 +693,32 @@ int ompi_mpi_init(int argc, char **argv, int requested, int *provided, background_fence = true; active = true; OPAL_POST_OBJECT(&active); - opal_pmix.fence_nb(NULL, true, fence_release, (void*)&active); + if( OMPI_SUCCESS != (ret = opal_pmix.fence_nb(NULL, true, + fence_release, + (void*)&active))) { + error = "opal_pmix.fence_nb() failed"; + goto error; + } + } else if (!opal_pmix_base_async_modex) { /* we want to do the modex */ active = true; OPAL_POST_OBJECT(&active); - opal_pmix.fence_nb(NULL, 
opal_pmix_collect_all_data, - fence_release, (void*)&active); + if( OMPI_SUCCESS != (ret = opal_pmix.fence_nb(NULL, + opal_pmix_collect_all_data, fence_release, (void*)&active))) { + error = "opal_pmix.fence_nb() failed"; + goto error; + } /* cannot just wait on thread as we need to call opal_progress */ OMPI_LAZY_WAIT_FOR_COMPLETION(active); } /* otherwise, we don't want to do the modex, so fall thru */ } else if (!opal_pmix_base_async_modex || opal_pmix_collect_all_data) { - opal_pmix.fence(NULL, opal_pmix_collect_all_data); + if( OMPI_SUCCESS != (ret = opal_pmix.fence(NULL, + opal_pmix_collect_all_data))) { + error = "opal_pmix.fence() failed"; + goto error; + } } OMPI_TIMING_NEXT("modex"); @@ -877,11 +896,17 @@ int ompi_mpi_init(int argc, char **argv, int requested, int *provided, if (NULL != opal_pmix.fence_nb) { active = true; OPAL_POST_OBJECT(&active); - opal_pmix.fence_nb(NULL, false, - fence_release, (void*)&active); + if (OMPI_SUCCESS != (ret = opal_pmix.fence_nb(NULL, false, + fence_release, (void*)&active))) { + error = "opal_pmix.fence_nb() failed"; + goto error; + } OMPI_LAZY_WAIT_FOR_COMPLETION(active); } else { - opal_pmix.fence(NULL, false); + if (OMPI_SUCCESS != (ret = opal_pmix.fence(NULL, false))) { + error = "opal_pmix.fence() failed"; + goto error; + } } } diff --git a/ompi/runtime/ompi_mpi_params.c b/ompi/runtime/ompi_mpi_params.c index a490453d0d6..adb9a1a4abb 100644 --- a/ompi/runtime/ompi_mpi_params.c +++ b/ompi/runtime/ompi_mpi_params.c @@ -17,7 +17,7 @@ * Copyright (c) 2013-2017 Intel, Inc. All rights reserved. * Copyright (c) 2015 Mellanox Technologies, Inc. * All rights reserved. - * Copyright (c) 2016 Research Organization for Information Science + * Copyright (c) 2016-2019 Research Organization for Information Science * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ * @@ -82,6 +82,7 @@ static bool show_default_mca_params = false; static bool show_file_mca_params = false; static bool show_enviro_mca_params = false; static bool show_override_mca_params = false; +static bool ompi_mpi_oversubscribe = false; int ompi_mpi_register_params(void) { @@ -108,13 +109,18 @@ int ompi_mpi_register_params(void) * opal_progress: decide whether to yield and the event library * tick rate */ - /* JMS: Need ORTE data here -- set this to 0 when - exactly/under-subscribed, or 1 when oversubscribed */ - ompi_mpi_yield_when_idle = false; + ompi_mpi_oversubscribe = false; + (void) mca_base_var_register("ompi", "mpi", NULL, "oversubscribe", + "Internal MCA parameter set by the runtime environment when oversubscribing nodes", + MCA_BASE_VAR_TYPE_BOOL, NULL, 0, 0, + OPAL_INFO_LVL_9, + MCA_BASE_VAR_SCOPE_READONLY, + &ompi_mpi_oversubscribe); + ompi_mpi_yield_when_idle = ompi_mpi_oversubscribe; (void) mca_base_var_register("ompi", "mpi", NULL, "yield_when_idle", "Yield the processor when waiting for MPI communication (for MPI processes, will default to 1 when oversubscribing nodes)", MCA_BASE_VAR_TYPE_BOOL, NULL, 0, 0, - OPAL_INFO_LVL_9, + OPAL_INFO_LVL_5, MCA_BASE_VAR_SCOPE_READONLY, &ompi_mpi_yield_when_idle); diff --git a/ompi/runtime/ompi_spc.c b/ompi/runtime/ompi_spc.c index caee2cda6f2..d88f290aaaf 100644 --- a/ompi/runtime/ompi_spc.c +++ b/ompi/runtime/ompi_spc.c @@ -1,11 +1,13 @@ /* - * Copyright (c) 2018 The University of Tennessee and The University + * Copyright (c) 2018-2019 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. * * Copyright (c) 2018 Cisco Systems, Inc. All rights reserved * Copyright (c) 2018 Research Organization for Information Science * and Technology (RIST). All rights reserved. + * Copyright (c) 2019 Mellanox Technologies, Inc. + * All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -20,10 +22,8 @@ opal_timer_t sys_clock_freq_mhz = 0; static void ompi_spc_dump(void); /* Array for converting from SPC indices to MPI_T indices */ -OMPI_DECLSPEC int mpi_t_offset = -1; -OMPI_DECLSPEC bool mpi_t_enabled = false; - -OPAL_DECLSPEC ompi_communicator_t *comm = NULL; +static bool mpi_t_enabled = false; +static ompi_communicator_t *ompi_spc_comm = NULL; typedef struct ompi_spc_event_t { const char* counter_name; @@ -185,6 +185,8 @@ static int ompi_spc_notify(mca_base_pvar_t *pvar, mca_base_pvar_event_t event, v return MPI_SUCCESS; } + index = (int)(uintptr_t)pvar->ctx; /* Convert from MPI_T pvar index to SPC index */ + /* For this event, we need to set count to the number of long long type * values for this counter. All SPC counters are one long long, so we * always set count to 1. @@ -194,14 +196,10 @@ static int ompi_spc_notify(mca_base_pvar_t *pvar, mca_base_pvar_event_t event, v } /* For this event, we need to turn on the counter */ else if(MCA_BASE_PVAR_HANDLE_START == event) { - /* Convert from MPI_T pvar index to SPC index */ - index = pvar->pvar_index - mpi_t_offset; SET_SPC_BIT(ompi_spc_attached_event, index); } /* For this event, we need to turn off the counter */ else if(MCA_BASE_PVAR_HANDLE_STOP == event) { - /* Convert from MPI_T pvar index to SPC index */ - index = pvar->pvar_index - mpi_t_offset; CLEAR_SPC_BIT(ompi_spc_attached_event, index); } @@ -231,7 +229,7 @@ static int ompi_spc_get_count(const struct mca_base_pvar_t *pvar, void *value, v } /* Convert from MPI_T pvar index to SPC index */ - int index = pvar->pvar_index - mpi_t_offset; + int index = (int)(uintptr_t)pvar->ctx; /* Set the counter value to the current SPC value */ *counter_value = (long long)ompi_spc_events[index].value; /* If this is a timer-based counter, convert from cycles to microseconds */ @@ -268,7 +266,7 @@ void ompi_spc_events_init(void) ompi_spc_events[i].value = 0; } - ompi_comm_dup(&ompi_mpi_comm_world.comm, 
&comm); + ompi_comm_dup(&ompi_mpi_comm_world.comm, &ompi_spc_comm); } /* Initializes the SPC data structures and registers all counters as MPI_T pvars. @@ -276,7 +274,7 @@ void ompi_spc_events_init(void) */ void ompi_spc_init(void) { - int i, j, ret, found = 0, all_on = 0; + int i, j, ret, found = 0, all_on = 0, matched = 0; /* Initialize the clock frequency variable as the CPU's frequency in MHz */ sys_clock_freq_mhz = opal_timer_base_get_freq() / 1000000; @@ -296,52 +294,43 @@ void ompi_spc_init(void) } } - /* Turn on only the counters that were specified in the MCA parameter */ for(i = 0; i < OMPI_SPC_NUM_COUNTERS; i++) { - if(all_on) { - SET_SPC_BIT(ompi_spc_attached_event, i); - mpi_t_enabled = true; - found++; - } else { - /* Note: If no arguments were given, this will be skipped */ + /* Reset all timer-based counters */ + CLEAR_SPC_BIT(ompi_spc_timer_event, i); + matched = all_on; + + if( !matched ) { + /* Turn on only the counters that were specified in the MCA parameter */ for(j = 0; j < num_args; j++) { if( 0 == strcmp(ompi_spc_events_names[i].counter_name, arg_strings[j]) ) { - SET_SPC_BIT(ompi_spc_attached_event, i); - mpi_t_enabled = true; - found++; + matched = 1; break; } } } - /* ######################################################################## - * ################## Add Timer-Based Counter Enums Here ################## - * ######################################################################## - */ - CLEAR_SPC_BIT(ompi_spc_timer_event, i); + if (matched) { + SET_SPC_BIT(ompi_spc_attached_event, i); + mpi_t_enabled = true; + found++; + } /* Registers the current counter as an MPI_T pvar regardless of whether it's been turned on or not */ ret = mca_base_pvar_register("ompi", "runtime", "spc", ompi_spc_events_names[i].counter_name, ompi_spc_events_names[i].counter_description, OPAL_INFO_LVL_4, MPI_T_PVAR_CLASS_SIZE, MCA_BASE_VAR_TYPE_UNSIGNED_LONG_LONG, NULL, MPI_T_BIND_NO_OBJECT, MCA_BASE_PVAR_FLAG_READONLY | MCA_BASE_PVAR_FLAG_CONTINUOUS, - 
ompi_spc_get_count, NULL, ompi_spc_notify, NULL); - - /* Check to make sure that ret is a valid index and not an error code. - */ - if( ret >= 0 ) { - if( mpi_t_offset == -1 ) { - mpi_t_offset = ret; - } - } - if( (ret < 0) || (ret != (mpi_t_offset + found - 1)) ) { + ompi_spc_get_count, NULL, ompi_spc_notify, (void*)(uintptr_t)i); + if( ret < 0 ) { mpi_t_enabled = false; opal_show_help("help-mpi-runtime.txt", "spc: MPI_T disabled", true); break; } } - /* If this is a timer event, sent the corresponding timer_event entry to 1 */ + + /* If this is a timer event, set the corresponding timer_event entry */ SET_SPC_BIT(ompi_spc_timer_event, OMPI_SPC_MATCH_TIME); + opal_argv_free(arg_strings); } @@ -353,8 +342,8 @@ static void ompi_spc_dump(void) int i, j, world_size, offset; long long *recv_buffer = NULL, *send_buffer; - int rank = ompi_comm_rank(comm); - world_size = ompi_comm_size(comm); + int rank = ompi_comm_rank(ompi_spc_comm); + world_size = ompi_comm_size(ompi_spc_comm); /* Convert from cycles to usecs before sending */ for(i = 0; i < OMPI_SPC_NUM_COUNTERS; i++) { @@ -381,10 +370,10 @@ static void ompi_spc_dump(void) return; } } - (void)comm->c_coll->coll_gather(send_buffer, OMPI_SPC_NUM_COUNTERS, MPI_LONG_LONG, + (void)ompi_spc_comm->c_coll->coll_gather(send_buffer, OMPI_SPC_NUM_COUNTERS, MPI_LONG_LONG, recv_buffer, OMPI_SPC_NUM_COUNTERS, MPI_LONG_LONG, - 0, comm, - comm->c_coll->coll_gather_module); + 0, ompi_spc_comm, + ompi_spc_comm->c_coll->coll_gather_module); /* Once rank 0 has all of the information, print the aggregated counter values for each rank in order */ if(rank == 0) { @@ -410,7 +399,7 @@ static void ompi_spc_dump(void) } free(send_buffer); - comm->c_coll->coll_barrier(comm, comm->c_coll->coll_barrier_module); + ompi_spc_comm->c_coll->coll_barrier(ompi_spc_comm, ompi_spc_comm->c_coll->coll_barrier_module); } /* Frees any dynamically alocated OMPI SPC data structures */ @@ -421,7 +410,7 @@ void ompi_spc_fini(void) } free(ompi_spc_events); 
ompi_spc_events = NULL; - ompi_comm_free(&comm); + ompi_comm_free(&ompi_spc_comm); } /* Records an update to a counter using an atomic add operation. */ diff --git a/ompi/tools/ompi_info/param.c b/ompi/tools/ompi_info/param.c index 17e2cc42e28..a2fb0d4487b 100644 --- a/ompi/tools/ompi_info/param.c +++ b/ompi/tools/ompi_info/param.c @@ -14,6 +14,9 @@ * Copyright (c) 2014-2017 Research Organization for Information Science * and Technology (RIST). All rights reserved. * Copyright (c) 2015 Intel, Inc. All rights reserved + * Copyright (c) 2018 Amazon.com, Inc. or its affiliates. All Rights reserved. + * Copyright (c) 2018 FUJITSU LIMITED. All rights reserved. + * Copyright (c) 2019 Triad National Security, LLC. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -132,6 +135,7 @@ void ompi_info_do_config(bool want_all) char *crdebug_support; char *topology_support; char *ipv6_support; + char *mpi1_compat_support; /* Do a little preprocessor trickery here to figure opal_info_out the * tri-state of MPI_PARAM_CHECK (which will be either 0, 1, or @@ -285,6 +289,7 @@ void ompi_info_do_config(bool want_all) symbol_visibility = OPAL_C_HAVE_VISIBILITY ? "yes" : "no"; topology_support = "yes"; ipv6_support = OPAL_ENABLE_IPV6 ? "yes" : "no"; + mpi1_compat_support = OMPI_ENABLE_MPI1_COMPAT ? 
"yes" : "no"; /* setup strings that require allocation */ if (OMPI_BUILD_FORTRAN_BINDINGS >= OMPI_FORTRAN_MPIFH_BINDINGS) { @@ -643,6 +648,8 @@ void ompi_info_do_config(bool want_all) opal_info_out("Host topology support", "options:host-topology", topology_support); opal_info_out("IPv6 support", "options:ipv6", ipv6_support); + opal_info_out("MPI1 compatibility", "options:mpi1-compatibility", + mpi1_compat_support); opal_info_out("MPI extensions", "options:mpi_ext", OMPI_MPIEXT_COMPONENTS); diff --git a/opal/class/opal_free_list.c b/opal/class/opal_free_list.c index b7c38b22f44..517d8ee0d3d 100644 --- a/opal/class/opal_free_list.c +++ b/opal/class/opal_free_list.c @@ -13,7 +13,7 @@ * Copyright (c) 2006-2007 Mellanox Technologies. All rights reserved. * Copyright (c) 2010-2013 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2011 NVIDIA Corporation. All rights reserved. - * Copyright (c) 2012-2016 Los Alamos National Security, LLC. All rights + * Copyright (c) 2012-2018 Los Alamos National Security, LLC. All rights * reserved. * $COPYRIGHT$ * @@ -155,13 +155,13 @@ int opal_free_list_init (opal_free_list_t *flist, size_t frag_size, size_t frag_ flist->ctx = ctx; if (num_elements_to_alloc) { - return opal_free_list_grow_st (flist, num_elements_to_alloc); + return opal_free_list_grow_st (flist, num_elements_to_alloc, NULL); } return OPAL_SUCCESS; } -int opal_free_list_grow_st (opal_free_list_t* flist, size_t num_elements) +int opal_free_list_grow_st (opal_free_list_t* flist, size_t num_elements, opal_free_list_item_t **item_out) { unsigned char *ptr, *payload_ptr = NULL; opal_free_list_memory_t *alloc_ptr; @@ -263,10 +263,16 @@ int opal_free_list_grow_st (opal_free_list_t* flist, size_t num_elements) /* NTH: in case the free list may be accessed from multiple threads * use the atomic lifo push. The overhead is small compared to the * overall overhead of opal_free_list_grow(). 
*/ - opal_lifo_push_atomic (&flist->super, &item->super); + if (item_out && 0 == i) { + /* ensure the thread that is growing the free list always gets an item + * if one is available */ + *item_out = item; + } else { + opal_lifo_push_atomic (&flist->super, &item->super); + } + ptr += head_size; payload_ptr += elem_size; - } if (OPAL_SUCCESS != rc && 0 == num_elements) { @@ -298,7 +304,7 @@ int opal_free_list_resize_mt(opal_free_list_t *flist, size_t size) opal_mutex_lock (&flist->fl_lock); do { - ret = opal_free_list_grow_st (flist, flist->fl_num_per_alloc); + ret = opal_free_list_grow_st (flist, flist->fl_num_per_alloc, NULL); if (OPAL_SUCCESS != ret) { break; } diff --git a/opal/class/opal_free_list.h b/opal/class/opal_free_list.h index 1e1de3e8e83..b7fd1920219 100644 --- a/opal/class/opal_free_list.h +++ b/opal/class/opal_free_list.h @@ -12,7 +12,7 @@ * All rights reserved. * Copyright (c) 2010 IBM Corporation. All rights reserved. * Copyright (c) 2010-2015 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2014-2015 Los Alamos National Security, LLC. All rights + * Copyright (c) 2014-2018 Los Alamos National Security, LLC. All rights * reserved. * $COPYRIGHT$ * @@ -146,6 +146,7 @@ OPAL_DECLSPEC int opal_free_list_init (opal_free_list_t *free_list, * * @param flist (IN) Free list to grow * @param num_elements (IN) Number of elements to add + * @param item_out (OUT) Location to store new free list item (can be NULL) * * @returns OPAL_SUCCESS if any elements were added * @returns OPAL_ERR_OUT_OF_RESOURCE if no elements could be added @@ -155,8 +156,14 @@ OPAL_DECLSPEC int opal_free_list_init (opal_free_list_t *free_list, * that may be accessed by multiple threads simultaneously. Note: this is an * internal function that will be used when needed by opal_free_list_get* and * opal_free_list_wait*. + * + * The item_out parameter can be used to ensure that the thread calling this + * function always gets a free list item if the list is successfully grown. 
+ * This eliminates a race condition with code that simply calls free_list_get + * and assumes NULL is an out of memory condition (which it wasn't necessarily + * before this parameter was added). */ -OPAL_DECLSPEC int opal_free_list_grow_st (opal_free_list_t *flist, size_t num_elements); +OPAL_DECLSPEC int opal_free_list_grow_st (opal_free_list_t *flist, size_t num_elements, opal_free_list_item_t **item_out); /** * Grow the free list to be at least size elements. @@ -195,9 +202,8 @@ static inline opal_free_list_item_t *opal_free_list_get_mt (opal_free_list_t *fl if (OPAL_UNLIKELY(NULL == item)) { opal_mutex_lock (&flist->fl_lock); - opal_free_list_grow_st (flist, flist->fl_num_per_alloc); + opal_free_list_grow_st (flist, flist->fl_num_per_alloc, &item); opal_mutex_unlock (&flist->fl_lock); - item = (opal_free_list_item_t *) opal_lifo_pop_atomic (&flist->super); } return item; @@ -209,8 +215,7 @@ static inline opal_free_list_item_t *opal_free_list_get_st (opal_free_list_t *fl (opal_free_list_item_t*) opal_lifo_pop_st (&flist->super); if (OPAL_UNLIKELY(NULL == item)) { - opal_free_list_grow_st (flist, flist->fl_num_per_alloc); - item = (opal_free_list_item_t *) opal_lifo_pop_atomic (&flist->super); + opal_free_list_grow_st (flist, flist->fl_num_per_alloc, &item); } return item; @@ -253,7 +258,7 @@ static inline opal_free_list_item_t *opal_free_list_wait_mt (opal_free_list_t *f while (NULL == item) { if (!opal_mutex_trylock (&fl->fl_lock)) { if (fl->fl_max_to_alloc <= fl->fl_num_allocated || - OPAL_SUCCESS != opal_free_list_grow_st (fl, fl->fl_num_per_alloc)) { + OPAL_SUCCESS != opal_free_list_grow_st (fl, fl->fl_num_per_alloc, &item)) { fl->fl_num_waiting++; opal_condition_wait (&fl->fl_condition, &fl->fl_lock); fl->fl_num_waiting--; @@ -274,7 +279,9 @@ static inline opal_free_list_item_t *opal_free_list_wait_mt (opal_free_list_t *f opal_mutex_lock (&fl->fl_lock); } opal_mutex_unlock (&fl->fl_lock); - item = (opal_free_list_item_t *) opal_lifo_pop_atomic 
(&fl->super); + if (NULL == item) { + item = (opal_free_list_item_t *) opal_lifo_pop_atomic (&fl->super); + } } return item; @@ -287,12 +294,13 @@ static inline opal_free_list_item_t *opal_free_list_wait_st (opal_free_list_t *f while (NULL == item) { if (fl->fl_max_to_alloc <= fl->fl_num_allocated || - OPAL_SUCCESS != opal_free_list_grow_st (fl, fl->fl_num_per_alloc)) { + OPAL_SUCCESS != opal_free_list_grow_st (fl, fl->fl_num_per_alloc, &item)) { /* try to make progress */ opal_progress (); } - - item = (opal_free_list_item_t *) opal_lifo_pop (&fl->super); + if (NULL == item) { + item = (opal_free_list_item_t *) opal_lifo_pop (&fl->super); + } } return item; diff --git a/opal/datatype/opal_convertor.c b/opal/datatype/opal_convertor.c index 63b4d714084..4754723f68a 100644 --- a/opal/datatype/opal_convertor.c +++ b/opal/datatype/opal_convertor.c @@ -3,7 +3,7 @@ * Copyright (c) 2004-2006 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. - * Copyright (c) 2004-2017 The University of Tennessee and The University + * Copyright (c) 2004-2019 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. * Copyright (c) 2004-2006 High Performance Computing Center Stuttgart, @@ -12,8 +12,8 @@ * All rights reserved. * Copyright (c) 2009 Oak Ridge National Labs. All rights reserved. * Copyright (c) 2011 NVIDIA Corporation. All rights reserved. - * Copyright (c) 2013-2017 Research Organization for Information Science - * and Technology (RIST). All rights reserved. + * Copyright (c) 2013-2018 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * Copyright (c) 2017 Intel, Inc. 
All rights reserved * $COPYRIGHT$ * @@ -324,13 +324,14 @@ int32_t opal_convertor_unpack( opal_convertor_t* pConv, return pConv->fAdvance( pConv, iov, out_size, max_data ); } -static inline int opal_convertor_create_stack_with_pos_contig( opal_convertor_t* pConvertor, - size_t starting_point, const size_t* sizes ) +static inline int +opal_convertor_create_stack_with_pos_contig( opal_convertor_t* pConvertor, + size_t starting_point, const size_t* sizes ) { dt_stack_t* pStack; /* pointer to the position on the stack */ const opal_datatype_t* pData = pConvertor->pDesc; dt_elem_desc_t* pElems; - uint32_t count; + size_t count; ptrdiff_t extent; pStack = pConvertor->pStack; @@ -340,7 +341,7 @@ static inline int opal_convertor_create_stack_with_pos_contig( opal_convertor_t* */ pElems = pConvertor->use_desc->desc; - count = (uint32_t)(starting_point / pData->size); + count = starting_point / pData->size; extent = pData->ub - pData->lb; pStack[0].type = OPAL_DATATYPE_LOOP; /* the first one is always the loop */ @@ -349,14 +350,14 @@ static inline int opal_convertor_create_stack_with_pos_contig( opal_convertor_t* pStack[0].disp = count * extent; /* now compute the number of pending bytes */ - count = (uint32_t)(starting_point - count * pData->size); + count = starting_point % pData->size; /** * We save the current displacement starting from the begining * of this data. 
*/ if( OPAL_LIKELY(0 == count) ) { pStack[1].type = pElems->elem.common.type; - pStack[1].count = pElems->elem.count; + pStack[1].count = pElems->elem.blocklen; } else { pStack[1].type = OPAL_DATATYPE_UINT1; pStack[1].count = pData->size - count; @@ -370,9 +371,9 @@ static inline int opal_convertor_create_stack_with_pos_contig( opal_convertor_t* return OPAL_SUCCESS; } -static inline -int opal_convertor_create_stack_at_begining( opal_convertor_t* convertor, - const size_t* sizes ) +static inline int +opal_convertor_create_stack_at_begining( opal_convertor_t* convertor, + const size_t* sizes ) { dt_stack_t* pStack = convertor->pStack; dt_elem_desc_t* pElems; @@ -402,7 +403,7 @@ int opal_convertor_create_stack_at_begining( opal_convertor_t* convertor, pStack[1].count = pElems[0].loop.loops; pStack[1].type = OPAL_DATATYPE_LOOP; } else { - pStack[1].count = pElems[0].elem.count; + pStack[1].count = pElems[0].elem.count * pElems[0].elem.blocklen; pStack[1].type = pElems[0].elem.common.type; } return OPAL_SUCCESS; @@ -563,7 +564,7 @@ size_t opal_convertor_compute_remote_size( opal_convertor_t* pConvertor ) int32_t opal_convertor_prepare_for_recv( opal_convertor_t* convertor, const struct opal_datatype_t* datatype, - int32_t count, + size_t count, const void* pUserBuf ) { /* Here I should check that the data is not overlapping */ @@ -578,8 +579,9 @@ int32_t opal_convertor_prepare_for_recv( opal_convertor_t* convertor, assert(! 
(convertor->flags & CONVERTOR_SEND)); OPAL_CONVERTOR_PREPARE( convertor, datatype, count, pUserBuf ); - if( convertor->flags & CONVERTOR_WITH_CHECKSUM ) { - if( !(convertor->flags & CONVERTOR_HOMOGENEOUS) ) { +#if defined(CHECKSUM) + if( OPAL_UNLIKELY(convertor->flags & CONVERTOR_WITH_CHECKSUM) ) { + if( OPAL_UNLIKELY(!(convertor->flags & CONVERTOR_HOMOGENEOUS)) ) { convertor->fAdvance = opal_unpack_general_checksum; } else { if( convertor->pDesc->flags & OPAL_DATATYPE_FLAG_CONTIGUOUS ) { @@ -588,8 +590,9 @@ int32_t opal_convertor_prepare_for_recv( opal_convertor_t* convertor, convertor->fAdvance = opal_generic_simple_unpack_checksum; } } - } else { - if( !(convertor->flags & CONVERTOR_HOMOGENEOUS) ) { + } else +#endif /* defined(CHECKSUM) */ + if( OPAL_UNLIKELY(!(convertor->flags & CONVERTOR_HOMOGENEOUS)) ) { convertor->fAdvance = opal_unpack_general; } else { if( convertor->pDesc->flags & OPAL_DATATYPE_FLAG_CONTIGUOUS ) { @@ -598,14 +601,13 @@ int32_t opal_convertor_prepare_for_recv( opal_convertor_t* convertor, convertor->fAdvance = opal_generic_simple_unpack; } } - } return OPAL_SUCCESS; } int32_t opal_convertor_prepare_for_send( opal_convertor_t* convertor, const struct opal_datatype_t* datatype, - int32_t count, + size_t count, const void* pUserBuf ) { convertor->flags |= CONVERTOR_SEND; @@ -617,6 +619,7 @@ int32_t opal_convertor_prepare_for_send( opal_convertor_t* convertor, OPAL_CONVERTOR_PREPARE( convertor, datatype, count, pUserBuf ); +#if defined(CHECKSUM) if( convertor->flags & CONVERTOR_WITH_CHECKSUM ) { if( CONVERTOR_SEND_CONVERSION == (convertor->flags & (CONVERTOR_SEND_CONVERSION|CONVERTOR_HOMOGENEOUS)) ) { convertor->fAdvance = opal_pack_general_checksum; @@ -631,7 +634,8 @@ int32_t opal_convertor_prepare_for_send( opal_convertor_t* convertor, convertor->fAdvance = opal_generic_simple_pack_checksum; } } - } else { + } else +#endif /* defined(CHECKSUM) */ if( CONVERTOR_SEND_CONVERSION == (convertor->flags & 
(CONVERTOR_SEND_CONVERSION|CONVERTOR_HOMOGENEOUS)) ) { convertor->fAdvance = opal_pack_general; } else { @@ -645,7 +649,6 @@ int32_t opal_convertor_prepare_for_send( opal_convertor_t* convertor, convertor->fAdvance = opal_generic_simple_pack; } } - } return OPAL_SUCCESS; } @@ -699,12 +702,12 @@ int opal_convertor_clone( const opal_convertor_t* source, void opal_convertor_dump( opal_convertor_t* convertor ) { - opal_output( 0, "Convertor %p count %d stack position %d bConverted %ld\n" - "\tlocal_size %ld remote_size %ld flags %X stack_size %d pending_length %d\n" + opal_output( 0, "Convertor %p count %" PRIsize_t " stack position %u bConverted %" PRIsize_t "\n" + "\tlocal_size %" PRIsize_t " remote_size %" PRIsize_t " flags %X stack_size %u pending_length %" PRIsize_t "\n" "\tremote_arch %u local_arch %u\n", (void*)convertor, - convertor->count, convertor->stack_pos, (unsigned long)convertor->bConverted, - (unsigned long)convertor->local_size, (unsigned long)convertor->remote_size, + convertor->count, convertor->stack_pos, convertor->bConverted, + convertor->local_size, convertor->remote_size, convertor->flags, convertor->stack_size, convertor->partial_length, convertor->remoteArch, opal_local_arch ); if( convertor->flags & CONVERTOR_RECV ) opal_output( 0, "unpack "); @@ -734,8 +737,8 @@ void opal_datatype_dump_stack( const dt_stack_t* pStack, int stack_pos, { opal_output( 0, "\nStack %p stack_pos %d name %s\n", (void*)pStack, stack_pos, name ); for( ; stack_pos >= 0; stack_pos-- ) { - opal_output( 0, "%d: pos %d count %d disp %ld ", stack_pos, pStack[stack_pos].index, - (int)pStack[stack_pos].count, (long)pStack[stack_pos].disp ); + opal_output( 0, "%d: pos %d count %" PRIsize_t " disp %ld ", stack_pos, pStack[stack_pos].index, + pStack[stack_pos].count, pStack[stack_pos].disp ); if( pStack->index != -1 ) opal_output( 0, "\t[desc count %lu disp %ld extent %ld]\n", (unsigned long)pDesc[pStack[stack_pos].index].elem.count, diff --git a/opal/datatype/opal_convertor.h 
b/opal/datatype/opal_convertor.h index 22a2bb1de3f..b24d94c37b0 100644 --- a/opal/datatype/opal_convertor.h +++ b/opal/datatype/opal_convertor.h @@ -12,8 +12,8 @@ * All rights reserved. * Copyright (c) 2009 Oak Ridge National Labs. All rights reserved. * Copyright (c) 2014 NVIDIA Corporation. All rights reserved. - * Copyright (c) 2017 Research Organization for Information Science - * and Technology (RIST). All rights reserved. + * Copyright (c) 2017-2018 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * Copyright (c) 2017 Intel, Inc. All rights reserved * $COPYRIGHT$ * @@ -74,6 +74,7 @@ struct opal_convertor_master_t; struct dt_stack_t { int32_t index; /**< index in the element description */ int16_t type; /**< the type used for the last pack/unpack (original or OPAL_DATATYPE_UINT1) */ + int16_t padding; size_t count; /**< number of times we still have to do it */ ptrdiff_t disp; /**< actual displacement depending on the count field */ }; @@ -93,30 +94,33 @@ struct opal_convertor_t { const opal_datatype_t* pDesc; /**< the datatype description associated with the convertor */ const dt_type_desc_t* use_desc; /**< the version used by the convertor (normal or optimized) */ opal_datatype_count_t count; /**< the total number of full datatype elements */ + + /* --- cacheline boundary (64 bytes - if 64bits arch and !OPAL_ENABLE_DEBUG) --- */ uint32_t stack_size; /**< size of the allocated stack */ - /* --- cacheline 1 boundary (64 bytes) --- */ unsigned char* pBaseBuf; /**< initial buffer as supplied by the user */ dt_stack_t* pStack; /**< the local stack for the actual conversion */ convertor_advance_fct_t fAdvance; /**< pointer to the pack/unpack functions */ + + /* --- cacheline boundary (96 bytes - if 64bits arch and !OPAL_ENABLE_DEBUG) --- */ struct opal_convertor_master_t* master; /**< the master convertor */ /* All others fields get modified for every call to pack/unpack functions */ uint32_t stack_pos; /**< the actual 
position on the stack */ - uint32_t partial_length; /**< amount of data left over from the last unpack */ + size_t partial_length; /**< amount of data left over from the last unpack */ size_t bConverted; /**< # of bytes already converted */ + + /* --- cacheline boundary (128 bytes - if 64bits arch and !OPAL_ENABLE_DEBUG) --- */ uint32_t checksum; /**< checksum computed by pack/unpack operation */ uint32_t csum_ui1; /**< partial checksum computed by pack/unpack operation */ size_t csum_ui2; /**< partial checksum computed by pack/unpack operation */ - /* --- cacheline 2 boundary (128 bytes) --- */ + + /* --- fields are no more aligned on cacheline --- */ dt_stack_t static_stack[DT_STATIC_STACK_SIZE]; /**< local stack for small datatypes */ - /* --- cacheline 3 boundary (192 bytes) was 56 bytes ago --- */ #if OPAL_CUDA_SUPPORT memcpy_fct_t cbmemcpy; /**< memcpy or cuMemcpy */ void * stream; /**< CUstream for async copy */ #endif - /* size: 248, cachelines: 4, members: 20 */ - /* last cacheline: 56 bytes */ }; OPAL_DECLSPEC OBJ_CLASS_DECLARATION( opal_convertor_t ); @@ -251,12 +255,12 @@ static inline void opal_convertor_get_offset_pointer( const opal_convertor_t* pC */ OPAL_DECLSPEC int32_t opal_convertor_prepare_for_send( opal_convertor_t* convertor, const struct opal_datatype_t* datatype, - int32_t count, + size_t count, const void* pUserBuf); static inline int32_t opal_convertor_copy_and_prepare_for_send( const opal_convertor_t* pSrcConv, const struct opal_datatype_t* datatype, - int32_t count, + size_t count, const void* pUserBuf, int32_t flags, opal_convertor_t* convertor ) @@ -273,11 +277,11 @@ static inline int32_t opal_convertor_copy_and_prepare_for_send( const opal_conve */ OPAL_DECLSPEC int32_t opal_convertor_prepare_for_recv( opal_convertor_t* convertor, const struct opal_datatype_t* datatype, - int32_t count, + size_t count, const void* pUserBuf ); static inline int32_t opal_convertor_copy_and_prepare_for_recv( const opal_convertor_t* pSrcConv, const 
struct opal_datatype_t* datatype, - int32_t count, + size_t count, const void* pUserBuf, int32_t flags, opal_convertor_t* convertor ) @@ -328,8 +332,10 @@ opal_convertor_set_position( opal_convertor_t* convertor, /* Remove the completed flag if it's already set */ convertor->flags &= ~CONVERTOR_COMPLETED; - if( !(convertor->flags & CONVERTOR_WITH_CHECKSUM) && - (convertor->flags & OPAL_DATATYPE_FLAG_NO_GAPS) && + if( (convertor->flags & OPAL_DATATYPE_FLAG_NO_GAPS) && +#if defined(CHECKSUM) + !(convertor->flags & CONVERTOR_WITH_CHECKSUM) && +#endif /* defined(CHECKSUM) */ (convertor->flags & (CONVERTOR_SEND | CONVERTOR_HOMOGENEOUS)) ) { /* Contiguous and no checkpoint and no homogeneous unpack */ convertor->bConverted = *position; diff --git a/opal/datatype/opal_convertor_internal.h b/opal/datatype/opal_convertor_internal.h index 025633cb7e7..39690f5bd19 100644 --- a/opal/datatype/opal_convertor_internal.h +++ b/opal/datatype/opal_convertor_internal.h @@ -50,11 +50,6 @@ opal_convertor_master_t* opal_convertor_find_or_create_master( uint32_t remote_a void opal_convertor_destroy_masters( void ); -#if OPAL_ENABLE_DEBUG -extern bool opal_pack_debug; -extern bool opal_unpack_debug; -#endif /* OPAL_ENABLE_DEBUG */ - END_C_DECLS #endif /* OPAL_CONVERTOR_INTERNAL_HAS_BEEN_INCLUDED */ diff --git a/opal/datatype/opal_convertor_raw.c b/opal/datatype/opal_convertor_raw.c index 09019388127..5bea5dcf5b8 100644 --- a/opal/datatype/opal_convertor_raw.c +++ b/opal/datatype/opal_convertor_raw.c @@ -1,12 +1,12 @@ /* -*- Mode: C; c-basic-offset:4 ; -*- */ /* - * Copyright (c) 2004-2009 The University of Tennessee and The University + * Copyright (c) 2004-2019 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. * Copyright (c) 2009 Oak Ridge National Labs. All rights reserved. * Copyright (c) 2013 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2017 Research Organization for Information Science - * and Technology (RIST). 
All rights reserved. + * Copyright (c) 2017-2019 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -25,29 +25,53 @@ #if OPAL_ENABLE_DEBUG #include "opal/util/output.h" -#define DO_DEBUG(INST) if( opal_pack_debug ) { INST } +#define DO_DEBUG(INST) if( opal_ddt_raw_debug ) { INST } #else #define DO_DEBUG(INST) #endif /* OPAL_ENABLE_DEBUG */ +/* Take a new iovec (base + len) and try to merge it with what we already + * have. If we succeed return 0 and move forward, otherwise save it into a new + * iovec location. If we need to advance position and we reach the end + * of the iovec array, return 1 to signal we did not saved the last iovec. + */ +static inline int +opal_convertor_merge_iov( struct iovec* iov, uint32_t* iov_count, + IOVBASE_TYPE* base, size_t len, + uint32_t* idx ) +{ + if( 0 != iov[*idx].iov_len ) { + if( (base == ((char*)iov[*idx].iov_base + iov[*idx].iov_len)) ) { + iov[*idx].iov_len += len; /* merge with previous iovec */ + return 0; + } /* cannot merge, move to the next position */ + *idx = *idx + 1; + if( *idx == *iov_count ) return 1; /* do not overwrite outside the iovec array boundaries */ + } + iov[*idx].iov_base = base; + iov[*idx].iov_len = len; + return 0; +} + /** * This function always work in local representation. This means no representation - * conversion (i.e. no heterogeneity) has to be taken into account, and that all + * conversion (i.e. no heterogeneity) is taken into account, and that all * length we're working on are local. 
*/ int32_t opal_convertor_raw( opal_convertor_t* pConvertor, - struct iovec* iov, uint32_t* iov_count, - size_t* length ) + struct iovec* iov, uint32_t* iov_count, + size_t* length ) { const opal_datatype_t *pData = pConvertor->pDesc; dt_stack_t* pStack; /* pointer to the position on the stack */ uint32_t pos_desc; /* actual position in the description of the derived datatype */ - uint32_t count_desc; /* the number of items already done in the actual pos_desc */ + size_t count_desc; /* the number of items already done in the actual pos_desc */ + size_t do_now, blength; dt_elem_desc_t* description, *pElem; unsigned char *source_base; /* origin of the data */ - size_t raw_data = 0; /* sum of raw data lengths in the iov_len fields */ - uint32_t index = 0, i; /* the iov index and a simple counter */ + size_t sum_iov_len = 0; /* sum of raw data lengths in the iov_len fields */ + uint32_t index = 0; /* the iov index and a simple counter */ assert( (*iov_count) > 0 ); if( OPAL_LIKELY(pConvertor->flags & CONVERTOR_COMPLETED) ) { @@ -77,74 +101,96 @@ opal_convertor_raw( opal_convertor_t* pConvertor, description = pConvertor->use_desc->desc; /* For the first step we have to add both displacement to the source. After in the - * main while loop we will set back the source_base to the correct value. This is - * due to the fact that the convertor can stop in the middle of a data with a count - */ + * main while loop we will set back the source_base to the correct value. 
This is + * due to the fact that the convertor can stop in the middle of a data with a count + */ pStack = pConvertor->pStack + pConvertor->stack_pos; pos_desc = pStack->index; source_base = pConvertor->pBaseBuf + pStack->disp; - count_desc = (uint32_t)pStack->count; + count_desc = pStack->count; pStack--; pConvertor->stack_pos--; pElem = &(description[pos_desc]); - source_base += pStack->disp; - DO_DEBUG( opal_output( 0, "raw start pos_desc %d count_desc %d disp %ld\n" - "stack_pos %d pos_desc %d count_desc %d disp %ld\n", + + DO_DEBUG( opal_output( 0, "raw start pos_desc %d count_desc %" PRIsize_t " disp %ld\n" + "stack_pos %d pos_desc %d count_desc %" PRIsize_t " disp %ld\n", pos_desc, count_desc, (long)(source_base - pConvertor->pBaseBuf), - pConvertor->stack_pos, pStack->index, (int)pStack->count, (long)pStack->disp ); ); + pConvertor->stack_pos, pStack->index, pStack->count, (long)pStack->disp ); ); + + iov[index].iov_len = 0; + /* Special case if we start from a position that is in the middle of a data element blocklen. + * We can treat this outside the loop as it is an exception that can only happen once, + * and will simplify the loop handling. + */ + if( pElem->elem.common.flags & OPAL_DATATYPE_FLAG_DATA ) { + const ddt_elem_desc_t* current = &(pElem->elem); + + if( count_desc != (current->count * current->blocklen) ) { /* Not the full element description */ + if( (do_now = count_desc % current->blocklen) ) { + do_now = current->blocklen - do_now; /* how much left in the block */ + source_base += current->disp; + blength = do_now * opal_datatype_basicDatatypes[current->common.type]->size; + OPAL_DATATYPE_SAFEGUARD_POINTER( source_base, blength, pConvertor->pBaseBuf, + pConvertor->pDesc, pConvertor->count ); + DO_DEBUG( opal_output( 0, "raw 1. 
iov[%d] = {base %p, length %" PRIsize_t "}\n", + index, (void*)source_base, blength ); ); + opal_convertor_merge_iov( iov, iov_count, + (IOVBASE_TYPE *) source_base, blength, &index ); + /* ignore the return value, we know there was at least one element in the iovec */ + sum_iov_len += blength; + count_desc -= do_now; + + source_base += (blength - current->blocklen * opal_datatype_basicDatatypes[current->common.type]->size + + current->extent - current->disp); + } + } + } + while( 1 ) { while( pElem->elem.common.flags & OPAL_DATATYPE_FLAG_DATA ) { - size_t blength = opal_datatype_basicDatatypes[pElem->elem.common.type]->size; - source_base += pElem->elem.disp; - if( blength == (size_t)pElem->elem.extent ) { /* no resized data */ - if( index < *iov_count ) { - blength *= count_desc; - /* now here we have a basic datatype */ - OPAL_DATATYPE_SAFEGUARD_POINTER( source_base, blength, pConvertor->pBaseBuf, - pConvertor->pDesc, pConvertor->count ); - DO_DEBUG( opal_output( 0, "raw 1. iov[%d] = {base %p, length %lu}\n", - index, (void*)source_base, (unsigned long)blength ); ); - iov[index].iov_base = (IOVBASE_TYPE *) source_base; - iov[index].iov_len = blength; - source_base += blength; - raw_data += blength; - index++; - count_desc = 0; - } - } else { - for( i = count_desc; (i > 0) && (index < *iov_count); i--, index++ ) { - OPAL_DATATYPE_SAFEGUARD_POINTER( source_base, blength, pConvertor->pBaseBuf, - pConvertor->pDesc, pConvertor->count ); - DO_DEBUG( opal_output( 0, "raw 2. 
iov[%d] = {base %p, length %lu}\n", - index, (void*)source_base, (unsigned long)blength ); ); - iov[index].iov_base = (IOVBASE_TYPE *) source_base; - iov[index].iov_len = blength; - source_base += pElem->elem.extent; - raw_data += blength; - count_desc--; - } + const ddt_elem_desc_t* current = &(pElem->elem); + source_base += current->disp; + + do_now = current->count; + if( count_desc != (current->count * current->blocklen) ) { + do_now = count_desc / current->blocklen; + assert( 0 == (count_desc % current->blocklen) ); } - source_base -= pElem->elem.disp; + + blength = current->blocklen * opal_datatype_basicDatatypes[current->common.type]->size; + for(size_t _i = 0; _i < do_now; _i++ ) { + OPAL_DATATYPE_SAFEGUARD_POINTER( source_base, blength, pConvertor->pBaseBuf, + pConvertor->pDesc, pConvertor->count ); + DO_DEBUG( opal_output( 0, "raw 2. iov[%d] = {base %p, length %" PRIsize_t "}\n", + index, (void*)source_base, blength ); ); + if( opal_convertor_merge_iov( iov, iov_count, + (IOVBASE_TYPE *) source_base, blength, &index ) ) + break; /* no more iovec available, bail out */ + + source_base += current->extent; + sum_iov_len += blength; + count_desc -= current->blocklen; + } + if( 0 == count_desc ) { /* completed */ source_base = pConvertor->pBaseBuf + pStack->disp; pos_desc++; /* advance to the next data */ UPDATE_INTERNAL_COUNTERS( description, pos_desc, pElem, count_desc ); continue; } + source_base -= current->disp; goto complete_loop; } if( OPAL_DATATYPE_END_LOOP == pElem->elem.common.type ) { /* end of the current loop */ - DO_DEBUG( opal_output( 0, "raw end_loop count %d stack_pos %d" - " pos_desc %d disp %ld space %lu\n", - (int)pStack->count, pConvertor->stack_pos, - pos_desc, (long)pStack->disp, (unsigned long)raw_data ); ); + DO_DEBUG( opal_output( 0, "raw end_loop count %" PRIsize_t " stack_pos %d" + " pos_desc %d disp %ld space %" PRIsize_t "\n", + pStack->count, pConvertor->stack_pos, + pos_desc, (long)pStack->disp, sum_iov_len ); ); if( 
--(pStack->count) == 0 ) { /* end of loop */ - if( pConvertor->stack_pos == 0 ) { - /* we lie about the size of the next element in order to - * make sure we exit the main loop. - */ - *iov_count = index; - goto complete_loop; /* completed */ + if( 0 == pConvertor->stack_pos ) { + /* we're done. Force the exit of the main for loop (around iovec) */ + index++; /* account for the currently updating iovec */ + goto complete_loop; } pConvertor->stack_pos--; pStack--; @@ -155,52 +201,56 @@ opal_convertor_raw( opal_convertor_t* pConvertor, pStack->disp += (pData->ub - pData->lb); } else { assert( OPAL_DATATYPE_LOOP == description[pStack->index].loop.common.type ); - pStack->disp += description[pStack->index].loop.extent; + pStack->disp += description[pStack->index].loop.extent; /* jump by the loop extent */ } } source_base = pConvertor->pBaseBuf + pStack->disp; UPDATE_INTERNAL_COUNTERS( description, pos_desc, pElem, count_desc ); - DO_DEBUG( opal_output( 0, "raw new_loop count %d stack_pos %d " - "pos_desc %d disp %ld space %lu\n", - (int)pStack->count, pConvertor->stack_pos, - pos_desc, (long)pStack->disp, (unsigned long)raw_data ); ); + DO_DEBUG( opal_output( 0, "raw new_loop count %" PRIsize_t " stack_pos %d " + "pos_desc %d disp %ld space %" PRIsize_t "\n", + pStack->count, pConvertor->stack_pos, + pos_desc, (long)pStack->disp, sum_iov_len ); ); } if( OPAL_DATATYPE_LOOP == pElem->elem.common.type ) { ptrdiff_t local_disp = (ptrdiff_t)source_base; ddt_endloop_desc_t* end_loop = (ddt_endloop_desc_t*)(pElem + pElem->loop.items); if( pElem->loop.common.flags & OPAL_DATATYPE_FLAG_CONTIGUOUS ) { - uint32_t i; - source_base += end_loop->first_elem_disp; - for( i = count_desc; (i > 0) && (index < *iov_count); i--, index++ ) { + ptrdiff_t offset = end_loop->first_elem_disp; + source_base += offset; + for(; count_desc > 0; ) { OPAL_DATATYPE_SAFEGUARD_POINTER( source_base, end_loop->size, pConvertor->pBaseBuf, - pConvertor->pDesc, pConvertor->count ); - iov[index].iov_base = 
(IOVBASE_TYPE *) source_base; - iov[index].iov_len = end_loop->size; + pConvertor->pDesc, pConvertor->count ); + if( opal_convertor_merge_iov( iov, iov_count, + (IOVBASE_TYPE *) source_base, end_loop->size, &index ) ) { + source_base -= offset; + goto complete_loop; + } + source_base += pElem->loop.extent; - raw_data += end_loop->size; + sum_iov_len += end_loop->size; count_desc--; + DO_DEBUG( opal_output( 0, "raw contig loop generate iov[%d] = {base %p, length %" PRIsize_t "}" + "space %" PRIsize_t " [pos_desc %d]\n", + index, iov[index].iov_base, iov[index].iov_len, + sum_iov_len, pos_desc ); ); } - source_base -= end_loop->first_elem_disp; - if( 0 == count_desc ) { /* completed */ - pos_desc += pElem->loop.items + 1; - goto update_loop_description; - } + source_base -= offset; + pos_desc += pElem->loop.items + 1; + } else { + local_disp = (ptrdiff_t)source_base - local_disp; + PUSH_STACK( pStack, pConvertor->stack_pos, pos_desc, OPAL_DATATYPE_LOOP, count_desc, + pStack->disp + local_disp); + pos_desc++; } - local_disp = (ptrdiff_t)source_base - local_disp; - PUSH_STACK( pStack, pConvertor->stack_pos, pos_desc, OPAL_DATATYPE_LOOP, count_desc, - pStack->disp + local_disp); - pos_desc++; - update_loop_description: /* update the current state */ source_base = pConvertor->pBaseBuf + pStack->disp; UPDATE_INTERNAL_COUNTERS( description, pos_desc, pElem, count_desc ); DDT_DUMP_STACK( pConvertor->pStack, pConvertor->stack_pos, pElem, "advance loop" ); - continue; } } -complete_loop: - pConvertor->bConverted += raw_data; /* update the already converted bytes */ - *length = raw_data; + complete_loop: + pConvertor->bConverted += sum_iov_len; /* update the already converted bytes */ + *length = sum_iov_len; *iov_count = index; if( pConvertor->bConverted == pConvertor->local_size ) { pConvertor->flags |= CONVERTOR_COMPLETED; @@ -208,8 +258,8 @@ opal_convertor_raw( opal_convertor_t* pConvertor, } /* I complete an element, next step I should go to the next one */ PUSH_STACK( 
pStack, pConvertor->stack_pos, pos_desc, OPAL_DATATYPE_UINT1, count_desc, - source_base - pStack->disp - pConvertor->pBaseBuf ); - DO_DEBUG( opal_output( 0, "raw save stack stack_pos %d pos_desc %d count_desc %d disp %ld\n", - pConvertor->stack_pos, pStack->index, (int)pStack->count, (long)pStack->disp ); ); + source_base - pConvertor->pBaseBuf ); + DO_DEBUG( opal_output( 0, "raw save stack stack_pos %d pos_desc %d count_desc %" PRIsize_t " disp %ld\n", + pConvertor->stack_pos, pStack->index, pStack->count, (long)pStack->disp ); ); return 0; } diff --git a/opal/datatype/opal_copy_functions.c b/opal/datatype/opal_copy_functions.c index 221d07a920c..1b96c78a6c3 100644 --- a/opal/datatype/opal_copy_functions.c +++ b/opal/datatype/opal_copy_functions.c @@ -4,8 +4,8 @@ * of Tennessee Research Foundation. All rights * reserved. * Copyright (c) 2009 Oak Ridge National Labs. All rights reserved. - * Copyright (c) 2015-2017 Research Organization for Information Science - * and Technology (RIST). All rights reserved. + * Copyright (c) 2015-2018 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * Copyright (c) 2015 Cisco Systems, Inc. All rights reserved. 
* $COPYRIGHT$ * @@ -39,18 +39,17 @@ * Return value: Number of elements of type TYPE copied */ #define COPY_TYPE( TYPENAME, TYPE, COUNT ) \ -static int copy_##TYPENAME( opal_convertor_t *pConvertor, uint32_t count, \ - char* from, size_t from_len, ptrdiff_t from_extent, \ - char* to, size_t to_len, ptrdiff_t to_extent, \ - ptrdiff_t *advance) \ +static int copy_##TYPENAME( opal_convertor_t *pConvertor, size_t count, \ + char* from, size_t from_len, ptrdiff_t from_extent, \ + char* to, size_t to_len, ptrdiff_t to_extent, \ + ptrdiff_t *advance) \ { \ - uint32_t i; \ size_t remote_TYPE_size = sizeof(TYPE) * (COUNT); /* TODO */ \ size_t local_TYPE_size = (COUNT) * sizeof(TYPE); \ \ /* make sure the remote buffer is large enough to hold the data */ \ if( (remote_TYPE_size * count) > from_len ) { \ - count = (uint32_t)(from_len / remote_TYPE_size); \ + count = from_len / remote_TYPE_size; \ if( (count * remote_TYPE_size) != from_len ) { \ DUMP( "oops should I keep this data somewhere (excedent %d bytes)?\n", \ from_len - (count * remote_TYPE_size) ); \ @@ -67,7 +66,7 @@ static int copy_##TYPENAME( opal_convertor_t *pConvertor, uint32_t count, MEMCPY( to, from, count * local_TYPE_size ); \ } else { \ /* source or destination are non-contigous */ \ - for( i = 0; i < count; i++ ) { \ + for(size_t i = 0; i < count; i++ ) { \ MEMCPY( to, from, local_TYPE_size ); \ to += to_extent; \ from += from_extent; \ @@ -92,17 +91,16 @@ static int copy_##TYPENAME( opal_convertor_t *pConvertor, uint32_t count, * Return value: Number of elements of type TYPE copied */ #define COPY_CONTIGUOUS_BYTES( TYPENAME, COUNT ) \ -static int copy_##TYPENAME##_##COUNT( opal_convertor_t *pConvertor, uint32_t count, \ - char* from, size_t from_len, ptrdiff_t from_extent, \ - char* to, size_t to_len, ptrdiff_t to_extent, \ - ptrdiff_t *advance ) \ +static size_t copy_##TYPENAME##_##COUNT( opal_convertor_t *pConvertor, size_t count, \ + char* from, size_t from_len, ptrdiff_t from_extent, \ + char* to, 
size_t to_len, ptrdiff_t to_extent, \ + ptrdiff_t *advance ) \ { \ - uint32_t i; \ size_t remote_TYPE_size = (size_t)(COUNT); /* TODO */ \ size_t local_TYPE_size = (size_t)(COUNT); \ \ if( (remote_TYPE_size * count) > from_len ) { \ - count = (uint32_t)(from_len / remote_TYPE_size); \ + count = from_len / remote_TYPE_size; \ if( (count * remote_TYPE_size) != from_len ) { \ DUMP( "oops should I keep this data somewhere (excedent %d bytes)?\n", \ from_len - (count * remote_TYPE_size) ); \ @@ -117,7 +115,7 @@ static int copy_##TYPENAME##_##COUNT( opal_convertor_t *pConvertor, uint32_t cou (to_extent == (ptrdiff_t)remote_TYPE_size) ) { \ MEMCPY( to, from, count * local_TYPE_size ); \ } else { \ - for( i = 0; i < count; i++ ) { \ + for(size_t i = 0; i < count; i++ ) { \ MEMCPY( to, from, local_TYPE_size ); \ to += to_extent; \ from += from_extent; \ diff --git a/opal/datatype/opal_copy_functions_heterogeneous.c b/opal/datatype/opal_copy_functions_heterogeneous.c index a46e87b4dde..83a3966008c 100644 --- a/opal/datatype/opal_copy_functions_heterogeneous.c +++ b/opal/datatype/opal_copy_functions_heterogeneous.c @@ -4,9 +4,8 @@ * of Tennessee Research Foundation. All rights * reserved. * Copyright (c) 2009 Oak Ridge National Labs. All rights reserved. - * Copyright (c) 2015-2017 Research Organization for Information Science - * Copyright (c) 2015-2017 Research Organization for Information Science - * and Technology (RIST). All rights reserved. + * Copyright (c) 2015-2018 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -140,12 +139,12 @@ opal_dt_swap_long_double(void *to_p, const void *from_p, const size_t size, size #define COPY_TYPE_HETEROGENEOUS_INTERNAL( TYPENAME, TYPE, LONG_DOUBLE ) \ static int32_t \ -copy_##TYPENAME##_heterogeneous(opal_convertor_t *pConvertor, uint32_t count, \ +copy_##TYPENAME##_heterogeneous(opal_convertor_t *pConvertor, size_t count, \ const char* from, size_t from_len, ptrdiff_t from_extent, \ char* to, size_t to_length, ptrdiff_t to_extent, \ ptrdiff_t *advance) \ { \ - uint32_t i; \ + size_t i; \ \ datatype_check( #TYPE, sizeof(TYPE), sizeof(TYPE), &count, \ from, from_len, from_extent, \ @@ -188,12 +187,12 @@ copy_##TYPENAME##_heterogeneous(opal_convertor_t *pConvertor, uint32_t count, #define COPY_2SAMETYPE_HETEROGENEOUS_INTERNAL( TYPENAME, TYPE, LONG_DOUBLE) \ static int32_t \ -copy_##TYPENAME##_heterogeneous(opal_convertor_t *pConvertor, uint32_t count, \ +copy_##TYPENAME##_heterogeneous(opal_convertor_t *pConvertor, size_t count, \ const char* from, size_t from_len, ptrdiff_t from_extent, \ char* to, size_t to_length, ptrdiff_t to_extent, \ ptrdiff_t *advance) \ { \ - uint32_t i; \ + size_t i; \ \ datatype_check( #TYPE, sizeof(TYPE), sizeof(TYPE), &count, \ from, from_len, from_extent, \ @@ -233,12 +232,12 @@ copy_##TYPENAME##_heterogeneous(opal_convertor_t *pConvertor, uint32_t count, #define COPY_2TYPE_HETEROGENEOUS( TYPENAME, TYPE1, TYPE2 ) \ static int32_t \ -copy_##TYPENAME##_heterogeneous(opal_convertor_t *pConvertor, uint32_t count, \ - const char* from, uint32_t from_len, ptrdiff_t from_extent, \ - char* to, uint32_t to_length, ptrdiff_t to_extent, \ +copy_##TYPENAME##_heterogeneous(opal_convertor_t *pConvertor, size_t count, \ + const char* from, size_t from_len, ptrdiff_t from_extent, \ + char* to, size_t to_length, ptrdiff_t to_extent, \ ptrdiff_t *advance) \ { \ - uint32_t i; \ + size_t i; \ \ datatype_check( #TYPENAME, sizeof(TYPE1) + sizeof(TYPE2), \ sizeof(TYPE1) + 
sizeof(TYPE2), &count, \ @@ -276,13 +275,13 @@ copy_##TYPENAME##_heterogeneous(opal_convertor_t *pConvertor, uint32_t count, \ static inline void -datatype_check(char *type, size_t local_size, size_t remote_size, uint32_t *count, +datatype_check(char *type, size_t local_size, size_t remote_size, size_t *count, const char* from, size_t from_len, ptrdiff_t from_extent, char* to, size_t to_len, ptrdiff_t to_extent) { /* make sure the remote buffer is large enough to hold the data */ if( (remote_size * *count) > from_len ) { - *count = (uint32_t)(from_len / remote_size); + *count = from_len / remote_size; if( (*count * remote_size) != from_len ) { DUMP( "oops should I keep this data somewhere (excedent %d bytes)?\n", from_len - (*count * remote_size) ); @@ -296,20 +295,18 @@ datatype_check(char *type, size_t local_size, size_t remote_size, uint32_t *coun } #define CXX_BOOL_COPY_LOOP(TYPE) \ - for( i = 0; i < count; i++ ) { \ + for(size_t i = 0; i < count; i++ ) { \ bool *to_real = (bool*) to; \ *to_real = *((TYPE*) from) == 0 ? false : true; \ to += to_extent; \ from += from_extent; \ } static int32_t -copy_cxx_bool_heterogeneous(opal_convertor_t *pConvertor, uint32_t count, - const char* from, uint32_t from_len, ptrdiff_t from_extent, - char* to, uint32_t to_length, ptrdiff_t to_extent, +copy_cxx_bool_heterogeneous(opal_convertor_t *pConvertor, size_t count, + const char* from, size_t from_len, ptrdiff_t from_extent, + char* to, size_t to_length, ptrdiff_t to_extent, ptrdiff_t *advance) { - uint32_t i; - /* fix up the from extent */ if ((pConvertor->remoteArch & OPAL_ARCH_BOOLISxx) != (opal_local_arch & OPAL_ARCH_BOOLISxx)) { diff --git a/opal/datatype/opal_datatype.h b/opal/datatype/opal_datatype.h index 3605660fa1f..e1bc18c67f9 100644 --- a/opal/datatype/opal_datatype.h +++ b/opal/datatype/opal_datatype.h @@ -86,7 +86,7 @@ BEGIN_C_DECLS * associated type. 
*/ #define MAX_DT_COMPONENT_COUNT UINT_MAX -typedef uint32_t opal_datatype_count_t; +typedef size_t opal_datatype_count_t; typedef union dt_elem_desc dt_elem_desc_t; @@ -119,7 +119,6 @@ struct opal_datatype_t { /* Attribute fields */ char name[OPAL_MAX_OBJECT_NAME]; /**< name of the datatype */ - /* --- cacheline 2 boundary (128 bytes) was 8-12 bytes ago --- */ dt_type_desc_t desc; /**< the data description */ dt_type_desc_t opt_desc; /**< short description of the data used when conversion is useless or in the send case (without conversion) */ @@ -225,13 +224,41 @@ opal_datatype_is_contiguous_memory_layout( const opal_datatype_t* datatype, int3 } -OPAL_DECLSPEC void opal_datatype_dump( const opal_datatype_t* pData ); +OPAL_DECLSPEC void +opal_datatype_dump( const opal_datatype_t* pData ); + /* data creation functions */ -OPAL_DECLSPEC int32_t opal_datatype_clone( const opal_datatype_t * src_type, opal_datatype_t * dest_type ); -OPAL_DECLSPEC int32_t opal_datatype_create_contiguous( int count, const opal_datatype_t* oldType, opal_datatype_t** newType ); -OPAL_DECLSPEC int32_t opal_datatype_resize( opal_datatype_t* type, ptrdiff_t lb, ptrdiff_t extent ); -OPAL_DECLSPEC int32_t opal_datatype_add( opal_datatype_t* pdtBase, const opal_datatype_t* pdtAdd, size_t count, - ptrdiff_t disp, ptrdiff_t extent ); + +/** + * Create a duplicate of the source datatype. + */ +OPAL_DECLSPEC int32_t +opal_datatype_clone( const opal_datatype_t* src_type, + opal_datatype_t* dest_type ); +/** + * A contiguous array of identical datatypes. + */ +OPAL_DECLSPEC int32_t +opal_datatype_create_contiguous( int count, const opal_datatype_t* oldType, + opal_datatype_t** newType ); +/** + * Add a new datatype to the base type description. The count is the number + * repetitions of the same element to be added, and the extent is the extent + * of each element. The displacement is the initial displacement of the + * first element. 
+ */ +OPAL_DECLSPEC int32_t +opal_datatype_add( opal_datatype_t* pdtBase, + const opal_datatype_t* pdtAdd, size_t count, + ptrdiff_t disp, ptrdiff_t extent ); + +/** + * Alter the lb and extent of an existing datatype in place. + */ +OPAL_DECLSPEC int32_t +opal_datatype_resize( opal_datatype_t* type, + ptrdiff_t lb, + ptrdiff_t extent ); static inline int32_t opal_datatype_type_lb( const opal_datatype_t* pData, ptrdiff_t* disp ) diff --git a/opal/datatype/opal_datatype_add.c b/opal/datatype/opal_datatype_add.c index 146ce12afe2..108b4e3d1be 100644 --- a/opal/datatype/opal_datatype_add.c +++ b/opal/datatype/opal_datatype_add.c @@ -3,7 +3,7 @@ * Copyright (c) 2004-2006 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. - * Copyright (c) 2004-2017 The University of Tennessee and The University + * Copyright (c) 2004-2019 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. * Copyright (c) 2004-2006 High Performance Computing Center Stuttgart, @@ -281,15 +281,23 @@ int32_t opal_datatype_add( opal_datatype_t* pdtBase, const opal_datatype_t* pdtA if( (pdtAdd->flags & (OPAL_DATATYPE_FLAG_PREDEFINED | OPAL_DATATYPE_FLAG_DATA)) == (OPAL_DATATYPE_FLAG_PREDEFINED | OPAL_DATATYPE_FLAG_DATA) ) { if( NULL != pdtBase->ptypes ) pdtBase->ptypes[pdtAdd->id] += count; + + pLast->elem.common.flags = pdtAdd->flags & ~(OPAL_DATATYPE_FLAG_COMMITTED); pLast->elem.common.type = pdtAdd->id; - pLast->elem.count = count; pLast->elem.disp = disp; - pLast->elem.extent = extent; - pdtBase->desc.used++; - pLast->elem.common.flags = pdtAdd->flags & ~(OPAL_DATATYPE_FLAG_COMMITTED); - if( (extent != (ptrdiff_t)pdtAdd->size) && (count > 1) ) { /* gaps around the datatype */ - pLast->elem.common.flags &= ~(OPAL_DATATYPE_FLAG_CONTIGUOUS | OPAL_DATATYPE_FLAG_NO_GAPS); + pLast->elem.extent = count * extent; + /* assume predefined datatypes without extent, aka. 
contiguous */ + pLast->elem.count = 1; + pLast->elem.blocklen = count; + if( extent != (ptrdiff_t)pdtAdd->size ) { /* not contiguous: let's fix */ + pLast->elem.count = count; + pLast->elem.blocklen = 1; + pLast->elem.extent = extent; + if( count > 1 ) { /* gaps around the predefined datatype */ + pLast->elem.common.flags &= ~(OPAL_DATATYPE_FLAG_CONTIGUOUS | OPAL_DATATYPE_FLAG_NO_GAPS); + } } + pdtBase->desc.used++; } else { /* keep trace of the total number of basic datatypes in the datatype definition */ pdtBase->loops += pdtAdd->loops; @@ -299,13 +307,40 @@ int32_t opal_datatype_add( opal_datatype_t* pdtBase, const opal_datatype_t* pdtA for( i = OPAL_DATATYPE_FIRST_TYPE; i < OPAL_DATATYPE_MAX_PREDEFINED; i++ ) if( pdtAdd->ptypes[i] != 0 ) pdtBase->ptypes[i] += (count * pdtAdd->ptypes[i]); } - if( (1 == pdtAdd->desc.used) && (extent == (pdtAdd->ub - pdtAdd->lb)) && - (extent == pdtAdd->desc.desc[0].elem.extent) ){ + if( 1 == pdtAdd->desc.used ) { pLast->elem = pdtAdd->desc.desc[0].elem; - pLast->elem.count *= count; pLast->elem.disp += disp; + if( 1 == count ) { + /* Extent only has a meaning when there are multiple elements. Bail out */ + } else if( 1 == pLast->elem.count ) { + /* The size and true_extent of the added datatype are identical, signaling a datatype + * that is mostly contiguous with the exception of the initial and final gaps. 
These + * gaps do not matter here as they will amended (the initial gaps being shifted by the + * new displacement and the final gap being replaced with the new gap + */ + if( pdtAdd->desc.desc[0].elem.extent == extent ) { + /* pure bliss everything is fully contiguous and we can collapse + * everything by updating the blocklen and extent + */ + pLast->elem.blocklen *= count; + pLast->elem.extent *= count; + } else { + pLast->elem.count = count; + pLast->elem.extent = extent; + } + } else if( extent == (ptrdiff_t)(pLast->elem.count * pLast->elem.extent) ) { + /* It's just a repetition of the same element, increase the count */ + pLast->elem.count *= count; + } else { + /* No luck here, no optimization can be applied. Fall back to the + * normal case where we add a loop around the datatype. + */ + goto build_loop; + } pdtBase->desc.used++; } else { + +build_loop: /* if the extent of the datatype is the same as the extent of the loop * description of the datatype then we simply have to update the main loop. */ diff --git a/opal/datatype/opal_datatype_clone.c b/opal/datatype/opal_datatype_clone.c index fa4479982d0..59e82bb40cf 100644 --- a/opal/datatype/opal_datatype_clone.c +++ b/opal/datatype/opal_datatype_clone.c @@ -12,6 +12,8 @@ * All rights reserved. * Copyright (c) 2009 Sun Microsystems, Inc. All rights reserved. * Copyright (c) 2009 Oak Ridge National Labs. All rights reserved. + * Copyright (c) 2019 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -41,6 +43,7 @@ int32_t opal_datatype_clone( const opal_datatype_t * src_type, opal_datatype_t * sizeof(opal_datatype_t)-sizeof(opal_object_t) ); dest_type->flags &= (~OPAL_DATATYPE_FLAG_PREDEFINED); + dest_type->ptypes = NULL; dest_type->desc.desc = temp; /** diff --git a/opal/datatype/opal_datatype_copy.c b/opal/datatype/opal_datatype_copy.c index 7bf94ef97b9..c70bdd24dfa 100644 --- a/opal/datatype/opal_datatype_copy.c +++ b/opal/datatype/opal_datatype_copy.c @@ -36,7 +36,7 @@ #if OPAL_ENABLE_DEBUG -#define DO_DEBUG(INST) if( opal_copy_debug ) { INST } +#define DO_DEBUG(INST) if( opal_ddt_copy_debug ) { INST } #else #define DO_DEBUG(INST) #endif /* OPAL_ENABLE_DEBUG */ diff --git a/opal/datatype/opal_datatype_copy.h b/opal/datatype/opal_datatype_copy.h index 5dcfe2ec5d3..11058012e1e 100644 --- a/opal/datatype/opal_datatype_copy.h +++ b/opal/datatype/opal_datatype_copy.h @@ -4,8 +4,8 @@ * of Tennessee Research Foundation. All rights * reserved. * Copyright (c) 2009 Oak Ridge National Labs. All rights reserved. - * Copyright (c) 2015-2017 Research Organization for Information Science - * and Technology (RIST). All rights reserved. + * Copyright (c) 2015-2018 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -43,50 +43,45 @@ static inline void _predefined_data( const dt_elem_desc_t* ELEM, const opal_datatype_t* DATATYPE, unsigned char* SOURCE_BASE, size_t TOTAL_COUNT, - uint32_t COUNT, + size_t COUNT, unsigned char* SOURCE, unsigned char* DESTINATION, size_t* SPACE ) { - uint32_t _copy_count = (COUNT); - size_t _copy_blength; const ddt_elem_desc_t* _elem = &((ELEM)->elem); unsigned char* _source = (SOURCE) + _elem->disp; unsigned char* _destination = (DESTINATION) + _elem->disp; + size_t do_now = _elem->count, do_now_bytes; - _copy_blength = opal_datatype_basicDatatypes[_elem->common.type]->size; + assert( (COUNT) == (do_now * _elem->blocklen)); - if( _copy_blength == (uint32_t)_elem->extent ) { - _copy_blength *= _copy_count; - OPAL_DATATYPE_SAFEGUARD_POINTER( _source, _copy_blength, (SOURCE_BASE), - (DATATYPE), (TOTAL_COUNT) ); - /* the extent and the size of the basic datatype are equals */ - DO_DEBUG( opal_output( 0, "copy 1. %s( %p, %p, %lu ) => space %lu\n", - STRINGIFY(MEM_OP_NAME), (void*)_destination, (void*)_source, (unsigned long)_copy_blength, (unsigned long)(*(SPACE)) ); ); - MEM_OP( _destination, _source, _copy_blength ); - _source += _copy_blength; - _destination += _copy_blength; - } else { - uint32_t _i; - for( _i = 0; _i < _copy_count; _i++ ) { - OPAL_DATATYPE_SAFEGUARD_POINTER( _source, _copy_blength, (SOURCE_BASE), - (DATATYPE), (TOTAL_COUNT) ); - DO_DEBUG( opal_output( 0, "copy 2. %s( %p, %p, %lu ) => space %lu\n", - STRINGIFY(MEM_OP_NAME), (void*)_destination, (void*)_source, (unsigned long)_copy_blength, (unsigned long)(*(SPACE) - (_i * _copy_blength)) ); ); - MEM_OP( _destination, _source, _copy_blength ); - _source += _elem->extent; - _destination += _elem->extent; - } - _copy_blength *= _copy_count; + /* We don't a prologue and epilogue here as we are __always__ working + * with full copies of the data description. + */ + + /** + * Compute how many full blocklen we need to do and do them. 
+ */ + do_now_bytes = _elem->blocklen * opal_datatype_basicDatatypes[_elem->common.type]->size; + assert( (do_now * do_now_bytes) <= (*SPACE) ); + + for(size_t _i = 0; _i < do_now; _i++ ) { + OPAL_DATATYPE_SAFEGUARD_POINTER( _source, do_now_bytes, (SOURCE_BASE), + (DATATYPE), (TOTAL_COUNT) ); + DO_DEBUG( opal_output( 0, "copy %s( %p, %p, %" PRIsize_t " ) => space %" PRIsize_t "\n", + STRINGIFY(MEM_OP_NAME), (void*)_destination, (void*)_source, do_now_bytes, *(SPACE) - _i * do_now_bytes ); ); + MEM_OP( _destination, _source, do_now_bytes ); + _destination += _elem->extent; + _source += _elem->extent; } - *(SPACE) -= _copy_blength; + *(SPACE) -= (do_now_bytes * do_now); } static inline void _contiguous_loop( const dt_elem_desc_t* ELEM, const opal_datatype_t* DATATYPE, unsigned char* SOURCE_BASE, size_t TOTAL_COUNT, - uint32_t COUNT, + size_t COUNT, unsigned char* SOURCE, unsigned char* DESTINATION, size_t* SPACE ) @@ -96,7 +91,6 @@ static inline void _contiguous_loop( const dt_elem_desc_t* ELEM, unsigned char* _source = (SOURCE) + _end_loop->first_elem_disp; unsigned char* _destination = (DESTINATION) + _end_loop->first_elem_disp; size_t _copy_loops = (COUNT); - uint32_t _i; if( _loop->extent == (ptrdiff_t)_end_loop->size ) { /* the loop is contiguous */ _copy_loops *= _end_loop->size; @@ -104,11 +98,11 @@ static inline void _contiguous_loop( const dt_elem_desc_t* ELEM, (DATATYPE), (TOTAL_COUNT) ); MEM_OP( _destination, _source, _copy_loops ); } else { - for( _i = 0; _i < _copy_loops; _i++ ) { + for(size_t _i = 0; _i < _copy_loops; _i++ ) { OPAL_DATATYPE_SAFEGUARD_POINTER( _source, _end_loop->size, (SOURCE_BASE), (DATATYPE), (TOTAL_COUNT) ); - DO_DEBUG( opal_output( 0, "copy 3. %s( %p, %p, %lu ) => space %lu\n", - STRINGIFY(MEM_OP_NAME), (void*)_destination, (void*)_source, (unsigned long)_end_loop->size, (unsigned long)(*(SPACE) - _i * _end_loop->size) ); ); + DO_DEBUG( opal_output( 0, "copy 3. 
%s( %p, %p, %" PRIsize_t " ) => space %" PRIsize_t "\n", + STRINGIFY(MEM_OP_NAME), (void*)_destination, (void*)_source, _end_loop->size, *(SPACE) - _i * _end_loop->size ); ); MEM_OP( _destination, _source, _end_loop->size ); _source += _loop->extent; _destination += _loop->extent; @@ -149,12 +143,10 @@ static inline int32_t _copy_content_same_ddt( const opal_datatype_t* datatype, i if( (ptrdiff_t)datatype->size == extent ) { /* all contiguous == no gaps around */ size_t total_length = iov_len_local; size_t memop_chunk = opal_datatype_memop_block_size; + OPAL_DATATYPE_SAFEGUARD_POINTER( source, iov_len_local, + (unsigned char*)source_base, datatype, count ); while( total_length > 0 ) { if( memop_chunk > total_length ) memop_chunk = total_length; - OPAL_DATATYPE_SAFEGUARD_POINTER( destination, memop_chunk, - (unsigned char*)destination_base, datatype, count ); - OPAL_DATATYPE_SAFEGUARD_POINTER( source, memop_chunk, - (unsigned char*)source_base, datatype, count ); DO_DEBUG( opal_output( 0, "copy c1. 
%s( %p, %p, %lu ) => space %lu\n", STRINGIFY(MEM_OP_NAME), (void*)destination, (void*)source, (unsigned long)memop_chunk, (unsigned long)total_length ); ); MEM_OP( destination, source, memop_chunk ); @@ -186,17 +178,12 @@ static inline int32_t _copy_content_same_ddt( const opal_datatype_t* datatype, i pos_desc = 0; stack_pos = 0; - if( datatype->opt_desc.desc != NULL ) { - description = datatype->opt_desc.desc; - } else { + description = datatype->opt_desc.desc; + if( NULL == description ) { description = datatype->desc.desc; } - if( description[0].elem.common.type == OPAL_DATATYPE_LOOP ) - count_desc = description[0].loop.loops; - else - count_desc = description[0].elem.count; - pElem = &(description[pos_desc]); + UPDATE_INTERNAL_COUNTERS( description, 0, pElem, count_desc ); while( 1 ) { while( OPAL_LIKELY(pElem->elem.common.flags & OPAL_DATATYPE_FLAG_DATA) ) { @@ -207,8 +194,8 @@ static inline int32_t _copy_content_same_ddt( const opal_datatype_t* datatype, i UPDATE_INTERNAL_COUNTERS( description, pos_desc, pElem, count_desc ); } if( OPAL_DATATYPE_END_LOOP == pElem->elem.common.type ) { /* end of the current loop */ - DO_DEBUG( opal_output( 0, "copy end_loop count %d stack_pos %d pos_desc %d disp %ld space %lu\n", - (int)pStack->count, stack_pos, pos_desc, (long)pStack->disp, (unsigned long)iov_len_local ); ); + DO_DEBUG( opal_output( 0, "copy end_loop count %" PRIsize_t " stack_pos %d pos_desc %d disp %ld space %lu\n", + pStack->count, stack_pos, pos_desc, pStack->disp, (unsigned long)iov_len_local ); ); if( --(pStack->count) == 0 ) { /* end of loop */ if( stack_pos == 0 ) { assert( iov_len_local == 0 ); @@ -229,8 +216,8 @@ static inline int32_t _copy_content_same_ddt( const opal_datatype_t* datatype, i source = (unsigned char*)source_base + pStack->disp; destination = (unsigned char*)destination_base + pStack->disp; UPDATE_INTERNAL_COUNTERS( description, pos_desc, pElem, count_desc ); - DO_DEBUG( opal_output( 0, "copy new_loop count %d stack_pos %d pos_desc %d 
disp %ld space %lu\n", - (int)pStack->count, stack_pos, pos_desc, (long)pStack->disp, (unsigned long)iov_len_local ); ); + DO_DEBUG( opal_output( 0, "copy new_loop count %" PRIsize_t " stack_pos %d pos_desc %d disp %ld space %lu\n", + pStack->count, stack_pos, pos_desc, pStack->disp, (unsigned long)iov_len_local ); ); } if( OPAL_DATATYPE_LOOP == pElem->elem.common.type ) { ptrdiff_t local_disp = (ptrdiff_t)source; diff --git a/opal/datatype/opal_datatype_create.c b/opal/datatype/opal_datatype_create.c index 0e6d49b9bd7..122521989b8 100644 --- a/opal/datatype/opal_datatype_create.c +++ b/opal/datatype/opal_datatype_create.c @@ -11,6 +11,8 @@ * Copyright (c) 2004-2006 The Regents of the University of California. * All rights reserved. * Copyright (c) 2009 Oak Ridge National Labs. All rights reserved. + * Copyright (c) 2019 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -77,7 +79,7 @@ static void opal_datatype_destruct( opal_datatype_t* datatype ) } } /* dont free the ptypes of predefined types (it was not dynamically allocated) */ - if( (NULL != datatype->ptypes) && (datatype->id >= OPAL_DATATYPE_MAX_PREDEFINED) ) { + if( (NULL != datatype->ptypes) && (!opal_datatype_is_predefined(datatype)) ) { free(datatype->ptypes); datatype->ptypes = NULL; } diff --git a/opal/datatype/opal_datatype_dump.c b/opal/datatype/opal_datatype_dump.c index d469f8291dc..7782a805d0a 100644 --- a/opal/datatype/opal_datatype_dump.c +++ b/opal/datatype/opal_datatype_dump.c @@ -3,7 +3,7 @@ * Copyright (c) 2004-2006 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. - * Copyright (c) 2004-2017 The University of Tennessee and The University + * Copyright (c) 2004-2019 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. 
* Copyright (c) 2004-2006 High Performance Computing Center Stuttgart, @@ -13,6 +13,8 @@ * Copyright (c) 2009 Sun Microsystems, Inc. All rights reserved. * Copyright (c) 2009 Oak Ridge National Labs. All rights reserved. * Copyright (c) 2018 Cisco Systems, Inc. All rights reserved + * Copyright (c) 2018 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -62,7 +64,7 @@ int opal_datatype_dump_data_flags( unsigned short usflags, char* ptr, size_t len int index = 0; if( length < 22 ) return 0; index = snprintf( ptr, 22, "-----------[---][---]" ); /* set everything to - */ - if( usflags & OPAL_DATATYPE_FLAG_COMMITTED ) ptr[1] = 'c'; + if( usflags & OPAL_DATATYPE_FLAG_COMMITTED ) ptr[1] = 'c'; if( usflags & OPAL_DATATYPE_FLAG_CONTIGUOUS ) ptr[2] = 'C'; if( usflags & OPAL_DATATYPE_FLAG_OVERLAP ) ptr[3] = 'o'; if( usflags & OPAL_DATATYPE_FLAG_USER_LB ) ptr[4] = 'l'; @@ -88,17 +90,17 @@ int opal_datatype_dump_data_desc( dt_elem_desc_t* pDesc, int nbElems, char* ptr, index += snprintf( ptr + index, length - index, "%15s ", opal_datatype_basicDatatypes[pDesc->elem.common.type]->name ); if( length <= (size_t)index ) break; if( OPAL_DATATYPE_LOOP == pDesc->elem.common.type ) - index += snprintf( ptr + index, length - index, "%d times the next %d elements extent %d\n", - (int)pDesc->loop.loops, (int)pDesc->loop.items, - (int)pDesc->loop.extent ); + index += snprintf( ptr + index, length - index, "%u times the next %u elements extent %td\n", + pDesc->loop.loops, pDesc->loop.items, + pDesc->loop.extent ); else if( OPAL_DATATYPE_END_LOOP == pDesc->elem.common.type ) - index += snprintf( ptr + index, length - index, "prev %d elements first elem displacement %ld size of data %d\n", - (int)pDesc->end_loop.items, (long)pDesc->end_loop.first_elem_disp, - (int)pDesc->end_loop.size ); + index += snprintf( ptr + index, length - index, "prev %u elements first elem displacement %td size of data 
%" PRIsize_t "\n", + pDesc->end_loop.items, pDesc->end_loop.first_elem_disp, + pDesc->end_loop.size ); else - index += snprintf( ptr + index, length - index, "count %d disp 0x%lx (%ld) blen %d extent %d (size %ld)\n", - (int)pDesc->elem.count, (long)pDesc->elem.disp, (long)pDesc->elem.disp, (int)pDesc->elem.blocklen, - (int)pDesc->elem.extent, (long)(pDesc->elem.count * opal_datatype_basicDatatypes[pDesc->elem.common.type]->size) ); + index += snprintf( ptr + index, length - index, "count %" PRIsize_t " disp 0x%tx (%td) blen %u extent %td (size %zd)\n", + pDesc->elem.count, pDesc->elem.disp, pDesc->elem.disp, pDesc->elem.blocklen, + pDesc->elem.extent, (pDesc->elem.count * pDesc->elem.blocklen * opal_datatype_basicDatatypes[pDesc->elem.common.type]->size) ); pDesc++; if( length <= (size_t)index ) break; @@ -116,13 +118,13 @@ void opal_datatype_dump( const opal_datatype_t* pData ) length = pData->opt_desc.used + pData->desc.used; length = length * 100 + 500; buffer = (char*)malloc( length ); - index += snprintf( buffer, length - index, "Datatype %p[%s] size %ld align %d id %d length %d used %d\n" - "true_lb %ld true_ub %ld (true_extent %ld) lb %ld ub %ld (extent %ld)\n" - "nbElems %d loops %d flags %X (", - (void*)pData, pData->name, (long)pData->size, (int)pData->align, pData->id, (int)pData->desc.length, (int)pData->desc.used, - (long)pData->true_lb, (long)pData->true_ub, (long)(pData->true_ub - pData->true_lb), - (long)pData->lb, (long)pData->ub, (long)(pData->ub - pData->lb), - (int)pData->nbElems, (int)pData->loops, (int)pData->flags ); + index += snprintf( buffer, length - index, "Datatype %p[%s] size %" PRIsize_t " align %u id %u length %" PRIsize_t " used %" PRIsize_t "\n" + "true_lb %td true_ub %td (true_extent %td) lb %td ub %td (extent %td)\n" + "nbElems %" PRIsize_t " loops %u flags %X (", + (void*)pData, pData->name, pData->size, pData->align, (uint32_t)pData->id, pData->desc.length, pData->desc.used, + pData->true_lb, pData->true_ub, pData->true_ub - 
pData->true_lb, + pData->lb, pData->ub, pData->ub - pData->lb, + pData->nbElems, pData->loops, (int)pData->flags ); /* dump the flags */ if( pData->flags == OPAL_DATATYPE_FLAG_PREDEFINED ) index += snprintf( buffer + index, length - index, "predefined " ); diff --git a/opal/datatype/opal_datatype_fake_stack.c b/opal/datatype/opal_datatype_fake_stack.c index 1cc05fe8860..bd1d919e374 100644 --- a/opal/datatype/opal_datatype_fake_stack.c +++ b/opal/datatype/opal_datatype_fake_stack.c @@ -11,8 +11,8 @@ * Copyright (c) 2004-2006 The Regents of the University of California. * All rights reserved. * Copyright (c) 2009 Oak Ridge National Labs. All rights reserved. - * Copyright (c) 2017 Research Organization for Information Science - * and Technology (RIST). All rights reserved. + * Copyright (c) 2017-2018 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -47,7 +47,7 @@ int opal_convertor_create_stack_with_pos_general( opal_convertor_t* pConvertor, size_t loop_length, *remoteLength, remote_size; size_t resting_place = starting_point; dt_elem_desc_t* pElems; - uint32_t count; + size_t count; assert( 0 != starting_point ); assert( pConvertor->bConverted != starting_point ); @@ -93,7 +93,7 @@ int opal_convertor_create_stack_with_pos_general( opal_convertor_t* pConvertor, /* remove from the main loop all the complete datatypes */ assert (! 
(pConvertor->flags & CONVERTOR_SEND)); remote_size = opal_convertor_compute_remote_size( pConvertor ); - count = (int32_t)(starting_point / remote_size); + count = starting_point / remote_size; resting_place -= (remote_size * count); pStack->count = pConvertor->count - count; pStack->index = -1; diff --git a/opal/datatype/opal_datatype_get_count.c b/opal/datatype/opal_datatype_get_count.c index ae085c42704..f75b86d0e2d 100644 --- a/opal/datatype/opal_datatype_get_count.c +++ b/opal/datatype/opal_datatype_get_count.c @@ -69,14 +69,14 @@ ssize_t opal_datatype_get_element_count( const opal_datatype_t* datatype, size_t while( pElems[pos_desc].elem.common.flags & OPAL_DATATYPE_FLAG_DATA ) { /* now here we have a basic datatype */ const opal_datatype_t* basic_type = BASIC_DDT_FROM_ELEM(pElems[pos_desc]); - local_size = pElems[pos_desc].elem.count * basic_type->size; + local_size = (pElems[pos_desc].elem.count * pElems[pos_desc].elem.blocklen) * basic_type->size; if( local_size >= iSize ) { local_size = iSize / basic_type->size; nbElems += (int32_t)local_size; iSize -= local_size * basic_type->size; return (iSize == 0 ? 
nbElems : -1); } - nbElems += pElems[pos_desc].elem.count; + nbElems += (pElems[pos_desc].elem.count * pElems[pos_desc].elem.blocklen); iSize -= local_size; pos_desc++; /* advance to the next data */ } @@ -131,7 +131,7 @@ int32_t opal_datatype_set_element_count( const opal_datatype_t* datatype, size_t while( pElems[pos_desc].elem.common.flags & OPAL_DATATYPE_FLAG_DATA ) { /* now here we have a basic datatype */ const opal_datatype_t* basic_type = BASIC_DDT_FROM_ELEM(pElems[pos_desc]); - local_length = pElems[pos_desc].elem.count; + local_length = (pElems[pos_desc].elem.count * pElems[pos_desc].elem.blocklen); if( local_length >= count ) { *length += count * basic_type->size; return 0; @@ -188,8 +188,8 @@ int opal_datatype_compute_ptypes( opal_datatype_t* datatype ) } while( pElems[pos_desc].elem.common.flags & OPAL_DATATYPE_FLAG_DATA ) { /* now here we have a basic datatype */ - datatype->ptypes[pElems[pos_desc].elem.common.type] += pElems[pos_desc].elem.count; - nbElems += pElems[pos_desc].elem.count; + datatype->ptypes[pElems[pos_desc].elem.common.type] += pElems[pos_desc].elem.count * pElems[pos_desc].elem.blocklen; + nbElems += pElems[pos_desc].elem.count * pElems[pos_desc].elem.blocklen; DUMP( " compute_ptypes-add: type %d count %"PRIsize_t" (total type %"PRIsize_t" total %lld)\n", pElems[pos_desc].elem.common.type, datatype->ptypes[pElems[pos_desc].elem.common.type], diff --git a/opal/datatype/opal_datatype_internal.h b/opal/datatype/opal_datatype_internal.h index bc3f8aa7cab..bdeb0cc429e 100644 --- a/opal/datatype/opal_datatype_internal.h +++ b/opal/datatype/opal_datatype_internal.h @@ -3,7 +3,7 @@ * Copyright (c) 2004-2006 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. - * Copyright (c) 2004-2018 The University of Tennessee and The University + * Copyright (c) 2004-2019 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. 
* Copyright (c) 2004-2006 High Performance Computing Center Stuttgart, @@ -215,19 +215,23 @@ union dt_elem_desc { /** - * Create one or more elements depending on the value of _count. If the value - * is too large for the type of elem.count then use oth the elem.count and - * elem.blocklen to create it. If the number is prime then create a second - * element to account for the difference. + * Create an element entry in the description. If the element is contiguous + * collapse everything into the blocklen. */ -#define CREATE_ELEM( _place, _type, _flags, _count, _disp, _extent ) \ +#define CREATE_ELEM(_place, _type, _flags, _blocklen, _count, _disp, _extent) \ do { \ (_place)->elem.common.flags = (_flags) | OPAL_DATATYPE_FLAG_DATA; \ (_place)->elem.common.type = (_type); \ - (_place)->elem.disp = (_disp); \ - (_place)->elem.extent = (_extent); \ + (_place)->elem.blocklen = (_blocklen); \ (_place)->elem.count = (_count); \ - (_place)->elem.blocklen = 1; \ + (_place)->elem.extent = (_extent); \ + (_place)->elem.disp = (_disp); \ + if( _extent == (ptrdiff_t)(_blocklen * opal_datatype_basicDatatypes[_type]->size) ) { \ + /* collapse it into a single large blocklen */ \ + (_place)->elem.blocklen *= _count; \ + (_place)->elem.extent *= _count; \ + (_place)->elem.count = 1; \ + } \ } while(0) /* * This array holds the descriptions desc.desc[2] of the predefined basic datatypes. 
@@ -480,22 +484,23 @@ static inline int GET_FIRST_NON_LOOP( const union dt_elem_desc* _pElem ) } #define UPDATE_INTERNAL_COUNTERS( DESCRIPTION, POSITION, ELEMENT, COUNTER ) \ - do { \ - (ELEMENT) = &((DESCRIPTION)[(POSITION)]); \ - if( OPAL_DATATYPE_LOOP == (ELEMENT)->elem.common.type ) \ - (COUNTER) = (ELEMENT)->loop.loops; \ - else \ - (COUNTER) = (ELEMENT)->elem.count; \ + do { \ + (ELEMENT) = &((DESCRIPTION)[(POSITION)]); \ + if( OPAL_DATATYPE_LOOP == (ELEMENT)->elem.common.type ) \ + (COUNTER) = (ELEMENT)->loop.loops; \ + else \ + (COUNTER) = (ELEMENT)->elem.count * (ELEMENT)->elem.blocklen; \ } while (0) OPAL_DECLSPEC int opal_datatype_contain_basic_datatypes( const struct opal_datatype_t* pData, char* ptr, size_t length ); OPAL_DECLSPEC int opal_datatype_dump_data_flags( unsigned short usflags, char* ptr, size_t length ); OPAL_DECLSPEC int opal_datatype_dump_data_desc( union dt_elem_desc* pDesc, int nbElems, char* ptr, size_t length ); -#if OPAL_ENABLE_DEBUG -extern bool opal_position_debug; -extern bool opal_copy_debug; -#endif /* OPAL_ENABLE_DEBUG */ +extern bool opal_ddt_position_debug; +extern bool opal_ddt_copy_debug; +extern bool opal_ddt_unpack_debug; +extern bool opal_ddt_pack_debug; +extern bool opal_ddt_raw_debug; END_C_DECLS #endif /* OPAL_DATATYPE_INTERNAL_H_HAS_BEEN_INCLUDED */ diff --git a/opal/datatype/opal_datatype_module.c b/opal/datatype/opal_datatype_module.c index 2d8dedc94e7..ba933b5fe2b 100644 --- a/opal/datatype/opal_datatype_module.c +++ b/opal/datatype/opal_datatype_module.c @@ -37,10 +37,11 @@ /* by default the debuging is turned off */ int opal_datatype_dfd = -1; -bool opal_unpack_debug = false; -bool opal_pack_debug = false; -bool opal_position_debug = false; -bool opal_copy_debug = false; +bool opal_ddt_unpack_debug = false; +bool opal_ddt_pack_debug = false; +bool opal_ddt_position_debug = false; +bool opal_ddt_copy_debug = false; +bool opal_ddt_raw_debug = false; int opal_ddt_verbose = -1; /* Has the datatype verbose it's own 
output stream */ extern int opal_cuda_verbose; @@ -148,35 +149,43 @@ int opal_datatype_register_params(void) int ret; ret = mca_base_var_register ("opal", "mpi", NULL, "ddt_unpack_debug", - "Whether to output debugging information in the ddt unpack functions (nonzero = enabled)", - MCA_BASE_VAR_TYPE_BOOL, NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE, OPAL_INFO_LVL_3, - MCA_BASE_VAR_SCOPE_LOCAL, &opal_unpack_debug); + "Whether to output debugging information in the ddt unpack functions (nonzero = enabled)", + MCA_BASE_VAR_TYPE_BOOL, NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE, OPAL_INFO_LVL_3, + MCA_BASE_VAR_SCOPE_LOCAL, &opal_ddt_unpack_debug); if (0 > ret) { - return ret; + return ret; } ret = mca_base_var_register ("opal", "mpi", NULL, "ddt_pack_debug", - "Whether to output debugging information in the ddt pack functions (nonzero = enabled)", - MCA_BASE_VAR_TYPE_BOOL, NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE, OPAL_INFO_LVL_3, - MCA_BASE_VAR_SCOPE_LOCAL, &opal_pack_debug); + "Whether to output debugging information in the ddt pack functions (nonzero = enabled)", + MCA_BASE_VAR_TYPE_BOOL, NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE, OPAL_INFO_LVL_3, + MCA_BASE_VAR_SCOPE_LOCAL, &opal_ddt_pack_debug); if (0 > ret) { - return ret; + return ret; + } + + ret = mca_base_var_register ("opal", "mpi", NULL, "ddt_raw_debug", + "Whether to output debugging information in the ddt raw functions (nonzero = enabled)", + MCA_BASE_VAR_TYPE_BOOL, NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE, OPAL_INFO_LVL_3, + MCA_BASE_VAR_SCOPE_LOCAL, &opal_ddt_raw_debug); + if (0 > ret) { + return ret; } ret = mca_base_var_register ("opal", "mpi", NULL, "ddt_position_debug", - "Non zero lead to output generated by the datatype position functions", - MCA_BASE_VAR_TYPE_BOOL, NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE, OPAL_INFO_LVL_3, - MCA_BASE_VAR_SCOPE_LOCAL, &opal_position_debug); + "Non zero lead to output generated by the datatype position functions", + MCA_BASE_VAR_TYPE_BOOL, NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE, OPAL_INFO_LVL_3, + 
MCA_BASE_VAR_SCOPE_LOCAL, &opal_ddt_position_debug); if (0 > ret) { - return ret; + return ret; } ret = mca_base_var_register ("opal", "mpi", NULL, "ddt_copy_debug", - "Whether to output debugging information in the ddt copy functions (nonzero = enabled)", - MCA_BASE_VAR_TYPE_BOOL, NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE, OPAL_INFO_LVL_3, - MCA_BASE_VAR_SCOPE_LOCAL, &opal_copy_debug); + "Whether to output debugging information in the ddt copy functions (nonzero = enabled)", + MCA_BASE_VAR_TYPE_BOOL, NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE, OPAL_INFO_LVL_3, + MCA_BASE_VAR_SCOPE_LOCAL, &opal_ddt_copy_debug); if (0 > ret) { - return ret; + return ret; } ret = mca_base_var_register ("opal", "opal", NULL, "ddt_verbose", @@ -195,7 +204,7 @@ int opal_datatype_register_params(void) OPAL_INFO_LVL_8, MCA_BASE_VAR_SCOPE_LOCAL, &opal_cuda_verbose); if (0 > ret) { - return ret; + return ret; } #endif @@ -224,8 +233,8 @@ int32_t opal_datatype_init( void ) OPAL_DATATYPE_FLAG_CONTIGUOUS | OPAL_DATATYPE_FLAG_NO_GAPS; datatype->desc.desc[0].elem.common.type = i; - /* datatype->desc.desc[0].elem.blocklen XXX not set at the moment, it will be needed later */ datatype->desc.desc[0].elem.count = 1; + datatype->desc.desc[0].elem.blocklen = 1; datatype->desc.desc[0].elem.disp = 0; datatype->desc.desc[0].elem.extent = datatype->size; diff --git a/opal/datatype/opal_datatype_monotonic.c b/opal/datatype/opal_datatype_monotonic.c index b467d95ecbe..247fd66142d 100644 --- a/opal/datatype/opal_datatype_monotonic.c +++ b/opal/datatype/opal_datatype_monotonic.c @@ -2,6 +2,9 @@ /* * Copyright (c) 2018 Research Organization for Information Science * and Technology (RIST). All rights reserved. + * Copyright (c) 2018-2019 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -18,35 +21,43 @@ #include "opal/datatype/opal_datatype_internal.h" #include "opal/datatype/opal_convertor.h" +#define OPAL_DATATYPE_MAX_MONOTONIC_IOVEC 32 + +/** + * Check if the datatype describes a memory layout where the pointers to + * the contiguous pieces are always advancing in the same direction, i.e. + * there is no potential for overlap. + */ int32_t opal_datatype_is_monotonic(opal_datatype_t* type ) { + struct iovec iov[OPAL_DATATYPE_MAX_MONOTONIC_IOVEC]; + ptrdiff_t upper_limit = (ptrdiff_t)type->true_lb; /* as conversion base will be NULL the first address is true_lb */ + size_t max_data = 0x7FFFFFFF; opal_convertor_t *pConv; + bool monotonic = true; uint32_t iov_count; - struct iovec iov[5]; - size_t max_data = 0; - long prev = -1; int rc; - bool monotonic = true; pConv = opal_convertor_create( opal_local_arch, 0 ); if (OPAL_UNLIKELY(NULL == pConv)) { - return 0; + return -1; } rc = opal_convertor_prepare_for_send( pConv, type, 1, NULL ); if( OPAL_UNLIKELY(OPAL_SUCCESS != rc)) { OBJ_RELEASE(pConv); - return 0; + return -1; } do { - iov_count = 5; + iov_count = OPAL_DATATYPE_MAX_MONOTONIC_IOVEC; rc = opal_convertor_raw( pConv, iov, &iov_count, &max_data); - for (uint32_t i=0; icommon.flags = OPAL_DATATYPE_FLAG_BASIC; \ - _elem->common.type = OPAL_DATATYPE_LOOP; \ - _elem->count = 0; \ - _elem->disp = 0; \ - _elem->extent = 0; \ - } while (0) - static int32_t opal_datatype_optimize_short( opal_datatype_t* pData, - int32_t count, + size_t count, dt_type_desc_t* pTypeDesc ) { dt_elem_desc_t* pElemDesc; - ddt_elem_desc_t opt_elem; - dt_stack_t* pOrigStack; - dt_stack_t* pStack; /* pointer to the position on the stack */ - int32_t pos_desc = 0; /* actual position in the description of the derived datatype */ - int32_t stack_pos = 0, last_type = OPAL_DATATYPE_UINT1; - int32_t type = OPAL_DATATYPE_LOOP, nbElems = 0, continuity; - ptrdiff_t total_disp = 0, last_extent = 1, last_disp = 0; - uint16_t 
last_flags = 0xFFFF; /* keep all for the first datatype */ - uint32_t i; - size_t last_length = 0; + dt_stack_t *pOrigStack, *pStack; /* pointer to the position on the stack */ + int32_t pos_desc = 0; /* actual position in the description of the derived datatype */ + int32_t stack_pos = 0; + int32_t nbElems = 0; + ptrdiff_t total_disp = 0; + ddt_elem_desc_t last = {.common.flags = 0xFFFF /* all on */, .count = 0, .disp = 0}, compress; + ddt_elem_desc_t* current; pOrigStack = pStack = (dt_stack_t*)malloc( sizeof(dt_stack_t) * (pData->loops+2) ); SAVE_STACK( pStack, -1, 0, count, 0 ); @@ -64,186 +51,205 @@ opal_datatype_optimize_short( opal_datatype_t* pData, pTypeDesc->desc = pElemDesc = (dt_elem_desc_t*)malloc( sizeof(dt_elem_desc_t) * pTypeDesc->length ); pTypeDesc->used = 0; - SET_EMPTY_ELEMENT( &opt_elem ); assert( OPAL_DATATYPE_END_LOOP == pData->desc.desc[pData->desc.used].elem.common.type ); - opt_elem.common.type = OPAL_DATATYPE_LOOP; - opt_elem.common.flags = 0xFFFF; /* keep all for the first datatype */ - opt_elem.count = 0; - opt_elem.disp = pData->desc.desc[pData->desc.used].end_loop.first_elem_disp; - opt_elem.extent = 0; while( stack_pos >= 0 ) { if( OPAL_DATATYPE_END_LOOP == pData->desc.desc[pos_desc].elem.common.type ) { /* end of the current loop */ ddt_endloop_desc_t* end_loop = &(pData->desc.desc[pos_desc].end_loop); - if( last_length != 0 ) { - CREATE_ELEM( pElemDesc, last_type, OPAL_DATATYPE_FLAG_BASIC, last_length, last_disp, last_extent ); + if( 0 != last.count ) { + CREATE_ELEM( pElemDesc, last.common.type, OPAL_DATATYPE_FLAG_BASIC, + last.blocklen, last.count, last.disp, last.extent ); pElemDesc++; nbElems++; - last_disp += last_length; - last_length = 0; + last.count= 0; } CREATE_LOOP_END( pElemDesc, nbElems - pStack->index + 1, /* # of elems in this loop */ end_loop->first_elem_disp, end_loop->size, end_loop->common.flags ); - pElemDesc++; nbElems++; if( --stack_pos >= 0 ) { /* still something to do ? 
*/ ddt_loop_desc_t* pStartLoop = &(pTypeDesc->desc[pStack->index - 1].loop); - pStartLoop->items = end_loop->items; + pStartLoop->items = pElemDesc->end_loop.items; total_disp = pStack->disp; /* update the displacement position */ } + pElemDesc++; nbElems++; pStack--; /* go down one position on the stack */ pos_desc++; continue; } if( OPAL_DATATYPE_LOOP == pData->desc.desc[pos_desc].elem.common.type ) { ddt_loop_desc_t* loop = (ddt_loop_desc_t*)&(pData->desc.desc[pos_desc]); - ddt_endloop_desc_t* end_loop = (ddt_endloop_desc_t*)&(pData->desc.desc[pos_desc + loop->items]); int index = GET_FIRST_NON_LOOP( &(pData->desc.desc[pos_desc]) ); - ptrdiff_t loop_disp = pData->desc.desc[pos_desc + index].elem.disp; - continuity = ((last_disp + (ptrdiff_t)last_length * (ptrdiff_t)opal_datatype_basicDatatypes[last_type]->size) - == (total_disp + loop_disp)); if( loop->common.flags & OPAL_DATATYPE_FLAG_CONTIGUOUS ) { - /* the loop is contiguous or composed by contiguous elements with a gap */ - if( loop->extent == (ptrdiff_t)end_loop->size ) { - /* the whole loop is contiguous */ - if( !continuity ) { - if( 0 != last_length ) { - CREATE_ELEM( pElemDesc, last_type, OPAL_DATATYPE_FLAG_BASIC, - last_length, last_disp, last_extent ); - pElemDesc++; nbElems++; - last_length = 0; - } - last_disp = total_disp + loop_disp; - } - last_length = (last_length * opal_datatype_basicDatatypes[last_type]->size - + loop->loops * end_loop->size); - last_type = OPAL_DATATYPE_UINT1; - last_extent = 1; - } else { - int counter = loop->loops; - ptrdiff_t merged_disp = 0; - /* if the previous data is contiguous with this piece and it has a length not ZERO */ - if( last_length != 0 ) { - if( continuity ) { - last_length *= opal_datatype_basicDatatypes[last_type]->size; - last_length += end_loop->size; - last_type = OPAL_DATATYPE_UINT1; - last_extent = 1; - counter--; - merged_disp = loop->extent; /* merged loop, update the disp of the remaining elems */ - } - CREATE_ELEM( pElemDesc, last_type, 
OPAL_DATATYPE_FLAG_BASIC, - last_length, last_disp, last_extent ); - pElemDesc++; nbElems++; - last_disp += last_length; - last_length = 0; - last_type = OPAL_DATATYPE_LOOP; - } - /** - * The content of the loop is contiguous (maybe with a gap before or after). - * - * If any of the loops have been merged with the previous element, then the - * displacement of the first element (or the displacement of all elements if the - * loop will be removed) must be updated accordingly. - */ - if( counter <= 2 ) { - merged_disp += end_loop->first_elem_disp; - while( counter > 0 ) { - CREATE_ELEM( pElemDesc, OPAL_DATATYPE_UINT1, OPAL_DATATYPE_FLAG_BASIC, - end_loop->size, merged_disp, 1); - pElemDesc++; nbElems++; counter--; - merged_disp += loop->extent; - } - } else { - CREATE_LOOP_START( pElemDesc, counter, 2, loop->extent, loop->common.flags ); - pElemDesc++; nbElems++; - CREATE_ELEM( pElemDesc, OPAL_DATATYPE_UINT1, OPAL_DATATYPE_FLAG_BASIC, - end_loop->size, loop_disp, 1); - pElemDesc++; nbElems++; - CREATE_LOOP_END( pElemDesc, 2, end_loop->first_elem_disp + merged_disp, - end_loop->size, end_loop->common.flags ); - pElemDesc++; nbElems++; + ddt_endloop_desc_t* end_loop = (ddt_endloop_desc_t*)&(pData->desc.desc[pos_desc + loop->items]); + + assert(pData->desc.desc[pos_desc + index].elem.disp == end_loop->first_elem_disp); + compress.common.flags = loop->common.flags; + compress.common.type = pData->desc.desc[pos_desc + index].elem.common.type; + compress.blocklen = pData->desc.desc[pos_desc + index].elem.blocklen; + for( uint32_t i = index+1; i < loop->items; i++ ) { + current = &pData->desc.desc[pos_desc + i].elem; + assert(1 == current->count); + if( (current->common.type == OPAL_DATATYPE_LOOP) || + compress.common.type != current->common.type ) { + compress.common.type = OPAL_DATATYPE_UINT1; + compress.blocklen = end_loop->size; + break; } + compress.blocklen += current->blocklen; } - pos_desc += loop->items + 1; - } else { - ddt_elem_desc_t* elem = 
(ddt_elem_desc_t*)&(pData->desc.desc[pos_desc+1]); - if( last_length != 0 ) { - CREATE_ELEM( pElemDesc, last_type, OPAL_DATATYPE_FLAG_BASIC, last_length, last_disp, last_extent ); - pElemDesc++; nbElems++; - last_disp += last_length; - last_length = 0; - last_type = OPAL_DATATYPE_LOOP; + compress.count = loop->loops; + compress.extent = loop->extent; + compress.disp = end_loop->first_elem_disp; + if( compress.extent == (ptrdiff_t)(compress.blocklen * opal_datatype_basicDatatypes[compress.common.type]->size) ) { + /* The compressed element is contiguous: collapse it into a single large blocklen */ + compress.blocklen *= compress.count; + compress.extent *= compress.count; + compress.count = 1; } - if( 2 == loop->items ) { /* small loop */ - if( (1 == elem->count) - && (elem->extent == (ptrdiff_t)opal_datatype_basicDatatypes[elem->common.type]->size) ) { - CREATE_ELEM( pElemDesc, elem->common.type, elem->common.flags & ~OPAL_DATATYPE_FLAG_CONTIGUOUS, - loop->loops, elem->disp, loop->extent ); + /** + * The current loop has been compressed and can now be treated as if it + * was a data element. We can now look if it can be fused with last, + * as done in the fusion of 2 elements below. Let's use the same code. + */ + pos_desc += loop->items + 1; + current = &compress; + goto fuse_loops; + } + + /** + * If the content of the loop is not contiguous there is little we can do + * that would not incur significant optimization cost and still be beneficial + * in reducing the number of memcpy during pack/unpack. + */ + + if( 0 != last.count ) { /* Generate the pending element */ + CREATE_ELEM( pElemDesc, last.common.type, OPAL_DATATYPE_FLAG_BASIC, + last.blocklen, last.count, last.disp, last.extent ); + pElemDesc++; nbElems++; + last.count = 0; + last.common.type = OPAL_DATATYPE_LOOP; + } + + /* Can we unroll the loop? 
*/ + if( (loop->items <= 3) && (loop->loops <= 2) ) { + ptrdiff_t elem_displ = 0; + for( uint32_t i = 0; i < loop->loops; i++ ) { + for( uint32_t j = 0; j < (loop->items - 1); j++ ) { + current = &pData->desc.desc[pos_desc + index + j].elem; + CREATE_ELEM( pElemDesc, current->common.type, current->common.flags, + current->blocklen, current->count, current->disp + elem_displ, current->extent ); pElemDesc++; nbElems++; - pos_desc += loop->items + 1; - goto complete_loop; - } else if( loop->loops < 3 ) { - ptrdiff_t elem_displ = elem->disp; - for( i = 0; i < loop->loops; i++ ) { - CREATE_ELEM( pElemDesc, elem->common.type, elem->common.flags, - elem->count, elem_displ, elem->extent ); - elem_displ += loop->extent; - pElemDesc++; nbElems++; - } - pos_desc += loop->items + 1; - goto complete_loop; } + elem_displ += loop->extent; } - CREATE_LOOP_START( pElemDesc, loop->loops, loop->items, loop->extent, loop->common.flags ); - pElemDesc++; nbElems++; - PUSH_STACK( pStack, stack_pos, nbElems, OPAL_DATATYPE_LOOP, loop->loops, total_disp ); - pos_desc++; - DDT_DUMP_STACK( pStack, stack_pos, pData->desc.desc, "advance loops" ); + pos_desc += loop->items + 1; + goto complete_loop; } + + CREATE_LOOP_START( pElemDesc, loop->loops, loop->items, loop->extent, loop->common.flags ); + pElemDesc++; nbElems++; + PUSH_STACK( pStack, stack_pos, nbElems, OPAL_DATATYPE_LOOP, loop->loops, total_disp ); + pos_desc++; + DDT_DUMP_STACK( pStack, stack_pos, pData->desc.desc, "advance loops" ); + complete_loop: total_disp = pStack->disp; /* update the displacement */ continue; } - while( pData->desc.desc[pos_desc].elem.common.flags & OPAL_DATATYPE_FLAG_DATA ) { /* keep doing it until we reach a non datatype element */ - /* now here we have a basic datatype */ - type = pData->desc.desc[pos_desc].elem.common.type; - continuity = ((last_disp + (ptrdiff_t)last_length * (ptrdiff_t)opal_datatype_basicDatatypes[last_type]->size) - == (total_disp + pData->desc.desc[pos_desc].elem.disp)); + while( 
pData->desc.desc[pos_desc].elem.common.flags & OPAL_DATATYPE_FLAG_DATA ) { /* go over all basic datatype elements */ + current = &pData->desc.desc[pos_desc].elem; + pos_desc++; /* point to the next element as current points to the current one */ - if( (pData->desc.desc[pos_desc].elem.common.flags & OPAL_DATATYPE_FLAG_CONTIGUOUS) && continuity && - (pData->desc.desc[pos_desc].elem.extent == (int32_t)opal_datatype_basicDatatypes[type]->size) ) { - if( type == last_type ) { - last_length += pData->desc.desc[pos_desc].elem.count; - last_extent = pData->desc.desc[pos_desc].elem.extent; - } else { - if( last_length == 0 ) { - last_type = type; - last_length = pData->desc.desc[pos_desc].elem.count; - last_extent = pData->desc.desc[pos_desc].elem.extent; - } else { - last_length = last_length * opal_datatype_basicDatatypes[last_type]->size + - pData->desc.desc[pos_desc].elem.count * opal_datatype_basicDatatypes[type]->size; - last_type = OPAL_DATATYPE_UINT1; - last_extent = 1; + fuse_loops: + if( 0 == last.count ) { /* first data of the datatype */ + last = *current; + continue; /* next data */ + } else { /* can we merge it in order to decrease count */ + if( (ptrdiff_t)last.blocklen * (ptrdiff_t)opal_datatype_basicDatatypes[last.common.type]->size == last.extent ) { + last.extent *= last.count; + last.blocklen *= last.count; + last.count = 1; + } + } + + /* are the two elements compatible: aka they have very similar values and they + * can be merged together by increasing the count, and/or changing the extent. 
+ */ + if( (last.blocklen * opal_datatype_basicDatatypes[last.common.type]->size) == + (current->blocklen * opal_datatype_basicDatatypes[current->common.type]->size) ) { + ddt_elem_desc_t save = last; /* safekeep the type and blocklen */ + if( last.common.type != current->common.type ) { + last.blocklen *= opal_datatype_basicDatatypes[last.common.type]->size; + last.common.type = OPAL_DATATYPE_UINT1; + } + + if( (last.extent * (ptrdiff_t)last.count + last.disp) == current->disp ) { + if( 1 == current->count ) { + last.count++; + continue; } + if( last.extent == current->extent ) { + last.count += current->count; + continue; + } + } + if( 1 == last.count ) { + /* we can ignore the extent of the element with count == 1 and merge them together if their displacements match */ + if( 1 == current->count ) { + last.extent = current->disp - last.disp; + last.count++; + continue; + } + /* can we compute a matching displacement ? */ + if( (last.disp + current->extent) == current->disp ) { + last.extent = current->extent; + last.count = current->count + last.count; + continue; + } + } + last.blocklen = save.blocklen; + last.common.type = save.common.type; + /* try other optimizations */ + } + /* are the elements fusionable such that we can fusion the last blocklen of one with the first + * blocklen of the other. 
+ */ + if( (ptrdiff_t)(last.disp + (last.count - 1) * last.extent + last.blocklen * opal_datatype_basicDatatypes[last.common.type]->size) == + current->disp ) { + if( last.count != 1 ) { + CREATE_ELEM( pElemDesc, last.common.type, OPAL_DATATYPE_FLAG_BASIC, + last.blocklen, last.count - 1, last.disp, last.extent ); + pElemDesc++; nbElems++; + last.disp += (last.count - 1) * last.extent; + last.count = 1; } - last_flags &= pData->desc.desc[pos_desc].elem.common.flags; - } else { - if( last_length != 0 ) { - CREATE_ELEM( pElemDesc, last_type, OPAL_DATATYPE_FLAG_BASIC, last_length, last_disp, last_extent ); + if( last.common.type == current->common.type ) { + last.blocklen += current->blocklen; + } else { + last.blocklen = ((last.blocklen * opal_datatype_basicDatatypes[last.common.type]->size) + + (current->blocklen * opal_datatype_basicDatatypes[current->common.type]->size)); + last.common.type = OPAL_DATATYPE_UINT1; + } + last.extent += current->extent; + if( current->count != 1 ) { + CREATE_ELEM( pElemDesc, last.common.type, OPAL_DATATYPE_FLAG_BASIC, + last.blocklen, last.count, last.disp, last.extent ); pElemDesc++; nbElems++; + last = *current; + last.count -= 1; + last.disp += last.extent; } - last_disp = total_disp + pData->desc.desc[pos_desc].elem.disp; - last_length = pData->desc.desc[pos_desc].elem.count; - last_extent = pData->desc.desc[pos_desc].elem.extent; - last_type = type; + continue; } - pos_desc++; /* advance to the next data */ + CREATE_ELEM( pElemDesc, last.common.type, OPAL_DATATYPE_FLAG_BASIC, + last.blocklen, last.count, last.disp, last.extent ); + pElemDesc++; nbElems++; + last = *current; } } - if( last_length != 0 ) { - CREATE_ELEM( pElemDesc, last_type, OPAL_DATATYPE_FLAG_BASIC, last_length, last_disp, last_extent ); + if( 0 != last.count ) { + CREATE_ELEM( pElemDesc, last.common.type, OPAL_DATATYPE_FLAG_BASIC, + last.blocklen, last.count, last.disp, last.extent ); pElemDesc++; nbElems++; } /* cleanup the stack */ diff --git 
a/opal/datatype/opal_datatype_pack.c b/opal/datatype/opal_datatype_pack.c index 9af53f4dd58..6dc0b81a253 100644 --- a/opal/datatype/opal_datatype_pack.c +++ b/opal/datatype/opal_datatype_pack.c @@ -3,7 +3,7 @@ * Copyright (c) 2004-2006 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. - * Copyright (c) 2004-2016 The University of Tennessee and The University + * Copyright (c) 2004-2019 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. * Copyright (c) 2004-2006 High Performance Computing Center Stuttgart, @@ -12,8 +12,8 @@ * All rights reserved. * Copyright (c) 2009 Oak Ridge National Labs. All rights reserved. * Copyright (c) 2013 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2017 Research Organization for Information Science - * and Technology (RIST). All rights reserved. + * Copyright (c) 2017-2018 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -31,7 +31,7 @@ #if OPAL_ENABLE_DEBUG #include "opal/util/output.h" -#define DO_DEBUG(INST) if( opal_pack_debug ) { INST } +#define DO_DEBUG(INST) if( opal_ddt_pack_debug ) { INST } #else #define DO_DEBUG(INST) #endif /* OPAL_ENABLE_DEBUG */ @@ -53,8 +53,6 @@ #endif /* defined(CHECKSUM) */ -#define IOVEC_MEM_LIMIT 8192 - /* the contig versions does not use the stack. They can easily retrieve * the status with just the informations from pConvertor->bConverted. 
*/ @@ -68,9 +66,8 @@ opal_pack_homogeneous_contig_function( opal_convertor_t* pConv, unsigned char *source_base = NULL; uint32_t iov_count; size_t length = pConv->local_size - pConv->bConverted, initial_amount = pConv->bConverted; - ptrdiff_t initial_displ = pConv->use_desc->desc[pConv->use_desc->used].end_loop.first_elem_disp; - source_base = (pConv->pBaseBuf + initial_displ + pStack[0].disp + pStack[1].disp); + source_base = (pConv->pBaseBuf + pConv->pDesc->true_lb + pStack[0].disp + pStack[1].disp); /* There are some optimizations that can be done if the upper level * does not provide a buffer. @@ -111,154 +108,117 @@ opal_pack_homogeneous_contig_with_gaps_function( opal_convertor_t* pConv, uint32_t* out_size, size_t* max_data ) { + size_t remaining, length, initial_bytes_converted = pConv->bConverted; const opal_datatype_t* pData = pConv->pDesc; dt_stack_t* stack = pConv->pStack; + ptrdiff_t extent = pData->ub - pData->lb; unsigned char *user_memory, *packed_buffer; - uint32_t i, index, iov_count; - size_t bConverted, remaining, length, initial_bytes_converted = pConv->bConverted; - ptrdiff_t extent= pData->ub - pData->lb; - ptrdiff_t initial_displ = pConv->use_desc->desc[pConv->use_desc->used].end_loop.first_elem_disp; + uint32_t idx; + size_t i; + /* The memory layout is contiguous with gaps at the beginning and at the end. The datatype true_lb + * is the initial displacement, the size is the length of the contiguous area and the extent represents + * how much we should jump between elements. 
+ */ assert( (pData->flags & OPAL_DATATYPE_FLAG_CONTIGUOUS) && ((ptrdiff_t)pData->size != extent) ); + assert( pData->opt_desc.used <= 1 ); DO_DEBUG( opal_output( 0, "pack_homogeneous_contig( pBaseBuf %p, iov_count %d )\n", (void*)pConv->pBaseBuf, *out_size ); ); if( stack[1].type != opal_datatype_uint1.id ) { stack[1].count *= opal_datatype_basicDatatypes[stack[1].type]->size; - stack[1].type = opal_datatype_uint1.id; + stack[1].type = opal_datatype_uint1.id; + } + /* We can provide directly the pointers in the user buffers (like the convertor_raw) */ + if( NULL == iov[0].iov_base ) { + user_memory = pConv->pBaseBuf + pData->true_lb; + + for( idx = 0; (idx < (*out_size)) && stack[0].count; idx++ ) { + iov[idx].iov_base = user_memory + stack[0].disp + stack[1].disp; + iov[idx].iov_len = stack[1].count; + COMPUTE_CSUM( iov[idx].iov_base, iov[idx].iov_len, pConv ); + + pConv->bConverted += stack[1].count; + + stack[0].disp += extent; + stack[0].count--; + stack[1].disp = 0; + stack[1].count = pData->size; /* we might need this to update the partial + * length for the first iteration */ + } + goto update_status_and_return; } - /* There are some optimizations that can be done if the upper level - * does not provide a buffer. 
- */ - for( iov_count = 0; iov_count < (*out_size); iov_count++ ) { + for( idx = 0; idx < (*out_size); idx++ ) { /* Limit the amount of packed data to the data left over on this convertor */ remaining = pConv->local_size - pConv->bConverted; if( 0 == remaining ) break; /* we're done this time */ - if( remaining > (uint32_t)iov[iov_count].iov_len ) - remaining = iov[iov_count].iov_len; - packed_buffer = (unsigned char *)iov[iov_count].iov_base; - bConverted = remaining; /* how much will get unpacked this time */ - user_memory = pConv->pBaseBuf + initial_displ + stack[0].disp + stack[1].disp; - i = pConv->count - stack[0].count; /* how many we already packed */ - assert(i == ((uint32_t)(pConv->bConverted / pData->size))); - - if( packed_buffer == NULL ) { - /* special case for small data. We avoid allocating memory if we - * can fill the iovec directly with the address of the remaining - * data. - */ - if( (uint32_t)stack->count < ((*out_size) - iov_count) ) { - stack[1].count = pData->size - (pConv->bConverted % pData->size); - for( index = iov_count; i < pConv->count; i++, index++ ) { - iov[index].iov_base = (IOVBASE_TYPE *) user_memory; - iov[index].iov_len = stack[1].count; - stack[0].disp += extent; - pConv->bConverted += stack[1].count; - stack[1].disp = 0; /* reset it for the next round */ - stack[1].count = pData->size; - user_memory = pConv->pBaseBuf + initial_displ + stack[0].disp; - COMPUTE_CSUM( iov[index].iov_base, iov[index].iov_len, pConv ); - } - *out_size = iov_count + index; - *max_data = (pConv->bConverted - initial_bytes_converted); - pConv->flags |= CONVERTOR_COMPLETED; - return 1; /* we're done */ - } - /* now special case for big contiguous data with gaps around */ - if( pData->size >= IOVEC_MEM_LIMIT ) { - /* as we dont have to copy any data, we can simply fill the iovecs - * with data from the user data description. 
- */ - for( index = iov_count; (i < pConv->count) && (index < (*out_size)); - i++, index++ ) { - if( remaining < pData->size ) { - iov[index].iov_base = (IOVBASE_TYPE *) user_memory; - iov[index].iov_len = remaining; - remaining = 0; - COMPUTE_CSUM( iov[index].iov_base, iov[index].iov_len, pConv ); - break; - } else { - iov[index].iov_base = (IOVBASE_TYPE *) user_memory; - iov[index].iov_len = pData->size; - user_memory += extent; - COMPUTE_CSUM( iov[index].iov_base, (size_t)iov[index].iov_len, pConv ); - } - remaining -= iov[index].iov_len; - pConv->bConverted += iov[index].iov_len; - } - *out_size = index; - *max_data = (pConv->bConverted - initial_bytes_converted); - if( pConv->bConverted == pConv->local_size ) { - pConv->flags |= CONVERTOR_COMPLETED; - return 1; - } - return 0; + if( remaining > iov[idx].iov_len ) + remaining = iov[idx].iov_len; + packed_buffer = (unsigned char *)iov[idx].iov_base; + pConv->bConverted += remaining; + user_memory = pConv->pBaseBuf + pData->true_lb + stack[0].disp + stack[1].disp; + + DO_DEBUG( opal_output( 0, "pack_homogeneous_contig( user_memory %p, packed_buffer %p length %" PRIsize_t "\n", + (void*)user_memory, (void*)packed_buffer, remaining ); ); + + length = (0 == pConv->stack_pos ? 
0 : stack[1].count); /* left over from the last pack */ + /* data left from last round and enough space in the buffer */ + if( (pData->size != length) && (length <= remaining)) { + /* copy the partial left-over from the previous round */ + OPAL_DATATYPE_SAFEGUARD_POINTER( user_memory, length, pConv->pBaseBuf, + pData, pConv->count ); + DO_DEBUG( opal_output( 0, "pack dest %p src %p length %" PRIsize_t " [prologue]\n", + (void*)user_memory, (void*)packed_buffer, length ); ); + MEMCPY_CSUM( packed_buffer, user_memory, length, pConv ); + packed_buffer += length; + remaining -= length; + stack[1].count -= length; + stack[1].disp += length; /* just in case, we overwrite this below */ + if( 0 == stack[1].count) { /* one completed element */ + stack[0].count--; + stack[0].disp += extent; + if( 0 == stack[0].count ) /* no element left: we are done */ + break; + stack[1].count = pData->size; + stack[1].disp = 0; } + user_memory = pConv->pBaseBuf + pData->true_lb + stack[0].disp + stack[1].disp; } - { - DO_DEBUG( opal_output( 0, "pack_homogeneous_contig( user_memory %p, packed_buffer %p length %lu\n", - (void*)user_memory, (void*)packed_buffer, (unsigned long)remaining ); ); - - length = (0 == pConv->stack_pos ? 0 : stack[1].count); /* left over from the last pack */ - /* data left from last round and enough space in the buffer */ - if( (0 != length) && (length <= remaining)) { - /* copy the partial left-over from the previous round */ - OPAL_DATATYPE_SAFEGUARD_POINTER( user_memory, length, pConv->pBaseBuf, - pData, pConv->count ); - DO_DEBUG( opal_output( 0, "2. 
pack dest %p src %p length %lu\n", - (void*)user_memory, (void*)packed_buffer, (unsigned long)length ); ); - MEMCPY_CSUM( packed_buffer, user_memory, length, pConv ); - packed_buffer += length; - user_memory += (extent - pData->size + length); - remaining -= length; - stack[1].count -= length; - if( 0 == stack[1].count) { /* one completed element */ - stack[0].count--; - stack[0].disp += extent; - if( 0 != stack[0].count ) { /* not yet done */ - stack[1].count = pData->size; - stack[1].disp = 0; - } - } - } - for( i = 0; pData->size <= remaining; i++ ) { - OPAL_DATATYPE_SAFEGUARD_POINTER( user_memory, pData->size, pConv->pBaseBuf, - pData, pConv->count ); - DO_DEBUG( opal_output( 0, "3. pack dest %p src %p length %lu\n", - (void*)user_memory, (void*)packed_buffer, (unsigned long)pData->size ); ); - MEMCPY_CSUM( packed_buffer, user_memory, pData->size, pConv ); - packed_buffer += pData->size; - user_memory += extent; - remaining -= pData->size; - } - stack[0].count -= i; /* the filled up and the entire types */ - stack[0].disp += (i * extent); - stack[1].disp += remaining; - /* Copy the last bits */ - if( 0 != remaining ) { - OPAL_DATATYPE_SAFEGUARD_POINTER( user_memory, remaining, pConv->pBaseBuf, - pData, pConv->count ); - DO_DEBUG( opal_output( 0, "4. 
pack dest %p src %p length %lu\n", - (void*)user_memory, (void*)packed_buffer, (unsigned long)remaining ); ); - MEMCPY_CSUM( packed_buffer, user_memory, remaining, pConv ); - user_memory += remaining; - stack[1].count -= remaining; - } + for( i = 0; pData->size <= remaining; i++ ) { + OPAL_DATATYPE_SAFEGUARD_POINTER( user_memory, pData->size, pConv->pBaseBuf, + pData, pConv->count ); + DO_DEBUG( opal_output( 0, "pack dest %p src %p length %" PRIsize_t " [%" PRIsize_t "/%" PRIsize_t "\n", + (void*)user_memory, (void*)packed_buffer, pData->size, remaining, iov[idx].iov_len ); ); + MEMCPY_CSUM( packed_buffer, user_memory, pData->size, pConv ); + packed_buffer += pData->size; + user_memory += extent; + remaining -= pData->size; + } + stack[0].count -= i; /* the entire datatype copied above */ + stack[0].disp += (i * extent); + + /* Copy the last bits */ + if( 0 != remaining ) { + OPAL_DATATYPE_SAFEGUARD_POINTER( user_memory, remaining, pConv->pBaseBuf, + pData, pConv->count ); + DO_DEBUG( opal_output( 0, "4. pack dest %p src %p length %" PRIsize_t "\n", + (void*)user_memory, (void*)packed_buffer, remaining ); ); + MEMCPY_CSUM( packed_buffer, user_memory, remaining, pConv ); + stack[1].count -= remaining; + stack[1].disp += remaining; /* keep the += in case we are copying less than the datatype size */ if( 0 == stack[1].count ) { /* prepare for the next element */ stack[1].count = pData->size; stack[1].disp = 0; } } - pConv->bConverted += bConverted; - } - *out_size = iov_count; - *max_data = (pConv->bConverted - initial_bytes_converted); - if( pConv->bConverted == pConv->local_size ) { - pConv->flags |= CONVERTOR_COMPLETED; - return 1; - } - return 0; + + update_status_and_return: + *out_size = idx; + *max_data = pConv->bConverted - initial_bytes_converted; + if( pConv->bConverted == pConv->local_size ) pConv->flags |= CONVERTOR_COMPLETED; + return !!(pConv->flags & CONVERTOR_COMPLETED); /* done or not */ } /* The pack/unpack functions need a cleanup. 
I have to create a proper interface to access @@ -278,7 +238,7 @@ opal_generic_simple_pack_function( opal_convertor_t* pConvertor, { dt_stack_t* pStack; /* pointer to the position on the stack */ uint32_t pos_desc; /* actual position in the description of the derived datatype */ - uint32_t count_desc; /* the number of items already done in the actual pos_desc */ + size_t count_desc; /* the number of items already done in the actual pos_desc */ size_t total_packed = 0; /* total amount packed this time */ dt_elem_desc_t* description; dt_elem_desc_t* pElem; @@ -300,37 +260,51 @@ opal_generic_simple_pack_function( opal_convertor_t* pConvertor, pStack = pConvertor->pStack + pConvertor->stack_pos; pos_desc = pStack->index; conv_ptr = pConvertor->pBaseBuf + pStack->disp; - count_desc = (uint32_t)pStack->count; + count_desc = pStack->count; pStack--; pConvertor->stack_pos--; pElem = &(description[pos_desc]); - DO_DEBUG( opal_output( 0, "pack start pos_desc %d count_desc %d disp %ld\n" - "stack_pos %d pos_desc %d count_desc %d disp %ld\n", + DO_DEBUG( opal_output( 0, "pack start pos_desc %d count_desc %" PRIsize_t " disp %ld\n" + "stack_pos %d pos_desc %d count_desc %" PRIsize_t " disp %ld\n", pos_desc, count_desc, (long)(conv_ptr - pConvertor->pBaseBuf), - pConvertor->stack_pos, pStack->index, (int)pStack->count, (long)pStack->disp ); ); + pConvertor->stack_pos, pStack->index, pStack->count, pStack->disp ); ); for( iov_count = 0; iov_count < (*out_size); iov_count++ ) { iov_ptr = (unsigned char *) iov[iov_count].iov_base; iov_len_local = iov[iov_count].iov_len; - while( 1 ) { - while( pElem->elem.common.flags & OPAL_DATATYPE_FLAG_DATA ) { - /* now here we have a basic datatype */ - PACK_PREDEFINED_DATATYPE( pConvertor, pElem, count_desc, - conv_ptr, iov_ptr, iov_len_local ); - if( 0 == count_desc ) { /* completed */ + + if( pElem->elem.common.flags & OPAL_DATATYPE_FLAG_DATA ) { + if( (pElem->elem.count * pElem->elem.blocklen) != count_desc ) { + /* we have a partial (less 
than blocklen) basic datatype */ + int rc = PACK_PARTIAL_BLOCKLEN( pConvertor, pElem, count_desc, + conv_ptr, iov_ptr, iov_len_local ); + if( 0 == rc ) /* not done */ + goto complete_loop; + if( 0 == count_desc ) { conv_ptr = pConvertor->pBaseBuf + pStack->disp; pos_desc++; /* advance to the next data */ UPDATE_INTERNAL_COUNTERS( description, pos_desc, pElem, count_desc ); - continue; } - goto complete_loop; + } + } + + while( 1 ) { + while( pElem->elem.common.flags & OPAL_DATATYPE_FLAG_DATA ) { + /* we have a basic datatype (working on full blocks) */ + PACK_PREDEFINED_DATATYPE( pConvertor, pElem, count_desc, + conv_ptr, iov_ptr, iov_len_local ); + if( 0 != count_desc ) /* completed? */ + goto complete_loop; + conv_ptr = pConvertor->pBaseBuf + pStack->disp; + pos_desc++; /* advance to the next data */ + UPDATE_INTERNAL_COUNTERS( description, pos_desc, pElem, count_desc ); } if( OPAL_DATATYPE_END_LOOP == pElem->elem.common.type ) { /* end of the current loop */ - DO_DEBUG( opal_output( 0, "pack end_loop count %d stack_pos %d" + DO_DEBUG( opal_output( 0, "pack end_loop count %" PRIsize_t " stack_pos %d" " pos_desc %d disp %ld space %lu\n", - (int)pStack->count, pConvertor->stack_pos, - pos_desc, (long)pStack->disp, (unsigned long)iov_len_local ); ); + pStack->count, pConvertor->stack_pos, + pos_desc, pStack->disp, (unsigned long)iov_len_local ); ); if( --(pStack->count) == 0 ) { /* end of loop */ if( 0 == pConvertor->stack_pos ) { /* we're done. 
Force the exit of the main for loop (around iovec) */ @@ -351,9 +325,9 @@ opal_generic_simple_pack_function( opal_convertor_t* pConvertor, } conv_ptr = pConvertor->pBaseBuf + pStack->disp; UPDATE_INTERNAL_COUNTERS( description, pos_desc, pElem, count_desc ); - DO_DEBUG( opal_output( 0, "pack new_loop count %d stack_pos %d pos_desc %d count_desc %d disp %ld space %lu\n", - (int)pStack->count, pConvertor->stack_pos, pos_desc, - count_desc, (long)pStack->disp, (unsigned long)iov_len_local ); ); + DO_DEBUG( opal_output( 0, "pack new_loop count %" PRIsize_t " stack_pos %d pos_desc %d count_desc %" PRIsize_t " disp %ld space %lu\n", + pStack->count, pConvertor->stack_pos, pos_desc, + count_desc, pStack->disp, (unsigned long)iov_len_local ); ); } if( OPAL_DATATYPE_LOOP == pElem->elem.common.type ) { ptrdiff_t local_disp = (ptrdiff_t)conv_ptr; @@ -390,8 +364,8 @@ opal_generic_simple_pack_function( opal_convertor_t* pConvertor, /* Save the global position for the next round */ PUSH_STACK( pStack, pConvertor->stack_pos, pos_desc, pElem->elem.common.type, count_desc, conv_ptr - pConvertor->pBaseBuf ); - DO_DEBUG( opal_output( 0, "pack save stack stack_pos %d pos_desc %d count_desc %d disp %ld\n", - pConvertor->stack_pos, pStack->index, (int)pStack->count, (long)pStack->disp ); ); + DO_DEBUG( opal_output( 0, "pack save stack stack_pos %d pos_desc %d count_desc %" PRIsize_t " disp %ld\n", + pConvertor->stack_pos, pStack->index, pStack->count, pStack->disp ); ); return 0; } @@ -411,7 +385,7 @@ opal_generic_simple_pack_function( opal_convertor_t* pConvertor, static inline void pack_predefined_heterogeneous( opal_convertor_t* CONVERTOR, const dt_elem_desc_t* ELEM, - uint32_t* COUNT, + size_t* COUNT, unsigned char** SOURCE, unsigned char** DESTINATION, size_t* SPACE ) @@ -420,12 +394,12 @@ pack_predefined_heterogeneous( opal_convertor_t* CONVERTOR, const ddt_elem_desc_t* _elem = &((ELEM)->elem); unsigned char* _source = (*SOURCE) + _elem->disp; ptrdiff_t advance; - uint32_t _count 
= *(COUNT); + size_t _count = *(COUNT); size_t _r_blength; _r_blength = master->remote_sizes[_elem->common.type]; if( (_count * _r_blength) > *(SPACE) ) { - _count = (uint32_t)(*(SPACE) / _r_blength); + _count = (*(SPACE) / _r_blength); if( 0 == _count ) return; /* nothing to do */ } @@ -454,7 +428,7 @@ opal_pack_general_function( opal_convertor_t* pConvertor, { dt_stack_t* pStack; /* pointer to the position on the stack */ uint32_t pos_desc; /* actual position in the description of the derived datatype */ - uint32_t count_desc; /* the number of items already done in the actual pos_desc */ + size_t count_desc; /* the number of items already done in the actual pos_desc */ size_t total_packed = 0; /* total amount packed this time */ dt_elem_desc_t* description; dt_elem_desc_t* pElem; @@ -476,15 +450,15 @@ opal_pack_general_function( opal_convertor_t* pConvertor, pStack = pConvertor->pStack + pConvertor->stack_pos; pos_desc = pStack->index; conv_ptr = pConvertor->pBaseBuf + pStack->disp; - count_desc = (uint32_t)pStack->count; + count_desc = pStack->count; pStack--; pConvertor->stack_pos--; pElem = &(description[pos_desc]); - DO_DEBUG( opal_output( 0, "pack start pos_desc %d count_desc %d disp %ld\n" - "stack_pos %d pos_desc %d count_desc %d disp %ld\n", + DO_DEBUG( opal_output( 0, "pack start pos_desc %d count_desc %" PRIsize_t " disp %ld\n" + "stack_pos %d pos_desc %d count_desc %" PRIsize_t " disp %ld\n", pos_desc, count_desc, (long)(conv_ptr - pConvertor->pBaseBuf), - pConvertor->stack_pos, pStack->index, (int)pStack->count, (long)pStack->disp ); ); + pConvertor->stack_pos, pStack->index, pStack->count, pStack->disp ); ); for( iov_count = 0; iov_count < (*out_size); iov_count++ ) { iov_ptr = (unsigned char *) iov[iov_count].iov_base; @@ -492,7 +466,7 @@ opal_pack_general_function( opal_convertor_t* pConvertor, while( 1 ) { while( pElem->elem.common.flags & OPAL_DATATYPE_FLAG_DATA ) { /* now here we have a basic datatype */ - DO_DEBUG( opal_output( 0, "pack 
(%p:%ld, %d, %ld) -> (%p, %ld) type %s\n", + DO_DEBUG( opal_output( 0, "pack (%p:%ld, %" PRIsize_t ", %ld) -> (%p, %ld) type %s\n", (void*)pConvertor->pBaseBuf, conv_ptr + pElem->elem.disp - pConvertor->pBaseBuf, count_desc, description[pos_desc].elem.extent, (void*)iov_ptr, iov_len_local, @@ -513,10 +487,10 @@ opal_pack_general_function( opal_convertor_t* pConvertor, goto complete_loop; } if( OPAL_DATATYPE_END_LOOP == pElem->elem.common.type ) { /* end of the current loop */ - DO_DEBUG( opal_output( 0, "pack end_loop count %d stack_pos %d" + DO_DEBUG( opal_output( 0, "pack end_loop count %" PRIsize_t " stack_pos %d" " pos_desc %d disp %ld space %lu\n", - (int)pStack->count, pConvertor->stack_pos, - pos_desc, (long)pStack->disp, (unsigned long)iov_len_local ); ); + pStack->count, pConvertor->stack_pos, + pos_desc, pStack->disp, (unsigned long)iov_len_local ); ); if( --(pStack->count) == 0 ) { /* end of loop */ if( 0 == pConvertor->stack_pos ) { /* we lie about the size of the next element in order to @@ -539,9 +513,9 @@ opal_pack_general_function( opal_convertor_t* pConvertor, } conv_ptr = pConvertor->pBaseBuf + pStack->disp; UPDATE_INTERNAL_COUNTERS( description, pos_desc, pElem, count_desc ); - DO_DEBUG( opal_output( 0, "pack new_loop count %d stack_pos %d pos_desc %d count_desc %d disp %ld space %lu\n", - (int)pStack->count, pConvertor->stack_pos, pos_desc, - count_desc, (long)pStack->disp, (unsigned long)iov_len_local ); ); + DO_DEBUG( opal_output( 0, "pack new_loop count %" PRIsize_t " stack_pos %d pos_desc %d count_desc %" PRIsize_t " disp %ld space %lu\n", + pStack->count, pConvertor->stack_pos, pos_desc, + count_desc, pStack->disp, (unsigned long)iov_len_local ); ); } if( OPAL_DATATYPE_LOOP == pElem->elem.common.type ) { ptrdiff_t local_disp = (ptrdiff_t)conv_ptr; @@ -583,7 +557,7 @@ opal_pack_general_function( opal_convertor_t* pConvertor, /* Save the global position for the next round */ PUSH_STACK( pStack, pConvertor->stack_pos, pos_desc, 
pElem->elem.common.type, count_desc, conv_ptr - pConvertor->pBaseBuf ); - DO_DEBUG( opal_output( 0, "pack save stack stack_pos %d pos_desc %d count_desc %d disp %ld\n", - pConvertor->stack_pos, pStack->index, (int)pStack->count, (long)pStack->disp ); ); + DO_DEBUG( opal_output( 0, "pack save stack stack_pos %d pos_desc %d count_desc %" PRIsize_t" disp %ld\n", + pConvertor->stack_pos, pStack->index, pStack->count, pStack->disp ); ); return 0; } diff --git a/opal/datatype/opal_datatype_pack.h b/opal/datatype/opal_datatype_pack.h index 2176e53e897..1eaf2e8b9f9 100644 --- a/opal/datatype/opal_datatype_pack.h +++ b/opal/datatype/opal_datatype_pack.h @@ -1,12 +1,12 @@ /* -*- Mode: C; c-basic-offset:4 ; -*- */ /* - * Copyright (c) 2004-2009 The University of Tennessee and The University + * Copyright (c) 2004-2019 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. * Copyright (c) 2009 Oak Ridge National Labs. All rights reserved. * Copyright (c) 2011 NVIDIA Corporation. All rights reserved. - * Copyright (c) 2017 Research Organization for Information Science - * and Technology (RIST). All rights reserved. + * Copyright (c) 2017-2018 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -19,8 +19,6 @@ #include "opal_config.h" -#include - #if !defined(CHECKSUM) && OPAL_CUDA_SUPPORT /* Make use of existing macro to do CUDA style memcpy */ #undef MEMCPY_CSUM @@ -28,90 +26,181 @@ CONVERTOR->cbmemcpy( (DST), (SRC), (BLENGTH), (CONVERTOR) ) #endif -static inline void pack_predefined_data( opal_convertor_t* CONVERTOR, - const dt_elem_desc_t* ELEM, - uint32_t* COUNT, - unsigned char** SOURCE, - unsigned char** DESTINATION, - size_t* SPACE ) +/** + * This function deals only with partial elements. 
The COUNT points however to the whole leftover count, + * but this function is only expected to operate on an amount less than blength, that would allow the rest + * of the pack process to handle only entire blength blocks (plus the left over). + * + * Return 1 if we are now aligned on a block, 0 otherwise. + */ +static inline int +pack_partial_blocklen( opal_convertor_t* CONVERTOR, + const dt_elem_desc_t* ELEM, + size_t* COUNT, + unsigned char** memory, + unsigned char** packed, + size_t* SPACE ) +{ + const ddt_elem_desc_t* _elem = &((ELEM)->elem); + size_t do_now_bytes = opal_datatype_basicDatatypes[_elem->common.type]->size; + size_t do_now = *(COUNT); + unsigned char* _memory = (*memory) + _elem->disp; + unsigned char* _packed = *packed; + + assert( *(COUNT) <= _elem->count * _elem->blocklen); + + /** + * First check if we already did something on this element ? The COUNT is the number + * of remaining predefined types in the current elem, not how many predefined types + * should be manipulated in the current call (this number is instead reflected on the + * SPACE). 
+ */ + if( 0 == (do_now = (*COUNT) % _elem->blocklen) ) + return 1; + + size_t left_in_block = do_now; /* left in the current blocklen */ + + if( (do_now_bytes * do_now) > *(SPACE) ) + do_now = (*SPACE) / do_now_bytes; + + do_now_bytes *= do_now; + + OPAL_DATATYPE_SAFEGUARD_POINTER( _memory, do_now_bytes, (CONVERTOR)->pBaseBuf, + (CONVERTOR)->pDesc, (CONVERTOR)->count ); + DO_DEBUG( opal_output( 0, "pack memcpy( %p, %p, %lu ) => space %lu [partial]\n", + _packed, (void*)_memory, (unsigned long)do_now_bytes, (unsigned long)(*(SPACE)) ); ); + MEMCPY_CSUM( _packed, _memory, do_now_bytes, (CONVERTOR) ); + *(memory) += (ptrdiff_t)do_now_bytes; + if( do_now == left_in_block ) /* compensate if completed a blocklen */ + *(memory) += _elem->extent - (_elem->blocklen * opal_datatype_basicDatatypes[_elem->common.type]->size); + + *(COUNT) -= do_now; + *(SPACE) -= do_now_bytes; + *(packed) += do_now_bytes; + return (do_now == left_in_block); +} + +/** + * Pack entire blocks, plus a possible remainder if SPACE is constrained to less than COUNT elements. 
+ */ +static inline void +pack_predefined_data( opal_convertor_t* CONVERTOR, + const dt_elem_desc_t* ELEM, + size_t* COUNT, + unsigned char** memory, + unsigned char** packed, + size_t* SPACE ) { - uint32_t _copy_count = *(COUNT); - size_t _copy_blength; const ddt_elem_desc_t* _elem = &((ELEM)->elem); - unsigned char* _source = (*SOURCE) + _elem->disp; + size_t blocklen_bytes = opal_datatype_basicDatatypes[_elem->common.type]->size; + size_t cando_count = *(COUNT), do_now_bytes; + unsigned char* _memory = (*memory) + _elem->disp; + unsigned char* _packed = *packed; + + assert( 0 == (cando_count % _elem->blocklen) ); /* no partials here */ + assert( *(COUNT) <= _elem->count * _elem->blocklen); - _copy_blength = opal_datatype_basicDatatypes[_elem->common.type]->size; - if( (_copy_count * _copy_blength) > *(SPACE) ) { - _copy_count = (uint32_t)(*(SPACE) / _copy_blength); - if( 0 == _copy_count ) return; /* nothing to do */ + if( (blocklen_bytes * cando_count) > *(SPACE) ) + cando_count = (*SPACE) / blocklen_bytes; + + /* premptively update the number of COUNT we will return. 
*/ + *(COUNT) -= cando_count; + + if( 1 == _elem->blocklen ) { /* Do as many full blocklen as possible */ + for(; cando_count > 0; cando_count--) { + OPAL_DATATYPE_SAFEGUARD_POINTER( _memory, blocklen_bytes, (CONVERTOR)->pBaseBuf, + (CONVERTOR)->pDesc, (CONVERTOR)->count ); + DO_DEBUG( opal_output( 0, "pack memcpy( %p, %p, %lu ) => space %lu [blen = 1]\n", + (void*)_packed, (void*)_memory, (unsigned long)blocklen_bytes, (unsigned long)(*(SPACE) - (_packed - *(packed))) ); ); + MEMCPY_CSUM( _packed, _memory, blocklen_bytes, (CONVERTOR) ); + _packed += blocklen_bytes; + _memory += _elem->extent; + } + goto update_and_return; } - if( (ptrdiff_t)_copy_blength == _elem->extent ) { - _copy_blength *= _copy_count; - /* the extent and the size of the basic datatype are equal */ - OPAL_DATATYPE_SAFEGUARD_POINTER( _source, _copy_blength, (CONVERTOR)->pBaseBuf, - (CONVERTOR)->pDesc, (CONVERTOR)->count ); - DO_DEBUG( opal_output( 0, "pack 1. memcpy( %p, %p, %lu ) => space %lu\n", - (void*)*(DESTINATION), (void*)_source, (unsigned long)_copy_blength, (unsigned long)(*(SPACE)) ); ); - MEMCPY_CSUM( *(DESTINATION), _source, _copy_blength, (CONVERTOR) ); - _source += _copy_blength; - *(DESTINATION) += _copy_blength; - } else { - uint32_t _i; - for( _i = 0; _i < _copy_count; _i++ ) { - OPAL_DATATYPE_SAFEGUARD_POINTER( _source, _copy_blength, (CONVERTOR)->pBaseBuf, - (CONVERTOR)->pDesc, (CONVERTOR)->count ); + if( (1 < _elem->count) && (_elem->blocklen <= cando_count) ) { + blocklen_bytes *= _elem->blocklen; + + do { /* Do as many full blocklen as possible */ + OPAL_DATATYPE_SAFEGUARD_POINTER( _memory, blocklen_bytes, (CONVERTOR)->pBaseBuf, + (CONVERTOR)->pDesc, (CONVERTOR)->count ); DO_DEBUG( opal_output( 0, "pack 2. 
memcpy( %p, %p, %lu ) => space %lu\n", - (void*)*(DESTINATION), (void*)_source, (unsigned long)_copy_blength, (unsigned long)(*(SPACE) - (_i * _copy_blength)) ); ); - MEMCPY_CSUM( *(DESTINATION), _source, _copy_blength, (CONVERTOR) ); - *(DESTINATION) += _copy_blength; - _source += _elem->extent; - } - _copy_blength *= _copy_count; + (void*)_packed, (void*)_memory, (unsigned long)blocklen_bytes, (unsigned long)(*(SPACE) - (_packed - *(packed))) ); ); + MEMCPY_CSUM( _packed, _memory, blocklen_bytes, (CONVERTOR) ); + _packed += blocklen_bytes; + _memory += _elem->extent; + cando_count -= _elem->blocklen; + } while (_elem->blocklen <= cando_count); } - *(SOURCE) = _source - _elem->disp; - *(SPACE) -= _copy_blength; - *(COUNT) -= _copy_count; + + /** + * As an epilog do anything left from the last blocklen. + */ + if( 0 != cando_count ) { + assert( (cando_count < _elem->blocklen) || + ((1 == _elem->count) && (cando_count <= _elem->blocklen)) ); + do_now_bytes = cando_count * opal_datatype_basicDatatypes[_elem->common.type]->size; + OPAL_DATATYPE_SAFEGUARD_POINTER( _memory, do_now_bytes, (CONVERTOR)->pBaseBuf, + (CONVERTOR)->pDesc, (CONVERTOR)->count ); + DO_DEBUG( opal_output( 0, "pack 3. 
memcpy( %p, %p, %lu ) => space %lu [epilog]\n", + (void*)_packed, (void*)_memory, (unsigned long)do_now_bytes, (unsigned long)(*(SPACE) - (_packed - *(packed))) ); ); + MEMCPY_CSUM( _packed, _memory, do_now_bytes, (CONVERTOR) ); + _memory += do_now_bytes; + _packed += do_now_bytes; + } + + update_and_return: + *(memory) = _memory - _elem->disp; + *(SPACE) -= (_packed - *packed); + *(packed) = _packed; } static inline void pack_contiguous_loop( opal_convertor_t* CONVERTOR, const dt_elem_desc_t* ELEM, - uint32_t* COUNT, - unsigned char** SOURCE, - unsigned char** DESTINATION, + size_t* COUNT, + unsigned char** memory, + unsigned char** packed, size_t* SPACE ) { const ddt_loop_desc_t *_loop = (ddt_loop_desc_t*)(ELEM); const ddt_endloop_desc_t* _end_loop = (ddt_endloop_desc_t*)((ELEM) + _loop->items); - unsigned char* _source = (*SOURCE) + _end_loop->first_elem_disp; - uint32_t _copy_loops = *(COUNT); - uint32_t _i; + unsigned char* _memory = (*memory) + _end_loop->first_elem_disp; + size_t _copy_loops = *(COUNT); if( (_copy_loops * _end_loop->size) > *(SPACE) ) - _copy_loops = (uint32_t)(*(SPACE) / _end_loop->size); - for( _i = 0; _i < _copy_loops; _i++ ) { - OPAL_DATATYPE_SAFEGUARD_POINTER( _source, _end_loop->size, (CONVERTOR)->pBaseBuf, + _copy_loops = (*(SPACE) / _end_loop->size); + for(size_t _i = 0; _i < _copy_loops; _i++ ) { + OPAL_DATATYPE_SAFEGUARD_POINTER( _memory, _end_loop->size, (CONVERTOR)->pBaseBuf, (CONVERTOR)->pDesc, (CONVERTOR)->count ); DO_DEBUG( opal_output( 0, "pack 3. 
memcpy( %p, %p, %lu ) => space %lu\n", - (void*)*(DESTINATION), (void*)_source, (unsigned long)_end_loop->size, (unsigned long)(*(SPACE) - _i * _end_loop->size) ); ); - MEMCPY_CSUM( *(DESTINATION), _source, _end_loop->size, (CONVERTOR) ); - *(DESTINATION) += _end_loop->size; - _source += _loop->extent; + (void*)*(packed), (void*)_memory, (unsigned long)_end_loop->size, (unsigned long)(*(SPACE) - _i * _end_loop->size) ); ); + MEMCPY_CSUM( *(packed), _memory, _end_loop->size, (CONVERTOR) ); + *(packed) += _end_loop->size; + _memory += _loop->extent; } - *(SOURCE) = _source - _end_loop->first_elem_disp; + *(memory) = _memory - _end_loop->first_elem_disp; *(SPACE) -= _copy_loops * _end_loop->size; *(COUNT) -= _copy_loops; } -#define PACK_PREDEFINED_DATATYPE( CONVERTOR, /* the convertor */ \ +#define PACK_PARTIAL_BLOCKLEN( CONVERTOR, /* the convertor */ \ + ELEM, /* the basic element to be packed */ \ + COUNT, /* the number of elements */ \ + MEMORY, /* the source pointer (char*) */ \ + PACKED, /* the destination pointer (char*) */ \ + SPACE ) /* the space in the destination buffer */ \ +pack_partial_blocklen( (CONVERTOR), (ELEM), &(COUNT), &(MEMORY), &(PACKED), &(SPACE) ) + +#define PACK_PREDEFINED_DATATYPE( CONVERTOR, /* the convertor */ \ ELEM, /* the basic element to be packed */ \ COUNT, /* the number of elements */ \ - SOURCE, /* the source pointer (char*) */ \ - DESTINATION, /* the destination pointer (char*) */ \ + MEMORY, /* the source pointer (char*) */ \ + PACKED, /* the destination pointer (char*) */ \ SPACE ) /* the space in the destination buffer */ \ -pack_predefined_data( (CONVERTOR), (ELEM), &(COUNT), &(SOURCE), &(DESTINATION), &(SPACE) ) +pack_predefined_data( (CONVERTOR), (ELEM), &(COUNT), &(MEMORY), &(PACKED), &(SPACE) ) -#define PACK_CONTIGUOUS_LOOP( CONVERTOR, ELEM, COUNT, SOURCE, DESTINATION, SPACE ) \ - pack_contiguous_loop( (CONVERTOR), (ELEM), &(COUNT), &(SOURCE), &(DESTINATION), &(SPACE) ) +#define PACK_CONTIGUOUS_LOOP( CONVERTOR, ELEM, COUNT, 
MEMORY, PACKED, SPACE ) \ + pack_contiguous_loop( (CONVERTOR), (ELEM), &(COUNT), &(MEMORY), &(PACKED), &(SPACE) ) #endif /* OPAL_DATATYPE_PACK_H_HAS_BEEN_INCLUDED */ diff --git a/opal/datatype/opal_datatype_position.c b/opal/datatype/opal_datatype_position.c index a4a088ffbdb..02ec55651a0 100644 --- a/opal/datatype/opal_datatype_position.c +++ b/opal/datatype/opal_datatype_position.c @@ -3,7 +3,7 @@ * Copyright (c) 2004-2006 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. - * Copyright (c) 2004-2014 The University of Tennessee and The University + * Copyright (c) 2004-2019 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. * Copyright (c) 2004-2006 High Performance Computing Center Stuttgart, @@ -12,7 +12,7 @@ * All rights reserved. * Copyright (c) 2009 Oak Ridge National Labs. All rights reserved. * Copyright (c) 2013 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2014-2017 Research Organization for Information Science + * Copyright (c) 2014-2018 Research Organization for Information Science * and Technology (RIST). All rights reserved. * $COPYRIGHT$ * @@ -33,7 +33,7 @@ #if OPAL_ENABLE_DEBUG #include "opal/util/output.h" -#define DO_DEBUG(INST) if( opal_position_debug ) { INST } +#define DO_DEBUG(INST) if( opal_ddt_position_debug ) { INST } #else #define DO_DEBUG(INST) #endif /* OPAL_ENABLE_DEBUG */ @@ -49,78 +49,118 @@ * - the DT_CONTIGUOUS flag for the type OPAL_DATATYPE_END_LOOP is meaningless. 
*/ +static inline void +position_single_block(opal_convertor_t* CONVERTOR, + unsigned char** mem, ptrdiff_t mem_update, + size_t* space, size_t space_update, + size_t* cnt, size_t cnt_update) +{ + OPAL_DATATYPE_SAFEGUARD_POINTER( *mem, mem_update, (CONVERTOR)->pBaseBuf, + (CONVERTOR)->pDesc, (CONVERTOR)->count ); + DO_DEBUG( opal_output( 0, "position( %p, %lu ) => space %lu [prolog]\n", + (void*)*mem, (unsigned long)space_update, (unsigned long)(*space) ); ); + *mem += mem_update; + *space -= space_update; + *cnt -= cnt_update; +} + /** - * Advance the current position in the convertor based using the - * current element and a left-over counter. Update the head pointer - * and the leftover byte space. + * Advance the convertors' position according. Update the pointer and the remaining space + * accordingly. */ static inline void position_predefined_data( opal_convertor_t* CONVERTOR, dt_elem_desc_t* ELEM, - uint32_t* COUNT, + size_t* COUNT, unsigned char** POINTER, size_t* SPACE ) { - uint32_t _copy_count = *(COUNT); - size_t _copy_blength; - ddt_elem_desc_t* _elem = &((ELEM)->elem); + const ddt_elem_desc_t* _elem = &((ELEM)->elem); + size_t total_count = _elem->count * _elem->blocklen; + size_t cando_count = (*SPACE) / opal_datatype_basicDatatypes[_elem->common.type]->size; + size_t do_now, do_now_bytes = opal_datatype_basicDatatypes[_elem->common.type]->size; + unsigned char* _memory = (*POINTER) + _elem->disp; - _copy_blength = opal_datatype_basicDatatypes[_elem->common.type]->size; - if( (_copy_count * _copy_blength) > *(SPACE) ) { - _copy_count = (uint32_t)(*(SPACE) / _copy_blength); - if( 0 == _copy_count ) return; /* nothing to do */ + assert( *(COUNT) <= _elem->count * _elem->blocklen); + + if( cando_count > *(COUNT) ) + cando_count = *(COUNT); + + if( 1 == _elem->blocklen ) { + DO_DEBUG( opal_output( 0, "position( %p, %" PRIsize_t " ) x (count %" PRIsize_t ", extent %ld) => space %lu [prolog]\n", + (void*)_memory, (unsigned long)do_now_bytes, cando_count, 
_elem->extent, (unsigned long)(*SPACE) ); ); + _memory += cando_count * _elem->extent; + *SPACE -= cando_count * do_now_bytes; + *COUNT -= cando_count; + goto update_and_return; } - _copy_blength *= _copy_count; - OPAL_DATATYPE_SAFEGUARD_POINTER( *(POINTER) + _elem->disp, _copy_blength, (CONVERTOR)->pBaseBuf, - (CONVERTOR)->pDesc, (CONVERTOR)->count ); - *(POINTER) += (_copy_count * _elem->extent); - *(SPACE) -= _copy_blength; - *(COUNT) -= _copy_count; -} + /** + * First check if we already did something on this element ? + */ + do_now = (total_count - *(COUNT)); /* done elements */ + if( 0 != do_now ) { + do_now = do_now % _elem->blocklen; /* partial blocklen? */ -/** - * Advance the current position in the convertor based using the - * current contiguous loop and a left-over counter. Update the head - * pointer and the leftover byte space. - */ -static inline void -position_contiguous_loop( opal_convertor_t* CONVERTOR, - dt_elem_desc_t* ELEM, - uint32_t* COUNT, - unsigned char** POINTER, - size_t* SPACE ) -{ - ddt_loop_desc_t *_loop = (ddt_loop_desc_t*)(ELEM); - ddt_endloop_desc_t* _end_loop = (ddt_endloop_desc_t*)((ELEM) + (ELEM)->loop.items); - uint32_t _copy_loops = *(COUNT); + if( 0 != do_now ) { + size_t left_in_block = _elem->blocklen - do_now; /* left in the current blocklen */ + do_now = (left_in_block > cando_count ) ? 
cando_count : left_in_block; + do_now_bytes = do_now * opal_datatype_basicDatatypes[_elem->common.type]->size; - if( (_copy_loops * _end_loop->size) > *(SPACE) ) - _copy_loops = (uint32_t)(*(SPACE) / _end_loop->size); - OPAL_DATATYPE_SAFEGUARD_POINTER( *(POINTER) + _end_loop->first_elem_disp, - (_copy_loops - 1) * _loop->extent + _end_loop->size, - (CONVERTOR)->pBaseBuf, (CONVERTOR)->pDesc, (CONVERTOR)->count ); - *(POINTER) += _copy_loops * _loop->extent; - *(SPACE) -= _copy_loops * _end_loop->size; - *(COUNT) -= _copy_loops; -} + position_single_block( CONVERTOR, &_memory, do_now_bytes, + SPACE, do_now_bytes, COUNT, do_now ); -#define POSITION_PREDEFINED_DATATYPE( CONVERTOR, ELEM, COUNT, POSITION, SPACE ) \ - position_predefined_data( (CONVERTOR), (ELEM), &(COUNT), &(POSITION), &(SPACE) ) + /* compensate if we just completed a blocklen */ + if( do_now == left_in_block ) + _memory += _elem->extent - (_elem->blocklen * opal_datatype_basicDatatypes[_elem->common.type]->size); + cando_count -= do_now; + } + } + + /** + * Compute how many full blocklen we need to do and do them. + */ + do_now = cando_count / _elem->blocklen; + if( 0 != do_now ) { + do_now_bytes = _elem->blocklen * opal_datatype_basicDatatypes[_elem->common.type]->size; +#if OPAL_ENABLE_DEBUG + for(size_t _i = 0; _i < do_now; _i++ ) { + position_single_block( CONVERTOR, &_memory, _elem->extent, + SPACE, do_now_bytes, COUNT, _elem->blocklen ); + cando_count -= _elem->blocklen; + } +#else + _memory += do_now * _elem->extent; + *SPACE -= do_now * do_now_bytes; + *COUNT -= do_now * _elem->blocklen; + cando_count -= do_now * _elem->blocklen; +#endif /* OPAL_ENABLE_DEBUG */ + } + + /** + * As an epilog do anything left from the last blocklen. 
+ */ + do_now = cando_count; + if( 0 != do_now ) { + do_now_bytes = do_now * opal_datatype_basicDatatypes[_elem->common.type]->size; + position_single_block( CONVERTOR, &_memory, do_now_bytes, + SPACE, do_now_bytes, COUNT, do_now ); + } -#define POSITION_CONTIGUOUS_LOOP( CONVERTOR, ELEM, COUNT, POSITION, SPACE ) \ - position_contiguous_loop( (CONVERTOR), (ELEM), &(COUNT), &(POSITION), &(SPACE) ) + update_and_return: + *(POINTER) = _memory - _elem->disp; +} int opal_convertor_generic_simple_position( opal_convertor_t* pConvertor, size_t* position ) { dt_stack_t* pStack; /* pointer to the position on the stack */ uint32_t pos_desc; /* actual position in the description of the derived datatype */ - uint32_t count_desc; /* the number of items already done in the actual pos_desc */ + size_t count_desc; /* the number of items already done in the actual pos_desc */ + size_t iov_len_local; dt_elem_desc_t* description = pConvertor->use_desc->desc; dt_elem_desc_t* pElem; /* current position */ unsigned char *base_pointer = pConvertor->pBaseBuf; - size_t iov_len_local; ptrdiff_t extent = pConvertor->pDesc->ub - pConvertor->pDesc->lb; DUMP( "opal_convertor_generic_simple_position( %p, &%ld )\n", (void*)pConvertor, (long)*position ); @@ -128,15 +168,15 @@ int opal_convertor_generic_simple_position( opal_convertor_t* pConvertor, /* We dont want to have to parse the datatype multiple times. What we are interested in * here is to compute the number of completed datatypes that we can move forward, update - * the counters and finally compute the position taking in account only the remaining - * elements. The only problem is that we have to modify all the elements on the stack. + * the counters and compute the position taking in account only the remaining elements. + * The only problem is that we have to modify all the elements on the stack. 
*/ iov_len_local = *position - pConvertor->bConverted; if( iov_len_local > pConvertor->pDesc->size ) { pStack = pConvertor->pStack; /* we're working with the full stack */ - count_desc = (uint32_t)(iov_len_local / pConvertor->pDesc->size); + count_desc = iov_len_local / pConvertor->pDesc->size; DO_DEBUG( opal_output( 0, "position before %lu asked %lu data size %lu" - " iov_len_local %lu count_desc %d\n", + " iov_len_local %lu count_desc %" PRIsize_t "\n", (unsigned long)pConvertor->bConverted, (unsigned long)*position, (unsigned long)pConvertor->pDesc->size, (unsigned long)iov_len_local, count_desc ); ); /* Update all the stack including the last one */ @@ -152,15 +192,15 @@ int opal_convertor_generic_simple_position( opal_convertor_t* pConvertor, pStack = pConvertor->pStack + pConvertor->stack_pos; pos_desc = pStack->index; base_pointer += pStack->disp; - count_desc = (uint32_t)pStack->count; + count_desc = pStack->count; pStack--; pConvertor->stack_pos--; pElem = &(description[pos_desc]); - DO_DEBUG( opal_output( 0, "position start pos_desc %d count_desc %d disp %llx\n" - "stack_pos %d pos_desc %d count_desc %d disp %llx\n", + DO_DEBUG( opal_output( 0, "position start pos_desc %d count_desc %" PRIsize_t " disp %llx\n" + "stack_pos %d pos_desc %d count_desc %" PRIsize_t " disp %llx\n", pos_desc, count_desc, (unsigned long long)(base_pointer - pConvertor->pBaseBuf), - pConvertor->stack_pos, pStack->index, (int)pStack->count, (unsigned long long)pStack->disp ); ); + pConvertor->stack_pos, pStack->index, pStack->count, (unsigned long long)pStack->disp ); ); /* Last data has been only partially converted. 
Compute the relative position */ if( 0 != pConvertor->partial_length ) { size_t element_length = opal_datatype_basicDatatypes[pElem->elem.common.type]->size; @@ -171,21 +211,19 @@ int opal_convertor_generic_simple_position( opal_convertor_t* pConvertor, assert(pConvertor->partial_length < element_length); return 0; } - pConvertor->partial_length = (pConvertor->partial_length + missing_length) % element_length; - assert(pConvertor->partial_length == 0); + pConvertor->partial_length = 0; pConvertor->bConverted += missing_length; iov_len_local -= missing_length; count_desc--; } while( 1 ) { - if( OPAL_DATATYPE_END_LOOP == pElem->elem.common.type ) { /* end of the current loop */ - DO_DEBUG( opal_output( 0, "position end_loop count %d stack_pos %d pos_desc %d disp %llx space %lu\n", - (int)pStack->count, pConvertor->stack_pos, pos_desc, - (unsigned long long)pStack->disp, (unsigned long)iov_len_local ); ); + if( OPAL_DATATYPE_END_LOOP == pElem->elem.common.type ) { /* end of the the entire datatype */ + DO_DEBUG( opal_output( 0, "position end_loop count %" PRIsize_t " stack_pos %d pos_desc %d disp %lx space %lu\n", + pStack->count, pConvertor->stack_pos, pos_desc, + pStack->disp, (unsigned long)iov_len_local ); ); if( --(pStack->count) == 0 ) { /* end of loop */ if( pConvertor->stack_pos == 0 ) { pConvertor->flags |= CONVERTOR_COMPLETED; - pConvertor->partial_length = 0; goto complete_loop; /* completed */ } pConvertor->stack_pos--; @@ -194,23 +232,30 @@ int opal_convertor_generic_simple_position( opal_convertor_t* pConvertor, } else { if( pStack->index == -1 ) { pStack->disp += extent; + pos_desc = 0; /* back to the first element */ } else { assert( OPAL_DATATYPE_LOOP == description[pStack->index].loop.common.type ); pStack->disp += description[pStack->index].loop.extent; + pos_desc = pStack->index; /* go back to the loop start itself to give a chance + * to move forward by entire loops */ } - pos_desc = pStack->index + 1; } base_pointer = pConvertor->pBaseBuf + 
pStack->disp; UPDATE_INTERNAL_COUNTERS( description, pos_desc, pElem, count_desc ); - DO_DEBUG( opal_output( 0, "position new_loop count %d stack_pos %d pos_desc %d disp %llx space %lu\n", - (int)pStack->count, pConvertor->stack_pos, pos_desc, - (unsigned long long)pStack->disp, (unsigned long)iov_len_local ); ); + DO_DEBUG( opal_output( 0, "position new_loop count %" PRIsize_t " stack_pos %d pos_desc %d disp %lx space %lu\n", + pStack->count, pConvertor->stack_pos, pos_desc, + pStack->disp, (unsigned long)iov_len_local ); ); } if( OPAL_DATATYPE_LOOP == pElem->elem.common.type ) { ptrdiff_t local_disp = (ptrdiff_t)base_pointer; - if( pElem->loop.common.flags & OPAL_DATATYPE_FLAG_CONTIGUOUS ) { - POSITION_CONTIGUOUS_LOOP( pConvertor, pElem, count_desc, - base_pointer, iov_len_local ); + ddt_endloop_desc_t* end_loop = (ddt_endloop_desc_t*)(pElem + pElem->loop.items); + size_t full_loops = iov_len_local / end_loop->size; + full_loops = count_desc <= full_loops ? count_desc : full_loops; + if( full_loops ) { + base_pointer += full_loops * pElem->loop.extent; + iov_len_local -= full_loops * end_loop->size; + count_desc -= full_loops; + if( 0 == count_desc ) { /* completed */ pos_desc += pElem->loop.items + 1; goto update_loop_description; @@ -225,25 +270,24 @@ int opal_convertor_generic_simple_position( opal_convertor_t* pConvertor, base_pointer = pConvertor->pBaseBuf + pStack->disp; UPDATE_INTERNAL_COUNTERS( description, pos_desc, pElem, count_desc ); DDT_DUMP_STACK( pConvertor->pStack, pConvertor->stack_pos, pElem, "advance loop" ); - DO_DEBUG( opal_output( 0, "position set loop count %d stack_pos %d pos_desc %d disp %llx space %lu\n", - (int)pStack->count, pConvertor->stack_pos, pos_desc, - (unsigned long long)pStack->disp, (unsigned long)iov_len_local ); ); + DO_DEBUG( opal_output( 0, "position set loop count %" PRIsize_t " stack_pos %d pos_desc %d disp %lx space %lu\n", + pStack->count, pConvertor->stack_pos, pos_desc, + pStack->disp, (unsigned long)iov_len_local 
); ); continue; } while( pElem->elem.common.flags & OPAL_DATATYPE_FLAG_DATA ) { /* now here we have a basic datatype */ - POSITION_PREDEFINED_DATATYPE( pConvertor, pElem, count_desc, - base_pointer, iov_len_local ); + position_predefined_data( pConvertor, pElem, &count_desc, &base_pointer, &iov_len_local ); if( 0 != count_desc ) { /* completed */ - pConvertor->partial_length = (uint32_t)iov_len_local; + pConvertor->partial_length = iov_len_local; goto complete_loop; } base_pointer = pConvertor->pBaseBuf + pStack->disp; pos_desc++; /* advance to the next data */ UPDATE_INTERNAL_COUNTERS( description, pos_desc, pElem, count_desc ); - DO_DEBUG( opal_output( 0, "position set loop count %d stack_pos %d pos_desc %d disp %llx space %lu\n", - (int)pStack->count, pConvertor->stack_pos, pos_desc, - (unsigned long long)pStack->disp, (unsigned long)iov_len_local ); ); + DO_DEBUG( opal_output( 0, "position set loop count %" PRIsize_t " stack_pos %d pos_desc %d disp %lx space %lu\n", + pStack->count, pConvertor->stack_pos, pos_desc, + pStack->disp, (unsigned long)iov_len_local ); ); } } complete_loop: @@ -253,8 +297,8 @@ int opal_convertor_generic_simple_position( opal_convertor_t* pConvertor, /* I complete an element, next step I should go to the next one */ PUSH_STACK( pStack, pConvertor->stack_pos, pos_desc, pElem->elem.common.type, count_desc, base_pointer - pConvertor->pBaseBuf ); - DO_DEBUG( opal_output( 0, "position save stack stack_pos %d pos_desc %d count_desc %d disp %llx\n", - pConvertor->stack_pos, pStack->index, (int)pStack->count, (unsigned long long)pStack->disp ); ); + DO_DEBUG( opal_output( 0, "position save stack stack_pos %d pos_desc %d count_desc %" PRIsize_t " disp %llx\n", + pConvertor->stack_pos, pStack->index, pStack->count, (unsigned long long)pStack->disp ); ); return 0; } return 1; diff --git a/opal/datatype/opal_datatype_unpack.c b/opal/datatype/opal_datatype_unpack.c index b43a5c8f83e..0925bde736d 100644 --- a/opal/datatype/opal_datatype_unpack.c +++ 
b/opal/datatype/opal_datatype_unpack.c @@ -3,7 +3,7 @@ * Copyright (c) 2004-2006 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. - * Copyright (c) 2004-2017 The University of Tennessee and The University + * Copyright (c) 2004-2019 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. * Copyright (c) 2004-2006 High Performance Computing Center Stuttgart, @@ -13,8 +13,8 @@ * Copyright (c) 2008-2009 Oak Ridge National Labs. All rights reserved. * Copyright (c) 2011 NVIDIA Corporation. All rights reserved. * Copyright (c) 2013 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2017 Research Organization for Information Science - * and Technology (RIST). All rights reserved. + * Copyright (c) 2017-2018 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -33,7 +33,7 @@ #if OPAL_ENABLE_DEBUG #include "opal/util/output.h" -#define DO_DEBUG(INST) if( opal_unpack_debug ) { INST } +#define DO_DEBUG(INST) if( opal_ddt_unpack_debug ) { INST } #else #define DO_DEBUG(INST) #endif /* OPAL_ENABLE_DEBUG */ @@ -70,98 +70,82 @@ opal_unpack_homogeneous_contig_function( opal_convertor_t* pConv, { const opal_datatype_t *pData = pConv->pDesc; unsigned char *user_memory, *packed_buffer; - uint32_t iov_count, i; - size_t bConverted, remaining, length, initial_bytes_converted = pConv->bConverted; + uint32_t iov_idx, i; + size_t remaining, initial_bytes_converted = pConv->bConverted; dt_stack_t* stack = pConv->pStack; ptrdiff_t extent = pData->ub - pData->lb; - ptrdiff_t initial_displ = pConv->use_desc->desc[pConv->use_desc->used].end_loop.first_elem_disp; - DO_DEBUG( opal_output( 0, "unpack_homogeneous_contig( pBaseBuf %p, iov_count %d )\n", + DO_DEBUG( opal_output( 0, "unpack_homogeneous_contig( pBaseBuf %p, iov count %d )\n", (void*)pConv->pBaseBuf, *out_size ); ); 
if( stack[1].type != opal_datatype_uint1.id ) { stack[1].count *= opal_datatype_basicDatatypes[stack[1].type]->size; stack[1].type = opal_datatype_uint1.id; } - for( iov_count = 0; iov_count < (*out_size); iov_count++ ) { - remaining = pConv->local_size - pConv->bConverted; - if( 0 == remaining ) break; /* we're done this time */ - if( remaining > (uint32_t)iov[iov_count].iov_len ) - remaining = iov[iov_count].iov_len; - packed_buffer = (unsigned char*)iov[iov_count].iov_base; - bConverted = remaining; /* how much will get unpacked this time */ - user_memory = pConv->pBaseBuf + initial_displ; - - if( (ptrdiff_t)pData->size == extent ) { - user_memory += pConv->bConverted; - DO_DEBUG( opal_output( 0, "unpack_homogeneous_contig( user_memory %p, packed_buffer %p length %lu\n", - (void*)user_memory, (void*)packed_buffer, (unsigned long)remaining ); ); + + if( (ptrdiff_t)pData->size == extent ) { + for( iov_idx = 0; iov_idx < (*out_size); iov_idx++ ) { + remaining = pConv->local_size - pConv->bConverted; + if( 0 == remaining ) break; /* we're done this time */ + if( remaining > iov[iov_idx].iov_len ) + remaining = iov[iov_idx].iov_len; + + packed_buffer = (unsigned char*)iov[iov_idx].iov_base; + user_memory = pConv->pBaseBuf + pData->true_lb + pConv->bConverted; /* contiguous data or basic datatype with count */ OPAL_DATATYPE_SAFEGUARD_POINTER( user_memory, remaining, pConv->pBaseBuf, pData, pConv->count ); - DO_DEBUG( opal_output( 0, "1. 
unpack contig dest %p src %p length %lu\n", - (void*)user_memory, (void*)packed_buffer, (unsigned long)remaining ); ); + DO_DEBUG( opal_output( 0, "unpack contig [%d] dest %p src %p length %" PRIsize_t "\n", + iov_idx, (void*)user_memory, (void*)packed_buffer, remaining ); ); MEMCPY_CSUM( user_memory, packed_buffer, remaining, pConv ); - } else { - user_memory += stack[0].disp + stack[1].disp; + pConv->bConverted += remaining; /* how much will get unpacked this time */ + } + } else { + for( iov_idx = 0; iov_idx < (*out_size); iov_idx++ ) { + remaining = pConv->local_size - pConv->bConverted; + if( 0 == remaining ) break; /* we're done this time */ + if( remaining > iov[iov_idx].iov_len ) + remaining = iov[iov_idx].iov_len; + + packed_buffer = (unsigned char*)iov[iov_idx].iov_base; + user_memory = pConv->pBaseBuf + pData->true_lb + stack[0].disp + stack[1].disp; + pConv->bConverted += remaining; /* how much will get unpacked this time */ + + for( i = 0; stack[1].count <= remaining; i++ ) { /* partial or full data */ + OPAL_DATATYPE_SAFEGUARD_POINTER( user_memory, stack[1].count, pConv->pBaseBuf, + pData, pConv->count ); + DO_DEBUG( opal_output( 0, "unpack gaps [%d] dest %p src %p length %" PRIsize_t " [%d]\n", + iov_idx, (void*)user_memory, (void*)packed_buffer, stack[1].count, i ); ); + MEMCPY_CSUM( user_memory, packed_buffer, stack[1].count, pConv ); - DO_DEBUG( opal_output( 0, "unpack_homogeneous_contig( user_memory %p, packed_buffer %p length %lu\n", - (void*)user_memory, (void*)packed_buffer, (unsigned long)remaining ); ); + packed_buffer += stack[1].count; + remaining -= stack[1].count; - length = (0 == pConv->stack_pos ? 0 : stack[1].count); /* left over from the last unpack */ - /* complete the last copy */ - if( (0 != length) && (length <= remaining) ) { - OPAL_DATATYPE_SAFEGUARD_POINTER( user_memory, length, pConv->pBaseBuf, - pData, pConv->count ); - DO_DEBUG( opal_output( 0, "2. 
unpack dest %p src %p length %lu\n", - (void*)user_memory, (void*)packed_buffer, (unsigned long)length ); ); - MEMCPY_CSUM( user_memory, packed_buffer, length, pConv ); - packed_buffer += length; - user_memory += (extent - (pData->size - length)); - remaining -= length; - stack[1].count -= length; - if( 0 == stack[1].count) { /* one completed element */ - stack[0].count--; - stack[0].disp += extent; - if( 0 != stack[0].count ) { /* not yet done */ - stack[1].count = pData->size; - stack[1].disp = 0; - } - } - } - for( i = 0; pData->size <= remaining; i++ ) { - OPAL_DATATYPE_SAFEGUARD_POINTER( user_memory, pData->size, pConv->pBaseBuf, - pData, pConv->count ); - DO_DEBUG( opal_output( 0, "3. unpack dest %p src %p length %lu\n", - (void*)user_memory, (void*)packed_buffer, (unsigned long)pData->size ); ); - MEMCPY_CSUM( user_memory, packed_buffer, pData->size, pConv ); - packed_buffer += pData->size; - user_memory += extent; - remaining -= pData->size; + stack[0].count--; + stack[0].disp += extent; + stack[1].count = pData->size; + stack[1].disp = 0; + + user_memory = pConv->pBaseBuf + pData->true_lb + stack[0].disp; } - stack[0].count -= i; - stack[0].disp += (i * extent); - stack[1].disp += remaining; - /* copy the last bits */ + + /* Copy the last bits */ if( 0 != remaining ) { OPAL_DATATYPE_SAFEGUARD_POINTER( user_memory, remaining, pConv->pBaseBuf, pData, pConv->count ); - DO_DEBUG( opal_output( 0, "4. 
unpack dest %p src %p length %lu\n", - (void*)user_memory, (void*)packed_buffer, (unsigned long)remaining ); ); + DO_DEBUG( opal_output( 0, "unpack gaps [%d] dest %p src %p length %" PRIsize_t " [epilog]\n", + iov_idx, (void*)user_memory, (void*)packed_buffer, remaining ); ); MEMCPY_CSUM( user_memory, packed_buffer, remaining, pConv ); - user_memory += remaining; stack[1].count -= remaining; + stack[1].disp += remaining; /* keep the += in case we are copying less that the datatype size */ + assert( stack[1].count ); } } - pConv->bConverted += bConverted; - } - *out_size = iov_count; /* we only reach this line after the for loop succesfully complete */ - *max_data = (pConv->bConverted - initial_bytes_converted); - if( pConv->bConverted == pConv->local_size ) { - pConv->flags |= CONVERTOR_COMPLETED; - return 1; } - return 0; + *out_size = iov_idx; /* we only reach this line after the for loop succesfully complete */ + *max_data = pConv->bConverted - initial_bytes_converted; + if( pConv->bConverted == pConv->local_size ) pConv->flags |= CONVERTOR_COMPLETED; + return !!(pConv->flags & CONVERTOR_COMPLETED); /* done or not */ } /** @@ -176,26 +160,26 @@ opal_unpack_homogeneous_contig_function( opal_convertor_t* pConv, * change the content of the data (as in all conversions that require changing the size * of the exponent or mantissa). 
*/ -static inline uint32_t +static inline void opal_unpack_partial_datatype( opal_convertor_t* pConvertor, dt_elem_desc_t* pElem, unsigned char* partial_data, - ptrdiff_t start_position, ptrdiff_t length, + ptrdiff_t start_position, size_t length, unsigned char** user_buffer ) { char unused_byte = 0x7F, saved_data[16]; unsigned char temporary[16], *temporary_buffer = temporary; unsigned char* user_data = *user_buffer + pElem->elem.disp; - uint32_t i, count_desc = 1; + size_t count_desc = 1; size_t data_length = opal_datatype_basicDatatypes[pElem->elem.common.type]->size; DO_DEBUG( opal_output( 0, "unpack partial data start %lu end %lu data_length %lu user %p\n" - "\tbConverted %lu total_length %lu count %d\n", + "\tbConverted %lu total_length %lu count %ld\n", (unsigned long)start_position, (unsigned long)start_position + length, (unsigned long)data_length, (void*)*user_buffer, (unsigned long)pConvertor->bConverted, (unsigned long)pConvertor->local_size, pConvertor->count ); ); /* Find a byte that is not used in the partial buffer */ find_unused_byte: - for( i = 0; i < length; i++ ) { + for(size_t i = 0; i < length; i++ ) { if( unused_byte == partial_data[i] ) { unused_byte--; goto find_unused_byte; @@ -234,18 +218,17 @@ opal_unpack_partial_datatype( opal_convertor_t* pConvertor, dt_elem_desc_t* pEle { char resaved_data[16]; pConvertor->cbmemcpy(resaved_data, user_data, data_length, pConvertor ); - for( i = 0; i < data_length; i++ ) { + for(size_t i = 0; i < data_length; i++ ) { if( unused_byte == resaved_data[i] ) pConvertor->cbmemcpy(&user_data[i], &saved_data[i], 1, pConvertor); } } #else - for( i = 0; i < data_length; i++ ) { + for(size_t i = 0; i < data_length; i++ ) { if( unused_byte == user_data[i] ) user_data[i] = saved_data[i]; } #endif - return 0; } /* The pack/unpack functions need a cleanup. 
I have to create a proper interface to access @@ -265,7 +248,7 @@ opal_generic_simple_unpack_function( opal_convertor_t* pConvertor, { dt_stack_t* pStack; /* pointer to the position on the stack */ uint32_t pos_desc; /* actual position in the description of the derived datatype */ - uint32_t count_desc; /* the number of items already done in the actual pos_desc */ + size_t count_desc; /* the number of items already done in the actual pos_desc */ size_t total_unpacked = 0; /* total size unpacked this time */ dt_elem_desc_t* description; dt_elem_desc_t* pElem; @@ -286,19 +269,20 @@ opal_generic_simple_unpack_function( opal_convertor_t* pConvertor, pStack = pConvertor->pStack + pConvertor->stack_pos; pos_desc = pStack->index; conv_ptr = pConvertor->pBaseBuf + pStack->disp; - count_desc = (uint32_t)pStack->count; + count_desc = pStack->count; pStack--; pConvertor->stack_pos--; pElem = &(description[pos_desc]); - DO_DEBUG( opal_output( 0, "unpack start pos_desc %d count_desc %d disp %ld\n" - "stack_pos %d pos_desc %d count_desc %d disp %ld\n", + DO_DEBUG( opal_output( 0, "unpack start pos_desc %d count_desc %" PRIsize_t " disp %ld\n" + "stack_pos %d pos_desc %d count_desc %" PRIsize_t " disp %ld\n", pos_desc, count_desc, (long)(conv_ptr - pConvertor->pBaseBuf), - pConvertor->stack_pos, pStack->index, (int)pStack->count, (long)(pStack->disp) ); ); + pConvertor->stack_pos, pStack->index, pStack->count, (long)(pStack->disp) ); ); for( iov_count = 0; iov_count < (*out_size); iov_count++ ) { iov_ptr = (unsigned char *) iov[iov_count].iov_base; iov_len_local = iov[iov_count].iov_len; + if( 0 != pConvertor->partial_length ) { size_t element_length = opal_datatype_basicDatatypes[pElem->elem.common.type]->size; size_t missing_length = element_length - pConvertor->partial_length; @@ -307,7 +291,7 @@ opal_generic_simple_unpack_function( opal_convertor_t* pConvertor, COMPUTE_CSUM( iov_ptr, missing_length, pConvertor ); opal_unpack_partial_datatype( pConvertor, pElem, iov_ptr, - 
pConvertor->partial_length, element_length - pConvertor->partial_length, + pConvertor->partial_length, (size_t)(element_length - pConvertor->partial_length), &conv_ptr ); --count_desc; if( 0 == count_desc ) { @@ -319,46 +303,41 @@ opal_generic_simple_unpack_function( opal_convertor_t* pConvertor, iov_len_local -= missing_length; pConvertor->partial_length = 0; /* nothing more inside */ } - while( 1 ) { - while( pElem->elem.common.flags & OPAL_DATATYPE_FLAG_DATA ) { - /* now here we have a basic datatype */ - UNPACK_PREDEFINED_DATATYPE( pConvertor, pElem, count_desc, - iov_ptr, conv_ptr, iov_len_local ); - if( 0 == count_desc ) { /* completed */ + if( pElem->elem.common.flags & OPAL_DATATYPE_FLAG_DATA ) { + if( (pElem->elem.count * pElem->elem.blocklen) != count_desc ) { + /* we have a partial (less than blocklen) basic datatype */ + int rc = UNPACK_PARTIAL_BLOCKLEN( pConvertor, pElem, count_desc, + iov_ptr, conv_ptr, iov_len_local ); + if( 0 == rc ) /* not done */ + goto complete_loop; + if( 0 == count_desc ) { conv_ptr = pConvertor->pBaseBuf + pStack->disp; pos_desc++; /* advance to the next data */ UPDATE_INTERNAL_COUNTERS( description, pos_desc, pElem, count_desc ); - continue; } - assert( pElem->elem.common.type < OPAL_DATATYPE_MAX_PREDEFINED ); - if( 0 != iov_len_local ) { - unsigned char* temp = conv_ptr; - /* We have some partial data here. Let's copy it into the convertor - * and keep it hot until the next round. 
- */ - assert( iov_len_local < opal_datatype_basicDatatypes[pElem->elem.common.type]->size ); - COMPUTE_CSUM( iov_ptr, iov_len_local, pConvertor ); - - opal_unpack_partial_datatype( pConvertor, pElem, - iov_ptr, 0, iov_len_local, - &temp ); - - pConvertor->partial_length = (uint32_t)iov_len_local; - iov_len_local = 0; - } - goto complete_loop; + } + } + + while( 1 ) { + while( pElem->elem.common.flags & OPAL_DATATYPE_FLAG_DATA ) { + /* we have a basic datatype (working on full blocks) */ + UNPACK_PREDEFINED_DATATYPE( pConvertor, pElem, count_desc, + iov_ptr, conv_ptr, iov_len_local ); + if( 0 != count_desc ) /* completed? */ + goto complete_loop; + conv_ptr = pConvertor->pBaseBuf + pStack->disp; + pos_desc++; /* advance to the next data */ + UPDATE_INTERNAL_COUNTERS( description, pos_desc, pElem, count_desc ); } if( OPAL_DATATYPE_END_LOOP == pElem->elem.common.type ) { /* end of the current loop */ - DO_DEBUG( opal_output( 0, "unpack end_loop count %d stack_pos %d pos_desc %d disp %ld space %lu\n", - (int)pStack->count, pConvertor->stack_pos, pos_desc, - (long)pStack->disp, (unsigned long)iov_len_local ); ); + DO_DEBUG( opal_output( 0, "unpack end_loop count %" PRIsize_t " stack_pos %d pos_desc %d disp %ld space %lu\n", + pStack->count, pConvertor->stack_pos, pos_desc, + pStack->disp, (unsigned long)iov_len_local ); ); if( --(pStack->count) == 0 ) { /* end of loop */ if( 0 == pConvertor->stack_pos ) { - /* Do the same thing as when the loop is completed */ - iov[iov_count].iov_len -= iov_len_local; /* update the amount of valid data */ - total_unpacked += iov[iov_count].iov_len; - iov_count++; /* go to the next */ - goto complete_conversion; + /* we're done. 
Force the exit of the main for loop (around iovec) */ + *out_size = iov_count; + goto complete_loop; } pConvertor->stack_pos--; pStack--; @@ -374,9 +353,9 @@ opal_generic_simple_unpack_function( opal_convertor_t* pConvertor, } conv_ptr = pConvertor->pBaseBuf + pStack->disp; UPDATE_INTERNAL_COUNTERS( description, pos_desc, pElem, count_desc ); - DO_DEBUG( opal_output( 0, "unpack new_loop count %d stack_pos %d pos_desc %d disp %ld space %lu\n", - (int)pStack->count, pConvertor->stack_pos, pos_desc, - (long)pStack->disp, (unsigned long)iov_len_local ); ); + DO_DEBUG( opal_output( 0, "unpack new_loop count %" PRIsize_t " stack_pos %d pos_desc %d disp %ld space %lu\n", + pStack->count, pConvertor->stack_pos, pos_desc, + pStack->disp, (unsigned long)iov_len_local ); ); } if( OPAL_DATATYPE_LOOP == pElem->elem.common.type ) { ptrdiff_t local_disp = (ptrdiff_t)conv_ptr; @@ -397,14 +376,29 @@ opal_generic_simple_unpack_function( opal_convertor_t* pConvertor, conv_ptr = pConvertor->pBaseBuf + pStack->disp; UPDATE_INTERNAL_COUNTERS( description, pos_desc, pElem, count_desc ); DDT_DUMP_STACK( pConvertor->pStack, pConvertor->stack_pos, pElem, "advance loop" ); - continue; } } complete_loop: + assert( pElem->elem.common.type < OPAL_DATATYPE_MAX_PREDEFINED ); + if( 0 != iov_len_local ) { + unsigned char* temp = conv_ptr; + /* We have some partial data here. Let's copy it into the convertor + * and keep it hot until the next round. 
+ */ + assert( iov_len_local < opal_datatype_basicDatatypes[pElem->elem.common.type]->size ); + COMPUTE_CSUM( iov_ptr, iov_len_local, pConvertor ); + + opal_unpack_partial_datatype( pConvertor, pElem, + iov_ptr, 0, iov_len_local, + &temp ); + + pConvertor->partial_length = iov_len_local; + iov_len_local = 0; + } + iov[iov_count].iov_len -= iov_len_local; /* update the amount of valid data */ total_unpacked += iov[iov_count].iov_len; } - complete_conversion: *max_data = total_unpacked; pConvertor->bConverted += total_unpacked; /* update the already converted bytes */ *out_size = iov_count; @@ -415,8 +409,8 @@ opal_generic_simple_unpack_function( opal_convertor_t* pConvertor, /* Save the global position for the next round */ PUSH_STACK( pStack, pConvertor->stack_pos, pos_desc, pElem->elem.common.type, count_desc, conv_ptr - pConvertor->pBaseBuf ); - DO_DEBUG( opal_output( 0, "unpack save stack stack_pos %d pos_desc %d count_desc %d disp %ld\n", - pConvertor->stack_pos, pStack->index, (int)pStack->count, (long)pStack->disp ); ); + DO_DEBUG( opal_output( 0, "unpack save stack stack_pos %d pos_desc %d count_desc %" PRIsize_t " disp %ld\n", + pConvertor->stack_pos, pStack->index, pStack->count, (long)pStack->disp ); ); return 0; } @@ -439,21 +433,21 @@ opal_unpack_general_function( opal_convertor_t* pConvertor, { dt_stack_t* pStack; /* pointer to the position on the stack */ uint32_t pos_desc; /* actual position in the description of the derived datatype */ - uint32_t count_desc; /* the number of items already done in the actual pos_desc */ + size_t count_desc; /* the number of items already done in the actual pos_desc */ uint16_t type = OPAL_DATATYPE_MAX_PREDEFINED; /* type at current position */ size_t total_unpacked = 0; /* total size unpacked this time */ dt_elem_desc_t* description; dt_elem_desc_t* pElem; const opal_datatype_t *pData = pConvertor->pDesc; unsigned char *conv_ptr, *iov_ptr; - size_t iov_len_local; uint32_t iov_count; + size_t iov_len_local; const 
opal_convertor_master_t* master = pConvertor->master; ptrdiff_t advance; /* number of bytes that we should advance the buffer */ - int32_t rc; + size_t rc; - DO_DEBUG( opal_output( 0, "opal_convertor_general_unpack( %p, {%p, %lu}, %u )\n", + DO_DEBUG( opal_output( 0, "opal_convertor_general_unpack( %p, {%p, %lu}, %d )\n", (void*)pConvertor, (void*)iov[0].iov_base, (unsigned long)iov[0].iov_len, *out_size ); ); description = pConvertor->use_desc->desc; @@ -465,15 +459,15 @@ opal_unpack_general_function( opal_convertor_t* pConvertor, pStack = pConvertor->pStack + pConvertor->stack_pos; pos_desc = pStack->index; conv_ptr = pConvertor->pBaseBuf + pStack->disp; - count_desc = (uint32_t)pStack->count; + count_desc = pStack->count; pStack--; pConvertor->stack_pos--; pElem = &(description[pos_desc]); - DO_DEBUG( opal_output( 0, "unpack start pos_desc %d count_desc %d disp %ld\n" - "stack_pos %d pos_desc %d count_desc %d disp %ld\n", + DO_DEBUG( opal_output( 0, "unpack start pos_desc %d count_desc %" PRIsize_t " disp %ld\n" + "stack_pos %d pos_desc %d count_desc %" PRIsize_t " disp %ld\n", pos_desc, count_desc, (long)(conv_ptr - pConvertor->pBaseBuf), - pConvertor->stack_pos, pStack->index, (int)pStack->count, (long)(pStack->disp) ); ); + pConvertor->stack_pos, pStack->index, pStack->count, (long)(pStack->disp) ); ); for( iov_count = 0; iov_count < (*out_size); iov_count++ ) { iov_ptr = (unsigned char *) iov[iov_count].iov_base; @@ -485,7 +479,7 @@ opal_unpack_general_function( opal_convertor_t* pConvertor, type = description[pos_desc].elem.common.type; OPAL_DATATYPE_SAFEGUARD_POINTER( conv_ptr + pElem->elem.disp, pData->size, pConvertor->pBaseBuf, pData, pConvertor->count ); - DO_DEBUG( opal_output( 0, "unpack (%p, %ld) -> (%p:%ld, %d, %ld) type %s\n", + DO_DEBUG( opal_output( 0, "unpack (%p, %ld) -> (%p:%ld, %" PRIsize_t ", %ld) type %s\n", (void*)iov_ptr, iov_len_local, (void*)pConvertor->pBaseBuf, conv_ptr + pElem->elem.disp - pConvertor->pBaseBuf, count_desc, 
description[pos_desc].elem.extent, @@ -520,22 +514,20 @@ opal_unpack_general_function( opal_convertor_t* pConvertor, iov_ptr, 0, iov_len_local, &temp ); - pConvertor->partial_length = (uint32_t)iov_len_local; + pConvertor->partial_length = iov_len_local; iov_len_local = 0; } goto complete_loop; } if( OPAL_DATATYPE_END_LOOP == pElem->elem.common.type ) { /* end of the current loop */ - DO_DEBUG( opal_output( 0, "unpack end_loop count %d stack_pos %d pos_desc %d disp %ld space %lu\n", - (int)pStack->count, pConvertor->stack_pos, pos_desc, - (long)pStack->disp, (unsigned long)iov_len_local ); ); + DO_DEBUG( opal_output( 0, "unpack end_loop count %" PRIsize_t " stack_pos %d pos_desc %d disp %ld space %lu\n", + pStack->count, pConvertor->stack_pos, pos_desc, + pStack->disp, (unsigned long)iov_len_local ); ); if( --(pStack->count) == 0 ) { /* end of loop */ if( 0 == pConvertor->stack_pos ) { - /* Do the same thing as when the loop is completed */ - iov[iov_count].iov_len -= iov_len_local; /* update the amount of valid data */ - total_unpacked += iov[iov_count].iov_len; - iov_count++; /* go to the next */ - goto complete_conversion; + /* we're done. 
Force the exit of the main for loop (around iovec) */ + *out_size = iov_count; + goto complete_loop; } pConvertor->stack_pos--; pStack--; @@ -551,9 +543,9 @@ opal_unpack_general_function( opal_convertor_t* pConvertor, } conv_ptr = pConvertor->pBaseBuf + pStack->disp; UPDATE_INTERNAL_COUNTERS( description, pos_desc, pElem, count_desc ); - DO_DEBUG( opal_output( 0, "unpack new_loop count %d stack_pos %d pos_desc %d disp %ld space %lu\n", - (int)pStack->count, pConvertor->stack_pos, pos_desc, - (long)pStack->disp, (unsigned long)iov_len_local ); ); + DO_DEBUG( opal_output( 0, "unpack new_loop count %" PRIsize_t " stack_pos %d pos_desc %d disp %ld space %lu\n", + pStack->count, pConvertor->stack_pos, pos_desc, + pStack->disp, (unsigned long)iov_len_local ); ); } if( OPAL_DATATYPE_LOOP == pElem->elem.common.type ) { PUSH_STACK( pStack, pConvertor->stack_pos, pos_desc, OPAL_DATATYPE_LOOP, count_desc, @@ -569,7 +561,6 @@ opal_unpack_general_function( opal_convertor_t* pConvertor, iov[iov_count].iov_len -= iov_len_local; /* update the amount of valid data */ total_unpacked += iov[iov_count].iov_len; } - complete_conversion: *max_data = total_unpacked; pConvertor->bConverted += total_unpacked; /* update the already converted bytes */ *out_size = iov_count; @@ -580,7 +571,7 @@ opal_unpack_general_function( opal_convertor_t* pConvertor, /* Save the global position for the next round */ PUSH_STACK( pStack, pConvertor->stack_pos, pos_desc, pElem->elem.common.type, count_desc, conv_ptr - pConvertor->pBaseBuf ); - DO_DEBUG( opal_output( 0, "unpack save stack stack_pos %d pos_desc %d count_desc %d disp %ld\n", - pConvertor->stack_pos, pStack->index, (int)pStack->count, (long)pStack->disp ); ); + DO_DEBUG( opal_output( 0, "unpack save stack stack_pos %d pos_desc %d count_desc %" PRIsize_t" disp %ld\n", + pConvertor->stack_pos, pStack->index, pStack->count, (long)pStack->disp ); ); return 0; } diff --git a/opal/datatype/opal_datatype_unpack.h b/opal/datatype/opal_datatype_unpack.h 
index 44f7505a58c..db5b58fd3c3 100644 --- a/opal/datatype/opal_datatype_unpack.h +++ b/opal/datatype/opal_datatype_unpack.h @@ -1,12 +1,12 @@ /* -*- Mode: C; c-basic-offset:4 ; -*- */ /* - * Copyright (c) 2004-2009 The University of Tennessee and The University + * Copyright (c) 2004-2019 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. * Copyright (c) 2009 Oak Ridge National Labs. All rights reserved. * Copyright (c) 2011 NVIDIA Corporation. All rights reserved. - * Copyright (c) 2017 Research Organization for Information Science - * and Technology (RIST). All rights reserved. + * Copyright (c) 2017-2018 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -26,86 +26,178 @@ CONVERTOR->cbmemcpy( (DST), (SRC), (BLENGTH), (CONVERTOR) ) #endif +/** + * This function deals only with partial elements. The COUNT points however to the whole leftover count, + * but this function is only expected to operate on an amount less than blength, that would allow the rest + * of the pack process to handle only entire blength blocks (plus the left over). + * + * Return 1 if we are now aligned on a block, 0 otherwise. + */ +static inline int +unpack_partial_blocklen( opal_convertor_t* CONVERTOR, + const dt_elem_desc_t* ELEM, + size_t* COUNT, + unsigned char** packed, + unsigned char** memory, + size_t* SPACE ) +{ + const ddt_elem_desc_t* _elem = &((ELEM)->elem); + size_t do_now_bytes = opal_datatype_basicDatatypes[_elem->common.type]->size; + size_t do_now = (*COUNT); + unsigned char* _memory = (*memory) + _elem->disp; + unsigned char* _packed = *packed; + + assert( *(COUNT) <= (_elem->count * _elem->blocklen)); + + /** + * First check if we already did something on this element ? 
The COUNT is the number + * of remaining predefined types in the current elem, not how many predefined types + * should be manipulated in the current call (this number is instead reflected on the + * SPACE). + */ + if( 0 == (do_now = (*COUNT) % _elem->blocklen) ) + return 1; + + size_t left_in_block = do_now; /* left in the current blocklen */ + + if( (do_now_bytes * do_now) > *(SPACE) ) + do_now = (*SPACE) / do_now_bytes; + + do_now_bytes *= do_now; + + OPAL_DATATYPE_SAFEGUARD_POINTER( _memory, do_now_bytes, (CONVERTOR)->pBaseBuf, + (CONVERTOR)->pDesc, (CONVERTOR)->count ); + DO_DEBUG( opal_output( 0, "unpack memcpy( %p, %p, %lu ) => space %lu [prolog]\n", + (void*)_memory, (void*)_packed, (unsigned long)do_now_bytes, (unsigned long)(*(SPACE)) ); ); + MEMCPY_CSUM( _memory, _packed, do_now_bytes, (CONVERTOR) ); + *(memory) += (ptrdiff_t)do_now_bytes; + if( do_now == left_in_block ) /* compensate if completed a blocklen */ + *(memory) += _elem->extent - (_elem->blocklen * opal_datatype_basicDatatypes[_elem->common.type]->size); + + *(COUNT) -= do_now; + *(SPACE) -= do_now_bytes; + *(packed) += do_now_bytes; + return (do_now == left_in_block); +} + static inline void -unpack_predefined_data( opal_convertor_t* CONVERTOR, /* the convertor */ - const dt_elem_desc_t* ELEM, /* the element description */ - uint32_t* COUNT, /* the number of elements */ - unsigned char** SOURCE, /* the source pointer */ - unsigned char** DESTINATION, /* the destination pointer */ - size_t* SPACE ) /* the space in the destination buffer */ +unpack_predefined_data( opal_convertor_t* CONVERTOR, + const dt_elem_desc_t* ELEM, + size_t* COUNT, + unsigned char** packed, + unsigned char** memory, + size_t* SPACE ) { - uint32_t _copy_count = *(COUNT); - size_t _copy_blength; const ddt_elem_desc_t* _elem = &((ELEM)->elem); - unsigned char* _destination = (*DESTINATION) + _elem->disp; + size_t blocklen_bytes = opal_datatype_basicDatatypes[_elem->common.type]->size; + size_t cando_count = (*COUNT), 
do_now_bytes; + unsigned char* _memory = (*memory) + _elem->disp; + unsigned char* _packed = *packed; + + assert( 0 == (cando_count % _elem->blocklen) ); /* no partials here */ + assert( *(COUNT) <= (_elem->count * _elem->blocklen)); + + if( (blocklen_bytes * cando_count) > *(SPACE) ) + cando_count = (*SPACE) / blocklen_bytes; - _copy_blength = opal_datatype_basicDatatypes[_elem->common.type]->size; - if( (_copy_count * _copy_blength) > *(SPACE) ) { - _copy_count = (uint32_t)(*(SPACE) / _copy_blength); - if( 0 == _copy_count ) return; /* nothing to do */ + /* premptively update the number of COUNT we will return. */ + *(COUNT) -= cando_count; + + if( 1 == _elem->blocklen ) { /* Do as many full blocklen as possible */ + for(; cando_count > 0; cando_count--) { + OPAL_DATATYPE_SAFEGUARD_POINTER( _memory, blocklen_bytes, (CONVERTOR)->pBaseBuf, + (CONVERTOR)->pDesc, (CONVERTOR)->count ); + DO_DEBUG( opal_output( 0, "unpack memcpy( %p, %p, %lu ) => space %lu [blen = 1]\n", + (void*)_memory, (void*)_packed, (unsigned long)blocklen_bytes, (unsigned long)(*(SPACE) - (_packed - *(packed))) ); ); + MEMCPY_CSUM( _memory, _packed, blocklen_bytes, (CONVERTOR) ); + _packed += blocklen_bytes; + _memory += _elem->extent; + } + goto update_and_return; } - if( (ptrdiff_t)_copy_blength == _elem->extent ) { - _copy_blength *= _copy_count; - /* the extent and the size of the basic datatype are equal */ - OPAL_DATATYPE_SAFEGUARD_POINTER( _destination, _copy_blength, (CONVERTOR)->pBaseBuf, - (CONVERTOR)->pDesc, (CONVERTOR)->count ); - DO_DEBUG( opal_output( 0, "unpack 1. 
memcpy( %p, %p, %lu ) => space %lu\n", - (void*)_destination, (void*)*(SOURCE), (unsigned long)_copy_blength, (unsigned long)(*(SPACE)) ); ); - MEMCPY_CSUM( _destination, *(SOURCE), _copy_blength, (CONVERTOR) ); - *(SOURCE) += _copy_blength; - _destination += _copy_blength; - } else { - uint32_t _i; - for( _i = 0; _i < _copy_count; _i++ ) { - OPAL_DATATYPE_SAFEGUARD_POINTER( _destination, _copy_blength, (CONVERTOR)->pBaseBuf, - (CONVERTOR)->pDesc, (CONVERTOR)->count ); + if( (1 < _elem->count) && (_elem->blocklen <= cando_count) ) { + blocklen_bytes *= _elem->blocklen; + + do { /* Do as many full blocklen as possible */ + OPAL_DATATYPE_SAFEGUARD_POINTER( _memory, blocklen_bytes, (CONVERTOR)->pBaseBuf, + (CONVERTOR)->pDesc, (CONVERTOR)->count ); DO_DEBUG( opal_output( 0, "unpack 2. memcpy( %p, %p, %lu ) => space %lu\n", - (void*)_destination, (void*)*(SOURCE), (unsigned long)_copy_blength, (unsigned long)(*(SPACE) - (_i * _copy_blength)) ); ); - MEMCPY_CSUM( _destination, *(SOURCE), _copy_blength, (CONVERTOR) ); - *(SOURCE) += _copy_blength; - _destination += _elem->extent; - } - _copy_blength *= _copy_count; + (void*)_memory, (void*)_packed, (unsigned long)blocklen_bytes, (unsigned long)(*(SPACE) - (_packed - *(packed))) ); ); + MEMCPY_CSUM( _memory, _packed, blocklen_bytes, (CONVERTOR) ); + _packed += blocklen_bytes; + _memory += _elem->extent; + cando_count -= _elem->blocklen; + } while (_elem->blocklen <= cando_count); + } + + /** + * As an epilog do anything left from the last blocklen. + */ + if( 0 != cando_count ) { + assert( (cando_count < _elem->blocklen) || + ((1 == _elem->count) && (cando_count <= _elem->blocklen)) ); + do_now_bytes = cando_count * opal_datatype_basicDatatypes[_elem->common.type]->size; + OPAL_DATATYPE_SAFEGUARD_POINTER( _memory, do_now_bytes, (CONVERTOR)->pBaseBuf, + (CONVERTOR)->pDesc, (CONVERTOR)->count ); + DO_DEBUG( opal_output( 0, "unpack 3. 
memcpy( %p, %p, %lu ) => space %lu [epilog]\n", + (void*)_memory, (void*)_packed, (unsigned long)do_now_bytes, (unsigned long)(*(SPACE) - (_packed - *(packed))) ); ); + MEMCPY_CSUM( _memory, _packed, do_now_bytes, (CONVERTOR) ); + _memory += do_now_bytes; + _packed += do_now_bytes; } - (*DESTINATION) = _destination - _elem->disp; - *(SPACE) -= _copy_blength; - *(COUNT) -= _copy_count; + + update_and_return: + *(memory) = _memory - _elem->disp; + *(SPACE) -= (_packed - *packed); + *(packed) = _packed; } static inline void unpack_contiguous_loop( opal_convertor_t* CONVERTOR, const dt_elem_desc_t* ELEM, - uint32_t* COUNT, - unsigned char** SOURCE, - unsigned char** DESTINATION, + size_t* COUNT, + unsigned char** packed, + unsigned char** memory, size_t* SPACE ) { const ddt_loop_desc_t *_loop = (ddt_loop_desc_t*)(ELEM); const ddt_endloop_desc_t* _end_loop = (ddt_endloop_desc_t*)((ELEM) + _loop->items); - unsigned char* _destination = (*DESTINATION) + _end_loop->first_elem_disp; - uint32_t _copy_loops = *(COUNT); - uint32_t _i; + unsigned char* _memory = (*memory) + _end_loop->first_elem_disp; + size_t _copy_loops = *(COUNT); if( (_copy_loops * _end_loop->size) > *(SPACE) ) - _copy_loops = (uint32_t)(*(SPACE) / _end_loop->size); - for( _i = 0; _i < _copy_loops; _i++ ) { - OPAL_DATATYPE_SAFEGUARD_POINTER( _destination, _end_loop->size, (CONVERTOR)->pBaseBuf, + _copy_loops = (*(SPACE) / _end_loop->size); + for(size_t _i = 0; _i < _copy_loops; _i++ ) { + OPAL_DATATYPE_SAFEGUARD_POINTER( _memory, _end_loop->size, (CONVERTOR)->pBaseBuf, (CONVERTOR)->pDesc, (CONVERTOR)->count ); DO_DEBUG( opal_output( 0, "unpack 3. 
memcpy( %p, %p, %lu ) => space %lu\n", - (void*)_destination, (void*)*(SOURCE), (unsigned long)_end_loop->size, (unsigned long)(*(SPACE) - _i * _end_loop->size) ); ); - MEMCPY_CSUM( _destination, *(SOURCE), _end_loop->size, (CONVERTOR) ); - *(SOURCE) += _end_loop->size; - _destination += _loop->extent; + (void*)_memory, (void*)*(packed), (unsigned long)_end_loop->size, (unsigned long)(*(SPACE) - _i * _end_loop->size) ); ); + MEMCPY_CSUM( _memory, *(packed), _end_loop->size, (CONVERTOR) ); + *(packed) += _end_loop->size; + _memory += _loop->extent; } - *(DESTINATION) = _destination - _end_loop->first_elem_disp; - *(SPACE) -= _copy_loops * _end_loop->size; - *(COUNT) -= _copy_loops; + *(memory) = _memory - _end_loop->first_elem_disp; + *(SPACE) -= _copy_loops * _end_loop->size; + *(COUNT) -= _copy_loops; } -#define UNPACK_PREDEFINED_DATATYPE( CONVERTOR, ELEM, COUNT, SOURCE, DESTINATION, SPACE ) \ - unpack_predefined_data( (CONVERTOR), (ELEM), &(COUNT), &(SOURCE), &(DESTINATION), &(SPACE) ) +#define UNPACK_PARTIAL_BLOCKLEN( CONVERTOR, /* the convertor */ \ + ELEM, /* the basic element to be packed */ \ + COUNT, /* the number of elements */ \ + PACKED, /* the destination pointer (char*) */ \ + MEMORY, /* the source pointer (char*) */ \ + SPACE ) /* the space in the destination buffer */ \ +unpack_partial_blocklen( (CONVERTOR), (ELEM), &(COUNT), &(PACKED), &(MEMORY), &(SPACE) ) + +#define UNPACK_PREDEFINED_DATATYPE( CONVERTOR, /* the convertor */ \ + ELEM, /* the basic element to be packed */ \ + COUNT, /* the number of elements */ \ + PACKED, /* the destination pointer (char*) */ \ + MEMORY, /* the source pointer (char*) */ \ + SPACE ) /* the space in the destination buffer */ \ +unpack_predefined_data( (CONVERTOR), (ELEM), &(COUNT), &(PACKED), &(MEMORY), &(SPACE) ) -#define UNPACK_CONTIGUOUS_LOOP( CONVERTOR, ELEM, COUNT, SOURCE, DESTINATION, SPACE ) \ - unpack_contiguous_loop( (CONVERTOR), (ELEM), &(COUNT), &(SOURCE), &(DESTINATION), &(SPACE) ) +#define 
UNPACK_CONTIGUOUS_LOOP( CONVERTOR, ELEM, COUNT, PACKED, MEMORY, SPACE ) \ + unpack_contiguous_loop( (CONVERTOR), (ELEM), &(COUNT), &(PACKED), &(MEMORY), &(SPACE) ) #endif /* OPAL_DATATYPE_UNPACK_H_HAS_BEEN_INCLUDED */ diff --git a/opal/dss/dss_load_unload.c b/opal/dss/dss_load_unload.c index 0fa02d01c28..b35b58d6512 100644 --- a/opal/dss/dss_load_unload.c +++ b/opal/dss/dss_load_unload.c @@ -12,6 +12,7 @@ * Copyright (c) 2014-2018 Intel, Inc. All rights reserved. * Copyright (c) 2015-2017 Research Organization for Information Science * and Technology (RIST). All rights reserved. + * Copyright (c) 2018 IBM Corporation. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -372,6 +373,10 @@ int opal_value_unload(opal_value_t *kv, *data = kv->data.ptr; break; + case OPAL_VPID: + memcpy(*data, &kv->data.name.vpid, sizeof(opal_vpid_t)); + break; + default: OPAL_ERROR_LOG(OPAL_ERR_NOT_SUPPORTED); return OPAL_ERR_NOT_SUPPORTED; diff --git a/opal/include/opal/constants.h b/opal/include/opal/constants.h index 246e964da02..9ea9d0362da 100644 --- a/opal/include/opal/constants.h +++ b/opal/include/opal/constants.h @@ -98,7 +98,8 @@ enum { OPAL_ERR_HEARTBEAT_ALERT = (OPAL_ERR_BASE - 67), OPAL_ERR_FILE_ALERT = (OPAL_ERR_BASE - 68), OPAL_ERR_MODEL_DECLARED = (OPAL_ERR_BASE - 69), - OPAL_PMIX_LAUNCH_DIRECTIVE = (OPAL_ERR_BASE - 70) + OPAL_PMIX_LAUNCH_DIRECTIVE = (OPAL_ERR_BASE - 70), + OPAL_OPERATION_SUCCEEDED = (OPAL_ERR_BASE - 71) }; #define OPAL_ERR_MAX (OPAL_ERR_BASE - 100) diff --git a/opal/include/opal/sys/atomic_impl.h b/opal/include/opal/sys/atomic_impl.h index 027b771162a..919482f902d 100644 --- a/opal/include/opal/sys/atomic_impl.h +++ b/opal/include/opal/sys/atomic_impl.h @@ -495,7 +495,7 @@ static inline int opal_atomic_trylock(opal_atomic_lock_t *lock) { int32_t unlocked = OPAL_ATOMIC_LOCK_UNLOCKED; - bool ret = opal_atomic_compare_exchange_strong_32 (&lock->u.lock, &unlocked, OPAL_ATOMIC_LOCK_LOCKED); + bool ret = 
opal_atomic_compare_exchange_strong_acq_32 (&lock->u.lock, &unlocked, OPAL_ATOMIC_LOCK_LOCKED); return (ret == false) ? 1 : 0; } diff --git a/opal/include/opal/sys/gcc_builtin/atomic.h b/opal/include/opal/sys/gcc_builtin/atomic.h index c6ef6eb9c30..d85ff02bd6a 100644 --- a/opal/include/opal/sys/gcc_builtin/atomic.h +++ b/opal/include/opal/sys/gcc_builtin/atomic.h @@ -15,6 +15,8 @@ * reserved. * Copyright (c) 2016-2017 Research Organization for Information Science * and Technology (RIST). All rights reserved. + * Copyright (c) 2018 Triad National Security, LLC. All rights + * reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -57,7 +59,14 @@ static inline void opal_atomic_mb(void) static inline void opal_atomic_rmb(void) { +#if OPAL_ASSEMBLY_ARCH == OPAL_X86_64 + /* work around a bug in older gcc versions where ACQUIRE seems to get + * treated as a no-op instead of being equivalent to + * __asm__ __volatile__("": : :"memory") */ + __atomic_thread_fence (__ATOMIC_SEQ_CST); +#else __atomic_thread_fence (__ATOMIC_ACQUIRE); +#endif } static inline void opal_atomic_wmb(void) diff --git a/opal/include/opal_config_bottom.h b/opal/include/opal_config_bottom.h index 58823471774..da4086df01f 100644 --- a/opal/include/opal_config_bottom.h +++ b/opal/include/opal_config_bottom.h @@ -260,7 +260,6 @@ including stdint.h */ #define __STDC_LIMIT_MACROS #endif -#include "opal_config.h" #include "opal_stdint.h" /*********************************************************************** diff --git a/opal/mca/base/mca_base_var.c b/opal/mca/base/mca_base_var.c index 7d55e703d68..d409296d3cd 100644 --- a/opal/mca/base/mca_base_var.c +++ b/opal/mca/base/mca_base_var.c @@ -10,7 +10,7 @@ * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. - * Copyright (c) 2008-2015 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2008-2018 Cisco Systems, Inc. 
All rights reserved * Copyright (c) 2012-2018 Los Alamos National Security, LLC. All rights * reserved. * Copyright (c) 2014-2016 Intel, Inc. All rights reserved. @@ -2146,7 +2146,8 @@ int mca_base_var_dump(int vari, char ***out, mca_base_var_dump_type_t output_typ asprintf(out[0] + line++, "%ssource:%s", tmp, source_string); /* Output whether it's read only or writable */ - asprintf(out[0] + line++, "%sstatus:%s", tmp, VAR_IS_DEFAULT_ONLY(var[0]) ? "read-only" : "writeable"); + asprintf(out[0] + line++, "%sstatus:%s", tmp, + VAR_IS_SETTABLE(var[0]) ? "writeable" : "read-only"); /* Output the info level of this parametere */ asprintf(out[0] + line++, "%slevel:%d", tmp, var->mbv_info_lvl + 1); diff --git a/opal/mca/btl/ofi/Makefile.am b/opal/mca/btl/ofi/Makefile.am deleted file mode 100644 index fdaeec865d7..00000000000 --- a/opal/mca/btl/ofi/Makefile.am +++ /dev/null @@ -1,62 +0,0 @@ -# -# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana -# University Research and Technology -# Corporation. All rights reserved. -# Copyright (c) 2004-2005 The University of Tennessee and The University -# of Tennessee Research Foundation. All rights -# reserved. -# Copyright (c) 2004-2009 High Performance Computing Center Stuttgart, -# University of Stuttgart. All rights reserved. -# Copyright (c) 2004-2005 The Regents of the University of California. -# All rights reserved. -# Copyright (c) 2009-2014 Cisco Systems, Inc. All rights reserved. -# Copyright (c) 2013 NVIDIA Corporation. All rights reserved. -# Copyright (c) 2017 IBM Corporation. All rights reserved. -# Copyright (c) 2018 Intel, inc. 
All rights reserved -# $COPYRIGHT$ -# -# Additional copyrights may follow -# -# $HEADER$ -# - -#dist_opaldata_DATA = help-mpi-btl-ofi.txt - -AM_CPPFLAGS = $(opal_common_ofi_CPPFLAGS) -sources = \ - btl_ofi.h \ - btl_ofi_component.c \ - btl_ofi_endpoint.h \ - btl_ofi_endpoint.c \ - btl_ofi_module.c \ - btl_ofi_rdma.h \ - btl_ofi_rdma.c \ - btl_ofi_atomics.c - -# Make the output library in this directory, and name it either -# mca__.la (for DSO builds) or libmca__.la -# (for static builds). - -if MCA_BUILD_opal_btl_ofi_DSO -lib = -lib_sources = -component = mca_btl_ofi.la -component_sources = $(sources) -else -lib = libmca_btl_ofi.la -lib_sources = $(sources) -component = -component_sources = -endif - -mcacomponentdir = $(opallibdir) -mcacomponent_LTLIBRARIES = $(component) -mca_btl_ofi_la_SOURCES = $(component_sources) -mca_btl_ofi_la_LDFLAGS = -module -avoid-version \ - $(opal_common_ofi_LDFLAGS) -mca_btl_ofi_la_LIBADD = $(top_builddir)/opal/lib@OPAL_LIB_PREFIX@open-pal.la \ - $(OPAL_TOP_BUILDDIR)/opal/mca/common/ofi/lib@OPAL_LIB_PREFIX@mca_common_ofi.la - -noinst_LTLIBRARIES = $(lib) -libmca_btl_ofi_la_SOURCES = $(lib_sources) -libmca_btl_ofi_la_LDFLAGS = -module -avoid-version $(opal_common_ofi_LDFLAGS) diff --git a/opal/mca/btl/ofi/README b/opal/mca/btl/ofi/README deleted file mode 100644 index 97e3759830d..00000000000 --- a/opal/mca/btl/ofi/README +++ /dev/null @@ -1,88 +0,0 @@ -======================================== -Design notes on BTL/OFI -======================================== - -This is the RDMA only btl based on OFI Libfabric. The goal is to enable RDMA -with multiple vendor hardware through one interface. Most of the operations are -managed by upper layer (osc/rdma). This BTL is mostly doing the low level work. - -Tested providers: sockets,psm2,ugni - -======================================== - -Component - -This BTL is requesting libfabric version 1.5 API and will not support older versions. 
- -The required capabilities of this BTL is FI_ATOMIC and FI_RMA with the endpoint type -of FI_EP_RDM only. This BTL does NOT support libfabric provider that requires local -memory registration (FI_MR_LOCAL). - -BTL/OFI will initialize a module with ONLY the first compatible info returned from OFI. -This means it will rely on OFI provider to do load balancing. The support for multiple -device might be added later. - -The BTL creates only one endpoint and one CQ. - -======================================== - -Memory Registration - -Open MPI has a system in place to exchange remote address and always use the remote -virtual address to refer to a piece of memory. However, some libfabric providers might -not support the use of virtual address and instead will use zero-based offset addressing. - -FI_MR_VIRT_ADDR is the flag that determine this behavior. mca_btl_ofi_reg_mem() handles -this by storing the base address in registration handle in case of the provider does not -support FI_MR_VIRT_ADDR. This base address will be used to calculate the offset later in -RDMA/Atomic operations. - -The BTL will try to use the address of registration handle as the key. However, if the -provider supports FI_MR_PROV_KEY, it will use provider provided key. Simply does not care. - -The BTL does not register local operand or compare. This is why this BTL does not support -FI_MR_LOCAL and will allocate every buffer before registering. This means FI_MR_ALLOCATED -is supported. So to be explicit. - -Supported MR mode bits (will work with or without): - enum: - - FI_MR_BASIC - - FI_MR_SCALABLE - - mode bits: - - FI_MR_VIRT_ADDR - - FI_MR_ALLOCATED - - FI_MR_PROV_KEY - -The BTL does NOT support (will not work with): - - FI_MR_LOCAL - - FI_MR_MMU_NOTIFY - - FI_MR_RMA_EVENT - - FI_MR_ENDPOINT - -Just a reminder, in libfabric API 1.5... 
-FI_MR_BASIC == (FI_MR_PROV_KEY | FI_MR_ALLOCATED | FI_MR_VIRT_ADDR) - -======================================== - -Completions - -Every operation in this BTL is asynchronous. The completion handling will occur in -mca_btl_ofi_component_progress() where we read the CQ with the completion context and -execute the callback functions. The completions are local. No remote completion event is -generated as local completion already guarantee global completion. - -The BTL keep tracks of number of outstanding operations and provide flush interface. - -======================================== - -Sockets Provider - -Sockets provider is the proof of concept provider for libfabric. It is supposed to support -all the OFI API with emulations. This provider is considered very slow and bound to raise -problems that we might not see from other faster providers. - -Known Problems: - - sockets provider uses progress thread and can cause segfault in finalize as we free - the resources while progress thread is still using it. sleep(1) was put in - mca_btl_ofi_componenet_close() for this reason. diff --git a/opal/mca/btl/ofi/btl_ofi.h b/opal/mca/btl/ofi/btl_ofi.h deleted file mode 100644 index 02e44fd8b30..00000000000 --- a/opal/mca/btl/ofi/btl_ofi.h +++ /dev/null @@ -1,311 +0,0 @@ -/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ -/* - * Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2018 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * Copyright (c) 2015-2018 Los Alamos National Security, LLC. All rights - * reserved. 
- * Copyright (c) 2018 Intel, Inc, All rights reserved - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ -/** - * @file - */ -#ifndef MCA_BTL_OFI_H -#define MCA_BTL_OFI_H - -#include "opal_config.h" -#include -#include - -/* Open MPI includes */ -#include "opal/mca/event/event.h" -#include "opal/mca/btl/btl.h" -#include "opal/mca/btl/base/base.h" -#include "opal/mca/mpool/mpool.h" -#include "opal/mca/btl/base/btl_base_error.h" -#include "opal/mca/rcache/base/base.h" -#include "opal/mca/pmix/pmix.h" - -#include -#include -#include -#include -#include -#include - -BEGIN_C_DECLS -#define MCA_BTL_OFI_MAX_MODULES 16 -#define MCA_BTL_OFI_MAX_CQ_READ_ENTRIES 128 -#define MCA_BTL_OFI_NUM_CQE_READ 64 -#define MCA_BTL_OFI_PROGRESS_THRESHOLD 64 - -#define MCA_BTL_OFI_ABORT(args) mca_btl_ofi_exit(args) - -enum mca_btl_ofi_type { - MCA_BTL_OFI_TYPE_PUT = 1, - MCA_BTL_OFI_TYPE_GET, - MCA_BTL_OFI_TYPE_AOP, - MCA_BTL_OFI_TYPE_AFOP, - MCA_BTL_OFI_TYPE_CSWAP, - MCA_BTL_OFI_TYPE_TOTAL -}; - -struct mca_btl_ofi_context_t { - int32_t context_id; - - /* transmit context */ - struct fid_ep *tx_ctx; - struct fid_ep *rx_ctx; - - /* completion queue */ - struct fid_cq *cq; - - /* completion info freelist */ - /* We have it per context to reduce the thread contention - * on the freelist. Things can get really slow. 
*/ - opal_free_list_t comp_list; - - /* for thread locking */ - volatile int32_t lock; -}; -typedef struct mca_btl_ofi_context_t mca_btl_ofi_context_t; - -/** - * @brief OFI BTL module - */ -struct mca_btl_ofi_module_t { - /** base BTL interface */ - mca_btl_base_module_t super; - - /* libfabric components */ - struct fi_info *fabric_info; - struct fid_fabric *fabric; - struct fid_domain *domain; - struct fid_ep *ofi_endpoint; - struct fid_av *av; - - int num_contexts; - mca_btl_ofi_context_t *contexts; - - char *linux_device_name; - - /** whether the module has been fully initialized or not */ - bool initialized; - bool use_virt_addr; - bool is_scalable_ep; - - int64_t outstanding_rdma; - - /** linked list of BTL endpoints. this list is never searched so - * there is no need for a complicated structure here at this time*/ - opal_list_t endpoints; - - opal_mutex_t module_lock; - - /** registration cache */ - mca_rcache_base_module_t *rcache; -}; -typedef struct mca_btl_ofi_module_t mca_btl_ofi_module_t; - -extern mca_btl_ofi_module_t mca_btl_ofi_module_template; - -/** - * @brief OFI BTL component - */ -struct mca_btl_ofi_component_t { - mca_btl_base_component_3_0_0_t super; /**< base BTL component */ - - /** number of TL modules */ - int module_count; - int num_contexts_per_module; - int num_cqe_read; - int progress_threshold; - - size_t namelen; - - /** All BTL OFI modules (1 per tl) */ - mca_btl_ofi_module_t *modules[MCA_BTL_OFI_MAX_MODULES]; - -}; -typedef struct mca_btl_ofi_component_t mca_btl_ofi_component_t; - -OPAL_MODULE_DECLSPEC extern mca_btl_ofi_component_t mca_btl_ofi_component; - -struct mca_btl_base_registration_handle_t { - uint64_t rkey; - void *desc; - void *base_addr; -}; - -struct mca_btl_ofi_reg_t { - mca_rcache_base_registration_t base; - struct fid_mr *ur_mr; - - /* remote handle */ - mca_btl_base_registration_handle_t handle; -}; -typedef struct mca_btl_ofi_reg_t mca_btl_ofi_reg_t; - -OBJ_CLASS_DECLARATION(mca_btl_ofi_reg_t); - -/* 
completion structure store information needed - * for RDMA callbacks */ -struct mca_btl_ofi_completion_t { - opal_free_list_item_t comp_list; - opal_free_list_t *my_list; - - struct mca_btl_base_module_t *btl; - struct mca_btl_base_endpoint_t *endpoint; - struct mca_btl_ofi_context_t *my_context; - uint32_t type; - - void *local_address; - mca_btl_base_registration_handle_t *local_handle; - - /* information for atomic op */ - uint64_t operand; - uint64_t compare; - - mca_btl_base_rdma_completion_fn_t cbfunc; - void *cbcontext; - void *cbdata; - -}; -typedef struct mca_btl_ofi_completion_t mca_btl_ofi_completion_t; - -OBJ_CLASS_DECLARATION(mca_btl_ofi_completion_t); - -/** - * Initiate an asynchronous put. - * Completion Semantics: if this function returns a 1 then the operation - * is complete. a return of OPAL_SUCCESS indicates - * the put operation has been queued with the - * network. the local_handle can not be deregistered - * until all outstanding operations on that handle - * have been completed. 
- * - * @param btl (IN) BTL module - * @param endpoint (IN) BTL addressing information - * @param local_address (IN) Local address to put from (registered) - * @param remote_address (IN) Remote address to put to (registered remotely) - * @param local_handle (IN) Registration handle for region containing - * (local_address, local_address + size) - * @param remote_handle (IN) Remote registration handle for region containing - * (remote_address, remote_address + size) - * @param size (IN) Number of bytes to put - * @param flags (IN) Flags for this put operation - * @param order (IN) Ordering - * @param cbfunc (IN) Function to call on completion (if queued) - * @param cbcontext (IN) Context for the callback - * @param cbdata (IN) Data for callback - * - * @retval OPAL_SUCCESS The descriptor was successfully queued for a put - * @retval OPAL_ERROR The descriptor was NOT successfully queued for a put - * @retval OPAL_ERR_OUT_OF_RESOURCE Insufficient resources to queue the put - * operation. Try again later - * @retval OPAL_ERR_NOT_AVAILABLE Put can not be performed due to size or - * alignment restrictions. - */ -int mca_btl_ofi_put (struct mca_btl_base_module_t *btl, - struct mca_btl_base_endpoint_t *endpoint, void *local_address, - uint64_t remote_address, struct mca_btl_base_registration_handle_t *local_handle, - struct mca_btl_base_registration_handle_t *remote_handle, size_t size, int flags, - int order, mca_btl_base_rdma_completion_fn_t cbfunc, void *cbcontext, void *cbdata); - -/** - * Initiate an asynchronous get. - * Completion Semantics: if this function returns a 1 then the operation - * is complete. a return of OPAL_SUCCESS indicates - * the get operation has been queued with the - * network. the local_handle can not be deregistered - * until all outstanding operations on that handle - * have been completed. 
- * - * @param btl (IN) BTL module - * @param endpoint (IN) BTL addressing information - * @param local_address (IN) Local address to put from (registered) - * @param remote_address (IN) Remote address to put to (registered remotely) - * @param local_handle (IN) Registration handle for region containing - * (local_address, local_address + size) - * @param remote_handle (IN) Remote registration handle for region containing - * (remote_address, remote_address + size) - * @param size (IN) Number of bytes to put - * @param flags (IN) Flags for this put operation - * @param order (IN) Ordering - * @param cbfunc (IN) Function to call on completion (if queued) - * @param cbcontext (IN) Context for the callback - * @param cbdata (IN) Data for callback - * - * @retval OPAL_SUCCESS The descriptor was successfully queued for a put - * @retval OPAL_ERROR The descriptor was NOT successfully queued for a put - * @retval OPAL_ERR_OUT_OF_RESOURCE Insufficient resources to queue the put - * operation. Try again later - * @retval OPAL_ERR_NOT_AVAILABLE Put can not be performed due to size or - * alignment restrictions. 
- */ -int mca_btl_ofi_get (struct mca_btl_base_module_t *btl, - struct mca_btl_base_endpoint_t *endpoint, void *local_address, - uint64_t remote_address, struct mca_btl_base_registration_handle_t *local_handle, - struct mca_btl_base_registration_handle_t *remote_handle, size_t size, int flags, - int order, mca_btl_base_rdma_completion_fn_t cbfunc, void *cbcontext, void *cbdata); - -int mca_btl_ofi_aop (struct mca_btl_base_module_t *btl, struct mca_btl_base_endpoint_t *endpoint, - uint64_t remote_address, mca_btl_base_registration_handle_t *remote_handle, - mca_btl_base_atomic_op_t op, uint64_t operand, int flags, int order, - mca_btl_base_rdma_completion_fn_t cbfunc, void *cbcontext, void *cbdata); - -int mca_btl_ofi_afop (struct mca_btl_base_module_t *btl, struct mca_btl_base_endpoint_t *endpoint, - void *local_address, uint64_t remote_address, mca_btl_base_registration_handle_t *local_handle, - mca_btl_base_registration_handle_t *remote_handle, mca_btl_base_atomic_op_t op, - uint64_t operand, int flags, int order, mca_btl_base_rdma_completion_fn_t cbfunc, - void *cbcontext, void *cbdata); - -int mca_btl_ofi_acswap (struct mca_btl_base_module_t *btl, struct mca_btl_base_endpoint_t *endpoint, - void *local_address, uint64_t remote_address, mca_btl_base_registration_handle_t *local_handle, - mca_btl_base_registration_handle_t *remote_handle, uint64_t compare, uint64_t value, int flags, - int order, mca_btl_base_rdma_completion_fn_t cbfunc, void *cbcontext, void *cbdata); - - -int mca_btl_ofi_flush (struct mca_btl_base_module_t *btl, struct mca_btl_base_endpoint_t *endpoint); - -int mca_btl_ofi_finalize (mca_btl_base_module_t *btl); - -void mca_btl_ofi_rcache_init (mca_btl_ofi_module_t *module); -int mca_btl_ofi_reg_mem (void *reg_data, void *base, size_t size, - mca_rcache_base_registration_t *reg); -int mca_btl_ofi_dereg_mem (void *reg_data, mca_rcache_base_registration_t *reg); - -int mca_btl_ofi_context_progress(mca_btl_ofi_context_t *context); -void 
mca_btl_ofi_exit(void); - -/* thread atomics */ -static inline bool mca_btl_ofi_context_trylock (mca_btl_ofi_context_t *context) -{ - return (context->lock || OPAL_ATOMIC_SWAP_32(&context->lock, 1)); -} - -static inline void mca_btl_ofi_context_lock(mca_btl_ofi_context_t *context) -{ - while (mca_btl_ofi_context_trylock(context)); -} - -static inline void mca_btl_ofi_context_unlock(mca_btl_ofi_context_t *context) -{ - opal_atomic_mb(); - context->lock = 0; -} - -END_C_DECLS -#endif diff --git a/opal/mca/btl/ofi/btl_ofi_atomics.c b/opal/mca/btl/ofi/btl_ofi_atomics.c deleted file mode 100644 index e5364ed6489..00000000000 --- a/opal/mca/btl/ofi/btl_ofi_atomics.c +++ /dev/null @@ -1,192 +0,0 @@ -/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ -/* - * Copyright (c) 2014-2018 Los Alamos National Security, LLC. All rights - * reserved. - * Copyright (c) 2018 Intel, Inc, All rights reserved - * - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include -#include "btl_ofi_rdma.h" - -static inline int to_fi_op(mca_btl_base_atomic_op_t op) -{ - switch (op) { - case MCA_BTL_ATOMIC_ADD: - return FI_SUM; - case MCA_BTL_ATOMIC_SWAP: - return FI_ATOMIC_WRITE; - default: - BTL_ERROR(("Unknown or unsupported atomic op.")); - MCA_BTL_OFI_ABORT(); - - /* just to squash the warning */ - return OPAL_ERROR; - } -} - -int mca_btl_ofi_afop (struct mca_btl_base_module_t *btl, struct mca_btl_base_endpoint_t *endpoint, - void *local_address, uint64_t remote_address, mca_btl_base_registration_handle_t *local_handle, - mca_btl_base_registration_handle_t *remote_handle, mca_btl_base_atomic_op_t op, - uint64_t operand, int flags, int order, mca_btl_base_rdma_completion_fn_t cbfunc, - void *cbcontext, void *cbdata) -{ - int rc; - int fi_datatype = FI_UINT64; - int fi_op; - - mca_btl_ofi_module_t *ofi_btl = (mca_btl_ofi_module_t *) btl; - mca_btl_ofi_endpoint_t *btl_endpoint = (mca_btl_ofi_endpoint_t*) endpoint; - mca_btl_ofi_completion_t *comp = 
NULL; - mca_btl_ofi_context_t *ofi_context; - - ofi_context = get_ofi_context(ofi_btl); - - if (flags & MCA_BTL_ATOMIC_FLAG_32BIT) { - fi_datatype = FI_UINT32; - } - - fi_op = to_fi_op(op); - - comp = mca_btl_ofi_completion_alloc(btl, endpoint, - ofi_context, - local_address, - local_handle, - cbfunc, cbcontext, cbdata, - MCA_BTL_OFI_TYPE_AFOP); - - /* copy the operand because it might get freed from upper layer */ - comp->operand = (uint64_t) operand; - - remote_address = (remote_address - (uint64_t) remote_handle->base_addr); - - rc = fi_fetch_atomic(ofi_context->tx_ctx, - (void*) &comp->operand, 1, NULL, /* operand */ - local_address, local_handle->desc, /* results */ - btl_endpoint->peer_addr, /* remote addr */ - remote_address, remote_handle->rkey, /* remote buffer */ - fi_datatype, fi_op, comp); - - if (rc == -FI_EAGAIN) { - return OPAL_ERR_OUT_OF_RESOURCE; - } else if (rc < 0) { - BTL_ERROR(("fi_fetch_atomic failed with rc=%d (%s)", rc, fi_strerror(-rc))); - MCA_BTL_OFI_ABORT(); - } - - MCA_BTL_OFI_NUM_RDMA_INC(ofi_btl); - - return OPAL_SUCCESS; -} - -int mca_btl_ofi_aop (struct mca_btl_base_module_t *btl, mca_btl_base_endpoint_t *endpoint, - uint64_t remote_address, mca_btl_base_registration_handle_t *remote_handle, - mca_btl_base_atomic_op_t op, uint64_t operand, int flags, int order, - mca_btl_base_rdma_completion_fn_t cbfunc, void *cbcontext, void *cbdata) -{ - int rc; - int fi_datatype = FI_UINT64; - int fi_op; - - mca_btl_ofi_module_t *ofi_btl = (mca_btl_ofi_module_t *) btl; - mca_btl_ofi_endpoint_t *btl_endpoint = (mca_btl_ofi_endpoint_t*) endpoint; - mca_btl_ofi_completion_t *comp = NULL; - mca_btl_ofi_context_t *ofi_context; - - ofi_context = get_ofi_context(ofi_btl); - - if (flags & MCA_BTL_ATOMIC_FLAG_32BIT) { - fi_datatype = FI_UINT32; - } - - fi_op = to_fi_op(op); - - comp = mca_btl_ofi_completion_alloc(btl, endpoint, - ofi_context, - NULL, - NULL, - cbfunc, cbcontext, cbdata, - MCA_BTL_OFI_TYPE_AOP); - - /* copy the operand because it might get 
freed from upper layer */ - comp->operand = (uint64_t) operand; - - remote_address = (remote_address - (uint64_t) remote_handle->base_addr); - - rc = fi_atomic(ofi_context->tx_ctx, - (void*) &comp->operand, 1, NULL, /* operand */ - btl_endpoint->peer_addr, /* remote addr */ - remote_address, remote_handle->rkey, /* remote buffer */ - fi_datatype, fi_op, comp); - - if (rc == -FI_EAGAIN) { - return OPAL_ERR_OUT_OF_RESOURCE; - } else if (rc < 0) { - BTL_ERROR(("fi_atomic failed with rc=%d (%s)", rc, fi_strerror(-rc))); - MCA_BTL_OFI_ABORT(); - } - - MCA_BTL_OFI_NUM_RDMA_INC(ofi_btl); - - return OPAL_SUCCESS; -} - -int mca_btl_ofi_acswap (struct mca_btl_base_module_t *btl, struct mca_btl_base_endpoint_t *endpoint, - void *local_address, uint64_t remote_address, mca_btl_base_registration_handle_t *local_handle, - mca_btl_base_registration_handle_t *remote_handle, uint64_t compare, uint64_t value, int flags, - int order, mca_btl_base_rdma_completion_fn_t cbfunc, void *cbcontext, void *cbdata) -{ - int rc; - int fi_datatype = FI_UINT64; - - mca_btl_ofi_module_t *ofi_btl = (mca_btl_ofi_module_t *) btl; - mca_btl_ofi_endpoint_t *btl_endpoint = (mca_btl_ofi_endpoint_t*) endpoint; - mca_btl_ofi_completion_t *comp = NULL; - mca_btl_ofi_context_t *ofi_context; - - ofi_context = get_ofi_context(ofi_btl); - - if (flags & MCA_BTL_ATOMIC_FLAG_32BIT) { - fi_datatype = FI_UINT32; - } - - comp = mca_btl_ofi_completion_alloc(btl, endpoint, - ofi_context, - local_address, - local_handle, - cbfunc, cbcontext, cbdata, - MCA_BTL_OFI_TYPE_CSWAP); - - /* copy the operand because it might get freed from upper layer */ - comp->operand = (uint64_t) value; - comp->compare = (uint64_t) compare; - - remote_address = (remote_address - (uint64_t) remote_handle->base_addr); - - /* perform atomic */ - rc = fi_compare_atomic(ofi_context->tx_ctx, - (void*) &comp->operand, 1, NULL, - (void*) &comp->compare, NULL, - local_address, local_handle->desc, - btl_endpoint->peer_addr, - remote_address, 
remote_handle->rkey, - fi_datatype, - FI_CSWAP, - comp); - - if (rc == -FI_EAGAIN) { - return OPAL_ERR_OUT_OF_RESOURCE; - } else if (rc < 0) { - BTL_ERROR(("fi_compare_atomic failed with rc=%d (%s)", rc, fi_strerror(-rc))); - MCA_BTL_OFI_ABORT(); - } - - MCA_BTL_OFI_NUM_RDMA_INC(ofi_btl); - - return OPAL_SUCCESS; -} diff --git a/opal/mca/btl/ofi/btl_ofi_component.c b/opal/mca/btl/ofi/btl_ofi_component.c deleted file mode 100644 index 1ee541afb36..00000000000 --- a/opal/mca/btl/ofi/btl_ofi_component.c +++ /dev/null @@ -1,681 +0,0 @@ -/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ -/* - * Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2005 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * Copyright (c) 2014-2018 Los Alamos National Security, LLC. All rights - * reserved. - * Copyright (c) 2018 Intel, Inc, All rights reserved - * - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - - -#include "opal_config.h" - -#include "opal/mca/btl/btl.h" -#include "opal/mca/btl/base/base.h" -#include "opal/mca/hwloc/base/base.h" - -#include - -#include "btl_ofi.h" -#include "btl_ofi_endpoint.h" -#include "btl_ofi_rdma.h" - -#define MCA_BTL_OFI_REQUIRED_CAPS (FI_RMA | FI_ATOMIC) -#define MCA_BTL_OFI_REQUESTED_MR_MODE (FI_MR_ALLOCATED | FI_MR_PROV_KEY | FI_MR_VIRT_ADDR) - -static char *prov_include; -static char *prov_exclude; -static char *ofi_progress_mode; -static bool disable_sep; -static int mca_btl_ofi_init_device(struct fi_info *info); - -/* validate information returned from fi_getinfo(). 
- * return OPAL_ERROR if we dont have what we need. */ -static int validate_info(struct fi_info *info) -{ - int mr_mode; - - BTL_VERBOSE(("validating device: %s", info->domain_attr->name)); - - /* we need exactly all the required bits */ - if ((info->caps & MCA_BTL_OFI_REQUIRED_CAPS) != MCA_BTL_OFI_REQUIRED_CAPS) { - BTL_VERBOSE(("unsupported caps")); - return OPAL_ERROR; - } - - /* we need FI_EP_RDM */ - if (info->ep_attr->type != FI_EP_RDM) { - BTL_VERBOSE(("unsupported EP type")); - return OPAL_ERROR; - } - - mr_mode = info->domain_attr->mr_mode; - - if (!(mr_mode == FI_MR_BASIC || mr_mode == FI_MR_SCALABLE || - (mr_mode & ~(FI_MR_VIRT_ADDR | FI_MR_ALLOCATED | FI_MR_PROV_KEY)) == 0)) { - BTL_VERBOSE(("unsupported MR mode")); - return OPAL_ERROR; - } - - if (!(info->tx_attr->op_flags | FI_DELIVERY_COMPLETE)) { - BTL_VERBOSE(("the endpoint tx_ctx does not support FI_DELIVERY_COMPLETE")); - return OPAL_ERROR; - } - - BTL_VERBOSE(("device: %s is good to go.", info->domain_attr->name)); - return OPAL_SUCCESS; -} - -/* Register the MCA parameters */ -static int mca_btl_ofi_component_register(void) -{ - mca_btl_ofi_module_t *module = &mca_btl_ofi_module_template; - - /* fi_getinfo with prov_name == NULL means ALL provider. - * Since now we are using the first valid info returned, I'm not sure - * if we need to provide the support for comma limited provider list. */ - prov_include = NULL; - (void) mca_base_component_var_register(&mca_btl_ofi_component.super.btl_version, - "provider_include", - "OFI provider that ofi btl will query for. This parameter only " - "accept ONE provider name. " - "(e.g., \"psm2\"; an empty value means that all providers will " - "be considered.", - MCA_BASE_VAR_TYPE_STRING, NULL, 0, 0, - OPAL_INFO_LVL_4, - MCA_BASE_VAR_SCOPE_READONLY, - &prov_include); - - /* TODO: this param has not been implemented. Not sure if we need it. 
" */ - prov_exclude = NULL; - (void) mca_base_component_var_register(&mca_btl_ofi_component.super.btl_version, - "provider_exclude", - "Comma-delimited list of OFI providers that are not considered for use " - "(default: \"sockets,mxm\"; empty value means that all providers will " - " be considered). " - "Mutually exclusive with btl_ofi_provider_include.", - MCA_BASE_VAR_TYPE_STRING, NULL, 0, 0, - OPAL_INFO_LVL_4, - MCA_BASE_VAR_SCOPE_READONLY, - &prov_exclude); - - mca_btl_ofi_component.num_cqe_read = MCA_BTL_OFI_NUM_CQE_READ; - (void) mca_base_component_var_register(&mca_btl_ofi_component.super.btl_version, - "num_cq_read", - "Number of completion entries to read from a single cq_read. ", - MCA_BASE_VAR_TYPE_INT, NULL, 0, 0, - OPAL_INFO_LVL_5, - MCA_BASE_VAR_SCOPE_READONLY, - &mca_btl_ofi_component.num_cqe_read); - - ofi_progress_mode = "unspec"; - (void) mca_base_component_var_register(&mca_btl_ofi_component.super.btl_version, - "progress_mode", - "requested provider progress mode. [unspec, auto, manual]" - "(default: unspec)", - MCA_BASE_VAR_TYPE_STRING, NULL, 0, 0, - OPAL_INFO_LVL_5, - MCA_BASE_VAR_SCOPE_READONLY, - &ofi_progress_mode); - - mca_btl_ofi_component.num_contexts_per_module = 1; - (void) mca_base_component_var_register(&mca_btl_ofi_component.super.btl_version, - "num_contexts_per_module", - "number of communication context per module to create. " - "This should increase multithreaded performance but it is " - "advised that this number should be lower than total cores.", - MCA_BASE_VAR_TYPE_INT, NULL, 0, 0, - OPAL_INFO_LVL_5, - MCA_BASE_VAR_SCOPE_READONLY, - &mca_btl_ofi_component.num_contexts_per_module); - - disable_sep = false; - (void) mca_base_component_var_register(&mca_btl_ofi_component.super.btl_version, - "disable_sep", - "force btl/ofi to never use scalable endpoint. 
", - MCA_BASE_VAR_TYPE_BOOL, NULL, 0, 0, - OPAL_INFO_LVL_5, - MCA_BASE_VAR_SCOPE_READONLY, - &disable_sep); - - mca_btl_ofi_component.progress_threshold = MCA_BTL_OFI_PROGRESS_THRESHOLD; - (void) mca_base_component_var_register(&mca_btl_ofi_component.super.btl_version, - "progress_threshold", - "number of outstanding operation before btl will progress " - "automatically. Tuning this might improve performance on " - "certain type of application.", - MCA_BASE_VAR_TYPE_INT, NULL, 0, 0, - OPAL_INFO_LVL_5, - MCA_BASE_VAR_SCOPE_READONLY, - &mca_btl_ofi_component.progress_threshold); - - /* for now we want this component to lose to btl/ugni and btl/vader */ - module->super.btl_exclusivity = MCA_BTL_EXCLUSIVITY_HIGH - 50; - - return mca_btl_base_param_register (&mca_btl_ofi_component.super.btl_version, - &module->super); -} - -static int mca_btl_ofi_component_open(void) -{ - mca_btl_ofi_component.module_count = 0; - return OPAL_SUCCESS; -} - -/* - * component cleanup - sanity checking of queue lengths - */ -static int mca_btl_ofi_component_close(void) -{ - /* If we don't sleep, sockets provider freaks out. */ - sleep(1); - return OPAL_SUCCESS; -} - -void mca_btl_ofi_exit(void) -{ - BTL_ERROR(("BTL OFI will now abort.")); - exit(1); -} - -/* - * OFI component initialization: - * read interface list from kernel and compare against component parameters - * then create a BTL instance for selected interfaces - */ - -static mca_btl_base_module_t **mca_btl_ofi_component_init (int *num_btl_modules, bool enable_progress_threads, - bool enable_mpi_threads) -{ - /* for this BTL to be useful the interface needs to support RDMA and certain atomic operations */ - int rc; - uint64_t progress_mode; - unsigned resource_count = 0; - struct mca_btl_base_module_t **base_modules; - - BTL_VERBOSE(("initializing ofi btl")); - - /* Set up libfabric hints. */ - uint32_t libfabric_api; - libfabric_api = fi_version(); - - /* bail if OFI version is less than 1.5. 
*/ - if (libfabric_api < FI_VERSION(1, 5)) { - BTL_VERBOSE(("ofi btl disqualified because OFI version < 1.5.")); - return NULL; - } - - struct fi_info *info, *info_list; - struct fi_info hints = {0}; - struct fi_ep_attr ep_attr = {0}; - struct fi_rx_attr rx_attr = {0}; - struct fi_tx_attr tx_attr = {0}; - struct fi_fabric_attr fabric_attr = {0}; - struct fi_domain_attr domain_attr = {0}; - - /* Select the provider */ - fabric_attr.prov_name = prov_include; - - domain_attr.mr_mode = MCA_BTL_OFI_REQUESTED_MR_MODE; - - /* message progression mode. */ - if (!strcmp(ofi_progress_mode, "auto")) { - progress_mode = FI_PROGRESS_AUTO; - } else if (!strcmp(ofi_progress_mode, "manual")) { - progress_mode = FI_PROGRESS_MANUAL; - } else { - progress_mode = FI_PROGRESS_UNSPEC; - } - - domain_attr.control_progress = progress_mode; - domain_attr.data_progress = progress_mode; - - /* select endpoint type */ - ep_attr.type = FI_EP_RDM; - - /* ask for capabilities */ - hints.caps = MCA_BTL_OFI_REQUIRED_CAPS; - - /* Ask for completion context */ - hints.mode = FI_CONTEXT; - - hints.fabric_attr = &fabric_attr; - hints.domain_attr = &domain_attr; - hints.ep_attr = &ep_attr; - hints.tx_attr = &tx_attr; - hints.rx_attr = &rx_attr; - - /* for now */ - tx_attr.iov_limit = 1; - rx_attr.iov_limit = 1; - - tx_attr.op_flags = FI_DELIVERY_COMPLETE; - - mca_btl_ofi_component.module_count = 0; - - /* do the query. */ - rc = fi_getinfo(FI_VERSION(1, 5), NULL, NULL, 0, &hints, &info_list); - if (0 != rc) { - BTL_VERBOSE(("fi_getinfo failed with code %d: %s",rc, fi_strerror(-rc))); - return NULL; - } - - /* count the number of resources/ */ - info = info_list; - while(info) { - resource_count++; - info = info->next; - } - BTL_VERBOSE(("ofi btl found %d possible resources.", resource_count)); - - info = info_list; - - while(info) { - rc = validate_info(info); - if (OPAL_SUCCESS == rc) { - /* Device passed sanity check, let's make a module. 
- * We only pick the first device we found valid */ - rc = mca_btl_ofi_init_device(info); - if (OPAL_SUCCESS == rc) - break; - } - info = info->next; - } - - /* We are done with the returned info. */ - fi_freeinfo(info_list); - - /* pass module array back to caller */ - base_modules = calloc (mca_btl_ofi_component.module_count, sizeof (*base_modules)); - if (NULL == base_modules) { - return NULL; - } - - memcpy(base_modules, mca_btl_ofi_component.modules, - mca_btl_ofi_component.module_count *sizeof (mca_btl_ofi_component.modules[0])); - - BTL_VERBOSE(("ofi btl initialization complete. found %d suitable transports", - mca_btl_ofi_component.module_count)); - - *num_btl_modules = mca_btl_ofi_component.module_count; - - return base_modules; -} - -static int mca_btl_ofi_init_device(struct fi_info *info) -{ - int rc; - int *module_count = &mca_btl_ofi_component.module_count; - size_t namelen; - size_t num_contexts_to_create; - - char *linux_device_name; - char ep_name[FI_NAME_MAX]; - - struct fi_info *ofi_info; - struct fi_ep_attr *ep_attr; - struct fi_domain_attr *domain_attr; - struct fi_av_attr av_attr = {0}; - struct fid_fabric *fabric = NULL; - struct fid_domain *domain = NULL; - struct fid_ep *ep = NULL; - struct fid_av *av = NULL; - - mca_btl_ofi_module_t *module; - - /* allocate module */ - module = (mca_btl_ofi_module_t*) calloc(1, sizeof(mca_btl_ofi_module_t)); - if (NULL == module) { - BTL_ERROR(("failed to allocate memory for OFI module")); - goto fail; - } - *module = mca_btl_ofi_module_template; - - /* make a copy of the given info to store on the module */ - ofi_info = fi_dupinfo(info); - ep_attr = ofi_info->ep_attr; - domain_attr = ofi_info->domain_attr; - - linux_device_name = info->domain_attr->name; - BTL_VERBOSE(("initializing dev:%s provider:%s", - linux_device_name, - info->fabric_attr->prov_name)); - - /* fabric */ - rc = fi_fabric(ofi_info->fabric_attr, &fabric, NULL); - if (0 != rc) { - BTL_VERBOSE(("%s failed fi_fabric with err=%s", - 
linux_device_name, - fi_strerror(-rc) - )); - goto fail; - } - - /* domain */ - rc = fi_domain(fabric, ofi_info, &domain, NULL); - if (0 != rc) { - BTL_VERBOSE(("%s failed fi_domain with err=%s", - linux_device_name, - fi_strerror(-rc) - )); - goto fail; - } - - /* AV */ - av_attr.type = FI_AV_MAP; - rc = fi_av_open(domain, &av_attr, &av, NULL); - if (0 != rc) { - BTL_VERBOSE(("%s failed fi_av_open with err=%s", - linux_device_name, - fi_strerror(-rc) - )); - goto fail; - } - - num_contexts_to_create = mca_btl_ofi_component.num_contexts_per_module; - - /* If the domain support scalable endpoint. */ - if (domain_attr->max_ep_tx_ctx > 1 && !disable_sep) { - - BTL_VERBOSE(("btl/ofi using scalable endpoint.")); - - if (num_contexts_to_create > domain_attr->max_ep_tx_ctx) { - BTL_VERBOSE(("cannot create requested %u contexts. (node max=%zu)", - module->num_contexts, - domain_attr->max_ep_tx_ctx)); - goto fail; - } - - /* modify the info to let the provider know we are creating x contexts */ - ep_attr->tx_ctx_cnt = num_contexts_to_create; - ep_attr->rx_ctx_cnt = num_contexts_to_create; - - /* create scalable endpoint */ - rc = fi_scalable_ep(domain, ofi_info, &ep, NULL); - if (0 != rc) { - BTL_VERBOSE(("%s failed fi_scalable_ep with err=%s", - linux_device_name, - fi_strerror(-rc) - )); - goto fail; - } - - module->num_contexts = num_contexts_to_create; - module->is_scalable_ep = true; - - /* create contexts */ - module->contexts = mca_btl_ofi_context_alloc_scalable(ofi_info, - domain, ep, av, - num_contexts_to_create); - - } else { - /* warn the user if they want more than 1 context */ - if (num_contexts_to_create > 1) { - BTL_ERROR(("cannot create %zu contexts as the provider does not support " - "scalable endpoint. 
Falling back to single context endpoint.", - num_contexts_to_create)); - } - - BTL_VERBOSE(("btl/ofi using normal endpoint.")); - - rc = fi_endpoint(domain, ofi_info, &ep, NULL); - if (0 != rc) { - BTL_VERBOSE(("%s failed fi_endpoint with err=%s", - linux_device_name, - fi_strerror(-rc) - )); - goto fail; - } - - module->num_contexts = 1; - module->is_scalable_ep = false; - - /* create contexts */ - module->contexts = mca_btl_ofi_context_alloc_normal(ofi_info, - domain, ep, av); - } - - if (NULL == module->contexts) { - /* error message is already printed */ - goto fail; - } - - /* enable the endpoint for using */ - rc = fi_enable(ep); - if (0 != rc) { - BTL_VERBOSE(("%s failed fi_enable with err=%s", - linux_device_name, - fi_strerror(-rc) - )); - goto fail; - } - - /* Everything succeeded, lets create a module for this device. */ - /* store the information. */ - module->fabric_info = ofi_info; - module->fabric = fabric; - module->domain = domain; - module->av = av; - module->ofi_endpoint = ep; - module->linux_device_name = linux_device_name; - module->outstanding_rdma = 0; - module->use_virt_addr = false; - - if (ofi_info->domain_attr->mr_mode == FI_MR_BASIC || - ofi_info->domain_attr->mr_mode & FI_MR_VIRT_ADDR) { - module->use_virt_addr = true; - } - - /* initialize the rcache */ - mca_btl_ofi_rcache_init(module); - - /* create endpoint list */ - OBJ_CONSTRUCT(&module->endpoints, opal_list_t); - OBJ_CONSTRUCT(&module->module_lock, opal_mutex_t); - - /* create and send the modex for this device */ - namelen = sizeof(ep_name); - rc = fi_getname((fid_t)ep, &ep_name[0], &namelen); - if (0 != rc) { - BTL_VERBOSE(("%s failed fi_getname with err=%s", - linux_device_name, - fi_strerror(-rc) - )); - goto fail; - } - - /* post our endpoint name so peer can use it to connect to us */ - OPAL_MODEX_SEND(rc, - OPAL_PMIX_GLOBAL, - &mca_btl_ofi_component.super.btl_version, - &ep_name, - namelen); - mca_btl_ofi_component.namelen = namelen; - - /* add this module to the list */ - 
mca_btl_ofi_component.modules[(*module_count)++] = module; - - return OPAL_SUCCESS; - -fail: - /* clean up */ - - /* if the contexts have not been initiated, num_contexts should - * be zero and we skip this. */ - for (int i=0; i < module->num_contexts; i++) { - mca_btl_ofi_context_finalize(&module->contexts[i], module->is_scalable_ep); - } - free(module->contexts); - - if (NULL != av) { - fi_close(&av->fid); - } - - if (NULL != ep) { - fi_close(&ep->fid); - } - - if (NULL != domain) { - fi_close(&domain->fid); - } - - if (NULL != fabric) { - fi_close(&fabric->fid); - } - free(module); - - /* not really a failure. just skip this device. */ - return OPAL_ERR_OUT_OF_RESOURCE; -} - -/** - * @brief OFI BTL progress function - * - * This function explictly progresses all workers. - */ -static int mca_btl_ofi_component_progress (void) -{ - int events = 0; - mca_btl_ofi_context_t *context; - - for (int i = 0 ; i < mca_btl_ofi_component.module_count ; ++i) { - mca_btl_ofi_module_t *module = mca_btl_ofi_component.modules[i]; - - /* progress context we own first. */ - context = get_ofi_context(module); - - if (mca_btl_ofi_context_trylock(context)) { - events += mca_btl_ofi_context_progress(context); - mca_btl_ofi_context_unlock(context); - } - - /* if there is nothing to do, try progress other's. */ - if (events == 0) { - for (int j = 0 ; j < module->num_contexts ; j++ ) { - - context = get_ofi_context_rr(module); - - if (mca_btl_ofi_context_trylock(context)) { - events += mca_btl_ofi_context_progress(context); - mca_btl_ofi_context_unlock(context); - } - - /* If we did something, good enough. return now. - * This is crucial for performance/latency. 
*/ - if (events > 0) { - break; - } - } - } - } - - return events; -} - -int mca_btl_ofi_context_progress(mca_btl_ofi_context_t *context) { - - int ret = 0; - int events_read; - int events = 0; - struct fi_cq_entry cq_entry[MCA_BTL_OFI_MAX_CQ_READ_ENTRIES]; - struct fi_cq_err_entry cqerr = {0}; - - mca_btl_ofi_completion_t *comp; - - ret = fi_cq_read(context->cq, &cq_entry, mca_btl_ofi_component.num_cqe_read); - - if (0 < ret) { - events_read = ret; - for (int i = 0; i < events_read; i++) { - if (NULL != cq_entry[i].op_context) { - ++events; - comp = (mca_btl_ofi_completion_t*) cq_entry[i].op_context; - mca_btl_ofi_module_t *ofi_btl = (mca_btl_ofi_module_t*)comp->btl; - - switch (comp->type) { - case MCA_BTL_OFI_TYPE_GET: - case MCA_BTL_OFI_TYPE_PUT: - case MCA_BTL_OFI_TYPE_AOP: - case MCA_BTL_OFI_TYPE_AFOP: - case MCA_BTL_OFI_TYPE_CSWAP: - - /* call the callback */ - if (comp->cbfunc) { - comp->cbfunc (comp->btl, comp->endpoint, - comp->local_address, comp->local_handle, - comp->cbcontext, comp->cbdata, OPAL_SUCCESS); - } - - /* return the completion handler */ - opal_free_list_return(comp->my_list, (opal_free_list_item_t*) comp); - - MCA_BTL_OFI_NUM_RDMA_DEC(ofi_btl); - break; - - default: - /* catasthrophic */ - BTL_ERROR(("unknown completion type")); - MCA_BTL_OFI_ABORT(); - } - } - } - } else if (OPAL_UNLIKELY(ret == -FI_EAVAIL)) { - ret = fi_cq_readerr(context->cq, &cqerr, 0); - - /* cq readerr failed!? */ - if (0 > ret) { - BTL_ERROR(("%s:%d: Error returned from fi_cq_readerr: %s(%d)", - __FILE__, __LINE__, fi_strerror(-ret), ret)); - } else { - BTL_ERROR(("fi_cq_readerr: (provider err_code = %d)\n", - cqerr.prov_errno)); - } - MCA_BTL_OFI_ABORT(); - } -#ifdef FI_EINTR - /* sometimes, sockets provider complain about interupt. We do nothing. */ - else if (OPAL_UNLIKELY(ret == -FI_EINTR)) { - - } -#endif - /* If the error is not FI_EAGAIN, report the error and abort. 
*/ - else if (OPAL_UNLIKELY(ret != -FI_EAGAIN)) { - BTL_ERROR(("fi_cq_read returned error %d:%s", ret, fi_strerror(-ret))); - MCA_BTL_OFI_ABORT(); - } - - return events; -} - -/** OFI btl component */ -mca_btl_ofi_component_t mca_btl_ofi_component = { - .super = { - .btl_version = { - MCA_BTL_DEFAULT_VERSION("ofi"), - .mca_open_component = mca_btl_ofi_component_open, - .mca_close_component = mca_btl_ofi_component_close, - .mca_register_component_params = mca_btl_ofi_component_register, - }, - .btl_data = { - /* The component is not checkpoint ready */ - .param_field = MCA_BASE_METADATA_PARAM_NONE - }, - - .btl_init = mca_btl_ofi_component_init, - .btl_progress = mca_btl_ofi_component_progress, - }, -}; diff --git a/opal/mca/btl/ofi/btl_ofi_endpoint.c b/opal/mca/btl/ofi/btl_ofi_endpoint.c deleted file mode 100644 index 0ef91a9b6ff..00000000000 --- a/opal/mca/btl/ofi/btl_ofi_endpoint.c +++ /dev/null @@ -1,343 +0,0 @@ -/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ -/* - * Copyright (c) 2014-2018 Los Alamos National Security, LLC. All rights - * reserved. 
- * Copyright (c) 2018 Intel, Inc, All rights reserved - * - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "btl_ofi.h" -#include "btl_ofi_endpoint.h" -#include "opal/util/proc.h" - -#if OPAL_HAVE_THREAD_LOCAL -opal_thread_local mca_btl_ofi_context_t *my_context = NULL; -#endif /* OPAL_HAVE_THREAD_LOCAL */ - -static void mca_btl_ofi_endpoint_construct (mca_btl_ofi_endpoint_t *endpoint) -{ - endpoint->peer_addr = 0; - OBJ_CONSTRUCT(&endpoint->ep_lock, opal_mutex_t); -} - -static void mca_btl_ofi_endpoint_destruct (mca_btl_ofi_endpoint_t *endpoint) -{ - endpoint->peer_addr = 0; - - /* set to null, we will free ofi endpoint in module */ - endpoint->ofi_endpoint = NULL; - - OBJ_DESTRUCT(&endpoint->ep_lock); -} - -OBJ_CLASS_INSTANCE(mca_btl_ofi_endpoint_t, opal_list_item_t, - mca_btl_ofi_endpoint_construct, - mca_btl_ofi_endpoint_destruct); - -mca_btl_base_endpoint_t *mca_btl_ofi_endpoint_create (opal_proc_t *proc, struct fid_ep *ep) -{ - mca_btl_ofi_endpoint_t *endpoint = OBJ_NEW(mca_btl_ofi_endpoint_t); - - if (OPAL_UNLIKELY(NULL == endpoint)) { - return NULL; - } - - endpoint->ep_proc = proc; - endpoint->ofi_endpoint = ep; - - return (mca_btl_base_endpoint_t *) endpoint; -} - -int ofi_comp_list_init(opal_free_list_t *comp_list) -{ - int rc; - OBJ_CONSTRUCT(comp_list, opal_free_list_t); - rc = opal_free_list_init(comp_list, - sizeof(mca_btl_ofi_completion_t), - opal_cache_line_size, - OBJ_CLASS(mca_btl_ofi_completion_t), - 0, - 0, - 128, - -1, - 128, - NULL, - 0, - NULL, - NULL, - NULL); - if (rc != OPAL_SUCCESS) { - BTL_VERBOSE(("cannot allocate completion freelist")); - } - return rc; -} - -/* mca_btl_ofi_context_alloc_normal() - * - * This function will allocate an ofi_context, map the endpoint to tx/rx context, - * bind CQ,AV to the endpoint and initialize all the structure. 
- * USE WITH NORMAL ENDPOINT ONLY */ -mca_btl_ofi_context_t *mca_btl_ofi_context_alloc_normal(struct fi_info *info, - struct fid_domain *domain, - struct fid_ep *ep, - struct fid_av *av) -{ - int rc; - uint32_t cq_flags = FI_TRANSMIT; - char *linux_device_name = info->domain_attr->name; - - struct fi_cq_attr cq_attr = {0}; - - mca_btl_ofi_context_t *context; - - context = (mca_btl_ofi_context_t*) calloc(1, sizeof(*context)); - if (NULL == context) { - BTL_VERBOSE(("cannot allocate context")); - return NULL; - } - - /* Don't really need to check, just avoiding compiler warning because - * BTL_VERBOSE is a no op in performance build and the compiler will - * complain about unused variable. */ - if (NULL == linux_device_name) { - BTL_VERBOSE(("linux device name is NULL. This shouldn't happen.")); - goto single_fail; - } - - cq_attr.format = FI_CQ_FORMAT_CONTEXT; - cq_attr.wait_obj = FI_WAIT_NONE; - rc = fi_cq_open(domain, &cq_attr, &context->cq, NULL); - if (0 != rc) { - BTL_VERBOSE(("%s failed fi_cq_open with err=%s", - linux_device_name, - fi_strerror(-rc) - )); - goto single_fail; - } - - rc = fi_ep_bind(ep, (fid_t)av, 0); - if (0 != rc) { - BTL_VERBOSE(("%s failed fi_ep_bind with err=%s", - linux_device_name, - fi_strerror(-rc) - )); - goto single_fail; - } - - rc = fi_ep_bind(ep, (fid_t)context->cq, cq_flags); - if (0 != rc) { - BTL_VERBOSE(("%s failed fi_scalable_ep_bind with err=%s", - linux_device_name, - fi_strerror(-rc) - )); - goto single_fail; - } - - rc = ofi_comp_list_init(&context->comp_list); - if (rc != OPAL_SUCCESS) { - goto single_fail; - } - - context->tx_ctx = ep; - context->rx_ctx = ep; - context->context_id = 0; - - return context; - -single_fail: - mca_btl_ofi_context_finalize(context, false); - return NULL; -} - -/* mca_btl_ofi_context_alloc_scalable() - * - * This function allocate communication contexts and return the pointer - * to the first btl context. It also take care of all the bindings needed. 
- * USE WITH SCALABLE ENDPOINT ONLY */ -mca_btl_ofi_context_t *mca_btl_ofi_context_alloc_scalable(struct fi_info *info, - struct fid_domain *domain, - struct fid_ep *sep, - struct fid_av *av, - size_t num_contexts) -{ - BTL_VERBOSE(("creating %zu contexts", num_contexts)); - - int rc; - size_t i; - char *linux_device_name = info->domain_attr->name; - - struct fi_cq_attr cq_attr = {0}; - struct fi_tx_attr tx_attr = {0}; - struct fi_rx_attr rx_attr = {0}; - - mca_btl_ofi_context_t *contexts; - tx_attr.op_flags = FI_DELIVERY_COMPLETE; - - contexts = (mca_btl_ofi_context_t*) calloc(num_contexts, sizeof(*contexts)); - if (NULL == contexts) { - BTL_VERBOSE(("cannot allocate communication contexts.")); - return NULL; - } - - /* Don't really need to check, just avoiding compiler warning because - * BTL_VERBOSE is a no op in performance build and the compiler will - * complain about unused variable. */ - if (NULL == linux_device_name) { - BTL_VERBOSE(("linux device name is NULL. This shouldn't happen.")); - goto scalable_fail; - } - - /* bind AV to endpoint */ - rc = fi_scalable_ep_bind(sep, (fid_t)av, 0); - if (0 != rc) { - BTL_VERBOSE(("%s failed fi_scalable_ep_bind with err=%s", - linux_device_name, - fi_strerror(-rc) - )); - goto scalable_fail; - } - - for (i=0; i < num_contexts; i++) { - rc = fi_tx_context(sep, i, &tx_attr, &contexts[i].tx_ctx, NULL); - if (0 != rc) { - BTL_VERBOSE(("%s failed fi_tx_context with err=%s", - linux_device_name, - fi_strerror(-rc) - )); - goto scalable_fail; - } - - /* We don't actually need a receiving context as we only do one-sided. - * However, sockets provider will hang if we dont have one. It is - * also nice to have equal number of tx/rx context. 
*/ - rc = fi_rx_context(sep, i, &rx_attr, &contexts[i].rx_ctx, NULL); - if (0 != rc) { - BTL_VERBOSE(("%s failed fi_rx_context with err=%s", - linux_device_name, - fi_strerror(-rc) - )); - goto scalable_fail; - } - - /* create CQ */ - cq_attr.format = FI_CQ_FORMAT_CONTEXT; - cq_attr.wait_obj = FI_WAIT_NONE; - rc = fi_cq_open(domain, &cq_attr, &contexts[i].cq, NULL); - if (0 != rc) { - BTL_VERBOSE(("%s failed fi_cq_open with err=%s", - linux_device_name, - fi_strerror(-rc) - )); - goto scalable_fail; - } - - /* bind cq to transmit context */ - uint32_t cq_flags = (FI_TRANSMIT); - rc = fi_ep_bind(contexts[i].tx_ctx, (fid_t)contexts[i].cq, cq_flags); - if (0 != rc) { - BTL_VERBOSE(("%s failed fi_ep_bind with err=%s", - linux_device_name, - fi_strerror(-rc) - )); - goto scalable_fail; - } - - /* enable the context. */ - rc = fi_enable(contexts[i].tx_ctx); - if (0 != rc) { - BTL_VERBOSE(("%s failed fi_enable with err=%s", - linux_device_name, - fi_strerror(-rc) - )); - goto scalable_fail; - } - - rc = fi_enable(contexts[i].rx_ctx); - if (0 != rc) { - BTL_VERBOSE(("%s failed fi_enable with err=%s", - linux_device_name, - fi_strerror(-rc) - )); - goto scalable_fail; - } - - /* initialize completion freelist. */ - rc = ofi_comp_list_init(&contexts[i].comp_list); - if (rc != OPAL_SUCCESS) { - goto scalable_fail; - } - - /* assign the id */ - contexts[i].context_id = i; - } - - return contexts; - -scalable_fail: - /* close and free */ - for(i=0; i < num_contexts; i++) { - mca_btl_ofi_context_finalize(&contexts[i], true); - } - free(contexts); - - return NULL; -} - -void mca_btl_ofi_context_finalize(mca_btl_ofi_context_t *context, bool scalable_ep) { - - /* if it is a scalable ep, we have to close all contexts. 
*/ - if (scalable_ep) { - if (NULL != context->tx_ctx) { - fi_close(&context->tx_ctx->fid); - } - - if (NULL != context->rx_ctx) { - fi_close(&context->rx_ctx->fid); - } - } - - if( NULL != context->cq) { - fi_close(&context->cq->fid); - } - - /* Can we destruct the object that hasn't been constructed? */ - OBJ_DESTRUCT(&context->comp_list); -} - -/* Get a context to use for communication. - * If TLS is supported, it will use the cached endpoint. - * If not, it will invoke the normal round-robin assignment. */ -mca_btl_ofi_context_t *get_ofi_context(mca_btl_ofi_module_t *btl) -{ -#if OPAL_HAVE_THREAD_LOCAL - /* With TLS, we cache the context we use. */ - static volatile int64_t cur_num = 0; - - if (OPAL_UNLIKELY(my_context == NULL)) { - OPAL_THREAD_LOCK(&btl->module_lock); - - my_context = &btl->contexts[cur_num]; - cur_num = (cur_num + 1) %btl->num_contexts; - - OPAL_THREAD_UNLOCK(&btl->module_lock); - } - - assert (my_context); - return my_context; -#else - return get_ofi_context_rr(btl); -#endif -} - -/* return the context in a round-robin. */ -/* There is no need for atomics here as it might hurt the performance. */ -mca_btl_ofi_context_t *get_ofi_context_rr(mca_btl_ofi_module_t *btl) -{ - static volatile uint64_t rr_num = 0; - return &btl->contexts[rr_num++%btl->num_contexts]; -} diff --git a/opal/mca/btl/ofi/btl_ofi_endpoint.h b/opal/mca/btl/ofi/btl_ofi_endpoint.h deleted file mode 100644 index aad758d8c85..00000000000 --- a/opal/mca/btl/ofi/btl_ofi_endpoint.h +++ /dev/null @@ -1,75 +0,0 @@ -/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2006 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. 
All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * Copyright (c) 2017-2018 Los Alamos National Security, LLC. All rights - * reserved. - * Copyright (c) 2018 Intel, Inc, All rights reserved - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#ifndef MCA_BTL_OFI_ENDPOINT_H -#define MCA_BTL_OFI_ENDPOINT_H - -#include "opal/class/opal_list.h" -#include "opal/mca/event/event.h" - -#include "btl_ofi.h" - -BEGIN_C_DECLS - -#if OPAL_HAVE_THREAD_LOCAL -extern opal_thread_local mca_btl_ofi_context_t *my_context; -#endif /* OPAL_HAVE_THREAD_LOCAL */ - -struct mca_btl_base_endpoint_t { - opal_list_item_t super; - - struct fid_ep *ofi_endpoint; - fi_addr_t peer_addr; - - /** endpoint proc */ - opal_proc_t *ep_proc; - - /** mutex to protect this structure */ - opal_mutex_t ep_lock; -}; - -typedef struct mca_btl_base_endpoint_t mca_btl_base_endpoint_t; -typedef mca_btl_base_endpoint_t mca_btl_ofi_endpoint_t; -OBJ_CLASS_DECLARATION(mca_btl_ofi_endpoint_t); - -int ofi_comp_list_init(opal_free_list_t *comp_list); - -mca_btl_base_endpoint_t *mca_btl_ofi_endpoint_create (opal_proc_t *proc, struct fid_ep *ep); - -/* contexts */ -mca_btl_ofi_context_t *mca_btl_ofi_context_alloc_scalable(struct fi_info *info, - struct fid_domain *domain, - struct fid_ep *sep, - struct fid_av *av, - size_t num_contexts); - -mca_btl_ofi_context_t *mca_btl_ofi_context_alloc_normal(struct fi_info *info, - struct fid_domain *domain, - struct fid_ep *ep, - struct fid_av *av); -void mca_btl_ofi_context_finalize(mca_btl_ofi_context_t *context, bool scalable_ep); - -mca_btl_ofi_context_t *get_ofi_context(mca_btl_ofi_module_t *btl); -mca_btl_ofi_context_t *get_ofi_context_rr(mca_btl_ofi_module_t *btl); - -END_C_DECLS -#endif diff --git a/opal/mca/btl/ofi/btl_ofi_module.c b/opal/mca/btl/ofi/btl_ofi_module.c deleted file mode 100644 index df6ae1e2e1a..00000000000 --- a/opal/mca/btl/ofi/btl_ofi_module.c +++ 
/dev/null @@ -1,329 +0,0 @@ -/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ -/* - * Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2013 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * Copyright (c) 2014-2018 Los Alamos National Security, LLC. All rights - * reserved. - * Copyright (c) 2018 Intel, Inc, All rights reserved - * - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "opal_config.h" -#include -#include "opal/class/opal_bitmap.h" -#include "opal/mca/btl/btl.h" -#include "opal/datatype/opal_convertor.h" -#include "opal/mca/mpool/base/base.h" -#include "opal/mca/mpool/mpool.h" - -#include "btl_ofi.h" -#include "btl_ofi_endpoint.h" - -static int mca_btl_ofi_add_procs (mca_btl_base_module_t *btl, - size_t nprocs, opal_proc_t **opal_procs, - mca_btl_base_endpoint_t **peers, - opal_bitmap_t *reachable) -{ - int rc; - int count; - char *ep_name = NULL; - size_t namelen = mca_btl_ofi_component.namelen; - - mca_btl_ofi_module_t *ofi_btl = (mca_btl_ofi_module_t *) btl; - - for (size_t i = 0 ; i < nprocs ; ++i) { - peers[i] = mca_btl_ofi_endpoint_create (opal_procs[i], ofi_btl->ofi_endpoint); - if (OPAL_UNLIKELY(NULL == peers[i])) { - return OPAL_ERR_OUT_OF_RESOURCE; - } - - OPAL_MODEX_RECV(rc, &mca_btl_ofi_component.super.btl_version, - &peers[i]->ep_proc->proc_name, (void **)&ep_name, &namelen); - if (OPAL_SUCCESS != rc) { - BTL_ERROR(("error receiving modex")); - MCA_BTL_OFI_ABORT(); - } - - /* get peer fi_addr */ - count = fi_av_insert(ofi_btl->av, /* Address vector to insert */ - ep_name, /* peer name */ - 
1, /* amount to insert */ - &peers[i]->peer_addr, /* return peer address here */ - 0, /* flags */ - NULL); /* context */ - - /* if succeed, add this proc and mark reachable */ - if (count == 1) { /* we inserted 1 address. */ - opal_list_append (&ofi_btl->endpoints, &peers[i]->super); - opal_bitmap_set_bit(reachable, i); - } else { - BTL_VERBOSE(("fi_av_insert failed with rc = %d", count)); - MCA_BTL_OFI_ABORT(); - } - } - - return OPAL_SUCCESS; -} - -static int mca_btl_ofi_del_procs (mca_btl_base_module_t *btl, size_t nprocs, - opal_proc_t **procs, mca_btl_base_endpoint_t **peers) -{ - int ret; - mca_btl_ofi_module_t *ofi_btl = (mca_btl_ofi_module_t *) btl; - - for (size_t i = 0 ; i < nprocs ; ++i) { - if (peers[i]) { - - /* remove the address from AV. */ - ret = fi_av_remove(ofi_btl->av, &peers[i]->peer_addr, 1, 0); - if (ret < 0) { - /* remove failed. this should not happen. */ - /* Lets not crash because we failed to remove an address. */ - BTL_ERROR(("fi_av_remove failed with error %d:%s", - ret, fi_strerror(-ret))); - } - - /* remove and free MPI endpoint from the list. 
*/ - opal_list_remove_item (&ofi_btl->endpoints, &peers[i]->super); - OBJ_RELEASE(peers[i]); - } - } - - return OPAL_SUCCESS; -} - -void mca_btl_ofi_rcache_init (mca_btl_ofi_module_t *module) -{ - if (!module->initialized) { - mca_rcache_base_resources_t rcache_resources; - char *tmp; - - (void) asprintf (&tmp, "ofi.%s", module->linux_device_name); - - rcache_resources.cache_name = tmp; - rcache_resources.reg_data = (void *) module; - rcache_resources.sizeof_reg = sizeof (mca_btl_ofi_reg_t); - rcache_resources.register_mem = mca_btl_ofi_reg_mem; - rcache_resources.deregister_mem = mca_btl_ofi_dereg_mem; - - module->rcache = mca_rcache_base_module_create ("grdma", module, &rcache_resources); - free (tmp); - - if (NULL == module->rcache) { - /* something when horribly wrong */ - BTL_ERROR(("cannot create rcache")); - MCA_BTL_OFI_ABORT(); - } - - module->initialized = true; - } -} - - -/** - * @brief Register a memory region for put/get/atomic operations. - * - * @param btl (IN) BTL module - * @param endpoint(IN) BTL addressing information (or NULL for all endpoints) - * @param base (IN) Pointer to start of region - * @param size (IN) Size of region - * @param flags (IN) Flags indicating what operation will be performed. Valid - * values are MCA_BTL_DES_FLAGS_PUT, MCA_BTL_DES_FLAGS_GET, - * and MCA_BTL_DES_FLAGS_ATOMIC - * - * @returns a memory registration handle valid for both local and remote operations - * @returns NULL if the region could not be registered - * - * This function registers the specified region with the hardware for use with - * the btl_put, btl_get, btl_atomic_cas, btl_atomic_op, and btl_atomic_fop - * functions. Care should be taken to not hold an excessive number of registrations - * as they may use limited system/NIC resources. 
- */ -static struct mca_btl_base_registration_handle_t * -mca_btl_ofi_register_mem (struct mca_btl_base_module_t *btl, struct mca_btl_base_endpoint_t *endpoint, void *base, - size_t size, uint32_t flags) -{ - mca_btl_ofi_module_t *ofi_module = (mca_btl_ofi_module_t *) btl; - mca_btl_ofi_reg_t *reg; - int access_flags = flags & MCA_BTL_REG_FLAG_ACCESS_ANY; - int rc; - - rc = ofi_module->rcache->rcache_register (ofi_module->rcache, base, size, 0, access_flags, - (mca_rcache_base_registration_t **) ®); - if (OPAL_UNLIKELY(OPAL_SUCCESS != rc)) { - return NULL; - } - - return ®->handle; -} - -/** - * @brief Deregister a memory region - * - * @param btl (IN) BTL module region was registered with - * @param handle (IN) BTL registration handle to deregister - * - * This function deregisters the memory region associated with the specified handle. Care - * should be taken to not perform any RDMA or atomic operation on this memory region - * after it is deregistered. It is erroneous to specify a memory handle associated with - * a remote node. 
- */ -static int mca_btl_ofi_deregister_mem (mca_btl_base_module_t *btl, mca_btl_base_registration_handle_t *handle) -{ - mca_btl_ofi_module_t *ofi_module = (mca_btl_ofi_module_t *) btl; - mca_btl_ofi_reg_t *reg = - (mca_btl_ofi_reg_t *)((intptr_t) handle - offsetof (mca_btl_ofi_reg_t, handle)); - - (void) ofi_module->rcache->rcache_deregister (ofi_module->rcache, ®->base); - - return OPAL_SUCCESS; -} - -int mca_btl_ofi_reg_mem (void *reg_data, void *base, size_t size, mca_rcache_base_registration_t *reg) -{ - int rc; - static uint64_t access_flags = FI_REMOTE_WRITE | FI_REMOTE_READ | FI_READ | FI_WRITE; - - mca_btl_ofi_module_t *btl = (mca_btl_ofi_module_t*) reg_data; - mca_btl_ofi_reg_t *ur = (mca_btl_ofi_reg_t*) reg; - - rc = fi_mr_reg(btl->domain, base, size, access_flags, 0, - (uint64_t) reg, 0, &ur->ur_mr, NULL); - if (0 != rc) { - return OPAL_ERR_OUT_OF_RESOURCE; - } - - ur->handle.rkey = fi_mr_key(ur->ur_mr); - ur->handle.desc = fi_mr_desc(ur->ur_mr); - - /* In case the provider doesn't support FI_MR_VIRT_ADDR, - * we need to reference the remote address by the distance from base registered - * address. We keep this information to use in rdma/atomic operations. */ - if (btl->use_virt_addr) { - ur->handle.base_addr = 0; - } else { - ur->handle.base_addr = base; - } - - return OPAL_SUCCESS; -} - -int mca_btl_ofi_dereg_mem (void *reg_data, mca_rcache_base_registration_t *reg) -{ - mca_btl_ofi_reg_t *ur = (mca_btl_ofi_reg_t*)reg; - - if (ur->ur_mr != NULL) { - if (0 != fi_close(&ur->ur_mr->fid)) { - BTL_ERROR(("%s: error unpinning memory mr=%p: %s", - __func__, (void*) ur->ur_mr, strerror(errno))); - return OPAL_ERROR; - } - } - - return OPAL_SUCCESS; -} - -/* - * Cleanup/release module resources. 
- */ - -int mca_btl_ofi_finalize (mca_btl_base_module_t* btl) -{ - int i; - mca_btl_ofi_module_t *ofi_btl = (mca_btl_ofi_module_t *) btl; - mca_btl_ofi_endpoint_t *endpoint, *next; - - assert(btl); - - /* loop over all the contexts */ - for (i=0; i < ofi_btl->num_contexts; i++) { - mca_btl_ofi_context_finalize(&ofi_btl->contexts[i], ofi_btl->is_scalable_ep); - } - free(ofi_btl->contexts); - - if (NULL != ofi_btl->av) { - fi_close(&ofi_btl->av->fid); - } - - if (NULL != ofi_btl->ofi_endpoint) { - fi_close(&ofi_btl->ofi_endpoint->fid); - } - - if (NULL != ofi_btl->domain) { - fi_close(&ofi_btl->domain->fid); - } - - if (NULL != ofi_btl->fabric) { - fi_close(&ofi_btl->fabric->fid); - } - - if (NULL != ofi_btl->fabric_info) { - fi_freeinfo(ofi_btl->fabric_info); - } - - /* clean up any leftover endpoints */ - OPAL_LIST_FOREACH_SAFE(endpoint, next, &ofi_btl->endpoints, mca_btl_ofi_endpoint_t) { - opal_list_remove_item (&ofi_btl->endpoints, &endpoint->super); - OBJ_RELEASE(endpoint); - } - - OBJ_DESTRUCT(&ofi_btl->endpoints); - - if (ofi_btl->rcache) { - mca_rcache_base_module_destroy (ofi_btl->rcache); - } - - free (btl); - - return OPAL_SUCCESS; -} - -mca_btl_ofi_module_t mca_btl_ofi_module_template = { - .super = { - /* initialize functions. this btl only support RDMA and atomics - * for now so it does not provide prepare_src, alloc, free, or send */ - .btl_component = &mca_btl_ofi_component.super, - .btl_add_procs = mca_btl_ofi_add_procs, - .btl_del_procs = mca_btl_ofi_del_procs, - .btl_finalize = mca_btl_ofi_finalize, - .btl_put = mca_btl_ofi_put, - .btl_get = mca_btl_ofi_get, - .btl_register_mem = mca_btl_ofi_register_mem, - .btl_deregister_mem = mca_btl_ofi_deregister_mem, - .btl_atomic_op = mca_btl_ofi_aop, - .btl_atomic_fop = mca_btl_ofi_afop, - .btl_atomic_cswap = mca_btl_ofi_acswap, - .btl_flush = mca_btl_ofi_flush, - - /* set the default flags for this btl. 
ofi provides us with rdma and both - * fetching and non-fetching atomics (though limited to add and cswap) */ - .btl_flags = MCA_BTL_FLAGS_RDMA | - MCA_BTL_FLAGS_ATOMIC_FOPS | - MCA_BTL_FLAGS_ATOMIC_OPS, - - .btl_atomic_flags = MCA_BTL_ATOMIC_SUPPORTS_ADD | - MCA_BTL_ATOMIC_SUPPORTS_SWAP | - MCA_BTL_ATOMIC_SUPPORTS_CSWAP | - MCA_BTL_ATOMIC_SUPPORTS_32BIT, - - /* set the default limits on put and get */ - .btl_registration_handle_size = sizeof(mca_btl_base_registration_handle_t), - .btl_put_limit = 1 << 23, - .btl_put_alignment = 0, - .btl_get_limit = 1 << 23, - .btl_get_alignment = 0, - } -}; diff --git a/opal/mca/btl/ofi/btl_ofi_rdma.c b/opal/mca/btl/ofi/btl_ofi_rdma.c deleted file mode 100644 index 9a545038a4d..00000000000 --- a/opal/mca/btl/ofi/btl_ofi_rdma.c +++ /dev/null @@ -1,156 +0,0 @@ -/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ -/* - * Copyright (c) 2014-2018 Los Alamos National Security, LLC. All rights - * reserved. - * Copyright (c) 2018 Intel, Inc, All rights reserved - * - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "btl_ofi_rdma.h" - -OBJ_CLASS_INSTANCE(mca_btl_ofi_completion_t, - opal_free_list_item_t, - NULL, - NULL); - -mca_btl_ofi_completion_t *mca_btl_ofi_completion_alloc ( - mca_btl_base_module_t *btl, - mca_btl_base_endpoint_t *endpoint, - mca_btl_ofi_context_t *ofi_context, - void *local_address, - mca_btl_base_registration_handle_t *local_handle, - mca_btl_base_rdma_completion_fn_t cbfunc, - void *cbcontext, void *cbdata, - int type) -{ - assert(btl); - assert(endpoint); - assert(ofi_context); - - mca_btl_ofi_completion_t *comp; - - comp = (mca_btl_ofi_completion_t*) opal_free_list_get(&ofi_context->comp_list); - assert(comp); - - comp->btl = btl; - comp->endpoint = endpoint; - comp->my_context = ofi_context; - comp->local_address = local_address; - comp->local_handle = local_handle; - comp->cbfunc = cbfunc; - comp->cbcontext = cbcontext; - comp->cbdata = cbdata; - 
comp->my_list = &ofi_context->comp_list; - comp->type = type; - - return comp; -} - -int mca_btl_ofi_get (mca_btl_base_module_t *btl, mca_btl_base_endpoint_t *endpoint, void *local_address, - uint64_t remote_address, mca_btl_base_registration_handle_t *local_handle, - mca_btl_base_registration_handle_t *remote_handle, size_t size, int flags, - int order, mca_btl_base_rdma_completion_fn_t cbfunc, void *cbcontext, void *cbdata) -{ - - int rc; - - mca_btl_ofi_module_t *ofi_btl = (mca_btl_ofi_module_t *) btl; - mca_btl_ofi_endpoint_t *btl_endpoint = (mca_btl_ofi_endpoint_t*) endpoint; - mca_btl_ofi_completion_t *comp; - mca_btl_ofi_context_t *ofi_context; - - ofi_context = get_ofi_context(ofi_btl); - - /* create completion context */ - comp = mca_btl_ofi_completion_alloc(btl, endpoint, - ofi_context, - local_address, - local_handle, - cbfunc, cbcontext, cbdata, - MCA_BTL_OFI_TYPE_GET); - - remote_address = (remote_address - (uint64_t) remote_handle->base_addr); - - /* Remote write data across the wire */ - rc = fi_read(ofi_context->tx_ctx, - local_address, size, /* payload */ - local_handle->desc, - btl_endpoint->peer_addr, - remote_address, remote_handle->rkey, - comp); /* completion context */ - - if (-FI_EAGAIN == rc) { - return OPAL_ERR_OUT_OF_RESOURCE; - } - - if (0 != rc) { - BTL_ERROR(("fi_read failed with %d:%s", rc, fi_strerror(-rc))); - MCA_BTL_OFI_ABORT(); - } - - MCA_BTL_OFI_NUM_RDMA_INC(ofi_btl); - - return OPAL_SUCCESS; -} - -int mca_btl_ofi_put (mca_btl_base_module_t *btl, mca_btl_base_endpoint_t *endpoint, void *local_address, - uint64_t remote_address, mca_btl_base_registration_handle_t *local_handle, - mca_btl_base_registration_handle_t *remote_handle, size_t size, int flags, - int order, mca_btl_base_rdma_completion_fn_t cbfunc, void *cbcontext, void *cbdata) -{ - int rc; - mca_btl_ofi_module_t *ofi_btl = (mca_btl_ofi_module_t *) btl; - mca_btl_ofi_endpoint_t *btl_endpoint = (mca_btl_ofi_endpoint_t*) endpoint; - mca_btl_ofi_context_t *ofi_context; - 
- ofi_context = get_ofi_context(ofi_btl); - - /* create completion context */ - mca_btl_ofi_completion_t *comp; - comp = mca_btl_ofi_completion_alloc(btl, endpoint, - ofi_context, - local_address, - local_handle, - cbfunc, cbcontext, cbdata, - MCA_BTL_OFI_TYPE_PUT); - - remote_address = (remote_address - (uint64_t) remote_handle->base_addr); - - /* Remote write data across the wire */ - rc = fi_write(ofi_context->tx_ctx, - local_address, size, /* payload */ - local_handle->desc, - btl_endpoint->peer_addr, - remote_address, remote_handle->rkey, - comp); /* completion context */ - - if (-FI_EAGAIN == rc) { - return OPAL_ERR_OUT_OF_RESOURCE; - } - - if (0 != rc) { - BTL_ERROR(("fi_write failed with %d:%s", rc, fi_strerror(-rc))); - MCA_BTL_OFI_ABORT(); - } - - MCA_BTL_OFI_NUM_RDMA_INC(ofi_btl); - - return OPAL_SUCCESS; - -} - -int mca_btl_ofi_flush (mca_btl_base_module_t *btl, mca_btl_base_endpoint_t *endpoint) -{ - mca_btl_ofi_module_t *ofi_btl = (mca_btl_ofi_module_t *) btl; - - while(ofi_btl->outstanding_rdma > 0) { - (void) mca_btl_ofi_component.super.btl_progress(); - } - - return OPAL_SUCCESS; -} diff --git a/opal/mca/btl/ofi/btl_ofi_rdma.h b/opal/mca/btl/ofi/btl_ofi_rdma.h deleted file mode 100644 index 3de42454395..00000000000 --- a/opal/mca/btl/ofi/btl_ofi_rdma.h +++ /dev/null @@ -1,42 +0,0 @@ -/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ -/* - * Copyright (c) 2014-2018 Los Alamos National Security, LLC. All rights - * reserved. 
- * Copyright (c) 2018 Intel, Inc, All rights reserved - * - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#ifndef BTL_OFI_RDMA_H -#define BTL_OFI_RDMA_H - -#include "opal/threads/thread_usage.h" - -#include "btl_ofi.h" -#include "btl_ofi_endpoint.h" - -mca_btl_ofi_completion_t *mca_btl_ofi_completion_alloc ( - mca_btl_base_module_t *btl, - mca_btl_base_endpoint_t *endpoint, - mca_btl_ofi_context_t *ofi_context, - void *local_address, - mca_btl_base_registration_handle_t *local_handle, - mca_btl_base_rdma_completion_fn_t cbfunc, - void *cbcontext, void *cbdata, - int type); - -#define MCA_BTL_OFI_NUM_RDMA_INC(module) \ - OPAL_THREAD_ADD_FETCH64(&(module)->outstanding_rdma, 1); \ - if (module->outstanding_rdma > mca_btl_ofi_component.progress_threshold){ \ - mca_btl_ofi_component.super.btl_progress(); \ - } - -#define MCA_BTL_OFI_NUM_RDMA_DEC(module) \ - OPAL_THREAD_ADD_FETCH64(&(module)->outstanding_rdma, -1); - -#endif /* !defined(BTL_OFI_RDMA_H) */ - diff --git a/opal/mca/btl/ofi/configure.m4 b/opal/mca/btl/ofi/configure.m4 deleted file mode 100644 index 222a7b29e03..00000000000 --- a/opal/mca/btl/ofi/configure.m4 +++ /dev/null @@ -1,51 +0,0 @@ -# -*- shell-script -*- -# -# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana -# University Research and Technology -# Corporation. All rights reserved. -# Copyright (c) 2004-2005 The University of Tennessee and The University -# of Tennessee Research Foundation. All rights -# reserved. -# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, -# University of Stuttgart. All rights reserved. -# Copyright (c) 2004-2006 The Regents of the University of California. -# All rights reserved. -# Copyright (c) 2006 QLogic Corp. All rights reserved. -# Copyright (c) 2009 Cisco Systems, Inc. All rights reserved. -# Copyright (c) 2011-2018 Los Alamos National Security, LLC. -# All rights reserved. -# Copyright (c) 2018 Intel, inc. 
All rights reserved -# -# $COPYRIGHT$ -# -# Additional copyrights may follow -# -# $HEADER$ -# - -# OPAL_CHECK_OFI(prefix, [action-if-found], [action-if-not-found]) -# -------------------------------------------------------- -# check if OFI support can be found. sets prefix_{CPPFLAGS, -# LDFLAGS, LIBS} as needed and runs action-if-found if there is -# support, otherwise executes action-if-not-found - -AC_DEFUN([MCA_opal_btl_ofi_CONFIG],[ - OPAL_VAR_SCOPE_PUSH([opal_btl_ofi_happy CPPFLAGS_save]) - - AC_CONFIG_FILES([opal/mca/btl/ofi/Makefile]) - - AC_REQUIRE([MCA_opal_common_ofi_CONFIG]) - - opal_btl_ofi_happy=0 - AS_IF([test "$opal_common_ofi_happy" = "yes"], - [CPPFLAGS_save=$CPPFLAGS - CPPFLAGS="$opal_common_ofi_CPPFLAGS $CPPFLAGS" - AC_CHECK_DECL([FI_MR_VIRT_ADDR], [opal_btl_ofi_happy=1], [], - [#include ]) - CPPFLAGS=$CPPFLAGS_save]) - AS_IF([test $opal_btl_ofi_happy -eq 1], - [$1], - [$2]) - - OPAL_VAR_SCOPE_POP -])dnl diff --git a/opal/mca/btl/ofi/owner.txt b/opal/mca/btl/ofi/owner.txt deleted file mode 100644 index f58f1cbab7e..00000000000 --- a/opal/mca/btl/ofi/owner.txt +++ /dev/null @@ -1,7 +0,0 @@ -# -# owner/status file -# owner: institution that is responsible for this package -# status: e.g. active, maintenance, unmaintained -# -owner:Intel -status:active diff --git a/opal/mca/btl/openib/btl_openib.c b/opal/mca/btl/openib/btl_openib.c index dc279df8347..f9ba3a3de61 100644 --- a/opal/mca/btl/openib/btl_openib.c +++ b/opal/mca/btl/openib/btl_openib.c @@ -19,9 +19,10 @@ * Copyright (c) 2009 IBM Corporation. All rights reserved. * Copyright (c) 2013-2015 Intel, Inc. All rights reserved * Copyright (c) 2013-2015 NVIDIA Corporation. All rights reserved. - * Copyright (c) 2014-2015 Research Organization for Information Science - * and Technology (RIST). All rights reserved. + * Copyright (c) 2014-2018 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * Copyright (c) 2014 Bull SAS. 
All rights reserved + * Copyright (c) 2019 Triad National Security, LLC. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -1644,6 +1645,7 @@ static int mca_btl_openib_finalize_resources(struct mca_btl_base_module_t* btl) for (ep_index=0; ep_index < opal_pointer_array_get_size(openib_btl->device->endpoints); ep_index++) { + endpoint=(mca_btl_openib_endpoint_t *)opal_pointer_array_get_item(openib_btl->device->endpoints, ep_index); if(!endpoint) { @@ -1710,7 +1712,7 @@ static int mca_btl_openib_finalize_resources(struct mca_btl_base_module_t* btl) free(openib_btl->cpcs); /* Release device if there are no more users */ - if(!(--openib_btl->device->btls)) { + if(!(--openib_btl->device->allowed_btls)) { OBJ_RELEASE(openib_btl->device); } diff --git a/opal/mca/btl/openib/btl_openib.h b/opal/mca/btl/openib/btl_openib.h index 6b4dd0466bf..3ffc0feffce 100644 --- a/opal/mca/btl/openib/btl_openib.h +++ b/opal/mca/btl/openib/btl_openib.h @@ -18,8 +18,10 @@ * Copyright (c) 2009-2010 Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2013-2014 NVIDIA Corporation. All rights reserved. * Copyright (c) 2014 Bull SAS. All rights reserved. - * Copyright (c) 2015-2016 Research Organization for Information Science - * and Technology (RIST). All rights reserved. + * Copyright (c) 2015-2018 Research Organization for Information Science + * and Technology (RIST). All rights reserved. + * Copyright (c) 2019 Triad National Security, LLC. All rights reserved. 
+ * * $COPYRIGHT$ * * Additional copyrights may follow @@ -164,6 +166,9 @@ struct mca_btl_openib_component_t { int ib_num_btls; /**< number of devices available to the openib component */ + int ib_allowed_btls; + /**< number of devices allowed to the openib component */ + struct mca_btl_openib_module_t **openib_btls; /**< array of available BTLs */ @@ -389,6 +394,7 @@ typedef struct mca_btl_openib_device_t { /* Whether this device supports eager RDMA */ uint8_t use_eager_rdma; uint8_t btls; /** < number of btls using this device */ + uint8_t allowed_btls; /** < number of allowed btls using this device */ opal_pointer_array_t *endpoints; opal_pointer_array_t *device_btls; uint16_t hp_cq_polls; @@ -480,6 +486,7 @@ struct mca_btl_openib_module_t { uint8_t num_cpcs; mca_btl_openib_device_t *device; + char * device_name; uint8_t port_num; /**< ID of the PORT */ uint16_t pkey_index; struct ibv_port_attr ib_port_attr; diff --git a/opal/mca/btl/openib/btl_openib_component.c b/opal/mca/btl/openib/btl_openib_component.c index c9dec165afb..d93178fb537 100644 --- a/opal/mca/btl/openib/btl_openib_component.c +++ b/opal/mca/btl/openib/btl_openib_component.c @@ -19,9 +19,10 @@ * Copyright (c) 2011-2015 NVIDIA Corporation. All rights reserved. * Copyright (c) 2012 Oak Ridge National Laboratory. All rights reserved * Copyright (c) 2013-2017 Intel, Inc. All rights reserved. - * Copyright (c) 2014-2017 Research Organization for Information Science - * and Technology (RIST). All rights reserved. + * Copyright (c) 2014-2018 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * Copyright (c) 2014 Bull SAS. All rights reserved. + * Copyright (c) 2019 Triad National Security, LLC. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -269,7 +270,7 @@ static int btl_openib_modex_send(void) /* uint8_t for number of modules in the message */ 1 + /* For each module: */ - mca_btl_openib_component.ib_num_btls * + mca_btl_openib_component.ib_allowed_btls * ( /* Common module data */ modex_message_size + @@ -300,8 +301,8 @@ static int btl_openib_modex_send(void) /* Pack the number of modules */ offset = message; - pack8(&offset, mca_btl_openib_component.ib_num_btls); - opal_output(-1, "modex sending %d btls (packed: %d, offset now at %d)", mca_btl_openib_component.ib_num_btls, *((uint8_t*) message), (int) (offset - message)); + pack8(&offset, mca_btl_openib_component.ib_allowed_btls); + opal_output(-1, "modex sending %d btls (packed: %d, offset now at %d)", mca_btl_openib_component.ib_allowed_btls, *((uint8_t*) message), (int) (offset - message)); /* Pack each of the modules */ for (i = 0; i < mca_btl_openib_component.ib_num_btls; i++) { @@ -627,24 +628,26 @@ static int init_one_port(opal_list_t *btl_list, mca_btl_openib_device_t *device, * unless the user specifically requested to override this * policy. For ancient OFED, only allow if user has set * the MCA parameter. + * + * We emit a help message if Open MPI was configured without + * UCX support and the port is configured to use InfiniBand as its link + * layer. If UCX support is available, don't emit help message + * since UCX PML has higher priority than OB1 and this BTL will + * not be used. 
*/ + if (false == mca_btl_openib_component.allow_ib #if HAVE_DECL_IBV_LINK_LAYER_ETHERNET - if ((IBV_LINK_LAYER_INFINIBAND == ib_port_attr->link_layer) && - (false == mca_btl_openib_component.allow_ib)) { - opal_show_help("help-mpi-btl-openib.txt", "ib port not selected", - true, opal_process_info.nodename, - ibv_get_device_name(device->ib_dev), port_num); - return OPAL_ERR_NOT_FOUND; - } -#else - if (false == mca_btl_openib_component.allow_ib) { + && IBV_LINK_LAYER_INFINIBAND == ib_port_attr->link_layer +#endif + ) { +#if !HAVE_UCX opal_show_help("help-mpi-btl-openib.txt", "ib port not selected", true, opal_process_info.nodename, - ibv_get_device_name(device->ib_dev), port_num); - return OPAL_ERR_NOT_FOUND; - } + ibv_get_device_name(device->ib_dev), + port_num); #endif - + return OPAL_ERR_NOT_FOUND; + } /* Ensure that the requested GID index (via the btl_openib_gid_index MCA param) is within the GID table @@ -765,6 +768,7 @@ static int init_one_port(opal_list_t *btl_list, mca_btl_openib_device_t *device, ib_selected = OBJ_NEW(mca_btl_base_selected_module_t); ib_selected->btl_module = (mca_btl_base_module_t*) openib_btl; openib_btl->device = device; + openib_btl->device_name = NULL; openib_btl->port_num = (uint8_t) port_num; openib_btl->pkey_index = pkey_index; openib_btl->lid = lid; @@ -883,7 +887,9 @@ static int init_one_port(opal_list_t *btl_list, mca_btl_openib_device_t *device, opal_list_append(btl_list, (opal_list_item_t*) ib_selected); opal_pointer_array_add(device->device_btls, (void*) openib_btl); ++device->btls; + ++device->allowed_btls; ++mca_btl_openib_component.ib_num_btls; + ++mca_btl_openib_component.ib_allowed_btls; if (-1 != mca_btl_openib_component.ib_max_btls && mca_btl_openib_component.ib_num_btls >= mca_btl_openib_component.ib_max_btls) { @@ -1692,6 +1698,7 @@ static int init_one_device(opal_list_t *btl_list, struct ibv_device* ib_dev) goto error; } #if HAVE_DECL_IBV_EXP_QUERY_DEVICE + memset(&device->ib_exp_dev_attr, 0, 
sizeof(device->ib_exp_dev_attr)); device->ib_exp_dev_attr.comp_mask = IBV_EXP_DEVICE_ATTR_RESERVED - 1; if(ibv_exp_query_device(device->ib_dev_context, &device->ib_exp_dev_attr)){ BTL_ERROR(("error obtaining device attributes for %s errno says %s", @@ -1910,7 +1917,7 @@ static int init_one_device(opal_list_t *btl_list, struct ibv_device* ib_dev) if (ib_port_attr.active_mtu < device->mtu){ device->mtu = ib_port_attr.active_mtu; } - if (mca_btl_openib_component.apm_ports && device->btls > 0) { + if (mca_btl_openib_component.apm_ports && device->allowed_btls > 0) { init_apm_port(device, i, ib_port_attr.lid); break; } @@ -1946,7 +1953,7 @@ static int init_one_device(opal_list_t *btl_list, struct ibv_device* ib_dev) /* If we made a BTL, check APM status and return. Otherwise, fall through and destroy everything */ - if (device->btls > 0) { + if (device->allowed_btls > 0) { /* if apm was enabled it should be > 1 */ if (1 == mca_btl_openib_component.apm_ports) { opal_show_help("help-mpi-btl-openib.txt", @@ -2267,6 +2274,11 @@ static int init_one_device(opal_list_t *btl_list, struct ibv_device* ib_dev) good: mca_btl_openib_component.devices_count++; return OPAL_SUCCESS; + } else if (device->btls > 0) { + /* no port is allowed to be used by btl/openib, + * so release the device right away */ + OBJ_RELEASE(device); + return OPAL_SUCCESS; } error: @@ -2808,7 +2820,6 @@ btl_openib_component_init(int *num_btl_modules, ib_devs = opal_ibv_get_device_list(&num_devs); if(0 == num_devs || NULL == ib_devs) { - mca_btl_base_error_no_nics("OpenFabrics (openib)", "device"); goto no_btls; } @@ -2912,36 +2923,38 @@ btl_openib_component_init(int *num_btl_modules, goto no_btls; } - /* Now that we know we have devices and ports that we want to use, - init CPC components */ - if (OPAL_SUCCESS != (ret = opal_btl_openib_connect_base_init())) { - goto no_btls; - } + if (0 < mca_btl_openib_component.ib_allowed_btls) { + /* Now that we know we have devices and ports that we want to use, + init CPC 
components */ + if (OPAL_SUCCESS != (ret = opal_btl_openib_connect_base_init())) { + goto no_btls; + } - /* Setup the BSRQ QP's based on the final value of - mca_btl_openib_component.receive_queues. */ - if (OPAL_SUCCESS != setup_qps()) { - goto no_btls; - } - if (mca_btl_openib_component.num_srq_qps > 0 || - mca_btl_openib_component.num_xrc_qps > 0) { - opal_hash_table_t *srq_addr_table = &mca_btl_openib_component.srq_manager.srq_addr_table; - if(OPAL_SUCCESS != opal_hash_table_init( - srq_addr_table, (mca_btl_openib_component.num_srq_qps + - mca_btl_openib_component.num_xrc_qps) * - mca_btl_openib_component.ib_num_btls)) { - BTL_ERROR(("SRQ internal error. Failed to allocate SRQ addr hash table")); + /* Setup the BSRQ QP's based on the final value of + mca_btl_openib_component.receive_queues. */ + if (OPAL_SUCCESS != setup_qps()) { goto no_btls; } - } + if (mca_btl_openib_component.num_srq_qps > 0 || + mca_btl_openib_component.num_xrc_qps > 0) { + opal_hash_table_t *srq_addr_table = &mca_btl_openib_component.srq_manager.srq_addr_table; + if(OPAL_SUCCESS != opal_hash_table_init( + srq_addr_table, (mca_btl_openib_component.num_srq_qps + + mca_btl_openib_component.num_xrc_qps) * + mca_btl_openib_component.ib_num_btls)) { + BTL_ERROR(("SRQ internal error. 
Failed to allocate SRQ addr hash table")); + goto no_btls; + } + } - /* For XRC: - * from this point we know if MCA_BTL_XRC_ENABLED it true or false */ + /* For XRC: + * from this point we know if MCA_BTL_XRC_ENABLED is true or false */ - /* Init XRC IB Addr hash table */ - if (MCA_BTL_XRC_ENABLED) { - OBJ_CONSTRUCT(&mca_btl_openib_component.ib_addr_table, - opal_hash_table_t); + /* Init XRC IB Addr hash table */ + if (MCA_BTL_XRC_ENABLED) { + OBJ_CONSTRUCT(&mca_btl_openib_component.ib_addr_table, + opal_hash_table_t); + } } /* Allocate space for btl modules */ @@ -2986,12 +2999,13 @@ btl_openib_component_init(int *num_btl_modules, mca_btl_openib_component.max_hw_msg_size, openib_btl->ib_port_attr.max_msg_sz)); } - mca_btl_openib_component.openib_btls[i] = openib_btl; - OBJ_RELEASE(ib_selected); - btls[i] = &openib_btl->super; if (finish_btl_init(openib_btl) != OPAL_SUCCESS) { goto no_btls; } + + mca_btl_openib_component.openib_btls[i] = openib_btl; + OBJ_RELEASE(ib_selected); + btls[i] = &openib_btl->super; ++i; } /* If we got nothing, then error out */ @@ -3039,6 +3053,7 @@ btl_openib_component_init(int *num_btl_modules, there are no openib BTL's in this process and return NULL. 
*/ mca_btl_openib_component.ib_num_btls = 0; + mca_btl_openib_component.ib_allowed_btls = 0; btl_openib_modex_send(); if (NULL != btls) { free(btls); diff --git a/opal/mca/btl/openib/btl_openib_mca.c b/opal/mca/btl/openib/btl_openib_mca.c index e362c958caa..54b47777141 100644 --- a/opal/mca/btl/openib/btl_openib_mca.c +++ b/opal/mca/btl/openib/btl_openib_mca.c @@ -573,7 +573,7 @@ int btl_openib_register_mca_params(void) CHECK(reg_bool("cuda_async_recv", NULL, "Enable or disable CUDA async recv copies " "(true = async; false = sync)", - true, &mca_btl_openib_component.cuda_async_recv)); + false, &mca_btl_openib_component.cuda_async_recv)); /* Also make the max send size larger for better GPU buffer performance */ mca_btl_openib_module.super.btl_max_send_size = 128 * 1024; /* Turn of message coalescing - not sure if it works with GPU buffers */ diff --git a/opal/mca/btl/openib/btl_openib_proc.c b/opal/mca/btl/openib/btl_openib_proc.c index a4b77fa6436..8f41b9696ad 100644 --- a/opal/mca/btl/openib/btl_openib_proc.c +++ b/opal/mca/btl/openib/btl_openib_proc.c @@ -13,8 +13,8 @@ * Copyright (c) 2007-2015 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2006-2007 Voltaire All rights reserved. * Copyright (c) 2014-2017 Intel, Inc. All rights reserved. - * Copyright (c) 2015-2016 Research Organization for Information Science - * and Technology (RIST). All rights reserved. + * Copyright (c) 2015-2018 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * Copyright (c) 2015 Mellanox Technologies. All rights reserved. * Copyright (c) 2016-2017 Los Alamos National Security, LLC. All rights * reserved. 
diff --git a/opal/mca/btl/openib/mca-btl-openib-device-params.ini b/opal/mca/btl/openib/mca-btl-openib-device-params.ini index 6e852647f39..4a0a62467d4 100644 --- a/opal/mca/btl/openib/mca-btl-openib-device-params.ini +++ b/opal/mca/btl/openib/mca-btl-openib-device-params.ini @@ -334,9 +334,17 @@ max_inline_data = 72 # Broadcom NetXtreme-E RDMA Ethernet Controller -[Broadcom Cumulus] +[Broadcom BCM57XXX] vendor_id = 0x14e4 -vendor_part_id = 0x16d7 +vendor_part_id = 0x1605,0x1606,0x1614,0x16c0,0x16c1,0x16ce,0x16cf,0x16d6,0x16d7,0x16d8,0x16d9,0x16df,0x16e2,0x16e3,0x16e5,0x16eb,0x16ed,0x16ef,0x16f0,0x16f1 +use_eager_rdma = 1 +mtu = 1024 +receive_queues = P,65536,256,192,128 +max_inline_data = 96 + +[Broadcom BCM58XXX] +vendor_id = 0x14e4 +vendor_part_id = 0xd800,0xd802,0xd804 use_eager_rdma = 1 mtu = 1024 receive_queues = P,65536,256,192,128 diff --git a/opal/mca/btl/scif/Makefile.am b/opal/mca/btl/scif/Makefile.am deleted file mode 100644 index 828ef2e7dfb..00000000000 --- a/opal/mca/btl/scif/Makefile.am +++ /dev/null @@ -1,50 +0,0 @@ -# -*- indent-tabs-mode:nil -*- -# -# Copyright (c) 2011-2013 Los Alamos National Security, LLC. All rights -# reserved. -# -# Additional copyrights may follow -# -# $HEADER$ -# - -# Make the output library in this directory, and name it either -# mca__.la (for DSO builds) or libmca__.la -# (for static builds). 
- -AM_CPPFLAGS = $(btl_scif_CPPFLAGS) - -if MCA_BUILD_opal_btl_scif_DSO -component_noinst = -component_install = mca_btl_scif.la -else -component_noinst = libmca_btl_scif.la -component_install = -endif - -scif_SOURCES = \ - btl_scif_component.c \ - btl_scif_module.c \ - btl_scif_add_procs.c \ - btl_scif_endpoint.h \ - btl_scif_endpoint.c \ - btl_scif_frag.c \ - btl_scif_frag.h \ - btl_scif_send.c \ - btl_scif_put.c \ - btl_scif_get.c \ - btl_scif.h - -mcacomponentdir = $(opallibdir) -mcacomponent_LTLIBRARIES = $(component_install) -mca_btl_scif_la_SOURCES = $(scif_SOURCES) -nodist_mca_btl_scif_la_SOURCES = $(scif_nodist_SOURCES) -mca_btl_scif_la_LIBADD = $(top_builddir)/opal/lib@OPAL_LIB_PREFIX@open-pal.la \ - $(btl_scif_LIBS) -mca_btl_scif_la_LDFLAGS = -module -avoid-version $(btl_scif_LDFLAGS) - -noinst_LTLIBRARIES = $(component_noinst) -libmca_btl_scif_la_SOURCES = $(scif_SOURCES) -nodist_libmca_btl_scif_la_SOURCES = $(scif_nodist_SOURCES) -libmca_btl_scif_la_LIBADD = $(btl_scif_LIBS) -libmca_btl_scif_la_LDFLAGS = -module -avoid-version $(btl_scif_LDFLAGS) diff --git a/opal/mca/btl/scif/btl_scif.h b/opal/mca/btl/scif/btl_scif.h deleted file mode 100644 index 9b5917224cc..00000000000 --- a/opal/mca/btl/scif/btl_scif.h +++ /dev/null @@ -1,249 +0,0 @@ -/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ -/* - * Copyright (c) 2013-2016 Los Alamos National Security, LLC. All rights - * reserved. - * Copyright (c) 2014 Research Organization for Information Science - * and Technology (RIST). All rights reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#ifndef MCA_BTL_SCIF_H -#define MCA_BTL_SCIF_H - -#include "opal_config.h" - -#include "opal/util/output.h" -#include "opal_stdint.h" -#include "opal/util/proc.h" - -#include "opal/mca/btl/btl.h" -#include "opal/mca/btl/base/base.h" -#include "opal/mca/btl/base/btl_base_error.h" -#include "opal/mca/rcache/rcache.h" -#include "opal/mca/rcache/base/base.h" - -#include -#include -#include -#include -#include -#include - -/* Turn on timers for debug builds */ -#if OPAL_ENABLE_DEBUG -/* #define SCIF_TIMING */ -#endif - -#if defined(SCIF_TIMING) -#include -#include - -static inline void timerspecsub (struct timespec *end, struct timespec *start, - struct timespec *diff) { - diff->tv_nsec = end->tv_nsec - start->tv_nsec; - diff->tv_sec = end->tv_sec - start->tv_sec; - if (diff->tv_nsec < 0) { - --diff->tv_sec; - diff->tv_nsec += 1000000000; - } -} - -#define SCIF_UPDATE_TIMER(agg, max, start) \ - do { \ - struct timespec _te, _diff; \ - double _tmpd; \ - clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &_te); \ - timerspecsub(&_te, &(start), &_diff); \ - _tmpd = (double) _diff.tv_sec + (double) _diff.tv_nsec / 1000000000.0; \ - (agg) += _tmpd; \ - (max) = fmax ((max), _tmpd); \ - } while (0) -#endif - -typedef struct mca_btl_scif_modex_t { - struct scif_portID port_id; -} mca_btl_scif_modex_t; - -typedef struct mca_btl_scif_module_t { - mca_btl_base_module_t super; - - /* listening endpoint */ - scif_epd_t scif_fd; - - /* listening port */ - struct scif_portID port_id; - - size_t endpoint_count; - struct mca_btl_base_endpoint_t *endpoints; - - opal_list_t failed_frags; - - /* fragments for DMA */ - opal_free_list_t dma_frags; - - /* fragments for eager send */ - opal_free_list_t eager_frags; - - pthread_t listen_thread; - - volatile bool exiting; - bool listening; - - mca_rcache_base_module_t *rcache; -} mca_btl_scif_module_t; - -typedef struct mca_btl_scif_component_t { - /* base BTL component */ - 
mca_btl_base_component_3_0_0_t super; - - /* DMA free list settings */ - int scif_free_list_num; - int scif_free_list_max; - int scif_free_list_inc; - - unsigned int segment_size; - - bool rma_use_cpu; - bool rma_sync; - -#if defined(SCIF_TIMING) - /* performance timers */ - double aquire_buffer_time; - double aquire_buffer_time_max; - - double send_time; - double send_time_max; - - double sendi_time; - double sendi_time_max; - - double get_time; - double get_time_max; - unsigned long get_count; - - double put_time; - double put_time_max; - unsigned long put_count; -#endif -} mca_btl_scif_component_t; - -int mca_btl_scif_module_init (void); - -/** - * BML->BTL notification of change in the process list. - * - * location: btl_scif_add_procs.c - * - * @param btl (IN) BTL module - * @param nprocs (IN) Number of processes - * @param procs (IN) Array of processes - * @param endpoint (OUT) Array of mca_btl_base_endpoint_t structures by BTL. - * @param reachable (OUT) Bitmask indicating set of peer processes that are reachable by this BTL. - * @return OPAL_SUCCESS or error status on failure. - */ -int -mca_btl_scif_add_procs (struct mca_btl_base_module_t* btl, - size_t nprocs, - struct opal_proc_t **procs, - struct mca_btl_base_endpoint_t **peers, - opal_bitmap_t *reachable); - -/** - * Notification of change to the process list. - * - * location: btl_scif_add_procs.c - * - * @param btl (IN) BTL module - * @param nprocs (IN) Number of processes - * @param proc (IN) Set of processes - * @param peer (IN) Set of peer addressing information. - * @return Status indicating if cleanup was successful - */ -int -mca_btl_scif_del_procs (struct mca_btl_base_module_t *btl, - size_t nprocs, - struct opal_proc_t **procs, - struct mca_btl_base_endpoint_t **peers); - -/** - * Initiate an asynchronous send. 
- * - * location: btl_scif_send.c - * - * @param btl (IN) BTL module - * @param endpoint (IN) BTL addressing information - * @param descriptor (IN) Description of the data to be transfered - * @param tag (IN) The tag value used to notify the peer. - */ -int -mca_btl_scif_send (struct mca_btl_base_module_t *btl, - struct mca_btl_base_endpoint_t *btl_peer, - struct mca_btl_base_descriptor_t *descriptor, - mca_btl_base_tag_t tag); - -int mca_btl_scif_sendi (struct mca_btl_base_module_t *btl, - struct mca_btl_base_endpoint_t *endpoint, - struct opal_convertor_t *convertor, - void *header, size_t header_size, - size_t payload_size, uint8_t order, - uint32_t flags, mca_btl_base_tag_t tag, - mca_btl_base_descriptor_t **descriptor); - -/** - * Initiate a get operation. - * - * location: btl_scif_get.c - */ -int mca_btl_scif_get (mca_btl_base_module_t *btl, struct mca_btl_base_endpoint_t *endpoint, void *local_address, - uint64_t remote_address, mca_btl_base_registration_handle_t *local_handle, - mca_btl_base_registration_handle_t *remote_handle, size_t size, int flags, - int order, mca_btl_base_rdma_completion_fn_t cbfunc, void *cbcontext, void *cbdata); - -/** - * Initiate a put operation. 
- * - * location: btl_scif_put.c - */ -int mca_btl_scif_put (mca_btl_base_module_t *btl, struct mca_btl_base_endpoint_t *endpoint, void *local_address, - uint64_t remote_address, mca_btl_base_registration_handle_t *local_handle, - mca_btl_base_registration_handle_t *remote_handle, size_t size, int flags, - int order, mca_btl_base_rdma_completion_fn_t cbfunc, void *cbcontext, void *cbdata); - -mca_btl_base_descriptor_t * -mca_btl_scif_alloc(struct mca_btl_base_module_t *btl, - struct mca_btl_base_endpoint_t *endpoint, - uint8_t order, size_t size, uint32_t flags); - -int mca_btl_scif_progress_send_wait_list (struct mca_btl_base_endpoint_t *endpoint); - -struct mca_btl_scif_reg_t; - -struct mca_btl_base_registration_handle_t { - /** scif offset */ - off_t scif_offset; - /** base address of this scif region */ - uintptr_t scif_base; -}; - -struct mca_btl_scif_registration_handle_t { - mca_btl_base_registration_handle_t btl_handle; - struct mca_btl_scif_reg_t *reg; -}; -typedef struct mca_btl_scif_registration_handle_t mca_btl_scif_registration_handle_t; - -typedef struct mca_btl_scif_reg_t { - mca_rcache_base_registration_t base; - /** per-endpoint btl handles for this registration */ - mca_btl_scif_registration_handle_t *handles; -} mca_btl_scif_reg_t; - -/* Global structures */ - -OPAL_MODULE_DECLSPEC extern mca_btl_scif_component_t mca_btl_scif_component; -OPAL_MODULE_DECLSPEC extern mca_btl_scif_module_t mca_btl_scif_module; - -#endif diff --git a/opal/mca/btl/scif/btl_scif_add_procs.c b/opal/mca/btl/scif/btl_scif_add_procs.c deleted file mode 100644 index b29d694fc65..00000000000 --- a/opal/mca/btl/scif/btl_scif_add_procs.c +++ /dev/null @@ -1,259 +0,0 @@ -/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ -/* - * Copyright (c) 2013-2016 Los Alamos National Security, LLC. All rights - * reserved. - * Copyright (c) 2014 Research Organization for Information Science - * and Technology (RIST). All rights reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "opal_config.h" -#include "opal/util/sys_limits.h" - -#include "btl_scif.h" -#include "btl_scif_frag.h" - -static int mca_btl_scif_setup_rcache (mca_btl_scif_module_t *scif_module); -static void *mca_btl_scif_connect_accept (void *arg); - -int mca_btl_scif_add_procs(struct mca_btl_base_module_t* btl, - size_t nprocs, - struct opal_proc_t **procs, - struct mca_btl_base_endpoint_t **peers, - opal_bitmap_t *reachable) { - mca_btl_scif_module_t *scif_module = (mca_btl_scif_module_t *) btl; - size_t procs_on_board, i, board_proc; - opal_proc_t *my_proc = opal_proc_local_get(); - int rc; - - /* determine how many procs are on this board */ - for (i = 0, procs_on_board = 0 ; i < nprocs ; ++i) { - struct opal_proc_t *opal_proc = procs[i]; - - if (my_proc == opal_proc) { - continue; - } - - if (!OPAL_PROC_ON_LOCAL_HOST(opal_proc->proc_flags) || - my_proc == opal_proc) { - /* scif can only be used with procs on this board */ - continue; - } - - procs_on_board++; - } - - /* allocate space for the detected peers and setup the rcache */ - if (NULL == scif_module->endpoints) { - scif_module->endpoints = calloc (procs_on_board, sizeof (mca_btl_base_endpoint_t)); - if (OPAL_UNLIKELY(NULL == scif_module->endpoints)) { - return OPAL_ERR_OUT_OF_RESOURCE; - } - - rc = mca_btl_scif_setup_rcache (scif_module); - if (OPAL_UNLIKELY(OPAL_SUCCESS != rc)) { - BTL_ERROR(("btl/scif error setting up rcache or free lists")); - return rc; - } - } - - for (i = 0, board_proc = 0 ; i < nprocs ; ++i) { - struct opal_proc_t *opal_proc = procs[i]; - - if (my_proc == opal_proc) { - continue; - } - - if (!OPAL_PROC_ON_LOCAL_HOST(opal_proc->proc_flags) || - my_proc == opal_proc) { - peers[i] = NULL; - /* scif can only be used with procs on this board */ - continue; - } - - /* Initialize endpoints */ - rc = mca_btl_scif_ep_init (scif_module->endpoints + board_proc, (mca_btl_scif_module_t *) btl, opal_proc); - if 
(OPAL_UNLIKELY(OPAL_SUCCESS != rc)) { - BTL_ERROR(("btl/scif error initializing endpoint")); - return rc; - } - - scif_module->endpoints[board_proc].id = board_proc; - - /* Set the reachable bit */ - rc = opal_bitmap_set_bit (reachable, i); - - /* Store a reference to this peer */ - peers[i] = scif_module->endpoints + board_proc; - - board_proc++; - } - - BTL_VERBOSE(("%lu procs on board\n", (unsigned long) procs_on_board)); - - scif_module->endpoint_count = procs_on_board; - - if (!mca_btl_scif_module.listening) { - /* start listening thread */ - rc = pthread_create (&mca_btl_scif_module.listen_thread, NULL, mca_btl_scif_connect_accept, NULL); - if (0 > rc) { - return OPAL_ERROR; - } - mca_btl_scif_module.listening = true; - } - - return OPAL_SUCCESS; -} - -static void *mca_btl_scif_connect_accept (void *arg) -{ - struct scif_pollepd pollepd = {.epd = mca_btl_scif_module.scif_fd, .events = SCIF_POLLIN, .revents = 0}; - int rc; - - BTL_VERBOSE(("btl/scif: listening for new connections")); - - /* listen for connections */ - while (1) { - pollepd.revents = 0; - - rc = scif_poll (&pollepd, 1, -1); - if (1 == rc) { - if (SCIF_POLLIN != pollepd.revents) { - break; - } - if (mca_btl_scif_module.exiting) { - /* accept the connection so scif_connect() does not timeout */ - struct scif_portID peer; - scif_epd_t newepd; - scif_accept(mca_btl_scif_module.scif_fd, &peer, &newepd, SCIF_ACCEPT_SYNC); - scif_close(newepd); - break; - } - - rc = mca_btl_scif_ep_connect_start_passive (); - if (OPAL_SUCCESS != rc) { - BTL_VERBOSE(("btl/scif: error accepting scif connection")); - continue; - } - } else { - break; - } - } - - BTL_VERBOSE(("btl/scif: stopped listening for new connections")); - - return NULL; -} - -int mca_btl_scif_del_procs (struct mca_btl_base_module_t *btl, - size_t nprocs, struct opal_proc_t **procs, - struct mca_btl_base_endpoint_t **peers) { - /* do nothing for now */ - return OPAL_SUCCESS; -} - -static int scif_dereg_mem (void *reg_data, 
mca_rcache_base_registration_t *reg) -{ - mca_btl_scif_reg_t *scif_reg = (mca_btl_scif_reg_t *)reg; - size_t size = (size_t)((uintptr_t) reg->bound - (uintptr_t) reg->base); - int i; - - /* register the fragment with all connected endpoints */ - for (i = 0 ; i < (int) mca_btl_scif_module.endpoint_count ; ++i) { - if ((off_t)-1 != scif_reg->handles[i].btl_handle.scif_offset && - MCA_BTL_SCIF_EP_STATE_CONNECTED == mca_btl_scif_module.endpoints[i].state) { - (void) scif_unregister(mca_btl_scif_module.endpoints[i].scif_epd, - scif_reg->handles[i].btl_handle.scif_offset, size); - } - } - - free (scif_reg->handles); - - return OPAL_SUCCESS; -} - -static int scif_reg_mem (void *reg_data, void *base, size_t size, - mca_rcache_base_registration_t *reg) -{ - mca_btl_scif_reg_t *scif_reg = (mca_btl_scif_reg_t *)reg; - int rc = OPAL_SUCCESS; - unsigned int i; - - scif_reg->handles = calloc (mca_btl_scif_module.endpoint_count, sizeof (scif_reg->handles[0])); - - /* intialize all scif offsets to -1 and initialize the pointer back to the rcache registration */ - for (i = 0 ; i < mca_btl_scif_module.endpoint_count ; ++i) { - scif_reg->handles[i].btl_handle.scif_offset = -1; - scif_reg->handles[i].btl_handle.scif_base = (intptr_t) base; - scif_reg->handles[i].reg = scif_reg; - } - - /* register the pointer with all connected endpoints */ - for (i = 0 ; i < mca_btl_scif_module.endpoint_count ; ++i) { - if (MCA_BTL_SCIF_EP_STATE_CONNECTED == mca_btl_scif_module.endpoints[i].state) { - scif_reg->handles[i].btl_handle.scif_offset = scif_register (mca_btl_scif_module.endpoints[i].scif_epd, - base, size, 0, SCIF_PROT_READ | - SCIF_PROT_WRITE, 0); - if (SCIF_REGISTER_FAILED == scif_reg->handles[i].btl_handle.scif_offset) { - /* cleanup */ - scif_dereg_mem (reg_data, reg); - rc = OPAL_ERR_OUT_OF_RESOURCE; - break; - } - } - } - - return rc; -} - -static int mca_btl_scif_setup_rcache (mca_btl_scif_module_t *scif_module) -{ - mca_rcache_base_resources_t rcache_resources; - int rc; - - /* 
initialize the grdma rcache */ - rcache_resources.cache_name = "scif"; - rcache_resources.reg_data = (void *) scif_module; - rcache_resources.sizeof_reg = sizeof (mca_btl_scif_reg_t); - rcache_resources.register_mem = scif_reg_mem; - rcache_resources.deregister_mem = scif_dereg_mem; - scif_module->rcache = mca_rcache_base_module_create ("grdma", scif_module, &rcache_resources); - if (NULL == scif_module->rcache) { - BTL_ERROR(("error creating grdma rcache")); - return OPAL_ERROR; - } - - /* setup free lists for fragments. dma fragments will be used for - * rma operations and in-place sends. eager frags will be used for - * buffered sends. */ - rc = opal_free_list_init (&scif_module->dma_frags, - sizeof (mca_btl_scif_dma_frag_t), 64, - OBJ_CLASS(mca_btl_scif_dma_frag_t), - 128, opal_getpagesize (), - mca_btl_scif_component.scif_free_list_num, - mca_btl_scif_component.scif_free_list_max, - mca_btl_scif_component.scif_free_list_inc, - NULL, 0, NULL, NULL, NULL); - if (OPAL_UNLIKELY(OPAL_SUCCESS != rc)) { - return rc; - } - - rc = opal_free_list_init (&scif_module->eager_frags, - sizeof (mca_btl_scif_eager_frag_t), 8, - OBJ_CLASS(mca_btl_scif_eager_frag_t), - 128 + scif_module->super.btl_eager_limit, 64, - mca_btl_scif_component.scif_free_list_num, - mca_btl_scif_component.scif_free_list_max, - mca_btl_scif_component.scif_free_list_inc, - NULL, 0, NULL, NULL, NULL); - if (OPAL_UNLIKELY(OPAL_SUCCESS != rc)) { - BTL_ERROR(("error creating eager receive fragment free list")); - return rc; - } - - return OPAL_SUCCESS; -} diff --git a/opal/mca/btl/scif/btl_scif_component.c b/opal/mca/btl/scif/btl_scif_component.c deleted file mode 100644 index 42ef9b6473f..00000000000 --- a/opal/mca/btl/scif/btl_scif_component.c +++ /dev/null @@ -1,386 +0,0 @@ -/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ -/* - * Copyright (c) 2013-2014 Los Alamos National Security, LLC. All rights - * reserved. 
- * Copyright (c) 2014 Research Organization for Information Science - * and Technology (RIST). All rights reserved. - * Copyright (c) 2014-2015 Intel, Inc. All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "btl_scif.h" -#include "btl_scif_frag.h" - -#include "opal/runtime/opal_params.h" -#include "opal/include/opal/align.h" -#include "opal/memoryhooks/memory.h" -#include "opal/mca/pmix/pmix.h" - -#include "opal/mca/base/mca_base_pvar.h" - -#include - -static int btl_scif_component_register(void); -static int btl_scif_component_open(void); -static int btl_scif_component_close(void); -static mca_btl_base_module_t **mca_btl_scif_component_init(int *, bool, bool); -static int mca_btl_scif_component_progress(void); - -mca_btl_scif_component_t mca_btl_scif_component = { - { - /* First, the mca_base_component_t struct containing meta information - about the component itself */ - - .btl_version = { - MCA_BTL_DEFAULT_VERSION("scif"), - .mca_open_component = btl_scif_component_open, - .mca_close_component = btl_scif_component_close, - .mca_register_component_params = btl_scif_component_register, - }, - .btl_data = { - .param_field = MCA_BASE_METADATA_PARAM_CHECKPOINT - }, - .btl_init = mca_btl_scif_component_init, - .btl_progress = mca_btl_scif_component_progress, - } -}; - -static int btl_scif_component_register(void) -{ - (void) mca_base_var_group_component_register(&mca_btl_scif_component.super.btl_version, - "SCIF byte transport layer"); - - mca_btl_scif_component.scif_free_list_num = 8; - (void) mca_base_component_var_register(&mca_btl_scif_component.super.btl_version, - "free_list_num", "Initial fragment free list size", - MCA_BASE_VAR_TYPE_INT, NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE, - OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_LOCAL, - &mca_btl_scif_component.scif_free_list_num); - mca_btl_scif_component.scif_free_list_max = 16384; - (void) mca_base_component_var_register(&mca_btl_scif_component.super.btl_version, 
- "free_list_max", "Maximum fragment free list size", - MCA_BASE_VAR_TYPE_INT, NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE, - OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_LOCAL, - &mca_btl_scif_component.scif_free_list_max); - mca_btl_scif_component.scif_free_list_inc = 64; - (void) mca_base_component_var_register(&mca_btl_scif_component.super.btl_version, - "free_list_inc", "Fragment free list size increment", - MCA_BASE_VAR_TYPE_INT, NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE, - OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_LOCAL, - &mca_btl_scif_component.scif_free_list_inc); - - mca_btl_scif_component.segment_size = 8 * 1024; - (void) mca_base_component_var_register(&mca_btl_scif_component.super.btl_version, - "segment_size", "Size of memory segment to " - "allocate for each remote process (default: " - "8k)", MCA_BASE_VAR_TYPE_UNSIGNED_INT, NULL, 0, - MCA_BASE_VAR_FLAG_SETTABLE, OPAL_INFO_LVL_9, - MCA_BASE_VAR_SCOPE_LOCAL, - &mca_btl_scif_component.segment_size); - - mca_btl_scif_component.rma_use_cpu = false; - (void) mca_base_component_var_register(&mca_btl_scif_component.super.btl_version, - "rma_use_cpu", "Use CPU instead of DMA " - "for RMA copies (default: false)", MCA_BASE_VAR_TYPE_BOOL, - NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE, - OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_LOCAL, - &mca_btl_scif_component.rma_use_cpu); - - - mca_btl_scif_component.rma_sync = true; - (void) mca_base_component_var_register(&mca_btl_scif_component.super.btl_version, - "rma_sync", "Use synchronous RMA instead of " - "an RMA fence (default: true)", MCA_BASE_VAR_TYPE_BOOL, - NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE, - OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_LOCAL, - &mca_btl_scif_component.rma_sync); - -#if defined(SCIF_TIMING) - mca_btl_scif_component.aquire_buffer_time = 0.0; - (void) mca_base_component_pvar_register(&mca_btl_scif_component.super.btl_version, - "aquire_buffer_time", "Aggregate time spent " - "aquiring send buffers", OPAL_INFO_LVL_9, - MCA_BASE_PVAR_CLASS_AGGREGATE, MCA_BASE_VAR_TYPE_DOUBLE, - NULL, 
MCA_BASE_VAR_BIND_NO_OBJECT, MCA_BASE_PVAR_FLAG_READONLY | - MCA_BASE_PVAR_FLAG_CONTINUOUS, NULL, NULL, NULL, - &mca_btl_scif_component.aquire_buffer_time); - - mca_btl_scif_component.send_time = 0.0; - (void) mca_base_component_pvar_register(&mca_btl_scif_component.super.btl_version, - "send_time", "Aggregate time spent writing to " - "send buffers", OPAL_INFO_LVL_9, MCA_BASE_PVAR_CLASS_AGGREGATE, - MCA_BASE_VAR_TYPE_DOUBLE, NULL, MCA_BASE_VAR_BIND_NO_OBJECT, - MCA_BASE_PVAR_FLAG_READONLY | MCA_BASE_PVAR_FLAG_CONTINUOUS, - NULL, NULL, NULL, &mca_btl_scif_component.send_time); - - mca_btl_scif_component.sendi_time = 0.0; - (void) mca_base_component_pvar_register(&mca_btl_scif_component.super.btl_version, - "sendi_time", "Aggregate time spent writing to " - "send buffers in sendi", OPAL_INFO_LVL_9, MCA_BASE_PVAR_CLASS_AGGREGATE, - MCA_BASE_VAR_TYPE_DOUBLE, NULL, MCA_BASE_VAR_BIND_NO_OBJECT, - MCA_BASE_PVAR_FLAG_READONLY | MCA_BASE_PVAR_FLAG_CONTINUOUS, - NULL, NULL, NULL, &mca_btl_scif_component.sendi_time); - - mca_btl_scif_component.get_time = 0.0; - (void) mca_base_component_pvar_register(&mca_btl_scif_component.super.btl_version, - "get_time", "Aggregate time spent in DMA read (scif_readfrom)", - OPAL_INFO_LVL_9, MCA_BASE_PVAR_CLASS_AGGREGATE, - MCA_BASE_VAR_TYPE_DOUBLE, NULL, MCA_BASE_VAR_BIND_NO_OBJECT, - MCA_BASE_PVAR_FLAG_READONLY | MCA_BASE_PVAR_FLAG_CONTINUOUS, - NULL, NULL, NULL, &mca_btl_scif_component.get_time); - - mca_btl_scif_component.get_count = 0; - (void) mca_base_component_pvar_register(&mca_btl_scif_component.super.btl_version, - "get_count", "Number of times btl_scif_get was called", - OPAL_INFO_LVL_9, MCA_BASE_PVAR_CLASS_COUNTER, - MCA_BASE_VAR_TYPE_UNSIGNED_LONG, NULL, MCA_BASE_VAR_BIND_NO_OBJECT, - MCA_BASE_PVAR_FLAG_READONLY | MCA_BASE_PVAR_FLAG_CONTINUOUS, - NULL, NULL, NULL, &mca_btl_scif_component.get_count); - - mca_btl_scif_component.put_time = 0.0; - (void) mca_base_component_pvar_register(&mca_btl_scif_component.super.btl_version, - 
"put_time", "Aggregate time spent in DMA write (scif_writeto)", - OPAL_INFO_LVL_9, MCA_BASE_PVAR_CLASS_AGGREGATE, - MCA_BASE_VAR_TYPE_DOUBLE, NULL, MCA_BASE_VAR_BIND_NO_OBJECT, - MCA_BASE_PVAR_FLAG_READONLY | MCA_BASE_PVAR_FLAG_CONTINUOUS, - NULL, NULL, NULL, &mca_btl_scif_component.put_time); - - mca_btl_scif_component.put_count = 0; - (void) mca_base_component_pvar_register(&mca_btl_scif_component.super.btl_version, - "put_count", "Number of times btl_scif_put was called", - OPAL_INFO_LVL_9, MCA_BASE_PVAR_CLASS_COUNTER, - MCA_BASE_VAR_TYPE_UNSIGNED_LONG, NULL, MCA_BASE_VAR_BIND_NO_OBJECT, - MCA_BASE_PVAR_FLAG_READONLY | MCA_BASE_PVAR_FLAG_CONTINUOUS, - NULL, NULL, NULL, &mca_btl_scif_component.put_count); -#endif - - mca_btl_scif_module.super.btl_exclusivity = MCA_BTL_EXCLUSIVITY_HIGH - 1; - mca_btl_scif_module.super.btl_eager_limit = 1 * 1024; - mca_btl_scif_module.super.btl_rndv_eager_limit = 1 * 1024; - mca_btl_scif_module.super.btl_rdma_pipeline_frag_size = 4 * 1024 * 1024; - mca_btl_scif_module.super.btl_max_send_size = 1 * 1024; - mca_btl_scif_module.super.btl_rdma_pipeline_send_length = 1 * 1024; - - /* threshold for put */ - mca_btl_scif_module.super.btl_min_rdma_pipeline_size = 1 * 1024; - - mca_btl_scif_module.super.btl_flags = MCA_BTL_FLAGS_SEND | - MCA_BTL_FLAGS_RDMA | MCA_BTL_FLAGS_SEND_INPLACE; - - mca_btl_scif_module.super.btl_registration_handle_size = sizeof (mca_btl_base_registration_handle_t); - - mca_btl_scif_module.super.btl_bandwidth = 50000; /* Mbs */ - mca_btl_scif_module.super.btl_latency = 2; /* Microsecs */ - - /* Call the BTL based to register its MCA params */ - mca_btl_base_param_register(&mca_btl_scif_component.super.btl_version, - &mca_btl_scif_module.super); - - return OPAL_SUCCESS; -} - -static int btl_scif_component_open(void) -{ - return OPAL_SUCCESS; -} - -static int btl_scif_component_close(void) -{ - return OPAL_SUCCESS; -} - -static void mca_btl_scif_autoset_leave_pinned (void) { - int value = 
opal_mem_hooks_support_level(); - - if ((OPAL_MEMORY_FREE_SUPPORT | OPAL_MEMORY_MUNMAP_SUPPORT) == - ((OPAL_MEMORY_FREE_SUPPORT | OPAL_MEMORY_MUNMAP_SUPPORT) & value)) { - /* Set leave pinned to 1 if leave pinned pipeline is not set */ - if (-1 == opal_leave_pinned) { - opal_leave_pinned = !opal_leave_pinned_pipeline; - } - } else { - opal_leave_pinned = 0; - opal_leave_pinned_pipeline = 0; - } -} - -static int mca_btl_scif_modex_send (void) -{ - mca_btl_scif_modex_t modex; - int rc; - - memset(&modex, 0, sizeof(mca_btl_scif_modex_t)); - modex.port_id = mca_btl_scif_module.port_id; - - OPAL_MODEX_SEND(rc, OPAL_PMIX_LOCAL, - &mca_btl_scif_component.super.btl_version, - &modex, sizeof (modex)); - return rc; -} - - -static mca_btl_base_module_t **mca_btl_scif_component_init (int *num_btl_modules, - bool enable_progress_threads, - bool enable_mpi_threads) -{ - struct mca_btl_base_module_t **base_modules; - int rc; - - BTL_VERBOSE(("btl/scif initializing")); - - signal (SIGSEGV, SIG_DFL); - - /* we currently need the memory hooks to determine when - * registrations are no longer valid. 
*/ - mca_btl_scif_autoset_leave_pinned (); - - if (32768 < mca_btl_scif_module.super.btl_eager_limit) { - mca_btl_scif_module.super.btl_eager_limit = 32768; - } - - /* the segment should be large enough to hold at least one eager packet */ - if (4 * mca_btl_scif_module.super.btl_eager_limit > mca_btl_scif_component.segment_size) { - mca_btl_scif_component.segment_size = 4 * mca_btl_scif_module.super.btl_eager_limit; - } - - /* round up to a multiple of 4096 */ - mca_btl_scif_component.segment_size = (mca_btl_scif_component.segment_size + 0xfff) & ~0xfff; - - base_modules = (struct mca_btl_base_module_t **) - calloc (1, sizeof (struct mca_btl_base_module_t *)); - if (OPAL_UNLIKELY(NULL == base_modules)) { - BTL_ERROR(("Malloc failed : %s:%d", __FILE__, __LINE__)); - return NULL; - } - - /* initialize the module */ - rc = mca_btl_scif_module_init (); - if (OPAL_SUCCESS != rc) { - BTL_VERBOSE(("btl/scif error initializing module")); - free (base_modules); - return NULL; - } - - base_modules[0] = &mca_btl_scif_module.super; - mca_btl_scif_module.exiting = false; - mca_btl_scif_module.listening = false; - - rc = mca_btl_scif_modex_send (); - if (OPAL_SUCCESS != rc) { - BTL_VERBOSE(("btl/scif error sending modex")); - free (base_modules); - return NULL; - } - - *num_btl_modules = 1; - - BTL_VERBOSE(("btl/scif done initializing modules")); - - return base_modules; -} - -static int mca_btl_scif_progress_recvs (mca_btl_base_endpoint_t *ep) -{ - const mca_btl_active_message_callback_t *reg; - unsigned int start = ep->recv_buffer.start; - unsigned int end = ep->recv_buffer.endp[0]; - mca_btl_scif_base_frag_t frag; - mca_btl_scif_frag_hdr_t *hdr; - /* changing this value does not appear to have a signifigant impact - * on performance */ - int frags_per_loop = 5; - - if (end == start) { - return 0; - } - - end &= ~ (1 << 31); - start &= ~ (1 << 31); - - /* force all prior reads to complete before continuing */ - opal_atomic_rmb (); - - do { - hdr = (mca_btl_scif_frag_hdr_t *) 
(ep->recv_buffer.buffer + start); - - /* force all prior reads to complete before continuing */ - MB(); - - BTL_VERBOSE(("got frag with header {.tag = %d, .size = %d} from offset %u", - hdr->tag, hdr->size, start)); -#if defined(SCIF_USE_SEQ) - if (hdr->seq != ep->seq_expected) { - break; - } - - ep->seq_expected++; -#endif - - /* message to skip the rest of the buffer */ - if (0xff != hdr->tag) { - reg = mca_btl_base_active_message_trigger + hdr->tag; - - /* fragment fits entirely in the remaining buffer space. some - * btl users do not handle fragmented data so we can't split - * the fragment without introducing another copy here. this - * limitation has not appeared to cause any performance - * problems. */ - frag.base.des_segment_count = 1; - frag.segments[0].seg_len = hdr->size; - frag.segments[0].seg_addr.pval = (void *) (hdr + 1); - - frag.base.des_segments = frag.segments; - - /* call the registered callback function */ - reg->cbfunc(&mca_btl_scif_module.super, hdr->tag, &frag.base, reg->cbdata); - } - - start = (start + hdr->size + sizeof (*hdr) + 63) & ~63; - - /* skip unusable space at the end of the buffer */ - if (mca_btl_scif_component.segment_size == start) { - start = 64; - ep->recv_buffer.start = ((ep->recv_buffer.start & (1 << 31)) ^ (1 << 31)) | 64; - } else { - ep->recv_buffer.start = (ep->recv_buffer.start & (1 << 31)) | start; - } - } while (start != end && --frags_per_loop); - - /* let the sender know where we stopped */ - ep->recv_buffer.startp[0] = ep->recv_buffer.start; - - /* return the number of fragments processed */ - return 5 - frags_per_loop; -} - -static int mca_btl_scif_progress_sends (mca_btl_base_endpoint_t *ep) -{ - /* try sending any wait listed fragments */ - if (OPAL_UNLIKELY(0 != opal_list_get_size (&ep->frag_wait_list))) { - return mca_btl_scif_progress_send_wait_list (ep); - } - - return 0; -} - -static int mca_btl_scif_component_progress (void) -{ - unsigned int i; - int count = 0; - - /* progress all connected endpoints 
*/ - for (i = 0, count = 0 ; i < mca_btl_scif_module.endpoint_count ; ++i) { - if (MCA_BTL_SCIF_EP_STATE_CONNECTED == mca_btl_scif_module.endpoints[i].state) { - /* poll all connected endpoints */ - count += mca_btl_scif_progress_recvs (mca_btl_scif_module.endpoints + i); - /* if any fragments are waiting try to send them now */ - count += mca_btl_scif_progress_sends (mca_btl_scif_module.endpoints + i); - } - } - - return count; -} diff --git a/opal/mca/btl/scif/btl_scif_endpoint.c b/opal/mca/btl/scif/btl_scif_endpoint.c deleted file mode 100644 index 41e34cabcdc..00000000000 --- a/opal/mca/btl/scif/btl_scif_endpoint.c +++ /dev/null @@ -1,301 +0,0 @@ -/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ -/* - * Copyright (c) 2013 Los Alamos National Security, LLC. All rights - * reserved. - * Copyright (c) 2014 Research Organization for Information Science - * and Technology (RIST). All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "btl_scif.h" - -#include "btl_scif_endpoint.h" -#include "opal/mca/memchecker/base/base.h" -#include "opal/util/sys_limits.h" - -static void mca_btl_scif_ep_construct (mca_btl_base_endpoint_t *ep) { - memset ((char *) ep + sizeof(ep->super), 0, sizeof (*ep) - sizeof (ep->super)); - OBJ_CONSTRUCT(&ep->lock, opal_mutex_t); - OBJ_CONSTRUCT(&ep->frag_wait_list, opal_list_t); -} - -static void mca_btl_scif_ep_destruct (mca_btl_base_endpoint_t *ep) { - if (ep->send_buffer.buffer) { - scif_munmap (ep->send_buffer.buffer, mca_btl_scif_component.segment_size); - } - - if (ep->recv_buffer.buffer) { - scif_unregister (ep->scif_epd, ep->recv_buffer.scif_offset, mca_btl_scif_component.segment_size); - free (ep->recv_buffer.buffer); - } - - if (ep->scif_epd) { - scif_close (ep->scif_epd); - } - - OBJ_DESTRUCT(&ep->lock); - OBJ_DESTRUCT(&ep->frag_wait_list); -} - -OBJ_CLASS_INSTANCE(mca_btl_scif_endpoint_t, opal_list_item_t, - mca_btl_scif_ep_construct, mca_btl_scif_ep_destruct); - 
-static void mca_btl_scif_ep_free_buffer (mca_btl_base_endpoint_t *ep) { - if (ep->recv_buffer.buffer) { - scif_unregister (ep->scif_epd, ep->recv_buffer.scif_offset, mca_btl_scif_component.segment_size); - free (ep->recv_buffer.buffer); - ep->recv_buffer.buffer = NULL; - ep->recv_buffer.scif_offset = (off_t) -1; - } -} - -static inline int mca_btl_scif_ep_get_buffer (mca_btl_base_endpoint_t *ep) { - int rc; - - rc = posix_memalign ((void **) &ep->recv_buffer.buffer, opal_getpagesize(), mca_btl_scif_component.segment_size); - if (0 > rc) { - return OPAL_ERR_OUT_OF_RESOURCE; - } - - memset (ep->recv_buffer.buffer, 0, mca_btl_scif_component.segment_size); - - ep->recv_buffer.scif_offset = scif_register (ep->scif_epd, ep->recv_buffer.buffer, - mca_btl_scif_component.segment_size, 0, - SCIF_PROT_READ | SCIF_PROT_WRITE, 0); - if (SCIF_REGISTER_FAILED == ep->recv_buffer.scif_offset) { - BTL_VERBOSE(("failed to register a scif buffer of size %d. errno = %d", - mca_btl_scif_component.segment_size, errno)); - free (ep->recv_buffer.buffer); - ep->recv_buffer.buffer = NULL; - return OPAL_ERROR; - } - - ep->recv_buffer.startp = (uint32_t *) ep->recv_buffer.buffer; - ep->recv_buffer.endp = ep->recv_buffer.startp + 1; - - ep->recv_buffer.startp[0] = ep->recv_buffer.endp[0] = 64; - - BTL_VERBOSE(("allocated buffer of size %d bytes. 
with scif registration %lu", - mca_btl_scif_component.segment_size, (unsigned long) ep->recv_buffer.scif_offset)); - - return OPAL_SUCCESS; -} - -/* must be called with the endpoint lock held */ -static int mca_btl_scif_ep_connect_finish (mca_btl_base_endpoint_t *ep, bool passive) { - int rc; - - rc = mca_btl_scif_ep_get_buffer (ep); - if (OPAL_UNLIKELY(OPAL_SUCCESS != rc)) { - BTL_VERBOSE(("error allocating buffer for scif peer")); - return rc; - } - - if (passive) { - rc = scif_recv (ep->scif_epd, &ep->send_buffer.scif_offset, - sizeof (ep->send_buffer.scif_offset), SCIF_RECV_BLOCK); - if (OPAL_LIKELY(-1 != rc)) { - rc = scif_send (ep->scif_epd, &ep->recv_buffer.scif_offset, - sizeof (ep->recv_buffer.scif_offset), SCIF_SEND_BLOCK); - } - } else { - rc = scif_send (ep->scif_epd, &ep->recv_buffer.scif_offset, - sizeof (ep->recv_buffer.scif_offset), SCIF_SEND_BLOCK); - if (OPAL_LIKELY(-1 != rc)) { - rc = scif_recv (ep->scif_epd, &ep->send_buffer.scif_offset, - sizeof (ep->send_buffer.scif_offset), SCIF_RECV_BLOCK); - } - } - - if (OPAL_UNLIKELY(-1 == rc)) { - BTL_VERBOSE(("error exchanging connection data with peer %d", ep->peer_proc->proc_name.vpid)); - mca_btl_scif_ep_free_buffer (ep); - return OPAL_ERROR; - } - - BTL_VERBOSE(("remote peer %d has scif offset %lu", ep->peer_proc->proc_name.vpid, - (unsigned long) ep->send_buffer.scif_offset)); - - ep->send_buffer.buffer = scif_mmap (0, mca_btl_scif_component.segment_size, - SCIF_PROT_READ | SCIF_PROT_WRITE, - 0, ep->scif_epd, ep->send_buffer.scif_offset); - if (OPAL_UNLIKELY(NULL == ep->send_buffer.buffer)) { - BTL_VERBOSE(("error in scif_mmap")); - mca_btl_scif_ep_free_buffer (ep); - return OPAL_ERROR; - } - - opal_memchecker_base_mem_defined (ep->send_buffer.buffer, mca_btl_scif_component.segment_size); - - BTL_VERBOSE(("remote peer %d buffer mapped to local pointer %p", ep->peer_proc->proc_name.vpid, - ep->send_buffer.buffer)); - - /* setup the circular send buffers */ - ep->send_buffer.start = 
ep->send_buffer.end = 64; - - ep->send_buffer.startp = (uint32_t *) ep->send_buffer.buffer; - ep->send_buffer.endp = ep->send_buffer.startp + 1; - - ep->recv_buffer.start = 64; - - /* connection complete */ - ep->state = MCA_BTL_SCIF_EP_STATE_CONNECTED; - - BTL_VERBOSE(("btl/scif connection to remote peer %d established", ep->peer_proc->proc_name.vpid)); - - return OPAL_SUCCESS; -} - -int mca_btl_scif_ep_connect_start_passive (void) { - mca_btl_base_endpoint_t *ep = NULL; - opal_process_name_t remote_name; - struct scif_portID port_id; - unsigned int i; - scif_epd_t epd; - int rc; - - /* accept the connection request. if the endpoint is already connecting we - * may close this endpoint and alloc mca_btl_scif_ep_connect_start_active - * to finish the connection. */ - rc = scif_accept (mca_btl_scif_module.scif_fd, &port_id, &epd, SCIF_ACCEPT_SYNC); - if (OPAL_UNLIKELY(0 > rc)) { - BTL_VERBOSE(("error accepting connecton from scif peer. %d", errno)); - return OPAL_ERROR; - } - - /* determine which peer sent the connection request */ - rc = scif_recv (epd, &remote_name, sizeof (remote_name), SCIF_RECV_BLOCK); - if (OPAL_UNLIKELY(-1 == rc)) { - BTL_VERBOSE(("error in scif_recv")); - scif_close (epd); - return OPAL_ERROR; - } - - BTL_VERBOSE(("got connection request from vpid %d on port %u on node %u", - remote_name.vpid, port_id.port, port_id.node)); - - for (i = 0 ; i < mca_btl_scif_module.endpoint_count ; ++i) { - if (mca_btl_scif_module.endpoints[i].peer_proc->proc_name.vpid == - remote_name.vpid) { - ep = mca_btl_scif_module.endpoints + i; - break; - } - } - - /* peer not found */ - if (i == mca_btl_scif_module.endpoint_count) { - BTL_VERBOSE(("remote peer %d unknown", remote_name.vpid)); - scif_close (epd); - return OPAL_ERROR; - } - - /* similtaneous connections (active side) */ - if ((MCA_BTL_SCIF_EP_STATE_CONNECTING == ep->state && - ep->port_id.port < mca_btl_scif_module.port_id.port) || - MCA_BTL_SCIF_EP_STATE_CONNECTED == ep->state) { - BTL_VERBOSE(("active 
connection in progress. connection request from peer %d rejected", remote_name.vpid)); - scif_close (epd); - return OPAL_SUCCESS; - } - - opal_mutex_lock (&ep->lock); - - if (MCA_BTL_SCIF_EP_STATE_CONNECTED == ep->state) { - opal_mutex_unlock (&ep->lock); - scif_close (epd); - return OPAL_SUCCESS; - } - - BTL_VERBOSE(("accepted connection from port %d", ep->port_id.port)); - - ep->state = MCA_BTL_SCIF_EP_STATE_CONNECTING; - ep->scif_epd = epd; - - rc = mca_btl_scif_ep_connect_finish (ep, true); - if (OPAL_SUCCESS != rc) { - scif_close (ep->scif_epd); - ep->scif_epd = -1; - ep->state = MCA_BTL_SCIF_EP_STATE_INIT; - } - - opal_mutex_unlock (&ep->lock); - - return rc; -} - -static inline int mca_btl_scif_ep_connect_start_active (mca_btl_base_endpoint_t *ep) { - int rc = OPAL_SUCCESS; - - BTL_VERBOSE(("initiaiting connection to remote peer %d with port: %u on local scif node: %u", - ep->peer_proc->proc_name.vpid, ep->port_id.port, ep->port_id.node)); - - opal_mutex_lock (&ep->lock); - do { - if (MCA_BTL_SCIF_EP_STATE_INIT != ep->state) { - /* the accept thread has already finished this connection */ - rc = OPAL_SUCCESS; - break; - } - - ep->state = MCA_BTL_SCIF_EP_STATE_CONNECTING; - - ep->scif_epd = scif_open (); - if (OPAL_UNLIKELY(SCIF_OPEN_FAILED == ep->scif_epd)) { - BTL_VERBOSE(("error creating new scif endpoint")); - rc = OPAL_ERROR; - break; - } - - rc = scif_connect (ep->scif_epd, &ep->port_id); - if (OPAL_UNLIKELY(-1 == rc)) { - /* the connection attempt failed. this could mean the peer is currently - * processing connections. we will to try again later. */ - BTL_VERBOSE(("error connecting to scif peer. 
%d", errno)); - rc = OPAL_ERR_RESOURCE_BUSY; - break; - } - - rc = scif_send (ep->scif_epd, &OPAL_PROC_MY_NAME, sizeof (OPAL_PROC_MY_NAME), SCIF_SEND_BLOCK); - if (OPAL_UNLIKELY(-1 == rc)) { - BTL_VERBOSE(("error in scif_send")); - rc = OPAL_ERROR; - break; - } - - /* build connection data */ - rc = mca_btl_scif_ep_connect_finish (ep, false); - } while (0); - - if (OPAL_SUCCESS != rc) { - scif_close (ep->scif_epd); - ep->scif_epd = -1; - ep->state = MCA_BTL_SCIF_EP_STATE_INIT; - } - - opal_mutex_unlock (&ep->lock); - - return rc; -} - -int mca_btl_scif_ep_connect (mca_btl_base_endpoint_t *ep) { - int rc; - - if (OPAL_LIKELY(MCA_BTL_SCIF_EP_STATE_CONNECTED == ep->state)) { - return OPAL_SUCCESS; - } else if (MCA_BTL_SCIF_EP_STATE_CONNECTING == ep->state) { - return OPAL_ERR_RESOURCE_BUSY; - } - - if (MCA_BTL_SCIF_EP_STATE_INIT == ep->state) { - rc = mca_btl_scif_ep_connect_start_active (ep); - if (OPAL_SUCCESS != rc) { - return rc; - } - } - - return OPAL_SUCCESS; -} diff --git a/opal/mca/btl/scif/btl_scif_endpoint.h b/opal/mca/btl/scif/btl_scif_endpoint.h deleted file mode 100644 index c04ea35405b..00000000000 --- a/opal/mca/btl/scif/btl_scif_endpoint.h +++ /dev/null @@ -1,110 +0,0 @@ -/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ -/* - * Copyright (c) 2013 Los Alamos National Security, LLC. All rights - * reserved. - * Copyright (c) 2014 Intel, Inc. All rights reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#ifndef MCA_BTL_SCIF_ENDPOINT_H -#define MCA_BTL_SCIF_ENDPOINT_H - -#include "btl_scif.h" -#include "opal/mca/pmix/pmix.h" - -typedef enum mca_btl_scif_endpoint_state_t { - MCA_BTL_SCIF_EP_STATE_INIT, - MCA_BTL_SCIF_EP_STATE_CONNECTING, - MCA_BTL_SCIF_EP_STATE_CONNECTED -} mca_btl_scif_endpoint_state_t; - -typedef struct mca_btl_scif_endpoint_buffer_t { - unsigned char *buffer; - off_t scif_offset; - unsigned int start, end; - uint32_t *startp, *endp; -} mca_btl_scif_endpoint_buffer_t; - -typedef struct mca_btl_base_endpoint_t { - opal_list_item_t super; - mca_btl_scif_module_t *btl; - - /* location in the module endpoints array */ - int id; - - opal_mutex_t lock; - - /* scif endpoint */ - scif_epd_t scif_epd; - - /* connection information */ - struct scif_portID port_id; - - /* buffer information */ - mca_btl_scif_endpoint_buffer_t send_buffer; - mca_btl_scif_endpoint_buffer_t recv_buffer; - - /* current connect state */ - mca_btl_scif_endpoint_state_t state; - - /* frags waiting for resources */ - opal_list_t frag_wait_list; - - /* associated process */ - opal_proc_t *peer_proc; - -#if defined(SCIF_USE_SEQ) - uint32_t seq_next; - uint32_t seq_expected; -#endif -} mca_btl_base_endpoint_t; - -typedef mca_btl_base_endpoint_t mca_btl_scif_endpoint_t; - -OBJ_CLASS_DECLARATION(mca_btl_scif_endpoint_t); - -int mca_btl_scif_ep_connect (mca_btl_scif_endpoint_t *ep); -int mca_btl_scif_ep_connect_start_passive (void); - -static inline int mca_btl_scif_ep_init (mca_btl_scif_endpoint_t *endpoint, - mca_btl_scif_module_t *btl, - opal_proc_t *peer_proc) { - mca_btl_scif_modex_t *modex; - size_t msg_size; - int rc; - - OBJ_CONSTRUCT(endpoint, mca_btl_scif_endpoint_t); - endpoint->state = MCA_BTL_SCIF_EP_STATE_INIT; - - OPAL_MODEX_RECV(rc, &mca_btl_scif_component.super.btl_version, - &peer_proc->proc_name, (void **) &modex, &msg_size); - if (OPAL_SUCCESS != rc) { - return rc; - } - assert (msg_size 
== sizeof (endpoint->port_id)); - - endpoint->port_id = modex->port_id; - endpoint->peer_proc = peer_proc; - endpoint->btl = btl; - -#if defined(SCIF_USE_SEQ) - endpoint->seq_next = 0x00001010; - endpoint->seq_expected = 0x00001010; -#endif - - free (modex); - - return OPAL_SUCCESS; -} - -static inline int mca_btl_scif_ep_release (mca_btl_scif_endpoint_t *ep) -{ - OBJ_DESTRUCT(ep); - return OPAL_SUCCESS; -} - -#endif /* MCA_BTL_SCIF_ENDPOINT_H */ diff --git a/opal/mca/btl/scif/btl_scif_frag.c b/opal/mca/btl/scif/btl_scif_frag.c deleted file mode 100644 index 6a684defb63..00000000000 --- a/opal/mca/btl/scif/btl_scif_frag.c +++ /dev/null @@ -1,31 +0,0 @@ -/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ -/* - * Copyright (c) 2013 Los Alamos National Security, LLC. All rights - * reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "btl_scif.h" -#include "btl_scif_frag.h" - -static inline void mca_btl_scif_base_frag_constructor (mca_btl_scif_base_frag_t *frag) -{ - memset ((char *) frag + sizeof (frag->base), 0, sizeof (*frag) - sizeof (frag->base)); - frag->segments[0].seg_addr.pval = frag->base.super.ptr; -} - -static inline void mca_btl_scif_eager_frag_constructor (mca_btl_scif_base_frag_t *frag) -{ - memset ((char *) frag + sizeof (frag->base), 0, sizeof (*frag) - sizeof (frag->base)); - frag->segments[0].seg_addr.pval = frag->base.super.ptr; -} - -OBJ_CLASS_INSTANCE(mca_btl_scif_eager_frag_t, mca_btl_base_descriptor_t, - mca_btl_scif_base_frag_constructor, NULL); - -OBJ_CLASS_INSTANCE(mca_btl_scif_dma_frag_t, mca_btl_base_descriptor_t, - mca_btl_scif_base_frag_constructor, NULL); diff --git a/opal/mca/btl/scif/btl_scif_frag.h b/opal/mca/btl/scif/btl_scif_frag.h deleted file mode 100644 index d17ea2a5cec..00000000000 --- a/opal/mca/btl/scif/btl_scif_frag.h +++ /dev/null @@ -1,95 +0,0 @@ -/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ -/* - * Copyright (c) 2013-2015 Los Alamos National 
Security, LLC. All rights - * reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#if !defined(MCA_BTL_SCIF_FRAG_H) -#define MCA_BTL_SCIF_FRAG_H - -#include "btl_scif.h" -#include "btl_scif_endpoint.h" - -typedef struct mca_btl_scif_frag_hdr_t { -#if defined(SCIF_USE_SEQ) - uint32_t seq; -#endif - uint8_t tag; - uint8_t flags; - uint16_t size; -} mca_btl_scif_frag_hdr_t; - -struct mca_btl_scif_base_frag_t; - -typedef void (*frag_cb_t) (struct mca_btl_scif_base_frag_t *, int); - -typedef struct mca_btl_scif_base_frag_t { - mca_btl_base_descriptor_t base; - mca_btl_scif_frag_hdr_t hdr; - mca_btl_base_segment_t segments[2]; - mca_btl_base_endpoint_t *endpoint; - mca_btl_scif_reg_t *registration; - opal_free_list_t *my_list; -} mca_btl_scif_base_frag_t; - -typedef mca_btl_scif_base_frag_t mca_btl_scif_dma_frag_t; -typedef mca_btl_scif_base_frag_t mca_btl_scif_eager_frag_t; - -OBJ_CLASS_DECLARATION(mca_btl_scif_dma_frag_t); -OBJ_CLASS_DECLARATION(mca_btl_scif_eager_frag_t); - -static inline int mca_btl_scif_frag_alloc (mca_btl_base_endpoint_t *ep, - opal_free_list_t *list, - mca_btl_scif_base_frag_t **frag) -{ - *frag = (mca_btl_scif_base_frag_t *) opal_free_list_get (list); - if (OPAL_LIKELY(NULL != *frag)) { - (*frag)->my_list = list; - (*frag)->endpoint = ep; - return OPAL_SUCCESS; - } - - return OPAL_ERR_OUT_OF_RESOURCE; -} - -static inline int mca_btl_scif_frag_return (mca_btl_scif_base_frag_t *frag) -{ - if (frag->registration) { - frag->endpoint->btl->rcache->rcache_deregister (frag->endpoint->btl->rcache, - &frag->registration->base); - frag->registration = NULL; - } - - frag->segments[0].seg_addr.pval = frag->base.super.ptr; - frag->segments[0].seg_len = 0; - frag->segments[1].seg_len = 0; - - opal_free_list_return (frag->my_list, (opal_free_list_item_t *) frag); - - return OPAL_SUCCESS; -} - -static inline void mca_btl_scif_frag_complete (mca_btl_scif_base_frag_t *frag, int rc) { - BTL_VERBOSE(("frag complete. 
flags = %d", frag->base.des_flags)); - - /* call callback if specified */ - if (frag->base.des_flags & MCA_BTL_DES_SEND_ALWAYS_CALLBACK) { - frag->base.des_cbfunc(&frag->endpoint->btl->super, frag->endpoint, &frag->base, rc); - } - - if (frag->base.des_flags & MCA_BTL_DES_FLAGS_BTL_OWNERSHIP) { - mca_btl_scif_frag_return (frag); - } -} - -#define MCA_BTL_SCIF_FRAG_ALLOC_EAGER(ep, frag) \ - mca_btl_scif_frag_alloc((ep), &(ep)->btl->eager_frags, &(frag)) -#define MCA_BTL_SCIF_FRAG_ALLOC_DMA(ep, frag) \ - mca_btl_scif_frag_alloc((ep), &(ep)->btl->dma_frags, &(frag)) - -#endif /* MCA_BTL_SCIF_FRAG_H */ diff --git a/opal/mca/btl/scif/btl_scif_get.c b/opal/mca/btl/scif/btl_scif_get.c deleted file mode 100644 index 3b68dfe8c95..00000000000 --- a/opal/mca/btl/scif/btl_scif_get.c +++ /dev/null @@ -1,75 +0,0 @@ -/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ -/* - * Copyright (c) 2013-2014 Los Alamos National Security, LLC. All rights - * reserved. - * Copyright (c) 2014 Research Organization for Information Science - * and Technology (RIST). All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "opal_config.h" -#include "btl_scif_frag.h" - -#include - -#define lmin(a,b) ((a) < (b) ? (a) : (b)) - -/** - * Initiate a get operation. 
- */ -int mca_btl_scif_get (mca_btl_base_module_t *btl, struct mca_btl_base_endpoint_t *endpoint, void *local_address, - uint64_t remote_address, mca_btl_base_registration_handle_t *local_handle, - mca_btl_base_registration_handle_t *remote_handle, size_t size, int flags, - int order, mca_btl_base_rdma_completion_fn_t cbfunc, void *cbcontext, void *cbdata) -{ - int rc, mark, scif_flags = 0; - off_t roffset, loffset; -#if defined(SCIF_TIMING) - struct timespec ts; - - clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &ts); - - mca_btl_scif_component.get_count++; -#endif - - BTL_VERBOSE(("Using DMA Get from remote address %" PRIx64 " to local address %p", - remote_address, local_address)); - - roffset = remote_handle->scif_offset + (off_t)(remote_address - remote_handle->scif_base); - loffset = local_handle->scif_offset + (off_t)((intptr_t)local_address - local_handle->scif_base); - - if (mca_btl_scif_component.rma_use_cpu) { - scif_flags = SCIF_RMA_USECPU; - } - - if (mca_btl_scif_component.rma_sync) { - scif_flags |= SCIF_RMA_SYNC; - } - - /* start the read */ - rc = scif_readfrom (endpoint->scif_epd, loffset, size, roffset, scif_flags); - if (OPAL_UNLIKELY(-1 == rc)) { - return OPAL_ERROR; - } - - if (!(scif_flags & SCIF_RMA_SYNC)) { - /* according to the scif documentation is is better to use a fence rather - * than using the SCIF_RMA_SYNC flag with scif_readfrom */ - scif_fence_mark (endpoint->scif_epd, SCIF_FENCE_INIT_SELF, &mark); - scif_fence_wait (endpoint->scif_epd, mark); - } - -#if defined(SCIF_TIMING) - SCIF_UPDATE_TIMER(mca_btl_scif_component.get_time, - mca_btl_scif_component.get_time_max, ts); -#endif - - /* always call the callback function */ - cbfunc (btl, endpoint, local_address, local_handle, cbcontext, cbdata, OPAL_SUCCESS); - - return OPAL_SUCCESS; -} diff --git a/opal/mca/btl/scif/btl_scif_module.c b/opal/mca/btl/scif/btl_scif_module.c deleted file mode 100644 index e5d3f09da8a..00000000000 --- a/opal/mca/btl/scif/btl_scif_module.c +++ /dev/null @@ 
-1,308 +0,0 @@ -/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ -/* - * Copyright (c) 2013-2016 Los Alamos National Security, LLC. All rights - * reserved. - * Copyright (c) 2014 Research Organization for Information Science - * and Technology (RIST). All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "opal_config.h" - -#include "btl_scif.h" -#include "btl_scif_frag.h" -#include "btl_scif_endpoint.h" - -static int -mca_btl_scif_free (struct mca_btl_base_module_t *btl, - mca_btl_base_descriptor_t *des); - -static int -mca_btl_scif_module_finalize (struct mca_btl_base_module_t* btl); - -static mca_btl_base_registration_handle_t *mca_btl_scif_register_mem (struct mca_btl_base_module_t *btl, - mca_btl_base_endpoint_t *endpoint, - void *base, size_t size, uint32_t flags); -static int mca_btl_scif_deregister_mem (struct mca_btl_base_module_t *btl, mca_btl_base_registration_handle_t *handle); - -static struct mca_btl_base_descriptor_t * -mca_btl_scif_prepare_src (struct mca_btl_base_module_t *btl, - struct mca_btl_base_endpoint_t *endpoint, - struct opal_convertor_t *convertor, - uint8_t order, size_t reserve, size_t *size, - uint32_t flags); - -mca_btl_scif_module_t mca_btl_scif_module = { - .super = { - .btl_component = &mca_btl_scif_component.super, - .btl_add_procs = mca_btl_scif_add_procs, - .btl_del_procs = mca_btl_scif_del_procs, - .btl_finalize = mca_btl_scif_module_finalize, - .btl_alloc = mca_btl_scif_alloc, - .btl_free = mca_btl_scif_free, - .btl_prepare_src = mca_btl_scif_prepare_src, - .btl_send = mca_btl_scif_send, - .btl_sendi = mca_btl_scif_sendi, - .btl_put = mca_btl_scif_put, - .btl_get = mca_btl_scif_get, - .btl_register_mem = mca_btl_scif_register_mem, - .btl_deregister_mem = mca_btl_scif_deregister_mem, - } -}; - -int mca_btl_scif_module_init (void) -{ - int rc; - - /* create an endpoint to listen for connections */ - mca_btl_scif_module.scif_fd = scif_open (); - if (-1 == 
mca_btl_scif_module.scif_fd) { - BTL_VERBOSE(("scif_open failed. errno = %d", errno)); - return OPAL_ERROR; - } - - /* bind the endpoint to a port */ - mca_btl_scif_module.port_id.port = scif_bind (mca_btl_scif_module.scif_fd, 0); - if (-1 == mca_btl_scif_module.port_id.port) { - BTL_VERBOSE(("scif_bind failed. errno = %d", errno)); - scif_close (mca_btl_scif_module.scif_fd); - mca_btl_scif_module.scif_fd = -1; - return OPAL_ERROR; - } - - /* determine this processes node id */ - rc = scif_get_nodeIDs (NULL, 0, &mca_btl_scif_module.port_id.node); - if (-1 == rc) { - BTL_VERBOSE(("btl/scif error getting node id of this node")); - return OPAL_ERROR; - } - - /* Listen for connections */ - /* TODO - base the maximum backlog off something */ - rc = scif_listen (mca_btl_scif_module.scif_fd, 64); - if (-1 == rc) { - BTL_VERBOSE(("scif_listen failed. errno = %d", errno)); - scif_close (mca_btl_scif_module.scif_fd); - mca_btl_scif_module.scif_fd = -1; - return OPAL_ERROR; - } - - BTL_VERBOSE(("btl/scif: listening @ port %u on node %u\n", - mca_btl_scif_module.port_id.port, mca_btl_scif_module.port_id.node)); - - OBJ_CONSTRUCT(&mca_btl_scif_module.dma_frags, opal_free_list_t); - OBJ_CONSTRUCT(&mca_btl_scif_module.eager_frags, opal_free_list_t); - - return OPAL_SUCCESS; -} - -static int -mca_btl_scif_module_finalize (struct mca_btl_base_module_t *btl) -{ - mca_btl_scif_module_t *scif_module = (mca_btl_scif_module_t *) btl; - unsigned int i; - - OBJ_DESTRUCT(&mca_btl_scif_module.dma_frags); - OBJ_DESTRUCT(&mca_btl_scif_module.eager_frags); - - mca_btl_scif_module.exiting = true; - - /* close all open connections and release endpoints */ - if (NULL != scif_module->endpoints) { - for (i = 0 ; i < scif_module->endpoint_count ; ++i) { - mca_btl_scif_ep_release (scif_module->endpoints + i); - } - - free (scif_module->endpoints); - - scif_module->endpoint_count = 0; - scif_module->endpoints = NULL; - } - - if (NULL != scif_module->rcache) { - mca_rcache_base_module_destroy 
(scif_module->rcache); - scif_module->rcache = NULL; - } - - /* close the listening endpoint */ - if (mca_btl_scif_module.listening && -1 != mca_btl_scif_module.scif_fd) { - /* wake up the scif thread */ - scif_epd_t tmpfd; - tmpfd = scif_open(); - scif_connect (tmpfd, &mca_btl_scif_module.port_id); - pthread_join(mca_btl_scif_module.listen_thread, NULL); - scif_close(tmpfd); - scif_close (mca_btl_scif_module.scif_fd); - } - - mca_btl_scif_module.scif_fd = -1; - - return OPAL_SUCCESS; -} - -mca_btl_base_descriptor_t * -mca_btl_scif_alloc(struct mca_btl_base_module_t *btl, - struct mca_btl_base_endpoint_t *endpoint, - uint8_t order, size_t size, uint32_t flags) -{ - mca_btl_scif_base_frag_t *frag = NULL; - - BTL_VERBOSE(("allocating fragment of size: %u", (unsigned int)size)); - - if (size <= mca_btl_scif_module.super.btl_eager_limit) { - (void) MCA_BTL_SCIF_FRAG_ALLOC_EAGER(endpoint, frag); - } - - if (OPAL_UNLIKELY(NULL == frag)) { - return NULL; - } - - BTL_VERBOSE(("btl/scif_module allocated frag of size: %u, flags: %x. 
frag = %p", - (unsigned int)size, flags, (void *) frag)); - - frag->base.des_flags = flags; - frag->base.order = order; - frag->base.des_segments = frag->segments; - frag->base.des_segment_count = 1; - - frag->segments[0].seg_len = size; - - return &frag->base; -} - -static int -mca_btl_scif_free (struct mca_btl_base_module_t *btl, - mca_btl_base_descriptor_t *des) -{ - return mca_btl_scif_frag_return ((mca_btl_scif_base_frag_t *) des); -} - -static mca_btl_base_registration_handle_t *mca_btl_scif_register_mem (struct mca_btl_base_module_t *btl, - mca_btl_base_endpoint_t *endpoint, - void *base, size_t size, uint32_t flags) -{ - mca_btl_scif_module_t *scif_module = &mca_btl_scif_module; - mca_btl_scif_reg_t *scif_reg; - int access_flags = flags & MCA_BTL_REG_FLAG_ACCESS_ANY; - int rc; - - if (MCA_BTL_ENDPOINT_ANY == endpoint) { - /* it probably isn't possible to support registering memory to use with any endpoint so - * return NULL */ - return NULL; - } - - if (OPAL_LIKELY(MCA_BTL_SCIF_EP_STATE_CONNECTED != endpoint->state)) { - /* the endpoint needs to be connected before the fragment can be - * registered. 
*/ - rc = mca_btl_scif_ep_connect (endpoint); - if (OPAL_LIKELY(MCA_BTL_SCIF_EP_STATE_CONNECTED != endpoint->state)) { - /* not yet connected */ - return NULL; - } - } - - rc = scif_module->rcache->rcache_register (scif_module->rcache, base, size, 0, access_flags, - (mca_rcache_base_registration_t **) &scif_reg); - if (OPAL_UNLIKELY(OPAL_SUCCESS != rc)) { - return NULL; - } - - /* register the memory location with this peer if it isn't already */ - if ((off_t) -1 == scif_reg->handles[endpoint->id].btl_handle.scif_offset) { - size_t seg_size = (size_t)((uintptr_t) scif_reg->base.bound - (uintptr_t) scif_reg->base.base) + 1; - - /* NTH: until we determine a way to pass permissions to the rcache just make all segments - * read/write */ - scif_reg->handles[endpoint->id].btl_handle.scif_offset = - scif_register (endpoint->scif_epd, scif_reg->base.base, seg_size, 0, SCIF_PROT_READ | - SCIF_PROT_WRITE, 0); - BTL_VERBOSE(("registered fragment for scif DMA transaction. offset = %lu", - (unsigned long) scif_reg->handles[endpoint->id].btl_handle.scif_offset)); - } - - return &scif_reg->handles[endpoint->id].btl_handle; -} - -static int mca_btl_scif_deregister_mem (struct mca_btl_base_module_t *btl, mca_btl_base_registration_handle_t *handle) -{ - mca_btl_scif_registration_handle_t *scif_handle = (mca_btl_scif_registration_handle_t *) handle; - mca_btl_scif_module_t *scif_module = &mca_btl_scif_module; - mca_btl_scif_reg_t *scif_reg = scif_handle->reg; - - scif_module->rcache->rcache_deregister (scif_module->rcache, &scif_reg->base); - - return OPAL_SUCCESS; -} - -static inline struct mca_btl_base_descriptor_t * -mca_btl_scif_prepare_src_send (struct mca_btl_base_module_t *btl, - mca_btl_base_endpoint_t *endpoint, - struct opal_convertor_t *convertor, - uint8_t order, size_t reserve, size_t *size, - uint32_t flags) -{ - mca_btl_scif_base_frag_t *frag = NULL; - uint32_t iov_count = 1; - struct iovec iov; - size_t max_size = *size; - int rc; - - if 
(OPAL_LIKELY((mca_btl_scif_module.super.btl_flags & MCA_BTL_FLAGS_SEND_INPLACE) && - !opal_convertor_need_buffers (convertor) && - reserve <= 128)) { - /* inplace send */ - void *data_ptr; - opal_convertor_get_current_pointer (convertor, &data_ptr); - - (void) MCA_BTL_SCIF_FRAG_ALLOC_DMA(endpoint, frag); - if (OPAL_UNLIKELY(NULL == frag)) { - return NULL; - } - - frag->segments[0].seg_len = reserve; - frag->segments[1].seg_addr.pval = data_ptr; - frag->segments[1].seg_len = *size; - frag->base.des_segment_count = 2; - } else { - /* buffered send */ - (void) MCA_BTL_SCIF_FRAG_ALLOC_EAGER(endpoint, frag); - if (OPAL_UNLIKELY(NULL == frag)) { - return NULL; - } - - if (*size) { - iov.iov_len = *size; - iov.iov_base = (IOVBASE_TYPE *) ((uintptr_t) frag->segments[0].seg_addr.pval + reserve); - - rc = opal_convertor_pack (convertor, &iov, &iov_count, &max_size); - if (OPAL_UNLIKELY(rc < 0)) { - mca_btl_scif_frag_return (frag); - return NULL; - } - *size = max_size; - } - - frag->segments[0].seg_len = reserve + *size; - frag->base.des_segment_count = 1; - } - - frag->base.des_segments = frag->segments; - frag->base.order = order; - frag->base.des_flags = flags; - - return &frag->base; -} - -static mca_btl_base_descriptor_t *mca_btl_scif_prepare_src (struct mca_btl_base_module_t *btl, - mca_btl_base_endpoint_t *endpoint, - struct opal_convertor_t *convertor, - uint8_t order, size_t reserve, size_t *size, - uint32_t flags) -{ - return mca_btl_scif_prepare_src_send (btl, endpoint, convertor, order, reserve, size, flags); -} diff --git a/opal/mca/btl/scif/btl_scif_put.c b/opal/mca/btl/scif/btl_scif_put.c deleted file mode 100644 index 27355a3e5c5..00000000000 --- a/opal/mca/btl/scif/btl_scif_put.c +++ /dev/null @@ -1,72 +0,0 @@ -/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ -/* - * Copyright (c) 2013-2014 Los Alamos National Security, LLC. All rights - * reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "opal_config.h" -#include "btl_scif_frag.h" - -#define lmin(a,b) ((a) < (b) ? (a) : (b)) - -/** - * Initiate a put operation. - */ -int mca_btl_scif_put (mca_btl_base_module_t *btl, struct mca_btl_base_endpoint_t *endpoint, void *local_address, - uint64_t remote_address, mca_btl_base_registration_handle_t *local_handle, - mca_btl_base_registration_handle_t *remote_handle, size_t size, int flags, - int order, mca_btl_base_rdma_completion_fn_t cbfunc, void *cbcontext, void *cbdata) -{ - int rc, mark, scif_flags = 0; - off_t roffset, loffset; -#if defined(SCIF_TIMING) - struct timespec ts; - - clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &ts); - - mca_btl_scif_component.get_count++; -#endif - - BTL_VERBOSE(("Using DMA Put from local address %p to remote address %" PRIx64, - local_address, remote_address)); - - roffset = remote_handle->scif_offset + (off_t)(remote_address - remote_handle->scif_base); - loffset = local_handle->scif_offset + (off_t)((intptr_t) local_address - local_handle->scif_base); - - if (mca_btl_scif_component.rma_use_cpu) { - scif_flags = SCIF_RMA_USECPU; - } - - if (mca_btl_scif_component.rma_sync) { - scif_flags |= SCIF_RMA_SYNC; - } - - /* start the write */ - rc = scif_writeto (endpoint->scif_epd, loffset, size, roffset, scif_flags); - rc = scif_readfrom (endpoint->scif_epd, loffset, size, roffset, scif_flags); - if (OPAL_UNLIKELY(-1 == rc)) { - return OPAL_ERROR; - } - - if (!(scif_flags & SCIF_RMA_SYNC)) { - /* according to the scif documentation is is better to use a fence rather - * than using the SCIF_RMA_SYNC flag with scif_readfrom */ - scif_fence_mark (endpoint->scif_epd, SCIF_FENCE_INIT_SELF, &mark); - scif_fence_wait (endpoint->scif_epd, mark); - } - -#if defined(SCIF_TIMING) - SCIF_UPDATE_TIMER(mca_btl_scif_component.get_time, - mca_btl_scif_component.get_time_max, ts); -#endif - - /* always call the callback function */ - cbfunc (btl, endpoint, 
local_address, local_handle, cbcontext, cbdata, OPAL_SUCCESS); - - return OPAL_SUCCESS; -} diff --git a/opal/mca/btl/scif/btl_scif_send.c b/opal/mca/btl/scif/btl_scif_send.c deleted file mode 100644 index 008e23b439c..00000000000 --- a/opal/mca/btl/scif/btl_scif_send.c +++ /dev/null @@ -1,299 +0,0 @@ -/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ -/* - * Copyright (c) 2013-2015 Los Alamos National Security, LLC. All rights - * reserved. - * Copyright (c) 2014 Research Organization for Information Science - * and Technology (RIST). All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "btl_scif.h" -#include "btl_scif_frag.h" - -#define BUFFER_FREE(s,e,hbm) (((s) > (e) || ((s) == (e) && !hbm)) ? (s) - (e) : (mca_btl_scif_component.segment_size - (e))) - -/* attempt to reserve a contiguous segment from the remote endpoint */ -static inline int mca_btl_scif_send_get_buffer (mca_btl_base_endpoint_t *endpoint, size_t size, unsigned char * restrict *dst) -{ - /* the high bit helps determine if the buffer is empty or full */ - bool hbm = (endpoint->send_buffer.start >> 31) == (endpoint->send_buffer.end >> 31); - const unsigned int segment_size = mca_btl_scif_component.segment_size; - unsigned int start = endpoint->send_buffer.start & ~ (1 << 31); - unsigned int end = endpoint->send_buffer.end & ~ (1 << 31); - unsigned int buffer_free = BUFFER_FREE(start, end, hbm); -#if defined(SCIF_TIMING) - struct timespec ts; - - clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &ts); -#endif - - /* need space for the fragment + the header */ - size += sizeof (mca_btl_scif_frag_hdr_t); - - /* check if we need to free up space for this fragment */ - if (OPAL_UNLIKELY(buffer_free < size)) { - BTL_VERBOSE(("not enough room for a fragment of size %u. 
in use buffer segment: {start: %x, end: %x, high bit matches: %d}\n", - (unsigned) size, start, end, (int) hbm)); - - /* read the current start pointer from the remote peer */ - start = endpoint->send_buffer.start = endpoint->send_buffer.startp[0]; - start &= ~ (1 << 31); - hbm = (endpoint->send_buffer.start >> 31) == (endpoint->send_buffer.end >> 31); - buffer_free = BUFFER_FREE(start, end, hbm); - - opal_atomic_rmb (); - - /* if this is the end of the buffer. does the fragment fit? */ - if (OPAL_UNLIKELY(buffer_free > 0 && buffer_free < size && start <= end)) { - mca_btl_scif_frag_hdr_t hdr; - - hdr.size = buffer_free - sizeof (mca_btl_scif_frag_hdr_t); - hdr.tag = 0xff; -#if defined(SCIF_USE_SEQ) - hdr.seq = endpoint->seq_next++; - ((uint64_t *) (endpoint->send_buffer.buffer + end))[0] = *((uint64_t *) &hdr); -#else - ((uint32_t *) (endpoint->send_buffer.buffer + end))[0] = *((uint32_t *) &hdr); -#endif - - /* toggle the high bit */ - end = 64; - endpoint->send_buffer.end = ((endpoint->send_buffer.end & (1 << 31)) ^ (1 << 31)) | end; - hbm = (endpoint->send_buffer.start >> 31) == (endpoint->send_buffer.end >> 31); - buffer_free = BUFFER_FREE(start, end, hbm); - } - - if (OPAL_UNLIKELY(buffer_free < size)) { -#if defined(SCIF_TIMING) - SCIF_UPDATE_TIMER(mca_btl_scif_component.aquire_buffer_time, mca_btl_scif_component.aquire_buffer_time_max, ts); -#endif - return OPAL_ERR_OUT_OF_RESOURCE; - } - } - - BTL_VERBOSE(("writing fragment of size %u to offset %u {start: %x, end: %x} of peer's buffer. 
free = %u", - (unsigned int) size, end, start, end, buffer_free)); - - *dst = endpoint->send_buffer.buffer + end; - - /* align the buffer on a 64 byte boundary */ - end = (end + size + 63) & ~63; - - if (OPAL_UNLIKELY(segment_size == end)) { - endpoint->send_buffer.end = ((endpoint->send_buffer.end & (1 << 31)) ^ (1 << 31)) | 64; - } else { - endpoint->send_buffer.end = (endpoint->send_buffer.end & (1 << 31)) | end; - } - -#if defined(SCIF_TIMING) - SCIF_UPDATE_TIMER(mca_btl_scif_component.aquire_buffer_time, mca_btl_scif_component.aquire_buffer_time_max, ts); -#endif - - return OPAL_SUCCESS; -} - -static void mark_buffer (struct mca_btl_base_endpoint_t *endpoint) -{ - if (endpoint->port_id.node != mca_btl_scif_module.port_id.node) { - /* force the PCIe bus to flush by reading from the remote node */ - volatile uint32_t start = endpoint->send_buffer.startp[0]; (void)start; - - endpoint->send_buffer.endp[0] = endpoint->send_buffer.end; - - endpoint->send_buffer.start = endpoint->send_buffer.startp[0]; - } else { - MB(); - endpoint->send_buffer.endp[0] = endpoint->send_buffer.end; - } -} - -static int mca_btl_scif_send_frag (struct mca_btl_base_endpoint_t *endpoint, - mca_btl_scif_base_frag_t *frag) -{ - size_t size = frag->hdr.size; - unsigned char * restrict dst; - - BTL_VERBOSE(("btl/scif sending descriptor %p from %d -> %d. 
length = %" PRIu64, (void *) frag, - OPAL_PROC_MY_NAME.vpid, endpoint->peer_proc->proc_name.vpid, frag->segments[0].seg_len)); - - if (OPAL_LIKELY(OPAL_SUCCESS == mca_btl_scif_send_get_buffer (endpoint, size, &dst))) { - unsigned char * restrict data = (unsigned char * restrict) frag->segments[0].seg_addr.pval; -#if defined(SCIF_TIMING) - struct timespec ts; - - clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &ts); -#endif - - memcpy (dst + sizeof (frag->hdr), data, frag->segments[0].seg_len); - - if (frag->segments[1].seg_len) { - memcpy (dst + sizeof (frag->hdr) + frag->segments[0].seg_len, - frag->segments[1].seg_addr.pval, - frag->segments[1].seg_len); - } - -#if defined(SCIF_USE_SEQ) - frag->hdr.seq = endpoint->seq_next++; - /* write the tag to signal the fragment is available */ - ((uint64_t *) dst)[0] = *((uint64_t *) &frag->hdr); -#else - ((uint32_t *) dst)[0] = *((uint32_t *) &frag->hdr); -#endif - - opal_atomic_wmb (); - -#if defined(SCIF_TIMING) - SCIF_UPDATE_TIMER(mca_btl_scif_component.send_time, mca_btl_scif_component.send_time_max, ts); -#endif - - /* fragment is gone */ - mca_btl_scif_frag_complete (frag, OPAL_SUCCESS); - - return 1; - } - - return OPAL_ERR_OUT_OF_RESOURCE; -} - -int mca_btl_scif_send (struct mca_btl_base_module_t *btl, - struct mca_btl_base_endpoint_t *endpoint, - struct mca_btl_base_descriptor_t *descriptor, - mca_btl_base_tag_t tag) -{ - mca_btl_scif_base_frag_t *frag = (mca_btl_scif_base_frag_t *) descriptor; - size_t size = frag->segments[0].seg_len + frag->segments[1].seg_len; - int rc; - - frag->hdr.tag = tag; - frag->hdr.size = size; - - if (OPAL_UNLIKELY(MCA_BTL_SCIF_EP_STATE_CONNECTED != endpoint->state)) { - rc = mca_btl_scif_ep_connect (endpoint); - if (OPAL_UNLIKELY(MCA_BTL_SCIF_EP_STATE_CONNECTED != endpoint->state)) { - /* the receiver was not ready to handle the fragment. queue up the fragment. 
*/ - descriptor->des_flags |= MCA_BTL_DES_SEND_ALWAYS_CALLBACK; - opal_list_append (&endpoint->frag_wait_list, (opal_list_item_t *) descriptor); - return OPAL_SUCCESS; - } - } - - rc = mca_btl_scif_send_frag (endpoint, frag); - if (OPAL_LIKELY(1 == rc)) { - mark_buffer (endpoint); - return 1; - } - - /* the receiver was not ready to handle the fragment. queue up the fragment. */ - descriptor->des_flags |= MCA_BTL_DES_SEND_ALWAYS_CALLBACK; - opal_list_append (&endpoint->frag_wait_list, (opal_list_item_t *) descriptor); - - return OPAL_SUCCESS; -} - -int mca_btl_scif_sendi (struct mca_btl_base_module_t *btl, - struct mca_btl_base_endpoint_t *endpoint, - struct opal_convertor_t *convertor, - void *header, size_t header_size, - size_t payload_size, uint8_t order, - uint32_t flags, mca_btl_base_tag_t tag, - mca_btl_base_descriptor_t **descriptor) -{ - size_t length = (header_size + payload_size); - unsigned char * restrict base; - mca_btl_scif_frag_hdr_t hdr; - size_t max_data; - int rc; -#if defined(SCIF_TIMING) - struct timespec ts; -#endif - - assert (length < mca_btl_scif_module.super.btl_eager_limit); - assert (0 == (flags & MCA_BTL_DES_SEND_ALWAYS_CALLBACK)); - - if (OPAL_UNLIKELY(MCA_BTL_SCIF_EP_STATE_CONNECTED != endpoint->state)) { - rc = mca_btl_scif_ep_connect (endpoint); - if (OPAL_UNLIKELY(MCA_BTL_SCIF_EP_STATE_CONNECTED != endpoint->state)) { - return OPAL_ERR_RESOURCE_BUSY; - } - } - - rc = mca_btl_scif_send_get_buffer (endpoint, length, &base); - if (OPAL_UNLIKELY(OPAL_SUCCESS != rc)) { - if (NULL != descriptor) { - *descriptor = NULL; - } - return OPAL_ERR_OUT_OF_RESOURCE; - } - -#if defined(SCIF_TIMING) - clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &ts); -#endif - - /* fill in the fragment header (except for the tag) */ - hdr.size = length; - hdr.tag = tag; - -#if defined(SCIF_USE_SEQ) - hdr.seq = endpoint->seq_next++; -#endif - - /* write the match header (with MPI comm/tag/etc. 
info) */ - memcpy (base + sizeof (hdr), header, header_size); - - if (payload_size) { - uint32_t iov_count = 1; - struct iovec iov[1]; - - iov[0].iov_base = base + sizeof (hdr) + header_size; - iov[0].iov_len = payload_size; - - /* move the data */ - opal_convertor_pack (convertor, iov, &iov_count, &max_data); - - assert (max_data == payload_size); - } - -#if defined(SCIF_USE_SEQ) - /* signal the remote side that this fragment is available */ - ((uint64_t *)base)[0] = *((uint64_t *) &hdr); -#else - ((uint32_t *)base)[0] = *((uint32_t *) &hdr); -#endif - - opal_atomic_wmb (); - - mark_buffer (endpoint); - -#if defined(SCIF_TIMING) - SCIF_UPDATE_TIMER(mca_btl_scif_component.sendi_time, mca_btl_scif_component.sendi_time_max, ts); -#endif - - return OPAL_SUCCESS; -} - -int mca_btl_scif_progress_send_wait_list (mca_btl_base_endpoint_t *endpoint) -{ - mca_btl_scif_base_frag_t *frag; - int rc = OPAL_SUCCESS; - - while (NULL != - (frag = (mca_btl_scif_base_frag_t *) opal_list_remove_first (&endpoint->frag_wait_list))) { - rc = mca_btl_scif_send_frag (endpoint, frag); - if (OPAL_UNLIKELY(OPAL_SUCCESS > rc)) { - if (OPAL_LIKELY(OPAL_ERR_OUT_OF_RESOURCE == rc)) { - opal_list_prepend (&endpoint->frag_wait_list, (opal_list_item_t *) frag); - } else { - mca_btl_scif_frag_complete (frag, rc); - } - - break; - } - } - - mark_buffer (endpoint); - return OPAL_SUCCESS; -} diff --git a/opal/mca/btl/scif/configure.m4 b/opal/mca/btl/scif/configure.m4 deleted file mode 100644 index f8b814e2e70..00000000000 --- a/opal/mca/btl/scif/configure.m4 +++ /dev/null @@ -1,47 +0,0 @@ -# -*- shell-script -*- -# -# Copyright (c) 2013-2014 Los Alamos National Security, LLC. All rights -# reserved. -# Copyright (c) 2015 Research Organization for Information Science -# and Technology (RIST). All rights reserved. -# Copyright (c) 2016 Cisco Systems, Inc. All rights reserved. 
-# $COPYRIGHT$ -# -# Additional copyrights may follow -# -# $HEADER$ -# - -AC_DEFUN([MCA_opal_btl_scif_CONFIG],[ - OPAL_VAR_SCOPE_PUSH([opal_btl_scif_happy]) - AC_CONFIG_FILES([opal/mca/btl/scif/Makefile]) - - AC_ARG_WITH([scif], [AC_HELP_STRING([--with-scif(=DIR)]), - [Build with SCIF, searching for headers in DIR])]) - OPAL_CHECK_WITHDIR([scif], [$with_scif], [include/scif.h]) - - opal_btl_scif_happy="no" - - if test "$with_scif" != "no" ; then - if test -n "$with_scif" && test "$with_scif" != "yes" ; then - opal_check_scif_dir=$with_scif - fi - - OPAL_CHECK_PACKAGE([btl_scif], [scif.h], [scif], [scif_open], [], - [$opal_check_scif_dir], [], [opal_btl_scif_happy="yes"], []) - - if test "$opal_btl_scif_happy" != "yes" && test -n "$with_scif" ; then - AC_MSG_ERROR([SCIF support requested but not found. Aborting]) - fi - fi - - AS_IF([test "$opal_btl_scif_happy" = "yes"], [$1], [$2]) - - OPAL_SUMMARY_ADD([[Transports]],[[Intel SCIF]],[[btl_scif]],[$opal_btl_scif_happy]) - - # substitute in the things needed to build scif - AC_SUBST([btl_scif_CPPFLAGS]) - AC_SUBST([btl_scif_LDFLAGS]) - AC_SUBST([btl_scif_LIBS]) - OPAL_VAR_SCOPE_POP -])dnl diff --git a/opal/mca/btl/scif/owner.txt b/opal/mca/btl/scif/owner.txt deleted file mode 100644 index 30615e90eb7..00000000000 --- a/opal/mca/btl/scif/owner.txt +++ /dev/null @@ -1,7 +0,0 @@ -# -# owner/status file -# owner: institution that is responsible for this package -# status: e.g. 
active, maintenance, unmaintained -# -owner:LANL -status: maintenance diff --git a/opal/mca/btl/tcp/btl_tcp_component.c b/opal/mca/btl/tcp/btl_tcp_component.c index e8b05880155..2d1004bcf74 100644 --- a/opal/mca/btl/tcp/btl_tcp_component.c +++ b/opal/mca/btl/tcp/btl_tcp_component.c @@ -361,9 +361,9 @@ static int mca_btl_tcp_component_open(void) #if OPAL_ENABLE_IPV6 mca_btl_tcp_component.tcp6_listen_sd = -1; #endif - mca_btl_tcp_component.tcp_num_btls=0; + mca_btl_tcp_component.tcp_num_btls = 0; mca_btl_tcp_component.tcp_addr_count = 0; - mca_btl_tcp_component.tcp_btls=NULL; + mca_btl_tcp_component.tcp_btls = NULL; /* initialize objects */ OBJ_CONSTRUCT(&mca_btl_tcp_component.tcp_lock, opal_mutex_t); diff --git a/opal/mca/btl/tcp/btl_tcp_endpoint.c b/opal/mca/btl/tcp/btl_tcp_endpoint.c index f8df420ff8e..e69cd863be9 100644 --- a/opal/mca/btl/tcp/btl_tcp_endpoint.c +++ b/opal/mca/btl/tcp/btl_tcp_endpoint.c @@ -717,34 +717,39 @@ static int mca_btl_tcp_endpoint_start_connect(mca_btl_base_endpoint_t* btl_endpo /* start the connect - will likely fail with EINPROGRESS */ mca_btl_tcp_proc_tosocks(btl_endpoint->endpoint_addr, &endpoint_addr); - + /* Bind the socket to one of the addresses associated with * this btl module. 
This sets the source IP to one of the * addresses shared in modex, so that the destination rank * can properly pair btl modules, even in cases where Linux * might do something unexpected with routing */ - opal_socklen_t sockaddr_addrlen = sizeof(struct sockaddr_storage); if (endpoint_addr.ss_family == AF_INET) { assert(NULL != &btl_endpoint->endpoint_btl->tcp_ifaddr); if (bind(btl_endpoint->endpoint_sd, (struct sockaddr*) &btl_endpoint->endpoint_btl->tcp_ifaddr, - sockaddr_addrlen) < 0) { - BTL_ERROR(("bind() failed: %s (%d)", strerror(opal_socket_errno), opal_socket_errno)); + sizeof(struct sockaddr_in)) < 0) { + BTL_ERROR(("bind on local address (%s:%d) failed: %s (%d)", + opal_net_get_hostname((struct sockaddr*) &btl_endpoint->endpoint_btl->tcp_ifaddr), + htons(((struct sockaddr_in*)&btl_endpoint->endpoint_btl->tcp_ifaddr)->sin_port), + strerror(opal_socket_errno), opal_socket_errno)); - CLOSE_THE_SOCKET(btl_endpoint->endpoint_sd); - return OPAL_ERROR; - } + CLOSE_THE_SOCKET(btl_endpoint->endpoint_sd); + return OPAL_ERROR; + } } #if OPAL_ENABLE_IPV6 if (endpoint_addr.ss_family == AF_INET6) { assert(NULL != &btl_endpoint->endpoint_btl->tcp_ifaddr_6); if (bind(btl_endpoint->endpoint_sd, (struct sockaddr*) &btl_endpoint->endpoint_btl->tcp_ifaddr_6, - sockaddr_addrlen) < 0) { - BTL_ERROR(("bind() failed: %s (%d)", strerror(opal_socket_errno), opal_socket_errno)); + sizeof(struct sockaddr_in6)) < 0) { + BTL_ERROR(("bind on local address (%s:%d) failed: %s (%d)", + opal_net_get_hostname((struct sockaddr*) &btl_endpoint->endpoint_btl->tcp_ifaddr), + htons(((struct sockaddr_in*)&btl_endpoint->endpoint_btl->tcp_ifaddr)->sin_port), + strerror(opal_socket_errno), opal_socket_errno)); - CLOSE_THE_SOCKET(btl_endpoint->endpoint_sd); - return OPAL_ERROR; - } - } + CLOSE_THE_SOCKET(btl_endpoint->endpoint_sd); + return OPAL_ERROR; + } + } #endif opal_output_verbose(10, opal_btl_base_framework.framework_output, "btl: tcp: attempting to connect() to %s address %s on port %d", diff 
--git a/opal/mca/btl/tcp/btl_tcp_proc.c b/opal/mca/btl/tcp/btl_tcp_proc.c index b7bf95c14b1..c7ee66bb256 100644 --- a/opal/mca/btl/tcp/btl_tcp_proc.c +++ b/opal/mca/btl/tcp/btl_tcp_proc.c @@ -413,7 +413,7 @@ int mca_btl_tcp_proc_insert( mca_btl_tcp_proc_t* btl_proc, { struct sockaddr_storage endpoint_addr_ss; const char *proc_hostname; - unsigned int perm_size; + unsigned int perm_size = 0; int rc, *a = NULL; size_t i, j; mca_btl_tcp_interface_t** peer_interfaces = NULL; @@ -732,12 +732,12 @@ int mca_btl_tcp_proc_insert( mca_btl_tcp_proc_t* btl_proc, } free(proc_data->local_interfaces[i]); } - free(proc_data->local_interfaces); + free(proc_data->local_interfaces); proc_data->local_interfaces = NULL; proc_data->max_local_interfaces = 0; - free(proc_data->weights); - free(proc_data->best_addr); - free(proc_data->best_assignment); + free(proc_data->weights); proc_data->weights = NULL; + free(proc_data->best_addr); proc_data->best_addr = NULL; + free(proc_data->best_assignment); proc_data->best_assignment = NULL; OBJ_DESTRUCT(&_proc_data.local_kindex_to_index); OBJ_DESTRUCT(&_proc_data.peer_kindex_to_index); @@ -901,17 +901,22 @@ void mca_btl_tcp_proc_accept(mca_btl_tcp_proc_t* btl_proc, struct sockaddr* addr /* No further use of this socket. 
Close it */ CLOSE_THE_SOCKET(sd); { - char *addr_str = NULL, *tmp, *pnet; + char *addr_str = NULL, *tmp; + char ip[128]; + ip[sizeof(ip) - 1] = '\0'; + for (size_t i = 0; i < btl_proc->proc_endpoint_count; i++) { mca_btl_base_endpoint_t* btl_endpoint = btl_proc->proc_endpoints[i]; if (btl_endpoint->endpoint_addr->addr_family != addr->sa_family) { continue; } - pnet = opal_net_get_hostname((struct sockaddr*)&btl_endpoint->endpoint_addr->addr_inet); + inet_ntop(btl_endpoint->endpoint_addr->addr_family, + (void*) &(btl_endpoint->endpoint_addr->addr_inet), + ip, sizeof(ip) - 1); if (NULL == addr_str) { - (void)asprintf(&tmp, "\n\t%s", pnet); + (void)asprintf(&tmp, "\n\t%s", ip); } else { - (void)asprintf(&tmp, "%s\n\t%s", addr_str, pnet); + (void)asprintf(&tmp, "%s\n\t%s", addr_str, ip); free(addr_str); } addr_str = tmp; diff --git a/opal/mca/btl/uct/btl_uct.h b/opal/mca/btl/uct/btl_uct.h index 0a896dd736e..38756794430 100644 --- a/opal/mca/btl/uct/btl_uct.h +++ b/opal/mca/btl/uct/btl_uct.h @@ -38,7 +38,6 @@ #include "opal/class/opal_hash_table.h" #include "opal/mca/pmix/pmix.h" #include "opal/threads/tsd.h" -#include #include #include "btl_uct_types.h" @@ -69,7 +68,7 @@ struct mca_btl_uct_module_t { opal_hash_table_t id_to_endpoint; /** mutex to protect the module */ - opal_mutex_t lock; + opal_recursive_mutex_t lock; /** async context */ ucs_async_context_t *ucs_async; @@ -107,11 +106,11 @@ struct mca_btl_uct_module_t { /** large registered frags for packing non-contiguous data */ opal_free_list_t max_frags; - /** RDMA completions */ - opal_free_list_t rdma_completions; - /** frags that were waiting on connections that are now ready to send */ opal_list_t pending_frags; + + /** pending connection requests */ + opal_fifo_t pending_connection_reqs; }; typedef struct mca_btl_uct_module_t mca_btl_uct_module_t; @@ -282,6 +281,7 @@ ucs_status_t mca_btl_uct_am_handler (void *arg, void *data, size_t length, unsig struct mca_btl_base_endpoint_t *mca_btl_uct_get_ep (struct 
mca_btl_base_module_t *module, opal_proc_t *proc); int mca_btl_uct_query_tls (mca_btl_uct_module_t *module, mca_btl_uct_md_t *md, uct_tl_resource_desc_t *tl_descs, unsigned tl_count); +int mca_btl_uct_process_connection_request (mca_btl_uct_module_t *module, mca_btl_uct_conn_req_t *req); /** * @brief Checks if a tl is suitable for using for RDMA @@ -290,7 +290,7 @@ int mca_btl_uct_query_tls (mca_btl_uct_module_t *module, mca_btl_uct_md_t *md, u */ static inline bool mca_btl_uct_tl_supports_rdma (mca_btl_uct_tl_t *tl) { - return (tl->uct_iface_attr.cap.flags & (UCT_IFACE_FLAG_PUT_ZCOPY | UCT_IFACE_FLAG_GET_ZCOPY)) == + return (MCA_BTL_UCT_TL_ATTR(tl, 0).cap.flags & (UCT_IFACE_FLAG_PUT_ZCOPY | UCT_IFACE_FLAG_GET_ZCOPY)) == (UCT_IFACE_FLAG_PUT_ZCOPY | UCT_IFACE_FLAG_GET_ZCOPY); } @@ -299,7 +299,7 @@ static inline bool mca_btl_uct_tl_supports_rdma (mca_btl_uct_tl_t *tl) */ static inline bool mca_btl_uct_tl_support_am (mca_btl_uct_tl_t *tl) { - return (tl->uct_iface_attr.cap.flags & (UCT_IFACE_FLAG_AM_SHORT | UCT_IFACE_FLAG_AM_BCOPY | UCT_IFACE_FLAG_AM_ZCOPY)); + return (MCA_BTL_UCT_TL_ATTR(tl, 0).cap.flags & (UCT_IFACE_FLAG_AM_SHORT | UCT_IFACE_FLAG_AM_BCOPY | UCT_IFACE_FLAG_AM_ZCOPY)); } /** @@ -309,7 +309,7 @@ static inline bool mca_btl_uct_tl_support_am (mca_btl_uct_tl_t *tl) */ static inline bool mca_btl_uct_tl_supports_conn (mca_btl_uct_tl_t *tl) { - return (tl->uct_iface_attr.cap.flags & (UCT_IFACE_FLAG_AM_SHORT | UCT_IFACE_FLAG_CONNECT_TO_IFACE)) == + return (MCA_BTL_UCT_TL_ATTR(tl, 0).cap.flags & (UCT_IFACE_FLAG_AM_SHORT | UCT_IFACE_FLAG_CONNECT_TO_IFACE)) == (UCT_IFACE_FLAG_AM_SHORT | UCT_IFACE_FLAG_CONNECT_TO_IFACE); } @@ -320,7 +320,7 @@ static inline bool mca_btl_uct_tl_supports_conn (mca_btl_uct_tl_t *tl) */ static inline bool mca_btl_uct_tl_requires_connection_tl (mca_btl_uct_tl_t *tl) { - return !(tl->uct_iface_attr.cap.flags & UCT_IFACE_FLAG_CONNECT_TO_IFACE); + return !(MCA_BTL_UCT_TL_ATTR(tl, 0).cap.flags & UCT_IFACE_FLAG_CONNECT_TO_IFACE); } 
END_C_DECLS diff --git a/opal/mca/btl/uct/btl_uct_am.c b/opal/mca/btl/uct/btl_uct_am.c index 6927f31c8cd..90ea28eed5c 100644 --- a/opal/mca/btl/uct/btl_uct_am.c +++ b/opal/mca/btl/uct/btl_uct_am.c @@ -25,7 +25,7 @@ mca_btl_base_descriptor_t *mca_btl_uct_alloc (mca_btl_base_module_t *btl, mca_bt mca_btl_uct_module_t *uct_btl = (mca_btl_uct_module_t *) btl; mca_btl_uct_base_frag_t *frag = NULL; - if ((size + 8) <= (size_t) uct_btl->am_tl->uct_iface_attr.cap.am.max_short) { + if (size <= (size_t) MCA_BTL_UCT_TL_ATTR(uct_btl->am_tl, 0).cap.am.max_short) { frag = mca_btl_uct_frag_alloc_short (uct_btl, endpoint); } else if (size <= uct_btl->super.btl_eager_limit) { frag = mca_btl_uct_frag_alloc_eager (uct_btl, endpoint); @@ -40,6 +40,10 @@ mca_btl_base_descriptor_t *mca_btl_uct_alloc (mca_btl_base_module_t *btl, mca_bt frag->base.des_flags = flags; frag->base.order = order; frag->uct_iov.length = size; + if (NULL != frag->base.super.registration) { + /* zero-copy fragments will need callbacks */ + frag->base.des_flags |= MCA_BTL_DES_SEND_ALWAYS_CALLBACK; + } } return (mca_btl_base_descriptor_t *) frag; @@ -95,14 +99,18 @@ struct mca_btl_base_descriptor_t *mca_btl_uct_prepare_src (mca_btl_base_module_t return NULL; } + frag->uct_iov.length = total_size; frag->base.order = order; frag->base.des_flags = flags; - if (total_size > (size_t) uct_btl->am_tl->uct_iface_attr.cap.am.max_short) { + if (total_size > (size_t) MCA_BTL_UCT_TL_ATTR(uct_btl->am_tl, 0).cap.am.max_short) { + frag->segments[0].seg_len = reserve; frag->segments[1].seg_len = *size; frag->segments[1].seg_addr.pval = data_ptr; frag->base.des_segment_count = 2; } else { + frag->segments[0].seg_len = total_size; memcpy ((void *)((intptr_t) frag->segments[1].seg_addr.pval + reserve), data_ptr, *size); + frag->base.des_segment_count = 1; } } @@ -130,7 +138,7 @@ static size_t mca_btl_uct_send_frag_pack (void *data, void *arg) data = (void *)((intptr_t) data + 8); /* this function should only ever get called with 
fragments with two segments */ - for (size_t i = 0 ; i < 2 ; ++i) { + for (size_t i = 0 ; i < frag->base.des_segment_count ; ++i) { const size_t seg_len = frag->segments[i].seg_len; memcpy (data, frag->segments[i].seg_addr.pval, seg_len); data = (void *)((intptr_t) data + seg_len); @@ -140,57 +148,84 @@ static size_t mca_btl_uct_send_frag_pack (void *data, void *arg) return length; } -int mca_btl_uct_send_frag (mca_btl_uct_module_t *uct_btl, mca_btl_base_endpoint_t *endpoint, mca_btl_uct_base_frag_t *frag, - int32_t flags, mca_btl_uct_device_context_t *context, uct_ep_h ep_handle) +static void mca_btl_uct_append_pending_frag (mca_btl_uct_module_t *uct_btl, mca_btl_uct_base_frag_t *frag, + mca_btl_uct_device_context_t *context, bool ready) +{ + frag->ready = ready; + frag->base.des_flags |= MCA_BTL_DES_SEND_ALWAYS_CALLBACK; + opal_atomic_wmb (); + + opal_list_append (&uct_btl->pending_frags, (opal_list_item_t *) frag); +} + +int mca_btl_uct_send_frag (mca_btl_uct_module_t *uct_btl, mca_btl_uct_base_frag_t *frag, bool append) { + mca_btl_uct_device_context_t *context = frag->context; + const ssize_t msg_size = frag->uct_iov.length + 8; + ssize_t size; ucs_status_t ucs_status; + uct_ep_h ep_handle = NULL; - mca_btl_uct_context_lock (context); + /* if we get here then we must have an endpoint handle for this context/endpoint pair */ + (void) mca_btl_uct_endpoint_test_am (uct_btl, frag->endpoint, frag->context, &ep_handle); + assert (NULL != ep_handle); - do { + /* if another thread set this we really don't care too much as this flag is only meant + * to protect against deep recursion */ + if (!context->in_am_callback) { + mca_btl_uct_context_lock (context); + /* attempt to post the fragment */ if (NULL != frag->base.super.registration) { frag->comp.dev_context = context; - ucs_status = uct_ep_am_zcopy (ep_handle, MCA_BTL_UCT_FRAG, &frag->header, sizeof (frag->header), &frag->uct_iov, 1, 0, &frag->comp.uct_comp); + + if (OPAL_LIKELY(UCS_INPROGRESS == ucs_status)) { + 
uct_worker_progress (context->uct_worker); + mca_btl_uct_context_unlock (context); + return OPAL_SUCCESS; + } } else { /* short message */ - /* restore original flags */ - frag->base.des_flags = flags; - - if (1 == frag->base.des_segment_count) { + if (1 == frag->base.des_segment_count && (frag->uct_iov.length + 8) < MCA_BTL_UCT_TL_ATTR(uct_btl->am_tl, 0).cap.am.max_short) { ucs_status = uct_ep_am_short (ep_handle, MCA_BTL_UCT_FRAG, frag->header.value, frag->uct_iov.buffer, frag->uct_iov.length); - } else { - ucs_status = uct_ep_am_bcopy (ep_handle, MCA_BTL_UCT_FRAG, mca_btl_uct_send_frag_pack, frag, 0); + + if (OPAL_LIKELY(UCS_OK == ucs_status)) { + uct_worker_progress (context->uct_worker); + mca_btl_uct_context_unlock (context); + /* send is complete */ + mca_btl_uct_frag_complete (frag, OPAL_SUCCESS); + return 1; + } } - } - if (UCS_ERR_NO_RESOURCE != ucs_status) { - /* go ahead and progress the worker while we have the lock */ - (void) uct_worker_progress (context->uct_worker); - break; + size = uct_ep_am_bcopy (ep_handle, MCA_BTL_UCT_FRAG, mca_btl_uct_send_frag_pack, frag, 0); + if (OPAL_LIKELY(size == msg_size)) { + uct_worker_progress (context->uct_worker); + mca_btl_uct_context_unlock (context); + /* send is complete */ + mca_btl_uct_frag_complete (frag, OPAL_SUCCESS); + return 1; + } } - /* wait for something to complete before trying again */ - while (!uct_worker_progress (context->uct_worker)); - } while (1); + /* wait for something to happen */ + uct_worker_progress (context->uct_worker); + mca_btl_uct_context_unlock (context); - mca_btl_uct_context_unlock (context); - - if (UCS_OK == ucs_status) { - /* restore original flags */ - frag->base.des_flags = flags; - /* send is complete */ - mca_btl_uct_frag_complete (frag, OPAL_SUCCESS); - return 1; + mca_btl_uct_device_handle_completions (context); } - if (OPAL_UNLIKELY(UCS_INPROGRESS != ucs_status)) { + if (!append) { return OPAL_ERR_OUT_OF_RESOURCE; } - return 0; + OPAL_THREAD_LOCK(&uct_btl->lock); + 
mca_btl_uct_append_pending_frag (uct_btl, frag, context, true); + OPAL_THREAD_UNLOCK(&uct_btl->lock); + + return OPAL_SUCCESS; } int mca_btl_uct_send (mca_btl_base_module_t *btl, mca_btl_base_endpoint_t *endpoint, mca_btl_base_descriptor_t *descriptor, @@ -199,7 +234,6 @@ int mca_btl_uct_send (mca_btl_base_module_t *btl, mca_btl_base_endpoint_t *endpo mca_btl_uct_module_t *uct_btl = (mca_btl_uct_module_t *) btl; mca_btl_uct_device_context_t *context = mca_btl_uct_module_get_am_context (uct_btl); mca_btl_uct_base_frag_t *frag = (mca_btl_uct_base_frag_t *) descriptor; - int flags = frag->base.des_flags; uct_ep_h ep_handle; int rc; @@ -208,28 +242,21 @@ int mca_btl_uct_send (mca_btl_base_module_t *btl, mca_btl_base_endpoint_t *endpo frag->header.data.tag = tag; - - /* add the callback flag before posting to avoid potential races with other threads */ - frag->base.des_flags |= MCA_BTL_DES_SEND_ALWAYS_CALLBACK; + frag->context = context; rc = mca_btl_uct_endpoint_check_am (uct_btl, endpoint, context, &ep_handle); if (OPAL_UNLIKELY(OPAL_SUCCESS != rc)) { - OPAL_THREAD_LOCK(&endpoint->ep_lock); + OPAL_THREAD_LOCK(&uct_btl->lock); /* check one more time in case another thread is completing the connection now */ if (OPAL_SUCCESS != mca_btl_uct_endpoint_test_am (uct_btl, endpoint, context, &ep_handle)) { - frag->context_id = context->context_id; - frag->ready = false; - OPAL_THREAD_LOCK(&uct_btl->lock); - opal_list_append (&uct_btl->pending_frags, (opal_list_item_t *) frag); - OPAL_THREAD_UNLOCK(&endpoint->ep_lock); + mca_btl_uct_append_pending_frag (uct_btl, frag, context, false); OPAL_THREAD_UNLOCK(&uct_btl->lock); - return OPAL_SUCCESS; } - OPAL_THREAD_UNLOCK(&endpoint->ep_lock); + OPAL_THREAD_UNLOCK(&uct_btl->lock); } - return mca_btl_uct_send_frag (uct_btl, endpoint, frag, flags, context, ep_handle); + return mca_btl_uct_send_frag (uct_btl, frag, true); } struct mca_btl_uct_sendi_pack_args_t { @@ -253,10 +280,9 @@ static size_t mca_btl_uct_sendi_pack (void *data, void 
*arg) return args->header_size + args->payload_size + 8; } -static inline size_t mca_btl_uct_max_sendi (mca_btl_uct_module_t *uct_btl) +static inline size_t mca_btl_uct_max_sendi (mca_btl_uct_module_t *uct_btl, int context_id) { - return (uct_btl->am_tl->uct_iface_attr.cap.am.max_short > uct_btl->am_tl->uct_iface_attr.cap.am.max_bcopy) ? - uct_btl->am_tl->uct_iface_attr.cap.am.max_short : uct_btl->am_tl->uct_iface_attr.cap.am.max_bcopy; + return MCA_BTL_UCT_TL_ATTR(uct_btl->am_tl, context_id).cap.am.max_bcopy; } int mca_btl_uct_sendi (mca_btl_base_module_t *btl, mca_btl_base_endpoint_t *endpoint, opal_convertor_t *convertor, @@ -269,12 +295,12 @@ int mca_btl_uct_sendi (mca_btl_base_module_t *btl, mca_btl_base_endpoint_t *endp /* message with header */ const size_t msg_size = total_size + 8; mca_btl_uct_am_header_t am_header; - ucs_status_t ucs_status; + ucs_status_t ucs_status = UCS_ERR_NO_RESOURCE; uct_ep_h ep_handle; int rc; rc = mca_btl_uct_endpoint_check_am (uct_btl, endpoint, context, &ep_handle); - if (OPAL_UNLIKELY(OPAL_SUCCESS != rc || msg_size > mca_btl_uct_max_sendi (uct_btl))) { + if (OPAL_UNLIKELY(OPAL_SUCCESS != rc || msg_size > mca_btl_uct_max_sendi (uct_btl, context->context_id))) { if (descriptor) { *descriptor = mca_btl_uct_alloc (btl, endpoint, order, total_size, flags); } @@ -287,7 +313,7 @@ int mca_btl_uct_sendi (mca_btl_base_module_t *btl, mca_btl_base_endpoint_t *endp mca_btl_uct_context_lock (context); if (0 == payload_size) { ucs_status = uct_ep_am_short (ep_handle, MCA_BTL_UCT_FRAG, am_header.value, header, header_size); - } else if (msg_size < (size_t) uct_btl->am_tl->uct_iface_attr.cap.am.max_short) { + } else if (msg_size < (size_t) MCA_BTL_UCT_TL_ATTR(uct_btl->am_tl, context->context_id).cap.am.max_short) { int8_t *data = alloca (total_size); _mca_btl_uct_send_pack (data, header, header_size, convertor, payload_size); ucs_status = uct_ep_am_short (ep_handle, MCA_BTL_UCT_FRAG, am_header.value, data, total_size); diff --git 
a/opal/mca/btl/uct/btl_uct_am.h b/opal/mca/btl/uct/btl_uct_am.h index 07d7223eab5..9035540e710 100644 --- a/opal/mca/btl/uct/btl_uct_am.h +++ b/opal/mca/btl/uct/btl_uct_am.h @@ -14,6 +14,12 @@ #include "btl_uct_frag.h" +struct mca_btl_base_descriptor_t *mca_btl_uct_prepare_src (mca_btl_base_module_t *btl, + mca_btl_base_endpoint_t *endpoint, + opal_convertor_t *convertor, + uint8_t order, size_t reserve, + size_t *size, uint32_t flags); + int mca_btl_uct_sendi (mca_btl_base_module_t *btl, mca_btl_base_endpoint_t *endpoint, opal_convertor_t *convertor, void *header, size_t header_size, size_t payload_size, uint8_t order, uint32_t flags, mca_btl_base_tag_t tag, mca_btl_base_descriptor_t **descriptor); @@ -21,8 +27,7 @@ int mca_btl_uct_sendi (mca_btl_base_module_t *btl, mca_btl_base_endpoint_t *endp int mca_btl_uct_send (mca_btl_base_module_t *btl, mca_btl_base_endpoint_t *endpoint, mca_btl_base_descriptor_t *descriptor, mca_btl_base_tag_t tag); -int mca_btl_uct_send_frag (mca_btl_uct_module_t *uct_btl, mca_btl_base_endpoint_t *endpoint, mca_btl_uct_base_frag_t *frag, - int32_t flags, mca_btl_uct_device_context_t *context, uct_ep_h ep_handle); +int mca_btl_uct_send_frag (mca_btl_uct_module_t *uct_btl, mca_btl_uct_base_frag_t *frag, bool append); mca_btl_base_descriptor_t *mca_btl_uct_alloc (mca_btl_base_module_t *btl, mca_btl_base_endpoint_t *endpoint, uint8_t order, size_t size, uint32_t flags); diff --git a/opal/mca/btl/uct/btl_uct_amo.c b/opal/mca/btl/uct/btl_uct_amo.c index d443777089f..f7d02326884 100644 --- a/opal/mca/btl/uct/btl_uct_amo.c +++ b/opal/mca/btl/uct/btl_uct_amo.c @@ -104,8 +104,10 @@ int mca_btl_uct_afop (struct mca_btl_base_module_t *btl, struct mca_btl_base_end rc = OPAL_SUCCESS; } else if (UCS_OK == ucs_status) { rc = 1; + mca_btl_uct_uct_completion_release (comp); } else { rc = OPAL_ERR_OUT_OF_RESOURCE; + mca_btl_uct_uct_completion_release (comp); } uct_rkey_release (&rkey); @@ -176,8 +178,10 @@ int mca_btl_uct_acswap (struct 
mca_btl_base_module_t *btl, struct mca_btl_base_e rc = OPAL_SUCCESS; } else if (UCS_OK == ucs_status) { rc = 1; + mca_btl_uct_uct_completion_release (comp); } else { rc = OPAL_ERR_OUT_OF_RESOURCE; + mca_btl_uct_uct_completion_release (comp); } uct_rkey_release (&rkey); diff --git a/opal/mca/btl/uct/btl_uct_component.c b/opal/mca/btl/uct/btl_uct_component.c index aa88fe82d0d..f968cb9c31c 100644 --- a/opal/mca/btl/uct/btl_uct_component.c +++ b/opal/mca/btl/uct/btl_uct_component.c @@ -14,6 +14,10 @@ * reserved. * Copyright (c) 2018 Research Organization for Information Science * and Technology (RIST). All rights reserved. + * Copyright (c) 2018 Amazon.com, Inc. or its affiliates. All Rights reserved. + * Copyright (c) 2018 Triad National Security, LLC. All rights + * reserved. + * Copyright (c) 2019 Google, LLC. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -28,6 +32,9 @@ #include "opal/mca/btl/base/base.h" #include "opal/mca/hwloc/base/base.h" #include "opal/util/argv.h" +#include "opal/memoryhooks/memory.h" +#include "opal/mca/memory/base/base.h" +#include #include @@ -47,13 +54,13 @@ static int mca_btl_uct_component_register(void) MCA_BASE_VAR_FLAG_SETTABLE, OPAL_INFO_LVL_3, MCA_BASE_VAR_SCOPE_LOCAL, &mca_btl_uct_component.memory_domains); - mca_btl_uct_component.allowed_transports = "any"; + mca_btl_uct_component.allowed_transports = "dc_mlx5,rc_mlx5,ud,ugni_rdma,ugni_smsg,any"; (void) mca_base_component_var_register(&mca_btl_uct_component.super.btl_version, - "transports", "Comma-delimited list of transports of the form to use." - " The list of transports available can be queried using ucx_info. Special" - "values: any (any available) (default: any)", MCA_BASE_VAR_TYPE_STRING, - NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE, OPAL_INFO_LVL_3, MCA_BASE_VAR_SCOPE_LOCAL, - &mca_btl_uct_component.allowed_transports); + "transports", "Comma-delimited list of transports to use sorted by increasing " + "priority. 
The list of transports available can be queried using ucx_info. Special" + "values: any (any available) (default: dc_mlx5,rc_mlx5,ud,any)", + MCA_BASE_VAR_TYPE_STRING, NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE, OPAL_INFO_LVL_3, + MCA_BASE_VAR_SCOPE_LOCAL, &mca_btl_uct_component.allowed_transports); mca_btl_uct_component.num_contexts_per_module = 0; (void) mca_base_component_var_register(&mca_btl_uct_component.super.btl_version, @@ -93,6 +100,11 @@ static int mca_btl_uct_component_register(void) &module->super); } +static void mca_btl_uct_mem_release_cb(void *buf, size_t length, void *cbdata, bool from_alloc) +{ + ucm_vm_munmap(buf, length); +} + static int mca_btl_uct_component_open(void) { if (0 == mca_btl_uct_component.num_contexts_per_module) { @@ -112,6 +124,18 @@ static int mca_btl_uct_component_open(void) } } + if (mca_btl_uct_component.num_contexts_per_module > MCA_BTL_UCT_MAX_WORKERS) { + mca_btl_uct_component.num_contexts_per_module = MCA_BTL_UCT_MAX_WORKERS; + } + + if (mca_btl_uct_component.disable_ucx_memory_hooks && + ((OPAL_MEMORY_FREE_SUPPORT | OPAL_MEMORY_MUNMAP_SUPPORT) == + ((OPAL_MEMORY_FREE_SUPPORT | OPAL_MEMORY_MUNMAP_SUPPORT) & + opal_mem_hooks_support_level()))) { + ucm_set_external_event(UCM_EVENT_VM_UNMAPPED); + opal_mem_hooks_register_release(mca_btl_uct_mem_release_cb, NULL); + } + return OPAL_SUCCESS; } @@ -121,6 +145,10 @@ static int mca_btl_uct_component_open(void) */ static int mca_btl_uct_component_close(void) { + if (mca_btl_uct_component.disable_ucx_memory_hooks) { + opal_mem_hooks_unregister_release (mca_btl_uct_mem_release_cb); + } + return OPAL_SUCCESS; } @@ -128,12 +156,12 @@ static size_t mca_btl_uct_tl_modex_size (mca_btl_uct_tl_t *tl) { const size_t size = strlen (tl->uct_tl_name) + 1; - if (tl->uct_iface_attr.cap.flags & UCT_IFACE_FLAG_CONNECT_TO_IFACE) { + if (MCA_BTL_UCT_TL_ATTR(tl, 0).cap.flags & UCT_IFACE_FLAG_CONNECT_TO_IFACE) { /* pad out to a multiple of 4 bytes */ - return (4 + 3 + size + 
tl->uct_iface_attr.device_addr_len + tl->uct_iface_attr.iface_addr_len) & ~3; + return (4 + 3 + size + MCA_BTL_UCT_TL_ATTR(tl, 0).device_addr_len + MCA_BTL_UCT_TL_ATTR(tl, 0).iface_addr_len) & ~3; } - return (4 + 3 + size + tl->uct_iface_attr.device_addr_len) & ~3; + return (4 + 3 + size + MCA_BTL_UCT_TL_ATTR(tl, 0).device_addr_len) & ~3; } static size_t mca_btl_uct_module_modex_size (mca_btl_uct_module_t *module) @@ -172,13 +200,13 @@ static size_t mca_btl_uct_tl_modex_pack (mca_btl_uct_tl_t *tl, uint8_t *modex_da * the same endpoint since we are only doing RDMA. if any of these assumptions are * wrong then we can't delay creating the other contexts and must include their * information in the modex. */ - if (tl->uct_iface_attr.cap.flags & UCT_IFACE_FLAG_CONNECT_TO_IFACE) { + if (MCA_BTL_UCT_TL_ATTR(tl, 0).cap.flags & UCT_IFACE_FLAG_CONNECT_TO_IFACE) { uct_iface_get_address (dev_context->uct_iface, (uct_iface_addr_t *) modex_data); - modex_data += tl->uct_iface_attr.iface_addr_len; + modex_data += MCA_BTL_UCT_TL_ATTR(tl, 0).iface_addr_len; } uct_iface_get_device_address (dev_context->uct_iface, (uct_device_addr_t *) modex_data); - modex_data += tl->uct_iface_attr.device_addr_len; + modex_data += MCA_BTL_UCT_TL_ATTR(tl, 0).device_addr_len; return modex_size; } @@ -190,7 +218,7 @@ static int mca_btl_uct_modex_send (void) uint8_t *modex_data; int rc; - for (unsigned i = 0 ; i < mca_btl_uct_component.module_count ; ++i) { + for (int i = 0 ; i < mca_btl_uct_component.module_count ; ++i) { modex_size += mca_btl_uct_module_modex_size (mca_btl_uct_component.modules[i]); } @@ -199,7 +227,7 @@ static int mca_btl_uct_modex_send (void) modex->module_count = mca_btl_uct_component.module_count; - for (unsigned i = 0 ; i < mca_btl_uct_component.module_count ; ++i) { + for (int i = 0 ; i < mca_btl_uct_component.module_count ; ++i) { mca_btl_uct_module_t *module = mca_btl_uct_component.modules[i]; size_t name_len = strlen (module->md_name); @@ -247,9 +275,9 @@ static 
mca_btl_uct_module_t *mca_btl_uct_alloc_module (const char *md_name, mca_ OBJ_CONSTRUCT(&module->short_frags, opal_free_list_t); OBJ_CONSTRUCT(&module->eager_frags, opal_free_list_t); OBJ_CONSTRUCT(&module->max_frags, opal_free_list_t); - OBJ_CONSTRUCT(&module->rdma_completions, opal_free_list_t); OBJ_CONSTRUCT(&module->pending_frags, opal_list_t); - OBJ_CONSTRUCT(&module->lock, opal_mutex_t); + OBJ_CONSTRUCT(&module->lock, opal_recursive_mutex_t); + OBJ_CONSTRUCT(&module->pending_connection_reqs, opal_fifo_t); module->md = md; module->md_name = strdup (md_name); @@ -275,10 +303,13 @@ ucs_status_t mca_btl_uct_am_handler (void *arg, void *data, size_t length, unsig .seg_len = length - sizeof (*header)}; mca_btl_uct_base_frag_t frag = {.base = {.des_segments = &seg, .des_segment_count = 1}}; + /* prevent recursion */ + tl_context->in_am_callback = true; + reg = mca_btl_base_active_message_trigger + header->data.tag; - mca_btl_uct_context_unlock (tl_context); reg->cbfunc (&uct_btl->super, header->data.tag, &frag.base, reg->cbdata); - mca_btl_uct_context_lock (tl_context); + + tl_context->in_am_callback = false; return UCS_OK; } @@ -434,7 +465,7 @@ static mca_btl_base_module_t **mca_btl_uct_component_init (int *num_btl_modules, return base_modules; } -int mca_btl_uct_tl_progress (mca_btl_uct_tl_t *tl, int starting_index) +static int mca_btl_uct_tl_progress (mca_btl_uct_tl_t *tl, int starting_index) { unsigned int ret = 0; @@ -468,8 +499,7 @@ static int mca_btl_uct_component_progress_pending (mca_btl_uct_module_t *uct_btl opal_list_remove_item (&uct_btl->pending_frags, (opal_list_item_t *) frag); - if (OPAL_SUCCESS > mca_btl_uct_send (&uct_btl->super, frag->endpoint, &frag->base, - frag->header.data.tag)) { + if (OPAL_SUCCESS > mca_btl_uct_send_frag (uct_btl, frag, false)) { opal_list_prepend (&uct_btl->pending_frags, (opal_list_item_t *) frag); } } @@ -488,7 +518,7 @@ static int mca_btl_uct_component_progress (void) int starting_index = mca_btl_uct_get_context_index 
(); unsigned ret = 0; - for (unsigned i = 0 ; i < mca_btl_uct_component.module_count ; ++i) { + for (int i = 0 ; i < mca_btl_uct_component.module_count ; ++i) { mca_btl_uct_module_t *module = mca_btl_uct_component.modules[i]; /* unlike ucp, uct actually tells us something useful! its almost like it was "inspired" @@ -500,9 +530,16 @@ static int mca_btl_uct_component_progress (void) } if (module->conn_tl) { + mca_btl_uct_pending_connection_request_t *request; + if (module->conn_tl != module->am_tl && module->conn_tl != module->rdma_tl) { ret += mca_btl_uct_tl_progress (module->conn_tl, 0); } + + while (NULL != (request = (mca_btl_uct_pending_connection_request_t *) opal_fifo_pop_atomic (&module->pending_connection_reqs))) { + mca_btl_uct_process_connection_request (module, (mca_btl_uct_conn_req_t *) request->request_data); + OBJ_RELEASE(request); + } } if (0 != opal_list_get_size (&module->pending_frags)) { diff --git a/opal/mca/btl/uct/btl_uct_device_context.h b/opal/mca/btl/uct/btl_uct_device_context.h index ccb4f3be71f..12ef1e1f42c 100644 --- a/opal/mca/btl/uct/btl_uct_device_context.h +++ b/opal/mca/btl/uct/btl_uct_device_context.h @@ -23,7 +23,7 @@ * @param[in] tl btl uct tl pointer * @param[in] context_id identifier for this context (0..MCA_BTL_UCT_MAX_WORKERS-1) */ -mca_btl_uct_device_context_t *mca_btl_uct_context_create (mca_btl_uct_module_t *module, mca_btl_uct_tl_t *tl, int context_id); +mca_btl_uct_device_context_t *mca_btl_uct_context_create (mca_btl_uct_module_t *module, mca_btl_uct_tl_t *tl, int context_id, bool enable_progress); /** * @brief Destroy a device context and release all resources @@ -89,14 +89,12 @@ mca_btl_uct_module_get_tl_context_specific (mca_btl_uct_module_t *module, mca_bt mca_btl_uct_device_context_t *context = tl->uct_dev_contexts[context_id]; if (OPAL_UNLIKELY(NULL == context)) { - mca_btl_uct_device_context_t *new_context; - - new_context = mca_btl_uct_context_create (module, tl, context_id); - if 
(!opal_atomic_compare_exchange_strong_ptr (&tl->uct_dev_contexts[context_id], &context, new_context)) { - mca_btl_uct_context_destroy (new_context); - } else { - context = new_context; + OPAL_THREAD_LOCK(&module->lock); + context = tl->uct_dev_contexts[context_id]; + if (OPAL_UNLIKELY(NULL == context)) { + context = tl->uct_dev_contexts[context_id] = mca_btl_uct_context_create (module, tl, context_id, true); } + OPAL_THREAD_UNLOCK(&module->lock); } return context; diff --git a/opal/mca/btl/uct/btl_uct_endpoint.c b/opal/mca/btl/uct/btl_uct_endpoint.c index e0dd6eee50a..ccdbd4511a2 100644 --- a/opal/mca/btl/uct/btl_uct_endpoint.c +++ b/opal/mca/btl/uct/btl_uct_endpoint.c @@ -2,6 +2,9 @@ /* * Copyright (c) 2018 Los Alamos National Security, LLC. All rights * reserved. + * Copyright (c) 2018 Triad National Security, LLC. All rights + * reserved. + * Copyright (c) 2019 Google, LLC. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -56,7 +59,7 @@ mca_btl_base_endpoint_t *mca_btl_uct_endpoint_create (opal_proc_t *proc) static unsigned char *mca_btl_uct_process_modex_tl (unsigned char *modex_data) { - BTL_VERBOSE(("processing modex for tl %s. size: %u", modex_data, *((uint32_t *) modex_data))); + BTL_VERBOSE(("processing modex for tl %s. 
size: %u", modex_data + 4, *((uint32_t *) modex_data))); /* skip size and name */ return modex_data + 4 + strlen ((char *) modex_data + 4) + 1; @@ -99,6 +102,28 @@ static void mca_btl_uct_process_modex (mca_btl_uct_module_t *uct_btl, unsigned c } } +static inline ucs_status_t mca_btl_uct_ep_create_connected_compat (uct_iface_h iface, uct_device_addr_t *device_addr, + uct_iface_addr_t *iface_addr, uct_ep_h *uct_ep) +{ +#if UCT_API >= UCT_VERSION(1, 6) + uct_ep_params_t ep_params = {.field_mask = UCT_EP_PARAM_FIELD_IFACE | UCT_EP_PARAM_FIELD_DEV_ADDR | UCT_EP_PARAM_FIELD_IFACE_ADDR, + .iface = iface, .dev_addr = device_addr, .iface_addr = iface_addr}; + return uct_ep_create (&ep_params, uct_ep); +#else + return uct_ep_create_connected (iface, device_addr, iface_addr, uct_ep); +#endif +} + +static inline ucs_status_t mca_btl_uct_ep_create_compat (uct_iface_h iface, uct_ep_h *uct_ep) +{ +#if UCT_API >= UCT_VERSION(1, 6) + uct_ep_params_t ep_params = {.field_mask = UCT_EP_PARAM_FIELD_IFACE, .iface = iface}; + return uct_ep_create (&ep_params, uct_ep); +#else + return uct_ep_create (iface, uct_ep); +#endif +} + static int mca_btl_uct_endpoint_connect_iface (mca_btl_uct_module_t *uct_btl, mca_btl_uct_tl_t *tl, mca_btl_uct_device_context_t *tl_context, mca_btl_uct_tl_endpoint_t *tl_endpoint, uint8_t *tl_data) @@ -109,15 +134,14 @@ static int mca_btl_uct_endpoint_connect_iface (mca_btl_uct_module_t *uct_btl, mc /* easy case. 
just connect to the interface */ iface_addr = (uct_iface_addr_t *) tl_data; - device_addr = (uct_device_addr_t *) ((uintptr_t) iface_addr + tl->uct_iface_attr.iface_addr_len); + device_addr = (uct_device_addr_t *) ((uintptr_t) iface_addr + MCA_BTL_UCT_TL_ATTR(tl, tl_context->context_id).iface_addr_len); BTL_VERBOSE(("connecting endpoint to interface")); mca_btl_uct_context_lock (tl_context); - ucs_status = uct_ep_create_connected (tl_context->uct_iface, device_addr, iface_addr, &tl_endpoint->uct_ep); - mca_btl_uct_context_unlock (tl_context); - + ucs_status = mca_btl_uct_ep_create_connected_compat (tl_context->uct_iface, device_addr, iface_addr, &tl_endpoint->uct_ep); tl_endpoint->flags = MCA_BTL_UCT_ENDPOINT_FLAG_CONN_READY; + mca_btl_uct_context_unlock (tl_context); return (UCS_OK == ucs_status) ? OPAL_SUCCESS : OPAL_ERROR; } @@ -138,15 +162,30 @@ static void mca_btl_uct_connection_ep_destruct (mca_btl_uct_connection_ep_t *ep) OBJ_CLASS_INSTANCE(mca_btl_uct_connection_ep_t, opal_object_t, mca_btl_uct_connection_ep_construct, mca_btl_uct_connection_ep_destruct); +struct mca_btl_uct_conn_completion_t { + uct_completion_t super; + volatile bool complete; +}; +typedef struct mca_btl_uct_conn_completion_t mca_btl_uct_conn_completion_t; + +static void mca_btl_uct_endpoint_flush_complete (uct_completion_t *self, ucs_status_t status) +{ + mca_btl_uct_conn_completion_t *completion = (mca_btl_uct_conn_completion_t *) self; + BTL_VERBOSE(("connection flush complete")); + completion->complete = true; +} + static int mca_btl_uct_endpoint_send_conn_req (mca_btl_uct_module_t *uct_btl, mca_btl_base_endpoint_t *endpoint, mca_btl_uct_device_context_t *conn_tl_context, - int64_t type, void *request, size_t request_length) + mca_btl_uct_conn_req_t *request, size_t request_length) { mca_btl_uct_connection_ep_t *conn_ep = endpoint->conn_ep; + mca_btl_uct_conn_completion_t completion = {.super = {.count = 1, .func = mca_btl_uct_endpoint_flush_complete}, + .complete = false}; 
ucs_status_t ucs_status; - BTL_VERBOSE(("sending connection request to peer. type: %" PRId64 ", length: %" PRIsize_t, - type, request_length)); + BTL_VERBOSE(("sending connection request to peer. context id: %d, type: %d, length: %" PRIsize_t, + request->context_id, request->type, request_length)); OBJ_RETAIN(endpoint->conn_ep); @@ -155,7 +194,8 @@ static int mca_btl_uct_endpoint_send_conn_req (mca_btl_uct_module_t *uct_btl, mc do { MCA_BTL_UCT_CONTEXT_SERIALIZE(conn_tl_context, { - ucs_status = uct_ep_am_short (conn_ep->uct_ep, MCA_BTL_UCT_CONNECT_RDMA, type, request, request_length); + ucs_status = uct_ep_am_short (conn_ep->uct_ep, MCA_BTL_UCT_CONNECT_RDMA, request->type, request, + request_length); }); if (OPAL_LIKELY(UCS_OK == ucs_status)) { break; @@ -170,12 +210,18 @@ static int mca_btl_uct_endpoint_send_conn_req (mca_btl_uct_module_t *uct_btl, mc } while (1); /* for now we just wait for the connection request to complete before continuing */ - MCA_BTL_UCT_CONTEXT_SERIALIZE(conn_tl_context, { - do { - uct_worker_progress (conn_tl_context->uct_worker); - ucs_status = uct_ep_flush (conn_ep->uct_ep, 0, NULL); - } while (UCS_INPROGRESS == ucs_status); - }); + ucs_status = uct_ep_flush (conn_ep->uct_ep, 0, &completion.super); + if (UCS_OK != ucs_status && UCS_INPROGRESS != ucs_status) { + /* NTH: I don't know if this path is needed. For some networks we must use a completion. 
*/ + do { + ucs_status = uct_ep_flush (conn_ep->uct_ep, 0, NULL); + mca_btl_uct_context_progress (conn_tl_context); + } while (UCS_INPROGRESS == ucs_status); + } else { + do { + mca_btl_uct_context_progress (conn_tl_context); + } while (!completion.complete); + } opal_mutex_lock (&endpoint->ep_lock); @@ -189,7 +235,7 @@ static int mca_btl_uct_endpoint_connect_endpoint (mca_btl_uct_module_t *uct_btl, mca_btl_uct_tl_endpoint_t *tl_endpoint, uint8_t *tl_data, uint8_t *conn_tl_data, void *ep_addr) { - size_t request_length = sizeof (mca_btl_uct_conn_req_t) + tl->uct_iface_attr.ep_addr_len; + size_t request_length = sizeof (mca_btl_uct_conn_req_t) + MCA_BTL_UCT_TL_ATTR(tl, tl_context->context_id).ep_addr_len; mca_btl_uct_connection_ep_t *conn_ep = endpoint->conn_ep; mca_btl_uct_tl_t *conn_tl = uct_btl->conn_tl; mca_btl_uct_device_context_t *conn_tl_context = conn_tl->uct_dev_contexts[0]; @@ -208,7 +254,7 @@ static int mca_btl_uct_endpoint_connect_endpoint (mca_btl_uct_module_t *uct_btl, opal_process_name_print (endpoint->ep_proc->proc_name))); iface_addr = (uct_iface_addr_t *) conn_tl_data; - device_addr = (uct_device_addr_t *) ((uintptr_t) conn_tl_data + conn_tl->uct_iface_attr.iface_addr_len); + device_addr = (uct_device_addr_t *) ((uintptr_t) conn_tl_data + MCA_BTL_UCT_TL_ATTR(conn_tl, 0).iface_addr_len); endpoint->conn_ep = conn_ep = OBJ_NEW(mca_btl_uct_connection_ep_t); if (OPAL_UNLIKELY(NULL == conn_ep)) { @@ -217,8 +263,8 @@ static int mca_btl_uct_endpoint_connect_endpoint (mca_btl_uct_module_t *uct_btl, /* create a temporary endpoint for setting up the rdma endpoint */ MCA_BTL_UCT_CONTEXT_SERIALIZE(conn_tl_context, { - ucs_status = uct_ep_create_connected (conn_tl_context->uct_iface, device_addr, iface_addr, - &conn_ep->uct_ep); + ucs_status = mca_btl_uct_ep_create_connected_compat (conn_tl_context->uct_iface, device_addr, iface_addr, + &conn_ep->uct_ep); }); if (UCS_OK != ucs_status) { BTL_VERBOSE(("could not create an endpoint for forming connection to remote 
peer. code = %d", @@ -233,60 +279,50 @@ static int mca_btl_uct_endpoint_connect_endpoint (mca_btl_uct_module_t *uct_btl, request->proc_name = OPAL_PROC_MY_NAME; request->context_id = tl_context->context_id; request->tl_index = tl->tl_index; + request->type = !!(ep_addr); if (NULL == tl_endpoint->uct_ep) { BTL_VERBOSE(("allocating endpoint for peer %s and sending connection data", opal_process_name_print (endpoint->ep_proc->proc_name))); MCA_BTL_UCT_CONTEXT_SERIALIZE(tl_context, { - ucs_status = uct_ep_create (tl_context->uct_iface, &tl_endpoint->uct_ep); + ucs_status = mca_btl_uct_ep_create_compat (tl_context->uct_iface, &tl_endpoint->uct_ep); }); if (UCS_OK != ucs_status) { OBJ_RELEASE(endpoint->conn_ep); return OPAL_ERROR; } - - /* fill in connection request */ - ucs_status = uct_ep_get_address (tl_endpoint->uct_ep, (uct_ep_addr_t *) request->ep_addr); - if (UCS_OK != ucs_status) { - /* this is a fatal a fatal error */ - OBJ_RELEASE(endpoint->conn_ep); - uct_ep_destroy (tl_endpoint->uct_ep); - tl_endpoint->uct_ep = NULL; - return OPAL_ERROR; - } - - rc = mca_btl_uct_endpoint_send_conn_req (uct_btl, endpoint, conn_tl_context, 0, request, - request_length); - if (OPAL_UNLIKELY(OPAL_SUCCESS != rc)) { - OBJ_RELEASE(endpoint->conn_ep); - uct_ep_destroy (tl_endpoint->uct_ep); - tl_endpoint->uct_ep = NULL; - return OPAL_ERROR; - } } if (ep_addr) { - BTL_VERBOSE(("using remote endpoint address to connect endpoint. ep_addr = %p", ep_addr)); - - device_addr = (uct_device_addr_t *) tl_data; + BTL_VERBOSE(("using remote endpoint address to connect endpoint for tl %s, index %d. 
ep_addr = %p", + tl->uct_tl_name, tl_context->context_id, ep_addr)); /* NTH: there is no need to lock the device context in this case */ - ucs_status = uct_ep_connect_to_ep (tl_endpoint->uct_ep, device_addr, ep_addr); + ucs_status = uct_ep_connect_to_ep (tl_endpoint->uct_ep, (uct_device_addr_t *) tl_data, ep_addr); if (UCS_OK != ucs_status) { return OPAL_ERROR; } + } - /* let the remote side know that the connection has been established and - * wait for the message to be sent */ - rc = mca_btl_uct_endpoint_send_conn_req (uct_btl, endpoint, conn_tl_context, 1, request, - sizeof (mca_btl_uct_conn_req_t)); - if (OPAL_UNLIKELY(OPAL_SUCCESS != rc)) { - OBJ_RELEASE(endpoint->conn_ep); - uct_ep_destroy (tl_endpoint->uct_ep); - tl_endpoint->uct_ep = NULL; - return OPAL_ERROR; - } + /* fill in connection request */ + ucs_status = uct_ep_get_address (tl_endpoint->uct_ep, (uct_ep_addr_t *) request->ep_addr); + if (UCS_OK != ucs_status) { + /* this is a fatal a fatal error */ + OBJ_RELEASE(endpoint->conn_ep); + uct_ep_destroy (tl_endpoint->uct_ep); + tl_endpoint->uct_ep = NULL; + return OPAL_ERROR; + } + + /* let the remote side know that the connection has been established and + * wait for the message to be sent */ + rc = mca_btl_uct_endpoint_send_conn_req (uct_btl, endpoint, conn_tl_context, request, request_length); + if (OPAL_UNLIKELY(OPAL_SUCCESS != rc)) { + OBJ_RELEASE(endpoint->conn_ep); + uct_ep_destroy (tl_endpoint->uct_ep); + tl_endpoint->uct_ep = NULL; + return OPAL_ERROR; } return (tl_endpoint->flags & MCA_BTL_UCT_ENDPOINT_FLAG_CONN_READY) ? 
OPAL_SUCCESS : OPAL_ERR_OUT_OF_RESOURCE; @@ -296,8 +332,9 @@ int mca_btl_uct_endpoint_connect (mca_btl_uct_module_t *uct_btl, mca_btl_uct_end void *ep_addr, int tl_index) { mca_btl_uct_tl_endpoint_t *tl_endpoint = endpoint->uct_eps[context_id] + tl_index; - mca_btl_uct_device_context_t *tl_context = mca_btl_uct_module_get_rdma_context_specific (uct_btl, context_id); - mca_btl_uct_tl_t *tl = (tl_index == uct_btl->rdma_tl->tl_index) ? uct_btl->rdma_tl : uct_btl->am_tl; + mca_btl_uct_tl_t *tl = (uct_btl->rdma_tl && tl_index == uct_btl->rdma_tl->tl_index) ? + uct_btl->rdma_tl : uct_btl->am_tl; + mca_btl_uct_device_context_t *tl_context = mca_btl_uct_module_get_tl_context_specific (uct_btl, tl, context_id); uint8_t *rdma_tl_data = NULL, *conn_tl_data = NULL, *am_tl_data = NULL, *tl_data; mca_btl_uct_connection_ep_t *conn_ep = NULL; mca_btl_uct_modex_t *modex; @@ -312,8 +349,8 @@ int mca_btl_uct_endpoint_connect (mca_btl_uct_module_t *uct_btl, mca_btl_uct_end return OPAL_ERR_UNREACH; } - BTL_VERBOSE(("checking endpoint %p with context id %d. cached uct ep: %p, ready: %d", endpoint, context_id, - tl_endpoint->uct_ep, !!(MCA_BTL_UCT_ENDPOINT_FLAG_CONN_READY & tl_endpoint->flags))); + BTL_VERBOSE(("checking endpoint %p with context id %d. 
cached uct ep: %p, ready: %d", (void *) endpoint, context_id, + (void *) tl_endpoint->uct_ep, !!(MCA_BTL_UCT_ENDPOINT_FLAG_CONN_READY & tl_endpoint->flags))); opal_mutex_lock (&endpoint->ep_lock); if (MCA_BTL_UCT_ENDPOINT_FLAG_CONN_READY & tl_endpoint->flags) { diff --git a/opal/mca/btl/uct/btl_uct_endpoint.h b/opal/mca/btl/uct/btl_uct_endpoint.h index f8d5e6f522d..6add6f27193 100644 --- a/opal/mca/btl/uct/btl_uct_endpoint.h +++ b/opal/mca/btl/uct/btl_uct_endpoint.h @@ -31,8 +31,8 @@ BEGIN_C_DECLS mca_btl_base_endpoint_t *mca_btl_uct_endpoint_create (opal_proc_t *proc); int mca_btl_uct_endpoint_connect (mca_btl_uct_module_t *module, mca_btl_uct_endpoint_t *endpoint, int ep_index, void *ep_addr, int tl_index); -static int mca_btl_uct_endpoint_test_am (mca_btl_uct_module_t *module, mca_btl_uct_endpoint_t *endpoint, - mca_btl_uct_device_context_t *context, uct_ep_h *ep_handle) +static inline int mca_btl_uct_endpoint_test_am (mca_btl_uct_module_t *module, mca_btl_uct_endpoint_t *endpoint, + mca_btl_uct_device_context_t *context, uct_ep_h *ep_handle) { int tl_index = module->am_tl->tl_index; int ep_index = context->context_id; @@ -72,7 +72,8 @@ static inline int mca_btl_uct_endpoint_check (mca_btl_uct_module_t *module, mca_ rc = mca_btl_uct_endpoint_connect (module, endpoint, ep_index, NULL, tl_index); *ep_handle = endpoint->uct_eps[ep_index][tl_index].uct_ep; - BTL_VERBOSE(("mca_btl_uct_endpoint_connect returned %d", rc)); + BTL_VERBOSE(("mca_btl_uct_endpoint_connect returned %d. 
context id = %d, flags = 0x%x", rc, ep_index, + MCA_BTL_UCT_ENDPOINT_FLAG_CONN_READY & endpoint->uct_eps[ep_index][tl_index].flags)); return rc; } diff --git a/opal/mca/btl/uct/btl_uct_module.c b/opal/mca/btl/uct/btl_uct_module.c index 245c3eddd12..f0802867546 100644 --- a/opal/mca/btl/uct/btl_uct_module.c +++ b/opal/mca/btl/uct/btl_uct_module.c @@ -31,15 +31,6 @@ #include "btl_uct_endpoint.h" #include "btl_uct_am.h" -#include "opal/memoryhooks/memory.h" -#include "opal/mca/memory/base/base.h" -#include - -static void mca_btl_uct_mem_release_cb(void *buf, size_t length, void *cbdata, bool from_alloc) -{ - ucm_vm_munmap(buf, length); -} - struct mca_btl_base_endpoint_t *mca_btl_uct_get_ep (struct mca_btl_base_module_t *module, opal_proc_t *proc) { mca_btl_uct_module_t *uct_module = (mca_btl_uct_module_t *) module; @@ -62,7 +53,7 @@ struct mca_btl_base_endpoint_t *mca_btl_uct_get_ep (struct mca_btl_base_module_t break; } - BTL_VERBOSE(("endpoint initialized. new endpoint: %p", ep)); + BTL_VERBOSE(("endpoint initialized. new endpoint: %p", (void *) ep)); /* add this endpoint to the connection lookup table */ (void) opal_hash_table_set_value_uint64 (&uct_module->id_to_endpoint, (intptr_t) proc, ep); @@ -83,7 +74,6 @@ static int mca_btl_uct_add_procs (mca_btl_base_module_t *btl, if (false == uct_module->initialized) { mca_btl_uct_tl_t *am_tl = uct_module->am_tl; - mca_btl_uct_tl_t *rdma_tl = uct_module->rdma_tl; /* NTH: might want to vary this size based off the universe size (if * one exists). 
the table is only used for connection lookup and @@ -97,7 +87,7 @@ static int mca_btl_uct_add_procs (mca_btl_base_module_t *btl, if (am_tl) { rc = opal_free_list_init (&uct_module->short_frags, sizeof (mca_btl_uct_base_frag_t), opal_cache_line_size, OBJ_CLASS(mca_btl_uct_base_frag_t), - am_tl->uct_iface_attr.cap.am.max_short, opal_cache_line_size, + MCA_BTL_UCT_TL_ATTR(am_tl, 0).cap.am.max_short, opal_cache_line_size, 0, 1024, 64, NULL, 0, NULL, NULL, NULL); rc = opal_free_list_init (&uct_module->eager_frags, sizeof (mca_btl_uct_base_frag_t), @@ -111,18 +101,6 @@ static int mca_btl_uct_add_procs (mca_btl_base_module_t *btl, NULL, 0, uct_module->rcache, NULL, NULL); } - if (rdma_tl) { - rc = opal_free_list_init (&uct_module->rdma_completions, sizeof (mca_btl_uct_uct_completion_t), - opal_cache_line_size, OBJ_CLASS(mca_btl_uct_uct_completion_t), - 0, opal_cache_line_size, 0, 4096, 128, NULL, 0, NULL, NULL, - NULL); - } - - if (mca_btl_uct_component.disable_ucx_memory_hooks) { - ucm_set_external_event(UCM_EVENT_VM_UNMAPPED); - opal_mem_hooks_register_release(mca_btl_uct_mem_release_cb, NULL); - } - uct_module->initialized = true; } @@ -296,9 +274,9 @@ int mca_btl_uct_finalize (mca_btl_base_module_t* btl) OBJ_DESTRUCT(&uct_module->short_frags); OBJ_DESTRUCT(&uct_module->eager_frags); OBJ_DESTRUCT(&uct_module->max_frags); - OBJ_DESTRUCT(&uct_module->rdma_completions); OBJ_DESTRUCT(&uct_module->pending_frags); OBJ_DESTRUCT(&uct_module->lock); + OBJ_DESTRUCT(&uct_module->pending_connection_reqs); if (uct_module->rcache) { mca_rcache_base_module_destroy (uct_module->rcache); @@ -344,6 +322,7 @@ mca_btl_uct_module_t mca_btl_uct_module_template = { .btl_flush = mca_btl_uct_flush, .btl_sendi = mca_btl_uct_sendi, + .btl_prepare_src = mca_btl_uct_prepare_src, .btl_send = mca_btl_uct_send, .btl_alloc = mca_btl_uct_alloc, .btl_free = mca_btl_uct_free, @@ -370,12 +349,12 @@ mca_btl_uct_module_t mca_btl_uct_module_template = { OBJ_CLASS_INSTANCE(mca_btl_uct_reg_t, 
opal_free_list_item_t, NULL, NULL); -void mca_btl_uct_md_construct (mca_btl_uct_md_t *md) +static void mca_btl_uct_md_construct (mca_btl_uct_md_t *md) { md->uct_md = NULL; } -void mca_btl_uct_md_destruct (mca_btl_uct_md_t *md) +static void mca_btl_uct_md_destruct (mca_btl_uct_md_t *md) { if (md->uct_md) { uct_md_close (md->uct_md); diff --git a/opal/mca/btl/uct/btl_uct_rdma.c b/opal/mca/btl/uct/btl_uct_rdma.c index 58f7c504792..2d2d1c3f04b 100644 --- a/opal/mca/btl/uct/btl_uct_rdma.c +++ b/opal/mca/btl/uct/btl_uct_rdma.c @@ -30,13 +30,14 @@ static void mca_btl_uct_uct_completion_construct (mca_btl_uct_uct_completion_t * OBJ_CLASS_INSTANCE(mca_btl_uct_uct_completion_t, opal_free_list_item_t, mca_btl_uct_uct_completion_construct, NULL); + mca_btl_uct_uct_completion_t * mca_btl_uct_uct_completion_alloc (mca_btl_uct_module_t *uct_btl, mca_btl_base_endpoint_t *endpoint, void *local_address, mca_btl_base_registration_handle_t *local_handle, mca_btl_uct_device_context_t *dev_context, mca_btl_base_rdma_completion_fn_t cbfunc, void *cbcontext, void *cbdata) { - mca_btl_uct_uct_completion_t *comp = (mca_btl_uct_uct_completion_t *) opal_free_list_get (&uct_btl->rdma_completions); + mca_btl_uct_uct_completion_t *comp = (mca_btl_uct_uct_completion_t *) opal_free_list_get (&dev_context->rdma_completions); if (OPAL_LIKELY(NULL != comp)) { comp->uct_comp.count = 1; comp->btl = &uct_btl->super; @@ -55,8 +56,7 @@ mca_btl_uct_uct_completion_alloc (mca_btl_uct_module_t *uct_btl, mca_btl_base_en void mca_btl_uct_uct_completion_release (mca_btl_uct_uct_completion_t *comp) { if (comp) { - mca_btl_uct_module_t *uct_btl = (mca_btl_uct_module_t *) comp->btl; - opal_free_list_return (&uct_btl->rdma_completions, &comp->super); + opal_free_list_return (&comp->dev_context->rdma_completions, &comp->super); } } @@ -98,30 +98,36 @@ int mca_btl_uct_get (mca_btl_base_module_t *btl, mca_btl_base_endpoint_t *endpoi mca_btl_uct_context_lock (context); - if (size <= 
uct_btl->rdma_tl->uct_iface_attr.cap.get.max_bcopy) { + if (size <= MCA_BTL_UCT_TL_ATTR(uct_btl->rdma_tl, context->context_id).cap.get.max_bcopy) { ucs_status = uct_ep_get_bcopy (ep_handle, mca_btl_uct_get_unpack, local_address, size, remote_address, rkey.rkey, &comp->uct_comp); } else { uct_iov_t iov = {.buffer = local_address, .length = size, .stride = 0, .count = 1, .memh = MCA_BTL_UCT_REG_REMOTE_TO_LOCAL(local_handle)->uct_memh}; - ucs_status = uct_ep_get_zcopy (ep_handle, &iov, 1, remote_address, rkey.rkey, &comp->uct_comp); } - /* go ahead and progress the worker while we have the lock */ - (void) uct_worker_progress (context->uct_worker); + /* go ahead and progress the worker while we have the lock (if we are not in an AM callback) */ + if (!context->in_am_callback) { + (void) uct_worker_progress (context->uct_worker); + } mca_btl_uct_context_unlock (context); - mca_btl_uct_device_handle_completions (context); + if (!context->in_am_callback) { + mca_btl_uct_device_handle_completions (context); + } if (UCS_OK == ucs_status && cbfunc) { /* if UCS_OK is returned the callback will never fire so we have to make the callback * ourselves */ cbfunc (btl, endpoint, local_address, local_handle, cbcontext, cbdata, OPAL_SUCCESS); - mca_btl_uct_uct_completion_release (comp); - } else if (UCS_INPROGRESS == ucs_status) { + } + + if (UCS_INPROGRESS == ucs_status) { ucs_status = UCS_OK; + } else { + mca_btl_uct_uct_completion_release (comp); } BTL_VERBOSE(("get issued. status = %d", ucs_status)); @@ -157,6 +163,8 @@ int mca_btl_uct_put (mca_btl_base_module_t *btl, mca_btl_base_endpoint_t *endpoi ucs_status_t ucs_status; uct_rkey_bundle_t rkey; uct_ep_h ep_handle; + bool use_short = false; + bool use_bcopy = false; int rc; BTL_VERBOSE(("performing put operation. 
local address: %p, length: %lu", local_address, (unsigned long) size)); @@ -177,12 +185,19 @@ int mca_btl_uct_put (mca_btl_base_module_t *btl, mca_btl_base_endpoint_t *endpoi mca_btl_uct_context_lock (context); + /* determine what UCT prototol should be used */ + if (size <= uct_btl->super.btl_put_local_registration_threshold) { + use_short = size <= MCA_BTL_UCT_TL_ATTR(uct_btl->rdma_tl, context->context_id).cap.put.max_short; + use_bcopy = !use_short; + } + do { - if (size <= uct_btl->rdma_tl->uct_iface_attr.cap.put.max_short) { + if (use_short) { ucs_status = uct_ep_put_short (ep_handle, local_address, size, remote_address, rkey.rkey); - } else if (size <= uct_btl->super.btl_put_local_registration_threshold) { + } else if (use_bcopy) { ssize_t tmp = uct_ep_put_bcopy (ep_handle, mca_btl_uct_put_pack, - &(mca_btl_uct_put_pack_args_t) {.local_address = local_address, .size = size}, + &(mca_btl_uct_put_pack_args_t) {.local_address = local_address, + .size = size}, remote_address, rkey.rkey); ucs_status = (tmp == (ssize_t) size) ? UCS_OK : UCS_ERR_NO_RESOURCE; } else { @@ -193,8 +208,11 @@ int mca_btl_uct_put (mca_btl_base_module_t *btl, mca_btl_base_endpoint_t *endpoi } /* go ahead and progress the worker while we have the lock */ - if (UCS_ERR_NO_RESOURCE != ucs_status) { - (void) uct_worker_progress (context->uct_worker); + if (UCS_ERR_NO_RESOURCE != ucs_status || context->in_am_callback) { + if (!context->in_am_callback) { + (void) uct_worker_progress (context->uct_worker); + } + break; } @@ -211,9 +229,12 @@ int mca_btl_uct_put (mca_btl_base_module_t *btl, mca_btl_base_endpoint_t *endpoi * ourselves. this callback is possibly being made before the data is visible to the * remote process. 
*/ cbfunc (btl, endpoint, local_address, local_handle, cbcontext, cbdata, OPAL_SUCCESS); - mca_btl_uct_uct_completion_release (comp); - } else if (UCS_INPROGRESS == ucs_status) { + } + + if (UCS_INPROGRESS == ucs_status) { ucs_status = UCS_OK; + } else { + mca_btl_uct_uct_completion_release (comp); } uct_rkey_release (&rkey); diff --git a/opal/mca/btl/uct/btl_uct_tl.c b/opal/mca/btl/uct/btl_uct_tl.c index 13e51bceacc..a711a41ce99 100644 --- a/opal/mca/btl/uct/btl_uct_tl.c +++ b/opal/mca/btl/uct/btl_uct_tl.c @@ -4,6 +4,9 @@ * reserved. * Copyright (c) 2018 Research Organization for Information Science * and Technology (RIST). All rights reserved. + * Copyright (c) 2018 Triad National Security, LLC. All rights + * reserved. + * Copyright (c) 2019 Google, LLC. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -16,11 +19,17 @@ #include "opal/util/bit_ops.h" #include "opal/util/argv.h" +#if HAVE_DECL_UCT_CB_FLAG_SYNC +#define MCA_BTL_UCT_CB_FLAG_SYNC UCT_CB_FLAG_SYNC +#else +#define MCA_BTL_UCT_CB_FLAG_SYNC 0 +#endif + /** * @brief Convert UCT capabilities to BTL flags */ static uint64_t mca_btl_uct_cap_to_btl_flag[][2] = { - {UCT_IFACE_FLAG_AM_ZCOPY, MCA_BTL_FLAGS_SEND}, + {UCT_IFACE_FLAG_AM_SHORT, MCA_BTL_FLAGS_SEND}, {UCT_IFACE_FLAG_PUT_ZCOPY, MCA_BTL_FLAGS_PUT}, {UCT_IFACE_FLAG_GET_ZCOPY, MCA_BTL_FLAGS_GET}, {0,0}, @@ -61,11 +70,11 @@ static uint64_t mca_btl_uct_cap_to_btl_atomic_flag[][2] = { static void mca_btl_uct_module_set_atomic_flags (mca_btl_uct_module_t *module, mca_btl_uct_tl_t *tl) { - uint64_t cap_flags = tl->uct_iface_attr.cap.flags; + uint64_t cap_flags = MCA_BTL_UCT_TL_ATTR(tl, 0).cap.flags; /* NTH: only use the fetching atomics for now */ - uint64_t atomic_flags32 = tl->uct_iface_attr.cap.atomic32.fop_flags; - uint64_t atomic_flags64 = tl->uct_iface_attr.cap.atomic64.fop_flags; + uint64_t atomic_flags32 = MCA_BTL_UCT_TL_ATTR(tl, 0).cap.atomic32.fop_flags; + uint64_t atomic_flags64 = MCA_BTL_UCT_TL_ATTR(tl, 
0).cap.atomic64.fop_flags; /* NTH: don't really have a way to seperate 32-bit and 64-bit right now */ uint64_t all_flags = atomic_flags32 & atomic_flags64; @@ -110,8 +119,7 @@ static uint64_t mca_btl_uct_cap_to_btl_atomic_flag[][2] = { */ static void mca_btl_uct_module_set_atomic_flags (mca_btl_uct_module_t *module, mca_btl_uct_tl_t *tl) { - uint64_t cap_flags = tl->uct_iface_attr.cap.flags; - uint32_t flags = 0; + uint64_t cap_flags = MCA_BTL_UCT_TL_ATTR(tl, 0).cap.flags; module->super.btl_atomic_flags = 0; @@ -165,61 +173,70 @@ OBJ_CLASS_INSTANCE(mca_btl_uct_tl_t, opal_list_item_t, mca_btl_uct_tl_constructo static ucs_status_t mca_btl_uct_conn_req_cb (void *arg, void *data, size_t length, unsigned flags) { mca_btl_uct_module_t *module = (mca_btl_uct_module_t *) arg; - mca_btl_uct_conn_req_t *req = (mca_btl_uct_conn_req_t *) ((uintptr_t) data + 8); + mca_btl_uct_pending_connection_request_t *request = calloc (1, length + sizeof (request->super)); + + /* it is not safe to process the connection request from the callback so just save it for + * later processing */ + OBJ_CONSTRUCT(request, mca_btl_uct_pending_connection_request_t); + memcpy (&request->request_data, (void *) ((intptr_t) data + 8), length); + opal_fifo_push_atomic (&module->pending_connection_reqs, &request->super); + + return UCS_OK; +} + +OBJ_CLASS_INSTANCE(mca_btl_uct_pending_connection_request_t, opal_list_item_t, NULL, NULL); + +int mca_btl_uct_process_connection_request (mca_btl_uct_module_t *module, mca_btl_uct_conn_req_t *req) +{ struct opal_proc_t *remote_proc = opal_proc_for_name (req->proc_name); mca_btl_base_endpoint_t *endpoint = mca_btl_uct_get_ep (&module->super, remote_proc); mca_btl_uct_tl_endpoint_t *tl_endpoint = endpoint->uct_eps[req->context_id] + req->tl_index; - int64_t type = *((int64_t *) data); int32_t ep_flags; int rc; - BTL_VERBOSE(("got connection request for endpoint %p. length = %lu", endpoint, length)); + BTL_VERBOSE(("got connection request for endpoint %p. type = %d. 
context id = %d", + (void *) endpoint, req->type, req->context_id)); if (NULL == endpoint) { BTL_ERROR(("could not create endpoint for connection request")); return UCS_ERR_UNREACHABLE; } - assert (type < 2); + assert (req->type < 2); + + ep_flags = opal_atomic_fetch_or_32 (&tl_endpoint->flags, MCA_BTL_UCT_ENDPOINT_FLAG_CONN_REC); - if (0 == type) { + if (!(ep_flags & MCA_BTL_UCT_ENDPOINT_FLAG_CONN_REC)) { /* create any necessary resources */ rc = mca_btl_uct_endpoint_connect (module, endpoint, req->context_id, req->ep_addr, req->tl_index); if (OPAL_SUCCESS != rc && OPAL_ERR_OUT_OF_RESOURCE != rc) { - BTL_ERROR(("could not setup rdma endpoint")); - return UCS_ERR_UNREACHABLE; + BTL_ERROR(("could not setup rdma endpoint. rc = %d", rc)); + return rc; } - - ep_flags = opal_atomic_or_fetch_32 (&tl_endpoint->flags, MCA_BTL_UCT_ENDPOINT_FLAG_CONN_REC); - } else { - ep_flags = opal_atomic_or_fetch_32 (&tl_endpoint->flags, MCA_BTL_UCT_ENDPOINT_FLAG_CONN_REM_READY); } /* the connection is ready once we have received the connection data and also a connection ready * message. this might be overkill but there is little documentation at the UCT level on when * an endpoint can be used. */ - if ((ep_flags & (MCA_BTL_UCT_ENDPOINT_FLAG_CONN_REM_READY | MCA_BTL_UCT_ENDPOINT_FLAG_CONN_REC)) == - (MCA_BTL_UCT_ENDPOINT_FLAG_CONN_REM_READY | MCA_BTL_UCT_ENDPOINT_FLAG_CONN_REC)) { - mca_btl_uct_device_context_t *tl_context = mca_btl_uct_module_get_tl_context_specific (module, module->comm_tls[req->tl_index], req->context_id); + if (req->type == 1) { + /* remote side is ready */ mca_btl_uct_base_frag_t *frag; /* to avoid a race with send adding pending frags grab the lock here */ - OPAL_THREAD_LOCK(&endpoint->ep_lock); - (void) opal_atomic_or_fetch_32 (&tl_endpoint->flags, MCA_BTL_UCT_ENDPOINT_FLAG_CONN_READY); - OPAL_THREAD_UNLOCK(&endpoint->ep_lock); - - opal_atomic_wmb (); + OPAL_THREAD_SCOPED_LOCK(&endpoint->ep_lock,{ + BTL_VERBOSE(("connection ready. 
sending %" PRIsize_t " frags", opal_list_get_size (&module->pending_frags))); + (void) opal_atomic_or_fetch_32 (&tl_endpoint->flags, MCA_BTL_UCT_ENDPOINT_FLAG_CONN_READY); + opal_atomic_wmb (); - OPAL_THREAD_SCOPED_LOCK(&module->lock, { OPAL_LIST_FOREACH(frag, &module->pending_frags, mca_btl_uct_base_frag_t) { - if (frag->context_id == req->context_id && endpoint == frag->endpoint) { + if (frag->context->context_id == req->context_id && endpoint == frag->endpoint) { frag->ready = true; } } }); } - return UCS_OK; + return OPAL_SUCCESS; } static int mca_btl_uct_setup_connection_tl (mca_btl_uct_module_t *module) @@ -239,14 +256,36 @@ static int mca_btl_uct_setup_connection_tl (mca_btl_uct_module_t *module) return UCS_OK == ucs_status ? OPAL_SUCCESS : OPAL_ERROR; } -mca_btl_uct_device_context_t *mca_btl_uct_context_create (mca_btl_uct_module_t *module, mca_btl_uct_tl_t *tl, int context_id) +static void mca_btl_uct_context_enable_progress (mca_btl_uct_device_context_t *context) { + if (!context->progress_enabled) { +#if HAVE_DECL_UCT_PROGRESS_THREAD_SAFE + uct_iface_progress_enable (context->uct_iface, UCT_PROGRESS_THREAD_SAFE | UCT_PROGRESS_SEND | + UCT_PROGRESS_RECV); +#else + uct_iface_progress_enable (context->uct_iface, UCT_PROGRESS_SEND | UCT_PROGRESS_RECV); +#endif + context->progress_enabled = true; + } +} + +mca_btl_uct_device_context_t *mca_btl_uct_context_create (mca_btl_uct_module_t *module, mca_btl_uct_tl_t *tl, int context_id, bool enable_progress) +{ +#if UCT_API >= UCT_VERSION(1, 6) + uct_iface_params_t iface_params = {.field_mask = UCT_IFACE_PARAM_FIELD_OPEN_MODE | + UCT_IFACE_PARAM_FIELD_DEVICE, + .open_mode = UCT_IFACE_OPEN_MODE_DEVICE, + .mode = {.device = {.tl_name = tl->uct_tl_name, + .dev_name = tl->uct_dev_name}}}; +#else uct_iface_params_t iface_params = {.rndv_cb = NULL, .eager_cb = NULL, .stats_root = NULL, .rx_headroom = 0, .open_mode = UCT_IFACE_OPEN_MODE_DEVICE, .mode = {.device = {.tl_name = tl->uct_tl_name, .dev_name = 
tl->uct_dev_name}}}; +#endif mca_btl_uct_device_context_t *context; ucs_status_t ucs_status; + int rc; context = calloc (1, sizeof (*context)); if (OPAL_UNLIKELY(NULL == context)) { @@ -257,40 +296,54 @@ mca_btl_uct_device_context_t *mca_btl_uct_context_create (mca_btl_uct_module_t * context->uct_btl = module; OBJ_CONSTRUCT(&context->completion_fifo, opal_fifo_t); OBJ_CONSTRUCT(&context->mutex, opal_recursive_mutex_t); + OBJ_CONSTRUCT(&context->rdma_completions, opal_free_list_t); + + rc = opal_free_list_init (&context->rdma_completions, sizeof (mca_btl_uct_uct_completion_t), + opal_cache_line_size, OBJ_CLASS(mca_btl_uct_uct_completion_t), + 0, opal_cache_line_size, 0, 4096, 128, NULL, 0, NULL, NULL, + NULL); + if (OPAL_UNLIKELY(OPAL_SUCCESS != rc)) { + mca_btl_uct_context_destroy (context); + return NULL; + } - do { - /* apparently (in contradiction to the spec) UCT is *not* thread safe. because we have to - * use our own locks just go ahead and use UCS_THREAD_MODE_SINGLE. if they ever fix their - * api then change this back to UCS_THREAD_MODE_MULTI and remove the locks around the - * various UCT calls. */ - ucs_status = uct_worker_create (module->ucs_async, UCS_THREAD_MODE_SINGLE, &context->uct_worker); - if (UCS_OK != ucs_status) { - BTL_VERBOSE(("could not create a UCT worker")); - mca_btl_uct_context_destroy (context); - context = NULL; - break; - } + /* apparently (in contradiction to the spec) UCT is *not* thread safe. because we have to + * use our own locks just go ahead and use UCS_THREAD_MODE_SINGLE. if they ever fix their + * api then change this back to UCS_THREAD_MODE_MULTI and remove the locks around the + * various UCT calls. 
*/ + ucs_status = uct_worker_create (module->ucs_async, UCS_THREAD_MODE_SINGLE, &context->uct_worker); + if (OPAL_UNLIKELY(UCS_OK != ucs_status)) { + BTL_VERBOSE(("could not create a UCT worker")); + mca_btl_uct_context_destroy (context); + return NULL; + } - ucs_status = uct_iface_open (tl->uct_md->uct_md, context->uct_worker, &iface_params, - tl->uct_tl_config, &context->uct_iface); - if (UCS_OK != ucs_status) { - BTL_VERBOSE(("could not open UCT interface. error code: %d", ucs_status)); - mca_btl_uct_context_destroy (context); - context = NULL; - break; - } + ucs_status = uct_iface_open (tl->uct_md->uct_md, context->uct_worker, &iface_params, + tl->uct_tl_config, &context->uct_iface); + if (OPAL_UNLIKELY(UCS_OK != ucs_status)) { + BTL_VERBOSE(("could not open UCT interface. error code: %d", ucs_status)); + mca_btl_uct_context_destroy (context); + return NULL; + } - BTL_VERBOSE(("enabling progress for tl %p context id %d", tl, context_id)); + /* only need to query one of the interfaces to get the attributes */ + ucs_status = uct_iface_query (context->uct_iface, &context->uct_iface_attr); + if (UCS_OK != ucs_status) { + BTL_VERBOSE(("Error querying UCT interface")); + mca_btl_uct_context_destroy (context); + return NULL; + } - uct_iface_progress_enable (context->uct_iface, UCT_PROGRESS_THREAD_SAFE | UCT_PROGRESS_SEND | - UCT_PROGRESS_RECV); + if (context_id > 0 && tl == module->am_tl) { + BTL_VERBOSE(("installing AM handler for tl %p context id %d", (void *) tl, context_id)); + uct_iface_set_am_handler (context->uct_iface, MCA_BTL_UCT_FRAG, mca_btl_uct_am_handler, + context, MCA_BTL_UCT_CB_FLAG_SYNC); + } - if (context_id > 0 && tl == module->am_tl) { - BTL_VERBOSE(("installing AM handler for tl %p context id %d", tl, context_id)); - uct_iface_set_am_handler (context->uct_iface, MCA_BTL_UCT_FRAG, mca_btl_uct_am_handler, - context, UCT_CB_FLAG_SYNC); - } - } while (0); + if (enable_progress) { + BTL_VERBOSE(("enabling progress for tl %p context id %d", (void *) tl, 
context_id)); + mca_btl_uct_context_enable_progress (context); + } return context; } @@ -308,6 +361,7 @@ void mca_btl_uct_context_destroy (mca_btl_uct_device_context_t *context) } OBJ_DESTRUCT(&context->completion_fifo); + OBJ_DESTRUCT(&context->rdma_completions); free (context); } @@ -322,7 +376,6 @@ static int tl_compare (opal_list_item_t **a, opal_list_item_t **b) static mca_btl_uct_tl_t *mca_btl_uct_create_tl (mca_btl_uct_module_t *module, mca_btl_uct_md_t *md, uct_tl_resource_desc_t *tl_desc, int priority) { mca_btl_uct_tl_t *tl = OBJ_NEW(mca_btl_uct_tl_t); - ucs_status_t ucs_status; if (OPAL_UNLIKELY(NULL == tl)) { return NULL; @@ -345,22 +398,15 @@ static mca_btl_uct_tl_t *mca_btl_uct_create_tl (mca_btl_uct_module_t *module, mc (void) uct_md_iface_config_read (md->uct_md, tl_desc->tl_name, NULL, NULL, &tl->uct_tl_config); /* always create a 0 context (needed to query) */ - tl->uct_dev_contexts[0] = mca_btl_uct_context_create (module, tl, 0); + tl->uct_dev_contexts[0] = mca_btl_uct_context_create (module, tl, 0, false); if (NULL == tl->uct_dev_contexts[0]) { BTL_VERBOSE(("could not create a uct device context")); OBJ_RELEASE(tl); return NULL; } - /* only need to query one of the interfaces to get the attributes */ - ucs_status = uct_iface_query (tl->uct_dev_contexts[0]->uct_iface, &tl->uct_iface_attr); - if (UCS_OK != ucs_status) { - BTL_VERBOSE(("Error querying UCT interface")); - OBJ_RELEASE(tl); - return NULL; - } - - BTL_VERBOSE(("Interface CAPS for tl %s::%s: 0x%lx", module->md_name, tl_desc->tl_name, (unsigned long) tl->uct_iface_attr.cap.flags)); + BTL_VERBOSE(("Interface CAPS for tl %s::%s: 0x%lx", module->md_name, tl_desc->tl_name, + (unsigned long) MCA_BTL_UCT_TL_ATTR(tl, 0).cap.flags)); return tl; } @@ -371,24 +417,20 @@ static void mca_btl_uct_set_tl_rdma (mca_btl_uct_module_t *module, mca_btl_uct_t mca_btl_uct_module_set_atomic_flags (module, tl); - module->super.btl_get_limit = tl->uct_iface_attr.cap.get.max_zcopy; - if 
(tl->uct_iface_attr.cap.get.max_bcopy) { + module->super.btl_get_limit = MCA_BTL_UCT_TL_ATTR(tl, 0).cap.get.max_zcopy; + if (MCA_BTL_UCT_TL_ATTR(tl, 0).cap.get.max_bcopy) { module->super.btl_get_alignment = 0; - module->super.btl_get_local_registration_threshold = tl->uct_iface_attr.cap.get.max_bcopy; + module->super.btl_get_local_registration_threshold = MCA_BTL_UCT_TL_ATTR(tl, 0).cap.get.max_bcopy; } else { /* this is overkill in terms of alignment but we have no way to enforce a minimum get size */ - module->super.btl_get_alignment = opal_next_poweroftwo_inclusive (tl->uct_iface_attr.cap.get.min_zcopy); + module->super.btl_get_alignment = opal_next_poweroftwo_inclusive (MCA_BTL_UCT_TL_ATTR(tl, 0).cap.get.min_zcopy); } - module->super.btl_put_limit = tl->uct_iface_attr.cap.put.max_zcopy; + module->super.btl_put_limit = MCA_BTL_UCT_TL_ATTR(tl, 0).cap.put.max_zcopy; module->super.btl_put_alignment = 0; - /* no registration needed when using short put */ - if (tl->uct_iface_attr.cap.put.max_bcopy > tl->uct_iface_attr.cap.put.max_short) { - module->super.btl_put_local_registration_threshold = tl->uct_iface_attr.cap.put.max_bcopy; - } else { - module->super.btl_put_local_registration_threshold = tl->uct_iface_attr.cap.put.max_short; - } + /* no registration needed when using short/bcopy put */ + module->super.btl_put_local_registration_threshold = MCA_BTL_UCT_TL_ATTR(tl, 0).cap.put.max_bcopy; module->rdma_tl = tl; OBJ_RETAIN(tl); @@ -418,6 +460,9 @@ static void mca_btl_uct_set_tl_am (mca_btl_uct_module_t *module, mca_btl_uct_tl_ if (tl->max_device_contexts <= 1) { tl->max_device_contexts = mca_btl_uct_component.num_contexts_per_module; } + + module->super.btl_max_send_size = MCA_BTL_UCT_TL_ATTR(tl, 0).cap.am.max_zcopy - sizeof (mca_btl_uct_am_header_t); + module->super.btl_eager_limit = MCA_BTL_UCT_TL_ATTR(tl, 0).cap.am.max_bcopy - sizeof (mca_btl_uct_am_header_t); } static int mca_btl_uct_set_tl_conn (mca_btl_uct_module_t *module, mca_btl_uct_tl_t *tl) @@ -464,18 
+509,23 @@ static int mca_btl_uct_evaluate_tl (mca_btl_uct_module_t *module, mca_btl_uct_tl } if (tl == module->rdma_tl || tl == module->am_tl) { - BTL_VERBOSE(("tl has flags 0x%" PRIx64, tl->uct_iface_attr.cap.flags)); - module->super.btl_flags |= mca_btl_uct_module_flags (tl->uct_iface_attr.cap.flags); + BTL_VERBOSE(("tl has flags 0x%" PRIx64, MCA_BTL_UCT_TL_ATTR(tl, 0).cap.flags)); + module->super.btl_flags |= mca_btl_uct_module_flags (MCA_BTL_UCT_TL_ATTR(tl, 0).cap.flags); /* the bandwidth and latency numbers relate to both rdma and active messages. need to * come up with a better estimate. */ /* UCT bandwidth is in bytes/sec, BTL is in MB/sec */ - module->super.btl_bandwidth = (uint32_t) (tl->uct_iface_attr.bandwidth / 1048576.0); + module->super.btl_bandwidth = (uint32_t) (MCA_BTL_UCT_TL_ATTR(tl, 0).bandwidth / 1048576.0); /* TODO -- figure out how to translate UCT latency to us */ module->super.btl_latency = 1; } + if (tl == module->rdma_tl || tl == module->am_tl || tl == module->conn_tl) { + /* make sure progress is enabled on the default context now that we know this TL will be used */ + mca_btl_uct_context_enable_progress (tl->uct_dev_contexts[0]); + } + return OPAL_SUCCESS; } @@ -485,6 +535,7 @@ int mca_btl_uct_query_tls (mca_btl_uct_module_t *module, mca_btl_uct_md_t *md, u mca_btl_uct_tl_t *tl; opal_list_t tl_list; char **tl_filter; + int any_priority = 0; OBJ_CONSTRUCT(&tl_list, opal_list_t); @@ -497,23 +548,46 @@ int mca_btl_uct_query_tls (mca_btl_uct_module_t *module, mca_btl_uct_md_t *md, u free (tl_filter[0]); tl_filter[0] = tmp; include = false; - } else if (0 == strcmp (tl_filter[0], "any")) { - any = true; + } + + /* check for the any keyword */ + for (unsigned j = 0 ; tl_filter[j] ; ++j) { + if (0 == strcmp (tl_filter[j], "any")) { + any_priority = j; + any = true; + break; + } + } + + if (any && !include) { + opal_argv_free (tl_filter); + return OPAL_ERR_NOT_AVAILABLE; } for (unsigned i = 0 ; i < tl_count ; ++i) { bool try_tl = any; - int 
priority = 0; - - for (unsigned j = 0 ; tl_filter[j] && !try_tl ; ++j) { - try_tl = (0 == strcmp (tl_filter[j], tl_descs[i].tl_name)) == include; - priority = j; + int priority = any_priority; + + for (unsigned j = 0 ; tl_filter[j] ; ++j) { + if (0 == strcmp (tl_filter[j], tl_descs[i].tl_name)) { + try_tl = include; + priority = j; + break; + } } + BTL_VERBOSE(("tl filter: tl_name = %s, use = %d, priority = %d", tl_descs[i].tl_name, try_tl, priority)); + if (!try_tl) { continue; } + if (0 == strcmp (tl_descs[i].tl_name, "ud")) { + /* ud looks like any normal transport but we do not want to use it for anything other + * than connection management so ensure it gets evaluated last */ + priority = INT_MAX; + } + tl = mca_btl_uct_create_tl (module, md, tl_descs + i, priority); if (tl) { @@ -521,6 +595,8 @@ int mca_btl_uct_query_tls (mca_btl_uct_module_t *module, mca_btl_uct_md_t *md, u } } + opal_argv_free (tl_filter); + if (0 == opal_list_get_size (&tl_list)) { BTL_VERBOSE(("no suitable tls match filter: %s", mca_btl_uct_component.allowed_transports)); OBJ_DESTRUCT(&tl_list); @@ -543,6 +619,7 @@ int mca_btl_uct_query_tls (mca_btl_uct_module_t *module, mca_btl_uct_md_t *md, u /* no rdma tls */ BTL_VERBOSE(("no rdma tl matched supplied filter. 
disabling RDMA support")); + module->super.btl_flags &= ~MCA_BTL_FLAGS_RDMA; module->super.btl_put = NULL; module->super.btl_get = NULL; module->super.btl_atomic_fop = NULL; diff --git a/opal/mca/btl/uct/btl_uct_types.h b/opal/mca/btl/uct/btl_uct_types.h index f7731d9e441..7b7a4eaa69b 100644 --- a/opal/mca/btl/uct/btl_uct_types.h +++ b/opal/mca/btl/uct/btl_uct_types.h @@ -77,6 +77,9 @@ struct mca_btl_uct_conn_req_t { /** name of the requesting process */ opal_process_name_t proc_name; + /** request type: 0 == endpoint data, 1 == endpoint data + remote ready */ + int type; + /** context id that should be connected */ int context_id; @@ -141,9 +144,21 @@ struct mca_btl_uct_device_context_t { /** UCT interface handle */ uct_iface_h uct_iface; + /** interface attributes */ + uct_iface_attr_t uct_iface_attr; + + /** RDMA completions */ + opal_free_list_t rdma_completions; + /** complete fragments and rdma operations. this fifo is used to avoid making * callbacks while holding the device lock. 
*/ opal_fifo_t completion_fifo; + + /** progress is enabled on this context */ + bool progress_enabled; + + /** context is in AM callback */ + volatile bool in_am_callback; }; typedef struct mca_btl_uct_device_context_t mca_btl_uct_device_context_t; @@ -229,8 +244,8 @@ struct mca_btl_uct_base_frag_t { /** module this fragment is associated with */ struct mca_btl_uct_module_t *btl; - /** context this fragment is waiting on */ - int context_id; + /* tl context */ + mca_btl_uct_device_context_t *context; /** is this frag ready to send (only used when pending) */ bool ready; @@ -301,9 +316,6 @@ struct mca_btl_uct_tl_t { /** device name for this tl (used for creating device contexts) */ char *uct_dev_name; - /** interface attributes */ - uct_iface_attr_t uct_iface_attr; - /** maxiumum number of device contexts that can be created */ int max_device_contexts; @@ -318,4 +330,14 @@ struct mca_btl_uct_tl_t { typedef struct mca_btl_uct_tl_t mca_btl_uct_tl_t; OBJ_CLASS_DECLARATION(mca_btl_uct_tl_t); +#define MCA_BTL_UCT_TL_ATTR(tl, context_id) (tl)->uct_dev_contexts[(context_id)]->uct_iface_attr + +struct mca_btl_uct_pending_connection_request_t { + opal_list_item_t super; + uint8_t request_data[]; +}; + +typedef struct mca_btl_uct_pending_connection_request_t mca_btl_uct_pending_connection_request_t; +OBJ_CLASS_DECLARATION(mca_btl_uct_pending_connection_request_t); + #endif /* !defined(BTL_UCT_TYPES_H) */ diff --git a/opal/mca/btl/uct/configure.m4 b/opal/mca/btl/uct/configure.m4 index dbeabe2f5f7..82844857740 100644 --- a/opal/mca/btl/uct/configure.m4 +++ b/opal/mca/btl/uct/configure.m4 @@ -14,6 +14,8 @@ # Copyright (c) 2009 Cisco Systems, Inc. All rights reserved. # Copyright (c) 2011-2018 Los Alamos National Security, LLC. # All rights reserved. +# Copyright (c) 2018 Research Organization for Information Science +# and Technology (RIST). All rights reserved. 
# $COPYRIGHT$ # # Additional copyrights may follow @@ -33,10 +35,21 @@ AC_DEFUN([MCA_opal_btl_uct_CONFIG],[ OMPI_CHECK_UCX([btl_uct], [btl_uct_happy="yes"], [btl_uct_happy="no"]) + if test "$btl_uct_happy" = "yes" ; then + OPAL_VAR_SCOPE_PUSH([CPPFLAGS_save]) + + CPPFLAGS_save="$CPPFLAGS" + CPPFLAGS="$CPPFLAGS $btl_uct_CPPFLAGS" + + AC_CHECK_DECLS([UCT_PROGRESS_THREAD_SAFE, UCT_CB_FLAG_SYNC], [], [], [[#include ]]) + + CPPFLAGS="$CPPFLAGS_save" + OPAL_VAR_SCOPE_POP + fi AS_IF([test "$btl_uct_happy" = "yes"], [$1 - btl_uct_LIBS = "$btl_uct_LIBS -luct" + btl_uct_LIBS="$btl_uct_LIBS -luct" ], [$2]) diff --git a/opal/mca/btl/usnic/Makefile.am b/opal/mca/btl/usnic/Makefile.am index ecd3099dc67..17d62cc429e 100644 --- a/opal/mca/btl/usnic/Makefile.am +++ b/opal/mca/btl/usnic/Makefile.am @@ -11,11 +11,13 @@ # All rights reserved. # Copyright (c) 2006 Sandia National Laboratories. All rights # reserved. -# Copyright (c) 2010-2015 Cisco Systems, Inc. All rights reserved. +# Copyright (c) 2010-2019 Cisco Systems, Inc. All rights reserved # Copyright (c) 2015 Intel, Inc. All rights reserved. # Copyright (c) 2016-2017 IBM Corporation. All rights reserved. # Copyright (c) 2017 Los Alamos National Security, LLC. All rights # reserved. +# Copyright (c) 2019 Research Organization for Information Science +# and Technology (RIST). All rights reserved. 
# $COPYRIGHT$ # # Additional copyrights may follow @@ -23,7 +25,7 @@ # $HEADER$ # -AM_CPPFLAGS = -DBTL_IN_OPAL=1 $(opal_common_ofi_CPPFLAGS) -DOMPI_LIBMPI_NAME=\"$(OMPI_LIBMPI_NAME)\" +AM_CPPFLAGS = -DBTL_IN_OPAL=1 $(opal_ofi_CPPFLAGS) -DOMPI_LIBMPI_NAME=\"$(OMPI_LIBMPI_NAME)\" EXTRA_DIST = README.txt README.test @@ -88,16 +90,21 @@ mcacomponent_LTLIBRARIES = $(component) mca_btl_usnic_la_SOURCES = $(component_sources) mca_btl_usnic_la_LDFLAGS = \ $(opal_btl_usnic_LDFLAGS) \ + $(opal_ofi_LDFLAGS) \ -module -avoid-version mca_btl_usnic_la_LIBADD = $(top_builddir)/opal/lib@OPAL_LIB_PREFIX@open-pal.la \ - $(OPAL_TOP_BUILDDIR)/opal/mca/common/ofi/lib@OPAL_LIB_PREFIX@mca_common_ofi.la + $(opal_ofi_LIBS) noinst_LTLIBRARIES = $(lib) libmca_btl_usnic_la_SOURCES = $(lib_sources) -libmca_btl_usnic_la_LDFLAGS = -module -avoid-version $(opal_btl_usnic_LDFLAGS) +libmca_btl_usnic_la_LDFLAGS = \ + $(opal_btl_usnic_LDFLAGS) \ + $(opal_ofi_LDFLAGS) \ + -module -avoid-version +libmca_btl_usnic_la_LIBADD = $(opal_ofi_LIBS) if OPAL_BTL_USNIC_BUILD_UNIT_TESTS -usnic_btl_run_tests_CPPFLAGS = \ +usnic_btl_run_tests_CPPFLAGS = $(AM_CPPFLAGS) \ -DBTL_USNIC_RUN_TESTS_SYMBOL=\"opal_btl_usnic_run_tests\" usnic_btl_run_tests_SOURCES = test/usnic_btl_run_tests.c usnic_btl_run_tests_LDADD = -ldl diff --git a/opal/mca/btl/usnic/btl_usnic.h b/opal/mca/btl/usnic/btl_usnic.h index e8f6dafa2de..b4831d82d9a 100644 --- a/opal/mca/btl/usnic/btl_usnic.h +++ b/opal/mca/btl/usnic/btl_usnic.h @@ -68,7 +68,7 @@ extern uint64_t opal_btl_usnic_ticks; extern opal_recursive_mutex_t btl_usnic_lock; static inline uint64_t -get_nsec(void) +get_ticks(void) { return opal_btl_usnic_ticks; } @@ -206,6 +206,14 @@ typedef struct opal_btl_usnic_component_t { /** retrans characteristics */ int retrans_timeout; + /** max number of messages re-sent during a single progress + iteration */ + int max_resends_per_iteration; + + /** minimum number of times through component progress before + checking to see if standalone ACKs 
need to be sent */ + int ack_iteration_delay; + /** transport header length for all usNIC devices on this server (it is guaranteed that all usNIC devices on a single server will have the same underlying transport, and therefore the diff --git a/opal/mca/btl/usnic/btl_usnic_component.c b/opal/mca/btl/usnic/btl_usnic_component.c index 25a64a25d26..629d292c305 100644 --- a/opal/mca/btl/usnic/btl_usnic_component.c +++ b/opal/mca/btl/usnic/btl_usnic_component.c @@ -384,8 +384,9 @@ static int check_usnic_config(opal_btl_usnic_module_t *module, static void usnic_clock_callback(int fd, short flags, void *timeout) { - /* 1ms == 1,000,000 ns */ - opal_btl_usnic_ticks += 1000000; + /* Increase by so many ticks that we will definitely force sending + any ACKs that are pending */ + opal_btl_usnic_ticks += 1000; /* run progress to make sure time change gets noticed */ usnic_component_progress(); @@ -1132,7 +1133,7 @@ static mca_btl_base_module_t** usnic_component_init(int* num_btl_modules, */ static int usnic_handle_completion(opal_btl_usnic_module_t* module, opal_btl_usnic_channel_t *channel, struct fi_cq_entry *completion); -static int usnic_component_progress_2(void); +static int usnic_component_progress_2(bool check_priority); static void usnic_handle_cq_error(opal_btl_usnic_module_t* module, opal_btl_usnic_channel_t *channel, int cq_ret); @@ -1145,9 +1146,7 @@ static int usnic_component_progress(void) struct fi_cq_entry completion; opal_btl_usnic_channel_t *channel; static bool fastpath_ok = true; - - /* update our simulated clock */ - opal_btl_usnic_ticks += 5000; + bool check_priority = true; count = 0; if (fastpath_ok) { @@ -1180,10 +1179,11 @@ static int usnic_component_progress(void) usnic_handle_cq_error(module, channel, ret); } } + check_priority = false; } fastpath_ok = true; - return count + usnic_component_progress_2(); + return count + usnic_component_progress_2(check_priority); } static int usnic_handle_completion( @@ -1304,7 +1304,7 @@ 
usnic_handle_cq_error(opal_btl_usnic_module_t* module, } } -static int usnic_component_progress_2(void) +static int usnic_component_progress_2(bool check_priority) { int i, j, count = 0, num_events, ret; opal_btl_usnic_module_t* module; @@ -1313,15 +1313,18 @@ static int usnic_component_progress_2(void) int rc; int c; - /* update our simulated clock */ - opal_btl_usnic_ticks += 5000; + opal_btl_usnic_ticks += 1; + + /* If we need to check priority, start with the priority channel. + Otherwise, just check the data channel. */ + int c_start = check_priority ? USNIC_PRIORITY_CHANNEL : USNIC_DATA_CHANNEL; /* Poll for completions */ for (i = 0; i < mca_btl_usnic_component.num_modules; i++) { module = mca_btl_usnic_component.usnic_active_modules[i]; /* poll each channel */ - for (c=0; c<USNIC_NUM_CHANNELS; c++) { + for (c=c_start; c<USNIC_NUM_CHANNELS; c++) { channel = &module->mod_channels[c]; if (channel->chan_deferred_recv != NULL) { diff --git a/opal/mca/btl/usnic/btl_usnic_mca.c b/opal/mca/btl/usnic/btl_usnic_mca.c index 84f987cf22c..b3e130850c2 100644 --- a/opal/mca/btl/usnic/btl_usnic_mca.c +++ b/opal/mca/btl/usnic/btl_usnic_mca.c @@ -260,6 +260,14 @@ int opal_btl_usnic_component_register(void) 5000, &mca_btl_usnic_component.retrans_timeout, REGINT_GE_ONE, OPAL_INFO_LVL_5)); + CHECK(reg_int("max_resends_per_iteration", "Maximum number of frames to resend in a single iteration through usNIC component progress", + 16, &mca_btl_usnic_component.max_resends_per_iteration, + REGINT_GE_ONE, OPAL_INFO_LVL_5)); + + CHECK(reg_int("ack_iteration_delay", "Minimum number of times through usNIC \"progress\" function before checking to see if standalone ACKs need to be sent", + 4, &mca_btl_usnic_component.ack_iteration_delay, + REGINT_GE_ZERO, OPAL_INFO_LVL_5)); + CHECK(reg_int("priority_limit", "Max size of \"priority\" messages (0 = use pre-set defaults; depends on number and type of devices available)", 0, &max_tiny_msg_size, REGINT_GE_ZERO, OPAL_INFO_LVL_5)); diff --git a/opal/mca/btl/usnic/btl_usnic_module.c b/opal/mca/btl/usnic/btl_usnic_module.c index
ba0442c43c4..94aefc9b715 100644 --- a/opal/mca/btl/usnic/btl_usnic_module.c +++ b/opal/mca/btl/usnic/btl_usnic_module.c @@ -963,11 +963,12 @@ usnic_do_resends( opal_btl_usnic_send_segment_t *sseg; opal_btl_usnic_endpoint_t *endpoint; struct opal_btl_usnic_channel_t *data_channel; - int ret; + int ret, count; data_channel = &module->mod_channels[USNIC_DATA_CHANNEL]; - while ((get_send_credits(data_channel) > 1) && + count = mca_btl_usnic_component.max_resends_per_iteration; + while (count > 0 && (get_send_credits(data_channel) > 1) && !opal_list_is_empty(&module->pending_resend_segs)) { /* @@ -1009,6 +1010,8 @@ usnic_do_resends( BTL_ERROR(("hotel checkin failed\n")); abort(); /* should not be possible */ } + + --count; } } @@ -1236,7 +1239,7 @@ opal_btl_usnic_module_progress_sends( /* Is it time to send ACK? */ if (endpoint->endpoint_acktime == 0 || - endpoint->endpoint_acktime <= get_nsec()) { + endpoint->endpoint_acktime <= get_ticks()) { if (OPAL_LIKELY(opal_btl_usnic_ack_send(module, endpoint) == OPAL_SUCCESS)) { opal_btl_usnic_remove_from_endpoints_needing_ack(endpoint); } else { @@ -2366,14 +2369,14 @@ static void init_freelists(opal_btl_usnic_module_t *module) uint32_t segsize; segsize = (module->local_modex.max_msg_size + - opal_cache_line_size - 1) & + mca_btl_usnic_component.prefix_send_offset + + opal_cache_line_size - 1) & ~(opal_cache_line_size - 1); /* Send frags freelists */ OBJ_CONSTRUCT(&module->small_send_frags, opal_free_list_t); rc = usnic_compat_free_list_init(&module->small_send_frags, - sizeof(opal_btl_usnic_small_send_frag_t) + - mca_btl_usnic_component.prefix_send_offset, + sizeof(opal_btl_usnic_small_send_frag_t), opal_cache_line_size, OBJ_CLASS(opal_btl_usnic_small_send_frag_t), segsize, @@ -2390,8 +2393,7 @@ static void init_freelists(opal_btl_usnic_module_t *module) OBJ_CONSTRUCT(&module->large_send_frags, opal_free_list_t); rc = usnic_compat_free_list_init(&module->large_send_frags, - sizeof(opal_btl_usnic_large_send_frag_t) + - 
mca_btl_usnic_component.prefix_send_offset, + sizeof(opal_btl_usnic_large_send_frag_t), opal_cache_line_size, OBJ_CLASS(opal_btl_usnic_large_send_frag_t), 0, /* payload size */ @@ -2408,8 +2410,7 @@ static void init_freelists(opal_btl_usnic_module_t *module) OBJ_CONSTRUCT(&module->put_dest_frags, opal_free_list_t); rc = usnic_compat_free_list_init(&module->put_dest_frags, - sizeof(opal_btl_usnic_put_dest_frag_t) + - mca_btl_usnic_component.prefix_send_offset, + sizeof(opal_btl_usnic_put_dest_frag_t), opal_cache_line_size, OBJ_CLASS(opal_btl_usnic_put_dest_frag_t), 0, /* payload size */ @@ -2427,8 +2428,7 @@ static void init_freelists(opal_btl_usnic_module_t *module) /* list of segments to use for sending */ OBJ_CONSTRUCT(&module->chunk_segs, opal_free_list_t); rc = usnic_compat_free_list_init(&module->chunk_segs, - sizeof(opal_btl_usnic_chunk_segment_t) + - mca_btl_usnic_component.prefix_send_offset, + sizeof(opal_btl_usnic_chunk_segment_t), opal_cache_line_size, OBJ_CLASS(opal_btl_usnic_chunk_segment_t), segsize, @@ -2446,11 +2446,11 @@ static void init_freelists(opal_btl_usnic_module_t *module) /* ACK segments freelist */ uint32_t ack_segment_len; ack_segment_len = (sizeof(opal_btl_usnic_btl_header_t) + + mca_btl_usnic_component.prefix_send_offset + opal_cache_line_size - 1) & ~(opal_cache_line_size - 1); OBJ_CONSTRUCT(&module->ack_segs, opal_free_list_t); rc = usnic_compat_free_list_init(&module->ack_segs, - sizeof(opal_btl_usnic_ack_segment_t) + - mca_btl_usnic_component.prefix_send_offset, + sizeof(opal_btl_usnic_ack_segment_t), opal_cache_line_size, OBJ_CLASS(opal_btl_usnic_ack_segment_t), ack_segment_len, diff --git a/opal/mca/btl/usnic/btl_usnic_recv.h b/opal/mca/btl/usnic/btl_usnic_recv.h index 7e056e488db..7a178c1630f 100644 --- a/opal/mca/btl/usnic/btl_usnic_recv.h +++ b/opal/mca/btl/usnic/btl_usnic_recv.h @@ -112,9 +112,12 @@ opal_btl_usnic_update_window( opal_btl_usnic_add_to_endpoints_needing_ack(endpoint); } - /* give this process a chance to send 
something before ACKing */ + /* A heuristic: set to send this ACK after we have checked our + incoming DATA_CHANNEL component.ack_iteration_delay times + (i.e., so we can piggyback an ACK on an outgoing send) */ if (0 == endpoint->endpoint_acktime) { - endpoint->endpoint_acktime = get_nsec() + 50000; /* 50 usec */ + endpoint->endpoint_acktime = + get_ticks() + mca_btl_usnic_component.ack_iteration_delay; } /* Save this incoming segment in the received segmentss array on the diff --git a/opal/mca/btl/usnic/configure.m4 b/opal/mca/btl/usnic/configure.m4 index 33d5dacdb75..0e75e625a14 100644 --- a/opal/mca/btl/usnic/configure.m4 +++ b/opal/mca/btl/usnic/configure.m4 @@ -12,7 +12,7 @@ # All rights reserved. # Copyright (c) 2006 Sandia National Laboratories. All rights # reserved. -# Copyright (c) 2010-2017 Cisco Systems, Inc. All rights reserved +# Copyright (c) 2010-2019 Cisco Systems, Inc. All rights reserved # Copyright (c) 2017 Los Alamos National Security, LLC. All rights # reserved. # $COPYRIGHT$ @@ -95,21 +95,17 @@ AC_DEFUN([_OPAL_BTL_USNIC_DO_CONFIG],[ AC_MSG_RESULT([$opal_btl_usnic_happy]) ]) - # The usnic BTL requires OFI libfabric support. AS_IF([test "$opal_btl_usnic_happy" = "yes"], - [AC_MSG_CHECKING([whether OFI libfabric support is available]) - AS_IF([test "$opal_common_ofi_happy" = "yes"], - [opal_btl_usnic_happy=yes], - [opal_btl_usnic_happy=no]) - AC_MSG_RESULT([$opal_btl_usnic_happy]) - ]) + [ # The usnic BTL requires OFI libfabric support + OPAL_CHECK_OFI + opal_btl_usnic_happy=$opal_ofi_happy]) # The usnic BTL requires at least OFI libfabric v1.1 (there was a # critical bug in libfabric v1.0).
AS_IF([test "$opal_btl_usnic_happy" = "yes"], [AC_MSG_CHECKING([whether OFI libfabric is >= v1.1]) opal_btl_usnic_CPPFLAGS_save=$CPPFLAGS - CPPFLAGS="$opal_common_ofi_CPPFLAGS $CPPFLAGS" + CPPFLAGS="$opal_ofi_CPPFLAGS $CPPFLAGS" AC_COMPILE_IFELSE([AC_LANG_PROGRAM([[#include ]], [[ #if !defined(FI_MAJOR_VERSION) @@ -127,7 +123,7 @@ AC_DEFUN([_OPAL_BTL_USNIC_DO_CONFIG],[ # Make sure we can find the OFI libfabric usnic extensions header AS_IF([test "$opal_btl_usnic_happy" = "yes" ], [opal_btl_usnic_CPPFLAGS_save=$CPPFLAGS - CPPFLAGS="$opal_common_ofi_CPPFLAGS $CPPFLAGS" + CPPFLAGS="$opal_ofi_CPPFLAGS $CPPFLAGS" AC_CHECK_HEADER([rdma/fi_ext_usnic.h], [], [opal_btl_usnic_happy=no]) diff --git a/opal/mca/btl/vader/btl_vader.h b/opal/mca/btl/vader/btl_vader.h index 7e1afad24c6..eab5f5a87d3 100644 --- a/opal/mca/btl/vader/btl_vader.h +++ b/opal/mca/btl/vader/btl_vader.h @@ -15,6 +15,8 @@ * Copyright (c) 2010-2018 Los Alamos National Security, LLC. All rights * reserved. * Copyright (c) 2015 Mellanox Technologies. All rights reserved. + * Copyright (c) 2018 Triad National Security, LLC. All rights + * reserved. 
* * $COPYRIGHT$ * @@ -53,6 +55,7 @@ #include "opal/mca/rcache/rcache.h" #include "opal/mca/rcache/base/base.h" #include "opal/mca/btl/base/btl_base_error.h" +#include "opal/mca/mpool/base/base.h" #include "opal/util/proc.h" #include "btl_vader_endpoint.h" @@ -81,7 +84,12 @@ union vader_modex_t { void *segment_base; } xpmem; #endif - opal_shmem_ds_t seg_ds; + struct vader_modex_other_t { + ino_t user_ns_id; + int seg_ds_size; + /* seg_ds needs to be the last element */ + opal_shmem_ds_t seg_ds; + } other; }; /** @@ -112,16 +120,15 @@ struct mca_btl_vader_component_t { opal_mutex_t lock; /**< lock to protect concurrent updates to this structure's members */ char *my_segment; /**< this rank's base pointer */ size_t segment_size; /**< size of my_segment */ - size_t segment_offset; /**< start of unused portion of my_segment */ int32_t num_smp_procs; /**< current number of smp procs on this host */ opal_free_list_t vader_frags_eager; /**< free list of vader send frags */ opal_free_list_t vader_frags_max_send; /**< free list of vader max send frags (large fragments) */ opal_free_list_t vader_frags_user; /**< free list of small inline frags */ + opal_free_list_t vader_fboxes; /**< free list of available fast-boxes */ unsigned int fbox_threshold; /**< number of sends required before we setup a send fast box for a peer */ unsigned int fbox_max; /**< maximum number of send fast boxes to allocate */ unsigned int fbox_size; /**< size of each peer fast box allocation */ - unsigned int fbox_count; /**< number of send fast boxes allocated */ int single_copy_mechanism; /**< single copy mechanism to use */ @@ -143,6 +150,7 @@ struct mca_btl_vader_component_t { #if OPAL_BTL_VADER_HAVE_KNEM unsigned int knem_dma_min; /**< minimum size to enable DMA for knem transfers (0 disables) */ #endif + mca_mpool_base_module_t *mpool; }; typedef struct mca_btl_vader_component_t mca_btl_vader_component_t; OPAL_MODULE_DECLSPEC extern mca_btl_vader_component_t mca_btl_vader_component; @@ -267,6 
+275,8 @@ int mca_btl_vader_get_knem (mca_btl_base_module_t *btl, mca_btl_base_endpoint_t int order, mca_btl_base_rdma_completion_fn_t cbfunc, void *cbcontext, void *cbdata); #endif +ino_t mca_btl_vader_get_user_ns_id(void); + int mca_btl_vader_get_sc_emu (mca_btl_base_module_t *btl, mca_btl_base_endpoint_t *endpoint, void *local_address, uint64_t remote_address, mca_btl_base_registration_handle_t *local_handle, mca_btl_base_registration_handle_t *remote_handle, size_t size, int flags, diff --git a/opal/mca/btl/vader/btl_vader_atomic.c b/opal/mca/btl/vader/btl_vader_atomic.c index df3c2664be8..1bf54430df2 100644 --- a/opal/mca/btl/vader/btl_vader_atomic.c +++ b/opal/mca/btl/vader/btl_vader_atomic.c @@ -2,6 +2,7 @@ /* * Copyright (c) 2010-2017 Los Alamos National Security, LLC. All rights * reserved. + * Copyright (c) 2019 Google, Inc. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -16,58 +17,14 @@ #include "btl_vader_endpoint.h" #include "btl_vader_xpmem.h" -static void mca_btl_vader_sc_emu_aop_complete (mca_btl_base_module_t *btl, mca_btl_base_endpoint_t *endpoint, - mca_btl_base_descriptor_t *desc, int status) -{ - mca_btl_vader_frag_t *frag = (mca_btl_vader_frag_t *) desc; - void *local_address = frag->rdma.local_address; - void *context = frag->rdma.context; - void *cbdata = frag->rdma.cbdata; - mca_btl_base_rdma_completion_fn_t cbfunc = frag->rdma.cbfunc; - - /* return the fragment first since the callback may call put/get/amo and could use this fragment */ - MCA_BTL_VADER_FRAG_RETURN(frag); - - cbfunc (btl, endpoint, local_address, NULL, context, cbdata, status); -} - int mca_btl_vader_emu_aop (struct mca_btl_base_module_t *btl, struct mca_btl_base_endpoint_t *endpoint, uint64_t remote_address, mca_btl_base_registration_handle_t *remote_handle, mca_btl_base_atomic_op_t op, uint64_t operand, int flags, int order, mca_btl_base_rdma_completion_fn_t cbfunc, void *cbcontext, void *cbdata) { - mca_btl_vader_frag_t *frag; - - frag = 
mca_btl_vader_rdma_frag_alloc (btl, endpoint, MCA_BTL_VADER_OP_ATOMIC, operand, 0, op, 0, order, flags, NULL, - remote_address, cbfunc, cbcontext, cbdata, mca_btl_vader_sc_emu_aop_complete); - if (OPAL_UNLIKELY(NULL == frag)) { - return OPAL_ERR_OUT_OF_RESOURCE; - } - - /* send is always successful */ - (void) mca_btl_vader_send (btl, endpoint, &frag->base, MCA_BTL_TAG_VADER); - - return OPAL_SUCCESS; -} - -static void mca_btl_vader_sc_emu_afop_complete (mca_btl_base_module_t *btl, mca_btl_base_endpoint_t *endpoint, - mca_btl_base_descriptor_t *desc, int status) -{ - mca_btl_vader_frag_t *frag = (mca_btl_vader_frag_t *) desc; - mca_btl_vader_sc_emu_hdr_t *hdr; - void *local_address = frag->rdma.local_address; - void *context = frag->rdma.context; - void *cbdata = frag->rdma.cbdata; - mca_btl_base_rdma_completion_fn_t cbfunc = frag->rdma.cbfunc; - - hdr = (mca_btl_vader_sc_emu_hdr_t *) frag->segments[0].seg_addr.pval; - - *((int64_t *) frag->rdma.local_address) = hdr->operand[0]; - - /* return the fragment first since the callback may call put/get/amo and could use this fragment */ - MCA_BTL_VADER_FRAG_RETURN(frag); - - cbfunc (btl, endpoint, local_address, NULL, context, cbdata, status); + size_t size = (flags & MCA_BTL_ATOMIC_FLAG_32BIT) ? 
4 : 8; + return mca_btl_vader_rdma_frag_start (btl, endpoint, MCA_BTL_VADER_OP_ATOMIC, operand, 0, op, order, flags, + size, NULL, remote_address, cbfunc, cbcontext, cbdata); } int mca_btl_vader_emu_afop (struct mca_btl_base_module_t *btl, struct mca_btl_base_endpoint_t *endpoint, @@ -76,19 +33,9 @@ int mca_btl_vader_emu_afop (struct mca_btl_base_module_t *btl, struct mca_btl_ba uint64_t operand, int flags, int order, mca_btl_base_rdma_completion_fn_t cbfunc, void *cbcontext, void *cbdata) { - mca_btl_vader_frag_t *frag; - - frag = mca_btl_vader_rdma_frag_alloc (btl, endpoint, MCA_BTL_VADER_OP_ATOMIC, operand, 0, op, 0, order, flags, - local_address, remote_address, cbfunc, cbcontext, cbdata, - mca_btl_vader_sc_emu_afop_complete); - if (OPAL_UNLIKELY(NULL == frag)) { - return OPAL_ERR_OUT_OF_RESOURCE; - } - - /* send is always successful */ - (void) mca_btl_vader_send (btl, endpoint, &frag->base, MCA_BTL_TAG_VADER); - - return OPAL_SUCCESS; + size_t size = (flags & MCA_BTL_ATOMIC_FLAG_32BIT) ? 
4 : 8; + return mca_btl_vader_rdma_frag_start (btl, endpoint, MCA_BTL_VADER_OP_ATOMIC, operand, 0, op, order, flags, + size, local_address, remote_address, cbfunc, cbcontext, cbdata); } int mca_btl_vader_emu_acswap (struct mca_btl_base_module_t *btl, struct mca_btl_base_endpoint_t *endpoint, @@ -96,17 +43,7 @@ int mca_btl_vader_emu_acswap (struct mca_btl_base_module_t *btl, struct mca_btl_ mca_btl_base_registration_handle_t *remote_handle, uint64_t compare, uint64_t value, int flags, int order, mca_btl_base_rdma_completion_fn_t cbfunc, void *cbcontext, void *cbdata) { - mca_btl_vader_frag_t *frag; - - frag = mca_btl_vader_rdma_frag_alloc (btl, endpoint, MCA_BTL_VADER_OP_CSWAP, compare, value, 0, 0, order, - flags, local_address, remote_address, cbfunc, cbcontext, cbdata, - mca_btl_vader_sc_emu_afop_complete); - if (OPAL_UNLIKELY(NULL == frag)) { - return OPAL_ERR_OUT_OF_RESOURCE; - } - - /* send is always successful */ - (void) mca_btl_vader_send (btl, endpoint, &frag->base, MCA_BTL_TAG_VADER); - - return OPAL_SUCCESS; + size_t size = (flags & MCA_BTL_ATOMIC_FLAG_32BIT) ? 4 : 8; + return mca_btl_vader_rdma_frag_start (btl, endpoint, MCA_BTL_VADER_OP_CSWAP, compare, value, 0, order, + flags, size, local_address, remote_address, cbfunc, cbcontext, cbdata); } diff --git a/opal/mca/btl/vader/btl_vader_component.c b/opal/mca/btl/vader/btl_vader_component.c index 3fc35c6a46e..44de7e004c2 100644 --- a/opal/mca/btl/vader/btl_vader_component.c +++ b/opal/mca/btl/vader/btl_vader_component.c @@ -16,8 +16,12 @@ * All rights reserved. * Copyright (c) 2011 NVIDIA Corporation. All rights reserved. * Copyright (c) 2014-2018 Intel, Inc. All rights reserved. - * Copyright (c) 2014-2018 Research Organization for Information Science - * and Technology (RIST). All rights reserved. + * Copyright (c) 2014-2019 Research Organization for Information Science + * and Technology (RIST). All rights reserved. + * Copyright (c) 2018 Amazon.com, Inc. or its affiliates. All Rights reserved. 
+ * Copyright (c) 2018 Triad National Security, LLC. All rights + * reserved. + * Copyright (c) 2019 Google, Inc. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -37,6 +41,10 @@ #include "btl_vader_fbox.h" #include "btl_vader_xpmem.h" +#ifdef HAVE_SYS_STAT_H +#include <sys/stat.h> +#endif + #include #include @@ -301,6 +309,7 @@ static int mca_btl_vader_component_open(void) OBJ_CONSTRUCT(&mca_btl_vader_component.vader_frags_eager, opal_free_list_t); OBJ_CONSTRUCT(&mca_btl_vader_component.vader_frags_user, opal_free_list_t); OBJ_CONSTRUCT(&mca_btl_vader_component.vader_frags_max_send, opal_free_list_t); + OBJ_CONSTRUCT(&mca_btl_vader_component.vader_fboxes, opal_free_list_t); OBJ_CONSTRUCT(&mca_btl_vader_component.lock, opal_mutex_t); OBJ_CONSTRUCT(&mca_btl_vader_component.pending_endpoints, opal_list_t); OBJ_CONSTRUCT(&mca_btl_vader_component.pending_fragments, opal_list_t); @@ -321,6 +330,7 @@ static int mca_btl_vader_component_close(void) OBJ_DESTRUCT(&mca_btl_vader_component.vader_frags_eager); OBJ_DESTRUCT(&mca_btl_vader_component.vader_frags_user); OBJ_DESTRUCT(&mca_btl_vader_component.vader_frags_max_send); + OBJ_DESTRUCT(&mca_btl_vader_component.vader_fboxes); OBJ_DESTRUCT(&mca_btl_vader_component.lock); OBJ_DESTRUCT(&mca_btl_vader_component.pending_endpoints); OBJ_DESTRUCT(&mca_btl_vader_component.pending_fragments); @@ -336,9 +346,33 @@ static int mca_btl_vader_component_close(void) mca_btl_vader_knem_fini (); #endif + if (mca_btl_vader_component.mpool) { + mca_btl_vader_component.mpool->mpool_finalize (mca_btl_vader_component.mpool); + mca_btl_vader_component.mpool = NULL; + } + return OPAL_SUCCESS; } +/* + * mca_btl_vader_get_user_ns_id() tries to get the user namespace ID + * of the current process. + * Returns the ID of the user namespace. In the case of an error '0' is returned.
+ */ +ino_t mca_btl_vader_get_user_ns_id(void) +{ + struct stat buf; + + if (0 > stat("/proc/self/ns/user", &buf)) { + /* + * Something went wrong, probably an old kernel that does not support namespaces + * simply assume all processes are in the same user namespace and return 0 + */ + return 0; + } + + return buf.st_ino; +} static int mca_btl_base_vader_modex_send (void) { union vader_modex_t modex; @@ -352,8 +386,16 @@ static int mca_btl_base_vader_modex_send (void) modex_size = sizeof (modex.xpmem); } else { #endif - modex_size = opal_shmem_sizeof_shmem_ds (&mca_btl_vader_component.seg_ds); - memmove (&modex.seg_ds, &mca_btl_vader_component.seg_ds, modex_size); + modex.other.seg_ds_size = opal_shmem_sizeof_shmem_ds (&mca_btl_vader_component.seg_ds); + memmove (&modex.other.seg_ds, &mca_btl_vader_component.seg_ds, modex.other.seg_ds_size); + modex.other.user_ns_id = mca_btl_vader_get_user_ns_id(); + /* + * If modex.other.user_ns_id is '0' something did not work out + * during user namespace detection. Assuming there are no + * namespaces available it will return '0' for all processes and + * the check later will see '0' everywhere and not disable CMA. 
+ */ + modex_size = sizeof (modex.other); #if OPAL_BTL_VADER_HAVE_XPMEM } @@ -365,6 +407,7 @@ static int mca_btl_base_vader_modex_send (void) return rc; } +#if OPAL_BTL_VADER_HAVE_XPMEM || OPAL_BTL_VADER_HAVE_CMA || OPAL_BTL_VADER_HAVE_KNEM static void mca_btl_vader_select_next_single_copy_mechanism (void) { for (int i = 0 ; single_copy_mechanisms[i].value != MCA_BTL_VADER_NONE ; ++i) { @@ -374,10 +417,13 @@ static void mca_btl_vader_select_next_single_copy_mechanism (void) } } } +#endif static void mca_btl_vader_check_single_copy (void) { +#if OPAL_BTL_VADER_HAVE_XPMEM || OPAL_BTL_VADER_HAVE_CMA || OPAL_BTL_VADER_HAVE_KNEM int initial_mechanism = mca_btl_vader_component.single_copy_mechanism; +#endif /* single-copy emulation is always used to support AMO's right now */ mca_btl_vader_sc_emu_init (); @@ -463,12 +509,6 @@ static void mca_btl_vader_check_single_copy (void) mca_btl_vader.super.btl_get = NULL; mca_btl_vader.super.btl_put = NULL; } - - if (MCA_BTL_VADER_EMUL == mca_btl_vader_component.single_copy_mechanism) { - /* limit to the maximum fragment size */ - mca_btl_vader.super.btl_put_limit = mca_btl_vader.super.btl_max_send_size - sizeof (mca_btl_vader_sc_emu_hdr_t); - mca_btl_vader.super.btl_get_limit = mca_btl_vader.super.btl_max_send_size - sizeof (mca_btl_vader_sc_emu_hdr_t); - } } /* @@ -517,12 +557,10 @@ static mca_btl_base_module_t **mca_btl_vader_component_init (int *num_btls, /* no fast boxes allocated initially */ component->num_fbox_in_endpoints = 0; - component->fbox_count = 0; mca_btl_vader_check_single_copy (); if (MCA_BTL_VADER_XPMEM != mca_btl_vader_component.single_copy_mechanism) { - const char *base_dir = opal_process_info.proc_session_dir; char *sm_file; rc = asprintf(&sm_file, "%s" OPAL_PATH_SEP "vader_segment.%s.%x.%d", mca_btl_vader_component.backing_directory, @@ -559,8 +597,6 @@ static mca_btl_base_module_t **mca_btl_vader_component_init (int *num_btls, } } - component->segment_offset = 0; - /* initialize my fifo */ vader_fifo_init 
((struct vader_fifo_t *) component->my_segment); diff --git a/opal/mca/btl/vader/btl_vader_endpoint.h b/opal/mca/btl/vader/btl_vader_endpoint.h index d3a39e08f24..e9409b90c11 100644 --- a/opal/mca/btl/vader/btl_vader_endpoint.h +++ b/opal/mca/btl/vader/btl_vader_endpoint.h @@ -13,6 +13,8 @@ * Copyright (c) 2006-2007 Voltaire. All rights reserved. * Copyright (c) 2012-2016 Los Alamos National Security, LLC. All rights * reserved. + * Copyright (c) 2018 Triad National Security, LLC. All rights + * reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -58,6 +60,7 @@ typedef struct mca_btl_base_endpoint_t { uint32_t *startp; /**< pointer to location storing start offset */ unsigned int start, end; uint16_t seq; + opal_free_list_item_t *fbox; /**< fast-box free list item */ } fbox_out; int32_t peer_smp_rank; /**< my peer's SMP process rank. Used for accessing @@ -101,13 +104,16 @@ static inline void mca_btl_vader_endpoint_setup_fbox_recv (struct mca_btl_base_e endpoint->fbox_in.buffer = base; } -static inline void mca_btl_vader_endpoint_setup_fbox_send (struct mca_btl_base_endpoint_t *endpoint, void *base) +static inline void mca_btl_vader_endpoint_setup_fbox_send (struct mca_btl_base_endpoint_t *endpoint, opal_free_list_item_t *fbox) { + void *base = fbox->ptr; + endpoint->fbox_out.start = MCA_BTL_VADER_FBOX_ALIGNMENT; endpoint->fbox_out.end = MCA_BTL_VADER_FBOX_ALIGNMENT; endpoint->fbox_out.startp = (uint32_t *) base; endpoint->fbox_out.startp[0] = MCA_BTL_VADER_FBOX_ALIGNMENT; endpoint->fbox_out.seq = 0; + endpoint->fbox_out.fbox = fbox; /* zero out the first header in the fast box */ memset ((char *) base + MCA_BTL_VADER_FBOX_ALIGNMENT, 0, MCA_BTL_VADER_FBOX_ALIGNMENT); diff --git a/opal/mca/btl/vader/btl_vader_fbox.h b/opal/mca/btl/vader/btl_vader_fbox.h index abaf12811e4..3762c62010d 100644 --- a/opal/mca/btl/vader/btl_vader_fbox.h +++ b/opal/mca/btl/vader/btl_vader_fbox.h @@ -1,6 +1,8 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* 
- * Copyright (c) 2011-2015 Los Alamos National Security, LLC. All rights + * Copyright (c) 2011-2018 Los Alamos National Security, LLC. All rights + * reserved. + * Copyright (c) 2018 Triad National Security, LLC. All rights * reserved. * $COPYRIGHT$ * @@ -29,6 +31,10 @@ typedef union mca_btl_vader_fbox_hdr_t { /** sequence number */ uint16_t seq; } data; + struct { + uint32_t value0; + uint32_t value1; + } data_i32; uint64_t ival; } mca_btl_vader_fbox_hdr_t; @@ -51,8 +57,20 @@ static inline void mca_btl_vader_fbox_set_header (mca_btl_vader_fbox_hdr_t *hdr, uint16_t seq, uint32_t size) { mca_btl_vader_fbox_hdr_t tmp = {.data = {.tag = tag, .seq = seq, .size = size}}; - hdr->ival = tmp.ival; + /* clear out existing tag/seq */ + hdr->data_i32.value1 = 0; + opal_atomic_wmb (); + hdr->data_i32.value0 = size; opal_atomic_wmb (); + hdr->data_i32.value1 = tmp.data_i32.value1; +} + +static inline mca_btl_vader_fbox_hdr_t mca_btl_vader_fbox_read_header (mca_btl_vader_fbox_hdr_t *hdr) +{ + mca_btl_vader_fbox_hdr_t tmp = {.data_i32 = {.value1 = hdr->data_i32.value1}};; + opal_atomic_rmb (); + tmp.data_i32.value0 = hdr->data_i32.value0; + return tmp; } /* attempt to reserve a contiguous segment from the remote ep */ @@ -138,9 +156,6 @@ static inline bool mca_btl_vader_fbox_sendi (mca_btl_base_endpoint_t *ep, unsign memcpy (data + header_size, payload, payload_size); } - /* write out part of the header now. the tag will be written when the data is available */ - mca_btl_vader_fbox_set_header (MCA_BTL_VADER_FBOX_HDR(dst), tag, ep->fbox_out.seq++, data_size); - end += size; if (OPAL_UNLIKELY(fbox_size == end)) { @@ -152,6 +167,9 @@ static inline bool mca_btl_vader_fbox_sendi (mca_btl_base_endpoint_t *ep, unsign MCA_BTL_VADER_FBOX_HDR(ep->fbox_out.buffer + end)->ival = 0; } + /* write out part of the header now. 
the tag will be written when the data is available */ + mca_btl_vader_fbox_set_header (MCA_BTL_VADER_FBOX_HDR(dst), tag, ep->fbox_out.seq++, data_size); + /* align the buffer */ ep->fbox_out.end = ((uint32_t) hbs << 31) | end; opal_atomic_wmb (); @@ -174,7 +192,7 @@ static inline bool mca_btl_vader_check_fboxes (void) int poll_count; for (poll_count = 0 ; poll_count <= MCA_BTL_VADER_POLL_COUNT ; ++poll_count) { - const mca_btl_vader_fbox_hdr_t hdr = {.ival = MCA_BTL_VADER_FBOX_HDR(ep->fbox_in.buffer + start)->ival}; + const mca_btl_vader_fbox_hdr_t hdr = mca_btl_vader_fbox_read_header (MCA_BTL_VADER_FBOX_HDR(ep->fbox_in.buffer + start)); /* check for a valid tag a sequence number */ if (0 == hdr.data.tag || hdr.data.seq != ep->fbox_in.seq) { @@ -243,20 +261,17 @@ static inline void mca_btl_vader_try_fbox_setup (mca_btl_base_endpoint_t *ep, mc /* protect access to mca_btl_vader_component.segment_offset */ OPAL_THREAD_LOCK(&mca_btl_vader_component.lock); - if (mca_btl_vader_component.segment_size >= mca_btl_vader_component.segment_offset + mca_btl_vader_component.fbox_size && - mca_btl_vader_component.fbox_max > mca_btl_vader_component.fbox_count) { - /* verify the remote side will accept another fbox */ - if (0 <= opal_atomic_add_fetch_32 (&ep->fifo->fbox_available, -1)) { - void *fbox_base = mca_btl_vader_component.my_segment + mca_btl_vader_component.segment_offset; - mca_btl_vader_component.segment_offset += mca_btl_vader_component.fbox_size; + /* verify the remote side will accept another fbox */ + if (0 <= opal_atomic_add_fetch_32 (&ep->fifo->fbox_available, -1)) { + opal_free_list_item_t *fbox = opal_free_list_get (&mca_btl_vader_component.vader_fboxes); + if (NULL != fbox) { /* zero out the fast box */ - memset (fbox_base, 0, mca_btl_vader_component.fbox_size); - mca_btl_vader_endpoint_setup_fbox_send (ep, fbox_base); + memset (fbox->ptr, 0, mca_btl_vader_component.fbox_size); + mca_btl_vader_endpoint_setup_fbox_send (ep, fbox); hdr->flags |= 
MCA_BTL_VADER_FLAG_SETUP_FBOX; hdr->fbox_base = virtual2relative((char *) ep->fbox_out.buffer); - ++mca_btl_vader_component.fbox_count; } else { opal_atomic_add_fetch_32 (&ep->fifo->fbox_available, 1); } diff --git a/opal/mca/btl/vader/btl_vader_fifo.h b/opal/mca/btl/vader/btl_vader_fifo.h index 0dc70bc8a13..178a416704f 100644 --- a/opal/mca/btl/vader/btl_vader_fifo.h +++ b/opal/mca/btl/vader/btl_vader_fifo.h @@ -12,7 +12,7 @@ * All rights reserved. * Copyright (c) 2006-2007 Voltaire. All rights reserved. * Copyright (c) 2009-2010 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2010-2017 Los Alamos National Security, LLC. + * Copyright (c) 2010-2018 Los Alamos National Security, LLC. * All rights reserved. * $COPYRIGHT$ * @@ -155,7 +155,11 @@ static inline mca_btl_vader_hdr_t *vader_fifo_read (vader_fifo_t *fifo, struct m static inline void vader_fifo_init (vader_fifo_t *fifo) { - fifo->fifo_head = fifo->fifo_tail = VADER_FIFO_FREE; + /* due to a compiler bug in Oracle C 5.15 the following line was broken into two. Not + * ideal but oh well. See #5814 */ + /* fifo->fifo_head = fifo->fifo_tail = VADER_FIFO_FREE; */ + fifo->fifo_head = VADER_FIFO_FREE; + fifo->fifo_tail = VADER_FIFO_FREE; fifo->fbox_available = mca_btl_vader_component.fbox_max; mca_btl_vader_component.my_fifo = fifo; } diff --git a/opal/mca/btl/vader/btl_vader_frag.c b/opal/mca/btl/vader/btl_vader_frag.c index a132ea3d725..3635af99a43 100644 --- a/opal/mca/btl/vader/btl_vader_frag.c +++ b/opal/mca/btl/vader/btl_vader_frag.c @@ -13,6 +13,8 @@ * Copyright (c) 2009 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2011-2015 Los Alamos National Security, LLC. All rights * reserved. + * Copyright (c) 2018 Triad National Security, LLC. All rights + * reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -41,38 +43,9 @@ static inline void mca_btl_vader_frag_constructor (mca_btl_vader_frag_t *frag) int mca_btl_vader_frag_init (opal_free_list_item_t *item, void *ctx) { mca_btl_vader_frag_t *frag = (mca_btl_vader_frag_t *) item; - unsigned int data_size = (unsigned int)(uintptr_t) ctx; - unsigned int frag_size = data_size + sizeof (mca_btl_vader_hdr_t); - - /* ensure next fragment is aligned on a cache line */ - frag_size = (frag_size + 63) & ~63; - - OPAL_THREAD_LOCK(&mca_btl_vader_component.lock); - - if (data_size && mca_btl_vader_component.segment_size < mca_btl_vader_component.segment_offset + frag_size) { - OPAL_THREAD_UNLOCK(&mca_btl_vader_component.lock); - return OPAL_ERR_OUT_OF_RESOURCE; - } - - /* Set the list element here so we don't have to set it on the critical path. This only - * works if each free list has its own unique fragment size and ALL free lists are initialized - * with opal_free_list_init. */ - if (mca_btl_vader_component.max_inline_send == data_size) { - frag->my_list = &mca_btl_vader_component.vader_frags_user; - } else if (mca_btl_vader.super.btl_eager_limit == data_size) { - frag->my_list = &mca_btl_vader_component.vader_frags_eager; - } else if (mca_btl_vader.super.btl_max_send_size == data_size) { - frag->my_list = &mca_btl_vader_component.vader_frags_max_send; - } - - if (data_size) { - item->ptr = mca_btl_vader_component.my_segment + mca_btl_vader_component.segment_offset; - mca_btl_vader_component.segment_offset += frag_size; - } - - OPAL_THREAD_UNLOCK(&mca_btl_vader_component.lock); - mca_btl_vader_frag_constructor ((mca_btl_vader_frag_t *) item); + /* Set the list element here so we don't have to set it on the critical path */ + frag->my_list = (opal_free_list_t *) ctx; return OPAL_SUCCESS; } diff --git a/opal/mca/btl/vader/btl_vader_frag.h b/opal/mca/btl/vader/btl_vader_frag.h index d3ce21dae3c..2de26914818 100644 --- a/opal/mca/btl/vader/btl_vader_frag.h +++ 
b/opal/mca/btl/vader/btl_vader_frag.h @@ -14,6 +14,7 @@ * Copyright (c) 2009 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2011-2015 Los Alamos National Security, LLC. All rights * reserved. + * Copyright (c) 2019 Google, Inc. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -91,9 +92,12 @@ struct mca_btl_vader_frag_t { /** rdma callback data */ struct mca_btl_vader_rdma_cbdata_t { void *local_address; + uint64_t remote_address; mca_btl_base_rdma_completion_fn_t cbfunc; void *context; void *cbdata; + size_t remaining; + size_t sent; } rdma; }; @@ -151,28 +155,87 @@ static inline void mca_btl_vader_frag_complete (mca_btl_vader_frag_t *frag) { int mca_btl_vader_frag_init (opal_free_list_item_t *item, void *ctx); -static inline mca_btl_vader_frag_t * -mca_btl_vader_rdma_frag_alloc (mca_btl_base_module_t *btl, mca_btl_base_endpoint_t *endpoint, int type, +static inline void mca_btl_vader_rdma_frag_advance (mca_btl_base_module_t *btl, mca_btl_base_endpoint_t *endpoint, + mca_btl_vader_frag_t *frag, int status) +{ + mca_btl_vader_sc_emu_hdr_t *hdr = (mca_btl_vader_sc_emu_hdr_t *) frag->segments[0].seg_addr.pval; + mca_btl_base_rdma_completion_fn_t cbfunc = frag->rdma.cbfunc; + size_t hdr_size = sizeof (*hdr); + size_t len = frag->rdma.sent ? frag->segments[0].seg_len - hdr_size : 0; + void *context = frag->rdma.context; + void *cbdata = frag->rdma.cbdata; + void *data = (void *) (hdr + 1); + + if (frag->rdma.sent) { + if (MCA_BTL_VADER_OP_GET == hdr->type) { + memcpy (frag->rdma.local_address, data, len); + } else if ((MCA_BTL_VADER_OP_ATOMIC == hdr->type || MCA_BTL_VADER_OP_CSWAP == hdr->type) && + frag->rdma.local_address) { + if (8 == len) { + *((int64_t *) frag->rdma.local_address) = hdr->operand[0]; + } else { + *((int32_t *) frag->rdma.local_address) = (int32_t) hdr->operand[0]; + } + } + } + + if (frag->rdma.remaining) { + size_t packet_size = (frag->rdma.remaining + hdr_size) <= mca_btl_vader.super.btl_max_send_size ? 
+ frag->rdma.remaining : mca_btl_vader.super.btl_max_send_size - hdr_size; + + /* advance the local and remote pointers */ + frag->rdma.local_address = (void *)((uintptr_t) frag->rdma.local_address + len); + frag->rdma.remote_address += len; + + if (MCA_BTL_VADER_OP_PUT == hdr->type) { + /* copy the next block into the fragment buffer */ + memcpy ((void *) (hdr + 1), frag->rdma.local_address, packet_size); + } + + hdr->addr = frag->rdma.remote_address; + /* clear out the complete flag before sending the fragment again */ + frag->hdr->flags &= ~MCA_BTL_VADER_FLAG_COMPLETE; + frag->segments[0].seg_len = packet_size + sizeof (*hdr); + frag->rdma.sent += packet_size; + frag->rdma.remaining -= packet_size; + + /* send is always successful */ + (void) mca_btl_vader_send (btl, endpoint, &frag->base, MCA_BTL_TAG_VADER); + return; + } + + /* return the fragment before calling the callback */ + MCA_BTL_VADER_FRAG_RETURN(frag); + cbfunc (btl, endpoint, (void *)((uintptr_t) frag->rdma.local_address - frag->rdma.sent), NULL, + context, cbdata, status); +} + +static inline int +mca_btl_vader_rdma_frag_start (mca_btl_base_module_t *btl, mca_btl_base_endpoint_t *endpoint, int type, uint64_t operand1, uint64_t operand2, mca_btl_base_atomic_op_t op, int order, int flags, size_t size, void *local_address, int64_t remote_address, - mca_btl_base_rdma_completion_fn_t cbfunc, void *cbcontext, - void *cbdata, mca_btl_base_completion_fn_t des_cbfunc) + mca_btl_base_rdma_completion_fn_t cbfunc, void *cbcontext, void *cbdata) { mca_btl_vader_sc_emu_hdr_t *hdr; - size_t total_size = size + sizeof (*hdr); + size_t hdr_size = sizeof (*hdr); + size_t packet_size = (size + hdr_size) <= mca_btl_vader.super.btl_max_send_size ? 
size : + mca_btl_vader.super.btl_max_send_size - hdr_size; mca_btl_vader_frag_t *frag; - frag = (mca_btl_vader_frag_t *) mca_btl_vader_alloc (btl, endpoint, order, total_size, + frag = (mca_btl_vader_frag_t *) mca_btl_vader_alloc (btl, endpoint, order, packet_size + hdr_size, MCA_BTL_DES_SEND_ALWAYS_CALLBACK); if (OPAL_UNLIKELY(NULL == frag)) { - return NULL; + return OPAL_ERR_OUT_OF_RESOURCE; } - frag->base.des_cbfunc = des_cbfunc; + frag->base.des_cbfunc = (mca_btl_base_completion_fn_t) mca_btl_vader_rdma_frag_advance; frag->rdma.local_address = local_address; + frag->rdma.remote_address = remote_address; frag->rdma.cbfunc = cbfunc; frag->rdma.context = cbcontext; frag->rdma.cbdata = cbdata; + frag->rdma.remaining = size; + frag->rdma.sent = 0; hdr = (mca_btl_vader_sc_emu_hdr_t *) frag->segments[0].seg_addr.pval; @@ -183,7 +246,8 @@ mca_btl_vader_rdma_frag_alloc (mca_btl_base_module_t *btl, mca_btl_base_endpoint hdr->operand[0] = operand1; hdr->operand[1] = operand2; - return frag; + mca_btl_vader_rdma_frag_advance (btl, endpoint, frag, OPAL_SUCCESS); + return OPAL_SUCCESS; } #endif /* MCA_BTL_VADER_SEND_FRAG_H */ diff --git a/opal/mca/btl/vader/btl_vader_get.c b/opal/mca/btl/vader/btl_vader_get.c index db4b678ef19..a71203f1ad9 100644 --- a/opal/mca/btl/vader/btl_vader_get.c +++ b/opal/mca/btl/vader/btl_vader_get.c @@ -4,6 +4,7 @@ * reserved. * Copyright (c) 2018 Research Organization for Information Science * and Technology (RIST). All rights reserved. + * Copyright (c) 2019 Google, Inc. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -156,49 +157,15 @@ int mca_btl_vader_get_knem (mca_btl_base_module_t *btl, mca_btl_base_endpoint_t } #endif -static void mca_btl_vader_sc_emu_get_complete (mca_btl_base_module_t *btl, mca_btl_base_endpoint_t *endpoint, - mca_btl_base_descriptor_t *desc, int status) -{ - mca_btl_vader_frag_t *frag = (mca_btl_vader_frag_t *) desc; - mca_btl_vader_sc_emu_hdr_t *hdr; - void *local_address = frag->rdma.local_address; - size_t len = frag->segments[0].seg_len - sizeof (*hdr); - void *context = frag->rdma.context; - void *cbdata = frag->rdma.cbdata; - mca_btl_base_rdma_completion_fn_t cbfunc = frag->rdma.cbfunc; - void *data; - - hdr = (mca_btl_vader_sc_emu_hdr_t *) frag->segments[0].seg_addr.pval; - data = (void *) (hdr + 1); - - memcpy (local_address, data, len); - - /* return the fragment before calling the callback */ - MCA_BTL_VADER_FRAG_RETURN(frag); - - cbfunc (btl, endpoint, local_address, NULL, context, cbdata, status); -} - int mca_btl_vader_get_sc_emu (mca_btl_base_module_t *btl, mca_btl_base_endpoint_t *endpoint, void *local_address, uint64_t remote_address, mca_btl_base_registration_handle_t *local_handle, mca_btl_base_registration_handle_t *remote_handle, size_t size, int flags, int order, mca_btl_base_rdma_completion_fn_t cbfunc, void *cbcontext, void *cbdata) { - mca_btl_vader_frag_t *frag; - if (size > mca_btl_vader.super.btl_get_limit) { return OPAL_ERR_NOT_AVAILABLE; } - frag = mca_btl_vader_rdma_frag_alloc (btl, endpoint, MCA_BTL_VADER_OP_GET, 0, 0, 0, order, flags, size, - local_address, remote_address, cbfunc, cbcontext, cbdata, - mca_btl_vader_sc_emu_get_complete); - if (OPAL_UNLIKELY(NULL == frag)) { - return OPAL_ERR_OUT_OF_RESOURCE; - } - - /* send is always successful */ - (void) mca_btl_vader_send (btl, endpoint, &frag->base, MCA_BTL_TAG_VADER); - - return OPAL_SUCCESS; + return mca_btl_vader_rdma_frag_start (btl, endpoint, MCA_BTL_VADER_OP_GET, 0, 0, 0, order, flags, size, + local_address, 
remote_address, cbfunc, cbcontext, cbdata); } diff --git a/opal/mca/btl/vader/btl_vader_module.c b/opal/mca/btl/vader/btl_vader_module.c index c28012ffc7f..e54c02b5698 100644 --- a/opal/mca/btl/vader/btl_vader_module.c +++ b/opal/mca/btl/vader/btl_vader_module.c @@ -15,8 +15,10 @@ * Copyright (c) 2010-2015 Los Alamos National Security, LLC. All rights * reserved. * Copyright (c) 2014-2015 Intel, Inc. All rights reserved. - * Copyright (c) 2014-2015 Research Organization for Information Science - * and Technology (RIST). All rights reserved. + * Copyright (c) 2014-2019 Research Organization for Information Science + * and Technology (RIST). All rights reserved. + * Copyright (c) 2018-2019 Triad National Security, LLC. All rights + * reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -25,6 +27,7 @@ */ #include "opal_config.h" +#include "opal/util/show_help.h" #include "btl_vader.h" #include "btl_vader_endpoint.h" @@ -77,6 +80,28 @@ mca_btl_vader_t mca_btl_vader = { } }; +/* + * Exit function copied from btl_usnic_util.c + * + * The following comment tells Coverity that this function does not return. + * See https://scan.coverity.com/tune. + */ + +/* coverity[+kill] */ +static void vader_btl_exit(mca_btl_vader_t *btl) +{ + if (NULL != btl && NULL != btl->error_cb) { + btl->error_cb(&btl->super, MCA_BTL_ERROR_FLAGS_FATAL, + (opal_proc_t*) opal_proc_local_get(), + "The vader BTL is aborting the MPI job (via PML error callback)."); + } + + /* If the PML error callback returns (or if there wasn't one), just exit. Shrug. 
*/ + fprintf(stderr, "*** The Open MPI vader BTL is aborting the MPI job (via exit(3)).\n"); + fflush(stderr); + exit(1); +} + static int vader_btl_first_time_init(mca_btl_vader_t *vader_btl, int n) { mca_btl_vader_component_t *component = &mca_btl_vader_component; @@ -95,19 +120,32 @@ static int vader_btl_first_time_init(mca_btl_vader_t *vader_btl, int n) return OPAL_ERR_OUT_OF_RESOURCE; } - component->segment_offset = MCA_BTL_VADER_FIFO_SIZE; + component->mpool = mca_mpool_basic_create ((void *) (component->my_segment + MCA_BTL_VADER_FIFO_SIZE), + (unsigned long) (mca_btl_vader_component.segment_size - MCA_BTL_VADER_FIFO_SIZE), 64); + if (NULL == component->mpool) { + free (component->endpoints); + return OPAL_ERR_OUT_OF_RESOURCE; + } + + rc = opal_free_list_init (&component->vader_fboxes, sizeof (opal_free_list_item_t), 8, + OBJ_CLASS(opal_free_list_item_t), mca_btl_vader_component.fbox_size, + opal_cache_line_size, 0, mca_btl_vader_component.fbox_max, 4, + component->mpool, 0, NULL, NULL, NULL); + if (OPAL_SUCCESS != rc) { + return rc; + } /* initialize fragment descriptor free lists */ /* initialize free list for small send and inline fragments */ rc = opal_free_list_init (&component->vader_frags_user, sizeof(mca_btl_vader_frag_t), opal_cache_line_size, OBJ_CLASS(mca_btl_vader_frag_t), - 0, opal_cache_line_size, - component->vader_free_list_num, + mca_btl_vader_component.max_inline_send + sizeof (mca_btl_vader_frag_t), + opal_cache_line_size, component->vader_free_list_num, component->vader_free_list_max, component->vader_free_list_inc, - NULL, 0, NULL, mca_btl_vader_frag_init, - (void *)(intptr_t) mca_btl_vader_component.max_inline_send); + component->mpool, 0, NULL, mca_btl_vader_frag_init, + &component->vader_frags_user); if (OPAL_SUCCESS != rc) { return rc; } @@ -116,12 +154,12 @@ static int vader_btl_first_time_init(mca_btl_vader_t *vader_btl, int n) rc = opal_free_list_init (&component->vader_frags_eager, sizeof (mca_btl_vader_frag_t), 
opal_cache_line_size, OBJ_CLASS(mca_btl_vader_frag_t), - 0, opal_cache_line_size, - component->vader_free_list_num, + mca_btl_vader.super.btl_eager_limit + sizeof (mca_btl_vader_frag_t), + opal_cache_line_size, component->vader_free_list_num, component->vader_free_list_max, component->vader_free_list_inc, - NULL, 0, NULL, mca_btl_vader_frag_init, - (void *)(intptr_t) mca_btl_vader.super.btl_eager_limit); + component->mpool, 0, NULL, mca_btl_vader_frag_init, + &component->vader_frags_eager); if (OPAL_SUCCESS != rc) { return rc; } @@ -131,12 +169,12 @@ static int vader_btl_first_time_init(mca_btl_vader_t *vader_btl, int n) rc = opal_free_list_init (&component->vader_frags_max_send, sizeof (mca_btl_vader_frag_t), opal_cache_line_size, OBJ_CLASS(mca_btl_vader_frag_t), - 0, opal_cache_line_size, - component->vader_free_list_num, + mca_btl_vader.super.btl_max_send_size + sizeof (mca_btl_vader_frag_t), + opal_cache_line_size, component->vader_free_list_num, component->vader_free_list_max, component->vader_free_list_inc, - NULL, 0, NULL, mca_btl_vader_frag_init, - (void *)(intptr_t) mca_btl_vader.super.btl_max_send_size); + component->mpool, 0, NULL, mca_btl_vader_frag_init, + &component->vader_frags_max_send); if (OPAL_SUCCESS != rc) { return rc; } @@ -158,6 +196,7 @@ static int vader_btl_first_time_init(mca_btl_vader_t *vader_btl, int n) static int init_vader_endpoint (struct mca_btl_base_endpoint_t *ep, struct opal_proc_t *proc, int remote_rank) { mca_btl_vader_component_t *component = &mca_btl_vader_component; union vader_modex_t *modex; + ino_t my_user_ns_id; size_t msg_size; int rc; @@ -182,17 +221,58 @@ static int init_vader_endpoint (struct mca_btl_base_endpoint_t *ep, struct opal_ } else { #endif /* store a copy of the segment information for detach */ - ep->segment_data.other.seg_ds = malloc (msg_size); + ep->segment_data.other.seg_ds = malloc (modex->other.seg_ds_size); if (NULL == ep->segment_data.other.seg_ds) { return OPAL_ERR_OUT_OF_RESOURCE; } - memcpy 
(ep->segment_data.other.seg_ds, &modex->seg_ds, msg_size); + memcpy (ep->segment_data.other.seg_ds, &modex->other.seg_ds, modex->other.seg_ds_size); ep->segment_base = opal_shmem_segment_attach (ep->segment_data.other.seg_ds); if (NULL == ep->segment_base) { return OPAL_ERROR; } + + if (MCA_BTL_VADER_CMA == mca_btl_vader_component.single_copy_mechanism) { + my_user_ns_id = mca_btl_vader_get_user_ns_id(); + if (my_user_ns_id != modex->other.user_ns_id) { + mca_base_var_source_t source; + int vari; + rc = mca_base_var_find_by_name("btl_vader_single_copy_mechanism", &vari); + if (OPAL_ERROR == rc) { + return OPAL_ERROR; + } + rc = mca_base_var_get_value(vari, NULL, &source, NULL); + if (OPAL_ERROR == rc) { + return OPAL_ERROR; + } + /* + * CMA is not possible as different user namespaces are in use. + * Currently the kernel does not allow * process_vm_{read,write}v() + * for processes running in different user namespaces even if + * all involved user IDs are mapped to the same user ID. + * + * Fallback to MCA_BTL_VADER_EMUL. + */ + if (MCA_BASE_VAR_SOURCE_DEFAULT != source) { + /* If CMA has been explicitly selected we want to error out */ + opal_show_help("help-btl-vader.txt", "cma-different-user-namespace-error", + true, opal_process_info.nodename); + vader_btl_exit(&mca_btl_vader); + } + /* + * If CMA has been selected because it is the default or + * some fallback, this falls back even further. 
+ */ + opal_show_help("help-btl-vader.txt", "cma-different-user-namespace-warning", + true, opal_process_info.nodename); + mca_btl_vader_component.single_copy_mechanism = MCA_BTL_VADER_EMUL; + mca_btl_vader.super.btl_get = mca_btl_vader_get_sc_emu; + mca_btl_vader.super.btl_put = mca_btl_vader_put_sc_emu; + mca_btl_vader.super.btl_put_limit = mca_btl_vader.super.btl_max_send_size - sizeof (mca_btl_vader_sc_emu_hdr_t); + mca_btl_vader.super.btl_get_limit = mca_btl_vader.super.btl_max_send_size - sizeof (mca_btl_vader_sc_emu_hdr_t); + } + } #if OPAL_BTL_VADER_HAVE_XPMEM } #endif @@ -276,7 +356,7 @@ static int vader_add_procs (struct mca_btl_base_module_t* btl, continue; } - if (my_proc != procs[proc]) { + if (my_proc != procs[proc] && NULL != reachability) { /* add this proc to shared memory accessibility list */ rc = opal_bitmap_set_bit (reachability, proc); if(OPAL_SUCCESS != rc) { @@ -534,6 +614,7 @@ static void mca_btl_vader_endpoint_constructor (mca_btl_vader_endpoint_t *ep) OBJ_CONSTRUCT(&ep->pending_frags, opal_list_t); OBJ_CONSTRUCT(&ep->pending_frags_lock, opal_mutex_t); ep->fifo = NULL; + ep->fbox_out.fbox = NULL; } #if OPAL_BTL_VADER_HAVE_XPMEM @@ -562,8 +643,12 @@ static void mca_btl_vader_endpoint_destructor (mca_btl_vader_endpoint_t *ep) /* disconnect from the peer's segment */ opal_shmem_segment_detach (&seg_ds); } + if (ep->fbox_out.fbox) { + opal_free_list_return (&mca_btl_vader_component.vader_fboxes, ep->fbox_out.fbox); + } ep->fbox_in.buffer = ep->fbox_out.buffer = NULL; + ep->fbox_out.fbox = NULL; ep->segment_base = NULL; ep->fifo = NULL; } diff --git a/opal/mca/btl/vader/btl_vader_put.c b/opal/mca/btl/vader/btl_vader_put.c index 0224d2fe193..8e47a70332c 100644 --- a/opal/mca/btl/vader/btl_vader_put.c +++ b/opal/mca/btl/vader/btl_vader_put.c @@ -4,6 +4,7 @@ * reserved. * Copyright (c) 2014-2018 Research Organization for Information Science * and Technology (RIST). All rights reserved. + * Copyright (c) 2019 Google, Inc. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -135,21 +136,6 @@ int mca_btl_vader_put_knem (mca_btl_base_module_t *btl, mca_btl_base_endpoint_t } #endif -static void mca_btl_vader_sc_emu_put_complete (mca_btl_base_module_t *btl, mca_btl_base_endpoint_t *endpoint, - mca_btl_base_descriptor_t *desc, int status) -{ - mca_btl_vader_frag_t *frag = (mca_btl_vader_frag_t *) desc; - void *local_address = frag->rdma.local_address; - void *context = frag->rdma.context; - void *cbdata = frag->rdma.cbdata; - mca_btl_base_rdma_completion_fn_t cbfunc = frag->rdma.cbfunc; - - /* return the fragment first since the callback may call put/get/amo and could use this fragment */ - MCA_BTL_VADER_FRAG_RETURN(frag); - - cbfunc (btl, endpoint, local_address, NULL, context, cbdata, status); -} - /** * @brief Provides an emulated put path which uses copy-in copy-out with shared memory buffers */ @@ -158,26 +144,10 @@ int mca_btl_vader_put_sc_emu (mca_btl_base_module_t *btl, mca_btl_base_endpoint_ mca_btl_base_registration_handle_t *remote_handle, size_t size, int flags, int order, mca_btl_base_rdma_completion_fn_t cbfunc, void *cbcontext, void *cbdata) { - mca_btl_vader_sc_emu_hdr_t *hdr; - mca_btl_vader_frag_t *frag; - if (size > mca_btl_vader.super.btl_put_limit) { return OPAL_ERR_NOT_AVAILABLE; } - frag = mca_btl_vader_rdma_frag_alloc (btl, endpoint, MCA_BTL_VADER_OP_PUT, 0, 0, 0, order, flags, size, - local_address, remote_address, cbfunc, cbcontext, cbdata, - mca_btl_vader_sc_emu_put_complete); - if (OPAL_UNLIKELY(NULL == frag)) { - return OPAL_ERR_OUT_OF_RESOURCE; - } - - hdr = (mca_btl_vader_sc_emu_hdr_t *) frag->segments[0].seg_addr.pval; - - memcpy ((void *) (hdr + 1), local_address, size); - - /* send is always successful */ - (void) mca_btl_vader_send (btl, endpoint, &frag->base, MCA_BTL_TAG_VADER); - - return OPAL_SUCCESS; + return mca_btl_vader_rdma_frag_start (btl, endpoint, MCA_BTL_VADER_OP_PUT, 0, 0, 0, order, flags, size, + local_address, remote_address, cbfunc, 
cbcontext, cbdata); } diff --git a/opal/mca/btl/vader/btl_vader_sc_emu.c b/opal/mca/btl/vader/btl_vader_sc_emu.c index 651ce9b4eb3..4f0b289ea63 100644 --- a/opal/mca/btl/vader/btl_vader_sc_emu.c +++ b/opal/mca/btl/vader/btl_vader_sc_emu.c @@ -15,9 +15,7 @@ #if OPAL_HAVE_ATOMIC_MATH_64 static void mca_btl_vader_sc_emu_atomic_64 (int64_t *operand, volatile int64_t *addr, mca_btl_base_atomic_op_t op) { - int64_t result; - - fprintf (stderr, "Performing atomic operation %d on address %p\n", op, (void *) addr); + int64_t result = 0; switch (op) { case MCA_BTL_ATOMIC_ADD: @@ -56,9 +54,7 @@ static void mca_btl_vader_sc_emu_atomic_64 (int64_t *operand, volatile int64_t * #if OPAL_HAVE_ATOMIC_MATH_32 static void mca_btl_vader_sc_emu_atomic_32 (int32_t *operand, volatile int32_t *addr, mca_btl_base_atomic_op_t op) { - int32_t result; - - fprintf (stderr, "Performing atomic operation %d on address %p\n", op, (void *) addr); + int32_t result = 0; switch (op) { case MCA_BTL_ATOMIC_ADD: diff --git a/opal/mca/btl/vader/help-btl-vader.txt b/opal/mca/btl/vader/help-btl-vader.txt index 9d87267564a..ea87559d454 100644 --- a/opal/mca/btl/vader/help-btl-vader.txt +++ b/opal/mca/btl/vader/help-btl-vader.txt @@ -121,6 +121,25 @@ WARNING: Linux kernel CMA support was requested via the btl_vader_single_copy_mechanism MCA variable, but CMA support is not available due to restrictive ptrace settings. +The vader shared memory BTL will fall back on another single-copy +mechanism if one is available. This may result in lower performance. + + Local host: %s +# +[cma-different-user-namespace-error] +ERROR: Linux kernel CMA support was requested via the +btl_vader_single_copy_mechanism MCA variable, but CMA support is +not available due to different user namespaces. + +Your MPI job will abort now. Please select another value for +btl_vader_single_copy_mechanism. 
+ + Local host: %s +# +[cma-different-user-namespace-warning] +WARNING: The default btl_vader_single_copy_mechanism CMA is +not available due to different user namespaces. + The vader shared memory BTL will fall back on another single-copy mechanism if one is available. This may result in lower performance. diff --git a/opal/mca/common/ofi/Makefile.am b/opal/mca/common/ofi/Makefile.am deleted file mode 100644 index 658e1a703f2..00000000000 --- a/opal/mca/common/ofi/Makefile.am +++ /dev/null @@ -1,105 +0,0 @@ -# -# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana -# University Research and Technology -# Corporation. All rights reserved. -# Copyright (c) 2004-2013 The University of Tennessee and The University -# of Tennessee Research Foundation. All rights -# reserved. -# Copyright (c) 2004-2009 High Performance Computing Center Stuttgart, -# University of Stuttgart. All rights reserved. -# Copyright (c) 2004-2005 The Regents of the University of California. -# All rights reserved. -# Copyright (c) 2011-2013 NVIDIA Corporation. All rights reserved. -# Copyright (c) 2014 Cisco Systems, Inc. All rights reserved. -# Copyright (c) 2015 Intel, Inc. All rights reserved. -# Copyright (c) 2017 Los Alamos National Security, LLC. All rights -# reserved. -# $COPYRIGHT$ -# -# Additional copyrights may follow -# -# $HEADER$ -# -# A word of explanation... -# -# This library is linked against various MCA components because the -# support for ofis is needed in various places. -# -# Note that building this common component statically and linking -# against other dynamic components is *not* supported! - -AM_CPPFLAGS = $(opal_common_ofi_CPPFLAGS) - -# Header files - -headers = \ - common_ofi.h - -# Source files - -sources = \ - common_ofi.c - -# As per above, we'll either have an installable or noinst result. -# The installable one should follow the same MCA prefix naming rules -# (i.e., libmca__.la). 
The noinst one can be named -# whatever it wants, although libmca___noinst.la is -# recommended. - -# To simplify components that link to this library, we will *always* -# have an output libtool library named libmca__.la -- even -# for case 2) described above (i.e., so there's no conditional logic -# necessary in component Makefile.am's that link to this library). -# Hence, if we're creating a noinst version of this library (i.e., -# case 2), we sym link it to the libmca__.la name -# (libtool will do the Right Things under the covers). See the -# all-local and clean-local rules, below, for how this is effected. - -lib_LTLIBRARIES = -noinst_LTLIBRARIES = -comp_inst = lib@OPAL_LIB_PREFIX@mca_common_ofi.la -comp_noinst = lib@OPAL_LIB_PREFIX@mca_common_ofi_noinst.la - - -if MCA_BUILD_opal_common_ofi_DSO -lib_LTLIBRARIES += $(comp_inst) -else -noinst_LTLIBRARIES += $(comp_noinst) -endif - -lib@OPAL_LIB_PREFIX@mca_common_ofi_la_SOURCES = $(headers) $(sources) -lib@OPAL_LIB_PREFIX@mca_common_ofi_la_LDFLAGS = \ - $(opal_common_ofi_LDFLAGS) \ - -version-info $(libmca_opal_common_ofi_so_version) -lib@OPAL_LIB_PREFIX@mca_common_ofi_la_LIBADD = $(opal_common_ofi_LIBS) - -lib@OPAL_LIB_PREFIX@mca_common_ofi_noinst_la_SOURCES = $(headers) $(sources) -lib@OPAL_LIB_PREFIX@mca_common_ofi_noinst_la_LDFLAGS = $(opal_common_ofi_LDFLAGS) -lib@OPAL_LIB_PREFIX@mca_common_ofi_noinst_la_LIBADD = $(opal_common_ofi_LIBS) - -# Conditionally install the header files - -if WANT_INSTALL_HEADERS -opaldir = $(opalincludedir)/$(subdir) -opal_HEADERS = $(headers) -endif - -# These two rules will sym link the "noinst" libtool library filename -# to the installable libtool library filename in the case where we are -# compiling this component statically (case 2), described above). 
- -V=0 -OMPI_V_LN_SCOMP = $(ompi__v_LN_SCOMP_$V) -ompi__v_LN_SCOMP_ = $(ompi__v_LN_SCOMP_$AM_DEFAULT_VERBOSITY) -ompi__v_LN_SCOMP_0 = @echo " LN_S " `basename $(comp_inst)`; - -all-local: - $(OMPI_V_LN_SCOMP) if test -z "$(lib_LTLIBRARIES)"; then \ - rm -f "$(comp_inst)"; \ - $(LN_S) "$(comp_noinst)" "$(comp_inst)"; \ - fi - -clean-local: - if test -z "$(lib_LTLIBRARIES)"; then \ - rm -f "$(comp_inst)"; \ - fi diff --git a/opal/mca/common/ofi/common_ofi.c b/opal/mca/common/ofi/common_ofi.c deleted file mode 100644 index c2d02be50bb..00000000000 --- a/opal/mca/common/ofi/common_ofi.c +++ /dev/null @@ -1,23 +0,0 @@ -/* - * Copyright (c) 2015 Intel, Inc. All rights reserved. - * Copyright (c) 2017 Los Alamos National Security, LLC. All rights - * reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "opal_config.h" -#include "opal/constants.h" - -#include -#include - -#include "common_ofi.h" - -int mca_common_ofi_register_mca_variables(void) -{ - return OPAL_SUCCESS; -} diff --git a/opal/mca/common/ofi/common_ofi.h b/opal/mca/common/ofi/common_ofi.h deleted file mode 100644 index bb5a04f35a8..00000000000 --- a/opal/mca/common/ofi/common_ofi.h +++ /dev/null @@ -1,18 +0,0 @@ -/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ -/* - * Copyright (c) 2015 Intel, Inc. All rights reserved. - * Copyright (c) 2017 Los Alamos National Security, LLC. All rights - * reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#ifndef OPAL_MCA_COMMON_OFI_H -#define OPAL_MCA_COMMON_OFI_H - -OPAL_DECLSPEC int mca_common_ofi_register_mca_variables(void); - -#endif /* OPAL_MCA_COMMON_OFI_H */ diff --git a/opal/mca/common/ofi/configure.m4 b/opal/mca/common/ofi/configure.m4 deleted file mode 100644 index 4e47ad278dd..00000000000 --- a/opal/mca/common/ofi/configure.m4 +++ /dev/null @@ -1,32 +0,0 @@ -# -*- shell-script -*- -# -# Copyright (c) 2011-2013 NVIDIA Corporation. All rights reserved. 
-# Copyright (c) 2013 The University of Tennessee and The University -# of Tennessee Research Foundation. All rights -# reserved. -# Copyright (c) 2015 Intel, Inc. All rights reserved. -# Copyright (c) 2015 Cisco Systems, Inc. All rights reserved. -# Copyright (c) 2017 Los Alamos National Security, LLC. All rights -# reserved. -# $COPYRIGHT$ -# -# Additional copyrights may follow -# -# $HEADER$ -# - -AC_DEFUN([MCA_opal_common_ofi_CONFIG],[ - AC_CONFIG_FILES([opal/mca/common/ofi/Makefile]) - - # Check for ofi. Note that $opal_common_ofi_happy is - # used in other configure.m4's to know if ofi configured - # successfully. - OPAL_CHECK_OFI([opal_common_ofi], - [opal_common_ofi_happy=yes - common_ofi_WRAPPER_EXTRA_LDFLAGS=$opal_common_ofi_LDFLAGS - common_ofi_WRAPPER_EXTRA_LIBS=$opal_common_ofi_LIBS - $1], - [opal_common_ofi_happy=no - $2]) - -])dnl diff --git a/opal/mca/common/ofi/owner.txt b/opal/mca/common/ofi/owner.txt deleted file mode 100644 index 5fe87e2d40c..00000000000 --- a/opal/mca/common/ofi/owner.txt +++ /dev/null @@ -1,7 +0,0 @@ -# -# owner/status file -# owner: institution that is responsible for this package -# status: e.g. 
active, maintenance, unmaintained -# -owner: Intel -status:active diff --git a/opal/mca/common/ucx/common_ucx.c b/opal/mca/common/ucx/common_ucx.c index cd54490e4dd..bf5d6c04943 100644 --- a/opal/mca/common/ucx/common_ucx.c +++ b/opal/mca/common/ucx/common_ucx.c @@ -34,45 +34,80 @@ static void opal_common_ucx_mem_release_cb(void *buf, size_t length, ucm_vm_munmap(buf, length); } +OPAL_DECLSPEC void opal_common_ucx_mca_var_register(const mca_base_component_t *component) +{ + static int registered = 0; + static int hook_index; + static int verbose_index; + static int progress_index; + if (!registered) { + verbose_index = mca_base_var_register("opal", "opal_common", "ucx", "verbose", + "Verbose level of the UCX components", + MCA_BASE_VAR_TYPE_INT, NULL, 0, + MCA_BASE_VAR_FLAG_SETTABLE, OPAL_INFO_LVL_3, + MCA_BASE_VAR_SCOPE_LOCAL, + &opal_common_ucx.verbose); + progress_index = mca_base_var_register("opal", "opal_common", "ucx", "progress_iterations", + "Set number of calls of internal UCX progress " + "calls per opal_progress call", + MCA_BASE_VAR_TYPE_INT, NULL, 0, + MCA_BASE_VAR_FLAG_SETTABLE, OPAL_INFO_LVL_3, + MCA_BASE_VAR_SCOPE_LOCAL, + &opal_common_ucx.progress_iterations); + hook_index = mca_base_var_register("opal", "opal_common", "ucx", "opal_mem_hooks", + "Use OPAL memory hooks, instead of UCX internal " + "memory hooks", MCA_BASE_VAR_TYPE_BOOL, NULL, 0, 0, + OPAL_INFO_LVL_3, + MCA_BASE_VAR_SCOPE_LOCAL, + &opal_common_ucx.opal_mem_hooks); + registered = 1; + } + if (component) { + mca_base_var_register_synonym(verbose_index, component->mca_project_name, + component->mca_type_name, + component->mca_component_name, + "verbose", 0); + mca_base_var_register_synonym(progress_index, component->mca_project_name, + component->mca_type_name, + component->mca_component_name, + "progress_iterations", 0); + mca_base_var_register_synonym(hook_index, component->mca_project_name, + component->mca_type_name, + component->mca_component_name, + "opal_mem_hooks", 0); + } +} + 
OPAL_DECLSPEC void opal_common_ucx_mca_register(void) { + int ret; + opal_common_ucx.registered++; if (opal_common_ucx.registered > 1) { /* process once */ return; } - mca_base_var_register("opal", "opal_common", "ucx", "verbose", - "Verbose level of the UCX components", - MCA_BASE_VAR_TYPE_INT, NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE, - OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_LOCAL, - &opal_common_ucx.verbose); - mca_base_var_register("opal", "opal_common", "ucx", "progress_iterations", - "Set number of calls of internal UCX progress calls per opal_progress call", - MCA_BASE_VAR_TYPE_INT, NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE, - OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_LOCAL, - &opal_common_ucx.progress_iterations); - mca_base_var_register("opal", "opal_common", "ucx", "opal_mem_hooks", - "Use OPAL memory hooks, instead of UCX internal memory hooks", - MCA_BASE_VAR_TYPE_BOOL, NULL, 0, 0, - OPAL_INFO_LVL_3, - MCA_BASE_VAR_SCOPE_LOCAL, - &opal_common_ucx.opal_mem_hooks); - opal_common_ucx.output = opal_output_open(NULL); opal_output_set_verbosity(opal_common_ucx.output, opal_common_ucx.verbose); - mca_base_framework_open(&opal_memory_base_framework, 0); - /* Set memory hooks */ - if (opal_common_ucx.opal_mem_hooks && - (OPAL_MEMORY_FREE_SUPPORT | OPAL_MEMORY_MUNMAP_SUPPORT) == - ((OPAL_MEMORY_FREE_SUPPORT | OPAL_MEMORY_MUNMAP_SUPPORT) & - opal_mem_hooks_support_level())) - { - MCA_COMMON_UCX_VERBOSE(1, "%s", "using OPAL memory hooks as external events"); - ucm_set_external_event(UCM_EVENT_VM_UNMAPPED); - opal_mem_hooks_register_release(opal_common_ucx_mem_release_cb, NULL); + if (opal_common_ucx.opal_mem_hooks) { + ret = mca_base_framework_open(&opal_memory_base_framework, 0); + if (OPAL_SUCCESS != ret) { + /* failed to initialize memory framework - just exit */ + MCA_COMMON_UCX_VERBOSE(1, "failed to initialize memory base framework: %d, " + "memory hooks will not be used", ret); + return; + } + + if ((OPAL_MEMORY_FREE_SUPPORT | OPAL_MEMORY_MUNMAP_SUPPORT) == + 
((OPAL_MEMORY_FREE_SUPPORT | OPAL_MEMORY_MUNMAP_SUPPORT) & + opal_mem_hooks_support_level())) { + MCA_COMMON_UCX_VERBOSE(1, "%s", "using OPAL memory hooks as external events"); + ucm_set_external_event(UCM_EVENT_VM_UNMAPPED); + opal_mem_hooks_register_release(opal_common_ucx_mem_release_cb, NULL); + } } } @@ -97,13 +132,118 @@ static void opal_common_ucx_mca_fence_complete_cb(int status, void *fenced) *(int*)fenced = 1; } -OPAL_DECLSPEC void opal_common_ucx_mca_pmix_fence(ucp_worker_h worker) +void opal_common_ucx_mca_proc_added(void) +{ +#if HAVE_DECL_UCM_TEST_EVENTS + static int warned = 0; + static char *mem_hooks_suggestion = "Pls try adding --mca opal_common_ucx_opal_mem_hooks 1 " + "to mpirun/oshrun command line to resolve this issue."; + ucs_status_t status; + + if (!warned) { + status = ucm_test_events(UCM_EVENT_VM_UNMAPPED); + if (status != UCS_OK) { + MCA_COMMON_UCX_WARN("UCX is unable to handle VM_UNMAP event. " + "This may cause performance degradation or data " + "corruption. %s", + opal_common_ucx.opal_mem_hooks ? 
"" : mem_hooks_suggestion); + warned = 1; + } + } +#endif +} + +OPAL_DECLSPEC int opal_common_ucx_mca_pmix_fence_nb(int *fenced) +{ + return opal_pmix.fence_nb(NULL, 0, opal_common_ucx_mca_fence_complete_cb, (void *)fenced); +} + +OPAL_DECLSPEC int opal_common_ucx_mca_pmix_fence(ucp_worker_h worker) { volatile int fenced = 0; + int ret = OPAL_SUCCESS; + + if (OPAL_SUCCESS != (ret = opal_pmix.fence_nb(NULL, 0, + opal_common_ucx_mca_fence_complete_cb, (void*)&fenced))){ + return ret; + } - opal_pmix.fence_nb(NULL, 0, opal_common_ucx_mca_fence_complete_cb, (void*)&fenced); while (!fenced) { ucp_worker_progress(worker); } + + return ret; +} + +static void opal_common_ucx_wait_all_requests(void **reqs, int count, ucp_worker_h worker) +{ + int i; + + MCA_COMMON_UCX_VERBOSE(2, "waiting for %d disconnect requests", count); + for (i = 0; i < count; ++i) { + opal_common_ucx_wait_request(reqs[i], worker, "ucp_disconnect_nb"); + reqs[i] = NULL; + } +} + +OPAL_DECLSPEC int opal_common_ucx_del_procs_nofence(opal_common_ucx_del_proc_t *procs, + size_t count, size_t my_rank, + size_t max_disconnect, + ucp_worker_h worker) +{ + size_t num_reqs; + size_t max_reqs; + void *dreq, **dreqs; + size_t i; + size_t n; + + MCA_COMMON_UCX_ASSERT(procs || !count); + MCA_COMMON_UCX_ASSERT(max_disconnect > 0); + + max_reqs = (max_disconnect > count) ? 
count : max_disconnect; + + dreqs = malloc(sizeof(*dreqs) * max_reqs); + if (dreqs == NULL) { + return OPAL_ERR_OUT_OF_RESOURCE; + } + + num_reqs = 0; + + for (i = 0; i < count; ++i) { + n = (i + my_rank) % count; + if (procs[n].ep == NULL) { + continue; + } + + MCA_COMMON_UCX_VERBOSE(2, "disconnecting from rank %zu", procs[n].vpid); + dreq = ucp_disconnect_nb(procs[n].ep); + if (dreq != NULL) { + if (UCS_PTR_IS_ERR(dreq)) { + MCA_COMMON_UCX_ERROR("ucp_disconnect_nb(%zu) failed: %s", procs[n].vpid, + ucs_status_string(UCS_PTR_STATUS(dreq))); + continue; + } else { + dreqs[num_reqs++] = dreq; + if (num_reqs >= max_disconnect) { + opal_common_ucx_wait_all_requests(dreqs, num_reqs, worker); + num_reqs = 0; + } + } + } + } + /* num_reqs == 0 is processed by opal_common_ucx_wait_all_requests routine, + * so suppress coverity warning */ + /* coverity[uninit_use_in_call] */ + opal_common_ucx_wait_all_requests(dreqs, num_reqs, worker); + free(dreqs); + + return OPAL_SUCCESS; } +OPAL_DECLSPEC int opal_common_ucx_del_procs(opal_common_ucx_del_proc_t *procs, size_t count, + size_t my_rank, size_t max_disconnect, ucp_worker_h worker) +{ + opal_common_ucx_del_procs_nofence(procs, count, my_rank, max_disconnect, worker); + + return opal_common_ucx_mca_pmix_fence(worker); +} diff --git a/opal/mca/common/ucx/common_ucx.h b/opal/mca/common/ucx/common_ucx.h index 13a03000e83..202131ac890 100644 --- a/opal/mca/common/ucx/common_ucx.h +++ b/opal/mca/common/ucx/common_ucx.h @@ -34,15 +34,18 @@ BEGIN_C_DECLS # define MCA_COMMON_UCX_ASSERT(_x) #endif +#define UCX_VERSION(_major, _minor, _build) (((_major) * 100) + (_minor)) + #define _MCA_COMMON_UCX_QUOTE(_x) \ # _x #define MCA_COMMON_UCX_QUOTE(_x) \ _MCA_COMMON_UCX_QUOTE(_x) -#define MCA_COMMON_UCX_ERROR(...) \ - opal_output_verbose(0, opal_common_ucx.output, \ - __FILE__ ":" MCA_COMMON_UCX_QUOTE(__LINE__) \ - " Error: " __VA_ARGS__) +#define MCA_COMMON_UCX_ERROR(...) 
\ + MCA_COMMON_UCX_VERBOSE(0, " Error: " __VA_ARGS__) + +#define MCA_COMMON_UCX_WARN(...) \ + MCA_COMMON_UCX_VERBOSE(0, " Warning: " __VA_ARGS__) #define MCA_COMMON_UCX_VERBOSE(_level, ... ) \ if (((_level) <= MCA_COMMON_UCX_MAX_VERBOSE) && \ @@ -52,6 +55,33 @@ BEGIN_C_DECLS __VA_ARGS__); \ } +/* progress loop to allow call UCX/opal progress */ +/* used C99 for-statement variable initialization */ +#define MCA_COMMON_UCX_PROGRESS_LOOP(_worker) \ + for (unsigned iter = 0;; (++iter % opal_common_ucx.progress_iterations) ? \ + (void)ucp_worker_progress(_worker) : opal_progress()) + +#define MCA_COMMON_UCX_WAIT_LOOP(_request, _worker, _msg, _completed) \ + do { \ + ucs_status_t status; \ + /* call UCX progress */ \ + MCA_COMMON_UCX_PROGRESS_LOOP(_worker) { \ + status = opal_common_ucx_request_status(_request); \ + if (UCS_INPROGRESS != status) { \ + _completed; \ + if (OPAL_LIKELY(UCS_OK == status)) { \ + return OPAL_SUCCESS; \ + } else { \ + MCA_COMMON_UCX_VERBOSE(1, "%s failed: %d, %s", \ + (_msg) ? 
(_msg) : __func__, \ + UCS_PTR_STATUS(_request), \ + ucs_status_string(UCS_PTR_STATUS(_request))); \ + return OPAL_ERROR; \ + } \ + } \ + } \ + } while (0) + typedef struct opal_common_ucx_module { int output; int verbose; @@ -60,59 +90,52 @@ typedef struct opal_common_ucx_module { bool opal_mem_hooks; } opal_common_ucx_module_t; +typedef struct opal_common_ucx_del_proc { + ucp_ep_h ep; + size_t vpid; +} opal_common_ucx_del_proc_t; + extern opal_common_ucx_module_t opal_common_ucx; OPAL_DECLSPEC void opal_common_ucx_mca_register(void); OPAL_DECLSPEC void opal_common_ucx_mca_deregister(void); +OPAL_DECLSPEC void opal_common_ucx_mca_proc_added(void); OPAL_DECLSPEC void opal_common_ucx_empty_complete_cb(void *request, ucs_status_t status); -OPAL_DECLSPEC void opal_common_ucx_mca_pmix_fence(ucp_worker_h worker); +OPAL_DECLSPEC int opal_common_ucx_mca_pmix_fence(ucp_worker_h worker); +OPAL_DECLSPEC int opal_common_ucx_mca_pmix_fence_nb(int *fenced); +OPAL_DECLSPEC int opal_common_ucx_del_procs(opal_common_ucx_del_proc_t *procs, size_t count, + size_t my_rank, size_t max_disconnect, ucp_worker_h worker); +OPAL_DECLSPEC int opal_common_ucx_del_procs_nofence(opal_common_ucx_del_proc_t *procs, size_t count, + size_t my_rank, size_t max_disconnect, ucp_worker_h worker); +OPAL_DECLSPEC void opal_common_ucx_mca_var_register(const mca_base_component_t *component); static inline -int opal_common_ucx_wait_request(ucs_status_ptr_t request, ucp_worker_h worker, - const char *msg) +ucs_status_t opal_common_ucx_request_status(ucs_status_ptr_t request) { - ucs_status_t status; - int i; #if !HAVE_DECL_UCP_REQUEST_CHECK_STATUS ucp_tag_recv_info_t info; + + return ucp_request_test(request, &info); +#else + return ucp_request_check_status(request); #endif +} +static inline +int opal_common_ucx_wait_request(ucs_status_ptr_t request, ucp_worker_h worker, + const char *msg) +{ /* check for request completed or failed */ if (OPAL_LIKELY(UCS_OK == request)) { return OPAL_SUCCESS; } else if 
(OPAL_UNLIKELY(UCS_PTR_IS_ERR(request))) { - MCA_COMMON_UCX_VERBOSE(1, "%s failed: %d, %s", msg ? msg : __FUNCTION__, + MCA_COMMON_UCX_VERBOSE(1, "%s failed: %d, %s", msg ? msg : __func__, UCS_PTR_STATUS(request), ucs_status_string(UCS_PTR_STATUS(request))); return OPAL_ERROR; } - while (1) { - /* call UCX progress */ - for (i = 0; i < opal_common_ucx.progress_iterations; i++) { - if (UCS_INPROGRESS != (status = -#if HAVE_DECL_UCP_REQUEST_CHECK_STATUS - ucp_request_check_status(request) -#else - ucp_request_test(request, &info) -#endif - )) { - ucp_request_free(request); - if (OPAL_LIKELY(UCS_OK == status)) { - return OPAL_SUCCESS; - } else { - MCA_COMMON_UCX_VERBOSE(1, "%s failed: %d, %s", msg ? msg : __FUNCTION__, - UCS_PTR_STATUS(request), - ucs_status_string(UCS_PTR_STATUS(request))); - return OPAL_ERROR; - } - } - ucp_worker_progress(worker); - } - /* call OPAL progress on every opal_common_ucx_progress_iterations - * calls to UCX progress */ - opal_progress(); - } + MCA_COMMON_UCX_WAIT_LOOP(request, worker, msg, ucp_request_free(request)); } static inline diff --git a/opal/mca/common/verbs_usnic/configure.m4 b/opal/mca/common/verbs_usnic/configure.m4 index 68fed9404b8..4e2f2d5654a 100644 --- a/opal/mca/common/verbs_usnic/configure.m4 +++ b/opal/mca/common/verbs_usnic/configure.m4 @@ -69,6 +69,21 @@ AC_DEFUN([MCA_opal_common_verbs_usnic_CONFIG],[ [common_verbs_usnic_happy=0]) ]) + AS_IF([test $common_verbs_usnic_happy -eq 1], + [AC_CHECK_MEMBER([struct ibv_device.ops], + [], + [AC_MSG_WARN([--with-verbs-usnic specified, but the verbs.h does not]) + AC_MSG_WARN([have the required member fields. It is highly likely]) + AC_MSG_WARN([that you do not need --with-verbs-usnic. 
Try configuring]) + AC_MSG_WARN([and building Open MPI without it; if you get warnings]) + AC_MSG_WARN([about usnic IB devices anyway, please let us know.]) + AC_MSG_WARN([Since you asked for --with-verbs-usnic and we cannot]) + AC_MSG_WARN([deliver it, configure will now abort.]) + AC_MSG_ERROR([Cannot continue]) + ], + [#include ]) + ]) + AC_DEFINE_UNQUOTED([OPAL_COMMON_VERBS_USNIC_HAPPY], [$common_verbs_usnic_happy], [Whether the common/usnic_verbs component is being built or not]) diff --git a/opal/mca/crs/blcr/.opal_ignore b/opal/mca/crs/blcr/.opal_ignore deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/opal/mca/crs/blcr/Makefile.am b/opal/mca/crs/blcr/Makefile.am deleted file mode 100644 index 7e0e22bc4d1..00000000000 --- a/opal/mca/crs/blcr/Makefile.am +++ /dev/null @@ -1,51 +0,0 @@ -# -# Copyright (c) 2004-2007 The Trustees of Indiana University. -# All rights reserved. -# Copyright (c) 2004-2005 The Trustees of the University of Tennessee. -# All rights reserved. -# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, -# University of Stuttgart. All rights reserved. -# Copyright (c) 2004-2005 The Regents of the University of California. -# All rights reserved. -# Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. -# Copyright (c) 2017 IBM Corporation. All rights reserved. -# $COPYRIGHT$ -# -# Additional copyrights may follow -# -# $HEADER$ -# - -CFLAGS = $(crs_blcr_CFLAGS) -AM_CPPFLAGS = $(crs_blcr_CPPFLAGS) - -dist_opaldata_DATA = help-opal-crs-blcr.txt - -sources = \ - crs_blcr.h \ - crs_blcr_component.c \ - crs_blcr_module.c - -# Make the output library in this directory, and name it either -# mca__.la (for DSO builds) or libmca__.la -# (for static builds). 
- -if MCA_BUILD_opal_crs_blcr_DSO -component_noinst = -component_install = mca_crs_blcr.la -else -component_noinst = libmca_crs_blcr.la -component_install = -endif - -mcacomponentdir = $(opallibdir) -mcacomponent_LTLIBRARIES = $(component_install) -mca_crs_blcr_la_SOURCES = $(sources) -mca_crs_blcr_la_LDFLAGS = -module -avoid-version $(crs_blcr_LDFLAGS) -mca_crs_blcr_la_LIBADD = $(top_builddir)/opal/lib@OPAL_LIB_PREFIX@open-pal.la \ - $(crs_blcr_LIBS) - -noinst_LTLIBRARIES = $(component_noinst) -libmca_crs_blcr_la_SOURCES = $(sources) -libmca_crs_blcr_la_LDFLAGS = -module -avoid-version $(crs_blcr_LDFLAGS) -libmca_crs_blcr_la_LIBADD = $(crs_blcr_LIBS) diff --git a/opal/mca/crs/blcr/configure.m4 b/opal/mca/crs/blcr/configure.m4 deleted file mode 100644 index 3aea23106bc..00000000000 --- a/opal/mca/crs/blcr/configure.m4 +++ /dev/null @@ -1,204 +0,0 @@ -# -*- shell-script -*- -# -# Copyright (c) 2004-2010 The Trustees of Indiana University. -# All rights reserved. -# Copyright (c) 2004-2005 The Trustees of the University of Tennessee. -# All rights reserved. -# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, -# University of Stuttgart. All rights reserved. -# Copyright (c) 2004-2006 The Regents of the University of California. -# All rights reserved. -# Copyright (c) 2009-2014 Cisco Systems, Inc. All rights reserved. -# Copyright (c) 2011 Oak Ridge National Labs. All rights reserved. -# Copyright (c) 2015 Research Organization for Information Science -# and Technology (RIST). All rights reserved. 
-# $COPYRIGHT$ -# -# Additional copyrights may follow -# -# $HEADER$ -# - -# MCA_crs_blcr_CONFIG([action-if-found], [action-if-not-found]) -# ----------------------------------------------------------- -AC_DEFUN([MCA_opal_crs_blcr_CONFIG],[ - AC_CONFIG_FILES([opal/mca/crs/blcr/Makefile]) - - AC_ARG_WITH([blcr], - [AC_HELP_STRING([--with-blcr(=DIR)], - [Path to BLCR Installation])]) - OPAL_CHECK_WITHDIR([blcr], [$with_blcr], [include/libcr.h]) - AC_ARG_WITH([blcr-libdir], - [AC_HELP_STRING([--with-blcr-libdir=DIR], - [Search for BLCR libraries in DIR])]) - OPAL_CHECK_WITHDIR([blcr-libdir], [$with_blcr_libdir], [libcr.*]) - - check_crs_blcr_good="no" - - # If we do not want FT, don't compile this component - # - # If we wanted BLCR, but did not specify the FT option, - # error out with a warning for the user - AS_IF([test "$opal_want_ft_cr" = "0"], - [$2 - check_crs_blcr_good="no" - AS_IF([test ! -z "$with_blcr" && test "$with_blcr" != "no"], - [AC_MSG_WARN([BLCR support requested, but FT support not requested. You need to specify the --with-ft=cr configure option.]) - AC_MSG_ERROR([Aborting.])]) - ], - [check_crs_blcr_good="yes"]) - - # If we do not want BLCR, then do not compile it - AS_IF([test "$with_blcr" = "no" || test "$check_crs_blcr_good" = "no"], - [$2 - check_crs_blcr_good="no"], - [check_crs_blcr_good="yes"]) - - # Defaults - check_crs_blcr_dir_msg="compiler default" - check_crs_blcr_libdir_msg="linker default" - check_crs_blcr_dir="" - check_crs_blcr_libdir="" - - # Determine the search paths for the headers and libraries - AS_IF([test "$check_crs_blcr_good" != "yes"], [$2], - [AS_IF([test ! -z "$with_blcr" && test "$with_blcr" != "yes"], - [check_crs_blcr_dir="$with_blcr" - check_crs_blcr_dir_msg="$with_blcr (from --with-blcr)"]) - AS_IF([test ! 
-z "$with_blcr_libdir" && test "$with_blcr_libdir" != "yes"], - [check_crs_blcr_libdir="$with_blcr_libdir" - check_crs_blcr_libdir_msg="$with_blcr_libdir (from --with-blcr-libdir)"]) - ]) - - AS_IF([test "$check_crs_blcr_good" != "yes"], [$2], - [AC_MSG_CHECKING([for BLCR dir]) - AC_MSG_RESULT([$check_crs_blcr_dir_msg]) - AC_MSG_CHECKING([for BLCR library dir]) - AC_MSG_RESULT([$check_crs_blcr_libdir_msg]) - OPAL_CHECK_PACKAGE([crs_blcr_check], - [libcr.h], - [cr], - [cr_init], - [], - [$check_crs_blcr_dir], - [$check_crs_blcr_libdir], - [check_crs_blcr_good="yes"], - [check_crs_blcr_good="no"]) - ]) - - crs_blcr_save_CFLAGS="$CFLAGS" - crs_blcr_save_CPPFLAGS="$CPPFLAGS" - crs_blcr_save_LDFLAGS="$LDFLAGS" - crs_blcr_save_LIBS="$LIBS" - - crs_blcr_CFLAGS="$CFLAGS $crs_blcr_check_CFLAGS" - crs_blcr_CPPFLAGS="$CPPFLAGS $crs_blcr_check_CPPFLAGS" - crs_blcr_LDFLAGS="$LDFLAGS $crs_blcr_check_LDFLAGS" - crs_blcr_LIBS="$LIBS $crs_blcr_check_LIBS" - - # Check to see if we found the BLCR libcr.h library - AS_IF([test "$check_crs_blcr_good" != "yes"], [$2], - [ - # - # Since BLCR libraries are not fully ISO99 C compliant - # -pedantic and -Wundef raise a bunch of warnings, so - # we just strip them off for this component - AC_MSG_WARN([Removed -pedantic and -Wundef from CFLAGS for blcr component because libcr.h is not really ANSI C]) - # Strip off problematic arguments - crs_blcr_CFLAGS="`echo $crs_blcr_CFLAGS | sed 's/-pedantic//g'`" - crs_blcr_CFLAGS="`echo $crs_blcr_CFLAGS | sed 's/-Wundef//g'`" - crs_blcr_CPPFLAGS="`echo $crs_blcr_CPPFLAGS | sed 's/-pedantic//g'`" - crs_blcr_CPPFLAGS="`echo $crs_blcr_CPPFLAGS | sed 's/-Wundef//g'`" - crs_blcr_LDFLAGS="$crs_blcr_LDFLAGS" - crs_blcr_LIBS="$crs_blcr_LIBS" - $1]) - - # - # Check for version difference which may have: - # - working cr_request_file - # - working cr_request_checkpoint (which should be used instead of cr_request_file) - # - 'requester' parameter to checkpoint_info - # - AS_IF([test "$check_crs_blcr_good" != 
"yes"], [$2], [ - CFLAGS="$crs_blcr_CFLAGS" - CPPFLAGS="$crs_blcr_CPPFLAGS" - LDFLAGS="$crs_blcr_LDFLAGS" - LIBS="$crs_blcr_LIBS" - # - # First look for the cr_request_file function - # - crs_blcr_have_working_cr_request=0 - AC_MSG_CHECKING(for BLCR working cr_request) - OPAL_SEARCH_LIBS_COMPONENT([crs_blcr], [cr_request_file],[cr], - [AC_TRY_COMPILE([#include ], - [#if CR_RELEASE_MAJOR <= 0 && CR_RELEASE_MINOR < 6 - #error Version earlier than 0.6.0 - #endif - ], - [crs_blcr_have_working_cr_request=1 - ], - [crs_blcr_have_working_cr_request=0 - AC_MSG_WARN([This BLCR version does not contain a known working version of cr_request_file]) - ])], - [crs_blcr_have_working_cr_request=0 - AC_MSG_WARN([This BLCR version does not contain the cr_request_file function]) - ]) - AC_DEFINE_UNQUOTED([CRS_BLCR_HAVE_CR_REQUEST], [$crs_blcr_have_working_cr_request], - [BLCR cr_request_file check]) - - # - # Look for the cr_request_checkpoint function - # - crs_blcr_have_cr_request_checkpoint=0 - AC_MSG_CHECKING(for BLCR cr_request_checkpoint) - OPAL_SEARCH_LIBS_COMPONENT([crs_blcr], - [cr_request_checkpoint],[cr], - [crs_blcr_have_cr_request_checkpoint=1 - ], - [crs_blcr_have_cr_request_checkpoint=0 - AC_MSG_WARN([This BLCR version does not contain the cr_request_checkpoint function]) - ]) - AC_DEFINE_UNQUOTED([CRS_BLCR_HAVE_CR_REQUEST_CHECKPOINT], [$crs_blcr_have_cr_request_checkpoint], - [BLCR cr_request_checkpoint check]) - - # - # Look for the cr_checkpoint_info.requester member - # - crs_blcr_have_info_requester=0 - AC_CHECK_MEMBER([struct cr_checkpoint_info.requester], - [crs_blcr_have_info_requester=1], - [AC_MSG_WARN([This BLCR version does not contain a 'requester' member of the 'cr_checkpoint_info' struct])], - [#include ]) - AC_DEFINE_UNQUOTED([CRS_BLCR_HAVE_INFO_REQUESTER], [$crs_blcr_have_info_requester], - [BLCRs cr_checkpoint_info.requester member availability]) - $1]) - - # - # Require either a working cr_request_file() or cr_request_checkpoint() function - # - 
AS_IF([test "$crs_blcr_have_working_cr_request" = "0" && test "$crs_blcr_have_cr_request_checkpoint" = "0"], - [$2 - check_crs_blcr_good="no" - AC_MSG_WARN([The BLCR CRS component requires either the cr_request_checkpoint() or cr_request_file() functions])]) - - # - # Reset the flags - # - CFLAGS="$crs_blcr_save_CFLAGS" - CPPFLAGS="$crs_blcr_save_CPPFLAGS" - LDFLAGS="$crs_blcr_save_LDFLAGS" - LIBS="$crs_blcr_save_LIBS" - - # - AS_IF([test "$check_crs_blcr_good" = "yes"], - [ AC_SUBST([crs_blcr_CFLAGS]) - AC_SUBST([crs_blcr_CPPFLAGS]) - AC_SUBST([crs_blcr_LDFLAGS]) - AC_SUBST([crs_blcr_LIBS]) - $1], - [AS_IF([test ! -z "$with_blcr" && test "$with_blcr" != "no"], - [AC_MSG_WARN([BLCR support requested but not found. Perhaps you need to specify the location of the BLCR libraries.]) - AC_MSG_ERROR([Aborting.])]) - $3]) - -])dnl diff --git a/opal/mca/crs/blcr/crs_blcr.h b/opal/mca/crs/blcr/crs_blcr.h deleted file mode 100644 index f4678d76217..00000000000 --- a/opal/mca/crs/blcr/crs_blcr.h +++ /dev/null @@ -1,84 +0,0 @@ -/* - * Copyright (c) 2004-2009 The Trustees of Indiana University. - * All rights reserved. - * Copyright (c) 2004-2005 The Trustees of the University of Tennessee. - * All rights reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -/** - * @file - * - * BLCR CRS component - * - */ - -#ifndef MCA_CRS_BLCR_EXPORT_H -#define MCA_CRS_BLCR_EXPORT_H - -#include "opal_config.h" - - -#include "opal/mca/mca.h" -#include "opal/mca/crs/crs.h" -#include "opal/mca/base/base.h" - -#include - -BEGIN_C_DECLS - - /* - * Local Component structures - */ - struct opal_crs_blcr_component_t { - /** Base CRS component */ - opal_crs_base_component_t super; - }; - typedef struct opal_crs_blcr_component_t opal_crs_blcr_component_t; - OPAL_MODULE_DECLSPEC extern opal_crs_blcr_component_t mca_crs_blcr_component; - - int opal_crs_blcr_component_query(mca_base_module_t **module, int *priority); - - extern bool opal_crs_blcr_dev_null; - - /* - * Module functions - */ - int opal_crs_blcr_module_init(void); - int opal_crs_blcr_module_finalize(void); - - /* - * Actual funcationality - */ - int opal_crs_blcr_checkpoint( pid_t pid, - opal_crs_base_snapshot_t *snapshot, - opal_crs_base_ckpt_options_t *options, - opal_crs_state_type_t *state); - - int opal_crs_blcr_restart( opal_crs_base_snapshot_t *snapshot, - bool spawn_child, - pid_t *child_pid); - - int opal_crs_blcr_disable_checkpoint(void); - int opal_crs_blcr_enable_checkpoint(void); - - int opal_crs_blcr_prelaunch(int32_t rank, - char *base_snapshot_dir, - char **app, - char **cwd, - char ***argv, - char ***env); - - int opal_crs_blcr_reg_thread(void); - -END_C_DECLS - -#endif /* MCA_CRS_BLCR_EXPORT_H */ diff --git a/opal/mca/crs/blcr/crs_blcr_component.c b/opal/mca/crs/blcr/crs_blcr_component.c deleted file mode 100644 index 57cb8e43abd..00000000000 --- a/opal/mca/crs/blcr/crs_blcr_component.c +++ /dev/null @@ -1,145 +0,0 @@ -/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ -/* - * Copyright (c) 2004-2009 The Trustees of Indiana University. - * All rights reserved. - * Copyright (c) 2004-2005 The Trustees of the University of Tennessee. - * All rights reserved. 
- * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * Copyright (c) 2015 Los Alamos National Security, LLC. All rights - * reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "opal_config.h" - -#include "opal/util/output.h" - -#include "opal/constants.h" -#include "opal/mca/crs/crs.h" -#include "opal/mca/crs/base/base.h" -#include "crs_blcr.h" - -/* - * Local functionality - */ -static int crs_blcr_register (void); -static int crs_blcr_open(void); -static int crs_blcr_close(void); - -bool opal_crs_blcr_dev_null = false; - -/* - * Instantiate the public struct with all of our public information - * and pointer to our public functions in it - */ -opal_crs_blcr_component_t mca_crs_blcr_component = { - /* First do the base component stuff */ - { - /* Handle the general mca_component_t struct containing - * meta information about the component itself - */ - .base_version = { - OPAL_CRS_BASE_VERSION_2_0_0, - - /* Component name and version */ - .mca_component_name = "blcr", - MCA_BASE_MAKE_VERSION(component, OPAL_MAJOR_VERSION, OPAL_MINOR_VERSION, - OPAL_RELEASE_VERSION), - - /* Component open and close functions */ - .mca_open_component = crs_blcr_open, - .mca_close_component = crs_blcr_close, - .mca_query_component = opal_crs_blcr_component_query, - .mca_register_component_params = crs_blcr_register - }, - .base_data = { - /* The component is checkpoint ready */ - MCA_BASE_METADATA_PARAM_CHECKPOINT - }, - - .verbose = 0, - .output_handle = -1, - } -}; - -static int crs_blcr_register (void) -{ - int ret; - - mca_crs_blcr_component.super.priority = 10; - ret = mca_base_component_var_register (&mca_crs_blcr_component.super.base_version, - "priority", "Priority of the CRS blcr component " - "(default: 10)". 
MCA_BASE_VAR_TYPE_INT, NULL, - MCA_BASE_VAR_FLAG_SETTABLE, - OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_ALL_EQ, - &mca_crs_blcr_component.super.priority); - if (0 > ret) { - return ret; - } - - mca_crs_blcr_component.super.verbose = 0; - ret = mca_base_component_var_register (&mca_crs_blcr_component.super.base_version, - "verbose", - "Verbose level for the CRS blcr component", - MCA_BASE_VAR_TYPE_INT, NULL, MCA_BASE_VAR_FLAG_SETTABLE, - OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_LOCAL, - &mca_crs_blcr_component.super.verbose); - if (0 > ret) { - return ret; - } - - opal_crs_blcr_dev_null = false; - ret = mca_base_component_var_register (&mca_crs_blcr_component.super.base_version, - "dev_null", - "Not for general use! For debugging only! Save checkpoint to /dev/null. [Default = disabled]", - MCA_BASE_VAR_TYPE_BOOL, NULL, MCA_BASE_VAR_FLAG_SETTABLE, - OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_ALL_EQ, - &opal_crs_blcr_dev_null); - return (0 > ret) ? ret : OPAL_SUCCESS -} - -static int crs_blcr_open(void) -{ - /* If there is a custom verbose level for this component than use it - * otherwise take our parents level and output channel - */ - if ( 0 != mca_crs_blcr_component.super.verbose) { - mca_crs_blcr_component.super.output_handle = opal_output_open(NULL); - opal_output_set_verbosity(mca_crs_blcr_component.super.output_handle, - mca_crs_blcr_component.super.verbose); - } else { - mca_crs_blcr_component.super.output_handle = opal_crs_base_framework.framework_output; - } - - /* - * Debug output - */ - opal_output_verbose(10, mca_crs_blcr_component.super.output_handle, - "crs:blcr: open()"); - opal_output_verbose(20, mca_crs_blcr_component.super.output_handle, - "crs:blcr: open: priority = %d", - mca_crs_blcr_component.super.priority); - opal_output_verbose(20, mca_crs_blcr_component.super.output_handle, - "crs:blcr: open: verbosity = %d", - mca_crs_blcr_component.super.verbose); - opal_output_verbose(10, mca_crs_blcr_component.super.output_handle, - "crs:blcr: open: dev_null = %s", - 
(opal_crs_blcr_dev_null == true ? "True" : "False")); - - return OPAL_SUCCESS; -} - -static int crs_blcr_close(void) -{ - opal_output_verbose(10, mca_crs_blcr_component.super.output_handle, - "crs:blcr: close()"); - - return OPAL_SUCCESS; -} diff --git a/opal/mca/crs/blcr/crs_blcr_module.c b/opal/mca/crs/blcr/crs_blcr_module.c deleted file mode 100644 index c84e79bfbe2..00000000000 --- a/opal/mca/crs/blcr/crs_blcr_module.c +++ /dev/null @@ -1,866 +0,0 @@ -/* - * Copyright (c) 2004-2010 The Trustees of Indiana University. - * All rights reserved. - * Copyright (c) 2004-2005 The Trustees of the University of Tennessee. - * All rights reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * Copyright (c) 2007 Evergrid, Inc. All rights reserved. - * Copyright (c) 2011 Oak Ridge National Labs. All rights reserved. - * - * Copyright (c) 2017 IBM Corporation. All rights reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "opal_config.h" - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "opal/util/show_help.h" -#include "opal/util/output.h" -#include "opal/util/argv.h" -#include "opal/constants.h" - -#include "opal/mca/base/mca_base_var.h" - -#include "opal/threads/threads.h" -#include "opal/threads/mutex.h" -#include "opal/threads/condition.h" - -#include "opal/mca/event/event.h" - -#include "opal/mca/crs/crs.h" -#include "opal/mca/crs/base/base.h" - -#include "crs_blcr.h" - -/* - * Blcr module - */ -static opal_crs_base_module_t blcr_module = { - /** Initialization Function */ - opal_crs_blcr_module_init, - /** Finalization Function */ - opal_crs_blcr_module_finalize, - - /** Checkpoint interface */ - opal_crs_blcr_checkpoint, - - /** Restart Command Access */ - opal_crs_blcr_restart, - - /** Disable checkpoints */ - opal_crs_blcr_disable_checkpoint, - /** Enable checkpoints */ - opal_crs_blcr_enable_checkpoint, - - /** Prelaunch */ - opal_crs_blcr_prelaunch, - - /** Register Thread */ - opal_crs_blcr_reg_thread -}; - -/*************************** - * Snapshot Class Functions - ***************************/ -OBJ_CLASS_DECLARATION(opal_crs_blcr_snapshot_t); - -struct opal_crs_blcr_snapshot_t { - /** Base CRS snapshot type */ - opal_crs_base_snapshot_t super; - char * context_filename; -}; -typedef struct opal_crs_blcr_snapshot_t opal_crs_blcr_snapshot_t; - -void opal_crs_blcr_construct(opal_crs_blcr_snapshot_t *obj); -void opal_crs_blcr_destruct( opal_crs_blcr_snapshot_t *obj); - -OBJ_CLASS_INSTANCE(opal_crs_blcr_snapshot_t, - opal_crs_base_snapshot_t, - opal_crs_blcr_construct, - opal_crs_blcr_destruct); - -/****************** - * Local Functions - ******************/ -static int blcr_get_checkpoint_filename(char **fname, pid_t pid); -static int opal_crs_blcr_thread_callback(void *arg); -static int opal_crs_blcr_signal_callback(void *arg); 
- -static int opal_crs_blcr_restart_cmd(char *fname, char **cmd); - -static int blcr_cold_start(opal_crs_blcr_snapshot_t *snapshot); - -#if OPAL_ENABLE_CRDEBUG == 1 -static void MPIR_checkpoint_debugger_crs_hook(cr_hook_event_t event); -#endif - -/************************* - * Local Global Variables - *************************/ -#if OPAL_ENABLE_CRDEBUG == 1 -static opal_thread_t *checkpoint_thread_id = NULL; -static bool blcr_crdebug_refreshed_env = false; -#endif - -static cr_client_id_t client_id; -static cr_callback_id_t cr_thread_callback_id; -static cr_callback_id_t cr_signal_callback_id; -static int blcr_current_state = OPAL_CRS_NONE; - -static char *blcr_restart_cmd = NULL; -static char *blcr_checkpoint_cmd = NULL; - -static opal_condition_t blcr_cond; -static opal_mutex_t blcr_lock; - -static pid_t my_pid = -1; - -void opal_crs_blcr_construct(opal_crs_blcr_snapshot_t *snapshot) { - snapshot->context_filename = NULL; - snapshot->super.component_name = strdup(mca_crs_blcr_component.super.base_version.mca_component_name); -} - -void opal_crs_blcr_destruct( opal_crs_blcr_snapshot_t *snapshot) { - if(NULL != snapshot->context_filename) { - free(snapshot->context_filename); - snapshot->context_filename = NULL; - } -} - -/***************** - * MCA Functions - *****************/ -int opal_crs_blcr_component_query(mca_base_module_t **module, int *priority) -{ - opal_output_verbose(10, mca_crs_blcr_component.super.output_handle, - "crs:blcr: component_query()"); - - *priority = mca_crs_blcr_component.super.priority; - *module = (mca_base_module_t *)&blcr_module; - - return OPAL_SUCCESS; -} - -int opal_crs_blcr_module_init(void) -{ - void *crs_blcr_thread_callback_arg = NULL; - void *crs_blcr_signal_callback_arg = NULL; - - opal_output_verbose(10, mca_crs_blcr_component.super.output_handle, - "crs:blcr: module_init()"); - - blcr_restart_cmd = strdup("cr_restart"); - blcr_checkpoint_cmd = strdup("cr_checkpoint"); - - my_pid = getpid(); - - if( !opal_cr_is_tool ) { - /* 
We need to make the lock and condition variable before - * starting the thread, since the thread uses these vars. - */ - OBJ_CONSTRUCT(&blcr_lock, opal_mutex_t); - OBJ_CONSTRUCT(&blcr_cond, opal_condition_t); - - /* - * Initialize BLCR - */ - client_id = cr_init(); - if (0 > client_id) { - opal_output(mca_crs_blcr_component.super.output_handle, - "Error: crs:blcr: module_init: cr_init failed (%d)\n", client_id); - return OPAL_ERROR; - } - } - -#if OPAL_ENABLE_CRDEBUG == 1 - blcr_crdebug_refreshed_env = false; -#endif - - blcr_restart_cmd = strdup("cr_restart"); - blcr_checkpoint_cmd = strdup("cr_checkpoint"); - - if( !opal_cr_is_tool ) { - /* - * Register the thread handler - */ - cr_thread_callback_id = cr_register_callback(opal_crs_blcr_thread_callback, - crs_blcr_thread_callback_arg, - CR_THREAD_CONTEXT); - /* - * Register the signal handler - * - even though we do not use it - */ - cr_signal_callback_id = cr_register_callback(opal_crs_blcr_signal_callback, - crs_blcr_signal_callback_arg, - CR_SIGNAL_CONTEXT); - -#if OPAL_ENABLE_CRDEBUG == 1 - /* - * Checkpoint/restart enabled debugging hooks - * "NO_CALLBACKS" -> non-MPI threads - * "SIGNAL_CONTEXT" -> MPI threads - * "THREAD_CONTEXT" -> BLCR threads - */ - cr_register_hook(CR_HOOK_CONT_NO_CALLBACKS, MPIR_checkpoint_debugger_crs_hook); - cr_register_hook(CR_HOOK_CONT_SIGNAL_CONTEXT, MPIR_checkpoint_debugger_crs_hook); - - cr_register_hook(CR_HOOK_RSTRT_NO_CALLBACKS, MPIR_checkpoint_debugger_crs_hook); - cr_register_hook(CR_HOOK_RSTRT_SIGNAL_CONTEXT, MPIR_checkpoint_debugger_crs_hook); -#endif - } - - /* - * Now that we are done with init, set the state to running - */ - blcr_current_state = OPAL_CRS_RUNNING; - - opal_output_verbose(10, mca_crs_blcr_component.super.output_handle, - "crs:blcr: module_init() --> Finished [%d]", - opal_cr_is_tool); - - return OPAL_SUCCESS; -} - -int opal_crs_blcr_prelaunch(int32_t rank, - char *base_snapshot_dir, - char **app, - char **cwd, - char ***argv, - char ***env) -{ - char 
* tmp_env_var = NULL; - - (void) mca_base_var_env_name("opal_cr_is_tool", &tmp_env_var); - opal_setenv(tmp_env_var, - "0", true, env); - free(tmp_env_var); - tmp_env_var = NULL; - - return OPAL_SUCCESS; -} - -int opal_crs_blcr_reg_thread(void) -{ - cr_client_id_t loc_client_id; - - /* - * Initialize BLCR - */ - loc_client_id = cr_init(); - if (0 > loc_client_id) { - opal_output(mca_crs_blcr_component.super.output_handle, - "Error: crs:blcr: reg_thread: cr_init failed (%d)\n", loc_client_id); - return OPAL_ERROR; - } - - return OPAL_SUCCESS; -} - -int opal_crs_blcr_module_finalize(void) -{ - opal_output_verbose(10, mca_crs_blcr_component.super.output_handle, - "crs:blcr: module_finalize()"); - - /* Cleanup some memory */ - if( NULL != blcr_restart_cmd ) { - free(blcr_restart_cmd); - blcr_restart_cmd = NULL; - } - if( NULL != blcr_checkpoint_cmd ) { - free(blcr_checkpoint_cmd); - blcr_checkpoint_cmd = NULL; - } - - if( !opal_cr_is_tool ) { - OBJ_DESTRUCT(&blcr_lock); - OBJ_DESTRUCT(&blcr_cond); - - if( OPAL_CRS_RUNNING == blcr_current_state ) { - /* Unload the thread callback */ - cr_replace_callback(cr_thread_callback_id, NULL, NULL, CR_THREAD_CONTEXT); - /* Unload the signal callback */ - cr_replace_callback(cr_signal_callback_id, NULL, NULL, CR_SIGNAL_CONTEXT); - } - -#if OPAL_ENABLE_CRDEBUG == 1 - /* - * Checkpoint/restart enabled debugging hooks - */ - cr_register_hook(CR_HOOK_CONT_NO_CALLBACKS, NULL); - cr_register_hook(CR_HOOK_CONT_SIGNAL_CONTEXT, NULL); - - cr_register_hook(CR_HOOK_RSTRT_NO_CALLBACKS, NULL); - cr_register_hook(CR_HOOK_RSTRT_SIGNAL_CONTEXT, NULL); -#endif - } - - /* BLCR does not have a finalization routine */ - blcr_current_state = OPAL_CRS_NONE; - - return OPAL_SUCCESS; -} - -int opal_crs_blcr_checkpoint(pid_t pid, - opal_crs_base_snapshot_t *base_snapshot, - opal_crs_base_ckpt_options_t *options, - opal_crs_state_type_t *state) -{ - int ret, exit_status = OPAL_SUCCESS; - opal_crs_blcr_snapshot_t *snapshot = NULL; -#if 
CRS_BLCR_HAVE_CR_REQUEST_CHECKPOINT == 1 - cr_checkpoint_args_t cr_args; - static cr_checkpoint_handle_t cr_handle = (cr_checkpoint_handle_t)(-1); -#endif - int fd = 0; - char *loc_fname = NULL; - - if( pid != my_pid ) { - opal_output(0, "crs:blcr: checkpoint(%d, ---): Checkpointing of peers not allowed!", pid); - exit_status = OPAL_ERROR; - goto cleanup; - } - - opal_output_verbose(10, mca_crs_blcr_component.super.output_handle, - "crs:blcr: checkpoint(%d, ---)", pid); - - snapshot = (opal_crs_blcr_snapshot_t *)base_snapshot; - - /* - * Update the snapshot metadata - */ - snapshot->super.component_name = strdup(mca_crs_blcr_component.super.base_version.mca_component_name); - blcr_get_checkpoint_filename(&(snapshot->context_filename), pid); - - if( NULL == snapshot->super.metadata ) { - if (NULL == (snapshot->super.metadata = fopen(snapshot->super.metadata_filename, "a")) ) { - opal_output(mca_crs_blcr_component.super.output_handle, - "crs:blcr: checkpoint(): Error: Unable to open the file (%s)", - snapshot->super.metadata_filename); - exit_status = OPAL_ERROR; - goto cleanup; - } - } - fprintf(snapshot->super.metadata, "%s%s\n", CRS_METADATA_COMP, snapshot->super.component_name); - fprintf(snapshot->super.metadata, "%s%s\n", CRS_METADATA_CONTEXT, snapshot->context_filename); - - fclose(snapshot->super.metadata ); - snapshot->super.metadata = NULL; - - /* - * If we can checkpointing ourselves do so: - * use cr_request_checkpoint() if available, and cr_request_file() if not - */ - if( opal_crs_blcr_dev_null ) { - loc_fname = strdup("/dev/null"); - } else { - asprintf(&loc_fname, "%s/%s", snapshot->super.snapshot_directory, snapshot->context_filename); - } - -#if OPAL_ENABLE_CRDEBUG == 1 - /* Make sure to identify the checkpointing thread, so that it is not - * prevented from requesting the checkpoint after the debugger detaches - */ - opal_cr_debug_set_current_ckpt_thread_self(); - checkpoint_thread_id = opal_thread_get_self(); - blcr_crdebug_refreshed_env = false; 
- - /* If checkpoint/restart enabled debugging then mark detachment place */ - if( MPIR_debug_with_checkpoint ) { - opal_output_verbose(10, mca_crs_blcr_component.super.output_handle, - "crs:blcr: checkpoint(): Detaching debugger..."); - MPIR_checkpoint_debugger_detach(); - } -#endif - - opal_output_verbose(10, mca_crs_blcr_component.super.output_handle, - "crs:blcr: checkpoint SELF <%s>", - loc_fname); - -#if CRS_BLCR_HAVE_CR_REQUEST_CHECKPOINT == 1 || CRS_BLCR_HAVE_CR_REQUEST == 1 -#if CRS_BLCR_HAVE_CR_REQUEST_CHECKPOINT == 1 - fd = open(loc_fname, - O_WRONLY | O_CREAT | O_TRUNC | O_LARGEFILE, - S_IRUSR | S_IWUSR); - if( fd < 0 ) { - *state = OPAL_CRS_ERROR; - opal_output(mca_crs_blcr_component.super.output_handle, - "crs:blcr: checkpoint(): Error: Unable to open checkpoint file (%s) for pid (%d)", - loc_fname, pid); - exit_status = OPAL_ERROR; - goto cleanup; - } - - cr_initialize_checkpoint_args_t(&cr_args); - cr_args.cr_scope = CR_SCOPE_PROC; - cr_args.cr_fd = fd; - if( options->stop ) { - cr_args.cr_signal = SIGSTOP; - } - - ret = cr_request_checkpoint(&cr_args, &cr_handle); - if( ret < 0 ) { - close(cr_args.cr_fd); - *state = OPAL_CRS_ERROR; - opal_output(mca_crs_blcr_component.super.output_handle, - "crs:blcr: checkpoint(): Error: Unable to checkpoint pid (%d) to file (%s)", - pid, loc_fname); - exit_status = ret; - goto cleanup; - } - - /* Wait for checkpoint to finish */ - do { - ret = cr_poll_checkpoint(&cr_handle, NULL); - if( ret < 0 ) { - /* Check if restarting. This is not an error. 
*/ - if( (ret == CR_POLL_CHKPT_ERR_POST) && (errno == CR_ERESTARTED) ) { - ret = 0; - break; - } - /* If Call was interrupted by a signal, retry the call */ - else if (errno == EINTR) { - ; - } - /* Otherwise this is a real error that we need to deal with */ - else { - *state = OPAL_CRS_ERROR; - opal_output(mca_crs_blcr_component.super.output_handle, - "crs:blcr: checkpoint(): Error: Unable to checkpoint pid (%d) to file (%s) - poll failed with (%d)", - pid, loc_fname, ret); - exit_status = ret; - goto cleanup; - } - } - } while( ret < 0 ); - - /* Close the file */ - close(cr_args.cr_fd); -#else - /* Request a checkpoint be taken of the current process. - * Since we are not guaranteed to finish the checkpoint before this - * returns, we also need to wait for it. - */ - cr_request_file(loc_fname); - - /* Wait for checkpoint to finish */ - do { - usleep(1000); /* JJH Do we really want to sleep? */ - } while(CR_STATE_IDLE != cr_status()); -#endif -#endif - - *state = blcr_current_state; - free(loc_fname); - - cleanup: - if( NULL != snapshot->super.metadata ) { - fclose(snapshot->super.metadata ); - snapshot->super.metadata = NULL; - } - - return exit_status; -} - -int opal_crs_blcr_restart(opal_crs_base_snapshot_t *base_snapshot, bool spawn_child, pid_t *child_pid) -{ - opal_crs_blcr_snapshot_t *snapshot = OBJ_NEW(opal_crs_blcr_snapshot_t); - char **cr_argv = NULL; - char *cr_cmd = NULL; - char *cr_full_cmd = NULL; - int ret; - int exit_status = OPAL_SUCCESS; - int status; - - snapshot->super = *base_snapshot; - - opal_output_verbose(10, mca_crs_blcr_component.super.output_handle, - "crs:blcr: restart(--, %d)", spawn_child); - - /* - * If we need to reconstruct the snapshot, - */ - if(snapshot->super.cold_start) { - if( OPAL_SUCCESS != (ret = blcr_cold_start(snapshot)) ) { - exit_status = OPAL_ERROR; - opal_output(mca_crs_blcr_component.super.output_handle, - "crs:blcr: blcr_restart: Unable to reconstruct the snapshot."); - goto cleanup; - } - } - - - /* - * Get the 
restart command - */ - if ( OPAL_SUCCESS != (ret = opal_crs_blcr_restart_cmd(snapshot->context_filename, &cr_cmd)) ) { - exit_status = ret; - goto cleanup; - } - if ( NULL == (cr_argv = opal_argv_split(cr_cmd, ' ')) ) { - exit_status = OPAL_ERROR; - goto cleanup; - } - - /* Need to shutdown the event engine before this. - * for some reason the BLCR checkpointer and our event engine don't get - * along very well. - */ - opal_progress_finalize(); - (void) mca_base_framework_close(&opal_event_base_framework); - - if (!spawn_child) { - cr_full_cmd = opal_argv_join(cr_argv, ' '); - opal_output_verbose(10, mca_crs_blcr_component.super.output_handle, - "crs:blcr: blcr_restart: SELF: exec :(%s, %s):", - blcr_restart_cmd, cr_full_cmd); - - status = execvp(blcr_restart_cmd, cr_argv); - - if(status < 0) { - opal_output(mca_crs_blcr_component.super.output_handle, - "crs:blcr: blcr_restart: SELF: Child failed to execute :(%d):", status); - } - opal_show_help("help-opal-crs-blcr.txt", "blcr:restart_failed_exec", true, - status, - blcr_restart_cmd, - cr_full_cmd); - - exit_status = status; - goto cleanup; - } - /* - * Restart by starting a new process - */ - else { - *child_pid = fork(); - - if( 0 == *child_pid) { - /* Child Process */ - opal_output_verbose(10, mca_crs_blcr_component.super.output_handle, - "crs:blcr: blcr_restart: CHILD: exec :(%s, %s):", - blcr_restart_cmd, - opal_argv_join(cr_argv, ' ')); - - status = execvp(blcr_restart_cmd, cr_argv); - - if(status < 0) { - opal_output(mca_crs_blcr_component.super.output_handle, - "crs:blcr: blcr_restart: CHILD: Child failed to execute :(%d):", status); - } - opal_output(mca_crs_blcr_component.super.output_handle, - "crs:blcr: blcr_restart: CHILD: execvp returned %d", status); - - exit_status = status; - goto cleanup; - } - else if(*child_pid > 0) { - /* Parent is done once it is started. 
*/ - ; - } - else { - opal_output(mca_crs_blcr_component.super.output_handle, - "crs:blcr: blcr_restart: CHILD: fork failed :(%d):", *child_pid); - } - } - - cleanup: - if(NULL != cr_cmd) - free(cr_cmd); - if(NULL != cr_argv) - opal_argv_free(cr_argv); - - return exit_status; -} - -int opal_crs_blcr_disable_checkpoint(void) -{ - opal_output_verbose(10, mca_crs_blcr_component.super.output_handle, - "crs:blcr: disable_checkpoint()"); - /* - * Enter the BLCR Critical Section - */ - cr_enter_cs(client_id); - - return OPAL_SUCCESS; -} - -int opal_crs_blcr_enable_checkpoint(void) -{ - opal_output_verbose(10, mca_crs_blcr_component.super.output_handle, - "crs:blcr: enable_checkpoint()"); - /* - * Leave the BLCR Critical Section - */ - cr_leave_cs(client_id); - - return OPAL_SUCCESS; -} - -/***************************** - * Local Function Definitions - *****************************/ -static int opal_crs_blcr_thread_callback(void *arg) { - const struct cr_checkpoint_info *ckpt_info = cr_get_checkpoint_info(); - int ret; - - opal_output_verbose(10, mca_crs_blcr_component.super.output_handle, - "crs:blcr: thread_callback()"); - - OPAL_THREAD_LOCK(&blcr_lock); - blcr_current_state = OPAL_CRS_CHECKPOINT; - - /* - * Allow the checkpoint to be taken, if we requested it - */ -#if CRS_BLCR_HAVE_INFO_REQUESTER == 1 - if( ckpt_info->requester != my_pid ) { - ret = cr_checkpoint(CR_CHECKPOINT_OMIT); - blcr_current_state = OPAL_CRS_RUNNING; - opal_output_verbose(10, mca_crs_blcr_component.super.output_handle, - "crs:blcr: thread_callback(); WARNING: An external agent attempted to checkpoint this process " - "when it did not expect to be checkpointed. Skipping this checkpoint request." 
- " [%d != %d].", ckpt_info->requester, my_pid); - return 0; - } - else -#endif - { - if(OPAL_SUCCESS != (ret = ompi_trigger_user_inc_callback(OPAL_CR_INC_CRS_PRE_CKPT, - OPAL_CR_INC_STATE_PREPARE)) ) { - ; - } - - ret = cr_checkpoint(0); - } - - /* - * Restarting - */ - if ( 0 < ret ) { - opal_output_verbose(10, mca_crs_blcr_component.super.output_handle, - "crs:blcr: thread_callback: Restarting."); - blcr_current_state = OPAL_CRS_RESTART; - } - /* - * Continuing - */ - else { - opal_output_verbose(10, mca_crs_blcr_component.super.output_handle, - "crs:blcr: thread_callback: Continue."); - blcr_current_state = OPAL_CRS_CONTINUE; - } - - if( OPAL_SUCCESS != (ret = ompi_trigger_user_inc_callback(OPAL_CR_INC_CRS_POST_CKPT, - (blcr_current_state == OPAL_CRS_CONTINUE ? - OPAL_CR_INC_STATE_CONTINUE : - OPAL_CR_INC_STATE_RESTART))) ) { - ; - } - - OPAL_THREAD_UNLOCK(&blcr_lock); - opal_condition_signal(&blcr_cond); - - return 0; -} - -static int opal_crs_blcr_signal_callback(void *arg) { - const struct cr_checkpoint_info *ckpt_info = cr_get_checkpoint_info(); - int ret; - - /* - * Allow the checkpoint to be taken, if we requested it - */ -#if CRS_BLCR_HAVE_INFO_REQUESTER == 1 - if( ckpt_info->requester != my_pid ) { - ret = cr_checkpoint(CR_CHECKPOINT_OMIT); - return 0; - } - else -#endif - { - ret = cr_checkpoint(0); - } - - return 0; -} - -static int opal_crs_blcr_restart_cmd(char *fname, char **cmd) -{ - opal_output_verbose(10, mca_crs_blcr_component.super.output_handle, - "crs:blcr: restart_cmd(%s, ---)", fname); - - if (NULL == fname) { - opal_output_verbose(10, opal_crs_base_framework.framework_output, - "crs:blcr: restart_cmd: Error: filename is NULL!"); - return OPAL_CRS_ERROR; - } - - asprintf(cmd, "%s %s", blcr_restart_cmd, fname); - - return OPAL_SUCCESS; -} - -static int blcr_get_checkpoint_filename(char **fname, pid_t pid) -{ - opal_output_verbose(10, mca_crs_blcr_component.super.output_handle, - "crs:blcr: get_checkpoint_filename(--, %d)", pid); - - 
asprintf(fname, "ompi_blcr_context.%d", pid); - - return OPAL_SUCCESS; -} - -static int blcr_cold_start(opal_crs_blcr_snapshot_t *snapshot) { - int ret, exit_status = OPAL_SUCCESS; - char **tmp_argv = NULL; - char * component_name = NULL; - int prev_pid; - - opal_output_verbose(10, mca_crs_blcr_component.super.output_handle, - "crs:blcr: cold_start()"); - - /* - * Find the snapshot directory, read the metadata file - */ - if( NULL == snapshot->super.metadata ) { - if (NULL == (snapshot->super.metadata = fopen(snapshot->super.metadata_filename, "r")) ) { - opal_output(mca_crs_blcr_component.super.output_handle, - "crs:blcr: checkpoint(): Error: Unable to open the file (%s)", - snapshot->super.metadata_filename); - exit_status = OPAL_ERROR; - goto cleanup; - } - } - if( OPAL_SUCCESS != (ret = opal_crs_base_extract_expected_component(snapshot->super.metadata, - &component_name, &prev_pid) ) ) { - opal_output(mca_crs_blcr_component.super.output_handle, - "crs:blcr: blcr_cold_start: Error: Failed to extract the metadata from the local snapshot (%s). 
Returned %d.", - snapshot->super.metadata_filename, ret); - exit_status = ret; - goto cleanup; - } - - snapshot->super.component_name = strdup(component_name); - - /* Compare the component strings to make sure this is our snapshot before going further */ - if ( 0 != strncmp(mca_crs_blcr_component.super.base_version.mca_component_name, - component_name, strlen(component_name)) ) { - exit_status = OPAL_ERROR; - opal_output(mca_crs_blcr_component.super.output_handle, - "crs:blcr: blcr_cold_start: Error: This snapshot (%s) is not intended for us (%s)\n", - component_name, mca_crs_blcr_component.super.base_version.mca_component_name); - goto cleanup; - } - - /* - * Context Filename - */ - opal_crs_base_metadata_read_token(snapshot->super.metadata, CRS_METADATA_CONTEXT, &tmp_argv); - if( NULL == tmp_argv ) { - opal_output(mca_crs_blcr_component.super.output_handle, - "crs:blcr: blcr_cold_start: Error: Failed to read the %s token from the local checkpoint in %s", - CRS_METADATA_CONTEXT, snapshot->super.snapshot_directory); - exit_status = OPAL_ERROR; - goto cleanup; - } - asprintf(&snapshot->context_filename, "%s/%s", snapshot->super.snapshot_directory, tmp_argv[0]); - - /* - * Reset the cold_start flag - */ - snapshot->super.cold_start = false; - - cleanup: - if(NULL != tmp_argv) { - opal_argv_free(tmp_argv); - tmp_argv = NULL; - } - - if( NULL != snapshot->super.metadata ) { - fclose(snapshot->super.metadata); - snapshot->super.metadata = NULL; - } - - return exit_status; -} - -#if OPAL_ENABLE_CRDEBUG == 1 -static void MPIR_checkpoint_debugger_crs_hook(cr_hook_event_t event) { - opal_thread_t *my_thread_id = NULL; - my_thread_id = opal_thread_get_self(); - - /* Non-MPI threads */ - if(event == CR_HOOK_RSTRT_NO_CALLBACKS ) { - /* wait for the MPI thread to refresh the environment for us */ - while(!blcr_crdebug_refreshed_env) { - sched_yield(); - } - } - /* MPI threads */ - else if(event == CR_HOOK_RSTRT_SIGNAL_CONTEXT ) { - if( 
opal_thread_self_compare(checkpoint_thread_id) ) { - opal_cr_refresh_environ(my_pid); - blcr_crdebug_refreshed_env = true; - } else { - while(!blcr_crdebug_refreshed_env) { - sched_yield(); - } - } - } - - /* - * Some debugging output - */ - /* Non-MPI threads */ - if( event == CR_HOOK_CONT_NO_CALLBACKS ) { - opal_output_verbose(10, mca_crs_blcr_component.super.output_handle, - "crs:blcr: MPIR_checkpoint_debugger_crs_hook: Waiting in Continue (Non-MPI). (%d)", - (int)my_thread_id->t_handle); - } - else if(event == CR_HOOK_RSTRT_NO_CALLBACKS ) { - opal_output_verbose(10, mca_crs_blcr_component.super.output_handle, - "crs:blcr: MPIR_checkpoint_debugger_crs_hook: Waiting in Restart (Non-MPI). (%d)", - (int)my_thread_id->t_handle); - } - /* MPI Threads */ - else if( event == CR_HOOK_CONT_SIGNAL_CONTEXT ) { - opal_output_verbose(10, mca_crs_blcr_component.super.output_handle, - "crs:blcr: MPIR_checkpoint_debugger_crs_hook: Waiting in Continue (MPI)."); - } - else if(event == CR_HOOK_RSTRT_SIGNAL_CONTEXT ) { - opal_output_verbose(10, mca_crs_blcr_component.super.output_handle, - "crs:blcr: MPIR_checkpoint_debugger_crs_hook: Waiting in Restart (MPI)."); - } - - /* - * Enter the breakpoint function. - * If no debugger intends on attaching, then this function is expected to - * return immediately. - * - * If this is an MPI thread then odds are that this is the checkpointing - * thread, in which case this function will return immediately allowing - * it to prepare the MPI library before signaling to the debugger that - * it is safe to attach, if necessary. 
- */ - MPIR_checkpoint_debugger_waitpoint(); - - opal_output_verbose(10, mca_crs_blcr_component.super.output_handle, - "crs:blcr: MPIR_checkpoint_debugger_crs_hook: Finished..."); - } -#endif diff --git a/opal/mca/crs/blcr/help-opal-crs-blcr.txt b/opal/mca/crs/blcr/help-opal-crs-blcr.txt deleted file mode 100644 index efb015d716b..00000000000 --- a/opal/mca/crs/blcr/help-opal-crs-blcr.txt +++ /dev/null @@ -1,28 +0,0 @@ - -*- text -*- -# -# Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana -# University Research and Technology -# Corporation. All rights reserved. -# Copyright (c) 2004-2005 The University of Tennessee and The University -# of Tennessee Research Foundation. All rights -# reserved. -# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, -# University of Stuttgart. All rights reserved. -# Copyright (c) 2004-2005 The Regents of the University of California. -# All rights reserved. -# Copyright (c) 2011 Oak Ridge National Labs. All rights reserved. -# $COPYRIGHT$ -# -# Additional copyrights may follow -# -# $HEADER$ -# -# This is the US/English general help file for Open PAL CRS framework. -# -[blcr:restart_failed_exec] -Error: BLCR was not able to restart the process because exec failed. - Check the installation of BLCR on all of the machines in your - system. The following information may be of help: - Return Code : %d - BLCR Restart Command : %s - Restart Command Line : %s diff --git a/opal/mca/crs/criu/.opal_ignore b/opal/mca/crs/criu/.opal_ignore deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/opal/mca/crs/criu/Makefile.am b/opal/mca/crs/criu/Makefile.am deleted file mode 100644 index 1088e7be763..00000000000 --- a/opal/mca/crs/criu/Makefile.am +++ /dev/null @@ -1,51 +0,0 @@ -# -# Copyright (c) 2004-2007 The Trustees of Indiana University. -# All rights reserved. -# Copyright (c) 2004-2005 The Trustees of the University of Tennessee. -# All rights reserved. 
-# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, -# University of Stuttgart. All rights reserved. -# Copyright (c) 2004-2005 The Regents of the University of California. -# All rights reserved. -# Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. -# Copyright (c) 2014 Hochschule Esslingen. All rights reserved. -# -# Copyright (c) 2017 IBM Corporation. All rights reserved. -# $COPYRIGHT$ -# -# Additional copyrights may follow -# -# $HEADER$ -# - -CFLAGS = $(crs_criu_CFLAGS) -AM_CPPFLAGS = $(crs_criu_CPPFLAGS) - -sources = \ - crs_criu.h \ - crs_criu_component.c \ - crs_criu_module.c - -# Make the output library in this directory, and name it either -# mca__.la (for DSO builds) or libmca__.la -# (for static builds). - -if MCA_BUILD_opal_crs_criu_DSO -component_noinst = -component_install = mca_crs_criu.la -else -component_noinst = libmca_crs_criu.la -component_install = -endif - -mcacomponentdir = $(opallibdir) -mcacomponent_LTLIBRARIES = $(component_install) -mca_crs_criu_la_SOURCES = $(sources) -mca_crs_criu_la_LDFLAGS = -module -avoid-version $(crs_criu_LDFLAGS) -mca_crs_criu_la_LIBADD = $(top_builddir)/opal/lib@OPAL_LIB_PREFIX@open-pal.la \ - $(crs_criu_LIBS) - -noinst_LTLIBRARIES = $(component_noinst) -libmca_crs_criu_la_SOURCES = $(sources) -libmca_crs_criu_la_LDFLAGS = -module -avoid-version $(crs_criu_LDFLAGS) -libmca_crs_criu_la_LIBADD = $(crs_criu_LIBS) diff --git a/opal/mca/crs/criu/configure.m4 b/opal/mca/crs/criu/configure.m4 deleted file mode 100644 index 94ea29d2248..00000000000 --- a/opal/mca/crs/criu/configure.m4 +++ /dev/null @@ -1,93 +0,0 @@ -# -*- shell-script -*- -# -# Copyright (c) 2004-2010 The Trustees of Indiana University. -# All rights reserved. -# Copyright (c) 2004-2005 The Trustees of the University of Tennessee. -# All rights reserved. -# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, -# University of Stuttgart. All rights reserved. 
-# Copyright (c) 2004-2006 The Regents of the University of California. -# All rights reserved. -# Copyright (c) 2009-2014 Cisco Systems, Inc. All rights reserved. -# Copyright (c) 2011 Oak Ridge National Labs. All rights reserved. -# Copyright (c) 2014 Hochschule Esslingen. All rights reserved. -# Copyright (c) 2015 Research Organization for Information Science -# and Technology (RIST). All rights reserved. -# -# $COPYRIGHT$ -# -# Additional copyrights may follow -# -# $HEADER$ -# - -# MCA_crs_criu_CONFIG([action-if-found], [action-if-not-found]) -# ----------------------------------------------------------- -AC_DEFUN([MCA_opal_crs_criu_CONFIG],[ - OPAL_VAR_SCOPE_PUSH([check_crs_criu_good check_crs_criu_dir_msg check_crs_criu_libdir_msg check_crs_criu_dir check_crs_criu_libdir]) - AC_CONFIG_FILES([opal/mca/crs/criu/Makefile]) - - AC_ARG_WITH([criu], - [AC_HELP_STRING([--with-criu(=DIR)], - [Path to CRIU Installation])]) - OPAL_CHECK_WITHDIR([criu], [$with_criu], [include/criu/criu.h]) - AC_ARG_WITH([criu-libdir], - [AC_HELP_STRING([--with-criu-libdir=DIR], - [Search for CRIU libraries in DIR])]) - OPAL_CHECK_WITHDIR([criu-libdir], [$with_criu_libdir], [libcriu.*]) - - # If we do not want FT or CRIU, don't compile this component - AS_IF([test "$opal_want_ft_cr" = "1" && test "$with_criu" = "yes"], - [check_crs_criu_good=yes], - [check_crs_criu_good=no]) - - # Defaults - check_crs_criu_dir_msg="compiler default" - check_crs_criu_libdir_msg="linker default" - check_crs_criu_dir="" - check_crs_criu_libdir="" - - # Determine the search paths for the headers and libraries - AS_IF([test $check_crs_criu_good = yes], - [AS_IF([test ! -z "$with_criu" && test "$with_criu" != "yes"], - [check_crs_criu_dir="$with_criu" - check_crs_criu_dir_msg="$with_criu (from --with-criu)"]) - AS_IF([test ! 
-z "$with_criu_libdir" && test "$with_criu_libdir" != "yes"], - [check_crs_criu_libdir="$with_criu_libdir" - check_crs_criu_libdir_msg="$with_criu_libdir (from --with-criu-libdir)"]) - ]) - - AS_IF([test $check_crs_criu_good = yes], - [AC_MSG_CHECKING([for CRIU dir]) - AC_MSG_RESULT([$check_crs_criu_dir_msg]) - AC_MSG_CHECKING([for CRIU library dir]) - AC_MSG_RESULT([$check_crs_criu_libdir_msg]) - OPAL_CHECK_PACKAGE([crs_criu_check], - [criu/criu.h], - [criu], - [criu_init_opts], - [], - [$check_crs_criu_dir], - [$check_crs_criu_libdir], - [check_crs_criu_good="yes"], - [check_crs_criu_good="no"]) - ]) - - crs_criu_CFLAGS="$CFLAGS $crs_criu_check_CFLAGS" - crs_criu_CPPFLAGS="$CPPFLAGS $crs_criu_check_CPPFLAGS" - crs_criu_LDFLAGS="$LDFLAGS $crs_criu_check_LDFLAGS" - crs_criu_LIBS="$LIBS $crs_criu_check_LIBS" - - AS_IF([test $check_crs_criu_good = yes], - [ AC_SUBST([crs_criu_CFLAGS]) - AC_SUBST([crs_criu_CPPFLAGS]) - AC_SUBST([crs_criu_LDFLAGS]) - AC_SUBST([crs_criu_LIBS]) - $1], - [AS_IF([test ! -z "$with_criu" && test "$with_criu" != "no"], - [AC_MSG_WARN([CRIU support requested but not found. Perhaps you need to enable FT support, or specify the location of the CRIU libraries...?]) - AC_MSG_ERROR([Aborting.])]) - $2]) - - OPAL_VAR_SCOPE_POP -])dnl diff --git a/opal/mca/crs/criu/crs_criu.h b/opal/mca/crs/criu/crs_criu.h deleted file mode 100644 index 96dbbd4598e..00000000000 --- a/opal/mca/crs/criu/crs_criu.h +++ /dev/null @@ -1,88 +0,0 @@ -/* - * Copyright (c) 2004-2009 The Trustees of Indiana University. - * All rights reserved. - * Copyright (c) 2004-2005 The Trustees of the University of Tennessee. - * All rights reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * Copyright (c) 2014 Hochschule Esslingen. All rights reserved. 
- * - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -/** - * @file - * - * CRIU CRS component - support checkpoint/restart using CRIU - */ - -#ifndef MCA_CRS_CRIU_EXPORT_H -#define MCA_CRS_CRIU_EXPORT_H - -#include "opal_config.h" - - -#include "opal/mca/mca.h" -#include "opal/mca/crs/crs.h" -#include "opal/mca/base/base.h" - -#include - -BEGIN_C_DECLS - -#define LOG_FILE ("criu.log") - -/* Local Component structures */ -struct opal_crs_criu_component_t { - /* Base CRS component */ - opal_crs_base_component_t super; - - /* criu log file */ - char *log_file; - /* criu log level */ - int log_level; - /* criu tcp established */ - bool tcp_established; - /* criu shell job */ - bool shell_job; - /* criu external unix sockets */ - bool ext_unix_sk; - /* criu leave tasks in running state after checkpoint */ - bool leave_running; -}; -typedef struct opal_crs_criu_component_t opal_crs_criu_component_t; - -OPAL_MODULE_DECLSPEC extern opal_crs_criu_component_t mca_crs_criu_component; - -int opal_crs_criu_component_query(mca_base_module_t **module, int *priority); - -/* - * Module functions - */ -int opal_crs_criu_module_init(void); -int opal_crs_criu_module_finalize(void); -int opal_crs_criu_checkpoint(pid_t pid, opal_crs_base_snapshot_t *snapshot, - opal_crs_base_ckpt_options_t *options, - opal_crs_state_type_t *state); - -int opal_crs_criu_restart(opal_crs_base_snapshot_t *snapshot, - bool spawn_child, pid_t *child_pid); - -int opal_crs_criu_disable_checkpoint(void); -int opal_crs_criu_enable_checkpoint(void); - -int opal_crs_criu_prelaunch(int32_t rank, char *base_snapshot_dir, char **app, - char **cwd, char ***argv, char ***env); - -int opal_crs_criu_reg_thread(void); - - -END_C_DECLS - -#endif /* MCA_CRS_CRIU_EXPORT_H */ diff --git a/opal/mca/crs/criu/crs_criu_component.c b/opal/mca/crs/criu/crs_criu_component.c deleted file mode 100644 index e56be920c70..00000000000 --- a/opal/mca/crs/criu/crs_criu_component.c +++ /dev/null @@ -1,213 
+0,0 @@ -/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ -/* - * Copyright (c) 2004-2009 The Trustees of Indiana University. - * All rights reserved. - * Copyright (c) 2004-2005 The Trustees of the University of Tennessee. - * All rights reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * Copyright (c) 2014 Hochschule Esslingen. All rights reserved. - * Copyright (c) 2015 Los Alamos National Security, LLC. All rights - * reserved. - * - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "opal_config.h" - -#include "opal/util/output.h" - -#include "opal/constants.h" -#include "opal/mca/crs/crs.h" -#include "opal/mca/crs/base/base.h" -#include "crs_criu.h" - -/* Local functionality */ -static int crs_criu_register(void); -static int crs_criu_open(void); -static int crs_criu_close(void); - -/* - * Instantiate the public struct with all of our public information - * and pointer to our public functions in it - */ -opal_crs_criu_component_t mca_crs_criu_component = { - /* First do the base component stuff */ - { - /* Handle the general mca_component_t struct containing - * meta information about the component itself - */ - .base_version = { - OPAL_CRS_BASE_VERSION_2_0_0, - - /* Component name and version */ - .mca_component_name = "criu", - MCA_BASE_MAKE_VERSION(component, OPAL_MAJOR_VERSION, OPAL_MINOR_VERSION, - OPAL_RELEASE_VERSION), - - /* Component open and close functions */ - .mca_open_component = crs_criu_open, - .mca_close_component = crs_criu_close, - .mca_query_component = opal_crs_criu_component_query, - .mca_register_component_params = crs_criu_register, - }, - .base_data = { - /* The component is checkpoint ready */ - MCA_BASE_METADATA_PARAM_CHECKPOINT - }, - - .verbose = 0, - .output_handle = -1, - }, - /* criu log file */ - 
LOG_FILE, - /* criu log level */ - 0, - /* criu tcp established */ - true, - /* criu shell job */ - true, - /* criu external unix sockets */ - true, - /* criu leave tasks in running state after checkpoint */ - true -}; - -static int crs_criu_register(void) -{ - int ret; - - mca_base_component_t *component = &mca_crs_criu_component.super.base_version; - - mca_crs_criu_component.super.priority = 10; - ret = mca_base_component_var_register(component, "priority", - "Priority of the CRS criu component (default: 10)", - MCA_BASE_VAR_TYPE_INT, NULL, 0, 0, - OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_ALL_EQ, - &mca_crs_criu_component.super.priority); - if (0 > ret) { - return ret; - } - - mca_crs_criu_component.super.verbose = 0; - ret = mca_base_component_var_register(component, "verbose", - "Verbose level for the CRS criu component", - MCA_BASE_VAR_TYPE_INT, NULL, 0, 0, - OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_LOCAL, - &mca_crs_criu_component.super.verbose); - - if (0 > ret) { - return ret; - } - - ret = mca_base_component_var_register(component, "log", "Name of CRIU logfile (default: criu.log)", - MCA_BASE_VAR_TYPE_STRING, NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE, - OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_LOCAL, - &mca_crs_criu_component.log_file); - - if (0 > ret) { - return ret; - } - - ret = mca_base_component_var_register(component, "log_level", - "Verbose level for the CRS criu component (default: 0)", - MCA_BASE_VAR_TYPE_INT, NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE, - OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_LOCAL, - &mca_crs_criu_component.log_level); - - if (0 > ret) { - return ret; - } - - ret = mca_base_component_var_register(component, "tcp_established", - "Checkpoint/restore established TCP connections (default: true)", - MCA_BASE_VAR_TYPE_BOOL, NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE, - OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_LOCAL, - &mca_crs_criu_component.tcp_established); - - if (0 > ret) { - return ret; - } - - ret = mca_base_component_var_register(component, "shell_job", - "Allow to 
dump and restore shell jobs (default: true)", - MCA_BASE_VAR_TYPE_BOOL, NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE, - OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_LOCAL, - &mca_crs_criu_component.shell_job); - - if (0 > ret) { - return ret; - } - - ret = mca_base_component_var_register(component, "ext_unix_sk", - "Allow external unix connections (default: true)", - MCA_BASE_VAR_TYPE_BOOL, NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE, - OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_LOCAL, - &mca_crs_criu_component.ext_unix_sk); - - if (0 > ret) { - return ret; - } - - ret = mca_base_component_var_register(component, "leave_running", - "Leave tasks in running state after checkpoint (default: true)", - MCA_BASE_VAR_TYPE_BOOL, NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE, - OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_LOCAL, - &mca_crs_criu_component.leave_running); - - return (0 > ret) ? ret : OPAL_SUCCESS; -} - -static int crs_criu_open(void) -{ - int oh; - - /* If there is a custom verbose level for this component than use it - * otherwise take our parents level and output channel - */ - if (0 != mca_crs_criu_component.super.verbose) { - mca_crs_criu_component.super.output_handle = opal_output_open(NULL); - opal_output_set_verbosity(mca_crs_criu_component.super.output_handle, - mca_crs_criu_component.super.verbose); - } else { - mca_crs_criu_component.super.output_handle = opal_crs_base_framework.framework_output; - } - - oh = mca_crs_criu_component.super.output_handle; - /* - * Debug output - */ - opal_output_verbose(10, oh, "crs:criu: open()"); - opal_output_verbose(20, oh, "crs:criu: open: priority = %d", - mca_crs_criu_component.super.priority); - opal_output_verbose(20, oh, "crs:criu: open: verbosity = %d", - mca_crs_criu_component.super.verbose); - opal_output_verbose(20, oh, "crs:criu: open: log_file = %s", - mca_crs_criu_component.log_file); - opal_output_verbose(20, oh, "crs:criu: open: log_level = %d", - mca_crs_criu_component.log_level); - opal_output_verbose(20, oh, "crs:criu: open: tcp_established = %d", 
- mca_crs_criu_component.tcp_established); - opal_output_verbose(20, oh, "crs:criu: open: shell_job = %d", - mca_crs_criu_component.shell_job); - opal_output_verbose(20, oh, "crs:criu: open: ext_unix_sk = %d", - mca_crs_criu_component.ext_unix_sk); - opal_output_verbose(20, oh, "crs:criu: open: leave_running = %d", - mca_crs_criu_component.leave_running); - - return OPAL_SUCCESS; -} - -static int crs_criu_close(void) -{ - opal_output_verbose(10, mca_crs_criu_component.super.output_handle, - "crs:criu: close()"); - - return OPAL_SUCCESS; -} diff --git a/opal/mca/crs/criu/crs_criu_module.c b/opal/mca/crs/criu/crs_criu_module.c deleted file mode 100644 index e4b12c4717c..00000000000 --- a/opal/mca/crs/criu/crs_criu_module.c +++ /dev/null @@ -1,261 +0,0 @@ -/* - * Copyright (c) 2004-2010 The Trustees of Indiana University. - * All rights reserved. - * Copyright (c) 2004-2005 The Trustees of the University of Tennessee. - * All rights reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * Copyright (c) 2007 Evergrid, Inc. All rights reserved. - * Copyright (c) 2011 Oak Ridge National Labs. All rights reserved. - * Copyright (c) 2014 Hochschule Esslingen. All rights reserved. 
- * - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "opal_config.h" - -#include -#include -#include -#include - -#include "opal/util/show_help.h" -#include "opal/util/output.h" -#include "opal/util/argv.h" -#include "opal/constants.h" - -#include "opal/mca/base/mca_base_var.h" - -#include "opal/mca/crs/crs.h" -#include "opal/mca/crs/base/base.h" - -#include "crs_criu.h" - -/* CRIU module */ -static opal_crs_base_module_t criu_module = { - /* Initialization Function */ - opal_crs_criu_module_init, - /* Finalization Function */ - opal_crs_criu_module_finalize, - - /* Checkpoint interface */ - opal_crs_criu_checkpoint, - - /* Restart Command Access */ - opal_crs_criu_restart, - - /* Disable checkpoints */ - opal_crs_criu_disable_checkpoint, - /* Enable checkpoints */ - opal_crs_criu_enable_checkpoint, - - /* Prelaunch */ - opal_crs_criu_prelaunch, - - /* Register Thread */ - opal_crs_criu_reg_thread -}; - -/* Snapshot Class Functions */ -OBJ_CLASS_DECLARATION(opal_crs_criu_snapshot_t); - -struct opal_crs_criu_snapshot_t { - /* Base CRS snapshot type */ - opal_crs_base_snapshot_t super; -}; -typedef struct opal_crs_criu_snapshot_t opal_crs_criu_snapshot_t; - -void opal_crs_criu_construct(opal_crs_criu_snapshot_t *obj); -void opal_crs_criu_destruct(opal_crs_criu_snapshot_t *obj); - -OBJ_CLASS_INSTANCE(opal_crs_criu_snapshot_t, - opal_crs_base_snapshot_t, - opal_crs_criu_construct, - opal_crs_criu_destruct); - -void opal_crs_criu_construct(opal_crs_criu_snapshot_t *snapshot) -{ - snapshot->super.component_name = strdup(mca_crs_criu_component.super.base_version.mca_component_name); -} - -void opal_crs_criu_destruct(opal_crs_criu_snapshot_t *snapshot) -{ -} - -int opal_crs_criu_component_query(mca_base_module_t **module, int *priority) -{ - opal_output_verbose(10, mca_crs_criu_component.super.output_handle, - "crs:criu: component_query()"); - - *priority = mca_crs_criu_component.super.priority; - *module = (mca_base_module_t 
*)&criu_module; - - return OPAL_SUCCESS; -} - -int opal_crs_criu_module_init(void) -{ - opal_output_verbose(10, mca_crs_criu_component.super.output_handle, - "crs:criu: module_init()"); - - return OPAL_SUCCESS; -} - -int opal_crs_criu_module_finalize(void) -{ - opal_output_verbose(10, mca_crs_criu_component.super.output_handle, - "crs:criu: module_finalize()"); - - return OPAL_SUCCESS; -} - -static void criu_error(int ret, pid_t pid) -{ - switch (ret) { - case -EBADE: - opal_output(0, "crs:criu:(PID:%d):RPC has returned fail", pid); - break; - case -ECONNREFUSED: - opal_output(0, "crs:criu:(PID:%d):Unable to connect to CRIU", pid); - break; - case -ECOMM: - opal_output(0, "crs:criu:(PID:%d):Unable to send/recv msg to/from CRIU", pid); - break; - case -EINVAL: - opal_output(0, "crs:criu:(PID:%d):CRIU doesn't support this type of request." - "You should probably update CRIU", pid); - break; - case -EBADMSG: - opal_output(0, "crs:criu:(PID:%d):Unexpected response from CRIU." - "You should probably update CRIU", pid); - break; - default: - opal_output(0, "crs:criu:(PID:%d):Unknown error type code." 
- "You should probably update CRIU", pid); - } -} - -int opal_crs_criu_checkpoint(pid_t pid, opal_crs_base_snapshot_t *base_snapshot, - opal_crs_base_ckpt_options_t *options, - opal_crs_state_type_t *state) -{ - int ret; - int fd = 0; - int oh = mca_crs_criu_component.super.output_handle; - opal_crs_criu_snapshot_t *snapshot = NULL; - char *dest = NULL; - - opal_output_verbose(10, oh, "crs:criu: checkpoint(%d, ---)", pid); - - snapshot = (opal_crs_criu_snapshot_t *)base_snapshot; - snapshot->super.component_name = strdup(mca_crs_criu_component.super.base_version.mca_component_name); - - if (NULL == snapshot->super.metadata) { - if (NULL == (snapshot->super.metadata = fopen(snapshot->super.metadata_filename, "a"))) { - opal_output(oh, "crs:criu: checkpoint(): Error: Unable to open the file (%s)", - snapshot->super.metadata_filename); - *state = OPAL_CRS_ERROR; - goto cleanup; - } - } - fprintf(snapshot->super.metadata, "%s%s\n", CRS_METADATA_COMP, snapshot->super.component_name); - - fclose(snapshot->super.metadata); - snapshot->super.metadata = NULL; - - ret = criu_init_opts(); - - if (ret < 0) { - criu_error(ret, pid); - *state = OPAL_CRS_ERROR; - goto cleanup; - } - - opal_output_verbose(10, oh, "crs:criu: criu_init_opts() returned %d", ret); - - dest = snapshot->super.snapshot_directory; - opal_output_verbose(10, oh, "crs:criu: opening snapshot directory %s", dest); - fd = open(dest, O_DIRECTORY); - - if (fd < 0) { - *state = OPAL_CRS_ERROR; - opal_output(oh, "crs:criu: checkpoint(): Error: Unable to open checkpoint " - "directory (%s) for pid (%d)", dest, pid); - goto cleanup; - } - - /* http://criu.org/C_API */ - criu_set_images_dir_fd(fd); - criu_set_pid(pid); - - criu_set_log_file(mca_crs_criu_component.log_file); - criu_set_log_level(mca_crs_criu_component.log_level); - criu_set_tcp_established(mca_crs_criu_component.tcp_established); - criu_set_shell_job(mca_crs_criu_component.shell_job); - criu_set_ext_unix_sk(mca_crs_criu_component.ext_unix_sk); - 
criu_set_leave_running(mca_crs_criu_component.leave_running); - ret = criu_dump(); - - if (ret < 0) { - criu_error(ret, pid); - *state = OPAL_CRS_ERROR; - goto cleanup; - } - - *state = OPAL_CRS_CONTINUE; - - cleanup: - - if (fd > 0) { - close(fd); - } - - if (OPAL_CRS_ERROR == *state) { - return OPAL_ERROR; - } - return OPAL_SUCCESS; -} - -int opal_crs_criu_restart(opal_crs_base_snapshot_t *snapshot, - bool spawn_child, pid_t *child_pid) -{ - opal_output_verbose(10, mca_crs_criu_component.super.output_handle, - "crs:criu: %s", __func__); - return OPAL_SUCCESS; -} - -int opal_crs_criu_disable_checkpoint(void) -{ - opal_output_verbose(10, mca_crs_criu_component.super.output_handle, - "crs:criu: %s", __func__); - return OPAL_SUCCESS; -} - -int opal_crs_criu_enable_checkpoint(void) -{ - opal_output_verbose(10, mca_crs_criu_component.super.output_handle, - "crs:criu: %s", __func__); - return OPAL_SUCCESS; -} - -int opal_crs_criu_prelaunch(int32_t rank, char *base_snapshot_dir, - char **app, char **cwd, char ***argv, - char ***env) -{ - opal_output_verbose(10, mca_crs_criu_component.super.output_handle, - "crs:criu: %s", __func__); - return OPAL_SUCCESS; -} - -int opal_crs_criu_reg_thread(void) -{ - opal_output_verbose(10, mca_crs_criu_component.super.output_handle, - "crs:criu: %s", __func__); - return OPAL_SUCCESS; -} diff --git a/opal/mca/crs/criu/owner.txt b/opal/mca/crs/criu/owner.txt deleted file mode 100644 index 0cc0384f0eb..00000000000 --- a/opal/mca/crs/criu/owner.txt +++ /dev/null @@ -1,7 +0,0 @@ -# -# owner/status file -# owner: institution that is responsible for this package -# status: e.g. 
active, maintenance, unmaintained -# -owner: CISCO -status: maintenance diff --git a/opal/mca/crs/dmtcp/.opal_ignore b/opal/mca/crs/dmtcp/.opal_ignore deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/opal/mca/crs/dmtcp/Makefile.am b/opal/mca/crs/dmtcp/Makefile.am deleted file mode 100644 index 91bbbe91a1b..00000000000 --- a/opal/mca/crs/dmtcp/Makefile.am +++ /dev/null @@ -1,43 +0,0 @@ -# -# Copyright (c) 2010 The Trustees of Indiana University. -# All rights reserved. -# Copyright (c) 2014 Cisco Systems, Inc. All rights reserved. -# Copyright (c) 2017 IBM Corporation. All rights reserved. -# $COPYRIGHT$ -# -# Additional copyrights may follow -# -# $HEADER$ -# - -CFLAGS = $(crs_dmtcp_CFLAGS) -AM_CPPFLAGS = $(crs_dmtcp_CPPFLAGS) - -sources = \ - crs_dmtcp.h \ - crs_dmtcp_component.c \ - crs_dmtcp_module.c - -# Make the output library in this directory, and name it either -# mca__.la (for DSO builds) or libmca__.la -# (for static builds). - -if MCA_BUILD_opal_crs_dmtcp_DSO -component_noinst = -component_install = mca_crs_dmtcp.la -else -component_noinst = libmca_crs_dmtcp.la -component_install = -endif - -mcacomponentdir = $(opallibdir) -mcacomponent_LTLIBRARIES = $(component_install) -mca_crs_dmtcp_la_SOURCES = $(sources) -mca_crs_dmtcp_la_LDFLAGS = -module -avoid-version $(crs_dmtcp_LDFLAGS) -mca_crs_dmtcp_la_LIBADD = $(top_builddir)/opal/lib@OPAL_LIB_PREFIX@open-pal.la \ - $(crs_dmtcp_LIBS) - -noinst_LTLIBRARIES = $(component_noinst) -libmca_crs_dmtcp_la_SOURCES = $(sources) -libmca_crs_dmtcp_la_LDFLAGS = -module -avoid-version $(crs_dmtcp_LDFLAGS) -libmca_crs_dmtcp_la_LIBADD = $(crs_dmtcp_LIBS) diff --git a/opal/mca/crs/dmtcp/configure.m4 b/opal/mca/crs/dmtcp/configure.m4 deleted file mode 100644 index af61f228a00..00000000000 --- a/opal/mca/crs/dmtcp/configure.m4 +++ /dev/null @@ -1,140 +0,0 @@ -# -*- shell-script -*- -# -# Copyright (c) 2010 The Trustees of Indiana University. -# All rights reserved. -# Copyright (c) 2010 Cisco Systems, Inc. 
All rights reserved. -# Copyright (c) 2015 Research Organization for Information Science -# and Technology (RIST). All rights reserved. -# $COPYRIGHT$ -# -# Additional copyrights may follow -# -# $HEADER$ -# - -# MCA_opal_crs_dmtcp_CONFIG([action-if-found], [action-if-not-found]) -# ----------------------------------------------------------- -AC_DEFUN([MCA_opal_crs_dmtcp_CONFIG],[ - AC_CONFIG_FILES([opal/mca/crs/dmtcp/Makefile]) - - OPAL_VAR_SCOPE_PUSH([opal_check_crs_dmtcp_good opal_opal_check_crs_dmtcp_save_CPPFLAGS opal_opal_check_crs_dmtcp_save_LDFLAGS opal_opal_check_crs_dmtcp_save_LIBS opal_check_crs_dmtcp_dir_msg opal_check_crs_dmtcp_libdir_msg opal_check_crs_dmtcp_dir opal_check_crs_dmtcp_libdir]) - - - opal_check_crs_dmtcp_good="no" - - # Configure option to specify where to look for DMTCP headers - # --with-dmtcp(=DIR) - AC_ARG_WITH([dmtcp], - [AC_HELP_STRING([--with-dmtcp(=DIR)], - [Path to DMTCP Installation])]) - OPAL_CHECK_WITHDIR([dmtcp], [$with_dmtcp], [include/mtcp.h]) - - # Configure option to specify where to look for DMTCP libraries - # (Default: $with_dmtcp/lib) - # --with-dmtcp-libdir=DIR - AC_ARG_WITH([dmtcp-libdir], - [AC_HELP_STRING([--with-dmtcp-libdir=DIR], - [Search for DMTCP libraries in DIR])]) - OPAL_CHECK_WITHDIR([dmtcp-libdir], [$with_dmtcp_libdir], [libmtcp.so]) - - # - # Check if Open MPI was compiled with Checkpoint/Restart support - # If not, then we do not compile this component - # - AS_IF([test "$opal_want_ft" = "0"], - [opal_check_crs_dmtcp_good="no"], - [opal_check_crs_dmtcp_good="yes"]) - - # - # Check if the user explicitly requested -not- to build the DMTCP component - # If so, the we do not compile this component - # - AS_IF([test "$with_dmtcp" = "no" || test "$opal_check_crs_dmtcp_good" = "no"], - [opal_check_crs_dmtcp_good="no"], - [opal_check_crs_dmtcp_good="yes"]) - - # Save some flags - opal_opal_check_crs_dmtcp_save_CPPFLAGS=$CPPFLAGS - opal_opal_check_crs_dmtcp_save_LDFLAGS=$LDFLAGS - 
opal_opal_check_crs_dmtcp_save_LIBS=$LIBS - - # - # Now to check if the library is usable - # - opal_check_crs_dmtcp_dir_msg="compiler default" - opal_check_crs_dmtcp_libdir_msg="linker default" - opal_check_crs_dmtcp_dir="" - opal_check_crs_dmtcp_libdir="" - - # Determine the search paths for the headers and libraries - AS_IF([test "$opal_check_crs_dmtcp_good" = "yes"], - [AS_IF([test ! -z "$with_dmtcp" && test "$with_dmtcp" != "yes"], - [opal_check_crs_dmtcp_dir="$with_dmtcp" - opal_check_crs_dmtcp_dir_msg="$with_dmtcp (from --with-dmtcp)"]) - AS_IF([test ! -z "$with_dmtcp_libdir" && test "$with_dmtcp_libdir" != "yes"], - [opal_check_crs_dmtcp_libdir="$with_dmtcp_libdir" - opal_check_crs_dmtcp_libdir_msg="$with_dmtcp_libdir (from --with-dmtcp-libdir)"]) - ]) - - # Look for DMTCP. - AS_IF([test "$opal_check_crs_dmtcp_good" = "yes"], - [AC_MSG_CHECKING([for DMTCP dir]) - AC_MSG_RESULT([$opal_check_crs_dmtcp_dir_msg]) - AC_MSG_CHECKING([for DMTCP library dir]) - AC_MSG_RESULT([$opal_check_crs_dmtcp_libdir_msg]) - OPAL_CHECK_PACKAGE([crs_dmtcp_check], - [mtcp.h], - [mtcp], - [mtcp_init], - [], - [$opal_check_crs_dmtcp_dir], - [$opal_check_crs_dmtcp_libdir], - [opal_check_crs_dmtcp_good="yes"], - [opal_check_crs_dmtcp_good="no"]) - ]) - - # When we restart a thread, we use execlp() to exec the "mtcp_restart" - # command. We don't care what its path is, but it does need to exist in - # the PATH. - AC_CHECK_PROG([mtcp_restart_command_exists], ["mtcp_restart"], ["yes"], ["no"]) - AS_IF([test "$mtcp_restart_command_exists" = "no"], - [opal_check_crs_dmtcp_good="no" - AS_IF([test ! -z "$with_dmtcp" && test "$with_dmtcp" != "no"], - [AC_MSG_WARN([mtcp_restart not found in PATH.]) - AC_MSG_ERROR([Aborting.])])]) - - # - # If '-lmtcp' or - # '-I' or '-L' was needed to link to MTCP, then OPAL_CHECK_PACKAGE - # sets the crs_mtcp_check_* variables, which we use below. 
- # - - crs_dmtcp_CFLAGS="$CFLAGS $crs_dmtcp_check_CFLAGS" - crs_dmtcp_CPPFLAGS="$CPPFLAGS $crs_dmtcp_check_CPPFLAGS" - crs_dmtcp_LDFLAGS="$LDFLAGS $crs_dmtcp_check_LDFLAGS" - crs_dmtcp_LIBS="$crs_dmtcp_check_LIBS $LIBS" - - AS_IF([test "$opal_check_crs_dmtcp_good" = "yes"], - [$1]) - - CPPFLAGS=$opal_opal_check_crs_dmtcp_save_CPPFLAGS - LDFLAGS="$crs_dmtcp_check_LDFLAGS $opal_opal_check_crs_dmtcp_save_LDFLAGS" - LIBS="$crs_dmtcp_LIBS $opal_opal_check_crs_dmtcp_save_LIBS" - - AC_SUBST([crs_dmtcp_CFLAGS]) - AC_SUBST([crs_dmtcp_CPPFLAGS]) - AC_SUBST([crs_dmtcp_LDFLAGS]) - AC_SUBST([crs_dmtcp_LIBS]) - - # If all is good at this point then post any compiler options to - # the build environment. If all is not good at this point and - # DMTCP was explicitly requested, then error out. - - AS_IF([test "$opal_check_crs_dmtcp_good" = "yes"], - [$1], - [AS_IF([test ! -z "$with_dmtcp" && test "$with_dmtcp" != "no"], - [AC_MSG_WARN([DMTCP support requested but not found. Perhaps you need to specify the location of the DMTCP libraries.]) - AC_MSG_ERROR([Aborting.])]) - $2]) - OPAL_VAR_SCOPE_POP -])dnl diff --git a/opal/mca/crs/dmtcp/crs_dmtcp.h b/opal/mca/crs/dmtcp/crs_dmtcp.h deleted file mode 100644 index 6e5b4db9562..00000000000 --- a/opal/mca/crs/dmtcp/crs_dmtcp.h +++ /dev/null @@ -1,87 +0,0 @@ -/* - * Copyright (c) 2010 The Trustees of Indiana University. - * All rights reserved. - * Copyright (c) 2010-2011 Alex Brick . - * All rights reserved. 
- * - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -/** - * @file - * - * DMTCP CRS component - * - */ - -#ifndef MCA_CRS_DMTCP_EXPORT_H -#define MCA_CRS_DMTCP_EXPORT_H - -#include "opal_config.h" - - -#include "opal/mca/mca.h" -#include "opal/mca/crs/crs.h" -#include "opal/mca/base/base.h" - -/* JJH NOTE: Include your library header here */ -/* #include */ -#include - -BEGIN_C_DECLS - - /* - * Local Component Structure - */ - struct opal_crs_dmtcp_component_t { - /** Base CRS component */ - opal_crs_base_component_t super; - - /** JJH: Add additional items here as needed internally */ - }; - typedef struct opal_crs_dmtcp_component_t opal_crs_dmtcp_component_t; - OPAL_MODULE_DECLSPEC extern opal_crs_dmtcp_component_t mca_crs_dmtcp_component; - - /* - * Component query command - * - Called during opal_init() to determine if this component should be selected. - */ - int opal_crs_dmtcp_component_query(mca_base_module_t **module, int *priority); - - /* - * Module functions - */ - int opal_crs_dmtcp_module_init(void); - int opal_crs_dmtcp_module_finalize(void); - - /* - * Actual CRS funcationality - */ - int opal_crs_dmtcp_checkpoint( pid_t pid, - opal_crs_base_snapshot_t *snapshot, - opal_crs_base_ckpt_options_t *options, - opal_crs_state_type_t *state); - - int opal_crs_dmtcp_restart( opal_crs_base_snapshot_t *snapshot, - bool spawn_child, - pid_t *child_pid); - - int opal_crs_dmtcp_disable_checkpoint(void); - int opal_crs_dmtcp_enable_checkpoint(void); - - int opal_crs_dmtcp_prelaunch(int32_t rank, - char *base_snapshot_dir, - char **app, - char **cwd, - char ***argv, - char ***env); - - int opal_crs_dmtcp_reg_thread(void); - -END_C_DECLS - -#endif /* MCA_CRS_DMTCP_EXPORT_H */ diff --git a/opal/mca/crs/dmtcp/crs_dmtcp_component.c b/opal/mca/crs/dmtcp/crs_dmtcp_component.c deleted file mode 100644 index 20e85fa406c..00000000000 --- a/opal/mca/crs/dmtcp/crs_dmtcp_component.c +++ /dev/null @@ -1,133 +0,0 @@ -/* -*- Mode: C; 
c-basic-offset:4 ; indent-tabs-mode:nil -*- */ -/* - * Copyright (c) 2010 The Trustees of Indiana University. - * All rights reserved. - * Copyright (c) 2010-2011 Alex Brick . - * All rights reserved. - * Copyright (c) 2015 Los Alamos National Security, LLC. All rights - * reserved. - * - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "opal_config.h" - -#include "opal/util/output.h" - -#include "opal/constants.h" -#include "opal/mca/crs/crs.h" -#include "opal/mca/crs/base/base.h" -#include "crs_dmtcp.h" - -/* - * Local functionality - */ -static int crs_dmtcp_register (void); -static int crs_dmtcp_open(void); -static int crs_dmtcp_close(void); - -/* - * Instantiate the public struct with all of our public information - * and pointer to our public functions in it - */ -opal_crs_dmtcp_component_t mca_crs_dmtcp_component = { - /* First do the base component stuff */ - { - /* Handle the general mca_component_t struct containing - * meta information about the component itself - */ - .base_version = { - OPAL_CRS_BASE_VERSION_2_0_0, - - /* Component name and version */ - .mca_component_name = "dmtcp", - MCA_BASE_MAKE_VERSION(component, OPAL_MAJOR_VERSION, OPAL_MINOR_VERSION, - OPAL_RELEASE_VERSION), - - /* Component open and close functions */ - .mca_open_component = crs_dmtcp_open, - .mca_close_component = crs_dmtcp_close, - .mca_query_component = opal_crs_dmtcp_component_query, - .mca_register_component_params = crs_dmtcp_register, - }, - .base_data = { - /* The component is checkpoint ready */ - MCA_BASE_METADATA_PARAM_CHECKPOINT - }, - - .verbose = 0, - .output_handle = -1 - } -}; - -static int crs_dmtcp_register (void) -{ - int ret; - /* - * User can adjust the relative priority of this component with respect - * to other CRS components available for selection. 
- */ - mca_crs_dmtcp_component.super.priority = 20 - ret = mca_base_component_var_register (&mca_crs_dmtcp_component.super.base_version, - "priority", "Priority of the CRS dmtcp component " - "(default: 20)", MCA_BASE_VAR_TYPE_INT, NULL, - MCA_BASE_VAR_FLAG_SETTABLE, - OPAL_INFO_LVL_9, - MCA_BASE_VAR_SCOPE_ALL_EQ, - &mca_crs_dmtcp_component.super.priority); - if (0 > ret) { - return ret; - } - - /* - * Adjust the verbosity level for this component. Default off or 0. - */ - mca_crs_dmtcp_component.super.verbose = 0; - ret = mca_base_component_var_register (&mca_crs_dmtcp_component.super.base_version, - "verbose", - "Verbose level for the CRS dmtcp component", - MCA_BASE_VAR_TYPE_INT, NULL,MCA_BASE_VAR_FLAG_SETTABLE, - OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_LOCAL, - &mca_crs_dmtcp_component.super.verbose); - return (0 > ret) ? ret : OPAL_SUCCESS; -} - -static int crs_dmtcp_open(void) -{ - /* If there is a custom verbose level for this component than use it - * otherwise take our parents level and output channel - */ - if ( 0 != mca_crs_dmtcp_component.super.verbose) { - mca_crs_dmtcp_component.super.output_handle = opal_output_open(NULL); - opal_output_set_verbosity(mca_crs_dmtcp_component.super.output_handle, - mca_crs_dmtcp_component.super.verbose); - } else { - mca_crs_dmtcp_component.super.output_handle = opal_crs_base_framework.framework_output; - } - - /* - * Debug output - */ - opal_output_verbose(10, mca_crs_dmtcp_component.super.output_handle, - "crs:dmtcp: open()"); - opal_output_verbose(20, mca_crs_dmtcp_component.super.output_handle, - "crs:dmtcp: open: priority = %d", - mca_crs_dmtcp_component.super.priority); - opal_output_verbose(20, mca_crs_dmtcp_component.super.output_handle, - "crs:dmtcp: open: verbosity = %d", - mca_crs_dmtcp_component.super.verbose); - - return OPAL_SUCCESS; -} - -static int crs_dmtcp_close(void) -{ - opal_output_verbose(10, mca_crs_dmtcp_component.super.output_handle, - "crs:dmtcp: close()"); - - return OPAL_SUCCESS; -} diff --git 
a/opal/mca/crs/dmtcp/crs_dmtcp_module.c b/opal/mca/crs/dmtcp/crs_dmtcp_module.c deleted file mode 100644 index e18626ff577..00000000000 --- a/opal/mca/crs/dmtcp/crs_dmtcp_module.c +++ /dev/null @@ -1,709 +0,0 @@ -/* - * Copyright (c) 2010 The Trustees of Indiana University. - * All rights reserved. - * Copyright (c) 2010-2011 Alex Brick . - * All rights reserved. - * - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "opal_config.h" - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "opal/util/output.h" -#include "opal/util/argv.h" -#include "opal/constants.h" - -#include "opal/mca/base/mca_base_var.h" - -#include "opal/threads/mutex.h" -#include "opal/threads/condition.h" - -#include "opal/mca/event/event.h" - -#include "opal/mca/crs/crs.h" -#include "opal/mca/crs/base/base.h" - -#include "crs_dmtcp.h" - -#define MTCP_RESTART_COMMAND "mtcp_restart" - -/* - * DMTCP module - */ -static opal_crs_base_module_t dmtcp_module = { - /** Initialization Function */ - opal_crs_dmtcp_module_init, - /** Finalization Function */ - opal_crs_dmtcp_module_finalize, - - /** Checkpoint interface */ - opal_crs_dmtcp_checkpoint, - - /** Restart Command Access */ - opal_crs_dmtcp_restart, - - /** Disable checkpoints */ - opal_crs_dmtcp_disable_checkpoint, - /** Enable checkpoints */ - opal_crs_dmtcp_enable_checkpoint, - - /** Prelaunch */ - opal_crs_dmtcp_prelaunch, - - /** Register Thread */ - opal_crs_dmtcp_reg_thread -}; - -/*************************** - * Snapshot Class Functions - ***************************/ -OBJ_CLASS_DECLARATION(opal_crs_dmtcp_snapshot_t); - -struct opal_crs_dmtcp_snapshot_t { - /** Base CRS snapshot type */ - opal_crs_base_snapshot_t super; - char * context_filename; -}; -typedef struct opal_crs_dmtcp_snapshot_t opal_crs_dmtcp_snapshot_t; - -void opal_crs_dmtcp_construct(opal_crs_dmtcp_snapshot_t *obj); -void opal_crs_dmtcp_destruct(opal_crs_dmtcp_snapshot_t 
*obj); - -OBJ_CLASS_INSTANCE(opal_crs_dmtcp_snapshot_t, - opal_crs_base_snapshot_t, - opal_crs_dmtcp_construct, - opal_crs_dmtcp_destruct); - -/****************** - * Local Functions - ******************/ -static int dmtcp_cold_start(opal_crs_dmtcp_snapshot_t *snapshot); -static int dmtcp_generate_full_ckpt_path(opal_crs_dmtcp_snapshot_t *snapshot); -static void dmtcp_sleep_between_ckpt_callback(int interval); -static void dmtcp_pre_ckpt_callback(char **ckpt_filename); -static void dmtcp_post_ckpt_callback(int is_restarting, - char *mtcp_restore_argv_start_addr); -static int dmtcp_should_ckpt_fd_callback(int fd); - -/************************* - * Local Global Variables - *************************/ -static char *full_ckpt_path = NULL; -static pthread_cond_t checkpoint_cond = PTHREAD_COND_INITIALIZER; -static pthread_cond_t checkpoint_done_cond = PTHREAD_COND_INITIALIZER; -static pthread_mutex_t checkpoint_mutex = PTHREAD_MUTEX_INITIALIZER; -static int post_ckpt_state; - -void opal_crs_dmtcp_construct(opal_crs_dmtcp_snapshot_t *snapshot) { - snapshot->context_filename = NULL; - snapshot->super.component_name = - strdup(mca_crs_dmtcp_component.super.base_version.mca_component_name); -} - -void opal_crs_dmtcp_destruct( opal_crs_dmtcp_snapshot_t *snapshot) { - if(NULL != snapshot->context_filename) { - free(snapshot->context_filename); - snapshot->context_filename = NULL; - } -} - -/***************** - * MCA Functions - *****************/ -int opal_crs_dmtcp_component_query(mca_base_module_t **module, int *priority) -{ - opal_output_verbose(10, mca_crs_dmtcp_component.super.output_handle, - "crs:dmtcp: component_query()"); - - *priority = mca_crs_dmtcp_component.super.priority; - *module = (mca_base_module_t *)&dmtcp_module; - - return OPAL_SUCCESS; -} - -int opal_crs_dmtcp_module_init(void) -{ - char *temp_checkpoint_name; - opal_output_verbose(10, mca_crs_dmtcp_component.super.output_handle, - "crs:dmtcp: module_init()"); - - /* - * JJH NOTE: Call any initialization 
routines you require - */ - mtcp_set_callbacks(dmtcp_sleep_between_ckpt_callback, /* sleep_between_ckpt */ - dmtcp_pre_ckpt_callback, /* pre_ckpt */ - dmtcp_post_ckpt_callback, /* post_ckpt */ - dmtcp_should_ckpt_fd_callback, /* ckpt_fd */ - NULL); /* write_ckpt_header */ - - /* This serves to simply initialize MTCP. The checkpoint file will - * actually be set by our pre_ckpt callback (which takes it from the - * snapshot given to the CRS checkpoint function), and the interval will be - * ignored, substituted for a synchronization signal that is handled by our - * sleep_between_ckpt callback. - */ - - asprintf(&temp_checkpoint_name, "checkpoint.dmtcp.%ld", syscall(SYS_getpid)); - mtcp_init(temp_checkpoint_name, 0, 1); - mtcp_ok(); - - opal_output_verbose(10, mca_crs_dmtcp_component.super.output_handle, - "crs:dmtcp: leaving module_init()"); - - free(temp_checkpoint_name); - - return OPAL_SUCCESS; -} - -int opal_crs_dmtcp_module_finalize(void) -{ - opal_output_verbose(10, mca_crs_dmtcp_component.super.output_handle, - "crs:dmtcp: module_finalize()"); - - /* - * JJH NOTE: Call any finalization routines you require - */ - - return OPAL_SUCCESS; -} - -int opal_crs_dmtcp_prelaunch(int32_t rank, - char *base_snapshot_dir, - char **app, - char **cwd, - char ***argv, - char ***env) -{ - char * tmp_env_var = NULL; - - /* - * The below should be left untouched for now - */ - (void) mca_base_var_env_name("opal_cr_is_tool", &tmp_env_var); - opal_setenv(tmp_env_var, - "0", true, env); - free(tmp_env_var); - tmp_env_var = NULL; - - opal_output_verbose(10, mca_crs_dmtcp_component.super.output_handle, - "crs:dmtcp: leaving module_prelaunch()"); - - return OPAL_SUCCESS; -} - -int opal_crs_dmtcp_reg_thread(void) -{ - /* - * JJH NOTE: If you require that all threads that may call into MTCP - * explicitly register with MTCP, then place the necessary - * initialization here. 
- */ - - opal_output_verbose(10, mca_crs_dmtcp_component.super.output_handle, - "crs:dmtcp: leaving module_reg_thread()"); - - return OPAL_SUCCESS; -} - -int opal_crs_dmtcp_checkpoint(pid_t pid, - opal_crs_base_snapshot_t *base_snapshot, - opal_crs_base_ckpt_options_t *options, - opal_crs_state_type_t *state) -{ - int unlock_retval, exit_status = OPAL_SUCCESS; - char buf[BUFSIZ]; - opal_crs_dmtcp_snapshot_t *snapshot; - - opal_output_verbose(10, mca_crs_dmtcp_component.super.output_handle, - "crs:dmtcp: about to lock mutex for checkpoint()"); - - pthread_mutex_lock(&checkpoint_mutex); - snapshot = (opal_crs_dmtcp_snapshot_t *) base_snapshot; - - opal_output_verbose(10, mca_crs_dmtcp_component.super.output_handle, - "crs:dmtcp: checkpoint(%d, ---)", pid); - - /* Are we checkpointing ourselves or a peer. - * JJH NOTE: This will only ever be called when pid == getpid() - * This is an old interface argument, that is no longer used. - */ - - /* bricka (2010-05-14): According to crs.h, 0 also indicates checkpointing - * self. - */ - if((pid != 0) && (pid != syscall(SYS_getpid)) ) { - /* MTCP can only checkpoint a single process: we can only checkpoint - * ourself. */ - *state = OPAL_CRS_ERROR; - exit_status = OPAL_ERROR; - goto cleanup; - } - - /* the metadata file should always be NULL at this point */ - if ( NULL != snapshot->super.metadata) { - opal_output(mca_crs_dmtcp_component.super.output_handle, - "crs:dmtcp: checkpoint(): Error: Metadata file already open"); - exit_status = OPAL_ERROR; - goto cleanup; - } - - /* - * Update the snapshot metadata with the component name so opal-restart can - * pick the correct CRS to restart with. 
- */ - snapshot->super.component_name = strdup(mca_crs_dmtcp_component.super.base_version.mca_component_name); - - if( NULL == snapshot->super.metadata ) { - if (NULL == (snapshot->super.metadata = fopen(snapshot->super.metadata_filename, "a")) ) { - opal_output(mca_crs_dmtcp_component.super.output_handle, - "crs:dmtcp: checkpoint(): Error: Unable to open the file (%s)", - snapshot->super.metadata_filename); - exit_status = OPAL_ERROR; - goto cleanup; - } - } - - /* The filename of the checkpoint will be changed by our pre_ckpt hook - * based on the options given to this function. */ - if(dmtcp_generate_full_ckpt_path(snapshot) == -1) { - opal_output(mca_crs_dmtcp_component.super.output_handle, - "crs:dmtcp: dmtcp_checkpoint: unable to generate context filename."); - - exit_status = OPAL_ERROR; - goto cleanup; - } - - /* - * JJH NOTE: You can write however much or little data you want to the - * metadata file. The metadata file is stored with the local - * checkpoint, and provided at restart time to help the - * CRS component deteremine how to restart from any files - * that is left in this directory during checkpoint. - * Use the command below to write key/value strings to the - * metadata file. - * (Just as we did above with the component name). - */ - if ( 0 > fprintf(snapshot->super.metadata, "%s%s\n", CRS_METADATA_COMP, snapshot->super.component_name)) { - opal_output(mca_crs_dmtcp_component.super.output_handle, - "crs:dmtcp: dmtcp_checkpoint: unable to print component name to metadata"); - } - - if ( 0 > fprintf(snapshot->super.metadata, "%s%s\n", CRS_METADATA_CONTEXT, snapshot->context_filename)) { - opal_output(mca_crs_dmtcp_component.super.output_handle, - "crs:dmtcp: dmtcp_checkpoint: unable to print context name to metadata"); - } - - fclose(snapshot->super.metadata ); - snapshot->super.metadata = NULL; - - /* - * JJH NOTE: Setup and request a checkpoint of this process. 
- */ - - opal_output_verbose(10, mca_crs_dmtcp_component.super.output_handle, - "crs:dmtcp: dmtcp_checkpoint: will checkpoint to file: %s", - full_ckpt_path); - - opal_output_verbose(10, mca_crs_dmtcp_component.super.output_handle, - "crs:dmtcp: dmtcp_checkpoint: about to signal checkpoint"); - - /* Now that we have set the requested filename, we simply need to start - * the checkpoint. */ - pthread_cond_signal(&checkpoint_cond); - - opal_output_verbose(10, mca_crs_dmtcp_component.super.output_handle, - "crs:dmtcp: dmtcp_checkpoint: signalled checkpoint"); - - /* We want to wait for the checkpoint to finish before we continue (in - * particular, we need the post_ckpt hook to happen so that we know the - * status of the checkpoint) - */ - pthread_cond_wait(&checkpoint_done_cond, &checkpoint_mutex); - - opal_output_verbose(10, mca_crs_dmtcp_component.super.output_handle, - "crs:dmtcp: dmtcp_checkpoint: received checkpoint_done signal"); - - /* We have now been checkpointed. Note that the state of the checkpoint - * (OPAL_CRS_CONTINUE, etc.) has been recorded by the post_ckpt hook. 
- */ - *state = post_ckpt_state; - exit_status = OPAL_SUCCESS; - - free(full_ckpt_path); - - cleanup: - unlock_retval = pthread_mutex_unlock(&checkpoint_mutex); - - if( 0 != unlock_retval ) { - opal_output(mca_crs_dmtcp_component.super.output_handle, - "crs:dmtcp: dmtcp_checkpoint: unable to unlock mutex at end of checkpoint: %s", - strerror_r(unlock_retval, buf, BUFSIZ)); - - exit_status = OPAL_ERROR; - } - - if( NULL != snapshot->super.metadata ) { - fclose(snapshot->super.metadata ); - snapshot->super.metadata = NULL; - } - - return exit_status; -} - -int opal_crs_dmtcp_restart(opal_crs_base_snapshot_t *base_snapshot, bool spawn_child, pid_t *child_pid) -{ - int ret, exit_status = OPAL_SUCCESS; - int exec_status; - - opal_crs_dmtcp_snapshot_t *snapshot = OBJ_NEW(opal_crs_dmtcp_snapshot_t); - snapshot->super = *base_snapshot; - - opal_output_verbose(10, mca_crs_dmtcp_component.super.output_handle, - "crs:dmtcp: restart(--, %d)", spawn_child); - - /* - * JJH NOTE: 'cold_start' indicates that this process is being restarted from - * opal-restart instead of from within an already running process. - * In the current code base, this is always set to true since it - * does not allow a process to request a restart of itself. - */ - if(snapshot->super.cold_start) { - /* - * Read the metadata left by the checkpoint() of this process - */ - if( OPAL_SUCCESS != (ret = dmtcp_cold_start(snapshot)) ) { - opal_output(mca_crs_dmtcp_component.super.output_handle, - "crs:dmtcp: dmtcp_restart: Unable to reconstruct the snapshot."); - exit_status = OPAL_ERROR; - goto cleanup; - } - } - - /* JJH NOTE: Nearly all of the time the 'spawn_child' argument is set to - * 'false' indicating that the restart function is expected to - * call exec() directly. It is only set to 'true' if the user - * explicitly tells opal-restart to spawn off the child, which - * rarely/never happens. So I would not worry about that option. 
- */ - if( spawn_child ) { - pid_t child_pid = fork(); - - if(child_pid > 0) - goto cleanup; - else if(child_pid < 0) { - opal_output(mca_crs_dmtcp_component.super.output_handle, - "crs:dmtcp: dmtcp_restart: Unable to spawn child."); - exit_status = OPAL_ERROR; - goto cleanup; - } - } - - /* - * JJH NOTE: Restart the process by replacing this process - */ - - opal_output_verbose(10, mca_crs_dmtcp_component.super.output_handle, - "crs:dmtcp: dmtcp_restart: About to invoke command: %s with argv: %s %s", - MTCP_RESTART_COMMAND, - MTCP_RESTART_COMMAND, - snapshot->context_filename); - - exec_status = execlp(MTCP_RESTART_COMMAND, MTCP_RESTART_COMMAND, snapshot->context_filename, NULL); - - /* If we get down here, something has broken. */ - - if(exec_status < 0) - opal_output(mca_crs_dmtcp_component.super.output_handle, - "crs:dmtcp: dmtcp_restart: error in replacing process: %s", - strerror(errno)); - else - opal_output(mca_crs_dmtcp_component.super.output_handle, - "crs:dmtcp: dmtcp_restart: exec() returned!"); - - exit_status = OPAL_ERROR; - goto cleanup; - - cleanup: - return exit_status; -} - -int opal_crs_dmtcp_disable_checkpoint(void) -{ - opal_output_verbose(10, mca_crs_dmtcp_component.super.output_handle, - "crs:dmtcp: disable_checkpoint()"); - - /* - * JJH NOTE: Enter a critical section. This is not really used in the code - * at the moment. - */ - mtcp_no(); - - return OPAL_SUCCESS; -} - -int opal_crs_dmtcp_enable_checkpoint(void) -{ - opal_output_verbose(10, mca_crs_dmtcp_component.super.output_handle, - "crs:dmtcp: enable_checkpoint()"); - /* - * JJH NOTE: Leave a critical section. This is not really used in the code - * at the moment. 
- */ - mtcp_ok(); - - return OPAL_SUCCESS; -} - -/***************************** - * Local Function Definitions - *****************************/ -static int dmtcp_cold_start(opal_crs_dmtcp_snapshot_t *snapshot) { - int ret, exit_status = OPAL_SUCCESS; - char **tmp_argv = NULL; - char * component_name = NULL; - int prev_pid; - - /* - * Find the snapshot directory, read the metadata file for - * component name and previous pid - */ - if( NULL == snapshot->super.metadata ) { - if (NULL == (snapshot->super.metadata = fopen(snapshot->super.metadata_filename, "r")) ) { - opal_output(mca_crs_dmtcp_component.super.output_handle, - "crs:dmtcp: dmtcp_cold_start(): Error: Unable to open the file (%s)", - snapshot->super.metadata_filename); - exit_status = OPAL_ERROR; - goto cleanup; - } - } - if( OPAL_SUCCESS != (ret = opal_crs_base_extract_expected_component(snapshot->super.metadata, - &component_name, &prev_pid) ) ) { - opal_output(mca_crs_dmtcp_component.super.output_handle, - "crs:dmtcp: dmtcp_cold_start: Error: Failed to extract the metadata from the local snapshot (%s). Returned %d.", - snapshot->super.metadata_filename, ret); - exit_status = ret; - goto cleanup; - } - - snapshot->super.component_name = strdup(component_name); - - /* - * Compare the component strings to make sure this is our snapshot before going further. - * JJH NOTE: This will nearly always be true since opal-restart also checks this metadata. 
- */ - if ( 0 != strncmp(mca_crs_dmtcp_component.super.base_version.mca_component_name, - component_name, strlen(component_name)) ) { - opal_output(mca_crs_dmtcp_component.super.output_handle, - "crs:dmtcp: dmtcp_cold_start: Error: This snapshot (%s) is not intended for us (%s)\n", - component_name, mca_crs_dmtcp_component.super.base_version.mca_component_name); - exit_status = OPAL_ERROR; - goto cleanup; - } - - /* - * Read context information from the metadata file - */ - opal_crs_base_metadata_read_token(snapshot->super.metadata, CRS_METADATA_CONTEXT, &tmp_argv); - if( NULL == tmp_argv ) { - opal_output(mca_crs_dmtcp_component.super.output_handle, - "crs:dmtcp: dmtcp_cold_start: Error: Failed to read the %s token from the local checkpoint in %s", - CRS_METADATA_CONTEXT, snapshot->super.snapshot_directory); - exit_status = OPAL_ERROR; - goto cleanup; - } - - asprintf(&(snapshot->context_filename), "%s/%s", snapshot->super.snapshot_directory, tmp_argv[0]); - - opal_output_verbose(10, mca_crs_dmtcp_component.super.output_handle, - "crs:dmtcp: cold_start(%s)", snapshot->context_filename); - - /* - * Reset the cold_start flag - */ - snapshot->super.cold_start = false; - - cleanup: - if(NULL != tmp_argv) { - opal_argv_free(tmp_argv); - tmp_argv = NULL; - } - - if( NULL != snapshot->super.metadata ) { - fclose(snapshot->super.metadata); - snapshot->super.metadata = NULL; - } - - return exit_status; -} - -/** - * Given a snapshot, generate the context filename and its full path. - * - * @param snapshot the snapshot with request information - */ -static int dmtcp_generate_full_ckpt_path(opal_crs_dmtcp_snapshot_t *snapshot) -{ - int retval; - retval = asprintf(&(snapshot->context_filename), "ompi_dmtcp_context.%ld", syscall(SYS_getpid)); - if(retval == -1) - return -1; - - return asprintf(&full_ckpt_path, "%s/%s", snapshot->super.snapshot_directory, snapshot->context_filename); -} - -/** - * This is a callback function to call the actual checkpointing routine. 
- * Instead of waiting for a specific interval as MTCP does, we will wait on a - * synchronization signal that will allow us to checkpoint on demand. The - * argument to this function will be ignored. - */ -static void dmtcp_sleep_between_ckpt_callback(int interval) -{ - int signal_retval; - char buf[BUFSIZ]; - - opal_output_verbose(10, mca_crs_dmtcp_component.super.output_handle, - "crs:dmtcp: called sleep_between_ckpt callback"); - - pthread_mutex_lock(&checkpoint_mutex); - - /* If the MPI checkpoint thread is waiting on the checkpoint_done_cond and - * this thread is here, it means that a checkpoint has just completed. - * Let's signal the MPI checkpoint thread to resume. */ - signal_retval = pthread_cond_signal(&checkpoint_done_cond); - - if( 0 != signal_retval) { - opal_output(mca_crs_dmtcp_component.super.output_handle, - "crs:dmtcp: post_ckpt_callback(): Unable to signal checkpoint done: %s", - strerror_r(signal_retval, buf, BUFSIZ)); - } - - /* now we simply wait for the signal to checkpoint */ - pthread_cond_wait(&checkpoint_cond, &checkpoint_mutex); - - opal_output_verbose(10, mca_crs_dmtcp_component.super.output_handle, - "crs:dmtcp: received sync signal to checkpoint."); - - /* We have now been instructed to checkpoint, so we return. Note that the - * mutex is still locked: the post_ckpt callback will unlock it. */ -} - -/** - * This is a callback function that is invoked before the checkpoint actually - * occurs. It enables us to do any logging that is necessary, as well as change - * the filename that the checkpoint will be written to. We expect that this - * filename will be pulled from the checkpoint options. - * - * @param ckpt_filename a pointer in which to store the desired checkpoint - * filename - */ -static void dmtcp_pre_ckpt_callback(char **ckpt_filename) -{ - *ckpt_filename = full_ckpt_path; -} - -/** - * This is a callback function that is invoked after the checkpoint has - * finished. 
It enables us to do any logging that is necessary, as well as - * report whether this is called from a restart or a checkpoint. We will report - * this status, signal the CRS code to continue running, and then release the - * mutex that we are holding. - * - * @param is_restarting whether or not this is being called as part of a restart - * @param mtcp_restore_argv_start_addr unused - */ -static void dmtcp_post_ckpt_callback(int is_restarting, char *mtcp_restore_argv_start_addr) -{ - int unlock_retval; - char buf[BUFSIZ]; - - opal_output_verbose(10, mca_crs_dmtcp_component.super.output_handle, - "crs:dmtcp: in post_ckpt_callback, restarting: %d", is_restarting); - if(is_restarting) - post_ckpt_state = OPAL_CRS_RESTART; - else - post_ckpt_state = OPAL_CRS_CONTINUE; - - opal_output_verbose(10, mca_crs_dmtcp_component.super.output_handle, - "crs:dmtcp: unlocking at end of post_ckpt_callback"); - - unlock_retval = pthread_mutex_unlock(&checkpoint_mutex); - - if( 0 != unlock_retval) { - opal_output(mca_crs_dmtcp_component.super.output_handle, - "crs:dmtcp: post_ckpt_callback(): Unable to unlock mutex: %s", - strerror_r(unlock_retval, buf, BUFSIZ)); - } -} - -/** - * This is a callback function that is invoked by DMTCP to see if it should - * checkpoint the given file descriptor. - * - * If the file descriptor is a socket, named-pipe or pseudo-terminal, DMTCP - * should skip checkpointing them. - * - * If we can't determine the type of fd (stat and/or readlink failed), we ask - * DMTCP to try to checkpoint them anyways with the assumption that DMTCP would - * warn users of any such case. - * - * @param fd file descriptor to checkpoint - * @return: 1 if DMTCP should ckpt the file descriptor, 0 otherwise. 
- */ -static int dmtcp_should_ckpt_fd_callback(int fd) -{ - struct stat stat_buf; - char device_name[PATH_MAX]; - char proc_filename[64]; - char buf[BUFSIZ]; - - if (fstat(fd, &stat_buf) != 0) { - opal_output(mca_crs_dmtcp_component.super.output_handle, - "crs:dmtcp: should_ckpt_fd_callback(): error stat()'ing %d: %s", - fd, strerror_r(errno, buf, BUFSIZ)); - return 1; - /* Don't checkpoint sockets and FIFOs */ - } else if (S_ISSOCK(stat_buf.st_mode) || S_ISFIFO(stat_buf.st_mode)) { - opal_output_verbose(10, mca_crs_dmtcp_component.super.output_handle, - "crs:dmtcp: skipping checkpointing socket/fifo: %d", - fd); - return 0; - } - - memset(device_name, 0, sizeof device_name); - sprintf(proc_filename, "/proc/self/fd/%d", fd); - if (readlink(proc_filename, device_name, sizeof(device_name) - 1) <= 0) { - opal_output(mca_crs_dmtcp_component.super.output_handle, - "crs:dmtcp: should_ckpt_fd_callback(): readlink(%d) failed: %s", - fd, strerror_r(errno, buf, BUFSIZ)); - return 1; - } - - /* Don't checkpoint ptys */ - if (strstr(device_name, "/dev/pts/") == 0 || - strstr(device_name, "/dev/pty") == 0 || - strstr(device_name, "/dev/tty") == 0) { - opal_output_verbose(10, mca_crs_dmtcp_component.super.output_handle, - "crs:dmtcp: skipping checkpointing %s", - device_name); - return 0; - } - - /* Checkpoint fd by default */ - return 1; -} diff --git a/opal/mca/crs/dmtcp/owner.txt b/opal/mca/crs/dmtcp/owner.txt deleted file mode 100644 index ed1d89a44ab..00000000000 --- a/opal/mca/crs/dmtcp/owner.txt +++ /dev/null @@ -1,7 +0,0 @@ -# -# owner/status file -# owner: institution that is responsible for this package -# status: e.g. active, maintenance, unmaintained -# -owner: U Brit.Columbia -status: unmaintained diff --git a/opal/mca/event/configure.m4 b/opal/mca/event/configure.m4 index f8040446009..5fde182b4cf 100644 --- a/opal/mca/event/configure.m4 +++ b/opal/mca/event/configure.m4 @@ -1,6 +1,6 @@ dnl -*- shell-script -*- dnl -dnl Copyright (c) 2010-2013 Cisco Systems, Inc. 
All rights reserved. +dnl Copyright (c) 2010-2018 Cisco Systems, Inc. All rights reserved dnl $COPYRIGHT$ dnl dnl Additional copyrights may follow @@ -8,21 +8,52 @@ dnl dnl $HEADER$ dnl -# There will only be one component used in this framework, and it will -# be selected at configure time by priority. Components must set -# their priorities in their configure.m4 files. They must also set -# the shell variable $opal_event_base_include to a header file name -# (relative to opal/mca/event) that will be included in -# opal/mca/event/event.h. +dnl There will only be one component used in this framework, and it will +dnl be selected at configure time by priority. Components must set +dnl their priorities in their configure.m4 files. They must also set +dnl the shell variable $opal_event_base_include to a header file name +dnl (relative to opal/mca/event) that will be included in +dnl opal/mca/event/event.h. dnl We only want one winning component (vs. STOP_AT_FIRST_PRIORITY, dnl which will allow all components of the same priority who succeed to dnl win) + m4_define(MCA_opal_event_CONFIGURE_MODE, STOP_AT_FIRST) +dnl +dnl Setup --with-libevent and --with-libevent-libdir +dnl +AC_DEFUN([MCA_opal_event_SETUP],[ + AC_ARG_WITH([libevent], + [AC_HELP_STRING([--with-libevent=DIR], + [Search for libevent headers and libraries in DIR. Should only be used if an external copy of libevent is being used.])]) + + # Bozo check + AS_IF([test "$with_libevent" = "no"], + [AC_MSG_WARN([It is not possible to configure Open MPI --without-libevent]) + AC_MSG_ERROR([Cannot continue])]) + AS_IF([test "$with_libevent" = "yes"], + [with_libevent=]) + + AC_ARG_WITH([libevent-libdir], + [AC_HELP_STRING([--with-libevent-libdir=DIR], + [Search for libevent libraries in DIR. 
Should only be used if an external copy of libevent is being used.])]) + + # Make sure the user didn't specify --with-libevent=internal and + # --with-libevent-libdir=whatever (because you can only specify + # --with-libevent-libdir when external libevent is being used). + AS_IF([test "$with_libevent" = "internal" && test -n "$with_libevent_libdir"], + [AC_MSG_WARN([Both --with-libevent=internal and --with-libevent-libdir=DIR]) + AC_MSG_WARN([were specified, which does not make sense.]) + AC_MSG_ERROR([Cannot continue])]) +]) + AC_DEFUN([MCA_opal_event_CONFIG],[ opal_event_base_include= + MCA_opal_event_SETUP + # configure all the components MCA_CONFIGURE_FRAMEWORK($1, $2, 1) diff --git a/opal/mca/event/external/configure.m4 b/opal/mca/event/external/configure.m4 index 498af38b405..15313db50a3 100644 --- a/opal/mca/event/external/configure.m4 +++ b/opal/mca/event/external/configure.m4 @@ -1,11 +1,11 @@ # -*- shell-script -*- # -# Copyright (c) 2009-2013 Cisco Systems, Inc. All rights reserved. +# Copyright (c) 2009-2018 Cisco Systems, Inc. All rights reserved # Copyright (c) 2013 Los Alamos National Security, LLC. All rights reserved. -# Copyright (c) 2015-2017 Research Organization for Information Science +# Copyright (c) 2015-2018 Research Organization for Information Science # and Technology (RIST). All rights reserved. # -# Copyright (c) 2017 Intel, Inc. All rights reserved. +# Copyright (c) 2017-2018 Intel, Inc. All rights reserved. # $COPYRIGHT$ # # Additional copyrights may follow @@ -56,78 +56,57 @@ AC_DEFUN([MCA_opal_event_external_POST_CONFIG],[ AC_DEFUN([MCA_opal_event_external_CONFIG],[ AC_CONFIG_FILES([opal/mca/event/external/Makefile]) - OPAL_VAR_SCOPE_PUSH([opal_event_external_CPPFLAGS_save opal_event_external_CFLAGS_save opal_event_external_LDFLAGS_save opal_event_external_LIBS_save]) - - AC_ARG_WITH([libevent], - [AC_HELP_STRING([--with-libevent=DIR], - [Search for libevent headers and libraries in DIR. 
Should only be used if an external copy of libevent is being used.])]) - - # Bozo check - AS_IF([test "$with_libevent" = "no"], - [AC_MSG_WARN([It is not possible to configure Open MPI --without-libevent]) - AC_MSG_ERROR([Cannot continue])]) - - AC_ARG_WITH([libevent-libdir], - [AC_HELP_STRING([--with-libevent-libdir=DIR], - [Search for libevent libraries in DIR. Should only be used if an external copy of libevent is being used.])]) - - # Make sure the user didn't specify --with-libevent=internal and - # --with-libevent-libdir=whatever (because you can only specify - # --with-libevent-libdir when external libevent is being used). - AS_IF([test "$with_libevent" = "internal" && test -n "$with_libevent_libdir"], - [AC_MSG_WARN([Both --with-libevent=internal and --with-libevent-libdir=DIR]) - AC_MSG_WARN([were specified, which does not make sense.]) - AC_MSG_ERROR([Cannot continue])]) - - # Do we want this external component? (slightly redundant logic, - # but hopefully slightly more clear...) 
- opal_event_external_want=no - AS_IF([test "$with_libevent" = "external"], [opal_event_external_want=yes]) - AS_IF([test -n "$with_libevent_libdir"], [opal_event_external_want=yes]) - AS_IF([test -n "$with_libevent" && test "$with_libevent" != "no" && test "$with_libevent" != "internal"], [opal_event_external_want=yes]) - - # If we want external support, try it - AS_IF([test "$opal_event_external_want" = "yes"], - [ # Error out if the specified dir does not exist - OPAL_CHECK_WITHDIR([libevent-libdir], [$with_libevent_libdir], - [libevent.*]) - - AC_MSG_CHECKING([for external libevent in]) - AS_IF([test "$with_libevent" != "external" && test "$with_libevent" != "yes"], - [opal_event_dir=$with_libevent - AC_MSG_RESULT([$opal_event_dir]) - OPAL_CHECK_WITHDIR([libevent], [$opal_event_dir], - [include/event.h]) - AS_IF([test -z "$with_libevent_libdir" || test "$with_libevent_libdir" = "yes"], - [AC_MSG_CHECKING([for $with_libevent/lib64]) - AS_IF([test -d "$with_libevent/lib64"], - [opal_event_libdir_found=yes - AC_MSG_RESULT([found])], - [opal_event_libdir_found=no - AC_MSG_RESULT([not found])]) - AS_IF([test "$opal_event_libdir_found" = "yes"], - [opal_event_libdir="$with_libevent/lib64"], - [AC_MSG_CHECKING([for $with_libevent/lib]) - AS_IF([test -d "$with_libevent/lib"], - [AC_MSG_RESULT([found]) - opal_event_libdir="$with_libevent/lib"], - [AC_MSG_RESULT([not found]) - AC_MSG_WARN([Library directories were not found:]) - AC_MSG_WARN([ $with_libevent/lib64]) - AC_MSG_WARN([ $with_libevent/lib]) - AC_MSG_WARN([Please use --with-libevent-libdir to identify it.]) - AC_MSG_ERROR([Cannot continue])])])])], - [AC_MSG_RESULT([(default search paths)])]) - AS_IF([test ! 
-z "$with_libevent_libdir" && test "$with_libevent_libdir" != "yes"], - [opal_event_libdir="$with_libevent_libdir"]) - - opal_event_external_CPPFLAGS_save=$CPPFLAGS + OPAL_VAR_SCOPE_PUSH([opal_event_external_CPPFLAGS_save opal_event_external_CFLAGS_save opal_event_external_LDFLAGS_save opal_event_external_LIBS_save opal_event_dir opal_event_summary_msg]) + + opal_event_summary_msg="internal" + + # Check the value of $with_libevent_libdir. This macro safely + # handles "yes", "no", blank, and directory name values. + OPAL_CHECK_WITHDIR([libevent-libdir], [$with_libevent_libdir], + [libevent.*]) + + # Did the user want us to check for libevent in a specific location? + AC_MSG_CHECKING([for external libevent in]) + AS_IF([test -n "$with_libevent" && \ + test "$with_libevent" != "external" && \ + test "$with_libevent" != "internal" && \ + test "$with_libevent" != "yes" && \ + test "$with_libevent" != "no"], + [opal_event_dir=$with_libevent + AC_MSG_RESULT([$opal_event_dir]) + OPAL_CHECK_WITHDIR([libevent], [$opal_event_dir], + [include/event2/event.h]) + AS_IF([test -z "$with_libevent_libdir" || test "$with_libevent_libdir" = "yes"], + [AC_MSG_CHECKING([for $with_libevent/lib64]) + AS_IF([test -d "$with_libevent/lib64"], + [opal_event_libdir_found=yes + AC_MSG_RESULT([found])], + [opal_event_libdir_found=no + AC_MSG_RESULT([not found])]) + AS_IF([test "$opal_event_libdir_found" = "yes"], + [opal_event_libdir="$with_libevent/lib64"], + [AC_MSG_CHECKING([for $with_libevent/lib]) + AS_IF([test -d "$with_libevent/lib"], + [AC_MSG_RESULT([found]) + opal_event_libdir="$with_libevent/lib"], + [AC_MSG_RESULT([not found]) + AC_MSG_WARN([Library directories were not found:]) + AC_MSG_WARN([ $with_libevent/lib64]) + AC_MSG_WARN([ $with_libevent/lib]) + AC_MSG_WARN([Please use --with-libevent-libdir to identify it.]) + AC_MSG_ERROR([Cannot continue])])])])], + [AC_MSG_RESULT([(default search paths)])]) + AS_IF([test ! 
-z "$with_libevent_libdir" && test "$with_libevent_libdir" != "yes"], + [opal_event_libdir="$with_libevent_libdir"]) + + AS_IF([test "$with_libevent" != "internal"], + [opal_event_external_CPPFLAGS_save=$CPPFLAGS opal_event_external_CFLAGS_save=$CFLAGS opal_event_external_LDFLAGS_save=$LDFLAGS opal_event_external_LIBS_save=$LIBS OPAL_CHECK_PACKAGE([opal_event_external], - [event.h], + [event2/event.h], [event], [event_config_new], [-levent_pthreads], @@ -136,23 +115,44 @@ AC_DEFUN([MCA_opal_event_external_CONFIG],[ [opal_event_external_support=yes], [opal_event_external_support=no]) - # Ensure that this libevent has the symbol - # "evthread_set_lock_callbacks", which will only exist if - # libevent was configured with thread support. - LIBS="$opal_event_external_LDFLAGS $LIBS" - AC_CHECK_LIB([event], [evthread_set_lock_callbacks], - [], - [AC_MSG_WARN([External libevent does not have thread support]) - AC_MSG_WARN([Open MPI requires libevent to be compiled with]) - AC_MSG_WARN([thread support enabled]) - AC_MSG_ERROR([Cannot continue])]) - - AC_CHECK_LIB([event_pthreads], [evthread_use_pthreads], - [], - [AC_MSG_WARN([External libevent does not have thread support]) - AC_MSG_WARN([Open MPI requires libevent to be compiled with]) - AC_MSG_WARN([thread support enabled]) - AC_MSG_ERROR([Cannot continue])]) + AS_IF([test "$opal_event_external_support" = "yes"], + [# Ensure that this libevent has the symbol + # "evthread_set_lock_callbacks", which will only exist if + # libevent was configured with thread support. 
+ LIBS="$opal_event_external_LDFLAGS $LIBS" + AC_CHECK_LIB([event], [evthread_set_lock_callbacks], + [], + [AC_MSG_WARN([External libevent does not have thread support]) + AC_MSG_WARN([Open MPI requires libevent to be compiled with]) + AC_MSG_WARN([thread support enabled]) + opal_event_external_support=no])]) + + AS_IF([test "$opal_event_external_support" = "yes"], + [AC_CHECK_LIB([event_pthreads], [evthread_use_pthreads], + [], + [AC_MSG_WARN([External libevent does not have thread support]) + AC_MSG_WARN([Open MPI requires libevent to be compiled with]) + AC_MSG_WARN([thread support enabled]) + opal_event_external_support=no])]) + + AS_IF([test "$opal_event_external_support" = "yes"], + [AS_IF([test -z "$with_libevent" || test "$with_libevent" = "yes"], + [AC_MSG_CHECKING([if external libevent version is 2.0.22 or greater]) + AC_COMPILE_IFELSE( + [AC_LANG_PROGRAM([[#include ]], + [[ +#if defined(_EVENT_NUMERIC_VERSION) && _EVENT_NUMERIC_VERSION < 0x02001600 +#error "libevent API version is less than 0x02001600" +#elif defined(EVENT__NUMERIC_VERSION) && EVENT__NUMERIC_VERSION < 0x02001600 +#error "libevent API version is less than 0x02001600" +#endif + ]])], + [AC_MSG_RESULT([yes])], + [AC_MSG_RESULT([no]) + opal_event_summary_msg="internal (external libevent version is less that internal version 2.0.22)" + AC_MSG_WARN([external libevent version is less than internal version (2.0.22)]) + AC_MSG_WARN([using internal libevent]) + opal_event_external_support=no])])]) CPPFLAGS=$opal_event_external_CPPFLAGS_save CFLAGS=$opal_event_external_CFLAGS_save @@ -171,20 +171,27 @@ AC_DEFUN([MCA_opal_event_external_CONFIG],[ # building with developer headers so that our headers can # be found. event_external_WRAPPER_EXTRA_LDFLAGS=$opal_event_external_LDFLAGS - event_external_WRAPPER_EXTRA_LIBS=$opal_event_external_LIBS - ]) + event_external_WRAPPER_EXTRA_LIBS=$opal_event_external_LIBS]) + +################################################################## # Done! 
AS_IF([test "$opal_event_external_support" = "yes"], - [ # If we configured successfully, set + [# If we configured successfully, set # OPAL_HAVE_WORKING_EVENTOPS to 1 (it's a calculated value # in the embedded Open MPI libevent, so we can only assume # what it is in the installed libevent :-\ ). file=$opal_event_dir/include/libevent/config.h OPAL_HAVE_WORKING_EVENTOPS=1 + opal_event_summary_msg="external" $1], [OPAL_HAVE_WORKING_EVENTOPS=0 + AS_IF([test "$with_libevent" != internal && test -n "$with_libevent"], + [AC_MSG_WARN([external libevent requested but cannot be built]) + AC_MSG_ERROR([Cannot continue.])]) $2]) + OPAL_SUMMARY_ADD([[Miscellaneous]],[[Libevent support]], [], [$opal_event_summary_msg]) + OPAL_VAR_SCOPE_POP ])dnl diff --git a/opal/mca/event/libevent2022/configure.m4 b/opal/mca/event/libevent2022/configure.m4 index 81ffb3ca82b..244fcc8ba4b 100644 --- a/opal/mca/event/libevent2022/configure.m4 +++ b/opal/mca/event/libevent2022/configure.m4 @@ -1,6 +1,6 @@ # -*- shell-script -*- # -# Copyright (c) 2009-2015 Cisco Systems, Inc. All rights reserved. +# Copyright (c) 2009-2018 Cisco Systems, Inc. All rights reserved. # Copyright (c) 2012-2013 Los Alamos National Security, LLC. All rights reserved. # Copyright (c) 2015 Intel, Inc. All rights reserved. 
# Copyright (c) 2015-2016 Research Organization for Information Science @@ -14,9 +14,9 @@ # AC_DEFUN([MCA_opal_event_libevent2022_PRIORITY], [80]) -# -# Force this component to compile in static-only mode -# +dnl +dnl Force this component to compile in static-only mode +dnl AC_DEFUN([MCA_opal_event_libevent2022_COMPILE_MODE], [ AC_MSG_CHECKING([for MCA component $2:$3 compile mode]) $4="static" @@ -24,7 +24,8 @@ AC_DEFUN([MCA_opal_event_libevent2022_COMPILE_MODE], [ ]) AC_DEFUN([MCA_opal_event_libevent2022_POST_CONFIG], [ - AM_CONDITIONAL(OPAL_EVENT_HAVE_THREAD_SUPPORT, test "$enable_event_thread_support" = "yes") + AM_CONDITIONAL(OPAL_EVENT_HAVE_THREAD_SUPPORT, + [test "$enable_event_thread_support" = "yes"]) AS_IF([test "$1" = "1"], [ # Build libevent/include/event2/event-config.h. If we # don't do it here, then libevent's Makefile.am will build @@ -34,8 +35,8 @@ AC_DEFUN([MCA_opal_event_libevent2022_POST_CONFIG], [ # copied from libevent's Makefile.am. AC_CONFIG_COMMANDS([opal/mca/event/libevent2022/libevent/include/event2/event-config.h], - [libevent_basedir="opal/mca/event/libevent2022" - libevent_file="$libevent_basedir/libevent/include/event2/event-config.h" + [opal_event_libevent2022_basedir="opal/mca/event/libevent2022" + libevent_file="$opal_event_libevent2022_basedir/libevent/include/event2/event-config.h" rm -f "$libevent_file.new" cat > "$libevent_file.new" <> "$libevent_file.new" + -e 's/#ifndef /#ifndef _EVENT_/' < "$opal_event_libevent2022_basedir/libevent/config.h" >> "$libevent_file.new" echo "#endif" >> "$libevent_file.new" # Only make a new .h libevent_file if the @@ -74,7 +75,7 @@ EOF # Add some stuff to CPPFLAGS so that the rest of the source # tree can be built - libevent_file=$libevent_basedir/libevent + libevent_file=$opal_event_libevent2022_basedir/libevent CPPFLAGS="-I$OPAL_TOP_SRCDIR/$libevent_file -I$OPAL_TOP_SRCDIR/$libevent_file/include $CPPFLAGS" AS_IF([test "$OPAL_TOP_BUILDDIR" != "$OPAL_TOP_SRCDIR"], 
[CPPFLAGS="-I$OPAL_TOP_BUILDDIR/$libevent_file/include $CPPFLAGS"]) @@ -82,14 +83,51 @@ EOF ]) ]) -# MCA_event_libevent2022_CONFIG([action-if-can-compile], -# [action-if-cant-compile]) -# ------------------------------------------------ +dnl MCA_event_libevent2022_CONFIG([action-if-can-compile], +dnl [action-if-cant-compile]) +dnl ------------------------------------------------ AC_DEFUN([MCA_opal_event_libevent2022_CONFIG],[ - OPAL_VAR_SCOPE_PUSH([CFLAGS_save CPPFLAGS_save libevent_file event_args libevent_happy]) - AC_CONFIG_FILES([opal/mca/event/libevent2022/Makefile]) - libevent_basedir="opal/mca/event/libevent2022" + opal_event_libevent2022_basedir="opal/mca/event/libevent2022" + + # We know that the external event component will be configured + # before this one because of its priority. This component is only + # needed if the external component was not successful in selecting + # itself. + AC_MSG_CHECKING([if event external component succeeded]) + AS_IF([test "$opal_event_external_support" = "yes"], + [AC_MSG_RESULT([yes]) + AC_MSG_NOTICE([event:external succeeded, so this component will be configured, but then will be skipped]) + MCA_opal_event_libevent2022_FAKE_CONFIG($2)], + [AC_MSG_RESULT([no]) + AC_MSG_NOTICE([event:external failed, so this component will be used]) + MCA_opal_event_libevent2022_REAL_CONFIG($1, $2)]) +]) + +dnl +dnl This macro is invoked when event:external is going to be used (and +dnl this component is *not* going to be used). +dnl +dnl $1: action if this component can compile +dnl (we still invoke $1 so that "make distclean" and friends will work) +dnl +AC_DEFUN([MCA_opal_event_libevent2022_FAKE_CONFIG],[ + MCA_opal_event_libevent2022_SUB_CONFIGURE([], [], []) + AC_MSG_NOTICE([remember: event:external will be used; this component was configured, but will be skipped]) + $1 +]) + +dnl +dnl This macro has a bunch of side effects. 
It is only meant to be +dnl invoked when this component is going to be used (i.e., when +dnl event:external is *not* going to be used). If this macro is invoked +dnl when event:external is used, Terrible Things will happen. +dnl +dnl $1: action if this component can compile +dnl $2: action if this component cannot compile +dnl +AC_DEFUN([MCA_opal_event_libevent2022_REAL_CONFIG],[ + OPAL_VAR_SCOPE_PUSH([CFLAGS_save CPPFLAGS_save libevent_file event_args libevent_happy]) CFLAGS_save="$CFLAGS" CFLAGS="$OPAL_CFLAGS_BEFORE_PICKY $OPAL_VISIBILITY_CFLAGS" @@ -159,14 +197,14 @@ AC_DEFUN([MCA_opal_event_libevent2022_CONFIG],[ AC_MSG_RESULT([$event_args]) + # Invoke the embedded configure script. # We define "random" to be "opal_random" so that Libevent will not # use random(3) internally (and potentially unexpectedly perturb # values returned by rand(3) to the application). - CPPFLAGS="$CPPFLAGS -Drandom=opal_random" - OPAL_CONFIG_SUBDIR([$libevent_basedir/libevent], - [$event_args $opal_subdir_args 'CPPFLAGS=$CPPFLAGS'], - [libevent_happy="yes"], [libevent_happy="no"]) + MCA_opal_event_libevent2022_SUB_CONFIGURE([$event_args], + [libevent_happy="yes"], + [libevent_happy="no"]) if test "$libevent_happy" = "no"; then AC_MSG_WARN([Event library failed to configure]) AC_MSG_ERROR([Cannot continue]) @@ -184,26 +222,34 @@ AC_DEFUN([MCA_opal_event_libevent2022_CONFIG],[ # the value in the generated libevent/config.h (NOT # libevent/include/event2/event-config.h!). Otherwise, set it to # 0. - libevent_file=$libevent_basedir/libevent/config.h - - # If we are not building the internal libevent, then indicate that - # this component should not be built. NOTE: we still did all the - # above configury so that all the proper GNU Autotools - # infrastructure is setup properly (e.g., w.r.t. SUBDIRS=libevent in - # this directory's Makefile.am, we still need the Autotools "make - # distclean" infrastructure to work properly). 
- - AS_IF([test "$with_libevent" != "internal" && test -n "$with_libevent" && test "$with_libevent" != "yes"], - [AC_MSG_WARN([using an external libevent; disqualifying this component]) - libevent_happy=no], - - [AS_IF([test "$libevent_happy" = "yes" && test -r $libevent_file], - [OPAL_HAVE_WORKING_EVENTOPS=`grep HAVE_WORKING_EVENTOPS $libevent_file | awk '{print [$]3 }'` - $1], - [$2 - OPAL_HAVE_WORKING_EVENTOPS=0]) - ] - ) + libevent_file=$opal_event_libevent2022_basedir/libevent/config.h + + AS_IF([test "$libevent_happy" = "yes" && test -r $libevent_file], + [OPAL_HAVE_WORKING_EVENTOPS=`grep HAVE_WORKING_EVENTOPS $libevent_file | awk '{print [$]3 }'` + $1], + [$2 + OPAL_HAVE_WORKING_EVENTOPS=0]) OPAL_VAR_SCOPE_POP ]) + +dnl Call configure in the embedded libevent. +dnl +dnl We still do this so that all the proper GNU Autotools +dnl infrastructure is setup properly (e.g., w.r.t. SUBDIRS=libevent in +dnl this directorys Makefile.am, we still need the Autotools "make +dnl distclean" infrastructure to work properly). +dnl +dnl $1: extra configure arguments +dnl $2: action on success +dnl $3: action on failure +dnl +AC_DEFUN([MCA_opal_event_libevent2022_SUB_CONFIGURE],[ + # We define "random" to be "opal_random" so that Libevent will not + # use random(3) internally (and potentially unexpectedly perturb + # values returned by rand(3) to the application). 
+ + OPAL_CONFIG_SUBDIR([$opal_event_libevent2022_basedir/libevent], + [$1 $opal_subdir_args 'CPPFLAGS=$CPPFLAGS'], + [$2], [$3]) +]) diff --git a/opal/mca/hwloc/base/hwloc_base_dt.c b/opal/mca/hwloc/base/hwloc_base_dt.c index 0840ee13f11..c0e24d44de3 100644 --- a/opal/mca/hwloc/base/hwloc_base_dt.c +++ b/opal/mca/hwloc/base/hwloc_base_dt.c @@ -96,7 +96,7 @@ int opal_hwloc_unpack(opal_buffer_t *buffer, void *dest, free(xmlbuffer); goto cleanup; } - if (0 != hwloc_topology_set_xmlbuffer(t, xmlbuffer, strlen(xmlbuffer))) { + if (0 != hwloc_topology_set_xmlbuffer(t, xmlbuffer, strlen(xmlbuffer)+1)) { rc = OPAL_ERROR; free(xmlbuffer); hwloc_topology_destroy(t); diff --git a/opal/mca/hwloc/base/hwloc_base_util.c b/opal/mca/hwloc/base/hwloc_base_util.c index f0a4f14cc46..ba26ba0ac6d 100644 --- a/opal/mca/hwloc/base/hwloc_base_util.c +++ b/opal/mca/hwloc/base/hwloc_base_util.c @@ -16,6 +16,10 @@ * Copyright (c) 2013-2018 Intel, Inc. All rights reserved. * Copyright (c) 2015-2017 Research Organization for Information Science * and Technology (RIST). All rights reserved. + * Copyright (C) 2018 Mellanox Technologies, Ltd. + * All rights reserved. + * Copyright (c) 2018 Amazon.com, Inc. or its affiliates. All Rights reserved. + * Copyright (c) 2019 IBM Corporation. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -362,7 +366,7 @@ int opal_hwloc_base_get_topology(void) free(val); return OPAL_ERROR; } - if (0 != hwloc_topology_set_xmlbuffer(opal_hwloc_topology, val, strlen(val))) { + if (0 != hwloc_topology_set_xmlbuffer(opal_hwloc_topology, val, strlen(val)+1)) { free(val); hwloc_topology_destroy(opal_hwloc_topology); return OPAL_ERROR; @@ -1719,14 +1723,14 @@ int opal_hwloc_base_cset2str(char *str, int len, for (core_index = 0; core_index < num_cores; ++core_index) { if (map[socket_index][core_index] > 0) { if (!first) { - strncat(str, ", ", len - strlen(str)); + strncat(str, ", ", len - strlen(str) - 1); } first = false; snprintf(tmp, stmp, "socket %d[core %d[hwt %s]]", socket_index, core_index, bitmap2rangestr(map[socket_index][core_index])); - strncat(str, tmp, len - strlen(str)); + strncat(str, tmp, len - strlen(str) - 1); } } } @@ -1782,7 +1786,7 @@ int opal_hwloc_base_cset2mapstr(char *str, int len, for (socket = hwloc_get_obj_by_type(topo, HWLOC_OBJ_SOCKET, 0); NULL != socket; socket = socket->next_cousin) { - strncat(str, "[", len - strlen(str)); + strncat(str, "[", len - strlen(str) - 1); /* Iterate over all existing cores in this socket */ core_index = 0; @@ -1794,7 +1798,7 @@ int opal_hwloc_base_cset2mapstr(char *str, int len, socket->cpuset, HWLOC_OBJ_CORE, ++core_index)) { if (core_index > 0) { - strncat(str, "/", len - strlen(str)); + strncat(str, "/", len - strlen(str) - 1); } /* Iterate over all existing PUs in this core */ @@ -1809,13 +1813,13 @@ int opal_hwloc_base_cset2mapstr(char *str, int len, /* Is this PU in the cpuset? 
*/ if (hwloc_bitmap_isset(cpuset, pu->os_index)) { - strncat(str, "B", len - strlen(str)); + strncat(str, "B", len - strlen(str) - 1); } else { - strncat(str, ".", len - strlen(str)); + strncat(str, ".", len - strlen(str) - 1); } } } - strncat(str, "]", len - strlen(str)); + strncat(str, "]", len - strlen(str) - 1); } return OPAL_SUCCESS; @@ -1857,9 +1861,18 @@ static void sort_by_dist(hwloc_topology_t topo, char* device_name, opal_list_t * if (!strcmp(device_obj->name, device_name)) { /* find numa node containing this device */ obj = device_obj->parent; +#if HWLOC_API_VERSION < 0x20000 while ((obj != NULL) && (obj->type != HWLOC_OBJ_NODE)) { obj = obj->parent; } +#else + while (obj && !obj->memory_arity) { + obj = obj->parent; /* no memory child, walk up */ + } + if (obj != NULL) { + obj = obj->memory_first_child; + } +#endif if (obj == NULL) { opal_output_verbose(5, opal_hwloc_base_framework.framework_output, "hwloc:base:get_sorted_numa_list: NUMA node closest to %s wasn't found.", diff --git a/opal/mca/hwloc/external/configure.m4 b/opal/mca/hwloc/external/configure.m4 index d1b4a1380b7..dd7bce346c4 100644 --- a/opal/mca/hwloc/external/configure.m4 +++ b/opal/mca/hwloc/external/configure.m4 @@ -1,7 +1,7 @@ # -*- shell-script -*- # # Copyright (c) 2009-2017 Cisco Systems, Inc. All rights reserved -# Copyright (c) 2014-2017 Research Organization for Information Science +# Copyright (c) 2014-2018 Research Organization for Information Science # and Technology (RIST). All rights reserved. # # Copyright (c) 2018 Intel, Inc. All rights reserved. 
@@ -84,7 +84,7 @@ AC_DEFUN([MCA_opal_hwloc_external_POST_CONFIG],[ AC_DEFUN([MCA_opal_hwloc_external_CONFIG],[ AC_CONFIG_FILES([opal/mca/hwloc/external/Makefile]) - OPAL_VAR_SCOPE_PUSH([opal_hwloc_external_CPPFLAGS_save opal_hwloc_external_CFLAGS_save opal_hwloc_external_LDFLAGS_save opal_hwloc_external_LIBS_save opal_hwloc_external_want opal_hwloc_external_tmp opal_hwloc_external_lstopo]) + OPAL_VAR_SCOPE_PUSH([opal_hwloc_external_CPPFLAGS_save opal_hwloc_external_CFLAGS_save opal_hwloc_external_LDFLAGS_save opal_hwloc_external_LIBS_save opal_hwloc_external_tmp opal_hwloc_external_lstopo opal_hwloc_summary_msg]) AC_ARG_WITH([hwloc-libdir], [AC_HELP_STRING([--with-hwloc-libdir=DIR], @@ -98,19 +98,9 @@ AC_DEFUN([MCA_opal_hwloc_external_CONFIG],[ AC_MSG_WARN([were specified, which does not make sense.]) AC_MSG_ERROR([Cannot continue])]) - # Do we want this external component? (slightly redundant logic, - # but hopefully slightly more clear...) - opal_hwloc_external_want=no - AS_IF([test "$with_hwloc_libdir" != ""], [opal_hwloc_external_want=yes]) - AS_IF([test "$with_hwloc" = "external"], [opal_hwloc_external_want=yes]) - AS_IF([test "$with_hwloc" != "" && \ - test "$with_hwloc" != "no" && \ - test "$with_hwloc" != "internal" && \ - test "$with_hwloc" != "future"], [opal_hwloc_external_want=yes]) - AS_IF([test "$with_hwloc" = "no"], [opal_hwloc_external_want=no]) - - # If we still want external support, try it - AS_IF([test "$opal_hwloc_external_want" = "yes"], + opal_hwloc_summary_msg="internal" + # Try external support if needed + AS_IF([test "$with_hwloc" != "internal"], [OPAL_CHECK_WITHDIR([hwloc-libdir], [$with_hwloc_libdir], [libhwloc.*]) @@ -143,65 +133,88 @@ AC_DEFUN([MCA_opal_hwloc_external_CONFIG],[ [CPPFLAGS="$CPPFLAGS $opal_hwloc_external_CPPFLAGS" LDFLAGS="$LDFLAGS $opal_hwloc_external_LDFLAGS" LIBS="$LIBS $opal_hwloc_external_LIBS" - AC_CHECK_DECLS([HWLOC_OBJ_OSDEV_COPROC], [], [], [#include ]) - AC_CHECK_FUNCS([hwloc_topology_dup])]) - - 
CPPFLAGS=$opal_hwloc_external_CPPFLAGS_save - CFLAGS=$opal_hwloc_external_CFLAGS_save - LDFLAGS=$opal_hwloc_external_LDFLAGS_save - LIBS=$opal_hwloc_external_LIBS_save - ]) - - # Done! - AS_IF([test "$opal_hwloc_external_support" = "yes"], - [AC_DEFINE_UNQUOTED([HWLOC_EXTERNAL_HWLOC_VERSION], - [external], - [Version of hwloc]) - # See if the external hwloc supports XML - AC_MSG_CHECKING([if external hwloc supports XML]) - AS_IF([test "$opal_hwloc_dir" != ""], - [opal_hwloc_external_lstopo="$opal_hwloc_dir/bin/lstopo"], - [OPAL_WHICH(lstopo, opal_hwloc_external_lstopo)]) - opal_hwloc_external_tmp=`$opal_hwloc_external_lstopo --help | $GREP "Supported output file formats" | grep xml` - AS_IF([test "$opal_hwloc_external_tmp" = ""], - [opal_hwloc_external_enable_xml=0 - AC_MSG_RESULT([no])], - [opal_hwloc_external_enable_xml=1 - AC_MSG_RESULT([yes])]) - - AC_CHECK_HEADERS([infiniband/verbs.h]) - - AC_MSG_CHECKING([if external hwloc version is 1.5 or greater]) - AS_IF([test "$opal_hwloc_dir" != ""], - [opal_hwloc_external_CFLAGS_save=$CFLAGS - CFLAGS="-I$opal_hwloc_dir/include $opal_hwloc_external_CFLAGS_save"]) - AC_COMPILE_IFELSE( - [AC_LANG_PROGRAM([[#include ]], - [[ + AC_MSG_CHECKING([if external hwloc version is 1.5 or greater]) + AC_COMPILE_IFELSE( + [AC_LANG_PROGRAM([[#include ]], + [[ #if HWLOC_API_VERSION < 0x00010500 #error "hwloc API version is less than 0x00010500" #endif - ]])], - [AC_MSG_RESULT([yes])], - [AC_MSG_RESULT([no]) - AC_MSG_ERROR([Cannot continue])]) + ]])], + [AC_MSG_RESULT([yes])], + [AC_MSG_RESULT([no]) + opal_hwloc_external_support=no])]) - AS_IF([test "$opal_hwloc_dir" != ""], - [CFLAGS=$opal_hwloc_external_CFLAGS_save]) + # If external hwloc is not explicitly requested, check external version + # is not lower than the internal one + AS_IF([test "$opal_hwloc_external_support" = "yes"], + [AS_IF([test -z "$with_hwloc" || test "$with_hwloc" = "yes"], + [AC_MSG_CHECKING([if external hwloc version is 2.0 or greater]) + AC_COMPILE_IFELSE( + 
[AC_LANG_PROGRAM([[#include ]], + [[ +#if HWLOC_API_VERSION < 0x00020000 +#error "hwloc API version is less than 0x00020000" +#endif + ]])], + [AC_MSG_RESULT([yes])], + [AC_MSG_RESULT([no]) + opal_hwloc_summary_msg="internal (external hlwoc version is less than internal version 2.0)" + AC_MSG_WARN([external hwloc version is less than internal version 2.0]) + AC_MSG_WARN([using internal hwloc]) + opal_hwloc_external_support=no])])]) - # These flags need to get passed to the wrapper compilers - # (this is unnecessary for the internal/embedded hwloc) + AS_IF([test "$opal_hwloc_external_support" = "yes"], + [AC_DEFINE_UNQUOTED([HWLOC_EXTERNAL_HWLOC_VERSION], + [external], + [Version of hwloc]) - # Finally, add some flags to the wrapper compiler if we're - # building with developer headers so that our headers can - # be found. - hwloc_external_WRAPPER_EXTRA_CPPFLAGS=$opal_hwloc_external_CPPFLAGS - hwloc_external_WRAPPER_EXTRA_LDFLAGS=$opal_hwloc_external_LDFLAGS - hwloc_external_WRAPPER_EXTRA_LIBS=$opal_hwloc_external_LIBS + AC_CHECK_DECLS([HWLOC_OBJ_OSDEV_COPROC], [], [], [#include ]) + AC_CHECK_FUNCS([hwloc_topology_dup]) + + # See if the external hwloc supports XML + AC_MSG_CHECKING([if external hwloc supports XML]) + AS_IF([test "$opal_hwloc_dir" != ""], + [opal_hwloc_external_lstopo="$opal_hwloc_dir/bin/lstopo"], + [OPAL_WHICH(lstopo, opal_hwloc_external_lstopo)]) + opal_hwloc_external_tmp=`$opal_hwloc_external_lstopo --help | $GREP "Supported output file formats" | grep xml` + AS_IF([test "$opal_hwloc_external_tmp" = ""], + [opal_hwloc_external_enable_xml=0 + AC_MSG_RESULT([no])], + [opal_hwloc_external_enable_xml=1 + AC_MSG_RESULT([yes])]) + + AC_CHECK_HEADERS([infiniband/verbs.h]) + + # These flags need to get passed to the wrapper compilers + # (this is unnecessary for the internal/embedded hwloc) + + # Finally, add some flags to the wrapper compiler if we're + # building with developer headers so that our headers can + # be found. 
+ hwloc_external_WRAPPER_EXTRA_CPPFLAGS=$opal_hwloc_external_CPPFLAGS + hwloc_external_WRAPPER_EXTRA_LDFLAGS=$opal_hwloc_external_LDFLAGS + hwloc_external_WRAPPER_EXTRA_LIBS=$opal_hwloc_external_LIBS]) + + CPPFLAGS=$opal_hwloc_external_CPPFLAGS_save + CFLAGS=$opal_hwloc_external_CFLAGS_save + LDFLAGS=$opal_hwloc_external_LDFLAGS_save + LIBS=$opal_hwloc_external_LIBS_save + ]) + # Done! + AS_IF([test "$opal_hwloc_external_support" = "yes"], + [opal_hwloc_summary_msg="external" $1], - [$2]) + [# Abort is external hwloc was explicitly requested but cannot be built + AS_IF([test "$with_hwloc" != internal && + test -n "$with_hwloc"], + [AC_MSG_WARN([external hwloc cannot be built]) + AC_MSG_ERROR([Cannot continue.])]) + $2]) + + OPAL_SUMMARY_ADD([[Miscellaneous]],[[HWLOC support]], [], [$opal_hwloc_summary_msg]) AC_SUBST(opal_hwloc_external_LDFLAGS) AC_SUBST(opal_hwloc_external_LIBS) diff --git a/opal/mca/hwloc/external/external.h b/opal/mca/hwloc/external/external.h index 1428459755e..8a1a32068ba 100644 --- a/opal/mca/hwloc/external/external.h +++ b/opal/mca/hwloc/external/external.h @@ -1,6 +1,6 @@ /* - * Copyright (c) 2011-2017 Cisco Systems, Inc. All rights reserved - * Copyright (c) 2016 Research Organization for Information Science + * Copyright (c) 2011-2019 Cisco Systems, Inc. All rights reserved + * Copyright (c) 2016-2019 Research Organization for Information Science * and Technology (RIST). All rights reserved. * * Copyright (c) 2016-2017 Intel, Inc. All rights reserved. @@ -21,6 +21,23 @@ BEGIN_C_DECLS #include + +/* Top-level configure will always configure the embedded hwloc + * component, even if we already know that we'll be using an external + * hwloc (because of complicated reasons). A side-effect of this is + * that the embedded hwloc will AC_DEFINE HWLOC_VERSION (and friends) + * in opal_config.h. 
If the external hwloc defines a different value + * of HWLOC_VERSION (etc.), we'll get zillions of warnings about the + * two HWLOC_VERSION values not matching. Hence, we undefined all of + * them here (so that the external can define them to + * whatever it wants). */ + +#undef HWLOC_VERSION +#undef HWLOC_VERSION_MAJOR +#undef HWLOC_VERSION_MINOR +#undef HWLOC_VERSION_RELEASE +#undef HWLOC_VERSION_GREEK + #include MCA_hwloc_external_header /* If the including file requested it, also include the hwloc verbs diff --git a/opal/mca/hwloc/hwloc201/configure.m4 b/opal/mca/hwloc/hwloc201/configure.m4 index b6e60bc11e3..78b1b9856fb 100644 --- a/opal/mca/hwloc/hwloc201/configure.m4 +++ b/opal/mca/hwloc/hwloc201/configure.m4 @@ -1,8 +1,8 @@ # -*- shell-script -*- # -# Copyright (c) 2009-2017 Cisco Systems, Inc. All rights reserved +# Copyright (c) 2009-2018 Cisco Systems, Inc. All rights reserved # Copyright (c) 2014-2017 Intel, Inc. All rights reserved. -# Copyright (c) 2015-2017 Research Organization for Information Science +# Copyright (c) 2015-2018 Research Organization for Information Science # and Technology (RIST). All rights reserved. # Copyright (c) 2016 Los Alamos National Security, LLC. All rights # reserved. @@ -17,7 +17,7 @@ # # Priority # -AC_DEFUN([MCA_opal_hwloc_hwloc201_PRIORITY], [90]) +AC_DEFUN([MCA_opal_hwloc_hwloc201_PRIORITY], [80]) # # Force this component to compile in static-only mode @@ -76,24 +76,25 @@ AC_DEFUN([MCA_opal_hwloc_hwloc201_CONFIG],[ OPAL_VAR_SCOPE_PUSH([HWLOC_VERSION opal_hwloc_hwloc201_save_CPPFLAGS opal_hwloc_hwloc201_save_LDFLAGS opal_hwloc_hwloc201_save_LIBS opal_hwloc_hwloc201_save_cairo opal_hwloc_hwloc201_save_xml opal_hwloc_hwloc201_save_mode opal_hwloc_hwloc201_basedir opal_hwloc_hwloc201_file opal_hwloc_hwloc201_save_cflags CPPFLAGS_save LIBS_save opal_hwloc_external]) + # We know that the external hwloc component will be configured + # before this one because of its priority. 
This component is only + # needed if the external component was not successful in selecting + # itself. Print out a message explaining this. + AC_MSG_CHECKING([if hwloc external component succeeded]) + AS_IF([test "$opal_hwloc_external_support" = "yes"], + [AC_MSG_RESULT([yes]) + AC_MSG_NOTICE([hwloc:external succeeded, so this component will be configured, but then will be skipped])], + [AC_MSG_RESULT([no]) + AC_MSG_NOTICE([hwloc:external failed, so this component will be used])]) + # default to this component not providing support opal_hwloc_hwloc201_basedir=opal/mca/hwloc/hwloc201 opal_hwloc_hwloc201_support=no - AS_IF([test "$with_hwloc" = "internal" || test -z "$with_hwloc" || test "$with_hwloc" = "yes"], - [opal_hwloc_external="no"], - [opal_hwloc_external="yes"]) - opal_hwloc_hwloc201_save_CPPFLAGS=$CPPFLAGS opal_hwloc_hwloc201_save_LDFLAGS=$LDFLAGS opal_hwloc_hwloc201_save_LIBS=$LIBS - # Run the hwloc configuration - if no external hwloc, then set the prefixi - # to minimize the chance that someone will use the internal symbols - AS_IF([test "$opal_hwloc_external" = "no" && - test "$with_hwloc" != "future"], - [HWLOC_SET_SYMBOL_PREFIX([opal_hwloc201_])]) - # save XML or graphical options opal_hwloc_hwloc201_save_cairo=$enable_cairo opal_hwloc_hwloc201_save_xml=$enable_xml @@ -136,6 +137,12 @@ AC_DEFUN([MCA_opal_hwloc_hwloc201_CONFIG],[ AS_IF([test -n "$opal_datatype_cuda_CPPFLAGS"], [CPPFLAGS="$CPPFLAGS $opal_datatype_cuda_CPPFLAGS"]) + # Only set the symbol prefix if this component is being used + # (i.e., if the external component is not being used). + AS_IF([test "$opal_hwloc_external_support" = "no"], + [HWLOC_SET_SYMBOL_PREFIX([opal_hwloc201_])]) + + # Do the bulk of the hwloc core setup HWLOC_SETUP_CORE([opal/mca/hwloc/hwloc201/hwloc], [AC_MSG_CHECKING([whether hwloc configure succeeded]) AC_MSG_RESULT([yes]) @@ -195,8 +202,8 @@ AC_DEFUN([MCA_opal_hwloc_hwloc201_CONFIG],[ # infrastructure is setup properly (e.g., w.r.t. 
SUBDIRS=hwloc in # this directory's Makefile.am, we still need the Autotools "make # distclean" infrastructure to work properly). - AS_IF([test "$opal_hwloc_external" = "yes"], - [AC_MSG_WARN([using an external hwloc; disqualifying this component]) + AS_IF([test "$opal_hwloc_external_support" = "yes"], + [AC_MSG_NOTICE([using an external hwloc; disqualifying this component]) opal_hwloc_hwloc201_support=no], [AC_DEFINE([HAVE_DECL_HWLOC_OBJ_OSDEV_COPROC], [1]) AC_DEFINE([HAVE_HWLOC_TOPOLOGY_DUP], [1])]) diff --git a/opal/mca/memory/patcher/configure.m4 b/opal/mca/memory/patcher/configure.m4 index 6881ec69366..0c5d8553259 100644 --- a/opal/mca/memory/patcher/configure.m4 +++ b/opal/mca/memory/patcher/configure.m4 @@ -40,10 +40,6 @@ AC_DEFUN([MCA_opal_memory_patcher_CONFIG],[ AC_CHECK_HEADERS([linux/mman.h sys/syscall.h]) - AC_CHECK_DECLS([__mmap], [], [], [#include ]) - - AC_CHECK_FUNCS([__mmap]) - AC_CHECK_DECLS([__syscall], [], [], [#include ]) AC_CHECK_FUNCS([__syscall]) diff --git a/opal/mca/memory/patcher/memory_patcher_component.c b/opal/mca/memory/patcher/memory_patcher_component.c index bf676dbdca9..687d430fa36 100644 --- a/opal/mca/memory/patcher/memory_patcher_component.c +++ b/opal/mca/memory/patcher/memory_patcher_component.c @@ -15,7 +15,7 @@ * reserved. * Copyright (c) 2016-2017 Research Organization for Information Science * and Technology (RIST). All rights reserved. - * Copyright (c) 2016 IBM Corporation. All rights reserved. + * Copyright (c) 2016-2019 IBM Corporation. All rights reserved. * * $COPYRIGHT$ * @@ -48,6 +48,9 @@ #if defined(HAVE_LINUX_MMAN_H) #include #endif +#if defined(HAVE_SYS_IPC_H) +#include +#endif #include "memory_patcher.h" #undef opal_memory_changed @@ -104,15 +107,7 @@ opal_memory_patcher_component_t mca_memory_patcher_component = { * data. If this can be resolved the two levels can be joined. 
*/ -/* - * The following block of code is #if 0'ed out because we do not need - * to intercept mmap() any more (mmap() only deals with memory - * protection; it does not invalidate any rcache entries for a given - * region). But if we do someday, this is the code that we'll need. - * It's a little non-trivial, so we might as well keep it (and #if 0 - * it out). - */ -#if 0 +#if defined (SYS_mmap) #if defined(HAVE___MMAP) && !HAVE_DECL___MMAP /* prototype for Apple's internal mmap function */ @@ -121,35 +116,30 @@ void *__mmap (void *start, size_t length, int prot, int flags, int fd, off_t off static void *(*original_mmap)(void *, size_t, int, int, int, off_t); -static void *intercept_mmap(void *start, size_t length, int prot, int flags, int fd, off_t offset) +static void *_intercept_mmap(void *start, size_t length, int prot, int flags, int fd, off_t offset) { - OPAL_PATCHER_BEGIN; void *result = 0; - if (prot == PROT_NONE) { + if ((flags & MAP_FIXED) && (start != NULL)) { opal_mem_hooks_release_hook (start, length, true); } if (!original_mmap) { -#ifdef HAVE___MMAP - /* the darwin syscall returns an int not a long so call the underlying __mmap function */ - result = __mmap (start, length, prot, flags, fd, offset); -#else result = (void*)(intptr_t) memory_patcher_syscall(SYS_mmap, start, length, prot, flags, fd, offset); -#endif - - // I thought we had some issue in the past with the above line for IA32, - // like maybe syscall() wouldn't handle that many arguments. But just now - // I used gcc -m32 and it worked on a recent system. But there's a possibility - // that older ia32 systems may need some other code to make the above syscall. 
} else { result = original_mmap (start, length, prot, flags, fd, offset); } - OPAL_PATCHER_END; return result; } +static void *intercept_mmap(void *start, size_t length, int prot, int flags, int fd, off_t offset) +{ + OPAL_PATCHER_BEGIN; + void *result = _intercept_mmap (start, length, prot, flags, fd, offset); + OPAL_PATCHER_END; + return result; +} #endif #if defined (SYS_munmap) @@ -256,6 +246,9 @@ static int _intercept_madvise (void *start, size_t length, int advice) int result = 0; if (advice == MADV_DONTNEED || +#ifdef MADV_FREE + advice == MADV_FREE || +#endif #ifdef MADV_REMOVE advice == MADV_REMOVE || #endif @@ -341,7 +334,12 @@ static int intercept_brk (void *addr) #endif -#if defined(SYS_shmdt) && defined(__linux__) +#define HAS_SHMDT (defined(SYS_shmdt) || \ + (defined(IPCOP_shmdt) && defined(SYS_ipc))) +#define HAS_SHMAT (defined(SYS_shmat) || \ + (defined(IPCOP_shmat) && defined(SYS_ipc))) + +#if (HAS_SHMDT || HAS_SHMAT) && defined(__linux__) #include #include @@ -404,6 +402,68 @@ static size_t memory_patcher_get_shm_seg_size (const void *shmaddr) return seg_size; } +static size_t get_shm_size(int shmid) +{ + struct shmid_ds ds; + int ret; + + ret = shmctl(shmid, IPC_STAT, &ds); + if (ret < 0) { + return 0; + } + + return ds.shm_segsz; +} +#endif + +#if HAS_SHMAT && defined(__linux__) +static void *(*original_shmat)(int shmid, const void *shmaddr, int shmflg); + +static void *_intercept_shmat(int shmid, const void *shmaddr, int shmflg) +{ + void *result = 0; + + size_t size = get_shm_size(shmid); + + if ((shmflg & SHM_REMAP) && (shmaddr != NULL)) { +// I don't really know what REMAP combined with SHM_RND does, so I'll just +// guess it remaps all the way down to the lower attach_addr, and all the +// way up to the original shmaddr+size + uintptr_t attach_addr = (uintptr_t)shmaddr; + + if (shmflg & SHM_RND) { + attach_addr -= ((uintptr_t)shmaddr) % SHMLBA; + size += ((uintptr_t)shmaddr) % SHMLBA; + } + opal_mem_hooks_release_hook ((void*)attach_addr, 
size, false); + } + + if (!original_shmat) { +#if defined(SYS_shmat) + result = memory_patcher_syscall(SYS_shmat, shmid, shmaddr, shmflg); +#else // IPCOP_shmat + unsigned long ret; + ret = memory_patcher_syscall(SYS_ipc, IPCOP_shmat, + shmid, shmflg, &shmaddr, shmaddr); + result = (ret > -(unsigned long)SHMLBA) ? (void *)ret : (void *)shmaddr; +#endif + } else { + result = original_shmat (shmid, shmaddr, shmflg); + } + + return result; +} + +static void* intercept_shmat (int shmid, const void * shmaddr, int shmflg) +{ + OPAL_PATCHER_BEGIN; + void *result = _intercept_shmat (shmid, shmaddr, shmflg); + OPAL_PATCHER_END; + return result; +} +#endif + +#if HAS_SHMDT && defined(__linux__) static int (*original_shmdt) (const void *); static int _intercept_shmdt (const void *shmaddr) @@ -417,7 +477,11 @@ static int _intercept_shmdt (const void *shmaddr) if (original_shmdt) { result = original_shmdt (shmaddr); } else { +#if defined(SYS_shmdt) result = memory_patcher_syscall (SYS_shmdt, shmaddr); +#else // IPCOP_shmdt + result = memory_patcher_syscall(SYS_ipc, IPCOP_shmdt, 0, 0, 0, shmaddr); +#endif } return result; @@ -478,9 +542,7 @@ static int patcher_open (void) /* set memory hooks support level */ opal_mem_hooks_set_support (OPAL_MEMORY_FREE_SUPPORT | OPAL_MEMORY_MUNMAP_SUPPORT); -#if 0 - /* See above block to see why mmap() functionality is #if 0'ed - out */ +#if defined (SYS_mmap) rc = opal_patcher->patch_symbol ("mmap", (uintptr_t) intercept_mmap, (uintptr_t *) &original_mmap); if (OPAL_SUCCESS != rc) { return rc; @@ -508,7 +570,14 @@ static int patcher_open (void) } #endif -#if defined(SYS_shmdt) && defined(__linux__) +#if HAS_SHMAT && defined(__linux__) + rc = opal_patcher->patch_symbol ("shmat", (uintptr_t) intercept_shmat, (uintptr_t *) &original_shmat); + if (OPAL_SUCCESS != rc) { + return rc; + } +#endif + +#if HAS_SHMDT && defined(__linux__) rc = opal_patcher->patch_symbol ("shmdt", (uintptr_t) intercept_shmdt, (uintptr_t *) &original_shmdt); if 
(OPAL_SUCCESS != rc) { return rc; diff --git a/opal/mca/mpool/base/Makefile.am b/opal/mca/mpool/base/Makefile.am index 646444e231d..dd85a97fb6b 100644 --- a/opal/mca/mpool/base/Makefile.am +++ b/opal/mca/mpool/base/Makefile.am @@ -28,7 +28,8 @@ libmca_mpool_la_SOURCES += \ base/mpool_base_lookup.c \ base/mpool_base_alloc.c \ base/mpool_base_tree.c \ - base/mpool_base_default.c + base/mpool_base_default.c \ + base/mpool_base_basic.c dist_opaldata_DATA += \ base/help-mpool-base.txt diff --git a/opal/mca/mpool/base/base.h b/opal/mca/mpool/base/base.h index 88a99cad01a..6d95665bff7 100644 --- a/opal/mca/mpool/base/base.h +++ b/opal/mca/mpool/base/base.h @@ -53,6 +53,8 @@ OPAL_DECLSPEC OBJ_CLASS_DECLARATION(mca_mpool_base_selected_module_t); OPAL_DECLSPEC mca_mpool_base_component_t* mca_mpool_base_component_lookup(const char* name); OPAL_DECLSPEC mca_mpool_base_module_t* mca_mpool_base_module_lookup(const char* name); +OPAL_DECLSPEC mca_mpool_base_module_t *mca_mpool_basic_create (void *base, size_t size, unsigned min_align); + /* * Globals */ diff --git a/opal/mca/mpool/base/mpool_base_basic.c b/opal/mca/mpool/base/mpool_base_basic.c new file mode 100644 index 00000000000..fba7e6fed7b --- /dev/null +++ b/opal/mca/mpool/base/mpool_base_basic.c @@ -0,0 +1,109 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ +/* + * Copyright (c) 2015 Los Alamos National Security, LLC. All rights + * reserved. + * Copyright (c) 2016 Intel, Inc. All rights reserved. + * Copyrigth (c) 2018 Triad National Security, LLC. All rights + * reserved. 
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include "opal_config.h" +#include "opal/align.h" + +#include +#include +#ifdef HAVE_UNISTD_H +#include +#endif /* HAVE_UNISTD_H */ + +#include "opal/mca/mca.h" +#include "opal/mca/base/base.h" +#include "opal/mca/mpool/base/base.h" +#include "opal/constants.h" +#include "opal/util/sys_limits.h" + +struct mca_mpool_base_basic_module_t { + mca_mpool_base_module_t super; + opal_mutex_t lock; + uintptr_t ptr; + size_t size; + size_t avail; + unsigned min_align; +}; +typedef struct mca_mpool_base_basic_module_t mca_mpool_base_basic_module_t; + +static void *mca_mpool_base_basic_alloc (mca_mpool_base_module_t *mpool, size_t size, + size_t align, uint32_t flags) +{ + mca_mpool_base_basic_module_t *basic_module = (mca_mpool_base_basic_module_t *) mpool; + uintptr_t next_ptr; + void *ptr; + + opal_mutex_lock (&basic_module->lock); + + align = align > basic_module->min_align ? align : basic_module->min_align; + + next_ptr = OPAL_ALIGN(basic_module->ptr, align, uintptr_t); + + size = OPAL_ALIGN(size, 8, size_t) + next_ptr - basic_module->ptr; + + if (size > basic_module->avail) { + opal_mutex_unlock (&basic_module->lock); + return NULL; + } + + ptr = (void *) next_ptr; + basic_module->avail -= size; + basic_module->ptr += size; + + opal_mutex_unlock (&basic_module->lock); + return ptr; +} + +/** + * free function + */ +static void mca_mpool_base_basic_free (mca_mpool_base_module_t *mpool, void *addr) +{ + /* nothing to do for now */ +} + +static void mca_mpool_base_basic_finalize (struct mca_mpool_base_module_t *mpool) +{ + mca_mpool_base_basic_module_t *basic_module = (mca_mpool_base_basic_module_t *) mpool; + + OBJ_DESTRUCT(&basic_module->lock); + free (mpool); +} + +static mca_mpool_base_module_t mca_mpool_basic_template = { + .mpool_alloc = mca_mpool_base_basic_alloc, + .mpool_free = mca_mpool_base_basic_free, + .mpool_finalize = mca_mpool_base_basic_finalize, + .flags = 
MCA_MPOOL_FLAGS_MPI_ALLOC_MEM, +}; + +mca_mpool_base_module_t *mca_mpool_basic_create (void *base, size_t size, unsigned min_align) +{ + mca_mpool_base_basic_module_t *basic_module = calloc (1, sizeof (*basic_module)); + + if (OPAL_UNLIKELY(NULL == basic_module)) { + return NULL; + } + + memcpy (&basic_module->super, &mca_mpool_basic_template, sizeof (mca_mpool_basic_template)); + + OBJ_CONSTRUCT(&basic_module->lock, opal_mutex_t); + + basic_module->super.mpool_base = base; + basic_module->ptr = (uintptr_t) base; + basic_module->size = basic_module->avail = size; + basic_module->min_align = min_align; + + return &basic_module->super; +} diff --git a/opal/mca/mpool/hugepage/mpool_hugepage_component.c b/opal/mca/mpool/hugepage/mpool_hugepage_component.c index 02320b9f91e..712301a38c5 100644 --- a/opal/mca/mpool/hugepage/mpool_hugepage_component.c +++ b/opal/mca/mpool/hugepage/mpool_hugepage_component.c @@ -264,12 +264,17 @@ static void mca_mpool_hugepage_find_hugepages (void) { hp->path = strdup (mntent->mnt_dir); hp->page_size = page_size; - - OPAL_OUTPUT_VERBOSE((MCA_BASE_VERBOSE_INFO, opal_mpool_base_framework.framework_output, - "found huge page with size = %lu, path = %s, mmap flags = 0x%x", - hp->page_size, hp->path, hp->mmap_flags)); - - opal_list_append (&mca_mpool_hugepage_component.huge_pages, &hp->super); + + if(0 == access (hp->path, R_OK | W_OK)){ + opal_output_verbose (MCA_BASE_VERBOSE_INFO, opal_mpool_base_framework.framework_output, + "found huge page with size = %lu, path = %s, mmap flags = 0x%x, adding to list", + hp->page_size, hp->path, hp->mmap_flags); + opal_list_append (&mca_mpool_hugepage_component.huge_pages, &hp->super); + } else { + opal_output_verbose (MCA_BASE_VERBOSE_INFO, opal_mpool_base_framework.framework_output, + "found huge page with size = %lu, path = %s, mmap flags = 0x%x, with invalid " + "permissions, skipping", hp->page_size, hp->path, hp->mmap_flags); + } } opal_list_sort (&mca_mpool_hugepage_component.huge_pages, 
page_compare); diff --git a/opal/mca/mpool/memkind/mpool_memkind_component.c b/opal/mca/mpool/memkind/mpool_memkind_component.c index 41145d039c1..96128b2dd3a 100644 --- a/opal/mca/mpool/memkind/mpool_memkind_component.c +++ b/opal/mca/mpool/memkind/mpool_memkind_component.c @@ -15,7 +15,7 @@ * Copyright (c) 2010-2018 Los Alamos National Security, LLC. All rights * reserved. * Copyright (c) 2014 NVIDIA Corporation. All rights reserved. - * Copyright (c) 2017 Research Organization for Information Science + * Copyright (c) 2017-2018 Research Organization for Information Science * and Technology (RIST). All rights reserved. * $COPYRIGHT$ * @@ -237,7 +237,7 @@ static int mca_mpool_memkind_close(void) opal_output_close (mca_mpool_memkind_component.output); mca_mpool_memkind_component.output = -1; - OBJ_DESTRUCT(&mca_mpool_memkind_component.module_list); + OPAL_LIST_DESTRUCT(&mca_mpool_memkind_component.module_list); if (mca_mpool_memkind_policy_enum) { OBJ_RELEASE(mca_mpool_memkind_policy_enum); diff --git a/opal/mca/patcher/base/patcher_base_patch.c b/opal/mca/patcher/base/patcher_base_patch.c index 3d97bef6be2..07e2c1ea345 100644 --- a/opal/mca/patcher/base/patcher_base_patch.c +++ b/opal/mca/patcher/base/patcher_base_patch.c @@ -1,6 +1,6 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* - * Copyright (c) 2016 Los Alamos National Security, LLC. All rights + * Copyright (c) 2016-2018 Los Alamos National Security, LLC. All rights * reserved. * Copyright (c) 2017 Research Organization for Information Science * and Technology (RIST). All rights reserved. 
@@ -107,7 +107,11 @@ static void flush_and_invalidate_cache (unsigned long a) #elif OPAL_ASSEMBLY_ARCH == OPAL_IA64 __asm__ volatile ("fc %0;; sync.i;; srlz.i;;" : : "r"(a) : "memory"); #elif OPAL_ASSEMBLY_ARCH == OPAL_ARM64 - __asm__ volatile ("dsb sy"); + __asm__ volatile ("dc cvau, %0\n\t" + "dsb ish\n\t" + "ic ivau, %0\n\t" + "dsb ish\n\t" + "isb":: "r" (a)); #endif } @@ -138,10 +142,27 @@ static inline void apply_patch (unsigned char *patch_data, uintptr_t address, si { ModifyMemoryProtection (address, data_size, PROT_EXEC|PROT_READ|PROT_WRITE); memcpy ((void *) address, patch_data, data_size); - for (size_t i = 0 ; i < data_size ; i += 16) { +#if HAVE___CLEAR_CACHE + /* do not allow global declaration of compiler intrinsic */ + void __clear_cache(void* beg, void* end); + + __clear_cache ((void *) address, (void *) (address + data_size)); +#else + size_t offset_jump = 16; + +#if OPAL_ASSEMBLY_ARCH == OPAL_ARM64 + offset_jump = 32; +#endif + + /* align the address */ + address &= ~(offset_jump - 1); + + for (size_t i = 0 ; i < data_size ; i += offset_jump) { flush_and_invalidate_cache (address + i); } +#endif + ModifyMemoryProtection (address, data_size, PROT_EXEC|PROT_READ); } diff --git a/opal/mca/pmix/base/pmix_base_select.c b/opal/mca/pmix/base/pmix_base_select.c index b66466afb69..53891f45c49 100644 --- a/opal/mca/pmix/base/pmix_base_select.c +++ b/opal/mca/pmix/base/pmix_base_select.c @@ -4,6 +4,8 @@ * Copyright (c) 2015 Los Alamos National Security, LLC. All rights * reserved. * Copyright (c) 2016-2018 Cisco Systems, Inc. All rights reserved + * Copyright (c) 2018 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -19,6 +21,7 @@ #include "opal/mca/base/base.h" #include "opal/mca/pmix/pmix.h" #include "opal/mca/pmix/base/base.h" +#include "opal/util/opal_environ.h" /* * Globals @@ -33,11 +36,8 @@ int opal_pmix_base_select(void) // value of OPAL's mca_base_component_show_load_errors (i.e., the // bool variable behind Open MPI's mca_component_show_load_errors // MCA param). - char *pmix_show_load_errors_env = NULL; - asprintf(&pmix_show_load_errors_env, - "PMIX_MCA_mca_base_component_show_load_errors=%d", - mca_base_component_show_load_errors ? 1 : 0); - putenv(pmix_show_load_errors_env); + char *pmix_show_load_errors_env = mca_base_component_show_load_errors ? "1" : "0"; + opal_setenv("PMIX_MCA_mca_base_component_show_load_errors", pmix_show_load_errors_env, true, &environ); /* * Select the best component diff --git a/opal/mca/pmix/ext1x/configure.m4 b/opal/mca/pmix/ext1x/configure.m4 index 16bbf2a502a..abbca782eaa 100644 --- a/opal/mca/pmix/ext1x/configure.m4 +++ b/opal/mca/pmix/ext1x/configure.m4 @@ -33,9 +33,7 @@ AC_DEFUN([MCA_opal_pmix_ext1x_CONFIG],[ AS_IF([test "$opal_external_pmix_happy" = "yes"], [ # check for the 1.x version ( >= 1.1.4 ?) 
AC_MSG_CHECKING([if external component is version 1.x]) - AS_IF([test "$opal_external_pmix_version" = "11" || - test "$opal_external_pmix_version" = "12" || - test "$opal_external_pmix_version" = "1x"], + AS_IF([test "$opal_external_pmix_version" = "1x"], [AC_MSG_RESULT([yes]) AS_IF([test "$opal_event_external_support" != "yes"], [AC_MSG_WARN([EXTERNAL PMIX SUPPORT REQUIRES USE OF EXTERNAL LIBEVENT]) diff --git a/opal/mca/pmix/ext1x/pmix1x.c b/opal/mca/pmix/ext1x/pmix1x.c index 410c7c79916..d71d35b1bfc 100644 --- a/opal/mca/pmix/ext1x/pmix1x.c +++ b/opal/mca/pmix/ext1x/pmix1x.c @@ -185,6 +185,7 @@ pmix_status_t pmix1_convert_opalrc(int rc) case OPAL_ERROR: return PMIX_ERROR; case OPAL_SUCCESS: + case OPAL_OPERATION_SUCCEEDED: return PMIX_SUCCESS; default: return PMIX_ERROR; diff --git a/opal/mca/pmix/ext2x/Makefile.am b/opal/mca/pmix/ext2x/Makefile.am index e5273558436..194443d93a9 100644 --- a/opal/mca/pmix/ext2x/Makefile.am +++ b/opal/mca/pmix/ext2x/Makefile.am @@ -1,5 +1,5 @@ # -# Copyright (c) 2014-2017 Intel, Inc. All rights reserved. +# Copyright (c) 2014-2018 Intel, Inc. All rights reserved. # Copyright (c) 2015 Cisco Systems, Inc. All rights reserved. # Copyright (c) 2015 Research Organization for Information Science # and Technology (RIST). All rights reserved. 
@@ -38,15 +38,15 @@ endif mcacomponentdir = $(opallibdir) mcacomponent_LTLIBRARIES = $(component_install) -mca_pmix_ext2x_la_SOURCES = $(sources) +mca_pmix_ext2x_la_SOURCES = $(sources) $(headers) mca_pmix_ext2x_la_CFLAGS = $(opal_pmix_ext2x_CFLAGS) -mca_pmix_ext2x_la_CPPFLAGS =$(opal_pmix_ext2x_CPPFLAGS) +mca_pmix_ext2x_la_CPPFLAGS = $(opal_pmix_ext2x_CPPFLAGS) mca_pmix_ext2x_la_LDFLAGS = -module -avoid-version $(opal_pmix_ext2x_LDFLAGS) mca_pmix_ext2x_la_LIBADD = $(top_builddir)/opal/lib@OPAL_LIB_PREFIX@open-pal.la \ $(opal_pmix_ext2x_LIBS) noinst_LTLIBRARIES = $(component_noinst) -libmca_pmix_ext2x_la_SOURCES =$(sources) +libmca_pmix_ext2x_la_SOURCES = $(sources) $(headers) libmca_pmix_ext2x_la_CFLAGS = $(opal_pmix_ext2x_CFLAGS) libmca_pmix_ext2x_la_CPPFLAGS = $(opal_pmix_ext2x_CPPFLAGS) libmca_pmix_ext2x_la_LDFLAGS = -module -avoid-version $(opal_pmix_ext2x_LDFLAGS) diff --git a/opal/mca/pmix/ext2x/ext2x.c b/opal/mca/pmix/ext2x/ext2x.c index 3e3b53980b6..7e0803a9e56 100644 --- a/opal/mca/pmix/ext2x/ext2x.c +++ b/opal/mca/pmix/ext2x/ext2x.c @@ -440,6 +440,7 @@ pmix_status_t ext2x_convert_opalrc(int rc) case OPAL_ERROR: return PMIX_ERROR; case OPAL_SUCCESS: + case OPAL_OPERATION_SUCCEEDED: return PMIX_SUCCESS; default: return rc; diff --git a/opal/mca/pmix/ext2x/ext2x.h b/opal/mca/pmix/ext2x/ext2x.h index 8e2012906c7..0f28c8f667e 100644 --- a/opal/mca/pmix/ext2x/ext2x.h +++ b/opal/mca/pmix/ext2x/ext2x.h @@ -37,12 +37,6 @@ BEGIN_C_DECLS -#ifdef OPAL_C_HAVE_VISIBILITY -#define PMIX_HAVE_VISIBILITY 1 -#else -#undef PMIX_HAVE_VISIBILITY -#endif - typedef struct { opal_pmix_base_component_t super; bool legacy_get; diff --git a/opal/mca/pmix/ext2x/ext2x_client.c b/opal/mca/pmix/ext2x/ext2x_client.c index 43c711f5848..9cffb66a538 100644 --- a/opal/mca/pmix/ext2x/ext2x_client.c +++ b/opal/mca/pmix/ext2x/ext2x_client.c @@ -8,6 +8,9 @@ * Copyright (c) 2016 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2016 Los Alamos National Security, LLC. 
All rights * reserved. + * Copyright (c) 2018 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -165,6 +168,8 @@ int ext2x_client_finalize(void) { pmix_status_t rc; opal_ext2x_event_t *event, *ev2; + opal_list_t evlist; + OBJ_CONSTRUCT(&evlist, opal_list_t); opal_output_verbose(1, opal_pmix_base_framework.framework_output, "PMIx_client finalize"); @@ -178,12 +183,19 @@ int ext2x_client_finalize(void) OPAL_PMIX_DESTRUCT_LOCK(&event->lock); OPAL_PMIX_CONSTRUCT_LOCK(&event->lock); PMIx_Deregister_event_handler(event->index, dereg_cbfunc, (void*)event); - OPAL_PMIX_WAIT_THREAD(&event->lock); opal_list_remove_item(&mca_pmix_ext2x_component.events, &event->super); - OBJ_RELEASE(event); + /* wait and release outside the loop to avoid double mutex + * interlock */ + opal_list_append(&evlist, &event->super); } } OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); + OPAL_LIST_FOREACH_SAFE(event, ev2, &evlist, opal_ext2x_event_t) { + OPAL_PMIX_WAIT_THREAD(&event->lock); + opal_list_remove_item(&evlist, &event->super); + OBJ_RELEASE(event); + } + OBJ_DESTRUCT(&evlist); rc = PMIx_Finalize(NULL, 0); return ext2x_convert_rc(rc); diff --git a/opal/mca/pmix/ext2x/ext2x_component.c b/opal/mca/pmix/ext2x/ext2x_component.c index a03125f7804..cf60a7ee8f3 100644 --- a/opal/mca/pmix/ext2x/ext2x_component.c +++ b/opal/mca/pmix/ext2x/ext2x_component.c @@ -118,12 +118,12 @@ static int external_open(void) OBJ_CONSTRUCT(&mca_pmix_ext2x_component.dmdx, opal_list_t); version = PMIx_Get_version(); - if ('2' != version[0]) { + if ('2' > version[0]) { opal_show_help("help-pmix-base.txt", "incorrect-pmix", true, version, "v2.x"); return OPAL_ERROR; } - if (0 == strncmp(version, "2.1", 3)) { + if (0 != strncmp(version, "2.0", 3)) { mca_pmix_ext2x_component.legacy_get = false; } diff --git a/opal/mca/pmix/ext2x/ext2x_server_south.c b/opal/mca/pmix/ext2x/ext2x_server_south.c index 
34317130115..f9e15344e97 100644 --- a/opal/mca/pmix/ext2x/ext2x_server_south.c +++ b/opal/mca/pmix/ext2x/ext2x_server_south.c @@ -9,6 +9,9 @@ * Copyright (c) 2016 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2017 Los Alamos National Security, LLC. All rights * reserved. + * Copyright (c) 2018 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -180,6 +183,8 @@ int ext2x_server_finalize(void) { pmix_status_t rc; opal_ext2x_event_t *event, *ev2; + opal_list_t evlist; + OBJ_CONSTRUCT(&evlist, opal_list_t); OPAL_PMIX_ACQUIRE_THREAD(&opal_pmix_base.lock); --opal_pmix_base.initialized; @@ -190,13 +195,19 @@ int ext2x_server_finalize(void) OPAL_PMIX_DESTRUCT_LOCK(&event->lock); OPAL_PMIX_CONSTRUCT_LOCK(&event->lock); PMIx_Deregister_event_handler(event->index, dereg_cbfunc, (void*)event); - OPAL_PMIX_WAIT_THREAD(&event->lock); opal_list_remove_item(&mca_pmix_ext2x_component.events, &event->super); - OBJ_RELEASE(event); + /* wait and release outside the loop to avoid double mutex + * interlock */ + opal_list_append(&evlist, &event->super); } } OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); - + OPAL_LIST_FOREACH_SAFE(event, ev2, &evlist, opal_ext2x_event_t) { + OPAL_PMIX_WAIT_THREAD(&event->lock); + opal_list_remove_item(&evlist, &event->super); + OBJ_RELEASE(event); + } + OBJ_DESTRUCT(&evlist); rc = PMIx_server_finalize(); return ext2x_convert_rc(rc); } diff --git a/opal/mca/pmix/flux/pmix_flux.c b/opal/mca/pmix/flux/pmix_flux.c index 187108bcc7d..3233524e0fe 100644 --- a/opal/mca/pmix/flux/pmix_flux.c +++ b/opal/mca/pmix/flux/pmix_flux.c @@ -1,6 +1,6 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* - * Copyright (c) 2014-2017 Intel, Inc. All rights reserved. + * Copyright (c) 2014-2019 Intel, Inc. All rights reserved. * Copyright (c) 2014-2016 Research Organization for Information Science * and Technology (RIST). 
All rights reserved. * Copyright (c) 2016 Cisco Systems, Inc. All rights reserved. @@ -373,6 +373,7 @@ static int flux_init(opal_list_t *ilist) char *str; if (0 < pmix_init_count) { + pmix_init_count++; return OPAL_SUCCESS; } @@ -585,11 +586,10 @@ static int flux_fini(void) { if (0 == --pmix_init_count) { PMI_Finalize (); + // teardown hash table + opal_pmix_base_hash_finalize(); } - // teardown hash table - opal_pmix_base_hash_finalize(); - return OPAL_SUCCESS; } diff --git a/opal/mca/pmix/pmix3x/configure.m4 b/opal/mca/pmix/pmix3x/configure.m4 index 646e1debf83..10cd639e09b 100644 --- a/opal/mca/pmix/pmix3x/configure.m4 +++ b/opal/mca/pmix/pmix3x/configure.m4 @@ -13,7 +13,7 @@ # Copyright (c) 2011-2013 Los Alamos National Security, LLC. # All rights reserved. # Copyright (c) 2010-2017 Cisco Systems, Inc. All rights reserved. -# Copyright (c) 2013-2018 Intel, Inc. All rights reserved. +# Copyright (c) 2013-2019 Intel, Inc. All rights reserved. # Copyright (c) 2015-2016 Research Organization for Information Science # and Technology (RIST). All rights reserved. 
# $COPYRIGHT$ @@ -53,14 +53,14 @@ AC_DEFUN([MCA_opal_pmix_pmix3x_CONFIG],[ opal_pmix_pmix3x_timing_flag=--disable-pmix-timing fi - opal_pmix_pmix3x_args="$opal_pmix_pmix3x_timing_flag --without-tests-examples --disable-pmix-binaries --disable-pmix-backward-compatibility --disable-visibility --enable-embedded-libevent --with-libevent-header=\\\"opal/mca/event/$opal_event_base_include\\\"" + opal_pmix_pmix3x_args="$opal_pmix_pmix3x_timing_flag --without-tests-examples --disable-pmix-binaries --disable-pmix-backward-compatibility --disable-visibility --enable-embedded-libevent --with-libevent-header=\\\"opal/mca/event/$opal_event_base_include\\\" --enable-embedded-hwloc --with-hwloc-header=\\\"$opal_hwloc_base_include\\\"" AS_IF([test "$enable_debug" = "yes"], [opal_pmix_pmix3x_args="--enable-debug $opal_pmix_pmix3x_args" CFLAGS="$OPAL_CFLAGS_BEFORE_PICKY $OPAL_VISIBILITY_CFLAGS -g"], [opal_pmix_pmix3x_args="--disable-debug $opal_pmix_pmix3x_args" CFLAGS="$OPAL_CFLAGS_BEFORE_PICKY $OPAL_VISIBILITY_CFLAGS"]) AC_MSG_CHECKING([if want to install standalone libpmix]) - AS_IF([test "$enable_install_libpmix" == "yes"], + AS_IF([test "$enable_install_libpmix" = "yes"], [AC_MSG_RESULT([yes])], [AC_MSG_RESULT([no]) opal_pmix_pmix3x_args="--with-pmix-symbol-rename=OPAL_MCA_PMIX3X_ --enable-embedded-mode $opal_pmix_pmix3x_args"]) @@ -87,6 +87,9 @@ AC_DEFUN([MCA_opal_pmix_pmix3x_CONFIG],[ [AC_MSG_RESULT([no - disqualifying this component]) opal_pmix_pmix3x_happy=0], [AC_MSG_RESULT([yes - using the internal v3.x library]) + AS_IF([test "$opal_pmix_pmix3x_happy" = "0"], + [AC_MSG_WARN([INTERNAL PMIX FAILED TO CONFIGURE]) + AC_MSG_ERROR([CANNOT CONTINUE])]) # Build flags for our Makefile.am opal_pmix_pmix3x_LDFLAGS= opal_pmix_pmix3x_LIBS="$OPAL_TOP_BUILDDIR/$opal_pmix_pmix3x_basedir/pmix/src/libpmix.la" diff --git a/opal/mca/pmix/pmix3x/pmix/INSTALL b/opal/mca/pmix/pmix3x/pmix/INSTALL index 08fdfe641ff..3fac5ad5866 100644 --- a/opal/mca/pmix/pmix3x/pmix/INSTALL +++ 
b/opal/mca/pmix/pmix3x/pmix/INSTALL @@ -9,7 +9,7 @@ Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, Copyright (c) 2004-2005 The Regents of the University of California. All rights reserved. Copyright (c) 2008-2015 Cisco Systems, Inc. All rights reserved. -Copyright (c) 2013-2017 Intel, Inc. All rights reserved. +Copyright (c) 2013-2019 Intel, Inc. All rights reserved. $COPYRIGHT$ Additional copyrights may follow @@ -22,9 +22,9 @@ For More Information This file is a *very* short overview of building and installing the PMIx library. Much more information is available in the -FAQ section on the PMIx web site: +How-To section on the PMIx web site: - http://pmix.github.io/pmix/faq + https://pmix.org/support/how-to/ Developer Builds @@ -56,6 +56,24 @@ shell$ ./configure --prefix=/where/to/install [...lots of output...] shell$ make all install +NOTE: this version of PMIx requires the Libevent package to build +and operate. Any version of Libevent greater than or equal to +2.0.21 is acceptable. + +NOTE: this version of PMIx optionally supports the HWLOC package +for providing topology information to both the host environment +(by collecting local inventory for rollup) and local client +processes. Any version of HWLOC greater than 1.10 is supported, +although versions in the 2.x series are recommended. + +Note that you must point configure at the libevent installation +using the --with-libevent=<dir> option if it is in a non-standard +location. Similarly, non-standard locations for the HWLOC package +must be specified using the --with-hwloc=<dir> option. In both +cases, PMIx will automatically detect these packages in standard +locations and build-in support for them unless otherwise specified +using the respective configure option. + If you need special access to install, then you can execute "make all" as a user with write permissions in the build tree, and a separate "make install" as a user with write permissions to the diff --git a/opal/mca/pmix/pmix3x/pmix/LICENSE b/opal/mca/pmix/pmix3x/pmix/LICENSE index 06f1248793d..9f9a1b943ba 100644 --- a/opal/mca/pmix/pmix3x/pmix/LICENSE +++ b/opal/mca/pmix/pmix3x/pmix/LICENSE @@ -26,7 +26,7 @@ Copyright (c) 2006-2010 Sun Microsystems, Inc. All rights reserved. Copyright (c) 2006-2010 The University of Houston. All rights reserved. Copyright (c) 2006-2009 Myricom, Inc. All rights reserved. Copyright (c) 2007-2008 UT-Battelle, LLC. All rights reserved. -Copyright (c) 2007-2018 IBM Corporation. All rights reserved. +Copyright (c) 2007-2019 IBM Corporation. All rights reserved. Copyright (c) 1998-2005 Forschungszentrum Juelich, Juelich Supercomputing Centre, Federal Republic of Germany Copyright (c) 2005-2008 ZIH, TU Dresden, Federal Republic of Germany @@ -36,7 +36,7 @@ Copyright (c) 2008-2009 Institut National de Recherche en Informatique. All rights reserved. Copyright (c) 2007 Lawrence Livermore National Security, LLC. All rights reserved. -Copyright (c) 2007-2018 Mellanox Technologies. All rights reserved. +Copyright (c) 2007-2019 Mellanox Technologies. All rights reserved. Copyright (c) 2006-2010 QLogic Corporation. All rights reserved. Copyright (c) 2008-2010 Oak Ridge National Labs. All rights reserved. Copyright (c) 2006-2010 Oracle and/or its affiliates. All rights reserved. @@ -45,8 +45,10 @@ Copyright (c) 2010 ARM ltd. All rights reserved. Copyright (c) 2010-2011 Alex Brick . All rights reserved.
Copyright (c) 2012 The University of Wisconsin-La Crosse. All rights reserved. -Copyright (c) 2013-2018 Intel, Inc. All rights reserved. +Copyright (c) 2013-2019 Intel, Inc. All rights reserved. Copyright (c) 2011-2014 NVIDIA Corporation. All rights reserved. +Copyright (c) 2019 Amazon.com, Inc. or its affiliates. All Rights + reserved. $COPYRIGHT$ diff --git a/opal/mca/pmix/pmix3x/pmix/Makefile.am b/opal/mca/pmix/pmix3x/pmix/Makefile.am index 9d1013c08c0..47e21332730 100644 --- a/opal/mca/pmix/pmix3x/pmix/Makefile.am +++ b/opal/mca/pmix/pmix3x/pmix/Makefile.am @@ -11,7 +11,9 @@ # All rights reserved. # Copyright (c) 2006-2016 Cisco Systems, Inc. All rights reserved. # Copyright (c) 2012-2013 Los Alamos National Security, Inc. All rights reserved. -# Copyright (c) 2013-2018 Intel, Inc. All rights reserved. +# Copyright (c) 2013-2019 Intel, Inc. All rights reserved. +# Copyright (c) 2019 Amazon.com, Inc. or its affiliates. All Rights +# reserved. # $COPYRIGHT$ # # Additional copyrights may follow @@ -25,6 +27,7 @@ ACLOCAL_AMFLAGS = -I ./config SUBDIRS = config contrib include src etc +AM_DISTCHECK_CONFIGURE_FLAGS = --disable-dlopen headers = sources = @@ -37,10 +40,6 @@ dist_pmixdata_DATA = if ! PMIX_EMBEDDED_MODE dist_pmixdata_DATA += contrib/pmix-valgrind.supp -if PMIX_HAVE_PANDOC -SUBDIRS += man -endif - endif if PMIX_TESTS_EXAMPLES diff --git a/opal/mca/pmix/pmix3x/pmix/NEWS b/opal/mca/pmix/pmix3x/pmix/NEWS index 572b06fb421..f18016dd7f2 100644 --- a/opal/mca/pmix/pmix3x/pmix/NEWS +++ b/opal/mca/pmix/pmix3x/pmix/NEWS @@ -1,5 +1,5 @@ -Copyright (c) 2015-2018 Intel, Inc. All rights reserved. -Copyright (c) 2017 IBM Corporation. All rights reserved. +Copyright (c) 2015-2019 Intel, Inc. All rights reserved. +Copyright (c) 2017-2019 IBM Corporation. All rights reserved. 
$COPYRIGHT$ Additional copyrights may follow @@ -17,11 +17,197 @@ the README file, PMIx typically maintains two separate version series simultaneously - the current release and one that is locked to only bug fixes. Since these series are semi-independent of each other, a single NEWS-worthy item might apply to different series. For -example, a bug might be fixed in the master, and then moved to the -current release as well as the "stable" bug fix release branch. +example, a bug might be fixed in the master, and then moved to +multiple release branches. -3.0.0 -- TBD +3.1.4 -- 9 Aug 2019 +---------------------- +- PR #1342: Fix if_linux_ipv6_open interface filter +- PR #1344: Remove unnecessary libtool init for c++ +- PR #1346: Fix incorrect pointer casts/deref +- PR #1347/#1348: Fix use of gethostname +- PR #1353/#1357: util/environ: use setenv() if available +- PR #1354: Plug a misc memory leak in the pmix_query_caddy_t destructor +- PR #1356: Fix another pointer cast/deref in test suite +- PR #1358: Implement support for class-based info arrays +- PR #1359: Plug misc minor memory leaks +- PR #1368: Backport support for libev +- PR #1369: Fix legacy support for PMI-1 +- PR #1370: Cleanup handling of data requests for different nspaces +- PR #1193: Resolve get of proc-specific job-level info from another nspace +- PR #1376: Fix problems in the Log code path, updates to simple test suite +- PR #1377: Skip fastpath/dstore for NULL keys +- PR #1379: Change IF_NAMESIZE to PMIX_IF_NAMESIZE and set to safe size +- PR #1385: Check for EINVAL return from posix_fallocate +- PR #1389: Plug misc memory leaks in configure + + +3.1.3 -- 2 July 2019 +---------------------- +- PR #1096: Restore PMIX_NUM_SLOTS for backward compatibility +- PR #1106: Automatically generate PMIX_NUMERIC_VERSION +- PR #1143: Fix tool connection handshake for tools that are registered + clients +- PR #1163: Fix a compiler warning in atomics on POWER arch +- PR #1162: Fix race condition when clients 
fail while in a PMIx + collective operation +- PR #1166: Fix a regression in spinlock atomics +- PR #1159: Fix missing pointer update when shared memory segment + was re-attached +- PR #1180: Remove dependency on C++ compiler for thread detection +- PR #1180: Add detection for Flex when building in non-tarball situations +- PR #1165: Add dependency on libevent-devel to rpm spec file +- PR #1188: Link libpmix.so to MCA component libraries +- PR #1194: Ensure any cached notifications arrive after registration completes +- PR #1205: Add "make check" support +- PR #1209: Update configure logic for clock_gettime +- PR #1213/#1217/#1221: Add configure option "--enable-nonglobal-dlopen" + If the MCA component libraries should link back to libpmix.so +- PR #1231: SPEC: Allow splitting PMIx in pmix and pmix-libpmi packages +- PR #1222: Fix case of multiple launcher calls in job script +- PR #1237: Avoid double-free of collective tracker +- PR #1237: Ensure all participants are notified of fence complete +- PR #1237: Ensure all participants are notified of connect and disconnect complete +- PR #1250: Fix PMIx_server_finalize hang (rare) +- PR #1271: PTL/usock doesn't support tools +- PR #1280: Fix the PTL connection establishment protocol +- PR #1280: Fix tool connection in psec/handshake mode +- PR #1289: Avoid output_verbose overhead when it won't print +- PR #1296: Allow setup_fork to proceed even if gdds and pnet don't contribute +- PR #1296: Allow servers to pass NULL module +- PR #1297: Provide internal blocking ability to the register/deregister fns +- PR #1298: Add dummy handshake component to psec framework for testing +- PR #1303: Allow jobs to not specify proc-level info +- PR #1304: Provide proc data in cases where host does not +- PR #1305: Add some more values that can be computed +- PR #1308: Add missing tool rendezvous file +- PR #1309: Fix potential integer overflow in regex +- PR #1311: Work around memory bug in older gcc compilers +- PR #1321: Provide 
memory op hooks in user-facing macros +- PR #1329: Add -fPIC to static builds +- PR #1340: Do not use '==' in m4 test statements + + +3.1.2 -- 24 Jan 2019 +---------------------- + - Fix a bug in macro identifying system events + - Restore some non-standard macros to the pmix_extend.h + header - these are considered "deprecated" and will be + removed from public-facing headers in future releases + + +3.1.1 -- 18 Jan 2019 +---------------------- +- Fix a bug in registration of default event handlers + that somehow slipped thru testing + + +3.1.0 -- 17 Jan 2019 +---------------------- +**** THIS RELEASE MARKS THE STARTING POINT FOR FULL COMPLIANCE +**** WITH THE PMIX v3 STANDARD. ALL API BEHAVIORS AND ATTRIBUTE +**** DEFINITIONS MEET THE v3 STANDARD SPECIFICATIONS. + - Add a new, faster dstore GDS component 'ds21' + - Performance optimizations for the dstore GDS components. + - Plug miscellaneous memory leaks + - Silence an unnecessary warning message when checking connection + to a non-supporting server + - Ensure lost-connection events get delivered to default event + handlers + - Correctly handle cache refresh for queries + - Protect against race conditions between host and internal library + when dealing with async requests + - Cleanup tool operations and add support for connections to + remote servers. Initial support for debugger direct/indirect + launch verified with PRRTE. Cleanup setting of tmpdir options. + Drop rendezvous files when acting as a launcher + - Automatically store the server URI for easy access by client + - Provide MCA parameter to control TCP connect retry/timeout + - Update event notification system to properly evict oldest events + when more space is needed + - Fix a number of error paths + - Update IOF cache code to properly drop oldest message. Provide + MCA parameter for setting cache size. + - Handle setsockopt(SO_RCVTIMEO) not being supported + - Ensure that epilogs get run even when connections unexpectedly + terminate. 
Properly split epilog strings to process multiple + paths + - Pass the tool's command line to the server so it can be returned + in queries + - Add support for C11 atomics + - Support collection and forwarding of fabric-specific envars + - Improve handling of hwloc configure option + - Fix PMIx_server_generate_regex to preserve node ordering + - Fix a bug when registering default event handlers + + +3.1.0 -- 17 Jan 2019 +---------------------- +**** THIS RELEASE MARKS THE STARTING POINT FOR FULL COMPLIANCE +**** WITH THE PMIX v3 STANDARD. ALL API BEHAVIORS AND ATTRIBUTE +**** DEFINITIONS MEET THE v3 STANDARD SPECIFICATIONS. + - Add a new, faster dstore GDS component 'ds21' + - Performance optimizations for the dstore GDS components. + - Plug miscellaneous memory leaks + - Silence an unnecessary warning message when checking connection + to a non-supporting server + - Ensure lost-connection events get delivered to default event + handlers + - Correctly handle cache refresh for queries + - Protect against race conditions between host and internal library + when dealing with async requests + - Cleanup tool operations and add support for connections to + remote servers. Initial support for debugger direct/indirect + launch verified with PRRTE. Cleanup setting of tmpdir options. + Drop rendezvous files when acting as a launcher + - Automatically store the server URI for easy access by client + - Provide MCA parameter to control TCP connect retry/timeout + - Update event notification system to properly evict oldest events + when more space is needed + - Fix a number of error paths + - Update IOF cache code to properly drop oldest message. Provide + MCA parameter for setting cache size. + - Handle setsockopt(SO_RCVTIMEO) not being supported + - Ensure that epilogs get run even when connections unexpectedly + terminate. 
Properly split epilog strings to process multiple + paths + - Pass the tool's command line to the server so it can be returned + in queries + - Add support for C11 atomics + - Support collection and forwarding of fabric-specific envars + - Improve handling of hwloc configure option + - Fix PMIx_server_generate_regex to preserve node ordering + - Fix a bug when registering default event handlers + + +3.0.2 -- 18 Sept 2018 +---------------------- +- Ensure we cleanup any active sensors when a peer departs. Allow the + heartbeat monitor to "reset" if a process stops beating and subsequently + returns +- Fix a few bugs in the event notification system and provide some + missing implementation (support for specifying target procs to + receive the event). +- Add PMIX_PROC_TERMINATED constant +- Properly deal with EOPNOTSUPP from getsockopt() on ARM + + +3.0.1 -- 23 Aug 2018 +---------------------- +**** DEPRECATION WARNING: The pmix_info_array_t struct was +**** initially marked for deprecation in the v2.x series. +**** We failed to provide clear warning at that time. This +**** therefore serves as warning of intended removal of +**** pmix_info_array_t in the future v4 release series. +- Fixed memory corruption bug in event notification + system due to uninitialized variable +- Add numeric version field to pmix_version.h +- Transfer all cached data to client dstore upon first connect +- Implement missing job control and sensor APIs + + +3.0.0 -- 6 July 2018 ------------------------------------ **** NOTE: This release implements the complete PMIX v3.0 Standard **** and therefore includes a number of new APIs and features. These @@ -63,7 +249,67 @@ current release as well as the "stable" bug fix release branch. 
- Fix several memory and file descriptor leaks -2.1.2 -- TBD +2.2.2 -- 24 Jan 2019 +---------------------- + - Fix a bug in macro identifying system events + + +2.2.1 -- 18 Jan 2019 +---------------------- + - Fix a bug in registration of default event handlers + that somehow slipped thru testing + + +2.2.0 -- 17 Jan 2019 +---------------------- +**** THIS RELEASE MARKS THE STARTING POINT FOR FULL COMPLIANCE +**** WITH THE PMIX v2.2 STANDARD. ALL API BEHAVIORS AND ATTRIBUTE +**** DEFINITIONS MEET THE v2.2 STANDARD SPECIFICATIONS. + - Add a new, faster dstore GDS component 'ds21' + - Performance optimizations for the dstore GDS components. + - Plug miscellaneous memory leaks + - Silence an unnecessary warning message when checking connection + to a non-supporting server + - Ensure lost-connection events get delivered to default event + handlers + - Correctly handle cache refresh for queries + - Protect against race conditions between host and internal library + when dealing with async requests + - Cleanup tool operations and add support for connections to + remote servers. + - Automatically store the server URI for easy access by client + - Provide MCA parameter to control TCP connect retry/timeout + - Update event notification system to properly evict oldest events + when more space is needed + - Fix a number of error paths + - Handle setsockopt(SO_RCVTIMEO) not being supported + - Pass the tool's command line to the server so it can be returned + in queries + - Add support for C11 atomics + - Fix a bug when registering default event handlers + + +2.1.4 -- 18 Sep 2018 +---------------------- +- Updated configury to silence warnings on older compilers +- Implement job control and sensor APIs +- Update sensor support +- Fix a few bugs in the event notification system and provide some + missing implementation (support for specifying target procs to + receive the event). 
+- Add PMIX_PROC_TERMINATED constant +- Properly deal with EOPNOTSUPP from getsockopt() on ARM + + +2.1.3 -- 23 Aug 2018 +---------------------- +- Fixed memory corruption bug in event notification + system due to uninitialized variable +- Add numeric version definition +- Transfer all cached data to client dstore upon first connect + + +2.1.2 -- 6 July 2018 ---------------------- - Added PMIX_VERSION_RELEASE string to pmix_version.h - Added PMIX_SPAWNED and PMIX_PARENT_ID keys to all procs diff --git a/opal/mca/pmix/pmix3x/pmix/README b/opal/mca/pmix/pmix3x/pmix/README index 22b0ec55bb8..fa96176b7e2 100644 --- a/opal/mca/pmix/pmix3x/pmix/README +++ b/opal/mca/pmix/pmix3x/pmix/README @@ -43,6 +43,11 @@ joe@mycomputer.example.com!). You can subscribe to the list here: https://groups.google.com/d/forum/pmix +Finally, just to round out all the possible ways to communicate with the +PMIx community, you are invited to join the community's Slack channel: + + pmix-workspace.slack.com + Thanks for your time. =========================================================================== @@ -192,21 +197,22 @@ PMIx's version numbers are the union of several different values: major, minor, release, and an optional quantifier. * Major: The major number is the first integer in the version string - (e.g., v1.2.3). Changes in the major number typically indicate a - significant change in the code base and/or end-user - functionality. The major number is always included in the version - number. + (e.g., v1.2.3) and indicates the corresponding version of the PMIx + Standard. In other words, a PMIx library release starting with "v2" + indicates that the implementation conforms to version 2 of the PMIx + Standard. * Minor: The minor number is the second integer in the version string (e.g., v1.2.3). Changes in the minor number typically indicate a incremental change in the code base and/or end-user - functionality. 
The minor number is always included in the version - number: + functionality, but not the supported version of the Standard. + The minor number is always included in the version number. * Release: The release number is the third integer in the version string (e.g., v1.2.3). Changes in the release number typically indicate a bug fix in the code base and/or end-user - functionality. + functionality. The release number is always included in the + version number. * Quantifier: PMIx version numbers sometimes have an arbitrary string affixed to the end of the version number. Common strings @@ -293,16 +299,15 @@ Application Binary Interface (ABI) Compatibility ------------------------------------------------ PMIx provides forward ABI compatibility in all versions of a given -feature release series and its corresponding -super stable series. For example, on a single platform, an pmix +feature release series. For example, on a single platform, an pmix application linked against PMIx v1.3.2 shared libraries can be updated to point to the shared libraries in any successive v1.3.x or v1.4 release and still work properly (e.g., via the LD_LIBRARY_PATH environment variable or other operating system mechanism). -PMIx reserves the right to break ABI compatibility at new feature +PMIx reserves the right to break ABI compatibility at new major release series. For example, the same pmix application from above -(linked against PMIx v1.3.2 shared libraries) will *not* work with +(linked against PMIx v1.3.2 shared libraries) may *not* work with PMIx v1.5 shared libraries. =========================================================================== diff --git a/opal/mca/pmix/pmix3x/pmix/VERSION b/opal/mca/pmix/pmix3x/pmix/VERSION index 7453582b973..8d2b40af3ea 100644 --- a/opal/mca/pmix/pmix3x/pmix/VERSION +++ b/opal/mca/pmix/pmix3x/pmix/VERSION @@ -14,8 +14,8 @@ # ... major=3 -minor=0 -release=0 +minor=1 +release=4 # greek is used for alpha or beta release tags. 
If it is non-empty, # it will be appended to the version number. It does not have to be @@ -30,7 +30,7 @@ greek= # command, or with the date (if "git describe" fails) in the form of # "date". -repo_rev=gitffba520 +repo_rev=gite6837057 # If tarball_version is not empty, it is used as the version string in # the tarball filename, regardless of all other versions listed in @@ -44,7 +44,7 @@ tarball_version= # The date when this release was created -date="Jul 01, 2018" +date="Aug 09, 2019" # The shared library version of each of PMIx's public libraries. # These versions are maintained in accordance with the "Library @@ -75,6 +75,13 @@ date="Jul 01, 2018" # Version numbers are described in the Libtool current:revision:age # format. -libpmix_so_version=4:0:2 -libpmi_so_version=1:0:0 +libpmix_so_version=4:24:2 +libpmi_so_version=1:1:0 libpmi2_so_version=1:0:0 + +# "Common" components install standalone libraries that are run-time +# # linked by one or more components. So they need to be versioned as +# # well. Yuck; this somewhat breaks the +# # components-don't-affect-the-build-system abstraction. +# +libmca_common_dstore_so_version=1:1:0 diff --git a/opal/mca/pmix/pmix3x/pmix/autogen.pl b/opal/mca/pmix/pmix3x/pmix/autogen.pl index 40b533d2142..9d365783c8e 100755 --- a/opal/mca/pmix/pmix3x/pmix/autogen.pl +++ b/opal/mca/pmix/pmix3x/pmix/autogen.pl @@ -4,7 +4,7 @@ # Copyright (c) 2010 Oracle and/or its affiliates. All rights reserved. # Copyright (c) 2013 Mellanox Technologies, Inc. # All rights reserved. -# Copyright (c) 2013-2017 Intel, Inc. All rights reserved. +# Copyright (c) 2013-2019 Intel, Inc. All rights reserved. # Copyright (c) 2015 Research Organization for Information Science # and Technology (RIST). All rights reserved. # Copyright (c) 2015 IBM Corporation. All rights reserved. 
@@ -37,6 +37,9 @@ # Sanity check file my $topdir_file = "include/pmix.h"; my $dnl_line = "dnl ---------------------------------------------------------------------------"; +# The text file we'll write at the end that will contain +# all the mca component directory paths +my $mca_library_paths_file = "config/mca_library_paths.txt"; # Data structures to fill up with all the stuff we find my $mca_found; @@ -137,6 +140,9 @@ sub mca_process_component { push(@{$mca_found->{$framework}->{"components"}}, $found_component); + # save the directory for later to create the paths + # to all the component libraries + push(@subdirs, $cdir); } ############################################################################## @@ -723,6 +729,15 @@ sub in_tarball { print M4 $m4; close(M4); +# Remove the old library path file and write the new one +verbose "==> Writing txt file with all the mca component paths\n"; +unlink($mca_library_paths_file); +open(M4, ">$mca_library_paths_file") || + my_die "Cannot open $mca_library_paths_file"; +my $paths = join(":", @subdirs); +print M4 $paths; +close(M4); + # Run autoreconf verbose "==> Running autoreconf\n"; my $cmd = "autoreconf -ivf --warnings=all,no-obsolete,no-override -I config"; diff --git a/opal/mca/pmix/pmix3x/pmix/config/Makefile.am b/opal/mca/pmix/pmix3x/pmix/config/Makefile.am index 3793162404c..ebc3af9d96a 100644 --- a/opal/mca/pmix/pmix3x/pmix/config/Makefile.am +++ b/opal/mca/pmix/pmix3x/pmix/config/Makefile.am @@ -1,4 +1,4 @@ -# Copyright (c) 2013-2016 Intel, Inc. All rights reserved +# Copyright (c) 2013-2019 Intel, Inc. All rights reserved. # Copyright (c) 2016 Research Organization for Information Science # and Technology (RIST). All rights reserved. # Copyright (c) 2006-2016 Cisco Systems, Inc. All rights reserved. 
@@ -44,7 +44,8 @@ EXTRA_DIST = \ pmix_setup_cc.m4 \ pmix_setup_zlib.m4 \ pmix_setup_libevent.m4 \ - pmix_mca_priority_sort.pl + pmix_mca_priority_sort.pl \ + mca_library_paths.txt maintainer-clean-local: diff --git a/opal/mca/pmix/pmix3x/pmix/config/c_get_alignment.m4 b/opal/mca/pmix/pmix3x/pmix/config/c_get_alignment.m4 index db379100994..6596c0ae88d 100644 --- a/opal/mca/pmix/pmix3x/pmix/config/c_get_alignment.m4 +++ b/opal/mca/pmix/pmix3x/pmix/config/c_get_alignment.m4 @@ -11,9 +11,9 @@ dnl University of Stuttgart. All rights reserved. dnl Copyright (c) 2004-2005 The Regents of the University of California. dnl All rights reserved. dnl Copyright (c) 2009 Sun Microsystems, Inc. All rights reserved. -dnl Copyright (c) 2014-2015 Intel, Inc. All rights reserved. -dnl Copyright (c) 2015 Research Organization for Information Science -dnl and Technology (RIST). All rights reserved. +dnl Copyright (c) 2014-2019 Intel, Inc. All rights reserved. +dnl Copyright (c) 2015-2019 Research Organization for Information Science +dnl and Technology (RIST). All rights reserved. dnl $COPYRIGHT$ dnl dnl Additional copyrights may follow @@ -44,7 +44,9 @@ AC_DEFUN([PMIX_C_GET_ALIGNMENT],[ FILE *f=fopen("conftestval", "w"); if (!f) exit(1); diff = ((char *)&p->x) - ((char *)&p->c); + free(p); fprintf(f, "%d\n", (diff >= 0) ? diff : -diff); + fclose(f); ]])], [AS_TR_SH([pmix_cv_c_align_$1])=`cat conftestval`], [AC_MSG_WARN([*** Problem running configure test!]) AC_MSG_WARN([*** See config.log for details.]) diff --git a/opal/mca/pmix/pmix3x/pmix/config/distscript.sh b/opal/mca/pmix/pmix3x/pmix/config/distscript.sh index de41d2ba7b6..e5c948f15f1 100755 --- a/opal/mca/pmix/pmix3x/pmix/config/distscript.sh +++ b/opal/mca/pmix/pmix3x/pmix/config/distscript.sh @@ -11,11 +11,11 @@ # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. # Copyright (c) 2009-2015 Cisco Systems, Inc. All rights reserved. 
-# Copyright (c) 2015 Research Organization for Information Science -# and Technology (RIST). All rights reserved. +# Copyright (c) 2015-2019 Research Organization for Information Science +# and Technology (RIST). All rights reserved. # Copyright (c) 2015 Los Alamos National Security, LLC. All rights # reserved. -# Copyright (c) 2017 Intel, Inc. All rights reserved. +# Copyright (c) 2017-2019 Intel, Inc. All rights reserved. # $COPYRIGHT$ # # Additional copyrights may follow @@ -42,7 +42,7 @@ fi # Otherwise, use what configure told us, at the cost of allowing one # or two corner cases in (but otherwise VPATH builds won't work). repo_rev=$PMIX_REPO_REV -if test -d .git ; then +if test -e .git ; then repo_rev=$(config/pmix_get_version.sh VERSION --repo-rev) fi diff --git a/opal/mca/pmix/pmix3x/pmix/config/pmix.m4 b/opal/mca/pmix/pmix3x/pmix/config/pmix.m4 index aeca068068d..1d37089f8ae 100644 --- a/opal/mca/pmix/pmix3x/pmix/config/pmix.m4 +++ b/opal/mca/pmix/pmix3x/pmix/config/pmix.m4 @@ -17,9 +17,9 @@ dnl Copyright (c) 2009 Los Alamos National Security, LLC. All rights dnl reserved. dnl Copyright (c) 2009-2011 Oak Ridge National Labs. All rights reserved. dnl Copyright (c) 2011-2013 NVIDIA Corporation. All rights reserved. -dnl Copyright (c) 2013-2018 Intel, Inc. All rights reserved. -dnl Copyright (c) 2015-2017 Research Organization for Information Science -dnl and Technology (RIST). All rights reserved. +dnl Copyright (c) 2013-2019 Intel, Inc. All rights reserved. +dnl Copyright (c) 2015-2019 Research Organization for Information Science +dnl and Technology (RIST). All rights reserved. dnl Copyright (c) 2016 Mellanox Technologies, Inc. dnl All rights reserved. 
dnl @@ -120,9 +120,11 @@ AC_DEFUN([PMIX_SETUP_CORE],[ pmixmajor=${PMIX_MAJOR_VERSION}L pmixminor=${PMIX_MINOR_VERSION}L pmixrelease=${PMIX_RELEASE_VERSION}L + pmixnumeric=$(printf 0x%4.4x%2.2x%2.2x $PMIX_MAJOR_VERSION $PMIX_MINOR_VERSION $PMIX_RELEASE_VERSION) AC_SUBST(pmixmajor) AC_SUBST(pmixminor) AC_SUBST(pmixrelease) + AC_SUBST(pmixnumeric) AC_CONFIG_FILES(pmix_config_prefix[include/pmix_version.h]) PMIX_GREEK_VERSION="`$PMIX_top_srcdir/config/pmix_get_version.sh $PMIX_top_srcdir/VERSION --greek`" @@ -189,12 +191,36 @@ AC_DEFUN([PMIX_SETUP_CORE],[ [Link the output PMIx library to this extra lib (used in embedded mode)])) AC_MSG_CHECKING([for extra lib]) AS_IF([test ! -z "$with_pmix_extra_lib"], - [AC_MSG_RESULT([$with_pmix_extra_lib]) - PMIX_EXTRA_LIB=$with_pmix_extra_lib], + [AS_IF([test "$with_pmix_extra_lib" = "yes" || test "$with_pmix_extra_lib" = "no"], + [AC_MSG_RESULT([ERROR]) + AC_MSG_WARN([Invalid value for --with-extra-pmix-lib:]) + AC_MSG_WARN([ $with_pmix_extra_lib]) + AC_MSG_WARN([Must be path name of the library to add]) + AC_MSG_ERROR([Cannot continue])], + [AC_MSG_RESULT([$with_pmix_extra_lib]) + PMIX_EXTRA_LIB=$with_pmix_extra_lib])], [AC_MSG_RESULT([no]) PMIX_EXTRA_LIB=]) AC_SUBST(PMIX_EXTRA_LIB) + # Add any extra libtool lib? + AC_ARG_WITH([pmix-extra-ltlib], + AC_HELP_STRING([--with-pmix-extra-ltlib=LIB], + [Link any embedded components/tools that require it to the provided libtool lib (used in embedded mode)])) + AC_MSG_CHECKING([for extra ltlib]) + AS_IF([test ! 
-z "$with_pmix_extra_ltlib"], + [AS_IF([test "$with_pmix_extra_ltlib" = "yes" || test "$with_pmix_extra_ltlib" = "no"], + [AC_MSG_RESULT([ERROR]) + AC_MSG_WARN([Invalid value for --with-pmix-extra-ltlib:]) + AC_MSG_WARN([ $with_pmix_extra_ltlib]) + AC_MSG_WARN([Must be path name of the library to add]) + AC_MSG_ERROR([Cannot continue])], + [AC_MSG_RESULT([$with_pmix_extra_ltlib]) + PMIX_EXTRA_LTLIB=$with_pmix_extra_ltlib])], + [AC_MSG_RESULT([no]) + PMIX_EXTRA_LTLIB=]) + AC_SUBST(PMIX_EXTRA_LTLIB) + # # Package/brand string # @@ -391,7 +417,8 @@ AC_DEFUN([PMIX_SETUP_CORE],[ crt_externs.h signal.h \ ioLib.h sockLib.h hostLib.h limits.h \ sys/statfs.h sys/statvfs.h \ - netdb.h ucred.h zlib.h]) + netdb.h ucred.h zlib.h sys/auxv.h \ + sys/sysctl.h]) AC_CHECK_HEADERS([sys/mount.h], [], [], [AC_INCLUDES_DEFAULT @@ -620,6 +647,11 @@ AC_DEFUN([PMIX_SETUP_CORE],[ pmix_show_title "Library and Function tests" + # Darwin doesn't need -lutil, as it's something other than this -lutil. + PMIX_SEARCH_LIBS_CORE([openpty], [util]) + + PMIX_SEARCH_LIBS_CORE([gethostbyname], [nsl]) + PMIX_SEARCH_LIBS_CORE([socket], [socket]) # IRIX and CentOS have dirname in -lgen, usually in libc @@ -628,7 +660,10 @@ AC_DEFUN([PMIX_SETUP_CORE],[ # Darwin doesn't need -lm, as it's a symlink to libSystem.dylib PMIX_SEARCH_LIBS_CORE([ceil], [m]) - AC_CHECK_FUNCS([asprintf snprintf vasprintf vsnprintf strsignal socketpair strncpy_s usleep statfs statvfs getpeereid getpeerucred strnlen posix_fallocate tcgetpgrp]) + # -lrt might be needed for clock_gettime + PMIX_SEARCH_LIBS_CORE([clock_gettime], [rt]) + + AC_CHECK_FUNCS([asprintf snprintf vasprintf vsnprintf strsignal socketpair strncpy_s usleep statfs statvfs getpeereid getpeerucred strnlen posix_fallocate tcgetpgrp setpgid ptsname openpty setenv]) # On some hosts, htonl is a define, so the AC_CHECK_FUNC will get # confused. 
On others, it's in the standard library, but stubbed with @@ -655,10 +690,6 @@ AC_DEFUN([PMIX_SETUP_CORE],[ [AC_DEFINE_UNQUOTED([HAVE_UNIX_BYTESWAP], [1], [whether unix byteswap routines -- htonl, htons, nothl, ntohs -- are available])]) - # check pandoc separately so we can setup an AM_CONDITIONAL off it - AC_CHECK_PROG([pmix_have_pandoc], [pandoc], [yes], [no]) - AM_CONDITIONAL([PMIX_HAVE_PANDOC], [test "x$pmix_have_pandoc" = "xyes"]) - # # Make sure we can copy va_lists (need check declared, not linkable) # @@ -694,8 +725,6 @@ AC_DEFUN([PMIX_SETUP_CORE],[ CFLAGS="$CFLAGS $THREAD_CFLAGS" CPPFLAGS="$CPPFLAGS $THREAD_CPPFLAGS" - CXXFLAGS="$CXXFLAGS $THREAD_CXXFLAGS" - CXXCPPFLAGS="$CXXCPPFLAGS $THREAD_CXXCPPFLAGS" LDFLAGS="$LDFLAGS $THREAD_LDFLAGS" LIBS="$LIBS $THREAD_LIBS" @@ -705,10 +734,10 @@ AC_DEFUN([PMIX_SETUP_CORE],[ AC_PROG_LN_S + # Check for some common system programs that we need AC_PROG_GREP AC_PROG_EGREP - ################################## # Visibility ################################## @@ -725,8 +754,22 @@ AC_DEFUN([PMIX_SETUP_CORE],[ ################################## pmix_show_title "Libevent" + PMIX_LIBEV_CONFIG PMIX_LIBEVENT_CONFIG + AS_IF([test $pmix_libevent_support -eq 1 && test $pmix_libev_support -eq 1], + [AC_MSG_WARN([Both libevent and libev support have been specified.]) + AC_MSG_WARN([Only one can be configured against at a time. Please]) + AC_MSG_WARN([remove one from the configure command line.]) + AC_MSG_ERROR([Cannot continue])]) + + AS_IF([test $pmix_libevent_support -eq 0 && test $pmix_libev_support -eq 0], + [AC_MSG_WARN([Either libevent or libev support is required, but neither]) + AC_MSG_WARN([was found. 
Please use the configure options to point us]) + AC_MSG_WARN([to where we can find one or the other library]) + AC_MSG_ERROR([Cannot continue])]) + + ################################## # HWLOC ################################## @@ -823,6 +866,32 @@ AC_DEFUN([PMIX_SETUP_CORE],[ AC_SUBST(pmixlibdir) AC_SUBST(pmixincludedir) + ############################################################################ + # setup "make check" + ############################################################################ + PMIX_BUILT_TEST_PREFIX=$PMIX_top_builddir + AC_SUBST(PMIX_BUILT_TEST_PREFIX) + # expose the mca component library paths in the build system + pathfile=$PMIX_top_srcdir/config/mca_library_paths.txt + PMIX_COMPONENT_LIBRARY_PATHS=`cat $pathfile` + AC_SUBST(PMIX_COMPONENT_LIBRARY_PATHS) + AC_CONFIG_FILES(pmix_config_prefix[test/run_tests00.pl], [chmod +x test/run_tests00.pl]) + AC_CONFIG_FILES(pmix_config_prefix[test/run_tests01.pl], [chmod +x test/run_tests01.pl]) + AC_CONFIG_FILES(pmix_config_prefix[test/run_tests02.pl], [chmod +x test/run_tests02.pl]) + AC_CONFIG_FILES(pmix_config_prefix[test/run_tests03.pl], [chmod +x test/run_tests03.pl]) + AC_CONFIG_FILES(pmix_config_prefix[test/run_tests04.pl], [chmod +x test/run_tests04.pl]) + AC_CONFIG_FILES(pmix_config_prefix[test/run_tests05.pl], [chmod +x test/run_tests05.pl]) + AC_CONFIG_FILES(pmix_config_prefix[test/run_tests06.pl], [chmod +x test/run_tests06.pl]) + AC_CONFIG_FILES(pmix_config_prefix[test/run_tests07.pl], [chmod +x test/run_tests07.pl]) + AC_CONFIG_FILES(pmix_config_prefix[test/run_tests08.pl], [chmod +x test/run_tests08.pl]) + AC_CONFIG_FILES(pmix_config_prefix[test/run_tests09.pl], [chmod +x test/run_tests09.pl]) + AC_CONFIG_FILES(pmix_config_prefix[test/run_tests10.pl], [chmod +x test/run_tests10.pl]) + AC_CONFIG_FILES(pmix_config_prefix[test/run_tests11.pl], [chmod +x test/run_tests11.pl]) + AC_CONFIG_FILES(pmix_config_prefix[test/run_tests12.pl], [chmod +x test/run_tests12.pl]) + 
AC_CONFIG_FILES(pmix_config_prefix[test/run_tests13.pl], [chmod +x test/run_tests13.pl]) + AC_CONFIG_FILES(pmix_config_prefix[test/run_tests14.pl], [chmod +x test/run_tests14.pl]) + AC_CONFIG_FILES(pmix_config_prefix[test/run_tests15.pl], [chmod +x test/run_tests15.pl]) + ############################################################################ # final output ############################################################################ @@ -862,6 +931,10 @@ AC_DEFUN([PMIX_DEFINE_ARGS],[ [Whether build should attempt to use dlopen (or similar) to dynamically load components. (default: enabled)])]) + AS_IF([test "$enable_dlopen" = "unknown"], + [AC_MSG_WARN([enable_dlopen variable has been overwritten by configure]) + AC_MSG_WARN([This is an internal error that should be reported to PMIx developers]) + AC_MSG_ERROR([Cannot continue])]) AS_IF([test "$enable_dlopen" = "no"], [enable_mca_dso="no" enable_mca_static="yes" @@ -877,7 +950,7 @@ AC_DEFUN([PMIX_DEFINE_ARGS],[ AC_ARG_ENABLE([embedded-mode], [AC_HELP_STRING([--enable-embedded-mode], [Using --enable-embedded-mode causes PMIx to skip a few configure checks and install nothing. It should only be used when building PMIx within the scope of a larger package.])]) - AS_IF([test ! -z "$enable_embedded_mode" && test "$enable_embedded_mode" = "yes"], + AS_IF([test "$enable_embedded_mode" = "yes"], [pmix_mode=embedded pmix_install_primary_headers=no AC_MSG_RESULT([yes])], @@ -889,8 +962,16 @@ AC_DEFUN([PMIX_DEFINE_ARGS],[ # Is this a developer copy? # -if test -d .git; then +if test -e $PMIX_TOP_SRCDIR/.git; then PMIX_DEVEL=1 + # check for Flex + AC_PROG_LEX + if test "x$LEX" != xflex; then + AC_MSG_WARN([PMIx requires Flex to build from non-tarball sources,]) + AC_MSG_WARN([but Flex was not found. 
Please install Flex into]) + AC_MSG_WARN([your path and try again]) + AC_MSG_ERROR([Cannot continue]) + fi else PMIX_DEVEL=0 fi @@ -941,7 +1022,6 @@ fi #################### Early development override #################### if test "$WANT_DEBUG" = "0"; then CFLAGS="-DNDEBUG $CFLAGS" - CXXFLAGS="-DNDEBUG $CXXFLAGS" fi AC_DEFINE_UNQUOTED(PMIX_ENABLE_DEBUG, $WANT_DEBUG, [Whether we want developer-level debugging code or not]) @@ -1078,20 +1158,6 @@ AC_DEFINE_UNQUOTED([PMIX_ENABLE_TIMING], [$WANT_PMIX_TIMING], [Whether we want developer-level timing support or not]) # -# Install header files -# -AC_MSG_CHECKING([if want to head developer-level header files]) -AC_ARG_WITH(devel-headers, - AC_HELP_STRING([--with-devel-headers], - [also install developer-level header files (only for internal PMIx developers, default: disabled)])) -if test "$with_devel_headers" = "yes"; then - AC_MSG_RESULT([yes]) - WANT_INSTALL_HEADERS=1 -else - AC_MSG_RESULT([no]) - WANT_INSTALL_HEADERS=0 -fi - # # Install backward compatibility support for PMI-1 and PMI-2 # @@ -1126,6 +1192,41 @@ fi AM_CONDITIONAL([PMIX_INSTALL_BINARIES], [test $WANT_PMIX_BINARIES -eq 1]) + +# see if they want to disable non-RTLD_GLOBAL dlopen +AC_MSG_CHECKING([if want to support dlopen of non-global namespaces]) +AC_ARG_ENABLE([nonglobal-dlopen], + AC_HELP_STRING([--enable-nonglobal-dlopen], + [enable non-global dlopen (default: enabled)])) +if test "$enable_nonglobal_dlopen" = "no"; then + AC_MSG_RESULT([no]) + pmix_need_libpmix=0 +else + AC_MSG_RESULT([yes]) + pmix_need_libpmix=1 +fi + +# if someone enables embedded mode but doesn't want to install the +# devel headers, then default nonglobal-dlopen to false +AS_IF([test -z "$enable_nonglobal_dlopen" && test "x$pmix_mode" = "xembedded" && test $WANT_INSTALL_HEADERS -eq 0 && test $pmix_need_libpmix -eq 1], + [pmix_need_libpmix=0]) + +# +# psec/dummy_handshake +# + +AC_MSG_CHECKING([if want build psec/dummy_handshake]) +AC_ARG_ENABLE(dummy-handshake, + 
AC_HELP_STRING([--enable-dummy-handshake], + [Enables psec dummy component intended to check the PTL handshake scenario (default: disabled)])) +if test "$enable_dummy_handshake" != "yes"; then + AC_MSG_RESULT([no]) + eval "DISABLE_psec_dummy_handshake=1" +else + AC_MSG_RESULT([yes]) + eval "DISABLE_psec_dummy_handshake=0" +fi +AM_CONDITIONAL(MCA_BUILD_PSEC_DUMMY_HANDSHAKE, test "$DISABLE_psec_dummy_handshake" = "0") ])dnl # This must be a standalone routine so that it can be called both by @@ -1141,6 +1242,7 @@ AC_DEFUN([PMIX_DO_AM_CONDITIONALS],[ AM_CONDITIONAL([WANT_PRIMARY_HEADERS], [test "x$pmix_install_primary_headers" = "xyes"]) AM_CONDITIONAL(WANT_INSTALL_HEADERS, test "$WANT_INSTALL_HEADERS" = 1) AM_CONDITIONAL(WANT_PMI_BACKWARD, test "$WANT_PMI_BACKWARD" = 1) + AM_CONDITIONAL(NEED_LIBPMIX, [test "$pmix_need_libpmix" = "1"]) ]) pmix_did_am_conditionals=yes ])dnl diff --git a/opal/mca/pmix/pmix3x/pmix/config/pmix_check_compiler_version.m4 b/opal/mca/pmix/pmix3x/pmix/config/pmix_check_compiler_version.m4 index eca2013be82..da822b04810 100644 --- a/opal/mca/pmix/pmix3x/pmix/config/pmix_check_compiler_version.m4 +++ b/opal/mca/pmix/pmix3x/pmix/config/pmix_check_compiler_version.m4 @@ -1,7 +1,9 @@ dnl -*- shell-script -*- dnl dnl Copyright (c) 2009 Oak Ridge National Labs. All rights reserved. -dnl Copyright (c) 2013-2017 Intel, Inc. All rights reserved. +dnl Copyright (c) 2013-2019 Intel, Inc. All rights reserved. +dnl Copyright (c) 2019 Research Organization for Information Science +dnl and Technology (RIST). All rights reserved. 
dnl dnl $COPYRIGHT$ dnl @@ -43,6 +45,7 @@ int main (int argc, char * argv[]) f=fopen("conftestval", "w"); if (!f) exit(1); fprintf (f, "%d", PLATFORM_COMPILER_$1); + fclose(f); return 0; } ], [ @@ -75,6 +78,7 @@ int main (int argc, char * argv[]) f=fopen("conftestval", "w"); if (!f) exit(1); fprintf (f, "%s", PLATFORM_COMPILER_$1); + fclose(f); return 0; } ], [ @@ -110,6 +114,7 @@ int main (int argc, char * argv[]) f=fopen("conftestval", "w"); if (!f) exit(1); fprintf (f, "%s", _STRINGIFY(PLATFORM_COMPILER_$1)); + fclose(f); return 0; } ], [ diff --git a/opal/mca/pmix/pmix3x/pmix/config/pmix_check_icc.m4 b/opal/mca/pmix/pmix3x/pmix/config/pmix_check_icc.m4 index e8a06b25148..05ce9431bd3 100644 --- a/opal/mca/pmix/pmix3x/pmix/config/pmix_check_icc.m4 +++ b/opal/mca/pmix/pmix3x/pmix/config/pmix_check_icc.m4 @@ -10,9 +10,9 @@ dnl Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, dnl University of Stuttgart. All rights reserved. dnl Copyright (c) 2004-2005 The Regents of the University of California. dnl All rights reserved. -dnl Copyright (c) 2014 Intel, Inc. All rights reserved. -dnl Copyright (c) 2016 Research Organization for Information Science -dnl and Technology (RIST). All rights reserved. +dnl Copyright (c) 2014-2019 Intel, Inc. All rights reserved. +dnl Copyright (c) 2016-2019 Research Organization for Information Science +dnl and Technology (RIST). All rights reserved. 
dnl $COPYRIGHT$ dnl dnl Additional copyrights may follow @@ -45,6 +45,7 @@ int main () func (4711, "Help %d [%s]\n", 10, "ten"); f=fopen ("conftestval", "w"); if (!f) exit (1); + fclose(f); return 0; } diff --git a/opal/mca/pmix/pmix3x/pmix/config/pmix_check_lock.m4 b/opal/mca/pmix/pmix3x/pmix/config/pmix_check_lock.m4 index 69f184506c4..0590dcf56cd 100644 --- a/opal/mca/pmix/pmix3x/pmix/config/pmix_check_lock.m4 +++ b/opal/mca/pmix/pmix3x/pmix/config/pmix_check_lock.m4 @@ -57,4 +57,6 @@ AC_DEFUN([PMIX_CHECK_DSTOR_LOCK],[ fi fi LIBS="$orig_libs" + AM_CONDITIONAL([HAVE_DSTORE_PTHREAD_LOCK], [test "$_x_ac_pthread_lock_found" = "1"]) + AM_CONDITIONAL([HAVE_DSTORE_FCNTL_LOCK], [test "$_x_ac_fcntl_lock_found" = "1"]) ]) diff --git a/opal/mca/pmix/pmix3x/pmix/config/pmix_check_os_flavors.m4 b/opal/mca/pmix/pmix3x/pmix/config/pmix_check_os_flavors.m4 index 5c333ff5111..fa08cf906c5 100644 --- a/opal/mca/pmix/pmix3x/pmix/config/pmix_check_os_flavors.m4 +++ b/opal/mca/pmix/pmix3x/pmix/config/pmix_check_os_flavors.m4 @@ -1,7 +1,7 @@ dnl -*- shell-script -*- dnl dnl Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. -dnl Copyright (c) 2014-2016 Intel, Inc. All rights reserved. +dnl Copyright (c) 2014-2019 Intel, Inc. All rights reserved. dnl Copyright (c) 2014 Research Organization for Information Science dnl and Technology (RIST). All rights reserved. 
dnl @@ -57,6 +57,13 @@ AC_DEFUN([PMIX_CHECK_OS_FLAVORS], [$pmix_have_solaris], [Whether or not we have solaris]) + AS_IF([test "$pmix_found_apple" = "yes"], + [pmix_have_apple=1], + [pmix_have_apple=0]) + AC_DEFINE_UNQUOTED([PMIX_HAVE_APPLE], + [$pmix_have_apple], + [Whether or not we have apple]) + # check for sockaddr_in (a good sign we have TCP) AC_CHECK_HEADERS([netdb.h netinet/in.h netinet/tcp.h]) AC_CHECK_TYPES([struct sockaddr_in], diff --git a/opal/mca/pmix/pmix3x/pmix/config/pmix_config_asm.m4 b/opal/mca/pmix/pmix3x/pmix/config/pmix_config_asm.m4 index 062440499a6..a46b73ad4d4 100644 --- a/opal/mca/pmix/pmix3x/pmix/config/pmix_config_asm.m4 +++ b/opal/mca/pmix/pmix3x/pmix/config/pmix_config_asm.m4 @@ -2,22 +2,22 @@ dnl dnl Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana dnl University Research and Technology dnl Corporation. All rights reserved. -dnl Copyright (c) 2004-2015 The University of Tennessee and The University +dnl Copyright (c) 2004-2018 The University of Tennessee and The University dnl of Tennessee Research Foundation. All rights dnl reserved. dnl Copyright (c) 2004-2006 High Performance Computing Center Stuttgart, dnl University of Stuttgart. All rights reserved. dnl Copyright (c) 2004-2005 The Regents of the University of California. dnl All rights reserved. -dnl Copyright (c) 2008-2015 Cisco Systems, Inc. All rights reserved. +dnl Copyright (c) 2008-2018 Cisco Systems, Inc. All rights reserved. dnl Copyright (c) 2010 Oracle and/or its affiliates. All rights reserved. dnl Copyright (c) 2015-2017 Research Organization for Information Science dnl and Technology (RIST). All rights reserved. -dnl Copyright (c) 2014-2017 Los Alamos National Security, LLC. All rights +dnl Copyright (c) 2014-2018 Los Alamos National Security, LLC. All rights dnl reserved. dnl Copyright (c) 2017 Amazon.com, Inc. or its affiliates. All Rights dnl reserved. -dnl Copyright (c) 2017 Intel, Inc. All rights reserved. 
+dnl Copyright (c) 2018-2019 Intel, Inc. All rights reserved. dnl $COPYRIGHT$ dnl dnl Additional copyrights may follow @@ -25,65 +25,262 @@ dnl dnl $HEADER$ dnl +dnl This is a C test to see if 128-bit __atomic_compare_exchange_n() +dnl actually works (e.g., it compiles and links successfully on +dnl ARM64+clang, but returns incorrect answers as of August 2018). +AC_DEFUN([PMIX_ATOMIC_COMPARE_EXCHANGE_N_TEST_SOURCE],[[ +#include +#include +#include -AC_DEFUN([PMIX_CHECK_SYNC_BUILTIN_CSWAP_INT128], [ +typedef union { + uint64_t fake@<:@2@:>@; + __int128 real; +} pmix128; + +static void test1(void) +{ + // As of Aug 2018, we could not figure out a way to assign 128-bit + // constants -- the compilers would not accept it. So use a fake + // union to assign 2 uin64_t's to make a single __int128. + pmix128 ptr = { .fake = { 0xFFEEDDCCBBAA0099, 0x8877665544332211 }}; + pmix128 expected = { .fake = { 0x11EEDDCCBBAA0099, 0x88776655443322FF }}; + pmix128 desired = { .fake = { 0x1122DDCCBBAA0099, 0x887766554433EEFF }}; + bool r = __atomic_compare_exchange_n(&ptr.real, &expected.real, + desired.real, true, + __ATOMIC_RELAXED, __ATOMIC_RELAXED); + if ( !(r == false && ptr.real == expected.real)) { + exit(1); + } +} + +static void test2(void) +{ + pmix128 ptr = { .fake = { 0xFFEEDDCCBBAA0099, 0x8877665544332211 }}; + pmix128 expected = ptr; + pmix128 desired = { .fake = { 0x1122DDCCBBAA0099, 0x887766554433EEFF }}; + bool r = __atomic_compare_exchange_n(&ptr.real, &expected.real, + desired.real, true, + __ATOMIC_RELAXED, __ATOMIC_RELAXED); + if (!(r == true && ptr.real == desired.real)) { + exit(2); + } +} + +int main(int argc, char** argv) +{ + test1(); + test2(); + return 0; +} +]]) + +dnl ------------------------------------------------------------------ + +dnl This is a C test to see if 128-bit __sync_bool_compare_and_swap() +dnl actually works (e.g., it compiles and links successfully on +dnl ARM64+clang, but returns incorrect answers as of August 2018). 
+AC_DEFUN([PMIX_SYNC_BOOL_COMPARE_AND_SWAP_TEST_SOURCE],[[ +#include +#include +#include + +typedef union { + uint64_t fake@<:@2@:>@; + __int128 real; +} pmix128; + +static void test1(void) +{ + // As of Aug 2018, we could not figure out a way to assign 128-bit + // constants -- the compilers would not accept it. So use a fake + // union to assign 2 uint64_t's to make a single __int128. + pmix128 ptr = { .fake = { 0xFFEEDDCCBBAA0099, 0x8877665544332211 }}; + pmix128 oldval = { .fake = { 0x11EEDDCCBBAA0099, 0x88776655443322FF }}; + pmix128 newval = { .fake = { 0x1122DDCCBBAA0099, 0x887766554433EEFF }}; + bool r = __sync_bool_compare_and_swap(&ptr.real, oldval.real, newval.real); + if (!(r == false && ptr.real != newval.real)) { + exit(1); + } +} + +static void test2(void) +{ + pmix128 ptr = { .fake = { 0xFFEEDDCCBBAA0099, 0x8877665544332211 }}; + pmix128 oldval = ptr; + pmix128 newval = { .fake = { 0x1122DDCCBBAA0099, 0x887766554433EEFF }}; + bool r = __sync_bool_compare_and_swap(&ptr.real, oldval.real, newval.real); + if (!(r == true && ptr.real == newval.real)) { + exit(2); + } +} + +int main(int argc, char** argv) +{ + test1(); + test2(); + return 0; +} +]]) - PMIX_VAR_SCOPE_PUSH([sync_bool_compare_and_swap_128_result CFLAGS_save]) +dnl This is a C test to see if 128-bit atomic_compare_exchange_strong() +dnl actually works (e.g., it compiles and links successfully on +dnl ARM64+clang, but returns incorrect answers as of August 2018).
+AC_DEFUN([PMIX_ATOMIC_COMPARE_EXCHANGE_STRONG_TEST_SOURCE],[[ +#include +#include +#include +#include - AC_ARG_ENABLE([cross-cmpset128],[AC_HELP_STRING([--enable-cross-cmpset128], - [enable the use of the __sync builtin atomic compare-and-swap 128 when cross compiling])]) +typedef union { + uint64_t fake@<:@2@:>@; + _Atomic __int128 real; +} pmix128; - sync_bool_compare_and_swap_128_result=0 +static void test1(void) +{ + // As of Aug 2018, we could not figure out a way to assign 128-bit + // constants -- the compilers would not accept it. So use a fake + // union to assign 2 uin64_t's to make a single __int128. + pmix128 ptr = { .fake = { 0xFFEEDDCCBBAA0099, 0x8877665544332211 }}; + pmix128 expected = { .fake = { 0x11EEDDCCBBAA0099, 0x88776655443322FF }}; + pmix128 desired = { .fake = { 0x1122DDCCBBAA0099, 0x887766554433EEFF }}; + bool r = atomic_compare_exchange_strong (&ptr.real, &expected.real, + desired.real, true, + atomic_relaxed, atomic_relaxed); + if ( !(r == false && ptr.real == expected.real)) { + exit(1); + } +} - if test ! 
"$enable_cross_cmpset128" = "yes" ; then - AC_MSG_CHECKING([for processor support of __sync builtin atomic compare-and-swap on 128-bit values]) +static void test2(void) +{ + pmix128 ptr = { .fake = { 0xFFEEDDCCBBAA0099, 0x8877665544332211 }}; + pmix128 expected = ptr; + pmix128 desired = { .fake = { 0x1122DDCCBBAA0099, 0x887766554433EEFF }}; + bool r = atomic_compare_exchange_strong (&ptr.real, &expected.real, + desired.real, true, + atomic_relaxed, atomic_relaxed); + if (!(r == true && ptr.real == desired.real)) { + exit(2); + } +} - AC_RUN_IFELSE([AC_LANG_PROGRAM([], [__int128 x = 0; __sync_bool_compare_and_swap (&x, 0, 1);])], - [AC_MSG_RESULT([yes]) - sync_bool_compare_and_swap_128_result=1], - [AC_MSG_RESULT([no])], - [AC_MSG_RESULT([no (cross compiling)])]) +int main(int argc, char** argv) +{ + test1(); + test2(); + return 0; +} +]]) - if test $sync_bool_compare_and_swap_128_result = 0 ; then - CFLAGS_save=$CFLAGS - CFLAGS="$CFLAGS -mcx16" +dnl ------------------------------------------------------------------ - AC_MSG_CHECKING([for __sync builtin atomic compare-and-swap on 128-bit values with -mcx16 flag]) - AC_RUN_IFELSE([AC_LANG_PROGRAM([], [__int128 x = 0; __sync_bool_compare_and_swap (&x, 0, 1);])], - [AC_MSG_RESULT([yes]) - sync_bool_compare_and_swap_128_result=1 - CFLAGS_save="$CFLAGS"], - [AC_MSG_RESULT([no])], - [AC_MSG_RESULT([no (cross compiling)])]) +dnl +dnl Check to see if a specific function is linkable. +dnl +dnl Check with: +dnl 1. No compiler/linker flags. +dnl 2. CFLAGS += -mcx16 +dnl 3. LIBS += -latomic +dnl 4. Finally, if it links ok with any of #1, #2, or #3, actually try +dnl to run the test code (if we're not cross-compiling) and verify +dnl that it actually gives us the correct result. 
+dnl +dnl Note that we unfortunately can't use AC_SEARCH_LIBS because its +dnl check incorrectly fails (because these functions are special compiler +dnl intrinsics -- SEARCH_LIBS tries with "check FUNC()", which the +dnl compiler complains doesn't match the internal prototype). So we have +dnl to use our own LINK_IFELSE tests. Indeed, since these functions are +dnl so special, we actually need a valid source code that calls the +dnl functions with correct arguments, etc. It's not enough, for example, +dnl to do the usual "try to set a function pointer to the symbol" trick to +dnl determine if these functions are available, because the compiler may +dnl not implement these as actual symbols. So just try to link a real +dnl test code. +dnl +dnl $1: function name to print +dnl $2: program to test +dnl $3: action if any of 1, 2, or 3 succeeds +dnl $4: action if all of 1, 2, and 3 fail +dnl +AC_DEFUN([PMIX_ASM_CHECK_ATOMIC_FUNC],[ + PMIX_VAR_SCOPE_PUSH([pmix_asm_check_func_happy pmix_asm_check_func_CFLAGS_save pmix_asm_check_func_LIBS_save]) + + pmix_asm_check_func_CFLAGS_save=$CFLAGS + pmix_asm_check_func_LIBS_save=$LIBS + + dnl Check with no compiler/linker flags + AC_MSG_CHECKING([for $1]) + AC_LINK_IFELSE([$2], + [pmix_asm_check_func_happy=1 + AC_MSG_RESULT([yes])], + [pmix_asm_check_func_happy=0 + AC_MSG_RESULT([no])]) + + dnl If that didn't work, try again with CFLAGS+=-mcx16 + AS_IF([test $pmix_asm_check_func_happy -eq 0], + [AC_MSG_CHECKING([for $1 with -mcx16]) + CFLAGS="$CFLAGS -mcx16" + AC_LINK_IFELSE([$2], + [pmix_asm_check_func_happy=1 + AC_MSG_RESULT([yes])], + [pmix_asm_check_func_happy=0 + CFLAGS=$pmix_asm_check_func_CFLAGS_save + AC_MSG_RESULT([no])]) + ]) + + dnl If that didn't work, try again with LIBS+=-latomic + AS_IF([test $pmix_asm_check_func_happy -eq 0], + [AC_MSG_CHECKING([for $1 with -latomic]) + LIBS="$LIBS -latomic" + AC_LINK_IFELSE([$2], + [pmix_asm_check_func_happy=1 + AC_MSG_RESULT([yes])], + [pmix_asm_check_func_happy=0 +
LIBS=$pmix_asm_check_func_LIBS_save + AC_MSG_RESULT([no])]) + ]) + + dnl If we have it, try it and make sure it gives a correct result. + dnl As of Aug 2018, we know that it links but does *not* work on clang + dnl 6 on ARM64. + AS_IF([test $pmix_asm_check_func_happy -eq 1], + [AC_MSG_CHECKING([if $1() gives correct results]) + AC_RUN_IFELSE([$2], + [AC_MSG_RESULT([yes])], + [pmix_asm_check_func_happy=0 + AC_MSG_RESULT([no])], + [AC_MSG_RESULT([cannot test -- assume yes (cross compiling)])]) + ]) - CFLAGS=$CFLAGS_save - fi - else - AC_MSG_CHECKING([for compiler support of __sync builtin atomic compare-and-swap on 128-bit values]) + dnl If we were unsuccessful, restore CFLAGS/LIBS + AS_IF([test $pmix_asm_check_func_happy -eq 0], + [CFLAGS=$pmix_asm_check_func_CFLAGS_save + LIBS=$pmix_asm_check_func_LIBS_save]) - # Check if the compiler supports the __sync builtin - AC_TRY_LINK([], [__int128 x = 0; __sync_bool_compare_and_swap (&x, 0, 1);], - [AC_MSG_RESULT([yes]) - sync_bool_compare_and_swap_128_result=1], - [AC_MSG_RESULT([no])]) + dnl Run the user actions + AS_IF([test $pmix_asm_check_func_happy -eq 1], [$3], [$4]) - if test $sync_bool_compare_and_swap_128_result = 0 ; then - CFLAGS_save=$CFLAGS - CFLAGS="$CFLAGS -mcx16" + PMIX_VAR_SCOPE_POP +]) + +dnl ------------------------------------------------------------------ - AC_MSG_CHECKING([for __sync builtin atomic compare-and-swap on 128-bit values with -mcx16 flag]) - AC_TRY_LINK([], [__int128 x = 0; __sync_bool_compare_and_swap (&x, 0, 1);], - [AC_MSG_RESULT([yes]) - sync_bool_compare_and_swap_128_result=1 - CFLAGS_save="$CFLAGS"], - [AC_MSG_RESULT([no])]) +AC_DEFUN([PMIX_CHECK_SYNC_BUILTIN_CSWAP_INT128], [ + PMIX_VAR_SCOPE_PUSH([sync_bool_compare_and_swap_128_result]) - CFLAGS=$CFLAGS_save - fi - fi + # Do we have __sync_bool_compare_and_swap? + # Use a special macro because we need to check with a few different + # CFLAGS/LIBS. 
+ PMIX_ASM_CHECK_ATOMIC_FUNC([__sync_bool_compare_and_swap], + [AC_LANG_SOURCE(PMIX_SYNC_BOOL_COMPARE_AND_SWAP_TEST_SOURCE)], + [sync_bool_compare_and_swap_128_result=1], + [sync_bool_compare_and_swap_128_result=0]) - AC_DEFINE_UNQUOTED([PMIX_HAVE_SYNC_BUILTIN_CSWAP_INT128], [$sync_bool_compare_and_swap_128_result], - [Whether the __sync builtin atomic compare and swap supports 128-bit values]) + AC_DEFINE_UNQUOTED([PMIX_HAVE_SYNC_BUILTIN_CSWAP_INT128], + [$sync_bool_compare_and_swap_128_result], + [Whether the __sync builtin atomic compare and swap supports 128-bit values]) PMIX_VAR_SCOPE_POP ]) @@ -112,7 +309,7 @@ __sync_add_and_fetch(&tmp, 1);], pmix_asm_sync_have_64bit=0]) AC_DEFINE_UNQUOTED([PMIX_ASM_SYNC_HAVE_64BIT],[$pmix_asm_sync_have_64bit], - [Whether 64-bit is supported by the __sync builtin atomics]) + [Whether 64-bit is supported by the __sync builtin atomics]) # Check for 128-bit support PMIX_CHECK_SYNC_BUILTIN_CSWAP_INT128 @@ -120,73 +317,110 @@ __sync_add_and_fetch(&tmp, 1);], AC_DEFUN([PMIX_CHECK_GCC_BUILTIN_CSWAP_INT128], [ + PMIX_VAR_SCOPE_PUSH([atomic_compare_exchange_n_128_result atomic_compare_exchange_n_128_CFLAGS_save atomic_compare_exchange_n_128_LIBS_save]) + + atomic_compare_exchange_n_128_CFLAGS_save=$CFLAGS + atomic_compare_exchange_n_128_LIBS_save=$LIBS + + # Do we have __sync_bool_compare_and_swap? + # Use a special macro because we need to check with a few different + # CFLAGS/LIBS. + PMIX_ASM_CHECK_ATOMIC_FUNC([__atomic_compare_exchange_n], + [AC_LANG_SOURCE(PMIX_ATOMIC_COMPARE_EXCHANGE_N_TEST_SOURCE)], + [atomic_compare_exchange_n_128_result=1], + [atomic_compare_exchange_n_128_result=0]) + + # If we have it and it works, check to make sure it is always lock + # free. 
+ AS_IF([test $atomic_compare_exchange_n_128_result -eq 1], + [AC_MSG_CHECKING([if __int128 atomic compare-and-swap is always lock-free]) + AC_RUN_IFELSE([AC_LANG_PROGRAM([], [if (!__atomic_always_lock_free(16, 0)) { return 1; }])], + [AC_MSG_RESULT([yes])], + [atomic_compare_exchange_n_128_result=0 + # If this test fails, need to reset CFLAGS/LIBS (the + # above tests atomically set CFLAGS/LIBS or not; this + # test is running after the fact, so we have to undo + # the side-effects of setting CFLAGS/LIBS if the above + # tests passed). + CFLAGS=$atomic_compare_exchange_n_128_CFLAGS_save + LIBS=$atomic_compare_exchange_n_128_LIBS_save + AC_MSG_RESULT([no])], + [AC_MSG_RESULT([cannot test -- assume yes (cross compiling)])]) + ]) + + AC_DEFINE_UNQUOTED([PMIX_HAVE_GCC_BUILTIN_CSWAP_INT128], + [$atomic_compare_exchange_n_128_result], + [Whether the __atomic builtin atomic compare swap is both supported and lock-free on 128-bit values]) + + dnl If we could not find decent support for 128-bits __atomic let's + dnl try the GCC _sync + AS_IF([test $atomic_compare_exchange_n_128_result -eq 0], + [PMIX_CHECK_SYNC_BUILTIN_CSWAP_INT128]) - PMIX_VAR_SCOPE_PUSH([atomic_compare_exchange_n_128_result CFLAGS_save]) - - AC_ARG_ENABLE([cross-cmpset128],[AC_HELP_STRING([--enable-cross-cmpset128], - [enable the use of the __sync builtin atomic compare-and-swap 128 when cross compiling])]) - - atomic_compare_exchange_n_128_result=0 - - if test ! 
"$enable_cross_cmpset128" = "yes" ; then - AC_MSG_CHECKING([for processor support of __atomic builtin atomic compare-and-swap on 128-bit values]) - - AC_RUN_IFELSE([AC_LANG_PROGRAM([], [__int128 x = 0, y = 0; __atomic_compare_exchange_n (&x, &y, 1, 0, __ATOMIC_RELAXED, __ATOMIC_RELAXED);])], - [AC_MSG_RESULT([yes]) - atomic_compare_exchange_n_128_result=1], - [AC_MSG_RESULT([no])], - [AC_MSG_RESULT([no (cross compiling)])]) + PMIX_VAR_SCOPE_POP +]) - if test $atomic_compare_exchange_n_128_result = 0 ; then - CFLAGS_save=$CFLAGS - CFLAGS="$CFLAGS -mcx16" +AC_DEFUN([PMIX_CHECK_GCC_ATOMIC_BUILTINS], [ + AC_MSG_CHECKING([for __atomic builtin atomics]) - AC_MSG_CHECKING([for __atomic builtin atomic compare-and-swap on 128-bit values with -mcx16 flag]) - AC_RUN_IFELSE([AC_LANG_PROGRAM([], [__int128 x = 0, y = 0; __atomic_compare_exchange_n (&x, &y, 1, 0, __ATOMIC_RELAXED, __ATOMIC_RELAXED);])], - [AC_MSG_RESULT([yes]) - atomic_compare_exchange_n_128_result=1 - CFLAGS_save="$CFLAGS"], - [AC_MSG_RESULT([no])], - [AC_MSG_RESULT([no (cross compiling)])]) + AC_TRY_LINK([ +#include +uint32_t tmp, old = 0; +uint64_t tmp64, old64 = 0;], [ +__atomic_thread_fence(__ATOMIC_SEQ_CST); +__atomic_compare_exchange_n(&tmp, &old, 1, 0, __ATOMIC_RELAXED, __ATOMIC_RELAXED); +__atomic_add_fetch(&tmp, 1, __ATOMIC_RELAXED); +__atomic_compare_exchange_n(&tmp64, &old64, 1, 0, __ATOMIC_RELAXED, __ATOMIC_RELAXED); +__atomic_add_fetch(&tmp64, 1, __ATOMIC_RELAXED);], + [AC_MSG_RESULT([yes]) + $1], + [AC_MSG_RESULT([no]) + $2]) - CFLAGS=$CFLAGS_save - fi + # Check for 128-bit support + PMIX_CHECK_GCC_BUILTIN_CSWAP_INT128 +]) - if test $atomic_compare_exchange_n_128_result = 1 ; then - AC_MSG_CHECKING([if __int128 atomic compare-and-swap is always lock-free]) - AC_RUN_IFELSE([AC_LANG_PROGRAM([], [if (!__atomic_always_lock_free(16, 0)) { return 1; }])], +AC_DEFUN([PMIX_CHECK_C11_CSWAP_INT128], [ + PMIX_VAR_SCOPE_PUSH([atomic_compare_exchange_result atomic_compare_exchange_CFLAGS_save 
atomic_compare_exchange_LIBS_save]) + + atomic_compare_exchange_CFLAGS_save=$CFLAGS + atomic_compare_exchange_LIBS_save=$LIBS + + # Do we have C11 atomics on 128-bit integers? + # Use a special macro because we need to check with a few different + # CFLAGS/LIBS. + PMIX_ASM_CHECK_ATOMIC_FUNC([atomic_compare_exchange_strong_16], + [AC_LANG_SOURCE(PMIX_ATOMIC_COMPARE_EXCHANGE_STRONG_TEST_SOURCE)], + [atomic_compare_exchange_result=1], + [atomic_compare_exchange_result=0]) + + # If we have it and it works, check to make sure it is always lock + # free. + AS_IF([test $atomic_compare_exchange_result -eq 1], + [AC_MSG_CHECKING([if C11 __int128 atomic compare-and-swap is always lock-free]) + AC_RUN_IFELSE([AC_LANG_PROGRAM([#include ], [_Atomic __int128_t x; if (!atomic_is_lock_free(&x)) { return 1; }])], [AC_MSG_RESULT([yes])], - [AC_MSG_RESULT([no]) - PMIX_CHECK_SYNC_BUILTIN_CSWAP_INT128 - atomic_compare_exchange_n_128_result=0], - [AC_MSG_RESULT([no (cross compiling)])]) - fi - else - AC_MSG_CHECKING([for compiler support of __atomic builtin atomic compare-and-swap on 128-bit values]) - - # Check if the compiler supports the __atomic builtin - AC_TRY_LINK([], [__int128 x = 0, y = 0; __atomic_compare_exchange_n (&x, &y, 1, 0, __ATOMIC_RELAXED, __ATOMIC_RELAXED);], - [AC_MSG_RESULT([yes]) - atomic_compare_exchange_n_128_result=1], - [AC_MSG_RESULT([no])]) - - if test $atomic_compare_exchange_n_128_result = 0 ; then - CFLAGS_save=$CFLAGS - CFLAGS="$CFLAGS -mcx16" - - AC_MSG_CHECKING([for __atomic builtin atomic compare-and-swap on 128-bit values with -mcx16 flag]) - AC_TRY_LINK([], [__int128 x = 0, y = 0; __atomic_compare_exchange_n (&x, &y, 1, 0, __ATOMIC_RELAXED, __ATOMIC_RELAXED);], - [AC_MSG_RESULT([yes]) - atomic_compare_exchange_n_128_result=1 - CFLAGS_save="$CFLAGS"], - [AC_MSG_RESULT([no])]) - - CFLAGS=$CFLAGS_save - fi - fi - - AC_DEFINE_UNQUOTED([PMIX_HAVE_GCC_BUILTIN_CSWAP_INT128], [$atomic_compare_exchange_n_128_result], - [Whether the __atomic builtin atomic 
compare and swap is lock-free on 128-bit values]) + [atomic_compare_exchange_result=0 + # If this test fails, need to reset CFLAGS/LIBS (the + # above tests atomically set CFLAGS/LIBS or not; this + # test is running after the fact, so we have to undo + # the side-effects of setting CFLAGS/LIBS if the above + # tests passed). + CFLAGS=$atomic_compare_exchange_CFLAGS_save + LIBS=$atomic_compare_exchange_LIBS_save + AC_MSG_RESULT([no])], + [AC_MSG_RESULT([cannot test -- assume yes (cross compiling)])]) + ]) + + AC_DEFINE_UNQUOTED([PMIX_HAVE_C11_CSWAP_INT128], + [$atomic_compare_exchange_result], + [Whether C11 atomic compare swap is both supported and lock-free on 128-bit values]) + + dnl If we could not find decent support for 128-bits atomic let's + dnl try the GCC _sync + AS_IF([test $atomic_compare_exchange_result -eq 0], + [PMIX_CHECK_SYNC_BUILTIN_CSWAP_INT128]) PMIX_VAR_SCOPE_POP ]) @@ -533,7 +767,7 @@ dnl PMIX_CHECK_ASM_TYPE dnl dnl Sets PMIX_ASM_TYPE to the prefix for the function type to dnl set a symbol's type as function (needed on ELF for shared -dnl libaries). If no .type directive is needed, sets PMIX_ASM_TYPE +dnl libraries). If no .type directive is needed, sets PMIX_ASM_TYPE dnl to an empty string dnl dnl We look for @ \# % @@ -727,7 +961,7 @@ AC_DEFUN([PMIX_CHECK_SPARCV8PLUS],[ AC_MSG_CHECKING([if have Sparc v8+/v9 support]) sparc_result=0 PMIX_TRY_ASSEMBLE([$pmix_cv_asm_text - casa [%o0] 0x80, %o1, %o2], + casa [%o0] 0x80, %o1, %o2], [sparc_result=1], [sparc_result=0]) if test "$sparc_result" = "1" ; then @@ -746,35 +980,8 @@ dnl dnl PMIX_CHECK_CMPXCHG16B dnl dnl ################################################################# -AC_DEFUN([PMIX_CHECK_CMPXCHG16B],[ - PMIX_VAR_SCOPE_PUSH([cmpxchg16b_result]) - - AC_ARG_ENABLE([cross-cmpxchg16b],[AC_HELP_STRING([--enable-cross-cmpxchg16b], - [enable the use of the cmpxchg16b instruction when cross compiling])]) - - if test ! 
"$enable_cross_cmpxchg16b" = "yes" ; then - AC_MSG_CHECKING([if processor supports x86_64 16-byte compare-and-exchange]) - AC_RUN_IFELSE([AC_LANG_PROGRAM([[unsigned char tmp[16];]],[[ - __asm__ __volatile__ ("lock cmpxchg16b (%%rsi)" : : "S" (tmp) : "memory", "cc");]])], - [AC_MSG_RESULT([yes]) - cmpxchg16b_result=1], - [AC_MSG_RESULT([no]) - cmpxchg16b_result=0], - [AC_MSG_RESULT([no (cross-compiling)]) - cmpxchg16b_result=0]) - else - AC_MSG_CHECKING([if assembler supports x86_64 16-byte compare-and-exchange]) - - PMIX_TRY_ASSEMBLE([$pmix_cv_asm_text - cmpxchg16b 0], - [AC_MSG_RESULT([yes]) - cmpxchg16b_result=1], - [AC_MSG_RESULT([no]) - cmpxchg16b_result=0]) - fi - if test "$cmpxchg16b_result" = 1; then - AC_MSG_CHECKING([if compiler correctly handles volatile 128bits]) - AC_RUN_IFELSE([AC_LANG_PROGRAM([#include +AC_DEFUN([PMIX_CMPXCHG16B_TEST_SOURCE],[[ +#include #include union pmix_counted_pointer_t { @@ -788,8 +995,10 @@ union pmix_counted_pointer_t { int128_t value; #endif }; -typedef union pmix_counted_pointer_t pmix_counted_pointer_t;], - [volatile pmix_counted_pointer_t a; +typedef union pmix_counted_pointer_t pmix_counted_pointer_t; + +int main(int argc, char* argv) { + volatile pmix_counted_pointer_t a; pmix_counted_pointer_t b; a.data.counter = 0; @@ -814,12 +1023,28 @@ typedef union pmix_counted_pointer_t pmix_counted_pointer_t;], return (a.value != b.value); #else return 0; -#endif])], - [AC_MSG_RESULT([yes])], - [AC_MSG_RESULT([no]) - cmpxchg16b_result=0], - [AC_MSG_RESULT([untested, assuming ok])]) - fi +#endif +} +]]) + +AC_DEFUN([PMIX_CHECK_CMPXCHG16B],[ + PMIX_VAR_SCOPE_PUSH([cmpxchg16b_result]) + + PMIX_ASM_CHECK_ATOMIC_FUNC([cmpxchg16b], + [AC_LANG_PROGRAM([[unsigned char tmp[16];]], + [[__asm__ __volatile__ ("lock cmpxchg16b (%%rsi)" : : "S" (tmp) : "memory", "cc");]])], + [cmpxchg16b_result=1], + [cmpxchg16b_result=0]) + # If we have it, make sure it works. 
+ AS_IF([test $cmpxchg16b_result -eq 1], + [AC_MSG_CHECKING([if cmpxchg16b_result works]) + AC_RUN_IFELSE([AC_LANG_SOURCE(PMIX_CMPXCHG16B_TEST_SOURCE)], + [AC_MSG_RESULT([yes])], + [cmpxchg16b_result=0 + AC_MSG_RESULT([no])], + [AC_MSG_RESULT([cannot test -- assume yes (cross compiling)])]) + ]) + AC_DEFINE_UNQUOTED([PMIX_HAVE_CMPXCHG16B], [$cmpxchg16b_result], [Whether the processor supports the cmpxchg16b instruction]) PMIX_VAR_SCOPE_POP @@ -832,7 +1057,7 @@ dnl dnl Check if the compiler is capable of doing GCC-style inline dnl assembly. Some compilers emit a warning and ignore the inline dnl assembly (xlc on OS X) and compile without error. Therefore, -dnl the test attempts to run the emited code to check that the +dnl the test attempts to run the emitted code to check that the dnl assembly is actually run. To run this test, one argument to dnl the macro must be an assembly instruction in gcc format to move dnl the value 0 into the register containing the variable ret. @@ -885,7 +1110,7 @@ return ret; if test "$asm_result" = "yes" ; then PMIX_C_GCC_INLINE_ASSEMBLY=1 - pmix_cv_asm_inline_supported="yes" + pmix_cv_asm_inline_supported="yes" else PMIX_C_GCC_INLINE_ASSEMBLY=0 fi @@ -912,18 +1137,30 @@ AC_DEFUN([PMIX_CONFIG_ASM],[ AC_REQUIRE([PMIX_SETUP_CC]) AC_REQUIRE([AM_PROG_AS]) + AC_ARG_ENABLE([c11-atomics],[AC_HELP_STRING([--enable-c11-atomics], + [Enable use of C11 atomics if available (default: enabled)])]) + AC_ARG_ENABLE([builtin-atomics], [AC_HELP_STRING([--enable-builtin-atomics], - [Enable use of __sync builtin atomics (default: enabled)])], - [], [enable_builtin_atomics="yes"]) + [Enable use of __sync builtin atomics (default: disabled)])]) - pmix_cv_asm_builtin="BUILTIN_NO" - AS_IF([test "$pmix_cv_asm_builtin" = "BUILTIN_NO" && test "$enable_builtin_atomics" != "no"], - [PMIX_CHECK_GCC_ATOMIC_BUILTINS([pmix_cv_asm_builtin="BUILTIN_GCC"], [])]) - AS_IF([test "$pmix_cv_asm_builtin" = "BUILTIN_NO" && test "$enable_builtin_atomics" != "no"], - 
[PMIX_CHECK_SYNC_BUILTINS([pmix_cv_asm_builtin="BUILTIN_SYNC"], [])]) - AS_IF([test "$pmix_cv_asm_builtin" = "BUILTIN_NO" && test "$enable_builtin_atomics" = "yes"], - [AC_MSG_WARN([__sync builtin atomics requested but not found - proceeding with inline atomics])]) + PMIX_CHECK_C11_CSWAP_INT128 + + if test "x$enable_c11_atomics" != "xno" && test "$pmix_cv_c11_supported" = "yes" ; then + pmix_cv_asm_builtin="BUILTIN_C11" + PMIX_CHECK_C11_CSWAP_INT128 + elif test "x$enable_c11_atomics" = "xyes"; then + AC_MSG_WARN([C11 atomics were requested but are not supported]) + AC_MSG_ERROR([Cannot continue]) + else + pmix_cv_asm_builtin="BUILTIN_NO" + AS_IF([test "$pmix_cv_asm_builtin" = "BUILTIN_NO" && test "$enable_builtin_atomics" = "yes"], + [PMIX_CHECK_GCC_ATOMIC_BUILTINS([pmix_cv_asm_builtin="BUILTIN_GCC"], [])]) + AS_IF([test "$pmix_cv_asm_builtin" = "BUILTIN_NO" && test "$enable_builtin_atomics" = "yes"], + [PMIX_CHECK_SYNC_BUILTINS([pmix_cv_asm_builtin="BUILTIN_SYNC"], [])]) + AS_IF([test "$pmix_cv_asm_builtin" = "BUILTIN_NO" && test "$enable_builtin_atomics" = "yes"], + [AC_MSG_ERROR([__sync builtin atomics requested but not found.])]) + fi PMIX_CHECK_ASM_PROC PMIX_CHECK_ASM_TEXT @@ -960,9 +1197,9 @@ AC_DEFUN([PMIX_CONFIG_ASM],[ ia64-*) pmix_cv_asm_arch="IA64" PMIX_CHECK_SYNC_BUILTINS([pmix_cv_asm_builtin="BUILTIN_SYNC"], - [AC_MSG_ERROR([No atomic primitives available for $host])]) + [AC_MSG_ERROR([No atomic primitives available for $host])]) ;; - aarch64*) + aarch64*) pmix_cv_asm_arch="ARM64" PMIX_ASM_SUPPORT_64BIT=1 PMIX_ASM_ARM_VERSION=8 @@ -994,7 +1231,7 @@ AC_DEFUN([PMIX_CONFIG_ASM],[ # uses Linux kernel helpers for some atomic operations pmix_cv_asm_arch="ARM" PMIX_CHECK_SYNC_BUILTINS([pmix_cv_asm_builtin="BUILTIN_SYNC"], - [AC_MSG_ERROR([No atomic primitives available for $host])]) + [AC_MSG_ERROR([No atomic primitives available for $host])]) ;; mips-*|mips64*) @@ -1002,7 +1239,7 @@ AC_DEFUN([PMIX_CONFIG_ASM],[ # a MIPS III machine (r4000 and later) 
pmix_cv_asm_arch="MIPS" PMIX_CHECK_SYNC_BUILTINS([pmix_cv_asm_builtin="BUILTIN_SYNC"], - [AC_MSG_ERROR([No atomic primitives available for $host])]) + [AC_MSG_ERROR([No atomic primitives available for $host])]) ;; powerpc-*|powerpc64-*|powerpcle-*|powerpc64le-*|rs6000-*|ppc-*) @@ -1070,11 +1307,11 @@ AC_MSG_ERROR([Can not continue.]) ;; esac - if test "x$PMIX_ASM_SUPPORT_64BIT" = "x1" && test "$pmix_cv_asm_builtin" = "BUILTIN_SYNC" && - test "$pmix_asm_sync_have_64bit" = "0" ; then - # __sync builtins exist but do not implement 64-bit support. Fall back on inline asm. - pmix_cv_asm_builtin="BUILTIN_NO" - fi + if test "x$PMIX_ASM_SUPPORT_64BIT" = "x1" && test "$pmix_cv_asm_builtin" = "BUILTIN_SYNC" && + test "$pmix_asm_sync_have_64bit" = "0" ; then + # __sync builtins exist but do not implement 64-bit support. Fall back on inline asm. + pmix_cv_asm_builtin="BUILTIN_NO" + fi if test "$pmix_cv_asm_builtin" = "BUILTIN_SYNC" || test "$pmix_cv_asm_builtin" = "BUILTIN_GCC" ; then AC_DEFINE([PMIX_C_GCC_INLINE_ASSEMBLY], [1], @@ -1097,7 +1334,7 @@ AC_MSG_ERROR([Can not continue.]) ;; esac - pmix_cv_asm_inline_supported="no" + pmix_cv_asm_inline_supported="no" # now that we know our architecture, try to inline assemble PMIX_CHECK_INLINE_C_GCC([$PMIX_GCC_INLINE_ASSIGN]) diff --git a/opal/mca/pmix/pmix3x/pmix/config/pmix_config_pthreads.m4 b/opal/mca/pmix/pmix3x/pmix/config/pmix_config_pthreads.m4 index 2e2f1fd8f97..b23f66ebb01 100644 --- a/opal/mca/pmix/pmix3x/pmix/config/pmix_config_pthreads.m4 +++ b/opal/mca/pmix/pmix3x/pmix/config/pmix_config_pthreads.m4 @@ -10,7 +10,7 @@ dnl University of Stuttgart. All rights reserved. dnl Copyright (c) 2004-2005 The Regents of the University of California. dnl All rights reserved. dnl Copyright (c) 2012 Cisco Systems, Inc. All rights reserved. -dnl Copyright (c) 2014-2017 Intel, Inc. All rights reserved. +dnl Copyright (c) 2014-2019 Intel, Inc. All rights reserved. 
dnl Copyright (c) 2014-2016 Research Organization for Information Science dnl and Technology (RIST). All rights reserved. dnl $COPYRIGHT$ @@ -71,104 +71,6 @@ int main(int argc, char* argv[]) # END: PMIX_INTL_PTHREAD_TRY_LINK ])dnl - -AC_DEFUN([PMIX_INTL_PTHREAD_TRY_LINK_FORTRAN], [ -# BEGIN: PMIX_INTL_PTHREAD_TRY_LINK_FORTRAN -# -# Make sure that we can run a small application in Fortran, with -# pthreads living in a C object file - -# Fortran module -cat > conftestf.f < conftest.c < -#include -#include -$pmix_conftest_h - -#ifdef __cplusplus -extern "C" { -#endif -int i = 3; -pthread_t me, newthread; - -void cleanup_routine(void *foo); -void *thread_main(void *foo); -void pthreadtest_f(void); - -void cleanup_routine(void *foo) { i = 4; } -void *thread_main(void *foo) { i = 2; return (void*) &i; } - -void pthreadtest_f(void) -{ - pthread_attr_t attr; - - me = pthread_self(); - pthread_atfork(NULL, NULL, NULL); - pthread_attr_init(&attr); - pthread_cleanup_push(cleanup_routine, 0); - pthread_create(&newthread, &attr, thread_main, 0); - pthread_join(newthread, 0); - pthread_cleanup_pop(0); -} - -void pthreadtest(void) -{ pthreadtest_f(); } - -void pthreadtest_(void) -{ pthreadtest_f(); } - -void pthreadtest__(void) -{ pthreadtest_f(); } - -void PTHREADTEST(void) -{ pthreadtest_f(); } - -#ifdef __cplusplus -} -#endif -EOF - -# Try the compile -PMIX_LOG_COMMAND( - [$CC $CFLAGS -I. 
-c conftest.c], - PMIX_LOG_COMMAND( - [$FC $FCFLAGS conftestf.f conftest.o -o conftest $LDFLAGS $LIBS], - [HAPPY=1], - [HAPPY=0]), - [HAPPY=0]) - -if test "$HAPPY" = "1"; then - $1 -else - PMIX_LOG_MSG([here is the C program:], 1) - PMIX_LOG_FILE([conftest.c]) - if test -f conftest.h; then - PMIX_LOG_MSG([here is contest.h:], 1) - PMIX_LOG_FILE([conftest.h]) - fi - PMIX_LOG_MSG([here is the fortran program:], 1) - PMIX_LOG_FILE([conftestf.f]) - $2 -fi - -unset HAPPY pmix_conftest_h -rm -rf conftest* -# END: PMIX_INTL_PTHREAD_TRY_LINK_FORTRAN -])dnl - - # ******************************************************************** # # Try to compile thread support without any special flags @@ -194,48 +96,6 @@ fi ])dnl -AC_DEFUN([PMIX_INTL_POSIX_THREADS_PLAIN_CXX], [ -# -# C++ compiler -# -if test "$pmix_pthread_cxx_success" = "0"; then - AC_MSG_CHECKING([if C++ compiler and POSIX threads work as is]) - - AC_LANG_PUSH(C++) - PMIX_INTL_PTHREAD_TRY_LINK(pmix_pthread_cxx_success=1, - pmix_pthread_cxx_success=0) - AC_LANG_POP(C++) - if test "$pmix_pthread_cxx_success" = "1"; then - AC_MSG_RESULT([yes]) - else - AC_MSG_RESULT([no]) - fi -fi -])dnl - - -AC_DEFUN([PMIX_INTL_POSIX_THREADS_PLAIN_FC], [ -# -# Fortran compiler -# -if test "$pmix_pthread_fortran_success" = "0" && \ - test "$OMPI_TRY_FORTRAN_BINDINGS" -gt "$OMPI_FORTRAN_NO_BINDINGS" && \ - test $ompi_fortran_happy -eq 1; then - AC_MSG_CHECKING([if Fortran compiler and POSIX threads work as is]) - - AC_LANG_PUSH(C) - PMIX_INTL_PTHREAD_TRY_LINK_FORTRAN(pmix_pthread_fortran_success=1, - pmix_pthread_fortran_success=0) - AC_LANG_POP(C) - if test "$pmix_pthread_fortran_success" = "1"; then - AC_MSG_RESULT([yes]) - else - AC_MSG_RESULT([no]) - fi -fi -])dnl - - AC_DEFUN([PMIX_INTL_POSIX_THREADS_PLAIN], [ # BEGIN: PMIX_INTL_POSIX_THREADS_PLAIN # @@ -246,19 +106,10 @@ AC_DEFUN([PMIX_INTL_POSIX_THREADS_PLAIN], [ # why take chances? 
# -# Only run C++ and Fortran if those compilers already configured AC_PROVIDE_IFELSE([AC_PROG_CC], [PMIX_INTL_POSIX_THREADS_PLAIN_C], [pmix_pthread_c_success=1]) -AC_PROVIDE_IFELSE([AC_PROG_CXX], - [PMIX_INTL_POSIX_THREADS_PLAIN_CXX], - [pmix_pthread_cxx_success=1]) - -AC_PROVIDE_IFELSE([AC_PROG_FC], - [PMIX_INTL_POSIX_THREADS_PLAIN_FC], - [pmix_pthread_fortran_success=1]) - # End: PMIX_INTL_POSIX_THREADS_PLAIN ])dnl @@ -294,60 +145,6 @@ fi ]) -AC_DEFUN([PMIX_INTL_POSIX_THREADS_SPECIAL_FLAGS_CXX], [ -# -# C++ compiler -# -if test "$pmix_pthread_cxx_success" = "0"; then - for pf in $pflags; do - AC_MSG_CHECKING([if C++ compiler and POSIX threads work with $pf]) - CXXFLAGS="$orig_CXXFLAGS $pf" - AC_LANG_PUSH(C++) - PMIX_INTL_PTHREAD_TRY_LINK(pmix_pthread_cxx_success=1, - pmix_pthread_cxx_success=0) - AC_LANG_POP(C++) - if test "$pmix_pthread_cxx_success" = "1"; then - PTHREAD_CXXFLAGS="$pf" - AC_MSG_RESULT([yes]) - break - else - PTHREAD_CXXFLAGS= - CXXFLAGS="$orig_CXXFLAGS" - AC_MSG_RESULT([no]) - fi - done -fi -]) - - -AC_DEFUN([PMIX_INTL_POSIX_THREADS_SPECIAL_FLAGS_FC], [ -# -# Fortran compiler -# -if test "$pmix_pthread_fortran_success" = "0" && \ - test "$OMPI_TRY_FORTRAN_BINDINGS" -gt "$OMPI_FORTRAN_NO_BINDINGS" && \ - test $ompi_fortran_happy -eq 1; then - for pf in $pflags; do - AC_MSG_CHECKING([if Fortran compiler and POSIX threads work with $pf]) - FCFLAGS="$orig_FCFLAGS $pf" - AC_LANG_PUSH(C) - PMIX_INTL_PTHREAD_TRY_LINK_FORTRAN(pmix_pthread_fortran_success=1, - pmix_pthread_fortran_success=0) - AC_LANG_POP(C) - if test "$pmix_pthread_fortran_success" = "1"; then - PTHREAD_FCFLAGS="$pf" - AC_MSG_RESULT([yes]) - break - else - PTHREAD_FCFLAGS= - FCFLAGS="$orig_FCFLAGS" - AC_MSG_RESULT([no]) - fi - done -fi -]) - - AC_DEFUN([PMIX_INTL_POSIX_THREADS_SPECIAL_FLAGS],[ # Begin: PMIX_INTL_POSIX_THREADS_SPECIAL_FLAGS # @@ -374,19 +171,10 @@ case "${host_cpu}-${host_os}" in ;; esac -# Only run C++ and Fortran if those compilers already configured 
AC_PROVIDE_IFELSE([AC_PROG_CC], [PMIX_INTL_POSIX_THREADS_SPECIAL_FLAGS_C], [pmix_pthread_c_success=1]) -AC_PROVIDE_IFELSE([AC_PROG_CXX], - [PMIX_INTL_POSIX_THREADS_SPECIAL_FLAGS_CXX], - [pmix_pthread_cxx_success=1]) - -AC_PROVIDE_IFELSE([AC_PROG_FC], - [PMIX_INTL_POSIX_THREADS_SPECIAL_FLAGS_FC], - [pmix_pthread_fortran_success=1]) - # End: PMIX_INTL_POSIX_THREADS_SPECIAL_FLAGS ])dnl @@ -435,121 +223,6 @@ if test "$pmix_pthread_c_success" = "0"; then fi ])dnl - -AC_DEFUN([PMIX_INTL_POSIX_THREADS_LIBS_CXX],[ -# -# C++ compiler -# -if test "$pmix_pthread_cxx_success" = "0"; then - if test ! "$pmix_pthread_c_success" = "0" && test ! "$PTHREAD_LIBS" = "" ; then - AC_MSG_CHECKING([if C++ compiler and POSIX threads work with $PTHREAD_LIBS]) - case "${host_cpu}-${host-_os}" in - *-aix* | *-freebsd*) - if test "`echo $CXXCPPFLAGS | $GREP 'D_THREAD_SAFE'`" = ""; then - PTHREAD_CXXCPPFLAGS="-D_THREAD_SAFE" - CXXCPPFLAGS="$CXXCPPFLAGS $PTHREAD_CXXCPPFLAGS" - fi - ;; - *) - if test "`echo $CXXCPPFLAGS | $GREP 'D_REENTRANT'`" = ""; then - PTHREAD_CXXCPPFLAGS="-D_REENTRANT" - CXXCPPFLAGS="$CXXCPPFLAGS $PTHREAD_CXXCPPFLAGS" - fi - ;; - esac - LIBS="$orig_LIBS $PTHREAD_LIBS" - AC_LANG_PUSH(C++) - PMIX_INTL_PTHREAD_TRY_LINK(pmix_pthread_cxx_success=1, - pmix_pthread_cxx_success=0) - AC_LANG_POP(C++) - if test "$pmix_pthread_cxx_success" = "1"; then - AC_MSG_RESULT([yes]) - else - CXXCPPFLAGS="$orig_CXXCPPFLAGS" - LIBS="$orig_LIBS" - AC_MSG_RESULT([no]) - AC_MSG_ERROR([Can not find working threads configuration. 
aborting]) - fi - else - for pl in $plibs; do - AC_MSG_CHECKING([if C++ compiler and POSIX threads work with $pl]) - case "${host_cpu}-${host-_os}" in - *-aix* | *-freebsd*) - if test "`echo $CXXCPPFLAGS | $GREP 'D_THREAD_SAFE'`" = ""; then - PTHREAD_CXXCPPFLAGS="-D_THREAD_SAFE" - CXXCPPFLAGS="$CXXCPPFLAGS $PTHREAD_CXXCPPFLAGS" - fi - ;; - *) - if test "`echo $CXXCPPFLAGS | $GREP 'D_REENTRANT'`" = ""; then - PTHREAD_CXXCPPFLAGS="-D_REENTRANT" - CXXCPPFLAGS="$CXXCPPFLAGS $PTHREAD_CXXCPPFLAGS" - fi - ;; - esac - LIBS="$orig_LIBS $pl" - AC_LANG_PUSH(C++) - PMIX_INTL_PTHREAD_TRY_LINK(pmix_pthread_cxx_success=1, - pmix_pthread_cxx_success=0) - AC_LANG_POP(C++) - if test "$pmix_pthread_cxx_success" = "1"; then - PTHREAD_LIBS="$pl" - AC_MSG_RESULT([yes]) - else - PTHREAD_CXXCPPFLAGS= - CXXCPPFLAGS="$orig_CXXCPPFLAGS" - LIBS="$orig_LIBS" - AC_MSG_RESULT([no]) - fi - done - fi -fi -])dnl - - -AC_DEFUN([PMIX_INTL_POSIX_THREADS_LIBS_FC],[ -# -# Fortran compiler -# -if test "$pmix_pthread_fortran_success" = "0" && \ - test "$OMPI_TRY_FORTRAN_BINDINGS" -gt "$OMPI_FORTRAN_NO_BINDINGS" && \ - test $ompi_fortran_happy -eq 1; then - if test ! "$pmix_pthread_c_success" = "0" && test ! "$PTHREAD_LIBS" = "" ; then - AC_MSG_CHECKING([if Fortran compiler and POSIX threads work with $PTHREAD_LIBS]) - LIBS="$orig_LIBS $PTHREAD_LIBS" - AC_LANG_PUSH(C) - PMIX_INTL_PTHREAD_TRY_LINK_FORTRAN(pmix_pthread_fortran_success=1, - pmix_pthread_fortran_success=0) - AC_LANG_POP(C) - if test "$pmix_pthread_fortran_success" = "1"; then - AC_MSG_RESULT([yes]) - else - LIBS="$orig_LIBS" - AC_MSG_RESULT([no]) - AC_MSG_ERROR([Can not find working threads configuration. 
aborting]) - fi - else - for pl in $plibs; do - AC_MSG_CHECKING([if Fortran compiler and POSIX threads work with $pl]) - LIBS="$orig_LIBS $pl" - AC_LANG_PUSH(C) - PMIX_INTL_PTHREAD_TRY_LINK_FORTRAN(pmix_pthread_fortran_success=1, - pmix_pthread_fortran_success=0) - AC_LANG_POP(C) - if test "$pmix_pthread_fortran_success" = "1"; then - PTHREAD_LIBS="$pl" - AC_MSG_RESULT([yes]) - break - else - LIBS="$orig_LIBS" - AC_MSG_RESULT([no]) - fi - done - fi -fi -])dnl - - AC_DEFUN([PMIX_INTL_POSIX_THREADS_LIBS],[ # Begin: PMIX_INTL_POSIX_THREADS_LIBS # @@ -563,19 +236,10 @@ AC_DEFUN([PMIX_INTL_POSIX_THREADS_LIBS],[ # libpthread: The usual place (like we can define usual!) plibs="-lpthreads -llthread -lpthread" -# Only run C++ and Fortran if those compilers already configured AC_PROVIDE_IFELSE([AC_PROG_CC], [PMIX_INTL_POSIX_THREADS_LIBS_C], [pmix_pthread_c_success=1]) -AC_PROVIDE_IFELSE([AC_PROG_CXX], - [PMIX_INTL_POSIX_THREADS_LIBS_CXX], - [pmix_pthread_cxx_success=1]) - -AC_PROVIDE_IFELSE([AC_PROG_FC], - [PMIX_INTL_POSIX_THREADS_LIBS_FC], - [pmix_pthread_fortran_success=1]) - # End: PMIX_INTL_POSIX_THREADS_LIBS] )dnl @@ -589,21 +253,14 @@ AC_DEFUN([PMIX_CONFIG_POSIX_THREADS],[ AC_REQUIRE([AC_PROG_GREP]) pmix_pthread_c_success=0 -pmix_pthread_cxx_success=0 orig_CFLAGS="$CFLAGS" -orig_FCFLAGS="$FCFLAGS" -orig_CXXFLAGS="$CXXFLAGS" orig_CPPFLAGS="$CPPFLAGS" -orig_CXXCPPFLAGS="$CXXCPPFLAGS" orig_LDFLAGS="$LDFLAGS" orig_LIBS="$LIBS" PTHREAD_CFLAGS= -PTHREAD_FCFLAGS= -PTHREAD_CXXFLAGS= PTHREAD_CPPFLAGS= -PTHREAD_CXXCPPFLAGS= PTHREAD_LDFLAGS= PTHREAD_LIBS= @@ -648,15 +305,11 @@ AC_DEFINE_UNQUOTED([PMIX_HAVE_PTHREAD_MUTEX_ERRORCHECK], [$defval], [If PTHREADS implementation supports PTHREAD_MUTEX_ERRORCHECK]) CFLAGS="$orig_CFLAGS" -FCFLAGS="$orig_FCFLAGS" -CXXFLAGS="$orig_CXXFLAGS" CPPFLAGS="$orig_CPPFLAGS" -CXXCPPFLAGS="$orig_CXXCPPFLAGS" LDFLAGS="$orig_LDFLAGS" LIBS="$orig_LIBS" -if test "$pmix_pthread_c_success" = "1" && \ - test "$pmix_pthread_cxx_success" = "1"; then +if test 
"$pmix_pthread_c_success" = "1"; then internal_useless=1 $1 else @@ -664,6 +317,6 @@ else $2 fi -unset pmix_pthread_c_success pmix_pthread_fortran_success pmix_pthread_cxx_success +unset pmix_pthread_c_success unset internal_useless ])dnl diff --git a/opal/mca/pmix/pmix3x/pmix/config/pmix_config_threads.m4 b/opal/mca/pmix/pmix3x/pmix/config/pmix_config_threads.m4 index 541e63f726c..050f8735577 100644 --- a/opal/mca/pmix/pmix3x/pmix/config/pmix_config_threads.m4 +++ b/opal/mca/pmix/pmix3x/pmix/config/pmix_config_threads.m4 @@ -11,7 +11,7 @@ dnl Copyright (c) 2004-2005 The Regents of the University of California. dnl All rights reserved. dnl Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. dnl Copyright (c) 2009-2011 Oak Ridge National Labs. All rights reserved. -dnl Copyright (c) 2014-2017 Intel, Inc. All rights reserved. +dnl Copyright (c) 2014-2019 Intel, Inc. All rights reserved. dnl Copyright (c) 2015 Research Organization for Information Science dnl and Technology (RIST). All rights reserved. dnl $COPYRIGHT$ @@ -56,10 +56,7 @@ if test "$HAVE_POSIX_THREADS" = "0"; then fi THREAD_CFLAGS="$PTHREAD_CFLAGS" -THREAD_FCFLAGS="$PTHREAD_FCFLAGS" -THREAD_CXXFLAGS="$PTHREAD_CXXFLAGS" THREAD_CPPFLAGS="$PTHREAD_CPPFLAGS" -THREAD_CXXCPPFLAGS="$PTHREAD_CXXCPPFLAGS" THREAD_LDFLAGS="$PTHREAD_LDFLAGS" THREAD_LIBS="$PTHREAD_LIBS" diff --git a/opal/mca/pmix/pmix3x/pmix/config/pmix_mca.m4 b/opal/mca/pmix/pmix3x/pmix/config/pmix_mca.m4 index 262e6f4a88e..2978c928b9a 100644 --- a/opal/mca/pmix/pmix3x/pmix/config/pmix_mca.m4 +++ b/opal/mca/pmix/pmix3x/pmix/config/pmix_mca.m4 @@ -11,7 +11,7 @@ dnl University of Stuttgart. All rights reserved. dnl Copyright (c) 2004-2005 The Regents of the University of California. dnl All rights reserved. dnl Copyright (c) 2010-2015 Cisco Systems, Inc. All rights reserved. -dnl Copyright (c) 2013-2016 Intel, Inc. All rights reserved. +dnl Copyright (c) 2013-2019 Intel, Inc. All rights reserved. 
dnl $COPYRIGHT$ dnl dnl Additional copyrights may follow @@ -428,7 +428,7 @@ AC_DEFUN([MCA_CONFIGURE_FRAMEWORK],[ # Create the final .h file that will be included in the type's # top-level glue. This lists all the static components. We don't # need to do this for "common". - if test "$2" != "common"; then + if test "$1" != "common"; then cat > $outfile <]],[[_Static_assert(sizeof(int64_t) == 8, "WTH");]]) - AS_IF([test $pmix_prog_cc_c11_helper__Thread_local_available -eq 1 && test $pmix_prog_cc_c11_helper_atomic_var_available -eq 1], + PMIX_CC_HELPER([if $CC $1 supports C11 atomic_fetch_xor_explicit], [pmix_prog_cc_c11_helper_atomic_fetch_xor_explicit_available], + [[#include +#include ]],[[_Atomic uint32_t a; uint32_t b; atomic_fetch_xor_explicit(&a, b, memory_order_relaxed);]]) + + + AS_IF([test $pmix_prog_cc_c11_helper__Thread_local_available -eq 1 && test $pmix_prog_cc_c11_helper_atomic_var_available -eq 1 && test $pmix_prog_cc_c11_helper_atomic_fetch_xor_explicit_available -eq 1], [$2], [$3]) @@ -128,7 +133,7 @@ AC_DEFUN([PMIX_SETUP_CC],[ AC_REQUIRE([_PMIX_PROG_CC]) AC_REQUIRE([AM_PROG_CC_C_O]) - PMIX_VAR_SCOPE_PUSH([pmix_prog_cc_c11_helper__Thread_local_available pmix_prog_cc_c11_helper_atomic_var_available pmix_prog_cc_c11_helper__Atomic_available pmix_prog_cc_c11_helper__static_assert_available pmix_prog_cc_c11_helper__Generic_available pmix_prog_cc__thread_available]) + PMIX_VAR_SCOPE_PUSH([pmix_prog_cc_c11_helper__Thread_local_available pmix_prog_cc_c11_helper_atomic_var_available pmix_prog_cc_c11_helper__Atomic_available pmix_prog_cc_c11_helper__static_assert_available pmix_prog_cc_c11_helper__Generic_available pmix_prog_cc__thread_available pmix_prog_cc_c11_helper_atomic_fetch_xor_explicit_available]) PMIX_PROG_CC_C11 diff --git a/opal/mca/pmix/pmix3x/pmix/config/pmix_setup_cli.m4 b/opal/mca/pmix/pmix3x/pmix/config/pmix_setup_cli.m4 index 9e66a4133fb..043c0c4d44f 100644 --- a/opal/mca/pmix/pmix3x/pmix/config/pmix_setup_cli.m4 +++ 
b/opal/mca/pmix/pmix3x/pmix/config/pmix_setup_cli.m4 @@ -2,7 +2,8 @@ dnl -*- shell-script -*- dnl dnl Copyright (c) 2016 Research Organization for Information Science dnl and Technology (RIST). All rights reserved. -dnl Copyright (c) 2017 Intel, Inc. All rights reserved. +dnl Copyright (c) 2017-2019 Intel, Inc. All rights reserved. +dnl Copyright (c) 2018 Cisco, Inc. All rights reserved. dnl $COPYRIGHT$ dnl dnl Additional copyrights may follow @@ -33,6 +34,7 @@ AC_DEFUN([PMIX_CAPTURE_CONFIGURE_CLI],[ eval "$1=\$$1\\ \$quoted_arg" done + AC_DEFINE_UNQUOTED([$1], ["$$1"], [Capture the configure cmd line]) PMIX_VAR_SCOPE_POP AC_SUBST($1) ]) diff --git a/opal/mca/pmix/pmix3x/pmix/config/pmix_setup_hwloc.m4 b/opal/mca/pmix/pmix3x/pmix/config/pmix_setup_hwloc.m4 index 4ee046d9abd..a17313259a2 100644 --- a/opal/mca/pmix/pmix3x/pmix/config/pmix_setup_hwloc.m4 +++ b/opal/mca/pmix/pmix3x/pmix/config/pmix_setup_hwloc.m4 @@ -2,7 +2,7 @@ # # Copyright (c) 2009-2015 Cisco Systems, Inc. All rights reserved. # Copyright (c) 2013 Los Alamos National Security, LLC. All rights reserved. -# Copyright (c) 2013-2018 Intel, Inc. All rights reserved. +# Copyright (c) 2013-2019 Intel, Inc. All rights reserved. 
# $COPYRIGHT$ # # Additional copyrights may follow @@ -13,6 +13,39 @@ # MCA_hwloc_CONFIG([action-if-found], [action-if-not-found]) # -------------------------------------------------------------------- AC_DEFUN([PMIX_HWLOC_CONFIG],[ + AC_ARG_WITH([hwloc-header], + [AC_HELP_STRING([--with-hwloc-header=HEADER], + [The value that should be included in C files to include hwloc.h])]) + + AS_IF([test "$pmix_mode" = "embedded"], + [_PMIX_HWLOC_EMBEDDED_MODE], + [_PMIX_HWLOC_EXTERNAL]) + + AC_MSG_CHECKING([hwloc header]) + AC_DEFINE_UNQUOTED([PMIX_HWLOC_HEADER], [$PMIX_HWLOC_HEADER], + [Location of hwloc.h]) + AC_MSG_RESULT([$PMIX_HWLOC_HEADER]) + + AC_DEFINE_UNQUOTED([PMIX_HAVE_HWLOC], [$pmix_hwloc_support], + [Whether or not we have hwloc support]) + + PMIX_SUMMARY_ADD([[External Packages]],[[HWLOC]], [pmix_hwloc], [$pmix_hwloc_support_will_build ($pmix_hwloc_source)]) +]) + +AC_DEFUN([_PMIX_HWLOC_EMBEDDED_MODE],[ + AC_MSG_CHECKING([for hwloc]) + AC_MSG_RESULT([assumed available (embedded mode)]) + + AS_IF([test -z "$with_hwloc_header" || test "$with_hwloc_header" = "yes"], + [PMIX_HWLOC_HEADER=""], + [PMIX_HWLOC_HEADER="$with_hwloc_header"]) + + pmix_hwloc_support=1 + pmix_hwloc_source=embedded + pmix_hwloc_support_will_build=yes + ]) + +AC_DEFUN([_PMIX_HWLOC_EXTERNAL],[ PMIX_VAR_SCOPE_PUSH([pmix_hwloc_dir pmix_hwloc_libdir pmix_hwloc_standard_lib_location pmix_hwloc_standard_header_location]) AC_ARG_WITH([hwloc], @@ -24,6 +57,8 @@ AC_DEFUN([PMIX_HWLOC_CONFIG],[ [Search for hwloc libraries in DIR ])]) pmix_hwloc_support=0 + AS_IF([test "$with_hwloc" = "internal" || test "$with_hwloc" = "external"], + [with_hwloc=]) if test "$with_hwloc" != "no"; then AC_MSG_CHECKING([for hwloc in]) @@ -43,6 +78,17 @@ AC_DEFUN([PMIX_HWLOC_CONFIG],[ AC_MSG_RESULT([$pmix_hwloc_dir and $pmix_hwloc_libdir])], [AC_MSG_RESULT([$with_hwloc_libdir])]) else + pmix_hwloc_dir=/usr/include + if test -d /usr/lib; then + pmix_hwloc_libdir=/usr/lib + elif test -d /usr/lib64; then + 
pmix_hwloc_libdir=/usr/lib64 + else + AC_MSG_RESULT([not found]) + AC_MSG_WARN([Could not find /usr/lib or /usr/lib64 - you may]) + AC_MSG_WARN([need to specify --with-hwloc_libdir=]) + AC_MSG_ERROR([Can not continue]) + fi AC_MSG_RESULT([(default search paths)]) pmix_hwloc_standard_header_location=yes pmix_hwloc_standard_lib_location=yes @@ -60,18 +106,13 @@ AC_DEFUN([PMIX_HWLOC_CONFIG],[ [$pmix_hwloc_libdir], [pmix_hwloc_support=1], [pmix_hwloc_support=0]) - if test $pmix_hwloc_support = "1"; then - LIBS="$LIBS -lhwloc" - PMIX_EMBEDDED_LIBS="$PMIX_EMBEDDED_LIBS -lhwloc" - if test "$pmix_hwloc_standard_header_location" != "yes"; then - PMIX_EMBEDDED_CPPFLAGS="$PMIX_EMBEDDED_CPPFLAGS $pmix_hwloc_CPPFLAGS" - CPPFLAGS="$CPPFLAGS $pmix_hwloc_CPPFLAGS" - fi - if test "$pmix_hwloc_standard_lib_location" != "yes"; then - PMIX_EMBEDDED_LDFLAGS="$PMIX_EMBEDDED_LDFLAGS $pmix_hwloc_LDFLAGS" - LDFLAGS="$LDFLAGS $pmix_hwloc_LDFLAGS" - fi - fi + + AS_IF([test "$pmix_hwloc_standard_header_location" != "yes"], + [PMIX_FLAGS_APPEND_UNIQ(CPPFLAGS, $pmix_hwloc_CPPFLAGS)]) + + AS_IF([test "$pmix_hwloc_standard_lib_location" != "yes"], + [PMIX_FLAGS_APPEND_UNIQ(LDFLAGS, $pmix_hwloc_LDFLAGS)]) + PMIX_FLAGS_APPEND_UNIQ(LIBS, $pmix_hwloc_LIBS) fi if test ! 
-z "$with_hwloc" && test "$with_hwloc" != "no" && test "$pmix_hwloc_support" != "1"; then @@ -96,11 +137,16 @@ AC_DEFUN([PMIX_HWLOC_CONFIG],[ AC_MSG_CHECKING([will hwloc support be built]) if test "$pmix_hwloc_support" != "1"; then AC_MSG_RESULT([no]) + pmix_hwloc_source=none + pmix_hwloc_support_will_build=no else AC_MSG_RESULT([yes]) + pmix_hwloc_source=$pmix_hwloc_dir + pmix_hwloc_support_will_build=yes fi - AC_DEFINE_UNQUOTED([PMIX_HAVE_HWLOC], [$pmix_hwloc_support], - [Whether or not we have hwloc support]) + # Set output variables + PMIX_HWLOC_HEADER="" + PMIX_VAR_SCOPE_POP ])dnl diff --git a/opal/mca/pmix/pmix3x/pmix/config/pmix_setup_libev.m4 b/opal/mca/pmix/pmix3x/pmix/config/pmix_setup_libev.m4 new file mode 100644 index 00000000000..494cc2a2c39 --- /dev/null +++ b/opal/mca/pmix/pmix3x/pmix/config/pmix_setup_libev.m4 @@ -0,0 +1,96 @@ +# -*- shell-script -*- +# +# Copyright (c) 2009-2015 Cisco Systems, Inc. All rights reserved. +# Copyright (c) 2013 Los Alamos National Security, LLC. All rights reserved. +# Copyright (c) 2013-2019 Intel, Inc. All rights reserved. +# Copyright (c) 2017-2019 Research Organization for Information Science +# and Technology (RIST). All rights reserved. 
+# $COPYRIGHT$ +# +# Additional copyrights may follow +# +# $HEADER$ +# + +# PMIX_LIBEV_CONFIG (takes no arguments): when --with-libev[=DIR] is given, run PMIX_CHECK_PACKAGE for event.h / ev / event_base_new; sets pmix_libev_support, defines PMIX_HAVE_LIBEV, and defines PMIX_EVENT_HEADER when libev is found +# -------------------------------------------------------------------- +AC_DEFUN([PMIX_LIBEV_CONFIG],[ + PMIX_VAR_SCOPE_PUSH([pmix_libev_dir pmix_libev_libdir pmix_libev_standard_header_location pmix_libev_standard_lib_location]) + + AC_ARG_WITH([libev], + [AC_HELP_STRING([--with-libev=DIR], + [Search for libev headers and libraries in DIR ])]) + PMIX_CHECK_WITHDIR([libev], [$with_libev], [include/event.h]) + + AC_ARG_WITH([libev-libdir], + [AC_HELP_STRING([--with-libev-libdir=DIR], + [Search for libev libraries in DIR ])]) + PMIX_CHECK_WITHDIR([libev-libdir], [$with_libev_libdir], [libev.*]) + + pmix_libev_support=0 + + AS_IF([test -n "$with_libev" && test "$with_libev" != "no"], + [AC_MSG_CHECKING([for libev in]) + pmix_check_libev_save_CPPFLAGS="$CPPFLAGS" + pmix_check_libev_save_LDFLAGS="$LDFLAGS" + pmix_check_libev_save_LIBS="$LIBS" + if test "$with_libev" != "yes"; then + pmix_libev_dir=$with_libev/include + pmix_libev_standard_header_location=no + pmix_libev_standard_lib_location=no + AS_IF([test -z "$with_libev_libdir" || test "$with_libev_libdir" = "yes"], + [if test -d $with_libev/lib; then + pmix_libev_libdir=$with_libev/lib + elif test -d $with_libev/lib64; then + pmix_libev_libdir=$with_libev/lib64 + else + AC_MSG_RESULT([Could not find $with_libev/lib or $with_libev/lib64]) + AC_MSG_ERROR([Can not continue]) + fi + AC_MSG_RESULT([$pmix_libev_dir and $pmix_libev_libdir])], + [AC_MSG_RESULT([$with_libev_libdir])]) + else + AC_MSG_RESULT([(default search paths)]) + pmix_libev_standard_header_location=yes + pmix_libev_standard_lib_location=yes + fi + AS_IF([test ! 
-z "$with_libev_libdir" && test "$with_libev_libdir" != "yes"], + [pmix_libev_libdir="$with_libev_libdir" + pmix_libev_standard_lib_location=no]) + + PMIX_CHECK_PACKAGE([pmix_libev], + [event.h], + [ev], + [event_base_new], + [], + [$pmix_libev_dir], + [$pmix_libev_libdir], + [pmix_libev_support=1], + [pmix_libev_support=0]) + CPPFLAGS="$pmix_check_libev_save_CPPFLAGS" + LDFLAGS="$pmix_check_libev_save_LDFLAGS" + LIBS="$pmix_check_libev_save_LIBS"]) + + AS_IF([test $pmix_libev_support -eq 1], + [LIBS="$LIBS $pmix_libev_LIBS" + + AS_IF([test "$pmix_libev_standard_header_location" != "yes"], + [CPPFLAGS="$CPPFLAGS $pmix_libev_CPPFLAGS"]) + AS_IF([test "$pmix_libev_standard_lib_location" != "yes"], + [LDFLAGS="$LDFLAGS $pmix_libev_LDFLAGS"])]) + + AC_MSG_CHECKING([will libev support be built]) + if test $pmix_libev_support -eq 1; then + AC_MSG_RESULT([yes]) + PMIX_EVENT_HEADER="<event.h>" + AC_DEFINE_UNQUOTED([PMIX_EVENT_HEADER], [$PMIX_EVENT_HEADER], + [Location of event.h]) + PMIX_SUMMARY_ADD([[External Packages]],[[libev]],[libev],[$pmix_libev_dir]) + else + AC_MSG_RESULT([no]) + fi + + AC_DEFINE_UNQUOTED([PMIX_HAVE_LIBEV], [$pmix_libev_support], [Whether we are building against libev]) + + PMIX_VAR_SCOPE_POP +])dnl diff --git a/opal/mca/pmix/pmix3x/pmix/config/pmix_setup_libevent.m4 b/opal/mca/pmix/pmix3x/pmix/config/pmix_setup_libevent.m4 index 739f0b8f22a..28e3a412273 100644 --- a/opal/mca/pmix/pmix3x/pmix/config/pmix_setup_libevent.m4 +++ b/opal/mca/pmix/pmix3x/pmix/config/pmix_setup_libevent.m4 @@ -2,9 +2,9 @@ # # Copyright (c) 2009-2015 Cisco Systems, Inc. All rights reserved. # Copyright (c) 2013 Los Alamos National Security, LLC. All rights reserved. -# Copyright (c) 2013-2018 Intel, Inc. All rights reserved. -# Copyright (c) 2017 Research Organization for Information Science -# and Technology (RIST). All rights reserved. +# Copyright (c) 2013-2019 Intel, Inc. All rights reserved. 
+# Copyright (c) 2017-2019 Research Organization for Information Science +# and Technology (RIST). All rights reserved. # $COPYRIGHT$ # # Additional copyrights may follow @@ -19,22 +19,25 @@ AC_DEFUN([PMIX_LIBEVENT_CONFIG],[ [AC_HELP_STRING([--with-libevent-header=HEADER], [The value that should be included in C files to include event.h])]) - AC_ARG_ENABLE([embedded-libevent], - [AC_HELP_STRING([--enable-embedded-libevent], - [Enable use of locally embedded libevent])]) + pmix_libevent_support=0 - AS_IF([test "$enable_embedded_libevent" = "yes"], + AS_IF([test "$pmix_mode" = "embedded"], [_PMIX_LIBEVENT_EMBEDDED_MODE], - [_PMIX_LIBEVENT_EXTERNAL]) - - AC_MSG_CHECKING([libevent header]) - AC_DEFINE_UNQUOTED([PMIX_EVENT_HEADER], [$PMIX_EVENT_HEADER], - [Location of event.h]) - AC_MSG_RESULT([$PMIX_EVENT_HEADER]) - AC_MSG_CHECKING([libevent2/thread header]) - AC_DEFINE_UNQUOTED([PMIX_EVENT2_THREAD_HEADER], [$PMIX_EVENT2_THREAD_HEADER], - [Location of event2/thread.h]) - AC_MSG_RESULT([$PMIX_EVENT2_THREAD_HEADER]) + [AS_IF([test $pmix_libev_support -eq 0], + [_PMIX_LIBEVENT_EXTERNAL])]) + + if test $pmix_libevent_support -eq 1; then + AC_MSG_CHECKING([libevent header]) + AC_DEFINE_UNQUOTED([PMIX_EVENT_HEADER], [$PMIX_EVENT_HEADER], + [Location of event.h]) + AC_MSG_RESULT([$PMIX_EVENT_HEADER]) + AC_MSG_CHECKING([libevent2/thread header]) + AC_DEFINE_UNQUOTED([PMIX_EVENT2_THREAD_HEADER], [$PMIX_EVENT2_THREAD_HEADER], + [Location of event2/thread.h]) + AC_MSG_RESULT([$PMIX_EVENT2_THREAD_HEADER]) + + PMIX_SUMMARY_ADD([[External Packages]],[[Libevent]], [pmix_libevent], [yes ($pmix_libevent_source)]) + fi ]) AC_DEFUN([_PMIX_LIBEVENT_EMBEDDED_MODE],[ @@ -47,7 +50,9 @@ AC_DEFUN([_PMIX_LIBEVENT_EMBEDDED_MODE],[ [PMIX_EVENT_HEADER="$with_libevent_header" PMIX_EVENT2_THREAD_HEADER="$with_libevent_header"]) - ]) + pmix_libevent_source=embedded + pmix_libevent_support=1 +]) AC_DEFUN([_PMIX_LIBEVENT_EXTERNAL],[ PMIX_VAR_SCOPE_PUSH([pmix_event_dir pmix_event_libdir 
pmix_event_defaults]) @@ -56,88 +61,109 @@ AC_DEFUN([_PMIX_LIBEVENT_EXTERNAL],[ [AC_HELP_STRING([--with-libevent=DIR], [Search for libevent headers and libraries in DIR ])]) - # Bozo check - AS_IF([test "$with_libevent" = "no"], - [AC_MSG_WARN([It is not possible to configure PMIx --without-libevent]) - AC_MSG_ERROR([Cannot continue])]) - AC_ARG_WITH([libevent-libdir], [AC_HELP_STRING([--with-libevent-libdir=DIR], [Search for libevent libraries in DIR ])]) + pmix_check_libevent_save_CPPFLAGS="$CPPFLAGS" + pmix_check_libevent_save_LDFLAGS="$LDFLAGS" + pmix_check_libevent_save_LIBS="$LIBS" + # get rid of the trailing slash(es) libevent_prefix=$(echo $with_libevent | sed -e 'sX/*$XXg') libeventdir_prefix=$(echo $with_libevent_libdir | sed -e 'sX/*$XXg') - AC_MSG_CHECKING([for libevent in]) - if test ! -z "$libevent_prefix" && test "$libevent_prefix" != "yes"; then - pmix_event_defaults=no - pmix_event_dir=$libevent_prefix - if test -d $libevent_prefix/lib; then - pmix_event_libdir=$libevent_prefix/lib - elif test -d $libevent_prefix/lib64; then - pmix_event_libdir=$libevent_prefix/lib64 - elif test -d $libevent_prefix; then - pmix_event_libdir=$libevent_prefix + if test "$libevent_prefix" != "no"; then + AC_MSG_CHECKING([for libevent in]) + if test ! 
-z "$libevent_prefix" && test "$libevent_prefix" != "yes"; then + pmix_event_defaults=no + pmix_event_dir=$libevent_prefix/include + if test -d $libevent_prefix/lib; then + pmix_event_libdir=$libevent_prefix/lib + elif test -d $libevent_prefix/lib64; then + pmix_event_libdir=$libevent_prefix/lib64 + elif test -d $libevent_prefix; then + pmix_event_libdir=$libevent_prefix + else + AC_MSG_RESULT([Could not find $libevent_prefix/lib, $libevent_prefix/lib64, or $libevent_prefix]) + AC_MSG_ERROR([Can not continue]) + fi + AC_MSG_RESULT([$pmix_event_dir and $pmix_event_libdir]) else - AC_MSG_RESULT([Could not find $libevent_prefix/lib, $libevent_prefix/lib64, or $libevent_prefix]) - AC_MSG_ERROR([Can not continue]) + pmix_event_defaults=yes + pmix_event_dir=/usr/include + if test -d /usr/lib; then + pmix_event_libdir=/usr/lib + AC_MSG_RESULT([(default search paths)]) + elif test -d /usr/lib64; then + pmix_event_libdir=/usr/lib64 + AC_MSG_RESULT([(default search paths)]) + else + AC_MSG_RESULT([default paths not found]) + pmix_libevent_support=0 + fi fi - AC_MSG_RESULT([$pmix_event_dir and $pmix_event_libdir]) - else - pmix_event_defaults=yes - pmix_event_dir=/usr/include - if test -d /usr/lib; then - pmix_event_libdir=/usr/lib - elif test -d /usr/lib64; then - pmix_event_libdir=/usr/lib64 - else - AC_MSG_RESULT([not found]) - AC_MSG_WARN([Could not find /usr/lib or /usr/lib64 - you may]) - AC_MSG_WARN([need to specify --with-libevent-libdir=]) - AC_MSG_ERROR([Can not continue]) + AS_IF([test ! 
-z "$libeventdir_prefix" && test "$libeventdir_prefix" != "yes"], + [pmix_event_libdir="$libeventdir_prefix"]) + + PMIX_CHECK_PACKAGE([pmix_libevent], + [event.h], + [event], + [event_config_new], + [-levent -levent_pthreads], + [$pmix_event_dir], + [$pmix_event_libdir], + [pmix_libevent_support=1], + [pmix_libevent_support=0]) + + AS_IF([test "$pmix_event_defaults" = "no"], + [PMIX_FLAGS_APPEND_UNIQ(CPPFLAGS, $pmix_libevent_CPPFLAGS) + PMIX_FLAGS_APPEND_UNIQ(LDFLAGS, $pmix_libevent_LDFLAGS)]) + PMIX_FLAGS_APPEND_UNIQ(LIBS, $pmix_libevent_LIBS) + + if test $pmix_libevent_support -eq 1; then + # Ensure that this libevent has the symbol + # "evthread_set_lock_callbacks", which will only exist if + # libevent was configured with thread support. + AC_CHECK_LIB([event], [evthread_set_lock_callbacks], + [], + [AC_MSG_WARN([External libevent does not have thread support]) + AC_MSG_WARN([PMIx requires libevent to be compiled with]) + AC_MSG_WARN([thread support enabled]) + pmix_libevent_support=0]) + fi + if test $pmix_libevent_support -eq 1; then + AC_CHECK_LIB([event_pthreads], [evthread_use_pthreads], + [], + [AC_MSG_WARN([External libevent does not have thread support]) + AC_MSG_WARN([PMIx requires libevent to be compiled with]) + AC_MSG_WARN([thread support enabled]) + pmix_libevent_support=0]) fi - AC_MSG_RESULT([(default search paths)]) fi - AS_IF([test ! 
-z "$libeventdir_prefix" && "$libeventdir_prefix" != "yes"], - [pmix_event_libdir="$libeventdir_prefix"]) - - PMIX_CHECK_PACKAGE([pmix_libevent], - [event.h], - [event], - [event_config_new], - [-levent -levent_pthreads], - [$pmix_event_dir], - [$pmix_event_libdir], - [], - [AC_MSG_WARN([LIBEVENT SUPPORT NOT FOUND]) - AC_MSG_ERROR([CANNOT CONTINUE])]) - - AS_IF([test "$pmix_event_defaults" = "no"], - [PMIX_FLAGS_APPEND_UNIQ(CPPFLAGS, $pmix_libevent_CPPFLAGS) - PMIX_FLAGS_APPEND_UNIQ(LIBS, $pmix_libevent_LIBS) - PMIX_FLAGS_APPEND_UNIQ(LDFLAGS, $pmix_libevent_LDFLAGS)]) - - - # Ensure that this libevent has the symbol - # "evthread_set_lock_callbacks", which will only exist if - # libevent was configured with thread support. - AC_CHECK_LIB([event], [evthread_set_lock_callbacks], - [], - [AC_MSG_WARN([External libevent does not have thread support]) - AC_MSG_WARN([PMIx requires libevent to be compiled with]) - AC_MSG_WARN([thread support enabled]) - AC_MSG_ERROR([Cannot continue])]) - AC_CHECK_LIB([event_pthreads], [evthread_use_pthreads], - [], - [AC_MSG_WARN([External libevent does not have thread support]) - AC_MSG_WARN([PMIx requires libevent to be compiled with]) - AC_MSG_WARN([thread support enabled]) - AC_MSG_ERROR([Cannot continue])]) - - # Set output variables - PMIX_EVENT_HEADER="" - PMIX_EVENT2_THREAD_HEADER="" + + CPPFLAGS="$pmix_check_libevent_save_CPPFLAGS" + LDFLAGS="$pmix_check_libevent_save_LDFLAGS" + LIBS="$pmix_check_libevent_save_LIBS" + + AC_MSG_CHECKING([will libevent support be built]) + if test $pmix_libevent_support -eq 1; then + AC_MSG_RESULT([yes]) + # Set output variables + PMIX_EVENT_HEADER="" + PMIX_EVENT2_THREAD_HEADER="" + AC_DEFINE_UNQUOTED([PMIX_EVENT_HEADER], [$PMIX_EVENT_HEADER], + [Location of event.h]) + pmix_libevent_source=$pmix_event_dir + AS_IF([test "$pmix_event_defaults" = "no"], + [PMIX_FLAGS_APPEND_UNIQ(CPPFLAGS, $pmix_libevent_CPPFLAGS) + PMIX_FLAGS_APPEND_UNIQ(LDFLAGS, $pmix_libevent_LDFLAGS)]) + 
PMIX_FLAGS_APPEND_UNIQ(LIBS, $pmix_libevent_LIBS) + else + AC_MSG_RESULT([no]) + fi + + AC_DEFINE_UNQUOTED([PMIX_HAVE_LIBEVENT], [$pmix_libevent_support], [Whether we are building against libevent]) PMIX_VAR_SCOPE_POP ])dnl diff --git a/opal/mca/pmix/pmix3x/pmix/config/pmix_summary.m4 b/opal/mca/pmix/pmix3x/pmix/config/pmix_summary.m4 new file mode 100644 index 00000000000..9b9bc9024e1 --- /dev/null +++ b/opal/mca/pmix/pmix3x/pmix/config/pmix_summary.m4 @@ -0,0 +1,79 @@ +dnl -*- shell-script -*- +dnl +dnl Copyright (c) 2016 Los Alamos National Security, LLC. All rights +dnl reserved. +dnl Copyright (c) 2016-2018 Cisco Systems, Inc. All rights reserved +dnl Copyright (c) 2016 Research Organization for Information Science +dnl and Technology (RIST). All rights reserved. +dnl Copyright (c) 2018-2019 Intel, Inc. All rights reserved. +dnl $COPYRIGHT$ +dnl +dnl Additional copyrights may follow +dnl +dnl $HEADER$ +dnl +AC_DEFUN([PMIX_SUMMARY_ADD],[ + PMIX_VAR_SCOPE_PUSH([pmix_summary_section pmix_summary_line pmix_summary_section_current]) + + dnl need to replace spaces in the section name with something else. _ seems like a reasonable + dnl choice. if this changes remember to change PMIX_SUMMARY_PRINT as well. 
+ pmix_summary_section=$(echo $1 | tr ' ' '_') + pmix_summary_line="$2: $4" + pmix_summary_section_current=$(eval echo \$pmix_summary_values_$pmix_summary_section) + + if test -z "$pmix_summary_section_current" ; then + if test -z "$pmix_summary_sections" ; then + pmix_summary_sections=$pmix_summary_section + else + pmix_summary_sections="$pmix_summary_sections $pmix_summary_section" + fi + eval pmix_summary_values_$pmix_summary_section=\"$pmix_summary_line\" + else + eval pmix_summary_values_$pmix_summary_section=\"$pmix_summary_section_current,$pmix_summary_line\" + fi + + PMIX_VAR_SCOPE_POP +]) + +AC_DEFUN([PMIX_SUMMARY_PRINT],[ + PMIX_VAR_SCOPE_PUSH([pmix_summary_section pmix_summary_section_name]) + cat < \$repo_arg, - "source-branch=s" => \$source_branch_arg, - "pages-branch=s" => \$pages_branch_arg, - "logfile-dir=s" => \$logfile_dir_arg, - "help|h" => \$help_arg, - "verbose" => \$verbose_arg, - ); - -if (!$ok || $help_arg) { - print "Invalid command line argument.\n\n" - if (!$ok); - print "Options: - --help | -h Print this message - --repo Git repo to be updated - --source-branch Branch containing source files (default: master) - --pages-branch Branch where man pages are to be output (default: gh-pages) - --logfile-dir Directory where execution log is to be written (default: /tmp) - --verbose Print debug info during execution\n"; - exit($ok ? 
0 : 1); -} - -# Sanity checks -die "Must specify a git repo" - if (!defined($repo_arg)); - -##################################################################### - -my $logfile_dir = $logfile_dir_arg; -my $logfile_counter = 1; - -sub doit { - my $allowed_to_fail = shift; - my $cmd = shift; - my $stdout_file = shift; - - # Put a prefix on the logfiles so that we know that they belong to - # this script, and put a counter so that we know the sequence of - # logfiles - $stdout_file = "runall-md2nroff-$logfile_counter-$stdout_file"; - ++$logfile_counter; - - # Redirect stdout if requested - if (defined $stdout_file) { - $stdout_file = "$logfile_dir/$stdout_file.log"; - unlink($stdout_file); - $cmd .= " >$stdout_file"; - } elsif (!$verbose_arg && $cmd !~ />/) { - $cmd .= " >/dev/null"; - } - $cmd .= " 2>&1"; - - my $rc = system($cmd); - if (0 != $rc && !$allowed_to_fail) { - # If we die/fail, ensure to change out of the temp tree so - # that it can be removed upon exit. - chdir("/"); - die "Command $cmd failed: exit status $rc"; - } - - system("cat $stdout_file") - if ($verbose_arg && defined($stdout_file) && -f $stdout_file); -} - -sub verbose { - print @_ - if ($verbose_arg); -} - -##################################################################### - -# Setup a logfile dir just for this run -my ($sec,$min,$hour,$mday,$mon,$year,$wday,$yday,$isdst) = - localtime(time); -$logfile_dir = - sprintf("%s/cron-run-all-md2nroff-logs-%04d-%02d-%02d-%02d%02d", - $logfile_dir_arg, $year + 1900, $mon + 1, $mday, - $hour, $min); -my $rc = system("mkdir $logfile_dir"); -if ($rc != 0 || ! -d $logfile_dir || ! 
-w $logfile_dir) { - chdir("/"); - die "mkdir of $logfile_dir failed, or can't write to it"; -} - -# First, git clone the source branch of the repo -verbose("*** Cloning repo: $repo_arg / $source_branch_arg...\n"); -my $tmpdir = File::Temp->newdir(); - -chdir($tmpdir); -doit(0, "git clone --single-branch --branch $source_branch_arg $repo_arg source", "git-clone"); - -# Next, git clone the pages branch of repo -if (defined($pages_branch_arg)) { - verbose("*** Cloning repo: $repo_arg / $pages_branch_arg...\n"); - doit(0, "git clone --single-branch --branch $pages_branch_arg $repo_arg pages", "git-clone2"); -} - -##################################################################### - -# Find all the *.\d.md files in the source repo -verbose("*** Finding markdown man pages...\n"); -opendir(DIR, "source/man"); -my @markdown_files = grep { /\.\d\.md$/ && -f "source/man/$_" } readdir(DIR); -closedir(DIR); -verbose("Found: @markdown_files\n"); - -##################################################################### - -# Copy each of the markdown files to the pages branch checkout -if (defined($pages_branch_arg)) { - chdir("pages/master"); - foreach my $file (@markdown_files) { - doit(0, "cp ../../source/man/$file man/$file", "loop-cp"); - - # Is there a new man page? If so, we need to "git add" it. - my $out = `git status --porcelain man/$file`; - doit(0, "git add man/$file", "loop-git-add") - if ($out =~ /^\?\?/); - } - - # Git commit those files in the pages repo and push them to the - # upstream repo so that they go live. If nothing changed, the commit - # and push will be no-ops. - chdir(".."); - doit(1, "git commit --no-verify -a -m \"Updated Markdown man pages from $source_branch_arg\"", - "git-commit-first"); - doit(1, "git push", "git-push-first"); -} - -##################################################################### - -# Now process each of the Markdown files in the source repo and -# generate new nroff man pages. 
-chdir("$tmpdir/source/man"); -foreach my $file (@markdown_files) { - doit(0, "../contrib/md2nroff.pl --source $file", "loop2-md2nroff"); - - # Did we generate a new man page? If so, we need to "git add" it. - my $man_file = basename($file); - - $man_file =~ m/\.(\d)\.md$/; - my $section = $1; - - $man_file =~ s/\.md$//; - - my $full_filename = "man$section/$man_file"; - - my $out = `git status --porcelain $full_filename`; - doit(0, "git add $full_filename", "loop2-git-add") - if ($out =~ /^\?\?/); -} - -# Similar to above: commit the newly-generated nroff pages and push -# them back upstream. If nothing changed, these will be no-ops. -doit(1, "git commit --no-verify -a -m \"Updated nroff-generated man pages\"", "git-commit-final"); -doit(1, "git push", "git-push-final"); - -# chdir out of the tmpdir so that it can be removed -chdir("/"); - -# If we get here, we finished successfully, so there's no need to keep -# the logfile dir around -system("rm -rf $logfile_dir"); - -exit(0); diff --git a/opal/mca/pmix/pmix3x/pmix/contrib/make_dist_tarball b/opal/mca/pmix/pmix3x/pmix/contrib/make_dist_tarball index c9a6d19c646..f3fc22b5e49 100755 --- a/opal/mca/pmix/pmix3x/pmix/contrib/make_dist_tarball +++ b/opal/mca/pmix/pmix3x/pmix/contrib/make_dist_tarball @@ -10,8 +10,10 @@ # University of Stuttgart. All rights reserved. # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. -# Copyright (c) 2008-2013 Cisco Systems, Inc. All rights reserved. -# Copyright (c) 2015 Intel, Inc. All rights reserved. +# Copyright (c) 2008-2018 Cisco Systems, Inc. All rights reserved +# Copyright (c) 2015-2019 Intel, Inc. All rights reserved. +# Copyright (c) 2019 Amazon.com, Inc. or its affiliates. All Rights +# reserved. 
# $COPYRIGHT$ # # Additional copyrights may follow @@ -108,14 +110,6 @@ if test "$LIBEVENT" != ""; then config_args="--with-libevent=$LIBEVENT $config_args" fi -# if config_args isn't empty, then add that to the distcheck_flags -# (because we'll assumedly need those to run configure under "make -# distcheck"). -if test "$config_args" != ""; then - echo "*** Adding to distcheck_flags: $config_args" - distcheck_flags="$distcheck_flags AM_DISTCHECK_CONFIGURE_FLAGS=\"$config_args\"" -fi - export DISTCHECK_CONFIGURE_FLAGS=$config_args # diff --git a/opal/mca/pmix/pmix3x/pmix/contrib/md2nroff.pl b/opal/mca/pmix/pmix3x/pmix/contrib/md2nroff.pl deleted file mode 100755 index 9eb33fe94b6..00000000000 --- a/opal/mca/pmix/pmix3x/pmix/contrib/md2nroff.pl +++ /dev/null @@ -1,164 +0,0 @@ -#!/usr/bin/env perl - -# Script to convert markdown to nroff man pages. -# -# The main conversion work is done via pandoc. But pandoc doesn't do -# everything exactly the way we want it, so use some perl regular -# expressions to fix up what pandoc doesn't get right. -# -# Do a "smart" write of the resulting output man page -- only write to -# the output file if the contents have actually changed compared to -# what was already there. - -use strict; -use warnings; - -use POSIX; -use File::Basename; -use Getopt::Long; -use File::Temp qw/tempfile/; - -my $source_arg; -my $target_arg; -my $help_arg; - -my $ok = Getopt::Long::GetOptions("source=s" => \$source_arg, - "target=s" => \$target_arg, - "help|h" => \$help_arg, - ); - -if ($help_arg) { - print "$0 --source input_MD_file --target output_nroff_file\n"; - exit(0); -} - -# Sanity checks -die "Must specify a source file" - if (!defined($source_arg)); -die "Source file does not exist ($source_arg)" - if (! 
-r $source_arg); - -my $pandoc = `which pandoc`; -die "Cannot find pandoc executable" - if ($pandoc eq ""); - -##################################################################### - -my $file = $source_arg; -$file =~ m/(\d+).md/; -my $section = $1; -die "Could not figure out the man page section: $source_arg" - if (!defined($section)); -my $shortfile = basename($file); -$shortfile =~ s/\.$section\.md$//; - -# If the target file was not specified, derive it from the source file -my $target; -if (!defined($target_arg)) { - $target_arg = $source_arg; - - $target_arg =~ m/\.(\d)\.md$/; - my $section = $1; - - my $dirname = dirname($target_arg); - my $basename = basename($target_arg); - $basename =~ s/\.md$//; - - $target = "$dirname/man$section/$basename"; -} else { - $target = $target_arg; -} - -print "*** Processing: $file -> $target\n"; - -# Read in the file -my $pandoc_input; -open(IN, $file) - || die "Can't open $file"; -$pandoc_input .= $_ - while (); -close(IN); - -# Remove the Jekyll header -$pandoc_input =~ s/.*---\n.+?---\n//s; - -# Remove the {% include ... %} directives -$pandoc_input =~ s/\n{0,1}\s*{%\s+include .+?\s+%}\s*\n/\n/g; - -# Change {% highlight c %} to ```c -$pandoc_input =~ s/^\s*{%\s+highlight\s+c\s+%}\s*$/\n```c/gmi; - -# Change {% endhighlight %} to ``` -$pandoc_input =~ s/^\s*\{\%\s+endhighlight\s+\%\}\s*$/```\n/gmi; - -# Pandoc does not handle markdown links in output nroff properly, -# so just remove all links. 
-while ($pandoc_input =~ m/\[(.+?)\]\(.+?\)/) { - my $text = $1; - $pandoc_input =~ s/\[(.+?)\]\(.+?\)/$text/; -} - -# Add the pandoc header -$pandoc_input = "% $shortfile($section) PMIx Programmer's Manual | \@VERSION\@ -% PMIx -% \@DATE\@\n\n$pandoc_input"; - -# Generate the nroff output -my ($fh, $temp_filename) = tempfile(); -print $fh $pandoc_input; -close($fh); - -open(IN, "pandoc -s --from=markdown --to=man $temp_filename|") - || die "Can't run pandoc"; -my $pandoc_nroff; -$pandoc_nroff .= $_ - while (); -close(IN); -unlink($temp_filename); - -# Now that we have the nroff string result, is it different than the -# target file? -my $write_nroff = 1; -if (-r $target) { - # If the target file exists, read it in - open(IN, $target) - || die "Can't open $target"; - my $target_nroff; - $target_nroff .= $_ - while (); - close(IN); - - # Remove the date from the target nroff string so that we can - # compare and ignore if the date has changed. Note that some - # versions of pandoc render dates as xxxx\-xx\-xx, and others - # render it as xxxx-xx-xx. Handle both. - $target_nroff =~ s/\"\d\d\d\d\\\-\d\d\\\-\d\d\"/\"\\\@DATE\\\@\"/; - $target_nroff =~ s/\"\d\d\d\d\-\d\d\-\d\d\"/\"\\\@DATE\\\@\"/; - - $write_nroff = 0 - if ($pandoc_nroff eq $target_nroff); -} - -# Do we need to write a new target nroff? -if ($write_nroff) { - - # What's the date right now? - my $now_string = strftime "%Y\\-%m\\-%d", localtime; - $pandoc_nroff =~ s/\\\@DATE\\\@/$now_string/g; - - # Make sure the target directory exists - my $dirname = dirname($target); - mkdir($dirname) - if (! 
-d $dirname); - - open(OUT, ">$target") - || die "Can't write to $target"; - print OUT $pandoc_nroff; - close(OUT); - - print "--> Wrote new $target\n"; -} else { - print "--> $target unchanged; not written\n"; -} - -exit(0); diff --git a/opal/mca/pmix/pmix3x/pmix/contrib/pmix.spec b/opal/mca/pmix/pmix3x/pmix/contrib/pmix.spec index 5f4613b6c26..ae488781f7e 100644 --- a/opal/mca/pmix/pmix3x/pmix/contrib/pmix.spec +++ b/opal/mca/pmix/pmix3x/pmix/contrib/pmix.spec @@ -12,7 +12,7 @@ # Copyright (c) 2006-2016 Cisco Systems, Inc. All rights reserved. # Copyright (c) 2013 Mellanox Technologies, Inc. # All rights reserved. -# Copyright (c) 2015-2018 Intel, Inc. All rights reserved. +# Copyright (c) 2015-2019 Intel, Inc. All rights reserved. # Copyright (c) 2015 Research Organization for Information Science # and Technology (RIST). All rights reserved. # $COPYRIGHT$ @@ -192,7 +192,7 @@ Summary: An extended/exascale implementation of PMI Name: %{?_name:%{_name}}%{!?_name:pmix} -Version: 3.0.0 +Version: 3.1.4 Release: 1%{?dist} License: BSD Group: Development/Libraries @@ -204,6 +204,7 @@ Prefix: %{_prefix} Provides: pmix Provides: pmix = %{version} BuildRoot: /var/tmp/%{name}-%{version}-%{release}-root +BuildRequires: libevent-devel %if %{disable_auto_requires} AutoReq: no %endif @@ -230,6 +231,22 @@ scalability. This RPM contains all the tools necessary to compile and link against PMIx. +# if build_all_in_one_rpm = 0, build split packages +%if !%{build_all_in_one_rpm} +%package libpmi +Summary: PMI-1 and PMI-2 compatibility libraries +Requires: %{name}%{?_isa} = %{version}-%{release} +Conflicts: slurm-libpmi + +%description libpmi +The %{name}-libpmi package contains libpmi and libpmi2 libraries that provide +the respective APIs and a copy of the PMIx library – each API is translated +into its PMIx equivalent. This is especially targeted at apps/libs that are +hardcoded to dlopen “libpmi” or “libpmi2”. 
+This package conflicts with slurm-libpmi, which provides its own, incompatible +versions of libpmi.so and libpmi2.so. +%endif + ############################################################################# # # Prepatory Section @@ -346,6 +363,10 @@ export CFLAGS CXXFLAGS FCFLAGS # We don't need that in an RPM. find $RPM_BUILD_ROOT -name config.log -exec rm -f {} \; +# If we build separate RPMs, then move the libpmi.* and libpmi2.* compat libs +# out of the way +find $RPM_BUILD_ROOT -name 'libpmi.' | xargs rm -f + # First, the [optional] modulefile %if %{install_modulefile} @@ -490,6 +511,19 @@ test "x$RPM_BUILD_ROOT" != "x" && rm -rf $RPM_BUILD_ROOT %endif %doc README INSTALL LICENSE +# if building separate RPMs, split the compatibility libs +%if !%{build_all_in_one_rpm} +%exclude %{_libdir}/libpmi.* +%exclude %{_libdir}/libpmi2.* +%exclude %{_includedir}/pmi.* +%exclude %{_includedir}/pmi2.* + +%files libpmi +%{_libdir}/libpmi.* +%{_libdir}/libpmi2.* +%{_includedir}/pmi.* +%{_includedir}/pmi2.* +%endif ############################################################################# # @@ -497,6 +531,11 @@ test "x$RPM_BUILD_ROOT" != "x" && rm -rf $RPM_BUILD_ROOT # ############################################################################# %changelog +* Tue Apr 30 2019 Kilian Cavalotti +- Enable multiple RPMs build to allow backward compatibility PMI-1 and PMI-2 + libs to be built separate. "rpmbuild --define 'build_all_in_one_rpm 0' ..." + will build separate pmix and pmix-libpmi RPMs. + * Tue Oct 17 2017 Ralph Castain - Add PMIx bin directory diff --git a/opal/mca/pmix/pmix3x/pmix/contrib/whitespace-purge.sh b/opal/mca/pmix/pmix3x/pmix/contrib/whitespace-purge.sh index 9c9d8fe909e..905796bc1ef 100755 --- a/opal/mca/pmix/pmix3x/pmix/contrib/whitespace-purge.sh +++ b/opal/mca/pmix/pmix3x/pmix/contrib/whitespace-purge.sh @@ -1,6 +1,6 @@ #!/bin/bash # -# Copyright (c) 2015 Intel, Inc. All rights reserved. +# Copyright (c) 2015-2019 Intel, Inc. All rights reserved. 
# Copyright (c) 2015 Los Alamos National Security, LLC. All rights # reserved # Copyright (c) 2015 Cisco Systems, Inc. @@ -18,7 +18,7 @@ for file in $(git ls-files) ; do # skip sym links, pdfs, etc. If any other file types should be # skipped add the check here. type=$(file -b --mime-type -h $file) - if test ${type::4} == "text" ; then + if test ${type::4} = "text" ; then # Eliminate whitespace at the end of lines perl -pi -e 's/\s*$/\n/' $file fi diff --git a/opal/mca/pmix/pmix3x/pmix/examples/Makefile.am b/opal/mca/pmix/pmix3x/pmix/examples/Makefile.am index 72ed7216131..b928ab2d589 100644 --- a/opal/mca/pmix/pmix3x/pmix/examples/Makefile.am +++ b/opal/mca/pmix/pmix3x/pmix/examples/Makefile.am @@ -11,13 +11,14 @@ # All rights reserved. # Copyright (c) 2006-2010 Cisco Systems, Inc. All rights reserved. # Copyright (c) 2012-2013 Los Alamos National Security, Inc. All rights reserved. -# Copyright (c) 2013-2018 Intel, Inc. All rights reserved. +# Copyright (c) 2013-2019 Intel, Inc. All rights reserved. 
# $COPYRIGHT$ # # Additional copyrights may follow # # $HEADER$ # +headers = examples.h AM_CPPFLAGS = -I$(top_builddir)/src -I$(top_builddir)/src/include -I$(top_builddir)/include -I$(top_builddir)/include/pmix @@ -28,56 +29,56 @@ if !WANT_HIDDEN noinst_PROGRAMS += server endif -client_SOURCES = client.c +client_SOURCES = client.c examples.h client_LDFLAGS = $(PMIX_PKG_CONFIG_LDFLAGS) client_LDADD = $(top_builddir)/src/libpmix.la -client2_SOURCES = client2.c +client2_SOURCES = client2.c examples.h client2_LDFLAGS = $(PMIX_PKG_CONFIG_LDFLAGS) client2_LDADD = $(top_builddir)/src/libpmix.la -debugger_SOURCES = debugger.c +debugger_SOURCES = debugger.c examples.h debugger_LDFLAGS = $(PMIX_PKG_CONFIG_LDFLAGS) debugger_LDADD = $(top_builddir)/src/libpmix.la -debuggerd_SOURCES = debuggerd.c +debuggerd_SOURCES = debuggerd.c examples.h debuggerd_LDFLAGS = $(PMIX_PKG_CONFIG_LDFLAGS) debuggerd_LDADD = $(top_builddir)/src/libpmix.la -alloc_SOURCES = alloc.c +alloc_SOURCES = alloc.c examples.h alloc_LDFLAGS = $(PMIX_PKG_CONFIG_LDFLAGS) alloc_LDADD = $(top_builddir)/src/libpmix.la -jctrl_SOURCES = jctrl.c +jctrl_SOURCES = jctrl.c examples.h jctrl_LDFLAGS = $(PMIX_PKG_CONFIG_LDFLAGS) jctrl_LDADD = $(top_builddir)/src/libpmix.la -dmodex_SOURCES = dmodex.c +dmodex_SOURCES = dmodex.c examples.h dmodex_LDFLAGS = $(PMIX_PKG_CONFIG_LDFLAGS) dmodex_LDADD = $(top_builddir)/src/libpmix.la -dynamic_SOURCES = dynamic.c +dynamic_SOURCES = dynamic.c examples.h dynamic_LDFLAGS = $(PMIX_PKG_CONFIG_LDFLAGS) dynamic_LDADD = $(top_builddir)/src/libpmix.la -fault_SOURCES = fault.c +fault_SOURCES = fault.c examples.h fault_LDFLAGS = $(PMIX_PKG_CONFIG_LDFLAGS) fault_LDADD = $(top_builddir)/src/libpmix.la -pub_SOURCES = pub.c +pub_SOURCES = pub.c examples.h pub_LDFLAGS = $(PMIX_PKG_CONFIG_LDFLAGS) pub_LDADD = $(top_builddir)/src/libpmix.la -pubi_SOURCES = pubi.c +pubi_SOURCES = pubi.c examples.h pubi_LDFLAGS = $(PMIX_PKG_CONFIG_LDFLAGS) pubi_LDADD = $(top_builddir)/src/libpmix.la -tool_SOURCES = 
tool.c +tool_SOURCES = tool.c examples.h tool_LDFLAGS = $(PMIX_PKG_CONFIG_LDFLAGS) tool_LDADD = $(top_builddir)/src/libpmix.la if !WANT_HIDDEN -server_SOURCES = server.c +server_SOURCES = server.c examples.h server_LDFLAGS = $(PMIX_PKG_CONFIG_LDFLAGS) server_LDADD = $(top_builddir)/src/libpmix.la endif diff --git a/opal/mca/pmix/pmix3x/pmix/examples/alloc.c b/opal/mca/pmix/pmix3x/pmix/examples/alloc.c index f0cdf43a0ea..6984c1e1e96 100644 --- a/opal/mca/pmix/pmix3x/pmix/examples/alloc.c +++ b/opal/mca/pmix/pmix3x/pmix/examples/alloc.c @@ -13,7 +13,7 @@ * All rights reserved. * Copyright (c) 2009-2012 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2011 Oak Ridge National Labs. All rights reserved. - * Copyright (c) 2013-2017 Intel, Inc. All rights reserved. + * Copyright (c) 2013-2019 Intel, Inc. All rights reserved. * Copyright (c) 2015 Mellanox Technologies, Inc. All rights reserved. * $COPYRIGHT$ * @@ -30,16 +30,7 @@ #include #include - -/* define a structure for collecting returned - * info from an allocation request */ -typedef struct { - volatile bool active; - pmix_info_t *info; - size_t ninfo; -} mydata_t; - -static volatile bool waiting_for_allocation = true; +#include "examples.h" /* this is a callback function for the PMIx_Query and * PMIx_Allocate APIs. 
The query will callback with a status indicating @@ -59,7 +50,7 @@ static void infocbfunc(pmix_status_t status, pmix_release_cbfunc_t release_fn, void *release_cbdata) { - mydata_t *mq = (mydata_t*)cbdata; + myquery_data_t *mq = (myquery_data_t*)cbdata; size_t n; fprintf(stderr, "Allocation request returned %s", PMIx_Error_string(status)); @@ -75,6 +66,9 @@ static void infocbfunc(pmix_status_t status, PMIX_INFO_XFER(&mq->info[n], &info[n]); } } + /* the status returned here indicates whether the requested + * information was found or not - preserve it */ + mq->lock.status = status; /* let the library release the data and cleanup from * the operation */ @@ -83,7 +77,7 @@ static void infocbfunc(pmix_status_t status, } /* release the block */ - mq->active = false; + DEBUG_WAKEUP_THREAD(&mq->lock); } /* this is an event notification function that we explicitly request @@ -100,12 +94,37 @@ static void release_fn(size_t evhdlr_registration_id, pmix_event_notification_cbfunc_fn_t cbfunc, void *cbdata) { + myrel_t *lock; + size_t n; + + /* find the return object */ + lock = NULL; + for (n=0; n < ninfo; n++) { + if (0 == strncmp(info[n].key, PMIX_EVENT_RETURN_OBJECT, PMIX_MAX_KEYLEN)) { + lock = (myrel_t*)info[n].value.data.ptr; + break; + } + } + /* if the object wasn't returned, then that is an error */ + if (NULL == lock) { + fprintf(stderr, "LOCK WASN'T RETURNED IN RELEASE CALLBACK\n"); + /* let the event handler progress */ + if (NULL != cbfunc) { + cbfunc(PMIX_SUCCESS, NULL, 0, NULL, NULL, cbdata); + } + return; + } + /* tell the event handler state machine that we are the last step */ if (NULL != cbfunc) { cbfunc(PMIX_EVENT_ACTION_COMPLETE, NULL, 0, NULL, NULL, cbdata); } - /* flag that the allocation is complete so we can exit */ - waiting_for_allocation = false; + /* the status will be PMIX_NOTIFY_ALLOC_COMPLETE since that is the code + * we registered to receive.
The result of the allocation request is + * in the info array - for now, just assume success */ + lock->lock.status = PMIX_SUCCESS; + /* release the lock */ + DEBUG_WAKEUP_THREAD(&lock->lock); } /* event handler registration is done asynchronously because it @@ -114,18 +133,20 @@ static void release_fn(size_t evhdlr_registration_id, * the status of the request (success or an error), plus a numerical index * to the registered event. The index is used later on to deregister * an event handler - if we don't explicitly deregister it, then the - * PMIx server will do so when it see us exit */ + * PMIx server will do so when it sees us exit */ static void evhandler_reg_callbk(pmix_status_t status, size_t evhandler_ref, void *cbdata) { - volatile int *active = (volatile int*)cbdata; + mylock_t *lock = (mylock_t*)cbdata; if (PMIX_SUCCESS != status) { fprintf(stderr, "EVENT HANDLER REGISTRATION FAILED WITH STATUS %d, ref=%lu\n", status, (unsigned long)evhandler_ref); } - *active = status; + lock->status = status; + lock->evhandler_ref = evhandler_ref; + DEBUG_WAKEUP_THREAD(lock); } int main(int argc, char **argv) @@ -138,11 +159,12 @@ int main(int argc, char **argv) uint32_t nprocs; pmix_info_t *info; uint64_t nnodes = 12; - mydata_t mydata; + myquery_data_t mydata; pmix_query_t *query; char *myallocation = "MYALLOCATION"; - volatile int active; - pmix_status_t code = PMIX_NOTIFY_ALLOC_COMPLETE; + mylock_t mylock; + pmix_status_t code; + myrel_t myrel; /* init us */ if (PMIX_SUCCESS != (rc = PMIx_Init(&myproc, NULL, 0))) { @@ -163,63 +185,64 @@ int main(int argc, char **argv) PMIX_VALUE_RELEASE(val); fprintf(stderr, "Client %s:%d universe size %d\n", myproc.nspace, myproc.rank, nprocs); - /* initialize the return info struct */ - mydata.info = NULL; - mydata.ninfo = 0; - if (0 == myproc.rank) { /* try to get an allocation */ - mydata.active = true; + DEBUG_CONSTRUCT_MYQUERY(&mydata); PMIX_INFO_CREATE(info, 2); PMIX_INFO_LOAD(&info[0], PMIX_ALLOC_NUM_NODES, &nnodes, 
PMIX_UINT64); PMIX_INFO_LOAD(&info[0], PMIX_ALLOC_ID, myallocation, PMIX_STRING); - if (PMIX_SUCCESS != (rc = PMIx_Allocation_request_nb(PMIX_ALLOC_NEW, info, 2, infocbfunc, NULL))) { + if (PMIX_SUCCESS != (rc = PMIx_Allocation_request_nb(PMIX_ALLOC_NEW, info, 2, infocbfunc, &mydata))) { fprintf(stderr, "Client ns %s rank %d: PMIx_Allocation_request_nb failed: %d\n", myproc.nspace, myproc.rank, rc); goto done; } - while (mydata.active) { - usleep(10); - } + DEBUG_WAIT_THREAD(&mydata.lock); PMIX_INFO_FREE(info, 2); - if (NULL != mydata.info) { - PMIX_INFO_FREE(mydata.info, mydata.ninfo); - } + fprintf(stderr, "Client ns %s rank %d: Allocation returned status: %s\n", + myproc.nspace, myproc.rank, PMIx_Error_string(mydata.lock.status)); + DEBUG_DESTRUCT_MYQUERY(&mydata); + } else if (1 == myproc.rank) { - /* register a handler specifically for when the allocation - * operation completes */ - PMIX_INFO_CREATE(info, 1); + /* demonstrate a notification based approach - register a handler + * specifically for when the allocation operation completes */ + DEBUG_CONSTRUCT_MYREL(&myrel); + PMIX_INFO_CREATE(info, 2); PMIX_INFO_LOAD(&info[0], PMIX_ALLOC_ID, myallocation, PMIX_STRING); - active = -1; - PMIx_Register_event_handler(&code, 1, info, 1, - release_fn, evhandler_reg_callbk, (void*)&active); - while (-1 == active) { - usleep(10); - } - if (0 != active) { - exit(active); - } - PMIX_INFO_FREE(info, 1); + PMIX_INFO_LOAD(&info[1], PMIX_EVENT_RETURN_OBJECT, &myrel, PMIX_POINTER); + DEBUG_CONSTRUCT_LOCK(&mylock); + code = PMIX_NOTIFY_ALLOC_COMPLETE; + PMIx_Register_event_handler(&code, 1, info, 2, + release_fn, evhandler_reg_callbk, (void*)&mylock); + DEBUG_WAIT_THREAD(&mylock); + PMIX_INFO_FREE(info, 2); + rc = mylock.status; + DEBUG_DESTRUCT_LOCK(&mylock); + /* now wait to hear that the request is complete */ - while (waiting_for_allocation) { - usleep(10); - } + DEBUG_WAIT_THREAD(&myrel.lock); + fprintf(stderr, "[%s:%d] Allocation returned status: %s\n", + myproc.nspace, 
myproc.rank, PMIx_Error_string(myrel.lock.status)); + DEBUG_DESTRUCT_MYREL(&myrel); + } else { - /* I am not the root rank, so let me wait a little while and then - * query the status of the allocation request */ + /* demonstrate a query-based approach - wait a little while and ask to + * see if it was done */ usleep(10); + DEBUG_CONSTRUCT_MYQUERY(&mydata); + PMIX_QUERY_CREATE(query, 1); PMIX_ARGV_APPEND(rc, query[0].keys, PMIX_QUERY_ALLOC_STATUS); PMIX_INFO_CREATE(query[0].qualifiers, 1); PMIX_INFO_LOAD(&query[0].qualifiers[0], PMIX_ALLOC_ID, myallocation, PMIX_STRING); - mydata.active = true; + if (PMIX_SUCCESS != (rc = PMIx_Query_info_nb(query, 1, infocbfunc, (void*)&mydata))) { fprintf(stderr, "PMIx_Query_info failed: %d\n", rc); goto done; } - while (mydata.active) { - usleep(10); - } + DEBUG_WAIT_THREAD(&mydata.lock); PMIX_QUERY_FREE(query, 1); + fprintf(stderr, "[%s:%d] Allocation returned status: %s\n", + myproc.nspace, myproc.rank, PMIx_Error_string(mydata.lock.status)); + DEBUG_DESTRUCT_MYQUERY(&mydata); } done: diff --git a/opal/mca/pmix/pmix3x/pmix/examples/client.c b/opal/mca/pmix/pmix3x/pmix/examples/client.c index 7e38608950c..49e471fb258 100644 --- a/opal/mca/pmix/pmix3x/pmix/examples/client.c +++ b/opal/mca/pmix/pmix3x/pmix/examples/client.c @@ -13,7 +13,7 @@ * All rights reserved. * Copyright (c) 2009-2012 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2011 Oak Ridge National Labs. All rights reserved. - * Copyright (c) 2013-2018 Intel, Inc. All rights reserved. + * Copyright (c) 2013-2019 Intel, Inc. All rights reserved. * Copyright (c) 2015 Mellanox Technologies, Inc. All rights reserved. 
* $COPYRIGHT$ * @@ -31,8 +31,8 @@ #include #include +#include "examples.h" -static volatile bool waiting_for_debugger = true; static pmix_proc_t myproc; /* this is the event notification function we pass down below @@ -66,10 +66,36 @@ static void release_fn(size_t evhdlr_registration_id, pmix_event_notification_cbfunc_fn_t cbfunc, void *cbdata) { + myrel_t *lock; + size_t n; + + /* find the return object */ + lock = NULL; + for (n=0; n < ninfo; n++) { + if (0 == strncmp(info[n].key, PMIX_EVENT_RETURN_OBJECT, PMIX_MAX_KEYLEN)) { + lock = (myrel_t*)info[n].value.data.ptr; + break; + } + } + /* if the object wasn't returned, then that is an error */ + if (NULL == lock) { + fprintf(stderr, "LOCK WASN'T RETURNED IN RELEASE CALLBACK\n"); + /* let the event handler progress */ + if (NULL != cbfunc) { + cbfunc(PMIX_SUCCESS, NULL, 0, NULL, NULL, cbdata); + } + return; + } + + /* tell the event handler state machine that we are the last step */ if (NULL != cbfunc) { cbfunc(PMIX_EVENT_ACTION_COMPLETE, NULL, 0, NULL, NULL, cbdata); } - waiting_for_debugger = false; + /* the status will be PMIX_ERR_DEBUGGER_RELEASE since that is the code + * we registered to receive, so just return success */ + lock->lock.status = PMIX_SUCCESS; + /* release the lock */ + DEBUG_WAKEUP_THREAD(&lock->lock); } /* event handler registration is done asynchronously because it @@ -83,27 +109,34 @@ static void evhandler_reg_callbk(pmix_status_t status, size_t evhandler_ref, void *cbdata) { - volatile int *active = (volatile int*)cbdata; + mylock_t *lock = (mylock_t*)cbdata; if (PMIX_SUCCESS != status) { fprintf(stderr, "Client %s:%d EVENT HANDLER REGISTRATION FAILED WITH STATUS %d, ref=%lu\n", myproc.nspace, myproc.rank, status, (unsigned long)evhandler_ref); } - *active = status; + lock->status = status; + lock->evhandler_ref = evhandler_ref; + DEBUG_WAKEUP_THREAD(lock); } int main(int argc, char **argv) { - int rc; + pmix_status_t rc; pmix_value_t value; pmix_value_t *val = &value; - char *tmp, *ptr, 
*p; + char *tmp; pmix_proc_t proc; - uint32_t nprocs, m, n, local_cnt, *localpeers; + uint32_t nprocs, n; pmix_info_t *info; - bool flag, local; - volatile int active; + bool flag; + mylock_t mylock; + myrel_t myrel; pmix_status_t dbg = PMIX_ERR_DEBUGGER_RELEASE; + pid_t pid; + + pid = getpid(); + fprintf(stderr, "Client %lu: Running\n", (unsigned long)pid); /* init us - note that the call to "init" includes the return of * any job-related info provided by the RM. This includes any @@ -114,20 +147,21 @@ int main(int argc, char **argv) fprintf(stderr, "Client ns %s rank %d: PMIx_Init failed: %d\n", myproc.nspace, myproc.rank, rc); exit(0); } - fprintf(stderr, "Client ns %s rank %d: Running\n", myproc.nspace, myproc.rank); + fprintf(stderr, "Client ns %s rank %d pid %lu: Running\n", myproc.nspace, myproc.rank, (unsigned long)pid); /* register our default event handler - again, this isn't strictly * required, but is generally good practice */ - active = -1; + DEBUG_CONSTRUCT_LOCK(&mylock); PMIx_Register_event_handler(NULL, 0, NULL, 0, - notification_fn, evhandler_reg_callbk, (void*)&active); - while (-1 == active) { - sleep(1); - } - if (0 != active) { + notification_fn, evhandler_reg_callbk, (void*)&mylock); + DEBUG_WAIT_THREAD(&mylock); + rc = mylock.status; + DEBUG_DESTRUCT_LOCK(&mylock); + + if (PMIX_SUCCESS != rc) { fprintf(stderr, "[%s:%d] Default handler registration failed\n", myproc.nspace, myproc.rank); - exit(active); + goto done; } /* job-related info is found in our nspace, assigned to the @@ -145,21 +179,24 @@ int main(int argc, char **argv) * debugger */ if (PMIX_SUCCESS == (rc = PMIx_Get(&proc, PMIX_DEBUG_WAIT_FOR_NOTIFY, NULL, 0, &val))) { /* register for debugger release */ - active = -1; - PMIx_Register_event_handler(&dbg, 1, NULL, 0, - release_fn, evhandler_reg_callbk, (void*)&active); + DEBUG_CONSTRUCT_LOCK(&mylock); + PMIX_INFO_CREATE(info, 1); + DEBUG_CONSTRUCT_MYREL(&myrel); + PMIX_INFO_LOAD(&info[0], PMIX_EVENT_RETURN_OBJECT, &myrel, 
PMIX_POINTER); + PMIx_Register_event_handler(&dbg, 1, info, 1, + release_fn, evhandler_reg_callbk, (void*)&mylock); /* wait for registration to complete */ - while (-1 == active) { - sleep(1); - } - if (0 != active) { + DEBUG_WAIT_THREAD(&mylock); + rc = mylock.status; + DEBUG_DESTRUCT_LOCK(&mylock); + PMIX_INFO_FREE(info, 1); + if (PMIX_SUCCESS != rc) { fprintf(stderr, "[%s:%d] Debug handler registration failed\n", myproc.nspace, myproc.rank); - exit(active); + goto done; } /* wait for debugger release */ - while (waiting_for_debugger) { - sleep(1); - } + DEBUG_WAIT_THREAD(&myrel.lock); + DEBUG_DESTRUCT_MYREL(&myrel); } /* get our universe size */ @@ -196,7 +233,7 @@ int main(int argc, char **argv) value.type = PMIX_UINT64; value.data.uint64 = 1234; if (PMIX_SUCCESS != (rc = PMIx_Put(PMIX_LOCAL, tmp, &value))) { - fprintf(stderr, "Client ns %s rank %d: PMIx_Put local failed: %d\n", myproc.nspace, myproc.rank, rc); + fprintf(stderr, "Client ns %s rank %d: PMIx_Put internal failed: %d\n", myproc.nspace, myproc.rank, rc); goto done; } free(tmp); @@ -207,7 +244,7 @@ int main(int argc, char **argv) value.type = PMIX_STRING; value.data.string = "1234"; if (PMIX_SUCCESS != (rc = PMIx_Put(PMIX_REMOTE, tmp, &value))) { - fprintf(stderr, "Client ns %s rank %d: PMIx_Put remote failed: %d\n", myproc.nspace, myproc.rank, rc); + fprintf(stderr, "Client ns %s rank %d: PMIx_Put internal failed: %d\n", myproc.nspace, myproc.rank, rc); goto done; } free(tmp); @@ -217,6 +254,9 @@ int main(int argc, char **argv) fprintf(stderr, "Client ns %s rank %d: PMIx_Commit failed: %d\n", myproc.nspace, myproc.rank, rc); goto done; } + if (0 == myproc.rank) { + sleep(2); + } /* call fence to synchronize with our peers - instruct * the fence operation to collect and return all "put" @@ -230,96 +270,53 @@ int main(int argc, char **argv) } PMIX_INFO_FREE(info, 1); - /* get the number of local peers */ - if (PMIX_SUCCESS != (rc = PMIx_Get(&proc, PMIX_LOCAL_SIZE, NULL, 0, &val))) { - fprintf(stderr, 
"Client ns %s rank %d: PMIx_Get PMIX_LOCAL_SIZE failed: %d", myproc.nspace, myproc.rank, rc); - goto done; - } - local_cnt = val->data.uint32; - PMIX_VALUE_RELEASE(val); - - /* create an array for the peers */ - localpeers = (uint32_t*)malloc(local_cnt * sizeof(int)); - - /* get the list of local peers */ - if (PMIX_SUCCESS != (rc = PMIx_Get(&proc, PMIX_LOCAL_PEERS, NULL, 0, &val))) { - fprintf(stderr, "Client ns %s rank %d: PMIx_Get PMIX_LOCAL_PEERS failed: %d", myproc.nspace, myproc.rank, rc); - goto done; - } - ptr = strdup(val->data.string); - PMIX_VALUE_RELEASE(val); - - /* populate the peers array */ - p = strtok(ptr, ","); - localpeers[0] = strtoul(p, NULL, 10); - for (n=1; n < local_cnt; n++) { - p = strtok(NULL, ","); - localpeers[n] = strtoul(p, NULL, 10); - } - free(ptr); - /* check the returned data */ for (n=0; n < nprocs; n++) { - if (n == myproc.rank) { - continue; + if (0 > asprintf(&tmp, "%s-%d-local", myproc.nspace, myproc.rank)) { + exit(1); } - proc.rank = n; - local = false; - for (m=0; m < local_cnt; m++) { - if (localpeers[m] == proc.rank) { - local = true; - break; - } + if (PMIX_SUCCESS != (rc = PMIx_Get(&myproc, tmp, NULL, 0, &val))) { + fprintf(stderr, "Client ns %s rank %d: PMIx_Get %s failed: %d\n", myproc.nspace, myproc.rank, tmp, rc); + goto done; + } + if (PMIX_UINT64 != val->type) { + fprintf(stderr, "Client ns %s rank %d: PMIx_Get %s returned wrong type: %d\n", myproc.nspace, myproc.rank, tmp, val->type); + PMIX_VALUE_RELEASE(val); + free(tmp); + goto done; } - if (local) { - if (0 > asprintf(&tmp, "%s-%d-local", proc.nspace, proc.rank)) { - exit(1); - } - if (PMIX_SUCCESS != (rc = PMIx_Get(&proc, tmp, NULL, 0, &val))) { - fprintf(stderr, "Client ns %s rank %d: PMIx_Get %s failed: %d\n", myproc.nspace, myproc.rank, tmp, rc); - goto done; - } - if (PMIX_UINT64 != val->type) { - fprintf(stderr, "Client ns %s rank %d: PMIx_Get %s returned wrong type: %d\n", myproc.nspace, myproc.rank, tmp, val->type); - PMIX_VALUE_RELEASE(val); - 
free(tmp); - goto done; - } - if (1234 != val->data.uint64) { - fprintf(stderr, "Client ns %s rank %d: PMIx_Get %s returned wrong value: %d\n", myproc.nspace, myproc.rank, tmp, (int)val->data.uint64); - PMIX_VALUE_RELEASE(val); - free(tmp); - goto done; - } - fprintf(stderr, "Client ns %s rank %d: PMIx_Get %s returned correct\n", myproc.nspace, myproc.rank, tmp); + if (1234 != val->data.uint64) { + fprintf(stderr, "Client ns %s rank %d: PMIx_Get %s returned wrong value: %d\n", myproc.nspace, myproc.rank, tmp, (int)val->data.uint64); PMIX_VALUE_RELEASE(val); free(tmp); - } else { - if (0 > asprintf(&tmp, "%s-%d-remote", proc.nspace, proc.rank)) { - exit(1); - } - if (PMIX_SUCCESS != (rc = PMIx_Get(&proc, tmp, NULL, 0, &val))) { - fprintf(stderr, "Client ns %s rank %d: PMIx_Get %s failed: %d\n", myproc.nspace, myproc.rank, tmp, rc); - goto done; - } - if (PMIX_STRING != val->type) { - fprintf(stderr, "Client ns %s rank %d: PMIx_Get %s returned wrong type: %d\n", myproc.nspace, myproc.rank, tmp, val->type); - PMIX_VALUE_RELEASE(val); - free(tmp); - goto done; - } - if (0 != strcmp(val->data.string, "1234")) { - fprintf(stderr, "Client ns %s rank %d: PMIx_Get %s returned wrong value: %s\n", myproc.nspace, myproc.rank, tmp, val->data.string); - PMIX_VALUE_RELEASE(val); - free(tmp); - goto done; - } - fprintf(stderr, "Client ns %s rank %d: PMIx_Get %s returned correct\n", myproc.nspace, myproc.rank, tmp); + goto done; + } + fprintf(stderr, "Client ns %s rank %d: PMIx_Get %s returned correct\n", myproc.nspace, myproc.rank, tmp); + PMIX_VALUE_RELEASE(val); + free(tmp); + if (0 > asprintf(&tmp, "%s-%d-remote", myproc.nspace, myproc.rank)) { + exit(1); + } + if (PMIX_SUCCESS != (rc = PMIx_Get(&myproc, tmp, NULL, 0, &val))) { + fprintf(stderr, "Client ns %s rank %d: PMIx_Get %s failed: %d\n", myproc.nspace, myproc.rank, tmp, rc); + goto done; + } + if (PMIX_STRING != val->type) { + fprintf(stderr, "Client ns %s rank %d: PMIx_Get %s returned wrong type: %d\n", myproc.nspace, 
myproc.rank, tmp, val->type); + PMIX_VALUE_RELEASE(val); + free(tmp); + goto done; + } + if (0 != strcmp(val->data.string, "1234")) { + fprintf(stderr, "Client ns %s rank %d: PMIx_Get %s returned wrong value: %s\n", myproc.nspace, myproc.rank, tmp, val->data.string); PMIX_VALUE_RELEASE(val); free(tmp); + goto done; } + fprintf(stderr, "Client ns %s rank %d: PMIx_Get %s returned correct\n", myproc.nspace, myproc.rank, tmp); + PMIX_VALUE_RELEASE(val); + free(tmp); } - free(localpeers); done: /* finalize us */ diff --git a/opal/mca/pmix/pmix3x/pmix/examples/client2.c b/opal/mca/pmix/pmix3x/pmix/examples/client2.c index 9b7f7e6c0fe..4021c15ec7c 100644 --- a/opal/mca/pmix/pmix3x/pmix/examples/client2.c +++ b/opal/mca/pmix/pmix3x/pmix/examples/client2.c @@ -13,7 +13,7 @@ * All rights reserved. * Copyright (c) 2009-2012 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2011 Oak Ridge National Labs. All rights reserved. - * Copyright (c) 2013-2017 Intel, Inc. All rights reserved. + * Copyright (c) 2013-2019 Intel, Inc. All rights reserved. * Copyright (c) 2015 Mellanox Technologies, Inc. All rights reserved. 
* $COPYRIGHT$ * @@ -31,6 +31,7 @@ #include #include +#include "examples.h" static pmix_proc_t myproc; @@ -62,25 +63,27 @@ static void evhandler_reg_callbk(pmix_status_t status, size_t evhandler_ref, void *cbdata) { - volatile int *active = (volatile int*)cbdata; + mylock_t *lock = (mylock_t*)cbdata; if (PMIX_SUCCESS != status) { fprintf(stderr, "Client %s:%d EVENT HANDLER REGISTRATION FAILED WITH STATUS %d, ref=%lu\n", myproc.nspace, myproc.rank, status, (unsigned long)evhandler_ref); } - *active = status; + lock->status = status; + lock->evhandler_ref = evhandler_ref; + DEBUG_WAKEUP_THREAD(lock); } int main(int argc, char **argv) { - int rc; + pmix_status_t rc; pmix_value_t value; pmix_value_t *val, *vptr; pmix_proc_t proc; uint32_t nprocs, n, k; pmix_info_t *info; bool flag; - volatile int active; + mylock_t mylock; pmix_data_array_t da, *dptr; /* init us - note that the call to "init" includes the return of @@ -97,15 +100,16 @@ int main(int argc, char **argv) /* register our default event handler - again, this isn't strictly * required, but is generally good practice */ - active = -1; + DEBUG_CONSTRUCT_LOCK(&mylock); PMIx_Register_event_handler(NULL, 0, NULL, 0, - notification_fn, evhandler_reg_callbk, (void*)&active); - while (-1 == active) { - sleep(1); - } - if (0 != active) { + notification_fn, evhandler_reg_callbk, (void*)&mylock); + DEBUG_WAIT_THREAD(&mylock); + rc = mylock.status; + DEBUG_DESTRUCT_LOCK(&mylock); + + if (PMIX_SUCCESS != rc) { fprintf(stderr, "[%s:%d] Default handler registration failed\n", myproc.nspace, myproc.rank); - exit(active); + goto done; } /* job-related info is found in our nspace, assigned to the diff --git a/opal/mca/pmix/pmix3x/pmix/examples/dmodex.c b/opal/mca/pmix/pmix3x/pmix/examples/dmodex.c index c093d5bc829..76a1ac8ca0c 100644 --- a/opal/mca/pmix/pmix3x/pmix/examples/dmodex.c +++ b/opal/mca/pmix/pmix3x/pmix/examples/dmodex.c @@ -13,7 +13,7 @@ * All rights reserved. * Copyright (c) 2009-2012 Cisco Systems, Inc. 
All rights reserved. * Copyright (c) 2011 Oak Ridge National Labs. All rights reserved. - * Copyright (c) 2013-2016 Intel, Inc. All rights reserved. + * Copyright (c) 2013-2019 Intel, Inc. All rights reserved. * Copyright (c) 2015 Mellanox Technologies, Inc. All rights reserved. * $COPYRIGHT$ * @@ -33,24 +33,19 @@ #include #include +#include "examples.h" static uint32_t nprocs; static pmix_proc_t myproc; static uint32_t getcount = 0; -#define WAIT_FOR_COMPLETION(a) \ - do { \ - while ((a)) { \ - usleep(10); \ - } \ - } while (0) - static void opcbfunc(pmix_status_t status, void *cbdata) { - bool *active = (bool*)cbdata; + mylock_t *lock = (mylock_t*)cbdata; fprintf(stderr, "%s:%d completed fence_nb\n", myproc.nspace, myproc.rank); - *active = false; + lock->status = status; + DEBUG_WAKEUP_THREAD(lock); } static void valcbfunc(pmix_status_t status, @@ -98,7 +93,7 @@ int main(int argc, char **argv) char *tmp; pmix_proc_t proc; uint32_t n, num_gets; - bool active; + mylock_t mylock; /* init us */ if (PMIX_SUCCESS != (rc = PMIx_Init(&myproc, NULL, 0))) { @@ -170,9 +165,10 @@ int main(int argc, char **argv) PMIX_PROC_CONSTRUCT(&proc); (void)strncpy(proc.nspace, myproc.nspace, PMIX_MAX_NSLEN); proc.rank = PMIX_RANK_WILDCARD; - active = true; - if (PMIX_SUCCESS != (rc = PMIx_Fence_nb(&proc, 1, NULL, 0, opcbfunc, &active))) { + DEBUG_CONSTRUCT_LOCK(&mylock); + if (PMIX_SUCCESS != (rc = PMIx_Fence_nb(&proc, 1, NULL, 0, opcbfunc, &mylock))) { fprintf(stderr, "Client ns %s rank %d: PMIx_Fence failed: %d\n", myproc.nspace, myproc.rank, rc); + DEBUG_DESTRUCT_LOCK(&mylock); goto done; } @@ -203,7 +199,7 @@ int main(int argc, char **argv) } /* wait for the first fence to finish */ - WAIT_FOR_COMPLETION(active); + DEBUG_WAIT_THREAD(&mylock); /* wait for all my "get" calls to complete */ while (getcount < num_gets) { diff --git a/opal/mca/pmix/pmix3x/pmix/examples/dynamic.c b/opal/mca/pmix/pmix3x/pmix/examples/dynamic.c index 4ffe501f02f..6b929420b7e 100644 --- 
a/opal/mca/pmix/pmix3x/pmix/examples/dynamic.c +++ b/opal/mca/pmix/pmix3x/pmix/examples/dynamic.c @@ -13,7 +13,7 @@ * All rights reserved. * Copyright (c) 2009-2012 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2011 Oak Ridge National Labs. All rights reserved. - * Copyright (c) 2013-2018 Intel, Inc. All rights reserved. + * Copyright (c) 2013-2019 Intel, Inc. All rights reserved. * Copyright (c) 2015 Mellanox Technologies, Inc. All rights reserved. * Copyright (c) 2016 Research Organization for Information Science * and Technology (RIST). All rights reserved. @@ -35,7 +35,7 @@ #include #include - +#include "examples.h" static pmix_proc_t myproc; @@ -49,9 +49,7 @@ int main(int argc, char **argv) char nsp2[PMIX_MAX_NSLEN+1]; pmix_app_t *app; char hostname[1024], dir[1024]; - pmix_proc_t *peers; - size_t npeers, ntmp=0; - char *nodelist; + size_t ntmp=0; if (0 > gethostname(hostname, sizeof(hostname))) { exit(1); @@ -71,14 +69,14 @@ int main(int argc, char **argv) (void)strncpy(proc.nspace, myproc.nspace, PMIX_MAX_NSLEN); proc.rank = PMIX_RANK_WILDCARD; - /* get our universe size */ - if (PMIX_SUCCESS != (rc = PMIx_Get(&proc, PMIX_UNIV_SIZE, NULL, 0, &val))) { - fprintf(stderr, "Client ns %s rank %d: PMIx_Get universe size failed: %d\n", myproc.nspace, myproc.rank, rc); + /* get our job size */ + if (PMIX_SUCCESS != (rc = PMIx_Get(&proc, PMIX_JOB_SIZE, NULL, 0, &val))) { + fprintf(stderr, "Client ns %s rank %d: PMIx_Get job size failed: %d\n", myproc.nspace, myproc.rank, rc); goto done; } nprocs = val->data.uint32; PMIX_VALUE_RELEASE(val); - fprintf(stderr, "Client %s:%d universe size %d\n", myproc.nspace, myproc.rank, nprocs); + fprintf(stderr, "Client %s:%d job size %d\n", myproc.nspace, myproc.rank, nprocs); /* call fence to sync */ (void)strncpy(proc.nspace, myproc.nspace, PMIX_MAX_NSLEN); @@ -103,13 +101,6 @@ int main(int argc, char **argv) app->env = (char**)malloc(2 * sizeof(char*)); app->env[0] = strdup("PMIX_ENV_VALUE=3"); app->env[1] = NULL; - 
PMIX_INFO_CREATE(app->info, 2); - (void)strncpy(app->info[0].key, "DARTH", PMIX_MAX_KEYLEN); - app->info[0].value.type = PMIX_INT8; - app->info[0].value.data.int8 = 12; - (void)strncpy(app->info[1].key, "VADER", PMIX_MAX_KEYLEN); - app->info[1].value.type = PMIX_DOUBLE; - app->info[1].value.data.dval = 12.34; fprintf(stderr, "Client ns %s rank %d: calling PMIx_Spawn\n", myproc.nspace, myproc.rank); if (PMIX_SUCCESS != (rc = PMIx_Spawn(NULL, 0, app, 1, nsp2))) { @@ -122,65 +113,28 @@ int main(int argc, char **argv) val = NULL; (void)strncpy(proc.nspace, nsp2, PMIX_MAX_NSLEN); proc.rank = PMIX_RANK_WILDCARD; - if (PMIX_SUCCESS != (rc = PMIx_Get(&proc, PMIX_UNIV_SIZE, NULL, 0, &val)) || + if (PMIX_SUCCESS != (rc = PMIx_Get(&proc, PMIX_JOB_SIZE, NULL, 0, &val)) || NULL == val) { - fprintf(stderr, "Client ns %s rank %d: PMIx_Get universe size failed: %d\n", myproc.nspace, myproc.rank, rc); + fprintf(stderr, "Client ns %s rank %d: PMIx_Get job size failed: %d\n", myproc.nspace, myproc.rank, rc); goto done; } ntmp = val->data.uint32; PMIX_VALUE_RELEASE(val); - fprintf(stderr, "Client %s:%d universe %s size %d\n", myproc.nspace, myproc.rank, nsp2, (int)ntmp); - } + fprintf(stderr, "Client %s:%d job %s size %d\n", myproc.nspace, myproc.rank, nsp2, (int)ntmp); - /* just cycle the connect/disconnect functions */ - (void)strncpy(proc.nspace, myproc.nspace, PMIX_MAX_NSLEN); - proc.rank = PMIX_RANK_WILDCARD; - if (PMIX_SUCCESS != (rc = PMIx_Connect(&proc, 1, NULL, 0))) { - fprintf(stderr, "Client ns %s rank %d: PMIx_Connect failed: %d\n", myproc.nspace, myproc.rank, rc); - goto done; - } - fprintf(stderr, "Client ns %s rank %d: PMIx_Connect succeeded\n", - myproc.nspace, myproc.rank); - if (PMIX_SUCCESS != (rc = PMIx_Disconnect(&proc, 1, NULL, 0))) { - fprintf(stderr, "Client ns %s rank %d: PMIx_Disonnect failed: %d\n", myproc.nspace, myproc.rank, rc); - goto done; - } - fprintf(stderr, "Client ns %s rank %d: PMIx_Disconnect succeeded\n", myproc.nspace, myproc.rank); - - /* 
finally, test the resolve functions */ - if (0 == myproc.rank) { - if (PMIX_SUCCESS != (rc = PMIx_Resolve_peers(hostname, NULL, &peers, &npeers))) { - fprintf(stderr, "Client ns %s rank %d: PMIx_Resolve_peers failed for nspace %s: %d\n", myproc.nspace, myproc.rank, nsp2, rc); - goto done; - } - if ((nprocs+ntmp) != npeers) { - fprintf(stderr, "Client ns %s rank %d: PMIx_Resolve_peers returned incorrect npeers: %d vs %d\n", myproc.nspace, myproc.rank, (int)(nprocs+ntmp), (int)npeers); - goto done; - } - fprintf(stderr, "Client ns %s rank %d: PMIx_Resolve_peers returned %d npeers\n", myproc.nspace, myproc.rank, (int)npeers); - if (PMIX_SUCCESS != (rc = PMIx_Resolve_nodes(nsp2, &nodelist))) { - fprintf(stderr, "Client ns %s rank %d: PMIx_Resolve_nodes failed for nspace %s: %d\n", myproc.nspace, myproc.rank, nsp2, rc); - goto done; - } - fprintf(stderr, "Client ns %s rank %d: PMIx_Resolve_nodes %s", myproc.nspace, myproc.rank, nodelist); - } else { - if (PMIX_SUCCESS != (rc = PMIx_Resolve_peers(hostname, myproc.nspace, &peers, &npeers))) { - fprintf(stderr, "Client ns %s rank %d: PMIx_Resolve_peers failed for nspace %s: %d\n", myproc.nspace, myproc.rank, myproc.nspace, rc); - goto done; - } - if (nprocs != npeers) { - fprintf(stderr, "Client ns %s rank %d: PMIx_Resolve_peers returned incorrect npeers: %d vs %d\n", myproc.nspace, myproc.rank, nprocs, (int)npeers); - goto done; - } - fprintf(stderr, "Client ns %s rank %d: PMIx_Resolve_peers returned %d npeers\n", myproc.nspace, myproc.rank, (int)npeers); - if (PMIX_SUCCESS != (rc = PMIx_Resolve_nodes(myproc.nspace, &nodelist))) { - fprintf(stderr, "Client ns %s rank %d: PMIx_Resolve_nodes failed: %d\n", myproc.nspace, myproc.rank, rc); + /* get a proc-specific value */ + val = NULL; + (void)strncpy(proc.nspace, nsp2, PMIX_MAX_NSLEN); + proc.rank = 1; + if (PMIX_SUCCESS != (rc = PMIx_Get(&proc, PMIX_LOCAL_RANK, NULL, 0, &val)) || + NULL == val) { + fprintf(stderr, "Client ns %s rank %d: PMIx_Get local rank failed: %d\n", 
myproc.nspace, myproc.rank, rc); goto done; } - fprintf(stderr, "Client ns %s rank %d: PMIx_Resolve_nodes %s\n", myproc.nspace, myproc.rank, nodelist); + ntmp = (int)val->data.uint16; + PMIX_VALUE_RELEASE(val); + fprintf(stderr, "Client %s:%d job %s local rank %d\n", myproc.nspace, myproc.rank, nsp2, (int)ntmp); } - PMIX_PROC_FREE(peers, npeers); - free(nodelist); done: /* call fence to sync */ diff --git a/opal/mca/pmix/pmix3x/pmix/examples/examples.h b/opal/mca/pmix/pmix3x/pmix/examples/examples.h new file mode 100644 index 00000000000..d2e5ab7696d --- /dev/null +++ b/opal/mca/pmix/pmix3x/pmix/examples/examples.h @@ -0,0 +1,124 @@ +/* + * Copyright (c) 2004-2010 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2011 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * Copyright (c) 2006-2013 Los Alamos National Security, LLC. + * All rights reserved. + * Copyright (c) 2009-2012 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2011 Oak Ridge National Labs. All rights reserved. + * Copyright (c) 2013-2019 Intel, Inc. All rights reserved. + * Copyright (c) 2015 Mellanox Technologies, Inc. All rights reserved. 
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + * + */ + +#define _GNU_SOURCE +#include +#include +#include +#include +#include + +#include + +typedef struct { + pthread_mutex_t mutex; + pthread_cond_t cond; + volatile bool active; + pmix_status_t status; + int count; + size_t evhandler_ref; +} mylock_t; + +#define DEBUG_CONSTRUCT_LOCK(l) \ + do { \ + pthread_mutex_init(&(l)->mutex, NULL); \ + pthread_cond_init(&(l)->cond, NULL); \ + (l)->active = true; \ + (l)->status = PMIX_SUCCESS; \ + (l)->count = 0; \ + (l)->evhandler_ref = 0; \ + } while(0) + +#define DEBUG_DESTRUCT_LOCK(l) \ + do { \ + pthread_mutex_destroy(&(l)->mutex); \ + pthread_cond_destroy(&(l)->cond); \ + } while(0) + +#define DEBUG_WAIT_THREAD(lck) \ + do { \ + pthread_mutex_lock(&(lck)->mutex); \ + while ((lck)->active) { \ + pthread_cond_wait(&(lck)->cond, &(lck)->mutex); \ + } \ + pthread_mutex_unlock(&(lck)->mutex); \ + } while(0) + +#define DEBUG_WAKEUP_THREAD(lck) \ + do { \ + pthread_mutex_lock(&(lck)->mutex); \ + (lck)->active = false; \ + pthread_cond_broadcast(&(lck)->cond); \ + pthread_mutex_unlock(&(lck)->mutex); \ + } while(0) + +/* define a structure for collecting returned + * info from a query */ +typedef struct { + mylock_t lock; + pmix_info_t *info; + size_t ninfo; +} myquery_data_t; + +#define DEBUG_CONSTRUCT_MYQUERY(q) \ + do { \ + DEBUG_CONSTRUCT_LOCK(&((q)->lock)); \ + (q)->info = NULL; \ + (q)->ninfo = 0; \ + } while(0) + +#define DEBUG_DESTRUCT_MYQUERY(q) \ + do { \ + DEBUG_DESTRUCT_LOCK(&((q)->lock)); \ + if (NULL != (q)->info) { \ + PMIX_INFO_FREE((q)->info, (q)->ninfo); \ + } \ + } while(0) + +/* define a structure for releasing when a given + * nspace terminates */ +typedef struct { + mylock_t lock; + char *nspace; + int exit_code; + bool exit_code_given; +} myrel_t; + + +#define DEBUG_CONSTRUCT_MYREL(r) \ + do { \ + DEBUG_CONSTRUCT_LOCK(&((r)->lock)); \ + (r)->nspace = NULL; \ + (r)->exit_code = 0; \ + (r)->exit_code_given = false; \ + } 
while(0) + +#define DEBUG_DESTRUCT_MYREL(r) \ + do { \ + DEBUG_DESTRUCT_LOCK(&((r)->lock)); \ + if (NULL != (r)->nspace) { \ + free((r)->nspace); \ + } \ + } while(0) diff --git a/opal/mca/pmix/pmix3x/pmix/examples/fault.c b/opal/mca/pmix/pmix3x/pmix/examples/fault.c index a197f589619..abab3886816 100644 --- a/opal/mca/pmix/pmix3x/pmix/examples/fault.c +++ b/opal/mca/pmix/pmix3x/pmix/examples/fault.c @@ -13,7 +13,7 @@ * All rights reserved. * Copyright (c) 2009-2012 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2011 Oak Ridge National Labs. All rights reserved. - * Copyright (c) 2013-2016 Intel, Inc. All rights reserved. + * Copyright (c) 2013-2019 Intel, Inc. All rights reserved. * Copyright (c) 2015 Mellanox Technologies, Inc. All rights reserved. * $COPYRIGHT$ * @@ -28,11 +28,12 @@ #include #include #include +#include #include +#include "examples.h" static pmix_proc_t myproc; -static bool completed; static void notification_fn(size_t evhdlr_registration_id, pmix_status_t status, @@ -42,22 +43,67 @@ static void notification_fn(size_t evhdlr_registration_id, pmix_event_notification_cbfunc_fn_t cbfunc, void *cbdata) { - fprintf(stderr, "Client %s:%d NOTIFIED with status %d\n", myproc.nspace, myproc.rank, status); - completed = true; + myrel_t *lock; + bool found; + int exit_code; + size_t n; + pmix_proc_t *affected = NULL; + + /* find our return object */ + lock = NULL; + found = false; + for (n=0; n < ninfo; n++) { + if (0 == strncmp(info[n].key, PMIX_EVENT_RETURN_OBJECT, PMIX_MAX_KEYLEN)) { + lock = (myrel_t*)info[n].value.data.ptr; + /* not every RM will provide an exit code, but check if one was given */ + } else if (0 == strncmp(info[n].key, PMIX_EXIT_CODE, PMIX_MAX_KEYLEN)) { + exit_code = info[n].value.data.integer; + found = true; + } else if (0 == strncmp(info[n].key, PMIX_EVENT_AFFECTED_PROC, PMIX_MAX_KEYLEN)) { + affected = info[n].value.data.proc; + } + } + /* if the object wasn't returned, then that is an error */ + if (NULL == lock) { + 
fprintf(stderr, "LOCK WASN'T RETURNED IN RELEASE CALLBACK\n"); + /* let the event handler progress */ + if (NULL != cbfunc) { + cbfunc(PMIX_SUCCESS, NULL, 0, NULL, NULL, cbdata); + } + return; + } + + /* tell the event handler state machine that we are the last step */ + if (NULL != cbfunc) { + cbfunc(PMIX_EVENT_ACTION_COMPLETE, NULL, 0, NULL, NULL, cbdata); + } + fprintf(stderr, "DEBUGGER DAEMON NOTIFIED TERMINATED - AFFECTED %s\n", + (NULL == affected) ? "NULL" : affected->nspace); + + if (found) { + lock->exit_code = exit_code; + lock->exit_code_given = true; + } + DEBUG_WAKEUP_THREAD(&lock->lock); } static void op_callbk(pmix_status_t status, void *cbdata) { + mylock_t *lock = (mylock_t*)cbdata; fprintf(stderr, "Client %s:%d OP CALLBACK CALLED WITH STATUS %d\n", myproc.nspace, myproc.rank, status); + DEBUG_WAKEUP_THREAD(lock); } -static void errhandler_reg_callbk(pmix_status_t status, +static void evhandler_reg_callbk(pmix_status_t status, size_t errhandler_ref, void *cbdata) { + mylock_t *lock = (mylock_t*)cbdata; + fprintf(stderr, "Client %s:%d ERRHANDLER REGISTRATION CALLBACK CALLED WITH STATUS %d, ref=%lu\n", myproc.nspace, myproc.rank, status, (unsigned long)errhandler_ref); + DEBUG_WAKEUP_THREAD(lock); } int main(int argc, char **argv) @@ -67,6 +113,10 @@ int main(int argc, char **argv) pmix_value_t *val = &value; pmix_proc_t proc; uint32_t nprocs; + pmix_info_t *info; + mylock_t mylock; + myrel_t myrel; + pmix_status_t code[2] = {PMIX_ERR_PROC_ABORTED, PMIX_ERR_JOB_TERMINATED}; /* init us */ if (PMIX_SUCCESS != (rc = PMIx_Init(&myproc, NULL, 0))) { @@ -87,11 +137,27 @@ int main(int argc, char **argv) nprocs = val->data.uint32; PMIX_VALUE_RELEASE(val); fprintf(stderr, "Client %s:%d universe size %d\n", myproc.nspace, myproc.rank, nprocs); - completed = false; - /* register our errhandler */ - PMIx_Register_event_handler(NULL, 0, NULL, 0, - notification_fn, errhandler_reg_callbk, NULL); + /* register another handler specifically for when the target + * job 
completes */ + DEBUG_CONSTRUCT_MYREL(&myrel); + PMIX_INFO_CREATE(info, 2); + PMIX_INFO_LOAD(&info[0], PMIX_EVENT_RETURN_OBJECT, &myrel, PMIX_POINTER); + /* only call me back when one of us terminates */ + PMIX_INFO_LOAD(&info[1], PMIX_NSPACE, myproc.nspace, PMIX_STRING); + + DEBUG_CONSTRUCT_LOCK(&mylock); + PMIx_Register_event_handler(code, 2, info, 2, + notification_fn, evhandler_reg_callbk, (void*)&mylock); + DEBUG_WAIT_THREAD(&mylock); + if (PMIX_SUCCESS != mylock.status) { + rc = mylock.status; + DEBUG_DESTRUCT_LOCK(&mylock); + PMIX_INFO_FREE(info, 2); + goto done; + } + DEBUG_DESTRUCT_LOCK(&mylock); + PMIX_INFO_FREE(info, 2); /* call fence to sync */ PMIX_PROC_CONSTRUCT(&proc); @@ -109,17 +175,16 @@ int main(int argc, char **argv) exit(1); } /* everyone simply waits */ - while (!completed) { - struct timespec ts; - ts.tv_sec = 0; - ts.tv_nsec = 100000; - nanosleep(&ts, NULL); - } + DEBUG_WAIT_THREAD(&myrel.lock); + DEBUG_DESTRUCT_MYREL(&myrel); done: /* finalize us */ fprintf(stderr, "Client ns %s rank %d: Finalizing\n", myproc.nspace, myproc.rank); - PMIx_Deregister_event_handler(1, op_callbk, NULL); + DEBUG_CONSTRUCT_LOCK(&mylock); + PMIx_Deregister_event_handler(1, op_callbk, &mylock); + DEBUG_WAIT_THREAD(&mylock); + DEBUG_DESTRUCT_LOCK(&mylock); if (PMIX_SUCCESS != (rc = PMIx_Finalize(NULL, 0))) { fprintf(stderr, "Client ns %s rank %d:PMIx_Finalize failed: %d\n", myproc.nspace, myproc.rank, rc); diff --git a/opal/mca/pmix/pmix3x/pmix/examples/jctrl.c b/opal/mca/pmix/pmix3x/pmix/examples/jctrl.c index 5c1c1d1f73d..0dc244b3c62 100644 --- a/opal/mca/pmix/pmix3x/pmix/examples/jctrl.c +++ b/opal/mca/pmix/pmix3x/pmix/examples/jctrl.c @@ -13,7 +13,7 @@ * All rights reserved. * Copyright (c) 2009-2012 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2011 Oak Ridge National Labs. All rights reserved. - * Copyright (c) 2013-2017 Intel, Inc. All rights reserved. + * Copyright (c) 2013-2019 Intel, Inc. All rights reserved. 
* Copyright (c) 2015 Mellanox Technologies, Inc. All rights reserved. * $COPYRIGHT$ * @@ -32,6 +32,7 @@ #include #include +#include "examples.h" static pmix_proc_t myproc; @@ -63,13 +64,15 @@ static void evhandler_reg_callbk(pmix_status_t status, size_t evhandler_ref, void *cbdata) { - volatile int *active = (volatile int*)cbdata; + mylock_t *lock = (mylock_t*)cbdata; if (PMIX_SUCCESS != status) { fprintf(stderr, "Client %s:%d EVENT HANDLER REGISTRATION FAILED WITH STATUS %d, ref=%lu\n", myproc.nspace, myproc.rank, status, (unsigned long)evhandler_ref); } - *active = status; + lock->status = status; + lock->evhandler_ref = evhandler_ref; + DEBUG_WAKEUP_THREAD(lock); } static void infocbfunc(pmix_status_t status, @@ -78,26 +81,27 @@ static void infocbfunc(pmix_status_t status, pmix_release_cbfunc_t release_fn, void *release_cbdata) { - volatile int *active = (volatile int*)cbdata; + mylock_t *lock = (mylock_t*)cbdata; /* release the caller */ if (NULL != release_fn) { release_fn(release_cbdata); } - *active = status; + lock->status = status; + DEBUG_WAKEUP_THREAD(lock); } int main(int argc, char **argv) { - int rc; + pmix_status_t rc; pmix_value_t value; pmix_value_t *val = &value; pmix_proc_t proc; uint32_t nprocs, n; pmix_info_t *info, *iptr; bool flag; - volatile int active; + mylock_t mylock; pmix_data_array_t *dptr; /* init us - note that the call to "init" includes the return of @@ -111,15 +115,16 @@ int main(int argc, char **argv) /* register our default event handler - again, this isn't strictly * required, but is generally good practice */ - active = -1; + DEBUG_CONSTRUCT_LOCK(&mylock); PMIx_Register_event_handler(NULL, 0, NULL, 0, - notification_fn, evhandler_reg_callbk, (void*)&active); - while (-1 == active) { - sleep(1); - } - if (0 != active) { + notification_fn, evhandler_reg_callbk, (void*)&mylock); + /* wait for registration to complete */ + DEBUG_WAIT_THREAD(&mylock); + rc = mylock.status; + DEBUG_DESTRUCT_LOCK(&mylock); + if (PMIX_SUCCESS != rc) { 
fprintf(stderr, "[%s:%d] Default handler registration failed\n", myproc.nspace, myproc.rank); - exit(active); + goto done; } /* job-related info is found in our nspace, assigned to the @@ -145,12 +150,8 @@ int main(int argc, char **argv) PMIX_INFO_LOAD(&info[0], PMIX_JOB_CTRL_PREEMPTIBLE, (void*)&flag, PMIX_BOOL); /* can't use "load" to load a pmix_data_array_t */ (void)strncpy(info[1].key, PMIX_JOB_CTRL_CHECKPOINT_METHOD, PMIX_MAX_KEYLEN); - info[1].value.type = PMIX_DATA_ARRAY; - dptr = (pmix_data_array_t*)malloc(sizeof(pmix_data_array_t)); - info[1].value.data.darray = dptr; - dptr->type = PMIX_INFO; - dptr->size = 2; - PMIX_INFO_CREATE(dptr->array, dptr->size); + PMIX_DATA_ARRAY_CREATE(info[1].value.data.darray, 2, PMIX_INFO); + dptr = info[1].value.data.darray; rc = SIGUSR2; iptr = (pmix_info_t*)dptr->array; PMIX_INFO_LOAD(&iptr[0], PMIX_JOB_CTRL_CHECKPOINT_SIGNAL, &rc, PMIX_INT); @@ -159,18 +160,19 @@ int main(int argc, char **argv) /* since this is informational and not a requested operation, the target parameter * doesn't mean anything and can be ignored */ - active = -1; - if (PMIX_SUCCESS != (rc = PMIx_Job_control_nb(NULL, 0, info, 2, infocbfunc, (void*)&active))) { + DEBUG_CONSTRUCT_LOCK(&mylock); + if (PMIX_SUCCESS != (rc = PMIx_Job_control_nb(NULL, 0, info, 2, infocbfunc, (void*)&mylock))) { fprintf(stderr, "Client ns %s rank %d: PMIx_Job_control_nb failed: %d\n", myproc.nspace, myproc.rank, rc); + DEBUG_DESTRUCT_LOCK(&mylock); goto done; } - while (-1 == active) { - sleep(1); - } + DEBUG_WAIT_THREAD(&mylock); PMIX_INFO_FREE(info, 2); - if (0 != active) { + rc = mylock.status; + DEBUG_DESTRUCT_LOCK(&mylock); + if (PMIX_SUCCESS != rc) { fprintf(stderr, "Client ns %s rank %d: PMIx_Job_control_nb failed: %d\n", myproc.nspace, myproc.rank, rc); - exit(active); + goto done; } /* now request that this process be monitored using heartbeats */ @@ -185,20 +187,21 @@ int main(int argc, char **argv) PMIX_INFO_LOAD(&info[2], PMIX_MONITOR_HEARTBEAT_DROPS, &n, 
PMIX_UINT32); /* make the request */ - active = -1; + DEBUG_CONSTRUCT_LOCK(&mylock); if (PMIX_SUCCESS != (rc = PMIx_Process_monitor_nb(iptr, PMIX_MONITOR_HEARTBEAT_ALERT, - info, 3, infocbfunc, (void*)&active))) { + info, 3, infocbfunc, (void*)&mylock))) { fprintf(stderr, "Client ns %s rank %d: PMIx_Process_monitor_nb failed: %d\n", myproc.nspace, myproc.rank, rc); + DEBUG_DESTRUCT_LOCK(&mylock); goto done; } - while (-1 == active) { - sleep(1); - } + DEBUG_WAIT_THREAD(&mylock); PMIX_INFO_FREE(iptr, 1); PMIX_INFO_FREE(info, 3); - if (0 != active) { + rc = mylock.status; + DEBUG_DESTRUCT_LOCK(&mylock); + if (PMIX_SUCCESS != rc) { fprintf(stderr, "Client ns %s rank %d: PMIx_Process_monitor_nb failed: %d\n", myproc.nspace, myproc.rank, rc); - exit(active); + goto done; } /* send a heartbeat */ diff --git a/opal/mca/pmix/pmix3x/pmix/examples/tool.c b/opal/mca/pmix/pmix3x/pmix/examples/tool.c index 6818e49c5a3..e10699309eb 100644 --- a/opal/mca/pmix/pmix3x/pmix/examples/tool.c +++ b/opal/mca/pmix/pmix3x/pmix/examples/tool.c @@ -13,7 +13,7 @@ * All rights reserved. * Copyright (c) 2009-2012 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2011 Oak Ridge National Labs. All rights reserved. - * Copyright (c) 2013-2016 Intel, Inc. All rights reserved. + * Copyright (c) 2013-2019 Intel, Inc. All rights reserved. * Copyright (c) 2015 Mellanox Technologies, Inc. All rights reserved. 
* $COPYRIGHT$ * @@ -29,6 +29,7 @@ #include #include +#include "examples.h" static void cbfunc(pmix_status_t status, pmix_info_t *info, size_t ninfo, @@ -36,15 +37,28 @@ static void cbfunc(pmix_status_t status, pmix_release_cbfunc_t release_fn, void *release_cbdata) { - volatile bool *active = (volatile bool*)cbdata; + myquery_data_t *mq = (myquery_data_t*)cbdata; + size_t n; - /* do something with the returned info - it will be + mq->lock.status = status; + + /* save the returned info - it will be * released in the release_fn */ + if (0 < ninfo) { + PMIX_INFO_CREATE(mq->info, ninfo); + mq->ninfo = ninfo; + for (n=0; n < ninfo; n++) { + PMIX_INFO_XFER(&mq->info[n], &info[n]); + } + } + /* let the library release the data */ if (NULL != release_fn) { release_fn(release_cbdata); } - *active = false; + + /* release the block */ + DEBUG_WAKEUP_THREAD(&mq->lock); } int main(int argc, char **argv) @@ -52,31 +66,154 @@ int main(int argc, char **argv) pmix_status_t rc; pmix_proc_t myproc; pmix_query_t *query; - size_t nq; - volatile bool active; + size_t nq, ninfo = 0, n, m; + myquery_data_t mydata; + pmix_info_t *info = NULL, *iptr; + char *server_uri = NULL; + char *nspace = NULL; + char *nodename = NULL; + pmix_data_array_t *darray, *dptr; + bool geturi = false; + char hostname[1024]; + + gethostname(hostname, 1024); + for (n=1; n < (size_t)argc; n++) { + if (0 == strcmp("-u", argv[n]) || 0 == strcmp("--url", argv[n])) { + if (NULL == argv[n+1]) { + fprintf(stderr, "Must provide URI argument to %s option\n", argv[n]); + exit(1); + } + server_uri = argv[n+1]; + } else if (0 == strcmp("-nspace", argv[n]) || 0 == strcmp("--nspace", argv[n])) { + if (NULL == argv[n+1]) { + fprintf(stderr, "Must provide nspace argument to %s option\n", argv[n]); + exit(1); + } + nspace = argv[n+1]; + } else if (0 == strcmp("-uri", argv[n]) || 0 == strcmp("--uri", argv[n])) { + /* retrieve the PMIx server's uri from the indicated node */ + nodename = argv[n+1]; + geturi = true; + } + } + + if 
(NULL != server_uri) { + ninfo = 1; + PMIX_INFO_CREATE(info, ninfo); + PMIX_INFO_LOAD(&info[0], PMIX_SERVER_URI, server_uri, PMIX_STRING); + fprintf(stderr, "Connecting to %s\n", server_uri); + } /* init us */ - if (PMIX_SUCCESS != (rc = PMIx_tool_init(&myproc, NULL, 0))) { + if (PMIX_SUCCESS != (rc = PMIx_tool_init(&myproc, info, ninfo))) { fprintf(stderr, "PMIx_tool_init failed: %d\n", rc); exit(rc); } + if (NULL != info) { + PMIX_INFO_FREE(info, ninfo); + } - /* query something */ - nq = 2; - PMIX_QUERY_CREATE(query, nq); - query[0].keys = (char**)malloc(2 * sizeof(char*)); - query[0].keys[0] = strdup("foobar"); - query[0].keys[1] = NULL; - query[1].keys = (char**)malloc(2 * sizeof(char*)); - query[1].keys[0] = strdup("spastic"); - query[1].keys[1] = NULL; - active = true; - if (PMIX_SUCCESS != (rc = PMIx_Query_info_nb(query, nq, cbfunc, (void*)&active))) { - fprintf(stderr, "Client ns %s rank %d: PMIx_Query_info failed: %d\n", myproc.nspace, myproc.rank, rc); + if (geturi) { + nq = 1; + PMIX_QUERY_CREATE(query, nq); + PMIX_ARGV_APPEND(rc, query[0].keys, PMIX_SERVER_URI); + if (NULL != nodename) { + PMIX_QUERY_QUALIFIERS_CREATE(&query[0], 1); + PMIX_INFO_LOAD(&query[0].qualifiers[0], PMIX_HOSTNAME, nodename, PMIX_STRING); + } + DEBUG_CONSTRUCT_MYQUERY(&mydata); + if (PMIX_SUCCESS != (rc = PMIx_Query_info_nb(query, nq, cbfunc, (void*)&mydata))) { + fprintf(stderr, "Client ns %s rank %d: PMIx_Query_info failed: %d\n", myproc.nspace, myproc.rank, rc); + goto done; + } + DEBUG_WAIT_THREAD(&mydata.lock); + /* find the response */ + if (PMIX_SUCCESS == mydata.lock.status) { + /* should be in the first key */ + if (PMIX_CHECK_KEY(&mydata.info[0], PMIX_SERVER_URI)) { + fprintf(stderr, "PMIx server URI for node %s: %s\n", + (NULL == nodename) ? 
hostname : nodename, + mydata.info[0].value.data.string); + } else { + fprintf(stderr, "Query returned wrong info key at first posn: %s\n", mydata.info[0].key); + } + } else { + fprintf(stderr, "Query returned error: %s\n", PMIx_Error_string(mydata.lock.status)); + } + DEBUG_DESTRUCT_MYQUERY(&mydata); goto done; } - while(active) { - usleep(10); + + if (NULL == nspace) { + /* query the list of active nspaces */ + nq = 1; + PMIX_QUERY_CREATE(query, nq); + PMIX_ARGV_APPEND(rc, query[0].keys, PMIX_QUERY_NAMESPACE_INFO); + DEBUG_CONSTRUCT_MYQUERY(&mydata); + if (PMIX_SUCCESS != (rc = PMIx_Query_info_nb(query, nq, cbfunc, (void*)&mydata))) { + fprintf(stderr, "Client ns %s rank %d: PMIx_Query_info failed: %d\n", myproc.nspace, myproc.rank, rc); + goto done; + } + DEBUG_WAIT_THREAD(&mydata.lock); + /* find the response */ + if (PMIX_SUCCESS == mydata.lock.status) { + /* should be in the first key */ + if (PMIX_CHECK_KEY(&mydata.info[0], PMIX_QUERY_NAMESPACE_INFO)) { + darray = mydata.info[0].value.data.darray; + fprintf(stderr, "ACTIVE NSPACES:\n"); + if (NULL == darray || 0 == darray->size || NULL == darray->array) { + fprintf(stderr, "\tNone\n"); + } else { + info = (pmix_info_t*)darray->array; + if (NULL == info) { + fprintf(stderr, "Error\n"); + } else { + for (n=0; n < darray->size; n++) { + dptr = info[n].value.data.darray; + if (NULL == dptr || 0 == dptr->size || NULL == dptr->array) { + fprintf(stderr, "Error in array %s\n", (NULL == dptr) ? 
"NULL" : "NON-NULL"); + break; + } + iptr = (pmix_info_t*)dptr->array; + for (m=0; m < dptr->size; m++) { + fprintf(stderr, "\t%s", iptr[m].value.data.string); + } + fprintf(stderr, "\n"); + } + } + } + } else { + fprintf(stderr, "Query returned wrong info key at first posn: %s\n", mydata.info[0].key); + } + } else { + fprintf(stderr, "Query returned error: %s\n", PMIx_Error_string(mydata.lock.status)); + } + DEBUG_DESTRUCT_MYQUERY(&mydata); + } else { + nq = 1; + PMIX_QUERY_CREATE(query, nq); + PMIX_ARGV_APPEND(rc, query[0].keys, PMIX_JOB_SIZE); + PMIX_INFO_CREATE(query[0].qualifiers, 1); + query[0].nqual = 1; + PMIX_INFO_LOAD(&query[0].qualifiers[0], PMIX_NSPACE, nspace, PMIX_STRING); + DEBUG_CONSTRUCT_MYQUERY(&mydata); + if (PMIX_SUCCESS != (rc = PMIx_Query_info_nb(query, nq, cbfunc, (void*)&mydata))) { + fprintf(stderr, "Client ns %s rank %d: PMIx_Query_info failed: %d\n", myproc.nspace, myproc.rank, rc); + goto done; + } + DEBUG_WAIT_THREAD(&mydata.lock); + /* find the response */ + if (PMIX_SUCCESS == mydata.lock.status) { + /* should be in the first key */ + if (PMIX_CHECK_KEY(&mydata.info[0], PMIX_JOB_SIZE)) { + fprintf(stderr, "JOB SIZE FOR NSPACE %s: %lu\n", nspace, (unsigned long)mydata.info[0].value.data.uint32); + } else { + fprintf(stderr, "Query returned wrong info key at first posn: %s\n", mydata.info[0].key); + } + } else { + fprintf(stderr, "Query returned error: %s\n", PMIx_Error_string(mydata.lock.status)); + } + DEBUG_DESTRUCT_MYQUERY(&mydata); } done: diff --git a/opal/mca/pmix/pmix3x/pmix/include/Makefile.am b/opal/mca/pmix/pmix3x/pmix/include/Makefile.am index 3b205f72c57..5c0ff27f100 100644 --- a/opal/mca/pmix/pmix3x/pmix/include/Makefile.am +++ b/opal/mca/pmix/pmix3x/pmix/include/Makefile.am @@ -1,5 +1,5 @@ # -# Copyright (c) 2015-2017 Intel, Inc. All rights reserved. +# Copyright (c) 2015-2018 Intel, Inc. All rights reserved. 
# # $COPYRIGHT$ # @@ -14,7 +14,8 @@ if WANT_PRIMARY_HEADERS include_HEADERS = \ pmix.h \ pmix_server.h \ - pmix_tool.h + pmix_tool.h \ + pmix_extend.h if WANT_PMI_BACKWARD include_HEADERS += \ diff --git a/opal/mca/pmix/pmix3x/pmix/include/pmix.h b/opal/mca/pmix/pmix3x/pmix/include/pmix.h index ae54d85b70a..ba29692c99d 100644 --- a/opal/mca/pmix/pmix3x/pmix/include/pmix.h +++ b/opal/mca/pmix/pmix3x/pmix/include/pmix.h @@ -129,7 +129,7 @@ PMIX_EXPORT pmix_status_t PMIx_Abort(int status, const char msg[], * the information locally until _PMIx_Commit_ is called. The provided scope * value is passed to the local PMIx server, which will distribute the data * as directed. */ -PMIX_EXPORT pmix_status_t PMIx_Put(pmix_scope_t scope, const char key[], pmix_value_t *val); +PMIX_EXPORT pmix_status_t PMIx_Put(pmix_scope_t scope, const pmix_key_t key, pmix_value_t *val); /* Push all previously _PMIx_Put_ values to the local PMIx server. @@ -200,7 +200,7 @@ PMIX_EXPORT pmix_status_t PMIx_Fence_nb(const pmix_proc_t procs[], size_t nprocs * an error. The timeout parameter can help avoid "hangs" due to programming * errors that prevent the target proc from ever exposing its data. */ -PMIX_EXPORT pmix_status_t PMIx_Get(const pmix_proc_t *proc, const char key[], +PMIX_EXPORT pmix_status_t PMIx_Get(const pmix_proc_t *proc, const pmix_key_t key, const pmix_info_t info[], size_t ninfo, pmix_value_t **val); @@ -208,7 +208,7 @@ PMIX_EXPORT pmix_status_t PMIx_Get(const pmix_proc_t *proc, const char key[], * be executed once the specified data has been _PMIx_Put_ * by the identified process and retrieved by the local server. The info * array is used as described above for the blocking form of this call. 
*/ -PMIX_EXPORT pmix_status_t PMIx_Get_nb(const pmix_proc_t *proc, const char key[], +PMIX_EXPORT pmix_status_t PMIx_Get_nb(const pmix_proc_t *proc, const pmix_key_t key, const pmix_info_t info[], size_t ninfo, pmix_value_cbfunc_t cbfunc, void *cbdata); @@ -337,7 +337,7 @@ PMIX_EXPORT pmix_status_t PMIx_Unpublish_nb(char **keys, */ PMIX_EXPORT pmix_status_t PMIx_Spawn(const pmix_info_t job_info[], size_t ninfo, const pmix_app_t apps[], size_t napps, - char nspace[]); + pmix_nspace_t nspace); /* Non-blocking form of the _PMIx_Spawn_ function. The callback @@ -394,7 +394,7 @@ PMIX_EXPORT pmix_status_t PMIx_Disconnect_nb(const pmix_proc_t ranges[], size_t * for releasing the array when done with it - the PMIX_PROC_FREE macro is * provided for this purpose. */ -PMIX_EXPORT pmix_status_t PMIx_Resolve_peers(const char *nodename, const char *nspace, +PMIX_EXPORT pmix_status_t PMIx_Resolve_peers(const char *nodename, const pmix_nspace_t nspace, pmix_proc_t **procs, size_t *nprocs); @@ -402,7 +402,7 @@ PMIX_EXPORT pmix_status_t PMIx_Resolve_peers(const char *nodename, const char *n * that nspace. The returned string will contain a comma-delimited list * of nodenames. The caller is responsible for releasing the string * when done with it */ -PMIX_EXPORT pmix_status_t PMIx_Resolve_nodes(const char *nspace, char **nodelist); +PMIX_EXPORT pmix_status_t PMIx_Resolve_nodes(const pmix_nspace_t nspace, char **nodelist); /* Query information about the system in general - can include * a list of active nspaces, network topology, etc. Also can be diff --git a/opal/mca/pmix/pmix3x/pmix/include/pmix_common.h.in b/opal/mca/pmix/pmix3x/pmix/include/pmix_common.h.in index 93146a45b33..a3039ff6748 100644 --- a/opal/mca/pmix/pmix3x/pmix/include/pmix_common.h.in +++ b/opal/mca/pmix/pmix3x/pmix/include/pmix_common.h.in @@ -1,10 +1,10 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* - * Copyright (c) 2013-2018 Intel, Inc. All rights reserved. 
+ * Copyright (c) 2013-2019 Intel, Inc. All rights reserved. * Copyright (c) 2016-2018 Research Organization for Information Science * and Technology (RIST). All rights reserved. * Copyright (c) 2016 IBM Corporation. All rights reserved. - * Copyright (c) 2016-2017 Mellanox Technologies, Inc. + * Copyright (c) 2016-2018 Mellanox Technologies, Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -84,6 +84,10 @@ extern "C" { #define PMIX_MAX_NSLEN 255 #define PMIX_MAX_KEYLEN 511 +/* define abstract types for namespaces and keys */ +typedef char pmix_nspace_t[PMIX_MAX_NSLEN+1]; +typedef char pmix_key_t[PMIX_MAX_KEYLEN+1]; + /* define a type for rank values */ typedef uint32_t pmix_rank_t; @@ -104,6 +108,9 @@ typedef uint32_t pmix_rank_t; #define PMIX_RANK_LOCAL_NODE UINT32_MAX-2 // all ranks on local node /* define an invalid value */ #define PMIX_RANK_INVALID UINT32_MAX-3 +/* define a boundary for valid ranks */ +#define PMIX_RANK_VALID UINT32_MAX-50 + /**** PMIX ENVIRONMENTAL PARAMETERS ****/ /* There are a few environmental parameters used by PMIx for @@ -115,6 +122,9 @@ typedef uint32_t pmix_rank_t; * a tool can connect with further instructions. This envar will be * set by the tool and is _not_ intended for the direct use of users. * + * PMIX_LAUNCHER_RENDEZVOUS_FILE - if set, contains the full pathname + * of a file the launcher is to write that contains its connection info. + * Works in addition to anything else the launcher may output. 
*/ /* define a set of "standard" PMIx attributes that can @@ -140,7 +150,6 @@ typedef uint32_t pmix_rank_t; // client rendezvous points and contact info #define PMIX_SYSTEM_TMPDIR "pmix.sys.tmpdir" // (char*) temp directory for this system, where PMIx // server will place tool rendezvous points and contact info -#define PMIX_REGISTER_NODATA "pmix.reg.nodata" // (bool) Registration is for nspace only, do not copy job data #define PMIX_SERVER_ENABLE_MONITORING "pmix.srv.monitor" // (bool) Enable PMIx internal monitoring by server #define PMIX_SERVER_NSPACE "pmix.srv.nspace" // (char*) Name of the nspace to use for this server #define PMIX_SERVER_RANK "pmix.srv.rank" // (pmix_rank_t) Rank of this server @@ -164,6 +173,7 @@ typedef uint32_t pmix_rank_t; // from the specified processes to this tool #define PMIX_RECONNECT_SERVER "pmix.cnct.recon" // (bool) tool is requesting to change server connections #define PMIX_LAUNCHER "pmix.tool.launcher" // (bool) tool is a launcher and needs rendezvous files created +#define PMIX_LAUNCHER_RENDEZVOUS_FILE "pmix.tool.lncrnd" // (char*) Pathname of file where connection info is to be stored /* identification attributes */ #define PMIX_USERID "pmix.euid" // (uint32_t) effective user id @@ -262,6 +272,7 @@ typedef uint32_t pmix_rank_t; #define PMIX_LOCAL_SIZE "pmix.local.size" // (uint32_t) #procs in this job on this node #define PMIX_NODE_SIZE "pmix.node.size" // (uint32_t) #procs across all jobs on this node #define PMIX_MAX_PROCS "pmix.max.size" // (uint32_t) max #procs for this job +#define PMIX_NUM_SLOTS "pmix.num.slots" // (uint32_t) #slots allocated #define PMIX_NUM_NODES "pmix.num.nodes" // (uint32_t) #nodes in this nspace @@ -274,7 +285,6 @@ typedef uint32_t pmix_rank_t; /* topology info */ #define PMIX_NET_TOPO "pmix.ntopo" // (char*) xml-representation of network topology #define PMIX_LOCAL_TOPO "pmix.ltopo" // (char*) xml-representation of local node topology -#define PMIX_NODE_LIST "pmix.nlist" // (char*) comma-delimited 
list of nodes running procs for this job #define PMIX_TOPOLOGY "pmix.topo" // (hwloc_topology_t) pointer to the PMIx client's internal topology object #define PMIX_TOPOLOGY_XML "pmix.topo.xml" // (char*) XML-based description of topology #define PMIX_TOPOLOGY_FILE "pmix.topo.file" // (char*) full path to file containing XML topology description @@ -330,8 +340,6 @@ typedef uint32_t pmix_rank_t; /* event handler registration and notification info keys */ #define PMIX_EVENT_HDLR_NAME "pmix.evname" // (char*) string name identifying this handler -#define PMIX_EVENT_JOB_LEVEL "pmix.evjob" // (bool) register for job-specific events only -#define PMIX_EVENT_ENVIRO_LEVEL "pmix.evenv" // (bool) register for environment events only #define PMIX_EVENT_HDLR_FIRST "pmix.evfirst" // (bool) invoke this event handler before any other handlers #define PMIX_EVENT_HDLR_LAST "pmix.evlast" // (bool) invoke this event handler after all other handlers have been called #define PMIX_EVENT_HDLR_FIRST_IN_CATEGORY "pmix.evfirstcat" // (bool) invoke this event handler before any other handlers in this category @@ -349,7 +357,9 @@ typedef uint32_t pmix_rank_t; // registered it #define PMIX_EVENT_DO_NOT_CACHE "pmix.evnocache" // (bool) instruct the PMIx server not to cache the event #define PMIX_EVENT_SILENT_TERMINATION "pmix.evsilentterm" // (bool) do not generate an event when this job normally terminates - +#define PMIX_EVENT_PROXY "pmix.evproxy" // (pmix_proc_t*) PMIx server that sourced the event +#define PMIX_EVENT_TEXT_MESSAGE "pmix.evtext" // (char*) text message suitable for output by recipient - e.g., describing + // the cause of the event /* fault tolerance-related events */ #define PMIX_EVENT_TERMINATE_SESSION "pmix.evterm.sess" // (bool) RM intends to terminate session @@ -406,22 +416,16 @@ typedef uint32_t pmix_rank_t; #define PMIX_FWD_STDERR "pmix.fwd.stderr" // (bool) forward stderr from the spawned processes to this process (typically used by a tool) #define PMIX_FWD_STDDIAG 
"pmix.fwd.stddiag" // (bool) if a diagnostic channel exists, forward any output on it // from the spawned processes to this process (typically used by a tool) - - -/* connect attributes */ -#define PMIX_CONNECT_NOTIFY_EACH "pmix.cnct.each" // (bool) notify the other participants of the connection by event - // each time a process connects -#define PMIX_CONNECT_NOTIFY_REQ "pmix.cnct.req" // (bool) notify all other participants that they are requested to - // connect -#define PMIX_CONNECT_OPTIONAL "pmix.cnt.opt" // (bool) participation is optional - do not return error if procs - // terminate without having connected -#define PMIX_CONNECT_XCHG_ONLY "pmix.cnt.xchg" // (bool) provide participants with job-level info for all participating - // nspaces, but do not assign a new nspace or rank -#define PMIX_CONNECT_ID "pmix.cnt.id" // (char*) an application-provided string identifier for a PMIx_Connect operation. - +#define PMIX_SPAWN_TOOL "pmix.spwn.tool" // (bool) job being spawned is a tool +#define PMIX_CMD_LINE "pmix.cmd.line" // (char*) command line executing in the specified nspace /* query attributes */ -#define PMIX_QUERY_NAMESPACES "pmix.qry.ns" // (char*) request a comma-delimited list of active nspaces +#define PMIX_QUERY_REFRESH_CACHE "pmix.qry.rfsh" // (bool) retrieve updated information from server + // to update local cache +#define PMIX_QUERY_NAMESPACES "pmix.qry.ns" // (char*) return a comma-delimited list of active namespaces +#define PMIX_QUERY_NAMESPACE_INFO "pmix.qry.nsinfo" // (pmix_data_array_t) request an array of active nspace information - each + // element will contain an array including the namespace plus the + // command line of the application executing within it #define PMIX_QUERY_JOB_STATUS "pmix.qry.jst" // (pmix_status_t) status of a specified currently executing job #define PMIX_QUERY_QUEUE_LIST "pmix.qry.qlst" // (char*) request a comma-delimited list of scheduler queues #define PMIX_QUERY_QUEUE_STATUS "pmix.qry.qst" // (TBD) status of 
a specified scheduler queue @@ -442,6 +446,49 @@ typedef uint32_t pmix_rank_t; #define PMIX_TIME_REMAINING "pmix.time.remaining" // (char*) query number of seconds (uint32_t) remaining in allocation // for the specified nspace +/* information retrieval attributes */ +#define PMIX_SESSION_INFO "pmix.ssn.info" // (bool) Return information about the specified session. If information + // about a session other than the one containing the requesting + // process is desired, then the attribute array must contain a + // PMIX_SESSION_ID attribute identifying the desired target. +#define PMIX_JOB_INFO "pmix.job.info" // (bool) Return information about the specified job or namespace. If + // information about a job or namespace other than the one containing + // the requesting process is desired, then the attribute array must + // contain a PMIX_JOBID or PMIX_NSPACE attribute identifying the + // desired target. Similarly, if information is requested about a + // job or namespace in a session other than the one containing the + // requesting process, then an attribute identifying the target + // session must be provided. +#define PMIX_APP_INFO "pmix.app.info" // (bool) Return information about the specified application. If information + // about an application other than the one containing the requesting + // process is desired, then the attribute array must contain a + // PMIX_APPNUM attribute identifying the desired target. Similarly, + // if information is requested about an application in a job or session + // other than the one containing the requesting process, then attributes + // identifying the target job and/or session must be provided. +#define PMIX_NODE_INFO "pmix.node.info" // (bool) Return information about the specified node. If information about a + // node other than the one containing the requesting process is desired, + // then the attribute array must contain either the PMIX_NODEID or + // PMIX_HOSTNAME attribute identifying the desired target. 
+ +/* information storage attributes */ +#define PMIX_SESSION_INFO_ARRAY "pmix.ssn.arr" // (pmix_data_array_t) Provide an array of pmix_info_t containing + // session-level information. The PMIX_SESSION_ID attribute is required + // to be included in the array. +#define PMIX_JOB_INFO_ARRAY "pmix.job.arr" // (pmix_data_array_t) Provide an array of pmix_info_t containing job-level + // information. Information is registered one job (aka namespace) at a time + // via the PMIx_server_register_nspace API. Thus, there is no requirement that + // the array contain either the PMIX_NSPACE or PMIX_JOBID attributes, though + // either or both of them may be included. +#define PMIX_APP_INFO_ARRAY "pmix.app.arr" // (pmix_data_array_t) Provide an array of pmix_info_t containing app-level + // information. The PMIX_NSPACE or PMIX_JOBID attributes of the job containing + // the application, plus its PMIX_APPNUM attribute, are required to be + // included in the array. +#define PMIX_NODE_INFO_ARRAY "pmix.node.arr" // (pmix_data_array_t) Provide an array of pmix_info_t containing node-level + // information. At a minimum, either the PMIX_NODEID or PMIX_HOSTNAME + // attribute is required to be included in the array, though both may be + // included. 
+ /* log attributes */ #define PMIX_LOG_SOURCE "pmix.log.source" // (pmix_proc_t*) ID of source of the log request #define PMIX_LOG_STDERR "pmix.log.stderr" // (char*) log string to stderr @@ -747,72 +794,67 @@ typedef int pmix_status_t; #define PMIX_ERR_V2X_BASE -100 /* v2.x communication errors */ -#define PMIX_ERR_LOST_CONNECTION_TO_SERVER (PMIX_ERR_V2X_BASE - 1) -#define PMIX_ERR_LOST_PEER_CONNECTION (PMIX_ERR_V2X_BASE - 2) -#define PMIX_ERR_LOST_CONNECTION_TO_CLIENT (PMIX_ERR_V2X_BASE - 3) +#define PMIX_ERR_LOST_CONNECTION_TO_SERVER -101 +#define PMIX_ERR_LOST_PEER_CONNECTION -102 +#define PMIX_ERR_LOST_CONNECTION_TO_CLIENT -103 /* used by the query system */ -#define PMIX_QUERY_PARTIAL_SUCCESS (PMIX_ERR_V2X_BASE - 4) +#define PMIX_QUERY_PARTIAL_SUCCESS -104 /* request responses */ -#define PMIX_NOTIFY_ALLOC_COMPLETE (PMIX_ERR_V2X_BASE - 5) +#define PMIX_NOTIFY_ALLOC_COMPLETE -105 /* job control */ -#define PMIX_JCTRL_CHECKPOINT (PMIX_ERR_V2X_BASE - 6) // monitored by client to trigger checkpoint operation -#define PMIX_JCTRL_CHECKPOINT_COMPLETE (PMIX_ERR_V2X_BASE - 7) // sent by client and monitored by server to notify that requested +#define PMIX_JCTRL_CHECKPOINT -106 // monitored by client to trigger checkpoint operation +#define PMIX_JCTRL_CHECKPOINT_COMPLETE -107 // sent by client and monitored by server to notify that requested // checkpoint operation has completed -#define PMIX_JCTRL_PREEMPT_ALERT (PMIX_ERR_V2X_BASE - 8) // monitored by client to detect RM intends to preempt -/* monitoring */ -#define PMIX_MONITOR_HEARTBEAT_ALERT (PMIX_ERR_V2X_BASE - 9) -#define PMIX_MONITOR_FILE_ALERT (PMIX_ERR_V2X_BASE - 10) +#define PMIX_JCTRL_PREEMPT_ALERT -108 // monitored by client to detect RM intends to preempt -/* define a starting point for operational error constants so - * we avoid renumbering when making additions */ -#define PMIX_ERR_OP_BASE PMIX_ERR_V2X_BASE-30 +/* monitoring */ +#define PMIX_MONITOR_HEARTBEAT_ALERT -109 +#define PMIX_MONITOR_FILE_ALERT 
-110 +#define PMIX_PROC_TERMINATED -111 +#define PMIX_ERR_INVALID_TERMINATION -112 /* operational */ -#define PMIX_ERR_EVENT_REGISTRATION (PMIX_ERR_OP_BASE - 14) -#define PMIX_ERR_JOB_TERMINATED (PMIX_ERR_OP_BASE - 15) -#define PMIX_ERR_UPDATE_ENDPOINTS (PMIX_ERR_OP_BASE - 16) -#define PMIX_MODEL_DECLARED (PMIX_ERR_OP_BASE - 17) -#define PMIX_GDS_ACTION_COMPLETE (PMIX_ERR_OP_BASE - 18) -#define PMIX_PROC_HAS_CONNECTED (PMIX_ERR_OP_BASE - 19) -#define PMIX_CONNECT_REQUESTED (PMIX_ERR_OP_BASE - 20) -#define PMIX_MODEL_RESOURCES (PMIX_ERR_OP_BASE - 21) // model resource usage has changed -#define PMIX_OPENMP_PARALLEL_ENTERED (PMIX_ERR_OP_BASE - 22) // an OpenMP parallel region has been entered -#define PMIX_OPENMP_PARALLEL_EXITED (PMIX_ERR_OP_BASE - 23) // an OpenMP parallel region has completed -#define PMIX_LAUNCH_DIRECTIVE (PMIX_ERR_OP_BASE - 24) -#define PMIX_LAUNCHER_READY (PMIX_ERR_OP_BASE - 25) -#define PMIX_OPERATION_IN_PROGRESS (PMIX_ERR_OP_BASE - 26) - - -/* define a starting point for system error constants so - * we avoid renumbering when making additions */ -#define PMIX_ERR_SYS_BASE PMIX_ERR_OP_BASE-100 +#define PMIX_ERR_EVENT_REGISTRATION -144 +#define PMIX_ERR_JOB_TERMINATED -145 +#define PMIX_ERR_UPDATE_ENDPOINTS -146 +#define PMIX_MODEL_DECLARED -147 +#define PMIX_GDS_ACTION_COMPLETE -148 +#define PMIX_PROC_HAS_CONNECTED -149 +#define PMIX_CONNECT_REQUESTED -150 +#define PMIX_MODEL_RESOURCES -151 // model resource usage has changed +#define PMIX_OPENMP_PARALLEL_ENTERED -152 // an OpenMP parallel region has been entered +#define PMIX_OPENMP_PARALLEL_EXITED -153 // an OpenMP parallel region has completed +#define PMIX_LAUNCH_DIRECTIVE -154 +#define PMIX_LAUNCHER_READY -155 +#define PMIX_OPERATION_IN_PROGRESS -156 +#define PMIX_OPERATION_SUCCEEDED -157 +#define PMIX_ERR_INVALID_OPERATION -158 /* system failures */ -#define PMIX_ERR_NODE_DOWN (PMIX_ERR_SYS_BASE - 1) -#define PMIX_ERR_NODE_OFFLINE (PMIX_ERR_SYS_BASE - 2) +#define PMIX_ERR_NODE_DOWN -231 
+#define PMIX_ERR_NODE_OFFLINE -232 +#define PMIX_ERR_SYS_OTHER -330 - -/* define a starting point for event handler error constants so - * we avoid renumbering when making additions */ -#define PMIX_ERR_EVHDLR_BASE PMIX_ERR_SYS_BASE-100 +/* define a macro for identifying system event values */ +#define PMIX_SYSTEM_EVENT(a) \ + ((a) <= PMIX_ERR_NODE_DOWN && PMIX_ERR_SYS_OTHER <= (a)) /* used by event handlers */ -#define PMIX_EVENT_NO_ACTION_TAKEN (PMIX_ERR_EVHDLR_BASE - 1) -#define PMIX_EVENT_PARTIAL_ACTION_TAKEN (PMIX_ERR_EVHDLR_BASE - 2) -#define PMIX_EVENT_ACTION_DEFERRED (PMIX_ERR_EVHDLR_BASE - 3) -#define PMIX_EVENT_ACTION_COMPLETE (PMIX_ERR_EVHDLR_BASE - 4) - +#define PMIX_EVENT_NO_ACTION_TAKEN -331 +#define PMIX_EVENT_PARTIAL_ACTION_TAKEN -332 +#define PMIX_EVENT_ACTION_DEFERRED -333 +#define PMIX_EVENT_ACTION_COMPLETE -334 /* define a starting point for PMIx internal error codes * that are never exposed outside the library */ -#define PMIX_INTERNAL_ERR_BASE -1000 +#define PMIX_INTERNAL_ERR_BASE -1330 /* define a starting point for user-level defined error * constants - negative values larger than this are guaranteed * not to conflict with PMIx values. 
Definitions should always * be based on the PMIX_EXTERNAL_ERR_BASE constant and -not- a * specific value as the value of the constant may change */ -#define PMIX_EXTERNAL_ERR_BASE -2000 +#define PMIX_EXTERNAL_ERR_BASE (PMIX_INTERNAL_ERR_BASE-2000) /* parenthesized so the expansion stays a single operand in any expression */ /**** PMIX DATA TYPES ****/ typedef uint16_t pmix_data_type_t; @@ -847,7 +889,7 @@ typedef uint16_t pmix_data_type_t; #define PMIX_BUFFER 26 #define PMIX_BYTE_OBJECT 27 #define PMIX_KVAL 28 -#define PMIX_MODEX 29 +// Hole left by deprecation/removal of PMIX_MODEX #define PMIX_PERSIST 30 #define PMIX_POINTER 31 #define PMIX_SCOPE 32 @@ -862,9 +904,7 @@ typedef uint16_t pmix_data_type_t; #define PMIX_QUERY 41 #define PMIX_COMPRESSED_STRING 42 // string compressed with zlib #define PMIX_ALLOC_DIRECTIVE 43 -/**** DEPRECATED ****/ -#define PMIX_INFO_ARRAY 44 -/**** ****/ +// Hole left by deprecation/removal of PMIX_INFO_ARRAY #define PMIX_IOF_CHANNEL 45 #define PMIX_ENVAR 46 /********************/ @@ -891,7 +931,7 @@ typedef uint8_t pmix_scope_t; #define PMIX_GLOBAL 3 // share with all procs (local + remote) #define PMIX_INTERNAL 4 // store data in the internal tables -/* define a range for data "published" by PMI +/* define a range for data "published" by PMIx */ typedef uint8_t pmix_data_range_t; #define PMIX_RANGE_UNDEF 0 @@ -917,6 +957,7 @@ typedef uint8_t pmix_persistence_t; * command directives via pmix_info_t arrays */ typedef uint32_t pmix_info_directives_t; #define PMIX_INFO_REQD 0x00000001 +#define PMIX_INFO_ARRAY_END 0x00000002 // mark the end of an array created by PMIX_INFO_CREATE /* the top 16-bits are reserved for internal use by * implementers - these may be changed inside the * PMIx library */ @@ -949,6 +990,55 @@ typedef uint16_t pmix_iof_channel_t; #define PMIX_FWD_STDDIAG_CHANNEL 0x0008 #define PMIX_FWD_ALL_CHANNELS 0x00ff +/* define some "hooks" external libraries can use to + * intercept memory allocation/release operations */ +static inline void* pmix_malloc(size_t n) +{ + return malloc(n); +} + +static 
inline void pmix_free(void *m) +{ + free(m); +} + +static inline void* pmix_calloc(size_t n, size_t m) +{ + return calloc(n, m); +} + +/* declare a convenience macro for checking keys */ +#define PMIX_CHECK_KEY(a, b) \ + (0 == strncmp((a)->key, (b), PMIX_MAX_KEYLEN)) + +#define PMIX_LOAD_KEY(a, b) \ + do { \ + memset((a), 0, PMIX_MAX_KEYLEN+1); \ + pmix_strncpy((a), (b), PMIX_MAX_KEYLEN); \ + }while(0) + +/* define a convenience macro for loading nspaces */ +#define PMIX_LOAD_NSPACE(a, b) \ + do { \ + memset((a), 0, PMIX_MAX_NSLEN+1); \ + pmix_strncpy((a), (b), PMIX_MAX_NSLEN); \ + }while(0) + +/* define a convenience macro for checking nspaces */ +#define PMIX_CHECK_NSPACE(a, b) \ + (0 == strncmp((a), (b), PMIX_MAX_NSLEN)) + +/* define a convenience macro for loading names */ +#define PMIX_LOAD_PROCID(a, b, c) \ + do { \ + PMIX_LOAD_NSPACE((a)->nspace, (b)); \ + (a)->rank = (c); \ + }while(0) + +/* define a convenience macro for checking names */ +#define PMIX_CHECK_PROCID(a, b) \ + (PMIX_CHECK_NSPACE((a)->nspace, (b)->nspace) && ((a)->rank == (b)->rank || (PMIX_RANK_WILDCARD == (a)->rank || PMIX_RANK_WILDCARD == (b)->rank))) + /**** PMIX BYTE OBJECT ****/ typedef struct pmix_byte_object { @@ -958,7 +1048,7 @@ typedef struct pmix_byte_object { #define PMIX_BYTE_OBJECT_CREATE(m, n) \ do { \ - (m) = (pmix_byte_object_t*)malloc((n) * sizeof(pmix_byte_object_t)); \ + (m) = (pmix_byte_object_t*)pmix_malloc((n) * sizeof(pmix_byte_object_t)); \ if (NULL != (m)) { \ memset((m), 0, (n)*sizeof(pmix_byte_object_t)); \ } \ @@ -973,19 +1063,22 @@ typedef struct pmix_byte_object { #define PMIX_BYTE_OBJECT_DESTRUCT(m) \ do { \ if (NULL != (m)->bytes) { \ - free((m)->bytes); \ + pmix_free((m)->bytes); \ } \ } while(0) -#define PMIX_BYTE_OBJECT_FREE(m, n) \ - do { \ - size_t _n; \ - for (_n=0; _n < n; _n++) { \ - if (NULL != (m)[_n].bytes) { \ - free((m)[_n].bytes); \ - } \ - } \ - free((m)); \ +#define PMIX_BYTE_OBJECT_FREE(m, n) \ + do { \ + size_t _bon; \ + if (NULL != (m)) { \ 
+ for (_bon=0; _bon < n; _bon++) { \ + if (NULL != (m)[_bon].bytes) { \ + pmix_free((m)[_bon].bytes); \ + } \ + } \ + pmix_free((m)); \ + (m) = NULL; \ + } \ } while(0) #define PMIX_BYTE_OBJECT_LOAD(b, d, s) \ @@ -1014,16 +1107,16 @@ typedef struct { #define PMIX_ENVAR_CREATE(m, n) \ do { \ - (m) = (pmix_envar_t*)calloc((n) , sizeof(pmix_envar_t)); \ + (m) = (pmix_envar_t*)pmix_calloc((n) , sizeof(pmix_envar_t)); \ } while (0) #define PMIX_ENVAR_FREE(m, n) \ do { \ - size_t _k; \ + size_t _ek; \ if (NULL != (m)) { \ - for (_k=0; _k < (n); _k++) { \ - PMIX_ENVAR_DESTRUCT(&(m)[_k]); \ + for (_ek=0; _ek < (n); _ek++) { \ + PMIX_ENVAR_DESTRUCT(&(m)[_ek]); \ } \ - free((m)); \ + pmix_free((m)); \ } \ } while (0) #define PMIX_ENVAR_CONSTRUCT(m) \ @@ -1035,11 +1128,11 @@ typedef struct { #define PMIX_ENVAR_DESTRUCT(m) \ do { \ if (NULL != (m)->envar) { \ - free((m)->envar); \ + pmix_free((m)->envar); \ (m)->envar = NULL; \ } \ if (NULL != (m)->value) { \ - free((m)->value); \ + pmix_free((m)->value); \ (m)->value = NULL; \ } \ } while(0) @@ -1073,14 +1166,14 @@ typedef struct pmix_data_buffer { } pmix_data_buffer_t; #define PMIX_DATA_BUFFER_CREATE(m) \ do { \ - (m) = (pmix_data_buffer_t*)calloc(1, sizeof(pmix_data_buffer_t)); \ + (m) = (pmix_data_buffer_t*)pmix_calloc(1, sizeof(pmix_data_buffer_t)); \ } while (0) #define PMIX_DATA_BUFFER_RELEASE(m) \ do { \ if (NULL != (m)->base_ptr) { \ - free((m)->base_ptr); \ + pmix_free((m)->base_ptr); \ } \ - free((m)); \ + pmix_free((m)); \ (m) = NULL; \ } while (0) #define PMIX_DATA_BUFFER_CONSTRUCT(m) \ @@ -1088,7 +1181,7 @@ typedef struct pmix_data_buffer { #define PMIX_DATA_BUFFER_DESTRUCT(m) \ do { \ if (NULL != (m)->base_ptr) { \ - free((m)->base_ptr); \ + pmix_free((m)->base_ptr); \ (m)->base_ptr = NULL; \ } \ (m)->pack_ptr = NULL; \ @@ -1114,17 +1207,17 @@ typedef struct pmix_data_buffer { /**** PMIX PROC OBJECT ****/ typedef struct pmix_proc { - char nspace[PMIX_MAX_NSLEN+1]; + pmix_nspace_t nspace; pmix_rank_t rank; } 
pmix_proc_t; #define PMIX_PROC_CREATE(m, n) \ do { \ - (m) = (pmix_proc_t*)calloc((n) , sizeof(pmix_proc_t)); \ + (m) = (pmix_proc_t*)pmix_calloc((n) , sizeof(pmix_proc_t)); \ } while (0) #define PMIX_PROC_RELEASE(m) \ do { \ - free((m)); \ + pmix_free((m)); \ (m) = NULL; \ } while (0) @@ -1138,7 +1231,7 @@ typedef struct pmix_proc { #define PMIX_PROC_FREE(m, n) \ do { \ if (NULL != (m)) { \ - free((m)); \ + pmix_free((m)); \ (m) = NULL; \ } \ } while (0) @@ -1146,7 +1239,7 @@ typedef struct pmix_proc { #define PMIX_PROC_LOAD(m, n, r) \ do { \ PMIX_PROC_CONSTRUCT((m)); \ - (void)strncpy((m)->nspace, (n), PMIX_MAX_NSLEN); \ + pmix_strncpy((m)->nspace, (n), PMIX_MAX_NSLEN); \ (m)->rank = (r); \ } while(0) @@ -1156,9 +1249,9 @@ typedef struct pmix_proc { memset((t), 0, PMIX_MAX_NSLEN+1); \ _len = strlen((c)); \ if ((_len + strlen((n))) < PMIX_MAX_NSLEN) { \ - (void)strncpy((t), (c), PMIX_MAX_NSLEN); \ + pmix_strncpy((t), (c), PMIX_MAX_NSLEN); \ (t)[_len] = ':'; \ - (void)strncpy(&(t)[_len+1], (n), PMIX_MAX_NSLEN - _len - 1); \ + pmix_strncpy(&(t)[_len+1], (n), PMIX_MAX_NSLEN - _len); \ } \ } while(0) @@ -1188,7 +1281,7 @@ typedef struct pmix_proc_info { } pmix_proc_info_t; #define PMIX_PROC_INFO_CREATE(m, n) \ do { \ - (m) = (pmix_proc_info_t*)calloc((n) , sizeof(pmix_proc_info_t)); \ + (m) = (pmix_proc_info_t*)pmix_calloc((n) , sizeof(pmix_proc_info_t)); \ } while (0) #define PMIX_PROC_INFO_RELEASE(m) \ @@ -1204,11 +1297,11 @@ typedef struct pmix_proc_info { #define PMIX_PROC_INFO_DESTRUCT(m) \ do { \ if (NULL != (m)->hostname) { \ - free((m)->hostname); \ + pmix_free((m)->hostname); \ (m)->hostname = NULL; \ } \ if (NULL != (m)->executable_name) { \ - free((m)->executable_name); \ + pmix_free((m)->executable_name); \ (m)->executable_name = NULL; \ } \ } while(0) @@ -1220,39 +1313,29 @@ typedef struct pmix_proc_info { for (_k=0; _k < (n); _k++) { \ PMIX_PROC_INFO_DESTRUCT(&(m)[_k]); \ } \ - free((m)); \ + pmix_free((m)); \ } \ } while (0) -/**** PMIX VALUE STRUCT 
****/ -typedef struct pmix_info_t pmix_info_t; +/**** PMIX DATA ARRAY STRUCT ****/ typedef struct pmix_data_array { pmix_data_type_t type; size_t size; void *array; } pmix_data_array_t; -#define PMIX_DATA_ARRAY_CONSTRUCT(m, n, t) \ - do { \ - (m)->type = (t); \ - (m)->size = (n); \ - } while(0) -#define PMIX_DATA_ARRAY_CREATE(m, n, t) \ - do { \ - (m) = (pmix_data_array_t*)calloc(1, sizeof(pmix_data_array_t)); \ - PMIX_DATA_ARRAY_CONSTRUCT((m), (n), (t)); \ - } while(0) -typedef struct pmix_info_array { - size_t size; - pmix_info_t *array; -} pmix_info_array_t; -/********************/ +/**** THE PMIX_DATA_ARRAY SUPPORT MACROS ARE DEFINED ****/ +/**** DOWN BELOW (NEAR THE BOTTOM OF THE FILE) TO ****/ +/**** AVOID CIRCULAR DEPENDENCIES ****/ + + +/**** PMIX VALUE STRUCT ****/ /* NOTE: operations can supply a collection of values under - * a single key by passing a pmix_value_t containing an - * array of type PMIX_INFO_ARRAY, with each array element + * a single key by passing a pmix_value_t containing a + * data array of type PMIX_INFO, with each array element * containing its own pmix_info_t object */ typedef struct pmix_value { @@ -1290,9 +1373,6 @@ typedef struct pmix_value { void *ptr; pmix_alloc_directive_t adir; pmix_envar_t envar; - /**** DEPRECATED ****/ - pmix_info_array_t *array; - /********************/ } data; } pmix_value_t; /* allocate and initialize a specified number of value structs */ @@ -1300,7 +1380,7 @@ typedef struct pmix_value { do { \ int _ii; \ pmix_value_t *_v; \ - (m) = (pmix_value_t*)calloc((n), sizeof(pmix_value_t)); \ + (m) = (pmix_value_t*)pmix_calloc((n), sizeof(pmix_value_t)); \ _v = (pmix_value_t*)(m); \ if (NULL != (m)) { \ for (_ii=0; _ii < (int)(n); _ii++) { \ @@ -1313,7 +1393,7 @@ typedef struct pmix_value { #define PMIX_VALUE_RELEASE(m) \ do { \ PMIX_VALUE_DESTRUCT((m)); \ - free((m)); \ + pmix_free((m)); \ (m) = NULL; \ } while (0) @@ -1329,16 +1409,36 @@ typedef struct pmix_value { #define PMIX_VALUE_FREE(m, n) \ do { \ - 
size_t _s; \ + size_t _vv; \ if (NULL != (m)) { \ - for (_s=0; _s < (n); _s++) { \ - PMIX_VALUE_DESTRUCT(&((m)[_s])); \ + for (_vv=0; _vv < (n); _vv++) { \ + PMIX_VALUE_DESTRUCT(&((m)[_vv])); \ } \ - free((m)); \ + pmix_free((m)); \ (m) = NULL; \ } \ } while (0) +#define PMIX_VALUE_LOAD(v, d, t) \ + pmix_value_load((v), (d), (t)) + +#define PMIX_VALUE_UNLOAD(r, k, d, s) \ + (r) = pmix_value_unload((k), (d), (s)) + +#define PMIX_VALUE_XFER(r, v, s) \ + do { \ + if (NULL == (v)) { \ + (v) = (pmix_value_t*)pmix_malloc(sizeof(pmix_value_t)); \ + if (NULL == (v)) { \ + (r) = PMIX_ERR_NOMEM; \ + } else { \ + (r) = pmix_value_xfer((v), (s)); \ + } \ + } else { \ + (r) = pmix_value_xfer((v), (s)); \ + } \ + } while(0) + #define PMIX_VALUE_GET_NUMBER(s, m, n, t) \ do { \ (s) = PMIX_SUCCESS; \ @@ -1368,60 +1468,49 @@ typedef struct pmix_value { (n) = (t)((m)->data.fval); \ } else if (PMIX_DOUBLE == (m)->type) { \ (n) = (t)((m)->data.dval); \ + } else if (PMIX_PID == (m)->type) { \ + (n) = (t)((m)->data.pid); \ } else { \ (s) = PMIX_ERR_BAD_PARAM; \ } \ } while(0) -/* expose some functions that are resolved in the - * PMIx library, but part of a header that - * includes internal functions - we don't - * want to expose the entire header here. 
For - * consistency, we provide macro versions as well - */ -void pmix_value_load(pmix_value_t *v, const void *data, pmix_data_type_t type); -#define PMIX_VALUE_LOAD(v, d, t) \ - pmix_value_load((v), (d), (t)) - -pmix_status_t pmix_value_unload(pmix_value_t *kv, void **data, size_t *sz); -#define PMIX_VALUE_UNLOAD(r, k, d, s) \ - (r) = pmix_value_unload((k), (d), (s)) - -pmix_status_t pmix_value_xfer(pmix_value_t *kv, pmix_value_t *src); -#define PMIX_VALUE_XFER(r, v, s) \ - do { \ - if (NULL == (v)) { \ - (v) = (pmix_value_t*)malloc(sizeof(pmix_value_t)); \ - if (NULL == (v)) { \ - (r) = PMIX_ERR_NOMEM; \ - } else { \ - (r) = pmix_value_xfer((v), (s)); \ - } \ - } else { \ - (r) = pmix_value_xfer((v), (s)); \ - } \ +#define PMIX_VALUE_COMPRESSED_STRING_UNPACK(s) \ + do { \ + char *tmp; \ + /* if this is a compressed string, then uncompress it */ \ + if (PMIX_COMPRESSED_STRING == (s)->type) { \ + pmix_util_uncompress_string(&tmp, (uint8_t*)(s)->data.bo.bytes, \ + (s)->data.bo.size); \ + if (NULL == tmp) { \ + PMIX_ERROR_LOG(PMIX_ERR_NOMEM); \ + rc = PMIX_ERR_NOMEM; \ + PMIX_VALUE_RELEASE(s); \ + val = NULL; \ + } else { \ + PMIX_VALUE_DESTRUCT(s); \ + (s)->data.string = tmp; \ + (s)->type = PMIX_STRING; \ + } \ + } \ } while(0) -pmix_status_t pmix_argv_append_nosize(char ***argv, const char *arg); -#define PMIX_ARGV_APPEND(r, a, b) \ - (r) = pmix_argv_append_nosize(&(a), (b)) - -pmix_status_t pmix_setenv(const char *name, const char *value, - bool overwrite, char ***env); -#define PMIX_SETENV(r, a, b, c) \ - (r) = pmix_setenv((a), (b), true, (c)) - /**** PMIX INFO STRUCT ****/ -struct pmix_info_t { - char key[PMIX_MAX_KEYLEN+1]; // ensure room for the NULL terminator +typedef struct pmix_info { + pmix_key_t key; pmix_info_directives_t flags; // bit-mask of flags pmix_value_t value; -}; +} pmix_info_t; /* utility macros for working with pmix_info_t structs */ #define PMIX_INFO_CREATE(m, n) \ do { \ - (m) = (pmix_info_t*)calloc((n), sizeof(pmix_info_t)); \ + 
pmix_info_t *_i; \ + (m) = (pmix_info_t*)pmix_calloc((n), sizeof(pmix_info_t)); \ + if (NULL != (m) && 0 < (n)) { /* a zero-length array has no last element to mark */ \ + _i = (pmix_info_t*)(m); \ + _i[(n)-1].flags = PMIX_INFO_ARRAY_END; \ + } \ } while (0) #define PMIX_INFO_CONSTRUCT(m) \ @@ -1435,69 +1524,53 @@ struct pmix_info_t { PMIX_VALUE_DESTRUCT(&(m)->value); \ } while (0) -#define PMIX_INFO_FREE(m, n) \ - do { \ - size_t _s; \ - if (NULL != (m)) { \ - for (_s=0; _s < (n); _s++) { \ - PMIX_INFO_DESTRUCT(&((m)[_s])); \ - } \ - free((m)); \ - (m) = NULL; \ - } \ +#define PMIX_INFO_FREE(m, n) \ + do { \ + size_t _is; \ + if (NULL != (m)) { \ + for (_is=0; _is < (n); _is++) { \ + PMIX_INFO_DESTRUCT(&((m)[_is])); \ + } \ + pmix_free((m)); \ + (m) = NULL; \ + } \ } while (0) #define PMIX_INFO_LOAD(m, k, v, t) \ do { \ if (NULL != (k)) { \ - (void)strncpy((m)->key, (k), PMIX_MAX_KEYLEN); \ + pmix_strncpy((m)->key, (k), PMIX_MAX_KEYLEN); \ } \ (m)->flags = 0; \ pmix_value_load(&((m)->value), (v), (t)); \ } while (0) -#define PMIX_INFO_XFER(d, s) \ - do { \ - if (NULL != (s)->key) { \ - (void)strncpy((d)->key, (s)->key, PMIX_MAX_KEYLEN); \ - } \ - (d)->flags = (s)->flags; \ - pmix_value_xfer(&(d)->value, &(s)->value); \ +#define PMIX_INFO_XFER(d, s) \ + do { \ + if (NULL != (s)->key) { \ + pmix_strncpy((d)->key, (s)->key, PMIX_MAX_KEYLEN); \ + } \ + (d)->flags = (s)->flags; \ + pmix_value_xfer(&(d)->value, (pmix_value_t*)&(s)->value); \ } while(0) + +/* macros for setting and unsetting the "reqd" flag + * in a pmix_info_t */ #define PMIX_INFO_REQUIRED(m) \ (m)->flags |= PMIX_INFO_REQD #define PMIX_INFO_OPTIONAL(m) \ (m)->flags &= ~PMIX_INFO_REQD +/* macros for testing the "reqd" flag in a pmix_info_t */ #define PMIX_INFO_IS_REQUIRED(m) \ (m)->flags & PMIX_INFO_REQD #define PMIX_INFO_IS_OPTIONAL(m) \ !((m)->flags & PMIX_INFO_REQD) -#define PMIX_INFO_UNLOAD(r, v, l) \ - do { \ - pmix_info_t *_info; \ - size_t _n, _ninfo; \ - pmix_kval_t *_kv; \ - _info = (pmix_info_t*)(v)->data.darray->array; \ - _ninfo = (v)->data.darray->size; \ 
- for (_n = 0; _n < _ninfo; _n++){ \ - _kv = PMIX_NEW(pmix_kval_t); \ - if (NULL == _kv) { \ - (r) = PMIX_ERR_NOMEM; \ - break; \ - } \ - if (NULL != _info[_n].key) { \ - _kv->key = strdup(_info[_n].key); \ - } \ - PMIX_VALUE_XFER((r), _kv->value, &_info[_n].value);\ - if (PMIX_SUCCESS != (r)) { \ - PMIX_RELEASE(_kv); \ - break; \ - } \ - pmix_list_append((l), &_kv->super); \ - } \ - } while(0) +/* macro for testing end of the array - the expansion is parenthesized so it can be negated or combined with other operators */ +#define PMIX_INFO_IS_END(m) \ + ((m)->flags & PMIX_INFO_ARRAY_END) + /* define a special macro for checking if a boolean * info is true - when info structs are provided, a * type of PMIX_UNDEF is taken to imply a boolean "true" @@ -1510,20 +1583,20 @@ struct pmix_info_t { /**** PMIX LOOKUP RETURN STRUCT ****/ typedef struct pmix_pdata { pmix_proc_t proc; - char key[PMIX_MAX_KEYLEN+1]; // ensure room for the NULL terminator + pmix_key_t key; pmix_value_t value; } pmix_pdata_t; /* utility macros for working with pmix_pdata_t structs */ #define PMIX_PDATA_CREATE(m, n) \ do { \ - (m) = (pmix_pdata_t*)calloc((n), sizeof(pmix_pdata_t)); \ + (m) = (pmix_pdata_t*)pmix_calloc((n), sizeof(pmix_pdata_t)); \ } while (0) #define PMIX_PDATA_RELEASE(m) \ do { \ PMIX_VALUE_DESTRUCT(&(m)->value); \ - free((m)); \ + pmix_free((m)); \ (m) = NULL; \ } while (0) @@ -1540,12 +1613,13 @@ typedef struct pmix_pdata { #define PMIX_PDATA_FREE(m, n) \ do { \ - size_t _s; \ - if (NULL != (m)) { \ - for (_s=0; _s < (n); _s++) { \ - PMIX_PDATA_DESTRUCT(&((m)[_s])); \ + size_t _ps; \ + pmix_pdata_t *_pdf = (pmix_pdata_t*)(m); \ + if (NULL != _pdf) { \ + for (_ps=0; _ps < (n); _ps++) { \ + PMIX_PDATA_DESTRUCT(&(_pdf[_ps])); \ } \ - free((m)); \ + pmix_free((m)); \ (m) = NULL; \ } \ } while (0) @@ -1554,9 +1628,9 @@ typedef struct pmix_pdata { do { \ if (NULL != (m)) { \ memset((m), 0, sizeof(pmix_pdata_t)); \ - (void)strncpy((m)->proc.nspace, (p)->nspace, PMIX_MAX_NSLEN); \ + pmix_strncpy((m)->proc.nspace, (p)->nspace, PMIX_MAX_NSLEN); \ (m)->proc.rank = (p)->rank; \ 
- (void)strncpy((m)->key, (k), PMIX_MAX_KEYLEN); \ + pmix_strncpy((m)->key, (k), PMIX_MAX_KEYLEN); \ pmix_value_load(&((m)->value), (v), (t)); \ } \ } while (0) @@ -1565,9 +1639,9 @@ typedef struct pmix_pdata { do { \ if (NULL != (d)) { \ memset((d), 0, sizeof(pmix_pdata_t)); \ - (void)strncpy((d)->proc.nspace, (s)->proc.nspace, PMIX_MAX_NSLEN); \ + pmix_strncpy((d)->proc.nspace, (s)->proc.nspace, PMIX_MAX_NSLEN); \ (d)->proc.rank = (s)->proc.rank; \ - (void)strncpy((d)->key, (s)->key, PMIX_MAX_KEYLEN); \ + pmix_strncpy((d)->key, (s)->key, PMIX_MAX_KEYLEN); \ pmix_value_xfer(&((d)->value), &((s)->value)); \ } \ } while (0) @@ -1586,13 +1660,19 @@ typedef struct pmix_app { /* utility macros for working with pmix_app_t structs */ #define PMIX_APP_CREATE(m, n) \ do { \ - (m) = (pmix_app_t*)calloc((n), sizeof(pmix_app_t)); \ + (m) = (pmix_app_t*)pmix_calloc((n), sizeof(pmix_app_t)); \ } while (0) +#define PMIX_APP_INFO_CREATE(m, n) \ + do { \ + (m)->ninfo = (n); \ + PMIX_INFO_CREATE((m)->info, (m)->ninfo); \ + } while(0) + #define PMIX_APP_RELEASE(m) \ do { \ PMIX_APP_DESTRUCT((m)); \ - free((m)); \ + pmix_free((m)); \ (m) = NULL; \ } while (0) @@ -1603,46 +1683,44 @@ typedef struct pmix_app { #define PMIX_APP_DESTRUCT(m) \ do { \ - size_t _ii; \ + size_t _aii; \ if (NULL != (m)->cmd) { \ - free((m)->cmd); \ + pmix_free((m)->cmd); \ (m)->cmd = NULL; \ } \ if (NULL != (m)->argv) { \ - for (_ii=0; NULL != (m)->argv[_ii]; _ii++) { \ - free((m)->argv[_ii]); \ + for (_aii=0; NULL != (m)->argv[_aii]; _aii++) { \ + pmix_free((m)->argv[_aii]); \ } \ - free((m)->argv); \ + pmix_free((m)->argv); \ (m)->argv = NULL; \ } \ if (NULL != (m)->env) { \ - for (_ii=0; NULL != (m)->env[_ii]; _ii++) { \ - free((m)->env[_ii]); \ + for (_aii=0; NULL != (m)->env[_aii]; _aii++) { \ + pmix_free((m)->env[_aii]); \ } \ - free((m)->env); \ + pmix_free((m)->env); \ (m)->env = NULL; \ } \ if (NULL != (m)->cwd) { \ - free((m)->cwd); \ + pmix_free((m)->cwd); \ (m)->cwd = NULL; \ } \ if (NULL != 
(m)->info) { \ - for (_ii=0; _ii < (m)->ninfo; _ii++) { \ - PMIX_INFO_DESTRUCT(&(m)->info[_ii]); \ - } \ - free((m)->info); \ + PMIX_INFO_FREE((m)->info, (m)->ninfo); \ (m)->info = NULL; \ + (m)->ninfo = 0; \ } \ } while (0) #define PMIX_APP_FREE(m, n) \ do { \ - size_t _s; \ + size_t _as; \ if (NULL != (m)) { \ - for (_s=0; _s < (n); _s++) { \ - PMIX_APP_DESTRUCT(&((m)[_s])); \ + for (_as=0; _as < (n); _as++) { \ + PMIX_APP_DESTRUCT(&((m)[_as])); \ } \ - free((m)); \ + pmix_free((m)); \ (m) = NULL; \ } \ } while (0) @@ -1657,13 +1735,19 @@ typedef struct pmix_query { /* utility macros for working with pmix_query_t structs */ #define PMIX_QUERY_CREATE(m, n) \ do { \ - (m) = (pmix_query_t*)calloc((n) , sizeof(pmix_query_t)); \ + (m) = (pmix_query_t*)pmix_calloc((n) , sizeof(pmix_query_t)); \ } while (0) +#define PMIX_QUERY_QUALIFIERS_CREATE(m, n) \ + do { \ + (m)->nqual = (n); \ + PMIX_INFO_CREATE((m)->qualifiers, (m)->nqual); \ + } while(0) + #define PMIX_QUERY_RELEASE(m) \ do { \ PMIX_QUERY_DESTRUCT((m)); \ - free((m)); \ + pmix_free((m)); \ (m) = NULL; \ } while (0) @@ -1674,82 +1758,180 @@ typedef struct pmix_query { #define PMIX_QUERY_DESTRUCT(m) \ do { \ - size_t _ii; \ + size_t _qi; \ if (NULL != (m)->keys) { \ - for (_ii=0; NULL != (m)->keys[_ii]; _ii++) { \ - free((m)->keys[_ii]); \ + for (_qi=0; NULL != (m)->keys[_qi]; _qi++) { \ + pmix_free((m)->keys[_qi]); \ } \ - free((m)->keys); \ + pmix_free((m)->keys); \ (m)->keys = NULL; \ } \ if (NULL != (m)->qualifiers) { \ - for (_ii=0; _ii < (m)->nqual; _ii++) { \ - PMIX_INFO_DESTRUCT(&(m)->qualifiers[_ii]); \ - } \ - free((m)->qualifiers); \ + PMIX_INFO_FREE((m)->qualifiers, (m)->nqual); \ (m)->qualifiers = NULL; \ + (m)->nqual = 0; \ } \ } while (0) #define PMIX_QUERY_FREE(m, n) \ do { \ - size_t _s; \ + size_t _qs; \ if (NULL != (m)) { \ - for (_s=0; _s < (n); _s++) { \ - PMIX_QUERY_DESTRUCT(&((m)[_s])); \ + for (_qs=0; _qs < (n); _qs++) { \ + PMIX_QUERY_DESTRUCT(&((m)[_qs])); \ } \ - free((m)); \ + 
pmix_free((m)); \ (m) = NULL; \ } \ } while (0) +/**** GENERIC HELPER MACROS ****/ +/* Append a string (by value) to a new or existing NULL-terminated + * argv array. + * + * @param argv Pointer to an argv array. + * @param str Pointer to the string to append. + * + * @retval PMIX_SUCCESS On success + * @retval PMIX_ERROR On failure + * + * This function adds a string to an argv array of strings by value; + * it is permissible to pass a string on the stack as the str + * argument to this function. + * + * To add the first entry to an argv array, call this function with + * (*argv == NULL). This function will allocate an array of length + * 2; the first entry will point to a copy of the string passed in + * arg, the second entry will be set to NULL. + * + * If (*argv != NULL), it will be realloc'ed to be 1 (char*) larger, + * and the next-to-last entry will point to a copy of the string + * passed in arg. The last entry will be set to NULL. + * + * Just to reinforce what was stated above: the string is copied by + * value into the argv array; there is no need to keep the original + * string (i.e., the arg parameter) after invoking this function. 
+ */ +#define PMIX_ARGV_APPEND(r, a, b) \ + (r) = pmix_argv_append_nosize(&(a), (b)) -/**** PMIX MODEX STRUCT ****/ -typedef struct pmix_modex_data { - char nspace[PMIX_MAX_NSLEN+1]; - int rank; - uint8_t *blob; - size_t size; -} pmix_modex_data_t; -/* utility macros for working with pmix_modex_t structs */ -#define PMIX_MODEX_CREATE(m, n) \ - do { \ - (m) = (pmix_modex_data_t*)calloc((n) , sizeof(pmix_modex_data_t)); \ - } while (0) +/* Prepend a string to a new or existing NULL-terminated + * argv array - same as above only prepend + */ +#define PMIX_ARGV_PREPEND(r, a, b) \ + (r) = pmix_argv_prepend_nosize(a, b) -#define PMIX_MODEX_RELEASE(m) \ - do { \ - PMIX_MODEX_DESTRUCT((m)); \ - free((m)); \ - (m) = NULL; \ - } while (0) +/* Append to an argv-style array, but only if the provided argument + * doesn't already exist somewhere in the array. Ignore the size of the array. + * + * @param argv Pointer to an argv array. + * @param str Pointer to the string to append. + * @param bool Whether or not to overwrite a matching value if found + * + * @retval PMIX_SUCCESS On success + * @retval PMIX_ERROR On failure + * + * This function is identical to the pmix_argv_append_nosize() function + * except that it only appends the provided argument if it does not already + * exist in the provided array, or overwrites it if it is. + */ +#define PMIX_ARGV_APPEND_UNIQUE(r, a, b, c) \ + (r) = pmix_argv_append_unique_nosize(a, b, c) -#define PMIX_MODEX_CONSTRUCT(m) \ - do { \ - memset((m), 0, sizeof(pmix_modex_data_t)); \ - } while (0) +/* Free a NULL-terminated argv array. + * + * @param argv Argv array to free. + * + * This function frees an argv array and all of the strings that it + * contains. Since the argv parameter is passed by value, it is not + * set to NULL in the caller's scope upon return. + * + * It is safe to invoke this function with a NULL pointer. It is + * not safe to invoke this function with a non-NULL-terminated argv + * array. 
+ */ +#define PMIX_ARGV_FREE(a) pmix_argv_free(a) -#define PMIX_MODEX_DESTRUCT(m) \ - do { \ - if (NULL != (m)->blob) { \ - free((m)->blob); \ - (m)->blob = NULL; \ - } \ - } while (0) +/* + * Split a string into a NULL-terminated argv array. Do not include empty + * strings in result array. + * + * @param src_string Input string. + * @param delimiter Delimiter character. + * + * @retval argv pointer to new argv array on success + * @retval NULL on error + * + * All strings are inserted into the argv array by value; the + * newly-allocated array makes no references to the src_string + * argument (i.e., it can be freed after calling this function + * without invalidating the output argv). + */ +#define PMIX_ARGV_SPLIT(a, b, c) \ + (a) = pmix_argv_split(b, c) -#define PMIX_MODEX_FREE(m, n) \ - do { \ - size_t _s; \ - if (NULL != (m)) { \ - for (_s=0; _s < (n); _s++) { \ - PMIX_MODEX_DESTRUCT(&((m)[_s])); \ - } \ - free((m)); \ - (m) = NULL; \ - } \ - } while (0) +/* + * Return the length of a NULL-terminated argv array. + * + * @param argv The input argv array. + * + * @retval 0 If NULL is passed as argv. + * @retval count Number of entries in the argv array. + * + * The argv array must be NULL-terminated. + */ +#define PMIX_ARGV_COUNT(r, a) \ + (r) = pmix_argv_count(a) + +/* + * Join all the elements of an argv array into a single + * newly-allocated string. + * + * @param argv The input argv array. + * @param delimiter Delimiter character placed between each argv string. + * + * @retval new_string Output string on success. + * @retval NULL On failure. + * + * Similar to the Perl join function, this function takes an input + * argv and joins them into a single string separated by the + * delimiter character. + * + * It is the caller's responsibility to free the returned string. + */ +#define PMIX_ARGV_JOIN(a, b, c) \ + (a) = pmix_argv_join(b, c) + +/* + * Copy a NULL-terminated argv array. + * + * @param argv The input argv array. 
+ * + * @retval argv Copied argv array on success. + * @retval NULL On failure. + * + * Copy an argv array, including copying all of its strings. + * Specifically, the output argv will be an array of the same length + * as the input argv, and strcmp(argv_in[i], argv_out[i]) will be 0. + */ +#define PMIX_ARGV_COPY(a, b) \ + (a) = pmix_argv_copy(b) + +/* + * Set an environmental parameter in an env array + * + * @retval r Return pmix_status_t status + * + * @param a Name of the environmental param + * + * @param b String value of the environmental param + * + * @param c Address of the NULL-terminated env array + */ +#define PMIX_SETENV(r, a, b, c) \ + (r) = pmix_setenv((a), (b), true, (c)) /**** CALLBACK FUNCTIONS FOR NON-BLOCKING OPERATIONS ****/ @@ -1778,7 +1960,7 @@ typedef void (*pmix_modex_cbfunc_t)(pmix_status_t status, * released by the library upon return from the callback function, so * the receiver must copy it if it needs to be retained */ typedef void (*pmix_spawn_cbfunc_t)(pmix_status_t status, - char nspace[], void *cbdata); + pmix_nspace_t nspace, void *cbdata); /* define a callback for common operations that simply return * a status. Examples include the non-blocking versions of @@ -1886,6 +2068,10 @@ typedef void (*pmix_notification_fn_t)(size_t evhdlr_registration_id, typedef void (*pmix_hdlr_reg_cbfunc_t)(pmix_status_t status, size_t refid, void *cbdata); +/* maintain backward compatibility with v2 definition - change of name */ +typedef void (*pmix_evhdlr_reg_cbfunc_t)(pmix_status_t status, + size_t evhdlr_ref, + void *cbdata); /* define a callback function for calls to PMIx_Get_nb. 
The status * indicates if the requested data was found or not - a pointer to the @@ -2064,7 +2250,7 @@ PMIX_EXPORT void PMIx_Deregister_event_handler(size_t evhdlr_ref, PMIX_EXPORT pmix_status_t PMIx_Notify_event(pmix_status_t status, const pmix_proc_t *source, pmix_data_range_t range, - pmix_info_t info[], size_t ninfo, + const pmix_info_t info[], size_t ninfo, pmix_op_cbfunc_t cbfunc, void *cbdata); /* Provide a string representation for several types of value. Note @@ -2098,7 +2284,7 @@ PMIX_EXPORT const char* PMIx_Get_version(void); * proc. This is data that has only internal scope - it will * never be "pushed" externally */ PMIX_EXPORT pmix_status_t PMIx_Store_internal(const pmix_proc_t *proc, - const char *key, pmix_value_t *val); + const pmix_key_t key, pmix_value_t *val); /** * Top-level interface function to pack one or more values into a @@ -2265,7 +2451,7 @@ PMIX_EXPORT pmix_status_t PMIx_Data_pack(const pmix_proc_t *target, * status_code = PMIx_Data_unpack(buffer, (void*)&dest, &num_values, PMIX_INT32); * * num_values = 5; - * string_array = malloc(num_values*sizeof(char *)); + * string_array = pmix_malloc(num_values*sizeof(char *)); * status_code = PMIx_Data_unpack(buffer, (void*)(string_array), &num_values, PMIX_STRING); * * @endcode @@ -2328,144 +2514,197 @@ PMIX_EXPORT pmix_status_t PMIx_Data_copy_payload(pmix_data_buffer_t *dest, pmix_data_buffer_t *src); -/* Key-Value pair management macros */ -// TODO: add all possible types/fields here. 
+static inline void pmix_darray_destruct(pmix_data_array_t *m); -#define PMIX_VAL_FIELD_int(x) ((x)->data.integer) -#define PMIX_VAL_FIELD_uint32_t(x) ((x)->data.uint32) -#define PMIX_VAL_FIELD_uint16_t(x) ((x)->data.uint16) -#define PMIX_VAL_FIELD_string(x) ((x)->data.string) -#define PMIX_VAL_FIELD_float(x) ((x)->data.fval) -#define PMIX_VAL_FIELD_byte(x) ((x)->data.byte) -#define PMIX_VAL_FIELD_flag(x) ((x)->data.flag) - -#define PMIX_VAL_TYPE_int PMIX_INT -#define PMIX_VAL_TYPE_uint32_t PMIX_UINT32 -#define PMIX_VAL_TYPE_uint16_t PMIX_UINT16 -#define PMIX_VAL_TYPE_string PMIX_STRING -#define PMIX_VAL_TYPE_float PMIX_FLOAT -#define PMIX_VAL_TYPE_byte PMIX_BYTE -#define PMIX_VAL_TYPE_flag PMIX_BOOL - -#define PMIX_VAL_set_assign(_v, _field, _val ) \ - do { \ - (_v)->type = PMIX_VAL_TYPE_ ## _field; \ - PMIX_VAL_FIELD_ ## _field((_v)) = _val; \ - } while (0) - -#define PMIX_VAL_set_strdup(_v, _field, _val ) \ - do { \ - (_v)->type = PMIX_VAL_TYPE_ ## _field; \ - PMIX_VAL_FIELD_ ## _field((_v)) = strdup(_val); \ - } while (0) - -#define PMIX_VAL_SET_int PMIX_VAL_set_assign -#define PMIX_VAL_SET_uint32_t PMIX_VAL_set_assign -#define PMIX_VAL_SET_uint16_t PMIX_VAL_set_assign -#define PMIX_VAL_SET_string PMIX_VAL_set_strdup -#define PMIX_VAL_SET_float PMIX_VAL_set_assign -#define PMIX_VAL_SET_byte PMIX_VAL_set_assign -#define PMIX_VAL_SET_flag PMIX_VAL_set_assign - -#define PMIX_VAL_SET(_v, _field, _val ) \ - PMIX_VAL_SET_ ## _field(_v, _field, _val) - -#define PMIX_VAL_cmp_val(_val1, _val2) ((_val1) != (_val2)) -#define PMIX_VAL_cmp_float(_val1, _val2) (((_val1)>(_val2))?(((_val1)-(_val2))>0.000001):(((_val2)-(_val1))>0.000001)) -#define PMIX_VAL_cmp_ptr(_val1, _val2) strncmp(_val1, _val2, strlen(_val1)+1) - -#define PMIX_VAL_CMP_int PMIX_VAL_cmp_val -#define PMIX_VAL_CMP_uint32_t PMIX_VAL_cmp_val -#define PMIX_VAL_CMP_uint16_t PMIX_VAL_cmp_val -#define PMIX_VAL_CMP_float PMIX_VAL_cmp_float -#define PMIX_VAL_CMP_string PMIX_VAL_cmp_ptr -#define PMIX_VAL_CMP_byte 
PMIX_VAL_cmp_val -#define PMIX_VAL_CMP_flag PMIX_VAL_cmp_val - -#define PMIX_VAL_ASSIGN(_v, _field, _val) \ - PMIX_VAL_set_assign(_v, _field, _val) - -#define PMIX_VAL_CMP(_field, _val1, _val2) \ - PMIX_VAL_CMP_ ## _field(_val1, _val2) - -#define PMIX_VAL_FREE(_v) \ - PMIx_free_value_data(_v) - -static inline void pmix_value_destruct(pmix_value_t * m) { - size_t _n; +static inline void pmix_value_destruct(pmix_value_t * m) +{ if (PMIX_STRING == (m)->type) { if (NULL != (m)->data.string) { - free((m)->data.string); + pmix_free((m)->data.string); (m)->data.string = NULL; } } else if ((PMIX_BYTE_OBJECT == (m)->type) || (PMIX_COMPRESSED_STRING == (m)->type)) { if (NULL != (m)->data.bo.bytes) { - free((m)->data.bo.bytes); + pmix_free((m)->data.bo.bytes); (m)->data.bo.bytes = NULL; (m)->data.bo.size = 0; } } else if (PMIX_DATA_ARRAY == (m)->type) { - if (NULL != (m)->data.darray && NULL != (m)->data.darray->array) { - if (PMIX_STRING == (m)->data.darray->type) { - char **_str = (char**)(m)->data.darray->array; - for (_n=0; _n < (m)->data.darray->size; _n++) { - if (NULL != _str[_n]) { - free(_str[_n]); - } - } - } else if (PMIX_PROC_INFO == (m)->data.darray->type) { - pmix_proc_info_t *_info = - (pmix_proc_info_t*)(m)->data.darray->array; - for (_n=0; _n < (m)->data.darray->size; _n++) { - PMIX_PROC_INFO_DESTRUCT(&_info[_n]); - } - } else if (PMIX_INFO == (m)->data.darray->type) { - pmix_info_t *_info = - (pmix_info_t*)(m)->data.darray->array; - for (_n=0; _n < (m)->data.darray->size; _n++) { - pmix_value_destruct(&_info[_n].value); - } - } else if (PMIX_BYTE_OBJECT == (m)->data.darray->type) { - pmix_byte_object_t *_obj = - (pmix_byte_object_t*)(m)->data.darray->array; - for (_n=0; _n < (m)->data.darray->size; _n++) { - if (NULL != _obj[_n].bytes) { - free(_obj[_n].bytes); - } - } - } - free((m)->data.darray->array); - (m)->data.darray->array = NULL; - (m)->data.darray->size = 0; - } if (NULL != (m)->data.darray) { - free((m)->data.darray); + 
pmix_darray_destruct((m)->data.darray); + pmix_free((m)->data.darray); (m)->data.darray = NULL; } - /**** DEPRECATED ****/ - } else if (PMIX_INFO_ARRAY == (m)->type) { - pmix_info_t *_p = (pmix_info_t*)((m)->data.array->array); - for (_n=0; _n < (m)->data.array->size; _n++) { - if (PMIX_STRING == _p[_n].value.type) { - if (NULL != _p[_n].value.data.string) { - free(_p[_n].value.data.string); - } - } else if (PMIX_BYTE_OBJECT == _p[_n].value.type) { - if (NULL != _p[_n].value.data.bo.bytes) { - free(_p[_n].value.data.bo.bytes); - } - } else if (PMIX_PROC_INFO == _p[_n].value.type) { - PMIX_PROC_INFO_DESTRUCT(_p[_n].value.data.pinfo); - } - } - free(_p); - /********************/ } else if (PMIX_ENVAR == (m)->type) { PMIX_ENVAR_DESTRUCT(&(m)->data.envar); + } else if (PMIX_PROC == (m)->type) { + PMIX_PROC_RELEASE((m)->data.proc); + } +} + +static inline void pmix_darray_destruct(pmix_data_array_t *m) +{ + if (NULL != m) { + if (PMIX_INFO == m->type) { + pmix_info_t *_info = (pmix_info_t*)m->array; + PMIX_INFO_FREE(_info, m->size); + } else if (PMIX_PROC == m->type) { + pmix_proc_t *_p = (pmix_proc_t*)m->array; + PMIX_PROC_FREE(_p, m->size); + } else if (PMIX_PROC_INFO == m->type) { + pmix_proc_info_t *_pi = (pmix_proc_info_t*)m->array; + PMIX_PROC_INFO_FREE(_pi, m->size); + } else if (PMIX_ENVAR == m->type) { + pmix_envar_t *_e = (pmix_envar_t*)m->array; + PMIX_ENVAR_FREE(_e, m->size); + } else if (PMIX_VALUE == m->type) { + pmix_value_t *_v = (pmix_value_t*)m->array; + PMIX_VALUE_FREE(_v, m->size); + } else if (PMIX_PDATA == m->type) { + pmix_pdata_t *_pd = (pmix_pdata_t*)m->array; + PMIX_PDATA_FREE(_pd, m->size); + } else if (PMIX_QUERY == m->type) { + pmix_query_t *_q = (pmix_query_t*)m->array; + PMIX_QUERY_FREE(_q, m->size); + } else if (PMIX_APP == m->type) { + pmix_app_t *_a = (pmix_app_t*)m->array; + PMIX_APP_FREE(_a, m->size); + } else if (PMIX_BYTE_OBJECT == m->type) { + pmix_byte_object_t *_b = (pmix_byte_object_t*)m->array; + PMIX_BYTE_OBJECT_FREE(_b, 
m->size); + } else if (PMIX_STRING == m->type) { + char **_s = (char**)m->array; + size_t _si; + for (_si=0; _si < m->size; _si++) { + pmix_free(_s[_si]); + } + pmix_free(m->array); + m->array = NULL; + } else { + pmix_free(m->array); + } + } +} + +#define PMIX_DATA_ARRAY_CONSTRUCT(m, n, t) \ + do { \ + (m)->type = (t); \ + (m)->size = (n); \ + if (0 < (n)) { \ + if (PMIX_INFO == (t)) { \ + PMIX_INFO_CREATE((m)->array, (n)); \ + } else if (PMIX_PROC == (t)) { \ + PMIX_PROC_CREATE((m)->array, (n)); \ + } else if (PMIX_PROC_INFO == (t)) { \ + PMIX_PROC_INFO_CREATE((m)->array, (n)); \ + } else if (PMIX_ENVAR == (t)) { \ + PMIX_ENVAR_CREATE((m)->array, (n)); \ + } else if (PMIX_VALUE == (t)) { \ + PMIX_VALUE_CREATE((m)->array, (n)); \ + } else if (PMIX_PDATA == (t)) { \ + PMIX_PDATA_CREATE((m)->array, (n)); \ + } else if (PMIX_QUERY == (t)) { \ + PMIX_QUERY_CREATE((m)->array, (n)); \ + } else if (PMIX_APP == (t)) { \ + PMIX_APP_CREATE((m)->array, (n)); \ + } else if (PMIX_BYTE_OBJECT == (t)) { \ + PMIX_BYTE_OBJECT_CREATE((m)->array, (n)); \ + } else if (PMIX_ALLOC_DIRECTIVE == (t) || \ + PMIX_PROC_STATE == (t) || \ + PMIX_PERSIST == (t) || \ + PMIX_SCOPE == (t) || \ + PMIX_DATA_RANGE == (t) || \ + PMIX_BYTE == (t) || \ + PMIX_INT8 == (t) || \ + PMIX_UINT8 == (t)) { \ + (m)->array = pmix_calloc((n), sizeof(int8_t)); \ + } else if (PMIX_POINTER == (t)) { \ + (m)->array = pmix_calloc((n), sizeof(void*)); \ + } else if (PMIX_STRING == (t)) { \ + (m)->array = pmix_calloc((n), sizeof(char*)); \ + } else if (PMIX_SIZE == (t)) { \ + (m)->array = pmix_calloc((n), sizeof(size_t)); \ + } else if (PMIX_PID == (t)) { \ + (m)->array = pmix_calloc((n), sizeof(pid_t)); \ + } else if (PMIX_INT == (t) || \ + PMIX_UINT == (t) || \ + PMIX_STATUS == (t)) { \ + (m)->array = pmix_calloc((n), sizeof(int)); \ + } else if (PMIX_IOF_CHANNEL == (t) || \ + PMIX_DATA_TYPE == (t) || \ + PMIX_INT16 == (t) || \ + PMIX_UINT16 == (t)) { \ + (m)->array = pmix_calloc((n), sizeof(int16_t)); \ + } else if 
(PMIX_PROC_RANK == (t) || \ + PMIX_INFO_DIRECTIVES == (t) || \ + PMIX_INT32 == (t) || \ + PMIX_UINT32 == (t)) { \ + (m)->array = pmix_calloc((n), sizeof(int32_t)); \ + } else if (PMIX_INT64 == (t) || \ + PMIX_UINT64 == (t)) { \ + (m)->array = pmix_calloc((n), sizeof(int64_t)); \ + } else if (PMIX_FLOAT == (t)) { \ + (m)->array = pmix_calloc((n), sizeof(float)); \ + } else if (PMIX_DOUBLE == (t)) { \ + (m)->array = pmix_calloc((n), sizeof(double)); \ + } else if (PMIX_TIMEVAL == (t)) { \ + (m)->array = pmix_calloc((n), sizeof(struct timeval)); \ + } else if (PMIX_TIME == (t)) { \ + (m)->array = pmix_calloc((n), sizeof(time_t)); \ + } \ + } else { \ + (m)->array = NULL; \ + } \ + } while(0) +#define PMIX_DATA_ARRAY_CREATE(m, n, t) \ + do { \ + (m) = (pmix_data_array_t*)pmix_calloc(1, sizeof(pmix_data_array_t)); \ + PMIX_DATA_ARRAY_CONSTRUCT((m), (n), (t)); \ + } while(0) + +#define PMIX_DATA_ARRAY_DESTRUCT(m) pmix_darray_destruct(m) + +#define PMIX_DATA_ARRAY_FREE(m) \ + do { \ + if (NULL != (m)) { \ + PMIX_DATA_ARRAY_DESTRUCT(m); \ + pmix_free((m)); \ + (m) = NULL; \ + } \ + } while(0) + + +/** + * Provide a safe version of strncpy that doesn't generate + * a ton of spurious warnings. Note that not every environment + * provides nice string functions, and we aren't concerned about + * max performance here + * + * @param dest Destination string. + * @param src Source string. 
+ * @param len Size of the dest array - 1 + * + */ +static inline void pmix_strncpy(char *dest, const char *src, size_t len) +{ + size_t i, k; + char *new_dest = dest; + + /* use an algorithm that also protects against + * non-NULL-terminated src strings */ + for (i=0, k=0; i <= len; ++i, ++src, ++new_dest) { + ++k; + *new_dest = *src; + if ('\0' == *src) { + break; + } } + dest[k-1] = '\0'; } +#include <pmix_extend.h> + #if defined(c_plusplus) || defined(__cplusplus) } #endif diff --git a/opal/mca/pmix/pmix3x/pmix/include/pmix_extend.h b/opal/mca/pmix/pmix3x/pmix/include/pmix_extend.h new file mode 100644 index 00000000000..f6f320abab5 --- /dev/null +++ b/opal/mca/pmix/pmix3x/pmix/include/pmix_extend.h @@ -0,0 +1,155 @@ +/* + * Copyright (c) 2013-2019 Intel, Inc. All rights reserved. + * Copyright (c) 2015 Artem Y. Polyakov . + * All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. + * $COPYRIGHT$ + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer listed + * in this license in the documentation and/or other materials + * provided with the distribution. + * + * - Neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * The copyright holders provide no reassurances that the source code + * provided does not infringe any patent, copyright, or any other + * intellectual property rights of third parties. 
The copyright holders + * disclaim any liability to any recipient for claims brought against + * recipient by any third party for infringement of that parties + * intellectual property rights. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * $HEADER$ + */ + +#ifndef PMIx_EXTEND_H +#define PMIx_EXTEND_H + +#if defined(c_plusplus) || defined(__cplusplus) +extern "C" { +#endif + +/* expose some functions that are resolved in the + * PMIx library, but part of a header that + * includes internal functions - we don't + * want to expose the entire header here. 
These + * back the associated macros included in the + * PMIx Standard + */ +void pmix_value_load(pmix_value_t *v, const void *data, pmix_data_type_t type); + +pmix_status_t pmix_value_unload(pmix_value_t *kv, void **data, size_t *sz); + +pmix_status_t pmix_value_xfer(pmix_value_t *kv, const pmix_value_t *src); + +pmix_status_t pmix_argv_append_nosize(char ***argv, const char *arg); + +pmix_status_t pmix_argv_prepend_nosize(char ***argv, const char *arg); + +pmix_status_t pmix_argv_append_unique_nosize(char ***argv, const char *arg, bool overwrite); + +void pmix_argv_free(char **argv); + +char **pmix_argv_split(const char *src_string, int delimiter); + +int pmix_argv_count(char **argv); + +char *pmix_argv_join(char **argv, int delimiter); + +char **pmix_argv_copy(char **argv); + +pmix_status_t pmix_setenv(const char *name, const char *value, + bool overwrite, char ***env); + + +/* the following are a set of legacy macros not included in the + * PMIx Standard, but used in some codes (e.g., the Slurm plugin). 
+ * These should be considered "deprecated" and will be removed + * in the next major release of the PRI */ +#define PMIX_VAL_FIELD_int(x) ((x)->data.integer) +#define PMIX_VAL_FIELD_uint32_t(x) ((x)->data.uint32) +#define PMIX_VAL_FIELD_uint16_t(x) ((x)->data.uint16) +#define PMIX_VAL_FIELD_string(x) ((x)->data.string) +#define PMIX_VAL_FIELD_float(x) ((x)->data.fval) +#define PMIX_VAL_FIELD_byte(x) ((x)->data.byte) +#define PMIX_VAL_FIELD_flag(x) ((x)->data.flag) + +#define PMIX_VAL_TYPE_int PMIX_INT +#define PMIX_VAL_TYPE_uint32_t PMIX_UINT32 +#define PMIX_VAL_TYPE_uint16_t PMIX_UINT16 +#define PMIX_VAL_TYPE_string PMIX_STRING +#define PMIX_VAL_TYPE_float PMIX_FLOAT +#define PMIX_VAL_TYPE_byte PMIX_BYTE +#define PMIX_VAL_TYPE_flag PMIX_BOOL + +#define PMIX_VAL_set_assign(_v, _field, _val ) \ + do { \ + (_v)->type = PMIX_VAL_TYPE_ ## _field; \ + PMIX_VAL_FIELD_ ## _field((_v)) = _val; \ + } while (0) + +#define PMIX_VAL_set_strdup(_v, _field, _val ) \ + do { \ + (_v)->type = PMIX_VAL_TYPE_ ## _field; \ + PMIX_VAL_FIELD_ ## _field((_v)) = strdup(_val); \ + } while (0) + +#define PMIX_VAL_SET_int PMIX_VAL_set_assign +#define PMIX_VAL_SET_uint32_t PMIX_VAL_set_assign +#define PMIX_VAL_SET_uint16_t PMIX_VAL_set_assign +#define PMIX_VAL_SET_string PMIX_VAL_set_strdup +#define PMIX_VAL_SET_float PMIX_VAL_set_assign +#define PMIX_VAL_SET_byte PMIX_VAL_set_assign +#define PMIX_VAL_SET_flag PMIX_VAL_set_assign + +#define PMIX_VAL_SET(_v, _field, _val ) \ + PMIX_VAL_SET_ ## _field(_v, _field, _val) + +#define PMIX_VAL_cmp_val(_val1, _val2) ((_val1) != (_val2)) +#define PMIX_VAL_cmp_float(_val1, _val2) (((_val1)>(_val2))?(((_val1)-(_val2))>0.000001):(((_val2)-(_val1))>0.000001)) +#define PMIX_VAL_cmp_ptr(_val1, _val2) strncmp(_val1, _val2, strlen(_val1)+1) + +#define PMIX_VAL_CMP_int PMIX_VAL_cmp_val +#define PMIX_VAL_CMP_uint32_t PMIX_VAL_cmp_val +#define PMIX_VAL_CMP_uint16_t PMIX_VAL_cmp_val +#define PMIX_VAL_CMP_float PMIX_VAL_cmp_float +#define PMIX_VAL_CMP_string 
PMIX_VAL_cmp_ptr +#define PMIX_VAL_CMP_byte PMIX_VAL_cmp_val +#define PMIX_VAL_CMP_flag PMIX_VAL_cmp_val + +#define PMIX_VAL_ASSIGN(_v, _field, _val) \ + PMIX_VAL_set_assign(_v, _field, _val) + +#define PMIX_VAL_CMP(_field, _val1, _val2) \ + PMIX_VAL_CMP_ ## _field(_val1, _val2) + +#define PMIX_VAL_FREE(_v) \ + PMIx_free_value_data(_v) + +#if defined(c_plusplus) || defined(__cplusplus) +} +#endif + +#endif diff --git a/opal/mca/pmix/pmix3x/pmix/include/pmix_rename.h.in b/opal/mca/pmix/pmix3x/pmix/include/pmix_rename.h.in index e5a74b5c2e3..a06bbfdfde7 100644 --- a/opal/mca/pmix/pmix3x/pmix/include/pmix_rename.h.in +++ b/opal/mca/pmix/pmix3x/pmix/include/pmix_rename.h.in @@ -444,6 +444,7 @@ #define pmix_output_close @PMIX_RENAME@pmix_output_close #define pmix_output_finalize @PMIX_RENAME@pmix_output_finalize #define pmix_output_get_verbosity @PMIX_RENAME@pmix_output_get_verbosity +#define pmix_output_check_verbosity @PMIX_RENAME@pmix_output_check_verbosity #define pmix_output_hexdump @PMIX_RENAME@pmix_output_hexdump #define pmix_output_init @PMIX_RENAME@pmix_output_init #define pmix_output_open @PMIX_RENAME@pmix_output_open @@ -452,7 +453,6 @@ #define pmix_output_set_output_file_info @PMIX_RENAME@pmix_output_set_output_file_info #define pmix_output_set_verbosity @PMIX_RENAME@pmix_output_set_verbosity #define pmix_output_switch @PMIX_RENAME@pmix_output_switch -#define pmix_output_verbose @PMIX_RENAME@pmix_output_verbose #define pmix_output_vverbose @PMIX_RENAME@pmix_output_vverbose #define pmix_path_access @PMIX_RENAME@pmix_path_access #define pmix_path_df @PMIX_RENAME@pmix_path_df diff --git a/opal/mca/pmix/pmix3x/pmix/include/pmix_server.h b/opal/mca/pmix/pmix3x/pmix/include/pmix_server.h index ecc08307fec..4d3f36bbd43 100644 --- a/opal/mca/pmix/pmix3x/pmix/include/pmix_server.h +++ b/opal/mca/pmix/pmix3x/pmix/include/pmix_server.h @@ -596,7 +596,7 @@ PMIX_EXPORT pmix_status_t PMIx_generate_ppn(const char *input, char **ppn); * for the PMIx server library to 
correctly handle collectives * as a collective operation call can occur before all the * procs have been started */ -PMIX_EXPORT pmix_status_t PMIx_server_register_nspace(const char nspace[], int nlocalprocs, +PMIX_EXPORT pmix_status_t PMIx_server_register_nspace(const pmix_nspace_t nspace, int nlocalprocs, pmix_info_t info[], size_t ninfo, pmix_op_cbfunc_t cbfunc, void *cbdata); @@ -605,7 +605,7 @@ PMIX_EXPORT pmix_status_t PMIx_server_register_nspace(const char nspace[], int n * intended to support persistent PMIx servers by providing * an opportunity for the host RM to tell the PMIx server * library to release all memory for a completed job */ -PMIX_EXPORT void PMIx_server_deregister_nspace(const char nspace[], +PMIX_EXPORT void PMIx_server_deregister_nspace(const pmix_nspace_t nspace, pmix_op_cbfunc_t cbfunc, void *cbdata); /* Register a client process with the PMIx server library. The @@ -676,7 +676,7 @@ typedef void (*pmix_setup_application_cbfunc_t)(pmix_status_t status, * operation in case network libraries need to perform some action * before responding. 
Any returned env will be distributed along * with the application */ -PMIX_EXPORT pmix_status_t PMIx_server_setup_application(const char nspace[], +PMIX_EXPORT pmix_status_t PMIx_server_setup_application(const pmix_nspace_t nspace, pmix_info_t info[], size_t ninfo, pmix_setup_application_cbfunc_t cbfunc, void *cbdata); @@ -692,7 +692,7 @@ PMIX_EXPORT pmix_status_t PMIx_server_setup_application(const char nspace[], * for the first local client - i.e., they will only be executed * once for a given nspace */ -PMIX_EXPORT pmix_status_t PMIx_server_setup_local_support(const char nspace[], +PMIX_EXPORT pmix_status_t PMIx_server_setup_local_support(const pmix_nspace_t nspace, pmix_info_t info[], size_t ninfo, pmix_op_cbfunc_t cbfunc, void *cbdata); diff --git a/opal/mca/pmix/pmix3x/pmix/include/pmix_version.h.in b/opal/mca/pmix/pmix3x/pmix/include/pmix_version.h.in index a88e3a0c0e3..af4a00cd5df 100644 --- a/opal/mca/pmix/pmix3x/pmix/include/pmix_version.h.in +++ b/opal/mca/pmix/pmix3x/pmix/include/pmix_version.h.in @@ -2,6 +2,9 @@ * Copyright (c) 2016 Mellanox Technologies, Inc. * All rights reserved. * Copyright (c) 2018 IBM Corporation. All rights reserved. + * Copyright (c) 2018-2019 Intel, Inc. All rights reserved. + * Copyright (c) 2019 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -17,4 +20,6 @@ #define PMIX_VERSION_MAJOR @pmixmajor@ #define PMIX_VERSION_MINOR @pmixminor@ #define PMIX_VERSION_RELEASE @pmixrelease@ + +#define PMIX_NUMERIC_VERSION @pmixnumeric@ #endif diff --git a/opal/mca/pmix/pmix3x/pmix/man/Makefile.am b/opal/mca/pmix/pmix3x/pmix/man/Makefile.am deleted file mode 100644 index 7c0f8bffe46..00000000000 --- a/opal/mca/pmix/pmix3x/pmix/man/Makefile.am +++ /dev/null @@ -1,60 +0,0 @@ -# -# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana -# University Research and Technology -# Corporation. All rights reserved. 
-# Copyright (c) 2004-2005 The University of Tennessee and The University -# of Tennessee Research Foundation. All rights -# reserved. -# Copyright (c) 2004-2009 High Performance Computing Center Stuttgart, -# University of Stuttgart. All rights reserved. -# Copyright (c) 2004-2005 The Regents of the University of California. -# All rights reserved. -# Copyright (c) 2006-2016 Cisco Systems, Inc. All rights reserved. -# Copyright (c) 2012-2013 Los Alamos National Security, Inc. All rights reserved. -# Copyright (c) 2013-2016 Intel, Inc. All rights reserved -# $COPYRIGHT$ -# -# Additional copyrights may follow -# -# $HEADER$ -# - -if !PMIX_EMBEDDED_MODE - -man_MANS = \ - man3/pmix_init.3 \ - man3/pmix_finalize.3 \ - man3/pmix_initialized.3 \ - man3/pmix_abort.3 \ - man3/pmix_put.3 \ - man3/pmix_commit.3 \ - man7/pmix.7 \ - man7/pmix_constants.7 - -EXTRA_DIST = $(man_MANS) - -man3/pmix_init.3: - $(top_srcdir)/contrib/md2nroff.pl --source=pmix_init.3.md; - -man3/pmix_finalize.3: - $(top_srcdir)/contrib/md2nroff.pl --source=pmix_finalize.3.md; - -man3/pmix_initialized.3: - $(top_srcdir)/contrib/md2nroff.pl --source=pmix_initialized.3.md; - -man3/pmix_abort.3: - $(top_srcdir)/contrib/md2nroff.pl --source=pmix_abort.3.md; - -man3/pmix_put.3: - $(top_srcdir)/contrib/md2nroff.pl --source=pmix_put.3.md; - -man3/pmix_commit.3: - $(top_srcdir)/contrib/md2nroff.pl --source=pmix_commit.3.md; - -man7/pmix.7: - $(top_srcdir)/contrib/md2nroff.pl --source=pmix.7.md; - -man7/pmix_constants.7: - $(top_srcdir)/contrib/md2nroff.pl --source=pmix_constants.7.md; - -endif # !PMIX_EMBEDDED_MODE diff --git a/opal/mca/pmix/pmix3x/pmix/man/README b/opal/mca/pmix/pmix3x/pmix/man/README deleted file mode 100644 index 73c605cb7f0..00000000000 --- a/opal/mca/pmix/pmix3x/pmix/man/README +++ /dev/null @@ -1,186 +0,0 @@ -This file describes how the developer side of man pages work in PMIx. 
- -The Definitive Source Of Truth man pages are the Markdown man pages in -this directory (i.e., the files ending in ..md. If you want to -edit man pages, you need to edit the ..md pages. Do NOT edit -the . nroff man pages directly; these files are automatically -generated -- you will lose any manual edits the next time those files -are generated. - -The Markdown web pages are rendered in two different ways: - -1. Nroff man pages. These man pages are put into the `master` branch - and later included in PMIx distribution tarballs. - -2. HTML. The http://open-mpi.github.io/pmix/ web site (which is - served by the Github web servers) automatically renders the content - of the `gh-pages` branch of the PMIx repo. - -Markdown syntax -=============== - -The definitive man pages are the Markdown man pages. To edit them, -you need to understand the syntax used in these files. - -The canonical reference for Markdown is here: - - http://daringfireball.net/projects/markdown/syntax - -Note, however, that the PMIx Markdown man pages are served via -the Github Pages web servers, which use a system called Jekyll to -render the Markdown into HTML (https://github.com/jekyll/jekyll). -As such, there are a few Jekyll annotations in the PMIx Markdown -pages (so that they can be served up properly from Github's web -servers). - -If you're familiar with Markdown, you should be ok. But there are a -small number differences and quirks with which you should be familiar: - -1. The first few lines of each file are a YAML header and include - directive for Jekyll. DO NOT REMOVE THIS HEADER (or the file will - not render to HTML properly when served up from Github's web - servers). Here's a sample YAML header from pmix.7.md: - ---- -layout: page -title: PMIx(7) -tagline: PMIx Programmer's Manual ---- -{% include JB/setup %} - - The whole block is needed, and it must be the first input in the - file. - -2. 
In Github-flavored Markdown, you may be used to using "fenced - blocks" for multi-line code blocks, like this: - -```c -void my_c_code(void) { - int i; - /* Hello, world */ -} -``` - - Such fenced blocks will not work in Jekyll. Instead, you must - delineate your code blocks with Jekyll delimiters: - -{% highlight c %} -void my_c_code(void) { - int i; - /* Hello, world */ -} -{% endhighlight %} - - This will result in a pretty code box in the rendered HTML output, - and it will be syntax highlighted for the C language. Leave the - "c" out of the first directive if your multi-line block is not C - code, and then it won't do C syntax highlighting. - -3. The PMIx man pages are full of 2-level lists of things. E.g., - lists of functions, and then in some of the functions, there is a - sub-list of flags that can be used with that function. - - The convention used in the PMIx man pages is to highlight a - word/phrase representing each list item. Then use a ":" to start - the next line that describes that item. For example: - -*PMIX_FLOAT* -: A single-precision floating point value (IEEE 754). - - This will make the token "PMIX_FLOAT" be highlighted in both - HTML and nroff output, and then the paragraph that comes after it - will be properly delimited and indented. - - To make a sub-list inside an item, use the same format, but prefix - the sub-list items with "-", like this: - -*scope* -: Flag that controls the visible scope of the data. - -- *PMIX_GLOBAL* -: Indicates that the data is to be visible to all applications executed - by this user. - -4. There may be a small number of places in the PMIx man pages where - there are unnumbered lists with deliberate line breaks. For - example: - -foo / bar -baz / goo -: Something really intelligent - - Note the first line is "foo / bar", and then there is - a deliberate line break, and then the second line is "baz / goo". - - To effect the deliberate line break, you have to put two blank - spaces after "bar". 
To show that graphically (showing "_" - for " "): - -foo / bar__ -baz / goo -: Something really intelligent - -5. The "SEE ALSO" items at the end of each man page are linked to - their corresponding man pages. Note that the links are made to - ".html" files -- *not* ".md" files. If you care, the reason is - because the Github web servers statically generate .html files from - the .md files when you git push to the gh-pages branch. Hence, the - man pages are actually served from static .html files on the Github - web servers. - - Also, since links are meaningless in nroff, they are effectively - ignored in the resulting nroff output. - -Workflow -======== - -The workflow is like this: - -1. Developer edits ..md files for new changes. - -2. In a perfect world, the developer makes perfect edits and pushes - the changes up to `master`. An automated cron job will eventually - notice the new pages, and do two things: - - 2a. Copy the modified Markdown pages to the `gh-master` branch (so - that they go live on the web site). - - 2b. Re-generate any relevant nroff man pages in `master`. - - The automated cron job actually does exist and does these things, - but it should only be relied upon once a developer is sure that - their changes to the Markdown man pages are correct. - -3. To check that the changes will render properly, developers should - do two things: - - 3a. Run "make nroff". This will convert all the Markdown man pages - into nroff man pages (in the man/ directory). Check to ensure - that your changes look appropriate in the rendered nroff - output. - - *CAUTION* The "pandoc" utility is used to generate the nroff - files from the Markdown source. Different versions of pandoc - will generate slightly different nroff output. Meaning: when - you run "make nroff", you might end up changing every nroff man - page, simply because your version of pandoc is different than - the last person who ran it. Please only check in your changes, - if possible. - - 3b. 
Check out the `gh-pages` branch from PMIx and copy any - modified Markdown pages into the "master/man" directory (i.e., - the directory for man pages from the master development - branch). - - Then run the "jekyll serve" command from the top-level - directory in `gh-pages`. This runs a local web server on your - computer and renders the Markdown files into HTML such that you - can point a browser to http://127.0.0.1:4000 and see the web - site. - - If you make any changes to files in the tree where "jekyll" is - running, Jekyll will notice the changes and automatically - re-generate the relevant HTML. Meaning: you can just refresh - the page from http://127.0.0.1:4000 in your browser and you'll - see your changes -- there's no need to restart Jekyll to force - it to notice new changes. diff --git a/opal/mca/pmix/pmix3x/pmix/man/man3/pmix_abort.3 b/opal/mca/pmix/pmix3x/pmix/man/man3/pmix_abort.3 deleted file mode 100644 index ea0690ae943..00000000000 --- a/opal/mca/pmix/pmix3x/pmix/man/man3/pmix_abort.3 +++ /dev/null @@ -1,62 +0,0 @@ -.TH "pmix_abort" "3" "2016\-03\-01" "PMIx Programmer\[aq]s Manual" "\@VERSION\@" -.SH NAME -.PP -PMIx_Abort \- Abort the specified processes -.SH SYNOPSIS -.IP -.nf -\f[C] -#include\ - -pmix\\_status\\_t\ PMIx\\_Abort(int\ status,\ const\ char\ msg[], -\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ pmix\\_proc\\_t\ procs[],\ size_t\ nprocs); -\f[] -.fi -.SH ARGUMENTS -.PP -\f[I]status\f[] : Status value to be returned. -A value of zero is permitted by PMIx, but may not be returned by some -resource managers. -.PP -\f[I]msg\f[] : A string message to be displayed -.PP -\f[I]procs\f[] : An array of pmix_proc_t structures defining the -processes to be aborted. -A \f[I]NULL\f[] for the proc array indicates that all processes in the -caller\[aq]s nspace are to be aborted. -A wildcard value for the rank in any structure indicates that all -processes in that nspace are to be aborted. 
-.PP -\f[I]nprocs\f[] : Number of pmix_proc_t structures in the \f[I]procs\f[] -array -.SH DESCRIPTION -.PP -Request that the provided array of procs be aborted, returning the -provided \f[I]status\f[] and printing the provided message. -A \f[I]NULL\f[] for the proc array indicates that all processes in the -caller\[aq]s nspace are to be aborted. -.PP -The response to this request is somewhat dependent on the specific -resource manager and its configuration (e.g., some resource managers -will not abort the application if the provided \f[I]status\f[] is zero -unless specifically configured to do so), and thus lies outside the -control of PMIx itself. -However, the client will inform the RM of the request that the -application be aborted, regardless of the value of the provided -\f[I]status\f[]. -.PP -Passing a \f[I]NULL\f[] msg parameter is allowed. -Note that race conditions caused by multiple processes calling -PMIx_Abort are left to the server implementation to resolve with regard -to which status is returned and what messages (if any) are printed. -.SH RETURN VALUE -.PP -Returns PMIX_SUCCESS on success. -On error, a negative value corresponding to a PMIx errno is returned. -.SH ERRORS -.PP -PMIx errno values are defined in \f[C]pmix_common.h\f[]. -.SH NOTES -.SH SEE ALSO -.SH AUTHORS -PMIx. diff --git a/opal/mca/pmix/pmix3x/pmix/man/man3/pmix_commit.3 b/opal/mca/pmix/pmix3x/pmix/man/man3/pmix_commit.3 deleted file mode 100644 index 31dd4fc2f76..00000000000 --- a/opal/mca/pmix/pmix3x/pmix/man/man3/pmix_commit.3 +++ /dev/null @@ -1,35 +0,0 @@ -.TH "pmix_commit" "3" "2016\-03\-01" "PMIx Programmer\[aq]s Manual" "\@VERSION\@" -.SH NAME -.PP -PMIx_Commit \- Push all previously \f[I]PMIx_Put\f[] values to the local -PMIx server. 
-.SH SYNOPSIS -.IP -.nf -\f[C] -#include\ - -pmix\\_status\\_t\ PMIx_Commit(void); -\f[] -.fi -.SH ARGUMENTS -.PP -\f[I]none\f[] -.SH DESCRIPTION -.PP -This is an asynchronous operation \- the library will immediately return -to the caller while the data is transmitted to the local server in the -background -.SH RETURN VALUE -.PP -Returns PMIX_SUCCESS on success. -On error, a negative value corresponding to a PMIx errno is returned. -.SH ERRORS -.PP -PMIx errno values are defined in \f[C]pmix_common.h\f[]. -.SH NOTES -.SH SEE ALSO -.PP -\f[C]PMIx_Put\f[](3) -.SH AUTHORS -PMIx. diff --git a/opal/mca/pmix/pmix3x/pmix/man/man3/pmix_finalize.3 b/opal/mca/pmix/pmix3x/pmix/man/man3/pmix_finalize.3 deleted file mode 100644 index 6b15282945b..00000000000 --- a/opal/mca/pmix/pmix3x/pmix/man/man3/pmix_finalize.3 +++ /dev/null @@ -1,45 +0,0 @@ -.TH "pmix_finalize" "3" "2016\-03\-01" "PMIx Programmer\[aq]s Manual" "\@VERSION\@" -.SH NAME -.PP -PMIx_Finalize \- Finalize the PMIx Client -.SH SYNOPSIS -.IP -.nf -\f[C] -#include\ - -pmix\\_status\\_t\ PMIx\\_Finalize(const\ pmix\\_info\\_t\ info[],\ size_t\ ninfo); -\f[] -.fi -.SH ARGUMENTS -.PP -\f[I]info\f[] : An optional array of pmix_info_t structures -.PP -\f[I]ninfo\f[] : Number of pmix_info_t structures in the pmix_info_t -array -.SH DESCRIPTION -.PP -Finalize the PMIx client, closing the connection with the local PMIx -server and releasing all malloc\[aq]d memory. -.PP -The info array is used to pass user requests regarding the fence -operation. -This can include: -.IP "(a)" 4 -PMIX_EMBED_BARRIER \- By default, \f[I]PMIx_Finalize\f[] does not -include an internal barrier operation. -This attribute directs \f[I]PMIx_Finalize\f[] to execute a barrier as -part of the finalize operation. -.SH RETURN VALUE -.PP -Returns PMIX_SUCCESS on success. -On error, a negative value corresponding to a PMIx errno is returned. -.SH ERRORS -.PP -PMIx errno values are defined in \f[C]pmix_common.h\f[]. 
-.SH NOTES -.SH SEE ALSO -.PP -\f[C]PMIx_Init\f[](3) -.SH AUTHORS -PMIx. diff --git a/opal/mca/pmix/pmix3x/pmix/man/man3/pmix_init.3 b/opal/mca/pmix/pmix3x/pmix/man/man3/pmix_init.3 deleted file mode 100644 index b988200b33b..00000000000 --- a/opal/mca/pmix/pmix3x/pmix/man/man3/pmix_init.3 +++ /dev/null @@ -1,52 +0,0 @@ -.TH "pmix_init" "3" "2016\-03\-01" "PMIx Programmer\[aq]s Manual" "\@VERSION\@" -.SH NAME -.PP -PMIx_Init \- Initialize the PMIx Client -.SH SYNOPSIS -.IP -.nf -\f[C] -#include\ - -pmix\\_status\\_t\ PMIx_Init(pmix\\_proc\\_t\ *proc); -\f[] -.fi -.SH ARGUMENTS -.PP -\f[I]proc\f[] : Pointer to a pmix_proc_t object in which the -client\[aq]s namespace and rank are to be returned. -.SH DESCRIPTION -.PP -Initialize the PMIx client, returning the process identifier assigned to -this client\[aq]s application in the provided pmix_proc_t struct. -Passing a value of \f[I]NULL\f[] for this parameter is allowed if the -user wishes solely to initialize the PMIx system and does not require -return of the identifier at that time. -.PP -When called, the PMIx client will check for the required connection -information of the local PMIx server and will establish the connection. -If the information is not found, or the server connection fails, then an -appropriate error constant will be returned. -.PP -If successful, the function will return PMIX_SUCCESS and will fill the -provided structure with the server\-assigned namespace and rank of the -process within the application. -In addition, all startup information provided by the resource manager -will be made available to the client process via subsequent calls to -\f[I]PMIx_Get\f[]. -.PP -Note that the PMIx client library is referenced counted, and so multiple -calls to PMIx_Init are allowed. -Thus, one way to obtain the namespace and rank of the process is to -simply call PMIx_Init with a non\-NULL parameter. -.SH RETURN VALUE -.PP -Returns PMIX_SUCCESS on success. 
-On error, a negative value corresponding to a PMIx errno is returned. -.SH ERRORS -.PP -PMIx errno values are defined in \f[C]pmix_common.h\f[]. -.SH NOTES -.SH SEE ALSO -.SH AUTHORS -PMIx. diff --git a/opal/mca/pmix/pmix3x/pmix/man/man3/pmix_initialized.3 b/opal/mca/pmix/pmix3x/pmix/man/man3/pmix_initialized.3 deleted file mode 100644 index 6e2170f35c4..00000000000 --- a/opal/mca/pmix/pmix3x/pmix/man/man3/pmix_initialized.3 +++ /dev/null @@ -1,30 +0,0 @@ -.TH "pmix_initialized" "3" "2016\-03\-01" "PMIx Programmer\[aq]s Manual" "\@VERSION\@" -.SH NAME -.PP -PMIx_Initialized \- Check if \f[I]PMIx_Init\f[] has been called -.SH SYNOPSIS -.IP -.nf -\f[C] -#include\ - -int\ PMIx_Initialized(void); -\f[] -.fi -.SH ARGUMENTS -.PP -\f[I]none\f[] -.SH DESCRIPTION -.PP -Check to see if the PMIx Client library has been intialized -.SH RETURN VALUE -.PP -Returns \f[I]true\f[] if the PMIx Client has been initialized, and -\f[I]false\f[] if not. -.SH ERRORS -.SH NOTES -.SH SEE ALSO -.PP -\f[C]PMIx_Init\f[](3) -.SH AUTHORS -PMIx. diff --git a/opal/mca/pmix/pmix3x/pmix/man/man3/pmix_put.3 b/opal/mca/pmix/pmix3x/pmix/man/man3/pmix_put.3 deleted file mode 100644 index e960583e404..00000000000 --- a/opal/mca/pmix/pmix3x/pmix/man/man3/pmix_put.3 +++ /dev/null @@ -1,60 +0,0 @@ -.TH "pmix_put" "3" "2016\-03\-01" "PMIx Programmer\[aq]s Manual" "\@VERSION\@" -.SH NAME -.PP -PMIx_Put \- Push a value into the client\[aq]s namespace -.SH SYNOPSIS -.IP -.nf -\f[C] -#include\ - -pmix\\_status\\_t\ PMIx\\_Init(pmix\\_scope\\_t\ scope,\ const\ char\ key[],\ pmix\\_value\\_t\ *val); -\f[] -.fi -.SH ARGUMENTS -.PP -\f[I]scope\f[] : Defines a scope for data "put" by PMI per the -following: -.IP "(a)" 4 -PMI_LOCAL \- the data is intended only for other application processes -on the same node. -Data marked in this way will not be included in data packages sent to -remote requestors -.IP "(b)" 4 -PMI_REMOTE \- the data is intended solely for application processes on -remote nodes. 
-Data marked in this way will not be shared with other processes on the -same node -.IP "(c)" 4 -PMI_GLOBAL \- the data is to be shared with all other requesting -processes, regardless of location -.PP -\f[I]key\f[] : String key identifying the information. -This can be either one of the PMIx defined attributes, or a -user\-defined value -.PP -\f[I]val\f[] : Pointer to a pmix_value_t structure containing the data -to be pushed along with the type of the provided data. -.SH DESCRIPTION -.PP -Push a value into the client\[aq]s namespace. -The client library will cache the information locally until -\f[I]PMIx_Commit\f[] is called. -The provided scope value is passed to the local PMIx server, which will -distribute the data as directed. -.SH RETURN VALUE -.PP -Returns PMIX_SUCCESS on success. -On error, a negative value corresponding to a PMIx errno is returned. -.SH ERRORS -.PP -PMIx errno values are defined in \f[C]pmix_common.h\f[]. -.SH NOTES -.PP -See \[aq]pmix_common.h\[aq] for definition of the pmix_value_t -structure. -.SH SEE ALSO -.PP -\f[C]PMIx_Constants\f[](7), \f[C]PMIx_Structures\f[](7) -.SH AUTHORS -PMIx. diff --git a/opal/mca/pmix/pmix3x/pmix/man/man7/pmix.7 b/opal/mca/pmix/pmix3x/pmix/man/man7/pmix.7 deleted file mode 100644 index d2e545af94f..00000000000 --- a/opal/mca/pmix/pmix3x/pmix/man/man7/pmix.7 +++ /dev/null @@ -1,35 +0,0 @@ -.TH "pmix" "7" "2015\-10\-29" "PMIx Programmer\[aq]s Manual" "\@VERSION\@" -.SH NAME -.PP -Process Management Interface \- Exascale -.SH SYNOPSIS -.IP -.nf -\f[C] -#include\ -\f[] -.fi -.SH OVERVIEW -.PP -The Process Management Interface (PMI) has been used for quite some time -as a means of exchanging wireup information needed for interprocess -communication. -Two versions (PMI\-1 and PMI\-2) have been released as part of the MPICH -effort. 
-While PMI\-2 demonstrates better scaling properties than its PMI\-1 -predecessor, attaining rapid launch and wireup of the roughly 1M -processes executing across 100k nodes expected for exascale operations -remains challenging. -.PP -PMI Exascale (PMIx) represents an attempt to resolve these questions by -providing an extended version of the PMI standard specifically designed -to support clusters up to and including exascale sizes. -The overall objective of the project is not to branch the existing -pseudo\-standard definitions \- in fact, PMIx fully supports both of the -existing PMI\-1 and PMI\-2 APIs \- but rather to (a) augment and extend -those APIs to eliminate some current restrictions that impact -scalability, and (b) provide a reference implementation of the -PMI\-server that demonstrates the desired level of scalability. -.SH SEE ALSO -.SH AUTHORS -PMIx. diff --git a/opal/mca/pmix/pmix3x/pmix/man/man7/pmix_constants.7 b/opal/mca/pmix/pmix3x/pmix/man/man7/pmix_constants.7 deleted file mode 100644 index bd6414cb7a2..00000000000 --- a/opal/mca/pmix/pmix3x/pmix/man/man7/pmix_constants.7 +++ /dev/null @@ -1,79 +0,0 @@ -.TH "pmix_constants" "7" "2016\-03\-01" "PMIx Programmer\[aq]s Manual" "\@VERSION\@" -.SH NAME -.PP -PMIx Constants -.SH SYNOPSIS -.IP -.nf -\f[C] -#include\ -\f[] -.fi -.SH OVERVIEW -.PP -PMIx relies on the following types of constants: -.PP -\f[I]Maximum Sizes\f[] : In order to minimize malloc performance -penalties, PMIx utilizes constant\-sized arrays wherever possible. -These constants provide the user with the maximum size of the various -array types. -.PP -\f[I]Attributes\f[] : . -.PP -\f[I]Errors\f[] : PMIx uses negative error constants, with 0 indicating -"success". -.SH MAXIMUM SIZES -.PP -The . -.PP -\f[I]PMIX_MAX_NSLEN\f[] : The maximum length of a namespace. -Note that any declaration of an array to hold a key string must include -one extra space for the terminating \f[I]NULL\f[]. 
-.PP -\f[I]PMIX_MAX_KEYLEN\f[] : Maximum length of the key string used in -structures such as the \f[I]pmix\f[]info_t_. -Note that any declaration of an array to hold a key string must include -one extra space for the terminating \f[I]NULL\f[]. -.SH ATTRIBUTES -.PP -Define a set of "standard" PMIx attributes that can be queried using the -PMIx_Get function. -Implementations (and users) are free to extend as desired \- thus, -functions calling PMIx_Get must be capable of handling the "not found" -condition. -Note that these are attributes of the system and the job as opposed to -values the application (or underlying programming library) might choose -to expose \- i.e., they are values provided by the resource manager as -opposed to the application. -Thus, these keys are RESERVED for use by PMIx, and users should avoid -defining any attribute starting with the keyword \f[I]PMIX\f[]. -.PP -A list of the current PMIx attributes, and the type of their associated -data value, is provided here. -.PP -\f[I]PMIX_ATTR_UNDEF (NULL)\f[] : Used to initialize an attribute field, -indicating that the attribute has not yet been assigned. -.PP -\f[I]PMIX_USERID (uint32_t)\f[] : . -.PP -\f[I]PMIX_GRPID (uint32_t)\f[] : An access domain represents a single -logical connection into a fabric. -It may map to a single physical or virtual NIC or a port. -An access domain defines the boundary across which fabric resources may -be associated. -Each access domain belongs to a single fabric domain. -.PP -\f[I]PMIX_CPUSET (char*)\f[] : . -.SH ERROR CONSTANTS -.PP -\&. -.PP -\f[I]PMIX_SUCCESS\f[] : Indicates that the operation was successful. -.PP -\f[I]PMIX_ERROR\f[] : A general error code \- an error occurred, but no -specific reason can be provided. -.SH SEE ALSO -.PP -\f[C]pmix\f[](7) -.SH AUTHORS -PMIx. 
diff --git a/opal/mca/pmix/pmix3x/pmix/src/atomics/sys/Makefile.include b/opal/mca/pmix/pmix3x/pmix/src/atomics/sys/Makefile.include index dbd384e3e5a..20b9a3c8f84 100644 --- a/opal/mca/pmix/pmix3x/pmix/src/atomics/sys/Makefile.include +++ b/opal/mca/pmix/pmix3x/pmix/src/atomics/sys/Makefile.include @@ -15,7 +15,7 @@ # reserved. # Copyright (c) 2017 Research Organization for Information Science # and Technology (RIST). All rights reserved. -# Copyright (c) 2017 Intel, Inc. All rights reserved. +# Copyright (c) 2017-2018 Intel, Inc. All rights reserved. # $COPYRIGHT$ # # Additional copyrights may follow @@ -30,7 +30,8 @@ headers += \ atomics/sys/atomic.h \ atomics/sys/atomic_impl.h \ atomics/sys/timer.h \ - atomics/sys/cma.h + atomics/sys/cma.h \ + atomics/sys/atomic_stdc.h include atomics/sys/x86_64/Makefile.include include atomics/sys/arm/Makefile.include diff --git a/opal/mca/pmix/pmix3x/pmix/src/atomics/sys/architecture.h b/opal/mca/pmix/pmix3x/pmix/src/atomics/sys/architecture.h index cb386f67b3c..9fa0f4eefb3 100644 --- a/opal/mca/pmix/pmix3x/pmix/src/atomics/sys/architecture.h +++ b/opal/mca/pmix/pmix3x/pmix/src/atomics/sys/architecture.h @@ -10,7 +10,7 @@ * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2011 Sandia National Laboratories. All rights reserved. - * Copyright (c) 2014-2018 Intel, Inc. All rights reserved. + * Copyright (c) 2014-2018 Intel, Inc. All rights reserved. * Copyright (c) 2016 Los Alamos National Security, LLC. All rights * reserved. 
* Copyright (c) 2017 Research Organization for Information Science @@ -47,6 +47,7 @@ #define PMIX_BUILTIN_SYNC 0200 #define PMIX_BUILTIN_GCC 0202 #define PMIX_BUILTIN_NO 0203 +#define PMIX_BUILTIN_C11 0204 /* Formats */ #define PMIX_DEFAULT 1000 /* standard for given architecture */ diff --git a/opal/mca/pmix/pmix3x/pmix/src/atomics/sys/arm/atomic.h b/opal/mca/pmix/pmix3x/pmix/src/atomics/sys/arm/atomic.h index 1598547f34d..43a18b21511 100644 --- a/opal/mca/pmix/pmix3x/pmix/src/atomics/sys/arm/atomic.h +++ b/opal/mca/pmix/pmix3x/pmix/src/atomics/sys/arm/atomic.h @@ -12,9 +12,9 @@ * All rights reserved. * Copyright (c) 2010 IBM Corporation. All rights reserved. * Copyright (c) 2010 ARM ltd. All rights reserved. - * Copyright (c) 2017 Los Alamos National Security, LLC. All rights + * Copyright (c) 2017-2018 Los Alamos National Security, LLC. All rights * reserved. - * Copyright (c) 2018 Intel, Inc. All rights reserved. + * Copyright (c) 2018 Intel, Inc. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -110,7 +110,7 @@ void pmix_atomic_isync(void) #define PMIX_HAVE_ATOMIC_COMPARE_EXCHANGE_32 1 #define PMIX_HAVE_ATOMIC_MATH_32 1 -static inline bool pmix_atomic_compare_exchange_strong_32 (volatile int32_t *addr, int32_t *oldval, int32_t newval) +static inline bool pmix_atomic_compare_exchange_strong_32 (pmix_atomic_int32_t *addr, int32_t *oldval, int32_t newval) { int32_t prev, tmp; bool ret; @@ -138,7 +138,7 @@ static inline bool pmix_atomic_compare_exchange_strong_32 (volatile int32_t *add atomic_?mb can be inlined). 
Instead, we "inline" them by hand in the assembly, meaning there is one function call overhead instead of two */ -static inline bool pmix_atomic_compare_exchange_strong_acq_32 (volatile int32_t *addr, int32_t *oldval, int32_t newval) +static inline bool pmix_atomic_compare_exchange_strong_acq_32 (pmix_atomic_int32_t *addr, int32_t *oldval, int32_t newval) { bool rc; @@ -149,7 +149,7 @@ static inline bool pmix_atomic_compare_exchange_strong_acq_32 (volatile int32_t } -static inline bool pmix_atomic_compare_exchange_strong_rel_32 (volatile int32_t *addr, int32_t *oldval, int32_t newval) +static inline bool pmix_atomic_compare_exchange_strong_rel_32 (pmix_atomic_int32_t *addr, int32_t *oldval, int32_t newval) { pmix_atomic_wmb(); return pmix_atomic_compare_exchange_strong_32 (addr, oldval, newval); @@ -158,7 +158,7 @@ static inline bool pmix_atomic_compare_exchange_strong_rel_32 (volatile int32_t #if (PMIX_ASM_SUPPORT_64BIT == 1) #define PMIX_HAVE_ATOMIC_COMPARE_EXCHANGE_64 1 -static inline bool pmix_atomic_compare_exchange_strong_64 (volatile int64_t *addr, int64_t *oldval, int64_t newval) +static inline bool pmix_atomic_compare_exchange_strong_64 (pmix_atomic_int64_t *addr, int64_t *oldval, int64_t newval) { int64_t prev; int tmp; @@ -189,7 +189,7 @@ static inline bool pmix_atomic_compare_exchange_strong_64 (volatile int64_t *add atomic_?mb can be inlined). 
Instead, we "inline" them by hand in the assembly, meaning there is one function call overhead instead of two */ -static inline bool pmix_atomic_compare_exchange_strong_acq_64 (volatile int64_t *addr, int64_t *oldval, int64_t newval) +static inline bool pmix_atomic_compare_exchange_strong_acq_64 (pmix_atomic_int64_t *addr, int64_t *oldval, int64_t newval) { bool rc; @@ -200,7 +200,7 @@ static inline bool pmix_atomic_compare_exchange_strong_acq_64 (volatile int64_t } -static inline bool pmix_atomic_compare_exchange_strong_rel_64 (volatile int64_t *addr, int64_t *oldval, int64_t newval) +static inline bool pmix_atomic_compare_exchange_strong_rel_64 (pmix_atomic_int64_t *addr, int64_t *oldval, int64_t newval) { pmix_atomic_wmb(); return pmix_atomic_compare_exchange_strong_64 (addr, oldval, newval); @@ -210,7 +210,7 @@ static inline bool pmix_atomic_compare_exchange_strong_rel_64 (volatile int64_t #define PMIX_HAVE_ATOMIC_ADD_32 1 -static inline int32_t pmix_atomic_fetch_add_32(volatile int32_t* v, int inc) +static inline int32_t pmix_atomic_fetch_add_32(pmix_atomic_int32_t* v, int inc) { int32_t t, old; int tmp; @@ -231,7 +231,7 @@ static inline int32_t pmix_atomic_fetch_add_32(volatile int32_t* v, int inc) } #define PMIX_HAVE_ATOMIC_SUB_32 1 -static inline int32_t pmix_atomic_fetch_sub_32(volatile int32_t* v, int dec) +static inline int32_t pmix_atomic_fetch_sub_32(pmix_atomic_int32_t* v, int dec) { int32_t t, old; int tmp; diff --git a/opal/mca/pmix/pmix3x/pmix/src/atomics/sys/arm64/atomic.h b/opal/mca/pmix/pmix3x/pmix/src/atomics/sys/arm64/atomic.h index b3df2624313..1026fa5556f 100644 --- a/opal/mca/pmix/pmix3x/pmix/src/atomics/sys/arm64/atomic.h +++ b/opal/mca/pmix/pmix3x/pmix/src/atomics/sys/arm64/atomic.h @@ -12,9 +12,9 @@ * All rights reserved. * Copyright (c) 2010 IBM Corporation. All rights reserved. * Copyright (c) 2010 ARM ltd. All rights reserved. - * Copyright (c) 2016-2017 Los Alamos National Security, LLC. 
All rights + * Copyright (c) 2016-2018 Los Alamos National Security, LLC. All rights * reserved. - * Copyright (c) 2018 Intel, Inc. All rights reserved. + * Copyright (c) 2018 Intel, Inc. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -83,7 +83,7 @@ static inline void pmix_atomic_isync (void) * *********************************************************************/ -static inline bool pmix_atomic_compare_exchange_strong_32 (volatile int32_t *addr, int32_t *oldval, int32_t newval) +static inline bool pmix_atomic_compare_exchange_strong_32 (pmix_atomic_int32_t *addr, int32_t *oldval, int32_t newval) { int32_t prev, tmp; bool ret; @@ -103,7 +103,7 @@ static inline bool pmix_atomic_compare_exchange_strong_32 (volatile int32_t *add return ret; } -static inline int32_t pmix_atomic_swap_32(volatile int32_t *addr, int32_t newval) +static inline int32_t pmix_atomic_swap_32(pmix_atomic_int32_t *addr, int32_t newval) { int32_t ret, tmp; @@ -122,7 +122,7 @@ static inline int32_t pmix_atomic_swap_32(volatile int32_t *addr, int32_t newval atomic_?mb can be inlined). 
Instead, we "inline" them by hand in the assembly, meaning there is one function call overhead instead of two */ -static inline bool pmix_atomic_compare_exchange_strong_acq_32 (volatile int32_t *addr, int32_t *oldval, int32_t newval) +static inline bool pmix_atomic_compare_exchange_strong_acq_32 (pmix_atomic_int32_t *addr, int32_t *oldval, int32_t newval) { int32_t prev, tmp; bool ret; @@ -143,7 +143,7 @@ static inline bool pmix_atomic_compare_exchange_strong_acq_32 (volatile int32_t } -static inline bool pmix_atomic_compare_exchange_strong_rel_32 (volatile int32_t *addr, int32_t *oldval, int32_t newval) +static inline bool pmix_atomic_compare_exchange_strong_rel_32 (pmix_atomic_int32_t *addr, int32_t *oldval, int32_t newval) { int32_t prev, tmp; bool ret; @@ -165,7 +165,7 @@ static inline bool pmix_atomic_compare_exchange_strong_rel_32 (volatile int32_t #define pmix_atomic_ll_32(addr, ret) \ do { \ - volatile int32_t *_addr = (addr); \ + pmix_atomic_int32_t *_addr = (addr); \ int32_t _ret; \ \ __asm__ __volatile__ ("ldaxr %w0, [%1] \n" \ @@ -177,7 +177,7 @@ static inline bool pmix_atomic_compare_exchange_strong_rel_32 (volatile int32_t #define pmix_atomic_sc_32(addr, newval, ret) \ do { \ - volatile int32_t *_addr = (addr); \ + pmix_atomic_int32_t *_addr = (addr); \ int32_t _newval = (int32_t) newval; \ int _ret; \ \ @@ -189,7 +189,7 @@ static inline bool pmix_atomic_compare_exchange_strong_rel_32 (volatile int32_t ret = (_ret == 0); \ } while (0) -static inline bool pmix_atomic_compare_exchange_strong_64 (volatile int64_t *addr, int64_t *oldval, int64_t newval) +static inline bool pmix_atomic_compare_exchange_strong_64 (pmix_atomic_int64_t *addr, int64_t *oldval, int64_t newval) { int64_t prev; int tmp; @@ -210,7 +210,7 @@ static inline bool pmix_atomic_compare_exchange_strong_64 (volatile int64_t *add return ret; } -static inline int64_t pmix_atomic_swap_64 (volatile int64_t *addr, int64_t newval) +static inline int64_t pmix_atomic_swap_64 (pmix_atomic_int64_t 
*addr, int64_t newval) { int64_t ret; int tmp; @@ -230,7 +230,7 @@ static inline int64_t pmix_atomic_swap_64 (volatile int64_t *addr, int64_t newva atomic_?mb can be inlined). Instead, we "inline" them by hand in the assembly, meaning there is one function call overhead instead of two */ -static inline bool pmix_atomic_compare_exchange_strong_acq_64 (volatile int64_t *addr, int64_t *oldval, int64_t newval) +static inline bool pmix_atomic_compare_exchange_strong_acq_64 (pmix_atomic_int64_t *addr, int64_t *oldval, int64_t newval) { int64_t prev; int tmp; @@ -252,7 +252,7 @@ static inline bool pmix_atomic_compare_exchange_strong_acq_64 (volatile int64_t } -static inline bool pmix_atomic_compare_exchange_strong_rel_64 (volatile int64_t *addr, int64_t *oldval, int64_t newval) +static inline bool pmix_atomic_compare_exchange_strong_rel_64 (pmix_atomic_int64_t *addr, int64_t *oldval, int64_t newval) { int64_t prev; int tmp; @@ -275,7 +275,7 @@ static inline bool pmix_atomic_compare_exchange_strong_rel_64 (volatile int64_t #define pmix_atomic_ll_64(addr, ret) \ do { \ - volatile int64_t *_addr = (addr); \ + pmix_atomic_int64_t *_addr = (addr); \ int64_t _ret; \ \ __asm__ __volatile__ ("ldaxr %0, [%1] \n" \ @@ -287,7 +287,7 @@ static inline bool pmix_atomic_compare_exchange_strong_rel_64 (volatile int64_t #define pmix_atomic_sc_64(addr, newval, ret) \ do { \ - volatile int64_t *_addr = (addr); \ + pmix_atomic_int64_t *_addr = (addr); \ int64_t _newval = (int64_t) newval; \ int _ret; \ \ @@ -300,7 +300,7 @@ static inline bool pmix_atomic_compare_exchange_strong_rel_64 (volatile int64_t } while (0) #define PMIX_ASM_MAKE_ATOMIC(type, bits, name, inst, reg) \ - static inline type pmix_atomic_fetch_ ## name ## _ ## bits (volatile type *addr, type value) \ + static inline type pmix_atomic_fetch_ ## name ## _ ## bits (pmix_atomic_ ## type *addr, type value) \ { \ type newval, old; \ int32_t tmp; \ diff --git a/opal/mca/pmix/pmix3x/pmix/src/atomics/sys/atomic.h 
b/opal/mca/pmix/pmix3x/pmix/src/atomics/sys/atomic.h index f28ee1d35a2..5aca60ea366 100644 --- a/opal/mca/pmix/pmix3x/pmix/src/atomics/sys/atomic.h +++ b/opal/mca/pmix/pmix3x/pmix/src/atomics/sys/atomic.h @@ -16,7 +16,7 @@ * reserved. * Copyright (c) 2017 Research Organization for Information Science * and Technology (RIST). All rights reserved. - * Copyright (c) 2018 Intel, Inc. All rights reserved. + * Copyright (c) 2018 Intel, Inc. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -57,7 +57,13 @@ #include #include "src/atomics/sys/architecture.h" -#include "src/include/pmix_stdint.h" +#include "src/include/pmix_stdatomic.h" + +#if PMIX_ASSEMBLY_BUILTIN == PMIX_BUILTIN_C11 + +#include "atomic_stdc.h" + +#else /* !PMIX_C_HAVE__ATOMIC */ /* do some quick #define cleanup in cases where we are doing testing... */ @@ -93,7 +99,7 @@ BEGIN_C_DECLS */ struct pmix_atomic_lock_t { union { - volatile int32_t lock; /**< The lock address (an integer) */ + pmix_atomic_int32_t lock; /**< The lock address (an integer) */ volatile unsigned char sparc_lock; /**< The lock address on sparc */ char padding[sizeof(int)]; /**< Array for optional padding */ } u; @@ -148,6 +154,8 @@ enum { PMIX_ATOMIC_LOCK_LOCKED = 1 }; +#define PMIX_ATOMIC_LOCK_INIT {.u = {.lock = PMIX_ATOMIC_LOCK_UNLOCKED}} + /********************************************************************** * * Load the appropriate architecture files and set some reasonable @@ -351,19 +359,19 @@ void pmix_atomic_unlock(pmix_atomic_lock_t *lock); #if PMIX_HAVE_INLINE_ATOMIC_COMPARE_EXCHANGE_32 static inline #endif -bool pmix_atomic_compare_exchange_strong_32 (volatile int32_t *addr, int32_t *oldval, +bool pmix_atomic_compare_exchange_strong_32 (pmix_atomic_int32_t *addr, int32_t *oldval, int32_t newval); #if PMIX_HAVE_INLINE_ATOMIC_COMPARE_EXCHANGE_32 static inline #endif -bool pmix_atomic_compare_exchange_strong_acq_32 (volatile int32_t *addr, int32_t *oldval, +bool 
pmix_atomic_compare_exchange_strong_acq_32 (pmix_atomic_int32_t *addr, int32_t *oldval, int32_t newval); #if PMIX_HAVE_INLINE_ATOMIC_COMPARE_EXCHANGE_32 static inline #endif -bool pmix_atomic_compare_exchange_strong_rel_32 (volatile int32_t *addr, int32_t *oldval, +bool pmix_atomic_compare_exchange_strong_rel_32 (pmix_atomic_int32_t *addr, int32_t *oldval, int32_t newval); #endif @@ -376,19 +384,19 @@ bool pmix_atomic_compare_exchange_strong_rel_32 (volatile int32_t *addr, int32_t #if PMIX_HAVE_INLINE_ATOMIC_COMPARE_EXCHANGE_64 static inline #endif -bool pmix_atomic_compare_exchange_strong_64 (volatile int64_t *addr, int64_t *oldval, +bool pmix_atomic_compare_exchange_strong_64 (pmix_atomic_int64_t *addr, int64_t *oldval, int64_t newval); #if PMIX_HAVE_INLINE_ATOMIC_COMPARE_EXCHANGE_64 static inline #endif -bool pmix_atomic_compare_exchange_strong_acq_64 (volatile int64_t *addr, int64_t *oldval, +bool pmix_atomic_compare_exchange_strong_acq_64 (pmix_atomic_int64_t *addr, int64_t *oldval, int64_t newval); #if PMIX_HAVE_INLINE_ATOMIC_COMPARE_EXCHANGE_64 static inline #endif -bool pmix_atomic_compare_exchange_strong_rel_64 (volatile int64_t *addr, int64_t *oldval, +bool pmix_atomic_compare_exchange_strong_rel_64 (pmix_atomic_int64_t *addr, int64_t *oldval, int64_t newval); #endif @@ -400,20 +408,20 @@ bool pmix_atomic_compare_exchange_strong_rel_64 (volatile int64_t *addr, int64_t #if defined(DOXYGEN) || PMIX_HAVE_ATOMIC_MATH_32 || PMIX_HAVE_ATOMIC_COMPARE_EXCHANGE_32 -static inline int32_t pmix_atomic_add_fetch_32(volatile int32_t *addr, int delta); -static inline int32_t pmix_atomic_fetch_add_32(volatile int32_t *addr, int delta); -static inline int32_t pmix_atomic_and_fetch_32(volatile int32_t *addr, int32_t value); -static inline int32_t pmix_atomic_fetch_and_32(volatile int32_t *addr, int32_t value); -static inline int32_t pmix_atomic_or_fetch_32(volatile int32_t *addr, int32_t value); -static inline int32_t pmix_atomic_fetch_or_32(volatile int32_t *addr, int32_t 
value); -static inline int32_t pmix_atomic_xor_fetch_32(volatile int32_t *addr, int32_t value); -static inline int32_t pmix_atomic_fetch_xor_32(volatile int32_t *addr, int32_t value); -static inline int32_t pmix_atomic_sub_fetch_32(volatile int32_t *addr, int delta); -static inline int32_t pmix_atomic_fetch_sub_32(volatile int32_t *addr, int delta); -static inline int32_t pmix_atomic_min_fetch_32 (volatile int32_t *addr, int32_t value); -static inline int32_t pmix_atomic_fetch_min_32 (volatile int32_t *addr, int32_t value); -static inline int32_t pmix_atomic_max_fetch_32 (volatile int32_t *addr, int32_t value); -static inline int32_t pmix_atomic_fetch_max_32 (volatile int32_t *addr, int32_t value); +static inline int32_t pmix_atomic_add_fetch_32(pmix_atomic_int32_t *addr, int delta); +static inline int32_t pmix_atomic_fetch_add_32(pmix_atomic_int32_t *addr, int delta); +static inline int32_t pmix_atomic_and_fetch_32(pmix_atomic_int32_t *addr, int32_t value); +static inline int32_t pmix_atomic_fetch_and_32(pmix_atomic_int32_t *addr, int32_t value); +static inline int32_t pmix_atomic_or_fetch_32(pmix_atomic_int32_t *addr, int32_t value); +static inline int32_t pmix_atomic_fetch_or_32(pmix_atomic_int32_t *addr, int32_t value); +static inline int32_t pmix_atomic_xor_fetch_32(pmix_atomic_int32_t *addr, int32_t value); +static inline int32_t pmix_atomic_fetch_xor_32(pmix_atomic_int32_t *addr, int32_t value); +static inline int32_t pmix_atomic_sub_fetch_32(pmix_atomic_int32_t *addr, int delta); +static inline int32_t pmix_atomic_fetch_sub_32(pmix_atomic_int32_t *addr, int delta); +static inline int32_t pmix_atomic_min_fetch_32 (pmix_atomic_int32_t *addr, int32_t value); +static inline int32_t pmix_atomic_fetch_min_32 (pmix_atomic_int32_t *addr, int32_t value); +static inline int32_t pmix_atomic_max_fetch_32 (pmix_atomic_int32_t *addr, int32_t value); +static inline int32_t pmix_atomic_fetch_max_32 (pmix_atomic_int32_t *addr, int32_t value); #endif /* 
PMIX_HAVE_ATOMIC_MATH_32 */ @@ -430,19 +438,19 @@ static inline int32_t pmix_atomic_fetch_max_32 (volatile int32_t *addr, int32_t #if defined(DOXYGEN) || PMIX_HAVE_ATOMIC_MATH_64 || PMIX_HAVE_ATOMIC_COMPARE_EXCHANGE_64 -static inline int64_t pmix_atomic_add_fetch_64(volatile int64_t *addr, int64_t delta); -static inline int64_t pmix_atomic_fetch_add_64(volatile int64_t *addr, int64_t delta); -static inline int64_t pmix_atomic_and_fetch_64(volatile int64_t *addr, int64_t value); -static inline int64_t pmix_atomic_fetch_and_64(volatile int64_t *addr, int64_t value); -static inline int64_t pmix_atomic_or_fetch_64(volatile int64_t *addr, int64_t value); -static inline int64_t pmix_atomic_fetch_or_64(volatile int64_t *addr, int64_t value); -static inline int64_t pmix_atomic_fetch_xor_64(volatile int64_t *addr, int64_t value); -static inline int64_t pmix_atomic_sub_fetch_64(volatile int64_t *addr, int64_t delta); -static inline int64_t pmix_atomic_fetch_sub_64(volatile int64_t *addr, int64_t delta); -static inline int64_t pmix_atomic_min_fetch_64 (volatile int64_t *addr, int64_t value); -static inline int64_t pmix_atomic_fetch_min_64 (volatile int64_t *addr, int64_t value); -static inline int64_t pmix_atomic_max_fetch_64 (volatile int64_t *addr, int64_t value); -static inline int64_t pmix_atomic_fetch_max_64 (volatile int64_t *addr, int64_t value); +static inline int64_t pmix_atomic_add_fetch_64(pmix_atomic_int64_t *addr, int64_t delta); +static inline int64_t pmix_atomic_fetch_add_64(pmix_atomic_int64_t *addr, int64_t delta); +static inline int64_t pmix_atomic_and_fetch_64(pmix_atomic_int64_t *addr, int64_t value); +static inline int64_t pmix_atomic_fetch_and_64(pmix_atomic_int64_t *addr, int64_t value); +static inline int64_t pmix_atomic_or_fetch_64(pmix_atomic_int64_t *addr, int64_t value); +static inline int64_t pmix_atomic_fetch_or_64(pmix_atomic_int64_t *addr, int64_t value); +static inline int64_t pmix_atomic_fetch_xor_64(pmix_atomic_int64_t *addr, int64_t value); 
+static inline int64_t pmix_atomic_sub_fetch_64(pmix_atomic_int64_t *addr, int64_t delta); +static inline int64_t pmix_atomic_fetch_sub_64(pmix_atomic_int64_t *addr, int64_t delta); +static inline int64_t pmix_atomic_min_fetch_64 (pmix_atomic_int64_t *addr, int64_t value); +static inline int64_t pmix_atomic_fetch_min_64 (pmix_atomic_int64_t *addr, int64_t value); +static inline int64_t pmix_atomic_max_fetch_64 (pmix_atomic_int64_t *addr, int64_t value); +static inline int64_t pmix_atomic_fetch_max_64 (pmix_atomic_int64_t *addr, int64_t value); #endif /* PMIX_HAVE_ATOMIC_MATH_64 */ @@ -459,7 +467,7 @@ static inline int64_t pmix_atomic_fetch_max_64 (volatile int64_t *addr, int64_t */ #if defined(DOXYGEN) || PMIX_ENABLE_DEBUG static inline size_t -pmix_atomic_add_fetch_size_t(volatile size_t *addr, size_t delta) +pmix_atomic_add_fetch_size_t(pmix_atomic_size_t *addr, size_t delta) { #if SIZEOF_SIZE_T == 4 return (size_t) pmix_atomic_add_fetch_32((int32_t*) addr, delta); @@ -471,7 +479,7 @@ pmix_atomic_add_fetch_size_t(volatile size_t *addr, size_t delta) } static inline size_t -pmix_atomic_fetch_add_size_t(volatile size_t *addr, size_t delta) +pmix_atomic_fetch_add_size_t(pmix_atomic_size_t *addr, size_t delta) { #if SIZEOF_SIZE_T == 4 return (size_t) pmix_atomic_fetch_add_32((int32_t*) addr, delta); @@ -483,7 +491,7 @@ pmix_atomic_fetch_add_size_t(volatile size_t *addr, size_t delta) } static inline size_t -pmix_atomic_sub_fetch_size_t(volatile size_t *addr, size_t delta) +pmix_atomic_sub_fetch_size_t(pmix_atomic_size_t *addr, size_t delta) { #if SIZEOF_SIZE_T == 4 return (size_t) pmix_atomic_sub_fetch_32((int32_t*) addr, delta); @@ -495,7 +503,7 @@ pmix_atomic_sub_fetch_size_t(volatile size_t *addr, size_t delta) } static inline size_t -pmix_atomic_fetch_sub_size_t(volatile size_t *addr, size_t delta) +pmix_atomic_fetch_sub_size_t(pmix_atomic_size_t *addr, size_t delta) { #if SIZEOF_SIZE_T == 4 return (size_t) pmix_atomic_fetch_sub_32((int32_t*) addr, delta); @@ 
-508,15 +516,15 @@ pmix_atomic_fetch_sub_size_t(volatile size_t *addr, size_t delta) #else #if SIZEOF_SIZE_T == 4 -#define pmix_atomic_add_fetch_size_t(addr, delta) ((size_t) pmix_atomic_add_fetch_32((volatile int32_t *) addr, delta)) -#define pmix_atomic_fetch_add_size_t(addr, delta) ((size_t) pmix_atomic_fetch_add_32((volatile int32_t *) addr, delta)) -#define pmix_atomic_sub_fetch_size_t(addr, delta) ((size_t) pmix_atomic_sub_fetch_32((volatile int32_t *) addr, delta)) -#define pmix_atomic_fetch_sub_size_t(addr, delta) ((size_t) pmix_atomic_fetch_sub_32((volatile int32_t *) addr, delta)) +#define pmix_atomic_add_fetch_size_t(addr, delta) ((size_t) pmix_atomic_add_fetch_32((pmix_atomic_int32_t *) addr, delta)) +#define pmix_atomic_fetch_add_size_t(addr, delta) ((size_t) pmix_atomic_fetch_add_32((pmix_atomic_int32_t *) addr, delta)) +#define pmix_atomic_sub_fetch_size_t(addr, delta) ((size_t) pmix_atomic_sub_fetch_32((pmix_atomic_int32_t *) addr, delta)) +#define pmix_atomic_fetch_sub_size_t(addr, delta) ((size_t) pmix_atomic_fetch_sub_32((pmix_atomic_int32_t *) addr, delta)) #elif SIZEOF_SIZE_T == 8 -#define pmix_atomic_add_fetch_size_t(addr, delta) ((size_t) pmix_atomic_add_fetch_64((volatile int64_t *) addr, delta)) -#define pmix_atomic_fetch_add_size_t(addr, delta) ((size_t) pmix_atomic_fetch_add_64((volatile int64_t *) addr, delta)) -#define pmix_atomic_sub_fetch_size_t(addr, delta) ((size_t) pmix_atomic_sub_fetch_64((volatile int64_t *) addr, delta)) -#define pmix_atomic_fetch_sub_size_t(addr, delta) ((size_t) pmix_atomic_fetch_sub_64((volatile int64_t *) addr, delta)) +#define pmix_atomic_add_fetch_size_t(addr, delta) ((size_t) pmix_atomic_add_fetch_64((pmix_atomic_int64_t *) addr, delta)) +#define pmix_atomic_fetch_add_size_t(addr, delta) ((size_t) pmix_atomic_fetch_add_64((pmix_atomic_int64_t *) addr, delta)) +#define pmix_atomic_sub_fetch_size_t(addr, delta) ((size_t) pmix_atomic_sub_fetch_64((pmix_atomic_int64_t *) addr, delta)) +#define 
pmix_atomic_fetch_sub_size_t(addr, delta) ((size_t) pmix_atomic_fetch_sub_64((pmix_atomic_int64_t *) addr, delta)) #else #error "Unknown size_t size" #endif @@ -526,20 +534,20 @@ pmix_atomic_fetch_sub_size_t(volatile size_t *addr, size_t delta) /* these are always done with inline functions, so always mark as static inline */ -static inline bool pmix_atomic_compare_exchange_strong_xx (volatile void *addr, void *oldval, +static inline bool pmix_atomic_compare_exchange_strong_xx (pmix_atomic_intptr_t *addr, intptr_t *oldval, int64_t newval, size_t length); -static inline bool pmix_atomic_compare_exchange_strong_acq_xx (volatile void *addr, void *oldval, +static inline bool pmix_atomic_compare_exchange_strong_acq_xx (pmix_atomic_intptr_t *addr, intptr_t *oldval, int64_t newval, size_t length); -static inline bool pmix_atomic_compare_exchange_strong_rel_xx (volatile void *addr, void *oldval, +static inline bool pmix_atomic_compare_exchange_strong_rel_xx (pmix_atomic_intptr_t *addr, intptr_t *oldval, int64_t newval, size_t length); -static inline bool pmix_atomic_compare_exchange_strong_ptr (volatile void* addr, void *oldval, - void *newval); -static inline bool pmix_atomic_compare_exchange_strong_acq_ptr (volatile void* addr, void *oldval, - void *newval); -static inline bool pmix_atomic_compare_exchange_strong_rel_ptr (volatile void* addr, void *oldval, - void *newval); +static inline bool pmix_atomic_compare_exchange_strong_ptr (pmix_atomic_intptr_t* addr, intptr_t *oldval, + intptr_t newval); +static inline bool pmix_atomic_compare_exchange_strong_acq_ptr (pmix_atomic_intptr_t* addr, intptr_t *oldval, + intptr_t newval); +static inline bool pmix_atomic_compare_exchange_strong_rel_ptr (pmix_atomic_intptr_t* addr, intptr_t *oldval, + intptr_t newval); /** * Atomic compare and set of generic type with relaxed semantics. 
This @@ -555,7 +563,7 @@ static inline bool pmix_atomic_compare_exchange_strong_rel_ptr (volatile void* a * See pmix_atomic_compare_exchange_* for pseudo-code. */ #define pmix_atomic_compare_exchange_strong( ADDR, OLDVAL, NEWVAL ) \ - pmix_atomic_compare_exchange_strong_xx( (volatile void*)(ADDR), (void *)(OLDVAL), \ + pmix_atomic_compare_exchange_strong_xx( (pmix_atomic_intptr_t*)(ADDR), (intptr_t *)(OLDVAL), \ (intptr_t)(NEWVAL), sizeof(*(ADDR)) ) /** @@ -572,7 +580,7 @@ static inline bool pmix_atomic_compare_exchange_strong_rel_ptr (volatile void* a * See pmix_atomic_compare_exchange_acq_* for pseudo-code. */ #define pmix_atomic_compare_exchange_strong_acq( ADDR, OLDVAL, NEWVAL ) \ - pmix_atomic_compare_exchange_strong_acq_xx( (volatile void*)(ADDR), (void *)(OLDVAL), \ + pmix_atomic_compare_exchange_strong_acq_xx( (pmix_atomic_intptr_t*)(ADDR), (intptr_t *)(OLDVAL), \ (intptr_t)(NEWVAL), sizeof(*(ADDR)) ) /** @@ -589,7 +597,7 @@ static inline bool pmix_atomic_compare_exchange_strong_rel_ptr (volatile void* a * See pmix_atomic_compare_exchange_rel_* for pseudo-code. 
*/ #define pmix_atomic_compare_exchange_strong_rel( ADDR, OLDVAL, NEWVAL ) \ - pmix_atomic_compare_exchange_strong_rel_xx( (volatile void*)(ADDR), (void *)(OLDVAL), \ + pmix_atomic_compare_exchange_strong_rel_xx( (pmix_atomic_intptr_t*)(ADDR), (intptr_t *)(OLDVAL), \ (intptr_t)(NEWVAL), sizeof(*(ADDR)) ) @@ -597,15 +605,15 @@ static inline bool pmix_atomic_compare_exchange_strong_rel_ptr (volatile void* a #if defined(DOXYGEN) || (PMIX_HAVE_ATOMIC_MATH_32 || PMIX_HAVE_ATOMIC_MATH_64) -static inline void pmix_atomic_add_xx(volatile void* addr, +static inline void pmix_atomic_add_xx(pmix_atomic_intptr_t* addr, int32_t value, size_t length); -static inline void pmix_atomic_sub_xx(volatile void* addr, +static inline void pmix_atomic_sub_xx(pmix_atomic_intptr_t* addr, int32_t value, size_t length); -static inline intptr_t pmix_atomic_add_fetch_ptr( volatile void* addr, void* delta ); -static inline intptr_t pmix_atomic_fetch_add_ptr( volatile void* addr, void* delta ); -static inline intptr_t pmix_atomic_sub_fetch_ptr( volatile void* addr, void* delta ); -static inline intptr_t pmix_atomic_fetch_sub_ptr( volatile void* addr, void* delta ); +static inline intptr_t pmix_atomic_add_fetch_ptr( pmix_atomic_intptr_t* addr, void* delta ); +static inline intptr_t pmix_atomic_fetch_add_ptr( pmix_atomic_intptr_t* addr, void* delta ); +static inline intptr_t pmix_atomic_sub_fetch_ptr( pmix_atomic_intptr_t* addr, void* delta ); +static inline intptr_t pmix_atomic_fetch_sub_ptr( pmix_atomic_intptr_t* addr, void* delta ); /** * Atomically increment the content depending on the type. This @@ -618,7 +626,7 @@ static inline intptr_t pmix_atomic_fetch_sub_ptr( volatile void* addr, void* del * @param delta Value to add (converted to ). 
*/ #define pmix_atomic_add( ADDR, VALUE ) \ - pmix_atomic_add_xx( (volatile void*)(ADDR), (int32_t)(VALUE), \ + pmix_atomic_add_xx( (pmix_atomic_intptr_t*)(ADDR), (int32_t)(VALUE), \ sizeof(*(ADDR)) ) /** @@ -632,7 +640,7 @@ static inline intptr_t pmix_atomic_fetch_sub_ptr( volatile void* addr, void* del * @param delta Value to substract (converted to ). */ #define pmix_atomic_sub( ADDR, VALUE ) \ - pmix_atomic_sub_xx( (volatile void*)(ADDR), (int32_t)(VALUE), \ + pmix_atomic_sub_xx( (pmix_atomic_intptr_t*)(ADDR), (int32_t)(VALUE), \ sizeof(*(ADDR)) ) #endif /* PMIX_HAVE_ATOMIC_MATH_32 || PMIX_HAVE_ATOMIC_MATH_64 */ @@ -644,6 +652,8 @@ static inline intptr_t pmix_atomic_fetch_sub_ptr( volatile void* addr, void* del */ #include "src/atomics/sys/atomic_impl.h" +#endif /* !PMIX_C_HAVE__ATOMIC */ + END_C_DECLS #endif /* PMIX_SYS_ATOMIC_H */ diff --git a/opal/mca/pmix/pmix3x/pmix/src/atomics/sys/atomic_impl.h b/opal/mca/pmix/pmix3x/pmix/src/atomics/sys/atomic_impl.h index d3a3f40dd31..d03f83de283 100644 --- a/opal/mca/pmix/pmix3x/pmix/src/atomics/sys/atomic_impl.h +++ b/opal/mca/pmix/pmix3x/pmix/src/atomics/sys/atomic_impl.h @@ -11,9 +11,9 @@ * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2010-2014 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2012-2017 Los Alamos National Security, LLC. All rights + * Copyright (c) 2012-2018 Los Alamos National Security, LLC. All rights * reserved. - * Copyright (c) 2018 Intel, Inc. All rights reserved. + * Copyright (c) 2018-2019 Intel, Inc. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -41,7 +41,7 @@ #if PMIX_HAVE_ATOMIC_COMPARE_EXCHANGE_32 #if !defined(PMIX_HAVE_ATOMIC_MIN_32) -static inline int32_t pmix_atomic_fetch_min_32 (volatile int32_t *addr, int32_t value) +static inline int32_t pmix_atomic_fetch_min_32 (pmix_atomic_int32_t *addr, int32_t value) { int32_t old = *addr; do { @@ -58,7 +58,7 @@ static inline int32_t pmix_atomic_fetch_min_32 (volatile int32_t *addr, int32_t #endif /* PMIX_HAVE_ATOMIC_MIN_32 */ #if !defined(PMIX_HAVE_ATOMIC_MAX_32) -static inline int32_t pmix_atomic_fetch_max_32 (volatile int32_t *addr, int32_t value) +static inline int32_t pmix_atomic_fetch_max_32 (pmix_atomic_int32_t *addr, int32_t value) { int32_t old = *addr; do { @@ -74,7 +74,7 @@ static inline int32_t pmix_atomic_fetch_max_32 (volatile int32_t *addr, int32_t #endif /* PMIX_HAVE_ATOMIC_MAX_32 */ #define PMIX_ATOMIC_DEFINE_CMPXCG_OP(type, bits, operation, name) \ - static inline type pmix_atomic_fetch_ ## name ## _ ## bits (volatile type *addr, type value) \ + static inline type pmix_atomic_fetch_ ## name ## _ ## bits (pmix_atomic_ ## type *addr, type value) \ { \ type oldval; \ do { \ @@ -86,7 +86,7 @@ static inline int32_t pmix_atomic_fetch_max_32 (volatile int32_t *addr, int32_t #if !defined(PMIX_HAVE_ATOMIC_SWAP_32) #define PMIX_HAVE_ATOMIC_SWAP_32 1 -static inline int32_t pmix_atomic_swap_32(volatile int32_t *addr, +static inline int32_t pmix_atomic_swap_32(pmix_atomic_int32_t *addr, int32_t newval) { int32_t old = *addr; @@ -139,7 +139,7 @@ PMIX_ATOMIC_DEFINE_CMPXCG_OP(int32_t, 32, -, sub) #if PMIX_HAVE_ATOMIC_COMPARE_EXCHANGE_64 #if !defined(PMIX_HAVE_ATOMIC_MIN_64) -static inline int64_t pmix_atomic_fetch_min_64 (volatile int64_t *addr, int64_t value) +static inline int64_t pmix_atomic_fetch_min_64 (pmix_atomic_int64_t *addr, int64_t value) { int64_t old = *addr; do { @@ -156,7 +156,7 @@ static inline int64_t pmix_atomic_fetch_min_64 (volatile int64_t *addr, int64_t #endif /* 
PMIX_HAVE_ATOMIC_MIN_64 */ #if !defined(PMIX_HAVE_ATOMIC_MAX_64) -static inline int64_t pmix_atomic_fetch_max_64 (volatile int64_t *addr, int64_t value) +static inline int64_t pmix_atomic_fetch_max_64 (pmix_atomic_int64_t *addr, int64_t value) { int64_t old = *addr; do { @@ -173,7 +173,7 @@ static inline int64_t pmix_atomic_fetch_max_64 (volatile int64_t *addr, int64_t #if !defined(PMIX_HAVE_ATOMIC_SWAP_64) #define PMIX_HAVE_ATOMIC_SWAP_64 1 -static inline int64_t pmix_atomic_swap_64(volatile int64_t *addr, +static inline int64_t pmix_atomic_swap_64(pmix_atomic_int64_t *addr, int64_t newval) { int64_t old = *addr; @@ -236,15 +236,15 @@ PMIX_ATOMIC_DEFINE_CMPXCG_OP(int64_t, 64, -, sub) #if PMIX_HAVE_ATOMIC_COMPARE_EXCHANGE_32 && PMIX_HAVE_ATOMIC_COMPARE_EXCHANGE_64 #define PMIX_ATOMIC_DEFINE_CMPXCG_XX(semantics) \ static inline bool \ - pmix_atomic_compare_exchange_strong ## semantics ## xx (volatile void* addr, void *oldval, \ + pmix_atomic_compare_exchange_strong ## semantics ## xx (pmix_atomic_intptr_t* addr, intptr_t *oldval, \ int64_t newval, const size_t length) \ { \ switch (length) { \ case 4: \ - return pmix_atomic_compare_exchange_strong_32 ((volatile int32_t *) addr, \ + return pmix_atomic_compare_exchange_strong_32 ((pmix_atomic_int32_t *) addr, \ (int32_t *) oldval, (int32_t) newval); \ case 8: \ - return pmix_atomic_compare_exchange_strong_64 ((volatile int64_t *) addr, \ + return pmix_atomic_compare_exchange_strong_64 ((pmix_atomic_int64_t *) addr, \ (int64_t *) oldval, (int64_t) newval); \ } \ abort(); \ @@ -252,12 +252,12 @@ PMIX_ATOMIC_DEFINE_CMPXCG_OP(int64_t, 64, -, sub) #elif PMIX_HAVE_ATOMIC_COMPARE_EXCHANGE_32 #define PMIX_ATOMIC_DEFINE_CMPXCG_XX(semantics) \ static inline bool \ - pmix_atomic_compare_exchange_strong ## semantics ## xx (volatile void* addr, void *oldval, \ + pmix_atomic_compare_exchange_strong ## semantics ## xx (pmix_atomic_intptr_t* addr, intptr_t *oldval, \ int64_t newval, const size_t length) \ { \ switch (length) { \ case 
4: \ - return pmix_atomic_compare_exchange_strong_32 ((volatile int32_t *) addr, \ + return pmix_atomic_compare_exchange_strong_32 ((pmix_atomic_int32_t *) addr, \ (int32_t *) oldval, (int32_t) newval); \ } \ abort(); \ @@ -273,16 +273,16 @@ PMIX_ATOMIC_DEFINE_CMPXCG_XX(_rel_) #if SIZEOF_VOID_P == 4 && PMIX_HAVE_ATOMIC_COMPARE_EXCHANGE_32 #define PMIX_ATOMIC_DEFINE_CMPXCG_PTR_XX(semantics) \ static inline bool \ - pmix_atomic_compare_exchange_strong ## semantics ## ptr (volatile void* addr, void *oldval, void *newval) \ + pmix_atomic_compare_exchange_strong ## semantics ## ptr (pmix_atomic_intptr_t* addr, intptr_t *oldval, intptr_t newval) \ { \ - return pmix_atomic_compare_exchange_strong_32 ((volatile int32_t *) addr, (int32_t *) oldval, (int32_t) newval); \ + return pmix_atomic_compare_exchange_strong_32 ((pmix_atomic_int32_t *) addr, (int32_t *) oldval, (int32_t) newval); \ } #elif SIZEOF_VOID_P == 8 && PMIX_HAVE_ATOMIC_COMPARE_EXCHANGE_64 #define PMIX_ATOMIC_DEFINE_CMPXCG_PTR_XX(semantics) \ static inline bool \ - pmix_atomic_compare_exchange_strong ## semantics ## ptr (volatile void* addr, void *oldval, void *newval) \ + pmix_atomic_compare_exchange_strong ## semantics ## ptr (pmix_atomic_intptr_t* addr, intptr_t *oldval, intptr_t newval) \ { \ - return pmix_atomic_compare_exchange_strong_64 ((volatile int64_t *) addr, (int64_t *) oldval, (int64_t) newval); \ + return pmix_atomic_compare_exchange_strong_64 ((pmix_atomic_int64_t *) addr, (int64_t *) oldval, (int64_t) newval); \ } #else #error "Can not define pmix_atomic_compare_exchange_strong_ptr with existing atomics" @@ -298,9 +298,9 @@ PMIX_ATOMIC_DEFINE_CMPXCG_PTR_XX(_rel_) #if (PMIX_HAVE_ATOMIC_SWAP_32 || PMIX_HAVE_ATOMIC_SWAP_64) #if SIZEOF_VOID_P == 4 && PMIX_HAVE_ATOMIC_SWAP_32 -#define pmix_atomic_swap_ptr(addr, value) (void *) pmix_atomic_swap_32((int32_t *) addr, (int32_t) value) +#define pmix_atomic_swap_ptr(addr, value) (intptr_t) pmix_atomic_swap_32((pmix_atomic_int32_t *) addr, (int32_t) value) 
#elif SIZEOF_VOID_P == 8 && PMIX_HAVE_ATOMIC_SWAP_64 -#define pmix_atomic_swap_ptr(addr, value) (void *) pmix_atomic_swap_64((int64_t *) addr, (int64_t) value) +#define pmix_atomic_swap_ptr(addr, value) (intptr_t) pmix_atomic_swap_64((pmix_atomic_int64_t *) addr, (int64_t) value) #endif #endif /* (PMIX_HAVE_ATOMIC_SWAP_32 || PMIX_HAVE_ATOMIC_SWAP_64) */ @@ -309,15 +309,15 @@ PMIX_ATOMIC_DEFINE_CMPXCG_PTR_XX(_rel_) #if SIZEOF_VOID_P == 4 && PMIX_HAVE_ATOMIC_LLSC_32 -#define pmix_atomic_ll_ptr(addr, ret) pmix_atomic_ll_32((volatile int32_t *) (addr), ret) -#define pmix_atomic_sc_ptr(addr, value, ret) pmix_atomic_sc_32((volatile int32_t *) (addr), (intptr_t) (value), ret) +#define pmix_atomic_ll_ptr(addr, ret) pmix_atomic_ll_32((pmix_atomic_int32_t *) (addr), ret) +#define pmix_atomic_sc_ptr(addr, value, ret) pmix_atomic_sc_32((pmix_atomic_int32_t *) (addr), (intptr_t) (value), ret) #define PMIX_HAVE_ATOMIC_LLSC_PTR 1 #elif SIZEOF_VOID_P == 8 && PMIX_HAVE_ATOMIC_LLSC_64 -#define pmix_atomic_ll_ptr(addr, ret) pmix_atomic_ll_64((volatile int64_t *) (addr), ret) -#define pmix_atomic_sc_ptr(addr, value, ret) pmix_atomic_sc_64((volatile int64_t *) (addr), (intptr_t) (value), ret) +#define pmix_atomic_ll_ptr(addr, ret) pmix_atomic_ll_64((pmix_atomic_int64_t *) (addr), ret) +#define pmix_atomic_sc_ptr(addr, value, ret) pmix_atomic_sc_64((pmix_atomic_int64_t *) (addr), (intptr_t) (value), ret) #define PMIX_HAVE_ATOMIC_LLSC_PTR 1 @@ -332,18 +332,18 @@ PMIX_ATOMIC_DEFINE_CMPXCG_PTR_XX(_rel_) #if PMIX_HAVE_ATOMIC_MATH_32 || PMIX_HAVE_ATOMIC_MATH_64 static inline void - pmix_atomic_add_xx(volatile void* addr, int32_t value, size_t length) + pmix_atomic_add_xx(pmix_atomic_intptr_t* addr, int32_t value, size_t length) { switch( length ) { #if PMIX_HAVE_ATOMIC_ADD_32 case 4: - (void) pmix_atomic_fetch_add_32( (volatile int32_t*)addr, (int32_t)value ); + (void) pmix_atomic_fetch_add_32( (pmix_atomic_int32_t*)addr, (int32_t)value ); break; #endif /* 
PMIX_HAVE_ATOMIC_COMPARE_EXCHANGE_32 */ #if PMIX_HAVE_ATOMIC_ADD_64 case 8: - (void) pmix_atomic_fetch_add_64( (volatile int64_t*)addr, (int64_t)value ); + (void) pmix_atomic_fetch_add_64( (pmix_atomic_int64_t*)addr, (int64_t)value ); break; #endif /* PMIX_HAVE_ATOMIC_ADD_64 */ default: @@ -355,18 +355,18 @@ static inline void static inline void -pmix_atomic_sub_xx(volatile void* addr, int32_t value, size_t length) +pmix_atomic_sub_xx(pmix_atomic_intptr_t* addr, int32_t value, size_t length) { switch( length ) { #if PMIX_HAVE_ATOMIC_SUB_32 case 4: - (void) pmix_atomic_fetch_sub_32( (volatile int32_t*)addr, (int32_t)value ); + (void) pmix_atomic_fetch_sub_32( (pmix_atomic_int32_t*)addr, (int32_t)value ); break; #endif /* PMIX_HAVE_ATOMIC_SUB_32 */ #if PMIX_HAVE_ATOMIC_SUB_64 case 8: - (void) pmix_atomic_fetch_sub_64( (volatile int64_t*)addr, (int64_t)value ); + (void) pmix_atomic_fetch_sub_64( (pmix_atomic_int64_t*)addr, (int64_t)value ); break; #endif /* PMIX_HAVE_ATOMIC_SUB_64 */ default: @@ -377,7 +377,7 @@ pmix_atomic_sub_xx(volatile void* addr, int32_t value, size_t length) } #define PMIX_ATOMIC_DEFINE_OP_FETCH(op, operation, type, ptr_type, suffix) \ - static inline type pmix_atomic_ ## op ## _fetch_ ## suffix (volatile ptr_type *addr, type value) \ + static inline type pmix_atomic_ ## op ## _fetch_ ## suffix (pmix_atomic_ ## ptr_type *addr, type value) \ { \ return pmix_atomic_fetch_ ## op ## _ ## suffix (addr, value) operation value; \ } @@ -388,13 +388,13 @@ PMIX_ATOMIC_DEFINE_OP_FETCH(or, |, int32_t, int32_t, 32) PMIX_ATOMIC_DEFINE_OP_FETCH(xor, ^, int32_t, int32_t, 32) PMIX_ATOMIC_DEFINE_OP_FETCH(sub, -, int32_t, int32_t, 32) -static inline int32_t pmix_atomic_min_fetch_32 (volatile int32_t *addr, int32_t value) +static inline int32_t pmix_atomic_min_fetch_32 (pmix_atomic_int32_t *addr, int32_t value) { int32_t old = pmix_atomic_fetch_min_32 (addr, value); return old <= value ? 
old : value; } -static inline int32_t pmix_atomic_max_fetch_32 (volatile int32_t *addr, int32_t value) +static inline int32_t pmix_atomic_max_fetch_32 (pmix_atomic_int32_t *addr, int32_t value) { int32_t old = pmix_atomic_fetch_max_32 (addr, value); return old >= value ? old : value; @@ -407,13 +407,13 @@ PMIX_ATOMIC_DEFINE_OP_FETCH(or, |, int64_t, int64_t, 64) PMIX_ATOMIC_DEFINE_OP_FETCH(xor, ^, int64_t, int64_t, 64) PMIX_ATOMIC_DEFINE_OP_FETCH(sub, -, int64_t, int64_t, 64) -static inline int64_t pmix_atomic_min_fetch_64 (volatile int64_t *addr, int64_t value) +static inline int64_t pmix_atomic_min_fetch_64 (pmix_atomic_int64_t *addr, int64_t value) { int64_t old = pmix_atomic_fetch_min_64 (addr, value); return old <= value ? old : value; } -static inline int64_t pmix_atomic_max_fetch_64 (volatile int64_t *addr, int64_t value) +static inline int64_t pmix_atomic_max_fetch_64 (pmix_atomic_int64_t *addr, int64_t value) { int64_t old = pmix_atomic_fetch_max_64 (addr, value); return old >= value ? 
old : value; @@ -421,52 +421,52 @@ static inline int64_t pmix_atomic_max_fetch_64 (volatile int64_t *addr, int64_t #endif -static inline intptr_t pmix_atomic_fetch_add_ptr( volatile void* addr, +static inline intptr_t pmix_atomic_fetch_add_ptr( pmix_atomic_intptr_t* addr, void* delta ) { #if SIZEOF_VOID_P == 4 && PMIX_HAVE_ATOMIC_ADD_32 - return pmix_atomic_fetch_add_32((int32_t*) addr, (unsigned long) delta); + return pmix_atomic_fetch_add_32((pmix_atomic_int32_t*) addr, (unsigned long) delta); #elif SIZEOF_VOID_P == 8 && PMIX_HAVE_ATOMIC_ADD_64 - return pmix_atomic_fetch_add_64((int64_t*) addr, (unsigned long) delta); + return pmix_atomic_fetch_add_64((pmix_atomic_int64_t*) addr, (unsigned long) delta); #else abort (); return 0; #endif } -static inline intptr_t pmix_atomic_add_fetch_ptr( volatile void* addr, +static inline intptr_t pmix_atomic_add_fetch_ptr( pmix_atomic_intptr_t* addr, void* delta ) { #if SIZEOF_VOID_P == 4 && PMIX_HAVE_ATOMIC_ADD_32 - return pmix_atomic_add_fetch_32((int32_t*) addr, (unsigned long) delta); + return pmix_atomic_add_fetch_32((pmix_atomic_int32_t*) addr, (unsigned long) delta); #elif SIZEOF_VOID_P == 8 && PMIX_HAVE_ATOMIC_ADD_64 - return pmix_atomic_add_fetch_64((int64_t*) addr, (unsigned long) delta); + return pmix_atomic_add_fetch_64((pmix_atomic_int64_t*) addr, (unsigned long) delta); #else abort (); return 0; #endif } -static inline intptr_t pmix_atomic_fetch_sub_ptr( volatile void* addr, +static inline intptr_t pmix_atomic_fetch_sub_ptr( pmix_atomic_intptr_t* addr, void* delta ) { #if SIZEOF_VOID_P == 4 && PMIX_HAVE_ATOMIC_SUB_32 - return pmix_atomic_fetch_sub_32((int32_t*) addr, (unsigned long) delta); + return pmix_atomic_fetch_sub_32((pmix_atomic_int32_t*) addr, (unsigned long) delta); -#elif SIZEOF_VOID_P == 8 && PMIX_HAVE_ATOMIC_SUB_32 - return pmix_atomic_fetch_sub_64((int64_t*) addr, (unsigned long) delta); +#elif SIZEOF_VOID_P == 8 && PMIX_HAVE_ATOMIC_SUB_64 + return pmix_atomic_fetch_sub_64((pmix_atomic_int64_t*) addr, (unsigned long) delta); #else abort(); return 0; 
#endif } -static inline intptr_t pmix_atomic_sub_fetch_ptr( volatile void* addr, +static inline intptr_t pmix_atomic_sub_fetch_ptr( pmix_atomic_intptr_t* addr, void* delta ) { #if SIZEOF_VOID_P == 4 && PMIX_HAVE_ATOMIC_SUB_32 - return pmix_atomic_sub_fetch_32((int32_t*) addr, (unsigned long) delta); + return pmix_atomic_sub_fetch_32((pmix_atomic_int32_t*) addr, (unsigned long) delta); -#elif SIZEOF_VOID_P == 8 && PMIX_HAVE_ATOMIC_SUB_32 - return pmix_atomic_sub_fetch_64((int64_t*) addr, (unsigned long) delta); +#elif SIZEOF_VOID_P == 8 && PMIX_HAVE_ATOMIC_SUB_64 + return pmix_atomic_sub_fetch_64((pmix_atomic_int64_t*) addr, (unsigned long) delta); #else abort(); return 0; @@ -496,7 +496,7 @@ static inline int pmix_atomic_trylock(pmix_atomic_lock_t *lock) { int32_t unlocked = PMIX_ATOMIC_LOCK_UNLOCKED; - bool ret = pmix_atomic_compare_exchange_strong_32 (&lock->u.lock, &unlocked, PMIX_ATOMIC_LOCK_LOCKED); + bool ret = pmix_atomic_compare_exchange_strong_acq_32 (&lock->u.lock, &unlocked, PMIX_ATOMIC_LOCK_LOCKED); return (ret == false) ? 1 : 0; } diff --git a/opal/mca/pmix/pmix3x/pmix/src/atomics/sys/atomic_stdc.h b/opal/mca/pmix/pmix3x/pmix/src/atomics/sys/atomic_stdc.h new file mode 100644 index 00000000000..5fc5b0a1326 --- /dev/null +++ b/opal/mca/pmix/pmix3x/pmix/src/atomics/sys/atomic_stdc.h @@ -0,0 +1,269 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ +/* + * Copyright (c) 2018 Los Alamos National Security, LLC. All rights + * reserved. + * Copyright (c) 2018-2019 Intel, Inc. All rights reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +/* This file provides shims between the pmix atomics interface and the C11 atomics interface. It + * is intended as the first step in moving to using C11 atomics across the entire codebase. 
Once + * all officially supported compilers offer C11 atomic (GCC 4.9.0+, icc 2018+, pgi, xlc, etc) then + * this shim will go away and the codebase will be updated to use C11's atomic support + * directly.
+ * This shim contains some functions already present in atomic_impl.h because we do not include + * atomic_impl.h when using C11 atomics. It would require a lot of #ifdefs to avoid duplicate + * definitions to be worthwhile. */ + +#if !defined(PMIX_ATOMIC_STDC_H) +#define PMIX_ATOMIC_STDC_H + +#include <stdatomic.h> +#include <stdint.h> +#include "src/include/pmix_stdint.h" + +#define PMIX_HAVE_ATOMIC_MEM_BARRIER 1 + +#define PMIX_HAVE_ATOMIC_COMPARE_EXCHANGE_32 1 +#define PMIX_HAVE_ATOMIC_SWAP_32 1 + +#define PMIX_HAVE_ATOMIC_MATH_32 1 +#define PMIX_HAVE_ATOMIC_ADD_32 1 +#define PMIX_HAVE_ATOMIC_AND_32 1 +#define PMIX_HAVE_ATOMIC_OR_32 1 +#define PMIX_HAVE_ATOMIC_XOR_32 1 +#define PMIX_HAVE_ATOMIC_SUB_32 1 + +#define PMIX_HAVE_ATOMIC_COMPARE_EXCHANGE_64 1 +#define PMIX_HAVE_ATOMIC_SWAP_64 1 + +#define PMIX_HAVE_ATOMIC_MATH_64 1 +#define PMIX_HAVE_ATOMIC_ADD_64 1 +#define PMIX_HAVE_ATOMIC_AND_64 1 +#define PMIX_HAVE_ATOMIC_OR_64 1 +#define PMIX_HAVE_ATOMIC_XOR_64 1 +#define PMIX_HAVE_ATOMIC_SUB_64 1 + +#define PMIX_HAVE_ATOMIC_LLSC_32 0 +#define PMIX_HAVE_ATOMIC_LLSC_64 0 +#define PMIX_HAVE_ATOMIC_LLSC_PTR 0 + +#define PMIX_HAVE_ATOMIC_MIN_32 1 +#define PMIX_HAVE_ATOMIC_MAX_32 1 + +#define PMIX_HAVE_ATOMIC_MIN_64 1 +#define PMIX_HAVE_ATOMIC_MAX_64 1 + +#define PMIX_HAVE_ATOMIC_SPINLOCKS 1 + +static inline void pmix_atomic_mb (void) +{ + atomic_thread_fence (memory_order_seq_cst); +} + +static inline void pmix_atomic_wmb (void) +{ + atomic_thread_fence (memory_order_release); +} + +static inline void pmix_atomic_rmb (void) +{ +#if PMIX_ASSEMBLY_ARCH == PMIX_X86_64 + /* work around a bug in older gcc versions (observed in gcc 6.x) + * where acquire seems to get treated as a no-op instead of being + * equivalent to __asm__ __volatile__("": : :"memory") on x86_64 */ + pmix_atomic_mb (); +#else + atomic_thread_fence (memory_order_acquire); +#endif +} + +#define pmix_atomic_compare_exchange_strong_32(addr, compare, value) atomic_compare_exchange_strong_explicit (addr, compare, value, 
memory_order_relaxed, memory_order_relaxed) +#define pmix_atomic_compare_exchange_strong_64(addr, compare, value) atomic_compare_exchange_strong_explicit (addr, compare, value, memory_order_relaxed, memory_order_relaxed) +#define pmix_atomic_compare_exchange_strong_ptr(addr, compare, value) atomic_compare_exchange_strong_explicit (addr, compare, value, memory_order_relaxed, memory_order_relaxed) +#define pmix_atomic_compare_exchange_strong_acq_32(addr, compare, value) atomic_compare_exchange_strong_explicit (addr, compare, value, memory_order_acquire, memory_order_relaxed) +#define pmix_atomic_compare_exchange_strong_acq_64(addr, compare, value) atomic_compare_exchange_strong_explicit (addr, compare, value, memory_order_acquire, memory_order_relaxed) +#define pmix_atomic_compare_exchange_strong_acq_ptr(addr, compare, value) atomic_compare_exchange_strong_explicit (addr, compare, value, memory_order_acquire, memory_order_relaxed) + +#define pmix_atomic_compare_exchange_strong_rel_32(addr, compare, value) atomic_compare_exchange_strong_explicit (addr, compare, value, memory_order_release, memory_order_relaxed) +#define pmix_atomic_compare_exchange_strong_rel_64(addr, compare, value) atomic_compare_exchange_strong_explicit (addr, compare, value, memory_order_release, memory_order_relaxed) +#define pmix_atomic_compare_exchange_strong_rel_ptr(addr, compare, value) atomic_compare_exchange_strong_explicit (addr, compare, value, memory_order_release, memory_order_relaxed) + +#define pmix_atomic_compare_exchange_strong(addr, oldval, newval) atomic_compare_exchange_strong_explicit (addr, oldval, newval, memory_order_relaxed, memory_order_relaxed) +#define pmix_atomic_compare_exchange_strong_acq(addr, oldval, newval) atomic_compare_exchange_strong_explicit (addr, oldval, newval, memory_order_acquire, memory_order_relaxed) +#define pmix_atomic_compare_exchange_strong_rel(addr, oldval, newval) atomic_compare_exchange_strong_explicit (addr, oldval, newval, memory_order_release, 
memory_order_relaxed) + +#define pmix_atomic_swap_32(addr, value) atomic_exchange_explicit (addr, value, memory_order_relaxed) +#define pmix_atomic_swap_64(addr, value) atomic_exchange_explicit (addr, value, memory_order_relaxed) +#define pmix_atomic_swap_ptr(addr, value) atomic_exchange_explicit (addr, value, memory_order_relaxed) + +#define PMIX_ATOMIC_STDC_DEFINE_FETCH_OP(op, bits, type, operator) \ + static inline type pmix_atomic_fetch_ ## op ##_## bits (pmix_atomic_ ## type *addr, type value) \ + { \ + return atomic_fetch_ ## op ## _explicit (addr, value, memory_order_relaxed); \ + } \ + \ + static inline type pmix_atomic_## op ## _fetch_ ## bits (pmix_atomic_ ## type *addr, type value) \ + { \ + return atomic_fetch_ ## op ## _explicit (addr, value, memory_order_relaxed) operator value; \ + } + +PMIX_ATOMIC_STDC_DEFINE_FETCH_OP(add, 32, int32_t, +) +PMIX_ATOMIC_STDC_DEFINE_FETCH_OP(add, 64, int64_t, +) +PMIX_ATOMIC_STDC_DEFINE_FETCH_OP(add, size_t, size_t, +) + +PMIX_ATOMIC_STDC_DEFINE_FETCH_OP(sub, 32, int32_t, -) +PMIX_ATOMIC_STDC_DEFINE_FETCH_OP(sub, 64, int64_t, -) +PMIX_ATOMIC_STDC_DEFINE_FETCH_OP(sub, size_t, size_t, -) + +PMIX_ATOMIC_STDC_DEFINE_FETCH_OP(or, 32, int32_t, |) +PMIX_ATOMIC_STDC_DEFINE_FETCH_OP(or, 64, int64_t, |) + +PMIX_ATOMIC_STDC_DEFINE_FETCH_OP(xor, 32, int32_t, ^) +PMIX_ATOMIC_STDC_DEFINE_FETCH_OP(xor, 64, int64_t, ^) + +PMIX_ATOMIC_STDC_DEFINE_FETCH_OP(and, 32, int32_t, &) +PMIX_ATOMIC_STDC_DEFINE_FETCH_OP(and, 64, int64_t, &) + +#define pmix_atomic_add(addr, value) (void) atomic_fetch_add_explicit (addr, value, memory_order_relaxed) + +static inline int32_t pmix_atomic_fetch_min_32 (pmix_atomic_int32_t *addr, int32_t value) +{ + int32_t old = *addr; + do { + if (old <= value) { + break; + } + } while (!pmix_atomic_compare_exchange_strong_32 (addr, &old, value)); + + return old; +} + +static inline int32_t pmix_atomic_fetch_max_32 (pmix_atomic_int32_t *addr, int32_t value) +{ + int32_t old = *addr; + do { + if (old >= value) { + 
break; + } + } while (!pmix_atomic_compare_exchange_strong_32 (addr, &old, value)); + + return old; +} + +static inline int64_t pmix_atomic_fetch_min_64 (pmix_atomic_int64_t *addr, int64_t value) +{ + int64_t old = *addr; + do { + if (old <= value) { + break; + } + } while (!pmix_atomic_compare_exchange_strong_64 (addr, &old, value)); + + return old; +} + +static inline int64_t pmix_atomic_fetch_max_64 (pmix_atomic_int64_t *addr, int64_t value) +{ + int64_t old = *addr; + do { + if (old >= value) { + break; + } + } while (!pmix_atomic_compare_exchange_strong_64 (addr, &old, value)); + + return old; +} + +static inline int32_t pmix_atomic_min_fetch_32 (pmix_atomic_int32_t *addr, int32_t value) +{ + int32_t old = pmix_atomic_fetch_min_32 (addr, value); + return old <= value ? old : value; +} + +static inline int32_t pmix_atomic_max_fetch_32 (pmix_atomic_int32_t *addr, int32_t value) +{ + int32_t old = pmix_atomic_fetch_max_32 (addr, value); + return old >= value ? old : value; +} + +static inline int64_t pmix_atomic_min_fetch_64 (pmix_atomic_int64_t *addr, int64_t value) +{ + int64_t old = pmix_atomic_fetch_min_64 (addr, value); + return old <= value ? old : value; +} + +static inline int64_t pmix_atomic_max_fetch_64 (pmix_atomic_int64_t *addr, int64_t value) +{ + int64_t old = pmix_atomic_fetch_max_64 (addr, value); + return old >= value ? old : value; +} + +#define PMIX_ATOMIC_LOCK_UNLOCKED false +#define PMIX_ATOMIC_LOCK_LOCKED true + +#define PMIX_ATOMIC_LOCK_INIT ATOMIC_FLAG_INIT + +typedef atomic_flag pmix_atomic_lock_t; + +/* + * Lock initialization function. It sets the lock to UNLOCKED. 
+ */ +static inline void pmix_atomic_lock_init (pmix_atomic_lock_t *lock, bool value) +{ + atomic_flag_clear (lock); +} + + +static inline int pmix_atomic_trylock (pmix_atomic_lock_t *lock) +{ + return (int) atomic_flag_test_and_set (lock); +} + + +static inline void pmix_atomic_lock(pmix_atomic_lock_t *lock) +{ + while (pmix_atomic_trylock (lock)) { + } +} + + +static inline void pmix_atomic_unlock (pmix_atomic_lock_t *lock) +{ + atomic_flag_clear (lock); +} + + +#if PMIX_HAVE_C11_CSWAP_INT128 + +/* the C11 atomic compare-exchange is lock free so use it */ +#define pmix_atomic_compare_exchange_strong_128 atomic_compare_exchange_strong + +#define PMIX_HAVE_ATOMIC_COMPARE_EXCHANGE_128 1 + +#elif PMIX_HAVE_SYNC_BUILTIN_CSWAP_INT128 + +/* fall back on the __sync builtin if available since it will emit the expected instruction on x86_64 (cmpxchg16b) */ +__pmix_attribute_always_inline__ +static inline bool pmix_atomic_compare_exchange_strong_128 (pmix_atomic_int128_t *addr, + pmix_int128_t *oldval, pmix_int128_t newval) +{ + pmix_int128_t prev = __sync_val_compare_and_swap (addr, *oldval, newval); + bool ret = prev == *oldval; + *oldval = prev; + return ret; +} + +#define PMIX_HAVE_ATOMIC_COMPARE_EXCHANGE_128 1 + +#else + +#define PMIX_HAVE_ATOMIC_COMPARE_EXCHANGE_128 0 + +#endif + +#endif /* !defined(PMIX_ATOMIC_STDC_H) */ diff --git a/opal/mca/pmix/pmix3x/pmix/src/atomics/sys/cma.h b/opal/mca/pmix/pmix3x/pmix/src/atomics/sys/cma.h index 3eacce23351..967d13c63f4 100644 --- a/opal/mca/pmix/pmix3x/pmix/src/atomics/sys/cma.h +++ b/opal/mca/pmix/pmix3x/pmix/src/atomics/sys/cma.h @@ -4,7 +4,7 @@ * reserved. * Copyright (c) 2017 Research Organization for Information Science * and Technology (RIST). All rights reserved. - * Copyright (c) 2018 Intel, Inc. All rights reserved. + * Copyright (c) 2018 Intel, Inc. All rights reserved. 
* $COPYRIGHT$ */ @@ -85,13 +85,13 @@ #elif PMIX_ASSEMBLY_ARCH == PMIX_S390 -#define __NR_process_vm_readv 340 -#define __NR_process_vm_writev 341 +#define __NR_process_vm_readv 340 +#define __NR_process_vm_writev 341 #elif PMIX_ASSEMBLY_ARCH == PMIX_S390X -#define __NR_process_vm_readv 340 -#define __NR_process_vm_writev 341 +#define __NR_process_vm_readv 340 +#define __NR_process_vm_writev 341 #else #error "Unsupported architecture for process_vm_readv and process_vm_writev syscalls" diff --git a/opal/mca/pmix/pmix3x/pmix/src/atomics/sys/gcc_builtin/atomic.h b/opal/mca/pmix/pmix3x/pmix/src/atomics/sys/gcc_builtin/atomic.h index 0c47d83988e..e07c4d88196 100644 --- a/opal/mca/pmix/pmix3x/pmix/src/atomics/sys/gcc_builtin/atomic.h +++ b/opal/mca/pmix/pmix3x/pmix/src/atomics/sys/gcc_builtin/atomic.h @@ -11,11 +11,13 @@ * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2011 Sandia National Laboratories. All rights reserved. - * Copyright (c) 2014-2017 Los Alamos National Security, LLC. All rights + * Copyright (c) 2014-2018 Los Alamos National Security, LLC. All rights * reserved. * Copyright (c) 2016-2017 Research Organization for Information Science * and Technology (RIST). All rights reserved. - * Copyright (c) 2018 Intel, Inc. All rights reserved. + * Copyright (c) 2018 Triad National Security, LLC. All rights + * reserved. + * Copyright (c) 2018 Intel, Inc. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -58,7 +60,14 @@ static inline void pmix_atomic_mb(void) static inline void pmix_atomic_rmb(void) { +#if PMIX_ASSEMBLY_ARCH == PMIX_X86_64 + /* work around a bug in older gcc versions where ACQUIRE seems to get + * treated as a no-op instead of being equivalent to + * __asm__ __volatile__("": : :"memory") */ + __atomic_thread_fence (__ATOMIC_SEQ_CST); +#else __atomic_thread_fence (__ATOMIC_ACQUIRE); +#endif } static inline void pmix_atomic_wmb(void) @@ -77,103 +86,103 @@ static inline void pmix_atomic_wmb(void) /* * Suppress numerous (spurious ?) warnings from Oracle Studio compilers * see https://community.oracle.com/thread/3968347 - */ + */ #if defined(__SUNPRO_C) || defined(__SUNPRO_CC) #pragma error_messages(off, E_ARG_INCOMPATIBLE_WITH_ARG_L) #endif -static inline bool pmix_atomic_compare_exchange_strong_acq_32 (volatile int32_t *addr, int32_t *oldval, int32_t newval) +static inline bool pmix_atomic_compare_exchange_strong_acq_32 (pmix_atomic_int32_t *addr, int32_t *oldval, int32_t newval) { return __atomic_compare_exchange_n (addr, oldval, newval, false, __ATOMIC_ACQUIRE, __ATOMIC_RELAXED); } -static inline bool pmix_atomic_compare_exchange_strong_rel_32 (volatile int32_t *addr, int32_t *oldval, int32_t newval) +static inline bool pmix_atomic_compare_exchange_strong_rel_32 (pmix_atomic_int32_t *addr, int32_t *oldval, int32_t newval) { return __atomic_compare_exchange_n (addr, oldval, newval, false, __ATOMIC_RELEASE, __ATOMIC_RELAXED); } -static inline bool pmix_atomic_compare_exchange_strong_32 (volatile int32_t *addr, int32_t *oldval, int32_t newval) +static inline bool pmix_atomic_compare_exchange_strong_32 (pmix_atomic_int32_t *addr, int32_t *oldval, int32_t newval) { return __atomic_compare_exchange_n (addr, oldval, newval, false, __ATOMIC_ACQUIRE, __ATOMIC_RELAXED); } -static inline int32_t pmix_atomic_swap_32 (volatile int32_t *addr, int32_t newval) +static inline int32_t pmix_atomic_swap_32 
(pmix_atomic_int32_t *addr, int32_t newval) { int32_t oldval; __atomic_exchange (addr, &newval, &oldval, __ATOMIC_RELAXED); return oldval; } -static inline int32_t pmix_atomic_fetch_add_32(volatile int32_t *addr, int32_t delta) +static inline int32_t pmix_atomic_fetch_add_32(pmix_atomic_int32_t *addr, int32_t delta) { return __atomic_fetch_add (addr, delta, __ATOMIC_RELAXED); } -static inline int32_t pmix_atomic_fetch_and_32(volatile int32_t *addr, int32_t value) +static inline int32_t pmix_atomic_fetch_and_32(pmix_atomic_int32_t *addr, int32_t value) { return __atomic_fetch_and (addr, value, __ATOMIC_RELAXED); } -static inline int32_t pmix_atomic_fetch_or_32(volatile int32_t *addr, int32_t value) +static inline int32_t pmix_atomic_fetch_or_32(pmix_atomic_int32_t *addr, int32_t value) { return __atomic_fetch_or (addr, value, __ATOMIC_RELAXED); } -static inline int32_t pmix_atomic_fetch_xor_32(volatile int32_t *addr, int32_t value) +static inline int32_t pmix_atomic_fetch_xor_32(pmix_atomic_int32_t *addr, int32_t value) { return __atomic_fetch_xor (addr, value, __ATOMIC_RELAXED); } -static inline int32_t pmix_atomic_fetch_sub_32(volatile int32_t *addr, int32_t delta) +static inline int32_t pmix_atomic_fetch_sub_32(pmix_atomic_int32_t *addr, int32_t delta) { return __atomic_fetch_sub (addr, delta, __ATOMIC_RELAXED); } -static inline bool pmix_atomic_compare_exchange_strong_acq_64 (volatile int64_t *addr, int64_t *oldval, int64_t newval) +static inline bool pmix_atomic_compare_exchange_strong_acq_64 (pmix_atomic_int64_t *addr, int64_t *oldval, int64_t newval) { return __atomic_compare_exchange_n (addr, oldval, newval, false, __ATOMIC_ACQUIRE, __ATOMIC_RELAXED); } -static inline bool pmix_atomic_compare_exchange_strong_rel_64 (volatile int64_t *addr, int64_t *oldval, int64_t newval) +static inline bool pmix_atomic_compare_exchange_strong_rel_64 (pmix_atomic_int64_t *addr, int64_t *oldval, int64_t newval) { return __atomic_compare_exchange_n (addr, oldval, newval, 
false, __ATOMIC_RELEASE, __ATOMIC_RELAXED); } -static inline bool pmix_atomic_compare_exchange_strong_64 (volatile int64_t *addr, int64_t *oldval, int64_t newval) +static inline bool pmix_atomic_compare_exchange_strong_64 (pmix_atomic_int64_t *addr, int64_t *oldval, int64_t newval) { return __atomic_compare_exchange_n (addr, oldval, newval, false, __ATOMIC_ACQUIRE, __ATOMIC_RELAXED); } -static inline int64_t pmix_atomic_swap_64 (volatile int64_t *addr, int64_t newval) +static inline int64_t pmix_atomic_swap_64 (pmix_atomic_int64_t *addr, int64_t newval) { int64_t oldval; __atomic_exchange (addr, &newval, &oldval, __ATOMIC_RELAXED); return oldval; } -static inline int64_t pmix_atomic_fetch_add_64(volatile int64_t *addr, int64_t delta) +static inline int64_t pmix_atomic_fetch_add_64(pmix_atomic_int64_t *addr, int64_t delta) { return __atomic_fetch_add (addr, delta, __ATOMIC_RELAXED); } -static inline int64_t pmix_atomic_fetch_and_64(volatile int64_t *addr, int64_t value) +static inline int64_t pmix_atomic_fetch_and_64(pmix_atomic_int64_t *addr, int64_t value) { return __atomic_fetch_and (addr, value, __ATOMIC_RELAXED); } -static inline int64_t pmix_atomic_fetch_or_64(volatile int64_t *addr, int64_t value) +static inline int64_t pmix_atomic_fetch_or_64(pmix_atomic_int64_t *addr, int64_t value) { return __atomic_fetch_or (addr, value, __ATOMIC_RELAXED); } -static inline int64_t pmix_atomic_fetch_xor_64(volatile int64_t *addr, int64_t value) +static inline int64_t pmix_atomic_fetch_xor_64(pmix_atomic_int64_t *addr, int64_t value) { return __atomic_fetch_xor (addr, value, __ATOMIC_RELAXED); } -static inline int64_t pmix_atomic_fetch_sub_64(volatile int64_t *addr, int64_t delta) +static inline int64_t pmix_atomic_fetch_sub_64(pmix_atomic_int64_t *addr, int64_t delta) { return __atomic_fetch_sub (addr, delta, __ATOMIC_RELAXED); } @@ -182,7 +191,7 @@ static inline int64_t pmix_atomic_fetch_sub_64(volatile int64_t *addr, int64_t d #define 
PMIX_HAVE_ATOMIC_COMPARE_EXCHANGE_128 1 -static inline bool pmix_atomic_compare_exchange_strong_128 (volatile pmix_int128_t *addr, +static inline bool pmix_atomic_compare_exchange_strong_128 (pmix_atomic_int128_t *addr, pmix_int128_t *oldval, pmix_int128_t newval) { return __atomic_compare_exchange_n (addr, oldval, newval, false, @@ -195,7 +204,7 @@ static inline bool pmix_atomic_compare_exchange_strong_128 (volatile pmix_int128 /* __atomic version is not lock-free so use legacy __sync version */ -static inline bool pmix_atomic_compare_exchange_strong_128 (volatile pmix_int128_t *addr, +static inline bool pmix_atomic_compare_exchange_strong_128 (pmix_atomic_int128_t *addr, pmix_int128_t *oldval, pmix_int128_t newval) { pmix_int128_t prev = __sync_val_compare_and_swap (addr, *oldval, newval); diff --git a/opal/mca/pmix/pmix3x/pmix/src/atomics/sys/ia32/atomic.h b/opal/mca/pmix/pmix3x/pmix/src/atomics/sys/ia32/atomic.h index 4e8a6d81455..c50c162a6ed 100644 --- a/opal/mca/pmix/pmix3x/pmix/src/atomics/sys/ia32/atomic.h +++ b/opal/mca/pmix/pmix3x/pmix/src/atomics/sys/ia32/atomic.h @@ -13,9 +13,9 @@ * Copyright (c) 2007-2010 Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. - * Copyright (c) 2015-2017 Los Alamos National Security, LLC. All rights + * Copyright (c) 2015-2018 Los Alamos National Security, LLC. All rights * reserved. - * Copyright (c) 2018 Intel, Inc. All rights reserved. + * Copyright (c) 2018 Intel, Inc. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -85,7 +85,7 @@ static inline void pmix_atomic_isync(void) *********************************************************************/ #if PMIX_GCC_INLINE_ASSEMBLY -static inline bool pmix_atomic_compare_exchange_strong_32 (volatile int32_t *addr, int32_t *oldval, int32_t newval) +static inline bool pmix_atomic_compare_exchange_strong_32 (pmix_atomic_int32_t *addr, int32_t *oldval, int32_t newval) { unsigned char ret; __asm__ __volatile__ ( @@ -107,15 +107,15 @@ static inline bool pmix_atomic_compare_exchange_strong_32 (volatile int32_t *add #define PMIX_HAVE_ATOMIC_SWAP_32 1 -static inline int32_t pmix_atomic_swap_32( volatile int32_t *addr, - int32_t newval) +static inline int32_t pmix_atomic_swap_32( pmix_atomic_int32_t *addr, + int32_t newval) { int32_t oldval; __asm__ __volatile__("xchg %1, %0" : - "=r" (oldval), "=m" (*addr) : - "0" (newval), "m" (*addr) : - "memory"); + "=r" (oldval), "=m" (*addr) : + "0" (newval), "m" (*addr) : + "memory"); return oldval; } @@ -131,7 +131,7 @@ static inline int32_t pmix_atomic_swap_32( volatile int32_t *addr, * * Atomically adds @i to @v. */ -static inline int32_t pmix_atomic_fetch_add_32(volatile int32_t* v, int i) +static inline int32_t pmix_atomic_fetch_add_32(pmix_atomic_int32_t* v, int i) { int ret = i; __asm__ __volatile__( @@ -151,7 +151,7 @@ static inline int32_t pmix_atomic_fetch_add_32(volatile int32_t* v, int i) * * Atomically subtracts @i from @v. 
*/ -static inline int32_t pmix_atomic_fetch_sub_32(volatile int32_t* v, int i) +static inline int32_t pmix_atomic_fetch_sub_32(pmix_atomic_int32_t* v, int i) { int ret = -i; __asm__ __volatile__( diff --git a/opal/mca/pmix/pmix3x/pmix/src/atomics/sys/powerpc/atomic.h b/opal/mca/pmix/pmix3x/pmix/src/atomics/sys/powerpc/atomic.h index e5ce2663082..17134e11c16 100644 --- a/opal/mca/pmix/pmix3x/pmix/src/atomics/sys/powerpc/atomic.h +++ b/opal/mca/pmix/pmix3x/pmix/src/atomics/sys/powerpc/atomic.h @@ -13,7 +13,7 @@ * Copyright (c) 2010-2017 IBM Corporation. All rights reserved. * Copyright (c) 2015-2018 Los Alamos National Security, LLC. All rights * reserved. - * Copyright (c) 2018 Intel, Inc. All rights reserved. + * Copyright (c) 2018-2019 Intel, Inc. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -99,27 +99,7 @@ void pmix_atomic_isync(void) ISYNC(); } -#elif PMIX_XLC_INLINE_ASSEMBLY /* end PMIX_GCC_INLINE_ASSEMBLY */ - -/* Yeah, I don't know who thought this was a reasonable syntax for - * inline assembly. Do these because they are used so often and they - * are fairly simple (aka: there is a tech pub on IBM's web site - * containing the right hex for the instructions). 
- */ - -#undef PMIX_HAVE_INLINE_ATOMIC_MEM_BARRIER -#define PMIX_HAVE_INLINE_ATOMIC_MEM_BARRIER 0 - -#pragma mc_func pmix_atomic_mb { "7c0004ac" } /* sync */ -#pragma reg_killed_by pmix_atomic_mb /* none */ - -#pragma mc_func pmix_atomic_rmb { "7c2004ac" } /* lwsync */ -#pragma reg_killed_by pmix_atomic_rmb /* none */ - -#pragma mc_func pmix_atomic_wmb { "7c2004ac" } /* lwsync */ -#pragma reg_killed_by pmix_atomic_wmb /* none */ - -#endif +#endif /* end PMIX_GCC_INLINE_ASSEMBLY */ /********************************************************************** * @@ -145,7 +125,7 @@ void pmix_atomic_isync(void) #define PMIX_ASM_VALUE64(x) x #endif -static inline bool pmix_atomic_compare_exchange_strong_32 (volatile int32_t *addr, int32_t *oldval, int32_t newval) +static inline bool pmix_atomic_compare_exchange_strong_32 (pmix_atomic_int32_t *addr, int32_t *oldval, int32_t newval) { int32_t prev; bool ret; @@ -171,7 +151,7 @@ static inline bool pmix_atomic_compare_exchange_strong_32 (volatile int32_t *add * load the arguments to/from the stack. This sequence may cause the ll reservation to be cancelled. */ #define pmix_atomic_ll_32(addr, ret) \ do { \ - volatile int32_t *_addr = (addr); \ + pmix_atomic_int32_t *_addr = (addr); \ int32_t _ret; \ __asm__ __volatile__ ("lwarx %0, 0, %1 \n\t" \ : "=&r" (_ret) \ @@ -182,7 +162,7 @@ static inline bool pmix_atomic_compare_exchange_strong_32 (volatile int32_t *add #define pmix_atomic_sc_32(addr, value, ret) \ do { \ - volatile int32_t *_addr = (addr); \ + pmix_atomic_int32_t *_addr = (addr); \ int32_t _ret, _foo, _newval = (int32_t) value; \ \ __asm__ __volatile__ (" stwcx. %4, 0, %3 \n\t" \ @@ -201,7 +181,7 @@ static inline bool pmix_atomic_compare_exchange_strong_32 (volatile int32_t *add atomic_?mb can be inlined). 
Instead, we "inline" them by hand in the assembly, meaning there is one function call overhead instead of two */ -static inline bool pmix_atomic_compare_exchange_strong_acq_32 (volatile int32_t *addr, int32_t *oldval, int32_t newval) +static inline bool pmix_atomic_compare_exchange_strong_acq_32 (pmix_atomic_int32_t *addr, int32_t *oldval, int32_t newval) { bool rc; @@ -212,13 +192,13 @@ static inline bool pmix_atomic_compare_exchange_strong_acq_32 (volatile int32_t } -static inline bool pmix_atomic_compare_exchange_strong_rel_32 (volatile int32_t *addr, int32_t *oldval, int32_t newval) +static inline bool pmix_atomic_compare_exchange_strong_rel_32 (pmix_atomic_int32_t *addr, int32_t *oldval, int32_t newval) { pmix_atomic_wmb(); return pmix_atomic_compare_exchange_strong_32 (addr, oldval, newval); } -static inline int32_t pmix_atomic_swap_32(volatile int32_t *addr, int32_t newval) +static inline int32_t pmix_atomic_swap_32(pmix_atomic_int32_t *addr, int32_t newval) { int32_t ret; @@ -240,7 +220,7 @@ static inline int32_t pmix_atomic_swap_32(volatile int32_t *addr, int32_t newval #if PMIX_GCC_INLINE_ASSEMBLY #define PMIX_ATOMIC_POWERPC_DEFINE_ATOMIC_64(type, instr) \ -static inline int64_t pmix_atomic_fetch_ ## type ## _64(volatile int64_t* v, int64_t val) \ +static inline int64_t pmix_atomic_fetch_ ## type ## _64(pmix_atomic_int64_t* v, int64_t val) \ { \ int64_t t, old; \ \ @@ -262,7 +242,7 @@ PMIX_ATOMIC_POWERPC_DEFINE_ATOMIC_64(or, or) PMIX_ATOMIC_POWERPC_DEFINE_ATOMIC_64(xor, xor) PMIX_ATOMIC_POWERPC_DEFINE_ATOMIC_64(sub, subf) -static inline bool pmix_atomic_compare_exchange_strong_64 (volatile int64_t *addr, int64_t *oldval, int64_t newval) +static inline bool pmix_atomic_compare_exchange_strong_64 (pmix_atomic_int64_t *addr, int64_t *oldval, int64_t newval) { int64_t prev; bool ret; @@ -285,7 +265,7 @@ static inline bool pmix_atomic_compare_exchange_strong_64 (volatile int64_t *add #define pmix_atomic_ll_64(addr, ret) \ do { \ - volatile int64_t *_addr = 
(addr); \ + pmix_atomic_int64_t *_addr = (addr); \ int64_t _ret; \ __asm__ __volatile__ ("ldarx %0, 0, %1 \n\t" \ : "=&r" (_ret) \ @@ -296,8 +276,8 @@ static inline bool pmix_atomic_compare_exchange_strong_64 (volatile int64_t *add #define pmix_atomic_sc_64(addr, value, ret) \ do { \ - volatile int64_t *_addr = (addr); \ - int64_t _foo, _newval = (int64_t) value; \ + pmix_atomic_int64_t *_addr = (addr); \ + int64_t _newval = (int64_t) value; \ int32_t _ret; \ \ __asm__ __volatile__ (" stdcx. %2, 0, %1 \n\t" \ @@ -311,7 +291,7 @@ static inline bool pmix_atomic_compare_exchange_strong_64 (volatile int64_t *add ret = _ret; \ } while (0) -static inline int64_t pmix_atomic_swap_64(volatile int64_t *addr, int64_t newval) +static inline int64_t pmix_atomic_swap_64(pmix_atomic_int64_t *addr, int64_t newval) { int64_t ret; @@ -336,7 +316,7 @@ static inline int64_t pmix_atomic_swap_64(volatile int64_t *addr, int64_t newval #if PMIX_GCC_INLINE_ASSEMBLY -static inline bool pmix_atomic_compare_exchange_strong_64 (volatile int64_t *addr, int64_t *oldval, int64_t newval) +static inline bool pmix_atomic_compare_exchange_strong_64 (pmix_atomic_int64_t *addr, int64_t *oldval, int64_t newval) { int64_t prev; int ret; @@ -383,7 +363,7 @@ static inline bool pmix_atomic_compare_exchange_strong_64 (volatile int64_t *add atomic_?mb can be inlined). 
Instead, we "inline" them by hand in the assembly, meaning there is one function call overhead instead of two */ -static inline bool pmix_atomic_compare_exchange_strong_acq_64 (volatile int64_t *addr, int64_t *oldval, int64_t newval) +static inline bool pmix_atomic_compare_exchange_strong_acq_64 (pmix_atomic_int64_t *addr, int64_t *oldval, int64_t newval) { bool rc; @@ -394,7 +374,7 @@ static inline bool pmix_atomic_compare_exchange_strong_acq_64 (volatile int64_t } -static inline bool pmix_atomic_compare_exchange_strong_rel_64 (volatile int64_t *addr, int64_t *oldval, int64_t newval) +static inline bool pmix_atomic_compare_exchange_strong_rel_64 (pmix_atomic_int64_t *addr, int64_t *oldval, int64_t newval) { pmix_atomic_wmb(); return pmix_atomic_compare_exchange_strong_64 (addr, oldval, newval); @@ -402,7 +382,7 @@ static inline bool pmix_atomic_compare_exchange_strong_rel_64 (volatile int64_t #define PMIX_ATOMIC_POWERPC_DEFINE_ATOMIC_32(type, instr) \ -static inline int32_t pmix_atomic_fetch_ ## type ## _32(volatile int32_t* v, int val) \ +static inline int32_t pmix_atomic_fetch_ ## type ## _32(pmix_atomic_int32_t* v, int val) \ { \ int32_t t, old; \ \ diff --git a/opal/mca/pmix/pmix3x/pmix/src/atomics/sys/sparcv9/atomic.h b/opal/mca/pmix/pmix3x/pmix/src/atomics/sys/sparcv9/atomic.h index 45afeea54d5..e74a152090f 100644 --- a/opal/mca/pmix/pmix3x/pmix/src/atomics/sys/sparcv9/atomic.h +++ b/opal/mca/pmix/pmix3x/pmix/src/atomics/sys/sparcv9/atomic.h @@ -13,9 +13,9 @@ * Copyright (c) 2007 Sun Microsystems, Inc. All rights reserverd. * Copyright (c) 2016 Research Organization for Information Science * and Technology (RIST). All rights reserved. - * Copyright (c) 2017 Los Alamos National Security, LLC. All rights + * Copyright (c) 2017-2018 Los Alamos National Security, LLC. All rights * reserved. - * Copyright (c) 2018 Intel, Inc. All rights reserved. + * Copyright (c) 2018 Intel, Inc. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -32,7 +32,7 @@ #define ASI_P "0x80" -#define MEMBAR(type) __asm__ __volatile__ ("membar " type : : : "memory") +#define MEPMIXMBAR(type) __asm__ __volatile__ ("membar " type : : : "memory") /********************************************************************** @@ -56,19 +56,19 @@ static inline void pmix_atomic_mb(void) { - MEMBAR("#LoadLoad | #LoadStore | #StoreStore | #StoreLoad"); + MEPMIXMBAR("#LoadLoad | #LoadStore | #StoreStore | #StoreLoad"); } static inline void pmix_atomic_rmb(void) { - MEMBAR("#LoadLoad"); + MEPMIXMBAR("#LoadLoad"); } static inline void pmix_atomic_wmb(void) { - MEMBAR("#StoreStore"); + MEPMIXMBAR("#StoreStore"); } static inline void pmix_atomic_isync(void) @@ -86,7 +86,7 @@ static inline void pmix_atomic_isync(void) *********************************************************************/ #if PMIX_GCC_INLINE_ASSEMBLY -static inline bool pmix_atomic_compare_exchange_strong_32 (volatile int32_t *addr, int32_t *oldval, int32_t newval) +static inline bool pmix_atomic_compare_exchange_strong_32 (pmix_atomic_int32_t *addr, int32_t *oldval, int32_t newval) { /* casa [reg(rs1)] %asi, reg(rs2), reg(rd) * @@ -108,7 +108,7 @@ static inline bool pmix_atomic_compare_exchange_strong_32 (volatile int32_t *add } -static inline bool pmix_atomic_compare_exchange_strong_acq_32 (volatile int32_t *addr, int32_t *oldval, int32_t newval) +static inline bool pmix_atomic_compare_exchange_strong_acq_32 (pmix_atomic_int32_t *addr, int32_t *oldval, int32_t newval) { bool rc; @@ -119,7 +119,7 @@ static inline bool pmix_atomic_compare_exchange_strong_acq_32 (volatile int32_t } -static inline bool pmix_atomic_compare_exchange_strong_rel_32 (volatile int32_t *addr, int32_t *oldval, int32_t newval) +static inline bool pmix_atomic_compare_exchange_strong_rel_32 (pmix_atomic_int32_t *addr, int32_t *oldval, int32_t newval) { pmix_atomic_wmb(); return pmix_atomic_compare_exchange_strong_32 (addr, oldval, newval); @@ -128,7 
+128,7 @@ static inline bool pmix_atomic_compare_exchange_strong_rel_32 (volatile int32_t #if PMIX_ASSEMBLY_ARCH == PMIX_SPARCV9_64 -static inline bool pmix_atomic_compare_exchange_strong_64 (volatile int64_t *addr, int64_t *oldval, int64_t newval) +static inline bool pmix_atomic_compare_exchange_strong_64 (pmix_atomic_int64_t *addr, int64_t *oldval, int64_t newval) { /* casa [reg(rs1)] %asi, reg(rs2), reg(rd) * @@ -150,7 +150,7 @@ static inline bool pmix_atomic_compare_exchange_strong_64 (volatile int64_t *add #else /* PMIX_ASSEMBLY_ARCH == PMIX_SPARCV9_64 */ -static inline bool pmix_atomic_compare_exchange_strong_64 (volatile int64_t *addr, int64_t *oldval, int64_t newval) +static inline bool pmix_atomic_compare_exchange_strong_64 (pmix_atomic_int64_t *addr, int64_t *oldval, int64_t newval) { /* casa [reg(rs1)] %asi, reg(rs2), reg(rd) * @@ -180,7 +180,7 @@ static inline bool pmix_atomic_compare_exchange_strong_64 (volatile int64_t *add #endif /* PMIX_ASSEMBLY_ARCH == PMIX_SPARCV9_64 */ -static inline bool pmix_atomic_compare_exchange_strong_acq_64 (volatile int64_t *addr, int64_t *oldval, int64_t newval) +static inline bool pmix_atomic_compare_exchange_strong_acq_64 (pmix_atomic_int64_t *addr, int64_t *oldval, int64_t newval) { bool rc; @@ -191,7 +191,7 @@ static inline bool pmix_atomic_compare_exchange_strong_acq_64 (volatile int64_t } -static inline bool pmix_atomic_compare_exchange_strong_rel_64 (volatile int64_t *addr, int64_t *oldval, int64_t newval) +static inline bool pmix_atomic_compare_exchange_strong_rel_64 (pmix_atomic_int64_t *addr, int64_t *oldval, int64_t newval) { pmix_atomic_wmb(); return pmix_atomic_compare_exchange_strong_64 (addr, oldval, newval); diff --git a/opal/mca/pmix/pmix3x/pmix/src/atomics/sys/sync_builtin/atomic.h b/opal/mca/pmix/pmix3x/pmix/src/atomics/sys/sync_builtin/atomic.h index 2c91411e194..240d297f7a5 100644 --- a/opal/mca/pmix/pmix3x/pmix/src/atomics/sys/sync_builtin/atomic.h +++ 
b/opal/mca/pmix/pmix3x/pmix/src/atomics/sys/sync_builtin/atomic.h @@ -11,11 +11,11 @@ * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2011 Sandia National Laboratories. All rights reserved. - * Copyright (c) 2014-2017 Los Alamos National Security, LLC. All rights + * Copyright (c) 2014-2018 Los Alamos National Security, LLC. All rights * reserved. * Copyright (c) 2017 Research Organization for Information Science * and Technology (RIST). All rights reserved. - * Copyright (c) 2018 Intel, Inc. All rights reserved. + * Copyright (c) 2018 Intel, Inc. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -58,7 +58,7 @@ static inline void pmix_atomic_wmb(void) #define PMIX_HAVE_ATOMIC_COMPARE_EXCHANGE_32 1 -static inline bool pmix_atomic_compare_exchange_strong_32 (volatile int32_t *addr, int32_t *oldval, int32_t newval) +static inline bool pmix_atomic_compare_exchange_strong_32 (pmix_atomic_int32_t *addr, int32_t *oldval, int32_t newval) { int32_t prev = __sync_val_compare_and_swap (addr, *oldval, newval); bool ret = prev == *oldval; @@ -72,31 +72,31 @@ static inline bool pmix_atomic_compare_exchange_strong_32 (volatile int32_t *add #define PMIX_HAVE_ATOMIC_MATH_32 1 #define PMIX_HAVE_ATOMIC_ADD_32 1 -static inline int32_t pmix_atomic_fetch_add_32(volatile int32_t *addr, int32_t delta) +static inline int32_t pmix_atomic_fetch_add_32(pmix_atomic_int32_t *addr, int32_t delta) { return __sync_fetch_and_add(addr, delta); } #define PMIX_HAVE_ATOMIC_AND_32 1 -static inline int32_t pmix_atomic_fetch_and_32(volatile int32_t *addr, int32_t value) +static inline int32_t pmix_atomic_fetch_and_32(pmix_atomic_int32_t *addr, int32_t value) { return __sync_fetch_and_and(addr, value); } #define PMIX_HAVE_ATOMIC_OR_32 1 -static inline int32_t pmix_atomic_fetch_or_32(volatile int32_t *addr, int32_t value) +static inline int32_t pmix_atomic_fetch_or_32(pmix_atomic_int32_t *addr, int32_t value) { return 
__sync_fetch_and_or(addr, value); } #define PMIX_HAVE_ATOMIC_XOR_32 1 -static inline int32_t pmix_atomic_fetch_xor_32(volatile int32_t *addr, int32_t value) +static inline int32_t pmix_atomic_fetch_xor_32(pmix_atomic_int32_t *addr, int32_t value) { return __sync_fetch_and_xor(addr, value); } #define PMIX_HAVE_ATOMIC_SUB_32 1 -static inline int32_t pmix_atomic_fetch_sub_32(volatile int32_t *addr, int32_t delta) +static inline int32_t pmix_atomic_fetch_sub_32(pmix_atomic_int32_t *addr, int32_t delta) { return __sync_fetch_and_sub(addr, delta); } @@ -105,7 +105,7 @@ static inline int32_t pmix_atomic_fetch_sub_32(volatile int32_t *addr, int32_t d #define PMIX_HAVE_ATOMIC_COMPARE_EXCHANGE_64 1 -static inline bool pmix_atomic_compare_exchange_strong_64 (volatile int64_t *addr, int64_t *oldval, int64_t newval) +static inline bool pmix_atomic_compare_exchange_strong_64 (pmix_atomic_int64_t *addr, int64_t *oldval, int64_t newval) { int64_t prev = __sync_val_compare_and_swap (addr, *oldval, newval); bool ret = prev == *oldval; @@ -118,31 +118,31 @@ static inline bool pmix_atomic_compare_exchange_strong_64 (volatile int64_t *add #define PMIX_HAVE_ATOMIC_MATH_64 1 #define PMIX_HAVE_ATOMIC_ADD_64 1 -static inline int64_t pmix_atomic_fetch_add_64(volatile int64_t *addr, int64_t delta) +static inline int64_t pmix_atomic_fetch_add_64(pmix_atomic_int64_t *addr, int64_t delta) { return __sync_fetch_and_add(addr, delta); } #define PMIX_HAVE_ATOMIC_AND_64 1 -static inline int64_t pmix_atomic_fetch_and_64(volatile int64_t *addr, int64_t value) +static inline int64_t pmix_atomic_fetch_and_64(pmix_atomic_int64_t *addr, int64_t value) { return __sync_fetch_and_and(addr, value); } #define PMIX_HAVE_ATOMIC_OR_64 1 -static inline int64_t pmix_atomic_fetch_or_64(volatile int64_t *addr, int64_t value) +static inline int64_t pmix_atomic_fetch_or_64(pmix_atomic_int64_t *addr, int64_t value) { return __sync_fetch_and_or(addr, value); } #define PMIX_HAVE_ATOMIC_XOR_64 1 -static inline int64_t 
pmix_atomic_fetch_xor_64(volatile int64_t *addr, int64_t value) +static inline int64_t pmix_atomic_fetch_xor_64(pmix_atomic_int64_t *addr, int64_t value) { return __sync_fetch_and_xor(addr, value); } #define PMIX_HAVE_ATOMIC_SUB_64 1 -static inline int64_t pmix_atomic_fetch_sub_64(volatile int64_t *addr, int64_t delta) +static inline int64_t pmix_atomic_fetch_sub_64(pmix_atomic_int64_t *addr, int64_t delta) { return __sync_fetch_and_sub(addr, delta); } @@ -150,7 +150,7 @@ static inline int64_t pmix_atomic_fetch_sub_64(volatile int64_t *addr, int64_t d #endif #if PMIX_HAVE_SYNC_BUILTIN_CSWAP_INT128 -static inline bool pmix_atomic_compare_exchange_strong_128 (volatile pmix_int128_t *addr, +static inline bool pmix_atomic_compare_exchange_strong_128 (pmix_atomic_int128_t *addr, pmix_int128_t *oldval, pmix_int128_t newval) { pmix_int128_t prev = __sync_val_compare_and_swap (addr, *oldval, newval); diff --git a/opal/mca/pmix/pmix3x/pmix/src/atomics/sys/timer.h b/opal/mca/pmix/pmix3x/pmix/src/atomics/sys/timer.h index b2a562e4218..e3fdc1326e8 100644 --- a/opal/mca/pmix/pmix3x/pmix/src/atomics/sys/timer.h +++ b/opal/mca/pmix/pmix3x/pmix/src/atomics/sys/timer.h @@ -13,7 +13,7 @@ * Copyright (c) 2016 Broadcom Limited. All rights reserved. * Copyright (c) 2016-2017 Los Alamos National Security, LLC. All rights * reserved. - * Copyright (c) 2018 Intel, Inc. All rights reserved. + * Copyright (c) 2018 Intel, Inc. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow diff --git a/opal/mca/pmix/pmix3x/pmix/src/atomics/sys/x86_64/atomic.h b/opal/mca/pmix/pmix3x/pmix/src/atomics/sys/x86_64/atomic.h index f2144eb0467..005d2d66c20 100644 --- a/opal/mca/pmix/pmix3x/pmix/src/atomics/sys/x86_64/atomic.h +++ b/opal/mca/pmix/pmix3x/pmix/src/atomics/sys/x86_64/atomic.h @@ -11,11 +11,11 @@ * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2007 Sun Microsystems, Inc. All rights reserverd. 
- * Copyright (c) 2012-2017 Los Alamos National Security, LLC. All rights + * Copyright (c) 2012-2018 Los Alamos National Security, LLC. All rights * reserved. * Copyright (c) 2016-2017 Research Organization for Information Science * and Technology (RIST). All rights reserved. - * Copyright (c) 2018 Intel, Inc. All rights reserved. + * Copyright (c) 2018 Intel, Inc. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -83,7 +83,7 @@ static inline void pmix_atomic_isync(void) *********************************************************************/ #if PMIX_GCC_INLINE_ASSEMBLY -static inline bool pmix_atomic_compare_exchange_strong_32 (volatile int32_t *addr, int32_t *oldval, int32_t newval) +static inline bool pmix_atomic_compare_exchange_strong_32 (pmix_atomic_int32_t *addr, int32_t *oldval, int32_t newval) { unsigned char ret; __asm__ __volatile__ ( @@ -103,13 +103,13 @@ static inline bool pmix_atomic_compare_exchange_strong_32 (volatile int32_t *add #if PMIX_GCC_INLINE_ASSEMBLY -static inline bool pmix_atomic_compare_exchange_strong_64 (volatile int64_t *addr, int64_t *oldval, int64_t newval) +static inline bool pmix_atomic_compare_exchange_strong_64 (pmix_atomic_int64_t *addr, int64_t *oldval, int64_t newval) { unsigned char ret; __asm__ __volatile__ ( SMPLOCK "cmpxchgq %3,%2 \n\t" "sete %0 \n\t" - : "=qm" (ret), "+a" (*oldval), "+m" (*((volatile long*)addr)) + : "=qm" (ret), "+a" (*oldval), "+m" (*((pmix_atomic_long_t *)addr)) : "q"(newval) : "memory", "cc" ); @@ -124,7 +124,7 @@ static inline bool pmix_atomic_compare_exchange_strong_64 (volatile int64_t *add #if PMIX_GCC_INLINE_ASSEMBLY && PMIX_HAVE_CMPXCHG16B && HAVE_PMIX_INT128_T -static inline bool pmix_atomic_compare_exchange_strong_128 (volatile pmix_int128_t *addr, pmix_int128_t *oldval, pmix_int128_t newval) +static inline bool pmix_atomic_compare_exchange_strong_128 (pmix_atomic_int128_t *addr, pmix_int128_t *oldval, pmix_int128_t newval) { unsigned char ret; @@ -151,15 +151,15 @@ 
static inline bool pmix_atomic_compare_exchange_strong_128 (volatile pmix_int128 #define PMIX_HAVE_ATOMIC_SWAP_64 1 -static inline int32_t pmix_atomic_swap_32( volatile int32_t *addr, - int32_t newval) +static inline int32_t pmix_atomic_swap_32( pmix_atomic_int32_t *addr, + int32_t newval) { int32_t oldval; __asm__ __volatile__("xchg %1, %0" : - "=r" (oldval), "+m" (*addr) : - "0" (newval) : - "memory"); + "=r" (oldval), "+m" (*addr) : + "0" (newval) : + "memory"); return oldval; } @@ -167,15 +167,15 @@ static inline int32_t pmix_atomic_swap_32( volatile int32_t *addr, #if PMIX_GCC_INLINE_ASSEMBLY -static inline int64_t pmix_atomic_swap_64( volatile int64_t *addr, +static inline int64_t pmix_atomic_swap_64( pmix_atomic_int64_t *addr, int64_t newval) { int64_t oldval; __asm__ __volatile__("xchgq %1, %0" : - "=r" (oldval), "+m" (*addr) : - "0" (newval) : - "memory"); + "=r" (oldval), "+m" (*addr) : + "0" (newval) : + "memory"); return oldval; } @@ -197,7 +197,7 @@ static inline int64_t pmix_atomic_swap_64( volatile int64_t *addr, * * Atomically adds @i to @v. */ -static inline int32_t pmix_atomic_fetch_add_32(volatile int32_t* v, int i) +static inline int32_t pmix_atomic_fetch_add_32(pmix_atomic_int32_t* v, int i) { int ret = i; __asm__ __volatile__( @@ -218,7 +218,7 @@ static inline int32_t pmix_atomic_fetch_add_32(volatile int32_t* v, int i) * * Atomically adds @i to @v. */ -static inline int64_t pmix_atomic_fetch_add_64(volatile int64_t* v, int64_t i) +static inline int64_t pmix_atomic_fetch_add_64(pmix_atomic_int64_t* v, int64_t i) { int64_t ret = i; __asm__ __volatile__( @@ -239,7 +239,7 @@ static inline int64_t pmix_atomic_fetch_add_64(volatile int64_t* v, int64_t i) * * Atomically subtracts @i from @v. 
*/ -static inline int32_t pmix_atomic_fetch_sub_32(volatile int32_t* v, int i) +static inline int32_t pmix_atomic_fetch_sub_32(pmix_atomic_int32_t* v, int i) { int ret = -i; __asm__ __volatile__( @@ -260,7 +260,7 @@ static inline int32_t pmix_atomic_fetch_sub_32(volatile int32_t* v, int i) * * Atomically subtracts @i from @v. */ -static inline int64_t pmix_atomic_fetch_sub_64(volatile int64_t* v, int64_t i) +static inline int64_t pmix_atomic_fetch_sub_64(pmix_atomic_int64_t* v, int64_t i) { int64_t ret = -i; __asm__ __volatile__( diff --git a/opal/mca/pmix/pmix3x/pmix/src/atomics/sys/x86_64/timer.h b/opal/mca/pmix/pmix3x/pmix/src/atomics/sys/x86_64/timer.h index 6b25f9aafee..31054235a55 100644 --- a/opal/mca/pmix/pmix3x/pmix/src/atomics/sys/x86_64/timer.h +++ b/opal/mca/pmix/pmix3x/pmix/src/atomics/sys/x86_64/timer.h @@ -12,7 +12,7 @@ * All rights reserved. * Copyright (c) 2016 Los Alamos National Security, LLC. ALl rights * reserved. - * Copyright (c) 2018 Intel, Inc. All rights reserved. + * Copyright (c) 2018 Intel, Inc. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow diff --git a/opal/mca/pmix/pmix3x/pmix/src/class/pmix_hotel.h b/opal/mca/pmix/pmix3x/pmix/src/class/pmix_hotel.h index 02a246bfdd5..883a2c5c6ce 100644 --- a/opal/mca/pmix/pmix3x/pmix/src/class/pmix_hotel.h +++ b/opal/mca/pmix/pmix3x/pmix/src/class/pmix_hotel.h @@ -2,7 +2,7 @@ /* * Copyright (c) 2012-2016 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2012 Los Alamos National Security, LLC. All rights reserved - * Copyright (c) 2015-2018 Intel, Inc. All rights reserved. + * Copyright (c) 2015-2019 Intel, Inc. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -190,6 +190,7 @@ static inline pmix_status_t pmix_hotel_checkin(pmix_hotel_t *hotel, /* Do we have any rooms available? 
*/ if (PMIX_UNLIKELY(hotel->last_unoccupied_room < 0)) { + *room_num = -1; return PMIX_ERR_OUT_OF_RESOURCE; } @@ -247,6 +248,10 @@ static inline void pmix_hotel_checkout(pmix_hotel_t *hotel, int room_num) /* Bozo check */ assert(room_num < hotel->num_rooms); + if (0 > room_num) { + /* occupant wasn't checked in */ + return; + } /* If there's an occupant in the room, check them out */ room = &(hotel->rooms[room_num]); @@ -285,6 +290,11 @@ static inline void pmix_hotel_checkout_and_return_occupant(pmix_hotel_t *hotel, /* Bozo check */ assert(room_num < hotel->num_rooms); + if (0 > room_num) { + /* occupant wasn't checked in */ + *occupant = NULL; + return; + } /* If there's an occupant in the room, check them out */ room = &(hotel->rooms[room_num]); @@ -339,6 +349,10 @@ static inline void pmix_hotel_knock(pmix_hotel_t *hotel, int room_num, void **oc assert(room_num < hotel->num_rooms); *occupant = NULL; + if (0 > room_num) { + /* occupant wasn't checked in */ + return; + } /* If there's an occupant in the room, have them come to the door */ room = &(hotel->rooms[room_num]); diff --git a/opal/mca/pmix/pmix3x/pmix/src/class/pmix_list.h b/opal/mca/pmix/pmix3x/pmix/src/class/pmix_list.h index df3f6a2280d..f29bea22a95 100644 --- a/opal/mca/pmix/pmix3x/pmix/src/class/pmix_list.h +++ b/opal/mca/pmix/pmix3x/pmix/src/class/pmix_list.h @@ -13,7 +13,7 @@ * Copyright (c) 2007 Voltaire All rights reserved. * Copyright (c) 2013 Los Alamos National Security, LLC. All rights * reserved. - * Copyright (c) 2013-2017 Intel, Inc. All rights reserved. + * Copyright (c) 2013-2018 Intel, Inc. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -109,7 +109,7 @@ struct pmix_list_item_t #if PMIX_ENABLE_DEBUG /** Atomic reference count for debugging */ - volatile int32_t pmix_list_item_refcount; + pmix_atomic_int32_t pmix_list_item_refcount; /** The list this item belong to */ volatile struct pmix_list_t* pmix_list_item_belong_to; #endif diff --git a/opal/mca/pmix/pmix3x/pmix/src/class/pmix_object.h b/opal/mca/pmix/pmix3x/pmix/src/class/pmix_object.h index 8ad7f90f105..7b7ed41a72c 100644 --- a/opal/mca/pmix/pmix3x/pmix/src/class/pmix_object.h +++ b/opal/mca/pmix/pmix3x/pmix/src/class/pmix_object.h @@ -192,7 +192,7 @@ struct pmix_object_t { uint64_t obj_magic_id; #endif pmix_class_t *obj_class; /**< class descriptor */ - volatile int32_t obj_reference_count; /**< reference count */ + pmix_atomic_int32_t obj_reference_count; /**< reference count */ #if PMIX_ENABLE_DEBUG const char* cls_init_file_name; /**< In debug mode store the file where the object get contructed */ int cls_init_lineno; /**< In debug mode store the line number where the object get contructed */ diff --git a/opal/mca/pmix/pmix3x/pmix/src/client/pmi1.c b/opal/mca/pmix/pmix3x/pmix/src/client/pmi1.c index 1c8517ea0e7..56774e4fb24 100644 --- a/opal/mca/pmix/pmix3x/pmix/src/client/pmi1.c +++ b/opal/mca/pmix/pmix3x/pmix/src/client/pmi1.c @@ -1,8 +1,8 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* - * Copyright (c) 2014-2017 Intel, Inc. All rights reserved. - * Copyright (c) 2014 Research Organization for Information Science - * and Technology (RIST). All rights reserved. + * Copyright (c) 2014-2019 Intel, Inc. All rights reserved. + * Copyright (c) 2014-2019 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * Copyright (c) 2016 Mellanox Technologies, Inc. * All rights reserved. 
* $COPYRIGHT$ @@ -75,7 +75,7 @@ PMIX_EXPORT int PMI_Init(int *spawned) *spawned = 0; } pmi_singleton = true; - (void)strncpy(myproc.nspace, "1234", PMIX_MAX_NSLEN); + pmix_strncpy(myproc.nspace, "1234", PMIX_MAX_NSLEN); myproc.rank = 0; pmi_init = 1; return PMI_SUCCESS; @@ -85,7 +85,7 @@ PMIX_EXPORT int PMI_Init(int *spawned) /* getting internal key requires special rank value */ memcpy(&proc, &myproc, sizeof(myproc)); - proc.rank = PMIX_RANK_UNDEF; + proc.rank = PMIX_RANK_WILDCARD; /* set controlling parameters * PMIX_OPTIONAL - expect that these keys should be available on startup @@ -242,7 +242,7 @@ PMIX_EXPORT int PMI_KVS_Get( const char kvsname[], const char key[], char value[ proc.rank = PMIX_RANK_WILDCARD; if (PMIX_SUCCESS == PMIx_Get(&proc, PMIX_ANL_MAP, NULL, 0, &val) && (NULL != val) && (PMIX_STRING == val->type)) { - strncpy(value, val->data.string, length); + pmix_strncpy(value, val->data.string, length-1); PMIX_VALUE_FREE(val, 1); return PMI_SUCCESS; } else { @@ -259,7 +259,7 @@ PMIX_EXPORT int PMI_KVS_Get( const char kvsname[], const char key[], char value[ /* retrieve the data from PMIx - since we don't have a rank, * we indicate that by passing the UNDEF value */ - (void)strncpy(proc.nspace, kvsname, PMIX_MAX_NSLEN); + pmix_strncpy(proc.nspace, kvsname, PMIX_MAX_NSLEN); proc.rank = PMIX_RANK_UNDEF; rc = PMIx_Get(&proc, key, NULL, 0, &val); @@ -267,7 +267,7 @@ PMIX_EXPORT int PMI_KVS_Get( const char kvsname[], const char key[], char value[ if (PMIX_STRING != val->type) { rc = PMIX_ERROR; } else if (NULL != val->data.string) { - (void)strncpy(value, val->data.string, length); + pmix_strncpy(value, val->data.string, length-1); } PMIX_VALUE_RELEASE(val); } @@ -394,8 +394,6 @@ PMIX_EXPORT int PMI_Get_appnum(int *appnum) pmix_value_t *val; pmix_info_t info[1]; bool val_optinal = 1; - pmix_proc_t proc = myproc; - proc.rank = PMIX_RANK_WILDCARD; PMI_CHECK(); @@ -414,11 +412,11 @@ PMIX_EXPORT int PMI_Get_appnum(int *appnum) PMIX_INFO_CONSTRUCT(&info[0]); 
PMIX_INFO_LOAD(&info[0], PMIX_OPTIONAL, &val_optinal, PMIX_BOOL); - rc = PMIx_Get(&proc, PMIX_APPNUM, info, 1, &val); + rc = PMIx_Get(&myproc, PMIX_APPNUM, info, 1, &val); if (PMIX_SUCCESS == rc) { rc = convert_int(appnum, val); PMIX_VALUE_RELEASE(val); - } else if( PMIX_ERR_NOT_FOUND == rc ){ + } else { /* this is optional value, set to 0 */ *appnum = 0; rc = PMIX_SUCCESS; @@ -445,7 +443,7 @@ PMIX_EXPORT int PMI_Publish_name(const char service_name[], const char port[]) } /* pass the service/port */ - (void) strncpy(info.key, service_name, PMIX_MAX_KEYLEN); + pmix_strncpy(info.key, service_name, PMIX_MAX_KEYLEN); info.value.type = PMIX_STRING; info.value.data.string = (char*) port; @@ -497,7 +495,7 @@ PMIX_EXPORT int PMI_Lookup_name(const char service_name[], char port[]) PMIX_PDATA_CONSTRUCT(&pdata); /* pass the service */ - (void) strncpy(pdata.key, service_name, PMIX_MAX_KEYLEN); + pmix_strncpy(pdata.key, service_name, PMIX_MAX_KEYLEN); /* PMI-1 doesn't want the nspace back */ if (PMIX_SUCCESS != (rc = PMIx_Lookup(&pdata, 1, NULL, 0))) { @@ -514,7 +512,7 @@ PMIX_EXPORT int PMI_Lookup_name(const char service_name[], char port[]) * potential we could overrun it. 
As this feature * isn't widely supported in PMI-1, try being * conservative */ - (void) strncpy(port, pdata.value.data.string, PMIX_MAX_KEYLEN); + pmix_strncpy(port, pdata.value.data.string, PMIX_MAX_KEYLEN); PMIX_PDATA_DESTRUCT(&pdata); return PMIX_SUCCESS; @@ -535,7 +533,7 @@ PMIX_EXPORT int PMI_Get_id(char id_str[], int length) return PMI_ERR_INVALID_LENGTH; } - (void) strncpy(id_str, myproc.nspace, length); + pmix_strncpy(id_str, myproc.nspace, length-1); return PMI_SUCCESS; } @@ -742,7 +740,7 @@ PMIX_EXPORT int PMI_Spawn_multiple(int count, apps[i].info = (pmix_info_t*)malloc(apps[i].ninfo * sizeof(pmix_info_t)); /* copy the info objects */ for (j = 0; j < apps[i].ninfo; j++) { - (void)strncpy(apps[i].info[j].key, info_keyval_vectors[i][j].key, PMIX_MAX_KEYLEN); + pmix_strncpy(apps[i].info[j].key, info_keyval_vectors[i][j].key, PMIX_MAX_KEYLEN); apps[i].info[j].value.type = PMIX_STRING; apps[i].info[j].value.data.string = strdup(info_keyval_vectors[i][j].val); } diff --git a/opal/mca/pmix/pmix3x/pmix/src/client/pmi2.c b/opal/mca/pmix/pmix3x/pmix/src/client/pmi2.c index 2ad443c7382..4adf6ba37de 100644 --- a/opal/mca/pmix/pmix3x/pmix/src/client/pmi2.c +++ b/opal/mca/pmix/pmix3x/pmix/src/client/pmi2.c @@ -79,7 +79,7 @@ PMIX_EXPORT int PMI2_Init(int *spawned, int *size, int *rank, int *appnum) *appnum = 0; } pmi2_singleton = true; - (void)strncpy(myproc.nspace, "1234", PMIX_MAX_NSLEN); + pmix_strncpy(myproc.nspace, "1234", PMIX_MAX_NSLEN); myproc.rank = 0; pmi2_init = 1; return PMI2_SUCCESS; @@ -227,7 +227,7 @@ PMIX_EXPORT int PMI2_Job_Spawn(int count, const char * cmds[], apps[i].info = (pmix_info_t*)malloc(apps[i].ninfo * sizeof(pmix_info_t)); /* copy the info objects */ for (j=0; j < apps[i].ninfo; j++) { - (void)strncpy(apps[i].info[j].key, info_keyval_vectors[i][j].key, PMIX_MAX_KEYLEN); + pmix_strncpy(apps[i].info[j].key, info_keyval_vectors[i][j].key, PMIX_MAX_KEYLEN); apps[i].info[j].value.type = PMIX_STRING; apps[i].info[j].value.data.string = 
strdup(info_keyval_vectors[i][j].val); } @@ -271,7 +271,7 @@ PMIX_EXPORT int PMI2_Job_GetId(char jobid[], int jobid_size) if (NULL == jobid) { return PMI2_ERR_INVALID_ARGS; } - (void)strncpy(jobid, myproc.nspace, jobid_size); + pmix_strncpy(jobid, myproc.nspace, jobid_size-1); return PMI2_SUCCESS; } @@ -339,7 +339,7 @@ PMIX_EXPORT int PMI2_Job_Connect(const char jobid[], PMI2_Connect_comm_t *conn) } memset(proc.nspace, 0, sizeof(proc.nspace)); - (void)strncpy(proc.nspace, (jobid ? jobid : proc.nspace), sizeof(proc.nspace)-1); + pmix_strncpy(proc.nspace, (jobid ? jobid : proc.nspace), PMIX_MAX_NSLEN); proc.rank = PMIX_RANK_WILDCARD; rc = PMIx_Connect(&proc, 1, NULL, 0); return convert_err(rc); @@ -357,7 +357,7 @@ PMIX_EXPORT int PMI2_Job_Disconnect(const char jobid[]) } memset(proc.nspace, 0, sizeof(proc.nspace)); - (void)strncpy(proc.nspace, (jobid ? jobid : proc.nspace), sizeof(proc.nspace)-1); + pmix_strncpy(proc.nspace, (jobid ? jobid : proc.nspace), PMIX_MAX_NSLEN); proc.rank = PMIX_RANK_WILDCARD; rc = PMIx_Disconnect(&proc, 1, NULL, 0); return convert_err(rc); @@ -455,7 +455,7 @@ PMIX_EXPORT int PMI2_KVS_Get(const char *jobid, int src_pmi_id, pmix_output_verbose(3, pmix_globals.debug_output, "PMI2_KVS_Get: key=%s jobid=%s src_pmi_id=%d", key, (jobid ? jobid : "null"), src_pmi_id); - (void)strncpy(proc.nspace, (jobid ? jobid : myproc.nspace), PMIX_MAX_NSLEN); + pmix_strncpy(proc.nspace, (jobid ? 
jobid : myproc.nspace), PMIX_MAX_NSLEN); if (src_pmi_id == PMI2_ID_NULL) { /* the rank is UNDEF */ proc.rank = PMIX_RANK_UNDEF; @@ -468,7 +468,7 @@ PMIX_EXPORT int PMI2_KVS_Get(const char *jobid, int src_pmi_id, if (PMIX_STRING != val->type) { rc = PMIX_ERROR; } else if (NULL != val->data.string) { - (void)strncpy(value, val->data.string, maxvalue); + pmix_strncpy(value, val->data.string, maxvalue-1); *vallen = strlen(val->data.string); } PMIX_VALUE_RELEASE(val); @@ -511,7 +511,7 @@ PMIX_EXPORT int PMI2_Info_GetNodeAttr(const char name[], if (PMIX_STRING != val->type) { rc = PMIX_ERROR; } else if (NULL != val->data.string) { - (void)strncpy(value, val->data.string, valuelen); + pmix_strncpy(value, val->data.string, valuelen-1); *found = 1; } PMIX_VALUE_RELEASE(val); @@ -586,7 +586,7 @@ PMIX_EXPORT int PMI2_Info_GetJobAttr(const char name[], char value[], int valuel proc.rank = PMIX_RANK_WILDCARD; if (PMIX_SUCCESS == PMIx_Get(&proc, PMIX_ANL_MAP, NULL, 0, &val) && (NULL != val) && (PMIX_STRING == val->type)) { - strncpy(value, val->data.string, valuelen); + pmix_strncpy(value, val->data.string, valuelen); PMIX_VALUE_FREE(val, 1); *found = 1; return PMI2_SUCCESS; @@ -610,7 +610,7 @@ PMIX_EXPORT int PMI2_Info_GetJobAttr(const char name[], char value[], int valuel if (PMIX_STRING != val->type) { rc = PMIX_ERROR; } else if (NULL != val->data.string) { - (void)strncpy(value, val->data.string, valuelen); + pmix_strncpy(value, val->data.string, valuelen-1); *found = 1; } PMIX_VALUE_RELEASE(val); @@ -648,14 +648,14 @@ PMIX_EXPORT int PMI2_Nameserv_publish(const char service_name[], } /* pass the service/port */ - (void)strncpy(info[0].key, service_name, PMIX_MAX_KEYLEN); + pmix_strncpy(info[0].key, service_name, PMIX_MAX_KEYLEN); info[0].value.type = PMIX_STRING; info[0].value.data.string = (char*)port; nvals = 1; /* if provided, add any other value */ if (NULL != info_ptr) { - (void)strncpy(info[1].key, info_ptr->key, PMIX_MAX_KEYLEN); + pmix_strncpy(info[1].key, 
info_ptr->key, PMIX_MAX_KEYLEN); info[1].value.type = PMIX_STRING; info[1].value.data.string = (char*)info_ptr->val; nvals = 2; @@ -689,12 +689,12 @@ PMIX_EXPORT int PMI2_Nameserv_lookup(const char service_name[], PMIX_PDATA_CONSTRUCT(&pdata[1]); /* pass the service */ - (void)strncpy(pdata[0].key, service_name, PMIX_MAX_KEYLEN); + pmix_strncpy(pdata[0].key, service_name, PMIX_MAX_KEYLEN); nvals = 1; /* if provided, add any other value */ if (NULL != info_ptr) { - (void)strncpy(pdata[1].key, info_ptr->key, PMIX_MAX_KEYLEN); + pmix_strncpy(pdata[1].key, info_ptr->key, PMIX_MAX_KEYLEN); pdata[1].value.type = PMIX_STRING; pdata[1].value.data.string = info_ptr->val; nvals = 2; @@ -716,7 +716,7 @@ PMIX_EXPORT int PMI2_Nameserv_lookup(const char service_name[], } /* return the port */ - (void)strncpy(port, pdata[0].value.data.string, portLen); + pmix_strncpy(port, pdata[0].value.data.string, portLen-1); PMIX_PDATA_DESTRUCT(&pdata[0]); if (NULL != info_ptr) { diff --git a/opal/mca/pmix/pmix3x/pmix/src/client/pmix_client.c b/opal/mca/pmix/pmix3x/pmix/src/client/pmix_client.c index 8f0d21390fe..8d522e7a4fa 100644 --- a/opal/mca/pmix/pmix3x/pmix/src/client/pmix_client.c +++ b/opal/mca/pmix/pmix3x/pmix/src/client/pmix_client.c @@ -1,6 +1,6 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* - * Copyright (c) 2014-2018 Intel, Inc. All rights reserved. + * Copyright (c) 2014-2019 Intel, Inc. All rights reserved. * Copyright (c) 2014-2018 Research Organization for Information Science * and Technology (RIST). All rights reserved. * Copyright (c) 2014 Artem Y. Polyakov . @@ -50,10 +50,12 @@ #include #endif #include PMIX_EVENT_HEADER +#if ! 
PMIX_HAVE_LIBEV #include PMIX_EVENT2_THREAD_HEADER +#endif static const char pmix_version_string[] = PMIX_VERSION; - +static pmix_status_t pmix_init_result = PMIX_ERR_INIT; #include "src/class/pmix_list.h" #include "src/event/pmix_event.h" @@ -169,14 +171,9 @@ static void pmix_client_notify_recv(struct pmix_peer_t *peer, PMIX_RELEASE(chain); goto error; } - /* check for non-default flag */ - for (cnt=0; cnt < (int)ninfo; cnt++) { - if (0 == strncmp(chain->info[cnt].key, PMIX_EVENT_NON_DEFAULT, PMIX_MAX_KEYLEN)) { - chain->nondefault = PMIX_INFO_TRUE(&chain->info[cnt]); - break; - } - } } + /* prep the chain for processing */ + pmix_prep_event_chain(chain, chain->info, ninfo, false); pmix_output_verbose(2, pmix_client_globals.base_output, "[%s:%d] pmix:client_notify_recv - processing event %s, calling errhandler", @@ -273,6 +270,9 @@ static void notification_fn(size_t evhdlr_registration_id, char *name = NULL; size_t n; + pmix_output_verbose(2, pmix_client_globals.base_output, + "[%s:%d] DEBUGGER RELEASE RECVD", + pmix_globals.myid.nspace, pmix_globals.myid.rank); if (NULL != info) { lock = NULL; for (n=0; n < ninfo; n++) { @@ -444,7 +444,7 @@ PMIX_EXPORT pmix_status_t PMIx_Init(pmix_proc_t *proc, * rank should be known. 
So return them here if * requested */ if (NULL != proc) { - (void)strncpy(proc->nspace, pmix_globals.myid.nspace, PMIX_MAX_NSLEN); + pmix_strncpy(proc->nspace, pmix_globals.myid.nspace, PMIX_MAX_NSLEN); proc->rank = pmix_globals.myid.rank; } ++pmix_globals.init_cntr; @@ -455,10 +455,13 @@ PMIX_EXPORT pmix_status_t PMIx_Init(pmix_proc_t *proc, if (NULL != info) { _check_for_notify(info, ninfo); } - return PMIX_SUCCESS; + return pmix_init_result; } + ++pmix_globals.init_cntr; + /* if we don't see the required info, then we cannot init */ if (NULL == (evar = getenv("PMIX_NAMESPACE"))) { + pmix_init_result = PMIX_ERR_INVALID_NAMESPACE; PMIX_RELEASE_THREAD(&pmix_global_lock); return PMIX_ERR_INVALID_NAMESPACE; } @@ -468,6 +471,7 @@ PMIX_EXPORT pmix_status_t PMIx_Init(pmix_proc_t *proc, if (PMIX_SUCCESS != (rc = pmix_rte_init(PMIX_PROC_CLIENT, info, ninfo, pmix_client_notify_recv))) { PMIX_ERROR_LOG(rc); + pmix_init_result = rc; PMIX_RELEASE_THREAD(&pmix_global_lock); return rc; } @@ -485,24 +489,25 @@ PMIX_EXPORT pmix_status_t PMIx_Init(pmix_proc_t *proc, pmix_pointer_array_init(&pmix_client_globals.peers, 1, INT_MAX, 1); pmix_client_globals.myserver = PMIX_NEW(pmix_peer_t); if (NULL == pmix_client_globals.myserver) { + pmix_init_result = PMIX_ERR_NOMEM; PMIX_RELEASE_THREAD(&pmix_global_lock); return PMIX_ERR_NOMEM; } - pmix_client_globals.myserver->nptr = PMIX_NEW(pmix_nspace_t); + pmix_client_globals.myserver->nptr = PMIX_NEW(pmix_namespace_t); if (NULL == pmix_client_globals.myserver->nptr) { PMIX_RELEASE(pmix_client_globals.myserver); + pmix_init_result = PMIX_ERR_NOMEM; PMIX_RELEASE_THREAD(&pmix_global_lock); return PMIX_ERR_NOMEM; } pmix_client_globals.myserver->info = PMIX_NEW(pmix_rank_info_t); if (NULL == pmix_client_globals.myserver->info) { PMIX_RELEASE(pmix_client_globals.myserver); + pmix_init_result = PMIX_ERR_NOMEM; PMIX_RELEASE_THREAD(&pmix_global_lock); return PMIX_ERR_NOMEM; } - pmix_output_verbose(2, pmix_client_globals.base_output, - "pmix: init 
called"); /* setup the base verbosity */ if (0 < pmix_client_globals.base_verbose) { /* set default output */ @@ -511,17 +516,21 @@ PMIX_EXPORT pmix_status_t PMIx_Init(pmix_proc_t *proc, pmix_client_globals.base_verbose); } + pmix_output_verbose(2, pmix_client_globals.base_output, + "pmix: init called"); + /* we require our nspace */ if (NULL != proc) { - (void)strncpy(proc->nspace, evar, PMIX_MAX_NSLEN); + pmix_strncpy(proc->nspace, evar, PMIX_MAX_NSLEN); } - (void)strncpy(pmix_globals.myid.nspace, evar, PMIX_MAX_NSLEN); - /* set the global pmix_nspace_t object for our peer */ + PMIX_LOAD_NSPACE(pmix_globals.myid.nspace, evar); + /* set the global pmix_namespace_t object for our peer */ pmix_globals.mypeer->nptr->nspace = strdup(evar); /* we also require our rank */ if (NULL == (evar = getenv("PMIX_RANK"))) { /* let the caller know that the server isn't available yet */ + pmix_init_result = PMIX_ERR_DATA_VALUE_NOT_FOUND; PMIX_RELEASE_THREAD(&pmix_global_lock); return PMIX_ERR_DATA_VALUE_NOT_FOUND; } @@ -533,6 +542,7 @@ PMIX_EXPORT pmix_status_t PMIx_Init(pmix_proc_t *proc, /* setup a rank_info object for us */ pmix_globals.mypeer->info = PMIX_NEW(pmix_rank_info_t); if (NULL == pmix_globals.mypeer->info) { + pmix_init_result = PMIX_ERR_NOMEM; PMIX_RELEASE_THREAD(&pmix_global_lock); return PMIX_ERR_NOMEM; } @@ -545,6 +555,7 @@ PMIX_EXPORT pmix_status_t PMIx_Init(pmix_proc_t *proc, evar = getenv("PMIX_SECURITY_MODE"); pmix_globals.mypeer->nptr->compat.psec = pmix_psec_base_assign_module(evar); if (NULL == pmix_globals.mypeer->nptr->compat.psec) { + pmix_init_result = PMIX_ERR_INIT; PMIX_RELEASE_THREAD(&pmix_global_lock); return PMIX_ERR_INIT; } @@ -579,6 +590,7 @@ PMIX_EXPORT pmix_status_t PMIx_Init(pmix_proc_t *proc, pmix_client_globals.myserver->nptr->compat.gds = pmix_gds_base_assign_module(NULL, 0); } if (NULL == pmix_client_globals.myserver->nptr->compat.gds) { + pmix_init_result = PMIX_ERR_INIT; PMIX_RELEASE_THREAD(&pmix_global_lock); return PMIX_ERR_INIT; } @@ 
-601,6 +613,7 @@ PMIX_EXPORT pmix_status_t PMIx_Init(pmix_proc_t *proc, pmix_globals.mypeer->nptr->compat.gds = pmix_gds_base_assign_module(&ginfo, 1); if (NULL == pmix_globals.mypeer->nptr->compat.gds) { PMIX_INFO_DESTRUCT(&ginfo); + pmix_init_result = PMIX_ERR_INIT; PMIX_RELEASE_THREAD(&pmix_global_lock); return PMIX_ERR_INIT; } @@ -609,6 +622,7 @@ PMIX_EXPORT pmix_status_t PMIx_Init(pmix_proc_t *proc, /* connect to the server */ rc = pmix_ptl_base_connect_to_peer((struct pmix_peer_t*)pmix_client_globals.myserver, info, ninfo); if (PMIX_SUCCESS != rc) { + pmix_init_result = rc; PMIX_RELEASE_THREAD(&pmix_global_lock); return rc; } @@ -624,6 +638,7 @@ PMIX_EXPORT pmix_status_t PMIx_Init(pmix_proc_t *proc, if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); PMIX_RELEASE(req); + pmix_init_result = rc; PMIX_RELEASE_THREAD(&pmix_global_lock); return rc; } @@ -632,6 +647,7 @@ PMIX_EXPORT pmix_status_t PMIx_Init(pmix_proc_t *proc, PMIX_PTL_SEND_RECV(rc, pmix_client_globals.myserver, req, job_data, (void*)&cb); if (PMIX_SUCCESS != rc) { + pmix_init_result = rc; PMIX_RELEASE_THREAD(&pmix_global_lock); return rc; } @@ -641,15 +657,16 @@ PMIX_EXPORT pmix_status_t PMIx_Init(pmix_proc_t *proc, PMIX_DESTRUCT(&cb); if (PMIX_SUCCESS == rc) { - pmix_globals.init_cntr++; + pmix_init_result = PMIX_SUCCESS; } else { + pmix_init_result = rc; PMIX_RELEASE_THREAD(&pmix_global_lock); return rc; } PMIX_RELEASE_THREAD(&pmix_global_lock); /* look for a debugger attach key */ - (void)strncpy(wildcard.nspace, pmix_globals.myid.nspace, PMIX_MAX_NSLEN); + pmix_strncpy(wildcard.nspace, pmix_globals.myid.nspace, PMIX_MAX_NSLEN); wildcard.rank = PMIX_RANK_WILDCARD; PMIX_INFO_LOAD(&ginfo, PMIX_OPTIONAL, NULL, PMIX_BOOL); if (PMIX_SUCCESS == PMIx_Get(&wildcard, PMIX_DEBUG_STOP_IN_INIT, &ginfo, 1, &val)) { @@ -660,7 +677,9 @@ PMIX_EXPORT pmix_status_t PMIx_Init(pmix_proc_t *proc, PMIX_CONSTRUCT_LOCK(&releaselock); PMIX_INFO_LOAD(&evinfo[0], PMIX_EVENT_RETURN_OBJECT, &releaselock, PMIX_POINTER); 
PMIX_INFO_LOAD(&evinfo[1], PMIX_EVENT_HDLR_NAME, "WAIT-FOR-DEBUGGER", PMIX_STRING); - + pmix_output_verbose(2, pmix_client_globals.base_output, + "[%s:%d] WAITING IN INIT FOR DEBUGGER", + pmix_globals.myid.nspace, pmix_globals.myid.rank); PMIx_Register_event_handler(&code, 1, evinfo, 2, notification_fn, evhandler_reg_callbk, (void*)®lock); /* wait for registration to complete */ @@ -1008,7 +1027,9 @@ static void _putfn(int sd, short args, void *cbdata) PMIX_WAKEUP_THREAD(&cb->lock); } -PMIX_EXPORT pmix_status_t PMIx_Put(pmix_scope_t scope, const char key[], pmix_value_t *val) +PMIX_EXPORT pmix_status_t PMIx_Put(pmix_scope_t scope, + const pmix_key_t key, + pmix_value_t *val) { pmix_cb_t *cb; pmix_status_t rc; @@ -1217,7 +1238,7 @@ static void _resolve_peers(int sd, short args, void *cbdata) /* need to thread-shift this request */ PMIX_EXPORT pmix_status_t PMIx_Resolve_peers(const char *nodename, - const char *nspace, + const pmix_nspace_t nspace, pmix_proc_t **procs, size_t *nprocs) { pmix_cb_t *cb; @@ -1244,7 +1265,7 @@ PMIX_EXPORT pmix_status_t PMIx_Resolve_peers(const char *nodename, /* if the nspace wasn't found, then we need to * ask the server for that info */ if (PMIX_ERR_INVALID_NAMESPACE == cb->status) { - (void)strncpy(proc.nspace, nspace, PMIX_MAX_NSLEN); + pmix_strncpy(proc.nspace, nspace, PMIX_MAX_NSLEN); proc.rank = PMIX_RANK_WILDCARD; /* any key will suffice as it will bring down * the entire data blob */ @@ -1290,7 +1311,7 @@ static void _resolve_nodes(int fd, short args, void *cbdata) } /* need to thread-shift this request */ -PMIX_EXPORT pmix_status_t PMIx_Resolve_nodes(const char *nspace, char **nodelist) +PMIX_EXPORT pmix_status_t PMIx_Resolve_nodes(const pmix_nspace_t nspace, char **nodelist) { pmix_cb_t *cb; pmix_status_t rc; @@ -1314,7 +1335,7 @@ PMIX_EXPORT pmix_status_t PMIx_Resolve_nodes(const char *nspace, char **nodelist /* if the nspace wasn't found, then we need to * ask the server for that info */ if (PMIX_ERR_INVALID_NAMESPACE == 
cb->status) { - (void)strncpy(proc.nspace, nspace, PMIX_MAX_NSLEN); + pmix_strncpy(proc.nspace, nspace, PMIX_MAX_NSLEN); proc.rank = PMIX_RANK_WILDCARD; /* any key will suffice as it will bring down * the entire data blob */ diff --git a/opal/mca/pmix/pmix3x/pmix/src/client/pmix_client_fence.c b/opal/mca/pmix/pmix3x/pmix/src/client/pmix_client_fence.c index d3e182d584d..adac9bbf4d3 100644 --- a/opal/mca/pmix/pmix3x/pmix/src/client/pmix_client_fence.c +++ b/opal/mca/pmix/pmix3x/pmix/src/client/pmix_client_fence.c @@ -1,8 +1,8 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* - * Copyright (c) 2014-2017 Intel, Inc. All rights reserved. - * Copyright (c) 2014-2015 Research Organization for Information Science - * and Technology (RIST). All rights reserved. + * Copyright (c) 2014-2019 Intel, Inc. All rights reserved. + * Copyright (c) 2014-2019 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * Copyright (c) 2014 Artem Y. Polyakov . * All rights reserved. * Copyright (c) 2016 Mellanox Technologies, Inc. 
@@ -73,7 +73,7 @@ PMIX_EXPORT pmix_status_t PMIx_Fence(const pmix_proc_t procs[], size_t nprocs, PMIX_ACQUIRE_THREAD(&pmix_global_lock); - pmix_output_verbose(2, pmix_globals.debug_output, + pmix_output_verbose(2, pmix_client_globals.fence_output, "pmix: executing fence"); if (pmix_globals.init_cntr <= 0) { @@ -106,7 +106,7 @@ PMIX_EXPORT pmix_status_t PMIx_Fence(const pmix_proc_t procs[], size_t nprocs, rc = cb->status; PMIX_RELEASE(cb); - pmix_output_verbose(2, pmix_globals.debug_output, + pmix_output_verbose(2, pmix_client_globals.fence_output, "pmix: fence released"); return rc; @@ -125,7 +125,7 @@ PMIX_EXPORT pmix_status_t PMIx_Fence_nb(const pmix_proc_t procs[], size_t nprocs PMIX_ACQUIRE_THREAD(&pmix_global_lock); - pmix_output_verbose(2, pmix_globals.debug_output, + pmix_output_verbose(2, pmix_client_globals.fence_output, "pmix: fence_nb called"); if (pmix_globals.init_cntr <= 0) { @@ -147,7 +147,7 @@ PMIX_EXPORT pmix_status_t PMIx_Fence_nb(const pmix_proc_t procs[], size_t nprocs /* if we are given a NULL proc, then the caller is referencing * all procs within our own nspace */ if (NULL == procs) { - (void)strncpy(rg.nspace, pmix_globals.myid.nspace, PMIX_MAX_NSLEN); + pmix_strncpy(rg.nspace, pmix_globals.myid.nspace, PMIX_MAX_NSLEN); rg.rank = PMIX_RANK_WILDCARD; rgs = &rg; nrg = 1; @@ -185,7 +185,7 @@ static pmix_status_t unpack_return(pmix_buffer_t *data) pmix_status_t ret; int32_t cnt; - pmix_output_verbose(2, pmix_globals.debug_output, + pmix_output_verbose(2, pmix_client_globals.fence_output, "client:unpack fence called"); /* unpack the status code */ @@ -196,7 +196,7 @@ static pmix_status_t unpack_return(pmix_buffer_t *data) PMIX_ERROR_LOG(rc); return rc; } - pmix_output_verbose(2, pmix_globals.debug_output, + pmix_output_verbose(2, pmix_client_globals.fence_output, "client:unpack fence received status %d", ret); return ret; } @@ -255,7 +255,7 @@ static void wait_cbfunc(struct pmix_peer_t *pr, pmix_ptl_hdr_t *hdr, pmix_cb_t *cb = (pmix_cb_t*)cbdata; 
pmix_status_t rc; - pmix_output_verbose(2, pmix_globals.debug_output, + pmix_output_verbose(2, pmix_client_globals.fence_output, "pmix: fence_nb callback recvd"); if (NULL == cb) { diff --git a/opal/mca/pmix/pmix3x/pmix/src/client/pmix_client_get.c b/opal/mca/pmix/pmix3x/pmix/src/client/pmix_client_get.c index 04a2e09184a..b620c05cf29 100644 --- a/opal/mca/pmix/pmix3x/pmix/src/client/pmix_client_get.c +++ b/opal/mca/pmix/pmix3x/pmix/src/client/pmix_client_get.c @@ -1,11 +1,11 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* - * Copyright (c) 2014-2018 Intel, Inc. All rights reserved. + * Copyright (c) 2014-2019 Intel, Inc. All rights reserved. * Copyright (c) 2014-2016 Research Organization for Information Science * and Technology (RIST). All rights reserved. * Copyright (c) 2014 Artem Y. Polyakov . * All rights reserved. - * Copyright (c) 2016 Mellanox Technologies, Inc. + * Copyright (c) 2016-2018 Mellanox Technologies, Inc. * All rights reserved. * Copyright (c) 2016 IBM Corporation. All rights reserved. 
* $COPYRIGHT$ @@ -57,6 +57,7 @@ #include "src/util/compress.h" #include "src/util/error.h" #include "src/util/hash.h" +#include "src/util/name_fns.h" #include "src/util/output.h" #include "src/mca/gds/gds.h" #include "src/mca/ptl/ptl.h" @@ -75,8 +76,15 @@ static void _getnb_cbfunc(struct pmix_peer_t *pr, static void _value_cbfunc(pmix_status_t status, pmix_value_t *kv, void *cbdata); +static pmix_status_t _getfn_fastpath(const pmix_proc_t *proc, const pmix_key_t key, + const pmix_info_t info[], size_t ninfo, + pmix_value_t **val); -PMIX_EXPORT pmix_status_t PMIx_Get(const pmix_proc_t *proc, const char key[], +static pmix_status_t process_values(pmix_value_t **v, pmix_cb_t *cb); + + +PMIX_EXPORT pmix_status_t PMIx_Get(const pmix_proc_t *proc, + const pmix_key_t key, const pmix_info_t info[], size_t ninfo, pmix_value_t **val) { @@ -91,12 +99,18 @@ PMIX_EXPORT pmix_status_t PMIx_Get(const pmix_proc_t *proc, const char key[], } PMIX_RELEASE_THREAD(&pmix_global_lock); - pmix_output_verbose(2, pmix_client_globals.get_verbose, - "pmix:client get for %s:%d key %s", - (NULL == proc) ? "NULL" : proc->nspace, - (NULL == proc) ? PMIX_RANK_UNDEF : proc->rank, + pmix_output_verbose(2, pmix_client_globals.get_output, + "pmix:client get for %s key %s", + (NULL == proc) ? "NULL" : PMIX_NAME_PRINT(proc), (NULL == key) ? 
"NULL" : key); + /* try to get data directly, without threadshift */ + if (PMIX_RANK_UNDEF != proc->rank && NULL != key) { + if (PMIX_SUCCESS == (rc = _getfn_fastpath(proc, key, info, ninfo, val))) { + goto done; + } + } + /* create a callback object as we need to pass it to the * recv routine so we know which callback to use when * the return message is recvd */ @@ -115,13 +129,14 @@ PMIX_EXPORT pmix_status_t PMIx_Get(const pmix_proc_t *proc, const char key[], } PMIX_RELEASE(cb); - pmix_output_verbose(2, pmix_client_globals.get_verbose, + done: + pmix_output_verbose(2, pmix_client_globals.get_output, "pmix:client get completed"); return rc; } -PMIX_EXPORT pmix_status_t PMIx_Get_nb(const pmix_proc_t *proc, const char *key, +PMIX_EXPORT pmix_status_t PMIx_Get_nb(const pmix_proc_t *proc, const pmix_key_t key, const pmix_info_t info[], size_t ninfo, pmix_value_cbfunc_t cbfunc, void *cbdata) { @@ -149,7 +164,7 @@ PMIX_EXPORT pmix_status_t PMIx_Get_nb(const pmix_proc_t *proc, const char *key, * Either case is supported. 
However, we don't currently * support the case where -both- values are NULL */ if (NULL == proc && NULL == key) { - pmix_output_verbose(2, pmix_client_globals.get_verbose, + pmix_output_verbose(2, pmix_client_globals.get_output, "pmix: get_nb value error - both proc and key are NULL"); return PMIX_ERR_BAD_PARAM; } @@ -157,7 +172,7 @@ PMIX_EXPORT pmix_status_t PMIx_Get_nb(const pmix_proc_t *proc, const char *key, /* if the key is NULL, the rank cannot be WILDCARD as * we cannot return all info from every rank */ if (NULL != proc && PMIX_RANK_WILDCARD == proc->rank && NULL == key) { - pmix_output_verbose(2, pmix_client_globals.get_verbose, + pmix_output_verbose(2, pmix_client_globals.get_output, "pmix: get_nb value error - WILDCARD rank and key is NULL"); return PMIX_ERR_BAD_PARAM; } @@ -179,7 +194,7 @@ PMIX_EXPORT pmix_status_t PMIx_Get_nb(const pmix_proc_t *proc, const char *key, rank = proc->rank; } - pmix_output_verbose(2, pmix_client_globals.get_verbose, + pmix_output_verbose(2, pmix_client_globals.get_output, "pmix: get_nb value for proc %s:%u key %s", nm, rank, (NULL == key) ? 
"NULL" : key); @@ -284,7 +299,7 @@ static void _getnb_cbfunc(struct pmix_peer_t *pr, pmix_proc_t proc; pmix_kval_t *kv; - pmix_output_verbose(2, pmix_client_globals.get_verbose, + pmix_output_verbose(2, pmix_client_globals.get_output, "pmix: get_nb callback recvd"); if (NULL == cb) { @@ -294,7 +309,7 @@ static void _getnb_cbfunc(struct pmix_peer_t *pr, } /* cache the proc id */ - (void)strncpy(proc.nspace, cb->pname.nspace, PMIX_MAX_NSLEN); + pmix_strncpy(proc.nspace, cb->pname.nspace, PMIX_MAX_NSLEN); proc.rank = cb->pname.rank; /* a zero-byte buffer indicates that this recv is being @@ -318,7 +333,11 @@ static void _getnb_cbfunc(struct pmix_peer_t *pr, if (PMIX_SUCCESS != ret) { goto done; } - PMIX_GDS_ACCEPT_KVS_RESP(rc, pmix_client_globals.myserver, buf); + if (PMIX_RANK_UNDEF == proc.rank) { + PMIX_GDS_ACCEPT_KVS_RESP(rc, pmix_globals.mypeer, buf); + } else { + PMIX_GDS_ACCEPT_KVS_RESP(rc, pmix_client_globals.myserver, buf); + } if (PMIX_SUCCESS != rc) { goto done; } @@ -337,7 +356,11 @@ static void _getnb_cbfunc(struct pmix_peer_t *pr, /* fetch the data from server peer module - since it is passing * it back to the user, we need a copy of it */ cb->copy = true; - PMIX_GDS_FETCH_KV(rc, pmix_client_globals.myserver, cb); + if (PMIX_RANK_UNDEF == proc.rank) { + PMIX_GDS_FETCH_KV(rc, pmix_globals.mypeer, cb); + } else { + PMIX_GDS_FETCH_KV(rc, pmix_client_globals.myserver, cb); + } if (PMIX_SUCCESS == rc) { if (1 != pmix_list_get_size(&cb->kvs)) { rc = PMIX_ERR_INVALID_VAL; @@ -407,7 +430,7 @@ static pmix_status_t process_values(pmix_value_t **v, pmix_cb_t *cb) /* copy the list elements */ n=0; PMIX_LIST_FOREACH(kv, kvs, pmix_kval_t) { - (void)strncpy(info[n].key, kv->key, PMIX_MAX_KEYLEN); + pmix_strncpy(info[n].key, kv->key, PMIX_MAX_KEYLEN); pmix_value_xfer(&info[n].value, kv->value); ++n; } @@ -472,6 +495,61 @@ static void infocb(pmix_status_t status, } } +static pmix_status_t _getfn_fastpath(const pmix_proc_t *proc, const pmix_key_t key, + const pmix_info_t 
info[], size_t ninfo, + pmix_value_t **val) +{ + pmix_cb_t *cb = PMIX_NEW(pmix_cb_t); + pmix_status_t rc = PMIX_SUCCESS; + size_t n; + + /* scan the incoming directives */ + if (NULL != info) { + for (n=0; n < ninfo; n++) { + if (PMIX_CHECK_KEY(&info[n], PMIX_DATA_SCOPE)) { + cb->scope = info[n].value.data.scope; + } else if (PMIX_CHECK_KEY(&info[n], PMIX_OPTIONAL) || + PMIX_CHECK_KEY(&info[n], PMIX_IMMEDIATE)) { + continue; + } else { + /* we cannot handle any other directives via this path */ + PMIX_RELEASE(cb); + return PMIX_ERR_NOT_SUPPORTED; + } + } + } + cb->proc = (pmix_proc_t*)proc; + cb->copy = true; + cb->key = (char*)key; + cb->info = (pmix_info_t*)info; + cb->ninfo = ninfo; + + PMIX_GDS_FETCH_IS_TSAFE(rc, pmix_client_globals.myserver); + if (PMIX_SUCCESS == rc) { + PMIX_GDS_FETCH_KV(rc, pmix_client_globals.myserver, cb); + if (PMIX_SUCCESS == rc) { + goto done; + } + } + PMIX_GDS_FETCH_IS_TSAFE(rc, pmix_globals.mypeer); + if (PMIX_SUCCESS == rc) { + PMIX_GDS_FETCH_KV(rc, pmix_globals.mypeer, cb); + if (PMIX_SUCCESS == rc) { + goto done; + } + } + PMIX_RELEASE(cb); + return rc; + + done: + rc = process_values(val, cb); + if (NULL != *val) { + PMIX_VALUE_COMPRESSED_STRING_UNPACK(*val); + } + PMIX_RELEASE(cb); + return rc; +} + static void _getnbfn(int fd, short flags, void *cbdata) { pmix_cb_t *cb = (pmix_cb_t*)cbdata; @@ -480,7 +558,6 @@ static void _getnbfn(int fd, short flags, void *cbdata) pmix_value_t *val = NULL; pmix_status_t rc; size_t n; - char *tmp; pmix_proc_t proc; bool optional = false; bool immediate = false; @@ -490,23 +567,23 @@ static void _getnbfn(int fd, short flags, void *cbdata) /* cb was passed to us from another thread - acquire it */ PMIX_ACQUIRE_OBJECT(cb); - pmix_output_verbose(2, pmix_client_globals.get_verbose, - "pmix: getnbfn value for proc %s:%u key %s", - cb->pname.nspace, cb->pname.rank, - (NULL == cb->key) ? 
"NULL" : cb->key); - /* set the proc object identifier */ - (void)strncpy(proc.nspace, cb->pname.nspace, PMIX_MAX_NSLEN); + pmix_strncpy(proc.nspace, cb->pname.nspace, PMIX_MAX_NSLEN); proc.rank = cb->pname.rank; + pmix_output_verbose(2, pmix_client_globals.get_output, + "pmix: getnbfn value for proc %s key %s", + PMIX_NAME_PRINT(&proc), + (NULL == cb->key) ? "NULL" : cb->key); + /* scan the incoming directives */ if (NULL != cb->info) { for (n=0; n < cb->ninfo; n++) { - if (0 == strncmp(cb->info[n].key, PMIX_OPTIONAL, PMIX_MAX_KEYLEN)) { + if (PMIX_CHECK_KEY(&cb->info[n], PMIX_OPTIONAL)) { optional = PMIX_INFO_TRUE(&cb->info[n]); - } else if (0 == strncmp(cb->info[n].key, PMIX_IMMEDIATE, PMIX_MAX_KEYLEN)) { + } else if (PMIX_CHECK_KEY(&cb->info[n], PMIX_IMMEDIATE)) { immediate = PMIX_INFO_TRUE(&cb->info[n]); - } else if (0 == strncmp(cb->info[n].key, PMIX_TIMEOUT, PMIX_MAX_KEYLEN)) { + } else if (PMIX_CHECK_KEY(&cb->info[n], PMIX_TIMEOUT)) { /* set a timer to kick us out if we don't * have an answer within their window */ if (0 < cb->info[n].value.data.integer) { @@ -517,8 +594,16 @@ static void _getnbfn(int fd, short flags, void *cbdata) pmix_event_evtimer_add(&cb->ev, &tv); cb->timer_running = true; } - } else if (0 == strncmp(cb->info[n].key, PMIX_DATA_SCOPE, PMIX_MAX_KEYLEN)) { + } else if (PMIX_CHECK_KEY(&cb->info[n], PMIX_DATA_SCOPE)) { cb->scope = cb->info[n].value.data.scope; + } else if (PMIX_CHECK_KEY(&cb->info[n], PMIX_SESSION_INFO)) { + cb->level = PMIX_LEVEL_SESSION; + } else if (PMIX_CHECK_KEY(&cb->info[n], PMIX_JOB_INFO)) { + cb->level = PMIX_LEVEL_JOB; + } else if (PMIX_CHECK_KEY(&cb->info[n], PMIX_APP_INFO)) { + cb->level = PMIX_LEVEL_APP; + } else if (PMIX_CHECK_KEY(&cb->info[n], PMIX_NODE_INFO)) { + cb->level = PMIX_LEVEL_NODE; } } } @@ -528,12 +613,12 @@ static void _getnbfn(int fd, short flags, void *cbdata) cb->copy = true; PMIX_GDS_FETCH_KV(rc, pmix_globals.mypeer, cb); if (PMIX_SUCCESS == rc) { - pmix_output_verbose(5, 
pmix_client_globals.get_verbose, + pmix_output_verbose(5, pmix_client_globals.get_output, "pmix:client data found in internal storage"); rc = process_values(&val, cb); goto respond; } - pmix_output_verbose(5, pmix_client_globals.get_verbose, + pmix_output_verbose(5, pmix_client_globals.get_output, "pmix:client data NOT found in internal storage"); /* if the key is NULL or starts with "pmix", then they are looking @@ -543,9 +628,15 @@ static void _getnbfn(int fd, short flags, void *cbdata) /* fetch the data from my server's module - since we are passing * it back to the user, we need a copy of it */ cb->copy = true; - PMIX_GDS_FETCH_KV(rc, pmix_client_globals.myserver, cb); + /* if the peer and server GDS component are the same, then no + * point in trying it again */ + if (0 != strcmp(pmix_globals.mypeer->nptr->compat.gds->name, pmix_client_globals.myserver->nptr->compat.gds->name)) { + PMIX_GDS_FETCH_KV(rc, pmix_client_globals.myserver, cb); + } else { + rc = PMIX_ERR_TAKE_NEXT_OPTION; + } if (PMIX_SUCCESS != rc) { - pmix_output_verbose(5, pmix_client_globals.get_verbose, + pmix_output_verbose(5, pmix_client_globals.get_output, "pmix:client job-level data NOT found"); if (0 != strncmp(cb->pname.nspace, pmix_globals.myid.nspace, PMIX_MAX_NSLEN)) { /* we are asking about the job-level info from another @@ -559,7 +650,7 @@ static void _getnbfn(int fd, short flags, void *cbdata) * job-level info. In some cases, a server may elect not * to provide info at init to save memory */ if (immediate) { - pmix_output_verbose(5, pmix_client_globals.get_verbose, + pmix_output_verbose(5, pmix_client_globals.get_output, "pmix:client IMMEDIATE given - querying data"); /* the direct modex request doesn't pass a key as it * was intended to support non-job-level information. 
@@ -579,20 +670,30 @@ static void _getnbfn(int fd, short flags, void *cbdata) return; } /* we should have had this info, so respond with the error */ - pmix_output_verbose(5, pmix_client_globals.get_verbose, + pmix_output_verbose(5, pmix_client_globals.get_output, "pmix:client returning NOT FOUND error"); goto respond; } else { - pmix_output_verbose(5, pmix_client_globals.get_verbose, + pmix_output_verbose(5, pmix_client_globals.get_output, "pmix:client NULL KEY - returning error"); goto respond; } } - pmix_output_verbose(5, pmix_client_globals.get_verbose, + pmix_output_verbose(5, pmix_client_globals.get_output, "pmix:client job-level data NOT found"); rc = process_values(&val, cb); goto respond; + } else if (PMIX_RANK_UNDEF == proc.rank) { + /* the data would have to be stored on our own peer, so + * we need to go request it */ + goto request; } else { + /* if the peer and server GDS component are the same, then no + * point in trying it again */ + if (0 == strcmp(pmix_globals.mypeer->nptr->compat.gds->name, pmix_client_globals.myserver->nptr->compat.gds->name)) { + val = NULL; + goto request; + } cb->proc = &proc; cb->copy = true; PMIX_GDS_FETCH_KV(rc, pmix_client_globals.myserver, cb); @@ -608,19 +709,7 @@ static void _getnbfn(int fd, short flags, void *cbdata) /* if a callback was provided, execute it */ if (NULL != cb->cbfunc.valuefn) { if (NULL != val) { - /* if this is a compressed string, then uncompress it */ - if (PMIX_COMPRESSED_STRING == val->type) { - pmix_util_uncompress_string(&tmp, (uint8_t*)val->data.bo.bytes, val->data.bo.size); - if (NULL == tmp) { - PMIX_ERROR_LOG(PMIX_ERR_NOMEM); - rc = PMIX_ERR_NOMEM; - PMIX_VALUE_RELEASE(val); - val = NULL; - } else { - PMIX_VALUE_DESTRUCT(val); - PMIX_VAL_ASSIGN(val, string, tmp); - } - } + PMIX_VALUE_COMPRESSED_STRING_UNPACK(val); } cb->cbfunc.valuefn(rc, val, cb->cbdata); } @@ -644,7 +733,7 @@ static void _getnbfn(int fd, short flags, void *cbdata) * us to attempt to retrieve it from the server */ if 
(optional) { /* they don't want us to try and retrieve it */ - pmix_output_verbose(2, pmix_client_globals.get_verbose, + pmix_output_verbose(2, pmix_client_globals.get_output, "PMIx_Get key=%s for rank = %u, namespace = %s was not found - request was optional", cb->key, cb->pname.rank, cb->pname.nspace); rc = PMIX_ERR_NOT_FOUND; @@ -672,10 +761,10 @@ static void _getnbfn(int fd, short flags, void *cbdata) goto respond; } - pmix_output_verbose(2, pmix_client_globals.get_verbose, - "%s:%d REQUESTING DATA FROM SERVER FOR %s:%d KEY %s", - pmix_globals.myid.nspace, pmix_globals.myid.rank, - cb->pname.nspace, cb->pname.rank, cb->key); + pmix_output_verbose(2, pmix_client_globals.get_output, + "%s REQUESTING DATA FROM SERVER FOR %s KEY %s", + PMIX_NAME_PRINT(&pmix_globals.myid), + PMIX_NAME_PRINT(cb->proc), cb->key); /* track the callback object */ pmix_list_append(&pmix_client_globals.pending_requests, &cb->super); diff --git a/opal/mca/pmix/pmix3x/pmix/src/client/pmix_client_pub.c b/opal/mca/pmix/pmix3x/pmix/src/client/pmix_client_pub.c index cee3dcaf940..bd6795eac5e 100644 --- a/opal/mca/pmix/pmix3x/pmix/src/client/pmix_client_pub.c +++ b/opal/mca/pmix/pmix3x/pmix/src/client/pmix_client_pub.c @@ -660,7 +660,7 @@ static void lookup_cbfunc(pmix_status_t status, pmix_pdata_t pdata[], size_t nda for (j=0; j < cb->nvals; j++) { if (0 == strcmp(pdata[i].key, tgt[j].key)) { /* transfer the publishing proc id */ - (void)strncpy(tgt[j].proc.nspace, pdata[i].proc.nspace, PMIX_MAX_NSLEN); + pmix_strncpy(tgt[j].proc.nspace, pdata[i].proc.nspace, PMIX_MAX_NSLEN); tgt[j].proc.rank = pdata[i].proc.rank; /* transfer the value to the pmix_info_t */ PMIX_BFROPS_VALUE_XFER(cb->status, pmix_client_globals.myserver, &tgt[j].value, &pdata[i].value); diff --git a/opal/mca/pmix/pmix3x/pmix/src/client/pmix_client_spawn.c b/opal/mca/pmix/pmix3x/pmix/src/client/pmix_client_spawn.c index f04dcec34d1..b7aefc4316f 100644 --- a/opal/mca/pmix/pmix3x/pmix/src/client/pmix_client_spawn.c +++ 
b/opal/mca/pmix/pmix3x/pmix/src/client/pmix_client_spawn.c @@ -1,6 +1,6 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* - * Copyright (c) 2014-2017 Intel, Inc. All rights reserved. + * Copyright (c) 2014-2019 Intel, Inc. All rights reserved. * Copyright (c) 2014-2017 Research Organization for Information Science * and Technology (RIST). All rights reserved. * Copyright (c) 2014 Artem Y. Polyakov . @@ -49,6 +49,7 @@ #include "src/class/pmix_list.h" #include "src/threads/threads.h" #include "src/mca/bfrops/bfrops.h" +#include "src/mca/pnet/base/base.h" #include "src/util/argv.h" #include "src/util/error.h" #include "src/util/output.h" @@ -64,7 +65,7 @@ static void spawn_cbfunc(pmix_status_t status, char nspace[], void *cbdata); PMIX_EXPORT pmix_status_t PMIx_Spawn(const pmix_info_t job_info[], size_t ninfo, const pmix_app_t apps[], size_t napps, - char nspace[]) + pmix_nspace_t nspace) { pmix_status_t rc; pmix_cb_t *cb; @@ -104,7 +105,7 @@ PMIX_EXPORT pmix_status_t PMIx_Spawn(const pmix_info_t job_info[], size_t ninfo, PMIX_WAIT_THREAD(&cb->lock); rc = cb->status; if (NULL != nspace) { - (void)strncpy(nspace, cb->pname.nspace, PMIX_MAX_NSLEN); + pmix_strncpy(nspace, cb->pname.nspace, PMIX_MAX_NSLEN); } PMIX_RELEASE(cb); @@ -119,6 +120,12 @@ PMIX_EXPORT pmix_status_t PMIx_Spawn_nb(const pmix_info_t job_info[], size_t nin pmix_cmd_t cmd = PMIX_SPAWNNB_CMD; pmix_status_t rc; pmix_cb_t *cb; + size_t n, m; + pmix_app_t *aptr; + bool jobenvars = false; + char *harvest[2] = {"PMIX_MCA_", NULL}; + pmix_kval_t *kv; + pmix_list_t ilist; PMIX_ACQUIRE_THREAD(&pmix_global_lock); @@ -137,6 +144,70 @@ PMIX_EXPORT pmix_status_t PMIx_Spawn_nb(const pmix_info_t job_info[], size_t nin } PMIX_RELEASE_THREAD(&pmix_global_lock); + /* check job info for directives */ + if (NULL != job_info) { + for (n=0; n < ninfo; n++) { + if (PMIX_CHECK_KEY(&job_info[n], PMIX_SETUP_APP_ENVARS)) { + PMIX_CONSTRUCT(&ilist, pmix_list_t); + rc = pmix_pnet_base_harvest_envars(harvest, 
NULL, &ilist); + if (PMIX_SUCCESS != rc) { + PMIX_LIST_DESTRUCT(&ilist); + return rc; + } + PMIX_LIST_FOREACH(kv, &ilist, pmix_kval_t) { + /* cycle across all the apps and set this envar */ + for (m=0; m < napps; m++) { + aptr = (pmix_app_t*)&apps[m]; + pmix_setenv(kv->value->data.envar.envar, + kv->value->data.envar.value, + true, &aptr->env); + } + } + jobenvars = true; + PMIX_LIST_DESTRUCT(&ilist); + break; + } + } + } + + for (n=0; n < napps; n++) { + /* do a quick check of the apps directive array to ensure + * the ninfo field has been set */ + aptr = (pmix_app_t*)&apps[n]; + if (NULL != aptr->info && 0 == aptr->ninfo) { + /* look for the info marked as "end" */ + m = 0; + while (!(PMIX_INFO_IS_END(&aptr->info[m])) && m < SIZE_MAX) { + ++m; + } + if (SIZE_MAX == m) { + /* nothing we can do */ + return PMIX_ERR_BAD_PARAM; + } + aptr->ninfo = m; + } + if (!jobenvars) { + for (m=0; m < aptr->ninfo; m++) { + if (PMIX_CHECK_KEY(&aptr->info[m], PMIX_SETUP_APP_ENVARS)) { + PMIX_CONSTRUCT(&ilist, pmix_list_t); + rc = pmix_pnet_base_harvest_envars(harvest, NULL, &ilist); + if (PMIX_SUCCESS != rc) { + PMIX_LIST_DESTRUCT(&ilist); + return rc; + } + PMIX_LIST_FOREACH(kv, &ilist, pmix_kval_t) { + pmix_setenv(kv->value->data.envar.envar, + kv->value->data.envar.value, + true, &aptr->env); + } + jobenvars = true; + PMIX_LIST_DESTRUCT(&ilist); + break; + } + } + } + } + msg = PMIX_NEW(pmix_buffer_t); /* pack the cmd */ PMIX_BFROPS_PACK(rc, pmix_client_globals.myserver, @@ -240,27 +311,25 @@ static void wait_cbfunc(struct pmix_peer_t *pr, PMIX_ERROR_LOG(rc); ret = rc; } - if (PMIX_SUCCESS == ret) { - /* unpack the namespace */ - cnt = 1; - PMIX_BFROPS_UNPACK(rc, pmix_client_globals.myserver, - buf, &n2, &cnt, PMIX_STRING); + /* unpack the namespace */ + cnt = 1; + PMIX_BFROPS_UNPACK(rc, pmix_client_globals.myserver, + buf, &n2, &cnt, PMIX_STRING); + if (PMIX_SUCCESS != rc && PMIX_ERR_UNPACK_READ_PAST_END_OF_BUFFER != rc) { + PMIX_ERROR_LOG(rc); + ret = rc; + } + 
pmix_output_verbose(1, pmix_globals.debug_output, + "pmix:client recv '%s'", n2); + + if (NULL != n2) { + /* protect length */ + pmix_strncpy(nspace, n2, PMIX_MAX_NSLEN); + free(n2); + PMIX_GDS_STORE_JOB_INFO(rc, pmix_globals.mypeer, nspace, buf); + /* extract and process any job-related info for this nspace */ if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); - ret = rc; - } - pmix_output_verbose(1, pmix_globals.debug_output, - "pmix:client recv '%s'", n2); - - if (NULL != n2) { - /* protect length */ - (void)strncpy(nspace, n2, PMIX_MAX_NSLEN); - free(n2); - PMIX_GDS_STORE_JOB_INFO(rc, pmix_globals.mypeer, nspace, buf); - /* extract and process any job-related info for this nspace */ - if (PMIX_SUCCESS != rc) { - PMIX_ERROR_LOG(rc); - } } } diff --git a/opal/mca/pmix/pmix3x/pmix/src/common/pmix_control.c b/opal/mca/pmix/pmix3x/pmix/src/common/pmix_control.c index 26123cca2a6..1c2f74308a0 100644 --- a/opal/mca/pmix/pmix3x/pmix/src/common/pmix_control.c +++ b/opal/mca/pmix/pmix3x/pmix/src/common/pmix_control.c @@ -4,6 +4,8 @@ * Copyright (c) 2016 Mellanox Technologies, Inc. * All rights reserved. * Copyright (c) 2016 IBM Corporation. All rights reserved. + * Copyright (c) 2019 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -85,7 +87,7 @@ static void query_cbfunc(struct pmix_peer_t *peer, /* unpack any returned data */ cnt = 1; PMIX_BFROPS_UNPACK(rc, peer, buf, &results->ninfo, &cnt, PMIX_SIZE); - if (PMIX_SUCCESS != rc) { + if (PMIX_SUCCESS != rc && PMIX_ERR_UNPACK_READ_PAST_END_OF_BUFFER != rc) { PMIX_ERROR_LOG(rc); goto complete; } @@ -105,6 +107,8 @@ static void query_cbfunc(struct pmix_peer_t *peer, /* release the caller */ if (NULL != cd->cbfunc) { cd->cbfunc(results->status, results->info, results->ninfo, cd->cbdata, relcbfunc, results); + } else { + PMIX_RELEASE(results); } PMIX_RELEASE(cd); } @@ -332,6 +336,12 @@ PMIX_EXPORT pmix_status_t PMIx_Process_monitor_nb(const pmix_info_t *monitor, pm return PMIX_ERR_INIT; } + /* sanity check */ + if (NULL == monitor) { + PMIX_RELEASE_THREAD(&pmix_global_lock); + return PMIX_ERR_BAD_PARAM; + } + /* if we are the server, then we just issue the request and * return the response */ if (PMIX_PROC_IS_SERVER(pmix_globals.mypeer) && @@ -355,6 +365,19 @@ PMIX_EXPORT pmix_status_t PMIx_Process_monitor_nb(const pmix_info_t *monitor, pm } PMIX_RELEASE_THREAD(&pmix_global_lock); + /* if the monitor is PMIX_SEND_HEARTBEAT, then send it */ + if (0 == strncmp(monitor->key, PMIX_SEND_HEARTBEAT, PMIX_MAX_KEYLEN)) { + msg = PMIX_NEW(pmix_buffer_t); + if (NULL == msg) { + return PMIX_ERR_NOMEM; + } + PMIX_PTL_SEND_ONEWAY(rc, pmix_client_globals.myserver, msg, PMIX_PTL_TAG_HEARTBEAT); + if (PMIX_SUCCESS != rc) { + PMIX_RELEASE(msg); + } + return rc; + } + /* if we are a client, then relay this request to the server */ msg = PMIX_NEW(pmix_buffer_t); /* pack the cmd */ diff --git a/opal/mca/pmix/pmix3x/pmix/src/common/pmix_data.c b/opal/mca/pmix/pmix3x/pmix/src/common/pmix_data.c index 39ca2bcf291..a9e32c661f6 100644 --- a/opal/mca/pmix/pmix3x/pmix/src/common/pmix_data.c +++ b/opal/mca/pmix/pmix3x/pmix/src/common/pmix_data.c @@ -98,7 +98,7 @@ static pmix_peer_t* find_peer(const pmix_proc_t *proc) /* 
didn't find it, so try to get the library version of the target * from the host - the result will be cached, so we will only have * to retrieve it once */ - (void)strncpy(wildcard.nspace, proc->nspace, PMIX_MAX_NSLEN); + pmix_strncpy(wildcard.nspace, proc->nspace, PMIX_MAX_NSLEN); wildcard.rank = PMIX_RANK_WILDCARD; if (PMIX_SUCCESS != (rc = PMIx_Get(&wildcard, PMIX_BFROPS_MODULE, NULL, 0, &value))) { /* couldn't get it - nothing we can do */ @@ -110,7 +110,7 @@ static pmix_peer_t* find_peer(const pmix_proc_t *proc) PMIX_RELEASE(value); return NULL; } - peer->nptr = PMIX_NEW(pmix_nspace_t); + peer->nptr = PMIX_NEW(pmix_namespace_t); if (NULL == peer->nptr) { PMIX_RELEASE(peer); PMIX_RELEASE(value); @@ -137,15 +137,9 @@ static pmix_peer_t* find_peer(const pmix_proc_t *proc) return pmix_client_globals.myserver; } - /* if the target is another member of my nspace, then - * they must be using the same version */ - if (0 == strncmp(proc->nspace, pmix_globals.myid.nspace, PMIX_MAX_NSLEN)) { - return pmix_globals.mypeer; - } - /* try to get the library version of this peer - the result will be * cached, so we will only have to retrieve it once */ - (void)strncpy(wildcard.nspace, proc->nspace, PMIX_MAX_NSLEN); + pmix_strncpy(wildcard.nspace, proc->nspace, PMIX_MAX_NSLEN); wildcard.rank = PMIX_RANK_WILDCARD; if (PMIX_SUCCESS != (rc = PMIx_Get(&wildcard, PMIX_BFROPS_MODULE, NULL, 0, &value))) { /* couldn't get it - nothing we can do */ @@ -157,7 +151,7 @@ static pmix_peer_t* find_peer(const pmix_proc_t *proc) PMIX_RELEASE(value); return NULL; } - peer->nptr = PMIX_NEW(pmix_nspace_t); + peer->nptr = PMIX_NEW(pmix_namespace_t); if (NULL == peer->nptr) { PMIX_RELEASE(peer); PMIX_RELEASE(value); diff --git a/opal/mca/pmix/pmix3x/pmix/src/common/pmix_iof.h b/opal/mca/pmix/pmix3x/pmix/src/common/pmix_iof.h index fcc5f7b3da8..3525c5fb471 100644 --- a/opal/mca/pmix/pmix3x/pmix/src/common/pmix_iof.h +++ b/opal/mca/pmix/pmix3x/pmix/src/common/pmix_iof.h @@ -148,7 +148,7 @@ 
pmix_iof_fd_always_ready(int fd) "defining endpt: file %s line %d fd %d", \ __FILE__, __LINE__, (fid))); \ PMIX_CONSTRUCT((snk), pmix_iof_sink_t); \ - (void)strncpy((snk)->name.nspace, (nm)->nspace, PMIX_MAX_NSLEN); \ + pmix_strncpy((snk)->name.nspace, (nm)->nspace, PMIX_MAX_NSLEN); \ (snk)->name.rank = (nm)->rank; \ (snk)->tag = (tg); \ if (0 <= (fid)) { \ diff --git a/opal/mca/pmix/pmix3x/pmix/src/common/pmix_log.c b/opal/mca/pmix/pmix3x/pmix/src/common/pmix_log.c index 582a64e4eaa..0c5aa760fc7 100644 --- a/opal/mca/pmix/pmix3x/pmix/src/common/pmix_log.c +++ b/opal/mca/pmix/pmix3x/pmix/src/common/pmix_log.c @@ -1,6 +1,6 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* - * Copyright (c) 2014-2018 Intel, Inc. All rights reserved. + * Copyright (c) 2014-2019 Intel, Inc. All rights reserved. * Copyright (c) 2016 Mellanox Technologies, Inc. * All rights reserved. * Copyright (c) 2016 IBM Corporation. All rights reserved. @@ -82,14 +82,18 @@ PMIX_EXPORT pmix_status_t PMIx_Log(const pmix_info_t data[], size_t ndata, * recv routine so we know which callback to use when * the return message is recvd */ PMIX_CONSTRUCT(&cb, pmix_cb_t); - if (PMIX_SUCCESS != (rc = PMIx_Log_nb(data, ndata, directives, - ndirs, opcbfunc, &cb))) { + rc = PMIx_Log_nb(data, ndata, directives, ndirs, opcbfunc, &cb); + if (PMIX_SUCCESS == rc) { + /* wait for the operation to complete */ + PMIX_WAIT_THREAD(&cb.lock); + } else { PMIX_DESTRUCT(&cb); + if (PMIX_OPERATION_SUCCEEDED == rc) { + rc = PMIX_SUCCESS; + } return rc; } - /* wait for the operation to complete */ - PMIX_WAIT_THREAD(&cb.lock); rc = cb.status; PMIX_DESTRUCT(&cb); diff --git a/opal/mca/pmix/pmix3x/pmix/src/common/pmix_query.c b/opal/mca/pmix/pmix3x/pmix/src/common/pmix_query.c index 33bc025dafd..1f217d18a5d 100644 --- a/opal/mca/pmix/pmix3x/pmix/src/common/pmix_query.c +++ b/opal/mca/pmix/pmix3x/pmix/src/common/pmix_query.c @@ -1,6 +1,6 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* - * 
Copyright (c) 2014-2018 Intel, Inc. All rights reserved. + * Copyright (c) 2014-2019 Intel, Inc. All rights reserved. * Copyright (c) 2016 Mellanox Technologies, Inc. * All rights reserved. * Copyright (c) 2016 IBM Corporation. All rights reserved. @@ -53,6 +53,8 @@ static void query_cbfunc(struct pmix_peer_t *peer, pmix_status_t rc; pmix_shift_caddy_t *results; int cnt; + size_t n; + pmix_kval_t *kv; pmix_output_verbose(2, pmix_globals.debug_output, "pmix:query cback from server"); @@ -88,6 +90,19 @@ static void query_cbfunc(struct pmix_peer_t *peer, results->status = rc; goto complete; } + /* locally cache the results */ + for (n=0; n < results->ninfo; n++) { + kv = PMIX_NEW(pmix_kval_t); + kv->key = strdup(results->info[n].key); + PMIX_VALUE_CREATE(kv->value, 1); + PMIX_BFROPS_VALUE_XFER(rc, pmix_globals.mypeer, + kv->value, &results->info[n].value); + + PMIX_GDS_STORE_KV(rc, pmix_globals.mypeer, + &pmix_globals.myid, PMIX_INTERNAL, + kv); + PMIX_RELEASE(kv); // maintain accounting + } } complete: @@ -100,6 +115,22 @@ static void query_cbfunc(struct pmix_peer_t *peer, PMIX_RELEASE(cd); } +static void _local_relcb(void *cbdata) +{ + pmix_query_caddy_t *cd = (pmix_query_caddy_t*)cbdata; + PMIX_RELEASE(cd); +} + +static void _local_cbfunc(int sd, short args, void *cbdata) +{ + pmix_query_caddy_t *cd = (pmix_query_caddy_t*)cbdata; + if (NULL != cd->cbfunc) { + cd->cbfunc(cd->status, cd->info, cd->ninfo, cd->cbdata, _local_relcb, cd); + return; + } + PMIX_RELEASE(cd); +} + PMIX_EXPORT pmix_status_t PMIx_Query_info_nb(pmix_query_t queries[], size_t nqueries, pmix_info_cbfunc_t cbfunc, void *cbdata) @@ -108,6 +139,12 @@ PMIX_EXPORT pmix_status_t PMIx_Query_info_nb(pmix_query_t queries[], size_t nque pmix_cmd_t cmd = PMIX_QUERY_CMD; pmix_buffer_t *msg; pmix_status_t rc; + pmix_cb_t cb; + size_t n, p; + pmix_list_t results; + pmix_kval_t *kv, *kvnxt; + pmix_proc_t proc; + bool rank_given; PMIX_ACQUIRE_THREAD(&pmix_global_lock); @@ -124,6 +161,133 @@ PMIX_EXPORT 
pmix_status_t PMIx_Query_info_nb(pmix_query_t queries[], size_t nque return PMIX_ERR_BAD_PARAM; } + /* do a quick check of the qualifiers array to ensure + * the nqual field has been set */ + for (n=0; n < nqueries; n++) { + if (NULL != queries[n].qualifiers && 0 == queries[n].nqual) { + /* look for the info marked as "end" */ + p = 0; + while (!(PMIX_INFO_IS_END(&queries[n].qualifiers[p])) && p < SIZE_MAX) { + ++p; + } + if (SIZE_MAX == p) { + /* nothing we can do */ + PMIX_RELEASE_THREAD(&pmix_global_lock); + return PMIX_ERR_BAD_PARAM; + } + queries[n].nqual = p; + } + } + + /* setup the list of local results */ + PMIX_CONSTRUCT(&results, pmix_list_t); + + /* check the directives to see if they want us to refresh + * the local cached results - if we wanted to optimize this + * more, we would check each query and allow those that don't + * want to be refreshed to be executed locally, and those that + * did would be sent to the host. However, for now we simply + * */ + memset(proc.nspace, 0, PMIX_MAX_NSLEN+1); + proc.rank = PMIX_RANK_INVALID; + for (n=0; n < nqueries; n++) { + rank_given = false; + for (p=0; p < queries[n].nqual; p++) { + if (PMIX_CHECK_KEY(&queries[n].qualifiers[p], PMIX_QUERY_REFRESH_CACHE)) { + if (PMIX_INFO_TRUE(&queries[n].qualifiers[p])) { + PMIX_LIST_DESTRUCT(&results); + goto query; + } + } else if (PMIX_CHECK_KEY(&queries[n].qualifiers[p], PMIX_PROCID)) { + PMIX_LOAD_NSPACE(proc.nspace, queries[n].qualifiers[p].value.data.proc->nspace); + proc.rank = queries[n].qualifiers[p].value.data.proc->rank; + rank_given = true; + } else if (PMIX_CHECK_KEY(&queries[n].qualifiers[p], PMIX_NSPACE)) { + PMIX_LOAD_NSPACE(proc.nspace, queries[n].qualifiers[p].value.data.string); + } else if (PMIX_CHECK_KEY(&queries[n].qualifiers[p], PMIX_RANK)) { + proc.rank = queries[n].qualifiers[p].value.data.rank; + rank_given = true; + } + } + /* we get here if a refresh isn't required - first try a local + * "get" on the data to see if we already have it */ + 
PMIX_CONSTRUCT(&cb, pmix_cb_t); + cb.copy = false; + /* if they are querying about node or app values not directly + * associated with a proc (i.e., they didn't specify the proc), + * then we obtain those by leaving the proc info as undefined */ + if (!rank_given) { + proc.rank = PMIX_RANK_UNDEF; + cb.proc = &proc; + } else { + /* set the proc */ + if (PMIX_RANK_INVALID == proc.rank && + 0 == strlen(proc.nspace)) { + /* use our id */ + cb.proc = &pmix_globals.myid; + } else { + if (0 == strlen(proc.nspace)) { + /* use our nspace */ + PMIX_LOAD_NSPACE(cb.proc->nspace, pmix_globals.myid.nspace); + } + if (PMIX_RANK_INVALID == proc.rank) { + /* user the wildcard rank */ + proc.rank = PMIX_RANK_WILDCARD; + } + cb.proc = &proc; + } + } + for (p=0; NULL != queries[n].keys[p]; p++) { + cb.key = queries[n].keys[p]; + PMIX_GDS_FETCH_KV(rc, pmix_globals.mypeer, &cb); + if (PMIX_SUCCESS != rc) { + /* needs to be passed to the host */ + PMIX_LIST_DESTRUCT(&results); + PMIX_DESTRUCT(&cb); + goto query; + } + /* need to retain this result */ + PMIX_LIST_FOREACH_SAFE(kv, kvnxt, &cb.kvs, pmix_kval_t) { + pmix_list_remove_item(&cb.kvs, &kv->super); + pmix_list_append(&results, &kv->super); + } + PMIX_DESTRUCT(&cb); + } + } + + /* if we get here, then all queries were completely locally + * resolved, so construct the results for return */ + cd = PMIX_NEW(pmix_query_caddy_t); + cd->cbfunc = cbfunc; + cd->cbdata = cbdata; + cd->status = PMIX_SUCCESS; + cd->ninfo = pmix_list_get_size(&results); + PMIX_INFO_CREATE(cd->info, cd->ninfo); + n = 0; + PMIX_LIST_FOREACH_SAFE(kv, kvnxt, &results, pmix_kval_t) { + PMIX_LOAD_KEY(cd->info[n].key, kv->key); + rc = pmix_value_xfer(&cd->info[n].value, kv->value); + if (PMIX_SUCCESS != rc) { + cd->status = rc; + PMIX_INFO_FREE(cd->info, cd->ninfo); + break; + } + ++n; + } + /* done with the list of results */ + PMIX_LIST_DESTRUCT(&results); + /* we need to thread-shift as we are not allowed to + * execute the callback function prior to returning + * 
from the API */ + PMIX_THREADSHIFT(cd, _local_cbfunc); + /* regardless of the result of the query, we return + * PMIX_SUCCESS here to indicate that the operation + * was accepted for processing */ + PMIX_RELEASE_THREAD(&pmix_global_lock); + return PMIX_SUCCESS; + + + query: /* if we are the server, then we just issue the query and * return the response */ if (PMIX_PROC_IS_SERVER(pmix_globals.mypeer) && @@ -135,10 +299,10 @@ PMIX_EXPORT pmix_status_t PMIx_Query_info_nb(pmix_query_t queries[], size_t nque } pmix_output_verbose(2, pmix_globals.debug_output, "pmix:query handed to RM"); - pmix_host_server.query(&pmix_globals.myid, - queries, nqueries, - cbfunc, cbdata); - return PMIX_SUCCESS; + rc = pmix_host_server.query(&pmix_globals.myid, + queries, nqueries, + cbfunc, cbdata); + return rc; } /* if we aren't connected, don't attempt to send */ @@ -177,6 +341,7 @@ PMIX_EXPORT pmix_status_t PMIx_Query_info_nb(pmix_query_t queries[], size_t nque PMIX_RELEASE(cd); return rc; } + pmix_output_verbose(2, pmix_globals.debug_output, "pmix:query sending to server"); PMIX_PTL_SEND_RECV(rc, pmix_client_globals.myserver, diff --git a/opal/mca/pmix/pmix3x/pmix/src/common/pmix_strings.c b/opal/mca/pmix/pmix3x/pmix/src/common/pmix_strings.c index 7ee50b50fa9..48a2d5c7a19 100644 --- a/opal/mca/pmix/pmix3x/pmix/src/common/pmix_strings.c +++ b/opal/mca/pmix/pmix3x/pmix/src/common/pmix_strings.c @@ -12,6 +12,8 @@ * Copyright (c) 2007-2012 Los Alamos National Security, LLC. * All rights reserved. * Copyright (c) 2014-2018 Intel, Inc. All rights reserved. + * Copyright (c) 2018 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -241,25 +243,24 @@ PMIX_EXPORT const char* PMIx_IOF_channel_string(pmix_iof_channel_t channel) { size_t cnt=0; - memset(answer, 0, sizeof(answer)); if (PMIX_FWD_STDIN_CHANNEL & channel) { - strncpy(&answer[cnt], "STDIN ", strlen("STDIN ")); + strcpy(&answer[cnt], "STDIN "); cnt += strlen("STDIN "); } if (PMIX_FWD_STDOUT_CHANNEL & channel) { - strncpy(&answer[cnt], "STDOUT ", strlen("STDOUT ")); + strcpy(&answer[cnt], "STDOUT "); cnt += strlen("STDOUT "); } if (PMIX_FWD_STDERR_CHANNEL & channel) { - strncpy(&answer[cnt], "STDERR ", strlen("STDERR ")); + strcpy(&answer[cnt], "STDERR "); cnt += strlen("STDERR "); } if (PMIX_FWD_STDDIAG_CHANNEL & channel) { - strncpy(&answer[cnt], "STDDIAG ", strlen("STDDIAG ")); + strcpy(&answer[cnt], "STDDIAG "); cnt += strlen("STDDIAG "); } if (0 == cnt) { - strncpy(&answer[cnt], "NONE", strlen("NONE")); + strcpy(&answer[cnt], "NONE"); } return answer; } diff --git a/opal/mca/pmix/pmix3x/pmix/src/event/pmix_event.h b/opal/mca/pmix/pmix3x/pmix/src/event/pmix_event.h index b4ee30b0c0e..6ba6b774932 100644 --- a/opal/mca/pmix/pmix3x/pmix/src/event/pmix_event.h +++ b/opal/mca/pmix/pmix3x/pmix/src/event/pmix_event.h @@ -39,6 +39,13 @@ #define PMIX_EVENT_ORDER_PREPEND 0x10 #define PMIX_EVENT_ORDER_APPEND 0x20 +/* define an internal attribute for marking that the + * server processed an event before passing it up + * to its host in case it comes back down - avoids + * infinite loop */ +#define PMIX_SERVER_INTERNAL_NOTIFY "pmix.srvr.internal.notify" + + /* define a struct for tracking registration ranges */ typedef struct { pmix_data_range_t range; @@ -117,8 +124,15 @@ typedef struct pmix_event_chain_t { bool endchain; pmix_proc_t source; pmix_data_range_t range; + /* When generating events, callers can specify + * the range of targets to receive notifications. 
+ */ + pmix_proc_t *targets; + size_t ntargets; + /* the processes that we affected by the event */ pmix_proc_t *affected; size_t naffected; + /* any info provided by the event generator */ pmix_info_t *info; size_t ninfo; size_t nallocated; @@ -130,6 +144,13 @@ typedef struct pmix_event_chain_t { } pmix_event_chain_t; PMIX_CLASS_DECLARATION(pmix_event_chain_t); +/* prepare a chain for processing by cycling across provided + * info structs and translating those supported by the event + * system into the chain object*/ +pmix_status_t pmix_prep_event_chain(pmix_event_chain_t *chain, + const pmix_info_t *info, size_t ninfo, + bool xfer); + /* invoke the error handler that is registered against the given * status, passing it the provided info on the procs that were * affected, plus any additional info provided by the server */ @@ -146,68 +167,89 @@ bool pmix_notify_check_affected(pmix_proc_t *interested, size_t ninterested, pmix_status_t pmix_server_notify_client_of_event(pmix_status_t status, const pmix_proc_t *source, pmix_data_range_t range, - pmix_info_t info[], size_t ninfo, + const pmix_info_t info[], size_t ninfo, pmix_op_cbfunc_t cbfunc, void *cbdata); void pmix_event_timeout_cb(int fd, short flags, void *arg); -#define PMIX_REPORT_EVENT(e, p, r, f) \ - do { \ - pmix_event_chain_t *ch, *cp; \ - size_t n, ninfo; \ - pmix_info_t *info; \ - pmix_proc_t proc; \ - \ - ch = NULL; \ - /* see if we already have this event cached */ \ - PMIX_LIST_FOREACH(cp, &pmix_globals.cached_events, pmix_event_chain_t) { \ - if (cp->status == (e)) { \ - ch = cp; \ - break; \ - } \ - } \ - if (NULL == ch) { \ - /* nope - need to add it */ \ - ch = PMIX_NEW(pmix_event_chain_t); \ - ch->status = (e); \ - ch->range = (r); \ - (void)strncpy(ch->source.nspace, \ - (p)->nptr->nspace, \ - PMIX_MAX_NSLEN); \ - ch->source.rank = (p)->info->pname.rank; \ - ch->ninfo = 0; \ - ch->nallocated = 2; \ - ch->final_cbfunc = (f); \ - ch->final_cbdata = ch; \ - PMIX_INFO_CREATE(ch->info, 
ch->nallocated); \ - /* cache it */ \ - pmix_list_append(&pmix_globals.cached_events, &ch->super); \ - ch->timer_active = true; \ - pmix_event_assign(&ch->ev, pmix_globals.evbase, -1, 0, \ - pmix_event_timeout_cb, ch); \ - PMIX_POST_OBJECT(ch); \ - pmix_event_add(&ch->ev, &pmix_globals.event_window); \ - } else { \ - /* add this peer to the array of sources */ \ - (void)strncpy(proc.nspace, (p)->nptr->nspace, PMIX_MAX_NSLEN); \ - proc.rank = (p)->info->pname.rank; \ - ninfo = ch->nallocated + 1; \ - PMIX_INFO_CREATE(info, ninfo); \ - /* must keep the hdlr name and return object at the end, so prepend */ \ - PMIX_INFO_LOAD(&info[0], PMIX_PROCID, \ - &proc, PMIX_PROC); \ - for (n=0; n < ch->ninfo; n++) { \ - PMIX_INFO_XFER(&info[n+1], &ch->info[n]); \ - } \ - PMIX_INFO_FREE(ch->info, ch->nallocated); \ - ch->nallocated = ninfo; \ - ch->info = info; \ - ch->ninfo = ninfo - 2; \ - /* reset the timer */ \ - pmix_event_del(&ch->ev); \ - PMIX_POST_OBJECT(ch); \ - pmix_event_add(&ch->ev, &pmix_globals.event_window); \ - } \ +#define PMIX_REPORT_EVENT(e, p, r, f) \ + do { \ + pmix_event_chain_t *ch, *cp; \ + size_t n, ninfo; \ + pmix_info_t *info; \ + pmix_proc_t proc; \ + \ + ch = NULL; \ + /* see if we already have this event cached */ \ + PMIX_LIST_FOREACH(cp, &pmix_globals.cached_events, pmix_event_chain_t) { \ + if (cp->status == (e)) { \ + ch = cp; \ + break; \ + } \ + } \ + if (NULL == ch) { \ + /* nope - need to add it */ \ + ch = PMIX_NEW(pmix_event_chain_t); \ + ch->status = (e); \ + ch->range = (r); \ + PMIX_LOAD_PROCID(&ch->source, (p)->nptr->nspace, \ + (p)->info->pname.rank); \ + PMIX_PROC_CREATE(ch->affected, 1); \ + ch->naffected = 1; \ + PMIX_LOAD_PROCID(ch->affected, (p)->nptr->nspace, \ + (p)->info->pname.rank); \ + /* if I'm a client or tool and this is my server, then we don't */ \ + /* set the targets - otherwise, we do */ \ + if (!PMIX_PROC_IS_SERVER(pmix_globals.mypeer) && \ + !PMIX_CHECK_PROCID(&pmix_client_globals.myserver->info->pname, \ + 
&(p)->info->pname)) { \ + PMIX_PROC_CREATE(ch->targets, 1); \ + ch->ntargets = 1; \ + PMIX_LOAD_PROCID(ch->targets, (p)->nptr->nspace, PMIX_RANK_WILDCARD); \ + } \ + /* if this is lost-connection-to-server, then we let it go to */ \ + /* the default event handler - otherwise, we don't */ \ + if (PMIX_ERR_LOST_CONNECTION_TO_SERVER != (e) && \ + PMIX_ERR_UNREACH != (e)) { \ + ch->ninfo = 1; \ + ch->nallocated = 3; \ + PMIX_INFO_CREATE(ch->info, ch->nallocated); \ + /* mark for non-default handlers only */ \ + PMIX_INFO_LOAD(&ch->info[0], PMIX_EVENT_NON_DEFAULT, NULL, PMIX_BOOL); \ + } else { \ + ch->nallocated = 2; \ + PMIX_INFO_CREATE(ch->info, ch->nallocated); \ + } \ + ch->final_cbfunc = (f); \ + ch->final_cbdata = ch; \ + /* cache it */ \ + pmix_list_append(&pmix_globals.cached_events, &ch->super); \ + ch->timer_active = true; \ + pmix_event_assign(&ch->ev, pmix_globals.evbase, -1, 0, \ + pmix_event_timeout_cb, ch); \ + PMIX_POST_OBJECT(ch); \ + pmix_event_add(&ch->ev, &pmix_globals.event_window); \ + } else { \ + /* add this peer to the array of sources */ \ + pmix_strncpy(proc.nspace, (p)->nptr->nspace, PMIX_MAX_NSLEN); \ + proc.rank = (p)->info->pname.rank; \ + ninfo = ch->nallocated + 1; \ + PMIX_INFO_CREATE(info, ninfo); \ + /* must keep the hdlr name and return object at the end, so prepend */ \ + PMIX_INFO_LOAD(&info[0], PMIX_PROCID, \ + &proc, PMIX_PROC); \ + for (n=0; n < ch->ninfo; n++) { \ + PMIX_INFO_XFER(&info[n+1], &ch->info[n]); \ + } \ + PMIX_INFO_FREE(ch->info, ch->nallocated); \ + ch->nallocated = ninfo; \ + ch->info = info; \ + ch->ninfo = ninfo - 2; \ + /* reset the timer */ \ + pmix_event_del(&ch->ev); \ + PMIX_POST_OBJECT(ch); \ + pmix_event_add(&ch->ev, &pmix_globals.event_window); \ + } \ } while(0) diff --git a/opal/mca/pmix/pmix3x/pmix/src/event/pmix_event_notification.c b/opal/mca/pmix/pmix3x/pmix/src/event/pmix_event_notification.c index f0ab4c21f0e..c667489394c 100644 --- a/opal/mca/pmix/pmix3x/pmix/src/event/pmix_event_notification.c 
+++ b/opal/mca/pmix/pmix3x/pmix/src/event/pmix_event_notification.c @@ -1,8 +1,8 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* - * Copyright (c) 2014-2018 Intel, Inc. All rights reserved. - * Copyright (c) 2017 Research Organization for Information Science - * and Technology (RIST). All rights reserved. + * Copyright (c) 2014-2019 Intel, Inc. All rights reserved. + * Copyright (c) 2017-2019 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * Copyright (c) 2017 IBM Corporation. All rights reserved. * * $COPYRIGHT$ @@ -30,7 +30,7 @@ static pmix_status_t notify_server_of_event(pmix_status_t status, const pmix_proc_t *source, pmix_data_range_t range, - pmix_info_t info[], size_t ninfo, + const pmix_info_t info[], size_t ninfo, pmix_op_cbfunc_t cbfunc, void *cbdata); /* if we are a client, we call this function to notify the server of @@ -39,7 +39,7 @@ static pmix_status_t notify_server_of_event(pmix_status_t status, PMIX_EXPORT pmix_status_t PMIx_Notify_event(pmix_status_t status, const pmix_proc_t *source, pmix_data_range_t range, - pmix_info_t info[], size_t ninfo, + const pmix_info_t info[], size_t ninfo, pmix_op_cbfunc_t cbfunc, void *cbdata) { int rc; @@ -51,17 +51,22 @@ PMIX_EXPORT pmix_status_t PMIx_Notify_event(pmix_status_t status, return PMIX_ERR_INIT; } - if (PMIX_PROC_IS_SERVER(pmix_globals.mypeer) && !PMIX_PROC_IS_LAUNCHER(pmix_globals.mypeer)) { PMIX_RELEASE_THREAD(&pmix_global_lock); + + pmix_output_verbose(2, pmix_server_globals.event_output, + "pmix_server_notify_event source = %s:%d event_status = %s", + (NULL == source) ? "UNKNOWN" : source->nspace, + (NULL == source) ? PMIX_RANK_WILDCARD : source->rank, PMIx_Error_string(status)); + rc = pmix_server_notify_client_of_event(status, source, range, info, ninfo, cbfunc, cbdata); - pmix_output_verbose(2, pmix_server_globals.event_output, - "pmix_server_notify_event source = %s:%d event_status = %d, rc= %d", - (NULL == source) ? 
"UNKNOWN" : source->nspace, - (NULL == source) ? PMIX_RANK_WILDCARD : source->rank, status, rc); + + if (PMIX_SUCCESS != rc && PMIX_OPERATION_SUCCEEDED != rc) { + PMIX_ERROR_LOG(rc); + } return rc; } @@ -71,14 +76,17 @@ PMIX_EXPORT pmix_status_t PMIx_Notify_event(pmix_status_t status, return PMIX_ERR_UNREACH; } PMIX_RELEASE_THREAD(&pmix_global_lock); + pmix_output_verbose(2, pmix_client_globals.event_output, + "pmix_client_notify_event source = %s:%d event_status =%d", + (NULL == source) ? pmix_globals.myid.nspace : source->nspace, + (NULL == source) ? pmix_globals.myid.rank : source->rank, status); rc = notify_server_of_event(status, source, range, info, ninfo, cbfunc, cbdata); - pmix_output_verbose(2, pmix_client_globals.event_output, - "pmix_client_notify_event source = %s:%d event_status =%d, rc=%d", - (NULL == source) ? pmix_globals.myid.nspace : source->nspace, - (NULL == source) ? pmix_globals.myid.rank : source->rank, status, rc); + if (PMIX_SUCCESS != rc) { + PMIX_ERROR_LOG(rc); + } return rc; } @@ -102,11 +110,54 @@ static void notify_event_cbfunc(struct pmix_peer_t *pr, pmix_ptl_hdr_t *hdr, PMIX_RELEASE(cb); } +static pmix_status_t notify_event_cache(pmix_notify_caddy_t *cd) +{ + pmix_status_t rc; + int j; + pmix_notify_caddy_t *pk; + int idx; + time_t etime; + + /* add to our cache */ + rc = pmix_hotel_checkin(&pmix_globals.notifications, cd, &cd->room); + /* if there wasn't room, then search for the longest tenured + * occupant and evict them */ + if (PMIX_SUCCESS != rc) { + etime = 0; + idx = -1; + for (j=0; j < pmix_globals.max_events; j++) { + pmix_hotel_knock(&pmix_globals.notifications, j, (void**)&pk); + if (NULL == pk) { + /* hey, there is room! 
*/ + pmix_hotel_checkin_with_res(&pmix_globals.notifications, cd, &cd->room); + return PMIX_SUCCESS; + } + /* check the age */ + if (0 == j) { + etime = pk->ts; + idx = j; + } else { + if (difftime(pk->ts, etime) < 0) { + etime = pk->ts; + idx = j; + } + } + } + if (0 <= idx) { + /* we found the oldest occupant - evict it */ + pmix_hotel_checkout_and_return_occupant(&pmix_globals.notifications, idx, (void**)&pk); + PMIX_RELEASE(pk); + rc = pmix_hotel_checkin(&pmix_globals.notifications, cd, &cd->room); + } + } + return rc; +} + /* as a client, we pass the notification to our server */ static pmix_status_t notify_server_of_event(pmix_status_t status, const pmix_proc_t *source, pmix_data_range_t range, - pmix_info_t info[], size_t ninfo, + const pmix_info_t info[], size_t ninfo, pmix_op_cbfunc_t cbfunc, void *cbdata) { pmix_status_t rc; @@ -115,11 +166,13 @@ static pmix_status_t notify_server_of_event(pmix_status_t status, pmix_cb_t *cb; pmix_event_chain_t *chain; size_t n; - pmix_notify_caddy_t *cd, *rbout; + pmix_notify_caddy_t *cd; pmix_output_verbose(2, pmix_client_globals.event_output, - "client: notifying server %s:%d of status %s for range %s", + "[%s:%d] client: notifying server %s:%d of status %s for range %s", pmix_globals.myid.nspace, pmix_globals.myid.rank, + pmix_client_globals.myserver->info->pname.nspace, + pmix_client_globals.myserver->info->pname.rank, PMIx_Error_string(status), PMIx_Data_range_string(range)); if (PMIX_RANGE_PROC_LOCAL != range) { @@ -166,102 +219,56 @@ static pmix_status_t notify_server_of_event(pmix_status_t status, /* setup for our own local callbacks */ chain = PMIX_NEW(pmix_event_chain_t); chain->status = status; - (void)strncpy(chain->source.nspace, pmix_globals.myid.nspace, PMIX_MAX_NSLEN); + pmix_strncpy(chain->source.nspace, pmix_globals.myid.nspace, PMIX_MAX_NSLEN); chain->source.rank = pmix_globals.myid.rank; /* we always leave space for event hdlr name and a callback object */ chain->nallocated = ninfo + 2; 
PMIX_INFO_CREATE(chain->info, chain->nallocated); - - if (0 < ninfo) { - chain->ninfo = ninfo; - /* need to copy the info */ - for (n=0; n < ninfo; n++) { - PMIX_INFO_XFER(&chain->info[n], &info[n]); - } - } + /* prep the chain for processing */ + pmix_prep_event_chain(chain, info, ninfo, true); /* we need to cache this event so we can pass it into * ourselves should someone later register for it */ cd = PMIX_NEW(pmix_notify_caddy_t); cd->status = status; if (NULL == source) { - (void)strncpy(cd->source.nspace, "UNDEF", PMIX_MAX_NSLEN); + pmix_strncpy(cd->source.nspace, "UNDEF", PMIX_MAX_NSLEN); cd->source.rank = PMIX_RANK_UNDEF; } else { - (void)strncpy(cd->source.nspace, source->nspace, PMIX_MAX_NSLEN); + pmix_strncpy(cd->source.nspace, source->nspace, PMIX_MAX_NSLEN); cd->source.rank = source->rank; } cd->range = range; if (0 < chain->ninfo) { cd->ninfo = chain->ninfo; PMIX_INFO_CREATE(cd->info, cd->ninfo); + cd->nondefault = chain->nondefault; /* need to copy the info */ for (n=0; n < cd->ninfo; n++) { PMIX_INFO_XFER(&cd->info[n], &chain->info[n]); - if (0 == strncmp(cd->info[n].key, PMIX_EVENT_NON_DEFAULT, PMIX_MAX_KEYLEN)) { - cd->nondefault = true; - chain->nondefault = true; - } else if (0 == strncmp(cd->info[n].key, PMIX_EVENT_CUSTOM_RANGE, PMIX_MAX_KEYLEN)) { - /* provides an array of pmix_proc_t identifying the procs - * that are to receive this notification, or a single pmix_proc_t */ - if (PMIX_DATA_ARRAY == cd->info[n].value.type && - NULL != cd->info[n].value.data.darray && - NULL != cd->info[n].value.data.darray->array) { - cd->ntargets = cd->info[n].value.data.darray->size; - PMIX_PROC_CREATE(cd->targets, cd->ntargets); - memcpy(cd->targets, cd->info[n].value.data.darray->array, cd->ntargets * sizeof(pmix_proc_t)); - } else if (PMIX_PROC == cd->info[n].value.type) { - cd->ntargets = 1; - PMIX_PROC_CREATE(cd->targets, cd->ntargets); - memcpy(cd->targets, cd->info[n].value.data.proc, sizeof(pmix_proc_t)); - } else { - /* this is an error */ - 
PMIX_ERROR_LOG(PMIX_ERR_BAD_PARAM); - return PMIX_ERR_BAD_PARAM; - } - } else if (0 == strncmp(cd->info[n].key, PMIX_EVENT_AFFECTED_PROC, PMIX_MAX_KEYLEN)) { - PMIX_PROC_CREATE(cd->affected, 1); - if (NULL == cd->affected) { - rc = PMIX_ERR_NOMEM; - goto cleanup; - } - cd->naffected = 1; - memcpy(cd->affected, cd->info[n].value.data.proc, sizeof(pmix_proc_t)); - /* need to do the same for chain so it can be correctly processed */ - PMIX_PROC_CREATE(chain->affected, 1); - if (NULL == chain->affected) { - rc = PMIX_ERR_NOMEM; - goto cleanup; - } - chain->naffected = 1; - memcpy(chain->affected, cd->info[n].value.data.proc, sizeof(pmix_proc_t)); - } else if (0 == strncmp(cd->info[n].key, PMIX_EVENT_AFFECTED_PROCS, PMIX_MAX_KEYLEN)) { - cd->naffected = cd->info[n].value.data.darray->size; - PMIX_PROC_CREATE(cd->affected, cd->naffected); - if (NULL == cd->affected) { - cd->naffected = 0; - rc = PMIX_ERR_NOMEM; - goto cleanup; - } - memcpy(cd->affected, cd->info[n].value.data.darray->array, cd->naffected * sizeof(pmix_proc_t)); - /* need to do the same for chain so it can be correctly processed */ - chain->naffected = cd->info[n].value.data.darray->size; - PMIX_PROC_CREATE(chain->affected, chain->naffected); - if (NULL == chain->affected) { - chain->naffected = 0; - rc = PMIX_ERR_NOMEM; - goto cleanup; - } - memcpy(chain->affected, cd->info[n].value.data.darray->array, chain->naffected * sizeof(pmix_proc_t)); - } } } - - /* add to our cache */ - rbout = pmix_ring_buffer_push(&pmix_globals.notifications, cd); - /* if an older event was bumped, release it */ - if (NULL != rbout) { - PMIX_RELEASE(rbout); + if (NULL != chain->targets) { + cd->ntargets = chain->ntargets; + PMIX_PROC_CREATE(cd->targets, cd->ntargets); + memcpy(cd->targets, chain->targets, cd->ntargets * sizeof(pmix_proc_t)); + } + if (NULL != chain->affected) { + cd->naffected = chain->naffected; + PMIX_PROC_CREATE(cd->affected, cd->naffected); + if (NULL == cd->affected) { + cd->naffected = 0; + rc = 
PMIX_ERR_NOMEM; + goto cleanup; + } + memcpy(cd->affected, chain->affected, cd->naffected * sizeof(pmix_proc_t)); + } + /* cache it */ + rc = notify_event_cache(cd); + if (PMIX_SUCCESS != rc) { + PMIX_ERROR_LOG(rc); + PMIX_RELEASE(cd); + goto cleanup; } if (PMIX_RANGE_PROC_LOCAL != range && NULL != msg) { @@ -275,8 +282,10 @@ static pmix_status_t notify_server_of_event(pmix_status_t status, cb->cbdata = cbdata; /* send to the server */ pmix_output_verbose(2, pmix_client_globals.event_output, - "client: notifying server %s:%d - sending", - pmix_globals.myid.nspace, pmix_globals.myid.rank); + "[%s:%d] client: notifying server %s:%d - sending", + pmix_globals.myid.nspace, pmix_globals.myid.rank, + pmix_client_globals.myserver->info->pname.nspace, + pmix_client_globals.myserver->info->pname.rank); PMIX_PTL_SEND_RECV(rc, pmix_client_globals.myserver, msg, notify_event_cbfunc, cb); if (PMIX_SUCCESS != rc) { @@ -284,7 +293,7 @@ static pmix_status_t notify_server_of_event(pmix_status_t status, PMIX_RELEASE(cb); goto cleanup; } - } else { + } else if (NULL != cbfunc) { cbfunc(PMIX_SUCCESS, cbdata); } @@ -346,9 +355,9 @@ static void progress_local_event_hdlr(pmix_status_t status, /* save this handler's returned status */ if (NULL != chain->evhdlr->name) { - (void)strncpy(newinfo[cnt].key, chain->evhdlr->name, PMIX_MAX_KEYLEN); + pmix_strncpy(newinfo[cnt].key, chain->evhdlr->name, PMIX_MAX_KEYLEN); } else { - (void)strncpy(newinfo[cnt].key, "UNKNOWN", PMIX_MAX_KEYLEN); + pmix_strncpy(newinfo[cnt].key, "UNKNOWN", PMIX_MAX_KEYLEN); } newinfo[cnt].value.type = PMIX_STATUS; newinfo[cnt].value.data.status = status; @@ -425,7 +434,7 @@ static void progress_local_event_hdlr(pmix_status_t status, } while (pmix_list_get_end(&pmix_globals.events.multi_events) != (item = pmix_list_get_next(item))) { nxt = (pmix_event_hdlr_t*)item; - if (!pmix_notify_check_range(&nxt->rng, &chain->source) && + if (!pmix_notify_check_range(&nxt->rng, &chain->source) || 
!pmix_notify_check_affected(nxt->affected, nxt->naffected, chain->affected, chain->naffected)) { continue; @@ -623,10 +632,17 @@ void pmix_invoke_local_event_hdlr(pmix_event_chain_t *chain) goto complete; } - /* check for directives */ - for (i=0; i < chain->ninfo; i++) { - if (0 == strncmp(chain->info[i].key, PMIX_EVENT_NON_DEFAULT, PMIX_MAX_KEYLEN)) { - chain->nondefault = true; + /* if we are not a target, then we can simply ignore this event */ + if (NULL != chain->targets) { + found = false; + for (i=0; i < chain->ntargets; i++) { + if (PMIX_CHECK_PROCID(&chain->targets[i], &pmix_globals.myid)) { + found = true; + break; + } + } + if (!found) { + goto complete; } } @@ -743,6 +759,8 @@ void pmix_invoke_local_event_hdlr(pmix_event_chain_t *chain) /* we still have to call their final callback */ if (NULL != chain->final_cbfunc) { chain->final_cbfunc(rc, chain->final_cbdata); + } else { + PMIX_RELEASE(chain); } return; @@ -789,17 +807,19 @@ static void local_cbfunc(pmix_status_t status, void *cbdata) static void _notify_client_event(int sd, short args, void *cbdata) { pmix_notify_caddy_t *cd = (pmix_notify_caddy_t*)cbdata; - pmix_notify_caddy_t *rbout; pmix_regevents_info_t *reginfoptr; pmix_peer_events_info_t *pr; pmix_event_chain_t *chain; - size_t n; + size_t n, nleft; bool matched, holdcd; pmix_buffer_t *bfr; pmix_cmd_t cmd = PMIX_NOTIFY_CMD; pmix_status_t rc; pmix_list_t trk; pmix_namelist_t *nm; + pmix_namespace_t *nptr, *tmp; + pmix_range_trkr_t rngtrk; + pmix_proc_t proc; /* need to acquire the object from its originating thread */ PMIX_ACQUIRE_OBJECT(cd); @@ -815,7 +835,7 @@ static void _notify_client_event(int sd, short args, void *cbdata) if (0 < cd->ninfo) { /* check for caching instructions */ for (n=0; n < cd->ninfo; n++) { - if (0 == strncmp(cd->info[n].key, PMIX_EVENT_DO_NOT_CACHE, PMIX_MAX_KEYLEN)) { + if (PMIX_CHECK_KEY(&cd->info[n], PMIX_EVENT_DO_NOT_CACHE)) { if (PMIX_INFO_TRUE(&cd->info[n])) { holdcd = false; } @@ -829,17 +849,92 @@ static 
void _notify_client_event(int sd, short args, void *cbdata) * the message until all local procs have received it, or it ages to * the point where it gets pushed out by more recent events */ PMIX_RETAIN(cd); - rbout = pmix_ring_buffer_push(&pmix_globals.notifications, cd); + rc = notify_event_cache(cd); + if (PMIX_SUCCESS != rc) { + PMIX_ERROR_LOG(rc); + } + } + + /* we may also have registered for events, so setup to check this + * against our registrations */ + chain = PMIX_NEW(pmix_event_chain_t); + chain->status = cd->status; + pmix_strncpy(chain->source.nspace, cd->source.nspace, PMIX_MAX_NSLEN); + chain->source.rank = cd->source.rank; + /* we always leave space for a callback object and + * the evhandler name. */ + chain->nallocated = cd->ninfo + 2; + PMIX_INFO_CREATE(chain->info, chain->nallocated); + /* prep the chain for processing */ + pmix_prep_event_chain(chain, cd->info, cd->ninfo, true); + + /* copy setup to the cd object */ + cd->nondefault = chain->nondefault; + if (NULL != chain->targets) { + cd->ntargets = chain->ntargets; + PMIX_PROC_CREATE(cd->targets, cd->ntargets); + memcpy(cd->targets, chain->targets, cd->ntargets * sizeof(pmix_proc_t)); + /* compute the number of targets that need to be notified */ + nleft = 0; + for (n=0; n < cd->ntargets; n++) { + /* if this is a single proc, then increment by one */ + if (PMIX_RANK_VALID >= cd->targets[n].rank) { + ++nleft; + } else { + /* look up the nspace for this proc */ + nptr = NULL; + PMIX_LIST_FOREACH(tmp, &pmix_globals.nspaces, pmix_namespace_t) { + if (PMIX_CHECK_NSPACE(tmp->nspace, cd->targets[n].nspace)) { + nptr = tmp; + break; + } + } + /* if we don't yet know it, then nothing to do */ + if (NULL == nptr) { + nleft = SIZE_MAX; + break; + } + /* might notify all local members */ + nleft += nptr->nlocalprocs; + } + } + cd->nleft = nleft; + } + if (NULL != chain->affected) { + cd->naffected = chain->naffected; + PMIX_PROC_CREATE(cd->affected, cd->naffected); + if (NULL == cd->affected) { + 
cd->naffected = 0; + /* notify the caller */ + if (NULL != cd->cbfunc) { + cd->cbfunc(PMIX_ERR_NOMEM, cd->cbdata); + } + PMIX_RELEASE(cd); + PMIX_RELEASE(chain); + return; + } + memcpy(cd->affected, chain->affected, cd->naffected * sizeof(pmix_proc_t)); + } - /* if an older event was bumped, release it */ - if (NULL != rbout) { - PMIX_RELEASE(rbout); + /* if they provided a PMIX_EVENT_CUSTOM_RANGE info object but + * specified a range other than PMIX_RANGE_CUSTOM, then this + * is an error */ + if (PMIX_RANGE_CUSTOM != cd->range && NULL != cd->targets) { + PMIX_ERROR_LOG(PMIX_ERR_BAD_PARAM); + /* notify the caller */ + if (NULL != cd->cbfunc) { + cd->cbfunc(PMIX_ERR_BAD_PARAM, cd->cbdata); } + PMIX_RELEASE(cd); + PMIX_RELEASE(chain); + return; } holdcd = false; if (PMIX_RANGE_PROC_LOCAL != cd->range) { PMIX_CONSTRUCT(&trk, pmix_list_t); + rngtrk.procs = NULL; + rngtrk.nprocs = 0; /* cycle across our registered events and send the message to * any client who registered for it */ PMIX_LIST_FOREACH(reginfoptr, &pmix_server_globals.events, pmix_regevents_info_t) { @@ -849,8 +944,7 @@ static void _notify_client_event(int sd, short args, void *cbdata) /* if this client was the source of the event, then * don't send it back as they will have processed it * when they generated it */ - if (0 == strncmp(cd->source.nspace, pr->peer->info->pname.nspace, PMIX_MAX_NSLEN) && - cd->source.rank == pr->peer->info->pname.rank) { + if (PMIX_CHECK_PROCID(&cd->source, &pr->peer->info->pname)) { continue; } /* if we have already notified this client, then don't do it again */ @@ -864,23 +958,24 @@ static void _notify_client_event(int sd, short args, void *cbdata) if (matched) { continue; } - /* if we were given specific targets, check if this is one */ - if (NULL != cd->targets) { - matched = false; - for (n=0; n < cd->ntargets; n++) { - if (0 != strncmp(pr->peer->info->pname.nspace, cd->targets[n].nspace, PMIX_MAX_NSLEN)) { - continue; - } - if (PMIX_RANK_WILDCARD == cd->targets[n].rank 
|| - pr->peer->info->pname.rank == cd->targets[n].rank) { - matched = true; - break; - } - } - if (!matched) { - /* do not notify this one */ - continue; - } + /* check if the affected procs (if given) match those they + * wanted to know about */ + if (!pmix_notify_check_affected(cd->affected, cd->naffected, + pr->affected, pr->naffected)) { + continue; + } + /* check the range */ + if (NULL == cd->targets) { + rngtrk.procs = &cd->source; + rngtrk.nprocs = 1; + } else { + rngtrk.procs = cd->targets; + rngtrk.nprocs = cd->ntargets; + } + rngtrk.range = cd->range; + PMIX_LOAD_PROCID(&proc, pr->peer->info->pname.nspace, pr->peer->info->pname.rank); + if (!pmix_notify_check_range(&rngtrk, &proc)) { + continue; } pmix_output_verbose(2, pmix_server_globals.event_output, "pmix_server: notifying client %s:%u on status %s", @@ -935,17 +1030,28 @@ static void _notify_client_event(int sd, short args, void *cbdata) continue; } } - PMIX_SERVER_QUEUE_REPLY(pr->peer, 0, bfr); + PMIX_SERVER_QUEUE_REPLY(rc, pr->peer, 0, bfr); + if (PMIX_SUCCESS != rc) { + PMIX_RELEASE(bfr); + } + if (NULL != cd->targets && 0 < cd->nleft) { + /* track the number of targets we have left to notify */ + --cd->nleft; + /* if the event was cached and this is the last one, + * then evict this event from the cache */ + if (0 == cd->nleft) { + pmix_hotel_checkout(&pmix_globals.notifications, cd->room); + holdcd = false; + break; + } + } } } } PMIX_LIST_DESTRUCT(&trk); - if (PMIX_RANGE_LOCAL != cd->range && - 0 == strncmp(cd->source.nspace, pmix_globals.myid.nspace, PMIX_MAX_NSLEN) && - cd->source.rank == pmix_globals.myid.rank) { + if (PMIX_RANGE_LOCAL != cd->range && PMIX_CHECK_PROCID(&cd->source, &pmix_globals.myid)) { /* if we are the source, then we need to post this upwards as - * well so the host RM can broadcast it as necessary - we rely - * on the host RM to _not_ deliver this back to us! 
*/ + * well so the host RM can broadcast it as necessary */ if (NULL != pmix_host_server.notify_event) { /* mark that we sent it upstairs so we don't release * the caddy until we return from the host RM */ @@ -953,85 +1059,10 @@ static void _notify_client_event(int sd, short args, void *cbdata) pmix_host_server.notify_event(cd->status, &cd->source, cd->range, cd->info, cd->ninfo, local_cbfunc, cd); } - } } - /* we may also have registered for events, so be sure to check this - * against our registrations */ - chain = PMIX_NEW(pmix_event_chain_t); - chain->status = cd->status; - (void)strncpy(chain->source.nspace, cd->source.nspace, PMIX_MAX_NSLEN); - chain->source.rank = cd->source.rank; - /* we always leave space for a callback object and - * the evhandler name. */ - chain->nallocated = cd->ninfo + 2; - PMIX_INFO_CREATE(chain->info, chain->nallocated); - if (0 < cd->ninfo) { - chain->ninfo = cd->ninfo; - /* need to copy the info */ - for (n=0; n < cd->ninfo; n++) { - PMIX_INFO_XFER(&chain->info[n], &cd->info[n]); - if (0 == strncmp(cd->info[n].key, PMIX_EVENT_NON_DEFAULT, PMIX_MAX_KEYLEN)) { - cd->nondefault = true; - chain->nondefault = true; - } else if (0 == strncmp(cd->info[n].key, PMIX_EVENT_CUSTOM_RANGE, PMIX_MAX_KEYLEN)) { - /* provides an array of pmix_proc_t identifying the procs - * that are to receive this notification, or a single pmix_proc_t */ - if (PMIX_DATA_ARRAY == cd->info[n].value.type && - NULL != cd->info[n].value.data.darray && - NULL != cd->info[n].value.data.darray->array) { - cd->ntargets = cd->info[n].value.data.darray->size; - PMIX_PROC_CREATE(cd->targets, cd->ntargets); - memcpy(cd->targets, cd->info[n].value.data.darray->array, cd->ntargets * sizeof(pmix_proc_t)); - } else if (PMIX_PROC == cd->info[n].value.type) { - cd->ntargets = 1; - PMIX_PROC_CREATE(cd->targets, cd->ntargets); - memcpy(cd->targets, cd->info[n].value.data.proc, sizeof(pmix_proc_t)); - } else { - /* this is an error */ - PMIX_ERROR_LOG(PMIX_ERR_BAD_PARAM); - 
PMIX_RELEASE(chain); - return; - } - } else if (0 == strncmp(cd->info[n].key, PMIX_EVENT_AFFECTED_PROC, PMIX_MAX_KEYLEN)) { - PMIX_PROC_CREATE(cd->affected, 1); - if (NULL == cd->affected) { - PMIX_RELEASE(chain); - return; - } - cd->naffected = 1; - memcpy(cd->affected, cd->info[n].value.data.proc, sizeof(pmix_proc_t)); - /* need to do the same for chain so it can be correctly processed */ - PMIX_PROC_CREATE(chain->affected, 1); - if (NULL == chain->affected) { - PMIX_RELEASE(chain); - return; - } - chain->naffected = 1; - memcpy(chain->affected, cd->info[n].value.data.proc, sizeof(pmix_proc_t)); - } else if (0 == strncmp(cd->info[n].key, PMIX_EVENT_AFFECTED_PROCS, PMIX_MAX_KEYLEN)) { - cd->naffected = cd->info[n].value.data.darray->size; - PMIX_PROC_CREATE(cd->affected, cd->naffected); - if (NULL == cd->affected) { - cd->naffected = 0; - PMIX_RELEASE(chain); - return; - } - memcpy(cd->affected, cd->info[n].value.data.darray->array, cd->naffected * sizeof(pmix_proc_t)); - /* need to do the same for chain so it can be correctly processed */ - chain->naffected = cd->info[n].value.data.darray->size; - PMIX_PROC_CREATE(chain->affected, chain->naffected); - if (NULL == chain->affected) { - chain->naffected = 0; - PMIX_RELEASE(chain); - return; - } - memcpy(chain->affected, cd->info[n].value.data.darray->array, chain->naffected * sizeof(pmix_proc_t)); - } - } - } - /* process it */ + /* process it ourselves */ pmix_invoke_local_event_hdlr(chain); if (!holdcd) { @@ -1054,7 +1085,7 @@ static void _notify_client_event(int sd, short args, void *cbdata) pmix_status_t pmix_server_notify_client_of_event(pmix_status_t status, const pmix_proc_t *source, pmix_data_range_t range, - pmix_info_t info[], size_t ninfo, + const pmix_info_t info[], size_t ninfo, pmix_op_cbfunc_t cbfunc, void *cbdata) { pmix_notify_caddy_t *cd; @@ -1064,18 +1095,27 @@ pmix_status_t pmix_server_notify_client_of_event(pmix_status_t status, "pmix_server: notify client of event %s", 
PMIx_Error_string(status)); + if (NULL != info) { + for (n=0; n < ninfo; n++) { + if (PMIX_CHECK_KEY(&info[n], PMIX_EVENT_PROXY) && + PMIX_CHECK_PROCID(info[n].value.data.proc, &pmix_globals.myid)) { + return PMIX_OPERATION_SUCCEEDED; + } + } + } + cd = PMIX_NEW(pmix_notify_caddy_t); cd->status = status; if (NULL == source) { - (void)strncpy(cd->source.nspace, "UNDEF", PMIX_MAX_NSLEN); + pmix_strncpy(cd->source.nspace, "UNDEF", PMIX_MAX_NSLEN); cd->source.rank = PMIX_RANK_UNDEF; } else { - (void)strncpy(cd->source.nspace, source->nspace, PMIX_MAX_NSLEN); + pmix_strncpy(cd->source.nspace, source->nspace, PMIX_MAX_NSLEN); cd->source.rank = source->rank; } cd->range = range; /* have to copy the info to preserve it for future when cached */ - if (0 < ninfo) { + if (0 < ninfo && NULL != info) { cd->ninfo = ninfo; PMIX_INFO_CREATE(cd->info, cd->ninfo); /* need to copy the info */ @@ -1084,51 +1124,6 @@ pmix_status_t pmix_server_notify_client_of_event(pmix_status_t status, } } - /* check for directives */ - if (NULL != info) { - for (n=0; n < ninfo; n++) { - if (0 == strncmp(info[n].key, PMIX_EVENT_NON_DEFAULT, PMIX_MAX_KEYLEN)) { - cd->nondefault = true; - } else if (0 == strncmp(info[n].key, PMIX_EVENT_CUSTOM_RANGE, PMIX_MAX_KEYLEN)) { - /* provides an array of pmix_proc_t identifying the procs - * that are to receive this notification, or a single pmix_proc_t */ - if (PMIX_DATA_ARRAY == info[n].value.type && - NULL != info[n].value.data.darray && - NULL != info[n].value.data.darray->array) { - cd->ntargets = info[n].value.data.darray->size; - PMIX_PROC_CREATE(cd->targets, cd->ntargets); - memcpy(cd->targets, info[n].value.data.darray->array, cd->ntargets * sizeof(pmix_proc_t)); - } else if (PMIX_PROC == info[n].value.type) { - cd->ntargets = 1; - PMIX_PROC_CREATE(cd->targets, cd->ntargets); - memcpy(cd->targets, info[n].value.data.proc, sizeof(pmix_proc_t)); - } else { - /* this is an error */ - PMIX_ERROR_LOG(PMIX_ERR_BAD_PARAM); - return PMIX_ERR_BAD_PARAM; - } - } - 
} - } - - /* - * If the range is PMIX_RANGE_NAMESPACE, then they should not have set a - * PMIX_EVENT_CUSTOM_RANGE info object or at least we should ignore it - */ - if (PMIX_RANGE_NAMESPACE == cd->range) { - if (cd->targets) { - PMIX_PROC_FREE(cd->targets, cd->ntargets); - } - PMIX_PROC_CREATE(cd->targets, 1); - cd->ntargets = 1; - cd->targets[0].rank = PMIX_RANK_WILDCARD; - if (NULL == source) { - strncpy(cd->targets[0].nspace, "UNDEF", PMIX_MAX_NSLEN); - } else { - strncpy(cd->targets[0].nspace, source->nspace, PMIX_MAX_NSLEN); - } - } - /* track the eventual callback info */ cd->cbfunc = cbfunc; cd->cbdata = cbdata; @@ -1155,37 +1150,34 @@ bool pmix_notify_check_range(pmix_range_trkr_t *rng, return true; } if (PMIX_RANGE_NAMESPACE == rng->range) { - if (0 == strncmp(pmix_globals.myid.nspace, proc->nspace, PMIX_MAX_NSLEN)) { - return true; + for (n=0; n < rng->nprocs; n++) { + if (PMIX_CHECK_NSPACE(rng->procs[n].nspace, proc->nspace)) { + return true; + } } return false; } if (PMIX_RANGE_PROC_LOCAL == rng->range) { - if (0 == strncmp(pmix_globals.myid.nspace, proc->nspace, PMIX_MAX_NSLEN) && - pmix_globals.myid.rank == proc->rank) { - return true; + for (n=0; n < rng->nprocs; n++) { + if (PMIX_CHECK_PROCID(&rng->procs[n], proc)) { + return true; + } } return false; } if (PMIX_RANGE_CUSTOM == rng->range) { - if (NULL != rng->procs) { - /* see if this proc was included */ - for (n=0; n < rng->nprocs; n++) { - if (0 != strncmp(rng->procs[n].nspace, proc->nspace, PMIX_MAX_NSLEN)) { - continue; - } - if (PMIX_RANK_WILDCARD == rng->procs[n].rank || - rng->procs[n].rank == proc->rank) { - return true; - } + /* see if this proc was included */ + for (n=0; n < rng->nprocs; n++) { + if (0 != strncmp(rng->procs[n].nspace, proc->nspace, PMIX_MAX_NSLEN)) { + continue; + } + if (PMIX_RANK_WILDCARD == rng->procs[n].rank || + rng->procs[n].rank == proc->rank) { + return true; } - /* if we get here, then this proc isn't in range */ - return false; - } else { - /* if they didn't 
give us a list, then assume - * everyone included */ - return true; } + /* if we get here, then this proc isn't in range */ + return false; } /* if it is anything else, then reject it */ @@ -1208,12 +1200,7 @@ bool pmix_notify_check_affected(pmix_proc_t *interested, size_t ninterested, /* check if the two overlap */ for (n=0; n < naffected; n++) { for (m=0; m < ninterested; m++) { - if (0 != strncmp(affected[n].nspace, interested[m].nspace, PMIX_MAX_NSLEN)) { - continue; - } - if (PMIX_RANK_WILDCARD == interested[m].rank || - PMIX_RANK_WILDCARD == affected[n].rank || - affected[n].rank == interested[m].rank) { + if (PMIX_CHECK_PROCID(&affected[n], &interested[m])) { return true; } } @@ -1246,6 +1233,65 @@ void pmix_event_timeout_cb(int fd, short flags, void *arg) } } +pmix_status_t pmix_prep_event_chain(pmix_event_chain_t *chain, + const pmix_info_t *info, size_t ninfo, + bool xfer) +{ + size_t n; + + if (NULL != info && 0 < ninfo) { + chain->ninfo = ninfo; + if (NULL == chain->info) { + PMIX_INFO_CREATE(chain->info, chain->ninfo); + } + /* need to copy the info */ + for (n=0; n < ninfo; n++) { + if (xfer) { + /* chain doesn't already have a copy of the info */ + PMIX_INFO_XFER(&chain->info[n], &info[n]); + } + /* look for specific directives */ + if (0 == strncmp(info[n].key, PMIX_EVENT_NON_DEFAULT, PMIX_MAX_KEYLEN)) { + chain->nondefault = PMIX_INFO_TRUE(&info[n]); + } else if (PMIX_CHECK_KEY(&info[n], PMIX_EVENT_CUSTOM_RANGE)) { + /* provides an array of pmix_proc_t identifying the procs + * that are to receive this notification, or a single pmix_proc_t */ + if (PMIX_DATA_ARRAY == info[n].value.type && + NULL != info[n].value.data.darray && + NULL != info[n].value.data.darray->array) { + chain->ntargets = info[n].value.data.darray->size; + PMIX_PROC_CREATE(chain->targets, chain->ntargets); + memcpy(chain->targets, info[n].value.data.darray->array, chain->ntargets * sizeof(pmix_proc_t)); + } else if (PMIX_PROC == info[n].value.type) { + chain->ntargets = 1; + 
PMIX_PROC_CREATE(chain->targets, chain->ntargets); + memcpy(chain->targets, info[n].value.data.proc, sizeof(pmix_proc_t)); + } else { + /* this is an error */ + PMIX_ERROR_LOG(PMIX_ERR_BAD_PARAM); + return PMIX_ERR_BAD_PARAM; + } + } else if (PMIX_CHECK_KEY(&info[n], PMIX_EVENT_AFFECTED_PROC)) { + PMIX_PROC_CREATE(chain->affected, 1); + if (NULL == chain->affected) { + return PMIX_ERR_NOMEM; + } + chain->naffected = 1; + memcpy(chain->affected, info[n].value.data.proc, sizeof(pmix_proc_t)); + } else if (PMIX_CHECK_KEY(&info[n], PMIX_EVENT_AFFECTED_PROCS)) { + chain->naffected = info[n].value.data.darray->size; + PMIX_PROC_CREATE(chain->affected, chain->naffected); + if (NULL == chain->affected) { + chain->naffected = 0; + return PMIX_ERR_NOMEM; + } + memcpy(chain->affected, info[n].value.data.darray->array, chain->naffected * sizeof(pmix_proc_t)); + } + } + } + return PMIX_SUCCESS; +} + /**** CLASS INSTANTIATIONS ****/ static void sevcon(pmix_event_hdlr_t *p) @@ -1328,6 +1374,8 @@ static void chcon(pmix_event_chain_t *p) p->source.rank = PMIX_RANK_UNDEF; p->nondefault = false; p->endchain = false; + p->targets = NULL; + p->ntargets = 0; p->range = PMIX_RANGE_UNDEF; p->affected = NULL; p->naffected = 0; @@ -1345,6 +1393,9 @@ static void chdes(pmix_event_chain_t *p) if (p->timer_active) { pmix_event_del(&p->ev); } + if (NULL != p->targets) { + PMIX_PROC_FREE(p->targets, p->ntargets); + } if (NULL != p->affected) { PMIX_PROC_FREE(p->affected, p->naffected); } diff --git a/opal/mca/pmix/pmix3x/pmix/src/event/pmix_event_registration.c b/opal/mca/pmix/pmix3x/pmix/src/event/pmix_event_registration.c index d1c95358ffb..be2346048d8 100644 --- a/opal/mca/pmix/pmix3x/pmix/src/event/pmix_event_registration.c +++ b/opal/mca/pmix/pmix3x/pmix/src/event/pmix_event_registration.c @@ -1,6 +1,6 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* - * Copyright (c) 2014-2018 Intel, Inc. All rights reserved. + * Copyright (c) 2014-2019 Intel, Inc. All rights reserved. 
* Copyright (c) 2017-2018 Research Organization for Information Science * and Technology (RIST). All rights reserved. * $COPYRIGHT$ @@ -78,6 +78,8 @@ PMIX_CLASS_INSTANCE(pmix_rshift_caddy_t, static void check_cached_events(pmix_rshift_caddy_t *cd); +/* catch the event registration response message from the + * server and process it */ static void regevents_cbfunc(struct pmix_peer_t *peer, pmix_ptl_hdr_t *hdr, pmix_buffer_t *buf, void *cbdata) { @@ -95,8 +97,14 @@ static void regevents_cbfunc(struct pmix_peer_t *peer, pmix_ptl_hdr_t *hdr, PMIX_BFROPS_UNPACK(rc, peer, buf, &ret, &cnt, PMIX_STATUS); if ((PMIX_SUCCESS != rc) || (PMIX_SUCCESS != ret)) { - PMIX_ERROR_LOG(rc); - /* remove the err handler and call the error handler reg completion callback fn.*/ + if (PMIX_SUCCESS != rc) { + PMIX_ERROR_LOG(rc); + } else { + PMIX_ERROR_LOG(ret); + } + /* remove the err handler and call the error handler + * reg completion callback fn so the requestor + * doesn't hang */ if (NULL == rb->list) { if (NULL != rb->hdlr) { PMIX_RELEASE(rb->hdlr); @@ -293,11 +301,7 @@ static pmix_status_t _add_hdlr(pmix_rshift_caddy_t *cd, pmix_list_t *xfer) PMIX_INFO_CREATE(cd2->info, cd2->ninfo); n=0; PMIX_LIST_FOREACH(ixfer, xfer, pmix_info_caddy_t) { - (void)strncpy(cd2->info[n].key, ixfer->info[n].key, PMIX_MAX_KEYLEN); - PMIX_BFROPS_VALUE_LOAD(pmix_client_globals.myserver, - &cd2->info[n].value, - &ixfer->info[n].value.data, - ixfer->info[n].value.type); + PMIX_INFO_XFER(&cd2->info[n], ixfer->info); ++n; } } @@ -333,16 +337,17 @@ static pmix_status_t _add_hdlr(pmix_rshift_caddy_t *cd, pmix_list_t *xfer) NULL != pmix_host_server.register_events) { pmix_output_verbose(2, pmix_client_globals.event_output, "pmix: _add_hdlr registering with server"); - if (PMIX_SUCCESS != (rc = pmix_host_server.register_events(cd->codes, cd->ncodes, - cd2->info, cd2->ninfo, - reg_cbfunc, cd2))) { + rc = pmix_host_server.register_events(cd->codes, cd->ncodes, + cd2->info, cd2->ninfo, + reg_cbfunc, cd2); + if 
(PMIX_SUCCESS != rc && PMIX_OPERATION_SUCCEEDED != rc) { if (NULL != cd2->info) { PMIX_INFO_FREE(cd2->info, cd2->ninfo); } PMIX_RELEASE(cd2); return rc; } - return PMIX_ERR_WOULD_BLOCK; + return PMIX_SUCCESS; } else { if (NULL != cd2->info) { PMIX_INFO_FREE(cd2->info, cd2->ninfo); @@ -355,13 +360,15 @@ static pmix_status_t _add_hdlr(pmix_rshift_caddy_t *cd, pmix_list_t *xfer) static void check_cached_events(pmix_rshift_caddy_t *cd) { - size_t i, n; + size_t n; pmix_notify_caddy_t *ncd; bool found, matched; pmix_event_chain_t *chain; + int j; - for (i=0; i < (size_t)pmix_globals.notifications.size; i++) { - if (NULL == (ncd = (pmix_notify_caddy_t*)pmix_ring_buffer_poke(&pmix_globals.notifications, i))) { + for (j=0; j < pmix_globals.max_events; j++) { + pmix_hotel_knock(&pmix_globals.notifications, j, (void**)&ncd); + if (NULL == ncd) { continue; } found = false; @@ -381,15 +388,11 @@ static void check_cached_events(pmix_rshift_caddy_t *cd) if (!found) { continue; } - /* if we were given specific targets, check if we are one */ + /* if we were given specific targets, check if we are one */ if (NULL != ncd->targets) { matched = false; for (n=0; n < ncd->ntargets; n++) { - if (0 != strncmp(pmix_globals.myid.nspace, ncd->targets[n].nspace, PMIX_MAX_NSLEN)) { - continue; - } - if (PMIX_RANK_WILDCARD == ncd->targets[n].rank || - pmix_globals.myid.rank == ncd->targets[n].rank) { + if (PMIX_CHECK_PROCID(&pmix_globals.myid, &ncd->targets[n])) { matched = true; break; } @@ -407,7 +410,7 @@ static void check_cached_events(pmix_rshift_caddy_t *cd) /* create the chain */ chain = PMIX_NEW(pmix_event_chain_t); chain->status = ncd->status; - (void)strncpy(chain->source.nspace, pmix_globals.myid.nspace, PMIX_MAX_NSLEN); + pmix_strncpy(chain->source.nspace, pmix_globals.myid.nspace, PMIX_MAX_NSLEN); chain->source.rank = pmix_globals.myid.rank; /* we always leave space for event hdlr name and a callback object */ chain->nallocated = ncd->ninfo + 2; @@ -439,6 +442,12 @@ static void 
check_cached_events(pmix_rshift_caddy_t *cd) } } } + /* check this event out of the cache since we + * are processing it */ + pmix_hotel_checkout(&pmix_globals.notifications, ncd->room); + /* release the storage */ + PMIX_RELEASE(ncd); + /* we don't want this chain to propagate, so indicate it * should only be run as a single-shot */ chain->endchain = true; @@ -493,8 +502,6 @@ static void reg_event_hdlr(int sd, short args, void *cbdata) } } else if (0 == strncmp(cd->info[n].key, PMIX_EVENT_HDLR_NAME, PMIX_MAX_KEYLEN)) { name = cd->info[n].value.data.string; - } else if (0 == strncmp(cd->info[n].key, PMIX_EVENT_ENVIRO_LEVEL, PMIX_MAX_KEYLEN)) { - cd->enviro = PMIX_INFO_TRUE(&cd->info[n]); } else if (0 == strncmp(cd->info[n].key, PMIX_EVENT_RETURN_OBJECT, PMIX_MAX_KEYLEN)) { cbobject = cd->info[n].value.data.ptr; } else if (0 == strncmp(cd->info[n].key, PMIX_EVENT_HDLR_FIRST_IN_CATEGORY, PMIX_MAX_KEYLEN)) { @@ -519,17 +526,34 @@ static void reg_event_hdlr(int sd, short args, void *cbdata) } else if (0 == strncmp(cd->info[n].key, PMIX_EVENT_AFFECTED_PROC, PMIX_MAX_KEYLEN)) { cd->affected = cd->info[n].value.data.proc; cd->naffected = 1; + ixfer = PMIX_NEW(pmix_info_caddy_t); + ixfer->info = &cd->info[n]; + ixfer->ninfo = 1; + pmix_list_append(&xfer, &ixfer->super); } else if (0 == strncmp(cd->info[n].key, PMIX_EVENT_AFFECTED_PROCS, PMIX_MAX_KEYLEN)) { cd->affected = (pmix_proc_t*)cd->info[n].value.data.darray->array; cd->naffected = cd->info[n].value.data.darray->size; + ixfer = PMIX_NEW(pmix_info_caddy_t); + ixfer->info = &cd->info[n]; + ixfer->ninfo = 1; + pmix_list_append(&xfer, &ixfer->super); } else { ixfer = PMIX_NEW(pmix_info_caddy_t); ixfer->info = &cd->info[n]; + ixfer->ninfo = 1; pmix_list_append(&xfer, &ixfer->super); } } } + /* check the codes for system events */ + for (n=0; n < cd->ncodes; n++) { + if (PMIX_SYSTEM_EVENT(cd->codes[n])) { + cd->enviro = true; + break; + } + } + /* if they indicated this is to be the "first" or "last" event, then * first 
check to ensure they didn't already direct some * other event into the same cherished position */ @@ -814,7 +838,7 @@ static void reg_event_hdlr(int sd, short args, void *cbdata) cd->evregcbfn(rc, index, cd->cbdata); } - /* check if any matching notifications have been cached */ + /* check if any matching notifications have been locally cached */ check_cached_events(cd); if (NULL != cd->codes) { free(cd->codes); diff --git a/opal/mca/pmix/pmix3x/pmix/src/hwloc/hwloc-internal.h b/opal/mca/pmix/pmix3x/pmix/src/hwloc/hwloc-internal.h index 1e731323b71..7468985a049 100644 --- a/opal/mca/pmix/pmix3x/pmix/src/hwloc/hwloc-internal.h +++ b/opal/mca/pmix/pmix3x/pmix/src/hwloc/hwloc-internal.h @@ -3,7 +3,7 @@ * Copyright (c) 2016 Research Organization for Information Science * and Technology (RIST). All rights reserved. * - * Copyright (c) 2016-2018 Intel, Inc. All rights reserved. + * Copyright (c) 2016-2019 Intel, Inc. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -22,7 +22,7 @@ #include #if PMIX_HAVE_HWLOC -#include +#include PMIX_HWLOC_HEADER #if HWLOC_API_VERSION < 0x00010b00 #define HWLOC_OBJ_NUMANODE HWLOC_OBJ_NODE diff --git a/opal/mca/pmix/pmix3x/pmix/src/hwloc/hwloc.c b/opal/mca/pmix/pmix3x/pmix/src/hwloc/hwloc.c index 84050ef7bb2..14dbae90765 100644 --- a/opal/mca/pmix/pmix3x/pmix/src/hwloc/hwloc.c +++ b/opal/mca/pmix/pmix3x/pmix/src/hwloc/hwloc.c @@ -82,7 +82,7 @@ static int set_flags(hwloc_topology_t topo, unsigned int flags) } return PMIX_SUCCESS; } -#endif +#endif // have_hwloc pmix_status_t pmix_hwloc_get_topology(pmix_info_t *info, size_t ninfo) { @@ -493,7 +493,7 @@ pmix_status_t pmix_hwloc_get_topology(pmix_info_t *info, size_t ninfo) return PMIX_SUCCESS; #else // PMIX_HAVE_HWLOC - return PMIX_ERR_NOT_SUPPORTED; + return PMIX_SUCCESS; #endif } diff --git a/opal/mca/pmix/pmix3x/pmix/src/include/Makefile.include b/opal/mca/pmix/pmix3x/pmix/src/include/Makefile.include index 7fcbe03aeb1..4ab3952e1c9 100644 --- 
a/opal/mca/pmix/pmix3x/pmix/src/include/Makefile.include +++ b/opal/mca/pmix/pmix3x/pmix/src/include/Makefile.include @@ -10,7 +10,7 @@ # University of Stuttgart. All rights reserved. # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. -# Copyright (c) 2013-2017 Intel, Inc. All rights reserved. +# Copyright (c) 2013-2018 Intel, Inc. All rights reserved. # Copyright (c) 2007-2016 Cisco Systems, Inc. All rights reserved. # Copyright (c) 2017 Research Organization for Information Science # and Technology (RIST). All rights reserved. @@ -41,7 +41,8 @@ headers += \ include/pmix_config_top.h \ include/pmix_config_bottom.h \ include/pmix_portable_platform.h \ - include/frameworks.h + include/frameworks.h \ + include/pmix_stdatomic.h endif ! PMIX_EMBEDDED_MODE diff --git a/opal/mca/pmix/pmix3x/pmix/src/include/pmix_config_bottom.h b/opal/mca/pmix/pmix3x/pmix/src/include/pmix_config_bottom.h index 8e9cf2a1131..bea74d6e290 100644 --- a/opal/mca/pmix/pmix3x/pmix/src/include/pmix_config_bottom.h +++ b/opal/mca/pmix/pmix3x/pmix/src/include/pmix_config_bottom.h @@ -13,7 +13,7 @@ * Copyright (c) 2009-2011 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2013 Mellanox Technologies, Inc. * All rights reserved. - * Copyright (c) 2013-2017 Intel, Inc. All rights reserved. + * Copyright (c) 2013-2018 Intel, Inc. All rights reserved. * Copyright (c) 2016 IBM Corporation. All rights reserved. * $COPYRIGHT$ * @@ -573,4 +573,5 @@ typedef PMIX_PTRDIFF_TYPE ptrdiff_t; #undef HAVE_CONFIG_H #endif /* PMIX_BUILDING */ + #endif /* PMIX_CONFIG_BOTTOM_H */ diff --git a/opal/mca/pmix/pmix3x/pmix/src/include/pmix_globals.c b/opal/mca/pmix/pmix3x/pmix/src/include/pmix_globals.c index 8096896b24e..df8a6b6e5b4 100644 --- a/opal/mca/pmix/pmix3x/pmix/src/include/pmix_globals.c +++ b/opal/mca/pmix/pmix3x/pmix/src/include/pmix_globals.c @@ -1,9 +1,9 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* - * Copyright (c) 2014-2018 Intel, Inc. 
All rights reserved. + * Copyright (c) 2014-2019 Intel, Inc. All rights reserved. * Copyright (c) 2014-2017 Research Organization for Information Science - * Copyright (c) 2014-2018 Intel, Inc. All rights reserved. - * and Technology (RIST). All rights reserved. + * Copyright (c) 2014-2019 Intel, Inc. All rights reserved. + * and Technology (RIST). All rights reserved. * Copyright (c) 2014-2015 Artem Y. Polyakov . * All rights reserved. * Copyright (c) 2016 IBM Corporation. All rights reserved. @@ -96,7 +96,7 @@ PMIX_EXPORT PMIX_CLASS_INSTANCE(pmix_cleanup_dir_t, pmix_list_item_t, cdcon, cddes); -static void nscon(pmix_nspace_t *p) +static void nscon(pmix_namespace_t *p) { p->nspace = NULL; p->nprocs = 0; @@ -113,7 +113,7 @@ static void nscon(pmix_nspace_t *p) PMIX_CONSTRUCT(&p->epilog.ignores, pmix_list_t); PMIX_CONSTRUCT(&p->setup_data, pmix_list_t); } -static void nsdes(pmix_nspace_t *p) +static void nsdes(pmix_namespace_t *p) { if (NULL != p->nspace) { free(p->nspace); @@ -130,7 +130,7 @@ static void nsdes(pmix_nspace_t *p) PMIX_LIST_DESTRUCT(&p->epilog.ignores); PMIX_LIST_DESTRUCT(&p->setup_data); } -PMIX_EXPORT PMIX_CLASS_INSTANCE(pmix_nspace_t, +PMIX_EXPORT PMIX_CLASS_INSTANCE(pmix_namespace_t, pmix_list_item_t, nscon, nsdes); @@ -238,6 +238,9 @@ static void iofreqdes(pmix_iof_req_t *p) if (NULL != p->peer) { PMIX_RELEASE(p->peer); } + if (NULL != p->pname.nspace) { + free(p->pname.nspace); + } } PMIX_EXPORT PMIX_CLASS_INSTANCE(pmix_iof_req_t, pmix_list_item_t, @@ -303,6 +306,7 @@ static void cbcon(pmix_cb_t *p) PMIX_CONSTRUCT(&p->kvs, pmix_list_t); p->copy = false; p->timer_running = false; + p->level = PMIX_LEVEL_UNDEF; } static void cbdes(pmix_cb_t *p) { @@ -339,11 +343,16 @@ static void qcon(pmix_query_caddy_t *p) p->relcbfunc = NULL; p->credcbfunc = NULL; p->validcbfunc = NULL; + PMIX_CONSTRUCT(&p->results, pmix_list_t); } static void qdes(pmix_query_caddy_t *p) { PMIX_DESTRUCT_LOCK(&p->lock); PMIX_BYTE_OBJECT_DESTRUCT(&p->bo); + 
PMIX_PROC_FREE(p->targets, p->ntargets); + PMIX_INFO_FREE(p->info, p->ninfo); + PMIX_LIST_DESTRUCT(&p->results); + PMIX_QUERY_FREE(p->queries, p->nqueries); } PMIX_EXPORT PMIX_CLASS_INSTANCE(pmix_query_caddy_t, pmix_object_t, @@ -355,32 +364,38 @@ void pmix_execute_epilog(pmix_epilog_t *epi) pmix_cleanup_dir_t *cd, *cdnext; struct stat statbuf; int rc; + char **tmp; + size_t n; /* start with any specified files */ PMIX_LIST_FOREACH_SAFE(cf, cfnext, &epi->cleanup_files, pmix_cleanup_file_t) { /* check the effective uid/gid of the file and ensure it * matches that of the peer - we do this to provide at least * some minimum level of protection */ - rc = stat(cf->path, &statbuf); - if (0 != rc) { - pmix_output_verbose(10, pmix_globals.debug_output, - "File %s failed to stat: %d", cf->path, rc); - continue; - } - if (statbuf.st_uid != epi->uid || - statbuf.st_gid != epi->gid) { - pmix_output_verbose(10, pmix_globals.debug_output, - "File %s uid/gid doesn't match: uid %lu(%lu) gid %lu(%lu)", - cf->path, - (unsigned long)statbuf.st_uid, (unsigned long)epi->uid, - (unsigned long)statbuf.st_gid, (unsigned long)epi->gid); - continue; - } - rc = unlink(cf->path); - if (0 != rc) { - pmix_output_verbose(10, pmix_globals.debug_output, - "File %s failed to unlink: %d", cf->path, rc); + tmp = pmix_argv_split(cf->path, ','); + for (n=0; NULL != tmp[n]; n++) { + rc = stat(tmp[n], &statbuf); + if (0 != rc) { + pmix_output_verbose(10, pmix_globals.debug_output, + "File %s failed to stat: %d", tmp[n], rc); + continue; + } + if (statbuf.st_uid != epi->uid || + statbuf.st_gid != epi->gid) { + pmix_output_verbose(10, pmix_globals.debug_output, + "File %s uid/gid doesn't match: uid %lu(%lu) gid %lu(%lu)", + cf->path, + (unsigned long)statbuf.st_uid, (unsigned long)epi->uid, + (unsigned long)statbuf.st_gid, (unsigned long)epi->gid); + continue; + } + rc = unlink(tmp[n]); + if (0 != rc) { + pmix_output_verbose(10, pmix_globals.debug_output, + "File %s failed to unlink: %d", tmp[n], rc); + } 
} + pmix_argv_free(tmp); pmix_list_remove_item(&epi->cleanup_files, &cf->super); PMIX_RELEASE(cf); } @@ -390,27 +405,31 @@ void pmix_execute_epilog(pmix_epilog_t *epi) /* check the effective uid/gid of the file and ensure it * matches that of the peer - we do this to provide at least * some minimum level of protection */ - rc = stat(cd->path, &statbuf); - if (0 != rc) { - pmix_output_verbose(10, pmix_globals.debug_output, - "Directory %s failed to stat: %d", cd->path, rc); - continue; - } - if (statbuf.st_uid != epi->uid || - statbuf.st_gid != epi->gid) { - pmix_output_verbose(10, pmix_globals.debug_output, - "Directory %s uid/gid doesn't match: uid %lu(%lu) gid %lu(%lu)", - cd->path, - (unsigned long)statbuf.st_uid, (unsigned long)epi->uid, - (unsigned long)statbuf.st_gid, (unsigned long)epi->gid); - continue; - } - if ((statbuf.st_mode & S_IRWXU) == S_IRWXU) { - dirpath_destroy(cd->path, cd, epi); - } else { - pmix_output_verbose(10, pmix_globals.debug_output, - "Directory %s lacks permissions", cd->path); + tmp = pmix_argv_split(cd->path, ','); + for (n=0; NULL != tmp[n]; n++) { + rc = stat(tmp[n], &statbuf); + if (0 != rc) { + pmix_output_verbose(10, pmix_globals.debug_output, + "Directory %s failed to stat: %d", tmp[n], rc); + continue; + } + if (statbuf.st_uid != epi->uid || + statbuf.st_gid != epi->gid) { + pmix_output_verbose(10, pmix_globals.debug_output, + "Directory %s uid/gid doesn't match: uid %lu(%lu) gid %lu(%lu)", + cd->path, + (unsigned long)statbuf.st_uid, (unsigned long)epi->uid, + (unsigned long)statbuf.st_gid, (unsigned long)epi->gid); + continue; + } + if ((statbuf.st_mode & S_IRWXU) == S_IRWXU) { + dirpath_destroy(tmp[n], cd, epi); + } else { + pmix_output_verbose(10, pmix_globals.debug_output, + "Directory %s lacks permissions", tmp[n]); + } } + pmix_argv_free(tmp); pmix_list_remove_item(&epi->cleanup_dirs, &cd->super); PMIX_RELEASE(cd); } @@ -551,3 +570,30 @@ static bool dirpath_is_empty(const char *path ) return true; } + +int 
pmix_event_assign(struct event *ev, pmix_event_base_t *evbase, + int fd, short arg, event_callback_fn cbfn, void *cbd) +{ +#if PMIX_HAVE_LIBEV + event_set(ev, fd, arg, cbfn, cbd); + event_base_set(evbase, ev); +#else + event_assign(ev, evbase, fd, arg, cbfn, cbd); +#endif + return 0; +} + +pmix_event_t* pmix_event_new(pmix_event_base_t *b, int fd, + short fg, event_callback_fn cbfn, void *cbd) +{ + pmix_event_t *ev = NULL; + +#if PMIX_HAVE_LIBEV + ev = (pmix_event_t*)calloc(1, sizeof(pmix_event_t)); + ev->ev_base = b; +#else + ev = event_new(b, fd, fg, (event_callback_fn) cbfn, cbd); +#endif + + return ev; +} diff --git a/opal/mca/pmix/pmix3x/pmix/src/include/pmix_globals.h b/opal/mca/pmix/pmix3x/pmix/src/include/pmix_globals.h index 9f565214a76..113cd48faab 100644 --- a/opal/mca/pmix/pmix3x/pmix/src/include/pmix_globals.h +++ b/opal/mca/pmix/pmix3x/pmix/src/include/pmix_globals.h @@ -10,7 +10,7 @@ * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. - * Copyright (c) 2014-2018 Intel, Inc. All rights reserved. + * Copyright (c) 2014-2019 Intel, Inc. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -36,7 +36,7 @@ #include "src/class/pmix_hash_table.h" #include "src/class/pmix_list.h" -#include "src/class/pmix_ring_buffer.h" +#include "src/class/pmix_hotel.h" #include "src/event/pmix_event.h" #include "src/threads/threads.h" @@ -110,6 +110,9 @@ typedef uint8_t pmix_cmd_t; /* provide a "pretty-print" function for cmds */ const char* pmix_command_string(pmix_cmd_t cmd); +/* provide a hook to init tool data */ +PMIX_EXPORT extern pmix_status_t pmix_tool_init_info(void); + /* define a set of flags to direct collection * of data during operations */ typedef enum { @@ -119,6 +122,16 @@ typedef enum { PMIX_COLLECT_MAX } pmix_collect_t; +/* define a set of flags indicating the level + * of information being stored/requested */ +typedef enum { + PMIX_LEVEL_UNDEF, + PMIX_LEVEL_SESSION, + PMIX_LEVEL_JOB, + PMIX_LEVEL_APP, + PMIX_LEVEL_NODE +} pmix_level_t; + /**** PEER STRUCTURES ****/ /* clients can only talk to their server, and servers are @@ -178,14 +191,14 @@ typedef struct { // from this nspace pmix_list_t setup_data; // list of pmix_kval_t containing info structs having blobs // for setting up the local node for this nspace/application -} pmix_nspace_t; -PMIX_CLASS_DECLARATION(pmix_nspace_t); +} pmix_namespace_t; +PMIX_CLASS_DECLARATION(pmix_namespace_t); -/* define a caddy for quickly creating a list of pmix_nspace_t +/* define a caddy for quickly creating a list of pmix_namespace_t * objects for local, dedicated purposes */ typedef struct { pmix_list_item_t super; - pmix_nspace_t *ns; + pmix_namespace_t *ns; } pmix_nspace_caddy_t; PMIX_CLASS_DECLARATION(pmix_nspace_caddy_t); @@ -219,7 +232,7 @@ PMIX_CLASS_DECLARATION(pmix_info_caddy_t); * by the socket, not the process nspace/rank */ typedef struct pmix_peer_t { pmix_object_t super; - pmix_nspace_t *nptr; // point to the nspace object for this process + pmix_namespace_t *nptr; // point to the nspace object for this process pmix_rank_info_t *info; 
pmix_proc_type_t proc_type; pmix_listener_protocol_t protocol; @@ -265,6 +278,7 @@ typedef struct { pmix_info_t *info; size_t ninfo; pmix_byte_object_t bo; + pmix_list_t results; pmix_info_cbfunc_t cbfunc; pmix_value_cbfunc_t valcbfunc; pmix_release_cbfunc_t relcbfunc; @@ -278,6 +292,11 @@ PMIX_CLASS_DECLARATION(pmix_query_caddy_t); * - instanced in pmix_server_ops.c */ typedef struct { pmix_list_item_t super; + pmix_event_t ev; + bool event_active; + bool host_called; // tracker has been passed up to host + bool local; // operation is strictly local + char *id; // string identifier for the collective pmix_cmd_t type; pmix_proc_t pname; bool hybrid; // true if participating procs are from more than one nspace @@ -295,6 +314,7 @@ typedef struct { pmix_collect_t collect_type; // whether or not data is to be returned at completion pmix_modex_cbfunc_t modexcbfunc; pmix_op_cbfunc_t op_cbfunc; + void *cbdata; } pmix_server_trkr_t; PMIX_CLASS_DECLARATION(pmix_server_trkr_t); @@ -340,6 +360,7 @@ PMIX_CLASS_DECLARATION(pmix_server_caddy_t); pmix_release_cbfunc_t relfn; pmix_hdlr_reg_cbfunc_t hdlrregcbfn; pmix_op_cbfunc_t opcbfn; + pmix_modex_cbfunc_t modexcbfunc; } cbfunc; void *cbdata; size_t ref; @@ -378,6 +399,7 @@ typedef struct { pmix_list_t kvs; bool copy; bool timer_running; + pmix_level_t level; } pmix_cb_t; PMIX_CLASS_DECLARATION(pmix_cb_t); @@ -394,6 +416,11 @@ typedef struct { pmix_object_t super; pmix_event_t ev; pmix_lock_t lock; + /* timestamp receipt of the notification so we + * can evict the oldest one if we get overwhelmed */ + time_t ts; + /* what room of the hotel they are in */ + int room; pmix_status_t status; pmix_proc_t source; pmix_data_range_t range; @@ -403,6 +430,7 @@ typedef struct { */ pmix_proc_t *targets; size_t ntargets; + size_t nleft; // number of targets left to be notified /* When generating a notification, the originator can * specify the range of procs affected by this event. 
* For example, when creating a JOB_TERMINATED event, @@ -438,6 +466,8 @@ typedef struct { pmix_peer_t *mypeer; // my own peer object uid_t uid; // my effective uid gid_t gid; // my effective gid + char *hostname; // my hostname + uint32_t nodeid; // my nodeid, if given int pindex; pmix_event_base_t *evbase; bool external_evbase; @@ -448,7 +478,9 @@ typedef struct { struct timeval event_window; pmix_list_t cached_events; // events waiting in the window prior to processing pmix_list_t iof_requests; // list of pmix_iof_req_t IOF requests - pmix_ring_buffer_t notifications; // ring buffer of pending notifications + int max_events; // size of the notifications hotel + int event_eviction_time; // max time to cache notifications + pmix_hotel_t notifications; // hotel of pending notifications /* processes also need a place where they can store * their own internal data - e.g., data provided by * the user via the store_internal interface, as well @@ -461,6 +493,7 @@ typedef struct { bool xml_output; bool timestamp_output; size_t output_limit; + pmix_list_t nspaces; } pmix_globals_t; /* provide access to a function to cleanup epilogs */ diff --git a/opal/mca/pmix/pmix3x/pmix/src/include/pmix_stdatomic.h b/opal/mca/pmix/pmix3x/pmix/src/include/pmix_stdatomic.h new file mode 100644 index 00000000000..eb9562a6e6d --- /dev/null +++ b/opal/mca/pmix/pmix3x/pmix/src/include/pmix_stdatomic.h @@ -0,0 +1,67 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ +/* + * Copyright (c) 2018 Los Alamos National Security, LLC. All rights + * reserved. + * Copyright (c) 2018 Intel, Inc. All rights reserved. 
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#if !defined(PMIX_STDATOMIC_H) +#define PMIX_STDATOMIC_H + +#include "pmix_stdint.h" + +#if PMIX_ASSEMBLY_BUILTIN != PMIX_BUILTIN_C11 + +typedef volatile int pmix_atomic_int_t; +typedef volatile long pmix_atomic_long_t; + +typedef volatile int32_t pmix_atomic_int32_t; +typedef volatile uint32_t pmix_atomic_uint32_t; +typedef volatile int64_t pmix_atomic_int64_t; +typedef volatile uint64_t pmix_atomic_uint64_t; + +typedef volatile size_t pmix_atomic_size_t; +typedef volatile ssize_t pmix_atomic_ssize_t; +typedef volatile intptr_t pmix_atomic_intptr_t; +typedef volatile uintptr_t pmix_atomic_uintptr_t; + +#else /* PMIX_HAVE_C__ATOMIC */ + +#include + +typedef atomic_int pmix_atomic_int_t; +typedef atomic_long pmix_atomic_long_t; + +typedef _Atomic int32_t pmix_atomic_int32_t; +typedef _Atomic uint32_t pmix_atomic_uint32_t; +typedef _Atomic int64_t pmix_atomic_int64_t; +typedef _Atomic uint64_t pmix_atomic_uint64_t; + +typedef _Atomic size_t pmix_atomic_size_t; +typedef _Atomic ssize_t pmix_atomic_ssize_t; +typedef _Atomic intptr_t pmix_atomic_intptr_t; +typedef _Atomic uintptr_t pmix_atomic_uintptr_t; + +#endif /* PMIX_HAVE_C__ATOMIC */ + +#if HAVE_PMIX_INT128_T + +/* do not use C11 atomics for __int128 if they are not lock free */ +#if PMIX_HAVE_C11_CSWAP_INT128 + +typedef _Atomic pmix_int128_t pmix_atomic_int128_t; + +#else + +typedef volatile pmix_int128_t pmix_atomic_int128_t; + +#endif + +#endif + +#endif /* !defined(PMIX_STDATOMIC_H) */ diff --git a/opal/mca/pmix/pmix3x/pmix/src/include/pmix_stdint.h b/opal/mca/pmix/pmix3x/pmix/src/include/pmix_stdint.h index 28c3099ef37..86d1cc7afe1 100644 --- a/opal/mca/pmix/pmix3x/pmix/src/include/pmix_stdint.h +++ b/opal/mca/pmix/pmix3x/pmix/src/include/pmix_stdint.h @@ -14,7 +14,7 @@ * reserved. * Copyright (c) 2016 Research Organization for Information Science * and Technology (RIST). All rights reserved. - * Copyright (c) 2017 Intel, Inc. 
All rights reserved. + * Copyright (c) 2018 Intel, Inc. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -29,6 +29,8 @@ #ifndef PMIX_STDINT_H #define PMIX_STDINT_H 1 +#include "pmix_config.h" + /* * Include what we can and define what is missing. */ @@ -125,3 +127,4 @@ typedef unsigned long long uintptr_t; #endif #endif /* PMIX_STDINT_H */ + diff --git a/opal/mca/pmix/pmix3x/pmix/src/include/types.h b/opal/mca/pmix/pmix3x/pmix/src/include/types.h index cf8d082c34d..6b52843ee48 100644 --- a/opal/mca/pmix/pmix3x/pmix/src/include/types.h +++ b/opal/mca/pmix/pmix3x/pmix/src/include/types.h @@ -9,9 +9,11 @@ * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. - * Copyright (c) 2014-2018 Intel, Inc. All rights reserved. + * Copyright (c) 2014-2019 Intel, Inc. All rights reserved. * Copyright (c) 2015 Mellanox Technologies, Inc. * All rights reserved. + * Copyright (c) 2019 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -43,11 +45,17 @@ #include #endif #include PMIX_EVENT_HEADER +#if ! PMIX_HAVE_LIBEV +#include PMIX_EVENT2_THREAD_HEADER +#endif #if PMIX_ENABLE_DEBUG #include "src/util/output.h" #endif +#include +#include + /* * portable assignment of pointer to int @@ -137,6 +145,18 @@ static inline uint64_t pmix_ntoh64(uint64_t val) #endif } +/* Convert size_t value from host to network byte order and back */ +#if SIZEOF_SIZE_T == 4 + +#define pmix_htonsizet(x) htonl(x) +#define pmix_ntohsizet(x) ntohl(x) + +#elif SIZEOF_SIZE_T == 8 + +#define pmix_htonsizet(x) pmix_hton64(x) +#define pmix_ntohsizet(x) pmix_ntoh64(x) + +#endif /** * Convert between a local representation of pointer and a 64 bits value. @@ -225,6 +245,8 @@ static inline uint64_t pmix_swap_bytes8(uint64_t val) #define PMIX_EVLOOP_ONCE EVLOOP_ONCE /**< Block at most once. 
*/ #define PMIX_EVLOOP_NONBLOCK EVLOOP_NONBLOCK /**< Do not block. */ +#define PMIX_EVENT_SIGNAL(ev) pmix_event_get_signal(ev) + typedef struct event_base pmix_event_base_t; typedef struct event pmix_event_t; @@ -232,42 +254,52 @@ typedef struct event pmix_event_t; #define pmix_event_base_free(b) event_base_free(b) -#define pmix_event_free(x) event_free(x) -#define pmix_event_base_loopbreak(b) event_base_loopbreak(b) - -#define pmix_event_base_loopexit(b) event_base_loopexit(b, NULL) +#if PMIX_HAVE_LIBEV +#define pmix_event_use_threads() +#define pmix_event_free(b) free(b) +#define pmix_event_get_signal(x) (x)->ev_fd +#else /* thread support APIs */ #define pmix_event_use_threads() evthread_use_pthreads() +#define pmix_event_free(x) event_free(x) +#define pmix_event_get_signal(x) event_get_signal(x) +#endif /* Basic event APIs */ #define pmix_event_enable_debug_mode() event_enable_debug_mode() -#define pmix_event_assign(x, b, fd, fg, cb, arg) event_assign((x), (b), (fd), (fg), (event_callback_fn) (cb), (arg)) +PMIX_EXPORT int pmix_event_assign(struct event *ev, pmix_event_base_t *evbase, + int fd, short arg, event_callback_fn cbfn, void *cbd); -#define pmix_event_set(b, x, fd, fg, cb, arg) event_assign((x), (b), (fd), (fg), (event_callback_fn) (cb), (arg)) +#define pmix_event_set(b, x, fd, fg, cb, arg) pmix_event_assign((x), (b), (fd), (fg), (event_callback_fn) (cb), (arg)) +#if PMIX_HAVE_LIBEV +PMIX_EXPORT int pmix_event_add(struct event *ev, struct timeval *tv); +PMIX_EXPORT int pmix_event_del(struct event *ev); +PMIX_EXPORT void pmix_event_active (struct event *ev, int res, short ncalls); +PMIX_EXPORT void pmix_event_base_loopexit (pmix_event_base_t *b); +#else #define pmix_event_add(ev, tv) event_add((ev), (tv)) - #define pmix_event_del(ev) event_del((ev)) - #define pmix_event_active(x, y, z) event_active((x), (y), (z)) +#define pmix_event_base_loopexit(b) event_base_loopexit(b, NULL) +#endif -#define pmix_event_new(b, fd, fg, cb, arg) event_new((b), (fd), 
(fg), (event_callback_fn) (cb), (arg)) +PMIX_EXPORT pmix_event_t* pmix_event_new(pmix_event_base_t *b, int fd, + short fg, event_callback_fn cbfn, void *cbd); #define pmix_event_loop(b, fg) event_base_loop((b), (fg)) -#define pmix_event_active(x, y, z) event_active((x), (y), (z)) - #define pmix_event_evtimer_new(b, cb, arg) pmix_event_new((b), -1, 0, (cb), (arg)) #define pmix_event_evtimer_add(x, tv) pmix_event_add((x), (tv)) -#define pmix_event_evtimer_set(b, x, cb, arg) event_assign((x), (b), -1, 0, (event_callback_fn) (cb), (arg)) +#define pmix_event_evtimer_set(b, x, cb, arg) pmix_event_assign((x), (b), -1, 0, (event_callback_fn) (cb), (arg)) #define pmix_event_evtimer_del(x) pmix_event_del((x)) -#define pmix_event_signal_set(b, x, fd, cb, arg) event_assign((x), (b), (fd), EV_SIGNAL|EV_PERSIST, (event_callback_fn) (cb), (arg)) +#define pmix_event_signal_set(b, x, fd, cb, arg) pmix_event_assign((x), (b), (fd), EV_SIGNAL|EV_PERSIST, (event_callback_fn) (cb), (arg)) #endif /* PMIX_TYPES_H */ diff --git a/opal/mca/pmix/pmix3x/pmix/src/mca/base/help-pmix-mca-base.txt b/opal/mca/pmix/pmix3x/pmix/src/mca/base/help-pmix-mca-base.txt index 7a96e7ace8f..3c8a67f1990 100644 --- a/opal/mca/pmix/pmix3x/pmix/src/mca/base/help-pmix-mca-base.txt +++ b/opal/mca/pmix/pmix3x/pmix/src/mca/base/help-pmix-mca-base.txt @@ -10,8 +10,8 @@ # University of Stuttgart. All rights reserved. # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. -# Copyright (c) 2008-2014 Cisco Systems, Inc. All rights reserved. -# Copyright (c) 2018 Intel, Inc. All rights reserved. +# Copyright (c) 2008-2019 Cisco Systems, Inc. All rights reserved +# Copyright (c) 2018-2019 Intel, Inc. All rights reserved. # $COPYRIGHT$ # # Additional copyrights may follow @@ -60,3 +60,12 @@ all components *except* a and b", while "c,d" specifies the inclusive behavior and means "use *only* components c and d." You cannot mix inclusive and exclusive behavior. 
+# +[failed to add component dir] +The pmix_mca_base_component_path MCA variable was used to add paths to +search for PMIX components. At least one directory failed to add +properly: + + %s + +Check to make sure that this directory exists, is readable, etc. diff --git a/opal/mca/pmix/pmix3x/pmix/src/mca/base/pmix_mca_base_component_find.c b/opal/mca/pmix/pmix3x/pmix/src/mca/base/pmix_mca_base_component_find.c index fed38f988fd..7d96e21c36a 100644 --- a/opal/mca/pmix/pmix3x/pmix/src/mca/base/pmix_mca_base_component_find.c +++ b/opal/mca/pmix/pmix3x/pmix/src/mca/base/pmix_mca_base_component_find.c @@ -16,7 +16,7 @@ * and Technology (RIST). All rights reserved. * Copyright (c) 2014-2015 Los Alamos National Security, LLC. All rights * reserved. - * Copyright (c) 2016-2018 Intel, Inc. All rights reserved. + * Copyright (c) 2016-2019 Intel, Inc. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -339,8 +339,8 @@ static int component_find_check (pmix_mca_base_framework_t *framework, char **re } if (!found) { - char h[MAXHOSTNAMELEN]; - gethostname(h, sizeof(h)); + char h[PMIX_MAXHOSTNAMELEN] = {0}; + gethostname(h, sizeof(h)-1); pmix_show_help("help-pmix-mca-base.txt", "find-available:not-valid", true, h, framework->framework_name, requested_component_names[i]); diff --git a/opal/mca/pmix/pmix3x/pmix/src/mca/base/pmix_mca_base_component_repository.c b/opal/mca/pmix/pmix3x/pmix/src/mca/base/pmix_mca_base_component_repository.c index d34e32b7b09..062b1cb75d6 100644 --- a/opal/mca/pmix/pmix3x/pmix/src/mca/base/pmix_mca_base_component_repository.c +++ b/opal/mca/pmix/pmix3x/pmix/src/mca/base/pmix_mca_base_component_repository.c @@ -10,12 +10,12 @@ * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. - * Copyright (c) 2008-2015 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2008-2019 Cisco Systems, Inc. 
All rights reserved * Copyright (c) 2015 Los Alamos National Security, LLC. All rights * reserved. * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. - * Copyright (c) 2016-2017 Intel, Inc. All rights reserved. + * Copyright (c) 2016-2019 Intel, Inc. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -43,6 +43,7 @@ #include "pmix_common.h" #include "src/class/pmix_hash_table.h" #include "src/util/basename.h" +#include "src/util/show_help.h" #if PMIX_HAVE_PDL_SUPPORT @@ -164,12 +165,12 @@ static int process_repository_item (const char *filename, void *data) return PMIX_ERR_OUT_OF_RESOURCE; } - /* strncpy does not guarantee a \0 */ + /* pmix_strncpy does not guarantee a \0 */ ri->ri_type[PMIX_MCA_BASE_MAX_TYPE_NAME_LEN] = '\0'; - strncpy (ri->ri_type, type, PMIX_MCA_BASE_MAX_TYPE_NAME_LEN); + pmix_strncpy (ri->ri_type, type, PMIX_MCA_BASE_MAX_TYPE_NAME_LEN); ri->ri_name[PMIX_MCA_BASE_MAX_TYPE_NAME_LEN] = '\0'; - strncpy (ri->ri_name, name, PMIX_MCA_BASE_MAX_COMPONENT_NAME_LEN); + pmix_strncpy (ri->ri_name, name, PMIX_MCA_BASE_MAX_COMPONENT_NAME_LEN); pmix_list_append (component_list, &ri->super); @@ -220,8 +221,13 @@ int pmix_mca_base_component_repository_add (const char *path) dir = pmix_mca_base_system_default_path; } - if (0 != pmix_pdl_foreachfile(dir, process_repository_item, NULL)) { - break; + if (0 != pmix_pdl_foreachfile(dir, process_repository_item, NULL) && + !(0 == strcmp(dir, pmix_mca_base_system_default_path) || 0 == strcmp(dir, pmix_mca_base_user_default_path))) { + // It is not an error if a directory fails to add (e.g., + // if it doesn't exist). 
But we should warn about it as + // it is something related to "show_load_errors" + pmix_show_help("help-pmix-mca-base.txt", + "failed to add component dir", true, dir); } } while (NULL != (dir = strtok_r (NULL, sep, &ctx))); diff --git a/opal/mca/pmix/pmix3x/pmix/src/mca/base/pmix_mca_base_open.c b/opal/mca/pmix/pmix3x/pmix/src/mca/base/pmix_mca_base_open.c index f152f2c2a95..fbb55dcb355 100644 --- a/opal/mca/pmix/pmix3x/pmix/src/mca/base/pmix_mca_base_open.c +++ b/opal/mca/pmix/pmix3x/pmix/src/mca/base/pmix_mca_base_open.c @@ -13,7 +13,7 @@ * Copyright (c) 2011 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2015 Los Alamos National Security, LLC. All rights * reserved. - * Copyright (c) 2016-2017 Intel, Inc. All rights reserved. + * Copyright (c) 2016-2019 Intel, Inc. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -68,7 +68,7 @@ int pmix_mca_base_open(void) { char *value; pmix_output_stream_t lds; - char hostname[64]; + char hostname[PMIX_MAXHOSTNAMELEN] = {0}; int var_id; int rc; @@ -155,7 +155,7 @@ int pmix_mca_base_open(void) } else { set_defaults(&lds); } - gethostname(hostname, 64); + gethostname(hostname, PMIX_MAXHOSTNAMELEN-1); rc = asprintf(&lds.lds_prefix, "[%s:%05d] ", hostname, getpid()); if (0 > rc) { return PMIX_ERR_OUT_OF_RESOURCE; diff --git a/opal/mca/pmix/pmix3x/pmix/src/mca/bfrops/base/base.h b/opal/mca/pmix/pmix3x/pmix/src/mca/bfrops/base/base.h index 318f076fdca..41ee2de2663 100644 --- a/opal/mca/pmix/pmix3x/pmix/src/mca/bfrops/base/base.h +++ b/opal/mca/pmix/pmix3x/pmix/src/mca/bfrops/base/base.h @@ -205,6 +205,12 @@ PMIX_EXPORT extern pmix_bfrops_globals_t pmix_bfrops_globals; free(tmpbuf); \ } while (0) +/* for backwards compatibility */ +typedef struct pmix_info_array { + size_t size; + pmix_info_t *array; +} pmix_info_array_t; + /** * Internal struct used for holding registered bfrop functions @@ -268,7 +274,7 @@ PMIX_EXPORT pmix_status_t pmix_bfrops_stub_copy_payload(struct pmix_peer_t *peer 
pmix_buffer_t *src); PMIX_EXPORT pmix_status_t pmix_bfrops_stub_value_xfer(struct pmix_peer_t *peer, pmix_value_t *dest, - pmix_value_t *src); + const pmix_value_t *src); PMIX_EXPORT void pmix_bfrops_stub_value_load(struct pmix_peer_t *peer, pmix_value_t *v, void *data, pmix_data_type_t type); @@ -496,9 +502,6 @@ PMIX_EXPORT pmix_status_t pmix_bfrops_base_copy_string(char **dest, char *src, PMIX_EXPORT pmix_status_t pmix_bfrops_base_copy_value(pmix_value_t **dest, pmix_value_t *src, pmix_data_type_t type); -PMIX_EXPORT pmix_status_t pmix_bfrops_base_copy_array(pmix_info_array_t **dest, - pmix_info_array_t *src, - pmix_data_type_t type); PMIX_EXPORT pmix_status_t pmix_bfrops_base_copy_proc(pmix_proc_t **dest, pmix_proc_t *src, pmix_data_type_t type); @@ -514,9 +517,6 @@ PMIX_EXPORT pmix_status_t pmix_bfrops_base_copy_buf(pmix_buffer_t **dest, PMIX_EXPORT pmix_status_t pmix_bfrops_base_copy_kval(pmix_kval_t **dest, pmix_kval_t *src, pmix_data_type_t type); -PMIX_EXPORT pmix_status_t pmix_bfrops_base_copy_modex(pmix_modex_data_t **dest, - pmix_modex_data_t *src, - pmix_data_type_t type); PMIX_EXPORT pmix_status_t pmix_bfrop_base_copy_persist(pmix_persistence_t **dest, pmix_persistence_t *src, pmix_data_type_t type); @@ -538,10 +538,6 @@ PMIX_EXPORT pmix_status_t pmix_bfrops_base_copy_query(pmix_query_t **dest, PMIX_EXPORT pmix_status_t pmix_bfrops_base_copy_envar(pmix_envar_t **dest, pmix_envar_t *src, pmix_data_type_t type); -/**** DEPRECATED ****/ -PMIX_EXPORT pmix_status_t pmix_bfrops_base_copy_array(pmix_info_array_t **dest, - pmix_info_array_t *src, - pmix_data_type_t type); /* * "Standard" print functions @@ -596,8 +592,6 @@ PMIX_EXPORT pmix_status_t pmix_bfrops_base_print_status(char **output, char *pre PMIX_EXPORT pmix_status_t pmix_bfrops_base_print_value(char **output, char *prefix, pmix_value_t *src, pmix_data_type_t type); -PMIX_EXPORT pmix_status_t pmix_bfrops_base_print_array(char **output, char *prefix, - pmix_info_array_t *src, pmix_data_type_t type); 
PMIX_EXPORT pmix_status_t pmix_bfrops_base_print_proc(char **output, char *prefix, pmix_proc_t *src, pmix_data_type_t type); PMIX_EXPORT pmix_status_t pmix_bfrops_base_print_app(char **output, char *prefix, @@ -608,8 +602,6 @@ PMIX_EXPORT pmix_status_t pmix_bfrops_base_print_buf(char **output, char *prefix pmix_buffer_t *src, pmix_data_type_t type); PMIX_EXPORT pmix_status_t pmix_bfrops_base_print_kval(char **output, char *prefix, pmix_kval_t *src, pmix_data_type_t type); -PMIX_EXPORT pmix_status_t pmix_bfrops_base_print_modex(char **output, char *prefix, - pmix_modex_data_t *src, pmix_data_type_t type); PMIX_EXPORT pmix_status_t pmix_bfrops_base_print_persist(char **output, char *prefix, pmix_persistence_t *src, pmix_data_type_t type); PMIX_EXPORT pmix_status_t pmix_bfrops_base_print_bo(char **output, char *prefix, @@ -678,7 +670,7 @@ PMIX_EXPORT pmix_status_t pmix_bfrops_base_value_unload(pmix_value_t *kv, size_t *sz); PMIX_EXPORT pmix_status_t pmix_bfrops_base_value_xfer(pmix_value_t *p, - pmix_value_t *src); + const pmix_value_t *src); PMIX_EXPORT pmix_value_cmp_t pmix_bfrops_base_value_cmp(pmix_value_t *p, pmix_value_t *p1); diff --git a/opal/mca/pmix/pmix3x/pmix/src/mca/bfrops/base/bfrop_base_copy.c b/opal/mca/pmix/pmix3x/pmix/src/mca/bfrops/base/bfrop_base_copy.c index 481eb69e76a..d5bf41e94fd 100644 --- a/opal/mca/pmix/pmix3x/pmix/src/mca/bfrops/base/bfrop_base_copy.c +++ b/opal/mca/pmix/pmix3x/pmix/src/mca/bfrops/base/bfrop_base_copy.c @@ -236,7 +236,7 @@ pmix_status_t pmix_bfrops_base_copy_info(pmix_info_t **dest, pmix_data_type_t type) { *dest = (pmix_info_t*)malloc(sizeof(pmix_info_t)); - (void)strncpy((*dest)->key, src->key, PMIX_MAX_KEYLEN); + pmix_strncpy((*dest)->key, src->key, PMIX_MAX_KEYLEN); (*dest)->flags = src->flags; return pmix_bfrops_base_value_xfer(&(*dest)->value, &src->value); } @@ -267,7 +267,7 @@ pmix_status_t pmix_bfrops_base_copy_app(pmix_app_t **dest, (*dest)->ninfo = src->ninfo; (*dest)->info = (pmix_info_t*)malloc(src->ninfo * 
sizeof(pmix_info_t)); for (j=0; j < src->ninfo; j++) { - (void)strncpy((*dest)->info[j].key, src->info[j].key, PMIX_MAX_KEYLEN); + pmix_strncpy((*dest)->info[j].key, src->info[j].key, PMIX_MAX_KEYLEN); pmix_value_xfer(&(*dest)->info[j].value, &src->info[j].value); } return PMIX_SUCCESS; @@ -300,32 +300,11 @@ pmix_status_t pmix_bfrops_base_copy_proc(pmix_proc_t **dest, if (NULL == *dest) { return PMIX_ERR_OUT_OF_RESOURCE; } - (void)strncpy((*dest)->nspace, src->nspace, PMIX_MAX_NSLEN); + pmix_strncpy((*dest)->nspace, src->nspace, PMIX_MAX_NSLEN); (*dest)->rank = src->rank; return PMIX_SUCCESS; } -pmix_status_t pmix_bfrops_base_copy_modex(pmix_modex_data_t **dest, - pmix_modex_data_t *src, - pmix_data_type_t type) -{ - *dest = (pmix_modex_data_t*)malloc(sizeof(pmix_modex_data_t)); - if (NULL == *dest) { - return PMIX_ERR_OUT_OF_RESOURCE; - } - (*dest)->blob = NULL; - (*dest)->size = 0; - if (NULL != src->blob) { - (*dest)->blob = (uint8_t*)malloc(src->size * sizeof(uint8_t)); - if (NULL == (*dest)->blob) { - return PMIX_ERR_OUT_OF_RESOURCE; - } - memcpy((*dest)->blob, src->blob, src->size * sizeof(uint8_t)); - (*dest)->size = src->size; - } - return PMIX_SUCCESS; -} - pmix_status_t pmix_bfrop_base_copy_persist(pmix_persistence_t **dest, pmix_persistence_t *src, pmix_data_type_t type) @@ -357,9 +336,9 @@ pmix_status_t pmix_bfrops_base_copy_pdata(pmix_pdata_t **dest, pmix_data_type_t type) { *dest = (pmix_pdata_t*)malloc(sizeof(pmix_pdata_t)); - (void)strncpy((*dest)->proc.nspace, src->proc.nspace, PMIX_MAX_NSLEN); + pmix_strncpy((*dest)->proc.nspace, src->proc.nspace, PMIX_MAX_NSLEN); (*dest)->proc.rank = src->proc.rank; - (void)strncpy((*dest)->key, src->key, PMIX_MAX_KEYLEN); + pmix_strncpy((*dest)->key, src->key, PMIX_MAX_KEYLEN); return pmix_bfrops_base_value_xfer(&(*dest)->value, &src->value); } @@ -405,7 +384,6 @@ pmix_status_t pmix_bfrops_base_copy_darray(pmix_data_array_t **dest, pmix_buffer_t *pb, *sb; pmix_byte_object_t *pbo, *sbo; pmix_kval_t *pk, *sk; - 
pmix_modex_data_t *pm, *sm; pmix_proc_info_t *pi, *si; pmix_query_t *pq, *sq; pmix_envar_t *pe, *se; @@ -699,31 +677,6 @@ pmix_status_t pmix_bfrops_base_copy_darray(pmix_data_array_t **dest, } } break; - case PMIX_MODEX: - PMIX_MODEX_CREATE(p->array, src->size); - if (NULL == p->array) { - free(p); - return PMIX_ERR_NOMEM; - } - pm = (pmix_modex_data_t*)p->array; - sm = (pmix_modex_data_t*)src->array; - for (n=0; n < src->size; n++) { - memcpy(&pm[n], &sm[n], sizeof(pmix_modex_data_t)); - if (NULL != sm[n].blob && 0 < sm[n].size) { - pm[n].blob = (uint8_t*)malloc(sm[n].size); - if (NULL == pm[n].blob) { - PMIX_MODEX_FREE(pm, src->size); - free(p); - return PMIX_ERR_NOMEM; - } - memcpy(pm[n].blob, sm[n].blob, sm[n].size); - pm[n].size = sm[n].size; - } else { - pm[n].blob = NULL; - pm[n].size = 0; - } - } - break; case PMIX_PERSIST: p->array = (pmix_persistence_t*)malloc(src->size * sizeof(pmix_persistence_t)); if (NULL == p->array) { @@ -876,32 +829,6 @@ pmix_status_t pmix_bfrops_base_copy_query(pmix_query_t **dest, return PMIX_SUCCESS; } -/**** DEPRECATED ****/ -pmix_status_t pmix_bfrops_base_copy_array(pmix_info_array_t **dest, - pmix_info_array_t *src, - pmix_data_type_t type) -{ - pmix_info_t *d1, *s1; - - *dest = (pmix_info_array_t*)malloc(sizeof(pmix_info_array_t)); - if (NULL == (*dest)) { - return PMIX_ERR_NOMEM; - } - (*dest)->size = src->size; - if (0 < src->size) { - (*dest)->array = (pmix_info_t*)malloc(src->size * sizeof(pmix_info_t)); - if (NULL == (*dest)->array) { - free(*dest); - return PMIX_ERR_NOMEM; - } - d1 = (pmix_info_t*)(*dest)->array; - s1 = (pmix_info_t*)src->array; - memcpy(d1, s1, src->size * sizeof(pmix_info_t)); - } - return PMIX_SUCCESS; -} -/*******************/ - pmix_status_t pmix_bfrops_base_copy_envar(pmix_envar_t **dest, pmix_envar_t *src, pmix_data_type_t type) diff --git a/opal/mca/pmix/pmix3x/pmix/src/mca/bfrops/base/bfrop_base_fns.c b/opal/mca/pmix/pmix3x/pmix/src/mca/bfrops/base/bfrop_base_fns.c index 
bcd083baf57..e93f14889a0 100644 --- a/opal/mca/pmix/pmix3x/pmix/src/mca/bfrops/base/bfrop_base_fns.c +++ b/opal/mca/pmix/pmix3x/pmix/src/mca/bfrops/base/bfrop_base_fns.c @@ -46,7 +46,7 @@ PMIX_EXPORT pmix_status_t pmix_value_unload(pmix_value_t *kv, } PMIX_EXPORT pmix_status_t pmix_value_xfer(pmix_value_t *dest, - pmix_value_t *src) + const pmix_value_t *src) { return pmix_bfrops_base_value_xfer(dest, src); } @@ -509,11 +509,8 @@ pmix_value_cmp_t pmix_bfrops_base_value_cmp(pmix_value_t *p, /* Xfer FUNCTIONS FOR GENERIC PMIX TYPES */ pmix_status_t pmix_bfrops_base_value_xfer(pmix_value_t *p, - pmix_value_t *src) + const pmix_value_t *src) { - size_t n; - pmix_info_t *p1, *s1; - /* copy the right field */ p->type = src->type; switch (src->type) { @@ -643,22 +640,6 @@ pmix_status_t pmix_bfrops_base_value_xfer(pmix_value_t *p, p->data.envar.separator = src->data.envar.separator; break; - /**** DEPRECATED ****/ - case PMIX_INFO_ARRAY: - p->data.array->size = src->data.array->size; - if (0 < src->data.array->size) { - p->data.array->array = (pmix_info_t*)malloc(src->data.array->size * sizeof(pmix_info_t)); - if (NULL == p->data.array->array) { - return PMIX_ERR_NOMEM; - } - p1 = (pmix_info_t*)p->data.array->array; - s1 = (pmix_info_t*)src->data.array->array; - for (n=0; n < src->data.darray->size; n++) { - PMIX_INFO_XFER(&p1[n], &s1[n]); - } - } - break; - /********************/ default: pmix_output(0, "PMIX-XFER-VALUE: UNSUPPORTED TYPE %d", (int)src->type); return PMIX_ERROR; diff --git a/opal/mca/pmix/pmix3x/pmix/src/mca/bfrops/base/bfrop_base_pack.c b/opal/mca/pmix/pmix3x/pmix/src/mca/bfrops/base/bfrop_base_pack.c index 2e66dfe57b2..4045d874ecf 100644 --- a/opal/mca/pmix/pmix3x/pmix/src/mca/bfrops/base/bfrop_base_pack.c +++ b/opal/mca/pmix/pmix3x/pmix/src/mca/bfrops/base/bfrop_base_pack.c @@ -703,28 +703,6 @@ pmix_status_t pmix_bfrops_base_pack_kval(pmix_buffer_t *buffer, const void *src, return PMIX_SUCCESS; } -pmix_status_t pmix_bfrops_base_pack_modex(pmix_buffer_t 
*buffer, const void *src, - int32_t num_vals, pmix_data_type_t type) -{ - pmix_modex_data_t *ptr; - int32_t i; - int ret; - - ptr = (pmix_modex_data_t *) src; - - for (i = 0; i < num_vals; ++i) { - if (PMIX_SUCCESS != (ret = pmix_bfrops_base_pack_sizet(buffer, &ptr[i].size, 1, PMIX_SIZE))) { - return ret; - } - if( 0 < ptr[i].size){ - if (PMIX_SUCCESS != (ret = pmix_bfrops_base_pack_byte(buffer, ptr[i].blob, ptr[i].size, PMIX_UINT8))) { - return ret; - } - } - } - return PMIX_SUCCESS; -} - pmix_status_t pmix_bfrops_base_pack_persist(pmix_buffer_t *buffer, const void *src, int32_t num_vals, pmix_data_type_t type) { @@ -1007,13 +985,6 @@ pmix_status_t pmix_bfrops_base_pack_darray(pmix_buffer_t *buffer, const void *sr } break; - /**** DEPRECATED ****/ - case PMIX_INFO_ARRAY: - if (PMIX_SUCCESS != (ret = pmix_bfrops_base_pack_array(buffer, p[i].array, p[i].size, PMIX_INFO_ARRAY))) { - return ret; - } - break; - /********************/ default: pmix_output(0, "PACK-PMIX-VALUE[%s:%d]: UNSUPPORTED TYPE %d", __FILE__, __LINE__, (int)p[i].type); @@ -1236,17 +1207,10 @@ pmix_status_t pmix_bfrops_base_pack_val(pmix_buffer_t *buffer, } break; - /**** DEPRECATED ****/ - case PMIX_INFO_ARRAY: - if (PMIX_SUCCESS != (ret = pmix_bfrops_base_pack_array(buffer, p->data.array, 1, PMIX_INFO_ARRAY))) { - return ret; - } - break; - /********************/ default: - pmix_output(0, "PACK-PMIX-VALUE[%s:%d]: UNSUPPORTED TYPE %d", - __FILE__, __LINE__, (int)p->type); - return PMIX_ERROR; + pmix_output(0, "PACK-PMIX-VALUE[%s:%d]: UNSUPPORTED TYPE %d", + __FILE__, __LINE__, (int)p->type); + return PMIX_ERROR; } return PMIX_SUCCESS; } @@ -1257,33 +1221,6 @@ pmix_status_t pmix_bfrops_base_pack_alloc_directive(pmix_buffer_t *buffer, const return pmix_bfrops_base_pack_byte(buffer, src, num_vals, PMIX_UINT8); } - -/**** DEPRECATED ****/ -pmix_status_t pmix_bfrops_base_pack_array(pmix_buffer_t *buffer, const void *src, - int32_t num_vals, pmix_data_type_t type) -{ - pmix_info_array_t *ptr; - int32_t 
i; - pmix_status_t ret; - - ptr = (pmix_info_array_t *) src; - - for (i = 0; i < num_vals; ++i) { - /* pack the size */ - if (PMIX_SUCCESS != (ret = pmix_bfrops_base_pack_sizet(buffer, &ptr[i].size, 1, PMIX_SIZE))) { - return ret; - } - if (0 < ptr[i].size) { - /* pack the values */ - if (PMIX_SUCCESS != (ret = pmix_bfrops_base_pack_info(buffer, ptr[i].array, ptr[i].size, PMIX_INFO))) { - return ret; - } - } - } - - return PMIX_SUCCESS; -} - pmix_status_t pmix_bfrops_base_pack_iof_channel(pmix_buffer_t *buffer, const void *src, int32_t num_vals, pmix_data_type_t type) { diff --git a/opal/mca/pmix/pmix3x/pmix/src/mca/bfrops/base/bfrop_base_print.c b/opal/mca/pmix/pmix3x/pmix/src/mca/bfrops/base/bfrop_base_print.c index c02fce285b1..d17a731257b 100644 --- a/opal/mca/pmix/pmix3x/pmix/src/mca/bfrops/base/bfrop_base_print.c +++ b/opal/mca/pmix/pmix3x/pmix/src/mca/bfrops/base/bfrop_base_print.c @@ -1021,12 +1021,6 @@ int pmix_bfrops_base_print_status(char **output, char *prefix, src->data.envar.separator); break; - /**** DEPRECATED ****/ - case PMIX_INFO_ARRAY: - rc = asprintf(output, "%sPMIX_VALUE: Data type: INFO_ARRAY\tARRAY SIZE: %ld", - prefx, (long)src->data.array->size); - break; - /********************/ default: rc = asprintf(output, "%sPMIX_VALUE: Data type: UNKNOWN\tValue: UNPRINTABLE", prefx); break; @@ -1143,12 +1137,6 @@ int pmix_bfrops_base_print_kval(char **output, char *prefix, return PMIX_SUCCESS; } -pmix_status_t pmix_bfrops_base_print_modex(char **output, char *prefix, - pmix_modex_data_t *src, pmix_data_type_t type) -{ - return PMIX_SUCCESS; -} - int pmix_bfrops_base_print_persist(char **output, char *prefix, pmix_persistence_t *src, pmix_data_type_t type) { @@ -1702,37 +1690,3 @@ pmix_status_t pmix_bfrops_base_print_envar(char **output, char *prefix, return PMIX_SUCCESS; } } - - -/**** DEPRECATED ****/ -pmix_status_t pmix_bfrops_base_print_array(char **output, char *prefix, - pmix_info_array_t *src, pmix_data_type_t type) -{ - size_t j; - char *tmp, 
*tmp2, *tmp3, *pfx; - pmix_info_t *s1; - - if (0 > asprintf(&tmp, "%sARRAY SIZE: %ld", prefix, (long)src->size)) { - return PMIX_ERR_NOMEM; - } - if (0 > asprintf(&pfx, "\n%s\t", (NULL == prefix) ? "" : prefix)) { - free(tmp); - return PMIX_ERR_NOMEM; - } - s1 = (pmix_info_t*)src->array; - - for (j=0; j < src->size; j++) { - pmix_bfrops_base_print_info(&tmp2, pfx, &s1[j], PMIX_INFO); - if (0 > asprintf(&tmp3, "%s%s", tmp, tmp2)) { - free(tmp); - free(tmp2); - return PMIX_ERR_NOMEM; - } - free(tmp); - free(tmp2); - tmp = tmp3; - } - *output = tmp; - return PMIX_SUCCESS; -} -/********************/ diff --git a/opal/mca/pmix/pmix3x/pmix/src/mca/bfrops/base/bfrop_base_unpack.c b/opal/mca/pmix/pmix3x/pmix/src/mca/bfrops/base/bfrop_base_unpack.c index afd685b4374..051c35d82a6 100644 --- a/opal/mca/pmix/pmix3x/pmix/src/mca/bfrops/base/bfrop_base_unpack.c +++ b/opal/mca/pmix/pmix3x/pmix/src/mca/bfrops/base/bfrop_base_unpack.c @@ -764,21 +764,9 @@ pmix_status_t pmix_bfrops_base_unpack_val(pmix_buffer_t *buffer, return ret; } break; - /**** DEPRECATED ****/ - case PMIX_INFO_ARRAY: - /* this field is now a pointer, so we must allocate storage for it */ - val->data.array = (pmix_info_array_t*)malloc(sizeof(pmix_info_array_t)); - if (NULL == val->data.array) { - return PMIX_ERR_NOMEM; - } - if (PMIX_SUCCESS != (ret = pmix_bfrops_base_unpack_array(buffer, val->data.array, &m, PMIX_INFO_ARRAY))) { - return ret; - } - break; - /********************/ default: - pmix_output(0, "UNPACK-PMIX-VALUE: UNSUPPORTED TYPE %d", (int)val->type); - return PMIX_ERROR; + pmix_output(0, "UNPACK-PMIX-VALUE: UNSUPPORTED TYPE %d", (int)val->type); + return PMIX_ERROR; } return PMIX_SUCCESS; @@ -836,7 +824,7 @@ pmix_status_t pmix_bfrops_base_unpack_info(pmix_buffer_t *buffer, void *dest, if (NULL == tmp) { return PMIX_ERROR; } - (void)strncpy(ptr[i].key, tmp, PMIX_MAX_KEYLEN); + pmix_strncpy(ptr[i].key, tmp, PMIX_MAX_KEYLEN); free(tmp); /* unpack the directives */ m=1; @@ -890,7 +878,7 @@ 
pmix_status_t pmix_bfrops_base_unpack_pdata(pmix_buffer_t *buffer, void *dest, PMIX_ERROR_LOG(PMIX_ERROR); return PMIX_ERROR; } - (void)strncpy(ptr[i].key, tmp, PMIX_MAX_KEYLEN); + pmix_strncpy(ptr[i].key, tmp, PMIX_MAX_KEYLEN); free(tmp); /* unpack value - since the value structure is statically-defined * instead of a pointer in this struct, we directly unpack it to @@ -982,7 +970,7 @@ pmix_status_t pmix_bfrops_base_unpack_proc(pmix_buffer_t *buffer, void *dest, PMIX_ERROR_LOG(PMIX_ERROR); return PMIX_ERROR; } - (void)strncpy(ptr[i].nspace, tmp, PMIX_MAX_NSLEN); + pmix_strncpy(ptr[i].nspace, tmp, PMIX_MAX_NSLEN); free(tmp); /* unpack the rank */ m=1; @@ -1111,38 +1099,6 @@ pmix_status_t pmix_bfrops_base_unpack_kval(pmix_buffer_t *buffer, void *dest, return PMIX_SUCCESS; } -pmix_status_t pmix_bfrops_base_unpack_modex(pmix_buffer_t *buffer, void *dest, - int32_t *num_vals, pmix_data_type_t type) -{ - pmix_modex_data_t *ptr; - int32_t i, n, m; - pmix_status_t ret; - - pmix_output_verbose(20, pmix_bfrops_base_framework.framework_output, - "pmix_bfrop_unpack: %d modex", *num_vals); - - ptr = (pmix_modex_data_t *) dest; - n = *num_vals; - - for (i = 0; i < n; ++i) { - memset(&ptr[i], 0, sizeof(pmix_modex_data_t)); - /* unpack the number of bytes */ - m=1; - if (PMIX_SUCCESS != (ret = pmix_bfrops_base_unpack_sizet(buffer, &ptr[i].size, &m, PMIX_SIZE))) { - return ret; - } - if (0 < ptr[i].size) { - ptr[i].blob = (uint8_t*)malloc(ptr[i].size * sizeof(uint8_t)); - m=ptr[i].size; - if (PMIX_SUCCESS != (ret = pmix_bfrops_base_unpack_byte(buffer, ptr[i].blob, &m, PMIX_UINT8))) { - return ret; - } - } - } - return PMIX_SUCCESS; -} - - pmix_status_t pmix_bfrops_base_unpack_persist(pmix_buffer_t *buffer, void *dest, int32_t *num_vals, pmix_data_type_t type) { @@ -1539,17 +1495,6 @@ pmix_status_t pmix_bfrops_base_unpack_darray(pmix_buffer_t *buffer, void *dest, return ret; } break; - /**** DEPRECATED ****/ - case PMIX_INFO_ARRAY: - ptr[i].array = (pmix_info_array_t*)malloc(m * 
sizeof(pmix_info_array_t)); - if (NULL == ptr[i].array) { - return PMIX_ERR_NOMEM; - } - if (PMIX_SUCCESS != (ret = pmix_bfrops_base_unpack_array(buffer, ptr[i].array, &m, ptr[i].type))) { - return ret; - } - break; - /********************/ default: return PMIX_ERR_NOT_SUPPORTED; } @@ -1657,37 +1602,3 @@ pmix_status_t pmix_bfrops_base_unpack_envar(pmix_buffer_t *buffer, void *dest, } return PMIX_SUCCESS; } - -/**** DEPRECATED ****/ -pmix_status_t pmix_bfrops_base_unpack_array(pmix_buffer_t *buffer, void *dest, - int32_t *num_vals, pmix_data_type_t type) -{ - pmix_info_array_t *ptr; - int32_t i, n, m; - pmix_status_t ret; - - pmix_output_verbose(20, pmix_bfrops_base_framework.framework_output, - "pmix_bfrop_unpack: %d info arrays", *num_vals); - - ptr = (pmix_info_array_t*) dest; - n = *num_vals; - - for (i = 0; i < n; ++i) { - pmix_output_verbose(20, pmix_bfrops_base_framework.framework_output, - "pmix_bfrop_unpack: init array[%d]", i); - memset(&ptr[i], 0, sizeof(pmix_info_array_t)); - /* unpack the size of this array */ - m=1; - if (PMIX_SUCCESS != (ret = pmix_bfrops_base_unpack_sizet(buffer, &ptr[i].size, &m, PMIX_SIZE))) { - return ret; - } - if (0 < ptr[i].size) { - ptr[i].array = (pmix_info_t*)malloc(ptr[i].size * sizeof(pmix_info_t)); - m=ptr[i].size; - if (PMIX_SUCCESS != (ret = pmix_bfrops_base_unpack_value(buffer, ptr[i].array, &m, PMIX_INFO))) { - return ret; - } - } - } - return PMIX_SUCCESS; -} diff --git a/opal/mca/pmix/pmix3x/pmix/src/mca/bfrops/bfrops.h b/opal/mca/pmix/pmix3x/pmix/src/mca/bfrops/bfrops.h index bfb4013b875..258b727afe8 100644 --- a/opal/mca/pmix/pmix3x/pmix/src/mca/bfrops/bfrops.h +++ b/opal/mca/pmix/pmix3x/pmix/src/mca/bfrops/bfrops.h @@ -324,7 +324,7 @@ typedef pmix_status_t (*pmix_bfrop_print_fn_t)(char **output, char *prefix, * @retval PMIX_ERROR(s) An appropriate error code */ typedef pmix_status_t (*pmix_bfrop_value_xfer_fn_t)(pmix_value_t *dest, - pmix_value_t *src); + const pmix_value_t *src); /** diff --git 
a/opal/mca/pmix/pmix3x/pmix/src/mca/bfrops/v12/Makefile.am b/opal/mca/pmix/pmix3x/pmix/src/mca/bfrops/v12/Makefile.am index db788f3a7c8..5855a6aeae3 100644 --- a/opal/mca/pmix/pmix3x/pmix/src/mca/bfrops/v12/Makefile.am +++ b/opal/mca/pmix/pmix3x/pmix/src/mca/bfrops/v12/Makefile.am @@ -11,7 +11,7 @@ # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. # Copyright (c) 2012 Los Alamos National Security, Inc. All rights reserved. -# Copyright (c) 2013-2017 Intel, Inc. All rights reserved. +# Copyright (c) 2013-2019 Intel, Inc. All rights reserved. # $COPYRIGHT$ # # Additional copyrights may follow @@ -51,6 +51,9 @@ mcacomponentdir = $(pmixlibdir) mcacomponent_LTLIBRARIES = $(component) mca_bfrops_v12_la_SOURCES = $(component_sources) mca_bfrops_v12_la_LDFLAGS = -module -avoid-version +if NEED_LIBPMIX +mca_bfrops_v12_la_LIBADD = $(top_builddir)/src/libpmix.la +endif noinst_LTLIBRARIES = $(lib) libmca_bfrops_v12_la_SOURCES = $(lib_sources) diff --git a/opal/mca/pmix/pmix3x/pmix/src/mca/bfrops/v12/copy.c b/opal/mca/pmix/pmix3x/pmix/src/mca/bfrops/v12/copy.c index d833e4bdc86..c4eeeb14bbc 100644 --- a/opal/mca/pmix/pmix3x/pmix/src/mca/bfrops/v12/copy.c +++ b/opal/mca/pmix/pmix3x/pmix/src/mca/bfrops/v12/copy.c @@ -9,7 +9,7 @@ * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. - * Copyright (c) 2014-2017 Intel, Inc. All rights reserved. + * Copyright (c) 2014-2018 Intel, Inc. All rights reserved. * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. * Copyright (c) 2016 IBM Corporation. All rights reserved. 
@@ -225,10 +225,8 @@ pmix_value_cmp_t pmix12_bfrop_value_cmp(pmix_value_t *p, pmix_value_t *p1) return PMIX_VALUE1_GREATER; } /* COPY FUNCTIONS FOR GENERIC PMIX TYPES */ -pmix_status_t pmix12_bfrop_value_xfer(pmix_value_t *p, pmix_value_t *src) +pmix_status_t pmix12_bfrop_value_xfer(pmix_value_t *p, const pmix_value_t *src) { - pmix_info_t *p1, *s1; - /* copy the right field */ p->type = src->type; switch (src->type) { @@ -300,22 +298,7 @@ pmix_status_t pmix12_bfrop_value_xfer(pmix_value_t *p, pmix_value_t *src) p->data.tv.tv_usec = src->data.tv.tv_usec; break; case PMIX_INFO_ARRAY: - p->data.array = (pmix_info_array_t*)malloc(sizeof(pmix_info_array_t)); - if (NULL == p->data.array) { - return PMIX_ERR_NOMEM; - } - p->data.array->size = src->data.array->size; - if (0 < src->data.array->size) { - p->data.array->array = (pmix_info_t*)malloc(src->data.array->size * sizeof(pmix_info_t)); - if (NULL == p->data.array->array) { - free(p->data.array); - return PMIX_ERR_NOMEM; - } - p1 = (pmix_info_t*)p->data.array->array; - s1 = (pmix_info_t*)src->data.array->array; - memcpy(p1, s1, src->data.array->size * sizeof(pmix_info_t)); - } - break; + return PMIX_ERR_NOT_SUPPORTED; case PMIX_BYTE_OBJECT: if (NULL != src->data.bo.bytes && 0 < src->data.bo.size) { p->data.bo.bytes = malloc(src->data.bo.size); @@ -356,7 +339,7 @@ pmix_status_t pmix12_bfrop_copy_info(pmix_info_t **dest, pmix_info_t *src, pmix_data_type_t type) { *dest = (pmix_info_t*)malloc(sizeof(pmix_info_t)); - (void)strncpy((*dest)->key, src->key, PMIX_MAX_KEYLEN); + pmix_strncpy((*dest)->key, src->key, PMIX_MAX_KEYLEN); return pmix_value_xfer(&(*dest)->value, &src->value); } @@ -381,7 +364,7 @@ pmix_status_t pmix12_bfrop_copy_app(pmix_app_t **dest, pmix_app_t *src, (*dest)->ninfo = src->ninfo; (*dest)->info = (pmix_info_t*)malloc(src->ninfo * sizeof(pmix_info_t)); for (j=0; j < src->ninfo; j++) { - (void)strncpy((*dest)->info[j].key, src->info[j].key, PMIX_MAX_KEYLEN); + pmix_strncpy((*dest)->info[j].key, 
src->info[j].key, PMIX_MAX_KEYLEN); pmix_value_xfer(&(*dest)->info[j].value, &src->info[j].value); } return PMIX_SUCCESS; @@ -427,7 +410,7 @@ pmix_status_t pmix12_bfrop_copy_proc(pmix_proc_t **dest, pmix_proc_t *src, if (NULL == *dest) { return PMIX_ERR_OUT_OF_RESOURCE; } - (void)strncpy((*dest)->nspace, src->nspace, PMIX_MAX_NSLEN); + pmix_strncpy((*dest)->nspace, src->nspace, PMIX_MAX_NSLEN); (*dest)->rank = src->rank; return PMIX_SUCCESS; } @@ -484,9 +467,9 @@ pmix_status_t pmix12_bfrop_copy_pdata(pmix_pdata_t **dest, pmix_data_type_t type) { *dest = (pmix_pdata_t*)malloc(sizeof(pmix_pdata_t)); - (void)strncpy((*dest)->proc.nspace, src->proc.nspace, PMIX_MAX_NSLEN); + pmix_strncpy((*dest)->proc.nspace, src->proc.nspace, PMIX_MAX_NSLEN); (*dest)->proc.rank = src->proc.rank; - (void)strncpy((*dest)->key, src->key, PMIX_MAX_KEYLEN); + pmix_strncpy((*dest)->key, src->key, PMIX_MAX_KEYLEN); return pmix_value_xfer(&(*dest)->value, &src->value); } diff --git a/opal/mca/pmix/pmix3x/pmix/src/mca/bfrops/v12/internal.h b/opal/mca/pmix/pmix3x/pmix/src/mca/bfrops/v12/internal.h index 78dbf47666f..9c74c855685 100644 --- a/opal/mca/pmix/pmix3x/pmix/src/mca/bfrops/v12/internal.h +++ b/opal/mca/pmix/pmix3x/pmix/src/mca/bfrops/v12/internal.h @@ -11,7 +11,7 @@ * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2012 Los Alamos National Security, Inc. All rights reserved. - * Copyright (c) 2014-2017 Intel, Inc. All rights reserved. + * Copyright (c) 2014-2018 Intel, Inc. All rights reserved. * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. * Copyright (c) 2016 IBM Corporation. All rights reserved. 
@@ -41,6 +41,55 @@ BEGIN_C_DECLS +/* DEPRECATED data type values */ +#define PMIX_MODEX 29 +#define PMIX_INFO_ARRAY 44 + +/**** PMIX MODEX STRUCT - DEPRECATED ****/ +typedef struct pmix_modex_data { + char nspace[PMIX_MAX_NSLEN+1]; + int rank; + uint8_t *blob; + size_t size; +} pmix_modex_data_t; +/* utility macros for working with pmix_modex_t structs */ +#define PMIX_MODEX_CREATE(m, n) \ + do { \ + (m) = (pmix_modex_data_t*)calloc((n) , sizeof(pmix_modex_data_t)); \ + } while (0) + +#define PMIX_MODEX_RELEASE(m) \ + do { \ + PMIX_MODEX_DESTRUCT((m)); \ + free((m)); \ + (m) = NULL; \ + } while (0) + +#define PMIX_MODEX_CONSTRUCT(m) \ + do { \ + memset((m), 0, sizeof(pmix_modex_data_t)); \ + } while (0) + +#define PMIX_MODEX_DESTRUCT(m) \ + do { \ + if (NULL != (m)->blob) { \ + free((m)->blob); \ + (m)->blob = NULL; \ + } \ + } while (0) + +#define PMIX_MODEX_FREE(m, n) \ + do { \ + size_t _s; \ + if (NULL != (m)) { \ + for (_s=0; _s < (n); _s++) { \ + PMIX_MODEX_DESTRUCT(&((m)[_s])); \ + } \ + free((m)); \ + (m) = NULL; \ + } \ + } while (0) + /* * Implementations of API functions */ @@ -58,7 +107,7 @@ pmix_status_t pmix12_bfrop_print(char **output, char *prefix, void *src, pmix_da pmix_status_t pmix12_bfrop_copy_payload(pmix_buffer_t *dest, pmix_buffer_t *src); -pmix_status_t pmix12_bfrop_value_xfer(pmix_value_t *p, pmix_value_t *src); +pmix_status_t pmix12_bfrop_value_xfer(pmix_value_t *p, const pmix_value_t *src); void pmix12_bfrop_value_load(pmix_value_t *v, const void *data, pmix_data_type_t type); diff --git a/opal/mca/pmix/pmix3x/pmix/src/mca/bfrops/v12/pack.c b/opal/mca/pmix/pmix3x/pmix/src/mca/bfrops/v12/pack.c index 07f9a74a27d..efddd287de5 100644 --- a/opal/mca/pmix/pmix3x/pmix/src/mca/bfrops/v12/pack.c +++ b/opal/mca/pmix/pmix3x/pmix/src/mca/bfrops/v12/pack.c @@ -10,7 +10,7 @@ * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2011-2013 Cisco Systems, Inc. All rights reserved. 
- * Copyright (c) 2014-2017 Intel, Inc. All rights reserved. + * Copyright (c) 2014-2018 Intel, Inc. All rights reserved. * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. * Copyright (c) 2016 Mellanox Technologies, Inc. @@ -537,11 +537,6 @@ static pmix_status_t pack_val(pmix_buffer_t *buffer, return ret; } break; - case PMIX_INFO_ARRAY: - if (PMIX_SUCCESS != (ret = pmix12_bfrop_pack_buffer(buffer, &p->data.array, 1, PMIX_INFO_ARRAY))) { - return ret; - } - break; case PMIX_BYTE_OBJECT: if (PMIX_SUCCESS != (ret = pmix12_bfrop_pack_buffer(buffer, &p->data.bo, 1, PMIX_BYTE_OBJECT))) { return ret; diff --git a/opal/mca/pmix/pmix3x/pmix/src/mca/bfrops/v12/unpack.c b/opal/mca/pmix/pmix3x/pmix/src/mca/bfrops/v12/unpack.c index 5604637f1b7..a001728ef02 100644 --- a/opal/mca/pmix/pmix3x/pmix/src/mca/bfrops/v12/unpack.c +++ b/opal/mca/pmix/pmix3x/pmix/src/mca/bfrops/v12/unpack.c @@ -10,7 +10,7 @@ * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2012 Los Alamos National Security, Inc. All rights reserved. - * Copyright (c) 2014-2017 Intel, Inc. All rights reserved. + * Copyright (c) 2014-2019 Intel, Inc. All rights reserved. * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. * Copyright (c) 2016 Mellanox Technologies, Inc. 
@@ -670,7 +670,13 @@ static pmix_status_t unpack_val(pmix_buffer_t *buffer, pmix_value_t *val) } break; case PMIX_INFO_ARRAY: - if (PMIX_SUCCESS != (ret = pmix12_bfrop_unpack_buffer(buffer, &val->data.array, &m, PMIX_INFO_ARRAY))) { + /* we don't know anything about info array's so we + * have to convert this to a data array */ + val->data.darray = (pmix_data_array_t*)calloc(1, sizeof(pmix_data_array_t)); + val->data.darray->type = PMIX_INFO_ARRAY; + val->data.darray->size = m; + /* unpack into it */ + if (PMIX_SUCCESS != (ret = pmix12_bfrop_unpack_buffer(buffer, &val->data.darray->array, &m, PMIX_INFO_ARRAY))) { return ret; } break; @@ -743,7 +749,7 @@ pmix_status_t pmix12_bfrop_unpack_info(pmix_buffer_t *buffer, void *dest, if (NULL == tmp) { return PMIX_ERROR; } - (void)strncpy(ptr[i].key, tmp, PMIX_MAX_KEYLEN); + pmix_strncpy(ptr[i].key, tmp, PMIX_MAX_KEYLEN); free(tmp); /* unpack value - since the value structure is statically-defined * instead of a pointer in this struct, we directly unpack it to @@ -797,7 +803,7 @@ pmix_status_t pmix12_bfrop_unpack_pdata(pmix_buffer_t *buffer, void *dest, if (NULL == tmp) { return PMIX_ERROR; } - (void)strncpy(ptr[i].key, tmp, PMIX_MAX_KEYLEN); + pmix_strncpy(ptr[i].key, tmp, PMIX_MAX_KEYLEN); free(tmp); /* unpack value - since the value structure is statically-defined * instead of a pointer in this struct, we directly unpack it to @@ -881,7 +887,7 @@ pmix_status_t pmix12_bfrop_unpack_proc(pmix_buffer_t *buffer, void *dest, if (NULL == tmp) { return PMIX_ERROR; } - (void)strncpy(ptr[i].nspace, tmp, PMIX_MAX_NSLEN); + pmix_strncpy(ptr[i].nspace, tmp, PMIX_MAX_NSLEN); free(tmp); /* unpack the rank */ m=1; diff --git a/opal/mca/pmix/pmix3x/pmix/src/mca/bfrops/v20/Makefile.am b/opal/mca/pmix/pmix3x/pmix/src/mca/bfrops/v20/Makefile.am index ffe16123206..42eb14940aa 100644 --- a/opal/mca/pmix/pmix3x/pmix/src/mca/bfrops/v20/Makefile.am +++ b/opal/mca/pmix/pmix3x/pmix/src/mca/bfrops/v20/Makefile.am @@ -11,7 +11,7 @@ # Copyright (c) 
2004-2005 The Regents of the University of California. # All rights reserved. # Copyright (c) 2012 Los Alamos National Security, Inc. All rights reserved. -# Copyright (c) 2013-2017 Intel, Inc. All rights reserved. +# Copyright (c) 2013-2019 Intel, Inc. All rights reserved. # $COPYRIGHT$ # # Additional copyrights may follow @@ -51,6 +51,9 @@ mcacomponentdir = $(pmixlibdir) mcacomponent_LTLIBRARIES = $(component) mca_bfrops_v20_la_SOURCES = $(component_sources) mca_bfrops_v20_la_LDFLAGS = -module -avoid-version +if NEED_LIBPMIX +mca_bfrops_v20_la_LIBADD = $(top_builddir)/src/libpmix.la +endif noinst_LTLIBRARIES = $(lib) libmca_bfrops_v20_la_SOURCES = $(lib_sources) diff --git a/opal/mca/pmix/pmix3x/pmix/src/mca/bfrops/v20/copy.c b/opal/mca/pmix/pmix3x/pmix/src/mca/bfrops/v20/copy.c index 53fbf9b264c..fbdbae3efff 100644 --- a/opal/mca/pmix/pmix3x/pmix/src/mca/bfrops/v20/copy.c +++ b/opal/mca/pmix/pmix3x/pmix/src/mca/bfrops/v20/copy.c @@ -9,7 +9,7 @@ * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. - * Copyright (c) 2014-2017 Intel, Inc. All rights reserved. + * Copyright (c) 2014-2019 Intel, Inc. All rights reserved. * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. * Copyright (c) 2016 IBM Corporation. All rights reserved. 
@@ -337,7 +337,7 @@ bool pmix_value_cmp(pmix_value_t *p, pmix_value_t *p1) /* COPY FUNCTIONS FOR GENERIC PMIX TYPES - we * are not allocating memory and so we cannot * use the regular copy functions */ -pmix_status_t pmix20_bfrop_value_xfer(pmix_value_t *p, pmix_value_t *src) +pmix_status_t pmix20_bfrop_value_xfer(pmix_value_t *p, const pmix_value_t *src) { size_t n, m; pmix_status_t rc; @@ -356,518 +356,522 @@ pmix_status_t pmix20_bfrop_value_xfer(pmix_value_t *p, pmix_value_t *src) /* copy the right field */ p->type = src->type; switch (src->type) { - case PMIX_UNDEF: - break; - case PMIX_BOOL: - p->data.flag = src->data.flag; - break; - case PMIX_BYTE: - p->data.byte = src->data.byte; - break; - case PMIX_STRING: - if (NULL != src->data.string) { - p->data.string = strdup(src->data.string); - } else { - p->data.string = NULL; - } - break; - case PMIX_SIZE: - p->data.size = src->data.size; - break; - case PMIX_PID: - p->data.pid = src->data.pid; - break; - case PMIX_INT: - /* to avoid alignment issues */ - memcpy(&p->data.integer, &src->data.integer, sizeof(int)); - break; - case PMIX_INT8: - p->data.int8 = src->data.int8; - break; - case PMIX_INT16: - /* to avoid alignment issues */ - memcpy(&p->data.int16, &src->data.int16, 2); - break; - case PMIX_INT32: - /* to avoid alignment issues */ - memcpy(&p->data.int32, &src->data.int32, 4); - break; - case PMIX_INT64: - /* to avoid alignment issues */ - memcpy(&p->data.int64, &src->data.int64, 8); - break; - case PMIX_UINT: - /* to avoid alignment issues */ - memcpy(&p->data.uint, &src->data.uint, sizeof(unsigned int)); - break; - case PMIX_UINT8: - p->data.uint8 = src->data.uint8; - break; - case PMIX_UINT16: - /* to avoid alignment issues */ - memcpy(&p->data.uint16, &src->data.uint16, 2); - break; - case PMIX_UINT32: - /* to avoid alignment issues */ - memcpy(&p->data.uint32, &src->data.uint32, 4); - break; - case PMIX_UINT64: - /* to avoid alignment issues */ - memcpy(&p->data.uint64, &src->data.uint64, 8); - 
break; - case PMIX_FLOAT: - p->data.fval = src->data.fval; - break; - case PMIX_DOUBLE: - p->data.dval = src->data.dval; - break; - case PMIX_TIMEVAL: - memcpy(&p->data.tv, &src->data.tv, sizeof(struct timeval)); - break; - case PMIX_TIME: - memcpy(&p->data.time, &src->data.time, sizeof(time_t)); - break; - case PMIX_STATUS: - memcpy(&p->data.status, &src->data.status, sizeof(pmix_status_t)); - break; - case PMIX_PROC: - memcpy(&p->data.proc, &src->data.proc, sizeof(pmix_proc_t)); - break; - case PMIX_PROC_RANK: - memcpy(&p->data.proc, &src->data.rank, sizeof(pmix_rank_t)); - break; - case PMIX_BYTE_OBJECT: - case PMIX_COMPRESSED_STRING: - memset(&p->data.bo, 0, sizeof(pmix_byte_object_t)); - if (NULL != src->data.bo.bytes && 0 < src->data.bo.size) { - p->data.bo.bytes = malloc(src->data.bo.size); - memcpy(p->data.bo.bytes, src->data.bo.bytes, src->data.bo.size); - p->data.bo.size = src->data.bo.size; - } else { - p->data.bo.bytes = NULL; - p->data.bo.size = 0; - } - break; - case PMIX_PERSIST: - memcpy(&p->data.persist, &src->data.persist, sizeof(pmix_persistence_t)); - break; - case PMIX_SCOPE: - memcpy(&p->data.scope, &src->data.scope, sizeof(pmix_scope_t)); - break; - case PMIX_DATA_RANGE: - memcpy(&p->data.range, &src->data.range, sizeof(pmix_data_range_t)); - break; - case PMIX_PROC_STATE: - memcpy(&p->data.state, &src->data.state, sizeof(pmix_proc_state_t)); - break; - case PMIX_PROC_INFO: - PMIX_PROC_INFO_CREATE(p->data.pinfo, 1); - if (NULL != src->data.pinfo->hostname) { - p->data.pinfo->hostname = strdup(src->data.pinfo->hostname); - } - if (NULL != src->data.pinfo->executable_name) { - p->data.pinfo->executable_name = strdup(src->data.pinfo->executable_name); - } - memcpy(&p->data.pinfo->pid, &src->data.pinfo->pid, sizeof(pid_t)); - memcpy(&p->data.pinfo->exit_code, &src->data.pinfo->exit_code, sizeof(int)); - memcpy(&p->data.pinfo->state, &src->data.pinfo->state, sizeof(pmix_proc_state_t)); - break; - case PMIX_DATA_ARRAY: - p->data.darray = 
(pmix_data_array_t*)calloc(1, sizeof(pmix_data_array_t)); - p->data.darray->type = src->data.darray->type; - p->data.darray->size = src->data.darray->size; - if (0 == p->data.darray->size || NULL == src->data.darray->array) { - p->data.darray->array = NULL; - p->data.darray->size = 0; + case PMIX_UNDEF: break; - } - /* allocate space and do the copy */ - switch (src->data.darray->type) { - case PMIX_UINT8: - case PMIX_INT8: - case PMIX_BYTE: - p->data.darray->array = (char*)malloc(src->data.darray->size); - if (NULL == p->data.darray->array) { - return PMIX_ERR_NOMEM; - } - memcpy(p->data.darray->array, src->data.darray->array, src->data.darray->size); - break; - case PMIX_UINT16: - case PMIX_INT16: - p->data.darray->array = (char*)malloc(src->data.darray->size * sizeof(uint16_t)); - if (NULL == p->data.darray->array) { - return PMIX_ERR_NOMEM; - } - memcpy(p->data.darray->array, src->data.darray->array, src->data.darray->size * sizeof(uint16_t)); - break; - case PMIX_UINT32: - case PMIX_INT32: - p->data.darray->array = (char*)malloc(src->data.darray->size * sizeof(uint32_t)); - if (NULL == p->data.darray->array) { - return PMIX_ERR_NOMEM; - } - memcpy(p->data.darray->array, src->data.darray->array, src->data.darray->size * sizeof(uint32_t)); - break; - case PMIX_UINT64: - case PMIX_INT64: - p->data.darray->array = (char*)malloc(src->data.darray->size * sizeof(uint64_t)); - if (NULL == p->data.darray->array) { - return PMIX_ERR_NOMEM; - } - memcpy(p->data.darray->array, src->data.darray->array, src->data.darray->size * sizeof(uint64_t)); - break; - case PMIX_BOOL: - p->data.darray->array = (char*)malloc(src->data.darray->size * sizeof(bool)); - if (NULL == p->data.darray->array) { - return PMIX_ERR_NOMEM; - } - memcpy(p->data.darray->array, src->data.darray->array, src->data.darray->size * sizeof(bool)); - break; - case PMIX_SIZE: - p->data.darray->array = (char*)malloc(src->data.darray->size * sizeof(size_t)); - if (NULL == p->data.darray->array) { - return 
PMIX_ERR_NOMEM; - } - memcpy(p->data.darray->array, src->data.darray->array, src->data.darray->size * sizeof(size_t)); - break; - case PMIX_PID: - p->data.darray->array = (char*)malloc(src->data.darray->size * sizeof(pid_t)); - if (NULL == p->data.darray->array) { - return PMIX_ERR_NOMEM; - } - memcpy(p->data.darray->array, src->data.darray->array, src->data.darray->size * sizeof(pid_t)); + case PMIX_BOOL: + p->data.flag = src->data.flag; + break; + case PMIX_BYTE: + p->data.byte = src->data.byte; + break; + case PMIX_STRING: + if (NULL != src->data.string) { + p->data.string = strdup(src->data.string); + } else { + p->data.string = NULL; + } + break; + case PMIX_SIZE: + p->data.size = src->data.size; + break; + case PMIX_PID: + p->data.pid = src->data.pid; + break; + case PMIX_INT: + /* to avoid alignment issues */ + memcpy(&p->data.integer, &src->data.integer, sizeof(int)); + break; + case PMIX_INT8: + p->data.int8 = src->data.int8; + break; + case PMIX_INT16: + /* to avoid alignment issues */ + memcpy(&p->data.int16, &src->data.int16, 2); + break; + case PMIX_INT32: + /* to avoid alignment issues */ + memcpy(&p->data.int32, &src->data.int32, 4); + break; + case PMIX_INT64: + /* to avoid alignment issues */ + memcpy(&p->data.int64, &src->data.int64, 8); + break; + case PMIX_UINT: + /* to avoid alignment issues */ + memcpy(&p->data.uint, &src->data.uint, sizeof(unsigned int)); + break; + case PMIX_UINT8: + p->data.uint8 = src->data.uint8; + break; + case PMIX_UINT16: + /* to avoid alignment issues */ + memcpy(&p->data.uint16, &src->data.uint16, 2); + break; + case PMIX_UINT32: + /* to avoid alignment issues */ + memcpy(&p->data.uint32, &src->data.uint32, 4); + break; + case PMIX_UINT64: + /* to avoid alignment issues */ + memcpy(&p->data.uint64, &src->data.uint64, 8); + break; + case PMIX_FLOAT: + p->data.fval = src->data.fval; + break; + case PMIX_DOUBLE: + p->data.dval = src->data.dval; + break; + case PMIX_TIMEVAL: + memcpy(&p->data.tv, &src->data.tv, 
sizeof(struct timeval)); + break; + case PMIX_TIME: + memcpy(&p->data.time, &src->data.time, sizeof(time_t)); + break; + case PMIX_STATUS: + memcpy(&p->data.status, &src->data.status, sizeof(pmix_status_t)); + break; + case PMIX_PROC: + /* create the storage */ + p->data.proc = (pmix_proc_t*)malloc(sizeof(pmix_proc_t)); + if (NULL == p->data.proc) { + return PMIX_ERR_NOMEM; + } + memcpy(p->data.proc, src->data.proc, sizeof(pmix_proc_t)); + break; + case PMIX_PROC_RANK: + memcpy(&p->data.rank, &src->data.rank, sizeof(pmix_rank_t)); + break; + case PMIX_BYTE_OBJECT: + case PMIX_COMPRESSED_STRING: + memset(&p->data.bo, 0, sizeof(pmix_byte_object_t)); + if (NULL != src->data.bo.bytes && 0 < src->data.bo.size) { + p->data.bo.bytes = malloc(src->data.bo.size); + memcpy(p->data.bo.bytes, src->data.bo.bytes, src->data.bo.size); + p->data.bo.size = src->data.bo.size; + } else { + p->data.bo.bytes = NULL; + p->data.bo.size = 0; + } + break; + case PMIX_PERSIST: + memcpy(&p->data.persist, &src->data.persist, sizeof(pmix_persistence_t)); + break; + case PMIX_SCOPE: + memcpy(&p->data.scope, &src->data.scope, sizeof(pmix_scope_t)); + break; + case PMIX_DATA_RANGE: + memcpy(&p->data.range, &src->data.range, sizeof(pmix_data_range_t)); + break; + case PMIX_PROC_STATE: + memcpy(&p->data.state, &src->data.state, sizeof(pmix_proc_state_t)); + break; + case PMIX_PROC_INFO: + PMIX_PROC_INFO_CREATE(p->data.pinfo, 1); + if (NULL != src->data.pinfo->hostname) { + p->data.pinfo->hostname = strdup(src->data.pinfo->hostname); + } + if (NULL != src->data.pinfo->executable_name) { + p->data.pinfo->executable_name = strdup(src->data.pinfo->executable_name); + } + memcpy(&p->data.pinfo->pid, &src->data.pinfo->pid, sizeof(pid_t)); + memcpy(&p->data.pinfo->exit_code, &src->data.pinfo->exit_code, sizeof(int)); + memcpy(&p->data.pinfo->state, &src->data.pinfo->state, sizeof(pmix_proc_state_t)); + break; + case PMIX_DATA_ARRAY: + p->data.darray = (pmix_data_array_t*)calloc(1, 
sizeof(pmix_data_array_t)); + p->data.darray->type = src->data.darray->type; + p->data.darray->size = src->data.darray->size; + if (0 == p->data.darray->size || NULL == src->data.darray->array) { + p->data.darray->array = NULL; + p->data.darray->size = 0; break; - case PMIX_STRING: - p->data.darray->array = (char**)malloc(src->data.darray->size * sizeof(char*)); - if (NULL == p->data.darray->array) { - return PMIX_ERR_NOMEM; - } - prarray = (char**)p->data.darray->array; - strarray = (char**)src->data.darray->array; - for (n=0; n < src->data.darray->size; n++) { - if (NULL != strarray[n]) { - prarray[n] = strdup(strarray[n]); + } + /* allocate space and do the copy */ + switch (src->data.darray->type) { + case PMIX_UINT8: + case PMIX_INT8: + case PMIX_BYTE: + p->data.darray->array = (char*)malloc(src->data.darray->size); + if (NULL == p->data.darray->array) { + return PMIX_ERR_NOMEM; } - } - break; - case PMIX_INT: - case PMIX_UINT: - p->data.darray->array = (char*)malloc(src->data.darray->size * sizeof(int)); - if (NULL == p->data.darray->array) { - return PMIX_ERR_NOMEM; - } - memcpy(p->data.darray->array, src->data.darray->array, src->data.darray->size * sizeof(int)); - break; - case PMIX_FLOAT: - p->data.darray->array = (char*)malloc(src->data.darray->size * sizeof(float)); - if (NULL == p->data.darray->array) { - return PMIX_ERR_NOMEM; - } - memcpy(p->data.darray->array, src->data.darray->array, src->data.darray->size * sizeof(float)); - break; - case PMIX_DOUBLE: - p->data.darray->array = (char*)malloc(src->data.darray->size * sizeof(double)); - if (NULL == p->data.darray->array) { - return PMIX_ERR_NOMEM; - } - memcpy(p->data.darray->array, src->data.darray->array, src->data.darray->size * sizeof(double)); - break; - case PMIX_TIMEVAL: - p->data.darray->array = (struct timeval*)malloc(src->data.darray->size * sizeof(struct timeval)); - if (NULL == p->data.darray->array) { - return PMIX_ERR_NOMEM; - } - memcpy(p->data.darray->array, src->data.darray->array, 
src->data.darray->size * sizeof(struct timeval)); - break; - case PMIX_TIME: - p->data.darray->array = (time_t*)malloc(src->data.darray->size * sizeof(time_t)); - if (NULL == p->data.darray->array) { - return PMIX_ERR_NOMEM; - } - memcpy(p->data.darray->array, src->data.darray->array, src->data.darray->size * sizeof(time_t)); - break; - case PMIX_STATUS: - p->data.darray->array = (pmix_status_t*)malloc(src->data.darray->size * sizeof(pmix_status_t)); - if (NULL == p->data.darray->array) { - return PMIX_ERR_NOMEM; - } - memcpy(p->data.darray->array, src->data.darray->array, src->data.darray->size * sizeof(pmix_status_t)); - break; - case PMIX_VALUE: - PMIX_VALUE_CREATE(p->data.darray->array, src->data.darray->size); - if (NULL == p->data.darray->array) { - return PMIX_ERR_NOMEM; - } - pv = (pmix_value_t*)p->data.darray->array; - sv = (pmix_value_t*)src->data.darray->array; - for (n=0; n < src->data.darray->size; n++) { - if (PMIX_SUCCESS != (rc = pmix20_bfrop_value_xfer(&pv[n], &sv[n]))) { - PMIX_VALUE_FREE(pv, src->data.darray->size); - return rc; + memcpy(p->data.darray->array, src->data.darray->array, src->data.darray->size); + break; + case PMIX_UINT16: + case PMIX_INT16: + p->data.darray->array = (char*)malloc(src->data.darray->size * sizeof(uint16_t)); + if (NULL == p->data.darray->array) { + return PMIX_ERR_NOMEM; } - } - break; - case PMIX_PROC: - PMIX_PROC_CREATE(p->data.darray->array, src->data.darray->size); - if (NULL == p->data.darray->array) { - return PMIX_ERR_NOMEM; - } - memcpy(p->data.darray->array, src->data.darray->array, src->data.darray->size * sizeof(pmix_proc_t)); - break; - case PMIX_APP: - PMIX_APP_CREATE(p->data.darray->array, src->data.darray->size); - if (NULL == p->data.darray->array) { - return PMIX_ERR_NOMEM; - } - pa = (pmix_app_t*)p->data.darray->array; - sa = (pmix_app_t*)src->data.darray->array; - for (n=0; n < src->data.darray->size; n++) { - if (NULL != sa[n].cmd) { - pa[n].cmd = strdup(sa[n].cmd); + 
memcpy(p->data.darray->array, src->data.darray->array, src->data.darray->size * sizeof(uint16_t)); + break; + case PMIX_UINT32: + case PMIX_INT32: + p->data.darray->array = (char*)malloc(src->data.darray->size * sizeof(uint32_t)); + if (NULL == p->data.darray->array) { + return PMIX_ERR_NOMEM; } - if (NULL != sa[n].argv) { - pa[n].argv = pmix_argv_copy(sa[n].argv); + memcpy(p->data.darray->array, src->data.darray->array, src->data.darray->size * sizeof(uint32_t)); + break; + case PMIX_UINT64: + case PMIX_INT64: + p->data.darray->array = (char*)malloc(src->data.darray->size * sizeof(uint64_t)); + if (NULL == p->data.darray->array) { + return PMIX_ERR_NOMEM; } - if (NULL != sa[n].env) { - pa[n].env = pmix_argv_copy(sa[n].env); + memcpy(p->data.darray->array, src->data.darray->array, src->data.darray->size * sizeof(uint64_t)); + break; + case PMIX_BOOL: + p->data.darray->array = (char*)malloc(src->data.darray->size * sizeof(bool)); + if (NULL == p->data.darray->array) { + return PMIX_ERR_NOMEM; } - if (NULL != sa[n].cwd) { - pa[n].cwd = strdup(sa[n].cwd); + memcpy(p->data.darray->array, src->data.darray->array, src->data.darray->size * sizeof(bool)); + break; + case PMIX_SIZE: + p->data.darray->array = (char*)malloc(src->data.darray->size * sizeof(size_t)); + if (NULL == p->data.darray->array) { + return PMIX_ERR_NOMEM; + } + memcpy(p->data.darray->array, src->data.darray->array, src->data.darray->size * sizeof(size_t)); + break; + case PMIX_PID: + p->data.darray->array = (char*)malloc(src->data.darray->size * sizeof(pid_t)); + if (NULL == p->data.darray->array) { + return PMIX_ERR_NOMEM; } - pa[n].maxprocs = sa[n].maxprocs; - if (0 < sa[n].ninfo && NULL != sa[n].info) { - PMIX_INFO_CREATE(pa[n].info, sa[n].ninfo); - if (NULL == pa[n].info) { - PMIX_APP_FREE(pa, src->data.darray->size); - return PMIX_ERR_NOMEM; + memcpy(p->data.darray->array, src->data.darray->array, src->data.darray->size * sizeof(pid_t)); + break; + case PMIX_STRING: + p->data.darray->array = 
(char**)malloc(src->data.darray->size * sizeof(char*)); + if (NULL == p->data.darray->array) { + return PMIX_ERR_NOMEM; + } + prarray = (char**)p->data.darray->array; + strarray = (char**)src->data.darray->array; + for (n=0; n < src->data.darray->size; n++) { + if (NULL != strarray[n]) { + prarray[n] = strdup(strarray[n]); } - pa[n].ninfo = sa[n].ninfo; - for (m=0; m < pa[n].ninfo; m++) { - PMIX_INFO_XFER(&pa[n].info[m], &sa[n].info[m]); + } + break; + case PMIX_INT: + case PMIX_UINT: + p->data.darray->array = (char*)malloc(src->data.darray->size * sizeof(int)); + if (NULL == p->data.darray->array) { + return PMIX_ERR_NOMEM; + } + memcpy(p->data.darray->array, src->data.darray->array, src->data.darray->size * sizeof(int)); + break; + case PMIX_FLOAT: + p->data.darray->array = (char*)malloc(src->data.darray->size * sizeof(float)); + if (NULL == p->data.darray->array) { + return PMIX_ERR_NOMEM; + } + memcpy(p->data.darray->array, src->data.darray->array, src->data.darray->size * sizeof(float)); + break; + case PMIX_DOUBLE: + p->data.darray->array = (char*)malloc(src->data.darray->size * sizeof(double)); + if (NULL == p->data.darray->array) { + return PMIX_ERR_NOMEM; + } + memcpy(p->data.darray->array, src->data.darray->array, src->data.darray->size * sizeof(double)); + break; + case PMIX_TIMEVAL: + p->data.darray->array = (struct timeval*)malloc(src->data.darray->size * sizeof(struct timeval)); + if (NULL == p->data.darray->array) { + return PMIX_ERR_NOMEM; + } + memcpy(p->data.darray->array, src->data.darray->array, src->data.darray->size * sizeof(struct timeval)); + break; + case PMIX_TIME: + p->data.darray->array = (time_t*)malloc(src->data.darray->size * sizeof(time_t)); + if (NULL == p->data.darray->array) { + return PMIX_ERR_NOMEM; + } + memcpy(p->data.darray->array, src->data.darray->array, src->data.darray->size * sizeof(time_t)); + break; + case PMIX_STATUS: + p->data.darray->array = (pmix_status_t*)malloc(src->data.darray->size * sizeof(pmix_status_t)); + 
if (NULL == p->data.darray->array) { + return PMIX_ERR_NOMEM; + } + memcpy(p->data.darray->array, src->data.darray->array, src->data.darray->size * sizeof(pmix_status_t)); + break; + case PMIX_VALUE: + PMIX_VALUE_CREATE(p->data.darray->array, src->data.darray->size); + if (NULL == p->data.darray->array) { + return PMIX_ERR_NOMEM; + } + pv = (pmix_value_t*)p->data.darray->array; + sv = (pmix_value_t*)src->data.darray->array; + for (n=0; n < src->data.darray->size; n++) { + if (PMIX_SUCCESS != (rc = pmix20_bfrop_value_xfer(&pv[n], &sv[n]))) { + PMIX_VALUE_FREE(pv, src->data.darray->size); + return rc; } } - } - break; - case PMIX_INFO: - PMIX_INFO_CREATE(p->data.darray->array, src->data.darray->size); - p1 = (pmix_info_t*)p->data.darray->array; - s1 = (pmix_info_t*)src->data.darray->array; - for (n=0; n < src->data.darray->size; n++) { - PMIX_INFO_LOAD(&p1[n], s1[n].key, &s1[n].value.data.flag, s1[n].value.type); - } - break; - case PMIX_PDATA: - PMIX_PDATA_CREATE(p->data.darray->array, src->data.darray->size); - if (NULL == p->data.darray->array) { - return PMIX_ERR_NOMEM; - } - pd = (pmix_pdata_t*)p->data.darray->array; - sd = (pmix_pdata_t*)src->data.darray->array; - for (n=0; n < src->data.darray->size; n++) { - PMIX_PDATA_LOAD(&pd[n], &sd[n].proc, sd[n].key, &sd[n].value.data.flag, sd[n].value.type); - } - break; - case PMIX_BUFFER: - p->data.darray->array = (pmix_buffer_t*)malloc(src->data.darray->size * sizeof(pmix_buffer_t)); - if (NULL == p->data.darray->array) { - return PMIX_ERR_NOMEM; - } - pb = (pmix_buffer_t*)p->data.darray->array; - sb = (pmix_buffer_t*)src->data.darray->array; - for (n=0; n < src->data.darray->size; n++) { - PMIX_CONSTRUCT(&pb[n], pmix_buffer_t); - pmix20_bfrop_copy_payload(&pb[n], &sb[n]); - } - break; - case PMIX_BYTE_OBJECT: - case PMIX_COMPRESSED_STRING: - p->data.darray->array = (pmix_byte_object_t*)malloc(src->data.darray->size * sizeof(pmix_byte_object_t)); - if (NULL == p->data.darray->array) { - return PMIX_ERR_NOMEM; - } - 
pbo = (pmix_byte_object_t*)p->data.darray->array; - sbo = (pmix_byte_object_t*)src->data.darray->array; - for (n=0; n < src->data.darray->size; n++) { - if (NULL != sbo[n].bytes && 0 < sbo[n].size) { - pbo[n].size = sbo[n].size; - pbo[n].bytes = (char*)malloc(pbo[n].size); - memcpy(pbo[n].bytes, sbo[n].bytes, pbo[n].size); - } else { - pbo[n].bytes = NULL; - pbo[n].size = 0; + break; + case PMIX_PROC: + PMIX_PROC_CREATE(p->data.darray->array, src->data.darray->size); + if (NULL == p->data.darray->array) { + return PMIX_ERR_NOMEM; } - } - break; - case PMIX_KVAL: - p->data.darray->array = (pmix_kval_t*)calloc(src->data.darray->size , sizeof(pmix_kval_t)); - if (NULL == p->data.darray->array) { - return PMIX_ERR_NOMEM; - } - pk = (pmix_kval_t*)p->data.darray->array; - sk = (pmix_kval_t*)src->data.darray->array; - for (n=0; n < src->data.darray->size; n++) { - if (NULL != sk[n].key) { - pk[n].key = strdup(sk[n].key); + memcpy(p->data.darray->array, src->data.darray->array, src->data.darray->size * sizeof(pmix_proc_t)); + break; + case PMIX_APP: + PMIX_APP_CREATE(p->data.darray->array, src->data.darray->size); + if (NULL == p->data.darray->array) { + return PMIX_ERR_NOMEM; } - if (NULL != sk[n].value) { - PMIX_VALUE_CREATE(pk[n].value, 1); - if (NULL == pk[n].value) { - free(p->data.darray->array); - return PMIX_ERR_NOMEM; + pa = (pmix_app_t*)p->data.darray->array; + sa = (pmix_app_t*)src->data.darray->array; + for (n=0; n < src->data.darray->size; n++) { + if (NULL != sa[n].cmd) { + pa[n].cmd = strdup(sa[n].cmd); + } + if (NULL != sa[n].argv) { + pa[n].argv = pmix_argv_copy(sa[n].argv); + } + if (NULL != sa[n].env) { + pa[n].env = pmix_argv_copy(sa[n].env); } - if (PMIX_SUCCESS != (rc = pmix20_bfrop_value_xfer(pk[n].value, sk[n].value))) { + if (NULL != sa[n].cwd) { + pa[n].cwd = strdup(sa[n].cwd); + } + pa[n].maxprocs = sa[n].maxprocs; + if (0 < sa[n].ninfo && NULL != sa[n].info) { + PMIX_INFO_CREATE(pa[n].info, sa[n].ninfo); + if (NULL == pa[n].info) { + 
PMIX_APP_FREE(pa, src->data.darray->size); + return PMIX_ERR_NOMEM; + } + pa[n].ninfo = sa[n].ninfo; + for (m=0; m < pa[n].ninfo; m++) { + PMIX_INFO_XFER(&pa[n].info[m], &sa[n].info[m]); + } + } + } + break; + case PMIX_INFO: + PMIX_INFO_CREATE(p->data.darray->array, src->data.darray->size); + p1 = (pmix_info_t*)p->data.darray->array; + s1 = (pmix_info_t*)src->data.darray->array; + for (n=0; n < src->data.darray->size; n++) { + PMIX_LOAD_KEY(p1[n].key, s1[n].key); + rc = pmix_value_xfer(&p1[n].value, &s1[n].value); + if (PMIX_SUCCESS != rc) { + PMIX_INFO_FREE(p1, src->data.darray->size); return rc; } } - } - break; - case PMIX_MODEX: - PMIX_MODEX_CREATE(p->data.darray->array, src->data.darray->size); - if (NULL == p->data.darray->array) { - return PMIX_ERR_NOMEM; - } - pm = (pmix_modex_data_t*)p->data.darray->array; - sm = (pmix_modex_data_t*)src->data.darray->array; - for (n=0; n < src->data.darray->size; n++) { - memcpy(&pm[n], &sm[n], sizeof(pmix_modex_data_t)); - if (NULL != sm[n].blob && 0 < sm[n].size) { - pm[n].blob = (uint8_t*)malloc(sm[n].size); - if (NULL == pm[n].blob) { - return PMIX_ERR_NOMEM; + break; + case PMIX_PDATA: + PMIX_PDATA_CREATE(p->data.darray->array, src->data.darray->size); + if (NULL == p->data.darray->array) { + return PMIX_ERR_NOMEM; + } + pd = (pmix_pdata_t*)p->data.darray->array; + sd = (pmix_pdata_t*)src->data.darray->array; + for (n=0; n < src->data.darray->size; n++) { + memcpy(&pd[n].proc, &sd[n].proc, sizeof(pmix_proc_t)); + PMIX_LOAD_KEY(pd[n].key, sd[n].key); + rc = pmix_value_xfer(&pd[n].value, &sd[n].value); + if (PMIX_SUCCESS != rc) { + PMIX_INFO_FREE(pd, src->data.darray->size); + return rc; } - memcpy(pm[n].blob, sm[n].blob, sm[n].size); - pm[n].size = sm[n].size; - } else { - pm[n].blob = NULL; - pm[n].size = 0; } - } - break; - case PMIX_PERSIST: - p->data.darray->array = (pmix_persistence_t*)malloc(src->data.darray->size * sizeof(pmix_persistence_t)); - if (NULL == p->data.darray->array) { - return PMIX_ERR_NOMEM; - } 
- memcpy(p->data.darray->array, src->data.darray->array, src->data.darray->size * sizeof(pmix_persistence_t)); - break; - case PMIX_POINTER: - p->data.darray->array = (char**)malloc(src->data.darray->size * sizeof(char*)); - if (NULL == p->data.darray->array) { - return PMIX_ERR_NOMEM; - } - prarray = (char**)p->data.darray->array; - strarray = (char**)src->data.darray->array; - for (n=0; n < src->data.darray->size; n++) { - prarray[n] = strarray[n]; - } - break; - case PMIX_SCOPE: - p->data.darray->array = (pmix_scope_t*)malloc(src->data.darray->size * sizeof(pmix_scope_t)); - if (NULL == p->data.darray->array) { - return PMIX_ERR_NOMEM; - } - memcpy(p->data.darray->array, src->data.darray->array, src->data.darray->size * sizeof(pmix_scope_t)); - break; - case PMIX_DATA_RANGE: - p->data.darray->array = (pmix_data_range_t*)malloc(src->data.darray->size * sizeof(pmix_data_range_t)); - if (NULL == p->data.darray->array) { - return PMIX_ERR_NOMEM; - } - memcpy(p->data.darray->array, src->data.darray->array, src->data.darray->size * sizeof(pmix_data_range_t)); - break; - case PMIX_COMMAND: - p->data.darray->array = (pmix_cmd_t*)malloc(src->data.darray->size * sizeof(pmix_cmd_t)); - if (NULL == p->data.darray->array) { - return PMIX_ERR_NOMEM; - } - memcpy(p->data.darray->array, src->data.darray->array, src->data.darray->size * sizeof(pmix_cmd_t)); - break; - case PMIX_INFO_DIRECTIVES: - p->data.darray->array = (pmix_info_directives_t*)malloc(src->data.darray->size * sizeof(pmix_info_directives_t)); - if (NULL == p->data.darray->array) { - return PMIX_ERR_NOMEM; - } - memcpy(p->data.darray->array, src->data.darray->array, src->data.darray->size * sizeof(pmix_info_directives_t)); - break; - case PMIX_PROC_INFO: - PMIX_PROC_INFO_CREATE(p->data.darray->array, src->data.darray->size); - if (NULL == p->data.darray->array) { - return PMIX_ERR_NOMEM; - } - pi = (pmix_proc_info_t*)p->data.darray->array; - si = (pmix_proc_info_t*)src->data.darray->array; - for (n=0; n < 
src->data.darray->size; n++) { - memcpy(&pi[n].proc, &si[n].proc, sizeof(pmix_proc_t)); - if (NULL != si[n].hostname) { - pi[n].hostname = strdup(si[n].hostname); - } else { - pi[n].hostname = NULL; + break; + case PMIX_BUFFER: + p->data.darray->array = (pmix_buffer_t*)malloc(src->data.darray->size * sizeof(pmix_buffer_t)); + if (NULL == p->data.darray->array) { + return PMIX_ERR_NOMEM; } - if (NULL != si[n].executable_name) { - pi[n].executable_name = strdup(si[n].executable_name); - } else { - pi[n].executable_name = NULL; + pb = (pmix_buffer_t*)p->data.darray->array; + sb = (pmix_buffer_t*)src->data.darray->array; + for (n=0; n < src->data.darray->size; n++) { + PMIX_CONSTRUCT(&pb[n], pmix_buffer_t); + pmix20_bfrop_copy_payload(&pb[n], &sb[n]); } - pi[n].pid = si[n].pid; - pi[n].exit_code = si[n].exit_code; - pi[n].state = si[n].state; - } - break; - case PMIX_DATA_ARRAY: - return PMIX_ERR_NOT_SUPPORTED; // don't support iterative arrays - case PMIX_QUERY: - PMIX_QUERY_CREATE(p->data.darray->array, src->data.darray->size); - if (NULL == p->data.darray->array) { - return PMIX_ERR_NOMEM; - } - pq = (pmix_query_t*)p->data.darray->array; - sq = (pmix_query_t*)src->data.darray->array; - for (n=0; n < src->data.darray->size; n++) { - if (NULL != sq[n].keys) { - pq[n].keys = pmix_argv_copy(sq[n].keys); + break; + case PMIX_BYTE_OBJECT: + case PMIX_COMPRESSED_STRING: + p->data.darray->array = (pmix_byte_object_t*)malloc(src->data.darray->size * sizeof(pmix_byte_object_t)); + if (NULL == p->data.darray->array) { + return PMIX_ERR_NOMEM; } - if (NULL != sq[n].qualifiers && 0 < sq[n].nqual) { - PMIX_INFO_CREATE(pq[n].qualifiers, sq[n].nqual); - if (NULL == pq[n].qualifiers) { - PMIX_QUERY_FREE(pq, src->data.darray->size); - return PMIX_ERR_NOMEM; + pbo = (pmix_byte_object_t*)p->data.darray->array; + sbo = (pmix_byte_object_t*)src->data.darray->array; + for (n=0; n < src->data.darray->size; n++) { + if (NULL != sbo[n].bytes && 0 < sbo[n].size) { + pbo[n].size = sbo[n].size; 
+ pbo[n].bytes = (char*)malloc(pbo[n].size); + memcpy(pbo[n].bytes, sbo[n].bytes, pbo[n].size); + } else { + pbo[n].bytes = NULL; + pbo[n].size = 0; } - for (m=0; m < sq[n].nqual; m++) { - PMIX_INFO_XFER(&pq[n].qualifiers[m], &sq[n].qualifiers[m]); + } + break; + case PMIX_KVAL: + p->data.darray->array = (pmix_kval_t*)calloc(src->data.darray->size , sizeof(pmix_kval_t)); + if (NULL == p->data.darray->array) { + return PMIX_ERR_NOMEM; + } + pk = (pmix_kval_t*)p->data.darray->array; + sk = (pmix_kval_t*)src->data.darray->array; + for (n=0; n < src->data.darray->size; n++) { + if (NULL != sk[n].key) { + pk[n].key = strdup(sk[n].key); + } + if (NULL != sk[n].value) { + PMIX_VALUE_CREATE(pk[n].value, 1); + if (NULL == pk[n].value) { + free(p->data.darray->array); + return PMIX_ERR_NOMEM; + } + if (PMIX_SUCCESS != (rc = pmix20_bfrop_value_xfer(pk[n].value, sk[n].value))) { + return rc; + } } - pq[n].nqual = sq[n].nqual; - } else { - pq[n].qualifiers = NULL; - pq[n].nqual = 0; } - } - break; - default: - return PMIX_ERR_UNKNOWN_DATA_TYPE; - } - break; - case PMIX_POINTER: - memcpy(&p->data.ptr, &src->data.ptr, sizeof(void*)); - break; - /**** DEPRECATED ****/ - case PMIX_INFO_ARRAY: - p->data.array->size = src->data.array->size; - if (0 < src->data.array->size) { - p->data.array->array = (pmix_info_t*)malloc(src->data.array->size * sizeof(pmix_info_t)); - if (NULL == p->data.array->array) { - return PMIX_ERR_NOMEM; - } - p1 = (pmix_info_t*)p->data.array->array; - s1 = (pmix_info_t*)src->data.array->array; - for (n=0; n < src->data.darray->size; n++) { - PMIX_INFO_LOAD(&p1[n], s1[n].key, &s1[n].value.data.flag, s1[n].value.type); + break; + case PMIX_MODEX: + PMIX_MODEX_CREATE(p->data.darray->array, src->data.darray->size); + if (NULL == p->data.darray->array) { + return PMIX_ERR_NOMEM; + } + pm = (pmix_modex_data_t*)p->data.darray->array; + sm = (pmix_modex_data_t*)src->data.darray->array; + for (n=0; n < src->data.darray->size; n++) { + memcpy(&pm[n], &sm[n], 
sizeof(pmix_modex_data_t)); + if (NULL != sm[n].blob && 0 < sm[n].size) { + pm[n].blob = (uint8_t*)malloc(sm[n].size); + if (NULL == pm[n].blob) { + return PMIX_ERR_NOMEM; + } + memcpy(pm[n].blob, sm[n].blob, sm[n].size); + pm[n].size = sm[n].size; + } else { + pm[n].blob = NULL; + pm[n].size = 0; + } + } + break; + case PMIX_PERSIST: + p->data.darray->array = (pmix_persistence_t*)malloc(src->data.darray->size * sizeof(pmix_persistence_t)); + if (NULL == p->data.darray->array) { + return PMIX_ERR_NOMEM; + } + memcpy(p->data.darray->array, src->data.darray->array, src->data.darray->size * sizeof(pmix_persistence_t)); + break; + case PMIX_POINTER: + p->data.darray->array = (char**)malloc(src->data.darray->size * sizeof(char*)); + if (NULL == p->data.darray->array) { + return PMIX_ERR_NOMEM; + } + prarray = (char**)p->data.darray->array; + strarray = (char**)src->data.darray->array; + for (n=0; n < src->data.darray->size; n++) { + prarray[n] = strarray[n]; + } + break; + case PMIX_SCOPE: + p->data.darray->array = (pmix_scope_t*)malloc(src->data.darray->size * sizeof(pmix_scope_t)); + if (NULL == p->data.darray->array) { + return PMIX_ERR_NOMEM; + } + memcpy(p->data.darray->array, src->data.darray->array, src->data.darray->size * sizeof(pmix_scope_t)); + break; + case PMIX_DATA_RANGE: + p->data.darray->array = (pmix_data_range_t*)malloc(src->data.darray->size * sizeof(pmix_data_range_t)); + if (NULL == p->data.darray->array) { + return PMIX_ERR_NOMEM; + } + memcpy(p->data.darray->array, src->data.darray->array, src->data.darray->size * sizeof(pmix_data_range_t)); + break; + case PMIX_COMMAND: + p->data.darray->array = (pmix_cmd_t*)malloc(src->data.darray->size * sizeof(pmix_cmd_t)); + if (NULL == p->data.darray->array) { + return PMIX_ERR_NOMEM; + } + memcpy(p->data.darray->array, src->data.darray->array, src->data.darray->size * sizeof(pmix_cmd_t)); + break; + case PMIX_INFO_DIRECTIVES: + p->data.darray->array = (pmix_info_directives_t*)malloc(src->data.darray->size * 
sizeof(pmix_info_directives_t)); + if (NULL == p->data.darray->array) { + return PMIX_ERR_NOMEM; + } + memcpy(p->data.darray->array, src->data.darray->array, src->data.darray->size * sizeof(pmix_info_directives_t)); + break; + case PMIX_PROC_INFO: + PMIX_PROC_INFO_CREATE(p->data.darray->array, src->data.darray->size); + if (NULL == p->data.darray->array) { + return PMIX_ERR_NOMEM; + } + pi = (pmix_proc_info_t*)p->data.darray->array; + si = (pmix_proc_info_t*)src->data.darray->array; + for (n=0; n < src->data.darray->size; n++) { + memcpy(&pi[n].proc, &si[n].proc, sizeof(pmix_proc_t)); + if (NULL != si[n].hostname) { + pi[n].hostname = strdup(si[n].hostname); + } else { + pi[n].hostname = NULL; + } + if (NULL != si[n].executable_name) { + pi[n].executable_name = strdup(si[n].executable_name); + } else { + pi[n].executable_name = NULL; + } + pi[n].pid = si[n].pid; + pi[n].exit_code = si[n].exit_code; + pi[n].state = si[n].state; + } + break; + case PMIX_DATA_ARRAY: + return PMIX_ERR_NOT_SUPPORTED; // don't support iterative arrays + case PMIX_QUERY: + PMIX_QUERY_CREATE(p->data.darray->array, src->data.darray->size); + if (NULL == p->data.darray->array) { + return PMIX_ERR_NOMEM; + } + pq = (pmix_query_t*)p->data.darray->array; + sq = (pmix_query_t*)src->data.darray->array; + for (n=0; n < src->data.darray->size; n++) { + if (NULL != sq[n].keys) { + pq[n].keys = pmix_argv_copy(sq[n].keys); + } + if (NULL != sq[n].qualifiers && 0 < sq[n].nqual) { + PMIX_INFO_CREATE(pq[n].qualifiers, sq[n].nqual); + if (NULL == pq[n].qualifiers) { + PMIX_QUERY_FREE(pq, src->data.darray->size); + return PMIX_ERR_NOMEM; + } + for (m=0; m < sq[n].nqual; m++) { + PMIX_INFO_XFER(&pq[n].qualifiers[m], &sq[n].qualifiers[m]); + } + pq[n].nqual = sq[n].nqual; + } else { + pq[n].qualifiers = NULL; + pq[n].nqual = 0; + } + } + break; + default: + return PMIX_ERR_UNKNOWN_DATA_TYPE; } - } - break; - /********************/ - default: - pmix_output(0, "COPY-PMIX-VALUE: UNSUPPORTED TYPE %d", 
(int)src->type); - return PMIX_ERROR; + break; + case PMIX_POINTER: + memcpy(&p->data.ptr, &src->data.ptr, sizeof(void*)); + break; + /**** DEPRECATED ****/ + case PMIX_INFO_ARRAY: + return PMIX_ERR_NOT_SUPPORTED; + /********************/ + default: + pmix_output(0, "COPY-PMIX-VALUE: UNSUPPORTED TYPE %d", (int)src->type); + return PMIX_ERROR; } return PMIX_SUCCESS; } @@ -895,7 +899,7 @@ pmix_status_t pmix20_bfrop_copy_info(pmix_info_t **dest, pmix_info_t *src, pmix_data_type_t type) { *dest = (pmix_info_t*)malloc(sizeof(pmix_info_t)); - (void)strncpy((*dest)->key, src->key, PMIX_MAX_KEYLEN); + pmix_strncpy((*dest)->key, src->key, PMIX_MAX_KEYLEN); (*dest)->flags = src->flags; return pmix20_bfrop_value_xfer(&(*dest)->value, &src->value); } @@ -924,7 +928,7 @@ pmix_status_t pmix20_bfrop_copy_app(pmix_app_t **dest, pmix_app_t *src, (*dest)->ninfo = src->ninfo; (*dest)->info = (pmix_info_t*)malloc(src->ninfo * sizeof(pmix_info_t)); for (j=0; j < src->ninfo; j++) { - (void)strncpy((*dest)->info[j].key, src->info[j].key, PMIX_MAX_KEYLEN); + pmix_strncpy((*dest)->info[j].key, src->info[j].key, PMIX_MAX_KEYLEN); pmix20_bfrop_value_xfer(&(*dest)->info[j].value, &src->info[j].value); } return PMIX_SUCCESS; @@ -955,7 +959,7 @@ pmix_status_t pmix20_bfrop_copy_proc(pmix_proc_t **dest, pmix_proc_t *src, if (NULL == *dest) { return PMIX_ERR_OUT_OF_RESOURCE; } - (void)strncpy((*dest)->nspace, src->nspace, PMIX_MAX_NSLEN); + pmix_strncpy((*dest)->nspace, src->nspace, PMIX_MAX_NSLEN); (*dest)->rank = src->rank; return PMIX_SUCCESS; } @@ -1008,9 +1012,9 @@ pmix_status_t pmix20_bfrop_copy_pdata(pmix_pdata_t **dest, pmix_pdata_t *src, pmix_data_type_t type) { *dest = (pmix_pdata_t*)malloc(sizeof(pmix_pdata_t)); - (void)strncpy((*dest)->proc.nspace, src->proc.nspace, PMIX_MAX_NSLEN); + pmix_strncpy((*dest)->proc.nspace, src->proc.nspace, PMIX_MAX_NSLEN); (*dest)->proc.rank = src->proc.rank; - (void)strncpy((*dest)->key, src->key, PMIX_MAX_KEYLEN); + pmix_strncpy((*dest)->key, src->key, 
PMIX_MAX_KEYLEN); return pmix20_bfrop_value_xfer(&(*dest)->value, &src->value); } @@ -1018,7 +1022,7 @@ pmix_status_t pmix20_bfrop_copy_pinfo(pmix_proc_info_t **dest, pmix_proc_info_t pmix_data_type_t type) { *dest = (pmix_proc_info_t*)malloc(sizeof(pmix_proc_info_t)); - (void)strncpy((*dest)->proc.nspace, src->proc.nspace, PMIX_MAX_NSLEN); + pmix_strncpy((*dest)->proc.nspace, src->proc.nspace, PMIX_MAX_NSLEN); (*dest)->proc.rank = src->proc.rank; if (NULL != src->hostname) { (*dest)->hostname = strdup(src->hostname); diff --git a/opal/mca/pmix/pmix3x/pmix/src/mca/bfrops/v20/internal.h b/opal/mca/pmix/pmix3x/pmix/src/mca/bfrops/v20/internal.h index d85ac2985c2..1478687f6c7 100644 --- a/opal/mca/pmix/pmix3x/pmix/src/mca/bfrops/v20/internal.h +++ b/opal/mca/pmix/pmix3x/pmix/src/mca/bfrops/v20/internal.h @@ -11,7 +11,7 @@ * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2012 Los Alamos National Security, Inc. All rights reserved. - * Copyright (c) 2014-2017 Intel, Inc. All rights reserved. + * Copyright (c) 2014-2018 Intel, Inc. All rights reserved. * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. * Copyright (c) 2016 IBM Corporation. All rights reserved. 
@@ -42,6 +42,55 @@ BEGIN_C_DECLS +/* DEPRECATED data type values */ +#define PMIX_MODEX 29 +#define PMIX_INFO_ARRAY 44 + +/**** PMIX MODEX STRUCT - DEPRECATED ****/ +typedef struct pmix_modex_data { + char nspace[PMIX_MAX_NSLEN+1]; + int rank; + uint8_t *blob; + size_t size; +} pmix_modex_data_t; +/* utility macros for working with pmix_modex_t structs */ +#define PMIX_MODEX_CREATE(m, n) \ + do { \ + (m) = (pmix_modex_data_t*)calloc((n) , sizeof(pmix_modex_data_t)); \ + } while (0) + +#define PMIX_MODEX_RELEASE(m) \ + do { \ + PMIX_MODEX_DESTRUCT((m)); \ + free((m)); \ + (m) = NULL; \ + } while (0) + +#define PMIX_MODEX_CONSTRUCT(m) \ + do { \ + memset((m), 0, sizeof(pmix_modex_data_t)); \ + } while (0) + +#define PMIX_MODEX_DESTRUCT(m) \ + do { \ + if (NULL != (m)->blob) { \ + free((m)->blob); \ + (m)->blob = NULL; \ + } \ + } while (0) + +#define PMIX_MODEX_FREE(m, n) \ + do { \ + size_t _s; \ + if (NULL != (m)) { \ + for (_s=0; _s < (n); _s++) { \ + PMIX_MODEX_DESTRUCT(&((m)[_s])); \ + } \ + free((m)); \ + (m) = NULL; \ + } \ + } while (0) + /* * Implementations of API functions */ @@ -59,7 +108,7 @@ pmix_status_t pmix20_bfrop_print(char **output, char *prefix, void *src, pmix_da pmix_status_t pmix20_bfrop_copy_payload(pmix_buffer_t *dest, pmix_buffer_t *src); -pmix_status_t pmix20_bfrop_value_xfer(pmix_value_t *p, pmix_value_t *src); +pmix_status_t pmix20_bfrop_value_xfer(pmix_value_t *p, const pmix_value_t *src); void pmix20_bfrop_value_load(pmix_value_t *v, const void *data, pmix_data_type_t type); diff --git a/opal/mca/pmix/pmix3x/pmix/src/mca/bfrops/v20/pack.c b/opal/mca/pmix/pmix3x/pmix/src/mca/bfrops/v20/pack.c index 91636204497..6aa194b8b97 100644 --- a/opal/mca/pmix/pmix3x/pmix/src/mca/bfrops/v20/pack.c +++ b/opal/mca/pmix/pmix3x/pmix/src/mca/bfrops/v20/pack.c @@ -10,7 +10,7 @@ * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2011-2013 Cisco Systems, Inc. All rights reserved. 
- * Copyright (c) 2014-2017 Intel, Inc. All rights reserved. + * Copyright (c) 2014-2018 Intel, Inc. All rights reserved. * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. * Copyright (c) 2016 Mellanox Technologies, Inc. @@ -613,13 +613,6 @@ static pmix_status_t pack_val(pmix_buffer_t *buffer, return ret; } break; - /**** DEPRECATED ****/ - case PMIX_INFO_ARRAY: - if (PMIX_SUCCESS != (ret = pmix20_bfrop_pack_buffer(buffer, p->data.array, 1, PMIX_INFO_ARRAY))) { - return ret; - } - break; - /********************/ default: pmix_output(0, "PACK-PMIX-VALUE: UNSUPPORTED TYPE %d", (int)p->type); return PMIX_ERROR; @@ -1035,10 +1028,9 @@ pmix_status_t pmix20_bfrop_pack_alloc_directive(pmix_buffer_t *buffer, const voi return pmix20_bfrop_pack_byte(buffer, src, num_vals, PMIX_UINT8); } - /**** DEPRECATED ****/ pmix_status_t pmix20_bfrop_pack_array(pmix_buffer_t *buffer, const void *src, - int32_t num_vals, pmix_data_type_t type) + int32_t num_vals, pmix_data_type_t type) { pmix_info_array_t *ptr; int32_t i; @@ -1048,12 +1040,12 @@ pmix_status_t pmix20_bfrop_pack_array(pmix_buffer_t *buffer, const void *src, for (i = 0; i < num_vals; ++i) { /* pack the size */ - if (PMIX_SUCCESS != (ret = pmix20_bfrop_pack_sizet(buffer, &ptr[i].size, 1, PMIX_SIZE))) { + if (PMIX_SUCCESS != (ret = pmix_bfrops_base_pack_sizet(buffer, &ptr[i].size, 1, PMIX_SIZE))) { return ret; } if (0 < ptr[i].size) { /* pack the values */ - if (PMIX_SUCCESS != (ret = pmix20_bfrop_pack_info(buffer, ptr[i].array, ptr[i].size, PMIX_INFO))) { + if (PMIX_SUCCESS != (ret = pmix_bfrops_base_pack_info(buffer, ptr[i].array, ptr[i].size, PMIX_INFO))) { return ret; } } @@ -1062,3 +1054,4 @@ pmix_status_t pmix20_bfrop_pack_array(pmix_buffer_t *buffer, const void *src, return PMIX_SUCCESS; } /********************/ + diff --git a/opal/mca/pmix/pmix3x/pmix/src/mca/bfrops/v20/print.c b/opal/mca/pmix/pmix3x/pmix/src/mca/bfrops/v20/print.c index 
500bd87f1c1..a9009be364d 100644 --- a/opal/mca/pmix/pmix3x/pmix/src/mca/bfrops/v20/print.c +++ b/opal/mca/pmix/pmix3x/pmix/src/mca/bfrops/v20/print.c @@ -10,7 +10,7 @@ * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2012 Los Alamos National Security, Inc. All rights reserved. - * Copyright (c) 2014-2017 Intel, Inc. All rights reserved. + * Copyright (c) 2014-2018 Intel, Inc. All rights reserved. * Copyright (c) 2016 Mellanox Technologies, Inc. * All rights reserved. * Copyright (c) 2016 IBM Corporation. All rights reserved. @@ -761,7 +761,7 @@ pmix_status_t pmix20_bfrop_print_status(char **output, char *prefix, * PMIX_VALUE */ pmix_status_t pmix20_bfrop_print_value(char **output, char *prefix, - pmix_value_t *src, pmix_data_type_t type) + pmix_value_t *src, pmix_data_type_t type) { char *prefx; int rc; @@ -904,12 +904,6 @@ pmix_status_t pmix20_bfrop_print_status(char **output, char *prefix, rc = asprintf(output, "%sPMIX_VALUE: Data type: DATA_ARRAY\tARRAY SIZE: %ld", prefx, (long)src->data.darray->size); break; - /**** DEPRECATED ****/ - case PMIX_INFO_ARRAY: - rc = asprintf(output, "%sPMIX_VALUE: Data type: INFO_ARRAY\tARRAY SIZE: %ld", - prefx, (long)src->data.array->size); - break; - /********************/ default: rc = asprintf(output, "%sPMIX_VALUE: Data type: UNKNOWN\tValue: UNPRINTABLE", prefx); break; diff --git a/opal/mca/pmix/pmix3x/pmix/src/mca/bfrops/v20/unpack.c b/opal/mca/pmix/pmix3x/pmix/src/mca/bfrops/v20/unpack.c index f812a005bc1..91ce264fd30 100644 --- a/opal/mca/pmix/pmix3x/pmix/src/mca/bfrops/v20/unpack.c +++ b/opal/mca/pmix/pmix3x/pmix/src/mca/bfrops/v20/unpack.c @@ -10,7 +10,7 @@ * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2012 Los Alamos National Security, Inc. All rights reserved. - * Copyright (c) 2014-2017 Intel, Inc. All rights reserved. + * Copyright (c) 2014-2019 Intel, Inc. All rights reserved. 
* Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. * Copyright (c) 2016 Mellanox Technologies, Inc. @@ -756,12 +756,13 @@ pmix_status_t pmix20_bfrop_unpack_status(pmix_buffer_t *buffer, void *dest, break; /**** DEPRECATED ****/ case PMIX_INFO_ARRAY: - /* this field is now a pointer, so we must allocate storage for it */ - val->data.array = (pmix_info_array_t*)malloc(sizeof(pmix_info_array_t)); - if (NULL == val->data.array) { - return PMIX_ERR_NOMEM; - } - if (PMIX_SUCCESS != (ret = pmix20_bfrop_unpack_buffer(buffer, val->data.array, &m, PMIX_INFO_ARRAY))) { + /* we don't know anything about info array's so we + * have to convert this to a data array */ + val->data.darray = (pmix_data_array_t*)calloc(1, sizeof(pmix_data_array_t)); + val->data.darray->type = PMIX_INFO_ARRAY; + val->data.darray->size = m; + /* unpack into it */ + if (PMIX_SUCCESS != (ret = pmix20_bfrop_unpack_buffer(buffer, &val->data.darray->array, &m, PMIX_INFO_ARRAY))) { return ret; } break; @@ -825,7 +826,7 @@ pmix_status_t pmix20_bfrop_unpack_info(pmix_buffer_t *buffer, void *dest, PMIX_ERROR_LOG(PMIX_ERROR); return PMIX_ERROR; } - (void)strncpy(ptr[i].key, tmp, PMIX_MAX_KEYLEN); + pmix_strncpy(ptr[i].key, tmp, PMIX_MAX_KEYLEN); free(tmp); /* unpack the flags */ m=1; @@ -882,7 +883,7 @@ pmix_status_t pmix20_bfrop_unpack_pdata(pmix_buffer_t *buffer, void *dest, if (NULL == tmp) { return PMIX_ERROR; } - (void)strncpy(ptr[i].key, tmp, PMIX_MAX_KEYLEN); + pmix_strncpy(ptr[i].key, tmp, PMIX_MAX_KEYLEN); free(tmp); /* unpack value - since the value structure is statically-defined * instead of a pointer in this struct, we directly unpack it to @@ -962,7 +963,7 @@ pmix_status_t pmix20_bfrop_unpack_proc(pmix_buffer_t *buffer, void *dest, if (NULL == tmp) { return PMIX_ERROR; } - (void)strncpy(ptr[i].nspace, tmp, PMIX_MAX_NSLEN); + pmix_strncpy(ptr[i].nspace, tmp, PMIX_MAX_NSLEN); free(tmp); /* unpack the rank */ m=1; diff --git 
a/opal/mca/pmix/pmix3x/pmix/src/mca/bfrops/v21/Makefile.am b/opal/mca/pmix/pmix3x/pmix/src/mca/bfrops/v21/Makefile.am index d4da3258b44..1658de9d1db 100644 --- a/opal/mca/pmix/pmix3x/pmix/src/mca/bfrops/v21/Makefile.am +++ b/opal/mca/pmix/pmix3x/pmix/src/mca/bfrops/v21/Makefile.am @@ -11,7 +11,7 @@ # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. # Copyright (c) 2012 Los Alamos National Security, Inc. All rights reserved. -# Copyright (c) 2013-2017 Intel, Inc. All rights reserved. +# Copyright (c) 2013-2019 Intel, Inc. All rights reserved. # $COPYRIGHT$ # # Additional copyrights may follow @@ -44,6 +44,9 @@ mcacomponentdir = $(pmixlibdir) mcacomponent_LTLIBRARIES = $(component) mca_bfrops_v21_la_SOURCES = $(component_sources) mca_bfrops_v21_la_LDFLAGS = -module -avoid-version +if NEED_LIBPMIX +mca_bfrops_v21_la_LIBADD = $(top_builddir)/src/libpmix.la +endif noinst_LTLIBRARIES = $(lib) libmca_bfrops_v21_la_SOURCES = $(lib_sources) diff --git a/opal/mca/pmix/pmix3x/pmix/src/mca/bfrops/v21/bfrop_pmix21.c b/opal/mca/pmix/pmix3x/pmix/src/mca/bfrops/v21/bfrop_pmix21.c index adcc3cba565..8100b70ed64 100644 --- a/opal/mca/pmix/pmix3x/pmix/src/mca/bfrops/v21/bfrop_pmix21.c +++ b/opal/mca/pmix/pmix3x/pmix/src/mca/bfrops/v21/bfrop_pmix21.c @@ -13,7 +13,7 @@ * Copyright (c) 2011-2014 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2011-2013 Los Alamos National Security, LLC. All rights * reserved. - * Copyright (c) 2013-2017 Intel, Inc. All rights reserved. + * Copyright (c) 2013-2018 Intel, Inc. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -63,6 +63,37 @@ pmix_bfrops_module_t pmix_bfrops_pmix21_module = { .data_type_string = data_type_string }; +/* DEPRECATED data type values */ +#define PMIX_MODEX 29 +#define PMIX_INFO_ARRAY 44 + +/**** PMIX MODEX STRUCT - DEPRECATED ****/ +typedef struct pmix_modex_data { + char nspace[PMIX_MAX_NSLEN+1]; + int rank; + uint8_t *blob; + size_t size; +} pmix_modex_data_t; + +static pmix_status_t pmix21_bfrop_pack_array(pmix_buffer_t *buffer, const void *src, + int32_t num_vals, pmix_data_type_t type); +static pmix_status_t pmix21_bfrop_pack_modex(pmix_buffer_t *buffer, const void *src, + int32_t num_vals, pmix_data_type_t type); +static pmix_status_t pmix21_bfrop_unpack_array(pmix_buffer_t *buffer, void *dest, + int32_t *num_vals, pmix_data_type_t type); +static pmix_status_t pmix21_bfrop_unpack_modex(pmix_buffer_t *buffer, void *dest, + int32_t *num_vals, pmix_data_type_t type); +static pmix_status_t pmix21_bfrop_copy_array(pmix_info_array_t **dest, + pmix_info_array_t *src, + pmix_data_type_t type); +static pmix_status_t pmix21_bfrop_copy_modex(pmix_modex_data_t **dest, pmix_modex_data_t *src, + pmix_data_type_t type); +static pmix_status_t pmix21_bfrop_print_array(char **output, char *prefix, + pmix_info_array_t *src, pmix_data_type_t type); +static pmix_status_t pmix21_bfrop_print_modex(char **output, char *prefix, + pmix_modex_data_t *src, pmix_data_type_t type); + + static pmix_status_t init(void) { /* some standard types don't require anything special */ @@ -265,10 +296,10 @@ static pmix_status_t init(void) &mca_bfrops_v21_component.types); PMIX_REGISTER_TYPE("PMIX_MODEX", PMIX_MODEX, - pmix_bfrops_base_pack_modex, - pmix_bfrops_base_unpack_modex, - pmix_bfrops_base_copy_modex, - pmix_bfrops_base_print_modex, + pmix21_bfrop_pack_modex, + pmix21_bfrop_unpack_modex, + pmix21_bfrop_copy_modex, + pmix21_bfrop_print_modex, &mca_bfrops_v21_component.types); /* these are fixed-sized values and can be done by base 
*/ @@ -374,10 +405,10 @@ static pmix_status_t init(void) /**** DEPRECATED ****/ PMIX_REGISTER_TYPE("PMIX_INFO_ARRAY", PMIX_INFO_ARRAY, - pmix_bfrops_base_pack_array, - pmix_bfrops_base_unpack_array, - pmix_bfrops_base_copy_array, - pmix_bfrops_base_print_array, + pmix21_bfrop_pack_array, + pmix21_bfrop_unpack_array, + pmix21_bfrop_copy_array, + pmix21_bfrop_print_array, &mca_bfrops_v21_component.types); /********************/ @@ -446,3 +477,198 @@ static const char* data_type_string(pmix_data_type_t type) { return pmix_bfrops_base_data_type_string(&mca_bfrops_v21_component.types, type); } + +/**** DEPRECATED ****/ +static pmix_status_t pmix21_bfrop_pack_array(pmix_buffer_t *buffer, const void *src, + int32_t num_vals, pmix_data_type_t type) +{ + pmix_info_array_t *ptr; + int32_t i; + pmix_status_t ret; + + ptr = (pmix_info_array_t *) src; + + for (i = 0; i < num_vals; ++i) { + /* pack the size */ + if (PMIX_SUCCESS != (ret = pmix_bfrops_base_pack_sizet(buffer, &ptr[i].size, 1, PMIX_SIZE))) { + return ret; + } + if (0 < ptr[i].size) { + /* pack the values */ + if (PMIX_SUCCESS != (ret = pmix_bfrops_base_pack_info(buffer, ptr[i].array, ptr[i].size, PMIX_INFO))) { + return ret; + } + } + } + + return PMIX_SUCCESS; +} + +static pmix_status_t pmix21_bfrop_pack_modex(pmix_buffer_t *buffer, const void *src, + int32_t num_vals, pmix_data_type_t type) +{ + pmix_modex_data_t *ptr; + int32_t i; + pmix_status_t ret; + + ptr = (pmix_modex_data_t *) src; + + for (i = 0; i < num_vals; ++i) { + if (PMIX_SUCCESS != (ret = pmix_bfrops_base_pack_sizet(buffer, &ptr[i].size, 1, PMIX_SIZE))) { + return ret; + } + if( 0 < ptr[i].size){ + if (PMIX_SUCCESS != (ret = pmix_bfrops_base_pack_byte(buffer, ptr[i].blob, ptr[i].size, PMIX_UINT8))) { + return ret; + } + } + } + return PMIX_SUCCESS; +} + +/********************/ + +/**** DEPRECATED ****/ +static pmix_status_t pmix21_bfrop_unpack_array(pmix_buffer_t *buffer, void *dest, + int32_t *num_vals, pmix_data_type_t type) +{ + 
pmix_info_array_t *ptr; + int32_t i, n, m; + pmix_status_t ret; + + pmix_output_verbose(20, pmix_bfrops_base_framework.framework_output, + "pmix21_bfrop_unpack: %d info arrays", *num_vals); + + ptr = (pmix_info_array_t*) dest; + n = *num_vals; + + for (i = 0; i < n; ++i) { + pmix_output_verbose(20, pmix_bfrops_base_framework.framework_output, + "pmix21_bfrop_unpack: init array[%d]", i); + memset(&ptr[i], 0, sizeof(pmix_info_array_t)); + /* unpack the size of this array */ + m=1; + if (PMIX_SUCCESS != (ret = pmix_bfrops_base_unpack_sizet(buffer, &ptr[i].size, &m, PMIX_SIZE))) { + return ret; + } + if (0 < ptr[i].size) { + ptr[i].array = (pmix_info_t*)malloc(ptr[i].size * sizeof(pmix_info_t)); + m=ptr[i].size; + if (PMIX_SUCCESS != (ret = pmix_bfrops_base_unpack_value(buffer, ptr[i].array, &m, PMIX_INFO))) { + return ret; + } + } + } + return PMIX_SUCCESS; +} + +static pmix_status_t pmix21_bfrop_unpack_modex(pmix_buffer_t *buffer, void *dest, + int32_t *num_vals, pmix_data_type_t type) +{ + pmix_modex_data_t *ptr; + int32_t i, n, m; + pmix_status_t ret; + + pmix_output_verbose(20, pmix_bfrops_base_framework.framework_output, + "pmix20_bfrop_unpack: %d modex", *num_vals); + + ptr = (pmix_modex_data_t *) dest; + n = *num_vals; + + for (i = 0; i < n; ++i) { + memset(&ptr[i], 0, sizeof(pmix_modex_data_t)); + /* unpack the number of bytes */ + m=1; + if (PMIX_SUCCESS != (ret = pmix_bfrops_base_unpack_sizet(buffer, &ptr[i].size, &m, PMIX_SIZE))) { + return ret; + } + if (0 < ptr[i].size) { + ptr[i].blob = (uint8_t*)malloc(ptr[i].size * sizeof(uint8_t)); + m=ptr[i].size; + if (PMIX_SUCCESS != (ret = pmix_bfrops_base_unpack_byte(buffer, ptr[i].blob, &m, PMIX_UINT8))) { + return ret; + } + } + } + return PMIX_SUCCESS; +} + +/********************/ + +/**** DEPRECATED ****/ +static pmix_status_t pmix21_bfrop_copy_array(pmix_info_array_t **dest, + pmix_info_array_t *src, + pmix_data_type_t type) +{ + pmix_info_t *d1, *s1; + + *dest = 
(pmix_info_array_t*)malloc(sizeof(pmix_info_array_t)); + (*dest)->size = src->size; + (*dest)->array = (pmix_info_t*)malloc(src->size * sizeof(pmix_info_t)); + d1 = (pmix_info_t*)(*dest)->array; + s1 = (pmix_info_t*)src->array; + memcpy(d1, s1, src->size * sizeof(pmix_info_t)); + return PMIX_SUCCESS; +} + +static pmix_status_t pmix21_bfrop_copy_modex(pmix_modex_data_t **dest, pmix_modex_data_t *src, + pmix_data_type_t type) +{ + *dest = (pmix_modex_data_t*)malloc(sizeof(pmix_modex_data_t)); + if (NULL == *dest) { + return PMIX_ERR_OUT_OF_RESOURCE; + } + (*dest)->blob = NULL; + (*dest)->size = 0; + if (NULL != src->blob) { + (*dest)->blob = (uint8_t*)malloc(src->size * sizeof(uint8_t)); + if (NULL == (*dest)->blob) { + return PMIX_ERR_OUT_OF_RESOURCE; + } + memcpy((*dest)->blob, src->blob, src->size * sizeof(uint8_t)); + (*dest)->size = src->size; + } + return PMIX_SUCCESS; +} + +/*******************/ + +/**** DEPRECATED ****/ +static pmix_status_t pmix21_bfrop_print_array(char **output, char *prefix, + pmix_info_array_t *src, pmix_data_type_t type) +{ + size_t j; + char *tmp, *tmp2, *tmp3, *pfx; + pmix_info_t *s1; + + if (0 > asprintf(&tmp, "%sARRAY SIZE: %ld", prefix, (long)src->size)) { + return PMIX_ERR_NOMEM; + } + if (0 > asprintf(&pfx, "\n%s\t", (NULL == prefix) ? 
"" : prefix)) { + free(tmp); + return PMIX_ERR_NOMEM; + } + s1 = (pmix_info_t*)src->array; + + for (j=0; j < src->size; j++) { + pmix_bfrops_base_print_info(&tmp2, pfx, &s1[j], PMIX_INFO); + if (0 > asprintf(&tmp3, "%s%s", tmp, tmp2)) { + free(tmp); + free(tmp2); + return PMIX_ERR_NOMEM; + } + free(tmp); + free(tmp2); + tmp = tmp3; + } + *output = tmp; + return PMIX_SUCCESS; +} + +static pmix_status_t pmix21_bfrop_print_modex(char **output, char *prefix, + pmix_modex_data_t *src, pmix_data_type_t type) +{ + return PMIX_SUCCESS; +} + +/********************/ diff --git a/opal/mca/pmix/pmix3x/pmix/src/mca/bfrops/v3/Makefile.am b/opal/mca/pmix/pmix3x/pmix/src/mca/bfrops/v3/Makefile.am index 14438eac7f1..d14a13258ff 100644 --- a/opal/mca/pmix/pmix3x/pmix/src/mca/bfrops/v3/Makefile.am +++ b/opal/mca/pmix/pmix3x/pmix/src/mca/bfrops/v3/Makefile.am @@ -11,7 +11,7 @@ # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. # Copyright (c) 2012 Los Alamos National Security, Inc. All rights reserved. -# Copyright (c) 2013-2017 Intel, Inc. All rights reserved. +# Copyright (c) 2013-2019 Intel, Inc. All rights reserved. 
# $COPYRIGHT$ # # Additional copyrights may follow @@ -44,6 +44,9 @@ mcacomponentdir = $(pmixlibdir) mcacomponent_LTLIBRARIES = $(component) mca_bfrops_v3_la_SOURCES = $(component_sources) mca_bfrops_v3_la_LDFLAGS = -module -avoid-version +if NEED_LIBPMIX +mca_bfrops_v3_la_LIBADD = $(top_builddir)/src/libpmix.la +endif noinst_LTLIBRARIES = $(lib) libmca_bfrops_v3_la_SOURCES = $(lib_sources) diff --git a/opal/mca/pmix/pmix3x/pmix/src/mca/bfrops/v3/bfrop_pmix3.c b/opal/mca/pmix/pmix3x/pmix/src/mca/bfrops/v3/bfrop_pmix3.c index 828cb39ac63..eac138a80dc 100644 --- a/opal/mca/pmix/pmix3x/pmix/src/mca/bfrops/v3/bfrop_pmix3.c +++ b/opal/mca/pmix/pmix3x/pmix/src/mca/bfrops/v3/bfrop_pmix3.c @@ -63,6 +63,37 @@ pmix_bfrops_module_t pmix_bfrops_pmix3_module = { .data_type_string = data_type_string }; +/* DEPRECATED data type values */ +#define PMIX_MODEX 29 +#define PMIX_INFO_ARRAY 44 + +/**** PMIX MODEX STRUCT - DEPRECATED ****/ +typedef struct pmix_modex_data { + char nspace[PMIX_MAX_NSLEN+1]; + int rank; + uint8_t *blob; + size_t size; +} pmix_modex_data_t; + +static pmix_status_t pmix3_bfrop_pack_array(pmix_buffer_t *buffer, const void *src, + int32_t num_vals, pmix_data_type_t type); +static pmix_status_t pmix3_bfrop_pack_modex(pmix_buffer_t *buffer, const void *src, + int32_t num_vals, pmix_data_type_t type); +static pmix_status_t pmix3_bfrop_unpack_array(pmix_buffer_t *buffer, void *dest, + int32_t *num_vals, pmix_data_type_t type); +static pmix_status_t pmix3_bfrop_unpack_modex(pmix_buffer_t *buffer, void *dest, + int32_t *num_vals, pmix_data_type_t type); +static pmix_status_t pmix3_bfrop_copy_array(pmix_info_array_t **dest, + pmix_info_array_t *src, + pmix_data_type_t type); +static pmix_status_t pmix3_bfrop_copy_modex(pmix_modex_data_t **dest, pmix_modex_data_t *src, + pmix_data_type_t type); +static pmix_status_t pmix3_bfrop_print_array(char **output, char *prefix, + pmix_info_array_t *src, pmix_data_type_t type); +static pmix_status_t pmix3_bfrop_print_modex(char 
**output, char *prefix, + pmix_modex_data_t *src, pmix_data_type_t type); + + static pmix_status_t init(void) { /* some standard types don't require anything special */ @@ -265,10 +296,10 @@ static pmix_status_t init(void) &mca_bfrops_v3_component.types); PMIX_REGISTER_TYPE("PMIX_MODEX", PMIX_MODEX, - pmix_bfrops_base_pack_modex, - pmix_bfrops_base_unpack_modex, - pmix_bfrops_base_copy_modex, - pmix_bfrops_base_print_modex, + pmix3_bfrop_pack_modex, + pmix3_bfrop_unpack_modex, + pmix3_bfrop_copy_modex, + pmix3_bfrop_print_modex, &mca_bfrops_v3_component.types); /* these are fixed-sized values and can be done by base */ @@ -390,10 +421,10 @@ static pmix_status_t init(void) /**** DEPRECATED ****/ PMIX_REGISTER_TYPE("PMIX_INFO_ARRAY", PMIX_INFO_ARRAY, - pmix_bfrops_base_pack_array, - pmix_bfrops_base_unpack_array, - pmix_bfrops_base_copy_array, - pmix_bfrops_base_print_array, + pmix3_bfrop_pack_array, + pmix3_bfrop_unpack_array, + pmix3_bfrop_copy_array, + pmix3_bfrop_print_array, &mca_bfrops_v3_component.types); /********************/ @@ -462,3 +493,201 @@ static const char* data_type_string(pmix_data_type_t type) { return pmix_bfrops_base_data_type_string(&mca_bfrops_v3_component.types, type); } + +/**** DEPRECATED ****/ +static pmix_status_t pmix3_bfrop_pack_array(pmix_buffer_t *buffer, const void *src, + int32_t num_vals, pmix_data_type_t type) +{ + pmix_info_array_t *ptr; + int32_t i; + pmix_status_t ret; + + ptr = (pmix_info_array_t *) src; + + for (i = 0; i < num_vals; ++i) { + /* pack the size */ + if (PMIX_SUCCESS != (ret = pmix_bfrops_base_pack_sizet(buffer, &ptr[i].size, 1, PMIX_SIZE))) { + return ret; + } + if (0 < ptr[i].size) { + /* pack the values */ + if (PMIX_SUCCESS != (ret = pmix_bfrops_base_pack_info(buffer, ptr[i].array, ptr[i].size, PMIX_INFO))) { + return ret; + } + } + } + + return PMIX_SUCCESS; +} + +static pmix_status_t pmix3_bfrop_pack_modex(pmix_buffer_t *buffer, const void *src, + int32_t num_vals, pmix_data_type_t type) +{ + 
pmix_modex_data_t *ptr; + int32_t i; + pmix_status_t ret; + + ptr = (pmix_modex_data_t *) src; + + for (i = 0; i < num_vals; ++i) { + if (PMIX_SUCCESS != (ret = pmix_bfrops_base_pack_sizet(buffer, &ptr[i].size, 1, PMIX_SIZE))) { + return ret; + } + if( 0 < ptr[i].size){ + if (PMIX_SUCCESS != (ret = pmix_bfrops_base_pack_byte(buffer, ptr[i].blob, ptr[i].size, PMIX_UINT8))) { + return ret; + } + } + } + return PMIX_SUCCESS; +} + + +/********************/ + +/**** DEPRECATED ****/ +static pmix_status_t pmix3_bfrop_unpack_array(pmix_buffer_t *buffer, void *dest, + int32_t *num_vals, pmix_data_type_t type) +{ + pmix_info_array_t *ptr; + int32_t i, n, m; + pmix_status_t ret; + + pmix_output_verbose(20, pmix_bfrops_base_framework.framework_output, + "pmix3_bfrop_unpack: %d info arrays", *num_vals); + + ptr = (pmix_info_array_t*) dest; + n = *num_vals; + + for (i = 0; i < n; ++i) { + pmix_output_verbose(20, pmix_bfrops_base_framework.framework_output, + "pmix3_bfrop_unpack: init array[%d]", i); + memset(&ptr[i], 0, sizeof(pmix_info_array_t)); + /* unpack the size of this array */ + m=1; + if (PMIX_SUCCESS != (ret = pmix_bfrops_base_unpack_sizet(buffer, &ptr[i].size, &m, PMIX_SIZE))) { + return ret; + } + if (0 < ptr[i].size) { + ptr[i].array = (pmix_info_t*)malloc(ptr[i].size * sizeof(pmix_info_t)); + m=ptr[i].size; + if (PMIX_SUCCESS != (ret = pmix_bfrops_base_unpack_value(buffer, ptr[i].array, &m, PMIX_INFO))) { + return ret; + } + } + } + return PMIX_SUCCESS; +} + +static pmix_status_t pmix3_bfrop_unpack_modex(pmix_buffer_t *buffer, void *dest, + int32_t *num_vals, pmix_data_type_t type) +{ + pmix_modex_data_t *ptr; + int32_t i, n, m; + pmix_status_t ret; + + pmix_output_verbose(20, pmix_bfrops_base_framework.framework_output, + "pmix20_bfrop_unpack: %d modex", *num_vals); + + ptr = (pmix_modex_data_t *) dest; + n = *num_vals; + + for (i = 0; i < n; ++i) { + memset(&ptr[i], 0, sizeof(pmix_modex_data_t)); + /* unpack the number of bytes */ + m=1; + if (PMIX_SUCCESS != 
(ret = pmix_bfrops_base_unpack_sizet(buffer, &ptr[i].size, &m, PMIX_SIZE))) { + return ret; + } + if (0 < ptr[i].size) { + ptr[i].blob = (uint8_t*)malloc(ptr[i].size * sizeof(uint8_t)); + m=ptr[i].size; + if (PMIX_SUCCESS != (ret = pmix_bfrops_base_unpack_byte(buffer, ptr[i].blob, &m, PMIX_UINT8))) { + return ret; + } + } + } + return PMIX_SUCCESS; +} + + +/********************/ + +/**** DEPRECATED ****/ +static pmix_status_t pmix3_bfrop_copy_array(pmix_info_array_t **dest, + pmix_info_array_t *src, + pmix_data_type_t type) +{ + pmix_info_t *d1, *s1; + + *dest = (pmix_info_array_t*)malloc(sizeof(pmix_info_array_t)); + (*dest)->size = src->size; + (*dest)->array = (pmix_info_t*)malloc(src->size * sizeof(pmix_info_t)); + d1 = (pmix_info_t*)(*dest)->array; + s1 = (pmix_info_t*)src->array; + memcpy(d1, s1, src->size * sizeof(pmix_info_t)); + return PMIX_SUCCESS; +} + +static pmix_status_t pmix3_bfrop_copy_modex(pmix_modex_data_t **dest, pmix_modex_data_t *src, + pmix_data_type_t type) +{ + *dest = (pmix_modex_data_t*)malloc(sizeof(pmix_modex_data_t)); + if (NULL == *dest) { + return PMIX_ERR_OUT_OF_RESOURCE; + } + (*dest)->blob = NULL; + (*dest)->size = 0; + if (NULL != src->blob) { + (*dest)->blob = (uint8_t*)malloc(src->size * sizeof(uint8_t)); + if (NULL == (*dest)->blob) { + return PMIX_ERR_OUT_OF_RESOURCE; + } + memcpy((*dest)->blob, src->blob, src->size * sizeof(uint8_t)); + (*dest)->size = src->size; + } + return PMIX_SUCCESS; +} + +/*******************/ + +/**** DEPRECATED ****/ +static pmix_status_t pmix3_bfrop_print_array(char **output, char *prefix, + pmix_info_array_t *src, pmix_data_type_t type) +{ + size_t j; + char *tmp, *tmp2, *tmp3, *pfx; + pmix_info_t *s1; + + if (0 > asprintf(&tmp, "%sARRAY SIZE: %ld", prefix, (long)src->size)) { + return PMIX_ERR_NOMEM; + } + if (0 > asprintf(&pfx, "\n%s\t", (NULL == prefix) ? 
"" : prefix)) { + free(tmp); + return PMIX_ERR_NOMEM; + } + s1 = (pmix_info_t*)src->array; + + for (j=0; j < src->size; j++) { + pmix_bfrops_base_print_info(&tmp2, pfx, &s1[j], PMIX_INFO); + if (0 > asprintf(&tmp3, "%s%s", tmp, tmp2)) { + free(tmp); + free(tmp2); + return PMIX_ERR_NOMEM; + } + free(tmp); + free(tmp2); + tmp = tmp3; + } + *output = tmp; + return PMIX_SUCCESS; +} + +static pmix_status_t pmix3_bfrop_print_modex(char **output, char *prefix, + pmix_modex_data_t *src, pmix_data_type_t type) +{ + return PMIX_SUCCESS; +} + + +/********************/ diff --git a/orte/mca/notifier/base/Makefile.am b/opal/mca/pmix/pmix3x/pmix/src/mca/common/Makefile.am similarity index 62% rename from orte/mca/notifier/base/Makefile.am rename to opal/mca/pmix/pmix3x/pmix/src/mca/common/Makefile.am index 89171605f59..4567c654307 100644 --- a/orte/mca/notifier/base/Makefile.am +++ b/opal/mca/pmix/pmix3x/pmix/src/mca/common/Makefile.am @@ -1,3 +1,4 @@ +# # Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana # University Research and Technology # Corporation. All rights reserved. @@ -8,9 +9,7 @@ # University of Stuttgart. All rights reserved. # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. -# Copyright (c) 2009 Cisco Systems, Inc. All rights reserved. -# Copyright (c) 2014-2015 Intel, Inc. All rights reserved. -# +# Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. # $COPYRIGHT$ # # Additional copyrights may follow @@ -18,10 +17,9 @@ # $HEADER$ # -headers += \ - base/base.h - -libmca_notifier_la_SOURCES += \ - base/notifier_base_frame.c \ - base/notifier_base_select.c \ - base/notifier_base_fns.c +# Note that this file must exist, even though it is empty (there is no +# "base" directory for the common framework). autogen.pl and +# opal_mca.m4 assume that every framework has a top-level Makefile.am. 
+# We *could* adjust the framework glue code to exclude "common" from +# this requirement, but it's just a lot easier to have an empty +# Makefile.am here. diff --git a/opal/mca/pmix/pmix3x/pmix/src/mca/common/dstore/Makefile.am b/opal/mca/pmix/pmix3x/pmix/src/mca/common/dstore/Makefile.am new file mode 100644 index 00000000000..50c601838fc --- /dev/null +++ b/opal/mca/pmix/pmix3x/pmix/src/mca/common/dstore/Makefile.am @@ -0,0 +1,59 @@ +# +# Copyright (c) 2018 Mellanox Technologies. All rights reserved. +# +# Copyright (c) 2018 Intel, Inc. All rights reserved. +# $COPYRIGHT$ +# +# Additional copyrights may follow +# +# $HEADER$ +# + +# Header files + +AM_CPPFLAGS = $(LTDLINCL) + +headers = \ + dstore_common.h \ + dstore_base.h \ + dstore_segment.h \ + dstore_file.h + +# Source files + +sources = \ + dstore_base.c \ + dstore_segment.c + +lib_LTLIBRARIES = +noinst_LTLIBRARIES = +comp_inst = libmca_common_dstore.la +comp_noinst = libmca_common_dstore_noinst.la + +if MCA_BUILD_pmix_common_dstore_DSO +lib_LTLIBRARIES += $(comp_inst) +else +noinst_LTLIBRARIES += $(comp_noinst) +endif + +libmca_common_dstore_la_SOURCES = $(headers) $(sources) +libmca_common_dstore_la_LDFLAGS = -version-info $(libmca_common_dstore_so_version) +libmca_common_dstore_noinst_la_SOURCES = $(headers) $(sources) + +# Conditionally install the header files + +if WANT_INSTALL_HEADERS +pmixdir = $(pmixincludedir)/$(subdir) +pmix_HEADERS = $(headers) +endif + +all-local: + if test -z "$(lib_LTLIBRARIES)"; then \ + rm -f "$(comp_inst)"; \ + $(LN_S) "$(comp_noinst)" "$(comp_inst)"; \ + fi + +clean-local: + if test -z "$(lib_LTLIBRARIES)"; then \ + rm -f "$(comp_inst)"; \ + fi diff --git a/opal/mca/pmix/pmix3x/pmix/src/mca/common/dstore/configure.m4 b/opal/mca/pmix/pmix3x/pmix/src/mca/common/dstore/configure.m4 new file mode 100644 index 00000000000..ca90b6568fe --- /dev/null +++ b/opal/mca/pmix/pmix3x/pmix/src/mca/common/dstore/configure.m4 @@ -0,0 +1,17 @@ +# -*- shell-script -*- +# +# Copyright (c) 
2018 Mellanox Technologies. All rights reserved. +# $COPYRIGHT$ +# +# Additional copyrights may follow +# +# $HEADER$ +# + +# MCA_pmix_common_dstore_CONFIG([action-if-can-compile], +# [action-if-cant-compile]) +# ------------------------------------------------ +AC_DEFUN([MCA_pmix_common_dstore_CONFIG], [ + AC_CONFIG_FILES([src/mca/common/dstore/Makefile]) + $1 +])dnl diff --git a/opal/mca/pmix/pmix3x/pmix/src/mca/gds/ds12/gds_dstore.c b/opal/mca/pmix/pmix3x/pmix/src/mca/common/dstore/dstore_base.c similarity index 55% rename from opal/mca/pmix/pmix3x/pmix/src/mca/gds/ds12/gds_dstore.c rename to opal/mca/pmix/pmix3x/pmix/src/mca/common/dstore/dstore_base.c index 102f64f27c9..c0fc676e6c9 100644 --- a/opal/mca/pmix/pmix3x/pmix/src/mca/gds/ds12/gds_dstore.c +++ b/opal/mca/pmix/pmix3x/pmix/src/mca/common/dstore/dstore_base.c @@ -1,9 +1,9 @@ /* * Copyright (c) 2015-2018 Intel, Inc. All rights reserved. - * Copyright (c) 2016 IBM Corporation. All rights reserved. - * Copyright (c) 2016-2017 Mellanox Technologies, Inc. + * Copyright (c) 2016-2018 IBM Corporation. All rights reserved. + * Copyright (c) 2016-2018 Mellanox Technologies, Inc. * All rights reserved. - * Copyright (c) 2018 Research Organization for Information Science + * Copyright (c) 2018-2019 Research Organization for Information Science * and Technology (RIST). All rights reserved. 
* * $COPYRIGHT$ @@ -50,8 +50,10 @@ #include "src/mca/preg/preg.h" #include "src/mca/gds/base/base.h" -#include "gds_dstore.h" #include "src/mca/pshmem/base/base.h" +#include "dstore_common.h" +#include "dstore_base.h" +#include "dstore_segment.h" #define ESH_REGION_EXTENSION "EXTENSION_SLOT" #define ESH_REGION_INVALIDATED "INVALIDATED" @@ -60,418 +62,89 @@ #define ESH_ENV_NS_DATA_SEG_SIZE "NS_DATA_SEG_SIZE" #define ESH_ENV_LINEAR "SM_USE_LINEAR_SEARCH" -#define ESH_MIN_KEY_LEN (sizeof(ESH_REGION_INVALIDATED)) - -#define ESH_KV_SIZE(addr) \ -__pmix_attribute_extension__ ({ \ - size_t sz; \ - if (PMIX_PROC_IS_V1(_client_peer())) { \ - sz = ESH_KV_SIZE_V12(addr); \ - } else { \ - sz = ESH_KV_SIZE_V20(addr); \ - } \ - sz; \ -}) - -#define ESH_KNAME_PTR(addr) \ -__pmix_attribute_extension__ ({ \ - char *name_ptr; \ - if (PMIX_PROC_IS_V1(_client_peer())) { \ - name_ptr = ESH_KNAME_PTR_V12(addr); \ - } else { \ - name_ptr = ESH_KNAME_PTR_V20(addr); \ - } \ - name_ptr; \ -}) - -#define ESH_KNAME_LEN(key) \ -__pmix_attribute_extension__ ({ \ - size_t len; \ - if (PMIX_PROC_IS_V1(_client_peer())) { \ - len = ESH_KNAME_LEN_V12(key); \ - } else { \ - len = ESH_KNAME_LEN_V20(key); \ - } \ - len; \ -}) - -#define ESH_DATA_PTR(addr) \ -__pmix_attribute_extension__ ({ \ - uint8_t *data_ptr; \ - if (PMIX_PROC_IS_V1(_client_peer())) { \ - data_ptr = ESH_DATA_PTR_V12(addr); \ - } else { \ - data_ptr = ESH_DATA_PTR_V20(addr); \ - } \ - data_ptr; \ -}) - -#define ESH_DATA_SIZE(addr, data_ptr) \ -__pmix_attribute_extension__ ({ \ - size_t sz; \ - if (PMIX_PROC_IS_V1(_client_peer())) { \ - sz = ESH_DATA_SIZE_V12(addr); \ - } else { \ - sz = ESH_DATA_SIZE_V20(addr, data_ptr); \ - } \ - sz; \ -}) - -#define ESH_KEY_SIZE(key, size) \ -__pmix_attribute_extension__ ({ \ - size_t len; \ - if (PMIX_PROC_IS_V1(_client_peer())) { \ - len = ESH_KEY_SIZE_V12(key, size); \ - } else { \ - len = ESH_KEY_SIZE_V20(key, size); \ - } \ - len; \ -}) - -#define EXT_SLOT_SIZE() \ 
-__pmix_attribute_extension__ ({ \ - size_t sz; \ - if (PMIX_PROC_IS_V1(_client_peer())) { \ - sz = EXT_SLOT_SIZE_V12(); \ - } else { \ - sz = EXT_SLOT_SIZE_V20(); \ - } \ - sz; \ -}) - -#define ESH_PUT_KEY(addr, key, buffer, size) \ -__pmix_attribute_extension__ ({ \ - if (PMIX_PROC_IS_V1(_client_peer())) { \ - ESH_PUT_KEY_V12(addr, key, buffer, size); \ - } else { \ - ESH_PUT_KEY_V20(addr, key, buffer, size); \ - } \ -}) - -/* PMIx v2.x dstore specific macro */ -#define ESH_KV_SIZE_V20(addr) \ -__pmix_attribute_extension__ ({ \ - size_t sz; \ - memcpy(&sz, addr, sizeof(size_t)); \ - sz; \ -}) - -#define ESH_KNAME_PTR_V20(addr) \ -__pmix_attribute_extension__ ({ \ - char *name_ptr = (char *)addr + sizeof(size_t); \ - name_ptr; \ -}) - -#define ESH_KNAME_LEN_V20(key) \ -__pmix_attribute_extension__ ({ \ - size_t kname_len = strlen(key) + 1; \ - size_t len = (kname_len < ESH_MIN_KEY_LEN) ? \ - ESH_MIN_KEY_LEN : kname_len; \ - len; \ -}) - -#define ESH_DATA_PTR_V20(addr) \ -__pmix_attribute_extension__ ({ \ - size_t kname_len = \ - ESH_KNAME_LEN_V20(ESH_KNAME_PTR_V20(addr)); \ - uint8_t *data_ptr = addr + sizeof(size_t) + kname_len; \ - data_ptr; \ -}) - -#define ESH_DATA_SIZE_V20(addr, data_ptr) \ -__pmix_attribute_extension__ ({ \ - size_t sz = ESH_KV_SIZE_V20(addr); \ - size_t data_size = sz - (data_ptr - addr); \ - data_size; \ -}) - -#define ESH_KEY_SIZE_V20(key, size) \ -__pmix_attribute_extension__ ({ \ - size_t len = \ - sizeof(size_t) + ESH_KNAME_LEN_V20(key) + size; \ - len; \ -}) - -/* in ext slot new offset will be stored in case if - * new data were added for the same process during - * next commit - */ -#define EXT_SLOT_SIZE_V20() \ - (ESH_KEY_SIZE_V20(ESH_REGION_EXTENSION, sizeof(size_t))) - - -#define ESH_PUT_KEY_V20(addr, key, buffer, size) \ -__pmix_attribute_extension__ ({ \ - size_t sz = ESH_KEY_SIZE_V20(key, size); \ - memcpy(addr, &sz, sizeof(size_t)); \ - memset(addr + sizeof(size_t), 0, \ - ESH_KNAME_LEN_V20(key)); \ - strncpy((char *)addr + 
sizeof(size_t), \ - key, ESH_KNAME_LEN_V20(key)); \ - memcpy(addr + sizeof(size_t) + ESH_KNAME_LEN_V20(key), \ - buffer, size); \ -}) - -/* PMIx v1.2 dstore specific macro */ -#define ESH_KEY_SIZE_V12(key, size) \ -__pmix_attribute_extension__ ({ \ - size_t len = strlen(key) + 1 + sizeof(size_t) + size; \ - len; \ -}) - -/* in ext slot new offset will be stored in case if - * new data were added for the same process during - * next commit - */ -#define EXT_SLOT_SIZE_V12() \ - (ESH_KEY_SIZE_V12(ESH_REGION_EXTENSION, sizeof(size_t))) - -#define ESH_KV_SIZE_V12(addr) \ -__pmix_attribute_extension__ ({ \ - size_t sz; \ - memcpy(&sz, addr + \ - ESH_KNAME_LEN_V12(ESH_KNAME_PTR_V12(addr)), \ - sizeof(size_t)); \ - sz += ESH_KNAME_LEN_V12(ESH_KNAME_PTR_V12(addr)) + \ - sizeof(size_t); \ - sz; \ -}) - -#define ESH_KNAME_PTR_V12(addr) \ -__pmix_attribute_extension__ ({ \ - char *name_ptr = (char *)addr; \ - name_ptr; \ -}) - -#define ESH_KNAME_LEN_V12(key) \ -__pmix_attribute_extension__ ({ \ - size_t len = strlen((char*)key) + 1; \ - len; \ -}) - -#define ESH_DATA_PTR_V12(addr) \ -__pmix_attribute_extension__ ({ \ - uint8_t *data_ptr = \ - addr + \ - sizeof(size_t) + \ - ESH_KNAME_LEN_V12(ESH_KNAME_PTR_V12(addr)); \ - data_ptr; \ -}) - -#define ESH_DATA_SIZE_V12(addr) \ -__pmix_attribute_extension__ ({ \ - size_t data_size; \ - memcpy(&data_size, \ - addr + ESH_KNAME_LEN_V12(ESH_KNAME_PTR_V12(addr)), \ - sizeof(size_t)); \ - data_size; \ -}) - -#define ESH_PUT_KEY_V12(addr, key, buffer, size) \ -__pmix_attribute_extension__ ({ \ - size_t sz = size; \ - memset(addr, 0, ESH_KNAME_LEN_V12(key)); \ - strncpy((char *)addr, key, ESH_KNAME_LEN_V12(key)); \ - memcpy(addr + ESH_KNAME_LEN_V12(key), &sz, \ - sizeof(size_t)); \ - memcpy(addr + ESH_KNAME_LEN_V12(key) + sizeof(size_t), \ - buffer, size); \ -}) - -#ifdef ESH_PTHREAD_LOCK -#define _ESH_LOCK(rwlock, func) \ -__pmix_attribute_extension__ ({ \ - pmix_status_t ret = PMIX_SUCCESS; \ - int rc; \ - rc = 
pthread_rwlock_##func(rwlock); \ - if (0 != rc) { \ - switch (errno) { \ - case EINVAL: \ - ret = PMIX_ERR_INIT; \ - break; \ - case EPERM: \ - ret = PMIX_ERR_NO_PERMISSIONS; \ - break; \ - } \ - } \ - if (ret) { \ - pmix_output(0, "%s %d:%s lock failed: %s", \ - __FILE__, __LINE__, __func__, strerror(errno)); \ - } \ - ret; \ -}) - -#define _ESH_WRLOCK(rwlock) _ESH_LOCK(rwlock, wrlock) -#define _ESH_RDLOCK(rwlock) _ESH_LOCK(rwlock, rdlock) -#define _ESH_UNLOCK(rwlock) _ESH_LOCK(rwlock, unlock) -#endif - -#ifdef ESH_FCNTL_LOCK -#define _ESH_LOCK(lockfd, operation) \ -__pmix_attribute_extension__ ({ \ - pmix_status_t ret = PMIX_SUCCESS; \ - int i; \ - struct flock fl = {0}; \ - fl.l_type = operation; \ - fl.l_whence = SEEK_SET; \ - for(i = 0; i < 10; i++) { \ - if( 0 > fcntl(lockfd, F_SETLKW, &fl) ) { \ - switch( errno ){ \ - case EINTR: \ - continue; \ - case ENOENT: \ - case EINVAL: \ - ret = PMIX_ERR_NOT_FOUND; \ - break; \ - case EBADF: \ - ret = PMIX_ERR_BAD_PARAM; \ - break; \ - case EDEADLK: \ - case EFAULT: \ - case ENOLCK: \ - ret = PMIX_ERR_RESOURCE_BUSY; \ - break; \ - default: \ - ret = PMIX_ERROR; \ - break; \ - } \ - } \ - break; \ - } \ - if (ret) { \ - pmix_output(0, "%s %d:%s lock failed: %s", \ - __FILE__, __LINE__, __func__, strerror(errno)); \ - } \ - ret; \ -}) - -#define _ESH_WRLOCK(lock) _ESH_LOCK(lock, F_WRLCK) -#define _ESH_RDLOCK(lock) _ESH_LOCK(lock, F_RDLCK) -#define _ESH_UNLOCK(lock) _ESH_LOCK(lock, F_UNLCK) -#endif - #define ESH_INIT_SESSION_TBL_SIZE 2 #define ESH_INIT_NS_MAP_TBL_SIZE 2 -static int _store_data_for_rank(ns_track_elem_t *ns_info, pmix_rank_t rank, pmix_buffer_t *buf); -static seg_desc_t *_create_new_segment(segment_type type, const ns_map_data_t *ns_map, uint32_t id); -static seg_desc_t *_attach_new_segment(segment_type type, const ns_map_data_t *ns_map, uint32_t id); -static int _update_ns_elem(ns_track_elem_t *ns_elem, ns_seg_info_t *info); -static int _put_ns_info_to_initial_segment(const ns_map_data_t *ns_map, 
pmix_pshmem_seg_t *metaseg, pmix_pshmem_seg_t *dataseg); -static ns_seg_info_t *_get_ns_info_from_initial_segment(const ns_map_data_t *ns_map); -static ns_track_elem_t *_get_track_elem_for_namespace(ns_map_data_t *ns_map); -static rank_meta_info *_get_rank_meta_info(pmix_rank_t rank, seg_desc_t *segdesc); -static uint8_t *_get_data_region_by_offset(seg_desc_t *segdesc, size_t offset); -static void _update_initial_segment_info(const ns_map_data_t *ns_map); -static void _set_constants_from_env(void); -static void _delete_sm_desc(seg_desc_t *desc); -static int _pmix_getpagesize(void); -static inline ssize_t _get_univ_size(const char *nspace); - -static inline ns_map_data_t * _esh_session_map_search_server(const char *nspace); -static inline ns_map_data_t * _esh_session_map_search_client(const char *nspace); -static inline ns_map_data_t * _esh_session_map(const char *nspace, size_t tbl_idx); -static inline void _esh_session_map_clean(ns_map_t *m); -static inline int _esh_jobuid_tbl_search(uid_t jobuid, size_t *tbl_idx); -static inline int _esh_session_tbl_add(size_t *tbl_idx); -static inline int _esh_session_init(size_t idx, ns_map_data_t *m, size_t jobuid, int setjobuid); -static inline void _esh_session_release(session_t *s); -static inline void _esh_ns_track_cleanup(void); -static inline void _esh_sessions_cleanup(void); -static inline void _esh_ns_map_cleanup(void); +static int _store_data_for_rank(pmix_common_dstore_ctx_t *ds_ctx, ns_track_elem_t *ns_info, + pmix_rank_t rank, pmix_buffer_t *buf); +static int _update_ns_elem(pmix_common_dstore_ctx_t *ds_ctx, ns_track_elem_t *ns_elem, ns_seg_info_t *info); +static int _put_ns_info_to_initial_segment(pmix_common_dstore_ctx_t *ds_ctx, + const ns_map_data_t *ns_map, pmix_pshmem_seg_t *metaseg, + pmix_pshmem_seg_t *dataseg); +static ns_seg_info_t *_get_ns_info_from_initial_segment(pmix_common_dstore_ctx_t *ds_ctx, + const ns_map_data_t *ns_map); +static ns_track_elem_t 
*_get_track_elem_for_namespace(pmix_common_dstore_ctx_t *ds_ctx, + ns_map_data_t *ns_map); +static rank_meta_info *_get_rank_meta_info(pmix_common_dstore_ctx_t *ds_ctx, pmix_rank_t rank, + pmix_dstore_seg_desc_t *segdesc); +static uint8_t *_get_data_region_by_offset(pmix_common_dstore_ctx_t *ds_ctx, + pmix_dstore_seg_desc_t *segdesc, size_t offset); +static void _update_initial_segment_info(pmix_common_dstore_ctx_t *ds_ctx, + const ns_map_data_t *ns_map); +static void _set_constants_from_env(pmix_common_dstore_ctx_t *ds_ctx); +static inline ssize_t _get_univ_size(pmix_common_dstore_ctx_t *ds_ctx, const char *nspace); + +static inline ns_map_data_t * _esh_session_map_search_server(pmix_common_dstore_ctx_t *ds_ctx, + const char *nspace); +static inline ns_map_data_t * _esh_session_map_search_client(pmix_common_dstore_ctx_t *ds_ctx, + const char *nspace); +static inline ns_map_data_t * _esh_session_map(pmix_common_dstore_ctx_t *ds_ctx, + const char *nspace, uint32_t local_size, + size_t tbl_idx); +static inline void _esh_session_map_clean(pmix_common_dstore_ctx_t *ds_ctx, ns_map_t *m); +static inline int _esh_jobuid_tbl_search(pmix_common_dstore_ctx_t *ds_ctx, + uid_t jobuid, size_t *tbl_idx); +static inline int _esh_session_tbl_add(pmix_common_dstore_ctx_t *ds_ctx, size_t *tbl_idx); +static int _esh_session_init(pmix_common_dstore_ctx_t *ds_ctx, size_t idx, ns_map_data_t *m, + uint32_t local_size, size_t jobuid, int setjobuid); +static void _esh_session_release(pmix_common_dstore_ctx_t *ds_ctx, size_t idx); +static inline void _esh_ns_track_cleanup(pmix_common_dstore_ctx_t *ds_ctx); +static inline void _esh_sessions_cleanup(pmix_common_dstore_ctx_t *ds_ctx); +static inline void _esh_ns_map_cleanup(pmix_common_dstore_ctx_t *ds_ctx); static inline int _esh_dir_del(const char *dirname); -static inline void _client_compat_save(pmix_peer_t *peer); -static inline pmix_peer_t * _client_peer(void); +static inline void _client_compat_save(pmix_common_dstore_ctx_t *ds_ctx, 
pmix_peer_t *peer); +static inline pmix_peer_t * _client_peer(pmix_common_dstore_ctx_t *ds_ctx); static inline int _my_client(const char *nspace, pmix_rank_t rank); -static pmix_status_t dstore_init(pmix_info_t info[], size_t ninfo); +static pmix_status_t _dstor_store_modex_cb(pmix_common_dstore_ctx_t *ds_ctx, + struct pmix_namespace_t *nspace, + pmix_list_t *cbs, + pmix_byte_object_t *bo); -static void dstore_finalize(void); +static pmix_status_t _dstore_store_nolock(pmix_common_dstore_ctx_t *ds_ctx, + ns_map_data_t *ns_map, + pmix_rank_t rank, + pmix_kval_t *kv); -static pmix_status_t dstore_setup_fork(const pmix_proc_t *peer, char ***env); +static pmix_status_t _dstore_fetch(pmix_common_dstore_ctx_t *ds_ctx, + const char *nspace, pmix_rank_t rank, + const char *key, pmix_value_t **kvs); -static pmix_status_t dstore_cache_job_info(struct pmix_nspace_t *ns, - pmix_info_t info[], size_t ninfo); +ns_map_data_t * (*_esh_session_map_search)(const char *nspace) = NULL; -static pmix_status_t dstore_register_job_info(struct pmix_peer_t *pr, - pmix_buffer_t *reply); +#define _ESH_SESSION_lock(session_array, tbl_idx) \ + (PMIX_VALUE_ARRAY_GET_BASE(session_array, session_t)[tbl_idx].lock) -static pmix_status_t dstore_store_job_info(const char *nspace, - pmix_buffer_t *job_data); +#define _ESH_SESSION_path(session_array, tbl_idx) \ + (PMIX_VALUE_ARRAY_GET_BASE(session_array, session_t)[tbl_idx].nspace_path) -static pmix_status_t _dstore_store(const char *nspace, - pmix_rank_t rank, - pmix_kval_t *kv); +#define _ESH_SESSION_lockfile(session_array, tbl_idx) \ + (PMIX_VALUE_ARRAY_GET_BASE(session_array, session_t)[tbl_idx].lockfile) -static pmix_status_t dstore_store(const pmix_proc_t *proc, - pmix_scope_t scope, - pmix_kval_t *kv); - -static pmix_status_t _dstore_fetch(const char *nspace, - pmix_rank_t rank, - const char *key, pmix_value_t **kvs); - -static pmix_status_t dstore_fetch(const pmix_proc_t *proc, - pmix_scope_t scope, bool copy, - const char *key, - pmix_info_t 
info[], size_t ninfo, - pmix_list_t *kvs); - -static pmix_status_t dstore_add_nspace(const char *nspace, - pmix_info_t info[], - size_t ninfo); - -static pmix_status_t dstore_del_nspace(const char* nspace); - -static pmix_status_t dstore_assign_module(pmix_info_t *info, size_t ninfo, - int *priority); - -static pmix_status_t dstore_store_modex(struct pmix_nspace_t *nspace, - pmix_list_t *cbs, - pmix_byte_object_t *bo); - -pmix_gds_base_module_t pmix_ds12_module = { - .name = "ds12", - .init = dstore_init, - .finalize = dstore_finalize, - .assign_module = dstore_assign_module, - .cache_job_info = dstore_cache_job_info, - .register_job_info = dstore_register_job_info, - .store_job_info = dstore_store_job_info, - .store = dstore_store, - .store_modex = dstore_store_modex, - .fetch = dstore_fetch, - .setup_fork = dstore_setup_fork, - .add_nspace = dstore_add_nspace, - .del_nspace = dstore_del_nspace, -}; - -static char *_base_path = NULL; -static size_t _initial_segment_size = 0; -static size_t _max_ns_num; -static size_t _meta_segment_size = 0; -static size_t _max_meta_elems; -static size_t _data_segment_size = 0; -static size_t _lock_segment_size = 0; -static uid_t _jobuid; -static char _setjobuid = 0; -static pmix_peer_t *_clients_peer = NULL; - -static pmix_value_array_t *_session_array = NULL; -static pmix_value_array_t *_ns_map_array = NULL; -static pmix_value_array_t *_ns_track_array = NULL; +#define _ESH_SESSION_setjobuid(session_array, tbl_idx) \ + (PMIX_VALUE_ARRAY_GET_BASE(session_array, session_t)[tbl_idx].setjobuid) -ns_map_data_t * (*_esh_session_map_search)(const char *nspace) = NULL; -int (*_esh_lock_init)(size_t idx) = NULL; +#define _ESH_SESSION_jobuid(session_array, tbl_idx) \ + (PMIX_VALUE_ARRAY_GET_BASE(session_array, session_t)[tbl_idx].jobuid) + +#define _ESH_SESSION_sm_seg_first(session_array, tbl_idx) \ + (PMIX_VALUE_ARRAY_GET_BASE(session_array, session_t)[tbl_idx].sm_seg_first) +#define _ESH_SESSION_sm_seg_last(session_array, tbl_idx) \ + 
(PMIX_VALUE_ARRAY_GET_BASE(session_array, session_t)[tbl_idx].sm_seg_last) -#define _ESH_SESSION_path(tbl_idx) (PMIX_VALUE_ARRAY_GET_BASE(_session_array, session_t)[tbl_idx].nspace_path) -#define _ESH_SESSION_lockfile(tbl_idx) (PMIX_VALUE_ARRAY_GET_BASE(_session_array, session_t)[tbl_idx].lockfile) -#define _ESH_SESSION_setjobuid(tbl_idx) (PMIX_VALUE_ARRAY_GET_BASE(_session_array, session_t)[tbl_idx].setjobuid) -#define _ESH_SESSION_jobuid(tbl_idx) (PMIX_VALUE_ARRAY_GET_BASE(_session_array, session_t)[tbl_idx].jobuid) -#define _ESH_SESSION_sm_seg_first(tbl_idx) (PMIX_VALUE_ARRAY_GET_BASE(_session_array, session_t)[tbl_idx].sm_seg_first) -#define _ESH_SESSION_sm_seg_last(tbl_idx) (PMIX_VALUE_ARRAY_GET_BASE(_session_array, session_t)[tbl_idx].sm_seg_last) -#define _ESH_SESSION_ns_info(tbl_idx) (PMIX_VALUE_ARRAY_GET_BASE(_session_array, session_t)[tbl_idx].ns_info) +#define _ESH_SESSION_ns_info(session_array, tbl_idx) \ + (PMIX_VALUE_ARRAY_GET_BASE(session_array, session_t)[tbl_idx].ns_info) #ifdef ESH_PTHREAD_LOCK #define _ESH_SESSION_pthread_rwlock(tbl_idx) (PMIX_VALUE_ARRAY_GET_BASE(_session_array, session_t)[tbl_idx].rwlock) @@ -484,14 +157,13 @@ int (*_esh_lock_init)(size_t idx) = NULL; #define _ESH_SESSION_lock(tbl_idx) _ESH_SESSION_lockfd(tbl_idx) #endif -/* If _direct_mode is set, it means that we use linear search - * along the array of rank meta info objects inside a meta segment - * to find the requested rank. Otherwise, we do a fast lookup - * based on rank and directly compute offset. - * This mode is called direct because it's effectively used in - * sparse communication patterns when direct modex is usually used. 
- */ -static int _direct_mode = 0; +#define _ESH_LOCK(ds_ctx, session_id, operation) \ +__pmix_attribute_extension__ ({ \ + pmix_status_t rc = PMIX_SUCCESS; \ + rc = ds_ctx->lock_cbs->operation(_ESH_SESSION_lock(ds_ctx->session_array, \ + session_id)); \ + rc; \ +}) static void ncon(ns_track_elem_t *p) { memset(&p->ns_map, 0, sizeof(p->ns_map)); @@ -503,8 +175,8 @@ static void ncon(ns_track_elem_t *p) { } static void ndes(ns_track_elem_t *p) { - _delete_sm_desc(p->meta_seg); - _delete_sm_desc(p->data_seg); + pmix_common_dstor_delete_sm_desc(p->meta_seg); + pmix_common_dstor_delete_sm_desc(p->data_seg); memset(&p->ns_map, 0, sizeof(p->ns_map)); p->in_use = false; } @@ -513,153 +185,11 @@ PMIX_CLASS_INSTANCE(ns_track_elem_t, pmix_value_array_t, ncon, ndes); -static inline void _esh_session_map_clean(ns_map_t *m) { +static inline void _esh_session_map_clean(pmix_common_dstore_ctx_t *ds_ctx, ns_map_t *m) { memset(m, 0, sizeof(*m)); m->data.track_idx = -1; } -#ifdef ESH_FCNTL_LOCK -static inline int _flock_init(size_t idx) { - pmix_status_t rc = PMIX_SUCCESS; - - if (PMIX_PROC_IS_SERVER(pmix_globals.mypeer)) { - _ESH_SESSION_lock(idx) = open(_ESH_SESSION_lockfile(idx), O_CREAT | O_RDWR | O_EXCL, 0600); - - /* if previous launch was crashed, the lockfile might not be deleted and unlocked, - * so we delete it and create a new one. 
*/ - if (_ESH_SESSION_lock(idx) < 0) { - unlink(_ESH_SESSION_lockfile(idx)); - _ESH_SESSION_lock(idx) = open(_ESH_SESSION_lockfile(idx), O_CREAT | O_RDWR, 0600); - if (_ESH_SESSION_lock(idx) < 0) { - rc = PMIX_ERROR; - PMIX_ERROR_LOG(rc); - return rc; - } - } - if (_ESH_SESSION_setjobuid(idx) > 0) { - if (0 > chown(_ESH_SESSION_lockfile(idx), (uid_t) _ESH_SESSION_jobuid(idx), (gid_t) -1)) { - rc = PMIX_ERROR; - PMIX_ERROR_LOG(rc); - return rc; - } - if (0 > chmod(_ESH_SESSION_lockfile(idx), S_IRUSR | S_IWGRP | S_IRGRP)) { - rc = PMIX_ERROR; - PMIX_ERROR_LOG(rc); - return rc; - } - } - } - else { - _ESH_SESSION_lock(idx) = open(_ESH_SESSION_lockfile(idx), O_RDONLY); - if (-1 == _ESH_SESSION_lock(idx)) { - rc = PMIX_ERROR; - PMIX_ERROR_LOG(rc); - return rc; - } - } - return rc; -} -#endif - -#ifdef ESH_PTHREAD_LOCK -static inline int _rwlock_init(size_t idx) { - pmix_status_t rc = PMIX_SUCCESS; - size_t size = _lock_segment_size; - pthread_rwlockattr_t attr; - - if ((NULL != _ESH_SESSION_pthread_seg(idx)) || (NULL != _ESH_SESSION_pthread_rwlock(idx))) { - rc = PMIX_ERR_INIT; - return rc; - } - _ESH_SESSION_pthread_seg(idx) = (pmix_pshmem_seg_t *)malloc(sizeof(pmix_pshmem_seg_t)); - if (NULL == _ESH_SESSION_pthread_seg(idx)) { - rc = PMIX_ERR_OUT_OF_RESOURCE; - return rc; - } - if (PMIX_PROC_IS_SERVER(pmix_globals.mypeer)) { - if (PMIX_SUCCESS != (rc = pmix_pshmem.segment_create(_ESH_SESSION_pthread_seg(idx), _ESH_SESSION_lockfile(idx), size))) { - return rc; - } - memset(_ESH_SESSION_pthread_seg(idx)->seg_base_addr, 0, size); - if (_ESH_SESSION_setjobuid(idx) > 0) { - if (0 > chown(_ESH_SESSION_lockfile(idx), (uid_t) _ESH_SESSION_jobuid(idx), (gid_t) -1)){ - rc = PMIX_ERROR; - PMIX_ERROR_LOG(rc); - return rc; - } - /* set the mode as required */ - if (0 > chmod(_ESH_SESSION_lockfile(idx), S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP )) { - rc = PMIX_ERROR; - PMIX_ERROR_LOG(rc); - return rc; - } - } - _ESH_SESSION_pthread_rwlock(idx) = (pthread_rwlock_t 
*)_ESH_SESSION_pthread_seg(idx)->seg_base_addr; - - if (0 != pthread_rwlockattr_init(&attr)) { - rc = PMIX_ERR_INIT; - pmix_pshmem.segment_detach(_ESH_SESSION_pthread_seg(idx)); - return rc; - } - if (0 != pthread_rwlockattr_setpshared(&attr, PTHREAD_PROCESS_SHARED)) { - rc = PMIX_ERR_INIT; - pmix_pshmem.segment_detach(_ESH_SESSION_pthread_seg(idx)); - pthread_rwlockattr_destroy(&attr); - return rc; - } -#ifdef HAVE_PTHREAD_SETKIND - if (0 != pthread_rwlockattr_setkind_np(&attr, PTHREAD_RWLOCK_PREFER_WRITER_NONRECURSIVE_NP)) { - rc = PMIX_ERR_INIT; - pmix_pshmem.segment_detach(_ESH_SESSION_pthread_seg(idx)); - pthread_rwlockattr_destroy(&attr); - return rc; - } -#endif - if (0 != pthread_rwlock_init(_ESH_SESSION_pthread_rwlock(idx), &attr)) { - rc = PMIX_ERR_INIT; - pmix_pshmem.segment_detach(_ESH_SESSION_pthread_seg(idx)); - pthread_rwlockattr_destroy(&attr); - return rc; - } - if (0 != pthread_rwlockattr_destroy(&attr)) { - rc = PMIX_ERR_INIT; - return rc; - } - - } - else { - _ESH_SESSION_pthread_seg(idx)->seg_size = size; - snprintf(_ESH_SESSION_pthread_seg(idx)->seg_name, PMIX_PATH_MAX, "%s", _ESH_SESSION_lockfile(idx)); - if (PMIX_SUCCESS != (rc = pmix_pshmem.segment_attach(_ESH_SESSION_pthread_seg(idx), PMIX_PSHMEM_RW))) { - return rc; - } - _ESH_SESSION_pthread_rwlock(idx) = (pthread_rwlock_t *)_ESH_SESSION_pthread_seg(idx)->seg_base_addr; - } - - return rc; -} - -static inline void _rwlock_release(session_t *s) { - pmix_status_t rc; - - if (0 != pthread_rwlock_destroy(s->rwlock)) { - rc = PMIX_ERROR; - PMIX_ERROR_LOG(rc); - return; - } - - /* detach & unlink from current desc */ - if (s->rwlock_seg->seg_cpid == getpid()) { - pmix_pshmem.segment_unlink(s->rwlock_seg); - } - pmix_pshmem.segment_detach(s->rwlock_seg); - - free(s->rwlock_seg); - s->rwlock_seg = NULL; - s->rwlock = NULL; -} -#endif - static inline int _esh_dir_del(const char *path) { DIR *dir; @@ -715,132 +245,133 @@ static inline int _esh_dir_del(const char *path) return rc; } -static inline 
int _esh_tbls_init(void) +static inline int _esh_tbls_init(pmix_common_dstore_ctx_t *ds_ctx) { pmix_status_t rc = PMIX_SUCCESS; size_t idx; /* initial settings */ - _ns_track_array = NULL; - _session_array = NULL; - _ns_map_array = NULL; + ds_ctx->ns_track_array = NULL; + ds_ctx->session_array = NULL; + ds_ctx->ns_map_array = NULL; /* Setup namespace tracking array */ - if (NULL == (_ns_track_array = PMIX_NEW(pmix_value_array_t))) { + if (NULL == (ds_ctx->ns_track_array = PMIX_NEW(pmix_value_array_t))) { rc = PMIX_ERR_OUT_OF_RESOURCE; PMIX_ERROR_LOG(rc); goto err_exit; } - if (PMIX_SUCCESS != (rc = pmix_value_array_init(_ns_track_array, sizeof(ns_track_elem_t)))){ + if (PMIX_SUCCESS != (rc = pmix_value_array_init(ds_ctx->ns_track_array, sizeof(ns_track_elem_t)))){ PMIX_ERROR_LOG(rc); goto err_exit; } /* Setup sessions table */ - if (NULL == (_session_array = PMIX_NEW(pmix_value_array_t))){ + if (NULL == (ds_ctx->session_array = PMIX_NEW(pmix_value_array_t))){ rc = PMIX_ERR_OUT_OF_RESOURCE; PMIX_ERROR_LOG(rc); goto err_exit; } - if (PMIX_SUCCESS != (rc = pmix_value_array_init(_session_array, sizeof(session_t)))) { + if (PMIX_SUCCESS != (rc = pmix_value_array_init(ds_ctx->session_array, sizeof(session_t)))) { PMIX_ERROR_LOG(rc); goto err_exit; } - if (PMIX_SUCCESS != (rc = pmix_value_array_set_size(_session_array, ESH_INIT_SESSION_TBL_SIZE))) { + if (PMIX_SUCCESS != (rc = pmix_value_array_set_size(ds_ctx->session_array, ESH_INIT_SESSION_TBL_SIZE))) { PMIX_ERROR_LOG(rc); goto err_exit; } for (idx = 0; idx < ESH_INIT_SESSION_TBL_SIZE; idx++) { - memset(pmix_value_array_get_item(_session_array, idx), 0, sizeof(session_t)); + memset(pmix_value_array_get_item(ds_ctx->session_array, idx), 0, sizeof(session_t)); } /* Setup namespace map array */ - if (NULL == (_ns_map_array = PMIX_NEW(pmix_value_array_t))) { + if (NULL == (ds_ctx->ns_map_array = PMIX_NEW(pmix_value_array_t))) { rc = PMIX_ERR_OUT_OF_RESOURCE; PMIX_ERROR_LOG(rc); goto err_exit; } - if (PMIX_SUCCESS != (rc = 
pmix_value_array_init(_ns_map_array, sizeof(ns_map_t)))) { + if (PMIX_SUCCESS != (rc = pmix_value_array_init(ds_ctx->ns_map_array, sizeof(ns_map_t)))) { PMIX_ERROR_LOG(rc); goto err_exit; } - if (PMIX_SUCCESS != (rc = pmix_value_array_set_size(_ns_map_array, ESH_INIT_NS_MAP_TBL_SIZE))) { + if (PMIX_SUCCESS != (rc = pmix_value_array_set_size(ds_ctx->ns_map_array, ESH_INIT_NS_MAP_TBL_SIZE))) { PMIX_ERROR_LOG(rc); goto err_exit; } for (idx = 0; idx < ESH_INIT_NS_MAP_TBL_SIZE; idx++) { - _esh_session_map_clean(pmix_value_array_get_item(_ns_map_array, idx)); + _esh_session_map_clean(ds_ctx, pmix_value_array_get_item(ds_ctx->ns_map_array, idx)); } return PMIX_SUCCESS; err_exit: - if (NULL != _ns_track_array) { - PMIX_RELEASE(_ns_track_array); + if (NULL != ds_ctx->ns_track_array) { + PMIX_RELEASE(ds_ctx->ns_track_array); } - if (NULL != _session_array) { - PMIX_RELEASE(_session_array); + if (NULL != ds_ctx->session_array) { + PMIX_RELEASE(ds_ctx->session_array); } - if (NULL != _ns_map_array) { - PMIX_RELEASE(_ns_map_array); + if (NULL != ds_ctx->ns_map_array) { + PMIX_RELEASE(ds_ctx->ns_map_array); } return rc; } -static inline void _esh_ns_map_cleanup(void) +static inline void _esh_ns_map_cleanup(pmix_common_dstore_ctx_t *ds_ctx) { size_t idx; size_t size; ns_map_t *ns_map; - if (NULL == _ns_map_array) { + if (NULL == ds_ctx->ns_map_array) { return; } - size = pmix_value_array_get_size(_ns_map_array); - ns_map = PMIX_VALUE_ARRAY_GET_BASE(_ns_map_array, ns_map_t); + size = pmix_value_array_get_size(ds_ctx->ns_map_array); + ns_map = PMIX_VALUE_ARRAY_GET_BASE(ds_ctx->ns_map_array, ns_map_t); for (idx = 0; idx < size; idx++) { - if(ns_map[idx].in_use) - _esh_session_map_clean(&ns_map[idx]); + if(ns_map[idx].in_use) { + _esh_session_map_clean(ds_ctx, &ns_map[idx]); + } } - PMIX_RELEASE(_ns_map_array); - _ns_map_array = NULL; + PMIX_RELEASE(ds_ctx->ns_map_array); + ds_ctx->ns_map_array = NULL; } -static inline void _esh_sessions_cleanup(void) +static inline void 
_esh_sessions_cleanup(pmix_common_dstore_ctx_t *ds_ctx) { size_t idx; size_t size; session_t *s_tbl; - if (NULL == _session_array) { + if (NULL == ds_ctx->session_array) { return; } - size = pmix_value_array_get_size(_session_array); - s_tbl = PMIX_VALUE_ARRAY_GET_BASE(_session_array, session_t); + size = pmix_value_array_get_size(ds_ctx->session_array); + s_tbl = PMIX_VALUE_ARRAY_GET_BASE(ds_ctx->session_array, session_t); for (idx = 0; idx < size; idx++) { if(s_tbl[idx].in_use) - _esh_session_release(&s_tbl[idx]); + _esh_session_release(ds_ctx, idx); } - PMIX_RELEASE(_session_array); - _session_array = NULL; + PMIX_RELEASE(ds_ctx->session_array); + ds_ctx->session_array = NULL; } -static inline void _esh_ns_track_cleanup(void) +static inline void _esh_ns_track_cleanup(pmix_common_dstore_ctx_t *ds_ctx) { int size; ns_track_elem_t *ns_trk; - if (NULL == _ns_track_array) { + if (NULL == ds_ctx->ns_track_array) { return; } - size = pmix_value_array_get_size(_ns_track_array); - ns_trk = PMIX_VALUE_ARRAY_GET_BASE(_ns_track_array, ns_track_elem_t); + size = pmix_value_array_get_size(ds_ctx->ns_track_array); + ns_trk = PMIX_VALUE_ARRAY_GET_BASE(ds_ctx->ns_track_array, ns_track_elem_t); for (int i = 0; i < size; i++) { ns_track_elem_t *trk = ns_trk + i; @@ -849,15 +380,17 @@ static inline void _esh_ns_track_cleanup(void) } } - PMIX_RELEASE(_ns_track_array); - _ns_track_array = NULL; + PMIX_RELEASE(ds_ctx->ns_track_array); + ds_ctx->ns_track_array = NULL; } -static inline ns_map_data_t * _esh_session_map(const char *nspace, size_t tbl_idx) +static inline ns_map_data_t * _esh_session_map(pmix_common_dstore_ctx_t *ds_ctx, + const char *nspace, uint32_t local_size, + size_t tbl_idx) { size_t map_idx; - size_t size = pmix_value_array_get_size(_ns_map_array);; - ns_map_t *ns_map = PMIX_VALUE_ARRAY_GET_BASE(_ns_map_array, ns_map_t);; + size_t size = pmix_value_array_get_size(ds_ctx->ns_map_array); + ns_map_t *ns_map = PMIX_VALUE_ARRAY_GET_BASE(ds_ctx->ns_map_array, ns_map_t); 
ns_map_t *new_map = NULL; if (NULL == nspace) { @@ -868,32 +401,33 @@ static inline ns_map_data_t * _esh_session_map(const char *nspace, size_t tbl_id for(map_idx = 0; map_idx < size; map_idx++) { if (!ns_map[map_idx].in_use) { ns_map[map_idx].in_use = true; - strncpy(ns_map[map_idx].data.name, nspace, sizeof(ns_map[map_idx].data.name)-1); + pmix_strncpy(ns_map[map_idx].data.name, nspace, sizeof(ns_map[map_idx].data.name)-1); ns_map[map_idx].data.tbl_idx = tbl_idx; return &ns_map[map_idx].data; } } - if (NULL == (new_map = pmix_value_array_get_item(_ns_map_array, map_idx))) { + if (NULL == (new_map = pmix_value_array_get_item(ds_ctx->ns_map_array, map_idx))) { PMIX_ERROR_LOG(PMIX_ERR_OUT_OF_RESOURCE); return NULL; } - _esh_session_map_clean(new_map); + _esh_session_map_clean(ds_ctx, new_map); new_map->in_use = true; new_map->data.tbl_idx = tbl_idx; - strncpy(new_map->data.name, nspace, sizeof(new_map->data.name)-1); + pmix_strncpy(new_map->data.name, nspace, sizeof(new_map->data.name)-1); return &new_map->data; } -static inline int _esh_jobuid_tbl_search(uid_t jobuid, size_t *tbl_idx) +static inline int _esh_jobuid_tbl_search(pmix_common_dstore_ctx_t *ds_ctx, + uid_t jobuid, size_t *tbl_idx) { size_t idx, size; session_t *session_tbl = NULL; - size = pmix_value_array_get_size(_session_array); - session_tbl = PMIX_VALUE_ARRAY_GET_BASE(_session_array, session_t); + size = pmix_value_array_get_size(ds_ctx->session_array); + session_tbl = PMIX_VALUE_ARRAY_GET_BASE(ds_ctx->session_array, session_t); for(idx = 0; idx < size; idx++) { if (session_tbl[idx].in_use && session_tbl[idx].jobuid == jobuid) { @@ -905,37 +439,38 @@ static inline int _esh_jobuid_tbl_search(uid_t jobuid, size_t *tbl_idx) return PMIX_ERR_NOT_FOUND; } -static inline int _esh_session_tbl_add(size_t *tbl_idx) +static inline int _esh_session_tbl_add(pmix_common_dstore_ctx_t *ds_ctx, size_t *tbl_idx) { size_t idx; - size_t size = pmix_value_array_get_size(_session_array); - session_t *s_tbl = 
PMIX_VALUE_ARRAY_GET_BASE(_session_array, session_t); + size_t size = pmix_value_array_get_size(ds_ctx->session_array); + session_t *s_tbl = PMIX_VALUE_ARRAY_GET_BASE(ds_ctx->session_array, session_t); session_t *new_sesion; pmix_status_t rc = PMIX_SUCCESS; for(idx = 0; idx < size; idx ++) { if (0 == s_tbl[idx].in_use) { - s_tbl[idx].in_use = 1; - *tbl_idx = idx; - return PMIX_SUCCESS; + goto done; } } - if (NULL == (new_sesion = pmix_value_array_get_item(_session_array, idx))) { + if (NULL == (new_sesion = pmix_value_array_get_item(ds_ctx->session_array, idx))) { rc = PMIX_ERR_OUT_OF_RESOURCE; PMIX_ERROR_LOG(rc); return rc; } + +done: s_tbl[idx].in_use = 1; *tbl_idx = idx; return PMIX_SUCCESS; } -static inline ns_map_data_t * _esh_session_map_search_server(const char *nspace) +static inline ns_map_data_t * _esh_session_map_search_server(pmix_common_dstore_ctx_t *ds_ctx, + const char *nspace) { - size_t idx, size = pmix_value_array_get_size(_ns_map_array); - ns_map_t *ns_map = PMIX_VALUE_ARRAY_GET_BASE(_ns_map_array, ns_map_t); + size_t idx, size = pmix_value_array_get_size(ds_ctx->ns_map_array); + ns_map_t *ns_map = PMIX_VALUE_ARRAY_GET_BASE(ds_ctx->ns_map_array, ns_map_t); if (NULL == nspace) { return NULL; } @@ -949,10 +484,11 @@ static inline ns_map_data_t * _esh_session_map_search_server(const char *nspace) return NULL; } -static inline ns_map_data_t * _esh_session_map_search_client(const char *nspace) +static inline ns_map_data_t * _esh_session_map_search_client(pmix_common_dstore_ctx_t *ds_ctx, + const char *nspace) { - size_t idx, size = pmix_value_array_get_size(_ns_map_array); - ns_map_t *ns_map = PMIX_VALUE_ARRAY_GET_BASE(_ns_map_array, ns_map_t); + size_t idx, size = pmix_value_array_get_size(ds_ctx->ns_map_array); + ns_map_t *ns_map = PMIX_VALUE_ARRAY_GET_BASE(ds_ctx->ns_map_array, ns_map_t); if (NULL == nspace) { return NULL; @@ -964,29 +500,19 @@ static inline ns_map_data_t * _esh_session_map_search_client(const char *nspace) return 
&ns_map[idx].data; } } - return _esh_session_map(nspace, 0); + return _esh_session_map(ds_ctx, nspace, 0, 0); } -static inline int _esh_session_init(size_t idx, ns_map_data_t *m, size_t jobuid, int setjobuid) +static int _esh_session_init(pmix_common_dstore_ctx_t *ds_ctx, size_t idx, ns_map_data_t *m, + uint32_t local_size, size_t jobuid, int setjobuid) { - seg_desc_t *seg = NULL; - session_t *s = &(PMIX_VALUE_ARRAY_GET_ITEM(_session_array, session_t, idx)); + pmix_dstore_seg_desc_t *seg = NULL; + session_t *s = &(PMIX_VALUE_ARRAY_GET_ITEM(ds_ctx->session_array, session_t, idx)); pmix_status_t rc = PMIX_SUCCESS; s->setjobuid = setjobuid; s->jobuid = jobuid; - s->nspace_path = strdup(_base_path); - - /* create a lock file to prevent clients from reading while server is writing to the shared memory. - * This situation is quite often, especially in case of direct modex when clients might ask for data - * simultaneously.*/ - if(0 > asprintf(&s->lockfile, "%s/dstore_sm.lock", s->nspace_path)) { - rc = PMIX_ERR_OUT_OF_RESOURCE; - PMIX_ERROR_LOG(rc); - return rc; - } - PMIX_OUTPUT_VERBOSE((10, pmix_gds_base_framework.framework_output, - "%s:%d:%s _lockfile_name: %s", __FILE__, __LINE__, __func__, s->lockfile)); + s->nspace_path = strdup(ds_ctx->base_path); if (PMIX_PROC_IS_SERVER(pmix_globals.mypeer)) { if (0 != mkdir(s->nspace_path, 0770)) { @@ -1005,7 +531,8 @@ static inline int _esh_session_init(size_t idx, ns_map_data_t *m, size_t jobuid, return rc; } } - seg = _create_new_segment(INITIAL_SEGMENT, m, 0); + seg = pmix_common_dstor_create_new_segment(PMIX_DSTORE_INITIAL_SEGMENT, ds_ctx->base_path, + m->name, 0, ds_ctx->jobuid, ds_ctx->setjobuid); if( NULL == seg ){ rc = PMIX_ERR_OUT_OF_RESOURCE; PMIX_ERROR_LOG(rc); @@ -1013,245 +540,92 @@ static inline int _esh_session_init(size_t idx, ns_map_data_t *m, size_t jobuid, } } else { - seg = _attach_new_segment(INITIAL_SEGMENT, m, 0); + seg = pmix_common_dstor_attach_new_segment(PMIX_DSTORE_INITIAL_SEGMENT, 
ds_ctx->base_path, m->name, 0); if( NULL == seg ){ rc = PMIX_ERR_OUT_OF_RESOURCE; PMIX_ERROR_LOG(rc); return rc; } } - - if (NULL == _esh_lock_init) { - rc = PMIX_ERR_INIT; - PMIX_ERROR_LOG(rc); - return rc; - } - if ( PMIX_SUCCESS != (rc = _esh_lock_init(m->tbl_idx))) { - PMIX_ERROR_LOG(rc); - return rc; - } - s->sm_seg_first = seg; s->sm_seg_last = s->sm_seg_first; + return PMIX_SUCCESS; } -static inline void _esh_session_release(session_t *s) +static void _esh_session_release(pmix_common_dstore_ctx_t *ds_ctx, size_t idx) { + session_t *s = &(PMIX_VALUE_ARRAY_GET_ITEM(ds_ctx->session_array, session_t, idx)); + if (!s->in_use) { return; } - _delete_sm_desc(s->sm_seg_first); - /* if the lock fd was somehow set, then we - * need to close it */ - if (0 != s->lockfd) { - close(s->lockfd); - } + pmix_common_dstor_delete_sm_desc(s->sm_seg_first); + + ds_ctx->lock_cbs->finalize(&_ESH_SESSION_lock(ds_ctx->session_array, idx)); - if (NULL != s->lockfile) { - if(PMIX_PROC_IS_SERVER(pmix_globals.mypeer)) { - unlink(s->lockfile); - } - free(s->lockfile); - } if (NULL != s->nspace_path) { if(PMIX_PROC_IS_SERVER(pmix_globals.mypeer)) { _esh_dir_del(s->nspace_path); } free(s->nspace_path); } -#ifdef ESH_PTHREAD_LOCK - _rwlock_release(s); -#endif memset ((char *) s, 0, sizeof(*s)); } -static void _set_constants_from_env() +static void _set_constants_from_env(pmix_common_dstore_ctx_t *ds_ctx) { char *str; - int page_size = _pmix_getpagesize(); + int page_size = pmix_common_dstor_getpagesize(); if( NULL != (str = getenv(ESH_ENV_INITIAL_SEG_SIZE)) ) { - _initial_segment_size = strtoul(str, NULL, 10); - if ((size_t)page_size > _initial_segment_size) { - _initial_segment_size = (size_t)page_size; + ds_ctx->initial_segment_size = strtoul(str, NULL, 10); + if ((size_t)page_size > ds_ctx->initial_segment_size) { + ds_ctx->initial_segment_size = (size_t)page_size; } } - if (0 == _initial_segment_size) { - _initial_segment_size = INITIAL_SEG_SIZE; + if (0 == ds_ctx->initial_segment_size) { 
+ ds_ctx->initial_segment_size = INITIAL_SEG_SIZE; } if( NULL != (str = getenv(ESH_ENV_NS_META_SEG_SIZE)) ) { - _meta_segment_size = strtoul(str, NULL, 10); - if ((size_t)page_size > _meta_segment_size) { - _meta_segment_size = (size_t)page_size; + ds_ctx->meta_segment_size = strtoul(str, NULL, 10); + if ((size_t)page_size > ds_ctx->meta_segment_size) { + ds_ctx->meta_segment_size = (size_t)page_size; } } - if (0 == _meta_segment_size) { - _meta_segment_size = NS_META_SEG_SIZE; + if (0 == ds_ctx->meta_segment_size) { + ds_ctx->meta_segment_size = NS_META_SEG_SIZE; } if( NULL != (str = getenv(ESH_ENV_NS_DATA_SEG_SIZE)) ) { - _data_segment_size = strtoul(str, NULL, 10); - if ((size_t)page_size > _data_segment_size) { - _data_segment_size = (size_t)page_size; + ds_ctx->data_segment_size = strtoul(str, NULL, 10); + if ((size_t)page_size > ds_ctx->data_segment_size) { + ds_ctx->data_segment_size = (size_t)page_size; } } - if (0 == _data_segment_size) { - _data_segment_size = NS_DATA_SEG_SIZE; + if (0 == ds_ctx->data_segment_size) { + ds_ctx->data_segment_size = NS_DATA_SEG_SIZE; } if (NULL != (str = getenv(ESH_ENV_LINEAR))) { if (1 == strtoul(str, NULL, 10)) { - _direct_mode = 1; + ds_ctx->direct_mode = 1; } } - _lock_segment_size = page_size; - _max_ns_num = (_initial_segment_size - sizeof(size_t) * 2) / sizeof(ns_seg_info_t); - _max_meta_elems = (_meta_segment_size - sizeof(size_t)) / sizeof(rank_meta_info); + ds_ctx->lock_segment_size = page_size; + ds_ctx->max_ns_num = (ds_ctx->initial_segment_size - sizeof(size_t) * 2) / sizeof(ns_seg_info_t); + ds_ctx->max_meta_elems = (ds_ctx->meta_segment_size - sizeof(size_t)) / sizeof(rank_meta_info); -} + pmix_common_dstor_init_segment_info(ds_ctx->initial_segment_size, ds_ctx->meta_segment_size, + ds_ctx->data_segment_size); -static void _delete_sm_desc(seg_desc_t *desc) -{ - seg_desc_t *tmp; - - /* free all global segments */ - while (NULL != desc) { - tmp = desc->next; - /* detach & unlink from current desc */ - if 
(desc->seg_info.seg_cpid == getpid()) { - pmix_pshmem.segment_unlink(&desc->seg_info); - } - pmix_pshmem.segment_detach(&desc->seg_info); - free(desc); - desc = tmp; - } -} - -static int _pmix_getpagesize(void) -{ -#if defined(_SC_PAGESIZE ) - return sysconf(_SC_PAGESIZE); -#elif defined(_SC_PAGE_SIZE) - return sysconf(_SC_PAGE_SIZE); -#else - return 65536; /* safer to overestimate than under */ -#endif -} - -static seg_desc_t *_create_new_segment(segment_type type, const ns_map_data_t *ns_map, uint32_t id) -{ - pmix_status_t rc; - char file_name[PMIX_PATH_MAX]; - size_t size; - seg_desc_t *new_seg = NULL; - - PMIX_OUTPUT_VERBOSE((10, pmix_gds_base_framework.framework_output, - "%s:%d:%s: segment type %d, nspace %s, id %u", - __FILE__, __LINE__, __func__, type, ns_map->name, id)); - - switch (type) { - case INITIAL_SEGMENT: - size = _initial_segment_size; - snprintf(file_name, PMIX_PATH_MAX, "%s/initial-pmix_shared-segment-%u", - _ESH_SESSION_path(ns_map->tbl_idx), id); - break; - case NS_META_SEGMENT: - size = _meta_segment_size; - snprintf(file_name, PMIX_PATH_MAX, "%s/smseg-%s-%u", - _ESH_SESSION_path(ns_map->tbl_idx), ns_map->name, id); - break; - case NS_DATA_SEGMENT: - size = _data_segment_size; - snprintf(file_name, PMIX_PATH_MAX, "%s/smdataseg-%s-%d", - _ESH_SESSION_path(ns_map->tbl_idx), ns_map->name, id); - break; - default: - PMIX_ERROR_LOG(PMIX_ERROR); - return NULL; - } - new_seg = (seg_desc_t*)malloc(sizeof(seg_desc_t)); - if (new_seg) { - new_seg->id = id; - new_seg->next = NULL; - new_seg->type = type; - rc = pmix_pshmem.segment_create(&new_seg->seg_info, file_name, size); - if (PMIX_SUCCESS != rc) { - PMIX_ERROR_LOG(rc); - goto err_exit; - } - memset(new_seg->seg_info.seg_base_addr, 0, size); - - - if (_ESH_SESSION_setjobuid(ns_map->tbl_idx) > 0){ - rc = PMIX_ERR_PERM; - if (0 > chown(file_name, (uid_t) _ESH_SESSION_jobuid(ns_map->tbl_idx), (gid_t) -1)){ - PMIX_ERROR_LOG(rc); - goto err_exit; - } - /* set the mode as required */ - if (0 > 
chmod(file_name, S_IRUSR | S_IRGRP | S_IWGRP )) { - PMIX_ERROR_LOG(rc); - goto err_exit; - } - } - } - return new_seg; - -err_exit: - if( NULL != new_seg ){ - free(new_seg); - } - return NULL; -} - -static seg_desc_t *_attach_new_segment(segment_type type, const ns_map_data_t *ns_map, uint32_t id) -{ - pmix_status_t rc; - seg_desc_t *new_seg = NULL; - new_seg = (seg_desc_t*)malloc(sizeof(seg_desc_t)); - new_seg->id = id; - new_seg->next = NULL; - new_seg->type = type; - - PMIX_OUTPUT_VERBOSE((10, pmix_gds_base_framework.framework_output, - "%s:%d:%s: segment type %d, nspace %s, id %u", - __FILE__, __LINE__, __func__, type, ns_map->name, id)); - - switch (type) { - case INITIAL_SEGMENT: - new_seg->seg_info.seg_size = _initial_segment_size; - snprintf(new_seg->seg_info.seg_name, PMIX_PATH_MAX, "%s/initial-pmix_shared-segment-%u", - _ESH_SESSION_path(ns_map->tbl_idx), id); - break; - case NS_META_SEGMENT: - new_seg->seg_info.seg_size = _meta_segment_size; - snprintf(new_seg->seg_info.seg_name, PMIX_PATH_MAX, "%s/smseg-%s-%u", - _ESH_SESSION_path(ns_map->tbl_idx), ns_map->name, id); - break; - case NS_DATA_SEGMENT: - new_seg->seg_info.seg_size = _data_segment_size; - snprintf(new_seg->seg_info.seg_name, PMIX_PATH_MAX, "%s/smdataseg-%s-%d", - _ESH_SESSION_path(ns_map->tbl_idx), ns_map->name, id); - break; - default: - free(new_seg); - PMIX_ERROR_LOG(PMIX_ERROR); - return NULL; - } - rc = pmix_pshmem.segment_attach(&new_seg->seg_info, PMIX_PSHMEM_RONLY); - if (PMIX_SUCCESS != rc) { - free(new_seg); - new_seg = NULL; - PMIX_ERROR_LOG(rc); - } - return new_seg; } /* This function synchronizes the content of initial shared segment and the local track list. 
*/ -static int _update_ns_elem(ns_track_elem_t *ns_elem, ns_seg_info_t *info) +static int _update_ns_elem(pmix_common_dstore_ctx_t *ds_ctx, ns_track_elem_t *ns_elem, + ns_seg_info_t *info) { - seg_desc_t *seg, *tmp = NULL; + pmix_dstore_seg_desc_t *seg, *tmp = NULL; size_t i, offs; ns_map_data_t *ns_map = NULL; pmix_status_t rc; @@ -1260,7 +634,7 @@ static int _update_ns_elem(ns_track_elem_t *ns_elem, ns_seg_info_t *info) "%s:%d:%s", __FILE__, __LINE__, __func__)); - if (NULL == (ns_map = _esh_session_map_search(info->ns_map.name))) { + if (NULL == (ns_map = ds_ctx->session_map_search(ds_ctx, info->ns_map.name))) { rc = PMIX_ERR_NOT_AVAILABLE; PMIX_ERROR_LOG(rc); return rc; @@ -1276,14 +650,16 @@ static int _update_ns_elem(ns_track_elem_t *ns_elem, ns_seg_info_t *info) /* synchronize number of meta segments for the target namespace. */ for (i = ns_elem->num_meta_seg; i < info->num_meta_seg; i++) { if (PMIX_PROC_IS_SERVER(pmix_globals.mypeer)) { - seg = _create_new_segment(NS_META_SEGMENT, &info->ns_map, i); + seg = pmix_common_dstor_create_new_segment(PMIX_DSTORE_NS_META_SEGMENT, ds_ctx->base_path, + info->ns_map.name, i, ds_ctx->jobuid, + ds_ctx->setjobuid); if (NULL == seg) { rc = PMIX_ERR_OUT_OF_RESOURCE; PMIX_ERROR_LOG(rc); return rc; } } else { - seg = _attach_new_segment(NS_META_SEGMENT, &info->ns_map, i); + seg = pmix_common_dstor_attach_new_segment(PMIX_DSTORE_NS_META_SEGMENT, ds_ctx->base_path, info->ns_map.name, i); if (NULL == seg) { rc = PMIX_ERR_NOT_AVAILABLE; PMIX_ERROR_LOG(rc); @@ -1309,7 +685,9 @@ static int _update_ns_elem(ns_track_elem_t *ns_elem, ns_seg_info_t *info) /* synchronize number of data segments for the target namespace. 
*/ for (i = ns_elem->num_data_seg; i < info->num_data_seg; i++) { if (PMIX_PROC_IS_SERVER(pmix_globals.mypeer)) { - seg = _create_new_segment(NS_DATA_SEGMENT, &info->ns_map, i); + seg = pmix_common_dstor_create_new_segment(PMIX_DSTORE_NS_DATA_SEGMENT, ds_ctx->base_path, + info->ns_map.name, i, ds_ctx->jobuid, + ds_ctx->setjobuid); if (NULL == seg) { rc = PMIX_ERR_OUT_OF_RESOURCE; PMIX_ERROR_LOG(rc); @@ -1318,7 +696,7 @@ static int _update_ns_elem(ns_track_elem_t *ns_elem, ns_seg_info_t *info) offs = sizeof(size_t);//shift on offset field itself memcpy(seg->seg_info.seg_base_addr, &offs, sizeof(size_t)); } else { - seg = _attach_new_segment(NS_DATA_SEGMENT, &info->ns_map, i); + seg = pmix_common_dstor_attach_new_segment(PMIX_DSTORE_NS_DATA_SEGMENT, ds_ctx->base_path, info->ns_map.name, i); if (NULL == seg) { rc = PMIX_ERR_NOT_AVAILABLE; PMIX_ERROR_LOG(rc); @@ -1338,66 +716,54 @@ static int _update_ns_elem(ns_track_elem_t *ns_elem, ns_seg_info_t *info) return PMIX_SUCCESS; } -static seg_desc_t *extend_segment(seg_desc_t *segdesc, const ns_map_data_t *ns_map) -{ - seg_desc_t *tmp, *seg; - - PMIX_OUTPUT_VERBOSE((2, pmix_gds_base_framework.framework_output, - "%s:%d:%s", - __FILE__, __LINE__, __func__)); - /* find last segment */ - tmp = segdesc; - while (NULL != tmp->next) { - tmp = tmp->next; - } - /* create another segment, the old one is full. 
*/ - seg = _create_new_segment(segdesc->type, ns_map, tmp->id + 1); - tmp->next = seg; - - return seg; -} - -static int _put_ns_info_to_initial_segment(const ns_map_data_t *ns_map, pmix_pshmem_seg_t *metaseg, pmix_pshmem_seg_t *dataseg) +static int _put_ns_info_to_initial_segment(pmix_common_dstore_ctx_t *ds_ctx, + const ns_map_data_t *ns_map, pmix_pshmem_seg_t *metaseg, + pmix_pshmem_seg_t *dataseg) { ns_seg_info_t elem; size_t num_elems; - num_elems = *((size_t*)(_ESH_SESSION_sm_seg_last(ns_map->tbl_idx)->seg_info.seg_base_addr)); - seg_desc_t *last_seg = _ESH_SESSION_sm_seg_last(ns_map->tbl_idx); + num_elems = *((size_t*)(_ESH_SESSION_sm_seg_last(ds_ctx->session_array, + ns_map->tbl_idx)->seg_info.seg_base_addr)); + pmix_dstore_seg_desc_t *last_seg = _ESH_SESSION_sm_seg_last(ds_ctx->session_array, ns_map->tbl_idx); pmix_status_t rc; PMIX_OUTPUT_VERBOSE((10, pmix_gds_base_framework.framework_output, "%s:%d:%s", __FILE__, __LINE__, __func__)); - if (_max_ns_num == num_elems) { + if (ds_ctx->max_ns_num == num_elems) { num_elems = 0; - if (NULL == (last_seg = extend_segment(last_seg, ns_map))) { + if (NULL == (last_seg = pmix_common_dstor_extend_segment(last_seg, ds_ctx->base_path, ns_map->name, + ds_ctx->jobuid, ds_ctx->setjobuid))) { rc = PMIX_ERROR; PMIX_ERROR_LOG(rc); return rc; } /* mark previous segment as full */ size_t full = 1; - memcpy((uint8_t*)(_ESH_SESSION_sm_seg_last(ns_map->tbl_idx)->seg_info.seg_base_addr + sizeof(size_t)), &full, sizeof(size_t)); - _ESH_SESSION_sm_seg_last(ns_map->tbl_idx) = last_seg; - memset(_ESH_SESSION_sm_seg_last(ns_map->tbl_idx)->seg_info.seg_base_addr, 0, _initial_segment_size); + memcpy((uint8_t*)(_ESH_SESSION_sm_seg_last(ds_ctx->session_array, ns_map->tbl_idx)->seg_info.seg_base_addr + + sizeof(size_t)), &full, sizeof(size_t)); + _ESH_SESSION_sm_seg_last(ds_ctx->session_array, ns_map->tbl_idx) = last_seg; + memset(_ESH_SESSION_sm_seg_last(ds_ctx->session_array, ns_map->tbl_idx)->seg_info.seg_base_addr, + 0, 
ds_ctx->initial_segment_size); } memset(&elem.ns_map, 0, sizeof(elem.ns_map)); - strncpy(elem.ns_map.name, ns_map->name, sizeof(elem.ns_map.name)-1); + pmix_strncpy(elem.ns_map.name, ns_map->name, sizeof(elem.ns_map.name)-1); elem.ns_map.tbl_idx = ns_map->tbl_idx; elem.num_meta_seg = 1; elem.num_data_seg = 1; - memcpy((uint8_t*)(_ESH_SESSION_sm_seg_last(ns_map->tbl_idx)->seg_info.seg_base_addr) + sizeof(size_t) * 2 + num_elems * sizeof(ns_seg_info_t), - &elem, sizeof(ns_seg_info_t)); + memcpy((uint8_t*)(_ESH_SESSION_sm_seg_last(ds_ctx->session_array, ns_map->tbl_idx)->seg_info.seg_base_addr) + + sizeof(size_t) * 2 + num_elems * sizeof(ns_seg_info_t), &elem, sizeof(ns_seg_info_t)); num_elems++; - memcpy((uint8_t*)(_ESH_SESSION_sm_seg_last(ns_map->tbl_idx)->seg_info.seg_base_addr), &num_elems, sizeof(size_t)); + memcpy((uint8_t*)(_ESH_SESSION_sm_seg_last(ds_ctx->session_array, ns_map->tbl_idx)->seg_info.seg_base_addr), + &num_elems, sizeof(size_t)); return PMIX_SUCCESS; } /* clients should sync local info with information from initial segment regularly */ -static void _update_initial_segment_info(const ns_map_data_t *ns_map) +static void _update_initial_segment_info(pmix_common_dstore_ctx_t *ds_ctx, const ns_map_data_t *ns_map) { - seg_desc_t *tmp; - tmp = _ESH_SESSION_sm_seg_first(ns_map->tbl_idx); + pmix_dstore_seg_desc_t *tmp; + tmp = _ESH_SESSION_sm_seg_first(ds_ctx->session_array, ns_map->tbl_idx); PMIX_OUTPUT_VERBOSE((2, pmix_gds_base_framework.framework_output, "%s:%d:%s", __FILE__, __LINE__, __func__)); @@ -1406,7 +772,8 @@ static void _update_initial_segment_info(const ns_map_data_t *ns_map) do { /* check if current segment was marked as full but no more next segment is in the chain */ if (NULL == tmp->next && 1 == *((size_t*)((uint8_t*)(tmp->seg_info.seg_base_addr) + sizeof(size_t)))) { - tmp->next = _attach_new_segment(INITIAL_SEGMENT, ns_map, tmp->id+1); + tmp->next = pmix_common_dstor_attach_new_segment(PMIX_DSTORE_INITIAL_SEGMENT, ds_ctx->base_path, + 
ns_map->name, tmp->id+1); } tmp = tmp->next; } @@ -1414,11 +781,12 @@ static void _update_initial_segment_info(const ns_map_data_t *ns_map) } /* this function will be used by clients to get ns data from the initial segment and add them to the tracker list */ -static ns_seg_info_t *_get_ns_info_from_initial_segment(const ns_map_data_t *ns_map) +static ns_seg_info_t *_get_ns_info_from_initial_segment(pmix_common_dstore_ctx_t *ds_ctx, + const ns_map_data_t *ns_map) { pmix_status_t rc; size_t i; - seg_desc_t *tmp; + pmix_dstore_seg_desc_t *tmp; ns_seg_info_t *elem, *cur_elem; elem = NULL; size_t num_elems; @@ -1426,7 +794,7 @@ static ns_seg_info_t *_get_ns_info_from_initial_segment(const ns_map_data_t *ns_ PMIX_OUTPUT_VERBOSE((2, pmix_gds_base_framework.framework_output, "%s:%d:%s", __FILE__, __LINE__, __func__)); - tmp = _ESH_SESSION_sm_seg_first(ns_map->tbl_idx); + tmp = _ESH_SESSION_sm_seg_first(ds_ctx->session_array, ns_map->tbl_idx); rc = 1; /* go through all global segments */ @@ -1448,10 +816,11 @@ static ns_seg_info_t *_get_ns_info_from_initial_segment(const ns_map_data_t *ns_ return elem; } -static ns_track_elem_t *_get_track_elem_for_namespace(ns_map_data_t *ns_map) +static ns_track_elem_t *_get_track_elem_for_namespace(pmix_common_dstore_ctx_t *ds_ctx, + ns_map_data_t *ns_map) { ns_track_elem_t *new_elem = NULL; - size_t size = pmix_value_array_get_size(_ns_track_array); + size_t size = pmix_value_array_get_size(ds_ctx->ns_track_array); PMIX_OUTPUT_VERBOSE((10, pmix_gds_base_framework.framework_output, "%s:%d:%s: nspace %s", @@ -1464,27 +833,27 @@ static ns_track_elem_t *_get_track_elem_for_namespace(ns_map_data_t *ns_map) } /* data for this namespace should be already stored in shared memory region. */ /* so go and just put new data. 
*/ - return pmix_value_array_get_item(_ns_track_array, ns_map->track_idx); + return pmix_value_array_get_item(ds_ctx->ns_track_array, ns_map->track_idx); } /* create shared memory regions for this namespace and store its info locally * to operate with address and detach/unlink afterwards. */ - if (NULL == (new_elem = pmix_value_array_get_item(_ns_track_array, size))) { + if (NULL == (new_elem = pmix_value_array_get_item(ds_ctx->ns_track_array, size))) { return NULL; } PMIX_CONSTRUCT(new_elem, ns_track_elem_t); - strncpy(new_elem->ns_map.name, ns_map->name, sizeof(new_elem->ns_map.name)-1); + pmix_strncpy(new_elem->ns_map.name, ns_map->name, sizeof(new_elem->ns_map.name)-1); /* save latest track idx to info of nspace */ ns_map->track_idx = size; return new_elem; } -static rank_meta_info *_get_rank_meta_info(pmix_rank_t rank, seg_desc_t *segdesc) +static rank_meta_info *_get_rank_meta_info(pmix_common_dstore_ctx_t *ds_ctx, pmix_rank_t rank, pmix_dstore_seg_desc_t *segdesc) { size_t i; rank_meta_info *elem = NULL; - seg_desc_t *tmp = segdesc; + pmix_dstore_seg_desc_t *tmp = segdesc; size_t num_elems, rel_offset; int id; rank_meta_info *cur_elem; @@ -1495,7 +864,7 @@ static rank_meta_info *_get_rank_meta_info(pmix_rank_t rank, seg_desc_t *segdesc "%s:%d:%s", __FILE__, __LINE__, __func__)); - if (1 == _direct_mode) { + if (1 == ds_ctx->direct_mode) { /* do linear search to find the requested rank inside all meta segments * for this namespace. */ /* go through all existing meta segments for this namespace */ @@ -1514,8 +883,8 @@ static rank_meta_info *_get_rank_meta_info(pmix_rank_t rank, seg_desc_t *segdesc } else { /* directly compute index of meta segment (id) and relative offset (rel_offset) * inside this segment for fast lookup a rank_meta_info object for the requested rank. 
*/ - id = rcount/_max_meta_elems; - rel_offset = (rcount%_max_meta_elems) * sizeof(rank_meta_info) + sizeof(size_t); + id = rcount/ds_ctx->max_meta_elems; + rel_offset = (rcount % ds_ctx->max_meta_elems) * sizeof(rank_meta_info) + sizeof(size_t); /* go through all existing meta segments for this namespace. * Stop at id number if it exists. */ while (NULL != tmp->next && 0 != id) { @@ -1534,10 +903,10 @@ static rank_meta_info *_get_rank_meta_info(pmix_rank_t rank, seg_desc_t *segdesc return elem; } -static int set_rank_meta_info(ns_track_elem_t *ns_info, rank_meta_info *rinfo) +static int set_rank_meta_info(pmix_common_dstore_ctx_t *ds_ctx, ns_track_elem_t *ns_info, rank_meta_info *rinfo) { /* it's claimed that there is still no meta info for this rank stored */ - seg_desc_t *tmp; + pmix_dstore_seg_desc_t *tmp; size_t num_elems, rel_offset; int id, count; rank_meta_info *cur_elem; @@ -1554,18 +923,19 @@ static int set_rank_meta_info(ns_track_elem_t *ns_info, rank_meta_info *rinfo) (unsigned long)rinfo->offset, (unsigned long)rinfo->count)); tmp = ns_info->meta_seg; - if (1 == _direct_mode) { + if (1 == ds_ctx->direct_mode) { /* get the last meta segment to put new rank_meta_info at the end. 
*/ while (NULL != tmp->next) { tmp = tmp->next; } num_elems = *((size_t*)(tmp->seg_info.seg_base_addr)); - if (_max_meta_elems <= num_elems) { + if (ds_ctx->max_meta_elems <= num_elems) { PMIX_OUTPUT_VERBOSE((2, pmix_gds_base_framework.framework_output, "%s:%d:%s: extend meta segment for nspace %s", __FILE__, __LINE__, __func__, ns_info->ns_map.name)); /* extend meta segment, so create a new one */ - tmp = extend_segment(tmp, &ns_info->ns_map); + tmp = pmix_common_dstor_extend_segment(tmp, ds_ctx->base_path, ns_info->ns_map.name, + ds_ctx->jobuid, ds_ctx->setjobuid); if (NULL == tmp) { PMIX_ERROR_LOG(PMIX_ERROR); return PMIX_ERROR; @@ -1573,7 +943,7 @@ static int set_rank_meta_info(ns_track_elem_t *ns_info, rank_meta_info *rinfo) ns_info->num_meta_seg++; memset(tmp->seg_info.seg_base_addr, 0, sizeof(rank_meta_info)); /* update number of meta segments for namespace in initial_segment */ - ns_seg_info_t *elem = _get_ns_info_from_initial_segment(&ns_info->ns_map); + ns_seg_info_t *elem = _get_ns_info_from_initial_segment(ds_ctx, &ns_info->ns_map); if (NULL == elem) { PMIX_ERROR_LOG(PMIX_ERROR); return PMIX_ERROR; @@ -1591,8 +961,8 @@ static int set_rank_meta_info(ns_track_elem_t *ns_info, rank_meta_info *rinfo) /* directly compute index of meta segment (id) and relative offset (rel_offset) * inside this segment for fast lookup a rank_meta_info object for the requested rank. */ size_t rcount = rinfo->rank == PMIX_RANK_WILDCARD ? 0 : rinfo->rank + 1; - id = rcount/_max_meta_elems; - rel_offset = (rcount % _max_meta_elems) * sizeof(rank_meta_info) + sizeof(size_t); + id = rcount/ds_ctx->max_meta_elems; + rel_offset = (rcount % ds_ctx->max_meta_elems) * sizeof(rank_meta_info) + sizeof(size_t); count = id; /* go through all existing meta segments for this namespace. * Stop at id number if it exists. 
*/ @@ -1604,7 +974,8 @@ static int set_rank_meta_info(ns_track_elem_t *ns_info, rank_meta_info *rinfo) if ((int)ns_info->num_meta_seg < (id+1)) { while ((int)ns_info->num_meta_seg != (id+1)) { /* extend meta segment, so create a new one */ - tmp = extend_segment(tmp, &ns_info->ns_map); + tmp = pmix_common_dstor_extend_segment(tmp, ds_ctx->base_path, ns_info->ns_map.name, + ds_ctx->jobuid, ds_ctx->setjobuid); if (NULL == tmp) { PMIX_ERROR_LOG(PMIX_ERROR); return PMIX_ERROR; @@ -1613,7 +984,7 @@ static int set_rank_meta_info(ns_track_elem_t *ns_info, rank_meta_info *rinfo) ns_info->num_meta_seg++; } /* update number of meta segments for namespace in initial_segment */ - ns_seg_info_t *elem = _get_ns_info_from_initial_segment(&ns_info->ns_map); + ns_seg_info_t *elem = _get_ns_info_from_initial_segment(ds_ctx, &ns_info->ns_map); if (NULL == elem) { PMIX_ERROR_LOG(PMIX_ERROR); return PMIX_ERROR; @@ -1629,9 +1000,9 @@ static int set_rank_meta_info(ns_track_elem_t *ns_info, rank_meta_info *rinfo) return PMIX_SUCCESS; } -static uint8_t *_get_data_region_by_offset(seg_desc_t *segdesc, size_t offset) +static uint8_t *_get_data_region_by_offset(pmix_common_dstore_ctx_t *ds_ctx, pmix_dstore_seg_desc_t *segdesc, size_t offset) { - seg_desc_t *tmp = segdesc; + pmix_dstore_seg_desc_t *tmp = segdesc; size_t rel_offset = offset; uint8_t *dataaddr = NULL; @@ -1641,8 +1012,8 @@ static uint8_t *_get_data_region_by_offset(seg_desc_t *segdesc, size_t offset) /* go through all existing data segments for this namespace */ do { - if (rel_offset >= _data_segment_size) { - rel_offset -= _data_segment_size; + if (rel_offset >= ds_ctx->data_segment_size) { + rel_offset -= ds_ctx->data_segment_size; } else { dataaddr = tmp->seg_info.seg_base_addr + rel_offset; } @@ -1652,10 +1023,10 @@ static uint8_t *_get_data_region_by_offset(seg_desc_t *segdesc, size_t offset) return dataaddr; } -static size_t get_free_offset(seg_desc_t *data_seg) +static size_t get_free_offset(pmix_common_dstore_ctx_t 
*ds_ctx, pmix_dstore_seg_desc_t *data_seg) { size_t offset; - seg_desc_t *tmp; + pmix_dstore_seg_desc_t *tmp; int id = 0; tmp = data_seg; /* first find the last data segment */ @@ -1668,35 +1039,42 @@ static size_t get_free_offset(seg_desc_t *data_seg) /* this is the first created data segment, the first 8 bytes are used to place the free offset value itself */ offset = sizeof(size_t); } - return (id * _data_segment_size + offset); + return (id * ds_ctx->data_segment_size + offset); } -static int put_empty_ext_slot(seg_desc_t *dataseg) +static int put_empty_ext_slot(pmix_common_dstore_ctx_t *ds_ctx, pmix_dstore_seg_desc_t *dataseg) { size_t global_offset, rel_offset, data_ended, val = 0; uint8_t *addr; - global_offset = get_free_offset(dataseg); - rel_offset = global_offset % _data_segment_size; - if (rel_offset + EXT_SLOT_SIZE() > _data_segment_size) { + pmix_status_t rc; + + global_offset = get_free_offset(ds_ctx, dataseg); + rel_offset = global_offset % ds_ctx->data_segment_size; + if (rel_offset + PMIX_DS_SLOT_SIZE(ds_ctx) > ds_ctx->data_segment_size) { PMIX_ERROR_LOG(PMIX_ERROR); return PMIX_ERROR; } - addr = _get_data_region_by_offset(dataseg, global_offset); - ESH_PUT_KEY(addr, ESH_REGION_EXTENSION, (void*)&val, sizeof(size_t)); - + addr = _get_data_region_by_offset(ds_ctx, dataseg, global_offset); + PMIX_DS_PUT_KEY(rc, ds_ctx, addr, ESH_REGION_EXTENSION, (void*)&val, sizeof(size_t)); + if (rc != PMIX_SUCCESS) { + PMIX_ERROR_LOG(rc); + return rc; + } /* update offset at the beginning of current segment */ - data_ended = rel_offset + EXT_SLOT_SIZE(); + data_ended = rel_offset + PMIX_DS_SLOT_SIZE(ds_ctx); addr = (uint8_t*)(addr - rel_offset); memcpy(addr, &data_ended, sizeof(size_t)); return PMIX_SUCCESS; } -static size_t put_data_to_the_end(ns_track_elem_t *ns_info, seg_desc_t *dataseg, char *key, void *buffer, size_t size) +static size_t put_data_to_the_end(pmix_common_dstore_ctx_t *ds_ctx, ns_track_elem_t *ns_info, + pmix_dstore_seg_desc_t *dataseg, char 
*key, void *buffer, size_t size) { size_t offset, id = 0; - seg_desc_t *tmp; + pmix_dstore_seg_desc_t *tmp; size_t global_offset, data_ended; uint8_t *addr; + pmix_status_t rc; PMIX_OUTPUT_VERBOSE((2, pmix_gds_base_framework.framework_output, "%s:%d:%s: key %s", @@ -1707,17 +1085,19 @@ static size_t put_data_to_the_end(ns_track_elem_t *ns_info, seg_desc_t *dataseg, tmp = tmp->next; id++; } - global_offset = get_free_offset(dataseg); - offset = global_offset % _data_segment_size; + global_offset = get_free_offset(ds_ctx, dataseg); + offset = global_offset % ds_ctx->data_segment_size; /* We should provide additional space at the end of segment to * place EXTENSION_SLOT to have an ability to enlarge data for this rank.*/ - if ((sizeof(size_t) + ESH_KEY_SIZE(key, size) + EXT_SLOT_SIZE()) > _data_segment_size) { + if ((sizeof(size_t) + PMIX_DS_KEY_SIZE(ds_ctx, key, size) + PMIX_DS_SLOT_SIZE(ds_ctx)) > + ds_ctx->data_segment_size) { /* this is an error case: segment is so small that cannot place evem a single key-value pair. * warn a user about it and fail. */ offset = 0; /* offset cannot be 0 in normal case, so we use this value to indicate a problem. 
*/ pmix_output(0, "PLEASE set NS_DATA_SEG_SIZE to value which is larger when %lu.", - (unsigned long)(sizeof(size_t) + strlen(key) + 1 + sizeof(size_t) + size + EXT_SLOT_SIZE())); + (unsigned long)(sizeof(size_t) + strlen(key) + 1 + sizeof(size_t) + + size + PMIX_DS_SLOT_SIZE(ds_ctx))); return offset; } @@ -1728,10 +1108,12 @@ static size_t put_data_to_the_end(ns_track_elem_t *ns_info, seg_desc_t *dataseg, * new segment wasn't allocated to us but (global_offset % _data_segment_size) == 0 * so if offset is 0 here - we need to allocate the segment as well */ - if ( (0 == offset) || ( (offset + ESH_KEY_SIZE(key, size) + EXT_SLOT_SIZE()) > _data_segment_size) ) { + if ( (0 == offset) || ( (offset + PMIX_DS_KEY_SIZE(ds_ctx, key, size) + + PMIX_DS_SLOT_SIZE(ds_ctx)) > ds_ctx->data_segment_size) ) { id++; /* create a new data segment. */ - tmp = extend_segment(tmp, &ns_info->ns_map); + tmp = pmix_common_dstor_extend_segment(tmp, ds_ctx->base_path, ns_info->ns_map.name, + ds_ctx->jobuid, ds_ctx->setjobuid); if (NULL == tmp) { PMIX_ERROR_LOG(PMIX_ERR_NOMEM); offset = 0; /* offset cannot be 0 in normal case, so we use this value to indicate a problem. */ @@ -1739,7 +1121,7 @@ static size_t put_data_to_the_end(ns_track_elem_t *ns_info, seg_desc_t *dataseg, } ns_info->num_data_seg++; /* update_ns_info_in_initial_segment */ - ns_seg_info_t *elem = _get_ns_info_from_initial_segment(&ns_info->ns_map); + ns_seg_info_t *elem = _get_ns_info_from_initial_segment(ds_ctx, &ns_info->ns_map); if (NULL == elem) { PMIX_ERROR_LOG(PMIX_ERR_NOMEM); offset = 0; /* offset cannot be 0 in normal case, so we use this value to indicate a problem. 
*/ @@ -1748,12 +1130,16 @@ static size_t put_data_to_the_end(ns_track_elem_t *ns_info, seg_desc_t *dataseg, elem->num_data_seg++; offset = sizeof(size_t); } - global_offset = offset + id * _data_segment_size; + global_offset = offset + id * ds_ctx->data_segment_size; addr = (uint8_t*)(tmp->seg_info.seg_base_addr)+offset; - ESH_PUT_KEY(addr, key, buffer, size); + PMIX_DS_PUT_KEY(rc, ds_ctx, addr, key, buffer, size); + if (rc != PMIX_SUCCESS) { + PMIX_ERROR_LOG(rc); + return 0; + } /* update offset at the beginning of current segment */ - data_ended = offset + ESH_KEY_SIZE(key, size); + data_ended = offset + PMIX_DS_KEY_SIZE(ds_ctx, key, size); addr = (uint8_t*)(tmp->seg_info.seg_base_addr); memcpy(addr, &data_ended, sizeof(size_t)); PMIX_OUTPUT_VERBOSE((1, pmix_gds_base_framework.framework_output, @@ -1761,17 +1147,18 @@ static size_t put_data_to_the_end(ns_track_elem_t *ns_info, seg_desc_t *dataseg, __FILE__, __LINE__, __func__, key, (unsigned long)offset, (unsigned long)data_ended, - (unsigned long)(id * _data_segment_size), + (unsigned long)(id * ds_ctx->data_segment_size), (unsigned long)size)); return global_offset; } -static int pmix_sm_store(ns_track_elem_t *ns_info, pmix_rank_t rank, pmix_kval_t *kval, rank_meta_info **rinfo, int data_exist) +static int pmix_sm_store(pmix_common_dstore_ctx_t *ds_ctx, ns_track_elem_t *ns_info, + pmix_rank_t rank, pmix_kval_t *kval, rank_meta_info **rinfo, int data_exist) { size_t offset, size, kval_cnt; pmix_buffer_t buffer; pmix_status_t rc; - seg_desc_t *datadesc; + pmix_dstore_seg_desc_t *datadesc; uint8_t *addr; PMIX_OUTPUT_VERBOSE((2, pmix_gds_base_framework.framework_output, @@ -1781,7 +1168,7 @@ static int pmix_sm_store(ns_track_elem_t *ns_info, pmix_rank_t rank, pmix_kval_t datadesc = ns_info->data_seg; /* pack value to the buffer */ PMIX_CONSTRUCT(&buffer, pmix_buffer_t); - PMIX_BFROPS_PACK(rc, _client_peer(), &buffer, kval->value, 1, PMIX_VALUE); + PMIX_BFROPS_PACK(rc, _client_peer(ds_ctx), &buffer, kval->value, 1, 
PMIX_VALUE); if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); goto exit; @@ -1791,8 +1178,8 @@ static int pmix_sm_store(ns_track_elem_t *ns_info, pmix_rank_t rank, pmix_kval_t if (0 == data_exist) { /* there is no data blob for this rank yet, so add it. */ size_t free_offset; - free_offset = get_free_offset(datadesc); - offset = put_data_to_the_end(ns_info, datadesc, kval->key, buffer.base_ptr, size); + free_offset = get_free_offset(ds_ctx, datadesc); + offset = put_data_to_the_end(ds_ctx, ns_info, datadesc, kval->key, buffer.base_ptr, size); if (0 == offset) { /* this is an error */ rc = PMIX_ERROR; @@ -1806,11 +1193,15 @@ static int pmix_sm_store(ns_track_elem_t *ns_info, pmix_rank_t rank, pmix_kval_t * because previous segment is already full. */ if (free_offset != offset && NULL != *rinfo) { /* here we compare previous free offset with the offset where we just put data. - * It should be equal in the normal case. It it's not true, then it means that + * It should be equal in the normal case. 
If it's not true, then it means that * segment was extended, and we put data to the next segment, so we now need to * put extension slot at the end of previous segment with a "reference" to a new_offset */ - addr = _get_data_region_by_offset(datadesc, free_offset); - ESH_PUT_KEY(addr, ESH_REGION_EXTENSION, (void*)&offset, sizeof(size_t)); + addr = _get_data_region_by_offset(ds_ctx, datadesc, free_offset); + PMIX_DS_PUT_KEY(rc, ds_ctx, addr, ESH_REGION_EXTENSION, (void*)&offset, sizeof(size_t)); + if (rc != PMIX_SUCCESS) { + PMIX_ERROR_LOG(rc); + return 0; + } } if (NULL == *rinfo) { *rinfo = (rank_meta_info*)malloc(sizeof(rank_meta_info)); @@ -1821,7 +1212,7 @@ static int pmix_sm_store(ns_track_elem_t *ns_info, pmix_rank_t rank, pmix_kval_t (*rinfo)->count++; } else if (NULL != *rinfo) { /* there is data blob for this rank */ - addr = _get_data_region_by_offset(datadesc, (*rinfo)->offset); + addr = _get_data_region_by_offset(ds_ctx, datadesc, (*rinfo)->offset); if (NULL == addr) { rc = PMIX_ERROR; PMIX_ERROR_LOG(rc); @@ -1843,8 +1234,8 @@ static int pmix_sm_store(ns_track_elem_t *ns_info, pmix_rank_t rank, pmix_kval_t * ..... * extension slot which has key = EXTENSION_SLOT and a size_t value for offset to next data address for this process. 
*/ - if (0 == strncmp(ESH_KNAME_PTR(addr), ESH_REGION_EXTENSION, ESH_KNAME_LEN(ESH_REGION_EXTENSION))) { - memcpy(&offset, ESH_DATA_PTR(addr), sizeof(size_t)); + if(PMIX_DS_KEY_IS_EXTSLOT(ds_ctx, addr)) { + memcpy(&offset, PMIX_DS_DATA_PTR(ds_ctx, addr), sizeof(size_t)); if (0 < offset) { PMIX_OUTPUT_VERBOSE((10, pmix_gds_base_framework.framework_output, "%s:%d:%s: for rank %lu, replace flag %d %s is filled with %lu value", @@ -1852,7 +1243,7 @@ static int pmix_sm_store(ns_track_elem_t *ns_info, pmix_rank_t rank, pmix_kval_t (unsigned long)rank, data_exist, ESH_REGION_EXTENSION, (unsigned long)offset)); /* go to next item, updating address */ - addr = _get_data_region_by_offset(datadesc, offset); + addr = _get_data_region_by_offset(ds_ctx, datadesc, offset); if (NULL == addr) { rc = PMIX_ERROR; PMIX_ERROR_LOG(rc); @@ -1861,20 +1252,21 @@ static int pmix_sm_store(ns_track_elem_t *ns_info, pmix_rank_t rank, pmix_kval_t } else { /* should not be, we should be out of cycle when this happens */ } - } else if (0 == strncmp(ESH_KNAME_PTR(addr), kval->key, ESH_KNAME_LEN(kval->key))) { + } else if (0 == strncmp(PMIX_DS_KNAME_PTR(ds_ctx, addr), kval->key, + PMIX_DS_KNAME_LEN(ds_ctx, kval->key))) { PMIX_OUTPUT_VERBOSE((10, pmix_gds_base_framework.framework_output, "%s:%d:%s: for rank %u, replace flag %d found target key %s", __FILE__, __LINE__, __func__, rank, data_exist, kval->key)); /* target key is found, compare value sizes */ - if (ESH_DATA_SIZE(addr, ESH_DATA_PTR(addr)) != size) { + if (PMIX_DS_DATA_SIZE(ds_ctx, addr, PMIX_DS_DATA_PTR(ds_ctx, addr)) != size) { //if (1) { /* if we want to test replacing values for existing keys. */ /* invalidate current value and store another one at the end of data region. */ - strncpy(ESH_KNAME_PTR(addr), ESH_REGION_INVALIDATED, ESH_KNAME_LEN(ESH_REGION_INVALIDATED)); + PMIX_DS_KEY_SET_INVALID(ds_ctx, addr); /* decrementing count, it will be incremented back when we add a new value for this key at the end of region. 
*/ (*rinfo)->count--; kval_cnt--; /* go to next item, updating address */ - addr += ESH_KV_SIZE(addr); + addr += PMIX_DS_KV_SIZE(ds_ctx, addr); PMIX_OUTPUT_VERBOSE((10, pmix_gds_base_framework.framework_output, "%s:%d:%s: for rank %u, replace flag %d mark key %s regions as invalidated. put new data at the end.", __FILE__, __LINE__, __func__, rank, data_exist, kval->key)); @@ -1883,23 +1275,25 @@ static int pmix_sm_store(ns_track_elem_t *ns_info, pmix_rank_t rank, pmix_kval_t "%s:%d:%s: for rank %u, replace flag %d replace data for key %s type %d in place", __FILE__, __LINE__, __func__, rank, data_exist, kval->key, kval->value->type)); /* replace old data with new one. */ - memset(ESH_DATA_PTR(addr), 0, ESH_DATA_SIZE(addr, ESH_DATA_PTR(addr))); - memcpy(ESH_DATA_PTR(addr), buffer.base_ptr, size); - addr += ESH_KV_SIZE(addr); + memset(PMIX_DS_DATA_PTR(ds_ctx, addr), 0, + PMIX_DS_DATA_SIZE(ds_ctx, addr, PMIX_DS_DATA_PTR(ds_ctx, addr))); + memcpy(PMIX_DS_DATA_PTR(ds_ctx, addr), buffer.base_ptr, size); + addr += PMIX_DS_KV_SIZE(ds_ctx, addr); add_to_the_end = 0; break; } } else { PMIX_OUTPUT_VERBOSE((10, pmix_gds_base_framework.framework_output, "%s:%d:%s: for rank %u, replace flag %d skip %s key, look for %s key", - __FILE__, __LINE__, __func__, rank, data_exist, ESH_KNAME_PTR(addr), kval->key)); + __FILE__, __LINE__, __func__, rank, data_exist, + PMIX_DS_KNAME_PTR(ds_ctx, addr), kval->key)); /* Skip it: key is "INVALIDATED" or key is valid but different from target one. */ - if (0 != strncmp(ESH_REGION_INVALIDATED, ESH_KNAME_PTR(addr), ESH_KNAME_LEN(ESH_KNAME_PTR(addr)))) { + if (!PMIX_DS_KEY_IS_INVALID(ds_ctx, addr)) { /* count only valid items */ kval_cnt--; } /* go to next item, updating address */ - addr += ESH_KV_SIZE(addr); + addr += PMIX_DS_KV_SIZE(ds_ctx, addr); } } if (1 == add_to_the_end) { @@ -1908,9 +1302,68 @@ static int pmix_sm_store(ns_track_elem_t *ns_info, pmix_rank_t rank, pmix_kval_t * for the same key. 
*/ size_t free_offset; (*rinfo)->count++; - free_offset = get_free_offset(datadesc); + free_offset = get_free_offset(ds_ctx, datadesc); + + /* + * Remove trailing extension slot if we are continuing + * same rank's data. + * + * When keys are stored individually through _store_data_for_rank + * an empty extension slot is placed every time. + * + * This is required because there is no information about whether or not the next key + * will belong to the same rank. + * + * As a result EACH key stored with _store_data_for_rank is + * followed by an extension slot. This slows down search and increases + * the memory footprint. + * + * The following code tries to deal with such one-key-at-a-time + * situation by: + * - checking if the last key-value for this rank is an extension + * slot + * - If this is the case - checks if this key-value pair is the + * last one at the moment and can be safely deleted. + * - if it is - current segment's offset pointer is decreased by + * the size of the extension slot key-value effectively removing + * it from the dstor + */ + if (PMIX_DS_KEY_IS_EXTSLOT(ds_ctx, addr)){ + /* Find the last data segment */ + pmix_dstore_seg_desc_t *ldesc = datadesc; + uint8_t *segstart; + size_t offs_past_extslot = 0; + size_t offs_cur_segment = 0; + while (NULL != ldesc->next) { + ldesc = ldesc->next; + } + + /* Calculate the offset of the end of the extension slot */ + offs_cur_segment = free_offset % ds_ctx->data_segment_size; + segstart = ldesc->seg_info.seg_base_addr; + offs_past_extslot = (addr + PMIX_DS_KV_SIZE(ds_ctx, addr)) - segstart; + + /* We can erase extension slot if: + * - address of the ext slot belongs to the occupied part of the + * last segment + * - local offset within the segment is equal to the local + * offset of the end of extension slot + */ + if( ( (addr > segstart) && (addr < (segstart + offs_cur_segment)) ) + && (offs_cur_segment == offs_past_extslot) ) { + /* Calculate a new free offset that doesn't account for this + * extension 
slot */ + size_t new_offset = addr - segstart; + /* Rewrite segment's offset information to exclude + * extension slot */ + memcpy(segstart, &new_offset, sizeof(size_t)); + /* Recalculate free_offset */ + free_offset = get_free_offset(ds_ctx, datadesc); + } + } + /* add to the end */ - offset = put_data_to_the_end(ns_info, datadesc, kval->key, buffer.base_ptr, size); + offset = put_data_to_the_end(ds_ctx, ns_info, datadesc, kval->key, buffer.base_ptr, size); if (0 == offset) { rc = PMIX_ERROR; PMIX_ERROR_LOG(rc); @@ -1921,11 +1374,11 @@ static int pmix_sm_store(ns_track_elem_t *ns_info, pmix_rank_t rank, pmix_kval_t * data for different ranks, and that's why next element is EXTENSION_SLOT. * We put new data to the end of data region and just update EXTENSION_SLOT value by new offset. */ - if (0 == strncmp(ESH_KNAME_PTR(addr), ESH_REGION_EXTENSION, ESH_KNAME_LEN(ESH_REGION_EXTENSION))) { + if (PMIX_DS_KEY_IS_EXTSLOT(ds_ctx, addr)) { PMIX_OUTPUT_VERBOSE((10, pmix_gds_base_framework.framework_output, "%s:%d:%s: for rank %u, replace flag %d %s should be filled with offset %lu value", __FILE__, __LINE__, __func__, rank, data_exist, ESH_REGION_EXTENSION, offset)); - memcpy(ESH_DATA_PTR(addr), &offset, sizeof(size_t)); + memcpy(PMIX_DS_DATA_PTR(ds_ctx, addr), &offset, sizeof(size_t)); } else { /* (2) - we point to the first free offset, no more data is stored further in this segment. * There is no EXTENSION_SLOT by this addr since we continue pushing data for the same rank, @@ -1935,7 +1388,11 @@ static int pmix_sm_store(ns_track_elem_t *ns_info, pmix_rank_t rank, pmix_kval_t * forcibly and store new offset in its value. 
*/ if (free_offset != offset) { /* segment was extended, need to put extension slot by free_offset indicating new_offset */ - ESH_PUT_KEY(addr, ESH_REGION_EXTENSION, (void*)&offset, sizeof(size_t)); + PMIX_DS_PUT_KEY(rc, ds_ctx, addr, ESH_REGION_EXTENSION, (void*)&offset, sizeof(size_t)); + if (rc != PMIX_SUCCESS) { + PMIX_ERROR_LOG(rc); + return 0; + } } } PMIX_OUTPUT_VERBOSE((10, pmix_gds_base_framework.framework_output, @@ -1948,12 +1405,13 @@ static int pmix_sm_store(ns_track_elem_t *ns_info, pmix_rank_t rank, pmix_kval_t return rc; } -static int _store_data_for_rank(ns_track_elem_t *ns_info, pmix_rank_t rank, pmix_buffer_t *buf) +static int _store_data_for_rank(pmix_common_dstore_ctx_t *ds_ctx, ns_track_elem_t *ns_info, + pmix_rank_t rank, pmix_buffer_t *buf) { pmix_status_t rc; pmix_kval_t *kp; - seg_desc_t *metadesc, *datadesc; + pmix_dstore_seg_desc_t *metadesc, *datadesc; int32_t cnt; rank_meta_info *rinfo = NULL; @@ -1974,11 +1432,11 @@ static int _store_data_for_rank(ns_track_elem_t *ns_info, pmix_rank_t rank, pmix num_elems = *((size_t*)(metadesc->seg_info.seg_base_addr)); data_exist = 0; - /* when we don't use linear search (_direct_mode ==0 ) we don't use num_elems field, + /* when we don't use linear search (direct_mode == 0) we don't use num_elems field, * so anyway try to get rank_meta_info first. */ - if (0 < num_elems || 0 == _direct_mode) { + if (0 < num_elems || 0 == ds_ctx->direct_mode) { /* go through all elements in meta segment and look for target rank. */ - rinfo = _get_rank_meta_info(rank, metadesc); + rinfo = _get_rank_meta_info(ds_ctx, rank, metadesc); if (NULL != rinfo) { data_exist = 1; } @@ -1987,14 +1445,14 @@ static int _store_data_for_rank(ns_track_elem_t *ns_info, pmix_rank_t rank, pmix * so unpack these buffers, and then unpack kvals from each modex buffer, * storing them in the shared memory dstore. 
*/ - free_offset = get_free_offset(datadesc); + free_offset = get_free_offset(ds_ctx, datadesc); cnt = 1; kp = PMIX_NEW(pmix_kval_t); PMIX_BFROPS_UNPACK(rc, pmix_globals.mypeer, buf, kp, &cnt, PMIX_KVAL); while(PMIX_SUCCESS == rc) { pmix_output_verbose(2, pmix_gds_base_framework.framework_output, "pmix: unpacked key %s", kp->key); - if (PMIX_SUCCESS != (rc = pmix_sm_store(ns_info, rank, kp, &rinfo, data_exist))) { + if (PMIX_SUCCESS != (rc = pmix_sm_store(ds_ctx, ns_info, rank, kp, &rinfo, data_exist))) { PMIX_ERROR_LOG(rc); if (NULL != rinfo) { free(rinfo); @@ -2021,7 +1479,7 @@ static int _store_data_for_rank(ns_track_elem_t *ns_info, pmix_rank_t rank, pmix * in that case we don't reserve space for EXTENSION_SLOT, it's * already reserved. * */ - new_free_offset = get_free_offset(datadesc); + new_free_offset = get_free_offset(ds_ctx, datadesc); if (new_free_offset != free_offset) { /* Reserve space for EXTENSION_SLOT at the end of data blob. * We need it to split data for one rank from data for different @@ -2029,7 +1487,7 @@ static int _store_data_for_rank(ns_track_elem_t *ns_info, pmix_rank_t rank, pmix * We also put EXTENSION_SLOT at the end of each data segment, and * its value points to the beginning of next data segment. 
* */ - rc = put_empty_ext_slot(ns_info->data_seg); + rc = put_empty_ext_slot(ds_ctx, ns_info->data_seg); if (PMIX_SUCCESS != rc) { if ((0 == data_exist) && NULL != rinfo) { free(rinfo); @@ -2042,7 +1500,7 @@ static int _store_data_for_rank(ns_track_elem_t *ns_info, pmix_rank_t rank, pmix /* if this is the first data posted for this rank, then * update meta info for it */ if (0 == data_exist) { - set_rank_meta_info(ns_info, rinfo); + set_rank_meta_info(ds_ctx, ns_info, rinfo); if (NULL != rinfo) { free(rinfo); } @@ -2051,13 +1509,13 @@ static int _store_data_for_rank(ns_track_elem_t *ns_info, pmix_rank_t rank, pmix return rc; } -static inline ssize_t _get_univ_size(const char *nspace) +static inline ssize_t _get_univ_size(pmix_common_dstore_ctx_t *ds_ctx, const char *nspace) { ssize_t nprocs = 0; pmix_value_t *val; int rc; - rc = _dstore_fetch(nspace, PMIX_RANK_WILDCARD, PMIX_UNIV_SIZE, &val); + rc = _dstore_fetch(ds_ctx, nspace, PMIX_RANK_WILDCARD, PMIX_UNIV_SIZE, &val); if( PMIX_SUCCESS != rc ) { PMIX_ERROR_LOG(rc); return rc; @@ -2072,23 +1530,39 @@ static inline ssize_t _get_univ_size(const char *nspace) return nprocs; } -static pmix_status_t dstore_cache_job_info(struct pmix_nspace_t *ns, +PMIX_EXPORT pmix_status_t pmix_common_dstor_cache_job_info(pmix_common_dstore_ctx_t *ds_ctx, + struct pmix_namespace_t *ns, pmix_info_t info[], size_t ninfo) { return PMIX_SUCCESS; } -static pmix_status_t dstore_init(pmix_info_t info[], size_t ninfo) + +pmix_common_dstore_ctx_t *pmix_common_dstor_init(const char *ds_name, pmix_info_t info[], size_t ninfo, + pmix_common_lock_callbacks_t *lock_cb, + pmix_common_dstore_file_cbs_t *file_cb) { pmix_status_t rc; size_t n; char *dstor_tmpdir = NULL; - size_t tbl_idx=0; + size_t tbl_idx = 0; ns_map_data_t *ns_map = NULL; + pmix_common_dstore_ctx_t *ds_ctx = NULL; pmix_output_verbose(2, pmix_gds_base_framework.framework_output, "pmix:gds:dstore init"); + ds_ctx = (pmix_common_dstore_ctx_t*) malloc(sizeof(*ds_ctx)); + if (NULL == 
ds_ctx) { + PMIX_ERROR_LOG(PMIX_ERR_OUT_OF_RESOURCE); + return NULL; + } + memset(ds_ctx, 0, sizeof(*ds_ctx)); + + /* assign lock callbacks */ + ds_ctx->lock_cbs = lock_cb; + ds_ctx->file_cbs = file_cb; + /* open the pshmem and select the active plugins */ if( PMIX_SUCCESS != (rc = pmix_mca_base_framework_open(&pmix_pshmem_base_framework, 0)) ) { PMIX_ERROR_LOG(rc); @@ -2099,17 +1573,10 @@ static pmix_status_t dstore_init(pmix_info_t info[], size_t ninfo) goto err_exit; } - _jobuid = getuid(); - _setjobuid = 0; - -#ifdef ESH_PTHREAD_LOCK - _esh_lock_init = _rwlock_init; -#endif -#ifdef ESH_FCNTL_LOCK - _esh_lock_init = _flock_init; -#endif + ds_ctx->jobuid = getuid(); + ds_ctx->setjobuid = 0; - if (PMIX_SUCCESS != (rc = _esh_tbls_init())) { + if (PMIX_SUCCESS != (rc = _esh_tbls_init(ds_ctx))) { PMIX_ERROR_LOG(rc); goto err_exit; } @@ -2120,23 +1587,19 @@ static pmix_status_t dstore_init(pmix_info_t info[], size_t ninfo) goto err_exit; } - _set_constants_from_env(); - - if (NULL != _base_path) { - free(_base_path); - _base_path = NULL; - } + _set_constants_from_env(ds_ctx); + ds_ctx->ds_name = strdup(ds_name); /* find the temp dir */ if (PMIX_PROC_IS_SERVER(pmix_globals.mypeer)) { - _esh_session_map_search = _esh_session_map_search_server; + ds_ctx->session_map_search = (session_map_search_fn_t)_esh_session_map_search_server; /* scan incoming info for directives */ if (NULL != info) { for (n=0; n < ninfo; n++) { if (0 == strcmp(PMIX_USERID, info[n].key)) { - _jobuid = info[n].value.data.uint32; - _setjobuid = 1; + ds_ctx->jobuid = info[n].value.data.uint32; + ds_ctx->setjobuid = 1; continue; } if (0 == strcmp(PMIX_DSTPATH, info[n].key)) { @@ -2182,67 +1645,109 @@ static pmix_status_t dstore_init(pmix_info_t info[], size_t ninfo) } } - rc = asprintf(&_base_path, "%s/pmix_dstor_%d", dstor_tmpdir, getpid()); - if ((0 > rc) || (NULL == _base_path)) { + rc = asprintf(&ds_ctx->base_path, "%s/pmix_dstor_%s_%d", dstor_tmpdir, + ds_ctx->ds_name, getpid()); + if ((0 > rc) || 
(NULL == ds_ctx->base_path)) { rc = PMIX_ERR_OUT_OF_RESOURCE; PMIX_ERROR_LOG(rc); goto err_exit; } - if (0 != mkdir(_base_path, 0770)) { + if (0 != mkdir(ds_ctx->base_path, 0770)) { if (EEXIST != errno) { rc = PMIX_ERROR; PMIX_ERROR_LOG(rc); goto err_exit; } } - if (_setjobuid > 0) { - if (chown(_base_path, (uid_t) _jobuid, (gid_t) -1) < 0){ + if (ds_ctx->setjobuid > 0) { + if (chown(ds_ctx->base_path, (uid_t) ds_ctx->jobuid, (gid_t) -1) < 0){ rc = PMIX_ERR_NO_PERMISSIONS; PMIX_ERROR_LOG(rc); goto err_exit; } } - _esh_session_map_search = _esh_session_map_search_server; - return PMIX_SUCCESS; + ds_ctx->session_map_search = _esh_session_map_search_server; + return ds_ctx; } /* for clients */ else { - if (NULL == (dstor_tmpdir = getenv(PMIX_DSTORE_ESH_BASE_PATH))){ - return PMIX_ERR_NOT_AVAILABLE; // simply disqualify ourselves + char *env_name = NULL; + int ds_ver = 0; + + sscanf(ds_ctx->ds_name, "ds%d", &ds_ver); + if (0 == ds_ver) { + rc = PMIX_ERR_INIT; + PMIX_ERROR_LOG(rc); + goto err_exit; + } + if (0 > asprintf(&env_name, PMIX_DSTORE_VER_BASE_PATH_FMT, ds_ver)) { + rc = PMIX_ERR_NOMEM; + PMIX_ERROR_LOG(rc); + goto err_exit; } - if (NULL == (_base_path = strdup(dstor_tmpdir))) { + dstor_tmpdir = getenv(env_name); + free(env_name); + + if (NULL == dstor_tmpdir) { + dstor_tmpdir = getenv(PMIX_DSTORE_ESH_BASE_PATH); + } + if (NULL == dstor_tmpdir){ + rc = PMIX_ERR_NOT_AVAILABLE; // simply disqualify ourselves + goto err_exit; + } + if (NULL == (ds_ctx->base_path = strdup(dstor_tmpdir))) { rc = PMIX_ERR_OUT_OF_RESOURCE; PMIX_ERROR_LOG(rc); goto err_exit; } - _esh_session_map_search = _esh_session_map_search_client; + ds_ctx->session_map_search = _esh_session_map_search_client; + /* init ds_ctx protect lock */ + if (0 != pthread_mutex_init(&ds_ctx->lock, NULL)) { + rc = PMIX_ERR_INIT; + PMIX_ERROR_LOG(rc); + goto err_exit; + } } - rc = _esh_session_tbl_add(&tbl_idx); + rc = _esh_session_tbl_add(ds_ctx, &tbl_idx); if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); goto 
err_exit; } - ns_map = _esh_session_map(pmix_globals.myid.nspace, tbl_idx); + char *nspace = NULL; + /* if we don't see the required info, then we cannot init */ + if (NULL == (nspace = getenv("PMIX_NAMESPACE"))) { + rc = PMIX_ERR_INVALID_NAMESPACE; + PMIX_ERROR_LOG(rc); + goto err_exit; + } + /* lock init */ + rc = ds_ctx->lock_cbs->init(&_ESH_SESSION_lock(ds_ctx->session_array, tbl_idx), ds_ctx->base_path, nspace, 1, ds_ctx->jobuid, ds_ctx->setjobuid); + if (rc != PMIX_SUCCESS) { + goto err_exit; + } + ns_map = _esh_session_map(ds_ctx, nspace, 0, tbl_idx); if (NULL == ns_map) { rc = PMIX_ERR_OUT_OF_RESOURCE; PMIX_ERROR_LOG(rc); goto err_exit; } - if (PMIX_SUCCESS != (rc =_esh_session_init(tbl_idx, ns_map, _jobuid, _setjobuid))) { + if (PMIX_SUCCESS != (rc =_esh_session_init(ds_ctx, tbl_idx, ns_map, 1, + ds_ctx->jobuid, ds_ctx->setjobuid))) { PMIX_ERROR_LOG(rc); goto err_exit; } - return PMIX_SUCCESS; + return ds_ctx; err_exit: - return rc; + pmix_common_dstor_finalize(ds_ctx); + return NULL; } -static void dstore_finalize(void) +PMIX_EXPORT void pmix_common_dstor_finalize(pmix_common_dstore_ctx_t *ds_ctx) { struct stat st = {0}; pmix_status_t rc = PMIX_SUCCESS; @@ -2250,42 +1755,45 @@ static void dstore_finalize(void) PMIX_OUTPUT_VERBOSE((10, pmix_gds_base_framework.framework_output, "%s:%d:%s", __FILE__, __LINE__, __func__)); - _esh_sessions_cleanup(); - _esh_ns_map_cleanup(); - _esh_ns_track_cleanup(); + _esh_sessions_cleanup(ds_ctx); + _esh_ns_map_cleanup(ds_ctx); + _esh_ns_track_cleanup(ds_ctx); pmix_pshmem.finalize(); - if (NULL != _base_path){ + if (NULL != ds_ctx->base_path){ if(PMIX_PROC_IS_SERVER(pmix_globals.mypeer)) { - if (lstat(_base_path, &st) >= 0){ - if (PMIX_SUCCESS != (rc = _esh_dir_del(_base_path))) { + if (lstat(ds_ctx->base_path, &st) >= 0){ + if (PMIX_SUCCESS != (rc = _esh_dir_del(ds_ctx->base_path))) { PMIX_ERROR_LOG(rc); } } } - free(_base_path); - _base_path = NULL; + free(ds_ctx->base_path); + ds_ctx->base_path = NULL; } - if (NULL != 
_clients_peer) { - PMIX_RELEASE(_clients_peer->nptr); - PMIX_RELEASE(_clients_peer); + if (NULL != ds_ctx->clients_peer) { + PMIX_RELEASE(ds_ctx->clients_peer->nptr); + PMIX_RELEASE(ds_ctx->clients_peer); } /* close the pshmem framework */ if( PMIX_SUCCESS != (rc = pmix_mca_base_framework_close(&pmix_pshmem_base_framework)) ) { PMIX_ERROR_LOG(rc); } + free(ds_ctx->ds_name); + free(ds_ctx->base_path); + free(ds_ctx); } -static pmix_status_t _dstore_store(const char *nspace, - pmix_rank_t rank, - pmix_kval_t *kv) +static pmix_status_t _dstore_store_nolock(pmix_common_dstore_ctx_t *ds_ctx, + ns_map_data_t *ns_map, + pmix_rank_t rank, + pmix_kval_t *kv) { - pmix_status_t rc = PMIX_SUCCESS, tmp_rc; + pmix_status_t rc = PMIX_SUCCESS; ns_track_elem_t *elem; pmix_buffer_t xfer; ns_seg_info_t ns_info; - ns_map_data_t *ns_map = NULL; if (NULL == kv) { return PMIX_ERROR; @@ -2293,19 +1801,7 @@ static pmix_status_t _dstore_store(const char *nspace, PMIX_OUTPUT_VERBOSE((10, pmix_gds_base_framework.framework_output, "%s:%d:%s: for %s:%u", - __FILE__, __LINE__, __func__, nspace, rank)); - - if (NULL == (ns_map = _esh_session_map_search(nspace))) { - rc = PMIX_ERROR; - PMIX_ERROR_LOG(rc); - return rc; - } - - /* set exclusive lock */ - if (PMIX_SUCCESS != (rc = _ESH_WRLOCK(_ESH_SESSION_lock(ns_map->tbl_idx)))) { - PMIX_ERROR_LOG(rc); - return rc; - } + __FILE__, __LINE__, __func__, ns_map->name, rank)); /* First of all, we go through local track list (list of ns_track_elem_t structures) * and look for an element for the target namespace. @@ -2317,36 +1813,36 @@ static pmix_status_t _dstore_store(const char *nspace, * All this stuff is done inside _get_track_elem_for_namespace function. 
*/ - elem = _get_track_elem_for_namespace(ns_map); + elem = _get_track_elem_for_namespace(ds_ctx, ns_map); if (NULL == elem) { rc = PMIX_ERR_OUT_OF_RESOURCE; PMIX_ERROR_LOG(rc); - goto err_exit; + goto exit; } /* If a new element was just created, we need to create corresponding meta and * data segments and update corresponding element's fields. */ if (NULL == elem->meta_seg || NULL == elem->data_seg) { memset(&ns_info.ns_map, 0, sizeof(ns_info.ns_map)); - strncpy(ns_info.ns_map.name, ns_map->name, sizeof(ns_info.ns_map.name)-1); + pmix_strncpy(ns_info.ns_map.name, ns_map->name, sizeof(ns_info.ns_map.name)-1); ns_info.ns_map.tbl_idx = ns_map->tbl_idx; ns_info.num_meta_seg = 1; ns_info.num_data_seg = 1; - rc = _update_ns_elem(elem, &ns_info); + rc = _update_ns_elem(ds_ctx, elem, &ns_info); if (PMIX_SUCCESS != rc || NULL == elem->meta_seg || NULL == elem->data_seg) { PMIX_ERROR_LOG(rc); - goto err_exit; + goto exit; } /* zero created shared memory segments for this namespace */ - memset(elem->meta_seg->seg_info.seg_base_addr, 0, _meta_segment_size); - memset(elem->data_seg->seg_info.seg_base_addr, 0, _data_segment_size); + memset(elem->meta_seg->seg_info.seg_base_addr, 0, ds_ctx->meta_segment_size); + memset(elem->data_seg->seg_info.seg_base_addr, 0, ds_ctx->data_segment_size); /* put ns's shared segments info to the global meta segment. 
*/ - rc = _put_ns_info_to_initial_segment(ns_map, &elem->meta_seg->seg_info, &elem->data_seg->seg_info); + rc = _put_ns_info_to_initial_segment(ds_ctx, ns_map, &elem->meta_seg->seg_info, &elem->data_seg->seg_info); if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); - goto err_exit; + goto exit; } } @@ -2355,34 +1851,28 @@ static pmix_status_t _dstore_store(const char *nspace, PMIX_CONSTRUCT(&xfer, pmix_buffer_t); PMIX_LOAD_BUFFER(pmix_globals.mypeer, &xfer, kv->value->data.bo.bytes, kv->value->data.bo.size); - rc = _store_data_for_rank(elem, rank, &xfer); + rc = _store_data_for_rank(ds_ctx, elem, rank, &xfer); PMIX_DESTRUCT(&xfer); if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); - goto err_exit; - } - - /* unset lock */ - if (PMIX_SUCCESS != (rc = _ESH_UNLOCK(_ESH_SESSION_lock(ns_map->tbl_idx)))) { - PMIX_ERROR_LOG(rc); + goto exit; } - return rc; -err_exit: - /* unset lock */ - if (PMIX_SUCCESS != (tmp_rc = _ESH_UNLOCK(_ESH_SESSION_lock(ns_map->tbl_idx)))) { - PMIX_ERROR_LOG(tmp_rc); - } +exit: return rc; } -static pmix_status_t dstore_store(const pmix_proc_t *proc, - pmix_scope_t scope, - pmix_kval_t *kv) +PMIX_EXPORT pmix_status_t pmix_common_dstor_store(pmix_common_dstore_ctx_t *ds_ctx, + const pmix_proc_t *proc, + pmix_scope_t scope, + pmix_kval_t *kv) { pmix_status_t rc = PMIX_SUCCESS; + ns_map_data_t *ns_map; + pmix_kval_t *kv2; + pmix_buffer_t tmp; pmix_output_verbose(2, pmix_gds_base_framework.framework_output, "[%s:%d] gds: dstore store for key '%s' scope %d", @@ -2393,26 +1883,51 @@ static pmix_status_t dstore_store(const pmix_proc_t *proc, PMIX_ERROR_LOG(rc); return rc; } - else { - pmix_kval_t *kv2; - kv2 = PMIX_NEW(pmix_kval_t); - PMIX_VALUE_CREATE(kv2->value, 1); - kv2->value->type = PMIX_BYTE_OBJECT; - pmix_buffer_t tmp; - PMIX_CONSTRUCT(&tmp, pmix_buffer_t); + kv2 = PMIX_NEW(pmix_kval_t); + PMIX_VALUE_CREATE(kv2->value, 1); + kv2->value->type = PMIX_BYTE_OBJECT; - PMIX_BFROPS_PACK(rc, pmix_globals.mypeer, &tmp, kv, 1, PMIX_KVAL); - 
PMIX_UNLOAD_BUFFER(&tmp, kv2->value->data.bo.bytes, kv2->value->data.bo.size); + PMIX_CONSTRUCT(&tmp, pmix_buffer_t); + + PMIX_BFROPS_PACK(rc, pmix_globals.mypeer, &tmp, kv, 1, PMIX_KVAL); + PMIX_UNLOAD_BUFFER(&tmp, kv2->value->data.bo.bytes, kv2->value->data.bo.size); + + if (NULL == (ns_map = ds_ctx->session_map_search(ds_ctx, proc->nspace))) { + rc = PMIX_ERROR; + PMIX_ERROR_LOG(rc); + goto exit; + } + + /* set exclusive lock */ + rc = _ESH_LOCK(ds_ctx, ns_map->tbl_idx, wr_lock); + if (PMIX_SUCCESS != rc) { + PMIX_ERROR_LOG(rc); + goto exit; + } - rc = _dstore_store(proc->nspace, proc->rank, kv2); - PMIX_RELEASE(kv2); - PMIX_DESTRUCT(&tmp); + rc = _dstore_store_nolock(ds_ctx, ns_map, proc->rank, kv2); + if (PMIX_SUCCESS != rc) { + PMIX_ERROR_LOG(rc); + goto exit; } + + /* unset lock */ + rc = _ESH_LOCK(ds_ctx, ns_map->tbl_idx, wr_unlock); + if (PMIX_SUCCESS != rc) { + PMIX_ERROR_LOG(rc); + goto exit; + } + +exit: + PMIX_RELEASE(kv2); + PMIX_DESTRUCT(&tmp); + return rc; } -static pmix_status_t _dstore_fetch(const char *nspace, pmix_rank_t rank, +static pmix_status_t _dstore_fetch(pmix_common_dstore_ctx_t *ds_ctx, + const char *nspace, pmix_rank_t rank, const char *key, pmix_value_t **kvs) { ns_seg_info_t *ns_info = NULL; @@ -2420,7 +1935,7 @@ static pmix_status_t _dstore_fetch(const char *nspace, pmix_rank_t rank, ns_track_elem_t *elem; rank_meta_info *rinfo = NULL; size_t kval_cnt = 0; - seg_desc_t *meta_seg, *data_seg; + pmix_dstore_seg_desc_t *meta_seg, *data_seg; uint8_t *addr; pmix_buffer_t buffer; pmix_value_t val, *kval = NULL; @@ -2431,6 +1946,8 @@ static pmix_status_t _dstore_fetch(const char *nspace, pmix_rank_t rank, bool key_found = false; pmix_info_t *info = NULL; size_t ninfo; + size_t keyhash = 0; + bool lock_is_set = false; PMIX_OUTPUT_VERBOSE((10, pmix_gds_base_framework.framework_output, "%s:%d:%s: for %s:%u look for key %s", @@ -2440,34 +1957,38 @@ static pmix_status_t _dstore_fetch(const char *nspace, pmix_rank_t rank, PMIX_OUTPUT_VERBOSE((7, 
pmix_gds_base_framework.framework_output, "dstore: Does not support passed parameters")); rc = PMIX_ERR_BAD_PARAM; - PMIX_ERROR_LOG(rc); - return rc; + goto error; } PMIX_OUTPUT_VERBOSE((10, pmix_gds_base_framework.framework_output, "%s:%d:%s: for %s:%u look for key %s", __FILE__, __LINE__, __func__, nspace, rank, key)); - if (NULL == (ns_map = _esh_session_map_search(nspace))) { + /* protect info of dstore segments before it will be updated */ + if (!PMIX_PROC_IS_SERVER(pmix_globals.mypeer)) { + if (0 != (rc = pthread_mutex_lock(&ds_ctx->lock))) { + goto error; + } + lock_is_set = true; + } + + if (NULL == (ns_map = ds_ctx->session_map_search(ds_ctx, nspace))) { /* This call is issued from the the client. * client must have the session, otherwise the error is fatal. */ rc = PMIX_ERR_FATAL; - PMIX_ERROR_LOG(rc); - return rc; + goto error; } if (NULL == kvs) { rc = PMIX_ERR_FATAL; - PMIX_ERROR_LOG(rc); - return rc; + goto error; } if (PMIX_RANK_UNDEF == rank) { - ssize_t _nprocs = _get_univ_size(ns_map->name); + ssize_t _nprocs = _get_univ_size(ds_ctx, ns_map->name); if( 0 > _nprocs ){ - PMIX_ERROR_LOG(rc); - return rc; + goto error; } nprocs = (size_t) _nprocs; cur_rank = 0; @@ -2477,11 +1998,11 @@ static pmix_status_t _dstore_fetch(const char *nspace, pmix_rank_t rank, } /* grab shared lock */ - if (PMIX_SUCCESS != (lock_rc = _ESH_RDLOCK(_ESH_SESSION_lock(ns_map->tbl_idx)))) { + lock_rc = _ESH_LOCK(ds_ctx, ns_map->tbl_idx, rd_lock); + if (PMIX_SUCCESS != lock_rc) { /* Something wrong with the lock. The error is fatal */ - rc = PMIX_ERR_FATAL; - PMIX_ERROR_LOG(lock_rc); - return lock_rc; + rc = lock_rc; + goto error; } /* First of all, we go through all initial segments and look at their field. @@ -2497,9 +2018,9 @@ static pmix_status_t _dstore_fetch(const char *nspace, pmix_rank_t rank, */ /* first update local information about initial segments. they can be extended, so then we need to attach to new segments. 
*/ - _update_initial_segment_info(ns_map); + _update_initial_segment_info(ds_ctx, ns_map); - ns_info = _get_ns_info_from_initial_segment(ns_map); + ns_info = _get_ns_info_from_initial_segment(ds_ctx, ns_map); if (NULL == ns_info) { /* no data for this namespace is found in the shared memory. */ PMIX_OUTPUT_VERBOSE((7, pmix_gds_base_framework.framework_output, @@ -2510,7 +2031,7 @@ static pmix_status_t _dstore_fetch(const char *nspace, pmix_rank_t rank, } /* get ns_track_elem_t object for the target namespace from the local track list. */ - elem = _get_track_elem_for_namespace(ns_map); + elem = _get_track_elem_for_namespace(ds_ctx, ns_map); if (NULL == elem) { /* Shouldn't happen! */ rc = PMIX_ERR_FATAL; @@ -2521,7 +2042,7 @@ static pmix_status_t _dstore_fetch(const char *nspace, pmix_rank_t rank, /* need to update tracker: * attach to shared memory regions for this namespace and store its info locally * to operate with address and detach/unlink afterwards. */ - rc = _update_ns_elem(elem, ns_info); + rc = _update_ns_elem(ds_ctx, elem, ns_info); if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); goto done; @@ -2531,9 +2052,21 @@ static pmix_status_t _dstore_fetch(const char *nspace, pmix_rank_t rank, meta_seg = elem->meta_seg; data_seg = elem->data_seg; + if( NULL != key ) { + keyhash = PMIX_DS_KEY_HASH(ds_ctx, key); + } + + /* all segment data updated, ctx lock may released */ + if (lock_is_set) { + lock_is_set = false; + if (0 != (rc = pthread_mutex_unlock(&ds_ctx->lock))) { + goto error; + } + } + while (nprocs--) { /* Get the rank meta info in the shared meta segment. */ - rinfo = _get_rank_meta_info(cur_rank, meta_seg); + rinfo = _get_rank_meta_info(ds_ctx, cur_rank, meta_seg); if (NULL == rinfo) { PMIX_OUTPUT_VERBOSE((7, pmix_gds_base_framework.framework_output, "%s:%d:%s: no data for this rank is found in the shared memory. 
rank %u", @@ -2541,7 +2074,7 @@ static pmix_status_t _dstore_fetch(const char *nspace, pmix_rank_t rank, all_ranks_found = false; continue; } - addr = _get_data_region_by_offset(data_seg, rinfo->offset); + addr = _get_data_region_by_offset(ds_ctx, data_seg, rinfo->offset); if (NULL == addr) { /* This means that meta-info is broken - error is fatal */ rc = PMIX_ERR_FATAL; @@ -2593,22 +2126,22 @@ static pmix_status_t _dstore_fetch(const char *nspace, pmix_rank_t rank, * EXTENSION slot which has key = EXTENSION_SLOT and a size_t value for offset * to next data address for this process. */ - if (0 == strncmp(ESH_KNAME_PTR(addr), ESH_REGION_INVALIDATED, ESH_KNAME_LEN(ESH_REGION_INVALIDATED))) { + if (PMIX_DS_KEY_IS_INVALID(ds_ctx, addr)) { PMIX_OUTPUT_VERBOSE((10, pmix_gds_base_framework.framework_output, "%s:%d:%s: for rank %s:%u, skip %s region", __FILE__, __LINE__, __func__, nspace, cur_rank, ESH_REGION_INVALIDATED)); /* skip it * go to next item, updating address */ - addr += ESH_KV_SIZE(addr); - } else if (0 == strncmp(ESH_KNAME_PTR(addr), ESH_REGION_EXTENSION, ESH_KNAME_LEN(ESH_REGION_EXTENSION))) { + addr += PMIX_DS_KV_SIZE(ds_ctx, addr); + } else if (PMIX_DS_KEY_IS_EXTSLOT(ds_ctx, addr)) { size_t offset; - memcpy(&offset, ESH_DATA_PTR(addr), sizeof(size_t)); + memcpy(&offset, PMIX_DS_DATA_PTR(ds_ctx, addr), sizeof(size_t)); PMIX_OUTPUT_VERBOSE((10, pmix_gds_base_framework.framework_output, "%s:%d:%s: for rank %s:%u, reached %s with %lu value", __FILE__, __LINE__, __func__, nspace, cur_rank, ESH_REGION_EXTENSION, offset)); if (0 < offset) { /* go to next item, updating address */ - addr = _get_data_region_by_offset(data_seg, offset); + addr = _get_data_region_by_offset(ds_ctx, data_seg, offset); if (NULL == addr) { /* This shouldn't happen - error is fatal */ rc = PMIX_ERR_FATAL; @@ -2625,21 +2158,22 @@ static pmix_status_t _dstore_fetch(const char *nspace, pmix_rank_t rank, } else if (NULL == key) { PMIX_OUTPUT_VERBOSE((10, 
pmix_gds_base_framework.framework_output, "%s:%d:%s: for rank %s:%u, found target key %s", - __FILE__, __LINE__, __func__, nspace, cur_rank, ESH_KNAME_PTR(addr))); + __FILE__, __LINE__, __func__, nspace, cur_rank, PMIX_DS_KNAME_PTR(ds_ctx, addr))); - uint8_t *data_ptr = ESH_DATA_PTR(addr); - size_t data_size = ESH_DATA_SIZE(addr, data_ptr); + uint8_t *data_ptr = PMIX_DS_DATA_PTR(ds_ctx, addr); + size_t data_size = PMIX_DS_DATA_SIZE(ds_ctx, addr, data_ptr); PMIX_CONSTRUCT(&buffer, pmix_buffer_t); - PMIX_LOAD_BUFFER(_client_peer(), &buffer, data_ptr, data_size); + PMIX_LOAD_BUFFER(_client_peer(ds_ctx), &buffer, data_ptr, data_size); int cnt = 1; /* unpack value for this key from the buffer. */ PMIX_VALUE_CONSTRUCT(&val); - PMIX_BFROPS_UNPACK(rc, _client_peer(), &buffer, &val, &cnt, PMIX_VALUE); + PMIX_BFROPS_UNPACK(rc, _client_peer(ds_ctx), &buffer, &val, &cnt, PMIX_VALUE); if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); goto done; } - strncpy(info[kval_cnt - 1].key, ESH_KNAME_PTR(addr), ESH_KNAME_LEN((char *)addr)); + pmix_strncpy(info[kval_cnt - 1].key, PMIX_DS_KNAME_PTR(ds_ctx, addr), + PMIX_DS_KNAME_LEN(ds_ctx, addr)); pmix_value_xfer(&info[kval_cnt - 1].value, &val); PMIX_VALUE_DESTRUCT(&val); buffer.base_ptr = NULL; @@ -2648,30 +2182,24 @@ static pmix_status_t _dstore_fetch(const char *nspace, pmix_rank_t rank, key_found = true; kval_cnt--; - addr += ESH_KV_SIZE(addr); - } else if (0 == strncmp(ESH_KNAME_PTR(addr), key, ESH_KNAME_LEN(key))) { + addr += PMIX_DS_KV_SIZE(ds_ctx, addr); + } else if (PMIX_DS_KEY_MATCH(ds_ctx, addr, key, keyhash)) { PMIX_OUTPUT_VERBOSE((10, pmix_gds_base_framework.framework_output, "%s:%d:%s: for rank %s:%u, found target key %s", __FILE__, __LINE__, __func__, nspace, cur_rank, key)); /* target key is found, get value */ - uint8_t *data_ptr = ESH_DATA_PTR(addr); - size_t data_size = ESH_DATA_SIZE(addr, data_ptr); + uint8_t *data_ptr = PMIX_DS_DATA_PTR(ds_ctx, addr); + size_t data_size = PMIX_DS_DATA_SIZE(ds_ctx, addr, data_ptr); 
PMIX_CONSTRUCT(&buffer, pmix_buffer_t); - PMIX_LOAD_BUFFER(_client_peer(), &buffer, data_ptr, data_size); + PMIX_LOAD_BUFFER(_client_peer(ds_ctx), &buffer, data_ptr, data_size); int cnt = 1; /* unpack value for this key from the buffer. */ - PMIX_VALUE_CONSTRUCT(&val); - PMIX_BFROPS_UNPACK(rc, _client_peer(), &buffer, &val, &cnt, PMIX_VALUE); - if (PMIX_SUCCESS != rc) { - PMIX_ERROR_LOG(rc); - goto done; - } - PMIX_BFROPS_COPY(rc, _client_peer(), (void**)kvs, &val, PMIX_VALUE); + *kvs = (pmix_value_t*)malloc(sizeof(pmix_value_t)); + PMIX_BFROPS_UNPACK(rc, _client_peer(ds_ctx), &buffer, (void*)*kvs, &cnt, PMIX_VALUE); if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); goto done; } - PMIX_VALUE_DESTRUCT(&val); buffer.base_ptr = NULL; buffer.bytes_used = 0; PMIX_DESTRUCT(&buffer); @@ -2680,9 +2208,10 @@ static pmix_status_t _dstore_fetch(const char *nspace, pmix_rank_t rank, } else { PMIX_OUTPUT_VERBOSE((10, pmix_gds_base_framework.framework_output, "%s:%d:%s: for rank %s:%u, skip key %s look for key %s", - __FILE__, __LINE__, __func__, nspace, cur_rank, ESH_KNAME_PTR(addr), key)); + __FILE__, __LINE__, __func__, nspace, cur_rank, + PMIX_DS_KNAME_PTR(ds_ctx, addr), key)); /* go to next item, updating address */ - addr += ESH_KV_SIZE(addr); + addr += PMIX_DS_KV_SIZE(ds_ctx, addr); kval_cnt--; } } @@ -2694,10 +2223,16 @@ static pmix_status_t _dstore_fetch(const char *nspace, pmix_rank_t rank, done: /* unset lock */ - if (PMIX_SUCCESS != (lock_rc = _ESH_UNLOCK(_ESH_SESSION_lock(ns_map->tbl_idx)))) { + lock_rc = _ESH_LOCK(ds_ctx, ns_map->tbl_idx, rd_unlock); + if (PMIX_SUCCESS != lock_rc) { PMIX_ERROR_LOG(lock_rc); } + /* unset ds_ctx lock */ + if (lock_is_set) { + pthread_mutex_unlock(&ds_ctx->lock); + } + if( rc != PMIX_SUCCESS ){ if ((NULL == key) && (kval_cnt > 0)) { if( NULL != info ) { @@ -2724,13 +2259,21 @@ static pmix_status_t _dstore_fetch(const char *nspace, pmix_rank_t rank, } rc = PMIX_ERR_NOT_FOUND; return rc; + +error: + if (lock_is_set) { + 
pthread_mutex_unlock(&ds_ctx->lock); + } + PMIX_ERROR_LOG(rc); + return rc; } -static pmix_status_t dstore_fetch(const pmix_proc_t *proc, - pmix_scope_t scope, bool copy, - const char *key, - pmix_info_t info[], size_t ninfo, - pmix_list_t *kvs) +PMIX_EXPORT pmix_status_t pmix_common_dstor_fetch(pmix_common_dstore_ctx_t *ds_ctx, + const pmix_proc_t *proc, + pmix_scope_t scope, bool copy, + const char *key, + pmix_info_t info[], size_t ninfo, + pmix_list_t *kvs) { pmix_kval_t *kv; pmix_value_t *val; @@ -2739,7 +2282,7 @@ static pmix_status_t dstore_fetch(const pmix_proc_t *proc, pmix_output_verbose(2, pmix_gds_base_framework.framework_output, "gds: dstore fetch `%s`", key == NULL ? "NULL" : key); - rc = _dstore_fetch(proc->nspace, proc->rank, key, &val); + rc = _dstore_fetch(ds_ctx, proc->nspace, proc->rank, key, &val); if (PMIX_SUCCESS == rc) { if( NULL == key ) { pmix_info_t *info; @@ -2787,7 +2330,8 @@ static pmix_status_t dstore_fetch(const pmix_proc_t *proc, return rc; } -static pmix_status_t dstore_setup_fork(const pmix_proc_t *peer, char ***env) +PMIX_EXPORT pmix_status_t pmix_common_dstor_setup_fork(pmix_common_dstore_ctx_t *ds_ctx, const char *base_path_env, + const pmix_proc_t *peer, char ***env) { pmix_status_t rc = PMIX_SUCCESS; ns_map_data_t *ns_map = NULL; @@ -2795,41 +2339,43 @@ static pmix_status_t dstore_setup_fork(const pmix_proc_t *peer, char ***env) pmix_output_verbose(2, pmix_gds_base_framework.framework_output, "gds: dstore setup fork"); - if (NULL == _esh_session_map_search) { + if (NULL == ds_ctx->session_map_search) { rc = PMIX_ERR_NOT_AVAILABLE; PMIX_ERROR_LOG(rc); return rc; } - if (NULL == (ns_map = _esh_session_map_search(peer->nspace))) { + if (NULL == (ns_map = ds_ctx->session_map_search(ds_ctx, peer->nspace))) { rc = PMIX_ERR_NOT_AVAILABLE; PMIX_ERROR_LOG(rc); return rc; } - if ((NULL == _base_path) || (strlen(_base_path) == 0)){ + if ((NULL == ds_ctx->base_path) || (strlen(ds_ctx->base_path) == 0)){ rc = PMIX_ERR_NOT_AVAILABLE; 
PMIX_ERROR_LOG(rc); return rc; } - if(PMIX_SUCCESS != (rc = pmix_setenv(PMIX_DSTORE_ESH_BASE_PATH, - _ESH_SESSION_path(ns_map->tbl_idx), true, env))){ + if(PMIX_SUCCESS != (rc = pmix_setenv(base_path_env, + _ESH_SESSION_path(ds_ctx->session_array, ns_map->tbl_idx), + true, env))){ PMIX_ERROR_LOG(rc); } + return rc; } -static pmix_status_t dstore_add_nspace(const char *nspace, - pmix_info_t info[], - size_t ninfo) +PMIX_EXPORT pmix_status_t pmix_common_dstor_add_nspace(pmix_common_dstore_ctx_t *ds_ctx, + const char *nspace, pmix_info_t info[], size_t ninfo) { - pmix_status_t rc; + pmix_status_t rc = PMIX_SUCCESS; size_t tbl_idx=0; - uid_t jobuid = _jobuid; - char setjobuid = _setjobuid; + uid_t jobuid = ds_ctx->jobuid; + char setjobuid = ds_ctx->setjobuid; size_t n; ns_map_data_t *ns_map = NULL; + uint32_t local_size = 0; pmix_output_verbose(2, pmix_gds_base_framework.framework_output, "gds: dstore add nspace"); @@ -2841,31 +2387,36 @@ static pmix_status_t dstore_add_nspace(const char *nspace, setjobuid = 1; continue; } + if (0 == strcmp(PMIX_LOCAL_SIZE, info[n].key)) { + local_size = info[n].value.data.uint32; + continue; + } } } - if (PMIX_SUCCESS != _esh_jobuid_tbl_search(jobuid, &tbl_idx)) { + if (PMIX_SUCCESS != _esh_jobuid_tbl_search(ds_ctx, jobuid, &tbl_idx)) { - rc = _esh_session_tbl_add(&tbl_idx); + rc = _esh_session_tbl_add(ds_ctx, &tbl_idx); if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); return rc; } - ns_map = _esh_session_map(nspace, tbl_idx); + ns_map = _esh_session_map(ds_ctx, nspace, local_size, tbl_idx); if (NULL == ns_map) { rc = PMIX_ERROR; PMIX_ERROR_LOG(rc); return rc; } - if (PMIX_SUCCESS != (rc =_esh_session_init(tbl_idx, ns_map, jobuid, setjobuid))) { + if (PMIX_SUCCESS != (rc =_esh_session_init(ds_ctx, tbl_idx, ns_map, + local_size, jobuid, setjobuid))) { rc = PMIX_ERROR; PMIX_ERROR_LOG(rc); return rc; } } else { - ns_map = _esh_session_map(nspace, tbl_idx); + ns_map = _esh_session_map(ds_ctx, nspace, local_size, tbl_idx); if (NULL == ns_map) 
{ rc = PMIX_ERROR; PMIX_ERROR_LOG(rc); @@ -2873,10 +2424,19 @@ static pmix_status_t dstore_add_nspace(const char *nspace, } } + /* lock init */ + ds_ctx->lock_cbs->init(&_ESH_SESSION_lock(ds_ctx->session_array, tbl_idx), + ds_ctx->base_path, nspace, local_size, ds_ctx->jobuid, + ds_ctx->setjobuid); + if (NULL == _ESH_SESSION_lock(ds_ctx->session_array, tbl_idx)) { + PMIX_ERROR_LOG(rc); + return rc; + } + return PMIX_SUCCESS; } -static pmix_status_t dstore_del_nspace(const char* nspace) +PMIX_EXPORT pmix_status_t pmix_common_dstor_del_nspace(pmix_common_dstore_ctx_t *ds_ctx, const char* nspace) { pmix_status_t rc = PMIX_SUCCESS; size_t map_idx, size; @@ -2891,20 +2451,20 @@ static pmix_status_t dstore_del_nspace(const char* nspace) PMIX_OUTPUT_VERBOSE((10, pmix_gds_base_framework.framework_output, "%s:%d:%s delete nspace `%s`", __FILE__, __LINE__, __func__, nspace)); - if (NULL == (ns_map_data = _esh_session_map_search(nspace))) { + if (NULL == (ns_map_data = ds_ctx->session_map_search(ds_ctx, nspace))) { rc = PMIX_ERR_NOT_AVAILABLE; return rc; } dstor_track_idx = ns_map_data->track_idx; session_tbl_idx = ns_map_data->tbl_idx; - size = pmix_value_array_get_size(_ns_map_array); - ns_map = PMIX_VALUE_ARRAY_GET_BASE(_ns_map_array, ns_map_t); + size = pmix_value_array_get_size(ds_ctx->ns_map_array); + ns_map = PMIX_VALUE_ARRAY_GET_BASE(ds_ctx->ns_map_array, ns_map_t); for (map_idx = 0; map_idx < size; map_idx++){ if (ns_map[map_idx].in_use && (ns_map[map_idx].data.tbl_idx == ns_map_data->tbl_idx)) { if (0 == strcmp(ns_map[map_idx].data.name, nspace)) { - _esh_session_map_clean(&ns_map[map_idx]); + _esh_session_map_clean(ds_ctx, &ns_map[map_idx]); continue; } in_use++; @@ -2914,68 +2474,29 @@ static pmix_status_t dstore_del_nspace(const char* nspace) /* A lot of nspaces may be using same session info * session record can only be deleted once all references are gone */ if (!in_use) { - session_tbl = PMIX_VALUE_ARRAY_GET_BASE(_session_array, session_t); + session_tbl = 
PMIX_VALUE_ARRAY_GET_BASE(ds_ctx->session_array, session_t); PMIX_OUTPUT_VERBOSE((10, pmix_gds_base_framework.framework_output, "%s:%d:%s delete session for jobuid: %d", __FILE__, __LINE__, __func__, session_tbl[session_tbl_idx].jobuid)); - size = pmix_value_array_get_size(_ns_track_array); + size = pmix_value_array_get_size(ds_ctx->ns_track_array); if (size && (dstor_track_idx >= 0)) { - if((dstor_track_idx + 1) > size) { + if((dstor_track_idx + 1) > (int)size) { rc = PMIX_ERR_VALUE_OUT_OF_BOUNDS; PMIX_ERROR_LOG(rc); goto exit; } - trk = pmix_value_array_get_item(_ns_track_array, dstor_track_idx); + trk = pmix_value_array_get_item(ds_ctx->ns_track_array, dstor_track_idx); if (true == trk->in_use) { PMIX_DESTRUCT(trk); + pmix_value_array_remove_item(ds_ctx->ns_track_array, dstor_track_idx); } } - _esh_session_release(&session_tbl[session_tbl_idx]); + _esh_session_release(ds_ctx, session_tbl_idx); } exit: return rc; } -static pmix_status_t dstore_assign_module(pmix_info_t *info, size_t ninfo, - int *priority) -{ - size_t n, m; - char **options; - - *priority = 20; - if (NULL != info) { - for (n=0; n < ninfo; n++) { - if (0 == strncmp(info[n].key, PMIX_GDS_MODULE, PMIX_MAX_KEYLEN)) { - options = pmix_argv_split(info[n].value.data.string, ','); - for (m=0; NULL != options[m]; m++) { - if (0 == strcmp(options[m], "ds12")) { - /* they specifically asked for us */ - *priority = 100; - break; - } - if (0 == strcmp(options[m], "dstore")) { - /* they are asking for any dstore module - we - * take an intermediate priority in case another - * dstore is more modern than us */ - *priority = 50; - break; - } - } - pmix_argv_free(options); - break; - } - } - } - -#if 0 - if PMIX_GDS_MODULE != "ds12" - *proirity = 0; - else PMIX_GDS_MODULE == "ds12" || !PMIX_GDS_MODULE - *priority = -1; -#endif - return PMIX_SUCCESS; -} - static inline int _my_client(const char *nspace, pmix_rank_t rank) { pmix_peer_t *peer; @@ -2998,16 +2519,59 @@ static inline int _my_client(const char *nspace, 
pmix_rank_t rank) * host has received data from some other peer. It therefore * always contains data solely from remote procs, and we * shall store it accordingly */ -static pmix_status_t dstore_store_modex(struct pmix_nspace_t *nspace, - pmix_list_t *cbs, - pmix_byte_object_t *bo) +PMIX_EXPORT pmix_status_t pmix_common_dstor_store_modex(pmix_common_dstore_ctx_t *ds_ctx, + struct pmix_namespace_t *nspace, + pmix_list_t *cbs, + pmix_buffer_t *buf) { - pmix_nspace_t *ns = (pmix_nspace_t*)nspace; + pmix_status_t rc = PMIX_SUCCESS; + pmix_status_t rc1 = PMIX_SUCCESS; + pmix_namespace_t *ns = (pmix_namespace_t*)nspace; + ns_map_data_t *ns_map; + + if (NULL == (ns_map = ds_ctx->session_map_search(ds_ctx, ns->nspace))) { + rc = PMIX_ERROR; + PMIX_ERROR_LOG(rc); + return rc; + } + + /* set exclusive lock */ + rc = _ESH_LOCK(ds_ctx, ns_map->tbl_idx, wr_lock); + if (PMIX_SUCCESS != rc) { + PMIX_ERROR_LOG(rc); + return rc; + } + + rc = pmix_gds_base_store_modex(nspace, cbs, buf, (pmix_gds_base_store_modex_cb_fn_t)_dstor_store_modex_cb, ds_ctx); + if (PMIX_SUCCESS != rc) { + PMIX_ERROR_LOG(rc); + } + + /* unset lock */ + rc1 = _ESH_LOCK(ds_ctx, ns_map->tbl_idx, wr_unlock); + if (PMIX_SUCCESS != rc1) { + PMIX_ERROR_LOG(rc1); + if (PMIX_SUCCESS == rc) { + rc = rc1; + } + } + + return rc; +} + +static pmix_status_t _dstor_store_modex_cb(pmix_common_dstore_ctx_t *ds_ctx, + struct pmix_namespace_t *nspace, + pmix_list_t *cbs, + pmix_byte_object_t *bo) +{ + pmix_namespace_t *ns = (pmix_namespace_t*)nspace; pmix_status_t rc = PMIX_SUCCESS; int32_t cnt; pmix_buffer_t pbkt; pmix_proc_t proc; pmix_kval_t *kv; + ns_map_data_t *ns_map; + pmix_buffer_t tmp; pmix_output_verbose(2, pmix_gds_base_framework.framework_output, "[%s:%d] gds:dstore:store_modex for nspace %s", @@ -3048,6 +2612,10 @@ static pmix_status_t dstore_store_modex(struct pmix_nspace_t *nspace, PMIX_DESTRUCT(&pbkt); return PMIX_SUCCESS; } + + /* Prepare a buffer to be provided to the dstor store primitive */ + 
PMIX_CONSTRUCT(&tmp, pmix_buffer_t); + /* unpack the remaining values until we hit the end of the buffer */ cnt = 1; kv = PMIX_NEW(pmix_kval_t); @@ -3063,29 +2631,68 @@ static pmix_status_t dstore_store_modex(struct pmix_nspace_t *nspace, PMIX_DESTRUCT(&pbkt); return rc; } - if (PMIX_SUCCESS != (rc = dstore_store(&proc, PMIX_REMOTE, kv))) { - PMIX_ERROR_LOG(rc); - } - PMIX_RELEASE(kv); // maintain accounting as the hash increments the ref count - /* continue along */ + + /* place the key to the to be provided to _dstore_store_nolock */ + PMIX_BFROPS_PACK(rc, pmix_globals.mypeer, &tmp, kv, 1, PMIX_KVAL); + + /* Release the kv to maintain accounting + * as the hash increments the ref count */ + PMIX_RELEASE(kv); + + /* proceed to the next element */ kv = PMIX_NEW(pmix_kval_t); cnt = 1; PMIX_BFROPS_UNPACK(rc, pmix_globals.mypeer, &pbkt, kv, &cnt, PMIX_KVAL); } - PMIX_RELEASE(kv); // maintain accounting + + /* Release the kv that didn't received the value + * because input buffer was exhausted */ + PMIX_RELEASE(kv); if (PMIX_ERR_UNPACK_READ_PAST_END_OF_BUFFER != rc) { PMIX_ERROR_LOG(rc); } else { rc = PMIX_SUCCESS; } + + /* Create a key-value pair with the buffer + * to be passed to _dstore_store_nolock */ + kv = PMIX_NEW(pmix_kval_t); + PMIX_VALUE_CREATE(kv->value, 1); + kv->value->type = PMIX_BYTE_OBJECT; + PMIX_UNLOAD_BUFFER(&tmp, kv->value->data.bo.bytes, kv->value->data.bo.size); + + /* Get the namespace map element for the process "proc" */ + if (NULL == (ns_map = ds_ctx->session_map_search(ds_ctx, proc.nspace))) { + rc = PMIX_ERROR; + PMIX_ERROR_LOG(rc); + bo->bytes = pbkt.base_ptr; + bo->size = pbkt.bytes_used; // restore the incoming data + pbkt.base_ptr = NULL; + PMIX_DESTRUCT(&pbkt); + return rc; + } + + /* Store all keys at once */ + rc = _dstore_store_nolock(ds_ctx, ns_map, proc.rank, kv); + if (PMIX_SUCCESS != rc) { + PMIX_ERROR_LOG(rc); + } + + /* Release all resources */ + PMIX_RELEASE(kv); + PMIX_DESTRUCT(&tmp); + + /* Reset the input buffer */ 
bo->bytes = pbkt.base_ptr; - bo->size = pbkt.bytes_used; // restore the incoming data + bo->size = pbkt.bytes_used; pbkt.base_ptr = NULL; PMIX_DESTRUCT(&pbkt); + return rc; } -static pmix_status_t _store_job_info(pmix_proc_t *proc) +static pmix_status_t _store_job_info(pmix_common_dstore_ctx_t *ds_ctx, ns_map_data_t *ns_map, + pmix_proc_t *proc) { pmix_cb_t cb; pmix_kval_t *kv; @@ -3113,7 +2720,7 @@ static pmix_status_t _store_job_info(pmix_proc_t *proc) } PMIX_LIST_FOREACH(kv, &cb.kvs, pmix_kval_t) { - if ((PMIX_PROC_IS_V1(_client_peer()) || PMIX_PROC_IS_V20(_client_peer())) && + if ((PMIX_PROC_IS_V1(_client_peer(ds_ctx)) || PMIX_PROC_IS_V20(_client_peer(ds_ctx))) && 0 != strncmp("pmix.", kv->key, 4) && kv->value->type == PMIX_DATA_ARRAY) { pmix_info_t *info; @@ -3150,7 +2757,7 @@ static pmix_status_t _store_job_info(pmix_proc_t *proc) } PMIX_UNLOAD_BUFFER(&buf, kvp->value->data.bo.bytes, kvp->value->data.bo.size); - if (PMIX_SUCCESS != (rc = _dstore_store(proc->nspace, proc->rank, kvp))) { + if (PMIX_SUCCESS != (rc = _dstore_store_nolock(ds_ctx, ns_map, proc->rank, kvp))) { PMIX_ERROR_LOG(rc); goto exit; } @@ -3162,15 +2769,16 @@ static pmix_status_t _store_job_info(pmix_proc_t *proc) return rc; } -static pmix_status_t dstore_register_job_info(struct pmix_peer_t *pr, - pmix_buffer_t *reply) +PMIX_EXPORT pmix_status_t pmix_common_dstor_register_job_info(pmix_common_dstore_ctx_t *ds_ctx, + struct pmix_peer_t *pr, + pmix_buffer_t *reply) { pmix_peer_t *peer = (pmix_peer_t*)pr; - pmix_nspace_t *ns = peer->nptr; + pmix_namespace_t *ns = peer->nptr; char *msg; pmix_status_t rc; pmix_proc_t proc; - pmix_rank_info_t *rinfo; + pmix_rank_t rank; pmix_output_verbose(2, pmix_gds_base_framework.framework_output, "[%s:%d] gds:dstore:register_job_info for peer [%s:%d]", @@ -3178,23 +2786,44 @@ static pmix_status_t dstore_register_job_info(struct pmix_peer_t *pr, peer->info->pname.nspace, peer->info->pname.rank); if (0 == ns->ndelivered) { // don't store twice - 
_client_compat_save(peer); - (void)strncpy(proc.nspace, ns->nspace, PMIX_MAX_NSLEN); + ns_map_data_t *ns_map; + + _client_compat_save(ds_ctx, peer); + pmix_strncpy(proc.nspace, ns->nspace, PMIX_MAX_NSLEN); proc.rank = PMIX_RANK_WILDCARD; - rc = _store_job_info(&proc); + if (NULL == (ns_map = ds_ctx->session_map_search(ds_ctx, proc.nspace))) { + rc = PMIX_ERROR; + PMIX_ERROR_LOG(rc); + return rc; + } + + /* set exclusive lock */ + rc = _ESH_LOCK(ds_ctx, ns_map->tbl_idx, wr_lock); + if (PMIX_SUCCESS != rc) { + PMIX_ERROR_LOG(rc); + return rc; + } + + rc = _store_job_info(ds_ctx, ns_map, &proc); if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); + /* release the write lock taken above before bailing out; + * keep rc as the original store error */ + (void)_ESH_LOCK(ds_ctx, ns_map->tbl_idx, wr_unlock); return rc; } - PMIX_LIST_FOREACH(rinfo, &ns->ranks, pmix_rank_info_t) { - proc.rank = rinfo->pname.rank; - rc = _store_job_info(&proc); + for (rank=0; rank < ns->nprocs; rank++) { + proc.rank = rank; + rc = _store_job_info(ds_ctx, ns_map, &proc); if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); + /* release the write lock taken above before bailing out; + * keep rc as the original store error */ + (void)_ESH_LOCK(ds_ctx, ns_map->tbl_idx, wr_unlock); return rc; } } + /* unset lock */ + rc = _ESH_LOCK(ds_ctx, ns_map->tbl_idx, wr_unlock); + if (PMIX_SUCCESS != rc) { + PMIX_ERROR_LOG(rc); + return rc; + } } /* answer to client */ @@ -3208,7 +2837,9 @@ static pmix_status_t dstore_register_job_info(struct pmix_peer_t *pr, return rc; } -static pmix_status_t dstore_store_job_info(const char *nspace, pmix_buffer_t *buf) +PMIX_EXPORT pmix_status_t pmix_common_dstor_store_job_info(pmix_common_dstore_ctx_t *ds_ctx, + const char *nspace, + pmix_buffer_t *job_data) { pmix_status_t rc = PMIX_SUCCESS; @@ -3217,7 +2848,7 @@ static pmix_status_t dstore_store_job_info(const char *nspace, pmix_buffer_t *b pmix_globals.myid.nspace, pmix_globals.myid.rank, nspace); /* check buf data */ - if ((NULL == buf) || (0 == buf->bytes_used)) { + if ((NULL == job_data) || (0 == job_data->bytes_used)) { rc = PMIX_ERR_BAD_PARAM; PMIX_ERROR_LOG(rc); return rc; @@ -3225,23 +2856,23 @@ static pmix_status_t dstore_store_job_info(const char *nspace, pmix_buffer_t *b return rc; } -static void _client_compat_save(pmix_peer_t 
*peer) +static void _client_compat_save(pmix_common_dstore_ctx_t *ds_ctx, pmix_peer_t *peer) { - pmix_nspace_t *nptr = NULL; + pmix_namespace_t *nptr = NULL; - if (NULL == _clients_peer) { - _clients_peer = PMIX_NEW(pmix_peer_t); - nptr = PMIX_NEW(pmix_nspace_t); - _clients_peer->nptr = nptr; + if (NULL == ds_ctx->clients_peer) { + ds_ctx->clients_peer = PMIX_NEW(pmix_peer_t); + nptr = PMIX_NEW(pmix_namespace_t); + ds_ctx->clients_peer->nptr = nptr; } - _clients_peer->nptr->compat = peer->nptr->compat; - _clients_peer->proc_type = peer->proc_type; + ds_ctx->clients_peer->nptr->compat = peer->nptr->compat; + ds_ctx->clients_peer->proc_type = peer->proc_type; } -static inline pmix_peer_t * _client_peer(void) +static inline pmix_peer_t * _client_peer(pmix_common_dstore_ctx_t *ds_ctx) { - if (NULL == _clients_peer) { + if (NULL == ds_ctx->clients_peer) { return pmix_globals.mypeer; } - return _clients_peer; + return ds_ctx->clients_peer; } diff --git a/opal/mca/pmix/pmix3x/pmix/src/mca/common/dstore/dstore_base.h b/opal/mca/pmix/pmix3x/pmix/src/mca/common/dstore/dstore_base.h new file mode 100644 index 00000000000..7989ae6ca56 --- /dev/null +++ b/opal/mca/pmix/pmix3x/pmix/src/mca/common/dstore/dstore_base.h @@ -0,0 +1,139 @@ +/* + * Copyright (c) 2015-2017 Intel, Inc. All rights reserved. + * Copyright (c) 2017 Mellanox Technologies, Inc. + * All rights reserved. 
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#ifndef PMIX_DSTORE_H +#define PMIX_DSTORE_H + +#include + + +#include "src/mca/gds/gds.h" +#include "src/mca/pshmem/pshmem.h" + +BEGIN_C_DECLS + +#include +#include "src/class/pmix_value_array.h" +#include "dstore_common.h" +#include "dstore_segment.h" +#include "dstore_file.h" + +#define INITIAL_SEG_SIZE 4096 +#define NS_META_SEG_SIZE (1<<22) +#define NS_DATA_SEG_SIZE (1<<22) + +#define PMIX_DSTORE_ESH_BASE_PATH "PMIX_DSTORE_ESH_BASE_PATH" +#define PMIX_DSTORE_VER_BASE_PATH_FMT "PMIX_DSTORE_%d_BASE_PATH" + +typedef struct ns_map_data_s ns_map_data_t; +typedef struct session_s session_t; +typedef struct ns_map_s ns_map_t; + +typedef ns_map_data_t * (*session_map_search_fn_t)(pmix_common_dstore_ctx_t *ds_ctx, + const char *nspace); + +struct pmix_common_dstore_ctx_s { + char *ds_name; + char *base_path; + uid_t jobuid; + char setjobuid; + + pmix_value_array_t *session_array; + pmix_value_array_t *ns_map_array; + pmix_value_array_t *ns_track_array; + + pmix_common_lock_callbacks_t *lock_cbs; + pmix_common_dstore_file_cbs_t *file_cbs; + + size_t initial_segment_size; + size_t meta_segment_size; + size_t data_segment_size; + size_t lock_segment_size; + + size_t max_ns_num; + size_t max_meta_elems; + + session_map_search_fn_t session_map_search; + pmix_peer_t *clients_peer; + /* If _direct_mode is set, it means that we use linear search + * along the array of rank meta info objects inside a meta segment + * to find the requested rank. Otherwise, we do a fast lookup + * based on rank and directly compute offset. + * This mode is called direct because it's effectively used in + * sparse communication patterns when direct modex is usually used. 
+ */ + int direct_mode; + /* dstore ctx protect lock, uses for clients only */ + pthread_mutex_t lock; +}; + +struct session_s { + int in_use; + uid_t jobuid; + char setjobuid; + char *nspace_path; + pmix_dstore_seg_desc_t *sm_seg_first; + pmix_dstore_seg_desc_t *sm_seg_last; + pmix_common_dstor_lock_ctx_t lock; +}; + +struct ns_map_data_s { + char name[PMIX_MAX_NSLEN+1]; + size_t tbl_idx; + int track_idx; +}; + +struct ns_map_s { + int in_use; + ns_map_data_t data; +}; + +/* initial segment format: + * size_t num_elems; + * size_t full; //indicate to client that it needs to attach to the next segment + * ns_seg_info_t ns_seg_info[max_ns_num]; + */ + +typedef struct { + ns_map_data_t ns_map; + size_t num_meta_seg;/* read by clients to attach to this number of segments. */ + size_t num_data_seg; +} ns_seg_info_t; + +/* meta segment format: + * size_t num_elems; + * rank_meta_info meta_info[max_meta_elems]; + */ + +typedef struct { + size_t rank; + size_t offset; + size_t count; +} rank_meta_info; + +typedef struct { + pmix_value_array_t super; + ns_map_data_t ns_map; + size_t num_meta_seg; + size_t num_data_seg; + pmix_dstore_seg_desc_t *meta_seg; + pmix_dstore_seg_desc_t *data_seg; + bool in_use; +} ns_track_elem_t; + +typedef struct { + pmix_list_item_t super; + pmix_common_dstor_lock_ctx_t *lock; +} lock_track_item_t; + +END_C_DECLS + +#endif diff --git a/opal/mca/pmix/pmix3x/pmix/src/mca/common/dstore/dstore_common.h b/opal/mca/pmix/pmix3x/pmix/src/mca/common/dstore/dstore_common.h new file mode 100644 index 00000000000..466eccd9a5d --- /dev/null +++ b/opal/mca/pmix/pmix3x/pmix/src/mca/common/dstore/dstore_common.h @@ -0,0 +1,79 @@ +/* + * Copyright (c) 2018 Mellanox Technologies, Inc. + * All rights reserved. + * Copyright (c) 2018 IBM Corporation. All rights reserved. 
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#ifndef PMIX_GDS_DS_BASE_H_ +#define PMIX_GDS_DS_BASE_H_ + +#include +#include +#include + +#include "src/include/pmix_globals.h" +#include "src/class/pmix_list.h" +#include "src/mca/gds/gds.h" +#include "src/mca/pshmem/pshmem.h" +#include "src/mca/common/dstore/dstore_file.h" + +typedef void* pmix_common_dstor_lock_ctx_t; + +typedef pmix_status_t (*pmix_common_dstor_lock_init_fn_t)(pmix_common_dstor_lock_ctx_t *ctx, + const char *base_path, const char *name, + uint32_t local_size, uid_t uid, bool setuid); +typedef void (*pmix_common_dstor_lock_finalize_fn_t)(pmix_common_dstor_lock_ctx_t *ctx); +typedef pmix_status_t (*pmix_common_dstor_lock_rd_get_fn_t)(pmix_common_dstor_lock_ctx_t ctx); +typedef pmix_status_t (*pmix_common_dstor_lock_rd_rel_fn_t)(pmix_common_dstor_lock_ctx_t ctx); +typedef pmix_status_t (*pmix_common_dstor_lock_wr_get_fn_t)(pmix_common_dstor_lock_ctx_t ctx); +typedef pmix_status_t (*pmix_common_dstor_lock_wr_rel_fn_t)(pmix_common_dstor_lock_ctx_t ctx); + +typedef struct { + pmix_common_dstor_lock_init_fn_t init; + pmix_common_dstor_lock_finalize_fn_t finalize; + pmix_common_dstor_lock_rd_get_fn_t rd_lock; + pmix_common_dstor_lock_rd_rel_fn_t rd_unlock; + pmix_common_dstor_lock_wr_get_fn_t wr_lock; + pmix_common_dstor_lock_wr_rel_fn_t wr_unlock; +} pmix_common_lock_callbacks_t; + +typedef struct pmix_common_dstore_ctx_s pmix_common_dstore_ctx_t; + +PMIX_EXPORT pmix_common_dstore_ctx_t *pmix_common_dstor_init(const char *ds_name, pmix_info_t info[], size_t ninfo, + pmix_common_lock_callbacks_t *lock_cb, + pmix_common_dstore_file_cbs_t *file_cb); +PMIX_EXPORT void pmix_common_dstor_finalize(pmix_common_dstore_ctx_t *ds_ctx); +PMIX_EXPORT pmix_status_t pmix_common_dstor_add_nspace(pmix_common_dstore_ctx_t *ds_ctx, + const char *nspace, pmix_info_t info[], size_t ninfo); +PMIX_EXPORT pmix_status_t pmix_common_dstor_del_nspace(pmix_common_dstore_ctx_t *ds_ctx, const char* 
nspace); +PMIX_EXPORT pmix_status_t pmix_common_dstor_setup_fork(pmix_common_dstore_ctx_t *ds_ctx, const char *base_path_env, + const pmix_proc_t *peer, char ***env); +PMIX_EXPORT pmix_status_t pmix_common_dstor_cache_job_info(pmix_common_dstore_ctx_t *ds_ctx, + struct pmix_namespace_t *ns, + pmix_info_t info[], size_t ninfo); +PMIX_EXPORT pmix_status_t pmix_common_dstor_register_job_info(pmix_common_dstore_ctx_t *ds_ctx, + struct pmix_peer_t *pr, + pmix_buffer_t *reply); +PMIX_EXPORT pmix_status_t pmix_common_dstor_store_job_info(pmix_common_dstore_ctx_t *ds_ctx, + const char *nspace, + pmix_buffer_t *job_data); +PMIX_EXPORT pmix_status_t pmix_common_dstor_store(pmix_common_dstore_ctx_t *ds_ctx, + const pmix_proc_t *proc, + pmix_scope_t scope, + pmix_kval_t *kv); +PMIX_EXPORT pmix_status_t pmix_common_dstor_fetch(pmix_common_dstore_ctx_t *ds_ctx, + const pmix_proc_t *proc, + pmix_scope_t scope, bool copy, + const char *key, + pmix_info_t info[], size_t ninfo, + pmix_list_t *kvs); +PMIX_EXPORT pmix_status_t pmix_common_dstor_store_modex(pmix_common_dstore_ctx_t *ds_ctx, + struct pmix_namespace_t *nspace, + pmix_list_t *cbs, + pmix_buffer_t *buff); +#endif diff --git a/opal/mca/pmix/pmix3x/pmix/src/mca/common/dstore/dstore_file.h b/opal/mca/pmix/pmix3x/pmix/src/mca/common/dstore/dstore_file.h new file mode 100644 index 00000000000..576149b0569 --- /dev/null +++ b/opal/mca/pmix/pmix3x/pmix/src/mca/common/dstore/dstore_file.h @@ -0,0 +1,161 @@ +#ifndef DSTORE_FORMAT_H +#define DSTORE_FORMAT_H + +typedef size_t (*pmix_common_dstore_kv_size_fn)(uint8_t *addr); +typedef char* (*pmix_common_dstore_key_name_ptr_fn)(uint8_t *addr); +typedef size_t (*pmix_common_dstore_key_name_len_fn)(char *key); +typedef uint8_t* (*pmix_common_dstore_data_ptr_fn)(uint8_t *addr); +typedef size_t (*pmix_common_dstore_data_size_fn)(uint8_t *addr, uint8_t* data_ptr); +typedef size_t (*pmix_common_dstore_key_size_fn)(char *key, size_t data_size); +typedef size_t 
(*pmix_common_dstore_ext_slot_size_fn)(void); +typedef int (*pmix_common_dstore_put_key_fn)(uint8_t *addr, char *key, void *buf, + size_t size); +typedef bool (*pmix_common_dstore_is_invalid_fn)(uint8_t *addr); +typedef bool (*pmix_common_dstore_is_extslot_fn)(uint8_t *addr); +typedef void (*pmix_common_dstore_set_invalid_fn)(uint8_t *addr); +typedef size_t (*pmix_common_dstore_key_hash_fn)(const char *key); +typedef bool (*pmix_common_dstore_key_match_fn)(uint8_t *addr, const char *key, + size_t key_hash); + +typedef struct { + const char *name; + pmix_common_dstore_kv_size_fn kval_size; + pmix_common_dstore_key_name_ptr_fn kname_ptr; + pmix_common_dstore_key_name_len_fn kname_len; + pmix_common_dstore_data_ptr_fn data_ptr; + pmix_common_dstore_data_size_fn data_size; + pmix_common_dstore_key_size_fn key_size; + pmix_common_dstore_ext_slot_size_fn ext_slot_size; + pmix_common_dstore_put_key_fn put_key; + pmix_common_dstore_is_invalid_fn is_invalid; + pmix_common_dstore_is_extslot_fn is_extslot; + pmix_common_dstore_set_invalid_fn set_invalid; + pmix_common_dstore_key_hash_fn key_hash; + pmix_common_dstore_key_match_fn key_match; +} pmix_common_dstore_file_cbs_t; + +#define ESH_REGION_EXTENSION "EXTENSION_SLOT" +#define ESH_REGION_INVALIDATED "INVALIDATED" +#define ESH_ENV_INITIAL_SEG_SIZE "INITIAL_SEG_SIZE" +#define ESH_ENV_NS_META_SEG_SIZE "NS_META_SEG_SIZE" +#define ESH_ENV_NS_DATA_SEG_SIZE "NS_DATA_SEG_SIZE" +#define ESH_ENV_LINEAR "SM_USE_LINEAR_SEARCH" + +#define ESH_MIN_KEY_LEN (sizeof(ESH_REGION_INVALIDATED)) + +#define PMIX_DS_PUT_KEY(rc, ctx, addr, key, buf, size) \ + do { \ + rc = PMIX_ERROR; \ + if ((ctx)->file_cbs && (ctx)->file_cbs->put_key) { \ + rc = (ctx)->file_cbs->put_key(addr, key, buf, size); \ + } \ + } while(0) + +#define PMIX_DS_KV_SIZE(ctx, addr) \ +__pmix_attribute_extension__ ({ \ + size_t size = 0; \ + if ((ctx)->file_cbs && (ctx)->file_cbs->kval_size) { \ + size = (ctx)->file_cbs->kval_size(addr); \ + } \ + size; \ +}) + +#define 
PMIX_DS_KNAME_PTR(ctx, addr) \ +__pmix_attribute_extension__ ({ \ + char *name_ptr = NULL; \ + if ((ctx)->file_cbs && (ctx)->file_cbs->kname_ptr) { \ + name_ptr = (ctx)->file_cbs->kname_ptr(addr); \ + } \ + name_ptr; \ +}) + +#define PMIX_DS_KNAME_LEN(ctx, addr) \ +__pmix_attribute_extension__ ({ \ + size_t len = 0; \ + if ((ctx)->file_cbs && (ctx)->file_cbs->kname_len) { \ + len = (ctx)->file_cbs->kname_len((char*)addr); \ + } \ + len; \ +}) + +#define PMIX_DS_DATA_PTR(ctx, addr) \ +__pmix_attribute_extension__ ({ \ + uint8_t *data_ptr = NULL; \ + if ((ctx)->file_cbs && (ctx)->file_cbs->data_ptr) { \ + data_ptr = (ctx)->file_cbs->data_ptr(addr); \ + } \ + data_ptr; \ +}) + +#define PMIX_DS_DATA_SIZE(ctx, addr, data_ptr) \ +__pmix_attribute_extension__ ({ \ + size_t size = 0; \ + if ((ctx)->file_cbs && (ctx)->file_cbs->data_size) { \ + size = (ctx)->file_cbs->data_size(addr, data_ptr); \ + } \ + size; \ +}) + +#define PMIX_DS_KEY_SIZE(ctx, key, data_size) \ +__pmix_attribute_extension__ ({ \ + size_t __size = 0; \ + if ((ctx)->file_cbs && (ctx)->file_cbs->key_size) { \ + __size = (ctx)->file_cbs->key_size(key, data_size); \ + } \ + __size; \ +}) + +#define PMIX_DS_SLOT_SIZE(ctx) \ +__pmix_attribute_extension__ ({ \ + size_t __size = 0; \ + if ((ctx)->file_cbs && (ctx)->file_cbs->ext_slot_size) { \ + __size = (ctx)->file_cbs->ext_slot_size(); \ + } \ + __size; \ +}) + +#define PMIX_DS_KEY_HASH(ctx, key) \ +__pmix_attribute_extension__ ({ \ + size_t keyhash = 0; \ + if ((ctx)->file_cbs && (ctx)->file_cbs->key_hash) { \ + keyhash = (ctx)->file_cbs->key_hash(key); \ + } \ + keyhash; \ +}) + +#define PMIX_DS_KEY_MATCH(ctx, addr, key, hash) \ +__pmix_attribute_extension__ ({ \ + int ret = 0; \ + if ((ctx)->file_cbs && (ctx)->file_cbs->key_match) { \ + ret = (ctx)->file_cbs->key_match(addr, key, hash); \ + } \ + ret; \ +}) + +#define PMIX_DS_KEY_IS_INVALID(ctx, addr) \ +__pmix_attribute_extension__ ({ \ + int ret = 0; \ + if ((ctx)->file_cbs && 
(ctx)->file_cbs->is_invalid) { \ + ret = (ctx)->file_cbs->is_invalid(addr); \ + } \ + ret; \ +}) + +#define PMIX_DS_KEY_SET_INVALID(ctx, addr) \ + do { \ + if ((ctx)->file_cbs && (ctx)->file_cbs->set_invalid) { \ + (ctx)->file_cbs->set_invalid(addr); \ + } \ + } while(0) + +/* Evaluates to the is_extslot callback's verdict for the record at addr, + * or to 0 when the context has no file callbacks or no is_extslot callback. + * The guard must test is_extslot itself (not is_invalid) so that the + * pointer being called is the one that was checked. */ +#define PMIX_DS_KEY_IS_EXTSLOT(ctx, addr) \ +__pmix_attribute_extension__ ({ \ + int ret = 0; \ + if ((ctx)->file_cbs && (ctx)->file_cbs->is_extslot) { \ + ret = (ctx)->file_cbs->is_extslot(addr); \ + } \ + ret; \ +}) + + +#endif // DSTORE_FORMAT_H diff --git a/opal/mca/pmix/pmix3x/pmix/src/mca/common/dstore/dstore_segment.c b/opal/mca/pmix/pmix3x/pmix/src/mca/common/dstore/dstore_segment.c new file mode 100644 index 00000000000..69ec1ba577f --- /dev/null +++ b/opal/mca/pmix/pmix3x/pmix/src/mca/common/dstore/dstore_segment.c @@ -0,0 +1,307 @@ +/* + * Copyright (c) 2015-2019 Intel, Inc. All rights reserved. + * Copyright (c) 2016 IBM Corporation. All rights reserved. + * Copyright (c) 2016-2017 Mellanox Technologies, Inc. + * All rights reserved. + * Copyright (c) 2018-2019 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
+ * + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include + +#ifdef HAVE_UNISTD_H +#include +#endif +#ifdef HAVE_SYS_TYPES_H +#include +#endif +#ifdef HAVE_SYS_STAT_H +#include +#endif +#ifdef HAVE_FCNTL_H +#include +#endif + +#ifdef HAVE_SYS_AUXV_H +#include +#if PMIX_HAVE_LIBEV +/* EV_NONE is macro-defined in that is included by + * and used in an enum in from libev, so #undef it to fix an issue*/ +#undef EV_NONE +#endif +#endif + +#include + +#include "src/include/pmix_globals.h" +#include "src/mca/gds/base/base.h" +#include "src/mca/pshmem/base/base.h" +#include "src/util/error.h" +#include "src/util/output.h" + +#include "dstore_common.h" +#include "dstore_segment.h" + +static size_t _initial_segment_size; +static size_t _meta_segment_size; +static size_t _data_segment_size; + +PMIX_EXPORT int pmix_common_dstor_getpagesize(void) +{ +#if defined(_SC_PAGESIZE ) + return sysconf(_SC_PAGESIZE); +#elif defined(_SC_PAGE_SIZE) + return sysconf(_SC_PAGE_SIZE); +#else + return 65536; /* safer to overestimate than under */ +#endif +} + +PMIX_EXPORT size_t pmix_common_dstor_getcacheblocksize(void) +{ + size_t cache_line = 0; + +#if defined(_SC_LEVEL1_DCACHE_LINESIZE) + cache_line = sysconf(_SC_LEVEL1_DCACHE_LINESIZE); +#endif +#if (defined(HAVE_SYS_AUXV_H)) && (defined(AT_DCACHEBSIZE)) + if (0 == cache_line) { + cache_line = getauxval(AT_DCACHEBSIZE); + } +#endif + return cache_line; +} + +PMIX_EXPORT void pmix_common_dstor_init_segment_info(size_t initial_segment_size, + size_t meta_segment_size, + size_t data_segment_size) +{ + _initial_segment_size = initial_segment_size; + _meta_segment_size = meta_segment_size; + _data_segment_size = data_segment_size; +} + +PMIX_EXPORT pmix_dstore_seg_desc_t *pmix_common_dstor_create_new_lock_seg(const char *base_path, size_t size, + const char *name, uint32_t id, uid_t uid, bool setuid) +{ + pmix_status_t rc; + char file_name[PMIX_PATH_MAX]; + pmix_dstore_seg_desc_t *new_seg = NULL; + + 
PMIX_OUTPUT_VERBOSE((10, pmix_gds_base_framework.framework_output, + "%s:%d:%s: segment type %d, nspace %s, id %u", + __FILE__, __LINE__, __func__, PMIX_DSTORE_NS_LOCK_SEGMENT, + name, id)); + + snprintf(file_name, PMIX_PATH_MAX, "%s/smlockseg-%s", base_path, name); + new_seg = (pmix_dstore_seg_desc_t*)malloc(sizeof(pmix_dstore_seg_desc_t)); + if (new_seg) { + new_seg->id = id; + new_seg->next = NULL; + new_seg->type = PMIX_DSTORE_NS_LOCK_SEGMENT; + rc = pmix_pshmem.segment_create(&new_seg->seg_info, file_name, size); + if (PMIX_SUCCESS != rc) { + PMIX_ERROR_LOG(rc); + goto err_exit; + } + memset(new_seg->seg_info.seg_base_addr, 0, size); + + if (setuid > 0){ + rc = PMIX_ERR_PERM; + if (0 > chown(file_name, (uid_t) uid, (gid_t) -1)){ + PMIX_ERROR_LOG(rc); + goto err_exit; + } + /* set the mode as required */ + if (0 > chmod(file_name, S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP )) { + PMIX_ERROR_LOG(rc); + goto err_exit; + } + } + } + return new_seg; + + err_exit: + if( NULL != new_seg ){ + free(new_seg); + } + return NULL; + +} + +/* Attach, read-write, to the lock segment file "<base_path>/smlockseg-<name>". + * Returns a malloc'ed descriptor that the caller owns, or NULL when the + * descriptor cannot be allocated or the attach fails. */ +PMIX_EXPORT pmix_dstore_seg_desc_t *pmix_common_dstor_attach_new_lock_seg(const char *base_path, + size_t size, const char *name, uint32_t id) +{ + pmix_status_t rc; + pmix_dstore_seg_desc_t *new_seg = NULL; + new_seg = (pmix_dstore_seg_desc_t*)malloc(sizeof(pmix_dstore_seg_desc_t)); + /* malloc may fail: report it as NULL instead of dereferencing it */ + if (NULL == new_seg) { + return NULL; + } + new_seg->id = id; + new_seg->next = NULL; + new_seg->type = PMIX_DSTORE_NS_LOCK_SEGMENT; + new_seg->seg_info.seg_size = size; + + PMIX_OUTPUT_VERBOSE((10, pmix_gds_base_framework.framework_output, + "%s:%d:%s: segment type %d, name %s, id %u", + __FILE__, __LINE__, __func__, new_seg->type, name, id)); + + snprintf(new_seg->seg_info.seg_name, PMIX_PATH_MAX, "%s/smlockseg-%s", + base_path, name); + rc = pmix_pshmem.segment_attach(&new_seg->seg_info, PMIX_PSHMEM_RW); + if (PMIX_SUCCESS != rc) { + free(new_seg); + new_seg = NULL; + } + return new_seg; +} + +PMIX_EXPORT pmix_dstore_seg_desc_t *pmix_common_dstor_create_new_segment(pmix_dstore_segment_type type, 
const char *base_path, const char *name, uint32_t id, + uid_t uid, bool setuid) +{ + pmix_status_t rc; + char file_name[PMIX_PATH_MAX]; + size_t size; + pmix_dstore_seg_desc_t *new_seg = NULL; + + PMIX_OUTPUT_VERBOSE((10, pmix_gds_base_framework.framework_output, + "%s:%d:%s: segment type %d, nspace %s, id %u", + __FILE__, __LINE__, __func__, type, name, id)); + + switch (type) { + case PMIX_DSTORE_INITIAL_SEGMENT: + size = _initial_segment_size; + snprintf(file_name, PMIX_PATH_MAX, "%s/initial-pmix_shared-segment-%u", + base_path, id); + break; + case PMIX_DSTORE_NS_META_SEGMENT: + size = _meta_segment_size; + snprintf(file_name, PMIX_PATH_MAX, "%s/smseg-%s-%u", base_path, name, id); + break; + case PMIX_DSTORE_NS_DATA_SEGMENT: + size = _data_segment_size; + snprintf(file_name, PMIX_PATH_MAX, "%s/smdataseg-%s-%d", base_path, name, id); + break; + default: + PMIX_ERROR_LOG(PMIX_ERROR); + return NULL; + } + new_seg = (pmix_dstore_seg_desc_t*)malloc(sizeof(pmix_dstore_seg_desc_t)); + if (new_seg) { + new_seg->id = id; + new_seg->next = NULL; + new_seg->type = type; + rc = pmix_pshmem.segment_create(&new_seg->seg_info, file_name, size); + if (PMIX_SUCCESS != rc) { + PMIX_ERROR_LOG(rc); + goto err_exit; + } + memset(new_seg->seg_info.seg_base_addr, 0, size); + + if (setuid > 0){ + rc = PMIX_ERR_PERM; + if (0 > chown(file_name, (uid_t) uid, (gid_t) -1)){ + PMIX_ERROR_LOG(rc); + goto err_exit; + } + /* set the mode as required */ + if (0 > chmod(file_name, S_IRUSR | S_IRGRP | S_IWGRP )) { + PMIX_ERROR_LOG(rc); + goto err_exit; + } + } + } + return new_seg; + +err_exit: + if( NULL != new_seg ){ + free(new_seg); + } + return NULL; +} + +PMIX_EXPORT pmix_dstore_seg_desc_t *pmix_common_dstor_attach_new_segment(pmix_dstore_segment_type type, const char *base_path, + const char *name, uint32_t id) +{ + pmix_status_t rc; + pmix_dstore_seg_desc_t *new_seg = NULL; + new_seg = (pmix_dstore_seg_desc_t*)malloc(sizeof(pmix_dstore_seg_desc_t)); + new_seg->id = id; + new_seg->next = 
NULL; + new_seg->type = type; + + PMIX_OUTPUT_VERBOSE((10, pmix_gds_base_framework.framework_output, + "%s:%d:%s: segment type %d, nspace %s, id %u", + __FILE__, __LINE__, __func__, type, name, id)); + + switch (type) { + case PMIX_DSTORE_INITIAL_SEGMENT: + new_seg->seg_info.seg_size = _initial_segment_size; + snprintf(new_seg->seg_info.seg_name, PMIX_PATH_MAX, "%s/initial-pmix_shared-segment-%u", + base_path, id); + break; + case PMIX_DSTORE_NS_META_SEGMENT: + new_seg->seg_info.seg_size = _meta_segment_size; + snprintf(new_seg->seg_info.seg_name, PMIX_PATH_MAX, "%s/smseg-%s-%u", + base_path, name, id); + break; + case PMIX_DSTORE_NS_DATA_SEGMENT: + new_seg->seg_info.seg_size = _data_segment_size; + snprintf(new_seg->seg_info.seg_name, PMIX_PATH_MAX, "%s/smdataseg-%s-%d", + base_path, name, id); + break; + default: + free(new_seg); + PMIX_ERROR_LOG(PMIX_ERROR); + return NULL; + } + rc = pmix_pshmem.segment_attach(&new_seg->seg_info, PMIX_PSHMEM_RONLY); + if (PMIX_SUCCESS != rc) { + free(new_seg); + new_seg = NULL; + PMIX_ERROR_LOG(rc); + } + return new_seg; +} + +PMIX_EXPORT pmix_dstore_seg_desc_t *pmix_common_dstor_extend_segment(pmix_dstore_seg_desc_t *segdesc, const char *base_path, + const char *name, uid_t uid, bool setuid) +{ + pmix_dstore_seg_desc_t *tmp, *seg; + + PMIX_OUTPUT_VERBOSE((2, pmix_gds_base_framework.framework_output, + "%s:%d:%s", + __FILE__, __LINE__, __func__)); + /* find last segment */ + tmp = segdesc; + while (NULL != tmp->next) { + tmp = tmp->next; + } + /* create another segment, the old one is full. 
*/ + seg = pmix_common_dstor_create_new_segment(segdesc->type, base_path, name, tmp->id + 1, uid, setuid); + tmp->next = seg; + + return seg; +} + +PMIX_EXPORT void pmix_common_dstor_delete_sm_desc(pmix_dstore_seg_desc_t *desc) +{ + pmix_dstore_seg_desc_t *tmp; + + /* free all global segments */ + while (NULL != desc) { + tmp = desc->next; + /* detach & unlink from current desc */ + if (desc->seg_info.seg_cpid == getpid()) { + pmix_pshmem.segment_unlink(&desc->seg_info); + } + pmix_pshmem.segment_detach(&desc->seg_info); + free(desc); + desc = tmp; + } +} diff --git a/opal/mca/pmix/pmix3x/pmix/src/mca/common/dstore/dstore_segment.h b/opal/mca/pmix/pmix3x/pmix/src/mca/common/dstore/dstore_segment.h new file mode 100644 index 00000000000..6430273faab --- /dev/null +++ b/opal/mca/pmix/pmix3x/pmix/src/mca/common/dstore/dstore_segment.h @@ -0,0 +1,70 @@ +/* + * Copyright (c) 2015-2018 Intel, Inc. All rights reserved. + * Copyright (c) 2016 IBM Corporation. All rights reserved. + * Copyright (c) 2016-2017 Mellanox Technologies, Inc. + * All rights reserved. + * Copyright (c) 2018 Research Organization for Information Science + * and Technology (RIST). All rights reserved. + * + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include +#include + +#include "src/include/pmix_globals.h" +#include "src/mca/gds/base/base.h" +#include "src/mca/pshmem/base/base.h" + +#include "dstore_common.h" + +#ifndef DSTORE_SEGMENT_H +#define DSTORE_SEGMENT_H + +/* this structs are used to store information about + * shared segments addresses locally at each process, + * so they are common for different types of segments + * and don't have a specific content (namespace's info, + * rank's meta info, ranks's data). 
*/ + +typedef struct pmix_dstore_seg_desc_t pmix_dstore_seg_desc_t; + +typedef enum { + PMIX_DSTORE_INITIAL_SEGMENT, + PMIX_DSTORE_NS_META_SEGMENT, + PMIX_DSTORE_NS_DATA_SEGMENT, + PMIX_DSTORE_NS_LOCK_SEGMENT, +} pmix_dstore_segment_type; + +struct pmix_dstore_seg_desc_t { + pmix_dstore_segment_type type; + pmix_pshmem_seg_t seg_info; + uint32_t id; + pmix_dstore_seg_desc_t *next; +}; + +PMIX_EXPORT int pmix_common_dstor_getpagesize(void); +PMIX_EXPORT size_t pmix_common_dstor_getcacheblocksize(void); +PMIX_EXPORT void pmix_common_dstor_init_segment_info(size_t initial_segment_size, + size_t meta_segment_size, + size_t data_segment_size); +PMIX_EXPORT pmix_dstore_seg_desc_t *pmix_common_dstor_create_new_segment(pmix_dstore_segment_type type, + const char *base_path, const char *name, uint32_t id, + uid_t uid, bool setuid); +PMIX_EXPORT pmix_dstore_seg_desc_t *pmix_common_dstor_attach_new_segment(pmix_dstore_segment_type type, + const char *base_path, + const char *name, uint32_t id); +PMIX_EXPORT pmix_dstore_seg_desc_t *pmix_common_dstor_extend_segment(pmix_dstore_seg_desc_t *segdesc, + const char *base_path, + const char *name, uid_t uid, bool setuid); +PMIX_EXPORT void pmix_common_dstor_delete_sm_desc(pmix_dstore_seg_desc_t *desc); +PMIX_EXPORT pmix_dstore_seg_desc_t *pmix_common_dstor_create_new_lock_seg(const char *base_path, size_t size, + const char *name, uint32_t id, uid_t uid, bool setuid); +PMIX_EXPORT pmix_dstore_seg_desc_t *pmix_common_dstor_attach_new_lock_seg(const char *base_path, + size_t size, const char *name, uint32_t id); + +#endif // DSTORE_SEGMENT_H diff --git a/opal/mca/pmix/pmix3x/pmix/src/mca/gds/base/base.h b/opal/mca/pmix/pmix3x/pmix/src/mca/gds/base/base.h index 3ada366984f..242fc4dabd4 100644 --- a/opal/mca/pmix/pmix3x/pmix/src/mca/gds/base/base.h +++ b/opal/mca/pmix/pmix3x/pmix/src/mca/gds/base/base.h @@ -14,6 +14,7 @@ * Copyright (c) 2014-2017 Intel, Inc. All rights reserved. 
* Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. + * Copyright (c) 2018 IBM Corporation. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -76,6 +77,12 @@ struct pmix_gds_globals_t { }; typedef struct pmix_gds_globals_t pmix_gds_globals_t; +typedef void * pmix_gds_base_store_modex_cbdata_t; +typedef pmix_status_t (*pmix_gds_base_store_modex_cb_fn_t)(pmix_gds_base_store_modex_cbdata_t cbdata, + struct pmix_namespace_t *nspace, + pmix_list_t *cbs, + pmix_byte_object_t *bo); + PMIX_EXPORT extern pmix_gds_globals_t pmix_gds_globals; /* get a list of available support - caller must free results @@ -98,6 +105,12 @@ PMIX_EXPORT pmix_gds_base_module_t* pmix_gds_base_assign_module(pmix_info_t *inf PMIX_EXPORT pmix_status_t pmix_gds_base_setup_fork(const pmix_proc_t *proc, char ***env); +PMIX_EXPORT pmix_status_t pmix_gds_base_store_modex(struct pmix_namespace_t *nspace, + pmix_list_t *cbs, + pmix_buffer_t *xfer, + pmix_gds_base_store_modex_cb_fn_t cb_fn, + pmix_gds_base_store_modex_cbdata_t cbdata); + END_C_DECLS #endif diff --git a/opal/mca/pmix/pmix3x/pmix/src/mca/gds/base/gds_base_fns.c b/opal/mca/pmix/pmix3x/pmix/src/mca/gds/base/gds_base_fns.c index 16e88485c9f..abec7a744da 100644 --- a/opal/mca/pmix/pmix3x/pmix/src/mca/gds/base/gds_base_fns.c +++ b/opal/mca/pmix/pmix3x/pmix/src/mca/gds/base/gds_base_fns.c @@ -1,8 +1,11 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* - * Copyright (c) 2015-2017 Intel, Inc. All rights reserved. + * Copyright (c) 2015-2019 Intel, Inc. All rights reserved. * Copyright (c) 2016 Mellanox Technologies, Inc. * All rights reserved. + * Copyright (c) 2018 IBM Corporation. All rights reserved. + * Copyright (c) 2018 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* * $COPYRIGHT$ * @@ -76,10 +79,107 @@ pmix_status_t pmix_gds_base_setup_fork(const pmix_proc_t *proc, if (NULL == active->module->setup_fork) { continue; } - if (PMIX_SUCCESS != (rc = active->module->setup_fork(proc, env))) { + rc = active->module->setup_fork(proc, env); + if (PMIX_SUCCESS != rc && PMIX_ERR_NOT_AVAILABLE != rc) { return rc; } } return PMIX_SUCCESS; } + +pmix_status_t pmix_gds_base_store_modex(struct pmix_namespace_t *nspace, + pmix_list_t *cbs, + pmix_buffer_t * buff, + pmix_gds_base_store_modex_cb_fn_t cb_fn, + pmix_gds_base_store_modex_cbdata_t cbdata) +{ + pmix_status_t rc = PMIX_SUCCESS; + pmix_namespace_t * ns = (pmix_namespace_t *)nspace; + pmix_buffer_t bkt; + pmix_byte_object_t bo, bo2; + int32_t cnt = 1; + char byte; + pmix_collect_t ctype; + bool have_ctype = false; + + /* Loop over the enclosed byte object envelopes and + * store them in our GDS module */ + cnt = 1; + PMIX_BFROPS_UNPACK(rc, pmix_globals.mypeer, + buff, &bo, &cnt, PMIX_BYTE_OBJECT); + while (PMIX_SUCCESS == rc) { + PMIX_CONSTRUCT(&bkt, pmix_buffer_t); + PMIX_LOAD_BUFFER(pmix_globals.mypeer, &bkt, bo.bytes, bo.size); + /* unpack the data collection flag */ + cnt = 1; + PMIX_BFROPS_UNPACK(rc, pmix_globals.mypeer, + &bkt, &byte, &cnt, PMIX_BYTE); + if (PMIX_ERR_UNPACK_READ_PAST_END_OF_BUFFER == rc) { + /* no data was returned, so we are done with this blob */ + PMIX_DESTRUCT(&bkt); + break; + } + if (PMIX_SUCCESS != rc) { + /* we have an error */ + PMIX_DESTRUCT(&bkt); + goto error; + } + + // Check that this blob was accumulated with the same data collection setting + if (have_ctype) { + if (ctype != (pmix_collect_t)byte) { + rc = PMIX_ERR_INVALID_ARG; + PMIX_DESTRUCT(&bkt); + goto error; + } + } + else { + ctype = (pmix_collect_t)byte; + have_ctype = true; + } + + /* unpack the enclosed blobs from the various peers */ + cnt = 1; + PMIX_BFROPS_UNPACK(rc, pmix_globals.mypeer, + &bkt, &bo2, &cnt, PMIX_BYTE_OBJECT); + while (PMIX_SUCCESS == rc) { + /* unpack all the kval's 
from this peer and store them in + * our GDS. Note that PMIx by design holds all data at + * the server level until requested. If our GDS is a + * shared memory region, then the data may be available + * right away - but the client still has to be notified + * of its presence. */ + rc = cb_fn(cbdata, (struct pmix_namespace_t *)ns, cbs, &bo2); + if (PMIX_SUCCESS != rc) { + PMIX_DESTRUCT(&bkt); + goto error; + } + PMIX_BYTE_OBJECT_DESTRUCT(&bo2); + /* get the next blob */ + cnt = 1; + PMIX_BFROPS_UNPACK(rc, pmix_globals.mypeer, + &bkt, &bo2, &cnt, PMIX_BYTE_OBJECT); + } + PMIX_DESTRUCT(&bkt); + if (PMIX_ERR_UNPACK_READ_PAST_END_OF_BUFFER == rc) { + rc = PMIX_SUCCESS; + } else if (PMIX_SUCCESS != rc) { + goto error; + } + /* unpack and process the next blob */ + cnt = 1; + PMIX_BFROPS_UNPACK(rc, pmix_globals.mypeer, + buff, &bo, &cnt, PMIX_BYTE_OBJECT); + } + if (PMIX_ERR_UNPACK_READ_PAST_END_OF_BUFFER == rc) { + rc = PMIX_SUCCESS; + } + +error: + if (PMIX_SUCCESS != rc) { + PMIX_ERROR_LOG(rc); + } + + return rc; +} diff --git a/opal/mca/pmix/pmix3x/pmix/src/mca/gds/ds12/Makefile.am b/opal/mca/pmix/pmix3x/pmix/src/mca/gds/ds12/Makefile.am index ac62d8a9aad..eae7ef34abc 100644 --- a/opal/mca/pmix/pmix3x/pmix/src/mca/gds/ds12/Makefile.am +++ b/opal/mca/pmix/pmix3x/pmix/src/mca/gds/ds12/Makefile.am @@ -11,7 +11,7 @@ # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. # Copyright (c) 2012 Los Alamos National Security, Inc. All rights reserved. -# Copyright (c) 2013-2017 Intel, Inc. All rights reserved. +# Copyright (c) 2013-2019 Intel, Inc. All rights reserved. # Copyright (c) 2017 Research Organization for Information Science # and Technology (RIST). All rights reserved. # Copyright (c) 2017 Mellanox Technologies, Inc. 
@@ -24,11 +24,24 @@ # headers = \ - gds_dstore.h + gds_ds12_base.h \ + gds_ds12_lock.h \ + gds_ds12_file.h sources = \ - gds_dstore.c \ - gds_dstore_component.c + gds_ds12_base.c \ + gds_ds12_lock.c \ + gds_ds12_component.c \ + gds_ds12_file.c \ + gds_ds20_file.c + +if HAVE_DSTORE_PTHREAD_LOCK +sources += gds_ds12_lock_pthread.c +else +if HAVE_DSTORE_FCNTL_LOCK +sources += gds_ds12_lock_fcntl.c +endif +endif # Make the output library in this directory, and name it either # mca__.la (for DSO builds) or libmca__.la @@ -49,7 +62,11 @@ endif mcacomponentdir = $(pmixlibdir) mcacomponent_LTLIBRARIES = $(component) mca_gds_ds12_la_SOURCES = $(component_sources) -mca_gds_ds12_la_LDFLAGS = -module -avoid-version +mca_gds_ds12_la_LDFLAGS = -module -avoid-version \ + $(PMIX_TOP_BUILDDIR)/src/mca/common/dstore/libmca_common_dstore.la +if NEED_LIBPMIX +mca_gds_ds12_la_LIBADD = $(top_builddir)/src/libpmix.la +endif noinst_LTLIBRARIES = $(lib) libmca_gds_ds12_la_SOURCES = $(lib_sources) diff --git a/opal/mca/pmix/pmix3x/pmix/src/mca/gds/ds12/gds_ds12_base.c b/opal/mca/pmix/pmix3x/pmix/src/mca/gds/ds12/gds_ds12_base.c new file mode 100644 index 00000000000..cdfcb252709 --- /dev/null +++ b/opal/mca/pmix/pmix3x/pmix/src/mca/gds/ds12/gds_ds12_base.c @@ -0,0 +1,170 @@ +/* + * Copyright (c) 2015-2018 Intel, Inc. All rights reserved. + * Copyright (c) 2016-2018 IBM Corporation. All rights reserved. + * Copyright (c) 2016-2018 Mellanox Technologies, Inc. + * All rights reserved. + * Copyright (c) 2018 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
+ *
+ * $COPYRIGHT$
+ *
+ * Additional copyrights may follow
+ *
+ * $HEADER$
+ */
+
+#include
+#include
+#include "src/include/pmix_globals.h"
+#include "src/util/error.h"
+#include "src/mca/gds/base/base.h"
+#include "src/util/argv.h"
+
+#include "src/mca/common/dstore/dstore_common.h"
+#include "gds_ds12_base.h"
+#include "gds_ds12_lock.h"
+#include "gds_ds12_file.h"
+#include "src/mca/common/dstore/dstore_base.h"
+
+/* common dstore context used by every ds12 entry point in this file;
+ * created in ds12_init() and released in ds12_finalize() */
+static pmix_common_dstore_ctx_t *ds12_ctx;
+
+/* Create the common dstore context for the "ds12" module.
+ * A non-server process passes the ds20 file callbacks; a server passes
+ * NULL for the file callbacks at this point.
+ * Returns PMIX_ERR_INIT when no context could be created. */
+static pmix_status_t ds12_init(pmix_info_t info[], size_t ninfo)
+{
+    pmix_status_t rc = PMIX_SUCCESS;
+    pmix_common_dstore_file_cbs_t *dstore_file_cbs = NULL;
+
+    if (!PMIX_PROC_IS_SERVER(pmix_globals.mypeer)) {
+        dstore_file_cbs = &pmix_ds20_file_module;
+    }
+    ds12_ctx = pmix_common_dstor_init("ds12", info, ninfo,
+                                      &pmix_ds12_lock_module,
+                                      dstore_file_cbs);
+    if (NULL == ds12_ctx) {
+        rc = PMIX_ERR_INIT;
+    }
+
+    return rc;
+}
+
+/* Release the common dstore context created by ds12_init(). */
+static void ds12_finalize(void)
+{
+    pmix_common_dstor_finalize(ds12_ctx);
+    /* do not keep a pointer to the finalized context around */
+    ds12_ctx = NULL;
+}
+
+/* Report the priority of this module for the given directives.
+ * Default priority is 20; a PMIX_GDS_MODULE directive listing "ds12"
+ * raises it to 100, one listing "dstore" raises it to 50. Only the
+ * first PMIX_GDS_MODULE entry in info[] is examined.
+ * Always returns PMIX_SUCCESS. */
+static pmix_status_t ds12_assign_module(pmix_info_t *info, size_t ninfo,
+                                        int *priority)
+{
+    size_t n, m;
+    char **options;
+
+    *priority = 20;
+    if (NULL != info) {
+        for (n=0; n < ninfo; n++) {
+            if (0 == strncmp(info[n].key, PMIX_GDS_MODULE, PMIX_MAX_KEYLEN)) {
+                options = pmix_argv_split(info[n].value.data.string, ',');
+                /* the split may hand back no array at all (e.g. nothing
+                 * to split), so never index it unchecked */
+                for (m=0; NULL != options && NULL != options[m]; m++) {
+                    if (0 == strcmp(options[m], "ds12")) {
+                        /* they specifically asked for us */
+                        *priority = 100;
+                        break;
+                    }
+                    if (0 == strcmp(options[m], "dstore")) {
+                        /* they are asking for any dstore module - we
+                         * take an intermediate priority in case another
+                         * dstore is more modern than us */
+                        *priority = 50;
+                        break;
+                    }
+                }
+                if (NULL != options) {
+                    pmix_argv_free(options);
+                }
+                break;
+            }
+        }
+    }
+
+    return PMIX_SUCCESS;
+}
+
+/* Job info caching is a no-op for this module: nothing is stored and
+ * PMIX_SUCCESS is returned unconditionally. */
+static pmix_status_t ds12_cache_job_info(struct pmix_namespace_t *ns,
+                                pmix_info_t info[], size_t ninfo)
+{
+    return PMIX_SUCCESS;
+}
+
+static pmix_status_t ds12_register_job_info(struct pmix_peer_t *pr,
+                                            pmix_buffer_t *reply)
+{
+    if 
(PMIX_PROC_IS_V1(pr)) { + ds12_ctx->file_cbs = &pmix_ds12_file_module; + } else { + ds12_ctx->file_cbs = &pmix_ds20_file_module; + } + return pmix_common_dstor_register_job_info(ds12_ctx, pr, reply); +} + +static pmix_status_t ds12_store_job_info(const char *nspace, pmix_buffer_t *buf) +{ + return pmix_common_dstor_store_job_info(ds12_ctx, nspace, buf); +} + +static pmix_status_t ds12_store(const pmix_proc_t *proc, + pmix_scope_t scope, + pmix_kval_t *kv) +{ + return pmix_common_dstor_store(ds12_ctx, proc, scope, kv); +} + +/* this function is only called by the PMIx server when its + * host has received data from some other peer. It therefore + * always contains data solely from remote procs, and we + * shall store it accordingly */ +static pmix_status_t ds12_store_modex(struct pmix_namespace_t *nspace, + pmix_list_t *cbs, + pmix_buffer_t *buf) +{ + return pmix_common_dstor_store_modex(ds12_ctx, nspace, cbs, buf); +} + +static pmix_status_t ds12_fetch(const pmix_proc_t *proc, + pmix_scope_t scope, bool copy, + const char *key, + pmix_info_t info[], size_t ninfo, + pmix_list_t *kvs) +{ + return pmix_common_dstor_fetch(ds12_ctx, proc, scope, copy, key, info, ninfo, kvs); +} + +static pmix_status_t ds12_setup_fork(const pmix_proc_t *peer, char ***env) +{ + return pmix_common_dstor_setup_fork(ds12_ctx, PMIX_DSTORE_ESH_BASE_PATH, peer, env); +} + +static pmix_status_t ds12_add_nspace(const char *nspace, + pmix_info_t info[], + size_t ninfo) +{ + return pmix_common_dstor_add_nspace(ds12_ctx, nspace, info, ninfo); +} + +static pmix_status_t ds12_del_nspace(const char* nspace) +{ + return pmix_common_dstor_del_nspace(ds12_ctx, nspace); +} + +pmix_gds_base_module_t pmix_ds12_module = { + .name = "ds12", + .is_tsafe = false, + .init = ds12_init, + .finalize = ds12_finalize, + .assign_module = ds12_assign_module, + .cache_job_info = ds12_cache_job_info, + .register_job_info = ds12_register_job_info, + .store_job_info = ds12_store_job_info, + .store = ds12_store, + 
.store_modex = ds12_store_modex, + .fetch = ds12_fetch, + .setup_fork = ds12_setup_fork, + .add_nspace = ds12_add_nspace, + .del_nspace = ds12_del_nspace, +}; + diff --git a/opal/mca/pmix/pmix3x/pmix/src/mca/gds/ds12/gds_ds12_base.h b/opal/mca/pmix/pmix3x/pmix/src/mca/gds/ds12/gds_ds12_base.h new file mode 100644 index 00000000000..d208e2d154f --- /dev/null +++ b/opal/mca/pmix/pmix3x/pmix/src/mca/gds/ds12/gds_ds12_base.h @@ -0,0 +1,21 @@ +/* + * Copyright (c) 2015-2017 Intel, Inc. All rights reserved. + * Copyright (c) 2017-2018 Mellanox Technologies, Inc. + * All rights reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#ifndef GDS_DSTORE_12_H +#define GDS_DSTORE_12_H + +#include "src/mca/gds/gds.h" + +/* the component must be visible data for the linker to find it */ +PMIX_EXPORT extern pmix_gds_base_component_t mca_gds_ds12_component; +extern pmix_gds_base_module_t pmix_ds12_module; + +#endif // GDS_DSTORE_12_H diff --git a/opal/mca/pmix/pmix3x/pmix/src/mca/gds/ds12/gds_dstore_component.c b/opal/mca/pmix/pmix3x/pmix/src/mca/gds/ds12/gds_ds12_component.c similarity index 99% rename from opal/mca/pmix/pmix3x/pmix/src/mca/gds/ds12/gds_dstore_component.c rename to opal/mca/pmix/pmix3x/pmix/src/mca/gds/ds12/gds_ds12_component.c index 35d984e1b90..9f52d4fe996 100644 --- a/opal/mca/pmix/pmix3x/pmix/src/mca/gds/ds12/gds_dstore_component.c +++ b/opal/mca/pmix/pmix3x/pmix/src/mca/gds/ds12/gds_ds12_component.c @@ -33,7 +33,7 @@ #include "src/include/pmix_globals.h" #include "src/mca/gds/gds.h" -#include "gds_dstore.h" +#include "gds_ds12_base.h" static pmix_status_t component_open(void); static pmix_status_t component_close(void); diff --git a/opal/mca/pmix/pmix3x/pmix/src/mca/gds/ds12/gds_ds12_file.c b/opal/mca/pmix/pmix3x/pmix/src/mca/gds/ds12/gds_ds12_file.c new file mode 100644 index 00000000000..701578d83a6 --- /dev/null +++ b/opal/mca/pmix/pmix3x/pmix/src/mca/gds/ds12/gds_ds12_file.c @@ -0,0 +1,176 @@ +/* + * Copyright (c) 
2018 Mellanox Technologies, Inc. + * All rights reserved. + * + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include + +#include "src/include/pmix_globals.h" +#include "src/mca/gds/base/base.h" + +#include "src/mca/common/dstore/dstore_file.h" +#include "gds_ds12_file.h" + +#define ESH_KEY_SIZE_V12(key, size) \ +__pmix_attribute_extension__ ({ \ + size_t len = strlen((char*)key) + 1 + sizeof(size_t) + size; \ + len; \ +}) + +/* in ext slot new offset will be stored in case if + * new data were added for the same process during + * next commit + */ +#define EXT_SLOT_SIZE_V12() \ + (ESH_KEY_SIZE_V12(ESH_REGION_EXTENSION, sizeof(size_t))) + +#define ESH_KV_SIZE_V12(addr) \ +__pmix_attribute_extension__ ({ \ + size_t sz; \ + memcpy(&sz, addr + \ + ESH_KNAME_LEN_V12(ESH_KNAME_PTR_V12(addr)), \ + sizeof(size_t)); \ + sz += ESH_KNAME_LEN_V12(ESH_KNAME_PTR_V12(addr)) + \ + sizeof(size_t); \ + sz; \ +}) + +#define ESH_KNAME_PTR_V12(addr) \ +__pmix_attribute_extension__ ({ \ + char *name_ptr = (char*)addr; \ + name_ptr; \ +}) + +#define ESH_KNAME_LEN_V12(key) \ +__pmix_attribute_extension__ ({ \ + size_t len = strlen((char*)key) + 1; \ + len; \ +}) + +#define ESH_DATA_PTR_V12(addr) \ +__pmix_attribute_extension__ ({ \ + uint8_t *data_ptr = \ + addr + \ + sizeof(size_t) + \ + ESH_KNAME_LEN_V12(ESH_KNAME_PTR_V12(addr)); \ + data_ptr; \ +}) + +#define ESH_DATA_SIZE_V12(addr) \ +__pmix_attribute_extension__ ({ \ + size_t data_size; \ + memcpy(&data_size, \ + addr + ESH_KNAME_LEN_V12(ESH_KNAME_PTR_V12(addr)), \ + sizeof(size_t)); \ + data_size; \ +}) + +#define ESH_PUT_KEY_V12(addr, key, buffer, size) \ +__pmix_attribute_extension__ ({ \ + size_t sz = size; \ + memset(addr, 0, ESH_KNAME_LEN_V12(key)); \ + strncpy((char *)addr, key, ESH_KNAME_LEN_V12(key)); \ + memcpy(addr + ESH_KNAME_LEN_V12(key), &sz, \ + sizeof(size_t)); \ + memcpy(addr + ESH_KNAME_LEN_V12(key) + sizeof(size_t), \ + buffer, size); \ +}) + +static size_t 
pmix_ds12_kv_size(uint8_t *addr) +{ + size_t size; + + memcpy(&size, addr + ESH_KNAME_LEN_V12(ESH_KNAME_PTR_V12(addr)), + sizeof(size_t)); + size += ESH_KNAME_LEN_V12(ESH_KNAME_PTR_V12(addr)) + sizeof(size_t); + return size; +} + +static char* pmix_ds12_key_name_ptr(uint8_t *addr) +{ + return ESH_KNAME_PTR_V12(addr); +} + +static size_t pmix_ds12_key_name_len(char *key) +{ + return ESH_KNAME_LEN_V12(key); +} + +static uint8_t* pmix_ds12_data_ptr(uint8_t *addr) +{ + return ESH_DATA_PTR_V12(addr); +} + +static size_t pmix_ds12_data_size(uint8_t *addr, uint8_t* data_ptr) +{ + return ESH_DATA_SIZE_V12(addr); +} + +static size_t pmix_ds12_key_size(char *addr, size_t data_size) +{ + return ESH_KEY_SIZE_V12(addr, data_size); +} + +static size_t pmix_ds12_ext_slot_size(void) +{ + return EXT_SLOT_SIZE_V12(); +} + +static int pmix_ds12_put_key(uint8_t *addr, char *key, void *buf, size_t size) +{ + ESH_PUT_KEY_V12(addr, key, buf, size); + return PMIX_SUCCESS; +} + +static bool pmix_ds12_is_invalid(uint8_t *addr) +{ + bool ret = (0 == strncmp(ESH_REGION_INVALIDATED, ESH_KNAME_PTR_V12(addr), + ESH_KNAME_LEN_V12(ESH_KNAME_PTR_V12(addr)))); + return ret; +} + +static void pmix_ds12_set_invalid(uint8_t *addr) +{ + strncpy(ESH_KNAME_PTR_V12(addr), ESH_REGION_INVALIDATED, + ESH_KNAME_LEN_V12(ESH_REGION_INVALIDATED)); +} + +static bool pmix_ds12_is_ext_slot(uint8_t *addr) +{ + bool ret; + ret = (0 == strncmp(ESH_REGION_EXTENSION, ESH_KNAME_PTR_V12(addr), + ESH_KNAME_LEN_V12(ESH_KNAME_PTR_V12(addr)))); + return ret; +} + +static bool pmix_ds12_kname_match(uint8_t *addr, const char *key, size_t key_hash) +{ + bool ret = 0; + + ret = (0 == strncmp(ESH_KNAME_PTR_V12(addr), + key, ESH_KNAME_LEN_V12(key))); + return ret; +} + +pmix_common_dstore_file_cbs_t pmix_ds12_file_module = { + .name = "ds12", + .kval_size = pmix_ds12_kv_size, + .kname_ptr = pmix_ds12_key_name_ptr, + .kname_len = pmix_ds12_key_name_len, + .data_ptr = pmix_ds12_data_ptr, + .data_size = pmix_ds12_data_size, + .key_size 
= pmix_ds12_key_size, + .ext_slot_size = pmix_ds12_ext_slot_size, + .put_key = pmix_ds12_put_key, + .is_invalid = pmix_ds12_is_invalid, + .is_extslot = pmix_ds12_is_ext_slot, + .set_invalid = pmix_ds12_set_invalid, + .key_hash = NULL, + .key_match = pmix_ds12_kname_match +}; diff --git a/opal/mca/pmix/pmix3x/pmix/src/mca/gds/ds12/gds_ds12_file.h b/opal/mca/pmix/pmix3x/pmix/src/mca/gds/ds12/gds_ds12_file.h new file mode 100644 index 00000000000..33c8864743d --- /dev/null +++ b/opal/mca/pmix/pmix3x/pmix/src/mca/gds/ds12/gds_ds12_file.h @@ -0,0 +1,20 @@ +/* + * Copyright (c) 2018 Mellanox Technologies, Inc. + * All rights reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#ifndef GDS_DS12_FILE_H +#define GDS_DS12_FILE_H + +#include +#include + +extern pmix_common_dstore_file_cbs_t pmix_ds12_file_module; +extern pmix_common_dstore_file_cbs_t pmix_ds20_file_module; + +#endif // GDS_DS12_FILE_H diff --git a/opal/mca/pmix/pmix3x/pmix/src/mca/gds/ds12/gds_ds12_lock.c b/opal/mca/pmix/pmix3x/pmix/src/mca/gds/ds12/gds_ds12_lock.c new file mode 100644 index 00000000000..9872dd7c4d1 --- /dev/null +++ b/opal/mca/pmix/pmix3x/pmix/src/mca/gds/ds12/gds_ds12_lock.c @@ -0,0 +1,25 @@ +/* + * Copyright (c) 2018 Mellanox Technologies, Inc. + * All rights reserved. 
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include +#include + +#include "src/mca/common/dstore/dstore_common.h" + +#include "gds_ds12_lock.h" + +pmix_common_lock_callbacks_t pmix_ds12_lock_module = { + .init = pmix_gds_ds12_lock_init, + .finalize = pmix_ds12_lock_finalize, + .rd_lock = pmix_ds12_lock_rd_get, + .rd_unlock = pmix_ds12_lock_rw_rel, + .wr_lock = pmix_ds12_lock_wr_get, + .wr_unlock = pmix_ds12_lock_rw_rel +}; diff --git a/opal/mca/pmix/pmix3x/pmix/src/mca/gds/ds12/gds_ds12_lock.h b/opal/mca/pmix/pmix3x/pmix/src/mca/gds/ds12/gds_ds12_lock.h new file mode 100644 index 00000000000..9d9b91ad1ee --- /dev/null +++ b/opal/mca/pmix/pmix3x/pmix/src/mca/gds/ds12/gds_ds12_lock.h @@ -0,0 +1,29 @@ +/* + * Copyright (c) 2018 Mellanox Technologies, Inc. + * All rights reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#ifndef DS12_LOCK_H +#define DS12_LOCK_H + +#include +#include + +#include "src/mca/common/dstore/dstore_common.h" + +pmix_status_t pmix_gds_ds12_lock_init(pmix_common_dstor_lock_ctx_t *lock_ctx, + const char *base_path, const char *name, + uint32_t local_size, uid_t uid, bool setuid); +void pmix_ds12_lock_finalize(pmix_common_dstor_lock_ctx_t *lock_ctx); +pmix_status_t pmix_ds12_lock_rd_get(pmix_common_dstor_lock_ctx_t lock_ctx); +pmix_status_t pmix_ds12_lock_wr_get(pmix_common_dstor_lock_ctx_t lock_ctx); +pmix_status_t pmix_ds12_lock_rw_rel(pmix_common_dstor_lock_ctx_t lock_ctx); + +extern pmix_common_lock_callbacks_t pmix_ds12_lock_module; + +#endif // DS12_LOCK_H diff --git a/opal/mca/pmix/pmix3x/pmix/src/mca/gds/ds12/gds_ds12_lock_fcntl.c b/opal/mca/pmix/pmix3x/pmix/src/mca/gds/ds12/gds_ds12_lock_fcntl.c new file mode 100644 index 00000000000..477e91465fb --- /dev/null +++ b/opal/mca/pmix/pmix3x/pmix/src/mca/gds/ds12/gds_ds12_lock_fcntl.c @@ -0,0 +1,233 @@ +/* + * Copyright (c) 2015-2018 Intel, Inc. All rights reserved. + * Copyright (c) 2016 IBM Corporation. 
All rights reserved.
+ * Copyright (c) 2016-2018 Mellanox Technologies, Inc.
+ * All rights reserved.
+ * Copyright (c) 2018 Research Organization for Information Science
+ * and Technology (RIST). All rights reserved.
+ *
+ * $COPYRIGHT$
+ *
+ * Additional copyrights may follow
+ *
+ * $HEADER$
+ */
+
+#include
+
+#ifdef HAVE_UNISTD_H
+#include
+#endif
+#ifdef HAVE_SYS_TYPES_H
+#include
+#endif
+#ifdef HAVE_SYS_STAT_H
+#include
+#endif
+#ifdef HAVE_FCNTL_H
+#include
+#endif
+
+#include
+
+#include "src/mca/common/dstore/dstore_common.h"
+#include "src/mca/gds/base/base.h"
+
+#include "src/util/error.h"
+#include "src/util/output.h"
+
+#include "gds_ds12_lock.h"
+
+/* Apply a whole-file fcntl() record lock of type `operation`
+ * (F_RDLCK, F_WRLCK or F_UNLCK) to `lockfd` using F_SETLKW.
+ * An interrupted call (EINTR) is retried, at most 10 attempts in total.
+ * Evaluates to PMIX_SUCCESS only when fcntl() actually succeeded; if every
+ * attempt was interrupted the result is PMIX_ERR_RESOURCE_BUSY rather
+ * than a false success. Any failure is also reported via pmix_output(). */
+#define _ESH_12_FCNTL_LOCK(lockfd, operation)                       \
+__pmix_attribute_extension__ ({                                     \
+    pmix_status_t ret = PMIX_SUCCESS;                               \
+    int i;                                                          \
+    struct flock fl = {0};                                          \
+    fl.l_type = operation;                                          \
+    fl.l_whence = SEEK_SET;                                         \
+    for(i = 0; i < 10; i++) {                                       \
+        if( 0 > fcntl(lockfd, F_SETLKW, &fl) ) {                    \
+            switch( errno ){                                        \
+                case EINTR:                                         \
+                    /* stays an error unless a retry succeeds */    \
+                    ret = PMIX_ERR_RESOURCE_BUSY;                   \
+                    continue;                                       \
+                case ENOENT:                                        \
+                case EINVAL:                                        \
+                    ret = PMIX_ERR_NOT_FOUND;                       \
+                    break;                                          \
+                case EBADF:                                         \
+                    ret = PMIX_ERR_BAD_PARAM;                       \
+                    break;                                          \
+                case EDEADLK:                                       \
+                case EFAULT:                                        \
+                case ENOLCK:                                        \
+                    ret = PMIX_ERR_RESOURCE_BUSY;                   \
+                    break;                                          \
+                default:                                            \
+                    ret = PMIX_ERROR;                               \
+                    break;                                          \
+            }                                                       \
+        } else {                                                    \
+            /* lock operation went through (possibly on a retry) */ \
+            ret = PMIX_SUCCESS;                                     \
+        }                                                           \
+        break;                                                      \
+    }                                                               \
+    if (ret) {                                                      \
+        pmix_output(0, "%s %d:%s lock failed: %s",                  \
+            __FILE__, __LINE__, __func__, strerror(errno));         \
+    }                                                               \
+    ret;                                                            \
+})
+
+/* per-context state of the fcntl-based lock */
+typedef struct {
+    char *lockfile;     /* heap-allocated path "<base_path>/dstore_sm.lock" */
+    int lockfd;         /* descriptor of lockfile, -1 while not open */
+} ds12_lock_fcntl_ctx_t;
+
+/* Create the fcntl lock context and store it in *ctx.
+ * If *ctx is already set nothing is done and PMIX_SUCCESS is returned.
+ * A server creates the lock file (replacing a stale one) and, when
+ * `setuid` is non-zero, hands it to `uid` and adjusts its mode; any other
+ * process opens the existing file read-only.
+ * `name` and `local_size` are not used by this implementation.
+ * On failure everything acquired so far is released, *ctx is reset to
+ * NULL and a PMIX error code is returned. */
+pmix_status_t pmix_gds_ds12_lock_init(pmix_common_dstor_lock_ctx_t *ctx, const char *base_path,
+                                      const char *name, uint32_t local_size, uid_t uid, bool setuid)
+{
+    pmix_status_t rc = PMIX_SUCCESS;
+    ds12_lock_fcntl_ctx_t *lock_ctx;
+
+    if (*ctx != NULL) {
+        return PMIX_SUCCESS;
+    }
+
+    lock_ctx = (ds12_lock_fcntl_ctx_t*)malloc(sizeof(ds12_lock_fcntl_ctx_t));
+    if (NULL == lock_ctx) {
+        rc = PMIX_ERR_INIT;
+        PMIX_ERROR_LOG(rc);
+        goto error;
+    }
+    *ctx = lock_ctx;
+    memset(lock_ctx, 0, sizeof(ds12_lock_fcntl_ctx_t));
+    lock_ctx->lockfd = -1;
+
+    /* create a lock file to prevent clients from reading while server is writing
+     * to the shared memory. This situation is quite often, especially in case of
+     * direct modex when clients might ask for data simultaneously. */
+    if(0 > asprintf(&lock_ctx->lockfile, "%s/dstore_sm.lock", base_path)) {
+        /* the pointer is indeterminate after a failed asprintf();
+         * reset it so the cleanup below does not free garbage */
+        lock_ctx->lockfile = NULL;
+        rc = PMIX_ERR_OUT_OF_RESOURCE;
+        PMIX_ERROR_LOG(rc);
+        goto error;
+    }
+    PMIX_OUTPUT_VERBOSE((10, pmix_gds_base_framework.framework_output,
+        "%s:%d:%s _lockfile_name: %s", __FILE__, __LINE__, __func__, lock_ctx->lockfile));
+
+    if (PMIX_PROC_IS_SERVER(pmix_globals.mypeer)) {
+        lock_ctx->lockfd = open(lock_ctx->lockfile, O_CREAT | O_RDWR | O_EXCL, 0600);
+
+        /* if previous launch was crashed, the lockfile might not be deleted and unlocked,
+         * so we delete it and create a new one. */
+        if (lock_ctx->lockfd < 0) {
+            unlink(lock_ctx->lockfile);
+            lock_ctx->lockfd = open(lock_ctx->lockfile, O_CREAT | O_RDWR, 0600);
+            if (lock_ctx->lockfd < 0) {
+                rc = PMIX_ERROR;
+                PMIX_ERROR_LOG(rc);
+                goto error;
+            }
+        }
+        if (0 != setuid) {
+            if (0 > chown(lock_ctx->lockfile, uid, (gid_t) -1)) {
+                rc = PMIX_ERROR;
+                PMIX_ERROR_LOG(rc);
+                goto error;
+            }
+            /* NOTE(review): this mode has no owner-write bit, unlike the
+             * pthread variant of this file - confirm that is intended */
+            if (0 > chmod(lock_ctx->lockfile, S_IRUSR | S_IWGRP | S_IRGRP)) {
+                rc = PMIX_ERROR;
+                PMIX_ERROR_LOG(rc);
+                goto error;
+            }
+        }
+    }
+    else {
+        lock_ctx->lockfd = open(lock_ctx->lockfile, O_RDONLY);
+        if (0 > lock_ctx->lockfd) {
+            rc = PMIX_ERROR;
+            PMIX_ERROR_LOG(rc);
+            goto error;
+        }
+    }
+
+    return rc;
+
+error:
+    if (NULL != lock_ctx) {
+        /* close/unlink first: both need state that is freed right after */
+        if (0 <= lock_ctx->lockfd) {
+            close(lock_ctx->lockfd);
+            if (PMIX_PROC_IS_SERVER(pmix_globals.mypeer)) {
+                unlink(lock_ctx->lockfile);
+            }
+        }
+        if (NULL != lock_ctx->lockfile) {
+            free(lock_ctx->lockfile);
+        }
+        free(lock_ctx);
+        lock_ctx = NULL;
+    }
+    *ctx = NULL;
+
+    return rc;
+}
+
+/* Tear down the lock context referenced by *lock_ctx: close the lock
+ * descriptor, remove the lock file when running as a server, free the
+ * context (including its path string) and reset *lock_ctx to NULL.
+ * Logs PMIX_ERR_NOT_FOUND and returns if there is no context. */
+void pmix_ds12_lock_finalize(pmix_common_dstor_lock_ctx_t *lock_ctx)
+{
+    ds12_lock_fcntl_ctx_t *fcntl_lock = (ds12_lock_fcntl_ctx_t*)*lock_ctx;
+
+    if (NULL == fcntl_lock) {
+        PMIX_ERROR_LOG(PMIX_ERR_NOT_FOUND);
+        return;
+    }
+
+    close(fcntl_lock->lockfd);
+
+    if (PMIX_PROC_IS_SERVER(pmix_globals.mypeer)) {
+        unlink(fcntl_lock->lockfile);
+    }
+    /* the path was allocated by asprintf() in the init routine */
+    free(fcntl_lock->lockfile);
+    free(fcntl_lock);
+    *lock_ctx = NULL;
+}
+
+/* Take the shared (read) lock. Returns PMIX_ERR_NOT_FOUND when no
+ * context is given, otherwise the result of the fcntl lock request. */
+pmix_status_t pmix_ds12_lock_rd_get(pmix_common_dstor_lock_ctx_t lock_ctx)
+{
+    ds12_lock_fcntl_ctx_t *fcntl_lock = (ds12_lock_fcntl_ctx_t*)lock_ctx;
+    pmix_status_t rc;
+
+    if (NULL == fcntl_lock) {
+        rc = PMIX_ERR_NOT_FOUND;
+        PMIX_ERROR_LOG(rc);
+        return rc;
+    }
+    rc = _ESH_12_FCNTL_LOCK(fcntl_lock->lockfd, F_RDLCK);
+
+    return rc;
+
+}
+
+/* Take the exclusive (write) lock. Returns PMIX_ERR_NOT_FOUND when no
+ * context is given, otherwise the result of the fcntl lock request. */
+pmix_status_t pmix_ds12_lock_wr_get(pmix_common_dstor_lock_ctx_t lock_ctx)
+{
+    ds12_lock_fcntl_ctx_t *fcntl_lock = (ds12_lock_fcntl_ctx_t*)lock_ctx;
+    pmix_status_t rc;
+
+    if (NULL == fcntl_lock) {
+        rc = PMIX_ERR_NOT_FOUND;
+        PMIX_ERROR_LOG(rc);
+        return rc;
+    }
+    rc = _ESH_12_FCNTL_LOCK(fcntl_lock->lockfd, F_WRLCK);
+
+    return rc;
+
+}
+
+/* Release a lock taken by either of the two routines above. Returns
+ * PMIX_ERR_NOT_FOUND when no context is given, otherwise the result of
+ * the fcntl unlock request. */
+pmix_status_t pmix_ds12_lock_rw_rel(pmix_common_dstor_lock_ctx_t lock_ctx)
+{
+    ds12_lock_fcntl_ctx_t *fcntl_lock = (ds12_lock_fcntl_ctx_t*)lock_ctx;
+    pmix_status_t rc;
+
+    if (NULL == fcntl_lock) {
+        rc = PMIX_ERR_NOT_FOUND;
+        PMIX_ERROR_LOG(rc);
+        return rc;
+    }
+    rc = _ESH_12_FCNTL_LOCK(fcntl_lock->lockfd, F_UNLCK);
+
+    return rc;
+
+}
diff --git a/opal/mca/pmix/pmix3x/pmix/src/mca/gds/ds12/gds_ds12_lock_pthread.c b/opal/mca/pmix/pmix3x/pmix/src/mca/gds/ds12/gds_ds12_lock_pthread.c
new file mode 100644
index 00000000000..163015856eb
--- /dev/null
+++ b/opal/mca/pmix/pmix3x/pmix/src/mca/gds/ds12/gds_ds12_lock_pthread.c
@@ -0,0 +1,274 @@
+/*
+ * Copyright (c) 2015-2018 Intel, Inc. All rights reserved.
+ * Copyright (c) 2016 IBM Corporation. All rights reserved.
+ * Copyright (c) 2016-2018 Mellanox Technologies, Inc.
+ * All rights reserved.
+ * Copyright (c) 2018 Research Organization for Information Science
+ * and Technology (RIST). All rights reserved. 
+ *
+ * $COPYRIGHT$
+ *
+ * Additional copyrights may follow
+ *
+ * $HEADER$
+ */
+
+#include
+
+#include
+#ifdef HAVE_UNISTD_H
+#include
+#endif
+#ifdef HAVE_SYS_TYPES_H
+#include
+#endif
+#ifdef HAVE_SYS_STAT_H
+#include
+#endif
+
+#include
+
+#include "src/mca/common/dstore/dstore_common.h"
+#include "src/mca/gds/base/base.h"
+#include "src/mca/pshmem/pshmem.h"
+
+#include "src/util/error.h"
+#include "src/util/output.h"
+
+#include "gds_ds12_lock.h"
+#include "src/mca/common/dstore/dstore_segment.h"
+
+/* Invoke pthread_rwlock_<func>() on `rwlock` (func is rdlock, wrlock or
+ * unlock) and map the outcome to a PMIx status.
+ * pthread_rwlock_* report failure through their return value, not through
+ * errno, so the returned code is what gets classified and printed.
+ * EINVAL -> PMIX_ERR_INIT, EPERM -> PMIX_ERR_NO_PERMISSIONS, any other
+ * non-zero code -> PMIX_ERROR. Failures are reported via pmix_output(). */
+#define _ESH_12_PTHREAD_LOCK(rwlock, func)                  \
+__pmix_attribute_extension__ ({                             \
+    pmix_status_t ret = PMIX_SUCCESS;                       \
+    int rc;                                                 \
+    rc = pthread_rwlock_##func(rwlock);                     \
+    if (0 != rc) {                                          \
+        switch (rc) {                                       \
+            case EINVAL:                                    \
+                ret = PMIX_ERR_INIT;                        \
+                break;                                      \
+            case EPERM:                                     \
+                ret = PMIX_ERR_NO_PERMISSIONS;              \
+                break;                                      \
+            default:                                        \
+                /* never report a failed call as success */ \
+                ret = PMIX_ERROR;                           \
+                break;                                      \
+        }                                                   \
+    }                                                       \
+    if (ret) {                                              \
+        pmix_output(0, "%s %d:%s lock failed: %s",          \
+            __FILE__, __LINE__, __func__, strerror(rc));    \
+    }                                                       \
+    ret;                                                    \
+})
+
+/* per-context state of the pthread-rwlock based lock */
+typedef struct {
+    char *lockfile;                 /* heap-allocated path "<base_path>/dstore_sm.lock" */
+    pmix_pshmem_seg_t *segment;     /* heap-allocated descriptor of the segment backing the lock */
+    pthread_rwlock_t *rwlock;       /* points at the segment's base address */
+} ds12_lock_pthread_ctx_t;
+
+/* Create the pthread-rwlock lock context and store it in *ctx.
+ * If *ctx is already set nothing is done and PMIX_SUCCESS is returned.
+ * A server creates a one-page segment named after the lock file, zeroes
+ * it, optionally (setuid != 0) hands the file to `uid`, and initializes
+ * a process-shared rwlock at the start of the segment. Any other process
+ * attaches to the existing segment.
+ * `name` and `local_size` are not used by this implementation.
+ * On failure everything acquired so far is released, *ctx is reset to
+ * NULL and a non-success PMIX status is returned. */
+pmix_status_t pmix_gds_ds12_lock_init(pmix_common_dstor_lock_ctx_t *ctx, const char *base_path,
+                                      const char * name, uint32_t local_size, uid_t uid, bool setuid)
+{
+    size_t size = pmix_common_dstor_getpagesize();
+    pmix_status_t rc = PMIX_SUCCESS;
+    pthread_rwlockattr_t attr;
+    ds12_lock_pthread_ctx_t *lock_ctx = NULL;
+
+    if (*ctx != NULL) {
+        return PMIX_SUCCESS;
+    }
+
+    lock_ctx = (ds12_lock_pthread_ctx_t*)malloc(sizeof(ds12_lock_pthread_ctx_t));
+    if (NULL == lock_ctx) {
+        rc = PMIX_ERR_INIT;
+        PMIX_ERROR_LOG(rc);
+        goto error;
+    }
+    memset(lock_ctx, 0, sizeof(ds12_lock_pthread_ctx_t));
+    *ctx = (pmix_common_dstor_lock_ctx_t*)lock_ctx;
+
+    lock_ctx->segment = (pmix_pshmem_seg_t *)malloc(sizeof(pmix_pshmem_seg_t));
+    if (NULL == lock_ctx->segment) {
+        rc = PMIX_ERR_OUT_OF_RESOURCE;
+        PMIX_ERROR_LOG(rc);
+        goto error;
+    }
+    /* the cleanup path reads seg_cpid, so the descriptor must not hold
+     * indeterminate bytes if we bail out before create/attach */
+    memset(lock_ctx->segment, 0, sizeof(pmix_pshmem_seg_t));
+
+    /* create a lock file to prevent clients from reading while server is writing
+     * to the shared memory. This situation is quite often, especially in case of
+     * direct modex when clients might ask for data simultaneously. */
+    if(0 > asprintf(&lock_ctx->lockfile, "%s/dstore_sm.lock", base_path)) {
+        /* the pointer is indeterminate after a failed asprintf();
+         * reset it so the cleanup below does not free garbage */
+        lock_ctx->lockfile = NULL;
+        rc = PMIX_ERR_OUT_OF_RESOURCE;
+        PMIX_ERROR_LOG(rc);
+        goto error;
+    }
+    PMIX_OUTPUT_VERBOSE((10, pmix_gds_base_framework.framework_output,
+        "%s:%d:%s _lockfile_name: %s", __FILE__, __LINE__, __func__, lock_ctx->lockfile));
+
+    if (PMIX_PROC_IS_SERVER(pmix_globals.mypeer)) {
+        if (PMIX_SUCCESS != (rc = pmix_pshmem.segment_create(lock_ctx->segment,
+                                            lock_ctx->lockfile, size))) {
+            PMIX_ERROR_LOG(rc);
+            goto error;
+        }
+        memset(lock_ctx->segment->seg_base_addr, 0, size);
+        if (0 != setuid) {
+            if (0 > chown(lock_ctx->lockfile, (uid_t) uid, (gid_t) -1)){
+                rc = PMIX_ERROR;
+                PMIX_ERROR_LOG(rc);
+                goto error;
+            }
+            /* set the mode as required */
+            if (0 > chmod(lock_ctx->lockfile, S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP )) {
+                rc = PMIX_ERROR;
+                PMIX_ERROR_LOG(rc);
+                goto error;
+            }
+        }
+        lock_ctx->rwlock = (pthread_rwlock_t *)lock_ctx->segment->seg_base_addr;
+
+        if (0 != pthread_rwlockattr_init(&attr)) {
+            rc = PMIX_ERROR;
+            PMIX_ERROR_LOG(rc);
+            goto error;
+        }
+        if (0 != pthread_rwlockattr_setpshared(&attr, PTHREAD_PROCESS_SHARED)) {
+            pthread_rwlockattr_destroy(&attr);
+            rc = PMIX_ERR_INIT;
+            PMIX_ERROR_LOG(rc);
+            goto error;
+        }
+#ifdef HAVE_PTHREAD_SETKIND
+        if (0 != pthread_rwlockattr_setkind_np(&attr,
+                                PTHREAD_RWLOCK_PREFER_WRITER_NONRECURSIVE_NP)) {
+            pthread_rwlockattr_destroy(&attr);
+            /* rc still holds PMIX_SUCCESS from segment_create here */
+            rc = PMIX_ERR_INIT;
+            PMIX_ERROR_LOG(rc);
+            goto error;
+        }
+#endif
+        if (0 != pthread_rwlock_init(lock_ctx->rwlock, &attr)) {
+            pthread_rwlockattr_destroy(&attr);
+            rc = PMIX_ERR_INIT;
+            PMIX_ERROR_LOG(rc);
+            goto error;
+        }
+        if (0 != pthread_rwlockattr_destroy(&attr)) {
+            rc = PMIX_ERR_INIT;
+            PMIX_ERROR_LOG(rc);
+            goto error;
+        }
+
+    }
+    else {
+        lock_ctx->segment->seg_size = size;
+        snprintf(lock_ctx->segment->seg_name, PMIX_PATH_MAX, "%s", lock_ctx->lockfile);
+        if (PMIX_SUCCESS != (rc = pmix_pshmem.segment_attach(lock_ctx->segment,
+                                                             PMIX_PSHMEM_RW))) {
+            PMIX_ERROR_LOG(rc);
+            goto error;
+        }
+        lock_ctx->rwlock = (pthread_rwlock_t *)lock_ctx->segment->seg_base_addr;
+    }
+
+    return PMIX_SUCCESS;
+
+error:
+    if (NULL != lock_ctx) {
+        if (lock_ctx->segment) {
+            /* detach & unlink from current desc */
+            if (lock_ctx->segment->seg_cpid == getpid()) {
+                pmix_pshmem.segment_unlink(lock_ctx->segment);
+            }
+            pmix_pshmem.segment_detach(lock_ctx->segment);
+            lock_ctx->rwlock = NULL;
+            /* the descriptor itself is heap memory owned by the context */
+            free(lock_ctx->segment);
+            lock_ctx->segment = NULL;
+        }
+        if (NULL != lock_ctx->lockfile) {
+            free(lock_ctx->lockfile);
+        }
+        free(lock_ctx);
+        *ctx = (pmix_common_dstor_lock_ctx_t*)NULL;
+    }
+
+    return rc;
+}
+
+/* Tear down the lock context referenced by *lock_ctx: destroy the rwlock,
+ * unlink the segment when this process created it, detach from it, free
+ * the context and reset *lock_ctx to NULL.
+ * Logs an error and returns early (leaving the context in place) when
+ * the context, its segment or its lock file path is missing, or when
+ * destroying the rwlock fails.
+ * NOTE(review): the rwlock is destroyed by every process that finalizes,
+ * not only by the one that initialized it - confirm this is intended. */
+void pmix_ds12_lock_finalize(pmix_common_dstor_lock_ctx_t *lock_ctx)
+{
+    ds12_lock_pthread_ctx_t *pthread_lock =
+            (ds12_lock_pthread_ctx_t*)*lock_ctx;
+
+    if (NULL == pthread_lock) {
+        PMIX_ERROR_LOG(PMIX_ERR_NOT_FOUND);
+        return;
+    }
+    if (0 != pthread_rwlock_destroy(pthread_lock->rwlock)) {
+        PMIX_ERROR_LOG(PMIX_ERROR);
+        return;
+    }
+
+    if (NULL == pthread_lock->segment) {
+        PMIX_ERROR_LOG(PMIX_ERROR);
+        return;
+    }
+    if (NULL == pthread_lock->lockfile) {
+        PMIX_ERROR_LOG(PMIX_ERROR);
+        return;
+    }
+
+    /* detach & unlink from current desc */
+    if (pthread_lock->segment->seg_cpid == getpid()) {
+        pmix_pshmem.segment_unlink(pthread_lock->segment);
+    }
+    pmix_pshmem.segment_detach(pthread_lock->segment);
+
+    free(pthread_lock->segment);
+    pthread_lock->segment = NULL;
+    free(pthread_lock->lockfile);
+    pthread_lock->lockfile = NULL;
+    pthread_lock->rwlock = NULL;
+    free(pthread_lock);
+    *lock_ctx = NULL;
+}
+
+/* Take the shared (read) side of the rwlock. Returns PMIX_ERR_NOT_FOUND
+ * when no context is given, otherwise the mapped pthread result. */
+pmix_status_t pmix_ds12_lock_rd_get(pmix_common_dstor_lock_ctx_t lock_ctx)
+{
+    ds12_lock_pthread_ctx_t *pthread_lock = (ds12_lock_pthread_ctx_t*)lock_ctx;
+    pmix_status_t rc;
+
+    if (NULL == pthread_lock) {
+        rc = PMIX_ERR_NOT_FOUND;
+        PMIX_ERROR_LOG(rc);
+        return rc;
+    }
+    rc = _ESH_12_PTHREAD_LOCK(pthread_lock->rwlock, rdlock);
+
+    return rc;
+}
+
+/* Take the exclusive (write) side of the rwlock. Returns
+ * PMIX_ERR_NOT_FOUND when no context is given, otherwise the mapped
+ * pthread result. */
+pmix_status_t pmix_ds12_lock_wr_get(pmix_common_dstor_lock_ctx_t lock_ctx)
+{
+    ds12_lock_pthread_ctx_t *pthread_lock = (ds12_lock_pthread_ctx_t*)lock_ctx;
+    pmix_status_t rc;
+
+    if (NULL == pthread_lock) {
+        rc = PMIX_ERR_NOT_FOUND;
+        PMIX_ERROR_LOG(rc);
+        return rc;
+    }
+    rc = _ESH_12_PTHREAD_LOCK(pthread_lock->rwlock, wrlock);
+
+    return rc;
+}
+
+/* Release the rwlock taken by either of the two routines above. Returns
+ * PMIX_ERR_NOT_FOUND when no context is given, otherwise the mapped
+ * pthread result. */
+pmix_status_t pmix_ds12_lock_rw_rel(pmix_common_dstor_lock_ctx_t lock_ctx)
+{
+    ds12_lock_pthread_ctx_t *pthread_lock = (ds12_lock_pthread_ctx_t*)lock_ctx;
+    pmix_status_t rc;
+
+    if (NULL == pthread_lock) {
+        rc = PMIX_ERR_NOT_FOUND;
+        PMIX_ERROR_LOG(rc);
+        return rc;
+    }
+    rc = _ESH_12_PTHREAD_LOCK(pthread_lock->rwlock, unlock);
+
+    return rc;
+}
diff --git a/opal/mca/pmix/pmix3x/pmix/src/mca/gds/ds12/gds_ds20_file.c b/opal/mca/pmix/pmix3x/pmix/src/mca/gds/ds12/gds_ds20_file.c
new file mode 100644
index 00000000000..d50cb8124c5
--- /dev/null
+++ b/opal/mca/pmix/pmix3x/pmix/src/mca/gds/ds12/gds_ds20_file.c
@@ -0,0 +1,166 @@
+/*
+ * Copyright (c) 2018 Mellanox Technologies, Inc.
+ * All rights reserved.
+ *
+ * $COPYRIGHT$
+ *
+ * Additional copyrights may follow
+ *
+ * $HEADER$
+ */
+
+#include
+
+#include "src/include/pmix_globals.h"
+#include "src/mca/gds/base/base.h"
+
+#include "src/mca/common/dstore/dstore_file.h"
+#include "gds_ds12_file.h"
+
+#define ESH_KV_SIZE_V20(addr)                               \
+__pmix_attribute_extension__ ({                             \
+    size_t sz;                                              \
+    memcpy(&sz, addr, sizeof(size_t));                      \
+    sz;                                                     \
+})
+
+#define ESH_KNAME_PTR_V20(addr)                             \
+    ((char *)addr + sizeof(size_t))
+
+#define ESH_KNAME_LEN_V20(key)                              \
+__pmix_attribute_extension__ ({                             \
+    size_t kname_len = strlen(key) + 1;                     \
+    size_t len = (kname_len < ESH_MIN_KEY_LEN) ? 
\ + ESH_MIN_KEY_LEN : kname_len; \ + len; \ +}) + +#define ESH_DATA_PTR_V20(addr) \ +__pmix_attribute_extension__ ({ \ + size_t kname_len = \ + ESH_KNAME_LEN_V20(ESH_KNAME_PTR_V20(addr)); \ + uint8_t *data_ptr = addr + sizeof(size_t) + kname_len; \ + data_ptr; \ +}) + +#define ESH_DATA_SIZE_V20(addr, data_ptr) \ +__pmix_attribute_extension__ ({ \ + size_t __sz = ESH_KV_SIZE_V20(addr); \ + size_t data_size = __sz - (data_ptr - addr); \ + data_size; \ +}) + +#define ESH_KEY_SIZE_V20(key, size) \ + (sizeof(size_t) + ESH_KNAME_LEN_V20((char*)key) + size) + +/* in ext slot new offset will be stored in case if + * new data were added for the same process during + * next commit + */ +#define EXT_SLOT_SIZE_V20() \ + (ESH_KEY_SIZE_V20(ESH_REGION_EXTENSION, sizeof(size_t))) + + +#define ESH_PUT_KEY_V20(addr, key, buffer, size) \ +__pmix_attribute_extension__ ({ \ + size_t sz = ESH_KEY_SIZE_V20(key, size); \ + memcpy(addr, &sz, sizeof(size_t)); \ + memset(addr + sizeof(size_t), 0, \ + ESH_KNAME_LEN_V20(key)); \ + strncpy((char *)addr + sizeof(size_t), \ + key, ESH_KNAME_LEN_V20(key)); \ + memcpy(addr + sizeof(size_t) + ESH_KNAME_LEN_V20(key), \ + buffer, size); \ +}) + +static size_t pmix_ds20_kv_size(uint8_t *key) +{ + size_t size; + + memcpy(&size, key, sizeof(size_t)); + return size; +} + +static char* pmix_ds20_key_name_ptr(uint8_t *addr) +{ + return ESH_KNAME_PTR_V20(addr); +} + +static size_t pmix_ds20_key_name_len(char *key) +{ + return ESH_KNAME_LEN_V20(key); +} + +static uint8_t* pmix_ds20_data_ptr(uint8_t *addr) +{ + return ESH_DATA_PTR_V20(addr); +} + +static size_t pmix_ds20_data_size(uint8_t *addr, uint8_t* data_ptr) +{ + return ESH_DATA_SIZE_V20(addr, data_ptr); +} + +static size_t pmix_ds20_key_size(char *addr, size_t data_size) +{ + return ESH_KEY_SIZE_V20(addr, data_size); +} + +static size_t pmix_ds20_ext_slot_size(void) +{ + return EXT_SLOT_SIZE_V20(); +} + +static int pmix_ds20_put_key(uint8_t *addr, char *key, void *buf, size_t size) +{ + 
ESH_PUT_KEY_V20(addr, key, buf, size); + return PMIX_SUCCESS; +} + +static bool pmix_ds20_is_invalid(uint8_t *addr) +{ + bool ret = (0 == strncmp(ESH_REGION_INVALIDATED, ESH_KNAME_PTR_V20(addr), + ESH_KNAME_LEN_V20(ESH_KNAME_PTR_V20(addr)))); + return ret; +} + +static void pmix_ds20_set_invalid(uint8_t *addr) +{ + strncpy(ESH_KNAME_PTR_V20(addr), ESH_REGION_INVALIDATED, + ESH_KNAME_LEN_V20(ESH_REGION_INVALIDATED)); +} + +static bool pmix_ds20_is_ext_slot(uint8_t *addr) +{ + bool ret; + ret = (0 == strncmp(ESH_REGION_EXTENSION, ESH_KNAME_PTR_V20(addr), + ESH_KNAME_LEN_V20(ESH_KNAME_PTR_V20(addr)))); + return ret; +} + +static bool pmix_ds20_kname_match(uint8_t *addr, const char *key, size_t key_hash) +{ + bool ret = 0; + + ret = (0 == strncmp(ESH_KNAME_PTR_V20(addr), + key, ESH_KNAME_LEN_V20(key))); + return ret; +} + + +pmix_common_dstore_file_cbs_t pmix_ds20_file_module = { + .name = "ds20", + .kval_size = pmix_ds20_kv_size, + .kname_ptr = pmix_ds20_key_name_ptr, + .kname_len = pmix_ds20_key_name_len, + .data_ptr = pmix_ds20_data_ptr, + .data_size = pmix_ds20_data_size, + .key_size = pmix_ds20_key_size, + .ext_slot_size = pmix_ds20_ext_slot_size, + .put_key = pmix_ds20_put_key, + .is_invalid = pmix_ds20_is_invalid, + .is_extslot = pmix_ds20_is_ext_slot, + .set_invalid = pmix_ds20_set_invalid, + .key_hash = NULL, + .key_match = pmix_ds20_kname_match +}; diff --git a/opal/mca/pmix/pmix3x/pmix/src/mca/gds/ds12/gds_dstore.h b/opal/mca/pmix/pmix3x/pmix/src/mca/gds/ds12/gds_dstore.h deleted file mode 100644 index abd4723ad25..00000000000 --- a/opal/mca/pmix/pmix3x/pmix/src/mca/gds/ds12/gds_dstore.h +++ /dev/null @@ -1,129 +0,0 @@ -/* - * Copyright (c) 2015-2017 Intel, Inc. All rights reserved. - * Copyright (c) 2017 Mellanox Technologies, Inc. - * All rights reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#ifndef PMIX_DS12_H -#define PMIX_DS12_H - -#include - - -#include "src/mca/gds/gds.h" -#include "src/mca/pshmem/pshmem.h" - -BEGIN_C_DECLS - -#include -#include "src/class/pmix_value_array.h" - -#define INITIAL_SEG_SIZE 4096 -#define NS_META_SEG_SIZE (1<<22) -#define NS_DATA_SEG_SIZE (1<<22) - -#define PMIX_DSTORE_ESH_BASE_PATH "PMIX_DSTORE_ESH_BASE_PATH" - -#ifdef HAVE_PTHREAD_SHARED -#define ESH_PTHREAD_LOCK -#elif defined HAVE_FCNTL_FLOCK -#define ESH_FCNTL_LOCK -#else -#error No locking mechanism was found -#endif - -/* this structs are used to store information about - * shared segments addresses locally at each process, - * so they are common for different types of segments - * and don't have a specific content (namespace's info, - * rank's meta info, ranks's data). */ - -typedef enum { - INITIAL_SEGMENT, - NS_META_SEGMENT, - NS_DATA_SEGMENT -} segment_type; - -typedef struct seg_desc_t seg_desc_t; -struct seg_desc_t { - segment_type type; - pmix_pshmem_seg_t seg_info; - uint32_t id; - seg_desc_t *next; -}; - -typedef struct ns_map_data_s ns_map_data_t; -typedef struct session_s session_t; -typedef struct ns_map_s ns_map_t; - -struct session_s { - int in_use; - uid_t jobuid; - char setjobuid; - char *nspace_path; - char *lockfile; -#ifdef ESH_PTHREAD_LOCK - pmix_pshmem_seg_t *rwlock_seg; - pthread_rwlock_t *rwlock; -#endif - int lockfd; - seg_desc_t *sm_seg_first; - seg_desc_t *sm_seg_last; -}; - -struct ns_map_data_s { - char name[PMIX_MAX_NSLEN+1]; - size_t tbl_idx; - int track_idx; -}; - -struct ns_map_s { - int in_use; - ns_map_data_t data; -}; - -/* initial segment format: - * size_t num_elems; - * size_t full; //indicate to client that it needs to attach to the next segment - * ns_seg_info_t ns_seg_info[max_ns_num]; - */ - -typedef struct { - ns_map_data_t ns_map; - size_t num_meta_seg;/* read by clients to attach to this number of segments. 
*/ - size_t num_data_seg; -} ns_seg_info_t; - -/* meta segment format: - * size_t num_elems; - * rank_meta_info meta_info[max_meta_elems]; - */ - -typedef struct { - size_t rank; - size_t offset; - size_t count; -} rank_meta_info; - -typedef struct { - pmix_value_array_t super; - ns_map_data_t ns_map; - size_t num_meta_seg; - size_t num_data_seg; - seg_desc_t *meta_seg; - seg_desc_t *data_seg; - bool in_use; -} ns_track_elem_t; - -/* the component must be visible data for the linker to find it */ -PMIX_EXPORT extern pmix_gds_base_component_t mca_gds_ds12_component; -extern pmix_gds_base_module_t pmix_ds12_module; - -END_C_DECLS - -#endif diff --git a/opal/mca/pmix/pmix3x/pmix/src/mca/gds/ds21/Makefile.am b/opal/mca/pmix/pmix3x/pmix/src/mca/gds/ds21/Makefile.am new file mode 100644 index 00000000000..06e1dd13a90 --- /dev/null +++ b/opal/mca/pmix/pmix3x/pmix/src/mca/gds/ds21/Makefile.am @@ -0,0 +1,65 @@ +# -*- makefile -*- +# +# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana +# University Research and Technology +# Corporation. All rights reserved. +# Copyright (c) 2004-2005 The University of Tennessee and The University +# of Tennessee Research Foundation. All rights +# reserved. +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# University of Stuttgart. All rights reserved. +# Copyright (c) 2004-2005 The Regents of the University of California. +# All rights reserved. +# Copyright (c) 2012 Los Alamos National Security, Inc. All rights reserved. +# Copyright (c) 2013-2019 Intel, Inc. All rights reserved. +# Copyright (c) 2017 Research Organization for Information Science +# and Technology (RIST). All rights reserved. +# Copyright (c) 2017-2018 Mellanox Technologies, Inc. +# All rights reserved. 
+# $COPYRIGHT$ +# +# Additional copyrights may follow +# +# $HEADER$ +# + +headers = \ + gds_ds21_base.h \ + gds_ds21_lock.h \ + gds_ds21_file.h + +sources = \ + gds_ds21_base.c \ + gds_ds21_lock.c \ + gds_ds21_lock_pthread.c \ + gds_ds21_component.c \ + gds_ds21_file.c + +# Make the output library in this directory, and name it either +# mca_gds_ds21.la (for DSO builds) or libmca_gds_ds21.la +# (for static builds). + +if MCA_BUILD_pmix_gds_ds21_DSO +lib = +lib_sources = +component = mca_gds_ds21.la +component_sources = $(headers) $(sources) +else +lib = libmca_gds_ds21.la +lib_sources = $(headers) $(sources) +component = +component_sources = +endif + +mcacomponentdir = $(pmixlibdir) +mcacomponent_LTLIBRARIES = $(component) +mca_gds_ds21_la_SOURCES = $(component_sources) +mca_gds_ds21_la_LDFLAGS = -module -avoid-version \ + $(PMIX_TOP_BUILDDIR)/src/mca/common/dstore/libmca_common_dstore.la +if NEED_LIBPMIX +mca_gds_ds21_la_LIBADD = $(top_builddir)/src/libpmix.la +endif + +noinst_LTLIBRARIES = $(lib) +libmca_gds_ds21_la_SOURCES = $(lib_sources) +libmca_gds_ds21_la_LDFLAGS = -module -avoid-version diff --git a/opal/mca/pmix/pmix3x/pmix/src/mca/gds/ds21/gds_ds21_base.c b/opal/mca/pmix/pmix3x/pmix/src/mca/gds/ds21/gds_ds21_base.c new file mode 100644 index 00000000000..a3f32c9c26e --- /dev/null +++ b/opal/mca/pmix/pmix3x/pmix/src/mca/gds/ds21/gds_ds21_base.c @@ -0,0 +1,176 @@ +/* + * Copyright (c) 2015-2018 Intel, Inc. All rights reserved. + * Copyright (c) 2016-2018 IBM Corporation. All rights reserved. + * Copyright (c) 2016-2018 Mellanox Technologies, Inc. + * All rights reserved. + * Copyright (c) 2018 Research Organization for Information Science + * and Technology (RIST). All rights reserved.
+ * + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include +#include +#include "src/include/pmix_globals.h" +#include "src/util/error.h" +#include "src/mca/gds/base/base.h" +#include "src/util/argv.h" + +#include "src/mca/common/dstore/dstore_common.h" +#include "gds_ds21_base.h" +#include "gds_ds21_lock.h" +#include "gds_ds21_file.h" +#include "src/mca/common/dstore/dstore_base.h" + +static pmix_common_dstore_ctx_t *ds21_ctx; + +static pmix_status_t ds21_init(pmix_info_t info[], size_t ninfo) +{ + pmix_status_t rc = PMIX_SUCCESS; + + ds21_ctx = pmix_common_dstor_init("ds21", info, ninfo, + &pmix_ds21_lock_module, + &pmix_ds21_file_module); + if (NULL == ds21_ctx) { + rc = PMIX_ERR_INIT; + } + + return rc; +} + +static void ds21_finalize(void) +{ + pmix_common_dstor_finalize(ds21_ctx); +} + +static pmix_status_t ds21_assign_module(pmix_info_t *info, size_t ninfo, + int *priority) +{ + size_t n, m; + char **options; + + *priority = 20; + if (NULL != info) { + for (n=0; n < ninfo; n++) { + if (0 == strncmp(info[n].key, PMIX_GDS_MODULE, PMIX_MAX_KEYLEN)) { + options = pmix_argv_split(info[n].value.data.string, ','); + for (m=0; NULL != options[m]; m++) { + if (0 == strcmp(options[m], "ds21")) { + /* they specifically asked for us */ + *priority = 120; + break; + } + if (0 == strcmp(options[m], "dstore")) { + *priority = 60; + break; + } + } + pmix_argv_free(options); + break; + } + } + } + + return PMIX_SUCCESS; +} + +static pmix_status_t ds21_cache_job_info(struct pmix_namespace_t *ns, + pmix_info_t info[], size_t ninfo) +{ + return PMIX_SUCCESS; +} + +static pmix_status_t ds21_register_job_info(struct pmix_peer_t *pr, + pmix_buffer_t *reply) +{ + return pmix_common_dstor_register_job_info(ds21_ctx, pr, reply); +} + +static pmix_status_t ds21_store_job_info(const char *nspace, pmix_buffer_t *buf) +{ + return pmix_common_dstor_store_job_info(ds21_ctx, nspace, buf); +} + +static pmix_status_t ds21_store(const pmix_proc_t *proc, + 
pmix_scope_t scope, + pmix_kval_t *kv) +{ + return pmix_common_dstor_store(ds21_ctx, proc, scope, kv); +} + +/* this function is only called by the PMIx server when its + * host has received data from some other peer. It therefore + * always contains data solely from remote procs, and we + * shall store it accordingly */ +static pmix_status_t ds21_store_modex(struct pmix_namespace_t *nspace, + pmix_list_t *cbs, + pmix_buffer_t *buf) +{ + return pmix_common_dstor_store_modex(ds21_ctx, nspace, cbs, buf); +} + +static pmix_status_t ds21_fetch(const pmix_proc_t *proc, + pmix_scope_t scope, bool copy, + const char *key, + pmix_info_t info[], size_t ninfo, + pmix_list_t *kvs) +{ + return pmix_common_dstor_fetch(ds21_ctx, proc, scope, copy, key, info, ninfo, kvs); +} + +static pmix_status_t ds21_setup_fork(const pmix_proc_t *peer, char ***env) +{ + pmix_status_t rc; + char *env_name = NULL; + int ds_ver = 0; + + sscanf(ds21_ctx->ds_name, "ds%d", &ds_ver); + if (0 == ds_ver) { + rc = PMIX_ERR_INIT; + PMIX_ERROR_LOG(rc); + return rc; + } + if (0 > asprintf(&env_name, PMIX_DSTORE_VER_BASE_PATH_FMT, ds_ver)) { + rc = PMIX_ERR_NOMEM; + PMIX_ERROR_LOG(rc); + return rc; + } + rc = pmix_common_dstor_setup_fork(ds21_ctx, env_name, peer, env); + free(env_name); + + return rc; +} + +static pmix_status_t ds21_add_nspace(const char *nspace, + pmix_info_t info[], + size_t ninfo) +{ + return pmix_common_dstor_add_nspace(ds21_ctx, nspace, info, ninfo); +} + +static pmix_status_t ds21_del_nspace(const char* nspace) +{ + return pmix_common_dstor_del_nspace(ds21_ctx, nspace); +} + +pmix_gds_base_module_t pmix_ds21_module = { + .name = "ds21", + .is_tsafe = true, + .init = ds21_init, + .finalize = ds21_finalize, + .assign_module = ds21_assign_module, + .cache_job_info = ds21_cache_job_info, + .register_job_info = ds21_register_job_info, + .store_job_info = ds21_store_job_info, + .store = ds21_store, + .store_modex = ds21_store_modex, + .fetch = ds21_fetch, + .setup_fork = ds21_setup_fork, + 
.add_nspace = ds21_add_nspace, + .del_nspace = ds21_del_nspace, +}; + diff --git a/opal/mca/pmix/pmix3x/pmix/src/mca/gds/ds21/gds_ds21_base.h b/opal/mca/pmix/pmix3x/pmix/src/mca/gds/ds21/gds_ds21_base.h new file mode 100644 index 00000000000..c8fc4d43e54 --- /dev/null +++ b/opal/mca/pmix/pmix3x/pmix/src/mca/gds/ds21/gds_ds21_base.h @@ -0,0 +1,21 @@ +/* + * Copyright (c) 2015-2017 Intel, Inc. All rights reserved. + * Copyright (c) 2017-2018 Mellanox Technologies, Inc. + * All rights reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#ifndef GDS_DSTORE_21_H +#define GDS_DSTORE_21_H + +#include "src/mca/gds/gds.h" + +/* the component must be visible data for the linker to find it */ +PMIX_EXPORT extern pmix_gds_base_component_t mca_gds_ds21_component; +extern pmix_gds_base_module_t pmix_ds21_module; + +#endif // GDS_DSTORE_21_H diff --git a/opal/mca/pmix/pmix3x/pmix/src/mca/gds/ds21/gds_ds21_component.c b/opal/mca/pmix/pmix3x/pmix/src/mca/gds/ds21/gds_ds21_component.c new file mode 100644 index 00000000000..c1f42944df8 --- /dev/null +++ b/opal/mca/pmix/pmix3x/pmix/src/mca/gds/ds21/gds_ds21_component.c @@ -0,0 +1,93 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ +/* + * Copyright (c) 2004-2008 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2005 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * Copyright (c) 2015 Los Alamos National Security, LLC. All rights + * reserved. + * Copyright (c) 2016-2018 Intel, Inc. All rights reserved. + * Copyright (c) 2017-2018 Mellanox Technologies, Inc. + * All rights reserved. 
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + * + * These symbols are in a file by themselves to provide nice linker + * semantics. Since linkers generally pull in symbols by object + * files, keeping these symbols as the only symbols in this file + * prevents utility programs such as "ompi_info" from having to import + * entire components just to query their version and parameters. + */ + +#include +#include "pmix_common.h" + +#include "src/include/pmix_globals.h" +#include "src/mca/gds/gds.h" +#include "gds_ds21_base.h" + +static pmix_status_t component_open(void); +static pmix_status_t component_close(void); +static pmix_status_t component_query(pmix_mca_base_module_t **module, int *priority); + +/* + * Instantiate the public struct with all of our public information + * and pointers to our public functions in it + */ +pmix_gds_base_component_t mca_gds_ds21_component = { + .base = { + PMIX_GDS_BASE_VERSION_1_0_0, + + /* Component name and version */ + .pmix_mca_component_name = "ds21", + PMIX_MCA_BASE_MAKE_VERSION(component, + PMIX_MAJOR_VERSION, + PMIX_MINOR_VERSION, + PMIX_RELEASE_VERSION), + + /* Component open and close functions */ + .pmix_mca_open_component = component_open, + .pmix_mca_close_component = component_close, + .pmix_mca_query_component = component_query, + }, + .data = { + /* The component is checkpoint ready */ + PMIX_MCA_BASE_METADATA_PARAM_CHECKPOINT + } +}; + + +static int component_open(void) +{ + return PMIX_SUCCESS; +} + + +static int component_query(pmix_mca_base_module_t **module, int *priority) +{ + /* launchers cannot use the dstore */ + if (PMIX_PROC_IS_LAUNCHER(pmix_globals.mypeer)) { + *priority = 0; + *module = NULL; + return PMIX_ERROR; + } + + *priority = 30; + *module = (pmix_mca_base_module_t *)&pmix_ds21_module; + return PMIX_SUCCESS; +} + + +static int component_close(void) +{ + return PMIX_SUCCESS; +} diff --git a/opal/mca/pmix/pmix3x/pmix/src/mca/gds/ds21/gds_ds21_file.c 
b/opal/mca/pmix/pmix3x/pmix/src/mca/gds/ds21/gds_ds21_file.c new file mode 100644 index 00000000000..7a23edd2063 --- /dev/null +++ b/opal/mca/pmix/pmix3x/pmix/src/mca/gds/ds21/gds_ds21_file.c @@ -0,0 +1,188 @@ +/* + * Copyright (c) 2018 Mellanox Technologies, Inc. + * All rights reserved. + * Copyright (c) 2019 Research Organization for Information Science + * and Technology (RIST). All rights reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include + +#include "src/include/pmix_globals.h" +#include "src/mca/gds/base/base.h" + +#include "src/mca/common/dstore/dstore_file.h" +#include "gds_ds21_file.h" + +#if 8 > SIZEOF_SIZE_T +#define ESH_REGION_EXTENSION_FLG 0x80000000 +#define ESH_REGION_INVALIDATED_FLG 0x40000000 +#define ESH_REGION_SIZE_MASK 0x3FFFFFFF +#else +#define ESH_REGION_EXTENSION_FLG 0x8000000000000000 +#define ESH_REGION_INVALIDATED_FLG 0x4000000000000000 +#define ESH_REGION_SIZE_MASK 0x3FFFFFFFFFFFFFFF +#endif + +#define ESH_KV_SIZE_V21(addr) \ +__pmix_attribute_extension__ ({ \ + size_t sz; \ + memcpy(&sz, addr, sizeof(size_t)); \ + /* drop the two flag bits kept in the msb's */ \ + (sz & ESH_REGION_SIZE_MASK); \ +}) + +#define ESH_KNAME_PTR_V21(addr) \ + ((char *)addr + 2 * sizeof(size_t)) + +#define ESH_KNAME_LEN_V21(key) \ + (strlen(key) + 1) + +#define ESH_DATA_PTR_V21(addr) \ +__pmix_attribute_extension__ ({ \ + char *key_ptr = ESH_KNAME_PTR_V21(addr); \ + size_t kname_len = ESH_KNAME_LEN_V21(key_ptr); \ + uint8_t *data_ptr = \ + addr + (key_ptr - (char*)addr) + kname_len; \ + data_ptr; \ +}) + +#define ESH_DATA_SIZE_V21(addr, data_ptr) \ +__pmix_attribute_extension__ ({ \ + size_t sz = ESH_KV_SIZE_V21(addr); \ + size_t data_size = sz - (data_ptr - addr); \ + data_size; \ +}) + +#define ESH_KEY_SIZE_V21(key, size) \ + (2 * sizeof(size_t) + ESH_KNAME_LEN_V21((char*)key) + size) + +/* in ext slot new offset will be stored in case if + * new data were added for the same process during + * next commit + */ +#define
EXT_SLOT_SIZE_V21() \ + (ESH_KEY_SIZE_V21("", sizeof(size_t))) + +static bool pmix_ds21_is_invalid(uint8_t *addr) +{ + size_t sz; + memcpy(&sz, addr, sizeof(size_t)); + return !!(sz & ESH_REGION_INVALIDATED_FLG); +} + +static void pmix_ds21_set_invalid(uint8_t *addr) +{ + size_t sz; + memcpy(&sz, addr, sizeof(size_t)); + sz |= ESH_REGION_INVALIDATED_FLG; + memcpy(addr, &sz, sizeof(size_t)); +} + +static bool pmix_ds21_is_ext_slot(uint8_t *addr) +{ + size_t sz; + memcpy(&sz, addr, sizeof(size_t)); + return !!(sz & ESH_REGION_EXTENSION_FLG); +} + +static size_t pmix_ds21_key_hash(const char *key) +{ + size_t hash = 0; + int i; + for(i=0; key[i]; i++) { + hash += key[i]; + } + return hash; +} + +static bool pmix_ds21_kname_match(uint8_t *addr, const char *key, size_t key_hash) +{ + bool ret = 0; + size_t hash; + memcpy(&hash, (char*)addr + sizeof(size_t), sizeof(size_t)); + if( key_hash != hash ) { + return ret; + } + return (0 == strncmp(ESH_KNAME_PTR_V21(addr), key, ESH_KNAME_LEN_V21(key))); +} + +static size_t pmix_ds21_kval_size(uint8_t *key) +{ + return ESH_KV_SIZE_V21(key); ; +} + +static char* pmix_ds21_key_name_ptr(uint8_t *addr) +{ + return ESH_KNAME_PTR_V21(addr); +} + +static size_t pmix_ds21_key_name_len(char *key) +{ + return ESH_KNAME_LEN_V21(key); +} + +static uint8_t* pmix_ds21_data_ptr(uint8_t *addr) +{ + return ESH_DATA_PTR_V21(addr); +} + +static size_t pmix_ds21_data_size(uint8_t *addr, uint8_t* data_ptr) +{ + return ESH_DATA_SIZE_V21(addr, data_ptr); +} + +static size_t pmix_ds21_key_size(char *addr, size_t data_size) +{ + return ESH_KEY_SIZE_V21(addr, data_size); +} + +static size_t pmix_ds21_ext_slot_size(void) +{ + return EXT_SLOT_SIZE_V21(); +} + +static int pmix_ds21_put_key(uint8_t *addr, char *key, + void* buffer, size_t size) +{ + size_t flag = 0; + size_t hash = 0; + char *addr_ch = (char*)addr; + if( !strcmp(key, ESH_REGION_EXTENSION) ) { + /* we have a flag for this special key */ + key = ""; + flag |= ESH_REGION_EXTENSION_FLG; + } + 
size_t sz = ESH_KEY_SIZE_V21(key, size); + if( ESH_REGION_SIZE_MASK < sz ) { + return PMIX_ERROR; + } + sz |= flag; + memcpy(addr_ch, &sz, sizeof(size_t)); + hash = pmix_ds21_key_hash(key); + memcpy(addr_ch + sizeof(size_t), &hash, sizeof(size_t)); + strncpy(addr_ch + 2 * sizeof(size_t), key, ESH_KNAME_LEN_V21(key)); + memcpy(ESH_DATA_PTR_V21(addr), buffer, size); + return PMIX_SUCCESS; +} + +pmix_common_dstore_file_cbs_t pmix_ds21_file_module = { + .name = "ds21", + .kval_size = pmix_ds21_kval_size, + .kname_ptr = pmix_ds21_key_name_ptr, + .kname_len = pmix_ds21_key_name_len, + .data_ptr = pmix_ds21_data_ptr, + .data_size = pmix_ds21_data_size, + .key_size = pmix_ds21_key_size, + .ext_slot_size = pmix_ds21_ext_slot_size, + .put_key = pmix_ds21_put_key, + .is_invalid = pmix_ds21_is_invalid, + .is_extslot = pmix_ds21_is_ext_slot, + .set_invalid = pmix_ds21_set_invalid, + .key_hash = pmix_ds21_key_hash, + .key_match = pmix_ds21_kname_match +}; diff --git a/opal/mca/pmix/pmix3x/pmix/src/mca/gds/ds21/gds_ds21_file.h b/opal/mca/pmix/pmix3x/pmix/src/mca/gds/ds21/gds_ds21_file.h new file mode 100644 index 00000000000..ea75788ffe8 --- /dev/null +++ b/opal/mca/pmix/pmix3x/pmix/src/mca/gds/ds21/gds_ds21_file.h @@ -0,0 +1,19 @@ +/* + * Copyright (c) 2018 Mellanox Technologies, Inc. + * All rights reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#ifndef GDS_DS21_FILE_H +#define GDS_DS21_FILE_H + +#include +#include + +extern pmix_common_dstore_file_cbs_t pmix_ds21_file_module; + +#endif // GDS_DS21_FILE_H diff --git a/opal/mca/pmix/pmix3x/pmix/src/mca/gds/ds21/gds_ds21_lock.c b/opal/mca/pmix/pmix3x/pmix/src/mca/gds/ds21/gds_ds21_lock.c new file mode 100644 index 00000000000..340343d8c8e --- /dev/null +++ b/opal/mca/pmix/pmix3x/pmix/src/mca/gds/ds21/gds_ds21_lock.c @@ -0,0 +1,25 @@ +/* + * Copyright (c) 2018 Mellanox Technologies, Inc. + * All rights reserved. 
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include +#include + +#include "src/mca/common/dstore/dstore_common.h" + +#include "gds_ds21_lock.h" + +pmix_common_lock_callbacks_t pmix_ds21_lock_module = { + .init = pmix_gds_ds21_lock_init, + .finalize = pmix_ds21_lock_finalize, + .rd_lock = pmix_ds21_lock_rd_get, + .rd_unlock = pmix_ds21_lock_rd_rel, + .wr_lock = pmix_ds21_lock_wr_get, + .wr_unlock = pmix_ds21_lock_wr_rel +}; diff --git a/opal/mca/pmix/pmix3x/pmix/src/mca/gds/ds21/gds_ds21_lock.h b/opal/mca/pmix/pmix3x/pmix/src/mca/gds/ds21/gds_ds21_lock.h new file mode 100644 index 00000000000..158e7cbf5ab --- /dev/null +++ b/opal/mca/pmix/pmix3x/pmix/src/mca/gds/ds21/gds_ds21_lock.h @@ -0,0 +1,30 @@ +/* + * Copyright (c) 2018 Mellanox Technologies, Inc. + * All rights reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#ifndef DS21_LOCK_H +#define DS21_LOCK_H + +#include +#include + +#include "src/mca/common/dstore/dstore_common.h" + +pmix_status_t pmix_gds_ds21_lock_init(pmix_common_dstor_lock_ctx_t *lock_ctx, + const char *base_path, const char *name, + uint32_t local_size, uid_t uid, bool setuid); +void pmix_ds21_lock_finalize(pmix_common_dstor_lock_ctx_t *lock_ctx); +pmix_status_t pmix_ds21_lock_rd_get(pmix_common_dstor_lock_ctx_t lock_ctx); +pmix_status_t pmix_ds21_lock_wr_get(pmix_common_dstor_lock_ctx_t lock_ctx); +pmix_status_t pmix_ds21_lock_rd_rel(pmix_common_dstor_lock_ctx_t lock_ctx); +pmix_status_t pmix_ds21_lock_wr_rel(pmix_common_dstor_lock_ctx_t lock_ctx); + +extern pmix_common_lock_callbacks_t pmix_ds21_lock_module; + +#endif // DS21_LOCK_H diff --git a/opal/mca/pmix/pmix3x/pmix/src/mca/gds/ds21/gds_ds21_lock_pthread.c b/opal/mca/pmix/pmix3x/pmix/src/mca/gds/ds21/gds_ds21_lock_pthread.c new file mode 100644 index 00000000000..99713f5651e --- /dev/null +++ b/opal/mca/pmix/pmix3x/pmix/src/mca/gds/ds21/gds_ds21_lock_pthread.c @@ -0,0 +1,439 @@ +/* + * Copyright (c) 2018 
Mellanox Technologies, Inc. + * All rights reserved. + * + * Copyright (c) 2018-2019 Intel, Inc. All rights reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include +#include + +#include +#ifdef HAVE_UNISTD_H +#include +#endif +#ifdef HAVE_SYS_TYPES_H +#include +#endif +#ifdef HAVE_SYS_STAT_H +#include +#endif + +#include "src/mca/common/dstore/dstore_common.h" +#include "src/mca/gds/base/base.h" +#include "src/mca/pshmem/pshmem.h" +#include "src/class/pmix_list.h" + +#include "src/util/error.h" +#include "src/util/output.h" + +#include "gds_ds21_lock.h" +#include "src/mca/common/dstore/dstore_segment.h" + +typedef struct { + pmix_list_item_t super; + + char *lockfile; + pmix_dstore_seg_desc_t *seg_desc; + pthread_mutex_t *mutex; + uint32_t num_locks; + uint32_t lock_idx; +} lock_item_t; + +typedef struct { + pmix_list_t lock_traker; +} lock_ctx_t; + +typedef pmix_list_t ds21_lock_pthread_ctx_t; + +/* + * Lock segment format: + * 1. Segment size sizeof(size_t) + * 2. local_size: sizeof(uint32_t) + * 3. Align size sizeof(size_t) + * 4. Offset of mutexes sizeof(size_t) + * 5. Array of in use indexes: sizeof(int32_t)*local_size + * 6. 
Double array of locks: sizeof(pthread_mutex_t)*local_size*2 + */ +typedef struct { + size_t seg_size; + uint32_t num_locks; + size_t align_size; + size_t mutex_offs; +} segment_hdr_t; + +#define _GET_IDX_ARR_PTR(seg_ptr) \ + ((pmix_atomic_int32_t*)((char*)seg_ptr + sizeof(segment_hdr_t))) + +#define _GET_MUTEX_ARR_PTR(seg_hdr) \ + ((pthread_mutex_t*)((char*)seg_hdr + seg_hdr->mutex_offs)) + +#define _GET_MUTEX_PTR(seg_hdr, idx) \ + ((pthread_mutex_t*)((char*)seg_hdr + seg_hdr->mutex_offs + seg_hdr->align_size * (idx))) + + +static void ncon(lock_item_t *p) { + p->lockfile = NULL; + p->lock_idx = 0; + p->mutex = NULL; + p->num_locks = 0; + p->seg_desc = NULL; +} + +static void ldes(lock_item_t *p) { + uint32_t i; + + if(PMIX_PROC_IS_SERVER(pmix_globals.mypeer)) { + segment_hdr_t *seg_hdr = (segment_hdr_t *)p->seg_desc->seg_info.seg_base_addr; + if (p->lockfile) { + unlink(p->lockfile); + } + for(i = 0; i < p->num_locks * 2; i++) { + pthread_mutex_t *mutex = _GET_MUTEX_PTR(seg_hdr, i); + if (0 != pthread_mutex_destroy(mutex)) { + PMIX_ERROR_LOG(PMIX_ERROR); + } + } + } + if (p->lockfile) { + free(p->lockfile); + } + if (p->seg_desc) { + pmix_common_dstor_delete_sm_desc(p->seg_desc); + } +} + +PMIX_CLASS_INSTANCE(lock_item_t, + pmix_list_item_t, + ncon, ldes); + +pmix_status_t pmix_gds_ds21_lock_init(pmix_common_dstor_lock_ctx_t *ctx, const char *base_path, const char * name, + uint32_t local_size, uid_t uid, bool setuid) +{ + pthread_mutexattr_t attr; + size_t size; + uint32_t i; + int page_size = pmix_common_dstor_getpagesize(); + segment_hdr_t *seg_hdr; + lock_item_t *lock_item = NULL; + lock_ctx_t *lock_ctx = (lock_ctx_t*)*ctx; + pmix_list_t *lock_tracker; + pmix_status_t rc = PMIX_SUCCESS; + + if (NULL == *ctx) { + lock_ctx = (lock_ctx_t*)malloc(sizeof(lock_ctx_t)); + if (NULL == lock_ctx) { + rc = PMIX_ERR_INIT; + PMIX_ERROR_LOG(rc); + goto error; + } + memset(lock_ctx, 0, sizeof(lock_ctx_t)); + PMIX_CONSTRUCT(&lock_ctx->lock_traker, pmix_list_t); + *ctx = 
lock_ctx; + } + + lock_tracker = &lock_ctx->lock_traker; + lock_item = PMIX_NEW(lock_item_t); + + if (NULL == lock_item) { + rc = PMIX_ERR_INIT; + PMIX_ERROR_LOG(rc); + goto error; + } + pmix_list_append(lock_tracker, &lock_item->super); + + PMIX_OUTPUT_VERBOSE((10, pmix_gds_base_framework.framework_output, + "%s:%d:%s local_size %d", __FILE__, __LINE__, __func__, local_size)); + + if (PMIX_PROC_IS_SERVER(pmix_globals.mypeer)) { + size_t seg_align_size; + size_t seg_hdr_size; + + if (0 != (seg_align_size = pmix_common_dstor_getcacheblocksize())) { + seg_align_size = (sizeof(pthread_mutex_t) / seg_align_size + 1) + * seg_align_size; + } else { + seg_align_size = sizeof(pthread_mutex_t); + } + + seg_hdr_size = ((sizeof(segment_hdr_t) + + sizeof(int32_t) * local_size) + / seg_align_size + 1) * seg_align_size; + + size = ((seg_hdr_size + + 2 * local_size * seg_align_size) /* array of mutexes */ + / page_size + 1) * page_size; + + lock_item->seg_desc = pmix_common_dstor_create_new_lock_seg(base_path, + size, name, 0, uid, setuid); + if (NULL == lock_item->seg_desc) { + rc = PMIX_ERR_OUT_OF_RESOURCE; + PMIX_ERROR_LOG(rc); + goto error; + } + + if (0 != pthread_mutexattr_init(&attr)) { + rc = PMIX_ERR_INIT; + PMIX_ERROR_LOG(rc); + goto error; + } + if (0 != pthread_mutexattr_setpshared(&attr, PTHREAD_PROCESS_SHARED)) { + pthread_mutexattr_destroy(&attr); + rc = PMIX_ERR_INIT; + PMIX_ERROR_LOG(rc); + goto error; + } + + segment_hdr_t *seg_hdr = (segment_hdr_t*)lock_item->seg_desc->seg_info.seg_base_addr; + seg_hdr->num_locks = local_size; + seg_hdr->seg_size = size; + seg_hdr->align_size = seg_align_size; + seg_hdr->mutex_offs = seg_hdr_size; + + lock_item->lockfile = strdup(lock_item->seg_desc->seg_info.seg_name); + lock_item->num_locks = local_size; + lock_item->mutex = _GET_MUTEX_ARR_PTR(seg_hdr); + + for(i = 0; i < local_size * 2; i++) { + pthread_mutex_t *mutex = _GET_MUTEX_PTR(seg_hdr, i); + if (0 != pthread_mutex_init(mutex, &attr)) { + 
pthread_mutexattr_destroy(&attr); + rc = PMIX_ERR_INIT; + PMIX_ERROR_LOG(rc); + goto error; + } + } + if (0 != pthread_mutexattr_destroy(&attr)) { + rc = PMIX_ERR_INIT; + PMIX_ERROR_LOG(PMIX_ERR_INIT); + goto error; + } + } + else { + pmix_atomic_int32_t *lock_idx_ptr; + bool idx_found = false; + + size = pmix_common_dstor_getpagesize(); + lock_item->seg_desc = pmix_common_dstor_attach_new_lock_seg(base_path, size, name, 0); + if (NULL == lock_item->seg_desc) { + rc = PMIX_ERR_NOT_FOUND; + goto error; + } + seg_hdr = (segment_hdr_t*)lock_item->seg_desc->seg_info.seg_base_addr; + + if (seg_hdr->seg_size > size) { + size = seg_hdr->seg_size; + pmix_common_dstor_delete_sm_desc(lock_item->seg_desc); + lock_item->seg_desc = pmix_common_dstor_attach_new_lock_seg(base_path, size, name, 0); + if (NULL == lock_item->seg_desc) { + rc = PMIX_ERR_NOT_FOUND; + goto error; + } + seg_hdr = (segment_hdr_t*)lock_item->seg_desc->seg_info.seg_base_addr; + } + + lock_item->num_locks = seg_hdr->num_locks; + lock_idx_ptr = _GET_IDX_ARR_PTR(seg_hdr); + lock_item->mutex = _GET_MUTEX_ARR_PTR(seg_hdr); + + for (i = 0; i < lock_item->num_locks; i++) { + int32_t expected = 0; + if (pmix_atomic_compare_exchange_strong_32(&lock_idx_ptr[i], &expected, 1)) { + lock_item->lock_idx = i; + lock_item->lockfile = strdup(lock_item->seg_desc->seg_info.seg_name); + idx_found = true; + break; + } + } + + if (false == idx_found) { + rc = PMIX_ERR_NOT_FOUND; + goto error; + } + } + + return rc; + +error: + if (NULL != lock_item) { + pmix_list_remove_item(lock_tracker, &lock_item->super); + PMIX_RELEASE(lock_item); + lock_item = NULL; + } + *ctx = NULL; + + return rc; +} + +void pmix_ds21_lock_finalize(pmix_common_dstor_lock_ctx_t *lock_ctx) +{ + lock_item_t *lock_item, *item_next; + pmix_list_t *lock_tracker = &((lock_ctx_t*)*lock_ctx)->lock_traker; + + if (NULL == lock_tracker) { + return; + } + + PMIX_LIST_FOREACH_SAFE(lock_item, item_next, lock_tracker, lock_item_t) { + 
pmix_list_remove_item(lock_tracker, &lock_item->super); + PMIX_RELEASE(lock_item); + } + if (pmix_list_is_empty(lock_tracker)) { + PMIX_LIST_DESTRUCT(lock_tracker); + free(lock_tracker); + lock_tracker = NULL; + } + *lock_ctx = NULL; +} + +pmix_status_t pmix_ds21_lock_wr_get(pmix_common_dstor_lock_ctx_t lock_ctx) +{ + lock_item_t *lock_item; + pmix_list_t *lock_tracker = &((lock_ctx_t*)lock_ctx)->lock_traker; + uint32_t num_locks; + uint32_t i; + pmix_status_t rc; + segment_hdr_t *seg_hdr; + + if (NULL == lock_tracker) { + rc = PMIX_ERR_NOT_FOUND; + PMIX_ERROR_LOG(PMIX_ERR_NOT_FOUND); + return rc; + } + + PMIX_LIST_FOREACH(lock_item, lock_tracker, lock_item_t) { + num_locks = lock_item->num_locks; + seg_hdr = (segment_hdr_t *)lock_item->seg_desc->seg_info.seg_base_addr; + + /* Lock the "signalling" lock first to let clients know that + * server is going to get a write lock. + * Clients do not hold this lock for a long time, + * so this loop should be relatively fast. + */ + for (i = 0; i < num_locks; i++) { + pthread_mutex_t *mutex = _GET_MUTEX_PTR(seg_hdr, 2*i); + if (0 != pthread_mutex_lock(mutex)) { + return PMIX_ERROR; + } + } + + /* Now we can go and grab the main locks + * New clients will be stopped at the previous + * "barrier" locks.
+ * We will wait here while all clients currently holding + * locks will be done + */ + for(i = 0; i < num_locks; i++) { + pthread_mutex_t *mutex = _GET_MUTEX_PTR(seg_hdr, 2*i + 1); + if (0 != pthread_mutex_lock(mutex)) { + return PMIX_ERROR; + } + } + } + return PMIX_SUCCESS; +} + +pmix_status_t pmix_ds21_lock_wr_rel(pmix_common_dstor_lock_ctx_t lock_ctx) +{ + lock_item_t *lock_item; + pmix_list_t *lock_tracker = &((lock_ctx_t*)lock_ctx)->lock_traker; + uint32_t num_locks; + uint32_t i; + pmix_status_t rc; + segment_hdr_t *seg_hdr; + + if (NULL == lock_tracker) { + rc = PMIX_ERR_NOT_FOUND; + PMIX_ERROR_LOG(rc); + return rc; + } + + PMIX_LIST_FOREACH(lock_item, lock_tracker, lock_item_t) { + seg_hdr = (segment_hdr_t *)lock_item->seg_desc->seg_info.seg_base_addr; + num_locks = lock_item->num_locks; + + /* Lock the second lock first to ensure that all procs will see + * that we are trying to grab the main one */ + for(i=0; ilock_traker; + uint32_t idx; + pmix_status_t rc; + segment_hdr_t *seg_hdr; + + if (NULL == lock_tracker) { + rc = PMIX_ERR_NOT_FOUND; + PMIX_ERROR_LOG(rc); + return rc; + } + + lock_item = (lock_item_t*)pmix_list_get_first(lock_tracker); + idx = lock_item->lock_idx; + seg_hdr = (segment_hdr_t *)lock_item->seg_desc->seg_info.seg_base_addr; + + /* This mutex is only used to acquire the next one, + * this is a barrier that server is using to let clients + * know that it is going to grab the write lock + */ + + if (0 != pthread_mutex_lock(_GET_MUTEX_PTR(seg_hdr, 2*idx))) { + return PMIX_ERROR; + } + + /* Now grab the main lock */ + if (0 != pthread_mutex_lock(_GET_MUTEX_PTR(seg_hdr, 2*idx + 1))) { + return PMIX_ERROR; + } + + /* Once done - release signalling lock */ + if (0 != pthread_mutex_unlock(_GET_MUTEX_PTR(seg_hdr, 2*idx))) { + return PMIX_ERROR; + } + + return PMIX_SUCCESS; +} + +pmix_status_t pmix_ds21_lock_rd_rel(pmix_common_dstor_lock_ctx_t lock_ctx) +{ + lock_item_t *lock_item; + pmix_list_t *lock_tracker = 
&((lock_ctx_t*)lock_ctx)->lock_traker; + pmix_status_t rc; + uint32_t idx; + segment_hdr_t *seg_hdr; + + if (NULL == lock_tracker) { + rc = PMIX_ERR_NOT_FOUND; + PMIX_ERROR_LOG(rc); + return rc; + } + + lock_item = (lock_item_t*)pmix_list_get_first(lock_tracker); + seg_hdr = (segment_hdr_t *)lock_item->seg_desc->seg_info.seg_base_addr; + idx = lock_item->lock_idx; + + /* Release the main lock; a failed unlock is reported to the caller */ + if (0 != pthread_mutex_unlock(_GET_MUTEX_PTR(seg_hdr, 2*idx + 1))) { + return PMIX_ERROR; + } + + return PMIX_SUCCESS; +} diff --git a/opal/mca/pmix/pmix3x/pmix/src/mca/gds/gds.h b/opal/mca/pmix/pmix3x/pmix/src/mca/gds/gds.h index 9d90dbc565d..9ced4788aa7 100644 --- a/opal/mca/pmix/pmix3x/pmix/src/mca/gds/gds.h +++ b/opal/mca/pmix/pmix3x/pmix/src/mca/gds/gds.h @@ -1,8 +1,9 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* - * Copyright (c) 2016-2017 Mellanox Technologies, Inc. + * Copyright (c) 2016-2018 Mellanox Technologies, Inc. * All rights reserved. - * Copyright (c) 2016-2017 Intel, Inc. All rights reserved. + * Copyright (c) 2016-2018 Intel, Inc. All rights reserved. + * Copyright (c) 2018 IBM Corporation. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -44,7 +45,7 @@ BEGIN_C_DECLS /* forward declaration */ struct pmix_peer_t; -struct pmix_nspace_t; +struct pmix_namespace_t; /* backdoor to base verbosity */ PMIX_EXPORT extern int pmix_gds_base_output; @@ -117,7 +118,7 @@ typedef pmix_status_t (*pmix_gds_base_module_accept_kvs_resp_fn_t)(pmix_buffer_t * only we don't have packed data on the server side, and don't want * to incur the overhead of packing it just to unpack it in the function.
*/ -typedef pmix_status_t (*pmix_gds_base_module_cache_job_info_fn_t)(struct pmix_nspace_t *ns, +typedef pmix_status_t (*pmix_gds_base_module_cache_job_info_fn_t)(struct pmix_namespace_t *ns, pmix_info_t info[], size_t ninfo); /* define a convenience macro for caching job info */ @@ -127,7 +128,7 @@ typedef pmix_status_t (*pmix_gds_base_module_cache_job_info_fn_t)(struct pmix_ns pmix_output_verbose(1, pmix_gds_base_output, \ "[%s:%d] GDS CACHE JOB INFO WITH %s", \ __FILE__, __LINE__, _g->name); \ - (s) = _g->cache_job_info((struct pmix_nspace_t*)(n), (i), (ni)); \ + (s) = _g->cache_job_info((struct pmix_namespace_t*)(n), (i), (ni)); \ } while(0) /* register job-level info - this is provided as a special function @@ -135,7 +136,7 @@ typedef pmix_status_t (*pmix_gds_base_module_cache_job_info_fn_t)(struct pmix_ns * prepare the job-level info provided at PMIx_Register_nspace, because * we don't know the GDS component to use for that application until * a local client contacts us. Thus, the module is required to process - * the job-level info cached in the pmix_nspace_t for this job and + * the job-level info cached in the pmix_namespace_t for this job and * do whatever is necessary to support the client, packing any required * return message into the provided buffer. 
* @@ -155,7 +156,7 @@ typedef pmix_status_t (*pmix_gds_base_module_cache_job_info_fn_t)(struct pmix_ns * * The pmix_peer_t of the requesting client is provided here so that * the module can access the job-level info cached on the corresponding - * pmix_nspace_t pointed to by the pmix_peer_t + * pmix_namespace_t pointed to by the pmix_peer_t */ typedef pmix_status_t (*pmix_gds_base_module_register_job_info_fn_t)(struct pmix_peer_t *pr, pmix_buffer_t *reply); @@ -241,16 +242,16 @@ typedef pmix_status_t (*pmix_gds_base_module_store_fn_t)(const pmix_proc_t *proc * bo - pointer to the byte object containing the data * */ -typedef pmix_status_t (*pmix_gds_base_module_store_modex_fn_t)(struct pmix_nspace_t *ns, +typedef pmix_status_t (*pmix_gds_base_module_store_modex_fn_t)(struct pmix_namespace_t *ns, pmix_list_t *cbs, - pmix_byte_object_t *bo); + pmix_buffer_t *buff); /** * define a convenience macro for storing modex byte objects * * r - return status code * - * n - pointer to the pmix_nspace_t this blob is to be stored for + * n - pointer to the pmix_namespace_t this blob is to be stored for * * l - pointer to pmix_list_t containing pmix_server_caddy_t objects * of the local_cbs of the collective tracker @@ -262,7 +263,7 @@ typedef pmix_status_t (*pmix_gds_base_module_store_modex_fn_t)(struct pmix_nspac pmix_output_verbose(1, pmix_gds_base_output, \ "[%s:%d] GDS STORE MODEX WITH %s", \ __FILE__, __LINE__, (n)->compat.gds->name); \ - (r) = (n)->compat.gds->store_modex((struct pmix_nspace_t*)n, l, b); \ + (r) = (n)->compat.gds->store_modex((struct pmix_namespace_t*)n, l, b); \ } while (0) /** @@ -398,12 +399,26 @@ typedef pmix_status_t (*pmix_gds_base_module_del_nspace_fn_t)(const char* nspace } \ } while(0) +/* define a convenience macro for is_tsafe for fetch operation */ +#define PMIX_GDS_FETCH_IS_TSAFE(s, p) \ + do { \ + pmix_gds_base_module_t *_g = (p)->nptr->compat.gds; \ + pmix_output_verbose(1, pmix_gds_base_output, \ + "[%s:%d] GDS FETCH IS THREAD SAFE WITH 
%s", \ + __FILE__, __LINE__, _g->name); \ + if (true == _g->is_tsafe) { \ + (s) = PMIX_SUCCESS; \ + } else { \ + (s) = PMIX_ERR_NOT_SUPPORTED; \ + } \ +} while(0) /** * structure for gds modules */ typedef struct { const char *name; + const bool is_tsafe; pmix_gds_base_module_init_fn_t init; pmix_gds_base_module_fini_fn_t finalize; pmix_gds_base_assign_module_fn_t assign_module; diff --git a/opal/mca/pmix/pmix3x/pmix/src/mca/gds/hash/Makefile.am b/opal/mca/pmix/pmix3x/pmix/src/mca/gds/hash/Makefile.am index 7d9da0189e2..4067145ff28 100644 --- a/opal/mca/pmix/pmix3x/pmix/src/mca/gds/hash/Makefile.am +++ b/opal/mca/pmix/pmix3x/pmix/src/mca/gds/hash/Makefile.am @@ -11,7 +11,7 @@ # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. # Copyright (c) 2012 Los Alamos National Security, Inc. All rights reserved. -# Copyright (c) 2013-2017 Intel, Inc. All rights reserved. +# Copyright (c) 2013-2019 Intel, Inc. All rights reserved. # Copyright (c) 2017 Research Organization for Information Science # and Technology (RIST). All rights reserved. # $COPYRIGHT$ @@ -49,6 +49,9 @@ mcacomponent_LTLIBRARIES = $(component) mca_gds_hash_la_SOURCES = $(component_sources) mca_gds_hash_la_LIBADD = $(gds_hash_LIBS) mca_gds_hash_la_LDFLAGS = -module -avoid-version $(gds_hash_LDFLAGS) +if NEED_LIBPMIX +mca_gds_hash_la_LIBADD += $(top_builddir)/src/libpmix.la +endif noinst_LTLIBRARIES = $(lib) libmca_gds_hash_la_SOURCES = $(lib_sources) diff --git a/opal/mca/pmix/pmix3x/pmix/src/mca/gds/hash/gds_hash.c b/opal/mca/pmix/pmix3x/pmix/src/mca/gds/hash/gds_hash.c index da9608fb3e5..5e6a5341bd2 100644 --- a/opal/mca/pmix/pmix3x/pmix/src/mca/gds/hash/gds_hash.c +++ b/opal/mca/pmix/pmix3x/pmix/src/mca/gds/hash/gds_hash.c @@ -1,8 +1,10 @@ /* - * Copyright (c) 2015-2018 Intel, Inc. All rights reserved. - * Copyright (c) 2016 IBM Corporation. All rights reserved. + * Copyright (c) 2015-2019 Intel, Inc. All rights reserved. 
+ * Copyright (c) 2016-2018 IBM Corporation. All rights reserved. * Copyright (c) 2018 Research Organization for Information Science * and Technology (RIST). All rights reserved. + * Copyright (c) 2018 Mellanox Technologies, Inc. + * All rights reserved. * * $COPYRIGHT$ * @@ -36,11 +38,12 @@ #include "src/server/pmix_server_ops.h" #include "src/util/argv.h" #include "src/util/compress.h" +#include "src/mca/preg/preg.h" #include "src/util/error.h" #include "src/util/hash.h" #include "src/util/output.h" +#include "src/util/name_fns.h" #include "src/util/pmix_environ.h" -#include "src/mca/preg/preg.h" #include "src/mca/gds/base/base.h" #include "gds_hash.h" @@ -51,7 +54,7 @@ static void hash_finalize(void); static pmix_status_t hash_assign_module(pmix_info_t *info, size_t ninfo, int *priority); -static pmix_status_t hash_cache_job_info(struct pmix_nspace_t *ns, +static pmix_status_t hash_cache_job_info(struct pmix_namespace_t *ns, pmix_info_t info[], size_t ninfo); static pmix_status_t hash_register_job_info(struct pmix_peer_t *pr, @@ -64,9 +67,14 @@ static pmix_status_t hash_store(const pmix_proc_t *proc, pmix_scope_t scope, pmix_kval_t *kv); -static pmix_status_t hash_store_modex(struct pmix_nspace_t *ns, +static pmix_status_t hash_store_modex(struct pmix_namespace_t *ns, pmix_list_t *cbs, - pmix_byte_object_t *bo); + pmix_buffer_t *buff); + +static pmix_status_t _hash_store_modex(void * cbdata, + struct pmix_namespace_t *ns, + pmix_list_t *cbs, + pmix_byte_object_t *bo); static pmix_status_t hash_fetch(const pmix_proc_t *proc, pmix_scope_t scope, bool copy, @@ -91,6 +99,7 @@ static pmix_status_t accept_kvs_resp(pmix_buffer_t *buf); pmix_gds_base_module_t pmix_hash_module = { .name = "hash", + .is_tsafe = false, .init = hash_init, .finalize = hash_finalize, .assign_module = hash_assign_module, @@ -107,20 +116,75 @@ pmix_gds_base_module_t pmix_hash_module = { .accept_kvs_resp = accept_kvs_resp }; +/* Define a bitmask to track what information may not have + * been 
provided but is computable from other info */ +#define PMIX_HASH_PROC_DATA 0x00000001 +#define PMIX_HASH_JOB_SIZE 0x00000002 +#define PMIX_HASH_MAX_PROCS 0x00000004 +#define PMIX_HASH_NUM_NODES 0x00000008 +#define PMIX_HASH_PROC_MAP 0x00000010 +#define PMIX_HASH_NODE_MAP 0x00000020 + +/**********************************************/ +/* struct definitions */ +typedef struct { + pmix_list_item_t super; + uint32_t session; + pmix_list_t sessioninfo; + pmix_list_t nodeinfo; +} pmix_session_t; + typedef struct { pmix_list_item_t super; char *ns; - pmix_nspace_t *nptr; + pmix_namespace_t *nptr; pmix_hash_table_t internal; pmix_hash_table_t remote; pmix_hash_table_t local; bool gdata_added; -} pmix_hash_trkr_t; + pmix_list_t jobinfo; + pmix_list_t apps; + pmix_list_t nodeinfo; + pmix_session_t *session; +} pmix_job_t; -static void htcon(pmix_hash_trkr_t *p) +typedef struct { + pmix_list_item_t super; + uint32_t appnum; + pmix_list_t appinfo; + pmix_list_t nodeinfo; + pmix_job_t *job; +} pmix_apptrkr_t; + +typedef struct { + pmix_list_item_t super; + uint32_t nodeid; + char *hostname; + pmix_list_t info; +} pmix_nodeinfo_t; + +/**********************************************/ +/* class instantiations */ +static void scon(pmix_session_t *s) +{ + s->session = UINT32_MAX; + PMIX_CONSTRUCT(&s->sessioninfo, pmix_list_t); + PMIX_CONSTRUCT(&s->nodeinfo, pmix_list_t); +} +static void sdes(pmix_session_t *s) +{ + PMIX_LIST_DESTRUCT(&s->sessioninfo); + PMIX_LIST_DESTRUCT(&s->nodeinfo); +} +static PMIX_CLASS_INSTANCE(pmix_session_t, + pmix_list_item_t, + scon, sdes); + +static void htcon(pmix_job_t *p) { p->ns = NULL; p->nptr = NULL; + PMIX_CONSTRUCT(&p->jobinfo, pmix_list_t); PMIX_CONSTRUCT(&p->internal, pmix_hash_table_t); pmix_hash_table_init(&p->internal, 256); PMIX_CONSTRUCT(&p->remote, pmix_hash_table_t); @@ -128,8 +192,11 @@ static void htcon(pmix_hash_trkr_t *p) PMIX_CONSTRUCT(&p->local, pmix_hash_table_t); pmix_hash_table_init(&p->local, 256); p->gdata_added = false; + 
PMIX_CONSTRUCT(&p->apps, pmix_list_t); + PMIX_CONSTRUCT(&p->nodeinfo, pmix_list_t); + p->session = NULL; } -static void htdes(pmix_hash_trkr_t *p) +static void htdes(pmix_job_t *p) { if (NULL != p->ns) { free(p->ns); @@ -137,25 +204,411 @@ static void htdes(pmix_hash_trkr_t *p) if (NULL != p->nptr) { PMIX_RELEASE(p->nptr); } + PMIX_LIST_DESTRUCT(&p->jobinfo); pmix_hash_remove_data(&p->internal, PMIX_RANK_WILDCARD, NULL); PMIX_DESTRUCT(&p->internal); pmix_hash_remove_data(&p->remote, PMIX_RANK_WILDCARD, NULL); PMIX_DESTRUCT(&p->remote); pmix_hash_remove_data(&p->local, PMIX_RANK_WILDCARD, NULL); PMIX_DESTRUCT(&p->local); + PMIX_LIST_DESTRUCT(&p->apps); + PMIX_LIST_DESTRUCT(&p->nodeinfo); + if (NULL != p->session) { + PMIX_RELEASE(p->session); + } } -static PMIX_CLASS_INSTANCE(pmix_hash_trkr_t, +static PMIX_CLASS_INSTANCE(pmix_job_t, pmix_list_item_t, htcon, htdes); -static pmix_list_t myhashes; +static void apcon(pmix_apptrkr_t *p) +{ + p->appnum = 0; + PMIX_CONSTRUCT(&p->appinfo, pmix_list_t); + PMIX_CONSTRUCT(&p->nodeinfo, pmix_list_t); + p->job = NULL; +} +static void apdes(pmix_apptrkr_t *p) +{ + PMIX_LIST_DESTRUCT(&p->appinfo); + PMIX_LIST_DESTRUCT(&p->nodeinfo); + if (NULL != p->job) { + PMIX_RELEASE(p->job); + } +} +static PMIX_CLASS_INSTANCE(pmix_apptrkr_t, + pmix_list_item_t, + apcon, apdes); + +static void ndinfocon(pmix_nodeinfo_t *p) +{ + p->nodeid = 0; + p->hostname = NULL; + PMIX_CONSTRUCT(&p->info, pmix_list_t); +} +static void ndinfodes(pmix_nodeinfo_t *p) +{ + if (NULL != p->hostname) { + free(p->hostname); + } + PMIX_LIST_DESTRUCT(&p->info); +} +static PMIX_CLASS_INSTANCE(pmix_nodeinfo_t, + pmix_list_item_t, + ndinfocon, ndinfodes); + +/**********************************************/ + +/* process a node array - contains an array of + * node-level info for a single node. 
Either the + * nodeid, hostname, or both must be included + * in the array to identify the node */ +static pmix_status_t process_node_array(pmix_info_t *info, + pmix_list_t *tgt) +{ + size_t size, j; + pmix_info_t *iptr; + pmix_status_t rc = PMIX_SUCCESS; + pmix_kval_t *kp2, *k1, *knext; + pmix_list_t cache; + pmix_nodeinfo_t *nd = NULL, *ndptr; + bool update; + + pmix_output_verbose(2, pmix_gds_base_framework.framework_output, + "PROCESSING NODE ARRAY"); + + /* array of node-level info for a specific node */ + if (PMIX_DATA_ARRAY != info->value.type) { + PMIX_ERROR_LOG(PMIX_ERR_TYPE_MISMATCH); + return PMIX_ERR_TYPE_MISMATCH; + } + + /* setup arrays */ + size = info->value.data.darray->size; + iptr = (pmix_info_t*)info->value.data.darray->array; + PMIX_CONSTRUCT(&cache, pmix_list_t); + + /* cache the values while searching for the nodeid + * and/or hostname */ + for (j=0; j < size; j++) { + if (PMIX_CHECK_KEY(&iptr[j], PMIX_NODEID)) { + if (NULL == nd) { + nd = PMIX_NEW(pmix_nodeinfo_t); + } + PMIX_VALUE_GET_NUMBER(rc, &iptr[j].value, nd->nodeid, uint32_t); + if (PMIX_SUCCESS != rc) { + PMIX_ERROR_LOG(rc); + PMIX_RELEASE(nd); + PMIX_LIST_DESTRUCT(&cache); + return rc; + } + } else if (PMIX_CHECK_KEY(&iptr[j], PMIX_HOSTNAME)) { + if (NULL == nd) { + nd = PMIX_NEW(pmix_nodeinfo_t); + } + nd->hostname = strdup(iptr[j].value.data.string); + } else { + kp2 = PMIX_NEW(pmix_kval_t); + kp2->key = strdup(iptr[j].key); + kp2->value = (pmix_value_t*)malloc(sizeof(pmix_value_t)); + PMIX_VALUE_XFER(rc, kp2->value, &iptr[j].value); + if (PMIX_SUCCESS != rc) { + PMIX_ERROR_LOG(rc); + PMIX_RELEASE(kp2); + if (NULL != nd) { + PMIX_RELEASE(nd); + } + PMIX_LIST_DESTRUCT(&cache); + return rc; + } + pmix_list_append(&cache, &kp2->super); + } + } + + if (NULL == nd) { + /* they forgot to pass us the ident for the node */ + PMIX_LIST_DESTRUCT(&cache); + return PMIX_ERR_BAD_PARAM; + } + + /* see if we already have this node on the + * provided list */ + update = false; + 
PMIX_LIST_FOREACH(ndptr, tgt, pmix_nodeinfo_t) { + if (ndptr->nodeid == nd->nodeid || + (NULL != ndptr->hostname && NULL != nd->hostname && 0 == strcmp(ndptr->hostname, nd->hostname))) { + /* we assume that the data is updating the current + * values */ + if (NULL == ndptr->hostname && NULL != nd->hostname) { + ndptr->hostname = strdup(nd->hostname); + } + PMIX_RELEASE(nd); + nd = ndptr; + update = true; + break; + } + } + + /* transfer the cached items to the nodeinfo list */ + kp2 = (pmix_kval_t*)pmix_list_remove_first(&cache); + while (NULL != kp2) { + /* if this is an update, we have to ensure each data + * item only appears once on the list */ + if (update) { + PMIX_LIST_FOREACH_SAFE(k1, knext, &nd->info, pmix_kval_t) { + if (PMIX_CHECK_KEY(k1, kp2->key)) { + pmix_list_remove_item(&nd->info, &k1->super); + PMIX_RELEASE(k1); + break; + } + } + } + pmix_list_append(&nd->info, &kp2->super); + kp2 = (pmix_kval_t*)pmix_list_remove_first(&cache); + } + PMIX_LIST_DESTRUCT(&cache); + /* an updated node is already on tgt - only append newly created ones */ + if (!update) { pmix_list_append(tgt, &nd->super); } + return PMIX_SUCCESS; +} + +/* process an app array - contains an array of + * app-level info for a single app. If the + * appnum is not included in the array, then + * it is assumed that only one app is in the job. 
+ * This assumption is checked and generates + * an error if violated */ +static pmix_status_t process_app_array(pmix_info_t *info, + pmix_job_t *trk) +{ + pmix_list_t cache, ncache; + size_t size, j; + pmix_info_t *iptr; + pmix_status_t rc = PMIX_SUCCESS; + uint32_t appnum; + pmix_apptrkr_t *app = NULL, *apptr; + pmix_kval_t *kp2, *k1, *knext; + pmix_nodeinfo_t *nd; + bool update; + + pmix_output_verbose(2, pmix_gds_base_framework.framework_output, + "PROCESSING APP ARRAY"); + + /* apps have to belong to a job */ + if (NULL == trk) { + return PMIX_ERR_BAD_PARAM; + } + + /* array of app-level info */ + if (PMIX_DATA_ARRAY != info->value.type) { + PMIX_ERROR_LOG(PMIX_ERR_TYPE_MISMATCH); + return PMIX_ERR_TYPE_MISMATCH; + } + + /* setup arrays and lists */ + PMIX_CONSTRUCT(&cache, pmix_list_t); + PMIX_CONSTRUCT(&ncache, pmix_list_t); + size = info->value.data.darray->size; + iptr = (pmix_info_t*)info->value.data.darray->array; + + for (j=0; j < size; j++) { + if (PMIX_CHECK_KEY(&iptr[j], PMIX_APPNUM)) { + PMIX_VALUE_GET_NUMBER(rc, &iptr[j].value, appnum, uint32_t); + if (PMIX_SUCCESS != rc) { + PMIX_ERROR_LOG(rc); + goto release; + } + if (NULL != app) { + /* this is an error - there can be only one app + * described in this array */ + PMIX_RELEASE(app); + PMIX_LIST_DESTRUCT(&cache); + PMIX_LIST_DESTRUCT(&ncache); + return PMIX_ERR_BAD_PARAM; + } + app = PMIX_NEW(pmix_apptrkr_t); + app->appnum = appnum; + } else if (PMIX_CHECK_KEY(&iptr[j], PMIX_NODE_INFO_ARRAY)) { + if (PMIX_SUCCESS != (rc = process_node_array(&iptr[j], &ncache))) { + PMIX_ERROR_LOG(rc); + goto release; + } + } else { + kp2 = PMIX_NEW(pmix_kval_t); + kp2->key = strdup(iptr[j].key); + kp2->value = (pmix_value_t*)malloc(sizeof(pmix_value_t)); + PMIX_VALUE_XFER(rc, kp2->value, &iptr[j].value); + if (PMIX_SUCCESS != rc) { + PMIX_ERROR_LOG(rc); + PMIX_RELEASE(kp2); + goto release; + } + pmix_list_append(&cache, &kp2->super); + } + } + if (NULL == app) { + /* per the standard, they don't have to provide 
us with + * an appnum so long as only one app is in the job */ + if (0 == pmix_list_get_size(&trk->apps)) { + app = PMIX_NEW(pmix_apptrkr_t); + } else { + /* this is not allowed to happen - they are required + * to provide us with an app number per the standard */ + rc = PMIX_ERR_BAD_PARAM; + PMIX_ERROR_LOG(rc); + goto release; + } + } + /* see if we already have this app on the + * provided list */ + update = false; + PMIX_LIST_FOREACH(apptr, &trk->apps, pmix_apptrkr_t) { + if (apptr->appnum == app->appnum) { + /* we assume that the data is updating the current + * values */ + PMIX_RELEASE(app); + app = apptr; + update = true; + break; + } + } + + /* point the app at its job */ + if (NULL == app->job) { + PMIX_RETAIN(trk); + app->job = trk; + } + + /* transfer the app-level data across */ + kp2 = (pmix_kval_t*)pmix_list_remove_first(&cache); + while (NULL != kp2) { + /* if this is an update, we have to ensure each data + * item only appears once on the list */ + if (update) { + PMIX_LIST_FOREACH_SAFE(k1, knext, &app->appinfo, pmix_kval_t) { + if (PMIX_CHECK_KEY(k1, kp2->key)) { + pmix_list_remove_item(&app->appinfo, &k1->super); + PMIX_RELEASE(k1); + break; + } + } + } + pmix_list_append(&app->appinfo, &kp2->super); + kp2 = (pmix_kval_t*)pmix_list_remove_first(&cache); + } + /* transfer the associated node-level data across */ + nd = (pmix_nodeinfo_t*)pmix_list_remove_first(&ncache); + while (NULL != nd) { + pmix_list_append(&app->nodeinfo, &nd->super); + nd = (pmix_nodeinfo_t*)pmix_list_remove_first(&ncache); + } + if (!update) { pmix_list_append(&trk->apps, &app->super); } /* track a newly created app on its job; an updated one is already listed */ + release: + PMIX_LIST_DESTRUCT(&cache); + PMIX_LIST_DESTRUCT(&ncache); + + return rc; +} + +/* process a job array */ +static pmix_status_t process_job_array(pmix_info_t *info, + pmix_job_t *trk, + uint32_t *flags, + char ***procs, + char ***nodes) +{ + pmix_list_t cache; + size_t j, size; + pmix_info_t *iptr; + pmix_kval_t *kp2; + pmix_status_t rc; + + pmix_output_verbose(2, pmix_gds_base_framework.framework_output, + "PROCESSING JOB ARRAY"); + + /* 
array of job-level info */ + if (PMIX_DATA_ARRAY != info->value.type) { + PMIX_ERROR_LOG(PMIX_ERR_TYPE_MISMATCH); + return PMIX_ERR_TYPE_MISMATCH; + } + size = info->value.data.darray->size; + iptr = (pmix_info_t*)info->value.data.darray->array; + PMIX_CONSTRUCT(&cache, pmix_list_t); + for (j=0; j < size; j++) { + if (PMIX_CHECK_KEY(&iptr[j], PMIX_APP_INFO_ARRAY)) { + if (PMIX_SUCCESS != (rc = process_app_array(&iptr[j], trk))) { + return rc; + } + } else if (PMIX_CHECK_KEY(&iptr[j], PMIX_NODE_INFO_ARRAY)) { + if (PMIX_SUCCESS != (rc = process_node_array(&iptr[j], &trk->nodeinfo))) { + PMIX_ERROR_LOG(rc); + return rc; + } + } else if (PMIX_CHECK_KEY(&iptr[j], PMIX_PROC_MAP)) { + /* not allowed to get this more than once */ + if (*flags & PMIX_HASH_PROC_MAP) { + PMIX_ERROR_LOG(PMIX_ERR_BAD_PARAM); + return PMIX_ERR_BAD_PARAM; + } + /* parse the regex to get the argv array containing proc ranks on each node */ + if (PMIX_SUCCESS != (rc = pmix_preg.parse_procs(iptr[j].value.data.string, procs))) { + PMIX_ERROR_LOG(rc); + return rc; + } + /* mark that we got the map */ + *flags |= PMIX_HASH_PROC_MAP; + } else if (PMIX_CHECK_KEY(&iptr[j], PMIX_NODE_MAP)) { + /* not allowed to get this more than once */ + if (*flags & PMIX_HASH_NODE_MAP) { + PMIX_ERROR_LOG(PMIX_ERR_BAD_PARAM); + return PMIX_ERR_BAD_PARAM; + } + /* store the node map itself since that is + * what v3 uses */ + kp2 = PMIX_NEW(pmix_kval_t); + kp2->key = strdup(PMIX_NODE_MAP); + kp2->value = (pmix_value_t*)malloc(sizeof(pmix_value_t)); + kp2->value->type = PMIX_STRING; + kp2->value->data.string = strdup(iptr[j].value.data.string); + if (PMIX_SUCCESS != (rc = pmix_hash_store(&trk->internal, PMIX_RANK_WILDCARD, kp2))) { + PMIX_ERROR_LOG(rc); + PMIX_RELEASE(kp2); + return rc; + } + PMIX_RELEASE(kp2); // maintain acctg + + /* parse the regex to get the argv array of node names */ + if (PMIX_SUCCESS != (rc = pmix_preg.parse_nodes(iptr[j].value.data.string, nodes))) { + PMIX_ERROR_LOG(rc); + return rc; + } + /* 
mark that we got the map */ + *flags |= PMIX_HASH_NODE_MAP; + } else { + kp2 = PMIX_NEW(pmix_kval_t); + kp2->key = strdup(iptr[j].key); + kp2->value = (pmix_value_t*)malloc(sizeof(pmix_value_t)); + PMIX_VALUE_XFER(rc, kp2->value, &iptr[j].value); + if (PMIX_SUCCESS != rc) { + PMIX_RELEASE(kp2); + PMIX_LIST_DESTRUCT(&cache); + return rc; + } + pmix_list_append(&trk->jobinfo, &kp2->super); + } + } + return PMIX_SUCCESS; +} + +static pmix_list_t mysessions, myjobs; static pmix_status_t hash_init(pmix_info_t info[], size_t ninfo) { pmix_output_verbose(2, pmix_gds_base_framework.framework_output, "gds: hash init"); - PMIX_CONSTRUCT(&myhashes, pmix_list_t); + PMIX_CONSTRUCT(&mysessions, pmix_list_t); + PMIX_CONSTRUCT(&myjobs, pmix_list_t); return PMIX_SUCCESS; } @@ -164,7 +617,8 @@ static void hash_finalize(void) pmix_output_verbose(2, pmix_gds_base_framework.framework_output, "gds: hash finalize"); - PMIX_LIST_DESTRUCT(&myhashes); + PMIX_LIST_DESTRUCT(&mysessions); + PMIX_LIST_DESTRUCT(&myjobs); } static pmix_status_t hash_assign_module(pmix_info_t *info, size_t ninfo, @@ -194,7 +648,8 @@ static pmix_status_t hash_assign_module(pmix_info_t *info, size_t ninfo, } static pmix_status_t store_map(pmix_hash_table_t *ht, - char **nodes, char **ppn) + char **nodes, char **ppn, + uint32_t flags) { pmix_status_t rc; pmix_value_t *val; @@ -204,6 +659,8 @@ static pmix_status_t store_map(pmix_hash_table_t *ht, bool updated; pmix_kval_t *kp2; char **procs; + uint32_t totalprocs=0; + bool localldr; pmix_output_verbose(2, pmix_gds_base_framework.framework_output, "[%s:%d] gds:hash:store_map", @@ -215,6 +672,22 @@ static pmix_status_t store_map(pmix_hash_table_t *ht, return PMIX_ERR_BAD_PARAM; } + /* if they didn't provide the number of nodes, then + * compute it from the list of nodes */ + if (!(PMIX_HASH_NUM_NODES & flags)) { + kp2 = PMIX_NEW(pmix_kval_t); + kp2->key = strdup(PMIX_NUM_NODES); + kp2->value = (pmix_value_t*)malloc(sizeof(pmix_value_t)); + kp2->value->type = 
PMIX_UINT32; + kp2->value->data.uint32 = pmix_argv_count(nodes); + if (PMIX_SUCCESS != (rc = pmix_hash_store(ht, PMIX_RANK_WILDCARD, kp2))) { + PMIX_ERROR_LOG(rc); + PMIX_RELEASE(kp2); + return rc; + } + PMIX_RELEASE(kp2); // maintain acctg + } + for (n=0; NULL != nodes[n]; n++) { /* check and see if we already have data for this node */ val = NULL; @@ -232,18 +705,22 @@ static pmix_status_t store_map(pmix_hash_table_t *ht, } iptr = (pmix_info_t*)val->data.darray->array; updated = false; + localldr = false; for (m=0; m < val->data.darray->size; m++) { - if (0 == strncmp(iptr[m].key, PMIX_LOCAL_PEERS, PMIX_MAX_KEYLEN)) { + if (PMIX_CHECK_KEY(&iptr[m], PMIX_LOCAL_PEERS)) { /* we will update this entry */ if (NULL != iptr[m].value.data.string) { free(iptr[m].value.data.string); } iptr[m].value.data.string = strdup(ppn[n]); - updated = true; - break; + updated = true; // no need to add the local_peers to the array + } else if (PMIX_CHECK_KEY(&iptr[m], PMIX_LOCALLDR)) { + rank = strtoul(ppn[n], NULL, 10); + iptr[m].value.data.rank = rank; + localldr = true; // no need to add localldr to the array } } - if (!updated) { + if (!updated || !localldr) { /* append this entry to the current data */ kp2 = PMIX_NEW(pmix_kval_t); if (NULL == kp2) { @@ -262,7 +739,18 @@ static pmix_status_t store_map(pmix_hash_table_t *ht, return PMIX_ERR_NOMEM; } kp2->value->data.darray->type = PMIX_INFO; - kp2->value->data.darray->size = val->data.darray->size + 1; + /* if we didn't update the local leader, then we will + * add it here */ + m = 0; + if (!localldr) { + kp2->value->data.darray->size = val->data.darray->size + 1; + ++m; + } + /* if they didn't update the local peers, then we add it here */ + if (!updated) { + kp2->value->data.darray->size = val->data.darray->size + 1; + ++m; + } PMIX_INFO_CREATE(info, kp2->value->data.darray->size); if (NULL == info) { PMIX_RELEASE(kp2); @@ -272,7 +760,15 @@ static pmix_status_t store_map(pmix_hash_table_t *ht, for (m=0; m < val->data.darray->size; 
m++) { PMIX_INFO_XFER(&info[m], &iptr[m]); } - PMIX_INFO_LOAD(&info[kp2->value->data.darray->size-1], PMIX_LOCAL_PEERS, ppn[n], PMIX_STRING); + if (!updated) { + PMIX_INFO_LOAD(&info[kp2->value->data.darray->size-m], PMIX_LOCAL_PEERS, ppn[n], PMIX_STRING); + --m; + } + if (!localldr) { + rank = strtoul(ppn[n], NULL, 10); + PMIX_INFO_LOAD(&info[kp2->value->data.darray->size-m], PMIX_LOCALLDR, &rank, PMIX_PROC_RANK); + --m; + } kp2->value->data.darray->array = info; if (PMIX_SUCCESS != (rc = pmix_hash_store(ht, PMIX_RANK_WILDCARD, kp2))) { PMIX_ERROR_LOG(rc); @@ -300,14 +796,16 @@ static pmix_status_t store_map(pmix_hash_table_t *ht, return PMIX_ERR_NOMEM; } kp2->value->data.darray->type = PMIX_INFO; - PMIX_INFO_CREATE(info, 1); + PMIX_INFO_CREATE(info, 2); if (NULL == info) { PMIX_RELEASE(kp2); return PMIX_ERR_NOMEM; } PMIX_INFO_LOAD(&info[0], PMIX_LOCAL_PEERS, ppn[n], PMIX_STRING); + rank = strtoul(ppn[n], NULL, 10); + PMIX_INFO_LOAD(&info[1], PMIX_LOCALLDR, &rank, PMIX_PROC_RANK); kp2->value->data.darray->array = info; - kp2->value->data.darray->size = 1; + kp2->value->data.darray->size = 2; if (PMIX_SUCCESS != (rc = pmix_hash_store(ht, PMIX_RANK_WILDCARD, kp2))) { PMIX_ERROR_LOG(rc); PMIX_RELEASE(kp2); @@ -318,6 +816,7 @@ static pmix_status_t store_map(pmix_hash_table_t *ht, /* split the list of procs so we can store their * individual location data */ procs = pmix_argv_split(ppn[n], ','); + totalprocs += pmix_argv_count(procs); for (m=0; NULL != procs[m]; m++) { /* store the hostname for each proc */ kp2 = PMIX_NEW(pmix_kval_t); @@ -333,6 +832,48 @@ static pmix_status_t store_map(pmix_hash_table_t *ht, return rc; } PMIX_RELEASE(kp2); // maintain acctg + if (!(PMIX_HASH_PROC_DATA & flags)) { + /* add an entry for the nodeid */ + kp2 = PMIX_NEW(pmix_kval_t); + kp2->key = strdup(PMIX_NODEID); + kp2->value = (pmix_value_t*)malloc(sizeof(pmix_value_t)); + kp2->value->type = PMIX_UINT32; + kp2->value->data.uint32 = n; + if (PMIX_SUCCESS != (rc = pmix_hash_store(ht, 
rank, kp2))) { + PMIX_ERROR_LOG(rc); + PMIX_RELEASE(kp2); + pmix_argv_free(procs); + return rc; + } + PMIX_RELEASE(kp2); // maintain acctg + /* add an entry for the local rank */ + kp2 = PMIX_NEW(pmix_kval_t); + kp2->key = strdup(PMIX_LOCAL_RANK); + kp2->value = (pmix_value_t*)malloc(sizeof(pmix_value_t)); + kp2->value->type = PMIX_UINT16; + kp2->value->data.uint16 = m; + if (PMIX_SUCCESS != (rc = pmix_hash_store(ht, rank, kp2))) { + PMIX_ERROR_LOG(rc); + PMIX_RELEASE(kp2); + pmix_argv_free(procs); + return rc; + } + PMIX_RELEASE(kp2); // maintain acctg + /* add an entry for the node rank - for now, we assume + * only the one job is running */ + kp2 = PMIX_NEW(pmix_kval_t); + kp2->key = strdup(PMIX_NODE_RANK); + kp2->value = (pmix_value_t*)malloc(sizeof(pmix_value_t)); + kp2->value->type = PMIX_UINT16; + kp2->value->data.uint16 = m; + if (PMIX_SUCCESS != (rc = pmix_hash_store(ht, rank, kp2))) { + PMIX_ERROR_LOG(rc); + PMIX_RELEASE(kp2); + pmix_argv_free(procs); + return rc; + } + PMIX_RELEASE(kp2); // maintain acctg + } } pmix_argv_free(procs); } @@ -352,22 +893,62 @@ static pmix_status_t store_map(pmix_hash_table_t *ht, } PMIX_RELEASE(kp2); // maintain acctg + /* if they didn't provide the job size, compute it as + * being the number of provided procs (i.e., size of + * ppn list) */ + if (!(PMIX_HASH_JOB_SIZE & flags)) { + kp2 = PMIX_NEW(pmix_kval_t); + kp2->key = strdup(PMIX_JOB_SIZE); + kp2->value = (pmix_value_t*)malloc(sizeof(pmix_value_t)); + kp2->value->type = PMIX_UINT32; + kp2->value->data.uint32 = totalprocs; + if (PMIX_SUCCESS != (rc = pmix_hash_store(ht, PMIX_RANK_WILDCARD, kp2))) { + PMIX_ERROR_LOG(rc); + PMIX_RELEASE(kp2); + return rc; + } + PMIX_RELEASE(kp2); // maintain acctg + } + + /* if they didn't provide a value for max procs, just + * assume it is the same as the number of procs in the + * job and store it */ + if (!(PMIX_HASH_MAX_PROCS & flags)) { + kp2 = PMIX_NEW(pmix_kval_t); + kp2->key = strdup(PMIX_MAX_PROCS); + kp2->value = 
(pmix_value_t*)malloc(sizeof(pmix_value_t)); + kp2->value->type = PMIX_UINT32; + kp2->value->data.uint32 = totalprocs; + if (PMIX_SUCCESS != (rc = pmix_hash_store(ht, PMIX_RANK_WILDCARD, kp2))) { + PMIX_ERROR_LOG(rc); + PMIX_RELEASE(kp2); + return rc; + } + PMIX_RELEASE(kp2); // maintain acctg + } + + return PMIX_SUCCESS; } -pmix_status_t hash_cache_job_info(struct pmix_nspace_t *ns, +pmix_status_t hash_cache_job_info(struct pmix_namespace_t *ns, pmix_info_t info[], size_t ninfo) { - pmix_nspace_t *nptr = (pmix_nspace_t*)ns; - pmix_hash_trkr_t *trk, *t; + pmix_namespace_t *nptr = (pmix_namespace_t*)ns; + pmix_job_t *trk, *t; + pmix_session_t *s = NULL, *sptr; pmix_hash_table_t *ht; pmix_kval_t *kp2, *kvptr; pmix_info_t *iptr; char **nodes=NULL, **procs=NULL; uint8_t *tmp; + uint32_t sid=UINT32_MAX; pmix_rank_t rank; pmix_status_t rc=PMIX_SUCCESS; size_t n, j, size, len; + uint32_t flags = 0; + pmix_list_t cache, ncache; + pmix_nodeinfo_t *nd; pmix_output_verbose(2, pmix_gds_base_framework.framework_output, "[%s:%d] gds:hash:cache_job_info for nspace %s", @@ -376,7 +957,7 @@ pmix_status_t hash_cache_job_info(struct pmix_nspace_t *ns, /* find the hash table for this nspace */ trk = NULL; - PMIX_LIST_FOREACH(t, &myhashes, pmix_hash_trkr_t) { + PMIX_LIST_FOREACH(t, &myjobs, pmix_job_t) { if (0 == strcmp(nptr->nspace, t->ns)) { trk = t; break; @@ -384,14 +965,14 @@ pmix_status_t hash_cache_job_info(struct pmix_nspace_t *ns, } if (NULL == trk) { /* create a tracker as we will likely need it */ - trk = PMIX_NEW(pmix_hash_trkr_t); + trk = PMIX_NEW(pmix_job_t); if (NULL == trk) { return PMIX_ERR_NOMEM; } PMIX_RETAIN(nptr); trk->nptr = nptr; trk->ns = strdup(nptr->nspace); - pmix_list_append(&myhashes, &trk->super); + pmix_list_append(&myjobs, &trk->super); } /* if there isn't any data, then be content with just @@ -403,7 +984,141 @@ pmix_status_t hash_cache_job_info(struct pmix_nspace_t *ns, /* cache the job info on the internal hash table for this nspace */ ht = 
&trk->internal; for (n=0; n < ninfo; n++) { - if (0 == strcmp(info[n].key, PMIX_NODE_MAP)) { + if (PMIX_CHECK_KEY(&info[n], PMIX_SESSION_ID)) { + PMIX_VALUE_GET_NUMBER(rc, &info[n].value, sid, uint32_t); + if (PMIX_SUCCESS != rc) { + PMIX_ERROR_LOG(rc); + goto release; + } + /* see if we have this session */ + s = NULL; + PMIX_LIST_FOREACH(sptr, &mysessions, pmix_session_t) { + if (sptr->session == sid) { + s = sptr; + break; + } + } + if (NULL == s) { + s = PMIX_NEW(pmix_session_t); + s->session = sid; + pmix_list_append(&mysessions, &s->super); + } + /* point the job at it */ + if (NULL == trk->session) { + PMIX_RETAIN(s); + trk->session = s; + } + } else if (PMIX_CHECK_KEY(&info[n], PMIX_SESSION_INFO_ARRAY)) { + /* array of session-level info */ + if (PMIX_DATA_ARRAY != info[n].value.type) { + PMIX_ERROR_LOG(PMIX_ERR_BAD_PARAM); + rc = PMIX_ERR_TYPE_MISMATCH; + goto release; + } + size = info[n].value.data.darray->size; + iptr = (pmix_info_t*)info[n].value.data.darray->array; + PMIX_CONSTRUCT(&cache, pmix_list_t); + PMIX_CONSTRUCT(&ncache, pmix_list_t); + for (j=0; j < size; j++) { + if (PMIX_CHECK_KEY(&iptr[j], PMIX_SESSION_ID)) { + PMIX_VALUE_GET_NUMBER(rc, &iptr[j].value, sid, uint32_t); + if (PMIX_SUCCESS != rc) { + PMIX_ERROR_LOG(rc); + PMIX_LIST_DESTRUCT(&cache); + return rc; + } + /* setup a session object */ + if (NULL != s) { + /* does this match the one we were previously given? 
*/ + if (sid != s->session) { + /* no - see if we already have this session */ + PMIX_LIST_FOREACH(sptr, &mysessions, pmix_session_t) { + if (sptr->session == sid) { + s = sptr; + break; + } + } + if (sid != s->session) { + /* wasn't found, so create one */ + s = PMIX_NEW(pmix_session_t); + s->session = sid; + pmix_list_append(&mysessions, &s->super); + } + } + } else { + s = PMIX_NEW(pmix_session_t); + s->session = sid; + pmix_list_append(&mysessions, &s->super); + } + } else if (PMIX_CHECK_KEY(&iptr[j], PMIX_NODE_INFO_ARRAY)) { + if (PMIX_SUCCESS != (rc = process_node_array(&iptr[j], &ncache))) { + PMIX_ERROR_LOG(rc); + PMIX_LIST_DESTRUCT(&cache); + PMIX_LIST_DESTRUCT(&ncache); + goto release; + } + } else { + kp2 = PMIX_NEW(pmix_kval_t); + kp2->key = strdup(iptr[j].key); + kp2->value = (pmix_value_t*)malloc(sizeof(pmix_value_t)); + PMIX_VALUE_XFER(rc, kp2->value, &iptr[j].value); + if (PMIX_SUCCESS != rc) { + PMIX_ERROR_LOG(rc); + PMIX_RELEASE(kp2); + PMIX_LIST_DESTRUCT(&cache); + PMIX_LIST_DESTRUCT(&ncache); + goto release; + } + pmix_list_append(&cache, &kp2->super); + } + } + if (NULL == s) { + /* this is not allowed to happen - they are required + * to provide us with a session ID per the standard */ + PMIX_LIST_DESTRUCT(&cache); + rc = PMIX_ERR_BAD_PARAM; + PMIX_ERROR_LOG(rc); + goto release; + } + /* point the job at it */ + if (NULL == trk->session) { + PMIX_RETAIN(s); + trk->session = s; + } + /* transfer the data across */ + kp2 = (pmix_kval_t*)pmix_list_remove_first(&cache); + while (NULL != kp2) { + pmix_list_append(&s->sessioninfo, &kp2->super); + kp2 = (pmix_kval_t*)pmix_list_remove_first(&cache); + } + PMIX_LIST_DESTRUCT(&cache); + nd = (pmix_nodeinfo_t*)pmix_list_remove_first(&ncache); + while (NULL != nd) { + pmix_list_append(&s->nodeinfo, &nd->super); + nd = (pmix_nodeinfo_t*)pmix_list_remove_first(&ncache); + } + PMIX_LIST_DESTRUCT(&ncache); + } else if (PMIX_CHECK_KEY(&info[n], PMIX_JOB_INFO_ARRAY)) { + if (PMIX_SUCCESS != (rc = 
process_job_array(&info[n], trk, &flags, &procs, &nodes))) { + PMIX_ERROR_LOG(rc); + goto release; + } + } else if (PMIX_CHECK_KEY(&info[n], PMIX_APP_INFO_ARRAY)) { + if (PMIX_SUCCESS != (rc = process_app_array(&info[n], trk))) { + PMIX_ERROR_LOG(rc); + goto release; + } + } else if (PMIX_CHECK_KEY(&info[n], PMIX_NODE_INFO_ARRAY)) { + if (PMIX_SUCCESS != (rc = process_node_array(&info[n], &trk->nodeinfo))) { + PMIX_ERROR_LOG(rc); + goto release; + } + } else if (PMIX_CHECK_KEY(&info[n], PMIX_NODE_MAP)) { + /* not allowed to get this more than once */ + if (flags & PMIX_HASH_NODE_MAP) { + PMIX_ERROR_LOG(PMIX_ERR_BAD_PARAM); + return PMIX_ERR_BAD_PARAM; + } /* store the node map itself since that is * what v3 uses */ kp2 = PMIX_NEW(pmix_kval_t); @@ -423,29 +1138,23 @@ pmix_status_t hash_cache_job_info(struct pmix_nspace_t *ns, PMIX_ERROR_LOG(rc); goto release; } - /* if we have already found the proc map, then parse - * and store the detailed map */ - if (NULL != procs) { - if (PMIX_SUCCESS != (rc = store_map(ht, nodes, procs))) { - PMIX_ERROR_LOG(rc); - goto release; - } + /* mark that we got the map */ + flags |= PMIX_HASH_NODE_MAP; + } else if (PMIX_CHECK_KEY(&info[n], PMIX_PROC_MAP)) { + /* not allowed to get this more than once */ + if (flags & PMIX_HASH_PROC_MAP) { + PMIX_ERROR_LOG(PMIX_ERR_BAD_PARAM); + return PMIX_ERR_BAD_PARAM; } - } else if (0 == strcmp(info[n].key, PMIX_PROC_MAP)) { /* parse the regex to get the argv array containing proc ranks on each node */ if (PMIX_SUCCESS != (rc = pmix_preg.parse_procs(info[n].value.data.string, &procs))) { PMIX_ERROR_LOG(rc); goto release; } - /* if we have already recv'd the node map, then parse - * and store the detailed map */ - if (NULL != nodes) { - if (PMIX_SUCCESS != (rc = store_map(ht, nodes, procs))) { - PMIX_ERROR_LOG(rc); - goto release; - } - } + /* mark that we got the map */ + flags |= PMIX_HASH_PROC_MAP; } else if (0 == strcmp(info[n].key, PMIX_PROC_DATA)) { + flags |= PMIX_HASH_PROC_DATA; /* an array 
of data pertaining to a specific proc */ if (PMIX_DATA_ARRAY != info[n].value.type) { PMIX_ERROR_LOG(PMIX_ERR_BAD_PARAM); @@ -535,9 +1244,15 @@ pmix_status_t hash_cache_job_info(struct pmix_nspace_t *ns, goto release; } PMIX_RELEASE(kp2); // maintain acctg - /* if this is the job size, then store it */ - if (0 == strncmp(info[n].key, PMIX_JOB_SIZE, PMIX_MAX_KEYLEN)) { + /* if this is the job size, then store it in + * the nptr tracker and flag that we were given it */ + if (PMIX_CHECK_KEY(&info[n], PMIX_JOB_SIZE)) { nptr->nprocs = info[n].value.data.uint32; + flags |= PMIX_HASH_JOB_SIZE; + } else if (PMIX_CHECK_KEY(&info[n], PMIX_NUM_NODES)) { + flags |= PMIX_HASH_NUM_NODES; + } else if (PMIX_CHECK_KEY(&info[n], PMIX_MAX_PROCS)) { + flags |= PMIX_HASH_MAX_PROCS; } } } @@ -569,6 +1284,17 @@ pmix_status_t hash_cache_job_info(struct pmix_nspace_t *ns, trk->gdata_added = true; } + /* we must have the proc AND node maps */ + if (NULL == procs || NULL == nodes) { + rc = PMIX_ERR_NOT_FOUND; + goto release; + } + + if (PMIX_SUCCESS != (rc = store_map(ht, nodes, procs, flags))) { + PMIX_ERROR_LOG(rc); + goto release; + } + release: if (NULL != nodes) { pmix_argv_free(nodes); @@ -580,21 +1306,21 @@ pmix_status_t hash_cache_job_info(struct pmix_nspace_t *ns, } static pmix_status_t register_info(pmix_peer_t *peer, - pmix_nspace_t *ns, + pmix_namespace_t *ns, pmix_buffer_t *reply) { - pmix_hash_trkr_t *trk, *t; + pmix_job_t *trk, *t; pmix_hash_table_t *ht; pmix_value_t *val, blob; pmix_status_t rc = PMIX_SUCCESS; pmix_info_t *info; size_t ninfo, n; - pmix_kval_t kv; + pmix_kval_t kv, *kvptr; pmix_buffer_t buf; pmix_rank_t rank; trk = NULL; - PMIX_LIST_FOREACH(t, &myhashes, pmix_hash_trkr_t) { + PMIX_LIST_FOREACH(t, &myjobs, pmix_job_t) { if (0 == strcmp(ns->nspace, t->ns)) { trk = t; break; @@ -633,28 +1359,33 @@ static pmix_status_t register_info(pmix_peer_t *peer, PMIX_VALUE_RELEASE(val); } + /* add all values in the jobinfo list */ + PMIX_LIST_FOREACH(kvptr, &trk->jobinfo, 
pmix_kval_t) { + PMIX_BFROPS_PACK(rc, peer, reply, kvptr, 1, PMIX_KVAL); + } + + /* get the proc-level data for each proc in the job */ for (rank=0; rank < ns->nprocs; rank++) { val = NULL; rc = pmix_hash_fetch(ht, rank, NULL, &val); - if (PMIX_SUCCESS != rc) { + if (PMIX_SUCCESS != rc && PMIX_ERR_PROC_ENTRY_NOT_FOUND != rc) { PMIX_ERROR_LOG(rc); if (NULL != val) { PMIX_VALUE_RELEASE(val); } - return rc; - } - if (NULL == val) { - return PMIX_ERR_NOT_FOUND; + return rc; } PMIX_CONSTRUCT(&buf, pmix_buffer_t); PMIX_BFROPS_PACK(rc, peer, &buf, &rank, 1, PMIX_PROC_RANK); - info = (pmix_info_t*)val->data.darray->array; - ninfo = val->data.darray->size; - for (n=0; n < ninfo; n++) { - kv.key = info[n].key; - kv.value = &info[n].value; - PMIX_BFROPS_PACK(rc, peer, &buf, &kv, 1, PMIX_KVAL); + if (NULL != val) { + info = (pmix_info_t*)val->data.darray->array; + ninfo = val->data.darray->size; + for (n=0; n < ninfo; n++) { + kv.key = info[n].key; + kv.value = &info[n].value; + PMIX_BFROPS_PACK(rc, peer, &buf, &kv, 1, PMIX_KVAL); + } } kv.key = PMIX_PROC_BLOB; kv.value = &blob; @@ -672,16 +1403,16 @@ static pmix_status_t register_info(pmix_peer_t *peer, } /* the purpose of this function is to pack the job-level - * info stored in the pmix_nspace_t into a buffer and send + * info stored in the pmix_namespace_t into a buffer and send * it to the given client */ static pmix_status_t hash_register_job_info(struct pmix_peer_t *pr, pmix_buffer_t *reply) { pmix_peer_t *peer = (pmix_peer_t*)pr; - pmix_nspace_t *ns = peer->nptr; + pmix_namespace_t *ns = peer->nptr; char *msg; pmix_status_t rc; - pmix_hash_trkr_t *trk, *t2; + pmix_job_t *trk, *t2; if (!PMIX_PROC_IS_SERVER(pmix_globals.mypeer) && !PMIX_PROC_IS_LAUNCHER(pmix_globals.mypeer)) { @@ -718,7 +1449,7 @@ static pmix_status_t hash_register_job_info(struct pmix_peer_t *pr, /* setup a tracker for this nspace as we will likely * need it again */ trk = NULL; - PMIX_LIST_FOREACH(t2, &myhashes, pmix_hash_trkr_t) { + 
PMIX_LIST_FOREACH(t2, &myjobs, pmix_job_t) { if (ns == t2->nptr) { trk = t2; if (NULL == trk->ns) { @@ -728,11 +1459,11 @@ static pmix_status_t hash_register_job_info(struct pmix_peer_t *pr, } } if (NULL == trk) { - trk = PMIX_NEW(pmix_hash_trkr_t); + trk = PMIX_NEW(pmix_job_t); trk->ns = strdup(ns->nspace); PMIX_RETAIN(ns); trk->nptr = ns; - pmix_list_append(&myhashes, &trk->super); + pmix_list_append(&myjobs, &trk->super); } /* the job info for the specified nspace has @@ -775,10 +1506,11 @@ static pmix_status_t hash_store_job_info(const char *nspace, pmix_byte_object_t *bo; pmix_buffer_t buf2; int rank; - pmix_hash_trkr_t *htptr; + pmix_job_t *htptr; pmix_hash_table_t *ht; char **nodelist = NULL; pmix_info_t *info, *iptr; + pmix_namespace_t *ns, *nptr; pmix_output_verbose(2, pmix_gds_base_framework.framework_output, "[%s:%u] pmix:gds:hash store job info for nspace %s", @@ -798,9 +1530,27 @@ static pmix_status_t hash_store_job_info(const char *nspace, return rc; } + /* see if we already have this nspace */ + nptr = NULL; + PMIX_LIST_FOREACH(ns, &pmix_globals.nspaces, pmix_namespace_t) { + if (0 == strcmp(ns->nspace, nspace)) { + nptr = ns; + break; + } + } + if (NULL == nptr) { + nptr = PMIX_NEW(pmix_namespace_t); + if (NULL == nptr) { + rc = PMIX_ERR_NOMEM; + return rc; + } + nptr->nspace = strdup(nspace); + pmix_list_append(&pmix_globals.nspaces, &nptr->super); + } + /* see if we already have a hash table for this nspace */ ht = NULL; - PMIX_LIST_FOREACH(htptr, &myhashes, pmix_hash_trkr_t) { + PMIX_LIST_FOREACH(htptr, &myjobs, pmix_job_t) { if (0 == strcmp(htptr->ns, nspace)) { ht = &htptr->internal; break; @@ -808,9 +1558,11 @@ static pmix_status_t hash_store_job_info(const char *nspace, } if (NULL == ht) { /* nope - create one */ - htptr = PMIX_NEW(pmix_hash_trkr_t); + htptr = PMIX_NEW(pmix_job_t); htptr->ns = strdup(nspace); - pmix_list_append(&myhashes, &htptr->super); + PMIX_RETAIN(nptr); + htptr->nptr = nptr; + pmix_list_append(&myjobs, &htptr->super); ht 
= &htptr->internal; } @@ -822,7 +1574,7 @@ static pmix_status_t hash_store_job_info(const char *nspace, pmix_output_verbose(2, pmix_gds_base_framework.framework_output, "[%s:%u] pmix:gds:hash store job info working key %s", pmix_globals.myid.nspace, pmix_globals.myid.rank, kptr->key); - if (0 == strcmp(kptr->key, PMIX_PROC_BLOB)) { + if (PMIX_CHECK_KEY(kptr, PMIX_PROC_BLOB)) { bo = &(kptr->value->data.bo); PMIX_CONSTRUCT(&buf2, pmix_buffer_t); PMIX_LOAD_BUFFER(pmix_client_globals.myserver, &buf2, bo->bytes, bo->size); @@ -873,7 +1625,7 @@ static pmix_status_t hash_store_job_info(const char *nspace, /* cleanup */ PMIX_DESTRUCT(&buf2); // releases the original kptr data PMIX_RELEASE(kp2); - } else if (0 == strcmp(kptr->key, PMIX_MAP_BLOB)) { + } else if (PMIX_CHECK_KEY(kptr, PMIX_MAP_BLOB)) { /* transfer the byte object for unpacking */ bo = &(kptr->value->data.bo); PMIX_CONSTRUCT(&buf2, pmix_buffer_t); @@ -1044,6 +1796,11 @@ static pmix_status_t hash_store_job_info(const char *nspace, PMIX_RELEASE(kptr); return rc; } + /* if this is the job size, then store it in + * the nptr tracker */ + if (0 == nptr->nprocs && PMIX_CHECK_KEY(kptr, PMIX_JOB_SIZE)) { + nptr->nprocs = kptr->value->data.uint32; + } } PMIX_RELEASE(kptr); kptr = PMIX_NEW(pmix_kval_t); @@ -1066,14 +1823,15 @@ static pmix_status_t hash_store(const pmix_proc_t *proc, pmix_scope_t scope, pmix_kval_t *kv) { - pmix_hash_trkr_t *trk, *t; + pmix_job_t *trk, *t; pmix_status_t rc; pmix_kval_t *kp; + pmix_namespace_t *ns, *nptr; pmix_output_verbose(2, pmix_gds_base_framework.framework_output, - "[%s:%d] gds:hash:hash_store for proc [%s:%d] key %s type %s scope %s", - pmix_globals.myid.nspace, pmix_globals.myid.rank, - proc->nspace, proc->rank, kv->key, + "%s gds:hash:hash_store for proc %s key %s type %s scope %s", + PMIX_NAME_PRINT(&pmix_globals.myid), + PMIX_NAME_PRINT(proc), kv->key, PMIx_Data_type_string(kv->value->type), PMIx_Scope_string(scope)); if (NULL == kv->key) { @@ -1082,7 +1840,7 @@ static 
pmix_status_t hash_store(const pmix_proc_t *proc, /* find the hash table for this nspace */ trk = NULL; - PMIX_LIST_FOREACH(t, &myhashes, pmix_hash_trkr_t) { + PMIX_LIST_FOREACH(t, &myjobs, pmix_job_t) { if (0 == strcmp(proc->nspace, t->ns)) { trk = t; break; @@ -1090,9 +1848,29 @@ static pmix_status_t hash_store(const pmix_proc_t *proc, } if (NULL == trk) { /* create one */ - trk = PMIX_NEW(pmix_hash_trkr_t); + trk = PMIX_NEW(pmix_job_t); trk->ns = strdup(proc->nspace); - pmix_list_append(&myhashes, &trk->super); + /* see if we already have this nspace */ + nptr = NULL; + PMIX_LIST_FOREACH(ns, &pmix_globals.nspaces, pmix_namespace_t) { + if (0 == strcmp(ns->nspace, proc->nspace)) { + nptr = ns; + break; + } + } + if (NULL == nptr) { + nptr = PMIX_NEW(pmix_namespace_t); + if (NULL == nptr) { + rc = PMIX_ERR_NOMEM; + PMIX_RELEASE(trk); + return rc; + } + nptr->nspace = strdup(proc->nspace); + pmix_list_append(&pmix_globals.nspaces, &nptr->super); + } + PMIX_RETAIN(nptr); + trk->nptr = nptr; + pmix_list_append(&myjobs, &trk->super); } /* see if the proc is me */ @@ -1125,6 +1903,11 @@ static pmix_status_t hash_store(const pmix_proc_t *proc, } } + /* if the number of procs for the nspace object is new, then update it */ + if (0 == trk->nptr->nprocs && PMIX_CHECK_KEY(kv, PMIX_JOB_SIZE)) { + trk->nptr->nprocs = kv->value->data.uint32; + } + /* store it in the corresponding hash table */ if (PMIX_INTERNAL == scope) { if (PMIX_SUCCESS != (rc = pmix_hash_store(&trk->internal, proc->rank, kv))) { @@ -1181,17 +1964,25 @@ static pmix_status_t hash_store(const pmix_proc_t *proc, * host has received data from some other peer. 
It therefore * always contains data solely from remote procs, and we * shall store it accordingly */ -static pmix_status_t hash_store_modex(struct pmix_nspace_t *nspace, +static pmix_status_t hash_store_modex(struct pmix_namespace_t *nspace, pmix_list_t *cbs, - pmix_byte_object_t *bo) + pmix_buffer_t *buf) { + return pmix_gds_base_store_modex(nspace, cbs, buf, _hash_store_modex, NULL); +} + +static pmix_status_t _hash_store_modex(void * cbdata, + struct pmix_namespace_t *nspace, + pmix_list_t *cbs, + pmix_byte_object_t *bo) { - pmix_nspace_t *ns = (pmix_nspace_t*)nspace; - pmix_hash_trkr_t *trk, *t; + pmix_namespace_t *ns = (pmix_namespace_t*)nspace; + pmix_job_t *trk, *t; pmix_status_t rc = PMIX_SUCCESS; int32_t cnt; pmix_buffer_t pbkt; pmix_proc_t proc; pmix_kval_t *kv; + pmix_namespace_t *ns2, *nptr; pmix_output_verbose(2, pmix_gds_base_framework.framework_output, "[%s:%d] gds:hash:store_modex for nspace %s", @@ -1200,7 +1991,7 @@ static pmix_status_t hash_store_modex(struct pmix_nspace_t *nspace, /* find the hash table for this nspace */ trk = NULL; - PMIX_LIST_FOREACH(t, &myhashes, pmix_hash_trkr_t) { + PMIX_LIST_FOREACH(t, &myjobs, pmix_job_t) { if (0 == strcmp(ns->nspace, t->ns)) { trk = t; break; @@ -1208,9 +1999,29 @@ static pmix_status_t hash_store_modex(struct pmix_nspace_t *nspace, } if (NULL == trk) { /* create one */ - trk = PMIX_NEW(pmix_hash_trkr_t); + trk = PMIX_NEW(pmix_job_t); trk->ns = strdup(ns->nspace); - pmix_list_append(&myhashes, &trk->super); + /* see if we already have this nspace */ + nptr = NULL; + PMIX_LIST_FOREACH(ns2, &pmix_globals.nspaces, pmix_namespace_t) { + if (0 == strcmp(ns->nspace, ns2->nspace)) { + nptr = ns2; + break; + } + } + if (NULL == nptr) { + nptr = PMIX_NEW(pmix_namespace_t); + if (NULL == nptr) { + rc = PMIX_ERR_NOMEM; + PMIX_RELEASE(trk); + return rc; + } + nptr->nspace = strdup(ns->nspace); + pmix_list_append(&pmix_globals.nspaces, &nptr->super); + } + PMIX_RETAIN(nptr); + trk->nptr = nptr; + 
pmix_list_append(&myjobs, &trk->super); } /* this is data returned via the PMIx_Fence call when @@ -1240,14 +2051,20 @@ static pmix_status_t hash_store_modex(struct pmix_nspace_t *nspace, kv = PMIX_NEW(pmix_kval_t); PMIX_BFROPS_UNPACK(rc, pmix_globals.mypeer, &pbkt, kv, &cnt, PMIX_KVAL); while (PMIX_SUCCESS == rc) { - /* store this in the hash table */ - if (PMIX_SUCCESS != (rc = pmix_hash_store(&trk->remote, proc.rank, kv))) { - PMIX_ERROR_LOG(rc); - bo->bytes = pbkt.base_ptr; - bo->size = pbkt.bytes_used; // restore the incoming data - pbkt.base_ptr = NULL; - PMIX_DESTRUCT(&pbkt); - return rc; + if (PMIX_RANK_UNDEF == proc.rank) { + /* if the rank is undefined, then we store it on the + * remote table of rank=0 as we know that rank must + * always exist */ + if (PMIX_SUCCESS != (rc = pmix_hash_store(&trk->remote, 0, kv))) { + PMIX_ERROR_LOG(rc); + return rc; + } + } else { + /* store this in the hash table */ + if (PMIX_SUCCESS != (rc = pmix_hash_store(&trk->remote, proc.rank, kv))) { + PMIX_ERROR_LOG(rc); + return rc; + } } PMIX_RELEASE(kv); // maintain accounting as the hash increments the ref count /* continue along */ @@ -1269,25 +2086,257 @@ static pmix_status_t hash_store_modex(struct pmix_nspace_t *nspace, } +static pmix_status_t dohash(pmix_hash_table_t *ht, + const char *key, + pmix_rank_t rank, + bool skip_genvals, + pmix_list_t *kvs) +{ + pmix_status_t rc; + pmix_value_t *val; + pmix_kval_t *kv, *k2; + pmix_info_t *info; + size_t n, ninfo; + bool found; + + rc = pmix_hash_fetch(ht, rank, key, &val); + if (PMIX_SUCCESS == rc) { + /* if the key was NULL, then all found keys will be + * returned as a pmix_data_array_t in the value */ + if (NULL == key) { + if (NULL == val->data.darray || + PMIX_INFO != val->data.darray->type || + 0 == val->data.darray->size) { + PMIX_ERROR_LOG(PMIX_ERR_NOT_FOUND); + PMIX_RELEASE(val); + return PMIX_ERR_NOT_FOUND; + } + info = (pmix_info_t*)val->data.darray->array; + ninfo = val->data.darray->size; + for (n=0; n < ninfo; 
n++) { + /* if the rank is UNDEF, then we don't want + * anything that starts with "pmix" */ + if (skip_genvals && + 0 == strncmp(info[n].key, "pmix", 4)) { + continue; + } + /* see if we already have this on the list */ + found = false; + PMIX_LIST_FOREACH(k2, kvs, pmix_kval_t) { + if (PMIX_CHECK_KEY(&info[n], k2->key)) { + found = true; + break; + } + } + if (found) { + continue; + } + kv = PMIX_NEW(pmix_kval_t); + if (NULL == kv) { + PMIX_VALUE_RELEASE(val); + return PMIX_ERR_NOMEM; + } + kv->key = strdup(info[n].key); + kv->value = (pmix_value_t*)malloc(sizeof(pmix_value_t)); + if (NULL == kv->value) { + PMIX_VALUE_RELEASE(val); + PMIX_RELEASE(kv); + return PMIX_ERR_NOMEM; + } + PMIX_BFROPS_VALUE_XFER(rc, pmix_globals.mypeer, + kv->value, &info[n].value); + if (PMIX_SUCCESS != rc) { + PMIX_ERROR_LOG(rc); + PMIX_VALUE_RELEASE(val); + PMIX_RELEASE(kv); + return rc; + } + pmix_list_append(kvs, &kv->super); + } + PMIX_VALUE_RELEASE(val); + } else { + kv = PMIX_NEW(pmix_kval_t); + if (NULL == kv) { + PMIX_VALUE_RELEASE(val); + return PMIX_ERR_NOMEM; + } + kv->key = strdup(key); + kv->value = val; + pmix_list_append(kvs, &kv->super); + } + } + return rc; +} + +static pmix_status_t fetch_nodeinfo(const char *key, pmix_list_t *tgt, + pmix_info_t *info, size_t ninfo, + pmix_list_t *kvs) +{ + size_t n; + pmix_status_t rc; + uint32_t nid=0; + char *hostname = NULL; + bool found = false; + pmix_nodeinfo_t *nd, *ndptr; + pmix_kval_t *kv, *kp2; + + pmix_output_verbose(2, pmix_gds_base_framework.framework_output, + "FETCHING NODE INFO"); + + /* scan for the nodeID or hostname to identify + * which node they are asking about */ + for (n=0; n < ninfo; n++) { + if (PMIX_CHECK_KEY(&info[n], PMIX_NODEID)) { + PMIX_VALUE_GET_NUMBER(rc, &info[n].value, nid, uint32_t); + if (PMIX_SUCCESS != rc) { + return rc; + } + found = true; + break; + } else if (PMIX_CHECK_KEY(&info[n], PMIX_HOSTNAME)) { + hostname = info[n].value.data.string; + found = true; + break; + } + } + if (!found) { + 
return PMIX_ERR_DATA_VALUE_NOT_FOUND; + } + + /* scan the list of nodes to find the matching entry */ + nd = NULL; + PMIX_LIST_FOREACH(ndptr, tgt, pmix_nodeinfo_t) { + if (NULL != hostname && 0 == strcmp(ndptr->hostname, hostname)) { + nd = ndptr; + break; + } + if (NULL == hostname && nid == ndptr->nodeid) { + nd = ndptr; + break; + } + } + if (NULL == nd) { + return PMIX_ERR_NOT_FOUND; + } + /* scan the info list of this node to generate the results */ + rc = PMIX_ERR_NOT_FOUND; + PMIX_LIST_FOREACH(kv, &nd->info, pmix_kval_t) { + if (NULL == key || PMIX_CHECK_KEY(kv, key)) { + kp2 = PMIX_NEW(pmix_kval_t); + kp2->key = strdup(kv->key); + kp2->value = (pmix_value_t*)malloc(sizeof(pmix_value_t)); + PMIX_VALUE_XFER(rc, kp2->value, kv->value); + if (PMIX_SUCCESS != rc) { + PMIX_ERROR_LOG(rc); + PMIX_RELEASE(kp2); + return rc; + } + pmix_list_append(kvs, &kp2->super); + rc = PMIX_SUCCESS; + if (NULL != key) { + break; + } + } + } + + return rc; +} + +static pmix_status_t fetch_appinfo(const char *key, pmix_list_t *tgt, + pmix_info_t *info, size_t ninfo, + pmix_list_t *kvs) +{ + size_t n; + pmix_status_t rc; + uint32_t appnum; + bool found = false; + pmix_apptrkr_t *app, *apptr; + pmix_kval_t *kv, *kp2; + + pmix_output_verbose(2, pmix_gds_base_framework.framework_output, + "FETCHING APP INFO"); + + /* scan for the appnum to identify + * which app they are asking about */ + for (n=0; n < ninfo; n++) { + if (PMIX_CHECK_KEY(&info[n], PMIX_APPNUM)) { + PMIX_VALUE_GET_NUMBER(rc, &info[n].value, appnum, uint32_t); + if (PMIX_SUCCESS != rc) { + return rc; + } + found = true; + break; + } + } + if (!found) { + return PMIX_ERR_DATA_VALUE_NOT_FOUND; + } + + /* scan the list of apps to find the matching entry */ + app = NULL; + PMIX_LIST_FOREACH(apptr, tgt, pmix_apptrkr_t) { + if (appnum == apptr->appnum) { + app = apptr; + break; + } + } + if (NULL == app) { + return PMIX_ERR_NOT_FOUND; + } + + /* see if they wanted to know something about a node that + * is associated with this 
app */ + rc = fetch_nodeinfo(key, &app->nodeinfo, info, ninfo, kvs); + if (PMIX_ERR_DATA_VALUE_NOT_FOUND != rc) { + return rc; + } + + /* scan the info list of this app to generate the results */ + rc = PMIX_ERR_NOT_FOUND; + PMIX_LIST_FOREACH(kv, &app->appinfo, pmix_kval_t) { + if (NULL == key || PMIX_CHECK_KEY(kv, key)) { + kp2 = PMIX_NEW(pmix_kval_t); + kp2->key = strdup(kv->key); + kp2->value = (pmix_value_t*)malloc(sizeof(pmix_value_t)); + PMIX_VALUE_XFER(rc, kp2->value, kv->value); + if (PMIX_SUCCESS != rc) { + PMIX_ERROR_LOG(rc); + PMIX_RELEASE(kp2); + return rc; + } + pmix_list_append(kvs, &kp2->super); + rc = PMIX_SUCCESS; + if (NULL != key) { + break; + } + } + } + + return rc; +} + static pmix_status_t hash_fetch(const pmix_proc_t *proc, pmix_scope_t scope, bool copy, const char *key, pmix_info_t qualifiers[], size_t nqual, pmix_list_t *kvs) { - pmix_hash_trkr_t *trk, *t; + pmix_job_t *trk, *t; pmix_status_t rc; pmix_value_t *val; - pmix_kval_t *kv; + pmix_kval_t *kv, *kvptr; pmix_info_t *info; size_t n, ninfo; pmix_hash_table_t *ht; + pmix_session_t *sptr; + uint32_t sid; + pmix_rank_t rnk; pmix_output_verbose(2, pmix_gds_base_framework.framework_output, - "[%s:%u] pmix:gds:hash fetch %s for proc %s:%u on scope %s", - pmix_globals.myid.nspace, pmix_globals.myid.rank, + "%s pmix:gds:hash fetch %s for proc %s on scope %s", + PMIX_NAME_PRINT(&pmix_globals.myid), (NULL == key) ? 
"NULL" : key, - proc->nspace, proc->rank, PMIx_Scope_string(scope)); + PMIX_NAME_PRINT(proc), PMIx_Scope_string(scope)); /* if the rank is wildcard and the key is NULL, then * they are asking for a complete copy of the job-level @@ -1296,7 +2345,7 @@ static pmix_status_t hash_fetch(const pmix_proc_t *proc, /* see if we have a tracker for this nspace - we will * if we already cached the job info for it */ trk = NULL; - PMIX_LIST_FOREACH(t, &myhashes, pmix_hash_trkr_t) { + PMIX_LIST_FOREACH(t, &myjobs, pmix_job_t) { if (0 == strcmp(proc->nspace, t->ns)) { trk = t; break; @@ -1312,7 +2361,6 @@ static pmix_status_t hash_fetch(const pmix_proc_t *proc, val = NULL; rc = pmix_hash_fetch(ht, PMIX_RANK_WILDCARD, NULL, &val); if (PMIX_SUCCESS != rc) { - PMIX_ERROR_LOG(rc); if (NULL != val) { PMIX_VALUE_RELEASE(val); } @@ -1326,7 +2374,6 @@ static pmix_status_t hash_fetch(const pmix_proc_t *proc, if (PMIX_DATA_ARRAY != val->type || NULL == val->data.darray || PMIX_INFO != val->data.darray->type) { - PMIX_ERROR_LOG(PMIX_ERR_INVALID_VAL); PMIX_VALUE_RELEASE(val); return PMIX_ERR_INVALID_VAL; } @@ -1353,9 +2400,58 @@ static pmix_status_t hash_fetch(const pmix_proc_t *proc, return PMIX_SUCCESS; } + /* if the nspace and rank are undefined, then they are asking + * for session-level information. 
*/ + if (0 == strlen(proc->nspace) && PMIX_RANK_UNDEF == proc->rank) { + /* they must have included something identifying the info + * class they are querying */ + for (n=0; n < nqual; n++) { + if (PMIX_CHECK_KEY(&qualifiers[n], PMIX_SESSION_ID)) { + /* they want session-level info - see if we have + * that session */ + PMIX_VALUE_GET_NUMBER(rc, &qualifiers[n].value, sid, uint32_t); + if (PMIX_SUCCESS != rc) { + /* didn't provide a correct value */ + PMIX_ERROR_LOG(rc); + return rc; + } + PMIX_LIST_FOREACH(sptr, &mysessions, pmix_session_t) { + if (sptr->session == sid) { + /* see if they want info for a specific node */ + rc = fetch_nodeinfo(key, &sptr->nodeinfo, qualifiers, nqual, kvs); + /* if they did, then we are done */ + if (PMIX_ERR_DATA_VALUE_NOT_FOUND != rc) { + return rc; + } + /* check the session info */ + PMIX_LIST_FOREACH(kvptr, &sptr->sessioninfo, pmix_kval_t) { + if (NULL == key || PMIX_CHECK_KEY(kvptr, key)) { + kv = PMIX_NEW(pmix_kval_t); + kv->key = strdup(kvptr->key); + kv->value = (pmix_value_t*)malloc(sizeof(pmix_value_t)); + PMIX_VALUE_XFER(rc, kv->value, kvptr->value); + if (PMIX_SUCCESS != rc) { + PMIX_RELEASE(kv); + return rc; + } + pmix_list_append(kvs, &kv->super); + if (NULL != key) { + /* we are done */ + return PMIX_SUCCESS; + } + } + } + } + } + /* if we get here, then the session wasn't found */ + return PMIX_ERR_NOT_FOUND; + } + } + } + /* find the hash table for this nspace */ trk = NULL; - PMIX_LIST_FOREACH(t, &myhashes, pmix_hash_trkr_t) { + PMIX_LIST_FOREACH(t, &myjobs, pmix_job_t) { if (0 == strcmp(proc->nspace, t->ns)) { trk = t; break; @@ -1365,6 +2461,24 @@ static pmix_status_t hash_fetch(const pmix_proc_t *proc, return PMIX_ERR_INVALID_NAMESPACE; } + /* if the rank isn't specified, check to see if they + * are looking for app-level or node-level info for + * this job */ + if (PMIX_RANK_UNDEF == proc->rank) { + /* see if they want info for a specific node */ + rc = fetch_nodeinfo(key, &trk->nodeinfo, qualifiers, nqual, 
kvs); + /* if they did, then we are done */ + if (PMIX_ERR_DATA_VALUE_NOT_FOUND != rc) { + return rc; + } + /* see if they want info for a specific app */ + rc = fetch_appinfo(key, &trk->apps, qualifiers, nqual, kvs); + /* if they did, then we are done */ + if (PMIX_ERR_DATA_VALUE_NOT_FOUND != rc) { + return rc; + } + } + /* fetch from the corresponding hash table - note that * we always provide a copy as we don't support * shared memory */ @@ -1384,59 +2498,56 @@ static pmix_status_t hash_fetch(const pmix_proc_t *proc, } doover: - rc = pmix_hash_fetch(ht, proc->rank, key, &val); - if (PMIX_SUCCESS == rc) { - /* if the key was NULL, then all found keys will be - * returned as a pmix_data_array_t in the value */ - if (NULL == key) { - if (NULL == val->data.darray || - PMIX_INFO != val->data.darray->type || - 0 == val->data.darray->size) { - PMIX_ERROR_LOG(PMIX_ERR_NOT_FOUND); - return PMIX_ERR_NOT_FOUND; + /* if rank=PMIX_RANK_UNDEF, then we need to search all + * known ranks for this nspace as any one of them could + * be the source */ + if (PMIX_RANK_UNDEF == proc->rank) { + for (rnk=0; rnk < trk->nptr->nprocs; rnk++) { + rc = dohash(ht, key, rnk, true, kvs); + if (PMIX_ERR_NOMEM == rc) { + return rc; } - info = (pmix_info_t*)val->data.darray->array; - ninfo = val->data.darray->size; - for (n=0; n < ninfo; n++) { + if (PMIX_SUCCESS == rc && NULL != key) { + return rc; + } + } + /* also need to check any job-level info */ + PMIX_LIST_FOREACH(kvptr, &trk->jobinfo, pmix_kval_t) { + if (NULL == key || PMIX_CHECK_KEY(kvptr, key)) { kv = PMIX_NEW(pmix_kval_t); - if (NULL == kv) { - PMIX_VALUE_RELEASE(val); - return PMIX_ERR_NOMEM; - } - kv->key = strdup(info[n].key); + kv->key = strdup(kvptr->key); kv->value = (pmix_value_t*)malloc(sizeof(pmix_value_t)); - if (NULL == kv->value) { - PMIX_VALUE_RELEASE(val); - PMIX_RELEASE(kv); - return PMIX_ERR_NOMEM; - } - PMIX_BFROPS_VALUE_XFER(rc, pmix_globals.mypeer, - kv->value, &info[n].value); + PMIX_VALUE_XFER(rc, kv->value, 
kvptr->value); if (PMIX_SUCCESS != rc) { - PMIX_ERROR_LOG(rc); - PMIX_VALUE_RELEASE(val); PMIX_RELEASE(kv); return rc; } pmix_list_append(kvs, &kv->super); + if (NULL != key) { + break; + } } - PMIX_VALUE_RELEASE(val); - if (PMIX_GLOBAL == scope && ht == &trk->local) { + } + if (NULL == key) { + /* and need to add all job info just in case that was + * passed via a different GDS component */ + dohash(&trk->internal, NULL, PMIX_RANK_WILDCARD, false, kvs); + } + } else { + rc = dohash(ht, key, proc->rank, false, kvs); + } + if (PMIX_SUCCESS == rc) { + if (PMIX_GLOBAL == scope) { + if (ht == &trk->local) { /* need to do this again for the remote data */ ht = &trk->remote; goto doover; + } else if (ht == &trk->internal) { + /* check local */ + ht = &trk->local; + goto doover; } - return PMIX_SUCCESS; } - /* just return the value */ - kv = PMIX_NEW(pmix_kval_t); - if (NULL == kv) { - PMIX_VALUE_RELEASE(val); - return PMIX_ERR_NOMEM; - } - kv->key = strdup(key); - kv->value = val; - pmix_list_append(kvs, &kv->super); } else { if (PMIX_GLOBAL == scope || PMIX_SCOPE_UNDEF == scope) { @@ -1451,6 +2562,9 @@ static pmix_status_t hash_fetch(const pmix_proc_t *proc, } } } + if (0 == pmix_list_get_size(kvs)) { + rc = PMIX_ERR_NOT_FOUND; + } return rc; } @@ -1471,13 +2585,13 @@ static pmix_status_t nspace_add(const char *nspace, static pmix_status_t nspace_del(const char *nspace) { - pmix_hash_trkr_t *t; + pmix_job_t *t; /* find the hash table for this nspace */ - PMIX_LIST_FOREACH(t, &myhashes, pmix_hash_trkr_t) { + PMIX_LIST_FOREACH(t, &myjobs, pmix_job_t) { if (0 == strcmp(nspace, t->ns)) { /* release it */ - pmix_list_remove_item(&myhashes, &t->super); + pmix_list_remove_item(&myjobs, &t->super); PMIX_RELEASE(t); break; } @@ -1541,6 +2655,12 @@ static pmix_status_t accept_kvs_resp(pmix_buffer_t *buf) PMIX_ERROR_LOG(rc); return rc; } + /* if the rank is UNDEF, then we store this on our own + * rank tables */ + if (PMIX_RANK_UNDEF == proct.rank) { + proct.rank = 
pmix_globals.myid.rank; + } + cnt = 1; kv = PMIX_NEW(pmix_kval_t); PMIX_BFROPS_UNPACK(rc, pmix_client_globals.myserver, @@ -1550,7 +2670,6 @@ static pmix_status_t accept_kvs_resp(pmix_buffer_t *buf) * the kval contains shmem connection info, then the * component will know what to do about it (or else * we selected the wrong component for this peer!) */ - PMIX_GDS_STORE_KV(rc, pmix_globals.mypeer, &proct, PMIX_INTERNAL, kv); if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); diff --git a/opal/mca/pmix/pmix3x/pmix/src/mca/pdl/configure.m4 b/opal/mca/pmix/pmix3x/pmix/src/mca/pdl/configure.m4 index c5082065b23..1e749df5b2d 100644 --- a/opal/mca/pmix/pmix3x/pmix/src/mca/pdl/configure.m4 +++ b/opal/mca/pmix/pmix3x/pmix/src/mca/pdl/configure.m4 @@ -1,8 +1,8 @@ dnl -*- shell-script -*- dnl dnl Copyright (c) 2010-2015 Cisco Systems, Inc. All rights reserved. -dnl Copyright (c) 2016-2017 Intel, Inc. All rights reserved. -dnl Copyright (c) 2016 Research Organization for Information Science +dnl Copyright (c) 2016-2019 Intel, Inc. All rights reserved. +dnl Copyright (c) 2016-2019 Research Organization for Information Science dnl and Technology (RIST). All rights reserved. dnl $COPYRIGHT$ dnl @@ -27,7 +27,7 @@ AC_DEFUN([MCA_pmix_pdl_CONFIG],[ # (we still need to configure them all so that things like "make # dist" work", but we just want the MCA system to (artificially) # conclude that it can't build any of the components. - AS_IF([test "$enable_dlopen" = "no"], + AS_IF([test $PMIX_ENABLE_DLOPEN_SUPPORT -eq 0], [want_pdl=0], [want_pdl=1]) MCA_CONFIGURE_FRAMEWORK([pdl], [$want_pdl]) @@ -35,7 +35,7 @@ AC_DEFUN([MCA_pmix_pdl_CONFIG],[ # If we found no suitable static pdl component and dlopen support # was not specifically disabled, this is an error. 
AS_IF([test "$MCA_pmix_pdl_STATIC_COMPONENTS" = "" && \ - test "$enable_dlopen" != "no"], + test $PMIX_ENABLE_DLOPEN_SUPPORT -eq 1], [AC_MSG_WARN([Did not find a suitable static pmix pdl component]) AC_MSG_WARN([You might need to install libltld (and its headers) or]) AC_MSG_WARN([specify --disable-dlopen to configure.]) diff --git a/opal/mca/pmix/pmix3x/pmix/src/mca/pif/bsdx_ipv4/pif_bsdx.c b/opal/mca/pmix/pmix3x/pmix/src/mca/pif/bsdx_ipv4/pif_bsdx.c index 9157d546616..1d48b462770 100644 --- a/opal/mca/pmix/pmix3x/pmix/src/mca/pif/bsdx_ipv4/pif_bsdx.c +++ b/opal/mca/pmix/pmix3x/pmix/src/mca/pif/bsdx_ipv4/pif_bsdx.c @@ -1,6 +1,7 @@ /* * Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2010 Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2018-2019 Intel, Inc. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -35,23 +36,7 @@ #include #endif #ifdef HAVE_NET_IF_H -#if defined(__APPLE__) && defined(_LP64) -/* Apple engineering suggested using options align=power as a - workaround for a bug in OS X 10.4 (Tiger) that prevented ioctl(..., - SIOCGIFCONF, ...) from working properly in 64 bit mode on Power PC. - It turns out that the underlying issue is the size of struct - ifconf, which the kernel expects to be 12 and natural 64 bit - alignment would make 16. The same bug appears in 64 bit mode on - Intel macs, but align=power is a no-op there, so instead, use the - pack pragma to instruct the compiler to pack on 4 byte words, which - has the same effect as align=power for our needs and works on both - Intel and Power PC Macs. 
*/ -#pragma pack(push,4) -#endif #include -#if defined(__APPLE__) && defined(_LP64) -#pragma pack(pop) -#endif #endif #ifdef HAVE_NETDB_H #include @@ -173,7 +158,7 @@ static int if_bsdx_open(void) /* fill values into the pmix_pif_t */ memcpy(&a4, &(sin_addr->sin_addr), sizeof(struct in_addr)); - strncpy(intf->if_name, cur_ifaddrs->ifa_name, IF_NAMESIZE); + pmix_strncpy(intf->if_name, cur_ifaddrs->ifa_name, PMIX_IF_NAMESIZE-1); intf->if_index = pmix_list_get_size(&pmix_if_list) + 1; ((struct sockaddr_in*) &intf->if_addr)->sin_addr = a4; ((struct sockaddr_in*) &intf->if_addr)->sin_family = AF_INET; diff --git a/opal/mca/pmix/pmix3x/pmix/src/mca/pif/bsdx_ipv6/pif_bsdx_ipv6.c b/opal/mca/pmix/pmix3x/pmix/src/mca/pif/bsdx_ipv6/pif_bsdx_ipv6.c index 2dac2550d37..ff30d73500b 100644 --- a/opal/mca/pmix/pmix3x/pmix/src/mca/pif/bsdx_ipv6/pif_bsdx_ipv6.c +++ b/opal/mca/pmix/pmix3x/pmix/src/mca/pif/bsdx_ipv6/pif_bsdx_ipv6.c @@ -1,6 +1,7 @@ /* * Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2010 Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2018-2019 Intel, Inc. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -35,23 +36,7 @@ #include #endif #ifdef HAVE_NET_IF_H -#if defined(__APPLE__) && defined(_LP64) -/* Apple engineering suggested using options align=power as a - workaround for a bug in OS X 10.4 (Tiger) that prevented ioctl(..., - SIOCGIFCONF, ...) from working properly in 64 bit mode on Power PC. - It turns out that the underlying issue is the size of struct - ifconf, which the kernel expects to be 12 and natural 64 bit - alignment would make 16. The same bug appears in 64 bit mode on - Intel macs, but align=power is a no-op there, so instead, use the - pack pragma to instruct the compiler to pack on 4 byte words, which - has the same effect as align=power for our needs and works on both - Intel and Power PC Macs. 
*/ -#pragma pack(push,4) -#endif #include -#if defined(__APPLE__) && defined(_LP64) -#pragma pack(pop) -#endif #endif #ifdef HAVE_NETDB_H #include @@ -198,7 +183,7 @@ static int if_bsdx_ipv6_open(void) return PMIX_ERR_OUT_OF_RESOURCE; } intf->af_family = AF_INET6; - strncpy(intf->if_name, cur_ifaddrs->ifa_name, IF_NAMESIZE); + pmix_strncpy(intf->if_name, cur_ifaddrs->ifa_name, PMIX_IF_NAMESIZE-1); intf->if_index = pmix_list_get_size(&pmix_if_list) + 1; ((struct sockaddr_in6*) &intf->if_addr)->sin6_addr = a6; ((struct sockaddr_in6*) &intf->if_addr)->sin6_family = AF_INET6; diff --git a/opal/mca/pmix/pmix3x/pmix/src/mca/pif/linux_ipv6/pif_linux_ipv6.c b/opal/mca/pmix/pmix3x/pmix/src/mca/pif/linux_ipv6/pif_linux_ipv6.c index 2f240f9d8a1..53bec6fb04a 100644 --- a/opal/mca/pmix/pmix3x/pmix/src/mca/pif/linux_ipv6/pif_linux_ipv6.c +++ b/opal/mca/pmix/pmix3x/pmix/src/mca/pif/linux_ipv6/pif_linux_ipv6.c @@ -1,6 +1,7 @@ /* * Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2010 Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2018-2019 Intel, Inc. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -35,23 +36,7 @@ #include #endif #ifdef HAVE_NET_IF_H -#if defined(__APPLE__) && defined(_LP64) -/* Apple engineering suggested using options align=power as a - workaround for a bug in OS X 10.4 (Tiger) that prevented ioctl(..., - SIOCGIFCONF, ...) from working properly in 64 bit mode on Power PC. - It turns out that the underlying issue is the size of struct - ifconf, which the kernel expects to be 12 and natural 64 bit - alignment would make 16. The same bug appears in 64 bit mode on - Intel macs, but align=power is a no-op there, so instead, use the - pack pragma to instruct the compiler to pack on 4 byte words, which - has the same effect as align=power for our needs and works on both - Intel and Power PC Macs. 
*/ -#pragma pack(push,4) -#endif #include -#if defined(__APPLE__) && defined(_LP64) -#pragma pack(pop) -#endif #endif #ifdef HAVE_NETDB_H #include @@ -95,12 +80,17 @@ static int if_linux_ipv6_open(void) { FILE *f; if ((f = fopen("/proc/net/if_inet6", "r"))) { - char ifname[IF_NAMESIZE]; + /* IF_NAMESIZE is normally 16 on Linux, + but the next scanf allows up to 21 bytes */ + char ifname[PMIX_IF_NAMESIZE]; unsigned int idx, pfxlen, scope, dadstat; struct in6_addr a6; int iter; uint32_t flag; - unsigned int addrbyte[16]; + unsigned int addrbyte[PMIX_IF_NAMESIZE]; + + memset(addrbyte, 0, PMIX_IF_NAMESIZE*sizeof(unsigned int)); + memset(ifname, 0, PMIX_IF_NAMESIZE*sizeof(char)); while (fscanf(f, "%2x%2x%2x%2x%2x%2x%2x%2x%2x%2x%2x%2x%2x%2x%2x%2x %x %x %x %x %20s\n", &addrbyte[0], &addrbyte[1], &addrbyte[2], &addrbyte[3], @@ -117,8 +107,8 @@ static int if_linux_ipv6_open(void) addrbyte[8], addrbyte[9], addrbyte[10], addrbyte[11], addrbyte[12], addrbyte[13], addrbyte[14], addrbyte[15], scope); - /* we don't want any other scope less than link-local */ - if (scope < 0x20) { + /* Only interested in global (0x00) scope */ + if (scope != 0x00) { pmix_output_verbose(1, pmix_pif_base_framework.framework_output, "skipping interface %2x%2x:%2x%2x:%2x%2x:%2x%2x:%2x%2x:%2x%2x:%2x%2x:%2x%2x scope %x\n", addrbyte[0], addrbyte[1], addrbyte[2], addrbyte[3], @@ -142,7 +132,7 @@ static int if_linux_ipv6_open(void) } /* now construct the pmix_pif_t */ - strncpy(intf->if_name, ifname, IF_NAMESIZE); + pmix_strncpy(intf->if_name, ifname, PMIX_IF_NAMESIZE-1); intf->if_index = pmix_list_get_size(&pmix_if_list)+1; intf->if_kernel_index = (uint16_t) idx; ((struct sockaddr_in6*) &intf->if_addr)->sin6_addr = a6; diff --git a/opal/mca/pmix/pmix3x/pmix/src/mca/pif/pif.h b/opal/mca/pmix/pmix3x/pmix/src/mca/pif/pif.h index 29c75b869c7..9d23fdf1ff5 100644 --- a/opal/mca/pmix/pmix3x/pmix/src/mca/pif/pif.h +++ b/opal/mca/pmix/pmix3x/pmix/src/mca/pif/pif.h @@ -3,7 +3,7 @@ * Copyright (c) 2010-2013 Cisco 
Systems, Inc. All rights reserved. * Copyright (c) 2015 Los Alamos National Security, LLC. All rights * reserved. - * Copyright (c) 2016 Intel, Inc. All rights reserved. + * Copyright (c) 2016-2019 Intel, Inc. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -40,23 +40,7 @@ #include #endif #ifdef HAVE_NET_IF_H -#if defined(__APPLE__) && defined(_LP64) -/* Apple engineering suggested using options align=power as a - workaround for a bug in OS X 10.4 (Tiger) that prevented ioctl(..., - SIOCGIFCONF, ...) from working properly in 64 bit mode on Power PC. - It turns out that the underlying issue is the size of struct - ifconf, which the kernel expects to be 12 and natural 64 bit - alignment would make 16. The same bug appears in 64 bit mode on - Intel macs, but align=power is a no-op there, so instead, use the - pack pragma to instruct the compiler to pack on 4 byte words, which - has the same effect as align=power for our needs and works on both - Intel and Power PC Macs. */ -#pragma pack(push,4) -#endif #include -#if defined(__APPLE__) && defined(_LP64) -#pragma pack(pop) -#endif #endif #ifdef HAVE_NETDB_H #include @@ -89,7 +73,7 @@ BEGIN_C_DECLS typedef struct pmix_pif_t { pmix_list_item_t super; - char if_name[IF_NAMESIZE+1]; + char if_name[PMIX_IF_NAMESIZE+1]; int if_index; uint16_t if_kernel_index; uint16_t af_family; diff --git a/opal/mca/pmix/pmix3x/pmix/src/mca/pif/posix_ipv4/pif_posix.c b/opal/mca/pmix/pmix3x/pmix/src/mca/pif/posix_ipv4/pif_posix.c index c338b4f86a1..095a3027e71 100644 --- a/opal/mca/pmix/pmix3x/pmix/src/mca/pif/posix_ipv4/pif_posix.c +++ b/opal/mca/pmix/pmix3x/pmix/src/mca/pif/posix_ipv4/pif_posix.c @@ -39,23 +39,7 @@ #include #endif #ifdef HAVE_NET_IF_H -#if defined(__APPLE__) && defined(_LP64) -/* Apple engineering suggested using options align=power as a - workaround for a bug in OS X 10.4 (Tiger) that prevented ioctl(..., - SIOCGIFCONF, ...) from working properly in 64 bit mode on Power PC. 
- It turns out that the underlying issue is the size of struct - ifconf, which the kernel expects to be 12 and natural 64 bit - alignment would make 16. The same bug appears in 64 bit mode on - Intel macs, but align=power is a no-op there, so instead, use the - pack pragma to instruct the compiler to pack on 4 byte words, which - has the same effect as align=power for our needs and works on both - Intel and Power PC Macs. */ -#pragma pack(push,4) -#endif #include -#if defined(__APPLE__) && defined(_LP64) -#pragma pack(pop) -#endif #endif #ifdef HAVE_NETDB_H #include @@ -265,7 +249,7 @@ static int if_posix_open(void) /* copy entry over into our data structure */ memset(intf->if_name, 0, sizeof(intf->if_name)); - strncpy(intf->if_name, ifr->ifr_name, sizeof(intf->if_name) - 1); + pmix_strncpy(intf->if_name, ifr->ifr_name, sizeof(intf->if_name) - 1); intf->if_flags = ifr->ifr_flags; /* every new address gets its own internal if_index */ diff --git a/opal/mca/pmix/pmix3x/pmix/src/mca/pif/solaris_ipv6/pif_solaris_ipv6.c b/opal/mca/pmix/pmix3x/pmix/src/mca/pif/solaris_ipv6/pif_solaris_ipv6.c index 7403cebf0e2..c9895cb6617 100644 --- a/opal/mca/pmix/pmix3x/pmix/src/mca/pif/solaris_ipv6/pif_solaris_ipv6.c +++ b/opal/mca/pmix/pmix3x/pmix/src/mca/pif/solaris_ipv6/pif_solaris_ipv6.c @@ -3,7 +3,7 @@ * Copyright (c) 2010 Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2016 Research Organization for Information Science * and Technology (RIST). All rights reserved. - * Copyright (c) 2016 Intel, Inc. All rights reserved. + * Copyright (c) 2016-2019 Intel, Inc. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -146,8 +146,8 @@ static int if_solaris_ipv6_open(void) i += sizeof (*lifreq)) { lifreq = (struct lifreq *)((caddr_t)lifconf.lifc_buf + i); - strncpy (lifquery.lifr_name, lifreq->lifr_name, - sizeof (lifquery.lifr_name)); + pmix_strncpy (lifquery.lifr_name, lifreq->lifr_name, + sizeof (lifquery.lifr_name)-1); /* lookup kernel index */ error = ioctl (sd, SIOCGLIFINDEX, &lifquery); @@ -190,7 +190,7 @@ static int if_solaris_ipv6_open(void) } intf->af_family = AF_INET6; - strncpy (intf->if_name, lifreq->lifr_name, IF_NAMESIZE); + pmix_strncpy (intf->if_name, lifreq->lifr_name, PMIX_IF_NAMESIZE-1); intf->if_index = pmix_list_get_size(&pmix_if_list)+1; memcpy(&intf->if_addr, my_addr, sizeof (*my_addr)); intf->if_mask = 64; diff --git a/opal/mca/pmix/pmix3x/pmix/src/mca/pinstalldirs/config/Makefile.am b/opal/mca/pmix/pmix3x/pmix/src/mca/pinstalldirs/config/Makefile.am index d05743fb5f6..7a1f9c9c3de 100644 --- a/opal/mca/pmix/pmix3x/pmix/src/mca/pinstalldirs/config/Makefile.am +++ b/opal/mca/pmix/pmix3x/pmix/src/mca/pinstalldirs/config/Makefile.am @@ -4,7 +4,7 @@ # Copyright (c) 2007 Cisco Systems, Inc. All rights reserved. # Copyright (c) 2009 High Performance Computing Center Stuttgart, # University of Stuttgart. All rights reserved. -# Copyright (c) 2016 Intel, Inc. All rights reserved. +# Copyright (c) 2016-2018 Intel, Inc. All rights reserved. 
# $COPYRIGHT$ # # Additional copyrights may follow @@ -19,4 +19,4 @@ libmca_pinstalldirs_config_la_SOURCES = \ # This file is generated; we do not want to include it in the tarball nodist_libmca_pinstalldirs_config_la_SOURCES = \ - install_dirs.h + pinstall_dirs.h diff --git a/opal/mca/pmix/pmix3x/pmix/src/mca/plog/base/plog_base_stubs.c b/opal/mca/pmix/pmix3x/pmix/src/mca/plog/base/plog_base_stubs.c index 226db25b275..221ec775f87 100644 --- a/opal/mca/pmix/pmix3x/pmix/src/mca/plog/base/plog_base_stubs.c +++ b/opal/mca/pmix/pmix3x/pmix/src/mca/plog/base/plog_base_stubs.c @@ -1,6 +1,6 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* - * Copyright (c) 2018 Intel, Inc. All rights reserved. + * Copyright (c) 2018-2019 Intel, Inc. All rights reserved. * * $COPYRIGHT$ * @@ -109,8 +109,8 @@ pmix_status_t pmix_plog_base_log(const pmix_proc_t *source, * channel that can successfully handle this request, * and any channel directives */ for (n=0; n < ndirs; n++) { - if (0 == strncmp(directives[n].key, PMIX_LOG_ONCE, PMIX_MAX_KEYLEN)) { - logonce = true; + if (PMIX_CHECK_KEY(&directives[n], PMIX_LOG_ONCE)) { + logonce = PMIX_INFO_TRUE(&directives[n]); break; } } @@ -237,14 +237,10 @@ pmix_status_t pmix_plog_base_log(const pmix_proc_t *source, rc = mycount->status; // save the status as it could change when the lock is released if (0 == mycount->nreqs) { - /* execute their callback */ - if (NULL != mycount->cbfunc) { - mycount->cbfunc(mycount->status, mycount->cbdata); - } PMIX_RELEASE_THREAD(&mycount->lock); PMIX_RELEASE(mycount); PMIX_RELEASE_THREAD(&pmix_plog_globals.lock); - return PMIX_SUCCESS; + return PMIX_OPERATION_SUCCEEDED; } PMIX_RELEASE_THREAD(&mycount->lock); PMIX_RELEASE_THREAD(&pmix_plog_globals.lock); diff --git a/opal/mca/pmix/pmix3x/pmix/src/mca/plog/default/Makefile.am b/opal/mca/pmix/pmix3x/pmix/src/mca/plog/default/Makefile.am index aa141f9d8ff..369a06269f3 100644 --- a/opal/mca/pmix/pmix3x/pmix/src/mca/plog/default/Makefile.am +++ 
b/opal/mca/pmix/pmix3x/pmix/src/mca/plog/default/Makefile.am @@ -10,7 +10,7 @@ # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. # Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. -# Copyright (c) 2014-2018 Intel, Inc. All rights reserved. +# Copyright (c) 2014-2019 Intel, Inc. All rights reserved. # Copyright (c) 2017 IBM Corporation. All rights reserved. # $COPYRIGHT$ # @@ -40,6 +40,9 @@ mcacomponentdir = $(pmixlibdir) mcacomponent_LTLIBRARIES = $(component_install) mca_plog_default_la_SOURCES = $(sources) mca_plog_default_la_LDFLAGS = -module -avoid-version +if NEED_LIBPMIX +mca_plog_default_la_LIBADD = $(top_builddir)/src/libpmix.la +endif noinst_LTLIBRARIES = $(component_noinst) libmca_plog_default_la_SOURCES =$(sources) diff --git a/opal/mca/pmix/pmix3x/pmix/src/mca/plog/stdfd/Makefile.am b/opal/mca/pmix/pmix3x/pmix/src/mca/plog/stdfd/Makefile.am index 497dfaaf1a7..0cdd43d60cb 100644 --- a/opal/mca/pmix/pmix3x/pmix/src/mca/plog/stdfd/Makefile.am +++ b/opal/mca/pmix/pmix3x/pmix/src/mca/plog/stdfd/Makefile.am @@ -10,7 +10,7 @@ # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. # Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. -# Copyright (c) 2014-2018 Intel, Inc. All rights reserved. +# Copyright (c) 2014-2019 Intel, Inc. All rights reserved. # Copyright (c) 2017 IBM Corporation. All rights reserved. 
# $COPYRIGHT$ # @@ -40,6 +40,9 @@ mcacomponentdir = $(pmixlibdir) mcacomponent_LTLIBRARIES = $(component_install) mca_plog_stdfd_la_SOURCES = $(sources) mca_plog_stdfd_la_LDFLAGS = -module -avoid-version +if NEED_LIBPMIX +mca_plog_stdfd_la_LIBADD = $(top_builddir)/src/libpmix.la +endif noinst_LTLIBRARIES = $(component_noinst) libmca_plog_stdfd_la_SOURCES =$(sources) diff --git a/opal/mca/pmix/pmix3x/pmix/src/mca/plog/stdfd/plog_stdfd.c b/opal/mca/pmix/pmix3x/pmix/src/mca/plog/stdfd/plog_stdfd.c index e6ed5a60ce3..2aceac179ad 100644 --- a/opal/mca/pmix/pmix3x/pmix/src/mca/plog/stdfd/plog_stdfd.c +++ b/opal/mca/pmix/pmix3x/pmix/src/mca/plog/stdfd/plog_stdfd.c @@ -10,7 +10,7 @@ * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2007 Sun Microsystems, Inc. All rights reserved. - * Copyright (c) 2014-2018 Intel, Inc. All rights reserved. + * Copyright (c) 2014-2019 Intel, Inc. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -90,7 +90,7 @@ static pmix_status_t mylog(const pmix_proc_t *source, /* check to see if there are any relevant directives */ for (n=0; n < ndirs; n++) { if (0 == strncmp(directives[n].key, PMIX_LOG_TIMESTAMP, PMIX_MAX_KEYLEN)) { - flags.timestamp = data[n].value.data.time; + flags.timestamp = directives[n].value.data.time; } else if (0 == strncmp(directives[n].key, PMIX_LOG_XML_OUTPUT, PMIX_MAX_KEYLEN)) { flags.xml = PMIX_INFO_TRUE(&directives[n]); } else if (0 == strncmp(directives[n].key, PMIX_LOG_TAG_OUTPUT, PMIX_MAX_KEYLEN)) { @@ -101,6 +101,9 @@ static pmix_status_t mylog(const pmix_proc_t *source, /* check to see if there are any stdfd entries */ rc = PMIX_ERR_TAKE_NEXT_OPTION; for (n=0; n < ndata; n++) { + if (PMIX_INFO_OP_IS_COMPLETE(&data[n])) { + continue; + } if (0 == strncmp(data[n].key, PMIX_LOG_STDERR, PMIX_MAX_KEYLEN)) { bo.bytes = data[n].value.data.string; bo.size = strlen(bo.bytes); @@ -117,6 +120,5 @@ static pmix_status_t mylog(const pmix_proc_t 
*source, rc = PMIX_SUCCESS; } } - return rc; } diff --git a/opal/mca/pmix/pmix3x/pmix/src/mca/plog/syslog/Makefile.am b/opal/mca/pmix/pmix3x/pmix/src/mca/plog/syslog/Makefile.am index 7a09d28fac1..ba79c07fe73 100644 --- a/opal/mca/pmix/pmix3x/pmix/src/mca/plog/syslog/Makefile.am +++ b/opal/mca/pmix/pmix3x/pmix/src/mca/plog/syslog/Makefile.am @@ -10,7 +10,7 @@ # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. # Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. -# Copyright (c) 2014-2018 Intel, Inc. All rights reserved. +# Copyright (c) 2014-2019 Intel, Inc. All rights reserved. # Copyright (c) 2017 IBM Corporation. All rights reserved. # $COPYRIGHT$ # @@ -40,6 +40,9 @@ mcacomponentdir = $(pmixlibdir) mcacomponent_LTLIBRARIES = $(component_install) mca_plog_syslog_la_SOURCES = $(sources) mca_plog_syslog_la_LDFLAGS = -module -avoid-version +if NEED_LIBPMIX +mca_plog_syslog_la_LIBADD = $(top_builddir)/src/libpmix.la +endif noinst_LTLIBRARIES = $(component_noinst) libmca_plog_syslog_la_SOURCES =$(sources) diff --git a/opal/mca/pmix/pmix3x/pmix/src/mca/pnet/base/base.h b/opal/mca/pmix/pmix3x/pmix/src/mca/pnet/base/base.h index 9b4f58fdaf1..d832bf5478e 100644 --- a/opal/mca/pmix/pmix3x/pmix/src/mca/pnet/base/base.h +++ b/opal/mca/pmix/pmix3x/pmix/src/mca/pnet/base/base.h @@ -119,7 +119,7 @@ PMIX_EXPORT pmix_status_t pmix_pnet_base_setup_local_network(char *nspace, size_t ninfo); PMIX_EXPORT pmix_status_t pmix_pnet_base_setup_fork(const pmix_proc_t *peer, char ***env); PMIX_EXPORT void pmix_pnet_base_child_finalized(pmix_proc_t *peer); -PMIX_EXPORT void pmix_pnet_base_local_app_finalized(pmix_nspace_t *nptr); +PMIX_EXPORT void pmix_pnet_base_local_app_finalized(pmix_namespace_t *nptr); PMIX_EXPORT void pmix_pnet_base_deregister_nspace(char *nspace); PMIX_EXPORT void pmix_pnet_base_collect_inventory(pmix_info_t directives[], size_t ndirs, pmix_inventory_cbfunc_t cbfunc, diff --git 
a/opal/mca/pmix/pmix3x/pmix/src/mca/pnet/base/pnet_base_fns.c b/opal/mca/pmix/pmix3x/pmix/src/mca/pnet/base/pnet_base_fns.c index 22ea10829e3..447a8e1ca14 100644 --- a/opal/mca/pmix/pmix3x/pmix/src/mca/pnet/base/pnet_base_fns.c +++ b/opal/mca/pmix/pmix3x/pmix/src/mca/pnet/base/pnet_base_fns.c @@ -1,6 +1,6 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* - * Copyright (c) 2015-2018 Intel, Inc. All rights reserved. + * Copyright (c) 2015-2019 Intel, Inc. All rights reserved. * Copyright (c) 2016 Mellanox Technologies, Inc. * All rights reserved. * Copyright (c) 2018 Research Organization for Information Science @@ -39,10 +39,11 @@ pmix_status_t pmix_pnet_base_allocate(char *nspace, pmix_list_t *ilist) { pmix_pnet_base_active_module_t *active; - pmix_status_t rc; - pmix_nspace_t *nptr, *ns; + pmix_status_t rc = PMIX_SUCCESS; + pmix_namespace_t *nptr, *ns; size_t n; char *nregex, *pregex; + char *params[2] = {"PMIX_MCA_", NULL}; if (!pmix_pnet_globals.initialized) { return PMIX_ERR_INIT; @@ -59,7 +60,7 @@ pmix_status_t pmix_pnet_base_allocate(char *nspace, nptr = NULL; /* find this nspace - note that it may not have * been registered yet */ - PMIX_LIST_FOREACH(ns, &pmix_server_globals.nspaces, pmix_nspace_t) { + PMIX_LIST_FOREACH(ns, &pmix_globals.nspaces, pmix_namespace_t) { if (0 == strcmp(ns->nspace, nspace)) { nptr = ns; break; @@ -67,30 +68,15 @@ pmix_status_t pmix_pnet_base_allocate(char *nspace, } if (NULL == nptr) { /* add it */ - nptr = PMIX_NEW(pmix_nspace_t); + nptr = PMIX_NEW(pmix_namespace_t); if (NULL == nptr) { return PMIX_ERR_NOMEM; } nptr->nspace = strdup(nspace); - pmix_list_append(&pmix_server_globals.nspaces, &nptr->super); + pmix_list_append(&pmix_globals.nspaces, &nptr->super); } - /* if the info param is NULL, then we make one pass thru the actives - * in case someone specified an allocation or collection of envars - * via MCA param */ - if (NULL == info) { - PMIX_LIST_FOREACH(active, &pmix_pnet_globals.actives, 
pmix_pnet_base_active_module_t) { - if (NULL != active->module->allocate) { - if (PMIX_SUCCESS == (rc = active->module->allocate(nptr, NULL, ilist))) { - break; - } - if (PMIX_ERR_TAKE_NEXT_OPTION != rc) { - /* true error */ - return rc; - } - } - } - } else { + if (NULL != info) { /* check for description of the node and proc maps */ nregex = NULL; pregex = NULL; @@ -131,7 +117,10 @@ pmix_status_t pmix_pnet_base_allocate(char *nspace, } } - return PMIX_SUCCESS; + /* add any local PMIx MCA params */ + rc = pmix_pnet_base_harvest_envars(params, NULL, ilist); + + return rc; } /* can only be called by a server */ @@ -141,7 +130,7 @@ pmix_status_t pmix_pnet_base_setup_local_network(char *nspace, { pmix_pnet_base_active_module_t *active; pmix_status_t rc; - pmix_nspace_t *nptr, *ns; + pmix_namespace_t *nptr, *ns; if (!pmix_pnet_globals.initialized) { return PMIX_ERR_INIT; @@ -157,7 +146,7 @@ pmix_status_t pmix_pnet_base_setup_local_network(char *nspace, /* find this proc's nspace object */ nptr = NULL; - PMIX_LIST_FOREACH(ns, &pmix_server_globals.nspaces, pmix_nspace_t) { + PMIX_LIST_FOREACH(ns, &pmix_globals.nspaces, pmix_namespace_t) { if (0 == strcmp(ns->nspace, nspace)) { nptr = ns; break; @@ -165,12 +154,12 @@ pmix_status_t pmix_pnet_base_setup_local_network(char *nspace, } if (NULL == nptr) { /* add it */ - nptr = PMIX_NEW(pmix_nspace_t); + nptr = PMIX_NEW(pmix_namespace_t); if (NULL == nptr) { return PMIX_ERR_NOMEM; } nptr->nspace = strdup(nspace); - pmix_list_append(&pmix_server_globals.nspaces, &nptr->super); + pmix_list_append(&pmix_globals.nspaces, &nptr->super); } PMIX_LIST_FOREACH(active, &pmix_pnet_globals.actives, pmix_pnet_base_active_module_t) { @@ -189,7 +178,7 @@ pmix_status_t pmix_pnet_base_setup_fork(const pmix_proc_t *proc, char ***env) { pmix_pnet_base_active_module_t *active; pmix_status_t rc; - pmix_nspace_t *nptr, *ns; + pmix_namespace_t *nptr, *ns; if (!pmix_pnet_globals.initialized) { return PMIX_ERR_INIT; @@ -202,7 +191,7 @@ pmix_status_t 
pmix_pnet_base_setup_fork(const pmix_proc_t *proc, char ***env) /* find this proc's nspace object */ nptr = NULL; - PMIX_LIST_FOREACH(ns, &pmix_server_globals.nspaces, pmix_nspace_t) { + PMIX_LIST_FOREACH(ns, &pmix_globals.nspaces, pmix_namespace_t) { if (0 == strcmp(ns->nspace, proc->nspace)) { nptr = ns; break; @@ -210,17 +199,18 @@ pmix_status_t pmix_pnet_base_setup_fork(const pmix_proc_t *proc, char ***env) } if (NULL == nptr) { /* add it */ - nptr = PMIX_NEW(pmix_nspace_t); + nptr = PMIX_NEW(pmix_namespace_t); if (NULL == nptr) { return PMIX_ERR_NOMEM; } nptr->nspace = strdup(proc->nspace); - pmix_list_append(&pmix_server_globals.nspaces, &nptr->super); + pmix_list_append(&pmix_globals.nspaces, &nptr->super); } PMIX_LIST_FOREACH(active, &pmix_pnet_globals.actives, pmix_pnet_base_active_module_t) { if (NULL != active->module->setup_fork) { - if (PMIX_SUCCESS != (rc = active->module->setup_fork(nptr, proc, env))) { + rc = active->module->setup_fork(nptr, proc, env); + if (PMIX_SUCCESS != rc && PMIX_ERR_NOT_AVAILABLE != rc) { return rc; } } @@ -252,7 +242,7 @@ void pmix_pnet_base_child_finalized(pmix_proc_t *peer) return; } -void pmix_pnet_base_local_app_finalized(pmix_nspace_t *nptr) +void pmix_pnet_base_local_app_finalized(pmix_namespace_t *nptr) { pmix_pnet_base_active_module_t *active; @@ -277,7 +267,9 @@ void pmix_pnet_base_local_app_finalized(pmix_nspace_t *nptr) void pmix_pnet_base_deregister_nspace(char *nspace) { pmix_pnet_base_active_module_t *active; - pmix_nspace_t *nptr, *ns; + pmix_namespace_t *nptr, *ns; + pmix_pnet_job_t *job; + pmix_pnet_node_t *node; if (!pmix_pnet_globals.initialized) { return; @@ -290,7 +282,7 @@ void pmix_pnet_base_deregister_nspace(char *nspace) /* find this nspace object */ nptr = NULL; - PMIX_LIST_FOREACH(ns, &pmix_server_globals.nspaces, pmix_nspace_t) { + PMIX_LIST_FOREACH(ns, &pmix_globals.nspaces, pmix_namespace_t) { if (0 == strcmp(ns->nspace, nspace)) { nptr = ns; break; @@ -307,7 +299,24 @@ void 
pmix_pnet_base_deregister_nspace(char *nspace) } } - return; + PMIX_LIST_FOREACH(job, &pmix_pnet_globals.jobs, pmix_pnet_job_t) { + if (0 == strcmp(nspace, job->nspace)) { + pmix_list_remove_item(&pmix_pnet_globals.jobs, &job->super); + PMIX_RELEASE(job); + break; + } + } + + PMIX_LIST_FOREACH(node, &pmix_pnet_globals.nodes, pmix_pnet_node_t) { + pmix_pnet_local_procs_t *lp; + PMIX_LIST_FOREACH(lp, &node->local_jobs, pmix_pnet_local_procs_t) { + if (0 == strcmp(nspace, lp->nspace)) { + pmix_list_remove_item(&node->local_jobs, &lp->super); + PMIX_RELEASE(lp); + break; + } + } + } } static void cicbfunc(pmix_status_t status, @@ -560,6 +569,8 @@ pmix_status_t pmix_pnet_base_harvest_envars(char **incvars, char **excvars, } *string_key = '\0'; ++string_key; + pmix_output_verbose(5, pmix_pnet_base_framework.framework_output, + "pnet: adding envar %s", cs_env); PMIX_ENVAR_LOAD(&kv->value->data.envar, cs_env, string_key, ':'); pmix_list_append(ilist, &kv->super); free(cs_env); @@ -576,6 +587,8 @@ pmix_status_t pmix_pnet_base_harvest_envars(char **incvars, char **excvars, } PMIX_LIST_FOREACH_SAFE(kv, next, ilist, pmix_kval_t) { if (0 == strncmp(kv->value->data.envar.envar, excvars[j], len)) { + pmix_output_verbose(5, pmix_pnet_base_framework.framework_output, + "pnet: excluding envar %s", kv->value->data.envar.envar); pmix_list_remove_item(ilist, &kv->super); PMIX_RELEASE(kv); } diff --git a/opal/mca/pmix/pmix3x/pmix/src/mca/pnet/opa/Makefile.am b/opal/mca/pmix/pmix3x/pmix/src/mca/pnet/opa/Makefile.am index 1223b43eca4..fe01cde836e 100644 --- a/opal/mca/pmix/pmix3x/pmix/src/mca/pnet/opa/Makefile.am +++ b/opal/mca/pmix/pmix3x/pmix/src/mca/pnet/opa/Makefile.am @@ -11,7 +11,7 @@ # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. # Copyright (c) 2012 Los Alamos National Security, Inc. All rights reserved. -# Copyright (c) 2013-2016 Intel, Inc. All rights reserved +# Copyright (c) 2013-2019 Intel, Inc. All rights reserved. 
# Copyright (c) 2017 Research Organization for Information Science # and Technology (RIST). All rights reserved. # $COPYRIGHT$ @@ -49,6 +49,9 @@ mcacomponent_LTLIBRARIES = $(component) mca_pnet_opa_la_SOURCES = $(component_sources) mca_pnet_opa_la_LIBADD = $(pnet_opa_LIBS) mca_pnet_opa_la_LDFLAGS = -module -avoid-version $(pnet_opa_LDFLAGS) +if NEED_LIBPMIX +mca_pnet_opa_la_LIBADD += $(top_builddir)/src/libpmix.la +endif noinst_LTLIBRARIES = $(lib) libmca_pnet_opa_la_SOURCES = $(lib_sources) diff --git a/opal/mca/pmix/pmix3x/pmix/src/mca/pnet/opa/configure.m4 b/opal/mca/pmix/pmix3x/pmix/src/mca/pnet/opa/configure.m4 index d822ffaf74e..f613cba102d 100644 --- a/opal/mca/pmix/pmix3x/pmix/src/mca/pnet/opa/configure.m4 +++ b/opal/mca/pmix/pmix3x/pmix/src/mca/pnet/opa/configure.m4 @@ -12,7 +12,7 @@ # All rights reserved. # Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. # Copyright (c) 2013 Sandia National Laboratories. All rights reserved. -# Copyright (c) 2014-2018 Intel, Inc. All rights reserved. +# Copyright (c) 2014-2019 Intel, Inc. All rights reserved. # $COPYRIGHT$ # # Additional copyrights may follow @@ -46,7 +46,7 @@ AC_DEFUN([MCA_pmix_pnet_opa_CONFIG],[ pmix_check_opamgt_dir= AC_MSG_CHECKING([if opamgt requested]) - AS_IF([test "$with_opamgt" == "no"], + AS_IF([test "$with_opamgt" = "no"], [AC_MSG_RESULT([no]) pmix_check_opamgt_happy=no], [AC_MSG_RESULT([yes]) diff --git a/opal/mca/pmix/pmix3x/pmix/src/mca/pnet/opa/pnet_opa.c b/opal/mca/pmix/pmix3x/pmix/src/mca/pnet/opa/pnet_opa.c index 2bddd22f18b..712b1644219 100644 --- a/opal/mca/pmix/pmix3x/pmix/src/mca/pnet/opa/pnet_opa.c +++ b/opal/mca/pmix/pmix3x/pmix/src/mca/pnet/opa/pnet_opa.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2018 Intel, Inc. All rights reserved. + * Copyright (c) 2015-2019 Intel, Inc. All rights reserved. * Copyright (c) 2016 IBM Corporation. All rights reserved. 
* * $COPYRIGHT$ @@ -52,18 +52,18 @@ static pmix_status_t opa_init(void); static void opa_finalize(void); -static pmix_status_t allocate(pmix_nspace_t *nptr, +static pmix_status_t allocate(pmix_namespace_t *nptr, pmix_info_t *info, pmix_list_t *ilist); -static pmix_status_t setup_local_network(pmix_nspace_t *nptr, +static pmix_status_t setup_local_network(pmix_namespace_t *nptr, pmix_info_t info[], size_t ninfo); -static pmix_status_t setup_fork(pmix_nspace_t *nptr, +static pmix_status_t setup_fork(pmix_namespace_t *nptr, const pmix_proc_t *proc, char ***env); static void child_finalized(pmix_proc_t *peer); -static void local_app_finalized(pmix_nspace_t *nptr); -static void deregister_nspace(pmix_nspace_t *nptr); +static void local_app_finalized(pmix_namespace_t *nptr); +static void deregister_nspace(pmix_namespace_t *nptr); static pmix_status_t collect_inventory(pmix_info_t directives[], size_t ndirs, pmix_inventory_cbfunc_t cbfunc, void *cbdata); static pmix_status_t deliver_inventory(pmix_info_t info[], size_t ninfo, @@ -229,7 +229,7 @@ static char* transports_print(uint64_t *unique_key) /* NOTE: if there is any binary data to be transferred, then * this function MUST pack it for transport as the host will * not know how to do so */ -static pmix_status_t allocate(pmix_nspace_t *nptr, +static pmix_status_t allocate(pmix_namespace_t *nptr, pmix_info_t *info, pmix_list_t *ilist) { @@ -251,16 +251,19 @@ static pmix_status_t allocate(pmix_nspace_t *nptr, return PMIX_ERR_TAKE_NEXT_OPTION; } - if (0 == strncmp(info->key, PMIX_SETUP_APP_ENVARS, PMIX_MAX_KEYLEN)) { + if (PMIX_CHECK_KEY(info, PMIX_SETUP_APP_ENVARS)) { envars = PMIX_INFO_TRUE(info); - } else if (0 == strncmp(info->key, PMIX_SETUP_APP_ALL, PMIX_MAX_KEYLEN)) { + } else if (PMIX_CHECK_KEY(info, PMIX_SETUP_APP_ALL)) { envars = PMIX_INFO_TRUE(info); seckeys = PMIX_INFO_TRUE(info); - } else if (0 == strncmp(info->key, PMIX_SETUP_APP_NONENVARS, PMIX_MAX_KEYLEN)) { + } else if (PMIX_CHECK_KEY(info, 
PMIX_SETUP_APP_NONENVARS) || + PMIX_CHECK_KEY(info, PMIX_ALLOC_NETWORK_SEC_KEY)) { seckeys = PMIX_INFO_TRUE(info); } if (seckeys) { + pmix_output_verbose(2, pmix_pnet_base_framework.framework_output, + "pnet: opa providing seckeys"); /* put the number here - or else create an appropriate string. this just needs to * eventually be a string variable */ @@ -311,6 +314,10 @@ static pmix_status_t allocate(pmix_nspace_t *nptr, } if (envars) { + pmix_output_verbose(2, pmix_pnet_base_framework.framework_output, + "pnet: opa harvesting envars %s excluding %s", + (NULL == mca_pnet_opa_component.incparms) ? "NONE" : mca_pnet_opa_component.incparms, + (NULL == mca_pnet_opa_component.excparms) ? "NONE" : mca_pnet_opa_component.excparms); /* harvest envars to pass along */ if (NULL != mca_pnet_opa_component.include) { rc = pmix_pnet_base_harvest_envars(mca_pnet_opa_component.include, @@ -327,7 +334,7 @@ static pmix_status_t allocate(pmix_nspace_t *nptr, return PMIX_ERR_TAKE_NEXT_OPTION; } -static pmix_status_t setup_local_network(pmix_nspace_t *nptr, +static pmix_status_t setup_local_network(pmix_namespace_t *nptr, pmix_info_t info[], size_t ninfo) { @@ -335,6 +342,9 @@ static pmix_status_t setup_local_network(pmix_nspace_t *nptr, pmix_kval_t *kv; + pmix_output_verbose(2, pmix_pnet_base_framework.framework_output, + "pnet: opa setup_local_network"); + if (NULL != info) { for (n=0; n < ninfo; n++) { if (0 == strncmp(info[n].key, PMIX_PNET_OPA_BLOB, PMIX_MAX_KEYLEN)) { @@ -353,6 +363,14 @@ static pmix_status_t setup_local_network(pmix_nspace_t *nptr, return PMIX_ERR_NOMEM; } pmix_value_xfer(kv->value, &info[n].value); + if (PMIX_ENVAR == kv->value->type) { + pmix_output_verbose(2, pmix_pnet_base_framework.framework_output, + "pnet:opa:setup_local_network adding %s=%s to environment", + kv->value->data.envar.envar, kv->value->data.envar.value); + } else { + pmix_output_verbose(2, pmix_pnet_base_framework.framework_output, + "pnet:opa:setup_local_network loading blob"); + } 
pmix_list_append(&nptr->setup_data, &kv->super); } } @@ -361,12 +379,15 @@ static pmix_status_t setup_local_network(pmix_nspace_t *nptr, return PMIX_SUCCESS; } -static pmix_status_t setup_fork(pmix_nspace_t *nptr, +static pmix_status_t setup_fork(pmix_namespace_t *nptr, const pmix_proc_t *proc, char ***env) { pmix_kval_t *kv, *next; + pmix_output_verbose(2, pmix_pnet_base_framework.framework_output, + "pnet: opa setup fork"); + /* if there are any cached nspace prep blobs, execute them, * ensuring that we only do so once per nspace - note that * we don't expect to find any envars here, though we could @@ -387,14 +408,14 @@ static void child_finalized(pmix_proc_t *peer) "pnet:opa child finalized"); } -static void local_app_finalized(pmix_nspace_t *nptr) +static void local_app_finalized(pmix_namespace_t *nptr) { pmix_output_verbose(2, pmix_pnet_base_framework.framework_output, "pnet:opa app finalized"); } -static void deregister_nspace(pmix_nspace_t *nptr) +static void deregister_nspace(pmix_namespace_t *nptr) { pmix_output_verbose(2, pmix_pnet_base_framework.framework_output, "pnet:opa deregister nspace"); @@ -414,7 +435,7 @@ static pmix_status_t collect_inventory(pmix_info_t directives[], size_t ndirs, pmix_buffer_t bucket, pbkt; bool found = false; pmix_byte_object_t pbo; - char nodename[PMIX_MAXHOSTNAMELEN], *foo; + char nodename[PMIX_MAXHOSTNAMELEN] = {0}, *foo; pmix_output_verbose(2, pmix_pnet_base_framework.framework_output, "pnet:opa collect inventory"); @@ -422,7 +443,7 @@ static pmix_status_t collect_inventory(pmix_info_t directives[], size_t ndirs, /* setup the bucket - we will pass the results as a blob */ PMIX_CONSTRUCT(&bucket, pmix_buffer_t); /* pack our node name */ - gethostname(nodename, sizeof(nodename)); + gethostname(nodename, sizeof(nodename)-1); foo = &nodename[0]; PMIX_BFROPS_PACK(rc, pmix_globals.mypeer, &bucket, &foo, 1, PMIX_STRING); if (PMIX_SUCCESS != rc) { diff --git a/opal/mca/pmix/pmix3x/pmix/src/mca/pnet/opa/pnet_opa.h 
b/opal/mca/pmix/pmix3x/pmix/src/mca/pnet/opa/pnet_opa.h index 75d50d4888f..6340d9f225d 100644 --- a/opal/mca/pmix/pmix3x/pmix/src/mca/pnet/opa/pnet_opa.h +++ b/opal/mca/pmix/pmix3x/pmix/src/mca/pnet/opa/pnet_opa.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2018 Intel, Inc. All rights reserved. + * Copyright (c) 2015-2019 Intel, Inc. All rights reserved. * * $COPYRIGHT$ * @@ -20,6 +20,8 @@ BEGIN_C_DECLS typedef struct { pmix_pnet_base_component_t super; + char *incparms; + char *excparms; char **include; char **exclude; } pmix_pnet_opa_component_t; diff --git a/opal/mca/pmix/pmix3x/pmix/src/mca/pnet/opa/pnet_opa_component.c b/opal/mca/pmix/pmix3x/pmix/src/mca/pnet/opa/pnet_opa_component.c index 9a726c3f4ed..5ef1572239b 100644 --- a/opal/mca/pmix/pmix3x/pmix/src/mca/pnet/opa/pnet_opa_component.c +++ b/opal/mca/pmix/pmix3x/pmix/src/mca/pnet/opa/pnet_opa_component.c @@ -12,7 +12,7 @@ * All rights reserved. * Copyright (c) 2015 Los Alamos National Security, LLC. All rights * reserved. - * Copyright (c) 2016-2018 Intel, Inc. All rights reserved. + * Copyright (c) 2016-2019 Intel, Inc. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -69,33 +69,30 @@ pmix_pnet_opa_component_t mca_pnet_opa_component = { .exclude = NULL }; -static char *includeparam; -static char *excludeparam; - static pmix_status_t component_register(void) { pmix_mca_base_component_t *component = &mca_pnet_opa_component.super.base; - includeparam = "HFI_*,PSM2_*"; + mca_pnet_opa_component.incparms = "HFI_*,PSM2_*"; (void)pmix_mca_base_component_var_register(component, "include_envars", "Comma-delimited list of envars to harvest (\'*\' and \'?\' supported)", PMIX_MCA_BASE_VAR_TYPE_STRING, NULL, 0, 0, PMIX_INFO_LVL_2, PMIX_MCA_BASE_VAR_SCOPE_LOCAL, - &includeparam); - if (NULL != includeparam) { - mca_pnet_opa_component.include = pmix_argv_split(includeparam, ','); + &mca_pnet_opa_component.incparms); + if (NULL != mca_pnet_opa_component.incparms) { + mca_pnet_opa_component.include = pmix_argv_split(mca_pnet_opa_component.incparms, ','); } - excludeparam = NULL; + mca_pnet_opa_component.excparms = NULL; (void)pmix_mca_base_component_var_register(component, "exclude_envars", "Comma-delimited list of envars to exclude (\'*\' and \'?\' supported)", PMIX_MCA_BASE_VAR_TYPE_STRING, NULL, 0, 0, PMIX_INFO_LVL_2, PMIX_MCA_BASE_VAR_SCOPE_LOCAL, - &excludeparam); - if (NULL != excludeparam) { - mca_pnet_opa_component.exclude = pmix_argv_split(excludeparam, ','); + &mca_pnet_opa_component.excparms); + if (NULL != mca_pnet_opa_component.excparms) { + mca_pnet_opa_component.exclude = pmix_argv_split(mca_pnet_opa_component.excparms, ','); } return PMIX_SUCCESS; diff --git a/opal/mca/pmix/pmix3x/pmix/src/mca/pnet/pnet.h b/opal/mca/pmix/pmix3x/pmix/src/mca/pnet/pnet.h index 3313ca67820..fb5cc7d3635 100644 --- a/opal/mca/pmix/pmix3x/pmix/src/mca/pnet/pnet.h +++ b/opal/mca/pmix/pmix3x/pmix/src/mca/pnet/pnet.h @@ -60,7 +60,7 @@ typedef void (*pmix_pnet_base_module_fini_fn_t)(void); * each other, environmental variables picked up at the login node * for forwarding to compute nodes, or allocation 
of static endpts */ -typedef pmix_status_t (*pmix_pnet_base_module_allocate_fn_t)(pmix_nspace_t *nptr, +typedef pmix_status_t (*pmix_pnet_base_module_allocate_fn_t)(pmix_namespace_t *nptr, pmix_info_t *info, pmix_list_t *ilist); @@ -68,7 +68,7 @@ typedef pmix_status_t (*pmix_pnet_base_module_allocate_fn_t)(pmix_nspace_t *nptr * Give the local network library an opportunity to setup address information * for the application by passing in the layout type and a regex describing * the layout */ -typedef pmix_status_t (*pmix_pnet_base_module_setup_local_net_fn_t)(pmix_nspace_t *nptr, +typedef pmix_status_t (*pmix_pnet_base_module_setup_local_net_fn_t)(pmix_namespace_t *nptr, pmix_info_t info[], size_t ninfo); @@ -76,7 +76,7 @@ typedef pmix_status_t (*pmix_pnet_base_module_setup_local_net_fn_t)(pmix_nspace_ * Give the local network library an opportunity to add any envars to the * environment of a local application process prior to fork/exec */ -typedef pmix_status_t (*pmix_pnet_base_module_setup_fork_fn_t)(pmix_nspace_t *nptr, +typedef pmix_status_t (*pmix_pnet_base_module_setup_fork_fn_t)(pmix_namespace_t *nptr, const pmix_proc_t *proc, char ***env); @@ -90,13 +90,13 @@ typedef void (*pmix_pnet_base_module_child_finalized_fn_t)(pmix_proc_t *peer); * Provide an opportunity for the local network library to cleanup after * all local clients for a given application have terminated */ -typedef void (*pmix_pnet_base_module_local_app_finalized_fn_t)(pmix_nspace_t *nptr); +typedef void (*pmix_pnet_base_module_local_app_finalized_fn_t)(pmix_namespace_t *nptr); /** * Provide an opportunity for the fabric components to cleanup any * resource allocations (e.g., static ports) they may have assigned */ -typedef void (*pmix_pnet_base_module_dregister_nspace_fn_t)(pmix_nspace_t *nptr); +typedef void (*pmix_pnet_base_module_dregister_nspace_fn_t)(pmix_namespace_t *nptr); /** @@ -166,7 +166,7 @@ typedef struct { /* define a few API versions of the functions - main difference is the - * 
string nspace parameter instead of a pointer to pmix_nspace_t. This + * string nspace parameter instead of a pointer to pmix_namespace_t. This * is done as an optimization to avoid having every component look for * that pointer */ typedef pmix_status_t (*pmix_pnet_base_API_allocate_fn_t)(char *nspace, diff --git a/opal/mca/pmix/pmix3x/pmix/src/mca/pnet/tcp/Makefile.am b/opal/mca/pmix/pmix3x/pmix/src/mca/pnet/tcp/Makefile.am index 946d81c8fba..048f34b0b63 100644 --- a/opal/mca/pmix/pmix3x/pmix/src/mca/pnet/tcp/Makefile.am +++ b/opal/mca/pmix/pmix3x/pmix/src/mca/pnet/tcp/Makefile.am @@ -11,7 +11,7 @@ # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. # Copyright (c) 2012 Los Alamos National Security, Inc. All rights reserved. -# Copyright (c) 2013-2018 Intel, Inc. All rights reserved. +# Copyright (c) 2013-2019 Intel, Inc. All rights reserved. # Copyright (c) 2017 Research Organization for Information Science # and Technology (RIST). All rights reserved. # $COPYRIGHT$ @@ -49,6 +49,9 @@ mcacomponent_LTLIBRARIES = $(component) mca_pnet_tcp_la_SOURCES = $(component_sources) mca_pnet_tcp_la_LIBADD = $(pnet_tcp_LIBS) mca_pnet_tcp_la_LDFLAGS = -module -avoid-version $(pnet_tcp_LDFLAGS) +if NEED_LIBPMIX +mca_pnet_tcp_la_LIBADD += $(top_builddir)/src/libpmix.la +endif noinst_LTLIBRARIES = $(lib) libmca_pnet_tcp_la_SOURCES = $(lib_sources) diff --git a/opal/mca/pmix/pmix3x/pmix/src/mca/pnet/tcp/pnet_tcp.c b/opal/mca/pmix/pmix3x/pmix/src/mca/pnet/tcp/pnet_tcp.c index 3f7a44868e1..81e823ad245 100644 --- a/opal/mca/pmix/pmix3x/pmix/src/mca/pnet/tcp/pnet_tcp.c +++ b/opal/mca/pmix/pmix3x/pmix/src/mca/pnet/tcp/pnet_tcp.c @@ -1,5 +1,7 @@ /* - * Copyright (c) 2018 Intel, Inc. All rights reserved. + * Copyright (c) 2018-2019 Intel, Inc. All rights reserved. + * Copyright (c) 2018 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* * $COPYRIGHT$ * @@ -47,17 +49,17 @@ static pmix_status_t tcp_init(void); static void tcp_finalize(void); -static pmix_status_t allocate(pmix_nspace_t *nptr, +static pmix_status_t allocate(pmix_namespace_t *nptr, pmix_info_t *info, pmix_list_t *ilist); -static pmix_status_t setup_local_network(pmix_nspace_t *nptr, +static pmix_status_t setup_local_network(pmix_namespace_t *nptr, pmix_info_t info[], size_t ninfo); -static pmix_status_t setup_fork(pmix_nspace_t *nptr, +static pmix_status_t setup_fork(pmix_namespace_t *nptr, const pmix_proc_t *peer, char ***env); static void child_finalized(pmix_proc_t *peer); -static void local_app_finalized(pmix_nspace_t *nptr); -static void deregister_nspace(pmix_nspace_t *nptr); +static void local_app_finalized(pmix_namespace_t *nptr); +static void deregister_nspace(pmix_namespace_t *nptr); static pmix_status_t collect_inventory(pmix_info_t directives[], size_t ndirs, pmix_inventory_cbfunc_t cbfunc, void *cbdata); static pmix_status_t deliver_inventory(pmix_info_t info[], size_t ninfo, @@ -102,7 +104,7 @@ typedef struct { } tcp_port_tracker_t; static pmix_list_t allocations, available; -static pmix_status_t process_request(pmix_nspace_t *nptr, +static pmix_status_t process_request(pmix_namespace_t *nptr, char *idkey, int ports_per_node, tcp_port_tracker_t *trk, pmix_list_t *ilist); @@ -295,7 +297,7 @@ static inline void generate_key(uint64_t* unique_key) { * NOTE: this implementation is offered as an example that can * undoubtedly be vastly improved/optimized */ -static pmix_status_t allocate(pmix_nspace_t *nptr, +static pmix_status_t allocate(pmix_namespace_t *nptr, pmix_info_t *info, pmix_list_t *ilist) { @@ -329,16 +331,20 @@ static pmix_status_t allocate(pmix_nspace_t *nptr, /* check directives to see if a crypto key and/or * network resource allocations requested */ PMIX_CONSTRUCT(&mylist, pmix_list_t); - if (0 == strncmp(info->key, PMIX_SETUP_APP_ENVARS, PMIX_MAX_KEYLEN) || - 0 == strncmp(info->key, PMIX_SETUP_APP_ALL, 
PMIX_MAX_KEYLEN)) { + if (PMIX_CHECK_KEY(info, PMIX_SETUP_APP_ENVARS) || + PMIX_CHECK_KEY(info, PMIX_SETUP_APP_ALL)) { if (NULL != mca_pnet_tcp_component.include) { + pmix_output_verbose(2, pmix_pnet_base_framework.framework_output, + "pnet: tcp harvesting envars %s excluding %s", + (NULL == mca_pnet_tcp_component.incparms) ? "NONE" : mca_pnet_tcp_component.incparms, + (NULL == mca_pnet_tcp_component.excparms) ? "NONE" : mca_pnet_tcp_component.excparms); rc = pmix_pnet_base_harvest_envars(mca_pnet_tcp_component.include, mca_pnet_tcp_component.exclude, ilist); return rc; } return PMIX_SUCCESS; - } else if (0 != strncmp(info->key, PMIX_ALLOC_NETWORK, PMIX_MAX_KEYLEN)) { + } else if (!PMIX_CHECK_KEY(info, PMIX_ALLOC_NETWORK)) { /* not a network allocation request */ return PMIX_SUCCESS; } @@ -443,11 +449,13 @@ static pmix_status_t allocate(pmix_nspace_t *nptr, } /* nope - they asked for something that we cannot do */ if (NULL == avail) { + PMIX_LIST_DESTRUCT(&mylist); return PMIX_ERR_NOT_AVAILABLE; } /* setup to track the assignment */ trk = PMIX_NEW(tcp_port_tracker_t); if (NULL == trk) { + PMIX_LIST_DESTRUCT(&mylist); return PMIX_ERR_NOMEM; } trk->nspace = strdup(nptr->nspace); @@ -459,6 +467,7 @@ static pmix_status_t allocate(pmix_nspace_t *nptr, /* return the allocated ports */ pmix_list_remove_item(&allocations, &trk->super); PMIX_RELEASE(trk); + PMIX_LIST_DESTRUCT(&mylist); return rc; } allocated = true; @@ -481,11 +490,13 @@ static pmix_status_t allocate(pmix_nspace_t *nptr, } /* nope - they asked for something that we cannot do */ if (NULL == avail) { + PMIX_LIST_DESTRUCT(&mylist); return PMIX_ERR_NOT_AVAILABLE; } /* setup to track the assignment */ trk = PMIX_NEW(tcp_port_tracker_t); if (NULL == trk) { + PMIX_LIST_DESTRUCT(&mylist); return PMIX_ERR_NOMEM; } trk->nspace = strdup(nptr->nspace); @@ -497,6 +508,7 @@ static pmix_status_t allocate(pmix_nspace_t *nptr, /* return the allocated ports */ pmix_list_remove_item(&allocations, &trk->super); 
PMIX_RELEASE(trk); + PMIX_LIST_DESTRUCT(&mylist); return rc; } allocated = true; @@ -505,6 +517,7 @@ static pmix_status_t allocate(pmix_nspace_t *nptr, pmix_output_verbose(2, pmix_pnet_base_framework.framework_output, "pnet:tcp:allocate unsupported type %s for nspace %s", type, nptr->nspace); + PMIX_LIST_DESTRUCT(&mylist); return PMIX_ERR_TAKE_NEXT_OPTION; } @@ -519,6 +532,7 @@ static pmix_status_t allocate(pmix_nspace_t *nptr, /* setup to track the assignment */ trk = PMIX_NEW(tcp_port_tracker_t); if (NULL == trk) { + PMIX_LIST_DESTRUCT(&mylist); return PMIX_ERR_NOMEM; } trk->nspace = strdup(nptr->nspace); @@ -530,6 +544,7 @@ static pmix_status_t allocate(pmix_nspace_t *nptr, /* return the allocated ports */ pmix_list_remove_item(&allocations, &trk->super); PMIX_RELEASE(trk); + PMIX_LIST_DESTRUCT(&mylist); return rc; } allocated = true; @@ -583,6 +598,7 @@ static pmix_status_t allocate(pmix_nspace_t *nptr, trk = PMIX_NEW(tcp_port_tracker_t); if (NULL == trk) { pmix_argv_free(reqs); + PMIX_LIST_DESTRUCT(&mylist); return PMIX_ERR_NOMEM; } trk->nspace = strdup(nptr->nspace); @@ -594,6 +610,7 @@ static pmix_status_t allocate(pmix_nspace_t *nptr, /* return the allocated ports */ pmix_list_remove_item(&allocations, &trk->super); PMIX_RELEASE(trk); + PMIX_LIST_DESTRUCT(&mylist); return rc; } allocated = true; @@ -604,6 +621,7 @@ static pmix_status_t allocate(pmix_nspace_t *nptr, ports_per_node, nptr->nspace); if (0 == ports_per_node) { /* nothing to allocate */ + PMIX_LIST_DESTRUCT(&mylist); return PMIX_ERR_TAKE_NEXT_OPTION; } avail = (tcp_available_ports_t*)pmix_list_get_first(&available); @@ -611,6 +629,7 @@ static pmix_status_t allocate(pmix_nspace_t *nptr, /* setup to track the assignment */ trk = PMIX_NEW(tcp_port_tracker_t); if (NULL == trk) { + PMIX_LIST_DESTRUCT(&mylist); return PMIX_ERR_NOMEM; } trk->nspace = strdup(nptr->nspace); @@ -630,26 +649,32 @@ static pmix_status_t allocate(pmix_nspace_t *nptr, } if (!allocated) { /* nope - we cannot help */ + 
PMIX_LIST_DESTRUCT(&mylist); return PMIX_ERR_TAKE_NEXT_OPTION; } } if (seckey) { + pmix_output_verbose(2, pmix_pnet_base_framework.framework_output, + "pnet:tcp: generate seckey"); generate_key(unique_key); kv = PMIX_NEW(pmix_kval_t); if (NULL == kv) { + PMIX_LIST_DESTRUCT(&mylist); return PMIX_ERR_NOMEM; } kv->key = strdup(PMIX_ALLOC_NETWORK_SEC_KEY); kv->value = (pmix_value_t*)malloc(sizeof(pmix_value_t)); if (NULL == kv->value) { PMIX_RELEASE(kv); + PMIX_LIST_DESTRUCT(&mylist); return PMIX_ERR_NOMEM; } kv->value->type = PMIX_BYTE_OBJECT; kv->value->data.bo.bytes = (char*)malloc(2 * sizeof(uint64_t)); if (NULL == kv->value->data.bo.bytes) { PMIX_RELEASE(kv); + PMIX_LIST_DESTRUCT(&mylist); return PMIX_ERR_NOMEM; } memcpy(kv->value->data.bo.bytes, unique_key, 2 * sizeof(uint64_t)); @@ -696,7 +721,7 @@ static pmix_status_t allocate(pmix_nspace_t *nptr, /* upon receipt of the launch message, each daemon adds the * static address assignments to the job-level info cache * for that job */ -static pmix_status_t setup_local_network(pmix_nspace_t *nptr, +static pmix_status_t setup_local_network(pmix_namespace_t *nptr, pmix_info_t info[], size_t ninfo) { @@ -712,6 +737,7 @@ static pmix_status_t setup_local_network(pmix_nspace_t *nptr, "pnet:tcp:setup_local_network"); if (NULL != info) { + idkey = strdup("default"); for (n=0; n < ninfo; n++) { /* look for my key */ if (0 == strncmp(info[n].key, PMIX_TCP_SETUP_APP_KEY, PMIX_MAX_KEYLEN)) { @@ -724,7 +750,12 @@ static pmix_status_t setup_local_network(pmix_nspace_t *nptr, PMIX_BFROPS_UNPACK(rc, pmix_globals.mypeer, &bkt, &nkvals, &cnt, PMIX_SIZE); /* setup the info array */ - PMIX_INFO_CREATE(jinfo, nkvals); + PMIX_INFO_CONSTRUCT(&stinfo); + pmix_strncpy(stinfo.key, idkey, PMIX_MAX_KEYLEN); + stinfo.value.type = PMIX_DATA_ARRAY; + PMIX_DATA_ARRAY_CREATE(stinfo.value.data.darray, nkvals, PMIX_INFO); + jinfo = (pmix_info_t*)stinfo.value.data.darray->array; + /* cycle thru the blob and extract the kvals */ kv = 
PMIX_NEW(pmix_kval_t); cnt = 1; @@ -736,7 +767,7 @@ static pmix_status_t setup_local_network(pmix_nspace_t *nptr, "recvd KEY %s %s", kv->key, (PMIX_STRING == kv->value->type) ? kv->value->data.string : "NON-STRING"); /* xfer the value to the info */ - (void)strncpy(jinfo[m].key, kv->key, PMIX_MAX_KEYLEN); + pmix_strncpy(jinfo[m].key, kv->key, PMIX_MAX_KEYLEN); PMIX_BFROPS_VALUE_XFER(rc, pmix_globals.mypeer, &jinfo[m].value, kv->value); /* if this is the ID key, save it */ @@ -762,12 +793,6 @@ static pmix_status_t setup_local_network(pmix_nspace_t *nptr, PMIX_INFO_FREE(jinfo, nkvals); return PMIX_ERR_BAD_PARAM; } - /* the data gets stored as a pmix_data_array_t on the provided key */ - PMIX_INFO_CONSTRUCT(&stinfo); - (void)strncpy(stinfo.key, idkey, PMIX_MAX_KEYLEN); - stinfo.value.type = PMIX_DATA_ARRAY; - PMIX_DATA_ARRAY_CREATE(stinfo.value.data.darray, nkvals, PMIX_INFO); - stinfo.value.data.darray->array = jinfo; /* cache the info on the job */ PMIX_GDS_CACHE_JOB_INFO(rc, pmix_globals.mypeer, nptr, @@ -782,9 +807,11 @@ static pmix_status_t setup_local_network(pmix_nspace_t *nptr, return PMIX_SUCCESS; } -static pmix_status_t setup_fork(pmix_nspace_t *nptr, +static pmix_status_t setup_fork(pmix_namespace_t *nptr, const pmix_proc_t *peer, char ***env) { + pmix_output_verbose(2, pmix_pnet_base_framework.framework_output, + "pnet:tcp:setup_fork"); return PMIX_SUCCESS; } @@ -801,7 +828,7 @@ static void child_finalized(pmix_proc_t *peer) * provides an opportunity for the local network to cleanup * any resources consumed locally by the clients of that job. * We don't have anything we need to do */ -static void local_app_finalized(pmix_nspace_t *nptr) +static void local_app_finalized(pmix_namespace_t *nptr) { pmix_output_verbose(2, pmix_pnet_base_framework.framework_output, "pnet:tcp app finalized"); @@ -811,7 +838,7 @@ static void local_app_finalized(pmix_nspace_t *nptr) * PMix function, which in turn calls my TCP component to release the * assignments for that job. 
The addresses are marked as "available" * for reuse on the next job. */ -static void deregister_nspace(pmix_nspace_t *nptr) +static void deregister_nspace(pmix_namespace_t *nptr) { tcp_port_tracker_t *trk; @@ -840,8 +867,8 @@ static pmix_status_t collect_inventory(pmix_info_t directives[], size_t ndirs, pmix_inventory_cbfunc_t cbfunc, void *cbdata) { pmix_inventory_rollup_t *cd = (pmix_inventory_rollup_t*)cbdata; - char *prefix, myhost[PMIX_MAXHOSTNAMELEN]; - char myconnhost[PMIX_MAXHOSTNAMELEN]; + char *prefix, myhost[PMIX_MAXHOSTNAMELEN] = {0}; + char myconnhost[PMIX_MAXHOSTNAMELEN] = {0}; char name[32], uri[2048]; struct sockaddr_storage my_ss; char *foo; @@ -852,10 +879,13 @@ static pmix_status_t collect_inventory(pmix_info_t directives[], size_t ndirs, pmix_byte_object_t pbo; pmix_kval_t *kv; + pmix_output_verbose(2, pmix_pnet_base_framework.framework_output, + "pnet:tcp:collect_inventory"); + /* setup the bucket - we will pass the results as a blob */ PMIX_CONSTRUCT(&bucket, pmix_buffer_t); /* add our hostname */ - gethostname(myhost, sizeof(myhost)); + gethostname(myhost, sizeof(myhost)-1); foo = &myhost[0]; PMIX_BFROPS_PACK(rc, pmix_globals.mypeer, &bucket, &foo, 1, PMIX_STRING); if (PMIX_SUCCESS != rc) { @@ -890,11 +920,11 @@ static pmix_status_t collect_inventory(pmix_info_t directives[], size_t ndirs, if (AF_INET == my_ss.ss_family) { prefix = "tcp4://"; inet_ntop(AF_INET, &((struct sockaddr_in*) &my_ss)->sin_addr, - myconnhost, PMIX_MAXHOSTNAMELEN); + myconnhost, PMIX_MAXHOSTNAMELEN-1); } else if (AF_INET6 == my_ss.ss_family) { prefix = "tcp6://"; inet_ntop(AF_INET6, &((struct sockaddr_in6*) &my_ss)->sin6_addr, - myconnhost, PMIX_MAXHOSTNAMELEN); + myconnhost, PMIX_MAXHOSTNAMELEN-1); } else { continue; } @@ -949,7 +979,7 @@ static pmix_status_t collect_inventory(pmix_info_t directives[], size_t ndirs, return PMIX_SUCCESS; } -static pmix_status_t process_request(pmix_nspace_t *nptr, +static pmix_status_t process_request(pmix_namespace_t *nptr, char 
*idkey, int ports_per_node, tcp_port_tracker_t *trk, pmix_list_t *ilist) diff --git a/opal/mca/pmix/pmix3x/pmix/src/mca/pnet/tcp/pnet_tcp.h b/opal/mca/pmix/pmix3x/pmix/src/mca/pnet/tcp/pnet_tcp.h index 63ffd878712..54e0fe0316f 100644 --- a/opal/mca/pmix/pmix3x/pmix/src/mca/pnet/tcp/pnet_tcp.h +++ b/opal/mca/pmix/pmix3x/pmix/src/mca/pnet/tcp/pnet_tcp.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018 Intel, Inc. All rights reserved. + * Copyright (c) 2018-2019 Intel, Inc. All rights reserved. * * $COPYRIGHT$ * @@ -22,6 +22,8 @@ typedef struct { pmix_pnet_base_component_t super; char *static_ports; char *default_request; + char *incparms; + char *excparms; char **include; char **exclude; } pmix_pnet_tcp_component_t; diff --git a/opal/mca/pmix/pmix3x/pmix/src/mca/pnet/tcp/pnet_tcp_component.c b/opal/mca/pmix/pmix3x/pmix/src/mca/pnet/tcp/pnet_tcp_component.c index 64dc93c2409..b313ab36076 100644 --- a/opal/mca/pmix/pmix3x/pmix/src/mca/pnet/tcp/pnet_tcp_component.c +++ b/opal/mca/pmix/pmix3x/pmix/src/mca/pnet/tcp/pnet_tcp_component.c @@ -1,6 +1,6 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* - * Copyright (c) 2018 Intel, Inc. All rights reserved. + * Copyright (c) 2018-2019 Intel, Inc. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -60,9 +60,6 @@ pmix_pnet_tcp_component_t mca_pnet_tcp_component = { .exclude = NULL }; -static char *includeparam; -static char *excludeparam; - static pmix_status_t component_register(void) { pmix_mca_base_component_t *component = &mca_pnet_tcp_component.super.base; @@ -86,26 +83,26 @@ static pmix_status_t component_register(void) PMIX_MCA_BASE_VAR_SCOPE_READONLY, &mca_pnet_tcp_component.default_request); - includeparam = NULL; + mca_pnet_tcp_component.incparms = NULL; (void)pmix_mca_base_component_var_register(component, "include_envars", "Comma-delimited list of envars to harvest (\'*\' and \'?\' supported)", PMIX_MCA_BASE_VAR_TYPE_STRING, NULL, 0, 0, PMIX_INFO_LVL_2, PMIX_MCA_BASE_VAR_SCOPE_LOCAL, - &includeparam); - if (NULL != includeparam) { - mca_pnet_tcp_component.include = pmix_argv_split(includeparam, ','); + &mca_pnet_tcp_component.incparms); + if (NULL != mca_pnet_tcp_component.incparms) { + mca_pnet_tcp_component.include = pmix_argv_split(mca_pnet_tcp_component.incparms, ','); } - excludeparam = NULL; + mca_pnet_tcp_component.excparms = NULL; (void)pmix_mca_base_component_var_register(component, "exclude_envars", "Comma-delimited list of envars to exclude (\'*\' and \'?\' supported)", PMIX_MCA_BASE_VAR_TYPE_STRING, NULL, 0, 0, PMIX_INFO_LVL_2, PMIX_MCA_BASE_VAR_SCOPE_LOCAL, - &excludeparam); - if (NULL != excludeparam) { - mca_pnet_tcp_component.exclude = pmix_argv_split(excludeparam, ','); + &mca_pnet_tcp_component.excparms); + if (NULL != mca_pnet_tcp_component.excparms) { + mca_pnet_tcp_component.exclude = pmix_argv_split(mca_pnet_tcp_component.excparms, ','); } return PMIX_SUCCESS; diff --git a/opal/mca/pmix/pmix3x/pmix/src/mca/pnet/test/Makefile.am b/opal/mca/pmix/pmix3x/pmix/src/mca/pnet/test/Makefile.am index 3faf68a32c2..b71000ef555 100644 --- a/opal/mca/pmix/pmix3x/pmix/src/mca/pnet/test/Makefile.am +++ b/opal/mca/pmix/pmix3x/pmix/src/mca/pnet/test/Makefile.am @@ -11,7 +11,7 @@ # Copyright 
(c) 2004-2005 The Regents of the University of California. # All rights reserved. # Copyright (c) 2012 Los Alamos National Security, Inc. All rights reserved. -# Copyright (c) 2013-2018 Intel, Inc. All rights reserved. +# Copyright (c) 2013-2019 Intel, Inc. All rights reserved. # Copyright (c) 2017 Research Organization for Information Science # and Technology (RIST). All rights reserved. # $COPYRIGHT$ @@ -46,6 +46,9 @@ mcacomponentdir = $(pmixlibdir) mcacomponent_LTLIBRARIES = $(component) mca_pnet_test_la_SOURCES = $(component_sources) mca_pnet_test_la_LDFLAGS = -module -avoid-version +if NEED_LIBPMIX +mca_pnet_test_la_LIBADD = $(top_builddir)/src/libpmix.la +endif noinst_LTLIBRARIES = $(lib) libmca_pnet_test_la_SOURCES = $(lib_sources) diff --git a/opal/mca/pmix/pmix3x/pmix/src/mca/pnet/test/pnet_test.c b/opal/mca/pmix/pmix3x/pmix/src/mca/pnet/test/pnet_test.c index a8808b43e6e..830e0c02e59 100644 --- a/opal/mca/pmix/pmix3x/pmix/src/mca/pnet/test/pnet_test.c +++ b/opal/mca/pmix/pmix3x/pmix/src/mca/pnet/test/pnet_test.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2018 Intel, Inc. All rights reserved. + * Copyright (c) 2015-2019 Intel, Inc. All rights reserved. * Copyright (c) 2016 IBM Corporation. All rights reserved. 
* * $COPYRIGHT$ @@ -46,18 +46,18 @@ static pmix_status_t test_init(void); static void test_finalize(void); -static pmix_status_t allocate(pmix_nspace_t *nptr, +static pmix_status_t allocate(pmix_namespace_t *nptr, pmix_info_t *info, pmix_list_t *ilist); -static pmix_status_t setup_local_network(pmix_nspace_t *nptr, +static pmix_status_t setup_local_network(pmix_namespace_t *nptr, pmix_info_t info[], size_t ninfo); -static pmix_status_t setup_fork(pmix_nspace_t *nptr, +static pmix_status_t setup_fork(pmix_namespace_t *nptr, const pmix_proc_t *proc, char ***env); static void child_finalized(pmix_proc_t *peer); -static void local_app_finalized(pmix_nspace_t *nptr); -static void deregister_nspace(pmix_nspace_t *nptr); +static void local_app_finalized(pmix_namespace_t *nptr); +static void deregister_nspace(pmix_namespace_t *nptr); static pmix_status_t collect_inventory(pmix_info_t directives[], size_t ndirs, pmix_inventory_cbfunc_t cbfunc, void *cbdata); static pmix_status_t deliver_inventory(pmix_info_t info[], size_t ninfo, @@ -94,7 +94,7 @@ static void test_finalize(void) /* NOTE: if there is any binary data to be transferred, then * this function MUST pack it for transport as the host will * not know how to do so */ -static pmix_status_t allocate(pmix_nspace_t *nptr, +static pmix_status_t allocate(pmix_namespace_t *nptr, pmix_info_t *info, pmix_list_t *ilist) { @@ -283,7 +283,7 @@ static pmix_status_t allocate(pmix_nspace_t *nptr, return PMIX_SUCCESS; } -static pmix_status_t setup_local_network(pmix_nspace_t *nptr, +static pmix_status_t setup_local_network(pmix_namespace_t *nptr, pmix_info_t info[], size_t ninfo) { @@ -345,8 +345,13 @@ static pmix_status_t setup_local_network(pmix_nspace_t *nptr, cnt = 1; PMIX_BFROPS_UNPACK(rc, pmix_globals.mypeer, &bkt, &nkvals, &cnt, PMIX_SIZE); - /* setup the info array */ - PMIX_INFO_CREATE(jinfo, nkvals); + /* the data gets stored as a pmix_data_array_t on the provided key */ + PMIX_INFO_CONSTRUCT(&stinfo); + 
pmix_strncpy(stinfo.key, idkey, PMIX_MAX_KEYLEN); + stinfo.value.type = PMIX_DATA_ARRAY; + PMIX_DATA_ARRAY_CREATE(stinfo.value.data.darray, nkvals, PMIX_INFO); + jinfo = (pmix_info_t*)stinfo.value.data.darray->array; + /* cycle thru the blob and extract the kvals */ kv = PMIX_NEW(pmix_kval_t); cnt = 1; @@ -358,7 +363,7 @@ static pmix_status_t setup_local_network(pmix_nspace_t *nptr, "recvd KEY %s %s", kv->key, (PMIX_STRING == kv->value->type) ? kv->value->data.string : "NON-STRING"); /* xfer the value to the info */ - (void)strncpy(jinfo[m].key, kv->key, PMIX_MAX_KEYLEN); + pmix_strncpy(jinfo[m].key, kv->key, PMIX_MAX_KEYLEN); PMIX_BFROPS_VALUE_XFER(rc, pmix_globals.mypeer, &jinfo[m].value, kv->value); /* if this is the ID key, save it */ @@ -384,14 +389,7 @@ static pmix_status_t setup_local_network(pmix_nspace_t *nptr, PMIX_INFO_FREE(jinfo, nkvals); return PMIX_ERR_BAD_PARAM; } - /* the data gets stored as a pmix_data_array_t on the provided key */ - PMIX_INFO_CONSTRUCT(&stinfo); - (void)strncpy(stinfo.key, idkey, PMIX_MAX_KEYLEN); - stinfo.value.type = PMIX_DATA_ARRAY; - PMIX_DATA_ARRAY_CREATE(stinfo.value.data.darray, nkvals, PMIX_INFO); - stinfo.value.data.darray->array = jinfo; - - /* cache the info on the job */ + /* cache the info on the job */ PMIX_GDS_CACHE_JOB_INFO(rc, pmix_globals.mypeer, nptr, &stinfo, 1); PMIX_INFO_DESTRUCT(&stinfo); @@ -404,7 +402,7 @@ static pmix_status_t setup_local_network(pmix_nspace_t *nptr, return PMIX_SUCCESS; } -static pmix_status_t setup_fork(pmix_nspace_t *nptr, +static pmix_status_t setup_fork(pmix_namespace_t *nptr, const pmix_proc_t *proc, char ***env) { @@ -457,12 +455,12 @@ static void child_finalized(pmix_proc_t *peer) peer->nspace, peer->rank); } -static void local_app_finalized(pmix_nspace_t *nptr) +static void local_app_finalized(pmix_namespace_t *nptr) { pmix_output(0, "pnet:test NSPACE %s LOCALLY FINALIZED", nptr->nspace); } -static void deregister_nspace(pmix_nspace_t *nptr) +static void 
deregister_nspace(pmix_namespace_t *nptr) { pmix_output(0, "pnet:test DEREGISTER NSPACE %s", nptr->nspace); } diff --git a/opal/mca/pmix/pmix3x/pmix/src/mca/preg/base/preg_base_frame.c b/opal/mca/pmix/pmix3x/pmix/src/mca/preg/base/preg_base_frame.c index dbf551ea640..706c2bc8aae 100644 --- a/opal/mca/pmix/pmix3x/pmix/src/mca/preg/base/preg_base_frame.c +++ b/opal/mca/pmix/pmix3x/pmix/src/mca/preg/base/preg_base_frame.c @@ -14,6 +14,7 @@ * Copyright (c) 2014-2017 Intel, Inc. All rights reserved. * Copyright (c) 2015-2016 Research Organization for Information Science * and Technology (RIST). All rights reserved. + * Copyright (c) 2019 IBM Corporation. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -98,6 +99,7 @@ static void rvcon(pmix_regex_value_t *p) p->prefix = NULL; p->suffix = NULL; p->num_digits = 0; + p->skip = false; PMIX_CONSTRUCT(&p->ranges, pmix_list_t); } static void rvdes(pmix_regex_value_t *p) diff --git a/opal/mca/pmix/pmix3x/pmix/src/mca/preg/native/Makefile.am b/opal/mca/pmix/pmix3x/pmix/src/mca/preg/native/Makefile.am index fa51393622f..607dcdb0c96 100644 --- a/opal/mca/pmix/pmix3x/pmix/src/mca/preg/native/Makefile.am +++ b/opal/mca/pmix/pmix3x/pmix/src/mca/preg/native/Makefile.am @@ -11,7 +11,7 @@ # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. # Copyright (c) 2012 Los Alamos National Security, Inc. All rights reserved. -# Copyright (c) 2013-2017 Intel, Inc. All rights reserved. +# Copyright (c) 2013-2019 Intel, Inc. All rights reserved. 
# $COPYRIGHT$ # # Additional copyrights may follow @@ -44,6 +44,9 @@ mcacomponentdir = $(pmixlibdir) mcacomponent_LTLIBRARIES = $(component) mca_preg_native_la_SOURCES = $(component_sources) mca_preg_native_la_LDFLAGS = -module -avoid-version +if NEED_LIBPMIX +mca_preg_native_la_LIBADD = $(top_builddir)/src/libpmix.la +endif noinst_LTLIBRARIES = $(lib) libmca_preg_native_la_SOURCES = $(lib_sources) diff --git a/opal/mca/pmix/pmix3x/pmix/src/mca/preg/native/preg_native.c b/opal/mca/pmix/pmix3x/pmix/src/mca/preg/native/preg_native.c index 1a98766f8e7..0c9d6188a0d 100644 --- a/opal/mca/pmix/pmix3x/pmix/src/mca/preg/native/preg_native.c +++ b/opal/mca/pmix/pmix3x/pmix/src/mca/preg/native/preg_native.c @@ -1,6 +1,6 @@ /* - * Copyright (c) 2015-2018 Intel, Inc. All rights reserved. - * Copyright (c) 2016 IBM Corporation. All rights reserved. + * Copyright (c) 2015-2019 Intel, Inc. All rights reserved. + * Copyright (c) 2016-2019 IBM Corporation. All rights reserved. * Copyright (c) 2018 Research Organization for Information Science * and Technology (RIST). All rights reserved. * @@ -152,9 +152,22 @@ static pmix_status_t generate_node_regex(const char *input, suffix = NULL; numdigits = (int)strlen(&vptr[startnum]); } + /* is this value already on our list? */ found = false; PMIX_LIST_FOREACH(vreg, &vids, pmix_regex_value_t) { + // The regex must preserve ordering of the values. + // If we disqualified this entry in a previous check then exclude it + // from future checks as well. This will prevent a later entry from + // being 'pulled forward' accidentally. For example, given: + // "a28n01,a99n02,a28n02" + // Without this 'skip' the loop would have 'a28n02' combine with + // 'a28n01' jumping over the 'a99n02' entry, and thus not preserving + // the order of the list when the regex is unpacked. 
+ if( vreg->skip ) { + continue; + } + if (0 < strlen(prefix) && NULL == vreg->prefix) { continue; } @@ -163,6 +176,7 @@ static pmix_status_t generate_node_regex(const char *input, } if (0 < strlen(prefix) && NULL != vreg->prefix && 0 != strcmp(prefix, vreg->prefix)) { + vreg->skip = true; continue; } if (NULL == suffix && NULL != vreg->suffix) { @@ -173,9 +187,11 @@ static pmix_status_t generate_node_regex(const char *input, } if (NULL != suffix && NULL != vreg->suffix && 0 != strcmp(suffix, vreg->suffix)) { + vreg->skip = true; continue; } if (numdigits != vreg->num_digits) { + vreg->skip = true; continue; } /* found a match - flag it */ @@ -522,7 +538,7 @@ static pmix_status_t resolve_peers(const char *nodename, /* scope is irrelevant as the info we seek must be local */ cb.scope = PMIX_SCOPE_UNDEF; /* let the proc point to the nspace */ - (void)strncpy(proc.nspace, nspace, PMIX_MAX_NSLEN); + pmix_strncpy(proc.nspace, nspace, PMIX_MAX_NSLEN); proc.rank = PMIX_RANK_WILDCARD; cb.proc = &proc; @@ -565,7 +581,7 @@ static pmix_status_t resolve_peers(const char *nodename, goto complete; } for (j=0; j < np; j++) { - (void)strncpy(p[j].nspace, nspace, PMIX_MAX_NSLEN); + pmix_strncpy(p[j].nspace, nspace, PMIX_MAX_NSLEN); p[j].rank = strtoul(ptr[j], NULL, 10); } rc = PMIX_SUCCESS; @@ -619,7 +635,7 @@ static pmix_status_t resolve_nodes(const char *nspace, /* scope is irrelevant as the info we seek must be local */ cb.scope = PMIX_SCOPE_UNDEF; /* put the nspace in the proc field */ - (void)strncpy(proc.nspace, nspace, PMIX_MAX_NSLEN); + pmix_strncpy(proc.nspace, nspace, PMIX_MAX_NSLEN); /* the info will be associated with PMIX_RANK_WILDCARD */ proc.rank = PMIX_RANK_WILDCARD; cb.proc = &proc; @@ -877,7 +893,7 @@ static pmix_status_t regex_parse_value_range(char *base, char *range, for (found = false, i = 0; i < len; ++i) { if (isdigit((int) range[i])) { if (!found) { - start = atoi(range + i); + start = strtol(range + i, NULL, 10); found = true; break; } diff --git 
a/opal/mca/pmix/pmix3x/pmix/src/mca/preg/preg_types.h b/opal/mca/pmix/pmix3x/pmix/src/mca/preg/preg_types.h index 9f1b8a8ae51..932d8e552c4 100644 --- a/opal/mca/pmix/pmix3x/pmix/src/mca/preg/preg_types.h +++ b/opal/mca/pmix/pmix3x/pmix/src/mca/preg/preg_types.h @@ -13,6 +13,7 @@ * Copyright (c) 2007-2011 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2012-2013 Los Alamos National Security, Inc. All rights reserved. * Copyright (c) 2014-2017 Intel, Inc. All rights reserved. + * Copyright (c) 2019 IBM Corporation. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -51,6 +52,7 @@ typedef struct { char *suffix; int num_digits; pmix_list_t ranges; + bool skip; } pmix_regex_value_t; PMIX_EXPORT PMIX_CLASS_DECLARATION(pmix_regex_value_t); diff --git a/opal/mca/pmix/pmix3x/pmix/src/mca/psec/dummy_handshake/Makefile.am b/opal/mca/pmix/pmix3x/pmix/src/mca/psec/dummy_handshake/Makefile.am new file mode 100644 index 00000000000..1dd3853eb2d --- /dev/null +++ b/opal/mca/pmix/pmix3x/pmix/src/mca/psec/dummy_handshake/Makefile.am @@ -0,0 +1,59 @@ +# -*- makefile -*- +# +# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana +# University Research and Technology +# Corporation. All rights reserved. +# Copyright (c) 2004-2005 The University of Tennessee and The University +# of Tennessee Research Foundation. All rights +# reserved. +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# University of Stuttgart. All rights reserved. +# Copyright (c) 2004-2005 The Regents of the University of California. +# All rights reserved. +# Copyright (c) 2012 Los Alamos National Security, Inc. All rights reserved. +# Copyright (c) 2013-2019 Intel, Inc. All rights reserved. +# Copyright (c) 2019 Mellanox Technologies, Inc. +# All rights reserved. 
+# $COPYRIGHT$ +# +# Additional copyrights may follow +# +# $HEADER$ +# + +if MCA_BUILD_PSEC_DUMMY_HANDSHAKE + +headers = psec_dummy_handshake.h +sources = \ + psec_dummy_handshake_component.c \ + psec_dummy_handshake.c + +# Make the output library in this directory, and name it either +# mca__.la (for DSO builds) or libmca__.la +# (for static builds). + +if MCA_BUILD_pmix_psec_dummy_handshake_DSO +lib = +lib_sources = +component = mca_psec_dummy_handshake.la +component_sources = $(headers) $(sources) +else +lib = libmca_psec_dummy_handshake.la +lib_sources = $(headers) $(sources) +component = +component_sources = +endif + +mcacomponentdir = $(pmixlibdir) +mcacomponent_LTLIBRARIES = $(component) +mca_psec_dummy_handshake_la_SOURCES = $(component_sources) +mca_psec_dummy_handshake_la_LDFLAGS = -module -avoid-version +if NEED_LIBPMIX +mca_psec_dummy_handshake_la_LIBADD = $(top_builddir)/src/libpmix.la +endif + +noinst_LTLIBRARIES = $(lib) +libmca_psec_dummy_handshake_la_SOURCES = $(lib_sources) +libmca_psec_dummy_handshake_la_LDFLAGS = -module -avoid-version + +endif diff --git a/opal/mca/pmix/pmix3x/pmix/src/mca/psec/dummy_handshake/psec_dummy_handshake.c b/opal/mca/pmix/pmix3x/pmix/src/mca/psec/dummy_handshake/psec_dummy_handshake.c new file mode 100644 index 00000000000..ae1f9b62e59 --- /dev/null +++ b/opal/mca/pmix/pmix3x/pmix/src/mca/psec/dummy_handshake/psec_dummy_handshake.c @@ -0,0 +1,170 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ +/* + * Copyright (c) 2019 Mellanox Technologies, Inc. + * All rights reserved. + * Copyright (c) 2019 Intel, Inc. All rights reserved. 
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include + +#include +#ifdef HAVE_SYS_TYPES_H +#include +#endif + +#include + +#include "src/include/pmix_globals.h" +#include "src/util/error.h" +#include "src/util/output.h" + +#include "src/mca/psec/base/base.h" +#include "psec_dummy_handshake.h" + +#include "src/mca/ptl/base/base.h" + +#define PMIX_PSEC_DUMMY_HNDSHK_STR "PMIX_PSEC_DUMMY_HANDSHAKE_STRING" + +static pmix_status_t simple_init(void); +static void simple_finalize(void); +static pmix_status_t create_cred(struct pmix_peer_t *peer, + const pmix_info_t directives[], size_t ndirs, + pmix_info_t **info, size_t *ninfo, + pmix_byte_object_t *cred); +static pmix_status_t client_hndshk(int sd); +static pmix_status_t server_hndshk(int sd); + +pmix_psec_module_t pmix_dummy_handshake_module = { + .name = "dummy_handshake", + /** init/finalize */ + .init = simple_init, + .finalize = simple_finalize, + /** Client-side */ + .create_cred = create_cred, + .client_handshake = client_hndshk, + /** Server-side */ + .validate_cred = NULL, + .server_handshake = server_hndshk +}; + +static pmix_status_t simple_init(void) +{ + pmix_output_verbose(2, pmix_psec_base_framework.framework_output, + "psec: simple init"); + return PMIX_SUCCESS; +} + +static void simple_finalize(void) +{ + pmix_output_verbose(2, pmix_psec_base_framework.framework_output, + "psec: simple finalize"); +} + +static pmix_status_t create_cred(struct pmix_peer_t *peer, + const pmix_info_t directives[], size_t ndirs, + pmix_info_t **info, size_t *ninfo, + pmix_byte_object_t *cred) +{ + char mycred[] = "dymmy_cred"; + + pmix_output_verbose(2, pmix_psec_base_framework.framework_output, + "psec: simple create_cred"); + + /* ensure initialization */ + PMIX_BYTE_OBJECT_CONSTRUCT(cred); + + cred->bytes = strdup(mycred); + cred->size = strlen(mycred) + 1; + + return PMIX_SUCCESS; +} + +static pmix_status_t server_hndshk(int sd) +{ + pmix_status_t rc, status = PMIX_SUCCESS; + char 
*hndshk_msg = NULL; + size_t size; + + pmix_output_verbose(2, pmix_psec_base_framework.framework_output, + "psec: simple server_hndshk"); + + asprintf(&hndshk_msg, "%s", PMIX_PSEC_DUMMY_HNDSHK_STR); + size = strlen(hndshk_msg); + + /* send size of handshake message */ + if (PMIX_SUCCESS != (rc = pmix_ptl_base_send_blocking(sd, (char*)&size, + sizeof(size)))) { + goto exit; + } + /* send handshake message */ + if (PMIX_SUCCESS != (rc = pmix_ptl_base_send_blocking(sd, hndshk_msg, + size))) { + goto exit; + } + /* recv handshake status from client */ + if (PMIX_SUCCESS != (rc = pmix_ptl_base_recv_blocking(sd, (char*)&status, + sizeof(status)))) { + goto exit; + } + rc = status; + pmix_output(0, "[%s:%d] psec handshake status %d recv from client", + __FILE__, __LINE__, status); + +exit: + if (NULL != hndshk_msg) { + free(hndshk_msg); + } + + return rc; +} + +static pmix_status_t client_hndshk(int sd) +{ + char *hndshk_msg = NULL; + size_t size; + pmix_status_t rc, status = PMIX_SUCCESS; + + pmix_output_verbose(2, pmix_psec_base_framework.framework_output, + "psec: simple client_hndshk"); + + /* recv size of handshake message */ + if (PMIX_SUCCESS != (rc = pmix_ptl_base_recv_blocking(sd, (char*)&size, + sizeof(size_t)))) { + return rc; + } + hndshk_msg = (char*)malloc(size); + /* recv handshake message */ + if (PMIX_SUCCESS != (rc = pmix_ptl_base_recv_blocking(sd, (char*)hndshk_msg, + size))) { + free(hndshk_msg); + return rc; + } + /* verifying handshake data */ + if (size != strlen(PMIX_PSEC_DUMMY_HNDSHK_STR)) { + rc = PMIX_ERR_HANDSHAKE_FAILED; + goto exit; + } + if (0 != strncmp(hndshk_msg, PMIX_PSEC_DUMMY_HNDSHK_STR, size)) { + rc = PMIX_ERR_HANDSHAKE_FAILED; + goto exit; + } + + /* send handshake status to the server */ + status = PMIX_SUCCESS; + if (PMIX_SUCCESS != (rc = pmix_ptl_base_send_blocking(sd, (char*)&status, + sizeof(status)))) { + goto exit; + } + pmix_output(0, "[%s:%d] psec handshake status %d sent to server", + __FILE__, __LINE__, status); +exit: + if
(NULL != hndshk_msg) { + free(hndshk_msg); + } + return rc; +} diff --git a/opal/mca/pmix/pmix3x/pmix/src/mca/psec/dummy_handshake/psec_dummy_handshake.h b/opal/mca/pmix/pmix3x/pmix/src/mca/psec/dummy_handshake/psec_dummy_handshake.h new file mode 100644 index 00000000000..74cc3632213 --- /dev/null +++ b/opal/mca/pmix/pmix3x/pmix/src/mca/psec/dummy_handshake/psec_dummy_handshake.h @@ -0,0 +1,29 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ +/* + * Copyright (c) 2019 Mellanox Technologies, Inc. + * All rights reserved. + * Copyright (c) 2019 Intel, Inc. All rights reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#ifndef PMIX_SIMPLE_H +#define PMIX_SIMPLE_H + +#include + + +#include "src/mca/psec/psec.h" + +BEGIN_C_DECLS + +/* the component must be visible data for the linker to find it */ +PMIX_EXPORT extern pmix_psec_base_component_t mca_psec_dummy_handshake_component; +extern pmix_psec_module_t pmix_dummy_handshake_module; + +END_C_DECLS + +#endif diff --git a/opal/mca/pmix/pmix3x/pmix/src/mca/psec/dummy_handshake/psec_dummy_handshake_component.c b/opal/mca/pmix/pmix3x/pmix/src/mca/psec/dummy_handshake/psec_dummy_handshake_component.c new file mode 100644 index 00000000000..53fb13b6fed --- /dev/null +++ b/opal/mca/pmix/pmix3x/pmix/src/mca/psec/dummy_handshake/psec_dummy_handshake_component.c @@ -0,0 +1,73 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ +/* + * Copyright (c) 2019 Mellanox Technologies, Inc. + * All rights reserved. + * Copyright (c) 2019 Intel, Inc. All rights reserved. 
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include +#include "pmix_common.h" + +#include "src/mca/base/pmix_mca_base_var.h" +#include "src/mca/psec/psec.h" +#include "psec_dummy_handshake.h" + +static pmix_status_t component_open(void); +static pmix_status_t component_close(void); +static pmix_status_t component_query(pmix_mca_base_module_t **module, int *priority); +static pmix_psec_module_t* assign_module(void); + +/* + * Instantiate the public struct with all of our public information + * and pointers to our public functions in it + */ +pmix_psec_base_component_t mca_psec_dummy_handshake_component = { + .base = { + PMIX_PSEC_BASE_VERSION_1_0_0, + + /* Component name and version */ + .pmix_mca_component_name = "dummy_handshake", + PMIX_MCA_BASE_MAKE_VERSION(component, + PMIX_MAJOR_VERSION, + PMIX_MINOR_VERSION, + PMIX_RELEASE_VERSION), + + /* Component open and close functions */ + .pmix_mca_open_component = component_open, + .pmix_mca_close_component = component_close, + .pmix_mca_query_component = component_query, + }, + .data = { + /* The component is checkpoint ready */ + PMIX_MCA_BASE_METADATA_PARAM_CHECKPOINT + }, + .assign_module = assign_module +}; + +static int component_open(void) +{ + return PMIX_SUCCESS; +} + +static int component_query(pmix_mca_base_module_t **module, int *priority) +{ + *priority = 100; + *module = (pmix_mca_base_module_t *)&pmix_dummy_handshake_module; + return PMIX_SUCCESS; +} + + +static int component_close(void) +{ + return PMIX_SUCCESS; +} + +static pmix_psec_module_t* assign_module(void) +{ + return &pmix_dummy_handshake_module; +} diff --git a/opal/mca/pmix/pmix3x/pmix/src/mca/psec/munge/Makefile.am b/opal/mca/pmix/pmix3x/pmix/src/mca/psec/munge/Makefile.am index 5f01461190c..79756320d6a 100644 --- a/opal/mca/pmix/pmix3x/pmix/src/mca/psec/munge/Makefile.am +++ b/opal/mca/pmix/pmix3x/pmix/src/mca/psec/munge/Makefile.am @@ -11,7 +11,7 @@ # Copyright (c) 2004-2005 The Regents of the 
University of California. # All rights reserved. # Copyright (c) 2012 Los Alamos National Security, Inc. All rights reserved. -# Copyright (c) 2013-2016 Intel, Inc. All rights reserved +# Copyright (c) 2013-2019 Intel, Inc. All rights reserved. # $COPYRIGHT$ # # Additional copyrights may follow @@ -47,6 +47,9 @@ mcacomponent_LTLIBRARIES = $(component) mca_psec_munge_la_SOURCES = $(component_sources) mca_psec_munge_la_LDFLAGS = -module -avoid-version $(psec_munge_LDFLAGS) mca_psec_munge_la_LIBADD = $(psec_munge_LIBS) +if NEED_LIBPMIX +mca_psec_munge_la_LIBADD += $(top_builddir)/src/libpmix.la +endif noinst_LTLIBRARIES = $(lib) libmca_psec_munge_la_SOURCES = $(lib_sources) diff --git a/opal/mca/pmix/pmix3x/pmix/src/mca/psec/native/Makefile.am b/opal/mca/pmix/pmix3x/pmix/src/mca/psec/native/Makefile.am index 9381d8ad60f..b1086a2aac2 100644 --- a/opal/mca/pmix/pmix3x/pmix/src/mca/psec/native/Makefile.am +++ b/opal/mca/pmix/pmix3x/pmix/src/mca/psec/native/Makefile.am @@ -11,7 +11,7 @@ # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. # Copyright (c) 2012 Los Alamos National Security, Inc. All rights reserved. -# Copyright (c) 2013-2016 Intel, Inc. All rights reserved +# Copyright (c) 2013-2019 Intel, Inc. All rights reserved. 
# $COPYRIGHT$ # # Additional copyrights may follow @@ -44,6 +44,9 @@ mcacomponentdir = $(pmixlibdir) mcacomponent_LTLIBRARIES = $(component) mca_psec_native_la_SOURCES = $(component_sources) mca_psec_native_la_LDFLAGS = -module -avoid-version +if NEED_LIBPMIX +mca_psec_native_la_LIBADD = $(top_builddir)/src/libpmix.la +endif noinst_LTLIBRARIES = $(lib) libmca_psec_native_la_SOURCES = $(lib_sources) diff --git a/opal/mca/pmix/pmix3x/pmix/src/mca/psec/native/psec_native.c b/opal/mca/pmix/pmix3x/pmix/src/mca/psec/native/psec_native.c index 1af787399a5..60af0f7af1a 100644 --- a/opal/mca/pmix/pmix3x/pmix/src/mca/psec/native/psec_native.c +++ b/opal/mca/pmix/pmix3x/pmix/src/mca/psec/native/psec_native.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2018 Intel, Inc. All rights reserved. + * Copyright (c) 2015-2019 Intel, Inc. All rights reserved. * Copyright (c) 2016 IBM Corporation. All rights reserved. * Copyright (c) 2017 Research Organization for Information Science * and Technology (RIST). All rights reserved. @@ -25,7 +25,7 @@ #include "src/util/error.h" #include "src/util/output.h" -#include "src/mca/psec/psec.h" +#include "src/mca/psec/base/base.h" #include "psec_native.h" static pmix_status_t native_init(void); @@ -49,14 +49,14 @@ pmix_psec_module_t pmix_native_module = { static pmix_status_t native_init(void) { - pmix_output_verbose(2, pmix_globals.debug_output, + pmix_output_verbose(2, pmix_psec_base_framework.framework_output, "psec: native init"); return PMIX_SUCCESS; } static void native_finalize(void) { - pmix_output_verbose(2, pmix_globals.debug_output, + pmix_output_verbose(2, pmix_psec_base_framework.framework_output, "psec: native finalize"); } @@ -167,7 +167,7 @@ static pmix_status_t validate_cred(struct pmix_peer_t *peer, size_t n, m; uint32_t u32; - pmix_output_verbose(2, pmix_globals.debug_output, + pmix_output_verbose(2, pmix_psec_base_framework.framework_output, "psec: native validate_cred %s", (NULL == cred) ? 
"NULL" : "NON-NULL"); @@ -175,10 +175,10 @@ static pmix_status_t validate_cred(struct pmix_peer_t *peer, /* usock protocol - get the remote side's uid/gid */ #if defined(SO_PEERCRED) && (defined(HAVE_STRUCT_UCRED_UID) || defined(HAVE_STRUCT_UCRED_CR_UID)) /* Ignore received 'cred' and validate ucred for socket instead. */ - pmix_output_verbose(2, pmix_globals.debug_output, + pmix_output_verbose(2, pmix_psec_base_framework.framework_output, "psec:native checking getsockopt on socket %d for peer credentials", pr->sd); if (getsockopt(pr->sd, SOL_SOCKET, SO_PEERCRED, &ucred, &crlen) < 0) { - pmix_output_verbose(2, pmix_globals.debug_output, + pmix_output_verbose(2, pmix_psec_base_framework.framework_output, "psec: getsockopt SO_PEERCRED failed: %s", strerror (pmix_socket_errno)); return PMIX_ERR_INVALID_CRED; @@ -192,10 +192,10 @@ static pmix_status_t validate_cred(struct pmix_peer_t *peer, #endif #elif defined(HAVE_GETPEEREID) - pmix_output_verbose(2, pmix_globals.debug_output, + pmix_output_verbose(2, pmix_psec_base_framework.framework_output, "psec:native checking getpeereid on socket %d for peer credentials", pr->sd); if (0 != getpeereid(pr->sd, &euid, &egid)) { - pmix_output_verbose(2, pmix_globals.debug_output, + pmix_output_verbose(2, pmix_psec_base_framework.framework_output, "psec: getsockopt getpeereid failed: %s", strerror (pmix_socket_errno)); return PMIX_ERR_INVALID_CRED; @@ -255,14 +255,14 @@ static pmix_status_t validate_cred(struct pmix_peer_t *peer, /* check uid */ if (euid != pr->info->uid) { - pmix_output_verbose(2, pmix_globals.debug_output, + pmix_output_verbose(2, pmix_psec_base_framework.framework_output, "psec: socket cred contains invalid uid %u", euid); return PMIX_ERR_INVALID_CRED; } /* check gid */ if (egid != pr->info->gid) { - pmix_output_verbose(2, pmix_globals.debug_output, + pmix_output_verbose(2, pmix_psec_base_framework.framework_output, "psec: socket cred contains invalid gid %u", egid); return PMIX_ERR_INVALID_CRED; } diff --git 
a/opal/mca/pmix/pmix3x/pmix/src/mca/psec/none/Makefile.am b/opal/mca/pmix/pmix3x/pmix/src/mca/psec/none/Makefile.am index 74236996375..cde03ba502f 100644 --- a/opal/mca/pmix/pmix3x/pmix/src/mca/psec/none/Makefile.am +++ b/opal/mca/pmix/pmix3x/pmix/src/mca/psec/none/Makefile.am @@ -11,7 +11,7 @@ # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. # Copyright (c) 2012 Los Alamos National Security, Inc. All rights reserved. -# Copyright (c) 2013-2016 Intel, Inc. All rights reserved +# Copyright (c) 2013-2019 Intel, Inc. All rights reserved. # $COPYRIGHT$ # # Additional copyrights may follow @@ -44,6 +44,9 @@ mcacomponentdir = $(pmixlibdir) mcacomponent_LTLIBRARIES = $(component) mca_psec_none_la_SOURCES = $(component_sources) mca_psec_none_la_LDFLAGS = -module -avoid-version +if NEED_LIBPMIX +mca_psec_none_la_LIBADD = $(top_builddir)/src/libpmix.la +endif noinst_LTLIBRARIES = $(lib) libmca_psec_none_la_SOURCES = $(lib_sources) diff --git a/opal/mca/pmix/pmix3x/pmix/src/mca/psec/psec.h b/opal/mca/pmix/pmix3x/pmix/src/mca/psec/psec.h index 4057681f6f6..10c31e9bfa3 100644 --- a/opal/mca/pmix/pmix3x/pmix/src/mca/psec/psec.h +++ b/opal/mca/pmix/pmix3x/pmix/src/mca/psec/psec.h @@ -1,10 +1,11 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* * Copyright (c) 2007-2008 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2015-2017 Intel, Inc. All rights reserved. + * Copyright (c) 2015-2019 Intel, Inc. All rights reserved. * * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. + * Copyright (c) 2019 Mellanox Technologies, Inc. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -155,23 +156,12 @@ PMIX_EXPORT pmix_psec_module_t* pmix_psec_base_assign_module(const char *options pmix_output_verbose(2, pmix_globals.debug_output, \ "credential validated"); \ } \ - /* send them the result */ \ - if (PMIX_SUCCESS != (_r = pmix_ptl_base_send_blocking((p)->sd, (char*)&(_r), sizeof(int)))) { \ - PMIX_ERROR_LOG(_r); \ - } \ (r) = _r; \ } else if (NULL != (p)->nptr->compat.psec->server_handshake) { \ - /* execute the handshake if the security mode calls for it */ \ + /* request the handshake if the security mode calls for it */ \ pmix_output_verbose(2, pmix_globals.debug_output, \ - "executing handshake"); \ + "requesting handshake"); \ _r = PMIX_ERR_READY_FOR_HANDSHAKE; \ - if (PMIX_SUCCESS != (_r = pmix_ptl_base_send_blocking((p)->sd, (char*)&(_r), sizeof(int)))) { \ - PMIX_ERROR_LOG(_r); \ - } else { \ - if (PMIX_SUCCESS != (_r = p->nptr->compat.psec->server_handshake((p)->sd))) { \ - PMIX_ERROR_LOG(_r); \ - } \ - } \ (r) = _r; \ } else { \ /* this is not allowed */ \ @@ -179,6 +169,21 @@ PMIX_EXPORT pmix_psec_module_t* pmix_psec_base_assign_module(const char *options } \ } while(0) + +#define PMIX_PSEC_SERVER_HANDSHAKE_IFNEED(r, p, d, nd, in, nin, c) \ + if(PMIX_ERR_READY_FOR_HANDSHAKE == r) { \ + int _r; \ + /* execute the handshake if the security mode calls for it */ \ + pmix_output_verbose(2, pmix_globals.debug_output, \ + "executing handshake"); \ + if (PMIX_SUCCESS != (_r = p->nptr->compat.psec->server_handshake((p)->sd))) { \ + PMIX_ERROR_LOG(_r); \ + } \ + /* Update the reply status */ \ + (r) = _r; \ + } + + /**** COMPONENT STRUCTURE DEFINITION ****/ /* define a component-level API for initializing the component */ diff --git a/opal/mca/pmix/pmix3x/pmix/src/mca/psensor/base/psensor_base_stubs.c b/opal/mca/pmix/pmix3x/pmix/src/mca/psensor/base/psensor_base_stubs.c index c24b57d6986..b959372fe02 100644 --- a/opal/mca/pmix/pmix3x/pmix/src/mca/psensor/base/psensor_base_stubs.c +++ 
b/opal/mca/pmix/pmix3x/pmix/src/mca/psensor/base/psensor_base_stubs.c @@ -1,7 +1,7 @@ /* * Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2012 Los Alamos National Security, Inc. All rights reserved. - * Copyright (c) 2014-2017 Intel, Inc. All rights reserved. + * Copyright (c) 2014-2018 Intel, Inc. All rights reserved. * * $COPYRIGHT$ * @@ -24,6 +24,7 @@ pmix_status_t pmix_psensor_base_start(pmix_peer_t *requestor, pmix_status_t erro { pmix_psensor_active_module_t *mod; pmix_status_t rc; + bool didit = false; pmix_output_verbose(5, pmix_psensor_base_framework.framework_output, "%s:%d sensor:base: starting sensors", @@ -36,9 +37,17 @@ pmix_status_t pmix_psensor_base_start(pmix_peer_t *requestor, pmix_status_t erro if (PMIX_SUCCESS != rc && PMIX_ERR_TAKE_NEXT_OPTION != rc) { return rc; } + didit = true; } } + /* if none of the components could do it, then report + * not supported upwards so the server knows to ask + * the host to try */ + if (!didit) { + return PMIX_ERR_NOT_SUPPORTED; + } + return PMIX_SUCCESS; } @@ -46,7 +55,7 @@ pmix_status_t pmix_psensor_base_stop(pmix_peer_t *requestor, char *id) { pmix_psensor_active_module_t *mod; - pmix_status_t rc; + pmix_status_t rc, ret = PMIX_SUCCESS; pmix_output_verbose(5, pmix_psensor_base_framework.framework_output, "%s:%d sensor:base: stopping sensors", @@ -57,10 +66,14 @@ pmix_status_t pmix_psensor_base_stop(pmix_peer_t *requestor, if (NULL != mod->module->stop) { rc = mod->module->stop(requestor, id); if (PMIX_SUCCESS != rc && PMIX_ERR_TAKE_NEXT_OPTION != rc) { - return rc; + if (PMIX_SUCCESS == ret) { + ret = rc; + } + /* need to continue to ensure that all + * sensors have been stopped */ } } } - return PMIX_SUCCESS; + return ret; } diff --git a/opal/mca/pmix/pmix3x/pmix/src/mca/psensor/file/Makefile.am b/opal/mca/pmix/pmix3x/pmix/src/mca/psensor/file/Makefile.am index 30dce46e38e..638fcd6a32a 100644 --- a/opal/mca/pmix/pmix3x/pmix/src/mca/psensor/file/Makefile.am +++ 
b/opal/mca/pmix/pmix3x/pmix/src/mca/psensor/file/Makefile.am @@ -1,6 +1,6 @@ # # Copyright (c) 2009-2010 Cisco Systems, Inc. All rights reserved. -# Copyright (c) 2017 Intel, Inc. All rights reserved. +# Copyright (c) 2017-2019 Intel, Inc. All rights reserved. # $COPYRIGHT$ # # Additional copyrights may follow @@ -31,6 +31,9 @@ mcacomponentdir = $(pmixlibdir) mcacomponent_LTLIBRARIES = $(component_install) mca_psensor_file_la_SOURCES = $(sources) mca_psensor_file_la_LDFLAGS = -module -avoid-version +if NEED_LIBPMIX +mca_psensor_file_la_LIBADD = $(top_builddir)/src/libpmix.la +endif noinst_LTLIBRARIES = $(component_noinst) libmca_psensor_file_la_SOURCES =$(sources) diff --git a/opal/mca/pmix/pmix3x/pmix/src/mca/psensor/file/psensor_file.c b/opal/mca/pmix/pmix3x/pmix/src/mca/psensor/file/psensor_file.c index ab4f9ce3f02..3a050823c2c 100644 --- a/opal/mca/pmix/pmix3x/pmix/src/mca/psensor/file/psensor_file.c +++ b/opal/mca/pmix/pmix3x/pmix/src/mca/psensor/file/psensor_file.c @@ -6,7 +6,7 @@ * Copyright (c) 2011-2012 Los Alamos National Security, LLC. * All rights reserved. * - * Copyright (c) 2017 Intel, Inc. All rights reserved. + * Copyright (c) 2017-2018 Intel, Inc. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -258,7 +258,9 @@ static pmix_status_t stop(pmix_peer_t *requestor, char *id) cd = PMIX_NEW(file_caddy_t); PMIX_RETAIN(requestor); cd->requestor = requestor; - cd->id = strdup(id); + if (NULL != id) { + cd->id = strdup(id); + } /* need to push into our event base to add this to our trackers */ pmix_event_assign(&cd->ev, pmix_psensor_base.evbase, -1, @@ -343,7 +345,7 @@ static void file_sample(int sd, short args, void *cbdata) /* stop monitoring this client */ pmix_list_remove_item(&mca_psensor_file_component.trackers, &ft->super); /* generate an event */ - (void)strncpy(source.nspace, ft->requestor->info->pname.nspace, PMIX_MAX_NSLEN); + pmix_strncpy(source.nspace, ft->requestor->info->pname.nspace, PMIX_MAX_NSLEN); source.rank = ft->requestor->info->pname.rank; rc = PMIx_Notify_event(PMIX_MONITOR_FILE_ALERT, &source, ft->range, ft->info, ft->ninfo, opcbfunc, ft); diff --git a/opal/mca/pmix/pmix3x/pmix/src/mca/psensor/heartbeat/Makefile.am b/opal/mca/pmix/pmix3x/pmix/src/mca/psensor/heartbeat/Makefile.am index df4fe0466a7..95b978415d3 100644 --- a/opal/mca/pmix/pmix3x/pmix/src/mca/psensor/heartbeat/Makefile.am +++ b/opal/mca/pmix/pmix3x/pmix/src/mca/psensor/heartbeat/Makefile.am @@ -1,7 +1,7 @@ # # Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. # -# Copyright (c) 2017 Intel, Inc. All rights reserved. +# Copyright (c) 2017-2019 Intel, Inc. All rights reserved. 
# $COPYRIGHT$ # # Additional copyrights may follow @@ -32,6 +32,9 @@ mcacomponentdir = $(pmixlibdir) mcacomponent_LTLIBRARIES = $(component_install) mca_psensor_heartbeat_la_SOURCES = $(sources) mca_psensor_heartbeat_la_LDFLAGS = -module -avoid-version +if NEED_LIBPMIX +mca_psensor_heartbeat_la_LIBADD = $(top_builddir)/src/libpmix.la +endif noinst_LTLIBRARIES = $(component_noinst) libmca_psensor_heartbeat_la_SOURCES =$(sources) diff --git a/opal/mca/pmix/pmix3x/pmix/src/mca/psensor/heartbeat/psensor_heartbeat.c b/opal/mca/pmix/pmix3x/pmix/src/mca/psensor/heartbeat/psensor_heartbeat.c index 7d363c030b4..81de240b659 100644 --- a/opal/mca/pmix/pmix3x/pmix/src/mca/psensor/heartbeat/psensor_heartbeat.c +++ b/opal/mca/pmix/pmix3x/pmix/src/mca/psensor/heartbeat/psensor_heartbeat.c @@ -3,7 +3,7 @@ * Copyright (c) 2011-2012 Los Alamos National Security, LLC. All rights * reserved. * - * Copyright (c) 2017 Intel, Inc. All rights reserved. + * Copyright (c) 2017-2018 Intel, Inc. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -30,7 +30,7 @@ #include "src/util/output.h" #include "src/util/show_help.h" #include "src/include/pmix_globals.h" -#include "src/mca/ptl/ptl.h" +#include "src/mca/ptl/base/base.h" #include "src/mca/psensor/base/base.h" #include "psensor_heartbeat.h" @@ -63,6 +63,7 @@ typedef struct { pmix_data_range_t range; pmix_info_t *info; size_t ninfo; + bool stopped; } pmix_heartbeat_trkr_t; static void ft_constructor(pmix_heartbeat_trkr_t *ft) @@ -79,6 +80,7 @@ static void ft_constructor(pmix_heartbeat_trkr_t *ft) ft->range = PMIX_RANGE_NAMESPACE; ft->info = NULL; ft->ninfo = 0; + ft->stopped = false; } static void ft_destructor(pmix_heartbeat_trkr_t *ft) { @@ -168,6 +170,7 @@ static pmix_status_t heartbeat_start(pmix_peer_t *requestor, pmix_status_t error { pmix_heartbeat_trkr_t *ft; size_t n; + pmix_ptl_posted_recv_t *rcv; PMIX_OUTPUT_VERBOSE((1, pmix_psensor_base_framework.framework_output, "[%s:%d] checking heartbeat monitoring for requestor %s:%d", @@ -202,6 +205,17 @@ static pmix_status_t heartbeat_start(pmix_peer_t *requestor, pmix_status_t error return PMIX_ERR_BAD_PARAM; } + /* if the recv hasn't been posted, do so now */ + if (!mca_psensor_heartbeat_component.recv_active) { + /* setup to receive heartbeats */ + rcv = PMIX_NEW(pmix_ptl_posted_recv_t); + rcv->tag = PMIX_PTL_TAG_HEARTBEAT; + rcv->cbfunc = pmix_psensor_heartbeat_recv_beats; + /* add it to the beginning of the list of recvs */ + pmix_list_prepend(&pmix_ptl_globals.posted_recvs, &rcv->super); + mca_psensor_heartbeat_component.recv_active = true; + } + /* need to push into our event base to add this to our trackers */ pmix_event_assign(&ft->cdev, pmix_psensor_base.evbase, -1, EV_WRITE, add_tracker, ft); @@ -239,9 +253,11 @@ static pmix_status_t heartbeat_stop(pmix_peer_t *requestor, char *id) cd = PMIX_NEW(heartbeat_caddy_t); PMIX_RETAIN(requestor); cd->requestor = requestor; - cd->id = strdup(id); + if (NULL != id) { + cd->id = strdup(id); + } - /*
need to push into our event base to add this to our trackers */ + /* need to push into our event base to remove this from our trackers */ pmix_event_assign(&cd->ev, pmix_psensor_base.evbase, -1, EV_WRITE, del_tracker, cd); PMIX_POST_OBJECT(cd); @@ -254,7 +270,7 @@ static void opcbfunc(pmix_status_t status, void *cbdata) { pmix_heartbeat_trkr_t *ft = (pmix_heartbeat_trkr_t*)cbdata; - PMIX_RELEASE(ft); + PMIX_RELEASE(ft); // maintain accounting } /* this function automatically gets periodically called @@ -274,23 +290,25 @@ static void check_heartbeat(int fd, short dummy, void *cbdata) pmix_globals.myid.nspace, pmix_globals.myid.rank, ft->requestor->info->pname.nspace, ft->requestor->info->pname.rank)); - if (0 == ft->nbeats) { + if (0 == ft->nbeats && !ft->stopped) { /* no heartbeat recvd in last window */ PMIX_OUTPUT_VERBOSE((1, pmix_psensor_base_framework.framework_output, "[%s:%d] sensor:check_heartbeat failed for proc %s:%d", pmix_globals.myid.nspace, pmix_globals.myid.rank, ft->requestor->info->pname.nspace, ft->requestor->info->pname.rank)); - /* stop monitoring this client */ - pmix_list_remove_item(&mca_psensor_heartbeat_component.trackers, &ft->super); /* generate an event */ - (void)strncpy(source.nspace, ft->requestor->info->pname.nspace, PMIX_MAX_NSLEN); + pmix_strncpy(source.nspace, ft->requestor->info->pname.nspace, PMIX_MAX_NSLEN); source.rank = ft->requestor->info->pname.rank; + /* ensure the tracker remains throughout the process */ + PMIX_RETAIN(ft); + /* mark that the process appears stopped so we don't + * continue to report it */ + ft->stopped = true; rc = PMIx_Notify_event(PMIX_MONITOR_HEARTBEAT_ALERT, &source, ft->range, ft->info, ft->ninfo, opcbfunc, ft); if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); } - return; } else { PMIX_OUTPUT_VERBOSE((1, pmix_psensor_base_framework.framework_output, "[%s:%d] sensor:check_heartbeat detected %d beats for proc %s:%d", @@ -316,6 +334,8 @@ static void add_beat(int sd, short args, void *cbdata) if 
(ft->requestor == b->peer) { /* increment the beat count */ ++ft->nbeats; + /* ensure we know that the proc is alive */ + ft->stopped = false; break; } } diff --git a/opal/mca/pmix/pmix3x/pmix/src/mca/psensor/heartbeat/psensor_heartbeat.h b/opal/mca/pmix/pmix3x/pmix/src/mca/psensor/heartbeat/psensor_heartbeat.h index 2f904b60359..2052b0d9c66 100644 --- a/opal/mca/pmix/pmix3x/pmix/src/mca/psensor/heartbeat/psensor_heartbeat.h +++ b/opal/mca/pmix/pmix3x/pmix/src/mca/psensor/heartbeat/psensor_heartbeat.h @@ -2,7 +2,7 @@ * Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2012 Los Alamos National Security, Inc. All rights reserved. * - * Copyright (c) 2017 Intel, Inc. All rights reserved. + * Copyright (c) 2017-2018 Intel, Inc. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -28,6 +28,7 @@ BEGIN_C_DECLS typedef struct { pmix_psensor_base_component_t super; + bool recv_active; pmix_list_t trackers; } pmix_psensor_heartbeat_component_t; diff --git a/opal/mca/pmix/pmix3x/pmix/src/mca/psensor/heartbeat/psensor_heartbeat_component.c b/opal/mca/pmix/pmix3x/pmix/src/mca/psensor/heartbeat/psensor_heartbeat_component.c index 7f6f18f2ff7..1f56177dee2 100644 --- a/opal/mca/pmix/pmix3x/pmix/src/mca/psensor/heartbeat/psensor_heartbeat_component.c +++ b/opal/mca/pmix/pmix3x/pmix/src/mca/psensor/heartbeat/psensor_heartbeat_component.c @@ -1,7 +1,7 @@ /* * Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2012 Los Alamos National Security, Inc. All rights reserved. - * Copyright (c) 2017 Intel, Inc. All rights reserved. + * Copyright (c) 2017-2018 Intel, Inc. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -50,14 +50,9 @@ pmix_psensor_heartbeat_component_t mca_psensor_heartbeat_component = { */ static int heartbeat_open(void) { - pmix_status_t rc; - PMIX_CONSTRUCT(&mca_psensor_heartbeat_component.trackers, pmix_list_t); - /* setup to receive heartbeats */ - PMIX_PTL_RECV(rc, pmix_globals.mypeer, pmix_psensor_heartbeat_recv_beats, PMIX_PTL_TAG_HEARTBEAT); - - return rc; + return PMIX_SUCCESS; } @@ -74,12 +69,7 @@ static int heartbeat_query(pmix_mca_base_module_t **module, int *priority) static int heartbeat_close(void) { - pmix_status_t rc; - - /* cancel our persistent recv */ - PMIX_PTL_CANCEL(rc, pmix_globals.mypeer, PMIX_PTL_TAG_HEARTBEAT); - PMIX_LIST_DESTRUCT(&mca_psensor_heartbeat_component.trackers); - return rc; + return PMIX_SUCCESS; } diff --git a/opal/mca/pmix/pmix3x/pmix/src/mca/pshmem/base/pshmem_base_frame.c b/opal/mca/pmix/pmix3x/pmix/src/mca/pshmem/base/pshmem_base_frame.c index 4c38005da67..30296755f65 100644 --- a/opal/mca/pmix/pmix3x/pmix/src/mca/pshmem/base/pshmem_base_frame.c +++ b/opal/mca/pmix/pmix3x/pmix/src/mca/pshmem/base/pshmem_base_frame.c @@ -11,7 +11,7 @@ * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2012-2013 Los Alamos National Security, Inc. All rights reserved. - * Copyright (c) 2014-2017 Intel, Inc. All rights reserved. + * Copyright (c) 2014-2018 Intel, Inc. All rights reserved. * Copyright (c) 2015-2016 Research Organization for Information Science * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ @@ -60,6 +60,9 @@ static pmix_status_t pmix_pshmem_close(void) static pmix_status_t pmix_pshmem_open(pmix_mca_base_open_flag_t flags) { + if (initialized) { + return PMIX_SUCCESS; + } /* initialize globals */ initialized = true; diff --git a/opal/mca/pmix/pmix3x/pmix/src/mca/pshmem/mmap/Makefile.am b/opal/mca/pmix/pmix3x/pmix/src/mca/pshmem/mmap/Makefile.am index 68ba424b719..1483ae5de01 100644 --- a/opal/mca/pmix/pmix3x/pmix/src/mca/pshmem/mmap/Makefile.am +++ b/opal/mca/pmix/pmix3x/pmix/src/mca/pshmem/mmap/Makefile.am @@ -2,6 +2,7 @@ # # Copyright (c) 2017 Mellanox Technologies, Inc. # All rights reserved. +# Copyright (c) 2019 Intel, Inc. All rights reserved. # $COPYRIGHT$ # # Additional copyrights may follow @@ -36,6 +37,9 @@ mcacomponentdir = $(pmixlibdir) mcacomponent_LTLIBRARIES = $(component) mca_pshmem_mmap_la_SOURCES = $(component_sources) mca_pshmem_mmap_la_LDFLAGS = -module -avoid-version +if NEED_LIBPMIX +mca_pshmem_mmap_la_LIBADD = $(top_builddir)/src/libpmix.la +endif noinst_LTLIBRARIES = $(lib) libmca_pshmem_mmap_la_SOURCES = $(lib_sources) diff --git a/opal/mca/pmix/pmix3x/pmix/src/mca/pshmem/mmap/pshmem_mmap.c b/opal/mca/pmix/pmix3x/pmix/src/mca/pshmem/mmap/pshmem_mmap.c index a004ac27316..6529c1fa4a8 100644 --- a/opal/mca/pmix/pmix3x/pmix/src/mca/pshmem/mmap/pshmem_mmap.c +++ b/opal/mca/pmix/pmix3x/pmix/src/mca/pshmem/mmap/pshmem_mmap.c @@ -3,7 +3,7 @@ * All rights reserved. * Copyright (c) 2017 Research Organization for Information Science * and Technology (RIST). All rights reserved. - * Copyright (c) 2017 Intel, Inc. All rights reserved. + * Copyright (c) 2017-2019 Intel, Inc. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -82,9 +82,9 @@ static int _mmap_segment_create(pmix_pshmem_seg_t *sm_seg, const char *file_name if (ENOSPC == rc) { rc = PMIX_ERR_OUT_OF_RESOURCE; goto out; - } else if ((ENOTSUP != rc) + } else if (EINVAL != rc && ENOTSUP != rc #ifdef EOPNOTSUPP - && (EOPNOTSUPP != rc) + && EOPNOTSUPP != rc #endif ){ rc = PMIX_ERROR; @@ -121,7 +121,7 @@ static int _mmap_segment_create(pmix_pshmem_seg_t *sm_seg, const char *file_name sm_seg->seg_cpid = my_pid; sm_seg->seg_size = size; sm_seg->seg_base_addr = (unsigned char *)seg_addr; - (void)strncpy(sm_seg->seg_name, file_name, PMIX_PATH_MAX - 1); + pmix_strncpy(sm_seg->seg_name, file_name, PMIX_PATH_MAX); out: if (-1 != sm_seg->seg_id) { diff --git a/opal/mca/pmix/pmix3x/pmix/src/mca/ptl/base/ptl_base_frame.c b/opal/mca/pmix/pmix3x/pmix/src/mca/ptl/base/ptl_base_frame.c index 2f1fd4f6a07..2e6a101752e 100644 --- a/opal/mca/pmix/pmix3x/pmix/src/mca/ptl/base/ptl_base_frame.c +++ b/opal/mca/pmix/pmix3x/pmix/src/mca/ptl/base/ptl_base_frame.c @@ -198,9 +198,11 @@ PMIX_EXPORT PMIX_CLASS_INSTANCE(pmix_ptl_sr_t, static void pccon(pmix_pending_connection_t *p) { + p->need_id = false; memset(p->nspace, 0, PMIX_MAX_NSLEN+1); p->info = NULL; p->ninfo = 0; + p->peer = NULL; p->bfrops = NULL; p->psec = NULL; p->gds = NULL; @@ -258,6 +260,8 @@ PMIX_EXPORT PMIX_CLASS_INSTANCE(pmix_listener_t, static void qcon(pmix_ptl_queue_t *p) { p->peer = NULL; + p->buf = NULL; + p->tag = UINT32_MAX; } static void qdes(pmix_ptl_queue_t *p) { diff --git a/opal/mca/pmix/pmix3x/pmix/src/mca/ptl/base/ptl_base_sendrecv.c b/opal/mca/pmix/pmix3x/pmix/src/mca/ptl/base/ptl_base_sendrecv.c index 5a59300533b..043a68e1388 100644 --- a/opal/mca/pmix/pmix3x/pmix/src/mca/ptl/base/ptl_base_sendrecv.c +++ b/opal/mca/pmix/pmix3x/pmix/src/mca/ptl/base/ptl_base_sendrecv.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2014-2018 Intel, Inc. All rights reserved. + * Copyright (c) 2014-2019 Intel, Inc. All rights reserved. 
* Copyright (c) 2014 Artem Y. Polyakov . * All rights reserved. * Copyright (c) 2015-2017 Research Organization for Information Science @@ -45,6 +45,7 @@ #include "src/server/pmix_server_ops.h" #include "src/util/error.h" #include "src/util/show_help.h" +#include "src/mca/psensor/psensor.h" #include "src/mca/ptl/base/base.h" @@ -54,16 +55,22 @@ static void _notify_complete(pmix_status_t status, void *cbdata) PMIX_RELEASE(chain); } +static void lcfn(pmix_status_t status, void *cbdata) +{ + pmix_peer_t *peer = (pmix_peer_t*)cbdata; + PMIX_RELEASE(peer); +} + void pmix_ptl_base_lost_connection(pmix_peer_t *peer, pmix_status_t err) { - pmix_server_trkr_t *trk; + pmix_server_trkr_t *trk, *tnxt; pmix_server_caddy_t *rinfo, *rnext; - pmix_regevents_info_t *reginfoptr, *regnext; - pmix_peer_events_info_t *pr, *pnext; pmix_rank_info_t *info, *pinfo; pmix_ptl_posted_recv_t *rcv; pmix_buffer_t buf; pmix_ptl_hdr_t hdr; + pmix_proc_t proc; + pmix_status_t rc; /* stop all events */ if (peer->recv_ev_active) { @@ -81,20 +88,17 @@ void pmix_ptl_base_lost_connection(pmix_peer_t *peer, pmix_status_t err) CLOSE_THE_SOCKET(peer->sd); if (PMIX_PROC_IS_SERVER(pmix_globals.mypeer) && - !PMIX_PROC_IS_LAUNCHER(pmix_globals.mypeer)) { + !PMIX_PROC_IS_TOOL(pmix_globals.mypeer)) { /* if I am a server, then we need to ensure that * we properly account for the loss of this client * from any local collectives in which it was * participating - note that the proc would not * have been added to any collective tracker until * after it successfully connected */ - PMIX_LIST_FOREACH(trk, &pmix_server_globals.collectives, pmix_server_trkr_t) { + PMIX_LIST_FOREACH_SAFE(trk, tnxt, &pmix_server_globals.collectives, pmix_server_trkr_t) { /* see if this proc is participating in this tracker */ PMIX_LIST_FOREACH_SAFE(rinfo, rnext, &trk->local_cbs, pmix_server_caddy_t) { - if (0 != strncmp(rinfo->peer->info->pname.nspace, peer->info->pname.nspace, PMIX_MAX_NSLEN)) { - continue; - } - if 
(rinfo->peer->info->pname.rank != peer->info->pname.rank) { + if (!PMIX_CHECK_PROCID(&rinfo->peer->info->pname, &peer->info->pname)) { continue; } /* it is - adjust the count */ @@ -102,24 +106,65 @@ void pmix_ptl_base_lost_connection(pmix_peer_t *peer, pmix_status_t err) /* remove it from the list */ pmix_list_remove_item(&trk->local_cbs, &rinfo->super); PMIX_RELEASE(rinfo); - /* we need to let the other participants know that this - * proc has disappeared as otherwise the collective will never - * complete */ - if (PMIX_FENCENB_CMD == trk->type) { - if (NULL != trk->modexcbfunc) { - trk->modexcbfunc(PMIX_ERR_LOST_CONNECTION_TO_CLIENT, NULL, 0, trk, NULL, NULL); - } - } else if (PMIX_CONNECTNB_CMD == trk->type) { - if (NULL != trk->op_cbfunc) { - trk->op_cbfunc(PMIX_ERR_LOST_CONNECTION_TO_CLIENT, trk); - } - } else if (PMIX_DISCONNECTNB_CMD == trk->type) { - if (NULL != trk->op_cbfunc) { - trk->op_cbfunc(PMIX_ERR_LOST_CONNECTION_TO_CLIENT, trk); + /* if the host has already been called for this tracker, + * then do nothing here - just wait for the host to return + * from the operation */ + if (trk->host_called) { + continue; + } + if (trk->def_complete && trk->nlocal == pmix_list_get_size(&trk->local_cbs)) { + /* if this is a local-only collective, then resolve it now */ + if (trk->local) { + /* everyone else has called in - we need to let them know + * that this proc has disappeared + * as otherwise the collective will never complete */ + if (PMIX_FENCENB_CMD == trk->type) { + if (NULL != trk->modexcbfunc) { + trk->modexcbfunc(PMIX_ERR_LOST_CONNECTION_TO_CLIENT, NULL, 0, trk, NULL, NULL); + } + } else if (PMIX_CONNECTNB_CMD == trk->type) { + if (NULL != trk->op_cbfunc) { + trk->op_cbfunc(PMIX_ERR_LOST_CONNECTION_TO_CLIENT, trk); + } + } else if (PMIX_DISCONNECTNB_CMD == trk->type) { + if (NULL != trk->op_cbfunc) { + trk->op_cbfunc(PMIX_ERR_LOST_CONNECTION_TO_CLIENT, trk); + } + } + } else { + /* if the host has not been called, then we need to see if + * the 
collective is locally complete without this lost + * participant. If so, then we need to pass the call + * up to the host as otherwise the global collective will hang */ + if (PMIX_FENCENB_CMD == trk->type) { + trk->host_called = true; + rc = pmix_host_server.fence_nb(trk->pcs, trk->npcs, + trk->info, trk->ninfo, + NULL, 0, trk->modexcbfunc, trk); + if (PMIX_SUCCESS != rc) { + pmix_list_remove_item(&pmix_server_globals.collectives, &trk->super); + PMIX_RELEASE(trk); + } + } else if (PMIX_CONNECTNB_CMD == trk->type) { + trk->host_called = true; + rc = pmix_host_server.connect(trk->pcs, trk->npcs, trk->info, trk->ninfo, trk->op_cbfunc, trk); + if (PMIX_SUCCESS != rc) { + pmix_list_remove_item(&pmix_server_globals.collectives, &trk->super); + PMIX_RELEASE(trk); + } + } else if (PMIX_DISCONNECTNB_CMD == trk->type) { + trk->host_called = true; + rc = pmix_host_server.disconnect(trk->pcs, trk->npcs, trk->info, trk->ninfo, trk->op_cbfunc, trk); + if (PMIX_SUCCESS != rc) { + pmix_list_remove_item(&pmix_server_globals.collectives, &trk->super); + PMIX_RELEASE(trk); + } + } + } } } } + /* remove this proc from the list of ranks for this nspace if it is * still there - we must check for multiple copies as there will be * one for each "clone" of this peer */ @@ -129,35 +174,53 @@ void pmix_ptl_base_lost_connection(pmix_peer_t *peer, pmix_status_t err) } } /* reduce the number of local procs */ - --peer->nptr->nlocalprocs; + if (0 < peer->nptr->nlocalprocs) { + --peer->nptr->nlocalprocs; + } /* remove this client from our array */ pmix_pointer_array_set_item(&pmix_server_globals.clients, peer->index, NULL); - /* cleanup any remaining events they have registered for */ - PMIX_LIST_FOREACH_SAFE(reginfoptr, regnext, &pmix_server_globals.events, pmix_regevents_info_t) { - PMIX_LIST_FOREACH_SAFE(pr, pnext, &reginfoptr->peers, pmix_peer_events_info_t) { - if (peer == pr->peer) { - pmix_list_remove_item(&reginfoptr->peers, &pr->super); - PMIX_RELEASE(pr); - if (0 == 
pmix_list_get_size(&reginfoptr->peers)) { - pmix_list_remove_item(&pmix_server_globals.events, &reginfoptr->super); - PMIX_RELEASE(reginfoptr); - break; - } - } - } + + /* purge any notifications cached for this client */ + pmix_server_purge_events(peer, NULL); + + if (PMIX_PROC_IS_LAUNCHER(pmix_globals.mypeer)) { + /* only connection I can lose is to my server, so mark it */ + pmix_globals.connected = false; + } else { + /* cleanup any sensors that are monitoring them */ + pmix_psensor.stop(peer, NULL); } - if (!peer->finalized && !PMIX_PROC_IS_TOOL(peer)) { + + if (!peer->finalized && !PMIX_PROC_IS_TOOL(peer) && !pmix_globals.mypeer->finalized) { /* if this peer already called finalize, then * we are just seeing their connection go away * when they terminate - so do not generate * an event. If not, then we do */ - PMIX_REPORT_EVENT(err, peer, PMIX_RANGE_NAMESPACE, _notify_complete); + PMIX_REPORT_EVENT(err, peer, PMIX_RANGE_PROC_LOCAL, _notify_complete); } /* now decrease the refcount - might actually free the object */ PMIX_RELEASE(peer->info); + /* be sure to let the host know that the tool or client + * is gone - otherwise, it won't know to cleanup the + * resources it allocated to it */ + if (NULL != pmix_host_server.client_finalized && !peer->finalized) { + pmix_strncpy(proc.nspace, peer->info->pname.nspace, PMIX_MAX_NSLEN); + proc.rank = peer->info->pname.rank; + /* now tell the host server */ + rc = pmix_host_server.client_finalized(&proc, peer->info->server_object, + lcfn, peer); + if (PMIX_SUCCESS == rc) { + /* we will release the peer when the server calls us back */ + peer->finalized = true; + return; + } + } + /* mark the peer as "gone" since a release doesn't guarantee + * that the peer object doesn't persist */ + peer->finalized = true; /* Release peer info */ PMIX_RELEASE(peer); } else { @@ -187,7 +250,7 @@ void pmix_ptl_base_lost_connection(pmix_peer_t *peer, pmix_status_t err) PMIX_DESTRUCT(&buf); /* if I called finalize, then don't generate an event 
*/ if (!pmix_globals.mypeer->finalized) { - PMIX_REPORT_EVENT(err, pmix_client_globals.myserver, PMIX_RANGE_LOCAL, _notify_complete); + PMIX_REPORT_EVENT(err, pmix_client_globals.myserver, PMIX_RANGE_PROC_LOCAL, _notify_complete); } } } @@ -339,7 +402,9 @@ void pmix_ptl_base_send_handler(int sd, short flags, void *cbdata) if (NULL != msg) { pmix_output_verbose(2, pmix_ptl_base_framework.framework_output, - "ptl:base:send_handler SENDING MSG"); + "ptl:base:send_handler SENDING MSG TO %s:%d TAG %u", + peer->info->pname.nspace, peer->info->pname.rank, + ntohl(msg->hdr.tag)); if (PMIX_SUCCESS == (rc = send_msg(peer->sd, msg))) { // message is complete pmix_output_verbose(2, pmix_ptl_base_framework.framework_output, @@ -570,10 +635,10 @@ void pmix_ptl_base_send(int sd, short args, void *cbdata) if (NULL == queue->peer || queue->peer->sd < 0 || NULL == queue->peer->info || NULL == queue->peer->nptr) { /* this peer has lost connection */ + if (NULL != queue->buf) { + PMIX_RELEASE(queue->buf); + } PMIX_RELEASE(queue); - /* ensure we post the object before another thread - * picks it back up */ - PMIX_POST_OBJECT(queue); return; } @@ -583,6 +648,12 @@ void pmix_ptl_base_send(int sd, short args, void *cbdata) (queue->peer)->info->pname.nspace, (queue->peer)->info->pname.rank, (queue->tag)); + if (NULL == queue->buf) { + /* nothing to send? */ + PMIX_RELEASE(queue); + return; + } + snd = PMIX_NEW(pmix_ptl_send_t); snd->hdr.pindex = htonl(pmix_globals.pindex); snd->hdr.tag = htonl(queue->tag); @@ -619,12 +690,19 @@ void pmix_ptl_base_send_recv(int fd, short args, void *cbdata) /* acquire the object */ PMIX_ACQUIRE_OBJECT(ms); - if (ms->peer->sd < 0) { - /* this peer's socket has been closed */ + if (NULL == ms->peer || ms->peer->sd < 0 || + NULL == ms->peer->info || NULL == ms->peer->nptr) { + /* this peer has lost connection */ + if (NULL != ms->bfr) { + PMIX_RELEASE(ms->bfr); + } + PMIX_RELEASE(ms); + return; + } + + if (NULL == ms->bfr) { + /* nothing to send? 
*/ PMIX_RELEASE(ms); - /* ensure we post the object before another thread - * picks it back up */ - PMIX_POST_OBJECT(NULL); return; } @@ -735,7 +813,9 @@ void pmix_ptl_base_process_msg(int fd, short flags, void *cbdata) /* if the tag in this message is above the dynamic marker, then * that is an error */ if (PMIX_PTL_TAG_DYNAMIC <= msg->hdr.tag) { - pmix_output(0, "UNEXPECTED MESSAGE tag = %d", msg->hdr.tag); + pmix_output(0, "UNEXPECTED MESSAGE tag = %d from source %s:%d", + msg->hdr.tag, msg->peer->info->pname.nspace, + msg->peer->info->pname.rank); PMIX_REPORT_EVENT(PMIX_ERROR, msg->peer, PMIX_RANGE_NAMESPACE, _notify_complete); PMIX_RELEASE(msg); return; diff --git a/opal/mca/pmix/pmix3x/pmix/src/mca/ptl/ptl.h b/opal/mca/pmix/pmix3x/pmix/src/mca/ptl/ptl.h index 01f849b8055..d413a210043 100644 --- a/opal/mca/pmix/pmix3x/pmix/src/mca/ptl/ptl.h +++ b/opal/mca/pmix/pmix3x/pmix/src/mca/ptl/ptl.h @@ -143,11 +143,23 @@ typedef struct pmix_ptl_module_t pmix_ptl_module_t; /***** MACROS FOR EXECUTING PTL FUNCTIONS *****/ -#define PMIX_PTL_SEND_RECV(r, p, b, c, d) \ - (r) = (p)->nptr->compat.ptl->send_recv((struct pmix_peer_t*)(p), b, c, d) - -#define PMIX_PTL_SEND_ONEWAY(r, p, b, t) \ - (r) = (p)->nptr->compat.ptl->send((struct pmix_peer_t*)(p), b, t) +#define PMIX_PTL_SEND_RECV(r, p, b, c, d) \ + do { \ + if ((p)->finalized) { \ + (r) = PMIX_ERR_UNREACH; \ + } else { \ + (r) = (p)->nptr->compat.ptl->send_recv((struct pmix_peer_t*)(p), b, c, d); \ + } \ + } while(0) + +#define PMIX_PTL_SEND_ONEWAY(r, p, b, t) \ + do { \ + if ((p)->finalized) { \ + (r) = PMIX_ERR_UNREACH; \ + } else { \ + (r) = (p)->nptr->compat.ptl->send((struct pmix_peer_t*)(p), b, t); \ + } \ + } while(0) #define PMIX_PTL_RECV(r, p, c, t) \ (r) = (p)->nptr->compat.ptl->recv((struct pmix_peer_t*)(p), c, t) diff --git a/opal/mca/pmix/pmix3x/pmix/src/mca/ptl/ptl_types.h b/opal/mca/pmix/pmix3x/pmix/src/mca/ptl/ptl_types.h index 0008bb48d39..0017c5b8134 100644 --- 
a/opal/mca/pmix/pmix3x/pmix/src/mca/ptl/ptl_types.h +++ b/opal/mca/pmix/pmix3x/pmix/src/mca/ptl/ptl_types.h @@ -63,15 +63,16 @@ struct pmix_ptl_module_t; /* define a process type */ typedef uint16_t pmix_proc_type_t; + #define PMIX_PROC_UNDEF 0x0000 -#define PMIX_PROC_CLIENT 0x0001 -#define PMIX_PROC_SERVER 0x0002 -#define PMIX_PROC_TOOL 0x0004 -#define PMIX_PROC_V1 0x0008 -#define PMIX_PROC_V20 0x0010 -#define PMIX_PROC_V21 0x0020 -#define PMIX_PROC_V3 0x0040 -#define PMIX_PROC_LAUNCHER_ACT 0x1000 +#define PMIX_PROC_CLIENT 0x0001 // simple client process +#define PMIX_PROC_SERVER 0x0002 // simple server process +#define PMIX_PROC_TOOL 0x0004 // simple tool +#define PMIX_PROC_V1 0x0008 // process is using PMIx v1 protocols +#define PMIX_PROC_V20 0x0010 // process is using PMIx v2.0 protocols +#define PMIX_PROC_V21 0x0020 // process is using PMIx v2.1 protocols +#define PMIX_PROC_V3 0x0040 // process is using PMIx v3 protocols +#define PMIX_PROC_LAUNCHER_ACT 0x1000 // process acting as launcher #define PMIX_PROC_LAUNCHER (PMIX_PROC_TOOL | PMIX_PROC_SERVER | PMIX_PROC_LAUNCHER_ACT) #define PMIX_PROC_CLIENT_TOOL_ACT 0x2000 #define PMIX_PROC_CLIENT_TOOL (PMIX_PROC_TOOL | PMIX_PROC_CLIENT | PMIX_PROC_CLIENT_TOOL_ACT) @@ -196,11 +197,14 @@ typedef struct { pmix_event_t ev; pmix_listener_protocol_t protocol; int sd; + bool need_id; + uint8_t flag; char nspace[PMIX_MAX_NSLEN+1]; pmix_info_t *info; size_t ninfo; pmix_status_t status; struct sockaddr_storage addr; + struct pmix_peer_t *peer; char *bfrops; char *psec; char *gds; @@ -236,9 +240,6 @@ PMIX_EXPORT extern int pmix_ptl_base_output; #define PMIX_ACTIVATE_POST_MSG(ms) \ do { \ - pmix_output_verbose(5, pmix_ptl_base_output, \ - "[%s:%d] post msg", \ - __FILE__, __LINE__); \ pmix_event_assign(&((ms)->ev), pmix_globals.evbase, -1, \ EV_WRITE, pmix_ptl_base_process_msg, (ms)); \ PMIX_POST_OBJECT(ms); \ @@ -259,37 +260,42 @@ PMIX_EXPORT extern int pmix_ptl_base_output; * t - tag to be sent to * b - buffer to be sent */ 
-#define PMIX_SERVER_QUEUE_REPLY(p, t, b) \ - do { \ - pmix_ptl_send_t *snd; \ - uint32_t nbytes; \ - pmix_output_verbose(5, pmix_ptl_base_output, \ +#define PMIX_SERVER_QUEUE_REPLY(r, p, t, b) \ + do { \ + pmix_ptl_send_t *snd; \ + uint32_t nbytes; \ + pmix_output_verbose(5, pmix_ptl_base_output, \ "[%s:%d] queue callback called: reply to %s:%d on tag %d size %d", \ - __FILE__, __LINE__, \ - (p)->info->pname.nspace, \ - (p)->info->pname.rank, (t), (int)(b)->bytes_used); \ - snd = PMIX_NEW(pmix_ptl_send_t); \ - snd->hdr.pindex = htonl(pmix_globals.pindex); \ - snd->hdr.tag = htonl(t); \ - nbytes = (b)->bytes_used; \ - snd->hdr.nbytes = htonl(nbytes); \ - snd->data = (b); \ - /* always start with the header */ \ - snd->sdptr = (char*)&snd->hdr; \ - snd->sdbytes = sizeof(pmix_ptl_hdr_t); \ - /* if there is no message on-deck, put this one there */ \ - if (NULL == (p)->send_msg) { \ - (p)->send_msg = snd; \ - } else { \ - /* add it to the queue */ \ - pmix_list_append(&(p)->send_queue, &snd->super); \ - } \ - /* ensure the send event is active */ \ - if (!(p)->send_ev_active && 0 <= (p)->sd) { \ - (p)->send_ev_active = true; \ - PMIX_POST_OBJECT(snd); \ - pmix_event_add(&(p)->send_event, 0); \ - } \ + __FILE__, __LINE__, \ + (p)->info->pname.nspace, \ + (p)->info->pname.rank, (t), (int)(b)->bytes_used); \ + if ((p)->finalized) { \ + (r) = PMIX_ERR_UNREACH; \ + } else { \ + snd = PMIX_NEW(pmix_ptl_send_t); \ + snd->hdr.pindex = htonl(pmix_globals.pindex); \ + snd->hdr.tag = htonl(t); \ + nbytes = (b)->bytes_used; \ + snd->hdr.nbytes = htonl(nbytes); \ + snd->data = (b); \ + /* always start with the header */ \ + snd->sdptr = (char*)&snd->hdr; \ + snd->sdbytes = sizeof(pmix_ptl_hdr_t); \ + /* if there is no message on-deck, put this one there */ \ + if (NULL == (p)->send_msg) { \ + (p)->send_msg = snd; \ + } else { \ + /* add it to the queue */ \ + pmix_list_append(&(p)->send_queue, &snd->super); \ + } \ + /* ensure the send event is active */ \ + if 
(!(p)->send_ev_active && 0 <= (p)->sd) { \ + (p)->send_ev_active = true; \ + PMIX_POST_OBJECT(snd); \ + pmix_event_add(&(p)->send_event, 0); \ + } \ + (r) = PMIX_SUCCESS; \ + } \ } while (0) #define CLOSE_THE_SOCKET(s) \ diff --git a/opal/mca/pmix/pmix3x/pmix/src/mca/ptl/tcp/Makefile.am b/opal/mca/pmix/pmix3x/pmix/src/mca/ptl/tcp/Makefile.am index 6788aba19c4..0a5b86bfdac 100644 --- a/opal/mca/pmix/pmix3x/pmix/src/mca/ptl/tcp/Makefile.am +++ b/opal/mca/pmix/pmix3x/pmix/src/mca/ptl/tcp/Makefile.am @@ -11,7 +11,7 @@ # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. # Copyright (c) 2012 Los Alamos National Security, Inc. All rights reserved. -# Copyright (c) 2013-2016 Intel, Inc. All rights reserved +# Copyright (c) 2013-2019 Intel, Inc. All rights reserved. # $COPYRIGHT$ # # Additional copyrights may follow @@ -44,6 +44,9 @@ mcacomponentdir = $(pmixlibdir) mcacomponent_LTLIBRARIES = $(component) mca_ptl_tcp_la_SOURCES = $(component_sources) mca_ptl_tcp_la_LDFLAGS = -module -avoid-version +if NEED_LIBPMIX +mca_ptl_tcp_la_LIBADD = $(top_builddir)/src/libpmix.la +endif noinst_LTLIBRARIES = $(lib) libmca_ptl_tcp_la_SOURCES = $(lib_sources) diff --git a/opal/mca/pmix/pmix3x/pmix/src/mca/ptl/tcp/ptl_tcp.c b/opal/mca/pmix/pmix3x/pmix/src/mca/ptl/tcp/ptl_tcp.c index 53765bdf123..e86a4126405 100644 --- a/opal/mca/pmix/pmix3x/pmix/src/mca/ptl/tcp/ptl_tcp.c +++ b/opal/mca/pmix/pmix3x/pmix/src/mca/ptl/tcp/ptl_tcp.c @@ -13,7 +13,8 @@ * Copyright (c) 2011-2014 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2011-2013 Los Alamos National Security, LLC. All rights * reserved. - * Copyright (c) 2013-2018 Intel, Inc. All rights reserved. + * Copyright (c) 2013-2019 Intel, Inc. All rights reserved. + * Copyright (c) 2018 IBM Corporation. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -23,6 +24,7 @@ */ #include +#include "src/include/pmix_globals.h" #ifdef HAVE_FCNTL_H #include @@ -42,9 +44,13 @@ #ifdef HAVE_SYS_STAT_H #include #endif +#ifdef HAVE_DIRENT_H #include +#endif +#ifdef HAVE_SYS_SYSCTL_H +#include +#endif -#include "src/include/pmix_globals.h" #include "src/include/pmix_socket_errno.h" #include "src/client/pmix_client_ops.h" #include "src/server/pmix_server_ops.h" @@ -53,6 +59,7 @@ #include "src/util/os_path.h" #include "src/util/show_help.h" #include "src/mca/bfrops/base/base.h" +#include "src/mca/gds/gds.h" #include "src/mca/ptl/base/base.h" #include "ptl_tcp.h" @@ -77,8 +84,8 @@ pmix_ptl_module_t pmix_ptl_tcp_module = { .connect_to_peer = connect_to_peer }; -static pmix_status_t recv_connect_ack(int sd); -static pmix_status_t send_connect_ack(int sd); +static pmix_status_t recv_connect_ack(int sd, uint8_t myflag); +static pmix_status_t send_connect_ack(int sd, uint8_t *myflag, pmix_info_t info[], size_t ninfo); static pmix_status_t init(void) @@ -109,10 +116,11 @@ static pmix_status_t parse_uri_file(char *filename, char **uri, char **nspace, pmix_rank_t *rank); -static pmix_status_t try_connect(char *uri, int *sd); +static pmix_status_t try_connect(char *uri, int *sd, pmix_info_t info[], size_t ninfo); static pmix_status_t df_search(char *dirname, char *prefix, + pmix_info_t info[], size_t ninfo, int *sd, char **nspace, - pmix_rank_t *rank); + pmix_rank_t *rank, char **uri); static pmix_status_t connect_to_peer(struct pmix_peer_t *peer, pmix_info_t *info, size_t ninfo) @@ -120,14 +128,19 @@ static pmix_status_t connect_to_peer(struct pmix_peer_t *peer, char *evar, **uri, *suri = NULL, *suri2 = NULL; char *filename, *nspace=NULL; pmix_rank_t rank = PMIX_RANK_WILDCARD; - char *p, *p2, *server_nspace = NULL; + char *p, *p2, *server_nspace = NULL, *rendfile = NULL; int sd, rc; size_t n; - char myhost[PMIX_MAXHOSTNAMELEN]; + char myhost[PMIX_MAXHOSTNAMELEN] = {0}; bool system_level = 
false; bool system_level_only = false; bool reconnect = false; - pid_t pid = 0; + pid_t pid = 0, mypid; + pmix_list_t ilist; + pmix_info_caddy_t *kv; + pmix_info_t *iptr = NULL, mypidinfo, mycmdlineinfo, launcher; + size_t niptr = 0; + pmix_kval_t *urikv = NULL; pmix_output_verbose(2, pmix_ptl_base_framework.framework_output, "ptl:tcp: connecting to server"); @@ -200,14 +213,16 @@ static pmix_status_t connect_to_peer(struct pmix_peer_t *peer, ++p2; nspace = strdup(p); rank = strtoull(p2, NULL, 10); + suri = strdup(uri[1]); pmix_output_verbose(2, pmix_ptl_base_framework.framework_output, "ptl:tcp:client attempt connect to %s", uri[1]); /* go ahead and try to connect */ - if (PMIX_SUCCESS != (rc = try_connect(uri[1], &sd))) { + if (PMIX_SUCCESS != (rc = try_connect(uri[1], &sd, info, ninfo))) { free(nspace); pmix_argv_free(uri); + free(suri); return rc; } pmix_argv_free(uri); @@ -218,16 +233,17 @@ static pmix_status_t connect_to_peer(struct pmix_peer_t *peer, /* get here if we are a tool - check any provided directives * to see where they want us to connect to */ suri = NULL; + PMIX_CONSTRUCT(&ilist, pmix_list_t); if (NULL != info) { for (n=0; n < ninfo; n++) { - if (0 == strcmp(info[n].key, PMIX_CONNECT_TO_SYSTEM)) { + if (PMIX_CHECK_KEY(&info[n], PMIX_CONNECT_TO_SYSTEM)) { system_level_only = PMIX_INFO_TRUE(&info[n]); - } else if (0 == strncmp(info[n].key, PMIX_CONNECT_SYSTEM_FIRST, PMIX_MAX_KEYLEN)) { + } else if (PMIX_CHECK_KEY(&info[n], PMIX_CONNECT_SYSTEM_FIRST)) { /* try the system-level */ system_level = PMIX_INFO_TRUE(&info[n]); - } else if (0 == strncmp(info[n].key, PMIX_SERVER_PIDINFO, PMIX_MAX_KEYLEN)) { + } else if (PMIX_CHECK_KEY(&info[n], PMIX_SERVER_PIDINFO)) { pid = info[n].value.data.pid; - } else if (0 == strncmp(info[n].key, PMIX_SERVER_NSPACE, PMIX_MAX_KEYLEN)) { + } else if (PMIX_CHECK_KEY(&info[n], PMIX_SERVER_NSPACE)) { if (NULL != server_nspace) { /* they included it more than once */ if (0 == strcmp(server_nspace, info[n].value.data.string)) 
{ @@ -239,10 +255,13 @@ static pmix_status_t connect_to_peer(struct pmix_peer_t *peer, if (NULL != suri) { free(suri); } + if (NULL != rendfile) { + free(rendfile); + } return PMIX_ERR_BAD_PARAM; } server_nspace = strdup(info[n].value.data.string); - } else if (0 == strncmp(info[n].key, PMIX_SERVER_URI, PMIX_MAX_KEYLEN)) { + } else if (PMIX_CHECK_KEY(&info[n], PMIX_SERVER_URI)) { if (NULL != suri) { /* they included it more than once */ if (0 == strcmp(suri, info[n].value.data.string)) { @@ -254,25 +273,148 @@ static pmix_status_t connect_to_peer(struct pmix_peer_t *peer, if (NULL != server_nspace) { free(server_nspace); } + if (NULL != rendfile) { + free(rendfile); + } return PMIX_ERR_BAD_PARAM; } suri = strdup(info[n].value.data.string); - } else if (0 == strncmp(info[n].key, PMIX_CONNECT_RETRY_DELAY, PMIX_MAX_KEYLEN)) { + } else if (PMIX_CHECK_KEY(&info[n], PMIX_CONNECT_RETRY_DELAY)) { mca_ptl_tcp_component.wait_to_connect = info[n].value.data.uint32; - } else if (0 == strncmp(info[n].key, PMIX_CONNECT_MAX_RETRIES, PMIX_MAX_KEYLEN)) { + } else if (PMIX_CHECK_KEY(&info[n], PMIX_CONNECT_MAX_RETRIES)) { mca_ptl_tcp_component.max_retries = info[n].value.data.uint32; - } else if (0 == strncmp(info[n].key, PMIX_RECONNECT_SERVER, PMIX_MAX_KEYLEN)) { + } else if (PMIX_CHECK_KEY(&info[n], PMIX_RECONNECT_SERVER)) { reconnect = true; + } else if (PMIX_CHECK_KEY(&info[n], PMIX_LAUNCHER_RENDEZVOUS_FILE)) { + if (NULL != rendfile) { + free(rendfile); + } + rendfile = strdup(info[n].value.data.string); + } else { + /* need to pass this to server */ + kv = PMIX_NEW(pmix_info_caddy_t); + kv->info = &info[n]; + pmix_list_append(&ilist, &kv->super); + } + } + } + /* add our pid to the array */ + kv = PMIX_NEW(pmix_info_caddy_t); + mypid = getpid(); + PMIX_INFO_LOAD(&mypidinfo, PMIX_PROC_PID, &mypid, PMIX_PID); + kv->info = &mypidinfo; + pmix_list_append(&ilist, &kv->super); + + /* if I am a launcher, tell them so */ + if (PMIX_PROC_IS_LAUNCHER(pmix_globals.mypeer)) { + kv = 
PMIX_NEW(pmix_info_caddy_t); + PMIX_INFO_LOAD(&launcher, PMIX_LAUNCHER, NULL, PMIX_BOOL); + kv->info = &launcher; + pmix_list_append(&ilist, &kv->super); + } + + /* add our cmd line to the array */ +#if PMIX_HAVE_APPLE + int mib[3], argmax, nargs, num; + size_t size; + char *procargs, *cp, *cptr; + char **stack = NULL; + + /* Get the maximum process arguments size. */ + mib[0] = CTL_KERN; + mib[1] = KERN_ARGMAX; + size = sizeof(argmax); + + if (sysctl(mib, 2, &argmax, &size, NULL, 0) == -1) { + fprintf(stderr, "sysctl() argmax failed\n"); + return -1; + } + + /* Allocate space for the arguments. */ + procargs = (char *)malloc(argmax); + if (procargs == NULL) + return -1; + + /* Make a sysctl() call to get the raw argument space of the process. */ + mib[0] = CTL_KERN; + mib[1] = KERN_PROCARGS2; + mib[2] = getpid(); + + size = (size_t)argmax; + + if (sysctl(mib, 3, procargs, &size, NULL, 0) == -1) { + fprintf(stderr, "Lacked permissions\n");; + return 0; + } + + memcpy(&nargs, procargs, sizeof(nargs)); + /* this points to the executable - skip over that to get the rest */ + cp = procargs + sizeof(nargs); + cp += strlen(cp); + /* this is the first argv */ + pmix_argv_append_nosize(&stack, cp); + /* skip any embedded NULLs */ + while (cp < &procargs[size] && '\0' == *cp) { + ++cp; + } + if (cp != &procargs[size]) { + /* from this point, we have the argv separated by NULLs - split them out */ + cptr = cp; + num = 0; + while (cp < &procargs[size] && num < nargs) { + if ('\0' == *cp) { + pmix_argv_append_nosize(&stack, cptr); + ++cp; // skip over the NULL + cptr = cp; + ++num; + } else { + ++cp; } } } + p = pmix_argv_join(stack, ' '); + pmix_argv_free(stack); + free(procargs); +#else + char tmp[512]; + FILE *fp; + + /* open the pid's info file */ + snprintf(tmp, 512, "/proc/%lu/cmdline", (unsigned long)mypid); + fp = fopen(tmp, "r"); + if (NULL != fp) { + /* read the cmd line */ + fgets(tmp, 512, fp); + fclose(fp); + p = strdup(tmp); + } +#endif + /* pass it along */ + kv 
= PMIX_NEW(pmix_info_caddy_t); + PMIX_INFO_LOAD(&mycmdlineinfo, PMIX_CMD_LINE, p, PMIX_STRING); + kv->info = &mycmdlineinfo; + pmix_list_append(&ilist, &kv->super); + free(p); + + /* if we need to pass anything, setup an array */ + if (0 < (niptr = pmix_list_get_size(&ilist))) { + PMIX_INFO_CREATE(iptr, niptr); + n = 0; + while (NULL != (kv = (pmix_info_caddy_t*)pmix_list_remove_first(&ilist))) { + PMIX_INFO_XFER(&iptr[n], kv->info); + PMIX_RELEASE(kv); + ++n; + } + } + PMIX_LIST_DESTRUCT(&ilist); + if (NULL == suri && !reconnect && NULL != mca_ptl_tcp_component.super.uri) { suri = strdup(mca_ptl_tcp_component.super.uri); } /* mark that we are using the V2 protocol */ pmix_globals.mypeer->protocol = PMIX_PROTOCOL_V2; - gethostname(myhost, sizeof(myhost)); + gethostname(myhost, sizeof(myhost)-1); /* if we were given a URI via MCA param, then look no further */ if (NULL != suri) { if (NULL != server_nspace) { @@ -288,6 +430,12 @@ static pmix_status_t connect_to_peer(struct pmix_peer_t *peer, rc = parse_uri_file(&suri[5], &suri2, &nspace, &rank); if (PMIX_SUCCESS != rc) { free(suri); + if (NULL != rendfile) { + free(rendfile); + } + if (NULL != iptr) { + PMIX_INFO_FREE(iptr, niptr); + } return PMIX_ERR_UNREACH; } free(suri); @@ -297,6 +445,12 @@ static pmix_status_t connect_to_peer(struct pmix_peer_t *peer, p = strchr(suri, ';'); if (NULL == p) { free(suri); + if (NULL != rendfile) { + free(rendfile); + } + if (NULL != iptr) { + PMIX_INFO_FREE(iptr, niptr); + } return PMIX_ERR_BAD_PARAM; } *p = '\0'; @@ -308,6 +462,12 @@ static pmix_status_t connect_to_peer(struct pmix_peer_t *peer, if (NULL == p) { free(suri2); free(suri); + if (NULL != rendfile) { + free(rendfile); + } + if (NULL != iptr) { + PMIX_INFO_FREE(iptr, niptr); + } return PMIX_ERR_BAD_PARAM; } *p = '\0'; @@ -321,18 +481,112 @@ static pmix_status_t connect_to_peer(struct pmix_peer_t *peer, pmix_output_verbose(2, pmix_ptl_base_framework.framework_output, "ptl:tcp:tool attempt connect using given URI %s", 
suri); /* go ahead and try to connect */ - if (PMIX_SUCCESS != (rc = try_connect(suri, &sd))) { + if (PMIX_SUCCESS != (rc = try_connect(suri, &sd, iptr, niptr))) { if (NULL != nspace) { free(nspace); } free(suri); + if (NULL != rendfile) { + free(rendfile); + } + if (NULL != iptr) { + PMIX_INFO_FREE(iptr, niptr); + } return rc; } + /* cleanup */ free(suri); suri = NULL; + if (NULL != rendfile) { + free(rendfile); + } + if (NULL != iptr) { + PMIX_INFO_FREE(iptr, niptr); + } goto complete; } + /* if they gave us a rendezvous file, use it */ + if (NULL != rendfile) { + /* try to read the file */ + rc = parse_uri_file(rendfile, &suri, &nspace, &rank); + free(rendfile); + rendfile = NULL; + if (PMIX_SUCCESS == rc) { + pmix_output_verbose(2, pmix_ptl_base_framework.framework_output, + "ptl:tcp:tool attempt connect to system server at %s", suri); + /* go ahead and try to connect */ + if (PMIX_SUCCESS == try_connect(suri, &sd, iptr, niptr)) { + /* don't free nspace - we will use it below */ + if (NULL != rendfile) { + free(rendfile); + } + if (NULL != iptr) { + PMIX_INFO_FREE(iptr, niptr); + } + goto complete; + } + } + /* cleanup */ + if (NULL != nspace) { + free(nspace); + } + if (NULL != suri) { + free(suri); + } + free(rendfile); + if (NULL != iptr) { + PMIX_INFO_FREE(iptr, niptr); + } + /* since they gave us a specific rendfile and we couldn't + * connect to it, return an error */ + return PMIX_ERR_UNREACH; + } + + /* if they asked for system-level first or only, we start there */ + if (system_level || system_level_only) { + if (0 > asprintf(&filename, "%s/pmix.sys.%s", mca_ptl_tcp_component.system_tmpdir, myhost)) { + if (NULL != iptr) { + PMIX_INFO_FREE(iptr, niptr); + } + return PMIX_ERR_NOMEM; + } + pmix_output_verbose(2, pmix_ptl_base_framework.framework_output, + "ptl:tcp:tool looking for system server at %s", + filename); + /* try to read the file */ + rc = parse_uri_file(filename, &suri, &nspace, &rank); + free(filename); + if (PMIX_SUCCESS == rc) { + 
pmix_output_verbose(2, pmix_ptl_base_framework.framework_output, + "ptl:tcp:tool attempt connect to system server at %s", suri); + /* go ahead and try to connect */ + if (PMIX_SUCCESS == try_connect(suri, &sd, iptr, niptr)) { + /* don't free nspace - we will use it below */ + if (NULL != iptr) { + PMIX_INFO_FREE(iptr, niptr); + } + goto complete; + } + free(nspace); + } + } + + /* we get here if they either didn't ask for a system-level connection, + * or they asked for it and it didn't succeed. If they _only_ wanted + * a system-level connection, then we are done */ + if (system_level_only) { + pmix_output_verbose(2, pmix_ptl_base_framework.framework_output, + "ptl:tcp: connecting to system failed"); + if (NULL != suri) { + free(suri); + } + if (NULL != iptr) { + PMIX_INFO_FREE(iptr, niptr); + } + return PMIX_ERR_UNREACH; + } + /* if they gave us a pid, then look for it */ if (0 != pid) { if (NULL != server_nspace) { @@ -340,6 +594,9 @@ static pmix_status_t connect_to_peer(struct pmix_peer_t *peer, server_nspace = NULL; } if (0 > asprintf(&filename, "pmix.%s.tool.%d", myhost, pid)) { + if (NULL != iptr) { + PMIX_INFO_FREE(iptr, niptr); + } return PMIX_ERR_NOMEM; } pmix_output_verbose(2, pmix_ptl_base_framework.framework_output, @@ -347,14 +604,20 @@ static pmix_status_t connect_to_peer(struct pmix_peer_t *peer, filename); nspace = NULL; rc = df_search(mca_ptl_tcp_component.system_tmpdir, - filename, &sd, &nspace, &rank); + filename, iptr, niptr, &sd, &nspace, &rank, &suri); free(filename); if (PMIX_SUCCESS == rc) { goto complete; } + if (NULL != suri) { + free(suri); + } if (NULL != nspace) { free(nspace); } + if (NULL != iptr) { + PMIX_INFO_FREE(iptr, niptr); + } /* since they gave us a specific pid and we couldn't * connect to it, return an error */ return PMIX_ERR_UNREACH; @@ -364,6 +627,9 @@ static pmix_status_t connect_to_peer(struct pmix_peer_t *peer, if (NULL != server_nspace) { if (0 > asprintf(&filename, "pmix.%s.tool.%s", myhost, server_nspace)) { 
free(server_nspace); + if (NULL != iptr) { + PMIX_INFO_FREE(iptr, niptr); + } return PMIX_ERR_NOMEM; } free(server_nspace); @@ -373,54 +639,25 @@ static pmix_status_t connect_to_peer(struct pmix_peer_t *peer, filename); nspace = NULL; rc = df_search(mca_ptl_tcp_component.system_tmpdir, - filename, &sd, &nspace, &rank); + filename, iptr, niptr, &sd, &nspace, &rank, &suri); free(filename); if (PMIX_SUCCESS == rc) { goto complete; } + if (NULL != suri) { + free(suri); + } if (NULL != nspace) { free(nspace); } + if (NULL != iptr) { + PMIX_INFO_FREE(iptr, niptr); + } /* since they gave us a specific nspace and we couldn't * connect to it, return an error */ return PMIX_ERR_UNREACH; } - /* if they asked for system-level, we start there */ - if (system_level || system_level_only) { - if (0 > asprintf(&filename, "%s/pmix.sys.%s", mca_ptl_tcp_component.system_tmpdir, myhost)) { - return PMIX_ERR_NOMEM; - } - pmix_output_verbose(2, pmix_ptl_base_framework.framework_output, - "ptl:tcp:tool looking for system server at %s", - filename); - /* try to read the file */ - rc = parse_uri_file(filename, &suri, &nspace, &rank); - free(filename); - if (PMIX_SUCCESS == rc) { - pmix_output_verbose(2, pmix_ptl_base_framework.framework_output, - "ptl:tcp:tool attempt connect to system server at %s", suri); - /* go ahead and try to connect */ - if (PMIX_SUCCESS == try_connect(suri, &sd)) { - /* don't free nspace - we will use it below */ - goto complete; - } - free(nspace); - } - } - - /* we get here if they either didn't ask for a system-level connection, - * or they asked for it and it didn't succeed. If they _only_ wanted - * a system-level connection, then we are done */ - if (system_level_only) { - pmix_output_verbose(2, pmix_ptl_base_framework.framework_output, - "ptl:tcp: connecting to system failed"); - if (NULL != suri) { - free(suri); - } - return PMIX_ERR_UNREACH; - } - /* they didn't give us a pid, so we will search to see what session-level * tools are available to this user. 
We will take the first connection * that succeeds - this is based on the likelihood that there is only @@ -430,6 +667,9 @@ static pmix_status_t connect_to_peer(struct pmix_peer_t *peer, if (NULL != suri) { free(suri); } + if (NULL != iptr) { + PMIX_INFO_FREE(iptr, niptr); + } return PMIX_ERR_NOMEM; } pmix_output_verbose(2, pmix_ptl_base_framework.framework_output, @@ -437,7 +677,7 @@ static pmix_status_t connect_to_peer(struct pmix_peer_t *peer, filename); nspace = NULL; rc = df_search(mca_ptl_tcp_component.system_tmpdir, - filename, &sd, &nspace, &rank); + filename, iptr, niptr, &sd, &nspace, &rank, &suri); free(filename); if (PMIX_SUCCESS != rc) { if (NULL != nspace){ @@ -446,12 +686,18 @@ static pmix_status_t connect_to_peer(struct pmix_peer_t *peer, if (NULL != suri) { free(suri); } + if (NULL != iptr) { + PMIX_INFO_FREE(iptr, niptr); + } return PMIX_ERR_UNREACH; } + if (NULL != iptr) { + PMIX_INFO_FREE(iptr, niptr); + } complete: pmix_output_verbose(2, pmix_ptl_base_framework.framework_output, - "sock_peer_try_connect: Connection across to server succeeded"); + "tcp_peer_try_connect: Connection across to server succeeded"); /* do a final bozo check */ if (NULL == nspace || PMIX_RANK_WILDCARD == rank) { @@ -476,7 +722,7 @@ static pmix_status_t connect_to_peer(struct pmix_peer_t *peer, pmix_client_globals.myserver->info = PMIX_NEW(pmix_rank_info_t); } if (NULL == pmix_client_globals.myserver->nptr) { - pmix_client_globals.myserver->nptr = PMIX_NEW(pmix_nspace_t); + pmix_client_globals.myserver->nptr = PMIX_NEW(pmix_namespace_t); } if (NULL != pmix_client_globals.myserver->nptr->nspace) { free(pmix_client_globals.myserver->nptr->nspace); @@ -489,6 +735,16 @@ static pmix_status_t connect_to_peer(struct pmix_peer_t *peer, pmix_client_globals.myserver->info->pname.nspace = strdup(pmix_client_globals.myserver->nptr->nspace); pmix_client_globals.myserver->info->pname.rank = rank; } + /* store the URI for subsequent lookups */ + urikv = PMIX_NEW(pmix_kval_t); + 
urikv->key = strdup(PMIX_SERVER_URI); + PMIX_VALUE_CREATE(urikv->value, 1); + urikv->value->type = PMIX_STRING; + asprintf(&urikv->value->data.string, "%s.%u;%s", nspace, rank, suri); + PMIX_GDS_STORE_KV(rc, pmix_globals.mypeer, + &pmix_globals.myid, PMIX_INTERNAL, + urikv); + PMIX_RELEASE(urikv); // maintain accounting pmix_ptl_base_set_nonblocking(sd); @@ -680,14 +936,15 @@ static pmix_status_t parse_uri_file(char *filename, return PMIX_SUCCESS; } -static pmix_status_t try_connect(char *uri, int *sd) +static pmix_status_t try_connect(char *uri, int *sd, pmix_info_t iptr[], size_t niptr) { char *p, *p2, *host; struct sockaddr_in *in; struct sockaddr_in6 *in6; size_t len; pmix_status_t rc; - bool retried = false; + int retries = 0; + uint8_t myflag; pmix_output_verbose(2, pmix_ptl_base_framework.framework_output, "pmix:tcp try connect to %s", uri); @@ -771,29 +1028,28 @@ static pmix_status_t try_connect(char *uri, int *sd) } /* send our identity and any authentication credentials to the server */ - if (PMIX_SUCCESS != (rc = send_connect_ack(*sd))) { + if (PMIX_SUCCESS != (rc = send_connect_ack(*sd, &myflag, iptr, niptr))) { PMIX_ERROR_LOG(rc); CLOSE_THE_SOCKET(*sd); return rc; } /* do whatever handshake is required */ - if (PMIX_SUCCESS != (rc = recv_connect_ack(*sd))) { + if (PMIX_SUCCESS != (rc = recv_connect_ack(*sd, myflag))) { CLOSE_THE_SOCKET(*sd); if (PMIX_ERR_TEMP_UNAVAILABLE == rc) { - /* give it two tries */ - if (!retried) { - retried = true; + ++retries; + if( retries < mca_ptl_tcp_component.handshake_max_retries ) { goto retry; } } - PMIX_ERROR_LOG(rc); return rc; } return PMIX_SUCCESS; } -static pmix_status_t send_connect_ack(int sd) +static pmix_status_t send_connect_ack(int sd, uint8_t *myflag, + pmix_info_t iptr[], size_t niptr) { char *msg; pmix_ptl_hdr_t hdr; @@ -806,7 +1062,7 @@ static pmix_status_t send_connect_ack(int sd) uid_t euid; gid_t egid; uint32_t u32; - bool self_defined = false; + pmix_buffer_t buf; pmix_output_verbose(2, 
pmix_ptl_base_framework.framework_output, "pmix:tcp SEND CONNECT ACK"); @@ -814,6 +1070,7 @@ static pmix_status_t send_connect_ack(int sd) /* if we are a server, then we shouldn't be here */ if (PMIX_PROC_IS_SERVER(pmix_globals.mypeer) && !PMIX_PROC_IS_LAUNCHER(pmix_globals.mypeer)) { + PMIX_ERROR_LOG(PMIX_ERR_NOT_SUPPORTED); return PMIX_ERR_NOT_SUPPORTED; } @@ -837,35 +1094,68 @@ static pmix_status_t send_connect_ack(int sd) /* allow space for a marker indicating client vs tool */ sdsize = 1; - if (PMIX_PROC_IS_CLIENT(pmix_globals.mypeer)) { + /* Defined marker values: + * + * 0 => simple client process + * 1 => legacy tool - may or may not have an identifier + * 2 => legacy launcher - may or may not have an identifier + * ------------------------------------------ + * 3 => self-started tool process that needs an identifier + * 4 => self-started tool process that was given an identifier by caller + * 5 => tool that was started by a PMIx server - identifier specified by server + * 6 => self-started launcher that needs an identifier + * 7 => self-started launcher that was given an identifier by caller + * 8 => launcher that was started by a PMIx server - identifier specified by server + */ + if (PMIX_PROC_IS_LAUNCHER(pmix_globals.mypeer)) { + if (PMIX_PROC_IS_CLIENT(pmix_globals.mypeer)) { + /* if we are both launcher and client, then we need + * to tell the server we are both */ + flag = 8; + /* add space for our uid/gid for ACL purposes */ + sdsize += 2*sizeof(uint32_t); + /* add space for our identifier */ + sdsize += strlen(pmix_globals.myid.nspace) + 1 + sizeof(uint32_t); + } else { + /* add space for our uid/gid for ACL purposes */ + sdsize += 2*sizeof(uint32_t); + /* if they gave us an identifier, we need to pass it */ + if (0 < strlen(pmix_globals.myid.nspace) && + PMIX_RANK_INVALID != pmix_globals.myid.rank) { + flag = 7; + sdsize += strlen(pmix_globals.myid.nspace) + 1 + sizeof(uint32_t); + } else { + flag = 6; + } + } + + } else if 
(PMIX_PROC_IS_CLIENT(pmix_globals.mypeer) && + !PMIX_PROC_IS_TOOL(pmix_globals.mypeer)) { + /* we are a simple client */ flag = 0; /* reserve space for our nspace and rank info */ sdsize += strlen(pmix_globals.myid.nspace) + 1 + sizeof(uint32_t); - } else if (PMIX_PROC_IS_LAUNCHER(pmix_globals.mypeer)) { - flag = 2; - /* add space for our uid/gid for ACL purposes */ - sdsize += 2*sizeof(uint32_t); - /* if we already have an identifier, we need to pass it */ - if (0 < strlen(pmix_globals.myid.nspace) && - PMIX_RANK_INVALID != pmix_globals.myid.rank) { - sdsize += strlen(pmix_globals.myid.nspace) + 1 + sizeof(uint32_t) + 1; - self_defined = true; - } else { - ++sdsize; // need space for the flag indicating if have id - } - } else { // must be a simple tool - flag = 1; + + } else { // must be a tool of some sort /* add space for our uid/gid for ACL purposes */ sdsize += 2*sizeof(uint32_t); - /* if we self-defined an identifier, we need to pass it */ - if (0 < strlen(pmix_globals.myid.nspace) && + if (PMIX_PROC_IS_CLIENT(pmix_globals.mypeer)) { + /* if we are both tool and client, then we need + * to tell the server we are both */ + flag = 5; + /* add space for our identifier */ + sdsize += strlen(pmix_globals.myid.nspace) + 1 + sizeof(uint32_t); + } else if (0 < strlen(pmix_globals.myid.nspace) && PMIX_RANK_INVALID != pmix_globals.myid.rank) { - sdsize += 1 + strlen(pmix_globals.myid.nspace) + 1 + sizeof(uint32_t); - self_defined = true; + /* we were given an identifier by the caller, pass it */ + sdsize += strlen(pmix_globals.myid.nspace) + 1 + sizeof(uint32_t); + flag = 4; } else { - ++sdsize; // need space for the flag indicating if have id + /* we are a self-started tool that needs an identifier */ + flag = 3; } } + *myflag = flag; /* add the name of our active sec module - we selected it * in pmix_client.c prior to entering here */ @@ -879,16 +1169,26 @@ static pmix_status_t send_connect_ack(int sd) /* add our active gds module for working with the server */ gds 
= (char*)pmix_client_globals.myserver->nptr->compat.gds->name; - /* set the number of bytes to be read beyond the header */ + /* if we were given info structs to pass to the server, pack them */ + PMIX_CONSTRUCT(&buf, pmix_buffer_t); + if (NULL != iptr) { + PMIX_BFROPS_PACK(rc, pmix_globals.mypeer, &buf, &niptr, 1, PMIX_SIZE); + PMIX_BFROPS_PACK(rc, pmix_globals.mypeer, &buf, iptr, niptr, PMIX_INFO); + } + + /* set the number of bytes to be read beyond the header - must + * NULL terminate the strings! */ hdr.nbytes = sdsize + strlen(PMIX_VERSION) + 1 + strlen(sec) + 1 \ + strlen(bfrops) + 1 + sizeof(bftype) \ - + strlen(gds) + 1 + sizeof(uint32_t) + cred.size; // must NULL terminate the strings! + + strlen(gds) + 1 + sizeof(uint32_t) + cred.size \ + + buf.bytes_used; /* create a space for our message */ sdsize = (sizeof(hdr) + hdr.nbytes); if (NULL == (msg = (char*)malloc(sdsize))) { PMIX_BYTE_OBJECT_DESTRUCT(&cred); free(sec); + PMIX_DESTRUCT(&buf); return PMIX_ERR_OUT_OF_RESOURCE; } memset(msg, 0, sdsize); @@ -920,7 +1220,7 @@ static pmix_status_t send_connect_ack(int sd) memcpy(msg+csize, &flag, 1); csize += 1; - if (PMIX_PROC_IS_CLIENT(pmix_globals.mypeer)) { + if (0 == flag) { /* if we are a client, provide our nspace/rank */ memcpy(msg+csize, pmix_globals.myid.nspace, strlen(pmix_globals.myid.nspace)); csize += strlen(pmix_globals.myid.nspace)+1; @@ -928,9 +1228,18 @@ static pmix_status_t send_connect_ack(int sd) u32 = htonl((uint32_t)pmix_globals.myid.rank); memcpy(msg+csize, &u32, sizeof(uint32_t)); csize += sizeof(uint32_t); - } else { - /* if we are a tool, provide our uid/gid for ACL support - note - * that we have to convert so we can handle heterogeneity */ + } else if (3 == flag || 6 == flag) { + /* we are a tool or launcher that needs an identifier - add our ACLs */ + euid = geteuid(); + u32 = htonl(euid); + memcpy(msg+csize, &u32, sizeof(uint32_t)); + csize += sizeof(uint32_t); + egid = getegid(); + u32 = htonl(egid); + memcpy(msg+csize, &u32, 
sizeof(uint32_t)); + csize += sizeof(uint32_t); + } else if (4 == flag || 5 == flag || 7 == flag || 8 == flag) { + /* we are a tool or launcher that has an identifier - start with our ACLs */ euid = geteuid(); u32 = htonl(euid); memcpy(msg+csize, &u32, sizeof(uint32_t)); @@ -939,6 +1248,17 @@ static pmix_status_t send_connect_ack(int sd) u32 = htonl(egid); memcpy(msg+csize, &u32, sizeof(uint32_t)); csize += sizeof(uint32_t); + /* now add our identifier */ + memcpy(msg+csize, pmix_globals.myid.nspace, strlen(pmix_globals.myid.nspace)); + csize += strlen(pmix_globals.myid.nspace)+1; + /* again, need to convert */ + u32 = htonl((uint32_t)pmix_globals.myid.rank); + memcpy(msg+csize, &u32, sizeof(uint32_t)); + csize += sizeof(uint32_t); + } else { + /* not a valid flag */ + PMIX_DESTRUCT(&buf); + return PMIX_ERR_NOT_SUPPORTED; } /* provide our version */ @@ -957,46 +1277,33 @@ static pmix_status_t send_connect_ack(int sd) memcpy(msg+csize, gds, strlen(gds)); csize += strlen(gds)+1; - /* if we are not a client and self-defined an identifier, we need to pass it */ - if (!PMIX_PROC_IS_CLIENT(pmix_globals.mypeer)) { - if (self_defined) { - flag = 1; - memcpy(msg+csize, &flag, 1); - ++csize; - memcpy(msg+csize, pmix_globals.myid.nspace, strlen(pmix_globals.myid.nspace)); - csize += strlen(pmix_globals.myid.nspace)+1; - /* again, need to convert */ - u32 = htonl((uint32_t)pmix_globals.myid.rank); - memcpy(msg+csize, &u32, sizeof(uint32_t)); - csize += sizeof(uint32_t); - } else { - flag = 0; - memcpy(msg+csize, &flag, 1); - ++csize; - } - } + /* provide the info struct bytes */ + memcpy(msg+csize, buf.base_ptr, buf.bytes_used); + csize += buf.bytes_used; /* send the entire message across */ if (PMIX_SUCCESS != pmix_ptl_base_send_blocking(sd, msg, sdsize)) { free(msg); + PMIX_DESTRUCT(&buf); return PMIX_ERR_UNREACH; } free(msg); + PMIX_DESTRUCT(&buf); return PMIX_SUCCESS; } /* we receive a connection acknowledgement from the server, * consisting of nothing more than a status 
report. If success, * then we initiate authentication method */ -static pmix_status_t recv_connect_ack(int sd) +static pmix_status_t recv_connect_ack(int sd, uint8_t myflag) { pmix_status_t reply; pmix_status_t rc; struct timeval tv, save; pmix_socklen_t sz; bool sockopt = true; + pmix_nspace_t nspace; uint32_t u32; - char nspace[PMIX_MAX_NSLEN+1]; pmix_output_verbose(2, pmix_ptl_base_framework.framework_output, "pmix: RECV CONNECT ACK FROM SERVER"); @@ -1004,19 +1311,23 @@ static pmix_status_t recv_connect_ack(int sd) /* get the current timeout value so we can reset to it */ sz = sizeof(save); if (0 != getsockopt(sd, SOL_SOCKET, SO_RCVTIMEO, (void*)&save, &sz)) { - if (ENOPROTOOPT == errno) { + if (ENOPROTOOPT == errno || EOPNOTSUPP == errno) { sockopt = false; } else { return PMIX_ERR_UNREACH; } } else { /* set a timeout on the blocking recv so we don't hang */ - tv.tv_sec = 2; + tv.tv_sec = mca_ptl_tcp_component.handshake_wait_time; tv.tv_usec = 0; if (0 != setsockopt(sd, SOL_SOCKET, SO_RCVTIMEO, &tv, sizeof(tv))) { - pmix_output_verbose(2, pmix_ptl_base_framework.framework_output, - "pmix: recv_connect_ack could not setsockopt SO_RCVTIMEO"); - return PMIX_ERR_UNREACH; + if (ENOPROTOOPT == errno || EOPNOTSUPP == errno) { + sockopt = false; + } else { + pmix_output_verbose(2, pmix_ptl_base_framework.framework_output, + "pmix: recv_connect_ack could not setsockopt SO_RCVTIMEO"); + return PMIX_ERR_UNREACH; + } } } @@ -1033,7 +1344,7 @@ static pmix_status_t recv_connect_ack(int sd) } reply = ntohl(u32); - if (PMIX_PROC_IS_CLIENT(pmix_globals.mypeer)) { + if (0 == myflag) { /* see if they want us to do the handshake */ if (PMIX_ERR_READY_FOR_HANDSHAKE == reply) { PMIX_PSEC_CLIENT_HANDSHAKE(rc, pmix_client_globals.myserver, sd); @@ -1055,26 +1366,23 @@ static pmix_status_t recv_connect_ack(int sd) } else { // we are a tool /* if the status indicates an error, then we are done */ if (PMIX_SUCCESS != reply) { - PMIX_ERROR_LOG(reply); return reply; } - /* recv our nspace 
*/ - rc = pmix_ptl_base_recv_blocking(sd, nspace, PMIX_MAX_NSLEN+1); - if (PMIX_SUCCESS != rc) { - return rc; - } - /* if we already have our nspace, then just verify it matches */ - if (0 < strlen(pmix_globals.myid.nspace)) { - if (0 != strncmp(pmix_globals.myid.nspace, nspace, PMIX_MAX_NSLEN)) { - return PMIX_ERR_INIT; + /* if we needed an identifier, recv it */ + if (3 == myflag || 6 == myflag) { + /* first the nspace */ + rc = pmix_ptl_base_recv_blocking(sd, (char*)&nspace, PMIX_MAX_NSLEN+1); + if (PMIX_SUCCESS != rc) { + return rc; } - } else { - (void)strncpy(pmix_globals.myid.nspace, nspace, PMIX_MAX_NSLEN); - } - /* if we already have a rank, then leave it alone */ - if (PMIX_RANK_INVALID == pmix_globals.myid.rank) { - /* our rank is always zero */ - pmix_globals.myid.rank = 0; + PMIX_LOAD_NSPACE(pmix_globals.myid.nspace, nspace); + /* now the rank */ + rc = pmix_ptl_base_recv_blocking(sd, (char*)&u32, sizeof(uint32_t)); + if (PMIX_SUCCESS != rc) { + return rc; + } + /* convert and store */ + pmix_globals.myid.rank = htonl(u32); } /* get the server's nspace and rank so we can send to it */ @@ -1082,7 +1390,7 @@ static pmix_status_t recv_connect_ack(int sd) pmix_client_globals.myserver->info = PMIX_NEW(pmix_rank_info_t); } if (NULL == pmix_client_globals.myserver->nptr) { - pmix_client_globals.myserver->nptr = PMIX_NEW(pmix_nspace_t); + pmix_client_globals.myserver->nptr = PMIX_NEW(pmix_namespace_t); } pmix_ptl_base_recv_blocking(sd, (char*)nspace, PMIX_MAX_NSLEN+1); if (NULL != pmix_client_globals.myserver->nptr->nspace) { @@ -1093,7 +1401,8 @@ static pmix_status_t recv_connect_ack(int sd) free(pmix_client_globals.myserver->info->pname.nspace); } pmix_client_globals.myserver->info->pname.nspace = strdup(nspace); - pmix_ptl_base_recv_blocking(sd, (char*)&(pmix_client_globals.myserver->info->pname.rank), sizeof(int)); + pmix_ptl_base_recv_blocking(sd, (char*)&u32, sizeof(uint32_t)); + pmix_client_globals.myserver->info->pname.rank = htonl(u32); 
pmix_output_verbose(2, pmix_ptl_base_framework.framework_output, "pmix: RECV CONNECT CONFIRMATION FOR TOOL %s:%d FROM SERVER %s:%d", @@ -1102,7 +1411,18 @@ static pmix_status_t recv_connect_ack(int sd) pmix_client_globals.myserver->info->pname.rank); /* get the returned status from the security handshake */ - pmix_ptl_base_recv_blocking(sd, (char*)&reply, sizeof(pmix_status_t)); + rc = pmix_ptl_base_recv_blocking(sd, (char*)&u32, sizeof(pmix_status_t)); + if (PMIX_SUCCESS != rc) { + if (sockopt) { + /* return the socket to normal */ + if (0 != setsockopt(sd, SOL_SOCKET, SO_RCVTIMEO, &save, sz)) { + return PMIX_ERR_UNREACH; + } + } + return rc; + } + + reply = ntohl(u32); if (PMIX_SUCCESS != reply) { /* see if they want us to do the handshake */ if (PMIX_ERR_READY_FOR_HANDSHAKE == reply) { @@ -1127,8 +1447,9 @@ static pmix_status_t recv_connect_ack(int sd) } static pmix_status_t df_search(char *dirname, char *prefix, + pmix_info_t info[], size_t ninfo, int *sd, char **nspace, - pmix_rank_t *rank) + pmix_rank_t *rank, char **uri) { char *suri, *nsp, *newdir; pmix_rank_t rk; @@ -1158,7 +1479,7 @@ static pmix_status_t df_search(char *dirname, char *prefix, } /* if it is a directory, down search */ if (S_ISDIR(buf.st_mode)) { - rc = df_search(newdir, prefix, sd, nspace, rank); + rc = df_search(newdir, prefix, info, ninfo, sd, nspace, rank, uri); free(newdir); if (PMIX_SUCCESS == rc) { closedir(cur_dirp); @@ -1178,11 +1499,11 @@ static pmix_status_t df_search(char *dirname, char *prefix, /* go ahead and try to connect */ pmix_output_verbose(2, pmix_ptl_base_framework.framework_output, "pmix:tcp: attempting to connect to %s", suri); - if (PMIX_SUCCESS == try_connect(suri, sd)) { + if (PMIX_SUCCESS == try_connect(suri, sd, info, ninfo)) { (*nspace) = nsp; *rank = rk; closedir(cur_dirp); - free(suri); + *uri = suri; free(newdir); return PMIX_SUCCESS; } diff --git a/opal/mca/pmix/pmix3x/pmix/src/mca/ptl/tcp/ptl_tcp.h b/opal/mca/pmix/pmix3x/pmix/src/mca/ptl/tcp/ptl_tcp.h 
index dd92a893818..5813bc7085c 100644 --- a/opal/mca/pmix/pmix3x/pmix/src/mca/ptl/tcp/ptl_tcp.h +++ b/opal/mca/pmix/pmix3x/pmix/src/mca/ptl/tcp/ptl_tcp.h @@ -9,7 +9,8 @@ * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. - * Copyright (c) 2016-2018 Intel, Inc. All rights reserved. + * Copyright (c) 2016-2019 Intel, Inc. All rights reserved. + * Copyright (c) 2018 IBM Corporation. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -47,11 +48,15 @@ typedef struct { struct sockaddr_storage connection; char *session_filename; char *nspace_filename; + char *pid_filename; char *system_filename; + char *rendezvous_filename; int wait_to_connect; int max_retries; char *report_uri; bool remote_connections; + int handshake_wait_time; + int handshake_max_retries; } pmix_ptl_tcp_component_t; extern pmix_ptl_tcp_component_t mca_ptl_tcp_component; diff --git a/opal/mca/pmix/pmix3x/pmix/src/mca/ptl/tcp/ptl_tcp_component.c b/opal/mca/pmix/pmix3x/pmix/src/mca/ptl/tcp/ptl_tcp_component.c index a880faa9c8d..cb800a6fdf9 100644 --- a/opal/mca/pmix/pmix3x/pmix/src/mca/ptl/tcp/ptl_tcp_component.c +++ b/opal/mca/pmix/pmix3x/pmix/src/mca/ptl/tcp/ptl_tcp_component.c @@ -12,9 +12,11 @@ * All rights reserved. * Copyright (c) 2015 Los Alamos National Security, LLC. All rights * reserved. - * Copyright (c) 2016-2018 Intel, Inc. All rights reserved. + * Copyright (c) 2016-2019 Intel, Inc. All rights reserved. * Copyright (c) 2017-2018 Research Organization for Information Science * and Technology (RIST). All rights reserved. + * Copyright (c) 2018-2019 IBM Corporation. All rights reserved. + * Copyright (c) 2019 Mellanox Technologies, Inc. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -60,6 +62,7 @@ #include "src/util/os_path.h" #include "src/util/parse_options.h" #include "src/util/pif.h" +#include "src/util/pmix_environ.h" #include "src/util/show_help.h" #include "src/util/strnlen.h" #include "src/common/pmix_iof.h" @@ -115,11 +118,15 @@ static pmix_status_t setup_fork(const pmix_proc_t *proc, char ***env); .disable_ipv6_family = true, .session_filename = NULL, .nspace_filename = NULL, + .pid_filename = NULL, .system_filename = NULL, + .rendezvous_filename = NULL, .wait_to_connect = 4, .max_retries = 2, .report_uri = NULL, - .remote_connections = false + .remote_connections = false, + .handshake_wait_time = 4, + .handshake_max_retries = 2 }; static char **split_and_resolve(char **orig_str, char *name); @@ -148,7 +155,7 @@ static int component_register(void) (void)pmix_mca_base_component_var_register(component, "remote_connections", "Enable connections from remote tools", - PMIX_MCA_BASE_VAR_TYPE_STRING, NULL, 0, 0, + PMIX_MCA_BASE_VAR_TYPE_BOOL, NULL, 0, 0, PMIX_INFO_LVL_2, PMIX_MCA_BASE_VAR_SCOPE_LOCAL, &mca_ptl_tcp_component.remote_connections); @@ -220,6 +227,20 @@ static int component_register(void) PMIX_MCA_BASE_VAR_SCOPE_READONLY, &mca_ptl_tcp_component.max_retries); + (void)pmix_mca_base_component_var_register(component, "handshake_wait_time", + "Number of seconds to wait for the server reply to the handshake request", + PMIX_MCA_BASE_VAR_TYPE_INT, NULL, 0, 0, + PMIX_INFO_LVL_4, + PMIX_MCA_BASE_VAR_SCOPE_READONLY, + &mca_ptl_tcp_component.handshake_wait_time); + + (void)pmix_mca_base_component_var_register(component, "handshake_max_retries", + "Number of times to retry the handshake request before giving up", + PMIX_MCA_BASE_VAR_TYPE_INT, NULL, 0, 0, + PMIX_INFO_LVL_4, + PMIX_MCA_BASE_VAR_SCOPE_READONLY, + &mca_ptl_tcp_component.handshake_max_retries); + return PMIX_SUCCESS; } @@ -233,31 +254,28 @@ static pmix_status_t component_open(void) /* check for environ-based directives * on 
system tmpdir to use */ - if (PMIX_PROC_IS_SERVER(pmix_globals.mypeer)) { + if (PMIX_PROC_IS_SERVER(pmix_globals.mypeer) || + PMIX_PROC_IS_LAUNCHER(pmix_globals.mypeer)) { mca_ptl_tcp_component.session_tmpdir = strdup(pmix_server_globals.tmpdir); } else { if (NULL != (tdir = getenv("PMIX_SERVER_TMPDIR"))) { mca_ptl_tcp_component.session_tmpdir = strdup(tdir); + } else { + mca_ptl_tcp_component.session_tmpdir = strdup(pmix_tmp_directory()); } } - if (NULL != (tdir = getenv("PMIX_SYSTEM_TMPDIR"))) { - mca_ptl_tcp_component.system_tmpdir = strdup(tdir); - } - - if (NULL == (tdir = getenv("TMPDIR"))) { - if (NULL == (tdir = getenv("TEMP"))) { - if (NULL == (tdir = getenv("TMP"))) { - tdir = "/tmp"; - } + if (PMIX_PROC_IS_SERVER(pmix_globals.mypeer) || + PMIX_PROC_IS_LAUNCHER(pmix_globals.mypeer)) { + mca_ptl_tcp_component.system_tmpdir = strdup(pmix_server_globals.system_tmpdir); + } else { + if (NULL != (tdir = getenv("PMIX_SYSTEM_TMPDIR"))) { + mca_ptl_tcp_component.system_tmpdir = strdup(tdir); + } else { + mca_ptl_tcp_component.system_tmpdir = strdup(pmix_tmp_directory()); } } - if (NULL == mca_ptl_tcp_component.session_tmpdir) { - mca_ptl_tcp_component.session_tmpdir = strdup(tdir); - } - if (NULL == mca_ptl_tcp_component.system_tmpdir) { - mca_ptl_tcp_component.system_tmpdir = strdup(tdir); - } + if (NULL != mca_ptl_tcp_component.report_uri && 0 != strcmp(mca_ptl_tcp_component.report_uri, "-") && 0 != strcmp(mca_ptl_tcp_component.report_uri, "+")) { @@ -271,12 +289,23 @@ pmix_status_t component_close(void) { if (NULL != mca_ptl_tcp_component.system_filename) { unlink(mca_ptl_tcp_component.system_filename); + free(mca_ptl_tcp_component.system_filename); } if (NULL != mca_ptl_tcp_component.session_filename) { unlink(mca_ptl_tcp_component.session_filename); + free(mca_ptl_tcp_component.session_filename); } if (NULL != mca_ptl_tcp_component.nspace_filename) { unlink(mca_ptl_tcp_component.nspace_filename); + free(mca_ptl_tcp_component.nspace_filename); + } + if (NULL 
!= mca_ptl_tcp_component.pid_filename) { + unlink(mca_ptl_tcp_component.pid_filename); + free(mca_ptl_tcp_component.pid_filename); + } + if (NULL != mca_ptl_tcp_component.rendezvous_filename) { + unlink(mca_ptl_tcp_component.rendezvous_filename); + free(mca_ptl_tcp_component.rendezvous_filename); } if (NULL != urifile) { /* remove the file */ @@ -301,19 +330,8 @@ static int component_query(pmix_mca_base_module_t **module, int *priority) static pmix_status_t setup_fork(const pmix_proc_t *proc, char ***env) { - char *evar; - - if (0 > asprintf(&evar, "PMIX_SERVER_TMPDIR=%s", mca_ptl_tcp_component.session_tmpdir)) { - return PMIX_ERR_NOMEM; - } - pmix_argv_append_nosize(env, evar); - free(evar); - - if (0 > asprintf(&evar, "PMIX_SYSTEM_TMPDIR=%s", mca_ptl_tcp_component.system_tmpdir)) { - return PMIX_ERR_NOMEM; - } - pmix_argv_append_nosize(env, evar); - free(evar); + pmix_setenv("PMIX_SERVER_TMPDIR", mca_ptl_tcp_component.session_tmpdir, true, env); + pmix_setenv("PMIX_SYSTEM_TMPDIR", mca_ptl_tcp_component.system_tmpdir, true, env); return PMIX_SUCCESS; } @@ -342,9 +360,10 @@ static pmix_status_t setup_listener(pmix_info_t info[], size_t ninfo, bool session_tool = false; bool system_tool = false; pmix_socklen_t addrlen; - char *prefix, myhost[PMIX_MAXHOSTNAMELEN]; - char myconnhost[PMIX_MAXHOSTNAMELEN]; + char *prefix, myhost[PMIX_MAXHOSTNAMELEN] = {0}; + char myconnhost[PMIX_MAXHOSTNAMELEN] = {0}; int myport; + pmix_kval_t *urikv; pmix_output_verbose(2, pmix_ptl_base_framework.framework_output, "ptl:tcp setup_listener"); @@ -357,51 +376,54 @@ static pmix_status_t setup_listener(pmix_info_t info[], size_t ninfo, /* scan the info keys and process any override instructions */ if (NULL != info) { for (n=0; n < ninfo; n++) { - if (0 == strcmp(info[n].key, PMIX_TCP_IF_INCLUDE)) { + if (PMIX_CHECK_KEY(&info[n], PMIX_TCP_IF_INCLUDE)) { if (NULL != mca_ptl_tcp_component.if_include) { free(mca_ptl_tcp_component.if_include); } mca_ptl_tcp_component.if_include = 
strdup(info[n].value.data.string); - } else if (0 == strcmp(info[n].key, PMIX_TCP_IF_EXCLUDE)) { + } else if (PMIX_CHECK_KEY(&info[n], PMIX_TCP_IF_EXCLUDE)) { if (NULL != mca_ptl_tcp_component.if_exclude) { free(mca_ptl_tcp_component.if_exclude); } mca_ptl_tcp_component.if_exclude = strdup(info[n].value.data.string); - } else if (0 == strcmp(info[n].key, PMIX_TCP_IPV4_PORT)) { + } else if (PMIX_CHECK_KEY(&info[n], PMIX_TCP_IPV4_PORT)) { mca_ptl_tcp_component.ipv4_port = info[n].value.data.integer; - } else if (0 == strcmp(info[n].key, PMIX_TCP_IPV6_PORT)) { + } else if (PMIX_CHECK_KEY(&info[n], PMIX_TCP_IPV6_PORT)) { mca_ptl_tcp_component.ipv6_port = info[n].value.data.integer; - } else if (0 == strcmp(info[n].key, PMIX_TCP_DISABLE_IPV4)) { + } else if (PMIX_CHECK_KEY(&info[n], PMIX_TCP_DISABLE_IPV4)) { mca_ptl_tcp_component.disable_ipv4_family = PMIX_INFO_TRUE(&info[n]); - } else if (0 == strcmp(info[n].key, PMIX_TCP_DISABLE_IPV6)) { + } else if (PMIX_CHECK_KEY(&info[n], PMIX_TCP_DISABLE_IPV6)) { mca_ptl_tcp_component.disable_ipv6_family = PMIX_INFO_TRUE(&info[n]); - } else if (0 == strcmp(info[n].key, PMIX_SERVER_REMOTE_CONNECTIONS)) { + } else if (PMIX_CHECK_KEY(&info[n], PMIX_SERVER_REMOTE_CONNECTIONS)) { mca_ptl_tcp_component.remote_connections = PMIX_INFO_TRUE(&info[n]); - } else if (0 == strcmp(info[n].key, PMIX_TCP_URI)) { + } else if (PMIX_CHECK_KEY(&info[n], PMIX_TCP_URI)) { if (NULL != mca_ptl_tcp_component.super.uri) { free(mca_ptl_tcp_component.super.uri); } mca_ptl_tcp_component.super.uri = strdup(info[n].value.data.string); - } else if (0 == strcmp(info[n].key, PMIX_TCP_REPORT_URI)) { + } else if (PMIX_CHECK_KEY(&info[n], PMIX_TCP_REPORT_URI)) { if (NULL != mca_ptl_tcp_component.report_uri) { free(mca_ptl_tcp_component.report_uri); } mca_ptl_tcp_component.report_uri = strdup(info[n].value.data.string); - } else if (0 == strcmp(info[n].key, PMIX_SERVER_TMPDIR)) { + } else if (PMIX_CHECK_KEY(&info[n], PMIX_SERVER_TMPDIR)) { if (NULL != 
mca_ptl_tcp_component.session_tmpdir) { free(mca_ptl_tcp_component.session_tmpdir); } mca_ptl_tcp_component.session_tmpdir = strdup(info[n].value.data.string); - } else if (0 == strcmp(info[n].key, PMIX_SYSTEM_TMPDIR)) { + } else if (PMIX_CHECK_KEY(&info[n], PMIX_SYSTEM_TMPDIR)) { if (NULL != mca_ptl_tcp_component.system_tmpdir) { free(mca_ptl_tcp_component.system_tmpdir); } mca_ptl_tcp_component.system_tmpdir = strdup(info[n].value.data.string); } else if (0 == strcmp(info[n].key, PMIX_SERVER_TOOL_SUPPORT)) { session_tool = PMIX_INFO_TRUE(&info[n]); - } else if (0 == strcmp(info[n].key, PMIX_SERVER_SYSTEM_SUPPORT)) { + } else if (PMIX_CHECK_KEY(&info[n], PMIX_SERVER_SYSTEM_SUPPORT)) { system_tool = PMIX_INFO_TRUE(&info[n]); - } + } else if (PMIX_PROC_IS_LAUNCHER(pmix_globals.mypeer) && + PMIX_CHECK_KEY(&info[n], PMIX_LAUNCHER_RENDEZVOUS_FILE)) { + mca_ptl_tcp_component.rendezvous_filename = strdup(info[n].value.data.string); + } } } @@ -602,17 +624,17 @@ static pmix_status_t setup_listener(pmix_info_t info[], size_t ninfo, goto sockerror; } - gethostname(myhost, sizeof(myhost)); + gethostname(myhost, sizeof(myhost)-1); if (AF_INET == mca_ptl_tcp_component.connection.ss_family) { prefix = "tcp4://"; myport = ntohs(((struct sockaddr_in*) &mca_ptl_tcp_component.connection)->sin_port); inet_ntop(AF_INET, &((struct sockaddr_in*) &mca_ptl_tcp_component.connection)->sin_addr, - myconnhost, PMIX_MAXHOSTNAMELEN); + myconnhost, PMIX_MAXHOSTNAMELEN-1); } else if (AF_INET6 == mca_ptl_tcp_component.connection.ss_family) { prefix = "tcp6://"; myport = ntohs(((struct sockaddr_in6*) &mca_ptl_tcp_component.connection)->sin6_port); inet_ntop(AF_INET6, &((struct sockaddr_in6*) &mca_ptl_tcp_component.connection)->sin6_addr, - myconnhost, PMIX_MAXHOSTNAMELEN); + myconnhost, PMIX_MAXHOSTNAMELEN-1); } else { goto sockerror; } @@ -625,6 +647,16 @@ static pmix_status_t setup_listener(pmix_info_t info[], size_t ninfo, pmix_output_verbose(2, pmix_ptl_base_framework.framework_output, 
"ptl:tcp URI %s", lt->uri); + /* save the URI internally so we can report it */ + urikv = PMIX_NEW(pmix_kval_t); + urikv->key = strdup(PMIX_SERVER_URI); + PMIX_VALUE_CREATE(urikv->value, 1); + PMIX_VALUE_LOAD(urikv->value, lt->uri, PMIX_STRING); + PMIX_GDS_STORE_KV(rc, pmix_globals.mypeer, + &pmix_globals.myid, PMIX_INTERNAL, + urikv); + PMIX_RELEASE(urikv); // maintain accounting + if (NULL != mca_ptl_tcp_component.report_uri) { /* if the string is a "-", then output to stdout */ if (0 == strcmp(mca_ptl_tcp_component.report_uri, "-")) { @@ -652,6 +684,38 @@ static pmix_status_t setup_listener(pmix_info_t info[], size_t ninfo, } } + /* if we were given a rendezvous file, then drop it */ + if (NULL != mca_ptl_tcp_component.rendezvous_filename) { + FILE *fp; + + pmix_output_verbose(2, pmix_ptl_base_framework.framework_output, + "WRITING RENDEZVOUS FILE %s", + mca_ptl_tcp_component.rendezvous_filename); + fp = fopen(mca_ptl_tcp_component.rendezvous_filename, "w"); + if (NULL == fp) { + pmix_output(0, "Impossible to open the file %s in write mode\n", mca_ptl_tcp_component.rendezvous_filename); + PMIX_ERROR_LOG(PMIX_ERR_FILE_OPEN_FAILURE); + CLOSE_THE_SOCKET(lt->socket); + free(mca_ptl_tcp_component.rendezvous_filename); + mca_ptl_tcp_component.rendezvous_filename = NULL; + goto sockerror; + } + + /* output my nspace and rank plus the URI */ + fprintf(fp, "%s\n", lt->uri); + /* add a flag that indicates we accept v3.0 protocols */ + fprintf(fp, "v%s\n", PMIX_VERSION); + fclose(fp); + /* set the file mode */ + if (0 != chmod(mca_ptl_tcp_component.rendezvous_filename, S_IRUSR | S_IWUSR | S_IRGRP)) { + PMIX_ERROR_LOG(PMIX_ERR_FILE_OPEN_FAILURE); + CLOSE_THE_SOCKET(lt->socket); + free(mca_ptl_tcp_component.rendezvous_filename); + mca_ptl_tcp_component.rendezvous_filename = NULL; + goto sockerror; + } + } + /* if we are going to support tools, then drop contact file(s) */ if (system_tool) { FILE *fp; @@ -692,10 +756,10 @@ static pmix_status_t setup_listener(pmix_info_t 
info[], size_t ninfo, FILE *fp; pid_t mypid; - /* first output to a file based on pid */ + /* first output to a std file */ mypid = getpid(); - if (0 > asprintf(&mca_ptl_tcp_component.session_filename, "%s/pmix.%s.tool.%d", - mca_ptl_tcp_component.session_tmpdir, myhost, mypid)) { + if (0 > asprintf(&mca_ptl_tcp_component.session_filename, "%s/pmix.%s.tool", + mca_ptl_tcp_component.session_tmpdir, myhost)) { CLOSE_THE_SOCKET(lt->socket); goto sockerror; } @@ -726,6 +790,40 @@ static pmix_status_t setup_listener(pmix_info_t info[], size_t ninfo, goto sockerror; } + /* now output to a file based on pid */ + mypid = getpid(); + if (0 > asprintf(&mca_ptl_tcp_component.pid_filename, "%s/pmix.%s.tool.%d", + mca_ptl_tcp_component.session_tmpdir, myhost, mypid)) { + CLOSE_THE_SOCKET(lt->socket); + goto sockerror; + } + pmix_output_verbose(2, pmix_ptl_base_framework.framework_output, + "WRITING TOOL FILE %s", + mca_ptl_tcp_component.pid_filename); + fp = fopen(mca_ptl_tcp_component.pid_filename, "w"); + if (NULL == fp) { + pmix_output(0, "Impossible to open the file %s in write mode\n", mca_ptl_tcp_component.pid_filename); + PMIX_ERROR_LOG(PMIX_ERR_FILE_OPEN_FAILURE); + CLOSE_THE_SOCKET(lt->socket); + free(mca_ptl_tcp_component.pid_filename); + mca_ptl_tcp_component.pid_filename = NULL; + goto sockerror; + } + + /* output my URI */ + fprintf(fp, "%s\n", lt->uri); + /* add a flag that indicates we accept v2.1 protocols */ + fprintf(fp, "%s\n", PMIX_VERSION); + fclose(fp); + /* set the file mode */ + if (0 != chmod(mca_ptl_tcp_component.pid_filename, S_IRUSR | S_IWUSR | S_IRGRP)) { + PMIX_ERROR_LOG(PMIX_ERR_FILE_OPEN_FAILURE); + CLOSE_THE_SOCKET(lt->socket); + free(mca_ptl_tcp_component.pid_filename); + mca_ptl_tcp_component.pid_filename = NULL; + goto sockerror; + } + /* now output it into a file based on my nspace */ if (0 > asprintf(&mca_ptl_tcp_component.nspace_filename, "%s/pmix.%s.tool.%s", @@ -800,7 +898,7 @@ static char **split_and_resolve(char **orig_str, char *name) 
{ int i, ret, save, if_index; char **argv, *str, *tmp; - char if_name[IF_NAMESIZE]; + char if_name[PMIX_IF_NAMESIZE]; struct sockaddr_storage argv_inaddr, if_inaddr; uint32_t argv_prefix; @@ -899,21 +997,21 @@ static void connection_handler(int sd, short args, void *cbdata) pmix_ptl_hdr_t hdr; pmix_peer_t *peer; pmix_rank_t rank=0; - pmix_status_t rc; + pmix_status_t rc, reply; char *msg, *mg, *version; char *sec, *bfrops, *gds; pmix_bfrop_buffer_type_t bftype; char *nspace; uint32_t len, u32; size_t cnt, msglen, n; - uint8_t flag; - pmix_nspace_t *nptr, *tmp; + pmix_namespace_t *nptr, *tmp; bool found; pmix_rank_info_t *info; pmix_proc_t proc; pmix_info_t ginfo; pmix_proc_type_t proc_type; pmix_byte_object_t cred; + pmix_buffer_t buf; /* acquire the object */ PMIX_ACQUIRE_OBJECT(pnd); @@ -1008,7 +1106,7 @@ static void connection_handler(int sd, short args, void *cbdata) /* get the process type of the connecting peer */ if (1 <= cnt) { - memcpy(&flag, mg, 1); + memcpy(&pnd->flag, mg, 1); ++mg; --cnt; } else { @@ -1018,7 +1116,7 @@ static void connection_handler(int sd, short args, void *cbdata) goto error; } - if (0 == flag) { + if (0 == pnd->flag) { /* they must be a client, so get their nspace/rank */ proc_type = PMIX_PROC_CLIENT; PMIX_STRNLEN(msglen, mg, cnt); @@ -1045,7 +1143,7 @@ static void connection_handler(int sd, short args, void *cbdata) rc = PMIX_ERR_BAD_PARAM; goto error; } - } else if (1 == flag) { + } else if (1 == pnd->flag) { /* they are a tool */ proc_type = PMIX_PROC_TOOL; /* extract the uid/gid */ @@ -1071,7 +1169,7 @@ static void connection_handler(int sd, short args, void *cbdata) rc = PMIX_ERR_BAD_PARAM; goto error; } - } else if (2 == flag) { + } else if (2 == pnd->flag) { /* they are a launcher */ proc_type = PMIX_PROC_LAUNCHER; /* extract the uid/gid */ @@ -1097,8 +1195,95 @@ static void connection_handler(int sd, short args, void *cbdata) rc = PMIX_ERR_BAD_PARAM; goto error; } + } else if (3 == pnd->flag || 6 == pnd->flag) { + /* they are 
a tool or launcher that needs an identifier */ + if (3 == pnd->flag) { + proc_type = PMIX_PROC_TOOL; + } else { + proc_type = PMIX_PROC_LAUNCHER; + } + /* extract the uid/gid */ + if (sizeof(uint32_t) <= cnt) { + memcpy(&u32, mg, sizeof(uint32_t)); + mg += sizeof(uint32_t); + cnt -= sizeof(uint32_t); + pnd->uid = ntohl(u32); + } else { + free(msg); + /* send an error reply to the client */ + rc = PMIX_ERR_BAD_PARAM; + goto error; + } + if (sizeof(uint32_t) <= cnt) { + memcpy(&u32, mg, sizeof(uint32_t)); + mg += sizeof(uint32_t); + cnt -= sizeof(uint32_t); + pnd->gid = ntohl(u32); + } else { + free(msg); + /* send an error reply to the client */ + rc = PMIX_ERR_BAD_PARAM; + goto error; + } + /* they need an id */ + pnd->need_id = true; + } else if (4 == pnd->flag || 5 == pnd->flag || 7 == pnd->flag || 8 == pnd->flag) { + /* they are a tool or launcher that has an identifier - start with our ACLs */ + if (4 == pnd->flag || 5 == pnd->flag) { + proc_type = PMIX_PROC_TOOL; + } else { + proc_type = PMIX_PROC_LAUNCHER; + } + /* extract the uid/gid */ + if (sizeof(uint32_t) <= cnt) { + memcpy(&u32, mg, sizeof(uint32_t)); + mg += sizeof(uint32_t); + cnt -= sizeof(uint32_t); + pnd->uid = ntohl(u32); + } else { + free(msg); + /* send an error reply to the client */ + rc = PMIX_ERR_BAD_PARAM; + goto error; + } + if (sizeof(uint32_t) <= cnt) { + memcpy(&u32, mg, sizeof(uint32_t)); + mg += sizeof(uint32_t); + cnt -= sizeof(uint32_t); + pnd->gid = ntohl(u32); + } else { + free(msg); + /* send an error reply to the client */ + rc = PMIX_ERR_BAD_PARAM; + goto error; + } + PMIX_STRNLEN(msglen, mg, cnt); + if (msglen < cnt) { + nspace = mg; + mg += strlen(nspace) + 1; + cnt -= strlen(nspace) + 1; + } else { + free(msg); + /* send an error reply to the client */ + rc = PMIX_ERR_BAD_PARAM; + goto error; + } + + if (sizeof(pmix_rank_t) <= cnt) { + /* have to convert this to host order */ + memcpy(&u32, mg, sizeof(uint32_t)); + rank = ntohl(u32); + mg += sizeof(uint32_t); + cnt -= 
sizeof(uint32_t); + } else { + free(msg); + /* send an error reply to the client */ + rc = PMIX_ERR_BAD_PARAM; + goto error; + } } else { /* we don't know what they are! */ + PMIX_ERROR_LOG(PMIX_ERR_NOT_SUPPORTED); rc = PMIX_ERR_NOT_SUPPORTED; free(msg); goto error; @@ -1123,7 +1308,7 @@ static void connection_handler(int sd, short args, void *cbdata) proc_type = proc_type | PMIX_PROC_V20; bfrops = "v20"; bftype = pmix_bfrops_globals.default_type; // we can't know any better - gds = NULL; + gds = "ds12,hash"; } else { int major; major = strtoul(version, NULL, 10); @@ -1133,6 +1318,7 @@ static void connection_handler(int sd, short args, void *cbdata) proc_type = proc_type | PMIX_PROC_V3; } else { free(msg); + PMIX_ERROR_LOG(PMIX_ERR_NOT_SUPPORTED); rc = PMIX_ERR_NOT_SUPPORTED; goto error; } @@ -1179,63 +1365,149 @@ static void connection_handler(int sd, short args, void *cbdata) } /* see if this is a tool connection request */ - if (0 != flag) { - /* does the server support tool connections? 
*/ - if (NULL == pmix_host_server.tool_connected) { - /* send an error reply to the client */ - rc = PMIX_ERR_NOT_SUPPORTED; - goto error; + if (0 != pnd->flag) { + peer = PMIX_NEW(pmix_peer_t); + if (NULL == peer) { + /* probably cannot send an error reply if we are out of memory */ + free(msg); + CLOSE_THE_SOCKET(pnd->sd); + PMIX_RELEASE(pnd); + return; } - - if (PMIX_PROC_V3 & proc_type) { - /* the caller will have provided a flag indicating - * whether or not they have an assigned nspace/rank */ - if (cnt < 1) { - PMIX_ERROR_LOG(PMIX_ERR_BAD_PARAM); - free(msg); - /* send an error reply to the client */ - rc = PMIX_ERR_BAD_PARAM; - goto error; + pnd->peer = peer; + /* if this is a tool we launched, then the host may + * have already registered it as a client - so check + * to see if we already have a peer for it */ + if (5 == pnd->flag || 8 == pnd->flag) { + /* registration only adds the nspace and a rank in that + * nspace - it doesn't add the peer object to our array + * of local clients. So let's start by searching for + * the nspace object */ + nptr = NULL; + PMIX_LIST_FOREACH(tmp, &pmix_globals.nspaces, pmix_namespace_t) { + if (0 == strcmp(tmp->nspace, nspace)) { + nptr = tmp; + break; + } } - memcpy(&flag, mg, 1); - ++mg; - --cnt; - if (flag) { - PMIX_STRNLEN(msglen, mg, cnt); - if (msglen < cnt) { - nspace = mg; - mg += strlen(nspace) + 1; - cnt -= strlen(nspace) + 1; - } else { - free(msg); - /* send an error reply to the client */ - rc = PMIX_ERR_BAD_PARAM; + if (NULL == nptr) { + /* it is possible that this is a tool inside of + * a job-script as part of a multi-spawn operation. + * Since each tool invocation may have finalized and + * terminated, the tool will appear to "terminate", thus + * causing us to cleanup all references to it, and then + * reappear. So we don't reject this connection request. 
+ * Instead, we create the nspace and rank objects for + * it and let the RM/host decide if this behavior + * is allowed */ + nptr = PMIX_NEW(pmix_namespace_t); + if (NULL == nptr) { + rc = PMIX_ERR_NOMEM; goto error; } - if (sizeof(pmix_rank_t) <= cnt) { - /* have to convert this to host order */ - memcpy(&u32, mg, sizeof(uint32_t)); - rank = ntohl(u32); - mg += sizeof(uint32_t); - cnt -= sizeof(uint32_t); - } else { - free(msg); - /* send an error reply to the client */ - rc = PMIX_ERR_BAD_PARAM; - goto error; + nptr->nspace = strdup(nspace); + } + /* now look for the rank */ + info = NULL; + found = false; + PMIX_LIST_FOREACH(info, &nptr->ranks, pmix_rank_info_t) { + if (info->pname.rank == rank) { + found = true; + break; } + } + if (!found) { + /* see above note about not finding nspace */ + info = PMIX_NEW(pmix_rank_info_t); + info->pname.nspace = strdup(nspace); + info->pname.rank = rank; + info->uid = pnd->uid; + info->gid = pnd->gid; + pmix_list_append(&nptr->ranks, &info->super); + } + PMIX_RETAIN(info); + peer->info = info; + PMIX_RETAIN(nptr); + } else { + nptr = PMIX_NEW(pmix_namespace_t); + if (NULL == nptr) { + PMIX_ERROR_LOG(PMIX_ERR_NOMEM); + CLOSE_THE_SOCKET(pnd->sd); + PMIX_RELEASE(pnd); + PMIX_RELEASE(peer); + return; + } + } + peer->nptr = nptr; + /* select their bfrops compat module */ + peer->nptr->compat.bfrops = pmix_bfrops_base_assign_module(bfrops); + if (NULL == peer->nptr->compat.bfrops) { + PMIX_RELEASE(peer); + CLOSE_THE_SOCKET(pnd->sd); + PMIX_RELEASE(pnd); + return; + } + /* set the buffer type */ + peer->nptr->compat.type = bftype; + n = 0; + /* if info structs need to be passed along, then unpack them */ + if (0 < cnt) { + int32_t foo; + PMIX_CONSTRUCT(&buf, pmix_buffer_t); + PMIX_LOAD_BUFFER(peer, &buf, mg, cnt); + foo = 1; + PMIX_BFROPS_UNPACK(rc, peer, &buf, &pnd->ninfo, &foo, PMIX_SIZE); + foo = (int32_t)pnd->ninfo; + /* if we have an identifier, then we leave room to pass it */ + if (!pnd->need_id) { + pnd->ninfo += 5; + } 
else { + pnd->ninfo += 3; + } + PMIX_INFO_CREATE(pnd->info, pnd->ninfo); + PMIX_BFROPS_UNPACK(rc, peer, &buf, pnd->info, &foo, PMIX_INFO); + n = foo; + } else { + if (!pnd->need_id) { pnd->ninfo = 5; } else { pnd->ninfo = 3; } - } else { - pnd->ninfo = 3; + PMIX_INFO_CREATE(pnd->info, pnd->ninfo); + } + + /* pass along the proc_type */ + pnd->proc_type = proc_type; + /* pass along the bfrop, buffer_type, and sec fields so + * we can assign them once we create a peer object */ + pnd->psec = strdup(sec); + if (NULL != gds) { + pnd->gds = strdup(gds); + } + + /* does the server support tool connections? */ + if (NULL == pmix_host_server.tool_connected) { + if (pnd->need_id) { + /* we need someone to provide the tool with an + * identifier and they aren't available */ + /* send an error reply to the client */ + rc = PMIX_ERR_NOT_SUPPORTED; + PMIX_RELEASE(peer); + /* release the msg */ + free(msg); + goto error; + } else { + /* just process it locally */ + PMIX_LOAD_PROCID(&proc, nspace, rank); + cnct_cbfunc(PMIX_SUCCESS, &proc, (void*)pnd); + /* release the msg */ + free(msg); + return; + } } /* setup the info array to pass the relevant info * to the server */ - n = 0; - PMIX_INFO_CREATE(pnd->info, pnd->ninfo); /* provide the version */ PMIX_INFO_LOAD(&pnd->info[n], PMIX_VERSION_INFO, version, PMIX_STRING); ++n; @@ -1245,37 +1517,24 @@ static void connection_handler(int sd, short args, void *cbdata) /* and the group id */ PMIX_INFO_LOAD(&pnd->info[n], PMIX_GRPID, &pnd->gid, PMIX_UINT32); ++n; - /* if we have it, pass along our ID */ - if (flag) { + /* if we have it, pass along their ID */ + if (!pnd->need_id) { PMIX_INFO_LOAD(&pnd->info[n], PMIX_NSPACE, nspace, PMIX_STRING); ++n; PMIX_INFO_LOAD(&pnd->info[n], PMIX_RANK, &rank, PMIX_PROC_RANK); ++n; } - /* pass along the proc_type */ - pnd->proc_type = proc_type; - /* pass along the bfrop, buffer_type, and sec fields so - * we can assign them once we create a peer object */ - pnd->psec = strdup(sec); - if (NULL != 
bfrops) { - pnd->bfrops = strdup(bfrops); - } - pnd->buffer_type = bftype; - if (NULL != gds) { - pnd->gds = strdup(gds); - } /* release the msg */ free(msg); - /* request an nspace for this requestor - it will - * automatically be assigned rank=0 if the rank - * isn't already known */ + + /* pass it up for processing */ pmix_host_server.tool_connected(pnd->info, pnd->ninfo, cnct_cbfunc, pnd); return; } /* see if we know this nspace */ nptr = NULL; - PMIX_LIST_FOREACH(tmp, &pmix_server_globals.nspaces, pmix_nspace_t) { + PMIX_LIST_FOREACH(tmp, &pmix_globals.nspaces, pmix_namespace_t) { if (0 == strcmp(tmp->nspace, nspace)) { nptr = tmp; break; @@ -1403,22 +1662,13 @@ static void connection_handler(int sd, short args, void *cbdata) /* validate the connection */ cred.bytes = pnd->cred; cred.size = pnd->len; - PMIX_PSEC_VALIDATE_CONNECTION(rc, peer, NULL, 0, NULL, NULL, &cred); - if (PMIX_SUCCESS != rc) { - pmix_output_verbose(2, pmix_ptl_base_framework.framework_output, - "validation of client connection failed"); - info->proc_cnt--; - pmix_pointer_array_set_item(&pmix_server_globals.clients, peer->index, NULL); - PMIX_RELEASE(peer); - /* send an error reply to the client */ - goto error; - } + PMIX_PSEC_VALIDATE_CONNECTION(reply, peer, NULL, 0, NULL, NULL, &cred); pmix_output_verbose(2, pmix_ptl_base_framework.framework_output, - "client connection validated"); + "client connection validated with status=%d", reply); /* tell the client all is good */ - u32 = htonl(PMIX_SUCCESS); + u32 = htonl(reply); if (PMIX_SUCCESS != (rc = pmix_ptl_base_send_blocking(pnd->sd, (char*)&u32, sizeof(uint32_t)))) { PMIX_ERROR_LOG(rc); info->proc_cnt--; @@ -1428,6 +1678,22 @@ static void connection_handler(int sd, short args, void *cbdata) PMIX_RELEASE(pnd); return; } + /* If needed perform the handshake. 
The macro will update reply */ + PMIX_PSEC_SERVER_HANDSHAKE_IFNEED(reply, peer, NULL, 0, NULL, NULL, &cred); + + /* It is possible that connection validation failed + * We need to reply to the client first and cleanup after */ + if (PMIX_SUCCESS != reply) { + pmix_output_verbose(2, pmix_ptl_base_framework.framework_output, + "validation of client connection failed"); + info->proc_cnt--; + pmix_pointer_array_set_item(&pmix_server_globals.clients, peer->index, NULL); + PMIX_RELEASE(peer); + /* send an error reply to the client */ + goto error; + } + + /* send the client's array index */ u32 = htonl(peer->index); if (PMIX_SUCCESS != (rc = pmix_ptl_base_send_blocking(pnd->sd, (char*)&u32, sizeof(uint32_t)))) { @@ -1443,12 +1709,16 @@ static void connection_handler(int sd, short args, void *cbdata) /* let the host server know that this client has connected */ if (NULL != pmix_host_server.client_connected) { - (void)strncpy(proc.nspace, peer->info->pname.nspace, PMIX_MAX_NSLEN); + pmix_strncpy(proc.nspace, peer->info->pname.nspace, PMIX_MAX_NSLEN); proc.rank = peer->info->pname.rank; rc = pmix_host_server.client_connected(&proc, peer->info->server_object, NULL, NULL); - if (PMIX_SUCCESS != rc) { + if (PMIX_SUCCESS != rc && PMIX_OPERATION_SUCCEEDED != rc) { PMIX_ERROR_LOG(rc); + info->proc_cnt--; + pmix_pointer_array_set_item(&pmix_server_globals.clients, peer->index, NULL); + PMIX_RELEASE(peer); + goto error; } } @@ -1483,10 +1753,10 @@ static void process_cbfunc(int sd, short args, void *cbdata) { pmix_setup_caddy_t *cd = (pmix_setup_caddy_t*)cbdata; pmix_pending_connection_t *pnd = (pmix_pending_connection_t*)cd->cbdata; - pmix_nspace_t *nptr; + pmix_namespace_t *nptr; pmix_rank_info_t *info; pmix_peer_t *peer; - int rc; + pmix_status_t rc, reply; uint32_t u32; pmix_info_t ginfo; pmix_byte_object_t cred; @@ -1500,6 +1770,7 @@ static void process_cbfunc(int sd, short args, void *cbdata) if (PMIX_SUCCESS != (rc = pmix_ptl_base_send_blocking(pnd->sd, (char*)&u32, 
sizeof(uint32_t)))) { PMIX_ERROR_LOG(rc); CLOSE_THE_SOCKET(pnd->sd); + PMIX_RELEASE(pnd->peer); PMIX_RELEASE(pnd); PMIX_RELEASE(cd); return; @@ -1507,24 +1778,41 @@ static void process_cbfunc(int sd, short args, void *cbdata) /* if the request failed, then we are done */ if (PMIX_SUCCESS != cd->status) { + PMIX_RELEASE(pnd->peer); PMIX_RELEASE(pnd); PMIX_RELEASE(cd); return; } - /* send the nspace back to the tool */ - if (PMIX_SUCCESS != (rc = pmix_ptl_base_send_blocking(pnd->sd, cd->proc.nspace, PMIX_MAX_NSLEN+1))) { - PMIX_ERROR_LOG(rc); - CLOSE_THE_SOCKET(pnd->sd); - PMIX_RELEASE(pnd); - PMIX_RELEASE(cd); - return; + /* if we got an identifier, send it back to the tool */ + if (pnd->need_id) { + /* start with the nspace */ + if (PMIX_SUCCESS != (rc = pmix_ptl_base_send_blocking(pnd->sd, cd->proc.nspace, PMIX_MAX_NSLEN+1))) { + PMIX_ERROR_LOG(rc); + CLOSE_THE_SOCKET(pnd->sd); + PMIX_RELEASE(pnd->peer); + PMIX_RELEASE(pnd); + PMIX_RELEASE(cd); + return; + } + + /* now the rank, suitably converted */ + u32 = ntohl(cd->proc.rank); + if (PMIX_SUCCESS != (rc = pmix_ptl_base_send_blocking(pnd->sd, (char*)&u32, sizeof(uint32_t)))) { + PMIX_ERROR_LOG(rc); + CLOSE_THE_SOCKET(pnd->sd); + PMIX_RELEASE(pnd->peer); + PMIX_RELEASE(pnd); + PMIX_RELEASE(cd); + return; + } } /* send my nspace back to the tool */ if (PMIX_SUCCESS != (rc = pmix_ptl_base_send_blocking(pnd->sd, pmix_globals.myid.nspace, PMIX_MAX_NSLEN+1))) { PMIX_ERROR_LOG(rc); CLOSE_THE_SOCKET(pnd->sd); + PMIX_RELEASE(pnd->peer); PMIX_RELEASE(pnd); PMIX_RELEASE(cd); return; @@ -1535,61 +1823,41 @@ static void process_cbfunc(int sd, short args, void *cbdata) if (PMIX_SUCCESS != (rc = pmix_ptl_base_send_blocking(pnd->sd, (char*)&u32, sizeof(uint32_t)))) { PMIX_ERROR_LOG(rc); CLOSE_THE_SOCKET(pnd->sd); + PMIX_RELEASE(pnd->peer); PMIX_RELEASE(pnd); PMIX_RELEASE(cd); return; } - /* add this nspace to our pool */ - nptr = PMIX_NEW(pmix_nspace_t); - if (NULL == nptr) { - PMIX_ERROR_LOG(PMIX_ERR_NOMEM); - 
CLOSE_THE_SOCKET(pnd->sd); - PMIX_RELEASE(pnd); - PMIX_RELEASE(cd); - return; - } - nptr->nspace = strdup(cd->proc.nspace); - pmix_list_append(&pmix_server_globals.nspaces, &nptr->super); - /* add this tool rank to the nspace */ - info = PMIX_NEW(pmix_rank_info_t); - if (NULL == info) { - PMIX_ERROR_LOG(PMIX_ERR_NOMEM); - CLOSE_THE_SOCKET(pnd->sd); - PMIX_RELEASE(pnd); - PMIX_RELEASE(cd); - return; - } - info->pname.nspace = strdup(cd->proc.nspace); - info->pname.rank = 0; - /* need to include the uid/gid for validation */ - info->uid = pnd->uid; - info->gid = pnd->gid; - pmix_list_append(&nptr->ranks, &info->super); + /* shortcuts */ + peer = (pmix_peer_t*)pnd->peer; + nptr = peer->nptr; - /* setup a peer object for this tool */ - peer = PMIX_NEW(pmix_peer_t); - if (NULL == peer) { - PMIX_ERROR_LOG(PMIX_ERR_NOMEM); - CLOSE_THE_SOCKET(pnd->sd); - PMIX_RELEASE(pnd); - PMIX_RELEASE(cd); - return; + /* if this tool wasn't initially registered as a client, + * then add some required structures */ + if (5 != pnd->flag && 8 != pnd->flag) { + PMIX_RETAIN(nptr); + nptr->nspace = strdup(cd->proc.nspace); + pmix_list_append(&pmix_globals.nspaces, &nptr->super); + info = PMIX_NEW(pmix_rank_info_t); + info->pname.nspace = strdup(nptr->nspace); + info->pname.rank = cd->proc.rank; + info->uid = pnd->uid; + info->gid = pnd->gid; + pmix_list_append(&nptr->ranks, &info->super); + PMIX_RETAIN(info); + peer->info = info; } + /* mark the peer proc type */ peer->proc_type = pnd->proc_type; /* save the protocol */ peer->protocol = pnd->protocol; - /* add in the nspace pointer */ - PMIX_RETAIN(nptr); - peer->nptr = nptr; - PMIX_RETAIN(info); - peer->info = info; /* save the uid/gid */ - peer->epilog.uid = info->uid; - peer->epilog.gid = info->gid; - nptr->epilog.uid = info->uid; - nptr->epilog.gid = info->gid; + peer->epilog.uid = peer->info->uid; + peer->epilog.gid = peer->info->gid; + nptr->epilog.uid = peer->info->uid; + nptr->epilog.gid = peer->info->gid; peer->proc_cnt = 1; peer->sd 
= pnd->sd; @@ -1598,7 +1866,7 @@ static void process_cbfunc(int sd, short args, void *cbdata) peer->nptr->compat.psec = pmix_psec_base_assign_module(pnd->psec); if (NULL == peer->nptr->compat.psec) { PMIX_RELEASE(peer); - pmix_list_remove_item(&pmix_server_globals.nspaces, &nptr->super); + pmix_list_remove_item(&pmix_globals.nspaces, &nptr->super); PMIX_RELEASE(nptr); // will release the info object CLOSE_THE_SOCKET(pnd->sd); goto done; @@ -1607,24 +1875,13 @@ static void process_cbfunc(int sd, short args, void *cbdata) * tool as we received this request via that channel, so simply * record it here for future use */ peer->nptr->compat.ptl = &pmix_ptl_tcp_module; - /* select their bfrops compat module */ - peer->nptr->compat.bfrops = pmix_bfrops_base_assign_module(pnd->bfrops); - if (NULL == peer->nptr->compat.bfrops) { - PMIX_RELEASE(peer); - pmix_list_remove_item(&pmix_server_globals.nspaces, &nptr->super); - PMIX_RELEASE(nptr); // will release the info object - CLOSE_THE_SOCKET(pnd->sd); - goto done; - } - /* set the buffer type */ - peer->nptr->compat.type = pnd->buffer_type; /* set the gds */ PMIX_INFO_LOAD(&ginfo, PMIX_GDS_MODULE, pnd->gds, PMIX_STRING); peer->nptr->compat.gds = pmix_gds_base_assign_module(&ginfo, 1); PMIX_INFO_DESTRUCT(&ginfo); if (NULL == peer->nptr->compat.gds) { PMIX_RELEASE(peer); - pmix_list_remove_item(&pmix_server_globals.nspaces, &nptr->super); + pmix_list_remove_item(&pmix_globals.nspaces, &nptr->super); PMIX_RELEASE(nptr); // will release the info object CLOSE_THE_SOCKET(pnd->sd); goto done; @@ -1643,7 +1900,7 @@ static void process_cbfunc(int sd, short args, void *cbdata) req = PMIX_NEW(pmix_iof_req_t); if (NULL == req) { PMIX_RELEASE(peer); - pmix_list_remove_item(&pmix_server_globals.nspaces, &nptr->super); + pmix_list_remove_item(&pmix_globals.nspaces, &nptr->super); PMIX_RELEASE(nptr); // will release the info object CLOSE_THE_SOCKET(pnd->sd); goto done; @@ -1658,13 +1915,28 @@ static void process_cbfunc(int sd, short args, 
void *cbdata) /* validate the connection */ cred.bytes = pnd->cred; cred.size = pnd->len; - PMIX_PSEC_VALIDATE_CONNECTION(rc, peer, NULL, 0, NULL, NULL, &cred); - if (PMIX_SUCCESS != rc) { + PMIX_PSEC_VALIDATE_CONNECTION(reply, peer, NULL, 0, NULL, NULL, &cred); + /* communicate the result to the other side */ + u32 = htonl(reply); + if (PMIX_SUCCESS != (rc = pmix_ptl_base_send_blocking(pnd->sd, (char*)&u32, sizeof(uint32_t)))) { + PMIX_ERROR_LOG(rc); + PMIX_RELEASE(peer); + pmix_list_remove_item(&pmix_server_globals.nspaces, &nptr->super); + PMIX_RELEASE(nptr); // will release the info object + CLOSE_THE_SOCKET(pnd->sd); + goto done; + } + + /* If needed perform the handshake. The macro will update reply */ + PMIX_PSEC_SERVER_HANDSHAKE_IFNEED(reply, peer, NULL, 0, NULL, NULL, &cred); + + /* If verification wasn't successful - stop here */ + if (PMIX_SUCCESS != reply) { pmix_output_verbose(2, pmix_ptl_base_framework.framework_output, "validation of tool credentials failed: %s", PMIx_Error_string(rc)); PMIX_RELEASE(peer); - pmix_list_remove_item(&pmix_server_globals.nspaces, &nptr->super); + pmix_list_remove_item(&pmix_globals.nspaces, &nptr->super); PMIX_RELEASE(nptr); // will release the info object CLOSE_THE_SOCKET(pnd->sd); goto done; @@ -1677,12 +1949,12 @@ static void process_cbfunc(int sd, short args, void *cbdata) PMIX_RELEASE(pnd); PMIX_RELEASE(cd); PMIX_RELEASE(peer); - pmix_list_remove_item(&pmix_server_globals.nspaces, &nptr->super); + pmix_list_remove_item(&pmix_globals.nspaces, &nptr->super); PMIX_RELEASE(nptr); // will release the info object /* probably cannot send an error reply if we are out of memory */ return; } - info->peerid = peer->index; + peer->info->peerid = peer->index; /* start the events for this tool */ pmix_event_assign(&peer->recv_event, pmix_globals.evbase, peer->sd, @@ -1708,8 +1980,8 @@ static void cnct_cbfunc(pmix_status_t status, pmix_setup_caddy_t *cd; pmix_output_verbose(2, pmix_ptl_base_framework.framework_output, - 
"pmix:tcp:cnct_cbfunc returning %s:%d", - proc->nspace, proc->rank); + "pmix:tcp:cnct_cbfunc returning %s:%d %s", + proc->nspace, proc->rank, PMIx_Error_string(status)); /* need to thread-shift this into our context */ cd = PMIX_NEW(pmix_setup_caddy_t); @@ -1718,7 +1990,8 @@ static void cnct_cbfunc(pmix_status_t status, return; } cd->status = status; - (void)strncpy(cd->proc.nspace, proc->nspace, PMIX_MAX_NSLEN); + pmix_strncpy(cd->proc.nspace, proc->nspace, PMIX_MAX_NSLEN); + cd->proc.rank = proc->rank; cd->cbdata = cbdata; PMIX_THREADSHIFT(cd, process_cbfunc); } diff --git a/opal/mca/pmix/pmix3x/pmix/src/mca/ptl/usock/Makefile.am b/opal/mca/pmix/pmix3x/pmix/src/mca/ptl/usock/Makefile.am index e6606e2e844..2c91ac37c8d 100644 --- a/opal/mca/pmix/pmix3x/pmix/src/mca/ptl/usock/Makefile.am +++ b/opal/mca/pmix/pmix3x/pmix/src/mca/ptl/usock/Makefile.am @@ -11,7 +11,7 @@ # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. # Copyright (c) 2012 Los Alamos National Security, Inc. All rights reserved. -# Copyright (c) 2013-2016 Intel, Inc. All rights reserved +# Copyright (c) 2013-2019 Intel, Inc. All rights reserved. # $COPYRIGHT$ # # Additional copyrights may follow @@ -44,6 +44,9 @@ mcacomponentdir = $(pmixlibdir) mcacomponent_LTLIBRARIES = $(component) mca_ptl_usock_la_SOURCES = $(component_sources) mca_ptl_usock_la_LDFLAGS = -module -avoid-version +if NEED_LIBPMIX +mca_ptl_usock_la_LIBADD = $(top_builddir)/src/libpmix.la +endif noinst_LTLIBRARIES = $(lib) libmca_ptl_usock_la_SOURCES = $(lib_sources) diff --git a/opal/mca/pmix/pmix3x/pmix/src/mca/ptl/usock/ptl_usock.c b/opal/mca/pmix/pmix3x/pmix/src/mca/ptl/usock/ptl_usock.c index 4d5afaa94d3..51417f3e032 100644 --- a/opal/mca/pmix/pmix3x/pmix/src/mca/ptl/usock/ptl_usock.c +++ b/opal/mca/pmix/pmix3x/pmix/src/mca/ptl/usock/ptl_usock.c @@ -13,7 +13,7 @@ * Copyright (c) 2011-2014 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2011-2013 Los Alamos National Security, LLC. 
All rights * reserved. - * Copyright (c) 2013-2017 Intel, Inc. All rights reserved. + * Copyright (c) 2013-2019 Intel, Inc. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -146,7 +146,7 @@ static pmix_status_t connect_to_peer(struct pmix_peer_t *peer, pmix_client_globals.myserver->info = PMIX_NEW(pmix_rank_info_t); } if (NULL == pmix_client_globals.myserver->nptr) { - pmix_client_globals.myserver->nptr = PMIX_NEW(pmix_nspace_t); + pmix_client_globals.myserver->nptr = PMIX_NEW(pmix_namespace_t); } if (NULL == pmix_client_globals.myserver->nptr->nspace) { pmix_client_globals.myserver->nptr->nspace = strdup(uri[0]); @@ -393,7 +393,7 @@ static pmix_status_t recv_connect_ack(int sd) /* get the current timeout value so we can reset to it */ sz = sizeof(save); if (0 != getsockopt(sd, SOL_SOCKET, SO_RCVTIMEO, (void*)&save, &sz)) { - if (ENOPROTOOPT == errno) { + if (ENOPROTOOPT == errno || EOPNOTSUPP == errno) { sockopt = false; } else { return PMIX_ERR_UNREACH; @@ -611,7 +611,7 @@ void pmix_usock_send_handler(int sd, short flags, void *cbdata) return; } else { // report the error - event_del(&peer->send_event); + pmix_event_del(&peer->send_event); peer->send_ev_active = false; PMIX_RELEASE(msg); peer->send_msg = NULL; diff --git a/opal/mca/pmix/pmix3x/pmix/src/mca/ptl/usock/ptl_usock_component.c b/opal/mca/pmix/pmix3x/pmix/src/mca/ptl/usock/ptl_usock_component.c index b09e147ace0..36637cc9882 100644 --- a/opal/mca/pmix/pmix3x/pmix/src/mca/ptl/usock/ptl_usock_component.c +++ b/opal/mca/pmix/pmix3x/pmix/src/mca/ptl/usock/ptl_usock_component.c @@ -12,9 +12,11 @@ * All rights reserved. * Copyright (c) 2015 Los Alamos National Security, LLC. All rights * reserved. - * Copyright (c) 2016-2018 Intel, Inc. All rights reserved. + * Copyright (c) 2016-2019 Intel, Inc. All rights reserved. * Copyright (c) 2017 Research Organization for Information Science * and Technology (RIST). All rights reserved. + * Copyright (c) 2018 IBM Corporation. 
All rights reserved. + * Copyright (c) 2019 Mellanox Technologies, Inc. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -149,6 +151,10 @@ pmix_status_t component_close(void) static int component_query(pmix_mca_base_module_t **module, int *priority) { + if (PMIX_PROC_IS_TOOL(pmix_globals.mypeer)) { + return PMIX_ERR_NOT_SUPPORTED; + } + *module = (pmix_mca_base_module_t*)&pmix_ptl_usock_module; return PMIX_SUCCESS; } @@ -338,10 +344,10 @@ static void connection_handler(int sd, short args, void *cbdata) { pmix_pending_connection_t *pnd = (pmix_pending_connection_t*)cbdata; char *msg, *ptr, *nspace, *version, *sec, *bfrops, *gds; - pmix_status_t rc; + pmix_status_t rc, reply; unsigned int rank; pmix_usock_hdr_t hdr; - pmix_nspace_t *nptr, *tmp; + pmix_namespace_t *nptr, *tmp; pmix_rank_info_t *info; pmix_peer_t *psave = NULL; bool found; @@ -353,6 +359,7 @@ static void connection_handler(int sd, short args, void *cbdata) unsigned int msglen; pmix_info_t ginfo; pmix_byte_object_t cred; + uint32_t u32; /* acquire the object */ PMIX_ACQUIRE_OBJECT(pnd); @@ -481,6 +488,10 @@ static void connection_handler(int sd, short args, void *cbdata) cred.bytes = ptr; ptr += cred.size; len -= cred.size; + } else { + /* set cred pointer to NULL to guard against validation + * methods that assume a zero length credential is NULL */ + cred.bytes = NULL; } } @@ -541,7 +552,7 @@ static void connection_handler(int sd, short args, void *cbdata) /* see if we know this nspace */ nptr = NULL; - PMIX_LIST_FOREACH(tmp, &pmix_server_globals.nspaces, pmix_nspace_t) { + PMIX_LIST_FOREACH(tmp, &pmix_globals.nspaces, pmix_namespace_t) { if (0 == strcmp(tmp->nspace, nspace)) { nptr = tmp; break; @@ -682,12 +693,34 @@ static void connection_handler(int sd, short args, void *cbdata) * record it here for future use */ nptr->compat.ptl = &pmix_ptl_usock_module; - /* validate the connection - the macro will send the status result to the client */ - 
PMIX_PSEC_VALIDATE_CONNECTION(rc, psave, NULL, 0, NULL, 0, &cred); /* now done with the msg */ free(msg); - if (PMIX_SUCCESS != rc) { + /* validate the connection - the macro will send the status result to the client */ + PMIX_PSEC_VALIDATE_CONNECTION(reply, psave, NULL, 0, NULL, 0, &cred); + pmix_output_verbose(2, pmix_ptl_base_framework.framework_output, + "client connection validated with status=%d", reply); + + /* Communicate the result of validation to the client */ + u32 = htonl(reply); + if (PMIX_SUCCESS != (rc = pmix_ptl_base_send_blocking(pnd->sd, (char*)&u32, sizeof(uint32_t)))) { + PMIX_ERROR_LOG(rc); + info->proc_cnt--; + PMIX_RELEASE(info); + pmix_pointer_array_set_item(&pmix_server_globals.clients, psave->index, NULL); + PMIX_RELEASE(psave); + /* error reply was sent by the above macro */ + CLOSE_THE_SOCKET(pnd->sd); + PMIX_RELEASE(pnd); + return; + } + + /* If needed perform the handshake. The macro will update reply */ + PMIX_PSEC_SERVER_HANDSHAKE_IFNEED(reply, psave, NULL, 0, NULL, 0, &cred); + + /* It is possible that connection validation failed + * We need to reply to the client first and cleanup after */ + if (PMIX_SUCCESS != reply) { pmix_output_verbose(2, pmix_ptl_base_framework.framework_output, "validation of client credentials failed: %s", PMIx_Error_string(rc)); @@ -701,6 +734,8 @@ static void connection_handler(int sd, short args, void *cbdata) return; } + + /* send the client's array index */ if (PMIX_SUCCESS != (rc = pmix_ptl_base_send_blocking(pnd->sd, (char*)&psave->index, sizeof(int)))) { PMIX_ERROR_LOG(rc); @@ -718,10 +753,10 @@ static void connection_handler(int sd, short args, void *cbdata) /* let the host server know that this client has connected */ if (NULL != pmix_host_server.client_connected) { - (void)strncpy(proc.nspace, psave->info->pname.nspace, PMIX_MAX_NSLEN); + pmix_strncpy(proc.nspace, psave->info->pname.nspace, PMIX_MAX_NSLEN); proc.rank = psave->info->pname.rank; rc = pmix_host_server.client_connected(&proc, 
psave->info->server_object, NULL, NULL); - if (PMIX_SUCCESS != rc) { + if (PMIX_SUCCESS != rc && PMIX_OPERATION_SUCCEEDED != rc) { PMIX_ERROR_LOG(rc); info->proc_cnt--; PMIX_RELEASE(info); diff --git a/opal/mca/pmix/pmix3x/pmix/src/runtime/pmix_finalize.c b/opal/mca/pmix/pmix3x/pmix/src/runtime/pmix_finalize.c index bdfe4ebc416..c083ad645f3 100644 --- a/opal/mca/pmix/pmix3x/pmix/src/runtime/pmix_finalize.c +++ b/opal/mca/pmix/pmix3x/pmix/src/runtime/pmix_finalize.c @@ -12,9 +12,9 @@ * Copyright (c) 2008-2015 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2010-2015 Los Alamos National Security, LLC. * All rights reserved. - * Copyright (c) 2013-2018 Intel, Inc. All rights reserved. - * Copyright (c) 2016-2018 Research Organization for Information Science - * and Technology (RIST). All rights reserved. + * Copyright (c) 2013-2019 Intel, Inc. All rights reserved. + * Copyright (c) 2016-2019 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -52,6 +52,9 @@ extern bool pmix_init_called; void pmix_rte_finalize(void) { + int i; + pmix_notify_caddy_t *cd; + if( --pmix_initialized != 0 ) { if( pmix_initialized < 0 ) { fprintf(stderr, "PMIx Finalize called too many times\n"); @@ -104,14 +107,17 @@ void pmix_rte_finalize(void) PMIX_RELEASE(pmix_globals.mypeer); PMIX_DESTRUCT(&pmix_globals.events); PMIX_LIST_DESTRUCT(&pmix_globals.cached_events); - { - pmix_notify_caddy_t *cd; - while (NULL != (cd=(pmix_notify_caddy_t *)pmix_ring_buffer_pop(&pmix_globals.notifications))) { + /* clear any notifications */ + for (i=0; i < pmix_globals.max_events; i++) { + pmix_hotel_checkout_and_return_occupant(&pmix_globals.notifications, i, (void**)&cd); + if (NULL != cd) { PMIX_RELEASE(cd); } } PMIX_DESTRUCT(&pmix_globals.notifications); PMIX_LIST_DESTRUCT(&pmix_globals.iof_requests); + free(pmix_globals.hostname); + PMIX_LIST_DESTRUCT(&pmix_globals.nspaces); /* now safe to release the event base */ if (!pmix_globals.external_evbase) { diff --git a/opal/mca/pmix/pmix3x/pmix/src/runtime/pmix_init.c b/opal/mca/pmix/pmix3x/pmix/src/runtime/pmix_init.c index 7a9fd4d872b..b3255e4e5da 100644 --- a/opal/mca/pmix/pmix3x/pmix/src/runtime/pmix_init.c +++ b/opal/mca/pmix/pmix3x/pmix/src/runtime/pmix_init.c @@ -15,7 +15,7 @@ * Copyright (c) 2009 Oak Ridge National Labs. All rights reserved. * Copyright (c) 2010-2015 Los Alamos National Security, LLC. * All rights reserved. - * Copyright (c) 2013-2018 Intel, Inc. All rights reserved. + * Copyright (c) 2013-2019 Intel, Inc. All rights reserved. * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. * $COPYRIGHT$ @@ -33,7 +33,9 @@ #include #endif #include PMIX_EVENT_HEADER -#include "event2/thread.h" +#if ! 
PMIX_HAVE_LIBEV +#include PMIX_EVENT2_THREAD_HEADER +#endif #include @@ -70,6 +72,8 @@ PMIX_EXPORT bool pmix_init_called = false; PMIX_EXPORT pmix_globals_t pmix_globals = { .init_cntr = 0, .mypeer = NULL, + .hostname = NULL, + .nodeid = UINT32_MAX, .pindex = 0, .evbase = NULL, .external_evbase = false, @@ -80,6 +84,15 @@ PMIX_EXPORT pmix_globals_t pmix_globals = { }; +static void _notification_eviction_cbfunc(struct pmix_hotel_t *hotel, + int room_num, + void *occupant) +{ + pmix_notify_caddy_t *cache = (pmix_notify_caddy_t*)occupant; + PMIX_RELEASE(cache); +} + + int pmix_rte_init(pmix_proc_type_t type, pmix_info_t info[], size_t ninfo, pmix_ptl_cbfunc_t cbfunc) @@ -87,6 +100,7 @@ int pmix_rte_init(pmix_proc_type_t type, int ret, debug_level; char *error = NULL, *evar; size_t n; + char hostname[PMIX_MAXHOSTNAMELEN] = {0}; if( ++pmix_initialized != 1 ) { if( pmix_initialized < 1 ) { @@ -147,6 +161,8 @@ int pmix_rte_init(pmix_proc_type_t type, } /* setup the globals structure */ + gethostname(hostname, PMIX_MAXHOSTNAMELEN-1); + pmix_globals.hostname = strdup(hostname); memset(&pmix_globals.myid.nspace, 0, PMIX_MAX_NSLEN+1); pmix_globals.myid.rank = PMIX_RANK_INVALID; PMIX_CONSTRUCT(&pmix_globals.events, pmix_events_t); @@ -154,8 +170,17 @@ int pmix_rte_init(pmix_proc_type_t type, pmix_globals.event_window.tv_usec = 0; PMIX_CONSTRUCT(&pmix_globals.cached_events, pmix_list_t); /* construct the global notification ring buffer */ - PMIX_CONSTRUCT(&pmix_globals.notifications, pmix_ring_buffer_t); - pmix_ring_buffer_init(&pmix_globals.notifications, 256); + PMIX_CONSTRUCT(&pmix_globals.notifications, pmix_hotel_t); + ret = pmix_hotel_init(&pmix_globals.notifications, pmix_globals.max_events, + pmix_globals.evbase, pmix_globals.event_eviction_time, + _notification_eviction_cbfunc); + PMIX_CONSTRUCT(&pmix_globals.nspaces, pmix_list_t); + + if (PMIX_SUCCESS != ret) { + error = "notification hotel init"; + goto return_error; + } + /* and setup the iof request tracking list 
*/ PMIX_CONSTRUCT(&pmix_globals.iof_requests, pmix_list_t); @@ -223,7 +248,7 @@ int pmix_rte_init(pmix_proc_type_t type, pmix_globals.mypeer->proc_type = type | PMIX_PROC_V3; /* create an nspace object for ourselves - we will * fill in the nspace name later */ - pmix_globals.mypeer->nptr = PMIX_NEW(pmix_nspace_t); + pmix_globals.mypeer->nptr = PMIX_NEW(pmix_namespace_t); if (NULL == pmix_globals.mypeer->nptr) { PMIX_RELEASE(pmix_globals.mypeer); ret = PMIX_ERR_NOMEM; @@ -233,9 +258,19 @@ int pmix_rte_init(pmix_proc_type_t type, /* scan incoming info for directives */ if (NULL != info) { for (n=0; n < ninfo; n++) { - if (0 == strcmp(PMIX_EVENT_BASE, info[n].key)) { + if (PMIX_CHECK_KEY(&info[n], PMIX_EVENT_BASE)) { pmix_globals.evbase = (pmix_event_base_t*)info[n].value.data.ptr; pmix_globals.external_evbase = true; + } else if (PMIX_CHECK_KEY(&info[n], PMIX_HOSTNAME)) { + if (NULL != pmix_globals.hostname) { + free(pmix_globals.hostname); + } + pmix_globals.hostname = strdup(info[n].value.data.string); + } else if (PMIX_CHECK_KEY(&info[n], PMIX_NODEID)) { + PMIX_VALUE_GET_NUMBER(ret, &info[n].value, pmix_globals.nodeid, uint32_t); + if (PMIX_SUCCESS != ret) { + goto return_error; + } } } } diff --git a/opal/mca/pmix/pmix3x/pmix/src/runtime/pmix_params.c b/opal/mca/pmix/pmix3x/pmix/src/runtime/pmix_params.c index 4524c216a94..8d49e8bdaad 100644 --- a/opal/mca/pmix/pmix3x/pmix/src/runtime/pmix_params.c +++ b/opal/mca/pmix/pmix3x/pmix/src/runtime/pmix_params.c @@ -19,9 +19,9 @@ * Copyright (c) 2014 Hochschule Esslingen. All rights reserved. * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. - * Copyright (c) 2015 Mellanox Technologies, Inc. + * Copyright (c) 2015-2018 Mellanox Technologies, Inc. * All rights reserved. - * Copyright (c) 2016-2018 Intel, Inc. All rights reserved. + * Copyright (c) 2016-2019 Intel, Inc. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -242,6 +242,30 @@ pmix_status_t pmix_register_params(void) PMIX_INFO_LVL_9, PMIX_MCA_BASE_VAR_SCOPE_READONLY, &pmix_globals.timestamp_output); + /* max size of the notification hotel */ + pmix_globals.max_events = 512; + (void) pmix_mca_base_var_register ("pmix", "pmix", "max", "events", + "Maximum number of event notifications to cache", + PMIX_MCA_BASE_VAR_TYPE_INT, NULL, 0, 0, + PMIX_INFO_LVL_1, PMIX_MCA_BASE_VAR_SCOPE_ALL, + &pmix_globals.max_events); + + /* how long to cache an event */ + pmix_globals.event_eviction_time = 120; + (void) pmix_mca_base_var_register ("pmix", "pmix", "event", "eviction_time", + "Maximum number of seconds to cache an event", + PMIX_MCA_BASE_VAR_TYPE_INT, NULL, 0, 0, + PMIX_INFO_LVL_1, PMIX_MCA_BASE_VAR_SCOPE_ALL, + &pmix_globals.event_eviction_time); + + /* max number of IOF messages to cache */ + pmix_server_globals.max_iof_cache = 1024 * 1024; + (void) pmix_mca_base_var_register ("pmix", "pmix", "max", "iof_cache", + "Maximum number of IOF messages to cache", + PMIX_MCA_BASE_VAR_TYPE_INT, NULL, 0, 0, + PMIX_INFO_LVL_1, PMIX_MCA_BASE_VAR_SCOPE_ALL, + &pmix_server_globals.max_iof_cache); + return PMIX_SUCCESS; } diff --git a/opal/mca/pmix/pmix3x/pmix/src/runtime/pmix_progress_threads.c b/opal/mca/pmix/pmix3x/pmix/src/runtime/pmix_progress_threads.c index df0af87c280..7e40422a0bd 100644 --- a/opal/mca/pmix/pmix3x/pmix/src/runtime/pmix_progress_threads.c +++ b/opal/mca/pmix/pmix3x/pmix/src/runtime/pmix_progress_threads.c @@ -1,8 +1,10 @@ /* - * Copyright (c) 2014-2017 Intel, Inc. All rights reserved. + * Copyright (c) 2014-2019 Intel, Inc. All rights reserved. * Copyright (c) 2015 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2017 Research Organization for Information Science - * and Technology (RIST). All rights reserved. + * Copyright (c) 2017-2019 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
+ * Copyright (c) 2019 Mellanox Technologies, Inc. + * All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -11,7 +13,6 @@ */ #include -#include "src/include/types.h" #ifdef HAVE_UNISTD_H #include @@ -47,6 +48,12 @@ typedef struct { bool engine_constructed; pmix_thread_t engine; +#if PMIX_HAVE_LIBEV + ev_async async; + pthread_mutex_t mutex; + pthread_cond_t cond; + pmix_list_t list; +#endif } pmix_progress_tracker_t; static void tracker_constructor(pmix_progress_tracker_t *p) @@ -56,6 +63,10 @@ static void tracker_constructor(pmix_progress_tracker_t *p) p->ev_base = NULL; p->ev_active = false; p->engine_constructed = false; +#if PMIX_HAVE_LIBEV + pthread_mutex_init(&p->mutex, NULL); + PMIX_CONSTRUCT(&p->list, pmix_list_t); +#endif } static void tracker_destructor(pmix_progress_tracker_t *p) @@ -71,6 +82,10 @@ static void tracker_destructor(pmix_progress_tracker_t *p) if (p->engine_constructed) { PMIX_DESTRUCT(&p->engine); } +#if PMIX_HAVE_LIBEV + pthread_mutex_destroy(&p->mutex); + PMIX_LIST_DESTRUCT(&p->list); +#endif } static PMIX_CLASS_INSTANCE(pmix_progress_tracker_t, @@ -78,6 +93,114 @@ static PMIX_CLASS_INSTANCE(pmix_progress_tracker_t, tracker_constructor, tracker_destructor); +#if PMIX_HAVE_LIBEV + +typedef enum { + PMIX_EVENT_ACTIVE, + PMIX_EVENT_ADD, + PMIX_EVENT_DEL +} pmix_event_type_t; + +typedef struct { + pmix_list_item_t super; + struct event *ev; + struct timeval *tv; + int res; + short ncalls; + pmix_event_type_t type; +} pmix_event_caddy_t; + +static PMIX_CLASS_INSTANCE(pmix_event_caddy_t, + pmix_list_item_t, + NULL, NULL); + +static pmix_progress_tracker_t* pmix_progress_tracker_get_by_base(struct event_base *); + +static void pmix_libev_ev_async_cb (EV_P_ ev_async *w, int revents) +{ + pmix_progress_tracker_t *trk = pmix_progress_tracker_get_by_base((struct event_base *)EV_A); + assert(NULL != trk); + pthread_mutex_lock (&trk->mutex); + pmix_event_caddy_t *cd, *next; + PMIX_LIST_FOREACH_SAFE(cd, next, &trk->list, 
pmix_event_caddy_t) { + switch (cd->type) { + case PMIX_EVENT_ADD: + (void)event_add(cd->ev, cd->tv); + break; + case PMIX_EVENT_DEL: + (void)event_del(cd->ev); + break; + case PMIX_EVENT_ACTIVE: + (void)event_active(cd->ev, cd->res, cd->ncalls); + break; + } + pmix_list_remove_item(&trk->list, &cd->super); + PMIX_RELEASE(cd); + } + pthread_mutex_unlock (&trk->mutex); +} + +int pmix_event_add(struct event *ev, struct timeval *tv) { + int res; + pmix_progress_tracker_t *trk = pmix_progress_tracker_get_by_base(ev->ev_base); + if ((NULL != trk) && !pthread_equal(pthread_self(), trk->engine.t_handle)) { + pmix_event_caddy_t *cd = PMIX_NEW(pmix_event_caddy_t); + cd->type = PMIX_EVENT_ADD; + cd->ev = ev; + cd->tv = tv; + pthread_mutex_lock(&trk->mutex); + pmix_list_append(&trk->list, &cd->super); + ev_async_send ((struct ev_loop *)trk->ev_base, &trk->async); + pthread_mutex_unlock(&trk->mutex); + res = PMIX_SUCCESS; + } else { + res = event_add(ev, tv); + } + return res; +} + +int pmix_event_del(struct event *ev) { + int res; + pmix_progress_tracker_t *trk = pmix_progress_tracker_get_by_base(ev->ev_base); + if ((NULL != trk) && !pthread_equal(pthread_self(), trk->engine.t_handle)) { + pmix_event_caddy_t *cd = PMIX_NEW(pmix_event_caddy_t); + cd->type = PMIX_EVENT_DEL; + cd->ev = ev; + pthread_mutex_lock(&trk->mutex); + pmix_list_append(&trk->list, &cd->super); + ev_async_send ((struct ev_loop *)trk->ev_base, &trk->async); + pthread_mutex_unlock(&trk->mutex); + res = PMIX_SUCCESS; + } else { + res = event_del(ev); + } + return res; +} + +void pmix_event_active (struct event *ev, int res, short ncalls) { + pmix_progress_tracker_t *trk = pmix_progress_tracker_get_by_base(ev->ev_base); + if ((NULL != trk) && !pthread_equal(pthread_self(), trk->engine.t_handle)) { + pmix_event_caddy_t *cd = PMIX_NEW(pmix_event_caddy_t); + cd->type = PMIX_EVENT_ACTIVE; + cd->ev = ev; + cd->res = res; + cd->ncalls = ncalls; + pthread_mutex_lock(&trk->mutex); + pmix_list_append(&trk->list, 
&cd->super); + ev_async_send ((struct ev_loop *)trk->ev_base, &trk->async); + pthread_mutex_unlock(&trk->mutex); + } else { + event_active(ev, res, ncalls); + } +} + +void pmix_event_base_loopexit (pmix_event_base_t *ev_base) { + pmix_progress_tracker_t *trk = pmix_progress_tracker_get_by_base(ev_base); + assert(NULL != trk); + ev_async_send ((struct ev_loop *)trk->ev_base, &trk->async); +} +#endif + static bool inited = false; static pmix_list_t tracking; static struct timeval long_timeout = { @@ -116,10 +239,9 @@ static void stop_progress_engine(pmix_progress_tracker_t *trk) { assert(trk->ev_active); trk->ev_active = false; - /* break the event loop - this will cause the loop to exit upon completion of any current event */ - pmix_event_base_loopbreak(trk->ev_base); + pmix_event_base_loopexit(trk->ev_base); pmix_thread_join(&trk->engine, NULL); } @@ -190,6 +312,11 @@ pmix_event_base_t *pmix_progress_thread_init(const char *name) dummy_timeout_cb, trk); pmix_event_add(&trk->block, &long_timeout); +#if PMIX_HAVE_LIBEV + ev_async_init (&trk->async, pmix_libev_ev_async_cb); + ev_async_start((struct ev_loop *)trk->ev_base, &trk->async); +#endif + /* construct the thread object */ PMIX_CONSTRUCT(&trk->engine, pmix_thread_t); trk->engine_constructed = true; @@ -300,6 +427,21 @@ int pmix_progress_thread_pause(const char *name) return PMIX_ERR_NOT_FOUND; } +#if PMIX_HAVE_LIBEV +static pmix_progress_tracker_t* pmix_progress_tracker_get_by_base(pmix_event_base_t *base) { + pmix_progress_tracker_t *trk; + + if (inited) { + PMIX_LIST_FOREACH(trk, &tracking, pmix_progress_tracker_t) { + if(trk->ev_base == base) { + return trk; + } + } + } + return NULL; +} +#endif + int pmix_progress_thread_resume(const char *name) { pmix_progress_tracker_t *trk; diff --git a/opal/mca/pmix/pmix3x/pmix/src/server/pmix_server.c b/opal/mca/pmix/pmix3x/pmix/src/server/pmix_server.c index 29046877758..2ea33a056c1 100644 --- a/opal/mca/pmix/pmix3x/pmix/src/server/pmix_server.c +++ 
b/opal/mca/pmix/pmix3x/pmix/src/server/pmix_server.c @@ -1,13 +1,14 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* - * Copyright (c) 2014-2018 Intel, Inc. All rights reserved. - * Copyright (c) 2014-2017 Research Organization for Information Science - * and Technology (RIST). All rights reserved. + * Copyright (c) 2014-2019 Intel, Inc. All rights reserved. + * Copyright (c) 2014-2018 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * Copyright (c) 2014-2015 Artem Y. Polyakov . * All rights reserved. * Copyright (c) 2016 Mellanox Technologies, Inc. * All rights reserved. - * Copyright (c) 2016 IBM Corporation. All rights reserved. + * Copyright (c) 2016-2018 IBM Corporation. All rights reserved. + * Copyright (c) 2018 Cisco Systems, Inc. All rights reserved * $COPYRIGHT$ * * Additional copyrights may follow @@ -49,7 +50,9 @@ #include #include #include PMIX_EVENT_HEADER +#if ! PMIX_HAVE_LIBEV #include PMIX_EVENT2_THREAD_HEADER +#endif #include "src/util/argv.h" #include "src/util/error.h" @@ -66,6 +69,7 @@ #include "src/mca/bfrops/base/base.h" #include "src/mca/gds/base/base.h" #include "src/mca/preg/preg.h" +#include "src/mca/psensor/base/base.h" #include "src/mca/ptl/base/base.h" #include "src/hwloc/hwloc-internal.h" @@ -85,18 +89,8 @@ static char *gds_mode = NULL; static pid_t mypid; // local functions for connection support -static void iof_eviction_cbfunc(struct pmix_hotel_t *hotel, - int room_num, - void *occupant) -{ - pmix_setup_caddy_t *cache = (pmix_setup_caddy_t*)occupant; - PMIX_RELEASE(cache); -} - pmix_status_t pmix_server_initialize(void) { - pmix_status_t rc; - /* setup the server-specific globals */ PMIX_CONSTRUCT(&pmix_server_globals.clients, pmix_pointer_array_t); pmix_pointer_array_init(&pmix_server_globals.clients, 1, INT_MAX, 1); @@ -105,16 +99,7 @@ pmix_status_t pmix_server_initialize(void) PMIX_CONSTRUCT(&pmix_server_globals.gdata, pmix_list_t); 
PMIX_CONSTRUCT(&pmix_server_globals.events, pmix_list_t); PMIX_CONSTRUCT(&pmix_server_globals.local_reqs, pmix_list_t); - PMIX_CONSTRUCT(&pmix_server_globals.nspaces, pmix_list_t); - PMIX_CONSTRUCT(&pmix_server_globals.iof, pmix_hotel_t); - rc = pmix_hotel_init(&pmix_server_globals.iof, PMIX_IOF_HOTEL_SIZE, - pmix_globals.evbase, PMIX_IOF_MAX_STAY, - iof_eviction_cbfunc); - if (PMIX_SUCCESS != rc) { - PMIX_ERROR_LOG(rc); - PMIX_RELEASE_THREAD(&pmix_global_lock); - return rc; - } + PMIX_CONSTRUCT(&pmix_server_globals.iof, pmix_list_t); pmix_output_verbose(2, pmix_server_globals.base_output, "pmix:server init called"); @@ -173,6 +158,8 @@ pmix_status_t pmix_server_initialize(void) return PMIX_SUCCESS; } +static pmix_server_module_t myhostserver = {0}; + PMIX_EXPORT pmix_status_t PMIx_server_init(pmix_server_module_t *module, pmix_info_t info[], size_t ninfo) { @@ -181,7 +168,6 @@ PMIX_EXPORT pmix_status_t PMIx_server_init(pmix_server_module_t *module, size_t n, m; pmix_kval_t *kv; bool protect, nspace_given = false, rank_given = false; - bool topology_req = false; pmix_info_t ginfo; char *protected[] = { PMIX_USERID, @@ -202,7 +188,11 @@ PMIX_EXPORT pmix_status_t PMIx_server_init(pmix_server_module_t *module, "pmix:server init called"); /* setup the function pointers */ - pmix_host_server = *module; + if (NULL == module) { + pmix_host_server = myhostserver; + } else { + pmix_host_server = *module; + } if (NULL != info) { for (n=0; n < ninfo; n++) { @@ -212,6 +202,8 @@ PMIX_EXPORT pmix_status_t PMIx_server_init(pmix_server_module_t *module, } } else if (0 == strncmp(info[n].key, PMIX_SERVER_TMPDIR, PMIX_MAX_KEYLEN)) { pmix_server_globals.tmpdir = strdup(info[n].value.data.string); + } else if (0 == strncmp(info[n].key, PMIX_SYSTEM_TMPDIR, PMIX_MAX_KEYLEN)) { + pmix_server_globals.system_tmpdir = strdup(info[n].value.data.string); } } } @@ -222,6 +214,13 @@ PMIX_EXPORT pmix_status_t PMIx_server_init(pmix_server_module_t *module, pmix_server_globals.tmpdir = 
strdup(evar); } } + if (NULL == pmix_server_globals.system_tmpdir) { + if (NULL == (evar = getenv("PMIX_SYSTEM_TMPDIR"))) { + pmix_server_globals.system_tmpdir = strdup(pmix_tmp_directory()); + } else { + pmix_server_globals.system_tmpdir = strdup(evar); + } + } /* setup the runtime - this init's the globals, * opens and initializes the required frameworks */ @@ -298,17 +297,11 @@ PMIX_EXPORT pmix_status_t PMIx_server_init(pmix_server_module_t *module, if (NULL != info) { for (n=0; n < ninfo; n++) { if (0 == strncmp(info[n].key, PMIX_SERVER_NSPACE, PMIX_MAX_KEYLEN)) { - (void)strncpy(pmix_globals.myid.nspace, info[n].value.data.string, PMIX_MAX_NSLEN); + pmix_strncpy(pmix_globals.myid.nspace, info[n].value.data.string, PMIX_MAX_NSLEN); nspace_given = true; } else if (0 == strncmp(info[n].key, PMIX_SERVER_RANK, PMIX_MAX_KEYLEN)) { pmix_globals.myid.rank = info[n].value.data.rank; rank_given = true; - } else if (0 == strncmp(info[n].key, PMIX_TOPOLOGY, PMIX_MAX_KEYLEN) || - 0 == strncmp(info[n].key, PMIX_TOPOLOGY_XML, PMIX_MAX_KEYLEN) || - 0 == strncmp(info[n].key, PMIX_TOPOLOGY_FILE, PMIX_MAX_KEYLEN) || - 0 == strncmp(info[n].key, PMIX_HWLOC_XML_V1, PMIX_MAX_KEYLEN) || - 0 == strncmp(info[n].key, PMIX_HWLOC_XML_V2, PMIX_MAX_KEYLEN)) { - topology_req = true; } else { /* check the list of protected keys */ protect = false; @@ -342,9 +335,9 @@ PMIX_EXPORT pmix_status_t PMIx_server_init(pmix_server_module_t *module, /* look for our namespace, if one was given */ if (NULL == (evar = getenv("PMIX_SERVER_NAMESPACE"))) { /* use a fake namespace */ - (void)strncpy(pmix_globals.myid.nspace, "pmix-server", PMIX_MAX_NSLEN); + pmix_strncpy(pmix_globals.myid.nspace, "pmix-server", PMIX_MAX_NSLEN); } else { - (void)strncpy(pmix_globals.myid.nspace, evar, PMIX_MAX_NSLEN); + pmix_strncpy(pmix_globals.myid.nspace, evar, PMIX_MAX_NSLEN); } } if (!rank_given) { @@ -366,10 +359,10 @@ PMIX_EXPORT pmix_status_t PMIx_server_init(pmix_server_module_t *module, rinfo = 
pmix_globals.mypeer->info; } if (NULL == pmix_globals.mypeer->nptr) { - pmix_globals.mypeer->nptr = PMIX_NEW(pmix_nspace_t); + pmix_globals.mypeer->nptr = PMIX_NEW(pmix_namespace_t); /* ensure our own nspace is first on the list */ PMIX_RETAIN(pmix_globals.mypeer->nptr); - pmix_list_prepend(&pmix_server_globals.nspaces, &pmix_globals.mypeer->nptr->super); + pmix_list_prepend(&pmix_globals.nspaces, &pmix_globals.mypeer->nptr->super); } pmix_globals.mypeer->nptr->nspace = strdup(pmix_globals.myid.nspace); rinfo->pname.nspace = strdup(pmix_globals.mypeer->nptr->nspace); @@ -390,11 +383,19 @@ PMIX_EXPORT pmix_status_t PMIx_server_init(pmix_server_module_t *module, } /* if requested, setup the topology */ - if (topology_req) { - if (PMIX_SUCCESS != (rc = pmix_hwloc_get_topology(info, ninfo))) { - PMIX_RELEASE_THREAD(&pmix_global_lock); - return rc; - } + if (PMIX_SUCCESS != (rc = pmix_hwloc_get_topology(info, ninfo))) { + PMIX_RELEASE_THREAD(&pmix_global_lock); + return rc; + } + + /* open the psensor framework */ + if (PMIX_SUCCESS != (rc = pmix_mca_base_framework_open(&pmix_psensor_base_framework, 0))) { + PMIX_RELEASE_THREAD(&pmix_global_lock); + return rc; + } + if (PMIX_SUCCESS != (rc = pmix_psensor_base_select())) { + PMIX_RELEASE_THREAD(&pmix_global_lock); + return rc; } /* setup the wildcard recv for inbound messages from clients */ @@ -431,8 +432,7 @@ PMIX_EXPORT pmix_status_t PMIx_server_finalize(void) { int i; pmix_peer_t *peer; - pmix_nspace_t *ns; - pmix_setup_caddy_t *cd; + pmix_namespace_t *ns; PMIX_ACQUIRE_THREAD(&pmix_global_lock); if (pmix_globals.init_cntr <= 0) { @@ -460,14 +460,6 @@ PMIX_EXPORT pmix_status_t PMIx_server_finalize(void) pmix_ptl_base_stop_listening(); - /* cleanout any IOF */ - for (i=0; i < PMIX_IOF_HOTEL_SIZE; i++) { - pmix_hotel_checkout_and_return_occupant(&pmix_server_globals.iof, i, (void**)&cd); - if (NULL != cd) { - PMIX_RELEASE(cd); - } - } - PMIX_DESTRUCT(&pmix_server_globals.iof); for (i=0; i < 
pmix_server_globals.clients.size; i++) { if (NULL != (peer = (pmix_peer_t*)pmix_pointer_array_get_item(&pmix_server_globals.clients, i))) { /* ensure that we do the specified cleanup - if this is an @@ -483,13 +475,13 @@ PMIX_EXPORT pmix_status_t PMIx_server_finalize(void) PMIX_LIST_DESTRUCT(&pmix_server_globals.local_reqs); PMIX_LIST_DESTRUCT(&pmix_server_globals.gdata); PMIX_LIST_DESTRUCT(&pmix_server_globals.events); - PMIX_LIST_FOREACH(ns, &pmix_server_globals.nspaces, pmix_nspace_t) { + PMIX_LIST_FOREACH(ns, &pmix_globals.nspaces, pmix_namespace_t) { /* ensure that we do the specified cleanup - if this is an * abnormal termination, then the nspace object may not be * at zero refcount */ pmix_execute_epilog(&ns->epilog); } - PMIX_LIST_DESTRUCT(&pmix_server_globals.nspaces); + PMIX_LIST_DESTRUCT(&pmix_server_globals.iof); pmix_hwloc_cleanup(); @@ -511,6 +503,8 @@ PMIX_EXPORT pmix_status_t PMIx_server_finalize(void) if (NULL != pmix_server_globals.tmpdir) { free(pmix_server_globals.tmpdir); } + /* close the psensor framework */ + (void)pmix_mca_base_framework_close(&pmix_psensor_base_framework); /* close the pnet framework */ (void)pmix_mca_base_framework_close(&pmix_pnet_base_framework); @@ -532,10 +526,17 @@ PMIX_EXPORT pmix_status_t PMIx_server_finalize(void) return PMIX_SUCCESS; } +static void opcbfunc(pmix_status_t status, void *cbdata) +{ + pmix_lock_t *lock = (pmix_lock_t*)cbdata; + lock->status = status; + PMIX_WAKEUP_THREAD(lock); +} + static void _register_nspace(int sd, short args, void *cbdata) { pmix_setup_caddy_t *cd = (pmix_setup_caddy_t*)cbdata; - pmix_nspace_t *nptr, *tmp; + pmix_namespace_t *nptr, *tmp; pmix_status_t rc; size_t i; @@ -546,20 +547,20 @@ static void _register_nspace(int sd, short args, void *cbdata) /* see if we already have this nspace */ nptr = NULL; - PMIX_LIST_FOREACH(tmp, &pmix_server_globals.nspaces, pmix_nspace_t) { + PMIX_LIST_FOREACH(tmp, &pmix_globals.nspaces, pmix_namespace_t) { if (0 == strcmp(tmp->nspace, 
cd->proc.nspace)) { nptr = tmp; break; } } if (NULL == nptr) { - nptr = PMIX_NEW(pmix_nspace_t); + nptr = PMIX_NEW(pmix_namespace_t); if (NULL == nptr) { rc = PMIX_ERR_NOMEM; goto release; } nptr->nspace = strdup(cd->proc.nspace); - pmix_list_append(&pmix_server_globals.nspaces, &nptr->super); + pmix_list_append(&pmix_globals.nspaces, &nptr->super); } nptr->nlocalprocs = cd->nlocalprocs; @@ -591,18 +592,18 @@ static void _register_nspace(int sd, short args, void *cbdata) cd->info, cd->ninfo); release: - if (NULL != cd->opcbfunc) { - cd->opcbfunc(rc, cd->cbdata); - } + cd->opcbfunc(rc, cd->cbdata); PMIX_RELEASE(cd); } /* setup the data for a job */ -PMIX_EXPORT pmix_status_t PMIx_server_register_nspace(const char nspace[], int nlocalprocs, +PMIX_EXPORT pmix_status_t PMIx_server_register_nspace(const pmix_nspace_t nspace, int nlocalprocs, pmix_info_t info[], size_t ninfo, pmix_op_cbfunc_t cbfunc, void *cbdata) { pmix_setup_caddy_t *cd; + pmix_status_t rc; + pmix_lock_t mylock; PMIX_ACQUIRE_THREAD(&pmix_global_lock); if (pmix_globals.init_cntr <= 0) { @@ -612,7 +613,7 @@ PMIX_EXPORT pmix_status_t PMIx_server_register_nspace(const char nspace[], int n PMIX_RELEASE_THREAD(&pmix_global_lock); cd = PMIX_NEW(pmix_setup_caddy_t); - (void)strncpy(cd->proc.nspace, nspace, PMIX_MAX_NSLEN); + pmix_strncpy(cd->proc.nspace, nspace, PMIX_MAX_NSLEN); cd->nlocalprocs = nlocalprocs; cd->opcbfunc = cbfunc; cd->cbdata = cbdata; @@ -622,16 +623,128 @@ PMIX_EXPORT pmix_status_t PMIx_server_register_nspace(const char nspace[], int n cd->info = info; } + /* if the provided callback is NULL, then substitute + * our own internal cbfunc and block here */ + if (NULL == cbfunc) { + PMIX_CONSTRUCT_LOCK(&mylock); + cd->opcbfunc = opcbfunc; + cd->cbdata = &mylock; + PMIX_THREADSHIFT(cd, _register_nspace); + PMIX_WAIT_THREAD(&mylock); + rc = mylock.status; + PMIX_DESTRUCT_LOCK(&mylock); + if (PMIX_SUCCESS == rc) { + rc = PMIX_OPERATION_SUCCEEDED; + } + return rc; + } + /* we have to push this into 
our event library to avoid * potential threading issues */ PMIX_THREADSHIFT(cd, _register_nspace); return PMIX_SUCCESS; } +void pmix_server_purge_events(pmix_peer_t *peer, + pmix_proc_t *proc) +{ + pmix_regevents_info_t *reginfo, *regnext; + pmix_peer_events_info_t *prev, *pnext; + pmix_iof_req_t *req, *nxt; + int i; + pmix_notify_caddy_t *ncd; + size_t n, m, p, ntgs; + pmix_proc_t *tgs, *tgt; + pmix_dmdx_local_t *dlcd, *dnxt; + + /* since the client is finalizing, remove them from any event + * registrations they may still have on our list */ + PMIX_LIST_FOREACH_SAFE(reginfo, regnext, &pmix_server_globals.events, pmix_regevents_info_t) { + PMIX_LIST_FOREACH_SAFE(prev, pnext, ®info->peers, pmix_peer_events_info_t) { + if ((NULL != peer && prev->peer == peer) || + (NULL != proc && PMIX_CHECK_PROCID(proc, &prev->peer->info->pname))) { + pmix_list_remove_item(®info->peers, &prev->super); + PMIX_RELEASE(prev); + if (0 == pmix_list_get_size(®info->peers)) { + pmix_list_remove_item(&pmix_server_globals.events, ®info->super); + PMIX_RELEASE(reginfo); + break; + } + } + } + } + + /* since the client is finalizing, remove them from any IOF + * registrations they may still have on our list */ + PMIX_LIST_FOREACH_SAFE(req, nxt, &pmix_globals.iof_requests, pmix_iof_req_t) { + if ((NULL != peer && PMIX_CHECK_PROCID(&req->peer->info->pname, &peer->info->pname)) || + (NULL != proc && PMIX_CHECK_PROCID(&req->peer->info->pname, proc))) { + pmix_list_remove_item(&pmix_globals.iof_requests, &req->super); + PMIX_RELEASE(req); + } + } + + /* see if this proc is involved in any direct modex requests */ + PMIX_LIST_FOREACH_SAFE(dlcd, dnxt, &pmix_server_globals.local_reqs, pmix_dmdx_local_t) { + if ((NULL != peer && PMIX_CHECK_PROCID(&peer->info->pname, &dlcd->proc)) || + (NULL != proc && PMIX_CHECK_PROCID(proc, &dlcd->proc))) { + /* cleanup this request */ + pmix_list_remove_item(&pmix_server_globals.local_reqs, &dlcd->super); + /* we can release the dlcd item here because we are not + * 
releasing the tracker held by the host - we are only + * releasing one item on that tracker */ + PMIX_RELEASE(dlcd); + } + } + + /* purge this client from any cached notifications */ + for (i=0; i < pmix_globals.max_events; i++) { + pmix_hotel_knock(&pmix_globals.notifications, i, (void**)&ncd); + if (NULL != ncd && NULL != ncd->targets && 0 < ncd->ntargets) { + tgt = NULL; + for (n=0; n < ncd->ntargets; n++) { + if ((NULL != peer && PMIX_CHECK_PROCID(&peer->info->pname, &ncd->targets[n])) || + (NULL != proc && PMIX_CHECK_PROCID(proc, &ncd->targets[n]))) { + tgt = &ncd->targets[n]; + break; + } + } + if (NULL != tgt) { + /* if this client was the only target, then just + * evict the notification */ + if (1 == ncd->ntargets) { + pmix_hotel_checkout(&pmix_globals.notifications, i); + PMIX_RELEASE(ncd); + } else if (PMIX_RANK_WILDCARD == tgt->rank && + NULL != proc && PMIX_RANK_WILDCARD == proc->rank) { + /* we have to remove this target, but leave the rest */ + ntgs = ncd->ntargets - 1; + PMIX_PROC_CREATE(tgs, ntgs); + p=0; + for (m=0; m < ncd->ntargets; m++) { + if (tgt != &ncd->targets[m]) { + memcpy(&tgs[p], &ncd->targets[n], sizeof(pmix_proc_t)); + ++p; + } + } + PMIX_PROC_FREE(ncd->targets, ncd->ntargets); + ncd->targets = tgs; + ncd->ntargets = ntgs; + } + } + } + } + + if (NULL != peer) { + /* ensure we honor any peer-level epilog requests */ + pmix_execute_epilog(&peer->epilog); + } +} + static void _deregister_nspace(int sd, short args, void *cbdata) { pmix_setup_caddy_t *cd = (pmix_setup_caddy_t*)cbdata; - pmix_nspace_t *tmp; + pmix_namespace_t *tmp; pmix_status_t rc; PMIX_ACQUIRE_OBJECT(cd); @@ -640,33 +753,39 @@ static void _deregister_nspace(int sd, short args, void *cbdata) "pmix:server _deregister_nspace %s", cd->proc.nspace); - /* release any job-level messaging resources */ + /* release any job-level network resources */ pmix_pnet.deregister_nspace(cd->proc.nspace); /* let our local storage clean up */ PMIX_GDS_DEL_NSPACE(rc, cd->proc.nspace); + /* 
remove any event registrations, IOF registrations, and + * cached notifications targeting procs from this nspace */ + pmix_server_purge_events(NULL, &cd->proc); + /* release this nspace */ - PMIX_LIST_FOREACH(tmp, &pmix_server_globals.nspaces, pmix_nspace_t) { - if (0 == strcmp(tmp->nspace, cd->proc.nspace)) { - pmix_list_remove_item(&pmix_server_globals.nspaces, &tmp->super); + PMIX_LIST_FOREACH(tmp, &pmix_globals.nspaces, pmix_namespace_t) { + if (PMIX_CHECK_NSPACE(tmp->nspace, cd->proc.nspace)) { + /* perform any nspace-level epilog */ + pmix_execute_epilog(&tmp->epilog); + /* remove and release it */ + pmix_list_remove_item(&pmix_globals.nspaces, &tmp->super); PMIX_RELEASE(tmp); break; } } /* release the caller */ - if (NULL != cd->opcbfunc) { - cd->opcbfunc(rc, cd->cbdata); - } + cd->opcbfunc(rc, cd->cbdata); PMIX_RELEASE(cd); } -PMIX_EXPORT void PMIx_server_deregister_nspace(const char nspace[], +PMIX_EXPORT void PMIx_server_deregister_nspace(const pmix_nspace_t nspace, pmix_op_cbfunc_t cbfunc, void *cbdata) { pmix_setup_caddy_t *cd; + pmix_lock_t mylock; pmix_output_verbose(2, pmix_server_globals.base_output, "pmix:server deregister nspace %s", @@ -682,11 +801,23 @@ PMIX_EXPORT void PMIx_server_deregister_nspace(const char nspace[], } PMIX_RELEASE_THREAD(&pmix_global_lock); - cd = PMIX_NEW(pmix_setup_caddy_t); - (void)strncpy(cd->proc.nspace, nspace, PMIX_MAX_NSLEN); + cd = PMIX_NEW(pmix_setup_caddy_t); + PMIX_LOAD_PROCID(&cd->proc, nspace, PMIX_RANK_WILDCARD); cd->opcbfunc = cbfunc; cd->cbdata = cbdata; + /* if the provided callback is NULL, then substitute + * our own internal cbfunc and block here */ + if (NULL == cbfunc) { + PMIX_CONSTRUCT_LOCK(&mylock); + cd->opcbfunc = opcbfunc; + cd->cbdata = &mylock; + PMIX_THREADSHIFT(cd, _deregister_nspace); + PMIX_WAIT_THREAD(&mylock); + PMIX_DESTRUCT_LOCK(&mylock); + return; + } + /* we have to push this into our event library to avoid * potential threading issues */ PMIX_THREADSHIFT(cd, _deregister_nspace); @@ 
-773,7 +904,7 @@ void pmix_server_execute_collective(int sd, short args, void *cbdata) } if (trk->hybrid || first) { /* setup the nspace */ - (void)strncpy(proc.nspace, cd->peer->info->pname.nspace, PMIX_MAX_NSLEN); + pmix_strncpy(proc.nspace, cd->peer->info->pname.nspace, PMIX_MAX_NSLEN); first = false; } proc.rank = cd->peer->info->pname.rank; @@ -850,7 +981,7 @@ static void _register_client(int sd, short args, void *cbdata) { pmix_setup_caddy_t *cd = (pmix_setup_caddy_t*)cbdata; pmix_rank_info_t *info, *iptr; - pmix_nspace_t *nptr, *ns; + pmix_namespace_t *nptr, *ns; pmix_server_trkr_t *trk; pmix_trkr_caddy_t *tcd; bool all_def; @@ -860,25 +991,26 @@ static void _register_client(int sd, short args, void *cbdata) PMIX_ACQUIRE_OBJECT(cd); pmix_output_verbose(2, pmix_server_globals.base_output, - "pmix:server _register_client for nspace %s rank %d", - cd->proc.nspace, cd->proc.rank); + "pmix:server _register_client for nspace %s rank %d %s object", + cd->proc.nspace, cd->proc.rank, + (NULL == cd->server_object) ? 
"NULL" : "NON-NULL"); /* see if we already have this nspace */ nptr = NULL; - PMIX_LIST_FOREACH(ns, &pmix_server_globals.nspaces, pmix_nspace_t) { + PMIX_LIST_FOREACH(ns, &pmix_globals.nspaces, pmix_namespace_t) { if (0 == strcmp(ns->nspace, cd->proc.nspace)) { nptr = ns; break; } } if (NULL == nptr) { - nptr = PMIX_NEW(pmix_nspace_t); + nptr = PMIX_NEW(pmix_namespace_t); if (NULL == nptr) { rc = PMIX_ERR_NOMEM; goto cleanup; } nptr->nspace = strdup(cd->proc.nspace); - pmix_list_append(&pmix_server_globals.nspaces, &nptr->super); + pmix_list_append(&pmix_globals.nspaces, &nptr->super); } /* setup a peer object for this client - since the host server * only deals with the original processes and not any clones, @@ -919,7 +1051,7 @@ static void _register_client(int sd, short args, void *cbdata) * if the nspaces are all defined */ if (all_def) { /* so far, they have all been defined - check this one */ - PMIX_LIST_FOREACH(ns, &pmix_server_globals.nspaces, pmix_nspace_t) { + PMIX_LIST_FOREACH(ns, &pmix_globals.nspaces, pmix_namespace_t) { if (0 < ns->nlocalprocs && 0 == strcmp(trk->pcs[i].nspace, ns->nspace)) { all_def = ns->all_registered; @@ -962,9 +1094,7 @@ static void _register_client(int sd, short args, void *cbdata) cleanup: /* let the caller know we are done */ - if (NULL != cd->opcbfunc) { - cd->opcbfunc(rc, cd->cbdata); - } + cd->opcbfunc(rc, cd->cbdata); PMIX_RELEASE(cd); } @@ -973,6 +1103,8 @@ PMIX_EXPORT pmix_status_t PMIx_server_register_client(const pmix_proc_t *proc, pmix_op_cbfunc_t cbfunc, void *cbdata) { pmix_setup_caddy_t *cd; + pmix_status_t rc; + pmix_lock_t mylock; PMIX_ACQUIRE_THREAD(&pmix_global_lock); if (pmix_globals.init_cntr <= 0) { @@ -989,7 +1121,7 @@ PMIX_EXPORT pmix_status_t PMIx_server_register_client(const pmix_proc_t *proc, if (NULL == cd) { return PMIX_ERR_NOMEM; } - (void)strncpy(cd->proc.nspace, proc->nspace, PMIX_MAX_NSLEN); + pmix_strncpy(cd->proc.nspace, proc->nspace, PMIX_MAX_NSLEN); cd->proc.rank = proc->rank; cd->uid = uid; 
cd->gid = gid; @@ -997,6 +1129,22 @@ PMIX_EXPORT pmix_status_t PMIx_server_register_client(const pmix_proc_t *proc, cd->opcbfunc = cbfunc; cd->cbdata = cbdata; + /* if the provided callback is NULL, then substitute + * our own internal cbfunc and block here */ + if (NULL == cbfunc) { + PMIX_CONSTRUCT_LOCK(&mylock); + cd->opcbfunc = opcbfunc; + cd->cbdata = &mylock; + PMIX_THREADSHIFT(cd, _register_client); + PMIX_WAIT_THREAD(&mylock); + rc = mylock.status; + PMIX_DESTRUCT_LOCK(&mylock); + if (PMIX_SUCCESS == rc) { + rc = PMIX_OPERATION_SUCCEEDED; + } + return rc; + } + /* we have to push this into our event library to avoid * potential threading issues */ PMIX_THREADSHIFT(cd, _register_client); @@ -1007,7 +1155,7 @@ static void _deregister_client(int sd, short args, void *cbdata) { pmix_setup_caddy_t *cd = (pmix_setup_caddy_t*)cbdata; pmix_rank_info_t *info; - pmix_nspace_t *nptr, *tmp; + pmix_namespace_t *nptr, *tmp; pmix_peer_t *peer; PMIX_ACQUIRE_OBJECT(cd); @@ -1018,7 +1166,7 @@ static void _deregister_client(int sd, short args, void *cbdata) /* see if we already have this nspace */ nptr = NULL; - PMIX_LIST_FOREACH(tmp, &pmix_server_globals.nspaces, pmix_nspace_t) { + PMIX_LIST_FOREACH(tmp, &pmix_globals.nspaces, pmix_namespace_t) { if (0 == strcmp(tmp->nspace, cd->proc.nspace)) { nptr = tmp; break; @@ -1058,7 +1206,14 @@ static void _deregister_client(int sd, short args, void *cbdata) * for tools, so don't clean them up */ if (!PMIX_PROC_IS_TOOL(peer)) { pmix_pnet.child_finalized(&cd->proc); + pmix_psensor.stop(peer, NULL); } + /* honor any registered epilogs */ + pmix_execute_epilog(&peer->epilog); + /* ensure we close the socket to this peer so we don't + * generate "connection lost" events should it be + * subsequently "killed" by the host */ + CLOSE_THE_SOCKET(peer->sd); } if (nptr->nlocalprocs == nptr->nfinalized) { pmix_pnet.local_app_finalized(nptr); @@ -1070,9 +1225,7 @@ static void _deregister_client(int sd, short args, void *cbdata) } cleanup: - if 
(NULL != cd->opcbfunc) { - cd->opcbfunc(PMIX_SUCCESS, cd->cbdata); - } + cd->opcbfunc(PMIX_SUCCESS, cd->cbdata); PMIX_RELEASE(cd); } @@ -1080,6 +1233,7 @@ PMIX_EXPORT void PMIx_server_deregister_client(const pmix_proc_t *proc, pmix_op_cbfunc_t cbfunc, void *cbdata) { pmix_setup_caddy_t *cd; + pmix_lock_t mylock; PMIX_ACQUIRE_THREAD(&pmix_global_lock); if (pmix_globals.init_cntr <= 0) { @@ -1102,11 +1256,23 @@ PMIX_EXPORT void PMIx_server_deregister_client(const pmix_proc_t *proc, } return; } - (void)strncpy(cd->proc.nspace, proc->nspace, PMIX_MAX_NSLEN); + pmix_strncpy(cd->proc.nspace, proc->nspace, PMIX_MAX_NSLEN); cd->proc.rank = proc->rank; cd->opcbfunc = cbfunc; cd->cbdata = cbdata; + /* if the provided callback is NULL, then substitute + * our own internal cbfunc and block here */ + if (NULL == cbfunc) { + PMIX_CONSTRUCT_LOCK(&mylock); + cd->opcbfunc = opcbfunc; + cd->cbdata = &mylock; + PMIX_THREADSHIFT(cd, _deregister_client); + PMIX_WAIT_THREAD(&mylock); + PMIX_DESTRUCT_LOCK(&mylock); + return; + } + /* we have to push this into our event library to avoid * potential threading issues */ PMIX_THREADSHIFT(cd, _deregister_client); @@ -1190,7 +1356,7 @@ static void _dmodex_req(int sd, short args, void *cbdata) { pmix_setup_caddy_t *cd = (pmix_setup_caddy_t*)cbdata; pmix_rank_info_t *info, *iptr; - pmix_nspace_t *nptr, *ns; + pmix_namespace_t *nptr, *ns; char *data = NULL; size_t sz = 0; pmix_dmdx_remote_t *dcd; @@ -1202,15 +1368,15 @@ static void _dmodex_req(int sd, short args, void *cbdata) PMIX_ACQUIRE_OBJECT(cd); pmix_output_verbose(2, pmix_server_globals.base_output, - "DMODX LOOKING FOR %s:%d", - cd->proc.nspace, cd->proc.rank); + "DMODX LOOKING FOR %s", + PMIX_NAME_PRINT(&cd->proc)); /* this should be one of my clients, but a race condition * could cause this request to arrive prior to us having * been informed of it - so first check to see if we know * about this nspace yet */ nptr = NULL; - PMIX_LIST_FOREACH(ns, &pmix_server_globals.nspaces, 
pmix_nspace_t) { + PMIX_LIST_FOREACH(ns, &pmix_globals.nspaces, pmix_namespace_t) { if (0 == strcmp(ns->nspace, cd->proc.nspace)) { nptr = ns; break; @@ -1336,11 +1502,12 @@ PMIX_EXPORT pmix_status_t PMIx_server_dmodex_request(const pmix_proc_t *proc, } pmix_output_verbose(2, pmix_server_globals.base_output, - "pmix:server dmodex request%s:%d", - proc->nspace, proc->rank); + "%s pmix:server dmodex request for proc %s", + PMIX_NAME_PRINT(&pmix_globals.myid), + PMIX_NAME_PRINT(proc)); cd = PMIX_NEW(pmix_setup_caddy_t); - (void)strncpy(cd->proc.nspace, proc->nspace, PMIX_MAX_NSLEN); + pmix_strncpy(cd->proc.nspace, proc->nspace, PMIX_MAX_NSLEN); cd->proc.rank = proc->rank; cd->cbfunc = cbfunc; cd->cbdata = cbdata; @@ -1358,7 +1525,7 @@ static void _store_internal(int sd, short args, void *cbdata) PMIX_ACQUIRE_OBJECT(cd); - (void)strncpy(proc.nspace, cd->pname.nspace, PMIX_MAX_NSLEN); + pmix_strncpy(proc.nspace, cd->pname.nspace, PMIX_MAX_NSLEN); proc.rank = cd->pname.rank; PMIX_GDS_STORE_KV(cd->status, pmix_globals.mypeer, &proc, PMIX_INTERNAL, cd->kv); @@ -1368,7 +1535,7 @@ static void _store_internal(int sd, short args, void *cbdata) } PMIX_EXPORT pmix_status_t PMIx_Store_internal(const pmix_proc_t *proc, - const char *key, pmix_value_t *val) + const pmix_key_t key, pmix_value_t *val) { pmix_shift_caddy_t *cd; pmix_status_t rc; @@ -1482,7 +1649,7 @@ static void _setup_app(int sd, short args, void *cbdata) } n = 0; PMIX_LIST_FOREACH(kv, &ilist, pmix_kval_t) { - (void)strncpy(fcd->info[n].key, kv->key, PMIX_MAX_KEYLEN); + pmix_strncpy(fcd->info[n].key, kv->key, PMIX_MAX_KEYLEN); pmix_value_xfer(&fcd->info[n].value, kv->value); ++n; } @@ -1506,7 +1673,7 @@ static void _setup_app(int sd, short args, void *cbdata) PMIX_RELEASE(cd); } -pmix_status_t PMIx_server_setup_application(const char nspace[], +pmix_status_t PMIx_server_setup_application(const pmix_nspace_t nspace, pmix_info_t info[], size_t ninfo, pmix_setup_application_cbfunc_t cbfunc, void *cbdata) { @@ -1557,7 
+1724,7 @@ static void _setup_local_support(int sd, short args, void *cbdata) PMIX_RELEASE(cd); } -pmix_status_t PMIx_server_setup_local_support(const char nspace[], +pmix_status_t PMIx_server_setup_local_support(const pmix_nspace_t nspace, pmix_info_t info[], size_t ninfo, pmix_op_cbfunc_t cbfunc, void *cbdata) { @@ -1595,7 +1762,7 @@ static void _iofdeliver(int sd, short args, void *cbdata) pmix_buffer_t *msg; bool found = false; bool cached = false; - int ignore; + pmix_iof_cache_t *iof; pmix_output_verbose(2, pmix_server_globals.iof_output, "PMIX:SERVER delivering IOF from %s on channel %0x", @@ -1609,17 +1776,16 @@ static void _iofdeliver(int sd, short args, void *cbdata) continue; } /* see if the source matches the request */ - if (0 != strncmp(cd->procs->nspace, req->pname.nspace, PMIX_MAX_NSLEN) || - (PMIX_RANK_WILDCARD != req->pname.rank && cd->procs->rank != req->pname.rank)) { + if (!PMIX_CHECK_PROCID(cd->procs, &req->pname)) { continue; } /* never forward back to the source! 
This can happen if the source - * is a launcher */ + * is a launcher - also, never forward to a peer that is no + * longer with us */ if (NULL == req->peer->info || req->peer->finalized) { continue; } - if (0 == strncmp(cd->procs->nspace, req->peer->info->pname.nspace, PMIX_MAX_NSLEN) && - cd->procs->rank == req->peer->info->pname.rank) { + if (PMIX_CHECK_PROCID(cd->procs, &req->peer->info->pname)) { continue; } found = true; @@ -1660,15 +1826,21 @@ static void _iofdeliver(int sd, short args, void *cbdata) /* if nobody has registered for this yet, then cache it */ if (!found) { - /* add this output to our hotel so it is cached until someone + pmix_output_verbose(2, pmix_server_globals.iof_output, + "PMIx:SERVER caching IOF"); + if (pmix_server_globals.max_iof_cache == pmix_list_get_size(&pmix_server_globals.iof)) { + /* remove the oldest cached message */ + iof = (pmix_iof_cache_t*)pmix_list_remove_first(&pmix_server_globals.iof); + PMIX_RELEASE(iof); + } + /* add this output to our cache so it is cached until someone * registers to receive it */ - if (PMIX_SUCCESS != (rc = pmix_hotel_checkin(&pmix_server_globals.iof, cd, &ignore))) { - /* we can't cache it for some reason */ - PMIX_ERROR_LOG(rc); - PMIX_RELEASE(cd); - return; - } - cached = true; + iof = PMIX_NEW(pmix_iof_cache_t); + memcpy(&iof->source, cd->procs, sizeof(pmix_proc_t)); + iof->channel = cd->channels; + iof->bo = cd->bo; + cd->bo = NULL; // protect the data + pmix_list_append(&pmix_server_globals.iof, &iof->super); } @@ -1676,11 +1848,6 @@ static void _iofdeliver(int sd, short args, void *cbdata) cd->opcbfunc(rc, cd->cbdata); } if (!cached) { - if (NULL != cd->info) { - PMIX_INFO_FREE(cd->info, cd->ninfo); - } - PMIX_PROC_FREE(cd->procs, 1); - PMIX_BYTE_OBJECT_FREE(cd->bo, 1); PMIX_RELEASE(cd); } } @@ -1706,7 +1873,8 @@ pmix_status_t PMIx_server_IOF_deliver(const pmix_proc_t *source, PMIX_RELEASE(cd); return PMIX_ERR_NOMEM; } - (void)strncpy(cd->procs[0].nspace, source->nspace, PMIX_MAX_NSLEN); + 
cd->nprocs = 1; + pmix_strncpy(cd->procs[0].nspace, source->nspace, PMIX_MAX_NSLEN); cd->procs[0].rank = source->rank; cd->channels = channel; PMIX_BYTE_OBJECT_CREATE(cd->bo, 1); @@ -1714,9 +1882,9 @@ pmix_status_t PMIx_server_IOF_deliver(const pmix_proc_t *source, PMIX_RELEASE(cd); return PMIX_ERR_NOMEM; } + cd->nbo = 1; cd->bo[0].bytes = (char*)malloc(bo->size); if (NULL == cd->bo[0].bytes) { - PMIX_BYTE_OBJECT_FREE(cd->bo, 1); PMIX_RELEASE(cd); return PMIX_ERR_NOMEM; } @@ -1725,7 +1893,6 @@ pmix_status_t PMIx_server_IOF_deliver(const pmix_proc_t *source, if (0 < ninfo) { PMIX_INFO_CREATE(cd->info, ninfo); if (NULL == cd->info) { - PMIX_BYTE_OBJECT_FREE(cd->bo, 1); PMIX_RELEASE(cd); return PMIX_ERR_NOMEM; } @@ -1789,7 +1956,7 @@ static void clct_complete(pmix_status_t status, /* transfer the results */ n=0; PMIX_LIST_FOREACH(kv, &cd->payload, pmix_kval_t) { - (void)strncpy(cd->info[n].key, kv->key, PMIX_MAX_KEYLEN); + pmix_strncpy(cd->info[n].key, kv->key, PMIX_MAX_KEYLEN); rc = pmix_value_xfer(&cd->info[n].value, kv->value); if (PMIX_SUCCESS != rc) { PMIX_INFO_FREE(cd->info, cd->ninfo); @@ -1821,10 +1988,10 @@ static void clct_complete(pmix_status_t status, static void clct(int sd, short args, void *cbdata) { pmix_inventory_rollup_t *cd = (pmix_inventory_rollup_t*)cbdata; - pmix_status_t rc; #if PMIX_HAVE_HWLOC /* if we don't know our topology, we better get it now */ + pmix_status_t rc; if (NULL == pmix_hwloc_topology) { if (PMIX_SUCCESS != (rc = pmix_hwloc_get_topology(NULL, 0))) { PMIX_ERROR_LOG(rc); @@ -1989,6 +2156,10 @@ static void connection_cleanup(int sd, short args, void *cbdata) { pmix_server_caddy_t *cd = (pmix_server_caddy_t*)cbdata; + /* ensure that we know the peer has finalized else we + * will generate an event - yes, it should have been + * done, but it is REALLY important that it be set */ + cd->peer->finalized = true; pmix_ptl_base_lost_connection(cd->peer, PMIX_SUCCESS); /* cleanup the caddy */ PMIX_RELEASE(cd); @@ -2026,10 +2197,6 @@ static 
void op_cbfunc2(pmix_status_t status, void *cbdata) PMIX_RELEASE(reply); } - /* ensure that we know the peer has finalized else we - * will generate an event - yes, it should have been - * done, but it is REALLY important that it be set */ - cd->peer->finalized = true; /* cleanup any lingering references to this peer - note * that we cannot call the lost_connection function * directly as we need the connection to still @@ -2059,40 +2226,47 @@ static void _spcb(int sd, short args, void *cbdata) PMIX_BFROPS_PACK(rc, cd->cd->peer, reply, &cd->status, 1, PMIX_STATUS); if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); + PMIX_RELEASE(reply); goto cleanup; } - if (PMIX_SUCCESS == cd->status) { - /* pass back the name of the nspace */ - PMIX_BFROPS_PACK(rc, cd->cd->peer, reply, &cd->pname.nspace, 1, PMIX_STRING); - /* add the job-level info, if we have it */ - (void)strncpy(proc.nspace, cd->pname.nspace, PMIX_MAX_NSLEN); - proc.rank = PMIX_RANK_WILDCARD; - /* this is going to a local client, so let the gds - * have the option of returning a copy of the data, - * or a pointer to local storage */ - PMIX_CONSTRUCT(&cb, pmix_cb_t); - cb.proc = &proc; - cb.scope = PMIX_SCOPE_UNDEF; - cb.copy = false; - PMIX_GDS_FETCH_KV(rc, pmix_globals.mypeer, &cb); - if (PMIX_SUCCESS == rc) { - PMIX_LIST_FOREACH(kv, &cb.kvs, pmix_kval_t) { - PMIX_BFROPS_PACK(rc, cd->cd->peer, reply, kv, 1, PMIX_KVAL); - if (PMIX_SUCCESS != rc) { - PMIX_ERROR_LOG(rc); - PMIX_RELEASE(reply); - PMIX_DESTRUCT(&cb); - goto cleanup; - } + /* pass back the name of the nspace */ + PMIX_BFROPS_PACK(rc, cd->cd->peer, reply, &cd->pname.nspace, 1, PMIX_STRING); + if (PMIX_SUCCESS != rc) { + PMIX_ERROR_LOG(rc); + PMIX_RELEASE(reply); + goto cleanup; + } + /* add the job-level info, if we have it */ + pmix_strncpy(proc.nspace, cd->pname.nspace, PMIX_MAX_NSLEN); + proc.rank = PMIX_RANK_WILDCARD; + /* this is going to a local client, so let the gds + * have the option of returning a copy of the data, + * or a pointer to local 
storage */ + PMIX_CONSTRUCT(&cb, pmix_cb_t); + cb.proc = &proc; + cb.scope = PMIX_SCOPE_UNDEF; + cb.copy = false; + PMIX_GDS_FETCH_KV(rc, pmix_globals.mypeer, &cb); + if (PMIX_SUCCESS == rc) { + PMIX_LIST_FOREACH(kv, &cb.kvs, pmix_kval_t) { + PMIX_BFROPS_PACK(rc, cd->cd->peer, reply, kv, 1, PMIX_KVAL); + if (PMIX_SUCCESS != rc) { + PMIX_ERROR_LOG(rc); + PMIX_RELEASE(reply); + PMIX_DESTRUCT(&cb); + goto cleanup; } - PMIX_DESTRUCT(&cb); } + PMIX_DESTRUCT(&cb); } /* the function that created the server_caddy did a * retain on the peer, so we don't have to worry about * it still being present - tell the originator the result */ - PMIX_SERVER_QUEUE_REPLY(cd->cd->peer, cd->cd->hdr.tag, reply); + PMIX_SERVER_QUEUE_REPLY(rc, cd->cd->peer, cd->cd->hdr.tag, reply); + if (PMIX_SUCCESS != rc) { + PMIX_RELEASE(reply); + } cleanup: /* cleanup */ @@ -2152,7 +2326,10 @@ static void lookup_cbfunc(pmix_status_t status, pmix_pdata_t pdata[], size_t nda /* the function that created the server_caddy did a * retain on the peer, so we don't have to worry about * it still being present - tell the originator the result */ - PMIX_SERVER_QUEUE_REPLY(cd->peer, cd->hdr.tag, reply); + PMIX_SERVER_QUEUE_REPLY(rc, cd->peer, cd->hdr.tag, reply); + if (PMIX_SUCCESS != rc) { + PMIX_RELEASE(reply); + } /* cleanup */ PMIX_RELEASE(cd); } @@ -2166,19 +2343,33 @@ static void _mdxcbfunc(int sd, short argc, void *cbdata) { pmix_shift_caddy_t *scd = (pmix_shift_caddy_t*)cbdata; pmix_server_trkr_t *tracker = scd->tracker; - pmix_buffer_t xfer, *reply, bkt; - pmix_byte_object_t bo, bo2; - pmix_server_caddy_t *cd; + pmix_buffer_t xfer, *reply; + pmix_server_caddy_t *cd, *nxt; pmix_status_t rc = PMIX_SUCCESS, ret; pmix_nspace_caddy_t *nptr; pmix_list_t nslist; - int32_t cnt = 1; - char byte; bool found; - pmix_collect_t ctype; PMIX_ACQUIRE_OBJECT(scd); + if (NULL == tracker) { + /* give them a release if they want it - this should + * never happen, but protect against the possibility */ + if (NULL != 
scd->cbfunc.relfn) { + scd->cbfunc.relfn(scd->cbdata); + } + PMIX_RELEASE(scd); + return; + } + + /* if we get here, then there are processes waiting + * for a response */ + + /* if the timer is active, clear it */ + if (tracker->event_active) { + pmix_event_del(&tracker->ev); + } + /* pass the blobs being returned */ PMIX_CONSTRUCT(&xfer, pmix_buffer_t); PMIX_LOAD_BUFFER(pmix_globals.mypeer, &xfer, scd->data, scd->ndata); @@ -2200,7 +2391,7 @@ static void _mdxcbfunc(int sd, short argc, void *cbdata) goto finish_collective; } - // collect the pmix_nspace_t's of all local participants + // collect the pmix_namespace_t's of all local participants PMIX_LIST_FOREACH(cd, &tracker->local_cbs, pmix_server_caddy_t) { // see if we already have this nspace found = false; @@ -2219,74 +2410,17 @@ static void _mdxcbfunc(int sd, short argc, void *cbdata) } } - /* Loop over the enclosed byte object envelopes and - * store them in our GDS module */ - cnt = 1; - PMIX_BFROPS_UNPACK(rc, pmix_globals.mypeer, - &xfer, &bo, &cnt, PMIX_BYTE_OBJECT); - while (PMIX_SUCCESS == rc) { - PMIX_LOAD_BUFFER(pmix_globals.mypeer, &bkt, bo.bytes, bo.size); - /* unpack the data collection flag */ - cnt = 1; - PMIX_BFROPS_UNPACK(rc, pmix_globals.mypeer, - &bkt, &byte, &cnt, PMIX_BYTE); - if (PMIX_ERR_UNPACK_READ_PAST_END_OF_BUFFER == rc) { - /* no data was returned, so we are done with this blob */ - break; - } + PMIX_LIST_FOREACH(nptr, &nslist, pmix_nspace_caddy_t) { + PMIX_GDS_STORE_MODEX(rc, nptr->ns, &tracker->local_cbs, &xfer); if (PMIX_SUCCESS != rc) { - /* we have an error */ - break; - } - - // Check that this blob was accumulated with the same data collection setting - ctype = (pmix_collect_t)byte; - if (ctype != tracker->collect_type) { - rc = PMIX_ERR_INVALID_ARG; - break; - } - /* unpack the enclosed blobs from the various peers */ - cnt = 1; - PMIX_BFROPS_UNPACK(rc, pmix_globals.mypeer, - &bkt, &bo2, &cnt, PMIX_BYTE_OBJECT); - while (PMIX_SUCCESS == rc) { - /* unpack all the kval's from 
this peer and store them in - * our GDS. Note that PMIx by design holds all data at - * the server level until requested. If our GDS is a - * shared memory region, then the data may be available - * right away - but the client still has to be notified - * of its presence. */ - PMIX_LIST_FOREACH(nptr, &nslist, pmix_nspace_caddy_t) { - PMIX_GDS_STORE_MODEX(rc, nptr->ns, &tracker->local_cbs, &bo2); - if (PMIX_SUCCESS != rc) { - PMIX_ERROR_LOG(rc); - break; - } - } - PMIX_BYTE_OBJECT_DESTRUCT(&bo2); - /* get the next blob */ - cnt = 1; - PMIX_BFROPS_UNPACK(rc, pmix_globals.mypeer, - &bkt, &bo2, &cnt, PMIX_BYTE_OBJECT); - } - if (PMIX_ERR_UNPACK_READ_PAST_END_OF_BUFFER == rc) { - rc = PMIX_SUCCESS; - } else if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); - goto finish_collective; + break; } - /* unpack and process the next blob */ - cnt = 1; - PMIX_BFROPS_UNPACK(rc, pmix_globals.mypeer, - &xfer, &bo, &cnt, PMIX_BYTE_OBJECT); - } - if (PMIX_ERR_UNPACK_READ_PAST_END_OF_BUFFER == rc) { - rc = PMIX_SUCCESS; } finish_collective: /* loop across all procs in the tracker, sending them the reply */ - PMIX_LIST_FOREACH(cd, &tracker->local_cbs, pmix_server_caddy_t) { + PMIX_LIST_FOREACH_SAFE(cd, nxt, &tracker->local_cbs, pmix_server_caddy_t) { reply = PMIX_NEW(pmix_buffer_t); if (NULL == reply) { rc = PMIX_ERR_NOMEM; @@ -2301,7 +2435,13 @@ static void _mdxcbfunc(int sd, short argc, void *cbdata) pmix_output_verbose(2, pmix_server_globals.base_output, "server:modex_cbfunc reply being sent to %s:%u", cd->peer->info->pname.nspace, cd->peer->info->pname.rank); - PMIX_SERVER_QUEUE_REPLY(cd->peer, cd->hdr.tag, reply); + PMIX_SERVER_QUEUE_REPLY(rc, cd->peer, cd->hdr.tag, reply); + if (PMIX_SUCCESS != rc) { + PMIX_RELEASE(reply); + } + /* remove this entry */ + pmix_list_remove_item(&tracker->local_cbs, &cd->super); + PMIX_RELEASE(cd); } cleanup: @@ -2334,15 +2474,6 @@ static void modex_cbfunc(pmix_status_t status, const char *data, size_t ndata, v pmix_output_verbose(2, 
pmix_server_globals.base_output, "server:modex_cbfunc called with %d bytes", (int)ndata); - if (NULL == tracker) { - /* nothing to do - but be sure to give them - * a release if they want it */ - if (NULL != relfn) { - relfn(relcbd); - } - return; - } - /* need to thread-shift this callback as it accesses global data */ scd = PMIX_NEW(pmix_shift_caddy_t); if (NULL == scd) { @@ -2371,7 +2502,10 @@ static void get_cbfunc(pmix_status_t status, const char *data, size_t ndata, voi pmix_output_verbose(2, pmix_server_globals.base_output, "server:get_cbfunc called with %d bytes", (int)ndata); - /* no need to thread-shift here as no global data is accessed */ + /* no need to thread-shift here as no global data is accessed + * and we are called from another internal function + * (see pmix_server_get.c:pmix_pending_resolve) that + * has already been thread-shifted */ if (NULL == cd) { /* nothing to do - but be sure to give them @@ -2407,7 +2541,10 @@ static void get_cbfunc(pmix_status_t status, const char *data, size_t ndata, voi pmix_output_hexdump(10, pmix_server_globals.base_output, reply->base_ptr, (reply->bytes_used < 256 ? 
reply->bytes_used : 256)); - PMIX_SERVER_QUEUE_REPLY(cd->peer, cd->hdr.tag, reply); + PMIX_SERVER_QUEUE_REPLY(rc, cd->peer, cd->hdr.tag, reply); + if (PMIX_SUCCESS != rc) { + PMIX_RELEASE(reply); + } cleanup: /* if someone wants a release, give it to them */ @@ -2434,6 +2571,19 @@ static void _cnct(int sd, short args, void *cbdata) PMIX_ACQUIRE_OBJECT(scd); + if (NULL == tracker) { + /* nothing to do */ + return; + } + + /* if we get here, then there are processes waiting + * for a response */ + + /* if the timer is active, clear it */ + if (tracker->event_active) { + pmix_event_del(&tracker->ev); + } + /* find the unique nspaces that are participating */ PMIX_LIST_FOREACH(cd, &tracker->local_cbs, pmix_server_caddy_t) { if (NULL == nspaces) { @@ -2483,7 +2633,7 @@ static void _cnct(int sd, short args, void *cbdata) * local storage */ /* add the job-level info, if necessary */ proc.rank = PMIX_RANK_WILDCARD; - (void)strncpy(proc.nspace, nspaces[i], PMIX_MAX_NSLEN); + pmix_strncpy(proc.nspace, nspaces[i], PMIX_MAX_NSLEN); PMIX_CONSTRUCT(&cb, pmix_cb_t); /* this is for a local client, so give the gds the * option of returning a complete copy of the data, @@ -2547,7 +2697,10 @@ static void _cnct(int sd, short args, void *cbdata) pmix_output_verbose(2, pmix_server_globals.base_output, "server:cnct_cbfunc reply being sent to %s:%u", cd->peer->info->pname.nspace, cd->peer->info->pname.rank); - PMIX_SERVER_QUEUE_REPLY(cd->peer, cd->hdr.tag, reply); + PMIX_SERVER_QUEUE_REPLY(rc, cd->peer, cd->hdr.tag, reply); + if (PMIX_SUCCESS != rc) { + PMIX_RELEASE(reply); + } } cleanup: @@ -2569,11 +2722,6 @@ static void cnct_cbfunc(pmix_status_t status, void *cbdata) pmix_output_verbose(2, pmix_server_globals.base_output, "server:cnct_cbfunc called"); - if (NULL == tracker) { - /* nothing to do */ - return; - } - /* need to thread-shift this callback as it accesses global data */ scd = PMIX_NEW(pmix_shift_caddy_t); if (NULL == scd) { @@ -2595,6 +2743,19 @@ static void _discnct(int sd, 
short args, void *cbdata) PMIX_ACQUIRE_OBJECT(scd); + if (NULL == tracker) { + /* nothing to do */ + return; + } + + /* if we get here, then there are processes waiting + * for a response */ + + /* if the timer is active, clear it */ + if (tracker->event_active) { + pmix_event_del(&tracker->ev); + } + /* loop across all local procs in the tracker, sending them the reply */ PMIX_LIST_FOREACH(cd, &tracker->local_cbs, pmix_server_caddy_t) { /* setup the reply */ @@ -2614,7 +2775,10 @@ static void _discnct(int sd, short args, void *cbdata) pmix_output_verbose(2, pmix_server_globals.base_output, "server:cnct_cbfunc reply being sent to %s:%u", cd->peer->info->pname.nspace, cd->peer->info->pname.rank); - PMIX_SERVER_QUEUE_REPLY(cd->peer, cd->hdr.tag, reply); + PMIX_SERVER_QUEUE_REPLY(rc, cd->peer, cd->hdr.tag, reply); + if (PMIX_SUCCESS != rc) { + PMIX_RELEASE(reply); + } } cleanup: @@ -2636,11 +2800,6 @@ static void discnct_cbfunc(pmix_status_t status, void *cbdata) "server:discnct_cbfunc called on nspace %s", (NULL == tracker) ? 
"NULL" : tracker->pname.nspace); - if (NULL == tracker) { - /* nothing to do */ - return; - } - /* need to thread-shift this callback as it accesses global data */ scd = PMIX_NEW(pmix_shift_caddy_t); if (NULL == scd) { @@ -2673,7 +2832,10 @@ static void regevents_cbfunc(pmix_status_t status, void *cbdata) PMIX_ERROR_LOG(rc); } // send reply - PMIX_SERVER_QUEUE_REPLY(cd->peer, cd->hdr.tag, reply); + PMIX_SERVER_QUEUE_REPLY(rc, cd->peer, cd->hdr.tag, reply); + if (PMIX_SUCCESS != rc) { + PMIX_RELEASE(reply); + } PMIX_RELEASE(cd); } @@ -2697,10 +2859,71 @@ static void notifyerror_cbfunc (pmix_status_t status, void *cbdata) PMIX_ERROR_LOG(rc); } // send reply - PMIX_SERVER_QUEUE_REPLY(cd->peer, cd->hdr.tag, reply); + PMIX_SERVER_QUEUE_REPLY(rc, cd->peer, cd->hdr.tag, reply); + if (PMIX_SUCCESS != rc) { + PMIX_RELEASE(reply); + } PMIX_RELEASE(cd); } +/* Completion callback for an "alloc" request: packs the status, the + * info count and any returned info into a reply queued to the + * requesting peer, then frees the query caddy's arrays, releases both + * caddies and invokes release_fn when one was supplied. */ +static void alloc_cbfunc(pmix_status_t status, + pmix_info_t *info, size_t ninfo, + void *cbdata, + pmix_release_cbfunc_t release_fn, + void *release_cbdata) +{ + pmix_query_caddy_t *qcd = (pmix_query_caddy_t*)cbdata; + pmix_server_caddy_t *cd = (pmix_server_caddy_t*)qcd->cbdata; + pmix_buffer_t *reply; + pmix_status_t rc; + + pmix_output_verbose(2, pmix_server_globals.base_output, + "pmix:alloc callback with status %d", status); + + reply = PMIX_NEW(pmix_buffer_t); + if (NULL == reply) { + PMIX_ERROR_LOG(PMIX_ERR_NOMEM); + /* no reply can be sent, but the caddies and the caller's + * data must still be released */ + goto cleanup; + } + PMIX_BFROPS_PACK(rc, cd->peer, reply, &status, 1, PMIX_STATUS); + if (PMIX_SUCCESS != rc) { + PMIX_ERROR_LOG(rc); + goto complete; + } + /* pack the returned data */ + PMIX_BFROPS_PACK(rc, cd->peer, reply, &ninfo, 1, PMIX_SIZE); + if (PMIX_SUCCESS != rc) { + PMIX_ERROR_LOG(rc); + goto complete; + } + if (0 < ninfo) { + PMIX_BFROPS_PACK(rc, cd->peer, reply, info, ninfo, PMIX_INFO); + if (PMIX_SUCCESS != rc) { + PMIX_ERROR_LOG(rc); + } + } + + complete: + // send reply + PMIX_SERVER_QUEUE_REPLY(rc, cd->peer, cd->hdr.tag, reply); + if (PMIX_SUCCESS != rc) { + 
PMIX_RELEASE(reply); + } + + cleanup: + // cleanup + if (NULL != qcd->queries) { + PMIX_QUERY_FREE(qcd->queries, qcd->nqueries); + } + if (NULL != qcd->info) { + PMIX_INFO_FREE(qcd->info, qcd->ninfo); + } + PMIX_RELEASE(qcd); + PMIX_RELEASE(cd); + if (NULL != release_fn) { + release_fn(release_cbdata); + } +} static void query_cbfunc(pmix_status_t status, pmix_info_t *info, size_t ninfo, @@ -2740,9 +2963,74 @@ static void query_cbfunc(pmix_status_t status, } } + /* cache the data for any future requests */ + + complete: + // send reply + PMIX_SERVER_QUEUE_REPLY(rc, cd->peer, cd->hdr.tag, reply); + if (PMIX_SUCCESS != rc) { + PMIX_RELEASE(reply); + } + + // cleanup + if (NULL != qcd->queries) { + PMIX_QUERY_FREE(qcd->queries, qcd->nqueries); + } + if (NULL != qcd->info) { + PMIX_INFO_FREE(qcd->info, qcd->ninfo); + } + PMIX_RELEASE(qcd); + PMIX_RELEASE(cd); + if (NULL != release_fn) { + release_fn(release_cbdata); + } +} + +static void jctrl_cbfunc(pmix_status_t status, + pmix_info_t *info, size_t ninfo, + void *cbdata, + pmix_release_cbfunc_t release_fn, + void *release_cbdata) +{ + pmix_query_caddy_t *qcd = (pmix_query_caddy_t*)cbdata; + pmix_server_caddy_t *cd = (pmix_server_caddy_t*)qcd->cbdata; + pmix_buffer_t *reply; + pmix_status_t rc; + + pmix_output_verbose(2, pmix_server_globals.base_output, + "pmix:jctrl callback with status %d", status); + + reply = PMIX_NEW(pmix_buffer_t); + if (NULL == reply) { + PMIX_ERROR_LOG(PMIX_ERR_NOMEM); + PMIX_RELEASE(cd); + return; + } + PMIX_BFROPS_PACK(rc, cd->peer, reply, &status, 1, PMIX_STATUS); + if (PMIX_SUCCESS != rc) { + PMIX_ERROR_LOG(rc); + goto complete; + } + /* pack the returned data */ + PMIX_BFROPS_PACK(rc, cd->peer, reply, &ninfo, 1, PMIX_SIZE); + if (PMIX_SUCCESS != rc) { + PMIX_ERROR_LOG(rc); + goto complete; + } + if (0 < ninfo) { + PMIX_BFROPS_PACK(rc, cd->peer, reply, info, ninfo, PMIX_INFO); + if (PMIX_SUCCESS != rc) { + PMIX_ERROR_LOG(rc); + } + } + complete: // send reply - PMIX_SERVER_QUEUE_REPLY(cd->peer, 
cd->hdr.tag, reply); + PMIX_SERVER_QUEUE_REPLY(rc, cd->peer, cd->hdr.tag, reply); + if (PMIX_SUCCESS != rc) { + PMIX_RELEASE(reply); + } + // cleanup if (NULL != qcd->queries) { PMIX_QUERY_FREE(qcd->queries, qcd->nqueries); @@ -2752,6 +3040,68 @@ static void query_cbfunc(pmix_status_t status, } PMIX_RELEASE(qcd); PMIX_RELEASE(cd); + if (NULL != release_fn) { + release_fn(release_cbdata); + } +} + +static void monitor_cbfunc(pmix_status_t status, + pmix_info_t *info, size_t ninfo, + void *cbdata, + pmix_release_cbfunc_t release_fn, + void *release_cbdata) +{ + pmix_query_caddy_t *qcd = (pmix_query_caddy_t*)cbdata; + pmix_server_caddy_t *cd = (pmix_server_caddy_t*)qcd->cbdata; + pmix_buffer_t *reply; + pmix_status_t rc; + + pmix_output_verbose(2, pmix_server_globals.base_output, + "pmix:monitor callback with status %d", status); + + reply = PMIX_NEW(pmix_buffer_t); + if (NULL == reply) { + PMIX_ERROR_LOG(PMIX_ERR_NOMEM); + PMIX_RELEASE(cd); + return; + } + PMIX_BFROPS_PACK(rc, cd->peer, reply, &status, 1, PMIX_STATUS); + if (PMIX_SUCCESS != rc) { + PMIX_ERROR_LOG(rc); + goto complete; + } + /* pack the returned data */ + PMIX_BFROPS_PACK(rc, cd->peer, reply, &ninfo, 1, PMIX_SIZE); + if (PMIX_SUCCESS != rc) { + PMIX_ERROR_LOG(rc); + goto complete; + } + if (0 < ninfo) { + PMIX_BFROPS_PACK(rc, cd->peer, reply, info, ninfo, PMIX_INFO); + if (PMIX_SUCCESS != rc) { + PMIX_ERROR_LOG(rc); + } + } + + complete: + // send reply + PMIX_SERVER_QUEUE_REPLY(rc, cd->peer, cd->hdr.tag, reply); + if (PMIX_SUCCESS != rc) { + PMIX_RELEASE(reply); + } + + // cleanup + if (NULL != qcd->queries) { + PMIX_QUERY_FREE(qcd->queries, qcd->nqueries); + } + if (NULL != qcd->info) { + PMIX_INFO_FREE(qcd->info, qcd->ninfo); + } + PMIX_RELEASE(qcd); + PMIX_RELEASE(cd); + if (NULL != release_fn) { + release_fn(release_cbdata); + } } static void cred_cbfunc(pmix_status_t status, @@ -2805,7 +3155,11 @@ static void cred_cbfunc(pmix_status_t status, complete: // send reply - 
PMIX_SERVER_QUEUE_REPLY(cd->peer, cd->hdr.tag, reply); + PMIX_SERVER_QUEUE_REPLY(rc, cd->peer, cd->hdr.tag, reply); + if (PMIX_SUCCESS != rc) { + PMIX_RELEASE(reply); + } + // cleanup if (NULL != qcd->info) { PMIX_INFO_FREE(qcd->info, qcd->ninfo); @@ -2852,7 +3206,10 @@ static void validate_cbfunc(pmix_status_t status, complete: // send reply - PMIX_SERVER_QUEUE_REPLY(cd->peer, cd->hdr.tag, reply); + PMIX_SERVER_QUEUE_REPLY(rc, cd->peer, cd->hdr.tag, reply); + if (PMIX_SUCCESS != rc) { + PMIX_RELEASE(reply); + } // cleanup if (NULL != qcd->info) { PMIX_INFO_FREE(qcd->info, qcd->ninfo); @@ -2894,7 +3251,10 @@ static void _iofreg(int sd, short args, void *cbdata) pmix_output_verbose(2, pmix_server_globals.iof_output, "server:_iofreg reply being sent to %s:%u", scd->peer->info->pname.nspace, scd->peer->info->pname.rank); - PMIX_SERVER_QUEUE_REPLY(scd->peer, scd->hdr.tag, reply); + PMIX_SERVER_QUEUE_REPLY(rc, scd->peer, scd->hdr.tag, reply); + if (PMIX_SUCCESS != rc) { + PMIX_RELEASE(reply); + } cleanup: /* release the cached info */ @@ -2951,8 +3311,6 @@ static pmix_status_t server_switchyard(pmix_peer_t *peer, uint32_t tag, pmix_server_caddy_t *cd; pmix_proc_t proc; pmix_buffer_t *reply; - pmix_regevents_info_t *reginfo; - pmix_peer_events_info_t *prev; /* retrieve the cmd */ cnt = 1; @@ -2962,8 +3320,8 @@ static pmix_status_t server_switchyard(pmix_peer_t *peer, uint32_t tag, return rc; } pmix_output_verbose(2, pmix_server_globals.base_output, - "recvd pmix cmd %d from %s:%u", - cmd, peer->info->pname.nspace, peer->info->pname.rank); + "recvd pmix cmd %s from %s:%u", + pmix_command_string(cmd), peer->info->pname.nspace, peer->info->pname.rank); if (PMIX_REQ_CMD == cmd) { reply = PMIX_NEW(pmix_buffer_t); @@ -2976,7 +3334,10 @@ static pmix_status_t server_switchyard(pmix_peer_t *peer, uint32_t tag, PMIX_ERROR_LOG(rc); return rc; } - PMIX_SERVER_QUEUE_REPLY(peer, tag, reply); + PMIX_SERVER_QUEUE_REPLY(rc, peer, tag, reply); + if (PMIX_SUCCESS != rc) { + 
PMIX_RELEASE(reply); + } peer->nptr->ndelivered++; return PMIX_SUCCESS; } @@ -3001,7 +3362,10 @@ static pmix_status_t server_switchyard(pmix_peer_t *peer, uint32_t tag, if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); } - PMIX_SERVER_QUEUE_REPLY(peer, tag, reply); + PMIX_SERVER_QUEUE_REPLY(rc, peer, tag, reply); + if (PMIX_SUCCESS != rc) { + PMIX_RELEASE(reply); + } } return PMIX_SUCCESS; // don't reply twice } @@ -3025,20 +3389,9 @@ static pmix_status_t server_switchyard(pmix_peer_t *peer, uint32_t tag, if (PMIX_FINALIZE_CMD == cmd) { pmix_output_verbose(2, pmix_server_globals.base_output, "recvd FINALIZE"); - /* mark that this peer called finalize */ - peer->finalized = true; peer->nptr->nfinalized++; - /* since the client is finalizing, remove them from any event - * registrations they may still have on our list */ - PMIX_LIST_FOREACH(reginfo, &pmix_server_globals.events, pmix_regevents_info_t) { - PMIX_LIST_FOREACH(prev, ®info->peers, pmix_peer_events_info_t) { - if (prev->peer == peer) { - pmix_list_remove_item(®info->peers, &prev->super); - PMIX_RELEASE(prev); - break; - } - } - } + /* purge events */ + pmix_server_purge_events(peer, NULL); /* turn off the recv event - we shouldn't hear anything * more from this proc */ if (peer->recv_ev_active) { @@ -3048,14 +3401,18 @@ static pmix_status_t server_switchyard(pmix_peer_t *peer, uint32_t tag, PMIX_GDS_CADDY(cd, peer, tag); /* call the local server, if supported */ if (NULL != pmix_host_server.client_finalized) { - (void)strncpy(proc.nspace, peer->info->pname.nspace, PMIX_MAX_NSLEN); + pmix_strncpy(proc.nspace, peer->info->pname.nspace, PMIX_MAX_NSLEN); proc.rank = peer->info->pname.rank; /* now tell the host server */ - if (PMIX_SUCCESS == (rc = pmix_host_server.client_finalized(&proc, peer->info->server_object, - op_cbfunc2, cd))) { + rc = pmix_host_server.client_finalized(&proc, peer->info->server_object, + op_cbfunc2, cd); + if (PMIX_SUCCESS == rc) { /* don't reply to them ourselves - we will do so when the 
host * server calls us back */ return rc; + } else if (PMIX_OPERATION_SUCCEEDED == rc) { + /* they did it atomically */ + rc = PMIX_SUCCESS; } /* if the call doesn't succeed (e.g., they provided the stub * but return NOT_SUPPORTED), then the callback function @@ -3063,7 +3420,7 @@ static pmix_status_t server_switchyard(pmix_peer_t *peer, uint32_t tag, * any lingering references to this peer and answer * the client. Thus, we call the callback function ourselves * in this case */ - op_cbfunc2(PMIX_SUCCESS, cd); + op_cbfunc2(rc, cd); /* return SUCCESS as the cbfunc generated the return msg * and released the cd object */ return PMIX_SUCCESS; @@ -3120,14 +3477,18 @@ static pmix_status_t server_switchyard(pmix_peer_t *peer, uint32_t tag, if (PMIX_CONNECTNB_CMD == cmd) { PMIX_GDS_CADDY(cd, peer, tag); rc = pmix_server_connect(cd, buf, cnct_cbfunc); - PMIX_RELEASE(cd); + if (PMIX_SUCCESS != rc) { + PMIX_RELEASE(cd); + } return rc; } if (PMIX_DISCONNECTNB_CMD == cmd) { PMIX_GDS_CADDY(cd, peer, tag); rc = pmix_server_disconnect(cd, buf, discnct_cbfunc); - PMIX_RELEASE(cd); + if (PMIX_SUCCESS != rc) { + PMIX_RELEASE(cd); + } return rc; } @@ -3146,61 +3507,81 @@ static pmix_status_t server_switchyard(pmix_peer_t *peer, uint32_t tag, if (PMIX_NOTIFY_CMD == cmd) { PMIX_GDS_CADDY(cd, peer, tag); - rc = pmix_server_event_recvd_from_client(peer, buf, notifyerror_cbfunc, cd); + if (PMIX_SUCCESS != (rc = pmix_server_event_recvd_from_client(peer, buf, notifyerror_cbfunc, cd))) { + PMIX_RELEASE(cd); + } return rc; } if (PMIX_QUERY_CMD == cmd) { PMIX_GDS_CADDY(cd, peer, tag); - rc = pmix_server_query(peer, buf, query_cbfunc, cd); + if (PMIX_SUCCESS != (rc = pmix_server_query(peer, buf, query_cbfunc, cd))) { + PMIX_RELEASE(cd); + } return rc; } if (PMIX_LOG_CMD == cmd) { PMIX_GDS_CADDY(cd, peer, tag); - rc = pmix_server_log(peer, buf, op_cbfunc, cd); + if (PMIX_SUCCESS != (rc = pmix_server_log(peer, buf, op_cbfunc, cd))) { + PMIX_RELEASE(cd); + } return rc; } if (PMIX_ALLOC_CMD == cmd) 
{ PMIX_GDS_CADDY(cd, peer, tag); - rc = pmix_server_alloc(peer, buf, query_cbfunc, cd); + if (PMIX_SUCCESS != (rc = pmix_server_alloc(peer, buf, alloc_cbfunc, cd))) { + PMIX_RELEASE(cd); + } return rc; } if (PMIX_JOB_CONTROL_CMD == cmd) { PMIX_GDS_CADDY(cd, peer, tag); - rc = pmix_server_job_ctrl(peer, buf, query_cbfunc, cd); + if (PMIX_SUCCESS != (rc = pmix_server_job_ctrl(peer, buf, jctrl_cbfunc, cd))) { + PMIX_RELEASE(cd); + } return rc; } if (PMIX_MONITOR_CMD == cmd) { PMIX_GDS_CADDY(cd, peer, tag); - rc = pmix_server_monitor(peer, buf, query_cbfunc, cd); + if (PMIX_SUCCESS != (rc = pmix_server_monitor(peer, buf, monitor_cbfunc, cd))) { + PMIX_RELEASE(cd); + } return rc; } if (PMIX_GET_CREDENTIAL_CMD == cmd) { PMIX_GDS_CADDY(cd, peer, tag); - rc = pmix_server_get_credential(peer, buf, cred_cbfunc, cd); + if (PMIX_SUCCESS != (rc = pmix_server_get_credential(peer, buf, cred_cbfunc, cd))) { + PMIX_RELEASE(cd); + } return rc; } if (PMIX_VALIDATE_CRED_CMD == cmd) { PMIX_GDS_CADDY(cd, peer, tag); - rc = pmix_server_validate_credential(peer, buf, validate_cbfunc, cd); + if (PMIX_SUCCESS != (rc = pmix_server_validate_credential(peer, buf, validate_cbfunc, cd))) { + PMIX_RELEASE(cd); + } return rc; } if (PMIX_IOF_PULL_CMD == cmd) { PMIX_GDS_CADDY(cd, peer, tag); - rc = pmix_server_iofreg(peer, buf, iof_cbfunc, cd); + if (PMIX_SUCCESS != (rc = pmix_server_iofreg(peer, buf, iof_cbfunc, cd))) { + PMIX_RELEASE(cd); + } return rc; } if (PMIX_IOF_PUSH_CMD == cmd) { PMIX_GDS_CADDY(cd, peer, tag); - rc = pmix_server_iofstdin(peer, buf, op_cbfunc, cd); + if (PMIX_SUCCESS != (rc = pmix_server_iofstdin(peer, buf, op_cbfunc, cd))) { + PMIX_RELEASE(cd); + } return rc; } @@ -3228,10 +3609,16 @@ void pmix_server_message_handler(struct pmix_peer_t *pr, PMIX_ERROR_LOG(PMIX_ERR_NOMEM); return; } + if (PMIX_OPERATION_SUCCEEDED == ret) { + ret = PMIX_SUCCESS; + } PMIX_BFROPS_PACK(rc, pr, reply, &ret, 1, PMIX_STATUS); if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); } - 
PMIX_SERVER_QUEUE_REPLY(peer, hdr->tag, reply); + PMIX_SERVER_QUEUE_REPLY(rc, peer, hdr->tag, reply); + if (PMIX_SUCCESS != rc) { + PMIX_RELEASE(reply); + } } } diff --git a/opal/mca/pmix/pmix3x/pmix/src/server/pmix_server_get.c b/opal/mca/pmix/pmix3x/pmix/src/server/pmix_server_get.c index a0e474e596d..c8fe13cdd6e 100644 --- a/opal/mca/pmix/pmix3x/pmix/src/server/pmix_server_get.c +++ b/opal/mca/pmix/pmix3x/pmix/src/server/pmix_server_get.c @@ -1,8 +1,8 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* - * Copyright (c) 2014-2018 Intel, Inc. All rights reserved. - * Copyright (c) 2014-2015 Research Organization for Information Science - * and Technology (RIST). All rights reserved. + * Copyright (c) 2014-2019 Intel, Inc. All rights reserved. + * Copyright (c) 2014-2019 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * Copyright (c) 2014-2015 Artem Y. Polyakov . * All rights reserved. * Copyright (c) 2016 Mellanox Technologies, Inc. 
@@ -51,6 +51,7 @@ #include "src/mca/gds/gds.h" #include "src/util/argv.h" #include "src/util/error.h" +#include "src/util/name_fns.h" #include "src/util/output.h" #include "src/util/pmix_environ.h" @@ -84,7 +85,7 @@ PMIX_CLASS_INSTANCE(pmix_dmdx_reply_caddy_t, static void dmdx_cbfunc(pmix_status_t status, const char *data, size_t ndata, void *cbdata, pmix_release_cbfunc_t relfn, void *relcbdata); -static pmix_status_t _satisfy_request(pmix_nspace_t *ns, pmix_rank_t rank, +static pmix_status_t _satisfy_request(pmix_namespace_t *ns, pmix_rank_t rank, pmix_server_caddy_t *cd, pmix_modex_cbfunc_t cbfunc, void *cbdata, bool *scope); static pmix_status_t create_local_tracker(char nspace[], pmix_rank_t rank, @@ -119,13 +120,14 @@ pmix_status_t pmix_server_get(pmix_buffer_t *buf, pmix_rank_t rank; char *cptr; char nspace[PMIX_MAX_NSLEN+1]; - pmix_nspace_t *ns, *nptr; + pmix_namespace_t *ns, *nptr; pmix_info_t *info=NULL; size_t ninfo=0; pmix_dmdx_local_t *lcd; pmix_dmdx_request_t *req; bool local; bool localonly = false; + bool diffnspace = false; struct timeval tv = {0, 0}; pmix_buffer_t pbkt, pkt; pmix_byte_object_t bo; @@ -133,10 +135,10 @@ pmix_status_t pmix_server_get(pmix_buffer_t *buf, pmix_proc_t proc; char *data; size_t sz, n; - pmix_peer_t *peer; pmix_output_verbose(2, pmix_server_globals.get_output, - "recvd GET"); + "%s recvd GET", + PMIX_NAME_PRINT(&pmix_globals.myid)); /* setup */ memset(nspace, 0, sizeof(nspace)); @@ -148,7 +150,7 @@ pmix_status_t pmix_server_get(pmix_buffer_t *buf, PMIX_ERROR_LOG(rc); return rc; } - (void)strncpy(nspace, cptr, PMIX_MAX_NSLEN); + pmix_strncpy(nspace, cptr, PMIX_MAX_NSLEN); free(cptr); cnt = 1; PMIX_BFROPS_UNPACK(rc, cd->peer, buf, &rank, &cnt, PMIX_PROC_RANK); @@ -191,13 +193,19 @@ pmix_status_t pmix_server_get(pmix_buffer_t *buf, /* find the nspace object for this client */ nptr = NULL; - PMIX_LIST_FOREACH(ns, &pmix_server_globals.nspaces, pmix_nspace_t) { + PMIX_LIST_FOREACH(ns, &pmix_globals.nspaces, pmix_namespace_t) { 
if (0 == strcmp(nspace, ns->nspace)) { nptr = ns; break; } } + /* check if the nspace of the requestor is different from + * the nspace of the target process */ + if (!PMIX_CHECK_NSPACE(nspace, cd->peer->info->pname.nspace)) { + diffnspace = true; + } + pmix_output_verbose(2, pmix_server_globals.get_output, "%s:%d EXECUTE GET FOR %s:%d ON BEHALF OF %s:%d", pmix_globals.myid.nspace, @@ -245,14 +253,22 @@ pmix_status_t pmix_server_get(pmix_buffer_t *buf, if (PMIX_ERR_NOMEM == rc) { PMIX_INFO_FREE(info, ninfo); return rc; - } else if (PMIX_ERR_NOT_FOUND != rc) { - return rc; } - - /* do NOT create the nspace tracker here so any request - * by another local client that hits before the RM responds - * to our request will get added to the local tracker so - * they receive their data upon completion */ + if (PMIX_SUCCESS == rc) { + /* if they specified a timeout for this specific + * request, set it up now */ + if (0 < tv.tv_sec) { + pmix_event_evtimer_set(pmix_globals.evbase, &req->ev, + get_timeout, req); + pmix_event_evtimer_add(&req->ev, &tv); + req->event_active = true; + } + /* we already asked for this info - no need to + * do it again */ + return PMIX_SUCCESS; + } + /* only other return code is NOT_FOUND, indicating that + * we created a new tracker */ /* Its possible there will be no local processes on this * host, so lets ask for this explicitly. 
There can @@ -260,49 +276,73 @@ pmix_status_t pmix_server_get(pmix_buffer_t *buf, * up on its own, but at worst the direct modex * will simply overwrite the info later */ if (NULL != pmix_host_server.direct_modex) { - pmix_host_server.direct_modex(&lcd->proc, info, ninfo, dmdx_cbfunc, lcd); + rc = pmix_host_server.direct_modex(&lcd->proc, info, ninfo, dmdx_cbfunc, lcd); + if (PMIX_SUCCESS != rc) { + PMIX_INFO_FREE(info, ninfo); + pmix_list_remove_item(&pmix_server_globals.local_reqs, &lcd->super); + PMIX_RELEASE(lcd); + return rc; + } + /* if they specified a timeout for this specific + * request, set it up now */ + if (0 < tv.tv_sec) { + pmix_event_evtimer_set(pmix_globals.evbase, &req->ev, + get_timeout, req); + pmix_event_evtimer_add(&req->ev, &tv); + req->event_active = true; + } + } else { + /* if we don't have direct modex feature, just respond with "not found" */ + PMIX_INFO_FREE(info, ninfo); + pmix_list_remove_item(&pmix_server_globals.local_reqs, &lcd->super); + PMIX_RELEASE(lcd); + return PMIX_ERR_NOT_FOUND; } - /* if they specified a timeout, set it up now */ - if (0 < tv.tv_sec) { - pmix_event_evtimer_set(pmix_globals.evbase, &req->ev, - get_timeout, req); - pmix_event_evtimer_add(&req->ev, &tv); - req->event_active = true; - } return PMIX_SUCCESS; } - /* this nspace is known, so we can process the request. - * if the rank is wildcard, then they are asking for the - * job-level info for this nspace - provide it */ - if (PMIX_RANK_WILDCARD == rank) { + /* the target nspace is known, so we can process the request. 
+ * if the rank is wildcard, or the nspace is different, then + * they are asking for the job-level info for this nspace - provide it */ + if (PMIX_RANK_WILDCARD == rank || diffnspace) { /* see if we have the job-level info - we won't have it * if we have no local procs and haven't already asked * for it, so there is no guarantee we have it */ data = NULL; sz = 0; - (void)strncpy(proc.nspace, nspace, PMIX_MAX_NSLEN); + pmix_strncpy(proc.nspace, nspace, PMIX_MAX_NSLEN); proc.rank = PMIX_RANK_WILDCARD; /* if we have local procs for this nspace, then we * can retrieve the info from that GDS. Otherwise, * we need to retrieve it from our own */ PMIX_CONSTRUCT(&cb, pmix_cb_t); - peer = pmix_globals.mypeer; /* this data is for a local client, so give the gds the * option of returning a complete copy of the data, * or returning a pointer to local storage */ cb.proc = &proc; cb.scope = PMIX_SCOPE_UNDEF; cb.copy = false; - PMIX_GDS_FETCH_KV(rc, peer, &cb); + PMIX_GDS_FETCH_KV(rc, pmix_globals.mypeer, &cb); if (PMIX_SUCCESS != rc) { PMIX_DESTRUCT(&cb); return rc; } + /* if the requested rank is not WILDCARD, then retrieve the + * job-specific data for that rank - a scope of UNDEF + * will direct the GDS to provide it. 
Anything found will + * simply be added to the cb.kvs list */ + if (PMIX_RANK_WILDCARD != rank) { + proc.rank = rank; + PMIX_GDS_FETCH_KV(rc, pmix_globals.mypeer, &cb); + if (PMIX_SUCCESS != rc) { + PMIX_DESTRUCT(&cb); + return rc; + } + } PMIX_CONSTRUCT(&pkt, pmix_buffer_t); /* assemble the provided data into a byte object */ - PMIX_GDS_ASSEMB_KVS_REQ(rc, peer, &proc, &cb.kvs, &pkt, cd); + PMIX_GDS_ASSEMB_KVS_REQ(rc, pmix_globals.mypeer, &proc, &cb.kvs, &pkt, cd); if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); PMIX_DESTRUCT(&cb); @@ -312,7 +352,7 @@ pmix_status_t pmix_server_get(pmix_buffer_t *buf, PMIX_DESTRUCT(&pkt); /* pack it into the payload */ PMIX_CONSTRUCT(&pbkt, pmix_buffer_t); - PMIX_BFROPS_PACK(rc, cd->peer, &pbkt, &bo, 1, PMIX_BYTE_OBJECT); + PMIX_BFROPS_PACK(rc, pmix_globals.mypeer, &pbkt, &bo, 1, PMIX_BYTE_OBJECT); free(bo.bytes); if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); @@ -323,7 +363,11 @@ pmix_status_t pmix_server_get(pmix_buffer_t *buf, /* unload the resulting payload */ PMIX_UNLOAD_BUFFER(&pbkt, data, sz); PMIX_DESTRUCT(&pbkt); + /* call the internal callback function - it will + * release the cbdata */ cbfunc(PMIX_SUCCESS, data, sz, cbdata, relfn, data); + /* return success so the server doesn't duplicate + * the release of cbdata */ return PMIX_SUCCESS; } @@ -353,6 +397,7 @@ pmix_status_t pmix_server_get(pmix_buffer_t *buf, cbfunc, cbdata, &lcd, &req); if (PMIX_ERR_NOMEM == rc) { PMIX_INFO_FREE(info, ninfo); + return rc; } pmix_output_verbose(2, pmix_server_globals.get_output, "%s:%d TRACKER CREATED - WAITING", @@ -365,15 +410,20 @@ pmix_status_t pmix_server_get(pmix_buffer_t *buf, pmix_event_evtimer_add(&req->ev, &tv); req->event_active = true; } - return rc; + /* the peer object has been added to the new lcd tracker, + * so return success here */ + return PMIX_SUCCESS; } - /* see if we already have this data */ + /* if everyone has registered, see if we already have this data */ rc = _satisfy_request(nptr, rank, cd, cbfunc, cbdata, 
&local); if( PMIX_SUCCESS == rc ){ /* request was successfully satisfied */ PMIX_INFO_FREE(info, ninfo); - return rc; + /* return success as the satisfy_request function + * calls the cbfunc for us, and it will have + * released the cbdata object */ + return PMIX_SUCCESS; } pmix_output_verbose(2, pmix_server_globals.get_output, @@ -395,18 +445,24 @@ pmix_status_t pmix_server_get(pmix_buffer_t *buf, * we do, then we can just wait for it to arrive */ rc = create_local_tracker(nspace, rank, info, ninfo, cbfunc, cbdata, &lcd, &req); + if (PMIX_ERR_NOMEM == rc || NULL == lcd) { + /* we have a problem */ + PMIX_INFO_FREE(info, ninfo); + return PMIX_ERR_NOMEM; + } + /* if they specified a timeout, set it up now */ + if (0 < tv.tv_sec) { + pmix_event_evtimer_set(pmix_globals.evbase, &req->ev, + get_timeout, req); + pmix_event_evtimer_add(&req->ev, &tv); + req->event_active = true; + } if (PMIX_SUCCESS == rc) { /* we are already waiting for the data - nothing more * for us to do as the function added the new request * to the tracker for us */ return PMIX_SUCCESS; } - if (PMIX_ERR_NOT_FOUND != rc || NULL == lcd) { - /* we have a problem - e.g., out of memory */ - cbfunc(PMIX_ERR_NOT_FOUND, NULL, 0, cbdata, NULL, NULL); - PMIX_INFO_FREE(info, ninfo); - return rc; - } /* Getting here means that we didn't already have a request for * for data pending, and so we created a new tracker for this @@ -414,13 +470,6 @@ pmix_status_t pmix_server_get(pmix_buffer_t *buf, * if this is one, then we have nothing further to do - we will * fulfill the request once the process commits its data */ if (local) { - /* if they specified a timeout, set it up now */ - if (0 < tv.tv_sec) { - pmix_event_evtimer_set(pmix_globals.evbase, &req->ev, - get_timeout, req); - pmix_event_evtimer_add(&req->ev, &tv); - req->event_active = true; - } return PMIX_SUCCESS; } @@ -429,12 +478,11 @@ pmix_status_t pmix_server_get(pmix_buffer_t *buf, * whomever is hosting the target process */ if (NULL != 
pmix_host_server.direct_modex) { rc = pmix_host_server.direct_modex(&lcd->proc, info, ninfo, dmdx_cbfunc, lcd); - /* if they specified a timeout, set it up now */ - if (0 < tv.tv_sec) { - pmix_event_evtimer_set(pmix_globals.evbase, &req->ev, - get_timeout, req); - pmix_event_evtimer_add(&req->ev, &tv); - req->event_active = true; + if (PMIX_SUCCESS != rc) { + /* may have a function entry but not support the request */ + PMIX_INFO_FREE(info, ninfo); + pmix_list_remove_item(&pmix_server_globals.local_reqs, &lcd->super); + PMIX_RELEASE(lcd); } } else { pmix_output_verbose(2, pmix_server_globals.get_output, @@ -442,7 +490,6 @@ pmix_status_t pmix_server_get(pmix_buffer_t *buf, pmix_globals.myid.nspace, pmix_globals.myid.rank); /* if we don't have direct modex feature, just respond with "not found" */ - cbfunc(PMIX_ERR_NOT_FOUND, NULL, 0, cbdata, NULL, NULL); PMIX_INFO_FREE(info, ninfo); pmix_list_remove_item(&pmix_server_globals.local_reqs, &lcd->super); PMIX_RELEASE(lcd); @@ -490,7 +537,7 @@ static pmix_status_t create_local_tracker(char nspace[], pmix_rank_t rank, if (NULL == lcd){ return PMIX_ERR_NOMEM; } - strncpy(lcd->proc.nspace, nspace, PMIX_MAX_NSLEN); + pmix_strncpy(lcd->proc.nspace, nspace, PMIX_MAX_NSLEN); lcd->proc.rank = rank; lcd->info = info; lcd->ninfo = ninfo; @@ -515,9 +562,10 @@ static pmix_status_t create_local_tracker(char nspace[], pmix_rank_t rank, return rc; } -void pmix_pending_nspace_requests(pmix_nspace_t *nptr) +void pmix_pending_nspace_requests(pmix_namespace_t *nptr) { pmix_dmdx_local_t *cd, *cd_next; + pmix_status_t rc; /* Now that we know all local ranks, go along request list and ask for remote data * for the non-local ranks, and resolve all pending requests for local procs @@ -540,10 +588,12 @@ void pmix_pending_nspace_requests(pmix_nspace_t *nptr) /* if not found - this is remote process and we need to send * corresponding direct modex request */ - if( !found ){ - if( NULL != pmix_host_server.direct_modex ){ - 
pmix_host_server.direct_modex(&cd->proc, cd->info, cd->ninfo, dmdx_cbfunc, cd); - } else { + if (!found){ + rc = PMIX_ERR_NOT_SUPPORTED; + if (NULL != pmix_host_server.direct_modex){ + rc = pmix_host_server.direct_modex(&cd->proc, cd->info, cd->ninfo, dmdx_cbfunc, cd); + } + if (PMIX_SUCCESS != rc) { pmix_dmdx_request_t *req, *req_next; PMIX_LIST_FOREACH_SAFE(req, req_next, &cd->loc_reqs, pmix_dmdx_request_t) { req->cbfunc(PMIX_ERR_NOT_FOUND, NULL, 0, req->cbdata, NULL, NULL); @@ -557,7 +607,7 @@ void pmix_pending_nspace_requests(pmix_nspace_t *nptr) } } -static pmix_status_t _satisfy_request(pmix_nspace_t *nptr, pmix_rank_t rank, +static pmix_status_t _satisfy_request(pmix_namespace_t *nptr, pmix_rank_t rank, pmix_server_caddy_t *cd, pmix_modex_cbfunc_t cbfunc, void *cbdata, bool *local) @@ -573,6 +623,7 @@ static pmix_status_t _satisfy_request(pmix_nspace_t *nptr, pmix_rank_t rank, char *data = NULL; size_t sz = 0; pmix_scope_t scope = PMIX_SCOPE_UNDEF; + bool diffnspace = false; pmix_output_verbose(2, pmix_server_globals.get_output, "%s:%d SATISFY REQUEST CALLED", @@ -584,12 +635,20 @@ static pmix_status_t _satisfy_request(pmix_nspace_t *nptr, pmix_rank_t rank, * a remote peer, or due to data from a local client * having been committed */ PMIX_CONSTRUCT(&pbkt, pmix_buffer_t); - (void)strncpy(proc.nspace, nptr->nspace, PMIX_MAX_NSLEN); + pmix_strncpy(proc.nspace, nptr->nspace, PMIX_MAX_NSLEN); - /* if we have local clients of this nspace, then we use - * the corresponding GDS to retrieve the data. 
Otherwise, - * the data will have been stored under our GDS */ - if (0 < nptr->nlocalprocs) { + if (!PMIX_CHECK_NSPACE(nptr->nspace, cd->peer->info->pname.nspace)) { + diffnspace = true; + } + + /* if rank is PMIX_RANK_UNDEF, then it was stored in our GDS */ + if (PMIX_RANK_UNDEF == rank) { + scope = PMIX_GLOBAL; // we have to search everywhere + peer = pmix_globals.mypeer; + } else if (0 < nptr->nlocalprocs) { + /* if we have local clients of this nspace, then we use + * the corresponding GDS to retrieve the data. Otherwise, + * the data will have been stored under our GDS */ if (local) { *local = true; } @@ -629,8 +688,7 @@ static pmix_status_t _satisfy_request(pmix_nspace_t *nptr, pmix_rank_t rank, /* if they are asking about a rank from an nspace different * from their own, or they gave a rank of "wildcard", then * include a copy of the job-level info */ - if (PMIX_RANK_WILDCARD == rank || - 0 != strncmp(nptr->nspace, cd->peer->info->pname.nspace, PMIX_MAX_NSLEN)) { + if (PMIX_RANK_WILDCARD == rank || diffnspace) { proc.rank = PMIX_RANK_WILDCARD; PMIX_CONSTRUCT(&cb, pmix_cb_t); /* this data is requested by a local client, so give the gds the option @@ -643,7 +701,7 @@ static pmix_status_t _satisfy_request(pmix_nspace_t *nptr, pmix_rank_t rank, if (PMIX_SUCCESS == rc) { PMIX_CONSTRUCT(&pkt, pmix_buffer_t); /* assemble the provided data into a byte object */ - PMIX_GDS_ASSEMB_KVS_REQ(rc, cd->peer, &proc, &cb.kvs, &pkt, cd); + PMIX_GDS_ASSEMB_KVS_REQ(rc, pmix_globals.mypeer, &proc, &cb.kvs, &pkt, cd); if (rc != PMIX_SUCCESS) { PMIX_ERROR_LOG(rc); PMIX_DESTRUCT(&pkt); @@ -689,7 +747,8 @@ static pmix_status_t _satisfy_request(pmix_nspace_t *nptr, pmix_rank_t rank, /* retrieve the data for the specific rank they are asking about */ if (PMIX_RANK_WILDCARD != rank) { - if (!PMIX_PROC_IS_SERVER(peer) && !peer->commit_cnt) { + if (!PMIX_PROC_IS_SERVER(peer) && 0 == peer->commit_cnt) { + PMIX_ERROR_LOG(PMIX_ERR_NOT_FOUND); /* this condition works only for local requests, 
server does * count commits for local ranks, and check this count when * local request. @@ -712,7 +771,11 @@ static pmix_status_t _satisfy_request(pmix_nspace_t *nptr, pmix_rank_t rank, found = true; PMIX_CONSTRUCT(&pkt, pmix_buffer_t); /* assemble the provided data into a byte object */ - PMIX_GDS_ASSEMB_KVS_REQ(rc, cd->peer, &proc, &cb.kvs, &pkt, cd); + if (PMIX_RANK_UNDEF == rank || diffnspace) { + PMIX_GDS_ASSEMB_KVS_REQ(rc, pmix_globals.mypeer, &proc, &cb.kvs, &pkt, cd); + } else { + PMIX_GDS_ASSEMB_KVS_REQ(rc, cd->peer, &proc, &cb.kvs, &pkt, cd); + } if (rc != PMIX_SUCCESS) { PMIX_ERROR_LOG(rc); PMIX_DESTRUCT(&pkt); @@ -758,6 +821,7 @@ static pmix_status_t _satisfy_request(pmix_nspace_t *nptr, pmix_rank_t rank, } PMIX_DESTRUCT(&cb); } + PMIX_UNLOAD_BUFFER(&pbkt, data, sz); PMIX_DESTRUCT(&pbkt); @@ -771,7 +835,7 @@ static pmix_status_t _satisfy_request(pmix_nspace_t *nptr, pmix_rank_t rank, } /* Resolve pending requests to this namespace/rank */ -pmix_status_t pmix_pending_resolve(pmix_nspace_t *nptr, pmix_rank_t rank, +pmix_status_t pmix_pending_resolve(pmix_namespace_t *nptr, pmix_rank_t rank, pmix_status_t status, pmix_dmdx_local_t *lcd) { pmix_dmdx_local_t *cd, *ptr; @@ -783,7 +847,7 @@ pmix_status_t pmix_pending_resolve(pmix_nspace_t *nptr, pmix_rank_t rank, ptr = NULL; if (NULL != nptr) { PMIX_LIST_FOREACH(cd, &pmix_server_globals.local_reqs, pmix_dmdx_local_t) { - if (0 != strncmp(nptr->nspace, cd->proc.nspace, PMIX_MAX_NSLEN) || + if (!PMIX_CHECK_NSPACE(nptr->nspace, cd->proc.nspace) || rank != cd->proc.rank) { continue; } @@ -798,6 +862,13 @@ pmix_status_t pmix_pending_resolve(pmix_nspace_t *nptr, pmix_rank_t rank, ptr = lcd; } + /* if there are no local reqs on this request (e.g., only + * one proc requested it and that proc has died), then + * just remove the request */ + if (0 == pmix_list_get_size(&ptr->loc_reqs)) { + goto cleanup; + } + /* somebody was interested in this rank */ if (PMIX_SUCCESS != status){ /* if we've got an error for this 
request - just forward it*/ @@ -822,8 +893,10 @@ pmix_status_t pmix_pending_resolve(pmix_nspace_t *nptr, pmix_rank_t rank, } PMIX_RELEASE(scd); } + + cleanup: /* remove all requests to this rank and cleanup the corresponding structure */ - pmix_list_remove_item(&pmix_server_globals.local_reqs, (pmix_list_item_t*)ptr); + pmix_list_remove_item(&pmix_server_globals.local_reqs, &ptr->super); PMIX_RELEASE(ptr); return PMIX_SUCCESS; @@ -838,7 +911,7 @@ static void _process_dmdx_reply(int fd, short args, void *cbdata) pmix_rank_info_t *rinfo; int32_t cnt; pmix_kval_t *kv; - pmix_nspace_t *ns, *nptr; + pmix_namespace_t *ns, *nptr; pmix_status_t rc; pmix_list_t nspaces; pmix_nspace_caddy_t *nm; @@ -856,7 +929,7 @@ static void _process_dmdx_reply(int fd, short args, void *cbdata) /* find the nspace object for the proc whose data is being received */ nptr = NULL; - PMIX_LIST_FOREACH(ns, &pmix_server_globals.nspaces, pmix_nspace_t) { + PMIX_LIST_FOREACH(ns, &pmix_globals.nspaces, pmix_namespace_t) { if (0 == strcmp(caddy->lcd->proc.nspace, ns->nspace)) { nptr = ns; break; @@ -867,10 +940,10 @@ static void _process_dmdx_reply(int fd, short args, void *cbdata) /* We may not have this namespace because there are no local * processes from it running on this host - so just record it * so we know we have the data for any future requests */ - nptr = PMIX_NEW(pmix_nspace_t); + nptr = PMIX_NEW(pmix_namespace_t); nptr->nspace = strdup(caddy->lcd->proc.nspace); /* add to the list */ - pmix_list_append(&pmix_server_globals.nspaces, &nptr->super); + pmix_list_append(&pmix_globals.nspaces, &nptr->super); } /* if the request was successfully satisfied, then store the data. 
@@ -931,7 +1004,7 @@ static void _process_dmdx_reply(int fd, short args, void *cbdata) PMIX_DESTRUCT(&cb); goto complete; } - (void)strncpy(cb.proc->nspace, nm->ns->nspace, PMIX_MAX_NSLEN); + pmix_strncpy(cb.proc->nspace, nm->ns->nspace, PMIX_MAX_NSLEN); cb.proc->rank = PMIX_RANK_WILDCARD; cb.scope = PMIX_INTERNAL; cb.copy = false; diff --git a/opal/mca/pmix/pmix3x/pmix/src/server/pmix_server_ops.c b/opal/mca/pmix/pmix3x/pmix/src/server/pmix_server_ops.c index 570e1c82a42..37ec6c5b412 100644 --- a/opal/mca/pmix/pmix3x/pmix/src/server/pmix_server_ops.c +++ b/opal/mca/pmix/pmix3x/pmix/src/server/pmix_server_ops.c @@ -1,8 +1,8 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* - * Copyright (c) 2014-2018 Intel, Inc. All rights reserved. - * Copyright (c) 2014-2017 Research Organization for Information Science - * and Technology (RIST). All rights reserved. + * Copyright (c) 2014-2019 Intel, Inc. All rights reserved. + * Copyright (c) 2014-2018 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * Copyright (c) 2014-2015 Artem Y. Polyakov . * All rights reserved. * Copyright (c) 2016-2017 Mellanox Technologies, Inc. 
@@ -46,12 +46,16 @@ #ifdef HAVE_SYS_TYPES_H #include #endif +#ifdef HAVE_TIME_H +#include +#endif #include PMIX_EVENT_HEADER #include "src/class/pmix_hotel.h" #include "src/class/pmix_list.h" #include "src/mca/bfrops/bfrops.h" #include "src/mca/plog/plog.h" +#include "src/mca/psensor/psensor.h" #include "src/util/argv.h" #include "src/util/error.h" #include "src/util/output.h" @@ -115,16 +119,12 @@ pmix_status_t pmix_server_abort(pmix_peer_t *peer, pmix_buffer_t *buf, /* let the local host's server execute it */ if (NULL != pmix_host_server.abort) { - (void)strncpy(proc.nspace, peer->info->pname.nspace, PMIX_MAX_NSLEN); + pmix_strncpy(proc.nspace, peer->info->pname.nspace, PMIX_MAX_NSLEN); proc.rank = peer->info->pname.rank; rc = pmix_host_server.abort(&proc, peer->info->server_object, status, msg, procs, nprocs, cbfunc, cbdata); } else { rc = PMIX_ERR_NOT_SUPPORTED; - /* release the caller */ - if (NULL != cbfunc) { - cbfunc(rc, cbdata); - } } PMIX_PROC_FREE(procs, nprocs); @@ -144,7 +144,7 @@ pmix_status_t pmix_server_commit(pmix_peer_t *peer, pmix_buffer_t *buf) pmix_buffer_t b2, pbkt; pmix_kval_t *kp; pmix_scope_t scope; - pmix_nspace_t *nptr; + pmix_namespace_t *nptr; pmix_rank_info_t *info; pmix_proc_t proc; pmix_dmdx_remote_t *dcd, *dcdnext; @@ -155,7 +155,7 @@ pmix_status_t pmix_server_commit(pmix_peer_t *peer, pmix_buffer_t *buf) /* shorthand */ info = peer->info; nptr = peer->nptr; - (void)strncpy(proc.nspace, nptr->nspace, PMIX_MAX_NSLEN); + pmix_strncpy(proc.nspace, nptr->nspace, PMIX_MAX_NSLEN); proc.rank = info->pname.rank; pmix_output_verbose(2, pmix_server_globals.base_output, @@ -292,7 +292,7 @@ pmix_status_t pmix_server_commit(pmix_peer_t *peer, pmix_buffer_t *buf) * regardless of location * nprocs - the number of procs in the array */ -static pmix_server_trkr_t* get_tracker(pmix_proc_t *procs, +static pmix_server_trkr_t* get_tracker(char *id, pmix_proc_t *procs, size_t nprocs, pmix_cmd_t type) { pmix_server_trkr_t *trk; @@ -303,7 +303,7 @@ static 
pmix_server_trkr_t* get_tracker(pmix_proc_t *procs, "get_tracker called with %d procs", (int)nprocs); /* bozo check - should never happen outside of programmer error */ - if (NULL == procs) { + if (NULL == procs && NULL == id) { PMIX_ERROR_LOG(PMIX_ERR_BAD_PARAM); return NULL; } @@ -316,28 +316,35 @@ static pmix_server_trkr_t* get_tracker(pmix_proc_t *procs, * shouldn't take long */ PMIX_LIST_FOREACH(trk, &pmix_server_globals.collectives, pmix_server_trkr_t) { /* Collective operation if unique identified by - * the set of participating processes and the type of collective + * the set of participating processes and the type of collective, + * or by the operation ID */ - if (nprocs != trk->npcs) { - continue; - } - if (type != trk->type) { - continue; - } - matches = 0; - for (i=0; i < nprocs; i++) { - /* the procs may be in different order, so we have - * to do an exhaustive search */ - for (j=0; j < trk->npcs; j++) { - if (0 == strcmp(procs[i].nspace, trk->pcs[j].nspace) && - procs[i].rank == trk->pcs[j].rank) { - ++matches; - break; + if (NULL != id) { + if (NULL != trk->id && 0 == strcmp(id, trk->id)) { + return trk; + } + } else { + if (nprocs != trk->npcs) { + continue; + } + if (type != trk->type) { + continue; + } + matches = 0; + for (i=0; i < nprocs; i++) { + /* the procs may be in different order, so we have + * to do an exhaustive search */ + for (j=0; j < trk->npcs; j++) { + if (0 == strcmp(procs[i].nspace, trk->pcs[j].nspace) && + procs[i].rank == trk->pcs[j].rank) { + ++matches; + break; + } } } - } - if (trk->npcs == matches) { - return trk; + if (trk->npcs == matches) { + return trk; + } } } /* No tracker was found */ @@ -360,14 +367,15 @@ static pmix_server_trkr_t* get_tracker(pmix_proc_t *procs, * regardless of location * nprocs - the number of procs in the array */ -static pmix_server_trkr_t* new_tracker(pmix_proc_t *procs, +static pmix_server_trkr_t* new_tracker(char *id, pmix_proc_t *procs, size_t nprocs, pmix_cmd_t type) { pmix_server_trkr_t 
*trk; size_t i; bool all_def; - pmix_nspace_t *nptr, *ns; + pmix_namespace_t *nptr, *ns; pmix_rank_info_t *info; + pmix_rank_t ns_local = 0; pmix_output_verbose(5, pmix_server_globals.base_output, "new_tracker called with %d procs", (int)nprocs); @@ -379,7 +387,8 @@ static pmix_server_trkr_t* new_tracker(pmix_proc_t *procs, } pmix_output_verbose(5, pmix_server_globals.base_output, - "adding new tracker with %d procs", (int)nprocs); + "adding new tracker %s with %d procs", + (NULL == id) ? "NO-ID" : id, (int)nprocs); /* this tracker is new - create it */ trk = PMIX_NEW(pmix_server_trkr_t); @@ -388,26 +397,35 @@ static pmix_server_trkr_t* new_tracker(pmix_proc_t *procs, return NULL; } - /* copy the procs */ - PMIX_PROC_CREATE(trk->pcs, nprocs); - if (NULL == trk->pcs) { - PMIX_ERROR_LOG(PMIX_ERR_NOMEM); - PMIX_RELEASE(trk); - return NULL; + if (NULL != id) { + trk->id = strdup(id); + } + + if (NULL != procs) { + /* copy the procs */ + PMIX_PROC_CREATE(trk->pcs, nprocs); + if (NULL == trk->pcs) { + PMIX_ERROR_LOG(PMIX_ERR_NOMEM); + PMIX_RELEASE(trk); + return NULL; + } + memcpy(trk->pcs, procs, nprocs * sizeof(pmix_proc_t)); + trk->npcs = nprocs; } - trk->npcs = nprocs; trk->type = type; all_def = true; for (i=0; i < nprocs; i++) { - (void)strncpy(trk->pcs[i].nspace, procs[i].nspace, PMIX_MAX_NSLEN); - trk->pcs[i].rank = procs[i].rank; + if (NULL == id) { + pmix_strncpy(trk->pcs[i].nspace, procs[i].nspace, PMIX_MAX_NSLEN); + trk->pcs[i].rank = procs[i].rank; + } if (!all_def) { continue; } /* is this nspace known to us? */ nptr = NULL; - PMIX_LIST_FOREACH(ns, &pmix_server_globals.nspaces, pmix_nspace_t) { + PMIX_LIST_FOREACH(ns, &pmix_globals.nspaces, pmix_namespace_t) { if (0 == strcmp(procs[i].nspace, ns->nspace)) { nptr = ns; break; @@ -433,6 +451,7 @@ static pmix_server_trkr_t* new_tracker(pmix_proc_t *procs, * of the loop */ } /* is this one of my local ranks? 
*/ + ns_local = 0; PMIX_LIST_FOREACH(info, &nptr->ranks, pmix_rank_info_t) { if (procs[i].rank == info->pname.rank || PMIX_RANK_WILDCARD == procs[i].rank) { @@ -440,12 +459,26 @@ static pmix_server_trkr_t* new_tracker(pmix_proc_t *procs, "adding local proc %s.%d to tracker", info->pname.nspace, info->pname.rank); /* track the count */ - ++trk->nlocal; + ns_local++; if (PMIX_RANK_WILDCARD != procs[i].rank) { break; } } } + + trk->nlocal += ns_local; + if (!ns_local) { + trk->local = false; + } else if (PMIX_RANK_WILDCARD == procs[i].rank) { + /* If proc is a wildcard we need to additionally check + * that all of the processes in the namespace were + * locally found. + * Otherwise this tracker is not local + */ + if (ns_local != nptr->nprocs) { + trk->local = false; + } + } } if (all_def) { trk->def_complete = true; @@ -550,18 +583,23 @@ pmix_status_t pmix_server_fence(pmix_server_caddy_t *cd, /* see if we are to collect data or enforce a timeout - we don't internally care * about any other directives */ for (n=0; n < ninfo; n++) { - if (0 == strcmp(info[n].key, PMIX_COLLECT_DATA)) { - collect_data = true; - } else if (0 == strncmp(info[n].key, PMIX_TIMEOUT, PMIX_MAX_KEYLEN)) { - tv.tv_sec = info[n].value.data.uint32; + if (PMIX_CHECK_KEY(&info[n], PMIX_COLLECT_DATA)) { + collect_data = PMIX_INFO_TRUE(&info[n]); + } else if (PMIX_CHECK_KEY(&info[n], PMIX_TIMEOUT)) { + PMIX_VALUE_GET_NUMBER(rc, &info[n].value, tv.tv_sec, uint32_t); + if (PMIX_SUCCESS != rc) { + PMIX_PROC_FREE(procs, nprocs); + PMIX_INFO_FREE(info, ninfo); + return rc; + } } } } /* find/create the local tracker for this operation */ - if (NULL == (trk = get_tracker(procs, nprocs, PMIX_FENCENB_CMD))) { + if (NULL == (trk = get_tracker(NULL, procs, nprocs, PMIX_FENCENB_CMD))) { /* If no tracker was found - create and initialize it once */ - if (NULL == (trk = new_tracker(procs, nprocs, PMIX_FENCENB_CMD))) { + if (NULL == (trk = new_tracker(NULL, procs, nprocs, PMIX_FENCENB_CMD))) { /* only if a bozo 
error occurs */ PMIX_ERROR_LOG(PMIX_ERROR); /* DO NOT HANG */ @@ -628,6 +666,37 @@ pmix_status_t pmix_server_fence(pmix_server_caddy_t *cd, pmix_list_get_size(&trk->local_cbs) == trk->nlocal) { pmix_output_verbose(2, pmix_server_globals.base_output, "fence complete"); + /* if this is a purely local fence (i.e., all participants are local), + * then it is done and we notify accordingly */ + if (trk->local) { + /* the modexcbfunc thread-shifts the call prior to processing, + * so it is okay to call it directly from here. The switchyard + * will acknowledge successful acceptance of the fence request, + * but the client still requires a return from the callback in + * that scenario, so we leave this caddy on the list of local cbs */ + trk->modexcbfunc(PMIX_SUCCESS, NULL, 0, trk, NULL, NULL); + rc = PMIX_SUCCESS; + goto cleanup; + } + /* this fence involves non-local procs - check if the + * host supports it */ + if (NULL == pmix_host_server.fence_nb) { + rc = PMIX_ERR_NOT_SUPPORTED; + /* clear the caddy from this tracker so it can be + * released upon return - the switchyard will send an + * error to this caller, and so the fence completion + * function doesn't need to do so */ + pmix_list_remove_item(&trk->local_cbs, &cd->super); + cd->trk = NULL; + /* we need to ensure that all other local participants don't + * just hang waiting for the error return, so execute + * the fence completion function - it threadshifts the call + * prior to processing, so it is okay to call it directly + * from here */ + trk->host_called = false; // the host will not be calling us back + trk->modexcbfunc(rc, NULL, 0, trk, NULL, NULL); + goto cleanup; + } /* if the user asked us to collect data, then we have * to provide any locally collected data to the host * server so they can circulate it - only take data @@ -649,7 +718,7 @@ pmix_status_t pmix_server_fence(pmix_server_caddy_t *cd, PMIX_LIST_FOREACH(scd, &trk->local_cbs, pmix_server_caddy_t) { /* get any remote contribution - note that 
there * may not be a contribution */ - (void)strncpy(pcs.nspace, scd->peer->info->pname.nspace, PMIX_MAX_NSLEN); + pmix_strncpy(pcs.nspace, scd->peer->info->pname.nspace, PMIX_MAX_NSLEN); pcs.rank = scd->peer->info->pname.rank; PMIX_CONSTRUCT(&cb, pmix_cb_t); cb.proc = &pcs; @@ -702,15 +771,52 @@ pmix_status_t pmix_server_fence(pmix_server_caddy_t *cd, PMIX_BYTE_OBJECT_DESTRUCT(&bo); // releases the data if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); - PMIX_DESTRUCT(&cb); + PMIX_DESTRUCT(&bucket); + /* clear the caddy from this tracker so it can be + * released upon return - the switchyard will send an + * error to this caller, and so the fence completion + * function doesn't need to do so */ + pmix_list_remove_item(&trk->local_cbs, &cd->super); + cd->trk = NULL; + /* we need to ensure that all other local participants don't + * just hang waiting for the error return, so execute + * the fence completion function - it threadshifts the call + * prior to processing, so it is okay to call it directly + * from here */ + trk->modexcbfunc(rc, NULL, 0, trk, NULL, NULL); goto cleanup; } /* now unload the blob and pass it upstairs */ PMIX_UNLOAD_BUFFER(&bucket, data, sz); PMIX_DESTRUCT(&bucket); - pmix_host_server.fence_nb(trk->pcs, trk->npcs, - trk->info, trk->ninfo, - data, sz, trk->modexcbfunc, trk); + trk->host_called = true; + rc = pmix_host_server.fence_nb(trk->pcs, trk->npcs, + trk->info, trk->ninfo, + data, sz, trk->modexcbfunc, trk); + if (PMIX_SUCCESS != rc && PMIX_OPERATION_SUCCEEDED != rc) { + /* clear the caddy from this tracker so it can be + * released upon return - the switchyard will send an + * error to this caller, and so the fence completion + * function doesn't need to do so */ + pmix_list_remove_item(&trk->local_cbs, &cd->super); + cd->trk = NULL; + /* we need to ensure that all other local participants don't + * just hang waiting for the error return, so execute + * the fence completion function - it threadshifts the call + * prior to processing, so it 
is okay to call it directly + * from here */ + trk->host_called = false; // the host will not be calling us back + trk->modexcbfunc(rc, NULL, 0, trk, NULL, NULL); + } else if (PMIX_OPERATION_SUCCEEDED == rc) { + /* the operation was atomically completed and the host will + * not be calling us back - ensure we notify all participants. + * the modexcbfunc thread-shifts the call prior to processing, + * so it is okay to call it directly from here */ + trk->host_called = false; // the host will not be calling us back + trk->modexcbfunc(PMIX_SUCCESS, NULL, 0, trk, NULL, NULL); + /* ensure that the switchyard doesn't release the caddy */ + rc = PMIX_SUCCESS; + } } cleanup: @@ -791,12 +897,12 @@ pmix_status_t pmix_server_publish(pmix_peer_t *peer, goto cleanup; } } - (void)strncpy(cd->info[cd->ninfo-1].key, PMIX_USERID, PMIX_MAX_KEYLEN); + pmix_strncpy(cd->info[cd->ninfo-1].key, PMIX_USERID, PMIX_MAX_KEYLEN); cd->info[cd->ninfo-1].value.type = PMIX_UINT32; cd->info[cd->ninfo-1].value.data.uint32 = uid; /* call the local server */ - (void)strncpy(proc.nspace, peer->info->pname.nspace, PMIX_MAX_NSLEN); + pmix_strncpy(proc.nspace, peer->info->pname.nspace, PMIX_MAX_NSLEN); proc.rank = peer->info->pname.rank; rc = pmix_host_server.publish(&proc, cd->info, cd->ninfo, opcbfunc, cd); @@ -905,12 +1011,12 @@ pmix_status_t pmix_server_lookup(pmix_peer_t *peer, goto cleanup; } } - (void)strncpy(cd->info[cd->ninfo-1].key, PMIX_USERID, PMIX_MAX_KEYLEN); + pmix_strncpy(cd->info[cd->ninfo-1].key, PMIX_USERID, PMIX_MAX_KEYLEN); cd->info[cd->ninfo-1].value.type = PMIX_UINT32; cd->info[cd->ninfo-1].value.data.uint32 = uid; /* call the local server */ - (void)strncpy(proc.nspace, peer->info->pname.nspace, PMIX_MAX_NSLEN); + pmix_strncpy(proc.nspace, peer->info->pname.nspace, PMIX_MAX_NSLEN); proc.rank = peer->info->pname.rank; rc = pmix_host_server.lookup(&proc, cd->keys, cd->info, cd->ninfo, lkcbfunc, cd); @@ -1001,12 +1107,12 @@ pmix_status_t pmix_server_unpublish(pmix_peer_t *peer, goto 
cleanup; } } - (void)strncpy(cd->info[cd->ninfo-1].key, PMIX_USERID, PMIX_MAX_KEYLEN); + pmix_strncpy(cd->info[cd->ninfo-1].key, PMIX_USERID, PMIX_MAX_KEYLEN); cd->info[cd->ninfo-1].value.type = PMIX_UINT32; cd->info[cd->ninfo-1].value.data.uint32 = uid; /* call the local server */ - (void)strncpy(proc.nspace, peer->info->pname.nspace, PMIX_MAX_NSLEN); + pmix_strncpy(proc.nspace, peer->info->pname.nspace, PMIX_MAX_NSLEN); proc.rank = peer->info->pname.rank; rc = pmix_host_server.unpublish(&proc, cd->keys, cd->info, cd->ninfo, opcbfunc, cd); @@ -1028,88 +1134,90 @@ static void spcbfunc(pmix_status_t status, { pmix_setup_caddy_t *cd = (pmix_setup_caddy_t*)cbdata; pmix_iof_req_t *req; - pmix_setup_caddy_t *occupant; - int i; pmix_buffer_t *msg; pmix_status_t rc; + pmix_iof_cache_t *iof, *ionext; /* if it was successful, and there are IOF requests, then * register them now */ if (PMIX_SUCCESS == status && PMIX_FWD_NO_CHANNELS != cd->channels) { /* record the request */ req = PMIX_NEW(pmix_iof_req_t); - if (NULL != req) { - PMIX_RETAIN(cd->peer); - req->peer = cd->peer; - req->pname.nspace = strdup(nspace); - req->pname.rank = PMIX_RANK_WILDCARD; - req->channels = cd->channels; - pmix_list_append(&pmix_globals.iof_requests, &req->super); + if (NULL == req) { + status = PMIX_ERR_NOMEM; + goto cleanup; } + PMIX_RETAIN(cd->peer); + req->peer = cd->peer; + req->pname.nspace = strdup(nspace); + req->pname.rank = PMIX_RANK_WILDCARD; + req->channels = cd->channels; + pmix_list_append(&pmix_globals.iof_requests, &req->super); /* process any cached IO */ - for (i=0; i < PMIX_IOF_HOTEL_SIZE; i++) { - pmix_hotel_knock(&pmix_server_globals.iof, PMIX_IOF_HOTEL_SIZE-i-1, (void**)&occupant); - if (NULL != occupant) { - if (!(occupant->channels & req->channels)) { - continue; - } - /* if the source matches the request, then forward this along */ - if (0 != strncmp(occupant->procs->nspace, req->pname.nspace, PMIX_MAX_NSLEN) || - (PMIX_RANK_WILDCARD != req->pname.rank && 
occupant->procs->rank != req->pname.rank)) { - continue; - } - /* never forward back to the source! This can happen if the source - * is a launcher */ - if (0 == strncmp(occupant->procs->nspace, req->peer->info->pname.nspace, PMIX_MAX_NSLEN) && - occupant->procs->rank == req->peer->info->pname.rank) { - continue; - } - /* setup the msg */ - if (NULL == (msg = PMIX_NEW(pmix_buffer_t))) { - PMIX_ERROR_LOG(PMIX_ERR_OUT_OF_RESOURCE); - rc = PMIX_ERR_OUT_OF_RESOURCE; - break; - } - /* provide the source */ - PMIX_BFROPS_PACK(rc, req->peer, msg, occupant->procs, 1, PMIX_PROC); - if (PMIX_SUCCESS != rc) { - PMIX_ERROR_LOG(rc); - PMIX_RELEASE(msg); - break; - } - /* provide the channel */ - PMIX_BFROPS_PACK(rc, req->peer, msg, &occupant->channels, 1, PMIX_IOF_CHANNEL); - if (PMIX_SUCCESS != rc) { - PMIX_ERROR_LOG(rc); - PMIX_RELEASE(msg); - break; - } - /* pack the data */ - PMIX_BFROPS_PACK(rc, req->peer, msg, occupant->bo, 1, PMIX_BYTE_OBJECT); - if (PMIX_SUCCESS != rc) { - PMIX_ERROR_LOG(rc); - PMIX_RELEASE(msg); - break; - } - /* send it to the requestor */ - PMIX_PTL_SEND_ONEWAY(rc, req->peer, msg, PMIX_PTL_TAG_IOF); - if (PMIX_SUCCESS != rc) { - PMIX_ERROR_LOG(rc); - PMIX_RELEASE(msg); - } - /* remove it from the hotel since it has now been forwarded */ - pmix_hotel_checkout(&pmix_server_globals.iof, PMIX_IOF_HOTEL_SIZE-i-1); - PMIX_RELEASE(occupant); + PMIX_LIST_FOREACH_SAFE(iof, ionext, &pmix_server_globals.iof, pmix_iof_cache_t) { + /* if the channels don't match, then ignore it */ + if (!(iof->channel & req->channels)) { + continue; + } + /* if the source does not match the request, then ignore it */ + if (!PMIX_CHECK_PROCID(&iof->source, &req->pname)) { + continue; + } + /* never forward back to the source! 
This can happen if the source + * is a launcher */ + if (PMIX_CHECK_PROCID(&iof->source, &req->peer->info->pname)) { + continue; + } + pmix_output_verbose(2, pmix_server_globals.iof_output, + "PMIX:SERVER:SPAWN delivering cached IOF from %s:%d to %s:%d", + iof->source.nspace, iof->source.rank, + req->pname.nspace, req->pname.rank); + /* setup the msg */ + if (NULL == (msg = PMIX_NEW(pmix_buffer_t))) { + PMIX_ERROR_LOG(PMIX_ERR_OUT_OF_RESOURCE); + rc = PMIX_ERR_OUT_OF_RESOURCE; + break; + } + /* provide the source */ + PMIX_BFROPS_PACK(rc, req->peer, msg, &iof->source, 1, PMIX_PROC); + if (PMIX_SUCCESS != rc) { + PMIX_ERROR_LOG(rc); + PMIX_RELEASE(msg); + break; + } + /* provide the channel */ + PMIX_BFROPS_PACK(rc, req->peer, msg, &iof->channel, 1, PMIX_IOF_CHANNEL); + if (PMIX_SUCCESS != rc) { + PMIX_ERROR_LOG(rc); + PMIX_RELEASE(msg); + break; + } + /* pack the data */ + PMIX_BFROPS_PACK(rc, req->peer, msg, iof->bo, 1, PMIX_BYTE_OBJECT); + if (PMIX_SUCCESS != rc) { + PMIX_ERROR_LOG(rc); + PMIX_RELEASE(msg); + break; + } + /* send it to the requestor */ + PMIX_PTL_SEND_ONEWAY(rc, req->peer, msg, PMIX_PTL_TAG_IOF); + if (PMIX_SUCCESS != rc) { + PMIX_ERROR_LOG(rc); + PMIX_RELEASE(msg); } + /* remove it from the list since it has now been forwarded */ + pmix_list_remove_item(&pmix_server_globals.iof, &iof->super); + PMIX_RELEASE(iof); } } + cleanup: /* cleanup the caddy */ if (NULL != cd->info) { PMIX_INFO_FREE(cd->info, cd->ninfo); } if (NULL != cd->apps) { - PMIX_APP_CREATE(cd->apps, cd->napps); + PMIX_APP_FREE(cd->apps, cd->napps); } if (NULL != cd->spcbfunc) { cd->spcbfunc(status, nspace, cd->cbdata); @@ -1133,7 +1241,6 @@ pmix_status_t pmix_server_spawn(pmix_peer_t *peer, "recvd SPAWN from %s:%d", peer->info->pname.nspace, peer->info->pname.rank); if (NULL == pmix_host_server.spawn) { - PMIX_ERROR_LOG(PMIX_ERR_NOT_SUPPORTED); return PMIX_ERR_NOT_SUPPORTED; } @@ -1199,7 +1306,8 @@ pmix_status_t pmix_server_spawn(pmix_peer_t *peer, } } } - /* we will construct any 
required iof request tracker upon completion of the spawn */ + /* we will construct any required iof request tracker upon completion of the spawn + * as we need the nspace of the spawned application! */ } /* add the directive to the end */ if (PMIX_PROC_IS_TOOL(peer)) { @@ -1241,7 +1349,7 @@ pmix_status_t pmix_server_spawn(pmix_peer_t *peer, } } /* call the local server */ - (void)strncpy(proc.nspace, peer->info->pname.nspace, PMIX_MAX_NSLEN); + pmix_strncpy(proc.nspace, peer->info->pname.nspace, PMIX_MAX_NSLEN); proc.rank = peer->info->pname.rank; rc = pmix_host_server.spawn(&proc, cd->info, cd->ninfo, cd->apps, cd->napps, spcbfunc, cd); @@ -1325,15 +1433,11 @@ pmix_status_t pmix_server_disconnect(pmix_server_caddy_t *cd, } /* find/create the local tracker for this operation */ - if (NULL == (trk = get_tracker(procs, nprocs, PMIX_DISCONNECTNB_CMD))) { + if (NULL == (trk = get_tracker(NULL, procs, nprocs, PMIX_DISCONNECTNB_CMD))) { /* we don't have this tracker yet, so get a new one */ - if (NULL == (trk = new_tracker(procs, nprocs, PMIX_DISCONNECTNB_CMD))) { + if (NULL == (trk = new_tracker(NULL, procs, nprocs, PMIX_DISCONNECTNB_CMD))) { /* only if a bozo error occurs */ PMIX_ERROR_LOG(PMIX_ERROR); - /* DO NOT HANG */ - if (NULL != cbfunc) { - cbfunc(PMIX_ERROR, cd); - } rc = PMIX_ERROR; goto cleanup; } @@ -1351,7 +1455,6 @@ pmix_status_t pmix_server_disconnect(pmix_server_caddy_t *cd, /* add this contributor to the tracker so they get * notified when we are done */ - PMIX_RETAIN(cd); // prevent the caddy from being released when we return pmix_list_append(&trk->local_cbs, &cd->super); /* if all local contributions have been received, * let the local host's server know that we are at the @@ -1359,7 +1462,32 @@ pmix_status_t pmix_server_disconnect(pmix_server_caddy_t *cd, * across all participants has been completed */ if (trk->def_complete && pmix_list_get_size(&trk->local_cbs) == trk->nlocal) { + trk->host_called = true; rc = pmix_host_server.disconnect(trk->pcs, 
trk->npcs, trk->info, trk->ninfo, cbfunc, trk); + if (PMIX_SUCCESS != rc && PMIX_OPERATION_SUCCEEDED != rc) { + /* clear the caddy from this tracker so it can be + * released upon return - the switchyard will send an + * error to this caller, and so the op completion + * function doesn't need to do so */ + pmix_list_remove_item(&trk->local_cbs, &cd->super); + cd->trk = NULL; + /* we need to ensure that all other local participants don't + * just hang waiting for the error return, so execute + * the op completion function - it threadshifts the call + * prior to processing, so it is okay to call it directly + * from here */ + trk->host_called = false; // the host will not be calling us back + cbfunc(rc, trk); + } else if (PMIX_OPERATION_SUCCEEDED == rc) { + /* the operation was atomically completed and the host will + * not be calling us back - ensure we notify all participants. + * the cbfunc thread-shifts the call prior to processing, + * so it is okay to call it directly from here */ + trk->host_called = false; // the host will not be calling us back + cbfunc(PMIX_SUCCESS, trk); + /* ensure that the switchyard doesn't release the caddy */ + rc = PMIX_SUCCESS; + } } else { rc = PMIX_SUCCESS; } @@ -1469,9 +1597,9 @@ pmix_status_t pmix_server_connect(pmix_server_caddy_t *cd, } /* find/create the local tracker for this operation */ - if (NULL == (trk = get_tracker(procs, nprocs, PMIX_CONNECTNB_CMD))) { + if (NULL == (trk = get_tracker(NULL, procs, nprocs, PMIX_CONNECTNB_CMD))) { /* we don't have this tracker yet, so get a new one */ - if (NULL == (trk = new_tracker(procs, nprocs, PMIX_CONNECTNB_CMD))) { + if (NULL == (trk = new_tracker(NULL, procs, nprocs, PMIX_CONNECTNB_CMD))) { /* only if a bozo error occurs */ PMIX_ERROR_LOG(PMIX_ERROR); /* DO NOT HANG */ @@ -1495,17 +1623,7 @@ pmix_status_t pmix_server_connect(pmix_server_caddy_t *cd, /* add this contributor to the tracker so they get * notified when we are done */ - PMIX_RETAIN(cd); // prevent the caddy from 
being released when we return pmix_list_append(&trk->local_cbs, &cd->super); - /* if a timeout was specified, set it */ - if (0 < tv.tv_sec) { - PMIX_RETAIN(trk); - cd->trk = trk; - pmix_event_evtimer_set(pmix_globals.evbase, &cd->ev, - connect_timeout, cd); - pmix_event_evtimer_add(&cd->ev, &tv); - cd->event_active = true; - } /* if all local contributions have been received, * let the local host's server know that we are at the @@ -1513,10 +1631,44 @@ pmix_status_t pmix_server_connect(pmix_server_caddy_t *cd, * across all participants has been completed */ if (trk->def_complete && pmix_list_get_size(&trk->local_cbs) == trk->nlocal) { + trk->host_called = true; rc = pmix_host_server.connect(trk->pcs, trk->npcs, trk->info, trk->ninfo, cbfunc, trk); + if (PMIX_SUCCESS != rc && PMIX_OPERATION_SUCCEEDED != rc) { + /* clear the caddy from this tracker so it can be + * released upon return - the switchyard will send an + * error to this caller, and so the op completion + * function doesn't need to do so */ + pmix_list_remove_item(&trk->local_cbs, &cd->super); + cd->trk = NULL; + /* we need to ensure that all other local participants don't + * just hang waiting for the error return, so execute + * the op completion function - it threadshifts the call + * prior to processing, so it is okay to call it directly + * from here */ + trk->host_called = false; // the host will not be calling us back + cbfunc(rc, trk); + } else if (PMIX_OPERATION_SUCCEEDED == rc) { + /* the operation was atomically completed and the host will + * not be calling us back - ensure we notify all participants. 
+ * the cbfunc thread-shifts the call prior to processing, + * so it is okay to call it directly from here */ + trk->host_called = false; // the host will not be calling us back + cbfunc(PMIX_SUCCESS, trk); + /* ensure that the switchyard doesn't release the caddy */ + rc = PMIX_SUCCESS; + } } else { rc = PMIX_SUCCESS; } + /* if a timeout was specified, set it */ + if (PMIX_SUCCESS == rc && 0 < tv.tv_sec) { + PMIX_RETAIN(trk); + cd->trk = trk; + pmix_event_evtimer_set(pmix_globals.evbase, &cd->ev, + connect_timeout, cd); + pmix_event_evtimer_add(&cd->ev, &tv); + cd->event_active = true; + } cleanup: if (NULL != procs) { @@ -1528,6 +1680,177 @@ pmix_status_t pmix_server_connect(pmix_server_caddy_t *cd, return rc; } +static void _check_cached_events(int sd, short args, void *cbdata) +{ + pmix_setup_caddy_t *scd = (pmix_setup_caddy_t*)cbdata; + pmix_notify_caddy_t *cd; + pmix_range_trkr_t rngtrk; + pmix_proc_t proc; + int i; + size_t k, n; + bool found, matched; + pmix_buffer_t *relay; + pmix_status_t ret = PMIX_SUCCESS; + pmix_cmd_t cmd = PMIX_NOTIFY_CMD; + + /* check if any matching notifications have been cached */ + rngtrk.procs = NULL; + rngtrk.nprocs = 0; + for (i=0; i < pmix_globals.max_events; i++) { + pmix_hotel_knock(&pmix_globals.notifications, i, (void**)&cd); + if (NULL == cd) { + continue; + } + found = false; + if (NULL == scd->codes) { + if (!cd->nondefault) { + /* they registered a default event handler - always matches */ + found = true; + } + } else { + for (k=0; k < scd->ncodes; k++) { + if (scd->codes[k] == cd->status) { + found = true; + break; + } + } + } + if (!found) { + continue; + } + /* check if the affected procs (if given) match those they + * wanted to know about */ + if (!pmix_notify_check_affected(cd->affected, cd->naffected, + scd->procs, scd->nprocs)) { + continue; + } + /* check the range */ + if (NULL == cd->targets) { + rngtrk.procs = &cd->source; + rngtrk.nprocs = 1; + } else { + rngtrk.procs = cd->targets; + rngtrk.nprocs = 
cd->ntargets; + } + rngtrk.range = cd->range; + PMIX_LOAD_PROCID(&proc, scd->peer->info->pname.nspace, scd->peer->info->pname.rank); + if (!pmix_notify_check_range(&rngtrk, &proc)) { + continue; + } + /* if we were given specific targets, check if this is one */ + found = false; + if (NULL != cd->targets) { + matched = false; + for (n=0; n < cd->ntargets; n++) { + /* if the source of the event is the same peer just registered, then ignore it + * as the event notification system will have already locally + * processed it */ + if (PMIX_CHECK_PROCID(&cd->source, &scd->peer->info->pname)) { + continue; + } + if (PMIX_CHECK_PROCID(&scd->peer->info->pname, &cd->targets[n])) { + matched = true; + /* track the number of targets we have left to notify */ + --cd->nleft; + /* if this is the last one, then evict this event + * from the cache */ + if (0 == cd->nleft) { + pmix_hotel_checkout(&pmix_globals.notifications, cd->room); + found = true; // mark that we should release cd + } + break; + } + } + if (!matched) { + /* do not notify this one */ + continue; + } + } + + /* all matches - notify */ + relay = PMIX_NEW(pmix_buffer_t); + if (NULL == relay) { + /* nothing we can do */ + PMIX_ERROR_LOG(PMIX_ERR_NOMEM); + ret = PMIX_ERR_NOMEM; + break; + } + /* pack the info data stored in the event */ + PMIX_BFROPS_PACK(ret, scd->peer, relay, &cmd, 1, PMIX_COMMAND); + if (PMIX_SUCCESS != ret) { + PMIX_ERROR_LOG(ret); + break; + } + PMIX_BFROPS_PACK(ret, scd->peer, relay, &cd->status, 1, PMIX_STATUS); + if (PMIX_SUCCESS != ret) { + PMIX_ERROR_LOG(ret); + break; + } + PMIX_BFROPS_PACK(ret, scd->peer, relay, &cd->source, 1, PMIX_PROC); + if (PMIX_SUCCESS != ret) { + PMIX_ERROR_LOG(ret); + break; + } + PMIX_BFROPS_PACK(ret, scd->peer, relay, &cd->ninfo, 1, PMIX_SIZE); + if (PMIX_SUCCESS != ret) { + PMIX_ERROR_LOG(ret); + break; + } + if (0 < cd->ninfo) { + PMIX_BFROPS_PACK(ret, scd->peer, relay, cd->info, cd->ninfo, PMIX_INFO); + if (PMIX_SUCCESS != ret) { + PMIX_ERROR_LOG(ret); + break; 
+ } + } + PMIX_SERVER_QUEUE_REPLY(ret, scd->peer, 0, relay); + if (PMIX_SUCCESS != ret) { + PMIX_RELEASE(relay); + } + if (found) { + PMIX_RELEASE(cd); + } + } + /* release the caddy */ + if (NULL != scd->codes) { + free(scd->codes); + } + if (NULL != scd->info) { + PMIX_INFO_FREE(scd->info, scd->ninfo); + } + if (NULL != scd->opcbfunc) { + scd->opcbfunc(ret, scd->cbdata); + } + PMIX_RELEASE(scd); +} + +/* provide a callback function for the host when it finishes + * processing the registration */ +static void regevopcbfunc(pmix_status_t status, void *cbdata) +{ + pmix_setup_caddy_t *cd = (pmix_setup_caddy_t*)cbdata; + + /* if the registration succeeded, then check local cache */ + if (PMIX_SUCCESS == status) { + _check_cached_events(0, 0, cd); + return; + } + + /* it didn't succeed, so cleanup and execute the callback + * so we don't hang */ + if (NULL != cd->codes) { + free(cd->codes); + } + if (NULL != cd->info) { + PMIX_INFO_FREE(cd->info, cd->ninfo); + } + if (NULL != cd->opcbfunc) { + cd->opcbfunc(status, cd->cbdata); + } + PMIX_RELEASE(cd); +} + + pmix_status_t pmix_server_register_events(pmix_peer_t *peer, pmix_buffer_t *buf, pmix_op_cbfunc_t cbfunc, @@ -1537,16 +1860,12 @@ pmix_status_t pmix_server_register_events(pmix_peer_t *peer, pmix_status_t rc; pmix_status_t *codes = NULL; pmix_info_t *info = NULL; - size_t ninfo=0, ncodes, n, k; + size_t ninfo=0, ncodes, n; pmix_regevents_info_t *reginfo; - pmix_peer_events_info_t *prev; - pmix_notify_caddy_t *cd; + pmix_peer_events_info_t *prev = NULL; pmix_setup_caddy_t *scd; - int i; bool enviro_events = false; - bool found, matched; - pmix_buffer_t *relay; - pmix_cmd_t cmd = PMIX_NOTIFY_CMD; + bool found; pmix_proc_t *affected = NULL; size_t naffected = 0; @@ -1600,9 +1919,7 @@ pmix_status_t pmix_server_register_events(pmix_peer_t *peer, /* check the directives */ for (n=0; n < ninfo; n++) { - if (0 == strncmp(info[n].key, PMIX_EVENT_ENVIRO_LEVEL, PMIX_MAX_KEYLEN)) { - enviro_events = PMIX_INFO_TRUE(&info[n]); - 
} else if (0 == strncmp(info[n].key, PMIX_EVENT_AFFECTED_PROC, PMIX_MAX_KEYLEN)) { + if (PMIX_CHECK_KEY(&info[n], PMIX_EVENT_AFFECTED_PROC)) { if (NULL != affected) { PMIX_ERROR_LOG(PMIX_ERR_BAD_PARAM); rc = PMIX_ERR_BAD_PARAM; @@ -1611,7 +1928,7 @@ pmix_status_t pmix_server_register_events(pmix_peer_t *peer, naffected = 1; PMIX_PROC_CREATE(affected, naffected); memcpy(affected, info[n].value.data.proc, sizeof(pmix_proc_t)); - } else if (0 == strncmp(info[n].key, PMIX_EVENT_AFFECTED_PROCS, PMIX_MAX_KEYLEN)) { + } else if (PMIX_CHECK_KEY(&info[n], PMIX_EVENT_AFFECTED_PROCS)) { if (NULL != affected) { PMIX_ERROR_LOG(PMIX_ERR_BAD_PARAM); rc = PMIX_ERR_BAD_PARAM; @@ -1623,6 +1940,14 @@ pmix_status_t pmix_server_register_events(pmix_peer_t *peer, } } + /* check the codes for system events */ + for (n=0; n < ncodes; n++) { + if (PMIX_SYSTEM_EVENT(codes[n])) { + enviro_events = true; + break; + } + } + /* if they asked for enviro events, and our host doesn't support * register_events, then we cannot meet the request */ if (enviro_events && NULL == pmix_host_server.register_events) { @@ -1631,10 +1956,36 @@ pmix_status_t pmix_server_register_events(pmix_peer_t *peer, goto cleanup; } + /* if they didn't send us any codes, then they are registering a + * default event handler. 
In that case, check only for default + * handlers and add this request to it, if not already present */ + if (0 == ncodes) { + PMIX_LIST_FOREACH(reginfo, &pmix_server_globals.events, pmix_regevents_info_t) { + if (PMIX_MAX_ERR_CONSTANT == reginfo->code) { + /* both are default handlers */ + prev = PMIX_NEW(pmix_peer_events_info_t); + if (NULL == prev) { + rc = PMIX_ERR_NOMEM; + goto cleanup; + } + PMIX_RETAIN(peer); + prev->peer = peer; + if (NULL != affected) { + PMIX_PROC_CREATE(prev->affected, naffected); + prev->naffected = naffected; + memcpy(prev->affected, affected, naffected * sizeof(pmix_proc_t)); + } + pmix_list_append(®info->peers, &prev->super); + break; + } + } + rc = PMIX_OPERATION_SUCCEEDED; + goto cleanup; + } + /* store the event registration info so we can call the registered * client when the server notifies the event */ - k=0; - do { + for (n=0; n < ncodes; n++) { found = false; PMIX_LIST_FOREACH(reginfo, &pmix_server_globals.events, pmix_regevents_info_t) { if (NULL == codes) { @@ -1648,36 +1999,29 @@ pmix_status_t pmix_server_register_events(pmix_peer_t *peer, } else { if (PMIX_MAX_ERR_CONSTANT == reginfo->code) { continue; - } else if (codes[k] == reginfo->code) { + } else if (codes[n] == reginfo->code) { found = true; break; } } } if (found) { - /* found it - add this peer if we don't already have it */ - found = false; - PMIX_LIST_FOREACH(prev, ®info->peers, pmix_peer_events_info_t) { - if (prev->peer == peer) { - /* already have it */ - rc = PMIX_SUCCESS; - found = true; - break; - } - } - if (!found) { - /* get here if we don't already have this peer */ - prev = PMIX_NEW(pmix_peer_events_info_t); - if (NULL == prev) { - rc = PMIX_ERR_NOMEM; - goto cleanup; - } - PMIX_RETAIN(peer); - prev->peer = peer; - prev->enviro_events = enviro_events; - pmix_list_append(®info->peers, &prev->super); + /* found it - add this request */ + prev = PMIX_NEW(pmix_peer_events_info_t); + if (NULL == prev) { + rc = PMIX_ERR_NOMEM; + goto cleanup; } - } else { 
+ PMIX_RETAIN(peer); + prev->peer = peer; + if (NULL != affected) { + PMIX_PROC_CREATE(prev->affected, naffected); + prev->naffected = naffected; + memcpy(prev->affected, affected, naffected * sizeof(pmix_proc_t)); + } + prev->enviro_events = enviro_events; + pmix_list_append(®info->peers, &prev->super); + } else { /* if we get here, then we didn't find an existing registration for this code */ reginfo = PMIX_NEW(pmix_regevents_info_t); if (NULL == reginfo) { @@ -1687,7 +2031,7 @@ pmix_status_t pmix_server_register_events(pmix_peer_t *peer, if (NULL == codes) { reginfo->code = PMIX_MAX_ERR_CONSTANT; } else { - reginfo->code = codes[k]; + reginfo->code = codes[n]; } pmix_list_append(&pmix_server_globals.events, ®info->super); prev = PMIX_NEW(pmix_peer_events_info_t); @@ -1697,14 +2041,23 @@ pmix_status_t pmix_server_register_events(pmix_peer_t *peer, } PMIX_RETAIN(peer); prev->peer = peer; + if (NULL != affected) { + PMIX_PROC_CREATE(prev->affected, naffected); + prev->naffected = naffected; + memcpy(prev->affected, affected, naffected * sizeof(pmix_proc_t)); + } prev->enviro_events = enviro_events; pmix_list_append(®info->peers, &prev->super); } - ++k; - } while (k < ncodes); + } /* if they asked for enviro events, call the local server */ if (enviro_events) { + /* if they don't support this, then we cannot do it */ + if (NULL == pmix_host_server.register_events) { + rc = PMIX_ERR_NOT_SUPPORTED; + goto cleanup; + } /* need to ensure the arrays don't go away until after the * host RM is done with them */ scd = PMIX_NEW(pmix_setup_caddy_t); @@ -1712,169 +2065,82 @@ pmix_status_t pmix_server_register_events(pmix_peer_t *peer, rc = PMIX_ERR_NOMEM; goto cleanup; } - if (NULL != codes) { - scd->codes = (pmix_status_t*)malloc(ncodes * sizeof(pmix_status_t)); - if (NULL == scd->codes) { - rc = PMIX_ERR_NOMEM; - PMIX_RELEASE(scd); - goto cleanup; - } - memcpy(scd->codes, codes, ncodes * sizeof(pmix_status_t)); - scd->ncodes = ncodes; - } - if (NULL != info) { - 
PMIX_INFO_CREATE(scd->info, ninfo); - if (NULL == scd->info) { - rc = PMIX_ERR_NOMEM; - if (NULL != scd->codes) { - free(scd->codes); - } - PMIX_RELEASE(scd); - goto cleanup; - } - /* copy the info across */ - for (n=0; n < ninfo; n++) { - PMIX_INFO_XFER(&scd->info[n], &info[n]); - } - scd->ninfo = ninfo; - } + PMIX_RETAIN(peer); + scd->peer = peer; + scd->codes = codes; + scd->ncodes = ncodes; + scd->info = info; + scd->ninfo = ninfo; scd->opcbfunc = cbfunc; scd->cbdata = cbdata; - if (PMIX_SUCCESS != (rc = pmix_host_server.register_events(scd->codes, scd->ncodes, scd->info, scd->ninfo, opcbfunc, scd))) { + if (PMIX_SUCCESS == (rc = pmix_host_server.register_events(scd->codes, scd->ncodes, scd->info, scd->ninfo, regevopcbfunc, scd))) { + /* the host will call us back when completed */ pmix_output_verbose(2, pmix_server_globals.event_output, - "server register events: host server reg events returned rc =%d", rc); - if (NULL != scd->codes) { - free(scd->codes); - } - if (NULL != scd->info) { - PMIX_INFO_FREE(scd->info, scd->ninfo); + "server register events: host server processing event registration"); + if (NULL != affected) { + free(affected); } - PMIX_RELEASE(scd); + return rc; + } else if (PMIX_OPERATION_SUCCEEDED == rc) { + /* we need to check cached notifications, but we want to ensure + * that occurs _after_ the client returns from registering the + * event handler in case the event is flagged for do_not_cache. 
+ * Setup an event to fire after we return as that means it will + * occur after we send the registration response back to the client, + * thus guaranteeing that the client will get their registration + * callback prior to delivery of an event notification */ + PMIX_RETAIN(peer); + scd->peer = peer; + scd->procs = affected; + scd->nprocs = naffected; + scd->opcbfunc = NULL; + scd->cbdata = NULL; + PMIX_THREADSHIFT(scd, _check_cached_events); + return rc; } else { - goto check; + /* host returned a genuine error and won't be calling the callback function */ + pmix_output_verbose(2, pmix_server_globals.event_output, + "server register events: host server reg events returned rc =%d", rc); + PMIX_RELEASE(scd); + goto cleanup; + } + } else { + rc = PMIX_OPERATION_SUCCEEDED; + /* we need to check cached notifications, but we want to ensure + * that occurs _after_ the client returns from registering the + * event handler in case the event is flagged for do_not_cache. + * Setup an event to fire after we return as that means it will + * occur after we send the registration response back to the client, + * thus guaranteeing that the client will get their registration + * callback prior to delivery of an event notification */ + scd = PMIX_NEW(pmix_setup_caddy_t); + PMIX_RETAIN(peer); + scd->peer = peer; + scd->codes = codes; + scd->ncodes = ncodes; + scd->procs = affected; + scd->nprocs = naffected; + scd->opcbfunc = NULL; + scd->cbdata = NULL; + PMIX_THREADSHIFT(scd, _check_cached_events); + if (NULL != info) { + PMIX_INFO_FREE(info, ninfo); } + return rc; } cleanup: pmix_output_verbose(2, pmix_server_globals.event_output, "server register events: ninfo =%lu rc =%d", ninfo, rc); - /* be sure to execute the callback */ - if (NULL != cbfunc) { - cbfunc(rc, cbdata); - } if (NULL != info) { PMIX_INFO_FREE(info, ninfo); } - if (PMIX_SUCCESS != rc) { - if (NULL != codes) { - free(codes); - } - if (NULL != affected) { - PMIX_PROC_FREE(affected, naffected); - } - return rc; - } - - 
check: - /* check if any matching notifications have been cached */ - for (i=0; i < pmix_globals.notifications.size; i++) { - if (NULL == (cd = (pmix_notify_caddy_t*)pmix_ring_buffer_poke(&pmix_globals.notifications, i))) { - break; - } - found = false; - if (NULL == codes) { - if (!cd->nondefault) { - /* they registered a default event handler - always matches */ - found = true; - } - } else { - for (k=0; k < ncodes; k++) { - if (codes[k] == cd->status) { - found = true; - break; - } - } - } - if (!found) { - continue; - } - /* if we were given specific targets, check if this is one */ - if (NULL != cd->targets) { - matched = false; - for (n=0; n < cd->ntargets; n++) { - if (0 != strncmp(peer->info->pname.nspace, cd->targets[n].nspace, PMIX_MAX_NSLEN)) { - continue; - } - /* if the source of the event is the same peer just registered, then ignore it - * as the event notification system will have already locally - * processed it */ - if (0 == strncmp(peer->info->pname.nspace, cd->source.nspace, PMIX_MAX_NSLEN) && - peer->info->pname.rank == cd->source.rank) { - continue; - } - if (PMIX_RANK_WILDCARD == cd->targets[n].rank || - peer->info->pname.rank == cd->targets[n].rank) { - matched = true; - break; - } - } - if (!matched) { - /* do not notify this one */ - continue; - } - } - /* if they specified affected proc(s) they wanted to know about, check */ - if (!pmix_notify_check_affected(cd->affected, cd->naffected, - affected, naffected)) { - continue; - } - /* all matches - notify */ - relay = PMIX_NEW(pmix_buffer_t); - if (NULL == relay) { - /* nothing we can do */ - PMIX_ERROR_LOG(PMIX_ERR_NOMEM); - rc = PMIX_ERR_NOMEM; - break; - } - /* pack the info data stored in the event */ - PMIX_BFROPS_PACK(rc, peer, relay, &cmd, 1, PMIX_COMMAND); - if (PMIX_SUCCESS != rc) { - PMIX_ERROR_LOG(rc); - break; - } - PMIX_BFROPS_PACK(rc, peer, relay, &cd->status, 1, PMIX_STATUS); - if (PMIX_SUCCESS != rc) { - PMIX_ERROR_LOG(rc); - break; - } - PMIX_BFROPS_PACK(rc, peer, relay, 
&cd->source, 1, PMIX_PROC); - if (PMIX_SUCCESS != rc) { - PMIX_ERROR_LOG(rc); - break; - } - PMIX_BFROPS_PACK(rc, peer, relay, &cd->ninfo, 1, PMIX_SIZE); - if (PMIX_SUCCESS != rc) { - PMIX_ERROR_LOG(rc); - break; - } - if (0 < cd->ninfo) { - PMIX_BFROPS_PACK(rc, peer, relay, cd->info, cd->ninfo, PMIX_INFO); - if (PMIX_SUCCESS != rc) { - PMIX_ERROR_LOG(rc); - break; - } - } - PMIX_SERVER_QUEUE_REPLY(peer, 0, relay); - } - if (!enviro_events) { - if (NULL != codes) { - free(codes); - } + if (NULL != codes) { + free(codes); } if (NULL != affected) { PMIX_PROC_FREE(affected, naffected); } - return rc; } @@ -1932,6 +2198,59 @@ static void local_cbfunc(pmix_status_t status, void *cbdata) PMIX_RELEASE(cd); } +static void intermed_step(pmix_status_t status, void *cbdata) +{ + pmix_notify_caddy_t *cd = (pmix_notify_caddy_t*)cbdata; + pmix_status_t rc; + + if (PMIX_SUCCESS != status) { + rc = status; + goto complete; + } + + /* check the range directive - if it is LOCAL, then we are + * done. Otherwise, it needs to go up to our + * host for dissemination */ + if (PMIX_RANGE_LOCAL == cd->range) { + rc = PMIX_SUCCESS; + goto complete; + } + + if (NULL == pmix_host_server.notify_event) { + rc = PMIX_ERR_NOT_SUPPORTED; + goto complete; + } + + /* since our host is going to send this everywhere, it may well + * come back to us. We already processed it, so mark it here + * to ensure we don't do it again. 
We previously inserted the + * PMIX_SERVER_INTERNAL_NOTIFY key at the very end of the + * info array - just overwrite that position */ + PMIX_INFO_LOAD(&cd->info[cd->ninfo-1], PMIX_EVENT_PROXY, &pmix_globals.myid, PMIX_PROC); + + /* pass it to our host RM for distribution */ + rc = pmix_host_server.notify_event(cd->status, &cd->source, cd->range, + cd->info, cd->ninfo, local_cbfunc, cd); + if (PMIX_SUCCESS == rc) { + /* let the callback function respond for us */ + return; + } + if (PMIX_OPERATION_SUCCEEDED == rc) { + rc = PMIX_SUCCESS; // local_cbfunc will not be called + } + + complete: + if (NULL != cd->cbfunc) { + cd->cbfunc(rc, cd->cbdata); + } + PMIX_RELEASE(cd); +} + +/* Receive an event sent by the client library. Since it was sent + * to us by one client, we have to both process it locally to ensure + * we notify all relevant local clients AND (assuming a range other + * than LOCAL) deliver to our host, requesting that they send it + * to all peer servers in the current session */ pmix_status_t pmix_server_event_recvd_from_client(pmix_peer_t *peer, pmix_buffer_t *buf, pmix_op_cbfunc_t cbfunc, @@ -1940,13 +2259,12 @@ pmix_status_t pmix_server_event_recvd_from_client(pmix_peer_t *peer, int32_t cnt; pmix_status_t rc; pmix_notify_caddy_t *cd; + size_t ninfo, n; pmix_output_verbose(2, pmix_server_globals.event_output, - "recvd event notification from client"); - - if (NULL == pmix_host_server.notify_event) { - return PMIX_ERR_NOT_SUPPORTED; - } + "%s:%d recvd event notification from client %s:%d", + pmix_globals.myid.nspace, pmix_globals.myid.rank, + peer->info->pname.nspace, peer->info->pname.rank); cd = PMIX_NEW(pmix_notify_caddy_t); if (NULL == cd) { @@ -1955,8 +2273,7 @@ pmix_status_t pmix_server_event_recvd_from_client(pmix_peer_t *peer, cd->cbfunc = cbfunc; cd->cbdata = cbdata; /* set the source */ - (void)strncpy(cd->source.nspace, peer->info->pname.nspace, PMIX_MAX_NSLEN); - cd->source.rank = peer->info->pname.rank; + PMIX_LOAD_PROCID(&cd->source, 
peer->info->pname.nspace, peer->info->pname.rank); /* unpack status */ cnt = 1; @@ -1976,18 +2293,19 @@ pmix_status_t pmix_server_event_recvd_from_client(pmix_peer_t *peer, /* unpack the info keys */ cnt = 1; - PMIX_BFROPS_UNPACK(rc, peer, buf, &cd->ninfo, &cnt, PMIX_SIZE); + PMIX_BFROPS_UNPACK(rc, peer, buf, &ninfo, &cnt, PMIX_SIZE); if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); goto exit; } - if (0 < cd->ninfo) { - PMIX_INFO_CREATE(cd->info, cd->ninfo); - if (NULL == cd->info) { - rc = PMIX_ERR_NOMEM; - goto exit; - } - cnt = cd->ninfo; + cd->ninfo = ninfo + 1; + PMIX_INFO_CREATE(cd->info, cd->ninfo); + if (NULL == cd->info) { + rc = PMIX_ERR_NOMEM; + goto exit; + } + if (0 < ninfo) { + cnt = ninfo; PMIX_BFROPS_UNPACK(rc, peer, buf, cd->info, &cnt, PMIX_INFO); if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); @@ -1995,30 +2313,34 @@ pmix_status_t pmix_server_event_recvd_from_client(pmix_peer_t *peer, } } - /* check the range directive - if it is LOCAL, then we just - * process it ourselves. Otherwise, it needs to go up to our - * host for dissemination */ - if (PMIX_RANGE_LOCAL == cd->range) { - if (PMIX_SUCCESS != (rc = pmix_server_notify_client_of_event(cd->status, - &cd->source, - cd->range, - cd->info, cd->ninfo, - local_cbfunc, cd))) { + /* check to see if we already processed this event - it is possible + * that a local client "echoed" it back to us and we want to avoid + * a potential infinite loop */ + for (n=0; n < ninfo; n++) { + if (PMIX_CHECK_KEY(&cd->info[n], PMIX_SERVER_INTERNAL_NOTIFY)) { + /* yep, we did - so don't do it again! 
*/ + rc = PMIX_OPERATION_SUCCEEDED; goto exit; } - return PMIX_SUCCESS; } - /* when we receive an event from a client, we just pass it to - * our host RM for distribution - if any targeted recipients - * are local to us, the host RM will let us know */ - pmix_host_server.notify_event(cd->status, &cd->source, cd->range, - cd->info, cd->ninfo, local_cbfunc, cd); - return PMIX_SUCCESS; + /* add an info object to mark that we recvd this internally */ + PMIX_INFO_LOAD(&cd->info[ninfo], PMIX_SERVER_INTERNAL_NOTIFY, NULL, PMIX_BOOL); + /* process it */ + if (PMIX_SUCCESS != (rc = pmix_server_notify_client_of_event(cd->status, + &cd->source, + cd->range, + cd->info, cd->ninfo, + intermed_step, cd))) { + goto exit; + } + if (PMIX_SUCCESS != rc) { + PMIX_RELEASE(cd); + } + return rc; exit: PMIX_RELEASE(cd); - cbfunc(rc, cbdata); return rc; } @@ -2031,14 +2353,14 @@ pmix_status_t pmix_server_query(pmix_peer_t *peer, pmix_status_t rc; pmix_query_caddy_t *cd; pmix_proc_t proc; + pmix_cb_t cb; + size_t n, p; + pmix_list_t results; + pmix_kval_t *kv, *kvnxt; pmix_output_verbose(2, pmix_server_globals.base_output, "recvd query from client"); - if (NULL == pmix_host_server.query) { - return PMIX_ERR_NOT_SUPPORTED; - } - cd = PMIX_NEW(pmix_query_caddy_t); if (NULL == cd) { return PMIX_ERR_NOMEM; @@ -2049,36 +2371,136 @@ pmix_status_t pmix_server_query(pmix_peer_t *peer, PMIX_BFROPS_UNPACK(rc, peer, buf, &cd->nqueries, &cnt, PMIX_SIZE); if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); - goto exit; + PMIX_RELEASE(cd); + return rc; } /* unpack the queries */ if (0 < cd->nqueries) { PMIX_QUERY_CREATE(cd->queries, cd->nqueries); if (NULL == cd->queries) { rc = PMIX_ERR_NOMEM; - goto exit; + PMIX_RELEASE(cd); + return rc; } cnt = cd->nqueries; PMIX_BFROPS_UNPACK(rc, peer, buf, cd->queries, &cnt, PMIX_QUERY); if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); - goto exit; + PMIX_RELEASE(cd); + return rc; + } + } + + /* check the directives to see if they want us to refresh + * the local cached 
results - if we wanted to optimize this + * more, we would check each query and allow those that don't + * want to be refreshed to be executed locally, and those that + * did would be sent to the host. However, for now we simply + * determine that if we don't have it, then ask for everything */ + memset(proc.nspace, 0, PMIX_MAX_NSLEN+1); + proc.rank = PMIX_RANK_INVALID; + PMIX_CONSTRUCT(&results, pmix_list_t); + + for (n=0; n < cd->nqueries; n++) { + for (p=0; p < cd->queries[n].nqual; p++) { + if (PMIX_CHECK_KEY(&cd->queries[n].qualifiers[p], PMIX_QUERY_REFRESH_CACHE)) { + if (PMIX_INFO_TRUE(&cd->queries[n].qualifiers[p])) { + PMIX_LIST_DESTRUCT(&results); + goto query; + } + } else if (PMIX_CHECK_KEY(&cd->queries[n].qualifiers[p], PMIX_PROCID)) { + PMIX_LOAD_NSPACE(proc.nspace, cd->queries[n].qualifiers[p].value.data.proc->nspace); + proc.rank = cd->queries[n].qualifiers[p].value.data.proc->rank; + } else if (PMIX_CHECK_KEY(&cd->queries[n].qualifiers[p], PMIX_NSPACE)) { + PMIX_LOAD_NSPACE(proc.nspace, cd->queries[n].qualifiers[p].value.data.string); + } else if (PMIX_CHECK_KEY(&cd->queries[n].qualifiers[p], PMIX_RANK)) { + proc.rank = cd->queries[n].qualifiers[p].value.data.rank; + } else if (PMIX_CHECK_KEY(&cd->queries[n].qualifiers[p], PMIX_HOSTNAME)) { + if (0 != strcmp(cd->queries[n].qualifiers[p].value.data.string, pmix_globals.hostname)) { + /* asking about a different host, so ask for the info */ + PMIX_LIST_DESTRUCT(&results); + goto query; + } + } + } + /* we get here if a refresh isn't required - first try a local + * "get" on the data to see if we already have it */ + PMIX_CONSTRUCT(&cb, pmix_cb_t); + cb.copy = false; + /* set the proc */ + if (PMIX_RANK_INVALID == proc.rank && + 0 == strlen(proc.nspace)) { + /* use our id */ + cb.proc = &pmix_globals.myid; + } else { + if (0 == strlen(proc.nspace)) { + /* use our nspace */ + PMIX_LOAD_NSPACE(cb.proc->nspace, pmix_globals.myid.nspace); + } + if (PMIX_RANK_INVALID == proc.rank) { + /* user the wildcard 
rank */ + proc.rank = PMIX_RANK_WILDCARD; + } + cb.proc = &proc; + } + for (p=0; NULL != cd->queries[n].keys[p]; p++) { + cb.key = cd->queries[n].keys[p]; + PMIX_GDS_FETCH_KV(rc, pmix_globals.mypeer, &cb); + if (PMIX_SUCCESS != rc) { + /* needs to be passed to the host */ + PMIX_LIST_DESTRUCT(&results); + PMIX_DESTRUCT(&cb); + goto query; + } + /* need to retain this result */ + PMIX_LIST_FOREACH_SAFE(kv, kvnxt, &cb.kvs, pmix_kval_t) { + pmix_list_remove_item(&cb.kvs, &kv->super); + pmix_list_append(&results, &kv->super); + } + PMIX_DESTRUCT(&cb); + } + } + + /* if we get here, then all queries were completely locally + * resolved, so construct the results for return */ + rc = PMIX_ERR_NOT_FOUND; + if (0 < (cd->ninfo = pmix_list_get_size(&results))) { + PMIX_INFO_CREATE(cd->info, cd->ninfo); + n = 0; + PMIX_LIST_FOREACH_SAFE(kv, kvnxt, &results, pmix_kval_t) { + PMIX_LOAD_KEY(cd->info[n].key, kv->key); + rc = pmix_value_xfer(&cd->info[n].value, kv->value); + if (PMIX_SUCCESS != rc) { + PMIX_INFO_FREE(cd->info, cd->ninfo); + cd->info = NULL; + cd->ninfo = 0; + break; + } + ++n; } } + /* done with the list of results */ + PMIX_LIST_DESTRUCT(&results); + /* we can just call the cbfunc here as we are already + * in an event - let our internal cbfunc do a threadshift + * if necessary */ + cbfunc(PMIX_SUCCESS, cd->info, cd->ninfo, cd, NULL, NULL); + return PMIX_SUCCESS; + + query: + if (NULL == pmix_host_server.query) { + PMIX_RELEASE(cd); + return PMIX_ERR_NOT_SUPPORTED; + } /* setup the requesting peer name */ - (void)strncpy(proc.nspace, peer->info->pname.nspace, PMIX_MAX_NSLEN); - proc.rank = peer->info->pname.rank; + PMIX_LOAD_PROCID(&proc, peer->info->pname.nspace, peer->info->pname.rank); /* ask the host for the info */ if (PMIX_SUCCESS != (rc = pmix_host_server.query(&proc, cd->queries, cd->nqueries, cbfunc, cd))) { - goto exit; + PMIX_RELEASE(cd); } - return PMIX_SUCCESS; - - exit: - PMIX_RELEASE(cd); return rc; } @@ -2110,7 +2532,7 @@ pmix_status_t 
pmix_server_log(pmix_peer_t *peer, * the request itself */ /* setup the requesting peer name */ - (void)strncpy(proc.nspace, peer->info->pname.nspace, PMIX_MAX_NSLEN); + pmix_strncpy(proc.nspace, peer->info->pname.nspace, PMIX_MAX_NSLEN); proc.rank = peer->info->pname.rank; cd = PMIX_NEW(pmix_shift_caddy_t); @@ -2238,7 +2660,7 @@ pmix_status_t pmix_server_alloc(pmix_peer_t *peer, } /* setup the requesting peer name */ - (void)strncpy(proc.nspace, peer->info->pname.nspace, PMIX_MAX_NSLEN); + pmix_strncpy(proc.nspace, peer->info->pname.nspace, PMIX_MAX_NSLEN); proc.rank = peer->info->pname.rank; /* ask the host to execute the request */ @@ -2270,7 +2692,7 @@ pmix_status_t pmix_server_job_ctrl(pmix_peer_t *peer, int32_t cnt, m; pmix_status_t rc; pmix_query_caddy_t *cd; - pmix_nspace_t *nptr, *tmp; + pmix_namespace_t *nptr, *tmp; pmix_peer_t *pr; pmix_proc_t proc; size_t n; @@ -2293,6 +2715,8 @@ pmix_status_t pmix_server_job_ctrl(pmix_peer_t *peer, } cd->cbdata = cbdata; + PMIX_CONSTRUCT(&epicache, pmix_list_t); + /* unpack the number of targets */ cnt = 1; PMIX_BFROPS_UNPACK(rc, peer, buf, &cd->ntargets, &cnt, PMIX_SIZE); @@ -2311,7 +2735,6 @@ pmix_status_t pmix_server_job_ctrl(pmix_peer_t *peer, } /* check targets to find proper place to put any epilog requests */ - PMIX_CONSTRUCT(&epicache, pmix_list_t); if (NULL == cd->targets) { epicd = PMIX_NEW(pmix_srvr_epi_caddy_t); epicd->epi = &peer->nptr->epilog; @@ -2320,20 +2743,20 @@ pmix_status_t pmix_server_job_ctrl(pmix_peer_t *peer, for (n=0; n < cd->ntargets; n++) { /* find the nspace of this proc */ nptr = NULL; - PMIX_LIST_FOREACH(tmp, &pmix_server_globals.nspaces, pmix_nspace_t) { + PMIX_LIST_FOREACH(tmp, &pmix_globals.nspaces, pmix_namespace_t) { if (0 == strcmp(tmp->nspace, cd->targets[n].nspace)) { nptr = tmp; break; } } if (NULL == nptr) { - nptr = PMIX_NEW(pmix_nspace_t); + nptr = PMIX_NEW(pmix_namespace_t); if (NULL == nptr) { rc = PMIX_ERR_NOMEM; goto exit; } nptr->nspace = strdup(cd->targets[n].nspace); - 
pmix_list_append(&pmix_server_globals.nspaces, &nptr->super); + pmix_list_append(&pmix_globals.nspaces, &nptr->super); } /* if the rank is wildcard, then we use the epilog for the nspace */ if (PMIX_RANK_WILDCARD == cd->targets[n].rank) { @@ -2387,7 +2810,7 @@ pmix_status_t pmix_server_job_ctrl(pmix_peer_t *peer, cnt = 0; // track how many infos are cleanup related for (n=0; n < cd->ninfo; n++) { - if (0 == strncmp(cd->info[n].key, PMIX_REGISTER_CLEANUP, PMIX_MAX_KEYLEN)) { + if (PMIX_CHECK_KEY(&cd->info[n], PMIX_REGISTER_CLEANUP)) { ++cnt; if (PMIX_STRING != cd->info[n].value.type || NULL == cd->info[n].value.data.string) { @@ -2403,7 +2826,7 @@ pmix_status_t pmix_server_job_ctrl(pmix_peer_t *peer, } cf->path = strdup(cd->info[n].value.data.string); pmix_list_append(&cachefiles, &cf->super); - } else if (0 == strncmp(cd->info[n].key, PMIX_REGISTER_CLEANUP_DIR, PMIX_MAX_KEYLEN)) { + } else if (PMIX_CHECK_KEY(&cd->info[n], PMIX_REGISTER_CLEANUP_DIR)) { ++cnt; if (PMIX_STRING != cd->info[n].value.type || NULL == cd->info[n].value.data.string) { @@ -2419,10 +2842,10 @@ pmix_status_t pmix_server_job_ctrl(pmix_peer_t *peer, } cdir->path = strdup(cd->info[n].value.data.string); pmix_list_append(&cachedirs, &cdir->super); - } else if (0 == strncmp(cd->info[n].key, PMIX_CLEANUP_RECURSIVE, PMIX_MAX_KEYLEN)) { + } else if (PMIX_CHECK_KEY(&cd->info[n], PMIX_CLEANUP_RECURSIVE)) { recurse = PMIX_INFO_TRUE(&cd->info[n]); ++cnt; - } else if (0 == strncmp(cd->info[n].key, PMIX_CLEANUP_IGNORE, PMIX_MAX_KEYLEN)) { + } else if (PMIX_CHECK_KEY(&cd->info[n], PMIX_CLEANUP_IGNORE)) { if (PMIX_STRING != cd->info[n].value.type || NULL == cd->info[n].value.data.string) { /* return an error */ @@ -2438,7 +2861,7 @@ pmix_status_t pmix_server_job_ctrl(pmix_peer_t *peer, cf->path = strdup(cd->info[n].value.data.string); pmix_list_append(&ignorefiles, &cf->super); ++cnt; - } else if (0 == strncmp(cd->info[n].key, PMIX_CLEANUP_LEAVE_TOPDIR, PMIX_MAX_KEYLEN)) { + } else if 
(PMIX_CHECK_KEY(&cd->info[n], PMIX_CLEANUP_LEAVE_TOPDIR)) { leave_topdir = PMIX_INFO_TRUE(&cd->info[n]); ++cnt; } @@ -2491,7 +2914,6 @@ pmix_status_t pmix_server_job_ctrl(pmix_peer_t *peer, rc = PMIX_ERR_CONFLICTING_CLEANUP_DIRECTIVES; PMIX_LIST_DESTRUCT(&cachedirs); PMIX_LIST_DESTRUCT(&cachefiles); - PMIX_LIST_DESTRUCT(&epicache); goto exit; } } @@ -2536,15 +2958,13 @@ pmix_status_t pmix_server_job_ctrl(pmix_peer_t *peer, PMIX_LIST_DESTRUCT(&cachefiles); if (cnt == (int)cd->ninfo) { /* nothing more to do */ - if (NULL != cbfunc) { - cbfunc(PMIX_SUCCESS, NULL, 0, cd, NULL, NULL); - } - return PMIX_SUCCESS; + rc = PMIX_OPERATION_SUCCEEDED; + goto exit; } } /* setup the requesting peer name */ - (void)strncpy(proc.nspace, peer->info->pname.nspace, PMIX_MAX_NSLEN); + pmix_strncpy(proc.nspace, peer->info->pname.nspace, PMIX_MAX_NSLEN); proc.rank = peer->info->pname.rank; /* ask the host to execute the request */ @@ -2554,10 +2974,12 @@ pmix_status_t pmix_server_job_ctrl(pmix_peer_t *peer, cbfunc, cd))) { goto exit; } + PMIX_LIST_DESTRUCT(&epicache); return PMIX_SUCCESS; exit: PMIX_RELEASE(cd); + PMIX_LIST_DESTRUCT(&epicache); return rc; } @@ -2575,9 +2997,6 @@ pmix_status_t pmix_server_monitor(pmix_peer_t *peer, pmix_output_verbose(2, pmix_server_globals.base_output, "recvd monitor request from client"); - if (NULL == pmix_host_server.monitor) { - return PMIX_ERR_NOT_SUPPORTED; - } cd = PMIX_NEW(pmix_query_caddy_t); if (NULL == cd) { @@ -2620,8 +3039,26 @@ pmix_status_t pmix_server_monitor(pmix_peer_t *peer, } } + /* see if they are requesting one of the monitoring + * methods we internally support */ + rc = pmix_psensor.start(peer, error, &monitor, cd->info, cd->ninfo); + if (PMIX_SUCCESS == rc) { + rc = PMIX_OPERATION_SUCCEEDED; + goto exit; + } + if (PMIX_ERR_NOT_SUPPORTED != rc) { + goto exit; + } + + /* if we don't internally support it, see if + * our host does */ + if (NULL == pmix_host_server.monitor) { + rc = PMIX_ERR_NOT_SUPPORTED; + goto exit; + } + /* setup 
the requesting peer name */ - (void)strncpy(proc.nspace, peer->info->pname.nspace, PMIX_MAX_NSLEN); + pmix_strncpy(proc.nspace, peer->info->pname.nspace, PMIX_MAX_NSLEN); proc.rank = peer->info->pname.rank; /* ask the host to execute the request */ @@ -2680,7 +3117,7 @@ pmix_status_t pmix_server_get_credential(pmix_peer_t *peer, } /* setup the requesting peer name */ - (void)strncpy(proc.nspace, peer->info->pname.nspace, PMIX_MAX_NSLEN); + pmix_strncpy(proc.nspace, peer->info->pname.nspace, PMIX_MAX_NSLEN); proc.rank = peer->info->pname.rank; /* ask the host to execute the request */ @@ -2745,7 +3182,7 @@ pmix_status_t pmix_server_validate_credential(pmix_peer_t *peer, } /* setup the requesting peer name */ - (void)strncpy(proc.nspace, peer->info->pname.nspace, PMIX_MAX_NSLEN); + pmix_strncpy(proc.nspace, peer->info->pname.nspace, PMIX_MAX_NSLEN); proc.rank = peer->info->pname.rank; /* ask the host to execute the request */ @@ -2772,9 +3209,8 @@ pmix_status_t pmix_server_iofreg(pmix_peer_t *peer, pmix_iof_req_t *req; bool notify, match; size_t n; - int i; - pmix_setup_caddy_t *occupant; pmix_buffer_t *msg; + pmix_iof_cache_t *iof, *ionext; pmix_output_verbose(2, pmix_server_globals.iof_output, "recvd IOF PULL request from client"); @@ -2843,8 +3279,7 @@ pmix_status_t pmix_server_iofreg(pmix_peer_t *peer, continue; } /* do we already have this source for this peer? 
*/ - if (0 == strncmp(cd->procs[n].nspace, req->pname.nspace, PMIX_MAX_NSLEN) && - (PMIX_RANK_WILDCARD == req->pname.rank || cd->procs[n].rank == req->pname.rank)) { + if (PMIX_CHECK_PROCID(&cd->procs[n], &req->pname)) { match = true; if ((req->channels & cd->channels) != cd->channels) { /* this is a channel update */ @@ -2871,54 +3306,60 @@ pmix_status_t pmix_server_iofreg(pmix_peer_t *peer, pmix_list_append(&pmix_globals.iof_requests, &req->super); } /* process any cached IO */ - for (i=0; i < PMIX_IOF_HOTEL_SIZE; i++) { - pmix_hotel_knock(&pmix_server_globals.iof, PMIX_IOF_HOTEL_SIZE-i-1, (void**)&occupant); - if (NULL != occupant) { - if (!(occupant->channels & req->channels)) { - continue; - } - /* if the source matches the request, then forward this along */ - if (0 != strncmp(occupant->procs->nspace, req->pname.nspace, PMIX_MAX_NSLEN) || - (PMIX_RANK_WILDCARD != req->pname.rank && occupant->procs->rank != req->pname.rank)) { - continue; - } - /* setup the msg */ - if (NULL == (msg = PMIX_NEW(pmix_buffer_t))) { - PMIX_ERROR_LOG(PMIX_ERR_OUT_OF_RESOURCE); - rc = PMIX_ERR_OUT_OF_RESOURCE; - break; - } - /* provide the source */ - PMIX_BFROPS_PACK(rc, req->peer, msg, occupant->procs, 1, PMIX_PROC); - if (PMIX_SUCCESS != rc) { - PMIX_ERROR_LOG(rc); - PMIX_RELEASE(msg); - break; - } - /* provide the channel */ - PMIX_BFROPS_PACK(rc, req->peer, msg, &occupant->channels, 1, PMIX_IOF_CHANNEL); - if (PMIX_SUCCESS != rc) { - PMIX_ERROR_LOG(rc); - PMIX_RELEASE(msg); - break; - } - /* pack the data */ - PMIX_BFROPS_PACK(rc, req->peer, msg, occupant->bo, 1, PMIX_BYTE_OBJECT); - if (PMIX_SUCCESS != rc) { - PMIX_ERROR_LOG(rc); - PMIX_RELEASE(msg); - break; - } - /* send it to the requestor */ - PMIX_PTL_SEND_ONEWAY(rc, req->peer, msg, PMIX_PTL_TAG_IOF); - if (PMIX_SUCCESS != rc) { - PMIX_ERROR_LOG(rc); - PMIX_RELEASE(msg); - } - /* remove it from the hotel since it has now been forwarded */ - pmix_hotel_checkout(&pmix_server_globals.iof, PMIX_IOF_HOTEL_SIZE-i-1); - 
PMIX_RELEASE(occupant); + PMIX_LIST_FOREACH_SAFE(iof, ionext, &pmix_server_globals.iof, pmix_iof_cache_t) { + /* if the channels don't match, then ignore it */ + if (!(iof->channel & req->channels)) { + continue; + } + /* if the source does not match the request, then ignore it */ + if (!PMIX_CHECK_PROCID(&iof->source, &req->pname)) { + continue; + } + /* never forward back to the source! This can happen if the source + * is a launcher */ + if (PMIX_CHECK_PROCID(&iof->source, &req->peer->info->pname)) { + continue; + } + pmix_output_verbose(2, pmix_server_globals.iof_output, + "PMIX:SERVER:IOFREQ delivering cached IOF from %s:%d to %s:%d", + iof->source.nspace, iof->source.rank, + req->peer->info->pname.nspace, req->peer->info->pname.rank); + /* setup the msg */ + if (NULL == (msg = PMIX_NEW(pmix_buffer_t))) { + PMIX_ERROR_LOG(PMIX_ERR_OUT_OF_RESOURCE); + rc = PMIX_ERR_OUT_OF_RESOURCE; + break; } + /* provide the source */ + PMIX_BFROPS_PACK(rc, req->peer, msg, &iof->source, 1, PMIX_PROC); + if (PMIX_SUCCESS != rc) { + PMIX_ERROR_LOG(rc); + PMIX_RELEASE(msg); + break; + } + /* provide the channel */ + PMIX_BFROPS_PACK(rc, req->peer, msg, &iof->channel, 1, PMIX_IOF_CHANNEL); + if (PMIX_SUCCESS != rc) { + PMIX_ERROR_LOG(rc); + PMIX_RELEASE(msg); + break; + } + /* pack the data */ + PMIX_BFROPS_PACK(rc, req->peer, msg, iof->bo, 1, PMIX_BYTE_OBJECT); + if (PMIX_SUCCESS != rc) { + PMIX_ERROR_LOG(rc); + PMIX_RELEASE(msg); + break; + } + /* send it to the requestor */ + PMIX_PTL_SEND_ONEWAY(rc, req->peer, msg, PMIX_PTL_TAG_IOF); + if (PMIX_SUCCESS != rc) { + PMIX_ERROR_LOG(rc); + PMIX_RELEASE(msg); + } + /* remove it from the list since it has now been forwarded */ + pmix_list_remove_item(&pmix_server_globals.iof, &iof->super); + PMIX_RELEASE(iof); } } if (notify) { @@ -3037,7 +3478,7 @@ pmix_status_t pmix_server_iofstdin(pmix_peer_t *peer, } /* pass the data to the host */ - (void)strncpy(source.nspace, peer->nptr->nspace, PMIX_MAX_NSLEN); + pmix_strncpy(source.nspace, 
peer->nptr->nspace, PMIX_MAX_NSLEN); source.rank = peer->info->pname.rank; if (PMIX_SUCCESS != (rc = pmix_host_server.push_stdin(&source, cd->procs, cd->nprocs, cd->info, cd->ninfo, cd->bo, @@ -3054,6 +3495,9 @@ pmix_status_t pmix_server_iofstdin(pmix_peer_t *peer, /***** INSTANCE SERVER LIBRARY CLASSES *****/ static void tcon(pmix_server_trkr_t *t) { + t->event_active = false; + t->host_called = false; + t->id = NULL; memset(t->pname.nspace, 0, PMIX_MAX_NSLEN+1); t->pname.rank = PMIX_RANK_UNDEF; t->pcs = NULL; @@ -3073,6 +3517,9 @@ static void tcon(pmix_server_trkr_t *t) } static void tdes(pmix_server_trkr_t *t) { + if (NULL != t->id) { + free(t->id); + } PMIX_DESTRUCT_LOCK(&t->lock); if (NULL != t->pcs) { free(t->pcs); @@ -3120,6 +3567,8 @@ static void scadcon(pmix_setup_caddy_t *p) p->ncodes = 0; p->procs = NULL; p->nprocs = 0; + p->apps = NULL; + p->napps = 0; p->server_object = NULL; p->nlocalprocs = 0; p->info = NULL; @@ -3127,6 +3576,7 @@ static void scadcon(pmix_setup_caddy_t *p) p->keys = NULL; p->channels = PMIX_FWD_NO_CHANNELS; p->bo = NULL; + p->nbo = 0; p->cbfunc = NULL; p->opcbfunc = NULL; p->setupcbfunc = NULL; @@ -3139,6 +3589,13 @@ static void scaddes(pmix_setup_caddy_t *p) if (NULL != p->peer) { PMIX_RELEASE(p->peer); } + PMIX_PROC_FREE(p->procs, p->nprocs); + if (NULL != p->apps) { + PMIX_APP_FREE(p->apps, p->napps); + } + if (NULL != p->bo) { + PMIX_BYTE_OBJECT_FREE(p->bo, p->nbo); + } PMIX_DESTRUCT_LOCK(&p->lock); } PMIX_EXPORT PMIX_CLASS_INSTANCE(pmix_setup_caddy_t, @@ -3148,11 +3605,25 @@ PMIX_EXPORT PMIX_CLASS_INSTANCE(pmix_setup_caddy_t, static void ncon(pmix_notify_caddy_t *p) { PMIX_CONSTRUCT_LOCK(&p->lock); +#if defined(__linux__) && OPAL_HAVE_CLOCK_GETTIME + struct timespec tp; + (void) clock_gettime(CLOCK_MONOTONIC, &tp); + p->ts = tp.tv_sec; +#else + /* Fall back to gettimeofday() if we have nothing else */ + struct timeval tv; + gettimeofday(&tv, NULL); + p->ts = tv.tv_sec; +#endif + p->room = -1; memset(p->source.nspace, 0, 
PMIX_MAX_NSLEN+1); p->source.rank = PMIX_RANK_UNDEF; p->range = PMIX_RANGE_UNDEF; p->targets = NULL; p->ntargets = 0; + p->nleft = SIZE_MAX; + p->affected = NULL; + p->naffected = 0; p->nondefault = false; p->info = NULL; p->ninfo = 0; @@ -3163,6 +3634,7 @@ static void ndes(pmix_notify_caddy_t *p) if (NULL != p->info) { PMIX_INFO_FREE(p->info, p->ninfo); } + PMIX_PROC_FREE(p->affected, p->naffected); if (NULL != p->targets) { free(p->targets); } @@ -3230,12 +3702,17 @@ PMIX_CLASS_INSTANCE(pmix_dmdx_local_t, static void prevcon(pmix_peer_events_info_t *p) { p->peer = NULL; + p->affected = NULL; + p->naffected = 0; } static void prevdes(pmix_peer_events_info_t *p) { if (NULL != p->peer) { PMIX_RELEASE(p->peer); } + if (NULL != p->affected) { + PMIX_PROC_FREE(p->affected, p->naffected); + } } PMIX_CLASS_INSTANCE(pmix_peer_events_info_t, pmix_list_item_t, @@ -3276,3 +3753,15 @@ static void ildes(pmix_inventory_rollup_t *p) PMIX_CLASS_INSTANCE(pmix_inventory_rollup_t, pmix_object_t, ilcon, ildes); + +static void iocon(pmix_iof_cache_t *p) +{ + p->bo = NULL; +} +static void iodes(pmix_iof_cache_t *p) +{ + PMIX_BYTE_OBJECT_FREE(p->bo, 1); // macro protects against NULL +} +PMIX_CLASS_INSTANCE(pmix_iof_cache_t, + pmix_list_item_t, + iocon, iodes); diff --git a/opal/mca/pmix/pmix3x/pmix/src/server/pmix_server_ops.h b/opal/mca/pmix/pmix3x/pmix/src/server/pmix_server_ops.h index e90137c90cc..06fddc1fe96 100644 --- a/opal/mca/pmix/pmix3x/pmix/src/server/pmix_server_ops.h +++ b/opal/mca/pmix/pmix3x/pmix/src/server/pmix_server_ops.h @@ -1,19 +1,24 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* - * Copyright (c) 2015-2018 Intel, Inc. All rights reserved. + * Copyright (c) 2015-2019 Intel, Inc. All rights reserved. * Copyright (c) 2015 Artem Y. Polyakov . * All rights reserved. * Copyright (c) 2015 Mellanox Technologies, Inc. * All rights reserved. * Copyright (c) 2016 IBM Corporation. All rights reserved. 
- * Copyright (c) 2016 Research Organization for Information Science - * and Technology (RIST). All rights reserved. + * Copyright (c) 2016-2018 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * $COPYRIGHT$ */ #ifndef PMIX_SERVER_OPS_H #define PMIX_SERVER_OPS_H +#include +#ifdef HAVE_SYS_TYPES_H +#include +#endif + #include #include "src/include/types.h" #include @@ -57,6 +62,12 @@ typedef struct { size_t napps; pmix_iof_channel_t channels; pmix_byte_object_t *bo; + size_t nbo; + /* timestamp receipt of the notification so we + * can evict the oldest one if we get overwhelmed */ + time_t ts; + /* what room of the hotel they are in */ + int room; pmix_op_cbfunc_t opcbfunc; pmix_dmodex_response_fn_t cbfunc; pmix_setup_application_cbfunc_t setupcbfunc; @@ -120,6 +131,8 @@ typedef struct { pmix_list_item_t super; pmix_peer_t *peer; bool enviro_events; + pmix_proc_t *affected; + size_t naffected; } pmix_peer_events_info_t; PMIX_CLASS_DECLARATION(pmix_peer_events_info_t); @@ -130,6 +143,14 @@ typedef struct { } pmix_regevents_info_t; PMIX_CLASS_DECLARATION(pmix_regevents_info_t); +typedef struct { + pmix_list_item_t super; + pmix_proc_t source; + pmix_iof_channel_t channel; + pmix_byte_object_t *bo; +} pmix_iof_cache_t; +PMIX_CLASS_DECLARATION(pmix_iof_cache_t); + typedef struct { pmix_list_t nspaces; // list of pmix_nspace_t for the nspaces we know about pmix_pointer_array_t clients; // array of pmix_peer_t local clients @@ -138,9 +159,11 @@ typedef struct { pmix_list_t local_reqs; // list of pmix_dmdx_local_t awaiting arrival of data from local neighbours pmix_list_t gdata; // cache of data given to me for passing to all clients pmix_list_t events; // list of pmix_regevents_info_t registered events - pmix_hotel_t iof; // IO to be forwarded to clients + pmix_list_t iof; // IO to be forwarded to clients + size_t max_iof_cache; // max number of IOF messages to cache bool tool_connections_allowed; char *tmpdir; // temporary 
directory for this server + char *system_tmpdir; // system tmpdir // verbosity for server get operations int get_output; int get_verbose; @@ -194,8 +217,8 @@ typedef struct { bool pmix_server_trk_update(pmix_server_trkr_t *trk); -void pmix_pending_nspace_requests(pmix_nspace_t *nptr); -pmix_status_t pmix_pending_resolve(pmix_nspace_t *nptr, pmix_rank_t rank, +void pmix_pending_nspace_requests(pmix_namespace_t *nptr); +pmix_status_t pmix_pending_resolve(pmix_namespace_t *nptr, pmix_rank_t rank, pmix_status_t status, pmix_dmdx_local_t *lcd); @@ -312,7 +335,11 @@ void pmix_server_message_handler(struct pmix_peer_t *pr, pmix_ptl_hdr_t *hdr, pmix_buffer_t *buf, void *cbdata); +void pmix_server_purge_events(pmix_peer_t *peer, + pmix_proc_t *proc); + PMIX_EXPORT extern pmix_server_module_t pmix_host_server; PMIX_EXPORT extern pmix_server_globals_t pmix_server_globals; + #endif // PMIX_SERVER_OPS_H diff --git a/opal/mca/pmix/pmix3x/pmix/src/threads/mutex_unix.h b/opal/mca/pmix/pmix3x/pmix/src/threads/mutex_unix.h index f61d549923e..229be8f1772 100644 --- a/opal/mca/pmix/pmix3x/pmix/src/threads/mutex_unix.h +++ b/opal/mca/pmix/pmix3x/pmix/src/threads/mutex_unix.h @@ -77,14 +77,14 @@ PMIX_EXPORT PMIX_CLASS_DECLARATION(pmix_recursive_mutex_t); .m_lock_debug = 0, \ .m_lock_file = NULL, \ .m_lock_line = 0, \ - .m_lock_atomic = { .u = { .lock = PMIX_ATOMIC_LOCK_UNLOCKED } },\ + .m_lock_atomic = {PMIX_ATOMIC_LOCK_UNLOCKED}, \ } #else #define PMIX_MUTEX_STATIC_INIT \ { \ .super = PMIX_OBJ_STATIC_INIT(pmix_mutex_t), \ .m_lock_pthread = PTHREAD_MUTEX_INITIALIZER, \ - .m_lock_atomic = { .u = { .lock = PMIX_ATOMIC_LOCK_UNLOCKED } },\ + .m_lock_atomic = {PMIX_ATOMIC_LOCK_UNLOCKED}, \ } #endif @@ -98,14 +98,14 @@ PMIX_EXPORT PMIX_CLASS_DECLARATION(pmix_recursive_mutex_t); .m_lock_debug = 0, \ .m_lock_file = NULL, \ .m_lock_line = 0, \ - .m_lock_atomic = { .u = { .lock = PMIX_ATOMIC_LOCK_UNLOCKED } },\ + .m_lock_atomic = {PMIX_ATOMIC_LOCK_UNLOCKED}, \ } #else #define 
PMIX_RECURSIVE_MUTEX_STATIC_INIT \ { \ .super = PMIX_OBJ_STATIC_INIT(pmix_mutex_t), \ .m_lock_pthread = PMIX_PTHREAD_RECURSIVE_MUTEX_INITIALIZER, \ - .m_lock_atomic = { .u = { .lock = PMIX_ATOMIC_LOCK_UNLOCKED } },\ + .m_lock_atomic = {PMIX_ATOMIC_LOCK_UNLOCKED}, \ } #endif diff --git a/opal/mca/pmix/pmix3x/pmix/src/threads/thread_usage.h b/opal/mca/pmix/pmix3x/pmix/src/threads/thread_usage.h index 59825645c90..ff3e5041805 100644 --- a/opal/mca/pmix/pmix3x/pmix/src/threads/thread_usage.h +++ b/opal/mca/pmix/pmix3x/pmix/src/threads/thread_usage.h @@ -33,31 +33,30 @@ /** - * Use an atomic operation for increment/decrement if pmix_using_threads() - * indicates that threads are in use by the application or library. + * Use an atomic operation for increment/decrement */ #define PMIX_THREAD_DEFINE_ATOMIC_OP(type, name, operator, suffix) \ -static inline type pmix_thread_ ## name ## _fetch_ ## suffix (volatile type *addr, type delta) \ +static inline type pmix_thread_ ## name ## _fetch_ ## suffix (pmix_atomic_ ## type *addr, type delta) \ { \ return pmix_atomic_ ## name ## _fetch_ ## suffix (addr, delta); \ } \ \ -static inline type pmix_thread_fetch_ ## name ## _ ## suffix (volatile type *addr, type delta) \ +static inline type pmix_thread_fetch_ ## name ## _ ## suffix (pmix_atomic_ ## type *addr, type delta) \ { \ return pmix_atomic_fetch_ ## name ## _ ## suffix (addr, delta); \ } #define PMIX_THREAD_DEFINE_ATOMIC_COMPARE_EXCHANGE(type, addr_type, suffix) \ -static inline bool pmix_thread_compare_exchange_strong_ ## suffix (volatile addr_type *addr, type *compare, type value) \ +static inline bool pmix_thread_compare_exchange_strong_ ## suffix (pmix_atomic_ ## addr_type *addr, type *compare, type value) \ { \ - return pmix_atomic_compare_exchange_strong_ ## suffix ((volatile type *) addr, compare, value); \ + return pmix_atomic_compare_exchange_strong_ ## suffix (addr, (addr_type *) compare, (addr_type) value); \ } #define PMIX_THREAD_DEFINE_ATOMIC_SWAP(type, addr_type, 
suffix) \ -static inline type pmix_thread_swap_ ## suffix (volatile addr_type *ptr, type newvalue) \ +static inline type pmix_thread_swap_ ## suffix (pmix_atomic_ ## addr_type *ptr, type newvalue) \ { \ - return pmix_atomic_swap_ ## suffix ((volatile type *) ptr, newvalue); \ + return (type) pmix_atomic_swap_ ## suffix (ptr, (addr_type) newvalue); \ } PMIX_THREAD_DEFINE_ATOMIC_OP(int32_t, add, +, 32) @@ -112,13 +111,13 @@ PMIX_THREAD_DEFINE_ATOMIC_SWAP(void *, intptr_t, ptr) #define PMIX_THREAD_COMPARE_EXCHANGE_STRONG_32 pmix_thread_compare_exchange_strong_32 #define PMIX_ATOMIC_COMPARE_EXCHANGE_STRONG_32 pmix_thread_compare_exchange_strong_32 -#define PMIX_THREAD_COMPARE_EXCHANGE_STRONG_PTR(x, y, z) pmix_thread_compare_exchange_strong_ptr ((volatile intptr_t *) x, (void *) y, (void *) z) +#define PMIX_THREAD_COMPARE_EXCHANGE_STRONG_PTR(x, y, z) pmix_thread_compare_exchange_strong_ptr ((pmix_atomic_intptr_t *) x, (intptr_t *) y, (intptr_t) z) #define PMIX_ATOMIC_COMPARE_EXCHANGE_STRONG_PTR PMIX_THREAD_COMPARE_EXCHANGE_STRONG_PTR #define PMIX_THREAD_SWAP_32 pmix_thread_swap_32 #define PMIX_ATOMIC_SWAP_32 pmix_thread_swap_32 -#define PMIX_THREAD_SWAP_PTR(x, y) pmix_thread_swap_ptr ((volatile intptr_t *) x, (void *) y) +#define PMIX_THREAD_SWAP_PTR(x, y) pmix_thread_swap_ptr ((pmix_atomic_intptr_t *) x, (intptr_t) y) #define PMIX_ATOMIC_SWAP_PTR PMIX_THREAD_SWAP_PTR /* define 64-bit macros is 64-bit atomic math is available */ diff --git a/opal/mca/pmix/pmix3x/pmix/src/threads/wait_sync.h b/opal/mca/pmix/pmix3x/pmix/src/threads/wait_sync.h index 311ecbfe7f1..225c8f157b8 100644 --- a/opal/mca/pmix/pmix3x/pmix/src/threads/wait_sync.h +++ b/opal/mca/pmix/pmix3x/pmix/src/threads/wait_sync.h @@ -28,7 +28,7 @@ BEGIN_C_DECLS typedef struct pmix_wait_sync_t { - int32_t count; + pmix_atomic_int32_t count; int32_t status; pthread_cond_t condition; pthread_mutex_t lock; diff --git a/opal/mca/pmix/pmix3x/pmix/src/tool/pmix_tool.c b/opal/mca/pmix/pmix3x/pmix/src/tool/pmix_tool.c 
index ae9b331eab1..585ea08fe49 100644 --- a/opal/mca/pmix/pmix3x/pmix/src/tool/pmix_tool.c +++ b/opal/mca/pmix/pmix3x/pmix/src/tool/pmix_tool.c @@ -1,6 +1,6 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* - * Copyright (c) 2014-2018 Intel, Inc. All rights reserved. + * Copyright (c) 2014-2019 Intel, Inc. All rights reserved. * Copyright (c) 2014-2016 Research Organization for Information Science * and Technology (RIST). All rights reserved. * Copyright (c) 2014 Artem Y. Polyakov . @@ -50,7 +50,9 @@ #endif /* HAVE_DIRENT_H */ #include PMIX_EVENT_HEADER +#if ! PMIX_HAVE_LIBEV #include PMIX_EVENT2_THREAD_HEADER +#endif #include "src/class/pmix_list.h" #include "src/util/argv.h" @@ -63,6 +65,7 @@ #include "src/runtime/pmix_rte.h" #include "src/mca/bfrops/base/base.h" #include "src/mca/gds/base/base.h" +#include "src/mca/pnet/base/base.h" #include "src/mca/ptl/base/base.h" #include "src/mca/psec/psec.h" #include "src/include/pmix_globals.h" @@ -162,30 +165,9 @@ static void pmix_tool_notify_recv(struct pmix_peer_t *peer, PMIX_RELEASE(chain); goto error; } - /* check for directives */ - for (cnt=0; cnt < (int)ninfo; cnt++) { - if (0 == strncmp(chain->info[cnt].key, PMIX_EVENT_NON_DEFAULT, PMIX_MAX_KEYLEN)) { - chain->nondefault = PMIX_INFO_TRUE(&chain->info[cnt]); - } else if (0 == strncmp(chain->info[cnt].key, PMIX_EVENT_AFFECTED_PROC, PMIX_MAX_KEYLEN)) { - PMIX_PROC_CREATE(chain->affected, 1); - if (NULL == chain->affected) { - PMIX_RELEASE(chain); - goto error; - } - chain->naffected = 1; - memcpy(chain->affected, chain->info[cnt].value.data.proc, sizeof(pmix_proc_t)); - } else if (0 == strncmp(chain->info[cnt].key, PMIX_EVENT_AFFECTED_PROCS, PMIX_MAX_KEYLEN)) { - chain->naffected = chain->info[cnt].value.data.darray->size; - PMIX_PROC_CREATE(chain->affected, chain->naffected); - if (NULL == chain->affected) { - chain->naffected = 0; - PMIX_RELEASE(chain); - goto error; - } - memcpy(chain->affected, chain->info[cnt].value.data.darray->array, 
chain->naffected * sizeof(pmix_proc_t)); - } - } } + /* prep the chain for processing */ + pmix_prep_event_chain(chain, chain->info, ninfo, false); pmix_output_verbose(2, pmix_client_globals.event_output, "[%s:%d] pmix:tool_notify_recv - processing event %s from source %s:%d, calling errhandler", @@ -216,7 +198,7 @@ static void tool_iof_handler(struct pmix_peer_t *pr, pmix_status_t rc; pmix_output_verbose(2, pmix_client_globals.iof_output, - "recvd IOF"); + "recvd IOF with %d bytes", (int)buf->bytes_used); /* if the buffer is empty, they are simply closing the channel */ if (0 == buf->bytes_used) { @@ -280,9 +262,7 @@ static void job_data(struct pmix_peer_t *pr, PMIX_EXPORT int PMIx_tool_init(pmix_proc_t *proc, pmix_info_t info[], size_t ninfo) { - pmix_kval_t *kptr; pmix_status_t rc; - char hostname[PMIX_MAX_NSLEN]; char *evar, *nspace = NULL; pmix_rank_t rank = PMIX_RANK_UNDEF; bool gdsfound, do_not_connect = false; @@ -311,7 +291,7 @@ PMIX_EXPORT int PMIx_tool_init(pmix_proc_t *proc, * rank should be known. 
So return them here if * requested */ if (NULL != proc) { - (void)strncpy(proc->nspace, pmix_globals.myid.nspace, PMIX_MAX_NSLEN); + pmix_strncpy(proc->nspace, pmix_globals.myid.nspace, PMIX_MAX_NSLEN); proc->rank = pmix_globals.myid.rank; } ++pmix_globals.init_cntr; @@ -348,10 +328,29 @@ PMIX_EXPORT int PMIx_tool_init(pmix_proc_t *proc, /* they want us to forward our stdin to someone */ fwd_stdin = true; } else if (0 == strncmp(info[n].key, PMIX_LAUNCHER, PMIX_MAX_KEYLEN)) { - ptype = PMIX_PROC_LAUNCHER; + ptype |= PMIX_PROC_LAUNCHER; + } else if (0 == strncmp(info[n].key, PMIX_SERVER_TMPDIR, PMIX_MAX_KEYLEN)) { + pmix_server_globals.tmpdir = strdup(info[n].value.data.string); + } else if (0 == strncmp(info[n].key, PMIX_SYSTEM_TMPDIR, PMIX_MAX_KEYLEN)) { + pmix_server_globals.system_tmpdir = strdup(info[n].value.data.string); } } } + if (NULL == pmix_server_globals.tmpdir) { + if (NULL == (evar = getenv("PMIX_SERVER_TMPDIR"))) { + pmix_server_globals.tmpdir = strdup(pmix_tmp_directory()); + } else { + pmix_server_globals.tmpdir = strdup(evar); + } + } + if (NULL == pmix_server_globals.system_tmpdir) { + if (NULL == (evar = getenv("PMIX_SYSTEM_TMPDIR"))) { + pmix_server_globals.system_tmpdir = strdup(pmix_tmp_directory()); + } else { + pmix_server_globals.system_tmpdir = strdup(evar); + } + } + if ((nspace_given && !rank_given) || (!nspace_given && rank_given)) { /* can't have one and not the other */ @@ -408,7 +407,7 @@ PMIX_EXPORT int PMIx_tool_init(pmix_proc_t *proc, /* if we are a launcher, then we also need to act as a server, * so setup the server-related structures here */ - if (PMIX_PROC_LAUNCHER == ptype) { + if (PMIX_PROC_LAUNCHER_ACT & ptype) { if (PMIX_SUCCESS != (rc = pmix_server_initialize())) { PMIX_ERROR_LOG(rc); if (NULL != nspace) { @@ -422,14 +421,6 @@ PMIX_EXPORT int PMIx_tool_init(pmix_proc_t *proc, } /* setup the function pointers */ memset(&pmix_host_server, 0, sizeof(pmix_server_module_t)); - /* setup our tmpdir */ - if (NULL == 
pmix_server_globals.tmpdir) { - if (NULL == (evar = getenv("PMIX_SERVER_TMPDIR"))) { - pmix_server_globals.tmpdir = strdup(pmix_tmp_directory()); - } else { - pmix_server_globals.tmpdir = strdup(evar); - } - } } /* setup the runtime - this init's the globals, @@ -448,7 +439,7 @@ PMIX_EXPORT int PMIx_tool_init(pmix_proc_t *proc, } /* if we were given a name, then set it now */ if (nspace_given || nspace_in_enviro) { - (void)strncpy(pmix_globals.myid.nspace, nspace, PMIX_MAX_NSLEN); + pmix_strncpy(pmix_globals.myid.nspace, nspace, PMIX_MAX_NSLEN); free(nspace); pmix_globals.myid.rank = rank; } @@ -473,7 +464,7 @@ PMIX_EXPORT int PMIx_tool_init(pmix_proc_t *proc, PMIX_RELEASE_THREAD(&pmix_global_lock); return PMIX_ERR_NOMEM; } - pmix_client_globals.myserver->nptr = PMIX_NEW(pmix_nspace_t); + pmix_client_globals.myserver->nptr = PMIX_NEW(pmix_namespace_t); if (NULL == pmix_client_globals.myserver->nptr) { PMIX_RELEASE(pmix_client_globals.myserver); if (gdsfound) { @@ -603,7 +594,7 @@ PMIX_EXPORT int PMIx_tool_init(pmix_proc_t *proc, } if (!nspace_given) { /* Success, so copy the nspace and rank to the proc struct they gave us */ - (void)strncpy(proc->nspace, pmix_globals.myid.nspace, PMIX_MAX_NSLEN); + pmix_strncpy(proc->nspace, pmix_globals.myid.nspace, PMIX_MAX_NSLEN); } if (!rank_given) { proc->rank = pmix_globals.myid.rank; @@ -621,36 +612,21 @@ PMIX_EXPORT int PMIx_tool_init(pmix_proc_t *proc, pmix_globals.mypeer->info->pname.nspace = strdup(pmix_globals.myid.nspace); pmix_globals.mypeer->info->pname.rank = pmix_globals.myid.rank; - /* if we are acting as a client, then send a request for our - * job info - we do this as a non-blocking - * transaction because some systems cannot handle very large - * blocking operations and error out if we try them. 
*/ - if (PMIX_PROC_IS_CLIENT(pmix_globals.mypeer)) { - req = PMIX_NEW(pmix_buffer_t); - PMIX_BFROPS_PACK(rc, pmix_client_globals.myserver, - req, &cmd, 1, PMIX_COMMAND); - if (PMIX_SUCCESS != rc) { - PMIX_ERROR_LOG(rc); - PMIX_RELEASE(req); - PMIX_RELEASE_THREAD(&pmix_global_lock); - return rc; - } - /* send to the server */ - PMIX_CONSTRUCT(&cb, pmix_cb_t); - PMIX_PTL_SEND_RECV(rc, pmix_client_globals.myserver, - req, job_data, (void*)&cb); - if (PMIX_SUCCESS != rc) { - PMIX_RELEASE_THREAD(&pmix_global_lock); - return rc; - } - /* wait for the data to return */ - PMIX_WAIT_THREAD(&cb.lock); - rc = cb.status; - PMIX_DESTRUCT(&cb); - if (PMIX_SUCCESS != rc) { + /* if we are acting as a server, then start listening */ + if (PMIX_PROC_IS_LAUNCHER(pmix_globals.mypeer)) { + /* setup the wildcard recv for inbound messages from clients */ + rcv = PMIX_NEW(pmix_ptl_posted_recv_t); + rcv->tag = UINT32_MAX; + rcv->cbfunc = pmix_server_message_handler; + /* add it to the end of the list of recvs */ + pmix_list_append(&pmix_ptl_globals.posted_recvs, &rcv->super); + /* open the pnet framework so we can harvest envars */ + rc = pmix_mca_base_framework_open(&pmix_pnet_base_framework, 0); + if (PMIX_SUCCESS != rc){ PMIX_RELEASE_THREAD(&pmix_global_lock); return rc; } + /* note that we do not select active plugins as we don't need them */ } /* setup IOF */ @@ -699,7 +675,7 @@ PMIX_EXPORT int PMIx_tool_init(pmix_proc_t *proc, &stdinev.ev, fd, PMIX_EV_READ, pmix_iof_read_local_handler, &stdinev); - } \ + } /* check to see if we want the stdin read event to be * active - we will always at least define the event, * but may delay its activation @@ -732,326 +708,74 @@ PMIX_EXPORT int PMIx_tool_init(pmix_proc_t *proc, /* increment our init reference counter */ pmix_globals.init_cntr++; - if (!PMIX_PROC_IS_CLIENT(pmix_globals.mypeer)) { - /* now finish the initialization by filling our local - * datastore with typical job-related info. 
No point - * in having the server generate these as we are - * obviously a singleton, and so the values are well-known */ - (void)strncpy(wildcard.nspace, pmix_globals.myid.nspace, PMIX_MAX_NSLEN); - wildcard.rank = pmix_globals.myid.rank; - - /* the jobid is just our nspace */ - kptr = PMIX_NEW(pmix_kval_t); - kptr->key = strdup(PMIX_JOBID); - PMIX_VALUE_CREATE(kptr->value, 1); - kptr->value->type = PMIX_STRING; - kptr->value->data.string = strdup(pmix_globals.myid.nspace); - PMIX_GDS_STORE_KV(rc, pmix_globals.mypeer, - &wildcard, - PMIX_INTERNAL, kptr); - if (PMIX_SUCCESS != rc) { - PMIX_ERROR_LOG(rc); - PMIX_RELEASE_THREAD(&pmix_global_lock); - return rc; - } - PMIX_RELEASE(kptr); // maintain accounting - - /* our rank */ - kptr = PMIX_NEW(pmix_kval_t); - kptr->key = strdup(PMIX_RANK); - PMIX_VALUE_CREATE(kptr->value, 1); - kptr->value->type = PMIX_INT; - kptr->value->data.integer = 0; - PMIX_GDS_STORE_KV(rc, pmix_globals.mypeer, - &pmix_globals.myid, - PMIX_INTERNAL, kptr); - if (PMIX_SUCCESS != rc) { - PMIX_ERROR_LOG(rc); - PMIX_RELEASE_THREAD(&pmix_global_lock); - return rc; - } - PMIX_RELEASE(kptr); // maintain accounting - - /* nproc offset */ - kptr = PMIX_NEW(pmix_kval_t); - kptr->key = strdup(PMIX_NPROC_OFFSET); - PMIX_VALUE_CREATE(kptr->value, 1); - kptr->value->type = PMIX_UINT32; - kptr->value->data.uint32 = 0; - PMIX_GDS_STORE_KV(rc, pmix_globals.mypeer, - &wildcard, - PMIX_INTERNAL, kptr); - if (PMIX_SUCCESS != rc) { - PMIX_ERROR_LOG(rc); - PMIX_RELEASE_THREAD(&pmix_global_lock); - return rc; - } - PMIX_RELEASE(kptr); // maintain accounting - - /* node size */ - kptr = PMIX_NEW(pmix_kval_t); - kptr->key = strdup(PMIX_NODE_SIZE); - PMIX_VALUE_CREATE(kptr->value, 1); - kptr->value->type = PMIX_UINT32; - kptr->value->data.uint32 = 1; - PMIX_GDS_STORE_KV(rc, pmix_globals.mypeer, - &wildcard, - PMIX_INTERNAL, kptr); - if (PMIX_SUCCESS != rc) { - PMIX_ERROR_LOG(rc); - PMIX_RELEASE_THREAD(&pmix_global_lock); - return rc; - } - PMIX_RELEASE(kptr); // 
maintain accounting - - /* local peers */ - kptr = PMIX_NEW(pmix_kval_t); - kptr->key = strdup(PMIX_LOCAL_PEERS); - PMIX_VALUE_CREATE(kptr->value, 1); - kptr->value->type = PMIX_STRING; - kptr->value->data.string = strdup("0"); - PMIX_GDS_STORE_KV(rc, pmix_globals.mypeer, - &wildcard, - PMIX_INTERNAL, kptr); - if (PMIX_SUCCESS != rc) { - PMIX_ERROR_LOG(rc); - PMIX_RELEASE_THREAD(&pmix_global_lock); - return rc; - } - PMIX_RELEASE(kptr); // maintain accounting - - /* local leader */ - kptr = PMIX_NEW(pmix_kval_t); - kptr->key = strdup(PMIX_LOCALLDR); - PMIX_VALUE_CREATE(kptr->value, 1); - kptr->value->type = PMIX_UINT32; - kptr->value->data.uint32 = 0; - PMIX_GDS_STORE_KV(rc, pmix_globals.mypeer, - &wildcard, - PMIX_INTERNAL, kptr); - if (PMIX_SUCCESS != rc) { - PMIX_ERROR_LOG(rc); - PMIX_RELEASE_THREAD(&pmix_global_lock); - return rc; - } - PMIX_RELEASE(kptr); // maintain accounting - - /* universe size */ - kptr = PMIX_NEW(pmix_kval_t); - kptr->key = strdup(PMIX_UNIV_SIZE); - PMIX_VALUE_CREATE(kptr->value, 1); - kptr->value->type = PMIX_UINT32; - kptr->value->data.uint32 = 1; - PMIX_GDS_STORE_KV(rc, pmix_globals.mypeer, - &wildcard, - PMIX_INTERNAL, kptr); - if (PMIX_SUCCESS != rc) { - PMIX_ERROR_LOG(rc); - PMIX_RELEASE_THREAD(&pmix_global_lock); - return rc; - } - PMIX_RELEASE(kptr); // maintain accounting - - /* job size - we are our very own job, so we have no peers */ - kptr = PMIX_NEW(pmix_kval_t); - kptr->key = strdup(PMIX_JOB_SIZE); - PMIX_VALUE_CREATE(kptr->value, 1); - kptr->value->type = PMIX_UINT32; - kptr->value->data.uint32 = 1; - PMIX_GDS_STORE_KV(rc, pmix_globals.mypeer, - &wildcard, - PMIX_INTERNAL, kptr); - if (PMIX_SUCCESS != rc) { - PMIX_ERROR_LOG(rc); - PMIX_RELEASE_THREAD(&pmix_global_lock); - return rc; - } - PMIX_RELEASE(kptr); // maintain accounting - - /* local size - only us in our job */ - kptr = PMIX_NEW(pmix_kval_t); - kptr->key = strdup(PMIX_LOCAL_SIZE); - PMIX_VALUE_CREATE(kptr->value, 1); - kptr->value->type = PMIX_UINT32; - 
kptr->value->data.uint32 = 1; - PMIX_GDS_STORE_KV(rc, pmix_globals.mypeer, - &wildcard, - PMIX_INTERNAL, kptr); - if (PMIX_SUCCESS != rc) { - PMIX_ERROR_LOG(rc); - PMIX_RELEASE_THREAD(&pmix_global_lock); - return rc; - } - PMIX_RELEASE(kptr); // maintain accounting - - /* max procs - since we are a self-started tool, there is no - * allocation within which we can grow ourselves */ - kptr = PMIX_NEW(pmix_kval_t); - kptr->key = strdup(PMIX_MAX_PROCS); - PMIX_VALUE_CREATE(kptr->value, 1); - kptr->value->type = PMIX_UINT32; - kptr->value->data.uint32 = 1; - PMIX_GDS_STORE_KV(rc, pmix_globals.mypeer, - &wildcard, - PMIX_INTERNAL, kptr); - if (PMIX_SUCCESS != rc) { - PMIX_ERROR_LOG(rc); - PMIX_RELEASE_THREAD(&pmix_global_lock); - return rc; - } - PMIX_RELEASE(kptr); // maintain accounting - - /* app number */ - kptr = PMIX_NEW(pmix_kval_t); - kptr->key = strdup(PMIX_APPNUM); - PMIX_VALUE_CREATE(kptr->value, 1); - kptr->value->type = PMIX_UINT32; - kptr->value->data.uint32 = 0; - PMIX_GDS_STORE_KV(rc, pmix_globals.mypeer, - &pmix_globals.myid, - PMIX_INTERNAL, kptr); - if (PMIX_SUCCESS != rc) { - PMIX_ERROR_LOG(rc); - PMIX_RELEASE_THREAD(&pmix_global_lock); - return rc; - } - PMIX_RELEASE(kptr); // maintain accounting - - /* app leader */ - kptr = PMIX_NEW(pmix_kval_t); - kptr->key = strdup(PMIX_APPLDR); - PMIX_VALUE_CREATE(kptr->value, 1); - kptr->value->type = PMIX_UINT32; - kptr->value->data.uint32 = 0; - PMIX_GDS_STORE_KV(rc, pmix_globals.mypeer, - &pmix_globals.myid, - PMIX_INTERNAL, kptr); - if (PMIX_SUCCESS != rc) { - PMIX_ERROR_LOG(rc); - PMIX_RELEASE_THREAD(&pmix_global_lock); - return rc; - } - PMIX_RELEASE(kptr); // maintain accounting - - /* app rank */ - kptr = PMIX_NEW(pmix_kval_t); - kptr->key = strdup(PMIX_APP_RANK); - PMIX_VALUE_CREATE(kptr->value, 1); - kptr->value->type = PMIX_UINT32; - kptr->value->data.uint32 = 0; - PMIX_GDS_STORE_KV(rc, pmix_globals.mypeer, - &pmix_globals.myid, - PMIX_INTERNAL, kptr); - if (PMIX_SUCCESS != rc) { - 
PMIX_ERROR_LOG(rc); - PMIX_RELEASE_THREAD(&pmix_global_lock); - return rc; - } - PMIX_RELEASE(kptr); // maintain accounting - - /* global rank */ - kptr = PMIX_NEW(pmix_kval_t); - kptr->key = strdup(PMIX_GLOBAL_RANK); - PMIX_VALUE_CREATE(kptr->value, 1); - kptr->value->type = PMIX_UINT32; - kptr->value->data.uint32 = 0; - PMIX_GDS_STORE_KV(rc, pmix_globals.mypeer, - &pmix_globals.myid, - PMIX_INTERNAL, kptr); - if (PMIX_SUCCESS != rc) { + /* if we are acting as a client, then send a request for our + * job info - we do this as a non-blocking + * transaction because some systems cannot handle very large + * blocking operations and error out if we try them. */ + if (PMIX_PROC_IS_CLIENT(pmix_globals.mypeer)) { + req = PMIX_NEW(pmix_buffer_t); + PMIX_BFROPS_PACK(rc, pmix_client_globals.myserver, + req, &cmd, 1, PMIX_COMMAND); + if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); + PMIX_RELEASE(req); PMIX_RELEASE_THREAD(&pmix_global_lock); return rc; } - PMIX_RELEASE(kptr); // maintain accounting - - /* local rank - we are alone in our job */ - kptr = PMIX_NEW(pmix_kval_t); - kptr->key = strdup(PMIX_LOCAL_RANK); - PMIX_VALUE_CREATE(kptr->value, 1); - kptr->value->type = PMIX_UINT16; - kptr->value->data.uint32 = 0; - PMIX_GDS_STORE_KV(rc, pmix_globals.mypeer, - &pmix_globals.myid, - PMIX_INTERNAL, kptr); + /* send to the server */ + PMIX_CONSTRUCT(&cb, pmix_cb_t); + PMIX_PTL_SEND_RECV(rc, pmix_client_globals.myserver, + req, job_data, (void*)&cb); if (PMIX_SUCCESS != rc) { - PMIX_ERROR_LOG(rc); PMIX_RELEASE_THREAD(&pmix_global_lock); return rc; } - PMIX_RELEASE(kptr); // maintain accounting - - /* we cannot know the node rank as we don't know what - * other processes are executing on this node - so - * we'll add that info to the server-tool handshake - * and load it from there */ - - /* hostname */ - gethostname(hostname, PMIX_MAX_NSLEN); - kptr = PMIX_NEW(pmix_kval_t); - kptr->key = strdup(PMIX_HOSTNAME); - PMIX_VALUE_CREATE(kptr->value, 1); - kptr->value->type = PMIX_STRING; 
- kptr->value->data.string = strdup(hostname); - PMIX_GDS_STORE_KV(rc, pmix_globals.mypeer, - &pmix_globals.myid, - PMIX_INTERNAL, kptr); + /* wait for the data to return */ + PMIX_WAIT_THREAD(&cb.lock); + rc = cb.status; + PMIX_DESTRUCT(&cb); if (PMIX_SUCCESS != rc) { - PMIX_ERROR_LOG(rc); PMIX_RELEASE_THREAD(&pmix_global_lock); return rc; } - PMIX_RELEASE(kptr); // maintain accounting - - /* we cannot know the RM's nodeid for this host, so - * we'll add that info to the server-tool handshake - * and load it from there */ - - /* the nodemap is simply our hostname as there is no - * regex to generate */ - kptr = PMIX_NEW(pmix_kval_t); - kptr->key = strdup(PMIX_NODE_MAP); - PMIX_VALUE_CREATE(kptr->value, 1); - kptr->value->type = PMIX_STRING; - kptr->value->data.string = strdup(hostname); - PMIX_GDS_STORE_KV(rc, pmix_globals.mypeer, - &wildcard, - PMIX_INTERNAL, kptr); + /* quick check to see if we got something back. If this + * is a launcher that is being executed multiple times + * in a job-script, then the original registration data + * will have been deleted after the first invocation. 
In + * such a case, we simply regenerate it locally as it is + * well-known */ + pmix_cb_t cb; + PMIX_CONSTRUCT(&cb, pmix_cb_t); + pmix_strncpy(wildcard.nspace, pmix_globals.myid.nspace, PMIX_MAX_NSLEN); + wildcard.rank = PMIX_RANK_WILDCARD; + cb.proc = &wildcard; + cb.copy = true; + PMIX_GDS_FETCH_KV(rc, pmix_globals.mypeer, &cb); + PMIX_DESTRUCT(&cb); if (PMIX_SUCCESS != rc) { - PMIX_ERROR_LOG(rc); - PMIX_RELEASE_THREAD(&pmix_global_lock); - return rc; + pmix_output_verbose(5, pmix_client_globals.get_output, + "pmix:tool:client data not found in internal storage"); + rc = pmix_tool_init_info(); + if (PMIX_SUCCESS != rc) { + PMIX_RELEASE_THREAD(&pmix_global_lock); + return rc; + } } - PMIX_RELEASE(kptr); // maintain accounting - - /* likewise, the proc map is just our rank as we are - * the only proc in this job */ - kptr = PMIX_NEW(pmix_kval_t); - kptr->key = strdup(PMIX_PROC_MAP); - PMIX_VALUE_CREATE(kptr->value, 1); - kptr->value->type = PMIX_STRING; - kptr->value->data.string = strdup("0"); - PMIX_GDS_STORE_KV(rc, pmix_globals.mypeer, - &wildcard, - PMIX_INTERNAL, kptr); + } else { + /* now finish the initialization by filling our local + * datastore with typical job-related info. 
No point + * in having the server generate these as we are + * obviously a singleton, and so the values are well-known */ + rc = pmix_tool_init_info(); if (PMIX_SUCCESS != rc) { - PMIX_ERROR_LOG(rc); PMIX_RELEASE_THREAD(&pmix_global_lock); return rc; } - PMIX_RELEASE(kptr); // maintain accounting } PMIX_RELEASE_THREAD(&pmix_global_lock); /* if we are acting as a server, then start listening */ if (PMIX_PROC_IS_LAUNCHER(pmix_globals.mypeer)) { - /* setup the wildcard recv for inbound messages from clients */ - rcv = PMIX_NEW(pmix_ptl_posted_recv_t); - rcv->tag = UINT32_MAX; - rcv->cbfunc = pmix_server_message_handler; - /* add it to the end of the list of recvs */ - pmix_list_append(&pmix_ptl_globals.posted_recvs, &rcv->super); - /* start listening for connections */ if (PMIX_SUCCESS != pmix_ptl_base_start_listening(info, ninfo)) { pmix_show_help("help-pmix-server.txt", "listener-thread-start", true); @@ -1062,6 +786,307 @@ PMIX_EXPORT int PMIx_tool_init(pmix_proc_t *proc, return rc; } +pmix_status_t pmix_tool_init_info(void) +{ + pmix_kval_t *kptr; + pmix_status_t rc; + pmix_proc_t wildcard; + char hostname[PMIX_MAXHOSTNAMELEN] = {0}; + + pmix_strncpy(wildcard.nspace, pmix_globals.myid.nspace, PMIX_MAX_NSLEN); + wildcard.rank = pmix_globals.myid.rank; + + /* the jobid is just our nspace */ + kptr = PMIX_NEW(pmix_kval_t); + kptr->key = strdup(PMIX_JOBID); + PMIX_VALUE_CREATE(kptr->value, 1); + kptr->value->type = PMIX_STRING; + kptr->value->data.string = strdup(pmix_globals.myid.nspace); + PMIX_GDS_STORE_KV(rc, pmix_globals.mypeer, + &wildcard, + PMIX_INTERNAL, kptr); + if (PMIX_SUCCESS != rc) { + PMIX_ERROR_LOG(rc); + return rc; + } + PMIX_RELEASE(kptr); // maintain accounting + + /* our rank */ + kptr = PMIX_NEW(pmix_kval_t); + kptr->key = strdup(PMIX_RANK); + PMIX_VALUE_CREATE(kptr->value, 1); + kptr->value->type = PMIX_INT; + kptr->value->data.integer = 0; + PMIX_GDS_STORE_KV(rc, pmix_globals.mypeer, + &pmix_globals.myid, + PMIX_INTERNAL, kptr); + if 
(PMIX_SUCCESS != rc) { + PMIX_ERROR_LOG(rc); + return rc; + } + PMIX_RELEASE(kptr); // maintain accounting + + /* nproc offset */ + kptr = PMIX_NEW(pmix_kval_t); + kptr->key = strdup(PMIX_NPROC_OFFSET); + PMIX_VALUE_CREATE(kptr->value, 1); + kptr->value->type = PMIX_UINT32; + kptr->value->data.uint32 = 0; + PMIX_GDS_STORE_KV(rc, pmix_globals.mypeer, + &wildcard, + PMIX_INTERNAL, kptr); + if (PMIX_SUCCESS != rc) { + PMIX_ERROR_LOG(rc); + return rc; + } + PMIX_RELEASE(kptr); // maintain accounting + + /* node size */ + kptr = PMIX_NEW(pmix_kval_t); + kptr->key = strdup(PMIX_NODE_SIZE); + PMIX_VALUE_CREATE(kptr->value, 1); + kptr->value->type = PMIX_UINT32; + kptr->value->data.uint32 = 1; + PMIX_GDS_STORE_KV(rc, pmix_globals.mypeer, + &wildcard, + PMIX_INTERNAL, kptr); + if (PMIX_SUCCESS != rc) { + PMIX_ERROR_LOG(rc); + return rc; + } + PMIX_RELEASE(kptr); // maintain accounting + + /* local peers */ + kptr = PMIX_NEW(pmix_kval_t); + kptr->key = strdup(PMIX_LOCAL_PEERS); + PMIX_VALUE_CREATE(kptr->value, 1); + kptr->value->type = PMIX_STRING; + kptr->value->data.string = strdup("0"); + PMIX_GDS_STORE_KV(rc, pmix_globals.mypeer, + &wildcard, + PMIX_INTERNAL, kptr); + if (PMIX_SUCCESS != rc) { + PMIX_ERROR_LOG(rc); + return rc; + } + PMIX_RELEASE(kptr); // maintain accounting + + /* local leader */ + kptr = PMIX_NEW(pmix_kval_t); + kptr->key = strdup(PMIX_LOCALLDR); + PMIX_VALUE_CREATE(kptr->value, 1); + kptr->value->type = PMIX_UINT32; + kptr->value->data.uint32 = 0; + PMIX_GDS_STORE_KV(rc, pmix_globals.mypeer, + &wildcard, + PMIX_INTERNAL, kptr); + if (PMIX_SUCCESS != rc) { + PMIX_ERROR_LOG(rc); + return rc; + } + PMIX_RELEASE(kptr); // maintain accounting + + /* universe size */ + kptr = PMIX_NEW(pmix_kval_t); + kptr->key = strdup(PMIX_UNIV_SIZE); + PMIX_VALUE_CREATE(kptr->value, 1); + kptr->value->type = PMIX_UINT32; + kptr->value->data.uint32 = 1; + PMIX_GDS_STORE_KV(rc, pmix_globals.mypeer, + &wildcard, + PMIX_INTERNAL, kptr); + if (PMIX_SUCCESS != rc) { + 
PMIX_ERROR_LOG(rc); + return rc; + } + PMIX_RELEASE(kptr); // maintain accounting + + /* job size - we are our very own job, so we have no peers */ + kptr = PMIX_NEW(pmix_kval_t); + kptr->key = strdup(PMIX_JOB_SIZE); + PMIX_VALUE_CREATE(kptr->value, 1); + kptr->value->type = PMIX_UINT32; + kptr->value->data.uint32 = 1; + PMIX_GDS_STORE_KV(rc, pmix_globals.mypeer, + &wildcard, + PMIX_INTERNAL, kptr); + if (PMIX_SUCCESS != rc) { + PMIX_ERROR_LOG(rc); + return rc; + } + PMIX_RELEASE(kptr); // maintain accounting + + /* local size - only us in our job */ + kptr = PMIX_NEW(pmix_kval_t); + kptr->key = strdup(PMIX_LOCAL_SIZE); + PMIX_VALUE_CREATE(kptr->value, 1); + kptr->value->type = PMIX_UINT32; + kptr->value->data.uint32 = 1; + PMIX_GDS_STORE_KV(rc, pmix_globals.mypeer, + &wildcard, + PMIX_INTERNAL, kptr); + if (PMIX_SUCCESS != rc) { + PMIX_ERROR_LOG(rc); + return rc; + } + PMIX_RELEASE(kptr); // maintain accounting + + /* max procs - since we are a self-started tool, there is no + * allocation within which we can grow ourselves */ + kptr = PMIX_NEW(pmix_kval_t); + kptr->key = strdup(PMIX_MAX_PROCS); + PMIX_VALUE_CREATE(kptr->value, 1); + kptr->value->type = PMIX_UINT32; + kptr->value->data.uint32 = 1; + PMIX_GDS_STORE_KV(rc, pmix_globals.mypeer, + &wildcard, + PMIX_INTERNAL, kptr); + if (PMIX_SUCCESS != rc) { + PMIX_ERROR_LOG(rc); + return rc; + } + PMIX_RELEASE(kptr); // maintain accounting + + /* app number */ + kptr = PMIX_NEW(pmix_kval_t); + kptr->key = strdup(PMIX_APPNUM); + PMIX_VALUE_CREATE(kptr->value, 1); + kptr->value->type = PMIX_UINT32; + kptr->value->data.uint32 = 0; + PMIX_GDS_STORE_KV(rc, pmix_globals.mypeer, + &pmix_globals.myid, + PMIX_INTERNAL, kptr); + if (PMIX_SUCCESS != rc) { + PMIX_ERROR_LOG(rc); + return rc; + } + PMIX_RELEASE(kptr); // maintain accounting + + /* app leader */ + kptr = PMIX_NEW(pmix_kval_t); + kptr->key = strdup(PMIX_APPLDR); + PMIX_VALUE_CREATE(kptr->value, 1); + kptr->value->type = PMIX_UINT32; + kptr->value->data.uint32 = 0; 
+ PMIX_GDS_STORE_KV(rc, pmix_globals.mypeer, + &pmix_globals.myid, + PMIX_INTERNAL, kptr); + if (PMIX_SUCCESS != rc) { + PMIX_ERROR_LOG(rc); + return rc; + } + PMIX_RELEASE(kptr); // maintain accounting + + /* app rank */ + kptr = PMIX_NEW(pmix_kval_t); + kptr->key = strdup(PMIX_APP_RANK); + PMIX_VALUE_CREATE(kptr->value, 1); + kptr->value->type = PMIX_UINT32; + kptr->value->data.uint32 = 0; + PMIX_GDS_STORE_KV(rc, pmix_globals.mypeer, + &pmix_globals.myid, + PMIX_INTERNAL, kptr); + if (PMIX_SUCCESS != rc) { + PMIX_ERROR_LOG(rc); + return rc; + } + PMIX_RELEASE(kptr); // maintain accounting + + /* global rank */ + kptr = PMIX_NEW(pmix_kval_t); + kptr->key = strdup(PMIX_GLOBAL_RANK); + PMIX_VALUE_CREATE(kptr->value, 1); + kptr->value->type = PMIX_UINT32; + kptr->value->data.uint32 = 0; + PMIX_GDS_STORE_KV(rc, pmix_globals.mypeer, + &pmix_globals.myid, + PMIX_INTERNAL, kptr); + if (PMIX_SUCCESS != rc) { + PMIX_ERROR_LOG(rc); + return rc; + } + PMIX_RELEASE(kptr); // maintain accounting + + /* local rank - we are alone in our job */ + kptr = PMIX_NEW(pmix_kval_t); + kptr->key = strdup(PMIX_LOCAL_RANK); + PMIX_VALUE_CREATE(kptr->value, 1); + kptr->value->type = PMIX_UINT16; + kptr->value->data.uint32 = 0; + PMIX_GDS_STORE_KV(rc, pmix_globals.mypeer, + &pmix_globals.myid, + PMIX_INTERNAL, kptr); + if (PMIX_SUCCESS != rc) { + PMIX_ERROR_LOG(rc); + return rc; + } + PMIX_RELEASE(kptr); // maintain accounting + + /* we cannot know the node rank as we don't know what + * other processes are executing on this node - so + * we'll add that info to the server-tool handshake + * and load it from there */ + + /* hostname */ + if (NULL != pmix_globals.hostname) { + pmix_strncpy(hostname, pmix_globals.hostname, PMIX_MAXHOSTNAMELEN); + } else { + gethostname(hostname, PMIX_MAXHOSTNAMELEN-1); + } + kptr = PMIX_NEW(pmix_kval_t); + kptr->key = strdup(PMIX_HOSTNAME); + PMIX_VALUE_CREATE(kptr->value, 1); + kptr->value->type = PMIX_STRING; + kptr->value->data.string = strdup(hostname); + 
PMIX_GDS_STORE_KV(rc, pmix_globals.mypeer, + &pmix_globals.myid, + PMIX_INTERNAL, kptr); + if (PMIX_SUCCESS != rc) { + PMIX_ERROR_LOG(rc); + return rc; + } + PMIX_RELEASE(kptr); // maintain accounting + + /* we cannot know the RM's nodeid for this host, so + * we'll add that info to the server-tool handshake + * and load it from there */ + + /* the nodemap is simply our hostname as there is no + * regex to generate */ + kptr = PMIX_NEW(pmix_kval_t); + kptr->key = strdup(PMIX_NODE_MAP); + PMIX_VALUE_CREATE(kptr->value, 1); + kptr->value->type = PMIX_STRING; + kptr->value->data.string = strdup(hostname); + PMIX_GDS_STORE_KV(rc, pmix_globals.mypeer, + &wildcard, + PMIX_INTERNAL, kptr); + if (PMIX_SUCCESS != rc) { + PMIX_ERROR_LOG(rc); + return rc; + } + PMIX_RELEASE(kptr); // maintain accounting + + /* likewise, the proc map is just our rank as we are + * the only proc in this job */ + kptr = PMIX_NEW(pmix_kval_t); + kptr->key = strdup(PMIX_PROC_MAP); + PMIX_VALUE_CREATE(kptr->value, 1); + kptr->value->type = PMIX_STRING; + kptr->value->data.string = strdup("0"); + PMIX_GDS_STORE_KV(rc, pmix_globals.mypeer, + &wildcard, + PMIX_INTERNAL, kptr); + if (PMIX_SUCCESS != rc) { + PMIX_ERROR_LOG(rc); + return rc; + } + PMIX_RELEASE(kptr); // maintain accounting + + return PMIX_SUCCESS; +} + + typedef struct { pmix_lock_t lock; pmix_event_t ev; @@ -1094,8 +1119,8 @@ static void finwait_cbfunc(struct pmix_peer_t *pr, if (tev->active) { tev->active = false; pmix_event_del(&tev->ev); // stop the timer - PMIX_WAKEUP_THREAD(&tev->lock); } + PMIX_WAKEUP_THREAD(&tev->lock); } PMIX_EXPORT pmix_status_t PMIx_tool_finalize(void) @@ -1104,10 +1129,9 @@ PMIX_EXPORT pmix_status_t PMIx_tool_finalize(void) pmix_cmd_t cmd = PMIX_FINALIZE_CMD; pmix_status_t rc; pmix_tool_timeout_t tev; - struct timeval tv = {2, 0}; + struct timeval tv = {5, 0}; int n; pmix_peer_t *peer; - pmix_setup_caddy_t *cd; PMIX_ACQUIRE_THREAD(&pmix_global_lock); if (1 != pmix_globals.init_cntr) { @@ -1116,6 +1140,7 @@ 
PMIX_EXPORT pmix_status_t PMIx_tool_finalize(void) return PMIX_SUCCESS; } pmix_globals.init_cntr = 0; + pmix_globals.mypeer->finalized = true; PMIX_RELEASE_THREAD(&pmix_global_lock); pmix_output_verbose(2, pmix_globals.debug_output, @@ -1163,6 +1188,7 @@ PMIX_EXPORT pmix_status_t PMIx_tool_finalize(void) /* wait for the ack to return */ PMIX_WAIT_THREAD(&tev.lock); PMIX_DESTRUCT_LOCK(&tev.lock); + if (tev.active) { pmix_event_del(&tev.ev); } @@ -1179,7 +1205,7 @@ PMIX_EXPORT pmix_status_t PMIx_tool_finalize(void) (void)pmix_progress_thread_pause(NULL); } - PMIX_RELEASE(pmix_client_globals.myserver); +// PMIX_RELEASE(pmix_client_globals.myserver); PMIX_LIST_DESTRUCT(&pmix_client_globals.pending_requests); for (n=0; n < pmix_client_globals.peers.size; n++) { if (NULL != (peer = (pmix_peer_t*)pmix_pointer_array_get_item(&pmix_client_globals.peers, n))) { @@ -1190,26 +1216,20 @@ PMIX_EXPORT pmix_status_t PMIx_tool_finalize(void) if (PMIX_PROC_IS_LAUNCHER(pmix_globals.mypeer)) { pmix_ptl_base_stop_listening(); - /* cleanout any IOF */ - for (n=0; n < PMIX_IOF_HOTEL_SIZE; n++) { - pmix_hotel_checkout_and_return_occupant(&pmix_server_globals.iof, n, (void**)&cd); - if (NULL != cd) { - PMIX_RELEASE(cd); - } - } - PMIX_DESTRUCT(&pmix_server_globals.iof); for (n=0; n < pmix_server_globals.clients.size; n++) { if (NULL != (peer = (pmix_peer_t*)pmix_pointer_array_get_item(&pmix_server_globals.clients, n))) { PMIX_RELEASE(peer); } } + + (void)pmix_mca_base_framework_close(&pmix_pnet_base_framework); PMIX_DESTRUCT(&pmix_server_globals.clients); PMIX_LIST_DESTRUCT(&pmix_server_globals.collectives); PMIX_LIST_DESTRUCT(&pmix_server_globals.remote_pnd); PMIX_LIST_DESTRUCT(&pmix_server_globals.local_reqs); PMIX_LIST_DESTRUCT(&pmix_server_globals.gdata); PMIX_LIST_DESTRUCT(&pmix_server_globals.events); - PMIX_LIST_DESTRUCT(&pmix_server_globals.nspaces); + PMIX_LIST_DESTRUCT(&pmix_server_globals.iof); } /* shutdown services */ @@ -1220,6 +1240,7 @@ PMIX_EXPORT pmix_status_t 
PMIx_tool_finalize(void) /* finalize the class/object system */ pmix_class_finalize(); + return PMIX_SUCCESS; } diff --git a/opal/mca/pmix/pmix3x/pmix/src/tools/pevent/Makefile.am b/opal/mca/pmix/pmix3x/pmix/src/tools/pevent/Makefile.am index 92fce9ac2de..10e2b321a9e 100644 --- a/opal/mca/pmix/pmix3x/pmix/src/tools/pevent/Makefile.am +++ b/opal/mca/pmix/pmix3x/pmix/src/tools/pevent/Makefile.am @@ -11,7 +11,7 @@ # All rights reserved. # Copyright (c) 2008-2014 Cisco Systems, Inc. All rights reserved. # Copyright (c) 2008 Sun Microsystems, Inc. All rights reserved. -# Copyright (c) 2017 Intel, Inc. All rights reserved. +# Copyright (c) 2017-2018 Intel, Inc. All rights reserved. # $COPYRIGHT$ # # Additional copyrights may follow @@ -29,4 +29,5 @@ endif # PMIX_INSTALL_BINARIES pevent_SOURCES = pevent.c pevent_LDADD = \ + $(PMIX_EXTRA_LTLIB) \ $(top_builddir)/src/libpmix.la diff --git a/opal/mca/pmix/pmix3x/pmix/src/tools/pevent/pevent.c b/opal/mca/pmix/pmix3x/pmix/src/tools/pevent/pevent.c index f472e78ed45..a5eccb86665 100644 --- a/opal/mca/pmix/pmix3x/pmix/src/tools/pevent/pevent.c +++ b/opal/mca/pmix/pmix3x/pmix/src/tools/pevent/pevent.c @@ -13,7 +13,7 @@ * All rights reserved. * Copyright (c) 2009-2012 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2011 Oak Ridge National Labs. All rights reserved. - * Copyright (c) 2013-2017 Intel, Inc. All rights reserved. + * Copyright (c) 2013-2018 Intel, Inc. All rights reserved. * Copyright (c) 2015 Mellanox Technologies, Inc. All rights reserved. * $COPYRIGHT$ * @@ -23,7 +23,9 @@ * */ -#define _GNU_SOURCE +#include "pmix_config.h" +#include "pmix_common.h" + #include #include #include diff --git a/opal/mca/pmix/pmix3x/pmix/src/tools/plookup/Makefile.am b/opal/mca/pmix/pmix3x/pmix/src/tools/plookup/Makefile.am index a273ea65d8d..bf7a64d5c45 100644 --- a/opal/mca/pmix/pmix3x/pmix/src/tools/plookup/Makefile.am +++ b/opal/mca/pmix/pmix3x/pmix/src/tools/plookup/Makefile.am @@ -11,7 +11,7 @@ # All rights reserved. 
# Copyright (c) 2008-2014 Cisco Systems, Inc. All rights reserved. # Copyright (c) 2008 Sun Microsystems, Inc. All rights reserved. -# Copyright (c) 2017 Intel, Inc. All rights reserved. +# Copyright (c) 2017-2018 Intel, Inc. All rights reserved. # $COPYRIGHT$ # # Additional copyrights may follow @@ -29,4 +29,5 @@ endif # PMIX_INSTALL_BINARIES plookup_SOURCES = plookup.c plookup_LDADD = \ - $(top_builddir)/src/libpmix.la + $(PMIX_EXTRA_LTLIB) \ + $(top_builddir)/src/libpmix.la diff --git a/opal/mca/pmix/pmix3x/pmix/src/tools/plookup/plookup.c b/opal/mca/pmix/pmix3x/pmix/src/tools/plookup/plookup.c index 3c8cccac2b2..d206ace840b 100644 --- a/opal/mca/pmix/pmix3x/pmix/src/tools/plookup/plookup.c +++ b/opal/mca/pmix/pmix3x/pmix/src/tools/plookup/plookup.c @@ -13,7 +13,7 @@ * All rights reserved. * Copyright (c) 2009-2012 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2011 Oak Ridge National Labs. All rights reserved. - * Copyright (c) 2013-2017 Intel, Inc. All rights reserved. + * Copyright (c) 2013-2018 Intel, Inc. All rights reserved. * Copyright (c) 2015 Mellanox Technologies, Inc. All rights reserved. 
* $COPYRIGHT$ * @@ -269,7 +269,7 @@ int main(int argc, char **argv) /* setup the keys */ PMIX_PDATA_CREATE(pdata, ndata); for (n=0; n < ndata; n++) { - (void)strncpy(pdata[n].key, keys[n], PMIX_MAX_KEYLEN); + pmix_strncpy(pdata[n].key, keys[n], PMIX_MAX_KEYLEN); } /* perform the lookup */ rc = PMIx_Lookup(pdata, ndata, info, ninfo); diff --git a/opal/mca/pmix/pmix3x/pmix/src/tools/pmix_info/Makefile.am b/opal/mca/pmix/pmix3x/pmix/src/tools/pmix_info/Makefile.am index 8fe2640c7ae..343cfaa3b84 100644 --- a/opal/mca/pmix/pmix3x/pmix/src/tools/pmix_info/Makefile.am +++ b/opal/mca/pmix/pmix3x/pmix/src/tools/pmix_info/Makefile.am @@ -31,7 +31,6 @@ AM_CFLAGS = \ -DPMIX_BUILD_LDFLAGS="\"@LDFLAGS@\"" \ -DPMIX_BUILD_LIBS="\"@LIBS@\"" \ -DPMIX_CC_ABSOLUTE="\"@PMIX_CC_ABSOLUTE@\"" \ - -DPMIX_CONFIGURE_CLI="\"@PMIX_CONFIGURE_CLI@\"" \ -DPMIX_GREEK_VERSION="\"@PMIX_GREEK_VERSION@\"" \ -DPMIX_REPO_REV="\"@PMIX_REPO_REV@\"" \ -DPMIX_RELEASE_DATE="\"@PMIX_RELEASE_DATE@\"" @@ -51,4 +50,5 @@ pmix_info_SOURCES = \ support.c pmix_info_LDADD = \ - $(top_builddir)/src/libpmix.la + $(PMIX_EXTRA_LTLIB) \ + $(top_builddir)/src/libpmix.la diff --git a/opal/mca/pmix/pmix3x/pmix/src/tools/pps/Makefile.am b/opal/mca/pmix/pmix3x/pmix/src/tools/pps/Makefile.am index 930a4672e95..ac7bc9eb423 100644 --- a/opal/mca/pmix/pmix3x/pmix/src/tools/pps/Makefile.am +++ b/opal/mca/pmix/pmix3x/pmix/src/tools/pps/Makefile.am @@ -11,7 +11,7 @@ # All rights reserved. # Copyright (c) 2008-2014 Cisco Systems, Inc. All rights reserved. # Copyright (c) 2008 Sun Microsystems, Inc. All rights reserved. -# Copyright (c) 2017 Intel, Inc. All rights reserved. +# Copyright (c) 2017-2018 Intel, Inc. All rights reserved. 
# $COPYRIGHT$ # # Additional copyrights may follow @@ -29,4 +29,5 @@ endif # PMIX_INSTALL_BINARIES pps_SOURCES = pps.c pps_LDADD = \ - $(top_builddir)/src/libpmix.la + $(PMIX_EXTRA_LTLIB) \ + $(top_builddir)/src/libpmix.la diff --git a/opal/mca/pmix/pmix3x/pmix/src/util/argv.c b/opal/mca/pmix/pmix3x/pmix/src/util/argv.c index f5c08f80a0c..8eb1e3f6dc3 100644 --- a/opal/mca/pmix/pmix3x/pmix/src/util/argv.c +++ b/opal/mca/pmix/pmix3x/pmix/src/util/argv.c @@ -11,7 +11,7 @@ * All rights reserved. * Copyright (c) 2007 Voltaire. All rights reserved. * Copyright (c) 2012 Los Alamos National Security, LLC. All rights reserved. - * Copyright (c) 2014-2016 Intel, Inc. All rights reserved. + * Copyright (c) 2014-2018 Intel, Inc. All rights reserved. * * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. @@ -221,7 +221,7 @@ static char **pmix_argv_split_inter(const char *src_string, int delimiter, if (NULL == argtemp) return NULL; - strncpy(argtemp, src_string, arglen); + pmix_strncpy(argtemp, src_string, arglen); argtemp[arglen] = '\0'; if (PMIX_SUCCESS != pmix_argv_append(&argc, &argv, argtemp)) { @@ -235,7 +235,7 @@ static char **pmix_argv_split_inter(const char *src_string, int delimiter, /* short argument, copy to buffer and add */ else { - strncpy(arg, src_string, arglen); + pmix_strncpy(arg, src_string, arglen); arg[arglen] = '\0'; if (PMIX_SUCCESS != pmix_argv_append(&argc, &argv, arg)) diff --git a/opal/mca/pmix/pmix3x/pmix/src/util/basename.c b/opal/mca/pmix/pmix3x/pmix/src/util/basename.c index 64e5c27e7e9..aa2076d7eaa 100644 --- a/opal/mca/pmix/pmix3x/pmix/src/util/basename.c +++ b/opal/mca/pmix/pmix3x/pmix/src/util/basename.c @@ -12,7 +12,7 @@ * Copyright (c) 2009-2014 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2014 Research Organization for Information Science * and Technology (RIST). All rights reserved. - * Copyright (c) 2014-2015 Intel, Inc. All rights reserved. 
+ * Copyright (c) 2014-2018 Intel, Inc. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -123,11 +123,7 @@ char* pmix_dirname(const char* filename) } if( p != filename ) { char* ret = (char*)malloc( p - filename + 1 ); -#ifdef HAVE_STRNCPY_S - strncpy_s( ret, (p - filename + 1), filename, p - filename ); -#else - strncpy(ret, filename, p - filename); -#endif + pmix_strncpy(ret, filename, p - filename); ret[p - filename] = '\0'; return pmix_make_filename_os_friendly(ret); } diff --git a/opal/mca/pmix/pmix3x/pmix/src/util/compress.c b/opal/mca/pmix/pmix3x/pmix/src/util/compress.c index 867a3d5e57d..d71cdf37c63 100644 --- a/opal/mca/pmix/pmix3x/pmix/src/util/compress.c +++ b/opal/mca/pmix/pmix3x/pmix/src/util/compress.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016 Intel, Inc. All rights reserved. + * Copyright (c) 2016-2019 Intel, Inc. All rights reserved. * Copyright (c) 2017 Cisco Systems, Inc. All rights reserved. * $COPYRIGHT$ * @@ -31,6 +31,7 @@ bool pmix_util_compress_string(char *instring, size_t len, outlen; uint8_t *tmp, *ptr; uint32_t inlen; + int rc; /* set default output */ *outbytes = NULL; @@ -43,7 +44,6 @@ bool pmix_util_compress_string(char *instring, /* get an upper bound on the required output storage */ len = deflateBound(&strm, inlen); if (NULL == (tmp = (uint8_t*)malloc(len))) { - *outbytes = NULL; return false; } strm.next_in = (uint8_t*)instring; @@ -54,8 +54,12 @@ bool pmix_util_compress_string(char *instring, strm.avail_out = len; strm.next_out = tmp; - deflate (&strm, Z_FINISH); + rc = deflate (&strm, Z_FINISH); deflateEnd (&strm); + if (Z_OK != rc) { + free(tmp); + return false; + } /* allocate 4 bytes beyond the size reqd by zlib so we * can pass the size of the uncompressed string to the diff --git a/opal/mca/pmix/pmix3x/pmix/src/util/error.c b/opal/mca/pmix/pmix3x/pmix/src/util/error.c index 452582407b2..7930be1c339 100644 --- a/opal/mca/pmix/pmix3x/pmix/src/util/error.c +++ 
b/opal/mca/pmix/pmix3x/pmix/src/util/error.c @@ -159,28 +159,24 @@ PMIX_EXPORT const char* PMIx_Error_string(pmix_status_t errnum) return "LOST-PEER-CONNECTION"; case PMIX_ERR_LOST_CONNECTION_TO_CLIENT: return "LOST-CONNECTION-TO-CLIENT"; - - case PMIX_QUERY_PARTIAL_SUCCESS: return "QUERY-PARTIAL-SUCCESS"; - - case PMIX_NOTIFY_ALLOC_COMPLETE: return "PMIX ALLOC OPERATION COMPLETE"; - - case PMIX_JCTRL_CHECKPOINT: return "PMIX JOB CONTROL CHECKPOINT"; case PMIX_JCTRL_CHECKPOINT_COMPLETE: return "PMIX JOB CONTROL CHECKPOINT COMPLETE"; case PMIX_JCTRL_PREEMPT_ALERT: return "PMIX PRE-EMPTION ALERT"; - - case PMIX_MONITOR_HEARTBEAT_ALERT: return "PMIX HEARTBEAT ALERT"; case PMIX_MONITOR_FILE_ALERT: return "PMIX FILE MONITOR ALERT"; + case PMIX_PROC_TERMINATED: + return "PROC-TERMINATED"; + case PMIX_ERR_INVALID_TERMINATION: + return "INVALID-TERMINATION"; case PMIX_ERR_EVENT_REGISTRATION: return "EVENT-REGISTRATION"; @@ -196,19 +192,28 @@ PMIX_EXPORT const char* PMIx_Error_string(pmix_status_t errnum) return "PROC-HAS-CONNECTED"; case PMIX_CONNECT_REQUESTED: return "CONNECT-REQUESTED"; + case PMIX_OPENMP_PARALLEL_ENTERED: + return "OPENMP-PARALLEL-ENTERED"; + case PMIX_OPENMP_PARALLEL_EXITED: + return "OPENMP-PARALLEL-EXITED"; + case PMIX_LAUNCH_DIRECTIVE: return "LAUNCH-DIRECTIVE"; case PMIX_LAUNCHER_READY: return "LAUNCHER-READY"; case PMIX_OPERATION_IN_PROGRESS: return "OPERATION-IN-PROGRESS"; - + case PMIX_OPERATION_SUCCEEDED: + return "OPERATION-SUCCEEDED"; + case PMIX_ERR_INVALID_OPERATION: + return "INVALID-OPERATION"; case PMIX_ERR_NODE_DOWN: return "NODE-DOWN"; case PMIX_ERR_NODE_OFFLINE: return "NODE-OFFLINE"; - + case PMIX_ERR_SYS_OTHER: + return "UNDEFINED-SYSTEM-EVENT"; case PMIX_EVENT_NO_ACTION_TAKEN: return "EVENT-NO-ACTION-TAKEN"; diff --git a/opal/mca/pmix/pmix3x/pmix/src/util/hash.c b/opal/mca/pmix/pmix3x/pmix/src/util/hash.c index 1a0a95744ea..806781186b2 100644 --- a/opal/mca/pmix/pmix3x/pmix/src/util/hash.c +++ 
b/opal/mca/pmix/pmix3x/pmix/src/util/hash.c @@ -6,7 +6,7 @@ * reserved. * Copyright (c) 2011-2014 Los Alamos National Security, LLC. All rights * reserved. - * Copyright (c) 2014-2017 Intel, Inc. All rights reserved. + * Copyright (c) 2014-2018 Intel, Inc. All rights reserved. * Copyright (c) 2015-2018 Research Organization for Information Science * and Technology (RIST). All rights reserved. * Copyright (c) 2016 Mellanox Technologies, Inc. @@ -172,7 +172,7 @@ pmix_status_t pmix_hash_fetch(pmix_hash_table_t *table, pmix_rank_t rank, /* copy the list elements */ n=0; PMIX_LIST_FOREACH(hv, &proc_data->data, pmix_kval_t) { - (void)strncpy(info[n].key, hv->key, PMIX_MAX_KEYLEN); + pmix_strncpy(info[n].key, hv->key, PMIX_MAX_KEYLEN); pmix_value_xfer(&info[n].value, hv->value); ++n; } diff --git a/opal/mca/pmix/pmix3x/pmix/src/util/keyval_parse.c b/opal/mca/pmix/pmix3x/pmix/src/util/keyval_parse.c index c07e65e6681..52c68fa50d3 100644 --- a/opal/mca/pmix/pmix3x/pmix/src/util/keyval_parse.c +++ b/opal/mca/pmix/pmix3x/pmix/src/util/keyval_parse.c @@ -12,7 +12,7 @@ * All rights reserved. * Copyright (c) 2015-2016 Los Alamos National Security, LLC. All rights * reserved. - * Copyright (c) 2016 Intel, Inc. All rights reserved + * Copyright (c) 2016-2018 Intel, Inc. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -136,7 +136,7 @@ static int parse_line(void) key_buffer = tmp; } - strncpy(key_buffer, pmix_util_keyval_yytext, key_buffer_len); + pmix_strncpy(key_buffer, pmix_util_keyval_yytext, key_buffer_len-1); /* The first thing we have to see is an "=" */ @@ -259,7 +259,7 @@ static int save_param_name (void) key_buffer = tmp; } - strncpy (key_buffer, pmix_util_keyval_yytext, key_buffer_len); + pmix_strncpy (key_buffer, pmix_util_keyval_yytext, key_buffer_len-1); return PMIX_SUCCESS; } diff --git a/opal/mca/pmix/pmix3x/pmix/src/util/name_fns.c b/opal/mca/pmix/pmix3x/pmix/src/util/name_fns.c index 14f19aef022..96b46ea9d58 100644 --- a/opal/mca/pmix/pmix3x/pmix/src/util/name_fns.c +++ b/opal/mca/pmix/pmix3x/pmix/src/util/name_fns.c @@ -12,7 +12,7 @@ * Copyright (c) 2010 Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2014-2016 Research Organization for Information Science * and Technology (RIST). All rights reserved. - * Copyright (c) 2016-2018 Intel, Inc. All rights reserved. + * Copyright (c) 2016-2019 Intel, Inc. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -98,6 +98,7 @@ char* pmix_util_print_name_args(const pmix_proc_t *name) { pmix_print_args_buffers_t *ptr; char *rank; + int index; /* get the next buffer */ ptr = get_print_name_buffer(); @@ -105,29 +106,36 @@ char* pmix_util_print_name_args(const pmix_proc_t *name) PMIX_ERROR_LOG(PMIX_ERR_OUT_OF_RESOURCE); return pmix_print_args_null; } - /* cycle around the ring */ - if (PMIX_PRINT_NAME_ARG_NUM_BUFS == ptr->cntr) { - ptr->cntr = 0; - } /* protect against NULL names */ if (NULL == name) { - snprintf(ptr->buffers[ptr->cntr++], PMIX_PRINT_NAME_ARGS_MAX_SIZE, "[NO-NAME]"); - return ptr->buffers[ptr->cntr-1]; + index = ptr->cntr; + snprintf(ptr->buffers[index], PMIX_PRINT_NAME_ARGS_MAX_SIZE, "[NO-NAME]"); + ptr->cntr++; + if (PMIX_PRINT_NAME_ARG_NUM_BUFS == ptr->cntr) { + ptr->cntr = 0; + } + return ptr->buffers[index]; } rank = pmix_util_print_rank(name->rank); - snprintf(ptr->buffers[ptr->cntr++], + index = ptr->cntr; + snprintf(ptr->buffers[index], PMIX_PRINT_NAME_ARGS_MAX_SIZE, - "[%s,%s]", name->nspace, rank); + "[%s:%s]", name->nspace, rank); + ptr->cntr++; + if (PMIX_PRINT_NAME_ARG_NUM_BUFS == ptr->cntr) { + ptr->cntr = 0; + } - return ptr->buffers[ptr->cntr-1]; + return ptr->buffers[index]; } char* pmix_util_print_rank(const pmix_rank_t vpid) { pmix_print_args_buffers_t *ptr; + int index; ptr = get_print_name_buffer(); @@ -136,19 +144,19 @@ char* pmix_util_print_rank(const pmix_rank_t vpid) return pmix_print_args_null; } - /* cycle around the ring */ - if (PMIX_PRINT_NAME_ARG_NUM_BUFS == ptr->cntr) { - ptr->cntr = 0; - } - + index = ptr->cntr; if (PMIX_RANK_UNDEF == vpid) { - snprintf(ptr->buffers[ptr->cntr++], PMIX_PRINT_NAME_ARGS_MAX_SIZE, "UNDEF"); + snprintf(ptr->buffers[index], PMIX_PRINT_NAME_ARGS_MAX_SIZE, "UNDEF"); } else if (PMIX_RANK_WILDCARD == vpid) { - snprintf(ptr->buffers[ptr->cntr++], PMIX_PRINT_NAME_ARGS_MAX_SIZE, "WILDCARD"); + snprintf(ptr->buffers[index], PMIX_PRINT_NAME_ARGS_MAX_SIZE, 
"WILDCARD"); } else { - snprintf(ptr->buffers[ptr->cntr++], + snprintf(ptr->buffers[index], PMIX_PRINT_NAME_ARGS_MAX_SIZE, "%ld", (long)vpid); } - return ptr->buffers[ptr->cntr-1]; + ptr->cntr++; + if (PMIX_PRINT_NAME_ARG_NUM_BUFS == ptr->cntr) { + ptr->cntr = 0; + } + return ptr->buffers[index]; } diff --git a/opal/mca/pmix/pmix3x/pmix/src/util/net.c b/opal/mca/pmix/pmix3x/pmix/src/util/net.c index 22baf7d5a42..34661df3f2d 100644 --- a/opal/mca/pmix/pmix3x/pmix/src/util/net.c +++ b/opal/mca/pmix/pmix3x/pmix/src/util/net.c @@ -50,23 +50,7 @@ #include #endif #ifdef HAVE_NET_IF_H -#if defined(__APPLE__) && defined(_LP64) -/* Apple engineering suggested using options align=power as a - workaround for a bug in OS X 10.4 (Tiger) that prevented ioctl(..., - SIOCGIFCONF, ...) from working properly in 64 bit mode on Power PC. - It turns out that the underlying issue is the size of struct - ifconf, which the kernel expects to be 12 and natural 64 bit - alignment would make 16. The same bug appears in 64 bit mode on - Intel macs, but align=power is a no-op there, so instead, use the - pack pragma to instruct the compiler to pack on 4 byte words, which - has the same effect as align=power for our needs and works on both - Intel and Power PC Macs. */ -#pragma pack(push,4) -#endif #include -#if defined(__APPLE__) && defined(_LP64) -#pragma pack(pop) -#endif #endif #ifdef HAVE_NETDB_H #include diff --git a/opal/mca/pmix/pmix3x/pmix/src/util/os_path.c b/opal/mca/pmix/pmix3x/pmix/src/util/os_path.c index 6abedaf8a30..3f94f9bb7dc 100644 --- a/opal/mca/pmix/pmix3x/pmix/src/util/os_path.c +++ b/opal/mca/pmix/pmix3x/pmix/src/util/os_path.c @@ -9,7 +9,7 @@ * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. - * Copyright (c) 2015-2016 Intel, Inc. All rights reserved. + * Copyright (c) 2015-2018 Intel, Inc. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -34,7 +34,7 @@ static const char *path_sep = PMIX_PATH_SEP; -char *pmix_os_path(bool relative, ...) +char *pmix_os_path(int relative, ...) { va_list ap; char *element, *path; diff --git a/opal/mca/pmix/pmix3x/pmix/src/util/os_path.h b/opal/mca/pmix/pmix3x/pmix/src/util/os_path.h index 9f3c71bfb57..3933d04630c 100644 --- a/opal/mca/pmix/pmix3x/pmix/src/util/os_path.h +++ b/opal/mca/pmix/pmix3x/pmix/src/util/os_path.h @@ -9,7 +9,7 @@ * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. - * Copyright (c) 2015-2017 Intel, Inc. All rights reserved. + * Copyright (c) 2015-2018 Intel, Inc. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -64,8 +64,13 @@ BEGIN_C_DECLS * provided path elements, separated by the path separator character * appropriate to the local operating system. The path_name string has been malloc'd * and therefore the user is responsible for free'ing the field. + * + * Note that the "relative" argument is int instead of bool, because + * passing a parameter that undergoes default argument promotion to + * va_start() has undefined behavior (according to clang warnings on + * MacOS High Sierra). */ -PMIX_EXPORT char *pmix_os_path(bool relative, ...) __pmix_attribute_malloc__ __pmix_attribute_sentinel__ __pmix_attribute_warn_unused_result__; +PMIX_EXPORT char *pmix_os_path(int relative, ...) __pmix_attribute_malloc__ __pmix_attribute_sentinel__ __pmix_attribute_warn_unused_result__; /** * Convert the path to be OS friendly. On UNIX this function will diff --git a/opal/mca/pmix/pmix3x/pmix/src/util/output.c b/opal/mca/pmix/pmix3x/pmix/src/util/output.c index 1d3d4148dcd..4e90280c8ae 100644 --- a/opal/mca/pmix/pmix3x/pmix/src/util/output.c +++ b/opal/mca/pmix/pmix3x/pmix/src/util/output.c @@ -10,7 +10,7 @@ * Copyright (c) 2004-2006 The Regents of the University of California. 
* All rights reserved. * Copyright (c) 2007-2008 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2014-2017 Intel, Inc. All rights reserved. + * Copyright (c) 2014-2019 Intel, Inc. All rights reserved. * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. * $COPYRIGHT$ @@ -125,7 +125,7 @@ PMIX_CLASS_INSTANCE(pmix_output_stream_t, pmix_object_t, construct, destruct); bool pmix_output_init(void) { int i; - char hostname[PMIX_MAXHOSTNAMELEN]; + char hostname[PMIX_MAXHOSTNAMELEN] = {0}; char *str; if (initialized) { @@ -176,7 +176,7 @@ bool pmix_output_init(void) } else { verbose.lds_want_stderr = true; } - gethostname(hostname, sizeof(hostname)); + gethostname(hostname, sizeof(hostname)-1); hostname[sizeof(hostname)-1] = '\0'; if (0 > asprintf(&verbose.lds_prefix, "[%s:%05d] ", hostname, getpid())) { return PMIX_ERR_NOMEM; @@ -256,7 +256,7 @@ bool pmix_output_switch(int output_id, bool enable) void pmix_output_reopen_all(void) { char *str; - char hostname[PMIX_MAXHOSTNAMELEN]; + char hostname[PMIX_MAXHOSTNAMELEN] = {0}; str = getenv("PMIX_OUTPUT_STDERR_FD"); if (NULL != str) { @@ -332,15 +332,10 @@ PMIX_EXPORT void pmix_output(int output_id, const char *format, ...) /* * Send a message to a stream if the verbose level is high enough */ - PMIX_EXPORT void pmix_output_verbose(int level, int output_id, const char *format, ...) 
+ PMIX_EXPORT bool pmix_output_check_verbosity(int level, int output_id) { - if (output_id >= 0 && output_id < PMIX_OUTPUT_MAX_STREAMS && - info[output_id].ldi_verbose_level >= level) { - va_list arglist; - va_start(arglist, format); - output(output_id, format, arglist); - va_end(arglist); - } + return (output_id >= 0 && output_id < PMIX_OUTPUT_MAX_STREAMS && + info[output_id].ldi_verbose_level >= level); } @@ -694,7 +689,7 @@ static int open_file(int i) if (NULL == filename) { return PMIX_ERR_OUT_OF_RESOURCE; } - strncpy(filename, output_dir, PMIX_PATH_MAX); + pmix_strncpy(filename, output_dir, PMIX_PATH_MAX-1); strcat(filename, "/"); if (NULL != output_prefix) { strcat(filename, output_prefix); diff --git a/opal/mca/pmix/pmix3x/pmix/src/util/output.h b/opal/mca/pmix/pmix3x/pmix/src/util/output.h index c3274bab7d0..5e8fa677b5e 100644 --- a/opal/mca/pmix/pmix3x/pmix/src/util/output.h +++ b/opal/mca/pmix/pmix3x/pmix/src/util/output.h @@ -11,7 +11,7 @@ * All rights reserved. * Copyright (c) 2007-2011 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2010 Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2015-2017 Intel, Inc. All rights reserved. + * Copyright (c) 2015-2019 Intel, Inc. All rights reserved. * Copyright (c) 2016 Research Organization for Information Science * and Technology (RIST). All rights reserved. * $COPYRIGHT$ @@ -414,12 +414,13 @@ PMIX_EXPORT void pmix_output(int output_id, const char *format, ...) __pmix_attr * * @see pmix_output_set_verbosity() */ -PMIX_EXPORT void pmix_output_verbose(int verbose_level, int output_id, - const char *format, ...) __pmix_attribute_format__(__printf__, 3, 4); +#define pmix_output_verbose(verbose_level, output_id, ...) \ + if (pmix_output_check_verbosity(verbose_level, output_id)) { \ + pmix_output(output_id, __VA_ARGS__); \ + } + +PMIX_EXPORT bool pmix_output_check_verbosity(int verbose_level, int output_id); -/** -* Same as pmix_output_verbose(), but takes a va_list form of varargs. 
-*/ PMIX_EXPORT void pmix_output_vverbose(int verbose_level, int output_id, const char *format, va_list ap) __pmix_attribute_format__(__printf__, 3, 0); diff --git a/opal/mca/pmix/pmix3x/pmix/src/util/path.c b/opal/mca/pmix/pmix3x/pmix/src/util/path.c index ad7e911a46d..8705759c3b9 100644 --- a/opal/mca/pmix/pmix3x/pmix/src/util/path.c +++ b/opal/mca/pmix/pmix3x/pmix/src/util/path.c @@ -15,6 +15,8 @@ * All rights reserved. * Copyright (c) 2014-2017 Intel, Inc. All rights reserved. * Copyright (c) 2016 University of Houston. All rights reserved. + * Copyright (c) 2018 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -54,9 +56,6 @@ #ifdef HAVE_SYS_STATVFS_H #include #endif -#ifdef HAVE_SYS_MOUNT_H -#include -#endif #ifdef HAVE_MNTENT_H #include #endif diff --git a/opal/mca/pmix/pmix3x/pmix/src/util/pif.c b/opal/mca/pmix/pmix3x/pmix/src/util/pif.c index 78ca9559ffc..9696502f4ba 100644 --- a/opal/mca/pmix/pmix3x/pmix/src/util/pif.c +++ b/opal/mca/pmix/pmix3x/pmix/src/util/pif.c @@ -16,7 +16,7 @@ * reserved. * Copyright (c) 2015-2016 Research Organization for Information Science * and Technology (RIST). All rights reserved. - * Copyright (c) 2016-2017 Intel, Inc. All rights reserved. + * Copyright (c) 2016-2018 Intel, Inc. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -51,23 +51,7 @@ #include #endif #ifdef HAVE_NET_IF_H -#if defined(__APPLE__) && defined(_LP64) -/* Apple engineering suggested using options align=power as a - workaround for a bug in OS X 10.4 (Tiger) that prevented ioctl(..., - SIOCGIFCONF, ...) from working properly in 64 bit mode on Power PC. - It turns out that the underlying issue is the size of struct - ifconf, which the kernel expects to be 12 and natural 64 bit - alignment would make 16. 
The same bug appears in 64 bit mode on - Intel macs, but align=power is a no-op there, so instead, use the - pack pragma to instruct the compiler to pack on 4 byte words, which - has the same effect as align=power for our needs and works on both - Intel and Power PC Macs. */ -#pragma pack(push,4) -#endif #include -#if defined(__APPLE__) && defined(_LP64) -#pragma pack(pop) -#endif #endif #ifdef HAVE_NETDB_H #include @@ -218,7 +202,7 @@ int pmix_ifaddrtoname(const char* if_addr, char* if_name, int length) memcpy (&ipv4, r->ai_addr, r->ai_addrlen); if (inaddr->sin_addr.s_addr == ipv4.sin_addr.s_addr) { - strncpy(if_name, intf->if_name, length); + pmix_strncpy(if_name, intf->if_name, length-1); freeaddrinfo (res); return PMIX_SUCCESS; } @@ -226,7 +210,7 @@ int pmix_ifaddrtoname(const char* if_addr, char* if_name, int length) else { if (IN6_ARE_ADDR_EQUAL(&((struct sockaddr_in6*) &intf->if_addr)->sin6_addr, &((struct sockaddr_in6*) r->ai_addr)->sin6_addr)) { - strncpy(if_name, intf->if_name, length); + pmix_strncpy(if_name, intf->if_name, length-1); freeaddrinfo (res); return PMIX_SUCCESS; } @@ -493,7 +477,7 @@ int pmix_ifindextoname(int if_index, char* if_name, int length) intf != (pmix_pif_t*)pmix_list_get_end(&pmix_if_list); intf = (pmix_pif_t*)pmix_list_get_next(intf)) { if (intf->if_index == if_index) { - strncpy(if_name, intf->if_name, length); + pmix_strncpy(if_name, intf->if_name, length-1); return PMIX_SUCCESS; } } @@ -514,7 +498,7 @@ int pmix_ifkindextoname(int if_kindex, char* if_name, int length) intf != (pmix_pif_t*)pmix_list_get_end(&pmix_if_list); intf = (pmix_pif_t*)pmix_list_get_next(intf)) { if (intf->if_kernel_index == if_kindex) { - strncpy(if_name, intf->if_name, length); + pmix_strncpy(if_name, intf->if_name, length-1); return PMIX_SUCCESS; } } diff --git a/opal/mca/pmix/pmix3x/pmix/src/util/pif.h b/opal/mca/pmix/pmix3x/pmix/src/util/pif.h index fb9f1b79a24..57ed1bfd749 100644 --- a/opal/mca/pmix/pmix3x/pmix/src/util/pif.h +++ 
b/opal/mca/pmix/pmix3x/pmix/src/util/pif.h @@ -13,7 +13,7 @@ * reserved. * Copyright (c) 2008 Sun Microsystems, Inc. All rights reserved. * Copyright (c) 2013 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2016 Intel, Inc. All rights reserved. + * Copyright (c) 2016-2019 Intel, Inc. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -38,9 +38,7 @@ #include #endif -#ifndef IF_NAMESIZE -#define IF_NAMESIZE 32 -#endif +#define PMIX_IF_NAMESIZE 256 BEGIN_C_DECLS diff --git a/opal/mca/pmix/pmix3x/pmix/src/util/pmix_environ.c b/opal/mca/pmix/pmix3x/pmix/src/util/pmix_environ.c index 1e1cfaaa880..2662a86bff7 100644 --- a/opal/mca/pmix/pmix3x/pmix/src/util/pmix_environ.c +++ b/opal/mca/pmix/pmix3x/pmix/src/util/pmix_environ.c @@ -12,8 +12,10 @@ * Copyright (c) 2006 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2007-2013 Los Alamos National Security, LLC. All rights * reserved. - * Copyright (c) 2014-2017 Intel, Inc. All rights reserved. + * Copyright (c) 2014-2019 Intel, Inc. All rights reserved. * Copyright (c) 2016 IBM Corporation. All rights reserved. + * Copyright (c) 2019 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -30,10 +32,12 @@ #include #include "src/util/printf.h" +#include "src/util/error.h" #include "src/util/argv.h" #include "src/util/pmix_environ.h" #define PMIX_DEFAULT_TMPDIR "/tmp" +#define PMIX_MAX_ENVAR_LENGTH 100000 /* * Merge two environ-like char arrays, ensuring that there are no @@ -74,7 +78,7 @@ char **pmix_environ_merge(char **minor, char **major) pmix_setenv(minor[i], NULL, false, &ret); } else { - /* strdup minor[i] in case it's a constat string */ + /* strdup minor[i] in case it's a constant string */ name = strdup(minor[i]); value = name + (value - minor[i]); @@ -99,9 +103,60 @@ char **pmix_environ_merge(char **minor, char **major) int i; char *newvalue, *compare; size_t len; + bool valid; - /* Make the new value */ + /* Check the bozo case */ + if( NULL == env ) { + return PMIX_ERR_BAD_PARAM; + } + if (NULL != value) { + /* check the string for unacceptable length - i.e., ensure + * it is NULL-terminated */ + valid = false; + for (i=0; i < PMIX_MAX_ENVAR_LENGTH; i++) { + if ('\0' == value[i]) { + valid = true; + break; + } + } + if (!valid) { + PMIX_ERROR_LOG(PMIX_ERR_BAD_PARAM); + return PMIX_ERR_BAD_PARAM; + } + } + + /* If this is the "environ" array, use putenv or setenv */ + if (*env == environ) { + /* THIS IS POTENTIALLY A MEMORY LEAK! 
But I am doing it + because so that we don't violate the law of least + astonishmet for PMIX developers (i.e., those that don't + check the return code of pmix_setenv() and notice that we + returned an error if you passed in the real environ) */ +#if defined (HAVE_SETENV) + if (NULL == value) { + /* this is actually an unsetenv request */ + unsetenv(name); + } else { + setenv(name, value, overwrite); + } +#else + /* Make the new value */ + if (NULL == value) { + i = asprintf(&newvalue, "%s=", name); + } else { + i = asprintf(&newvalue, "%s=%s", name, value); + } + if (NULL == newvalue || 0 > i) { + return PMIX_ERR_OUT_OF_RESOURCE; + } + putenv(newvalue); + /* cannot free it as putenv doesn't copy the value */ +#endif + return PMIX_SUCCESS; + } + + /* Make the new value */ if (NULL == value) { i = asprintf(&newvalue, "%s=", name); } else { @@ -111,28 +166,13 @@ char **pmix_environ_merge(char **minor, char **major) return PMIX_ERR_OUT_OF_RESOURCE; } - /* Check the bozo case */ - - if( NULL == env ) { - return PMIX_ERR_BAD_PARAM; - } else if (NULL == *env) { + if (NULL == *env) { i = 0; pmix_argv_append(&i, env, newvalue); free(newvalue); return PMIX_SUCCESS; } - /* If this is the "environ" array, use putenv */ - if( *env == environ ) { - /* THIS IS POTENTIALLY A MEMORY LEAK! But I am doing it - because so that we don't violate the law of least - astonishmet for PMIX developers (i.e., those that don't - check the return code of pmix_setenv() and notice that we - returned an error if you passed in the real environ) */ - putenv(newvalue); - return PMIX_SUCCESS; - } - /* Make something easy to compare to */ i = asprintf(&compare, "%s=", name); diff --git a/opal/mca/pmix/pmix3x/pmix/src/util/show_help.c b/opal/mca/pmix/pmix3x/pmix/src/util/show_help.c index 1654d39ead3..ae95ac691b1 100644 --- a/opal/mca/pmix/pmix3x/pmix/src/util/show_help.c +++ b/opal/mca/pmix/pmix3x/pmix/src/util/show_help.c @@ -12,7 +12,7 @@ * Copyright (c) 2008 Cisco Systems, Inc. All rights reserved. 
* Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. - * Copyright (c) 2016-2017 Intel, Inc. All rights reserved. + * Copyright (c) 2016-2018 Intel, Inc. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -49,9 +49,9 @@ static char **search_dirs = NULL; * Local functions */ static int pmix_show_vhelp_internal(const char *filename, const char *topic, - bool want_error_header, va_list arglist); + int want_error_header, va_list arglist); static int pmix_show_help_internal(const char *filename, const char *topic, - bool want_error_header, ...); + int want_error_header, ...); pmix_show_help_fn_t pmix_show_help = pmix_show_help_internal; pmix_show_vhelp_fn_t pmix_show_vhelp = pmix_show_vhelp_internal; @@ -90,7 +90,7 @@ int pmix_show_help_finalize(void) * not optimization. :-) */ static int array2string(char **outstring, - bool want_error_header, char **lines) + int want_error_header, char **lines) { int i, count; size_t len; @@ -298,7 +298,7 @@ static int load_array(char ***array, const char *filename, const char *topic) } char *pmix_show_help_vstring(const char *filename, const char *topic, - bool want_error_header, va_list arglist) + int want_error_header, va_list arglist) { int rc; char *single_string, *output, **array = NULL; @@ -324,7 +324,7 @@ char *pmix_show_help_vstring(const char *filename, const char *topic, } char *pmix_show_help_string(const char *filename, const char *topic, - bool want_error_handler, ...) + int want_error_handler, ...) 
{ char *output; va_list arglist; @@ -338,7 +338,7 @@ char *pmix_show_help_string(const char *filename, const char *topic, } static int pmix_show_vhelp_internal(const char *filename, const char *topic, - bool want_error_header, va_list arglist) + int want_error_header, va_list arglist) { char *output; @@ -356,7 +356,7 @@ static int pmix_show_vhelp_internal(const char *filename, const char *topic, } static int pmix_show_help_internal(const char *filename, const char *topic, - bool want_error_header, ...) + int want_error_header, ...) { va_list arglist; int rc; diff --git a/opal/mca/pmix/pmix3x/pmix/src/util/show_help.h b/opal/mca/pmix/pmix3x/pmix/src/util/show_help.h index 8c23887dc38..1129a762a71 100644 --- a/opal/mca/pmix/pmix3x/pmix/src/util/show_help.h +++ b/opal/mca/pmix/pmix3x/pmix/src/util/show_help.h @@ -10,7 +10,7 @@ * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2008-2011 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2016-2017 Intel, Inc. All rights reserved. + * Copyright (c) 2016-2018 Intel, Inc. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -129,9 +129,14 @@ PMIX_EXPORT int pmix_show_help_finalize(void); * (typically $prefix/share/pmix), and looks up the message * based on the topic, and displays it. If want_error_header is * true, a header and footer of asterisks are also displayed. + * + * Note that the "want_error_header" argument is int instead of bool, + * because passing a parameter that undergoes default argument + * promotion to va_start() has undefined behavior (according to clang + * warnings on MacOS High Sierra). */ typedef int (*pmix_show_help_fn_t)(const char *filename, const char *topic, - bool want_error_header, ...); + int want_error_header, ...); PMIX_EXPORT extern pmix_show_help_fn_t pmix_show_help; /** @@ -139,7 +144,7 @@ PMIX_EXPORT extern pmix_show_help_fn_t pmix_show_help; * a va_list form of varargs. 
*/ typedef int (*pmix_show_vhelp_fn_t)(const char *filename, const char *topic, - bool want_error_header, va_list ap); + int want_error_header, va_list ap); PMIX_EXPORT extern pmix_show_vhelp_fn_t pmix_show_vhelp; /** @@ -148,7 +153,7 @@ PMIX_EXPORT extern pmix_show_vhelp_fn_t pmix_show_vhelp; */ PMIX_EXPORT char* pmix_show_help_string(const char *filename, const char *topic, - bool want_error_header, ...); + int want_error_header, ...); /** * This function does the same thing as pmix_show_help_string(), but @@ -156,7 +161,7 @@ PMIX_EXPORT char* pmix_show_help_string(const char *filename, */ PMIX_EXPORT char* pmix_show_help_vstring(const char *filename, const char *topic, - bool want_error_header, va_list ap); + int want_error_header, va_list ap); /** * This function adds another search location for the files that diff --git a/opal/mca/pmix/pmix3x/pmix/test/Makefile.am b/opal/mca/pmix/pmix3x/pmix/test/Makefile.am index 64ad119878f..c886e3b1fb3 100644 --- a/opal/mca/pmix/pmix3x/pmix/test/Makefile.am +++ b/opal/mca/pmix/pmix3x/pmix/test/Makefile.am @@ -11,7 +11,7 @@ # All rights reserved. # Copyright (c) 2006-2010 Cisco Systems, Inc. All rights reserved. # Copyright (c) 2012-2013 Los Alamos National Security, Inc. All rights reserved. -# Copyright (c) 2013-2017 Intel, Inc. All rights reserved. +# Copyright (c) 2013-2019 Intel, Inc. All rights reserved. # Copyright (c) 2018 Research Organization for Information Science # and Technology (RIST). All rights reserved. 
# $COPYRIGHT$ @@ -29,13 +29,65 @@ endif headers = test_common.h cli_stages.h server_callbacks.h utils.h test_fence.h \ test_publish.h test_spawn.h test_cd.h test_resolve_peers.h test_error.h \ - test_replace.h test_internal.h + test_replace.h test_internal.h test_server.h AM_CPPFLAGS = -I$(top_builddir)/src -I$(top_builddir)/src/include -I$(top_builddir)/src/api -noinst_SCRIPTS = pmix_client_otheruser.sh +noinst_SCRIPTS = pmix_client_otheruser.sh \ + run_tests00.pl \ + run_tests01.pl \ + run_tests02.pl \ + run_tests03.pl \ + run_tests04.pl \ + run_tests05.pl \ + run_tests06.pl \ + run_tests07.pl \ + run_tests08.pl \ + run_tests09.pl \ + run_tests10.pl \ + run_tests11.pl \ + run_tests12.pl \ + run_tests13.pl \ + run_tests14.pl \ + run_tests15.pl + noinst_PROGRAMS = +######################### +# Support for "make check" + +check_PROGRAMS = \ + pmix_test \ + pmix_client \ + pmix_regex + +if WANT_PMI_BACKWARD +check_PROGRAMS += \ + pmi_client \ + pmi2_client +endif + +TESTS = \ + run_tests00.pl \ + run_tests01.pl \ + run_tests02.pl \ + run_tests03.pl \ + run_tests04.pl \ + run_tests05.pl \ + run_tests06.pl \ + run_tests07.pl \ + run_tests08.pl \ + run_tests09.pl \ + run_tests10.pl \ + run_tests11.pl \ + run_tests12.pl \ + run_tests13.pl \ + run_tests14.pl \ + run_tests15.pl + + +########################## + if WANT_PMI_BACKWARD noinst_PROGRAMS += pmi_client pmi2_client endif @@ -43,7 +95,7 @@ endif noinst_PROGRAMS += pmix_test pmix_client pmix_regex pmix_test_SOURCES = $(headers) \ - pmix_test.c test_common.c cli_stages.c server_callbacks.c utils.c + pmix_test.c test_common.c cli_stages.c server_callbacks.c test_server.c utils.c pmix_test_LDFLAGS = $(PMIX_PKG_CONFIG_LDFLAGS) pmix_test_LDADD = \ $(top_builddir)/src/libpmix.la @@ -70,7 +122,7 @@ pmix_client_LDADD = \ $(top_builddir)/src/libpmix.la pmix_regex_SOURCES = $(headers) \ - pmix_regex.c test_common.c cli_stages.c server_callbacks.c utils.c + pmix_regex.c test_common.c cli_stages.c server_callbacks.c 
test_server.c utils.c pmix_regex_LDFLAGS = $(PMIX_PKG_CONFIG_LDFLAGS) pmix_regex_LDADD = \ $(top_builddir)/src/libpmix.la diff --git a/opal/mca/pmix/pmix3x/pmix/test/cli_stages.c b/opal/mca/pmix/pmix3x/pmix/test/cli_stages.c index 04049ad9cc3..5fbfec419dc 100644 --- a/opal/mca/pmix/pmix3x/pmix/test/cli_stages.c +++ b/opal/mca/pmix/pmix3x/pmix/test/cli_stages.c @@ -1,8 +1,10 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* - * Copyright (c) 2015-2018 Intel, Inc. All rights reserved. + * Copyright (c) 2015-2019 Intel, Inc. All rights reserved. * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. + * Copyright (c) 2015-2018 Mellanox Technologies, Inc. + * All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -21,18 +23,27 @@ int cli_rank(cli_info_t *cli) int i; for(i=0; i < cli_info_cnt; i++){ if( cli == &cli_info[i] ){ - return i; + return cli->rank; } } return -1; } -void cli_init(int nprocs, cli_state_t order[]) +void cli_init(int nprocs) { int n, i; + cli_state_t order[CLI_TERM+1]; + cli_info = malloc( sizeof(cli_info_t) * nprocs); cli_info_cnt = nprocs; + order[CLI_UNINIT] = CLI_FORKED; + order[CLI_FORKED] = CLI_FIN; + order[CLI_CONNECTED] = CLI_UNDEF; + order[CLI_FIN] = CLI_TERM; + order[CLI_DISCONN] = CLI_UNDEF; + order[CLI_TERM] = CLI_UNDEF; + for (n=0; n < nprocs; n++) { cli_info[n].sd = -1; cli_info[n].ev = NULL; @@ -47,7 +58,7 @@ void cli_init(int nprocs, cli_state_t order[]) } } -void cli_connect(cli_info_t *cli, int sd, struct event_base * ebase, event_callback_fn callback) +void cli_connect(cli_info_t *cli, int sd, pmix_event_base_t * ebase, event_callback_fn callback) { if( CLI_CONNECTED != cli->next_state[cli->state] ){ TEST_ERROR(("Rank %d has bad next state: expect %d have %d!", @@ -57,9 +68,9 @@ void cli_connect(cli_info_t *cli, int sd, struct event_base * ebase, event_callb } cli->sd = sd; - cli->ev = event_new(ebase, sd, - EV_READ|EV_PERSIST, 
callback, cli); - event_add(cli->ev,NULL); + cli->ev = pmix_event_new(ebase, sd, + EV_READ|EV_PERSIST, callback, cli); + pmix_event_add(cli->ev,NULL); pmix_ptl_base_set_nonblocking(sd); TEST_VERBOSE(("Connection accepted from rank %d", cli_rank(cli) )); cli->state = CLI_CONNECTED; @@ -94,12 +105,12 @@ void cli_disconnect(cli_info_t *cli) } if( NULL == cli->ev ){ - TEST_ERROR(("Bad ev = NULL of rank = %d ", cli->sd, cli_rank(cli))); + TEST_ERROR(("Bad ev = NULL of rank = %d ", cli_rank(cli))); test_abort = true; } else { TEST_VERBOSE(("remove event of rank %d from event queue", cli_rank(cli))); - event_del(cli->ev); - event_free(cli->ev); + pmix_event_del(cli->ev); + pmix_event_free(cli->ev); cli->ev = NULL; } @@ -198,8 +209,9 @@ void cli_wait_all(double timeout) TEST_VERBOSE(("waitpid = %d", pid)); for(i=0; i < cli_info_cnt; i++){ if( cli_info[i].pid == pid ){ - TEST_VERBOSE(("the child with pid = %d has rank = %d\n" - "\t\texited = %d, signalled = %d", pid, i, + TEST_VERBOSE(("the child with pid = %d has rank = %d, ns = %s\n" + "\t\texited = %d, signalled = %d", pid, + cli_info[i].rank, cli_info[i].ns, WIFEXITED(status), WIFSIGNALED(status) )); if( WIFEXITED(status) || WIFSIGNALED(status) ){ cli_cleanup(&cli_info[i]); @@ -211,6 +223,9 @@ void cli_wait_all(double timeout) if( errno == ECHILD ){ TEST_VERBOSE(("No more children to wait. Happens on the last cli_wait_all call " "which is used to ensure that all children terminated.\n")); + if (pmix_test_verbose) { + sleep(1); + } break; } else { TEST_ERROR(("waitpid(): %d : %s", errno, strerror(errno))); diff --git a/opal/mca/pmix/pmix3x/pmix/test/cli_stages.h b/opal/mca/pmix/pmix3x/pmix/test/cli_stages.h index 343af2de043..011023d7a79 100644 --- a/opal/mca/pmix/pmix3x/pmix/test/cli_stages.h +++ b/opal/mca/pmix/pmix3x/pmix/test/cli_stages.h @@ -1,8 +1,10 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* - * Copyright (c) 2015-2018 Intel, Inc. All rights reserved. 
+ * Copyright (c) 2015-2019 Intel, Inc. All rights reserved. * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. + * Copyright (c) 2015-2018 Mellanox Technologies, Inc. + * All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -10,6 +12,9 @@ * $HEADER$ */ +#ifndef CLI_STAGES_H +#define CLI_STAGES_H + #include #include #include @@ -41,7 +46,7 @@ typedef struct { pmix_event_t *ev; cli_state_t state; cli_state_t next_state[CLI_TERM+1]; - int rank; + pmix_rank_t rank; char *ns; } cli_info_t; @@ -50,8 +55,8 @@ extern int cli_info_cnt; extern bool test_abort; int cli_rank(cli_info_t *cli); -void cli_init(int nprocs, cli_state_t order[]); -void cli_connect(cli_info_t *cli, int sd, struct event_base * ebase, event_callback_fn callback); +void cli_init(int nprocs); +void cli_connect(cli_info_t *cli, int sd, pmix_event_base_t * ebase, event_callback_fn callback); void cli_finalize(cli_info_t *cli); void cli_disconnect(cli_info_t *cli); void cli_terminate(cli_info_t *cli); @@ -75,3 +80,5 @@ void op_callbk(pmix_status_t status, void errhandler_reg_callbk (pmix_status_t status, size_t errhandler_ref, void *cbdata); + +#endif // CLI_STAGES_H diff --git a/opal/mca/pmix/pmix3x/pmix/test/pmi_client.c b/opal/mca/pmix/pmix3x/pmix/test/pmi_client.c index ad21f6db3cc..819429b4fd7 100644 --- a/opal/mca/pmix/pmix3x/pmix/test/pmi_client.c +++ b/opal/mca/pmix/pmix3x/pmix/test/pmi_client.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2013-2017 Intel, Inc. All rights reserved. + * Copyright (c) 2013-2019 Intel, Inc. All rights reserved. * Copyright (c) 2015 Mellanox Technologies, Inc. * All rights reserved. * $COPYRIGHT$ @@ -24,6 +24,9 @@ static int _legacy = 0; /* Verbose level 0-silent, 1-fatal, 2-error, 3+ debug*/ static int _verbose = 1; +static int spawned, size, rank=-1, appnum; +static char jobid[255]; + static void log_fatal(const char *format, ...) 
{ va_list arglist; @@ -36,7 +39,7 @@ static void log_fatal(const char *format, ...) va_end(arglist); return; } - fprintf(stderr, "FATAL: %s", output); + fprintf(stderr, "%d:FATAL: %s", rank, output); free(output); } va_end(arglist); @@ -54,7 +57,7 @@ static void log_error(const char *format, ...) va_end(arglist); return; } - fprintf(stderr, "ERROR: %s", output); + fprintf(stderr, "%d:ERROR: %s", rank, output); free(output); } va_end(arglist); @@ -72,7 +75,7 @@ static void log_info(const char *format, ...) va_end(arglist); return; } - fprintf(stderr, "INFO: %s", output); + fprintf(stderr, "%d:INFO: %s", rank, output); free(output); } va_end(arglist); @@ -81,7 +84,7 @@ static void log_info(const char *format, ...) #define log_assert(e, msg) \ do { \ if (!(e)) { \ - log_fatal("%s at %s:%d\n", msg, __func__, __LINE__); \ + log_fatal("%d:%s at %s:%d\n", rank, msg, __func__, __LINE__); \ rc = -1; \ } \ } while (0) @@ -99,10 +102,6 @@ static int test_item5(void); static int test_item6(void); static int test_item7(void); -static int spawned, size, rank, appnum; -static char jobid[255]; - - int main(int argc, char **argv) { int ret = 0; @@ -372,21 +371,24 @@ static int test_item6(void) { int rc = 0; char val[100]; - const char *tkey = __func__; + char *tkey; const char *tval = __FILE__; + asprintf(&tkey, "%d:%s", rank, __func__); if (PMI_SUCCESS != (rc = PMI_KVS_Put(jobid, tkey, tval))) { log_fatal("PMI_KVS_Put %d\n", rc); + free(tkey); return rc; } if (PMI_SUCCESS != (rc = PMI_KVS_Get(jobid, tkey, val, sizeof(val)))) { log_fatal("PMI_KVS_Get %d\n", rc); + free(tkey); return rc; } log_info("tkey=%s tval=%s val=%s\n", tkey, tval, val); - + free(tkey); log_assert(!strcmp(tval, val), "value does not meet expectation"); return rc; @@ -398,16 +400,16 @@ static int test_item7(void) char tkey[100]; char tval[100]; char val[100]; - int i = 0; + int i = 0, j; + +log_info("TEST7\n"); for (i = 0; i < size; i++) { - sprintf(tkey, "KEY-%d", i); + sprintf(tkey, "%d:KEY-%d", rank, i); 
sprintf(tval, "VALUE-%d", i); - if (i == rank) { - if (PMI_SUCCESS != (rc = PMI_KVS_Put(jobid, tkey, tval))) { - log_fatal("PMI_KVS_Put [%s=%s] %d\n", tkey, tval, rc); - return rc; - } + if (PMI_SUCCESS != (rc = PMI_KVS_Put(jobid, tkey, tval))) { + log_fatal("PMI_KVS_Put [%s=%s] %d\n", tkey, tval, rc); + return rc; } } @@ -416,22 +418,27 @@ static int test_item7(void) return rc; } + + log_info("BARRIER\n"); if (PMI_SUCCESS != (rc = PMI_Barrier())) { log_fatal("PMI_Barrier %d\n", rc); return rc; } for (i = 0; i < size; i++) { - sprintf(tkey, "KEY-%d", i); - sprintf(tval, "VALUE-%d", i); - if (PMI_SUCCESS != (rc = PMI_KVS_Get(jobid, tkey, val, sizeof(val)))) { - log_fatal("PMI_KVS_Get [%s=?] %d\n", tkey, rc); - return rc; - } + for (j=0; j < size; j++) { + sprintf(tkey, "%d:KEY-%d", i, j); + sprintf(tval, "VALUE-%d", j); + log_info("Get key %s\n", tkey); + if (PMI_SUCCESS != (rc = PMI_KVS_Get(jobid, tkey, val, sizeof(val)))) { + log_fatal("PMI_KVS_Get [%s=?] %d\n", tkey, rc); + return rc; + } - log_info("tkey=%s tval=%s val=%s\n", tkey, tval, val); + log_info("tkey=%s tval=%s val=%s\n", tkey, tval, val); - log_assert(!strcmp(tval, val), "value does not meet expectation"); + log_assert(!strcmp(tval, val), "value does not meet expectation"); + } } return rc; diff --git a/opal/mca/pmix/pmix3x/pmix/test/pmix_client.c b/opal/mca/pmix/pmix3x/pmix/test/pmix_client.c index e00a44e22e2..3d1b46fcfac 100644 --- a/opal/mca/pmix/pmix3x/pmix/test/pmix_client.c +++ b/opal/mca/pmix/pmix3x/pmix/test/pmix_client.c @@ -13,8 +13,8 @@ * All rights reserved. * Copyright (c) 2009-2012 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2011 Oak Ridge National Labs. All rights reserved. - * Copyright (c) 2013-2017 Intel, Inc. All rights reserved. - * Copyright (c) 2015-2017 Mellanox Technologies, Inc. All rights reserved. + * Copyright (c) 2013-2018 Intel, Inc. All rights reserved. + * Copyright (c) 2015-2018 Mellanox Technologies, Inc. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -78,7 +78,7 @@ int main(int argc, char **argv) parse_cmd(argc, argv, ¶ms); // We don't know rank at this place! - TEST_VERBOSE(("Client ns %s rank %d: Start", params.nspace, params.rank)); + TEST_VERBOSE(("Client %s:%d started PID:%d", params.nspace, params.rank, getpid())); /* handle early-fail test case */ if (1 == params.early_fail && 0 == params.rank) { @@ -236,6 +236,7 @@ int main(int argc, char **argv) TEST_VERBOSE(("Client ns %s rank %d:PMIx_Finalize successfully completed", myproc.nspace, myproc.rank)); } + TEST_VERBOSE(("Client %s:%d finished PID:%d", params.nspace, params.rank, getpid())); TEST_OUTPUT_CLEAR(("OK\n")); TEST_CLOSE_FILE(); FREE_TEST_PARAMS(params); diff --git a/opal/mca/pmix/pmix3x/pmix/test/pmix_test.c b/opal/mca/pmix/pmix3x/pmix/test/pmix_test.c index c1a8130b1a4..9ceeb72d539 100644 --- a/opal/mca/pmix/pmix3x/pmix/test/pmix_test.c +++ b/opal/mca/pmix/pmix3x/pmix/test/pmix_test.c @@ -13,11 +13,12 @@ * All rights reserved. * Copyright (c) 2009-2012 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2011 Oak Ridge National Labs. All rights reserved. - * Copyright (c) 2013-2017 Intel, Inc. All rights reserved. + * Copyright (c) 2013-2018 Intel, Inc. All rights reserved. * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. - * Copyright (c) 2015 Mellanox Technologies, Inc. + * Copyright (c) 2015-2018 Mellanox Technologies, Inc. * All rights reserved. 
+ * * $COPYRIGHT$ * * Additional copyrights may follow @@ -35,7 +36,8 @@ #include "server_callbacks.h" #include "utils.h" -#include "src/include/pmix_globals.h" +#include "test_server.h" +#include "test_common.h" bool spawn_wait = false; @@ -47,7 +49,6 @@ int main(int argc, char **argv) struct stat stat_buf; struct timeval tv; double test_start; - cli_state_t order[CLI_TERM+1]; test_params params; INIT_TEST_PARAMS(params); int test_fail = 0; @@ -68,6 +69,14 @@ int main(int argc, char **argv) parse_cmd(argc, argv, ¶ms); TEST_VERBOSE(("Start PMIx_lite smoke test (timeout is %d)", params.timeout)); + /* set common argv and env */ + client_env = pmix_argv_copy(environ); + set_client_argv(¶ms, &client_argv); + + tmp = pmix_argv_join(client_argv, ' '); + TEST_VERBOSE(("Executing test: %s", tmp)); + free(tmp); + /* verify executable */ if( 0 > ( rc = stat(params.binary, &stat_buf) ) ){ TEST_ERROR(("Cannot stat() executable \"%s\": %d: %s", params.binary, errno, strerror(errno))); @@ -83,48 +92,29 @@ int main(int argc, char **argv) return 0; } - /* setup the server library */ - pmix_info_t info[1]; - (void)strncpy(info[0].key, PMIX_SOCKET_MODE, PMIX_MAX_KEYLEN); - info[0].value.type = PMIX_UINT32; - info[0].value.data.uint32 = 0666; - - if (PMIX_SUCCESS != (rc = PMIx_server_init(&mymodule, info, 1))) { - TEST_ERROR(("Init failed with error %d", rc)); + if (PMIX_SUCCESS != (rc = server_init(¶ms))) { FREE_TEST_PARAMS(params); return rc; } - /* register the errhandler */ - PMIx_Register_event_handler(NULL, 0, NULL, 0, - errhandler, errhandler_reg_callbk, NULL); - - order[CLI_UNINIT] = CLI_FORKED; - order[CLI_FORKED] = CLI_FIN; - order[CLI_CONNECTED] = CLI_UNDEF; - order[CLI_FIN] = CLI_TERM; - order[CLI_DISCONN] = CLI_UNDEF; - order[CLI_TERM] = CLI_UNDEF; - cli_init(params.nprocs, order); - /* set common argv and env */ - client_env = pmix_argv_copy(environ); - set_client_argv(¶ms, &client_argv); - - tmp = pmix_argv_join(client_argv, ' '); - TEST_VERBOSE(("Executing test: %s", 
tmp)); - free(tmp); + cli_init(params.lsize); int launched = 0; /* set namespaces and fork clients */ if (NULL == params.ns_dist) { + uint32_t i; + int base_rank = 0; + + /* compute my start counter */ + for(i = 0; i < (uint32_t)my_server_id; i++) { + base_rank += (params.nprocs % params.nservers) > (uint32_t)i ? + params.nprocs / params.nservers + 1 : + params.nprocs / params.nservers; + } /* we have a single namespace for all clients */ ns_nprocs = params.nprocs; - rc = launch_clients(ns_nprocs, params.binary, &client_env, &client_argv); - if (PMIX_SUCCESS != rc) { - FREE_TEST_PARAMS(params); - return rc; - } - launched += ns_nprocs; + launched += server_launch_clients(params.lsize, params.nprocs, base_rank, + ¶ms, &client_env, &client_argv); } else { char *pch; pch = strtok(params.ns_dist, ":"); @@ -136,17 +126,13 @@ int main(int argc, char **argv) return PMIX_ERROR; } if (0 < ns_nprocs) { - rc = launch_clients(ns_nprocs, params.binary, &client_env, &client_argv); - if (PMIX_SUCCESS != rc) { - FREE_TEST_PARAMS(params); - return rc; - } + launched += server_launch_clients(ns_nprocs, ns_nprocs, 0, ¶ms, + &client_env, &client_argv); } pch = strtok (NULL, ":"); - launched += ns_nprocs; } } - if (params.nprocs != (uint32_t)launched) { + if (params.lsize != (uint32_t)launched) { TEST_ERROR(("Total number of processes doesn't correspond number specified by ns_dist parameter.")); cli_kill_all(); test_fail = 1; @@ -185,24 +171,16 @@ int main(int argc, char **argv) PMIX_WAIT_FOR_COMPLETION(spawn_wait); } - pmix_argv_free(client_argv); - pmix_argv_free(client_env); - /* deregister the errhandler */ PMIx_Deregister_event_handler(0, op_callbk, NULL); cli_wait_all(1.0); - /* finalize the server library */ - if (PMIX_SUCCESS != (rc = PMIx_server_finalize())) { - TEST_ERROR(("Finalize failed with error %d", rc)); - } + test_fail += server_finalize(¶ms); FREE_TEST_PARAMS(params); - - if (0 == test_fail) { - TEST_OUTPUT(("Test finished OK!")); - } + pmix_argv_free(client_argv); + 
pmix_argv_free(client_env); return test_fail; } diff --git a/opal/mca/pmix/pmix3x/pmix/test/run_tests00.pl.in b/opal/mca/pmix/pmix3x/pmix/test/run_tests00.pl.in new file mode 100755 index 00000000000..fb139c9ce7d --- /dev/null +++ b/opal/mca/pmix/pmix3x/pmix/test/run_tests00.pl.in @@ -0,0 +1,73 @@ +#!/usr/bin/env perl +# +# Copyright (c) 2019 Intel, Inc. +# +# Copyright (c) 2019 Cisco Systems, Inc. All rights reserved +# $COPYRIGHT$ +# +# Additional copyrights may follow + +use strict; + +my @tests = ("-n 4 --ns-dist 3:1 --fence \"[db | 0:0-2;1:0]\"", + "-n 4 --ns-dist 3:1 --fence \"[db | 0:;1:0]\"", + "-n 4 --ns-dist 3:1 --fence \"[db | 0:;1:]\"", + "-n 4 --ns-dist 3:1 --fence \"[0:]\"", + "-n 4 --ns-dist 3:1 --fence \"[b | 0:]\"", + "-n 4 --ns-dist 3:1 --fence \"[d | 0:]\" --noise \"[0:0,1]\"", + "-n 4 --job-fence -c", + "-n 4 --job-fence", + "-n 2 --test-publish", + "-n 2 --test-spawn", + "-n 2 --test-connect", + "-n 5 --test-resolve-peers --ns-dist \"1:2:2\"", + "-n 5 --test-replace 100:0,1,10,50,99", + "-n 5 --test-internal 10", + "-s 2 -n 2 --job-fence", + "-s 2 -n 2 --job-fence -c"); + +my $test; +my $cmd; +my $output; +my $status = 0; +my $testnum; + +# We are running tests against the build tree (vs. the installation +# tree). Autogen gives us a full list of all possible component +# directories in PMIX_COMPONENT_LIBRARY_PATHS. Iterate through each +# of those directories: 1) to see if there is actually a component +# built in there, and 2) to turn it into an absolute path name. Then +# put the new list in the "mca_bast_component_path" MCA parameter env +# variable so that the MCA base knows where to find all the +# components. +my @myfullpaths; +my $mybuilddir = "@PMIX_BUILT_TEST_PREFIX@"; +my $mypathstr = "@PMIX_COMPONENT_LIBRARY_PATHS@"; +my @splitstr = split(':', $mypathstr); +foreach my $path (@splitstr) { + # Note that the component is actually built in the ".libs" + # subdirectory. 
If the component wasn't built, that subdirectory + # will not exist, so don't save it. + my $fullpath = $mybuilddir . "/" . $path . "/.libs"; + push(@myfullpaths, $fullpath) + if (-d $fullpath); +} +my $mymcapaths = join(":", @myfullpaths); +$ENV{'PMIX_MCA_mca_base_component_path'} = $mymcapaths; + +my $wdir = $mybuilddir . "/test"; +chdir $wdir; + +$testnum = $0; +$testnum =~ s/.pl//; +$testnum = substr($testnum, -2); +$test = @tests[$testnum]; + +$cmd = "./pmix_test " . $test . " 2>&1"; +print $cmd . "\n"; +$output = `$cmd`; +print $output . "\n"; +print "CODE $?\n"; +$status = "$?"; + +exit($status >> 8); diff --git a/opal/mca/pmix/pmix3x/pmix/test/run_tests01.pl.in b/opal/mca/pmix/pmix3x/pmix/test/run_tests01.pl.in new file mode 100755 index 00000000000..fb139c9ce7d --- /dev/null +++ b/opal/mca/pmix/pmix3x/pmix/test/run_tests01.pl.in @@ -0,0 +1,73 @@ +#!/usr/bin/env perl +# +# Copyright (c) 2019 Intel, Inc. +# +# Copyright (c) 2019 Cisco Systems, Inc. All rights reserved +# $COPYRIGHT$ +# +# Additional copyrights may follow + +use strict; + +my @tests = ("-n 4 --ns-dist 3:1 --fence \"[db | 0:0-2;1:0]\"", + "-n 4 --ns-dist 3:1 --fence \"[db | 0:;1:0]\"", + "-n 4 --ns-dist 3:1 --fence \"[db | 0:;1:]\"", + "-n 4 --ns-dist 3:1 --fence \"[0:]\"", + "-n 4 --ns-dist 3:1 --fence \"[b | 0:]\"", + "-n 4 --ns-dist 3:1 --fence \"[d | 0:]\" --noise \"[0:0,1]\"", + "-n 4 --job-fence -c", + "-n 4 --job-fence", + "-n 2 --test-publish", + "-n 2 --test-spawn", + "-n 2 --test-connect", + "-n 5 --test-resolve-peers --ns-dist \"1:2:2\"", + "-n 5 --test-replace 100:0,1,10,50,99", + "-n 5 --test-internal 10", + "-s 2 -n 2 --job-fence", + "-s 2 -n 2 --job-fence -c"); + +my $test; +my $cmd; +my $output; +my $status = 0; +my $testnum; + +# We are running tests against the build tree (vs. the installation +# tree). Autogen gives us a full list of all possible component +# directories in PMIX_COMPONENT_LIBRARY_PATHS. 
Iterate through each +# of those directories: 1) to see if there is actually a component +# built in there, and 2) to turn it into an absolute path name. Then +# put the new list in the "mca_base_component_path" MCA parameter env +# variable so that the MCA base knows where to find all the +# components. +my @myfullpaths; +my $mybuilddir = "@PMIX_BUILT_TEST_PREFIX@"; +my $mypathstr = "@PMIX_COMPONENT_LIBRARY_PATHS@"; +my @splitstr = split(':', $mypathstr); +foreach my $path (@splitstr) { + # Note that the component is actually built in the ".libs" + # subdirectory. If the component wasn't built, that subdirectory + # will not exist, so don't save it. + my $fullpath = $mybuilddir . "/" . $path . "/.libs"; + push(@myfullpaths, $fullpath) + if (-d $fullpath); +} +my $mymcapaths = join(":", @myfullpaths); +$ENV{'PMIX_MCA_mca_base_component_path'} = $mymcapaths; + +my $wdir = $mybuilddir . "/test"; +chdir $wdir; + +$testnum = $0; +$testnum =~ s/.pl//; +$testnum = substr($testnum, -2); +$test = @tests[$testnum]; + +$cmd = "./pmix_test " . $test . " 2>&1"; +print $cmd . "\n"; +$output = `$cmd`; +print $output . "\n"; +print "CODE $?\n"; +$status = "$?"; + +exit($status >> 8); diff --git a/opal/mca/pmix/pmix3x/pmix/test/run_tests02.pl.in b/opal/mca/pmix/pmix3x/pmix/test/run_tests02.pl.in new file mode 100755 index 00000000000..fb139c9ce7d --- /dev/null +++ b/opal/mca/pmix/pmix3x/pmix/test/run_tests02.pl.in @@ -0,0 +1,73 @@ +#!/usr/bin/env perl +# +# Copyright (c) 2019 Intel, Inc. +# +# Copyright (c) 2019 Cisco Systems, Inc.
All rights reserved +# $COPYRIGHT$ +# +# Additional copyrights may follow + +use strict; + +my @tests = ("-n 4 --ns-dist 3:1 --fence \"[db | 0:0-2;1:0]\"", + "-n 4 --ns-dist 3:1 --fence \"[db | 0:;1:0]\"", + "-n 4 --ns-dist 3:1 --fence \"[db | 0:;1:]\"", + "-n 4 --ns-dist 3:1 --fence \"[0:]\"", + "-n 4 --ns-dist 3:1 --fence \"[b | 0:]\"", + "-n 4 --ns-dist 3:1 --fence \"[d | 0:]\" --noise \"[0:0,1]\"", + "-n 4 --job-fence -c", + "-n 4 --job-fence", + "-n 2 --test-publish", + "-n 2 --test-spawn", + "-n 2 --test-connect", + "-n 5 --test-resolve-peers --ns-dist \"1:2:2\"", + "-n 5 --test-replace 100:0,1,10,50,99", + "-n 5 --test-internal 10", + "-s 2 -n 2 --job-fence", + "-s 2 -n 2 --job-fence -c"); + +my $test; +my $cmd; +my $output; +my $status = 0; +my $testnum; + +# We are running tests against the build tree (vs. the installation +# tree). Autogen gives us a full list of all possible component +# directories in PMIX_COMPONENT_LIBRARY_PATHS. Iterate through each +# of those directories: 1) to see if there is actually a component +# built in there, and 2) to turn it into an absolute path name. Then +# put the new list in the "mca_base_component_path" MCA parameter env +# variable so that the MCA base knows where to find all the +# components. +my @myfullpaths; +my $mybuilddir = "@PMIX_BUILT_TEST_PREFIX@"; +my $mypathstr = "@PMIX_COMPONENT_LIBRARY_PATHS@"; +my @splitstr = split(':', $mypathstr); +foreach my $path (@splitstr) { + # Note that the component is actually built in the ".libs" + # subdirectory. If the component wasn't built, that subdirectory + # will not exist, so don't save it. + my $fullpath = $mybuilddir . "/" . $path . "/.libs"; + push(@myfullpaths, $fullpath) + if (-d $fullpath); +} +my $mymcapaths = join(":", @myfullpaths); +$ENV{'PMIX_MCA_mca_base_component_path'} = $mymcapaths; + +my $wdir = $mybuilddir .
"/test"; +chdir $wdir; + +$testnum = $0; +$testnum =~ s/.pl//; +$testnum = substr($testnum, -2); +$test = @tests[$testnum]; + +$cmd = "./pmix_test " . $test . " 2>&1"; +print $cmd . "\n"; +$output = `$cmd`; +print $output . "\n"; +print "CODE $?\n"; +$status = "$?"; + +exit($status >> 8); diff --git a/opal/mca/pmix/pmix3x/pmix/test/run_tests03.pl.in b/opal/mca/pmix/pmix3x/pmix/test/run_tests03.pl.in new file mode 100755 index 00000000000..fb139c9ce7d --- /dev/null +++ b/opal/mca/pmix/pmix3x/pmix/test/run_tests03.pl.in @@ -0,0 +1,73 @@ +#!/usr/bin/env perl +# +# Copyright (c) 2019 Intel, Inc. +# +# Copyright (c) 2019 Cisco Systems, Inc. All rights reserved +# $COPYRIGHT$ +# +# Additional copyrights may follow + +use strict; + +my @tests = ("-n 4 --ns-dist 3:1 --fence \"[db | 0:0-2;1:0]\"", + "-n 4 --ns-dist 3:1 --fence \"[db | 0:;1:0]\"", + "-n 4 --ns-dist 3:1 --fence \"[db | 0:;1:]\"", + "-n 4 --ns-dist 3:1 --fence \"[0:]\"", + "-n 4 --ns-dist 3:1 --fence \"[b | 0:]\"", + "-n 4 --ns-dist 3:1 --fence \"[d | 0:]\" --noise \"[0:0,1]\"", + "-n 4 --job-fence -c", + "-n 4 --job-fence", + "-n 2 --test-publish", + "-n 2 --test-spawn", + "-n 2 --test-connect", + "-n 5 --test-resolve-peers --ns-dist \"1:2:2\"", + "-n 5 --test-replace 100:0,1,10,50,99", + "-n 5 --test-internal 10", + "-s 2 -n 2 --job-fence", + "-s 2 -n 2 --job-fence -c"); + +my $test; +my $cmd; +my $output; +my $status = 0; +my $testnum; + +# We are running tests against the build tree (vs. the installation +# tree). Autogen gives us a full list of all possible component +# directories in PMIX_COMPONENT_LIBRARY_PATHS. Iterate through each +# of those directories: 1) to see if there is actually a component +# built in there, and 2) to turn it into an absolute path name. Then +# put the new list in the "mca_base_component_path" MCA parameter env +# variable so that the MCA base knows where to find all the +# components.
+my @myfullpaths; +my $mybuilddir = "@PMIX_BUILT_TEST_PREFIX@"; +my $mypathstr = "@PMIX_COMPONENT_LIBRARY_PATHS@"; +my @splitstr = split(':', $mypathstr); +foreach my $path (@splitstr) { + # Note that the component is actually built in the ".libs" + # subdirectory. If the component wasn't built, that subdirectory + # will not exist, so don't save it. + my $fullpath = $mybuilddir . "/" . $path . "/.libs"; + push(@myfullpaths, $fullpath) + if (-d $fullpath); +} +my $mymcapaths = join(":", @myfullpaths); +$ENV{'PMIX_MCA_mca_base_component_path'} = $mymcapaths; + +my $wdir = $mybuilddir . "/test"; +chdir $wdir; + +$testnum = $0; +$testnum =~ s/.pl//; +$testnum = substr($testnum, -2); +$test = @tests[$testnum]; + +$cmd = "./pmix_test " . $test . " 2>&1"; +print $cmd . "\n"; +$output = `$cmd`; +print $output . "\n"; +print "CODE $?\n"; +$status = "$?"; + +exit($status >> 8); diff --git a/opal/mca/pmix/pmix3x/pmix/test/run_tests04.pl.in b/opal/mca/pmix/pmix3x/pmix/test/run_tests04.pl.in new file mode 100755 index 00000000000..fb139c9ce7d --- /dev/null +++ b/opal/mca/pmix/pmix3x/pmix/test/run_tests04.pl.in @@ -0,0 +1,73 @@ +#!/usr/bin/env perl +# +# Copyright (c) 2019 Intel, Inc. +# +# Copyright (c) 2019 Cisco Systems, Inc. 
All rights reserved +# $COPYRIGHT$ +# +# Additional copyrights may follow + +use strict; + +my @tests = ("-n 4 --ns-dist 3:1 --fence \"[db | 0:0-2;1:0]\"", + "-n 4 --ns-dist 3:1 --fence \"[db | 0:;1:0]\"", + "-n 4 --ns-dist 3:1 --fence \"[db | 0:;1:]\"", + "-n 4 --ns-dist 3:1 --fence \"[0:]\"", + "-n 4 --ns-dist 3:1 --fence \"[b | 0:]\"", + "-n 4 --ns-dist 3:1 --fence \"[d | 0:]\" --noise \"[0:0,1]\"", + "-n 4 --job-fence -c", + "-n 4 --job-fence", + "-n 2 --test-publish", + "-n 2 --test-spawn", + "-n 2 --test-connect", + "-n 5 --test-resolve-peers --ns-dist \"1:2:2\"", + "-n 5 --test-replace 100:0,1,10,50,99", + "-n 5 --test-internal 10", + "-s 2 -n 2 --job-fence", + "-s 2 -n 2 --job-fence -c"); + +my $test; +my $cmd; +my $output; +my $status = 0; +my $testnum; + +# We are running tests against the build tree (vs. the installation +# tree). Autogen gives us a full list of all possible component +# directories in PMIX_COMPONENT_LIBRARY_PATHS. Iterate through each +# of those directories: 1) to see if there is actually a component +# built in there, and 2) to turn it into an absolute path name. Then +# put the new list in the "mca_base_component_path" MCA parameter env +# variable so that the MCA base knows where to find all the +# components. +my @myfullpaths; +my $mybuilddir = "@PMIX_BUILT_TEST_PREFIX@"; +my $mypathstr = "@PMIX_COMPONENT_LIBRARY_PATHS@"; +my @splitstr = split(':', $mypathstr); +foreach my $path (@splitstr) { + # Note that the component is actually built in the ".libs" + # subdirectory. If the component wasn't built, that subdirectory + # will not exist, so don't save it. + my $fullpath = $mybuilddir . "/" . $path . "/.libs"; + push(@myfullpaths, $fullpath) + if (-d $fullpath); +} +my $mymcapaths = join(":", @myfullpaths); +$ENV{'PMIX_MCA_mca_base_component_path'} = $mymcapaths; + +my $wdir = $mybuilddir .
"/test"; +chdir $wdir; + +$testnum = $0; +$testnum =~ s/.pl//; +$testnum = substr($testnum, -2); +$test = @tests[$testnum]; + +$cmd = "./pmix_test " . $test . " 2>&1"; +print $cmd . "\n"; +$output = `$cmd`; +print $output . "\n"; +print "CODE $?\n"; +$status = "$?"; + +exit($status >> 8); diff --git a/opal/mca/pmix/pmix3x/pmix/test/run_tests05.pl.in b/opal/mca/pmix/pmix3x/pmix/test/run_tests05.pl.in new file mode 100755 index 00000000000..fb139c9ce7d --- /dev/null +++ b/opal/mca/pmix/pmix3x/pmix/test/run_tests05.pl.in @@ -0,0 +1,73 @@ +#!/usr/bin/env perl +# +# Copyright (c) 2019 Intel, Inc. +# +# Copyright (c) 2019 Cisco Systems, Inc. All rights reserved +# $COPYRIGHT$ +# +# Additional copyrights may follow + +use strict; + +my @tests = ("-n 4 --ns-dist 3:1 --fence \"[db | 0:0-2;1:0]\"", + "-n 4 --ns-dist 3:1 --fence \"[db | 0:;1:0]\"", + "-n 4 --ns-dist 3:1 --fence \"[db | 0:;1:]\"", + "-n 4 --ns-dist 3:1 --fence \"[0:]\"", + "-n 4 --ns-dist 3:1 --fence \"[b | 0:]\"", + "-n 4 --ns-dist 3:1 --fence \"[d | 0:]\" --noise \"[0:0,1]\"", + "-n 4 --job-fence -c", + "-n 4 --job-fence", + "-n 2 --test-publish", + "-n 2 --test-spawn", + "-n 2 --test-connect", + "-n 5 --test-resolve-peers --ns-dist \"1:2:2\"", + "-n 5 --test-replace 100:0,1,10,50,99", + "-n 5 --test-internal 10", + "-s 2 -n 2 --job-fence", + "-s 2 -n 2 --job-fence -c"); + +my $test; +my $cmd; +my $output; +my $status = 0; +my $testnum; + +# We are running tests against the build tree (vs. the installation +# tree). Autogen gives us a full list of all possible component +# directories in PMIX_COMPONENT_LIBRARY_PATHS. Iterate through each +# of those directories: 1) to see if there is actually a component +# built in there, and 2) to turn it into an absolute path name. Then +# put the new list in the "mca_base_component_path" MCA parameter env +# variable so that the MCA base knows where to find all the +# components.
+my @myfullpaths; +my $mybuilddir = "@PMIX_BUILT_TEST_PREFIX@"; +my $mypathstr = "@PMIX_COMPONENT_LIBRARY_PATHS@"; +my @splitstr = split(':', $mypathstr); +foreach my $path (@splitstr) { + # Note that the component is actually built in the ".libs" + # subdirectory. If the component wasn't built, that subdirectory + # will not exist, so don't save it. + my $fullpath = $mybuilddir . "/" . $path . "/.libs"; + push(@myfullpaths, $fullpath) + if (-d $fullpath); +} +my $mymcapaths = join(":", @myfullpaths); +$ENV{'PMIX_MCA_mca_base_component_path'} = $mymcapaths; + +my $wdir = $mybuilddir . "/test"; +chdir $wdir; + +$testnum = $0; +$testnum =~ s/.pl//; +$testnum = substr($testnum, -2); +$test = @tests[$testnum]; + +$cmd = "./pmix_test " . $test . " 2>&1"; +print $cmd . "\n"; +$output = `$cmd`; +print $output . "\n"; +print "CODE $?\n"; +$status = "$?"; + +exit($status >> 8); diff --git a/opal/mca/pmix/pmix3x/pmix/test/run_tests06.pl.in b/opal/mca/pmix/pmix3x/pmix/test/run_tests06.pl.in new file mode 100755 index 00000000000..fb139c9ce7d --- /dev/null +++ b/opal/mca/pmix/pmix3x/pmix/test/run_tests06.pl.in @@ -0,0 +1,73 @@ +#!/usr/bin/env perl +# +# Copyright (c) 2019 Intel, Inc. +# +# Copyright (c) 2019 Cisco Systems, Inc. 
All rights reserved +# $COPYRIGHT$ +# +# Additional copyrights may follow + +use strict; + +my @tests = ("-n 4 --ns-dist 3:1 --fence \"[db | 0:0-2;1:0]\"", + "-n 4 --ns-dist 3:1 --fence \"[db | 0:;1:0]\"", + "-n 4 --ns-dist 3:1 --fence \"[db | 0:;1:]\"", + "-n 4 --ns-dist 3:1 --fence \"[0:]\"", + "-n 4 --ns-dist 3:1 --fence \"[b | 0:]\"", + "-n 4 --ns-dist 3:1 --fence \"[d | 0:]\" --noise \"[0:0,1]\"", + "-n 4 --job-fence -c", + "-n 4 --job-fence", + "-n 2 --test-publish", + "-n 2 --test-spawn", + "-n 2 --test-connect", + "-n 5 --test-resolve-peers --ns-dist \"1:2:2\"", + "-n 5 --test-replace 100:0,1,10,50,99", + "-n 5 --test-internal 10", + "-s 2 -n 2 --job-fence", + "-s 2 -n 2 --job-fence -c"); + +my $test; +my $cmd; +my $output; +my $status = 0; +my $testnum; + +# We are running tests against the build tree (vs. the installation +# tree). Autogen gives us a full list of all possible component +# directories in PMIX_COMPONENT_LIBRARY_PATHS. Iterate through each +# of those directories: 1) to see if there is actually a component +# built in there, and 2) to turn it into an absolute path name. Then +# put the new list in the "mca_base_component_path" MCA parameter env +# variable so that the MCA base knows where to find all the +# components. +my @myfullpaths; +my $mybuilddir = "@PMIX_BUILT_TEST_PREFIX@"; +my $mypathstr = "@PMIX_COMPONENT_LIBRARY_PATHS@"; +my @splitstr = split(':', $mypathstr); +foreach my $path (@splitstr) { + # Note that the component is actually built in the ".libs" + # subdirectory. If the component wasn't built, that subdirectory + # will not exist, so don't save it. + my $fullpath = $mybuilddir . "/" . $path . "/.libs"; + push(@myfullpaths, $fullpath) + if (-d $fullpath); +} +my $mymcapaths = join(":", @myfullpaths); +$ENV{'PMIX_MCA_mca_base_component_path'} = $mymcapaths; + +my $wdir = $mybuilddir .
"/test"; +chdir $wdir; + +$testnum = $0; +$testnum =~ s/.pl//; +$testnum = substr($testnum, -2); +$test = @tests[$testnum]; + +$cmd = "./pmix_test " . $test . " 2>&1"; +print $cmd . "\n"; +$output = `$cmd`; +print $output . "\n"; +print "CODE $?\n"; +$status = "$?"; + +exit($status >> 8); diff --git a/opal/mca/pmix/pmix3x/pmix/test/run_tests07.pl.in b/opal/mca/pmix/pmix3x/pmix/test/run_tests07.pl.in new file mode 100755 index 00000000000..fb139c9ce7d --- /dev/null +++ b/opal/mca/pmix/pmix3x/pmix/test/run_tests07.pl.in @@ -0,0 +1,73 @@ +#!/usr/bin/env perl +# +# Copyright (c) 2019 Intel, Inc. +# +# Copyright (c) 2019 Cisco Systems, Inc. All rights reserved +# $COPYRIGHT$ +# +# Additional copyrights may follow + +use strict; + +my @tests = ("-n 4 --ns-dist 3:1 --fence \"[db | 0:0-2;1:0]\"", + "-n 4 --ns-dist 3:1 --fence \"[db | 0:;1:0]\"", + "-n 4 --ns-dist 3:1 --fence \"[db | 0:;1:]\"", + "-n 4 --ns-dist 3:1 --fence \"[0:]\"", + "-n 4 --ns-dist 3:1 --fence \"[b | 0:]\"", + "-n 4 --ns-dist 3:1 --fence \"[d | 0:]\" --noise \"[0:0,1]\"", + "-n 4 --job-fence -c", + "-n 4 --job-fence", + "-n 2 --test-publish", + "-n 2 --test-spawn", + "-n 2 --test-connect", + "-n 5 --test-resolve-peers --ns-dist \"1:2:2\"", + "-n 5 --test-replace 100:0,1,10,50,99", + "-n 5 --test-internal 10", + "-s 2 -n 2 --job-fence", + "-s 2 -n 2 --job-fence -c"); + +my $test; +my $cmd; +my $output; +my $status = 0; +my $testnum; + +# We are running tests against the build tree (vs. the installation +# tree). Autogen gives us a full list of all possible component +# directories in PMIX_COMPONENT_LIBRARY_PATHS. Iterate through each +# of those directories: 1) to see if there is actually a component +# built in there, and 2) to turn it into an absolute path name. Then +# put the new list in the "mca_base_component_path" MCA parameter env +# variable so that the MCA base knows where to find all the +# components.
+my @myfullpaths; +my $mybuilddir = "@PMIX_BUILT_TEST_PREFIX@"; +my $mypathstr = "@PMIX_COMPONENT_LIBRARY_PATHS@"; +my @splitstr = split(':', $mypathstr); +foreach my $path (@splitstr) { + # Note that the component is actually built in the ".libs" + # subdirectory. If the component wasn't built, that subdirectory + # will not exist, so don't save it. + my $fullpath = $mybuilddir . "/" . $path . "/.libs"; + push(@myfullpaths, $fullpath) + if (-d $fullpath); +} +my $mymcapaths = join(":", @myfullpaths); +$ENV{'PMIX_MCA_mca_base_component_path'} = $mymcapaths; + +my $wdir = $mybuilddir . "/test"; +chdir $wdir; + +$testnum = $0; +$testnum =~ s/.pl//; +$testnum = substr($testnum, -2); +$test = @tests[$testnum]; + +$cmd = "./pmix_test " . $test . " 2>&1"; +print $cmd . "\n"; +$output = `$cmd`; +print $output . "\n"; +print "CODE $?\n"; +$status = "$?"; + +exit($status >> 8); diff --git a/opal/mca/pmix/pmix3x/pmix/test/run_tests08.pl.in b/opal/mca/pmix/pmix3x/pmix/test/run_tests08.pl.in new file mode 100755 index 00000000000..fb139c9ce7d --- /dev/null +++ b/opal/mca/pmix/pmix3x/pmix/test/run_tests08.pl.in @@ -0,0 +1,73 @@ +#!/usr/bin/env perl +# +# Copyright (c) 2019 Intel, Inc. +# +# Copyright (c) 2019 Cisco Systems, Inc. 
All rights reserved +# $COPYRIGHT$ +# +# Additional copyrights may follow + +use strict; + +my @tests = ("-n 4 --ns-dist 3:1 --fence \"[db | 0:0-2;1:0]\"", + "-n 4 --ns-dist 3:1 --fence \"[db | 0:;1:0]\"", + "-n 4 --ns-dist 3:1 --fence \"[db | 0:;1:]\"", + "-n 4 --ns-dist 3:1 --fence \"[0:]\"", + "-n 4 --ns-dist 3:1 --fence \"[b | 0:]\"", + "-n 4 --ns-dist 3:1 --fence \"[d | 0:]\" --noise \"[0:0,1]\"", + "-n 4 --job-fence -c", + "-n 4 --job-fence", + "-n 2 --test-publish", + "-n 2 --test-spawn", + "-n 2 --test-connect", + "-n 5 --test-resolve-peers --ns-dist \"1:2:2\"", + "-n 5 --test-replace 100:0,1,10,50,99", + "-n 5 --test-internal 10", + "-s 2 -n 2 --job-fence", + "-s 2 -n 2 --job-fence -c"); + +my $test; +my $cmd; +my $output; +my $status = 0; +my $testnum; + +# We are running tests against the build tree (vs. the installation +# tree). Autogen gives us a full list of all possible component +# directories in PMIX_COMPONENT_LIBRARY_PATHS. Iterate through each +# of those directories: 1) to see if there is actually a component +# built in there, and 2) to turn it into an absolute path name. Then +# put the new list in the "mca_base_component_path" MCA parameter env +# variable so that the MCA base knows where to find all the +# components. +my @myfullpaths; +my $mybuilddir = "@PMIX_BUILT_TEST_PREFIX@"; +my $mypathstr = "@PMIX_COMPONENT_LIBRARY_PATHS@"; +my @splitstr = split(':', $mypathstr); +foreach my $path (@splitstr) { + # Note that the component is actually built in the ".libs" + # subdirectory. If the component wasn't built, that subdirectory + # will not exist, so don't save it. + my $fullpath = $mybuilddir . "/" . $path . "/.libs"; + push(@myfullpaths, $fullpath) + if (-d $fullpath); +} +my $mymcapaths = join(":", @myfullpaths); +$ENV{'PMIX_MCA_mca_base_component_path'} = $mymcapaths; + +my $wdir = $mybuilddir .
"/test"; +chdir $wdir; + +$testnum = $0; +$testnum =~ s/.pl//; +$testnum = substr($testnum, -2); +$test = @tests[$testnum]; + +$cmd = "./pmix_test " . $test . " 2>&1"; +print $cmd . "\n"; +$output = `$cmd`; +print $output . "\n"; +print "CODE $?\n"; +$status = "$?"; + +exit($status >> 8); diff --git a/opal/mca/pmix/pmix3x/pmix/test/run_tests09.pl.in b/opal/mca/pmix/pmix3x/pmix/test/run_tests09.pl.in new file mode 100755 index 00000000000..fb139c9ce7d --- /dev/null +++ b/opal/mca/pmix/pmix3x/pmix/test/run_tests09.pl.in @@ -0,0 +1,73 @@ +#!/usr/bin/env perl +# +# Copyright (c) 2019 Intel, Inc. +# +# Copyright (c) 2019 Cisco Systems, Inc. All rights reserved +# $COPYRIGHT$ +# +# Additional copyrights may follow + +use strict; + +my @tests = ("-n 4 --ns-dist 3:1 --fence \"[db | 0:0-2;1:0]\"", + "-n 4 --ns-dist 3:1 --fence \"[db | 0:;1:0]\"", + "-n 4 --ns-dist 3:1 --fence \"[db | 0:;1:]\"", + "-n 4 --ns-dist 3:1 --fence \"[0:]\"", + "-n 4 --ns-dist 3:1 --fence \"[b | 0:]\"", + "-n 4 --ns-dist 3:1 --fence \"[d | 0:]\" --noise \"[0:0,1]\"", + "-n 4 --job-fence -c", + "-n 4 --job-fence", + "-n 2 --test-publish", + "-n 2 --test-spawn", + "-n 2 --test-connect", + "-n 5 --test-resolve-peers --ns-dist \"1:2:2\"", + "-n 5 --test-replace 100:0,1,10,50,99", + "-n 5 --test-internal 10", + "-s 2 -n 2 --job-fence", + "-s 2 -n 2 --job-fence -c"); + +my $test; +my $cmd; +my $output; +my $status = 0; +my $testnum; + +# We are running tests against the build tree (vs. the installation +# tree). Autogen gives us a full list of all possible component +# directories in PMIX_COMPONENT_LIBRARY_PATHS. Iterate through each +# of those directories: 1) to see if there is actually a component +# built in there, and 2) to turn it into an absolute path name. Then +# put the new list in the "mca_base_component_path" MCA parameter env +# variable so that the MCA base knows where to find all the +# components.
+my @myfullpaths; +my $mybuilddir = "@PMIX_BUILT_TEST_PREFIX@"; +my $mypathstr = "@PMIX_COMPONENT_LIBRARY_PATHS@"; +my @splitstr = split(':', $mypathstr); +foreach my $path (@splitstr) { + # Note that the component is actually built in the ".libs" + # subdirectory. If the component wasn't built, that subdirectory + # will not exist, so don't save it. + my $fullpath = $mybuilddir . "/" . $path . "/.libs"; + push(@myfullpaths, $fullpath) + if (-d $fullpath); +} +my $mymcapaths = join(":", @myfullpaths); +$ENV{'PMIX_MCA_mca_base_component_path'} = $mymcapaths; + +my $wdir = $mybuilddir . "/test"; +chdir $wdir; + +$testnum = $0; +$testnum =~ s/.pl//; +$testnum = substr($testnum, -2); +$test = @tests[$testnum]; + +$cmd = "./pmix_test " . $test . " 2>&1"; +print $cmd . "\n"; +$output = `$cmd`; +print $output . "\n"; +print "CODE $?\n"; +$status = "$?"; + +exit($status >> 8); diff --git a/opal/mca/pmix/pmix3x/pmix/test/run_tests10.pl.in b/opal/mca/pmix/pmix3x/pmix/test/run_tests10.pl.in new file mode 100755 index 00000000000..fb139c9ce7d --- /dev/null +++ b/opal/mca/pmix/pmix3x/pmix/test/run_tests10.pl.in @@ -0,0 +1,73 @@ +#!/usr/bin/env perl +# +# Copyright (c) 2019 Intel, Inc. +# +# Copyright (c) 2019 Cisco Systems, Inc. 
All rights reserved +# $COPYRIGHT$ +# +# Additional copyrights may follow + +use strict; + +my @tests = ("-n 4 --ns-dist 3:1 --fence \"[db | 0:0-2;1:0]\"", + "-n 4 --ns-dist 3:1 --fence \"[db | 0:;1:0]\"", + "-n 4 --ns-dist 3:1 --fence \"[db | 0:;1:]\"", + "-n 4 --ns-dist 3:1 --fence \"[0:]\"", + "-n 4 --ns-dist 3:1 --fence \"[b | 0:]\"", + "-n 4 --ns-dist 3:1 --fence \"[d | 0:]\" --noise \"[0:0,1]\"", + "-n 4 --job-fence -c", + "-n 4 --job-fence", + "-n 2 --test-publish", + "-n 2 --test-spawn", + "-n 2 --test-connect", + "-n 5 --test-resolve-peers --ns-dist \"1:2:2\"", + "-n 5 --test-replace 100:0,1,10,50,99", + "-n 5 --test-internal 10", + "-s 2 -n 2 --job-fence", + "-s 2 -n 2 --job-fence -c"); + +my $test; +my $cmd; +my $output; +my $status = 0; +my $testnum; + +# We are running tests against the build tree (vs. the installation +# tree). Autogen gives us a full list of all possible component +# directories in PMIX_COMPONENT_LIBRARY_PATHS. Iterate through each +# of those directories: 1) to see if there is actually a component +# built in there, and 2) to turn it into an absolute path name. Then +# put the new list in the "mca_base_component_path" MCA parameter env +# variable so that the MCA base knows where to find all the +# components. +my @myfullpaths; +my $mybuilddir = "@PMIX_BUILT_TEST_PREFIX@"; +my $mypathstr = "@PMIX_COMPONENT_LIBRARY_PATHS@"; +my @splitstr = split(':', $mypathstr); +foreach my $path (@splitstr) { + # Note that the component is actually built in the ".libs" + # subdirectory. If the component wasn't built, that subdirectory + # will not exist, so don't save it. + my $fullpath = $mybuilddir . "/" . $path . "/.libs"; + push(@myfullpaths, $fullpath) + if (-d $fullpath); +} +my $mymcapaths = join(":", @myfullpaths); +$ENV{'PMIX_MCA_mca_base_component_path'} = $mymcapaths; + +my $wdir = $mybuilddir .
"/test"; +chdir $wdir; + +$testnum = $0; +$testnum =~ s/.pl//; +$testnum = substr($testnum, -2); +$test = @tests[$testnum]; + +$cmd = "./pmix_test " . $test . " 2>&1"; +print $cmd . "\n"; +$output = `$cmd`; +print $output . "\n"; +print "CODE $?\n"; +$status = "$?"; + +exit($status >> 8); diff --git a/opal/mca/pmix/pmix3x/pmix/test/run_tests11.pl.in b/opal/mca/pmix/pmix3x/pmix/test/run_tests11.pl.in new file mode 100755 index 00000000000..fb139c9ce7d --- /dev/null +++ b/opal/mca/pmix/pmix3x/pmix/test/run_tests11.pl.in @@ -0,0 +1,73 @@ +#!/usr/bin/env perl +# +# Copyright (c) 2019 Intel, Inc. +# +# Copyright (c) 2019 Cisco Systems, Inc. All rights reserved +# $COPYRIGHT$ +# +# Additional copyrights may follow + +use strict; + +my @tests = ("-n 4 --ns-dist 3:1 --fence \"[db | 0:0-2;1:0]\"", + "-n 4 --ns-dist 3:1 --fence \"[db | 0:;1:0]\"", + "-n 4 --ns-dist 3:1 --fence \"[db | 0:;1:]\"", + "-n 4 --ns-dist 3:1 --fence \"[0:]\"", + "-n 4 --ns-dist 3:1 --fence \"[b | 0:]\"", + "-n 4 --ns-dist 3:1 --fence \"[d | 0:]\" --noise \"[0:0,1]\"", + "-n 4 --job-fence -c", + "-n 4 --job-fence", + "-n 2 --test-publish", + "-n 2 --test-spawn", + "-n 2 --test-connect", + "-n 5 --test-resolve-peers --ns-dist \"1:2:2\"", + "-n 5 --test-replace 100:0,1,10,50,99", + "-n 5 --test-internal 10", + "-s 2 -n 2 --job-fence", + "-s 2 -n 2 --job-fence -c"); + +my $test; +my $cmd; +my $output; +my $status = 0; +my $testnum; + +# We are running tests against the build tree (vs. the installation +# tree). Autogen gives us a full list of all possible component +# directories in PMIX_COMPONENT_LIBRARY_PATHS. Iterate through each +# of those directories: 1) to see if there is actually a component +# built in there, and 2) to turn it into an absolute path name. Then +# put the new list in the "mca_base_component_path" MCA parameter env +# variable so that the MCA base knows where to find all the +# components.
+my @myfullpaths; +my $mybuilddir = "@PMIX_BUILT_TEST_PREFIX@"; +my $mypathstr = "@PMIX_COMPONENT_LIBRARY_PATHS@"; +my @splitstr = split(':', $mypathstr); +foreach my $path (@splitstr) { + # Note that the component is actually built in the ".libs" + # subdirectory. If the component wasn't built, that subdirectory + # will not exist, so don't save it. + my $fullpath = $mybuilddir . "/" . $path . "/.libs"; + push(@myfullpaths, $fullpath) + if (-d $fullpath); +} +my $mymcapaths = join(":", @myfullpaths); +$ENV{'PMIX_MCA_mca_base_component_path'} = $mymcapaths; + +my $wdir = $mybuilddir . "/test"; +chdir $wdir; + +$testnum = $0; +$testnum =~ s/.pl//; +$testnum = substr($testnum, -2); +$test = @tests[$testnum]; + +$cmd = "./pmix_test " . $test . " 2>&1"; +print $cmd . "\n"; +$output = `$cmd`; +print $output . "\n"; +print "CODE $?\n"; +$status = "$?"; + +exit($status >> 8); diff --git a/opal/mca/pmix/pmix3x/pmix/test/run_tests12.pl.in b/opal/mca/pmix/pmix3x/pmix/test/run_tests12.pl.in new file mode 100755 index 00000000000..fb139c9ce7d --- /dev/null +++ b/opal/mca/pmix/pmix3x/pmix/test/run_tests12.pl.in @@ -0,0 +1,73 @@ +#!/usr/bin/env perl +# +# Copyright (c) 2019 Intel, Inc. +# +# Copyright (c) 2019 Cisco Systems, Inc. 
All rights reserved +# $COPYRIGHT$ +# +# Additional copyrights may follow + +use strict; + +my @tests = ("-n 4 --ns-dist 3:1 --fence \"[db | 0:0-2;1:0]\"", + "-n 4 --ns-dist 3:1 --fence \"[db | 0:;1:0]\"", + "-n 4 --ns-dist 3:1 --fence \"[db | 0:;1:]\"", + "-n 4 --ns-dist 3:1 --fence \"[0:]\"", + "-n 4 --ns-dist 3:1 --fence \"[b | 0:]\"", + "-n 4 --ns-dist 3:1 --fence \"[d | 0:]\" --noise \"[0:0,1]\"", + "-n 4 --job-fence -c", + "-n 4 --job-fence", + "-n 2 --test-publish", + "-n 2 --test-spawn", + "-n 2 --test-connect", + "-n 5 --test-resolve-peers --ns-dist \"1:2:2\"", + "-n 5 --test-replace 100:0,1,10,50,99", + "-n 5 --test-internal 10", + "-s 2 -n 2 --job-fence", + "-s 2 -n 2 --job-fence -c"); + +my $test; +my $cmd; +my $output; +my $status = 0; +my $testnum; + +# We are running tests against the build tree (vs. the installation +# tree). Autogen gives us a full list of all possible component +# directories in PMIX_COMPONENT_LIBRARY_PATHS. Iterate through each +# of those directories: 1) to see if there is actually a component +# built in there, and 2) to turn it into an absolute path name. Then +# put the new list in the "mca_base_component_path" MCA parameter env +# variable so that the MCA base knows where to find all the +# components. +my @myfullpaths; +my $mybuilddir = "@PMIX_BUILT_TEST_PREFIX@"; +my $mypathstr = "@PMIX_COMPONENT_LIBRARY_PATHS@"; +my @splitstr = split(':', $mypathstr); +foreach my $path (@splitstr) { + # Note that the component is actually built in the ".libs" + # subdirectory. If the component wasn't built, that subdirectory + # will not exist, so don't save it. + my $fullpath = $mybuilddir . "/" . $path . "/.libs"; + push(@myfullpaths, $fullpath) + if (-d $fullpath); +} +my $mymcapaths = join(":", @myfullpaths); +$ENV{'PMIX_MCA_mca_base_component_path'} = $mymcapaths; + +my $wdir = $mybuilddir .
"/test"; +chdir $wdir; + +$testnum = $0; +$testnum =~ s/.pl//; +$testnum = substr($testnum, -2); +$test = @tests[$testnum]; + +$cmd = "./pmix_test " . $test . " 2>&1"; +print $cmd . "\n"; +$output = `$cmd`; +print $output . "\n"; +print "CODE $?\n"; +$status = "$?"; + +exit($status >> 8); diff --git a/opal/mca/pmix/pmix3x/pmix/test/run_tests13.pl.in b/opal/mca/pmix/pmix3x/pmix/test/run_tests13.pl.in new file mode 100755 index 00000000000..fb139c9ce7d --- /dev/null +++ b/opal/mca/pmix/pmix3x/pmix/test/run_tests13.pl.in @@ -0,0 +1,73 @@ +#!/usr/bin/env perl +# +# Copyright (c) 2019 Intel, Inc. +# +# Copyright (c) 2019 Cisco Systems, Inc. All rights reserved +# $COPYRIGHT$ +# +# Additional copyrights may follow + +use strict; + +my @tests = ("-n 4 --ns-dist 3:1 --fence \"[db | 0:0-2;1:0]\"", + "-n 4 --ns-dist 3:1 --fence \"[db | 0:;1:0]\"", + "-n 4 --ns-dist 3:1 --fence \"[db | 0:;1:]\"", + "-n 4 --ns-dist 3:1 --fence \"[0:]\"", + "-n 4 --ns-dist 3:1 --fence \"[b | 0:]\"", + "-n 4 --ns-dist 3:1 --fence \"[d | 0:]\" --noise \"[0:0,1]\"", + "-n 4 --job-fence -c", + "-n 4 --job-fence", + "-n 2 --test-publish", + "-n 2 --test-spawn", + "-n 2 --test-connect", + "-n 5 --test-resolve-peers --ns-dist \"1:2:2\"", + "-n 5 --test-replace 100:0,1,10,50,99", + "-n 5 --test-internal 10", + "-s 2 -n 2 --job-fence", + "-s 2 -n 2 --job-fence -c"); + +my $test; +my $cmd; +my $output; +my $status = 0; +my $testnum; + +# We are running tests against the build tree (vs. the installation +# tree). Autogen gives us a full list of all possible component +# directories in PMIX_COMPONENT_LIBRARY_PATHS. Iterate through each +# of those directories: 1) to see if there is actually a component +# built in there, and 2) to turn it into an absolute path name. Then +# put the new list in the "mca_base_component_path" MCA parameter env +# variable so that the MCA base knows where to find all the +# components.
+my @myfullpaths; +my $mybuilddir = "@PMIX_BUILT_TEST_PREFIX@"; +my $mypathstr = "@PMIX_COMPONENT_LIBRARY_PATHS@"; +my @splitstr = split(':', $mypathstr); +foreach my $path (@splitstr) { + # Note that the component is actually built in the ".libs" + # subdirectory. If the component wasn't built, that subdirectory + # will not exist, so don't save it. + my $fullpath = $mybuilddir . "/" . $path . "/.libs"; + push(@myfullpaths, $fullpath) + if (-d $fullpath); +} +my $mymcapaths = join(":", @myfullpaths); +$ENV{'PMIX_MCA_mca_base_component_path'} = $mymcapaths; + +my $wdir = $mybuilddir . "/test"; +chdir $wdir; + +$testnum = $0; +$testnum =~ s/.pl//; +$testnum = substr($testnum, -2); +$test = @tests[$testnum]; + +$cmd = "./pmix_test " . $test . " 2>&1"; +print $cmd . "\n"; +$output = `$cmd`; +print $output . "\n"; +print "CODE $?\n"; +$status = "$?"; + +exit($status >> 8); diff --git a/opal/mca/pmix/pmix3x/pmix/test/run_tests14.pl.in b/opal/mca/pmix/pmix3x/pmix/test/run_tests14.pl.in new file mode 100755 index 00000000000..fb139c9ce7d --- /dev/null +++ b/opal/mca/pmix/pmix3x/pmix/test/run_tests14.pl.in @@ -0,0 +1,73 @@ +#!/usr/bin/env perl +# +# Copyright (c) 2019 Intel, Inc. +# +# Copyright (c) 2019 Cisco Systems, Inc. 
All rights reserved +# $COPYRIGHT$ +# +# Additional copyrights may follow + +use strict; + +my @tests = ("-n 4 --ns-dist 3:1 --fence \"[db | 0:0-2;1:0]\"", + "-n 4 --ns-dist 3:1 --fence \"[db | 0:;1:0]\"", + "-n 4 --ns-dist 3:1 --fence \"[db | 0:;1:]\"", + "-n 4 --ns-dist 3:1 --fence \"[0:]\"", + "-n 4 --ns-dist 3:1 --fence \"[b | 0:]\"", + "-n 4 --ns-dist 3:1 --fence \"[d | 0:]\" --noise \"[0:0,1]\"", + "-n 4 --job-fence -c", + "-n 4 --job-fence", + "-n 2 --test-publish", + "-n 2 --test-spawn", + "-n 2 --test-connect", + "-n 5 --test-resolve-peers --ns-dist \"1:2:2\"", + "-n 5 --test-replace 100:0,1,10,50,99", + "-n 5 --test-internal 10", + "-s 2 -n 2 --job-fence", + "-s 2 -n 2 --job-fence -c"); + +my $test; +my $cmd; +my $output; +my $status = 0; +my $testnum; + +# We are running tests against the build tree (vs. the installation +# tree). Autogen gives us a full list of all possible component +# directories in PMIX_COMPONENT_LIBRARY_PATHS. Iterate through each +# of those directories: 1) to see if there is actually a component +# built in there, and 2) to turn it into an absolute path name. Then +# put the new list in the "mca_bast_component_path" MCA parameter env +# variable so that the MCA base knows where to find all the +# components. +my @myfullpaths; +my $mybuilddir = "@PMIX_BUILT_TEST_PREFIX@"; +my $mypathstr = "@PMIX_COMPONENT_LIBRARY_PATHS@"; +my @splitstr = split(':', $mypathstr); +foreach my $path (@splitstr) { + # Note that the component is actually built in the ".libs" + # subdirectory. If the component wasn't built, that subdirectory + # will not exist, so don't save it. + my $fullpath = $mybuilddir . "/" . $path . "/.libs"; + push(@myfullpaths, $fullpath) + if (-d $fullpath); +} +my $mymcapaths = join(":", @myfullpaths); +$ENV{'PMIX_MCA_mca_base_component_path'} = $mymcapaths; + +my $wdir = $mybuilddir . 
"/test"; +chdir $wdir or die "Cannot chdir to $wdir: $!\n"; + +$testnum = $0; +$testnum =~ s/\.pl$//; # strip only the trailing ".pl" suffix, never a match inside the path +$testnum = substr($testnum, -2); +$test = $tests[$testnum]; + +$cmd = "./pmix_test " . $test . " 2>&1"; +print $cmd . "\n"; +$output = `$cmd`; +print $output . "\n"; +print "CODE $?\n"; +$status = "$?"; + +exit($status >> 8); diff --git a/opal/mca/pmix/pmix3x/pmix/test/run_tests15.pl.in b/opal/mca/pmix/pmix3x/pmix/test/run_tests15.pl.in new file mode 100755 index 00000000000..fb139c9ce7d --- /dev/null +++ b/opal/mca/pmix/pmix3x/pmix/test/run_tests15.pl.in @@ -0,0 +1,73 @@ +#!/usr/bin/env perl +# +# Copyright (c) 2019 Intel, Inc. +# +# Copyright (c) 2019 Cisco Systems, Inc. All rights reserved +# $COPYRIGHT$ +# +# Additional copyrights may follow + +use strict; + +my @tests = ("-n 4 --ns-dist 3:1 --fence \"[db | 0:0-2;1:0]\"", + "-n 4 --ns-dist 3:1 --fence \"[db | 0:;1:0]\"", + "-n 4 --ns-dist 3:1 --fence \"[db | 0:;1:]\"", + "-n 4 --ns-dist 3:1 --fence \"[0:]\"", + "-n 4 --ns-dist 3:1 --fence \"[b | 0:]\"", + "-n 4 --ns-dist 3:1 --fence \"[d | 0:]\" --noise \"[0:0,1]\"", + "-n 4 --job-fence -c", + "-n 4 --job-fence", + "-n 2 --test-publish", + "-n 2 --test-spawn", + "-n 2 --test-connect", + "-n 5 --test-resolve-peers --ns-dist \"1:2:2\"", + "-n 5 --test-replace 100:0,1,10,50,99", + "-n 5 --test-internal 10", + "-s 2 -n 2 --job-fence", + "-s 2 -n 2 --job-fence -c"); + +my $test; +my $cmd; +my $output; +my $status = 0; +my $testnum; + +# We are running tests against the build tree (vs. the installation +# tree). Autogen gives us a full list of all possible component +# directories in PMIX_COMPONENT_LIBRARY_PATHS. Iterate through each +# of those directories: 1) to see if there is actually a component +# built in there, and 2) to turn it into an absolute path name. Then +# put the new list in the "mca_base_component_path" MCA parameter env +# variable so that the MCA base knows where to find all the +# components. 
+my @myfullpaths; +my $mybuilddir = "@PMIX_BUILT_TEST_PREFIX@"; +my $mypathstr = "@PMIX_COMPONENT_LIBRARY_PATHS@"; +my @splitstr = split(':', $mypathstr); +foreach my $path (@splitstr) { + # Note that the component is actually built in the ".libs" + # subdirectory. If the component wasn't built, that subdirectory + # will not exist, so don't save it. + my $fullpath = $mybuilddir . "/" . $path . "/.libs"; + push(@myfullpaths, $fullpath) + if (-d $fullpath); +} +my $mymcapaths = join(":", @myfullpaths); +$ENV{'PMIX_MCA_mca_base_component_path'} = $mymcapaths; + +my $wdir = $mybuilddir . "/test"; +chdir $wdir or die "Cannot chdir to $wdir: $!\n"; + +$testnum = $0; +$testnum =~ s/\.pl$//; # strip only the trailing ".pl" suffix, never a match inside the path +$testnum = substr($testnum, -2); +$test = $tests[$testnum]; + +$cmd = "./pmix_test " . $test . " 2>&1"; +print $cmd . "\n"; +$output = `$cmd`; +print $output . "\n"; +print "CODE $?\n"; +$status = "$?"; + +exit($status >> 8); diff --git a/opal/mca/pmix/pmix3x/pmix/test/server_callbacks.c b/opal/mca/pmix/pmix3x/pmix/test/server_callbacks.c index 783d45d6282..ae16129ecf2 100644 --- a/opal/mca/pmix/pmix3x/pmix/test/server_callbacks.c +++ b/opal/mca/pmix/pmix3x/pmix/test/server_callbacks.c @@ -2,7 +2,7 @@ * Copyright (c) 2015-2018 Intel, Inc. All rights reserved. * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. - * Copyright (c) 2015 Mellanox Technologies, Inc. + * Copyright (c) 2015-2018 Mellanox Technologies, Inc. * All rights reserved. * Copyright (c) 2016 IBM Corporation. All rights reserved. 
* $COPYRIGHT$ @@ -17,6 +17,7 @@ #include #include "server_callbacks.h" #include "src/util/argv.h" +#include "test_server.h" extern bool spawn_wait; @@ -36,28 +37,6 @@ pmix_server_module_t mymodule = { .deregister_events = deregevents_fn }; -typedef struct { - pmix_list_item_t super; - pmix_modex_data_t data; -} pmix_test_data_t; - -static void pcon(pmix_test_data_t *p) -{ - p->data.blob = NULL; - p->data.size = 0; -} - -static void pdes(pmix_test_data_t *p) -{ - if (NULL != p->data.blob) { - free(p->data.blob); - } -} - -PMIX_CLASS_INSTANCE(pmix_test_data_t, - pmix_list_item_t, - pcon, pdes); - typedef struct { pmix_list_item_t super; pmix_info_t data; @@ -95,12 +74,25 @@ pmix_status_t connected(const pmix_proc_t *proc, void *server_object, pmix_status_t finalized(const pmix_proc_t *proc, void *server_object, pmix_op_cbfunc_t cbfunc, void *cbdata) { - if( CLI_TERM <= cli_info[proc->rank].state ){ + cli_info_t *cli = NULL; + int i; + for (i = 0; i < cli_info_cnt; i++) { + if((proc->rank == cli_info[i].rank) && + (0 == strcmp(proc->nspace, cli_info[i].ns))){ + cli = &cli_info[i]; + break; + } + } + if (NULL == cli) { + TEST_ERROR(("cannot found rank %d", proc->rank)); + return PMIX_SUCCESS; + } + if( CLI_TERM <= cli->state ){ TEST_ERROR(("double termination of rank %d", proc->rank)); return PMIX_SUCCESS; } - TEST_VERBOSE(("Rank %d terminated", proc->rank)); - cli_finalize(&cli_info[proc->rank]); + TEST_VERBOSE(("Rank %s:%d terminated", proc->nspace, proc->rank)); + cli_finalize(cli); finalized_count++; if (finalized_count == cli_info_cnt) { if (NULL != pmix_test_published_list) { @@ -135,16 +127,13 @@ pmix_status_t fencenb_fn(const pmix_proc_t procs[], size_t nprocs, TEST_VERBOSE(("Getting data for %s:%d", procs[0].nspace, procs[0].rank)); - /* In a perfect world, we should wait until - * the test servers from all involved procs - * respond. 
We don't have multi-server capability - * yet, so we'll just respond right away and - * return what we were given */ - - if (NULL != cbfunc) { - cbfunc(PMIX_SUCCESS, data, ndata, cbdata, NULL, NULL); + if ((pmix_list_get_size(server_list) == 1) && (my_server_id == 0)) { + if (NULL != cbfunc) { + cbfunc(PMIX_SUCCESS, data, ndata, cbdata, NULL, NULL); + } + return PMIX_SUCCESS; } - return PMIX_SUCCESS; + return server_fence_contrib(data, ndata, cbfunc, cbdata); } pmix_status_t dmodex_fn(const pmix_proc_t *proc, @@ -153,12 +142,12 @@ pmix_status_t dmodex_fn(const pmix_proc_t *proc, { TEST_VERBOSE(("Getting data for %s:%d", proc->nspace, proc->rank)); - /* In a perfect world, we should call another server - * to get the data for one of its clients. We don't - * have multi-server capability yet, so we'll just - * respond right away */ - - return PMIX_ERR_NOT_FOUND; + /* return not_found for single server mode */ + if ((pmix_list_get_size(server_list) == 1) && (my_server_id == 0)) { + return PMIX_ERR_NOT_FOUND; + } + // TODO: add support tracker for dmodex requests + return server_dmdx_get(proc->nspace, proc->rank, cbfunc, cbdata); } pmix_status_t publish_fn(const pmix_proc_t *proc, @@ -199,6 +188,7 @@ pmix_status_t lookup_fn(const pmix_proc_t *proc, char **keys, pmix_lookup_cbfunc_t cbfunc, void *cbdata) { size_t i, ndata, ret; + pmix_status_t rc = PMIX_SUCCESS; pmix_pdata_t *pdata; pmix_test_info_t *tinfo; if (NULL == pmix_test_published_list) { @@ -221,13 +211,15 @@ pmix_status_t lookup_fn(const pmix_proc_t *proc, char **keys, } } if (ret != ndata) { - return PMIX_ERR_NOT_FOUND; + rc = PMIX_ERR_NOT_FOUND; + goto error; } if (NULL != cbfunc) { cbfunc(PMIX_SUCCESS, pdata, ndata, cbdata); } +error: PMIX_PDATA_FREE(pdata, ndata); - return PMIX_SUCCESS; + return rc; } pmix_status_t unpublish_fn(const pmix_proc_t *proc, char **keys, diff --git a/opal/mca/pmix/pmix3x/pmix/test/simple/Makefile.am b/opal/mca/pmix/pmix3x/pmix/test/simple/Makefile.am index 
4beeab62b16..5ab9f568bb0 100644 --- a/opal/mca/pmix/pmix3x/pmix/test/simple/Makefile.am +++ b/opal/mca/pmix/pmix3x/pmix/test/simple/Makefile.am @@ -11,7 +11,7 @@ # All rights reserved. # Copyright (c) 2006-2010 Cisco Systems, Inc. All rights reserved. # Copyright (c) 2012-2013 Los Alamos National Security, Inc. All rights reserved. -# Copyright (c) 2013-2018 Intel, Inc. All rights reserved. +# Copyright (c) 2013-2019 Intel, Inc. All rights reserved. # $COPYRIGHT$ # # Additional copyrights may follow @@ -25,7 +25,7 @@ headers = simptest.h noinst_PROGRAMS = simptest simpclient simppub simpdyn simpft simpdmodex \ test_pmix simptool simpdie simplegacy simptimeout \ - gwtest gwclient stability quietclient + gwtest gwclient stability quietclient simpjctrl simptest_SOURCES = $(headers) \ simptest.c @@ -85,7 +85,7 @@ simplegacy_SOURCES = $(headers) \ simplegacy.c simplegacy_LDFLAGS = $(PMIX_PKG_CONFIG_LDFLAGS) simplegacy_LDADD = \ - $(top_builddir)/src/libpmix.la + $(top_builddir)/src/libpmi.la simptimeout_SOURCES = $(headers) \ simptimeout.c @@ -116,3 +116,9 @@ quietclient_SOURCES = $(headers) \ quietclient_LDFLAGS = $(PMIX_PKG_CONFIG_LDFLAGS) quietclient_LDADD = \ $(top_builddir)/src/libpmix.la + +simpjctrl_SOURCES = \ + simpjctrl.c +simpjctrl_LDFLAGS = $(PMIX_PKG_CONFIG_LDFLAGS) +simpjctrl_LDADD = \ + $(top_builddir)/src/libpmix.la diff --git a/opal/mca/pmix/pmix3x/pmix/test/simple/gwtest.c b/opal/mca/pmix/pmix3x/pmix/test/simple/gwtest.c index 2f1fae47017..3d9f8ee8d7d 100644 --- a/opal/mca/pmix/pmix3x/pmix/test/simple/gwtest.c +++ b/opal/mca/pmix/pmix3x/pmix/test/simple/gwtest.c @@ -13,7 +13,7 @@ * All rights reserved. * Copyright (c) 2009-2012 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2011 Oak Ridge National Labs. All rights reserved. - * Copyright (c) 2013-2018 Intel, Inc. All rights reserved. + * Copyright (c) 2013-2019 Intel, Inc. All rights reserved. * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). 
All rights reserved. * Copyright (c) 2016 IBM Corporation. All rights reserved. @@ -392,9 +392,9 @@ int main(int argc, char **argv) /* setup to see sigchld on the forked tests */ PMIX_CONSTRUCT(&children, pmix_list_t); - event_assign(&handler, pmix_globals.evbase, SIGCHLD, - EV_SIGNAL|EV_PERSIST,wait_signal_callback, &handler); - event_add(&handler, NULL); + pmix_event_assign(&handler, pmix_globals.evbase, SIGCHLD, + EV_SIGNAL|EV_PERSIST,wait_signal_callback, &handler); + pmix_event_add(&handler, NULL); /* we have a single namespace for all clients */ atmp = NULL; @@ -1023,7 +1023,7 @@ static void wait_signal_callback(int fd, short event, void *arg) pid_t pid; wait_tracker_t *t2; - if (SIGCHLD != event_get_signal(sig)) { + if (SIGCHLD != pmix_event_get_signal(sig)) { return; } diff --git a/opal/mca/pmix/pmix3x/pmix/test/simple/quietclient.c b/opal/mca/pmix/pmix3x/pmix/test/simple/quietclient.c index 428ba4e341b..d91e7e58e35 100644 --- a/opal/mca/pmix/pmix3x/pmix/test/simple/quietclient.c +++ b/opal/mca/pmix/pmix3x/pmix/test/simple/quietclient.c @@ -13,7 +13,7 @@ * All rights reserved. * Copyright (c) 2009-2012 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2011 Oak Ridge National Labs. All rights reserved. - * Copyright (c) 2013-2018 Intel, Inc. All rights reserved. + * Copyright (c) 2013-2019 Intel, Inc. All rights reserved. * Copyright (c) 2015 Mellanox Technologies, Inc. All rights reserved. 
* $COPYRIGHT$ * @@ -103,12 +103,15 @@ int main(int argc, char **argv) pmix_value_t *val = &value; char *tmp; pmix_proc_t proc; - uint32_t nprocs, n; + uint32_t nprocs, n, k, nlocal; int cnt, j; volatile bool active; pmix_info_t *iptr; size_t ninfo; pmix_status_t code; + char **peers; + bool all_local, local; + pmix_rank_t *locals = NULL; /* init us and declare we are a test programming model */ PMIX_INFO_CREATE(iptr, 2); @@ -152,11 +155,11 @@ int main(int argc, char **argv) usleep(10); } - /* get our universe size */ + /* get our job size */ (void)strncpy(proc.nspace, myproc.nspace, PMIX_MAX_NSLEN); proc.rank = PMIX_RANK_WILDCARD; - if (PMIX_SUCCESS != (rc = PMIx_Get(&proc, PMIX_UNIV_SIZE, NULL, 0, &val))) { - pmix_output(0, "Client ns %s rank %d: PMIx_Get universe size failed: %s", + if (PMIX_SUCCESS != (rc = PMIx_Get(&proc, PMIX_JOB_SIZE, NULL, 0, &val))) { + pmix_output(0, "Client ns %s rank %d: PMIx_Get job size failed: %s", myproc.nspace, myproc.rank, PMIx_Error_string(rc)); goto done; } @@ -173,6 +176,27 @@ int main(int argc, char **argv) goto done; } + /* get a list of our local peers */ + if (PMIX_SUCCESS != (rc = PMIx_Get(&proc, PMIX_LOCAL_PEERS, NULL, 0, &val))) { + pmix_output(0, "Client ns %s rank %d: PMIx_Get local peers failed: %s", + myproc.nspace, myproc.rank, PMIx_Error_string(rc)); + goto done; + } + /* split the returned string to get the rank of each local peer */ + peers = pmix_argv_split(val->data.string, ','); + PMIX_VALUE_RELEASE(val); + nlocal = pmix_argv_count(peers); + if (nprocs == nlocal) { + all_local = true; + } else { + all_local = false; + locals = (pmix_rank_t*)malloc(pmix_argv_count(peers) * sizeof(pmix_rank_t)); + for (cnt=0; NULL != peers[cnt]; cnt++) { + locals[cnt] = strtoul(peers[cnt], NULL, 10); + } + } + pmix_argv_free(peers); + for (cnt=0; cnt < MAXCNT; cnt++) { (void)asprintf(&tmp, "%s-%d-local-%d", myproc.nspace, myproc.rank, cnt); value.type = PMIX_UINT64; @@ -213,42 +237,67 @@ int main(int argc, char **argv) for (j=0; 
j <= cnt; j++) { for (n=0; n < nprocs; n++) { proc.rank = n; - (void)asprintf(&tmp, "%s-%d-local-%d", myproc.nspace, n, j); - if (PMIX_SUCCESS != (rc = PMIx_Get(&proc, tmp, NULL, 0, &val))) { - pmix_output(0, "Client ns %s rank %d cnt %d: PMIx_Get %s failed: %s", - myproc.nspace, myproc.rank, j, tmp, PMIx_Error_string(rc)); - continue; - } - if (NULL == val) { - pmix_output(0, "Client ns %s rank %d: NULL value returned", - myproc.nspace, myproc.rank); - break; - } - if (PMIX_UINT64 != val->type) { - pmix_output(0, "Client ns %s rank %d cnt %d: PMIx_Get %s returned wrong type: %d", myproc.nspace, myproc.rank, j, tmp, val->type); - PMIX_VALUE_RELEASE(val); - free(tmp); - continue; + if (all_local) { + local = true; + } else { + local = false; + /* see if this proc is local to us */ + for (k=0; k < nlocal; k++) { + if (proc.rank == locals[k]) { + local = true; + break; + } + } } - if (1234 != val->data.uint64) { - pmix_output(0, "Client ns %s rank %d cnt %d: PMIx_Get %s returned wrong value: %d", myproc.nspace, myproc.rank, j, tmp, (int)val->data.uint64); + if (local) { + (void)asprintf(&tmp, "%s-%d-local-%d", myproc.nspace, n, j); + if (PMIX_SUCCESS != (rc = PMIx_Get(&proc, tmp, NULL, 0, &val))) { + pmix_output(0, "Client ns %s rank %d cnt %d: PMIx_Get %s failed: %s", + myproc.nspace, myproc.rank, j, tmp, PMIx_Error_string(rc)); + continue; + } + if (NULL == val) { + pmix_output(0, "Client ns %s rank %d: NULL value returned", + myproc.nspace, myproc.rank); + break; + } + if (PMIX_UINT64 != val->type) { + pmix_output(0, "Client ns %s rank %d cnt %d: PMIx_Get %s returned wrong type: %d", myproc.nspace, myproc.rank, j, tmp, val->type); + PMIX_VALUE_RELEASE(val); + free(tmp); + continue; + } + if (1234 != val->data.uint64) { + pmix_output(0, "Client ns %s rank %d cnt %d: PMIx_Get %s returned wrong value: %d", myproc.nspace, myproc.rank, j, tmp, (int)val->data.uint64); + PMIX_VALUE_RELEASE(val); + free(tmp); + continue; + } PMIX_VALUE_RELEASE(val); free(tmp); - continue; 
- } - PMIX_VALUE_RELEASE(val); - free(tmp); - if (n != myproc.rank) { + /* now check that we don't get data for a remote proc - note that we + * always can get our own remote data as we published it */ + if (proc.rank != myproc.rank) { + (void)asprintf(&tmp, "%s-%d-remote-%d", proc.nspace, n, j); + if (PMIX_SUCCESS == (rc = PMIx_Get(&proc, tmp, NULL, 0, &val))) { + /* this data should _not_ be found as we are on the same node + * and the data was "put" with a PMIX_REMOTE scope */ + pmix_output(0, "ERROR: Client ns %s rank %d cnt %d: PMIx_Get %s returned remote data for a local proc", + myproc.nspace, myproc.rank, j, tmp); + } + PMIX_VALUE_RELEASE(val); + free(tmp); + } + } else { (void)asprintf(&tmp, "%s-%d-remote-%d", proc.nspace, n, j); if (PMIX_SUCCESS != (rc = PMIx_Get(&proc, tmp, NULL, 0, &val))) { - /* this data should _not_ be found as we are on the same node - * and the data was "put" with a PMIX_REMOTE scope */ - continue; + pmix_output(0, "Client ns %s rank %d cnt %d: PMIx_Get %s failed for remote proc", + myproc.nspace, myproc.rank, j, tmp); + } + if (NULL != val) { + PMIX_VALUE_RELEASE(val); } - pmix_output(0, "Client ns %s rank %d cnt %d: PMIx_Get %s returned remote data for a local proc", - myproc.nspace, myproc.rank, j, tmp); - PMIX_VALUE_RELEASE(val); free(tmp); } } diff --git a/opal/mca/pmix/pmix3x/pmix/test/simple/simpclient.c b/opal/mca/pmix/pmix3x/pmix/test/simple/simpclient.c index ca277c5e28f..80aea143083 100644 --- a/opal/mca/pmix/pmix3x/pmix/test/simple/simpclient.c +++ b/opal/mca/pmix/pmix3x/pmix/test/simple/simpclient.c @@ -13,7 +13,7 @@ * All rights reserved. * Copyright (c) 2009-2012 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2011 Oak Ridge National Labs. All rights reserved. - * Copyright (c) 2013-2018 Intel, Inc. All rights reserved. + * Copyright (c) 2013-2019 Intel, Inc. All rights reserved. * Copyright (c) 2015 Mellanox Technologies, Inc. All rights reserved. 
* $COPYRIGHT$ * @@ -127,13 +127,16 @@ int main(int argc, char **argv) pmix_value_t *val = &value; char *tmp; pmix_proc_t proc; - uint32_t nprocs, n; + uint32_t nprocs, n, k, nlocal; int cnt, j; bool doabort = false; volatile bool active; pmix_info_t info, *iptr; size_t ninfo; pmix_status_t code; + char **peers; + bool all_local, local; + pmix_rank_t *locals = NULL; if (1 < argc) { if (0 == strcmp("-abort", argv[1])) { @@ -184,17 +187,18 @@ int main(int argc, char **argv) usleep(10); } - /* get our universe size */ + + /* get our job size */ (void)strncpy(proc.nspace, myproc.nspace, PMIX_MAX_NSLEN); proc.rank = PMIX_RANK_WILDCARD; - if (PMIX_SUCCESS != (rc = PMIx_Get(&proc, PMIX_UNIV_SIZE, NULL, 0, &val))) { - pmix_output(0, "Client ns %s rank %d: PMIx_Get universe size failed: %s", + if (PMIX_SUCCESS != (rc = PMIx_Get(&proc, PMIX_JOB_SIZE, NULL, 0, &val))) { + pmix_output(0, "Client ns %s rank %d: PMIx_Get job size failed: %s", myproc.nspace, myproc.rank, PMIx_Error_string(rc)); goto done; } nprocs = val->data.uint32; PMIX_VALUE_RELEASE(val); - pmix_output(0, "Client %s:%d universe size %d", myproc.nspace, myproc.rank, nprocs); + pmix_output(0, "Client %s:%d job size %d", myproc.nspace, myproc.rank, nprocs); /* put a few values */ (void)asprintf(&tmp, "%s-%d-internal", myproc.nspace, myproc.rank); @@ -206,6 +210,27 @@ int main(int argc, char **argv) goto done; } + /* get a list of our local peers */ + if (PMIX_SUCCESS != (rc = PMIx_Get(&proc, PMIX_LOCAL_PEERS, NULL, 0, &val))) { + pmix_output(0, "Client ns %s rank %d: PMIx_Get local peers failed: %s", + myproc.nspace, myproc.rank, PMIx_Error_string(rc)); + goto done; + } + /* split the returned string to get the rank of each local peer */ + peers = pmix_argv_split(val->data.string, ','); + PMIX_VALUE_RELEASE(val); + nlocal = pmix_argv_count(peers); + if (nprocs == nlocal) { + all_local = true; + } else { + all_local = false; + locals = (pmix_rank_t*)malloc(pmix_argv_count(peers) * sizeof(pmix_rank_t)); + for 
(cnt=0; NULL != peers[cnt]; cnt++) { + locals[cnt] = strtoul(peers[cnt], NULL, 10); + } + } + pmix_argv_free(peers); + for (cnt=0; cnt < MAXCNT; cnt++) { (void)asprintf(&tmp, "%s-%d-local-%d", myproc.nspace, myproc.rank, cnt); value.type = PMIX_UINT64; @@ -246,43 +271,71 @@ int main(int argc, char **argv) for (j=0; j <= cnt; j++) { for (n=0; n < nprocs; n++) { proc.rank = n; - (void)asprintf(&tmp, "%s-%d-local-%d", myproc.nspace, n, j); - if (PMIX_SUCCESS != (rc = PMIx_Get(&proc, tmp, NULL, 0, &val))) { - pmix_output(0, "Client ns %s rank %d cnt %d: PMIx_Get %s failed: %s", - myproc.nspace, myproc.rank, j, tmp, PMIx_Error_string(rc)); - continue; - } - if (NULL == val) { - pmix_output(0, "Client ns %s rank %d: NULL value returned", - myproc.nspace, myproc.rank); - break; - } - if (PMIX_UINT64 != val->type) { - pmix_output(0, "Client ns %s rank %d cnt %d: PMIx_Get %s returned wrong type: %d", myproc.nspace, myproc.rank, j, tmp, val->type); - PMIX_VALUE_RELEASE(val); - free(tmp); - continue; + if (all_local) { + local = true; + } else { + local = false; + /* see if this proc is local to us */ + for (k=0; k < nlocal; k++) { + if (proc.rank == locals[k]) { + local = true; + break; + } + } } - if (1234 != val->data.uint64) { - pmix_output(0, "Client ns %s rank %d cnt %d: PMIx_Get %s returned wrong value: %d", myproc.nspace, myproc.rank, j, tmp, (int)val->data.uint64); + if (local) { + (void)asprintf(&tmp, "%s-%d-local-%d", myproc.nspace, n, j); + if (PMIX_SUCCESS != (rc = PMIx_Get(&proc, tmp, NULL, 0, &val))) { + pmix_output(0, "Client ns %s rank %d cnt %d: PMIx_Get %s failed: %s", + myproc.nspace, myproc.rank, j, tmp, PMIx_Error_string(rc)); + continue; + } + if (NULL == val) { + pmix_output(0, "Client ns %s rank %d: NULL value returned", + myproc.nspace, myproc.rank); + break; + } + if (PMIX_UINT64 != val->type) { + pmix_output(0, "Client ns %s rank %d cnt %d: PMIx_Get %s returned wrong type: %d", myproc.nspace, myproc.rank, j, tmp, val->type); + 
PMIX_VALUE_RELEASE(val); + free(tmp); + continue; + } + if (1234 != val->data.uint64) { + pmix_output(0, "Client ns %s rank %d cnt %d: PMIx_Get %s returned wrong value: %d", myproc.nspace, myproc.rank, j, tmp, (int)val->data.uint64); + PMIX_VALUE_RELEASE(val); + free(tmp); + continue; + } + pmix_output(0, "Client ns %s rank %d cnt %d: PMIx_Get %s returned correct", myproc.nspace, myproc.rank, j, tmp); PMIX_VALUE_RELEASE(val); free(tmp); - continue; - } - pmix_output(0, "Client ns %s rank %d cnt %d: PMIx_Get %s returned correct", myproc.nspace, myproc.rank, j, tmp); - PMIX_VALUE_RELEASE(val); - free(tmp); - if (n != myproc.rank) { + /* now check that we don't get data for a remote proc - note that we + * always can get our own remote data as we published it */ + if (proc.rank != myproc.rank) { + (void)asprintf(&tmp, "%s-%d-remote-%d", proc.nspace, n, j); + if (PMIX_SUCCESS != (rc = PMIx_Get(&proc, tmp, NULL, 0, &val))) { + /* this data should _not_ be found as we are on the same node + * and the data was "put" with a PMIX_REMOTE scope */ + pmix_output(0, "Client ns %s rank %d cnt %d: PMIx_Get %s returned correct", myproc.nspace, myproc.rank, j, tmp); + } else { + pmix_output(0, "ERROR: Client ns %s rank %d cnt %d: PMIx_Get %s returned remote data for a local proc", + myproc.nspace, myproc.rank, j, tmp); + } + if (NULL != val) { + PMIX_VALUE_RELEASE(val); + } + free(tmp); + } + } else { (void)asprintf(&tmp, "%s-%d-remote-%d", proc.nspace, n, j); - if (PMIX_SUCCESS != (rc = PMIx_Get(&proc, tmp, NULL, 0, &val))) { - /* this data should _not_ be found as we are on the same node - * and the data was "put" with a PMIX_REMOTE scope */ + if (PMIX_SUCCESS == (rc = PMIx_Get(&proc, tmp, NULL, 0, &val))) { pmix_output(0, "Client ns %s rank %d cnt %d: PMIx_Get %s returned correct", myproc.nspace, myproc.rank, j, tmp); - continue; + } else { + pmix_output(0, "Client ns %s rank %d cnt %d: PMIx_Get %s failed for remote proc", + myproc.nspace, myproc.rank, j, tmp); } - 
pmix_output(0, "Client ns %s rank %d cnt %d: PMIx_Get %s returned remote data for a local proc", - myproc.nspace, myproc.rank, j, tmp); PMIX_VALUE_RELEASE(val); free(tmp); } @@ -320,7 +373,7 @@ int main(int argc, char **argv) /* log something */ PMIX_INFO_CONSTRUCT(&info); - PMIX_INFO_LOAD(&info, PMIX_LOG_STDERR, "test log msg", PMIX_STRING); + PMIX_INFO_LOAD(&info, PMIX_LOG_STDERR, "test log msg\n", PMIX_STRING); active = true; rc = PMIx_Log_nb(&info, 1, NULL, 0, opcbfunc, (void*)&active); if (PMIX_SUCCESS != rc) { diff --git a/opal/mca/pmix/pmix3x/pmix/test/simple/simpdie.c b/opal/mca/pmix/pmix3x/pmix/test/simple/simpdie.c index db62d7832c4..cb0ae490227 100644 --- a/opal/mca/pmix/pmix3x/pmix/test/simple/simpdie.c +++ b/opal/mca/pmix/pmix3x/pmix/test/simple/simpdie.c @@ -13,7 +13,7 @@ * All rights reserved. * Copyright (c) 2009-2012 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2011 Oak Ridge National Labs. All rights reserved. - * Copyright (c) 2013-2017 Intel, Inc. All rights reserved. + * Copyright (c) 2013-2019 Intel, Inc. All rights reserved. * Copyright (c) 2015 Mellanox Technologies, Inc. All rights reserved. 
* $COPYRIGHT$ * @@ -87,7 +87,9 @@ int main(int argc, char **argv) pmix_value_t *val = &value; pmix_proc_t proc; uint32_t nprocs; - + pmix_status_t code[5] = {PMIX_ERR_PROC_ABORTING, PMIX_ERR_PROC_ABORTED, + PMIX_ERR_PROC_REQUESTED_ABORT, PMIX_ERR_JOB_TERMINATED, + PMIX_ERR_UNREACH}; /* init us */ if (PMIX_SUCCESS != (rc = PMIx_Init(&myproc, NULL, 0))) { pmix_output(0, "Client ns %s rank %d: PMIx_Init failed: %d", myproc.nspace, myproc.rank, rc); @@ -95,20 +97,21 @@ int main(int argc, char **argv) } pmix_output(0, "Client ns %s rank %d: Running", myproc.nspace, myproc.rank); - /* get our universe size */ + /* get our job size */ (void)strncpy(proc.nspace, myproc.nspace, PMIX_MAX_NSLEN); proc.rank = PMIX_RANK_WILDCARD; - if (PMIX_SUCCESS != (rc = PMIx_Get(&proc, PMIX_UNIV_SIZE, NULL, 0, &val))) { - pmix_output(0, "Client ns %s rank %d: PMIx_Get universe size failed: %d", myproc.nspace, myproc.rank, rc); + if (PMIX_SUCCESS != (rc = PMIx_Get(&proc, PMIX_JOB_SIZE, NULL, 0, &val))) { + pmix_output(0, "Client ns %s rank %d: PMIx_Get job size failed: %s", + myproc.nspace, myproc.rank, PMIx_Error_string(rc)); goto done; } nprocs = val->data.uint32; PMIX_VALUE_RELEASE(val); - pmix_output(0, "Client %s:%d universe size %d", myproc.nspace, myproc.rank, nprocs); + pmix_output(0, "Client %s:%d job size %d", myproc.nspace, myproc.rank, nprocs); completed = false; /* register our errhandler */ - PMIx_Register_event_handler(NULL, 0, NULL, 0, + PMIx_Register_event_handler(code, 5, NULL, 0, notification_fn, errhandler_reg_callbk, NULL); /* call fence to sync */ diff --git a/opal/mca/pmix/pmix3x/pmix/test/simple/simpdmodex.c b/opal/mca/pmix/pmix3x/pmix/test/simple/simpdmodex.c index b12afdb9b3c..2a7e067d148 100644 --- a/opal/mca/pmix/pmix3x/pmix/test/simple/simpdmodex.c +++ b/opal/mca/pmix/pmix3x/pmix/test/simple/simpdmodex.c @@ -13,7 +13,7 @@ * All rights reserved. * Copyright (c) 2009-2012 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2011 Oak Ridge National Labs. 
All rights reserved. - * Copyright (c) 2013-2017 Intel, Inc. All rights reserved. + * Copyright (c) 2013-2019 Intel, Inc. All rights reserved. * Copyright (c) 2015 Mellanox Technologies, Inc. All rights reserved. * $COPYRIGHT$ * @@ -98,9 +98,12 @@ int main(int argc, char **argv) pmix_value_t *val = &value; char *tmp; pmix_proc_t proc; - uint32_t n, num_gets; + uint32_t n, num_gets, k, nlocal; bool active; bool dofence = true; + bool local, all_local; + char **peers; + pmix_rank_t *locals; if (NULL != getenv("PMIX_SIMPDMODEX_ASYNC")) { dofence = false; @@ -113,16 +116,17 @@ int main(int argc, char **argv) } pmix_output(0, "Client ns %s rank %d: Running", myproc.nspace, myproc.rank); - /* get our universe size */ + /* get our job size */ (void)strncpy(proc.nspace, myproc.nspace, PMIX_MAX_NSLEN); proc.rank = PMIX_RANK_WILDCARD; - if (PMIX_SUCCESS != (rc = PMIx_Get(&proc, PMIX_UNIV_SIZE, NULL, 0, &val))) { - pmix_output(0, "Client ns %s rank %d: PMIx_Get universe size failed: %d", myproc.nspace, myproc.rank, rc); + if (PMIX_SUCCESS != (rc = PMIx_Get(&proc, PMIX_JOB_SIZE, NULL, 0, &val))) { + pmix_output(0, "Client ns %s rank %d: PMIx_Get job size failed: %s", + myproc.nspace, myproc.rank, PMIx_Error_string(rc)); goto done; } nprocs = val->data.uint32; PMIX_VALUE_RELEASE(val); - pmix_output(0, "Client %s:%d universe size %d", myproc.nspace, myproc.rank, nprocs); + pmix_output(0, "Client %s:%d job size %d", myproc.nspace, myproc.rank, nprocs); /* put a few values */ (void)asprintf(&tmp, "%s-%d-internal", myproc.nspace, myproc.rank); @@ -144,7 +148,7 @@ int main(int argc, char **argv) (void)asprintf(&tmp, "%s-%d-remote", myproc.nspace, myproc.rank); value.type = PMIX_STRING; value.data.string = "1234"; - if (PMIX_SUCCESS != (rc = PMIx_Put(PMIX_REMOTE, tmp, &value))) { + if (PMIX_SUCCESS != (rc = PMIx_Put(PMIX_GLOBAL, tmp, &value))) { pmix_output(0, "Client ns %s rank %d: PMIx_Put internal failed: %d", myproc.nspace, myproc.rank, rc); goto done; } @@ -174,24 +178,60 @@ int 
main(int argc, char **argv) } } + /* get a list of our local peers */ + if (PMIX_SUCCESS != (rc = PMIx_Get(&proc, PMIX_LOCAL_PEERS, NULL, 0, &val))) { + pmix_output(0, "Client ns %s rank %d: PMIx_Get local peers failed: %s", + myproc.nspace, myproc.rank, PMIx_Error_string(rc)); + goto done; + } + /* split the returned string to get the rank of each local peer */ + peers = pmix_argv_split(val->data.string, ','); + PMIX_VALUE_RELEASE(val); + nlocal = pmix_argv_count(peers); + if (nprocs == nlocal) { + all_local = true; + } else { + all_local = false; + locals = (pmix_rank_t*)malloc(pmix_argv_count(peers) * sizeof(pmix_rank_t)); + for (n=0; NULL != peers[n]; n++) { + locals[n] = strtoul(peers[n], NULL, 10); + } + } + pmix_argv_free(peers); + /* get the committed data - ask for someone who doesn't exist as well */ num_gets = 0; for (n=0; n < nprocs; n++) { - (void)asprintf(&tmp, "%s-%d-local", myproc.nspace, n); - proc.rank = n; - if (PMIX_SUCCESS != (rc = PMIx_Get_nb(&proc, tmp, - NULL, 0, valcbfunc, tmp))) { - pmix_output(0, "Client ns %s rank %d: PMIx_Get %s failed: %d", myproc.nspace, n, tmp, rc); - goto done; + proc.rank = n; /* set first: the locality test and both Get_nb calls below read it */ + if (all_local) { + local = true; + } else { + local = false; + /* see if this proc is local to us */ + for (k=0; k < nlocal; k++) { + if (proc.rank == locals[k]) { + local = true; + break; + } + } } - ++num_gets; - (void)asprintf(&tmp, "%s-%d-remote", myproc.nspace, n); - if (PMIX_SUCCESS != (rc = PMIx_Get_nb(&proc, tmp, - NULL, 0, valcbfunc, tmp))) { - pmix_output(0, "Client ns %s rank %d: PMIx_Get %s failed: %d", myproc.nspace, n, tmp, rc); - goto done; + if (local) { + (void)asprintf(&tmp, "%s-%d-local", myproc.nspace, n); + if (PMIX_SUCCESS != (rc = PMIx_Get_nb(&proc, tmp, + NULL, 0, valcbfunc, tmp))) { + pmix_output(0, "Client ns %s rank %d: PMIx_Get %s failed: %d", myproc.nspace, n, tmp, rc); + goto done; + } + ++num_gets; + } else { + (void)asprintf(&tmp, "%s-%d-remote", myproc.nspace, n); + if (PMIX_SUCCESS != (rc = 
PMIx_Get_nb(&proc, tmp, + NULL, 0, valcbfunc, tmp))) { + pmix_output(0, "Client ns %s rank %d: PMIx_Get %s failed: %d", myproc.nspace, n, tmp, rc); + goto done; + } + ++num_gets; } - ++num_gets; } if (dofence) { diff --git a/opal/mca/pmix/pmix3x/pmix/test/simple/simpdyn.c b/opal/mca/pmix/pmix3x/pmix/test/simple/simpdyn.c index a20b8418a4f..ef5286dd6e5 100644 --- a/opal/mca/pmix/pmix3x/pmix/test/simple/simpdyn.c +++ b/opal/mca/pmix/pmix3x/pmix/test/simple/simpdyn.c @@ -13,7 +13,7 @@ * All rights reserved. * Copyright (c) 2009-2012 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2011 Oak Ridge National Labs. All rights reserved. - * Copyright (c) 2013-2018 Intel, Inc. All rights reserved. + * Copyright (c) 2013-2019 Intel, Inc. All rights reserved. * Copyright (c) 2015 Mellanox Technologies, Inc. All rights reserved. * $COPYRIGHT$ * @@ -62,16 +62,17 @@ int main(int argc, char **argv) } pmix_output(0, "Client ns %s rank %d: Running", myproc.nspace, myproc.rank); - /* get our universe size */ + /* get our job size */ (void)strncpy(proc.nspace, myproc.nspace, PMIX_MAX_NSLEN); proc.rank = PMIX_RANK_WILDCARD; - if (PMIX_SUCCESS != (rc = PMIx_Get(&proc, PMIX_UNIV_SIZE, NULL, 0, &val))) { - pmix_output(0, "Client ns %s rank %d: PMIx_Get universe size failed: %d", myproc.nspace, myproc.rank, rc); + if (PMIX_SUCCESS != (rc = PMIx_Get(&proc, PMIX_JOB_SIZE, NULL, 0, &val))) { + pmix_output(0, "Client ns %s rank %d: PMIx_Get job size failed: %s", + myproc.nspace, myproc.rank, PMIx_Error_string(rc)); goto done; } nprocs = val->data.uint32; PMIX_VALUE_RELEASE(val); - pmix_output(0, "Client %s:%d universe size %d", myproc.nspace, myproc.rank, nprocs); + pmix_output(0, "Client %s:%d job size %d", myproc.nspace, myproc.rank, nprocs); /* call fence to sync */ PMIX_PROC_CONSTRUCT(&proc); @@ -85,19 +86,12 @@ int main(int argc, char **argv) /* rank=0 calls spawn */ if (0 == myproc.rank) { PMIX_APP_CREATE(app, 1); - app->cmd = strdup("gumby"); + app->cmd = strdup("./simpclient"); 
app->maxprocs = 2; - pmix_argv_append_nosize(&app->argv, "gumby"); + pmix_argv_append_nosize(&app->argv, "simpclient"); pmix_argv_append_nosize(&app->argv, "-n"); pmix_argv_append_nosize(&app->argv, "2"); pmix_setenv("PMIX_ENV_VALUE", "3", true, &app->env); - PMIX_INFO_CREATE(app->info, 2); - (void)strncpy(app->info[0].key, "DARTH", PMIX_MAX_KEYLEN); - app->info[0].value.type = PMIX_INT8; - app->info[0].value.data.int8 = 12; - (void)strncpy(app->info[1].key, "VADER", PMIX_MAX_KEYLEN); - app->info[1].value.type = PMIX_DOUBLE; - app->info[1].value.data.dval = 12.34; pmix_output(0, "Client ns %s rank %d: calling PMIx_Spawn", myproc.nspace, myproc.rank); if (PMIX_SUCCESS != (rc = PMIx_Spawn(NULL, 0, app, 1, nsp2))) { @@ -106,25 +100,18 @@ int main(int argc, char **argv) } PMIX_APP_FREE(app, 1); - /* check to see if we got the expected info back */ - if (0 != strncmp(nsp2, "DYNSPACE", PMIX_MAX_NSLEN)) { - pmix_output(0, "Client ns %s rank %d: PMIx_Spawn returned incorrect nspace: %s", myproc.nspace, myproc.rank, nsp2); - goto done; - } else { - pmix_output(0, "Client ns %s rank %d: PMIx_Spawn succeeded returning nspace: %s", myproc.nspace, myproc.rank, nsp2); - } - /* get their universe size */ + /* get their job size */ (void)strncpy(proc.nspace, nsp2, PMIX_MAX_NSLEN); proc.rank = PMIX_RANK_WILDCARD; val = NULL; - if (PMIX_SUCCESS != (rc = PMIx_Get(&proc, PMIX_UNIV_SIZE, NULL, 0, &val)) || + if (PMIX_SUCCESS != (rc = PMIx_Get(&proc, PMIX_JOB_SIZE, NULL, 0, &val)) || NULL == val) { - pmix_output(0, "Client ns %s rank %d: PMIx_Get universe size failed: %d", myproc.nspace, myproc.rank, rc); + pmix_output(0, "Client ns %s rank %d: PMIx_Get job %s size failed: %d", myproc.nspace, myproc.rank, nsp2, rc); goto done; } ntmp = val->data.uint32; PMIX_VALUE_RELEASE(val); - pmix_output(0, "Client %s:%d universe %s size %d", myproc.nspace, myproc.rank, nsp2, (int)ntmp); + pmix_output(0, "Client %s:%d job %s size %d", myproc.nspace, myproc.rank, nsp2, (int)ntmp); } /* just cycle the 
connect/disconnect functions */ diff --git a/opal/mca/pmix/pmix3x/pmix/test/simple/simpft.c b/opal/mca/pmix/pmix3x/pmix/test/simple/simpft.c index 57a6bfc8c6b..a6acf5f89ca 100644 --- a/opal/mca/pmix/pmix3x/pmix/test/simple/simpft.c +++ b/opal/mca/pmix/pmix3x/pmix/test/simple/simpft.c @@ -13,7 +13,7 @@ * All rights reserved. * Copyright (c) 2009-2012 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2011 Oak Ridge National Labs. All rights reserved. - * Copyright (c) 2013-2017 Intel, Inc. All rights reserved. + * Copyright (c) 2013-2019 Intel, Inc. All rights reserved. * Copyright (c) 2015 Mellanox Technologies, Inc. All rights reserved. * $COPYRIGHT$ * @@ -83,16 +83,17 @@ int main(int argc, char **argv) } pmix_output(0, "Client ns %s rank %d: Running", myproc.nspace, myproc.rank); - /* get our universe size */ + /* get our job size */ (void)strncpy(proc.nspace, myproc.nspace, PMIX_MAX_NSLEN); proc.rank = PMIX_RANK_WILDCARD; - if (PMIX_SUCCESS != (rc = PMIx_Get(&proc, PMIX_UNIV_SIZE, NULL, 0, &val))) { - pmix_output(0, "Client ns %s rank %d: PMIx_Get universe size failed: %d", myproc.nspace, myproc.rank, rc); + if (PMIX_SUCCESS != (rc = PMIx_Get(&proc, PMIX_JOB_SIZE, NULL, 0, &val))) { + pmix_output(0, "Client ns %s rank %d: PMIx_Get job size failed: %s", + myproc.nspace, myproc.rank, PMIx_Error_string(rc)); goto done; } nprocs = val->data.uint32; PMIX_VALUE_RELEASE(val); - pmix_output(0, "Client %s:%d universe size %d", myproc.nspace, myproc.rank, nprocs); + pmix_output(0, "Client %s:%d job size %d", myproc.nspace, myproc.rank, nprocs); completed = false; /* register our errhandler */ diff --git a/opal/mca/pmix/pmix3x/pmix/test/simple/simpjctrl.c b/opal/mca/pmix/pmix3x/pmix/test/simple/simpjctrl.c new file mode 100644 index 00000000000..037f7eae383 --- /dev/null +++ b/opal/mca/pmix/pmix3x/pmix/test/simple/simpjctrl.c @@ -0,0 +1,234 @@ +/* + * Copyright (c) 2004-2010 The Trustees of Indiana University and Indiana + * University Research and Technology + * 
Corporation. All rights reserved. + * Copyright (c) 2004-2011 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * Copyright (c) 2006-2013 Los Alamos National Security, LLC. + * All rights reserved. + * Copyright (c) 2009-2012 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2011 Oak Ridge National Labs. All rights reserved. + * Copyright (c) 2013-2019 Intel, Inc. All rights reserved. + * Copyright (c) 2015 Mellanox Technologies, Inc. All rights reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + * + */ + +#define _GNU_SOURCE +#include +#include +#include +#include +#include +#include + +#include +#include "simptest.h" + +static pmix_proc_t myproc; + +/* this is the event notification function we pass down below + * when registering for general events - i.e., the default + * handler. We don't technically need to register one, but it + * is usually good practice to catch any events that occur */ +static void notification_fn(size_t evhdlr_registration_id, + pmix_status_t status, + const pmix_proc_t *source, + pmix_info_t info[], size_t ninfo, + pmix_info_t results[], size_t nresults, + pmix_event_notification_cbfunc_fn_t cbfunc, + void *cbdata) +{ + if (NULL != cbfunc) { + cbfunc(PMIX_EVENT_ACTION_COMPLETE, NULL, 0, NULL, NULL, cbdata); + } +} + +/* event handler registration is done asynchronously because it + * may involve the PMIx server registering with the host RM for + * external events. So we provide a callback function that returns + * the status of the request (success or an error), plus a numerical index + * to the registered event.
The index is used later on to deregister + * an event handler - if we don't explicitly deregister it, then the + * PMIx server will do so when it sees us exit */ +static void evhandler_reg_callbk(pmix_status_t status, + size_t evhandler_ref, + void *cbdata) +{ + mylock_t *lk = (mylock_t*)cbdata; + + if (PMIX_SUCCESS != status) { + fprintf(stderr, "Client %s:%d EVENT HANDLER REGISTRATION FAILED WITH STATUS %d, ref=%lu\n", + myproc.nspace, myproc.rank, status, (unsigned long)evhandler_ref); + } + lk->status = status; + DEBUG_WAKEUP_THREAD(lk); +} + +static void infocbfunc(pmix_status_t status, + pmix_info_t *info, size_t ninfo, + void *cbdata, + pmix_release_cbfunc_t release_fn, + void *release_cbdata) +{ + mylock_t *lk = (mylock_t*)cbdata; + + fprintf(stderr, "Callback recvd with status %d\n", status); + + /* release the caller */ + if (NULL != release_fn) { + release_fn(release_cbdata); + } + + lk->status = status; + DEBUG_WAKEUP_THREAD(lk); +} + +int main(int argc, char **argv) +{ + int rc; + pmix_value_t value; + pmix_value_t *val = &value; + pmix_proc_t proc; + uint32_t nprocs, n; + pmix_info_t *info, *iptr; + bool flag; + mylock_t mylock; + pmix_data_array_t *dptr; + + /* init us - note that the call to "init" includes the return of + * any job-related info provided by the RM.
*/ + if (PMIX_SUCCESS != (rc = PMIx_Init(&myproc, NULL, 0))) { + fprintf(stderr, "Client ns %s rank %d: PMIx_Init failed: %d\n", myproc.nspace, myproc.rank, rc); + exit(0); + } + fprintf(stderr, "Client ns %s rank %d: Running\n", myproc.nspace, myproc.rank); + + + /* register our default event handler - again, this isn't strictly + * required, but is generally good practice */ + DEBUG_CONSTRUCT_LOCK(&mylock); + PMIx_Register_event_handler(NULL, 0, NULL, 0, + notification_fn, evhandler_reg_callbk, (void*)&mylock); + DEBUG_WAIT_THREAD(&mylock); + if (0 != mylock.status) { + fprintf(stderr, "[%s:%d] Default handler registration failed\n", myproc.nspace, myproc.rank); + exit(mylock.status); + } + DEBUG_DESTRUCT_LOCK(&mylock); + + /* job-related info is found in our nspace, assigned to the + * wildcard rank as it doesn't relate to a specific rank. Setup + * a name to retrieve such values */ + PMIX_PROC_CONSTRUCT(&proc); + (void)strncpy(proc.nspace, myproc.nspace, PMIX_MAX_NSLEN); + proc.rank = PMIX_RANK_WILDCARD; + + /* get our job size */ + (void)strncpy(proc.nspace, myproc.nspace, PMIX_MAX_NSLEN); + proc.rank = PMIX_RANK_WILDCARD; + if (PMIX_SUCCESS != (rc = PMIx_Get(&proc, PMIX_JOB_SIZE, NULL, 0, &val))) { + fprintf(stderr, "Client ns %s rank %d: PMIx_Get job size failed: %s\n", + myproc.nspace, myproc.rank, PMIx_Error_string(rc)); + goto done; + } + nprocs = val->data.uint32; + PMIX_VALUE_RELEASE(val); + fprintf(stderr, "Client %s:%d job size %d\n", myproc.nspace, myproc.rank, nprocs); + + /* inform the RM that we are preemptible, and that our checkpoint methods are + * "signal" on SIGUSR2 and event on PMIX_JCTRL_CHECKPOINT */ + PMIX_INFO_CREATE(info, 2); + flag = true; + PMIX_INFO_LOAD(&info[0], PMIX_JOB_CTRL_PREEMPTIBLE, (void*)&flag, PMIX_BOOL); + /* can't use "load" to load a pmix_data_array_t */ + (void)strncpy(info[1].key, PMIX_JOB_CTRL_CHECKPOINT_METHOD, PMIX_MAX_KEYLEN); + info[1].value.type = PMIX_DATA_ARRAY; + dptr = 
(pmix_data_array_t*)malloc(sizeof(pmix_data_array_t)); + info[1].value.data.darray = dptr; + dptr->type = PMIX_INFO; + dptr->size = 2; + PMIX_INFO_CREATE(dptr->array, dptr->size); + rc = SIGUSR2; + iptr = (pmix_info_t*)dptr->array; + PMIX_INFO_LOAD(&iptr[0], PMIX_JOB_CTRL_CHECKPOINT_SIGNAL, &rc, PMIX_INT); + rc = PMIX_JCTRL_CHECKPOINT; + PMIX_INFO_LOAD(&iptr[1], PMIX_JOB_CTRL_CHECKPOINT_EVENT, &rc, PMIX_STATUS); + + /* since this is informational and not a requested operation, the target parameter + * doesn't mean anything and can be ignored */ + DEBUG_CONSTRUCT_LOCK(&mylock); + if (PMIX_SUCCESS != (rc = PMIx_Job_control_nb(NULL, 0, info, 2, infocbfunc, (void*)&mylock))) { + fprintf(stderr, "Client ns %s rank %d: PMIx_Job_control_nb failed: %d\n", myproc.nspace, myproc.rank, rc); + goto done; + } + DEBUG_WAIT_THREAD(&mylock); + PMIX_INFO_FREE(info, 2); + if (0 != mylock.status) { + fprintf(stderr, "Client ns %s rank %d: PMIx_Job_control_nb failed: %d\n", myproc.nspace, myproc.rank, mylock.status); + exit(mylock.status); + } + DEBUG_DESTRUCT_LOCK(&mylock); + + /* now request that this process be monitored using heartbeats */ + PMIX_INFO_CREATE(iptr, 1); + PMIX_INFO_LOAD(&iptr[0], PMIX_MONITOR_HEARTBEAT, NULL, PMIX_POINTER); + + PMIX_INFO_CREATE(info, 3); + PMIX_INFO_LOAD(&info[0], PMIX_MONITOR_ID, "MONITOR1", PMIX_STRING); + n = 5; // require a heartbeat every 5 seconds + PMIX_INFO_LOAD(&info[1], PMIX_MONITOR_HEARTBEAT_TIME, &n, PMIX_UINT32); + n = 2; // two heartbeats can be missed before declaring us "stalled" + PMIX_INFO_LOAD(&info[2], PMIX_MONITOR_HEARTBEAT_DROPS, &n, PMIX_UINT32); + + /* make the request */ + DEBUG_CONSTRUCT_LOCK(&mylock); + if (PMIX_SUCCESS != (rc = PMIx_Process_monitor_nb(iptr, PMIX_MONITOR_HEARTBEAT_ALERT, + info, 3, infocbfunc, (void*)&mylock))) { + fprintf(stderr, "Client ns %s rank %d: PMIx_Process_monitor_nb failed: %d\n", myproc.nspace, myproc.rank, rc); + goto done; + } + DEBUG_WAIT_THREAD(&mylock); + PMIX_INFO_FREE(iptr, 1); + 
PMIX_INFO_FREE(info, 3); + if (0 != mylock.status) { + fprintf(stderr, "Client ns %s rank %d: PMIx_Process_monitor_nb failed: %d\n", myproc.nspace, myproc.rank, mylock.status); + exit(mylock.status); + } + DEBUG_DESTRUCT_LOCK(&mylock); + + /* send a heartbeat */ + PMIx_Heartbeat(); + + /* call fence to synchronize with our peers - no need to + * collect any info as we didn't "put" anything */ + PMIX_INFO_CREATE(info, 1); + flag = false; + PMIX_INFO_LOAD(info, PMIX_COLLECT_DATA, &flag, PMIX_BOOL); + if (PMIX_SUCCESS != (rc = PMIx_Fence(&proc, 1, info, 1))) { + fprintf(stderr, "Client ns %s rank %d: PMIx_Fence failed: %d\n", myproc.nspace, myproc.rank, rc); + goto done; + } + PMIX_INFO_FREE(info, 1); + + + done: + /* finalize us */ + fprintf(stderr, "Client ns %s rank %d: Finalizing\n", myproc.nspace, myproc.rank); + if (PMIX_SUCCESS != (rc = PMIx_Finalize(NULL, 0))) { + fprintf(stderr, "Client ns %s rank %d:PMIx_Finalize failed: %d\n", myproc.nspace, myproc.rank, rc); + } else { + fprintf(stderr, "Client ns %s rank %d:PMIx_Finalize successfully completed\n", myproc.nspace, myproc.rank); + } + fflush(stderr); + return(0); +} diff --git a/opal/mca/pmix/pmix3x/pmix/test/simple/simplegacy.c b/opal/mca/pmix/pmix3x/pmix/test/simple/simplegacy.c index 1b15366cd4a..98f40a15dd4 100644 --- a/opal/mca/pmix/pmix3x/pmix/test/simple/simplegacy.c +++ b/opal/mca/pmix/pmix3x/pmix/test/simple/simplegacy.c @@ -13,7 +13,7 @@ * All rights reserved. * Copyright (c) 2009-2012 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2011 Oak Ridge National Labs. All rights reserved. - * Copyright (c) 2013-2017 Intel, Inc. All rights reserved. + * Copyright (c) 2013-2019 Intel, Inc. All rights reserved. * Copyright (c) 2015 Mellanox Technologies, Inc. All rights reserved. 
* $COPYRIGHT$ * @@ -24,230 +24,90 @@ */ #include -#include +#include #include #include #include #include -#include "src/class/pmix_object.h" -#include "src/util/output.h" -#include "src/util/printf.h" - #define MAXCNT 3 -static volatile bool completed = false; -static pmix_proc_t myproc; - -static void notification_fn(size_t evhdlr_registration_id, - pmix_status_t status, - const pmix_proc_t *source, - pmix_info_t info[], size_t ninfo, - pmix_info_t results[], size_t nresults, - pmix_event_notification_cbfunc_fn_t cbfunc, - void *cbdata) -{ - pmix_output(0, "Client %s:%d NOTIFIED with status %s", myproc.nspace, myproc.rank, PMIx_Error_string(status)); - if (NULL != cbfunc) { - cbfunc(PMIX_SUCCESS, NULL, 0, NULL, NULL, cbdata); - } - completed = true; -} - -static void errhandler_reg_callbk(pmix_status_t status, - size_t errhandler_ref, - void *cbdata) -{ - volatile bool *active = (volatile bool*)cbdata; - - pmix_output(0, "Client: ERRHANDLER REGISTRATION CALLBACK CALLED WITH STATUS %d, ref=%lu", - status, (unsigned long)errhandler_ref); - *active = false; -} - -/* this is an event notification function that we explicitly request - * be called when the PMIX_MODEL_DECLARED notification is issued. - * We could catch it in the general event notification function and test - * the status to see if the status matched, but it often is simpler - * to declare a use-specific notification callback point. 
In this case, - * we are asking to know whenever a model is declared as a means - * of testing server self-notification */ -static void model_callback(size_t evhdlr_registration_id, - pmix_status_t status, - const pmix_proc_t *source, - pmix_info_t info[], size_t ninfo, - pmix_info_t results[], size_t nresults, - pmix_event_notification_cbfunc_fn_t cbfunc, - void *cbdata) -{ - size_t n; - - /* just let us know it was received */ - fprintf(stderr, "%s:%d Model event handler called with status %d(%s)\n", - myproc.nspace, myproc.rank, status, PMIx_Error_string(status)); - for (n=0; n < ninfo; n++) { - if (PMIX_STRING == info[n].value.type) { - fprintf(stderr, "%s:%d\t%s:\t%s\n", - myproc.nspace, myproc.rank, - info[n].key, info[n].value.data.string); - } - } - - /* we must NOT tell the event handler state machine that we - * are the last step as that will prevent it from notifying - * anyone else that might be listening for declarations */ - if (NULL != cbfunc) { - cbfunc(PMIX_SUCCESS, NULL, 0, NULL, NULL, cbdata); - } -} - -/* event handler registration is done asynchronously */ -static void model_registration_callback(pmix_status_t status, - size_t evhandler_ref, - void *cbdata) -{ - volatile int *active = (volatile int*)cbdata; - - fprintf(stderr, "simpclient EVENT HANDLER REGISTRATION RETURN STATUS %d, ref=%lu\n", - status, (unsigned long)evhandler_ref); - *active = false; -} - int main(int argc, char **argv) { - int rc; - pmix_value_t value; - pmix_value_t *val = &value; + int rc, j, n; char *tmp; - pmix_proc_t proc; - uint32_t nprocs, n; - int cnt, j; - volatile bool active; - pmix_info_t info, *iptr; - size_t ninfo; - pmix_status_t code; - - /* init us and declare we are a test programming model */ - PMIX_INFO_CREATE(iptr, 2); - PMIX_INFO_LOAD(&iptr[0], PMIX_PROGRAMMING_MODEL, "TEST", PMIX_STRING); - PMIX_INFO_LOAD(&iptr[1], PMIX_MODEL_LIBRARY_NAME, "PMIX", PMIX_STRING); - if (PMIX_SUCCESS != (rc = PMIx_Init(&myproc, iptr, 2))) { - pmix_output(0, "Client ns %s 
rank %d: PMIx_Init failed: %s", - myproc.nspace, myproc.rank, PMIx_Error_string(rc)); + int spawned; + int rank; + int nprocs; + char value[1024]; + + fprintf(stderr, "Client calling init\n"); + if (PMI_SUCCESS != (rc = PMI_Init(&spawned))) { + fprintf(stderr, "Client PMI_Init failed: %d\n", rc); exit(rc); } - PMIX_INFO_FREE(iptr, 2); - pmix_output(0, "Client ns %s rank %d: Running", myproc.nspace, myproc.rank); + fprintf(stderr, "Client Running\n"); /* test something */ - (void)strncpy(proc.nspace, myproc.nspace, PMIX_MAX_NSLEN); - proc.rank = PMIX_RANK_WILDCARD; - if (PMIX_SUCCESS != (rc = PMIx_Get(&proc, PMIX_JOB_SIZE, NULL, 0, &val))) { - pmix_output(0, "Client ns %s rank %d: PMIx_Get failed: %s", - myproc.nspace, myproc.rank, PMIx_Error_string(rc)); + if (PMI_SUCCESS != (rc = PMI_Get_rank(&rank))) { + fprintf(stderr, "Client PMI_Get_rank failed: %d\n", rc); exit(rc); } - nprocs = val->data .uint32; - PMIX_VALUE_RELEASE(val); - pmix_output(0, "Client %s:%d universe size %d", myproc.nspace, myproc.rank, nprocs); - - /* register a handler specifically for when models declare */ - active = true; - ninfo = 1; - PMIX_INFO_CREATE(iptr, ninfo); - PMIX_INFO_LOAD(&iptr[0], PMIX_EVENT_HDLR_NAME, "SIMPCLIENT-MODEL", PMIX_STRING); - code = PMIX_MODEL_DECLARED; - PMIx_Register_event_handler(&code, 1, iptr, ninfo, - model_callback, model_registration_callback, (void*)&active); - while (active) { - usleep(10); - } - PMIX_INFO_FREE(iptr, ninfo); - - /* register our errhandler */ - active = true; - PMIx_Register_event_handler(NULL, 0, NULL, 0, - notification_fn, errhandler_reg_callbk, (void*)&active); - while (active) { - usleep(10); + if (PMI_SUCCESS != (rc = PMI_Get_universe_size(&nprocs))) { + fprintf(stderr, "Client %d: PMI_Get_universe_size failed: %d\n", rank, rc); + exit(rc); } + fprintf(stderr, "Client %d job size %d\n", rank, nprocs); - memset(&info, 0, sizeof(pmix_info_t)); - (void)strncpy(info.key, PMIX_COLLECT_DATA, PMIX_MAX_KEYLEN); - info.value.type = PMIX_UNDEF; 
- info.value.data.flag = 1; - - for (cnt=0; cnt < MAXCNT; cnt++) { - pmix_output(0, "EXECUTING LOOP %d", cnt); - for (j=0; j < 10; j++) { - (void)asprintf(&tmp, "%s-%d-gasnet-%d-%d", myproc.nspace, myproc.rank, cnt, j); - value.type = PMIX_UINT64; - value.data.uint64 = 1234; - if (PMIX_SUCCESS != (rc = PMIx_Put(PMIX_GLOBAL, tmp, &value))) { - pmix_output(0, "Client ns %s rank %d: PMIx_Put failed: %s", - myproc.nspace, myproc.rank, PMIx_Error_string(rc)); - goto done; - } - free(tmp); - } - - if (PMIX_SUCCESS != (rc = PMIx_Commit())) { - pmix_output(0, "Client ns %s rank %d cnt %d: PMIx_Commit failed: %s", - myproc.nspace, myproc.rank, cnt, PMIx_Error_string(rc)); + for (j=0; j < 10; j++) { + (void)asprintf(&tmp, "%d-gasnet-0-%d", rank, j); + if (PMI_SUCCESS != (rc = PMI_KVS_Put("foobar", tmp, "myvalue"))) { + fprintf(stderr, "Client %d: j %d PMI_KVS_Put failed: %d\n", + rank, j, rc); goto done; } + free(tmp); + } - /* call fence to ensure the data is received */ - if (PMIX_SUCCESS != (rc = PMIx_Fence(NULL, 0, &info, 1))) { - pmix_output(0, "Client ns %s rank %d cnt %d: PMIx_Fence failed: %s", - myproc.nspace, myproc.rank, cnt, PMIx_Error_string(rc)); + if (PMIX_SUCCESS != (rc = PMI_KVS_Commit("foobar"))) { + fprintf(stderr, "Client %d: PMI_KVS_Commit failed: %d\n", rank, rc); goto done; - } + } - /* check the returned data */ - (void)strncpy(proc.nspace, myproc.nspace, PMIX_MAX_NSLEN); - proc.rank = PMIX_RANK_UNDEF; - for (j=0; j < 10; j++) { - for (n=0; n < nprocs; n++) { - (void)asprintf(&tmp, "%s-%d-gasnet-%d-%d", myproc.nspace, n, cnt, j); - if (PMIX_SUCCESS != (rc = PMIx_Get(&proc, tmp, NULL, 0, &val))) { - pmix_output(0, "Client ns %s rank %d cnt %d: PMIx_Get %s failed: %s", - myproc.nspace, myproc.rank, j, tmp, PMIx_Error_string(rc)); - continue; - } - if (NULL == val) { - pmix_output(0, "Client ns %s rank %d: NULL value returned", - myproc.nspace, myproc.rank); - break; - } - if (PMIX_UINT64 != val->type) { - pmix_output(0, "Client ns %s rank %d cnt %d: 
PMIx_Get %s returned wrong type: %d", myproc.nspace, myproc.rank, j, tmp, val->type); - PMIX_VALUE_RELEASE(val); - free(tmp); - continue; - } - if (1234 != val->data.uint64) { - pmix_output(0, "Client ns %s rank %d cnt %d: PMIx_Get %s returned wrong value: %d", myproc.nspace, myproc.rank, j, tmp, (int)val->data.uint64); - PMIX_VALUE_RELEASE(val); - free(tmp); - continue; - } - pmix_output(0, "Client ns %s rank %d cnt %d: PMIx_Get %s returned correct", myproc.nspace, myproc.rank, j, tmp); - PMIX_VALUE_RELEASE(val); - free(tmp); + fprintf(stderr, "Client rank %d: CALLING PMI_Barrier\n", rank); + + /* call fence to ensure the data is received */ + if (PMI_SUCCESS != (rc = PMI_Barrier())) { + fprintf(stderr, "Client %d: PMI_Barrier failed: %d\n", rank, rc); + goto done; + } + + /* check the returned data */ + for (j=0; j < 10; j++) { + for (n=0; n < nprocs; n++) { + (void)asprintf(&tmp, "%d-gasnet-0-%d", n, j); + fprintf(stderr, "Client %d: Calling get\n", rank); + if (PMI_SUCCESS != (rc = PMI_KVS_Get("foobar", tmp, value, 1024))) { + fprintf(stderr, "Client %d: PMI_Get failed: %d\n", rank, rc); + continue; } + if (0 == strcmp(value, "myvalue")) { + fprintf(stderr, "Client %d: PMI_Get returned correct value\n", rank); + } else { + fprintf(stderr, "Client %d: PMI_Get returned incorrect value\n", rank); + } + free(tmp); } } done: /* finalize us */ - pmix_output(0, "Client ns %s rank %d: Finalizing", myproc.nspace, myproc.rank); - if (PMIX_SUCCESS != (rc = PMIx_Finalize(NULL, 0))) { - fprintf(stderr, "Client ns %s rank %d:PMIx_Finalize failed: %s\n", - myproc.nspace, myproc.rank, PMIx_Error_string(rc)); + fprintf(stderr, "Client rank %d: Finalizing\n", rank); + if (PMI_SUCCESS != (rc = PMI_Finalize())) { + fprintf(stderr, "Client rank %d: finalize failed %d\n", rank, rc); } else { - fprintf(stderr, "Client ns %s rank %d:PMIx_Finalize successfully completed\n", myproc.nspace, myproc.rank); + fprintf(stderr, "Client %d:PMI_Finalize successfully completed\n", rank); } 
fflush(stderr); return(rc); diff --git a/opal/mca/pmix/pmix3x/pmix/test/simple/simppub.c b/opal/mca/pmix/pmix3x/pmix/test/simple/simppub.c index 12d6c68735e..2ccf9b258f4 100644 --- a/opal/mca/pmix/pmix3x/pmix/test/simple/simppub.c +++ b/opal/mca/pmix/pmix3x/pmix/test/simple/simppub.c @@ -13,7 +13,7 @@ * All rights reserved. * Copyright (c) 2009-2012 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2011 Oak Ridge National Labs. All rights reserved. - * Copyright (c) 2013-2017 Intel, Inc. All rights reserved. + * Copyright (c) 2013-2019 Intel, Inc. All rights reserved. * Copyright (c) 2015 Mellanox Technologies, Inc. All rights reserved. * $COPYRIGHT$ * @@ -54,16 +54,17 @@ int main(int argc, char **argv) } pmix_output(0, "Client ns %s rank %d: Running", myproc.nspace, myproc.rank); - /* get our universe size */ + /* get our job size */ (void)strncpy(proc.nspace, myproc.nspace, PMIX_MAX_NSLEN); proc.rank = PMIX_RANK_WILDCARD; - if (PMIX_SUCCESS != (rc = PMIx_Get(&proc, PMIX_UNIV_SIZE, NULL, 0, &val))) { - pmix_output(0, "Client ns %s rank %d: PMIx_Get universe size failed: %d", myproc.nspace, myproc.rank, rc); + if (PMIX_SUCCESS != (rc = PMIx_Get(&proc, PMIX_JOB_SIZE, NULL, 0, &val))) { + pmix_output(0, "Client ns %s rank %d: PMIx_Get job size failed: %s", + myproc.nspace, myproc.rank, PMIx_Error_string(rc)); goto done; } nprocs = val->data.uint32; PMIX_VALUE_RELEASE(val); - pmix_output(0, "Client %s:%d universe size %d", myproc.nspace, myproc.rank, nprocs); + pmix_output(0, "Client %s:%d job size %d", myproc.nspace, myproc.rank, nprocs); /* call fence to ensure the data is received */ PMIX_PROC_CONSTRUCT(&proc); diff --git a/opal/mca/pmix/pmix3x/pmix/test/simple/simptest.c b/opal/mca/pmix/pmix3x/pmix/test/simple/simptest.c index 710ecbc135e..b78165f704a 100644 --- a/opal/mca/pmix/pmix3x/pmix/test/simple/simptest.c +++ b/opal/mca/pmix/pmix3x/pmix/test/simple/simptest.c @@ -13,9 +13,9 @@ * All rights reserved. * Copyright (c) 2009-2012 Cisco Systems, Inc. 
All rights reserved. * Copyright (c) 2011 Oak Ridge National Labs. All rights reserved. - * Copyright (c) 2013-2018 Intel, Inc. All rights reserved. - * Copyright (c) 2015 Research Organization for Information Science - * and Technology (RIST). All rights reserved. + * Copyright (c) 2013-2019 Intel, Inc. All rights reserved. + * Copyright (c) 2015-2019 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * Copyright (c) 2016 IBM Corporation. All rights reserved. * $COPYRIGHT$ * @@ -38,7 +38,6 @@ #include #include #include -#include PMIX_EVENT_HEADER #if PMIX_HAVE_HWLOC #include @@ -107,6 +106,18 @@ static void log_fn(const pmix_proc_t *client, const pmix_info_t data[], size_t ndata, const pmix_info_t directives[], size_t ndirs, pmix_op_cbfunc_t cbfunc, void *cbdata); +static pmix_status_t alloc_fn(const pmix_proc_t *client, + pmix_alloc_directive_t directive, + const pmix_info_t data[], size_t ndata, + pmix_info_cbfunc_t cbfunc, void *cbdata); +static pmix_status_t jctrl_fn(const pmix_proc_t *requestor, + const pmix_proc_t targets[], size_t ntargets, + const pmix_info_t directives[], size_t ndirs, + pmix_info_cbfunc_t cbfunc, void *cbdata); +static pmix_status_t mon_fn(const pmix_proc_t *requestor, + const pmix_info_t *monitor, pmix_status_t error, + const pmix_info_t directives[], size_t ndirs, + pmix_info_cbfunc_t cbfunc, void *cbdata); static pmix_server_module_t mymodule = { .client_connected = connected, @@ -125,7 +136,10 @@ static pmix_server_module_t mymodule = { .notify_event = notify_event, .query = query_fn, .tool_connected = tool_connect_fn, - .log = log_fn + .log = log_fn, + .allocate = alloc_fn, + .job_control = jctrl_fn, + .monitor = mon_fn }; typedef struct { @@ -215,8 +229,6 @@ static void dlcbfunc(int sd, short flags, void *cbdata) { myxfer_t *x = (myxfer_t*)cbdata; - pmix_output(0, "INVENTORY READY FOR DELIVERY"); - PMIx_server_deliver_inventory(x->info, x->ninfo, NULL, 0, opcbfunc, (void*)x); } @@ -230,8 
+242,6 @@ static void infocbfunc(pmix_status_t status, myxfer_t *x; size_t n; - pmix_output(0, "INVENTORY RECEIVED"); - /* we don't have any place to send this, so for test * purposes only, let's push it back down for processing. * Note: it must be thread-shifted first as we are in @@ -302,6 +312,17 @@ static void model_registration_callback(pmix_status_t status, DEBUG_WAKEUP_THREAD(lock); } +static void set_handler_default(int sig) +{ + struct sigaction act; + + act.sa_handler = SIG_DFL; + act.sa_flags = 0; + sigemptyset(&act.sa_mask); + + sigaction(sig, &act, (struct sigaction *)0); +} + int main(int argc, char **argv) { char **client_env=NULL; @@ -324,6 +345,7 @@ int main(int argc, char **argv) #endif mylock_t mylock; pmix_status_t code; + sigset_t unblock; /* smoke test */ if (PMIX_SUCCESS != 0) { @@ -331,8 +353,6 @@ int main(int argc, char **argv) exit(1); } - fprintf(stderr, "Testing version %s\n", PMIx_Get_version()); - /* see if we were passed the number of procs to run or * the executable to use */ for (n=1; n < argc; n++) { @@ -389,6 +409,12 @@ int main(int argc, char **argv) if (NULL == executable) { executable = strdup("./simpclient"); } + /* check for executable existence and permissions */ + if (0 != access(executable, X_OK)) { + fprintf(stderr, "Executable %s not found or missing executable permissions\n", executable); + exit(1); + } + if (cross_version && nprocs < 2) { fprintf(stderr, "Cross-version testing requires at least two clients\n"); exit(1); @@ -401,34 +427,50 @@ int main(int argc, char **argv) } #endif + fprintf(stderr, "Testing version %s\n", PMIx_Get_version()); + + /* ensure that SIGCHLD is unblocked as we need to capture it */ + if (0 != sigemptyset(&unblock)) { + fprintf(stderr, "SIGEMPTYSET FAILED\n"); + exit(1); + } + if (0 != sigaddset(&unblock, SIGCHLD)) { + fprintf(stderr, "SIGADDSET FAILED\n"); + exit(1); + } + if (0 != sigprocmask(SIG_UNBLOCK, &unblock, NULL)) { + fprintf(stderr, "SIG_UNBLOCK FAILED\n"); + exit(1); + } + + /* 
setup the server library and tell it to support tool connections */ #if PMIX_HAVE_HWLOC if (hwloc) { #if HWLOC_API_VERSION < 0x20000 - ninfo = 4; + ninfo = 3; #else - ninfo = 5; + ninfo = 4; #endif } else { - ninfo = 3; + ninfo = 2; } #else - ninfo = 3; + ninfo = 2; #endif PMIX_INFO_CREATE(info, ninfo); PMIX_INFO_LOAD(&info[0], PMIX_SERVER_TOOL_SUPPORT, NULL, PMIX_BOOL); - PMIX_INFO_LOAD(&info[1], PMIX_USOCK_DISABLE, &usock, PMIX_BOOL); - PMIX_INFO_LOAD(&info[2], PMIX_SERVER_GATEWAY, NULL, PMIX_BOOL); + PMIX_INFO_LOAD(&info[1], PMIX_SERVER_GATEWAY, NULL, PMIX_BOOL); #if PMIX_HAVE_HWLOC if (hwloc) { if (NULL != hwloc_file) { - PMIX_INFO_LOAD(&info[3], PMIX_TOPOLOGY_FILE, hwloc_file, PMIX_STRING); + PMIX_INFO_LOAD(&info[2], PMIX_TOPOLOGY_FILE, hwloc_file, PMIX_STRING); } else { - PMIX_INFO_LOAD(&info[3], PMIX_TOPOLOGY, NULL, PMIX_STRING); + PMIX_INFO_LOAD(&info[2], PMIX_TOPOLOGY, NULL, PMIX_STRING); } #if HWLOC_API_VERSION >= 0x20000 - PMIX_INFO_LOAD(&info[4], PMIX_HWLOC_SHARE_TOPO, NULL, PMIX_BOOL); + PMIX_INFO_LOAD(&info[3], PMIX_HWLOC_SHARE_TOPO, NULL, PMIX_BOOL); #endif } #endif @@ -472,9 +514,9 @@ int main(int argc, char **argv) /* setup to see sigchld on the forked tests */ PMIX_CONSTRUCT(&children, pmix_list_t); - event_assign(&handler, pmix_globals.evbase, SIGCHLD, - EV_SIGNAL|EV_PERSIST,wait_signal_callback, &handler); - event_add(&handler, NULL); + pmix_event_assign(&handler, pmix_globals.evbase, SIGCHLD, + EV_SIGNAL|EV_PERSIST,wait_signal_callback, &handler); + pmix_event_add(&handler, NULL); /* we have a single namespace for all clients */ atmp = NULL; @@ -555,17 +597,24 @@ int main(int argc, char **argv) PMIx_server_finalize(); return -1; } - child = PMIX_NEW(wait_tracker_t); - child->pid = pid; - pmix_list_append(&children, &child->super); - if (pid == 0) { + sigset_t sigs; + set_handler_default(SIGTERM); + set_handler_default(SIGINT); + set_handler_default(SIGHUP); + set_handler_default(SIGPIPE); + set_handler_default(SIGCHLD); + sigprocmask(0, 0, 
&sigs); + sigprocmask(SIG_UNBLOCK, &sigs, 0); execve(executable, client_argv, client_env); /* Does not return */ exit(0); + } else { + child = PMIX_NEW(wait_tracker_t); + child->pid = pid; + pmix_list_append(&children, &child->super); } } - free(executable); pmix_argv_free(client_argv); pmix_argv_free(client_env); @@ -577,15 +626,21 @@ int main(int argc, char **argv) nanosleep(&ts, NULL); } - /* see if anyone exited with non-zero status */ - n=0; - PMIX_LIST_FOREACH(child, &children, wait_tracker_t) { - if (0 != child->exit_code) { - fprintf(stderr, "Child %d exited with status %d - test FAILED\n", n, child->exit_code); - goto done; - } - ++n; + /* see if anyone exited with non-zero status unless the test + * was expected to do so */ + if (NULL == strstr(executable, "simpdie")) { + n=0; + PMIX_LIST_FOREACH(child, &children, wait_tracker_t) { + if (0 != child->exit_code) { + fprintf(stderr, "Child %d [%d] exited with status %d - test FAILED\n", n, child->pid, child->exit_code); + } + ++n; + } + } else if (1 == exit_code) { + exit_code = 0; } + free(executable); + /* try notifying ourselves */ ninfo = 3; PMIX_INFO_CREATE(info, ninfo); @@ -601,6 +656,24 @@ int main(int argc, char **argv) DEBUG_DESTRUCT_LOCK(&globallock); PMIX_INFO_FREE(info, ninfo); +#if 0 + fprintf(stderr, "TEST NONDEFAULT NOTIFICATION\n"); + /* verify that notifications don't recirculate */ + ninfo = 1; + PMIX_INFO_CREATE(info, ninfo); + /* mark that it is not to go to any default handlers */ + PMIX_INFO_LOAD(&info[0], PMIX_EVENT_NON_DEFAULT, NULL, PMIX_BOOL); + PMIx_Notify_event(PMIX_ERR_DEBUGGER_RELEASE, + &pmix_globals.myid, PMIX_RANGE_LOCAL, + info, ninfo, NULL, NULL); + PMIX_INFO_FREE(info, ninfo); + /* wait a little in case we get notified */ + for (ninfo=0; ninfo < 100000; ninfo++) { + struct timespec t = {0, 100}; + nanosleep(&t, NULL); + } +#endif + done: /* deregister the event handlers */ PMIx_Deregister_event_handler(0, NULL, NULL); @@ -630,42 +703,132 @@ static void set_namespace(int 
nprocs, char *ranks, char *nspace, pmix_op_cbfunc_t cbfunc, myxfer_t *x) { char *regex, *ppn; - char hostname[PMIX_MAXHOSTNAMELEN]; + int n, m, k; + pmix_info_t *info; + pmix_data_array_t *array; - gethostname(hostname, sizeof(hostname)); - x->ninfo = 7; + x->ninfo = 16 + nprocs; PMIX_INFO_CREATE(x->info, x->ninfo); - (void)strncpy(x->info[0].key, PMIX_UNIV_SIZE, PMIX_MAX_KEYLEN); - x->info[0].value.type = PMIX_UINT32; - x->info[0].value.data.uint32 = nprocs; - - (void)strncpy(x->info[1].key, PMIX_SPAWNED, PMIX_MAX_KEYLEN); - x->info[1].value.type = PMIX_UINT32; - x->info[1].value.data.uint32 = 0; - - (void)strncpy(x->info[2].key, PMIX_LOCAL_SIZE, PMIX_MAX_KEYLEN); - x->info[2].value.type = PMIX_UINT32; - x->info[2].value.data.uint32 = nprocs; - - (void)strncpy(x->info[3].key, PMIX_LOCAL_PEERS, PMIX_MAX_KEYLEN); - x->info[3].value.type = PMIX_STRING; - x->info[3].value.data.string = strdup(ranks); - - PMIx_generate_regex(hostname, &regex); - (void)strncpy(x->info[4].key, PMIX_NODE_MAP, PMIX_MAX_KEYLEN); - x->info[4].value.type = PMIX_STRING; - x->info[4].value.data.string = regex; - - PMIx_generate_ppn(ranks, &ppn); - (void)strncpy(x->info[5].key, PMIX_PROC_MAP, PMIX_MAX_KEYLEN); - x->info[5].value.type = PMIX_STRING; - x->info[5].value.data.string = ppn; - - (void)strncpy(x->info[6].key, PMIX_JOB_SIZE, PMIX_MAX_KEYLEN); - x->info[6].value.type = PMIX_UINT32; - x->info[6].value.data.uint32 = nprocs; - + n = 0; + + PMIx_generate_regex("test000,test001,test002", &regex); + PMIx_generate_ppn("0;1;2", &ppn); + + (void)strncpy(x->info[n].key, PMIX_NODE_MAP, PMIX_MAX_KEYLEN); + x->info[n].value.type = PMIX_STRING; + x->info[n].value.data.string = regex; + ++n; + + /* if we have some empty nodes, then fill their spots */ + (void)strncpy(x->info[n].key, PMIX_PROC_MAP, PMIX_MAX_KEYLEN); + x->info[n].value.type = PMIX_STRING; + x->info[n].value.data.string = ppn; + ++n; + + (void)strncpy(x->info[n].key, PMIX_UNIV_SIZE, PMIX_MAX_KEYLEN); + x->info[n].value.type = PMIX_UINT32; +
x->info[n].value.data.uint32 = nprocs; + ++n; + + (void)strncpy(x->info[n].key, PMIX_SPAWNED, PMIX_MAX_KEYLEN); + x->info[n].value.type = PMIX_UINT32; + x->info[n].value.data.uint32 = 0; + ++n; + + (void)strncpy(x->info[n].key, PMIX_LOCAL_SIZE, PMIX_MAX_KEYLEN); + x->info[n].value.type = PMIX_UINT32; + x->info[n].value.data.uint32 = nprocs; + ++n; + + (void)strncpy(x->info[n].key, PMIX_LOCAL_PEERS, PMIX_MAX_KEYLEN); + x->info[n].value.type = PMIX_STRING; + x->info[n].value.data.string = strdup(ranks); + ++n; + + (void)strncpy(x->info[n].key, PMIX_JOB_SIZE, PMIX_MAX_KEYLEN); + x->info[n].value.type = PMIX_UINT32; + x->info[n].value.data.uint32 = nprocs; + ++n; + + (void)strncpy(x->info[n].key, PMIX_JOBID, PMIX_MAX_KEYLEN); + x->info[n].value.type = PMIX_STRING; + x->info[n].value.data.string = strdup("1234"); + ++n; + + (void)strncpy(x->info[n].key, PMIX_NPROC_OFFSET, PMIX_MAX_KEYLEN); + x->info[n].value.type = PMIX_UINT32; + x->info[n].value.data.uint32 = 0; + ++n; + + (void)strncpy(x->info[n].key, PMIX_NODEID, PMIX_MAX_KEYLEN); + x->info[n].value.type = PMIX_UINT32; + x->info[n].value.data.uint32 = 0; + ++n; + + (void)strncpy(x->info[n].key, PMIX_NODE_SIZE, PMIX_MAX_KEYLEN); + x->info[n].value.type = PMIX_UINT32; + x->info[n].value.data.uint32 = nprocs; + ++n; + + (void)strncpy(x->info[n].key, PMIX_NUM_NODES, PMIX_MAX_KEYLEN); + x->info[n].value.type = PMIX_UINT32; + x->info[n].value.data.uint32 = 1; + ++n; + + (void)strncpy(x->info[n].key, PMIX_UNIV_SIZE, PMIX_MAX_KEYLEN); + x->info[n].value.type = PMIX_UINT32; + x->info[n].value.data.uint32 = nprocs; + ++n; + + (void)strncpy(x->info[n].key, PMIX_MAX_PROCS, PMIX_MAX_KEYLEN); + x->info[n].value.type = PMIX_UINT32; + x->info[n].value.data.uint32 = nprocs; + ++n; + + (void)strncpy(x->info[n].key, PMIX_JOB_NUM_APPS, PMIX_MAX_KEYLEN); + x->info[n].value.type = PMIX_UINT32; + x->info[n].value.data.uint32 = 1; + ++n; + + (void)strncpy(x->info[n].key, PMIX_LOCALLDR, PMIX_MAX_KEYLEN); + x->info[n].value.type = 
PMIX_PROC_RANK; + x->info[n].value.data.uint32 = 0; + ++n; + + /* add the proc-specific data */ + for (m=0; m < nprocs; m++) { + (void)strncpy(x->info[n].key, PMIX_PROC_DATA, PMIX_MAX_KEYLEN); + x->info[n].value.type = PMIX_DATA_ARRAY; + PMIX_DATA_ARRAY_CREATE(array, 5, PMIX_INFO); + x->info[n].value.data.darray = array; + info = (pmix_info_t*)array->array; + k = 0; + (void)strncpy(info[k].key, PMIX_RANK, PMIX_MAX_KEYLEN); + info[k].value.type = PMIX_PROC_RANK; + info[k].value.data.rank = m; + ++k; + (void)strncpy(info[k].key, PMIX_GLOBAL_RANK, PMIX_MAX_KEYLEN); + info[k].value.type = PMIX_PROC_RANK; + info[k].value.data.rank = m; + ++k; + (void)strncpy(info[k].key, PMIX_LOCAL_RANK, PMIX_MAX_KEYLEN); + info[k].value.type = PMIX_UINT16; + info[k].value.data.uint16 = m; + ++k; + + (void)strncpy(info[k].key, PMIX_NODE_RANK, PMIX_MAX_KEYLEN); + info[k].value.type = PMIX_UINT16; + info[k].value.data.uint16 = m; + ++k; + + (void)strncpy(info[k].key, PMIX_NODEID, PMIX_MAX_KEYLEN); + info[k].value.type = PMIX_UINT32; + info[k].value.data.uint32 = 0; + ++k; + /* move to next proc */ + ++n; + } PMIx_server_register_nspace(nspace, nprocs, x->info, x->ninfo, cbfunc, x); } @@ -679,6 +842,12 @@ static void errhandler(size_t evhdlr_registration_id, void *cbdata) { pmix_output(0, "SERVER: ERRHANDLER CALLED WITH STATUS %d", status); + /* we must NOT tell the event handler state machine that we + * are the last step as that will prevent it from notifying + * anyone else that might be listening for declarations */ + if (NULL != cbfunc) { + cbfunc(PMIX_SUCCESS, NULL, 0, NULL, NULL, cbdata); + } } static void errhandler_reg_callbk (pmix_status_t status, @@ -687,8 +856,6 @@ static void errhandler_reg_callbk (pmix_status_t status, { mylock_t *lock = (mylock_t*)cbdata; - pmix_output(0, "SERVER: ERRHANDLER REGISTRATION CALLBACK CALLED WITH STATUS %d, ref=%lu", - status, (unsigned long)errhandler_ref); lock->status = status; DEBUG_WAKEUP_THREAD(lock); } @@ -696,21 +863,12 @@ static void 
errhandler_reg_callbk (pmix_status_t status, static pmix_status_t connected(const pmix_proc_t *proc, void *server_object, pmix_op_cbfunc_t cbfunc, void *cbdata) { - if (NULL != cbfunc) { - cbfunc(PMIX_SUCCESS, cbdata); - } - return PMIX_SUCCESS; + return PMIX_OPERATION_SUCCEEDED; } static pmix_status_t finalized(const pmix_proc_t *proc, void *server_object, pmix_op_cbfunc_t cbfunc, void *cbdata) { - pmix_output(0, "SERVER: FINALIZED %s:%d WAKEUP %d", - proc->nspace, proc->rank, wakeup); - /* ensure we call the cbfunc so the proc can exit! */ - if (NULL != cbfunc) { - cbfunc(PMIX_SUCCESS, cbdata); - } - return PMIX_SUCCESS; + return PMIX_OPERATION_SUCCEEDED; } static void abcbfunc(pmix_status_t status, void *cbdata) @@ -768,17 +926,30 @@ static pmix_status_t abort_fn(const pmix_proc_t *proc, return PMIX_SUCCESS; } +static void fencbfn(int sd, short args, void *cbdata) +{ + pmix_shift_caddy_t *scd = (pmix_shift_caddy_t*)cbdata; + /* pass the provided data back to each participating proc */ + if (NULL != scd->cbfunc.modexcbfunc) { + scd->cbfunc.modexcbfunc(scd->status, scd->data, scd->ndata, scd->cbdata, NULL, NULL); + } + PMIX_RELEASE(scd); +} static pmix_status_t fencenb_fn(const pmix_proc_t procs[], size_t nprocs, const pmix_info_t info[], size_t ninfo, char *data, size_t ndata, pmix_modex_cbfunc_t cbfunc, void *cbdata) { - pmix_output(0, "SERVER: FENCENB"); - /* pass the provided data back to each participating proc */ - if (NULL != cbfunc) { - cbfunc(PMIX_SUCCESS, data, ndata, cbdata, NULL, NULL); - } + pmix_shift_caddy_t *scd; + + scd = PMIX_NEW(pmix_shift_caddy_t); + scd->status = PMIX_SUCCESS; + scd->data = data; + scd->ndata = ndata; + scd->cbfunc.modexcbfunc = cbfunc; + scd->cbdata = cbdata; + PMIX_THREADSHIFT(scd, fencbfn); return PMIX_SUCCESS; } @@ -787,18 +958,19 @@ static pmix_status_t dmodex_fn(const pmix_proc_t *proc, const pmix_info_t info[], size_t ninfo, pmix_modex_cbfunc_t cbfunc, void *cbdata) { - pmix_output(0, "SERVER: DMODEX"); + 
pmix_shift_caddy_t *scd; /* if this is a timeout test, then do nothing */ if (istimeouttest) { return PMIX_SUCCESS; } - /* we don't have any data for remote procs as this - * test only runs one server - so report accordingly */ - if (NULL != cbfunc) { - cbfunc(PMIX_ERR_NOT_FOUND, NULL, 0, cbdata, NULL, NULL); - } + scd = PMIX_NEW(pmix_shift_caddy_t); + scd->status = PMIX_ERR_NOT_FOUND; + scd->cbfunc.modexcbfunc = cbfunc; + scd->cbdata = cbdata; + PMIX_THREADSHIFT(scd, fencbfn); + return PMIX_SUCCESS; } @@ -810,8 +982,6 @@ static pmix_status_t publish_fn(const pmix_proc_t *proc, pmix_locdat_t *p; size_t n; - pmix_output(0, "SERVER: PUBLISH"); - for (n=0; n < ninfo; n++) { p = PMIX_NEW(pmix_locdat_t); (void)strncpy(p->pdata.proc.nspace, proc->nspace, PMIX_MAX_NSLEN); @@ -820,12 +990,26 @@ static pmix_status_t publish_fn(const pmix_proc_t *proc, pmix_value_xfer(&p->pdata.value, (pmix_value_t*)&info[n].value); pmix_list_append(&pubdata, &p->super); } - if (NULL != cbfunc) { - cbfunc(PMIX_SUCCESS, cbdata); - } - return PMIX_SUCCESS; + + return PMIX_OPERATION_SUCCEEDED; } +typedef struct { + pmix_event_t ev; + pmix_pdata_t *pd; + size_t n; + pmix_lookup_cbfunc_t cbfunc; + void *cbdata; +} lkobj_t; + +static void lkcbfn(int sd, short args, void *cbdata) +{ + lkobj_t *lk = (lkobj_t*)cbdata; + + lk->cbfunc(PMIX_SUCCESS, lk->pd, lk->n, lk->cbdata); + PMIX_PDATA_FREE(lk->pd, lk->n); + free(lk); +} static pmix_status_t lookup_fn(const pmix_proc_t *proc, char **keys, const pmix_info_t info[], size_t ninfo, @@ -836,8 +1020,7 @@ static pmix_status_t lookup_fn(const pmix_proc_t *proc, char **keys, size_t i, n; pmix_pdata_t *pd = NULL; pmix_status_t ret = PMIX_ERR_NOT_FOUND; - - pmix_output(0, "SERVER: LOOKUP"); + lkobj_t *lk; PMIX_CONSTRUCT(&results, pmix_list_t); @@ -868,13 +1051,16 @@ static pmix_status_t lookup_fn(const pmix_proc_t *proc, char **keys, } } PMIX_LIST_DESTRUCT(&results); - if (NULL != cbfunc) { - cbfunc(ret, pd, n, cbdata); - } - if (0 < n) { - PMIX_PDATA_FREE(pd, 
n); + if (PMIX_SUCCESS == ret) { + lk = (lkobj_t*)malloc(sizeof(lkobj_t)); + lk->pd = pd; + lk->n = n; + lk->cbfunc = cbfunc; + lk->cbdata = cbdata; + PMIX_THREADSHIFT(lk, lkcbfn); } - return PMIX_SUCCESS; + + return ret; } @@ -885,8 +1071,6 @@ static pmix_status_t unpublish_fn(const pmix_proc_t *proc, char **keys, pmix_locdat_t *p, *p2; size_t n; - pmix_output(0, "SERVER: UNPUBLISH"); - for (n=0; NULL != keys[n]; n++) { PMIX_LIST_FOREACH_SAFE(p, p2, &pubdata, pmix_locdat_t) { if (0 == strncmp(keys[n], p->pdata.key, PMIX_MAX_KEYLEN)) { @@ -896,10 +1080,7 @@ static pmix_status_t unpublish_fn(const pmix_proc_t *proc, char **keys, } } } - if (NULL != cbfunc) { - cbfunc(PMIX_SUCCESS, cbdata); - } - return PMIX_SUCCESS; + return PMIX_OPERATION_SUCCEEDED; } static void spcbfunc(pmix_status_t status, void *cbdata) @@ -921,8 +1102,6 @@ static pmix_status_t spawn_fn(const pmix_proc_t *proc, pmix_proc_t *pptr; bool spawned; - pmix_output(0, "SERVER: SPAWN"); - /* check the job info for parent and spawned keys */ for (n=0; n < ninfo; n++) { if (0 == strncmp(job_info[n].key, PMIX_PARENT_ID, PMIX_MAX_KEYLEN)) { @@ -956,18 +1135,12 @@ static pmix_status_t connect_fn(const pmix_proc_t procs[], size_t nprocs, const pmix_info_t info[], size_t ninfo, pmix_op_cbfunc_t cbfunc, void *cbdata) { - pmix_output(0, "SERVER: CONNECT"); - /* in practice, we would pass this request to the local * resource manager for handling */ numconnects++; - if (NULL != cbfunc) { - cbfunc(PMIX_SUCCESS, cbdata); - } - - return PMIX_SUCCESS; + return PMIX_OPERATION_SUCCEEDED; } @@ -975,32 +1148,20 @@ static pmix_status_t disconnect_fn(const pmix_proc_t procs[], size_t nprocs, const pmix_info_t info[], size_t ninfo, pmix_op_cbfunc_t cbfunc, void *cbdata) { - pmix_output(0, "SERVER: DISCONNECT"); - - /* in practice, we would pass this request to the local - * resource manager for handling */ - - if (NULL != cbfunc) { - cbfunc(PMIX_SUCCESS, cbdata); - } - - return PMIX_SUCCESS; + return 
PMIX_OPERATION_SUCCEEDED; } static pmix_status_t register_event_fn(pmix_status_t *codes, size_t ncodes, const pmix_info_t info[], size_t ninfo, pmix_op_cbfunc_t cbfunc, void *cbdata) { - if (NULL != cbfunc) { - cbfunc(PMIX_SUCCESS, cbdata); - } - return PMIX_SUCCESS; + return PMIX_OPERATION_SUCCEEDED; } static pmix_status_t deregister_events(pmix_status_t *codes, size_t ncodes, pmix_op_cbfunc_t cbfunc, void *cbdata) { - return PMIX_SUCCESS; + return PMIX_OPERATION_SUCCEEDED; } static pmix_status_t notify_event(pmix_status_t code, @@ -1009,14 +1170,25 @@ static pmix_status_t notify_event(pmix_status_t code, pmix_info_t info[], size_t ninfo, pmix_op_cbfunc_t cbfunc, void *cbdata) { - return PMIX_SUCCESS; + return PMIX_OPERATION_SUCCEEDED; } typedef struct query_data_t { + pmix_event_t ev; pmix_info_t *data; size_t ndata; + pmix_info_cbfunc_t cbfunc; + void *cbdata; } query_data_t; +static void qfn(int sd, short args, void *cbdata) +{ + query_data_t *qd = (query_data_t*)cbdata; + + qd->cbfunc(PMIX_SUCCESS, qd->data, qd->ndata, qd->cbdata, NULL, NULL); + PMIX_INFO_FREE(qd->data, qd->ndata); +} + static pmix_status_t query_fn(pmix_proc_t *proct, pmix_query_t *queries, size_t nqueries, pmix_info_cbfunc_t cbfunc, @@ -1024,8 +1196,7 @@ static pmix_status_t query_fn(pmix_proc_t *proct, { size_t n; pmix_info_t *info; - - pmix_output(0, "SERVER: QUERY"); + query_data_t qd; if (NULL == cbfunc) { return PMIX_ERROR; @@ -1040,7 +1211,11 @@ static pmix_status_t query_fn(pmix_proc_t *proct, return PMIX_ERROR; } } - cbfunc(PMIX_SUCCESS, info, nqueries, cbdata, NULL, NULL); + qd.data = info; + qd.ndata = nqueries; + qd.cbfunc = cbfunc; + qd.cbdata = cbdata; + PMIX_THREADSHIFT(&qd, qfn); return PMIX_SUCCESS; } @@ -1050,8 +1225,6 @@ static void tool_connect_fn(pmix_info_t *info, size_t ninfo, { pmix_proc_t proc; - pmix_output(0, "SERVER: TOOL CONNECT"); - /* just pass back an arbitrary nspace */ (void)strncpy(proc.nspace, "TOOL", PMIX_MAX_NSLEN); proc.rank = 0; @@ -1061,18 +1234,54 @@ 
static void tool_connect_fn(pmix_info_t *info, size_t ninfo, } } +typedef struct { + pmix_event_t ev; + pmix_op_cbfunc_t cbfunc; + void *cbdata; +} mylog_t; + +static void foobar(int sd, short args, void *cbdata) +{ + mylog_t *lg = (mylog_t*)cbdata; + lg->cbfunc(PMIX_SUCCESS, lg->cbdata); +} static void log_fn(const pmix_proc_t *client, const pmix_info_t data[], size_t ndata, const pmix_info_t directives[], size_t ndirs, pmix_op_cbfunc_t cbfunc, void *cbdata) { - pmix_output(0, "SERVER: LOG"); + mylog_t *lg = (mylog_t *)malloc(sizeof(mylog_t)); - if (NULL != cbfunc) { - cbfunc(PMIX_SUCCESS, cbdata); - } + lg->cbfunc = cbfunc; + lg->cbdata = cbdata; + PMIX_THREADSHIFT(lg, foobar); +} + +static pmix_status_t alloc_fn(const pmix_proc_t *client, + pmix_alloc_directive_t directive, + const pmix_info_t data[], size_t ndata, + pmix_info_cbfunc_t cbfunc, void *cbdata) +{ + return PMIX_OPERATION_SUCCEEDED; } +static pmix_status_t jctrl_fn(const pmix_proc_t *requestor, + const pmix_proc_t targets[], size_t ntargets, + const pmix_info_t directives[], size_t ndirs, + pmix_info_cbfunc_t cbfunc, void *cbdata) +{ + return PMIX_OPERATION_SUCCEEDED; +} + +static pmix_status_t mon_fn(const pmix_proc_t *requestor, + const pmix_info_t *monitor, pmix_status_t error, + const pmix_info_t directives[], size_t ndirs, + pmix_info_cbfunc_t cbfunc, void *cbdata) +{ + return PMIX_ERR_NOT_SUPPORTED; +} + + static void wait_signal_callback(int fd, short event, void *arg) { pmix_event_t *sig = (pmix_event_t*) arg; @@ -1080,7 +1289,7 @@ static void wait_signal_callback(int fd, short event, void *arg) pid_t pid; wait_tracker_t *t2; - if (SIGCHLD != event_get_signal(sig)) { + if (SIGCHLD != pmix_event_get_signal(sig)) { return; } @@ -1101,14 +1310,21 @@ static void wait_signal_callback(int fd, short event, void *arg) /* we are already in an event, so it is safe to access the list */ PMIX_LIST_FOREACH(t2, &children, wait_tracker_t) { if (pid == t2->pid) { - t2->exit_code = status; /* found it! 
*/ - if (0 != status && 0 == exit_code) { - exit_code = status; + if (WIFEXITED(status)) { + t2->exit_code = WEXITSTATUS(status); + } else { + if (WIFSIGNALED(status)) { + t2->exit_code = WTERMSIG(status) + 128; + } + } + if (0 != t2->exit_code && 0 == exit_code) { + exit_code = t2->exit_code; } --wakeup; break; } } } + fprintf(stderr, "ENDLOOP\n"); } diff --git a/opal/mca/pmix/pmix3x/pmix/test/simple/simptimeout.c b/opal/mca/pmix/pmix3x/pmix/test/simple/simptimeout.c index f5454029887..10835d68abf 100644 --- a/opal/mca/pmix/pmix3x/pmix/test/simple/simptimeout.c +++ b/opal/mca/pmix/pmix3x/pmix/test/simple/simptimeout.c @@ -13,7 +13,7 @@ * All rights reserved. * Copyright (c) 2009-2012 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2011 Oak Ridge National Labs. All rights reserved. - * Copyright (c) 2013-2018 Intel, Inc. All rights reserved. + * Copyright (c) 2013-2019 Intel, Inc. All rights reserved. * Copyright (c) 2015 Mellanox Technologies, Inc. All rights reserved. * $COPYRIGHT$ * @@ -103,17 +103,17 @@ int main(int argc, char **argv) usleep(10); } - /* get our universe size */ + /* get our job size */ (void)strncpy(proc.nspace, myproc.nspace, PMIX_MAX_NSLEN); proc.rank = PMIX_RANK_WILDCARD; - if (PMIX_SUCCESS != (rc = PMIx_Get(&proc, PMIX_UNIV_SIZE, NULL, 0, &val))) { - pmix_output(0, "Client ns %s rank %d: PMIx_Get universe size failed: %s", + if (PMIX_SUCCESS != (rc = PMIx_Get(&proc, PMIX_JOB_SIZE, NULL, 0, &val))) { + pmix_output(0, "Client ns %s rank %d: PMIx_Get job size failed: %s", myproc.nspace, myproc.rank, PMIx_Error_string(rc)); goto done; } nprocs = val->data.uint32; PMIX_VALUE_RELEASE(val); - pmix_output(0, "Client %s:%d universe size %d", myproc.nspace, myproc.rank, nprocs); + pmix_output(0, "Client %s:%d job size %d", myproc.nspace, myproc.rank, nprocs); /* if we are rank=0, then do a fence with timeout */ if (0 == myproc.rank) { diff --git a/opal/mca/pmix/pmix3x/pmix/test/simple/simptool.c 
b/opal/mca/pmix/pmix3x/pmix/test/simple/simptool.c index 2af6f395ede..9e96d21ee7d 100644 --- a/opal/mca/pmix/pmix3x/pmix/test/simple/simptool.c +++ b/opal/mca/pmix/pmix3x/pmix/test/simple/simptool.c @@ -13,7 +13,7 @@ * All rights reserved. * Copyright (c) 2009-2012 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2011 Oak Ridge National Labs. All rights reserved. - * Copyright (c) 2013-2017 Intel, Inc. All rights reserved. + * Copyright (c) 2013-2018 Intel, Inc. All rights reserved. * Copyright (c) 2015 Mellanox Technologies, Inc. All rights reserved. * $COPYRIGHT$ * @@ -95,6 +95,7 @@ int main(int argc, char **argv) PMIX_QUERY_CREATE(query, nq); pmix_argv_append_nosize(&query[0].keys, "foobar"); pmix_argv_append_nosize(&query[1].keys, "spastic"); + pmix_argv_append_nosize(&query[1].keys, PMIX_SERVER_URI); active = true; if (PMIX_SUCCESS != (rc = PMIx_Query_info_nb(query, nq, cbfunc, (void*)&active))) { pmix_output(0, "Client ns %s rank %d: PMIx_Query_info failed: %d", myproc.nspace, myproc.rank, rc); diff --git a/opal/mca/pmix/pmix3x/pmix/test/simple/stability.c b/opal/mca/pmix/pmix3x/pmix/test/simple/stability.c index d67b6f84109..c97df8c74fe 100644 --- a/opal/mca/pmix/pmix3x/pmix/test/simple/stability.c +++ b/opal/mca/pmix/pmix3x/pmix/test/simple/stability.c @@ -13,7 +13,7 @@ * All rights reserved. * Copyright (c) 2009-2012 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2011 Oak Ridge National Labs. All rights reserved. - * Copyright (c) 2013-2018 Intel, Inc. All rights reserved. + * Copyright (c) 2013-2019 Intel, Inc. All rights reserved. * Copyright (c) 2015-2018 Research Organization for Information Science * and Technology (RIST). All rights reserved. * Copyright (c) 2016 IBM Corporation. All rights reserved. 
@@ -183,6 +183,8 @@ static pmix_list_t pubdata; static pmix_event_t handler; static pmix_list_t children; static bool istimeouttest = false; +static bool nettest = false; +static bool arrays = false; static void set_namespace(int nprocs, char *ranks, char *nspace, pmix_op_cbfunc_t cbfunc, myxfer_t *x); @@ -209,6 +211,32 @@ static void opcbfunc(pmix_status_t status, void *cbdata) DEBUG_WAKEUP_THREAD(&x->lock); } +static void setup_cbfunc(pmix_status_t status, + pmix_info_t info[], size_t ninfo, + void *provided_cbdata, + pmix_op_cbfunc_t cbfunc, void *cbdata) +{ + myxfer_t *x = (myxfer_t*)provided_cbdata; + size_t n; + + /* transfer it to the caddy for return to the main thread */ + if (0 < ninfo) { + PMIX_INFO_CREATE(x->info, ninfo); + x->ninfo = ninfo; + for (n=0; n < ninfo; n++) { + PMIX_INFO_XFER(&x->info[n], &info[n]); + } + } + + /* let the library release the data and cleanup from + * the operation */ + if (NULL != cbfunc) { + cbfunc(PMIX_SUCCESS, cbdata); + } + + DEBUG_WAKEUP_THREAD(&x->lock); +} + int main(int argc, char **argv) { char **client_env=NULL; @@ -225,6 +253,11 @@ int main(int argc, char **argv) size_t ninfo; mylock_t mylock; int ncycles=1, m, delay=0; + bool hwloc = false; +#if PMIX_HAVE_HWLOC + char *hwloc_file = NULL; +#endif + sigset_t unblock; /* smoke test */ if (PMIX_SUCCESS != 0) { @@ -260,30 +293,115 @@ int main(int argc, char **argv) 0 == strcmp("--sleep", argv[n])) && NULL != argv[n+1]) { delay = strtol(argv[n+1], NULL, 10); +#if PMIX_HAVE_HWLOC + } else if (0 == strcmp("-hwloc", argv[n]) || + 0 == strcmp("--hwloc", argv[n])) { + /* test hwloc support */ + hwloc = true; + } else if (0 == strcmp("-hwloc-file", argv[n]) || + 0 == strcmp("--hwloc-file", argv[n])) { + if (NULL == argv[n+1]) { + fprintf(stderr, "The --hwloc-file option requires an argument\n"); + exit(1); + } + hwloc_file = strdup(argv[n+1]); + hwloc = true; + ++n; +#endif } else if (0 == strcmp("-h", argv[n])) { /* print the options and exit */ fprintf(stderr, "usage: 
simptest \n"); fprintf(stderr, " -n N Number of clients to run\n"); fprintf(stderr, " -e foo Name of the client executable to run (default: simpclient\n"); fprintf(stderr, " -reps N Cycle for N repetitions"); + fprintf(stderr, " -hwloc Test hwloc support\n"); + fprintf(stderr, " -hwloc-file FILE Use file to import topology\n"); + fprintf(stderr, " -net-test Test network endpt assignments\n"); + fprintf(stderr, " -arrays Use the job session array to pass registration info\n"); exit(0); + } else if (0 == strcmp("-net-test", argv[n]) || + 0 == strcmp("--net-test", argv[n])) { + /* test network support */ + nettest = true; + } else if (0 == strcmp("-arrays", argv[n]) || + 0 == strcmp("--arrays", argv[n])) { + /* test network support */ + arrays = true; } } if (NULL == executable) { - executable = strdup("./quietclient"); + if (nettest) { + executable = strdup("./simpcoord"); + } else { + executable = strdup("./quietclient"); + } + } + /* check for executable existence and permissions */ + if (0 != access(executable, X_OK)) { + fprintf(stderr, "Executable %s not found or missing executable permissions\n", executable); + exit(1); + } + + /* ensure that SIGCHLD is unblocked as we need to capture it */ + if (0 != sigemptyset(&unblock)) { + fprintf(stderr, "SIGEMPTYSET FAILED\n"); + exit(1); + } + if (0 != sigaddset(&unblock, SIGCHLD)) { + fprintf(stderr, "SIGADDSET FAILED\n"); + exit(1); + } + if (0 != sigprocmask(SIG_UNBLOCK, &unblock, NULL)) { + fprintf(stderr, "SIG_UNBLOCK FAILED\n"); + exit(1); } + /* setup the server library and tell it to support tool connections */ +#if PMIX_HAVE_HWLOC + if (hwloc) { +#if HWLOC_API_VERSION < 0x20000 + ninfo = 4; +#else + ninfo = 5; +#endif + } else { + ninfo = 4; + } +#else ninfo = 3; +#endif PMIX_INFO_CREATE(info, ninfo); PMIX_INFO_LOAD(&info[0], PMIX_SERVER_TOOL_SUPPORT, NULL, PMIX_BOOL); PMIX_INFO_LOAD(&info[1], PMIX_USOCK_DISABLE, NULL, PMIX_BOOL); PMIX_INFO_LOAD(&info[2], PMIX_SERVER_GATEWAY, NULL, PMIX_BOOL); +#if 
PMIX_HAVE_HWLOC + if (hwloc) { + if (NULL != hwloc_file) { + PMIX_INFO_LOAD(&info[3], PMIX_TOPOLOGY_FILE, hwloc_file, PMIX_STRING); + } else { + PMIX_INFO_LOAD(&info[3], PMIX_TOPOLOGY, NULL, PMIX_STRING); + } +#if HWLOC_API_VERSION >= 0x20000 + PMIX_INFO_LOAD(&info[4], PMIX_HWLOC_SHARE_TOPO, NULL, PMIX_BOOL); +#endif + } +#endif + if (nettest) { + /* set a known network configuration for the pnet/test component */ + putenv("PMIX_MCA_pnet_test_nverts=nodes:5;plane:d:3;plane:s:2;plane:d:5"); + putenv("PMIX_MCA_pnet=test"); + } + if (PMIX_SUCCESS != (rc = PMIx_server_init(&mymodule, info, ninfo))) { fprintf(stderr, "Init failed with error %d\n", rc); return rc; } PMIX_INFO_FREE(info, ninfo); + if (nettest) { + unsetenv("PMIX_MCA_pnet"); + unsetenv("PMIX_MCA_pnet_test_nverts"); + } /* register the default errhandler */ DEBUG_CONSTRUCT_LOCK(&mylock); @@ -304,9 +422,9 @@ int main(int argc, char **argv) /* setup to see sigchld on the forked tests */ PMIX_CONSTRUCT(&children, pmix_list_t); - event_assign(&handler, pmix_globals.evbase, SIGCHLD, - EV_SIGNAL|EV_PERSIST,wait_signal_callback, &handler); - event_add(&handler, NULL); + pmix_event_assign(&handler, pmix_globals.evbase, SIGCHLD, + EV_SIGNAL|EV_PERSIST,wait_signal_callback, &handler); + pmix_event_add(&handler, NULL); for (m=0; m < ncycles; m++) { fprintf(stderr, "Running cycle %d\n", m); @@ -445,43 +563,186 @@ int main(int argc, char **argv) static void set_namespace(int nprocs, char *ranks, char *nspace, pmix_op_cbfunc_t cbfunc, myxfer_t *x) { - char *regex, *ppn; - char hostname[PMIX_MAXHOSTNAMELEN]; + char *regex, *ppn, *rks; + int n, m, k; + pmix_data_array_t *array; + pmix_info_t *info, *iptr, *ip; + myxfer_t cd, lock; + pmix_status_t rc; - gethostname(hostname, sizeof(hostname)); - x->ninfo = 7; + if (arrays) { + x->ninfo = 15 + nprocs; + } else { + x->ninfo = 16 + nprocs; + } PMIX_INFO_CREATE(x->info, x->ninfo); - (void)strncpy(x->info[0].key, PMIX_UNIV_SIZE, PMIX_MAX_KEYLEN); - x->info[0].value.type = 
PMIX_UINT32; - x->info[0].value.data.uint32 = nprocs; - - (void)strncpy(x->info[1].key, PMIX_SPAWNED, PMIX_MAX_KEYLEN); - x->info[1].value.type = PMIX_UINT32; - x->info[1].value.data.uint32 = 0; - - (void)strncpy(x->info[2].key, PMIX_LOCAL_SIZE, PMIX_MAX_KEYLEN); - x->info[2].value.type = PMIX_UINT32; - x->info[2].value.data.uint32 = nprocs; - - (void)strncpy(x->info[3].key, PMIX_LOCAL_PEERS, PMIX_MAX_KEYLEN); - x->info[3].value.type = PMIX_STRING; - x->info[3].value.data.string = strdup(ranks); - - PMIx_generate_regex(hostname, &regex); - (void)strncpy(x->info[4].key, PMIX_NODE_MAP, PMIX_MAX_KEYLEN); - x->info[4].value.type = PMIX_STRING; - x->info[4].value.data.string = regex; - - PMIx_generate_ppn(ranks, &ppn); - (void)strncpy(x->info[5].key, PMIX_PROC_MAP, PMIX_MAX_KEYLEN); - x->info[5].value.type = PMIX_STRING; - x->info[5].value.data.string = ppn; + n = 0; + + PMIx_generate_regex("test000,test001,test002", &regex); + PMIx_generate_ppn("0;1;2", &ppn); + + if (arrays) { + (void)strncpy(x->info[n].key, PMIX_JOB_INFO_ARRAY, PMIX_MAX_KEYLEN); + x->info[n].value.type = PMIX_DATA_ARRAY; + PMIX_DATA_ARRAY_CREATE(x->info[n].value.data.darray, 2, PMIX_INFO); + iptr = (pmix_info_t*)x->info[n].value.data.darray->array; + (void)strncpy(iptr[0].key, PMIX_NODE_MAP, PMIX_MAX_KEYLEN); + iptr[0].value.type = PMIX_STRING; + iptr[0].value.data.string = regex; + (void)strncpy(iptr[1].key, PMIX_PROC_MAP, PMIX_MAX_KEYLEN); + iptr[1].value.type = PMIX_STRING; + iptr[1].value.data.string = ppn; + ++n; + } else { + (void)strncpy(x->info[n].key, PMIX_NODE_MAP, PMIX_MAX_KEYLEN); + x->info[n].value.type = PMIX_STRING; + x->info[n].value.data.string = regex; + ++n; + + /* if we have some empty nodes, then fill their spots */ + (void)strncpy(x->info[n].key, PMIX_PROC_MAP, PMIX_MAX_KEYLEN); + x->info[n].value.type = PMIX_STRING; + x->info[n].value.data.string = ppn; + ++n; + } - (void)strncpy(x->info[6].key, PMIX_JOB_SIZE, PMIX_MAX_KEYLEN); - x->info[6].value.type = PMIX_UINT32; - 
x->info[6].value.data.uint32 = nprocs; + /* we have the required info to run setup_app, so do that now */ + PMIX_INFO_CREATE(iptr, 4); + PMIX_INFO_XFER(&iptr[0], &x->info[0]); + PMIX_INFO_XFER(&iptr[1], &x->info[1]); + PMIX_INFO_LOAD(&iptr[2], PMIX_SETUP_APP_ENVARS, NULL, PMIX_BOOL); + PMIX_LOAD_KEY(iptr[3].key, PMIX_ALLOC_NETWORK); + iptr[3].value.type = PMIX_DATA_ARRAY; + PMIX_DATA_ARRAY_CREATE(iptr[3].value.data.darray, 2, PMIX_INFO); + ip = (pmix_info_t*)iptr[3].value.data.darray->array; + asprintf(&rks, "%s.net", nspace); + PMIX_INFO_LOAD(&ip[0], PMIX_ALLOC_NETWORK_ID, rks, PMIX_STRING); + free(rks); + PMIX_INFO_LOAD(&ip[1], PMIX_ALLOC_NETWORK_SEC_KEY, NULL, PMIX_BOOL); + PMIX_CONSTRUCT(&cd, myxfer_t); + if (PMIX_SUCCESS != (rc = PMIx_server_setup_application(nspace, iptr, 4, + setup_cbfunc, &cd))) { + pmix_output(0, "[%s:%d] PMIx_server_setup_application failed: %s", __FILE__, __LINE__, PMIx_Error_string(rc)); + DEBUG_DESTRUCT_LOCK(&cd.lock); + } else { + DEBUG_WAIT_THREAD(&cd.lock); + } + /* use the results to setup the local subsystems */ + PMIX_CONSTRUCT(&lock, myxfer_t); + if (PMIX_SUCCESS != (rc = PMIx_server_setup_local_support(nspace, cd.info, cd.ninfo, + opcbfunc, &lock))) { + pmix_output(0, "[%s:%d] PMIx_server_setup_local_support failed: %s", __FILE__, __LINE__, PMIx_Error_string(rc)); + } else { + DEBUG_WAIT_THREAD(&lock.lock); + } + PMIX_DESTRUCT(&lock); + PMIX_DESTRUCT(&cd); + + (void)strncpy(x->info[n].key, PMIX_UNIV_SIZE, PMIX_MAX_KEYLEN); + x->info[n].value.type = PMIX_UINT32; + x->info[n].value.data.uint32 = nprocs; + ++n; + + (void)strncpy(x->info[n].key, PMIX_SPAWNED, PMIX_MAX_KEYLEN); + x->info[n].value.type = PMIX_UINT32; + x->info[n].value.data.uint32 = 0; + ++n; + + (void)strncpy(x->info[n].key, PMIX_LOCAL_SIZE, PMIX_MAX_KEYLEN); + x->info[n].value.type = PMIX_UINT32; + x->info[n].value.data.uint32 = nprocs; + ++n; + + (void)strncpy(x->info[n].key, PMIX_LOCAL_PEERS, PMIX_MAX_KEYLEN); + x->info[n].value.type = PMIX_STRING; + 
x->info[n].value.data.string = strdup(ranks); + ++n; + + (void)strncpy(x->info[n].key, PMIX_JOB_SIZE, PMIX_MAX_KEYLEN); + x->info[n].value.type = PMIX_UINT32; + x->info[n].value.data.uint32 = nprocs; + ++n; + + (void)strncpy(x->info[n].key, PMIX_JOBID, PMIX_MAX_KEYLEN); + x->info[n].value.type = PMIX_STRING; + x->info[n].value.data.string = strdup("1234"); + ++n; + + (void)strncpy(x->info[n].key, PMIX_NPROC_OFFSET, PMIX_MAX_KEYLEN); + x->info[n].value.type = PMIX_UINT32; + x->info[n].value.data.uint32 = 0; + ++n; + + (void)strncpy(x->info[n].key, PMIX_NODEID, PMIX_MAX_KEYLEN); + x->info[n].value.type = PMIX_UINT32; + x->info[n].value.data.uint32 = 0; + ++n; + + (void)strncpy(x->info[n].key, PMIX_NODE_SIZE, PMIX_MAX_KEYLEN); + x->info[n].value.type = PMIX_UINT32; + x->info[n].value.data.uint32 = nprocs; + ++n; + + (void)strncpy(x->info[n].key, PMIX_NUM_NODES, PMIX_MAX_KEYLEN); + x->info[n].value.type = PMIX_UINT32; + x->info[n].value.data.uint32 = 1; + ++n; + + (void)strncpy(x->info[n].key, PMIX_UNIV_SIZE, PMIX_MAX_KEYLEN); + x->info[n].value.type = PMIX_UINT32; + x->info[n].value.data.uint32 = nprocs; + ++n; + + (void)strncpy(x->info[n].key, PMIX_MAX_PROCS, PMIX_MAX_KEYLEN); + x->info[n].value.type = PMIX_UINT32; + x->info[n].value.data.uint32 = nprocs; + ++n; + + (void)strncpy(x->info[n].key, PMIX_JOB_NUM_APPS, PMIX_MAX_KEYLEN); + x->info[n].value.type = PMIX_UINT32; + x->info[n].value.data.uint32 = 1; + ++n; + + (void)strncpy(x->info[n].key, PMIX_LOCALLDR, PMIX_MAX_KEYLEN); + x->info[n].value.type = PMIX_PROC_RANK; + x->info[n].value.data.uint32 = 0; + ++n; + + /* add the proc-specific data */ + for (m=0; m < nprocs; m++) { + (void)strncpy(x->info[n].key, PMIX_PROC_DATA, PMIX_MAX_KEYLEN); + x->info[n].value.type = PMIX_DATA_ARRAY; + PMIX_DATA_ARRAY_CREATE(array, 5, PMIX_INFO); + x->info[n].value.data.darray = array; + info = (pmix_info_t*)array->array; + k = 0; + (void)strncpy(info[k].key, PMIX_RANK, PMIX_MAX_KEYLEN); + info[k].value.type = PMIX_PROC_RANK; + 
info[k].value.data.rank = m; + ++k; + (void)strncpy(info[k].key, PMIX_GLOBAL_RANK, PMIX_MAX_KEYLEN); + info[k].value.type = PMIX_PROC_RANK; + info[k].value.data.rank = m; + ++k; + (void)strncpy(info[k].key, PMIX_LOCAL_RANK, PMIX_MAX_KEYLEN); + info[k].value.type = PMIX_UINT16; + info[k].value.data.uint16 = m; + ++k; + + (void)strncpy(info[k].key, PMIX_NODE_RANK, PMIX_MAX_KEYLEN); + info[k].value.type = PMIX_UINT16; + info[k].value.data.uint16 = m; + ++k; + + (void)strncpy(info[k].key, PMIX_NODEID, PMIX_MAX_KEYLEN); + info[k].value.type = PMIX_UINT32; + info[k].value.data.uint32 = 0; + ++k; + /* move to next proc */ + ++n; + } PMIx_server_register_nspace(nspace, nprocs, x->info, x->ninfo, cbfunc, x); } @@ -852,7 +1113,7 @@ static void wait_signal_callback(int fd, short event, void *arg) pid_t pid; wait_tracker_t *t2; - if (SIGCHLD != event_get_signal(sig)) { + if (SIGCHLD != pmix_event_get_signal(sig)) { return; } diff --git a/opal/mca/pmix/pmix3x/pmix/test/test_common.c b/opal/mca/pmix/pmix3x/pmix/test/test_common.c index 9021e58ba21..7b9ac8701d1 100644 --- a/opal/mca/pmix/pmix3x/pmix/test/test_common.c +++ b/opal/mca/pmix/pmix3x/pmix/test/test_common.c @@ -1,8 +1,8 @@ /* - * Copyright (c) 2013-2017 Intel, Inc. All rights reserved. + * Copyright (c) 2013-2018 Intel, Inc. All rights reserved. * Copyright (c) 2015 Artem Y. Polyakov . * All rights reserved. - * Copyright (c) 2015-2017 Mellanox Technologies, Inc. + * Copyright (c) 2015-2018 Mellanox Technologies, Inc. * All rights reserved. 
* $COPYRIGHT$ * @@ -87,6 +87,11 @@ void parse_cmd(int argc, char **argv, test_params *params) if (NULL != argv[i]) { params->binary = strdup(argv[i]); } + } else if (0 == strcmp(argv[i], "--nservers") || 0 == strcmp(argv[i], "-s")){ + i++; + if (NULL != argv[i]) { + params->nservers = atoi(argv[i]); + } } else if( 0 == strcmp(argv[i], "--verbose") || 0 == strcmp(argv[i],"-v") ){ TEST_VERBOSE_ON(); params->verbose = 1; @@ -600,7 +605,6 @@ int get_total_ns_number(test_params params) int get_all_ranks_from_namespace(test_params params, char *nspace, pmix_proc_t **ranks, size_t *nranks) { - int base_rank = 0; size_t num_ranks = 0; int num = -1; size_t j; @@ -616,7 +620,6 @@ int get_all_ranks_from_namespace(test_params params, char *nspace, pmix_proc_t * char *pch = tmp; int ns_id = (int)strtol(nspace + strlen(TEST_NAMESPACE) + 1, NULL, 10); while (NULL != pch && num != ns_id) { - base_rank += num_ranks; pch = strtok((-1 == num ) ? tmp : NULL, ":"); if (NULL == pch) { break; @@ -629,7 +632,7 @@ int get_all_ranks_from_namespace(test_params params, char *nspace, pmix_proc_t * PMIX_PROC_CREATE(*ranks, num_ranks); for (j = 0; j < num_ranks; j++) { (void)strncpy((*ranks)[j].nspace, nspace, PMIX_MAX_NSLEN); - (*ranks)[j].rank = base_rank+j; + (*ranks)[j].rank = j; } } else { free(tmp); diff --git a/opal/mca/pmix/pmix3x/pmix/test/test_common.h b/opal/mca/pmix/pmix3x/pmix/test/test_common.h index acc49d1bcef..490f68323be 100644 --- a/opal/mca/pmix/pmix3x/pmix/test/test_common.h +++ b/opal/mca/pmix/pmix3x/pmix/test/test_common.h @@ -1,10 +1,10 @@ /* - * Copyright (c) 2013-2018 Intel, Inc. All rights reserved. + * Copyright (c) 2013-2019 Intel, Inc. All rights reserved. * Copyright (c) 2015 Artem Y. Polyakov . * All rights reserved. * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. - * Copyright (c) 2015-2017 Mellanox Technologies, Inc. + * Copyright (c) 2015-2018 Mellanox Technologies, Inc. * All rights reserved. 
* $COPYRIGHT$ * @@ -51,7 +51,7 @@ extern FILE *file; #define STRIPPED_FILE_NAME (strrchr(__FILE__, '/') ? strrchr(__FILE__, '/') + 1 : __FILE__) #define TEST_OUTPUT(x) { \ - fprintf(file,"%s:%s: %s\n",STRIPPED_FILE_NAME, __func__, \ + fprintf(file,"==%d== %s:%s: %s\n", getpid(), STRIPPED_FILE_NAME, __func__, \ pmix_test_output_prepare x ); \ fflush(file); \ } @@ -59,13 +59,13 @@ extern FILE *file; // Write output without adding anything to it. // Need for automate tests to receive "OK" string #define TEST_OUTPUT_CLEAR(x) { \ - fprintf(file, "%s", pmix_test_output_prepare x ); \ + fprintf(file, "==%d== %s", getpid(), pmix_test_output_prepare x ); \ fflush(file); \ } // Always write errors to the stderr #define TEST_ERROR(x) { \ - fprintf(stderr,"ERROR [%s:%d:%s]: %s\n", STRIPPED_FILE_NAME, __LINE__, __func__, \ + fprintf(stderr,"==%d== ERROR [%s:%d:%s]: %s\n", getpid(), STRIPPED_FILE_NAME, __LINE__, __func__, \ pmix_test_output_prepare x ); \ fflush(stderr); \ } @@ -129,6 +129,8 @@ typedef struct { char *key_replace; int test_internal; char *gds_mode; + int nservers; + uint32_t lsize; } test_params; #define INIT_TEST_PARAMS(params) do { \ @@ -160,6 +162,8 @@ typedef struct { params.key_replace = NULL; \ params.test_internal = 0; \ params.gds_mode = NULL; \ + params.nservers = 1; \ + params.lsize = 0; \ } while (0) #define FREE_TEST_PARAMS(params) do { \ @@ -258,7 +262,7 @@ typedef struct { TEST_VERBOSE(("%s:%d want to get from %s:%d key %s", my_nspace, my_rank, ns, r, key)); \ if (blocking) { \ if (PMIX_SUCCESS != (rc = PMIx_Get(&foobar, key, NULL, 0, &val))) { \ - if( !( rc == PMIX_ERR_NOT_FOUND && ok_notfnd ) ){ \ + if( !( (rc == PMIX_ERR_NOT_FOUND || rc == PMIX_ERR_PROC_ENTRY_NOT_FOUND) && ok_notfnd ) ){ \ TEST_ERROR(("%s:%d: PMIx_Get failed: %d from %s:%d, key %s", my_nspace, my_rank, rc, ns, r, key)); \ } \ rc = PMIX_ERROR; \ @@ -285,9 +289,9 @@ typedef struct { } \ if (PMIX_SUCCESS == rc) { \ if( PMIX_SUCCESS != cbdata.status ){ \ - if( !( cbdata.status == 
PMIX_ERR_NOT_FOUND && ok_notfnd ) ){ \ - TEST_ERROR(("%s:%d: PMIx_Get_nb failed: %d from %s:%d, key=%s", \ - my_nspace, my_rank, rc, my_nspace, r)); \ + if( !( (cbdata.status == PMIX_ERR_NOT_FOUND || cbdata.status == PMIX_ERR_PROC_ENTRY_NOT_FOUND) && ok_notfnd ) ){ \ + TEST_ERROR(("%s:%d: PMIx_Get_nb failed: %d from %s:%d, key=%s", \ + my_nspace, my_rank, rc, my_nspace, r, key)); \ } \ rc = PMIX_ERROR; \ } else if (NULL == val) { \ diff --git a/opal/mca/pmix/pmix3x/pmix/test/test_error.c b/opal/mca/pmix/pmix3x/pmix/test/test_error.c index f5217f0657c..24a63da4917 100644 --- a/opal/mca/pmix/pmix3x/pmix/test/test_error.c +++ b/opal/mca/pmix/pmix3x/pmix/test/test_error.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016 Intel, Inc. All rights reserved. + * Copyright (c) 2015-2019 Intel, Inc. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -38,7 +38,7 @@ static void timeout_errhandler(size_t evhdlr_registration_id, void *cbdata) { TEST_ERROR(("timeout errhandler called for error status = %d ninfo = %d", - status, ninfo)); + status, (int)ninfo)); if (NULL != cbfunc) { cbfunc(PMIX_SUCCESS, NULL, 0, NULL, NULL, cbdata); } @@ -58,7 +58,7 @@ static void errhandler_reg_callbk1 (pmix_status_t status, size_t *ref = (size_t*) cbdata; *ref = errhandler_ref; TEST_VERBOSE(("PMIX client ERRHANDLER REGISTRATION CALLED WITH STATUS %d, ref=%lu", - status, *ref, (unsigned long)errhandler_ref)); + status, (unsigned long)errhandler_ref)); } diff --git a/opal/mca/pmix/pmix3x/pmix/test/test_fence.c b/opal/mca/pmix/pmix3x/pmix/test/test_fence.c index 9ad4cf786df..a33d9618b71 100644 --- a/opal/mca/pmix/pmix3x/pmix/test/test_fence.c +++ b/opal/mca/pmix/pmix3x/pmix/test/test_fence.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2018 Intel, Inc. All rights reserved. + * Copyright (c) 2016-2019 Intel, Inc. All rights reserved. * Copyright (c) 2015-2017 Mellanox Technologies, Inc. * All rights reserved. 
* $COPYRIGHT$ @@ -384,7 +384,7 @@ int test_job_fence(test_params params, char *my_nspace, pmix_rank_t my_rank) if( local ){ GET(int, (12340+j), my_nspace, i+params.base_rank, 100, j, 0, 0, 0); if (PMIX_SUCCESS != rc) { - TEST_ERROR(("%s:%d: PMIx_Get failed: %d", my_nspace, my_rank, rc)); + TEST_ERROR(("%s:%d: PMIx_Get failed: %s", my_nspace, my_rank, PMIx_Error_string(rc))); return PMIX_ERROR; } @@ -423,9 +423,10 @@ int test_job_fence(test_params params, char *my_nspace, pmix_rank_t my_rank) my_nspace, my_rank)); return PMIX_ERROR; } - if (PMIX_ERR_NOT_FOUND != rc) { - TEST_ERROR(("%s:%d [ERROR]: PMIx_Get returned %d instead of not_found", - my_nspace, my_rank, rc)); + if (PMIX_ERR_NOT_FOUND != rc && PMIX_ERR_PROC_ENTRY_NOT_FOUND != rc) { + TEST_ERROR(("%s:%d [ERROR]: PMIx_Get returned %s instead of not_found", + my_nspace, my_rank, PMIx_Error_string(rc))); + return PMIX_ERROR; } if (NULL != val) { TEST_ERROR(("%s:%d [ERROR]: PMIx_Get did not return NULL value", my_nspace, my_rank)); diff --git a/opal/mca/pmix/pmix3x/pmix/test/test_server.c b/opal/mca/pmix/pmix3x/pmix/test/test_server.c new file mode 100644 index 00000000000..30d174a9567 --- /dev/null +++ b/opal/mca/pmix/pmix3x/pmix/test/test_server.c @@ -0,0 +1,989 @@ + /* + * Copyright (c) 2015-2019 Intel, Inc. All rights reserved. + * Copyright (c) 2015-2018 Mellanox Technologies, Inc. + * All rights reserved. + * Copyright (c) 2016 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + * + */ + +#include +#include +#include +#include +#include +#include + +#include "pmix_server.h" +#include "src/include/pmix_globals.h" + +#include "test_server.h" +#include "test_common.h" +#include "cli_stages.h" +#include "server_callbacks.h" + +int my_server_id = 0; + +server_info_t *my_server_info = NULL; +pmix_list_t *server_list = NULL; +pmix_list_t *server_nspace = NULL; + +static void sdes(server_info_t *s) +{ + close(s->rd_fd); + close(s->wr_fd); + if (s->evread) { + event_del(s->evread); + } + s->evread = NULL; +} + +static void scon(server_info_t *s) +{ + s->idx = 0; + s->pid = 0; + s->rd_fd = -1; + s->wr_fd = -1; + s->evread = NULL; + s->modex_cbfunc = NULL; + s->cbdata = NULL; +} + +PMIX_CLASS_INSTANCE(server_info_t, + pmix_list_item_t, + scon, sdes); + +static void nsdes(server_nspace_t *ns) +{ + if (ns->task_map) { + free(ns->task_map); + } +} + +static void nscon(server_nspace_t *ns) +{ + memset(ns->name, 0, PMIX_MAX_NSLEN); + ns->ntasks = 0; + ns->task_map = NULL; +} + +PMIX_CLASS_INSTANCE(server_nspace_t, + pmix_list_item_t, + nscon, nsdes); + +static int server_send_procs(void); +static void server_read_cb(int fd, short event, void *arg); +static int srv_wait_all(double timeout); +static int server_fwd_msg(msg_hdr_t *msg_hdr, char *buf, size_t size); +static int server_send_msg(msg_hdr_t *msg_hdr, char *data, size_t size); +static void remove_server_item(server_info_t *server); +static void server_unpack_dmdx(char *buf, int *sender, pmix_proc_t *proc); +static int server_pack_dmdx(int sender_id, const char *nspace, int rank, + char **buf); +static void _dmdx_cb(int status, char *data, size_t sz, void *cbdata); + +static void release_cb(pmix_status_t status, void *cbdata) +{ + int *ptr = (int*)cbdata; + *ptr = 0; +} + +static void fill_seq_ranks_array(size_t nprocs, int base_rank, char **ranks) +{ + uint32_t i; + int len = 0, max_ranks_len; + if (0 >= nprocs) { + return; 
+ } + max_ranks_len = nprocs * (MAX_DIGIT_LEN+1); + *ranks = (char*) malloc(max_ranks_len); + for (i = 0; i < nprocs; i++) { + len += snprintf(*ranks + len, max_ranks_len-len-1, "%d", i+base_rank); + if (i != nprocs-1) { + len += snprintf(*ranks + len, max_ranks_len-len-1, "%c", ','); + } + } + if (len >= max_ranks_len-1) { + free(*ranks); + *ranks = NULL; + TEST_ERROR(("Not enough allocated space for global ranks array.")); + } +} + +static void set_namespace(int local_size, int univ_size, + int base_rank, char *name) +{ + size_t ninfo; + pmix_info_t *info; + ninfo = 8; + char *regex, *ppn; + char *ranks = NULL; + + PMIX_INFO_CREATE(info, ninfo); + pmix_strncpy(info[0].key, PMIX_UNIV_SIZE, PMIX_MAX_KEYLEN); + info[0].value.type = PMIX_UINT32; + info[0].value.data.uint32 = univ_size; + + pmix_strncpy(info[1].key, PMIX_SPAWNED, PMIX_MAX_KEYLEN); + info[1].value.type = PMIX_UINT32; + info[1].value.data.uint32 = 0; + + pmix_strncpy(info[2].key, PMIX_LOCAL_SIZE, PMIX_MAX_KEYLEN); + info[2].value.type = PMIX_UINT32; + info[2].value.data.uint32 = local_size; + + /* generate the array of local peers */ + fill_seq_ranks_array(local_size, base_rank, &ranks); + if (NULL == ranks) { + return; + } + pmix_strncpy(info[3].key, PMIX_LOCAL_PEERS, PMIX_MAX_KEYLEN); + info[3].value.type = PMIX_STRING; + info[3].value.data.string = strdup(ranks); + free(ranks); + + PMIx_generate_regex(NODE_NAME, ®ex); + pmix_strncpy(info[4].key, PMIX_NODE_MAP, PMIX_MAX_KEYLEN); + info[4].value.type = PMIX_STRING; + info[4].value.data.string = strdup(regex); + + /* generate the global proc map */ + fill_seq_ranks_array(univ_size, 0, &ranks); + if (NULL == ranks) { + return; + } + PMIx_generate_ppn(ranks, &ppn); + free(ranks); + pmix_strncpy(info[5].key, PMIX_PROC_MAP, PMIX_MAX_KEYLEN); + info[5].value.type = PMIX_STRING; + info[5].value.data.string = strdup(ppn); + + pmix_strncpy(info[6].key, PMIX_JOB_SIZE, PMIX_MAX_KEYLEN); + info[6].value.type = PMIX_UINT32; + info[6].value.data.uint32 = univ_size; 
+ + pmix_strncpy(info[7].key, PMIX_APPNUM, PMIX_MAX_KEYLEN); + info[7].value.type = PMIX_UINT32; + info[7].value.data.uint32 = getpid (); + + int in_progress = 1, rc; + if (PMIX_SUCCESS == (rc = PMIx_server_register_nspace(name, local_size, + info, ninfo, release_cb, &in_progress))) { + PMIX_WAIT_FOR_COMPLETION(in_progress); + } + PMIX_INFO_FREE(info, ninfo); +} + +static void server_unpack_procs(char *buf, size_t size) +{ + char *ptr = buf; + size_t i; + size_t ns_count; + char *nspace; + + while ((size_t)(ptr - buf) < size) { + ns_count = *(size_t *)ptr; + ptr += sizeof(size_t); + + for (i = 0; i < ns_count; i++) { + server_nspace_t *tmp, *ns_item = NULL; + size_t ltasks, ntasks; + int server_id; + + server_id = *(int *)ptr; + ptr += sizeof(int); + + nspace = ptr; + ptr += PMIX_MAX_NSLEN+1; + + ntasks = *(size_t *)ptr; + ptr += sizeof(size_t); + + ltasks = *(size_t *)ptr; + ptr += sizeof(size_t); + + PMIX_LIST_FOREACH(tmp, server_nspace, server_nspace_t) { + if (0 == strcmp(nspace, tmp->name)) { + ns_item = tmp; + break; + } + } + if (NULL == ns_item) { + ns_item = PMIX_NEW(server_nspace_t); + memcpy(ns_item->name, nspace, PMIX_MAX_NSLEN); + pmix_list_append(server_nspace, &ns_item->super); + ns_item->ltasks = ltasks; + ns_item->ntasks = ntasks; + ns_item->task_map = (int*)malloc(sizeof(int) * ntasks); + memset(ns_item->task_map, -1, sizeof(int) * ntasks); + } else { + assert(ns_item->ntasks == ntasks); + } + size_t i; + for (i = 0; i < ltasks; i++) { + int rank = *(int *)ptr; + ptr += sizeof(int); + if (ns_item->task_map[rank] >= 0) { + continue; + } + ns_item->task_map[rank] = server_id; + } + } + } +} + +static size_t server_pack_procs(int server_id, char **buf, size_t size) +{ + size_t ns_count = pmix_list_get_size(server_nspace); + size_t buf_size = sizeof(size_t) + (PMIX_MAX_NSLEN+1)*ns_count; + server_nspace_t *tmp; + char *ptr; + + if (0 == ns_count) { + return 0; + } + + buf_size += size; + /* compute size: server_id + total + local procs count + ranks 
*/ + PMIX_LIST_FOREACH(tmp, server_nspace, server_nspace_t) { + buf_size += sizeof(int) + sizeof(size_t) + sizeof(size_t) + + sizeof(int) * tmp->ltasks; + } + *buf = (char*)realloc(*buf, buf_size); + memset(*buf + size, 0, buf_size); + ptr = *buf + size; + /* pack ns count */ + memcpy(ptr, &ns_count, sizeof(size_t)); + ptr += sizeof(size_t); + + assert(server_nspace->pmix_list_length); + + PMIX_LIST_FOREACH(tmp, server_nspace, server_nspace_t) { + size_t i; + /* pack server_id */ + memcpy(ptr, &server_id, sizeof(int)); + ptr += sizeof(int); + /* pack ns name */ + memcpy(ptr, tmp->name, PMIX_MAX_NSLEN+1); + ptr += PMIX_MAX_NSLEN+1; + /* pack ns total size */ + memcpy(ptr, &tmp->ntasks, sizeof(size_t)); + ptr += sizeof(size_t); + /* pack ns local size */ + memcpy(ptr, &tmp->ltasks, sizeof(size_t)); + ptr += sizeof(size_t); + /* pack ns ranks */ + for(i = 0; i < tmp->ntasks; i++) { + if (tmp->task_map[i] == server_id) { + int rank = (int)i; + memcpy(ptr, &rank, sizeof(int)); + ptr += sizeof(int); + } + } + } + assert((size_t)(ptr - *buf) == buf_size); + return buf_size; +} + +static void remove_server_item(server_info_t *server) +{ + pmix_list_remove_item(server_list, &server->super); + PMIX_DESTRUCT_LOCK(&server->lock); + PMIX_RELEASE(server); +} + +static int srv_wait_all(double timeout) +{ + server_info_t *server, *next; + pid_t pid; + int status; + struct timeval tv; + double start_time, cur_time; + int ret = 0; + + gettimeofday(&tv, NULL); + start_time = tv.tv_sec + 1E-6*tv.tv_usec; + cur_time = start_time; + + /* Remove this server from the list */ + PMIX_LIST_FOREACH_SAFE(server, next, server_list, server_info_t) { + if (server->pid == getpid()) { + /* remove himself */ + remove_server_item(server); + break; + } + } + + while (!pmix_list_is_empty(server_list) && + (timeout >= (cur_time - start_time))) { + pid = waitpid(-1, &status, 0); + if (pid >= 0) { + PMIX_LIST_FOREACH_SAFE(server, next, server_list, server_info_t) { + if (server->pid == pid) { + 
TEST_VERBOSE(("server %d finalize PID:%d with status %d", server->idx, + server->pid, WEXITSTATUS(status))); + ret += WEXITSTATUS(status); + remove_server_item(server); + } + } + } + // calculate current timestamp + gettimeofday(&tv, NULL); + cur_time = tv.tv_sec + 1E-6*tv.tv_usec; + } + + return ret; +} + +static int server_fwd_msg(msg_hdr_t *msg_hdr, char *buf, size_t size) +{ + server_info_t *tmp_server, *server = NULL; + int rc = PMIX_SUCCESS; + + PMIX_LIST_FOREACH(tmp_server, server_list, server_info_t) { + if (tmp_server->idx == msg_hdr->dst_id) { + server = tmp_server; + break; + } + } + if (NULL == server) { + return PMIX_ERROR; + } + rc = write(server->wr_fd, msg_hdr, sizeof(msg_hdr_t)); + if (rc != sizeof(msg_hdr_t)) { + return PMIX_ERROR; + } + rc = write(server->wr_fd, buf, size); + if (rc != (ssize_t)size) { + return PMIX_ERROR; + } + return PMIX_SUCCESS; +} + +static int server_send_msg(msg_hdr_t *msg_hdr, char *data, size_t size) +{ + size_t ret = 0; + server_info_t *server = NULL, *server_tmp; + if (0 == my_server_id) { + PMIX_LIST_FOREACH(server_tmp, server_list, server_info_t) { + if (server_tmp->idx == msg_hdr->dst_id) { + server = server_tmp; + break; + } + } + if (NULL == server) { + abort(); + } + } else { + server = (server_info_t *)pmix_list_get_first(server_list); + } + + ret += write(server->wr_fd, msg_hdr, sizeof(msg_hdr_t)); + ret += write(server->wr_fd, data, size); + if (ret != (sizeof(*msg_hdr) + size)) { + return PMIX_ERROR; + } + return PMIX_SUCCESS; +} + +static void _send_procs_cb(pmix_status_t status, const char *data, + size_t ndata, void *cbdata, + pmix_release_cbfunc_t relfn, void *relcbd) +{ + server_info_t *server = (server_info_t*)cbdata; + + server_unpack_procs((char*)data, ndata); + free((char*)data); + PMIX_WAKEUP_THREAD(&server->lock); +} + +static int server_send_procs(void) +{ + server_info_t *server; + msg_hdr_t msg_hdr; + int rc = PMIX_SUCCESS; + char *buf = NULL; + + if (0 == my_server_id) { + server = 
my_server_info; + } else { + server = (server_info_t *)pmix_list_get_first(server_list); + } + + msg_hdr.cmd = CMD_FENCE_CONTRIB; + msg_hdr.dst_id = 0; + msg_hdr.src_id = my_server_id; + msg_hdr.size = server_pack_procs(my_server_id, &buf, 0); + server->modex_cbfunc = _send_procs_cb; + server->cbdata = (void*)server; + + server->lock.active = true; + + if (PMIX_SUCCESS != (rc = server_send_msg(&msg_hdr, buf, msg_hdr.size))) { + if (buf) { + free(buf); + } + return PMIX_ERROR; + } + if (buf) { + free(buf); + } + + PMIX_WAIT_THREAD(&server->lock); + return PMIX_SUCCESS; +} + +int server_barrier(void) +{ + server_info_t *server; + msg_hdr_t msg_hdr; + int rc = PMIX_SUCCESS; + + if (0 == my_server_id) { + server = my_server_info; + } else { + server = (server_info_t *)pmix_list_get_first(server_list); + } + + msg_hdr.cmd = CMD_BARRIER_REQUEST; + msg_hdr.dst_id = 0; + msg_hdr.src_id = my_server_id; + msg_hdr.size = 0; + + server->lock.active = true; + + if (PMIX_SUCCESS != (rc = server_send_msg(&msg_hdr, NULL, 0))) { + return PMIX_ERROR; + } + PMIX_WAIT_THREAD(&server->lock); + + return PMIX_SUCCESS; +} + +static void _libpmix_cb(void *cbdata) +{ + char *ptr = (char*)cbdata; + if (ptr) { + free(ptr); + } +} + +static void server_read_cb(int fd, short event, void *arg) +{ + server_info_t *server = (server_info_t*)arg; + msg_hdr_t msg_hdr; + char *msg_buf = NULL; + static char *fence_buf = NULL; + int rc; + static size_t barrier_cnt = 0; + static size_t contrib_cnt = 0; + static size_t fence_buf_offset = 0; + + rc = read(server->rd_fd, &msg_hdr, sizeof(msg_hdr_t)); + if (rc <= 0) { + return; + } + if (msg_hdr.size) { + msg_buf = (char*) malloc(sizeof(char) * msg_hdr.size); + rc += read(server->rd_fd, msg_buf, msg_hdr.size); + } + if (rc != (int)(sizeof(msg_hdr_t) + msg_hdr.size)) { + TEST_ERROR(("error read from %d", server->idx)); + } + + if (my_server_id != msg_hdr.dst_id) { + server_fwd_msg(&msg_hdr, msg_buf, msg_hdr.size); + free(msg_buf); + return; + } + + 
switch(msg_hdr.cmd) { + case CMD_BARRIER_REQUEST: + barrier_cnt++; + TEST_VERBOSE(("CMD_BARRIER_REQ req from %d cnt %lu", msg_hdr.src_id, + (unsigned long)barrier_cnt)); + if (pmix_list_get_size(server_list) == barrier_cnt) { + barrier_cnt = 0; /* reset barrier counter */ + server_info_t *tmp_server; + PMIX_LIST_FOREACH(tmp_server, server_list, server_info_t) { + msg_hdr_t resp_hdr; + resp_hdr.dst_id = tmp_server->idx; + resp_hdr.src_id = my_server_id; + resp_hdr.cmd = CMD_BARRIER_RESPONSE; + resp_hdr.size = 0; + server_send_msg(&resp_hdr, NULL, 0); + } + } + break; + case CMD_BARRIER_RESPONSE: + TEST_VERBOSE(("%d: CMD_BARRIER_RESP", my_server_id)); + PMIX_WAKEUP_THREAD(&server->lock); + break; + case CMD_FENCE_CONTRIB: + contrib_cnt++; + if (msg_hdr.size > 0) { + fence_buf = (char*)realloc((void*)fence_buf, + fence_buf_offset + msg_hdr.size); + memcpy(fence_buf + fence_buf_offset, msg_buf, msg_hdr.size); + fence_buf_offset += msg_hdr.size; + free(msg_buf); + msg_buf = NULL; + } + + TEST_VERBOSE(("CMD_FENCE_CONTRIB req from %d cnt %lu size %d", + msg_hdr.src_id, (unsigned long)contrib_cnt, msg_hdr.size)); + if (pmix_list_get_size(server_list) == contrib_cnt) { + server_info_t *tmp_server; + PMIX_LIST_FOREACH(tmp_server, server_list, server_info_t) { + msg_hdr_t resp_hdr; + resp_hdr.dst_id = tmp_server->idx; + resp_hdr.src_id = my_server_id; + resp_hdr.cmd = CMD_FENCE_COMPLETE; + resp_hdr.size = fence_buf_offset; + server_send_msg(&resp_hdr, fence_buf, fence_buf_offset); + } + TEST_VERBOSE(("CMD_FENCE_CONTRIB complete, size %lu", + (unsigned long)fence_buf_offset)); + if (fence_buf) { + free(fence_buf); + fence_buf = NULL; + fence_buf_offset = 0; + } + contrib_cnt = 0; + } + break; + case CMD_FENCE_COMPLETE: + TEST_VERBOSE(("%d: CMD_FENCE_COMPLETE size %d", my_server_id, + msg_hdr.size)); + server->modex_cbfunc(PMIX_SUCCESS, msg_buf, msg_hdr.size, + server->cbdata, _libpmix_cb, msg_buf); + msg_buf = NULL; + break; + case CMD_DMDX_REQUEST: { + int *sender_id; + 
pmix_proc_t proc; + if (NULL == msg_buf) { + abort(); + } + sender_id = (int*)malloc(sizeof(int)); + server_unpack_dmdx(msg_buf, sender_id, &proc); + TEST_VERBOSE(("%d: CMD_DMDX_REQUEST from %d: %s:%d", my_server_id, + *sender_id, proc.nspace, proc.rank)); + rc = PMIx_server_dmodex_request(&proc, _dmdx_cb, (void*)sender_id); + break; + } + case CMD_DMDX_RESPONSE: + TEST_VERBOSE(("%d: CMD_DMDX_RESPONSE", my_server_id)); + server->modex_cbfunc(PMIX_SUCCESS, msg_buf, msg_hdr.size, + server->cbdata, _libpmix_cb, msg_buf); + msg_buf = NULL; + break; + } + if (NULL != msg_buf) { + free(msg_buf); + } +} + +int server_fence_contrib(char *data, size_t ndata, + pmix_modex_cbfunc_t cbfunc, void *cbdata) +{ + server_info_t *server; + msg_hdr_t msg_hdr; + int rc = PMIX_SUCCESS; + + if (0 == my_server_id) { + server = my_server_info; + } else { + server = (server_info_t *)pmix_list_get_first(server_list); + } + msg_hdr.cmd = CMD_FENCE_CONTRIB; + msg_hdr.dst_id = 0; + msg_hdr.src_id = my_server_id; + msg_hdr.size = ndata; + server->modex_cbfunc = cbfunc; + server->cbdata = cbdata; + + if (PMIX_SUCCESS != (rc = server_send_msg(&msg_hdr, data, ndata))) { + return PMIX_ERROR; + } + return rc; +} + +static int server_find_id(const char *nspace, int rank) +{ + server_nspace_t *tmp; + + PMIX_LIST_FOREACH(tmp, server_nspace, server_nspace_t) { + if (0 == strcmp(tmp->name, nspace)) { + return tmp->task_map[rank]; + } + } + return -1; +} + +static int server_pack_dmdx(int sender_id, const char *nspace, int rank, + char **buf) +{ + size_t buf_size = sizeof(int) + PMIX_MAX_NSLEN +1 + sizeof(int); + char *ptr; + + *buf = (char*)malloc(buf_size); + ptr = *buf; + + memcpy(ptr, &sender_id, sizeof(int)); + ptr += sizeof(int); + + memcpy(ptr, nspace, PMIX_MAX_NSLEN+1); + ptr += PMIX_MAX_NSLEN +1; + + memcpy(ptr, &rank, sizeof(int)); + ptr += sizeof(int); + + return buf_size; +} + +static void server_unpack_dmdx(char *buf, int *sender, pmix_proc_t *proc) +{ + char *ptr = buf; + + *sender = *(int 
*)ptr; + ptr += sizeof(int); + + memcpy(proc->nspace, ptr, PMIX_MAX_NSLEN +1); + ptr += PMIX_MAX_NSLEN +1; + + proc->rank = *(int *)ptr; + ptr += sizeof(int); +} + + +static void _dmdx_cb(int status, char *data, size_t sz, void *cbdata) +{ + msg_hdr_t msg_hdr; + int *sender_id = (int*)cbdata; + + msg_hdr.cmd = CMD_DMDX_RESPONSE; + msg_hdr.src_id = my_server_id; + msg_hdr.size = sz; + msg_hdr.dst_id = *sender_id; + TEST_VERBOSE(("srv #%d: DMDX RESPONSE: receiver=%d, size=%lu,", + my_server_id, *sender_id, (unsigned long)sz)); + free(sender_id); + + server_send_msg(&msg_hdr, data, sz); +} + +int server_dmdx_get(const char *nspace, int rank, + pmix_modex_cbfunc_t cbfunc, void *cbdata) +{ + server_info_t *server = NULL, *tmp; + msg_hdr_t msg_hdr; + pmix_status_t rc = PMIX_SUCCESS; + char *buf = NULL; + + + if (0 > (msg_hdr.dst_id = server_find_id(nspace, rank))) { + TEST_ERROR(("%d: server cannot found for %s:%d", my_server_id, nspace, rank)); + goto error; + } + + if (0 == my_server_id) { + PMIX_LIST_FOREACH(tmp, server_list, server_info_t) { + if (tmp->idx == msg_hdr.dst_id) { + server = tmp; + break; + } + } + } else { + server = (server_info_t *)pmix_list_get_first(server_list); + } + + if (server == NULL) { + goto error; + } + + msg_hdr.cmd = CMD_DMDX_REQUEST; + msg_hdr.src_id = my_server_id; + msg_hdr.size = server_pack_dmdx(my_server_id, nspace, rank, &buf); + server->modex_cbfunc = cbfunc; + server->cbdata = cbdata; + + if (PMIX_SUCCESS != (rc = server_send_msg(&msg_hdr, buf, msg_hdr.size))) { + rc = PMIX_ERROR; + } + free(buf); + return rc; + +error: + cbfunc(PMIX_ERROR, NULL, 0, cbdata, NULL, 0); + return PMIX_ERROR; +} + +int server_init(test_params *params) +{ + pmix_info_t info[1]; + int rc = PMIX_SUCCESS; + + /* fork/init servers procs */ + if (params->nservers >= 1) { + int i; + server_info_t *server_info = NULL; + server_list = PMIX_NEW(pmix_list_t); + + TEST_VERBOSE(("pmix server %d started PID:%d", my_server_id, getpid())); + for (i = params->nservers 
- 1; i >= 0; i--) { + pid_t pid; + server_info = PMIX_NEW(server_info_t); + + int fd1[2]; + int fd2[2]; + + pipe(fd1); + pipe(fd2); + + if (0 != i) { + pid = fork(); + if (pid < 0) { + TEST_ERROR(("Fork failed")); + return pid; + } + if (pid == 0) { + server_list = PMIX_NEW(pmix_list_t); + my_server_id = i; + server_info->idx = 0; + server_info->pid = getppid(); + server_info->rd_fd = fd1[0]; + server_info->wr_fd = fd2[1]; + close(fd1[1]); + close(fd2[0]); + PMIX_CONSTRUCT_LOCK(&server_info->lock); + pmix_list_append(server_list, &server_info->super); + break; + } + server_info->idx = i; + server_info->pid = pid; + server_info->wr_fd = fd1[1]; + server_info->rd_fd = fd2[0]; + PMIX_CONSTRUCT_LOCK(&server_info->lock); + close(fd1[0]); + close(fd2[1]); + } else { + my_server_info = server_info; + server_info->pid = getpid(); + server_info->idx = 0; + server_info->rd_fd = fd1[0]; + server_info->wr_fd = fd1[1]; + PMIX_CONSTRUCT_LOCK(&server_info->lock); + close(fd2[0]); + close(fd2[1]); + } + TEST_VERBOSE(("%d: add server %d", my_server_id, server_info->idx)); + pmix_list_append(server_list, &server_info->super); + } + } + /* compute local proc size */ + params->lsize = (params->nprocs % params->nservers) > (uint32_t)my_server_id ? 
+ params->nprocs / params->nservers + 1 : + params->nprocs / params->nservers; + /* setup the server library */ + (void)strncpy(info[0].key, PMIX_SOCKET_MODE, PMIX_MAX_KEYLEN); + info[0].value.type = PMIX_UINT32; + info[0].value.data.uint32 = 0666; + + server_nspace = PMIX_NEW(pmix_list_t); + + if (PMIX_SUCCESS != (rc = PMIx_server_init(&mymodule, info, 1))) { + TEST_ERROR(("Init failed with error %d", rc)); + goto error; + } + + /* register test server read thread */ + if (params->nservers && pmix_list_get_size(server_list)) { + server_info_t *server; + PMIX_LIST_FOREACH(server, server_list, server_info_t) { + server->evread = pmix_event_new(pmix_globals.evbase, server->rd_fd, + EV_READ|EV_PERSIST, server_read_cb, server); + pmix_event_add(server->evread, NULL); + } + } + + /* register the errhandler */ + PMIx_Register_event_handler(NULL, 0, NULL, 0, + errhandler, errhandler_reg_callbk, NULL); + + if (0 != (rc = server_barrier())) { + goto error; + } + + return PMIX_SUCCESS; + +error: + PMIX_DESTRUCT(server_nspace); + return rc; +} + +int server_finalize(test_params *params) +{ + int rc = PMIX_SUCCESS; + int total_ret = 0; + + if (0 != (rc = server_barrier())) { + total_ret++; + goto exit; + } + + if (0 != my_server_id) { + server_info_t *server = (server_info_t*)pmix_list_get_first(server_list); + remove_server_item(server); + } + + if (params->nservers && 0 == my_server_id) { + int ret; + /* wait for all servers are finished */ + ret = srv_wait_all(10.0); + if (!pmix_list_is_empty(server_list)) { + total_ret += ret; + } + PMIX_LIST_RELEASE(server_list); + TEST_VERBOSE(("SERVER %d FINALIZE PID:%d with status %d", + my_server_id, getpid(), ret)); + if (0 == total_ret) { + TEST_OUTPUT(("Test finished OK!")); + } else { + rc = PMIX_ERROR; + } + } + PMIX_LIST_RELEASE(server_nspace); + + /* finalize the server library */ + if (PMIX_SUCCESS != (rc = PMIx_server_finalize())) { + TEST_ERROR(("Finalize failed with error %d", rc)); + total_ret += rc; + goto exit; + } + 
+exit: + return total_ret; +} + +int server_launch_clients(int local_size, int univ_size, int base_rank, + test_params *params, char *** client_env, char ***base_argv) +{ + int n; + uid_t myuid; + gid_t mygid; + char *ranks = NULL; + char digit[MAX_DIGIT_LEN]; + int rc; + static int cli_counter = 0; + static int num_ns = 0; + pmix_proc_t proc; + int rank_counter = 0; + server_nspace_t *nspace_item = PMIX_NEW(server_nspace_t); + + TEST_VERBOSE(("%d: lsize: %d, base rank %d, local_size %d, univ_size %d", + my_server_id, + params->lsize, + base_rank, + local_size, + univ_size)); + + TEST_VERBOSE(("Setting job info")); + (void)snprintf(proc.nspace, PMIX_MAX_NSLEN, "%s-%d", TEST_NAMESPACE, num_ns); + set_namespace(local_size, univ_size, base_rank, proc.nspace); + if (NULL != ranks) { + free(ranks); + } + /* add namespace entry */ + nspace_item->ntasks = univ_size; + nspace_item->ltasks = local_size; + nspace_item->task_map = (int*)malloc(sizeof(int) * univ_size); + memset(nspace_item->task_map, -1, sizeof(int)*univ_size); + strcpy(nspace_item->name, proc.nspace); + pmix_list_append(server_nspace, &nspace_item->super); + for (n = 0; n < local_size; n++) { + proc.rank = base_rank + n; + nspace_item->task_map[proc.rank] = my_server_id; + } + + server_send_procs(); + + myuid = getuid(); + mygid = getgid(); + + /* fork/exec the test */ + for (n = 0; n < local_size; n++) { + proc.rank = base_rank + rank_counter; + rc = PMIx_server_register_client(&proc, myuid, mygid, NULL, NULL, NULL); + if (PMIX_SUCCESS != rc && PMIX_OPERATION_SUCCEEDED != rc) { + TEST_ERROR(("Server register client failed with error %d", rc)); + PMIx_server_finalize(); + cli_kill_all(); + return 0; + } + if (PMIX_SUCCESS != (rc = PMIx_server_setup_fork(&proc, client_env))) {//n + TEST_ERROR(("Server fork setup failed with error %d", rc)); + PMIx_server_finalize(); + cli_kill_all(); + return rc; + } + + cli_info[cli_counter].pid = fork(); + if (cli_info[cli_counter].pid < 0) { + TEST_ERROR(("Fork failed")); 
+ PMIx_server_finalize(); + cli_kill_all(); + return 0; + } + cli_info[cli_counter].rank = proc.rank;//n + cli_info[cli_counter].ns = strdup(proc.nspace); + + char **client_argv = pmix_argv_copy(*base_argv); + + /* add two last arguments: -r */ + sprintf(digit, "%d", proc.rank); + pmix_argv_append_nosize(&client_argv, "-r"); + pmix_argv_append_nosize(&client_argv, digit); + + pmix_argv_append_nosize(&client_argv, "-s"); + pmix_argv_append_nosize(&client_argv, proc.nspace); + + sprintf(digit, "%d", univ_size); + pmix_argv_append_nosize(&client_argv, "--ns-size"); + pmix_argv_append_nosize(&client_argv, digit); + + sprintf(digit, "%d", num_ns); + pmix_argv_append_nosize(&client_argv, "--ns-id"); + pmix_argv_append_nosize(&client_argv, digit); + + sprintf(digit, "%d", 0); + pmix_argv_append_nosize(&client_argv, "--base-rank"); + pmix_argv_append_nosize(&client_argv, digit); + + if (cli_info[cli_counter].pid == 0) { + if( !TEST_VERBOSE_GET() ){ + // Hide clients stdout + if (NULL == freopen("/dev/null","w", stdout)) { + return 0; + } + } + execve(params->binary, client_argv, *client_env); + /* Does not return */ + TEST_ERROR(("execve() failed")); + return 0; + } + cli_info[cli_counter].state = CLI_FORKED; + + pmix_argv_free(client_argv); + + cli_counter++; + rank_counter++; + } + num_ns++; + return rank_counter; +} diff --git a/opal/mca/pmix/pmix3x/pmix/test/test_server.h b/opal/mca/pmix/pmix3x/pmix/test/test_server.h new file mode 100644 index 00000000000..09767ea56f4 --- /dev/null +++ b/opal/mca/pmix/pmix3x/pmix/test/test_server.h @@ -0,0 +1,80 @@ +/* + * Copyright (c) 2018 Mellanox Technologies, Inc. + * All rights reserved. + * + * Copyright (c) 2018 Intel, Inc. All rights reserved. 
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + * + */ + +#ifndef TEST_SERVER_C +#define TEST_SERVER_C + +#include "pmix_server.h" +#include "test_common.h" + + +typedef enum { + CMD_BARRIER_REQUEST, + CMD_BARRIER_RESPONSE, + CMD_FENCE_CONTRIB, + CMD_FENCE_COMPLETE, + CMD_DMDX_REQUEST, + CMD_DMDX_RESPONSE +} server_cmd_t; + +typedef struct { + int dst_id; + int src_id; + int cmd; + size_t size; +} msg_hdr_t; + +struct server_info_t +{ + pmix_list_item_t super; + pid_t pid; + int idx; + int rd_fd; + int wr_fd; + pmix_event_t *evread; + pmix_lock_t lock; + pmix_modex_cbfunc_t modex_cbfunc; + void *cbdata; +}; +typedef struct server_info_t server_info_t; +PMIX_EXPORT PMIX_CLASS_DECLARATION(server_info_t); + +struct server_nspace_t +{ + pmix_list_item_t super; + char name[PMIX_MAX_NSLEN+1]; + size_t ntasks; /* total number of tasks in this namespace */ + size_t ltasks; /* local */ + int *task_map; +}; +typedef struct server_nspace_t server_nspace_t; +PMIX_EXPORT PMIX_CLASS_DECLARATION(server_nspace_t); + +extern int my_server_id; +extern pmix_list_t *server_list; +extern server_info_t *my_server_info; +extern pmix_list_t *server_nspace; + +int server_init(test_params *params); +int server_finalize(test_params *params); +int server_barrier(void); +int server_fence_contrib(char *data, size_t ndata, + pmix_modex_cbfunc_t cbfunc, void *cbdata); +int server_dmdx_get(const char *nspace, int rank, + pmix_modex_cbfunc_t cbfunc, void *cbdata); +int server_launch_clients(int local_size, int univ_size, int base_rank, + test_params *params, char *** client_env, char ***base_argv); + + +#endif // TEST_SERVER_C + diff --git a/opal/mca/pmix/pmix3x/pmix/test/utils.c b/opal/mca/pmix/pmix3x/pmix/test/utils.c index 5fb1a0e78b3..d6cd31b5415 100644 --- a/opal/mca/pmix/pmix3x/pmix/test/utils.c +++ b/opal/mca/pmix/pmix3x/pmix/test/utils.c @@ -1,6 +1,6 @@ /* - * Copyright (c) 2015-2017 Intel, Inc. All rights reserved. 
- * Copyright (c) 2015-2017 Mellanox Technologies, Inc. + * Copyright (c) 2015-2018 Intel, Inc. All rights reserved. + * Copyright (c) 2015-2018 Mellanox Technologies, Inc. * All rights reserved. * Copyright (c) 2016 Research Organization for Information Science * and Technology (RIST). All rights reserved. @@ -16,83 +16,7 @@ #include "test_common.h" #include "pmix_server.h" #include "cli_stages.h" - -static void release_cb(pmix_status_t status, void *cbdata) -{ - int *ptr = (int*)cbdata; - *ptr = 0; -} - -static void fill_seq_ranks_array(size_t nprocs, int base_rank, char **ranks) -{ - uint32_t i; - int len = 0, max_ranks_len; - if (0 >= nprocs) { - return; - } - max_ranks_len = nprocs * (MAX_DIGIT_LEN+1); - *ranks = (char*) malloc(max_ranks_len); - for (i = 0; i < nprocs; i++) { - len += snprintf(*ranks + len, max_ranks_len-len-1, "%d", i+base_rank); - if (i != nprocs-1) { - len += snprintf(*ranks + len, max_ranks_len-len-1, "%c", ','); - } - } - if (len >= max_ranks_len-1) { - free(*ranks); - *ranks = NULL; - TEST_ERROR(("Not enough allocated space for global ranks array.")); - } -} - -static void set_namespace(int nprocs, char *ranks, char *name) -{ - size_t ninfo; - pmix_info_t *info; - ninfo = 8; - char *regex, *ppn; - - PMIX_INFO_CREATE(info, ninfo); - (void)strncpy(info[0].key, PMIX_UNIV_SIZE, PMIX_MAX_KEYLEN); - info[0].value.type = PMIX_UINT32; - info[0].value.data.uint32 = nprocs; - - (void)strncpy(info[1].key, PMIX_SPAWNED, PMIX_MAX_KEYLEN); - info[1].value.type = PMIX_UINT32; - info[1].value.data.uint32 = 0; - - (void)strncpy(info[2].key, PMIX_LOCAL_SIZE, PMIX_MAX_KEYLEN); - info[2].value.type = PMIX_UINT32; - info[2].value.data.uint32 = nprocs; - - (void)strncpy(info[3].key, PMIX_LOCAL_PEERS, PMIX_MAX_KEYLEN); - info[3].value.type = PMIX_STRING; - info[3].value.data.string = strdup(ranks); - - PMIx_generate_regex(NODE_NAME, &regex); - (void)strncpy(info[4].key, PMIX_NODE_MAP, PMIX_MAX_KEYLEN); - info[4].value.type = PMIX_STRING; -
info[4].value.data.string = regex; - - PMIx_generate_ppn(ranks, &ppn); - (void)strncpy(info[5].key, PMIX_PROC_MAP, PMIX_MAX_KEYLEN); - info[5].value.type = PMIX_STRING; - info[5].value.data.string = ppn; - - (void)strncpy(info[6].key, PMIX_JOB_SIZE, PMIX_MAX_KEYLEN); - info[6].value.type = PMIX_UINT32; - info[6].value.data.uint32 = nprocs; - - (void)strncpy(info[7].key, PMIX_APPNUM, PMIX_MAX_KEYLEN); - info[7].value.type = PMIX_UINT32; - info[7].value.data.uint32 = getpid (); - - int in_progress = 1, rc; - if (PMIX_SUCCESS == (rc = PMIx_server_register_nspace(name, nprocs, info, ninfo, release_cb, &in_progress))) { - PMIX_WAIT_FOR_COMPLETION(in_progress); - } - PMIX_INFO_FREE(info, ninfo); -} +#include "test_server.h" void set_client_argv(test_params *params, char ***argv) { @@ -170,100 +94,3 @@ void set_client_argv(test_params *params, char ***argv) pmix_argv_append_nosize(argv, params->gds_mode); } } - -int launch_clients(int num_procs, char *binary, char *** client_env, char ***base_argv) -{ - int n; - uid_t myuid; - gid_t mygid; - char *ranks = NULL; - char digit[MAX_DIGIT_LEN]; - int rc; - static int counter = 0; - static int num_ns = 0; - pmix_proc_t proc; - - TEST_VERBOSE(("Setting job info")); - fill_seq_ranks_array(num_procs, counter, &ranks); - if (NULL == ranks) { - PMIx_server_finalize(); - TEST_ERROR(("fill_seq_ranks_array failed")); - return PMIX_ERROR; - } - (void)snprintf(proc.nspace, PMIX_MAX_NSLEN, "%s-%d", TEST_NAMESPACE, num_ns); - set_namespace(num_procs, ranks, proc.nspace); - if (NULL != ranks) { - free(ranks); - } - - myuid = getuid(); - mygid = getgid(); - - /* fork/exec the test */ - for (n = 0; n < num_procs; n++) { - proc.rank = counter; - if (PMIX_SUCCESS != (rc = PMIx_server_setup_fork(&proc, client_env))) {//n - TEST_ERROR(("Server fork setup failed with error %d", rc)); - PMIx_server_finalize(); - cli_kill_all(); - return rc; - } - if (PMIX_SUCCESS != (rc = PMIx_server_register_client(&proc, myuid, mygid, NULL, NULL, NULL))) {//n - 
TEST_ERROR(("Server fork setup failed with error %d", rc)); - PMIx_server_finalize(); - cli_kill_all(); - return rc; - } - - cli_info[counter].pid = fork(); - if (cli_info[counter].pid < 0) { - TEST_ERROR(("Fork failed")); - PMIx_server_finalize(); - cli_kill_all(); - return -1; - } - cli_info[counter].rank = counter;//n - cli_info[counter].ns = strdup(proc.nspace); - - char **client_argv = pmix_argv_copy(*base_argv); - - /* add two last arguments: -r */ - sprintf(digit, "%d", counter);//n - pmix_argv_append_nosize(&client_argv, "-r"); - pmix_argv_append_nosize(&client_argv, digit); - - pmix_argv_append_nosize(&client_argv, "-s"); - pmix_argv_append_nosize(&client_argv, proc.nspace); - - sprintf(digit, "%d", num_procs); - pmix_argv_append_nosize(&client_argv, "--ns-size"); - pmix_argv_append_nosize(&client_argv, digit); - - sprintf(digit, "%d", num_ns); - pmix_argv_append_nosize(&client_argv, "--ns-id"); - pmix_argv_append_nosize(&client_argv, digit); - - sprintf(digit, "%d", (counter-n)); - pmix_argv_append_nosize(&client_argv, "--base-rank"); - pmix_argv_append_nosize(&client_argv, digit); - - if (cli_info[counter].pid == 0) { - if( !TEST_VERBOSE_GET() ){ - // Hide clients stdout - if (NULL == freopen("/dev/null","w", stdout)) { - exit(1); - } - } - execve(binary, client_argv, *client_env); - /* Does not return */ - exit(0); - } - cli_info[counter].state = CLI_FORKED; - - pmix_argv_free(client_argv); - - counter++; - } - num_ns++; - return PMIX_SUCCESS; -} diff --git a/opal/mca/pmix/pmix3x/pmix/test/utils.h b/opal/mca/pmix/pmix3x/pmix/test/utils.h index bbeebaa2f7a..d6856dd7a14 100644 --- a/opal/mca/pmix/pmix3x/pmix/test/utils.h +++ b/opal/mca/pmix/pmix3x/pmix/test/utils.h @@ -1,6 +1,6 @@ /* - * Copyright (c) 2015 Intel, Inc. All rights reserved. - * Copyright (c) 2015 Mellanox Technologies, Inc. + * Copyright (c) 2015-2018 Intel, Inc. All rights reserved. + * Copyright (c) 2015-2018 Mellanox Technologies, Inc. * All rights reserved. 
* $COPYRIGHT$ * @@ -19,4 +19,3 @@ #include "test_common.h" void set_client_argv(test_params *params, char ***argv); -int launch_clients(int num_procs, char *binary, char *** client_env, char ***client_argv); diff --git a/opal/mca/pmix/pmix3x/pmix3x.c b/opal/mca/pmix/pmix3x/pmix3x.c index f8650cbe7f1..5e0e91342b0 100644 --- a/opal/mca/pmix/pmix3x/pmix3x.c +++ b/opal/mca/pmix/pmix3x/pmix3x.c @@ -1,8 +1,8 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* * Copyright (c) 2014-2018 Intel, Inc. All rights reserved. - * Copyright (c) 2014-2017 Research Organization for Information Science - * and Technology (RIST). All rights reserved. + * Copyright (c) 2014-2019 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * Copyright (c) 2014-2015 Mellanox Technologies, Inc. * All rights reserved. * Copyright (c) 2016 Cisco Systems, Inc. All rights reserved. @@ -364,37 +364,13 @@ void pmix3x_event_hdlr(size_t evhdlr_registration_id, return; } -static void cleanup_cbfunc(pmix_status_t status, - pmix_info_t *info, size_t ninfo, - void *cbdata, - pmix_release_cbfunc_t release_fn, - void *release_cbdata) -{ - opal_pmix_lock_t *lk = (opal_pmix_lock_t*)cbdata; - - OPAL_POST_OBJECT(lk); - - /* let the library release the data and cleanup from - * the operation */ - if (NULL != release_fn) { - release_fn(release_cbdata); - } - - /* release the block */ - lk->status = pmix3x_convert_rc(status); - OPAL_PMIX_WAKEUP_THREAD(lk); -} - static int pmix3x_register_cleanup(char *path, bool directory, bool ignore, bool jobscope) { - opal_pmix_lock_t lk; pmix_info_t pinfo[3]; size_t n, ninfo=0; pmix_status_t rc; int ret; - OPAL_PMIX_CONSTRUCT_LOCK(&lk); - if (ignore) { /* they want this path ignored */ PMIX_INFO_LOAD(&pinfo[ninfo], PMIX_CLEANUP_IGNORE, path, PMIX_STRING); @@ -415,18 +391,12 @@ static int pmix3x_register_cleanup(char *path, bool directory, bool ignore, bool /* if they want this applied to the job, then indicate so */ if 
(jobscope) { - rc = PMIx_Job_control_nb(NULL, 0, pinfo, ninfo, cleanup_cbfunc, (void*)&lk); + rc = PMIx_Job_control_nb(NULL, 0, pinfo, ninfo, NULL, NULL); } else { /* only applies to us */ - rc = PMIx_Job_control_nb(&mca_pmix_pmix3x_component.myproc, 1, pinfo, ninfo, cleanup_cbfunc, (void*)&lk); - } - if (PMIX_SUCCESS != rc) { - ret = pmix3x_convert_rc(rc); - } else { - OPAL_PMIX_WAIT_THREAD(&lk); - ret = lk.status; + rc = PMIx_Job_control_nb(&mca_pmix_pmix3x_component.myproc, 1, pinfo, ninfo, NULL, NULL); } - OPAL_PMIX_DESTRUCT_LOCK(&lk); + ret = pmix3x_convert_rc(rc); for (n=0; n < ninfo; n++) { PMIX_INFO_DESTRUCT(&pinfo[n]); } @@ -536,6 +506,10 @@ pmix_status_t pmix3x_convert_opalrc(int rc) return PMIX_ERROR; case OPAL_SUCCESS: return PMIX_SUCCESS; + + case OPAL_OPERATION_SUCCEEDED: + return PMIX_OPERATION_SUCCEEDED; + default: return rc; } @@ -629,6 +603,10 @@ int pmix3x_convert_rc(pmix_status_t rc) return OPAL_ERROR; case PMIX_SUCCESS: return OPAL_SUCCESS; + + case PMIX_OPERATION_SUCCEEDED: + return OPAL_OPERATION_SUCCEEDED; + default: return rc; } @@ -1128,10 +1106,12 @@ int pmix3x_value_unload(opal_value_t *kv, OPAL_ERROR_LOG(OPAL_ERR_NOT_SUPPORTED); rc = OPAL_ERR_NOT_SUPPORTED; break; +#ifdef PMIX_MODEX case PMIX_MODEX: OPAL_ERROR_LOG(OPAL_ERR_NOT_SUPPORTED); rc = OPAL_ERR_NOT_SUPPORTED; break; +#endif /* PMIX_MODEX */ case PMIX_PERSIST: kv->type = OPAL_PERSIST; kv->data.uint8 = pmix3x_convert_persist(v->data.persist); @@ -1240,10 +1220,12 @@ int pmix3x_value_unload(opal_value_t *kv, OPAL_ERROR_LOG(OPAL_ERR_NOT_SUPPORTED); rc = OPAL_ERR_NOT_SUPPORTED; break; +#ifdef PMIX_INFO_ARRAY case PMIX_INFO_ARRAY: OPAL_ERROR_LOG(OPAL_ERR_NOT_SUPPORTED); rc = OPAL_ERR_NOT_SUPPORTED; break; +#endif /* PMIX_INFO_ARRAY */ case PMIX_IOF_CHANNEL: OPAL_ERROR_LOG(OPAL_ERR_NOT_SUPPORTED); rc = OPAL_ERR_NOT_SUPPORTED; diff --git a/opal/mca/pmix/pmix3x/pmix3x_client.c b/opal/mca/pmix/pmix3x/pmix3x_client.c index 61fc6825782..caf1a409f4a 100644 --- 
a/opal/mca/pmix/pmix3x/pmix3x_client.c +++ b/opal/mca/pmix/pmix3x/pmix3x_client.c @@ -8,6 +8,10 @@ * Copyright (c) 2016 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2016 Los Alamos National Security, LLC. All rights * reserved. + * Copyright (c) 2017-2018 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2019 IBM Corporation. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -98,11 +102,6 @@ int pmix3x_client_init(opal_list_t *ilist) ninfo = 0; } - /* check for direct modex use-case */ - if (opal_pmix_base_async_modex && !opal_pmix_collect_all_data) { - opal_setenv("PMIX_MCA_gds", "hash", true, &environ); - } - OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); rc = PMIx_Init(&mca_pmix_pmix3x_component.myproc, pinfo, ninfo); if (NULL != pinfo) { @@ -169,6 +168,8 @@ int pmix3x_client_finalize(void) { pmix_status_t rc; opal_pmix3x_event_t *event, *ev2; + opal_list_t evlist; + OBJ_CONSTRUCT(&evlist, opal_list_t); opal_output_verbose(1, opal_pmix_base_framework.framework_output, "PMIx_client finalize"); @@ -182,12 +183,19 @@ int pmix3x_client_finalize(void) OPAL_PMIX_DESTRUCT_LOCK(&event->lock); OPAL_PMIX_CONSTRUCT_LOCK(&event->lock); PMIx_Deregister_event_handler(event->index, dereg_cbfunc, (void*)event); - OPAL_PMIX_WAIT_THREAD(&event->lock); opal_list_remove_item(&mca_pmix_pmix3x_component.events, &event->super); - OBJ_RELEASE(event); + /* wait and release outside the loop to avoid double mutex + * interlock */ + opal_list_append(&evlist, &event->super); } } OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); + OPAL_LIST_FOREACH_SAFE(event, ev2, &evlist, opal_pmix3x_event_t) { + OPAL_PMIX_WAIT_THREAD(&event->lock); + opal_list_remove_item(&evlist, &event->super); + OBJ_RELEASE(event); + } + OBJ_DESTRUCT(&evlist); rc = PMIx_Finalize(NULL, 0); return pmix3x_convert_rc(rc); @@ -1621,6 +1629,7 @@ int pmix3x_job_control(opal_list_t *targets, 
OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); return OPAL_ERR_NOT_INITIALIZED; } +abort(); /* create the caddy */ op = OBJ_NEW(pmix3x_opcaddy_t); diff --git a/opal/mca/pmix/pmix3x/pmix3x_component.c b/opal/mca/pmix/pmix3x/pmix3x_component.c index dd685272cc9..7770ff8243f 100644 --- a/opal/mca/pmix/pmix3x/pmix3x_component.c +++ b/opal/mca/pmix/pmix3x/pmix3x_component.c @@ -1,6 +1,6 @@ /* * Copyright (c) 2014-2018 Intel, Inc. All rights reserved. - * Copyright (c) 2014-2015 Research Organization for Information Science + * Copyright (c) 2014-2018 Research Organization for Information Science * and Technology (RIST). All rights reserved. * Copyright (c) 2016-2018 Cisco Systems, Inc. All rights reserved * $COPYRIGHT$ @@ -86,6 +86,7 @@ mca_pmix_pmix3x_component_t mca_pmix_pmix3x_component = { static int external_register(void) { mca_base_component_t *component = &mca_pmix_pmix3x_component.super.base_version; + char *tmp = NULL; mca_pmix_pmix3x_component.silence_warning = false; (void) mca_base_component_var_register (component, "silence_warning", @@ -97,6 +98,7 @@ static int external_register(void) asprintf(&pmix_library_version, "PMIx library version %s (embedded in Open MPI)", PMIx_Get_version()); + tmp = pmix_library_version; (void) mca_base_component_var_register(component, "library_version", "Version of the underlying PMIx library", MCA_BASE_VAR_TYPE_STRING, @@ -104,6 +106,7 @@ static int external_register(void) OPAL_INFO_LVL_4, MCA_BASE_VAR_SCOPE_CONSTANT, &pmix_library_version); + free(tmp); return OPAL_SUCCESS; } @@ -118,7 +121,7 @@ static int external_open(void) OBJ_CONSTRUCT(&mca_pmix_pmix3x_component.dmdx, opal_list_t); version = PMIx_Get_version(); - if ('3' != version[0]) { + if ('3' > version[0]) { opal_show_help("help-pmix-base.txt", "incorrect-pmix", true, version, "v3.x"); return OPAL_ERROR; @@ -131,6 +134,7 @@ static int external_close(void) OPAL_LIST_DESTRUCT(&mca_pmix_pmix3x_component.jobids); OPAL_LIST_DESTRUCT(&mca_pmix_pmix3x_component.events); 
OPAL_LIST_DESTRUCT(&mca_pmix_pmix3x_component.dmdx); + return OPAL_SUCCESS; } diff --git a/opal/mca/pmix/pmix3x/pmix3x_server_south.c b/opal/mca/pmix/pmix3x/pmix3x_server_south.c index 203ddefaed5..c50f6d8d0ec 100644 --- a/opal/mca/pmix/pmix3x/pmix3x_server_south.c +++ b/opal/mca/pmix/pmix3x/pmix3x_server_south.c @@ -9,6 +9,10 @@ * Copyright (c) 2016 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2017 Los Alamos National Security, LLC. All rights * reserved. + * Copyright (c) 2017-2018 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2019 IBM Corporation. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -129,11 +133,6 @@ int pmix3x_server_init(opal_pmix_server_module_t *module, } } - /* check for direct modex use-case */ - if (opal_pmix_base_async_modex && !opal_pmix_collect_all_data) { - opal_setenv("PMIX_MCA_gds", "hash", true, &environ); - } - /* insert ourselves into our list of jobids - it will be the * first, and so we'll check it first */ job = OBJ_NEW(opal_pmix3x_jobid_trkr_t); @@ -186,6 +185,8 @@ int pmix3x_server_finalize(void) { pmix_status_t rc; opal_pmix3x_event_t *event, *ev2; + opal_list_t evlist; + OBJ_CONSTRUCT(&evlist, opal_list_t); OPAL_PMIX_ACQUIRE_THREAD(&opal_pmix_base.lock); --opal_pmix_base.initialized; @@ -196,12 +197,19 @@ int pmix3x_server_finalize(void) OPAL_PMIX_DESTRUCT_LOCK(&event->lock); OPAL_PMIX_CONSTRUCT_LOCK(&event->lock); PMIx_Deregister_event_handler(event->index, dereg_cbfunc, (void*)event); - OPAL_PMIX_WAIT_THREAD(&event->lock); opal_list_remove_item(&mca_pmix_pmix3x_component.events, &event->super); - OBJ_RELEASE(event); + /* wait and release outside the loop to avoid double mutex + * interlock */ + opal_list_append(&evlist, &event->super); } } OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); + OPAL_LIST_FOREACH_SAFE(event, ev2, &evlist, opal_pmix3x_event_t) { + OPAL_PMIX_WAIT_THREAD(&event->lock); + 
opal_list_remove_item(&evlist, &event->super); + OBJ_RELEASE(event); + } + OBJ_DESTRUCT(&evlist); rc = PMIx_server_finalize(); return pmix3x_convert_rc(rc); } diff --git a/opal/mca/pmix/pmix4x/openpmix/config/autogen_found_items.m4 b/opal/mca/pmix/pmix4x/openpmix/config/autogen_found_items.m4 new file mode 100644 index 00000000000..646cb10c2ee --- /dev/null +++ b/opal/mca/pmix/pmix4x/openpmix/config/autogen_found_items.m4 @@ -0,0 +1,115 @@ +dnl +dnl $HEADER$ +dnl +dnl --------------------------------------------------------------------------- +dnl This file is automatically created by autogen.pl; it should not +dnl be edited by hand!! +dnl +dnl Generated by wbailey2 at Wed Nov 6 15:33:32 2019 +dnl on wbailey2-VirtualBox. +dnl --------------------------------------------------------------------------- + +m4_define([autogen_platform_file], []) + + +dnl Project names +m4_define([project_name_long], [PMIx]) +m4_define([project_name_short], [PMIx]) + +dnl --------------------------------------------------------------------------- +dnl --------------------------------------------------------------------------- +dnl --------------------------------------------------------------------------- + +dnl MCA information +dnl --------------------------------------------------------------------------- + +dnl Frameworks in the pmix project and their corresponding directories +m4_define([mca_pmix_framework_list], [common, bfrops, gds, pcompress, pdl, pfexec, pif, pinstalldirs, plog, pmdl, pnet, preg, psec, psensor, pshmem, psquash, ptl]) + +dnl Components in the pmix / common framework +m4_define([mca_pmix_common_m4_config_component_list], [dstore]) +m4_define([mca_pmix_common_no_config_component_list], []) + +dnl Components in the pmix / bfrops framework +m4_define([mca_pmix_bfrops_m4_config_component_list], []) +m4_define([mca_pmix_bfrops_no_config_component_list], [v12, v20, v21, v3, v4]) + +dnl Components in the pmix / gds framework 
+m4_define([mca_pmix_gds_m4_config_component_list], []) +m4_define([mca_pmix_gds_no_config_component_list], [ds12, ds21, hash]) + +dnl Components in the pmix / pcompress framework +m4_define([mca_pmix_pcompress_m4_config_component_list], [zlib]) +m4_define([mca_pmix_pcompress_no_config_component_list], []) + +dnl Components in the pmix / pdl framework +m4_define([mca_pmix_pdl_m4_config_component_list], [pdlopen, plibltdl]) +m4_define([mca_pmix_pdl_no_config_component_list], []) + +dnl Components in the pmix / pfexec framework +m4_define([mca_pmix_pfexec_m4_config_component_list], [linux]) +m4_define([mca_pmix_pfexec_no_config_component_list], []) + +dnl Components in the pmix / pif framework +m4_define([mca_pmix_pif_m4_config_component_list], [bsdx_ipv4, bsdx_ipv6, linux_ipv6, posix_ipv4, solaris_ipv6]) +m4_define([mca_pmix_pif_no_config_component_list], []) + +dnl Components in the pmix / pinstalldirs framework +m4_define([mca_pmix_pinstalldirs_m4_config_component_list], [config, env]) +m4_define([mca_pmix_pinstalldirs_no_config_component_list], []) + +dnl Components in the pmix / plog framework +m4_define([mca_pmix_plog_m4_config_component_list], [syslog]) +m4_define([mca_pmix_plog_no_config_component_list], [default, stdfd]) + +dnl Components in the pmix / pmdl framework +m4_define([mca_pmix_pmdl_m4_config_component_list], []) +m4_define([mca_pmix_pmdl_no_config_component_list], [ompi]) + +dnl Components in the pmix / pnet framework +m4_define([mca_pmix_pnet_m4_config_component_list], []) +m4_define([mca_pmix_pnet_no_config_component_list], [tcp, test]) + +dnl Components in the pmix / preg framework +m4_define([mca_pmix_preg_m4_config_component_list], []) +m4_define([mca_pmix_preg_no_config_component_list], [compress, native]) + +dnl Components in the pmix / psec framework +m4_define([mca_pmix_psec_m4_config_component_list], [munge]) +m4_define([mca_pmix_psec_no_config_component_list], [dummy_handshake, native, none]) + +dnl Components in the pmix / psensor 
framework +m4_define([mca_pmix_psensor_m4_config_component_list], []) +m4_define([mca_pmix_psensor_no_config_component_list], [file, heartbeat]) + +dnl Components in the pmix / pshmem framework +m4_define([mca_pmix_pshmem_m4_config_component_list], []) +m4_define([mca_pmix_pshmem_no_config_component_list], [mmap]) + +dnl Components in the pmix / psquash framework +m4_define([mca_pmix_psquash_m4_config_component_list], []) +m4_define([mca_pmix_psquash_no_config_component_list], [flex128, native]) + +dnl Components in the pmix / ptl framework +m4_define([mca_pmix_ptl_m4_config_component_list], []) +m4_define([mca_pmix_ptl_no_config_component_list], [tcp, usock]) + +dnl --------------------------------------------------------------------------- + +dnl List of configure.m4 files to include +m4_include([src/mca/pdl/configure.m4]) +m4_include([src/mca/pinstalldirs/configure.m4]) +m4_include([src/mca/common/dstore/configure.m4]) +m4_include([src/mca/pcompress/zlib/configure.m4]) +m4_include([src/mca/pdl/pdlopen/configure.m4]) +m4_include([src/mca/pdl/plibltdl/configure.m4]) +m4_include([src/mca/pfexec/linux/configure.m4]) +m4_include([src/mca/pif/bsdx_ipv4/configure.m4]) +m4_include([src/mca/pif/bsdx_ipv6/configure.m4]) +m4_include([src/mca/pif/linux_ipv6/configure.m4]) +m4_include([src/mca/pif/posix_ipv4/configure.m4]) +m4_include([src/mca/pif/solaris_ipv6/configure.m4]) +m4_include([src/mca/pinstalldirs/config/configure.m4]) +m4_include([src/mca/pinstalldirs/env/configure.m4]) +m4_include([src/mca/plog/syslog/configure.m4]) +m4_include([src/mca/psec/munge/configure.m4]) diff --git a/opal/mca/pmix/pmix4x/openpmix/config/mca_library_paths.txt b/opal/mca/pmix/pmix4x/openpmix/config/mca_library_paths.txt new file mode 100644 index 00000000000..04cdb06f3cc --- /dev/null +++ b/opal/mca/pmix/pmix4x/openpmix/config/mca_library_paths.txt @@ -0,0 +1 @@ 
+src/mca/bfrops/v12:src/mca/bfrops/v20:src/mca/bfrops/v21:src/mca/bfrops/v3:src/mca/bfrops/v4:src/mca/common/dstore:src/mca/gds/ds12:src/mca/gds/ds21:src/mca/gds/hash:src/mca/pcompress/zlib:src/mca/pdl/pdlopen:src/mca/pdl/plibltdl:src/mca/pfexec/linux:src/mca/pif/bsdx_ipv4:src/mca/pif/bsdx_ipv6:src/mca/pif/linux_ipv6:src/mca/pif/posix_ipv4:src/mca/pif/solaris_ipv6:src/mca/pinstalldirs/config:src/mca/pinstalldirs/env:src/mca/plog/default:src/mca/plog/stdfd:src/mca/plog/syslog:src/mca/pmdl/ompi:src/mca/pnet/tcp:src/mca/pnet/test:src/mca/preg/compress:src/mca/preg/native:src/mca/psec/dummy_handshake:src/mca/psec/munge:src/mca/psec/native:src/mca/psec/none:src/mca/psensor/file:src/mca/psensor/heartbeat:src/mca/pshmem/mmap:src/mca/psquash/flex128:src/mca/psquash/native:src/mca/ptl/tcp:src/mca/ptl/usock \ No newline at end of file diff --git a/opal/mca/pmix/pmix4x/openpmix/config/test-driver b/opal/mca/pmix/pmix4x/openpmix/config/test-driver new file mode 100755 index 00000000000..8e575b017d9 --- /dev/null +++ b/opal/mca/pmix/pmix4x/openpmix/config/test-driver @@ -0,0 +1,148 @@ +#! /bin/sh +# test-driver - basic testsuite driver script. + +scriptversion=2013-07-13.22; # UTC + +# Copyright (C) 2011-2014 Free Software Foundation, Inc. +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2, or (at your option) +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . 
+ +# As a special exception to the GNU General Public License, if you +# distribute this file as part of a program that contains a +# configuration script generated by Autoconf, you may include it under +# the same distribution terms that you use for the rest of that program. + +# This file is maintained in Automake, please report +# bugs to or send patches to +# . + +# Make unconditional expansion of undefined variables an error. This +# helps a lot in preventing typo-related bugs. +set -u + +usage_error () +{ + echo "$0: $*" >&2 + print_usage >&2 + exit 2 +} + +print_usage () +{ + cat <$log_file 2>&1 +estatus=$? + +if test $enable_hard_errors = no && test $estatus -eq 99; then + tweaked_estatus=1 +else + tweaked_estatus=$estatus +fi + +case $tweaked_estatus:$expect_failure in + 0:yes) col=$red res=XPASS recheck=yes gcopy=yes;; + 0:*) col=$grn res=PASS recheck=no gcopy=no;; + 77:*) col=$blu res=SKIP recheck=no gcopy=yes;; + 99:*) col=$mgn res=ERROR recheck=yes gcopy=yes;; + *:yes) col=$lgn res=XFAIL recheck=no gcopy=yes;; + *:*) col=$red res=FAIL recheck=yes gcopy=yes;; +esac + +# Report the test outcome and exit status in the logs, so that one can +# know whether the test passed or failed simply by looking at the '.log' +# file, without the need of also peaking into the corresponding '.trs' +# file (automake bug#11814). +echo "$res $test_name (exit status: $estatus)" >>$log_file + +# Report outcome to console. +echo "${col}${res}${std}: $test_name" + +# Register the test result, and other relevant metadata. 
+echo ":test-result: $res" > $trs_file +echo ":global-test-result: $res" >> $trs_file +echo ":recheck: $recheck" >> $trs_file +echo ":copy-in-global-log: $gcopy" >> $trs_file + +# Local Variables: +# mode: shell-script +# sh-indentation: 2 +# eval: (add-hook 'write-file-hooks 'time-stamp) +# time-stamp-start: "scriptversion=" +# time-stamp-format: "%:y-%02m-%02d.%02H" +# time-stamp-time-zone: "UTC" +# time-stamp-end: "; # UTC" +# End: diff --git a/opal/mca/pmix/pmix4x/openpmix/include/pmix_common.h b/opal/mca/pmix/pmix4x/openpmix/include/pmix_common.h new file mode 100644 index 00000000000..aef2ce515b1 --- /dev/null +++ b/opal/mca/pmix/pmix4x/openpmix/include/pmix_common.h @@ -0,0 +1,2976 @@ +/* include/pmix_common.h. Generated from pmix_common.h.in by configure. */ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ +/* + * Copyright (c) 2013-2019 Intel, Inc. All rights reserved. + * Copyright (c) 2016-2019 Research Organization for Information Science + * and Technology (RIST). All rights reserved. + * Copyright (c) 2016 IBM Corporation. All rights reserved. + * Copyright (c) 2016-2019 Mellanox Technologies, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer listed + * in this license in the documentation and/or other materials + * provided with the distribution. + * + * - Neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. 
+ * + * The copyright holders provide no reassurances that the source code + * provided does not infringe any patent, copyright, or any other + * intellectual property rights of third parties. The copyright holders + * disclaim any liability to any recipient for claims brought against + * recipient by any third party for infringement of that parties + * intellectual property rights. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#ifndef PMIx_COMMON_H +#define PMIx_COMMON_H + +#include +#include +#include +#include +#include +#include /* for struct timeval */ +#include /* for uid_t and gid_t */ +#include /* for uid_t and gid_t */ + +/* Whether C compiler supports -fvisibility */ +#define PMIX_HAVE_VISIBILITY 0 + +#if PMIX_HAVE_VISIBILITY == 1 +#define PMIX_EXPORT __attribute__((__visibility__("default"))) +#else +#define PMIX_EXPORT +#endif + + +#include +#include + +#if defined(c_plusplus) || defined(__cplusplus) +extern "C" { +#endif + +/**** PMIX CONSTANTS ****/ + +/* define maximum value and key sizes */ +#define PMIX_MAX_NSLEN 255 +#define PMIX_MAX_KEYLEN 511 + +/* define abstract types for namespaces and keys */ +typedef char pmix_nspace_t[PMIX_MAX_NSLEN+1]; +typedef char pmix_key_t[PMIX_MAX_KEYLEN+1]; + +/* define a type for rank values */ +typedef uint32_t pmix_rank_t; + +/* define a value for requests for job-level data + * where the info itself isn't associated with any + * specific rank, or when a request involves + * a rank that isn't known - e.g., when someone requests + * info thru one of the legacy interfaces where the rank + * is typically encoded into the key itself since there is + * no rank parameter in the API itself */ +#define PMIX_RANK_UNDEF UINT32_MAX +/* define a value to indicate that the user wants the + * data for the given key from every rank that posted + * that key */ +#define PMIX_RANK_WILDCARD UINT32_MAX-1 +/* other special rank values will be used to define + * groups of ranks for use in collectives */ +#define PMIX_RANK_LOCAL_NODE UINT32_MAX-2 // all ranks on local node +#define PMIX_RANK_LOCAL_PEERS UINT32_MAX-4 // all peers (i.e., all procs within the same nspace) on local node +/* define an invalid value */ +#define PMIX_RANK_INVALID UINT32_MAX-3 +/* define a boundary for valid ranks */ +#define PMIX_RANK_VALID UINT32_MAX-50 + +/* define a value to indicate that data 
applies + * to all apps in a job */ +#define PMIX_APP_WILDCARD UINT32_MAX + +/**** PMIX ENVIRONMENTAL PARAMETERS ****/ +/* There are a few environmental parameters used by PMIx for + * various operations. While there is no "definition" of them + * as values, we do record them here for informational purposes. + * + * PMIX_LAUNCHER_PAUSE_FOR_TOOL - if set to non-zero value, instructs + * launchers (e.g., "prun") to stop prior to spawning the application until + * a tool can connect with further instructions. This envar will be + * set by the tool and is _not_ intended for the direct use of users. + * + * PMIX_LAUNCHER_RENDEZVOUS_FILE - if set, contains the full pathname + * of a file the launcher is to write that contains its connection info. + * Works in addition to anything else the launcher may output. + */ + +/* define a set of "standard" PMIx attributes that can + * be queried. Implementations (and users) are free to extend as + * desired, so the get functions need to be capable + * of handling the "not found" condition. Note that these + * are attributes of the system and the job as opposed to + * values the application (or underlying MPI library) + * might choose to expose - i.e., they are values provided + * by the resource manager as opposed to the application. 
Thus, + * these keys are RESERVED */ +#define PMIX_ATTR_UNDEF NULL + +/* initialization attributes */ +#define PMIX_EVENT_BASE "pmix.evbase" // (struct event_base *) pointer to libevent event_base to use in place + // of the internal progress thread +#define PMIX_SERVER_TOOL_SUPPORT "pmix.srvr.tool" // (bool) The host RM wants to declare itself as willing to + // accept tool connection requests +#define PMIX_SERVER_REMOTE_CONNECTIONS "pmix.srvr.remote" // (bool) Allow connections from remote tools (do not use loopback device) +#define PMIX_SERVER_SYSTEM_SUPPORT "pmix.srvr.sys" // (bool) The host RM wants to declare itself as being the local + // system server for PMIx connection requests +#define PMIX_SERVER_TMPDIR "pmix.srvr.tmpdir" // (char*) temp directory where PMIx server will place + // client rendezvous points and contact info +#define PMIX_SYSTEM_TMPDIR "pmix.sys.tmpdir" // (char*) temp directory for this system, where PMIx + // server will place tool rendezvous points and contact info +#define PMIX_SERVER_ENABLE_MONITORING "pmix.srv.monitor" // (bool) Enable PMIx internal monitoring by server +#define PMIX_SERVER_NSPACE "pmix.srv.nspace" // (char*) Name of the nspace to use for this server +#define PMIX_SERVER_RANK "pmix.srv.rank" // (pmix_rank_t) Rank of this server +#define PMIX_SERVER_GATEWAY "pmix.srv.gway" // (bool) Server is acting as a gateway for PMIx requests + // that cannot be serviced on backend nodes + // (e.g., logging to email) +#define PMIX_SERVER_SCHEDULER "pmix.srv.sched" // (bool) Server supports system scheduler + +/* tool-related attributes */ +#define PMIX_TOOL_NSPACE "pmix.tool.nspace" // (char*) Name of the nspace to use for this tool +#define PMIX_TOOL_RANK "pmix.tool.rank" // (uint32_t) Rank of this tool +#define PMIX_SERVER_PIDINFO "pmix.srvr.pidinfo" // (pid_t) pid of the target server for a tool +#define PMIX_CONNECT_TO_SYSTEM "pmix.cnct.sys" // (bool) The requestor requires that a connection be made only to + // a local 
system-level PMIx server +#define PMIX_CONNECT_SYSTEM_FIRST "pmix.cnct.sys.first" // (bool) Preferentially look for a system-level PMIx server first +#define PMIX_SERVER_URI "pmix.srvr.uri" // (char*) URI of server to be contacted +#define PMIX_SERVER_HOSTNAME "pmix.srvr.host" // (char*) node where target server is located +#define PMIX_CONNECT_MAX_RETRIES "pmix.tool.mretries" // (uint32_t) maximum number of times to try to connect to server +#define PMIX_CONNECT_RETRY_DELAY "pmix.tool.retry" // (uint32_t) time in seconds between connection attempts +#define PMIX_TOOL_DO_NOT_CONNECT "pmix.tool.nocon" // (bool) the tool wants to use internal PMIx support, but does + // not want to connect to a PMIx server + // from the specified processes to this tool +#define PMIX_TOOL_CONNECT_OPTIONAL "pmix.tool.conopt" // (bool) tool shall connect to a server if available, but otherwise + // continue to operate unconnected +#define PMIX_RECONNECT_SERVER "pmix.cnct.recon" // (bool) tool is requesting to change server connections +#define PMIX_LAUNCHER "pmix.tool.launcher" // (bool) tool is a launcher and needs rendezvous files created +#define PMIX_LAUNCHER_RENDEZVOUS_FILE "pmix.tool.lncrnd" // (char*) Pathname of file where connection info is to be stored + +/* identification attributes */ +#define PMIX_USERID "pmix.euid" // (uint32_t) effective user id +#define PMIX_GRPID "pmix.egid" // (uint32_t) effective group id +#define PMIX_DSTPATH "pmix.dstpath" // (char*) path to dstore files +#define PMIX_VERSION_INFO "pmix.version" // (char*) PMIx version of contactor +#define PMIX_REQUESTOR_IS_TOOL "pmix.req.tool" // (bool) requesting process is a tool +#define PMIX_REQUESTOR_IS_CLIENT "pmix.req.client" // (bool) requesting process is a client process +#define PMIX_PSET_NAME "pmix.pset.nm" // (char*) user-assigned name for the process + // set containing the given process + +/* model attributes */ +#define PMIX_PROGRAMMING_MODEL "pmix.pgm.model" // (char*) programming model being 
initialized (e.g., "MPI" or "OpenMP") +#define PMIX_MODEL_LIBRARY_NAME "pmix.mdl.name" // (char*) programming model implementation ID (e.g., "OpenMPI" or "MPICH") +#define PMIX_MODEL_LIBRARY_VERSION "pmix.mld.vrs" // (char*) programming model version string (e.g., "2.1.1") +#define PMIX_THREADING_MODEL "pmix.threads" // (char*) threading model used (e.g., "pthreads") +#define PMIX_MODEL_NUM_THREADS "pmix.mdl.nthrds" // (uint64_t) number of active threads being used by the model +#define PMIX_MODEL_NUM_CPUS "pmix.mdl.ncpu" // (uint64_t) number of cpus being used by the model +#define PMIX_MODEL_CPU_TYPE "pmix.mdl.cputype" // (char*) granularity - "hwthread", "core", etc. +#define PMIX_MODEL_PHASE_NAME "pmix.mdl.phase" // (char*) user-assigned name for a phase in the application execution - e.g., + // "cfd reduction" +#define PMIX_MODEL_PHASE_TYPE "pmix.mdl.ptype" // (char*) type of phase being executed - e.g., "matrix multiply" +#define PMIX_MODEL_AFFINITY_POLICY "pmix.mdl.tap" // (char*) thread affinity policy - e.g.: + // "master" (thread co-located with master thread), + // "close" (thread located on cpu close to master thread) + // "spread" (threads load-balanced across available cpus) + +/* attributes for the USOCK rendezvous socket */ +#define PMIX_USOCK_DISABLE "pmix.usock.disable" // (bool) disable legacy usock support +#define PMIX_SOCKET_MODE "pmix.sockmode" // (uint32_t) POSIX mode_t (9 bits valid) +#define PMIX_SINGLE_LISTENER "pmix.sing.listnr" // (bool) use only one rendezvous socket, letting priorities and/or + // MCA param select the active transport + +/* attributes for TCP connections */ +#define PMIX_TCP_REPORT_URI "pmix.tcp.repuri" // (char*) output URI - '-' => stdout, '+' => stderr, or filename +#define PMIX_TCP_URI "pmix.tcp.uri" // (char*) URI of server to connect to, or file: +#define PMIX_TCP_IF_INCLUDE "pmix.tcp.ifinclude" // (char*) comma-delimited list of devices and/or CIDR notation +#define PMIX_TCP_IF_EXCLUDE "pmix.tcp.ifexclude" // 
(char*) comma-delimited list of devices and/or CIDR notation +#define PMIX_TCP_IPV4_PORT "pmix.tcp.ipv4" // (int) IPv4 port to be used +#define PMIX_TCP_IPV6_PORT "pmix.tcp.ipv6" // (int) IPv6 port to be used +#define PMIX_TCP_DISABLE_IPV4 "pmix.tcp.disipv4" // (bool) true to disable IPv4 family +#define PMIX_TCP_DISABLE_IPV6 "pmix.tcp.disipv6" // (bool) true to disable IPv6 family + + +/* attributes for GDS */ +#define PMIX_GDS_MODULE "pmix.gds.mod" // (char*) comma-delimited string of desired modules + + +/* general proc-level attributes */ +#define PMIX_CPUSET "pmix.cpuset" // (char*) hwloc bitmap applied to proc upon launch +#define PMIX_CREDENTIAL "pmix.cred" // (char*) security credential assigned to proc +#define PMIX_SPAWNED "pmix.spawned" // (bool) true if this proc resulted from a call to PMIx_Spawn +#define PMIX_ARCH "pmix.arch" // (uint32_t) datatype architecture flag + +/* scratch directory locations for use by applications */ +#define PMIX_TMPDIR "pmix.tmpdir" // (char*) top-level tmp dir assigned to session +#define PMIX_NSDIR "pmix.nsdir" // (char*) sub-tmpdir assigned to namespace +#define PMIX_PROCDIR "pmix.pdir" // (char*) sub-nsdir assigned to proc +#define PMIX_TDIR_RMCLEAN "pmix.tdir.rmclean" // (bool) Resource Manager will clean session directories + +/* information about relative ranks as assigned by the RM */ +#define PMIX_CLUSTER_ID "pmix.clid" // (char*) a string name for the cluster this proc is executing on +#define PMIX_PROCID "pmix.procid" // (pmix_proc_t*) process identifier +#define PMIX_NSPACE "pmix.nspace" // (char*) nspace of a job +#define PMIX_JOBID "pmix.jobid" // (char*) jobid assigned by scheduler +#define PMIX_APPNUM "pmix.appnum" // (uint32_t) app number within the job +#define PMIX_RANK "pmix.rank" // (pmix_rank_t) process rank within the job +#define PMIX_GLOBAL_RANK "pmix.grank" // (pmix_rank_t) rank spanning across all jobs in this session +#define PMIX_APP_RANK "pmix.apprank" // (pmix_rank_t) rank within this app 
+#define PMIX_NPROC_OFFSET "pmix.offset" // (pmix_rank_t) starting global rank of this job +#define PMIX_LOCAL_RANK "pmix.lrank" // (uint16_t) rank on this node within this job +#define PMIX_NODE_RANK "pmix.nrank" // (uint16_t) rank on this node spanning all jobs +#define PMIX_LOCALLDR "pmix.lldr" // (pmix_rank_t) lowest rank on this node within this job +#define PMIX_APPLDR "pmix.aldr" // (pmix_rank_t) lowest rank in this app within this job +#define PMIX_PROC_PID "pmix.ppid" // (pid_t) pid of specified proc +#define PMIX_SESSION_ID "pmix.session.id" // (uint32_t) session identifier + +#define PMIX_NODE_LIST "pmix.nlist" // (char*) comma-delimited list of nodes running procs for the specified nspace +#define PMIX_ALLOCATED_NODELIST "pmix.alist" // (char*) comma-delimited list of all nodes in this allocation regardless of + // whether or not they currently host procs. +#define PMIX_HOSTNAME "pmix.hname" // (char*) name of the host the specified proc is on +#define PMIX_NODEID "pmix.nodeid" // (uint32_t) node identifier where the specified proc is located +#define PMIX_LOCAL_PEERS "pmix.lpeers" // (char*) comma-delimited string of ranks on this node within the specified nspace +#define PMIX_LOCAL_PROCS "pmix.lprocs" // (pmix_proc_t array) array of pmix_proc_t of procs on the specified node +#define PMIX_LOCAL_CPUSETS "pmix.lcpus" // (char*) colon-delimited cpusets of local peers within the specified nspace +#define PMIX_PROC_URI "pmix.puri" // (char*) URI containing contact info for proc +#define PMIX_LOCALITY "pmix.loc" // (uint16_t) relative locality of two procs +#define PMIX_PARENT_ID "pmix.parent" // (pmix_proc_t*) identifier of the process that called PMIx_Spawn + // to launch this proc's application +#define PMIX_EXIT_CODE "pmix.exit.code" // (int) exit code returned when proc terminated +#define PMIX_NETWORK_COORDINATE "pmix.net.coord" // (pmix_coord_t*) Network coordinate of the specified process in + // the given view type (e.g., logical vs physical) 
+#define PMIX_NETWORK_VIEW "pmix.net.view" // (pmix_coord_view_t) Requested view type (e.g., logical vs physical) +#define PMIX_NETWORK_DIMS "pmix.net.dims" // (uint32_t) Number of dimensions in the specified network plane/view +#define PMIX_NETWORK_PLANE "pmix.net.plane" // (char*) string ID of a network plane +#define PMIX_NETWORK_SWITCH "pmix.net.switch" // (char*) string ID of a network switch +#define PMIX_NETWORK_NIC "pmix.net.nic" // (char*) string ID of a NIC +#define PMIX_NETWORK_ENDPT "pmix.net.endpt" // (assigned) network endpt for process - type assigned by + // fabric provider +#define PMIX_NETWORK_SHAPE "pmix.net.shape" // (pmix_data_array_t*) number of interfaces (uint32_t) on each dimension of the + // specified network plane in the requested view +#define PMIX_NETWORK_SHAPE_STRING "pmix.net.shapestr" // (char*) network shape expressed as a string (e.g., "10x12x2") + +/* size info */ +#define PMIX_UNIV_SIZE "pmix.univ.size" // (uint32_t) #procs in this nspace +#define PMIX_JOB_SIZE "pmix.job.size" // (uint32_t) #procs in this job +#define PMIX_JOB_NUM_APPS "pmix.job.napps" // (uint32_t) #apps in this job +#define PMIX_APP_SIZE "pmix.app.size" // (uint32_t) #procs in this application +#define PMIX_LOCAL_SIZE "pmix.local.size" // (uint32_t) #procs in this job on this node +#define PMIX_NODE_SIZE "pmix.node.size" // (uint32_t) #procs across all jobs on this node +#define PMIX_MAX_PROCS "pmix.max.size" // (uint32_t) max #procs for this job +#define PMIX_NUM_SLOTS "pmix.num.slots" // (uint32_t) #slots allocated +#define PMIX_NUM_NODES "pmix.num.nodes" // (uint32_t) #nodes in this nspace + + +/* Memory info */ +#define PMIX_AVAIL_PHYS_MEMORY "pmix.pmem" // (uint64_t) total available physical memory on this node +#define PMIX_DAEMON_MEMORY "pmix.dmn.mem" // (float) Mbytes of memory currently used by daemon +#define PMIX_CLIENT_AVG_MEMORY "pmix.cl.mem.avg" // (float) Average Mbytes of memory used by client processes + + +/* topology info */ +#define 
PMIX_NET_TOPO "pmix.ntopo" // (char*) xml-representation of network topology +#define PMIX_LOCAL_TOPO "pmix.ltopo" // (char*) xml-representation of local node topology +#define PMIX_TOPOLOGY "pmix.topo" // (hwloc_topology_t) pointer to the PMIx client's internal topology object +#define PMIX_TOPOLOGY_XML "pmix.topo.xml" // (char*) XML-based description of topology +#define PMIX_TOPOLOGY_FILE "pmix.topo.file" // (char*) full path to file containing XML topology description +#define PMIX_TOPOLOGY_SIGNATURE "pmix.toposig" // (char*) topology signature string +#define PMIX_LOCALITY_STRING "pmix.locstr" // (char*) string describing a proc's location +#define PMIX_HWLOC_SHMEM_ADDR "pmix.hwlocaddr" // (size_t) address of HWLOC shared memory segment +#define PMIX_HWLOC_SHMEM_SIZE "pmix.hwlocsize" // (size_t) size of HWLOC shared memory segment +#define PMIX_HWLOC_SHMEM_FILE "pmix.hwlocfile" // (char*) path to HWLOC shared memory file +#define PMIX_HWLOC_XML_V1 "pmix.hwlocxml1" // (char*) XML representation of local topology using HWLOC v1.x format +#define PMIX_HWLOC_XML_V2 "pmix.hwlocxml2" // (char*) XML representation of local topology using HWLOC v2.x format +#define PMIX_HWLOC_SHARE_TOPO "pmix.hwlocsh" // (bool) Share the HWLOC topology via shared memory +#define PMIX_HWLOC_HOLE_KIND "pmix.hwlocholek" // (char*) Kind of VM "hole" HWLOC should use for shared memory + + +/* request-related info */ +#define PMIX_COLLECT_DATA "pmix.collect" // (bool) collect data and return it at the end of the operation +#define PMIX_TIMEOUT "pmix.timeout" // (int) time in sec before specified operation should time out (0 => infinite) +#define PMIX_IMMEDIATE "pmix.immediate" // (bool) specified operation should immediately return an error from the PMIx + // server if requested data cannot be found - do not request it from + // the host RM +#define PMIX_WAIT "pmix.wait" // (int) caller requests that the server wait until at least the specified + // #values are found (0 => all and is the 
default) +#define PMIX_COLLECTIVE_ALGO "pmix.calgo" // (char*) comma-delimited list of algorithms to use for collective +#define PMIX_COLLECTIVE_ALGO_REQD "pmix.calreqd" // (bool) if true, indicates that the requested choice of algo is mandatory +#define PMIX_NOTIFY_COMPLETION "pmix.notecomp" // (bool) notify parent process upon termination of child job +#define PMIX_RANGE "pmix.range" // (pmix_data_range_t) value for calls to publish/lookup/unpublish or for + // monitoring event notifications +#define PMIX_PERSISTENCE "pmix.persist" // (pmix_persistence_t) value for calls to publish +#define PMIX_DATA_SCOPE "pmix.scope" // (pmix_scope_t) scope of the data to be found in a PMIx_Get call +#define PMIX_OPTIONAL "pmix.optional" // (bool) look only in the client's local data store for the requested value - do + // not request data from the server if not found +#define PMIX_EMBED_BARRIER "pmix.embed.barrier" // (bool) execute a blocking fence operation before executing the + // specified operation +#define PMIX_JOB_TERM_STATUS "pmix.job.term.status" // (pmix_status_t) status returned upon job termination +#define PMIX_PROC_TERM_STATUS "pmix.proc.term.status" // (pmix_status_t) status returned upon process termination +#define PMIX_PROC_STATE_STATUS "pmix.proc.state" // (pmix_proc_state_t) process state +#define PMIX_NOTIFY_LAUNCH "pmix.note.lnch" // (bool) notify the requestor upon launch of the child job and return + // its namespace in the event + + +/* attributes used by host server to pass data to the server convenience library - the + * data will then be parsed and provided to the local clients */ +#define PMIX_REGISTER_NODATA "pmix.reg.nodata" // (bool) Registration is for nspace only, do not copy job data +#define PMIX_PROC_DATA "pmix.pdata" // (pmix_data_array_t*) starts with rank, then contains more data +#define PMIX_NODE_MAP "pmix.nmap" // (char*) regex of nodes containing procs for this job +#define PMIX_PROC_MAP "pmix.pmap" // (char*) regex describing procs 
on each node within this job +#define PMIX_ANL_MAP "pmix.anlmap" // (char*) process mapping in ANL notation (used in PMI-1/PMI-2) +#define PMIX_APP_MAP_TYPE "pmix.apmap.type" // (char*) type of mapping used to layout the application (e.g., cyclic) +#define PMIX_APP_MAP_REGEX "pmix.apmap.regex" // (char*) regex describing the result of the mapping + + +/* attributes used internally to communicate data from the server to the client */ +#define PMIX_PROC_BLOB "pmix.pblob" // (pmix_byte_object_t) packed blob of process data +#define PMIX_MAP_BLOB "pmix.mblob" // (pmix_byte_object_t) packed blob of process location + + +/* event handler registration and notification info keys */ +#define PMIX_EVENT_HDLR_NAME "pmix.evname" // (char*) string name identifying this handler +#define PMIX_EVENT_HDLR_FIRST "pmix.evfirst" // (bool) invoke this event handler before any other handlers +#define PMIX_EVENT_HDLR_LAST "pmix.evlast" // (bool) invoke this event handler after all other handlers have been called +#define PMIX_EVENT_HDLR_FIRST_IN_CATEGORY "pmix.evfirstcat" // (bool) invoke this event handler before any other handlers in this category +#define PMIX_EVENT_HDLR_LAST_IN_CATEGORY "pmix.evlastcat" // (bool) invoke this event handler after all other handlers in this category have been called +#define PMIX_EVENT_HDLR_BEFORE "pmix.evbefore" // (char*) put this event handler immediately before the one specified in the (char*) value +#define PMIX_EVENT_HDLR_AFTER "pmix.evafter" // (char*) put this event handler immediately after the one specified in the (char*) value +#define PMIX_EVENT_HDLR_PREPEND "pmix.evprepend" // (bool) prepend this handler to the precedence list within its category +#define PMIX_EVENT_HDLR_APPEND "pmix.evappend" // (bool) append this handler to the precedence list within its category +#define PMIX_EVENT_CUSTOM_RANGE "pmix.evrange" // (pmix_data_array_t*) array of pmix_proc_t defining range of event notification +#define PMIX_EVENT_AFFECTED_PROC "pmix.evproc" 
// (pmix_proc_t*) single proc that was affected +#define PMIX_EVENT_AFFECTED_PROCS "pmix.evaffected" // (pmix_data_array_t*) array of pmix_proc_t defining affected procs +#define PMIX_EVENT_NON_DEFAULT "pmix.evnondef" // (bool) event is not to be delivered to default event handlers +#define PMIX_EVENT_RETURN_OBJECT "pmix.evobject" // (void*) object to be returned whenever the registered cbfunc is invoked + // NOTE: the object will _only_ be returned to the process that + // registered it +#define PMIX_EVENT_DO_NOT_CACHE "pmix.evnocache" // (bool) instruct the PMIx server not to cache the event +#define PMIX_EVENT_SILENT_TERMINATION "pmix.evsilentterm" // (bool) do not generate an event when this job normally terminates +#define PMIX_EVENT_PROXY "pmix.evproxy" // (pmix_proc_t*) PMIx server that sourced the event +#define PMIX_EVENT_TEXT_MESSAGE "pmix.evtext" // (char*) text message suitable for output by recipient - e.g., describing + // the cause of the event + +/* fault tolerance-related events */ +#define PMIX_EVENT_TERMINATE_SESSION "pmix.evterm.sess" // (bool) RM intends to terminate session +#define PMIX_EVENT_TERMINATE_JOB "pmix.evterm.job" // (bool) RM intends to terminate this job +#define PMIX_EVENT_TERMINATE_NODE "pmix.evterm.node" // (bool) RM intends to terminate all procs on this node +#define PMIX_EVENT_TERMINATE_PROC "pmix.evterm.proc" // (bool) RM intends to terminate just this process +#define PMIX_EVENT_ACTION_TIMEOUT "pmix.evtimeout" // (int) time in sec before RM will execute error response +#define PMIX_EVENT_NO_TERMINATION "pmix.evnoterm" // (bool) indicates that the handler has satisfactorily handled + // the event and believes termination of the application is not required +#define PMIX_EVENT_WANT_TERMINATION "pmix.evterm" // (bool) indicates that the handler has determined that the + // application should be terminated + + +/* attributes used to describe "spawn" directives */ +#define PMIX_PERSONALITY "pmix.pers" // (char*) name of 
personality to use +#define PMIX_HOST "pmix.host" // (char*) comma-delimited list of hosts to use for spawned procs +#define PMIX_HOSTFILE "pmix.hostfile" // (char*) hostfile to use for spawned procs +#define PMIX_ADD_HOST "pmix.addhost" // (char*) comma-delimited list of hosts to add to allocation +#define PMIX_ADD_HOSTFILE "pmix.addhostfile" // (char*) hostfile to add to existing allocation +#define PMIX_PREFIX "pmix.prefix" // (char*) prefix to use for starting spawned procs +#define PMIX_WDIR "pmix.wdir" // (char*) working directory for spawned procs +#define PMIX_MAPPER "pmix.mapper" // (char*) mapper to use for placing spawned procs +#define PMIX_DISPLAY_MAP "pmix.dispmap" // (bool) display process map upon spawn +#define PMIX_PPR "pmix.ppr" // (char*) #procs to spawn on each identified resource +#define PMIX_MAPBY "pmix.mapby" // (char*) mapping policy +#define PMIX_RANKBY "pmix.rankby" // (char*) ranking policy +#define PMIX_BINDTO "pmix.bindto" // (char*) binding policy +#define PMIX_PRELOAD_BIN "pmix.preloadbin" // (bool) preload binaries +#define PMIX_PRELOAD_FILES "pmix.preloadfiles" // (char*) comma-delimited list of files to pre-position +#define PMIX_NON_PMI "pmix.nonpmi" // (bool) spawned procs will not call PMIx_Init +#define PMIX_STDIN_TGT "pmix.stdin" // (pmix_proc_t*) proc that is to receive stdin + // (PMIX_RANK_WILDCARD = all in given nspace) +#define PMIX_DEBUGGER_DAEMONS "pmix.debugger" // (bool) spawned app consists of debugger daemons +#define PMIX_COSPAWN_APP "pmix.cospawn" // (bool) designated app is to be spawned as a disconnected + // job - i.e., not part of the "comm_world" of the job +#define PMIX_SET_SESSION_CWD "pmix.ssncwd" // (bool) set the application's current working directory to + // the session working directory assigned by the RM +#define PMIX_TAG_OUTPUT "pmix.tagout" // (bool) tag application output with the ID of the source +#define PMIX_TIMESTAMP_OUTPUT "pmix.tsout" // (bool) timestamp output from applications +#define 
PMIX_MERGE_STDERR_STDOUT "pmix.mergeerrout" // (bool) merge stdout and stderr streams from application procs +#define PMIX_OUTPUT_TO_FILE "pmix.outfile" // (char*) output application output to given file +#define PMIX_INDEX_ARGV "pmix.indxargv" // (bool) mark the argv with the rank of the proc +#define PMIX_CPUS_PER_PROC "pmix.cpuperproc" // (uint32_t) #cpus to assign to each rank +#define PMIX_NO_PROCS_ON_HEAD "pmix.nolocal" // (bool) do not place procs on the head node +#define PMIX_NO_OVERSUBSCRIBE "pmix.noover" // (bool) do not oversubscribe the cpus +#define PMIX_REPORT_BINDINGS "pmix.repbind" // (bool) report bindings of the individual procs +#define PMIX_CPU_LIST "pmix.cpulist" // (char*) list of cpus to use for this job +#define PMIX_JOB_RECOVERABLE "pmix.recover" // (bool) application supports recoverable operations +#define PMIX_JOB_CONTINUOUS "pmix.continuous" // (bool) application is continuous, all failed procs should + // be immediately restarted +#define PMIX_MAX_RESTARTS "pmix.maxrestarts" // (uint32_t) max number of times to restart a job +#define PMIX_FWD_STDIN "pmix.fwd.stdin" // (bool) forward the stdin from this process to the target processes +#define PMIX_FWD_STDOUT "pmix.fwd.stdout" // (bool) forward stdout from the spawned processes to this process (typically used by a tool) +#define PMIX_FWD_STDERR "pmix.fwd.stderr" // (bool) forward stderr from the spawned processes to this process (typically used by a tool) +#define PMIX_FWD_STDDIAG "pmix.fwd.stddiag" // (bool) if a diagnostic channel exists, forward any output on it + // from the spawned processes to this process (typically used by a tool) +#define PMIX_SPAWN_TOOL "pmix.spwn.tool" // (bool) job being spawned is a tool +#define PMIX_CMD_LINE "pmix.cmd.line" // (char*) command line executing in the specified nspace +#define PMIX_FORK_EXEC_AGENT "pmix.fe.agnt" // (char*) command line of fork/exec agent to be used for starting + // local processes + + +/* query attributes */ +#define 
PMIX_QUERY_REFRESH_CACHE "pmix.qry.rfsh" // (bool) retrieve updated information from server + // to update local cache +#define PMIX_QUERY_NAMESPACES "pmix.qry.ns" // (char*) return a comma-delimited list of active namespaces +#define PMIX_QUERY_NAMESPACE_INFO "pmix.qry.nsinfo" // (pmix_data_array_t) request an array of active nspace information - each + // element will contain an array including the namespace plus the + // command line of the application executing within it +#define PMIX_QUERY_JOB_STATUS "pmix.qry.jst" // (pmix_status_t) status of a specified currently executing job +#define PMIX_QUERY_QUEUE_LIST "pmix.qry.qlst" // (char*) request a comma-delimited list of scheduler queues +#define PMIX_QUERY_QUEUE_STATUS "pmix.qry.qst" // (TBD) status of a specified scheduler queue +#define PMIX_QUERY_PROC_TABLE "pmix.qry.ptable" // (char*) input nspace of job whose info is being requested + // returns (pmix_data_array_t*) an array of pmix_proc_info_t +#define PMIX_QUERY_LOCAL_PROC_TABLE "pmix.qry.lptable" // (char*) input nspace of job whose info is being requested + // returns (pmix_data_array_t*) an array of pmix_proc_info_t for + // procs in job on same node +#define PMIX_QUERY_AUTHORIZATIONS "pmix.qry.auths" // (bool) return operations tool is authorized to perform +#define PMIX_QUERY_SPAWN_SUPPORT "pmix.qry.spawn" // (bool) return a comma-delimited list of supported spawn attributes +#define PMIX_QUERY_DEBUG_SUPPORT "pmix.qry.debug" // (bool) return a comma-delimited list of supported debug attributes +#define PMIX_QUERY_MEMORY_USAGE "pmix.qry.mem" // (bool) return info on memory usage for the procs indicated in the qualifiers +#define PMIX_QUERY_LOCAL_ONLY "pmix.qry.local" // (bool) constrain the query to local information only +#define PMIX_QUERY_REPORT_AVG "pmix.qry.avg" // (bool) report average values +#define PMIX_QUERY_REPORT_MINMAX "pmix.qry.minmax" // (bool) report minimum and maximum value +#define PMIX_QUERY_ALLOC_STATUS "pmix.query.alloc" // 
(char*) string identifier of the allocation whose status + // is being requested +#define PMIX_TIME_REMAINING "pmix.time.remaining" // (char*) query number of seconds (uint32_t) remaining in allocation + // for the specified nspace +#define PMIX_QUERY_NUM_PSETS "pmix.qry.psetnum" // (size_t) return the number of psets defined + // in the specified range (defaults to session) +#define PMIX_QUERY_PSET_NAMES "pmix.qry.psets" // (char*) return a comma-delimited list of the names of the + // psets defined in the specified range (defaults to session) +#define PMIX_QUERY_ATTRIBUTE_SUPPORT "pmix.qry.attrs" // (bool) query attribute support for specified functions +#define PMIX_CLIENT_FUNCTIONS "pmix.client.fns" // (bool) query the list of supported PMIx client functions +#define PMIX_SERVER_FUNCTIONS "pmix.srvr.fns" // (bool) query the list of supported PMIx server functions +#define PMIX_TOOL_FUNCTIONS "pmix.tool.fns" // (bool) query the list of supported PMIx tool functions +#define PMIX_HOST_FUNCTIONS "pmix.host.fns" // (bool) query the list of PMIx functions supported by the host environment +#define PMIX_CLIENT_ATTRIBUTES "pmix.client.attrs" // (char*) comma-delimited list of functions, including "all" + // when used in a query, indicates whether or not to include + // attributes supported by the PMIx client library +#define PMIX_SERVER_ATTRIBUTES "pmix.srvr.attrs" // (char*) comma-delimited list of functions, including "all" + // when used in a query, indicates whether or not to include + // attributes supported by the PMIx server library +#define PMIX_HOST_ATTRIBUTES "pmix.host.attrs" // (char*) comma-delimited list of functions, including "all" + // when used in a query, indicates whether or not to include + // attributes supported by the host environment +#define PMIX_TOOL_ATTRIBUTES "pmix.tool.attrs" // (char*) comma-delimited list of functions, including "all" + // when used in a query, indicates whether or not to include + // attributes supported by the PMIx 
tool library + + +/* information retrieval attributes */ +#define PMIX_SESSION_INFO "pmix.ssn.info" // (bool) Return information about the specified session. If information + // about a session other than the one containing the requesting + // process is desired, then the attribute array must contain a + // PMIX_SESSION_ID attribute identifying the desired target. +#define PMIX_JOB_INFO "pmix.job.info" // (bool) Return information about the specified job or namespace. If + // information about a job or namespace other than the one containing + // the requesting process is desired, then the attribute array must + // contain a PMIX_JOBID or PMIX_NSPACE attribute identifying the + // desired target. Similarly, if information is requested about a + // job or namespace in a session other than the one containing the + // requesting process, then an attribute identifying the target + // session must be provided. +#define PMIX_APP_INFO "pmix.app.info" // (bool) Return information about the specified application. If information + // about an application other than the one containing the requesting + // process is desired, then the attribute array must contain a + // PMIX_APPNUM attribute identifying the desired target. Similarly, + // if information is requested about an application in a job or session + // other than the one containing the requesting process, then attributes + // identifying the target job and/or session must be provided. +#define PMIX_NODE_INFO "pmix.node.info" // (bool) Return information about the specified node. If information about a + // node other than the one containing the requesting process is desired, + // then the attribute array must contain either the PMIX_NODEID or + // PMIX_HOSTNAME attribute identifying the desired target. + +/* information storage attributes */ +#define PMIX_SESSION_INFO_ARRAY "pmix.ssn.arr" // (pmix_data_array_t) Provide an array of pmix_info_t containing + // session-level information. 
The PMIX_SESSION_ID attribute is required + // to be included in the array. +#define PMIX_JOB_INFO_ARRAY "pmix.job.arr" // (pmix_data_array_t) Provide an array of pmix_info_t containing job-level + // information. Information is registered one job (aka namespace) at a time + // via the PMIx_server_register_nspace API. Thus, there is no requirement that + // the array contain either the PMIX_NSPACE or PMIX_JOBID attributes, though + // either or both of them may be included. +#define PMIX_APP_INFO_ARRAY "pmix.app.arr" // (pmix_data_array_t) Provide an array of pmix_info_t containing app-level + // information. The PMIX_NSPACE or PMIX_JOBID attributes of the job containing + // the application, plus its PMIX_APPNUM attribute, are required to be + // included in the array. +#define PMIX_NODE_INFO_ARRAY "pmix.node.arr" // (pmix_data_array_t) Provide an array of pmix_info_t containing node-level + // information. At a minimum, either the PMIX_NODEID or PMIX_HOSTNAME + // attribute is required to be included in the array, though both may be + // included. + +/* log attributes */ +#define PMIX_LOG_SOURCE "pmix.log.source" // (pmix_proc_t*) ID of source of the log request +#define PMIX_LOG_STDERR "pmix.log.stderr" // (char*) log string to stderr +#define PMIX_LOG_STDOUT "pmix.log.stdout" // (char*) log string to stdout +#define PMIX_LOG_SYSLOG "pmix.log.syslog" // (char*) log message to syslog - defaults to ERROR priority. 
Will log + // to global syslog if available, otherwise to local syslog +#define PMIX_LOG_LOCAL_SYSLOG "pmix.log.lsys" // (char*) log msg to local syslog - defaults to ERROR priority +#define PMIX_LOG_GLOBAL_SYSLOG "pmix.log.gsys" // (char*) forward data to system "master" and log msg to that syslog +#define PMIX_LOG_SYSLOG_PRI "pmix.log.syspri" // (int) syslog priority level + +#define PMIX_LOG_TIMESTAMP "pmix.log.tstmp" // (time_t) timestamp for log report +#define PMIX_LOG_GENERATE_TIMESTAMP "pmix.log.gtstmp" // (bool) generate timestamp for log +#define PMIX_LOG_TAG_OUTPUT "pmix.log.tag" // (bool) label the output stream with the channel name (e.g., "stdout") +#define PMIX_LOG_TIMESTAMP_OUTPUT "pmix.log.tsout" // (bool) print timestamp in output string +#define PMIX_LOG_XML_OUTPUT "pmix.log.xml" // (bool) print the output stream in xml format +#define PMIX_LOG_ONCE "pmix.log.once" // (bool) only log this once with whichever channel can first support it +#define PMIX_LOG_MSG "pmix.log.msg" // (pmix_byte_object_t) message blob to be sent somewhere + +#define PMIX_LOG_EMAIL "pmix.log.email" // (pmix_data_array_t*) log via email based on array of pmix_info_t + // containing directives +#define PMIX_LOG_EMAIL_ADDR "pmix.log.emaddr" // (char*) comma-delimited list of email addresses that are to recv msg +#define PMIX_LOG_EMAIL_SENDER_ADDR "pmix.log.emfaddr" // (char*) return email address of sender +#define PMIX_LOG_EMAIL_SUBJECT "pmix.log.emsub" // (char*) subject line for email +#define PMIX_LOG_EMAIL_MSG "pmix.log.emmsg" // (char*) msg to be included in email +#define PMIX_LOG_EMAIL_SERVER "pmix.log.esrvr" // (char*) hostname (or IP addr) of SMTP server +#define PMIX_LOG_EMAIL_SRVR_PORT "pmix.log.esrvrprt" // (int32_t) port the email server is listening to + +#define PMIX_LOG_GLOBAL_DATASTORE "pmix.log.gstore" // (bool) +#define PMIX_LOG_JOB_RECORD "pmix.log.jrec" // (bool) log the provided information to the RM's job record + + +/* debugger attributes */ +#define 
PMIX_DEBUG_STOP_ON_EXEC "pmix.dbg.exec" // (bool) job is being spawned under debugger - instruct it to pause on start +#define PMIX_DEBUG_STOP_IN_INIT "pmix.dbg.init" // (bool) instruct job to stop during PMIx init +#define PMIX_DEBUG_WAIT_FOR_NOTIFY "pmix.dbg.notify" // (bool) block at desired point until receiving debugger release notification +#define PMIX_DEBUG_JOB "pmix.dbg.job" // (char*) nspace of the job assigned to this debugger to be debugged. Note + // that id's, pids, and other info on the procs is available + // via a query for the nspace's local or global proctable +#define PMIX_DEBUG_WAITING_FOR_NOTIFY "pmix.dbg.waiting" // (bool) job to be debugged is waiting for a release +#define PMIX_DEBUG_JOB_DIRECTIVES "pmix.dbg.jdirs" // (pmix_data_array_t*) array of job-level directives +#define PMIX_DEBUG_APP_DIRECTIVES "pmix.dbg.adirs" // (pmix_data_array_t*) array of app-level directives + + +/* Resource Manager identification */ +#define PMIX_RM_NAME "pmix.rm.name" // (char*) string name of the resource manager +#define PMIX_RM_VERSION "pmix.rm.version" // (char*) RM version string + +/* environmental variable operation attributes */ +#define PMIX_SET_ENVAR "pmix.envar.set" // (pmix_envar_t*) set the envar to the given value, + // overwriting any pre-existing one +#define PMIX_ADD_ENVAR "pmix.envar.add" // (pmix_envar_t*) add envar, but do not overwrite any existing one +#define PMIX_UNSET_ENVAR "pmix.envar.unset" // (char*) unset the envar, if present +#define PMIX_PREPEND_ENVAR "pmix.envar.prepnd" // (pmix_envar_t*) prepend the given value to the + // specified envar using the separator + // character, creating the envar if it doesn't already exist +#define PMIX_APPEND_ENVAR "pmix.envar.appnd" // (pmix_envar_t*) append the given value to the specified + // envar using the separator character, + // creating the envar if it doesn't already exist + +/* attributes relating to allocations */ +#define PMIX_ALLOC_ID "pmix.alloc.id" // (char*) provide a string 
// identifier for this allocation request
// which can later be used to query status of the request
#define PMIX_ALLOC_NUM_NODES            "pmix.alloc.nnodes"     // (uint64_t) number of nodes
#define PMIX_ALLOC_NODE_LIST            "pmix.alloc.nlist"      // (char*) regex of specific nodes
#define PMIX_ALLOC_NUM_CPUS             "pmix.alloc.ncpus"      // (uint64_t) number of cpus
#define PMIX_ALLOC_NUM_CPU_LIST         "pmix.alloc.ncpulist"   // (char*) regex of #cpus for each node
#define PMIX_ALLOC_CPU_LIST             "pmix.alloc.cpulist"    // (char*) regex of specific cpus indicating the cpus involved.
#define PMIX_ALLOC_MEM_SIZE             "pmix.alloc.msize"      // (float) number of Mbytes
#define PMIX_ALLOC_NETWORK              "pmix.alloc.net"        // (pmix_data_array_t*) Array of pmix_info_t describing
                                                                // network resource request. This must include at least:
                                                                //   * PMIX_ALLOC_NETWORK_ID
                                                                //   * PMIX_ALLOC_NETWORK_TYPE
                                                                //   * PMIX_ALLOC_NETWORK_ENDPTS
                                                                // plus whatever other descriptors are desired
#define PMIX_ALLOC_NETWORK_ID           "pmix.alloc.netid"      // (char*) key to be used when accessing this requested network allocation. The
                                                                // allocation will be returned/stored as a pmix_data_array_t of
                                                                // pmix_info_t indexed by this key and containing at least one
                                                                // entry with the same key and the allocated resource description.
                                                                // The type of the included value depends upon the network
                                                                // support. For example, a TCP allocation might consist of a
                                                                // comma-delimited string of socket ranges such as
                                                                // "32000-32100,33005,38123-38146". Additional entries will consist
                                                                // of any provided resource request directives, along with their
                                                                // assigned values. Examples include:
                                                                //   * PMIX_ALLOC_NETWORK_TYPE - the type of resources provided
                                                                //   * PMIX_ALLOC_NETWORK_PLANE - if applicable, what plane the
                                                                //     resources were assigned from
                                                                //   * PMIX_ALLOC_NETWORK_QOS - the assigned QoS
                                                                //   * PMIX_ALLOC_BANDWIDTH - the allocated bandwidth
                                                                //   * PMIX_ALLOC_NETWORK_SEC_KEY - a security key for the requested
                                                                //     network allocation
                                                                // NOTE: the assigned values may differ from those requested,
                                                                // especially if the "required" flag was not set in the request
#define PMIX_ALLOC_BANDWIDTH            "pmix.alloc.bw"         // (float) Mbits/sec
#define PMIX_ALLOC_NETWORK_QOS          "pmix.alloc.netqos"     // (char*) quality of service level
#define PMIX_ALLOC_TIME                 "pmix.alloc.time"       // (uint32_t) time in seconds that the allocation shall remain valid
#define PMIX_ALLOC_NETWORK_TYPE         "pmix.alloc.nettype"    // (char*) type of desired transport (e.g., tcp, udp)
#define PMIX_ALLOC_NETWORK_PLANE        "pmix.alloc.netplane"   // (char*) id string for the NIC (aka plane) to be used for this allocation
                                                                // (e.g., CIDR for Ethernet)
#define PMIX_ALLOC_NETWORK_ENDPTS       "pmix.alloc.endpts"     // (size_t) number of endpoints to allocate per process
#define PMIX_ALLOC_NETWORK_ENDPTS_NODE  "pmix.alloc.endpts.nd"  // (size_t) number of endpoints to allocate per node
#define PMIX_ALLOC_NETWORK_SEC_KEY      "pmix.alloc.nsec"       // (pmix_byte_object_t) network security key


/* job control attributes */
#define PMIX_JOB_CTRL_ID                    "pmix.jctrl.id"         // (char*) provide a string identifier for this request
#define PMIX_JOB_CTRL_PAUSE                 "pmix.jctrl.pause"      // (bool) pause the specified processes
#define PMIX_JOB_CTRL_RESUME                "pmix.jctrl.resume"     // (bool) "un-pause" the specified processes
#define PMIX_JOB_CTRL_CANCEL                "pmix.jctrl.cancel"     // (char*) cancel the specified request
                                                                    // (NULL => cancel all requests from this requestor)
#define PMIX_JOB_CTRL_KILL                  "pmix.jctrl.kill"       // (bool) forcibly terminate the specified processes and cleanup
#define PMIX_JOB_CTRL_RESTART               "pmix.jctrl.restart"    // (char*) restart the specified processes using the given checkpoint ID
#define PMIX_JOB_CTRL_CHECKPOINT            "pmix.jctrl.ckpt"       // (char*) checkpoint the specified processes and assign the given ID to it
#define PMIX_JOB_CTRL_CHECKPOINT_EVENT      "pmix.jctrl.ckptev"     // (bool) use event notification to trigger process checkpoint
#define PMIX_JOB_CTRL_CHECKPOINT_SIGNAL     "pmix.jctrl.ckptsig"    // (int) use the given signal to trigger process checkpoint
// NOTE(review): the TIMEOUT key below carries the same string as
// PMIX_JOB_CTRL_CHECKPOINT_SIGNAL, so the two attributes cannot be told apart
// by key. The value is deliberately left untouched here because key strings
// are exchanged between peers; confirm against the PMIx attribute tables
// before assigning a distinct string.
#define PMIX_JOB_CTRL_CHECKPOINT_TIMEOUT    "pmix.jctrl.ckptsig"    // (int) time in seconds to wait for checkpoint to complete
#define PMIX_JOB_CTRL_CHECKPOINT_METHOD     "pmix.jctrl.ckmethod"   // (pmix_data_array_t) array of pmix_info_t declaring each
                                                                    // method and value supported by this application
#define PMIX_JOB_CTRL_SIGNAL                "pmix.jctrl.sig"        // (int) send given signal to specified processes
#define PMIX_JOB_CTRL_PROVISION             "pmix.jctrl.pvn"        // (char*) regex identifying nodes that are to be provisioned
#define PMIX_JOB_CTRL_PROVISION_IMAGE       "pmix.jctrl.pvnimg"     // (char*) name of the image that is to be provisioned
#define PMIX_JOB_CTRL_PREEMPTIBLE           "pmix.jctrl.preempt"    // (bool) job can be pre-empted
#define PMIX_JOB_CTRL_TERMINATE             "pmix.jctrl.term"       // (bool) politely terminate the specified procs
#define PMIX_REGISTER_CLEANUP               "pmix.reg.cleanup"      // (char*) comma-delimited list of files to
                                                                    // be removed upon process termination
#define PMIX_REGISTER_CLEANUP_DIR           "pmix.reg.cleanupdir"   // (char*) comma-delimited list of directories to
                                                                    // be removed upon process termination
#define PMIX_CLEANUP_RECURSIVE              "pmix.clnup.recurse"    // (bool) recursively cleanup all subdirectories under the
                                                                    // specified one(s)
#define PMIX_CLEANUP_EMPTY                  "pmix.clnup.empty"      // (bool) only remove empty subdirectories
#define PMIX_CLEANUP_IGNORE                 "pmix.clnup.ignore"     // (char*) comma-delimited list of filenames that are not
                                                                    // to be removed
#define PMIX_CLEANUP_LEAVE_TOPDIR           "pmix.clnup.lvtop"      // (bool) when recursively cleaning subdirs, do not remove
                                                                    // the top-level directory (the one given in the
                                                                    // cleanup request)

/* monitoring attributes */
#define PMIX_MONITOR_ID                 "pmix.monitor.id"       // (char*) provide a string identifier for this request
#define PMIX_MONITOR_CANCEL             "pmix.monitor.cancel"   // (char*) identifier to be canceled (NULL = cancel all
                                                                // monitoring for this process)
#define PMIX_MONITOR_APP_CONTROL        "pmix.monitor.appctrl"  // (bool) the application desires to control the response to
                                                                // a monitoring event
#define PMIX_MONITOR_HEARTBEAT          "pmix.monitor.mbeat"    // (void) register to have the server monitor the requestor for heartbeats
#define PMIX_SEND_HEARTBEAT             "pmix.monitor.beat"     // (void) send heartbeat to local server
#define PMIX_MONITOR_HEARTBEAT_TIME     "pmix.monitor.btime"    // (uint32_t) time in seconds before declaring heartbeat missed
#define PMIX_MONITOR_HEARTBEAT_DROPS    "pmix.monitor.bdrop"    // (uint32_t) number of heartbeats that can be missed before
                                                                // generating the event
#define PMIX_MONITOR_FILE               "pmix.monitor.fmon"     // (char*) register to monitor file for signs of life
#define PMIX_MONITOR_FILE_SIZE          "pmix.monitor.fsize"    // (bool) monitor size of given file is growing to determine app is running
#define PMIX_MONITOR_FILE_ACCESS        "pmix.monitor.faccess"  // (char*) monitor time since last access of given file to determine app is running
#define PMIX_MONITOR_FILE_MODIFY        "pmix.monitor.fmod"     // (char*) monitor time since last modified of given file to determine app is running
#define PMIX_MONITOR_FILE_CHECK_TIME    "pmix.monitor.ftime"    // (uint32_t) time in seconds between checking file
#define PMIX_MONITOR_FILE_DROPS         "pmix.monitor.fdrop"    // (uint32_t) number of file checks that can be missed before
                                                                // generating the event

/* security attributes */
#define PMIX_CRED_TYPE                  "pmix.sec.ctype"        // (char*) when passed in PMIx_Get_credential, a prioritized,
                                                                // comma-delimited list of desired credential types for use
                                                                // in environments where multiple authentication mechanisms
                                                                // may be available. When returned in a callback function, a
                                                                // string identifier of the credential type
#define PMIX_CRYPTO_KEY                 "pmix.sec.key"          // (pmix_byte_object_t) blob containing crypto key


/* IO Forwarding Attributes */
#define PMIX_IOF_CACHE_SIZE             "pmix.iof.csize"        // (uint32_t) requested size of the server cache in bytes for each specified channel.
                                                                // By default, the server is allowed (but not required) to drop
                                                                // all bytes received beyond the max size
#define PMIX_IOF_DROP_OLDEST            "pmix.iof.old"          // (bool) in an overflow situation, drop the oldest bytes to make room in the cache
#define PMIX_IOF_DROP_NEWEST            "pmix.iof.new"          // (bool) in an overflow situation, drop any new bytes received until room becomes
                                                                // available in the cache (default)
#define PMIX_IOF_BUFFERING_SIZE         "pmix.iof.bsize"        // (uint32_t) basically controls grouping of IO on the specified channel(s) to
                                                                // avoid being called every time a bit of IO arrives. The library
                                                                // will execute the callback whenever the specified number of bytes
                                                                // becomes available. Any remaining buffered data will be "flushed"
                                                                // upon call to deregister the respective channel
#define PMIX_IOF_BUFFERING_TIME         "pmix.iof.btime"        // (uint32_t) max time in seconds to buffer IO before delivering it. Used in conjunction
                                                                // with buffering size, this prevents IO from being held indefinitely
                                                                // while waiting for another payload to arrive
#define PMIX_IOF_COMPLETE               "pmix.iof.cmp"          // (bool) indicates whether or not the specified IO channel has been closed
                                                                // by the source
#define PMIX_IOF_PUSH_STDIN             "pmix.iof.stdin"        // (bool) Used by a tool to request that the PMIx library collect
                                                                // the tool's stdin and forward it to the procs specified in
                                                                // the PMIx_IOF_push call
#define PMIX_IOF_TAG_OUTPUT             "pmix.iof.tag"          // (bool) Tag output with the channel it comes from
#define PMIX_IOF_TIMESTAMP_OUTPUT       "pmix.iof.ts"           // (bool) Timestamp output
#define PMIX_IOF_XML_OUTPUT             "pmix.iof.xml"          // (bool) Format output in XML


/* Attributes for controlling contents of application setup data */
#define PMIX_SETUP_APP_ENVARS           "pmix.setup.env"        // (bool) harvest and include relevant envars
#define PMIX_SETUP_APP_NONENVARS        "pmix.setup.nenv"       // (bool) include all non-envar data
#define PMIX_SETUP_APP_ALL              "pmix.setup.all"        // (bool) include all relevant data

/* Attributes supporting the PMIx Groups APIs */
#define PMIX_GROUP_ID                   "pmix.grp.id"           // (char*) user-provided group identifier
#define PMIX_GROUP_LEADER               "pmix.grp.ldr"          // (bool) this process is the leader of the group
#define PMIX_GROUP_OPTIONAL             "pmix.grp.opt"          // (bool) participation is optional - do not return an error if any of the
                                                                // specified processes terminate without having joined. The default
                                                                // is false
#define PMIX_GROUP_NOTIFY_TERMINATION   "pmix.grp.notterm"      // (bool) notify remaining members when another member terminates without first
                                                                // leaving the group. The default is false
#define PMIX_GROUP_INVITE_DECLINE       "pmix.grp.decline"      // (bool) notify the inviting process that this process does not wish to
                                                                // participate in the proposed group. The default is true
#define PMIX_GROUP_FT_COLLECTIVE        "pmix.grp.ftcoll"       // (bool) adjust internal tracking for terminated processes.
// Default is false
#define PMIX_GROUP_MEMBERSHIP           "pmix.grp.mbrs"         // (pmix_data_array_t*) array of group member ID's
#define PMIX_GROUP_ASSIGN_CONTEXT_ID    "pmix.grp.actxid"       // (bool) request that the RM assign a unique numerical (size_t) ID to this group
#define PMIX_GROUP_CONTEXT_ID           "pmix.grp.ctxid"        // (size_t) context ID assigned to group
#define PMIX_GROUP_LOCAL_ONLY           "pmix.grp.lcl"          // (bool) group operation only involves local procs
#define PMIX_GROUP_ENDPT_DATA           "pmix.grp.endpt"        // (pmix_byte_object_t) data collected to be shared during construction


/****    PROCESS STATE DEFINITIONS    ****/
typedef uint8_t pmix_proc_state_t;
#define PMIX_PROC_STATE_UNDEF                    0  /* undefined process state */
#define PMIX_PROC_STATE_PREPPED                  1  /* process is ready to be launched */
#define PMIX_PROC_STATE_LAUNCH_UNDERWAY          2  /* launch process underway */
#define PMIX_PROC_STATE_RESTART                  3  /* the proc is ready for restart */
#define PMIX_PROC_STATE_TERMINATE                4  /* process is marked for termination */
#define PMIX_PROC_STATE_RUNNING                  5  /* daemon has locally fork'd process */
#define PMIX_PROC_STATE_CONNECTED                6  /* proc connected to PMIx server */
/*
 * Define a "boundary" so users can easily and quickly determine
 * if a proc is still running or not - any value less than
 * this one means that the proc has not terminated
 */
#define PMIX_PROC_STATE_UNTERMINATED            15

#define PMIX_PROC_STATE_TERMINATED              20  /* process has terminated and is no longer running */
/* Define a boundary so users can easily and quickly determine
 * if a proc abnormally terminated - leave a little room
 * for future expansion
 */
#define PMIX_PROC_STATE_ERROR                   50
/* Define specific error code values */
#define PMIX_PROC_STATE_KILLED_BY_CMD           (PMIX_PROC_STATE_ERROR +  1)  /* process was killed by cmd */
#define PMIX_PROC_STATE_ABORTED                 (PMIX_PROC_STATE_ERROR +  2)  /* process aborted */
#define PMIX_PROC_STATE_FAILED_TO_START         (PMIX_PROC_STATE_ERROR +  3)  /* process failed to start */
#define PMIX_PROC_STATE_ABORTED_BY_SIG          (PMIX_PROC_STATE_ERROR +  4)  /* process aborted by signal */
#define PMIX_PROC_STATE_TERM_WO_SYNC            (PMIX_PROC_STATE_ERROR +  5)  /* process exit'd w/o calling PMIx_Finalize */
#define PMIX_PROC_STATE_COMM_FAILED             (PMIX_PROC_STATE_ERROR +  6)  /* process communication has failed */
#define PMIX_PROC_STATE_SENSOR_BOUND_EXCEEDED   (PMIX_PROC_STATE_ERROR +  7)  /* process exceeded a sensor limit */
#define PMIX_PROC_STATE_CALLED_ABORT            (PMIX_PROC_STATE_ERROR +  8)  /* process called "PMIx_Abort" */
#define PMIX_PROC_STATE_HEARTBEAT_FAILED        (PMIX_PROC_STATE_ERROR +  9)  /* process failed to send heartbeat w/in time limit */
#define PMIX_PROC_STATE_MIGRATING               (PMIX_PROC_STATE_ERROR + 10)  /* process failed and is waiting for resources before restarting */
#define PMIX_PROC_STATE_CANNOT_RESTART          (PMIX_PROC_STATE_ERROR + 11)  /* process failed and cannot be restarted */
#define PMIX_PROC_STATE_TERM_NON_ZERO           (PMIX_PROC_STATE_ERROR + 12)  /* process exited with a non-zero status, indicating abnormal */
#define PMIX_PROC_STATE_FAILED_TO_LAUNCH        (PMIX_PROC_STATE_ERROR + 13)  /* unable to launch process */


/****    PMIX ERROR CONSTANTS    ****/
/* PMIx errors are always negative, with 0 reserved for success */
#define PMIX_ERR_BASE                   0

typedef int pmix_status_t;

/* v1.x error values - must be fixed in place for backward
 * compatibility. Note that some number of these have been
 * deprecated and may not be returned by v2.x and above
 * clients or servers. However, they must always be
 * at least defined to ensure older codes will compile */
#define PMIX_SUCCESS                                 0
#define PMIX_ERROR                                  -1          // general error
#define PMIX_ERR_SILENT                             -2
/* debugger release flag */
#define PMIX_ERR_DEBUGGER_RELEASE                   -3
/* fault tolerance */
#define PMIX_ERR_PROC_RESTART                       -4
#define PMIX_ERR_PROC_CHECKPOINT                    -5
#define PMIX_ERR_PROC_MIGRATE                       -6
/* abort */
#define PMIX_ERR_PROC_ABORTED                       -7
#define PMIX_ERR_PROC_REQUESTED_ABORT               -8
#define PMIX_ERR_PROC_ABORTING                      -9
/* communication failures */
#define PMIX_ERR_SERVER_FAILED_REQUEST              -10
#define PMIX_EXISTS                                 -11
#define PMIX_ERR_INVALID_CRED                       -12
#define PMIX_ERR_HANDSHAKE_FAILED                   -13
#define PMIX_ERR_READY_FOR_HANDSHAKE                -14
#define PMIX_ERR_WOULD_BLOCK                        -15
#define PMIX_ERR_UNKNOWN_DATA_TYPE                  -16
#define PMIX_ERR_PROC_ENTRY_NOT_FOUND               -17
#define PMIX_ERR_TYPE_MISMATCH                      -18
#define PMIX_ERR_UNPACK_INADEQUATE_SPACE            -19
#define PMIX_ERR_UNPACK_FAILURE                     -20
#define PMIX_ERR_PACK_FAILURE                       -21
#define PMIX_ERR_PACK_MISMATCH                      -22
#define PMIX_ERR_NO_PERMISSIONS                     -23
#define PMIX_ERR_TIMEOUT                            -24
#define PMIX_ERR_UNREACH                            -25
#define PMIX_ERR_IN_ERRNO                           -26
#define PMIX_ERR_BAD_PARAM                          -27
#define PMIX_ERR_RESOURCE_BUSY                      -28
#define PMIX_ERR_OUT_OF_RESOURCE                    -29
#define PMIX_ERR_DATA_VALUE_NOT_FOUND               -30
#define PMIX_ERR_INIT                               -31
#define PMIX_ERR_NOMEM                              -32
#define PMIX_ERR_INVALID_ARG                        -33
#define PMIX_ERR_INVALID_KEY                        -34
#define PMIX_ERR_INVALID_KEY_LENGTH                 -35
#define PMIX_ERR_INVALID_VAL                        -36
#define PMIX_ERR_INVALID_VAL_LENGTH                 -37
#define PMIX_ERR_INVALID_LENGTH                     -38
#define PMIX_ERR_INVALID_NUM_ARGS                   -39
#define PMIX_ERR_INVALID_ARGS                       -40
#define PMIX_ERR_INVALID_NUM_PARSED                 -41
#define PMIX_ERR_INVALID_KEYVALP                    -42
#define PMIX_ERR_INVALID_SIZE                       -43
#define PMIX_ERR_INVALID_NAMESPACE                  -44
#define PMIX_ERR_SERVER_NOT_AVAIL                   -45
#define PMIX_ERR_NOT_FOUND                          -46
#define PMIX_ERR_NOT_SUPPORTED                      -47
#define PMIX_ERR_NOT_IMPLEMENTED                    -48
#define PMIX_ERR_COMM_FAILURE                       -49
#define PMIX_ERR_UNPACK_READ_PAST_END_OF_BUFFER     -50
#define PMIX_ERR_CONFLICTING_CLEANUP_DIRECTIVES     -51

/* define a starting point for v2.x error values */
#define PMIX_ERR_V2X_BASE                   -100

/* v2.x communication errors */
#define PMIX_ERR_LOST_CONNECTION_TO_SERVER  -101
#define PMIX_ERR_LOST_PEER_CONNECTION       -102
#define PMIX_ERR_LOST_CONNECTION_TO_CLIENT  -103
/* used by the query system */
#define PMIX_QUERY_PARTIAL_SUCCESS          -104
/* request responses */
#define PMIX_NOTIFY_ALLOC_COMPLETE          -105
/* job control */
#define PMIX_JCTRL_CHECKPOINT               -106    // monitored by client to trigger checkpoint operation
#define PMIX_JCTRL_CHECKPOINT_COMPLETE      -107    // sent by client and monitored by server to notify that requested
                                                    // checkpoint operation has completed
#define PMIX_JCTRL_PREEMPT_ALERT            -108    // monitored by client to detect RM intends to preempt

/* monitoring */
#define PMIX_MONITOR_HEARTBEAT_ALERT        -109
#define PMIX_MONITOR_FILE_ALERT             -110
#define PMIX_PROC_TERMINATED                -111
#define PMIX_ERR_INVALID_TERMINATION        -112

/* operational */
#define PMIX_ERR_EVENT_REGISTRATION         -144
#define PMIX_ERR_JOB_TERMINATED             -145
#define PMIX_ERR_UPDATE_ENDPOINTS           -146
#define PMIX_MODEL_DECLARED                 -147
#define PMIX_GDS_ACTION_COMPLETE            -148
#define PMIX_PROC_HAS_CONNECTED             -149
#define PMIX_CONNECT_REQUESTED              -150
#define PMIX_MODEL_RESOURCES                -151    // model resource usage has changed
#define PMIX_OPENMP_PARALLEL_ENTERED        -152    // an OpenMP parallel region has been entered
#define PMIX_OPENMP_PARALLEL_EXITED         -153    // an OpenMP parallel region has completed
#define PMIX_LAUNCH_DIRECTIVE               -154
#define PMIX_LAUNCHER_READY                 -155
#define PMIX_OPERATION_IN_PROGRESS          -156
#define PMIX_OPERATION_SUCCEEDED            -157
#define PMIX_ERR_INVALID_OPERATION          -158
#define PMIX_GROUP_INVITED                  -159
#define PMIX_GROUP_LEFT                     -160
#define PMIX_GROUP_INVITE_ACCEPTED          -161
#define PMIX_GROUP_INVITE_DECLINED          -162
#define PMIX_GROUP_INVITE_FAILED            -163
#define PMIX_GROUP_MEMBERSHIP_UPDATE        -164
#define PMIX_GROUP_CONSTRUCT_ABORT          -165
#define PMIX_GROUP_CONSTRUCT_COMPLETE       -166
#define PMIX_GROUP_LEADER_SELECTED          -167
#define PMIX_GROUP_LEADER_FAILED            -168
#define PMIX_GROUP_CONTEXT_ID_ASSIGNED      -169
#define PMIX_ERR_REPEAT_ATTR_REGISTRATION   -170
#define PMIX_ERR_IOF_FAILURE                -171
#define PMIX_ERR_IOF_COMPLETE               -172
#define PMIX_LAUNCH_COMPLETE                -173    // include nspace of the launched job with notification
#define PMIX_FABRIC_UPDATED                 -174
#define PMIX_FABRIC_UPDATE_PENDING          -175

/* system failures */
#define PMIX_ERR_SYS_BASE                   -230
#define PMIX_ERR_NODE_DOWN                  -231
#define PMIX_ERR_NODE_OFFLINE               -232
#define PMIX_ERR_SYS_OTHER                  -330

/* define a macro for identifying system event values: true when (a)
 * lies in the closed interval [PMIX_ERR_SYS_OTHER, PMIX_ERR_SYS_BASE].
 * The argument is evaluated twice, so it must be free of side effects */
#define PMIX_SYSTEM_EVENT(a)   \
    ((a) <= PMIX_ERR_SYS_BASE && PMIX_ERR_SYS_OTHER <= (a))

/* used by event handlers */
#define PMIX_EVENT_NO_ACTION_TAKEN          -331
#define PMIX_EVENT_PARTIAL_ACTION_TAKEN     -332
#define PMIX_EVENT_ACTION_DEFERRED          -333
#define PMIX_EVENT_ACTION_COMPLETE          -334

/* define a starting point for PMIx internal error codes
 * that are never exposed outside the library */
#define PMIX_INTERNAL_ERR_BASE              -1330

/* define a starting point for user-level defined error
 * constants - negative values larger than this are guaranteed
 * not to conflict with PMIx values. Definitions should always
 * be based on the PMIX_EXTERNAL_ERR_BASE constant and -not- a
 * specific value as the value of the constant may change.
 * The expansion is parenthesized so the subtraction cannot be
 * re-associated by operators surrounding a use of the macro */
#define PMIX_EXTERNAL_ERR_BASE              (PMIX_INTERNAL_ERR_BASE - 2000)

/****    PMIX DATA TYPES    ****/
typedef uint16_t pmix_data_type_t;
#define PMIX_UNDEF                       0
#define PMIX_BOOL                        1  // converted to/from native true/false to uint8 for pack/unpack
#define PMIX_BYTE                        2  // a byte of data
#define PMIX_STRING                      3  // NULL-terminated string
#define PMIX_SIZE                        4  // size_t
#define PMIX_PID                         5  // OS-pid
#define PMIX_INT                         6
#define PMIX_INT8                        7
#define PMIX_INT16                       8
#define PMIX_INT32                       9
#define PMIX_INT64                      10
#define PMIX_UINT                       11
#define PMIX_UINT8                      12
#define PMIX_UINT16                     13
#define PMIX_UINT32                     14
#define PMIX_UINT64                     15
#define PMIX_FLOAT                      16
#define PMIX_DOUBLE                     17
#define PMIX_TIMEVAL                    18
#define PMIX_TIME                       19
#define PMIX_STATUS                     20  // needs to be tracked separately from integer for those times
                                            // when we are embedded and it needs to be converted to the
                                            // host error definitions
#define PMIX_VALUE                      21
#define PMIX_PROC                       22
#define PMIX_APP                        23
#define PMIX_INFO                       24
#define PMIX_PDATA                      25
#define PMIX_BUFFER                     26
#define PMIX_BYTE_OBJECT                27
#define PMIX_KVAL                       28
// Hole left by deprecation/removal of PMIX_MODEX
#define PMIX_PERSIST                    30
#define PMIX_POINTER                    31
#define PMIX_SCOPE                      32
#define PMIX_DATA_RANGE                 33
#define PMIX_COMMAND                    34
#define PMIX_INFO_DIRECTIVES            35
#define PMIX_DATA_TYPE                  36
#define PMIX_PROC_STATE                 37
#define PMIX_PROC_INFO                  38
#define PMIX_DATA_ARRAY                 39
#define PMIX_PROC_RANK                  40
#define PMIX_QUERY                      41
#define PMIX_COMPRESSED_STRING          42  // string compressed with zlib
#define PMIX_ALLOC_DIRECTIVE            43
// Hole left by deprecation/removal of PMIX_INFO_ARRAY
#define PMIX_IOF_CHANNEL                45
#define PMIX_ENVAR                      46
#define PMIX_COORD                      47
#define PMIX_REGATTR                    48
#define PMIX_REGEX                      49
/********************/

/* define a boundary for implementers so they can add their own data types */
#define PMIX_DATA_TYPE_MAX              500


/* define a scope for data "put" by PMIx per the following:
 *
 * PMIX_LOCAL - the data is intended only for other application
 *              processes on the same node. Data marked in this way
 *              will not be included in data packages sent to remote requestors
 * PMIX_REMOTE - the data is intended solely for applications processes on
 *               remote nodes. Data marked in this way will not be shared with
 *               other processes on the same node
 * PMIX_GLOBAL - the data is to be shared with all other requesting processes,
 *               regardless of location
 */
typedef uint8_t pmix_scope_t;
#define PMIX_SCOPE_UNDEF    0
#define PMIX_LOCAL          1   // share to procs also on this node
#define PMIX_REMOTE         2   // share with procs not on this node
#define PMIX_GLOBAL         3   // share with all procs (local + remote)
#define PMIX_INTERNAL       4   // store data in the internal tables

/* define a range for data "published" by PMIx
 */
typedef uint8_t pmix_data_range_t;
#define PMIX_RANGE_UNDEF        0
#define PMIX_RANGE_RM           1   // data is intended for the host resource manager
#define PMIX_RANGE_LOCAL        2   // available on local node only
#define PMIX_RANGE_NAMESPACE    3   // data is available to procs in the same nspace only
#define PMIX_RANGE_SESSION      4   // data available to all procs in session
#define PMIX_RANGE_GLOBAL       5   // data available to all procs
#define PMIX_RANGE_CUSTOM       6   // range is specified in a pmix_info_t
#define PMIX_RANGE_PROC_LOCAL   7   // restrict range to the local proc
#define PMIX_RANGE_INVALID      UINT8_MAX

/* define a "persistence" policy for data published by clients */
typedef uint8_t pmix_persistence_t;
#define PMIX_PERSIST_INDEF          0   // retain until specifically deleted
#define PMIX_PERSIST_FIRST_READ     1   // delete upon first access
#define PMIX_PERSIST_PROC           2   // retain until publishing process terminates
#define PMIX_PERSIST_APP            3   // retain until application terminates
#define PMIX_PERSIST_SESSION        4   // retain until session/allocation terminates
#define PMIX_PERSIST_INVALID        UINT8_MAX

/* define a set of bit-mask flags for specifying behavior of
 * command directives via pmix_info_t arrays */
typedef uint32_t pmix_info_directives_t;
#define PMIX_INFO_REQD              0x00000001
#define PMIX_INFO_ARRAY_END         0x00000002      // mark the end of an array created by PMIX_INFO_CREATE
/* the top 16-bits are reserved for internal use by
 * implementers - these may be changed inside the
 * PMIx library */
#define PMIX_INFO_DIR_RESERVED      0xffff0000

/* define a set of directives for allocation requests */
typedef uint8_t pmix_alloc_directive_t;
#define PMIX_ALLOC_NEW          1   // new allocation is being requested. The resulting allocation will be
                                    // disjoint (i.e., not connected in a job sense) from the requesting allocation
#define PMIX_ALLOC_EXTEND       2   // extend the existing allocation, either in time or as additional resources
#define PMIX_ALLOC_RELEASE      3   // release part of the existing allocation. Attributes in the accompanying
                                    // pmix_info_t array may be used to specify permanent release of the
                                    // identified resources, or "lending" of those resources for some period
                                    // of time.
#define PMIX_ALLOC_REAQUIRE     4   // reacquire resources that were previously "lent" back to the scheduler

/* define a value boundary beyond which implementers are free
 * to define their own directive values */
#define PMIX_ALLOC_EXTERNAL     128


/* define a set of bit-mask flags for specifying IO
 * forwarding channels. These can be OR'd together
 * to reference multiple channels */
typedef uint16_t pmix_iof_channel_t;
#define PMIX_FWD_NO_CHANNELS        0x0000
#define PMIX_FWD_STDIN_CHANNEL      0x0001
#define PMIX_FWD_STDOUT_CHANNEL     0x0002
#define PMIX_FWD_STDERR_CHANNEL     0x0004
#define PMIX_FWD_STDDIAG_CHANNEL    0x0008
#define PMIX_FWD_ALL_CHANNELS       0x00ff

/* define values associated with PMIx_Group_join
 * to indicate accept and decline - this is
 * done for readability of user code */
typedef enum {
    PMIX_GROUP_DECLINE,
    PMIX_GROUP_ACCEPT
} pmix_group_opt_t;

typedef enum {
    PMIX_GROUP_CONSTRUCT,
    PMIX_GROUP_DESTRUCT
} pmix_group_operation_t;



/* define some "hooks" external libraries can use to
 * intercept memory allocation/release operations.
 * Every allocation macro in this section goes through
 * these wrappers rather than calling malloc/calloc/free directly */
static inline void* pmix_malloc(size_t n)
{
    return malloc(n);
}

static inline void pmix_free(void *m)
{
    free(m);
}

static inline void* pmix_calloc(size_t n, size_t m)
{
    return calloc(n, m);
}

/* declare a convenience macro for checking keys:
 * non-zero when (a)->key equals the string (b) within PMIX_MAX_KEYLEN chars */
#define PMIX_CHECK_KEY(a, b) \
    (0 == strncmp((a)->key, (b), PMIX_MAX_KEYLEN))

/* zero the key buffer (a), then copy at most PMIX_MAX_KEYLEN chars of (b) into it */
#define PMIX_LOAD_KEY(a, b)                                             \
    do {                                                                \
        memset((a), 0, PMIX_MAX_KEYLEN+1);                              \
        pmix_strncpy((char*)(a), (const char*)(b), PMIX_MAX_KEYLEN);    \
    }while(0)

/* define a convenience macro for loading nspaces */
#define PMIX_LOAD_NSPACE(a, b)                              \
    do {                                                    \
        memset((a), 0, PMIX_MAX_NSLEN+1);                   \
        pmix_strncpy((char*)(a), (b), PMIX_MAX_NSLEN);      \
    }while(0)

/* define a convenience macro for checking nspaces */
#define PMIX_CHECK_NSPACE(a, b) \
    (0 == strncmp((a), (b), PMIX_MAX_NSLEN))

/* define a convenience macro for loading names */
#define PMIX_LOAD_PROCID(a, b, c)               \
    do {                                        \
        PMIX_LOAD_NSPACE((a)->nspace, (b));     \
        (a)->rank = (c);                        \
    }while(0)

/* define a convenience macro for checking names: the nspaces must
 * match and the ranks must either be equal or one of them must be
 * PMIX_RANK_WILDCARD */
#define PMIX_CHECK_PROCID(a, b) \
    (PMIX_CHECK_NSPACE((a)->nspace, (b)->nspace) && ((a)->rank == (b)->rank || (PMIX_RANK_WILDCARD == (a)->rank || PMIX_RANK_WILDCARD == (b)->rank)))


/****    PMIX COORD    ****/
/* define coordinate system views */
typedef uint8_t pmix_coord_view_t;
#define PMIX_COORD_VIEW_UNDEF       0x00
#define PMIX_COORD_LOGICAL_VIEW     0x01
#define PMIX_COORD_PHYSICAL_VIEW    0x02

/* define a structure for a proc's network coordinate */
typedef struct pmix_coord {
    char *fabric;
    char *plane;
    pmix_coord_view_t view;
    int *coord;
    size_t dims;
} pmix_coord_t;

/* allocate a zeroed array of (n) coords in (m) and give the FIRST
 * element a zeroed coordinate vector of (d) dimensions; any further
 * elements are left zeroed (coord == NULL, dims == 0).
 * (m) is NULL on allocation failure. If the coordinate vector itself
 * cannot be allocated, dims is reset to 0 so that it never claims
 * entries that do not exist */
#define PMIX_COORD_CREATE(m, d, n)                                      \
    do {                                                                \
        (m) = (pmix_coord_t*)pmix_calloc((n), sizeof(pmix_coord_t));    \
        if (NULL != (m)) {                                              \
            (m)->fabric = NULL;                                         \
            (m)->plane = NULL;                                          \
            (m)->view = PMIX_COORD_VIEW_UNDEF;                          \
            (m)->dims = (d);                                            \
            (m)->coord = (int*)pmix_calloc((m)->dims, sizeof(int));     \
            if (NULL == (m)->coord) {                                   \
                (m)->dims = 0;                                          \
            }                                                           \
        }                                                               \
    } while(0)

/* put a caller-provided coord into the empty state without allocating */
#define PMIX_COORD_CONSTRUCT(m)                 \
    do {                                        \
        (m)->fabric = NULL;                     \
        (m)->plane = NULL;                      \
        (m)->view = PMIX_COORD_VIEW_UNDEF;      \
        (m)->coord = NULL;                      \
        (m)->dims = 0;                          \
    } while(0)

/* release everything owned by the coord and return it to the empty
 * state. fabric and plane are released independently of coord (they
 * used to leak whenever coord was NULL), every pointer is cleared so a
 * second destruct is harmless, and all releases go through pmix_free */
#define PMIX_COORD_DESTRUCT(m)                  \
    do {                                        \
        (m)->view = PMIX_COORD_VIEW_UNDEF;      \
        if (NULL != (m)->fabric) {              \
            pmix_free((m)->fabric);             \
            (m)->fabric = NULL;                 \
        }                                       \
        if (NULL != (m)->plane) {               \
            pmix_free((m)->plane);              \
            (m)->plane = NULL;                  \
        }                                       \
        if (NULL != (m)->coord) {               \
            pmix_free((m)->coord);              \
            (m)->coord = NULL;                  \
        }                                       \
        (m)->dims = 0;                          \
    } while(0)

/* destruct each of the (n) coords in array (m), release the array
 * through pmix_free (it was obtained from pmix_calloc) and set (m) to NULL */
#define PMIX_COORD_FREE(m, n)                           \
    do {                                                \
        size_t _nc_;                                    \
        if (NULL != (m)) {                              \
            for (_nc_ = 0; _nc_ < (n); _nc_++) {        \
                PMIX_COORD_DESTRUCT(&(m)[_nc_]);        \
            }                                           \
            pmix_free((m));                             \
            (m) = NULL;                                 \
        }                                               \
    } while(0)


/****    PMIX BYTE OBJECT    ****/
typedef struct pmix_byte_object {
    char *bytes;
    size_t size;
} pmix_byte_object_t;

/* allocate a zeroed array of (n) byte objects in (m); (m) is NULL on
 * failure. pmix_calloc is used so the element-count multiplication is
 * checked by the allocator */
#define PMIX_BYTE_OBJECT_CREATE(m, n)                                               \
    do {                                                                            \
        (m) = (pmix_byte_object_t*)pmix_calloc((n), sizeof(pmix_byte_object_t));    \
    } while(0)

#define PMIX_BYTE_OBJECT_CONSTRUCT(m)   \
    do {                                \
        (m)->bytes = NULL;              \
        (m)->size = 0;                  \
    } while(0)

/* release the payload and return the object to the empty state so it
 * can be destructed again or reused without a dangling pointer */
#define PMIX_BYTE_OBJECT_DESTRUCT(m)        \
    do {                                    \
        if (NULL != (m)->bytes) {           \
            pmix_free((m)->bytes);          \
        }                                   \
        (m)->bytes = NULL;                  \
        (m)->size = 0;                      \
    } while(0)

/* release the payload of each of the (n) objects in array (m), then the
 * array itself, and set (m) to NULL. (n) is parenthesized so that any
 * expression may be passed as the count */
#define PMIX_BYTE_OBJECT_FREE(m, n)                         \
    do {                                                    \
        size_t _bon;                                        \
        if (NULL != (m)) {                                  \
            for (_bon=0; _bon < (n); _bon++) {              \
                if (NULL != (m)[_bon].bytes) {              \
                    pmix_free((m)[_bon].bytes);             \
                }                                           \
            }                                               \
            pmix_free((m));                                 \
            (m) = NULL;                                     \
        }                                                   \
    } while(0)

/* transfer ownership of buffer (d) of size (s) into object (b):
 * the caller's (d) and (s) are cleared, so both must be lvalues */
#define PMIX_BYTE_OBJECT_LOAD(b, d, s)      \
    do {                                    \
        (b)->bytes = (d);                   \
        (d) = NULL;                         \
        (b)->size = (s);                    \
        (s) = 0;                            \
    } while(0)


/****    PMIX ENVAR STRUCT   ****/
/* Provide a structure for specifying environment variable modifications
 * Standard environment variables (e.g., PATH, LD_LIBRARY_PATH, and LD_PRELOAD)
 * take multiple arguments separated by delimiters. Unfortunately, the delimiters
 * depend upon the variable itself - some use semi-colons, some colons, etc. Thus,
 * the operation requires not only the name of the variable to be modified and
 * the value to be inserted, but also the separator to be used when composing
 * the aggregate value
 */
typedef struct {
    char *envar;
    char *value;
    char separator;
} pmix_envar_t;

/* allocate a zeroed array of (n) envar structs in (m); NULL on failure */
#define PMIX_ENVAR_CREATE(m, n)                                         \
    do {                                                                \
        (m) = (pmix_envar_t*)pmix_calloc((n) , sizeof(pmix_envar_t));   \
    } while (0)
/* destruct each of the (n) entries of array (m) and release the array.
 * (m) itself is not modified */
#define PMIX_ENVAR_FREE(m, n)                       \
    do {                                            \
        size_t _ek;                                 \
        if (NULL != (m)) {                          \
            for (_ek=0; _ek < (n); _ek++) {         \
                PMIX_ENVAR_DESTRUCT(&(m)[_ek]);     \
            }                                       \
            pmix_free((m));                         \
        }                                           \
    } while (0)
#define PMIX_ENVAR_CONSTRUCT(m)         \
    do {                                \
        (m)->envar = NULL;              \
        (m)->value = NULL;              \
        (m)->separator = '\0';          \
    } while(0)
#define PMIX_ENVAR_DESTRUCT(m)          \
    do {                                \
        if (NULL != (m)->envar) {       \
            pmix_free((m)->envar);      \
            (m)->envar = NULL;          \
        }                               \
        if (NULL != (m)->value) {       \
            pmix_free((m)->value);      \
            (m)->value = NULL;          \
        }                               \
    } while(0)
/* store copies (strdup) of name (e) and value (v) in (m) together with
 * separator (s). A NULL (e) or (v) leaves the corresponding field as it
 * was, so (m) should have been constructed or zeroed beforehand */
#define PMIX_ENVAR_LOAD(m, e, v, s)     \
    do {                                \
        if (NULL != (e)) {              \
            (m)->envar = strdup(e);     \
        }                               \
        if (NULL != (v)) {              \
            (m)->value = strdup(v);     \
        }                               \
        (m)->separator = (s);           \
    } while(0)


/****    PMIX DATA BUFFER    ****/
+typedef struct pmix_data_buffer { + /** Start of my memory */ + char *base_ptr; + /** Where the next data will be packed to (within the allocated + memory starting at base_ptr) */ + char *pack_ptr; + /** Where the next data will be unpacked from (within the + allocated memory starting as base_ptr) */ + char *unpack_ptr; + /** Number of bytes allocated (starting at base_ptr) */ + size_t bytes_allocated; + /** Number of bytes used by the buffer (i.e., amount of data -- + including overhead -- packed in the buffer) */ + size_t bytes_used; +} pmix_data_buffer_t; +#define PMIX_DATA_BUFFER_CREATE(m) \ + do { \ + (m) = (pmix_data_buffer_t*)pmix_calloc(1, sizeof(pmix_data_buffer_t)); \ + } while (0) +#define PMIX_DATA_BUFFER_RELEASE(m) \ + do { \ + if (NULL != (m)->base_ptr) { \ + pmix_free((m)->base_ptr); \ + } \ + pmix_free((m)); \ + (m) = NULL; \ + } while (0) +#define PMIX_DATA_BUFFER_CONSTRUCT(m) \ + memset((m), 0, sizeof(pmix_data_buffer_t)) +#define PMIX_DATA_BUFFER_DESTRUCT(m) \ + do { \ + if (NULL != (m)->base_ptr) { \ + pmix_free((m)->base_ptr); \ + (m)->base_ptr = NULL; \ + } \ + (m)->pack_ptr = NULL; \ + (m)->unpack_ptr = NULL; \ + (m)->bytes_allocated = 0; \ + (m)->bytes_used = 0; \ + } while (0) +#define PMIX_DATA_BUFFER_LOAD(b, d, s) \ + do { \ + (b)->base_ptr = (char*)(d); \ + (b)->pack_ptr = (b)->base_ptr + (s); \ + (b)->unpack_ptr = (b)->base_ptr; \ + (b)->bytes_allocated = (s); \ + (b)->bytes_used = (s); \ + } while(0) + +#define PMIX_DATA_BUFFER_UNLOAD(b, d, s) \ + do { \ + (d) = (b)->base_ptr; \ + (s) = (b)->bytes_used; \ + (b)->base_ptr = NULL; \ + } while(0) + +/**** PMIX PROC OBJECT ****/ +typedef struct pmix_proc { + pmix_nspace_t nspace; + pmix_rank_t rank; +} pmix_proc_t; +#define PMIX_PROC_CREATE(m, n) \ + do { \ + (m) = (pmix_proc_t*)pmix_calloc((n) , sizeof(pmix_proc_t)); \ + } while (0) + +#define PMIX_PROC_RELEASE(m) \ + do { \ + pmix_free((m)); \ + (m) = NULL; \ + } while (0) + +#define PMIX_PROC_CONSTRUCT(m) \ + do { \ + memset((m), 0, 
sizeof(pmix_proc_t)); \ + } while (0) + +#define PMIX_PROC_DESTRUCT(m) + +#define PMIX_PROC_FREE(m, n) \ + do { \ + if (NULL != (m)) { \ + pmix_free((m)); \ + (m) = NULL; \ + } \ + } while (0) + +#define PMIX_PROC_LOAD(m, n, r) \ + do { \ + PMIX_PROC_CONSTRUCT((m)); \ + pmix_strncpy((m)->nspace, (n), PMIX_MAX_NSLEN); \ + (m)->rank = (r); \ + } while(0) + +/* Compose "<c>:<n>" into t, which is zeroed over PMIX_MAX_NSLEN+1 bytes first. + * When the combined length does not fit, t is left as an empty string. */ +#define PMIX_MULTICLUSTER_NSPACE_CONSTRUCT(t, c, n) \ + do { \ + size_t _len; \ + memset((t), 0, PMIX_MAX_NSLEN+1); \ + _len = strlen((c)); \ + if ((_len + strlen((n))) < PMIX_MAX_NSLEN) { \ + pmix_strncpy((t), (c), PMIX_MAX_NSLEN); \ + (t)[_len] = ':'; \ + pmix_strncpy(&(t)[_len+1], (n), PMIX_MAX_NSLEN - _len); \ + } \ + } while(0) + +/* Split "<c>:<n>" held in t back into c and n. The index bound is tested + * before (t)[_n] is read, both outputs are always NUL-terminated, and n is + * left empty when t contains no ':' (the scan no longer steps past the + * terminator of t). c and n must each hold PMIX_MAX_NSLEN+1 bytes. */ +#define PMIX_MULTICLUSTER_NSPACE_PARSE(t, c, n) \ + do { \ + size_t _n, _j = 0; \ + for (_n=0; _n < PMIX_MAX_NSLEN && \ + '\0' != (t)[_n] && ':' != (t)[_n]; _n++) { \ + (c)[_n] = (t)[_n]; \ + } \ + (c)[_n] = '\0'; \ + if (':' == (t)[_n]) { \ + for (_n++; _n <= PMIX_MAX_NSLEN && \ + '\0' != (t)[_n]; _n++, _j++) { \ + (n)[_j] = (t)[_n]; \ + } \ + } \ + (n)[_j] = '\0'; \ + } while(0) + + +/**** PMIX PROC INFO STRUCT ****/ +typedef struct pmix_proc_info { + pmix_proc_t proc; + char *hostname; + char *executable_name; + pid_t pid; + int exit_code; + pmix_proc_state_t state; +} pmix_proc_info_t; +#define PMIX_PROC_INFO_CREATE(m, n) \ + do { \ + (m) = (pmix_proc_info_t*)pmix_calloc((n) , sizeof(pmix_proc_info_t)); \ + } while (0) + +#define PMIX_PROC_INFO_RELEASE(m) \ + do { \ + PMIX_PROC_INFO_FREE((m), 1); \ + } while (0) + +#define PMIX_PROC_INFO_CONSTRUCT(m) \ + do { \ + memset((m), 0, sizeof(pmix_proc_info_t)); \ + } while (0) + +#define PMIX_PROC_INFO_DESTRUCT(m) \ + do { \ + if (NULL != (m)->hostname) { \ + pmix_free((m)->hostname); \ + (m)->hostname = NULL; \ + } \ + if (NULL != (m)->executable_name) { \ + pmix_free((m)->executable_name); \ + (m)->executable_name = NULL; \ + } \ + } while(0) + +#define PMIX_PROC_INFO_FREE(m, n) \ + do { \ + size_t _k; \ + if (NULL != (m)) { \ + for (_k=0; _k < (n); _k++) { \ + PMIX_PROC_INFO_DESTRUCT(&(m)[_k]); \
+ } \ + pmix_free((m)); \ + } \ + } while (0) + + +/**** PMIX DATA ARRAY STRUCT ****/ + +typedef struct pmix_data_array { + pmix_data_type_t type; + size_t size; + void *array; +} pmix_data_array_t; + +/**** THE PMIX_DATA_ARRAY SUPPORT MACROS ARE DEFINED ****/ +/**** DOWN BELOW (NEAR THE BOTTOM OF THE FILE) TO ****/ +/**** AVOID CIRCULAR DEPENDENCIES ****/ + + +/* we cannot forward-declare the pmix_regattr_t struct + * as Cython doesn't know what to do with it. Thus, we + * will utilize the void* entry of the pmix_value_t to + * hold the pointer to pmix_regattr_t */ + +/**** PMIX VALUE STRUCT ****/ + +/* NOTE: operations can supply a collection of values under + * a single key by passing a pmix_value_t containing a + * data array of type PMIX_INFO, with each array element + * containing its own pmix_info_t object */ + +typedef struct pmix_value { + pmix_data_type_t type; + union { + bool flag; + uint8_t byte; + char *string; + size_t size; + pid_t pid; + int integer; + int8_t int8; + int16_t int16; + int32_t int32; + int64_t int64; + unsigned int uint; + uint8_t uint8; + uint16_t uint16; + uint32_t uint32; + uint64_t uint64; + float fval; + double dval; + struct timeval tv; + time_t time; + pmix_status_t status; + pmix_rank_t rank; + pmix_proc_t *proc; + pmix_byte_object_t bo; + pmix_persistence_t persist; + pmix_scope_t scope; + pmix_data_range_t range; + pmix_proc_state_t state; + pmix_proc_info_t *pinfo; + pmix_data_array_t *darray; + void *ptr; + pmix_alloc_directive_t adir; + pmix_envar_t envar; + pmix_coord_t *coord; + } data; +} pmix_value_t; +/* allocate and initialize a specified number of value structs: the array is + * zero-filled and every element's type is set to PMIX_UNDEF. The loop index + * is a size_t so that counts above INT_MAX are not truncated. */ +#define PMIX_VALUE_CREATE(m, n) \ + do { \ + size_t _ii; \ + pmix_value_t *_v; \ + (m) = (pmix_value_t*)pmix_calloc((n), sizeof(pmix_value_t)); \ + _v = (pmix_value_t*)(m); \ + if (NULL != (m)) { \ + for (_ii=0; _ii < (size_t)(n); _ii++) { \ + _v[_ii].type = PMIX_UNDEF; \ + } \ + } \ + } while (0) + +/* release a single pmix_value_t struct, including its data
*/ +#define PMIX_VALUE_RELEASE(m) \ + do { \ + PMIX_VALUE_DESTRUCT((m)); \ + pmix_free((m)); \ + (m) = NULL; \ + } while (0) + +/* initialize a single value struct: zero it and mark its type as PMIX_UNDEF */ +#define PMIX_VALUE_CONSTRUCT(m) \ + do { \ + memset((m), 0, sizeof(pmix_value_t)); \ + (m)->type = PMIX_UNDEF; \ + } while (0) + +/* release the memory in the value struct data field (delegates to pmix_value_destruct) */ +#define PMIX_VALUE_DESTRUCT(m) pmix_value_destruct(m) + +#define PMIX_VALUE_FREE(m, n) \ + do { \ + size_t _vv; \ + if (NULL != (m)) { \ + for (_vv=0; _vv < (n); _vv++) { \ + PMIX_VALUE_DESTRUCT(&((m)[_vv])); \ + } \ + pmix_free((m)); \ + (m) = NULL; \ + } \ + } while (0) + +#define PMIX_VALUE_LOAD(v, d, t) \ + pmix_value_load((v), (d), (t)) + +#define PMIX_VALUE_UNLOAD(r, k, d, s) \ + (r) = pmix_value_unload((k), (d), (s)) + +#define PMIX_VALUE_XFER(r, v, s) \ + do { \ + if (NULL == (v)) { \ + (v) = (pmix_value_t*)pmix_malloc(sizeof(pmix_value_t)); \ + if (NULL == (v)) { \ + (r) = PMIX_ERR_NOMEM; \ + } else { \ + (r) = pmix_value_xfer((v), (s)); \ + } \ + } else { \ + (r) = pmix_value_xfer((v), (s)); \ + } \ + } while(0) + +#define PMIX_VALUE_GET_NUMBER(s, m, n, t) \ + do { \ + (s) = PMIX_SUCCESS; \ + if (PMIX_SIZE == (m)->type) { \ + (n) = (t)((m)->data.size); \ + } else if (PMIX_INT == (m)->type) { \ + (n) = (t)((m)->data.integer); \ + } else if (PMIX_INT8 == (m)->type) { \ + (n) = (t)((m)->data.int8); \ + } else if (PMIX_INT16 == (m)->type) { \ + (n) = (t)((m)->data.int16); \ + } else if (PMIX_INT32 == (m)->type) { \ + (n) = (t)((m)->data.int32); \ + } else if (PMIX_INT64 == (m)->type) { \ + (n) = (t)((m)->data.int64); \ + } else if (PMIX_UINT == (m)->type) { \ + (n) = (t)((m)->data.uint); \ + } else if (PMIX_UINT8 == (m)->type) { \ + (n) = (t)((m)->data.uint8); \ + } else if (PMIX_UINT16 == (m)->type) { \ + (n) = (t)((m)->data.uint16); \ + } else if (PMIX_UINT32 == (m)->type) { \ + (n) = (t)((m)->data.uint32); \ + } else if (PMIX_UINT64 == (m)->type) { \ + (n) = (t)((m)->data.uint64); \ + } else if (PMIX_FLOAT
== (m)->type) { \ + (n) = (t)((m)->data.fval); \ + } else if (PMIX_DOUBLE == (m)->type) { \ + (n) = (t)((m)->data.dval); \ + } else if (PMIX_PID == (m)->type) { \ + (n) = (t)((m)->data.pid); \ + } else { \ + (s) = PMIX_ERR_BAD_PARAM; \ + } \ + } while(0) + +/**** PMIX INFO STRUCT ****/ +typedef struct pmix_info { + pmix_key_t key; + pmix_info_directives_t flags; /* bit-mask of flags */ + pmix_value_t value; +} pmix_info_t; + +/* utility macros for working with pmix_info_t structs */ +/* Allocate n zeroed info structs and flag the last one with + * PMIX_INFO_ARRAY_END. Nothing is flagged when n is 0, which previously + * wrote to element (n)-1 of an empty allocation. */ +#define PMIX_INFO_CREATE(m, n) \ + do { \ + pmix_info_t *_i; \ + (m) = (pmix_info_t*)pmix_calloc((n), sizeof(pmix_info_t)); \ + if (NULL != (m) && 0 < (n)) { \ + _i = (pmix_info_t*)(m); \ + _i[(n)-1].flags = PMIX_INFO_ARRAY_END; \ + } \ + } while (0) + +#define PMIX_INFO_CONSTRUCT(m) \ + do { \ + memset((m), 0, sizeof(pmix_info_t)); \ + (m)->value.type = PMIX_UNDEF; \ + } while (0) + +#define PMIX_INFO_DESTRUCT(m) \ + do { \ + PMIX_VALUE_DESTRUCT(&(m)->value); \ + } while (0) + +#define PMIX_INFO_FREE(m, n) \ + do { \ + size_t _is; \ + if (NULL != (m)) { \ + for (_is=0; _is < (n); _is++) { \ + PMIX_INFO_DESTRUCT(&((m)[_is])); \ + } \ + pmix_free((m)); \ + (m) = NULL; \ + } \ + } while (0) + +/* NOTE(review): this resets flags to 0, which also clears the + * PMIX_INFO_ARRAY_END marker that PMIX_INFO_CREATE sets on the last + * element - confirm that is intended. */ +#define PMIX_INFO_LOAD(m, k, v, t) \ + do { \ + if (NULL != (k)) { \ + pmix_strncpy((m)->key, (k), PMIX_MAX_KEYLEN); \ + } \ + (m)->flags = 0; \ + pmix_value_load(&((m)->value), (v), (t)); \ + } while (0) +#define PMIX_INFO_XFER(d, s) \ + do { \ + if (NULL != (s)->key) { \ + pmix_strncpy((d)->key, (s)->key, PMIX_MAX_KEYLEN); \ + } \ + (d)->flags = (s)->flags; \ + pmix_value_xfer(&(d)->value, (pmix_value_t*)&(s)->value); \ + } while(0) + + +/* macros for setting and unsetting the "reqd" flag + * in a pmix_info_t (fully parenthesized so they stay intact inside + * larger expressions) */ +#define PMIX_INFO_REQUIRED(m) \ + ((m)->flags |= PMIX_INFO_REQD) +#define PMIX_INFO_OPTIONAL(m) \ + ((m)->flags &= ~PMIX_INFO_REQD) + +/* macros for testing the "reqd" flag in a pmix_info_t */ +#define PMIX_INFO_IS_REQUIRED(m) \ + ((m)->flags & PMIX_INFO_REQD) +#define PMIX_INFO_IS_OPTIONAL(m) \ +
!((m)->flags & PMIX_INFO_REQD) + +/* macro for testing end of the array (parenthesized so that it can be + * negated or combined with other conditions) */ +#define PMIX_INFO_IS_END(m) \ + ((m)->flags & PMIX_INFO_ARRAY_END) + +/* define a special macro for checking if a boolean + * info is true - when info structs are provided, a + * type of PMIX_UNDEF is taken to imply a boolean "true" + * as the presence of the key defaults to indicating + * "true". The whole conditional is parenthesized so that + * "PMIX_INFO_TRUE(x) && y" is not absorbed into the false arm. */ +#define PMIX_INFO_TRUE(m) \ + ((PMIX_UNDEF == (m)->value.type || (PMIX_BOOL == (m)->value.type && (m)->value.data.flag)) ? true : false) + + +/**** PMIX LOOKUP RETURN STRUCT ****/ +typedef struct pmix_pdata { + pmix_proc_t proc; + pmix_key_t key; + pmix_value_t value; +} pmix_pdata_t; + +/* utility macros for working with pmix_pdata_t structs */ +#define PMIX_PDATA_CREATE(m, n) \ + do { \ + (m) = (pmix_pdata_t*)pmix_calloc((n), sizeof(pmix_pdata_t)); \ + } while (0) + +#define PMIX_PDATA_RELEASE(m) \ + do { \ + PMIX_VALUE_DESTRUCT(&(m)->value); \ + pmix_free((m)); \ + (m) = NULL; \ + } while (0) + +#define PMIX_PDATA_CONSTRUCT(m) \ + do { \ + memset((m), 0, sizeof(pmix_pdata_t)); \ + (m)->value.type = PMIX_UNDEF; \ + } while (0) + +#define PMIX_PDATA_DESTRUCT(m) \ + do { \ + PMIX_VALUE_DESTRUCT(&(m)->value); \ + } while (0) + +#define PMIX_PDATA_FREE(m, n) \ + do { \ + size_t _ps; \ + pmix_pdata_t *_pdf = (pmix_pdata_t*)(m); \ + if (NULL != _pdf) { \ + for (_ps=0; _ps < (n); _ps++) { \ + PMIX_PDATA_DESTRUCT(&(_pdf[_ps])); \ + } \ + pmix_free((m)); \ + (m) = NULL; \ + } \ + } while (0) + +#define PMIX_PDATA_LOAD(m, p, k, v, t) \ + do { \ + if (NULL != (m)) { \ + memset((m), 0, sizeof(pmix_pdata_t)); \ + pmix_strncpy((m)->proc.nspace, (p)->nspace, PMIX_MAX_NSLEN); \ + (m)->proc.rank = (p)->rank; \ + pmix_strncpy((m)->key, (k), PMIX_MAX_KEYLEN); \ + pmix_value_load(&((m)->value), (v), (t)); \ + } \ + } while (0) + +#define PMIX_PDATA_XFER(d, s) \ + do { \ + if (NULL != (d)) { \ + memset((d), 0, sizeof(pmix_pdata_t)); \ + pmix_strncpy((d)->proc.nspace, (s)->proc.nspace, PMIX_MAX_NSLEN); \ +
(d)->proc.rank = (s)->proc.rank; \ + pmix_strncpy((d)->key, (s)->key, PMIX_MAX_KEYLEN); \ + pmix_value_xfer(&((d)->value), &((s)->value)); \ + } \ + } while (0) + + +/**** PMIX APP STRUCT ****/ +typedef struct pmix_app { + char *cmd; + char **argv; + char **env; + char *cwd; + int maxprocs; + pmix_info_t *info; + size_t ninfo; +} pmix_app_t; +/* utility macros for working with pmix_app_t structs. PMIX_APP_DESTRUCT frees cmd, cwd, every string in the NULL-terminated argv and env arrays, and the info array; PMIX_APP_RELEASE and PMIX_APP_FREE additionally free the struct storage and reset the caller's pointer to NULL */ +#define PMIX_APP_CREATE(m, n) \ + do { \ + (m) = (pmix_app_t*)pmix_calloc((n), sizeof(pmix_app_t)); \ + } while (0) + +#define PMIX_APP_INFO_CREATE(m, n) \ + do { \ + (m)->ninfo = (n); \ + PMIX_INFO_CREATE((m)->info, (m)->ninfo); \ + } while(0) + +#define PMIX_APP_RELEASE(m) \ + do { \ + PMIX_APP_DESTRUCT((m)); \ + pmix_free((m)); \ + (m) = NULL; \ + } while (0) + +#define PMIX_APP_CONSTRUCT(m) \ + do { \ + memset((m), 0, sizeof(pmix_app_t)); \ + } while (0) + +#define PMIX_APP_DESTRUCT(m) \ + do { \ + size_t _aii; \ + if (NULL != (m)->cmd) { \ + pmix_free((m)->cmd); \ + (m)->cmd = NULL; \ + } \ + if (NULL != (m)->argv) { \ + for (_aii=0; NULL != (m)->argv[_aii]; _aii++) { \ + pmix_free((m)->argv[_aii]); \ + } \ + pmix_free((m)->argv); \ + (m)->argv = NULL; \ + } \ + if (NULL != (m)->env) { \ + for (_aii=0; NULL != (m)->env[_aii]; _aii++) { \ + pmix_free((m)->env[_aii]); \ + } \ + pmix_free((m)->env); \ + (m)->env = NULL; \ + } \ + if (NULL != (m)->cwd) { \ + pmix_free((m)->cwd); \ + (m)->cwd = NULL; \ + } \ + if (NULL != (m)->info) { \ + PMIX_INFO_FREE((m)->info, (m)->ninfo); \ + (m)->info = NULL; \ + (m)->ninfo = 0; \ + } \ + } while (0) + +#define PMIX_APP_FREE(m, n) \ + do { \ + size_t _as; \ + if (NULL != (m)) { \ + for (_as=0; _as < (n); _as++) { \ + PMIX_APP_DESTRUCT(&((m)[_as])); \ + } \ + pmix_free((m)); \ + (m) = NULL; \ + } \ + } while (0) + + +/**** PMIX QUERY STRUCT ****/ +typedef struct pmix_query { + char **keys; + pmix_info_t *qualifiers; + size_t nqual; +} pmix_query_t; +/* utility macros for working with pmix_query_t structs */ +#define
PMIX_QUERY_CREATE(m, n) \ + do { \ + (m) = (pmix_query_t*)pmix_calloc((n) , sizeof(pmix_query_t)); \ + } while (0) + +#define PMIX_QUERY_QUALIFIERS_CREATE(m, n) \ + do { \ + (m)->nqual = (n); \ + PMIX_INFO_CREATE((m)->qualifiers, (m)->nqual); \ + } while(0) + +#define PMIX_QUERY_RELEASE(m) \ + do { \ + PMIX_QUERY_DESTRUCT((m)); \ + pmix_free((m)); \ + (m) = NULL; \ + } while (0) + +#define PMIX_QUERY_CONSTRUCT(m) \ + do { \ + memset((m), 0, sizeof(pmix_query_t)); \ + } while (0) + +#define PMIX_QUERY_DESTRUCT(m) \ + do { \ + size_t _qi; \ + if (NULL != (m)->keys) { \ + for (_qi=0; NULL != (m)->keys[_qi]; _qi++) { \ + pmix_free((m)->keys[_qi]); \ + } \ + pmix_free((m)->keys); \ + (m)->keys = NULL; \ + } \ + if (NULL != (m)->qualifiers) { \ + PMIX_INFO_FREE((m)->qualifiers, (m)->nqual); \ + (m)->qualifiers = NULL; \ + (m)->nqual = 0; \ + } \ + } while (0) + +#define PMIX_QUERY_FREE(m, n) \ + do { \ + size_t _qs; \ + if (NULL != (m)) { \ + for (_qs=0; _qs < (n); _qs++) { \ + PMIX_QUERY_DESTRUCT(&((m)[_qs])); \ + } \ + pmix_free((m)); \ + (m) = NULL; \ + } \ + } while (0) + +/**** ATTRIBUTE REGISTRATION STRUCT ****/ +typedef struct pmix_regattr_t { + char *name; + pmix_key_t string; + pmix_data_type_t type; + pmix_info_t *info; + size_t ninfo; + char **description; +} pmix_regattr_t; + +#define PMIX_REGATTR_CONSTRUCT(a) \ + do { \ + if (NULL != (a)) { \ + (a)->name = NULL; \ + memset((a)->string, 0, PMIX_MAX_KEYLEN+1); \ + (a)->type = PMIX_UNDEF; \ + (a)->info = NULL; \ + (a)->ninfo = 0; \ + (a)->description = NULL; \ + } \ + } while(0) + +/* Populate a regattr: copy the name n and key k, set the type t, allocate + * ni info structs, and append v to the description array. PMIX_ARGV_APPEND + * takes the address of its second argument itself, so the array member is + * passed directly; passing &(a)->description expanded to the address of an + * rvalue and did not compile. The append status is not reported. */ +#define PMIX_REGATTR_LOAD(a, n, k, t, ni, v) \ + do { \ + pmix_status_t _rgl; \ + if (NULL != (n)) { \ + (a)->name = strdup((n)); \ + } \ + if (NULL != (k)) { \ + PMIX_LOAD_KEY((a)->string, (k)); \ + } \ + (a)->type = (t); \ + if (0 < (ni)) { \ + PMIX_INFO_CREATE((a)->info, (ni)); \ + (a)->ninfo = (ni); \ + } \ + if (NULL != (v)) { \ + PMIX_ARGV_APPEND(_rgl, (a)->description, (v)); \ + (void)_rgl; \ + } \ + } while(0) + +#define
PMIX_REGATTR_DESTRUCT(a) \ + do { \ + if (NULL != (a)) { \ + if (NULL != (a)->name) { \ + pmix_free((a)->name); \ + (a)->name = NULL; \ + } \ + if (NULL != (a)->info) { /* array built by PMIX_REGATTR_LOAD/XFER */ \ + PMIX_INFO_FREE((a)->info, (a)->ninfo); \ + } \ + (a)->ninfo = 0; \ + if (NULL != (a)->description) { \ + PMIX_ARGV_FREE((a)->description); \ + (a)->description = NULL; \ + } \ + } \ + } while(0) + +#define PMIX_REGATTR_CREATE(m, n) \ + do { \ + (m) = (pmix_regattr_t*)pmix_calloc((n) , sizeof(pmix_regattr_t)); \ + } while (0) + +#define PMIX_REGATTR_FREE(m, n) \ + do { \ + size_t _ra; \ + if (NULL != (m)) { \ + for (_ra=0; _ra < (n); _ra++) { \ + PMIX_REGATTR_DESTRUCT(&((m)[_ra])); \ + } \ + pmix_free((m)); \ + (m) = NULL; \ + } \ + } while (0) + +#define PMIX_REGATTR_XFER(a, b) \ + do { \ + size_t _n; \ + PMIX_REGATTR_CONSTRUCT((a)); \ + if (NULL != ((b)->name)) { \ + (a)->name = strdup((b)->name); \ + } \ + PMIX_LOAD_KEY((a)->string, (b)->string); \ + (a)->type = (b)->type; \ + if (0 < (b)->ninfo) { \ + (a)->ninfo = (b)->ninfo; \ + PMIX_INFO_CREATE((a)->info, (a)->ninfo); \ + for (_n=0; _n < (a)->ninfo; _n++) { \ + PMIX_INFO_XFER(&(a)->info[_n], &(b)->info[_n]); \ + } \ + } \ + if (NULL != (b)->description) { \ + PMIX_ARGV_COPY((a)->description, (b)->description); \ + } \ + } while(0) + + +/**** GENERIC HELPER MACROS ****/ + +/* Append a string (by value) to a new or existing NULL-terminated + * argv array. + * + * @param argv Pointer to an argv array (the macro takes the array variable + * itself as its second argument and passes its address on). + * @param str Pointer to the string to append. + * + * @retval PMIX_SUCCESS On success + * @retval PMIX_ERROR On failure + * + * This function adds a string to an argv array of strings by value; + * it is permissible to pass a string on the stack as the str + * argument to this function. + * + * To add the first entry to an argv array, call this function with + * (*argv == NULL). This function will allocate an array of length + * 2; the first entry will point to a copy of the string passed in + * arg, the second entry will be set to NULL.
+ * + * If (*argv != NULL), it will be realloc'ed to be 1 (char*) larger, + * and the next-to-last entry will point to a copy of the string + * passed in arg. The last entry will be set to NULL. + * + * Just to reinforce what was stated above: the string is copied by + * value into the argv array; there is no need to keep the original + * string (i.e., the arg parameter) after invoking this function. + */ +#define PMIX_ARGV_APPEND(r, a, b) \ + (r) = pmix_argv_append_nosize(&(a), (b)) + +/* Prepend a string to a new or existing NULL-terminated + * argv array - same as above only prepend. Unlike PMIX_ARGV_APPEND, the second argument is passed through as given (no address-of is applied). + */ +#define PMIX_ARGV_PREPEND(r, a, b) \ + (r) = pmix_argv_prepend_nosize(a, b) + +/* Append to an argv-style array, but only if the provided argument + * doesn't already exist somewhere in the array. Ignore the size of the array. + * + * @param argv Pointer to an argv array (passed through as given; no address-of is applied). + * @param str Pointer to the string to append. + * + * @retval PMIX_SUCCESS On success + * @retval PMIX_ERROR On failure + * + * This function is identical to the pmix_argv_append_nosize() function + * except that it only appends the provided argument if it does not already + * exist in the provided array. + */ +#define PMIX_ARGV_APPEND_UNIQUE(r, a, b) \ + (r) = pmix_argv_append_unique_nosize(a, b) + +/* Free a NULL-terminated argv array. + * + * @param argv Argv array to free. + * + * This function frees an argv array and all of the strings that it + * contains. Since the argv parameter is passed by value, it is not + * set to NULL in the caller's scope upon return. + * + * It is safe to invoke this function with a NULL pointer. It is + * not safe to invoke this function with a non-NULL-terminated argv + * array. + */ +#define PMIX_ARGV_FREE(a) pmix_argv_free(a) + +/* + * Split a string into a NULL-terminated argv array. Do not include empty + * strings in result array. + * + * @param src_string Input string. + * @param delimiter Delimiter character.
+ * + * @retval argv pointer to new argv array on success + * @retval NULL on error + * + * All strings are inserted into the argv array by value; the + * newly-allocated array makes no references to the src_string + * argument (i.e., it can be freed after calling this function + * without invalidating the output argv). + */ +#define PMIX_ARGV_SPLIT(a, b, c) \ + (a) = pmix_argv_split(b, c) + +/* + * Return the length of a NULL-terminated argv array. + * + * @param argv The input argv array. + * + * @retval 0 If NULL is passed as argv. + * @retval count Number of entries in the argv array. + * + * The argv array must be NULL-terminated. + */ +#define PMIX_ARGV_COUNT(r, a) \ + (r) = pmix_argv_count(a) + +/* + * Join all the elements of an argv array into a single + * newly-allocated string. + * + * @param argv The input argv array. + * @param delimiter Delimiter character placed between each argv string. + * + * @retval new_string Output string on success. + * @retval NULL On failure. + * + * Similar to the Perl join function, this function takes an input + * argv and joins its elements into a single string separated by the + * delimiter character. + * + * It is the caller's responsibility to free the returned string. + */ +#define PMIX_ARGV_JOIN(a, b, c) \ + (a) = pmix_argv_join(b, c) + +/* + * Copy a NULL-terminated argv array. + * + * @param argv The input argv array. + * + * @retval argv Copied argv array on success. + * @retval NULL On failure. + * + * Copy an argv array, including copying all of its strings. + * Specifically, the output argv will be an array of the same length + * as the input argv, and strcmp(argv_in[i], argv_out[i]) will be 0.
+ */ +#define PMIX_ARGV_COPY(a, b) \ + (a) = pmix_argv_copy(b) + +/* + * Set an environmental parameter in an env array + * + * @retval r Return pmix_status_t status + * + * @param a Name of the environmental param + * + * @param b String value of the environmental param + * + * @param c Address of the NULL-terminated env array + * + * NOTE(review): the macro always passes true as the third argument of + * pmix_setenv - presumably an overwrite flag; confirm against pmix_setenv. + */ +#define PMIX_SETENV(r, a, b, c) \ + (r) = pmix_setenv((a), (b), true, (c)) + + +/**** CALLBACK FUNCTIONS FOR NON-BLOCKING OPERATIONS ****/ + +typedef void (*pmix_release_cbfunc_t)(void *cbdata); + +/* define a callback function that is solely used by servers, and + * not clients, to return modex data in response to "fence" and "get" + * operations. The returned blob contains the data collected from each + * server participating in the operation. + * + * As the data is "owned" by the host server, provide a secondary + * callback function to notify the host server that we are done + * with the data so it can be released */ +typedef void (*pmix_modex_cbfunc_t)(pmix_status_t status, + const char *data, size_t ndata, + void *cbdata, + pmix_release_cbfunc_t release_fn, + void *release_cbdata); + +/* define a callback function for calls to PMIx_Spawn_nb - the function + * will be called upon completion of the spawn command. The status + * will indicate whether or not the spawn succeeded. The nspace + * of the spawned processes will be returned, along with any provided + * callback data. Note that the returned nspace value will be + * released by the library upon return from the callback function, so + * the receiver must copy it if it needs to be retained */ +typedef void (*pmix_spawn_cbfunc_t)(pmix_status_t status, + pmix_nspace_t nspace, void *cbdata); + +/* define a callback for common operations that simply return + * a status.
Examples include the non-blocking versions of + * Fence, Connect, and Disconnect */ +typedef void (*pmix_op_cbfunc_t)(pmix_status_t status, void *cbdata); + +/* define a callback function for calls to PMIx_Lookup_nb - the + * function will be called upon completion of the command with the + * status indicating the success or failure of the request. Any + * retrieved data will be returned in an array of pmix_pdata_t structs. + * The nspace/rank of the process that provided each data element is + * also returned. + * + * Note that these structures will be released upon return from + * the callback function, so the receiver must copy/protect the + * data prior to returning if it needs to be retained */ + +typedef void (*pmix_lookup_cbfunc_t)(pmix_status_t status, + pmix_pdata_t data[], size_t ndata, + void *cbdata); + +/* define a callback by which an event handler can notify the PMIx library + * that it has completed its response to the notification. The handler + * is _required_ to execute this callback so the library can determine + * if additional handlers need to be called. The handler shall return + * PMIX_SUCCESS if no further action is required. The return status + * of each event handler and any returned pmix_info_t structures + * will be added to the array of pmix_info_t passed to any subsequent + * event handlers to help guide their operation. + * + * If non-NULL, the provided callback function will be called to allow + * the event handler to release the provided info array. + */ +typedef void (*pmix_event_notification_cbfunc_fn_t)(pmix_status_t status, + pmix_info_t *results, size_t nresults, + pmix_op_cbfunc_t cbfunc, void *thiscbdata, + void *notification_cbdata); + +/* define a callback function for the event handler.
Upon receipt of an + * event notification, PMIx will execute the specified notification + * callback function, providing: + * + * evhdlr_registration_id - the returned registration number of + * the event handler being called + * status - the event that occurred + * source - the nspace and rank of the process that generated + * the event. If the source is the resource manager, + * then the nspace will be empty and the rank will + * be PMIX_RANK_UNDEF + * info - any additional info provided regarding the event. + * ninfo - the number of info objects in the info array + * results - any provided results from event handlers called + * prior to this one. + * nresults - number of info objects in the results array + * cbfunc - the function to be called upon completion of the handler + * cbdata - pointer to be returned in the completion cbfunc + * + * Note that different resource managers may provide differing levels + * of support for event notification to application processes. Thus, the + * info array may be NULL or may contain detailed information of the event. + * It is the responsibility of the application to parse any provided info array + * for defined key-values if it so desires. + * + * Possible uses of the pmix_info_t object include: + * + * - for the RM to alert the process as to planned actions, such as + * to abort the session, in response to the reported event + * + * - provide a timeout for alternative action to occur, such as for + * the application to request an alternate response to the event + * + * For example, the RM might alert the application to the failure of + * a node that resulted in termination of several processes, and indicate + * that the overall session will be aborted unless the application + * requests an alternative behavior in the next 5 seconds. 
The application + * then has time to respond with a checkpoint request, or a request to + * recover from the failure by obtaining replacement nodes and restarting + * from some earlier checkpoint. + * + * Support for these options is left to the discretion of the host RM. Info + * keys are included in the common definitions above, but also may be augmented + * on a per-RM basis. + * + * On the server side, the notification function is used to inform the host + * server of a detected event in the PMIx subsystem and/or client. + */ +typedef void (*pmix_notification_fn_t)(size_t evhdlr_registration_id, + pmix_status_t status, + const pmix_proc_t *source, + pmix_info_t info[], size_t ninfo, + pmix_info_t *results, size_t nresults, + pmix_event_notification_cbfunc_fn_t cbfunc, + void *cbdata); + +/* define a callback function for calls to register handlers, e.g., event + * notification and IOF requests + * + * status - PMIX_SUCCESS or an appropriate error constant + * + * refid - reference identifier assigned to the handler by PMIx, + * used to deregister the handler + * + * cbdata - object provided to the registration call + */ +typedef void (*pmix_hdlr_reg_cbfunc_t)(pmix_status_t status, + size_t refid, + void *cbdata); + +/* define a callback function for calls to PMIx_Get_nb. The status + * indicates if the requested data was found or not - a pointer to the + * pmix_value_t structure containing the found data is returned. The + * pointer will be NULL if the requested data was not found. */ +typedef void (*pmix_value_cbfunc_t)(pmix_status_t status, + pmix_value_t *kv, void *cbdata); + +/* define a callback function for calls to PMIx_Query. The status + * indicates if requested data was found or not - an array of + * pmix_info_t will contain the key/value pairs.
*/ +typedef void (*pmix_info_cbfunc_t)(pmix_status_t status, + pmix_info_t *info, size_t ninfo, + void *cbdata, + pmix_release_cbfunc_t release_fn, + void *release_cbdata); + +/* Define a callback function to return a requested security credential. + * Returned values include: + * + * status - PMIX_SUCCESS if a credential could be assigned as requested, or + * else an appropriate error code indicating the problem + * + * credential - pointer to an allocated pmix_byte_object_t containing the + * credential (as an opaque blob) and its size. Ownership of + * the credential is transferred to the receiving function - thus, + * responsibility for releasing the memory lies outside the + * PMIx library. + * + * info - an array of pmix_info_t structures provided by the system to pass + * any additional information about the credential - e.g., the identity + * of the issuing agent. The info array is owned by the PMIx library + * and is not to be released or altered by the receiving party. Note that + * this array is not related to the pmix_info_t structures possibly + * provided in the call to PMIx_Get_credential. + * + * Information provided by the issuing agent can subsequently be used + * by the application for a variety of purposes. Examples include: + * - checking identified authorizations to determine what + * requests/operations are feasible as a means to steering + * workflows + * - comparing the credential type to that of the local SMS for + * compatibility + * + * ninfo - number of elements in the info array + * + * cbdata - the caller's provided void* object + * + * NOTE: the credential is opaque and therefore understandable only by + * a service compatible with the issuer.
+ */ +typedef void (*pmix_credential_cbfunc_t)(pmix_status_t status, + pmix_byte_object_t *credential, + pmix_info_t info[], size_t ninfo, + void *cbdata); + + +/* Define a validation callback function to indicate if a provided + * credential is valid, and any corresponding information regarding + * authorizations and other security matters + * Returned values include: + * + * status - PMIX_SUCCESS if the provided credential is valid. An appropriate + * error code indicating the issue if the credential is rejected. + * + * info - an array of pmix_info_t structures provided by the system to pass + * any additional information about the authentication - e.g., the + * effective userid and group id of the certificate holder, and any + * related authorizations. The info array is owned by the PMIx library + * and is not to be released or altered by the receiving party. Note that + * this array is not related to the pmix_info_t structures possibly + * provided in the call to PMIx_Validate_credential. + * + * The precise contents of the array will depend on the host SMS and + * its associated security system. At the minimum, it is expected (but + * not required) that the array will contain entries for the PMIX_USERID + * and PMIX_GROUPID of the client described in the credential. + * + * ninfo - number of elements in the info array + * + * cbdata - the caller's provided void* object + */ +typedef void (*pmix_validation_cbfunc_t)(pmix_status_t status, + pmix_info_t info[], size_t ninfo, + void *cbdata); + +/****************************************/ +/**** COMMON SUPPORT FUNCTIONS ****/ +/****************************************/ + +/****** EVENT NOTIFICATION SUPPORT ******/ +/* Register an event handler to report events. Three types of events + * can be reported: + * + * (a) those that occur within the client library, but are not + * reportable via the API itself (e.g., loss of connection to + * the server). 
These events typically occur during behind-the-scenes + * non-blocking operations. + * + * (b) job-related events such as the failure of another process in + * the job or in any connected job, impending failure of hardware + * within the job's usage footprint, etc. + * + * (c) system notifications that are made available by the local + * administrators + * + * By default, only events that directly affect the process and/or + * any process to which it is connected (via the PMIx_Connect call) + * will be reported. Options to modify that behavior can be provided + * in the info array. + * + * Both the client application and the resource manager can register + * event handlers for specific events. PMIx client/server calls the registered + * event handler upon receiving an event notification (via PMIx_Notify_event) + * from the other end (Resource Manager/Client application). + * + * Multiple event handlers can be registered for different events. PMIx returns + * an integer reference to each registered handler in the callback fn. The caller + * must retain the reference in order to deregister the evhdlr. + * Modification of the notification behavior can be accomplished by + * deregistering the current evhdlr, and then registering it + * using a new set of info values. + * + * If cbfunc is NULL, then this is treated as a BLOCKING call - a positive + * return value represents the reference ID for the request, while + * negative values indicate the corresponding error. + * + * See pmix_common.h for a description of the notification function */ +PMIX_EXPORT pmix_status_t PMIx_Register_event_handler(pmix_status_t codes[], size_t ncodes, + pmix_info_t info[], size_t ninfo, + pmix_notification_fn_t evhdlr, + pmix_hdlr_reg_cbfunc_t cbfunc, + void *cbdata); + +/* Deregister an event handler + * evhdlr_ref is the reference returned by PMIx from the call to + * PMIx_Register_event_handler.
If non-NULL, the provided cbfunc + * will be called to confirm removal of the designated handler */ +PMIX_EXPORT pmix_status_t PMIx_Deregister_event_handler(size_t evhdlr_ref, + pmix_op_cbfunc_t cbfunc, + void *cbdata); + +/* Report an event for notification via any + * registered evhdlr. + * + * This function allows the host server to direct the server + * convenience library to notify all registered local procs of + * an event. The event can be local, or anywhere in the cluster. + * The status indicates the event being reported. + * + * The client application can also call this function to notify the + * resource manager and/or other processes of an event it encountered. + * It can also be used to asynchronously notify other parts of its + * own internal process - e.g., for one library to notify another + * when initialized inside the process. + * + * status - status code indicating the event being reported + * + * source - the process that generated the event + * + * range - the range in which the event is to be reported. For example, + * a value of PMIX_RANGE_LOCAL would instruct the system + * to only notify procs on the same local node as the + * event generator. + * + * info - an array of pmix_info_t structures provided by the event + * generator to pass any additional information about the + * event. This can include an array of pmix_proc_t structs + * describing the processes impacted by the event, the nature + * of the event and its severity, etc. The precise contents + * of the array will depend on the event generator. + * + * ninfo - number of elements in the info array + * + * cbfunc - callback function to be called upon completion of the + * notify_event function's actions. Note that any messages + * will have been queued, but may not have been transmitted + * by this time. Note that the caller is required to maintain + * the input data until the callback function has been executed! 
+ * If cbfunc is NULL, then this is treated as a BLOCKING call and + * the result of the operation is provided in the returned + * status + * + * cbdata - the caller's provided void* object + */ +PMIX_EXPORT pmix_status_t PMIx_Notify_event(pmix_status_t status, + const pmix_proc_t *source, + pmix_data_range_t range, + const pmix_info_t info[], size_t ninfo, + pmix_op_cbfunc_t cbfunc, void *cbdata); + + +/****** PRETTY-PRINT DEFINED VALUE TYPES ******/ +/* Provide a string representation for several types of value. Note + * that the provided string is statically defined and must NOT be + * free'd. Supported value types: + * + * - pmix_status_t (PMIX_STATUS) + * - pmix_scope_t (PMIX_SCOPE) + * - pmix_persistence_t (PMIX_PERSIST) + * - pmix_data_range_t (PMIX_DATA_RANGE) + * - pmix_info_directives_t (PMIX_INFO_DIRECTIVES) + * - pmix_data_type_t (PMIX_DATA_TYPE) + * - pmix_alloc_directive_t (PMIX_ALLOC_DIRECTIVE) + * - pmix_iof_channel_t (PMIX_IOF_CHANNEL) + */ +PMIX_EXPORT const char* PMIx_Error_string(pmix_status_t status); +PMIX_EXPORT const char* PMIx_Proc_state_string(pmix_proc_state_t state); +PMIX_EXPORT const char* PMIx_Scope_string(pmix_scope_t scope); +PMIX_EXPORT const char* PMIx_Persistence_string(pmix_persistence_t persist); +PMIX_EXPORT const char* PMIx_Data_range_string(pmix_data_range_t range); +PMIX_EXPORT const char* PMIx_Info_directives_string(pmix_info_directives_t directives); +PMIX_EXPORT const char* PMIx_Data_type_string(pmix_data_type_t type); +PMIX_EXPORT const char* PMIx_Alloc_directive_string(pmix_alloc_directive_t directive); +PMIX_EXPORT const char* PMIx_IOF_channel_string(pmix_iof_channel_t channel); + +/* Get the PMIx version string. Note that the provided string is + * statically defined and must NOT be free'd */ +PMIX_EXPORT const char* PMIx_Get_version(void); + +/* Store some data locally for retrieval by other areas of the + * proc. 
This is data that has only internal scope - it will + * never be "pushed" externally */ +PMIX_EXPORT pmix_status_t PMIx_Store_internal(const pmix_proc_t *proc, + const pmix_key_t key, pmix_value_t *val); + + +/****** DATA BUFFER PACK/UNPACK SUPPORT ******/ +/** + * Top-level interface function to pack one or more values into a + * buffer. + * + * The pack function packs one or more values of a specified type into + * the specified buffer. The buffer must have already been + * initialized via the PMIX_DATA_BUFFER_CREATE or PMIX_DATA_BUFFER_CONSTRUCT + * call - otherwise, the pack_value function will return an error. + * Providing an unsupported type flag will likewise be reported as an error. + * + * Note that any data to be packed that is not hard type cast (i.e., + * not type cast to a specific size) may lose precision when unpacked + * by a non-homogeneous recipient. The PACK function will do its best to deal + * with heterogeneity issues between the packer and unpacker in such + * cases. Sending a number larger than can be handled by the recipient + * will return an error code (generated upon unpacking) - + * the error cannot be detected during packing. + * + * The identity of the intended recipient of the packed buffer (i.e., the + * process that will be unpacking it) is used solely to resolve any data type + * differences between PMIx versions. The recipient must, therefore, be + * known to the user prior to calling the pack function so that the + * PMIx library is aware of the version the recipient is using. + * + * @param *target Pointer to a pmix_proc_t structure containing the + * nspace/rank of the process that will be unpacking the final buffer. + * A NULL value may be used to indicate that the target is based on + * the same PMIx version as the caller. + * + * @param *buffer A pointer to the buffer into which the value is to + * be packed. + * + * @param *src A void* pointer to the data that is to be packed. 
Note + * that strings are to be passed as (char **) - i.e., the caller must + * pass the address of the pointer to the string as the void*. This + * allows PMIx to use a single pack function, but still allows + * the caller to pass multiple strings in a single call. + * + * @param num_vals An int32_t indicating the number of values that are + * to be packed, beginning at the location pointed to by src. A string + * value is counted as a single value regardless of length. The values + * must be contiguous in memory. Arrays of pointers (e.g., string + * arrays) should be contiguous, although (obviously) the data pointed + * to need not be contiguous across array entries. + * + * @param type The type of the data to be packed - must be one of the + * PMIX defined data types. + * + * @retval PMIX_SUCCESS The data was packed as requested. + * + * @retval PMIX_ERROR(s) An appropriate PMIX error code indicating the + * problem encountered. This error code should be handled + * appropriately. + * + * @code + * pmix_data_buffer_t *buffer; + * int32_t src; + * + * PMIX_DATA_BUFFER_CREATE(buffer); + * status_code = PMIx_Data_pack(NULL, buffer, &src, 1, PMIX_INT32); + * @endcode + */ +PMIX_EXPORT pmix_status_t PMIx_Data_pack(const pmix_proc_t *target, + pmix_data_buffer_t *buffer, + void *src, int32_t num_vals, + pmix_data_type_t type); + +/** + * Unpack values from a buffer. + * + * The unpack function unpacks the next value (or values) of a + * specified type from the specified buffer. + * + * The buffer must have already been initialized via a PMIX_DATA_BUFFER_CREATE or + * PMIX_DATA_BUFFER_CONSTRUCT call (and presumably filled with some data) - + * otherwise, the unpack_value function will return an + * error. Providing an unsupported type flag will likewise be reported + * as an error, as will specifying a data type that DOES NOT match the + * type of the next item in the buffer.
An attempt to read beyond the + * end of the stored data held in the buffer will also return an + * error. + * + * NOTE: it is possible for the buffer to be corrupted and that + * PMIx will *think* there is a proper variable type at the + * beginning of an unpack region - but that the value is bogus (e.g., just + * a byte field in a string array that so happens to have a value that + * matches the specified data type flag). Therefore, the data type error check + * is NOT completely safe. This is true for ALL unpack functions. + * + * + * Unpacking values is a "nondestructive" process - i.e., the values are + * not removed from the buffer. It is therefore possible for the caller + * to re-unpack a value from the same buffer by resetting the unpack_ptr. + * + * Warning: The caller is responsible for providing adequate memory + * storage for the requested data. As noted below, the user + * must provide a parameter indicating the maximum number of values that + * can be unpacked into the allocated memory. If more values exist in the + * buffer than can fit into the memory storage, then the function will unpack + * what it can fit into that location and return an error code indicating + * that the buffer was only partially unpacked. + * + * Note that any data that was not hard type cast (i.e., not type cast + * to a specific size) when packed may lose precision when unpacked by + * a non-homogeneous recipient. PMIx will do its best to deal with + * heterogeneity issues between the packer and unpacker in such + * cases. Sending a number larger than can be handled by the recipient + * will return an error code generated upon unpacking - these errors + * cannot be detected during packing. + * + * The identity of the source of the packed buffer (i.e., the + * process that packed it) is used solely to resolve any data type + * differences between PMIx versions. 
The source must, therefore, be + * known to the user prior to calling the unpack function so that the + * PMIx library is aware of the version the source used. + * + * @param *source Pointer to a pmix_proc_t structure containing the + * nspace/rank of the process that packed the provided buffer. + * A NULL value may be used to indicate that the source is based on + * the same PMIx version as the caller. + * + * @param *buffer A pointer to the buffer from which the value will be + * extracted. + * + * @param *dest A void* pointer to the memory location into which the + * data is to be stored. Note that these values will be stored + * contiguously in memory. For strings, this pointer must be to (char + * **) to provide a means of supporting multiple string + * operations. The unpack function will allocate memory for each + * string in the array - the caller must only provide adequate memory + * for the array of pointers. + * + * @param type The type of the data to be unpacked - must be one of + * the PMIx defined data types. + * + * @param *max_num_values On input, the maximum number of values that + * can be unpacked into dest. On return, the number of values actually + * unpacked. In + * most cases, this should match the maximum number provided in the + * parameters - but in no case will it exceed the value of this + * parameter. Note that if you unpack fewer values than are actually + * available, the buffer will be in an unpackable state - the function will + * return an error code to warn of this condition. + * + * @note The unpack function will return the actual number of values + * unpacked in this location. + * + * @retval PMIX_SUCCESS The next item in the buffer was successfully + * unpacked.
+ * + * @retval PMIX_ERROR(s) The unpack function returns an error code + * under one of several conditions: (a) the number of values in the + * item exceeds the max num provided by the caller; (b) the type of + * the next item in the buffer does not match the type specified by + * the caller; or (c) the unpack failed due to either an error in the + * buffer or an attempt to read past the end of the buffer. + * + * @code + * pmix_data_buffer_t *buffer; + * int32_t dest; + * char **string_array; + * int32_t num_values; + * + * num_values = 1; + * status_code = PMIx_Data_unpack(NULL, buffer, (void*)&dest, &num_values, PMIX_INT32); + * + * num_values = 5; + * string_array = pmix_malloc(num_values*sizeof(char *)); + * status_code = PMIx_Data_unpack(NULL, buffer, (void*)(string_array), &num_values, PMIX_STRING); + * + * @endcode + */ +PMIX_EXPORT pmix_status_t PMIx_Data_unpack(const pmix_proc_t *source, + pmix_data_buffer_t *buffer, void *dest, + int32_t *max_num_values, + pmix_data_type_t type); + +/** + * Copy a data value from one location to another. + * + * Since registered data types can be complex structures, the system + * needs some way to know how to copy the data from one location to + * another (e.g., for storage in the registry). This function, which + * can call other copy functions to build up complex data types, defines + * the method for making a copy of the specified data type. + * + * @param **dest The address of a pointer into which the + * address of the resulting data is to be stored. + * + * @param *src A pointer to the memory location from which the + * data is to be copied. + * + * @param type The type of the data to be copied - must be one of + * the PMIx defined data types. + * + * @retval PMIX_SUCCESS The value was successfully copied. + * + * @retval PMIX_ERROR(s) An appropriate error code. + * + */ +PMIX_EXPORT pmix_status_t PMIx_Data_copy(void **dest, void *src, + pmix_data_type_t type); + +/** + * Print a data value.
+ * + * Since registered data types can be complex structures, the system + * needs some way to know how to print them (i.e., convert them to a string + * representation). Provided for debug purposes. + * + * @retval PMIX_SUCCESS The value was successfully printed. + * + * @retval PMIX_ERROR(s) An appropriate error code. + */ +PMIX_EXPORT pmix_status_t PMIx_Data_print(char **output, char *prefix, + void *src, pmix_data_type_t type); + +/** + * Copy a payload from one buffer to another + * + * This function will append a copy of the payload in one buffer into + * another buffer. + * NOTE: This is NOT a destructive procedure - the + * source buffer's payload will remain intact, as will any pre-existing + * payload in the destination's buffer. + */ +PMIX_EXPORT pmix_status_t PMIx_Data_copy_payload(pmix_data_buffer_t *dest, + pmix_data_buffer_t *src); + + +/******** STANDARD MACROS FOR DARRAY AND VALUE SUPPORT ********/ +static inline void pmix_darray_destruct(pmix_data_array_t *m); + +static inline void pmix_value_destruct(pmix_value_t * m) +{ + if (PMIX_STRING == (m)->type) { + if (NULL != (m)->data.string) { + pmix_free((m)->data.string); + (m)->data.string = NULL; + } + } else if ((PMIX_BYTE_OBJECT == (m)->type) || + (PMIX_COMPRESSED_STRING == (m)->type)) { + if (NULL != (m)->data.bo.bytes) { + pmix_free((m)->data.bo.bytes); + (m)->data.bo.bytes = NULL; + (m)->data.bo.size = 0; + } + } else if (PMIX_DATA_ARRAY == (m)->type) { + if (NULL != (m)->data.darray) { + pmix_darray_destruct((m)->data.darray); + pmix_free((m)->data.darray); + (m)->data.darray = NULL; + } + } else if (PMIX_ENVAR == (m)->type) { + PMIX_ENVAR_DESTRUCT(&(m)->data.envar); + } else if (PMIX_PROC == (m)->type) { + PMIX_PROC_RELEASE((m)->data.proc); + } +} + +static inline void pmix_darray_destruct(pmix_data_array_t *m) +{ + if (NULL != m) { + if (PMIX_INFO == m->type) { + pmix_info_t *_info = (pmix_info_t*)m->array; + PMIX_INFO_FREE(_info, m->size); + } else if (PMIX_PROC == m->type) { + 
pmix_proc_t *_p = (pmix_proc_t*)m->array; + PMIX_PROC_FREE(_p, m->size); + } else if (PMIX_PROC_INFO == m->type) { + pmix_proc_info_t *_pi = (pmix_proc_info_t*)m->array; + PMIX_PROC_INFO_FREE(_pi, m->size); + } else if (PMIX_ENVAR == m->type) { + pmix_envar_t *_e = (pmix_envar_t*)m->array; + PMIX_ENVAR_FREE(_e, m->size); + } else if (PMIX_VALUE == m->type) { + pmix_value_t *_v = (pmix_value_t*)m->array; + PMIX_VALUE_FREE(_v, m->size); + } else if (PMIX_PDATA == m->type) { + pmix_pdata_t *_pd = (pmix_pdata_t*)m->array; + PMIX_PDATA_FREE(_pd, m->size); + } else if (PMIX_QUERY == m->type) { + pmix_query_t *_q = (pmix_query_t*)m->array; + PMIX_QUERY_FREE(_q, m->size); + } else if (PMIX_APP == m->type) { + pmix_app_t *_a = (pmix_app_t*)m->array; + PMIX_APP_FREE(_a, m->size); + } else if (PMIX_BYTE_OBJECT == m->type || + PMIX_COMPRESSED_STRING == m->type) { + pmix_byte_object_t *_b = (pmix_byte_object_t*)m->array; + PMIX_BYTE_OBJECT_FREE(_b, m->size); + } else if (PMIX_STRING == m->type) { + char **_s = (char**)m->array; + size_t _si; + for (_si=0; _si < m->size; _si++) { + pmix_free(_s[_si]); + } + pmix_free(m->array); + m->array = NULL; + } else { + pmix_free(m->array); + } + } +} + +#define PMIX_DATA_ARRAY_CONSTRUCT(m, n, t) \ + do { \ + (m)->type = (t); \ + (m)->size = (n); \ + if (0 < (n)) { \ + if (PMIX_INFO == (t)) { \ + PMIX_INFO_CREATE((m)->array, (n)); \ + } else if (PMIX_PROC == (t)) { \ + PMIX_PROC_CREATE((m)->array, (n)); \ + } else if (PMIX_PROC_INFO == (t)) { \ + PMIX_PROC_INFO_CREATE((m)->array, (n)); \ + } else if (PMIX_ENVAR == (t)) { \ + PMIX_ENVAR_CREATE((m)->array, (n)); \ + } else if (PMIX_VALUE == (t)) { \ + PMIX_VALUE_CREATE((m)->array, (n)); \ + } else if (PMIX_PDATA == (t)) { \ + PMIX_PDATA_CREATE((m)->array, (n)); \ + } else if (PMIX_QUERY == (t)) { \ + PMIX_QUERY_CREATE((m)->array, (n)); \ + } else if (PMIX_APP == (t)) { \ + PMIX_APP_CREATE((m)->array, (n)); \ + } else if (PMIX_BYTE_OBJECT == (t) || \ + PMIX_COMPRESSED_STRING == (t)) { \ + 
PMIX_BYTE_OBJECT_CREATE((m)->array, (n)); \ + } else if (PMIX_ALLOC_DIRECTIVE == (t) || \ + PMIX_PROC_STATE == (t) || \ + PMIX_PERSIST == (t) || \ + PMIX_SCOPE == (t) || \ + PMIX_DATA_RANGE == (t) || \ + PMIX_BYTE == (t) || \ + PMIX_INT8 == (t) || \ + PMIX_UINT8 == (t) || \ + PMIX_POINTER == (t)) { \ + (m)->array = pmix_calloc((n), sizeof(int8_t)); \ + } else if (PMIX_STRING == (t)) { \ + (m)->array = pmix_calloc((n), sizeof(char*)); \ + } else if (PMIX_SIZE == (t)) { \ + (m)->array = pmix_calloc((n), sizeof(size_t)); \ + } else if (PMIX_PID == (t)) { \ + (m)->array = pmix_calloc((n), sizeof(pid_t)); \ + } else if (PMIX_INT == (t) || \ + PMIX_UINT == (t) || \ + PMIX_STATUS == (t)) { \ + (m)->array = pmix_calloc((n), sizeof(int)); \ + } else if (PMIX_IOF_CHANNEL == (t) || \ + PMIX_DATA_TYPE == (t) || \ + PMIX_INT16 == (t) || \ + PMIX_UINT16 == (t)) { \ + (m)->array = pmix_calloc((n), sizeof(int16_t)); \ + } else if (PMIX_PROC_RANK == (t) || \ + PMIX_INFO_DIRECTIVES == (t) || \ + PMIX_INT32 == (t) || \ + PMIX_UINT32 == (t)) { \ + (m)->array = pmix_calloc((n), sizeof(int32_t)); \ + } else if (PMIX_INT64 == (t) || \ + PMIX_UINT64 == (t)) { \ + (m)->array = pmix_calloc((n), sizeof(int64_t)); \ + } else if (PMIX_FLOAT == (t)) { \ + (m)->array = pmix_calloc((n), sizeof(float)); \ + } else if (PMIX_DOUBLE == (t)) { \ + (m)->array = pmix_calloc((n), sizeof(double)); \ + } else if (PMIX_TIMEVAL == (t)) { \ + (m)->array = pmix_calloc((n), sizeof(struct timeval)); \ + } else if (PMIX_TIME == (t)) { \ + (m)->array = pmix_calloc((n), sizeof(time_t)); \ + } else if (PMIX_REGATTR == (t)) { \ + PMIX_REGATTR_CREATE((m)->array, (n)); \ + } else if (PMIX_BOOL == (t)) { \ + (m)->array = pmix_calloc((n), sizeof(bool)); \ + } else if (PMIX_COORD == (t)) { \ + (m)->array = pmix_calloc((n), sizeof(pmix_coord_t)); \ + } \ + } else { \ + (m)->array = NULL; \ + } \ + } while(0) +#define PMIX_DATA_ARRAY_CREATE(m, n, t) \ + do { \ + (m) = (pmix_data_array_t*)pmix_calloc(1, 
/* Release the element storage of a data array; the pmix_data_array_t
 * structure itself is not freed. */
#define PMIX_DATA_ARRAY_DESTRUCT(m) pmix_darray_destruct(m)

/* Release the element storage of a data array, free the descriptor
 * itself, and reset the caller's pointer to NULL. A NULL (m) is a no-op. */
#define PMIX_DATA_ARRAY_FREE(m)             \
    do {                                    \
        if (NULL != (m)) {                  \
            PMIX_DATA_ARRAY_DESTRUCT(m);    \
            pmix_free((m));                 \
            (m) = NULL;                     \
        }                                   \
    } while(0)


/**
 * Provide a safe version of strncpy that doesn't generate
 * a ton of spurious warnings. Note that not every environment
 * provides nice string functions, and we aren't concerned about
 * max performance here.
 *
 * At most len characters are copied from src and the result is ALWAYS
 * NUL-terminated, so dest must have room for len + 1 bytes. Unlike
 * strncpy, the remainder of dest is not zero-padded.
 *
 * @param dest  Destination string. If NULL, the call is a no-op.
 * @param src   Source string. It need not be NUL-terminated as long as
 *              at least len bytes are readable. If NULL, dest is set
 *              to the empty string.
 * @param len   Size of the dest array - 1
 *
 */
static inline void pmix_strncpy(char *dest,
                                const char *src,
                                size_t len)
{
    size_t i = 0;

    if (NULL == dest) {
        return;
    }

    if (NULL != src) {
        /* the bound is tested before src[i] is read, so a src that is
         * not NUL-terminated is never read past its first len bytes */
        for (i = 0; i < len && '\0' != src[i]; ++i) {
            dest[i] = src[i];
        }
    }

    /* i is min(len, strlen(src)), or 0 for a NULL src */
    dest[i] = '\0';
}
OPAL_MCA_PMIX4X_PMI2_Job_Connect +#define PMI2_Job_Disconnect OPAL_MCA_PMIX4X_PMI2_Job_Disconnect +#define PMI2_Job_GetId OPAL_MCA_PMIX4X_PMI2_Job_GetId +#define PMI2_Job_GetRank OPAL_MCA_PMIX4X_PMI2_Job_GetRank +#define PMI2_Job_Spawn OPAL_MCA_PMIX4X_PMI2_Job_Spawn +#define PMI2_KVS_Fence OPAL_MCA_PMIX4X_PMI2_KVS_Fence +#define PMI2_KVS_Get OPAL_MCA_PMIX4X_PMI2_KVS_Get +#define PMI2_KVS_Put OPAL_MCA_PMIX4X_PMI2_KVS_Put +#define PMI2_Nameserv_lookup OPAL_MCA_PMIX4X_PMI2_Nameserv_lookup +#define PMI2_Nameserv_publish OPAL_MCA_PMIX4X_PMI2_Nameserv_publish +#define PMI2_Nameserv_unpublish OPAL_MCA_PMIX4X_PMI2_Nameserv_unpublish +#define PMI_Abort OPAL_MCA_PMIX4X_PMI_Abort +#define PMI_Args_to_keyval OPAL_MCA_PMIX4X_PMI_Args_to_keyval +#define PMI_Barrier OPAL_MCA_PMIX4X_PMI_Barrier +#define PMI_Finalize OPAL_MCA_PMIX4X_PMI_Finalize +#define PMI_Free_keyvals OPAL_MCA_PMIX4X_PMI_Free_keyvals +#define PMI_Get_appnum OPAL_MCA_PMIX4X_PMI_Get_appnum +#define PMI_Get_clique_ranks OPAL_MCA_PMIX4X_PMI_Get_clique_ranks +#define PMI_Get_clique_size OPAL_MCA_PMIX4X_PMI_Get_clique_size +#define PMI_Get_id OPAL_MCA_PMIX4X_PMI_Get_id +#define PMI_Get_id_length_max OPAL_MCA_PMIX4X_PMI_Get_id_length_max +#define PMI_Get_kvs_domain_id OPAL_MCA_PMIX4X_PMI_Get_kvs_domain_id +#define PMI_Get_options OPAL_MCA_PMIX4X_PMI_Get_options +#define PMI_Get_rank OPAL_MCA_PMIX4X_PMI_Get_rank +#define PMI_Get_size OPAL_MCA_PMIX4X_PMI_Get_size +#define PMI_Get_universe_size OPAL_MCA_PMIX4X_PMI_Get_universe_size +#define PMI_Init OPAL_MCA_PMIX4X_PMI_Init +#define PMI_Initialized OPAL_MCA_PMIX4X_PMI_Initialized +#define PMI_KVS_Commit OPAL_MCA_PMIX4X_PMI_KVS_Commit +#define PMI_KVS_Create OPAL_MCA_PMIX4X_PMI_KVS_Create +#define PMI_KVS_Destroy OPAL_MCA_PMIX4X_PMI_KVS_Destroy +#define PMI_KVS_Get OPAL_MCA_PMIX4X_PMI_KVS_Get +#define PMI_KVS_Get_key_length_max OPAL_MCA_PMIX4X_PMI_KVS_Get_key_length_max +#define PMI_KVS_Get_my_name OPAL_MCA_PMIX4X_PMI_KVS_Get_my_name +#define PMI_KVS_Get_name_length_max 
OPAL_MCA_PMIX4X_PMI_KVS_Get_name_length_max +#define PMI_KVS_Get_value_length_max OPAL_MCA_PMIX4X_PMI_KVS_Get_value_length_max +#define PMI_KVS_Iter_first OPAL_MCA_PMIX4X_PMI_KVS_Iter_first +#define PMI_KVS_Iter_next OPAL_MCA_PMIX4X_PMI_KVS_Iter_next +#define PMI_KVS_Put OPAL_MCA_PMIX4X_PMI_KVS_Put +#define PMI_Lookup_name OPAL_MCA_PMIX4X_PMI_Lookup_name +#define PMI_Parse_option OPAL_MCA_PMIX4X_PMI_Parse_option +#define PMI_Publish_name OPAL_MCA_PMIX4X_PMI_Publish_name +#define PMI_Spawn_multiple OPAL_MCA_PMIX4X_PMI_Spawn_multiple +#define PMI_Unpublish_name OPAL_MCA_PMIX4X_PMI_Unpublish_name +#define PMIx_Abort OPAL_MCA_PMIX4X_PMIx_Abort +#define PMIx_Allocation_request_nb OPAL_MCA_PMIX4X_PMIx_Allocation_request_nb +#define PMIx_Alloc_directive_string OPAL_MCA_PMIX4X_PMIx_Alloc_directive_string +#define pmix_argv_append OPAL_MCA_PMIX4X_pmix_argv_append +#define pmix_argv_append_nosize OPAL_MCA_PMIX4X_pmix_argv_append_nosize +#define pmix_argv_append_unique_nosize OPAL_MCA_PMIX4X_pmix_argv_append_unique_nosize +#define pmix_argv_copy OPAL_MCA_PMIX4X_pmix_argv_copy +#define pmix_argv_count OPAL_MCA_PMIX4X_pmix_argv_count +#define pmix_argv_delete OPAL_MCA_PMIX4X_pmix_argv_delete +#define pmix_argv_free OPAL_MCA_PMIX4X_pmix_argv_free +#define pmix_argv_insert OPAL_MCA_PMIX4X_pmix_argv_insert +#define pmix_argv_insert_element OPAL_MCA_PMIX4X_pmix_argv_insert_element +#define pmix_argv_join OPAL_MCA_PMIX4X_pmix_argv_join +#define pmix_argv_join_range OPAL_MCA_PMIX4X_pmix_argv_join_range +#define pmix_argv_len OPAL_MCA_PMIX4X_pmix_argv_len +#define pmix_argv_prepend_nosize OPAL_MCA_PMIX4X_pmix_argv_prepend_nosize +#define pmix_argv_split OPAL_MCA_PMIX4X_pmix_argv_split +#define pmix_argv_split_with_empty OPAL_MCA_PMIX4X_pmix_argv_split_with_empty +#define pmix_asprintf OPAL_MCA_PMIX4X_pmix_asprintf +#define pmix_basename OPAL_MCA_PMIX4X_pmix_basename +#define pmix_bfrop_base_copy_persist OPAL_MCA_PMIX4X_pmix_bfrop_base_copy_persist +#define pmix_bfrop_base_select 
OPAL_MCA_PMIX4X_pmix_bfrop_base_select +#define pmix_bfrop_buffer_extend OPAL_MCA_PMIX4X_pmix_bfrop_buffer_extend +#define pmix_bfrop_get_data_type OPAL_MCA_PMIX4X_pmix_bfrop_get_data_type +#define pmix_bfrops_base_assign_module OPAL_MCA_PMIX4X_pmix_bfrops_base_assign_module +#define pmix_bfrops_base_copy OPAL_MCA_PMIX4X_pmix_bfrops_base_copy +#define pmix_bfrops_base_copy_app OPAL_MCA_PMIX4X_pmix_bfrops_base_copy_app +#define pmix_bfrops_base_copy_array OPAL_MCA_PMIX4X_pmix_bfrops_base_copy_array +#define pmix_bfrops_base_copy_bo OPAL_MCA_PMIX4X_pmix_bfrops_base_copy_bo +#define pmix_bfrops_base_copy_buf OPAL_MCA_PMIX4X_pmix_bfrops_base_copy_buf +#define pmix_bfrops_base_copy_darray OPAL_MCA_PMIX4X_pmix_bfrops_base_copy_darray +#define pmix_bfrops_base_copy_info OPAL_MCA_PMIX4X_pmix_bfrops_base_copy_info +#define pmix_bfrops_base_copy_kval OPAL_MCA_PMIX4X_pmix_bfrops_base_copy_kval +#define pmix_bfrops_base_copy_modex OPAL_MCA_PMIX4X_pmix_bfrops_base_copy_modex +#define pmix_bfrops_base_copy_payload OPAL_MCA_PMIX4X_pmix_bfrops_base_copy_payload +#define pmix_bfrops_base_copy_pdata OPAL_MCA_PMIX4X_pmix_bfrops_base_copy_pdata +#define pmix_bfrops_base_copy_pinfo OPAL_MCA_PMIX4X_pmix_bfrops_base_copy_pinfo +#define pmix_bfrops_base_copy_proc OPAL_MCA_PMIX4X_pmix_bfrops_base_copy_proc +#define pmix_bfrops_base_copy_query OPAL_MCA_PMIX4X_pmix_bfrops_base_copy_query +#define pmix_bfrops_base_copy_string OPAL_MCA_PMIX4X_pmix_bfrops_base_copy_string +#define pmix_bfrops_base_copy_value OPAL_MCA_PMIX4X_pmix_bfrops_base_copy_value +#define pmix_bfrops_base_data_type_string OPAL_MCA_PMIX4X_pmix_bfrops_base_data_type_string +#define pmix_bfrops_base_framework OPAL_MCA_PMIX4X_pmix_bfrops_base_framework +#define pmix_bfrops_base_get_available_modules OPAL_MCA_PMIX4X_pmix_bfrops_base_get_available_modules +#define pmix_bfrops_base_output OPAL_MCA_PMIX4X_pmix_bfrops_base_output +#define pmix_bfrops_base_pack OPAL_MCA_PMIX4X_pmix_bfrops_base_pack +#define 
pmix_bfrops_base_pack_alloc_directive OPAL_MCA_PMIX4X_pmix_bfrops_base_pack_alloc_directive +#define pmix_bfrops_base_pack_app OPAL_MCA_PMIX4X_pmix_bfrops_base_pack_app +#define pmix_bfrops_base_pack_array OPAL_MCA_PMIX4X_pmix_bfrops_base_pack_array +#define pmix_bfrops_base_pack_bo OPAL_MCA_PMIX4X_pmix_bfrops_base_pack_bo +#define pmix_bfrops_base_pack_bool OPAL_MCA_PMIX4X_pmix_bfrops_base_pack_bool +#define pmix_bfrops_base_pack_buf OPAL_MCA_PMIX4X_pmix_bfrops_base_pack_buf +#define pmix_bfrops_base_pack_buffer OPAL_MCA_PMIX4X_pmix_bfrops_base_pack_buffer +#define pmix_bfrops_base_pack_byte OPAL_MCA_PMIX4X_pmix_bfrops_base_pack_byte +#define pmix_bfrops_base_pack_cmd OPAL_MCA_PMIX4X_pmix_bfrops_base_pack_cmd +#define pmix_bfrops_base_pack_darray OPAL_MCA_PMIX4X_pmix_bfrops_base_pack_darray +#define pmix_bfrops_base_pack_datatype OPAL_MCA_PMIX4X_pmix_bfrops_base_pack_datatype +#define pmix_bfrops_base_pack_double OPAL_MCA_PMIX4X_pmix_bfrops_base_pack_double +#define pmix_bfrops_base_pack_float OPAL_MCA_PMIX4X_pmix_bfrops_base_pack_float +#define pmix_bfrops_base_pack_info OPAL_MCA_PMIX4X_pmix_bfrops_base_pack_info +#define pmix_bfrops_base_pack_info_directives OPAL_MCA_PMIX4X_pmix_bfrops_base_pack_info_directives +#define pmix_bfrops_base_pack_int OPAL_MCA_PMIX4X_pmix_bfrops_base_pack_int +#define pmix_bfrops_base_pack_int16 OPAL_MCA_PMIX4X_pmix_bfrops_base_pack_int16 +#define pmix_bfrops_base_pack_int32 OPAL_MCA_PMIX4X_pmix_bfrops_base_pack_int32 +#define pmix_bfrops_base_pack_int64 OPAL_MCA_PMIX4X_pmix_bfrops_base_pack_int64 +#define pmix_bfrops_base_pack_kval OPAL_MCA_PMIX4X_pmix_bfrops_base_pack_kval +#define pmix_bfrops_base_pack_modex OPAL_MCA_PMIX4X_pmix_bfrops_base_pack_modex +#define pmix_bfrops_base_pack_pdata OPAL_MCA_PMIX4X_pmix_bfrops_base_pack_pdata +#define pmix_bfrops_base_pack_persist OPAL_MCA_PMIX4X_pmix_bfrops_base_pack_persist +#define pmix_bfrops_base_pack_pid OPAL_MCA_PMIX4X_pmix_bfrops_base_pack_pid +#define pmix_bfrops_base_pack_pinfo 
OPAL_MCA_PMIX4X_pmix_bfrops_base_pack_pinfo +#define pmix_bfrops_base_pack_proc OPAL_MCA_PMIX4X_pmix_bfrops_base_pack_proc +#define pmix_bfrops_base_pack_pstate OPAL_MCA_PMIX4X_pmix_bfrops_base_pack_pstate +#define pmix_bfrops_base_pack_ptr OPAL_MCA_PMIX4X_pmix_bfrops_base_pack_ptr +#define pmix_bfrops_base_pack_query OPAL_MCA_PMIX4X_pmix_bfrops_base_pack_query +#define pmix_bfrops_base_pack_range OPAL_MCA_PMIX4X_pmix_bfrops_base_pack_range +#define pmix_bfrops_base_pack_rank OPAL_MCA_PMIX4X_pmix_bfrops_base_pack_rank +#define pmix_bfrops_base_pack_scope OPAL_MCA_PMIX4X_pmix_bfrops_base_pack_scope +#define pmix_bfrops_base_pack_sizet OPAL_MCA_PMIX4X_pmix_bfrops_base_pack_sizet +#define pmix_bfrops_base_pack_status OPAL_MCA_PMIX4X_pmix_bfrops_base_pack_status +#define pmix_bfrops_base_pack_string OPAL_MCA_PMIX4X_pmix_bfrops_base_pack_string +#define pmix_bfrops_base_pack_time OPAL_MCA_PMIX4X_pmix_bfrops_base_pack_time +#define pmix_bfrops_base_pack_timeval OPAL_MCA_PMIX4X_pmix_bfrops_base_pack_timeval +#define pmix_bfrops_base_pack_val OPAL_MCA_PMIX4X_pmix_bfrops_base_pack_val +#define pmix_bfrops_base_pack_value OPAL_MCA_PMIX4X_pmix_bfrops_base_pack_value +#define pmix_bfrops_base_print OPAL_MCA_PMIX4X_pmix_bfrops_base_print +#define pmix_bfrops_base_print_alloc_directive OPAL_MCA_PMIX4X_pmix_bfrops_base_print_alloc_directive +#define pmix_bfrops_base_print_app OPAL_MCA_PMIX4X_pmix_bfrops_base_print_app +#define pmix_bfrops_base_print_array OPAL_MCA_PMIX4X_pmix_bfrops_base_print_array +#define pmix_bfrops_base_print_bo OPAL_MCA_PMIX4X_pmix_bfrops_base_print_bo +#define pmix_bfrops_base_print_bool OPAL_MCA_PMIX4X_pmix_bfrops_base_print_bool +#define pmix_bfrops_base_print_buf OPAL_MCA_PMIX4X_pmix_bfrops_base_print_buf +#define pmix_bfrops_base_print_byte OPAL_MCA_PMIX4X_pmix_bfrops_base_print_byte +#define pmix_bfrops_base_print_cmd OPAL_MCA_PMIX4X_pmix_bfrops_base_print_cmd +#define pmix_bfrops_base_print_darray OPAL_MCA_PMIX4X_pmix_bfrops_base_print_darray 
+#define pmix_bfrops_base_print_datatype OPAL_MCA_PMIX4X_pmix_bfrops_base_print_datatype +#define pmix_bfrops_base_print_double OPAL_MCA_PMIX4X_pmix_bfrops_base_print_double +#define pmix_bfrops_base_print_float OPAL_MCA_PMIX4X_pmix_bfrops_base_print_float +#define pmix_bfrops_base_print_info OPAL_MCA_PMIX4X_pmix_bfrops_base_print_info +#define pmix_bfrops_base_print_info_directives OPAL_MCA_PMIX4X_pmix_bfrops_base_print_info_directives +#define pmix_bfrops_base_print_int OPAL_MCA_PMIX4X_pmix_bfrops_base_print_int +#define pmix_bfrops_base_print_int16 OPAL_MCA_PMIX4X_pmix_bfrops_base_print_int16 +#define pmix_bfrops_base_print_int32 OPAL_MCA_PMIX4X_pmix_bfrops_base_print_int32 +#define pmix_bfrops_base_print_int64 OPAL_MCA_PMIX4X_pmix_bfrops_base_print_int64 +#define pmix_bfrops_base_print_int8 OPAL_MCA_PMIX4X_pmix_bfrops_base_print_int8 +#define pmix_bfrops_base_print_kval OPAL_MCA_PMIX4X_pmix_bfrops_base_print_kval +#define pmix_bfrops_base_print_modex OPAL_MCA_PMIX4X_pmix_bfrops_base_print_modex +#define pmix_bfrops_base_print_pdata OPAL_MCA_PMIX4X_pmix_bfrops_base_print_pdata +#define pmix_bfrops_base_print_persist OPAL_MCA_PMIX4X_pmix_bfrops_base_print_persist +#define pmix_bfrops_base_print_pid OPAL_MCA_PMIX4X_pmix_bfrops_base_print_pid +#define pmix_bfrops_base_print_pinfo OPAL_MCA_PMIX4X_pmix_bfrops_base_print_pinfo +#define pmix_bfrops_base_print_proc OPAL_MCA_PMIX4X_pmix_bfrops_base_print_proc +#define pmix_bfrops_base_print_pstate OPAL_MCA_PMIX4X_pmix_bfrops_base_print_pstate +#define pmix_bfrops_base_print_ptr OPAL_MCA_PMIX4X_pmix_bfrops_base_print_ptr +#define pmix_bfrops_base_print_query OPAL_MCA_PMIX4X_pmix_bfrops_base_print_query +#define pmix_bfrops_base_print_range OPAL_MCA_PMIX4X_pmix_bfrops_base_print_range +#define pmix_bfrops_base_print_rank OPAL_MCA_PMIX4X_pmix_bfrops_base_print_rank +#define pmix_bfrops_base_print_scope OPAL_MCA_PMIX4X_pmix_bfrops_base_print_scope +#define pmix_bfrops_base_print_size 
OPAL_MCA_PMIX4X_pmix_bfrops_base_print_size +#define pmix_bfrops_base_print_status OPAL_MCA_PMIX4X_pmix_bfrops_base_print_status +#define pmix_bfrops_base_print_string OPAL_MCA_PMIX4X_pmix_bfrops_base_print_string +#define pmix_bfrops_base_print_time OPAL_MCA_PMIX4X_pmix_bfrops_base_print_time +#define pmix_bfrops_base_print_timeval OPAL_MCA_PMIX4X_pmix_bfrops_base_print_timeval +#define pmix_bfrops_base_print_uint OPAL_MCA_PMIX4X_pmix_bfrops_base_print_uint +#define pmix_bfrops_base_print_uint16 OPAL_MCA_PMIX4X_pmix_bfrops_base_print_uint16 +#define pmix_bfrops_base_print_uint32 OPAL_MCA_PMIX4X_pmix_bfrops_base_print_uint32 +#define pmix_bfrops_base_print_uint64 OPAL_MCA_PMIX4X_pmix_bfrops_base_print_uint64 +#define pmix_bfrops_base_print_uint8 OPAL_MCA_PMIX4X_pmix_bfrops_base_print_uint8 +#define pmix_bfrops_base_print_value OPAL_MCA_PMIX4X_pmix_bfrops_base_print_value +#define pmix_bfrops_base_std_copy OPAL_MCA_PMIX4X_pmix_bfrops_base_std_copy +#define pmix_bfrops_base_unpack OPAL_MCA_PMIX4X_pmix_bfrops_base_unpack +#define pmix_bfrops_base_unpack_alloc_directive OPAL_MCA_PMIX4X_pmix_bfrops_base_unpack_alloc_directive +#define pmix_bfrops_base_unpack_app OPAL_MCA_PMIX4X_pmix_bfrops_base_unpack_app +#define pmix_bfrops_base_unpack_array OPAL_MCA_PMIX4X_pmix_bfrops_base_unpack_array +#define pmix_bfrops_base_unpack_bo OPAL_MCA_PMIX4X_pmix_bfrops_base_unpack_bo +#define pmix_bfrops_base_unpack_bool OPAL_MCA_PMIX4X_pmix_bfrops_base_unpack_bool +#define pmix_bfrops_base_unpack_buf OPAL_MCA_PMIX4X_pmix_bfrops_base_unpack_buf +#define pmix_bfrops_base_unpack_byte OPAL_MCA_PMIX4X_pmix_bfrops_base_unpack_byte +#define pmix_bfrops_base_unpack_cmd OPAL_MCA_PMIX4X_pmix_bfrops_base_unpack_cmd +#define pmix_bfrops_base_unpack_darray OPAL_MCA_PMIX4X_pmix_bfrops_base_unpack_darray +#define pmix_bfrops_base_unpack_datatype OPAL_MCA_PMIX4X_pmix_bfrops_base_unpack_datatype +#define pmix_bfrops_base_unpack_double OPAL_MCA_PMIX4X_pmix_bfrops_base_unpack_double +#define 
pmix_bfrops_base_unpack_float OPAL_MCA_PMIX4X_pmix_bfrops_base_unpack_float +#define pmix_bfrops_base_unpack_info OPAL_MCA_PMIX4X_pmix_bfrops_base_unpack_info +#define pmix_bfrops_base_unpack_info_directives OPAL_MCA_PMIX4X_pmix_bfrops_base_unpack_info_directives +#define pmix_bfrops_base_unpack_int OPAL_MCA_PMIX4X_pmix_bfrops_base_unpack_int +#define pmix_bfrops_base_unpack_int16 OPAL_MCA_PMIX4X_pmix_bfrops_base_unpack_int16 +#define pmix_bfrops_base_unpack_int32 OPAL_MCA_PMIX4X_pmix_bfrops_base_unpack_int32 +#define pmix_bfrops_base_unpack_int64 OPAL_MCA_PMIX4X_pmix_bfrops_base_unpack_int64 +#define pmix_bfrops_base_unpack_kval OPAL_MCA_PMIX4X_pmix_bfrops_base_unpack_kval +#define pmix_bfrops_base_unpack_modex OPAL_MCA_PMIX4X_pmix_bfrops_base_unpack_modex +#define pmix_bfrops_base_unpack_pdata OPAL_MCA_PMIX4X_pmix_bfrops_base_unpack_pdata +#define pmix_bfrops_base_unpack_persist OPAL_MCA_PMIX4X_pmix_bfrops_base_unpack_persist +#define pmix_bfrops_base_unpack_pid OPAL_MCA_PMIX4X_pmix_bfrops_base_unpack_pid +#define pmix_bfrops_base_unpack_pinfo OPAL_MCA_PMIX4X_pmix_bfrops_base_unpack_pinfo +#define pmix_bfrops_base_unpack_proc OPAL_MCA_PMIX4X_pmix_bfrops_base_unpack_proc +#define pmix_bfrops_base_unpack_pstate OPAL_MCA_PMIX4X_pmix_bfrops_base_unpack_pstate +#define pmix_bfrops_base_unpack_ptr OPAL_MCA_PMIX4X_pmix_bfrops_base_unpack_ptr +#define pmix_bfrops_base_unpack_query OPAL_MCA_PMIX4X_pmix_bfrops_base_unpack_query +#define pmix_bfrops_base_unpack_range OPAL_MCA_PMIX4X_pmix_bfrops_base_unpack_range +#define pmix_bfrops_base_unpack_rank OPAL_MCA_PMIX4X_pmix_bfrops_base_unpack_rank +#define pmix_bfrops_base_unpack_scope OPAL_MCA_PMIX4X_pmix_bfrops_base_unpack_scope +#define pmix_bfrops_base_unpack_sizet OPAL_MCA_PMIX4X_pmix_bfrops_base_unpack_sizet +#define pmix_bfrops_base_unpack_status OPAL_MCA_PMIX4X_pmix_bfrops_base_unpack_status +#define pmix_bfrops_base_unpack_string OPAL_MCA_PMIX4X_pmix_bfrops_base_unpack_string +#define pmix_bfrops_base_unpack_time 
OPAL_MCA_PMIX4X_pmix_bfrops_base_unpack_time +#define pmix_bfrops_base_unpack_timeval OPAL_MCA_PMIX4X_pmix_bfrops_base_unpack_timeval +#define pmix_bfrops_base_unpack_val OPAL_MCA_PMIX4X_pmix_bfrops_base_unpack_val +#define pmix_bfrops_base_unpack_value OPAL_MCA_PMIX4X_pmix_bfrops_base_unpack_value +#define pmix_bfrops_base_value_cmp OPAL_MCA_PMIX4X_pmix_bfrops_base_value_cmp +#define pmix_bfrops_base_value_load OPAL_MCA_PMIX4X_pmix_bfrops_base_value_load +#define pmix_bfrops_base_value_unload OPAL_MCA_PMIX4X_pmix_bfrops_base_value_unload +#define pmix_bfrops_base_value_xfer OPAL_MCA_PMIX4X_pmix_bfrops_base_value_xfer +#define pmix_bfrops_globals OPAL_MCA_PMIX4X_pmix_bfrops_globals +#define pmix_bfrop_store_data_type OPAL_MCA_PMIX4X_pmix_bfrop_store_data_type +#define pmix_bfrop_too_small OPAL_MCA_PMIX4X_pmix_bfrop_too_small +#define pmix_bfrop_type_info_t_class OPAL_MCA_PMIX4X_pmix_bfrop_type_info_t_class +#define pmix_buffer_t_class OPAL_MCA_PMIX4X_pmix_buffer_t_class +#define pmix_cb_t_class OPAL_MCA_PMIX4X_pmix_cb_t_class +#define pmix_class_finalize OPAL_MCA_PMIX4X_pmix_class_finalize +#define pmix_class_init_epoch OPAL_MCA_PMIX4X_pmix_class_init_epoch +#define pmix_class_initialize OPAL_MCA_PMIX4X_pmix_class_initialize +#define pmix_client_globals OPAL_MCA_PMIX4X_pmix_client_globals +#define pmix_cmd_line_add OPAL_MCA_PMIX4X_pmix_cmd_line_add +#define pmix_cmd_line_create OPAL_MCA_PMIX4X_pmix_cmd_line_create +#define pmix_cmd_line_get_argc OPAL_MCA_PMIX4X_pmix_cmd_line_get_argc +#define pmix_cmd_line_get_argv OPAL_MCA_PMIX4X_pmix_cmd_line_get_argv +#define pmix_cmd_line_get_ninsts OPAL_MCA_PMIX4X_pmix_cmd_line_get_ninsts +#define pmix_cmd_line_get_param OPAL_MCA_PMIX4X_pmix_cmd_line_get_param +#define pmix_cmd_line_get_tail OPAL_MCA_PMIX4X_pmix_cmd_line_get_tail +#define pmix_cmd_line_get_usage_msg OPAL_MCA_PMIX4X_pmix_cmd_line_get_usage_msg +#define pmix_cmd_line_is_taken OPAL_MCA_PMIX4X_pmix_cmd_line_is_taken +#define pmix_cmd_line_make_opt3 
OPAL_MCA_PMIX4X_pmix_cmd_line_make_opt3 +#define pmix_cmd_line_make_opt_mca OPAL_MCA_PMIX4X_pmix_cmd_line_make_opt_mca +#define pmix_cmd_line_parse OPAL_MCA_PMIX4X_pmix_cmd_line_parse +#define pmix_cmd_line_t_class OPAL_MCA_PMIX4X_pmix_cmd_line_t_class +#define pmix_command_string OPAL_MCA_PMIX4X_pmix_command_string +#define PMIx_Commit OPAL_MCA_PMIX4X_PMIx_Commit +#define PMIx_Connect OPAL_MCA_PMIX4X_PMIx_Connect +#define PMIx_Connect_nb OPAL_MCA_PMIX4X_PMIx_Connect_nb +#define PMIx_Data_copy OPAL_MCA_PMIX4X_PMIx_Data_copy +#define PMIx_Data_copy_payload OPAL_MCA_PMIX4X_PMIx_Data_copy_payload +#define PMIx_Data_pack OPAL_MCA_PMIX4X_PMIx_Data_pack +#define PMIx_Data_print OPAL_MCA_PMIX4X_PMIx_Data_print +#define PMIx_Data_range_string OPAL_MCA_PMIX4X_PMIx_Data_range_string +#define PMIx_Data_type_string OPAL_MCA_PMIX4X_PMIx_Data_type_string +#define PMIx_Data_unpack OPAL_MCA_PMIX4X_PMIx_Data_unpack +#define pmix_debug_threads OPAL_MCA_PMIX4X_pmix_debug_threads +#define PMIx_Deregister_event_handler OPAL_MCA_PMIX4X_PMIx_Deregister_event_handler +#define pmix_deregister_params OPAL_MCA_PMIX4X_pmix_deregister_params +#define pmix_dirname OPAL_MCA_PMIX4X_pmix_dirname +#define PMIx_Disconnect OPAL_MCA_PMIX4X_PMIx_Disconnect +#define PMIx_Disconnect_nb OPAL_MCA_PMIX4X_PMIx_Disconnect_nb +#define pmix_environ_merge OPAL_MCA_PMIX4X_pmix_environ_merge +#define PMIx_Error_string OPAL_MCA_PMIX4X_PMIx_Error_string +#define pmix_fd_read OPAL_MCA_PMIX4X_pmix_fd_read +#define pmix_fd_set_cloexec OPAL_MCA_PMIX4X_pmix_fd_set_cloexec +#define pmix_fd_write OPAL_MCA_PMIX4X_pmix_fd_write +#define PMIx_Fence OPAL_MCA_PMIX4X_PMIx_Fence +#define PMIx_Fence_nb OPAL_MCA_PMIX4X_PMIx_Fence_nb +#define PMIx_Finalize OPAL_MCA_PMIX4X_PMIx_Finalize +#define pmix_find_absolute_path OPAL_MCA_PMIX4X_pmix_find_absolute_path +#define pmix_gds_base_assign_module OPAL_MCA_PMIX4X_pmix_gds_base_assign_module +#define pmix_gds_base_framework OPAL_MCA_PMIX4X_pmix_gds_base_framework +#define 
pmix_gds_base_get_available_modules OPAL_MCA_PMIX4X_pmix_gds_base_get_available_modules +#define pmix_gds_base_output OPAL_MCA_PMIX4X_pmix_gds_base_output +#define pmix_gds_base_select OPAL_MCA_PMIX4X_pmix_gds_base_select +#define pmix_gds_base_setup_fork OPAL_MCA_PMIX4X_pmix_gds_base_setup_fork +#define pmix_gds_globals OPAL_MCA_PMIX4X_pmix_gds_globals +#define PMIx_generate_ppn OPAL_MCA_PMIX4X_PMIx_generate_ppn +#define PMIx_generate_regex OPAL_MCA_PMIX4X_PMIx_generate_regex +#define PMIx_Get OPAL_MCA_PMIX4X_PMIx_Get +#define PMIx_Get_nb OPAL_MCA_PMIX4X_PMIx_Get_nb +#define PMIx_Get_version OPAL_MCA_PMIX4X_PMIx_Get_version +#define pmix_global_lock OPAL_MCA_PMIX4X_pmix_global_lock +#define pmix_globals OPAL_MCA_PMIX4X_pmix_globals +#define pmix_hash_fetch OPAL_MCA_PMIX4X_pmix_hash_fetch +#define pmix_hash_fetch_by_key OPAL_MCA_PMIX4X_pmix_hash_fetch_by_key +#define pmix_hash_remove_data OPAL_MCA_PMIX4X_pmix_hash_remove_data +#define pmix_hash_store OPAL_MCA_PMIX4X_pmix_hash_store +#define pmix_hash_table_get_first_key_ptr OPAL_MCA_PMIX4X_pmix_hash_table_get_first_key_ptr +#define pmix_hash_table_get_first_key_uint32 OPAL_MCA_PMIX4X_pmix_hash_table_get_first_key_uint32 +#define pmix_hash_table_get_first_key_uint64 OPAL_MCA_PMIX4X_pmix_hash_table_get_first_key_uint64 +#define pmix_hash_table_get_next_key_ptr OPAL_MCA_PMIX4X_pmix_hash_table_get_next_key_ptr +#define pmix_hash_table_get_next_key_uint32 OPAL_MCA_PMIX4X_pmix_hash_table_get_next_key_uint32 +#define pmix_hash_table_get_next_key_uint64 OPAL_MCA_PMIX4X_pmix_hash_table_get_next_key_uint64 +#define pmix_hash_table_get_value_ptr OPAL_MCA_PMIX4X_pmix_hash_table_get_value_ptr +#define pmix_hash_table_get_value_uint32 OPAL_MCA_PMIX4X_pmix_hash_table_get_value_uint32 +#define pmix_hash_table_get_value_uint64 OPAL_MCA_PMIX4X_pmix_hash_table_get_value_uint64 +#define pmix_hash_table_init OPAL_MCA_PMIX4X_pmix_hash_table_init +#define pmix_hash_table_init2 OPAL_MCA_PMIX4X_pmix_hash_table_init2 +#define 
pmix_hash_table_remove_all OPAL_MCA_PMIX4X_pmix_hash_table_remove_all +#define pmix_hash_table_remove_value_ptr OPAL_MCA_PMIX4X_pmix_hash_table_remove_value_ptr +#define pmix_hash_table_remove_value_uint32 OPAL_MCA_PMIX4X_pmix_hash_table_remove_value_uint32 +#define pmix_hash_table_remove_value_uint64 OPAL_MCA_PMIX4X_pmix_hash_table_remove_value_uint64 +#define pmix_hash_table_set_value_ptr OPAL_MCA_PMIX4X_pmix_hash_table_set_value_ptr +#define pmix_hash_table_set_value_uint32 OPAL_MCA_PMIX4X_pmix_hash_table_set_value_uint32 +#define pmix_hash_table_set_value_uint64 OPAL_MCA_PMIX4X_pmix_hash_table_set_value_uint64 +#define pmix_hash_table_t_class OPAL_MCA_PMIX4X_pmix_hash_table_t_class +#define pmix_home_directory OPAL_MCA_PMIX4X_pmix_home_directory +#define pmix_host_server OPAL_MCA_PMIX4X_pmix_host_server +#define pmix_hotel_init OPAL_MCA_PMIX4X_pmix_hotel_init +#define pmix_ifaddrtokindex OPAL_MCA_PMIX4X_pmix_ifaddrtokindex +#define pmix_ifaddrtoname OPAL_MCA_PMIX4X_pmix_ifaddrtoname +#define pmix_ifbegin OPAL_MCA_PMIX4X_pmix_ifbegin +#define pmix_ifcount OPAL_MCA_PMIX4X_pmix_ifcount +#define pmix_ifgetaliases OPAL_MCA_PMIX4X_pmix_ifgetaliases +#define pmix_ifindextoaddr OPAL_MCA_PMIX4X_pmix_ifindextoaddr +#define pmix_ifindextoflags OPAL_MCA_PMIX4X_pmix_ifindextoflags +#define pmix_ifindextokindex OPAL_MCA_PMIX4X_pmix_ifindextokindex +#define pmix_ifindextomac OPAL_MCA_PMIX4X_pmix_ifindextomac +#define pmix_ifindextomask OPAL_MCA_PMIX4X_pmix_ifindextomask +#define pmix_ifindextomtu OPAL_MCA_PMIX4X_pmix_ifindextomtu +#define pmix_ifindextoname OPAL_MCA_PMIX4X_pmix_ifindextoname +#define pmix_ifislocal OPAL_MCA_PMIX4X_pmix_ifislocal +#define pmix_ifisloopback OPAL_MCA_PMIX4X_pmix_ifisloopback +#define pmix_ifkindextoaddr OPAL_MCA_PMIX4X_pmix_ifkindextoaddr +#define pmix_ifkindextoname OPAL_MCA_PMIX4X_pmix_ifkindextoname +#define pmix_ifmatches OPAL_MCA_PMIX4X_pmix_ifmatches +#define pmix_ifnametoaddr OPAL_MCA_PMIX4X_pmix_ifnametoaddr +#define pmix_ifnametoindex 
OPAL_MCA_PMIX4X_pmix_ifnametoindex +#define pmix_ifnametokindex OPAL_MCA_PMIX4X_pmix_ifnametokindex +#define pmix_ifnext OPAL_MCA_PMIX4X_pmix_ifnext +#define pmix_iftupletoaddr OPAL_MCA_PMIX4X_pmix_iftupletoaddr +#define pmix_info_caddy_t_class OPAL_MCA_PMIX4X_pmix_info_caddy_t_class +#define PMIx_Info_directives_string OPAL_MCA_PMIX4X_PMIx_Info_directives_string +#define PMIx_Init OPAL_MCA_PMIX4X_PMIx_Init +#define pmix_init_called OPAL_MCA_PMIX4X_pmix_init_called +#define pmix_initialized OPAL_MCA_PMIX4X_pmix_initialized +#define PMIx_Initialized OPAL_MCA_PMIX4X_PMIx_Initialized +#define PMIx_Job_control_nb OPAL_MCA_PMIX4X_PMIx_Job_control_nb +#define pmix_kval_t_class OPAL_MCA_PMIX4X_pmix_kval_t_class +#define pmix_listener_t_class OPAL_MCA_PMIX4X_pmix_listener_t_class +#define pmix_list_item_t_class OPAL_MCA_PMIX4X_pmix_list_item_t_class +#define pmix_list_t_class OPAL_MCA_PMIX4X_pmix_list_t_class +#define PMIx_Log_nb OPAL_MCA_PMIX4X_PMIx_Log_nb +#define PMIx_Lookup OPAL_MCA_PMIX4X_PMIx_Lookup +#define PMIx_Lookup_nb OPAL_MCA_PMIX4X_PMIx_Lookup_nb +#define pmix_mca_base_close OPAL_MCA_PMIX4X_pmix_mca_base_close +#define pmix_mca_base_cmd_line_process_args OPAL_MCA_PMIX4X_pmix_mca_base_cmd_line_process_args +#define pmix_mca_base_cmd_line_setup OPAL_MCA_PMIX4X_pmix_mca_base_cmd_line_setup +#define pmix_mca_base_cmd_line_wrap_args OPAL_MCA_PMIX4X_pmix_mca_base_cmd_line_wrap_args +#define pmix_mca_base_component_close OPAL_MCA_PMIX4X_pmix_mca_base_component_close +#define pmix_mca_base_component_compare OPAL_MCA_PMIX4X_pmix_mca_base_component_compare +#define pmix_mca_base_component_compare_priority OPAL_MCA_PMIX4X_pmix_mca_base_component_compare_priority +#define pmix_mca_base_component_compatible OPAL_MCA_PMIX4X_pmix_mca_base_component_compatible +#define pmix_mca_base_component_disable_dlopen OPAL_MCA_PMIX4X_pmix_mca_base_component_disable_dlopen +#define pmix_mca_base_component_find OPAL_MCA_PMIX4X_pmix_mca_base_component_find +#define 
pmix_mca_base_component_find_finalize OPAL_MCA_PMIX4X_pmix_mca_base_component_find_finalize +#define pmix_mca_base_component_list_item_t_class OPAL_MCA_PMIX4X_pmix_mca_base_component_list_item_t_class +#define pmix_mca_base_component_parse_requested OPAL_MCA_PMIX4X_pmix_mca_base_component_parse_requested +#define pmix_mca_base_component_path OPAL_MCA_PMIX4X_pmix_mca_base_component_path +#define pmix_mca_base_component_priority_list_item_t_class OPAL_MCA_PMIX4X_pmix_mca_base_component_priority_list_item_t_class +#define pmix_mca_base_component_repository_add OPAL_MCA_PMIX4X_pmix_mca_base_component_repository_add +#define pmix_mca_base_component_repository_finalize OPAL_MCA_PMIX4X_pmix_mca_base_component_repository_finalize +#define pmix_mca_base_component_repository_get_components OPAL_MCA_PMIX4X_pmix_mca_base_component_repository_get_components +#define pmix_mca_base_component_repository_init OPAL_MCA_PMIX4X_pmix_mca_base_component_repository_init +#define pmix_mca_base_component_repository_item_t_class OPAL_MCA_PMIX4X_pmix_mca_base_component_repository_item_t_class +#define pmix_mca_base_component_repository_open OPAL_MCA_PMIX4X_pmix_mca_base_component_repository_open +#define pmix_mca_base_component_repository_release OPAL_MCA_PMIX4X_pmix_mca_base_component_repository_release +#define pmix_mca_base_component_repository_retain_component OPAL_MCA_PMIX4X_pmix_mca_base_component_repository_retain_component +#define pmix_mca_base_components_close OPAL_MCA_PMIX4X_pmix_mca_base_components_close +#define pmix_mca_base_components_filter OPAL_MCA_PMIX4X_pmix_mca_base_components_filter +#define pmix_mca_base_component_show_load_errors OPAL_MCA_PMIX4X_pmix_mca_base_component_show_load_errors +#define pmix_mca_base_component_to_string OPAL_MCA_PMIX4X_pmix_mca_base_component_to_string +#define pmix_mca_base_component_track_load_errors OPAL_MCA_PMIX4X_pmix_mca_base_component_track_load_errors +#define pmix_mca_base_component_unload OPAL_MCA_PMIX4X_pmix_mca_base_component_unload 
+#define pmix_mca_base_component_var_register OPAL_MCA_PMIX4X_pmix_mca_base_component_var_register +#define pmix_mca_base_failed_component_t_class OPAL_MCA_PMIX4X_pmix_mca_base_failed_component_t_class +#define pmix_mca_base_framework_close OPAL_MCA_PMIX4X_pmix_mca_base_framework_close +#define pmix_mca_base_framework_components_close OPAL_MCA_PMIX4X_pmix_mca_base_framework_components_close +#define pmix_mca_base_framework_components_open OPAL_MCA_PMIX4X_pmix_mca_base_framework_components_open +#define pmix_mca_base_framework_components_register OPAL_MCA_PMIX4X_pmix_mca_base_framework_components_register +#define pmix_mca_base_framework_is_open OPAL_MCA_PMIX4X_pmix_mca_base_framework_is_open +#define pmix_mca_base_framework_is_registered OPAL_MCA_PMIX4X_pmix_mca_base_framework_is_registered +#define pmix_mca_base_framework_open OPAL_MCA_PMIX4X_pmix_mca_base_framework_open +#define pmix_mca_base_framework_register OPAL_MCA_PMIX4X_pmix_mca_base_framework_register +#define pmix_mca_base_framework_var_register OPAL_MCA_PMIX4X_pmix_mca_base_framework_var_register +#define pmix_mca_base_open OPAL_MCA_PMIX4X_pmix_mca_base_open +#define pmix_mca_base_select OPAL_MCA_PMIX4X_pmix_mca_base_select +#define pmix_mca_base_system_default_path OPAL_MCA_PMIX4X_pmix_mca_base_system_default_path +#define pmix_mca_base_user_default_path OPAL_MCA_PMIX4X_pmix_mca_base_user_default_path +#define pmix_mca_base_var_build_env OPAL_MCA_PMIX4X_pmix_mca_base_var_build_env +#define pmix_mca_base_var_cache_files OPAL_MCA_PMIX4X_pmix_mca_base_var_cache_files +#define pmix_mca_base_var_check_exclusive OPAL_MCA_PMIX4X_pmix_mca_base_var_check_exclusive +#define pmix_mca_base_var_deregister OPAL_MCA_PMIX4X_pmix_mca_base_var_deregister +#define pmix_mca_base_var_dump OPAL_MCA_PMIX4X_pmix_mca_base_var_dump +#define pmix_mca_base_var_env_name OPAL_MCA_PMIX4X_pmix_mca_base_var_env_name +#define pmix_mca_base_var_finalize OPAL_MCA_PMIX4X_pmix_mca_base_var_finalize +#define pmix_mca_base_var_find 
OPAL_MCA_PMIX4X_pmix_mca_base_var_find +#define pmix_mca_base_var_find_by_name OPAL_MCA_PMIX4X_pmix_mca_base_var_find_by_name +#define pmix_mca_base_var_get OPAL_MCA_PMIX4X_pmix_mca_base_var_get +#define pmix_mca_base_var_get_count OPAL_MCA_PMIX4X_pmix_mca_base_var_get_count +#define pmix_mca_base_var_get_value OPAL_MCA_PMIX4X_pmix_mca_base_var_get_value +#define pmix_mca_base_var_group_component_register OPAL_MCA_PMIX4X_pmix_mca_base_var_group_component_register +#define pmix_mca_base_var_group_deregister OPAL_MCA_PMIX4X_pmix_mca_base_var_group_deregister +#define pmix_mca_base_var_group_find OPAL_MCA_PMIX4X_pmix_mca_base_var_group_find +#define pmix_mca_base_var_group_find_by_name OPAL_MCA_PMIX4X_pmix_mca_base_var_group_find_by_name +#define pmix_mca_base_var_group_get OPAL_MCA_PMIX4X_pmix_mca_base_var_group_get +#define pmix_mca_base_var_group_get_count OPAL_MCA_PMIX4X_pmix_mca_base_var_group_get_count +#define pmix_mca_base_var_group_get_stamp OPAL_MCA_PMIX4X_pmix_mca_base_var_group_get_stamp +#define pmix_mca_base_var_group_register OPAL_MCA_PMIX4X_pmix_mca_base_var_group_register +#define pmix_mca_base_var_group_set_var_flag OPAL_MCA_PMIX4X_pmix_mca_base_var_group_set_var_flag +#define pmix_mca_base_var_group_t_class OPAL_MCA_PMIX4X_pmix_mca_base_var_group_t_class +#define pmix_mca_base_var_init OPAL_MCA_PMIX4X_pmix_mca_base_var_init +#define pmix_mca_base_var_process_env_list OPAL_MCA_PMIX4X_pmix_mca_base_var_process_env_list +#define pmix_mca_base_var_process_env_list_from_file OPAL_MCA_PMIX4X_pmix_mca_base_var_process_env_list_from_file +#define pmix_mca_base_var_register OPAL_MCA_PMIX4X_pmix_mca_base_var_register +#define pmix_mca_base_var_register_synonym OPAL_MCA_PMIX4X_pmix_mca_base_var_register_synonym +#define pmix_mca_base_var_set_flag OPAL_MCA_PMIX4X_pmix_mca_base_var_set_flag +#define pmix_mca_base_var_set_value OPAL_MCA_PMIX4X_pmix_mca_base_var_set_value +#define pmix_mca_base_var_t_class OPAL_MCA_PMIX4X_pmix_mca_base_var_t_class +#define 
pmix_mutex_t_class OPAL_MCA_PMIX4X_pmix_mutex_t_class +#define pmix_namelist_t_class OPAL_MCA_PMIX4X_pmix_namelist_t_class +#define pmix_net_addr_isipv4public OPAL_MCA_PMIX4X_pmix_net_addr_isipv4public +#define pmix_net_finalize OPAL_MCA_PMIX4X_pmix_net_finalize +#define pmix_net_get_hostname OPAL_MCA_PMIX4X_pmix_net_get_hostname +#define pmix_net_get_port OPAL_MCA_PMIX4X_pmix_net_get_port +#define pmix_net_init OPAL_MCA_PMIX4X_pmix_net_init +#define pmix_net_isaddr OPAL_MCA_PMIX4X_pmix_net_isaddr +#define pmix_net_islocalhost OPAL_MCA_PMIX4X_pmix_net_islocalhost +#define pmix_net_prefix2netmask OPAL_MCA_PMIX4X_pmix_net_prefix2netmask +#define pmix_net_samenetwork OPAL_MCA_PMIX4X_pmix_net_samenetwork +#define PMIx_Notify_event OPAL_MCA_PMIX4X_PMIx_Notify_event +#define pmix_nspace_caddy_t_class OPAL_MCA_PMIX4X_pmix_nspace_caddy_t_class +#define pmix_nspace_t_class OPAL_MCA_PMIX4X_pmix_nspace_t_class +#define pmix_object_t_class OPAL_MCA_PMIX4X_pmix_object_t_class +#define pmix_os_path OPAL_MCA_PMIX4X_pmix_os_path +#define pmix_output OPAL_MCA_PMIX4X_pmix_output +#define pmix_output_close OPAL_MCA_PMIX4X_pmix_output_close +#define pmix_output_finalize OPAL_MCA_PMIX4X_pmix_output_finalize +#define pmix_output_get_verbosity OPAL_MCA_PMIX4X_pmix_output_get_verbosity +#define pmix_output_check_verbosity OPAL_MCA_PMIX4X_pmix_output_check_verbosity +#define pmix_output_hexdump OPAL_MCA_PMIX4X_pmix_output_hexdump +#define pmix_output_init OPAL_MCA_PMIX4X_pmix_output_init +#define pmix_output_open OPAL_MCA_PMIX4X_pmix_output_open +#define pmix_output_reopen OPAL_MCA_PMIX4X_pmix_output_reopen +#define pmix_output_reopen_all OPAL_MCA_PMIX4X_pmix_output_reopen_all +#define pmix_output_set_output_file_info OPAL_MCA_PMIX4X_pmix_output_set_output_file_info +#define pmix_output_set_verbosity OPAL_MCA_PMIX4X_pmix_output_set_verbosity +#define pmix_output_switch OPAL_MCA_PMIX4X_pmix_output_switch +#define pmix_output_vverbose OPAL_MCA_PMIX4X_pmix_output_vverbose +#define 
pmix_path_access OPAL_MCA_PMIX4X_pmix_path_access +#define pmix_path_df OPAL_MCA_PMIX4X_pmix_path_df +#define pmix_path_find OPAL_MCA_PMIX4X_pmix_path_find +#define pmix_path_findv OPAL_MCA_PMIX4X_pmix_path_findv +#define pmix_path_is_absolute OPAL_MCA_PMIX4X_pmix_path_is_absolute +#define pmix_path_nfs OPAL_MCA_PMIX4X_pmix_path_nfs +#define pmix_pdl_base_framework OPAL_MCA_PMIX4X_pmix_pdl_base_framework +#define pmix_peer_t_class OPAL_MCA_PMIX4X_pmix_peer_t_class +#define pmix_pending_connection_t_class OPAL_MCA_PMIX4X_pmix_pending_connection_t_class +#define PMIx_Persistence_string OPAL_MCA_PMIX4X_PMIx_Persistence_string +#define pmix_pif_base_framework OPAL_MCA_PMIX4X_pmix_pif_base_framework +#define pmix_pinstall_dirs OPAL_MCA_PMIX4X_pmix_pinstall_dirs +#define pmix_pinstalldirs_base_framework OPAL_MCA_PMIX4X_pmix_pinstalldirs_base_framework +#define pmix_pnet OPAL_MCA_PMIX4X_pmix_pnet +#define pmix_pnet_base_child_finalized OPAL_MCA_PMIX4X_pmix_pnet_base_child_finalized +#define pmix_pnet_base_framework OPAL_MCA_PMIX4X_pmix_pnet_base_framework +#define pmix_pnet_base_local_app_finalized OPAL_MCA_PMIX4X_pmix_pnet_base_local_app_finalized +#define pmix_pnet_base_select OPAL_MCA_PMIX4X_pmix_pnet_base_select +#define pmix_pnet_base_setup_app OPAL_MCA_PMIX4X_pmix_pnet_base_setup_app +#define pmix_pnet_base_setup_fork OPAL_MCA_PMIX4X_pmix_pnet_base_setup_fork +#define pmix_pnet_base_setup_local_network OPAL_MCA_PMIX4X_pmix_pnet_base_setup_local_network +#define pmix_pnet_globals OPAL_MCA_PMIX4X_pmix_pnet_globals +#define pmix_pointer_array_add OPAL_MCA_PMIX4X_pmix_pointer_array_add +#define pmix_pointer_array_init OPAL_MCA_PMIX4X_pmix_pointer_array_init +#define pmix_pointer_array_set_item OPAL_MCA_PMIX4X_pmix_pointer_array_set_item +#define pmix_pointer_array_set_size OPAL_MCA_PMIX4X_pmix_pointer_array_set_size +#define pmix_pointer_array_t_class OPAL_MCA_PMIX4X_pmix_pointer_array_t_class +#define pmix_pointer_array_test_and_set_item 
OPAL_MCA_PMIX4X_pmix_pointer_array_test_and_set_item +#define pmix_preg OPAL_MCA_PMIX4X_pmix_preg +#define pmix_preg_base_framework OPAL_MCA_PMIX4X_pmix_preg_base_framework +#define pmix_preg_base_generate_node_regex OPAL_MCA_PMIX4X_pmix_preg_base_generate_node_regex +#define pmix_preg_base_generate_ppn OPAL_MCA_PMIX4X_pmix_preg_base_generate_ppn +#define pmix_preg_base_parse_nodes OPAL_MCA_PMIX4X_pmix_preg_base_parse_nodes +#define pmix_preg_base_parse_procs OPAL_MCA_PMIX4X_pmix_preg_base_parse_procs +#define pmix_preg_base_resolve_nodes OPAL_MCA_PMIX4X_pmix_preg_base_resolve_nodes +#define pmix_preg_base_resolve_peers OPAL_MCA_PMIX4X_pmix_preg_base_resolve_peers +#define pmix_preg_base_select OPAL_MCA_PMIX4X_pmix_preg_base_select +#define pmix_preg_globals OPAL_MCA_PMIX4X_pmix_preg_globals +#define PMIx_Process_monitor_nb OPAL_MCA_PMIX4X_PMIx_Process_monitor_nb +#define PMIx_Proc_state_string OPAL_MCA_PMIX4X_PMIx_Proc_state_string +#define pmix_psec_base_assign_module OPAL_MCA_PMIX4X_pmix_psec_base_assign_module +#define pmix_psec_base_framework OPAL_MCA_PMIX4X_pmix_psec_base_framework +#define pmix_psec_base_get_available_modules OPAL_MCA_PMIX4X_pmix_psec_base_get_available_modules +#define pmix_psec_base_select OPAL_MCA_PMIX4X_pmix_psec_base_select +#define pmix_psensor OPAL_MCA_PMIX4X_pmix_psensor +#define pmix_psensor_base OPAL_MCA_PMIX4X_pmix_psensor_base +#define pmix_psensor_base_framework OPAL_MCA_PMIX4X_pmix_psensor_base_framework +#define pmix_psensor_base_select OPAL_MCA_PMIX4X_pmix_psensor_base_select +#define pmix_psensor_base_start OPAL_MCA_PMIX4X_pmix_psensor_base_start +#define pmix_psensor_base_stop OPAL_MCA_PMIX4X_pmix_psensor_base_stop +#define pmix_pshmem OPAL_MCA_PMIX4X_pmix_pshmem +#define pmix_pshmem_base_framework OPAL_MCA_PMIX4X_pmix_pshmem_base_framework +#define pmix_pshmem_base_select OPAL_MCA_PMIX4X_pmix_pshmem_base_select +#define pmix_ptl_base_assign_module OPAL_MCA_PMIX4X_pmix_ptl_base_assign_module +#define 
pmix_ptl_base_cancel_recv OPAL_MCA_PMIX4X_pmix_ptl_base_cancel_recv +#define pmix_ptl_base_connect OPAL_MCA_PMIX4X_pmix_ptl_base_connect +#define pmix_ptl_base_connect_to_peer OPAL_MCA_PMIX4X_pmix_ptl_base_connect_to_peer +#define pmix_ptl_base_framework OPAL_MCA_PMIX4X_pmix_ptl_base_framework +#define pmix_ptl_base_get_available_modules OPAL_MCA_PMIX4X_pmix_ptl_base_get_available_modules +#define pmix_ptl_base_lost_connection OPAL_MCA_PMIX4X_pmix_ptl_base_lost_connection +#define pmix_ptl_base_output OPAL_MCA_PMIX4X_pmix_ptl_base_output +#define pmix_ptl_base_process_msg OPAL_MCA_PMIX4X_pmix_ptl_base_process_msg +#define pmix_ptl_base_recv_blocking OPAL_MCA_PMIX4X_pmix_ptl_base_recv_blocking +#define pmix_ptl_base_recv_handler OPAL_MCA_PMIX4X_pmix_ptl_base_recv_handler +#define pmix_ptl_base_register_recv OPAL_MCA_PMIX4X_pmix_ptl_base_register_recv +#define pmix_ptl_base_select OPAL_MCA_PMIX4X_pmix_ptl_base_select +#define pmix_ptl_base_send OPAL_MCA_PMIX4X_pmix_ptl_base_send +#define pmix_ptl_base_send_blocking OPAL_MCA_PMIX4X_pmix_ptl_base_send_blocking +#define pmix_ptl_base_send_handler OPAL_MCA_PMIX4X_pmix_ptl_base_send_handler +#define pmix_ptl_base_send_recv OPAL_MCA_PMIX4X_pmix_ptl_base_send_recv +#define pmix_ptl_base_set_blocking OPAL_MCA_PMIX4X_pmix_ptl_base_set_blocking +#define pmix_ptl_base_set_nonblocking OPAL_MCA_PMIX4X_pmix_ptl_base_set_nonblocking +#define pmix_ptl_base_set_notification_cbfunc OPAL_MCA_PMIX4X_pmix_ptl_base_set_notification_cbfunc +#define pmix_ptl_base_start_listening OPAL_MCA_PMIX4X_pmix_ptl_base_start_listening +#define pmix_ptl_base_stop_listening OPAL_MCA_PMIX4X_pmix_ptl_base_stop_listening +#define pmix_ptl_globals OPAL_MCA_PMIX4X_pmix_ptl_globals +#define pmix_ptl_posted_recv_t_class OPAL_MCA_PMIX4X_pmix_ptl_posted_recv_t_class +#define pmix_ptl_queue_t_class OPAL_MCA_PMIX4X_pmix_ptl_queue_t_class +#define pmix_ptl_recv_t_class OPAL_MCA_PMIX4X_pmix_ptl_recv_t_class +#define pmix_ptl_send_t_class 
OPAL_MCA_PMIX4X_pmix_ptl_send_t_class +#define pmix_ptl_sr_t_class OPAL_MCA_PMIX4X_pmix_ptl_sr_t_class +#define PMIx_Publish OPAL_MCA_PMIX4X_PMIx_Publish +#define PMIx_Publish_nb OPAL_MCA_PMIX4X_PMIx_Publish_nb +#define PMIx_Put OPAL_MCA_PMIX4X_PMIx_Put +#define pmix_query_caddy_t_class OPAL_MCA_PMIX4X_pmix_query_caddy_t_class +#define PMIx_Query_info_nb OPAL_MCA_PMIX4X_PMIx_Query_info_nb +#define pmix_rand OPAL_MCA_PMIX4X_pmix_rand +#define pmix_random OPAL_MCA_PMIX4X_pmix_random +#define pmix_rank_info_t_class OPAL_MCA_PMIX4X_pmix_rank_info_t_class +#define pmix_recursive_mutex_t_class OPAL_MCA_PMIX4X_pmix_recursive_mutex_t_class +#define pmix_regex_range_t_class OPAL_MCA_PMIX4X_pmix_regex_range_t_class +#define pmix_regex_value_t_class OPAL_MCA_PMIX4X_pmix_regex_value_t_class +#define PMIx_Register_event_handler OPAL_MCA_PMIX4X_PMIx_Register_event_handler +#define pmix_register_params OPAL_MCA_PMIX4X_pmix_register_params +#define PMIx_Resolve_nodes OPAL_MCA_PMIX4X_PMIx_Resolve_nodes +#define PMIx_Resolve_peers OPAL_MCA_PMIX4X_PMIx_Resolve_peers +#define pmix_ring_buffer_init OPAL_MCA_PMIX4X_pmix_ring_buffer_init +#define pmix_ring_buffer_poke OPAL_MCA_PMIX4X_pmix_ring_buffer_poke +#define pmix_ring_buffer_pop OPAL_MCA_PMIX4X_pmix_ring_buffer_pop +#define pmix_ring_buffer_push OPAL_MCA_PMIX4X_pmix_ring_buffer_push +#define pmix_rte_finalize OPAL_MCA_PMIX4X_pmix_rte_finalize +#define pmix_rte_init OPAL_MCA_PMIX4X_pmix_rte_init +#define PMIx_Scope_string OPAL_MCA_PMIX4X_PMIx_Scope_string +#define PMIx_server_deregister_client OPAL_MCA_PMIX4X_PMIx_server_deregister_client +#define PMIx_server_deregister_nspace OPAL_MCA_PMIX4X_PMIx_server_deregister_nspace +#define PMIx_server_dmodex_request OPAL_MCA_PMIX4X_PMIx_server_dmodex_request +#define PMIx_server_finalize OPAL_MCA_PMIX4X_PMIx_server_finalize +#define pmix_server_globals OPAL_MCA_PMIX4X_pmix_server_globals +#define PMIx_server_init OPAL_MCA_PMIX4X_PMIx_server_init +#define PMIx_server_register_client 
OPAL_MCA_PMIX4X_PMIx_server_register_client +#define PMIx_server_register_nspace OPAL_MCA_PMIX4X_PMIx_server_register_nspace +#define PMIx_server_setup_application OPAL_MCA_PMIX4X_PMIx_server_setup_application +#define PMIx_server_setup_fork OPAL_MCA_PMIX4X_PMIx_server_setup_fork +#define PMIx_server_setup_local_support OPAL_MCA_PMIX4X_PMIx_server_setup_local_support +#define pmix_setenv OPAL_MCA_PMIX4X_pmix_setenv +#define pmix_setup_caddy_t_class OPAL_MCA_PMIX4X_pmix_setup_caddy_t_class +#define pmix_shift_caddy_t_class OPAL_MCA_PMIX4X_pmix_shift_caddy_t_class +#define pmix_show_help OPAL_MCA_PMIX4X_pmix_show_help +#define pmix_show_help_add_dir OPAL_MCA_PMIX4X_pmix_show_help_add_dir +#define pmix_show_help_finalize OPAL_MCA_PMIX4X_pmix_show_help_finalize +#define pmix_show_help_init OPAL_MCA_PMIX4X_pmix_show_help_init +#define pmix_show_help_string OPAL_MCA_PMIX4X_pmix_show_help_string +#define pmix_show_help_vstring OPAL_MCA_PMIX4X_pmix_show_help_vstring +#define pmix_show_vhelp OPAL_MCA_PMIX4X_pmix_show_vhelp +#define pmix_snprintf OPAL_MCA_PMIX4X_pmix_snprintf +#define PMIx_Spawn OPAL_MCA_PMIX4X_PMIx_Spawn +#define PMIx_Spawn_nb OPAL_MCA_PMIX4X_PMIx_Spawn_nb +#define pmix_srand OPAL_MCA_PMIX4X_pmix_srand +#define PMIx_Store_internal OPAL_MCA_PMIX4X_PMIx_Store_internal +#define pmix_sync_wait_mt OPAL_MCA_PMIX4X_pmix_sync_wait_mt +#define pmix_thread_get_self OPAL_MCA_PMIX4X_pmix_thread_get_self +#define pmix_thread_join OPAL_MCA_PMIX4X_pmix_thread_join +#define pmix_thread_kill OPAL_MCA_PMIX4X_pmix_thread_kill +#define pmix_thread_self_compare OPAL_MCA_PMIX4X_pmix_thread_self_compare +#define pmix_thread_set_main OPAL_MCA_PMIX4X_pmix_thread_set_main +#define pmix_thread_start OPAL_MCA_PMIX4X_pmix_thread_start +#define pmix_thread_t_class OPAL_MCA_PMIX4X_pmix_thread_t_class +#define pmix_tmp_directory OPAL_MCA_PMIX4X_pmix_tmp_directory +#define PMIx_tool_finalize OPAL_MCA_PMIX4X_PMIx_tool_finalize +#define PMIx_tool_init OPAL_MCA_PMIX4X_PMIx_tool_init +#define 
pmix_tsd_key_create OPAL_MCA_PMIX4X_pmix_tsd_key_create +#define pmix_tsd_keys_destruct OPAL_MCA_PMIX4X_pmix_tsd_keys_destruct +#define PMIx_Unpublish OPAL_MCA_PMIX4X_PMIx_Unpublish +#define PMIx_Unpublish_nb OPAL_MCA_PMIX4X_PMIx_Unpublish_nb +#define pmix_unsetenv OPAL_MCA_PMIX4X_pmix_unsetenv +#define pmix_util_compress_string OPAL_MCA_PMIX4X_pmix_util_compress_string +#define pmix_util_getid OPAL_MCA_PMIX4X_pmix_util_getid +#define pmix_util_get_ranges OPAL_MCA_PMIX4X_pmix_util_get_ranges +#define pmix_util_keyval_parse OPAL_MCA_PMIX4X_pmix_util_keyval_parse +#define pmix_util_keyval_parse_finalize OPAL_MCA_PMIX4X_pmix_util_keyval_parse_finalize +#define pmix_util_keyval_parse_init OPAL_MCA_PMIX4X_pmix_util_keyval_parse_init +#define pmix_util_keyval_parse_lineno OPAL_MCA_PMIX4X_pmix_util_keyval_parse_lineno +#define pmix_util_keyval_save_internal_envars OPAL_MCA_PMIX4X_pmix_util_keyval_save_internal_envars +#define pmix_util_parse_range_options OPAL_MCA_PMIX4X_pmix_util_parse_range_options +#define pmix_util_uncompress_string OPAL_MCA_PMIX4X_pmix_util_uncompress_string +#define pmix_value_array_set_size OPAL_MCA_PMIX4X_pmix_value_array_set_size +#define pmix_value_array_t_class OPAL_MCA_PMIX4X_pmix_value_array_t_class +#define pmix_value_load OPAL_MCA_PMIX4X_pmix_value_load +#define pmix_value_xfer OPAL_MCA_PMIX4X_pmix_value_xfer +#define pmix_var_type_names OPAL_MCA_PMIX4X_pmix_var_type_names +#define pmix_vasprintf OPAL_MCA_PMIX4X_pmix_vasprintf +#define pmix_vsnprintf OPAL_MCA_PMIX4X_pmix_vsnprintf diff --git a/opal/mca/pmix/pmix4x/openpmix/include/pmix_version.h b/opal/mca/pmix/pmix4x/openpmix/include/pmix_version.h new file mode 100644 index 00000000000..b4b56e1e858 --- /dev/null +++ b/opal/mca/pmix/pmix4x/openpmix/include/pmix_version.h @@ -0,0 +1,25 @@ +/* + * Copyright (c) 2016 Mellanox Technologies, Inc. + * All rights reserved. + * Copyright (c) 2018 IBM Corporation. All rights reserved. + * Copyright (c) 2018 Intel, Inc. All rights reserved. 
+ * Copyright (c) 2019 Research Organization for Information Science + * and Technology (RIST). All rights reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + + +#ifndef PMIx_VERSION_H +#define PMIx_VERSION_H + +/* define PMIx version */ +#define PMIX_VERSION_MAJOR 4L +#define PMIX_VERSION_MINOR 0L +#define PMIX_VERSION_RELEASE 0L + +#define PMIX_NUMERIC_VERSION 0x00040000 +#endif diff --git a/opal/mca/pmix/pmix4x/openpmix/src/include/frameworks.h b/opal/mca/pmix/pmix4x/openpmix/src/include/frameworks.h new file mode 100644 index 00000000000..146ef97fe75 --- /dev/null +++ b/opal/mca/pmix/pmix4x/openpmix/src/include/frameworks.h @@ -0,0 +1,47 @@ +/* + * This file is autogenerated by autogen.pl. Do not edit this file by hand. + */ +#ifndef PMIX_FRAMEWORKS_H +#define PMIX_FRAMEWORKS_H + +#include + +extern pmix_mca_base_framework_t pmix_bfrops_base_framework; +extern pmix_mca_base_framework_t pmix_gds_base_framework; +extern pmix_mca_base_framework_t pmix_pcompress_base_framework; +extern pmix_mca_base_framework_t pmix_pdl_base_framework; +extern pmix_mca_base_framework_t pmix_pfexec_base_framework; +extern pmix_mca_base_framework_t pmix_pif_base_framework; +extern pmix_mca_base_framework_t pmix_pinstalldirs_base_framework; +extern pmix_mca_base_framework_t pmix_plog_base_framework; +extern pmix_mca_base_framework_t pmix_pmdl_base_framework; +extern pmix_mca_base_framework_t pmix_pnet_base_framework; +extern pmix_mca_base_framework_t pmix_preg_base_framework; +extern pmix_mca_base_framework_t pmix_psec_base_framework; +extern pmix_mca_base_framework_t pmix_psensor_base_framework; +extern pmix_mca_base_framework_t pmix_pshmem_base_framework; +extern pmix_mca_base_framework_t pmix_psquash_base_framework; +extern pmix_mca_base_framework_t pmix_ptl_base_framework; + +static pmix_mca_base_framework_t *pmix_frameworks[] = { + &pmix_bfrops_base_framework, + &pmix_gds_base_framework, + &pmix_pcompress_base_framework, + 
&pmix_pdl_base_framework, + &pmix_pfexec_base_framework, + &pmix_pif_base_framework, + &pmix_pinstalldirs_base_framework, + &pmix_plog_base_framework, + &pmix_pmdl_base_framework, + &pmix_pnet_base_framework, + &pmix_preg_base_framework, + &pmix_psec_base_framework, + &pmix_psensor_base_framework, + &pmix_pshmem_base_framework, + &pmix_psquash_base_framework, + &pmix_ptl_base_framework, + NULL +}; + +#endif /* PMIX_FRAMEWORKS_H */ + diff --git a/opal/mca/pmix/pmix4x/openpmix/src/include/pmix_config.h b/opal/mca/pmix/pmix4x/openpmix/src/include/pmix_config.h new file mode 100644 index 00000000000..7228c5a7a03 --- /dev/null +++ b/opal/mca/pmix/pmix4x/openpmix/src/include/pmix_config.h @@ -0,0 +1,940 @@ +/* src/include/pmix_config.h. Generated from pmix_config.h.in by configure. */ +/* src/include/pmix_config.h.in. Generated from configure.ac by autoheader. */ + +/* -*- c -*- + * + * Copyright (c) 2004-2005 The Trustees of Indiana University. + * All rights reserved. + * Copyright (c) 2004-2005 The Trustees of the University of Tennessee. + * All rights reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * Copyright (c) 2013-2015 Intel, Inc. All rights reserved + * Copyright (c) 2016 IBM Corporation. All rights reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + * + * This file is automatically generated by configure. Edits will be lost + * the next time you run configure! + */ + +#ifndef PMIX_CONFIG_H +#define PMIX_CONFIG_H + +#include + + + +/* Define if building universal (internal helper macro) */ +/* #undef AC_APPLE_UNIVERSAL_BUILD */ + +/* Define to 1 if you have the header file. */ +#define HAVE_ARPA_INET_H 1 + +/* Define to 1 if you have the `asprintf' function. */ +#define HAVE_ASPRINTF 1 + +/* Define to 1 if you have the header file. 
*/ +/* #undef HAVE_CRT_EXTERNS_H */ + +/* Define to 1 if you have the declaration of `AF_INET6', and to 0 if you + don't. */ +#define HAVE_DECL_AF_INET6 1 + +/* Define to 1 if you have the declaration of `AF_UNSPEC', and to 0 if you + don't. */ +#define HAVE_DECL_AF_UNSPEC 1 + +/* Define to 1 if you have the declaration of `PF_INET6', and to 0 if you + don't. */ +#define HAVE_DECL_PF_INET6 1 + +/* Define to 1 if you have the declaration of `PF_UNSPEC', and to 0 if you + don't. */ +#define HAVE_DECL_PF_UNSPEC 1 + +/* Define to 1 if you have the declaration of `__func__', and to 0 if you + don't. */ +#define HAVE_DECL___FUNC__ 1 + +/* Define to 1 if you have the header file. */ +#define HAVE_DIRENT_H 1 + +/* Define to 1 if you have the header file. */ +#define HAVE_DLFCN_H 1 + +/* Define to 1 if you have the header file. */ +/* #undef HAVE_EVENT_H */ + +/* Define to 1 if you have the `execve' function. */ +#define HAVE_EXECVE 1 + +/* Define to 1 if you have the locking by fcntl. */ +#define HAVE_FCNTL_FLOCK 1 + +/* Define to 1 if you have the header file. */ +#define HAVE_FCNTL_H 1 + +/* Define to 1 if you have the `fork' function. */ +#define HAVE_FORK 1 + +/* Define to 1 if you have the `getpeereid' function. */ +/* #undef HAVE_GETPEEREID */ + +/* Define to 1 if you have the `getpeerucred' function. */ +/* #undef HAVE_GETPEERUCRED */ + +/* Define to 1 if you have the header file. */ +#define HAVE_GRP_H 1 + +/* Define to 1 if you have the header file. */ +/* #undef HAVE_HOSTLIB_H */ + +/* Define to 1 if you have the header file. */ +/* #undef HAVE_HWLOC_H */ + +/* Define to 1 if you have the header file. */ +#define HAVE_IFADDRS_H 1 + +/* Define to 1 if the system has the type `int16_t'. */ +#define HAVE_INT16_T 1 + +/* Define to 1 if the system has the type `int32_t'. */ +#define HAVE_INT32_T 1 + +/* Define to 1 if the system has the type `int64_t'. */ +#define HAVE_INT64_T 1 + +/* Define to 1 if the system has the type `int8_t'. 
*/ +#define HAVE_INT8_T 1 + +/* Define to 1 if the system has the type `intptr_t'. */ +#define HAVE_INTPTR_T 1 + +/* Define to 1 if you have the header file. */ +#define HAVE_INTTYPES_H 1 + +/* Define to 1 if you have the header file. */ +/* #undef HAVE_IOLIB_H */ + +/* Define to 1 if you have the `event' library (-levent). */ +/* #undef HAVE_LIBEVENT */ + +/* Define to 1 if you have the `event_pthreads' library (-levent_pthreads). */ +/* #undef HAVE_LIBEVENT_PTHREADS */ + +/* Define to 1 if you have the header file. */ +#define HAVE_LIBGEN_H 1 + +/* Define to 1 if you have the header file. */ +/* #undef HAVE_LIBUTIL_H */ + +/* Define to 1 if you have the header file. */ +#define HAVE_LIMITS_H 1 + +/* Define to 1 if the system has the type `long long'. */ +#define HAVE_LONG_LONG 1 + +/* Define to 1 if you have the header file. */ +#define HAVE_LTDL_H 1 + +/* Define to 1 if you have the header file. */ +#define HAVE_MEMORY_H 1 + +/* Define to 1 if you have the header file. */ +/* #undef HAVE_MUNGE_H */ + +/* Define to 1 if you have the header file. */ +#define HAVE_NETDB_H 1 + +/* Define to 1 if you have the header file. */ +#define HAVE_NETINET_IN_H 1 + +/* Define to 1 if you have the header file. */ +#define HAVE_NETINET_TCP_H 1 + +/* Define to 1 if you have the header file. */ +#define HAVE_NET_IF_H 1 + +/* Define to 1 if you have the header file. */ +/* #undef HAVE_NET_UIO_H */ + +/* Define to 1 if you have the `openpty' function. */ +#define HAVE_OPENPTY 1 + +/* Define to 1 if you have the `posix_fallocate' function. */ +#define HAVE_POSIX_FALLOCATE 1 + +/* Define to 1 if you have the `pthread_condattr_setpshared' function. */ +#define HAVE_PTHREAD_CONDATTR_SETPSHARED 1 + +/* Define to 1 if you have the `pthread_mutexattr_setpshared' function. */ +#define HAVE_PTHREAD_MUTEXATTR_SETPSHARED 1 + +/* Define to 1 if you have the `pthread_rwlockattr_setkind_np` function. 
*/ +#define HAVE_PTHREAD_SETKIND 1 + +/* Define to 1 if you have the `PTHREAD_PROCESS_SHARED` definition. */ +#define HAVE_PTHREAD_SHARED 1 + +/* Define to 1 if the system has the type `ptrdiff_t'. */ +#define HAVE_PTRDIFF_T 1 + +/* Define to 1 if you have the `ptsname' function. */ +#define HAVE_PTSNAME 1 + +/* Define to 1 if you have the header file. */ +#define HAVE_PTY_H 1 + +/* Define to 1 if you have the `setenv' function. */ +#define HAVE_SETENV 1 + +/* Define to 1 if you have the `setpgid' function. */ +#define HAVE_SETPGID 1 + +/* Define to 1 if `si_band' is a member of `siginfo_t'. */ +#define HAVE_SIGINFO_T_SI_BAND 1 + +/* Define to 1 if `si_fd' is a member of `siginfo_t'. */ +#define HAVE_SIGINFO_T_SI_FD 1 + +/* Define to 1 if you have the header file. */ +#define HAVE_SIGNAL_H 1 + +/* Define to 1 if you have the `snprintf' function. */ +#define HAVE_SNPRINTF 1 + +/* Define to 1 if you have the `socketpair' function. */ +#define HAVE_SOCKETPAIR 1 + +/* Define to 1 if the system has the type `socklen_t'. */ +#define HAVE_SOCKLEN_T 1 + +/* Define to 1 if you have the header file. */ +/* #undef HAVE_SOCKLIB_H */ + +/* Define to 1 if you have the `statfs' function. */ +#define HAVE_STATFS 1 + +/* Define to 1 if you have the `statvfs' function. */ +#define HAVE_STATVFS 1 + +/* Define to 1 if you have the header file. */ +#define HAVE_STDARG_H 1 + +/* Define to 1 if you have the header file. */ +#define HAVE_STDBOOL_H 1 + +/* Define to 1 if you have the header file. */ +#define HAVE_STDDEF_H 1 + +/* Define to 1 if you have the header file. */ +#define HAVE_STDINT_H 1 + +/* Define to 1 if you have the header file. */ +#define HAVE_STDLIB_H 1 + +/* Define to 1 if you have the header file. */ +#define HAVE_STRINGS_H 1 + +/* Define to 1 if you have the header file. */ +#define HAVE_STRING_H 1 + +/* Define to 1 if you have the `strncpy_s' function. */ +/* #undef HAVE_STRNCPY_S */ + +/* Define to 1 if you have the `strnlen' function. 
*/ +#define HAVE_STRNLEN 1 + +/* Define to 1 if you have the header file. */ +#define HAVE_STROPTS_H 1 + +/* Define to 1 if you have the `strsignal' function. */ +#define HAVE_STRSIGNAL 1 + +/* Define to 1 if `d_type' is a member of `struct dirent'. */ +#define HAVE_STRUCT_DIRENT_D_TYPE 1 + +/* Define to 1 if `l_type' is a member of `struct flock'. */ +#define HAVE_STRUCT_FLOCK_L_TYPE 1 + +/* Define to 1 if `ifr_hwaddr' is a member of `struct ifreq'. */ +#define HAVE_STRUCT_IFREQ_IFR_HWADDR 1 + +/* Define to 1 if `ifr_mtu' is a member of `struct ifreq'. */ +#define HAVE_STRUCT_IFREQ_IFR_MTU 1 + +/* Define to 1 if the system has the type `struct sockaddr_in'. */ +#define HAVE_STRUCT_SOCKADDR_IN 1 + +/* Define to 1 if the system has the type `struct sockaddr_in6'. */ +#define HAVE_STRUCT_SOCKADDR_IN6 1 + +/* Define to 1 if `sa_len' is a member of `struct sockaddr'. */ +/* #undef HAVE_STRUCT_SOCKADDR_SA_LEN */ + +/* Define to 1 if the system has the type `struct sockaddr_storage'. */ +#define HAVE_STRUCT_SOCKADDR_STORAGE 1 + +/* Define to 1 if the system has the type `struct sockaddr_un'. */ +#define HAVE_STRUCT_SOCKADDR_UN 1 + +/* Define to 1 if `uid' is a member of `struct sockpeercred'. */ +/* #undef HAVE_STRUCT_SOCKPEERCRED_UID */ + +/* Define to 1 if `f_fstypename' is a member of `struct statfs'. */ +/* #undef HAVE_STRUCT_STATFS_F_FSTYPENAME */ + +/* Define to 1 if `f_type' is a member of `struct statfs'. */ +#define HAVE_STRUCT_STATFS_F_TYPE 1 + +/* Define to 1 if `f_basetype' is a member of `struct statvfs'. */ +/* #undef HAVE_STRUCT_STATVFS_F_BASETYPE */ + +/* Define to 1 if `f_fstypename' is a member of `struct statvfs'. */ +/* #undef HAVE_STRUCT_STATVFS_F_FSTYPENAME */ + +/* Define to 1 if `cr_uid' is a member of `struct ucred'. */ +/* #undef HAVE_STRUCT_UCRED_CR_UID */ + +/* Define to 1 if `uid' is a member of `struct ucred'. */ +#define HAVE_STRUCT_UCRED_UID 1 + +/* Define to 1 if you have the header file. 
*/ +#define HAVE_SYSLOG_H 1 + +/* Define to 1 if you have the header file. */ +#define HAVE_SYS_AUXV_H 1 + +/* Define to 1 if you have the header file. */ +#define HAVE_SYS_CDEFS_H 1 + +/* Define to 1 if you have the header file. */ +#define HAVE_SYS_FCNTL_H 1 + +/* Define to 1 if you have the header file. */ +#define HAVE_SYS_IOCTL_H 1 + +/* Define to 1 if you have the header file. */ +#define HAVE_SYS_MOUNT_H 1 + +/* Define to 1 if you have the header file. */ +#define HAVE_SYS_PARAM_H 1 + +/* Define to 1 if you have the header file. */ +#define HAVE_SYS_SELECT_H 1 + +/* Define to 1 if you have the header file. */ +#define HAVE_SYS_SOCKET_H 1 + +/* Define to 1 if you have the header file. */ +/* #undef HAVE_SYS_SOCKIO_H */ + +/* Define to 1 if you have the header file. */ +#define HAVE_SYS_STATFS_H 1 + +/* Define to 1 if you have the header file. */ +#define HAVE_SYS_STATVFS_H 1 + +/* Define to 1 if you have the header file. */ +#define HAVE_SYS_STAT_H 1 + +/* Define to 1 if you have the header file. */ +#define HAVE_SYS_SYSCTL_H 1 + +/* Define to 1 if you have the header file. */ +#define HAVE_SYS_TIME_H 1 + +/* Define to 1 if you have the header file. */ +#define HAVE_SYS_TYPES_H 1 + +/* Define to 1 if you have the header file. */ +#define HAVE_SYS_UIO_H 1 + +/* Define to 1 if you have the header file. */ +#define HAVE_SYS_UN_H 1 + +/* Define to 1 if you have the header file. */ +#define HAVE_SYS_WAIT_H 1 + +/* Define to 1 if you have the `tcgetpgrp' function. */ +#define HAVE_TCGETPGRP 1 + +/* Define to 1 if you have the header file. */ +#define HAVE_TERMIOS_H 1 + +/* Define to 1 if you have the header file. */ +#define HAVE_TERMIO_H 1 + +/* Define to 1 if you have the header file. */ +#define HAVE_TIME_H 1 + +/* Define to 1 if you have the header file. */ +/* #undef HAVE_UCRED_H */ + +/* Define to 1 if the system has the type `uint128_t'. */ +/* #undef HAVE_UINT128_T */ + +/* Define to 1 if the system has the type `uint16_t'. 
*/ +#define HAVE_UINT16_T 1 + +/* Define to 1 if the system has the type `uint32_t'. */ +#define HAVE_UINT32_T 1 + +/* Define to 1 if the system has the type `uint64_t'. */ +#define HAVE_UINT64_T 1 + +/* Define to 1 if the system has the type `uint8_t'. */ +#define HAVE_UINT8_T 1 + +/* Define to 1 if the system has the type `uintptr_t'. */ +#define HAVE_UINTPTR_T 1 + +/* Define to 1 if you have the header file. */ +#define HAVE_UNISTD_H 1 + +/* whether unix byteswap routines -- htonl, htons, ntohl, ntohs -- are + available */ +#define HAVE_UNIX_BYTESWAP 1 + +/* Define to 1 if you have the `usleep' function. */ +#define HAVE_USLEEP 1 + +/* Define to 1 if you have the header file. */ +/* #undef HAVE_UTIL_H */ + +/* Define to 1 if you have the header file. */ +#define HAVE_UTMP_H 1 + +/* Define to 1 if you have the `vasprintf' function. */ +#define HAVE_VASPRINTF 1 + +/* Define to 1 if you have the `vsnprintf' function. */ +#define HAVE_VSNPRINTF 1 + +/* Define to 1 if you have the `waitpid' function. */ +#define HAVE_WAITPID 1 + +/* Define to 1 if you have the header file. */ +/* #undef HAVE_ZLIB_H */ + +/* Define to 1 if the system has the type `__int128'. */ +#define HAVE___INT128 1 + +/* Define to the sub-directory where libtool stores uninstalled libraries. */ +#define LT_OBJDIR ".libs/" + +/* Define to the address where bug reports for this package should be sent. */ +#define PACKAGE_BUGREPORT "https://github.com/pmix/pmix/issues" + +/* Define to the full name of this package. */ +#define PACKAGE_NAME "pmix" + +/* Define to the full name and version of this package. */ +#define PACKAGE_STRING "pmix 4.0.0a1" + +/* Define to the one symbol short name of this package. */ +#define PACKAGE_TARNAME "pmix" + +/* Define to the home page for this package. */ +#define PACKAGE_URL "" + +/* Define to the version of this package. 
*/ +#define PACKAGE_VERSION "4.0.0a1" + +/* Alignment of type bool */ +#define PMIX_ALIGNMENT_BOOL 1 + +/* Alignment of type char */ +#define PMIX_ALIGNMENT_CHAR 1 + +/* Alignment of type double */ +#define PMIX_ALIGNMENT_DOUBLE 8 + +/* Alignment of type float */ +#define PMIX_ALIGNMENT_FLOAT 4 + +/* Alignment of type int */ +#define PMIX_ALIGNMENT_INT 4 + +/* Alignment of type int16_t */ +#define PMIX_ALIGNMENT_INT16 2 + +/* Alignment of type int32_t */ +#define PMIX_ALIGNMENT_INT32 4 + +/* Alignment of type int64_t */ +#define PMIX_ALIGNMENT_INT64 8 + +/* Alignment of type int8_t */ +#define PMIX_ALIGNMENT_INT8 1 + +/* Alignment of type long */ +#define PMIX_ALIGNMENT_LONG 8 + +/* Alignment of type long double */ +/* #undef PMIX_ALIGNMENT_LONG_DOUBLE */ + +/* Alignment of type long long */ +#define PMIX_ALIGNMENT_LONG_LONG 8 + +/* Alignment of type short */ +#define PMIX_ALIGNMENT_SHORT 2 + +/* Alignment of type size_t */ +#define PMIX_ALIGNMENT_SIZE_T 8 + +/* Alignment of type void * */ +#define PMIX_ALIGNMENT_VOID_P 8 + +/* Alignment of type wchar_t */ +#define PMIX_ALIGNMENT_WCHAR 4 + +/* Assembly align directive expects logarithmic value */ +#define PMIX_ASM_ALIGN_LOG + +/* What ARM assembly version to use */ +/* #undef PMIX_ASM_ARM_VERSION */ + +/* Assembly directive for exporting symbols */ +#define PMIX_ASM_GLOBAL ".globl" + +/* Assembly prefix for gsym labels */ +#define PMIX_ASM_GSYM "" + +/* Assembly suffix for labels */ +#define PMIX_ASM_LABEL_SUFFIX ":" + +/* Assembly prefix for lsym labels */ +#define PMIX_ASM_LSYM ".L" + +/* Do we need to give a .size directive */ +#define PMIX_ASM_SIZE "1" + +/* Whether we can do 64bit assembly operations or not. 
Should not be used + outside of the assembly header files */ +#define PMIX_ASM_SUPPORT_64BIT 1 + +/* Whether 64-bit is supported by the __sync builtin atomics */ +/* #undef PMIX_ASM_SYNC_HAVE_64BIT */ + +/* Assembly directive for setting text section */ +#define PMIX_ASM_TEXT ".text" + +/* How to set function type in .type directive */ +#define PMIX_ASM_TYPE "@" + +/* Architecture type of assembly to use for atomic operations and CMA */ +#define PMIX_ASSEMBLY_ARCH PMIX_X86_64 + +/* Whether to use builtin atomics */ +#define PMIX_ASSEMBLY_BUILTIN PMIX_BUILTIN_C11 + +/* Format of assembly file */ +#define PMIX_ASSEMBLY_FORMAT "default-.text-.globl-:--.L-@-1-0-1-1-1" + +/* Whether we have support for RDTSCP instruction */ +#define PMIX_ASSEMBLY_SUPPORTS_RDTSCP 0 + +/* The compiler family ID which PMIx was built with */ +#define PMIX_BUILD_PLATFORM_COMPILER_FAMILYID 1 + +/* The compiler family name which PMIx was built with */ +#define PMIX_BUILD_PLATFORM_COMPILER_FAMILYNAME GNU + +/* The compiler version which PMIx was built with */ +#define PMIX_BUILD_PLATFORM_COMPILER_VERSION 459776 + +/* The compiler version string which PMIx was built with */ +#define PMIX_BUILD_PLATFORM_COMPILER_VERSION_STR 7.4.0 + +/* OMPI underlying C compiler */ +#define PMIX_CC "gcc" + +/* Capture the configure cmd line */ +#define PMIX_CONFIGURE_CLI " \'--disable-debug\' \'--disable-pmix-timing\' \'--without-tests-examples\' \'--with-pmix-symbol-rename=OPAL_MCA_PMIX4X_\' \'--disable-pmix-binaries\' \'--disable-pmix-backward-compatibility\' \'--disable-visibility\' \'--enable-embedded-mode\' '--with-libevent-header=\"opal/mca/event/libevent2022/libevent2022.h\"' '--with-hwloc-header=\"opal/mca/hwloc/hwloc201/hwloc201.h\"' \'--prefix=/home/wbailey2/my-test-ompi-install\' \'CFLAGS=-O3 -DNDEBUG \' \'CPPFLAGS=-I/home/wbailey2/ompi -I/home/wbailey2/ompi -I/home/wbailey2/ompi/opal/include -I/home/wbailey2/ompi/opal/include -I/home/wbailey2/ompi/opal/mca/event/libevent2022/libevent 
-I/home/wbailey2/ompi/opal/mca/event/libevent2022/libevent/include -I/home/wbailey2/ompi/opal/mca/hwloc/hwloc201/hwloc/include \' \'--cache-file=/dev/null\' \'--srcdir=.\' \'--disable-option-checking\'" + +/* Date when PMIx was built */ +#define PMIX_CONFIGURE_DATE "Thu Nov 7 17:17:00 EST 2019" + +/* Hostname where PMIx was built */ +#define PMIX_CONFIGURE_HOST "wbailey2-VirtualBox" + +/* User who built PMIx */ +#define PMIX_CONFIGURE_USER "wbailey2" + +/* Whether C compiler supports GCC style inline assembly */ +#define PMIX_C_GCC_INLINE_ASSEMBLY 1 + +/* Whether C compiler supports atomic convenience variables in stdatomic.h */ +#define PMIX_C_HAVE_ATOMIC_CONV_VAR 1 + +/* Whether C compiler supports __builtin_clz */ +#define PMIX_C_HAVE_BUILTIN_CLZ 1 + +/* Whether C compiler supports __builtin_expect */ +#define PMIX_C_HAVE_BUILTIN_EXPECT 1 + +/* Whether C compiler supports __builtin_prefetch */ +#define PMIX_C_HAVE_BUILTIN_PREFETCH 1 + +/* Whether C compiler supports _Atomic keyword */ +#define PMIX_C_HAVE__ATOMIC 1 + +/* Whether C compiler supports _Generic keyword */ +#define PMIX_C_HAVE__GENERIC 1 + +/* Whether C compiler supports _Static_assert keyword */ +#define PMIX_C_HAVE__STATIC_ASSERT 1 + +/* Whether C compiler supports _Thread_local */ +#define PMIX_C_HAVE__THREAD_LOCAL 1 + +/* Whether C compiler supports __thread */ +#define PMIX_C_HAVE___THREAD 1 + +/* Whether we are in debugging mode or not */ +#define PMIX_ENABLE_DEBUG 0 + +/* Whether we want to enable dlopen support */ +#define PMIX_ENABLE_DLOPEN_SUPPORT 1 + +/* Whether we should enable thread support within the PMIX code base */ +#define PMIX_ENABLE_MULTI_THREADS 1 + +/* Whether user wants PTY support or not */ +#define PMIX_ENABLE_PTY_SUPPORT 1 + +/* Whether we want developer-level timing support or not */ +#define PMIX_ENABLE_TIMING 0 + +/* Location of event2/thread.h */ +#define PMIX_EVENT2_THREAD_HEADER "opal/mca/event/libevent2022/libevent2022.h" + +/* Location of event.h */ +#define 
PMIX_EVENT_HEADER "opal/mca/event/libevent2022/libevent2022.h" + +/* Whether or not we have apple */ +#define PMIX_HAVE_APPLE 0 + +/* Whether your compiler has __attribute__ or not */ +#define PMIX_HAVE_ATTRIBUTE 1 + +/* Whether your compiler has __attribute__ aligned or not */ +#define PMIX_HAVE_ATTRIBUTE_ALIGNED 1 + +/* Whether your compiler has __attribute__ always_inline or not */ +#define PMIX_HAVE_ATTRIBUTE_ALWAYS_INLINE 1 + +/* Whether your compiler has __attribute__ cold or not */ +#define PMIX_HAVE_ATTRIBUTE_COLD 1 + +/* Whether your compiler has __attribute__ const or not */ +#define PMIX_HAVE_ATTRIBUTE_CONST 1 + +/* Whether your compiler has __attribute__ deprecated or not */ +#define PMIX_HAVE_ATTRIBUTE_DEPRECATED 1 + +/* Whether your compiler has __attribute__ deprecated with optional argument + */ +#define PMIX_HAVE_ATTRIBUTE_DEPRECATED_ARGUMENT 1 + +/* Whether your compiler has __attribute__ destructor or not */ +#define PMIX_HAVE_ATTRIBUTE_DESTRUCTOR 1 + +/* Whether your compiler has __attribute__ extension or not */ +#define PMIX_HAVE_ATTRIBUTE_EXTENSION 1 + +/* Whether your compiler has __attribute__ format or not */ +#define PMIX_HAVE_ATTRIBUTE_FORMAT 1 + +/* Whether your compiler has __attribute__ format and it works on function + pointers */ +#define PMIX_HAVE_ATTRIBUTE_FORMAT_FUNCPTR 1 + +/* Whether your compiler has __attribute__ hot or not */ +#define PMIX_HAVE_ATTRIBUTE_HOT 1 + +/* Whether your compiler has __attribute__ malloc or not */ +#define PMIX_HAVE_ATTRIBUTE_MALLOC 1 + +/* Whether your compiler has __attribute__ may_alias or not */ +#define PMIX_HAVE_ATTRIBUTE_MAY_ALIAS 1 + +/* Whether your compiler has __attribute__ nonnull or not */ +#define PMIX_HAVE_ATTRIBUTE_NONNULL 1 + +/* Whether your compiler has __attribute__ noreturn or not */ +#define PMIX_HAVE_ATTRIBUTE_NORETURN 1 + +/* Whether your compiler has __attribute__ noreturn and it works on function + pointers */ +#define PMIX_HAVE_ATTRIBUTE_NORETURN_FUNCPTR 1 + +/* Whether 
your compiler has __attribute__ no_instrument_function or not */ +#define PMIX_HAVE_ATTRIBUTE_NO_INSTRUMENT_FUNCTION 1 + +/* Whether your compiler has __attribute__ optnone or not */ +#define PMIX_HAVE_ATTRIBUTE_OPTNONE 0 + +/* Whether your compiler has __attribute__ packed or not */ +#define PMIX_HAVE_ATTRIBUTE_PACKED 1 + +/* Whether your compiler has __attribute__ pure or not */ +#define PMIX_HAVE_ATTRIBUTE_PURE 1 + +/* Whether your compiler has __attribute__ sentinel or not */ +#define PMIX_HAVE_ATTRIBUTE_SENTINEL 1 + +/* Whether your compiler has __attribute__ unused or not */ +#define PMIX_HAVE_ATTRIBUTE_UNUSED 1 + +/* Whether your compiler has __attribute__ visibility or not */ +#define PMIX_HAVE_ATTRIBUTE_VISIBILITY 1 + +/* Whether your compiler has __attribute__ warn unused result or not */ +#define PMIX_HAVE_ATTRIBUTE_WARN_UNUSED_RESULT 1 + +/* Whether your compiler has __attribute__ weak alias or not */ +#define PMIX_HAVE_ATTRIBUTE_WEAK_ALIAS + +/* whether qsort is broken or not */ +#define PMIX_HAVE_BROKEN_QSORT 0 + +/* Whether C11 atomic compare swap is both supported and lock-free on 128-bit + values */ +#define PMIX_HAVE_C11_CSWAP_INT128 0 + +/* whether ceil is found and available */ +#define PMIX_HAVE_CEIL 1 + +/* whether clock_gettime is found and available */ +#define PMIX_HAVE_CLOCK_GETTIME 1 + +/* Whether the processor supports the cmpxchg16b instruction */ +#define PMIX_HAVE_CMPXCHG16B 1 + +/* whether dirname is found and available */ +#define PMIX_HAVE_DIRNAME 1 + +/* Whether the __atomic builtin atomic compare swap is both supported and + lock-free on 128-bit values */ +/* #undef PMIX_HAVE_GCC_BUILTIN_CSWAP_INT128 */ + +/* whether gethostbyname is found and available */ +#define PMIX_HAVE_GETHOSTBYNAME 1 + +/* Whether or not we have hwloc support */ +#define PMIX_HAVE_HWLOC 1 + +/* Whether we are building against libev */ +#define PMIX_HAVE_LIBEV 0 + +/* Whether we are building against libevent */ +/* #undef PMIX_HAVE_LIBEVENT */ + +/* whether 
openpty is found and available */ +#define PMIX_HAVE_OPENPTY 1 + +/* Whether the PMIX PDL framework is functional or not */ +#define PMIX_HAVE_PDL_SUPPORT 1 + +/* If PTHREADS implementation supports PTHREAD_MUTEX_ERRORCHECK */ +#define PMIX_HAVE_PTHREAD_MUTEX_ERRORCHECK 1 + +/* If PTHREADS implementation supports PTHREAD_MUTEX_ERRORCHECK_NP */ +#define PMIX_HAVE_PTHREAD_MUTEX_ERRORCHECK_NP 1 + +/* Whether we have SA_RESTART in <signal.h> or not */ +#define PMIX_HAVE_SA_RESTART 1 + +/* whether socket is found and available */ +#define PMIX_HAVE_SOCKET 1 + +/* Whether or not we have solaris */ +#define PMIX_HAVE_SOLARIS 0 + +/* Whether the __sync builtin atomic compare and swap supports 128-bit values + */ +#define PMIX_HAVE_SYNC_BUILTIN_CSWAP_INT128 1 + +/* Whether we have __va_copy or not */ +#define PMIX_HAVE_UNDERSCORE_VA_COPY 1 + +/* Whether we have va_copy or not */ +#define PMIX_HAVE_VA_COPY 1 + +/* Whether C compiler supports symbol visibility or not */ +#define PMIX_HAVE_VISIBILITY 0 + +/* Location of hwloc.h */ +#define PMIX_HWLOC_HEADER "opal/mca/hwloc/hwloc201/hwloc201.h" + +/* ident string for PMIX */ +#define PMIX_IDENT_STRING "" + +/* The library major version is always available, contrary to VERSION */ +#define PMIX_MAJOR_VERSION 4 + +/* MCA cmd line identifier */ +#define PMIX_MCA_CMD_LINE_ID "pmca" + +/* MCA prefix string for envars */ +#define PMIX_MCA_PREFIX "PMIX_MCA_" + +/* The library minor version is always available, contrary to VERSION */ +#define PMIX_MINOR_VERSION 0 + +/* Whether the C compiler supports "bool" without any other help (such as + <stdbool.h>) */ +#define PMIX_NEED_C_BOOL 1 + +/* Whether libraries can be configured with destructor functions */ +#define PMIX_NO_LIB_DESTRUCTOR 0 + +/* package/branding string for PMIx */ +#define PMIX_PACKAGE_STRING "PMIx wbailey2@wbailey2-VirtualBox Distribution" + +/* Whether we have lt_dladvise or not */ +#define PMIX_PDL_PLIBLTDL_HAVE_LT_DLADVISE 1 + +/* Whether r notation is used for ppc registers */ +/* #undef 
PMIX_POWERPC_R_REGISTERS */ + +/* type to use for ptrdiff_t */ +#define PMIX_PTRDIFF_TYPE ptrdiff_t + +/* The library release version is always available, contrary to VERSION */ +#define PMIX_RELEASE_VERSION 0 + +/* Default value for mca_base_component_show_load_errors MCA variable */ +#define PMIX_SHOW_LOAD_ERRORS_DEFAULT 1 + +/* The pmix symbol rename include directive */ +#define PMIX_SYMBOL_RENAME OPAL_MCA_PMIX4X_ + +/* Whether to use <stdbool.h> or not */ +#define PMIX_USE_STDBOOL_H 1 + +/* The library version is always available, contrary to VERSION */ +#define PMIX_VERSION "4.0.0a1" + +/* Enable per-user config files */ +#define PMIX_WANT_HOME_CONFIG_FILES 1 + +/* Whether we want the pretty-print stack trace feature */ +#define PMIX_WANT_PRETTY_PRINT_STACKTRACE 1 + +/* The size of `char', as computed by sizeof. */ +#define SIZEOF_CHAR 1 + +/* The size of `double', as computed by sizeof. */ +#define SIZEOF_DOUBLE 8 + +/* The size of `float', as computed by sizeof. */ +#define SIZEOF_FLOAT 4 + +/* The size of `int', as computed by sizeof. */ +#define SIZEOF_INT 4 + +/* The size of `long', as computed by sizeof. */ +#define SIZEOF_LONG 8 + +/* The size of `long long', as computed by sizeof. */ +#define SIZEOF_LONG_LONG 8 + +/* The size of `pid_t', as computed by sizeof. */ +#define SIZEOF_PID_T 4 + +/* The size of `ptrdiff_t', as computed by sizeof. */ +#define SIZEOF_PTRDIFF_T 8 + +/* The size of `short', as computed by sizeof. */ +#define SIZEOF_SHORT 2 + +/* The size of `size_t', as computed by sizeof. */ +#define SIZEOF_SIZE_T 8 + +/* The size of `ssize_t', as computed by sizeof. */ +/* #undef SIZEOF_SSIZE_T */ + +/* The size of `void *', as computed by sizeof. */ +#define SIZEOF_VOID_P 8 + +/* The size of `wchar_t', as computed by sizeof. */ +#define SIZEOF_WCHAR_T 4 + +/* The size of `_Bool', as computed by sizeof. */ +#define SIZEOF__BOOL 1 + +/* Define to 1 if you have the ANSI C header files. */ +#define STDC_HEADERS 1 + +/* Enable extensions on AIX 3, Interix. 
*/ +#ifndef _ALL_SOURCE +# define _ALL_SOURCE 1 +#endif +/* Enable GNU extensions on systems that have them. */ +#ifndef _GNU_SOURCE +# define _GNU_SOURCE 1 +#endif +/* Enable threading extensions on Solaris. */ +#ifndef _POSIX_PTHREAD_SEMANTICS +# define _POSIX_PTHREAD_SEMANTICS 1 +#endif +/* Enable extensions on HP NonStop. */ +#ifndef _TANDEM_SOURCE +# define _TANDEM_SOURCE 1 +#endif +/* Enable general extensions on Solaris. */ +#ifndef __EXTENSIONS__ +# define __EXTENSIONS__ 1 +#endif + + +/* Define WORDS_BIGENDIAN to 1 if your processor stores words with the most + significant byte first (like Motorola and SPARC, unlike Intel). */ +#if defined AC_APPLE_UNIVERSAL_BUILD +# if defined __BIG_ENDIAN__ +# define WORDS_BIGENDIAN 1 +# endif +#else +# ifndef WORDS_BIGENDIAN +/* # undef WORDS_BIGENDIAN */ +# endif +#endif + +/* Define to 1 if `lex' declares `yytext' as a `char *' by default, not a + `char[]'. */ +#define YYTEXT_POINTER 1 + +/* Enable GNU extensions on systems that have them. */ +#ifndef _GNU_SOURCE +# define _GNU_SOURCE 1 +#endif + +/* Define to 1 if on MINIX. */ +/* #undef _MINIX */ + +/* Define to 2 if the system does not provide POSIX.1 features except with + this defined. */ +/* #undef _POSIX_1_SOURCE */ + +/* Define to 1 if you need to in order for `stat' and other things to work. */ +/* #undef _POSIX_SOURCE */ + +/* Define to `__inline__' or `__inline' if that's what the C compiler + calls it, or to nothing if 'inline' is not supported under any name. */ +#ifndef __cplusplus +#define inline __inline__ +#endif + + +#include +#endif /* PMIX_CONFIG_H */ + diff --git a/opal/mca/pmix/pmix4x/openpmix/src/include/pmix_config.h.in b/opal/mca/pmix/pmix4x/openpmix/src/include/pmix_config.h.in new file mode 100644 index 00000000000..7b6e52be358 --- /dev/null +++ b/opal/mca/pmix/pmix4x/openpmix/src/include/pmix_config.h.in @@ -0,0 +1,939 @@ +/* src/include/pmix_config.h.in. Generated from configure.ac by autoheader. 
*/ + +/* -*- c -*- + * + * Copyright (c) 2004-2005 The Trustees of Indiana University. + * All rights reserved. + * Copyright (c) 2004-2005 The Trustees of the University of Tennessee. + * All rights reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * Copyright (c) 2013-2015 Intel, Inc. All rights reserved + * Copyright (c) 2016 IBM Corporation. All rights reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + * + * This file is automatically generated by configure. Edits will be lost + * the next time you run configure! + */ + +#ifndef PMIX_CONFIG_H +#define PMIX_CONFIG_H + +#include + + + +/* Define if building universal (internal helper macro) */ +#undef AC_APPLE_UNIVERSAL_BUILD + +/* Define to 1 if you have the header file. */ +#undef HAVE_ARPA_INET_H + +/* Define to 1 if you have the `asprintf' function. */ +#undef HAVE_ASPRINTF + +/* Define to 1 if you have the header file. */ +#undef HAVE_CRT_EXTERNS_H + +/* Define to 1 if you have the declaration of `AF_INET6', and to 0 if you + don't. */ +#undef HAVE_DECL_AF_INET6 + +/* Define to 1 if you have the declaration of `AF_UNSPEC', and to 0 if you + don't. */ +#undef HAVE_DECL_AF_UNSPEC + +/* Define to 1 if you have the declaration of `PF_INET6', and to 0 if you + don't. */ +#undef HAVE_DECL_PF_INET6 + +/* Define to 1 if you have the declaration of `PF_UNSPEC', and to 0 if you + don't. */ +#undef HAVE_DECL_PF_UNSPEC + +/* Define to 1 if you have the declaration of `__func__', and to 0 if you + don't. */ +#undef HAVE_DECL___FUNC__ + +/* Define to 1 if you have the header file. */ +#undef HAVE_DIRENT_H + +/* Define to 1 if you have the header file. */ +#undef HAVE_DLFCN_H + +/* Define to 1 if you have the header file. */ +#undef HAVE_EVENT_H + +/* Define to 1 if you have the `execve' function. 
*/ +#undef HAVE_EXECVE + +/* Define to 1 if you have the locking by fcntl. */ +#undef HAVE_FCNTL_FLOCK + +/* Define to 1 if you have the header file. */ +#undef HAVE_FCNTL_H + +/* Define to 1 if you have the `fork' function. */ +#undef HAVE_FORK + +/* Define to 1 if you have the `getpeereid' function. */ +#undef HAVE_GETPEEREID + +/* Define to 1 if you have the `getpeerucred' function. */ +#undef HAVE_GETPEERUCRED + +/* Define to 1 if you have the header file. */ +#undef HAVE_GRP_H + +/* Define to 1 if you have the header file. */ +#undef HAVE_HOSTLIB_H + +/* Define to 1 if you have the header file. */ +#undef HAVE_HWLOC_H + +/* Define to 1 if you have the header file. */ +#undef HAVE_IFADDRS_H + +/* Define to 1 if the system has the type `int16_t'. */ +#undef HAVE_INT16_T + +/* Define to 1 if the system has the type `int32_t'. */ +#undef HAVE_INT32_T + +/* Define to 1 if the system has the type `int64_t'. */ +#undef HAVE_INT64_T + +/* Define to 1 if the system has the type `int8_t'. */ +#undef HAVE_INT8_T + +/* Define to 1 if the system has the type `intptr_t'. */ +#undef HAVE_INTPTR_T + +/* Define to 1 if you have the header file. */ +#undef HAVE_INTTYPES_H + +/* Define to 1 if you have the header file. */ +#undef HAVE_IOLIB_H + +/* Define to 1 if you have the `event' library (-levent). */ +#undef HAVE_LIBEVENT + +/* Define to 1 if you have the `event_pthreads' library (-levent_pthreads). */ +#undef HAVE_LIBEVENT_PTHREADS + +/* Define to 1 if you have the header file. */ +#undef HAVE_LIBGEN_H + +/* Define to 1 if you have the header file. */ +#undef HAVE_LIBUTIL_H + +/* Define to 1 if you have the header file. */ +#undef HAVE_LIMITS_H + +/* Define to 1 if the system has the type `long long'. */ +#undef HAVE_LONG_LONG + +/* Define to 1 if you have the header file. */ +#undef HAVE_LTDL_H + +/* Define to 1 if you have the header file. */ +#undef HAVE_MEMORY_H + +/* Define to 1 if you have the header file. 
*/ +#undef HAVE_MUNGE_H + +/* Define to 1 if you have the header file. */ +#undef HAVE_NETDB_H + +/* Define to 1 if you have the header file. */ +#undef HAVE_NETINET_IN_H + +/* Define to 1 if you have the header file. */ +#undef HAVE_NETINET_TCP_H + +/* Define to 1 if you have the header file. */ +#undef HAVE_NET_IF_H + +/* Define to 1 if you have the header file. */ +#undef HAVE_NET_UIO_H + +/* Define to 1 if you have the `openpty' function. */ +#undef HAVE_OPENPTY + +/* Define to 1 if you have the `posix_fallocate' function. */ +#undef HAVE_POSIX_FALLOCATE + +/* Define to 1 if you have the `pthread_condattr_setpshared' function. */ +#undef HAVE_PTHREAD_CONDATTR_SETPSHARED + +/* Define to 1 if you have the `pthread_mutexattr_setpshared' function. */ +#undef HAVE_PTHREAD_MUTEXATTR_SETPSHARED + +/* Define to 1 if you have the `pthread_rwlockattr_setkind_np` function. */ +#undef HAVE_PTHREAD_SETKIND + +/* Define to 1 if you have the `PTHREAD_PROCESS_SHARED` definition. */ +#undef HAVE_PTHREAD_SHARED + +/* Define to 1 if the system has the type `ptrdiff_t'. */ +#undef HAVE_PTRDIFF_T + +/* Define to 1 if you have the `ptsname' function. */ +#undef HAVE_PTSNAME + +/* Define to 1 if you have the header file. */ +#undef HAVE_PTY_H + +/* Define to 1 if you have the `setenv' function. */ +#undef HAVE_SETENV + +/* Define to 1 if you have the `setpgid' function. */ +#undef HAVE_SETPGID + +/* Define to 1 if `si_band' is a member of `siginfo_t'. */ +#undef HAVE_SIGINFO_T_SI_BAND + +/* Define to 1 if `si_fd' is a member of `siginfo_t'. */ +#undef HAVE_SIGINFO_T_SI_FD + +/* Define to 1 if you have the header file. */ +#undef HAVE_SIGNAL_H + +/* Define to 1 if you have the `snprintf' function. */ +#undef HAVE_SNPRINTF + +/* Define to 1 if you have the `socketpair' function. */ +#undef HAVE_SOCKETPAIR + +/* Define to 1 if the system has the type `socklen_t'. */ +#undef HAVE_SOCKLEN_T + +/* Define to 1 if you have the header file. 
*/ +#undef HAVE_SOCKLIB_H + +/* Define to 1 if you have the `statfs' function. */ +#undef HAVE_STATFS + +/* Define to 1 if you have the `statvfs' function. */ +#undef HAVE_STATVFS + +/* Define to 1 if you have the header file. */ +#undef HAVE_STDARG_H + +/* Define to 1 if you have the header file. */ +#undef HAVE_STDBOOL_H + +/* Define to 1 if you have the header file. */ +#undef HAVE_STDDEF_H + +/* Define to 1 if you have the header file. */ +#undef HAVE_STDINT_H + +/* Define to 1 if you have the header file. */ +#undef HAVE_STDLIB_H + +/* Define to 1 if you have the header file. */ +#undef HAVE_STRINGS_H + +/* Define to 1 if you have the header file. */ +#undef HAVE_STRING_H + +/* Define to 1 if you have the `strncpy_s' function. */ +#undef HAVE_STRNCPY_S + +/* Define to 1 if you have the `strnlen' function. */ +#undef HAVE_STRNLEN + +/* Define to 1 if you have the header file. */ +#undef HAVE_STROPTS_H + +/* Define to 1 if you have the `strsignal' function. */ +#undef HAVE_STRSIGNAL + +/* Define to 1 if `d_type' is a member of `struct dirent'. */ +#undef HAVE_STRUCT_DIRENT_D_TYPE + +/* Define to 1 if `l_type' is a member of `struct flock'. */ +#undef HAVE_STRUCT_FLOCK_L_TYPE + +/* Define to 1 if `ifr_hwaddr' is a member of `struct ifreq'. */ +#undef HAVE_STRUCT_IFREQ_IFR_HWADDR + +/* Define to 1 if `ifr_mtu' is a member of `struct ifreq'. */ +#undef HAVE_STRUCT_IFREQ_IFR_MTU + +/* Define to 1 if the system has the type `struct sockaddr_in'. */ +#undef HAVE_STRUCT_SOCKADDR_IN + +/* Define to 1 if the system has the type `struct sockaddr_in6'. */ +#undef HAVE_STRUCT_SOCKADDR_IN6 + +/* Define to 1 if `sa_len' is a member of `struct sockaddr'. */ +#undef HAVE_STRUCT_SOCKADDR_SA_LEN + +/* Define to 1 if the system has the type `struct sockaddr_storage'. */ +#undef HAVE_STRUCT_SOCKADDR_STORAGE + +/* Define to 1 if the system has the type `struct sockaddr_un'. */ +#undef HAVE_STRUCT_SOCKADDR_UN + +/* Define to 1 if `uid' is a member of `struct sockpeercred'. 
*/ +#undef HAVE_STRUCT_SOCKPEERCRED_UID + +/* Define to 1 if `f_fstypename' is a member of `struct statfs'. */ +#undef HAVE_STRUCT_STATFS_F_FSTYPENAME + +/* Define to 1 if `f_type' is a member of `struct statfs'. */ +#undef HAVE_STRUCT_STATFS_F_TYPE + +/* Define to 1 if `f_basetype' is a member of `struct statvfs'. */ +#undef HAVE_STRUCT_STATVFS_F_BASETYPE + +/* Define to 1 if `f_fstypename' is a member of `struct statvfs'. */ +#undef HAVE_STRUCT_STATVFS_F_FSTYPENAME + +/* Define to 1 if `cr_uid' is a member of `struct ucred'. */ +#undef HAVE_STRUCT_UCRED_CR_UID + +/* Define to 1 if `uid' is a member of `struct ucred'. */ +#undef HAVE_STRUCT_UCRED_UID + +/* Define to 1 if you have the header file. */ +#undef HAVE_SYSLOG_H + +/* Define to 1 if you have the header file. */ +#undef HAVE_SYS_AUXV_H + +/* Define to 1 if you have the header file. */ +#undef HAVE_SYS_CDEFS_H + +/* Define to 1 if you have the header file. */ +#undef HAVE_SYS_FCNTL_H + +/* Define to 1 if you have the header file. */ +#undef HAVE_SYS_IOCTL_H + +/* Define to 1 if you have the header file. */ +#undef HAVE_SYS_MOUNT_H + +/* Define to 1 if you have the header file. */ +#undef HAVE_SYS_PARAM_H + +/* Define to 1 if you have the header file. */ +#undef HAVE_SYS_SELECT_H + +/* Define to 1 if you have the header file. */ +#undef HAVE_SYS_SOCKET_H + +/* Define to 1 if you have the header file. */ +#undef HAVE_SYS_SOCKIO_H + +/* Define to 1 if you have the header file. */ +#undef HAVE_SYS_STATFS_H + +/* Define to 1 if you have the header file. */ +#undef HAVE_SYS_STATVFS_H + +/* Define to 1 if you have the header file. */ +#undef HAVE_SYS_STAT_H + +/* Define to 1 if you have the header file. */ +#undef HAVE_SYS_SYSCTL_H + +/* Define to 1 if you have the header file. */ +#undef HAVE_SYS_TIME_H + +/* Define to 1 if you have the header file. */ +#undef HAVE_SYS_TYPES_H + +/* Define to 1 if you have the header file. */ +#undef HAVE_SYS_UIO_H + +/* Define to 1 if you have the header file. 
*/ +#undef HAVE_SYS_UN_H + +/* Define to 1 if you have the <sys/wait.h> header file. */ +#undef HAVE_SYS_WAIT_H + +/* Define to 1 if you have the `tcgetpgrp' function. */ +#undef HAVE_TCGETPGRP + +/* Define to 1 if you have the <termios.h> header file. */ +#undef HAVE_TERMIOS_H + +/* Define to 1 if you have the <termio.h> header file. */ +#undef HAVE_TERMIO_H + +/* Define to 1 if you have the <time.h> header file. */ +#undef HAVE_TIME_H + +/* Define to 1 if you have the <ucred.h> header file. */ +#undef HAVE_UCRED_H + +/* Define to 1 if the system has the type `uint128_t'. */ +#undef HAVE_UINT128_T + +/* Define to 1 if the system has the type `uint16_t'. */ +#undef HAVE_UINT16_T + +/* Define to 1 if the system has the type `uint32_t'. */ +#undef HAVE_UINT32_T + +/* Define to 1 if the system has the type `uint64_t'. */ +#undef HAVE_UINT64_T + +/* Define to 1 if the system has the type `uint8_t'. */ +#undef HAVE_UINT8_T + +/* Define to 1 if the system has the type `uintptr_t'. */ +#undef HAVE_UINTPTR_T + +/* Define to 1 if you have the <unistd.h> header file. */ +#undef HAVE_UNISTD_H + +/* whether unix byteswap routines -- htonl, htons, ntohl, ntohs -- are + available */ +#undef HAVE_UNIX_BYTESWAP + +/* Define to 1 if you have the `usleep' function. */ +#undef HAVE_USLEEP + +/* Define to 1 if you have the <util.h> header file. */ +#undef HAVE_UTIL_H + +/* Define to 1 if you have the <utmp.h> header file. */ +#undef HAVE_UTMP_H + +/* Define to 1 if you have the `vasprintf' function. */ +#undef HAVE_VASPRINTF + +/* Define to 1 if you have the `vsnprintf' function. */ +#undef HAVE_VSNPRINTF + +/* Define to 1 if you have the `waitpid' function. */ +#undef HAVE_WAITPID + +/* Define to 1 if you have the <zlib.h> header file. */ +#undef HAVE_ZLIB_H + +/* Define to 1 if the system has the type `__int128'. */ +#undef HAVE___INT128 + +/* Define to the sub-directory where libtool stores uninstalled libraries. */ +#undef LT_OBJDIR + +/* Define to the address where bug reports for this package should be sent. 
*/ +#undef PACKAGE_BUGREPORT + +/* Define to the full name of this package. */ +#undef PACKAGE_NAME + +/* Define to the full name and version of this package. */ +#undef PACKAGE_STRING + +/* Define to the one symbol short name of this package. */ +#undef PACKAGE_TARNAME + +/* Define to the home page for this package. */ +#undef PACKAGE_URL + +/* Define to the version of this package. */ +#undef PACKAGE_VERSION + +/* Alignment of type bool */ +#undef PMIX_ALIGNMENT_BOOL + +/* Alignment of type char */ +#undef PMIX_ALIGNMENT_CHAR + +/* Alignment of type double */ +#undef PMIX_ALIGNMENT_DOUBLE + +/* Alignment of type float */ +#undef PMIX_ALIGNMENT_FLOAT + +/* Alignment of type int */ +#undef PMIX_ALIGNMENT_INT + +/* Alignment of type int16_t */ +#undef PMIX_ALIGNMENT_INT16 + +/* Alignment of type int32_t */ +#undef PMIX_ALIGNMENT_INT32 + +/* Alignment of type int64_t */ +#undef PMIX_ALIGNMENT_INT64 + +/* Alignment of type int8_t */ +#undef PMIX_ALIGNMENT_INT8 + +/* Alignment of type long */ +#undef PMIX_ALIGNMENT_LONG + +/* Alignment of type long double */ +#undef PMIX_ALIGNMENT_LONG_DOUBLE + +/* Alignment of type long long */ +#undef PMIX_ALIGNMENT_LONG_LONG + +/* Alignment of type short */ +#undef PMIX_ALIGNMENT_SHORT + +/* Alignment of type size_t */ +#undef PMIX_ALIGNMENT_SIZE_T + +/* Alignment of type void * */ +#undef PMIX_ALIGNMENT_VOID_P + +/* Alignment of type wchar_t */ +#undef PMIX_ALIGNMENT_WCHAR + +/* Assembly align directive expects logarithmic value */ +#undef PMIX_ASM_ALIGN_LOG + +/* What ARM assembly version to use */ +#undef PMIX_ASM_ARM_VERSION + +/* Assembly directive for exporting symbols */ +#undef PMIX_ASM_GLOBAL + +/* Assembly prefix for gsym labels */ +#undef PMIX_ASM_GSYM + +/* Assembly suffix for labels */ +#undef PMIX_ASM_LABEL_SUFFIX + +/* Assembly prefix for lsym labels */ +#undef PMIX_ASM_LSYM + +/* Do we need to give a .size directive */ +#undef PMIX_ASM_SIZE + +/* Whether we can do 64bit assembly operations or not. 
Should not be used + outside of the assembly header files */ +#undef PMIX_ASM_SUPPORT_64BIT + +/* Whether 64-bit is supported by the __sync builtin atomics */ +#undef PMIX_ASM_SYNC_HAVE_64BIT + +/* Assembly directive for setting text section */ +#undef PMIX_ASM_TEXT + +/* How to set function type in .type directive */ +#undef PMIX_ASM_TYPE + +/* Architecture type of assembly to use for atomic operations and CMA */ +#undef PMIX_ASSEMBLY_ARCH + +/* Whether to use builtin atomics */ +#undef PMIX_ASSEMBLY_BUILTIN + +/* Format of assembly file */ +#undef PMIX_ASSEMBLY_FORMAT + +/* Whether we have support for RDTSCP instruction */ +#undef PMIX_ASSEMBLY_SUPPORTS_RDTSCP + +/* The compiler family ID which PMIx was built with */ +#undef PMIX_BUILD_PLATFORM_COMPILER_FAMILYID + +/* The compiler family name which PMIx was built with */ +#undef PMIX_BUILD_PLATFORM_COMPILER_FAMILYNAME + +/* The compiler version which PMIx was built with */ +#undef PMIX_BUILD_PLATFORM_COMPILER_VERSION + +/* The compiler version string which PMIx was built with */ +#undef PMIX_BUILD_PLATFORM_COMPILER_VERSION_STR + +/* PMIx underlying C compiler */ +#undef PMIX_CC + +/* Capture the configure cmd line */ +#undef PMIX_CONFIGURE_CLI + +/* Date when PMIx was built */ +#undef PMIX_CONFIGURE_DATE + +/* Hostname where PMIx was built */ +#undef PMIX_CONFIGURE_HOST + +/* User who built PMIx */ +#undef PMIX_CONFIGURE_USER + +/* Whether C compiler supports GCC style inline assembly */ +#undef PMIX_C_GCC_INLINE_ASSEMBLY + +/* Whether C compiler supports atomic convenience variables in stdatomic.h */ +#undef PMIX_C_HAVE_ATOMIC_CONV_VAR + +/* Whether C compiler supports __builtin_clz */ +#undef PMIX_C_HAVE_BUILTIN_CLZ + +/* Whether C compiler supports __builtin_expect */ +#undef PMIX_C_HAVE_BUILTIN_EXPECT + +/* Whether C compiler supports __builtin_prefetch */ +#undef PMIX_C_HAVE_BUILTIN_PREFETCH + +/* Whether C compiler supports _Atomic keyword */ +#undef PMIX_C_HAVE__ATOMIC + +/* Whether C compiler supports _Generic keyword 
*/ +#undef PMIX_C_HAVE__GENERIC + +/* Whether C compiler supports _Static_assert keyword */ +#undef PMIX_C_HAVE__STATIC_ASSERT + +/* Whether C compiler supports _Thread_local */ +#undef PMIX_C_HAVE__THREAD_LOCAL + +/* Whether C compiler supports __thread */ +#undef PMIX_C_HAVE___THREAD + +/* Whether we are in debugging mode or not */ +#undef PMIX_ENABLE_DEBUG + +/* Whether we want to enable dlopen support */ +#undef PMIX_ENABLE_DLOPEN_SUPPORT + +/* Whether we should enable thread support within the PMIX code base */ +#undef PMIX_ENABLE_MULTI_THREADS + +/* Whether user wants PTY support or not */ +#undef PMIX_ENABLE_PTY_SUPPORT + +/* Whether we want developer-level timing support or not */ +#undef PMIX_ENABLE_TIMING + +/* Location of event2/thread.h */ +#undef PMIX_EVENT2_THREAD_HEADER + +/* Location of event.h */ +#undef PMIX_EVENT_HEADER + +/* Whether or not we have apple */ +#undef PMIX_HAVE_APPLE + +/* Whether your compiler has __attribute__ or not */ +#undef PMIX_HAVE_ATTRIBUTE + +/* Whether your compiler has __attribute__ aligned or not */ +#undef PMIX_HAVE_ATTRIBUTE_ALIGNED + +/* Whether your compiler has __attribute__ always_inline or not */ +#undef PMIX_HAVE_ATTRIBUTE_ALWAYS_INLINE + +/* Whether your compiler has __attribute__ cold or not */ +#undef PMIX_HAVE_ATTRIBUTE_COLD + +/* Whether your compiler has __attribute__ const or not */ +#undef PMIX_HAVE_ATTRIBUTE_CONST + +/* Whether your compiler has __attribute__ deprecated or not */ +#undef PMIX_HAVE_ATTRIBUTE_DEPRECATED + +/* Whether your compiler has __attribute__ deprecated with optional argument + */ +#undef PMIX_HAVE_ATTRIBUTE_DEPRECATED_ARGUMENT + +/* Whether your compiler has __attribute__ destructor or not */ +#undef PMIX_HAVE_ATTRIBUTE_DESTRUCTOR + +/* Whether your compiler has __attribute__ extension or not */ +#undef PMIX_HAVE_ATTRIBUTE_EXTENSION + +/* Whether your compiler has __attribute__ format or not */ +#undef PMIX_HAVE_ATTRIBUTE_FORMAT + +/* Whether your compiler has __attribute__ format 
and it works on function + pointers */ +#undef PMIX_HAVE_ATTRIBUTE_FORMAT_FUNCPTR + +/* Whether your compiler has __attribute__ hot or not */ +#undef PMIX_HAVE_ATTRIBUTE_HOT + +/* Whether your compiler has __attribute__ malloc or not */ +#undef PMIX_HAVE_ATTRIBUTE_MALLOC + +/* Whether your compiler has __attribute__ may_alias or not */ +#undef PMIX_HAVE_ATTRIBUTE_MAY_ALIAS + +/* Whether your compiler has __attribute__ nonnull or not */ +#undef PMIX_HAVE_ATTRIBUTE_NONNULL + +/* Whether your compiler has __attribute__ noreturn or not */ +#undef PMIX_HAVE_ATTRIBUTE_NORETURN + +/* Whether your compiler has __attribute__ noreturn and it works on function + pointers */ +#undef PMIX_HAVE_ATTRIBUTE_NORETURN_FUNCPTR + +/* Whether your compiler has __attribute__ no_instrument_function or not */ +#undef PMIX_HAVE_ATTRIBUTE_NO_INSTRUMENT_FUNCTION + +/* Whether your compiler has __attribute__ optnone or not */ +#undef PMIX_HAVE_ATTRIBUTE_OPTNONE + +/* Whether your compiler has __attribute__ packed or not */ +#undef PMIX_HAVE_ATTRIBUTE_PACKED + +/* Whether your compiler has __attribute__ pure or not */ +#undef PMIX_HAVE_ATTRIBUTE_PURE + +/* Whether your compiler has __attribute__ sentinel or not */ +#undef PMIX_HAVE_ATTRIBUTE_SENTINEL + +/* Whether your compiler has __attribute__ unused or not */ +#undef PMIX_HAVE_ATTRIBUTE_UNUSED + +/* Whether your compiler has __attribute__ visibility or not */ +#undef PMIX_HAVE_ATTRIBUTE_VISIBILITY + +/* Whether your compiler has __attribute__ warn unused result or not */ +#undef PMIX_HAVE_ATTRIBUTE_WARN_UNUSED_RESULT + +/* Whether your compiler has __attribute__ weak alias or not */ +#undef PMIX_HAVE_ATTRIBUTE_WEAK_ALIAS + +/* whether qsort is broken or not */ +#undef PMIX_HAVE_BROKEN_QSORT + +/* Whether C11 atomic compare swap is both supported and lock-free on 128-bit + values */ +#undef PMIX_HAVE_C11_CSWAP_INT128 + +/* whether ceil is found and available */ +#undef PMIX_HAVE_CEIL + +/* whether clock_gettime is found and available */ 
+#undef PMIX_HAVE_CLOCK_GETTIME + +/* Whether the processor supports the cmpxchg16b instruction */ +#undef PMIX_HAVE_CMPXCHG16B + +/* whether dirname is found and available */ +#undef PMIX_HAVE_DIRNAME + +/* Whether the __atomic builtin atomic compare swap is both supported and + lock-free on 128-bit values */ +#undef PMIX_HAVE_GCC_BUILTIN_CSWAP_INT128 + +/* whether gethostbyname is found and available */ +#undef PMIX_HAVE_GETHOSTBYNAME + +/* Whether or not we have hwloc support */ +#undef PMIX_HAVE_HWLOC + +/* Whether we are building against libev */ +#undef PMIX_HAVE_LIBEV + +/* Whether we are building against libevent */ +#undef PMIX_HAVE_LIBEVENT + +/* whether openpty is found and available */ +#undef PMIX_HAVE_OPENPTY + +/* Whether the PMIX PDL framework is functional or not */ +#undef PMIX_HAVE_PDL_SUPPORT + +/* If PTHREADS implementation supports PTHREAD_MUTEX_ERRORCHECK */ +#undef PMIX_HAVE_PTHREAD_MUTEX_ERRORCHECK + +/* If PTHREADS implementation supports PTHREAD_MUTEX_ERRORCHECK_NP */ +#undef PMIX_HAVE_PTHREAD_MUTEX_ERRORCHECK_NP + +/* Whether we have SA_RESTART in or not */ +#undef PMIX_HAVE_SA_RESTART + +/* whether socket is found and available */ +#undef PMIX_HAVE_SOCKET + +/* Whether or not we have solaris */ +#undef PMIX_HAVE_SOLARIS + +/* Whether the __sync builtin atomic compare and swap supports 128-bit values + */ +#undef PMIX_HAVE_SYNC_BUILTIN_CSWAP_INT128 + +/* Whether we have __va_copy or not */ +#undef PMIX_HAVE_UNDERSCORE_VA_COPY + +/* Whether we have va_copy or not */ +#undef PMIX_HAVE_VA_COPY + +/* Whether C compiler supports symbol visibility or not */ +#undef PMIX_HAVE_VISIBILITY + +/* Location of hwloc.h */ +#undef PMIX_HWLOC_HEADER + +/* ident string for PMIX */ +#undef PMIX_IDENT_STRING + +/* The library major version is always available, contrary to VERSION */ +#undef PMIX_MAJOR_VERSION + +/* MCA cmd line identifier */ +#undef PMIX_MCA_CMD_LINE_ID + +/* MCA prefix string for envars */ +#undef PMIX_MCA_PREFIX + +/* The library minor 
version is always available, contrary to VERSION */ +#undef PMIX_MINOR_VERSION + +/* Whether the C compiler supports "bool" without any other help (such as + <stdbool.h>) */ +#undef PMIX_NEED_C_BOOL + +/* Whether libraries can be configured with destructor functions */ +#undef PMIX_NO_LIB_DESTRUCTOR + +/* package/branding string for PMIx */ +#undef PMIX_PACKAGE_STRING + +/* Whether we have lt_dladvise or not */ +#undef PMIX_PDL_PLIBLTDL_HAVE_LT_DLADVISE + +/* Whether r notation is used for ppc registers */ +#undef PMIX_POWERPC_R_REGISTERS + +/* type to use for ptrdiff_t */ +#undef PMIX_PTRDIFF_TYPE + +/* The library release version is always available, contrary to VERSION */ +#undef PMIX_RELEASE_VERSION + +/* Default value for mca_base_component_show_load_errors MCA variable */ +#undef PMIX_SHOW_LOAD_ERRORS_DEFAULT + +/* The pmix symbol rename include directive */ +#undef PMIX_SYMBOL_RENAME + +/* Whether to use <stdbool.h> or not */ +#undef PMIX_USE_STDBOOL_H + +/* The library version is always available, contrary to VERSION */ +#undef PMIX_VERSION + +/* Enable per-user config files */ +#undef PMIX_WANT_HOME_CONFIG_FILES + +/* if want pretty-print stack trace feature */ +#undef PMIX_WANT_PRETTY_PRINT_STACKTRACE + +/* The size of `char', as computed by sizeof. */ +#undef SIZEOF_CHAR + +/* The size of `double', as computed by sizeof. */ +#undef SIZEOF_DOUBLE + +/* The size of `float', as computed by sizeof. */ +#undef SIZEOF_FLOAT + +/* The size of `int', as computed by sizeof. */ +#undef SIZEOF_INT + +/* The size of `long', as computed by sizeof. */ +#undef SIZEOF_LONG + +/* The size of `long long', as computed by sizeof. */ +#undef SIZEOF_LONG_LONG + +/* The size of `pid_t', as computed by sizeof. */ +#undef SIZEOF_PID_T + +/* The size of `ptrdiff_t', as computed by sizeof. */ +#undef SIZEOF_PTRDIFF_T + +/* The size of `short', as computed by sizeof. */ +#undef SIZEOF_SHORT + +/* The size of `size_t', as computed by sizeof. 
*/ +#undef SIZEOF_SIZE_T + +/* The size of `ssize_t', as computed by sizeof. */ +#undef SIZEOF_SSIZE_T + +/* The size of `void *', as computed by sizeof. */ +#undef SIZEOF_VOID_P + +/* The size of `wchar_t', as computed by sizeof. */ +#undef SIZEOF_WCHAR_T + +/* The size of `_Bool', as computed by sizeof. */ +#undef SIZEOF__BOOL + +/* Define to 1 if you have the ANSI C header files. */ +#undef STDC_HEADERS + +/* Enable extensions on AIX 3, Interix. */ +#ifndef _ALL_SOURCE +# undef _ALL_SOURCE +#endif +/* Enable GNU extensions on systems that have them. */ +#ifndef _GNU_SOURCE +# undef _GNU_SOURCE +#endif +/* Enable threading extensions on Solaris. */ +#ifndef _POSIX_PTHREAD_SEMANTICS +# undef _POSIX_PTHREAD_SEMANTICS +#endif +/* Enable extensions on HP NonStop. */ +#ifndef _TANDEM_SOURCE +# undef _TANDEM_SOURCE +#endif +/* Enable general extensions on Solaris. */ +#ifndef __EXTENSIONS__ +# undef __EXTENSIONS__ +#endif + + +/* Define WORDS_BIGENDIAN to 1 if your processor stores words with the most + significant byte first (like Motorola and SPARC, unlike Intel). */ +#if defined AC_APPLE_UNIVERSAL_BUILD +# if defined __BIG_ENDIAN__ +# define WORDS_BIGENDIAN 1 +# endif +#else +# ifndef WORDS_BIGENDIAN +# undef WORDS_BIGENDIAN +# endif +#endif + +/* Define to 1 if `lex' declares `yytext' as a `char *' by default, not a + `char[]'. */ +#undef YYTEXT_POINTER + +/* Enable GNU extensions on systems that have them. */ +#ifndef _GNU_SOURCE +# undef _GNU_SOURCE +#endif + +/* Define to 1 if on MINIX. */ +#undef _MINIX + +/* Define to 2 if the system does not provide POSIX.1 features except with + this defined. */ +#undef _POSIX_1_SOURCE + +/* Define to 1 if you need to in order for `stat' and other things to work. */ +#undef _POSIX_SOURCE + +/* Define to `__inline__' or `__inline' if that's what the C compiler + calls it, or to nothing if 'inline' is not supported under any name. 
*/ +#ifndef __cplusplus +#undef inline +#endif + + +#include +#endif /* PMIX_CONFIG_H */ + diff --git a/opal/mca/pmix/pmix4x/openpmix/src/mca/pinstalldirs/config/pinstall_dirs.h b/opal/mca/pmix/pmix4x/openpmix/src/mca/pinstalldirs/config/pinstall_dirs.h new file mode 100644 index 00000000000..7c1cf6cf3af --- /dev/null +++ b/opal/mca/pmix/pmix4x/openpmix/src/mca/pinstalldirs/config/pinstall_dirs.h @@ -0,0 +1,129 @@ +/* + * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2005 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * Copyright (c) 2007 Los Alamos National Security, LLC. + * All rights reserved. + * Copyright (c) 2016 Intel, Inc. All rights reserved + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + * + * This file should be included by any file that needs the + * installation directories hard-coded into the object file. This + * should be avoided if at all possible, but there are some places + * (like the wrapper compilers) where it is infinitely easier to have + * the paths stored. + * + * If you have questions about which directory to use, we try as best + * we can to follow the GNU coding standards on this issue. The + * description of each directory can be found at the following URL: + * + * http://www.gnu.org/prep/standards/html_node/Directory-Variables.html + * + * The line below is to shut AC 2.60 up about datarootdir. Don't remove. 
+ * datarootdir=foo + */ + +#ifndef PMIX_INST_DIRS_H +#define PMIX_INST_DIRS_H + +#define PMIX_INSTALL_PREFIX "/home/wbailey2/my-test-ompi-install" +#define PMIX_EXEC_PREFIX "${prefix}" + +/* The directory for installing executable programs that users can + run. */ +#define PMIX_BINDIR "${exec_prefix}/bin" + +/* The directory for installing executable programs that can be run + from the shell, but are only generally useful to system + administrators. */ +#define PMIX_SBINDIR "${exec_prefix}/sbin" + +/* The directory for installing executable programs to be run by other + programs rather than by users. + + The definition of ‘libexecdir’ is the same for all packages, so + you should install your data in a subdirectory thereof. Most + packages install their data under $(libexecdir)/package-name/, + possibly within additional subdirectories thereof, such as + $(libexecdir)/package-name/machine/version. */ +#define PMIX_LIBEXECDIR "${exec_prefix}/libexec" + +/* The root of the directory tree for read-only + architecture-independent data files. + + See note about PMIX_DATADIR. And you probably want that one, not + this one. This is one of those "building block" paths, that is + really only used for defining other paths. */ +#define PMIX_DATAROOTDIR "${prefix}/share" + +/* The directory for installing idiosyncratic read-only + architecture-independent data files for this program. + + The definition of ‘datadir’ is the same for all packages, so you + should install your data in a subdirectory thereof. Most packages + install their data under $(datadir)/package-name/. */ +#define PMIX_DATADIR "${datarootdir}" + +/* $(datadir)/package-name/. You probably want to use this instead of + PMIX_DATADIR */ +#define PMIX_PKGDATADIR "${datadir}/pmix" + +/* The directory for installing read-only data files that pertain to a + single machine–that is to say, files for configuring a host. Mailer + and network configuration files, /etc/passwd, and so forth belong + here. 
All the files in this directory should be ordinary ASCII text + files. + + Do not install executables here in this directory (they probably + belong in $(libexecdir) or $(sbindir)). Also do not install files + that are modified in the normal course of their use (programs whose + purpose is to change the configuration of the system + excluded). Those probably belong in $(localstatedir). */ +#define PMIX_SYSCONFDIR "${prefix}/etc" + +/* The directory for installing architecture-independent data files + which the programs modify while they run. */ +#define PMIX_SHAREDSTATEDIR "${prefix}/com" + +/* The directory for installing data files which the programs modify + while they run, and that pertain to one specific machine. Users + should never need to modify files in this directory to configure + the package's operation; put such configuration information in + separate files that go in $(datadir) or + $(sysconfdir). */ +#define PMIX_LOCALSTATEDIR "${prefix}/var" + +/* The directory for object files and libraries of object code. Do not + install executables here, they probably ought to go in + $(libexecdir) instead. */ +#define PMIX_LIBDIR "${exec_prefix}/lib" + +/* $(libdir)/package-name/. This is where components should go */ +#define PMIX_PKGLIBDIR "${libdir}/pmix" + +/* The directory for installing header files to be included by user + programs with the C ‘#include’ preprocessor directive. */ +#define PMIX_INCLUDEDIR "${prefix}/include" + +/* $(includedir)/package-name/. The devel headers go in here */ +#define PMIX_PKGINCLUDEDIR "${includedir}/pmix" + +/* The directory for installing the Info files for this package. */ +#define PMIX_INFODIR "${datarootdir}/info" + +/* The top-level directory for installing the man pages (if any) for + this package. 
*/ +#define PMIX_MANDIR "${datarootdir}/man" + +#endif diff --git a/opal/mca/pmix/pmix4x/openpmix/src/tools/wrapper/pmix.pc b/opal/mca/pmix/pmix4x/openpmix/src/tools/wrapper/pmix.pc new file mode 100644 index 00000000000..0597a30a90d --- /dev/null +++ b/opal/mca/pmix/pmix4x/openpmix/src/tools/wrapper/pmix.pc @@ -0,0 +1,25 @@ +# Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. +# +Name: Open Portability Access Layer (PMIX) +Description: Lowest layer abstractions for the Open MPI software package +Version: 4.0.0a1 +URL: http://www.open-mpi.org/ +# +prefix=/home/wbailey2/my-test-ompi-install +exec_prefix=${prefix} +includedir=${prefix}/include +libdir=${exec_prefix}/lib +pkgincludedir=${includedir}/pmix +# +# Note that the EXTRA_LIBS are only necessary when static linking +# (they're pulled in via libopen-pal.so's implicit dependencies), so +# list them in Libs.private. +# +Libs: -L${libdir} -lopen-pal +Libs.private: +# +# It is safe to hard-wire the -I before the EXTRA_INCLUDES because we +# will not be installing this .pc file unless --enable-devel-headers is +# selected, meaning that there will definitely be a value in EXTRA_INCLUDES. +# +Cflags: -I${includedir} -pthread diff --git a/opal/mca/pmix/pmix4x/openpmix/src/tools/wrapper/pmixcc-wrapper-data.txt b/opal/mca/pmix/pmix4x/openpmix/src/tools/wrapper/pmixcc-wrapper-data.txt new file mode 100644 index 00000000000..24adc5c7507 --- /dev/null +++ b/opal/mca/pmix/pmix4x/openpmix/src/tools/wrapper/pmixcc-wrapper-data.txt @@ -0,0 +1,25 @@ +# There can be multiple blocks of configuration data, chosen by +# compiler flags (using the compiler_args key to choose which block +# should be activated). This can be useful for multilib builds. See the +# multilib page at: +# https://github.com/open-mpi/ompi/wiki/compilerwrapper3264 +# for more information. 
+ +project=PMI-Exascale (PMIx) +project_short=PMIx +version=4.0.0a1 +language=C +compiler_env=CC +compiler_flags_env=CFLAGS +compiler=gcc +preprocessor_flags= +compiler_flags_prefix= +compiler_flags=-pthread +linker_flags= +libs=-lpmix +libs_static=-lpmix +dyn_lib_file=libpmix.@PMIX_DYN_LIB_SUFFIX@ +static_lib_file=libpmix.a +required_file= +includedir=${includedir} +libdir=${libdir} diff --git a/opal/mca/pmix/pmix4x/openpmix/src/util/keyval/keyval_lex.c b/opal/mca/pmix/pmix4x/openpmix/src/util/keyval/keyval_lex.c new file mode 100644 index 00000000000..96fc1bb02c0 --- /dev/null +++ b/opal/mca/pmix/pmix4x/openpmix/src/util/keyval/keyval_lex.c @@ -0,0 +1,2324 @@ + +#line 3 "keyval_lex.c" + +#define YY_INT_ALIGNED short int + +/* A lexical scanner generated by flex */ + +#define yy_create_buffer pmix_util_keyval_yy_create_buffer +#define yy_delete_buffer pmix_util_keyval_yy_delete_buffer +#define yy_scan_buffer pmix_util_keyval_yy_scan_buffer +#define yy_scan_string pmix_util_keyval_yy_scan_string +#define yy_scan_bytes pmix_util_keyval_yy_scan_bytes +#define yy_init_buffer pmix_util_keyval_yy_init_buffer +#define yy_flush_buffer pmix_util_keyval_yy_flush_buffer +#define yy_load_buffer_state pmix_util_keyval_yy_load_buffer_state +#define yy_switch_to_buffer pmix_util_keyval_yy_switch_to_buffer +#define yypush_buffer_state pmix_util_keyval_yypush_buffer_state +#define yypop_buffer_state pmix_util_keyval_yypop_buffer_state +#define yyensure_buffer_stack pmix_util_keyval_yyensure_buffer_stack +#define yy_flex_debug pmix_util_keyval_yy_flex_debug +#define yyin pmix_util_keyval_yyin +#define yyleng pmix_util_keyval_yyleng +#define yylex pmix_util_keyval_yylex +#define yylineno pmix_util_keyval_yylineno +#define yyout pmix_util_keyval_yyout +#define yyrestart pmix_util_keyval_yyrestart +#define yytext pmix_util_keyval_yytext +#define yywrap pmix_util_keyval_yywrap +#define yyalloc pmix_util_keyval_yyalloc +#define yyrealloc pmix_util_keyval_yyrealloc +#define yyfree 
pmix_util_keyval_yyfree + +#define FLEX_SCANNER +#define YY_FLEX_MAJOR_VERSION 2 +#define YY_FLEX_MINOR_VERSION 6 +#define YY_FLEX_SUBMINOR_VERSION 4 +#if YY_FLEX_SUBMINOR_VERSION > 0 +#define FLEX_BETA +#endif + +#ifdef yy_create_buffer +#define pmix_util_keyval_yy_create_buffer_ALREADY_DEFINED +#else +#define yy_create_buffer pmix_util_keyval_yy_create_buffer +#endif + +#ifdef yy_delete_buffer +#define pmix_util_keyval_yy_delete_buffer_ALREADY_DEFINED +#else +#define yy_delete_buffer pmix_util_keyval_yy_delete_buffer +#endif + +#ifdef yy_scan_buffer +#define pmix_util_keyval_yy_scan_buffer_ALREADY_DEFINED +#else +#define yy_scan_buffer pmix_util_keyval_yy_scan_buffer +#endif + +#ifdef yy_scan_string +#define pmix_util_keyval_yy_scan_string_ALREADY_DEFINED +#else +#define yy_scan_string pmix_util_keyval_yy_scan_string +#endif + +#ifdef yy_scan_bytes +#define pmix_util_keyval_yy_scan_bytes_ALREADY_DEFINED +#else +#define yy_scan_bytes pmix_util_keyval_yy_scan_bytes +#endif + +#ifdef yy_init_buffer +#define pmix_util_keyval_yy_init_buffer_ALREADY_DEFINED +#else +#define yy_init_buffer pmix_util_keyval_yy_init_buffer +#endif + +#ifdef yy_flush_buffer +#define pmix_util_keyval_yy_flush_buffer_ALREADY_DEFINED +#else +#define yy_flush_buffer pmix_util_keyval_yy_flush_buffer +#endif + +#ifdef yy_load_buffer_state +#define pmix_util_keyval_yy_load_buffer_state_ALREADY_DEFINED +#else +#define yy_load_buffer_state pmix_util_keyval_yy_load_buffer_state +#endif + +#ifdef yy_switch_to_buffer +#define pmix_util_keyval_yy_switch_to_buffer_ALREADY_DEFINED +#else +#define yy_switch_to_buffer pmix_util_keyval_yy_switch_to_buffer +#endif + +#ifdef yypush_buffer_state +#define pmix_util_keyval_yypush_buffer_state_ALREADY_DEFINED +#else +#define yypush_buffer_state pmix_util_keyval_yypush_buffer_state +#endif + +#ifdef yypop_buffer_state +#define pmix_util_keyval_yypop_buffer_state_ALREADY_DEFINED +#else +#define yypop_buffer_state pmix_util_keyval_yypop_buffer_state +#endif + +#ifdef 
yyensure_buffer_stack +#define pmix_util_keyval_yyensure_buffer_stack_ALREADY_DEFINED +#else +#define yyensure_buffer_stack pmix_util_keyval_yyensure_buffer_stack +#endif + +#ifdef yylex +#define pmix_util_keyval_yylex_ALREADY_DEFINED +#else +#define yylex pmix_util_keyval_yylex +#endif + +#ifdef yyrestart +#define pmix_util_keyval_yyrestart_ALREADY_DEFINED +#else +#define yyrestart pmix_util_keyval_yyrestart +#endif + +#ifdef yylex_init +#define pmix_util_keyval_yylex_init_ALREADY_DEFINED +#else +#define yylex_init pmix_util_keyval_yylex_init +#endif + +#ifdef yylex_init_extra +#define pmix_util_keyval_yylex_init_extra_ALREADY_DEFINED +#else +#define yylex_init_extra pmix_util_keyval_yylex_init_extra +#endif + +#ifdef yylex_destroy +#define pmix_util_keyval_yylex_destroy_ALREADY_DEFINED +#else +#define yylex_destroy pmix_util_keyval_yylex_destroy +#endif + +#ifdef yyget_debug +#define pmix_util_keyval_yyget_debug_ALREADY_DEFINED +#else +#define yyget_debug pmix_util_keyval_yyget_debug +#endif + +#ifdef yyset_debug +#define pmix_util_keyval_yyset_debug_ALREADY_DEFINED +#else +#define yyset_debug pmix_util_keyval_yyset_debug +#endif + +#ifdef yyget_extra +#define pmix_util_keyval_yyget_extra_ALREADY_DEFINED +#else +#define yyget_extra pmix_util_keyval_yyget_extra +#endif + +#ifdef yyset_extra +#define pmix_util_keyval_yyset_extra_ALREADY_DEFINED +#else +#define yyset_extra pmix_util_keyval_yyset_extra +#endif + +#ifdef yyget_in +#define pmix_util_keyval_yyget_in_ALREADY_DEFINED +#else +#define yyget_in pmix_util_keyval_yyget_in +#endif + +#ifdef yyset_in +#define pmix_util_keyval_yyset_in_ALREADY_DEFINED +#else +#define yyset_in pmix_util_keyval_yyset_in +#endif + +#ifdef yyget_out +#define pmix_util_keyval_yyget_out_ALREADY_DEFINED +#else +#define yyget_out pmix_util_keyval_yyget_out +#endif + +#ifdef yyset_out +#define pmix_util_keyval_yyset_out_ALREADY_DEFINED +#else +#define yyset_out pmix_util_keyval_yyset_out +#endif + +#ifdef yyget_leng +#define 
pmix_util_keyval_yyget_leng_ALREADY_DEFINED +#else +#define yyget_leng pmix_util_keyval_yyget_leng +#endif + +#ifdef yyget_text +#define pmix_util_keyval_yyget_text_ALREADY_DEFINED +#else +#define yyget_text pmix_util_keyval_yyget_text +#endif + +#ifdef yyget_lineno +#define pmix_util_keyval_yyget_lineno_ALREADY_DEFINED +#else +#define yyget_lineno pmix_util_keyval_yyget_lineno +#endif + +#ifdef yyset_lineno +#define pmix_util_keyval_yyset_lineno_ALREADY_DEFINED +#else +#define yyset_lineno pmix_util_keyval_yyset_lineno +#endif + +#ifdef yywrap +#define pmix_util_keyval_yywrap_ALREADY_DEFINED +#else +#define yywrap pmix_util_keyval_yywrap +#endif + +#ifdef yyalloc +#define pmix_util_keyval_yyalloc_ALREADY_DEFINED +#else +#define yyalloc pmix_util_keyval_yyalloc +#endif + +#ifdef yyrealloc +#define pmix_util_keyval_yyrealloc_ALREADY_DEFINED +#else +#define yyrealloc pmix_util_keyval_yyrealloc +#endif + +#ifdef yyfree +#define pmix_util_keyval_yyfree_ALREADY_DEFINED +#else +#define yyfree pmix_util_keyval_yyfree +#endif + +#ifdef yytext +#define pmix_util_keyval_yytext_ALREADY_DEFINED +#else +#define yytext pmix_util_keyval_yytext +#endif + +#ifdef yyleng +#define pmix_util_keyval_yyleng_ALREADY_DEFINED +#else +#define yyleng pmix_util_keyval_yyleng +#endif + +#ifdef yyin +#define pmix_util_keyval_yyin_ALREADY_DEFINED +#else +#define yyin pmix_util_keyval_yyin +#endif + +#ifdef yyout +#define pmix_util_keyval_yyout_ALREADY_DEFINED +#else +#define yyout pmix_util_keyval_yyout +#endif + +#ifdef yy_flex_debug +#define pmix_util_keyval_yy_flex_debug_ALREADY_DEFINED +#else +#define yy_flex_debug pmix_util_keyval_yy_flex_debug +#endif + +#ifdef yylineno +#define pmix_util_keyval_yylineno_ALREADY_DEFINED +#else +#define yylineno pmix_util_keyval_yylineno +#endif + +/* First, we deal with platform-specific or compiler-specific issues. */ + +/* begin standard C headers. */ +#include <stdio.h> +#include <string.h> +#include <errno.h> +#include <stdlib.h> + +/* end standard C headers.
*/ + +/* flex integer type definitions */ + +#ifndef FLEXINT_H +#define FLEXINT_H + +/* C99 systems have <inttypes.h>. Non-C99 systems may or may not. */ + +#if defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L + +/* C99 says to define __STDC_LIMIT_MACROS before including stdint.h, + * if you want the limit (max/min) macros for int types. + */ +#ifndef __STDC_LIMIT_MACROS +#define __STDC_LIMIT_MACROS 1 +#endif + +#include <inttypes.h> +typedef int8_t flex_int8_t; +typedef uint8_t flex_uint8_t; +typedef int16_t flex_int16_t; +typedef uint16_t flex_uint16_t; +typedef int32_t flex_int32_t; +typedef uint32_t flex_uint32_t; +#else +typedef signed char flex_int8_t; +typedef short int flex_int16_t; +typedef int flex_int32_t; +typedef unsigned char flex_uint8_t; +typedef unsigned short int flex_uint16_t; +typedef unsigned int flex_uint32_t; + +/* Limits of integral types. */ +#ifndef INT8_MIN +#define INT8_MIN (-128) +#endif +#ifndef INT16_MIN +#define INT16_MIN (-32767-1) +#endif +#ifndef INT32_MIN +#define INT32_MIN (-2147483647-1) +#endif +#ifndef INT8_MAX +#define INT8_MAX (127) +#endif +#ifndef INT16_MAX +#define INT16_MAX (32767) +#endif +#ifndef INT32_MAX +#define INT32_MAX (2147483647) +#endif +#ifndef UINT8_MAX +#define UINT8_MAX (255U) +#endif +#ifndef UINT16_MAX +#define UINT16_MAX (65535U) +#endif +#ifndef UINT32_MAX +#define UINT32_MAX (4294967295U) +#endif + +#ifndef SIZE_MAX +#define SIZE_MAX (~(size_t)0) +#endif + +#endif /* ! C99 */ + +#endif /* ! FLEXINT_H */ + +/* begin standard C++ headers. */ + +/* TODO: this is always defined, so inline it */ +#define yyconst const + +#if defined(__GNUC__) && __GNUC__ >= 3 +#define yynoreturn __attribute__((__noreturn__)) +#else +#define yynoreturn +#endif + +/* Returned upon end-of-file. */ +#define YY_NULL 0 + +/* Promotes a possibly negative, possibly signed char to an + * integer in range [0..255] for use as an array index. + */ +#define YY_SC_TO_UI(c) ((YY_CHAR) (c)) + +/* Enter a start condition.
This macro really ought to take a parameter, + * but we do it the disgusting crufty way forced on us by the ()-less + * definition of BEGIN. + */ +#define BEGIN (yy_start) = 1 + 2 * +/* Translate the current start state into a value that can be later handed + * to BEGIN to return to the state. The YYSTATE alias is for lex + * compatibility. + */ +#define YY_START (((yy_start) - 1) / 2) +#define YYSTATE YY_START +/* Action number for EOF rule of a given start state. */ +#define YY_STATE_EOF(state) (YY_END_OF_BUFFER + state + 1) +/* Special action meaning "start processing a new file". */ +#define YY_NEW_FILE yyrestart( yyin ) +#define YY_END_OF_BUFFER_CHAR 0 + +/* Size of default input buffer. */ +#ifndef YY_BUF_SIZE +#ifdef __ia64__ +/* On IA-64, the buffer size is 16k, not 8k. + * Moreover, YY_BUF_SIZE is 2*YY_READ_BUF_SIZE in the general case. + * Ditto for the __ia64__ case accordingly. + */ +#define YY_BUF_SIZE 32768 +#else +#define YY_BUF_SIZE 16384 +#endif /* __ia64__ */ +#endif + +/* The state buf must be large enough to hold one state per character in the main buffer. + */ +#define YY_STATE_BUF_SIZE ((YY_BUF_SIZE + 2) * sizeof(yy_state_type)) + +#ifndef YY_TYPEDEF_YY_BUFFER_STATE +#define YY_TYPEDEF_YY_BUFFER_STATE +typedef struct yy_buffer_state *YY_BUFFER_STATE; +#endif + +#ifndef YY_TYPEDEF_YY_SIZE_T +#define YY_TYPEDEF_YY_SIZE_T +typedef size_t yy_size_t; +#endif + +extern int yyleng; + +extern FILE *yyin, *yyout; + +#define EOB_ACT_CONTINUE_SCAN 0 +#define EOB_ACT_END_OF_FILE 1 +#define EOB_ACT_LAST_MATCH 2 + + /* Note: We specifically omit the test for yy_rule_can_match_eol because it requires + * access to the local variable yy_act. Since yyless() is a macro, it would break + * existing scanners that call yyless() from OUTSIDE yylex. + * One obvious solution is to make yy_act a global. I tried that, and saw + * a 5% performance hit in a non-yylineno scanner, because yy_act is + * normally declared as a register variable-- so it is not worth it.
+ */ + #define YY_LESS_LINENO(n) \ + do { \ + int yyl;\ + for ( yyl = n; yyl < yyleng; ++yyl )\ + if ( yytext[yyl] == '\n' )\ + --yylineno;\ + }while(0) + #define YY_LINENO_REWIND_TO(dst) \ + do {\ + const char *p;\ + for ( p = yy_cp-1; p >= (dst); --p)\ + if ( *p == '\n' )\ + --yylineno;\ + }while(0) + +/* Return all but the first "n" matched characters back to the input stream. */ +#define yyless(n) \ + do \ + { \ + /* Undo effects of setting up yytext. */ \ + int yyless_macro_arg = (n); \ + YY_LESS_LINENO(yyless_macro_arg);\ + *yy_cp = (yy_hold_char); \ + YY_RESTORE_YY_MORE_OFFSET \ + (yy_c_buf_p) = yy_cp = yy_bp + yyless_macro_arg - YY_MORE_ADJ; \ + YY_DO_BEFORE_ACTION; /* set up yytext again */ \ + } \ + while ( 0 ) +#define unput(c) yyunput( c, (yytext_ptr) ) + +#ifndef YY_STRUCT_YY_BUFFER_STATE +#define YY_STRUCT_YY_BUFFER_STATE +struct yy_buffer_state + { + FILE *yy_input_file; + + char *yy_ch_buf; /* input buffer */ + char *yy_buf_pos; /* current position in input buffer */ + + /* Size of input buffer in bytes, not including room for EOB + * characters. + */ + int yy_buf_size; + + /* Number of characters read into yy_ch_buf, not including EOB + * characters. + */ + int yy_n_chars; + + /* Whether we "own" the buffer - i.e., we know we created it, + * and can realloc() it to grow it, and should free() it to + * delete it. + */ + int yy_is_our_buffer; + + /* Whether this is an "interactive" input source; if so, and + * if we're using stdio for input, then we want to use getc() + * instead of fread(), to make sure we stop fetching input after + * each newline. + */ + int yy_is_interactive; + + /* Whether we're considered to be at the beginning of a line. + * If so, '^' rules will be active on the next match, otherwise + * not. + */ + int yy_at_bol; + + int yy_bs_lineno; /**< The line count. */ + int yy_bs_column; /**< The column count. */ + + /* Whether to try to fill the input buffer when we reach the + * end of it. 
+ */ + int yy_fill_buffer; + + int yy_buffer_status; + +#define YY_BUFFER_NEW 0 +#define YY_BUFFER_NORMAL 1 + /* When an EOF's been seen but there's still some text to process + * then we mark the buffer as YY_EOF_PENDING, to indicate that we + * shouldn't try reading from the input source any more. We might + * still have a bunch of tokens to match, though, because of + * possible backing-up. + * + * When we actually see the EOF, we change the status to "new" + * (via yyrestart()), so that the user can continue scanning by + * just pointing yyin at a new input file. + */ +#define YY_BUFFER_EOF_PENDING 2 + + }; +#endif /* !YY_STRUCT_YY_BUFFER_STATE */ + +/* Stack of input buffers. */ +static size_t yy_buffer_stack_top = 0; /**< index of top of stack. */ +static size_t yy_buffer_stack_max = 0; /**< capacity of stack. */ +static YY_BUFFER_STATE * yy_buffer_stack = NULL; /**< Stack as an array. */ + +/* We provide macros for accessing buffer states in case in the + * future we want to put the buffer states in a more general + * "scanner state". + * + * Returns the top of the stack, or NULL. + */ +#define YY_CURRENT_BUFFER ( (yy_buffer_stack) \ + ? (yy_buffer_stack)[(yy_buffer_stack_top)] \ + : NULL) +/* Same as previous macro, but useful when we know that the buffer stack is not + * NULL or when we need an lvalue. For internal use only. + */ +#define YY_CURRENT_BUFFER_LVALUE (yy_buffer_stack)[(yy_buffer_stack_top)] + +/* yy_hold_char holds the character lost when yytext is formed. */ +static char yy_hold_char; +static int yy_n_chars; /* number of characters read into yy_ch_buf */ +int yyleng; + +/* Points to current character in buffer. */ +static char *yy_c_buf_p = NULL; +static int yy_init = 0; /* whether we need to initialize */ +static int yy_start = 0; /* start state number */ + +/* Flag which is used to allow yywrap()'s to do buffer switches + * instead of setting up a fresh yyin. A bit of a hack ... 
+ */ +static int yy_did_buffer_switch_on_eof; + +void yyrestart ( FILE *input_file ); +void yy_switch_to_buffer ( YY_BUFFER_STATE new_buffer ); +YY_BUFFER_STATE yy_create_buffer ( FILE *file, int size ); +void yy_delete_buffer ( YY_BUFFER_STATE b ); +void yy_flush_buffer ( YY_BUFFER_STATE b ); +void yypush_buffer_state ( YY_BUFFER_STATE new_buffer ); +void yypop_buffer_state ( void ); + +static void yyensure_buffer_stack ( void ); +static void yy_load_buffer_state ( void ); +static void yy_init_buffer ( YY_BUFFER_STATE b, FILE *file ); +#define YY_FLUSH_BUFFER yy_flush_buffer( YY_CURRENT_BUFFER ) + +YY_BUFFER_STATE yy_scan_buffer ( char *base, yy_size_t size ); +YY_BUFFER_STATE yy_scan_string ( const char *yy_str ); +YY_BUFFER_STATE yy_scan_bytes ( const char *bytes, int len ); + +void *yyalloc ( yy_size_t ); +void *yyrealloc ( void *, yy_size_t ); +void yyfree ( void * ); + +#define yy_new_buffer yy_create_buffer +#define yy_set_interactive(is_interactive) \ + { \ + if ( ! YY_CURRENT_BUFFER ){ \ + yyensure_buffer_stack (); \ + YY_CURRENT_BUFFER_LVALUE = \ + yy_create_buffer( yyin, YY_BUF_SIZE ); \ + } \ + YY_CURRENT_BUFFER_LVALUE->yy_is_interactive = is_interactive; \ + } +#define yy_set_bol(at_bol) \ + { \ + if ( ! 
YY_CURRENT_BUFFER ){\ + yyensure_buffer_stack (); \ + YY_CURRENT_BUFFER_LVALUE = \ + yy_create_buffer( yyin, YY_BUF_SIZE ); \ + } \ + YY_CURRENT_BUFFER_LVALUE->yy_at_bol = at_bol; \ + } +#define YY_AT_BOL() (YY_CURRENT_BUFFER_LVALUE->yy_at_bol) + +/* Begin user sect3 */ +typedef flex_uint8_t YY_CHAR; + +FILE *yyin = NULL, *yyout = NULL; + +typedef int yy_state_type; + +extern int yylineno; +int yylineno = 1; + +extern char *yytext; +#ifdef yytext_ptr +#undef yytext_ptr +#endif +#define yytext_ptr yytext + +static yy_state_type yy_get_previous_state ( void ); +static yy_state_type yy_try_NUL_trans ( yy_state_type current_state ); +static int yy_get_next_buffer ( void ); +static void yynoreturn yy_fatal_error ( const char* msg ); + +/* Done after the current pattern has been matched and before the + * corresponding action - sets up yytext. + */ +#define YY_DO_BEFORE_ACTION \ + (yytext_ptr) = yy_bp; \ + yyleng = (int) (yy_cp - yy_bp); \ + (yy_hold_char) = *yy_cp; \ + *yy_cp = '\0'; \ + (yy_c_buf_p) = yy_cp; +#define YY_NUM_RULES 22 +#define YY_END_OF_BUFFER 23 +/* This struct is not used in this scanner, + but its presence is necessary. 
*/ +struct yy_trans_info + { + flex_int32_t yy_verify; + flex_int32_t yy_nxt; + }; +static const flex_int16_t yy_acclist[93] = + { 0, + 5, 5, 23, 21, 22, 10, 21, 22, 1, 22, + 21, 22, 11, 21, 22, 11, 21, 22, 21, 22, + 9, 21, 22, 8205, 22,16397, 22, 12, 22, 8205, + 22,16397, 5, 22, 7, 22, 6, 22, 14, 22, + 22, 17, 22, 14, 22, 14, 22, 14, 22, 10, + 1, 9, 2, 11, 11, 11, 11, 4, 9, 8205, + 16397, 8205, 12, 8205,16397, 8205, 5, 6, 6, 8, + 14, 17, 14, 14, 14, 14, 14, 11, 3, 16, + 14, 16, 15, 14, 15, 11, 20, 20, 20, 19, + 19, 18 + + } ; + +static const flex_int16_t yy_accept[85] = + { 0, + 1, 1, 1, 1, 1, 2, 3, 3, 3, 4, + 6, 9, 11, 13, 16, 19, 21, 24, 27, 28, + 30, 33, 35, 37, 39, 41, 42, 44, 46, 48, + 50, 51, 52, 53, 53, 54, 55, 56, 57, 58, + 59, 59, 60, 62, 63, 63, 64, 66, 67, 68, + 69, 70, 71, 72, 72, 73, 74, 75, 75, 76, + 77, 77, 78, 79, 79, 79, 80, 80, 81, 83, + 83, 84, 86, 87, 87, 87, 88, 89, 90, 91, + 91, 92, 93, 93 + } ; + +static const YY_CHAR yy_ec[256] = + { 0, + 1, 1, 1, 1, 1, 1, 1, 1, 2, 3, + 4, 4, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 2, 1, 5, 6, 1, 1, 1, 7, 1, + 1, 8, 1, 1, 9, 10, 11, 10, 10, 10, + 10, 10, 10, 10, 10, 10, 10, 1, 1, 1, + 12, 1, 1, 1, 10, 10, 10, 10, 10, 10, + 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, + 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, + 1, 1, 1, 1, 10, 1, 13, 10, 14, 10, + + 10, 10, 10, 10, 10, 10, 10, 10, 15, 10, + 10, 10, 10, 10, 10, 10, 10, 10, 10, 16, + 10, 10, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1 + } ; + +static const YY_CHAR yy_meta[17] = + { 0, + 1, 2, 3, 4, 1, 1, 1, 5, 6, 6, + 1, 1, 6, 6, 6, 6 + } ; + +static const flex_int16_t 
yy_base[100] = + { 0, + 0, 0, 15, 18, 20, 21, 28, 34, 143, 252, + 40, 252, 121, 44, 52, 37, 23, 115, 67, 252, + 70, 0, 252, 38, 0, 111, 252, 73, 45, 49, + 0, 252, 60, 105, 252, 50, 0, 93, 76, 252, + 98, 77, 98, 91, 80, 252, 83, 86, 0, 83, + 84, 252, 0, 60, 252, 94, 97, 50, 101, 102, + 33, 108, 21, 109, 23, 252, 114, 0, 0, 118, + 0, 0, 119, 125, 140, 252, 136, 0, 151, 152, + 155, 156, 252, 160, 166, 172, 178, 184, 190, 196, + 202, 207, 211, 217, 223, 229, 235, 240, 245 + + } ; + +static const flex_int16_t yy_def[100] = + { 0, + 83, 1, 84, 84, 85, 85, 86, 86, 83, 83, + 83, 83, 87, 83, 14, 83, 83, 88, 88, 83, + 88, 89, 83, 90, 91, 92, 83, 91, 93, 94, + 11, 83, 83, 87, 83, 15, 15, 15, 15, 83, + 95, 83, 88, 88, 88, 83, 88, 88, 89, 90, + 90, 83, 91, 92, 83, 91, 93, 96, 93, 94, + 97, 94, 15, 98, 95, 83, 96, 67, 59, 97, + 70, 62, 15, 83, 99, 83, 74, 74, 83, 99, + 83, 83, 0, 83, 83, 83, 83, 83, 83, 83, + 83, 83, 83, 83, 83, 83, 83, 83, 83 + + } ; + +static const flex_int16_t yy_nxt[269] = + { 0, + 10, 11, 12, 11, 10, 13, 10, 10, 14, 15, + 16, 17, 15, 15, 15, 15, 19, 20, 21, 19, + 20, 21, 23, 23, 42, 66, 42, 24, 24, 26, + 27, 28, 29, 73, 30, 26, 27, 28, 29, 70, + 30, 31, 32, 31, 40, 51, 58, 41, 52, 59, + 61, 33, 36, 37, 67, 62, 37, 37, 38, 39, + 37, 42, 55, 42, 38, 39, 37, 37, 45, 46, + 47, 48, 46, 47, 54, 55, 56, 64, 42, 64, + 42, 45, 46, 47, 48, 46, 47, 48, 46, 47, + 83, 51, 44, 83, 52, 54, 55, 56, 58, 44, + + 66, 59, 68, 61, 69, 59, 63, 35, 62, 71, + 64, 72, 64, 55, 62, 68, 44, 68, 67, 71, + 75, 71, 75, 35, 70, 76, 77, 76, 77, 76, + 76, 76, 76, 78, 78, 76, 79, 78, 78, 78, + 78, 75, 83, 75, 76, 76, 83, 83, 76, 76, + 76, 76, 81, 82, 81, 82, 81, 82, 81, 82, + 18, 18, 18, 18, 18, 18, 22, 22, 22, 22, + 22, 22, 25, 25, 25, 25, 25, 25, 34, 34, + 34, 34, 34, 34, 43, 43, 83, 43, 43, 43, + 49, 49, 83, 49, 83, 49, 50, 50, 83, 50, + + 50, 50, 53, 83, 83, 53, 53, 53, 54, 54, + 54, 57, 57, 83, 57, 57, 57, 60, 60, 83, + 60, 60, 60, 65, 65, 65, 65, 65, 65, 58, + 58, 83, 58, 58, 58, 61, 
61, 83, 61, 61, + 61, 74, 83, 74, 83, 74, 80, 83, 80, 83, + 80, 9, 83, 83, 83, 83, 83, 83, 83, 83, + 83, 83, 83, 83, 83, 83, 83, 83 + } ; + +static const flex_int16_t yy_chk[269] = + { 0, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 3, 3, 3, 4, + 4, 4, 5, 6, 17, 65, 17, 5, 6, 7, + 7, 7, 7, 63, 7, 8, 8, 8, 8, 61, + 8, 11, 11, 11, 16, 24, 29, 16, 24, 29, + 30, 11, 14, 14, 58, 30, 14, 14, 14, 14, + 15, 33, 54, 33, 36, 36, 15, 15, 19, 19, + 19, 21, 21, 21, 28, 28, 28, 39, 42, 39, + 42, 45, 45, 45, 47, 47, 47, 48, 48, 48, + 50, 51, 44, 50, 51, 56, 56, 56, 57, 43, + + 41, 57, 59, 60, 59, 59, 38, 34, 60, 62, + 64, 62, 64, 26, 62, 67, 18, 67, 67, 70, + 73, 70, 73, 13, 70, 74, 74, 74, 74, 74, + 74, 74, 74, 74, 74, 74, 74, 74, 74, 74, + 74, 75, 9, 75, 77, 77, 0, 0, 77, 77, + 77, 77, 79, 80, 79, 80, 81, 82, 81, 82, + 84, 84, 84, 84, 84, 84, 85, 85, 85, 85, + 85, 85, 86, 86, 86, 86, 86, 86, 87, 87, + 87, 87, 87, 87, 88, 88, 0, 88, 88, 88, + 89, 89, 0, 89, 0, 89, 90, 90, 0, 90, + + 90, 90, 91, 0, 0, 91, 91, 91, 92, 92, + 92, 93, 93, 0, 93, 93, 93, 94, 94, 0, + 94, 94, 94, 95, 95, 95, 95, 95, 95, 96, + 96, 0, 96, 96, 96, 97, 97, 0, 97, 97, + 97, 98, 0, 98, 0, 98, 99, 0, 99, 0, + 99, 83, 83, 83, 83, 83, 83, 83, 83, 83, + 83, 83, 83, 83, 83, 83, 83, 83 + } ; + +/* Table of booleans, true if rule could match eol. */ +static const flex_int32_t yy_rule_can_match_eol[23] = + { 0, +1, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, + 1, 0, 0, }; + +extern int yy_flex_debug; +int yy_flex_debug = 0; + +static yy_state_type *yy_state_buf=0, *yy_state_ptr=0; +static char *yy_full_match; +static int yy_lp; +static int yy_looking_for_trail_begin = 0; +static int yy_full_lp; +static int *yy_full_state; +#define YY_TRAILING_MASK 0x2000 +#define YY_TRAILING_HEAD_MASK 0x4000 +#define REJECT \ +{ \ +*yy_cp = (yy_hold_char); /* undo effects of setting up yytext */ \ +yy_cp = (yy_full_match); /* restore poss. backed-over text */ \ +(yy_lp) = (yy_full_lp); /* restore orig. 
accepting pos. */ \ +(yy_state_ptr) = (yy_full_state); /* restore orig. state */ \ +yy_current_state = *(yy_state_ptr); /* restore curr. state */ \ +++(yy_lp); \ +goto find_rule; \ +} + +#define yymore() yymore_used_but_not_detected +#define YY_MORE_ADJ 0 +#define YY_RESTORE_YY_MORE_OFFSET +char *yytext; +#line 1 "keyval_lex.l" +#define YY_NO_INPUT 1 +#line 6 "keyval_lex.l" +/* + * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2006 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * Copyright (c) 2012 Los Alamos National Security, LLC. All rights + * reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include + +#include +#ifdef HAVE_UNISTD_H +#include +#endif + +#include "src/util/keyval/keyval_lex.h" + +/* + * local functions + */ + +BEGIN_C_DECLS + +int pmix_util_keyval_yywrap(void); + +END_C_DECLS + +/* + * global variables + */ +int pmix_util_keyval_yynewlines = 1; +bool pmix_util_keyval_parse_done = false; +char *pmix_util_keyval_string = NULL; + +#line 885 "keyval_lex.c" + +#line 887 "keyval_lex.c" + +#define INITIAL 0 +#define VALUE 1 +#define comment 2 +#define MCA_VALUE 3 + +#ifndef YY_NO_UNISTD_H +/* Special case for "unistd.h", since it is non-ANSI. We include it way + * down here because we want the user's section 1 to have been scanned first. + * The user has a chance to override it with an option. + */ +#include +#endif + +#ifndef YY_EXTRA_TYPE +#define YY_EXTRA_TYPE void * +#endif + +static int yy_init_globals ( void ); + +/* Accessor methods to globals. 
+ These are made visible to non-reentrant scanners for convenience. */ + +int yylex_destroy ( void ); + +int yyget_debug ( void ); + +void yyset_debug ( int debug_flag ); + +YY_EXTRA_TYPE yyget_extra ( void ); + +void yyset_extra ( YY_EXTRA_TYPE user_defined ); + +FILE *yyget_in ( void ); + +void yyset_in ( FILE * _in_str ); + +FILE *yyget_out ( void ); + +void yyset_out ( FILE * _out_str ); + + int yyget_leng ( void ); + +char *yyget_text ( void ); + +int yyget_lineno ( void ); + +void yyset_lineno ( int _line_number ); + +/* Macros after this point can all be overridden by user definitions in + * section 1. + */ + +#ifndef YY_SKIP_YYWRAP +#ifdef __cplusplus +extern "C" int yywrap ( void ); +#else +extern int yywrap ( void ); +#endif +#endif + +#ifndef YY_NO_UNPUT + +#endif + +#ifndef yytext_ptr +static void yy_flex_strncpy ( char *, const char *, int ); +#endif + +#ifdef YY_NEED_STRLEN +static int yy_flex_strlen ( const char * ); +#endif + +#ifndef YY_NO_INPUT +#ifdef __cplusplus +static int yyinput ( void ); +#else +static int input ( void ); +#endif + +#endif + +/* Amount of stuff to slurp up with each read. */ +#ifndef YY_READ_BUF_SIZE +#ifdef __ia64__ +/* On IA-64, the buffer size is 16k, not 8k */ +#define YY_READ_BUF_SIZE 16384 +#else +#define YY_READ_BUF_SIZE 8192 +#endif /* __ia64__ */ +#endif + +/* Copy whatever the last rule matched to the standard output. */ +#ifndef ECHO +/* This used to be an fputs(), but since the string might contain NUL's, + * we now use fwrite(). + */ +#define ECHO do { if (fwrite( yytext, (size_t) yyleng, 1, yyout )) {} } while (0) +#endif + +/* Gets input and stuffs it into "buf". number of characters read, or YY_NULL, + * is returned in "result". 
+ */ +#ifndef YY_INPUT +#define YY_INPUT(buf,result,max_size) \ + if ( YY_CURRENT_BUFFER_LVALUE->yy_is_interactive ) \ + { \ + int c = '*'; \ + int n; \ + for ( n = 0; n < max_size && \ + (c = getc( yyin )) != EOF && c != '\n'; ++n ) \ + buf[n] = (char) c; \ + if ( c == '\n' ) \ + buf[n++] = (char) c; \ + if ( c == EOF && ferror( yyin ) ) \ + YY_FATAL_ERROR( "input in flex scanner failed" ); \ + result = n; \ + } \ + else \ + { \ + errno=0; \ + while ( (result = (int) fread(buf, 1, (yy_size_t) max_size, yyin)) == 0 && ferror(yyin)) \ + { \ + if( errno != EINTR) \ + { \ + YY_FATAL_ERROR( "input in flex scanner failed" ); \ + break; \ + } \ + errno=0; \ + clearerr(yyin); \ + } \ + }\ +\ + +#endif + +/* No semi-colon after return; correct usage is to write "yyterminate();" - + * we don't want an extra ';' after the "return" because that will cause + * some compilers to complain about unreachable statements. + */ +#ifndef yyterminate +#define yyterminate() return YY_NULL +#endif + +/* Number of entries by which start-condition stack grows. */ +#ifndef YY_START_STACK_INCR +#define YY_START_STACK_INCR 25 +#endif + +/* Report a fatal error. */ +#ifndef YY_FATAL_ERROR +#define YY_FATAL_ERROR(msg) yy_fatal_error( msg ) +#endif + +/* end tables serialization structures and prototypes */ + +/* Default declaration of generated scanner - a define so the user can + * easily add parameters. + */ +#ifndef YY_DECL +#define YY_DECL_IS_OURS 1 + +extern int yylex (void); + +#define YY_DECL int yylex (void) +#endif /* !YY_DECL */ + +/* Code executed at the beginning of each rule, after yytext and yyleng + * have been set up. + */ +#ifndef YY_USER_ACTION +#define YY_USER_ACTION +#endif + +/* Code executed at the end of each rule. */ +#ifndef YY_BREAK +#define YY_BREAK /*LINTED*/break; +#endif + +#define YY_RULE_SETUP \ + YY_USER_ACTION + +/** The main scanner function which does all the work. 
+ */ +YY_DECL +{ + yy_state_type yy_current_state; + char *yy_cp, *yy_bp; + int yy_act; + + if ( !(yy_init) ) + { + (yy_init) = 1; + +#ifdef YY_USER_INIT + YY_USER_INIT; +#endif + + /* Create the reject buffer large enough to save one state per allowed character. */ + if ( ! (yy_state_buf) ) + (yy_state_buf) = (yy_state_type *)yyalloc(YY_STATE_BUF_SIZE ); + if ( ! (yy_state_buf) ) + YY_FATAL_ERROR( "out of dynamic memory in yylex()" ); + + if ( ! (yy_start) ) + (yy_start) = 1; /* first start state */ + + if ( ! yyin ) + yyin = stdin; + + if ( ! yyout ) + yyout = stdout; + + if ( ! YY_CURRENT_BUFFER ) { + yyensure_buffer_stack (); + YY_CURRENT_BUFFER_LVALUE = + yy_create_buffer( yyin, YY_BUF_SIZE ); + } + + yy_load_buffer_state( ); + } + + { +#line 61 "keyval_lex.l" + + +#line 1114 "keyval_lex.c" + + while ( /*CONSTCOND*/1 ) /* loops until end-of-file is reached */ + { + yy_cp = (yy_c_buf_p); + + /* Support of yytext. */ + *yy_cp = (yy_hold_char); + + /* yy_bp points to the position in yy_ch_buf of the start of + * the current run. 
+ */ + yy_bp = yy_cp; + + yy_current_state = (yy_start); + + (yy_state_ptr) = (yy_state_buf); + *(yy_state_ptr)++ = yy_current_state; + +yy_match: + do + { + YY_CHAR yy_c = yy_ec[YY_SC_TO_UI(*yy_cp)] ; + while ( yy_chk[yy_base[yy_current_state] + yy_c] != yy_current_state ) + { + yy_current_state = (int) yy_def[yy_current_state]; + if ( yy_current_state >= 84 ) + yy_c = yy_meta[yy_c]; + } + yy_current_state = yy_nxt[yy_base[yy_current_state] + yy_c]; + *(yy_state_ptr)++ = yy_current_state; + ++yy_cp; + } + while ( yy_base[yy_current_state] != 252 ); + +yy_find_action: + yy_current_state = *--(yy_state_ptr); + (yy_lp) = yy_accept[yy_current_state]; +find_rule: /* we branch to this label when backing up */ + for ( ; ; ) /* until we find what rule we matched */ + { + if ( (yy_lp) && (yy_lp) < yy_accept[yy_current_state + 1] ) + { + yy_act = yy_acclist[(yy_lp)]; + if ( yy_act & YY_TRAILING_HEAD_MASK || + (yy_looking_for_trail_begin) ) + { + if ( yy_act == (yy_looking_for_trail_begin) ) + { + (yy_looking_for_trail_begin) = 0; + yy_act &= ~YY_TRAILING_HEAD_MASK; + break; + } + } + else if ( yy_act & YY_TRAILING_MASK ) + { + (yy_looking_for_trail_begin) = yy_act & ~YY_TRAILING_MASK; + (yy_looking_for_trail_begin) |= YY_TRAILING_HEAD_MASK; + } + else + { + (yy_full_match) = yy_cp; + (yy_full_state) = (yy_state_ptr); + (yy_full_lp) = (yy_lp); + break; + } + ++(yy_lp); + goto find_rule; + } + --yy_cp; + yy_current_state = *--(yy_state_ptr); + (yy_lp) = yy_accept[yy_current_state]; + } + + YY_DO_BEFORE_ACTION; + + if ( yy_act != YY_END_OF_BUFFER && yy_rule_can_match_eol[yy_act] ) + { + int yyl; + for ( yyl = 0; yyl < yyleng; ++yyl ) + if ( yytext[yyl] == '\n' ) + + yylineno++; +; + } + +do_action: /* This label is used only to access EOF actions. 
*/ + + switch ( yy_act ) + { /* beginning of action switch */ +case 1: +/* rule 1 can match eol */ +YY_RULE_SETUP +#line 63 "keyval_lex.l" +{ pmix_util_keyval_yynewlines++; return PMIX_UTIL_KEYVAL_PARSE_NEWLINE; } + YY_BREAK +case 2: +/* rule 2 can match eol */ +YY_RULE_SETUP +#line 64 "keyval_lex.l" +{ pmix_util_keyval_yynewlines++; return PMIX_UTIL_KEYVAL_PARSE_NEWLINE; } + YY_BREAK +case 3: +/* rule 3 can match eol */ +YY_RULE_SETUP +#line 65 "keyval_lex.l" +{ pmix_util_keyval_yynewlines++; return PMIX_UTIL_KEYVAL_PARSE_NEWLINE; } + YY_BREAK +case 4: +YY_RULE_SETUP +#line 67 "keyval_lex.l" +{ BEGIN(comment); + return PMIX_UTIL_KEYVAL_PARSE_NEWLINE; } + YY_BREAK +case 5: +YY_RULE_SETUP +#line 69 "keyval_lex.l" +; /* Eat up non '*'s */ + YY_BREAK +case 6: +YY_RULE_SETUP +#line 70 "keyval_lex.l" +; /* Eat '*'s not followed by a '/' */ + YY_BREAK +case 7: +/* rule 7 can match eol */ +YY_RULE_SETUP +#line 71 "keyval_lex.l" +{ pmix_util_keyval_yynewlines++; + return PMIX_UTIL_KEYVAL_PARSE_NEWLINE; } + YY_BREAK +case 8: +YY_RULE_SETUP +#line 73 "keyval_lex.l" +{ BEGIN(INITIAL); /* Done with Block Comment */ + return PMIX_UTIL_KEYVAL_PARSE_NEWLINE; } + YY_BREAK +case 9: +YY_RULE_SETUP +#line 76 "keyval_lex.l" +{ BEGIN(VALUE); return PMIX_UTIL_KEYVAL_PARSE_EQUAL; } + YY_BREAK +case 10: +YY_RULE_SETUP +#line 77 "keyval_lex.l" +; /* whitespace */ + YY_BREAK +case 11: +YY_RULE_SETUP +#line 78 "keyval_lex.l" +{ return PMIX_UTIL_KEYVAL_PARSE_SINGLE_WORD; } + YY_BREAK +case 12: +/* rule 12 can match eol */ +YY_RULE_SETUP +#line 80 "keyval_lex.l" +{ BEGIN(INITIAL); return PMIX_UTIL_KEYVAL_PARSE_NEWLINE; } + YY_BREAK +case 13: +YY_RULE_SETUP +#line 81 "keyval_lex.l" +{ return PMIX_UTIL_KEYVAL_PARSE_VALUE; } + YY_BREAK +case 14: +YY_RULE_SETUP +#line 83 "keyval_lex.l" +{ BEGIN(INITIAL); return PMIX_UTIL_KEYVAL_PARSE_VALUE; } + YY_BREAK +case 15: +YY_RULE_SETUP +#line 84 "keyval_lex.l" +{ BEGIN(INITIAL); return PMIX_UTIL_KEYVAL_PARSE_VALUE; } + YY_BREAK +case 16: +YY_RULE_SETUP 
+#line 85 "keyval_lex.l" +{ BEGIN(INITIAL); return PMIX_UTIL_KEYVAL_PARSE_VALUE; } + YY_BREAK +case 17: +/* rule 17 can match eol */ +YY_RULE_SETUP +#line 86 "keyval_lex.l" +{ BEGIN(INITIAL); BEGIN(INITIAL); return PMIX_UTIL_KEYVAL_PARSE_NEWLINE; } + YY_BREAK +case 18: +YY_RULE_SETUP +#line 88 "keyval_lex.l" +{BEGIN(MCA_VALUE); return PMIX_UTIL_KEYVAL_PARSE_MCAVAR; } + YY_BREAK +case 19: +YY_RULE_SETUP +#line 89 "keyval_lex.l" +{BEGIN(MCA_VALUE); return PMIX_UTIL_KEYVAL_PARSE_ENVEQL; } + YY_BREAK +case 20: +/* rule 20 can match eol */ +*yy_cp = (yy_hold_char); /* undo effects of setting up yytext */ +YY_LINENO_REWIND_TO(yy_cp - 1); +(yy_c_buf_p) = yy_cp -= 1; +YY_DO_BEFORE_ACTION; /* set up yytext again */ +YY_RULE_SETUP +#line 90 "keyval_lex.l" +{ return PMIX_UTIL_KEYVAL_PARSE_ENVVAR; } + YY_BREAK +case 21: +YY_RULE_SETUP +#line 92 "keyval_lex.l" +{ return PMIX_UTIL_KEYVAL_PARSE_ERROR; } + YY_BREAK +case 22: +YY_RULE_SETUP +#line 94 "keyval_lex.l" +ECHO; + YY_BREAK +#line 1328 "keyval_lex.c" + case YY_STATE_EOF(INITIAL): + case YY_STATE_EOF(VALUE): + case YY_STATE_EOF(comment): + case YY_STATE_EOF(MCA_VALUE): + yyterminate(); + + case YY_END_OF_BUFFER: + { + /* Amount of text matched not including the EOB char. */ + int yy_amount_of_matched_text = (int) (yy_cp - (yytext_ptr)) - 1; + + /* Undo the effects of YY_DO_BEFORE_ACTION. */ + *yy_cp = (yy_hold_char); + YY_RESTORE_YY_MORE_OFFSET + + if ( YY_CURRENT_BUFFER_LVALUE->yy_buffer_status == YY_BUFFER_NEW ) + { + /* We're scanning a new file or input source. It's + * possible that this happened because the user + * just pointed yyin at a new source and called + * yylex(). If so, then we have to assure + * consistency between YY_CURRENT_BUFFER and our + * globals. Here is the right place to do so, because + * this is the first action (other than possibly a + * back-up) that will match for the new input source. 
+ */ + (yy_n_chars) = YY_CURRENT_BUFFER_LVALUE->yy_n_chars; + YY_CURRENT_BUFFER_LVALUE->yy_input_file = yyin; + YY_CURRENT_BUFFER_LVALUE->yy_buffer_status = YY_BUFFER_NORMAL; + } + + /* Note that here we test for yy_c_buf_p "<=" to the position + * of the first EOB in the buffer, since yy_c_buf_p will + * already have been incremented past the NUL character + * (since all states make transitions on EOB to the + * end-of-buffer state). Contrast this with the test + * in input(). + */ + if ( (yy_c_buf_p) <= &YY_CURRENT_BUFFER_LVALUE->yy_ch_buf[(yy_n_chars)] ) + { /* This was really a NUL. */ + yy_state_type yy_next_state; + + (yy_c_buf_p) = (yytext_ptr) + yy_amount_of_matched_text; + + yy_current_state = yy_get_previous_state( ); + + /* Okay, we're now positioned to make the NUL + * transition. We couldn't have + * yy_get_previous_state() go ahead and do it + * for us because it doesn't know how to deal + * with the possibility of jamming (and we don't + * want to build jamming into it because then it + * will run more slowly). + */ + + yy_next_state = yy_try_NUL_trans( yy_current_state ); + + yy_bp = (yytext_ptr) + YY_MORE_ADJ; + + if ( yy_next_state ) + { + /* Consume the NUL. */ + yy_cp = ++(yy_c_buf_p); + yy_current_state = yy_next_state; + goto yy_match; + } + + else + { + yy_cp = (yy_c_buf_p); + goto yy_find_action; + } + } + + else switch ( yy_get_next_buffer( ) ) + { + case EOB_ACT_END_OF_FILE: + { + (yy_did_buffer_switch_on_eof) = 0; + + if ( yywrap( ) ) + { + /* Note: because we've taken care in + * yy_get_next_buffer() to have set up + * yytext, we can now set up + * yy_c_buf_p so that if some total + * hoser (like flex itself) wants to + * call the scanner after we return the + * YY_NULL, it'll still work - another + * YY_NULL will get returned. + */ + (yy_c_buf_p) = (yytext_ptr) + YY_MORE_ADJ; + + yy_act = YY_STATE_EOF(YY_START); + goto do_action; + } + + else + { + if ( ! 
(yy_did_buffer_switch_on_eof) ) + YY_NEW_FILE; + } + break; + } + + case EOB_ACT_CONTINUE_SCAN: + (yy_c_buf_p) = + (yytext_ptr) + yy_amount_of_matched_text; + + yy_current_state = yy_get_previous_state( ); + + yy_cp = (yy_c_buf_p); + yy_bp = (yytext_ptr) + YY_MORE_ADJ; + goto yy_match; + + case EOB_ACT_LAST_MATCH: + (yy_c_buf_p) = + &YY_CURRENT_BUFFER_LVALUE->yy_ch_buf[(yy_n_chars)]; + + yy_current_state = yy_get_previous_state( ); + + yy_cp = (yy_c_buf_p); + yy_bp = (yytext_ptr) + YY_MORE_ADJ; + goto yy_find_action; + } + break; + } + + default: + YY_FATAL_ERROR( + "fatal flex scanner internal error--no action found" ); + } /* end of action switch */ + } /* end of scanning one token */ + } /* end of user's declarations */ +} /* end of yylex */ + +/* yy_get_next_buffer - try to read in a new buffer + * + * Returns a code representing an action: + * EOB_ACT_LAST_MATCH - + * EOB_ACT_CONTINUE_SCAN - continue scanning from current position + * EOB_ACT_END_OF_FILE - end of file + */ +static int yy_get_next_buffer (void) +{ + char *dest = YY_CURRENT_BUFFER_LVALUE->yy_ch_buf; + char *source = (yytext_ptr); + int number_to_move, i; + int ret_val; + + if ( (yy_c_buf_p) > &YY_CURRENT_BUFFER_LVALUE->yy_ch_buf[(yy_n_chars) + 1] ) + YY_FATAL_ERROR( + "fatal flex scanner internal error--end of buffer missed" ); + + if ( YY_CURRENT_BUFFER_LVALUE->yy_fill_buffer == 0 ) + { /* Don't try to fill the buffer, so this is an EOF. */ + if ( (yy_c_buf_p) - (yytext_ptr) - YY_MORE_ADJ == 1 ) + { + /* We matched a single character, the EOB, so + * treat this as a final EOF. + */ + return EOB_ACT_END_OF_FILE; + } + + else + { + /* We matched some text prior to the EOB, first + * process it. + */ + return EOB_ACT_LAST_MATCH; + } + } + + /* Try to read more data. */ + + /* First move last chars to start of buffer. 
*/ + number_to_move = (int) ((yy_c_buf_p) - (yytext_ptr) - 1); + + for ( i = 0; i < number_to_move; ++i ) + *(dest++) = *(source++); + + if ( YY_CURRENT_BUFFER_LVALUE->yy_buffer_status == YY_BUFFER_EOF_PENDING ) + /* don't do the read, it's not guaranteed to return an EOF, + * just force an EOF + */ + YY_CURRENT_BUFFER_LVALUE->yy_n_chars = (yy_n_chars) = 0; + + else + { + int num_to_read = + YY_CURRENT_BUFFER_LVALUE->yy_buf_size - number_to_move - 1; + + while ( num_to_read <= 0 ) + { /* Not enough room in the buffer - grow it. */ + + YY_FATAL_ERROR( +"input buffer overflow, can't enlarge buffer because scanner uses REJECT" ); + + } + + if ( num_to_read > YY_READ_BUF_SIZE ) + num_to_read = YY_READ_BUF_SIZE; + + /* Read in more data. */ + YY_INPUT( (&YY_CURRENT_BUFFER_LVALUE->yy_ch_buf[number_to_move]), + (yy_n_chars), num_to_read ); + + YY_CURRENT_BUFFER_LVALUE->yy_n_chars = (yy_n_chars); + } + + if ( (yy_n_chars) == 0 ) + { + if ( number_to_move == YY_MORE_ADJ ) + { + ret_val = EOB_ACT_END_OF_FILE; + yyrestart( yyin ); + } + + else + { + ret_val = EOB_ACT_LAST_MATCH; + YY_CURRENT_BUFFER_LVALUE->yy_buffer_status = + YY_BUFFER_EOF_PENDING; + } + } + + else + ret_val = EOB_ACT_CONTINUE_SCAN; + + if (((yy_n_chars) + number_to_move) > YY_CURRENT_BUFFER_LVALUE->yy_buf_size) { + /* Extend the array by 50%, plus the number we really need. */ + int new_size = (yy_n_chars) + number_to_move + ((yy_n_chars) >> 1); + YY_CURRENT_BUFFER_LVALUE->yy_ch_buf = (char *) yyrealloc( + (void *) YY_CURRENT_BUFFER_LVALUE->yy_ch_buf, (yy_size_t) new_size ); + if ( ! 
YY_CURRENT_BUFFER_LVALUE->yy_ch_buf ) + YY_FATAL_ERROR( "out of dynamic memory in yy_get_next_buffer()" ); + /* "- 2" to take care of EOB's */ + YY_CURRENT_BUFFER_LVALUE->yy_buf_size = (int) (new_size - 2); + } + + (yy_n_chars) += number_to_move; + YY_CURRENT_BUFFER_LVALUE->yy_ch_buf[(yy_n_chars)] = YY_END_OF_BUFFER_CHAR; + YY_CURRENT_BUFFER_LVALUE->yy_ch_buf[(yy_n_chars) + 1] = YY_END_OF_BUFFER_CHAR; + + (yytext_ptr) = &YY_CURRENT_BUFFER_LVALUE->yy_ch_buf[0]; + + return ret_val; +} + +/* yy_get_previous_state - get the state just before the EOB char was reached */ + + static yy_state_type yy_get_previous_state (void) +{ + yy_state_type yy_current_state; + char *yy_cp; + + yy_current_state = (yy_start); + + (yy_state_ptr) = (yy_state_buf); + *(yy_state_ptr)++ = yy_current_state; + + for ( yy_cp = (yytext_ptr) + YY_MORE_ADJ; yy_cp < (yy_c_buf_p); ++yy_cp ) + { + YY_CHAR yy_c = (*yy_cp ? yy_ec[YY_SC_TO_UI(*yy_cp)] : 1); + while ( yy_chk[yy_base[yy_current_state] + yy_c] != yy_current_state ) + { + yy_current_state = (int) yy_def[yy_current_state]; + if ( yy_current_state >= 84 ) + yy_c = yy_meta[yy_c]; + } + yy_current_state = yy_nxt[yy_base[yy_current_state] + yy_c]; + *(yy_state_ptr)++ = yy_current_state; + } + + return yy_current_state; +} + +/* yy_try_NUL_trans - try to make a transition on the NUL character + * + * synopsis + * next_state = yy_try_NUL_trans( current_state ); + */ + static yy_state_type yy_try_NUL_trans (yy_state_type yy_current_state ) +{ + int yy_is_jam; + + YY_CHAR yy_c = 1; + while ( yy_chk[yy_base[yy_current_state] + yy_c] != yy_current_state ) + { + yy_current_state = (int) yy_def[yy_current_state]; + if ( yy_current_state >= 84 ) + yy_c = yy_meta[yy_c]; + } + yy_current_state = yy_nxt[yy_base[yy_current_state] + yy_c]; + yy_is_jam = (yy_current_state == 83); + if ( ! yy_is_jam ) + *(yy_state_ptr)++ = yy_current_state; + + return yy_is_jam ? 
0 : yy_current_state; +} + +#ifndef YY_NO_UNPUT + +#endif + +#ifndef YY_NO_INPUT +#ifdef __cplusplus + static int yyinput (void) +#else + static int input (void) +#endif + +{ + int c; + + *(yy_c_buf_p) = (yy_hold_char); + + if ( *(yy_c_buf_p) == YY_END_OF_BUFFER_CHAR ) + { + /* yy_c_buf_p now points to the character we want to return. + * If this occurs *before* the EOB characters, then it's a + * valid NUL; if not, then we've hit the end of the buffer. + */ + if ( (yy_c_buf_p) < &YY_CURRENT_BUFFER_LVALUE->yy_ch_buf[(yy_n_chars)] ) + /* This was really a NUL. */ + *(yy_c_buf_p) = '\0'; + + else + { /* need more input */ + int offset = (int) ((yy_c_buf_p) - (yytext_ptr)); + ++(yy_c_buf_p); + + switch ( yy_get_next_buffer( ) ) + { + case EOB_ACT_LAST_MATCH: + /* This happens because yy_g_n_b() + * sees that we've accumulated a + * token and flags that we need to + * try matching the token before + * proceeding. But for input(), + * there's no matching to consider. + * So convert the EOB_ACT_LAST_MATCH + * to EOB_ACT_END_OF_FILE. + */ + + /* Reset buffer status. */ + yyrestart( yyin ); + + /*FALLTHROUGH*/ + + case EOB_ACT_END_OF_FILE: + { + if ( yywrap( ) ) + return 0; + + if ( ! (yy_did_buffer_switch_on_eof) ) + YY_NEW_FILE; +#ifdef __cplusplus + return yyinput(); +#else + return input(); +#endif + } + + case EOB_ACT_CONTINUE_SCAN: + (yy_c_buf_p) = (yytext_ptr) + offset; + break; + } + } + } + + c = *(unsigned char *) (yy_c_buf_p); /* cast for 8-bit char's */ + *(yy_c_buf_p) = '\0'; /* preserve yytext */ + (yy_hold_char) = *++(yy_c_buf_p); + + if ( c == '\n' ) + + yylineno++; +; + + return c; +} +#endif /* ifndef YY_NO_INPUT */ + +/** Immediately switch to a different input stream. + * @param input_file A readable stream. + * + * @note This function does not reset the start condition to @c INITIAL . + */ + void yyrestart (FILE * input_file ) +{ + + if ( ! 
YY_CURRENT_BUFFER ){ + yyensure_buffer_stack (); + YY_CURRENT_BUFFER_LVALUE = + yy_create_buffer( yyin, YY_BUF_SIZE ); + } + + yy_init_buffer( YY_CURRENT_BUFFER, input_file ); + yy_load_buffer_state( ); +} + +/** Switch to a different input buffer. + * @param new_buffer The new input buffer. + * + */ + void yy_switch_to_buffer (YY_BUFFER_STATE new_buffer ) +{ + + /* TODO. We should be able to replace this entire function body + * with + * yypop_buffer_state(); + * yypush_buffer_state(new_buffer); + */ + yyensure_buffer_stack (); + if ( YY_CURRENT_BUFFER == new_buffer ) + return; + + if ( YY_CURRENT_BUFFER ) + { + /* Flush out information for old buffer. */ + *(yy_c_buf_p) = (yy_hold_char); + YY_CURRENT_BUFFER_LVALUE->yy_buf_pos = (yy_c_buf_p); + YY_CURRENT_BUFFER_LVALUE->yy_n_chars = (yy_n_chars); + } + + YY_CURRENT_BUFFER_LVALUE = new_buffer; + yy_load_buffer_state( ); + + /* We don't actually know whether we did this switch during + * EOF (yywrap()) processing, but the only time this flag + * is looked at is after yywrap() is called, so it's safe + * to go ahead and always set it. + */ + (yy_did_buffer_switch_on_eof) = 1; +} + +static void yy_load_buffer_state (void) +{ + (yy_n_chars) = YY_CURRENT_BUFFER_LVALUE->yy_n_chars; + (yytext_ptr) = (yy_c_buf_p) = YY_CURRENT_BUFFER_LVALUE->yy_buf_pos; + yyin = YY_CURRENT_BUFFER_LVALUE->yy_input_file; + (yy_hold_char) = *(yy_c_buf_p); +} + +/** Allocate and initialize an input buffer state. + * @param file A readable stream. + * @param size The character buffer size in bytes. When in doubt, use @c YY_BUF_SIZE. + * + * @return the allocated buffer state. + */ + YY_BUFFER_STATE yy_create_buffer (FILE * file, int size ) +{ + YY_BUFFER_STATE b; + + b = (YY_BUFFER_STATE) yyalloc( sizeof( struct yy_buffer_state ) ); + if ( ! 
b ) + YY_FATAL_ERROR( "out of dynamic memory in yy_create_buffer()" ); + + b->yy_buf_size = size; + + /* yy_ch_buf has to be 2 characters longer than the size given because + * we need to put in 2 end-of-buffer characters. + */ + b->yy_ch_buf = (char *) yyalloc( (yy_size_t) (b->yy_buf_size + 2) ); + if ( ! b->yy_ch_buf ) + YY_FATAL_ERROR( "out of dynamic memory in yy_create_buffer()" ); + + b->yy_is_our_buffer = 1; + + yy_init_buffer( b, file ); + + return b; +} + +/** Destroy the buffer. + * @param b a buffer created with yy_create_buffer() + * + */ + void yy_delete_buffer (YY_BUFFER_STATE b ) +{ + + if ( ! b ) + return; + + if ( b == YY_CURRENT_BUFFER ) /* Not sure if we should pop here. */ + YY_CURRENT_BUFFER_LVALUE = (YY_BUFFER_STATE) 0; + + if ( b->yy_is_our_buffer ) + yyfree( (void *) b->yy_ch_buf ); + + yyfree( (void *) b ); +} + +/* Initializes or reinitializes a buffer. + * This function is sometimes called more than once on the same buffer, + * such as during a yyrestart() or at EOF. + */ + static void yy_init_buffer (YY_BUFFER_STATE b, FILE * file ) + +{ + int oerrno = errno; + + yy_flush_buffer( b ); + + b->yy_input_file = file; + b->yy_fill_buffer = 1; + + /* If b is the current buffer, then yy_init_buffer was _probably_ + * called from yyrestart() or through yy_get_next_buffer. + * In that case, we don't want to reset the lineno or column. + */ + if (b != YY_CURRENT_BUFFER){ + b->yy_bs_lineno = 1; + b->yy_bs_column = 0; + } + + b->yy_is_interactive = file ? (isatty( fileno(file) ) > 0) : 0; + + errno = oerrno; +} + +/** Discard all buffered characters. On the next scan, YY_INPUT will be called. + * @param b the buffer state to be flushed, usually @c YY_CURRENT_BUFFER. + * + */ + void yy_flush_buffer (YY_BUFFER_STATE b ) +{ + if ( ! b ) + return; + + b->yy_n_chars = 0; + + /* We always need two end-of-buffer characters. The first causes + * a transition to the end-of-buffer state. The second causes + * a jam in that state. 
+ */ + b->yy_ch_buf[0] = YY_END_OF_BUFFER_CHAR; + b->yy_ch_buf[1] = YY_END_OF_BUFFER_CHAR; + + b->yy_buf_pos = &b->yy_ch_buf[0]; + + b->yy_at_bol = 1; + b->yy_buffer_status = YY_BUFFER_NEW; + + if ( b == YY_CURRENT_BUFFER ) + yy_load_buffer_state( ); +} + +/** Pushes the new state onto the stack. The new state becomes + * the current state. This function will allocate the stack + * if necessary. + * @param new_buffer The new state. + * + */ +void yypush_buffer_state (YY_BUFFER_STATE new_buffer ) +{ + if (new_buffer == NULL) + return; + + yyensure_buffer_stack(); + + /* This block is copied from yy_switch_to_buffer. */ + if ( YY_CURRENT_BUFFER ) + { + /* Flush out information for old buffer. */ + *(yy_c_buf_p) = (yy_hold_char); + YY_CURRENT_BUFFER_LVALUE->yy_buf_pos = (yy_c_buf_p); + YY_CURRENT_BUFFER_LVALUE->yy_n_chars = (yy_n_chars); + } + + /* Only push if top exists. Otherwise, replace top. */ + if (YY_CURRENT_BUFFER) + (yy_buffer_stack_top)++; + YY_CURRENT_BUFFER_LVALUE = new_buffer; + + /* copied from yy_switch_to_buffer. */ + yy_load_buffer_state( ); + (yy_did_buffer_switch_on_eof) = 1; +} + +/** Removes and deletes the top of the stack, if present. + * The next element becomes the new top. + * + */ +void yypop_buffer_state (void) +{ + if (!YY_CURRENT_BUFFER) + return; + + yy_delete_buffer(YY_CURRENT_BUFFER ); + YY_CURRENT_BUFFER_LVALUE = NULL; + if ((yy_buffer_stack_top) > 0) + --(yy_buffer_stack_top); + + if (YY_CURRENT_BUFFER) { + yy_load_buffer_state( ); + (yy_did_buffer_switch_on_eof) = 1; + } +} + +/* Allocates the stack if it does not exist. + * Guarantees space for at least one push. + */ +static void yyensure_buffer_stack (void) +{ + yy_size_t num_to_alloc; + + if (!(yy_buffer_stack)) { + + /* First allocation is just for 2 elements, since we don't know if this + * scanner will even need a stack. We use 2 instead of 1 to avoid an + * immediate realloc on the next call. + */ + num_to_alloc = 1; /* After all that talk, this was set to 1 anyways... 
*/ + (yy_buffer_stack) = (struct yy_buffer_state**)yyalloc + (num_to_alloc * sizeof(struct yy_buffer_state*) + ); + if ( ! (yy_buffer_stack) ) + YY_FATAL_ERROR( "out of dynamic memory in yyensure_buffer_stack()" ); + + memset((yy_buffer_stack), 0, num_to_alloc * sizeof(struct yy_buffer_state*)); + + (yy_buffer_stack_max) = num_to_alloc; + (yy_buffer_stack_top) = 0; + return; + } + + if ((yy_buffer_stack_top) >= ((yy_buffer_stack_max)) - 1){ + + /* Increase the buffer to prepare for a possible push. */ + yy_size_t grow_size = 8 /* arbitrary grow size */; + + num_to_alloc = (yy_buffer_stack_max) + grow_size; + (yy_buffer_stack) = (struct yy_buffer_state**)yyrealloc + ((yy_buffer_stack), + num_to_alloc * sizeof(struct yy_buffer_state*) + ); + if ( ! (yy_buffer_stack) ) + YY_FATAL_ERROR( "out of dynamic memory in yyensure_buffer_stack()" ); + + /* zero only the new slots.*/ + memset((yy_buffer_stack) + (yy_buffer_stack_max), 0, grow_size * sizeof(struct yy_buffer_state*)); + (yy_buffer_stack_max) = num_to_alloc; + } +} + +/** Setup the input buffer state to scan directly from a user-specified character buffer. + * @param base the character buffer + * @param size the size in bytes of the character buffer + * + * @return the newly allocated buffer state object. + */ +YY_BUFFER_STATE yy_scan_buffer (char * base, yy_size_t size ) +{ + YY_BUFFER_STATE b; + + if ( size < 2 || + base[size-2] != YY_END_OF_BUFFER_CHAR || + base[size-1] != YY_END_OF_BUFFER_CHAR ) + /* They forgot to leave room for the EOB's. */ + return NULL; + + b = (YY_BUFFER_STATE) yyalloc( sizeof( struct yy_buffer_state ) ); + if ( ! 
b ) + YY_FATAL_ERROR( "out of dynamic memory in yy_scan_buffer()" ); + + b->yy_buf_size = (int) (size - 2); /* "- 2" to take care of EOB's */ + b->yy_buf_pos = b->yy_ch_buf = base; + b->yy_is_our_buffer = 0; + b->yy_input_file = NULL; + b->yy_n_chars = b->yy_buf_size; + b->yy_is_interactive = 0; + b->yy_at_bol = 1; + b->yy_fill_buffer = 0; + b->yy_buffer_status = YY_BUFFER_NEW; + + yy_switch_to_buffer( b ); + + return b; +} + +/** Setup the input buffer state to scan a string. The next call to yylex() will + * scan from a @e copy of @a str. + * @param yystr a NUL-terminated string to scan + * + * @return the newly allocated buffer state object. + * @note If you want to scan bytes that may contain NUL values, then use + * yy_scan_bytes() instead. + */ +YY_BUFFER_STATE yy_scan_string (const char * yystr ) +{ + + return yy_scan_bytes( yystr, (int) strlen(yystr) ); +} + +/** Setup the input buffer state to scan the given bytes. The next call to yylex() will + * scan from a @e copy of @a bytes. + * @param yybytes the byte buffer to scan + * @param _yybytes_len the number of bytes in the buffer pointed to by @a bytes. + * + * @return the newly allocated buffer state object. + */ +YY_BUFFER_STATE yy_scan_bytes (const char * yybytes, int _yybytes_len ) +{ + YY_BUFFER_STATE b; + char *buf; + yy_size_t n; + int i; + + /* Get memory for full buffer, including space for trailing EOB's. */ + n = (yy_size_t) (_yybytes_len + 2); + buf = (char *) yyalloc( n ); + if ( ! buf ) + YY_FATAL_ERROR( "out of dynamic memory in yy_scan_bytes()" ); + + for ( i = 0; i < _yybytes_len; ++i ) + buf[i] = yybytes[i]; + + buf[_yybytes_len] = buf[_yybytes_len+1] = YY_END_OF_BUFFER_CHAR; + + b = yy_scan_buffer( buf, n ); + if ( ! b ) + YY_FATAL_ERROR( "bad buffer in yy_scan_bytes()" ); + + /* It's okay to grow etc. this buffer, and we should throw it + * away when we're done. 
+ */ + b->yy_is_our_buffer = 1; + + return b; +} + +#ifndef YY_EXIT_FAILURE +#define YY_EXIT_FAILURE 2 +#endif + +static void yynoreturn yy_fatal_error (const char* msg ) +{ + fprintf( stderr, "%s\n", msg ); + exit( YY_EXIT_FAILURE ); +} + +/* Redefine yyless() so it works in section 3 code. */ + +#undef yyless +#define yyless(n) \ + do \ + { \ + /* Undo effects of setting up yytext. */ \ + int yyless_macro_arg = (n); \ + YY_LESS_LINENO(yyless_macro_arg);\ + yytext[yyleng] = (yy_hold_char); \ + (yy_c_buf_p) = yytext + yyless_macro_arg; \ + (yy_hold_char) = *(yy_c_buf_p); \ + *(yy_c_buf_p) = '\0'; \ + yyleng = yyless_macro_arg; \ + } \ + while ( 0 ) + +/* Accessor methods (get/set functions) to struct members. */ + +/** Get the current line number. + * + */ +int yyget_lineno (void) +{ + + return yylineno; +} + +/** Get the input stream. + * + */ +FILE *yyget_in (void) +{ + return yyin; +} + +/** Get the output stream. + * + */ +FILE *yyget_out (void) +{ + return yyout; +} + +/** Get the length of the current token. + * + */ +int yyget_leng (void) +{ + return yyleng; +} + +/** Get the current token. + * + */ + +char *yyget_text (void) +{ + return yytext; +} + +/** Set the current line number. + * @param _line_number line number + * + */ +void yyset_lineno (int _line_number ) +{ + + yylineno = _line_number; +} + +/** Set the input stream. This does not discard the current + * input buffer. + * @param _in_str A readable stream. + * + * @see yy_switch_to_buffer + */ +void yyset_in (FILE * _in_str ) +{ + yyin = _in_str ; +} + +void yyset_out (FILE * _out_str ) +{ + yyout = _out_str ; +} + +int yyget_debug (void) +{ + return yy_flex_debug; +} + +void yyset_debug (int _bdebug ) +{ + yy_flex_debug = _bdebug ; +} + +static int yy_init_globals (void) +{ + /* Initialization is the same as for the non-reentrant scanner. + * This function is called from yylex_destroy(), so don't allocate here. + */ + + /* We do not touch yylineno unless the option is enabled. 
*/ + yylineno = 1; + + (yy_buffer_stack) = NULL; + (yy_buffer_stack_top) = 0; + (yy_buffer_stack_max) = 0; + (yy_c_buf_p) = NULL; + (yy_init) = 0; + (yy_start) = 0; + + (yy_state_buf) = 0; + (yy_state_ptr) = 0; + (yy_full_match) = 0; + (yy_lp) = 0; + +/* Defined in main.c */ +#ifdef YY_STDINIT + yyin = stdin; + yyout = stdout; +#else + yyin = NULL; + yyout = NULL; +#endif + + /* For future reference: Set errno on error, since we are called by + * yylex_init() + */ + return 0; +} + +/* yylex_destroy is for both reentrant and non-reentrant scanners. */ +int yylex_destroy (void) +{ + + /* Pop the buffer stack, destroying each element. */ + while(YY_CURRENT_BUFFER){ + yy_delete_buffer( YY_CURRENT_BUFFER ); + YY_CURRENT_BUFFER_LVALUE = NULL; + yypop_buffer_state(); + } + + /* Destroy the stack itself. */ + yyfree((yy_buffer_stack) ); + (yy_buffer_stack) = NULL; + + yyfree ( (yy_state_buf) ); + (yy_state_buf) = NULL; + + /* Reset the globals. This is important in a non-reentrant scanner so the next time + * yylex() is called, initialization will occur. */ + yy_init_globals( ); + + return 0; +} + +/* + * Internal utility routines. + */ + +#ifndef yytext_ptr +static void yy_flex_strncpy (char* s1, const char * s2, int n ) +{ + + int i; + for ( i = 0; i < n; ++i ) + s1[i] = s2[i]; +} +#endif + +#ifdef YY_NEED_STRLEN +static int yy_flex_strlen (const char * s ) +{ + int n; + for ( n = 0; s[n]; ++n ) + ; + + return n; +} +#endif + +void *yyalloc (yy_size_t size ) +{ + return malloc(size); +} + +void *yyrealloc (void * ptr, yy_size_t size ) +{ + + /* The cast to (char *) in the following accommodates both + * implementations that use char* generic pointers, and those + * that use void* generic pointers. It works with the latter + * because both ANSI C and C++ allow castless assignment from + * any pointer type to void*, and deal with argument conversions + * as though doing an assignment. 
+ */ + return realloc(ptr, size); +} + +void yyfree (void * ptr ) +{ + free( (char *) ptr ); /* see yyrealloc() for (char *) cast */ +} + +#define YYTABLES_NAME "yytables" + +#line 94 "keyval_lex.l" + + +/* Old flex (2.5.4a? and older) does not define a destroy function */ +#if !defined(YY_FLEX_SUBMINOR_VERSION) +#define YY_FLEX_SUBMINOR_VERSION 0 +#endif + +#if (YY_FLEX_MAJOR_VERSION < 2) || (YY_FLEX_MAJOR_VERSION == 2 && (YY_FLEX_MINOR_VERSION < 5 || (YY_FLEX_MINOR_VERSION == 5 && YY_FLEX_SUBMINOR_VERSION < 5))) +int pmix_util_keyval_yylex_destroy(void) +{ + if (NULL != YY_CURRENT_BUFFER) { + yy_delete_buffer(YY_CURRENT_BUFFER); +#if defined(YY_CURRENT_BUFFER_LVALUE) + YY_CURRENT_BUFFER_LVALUE = NULL; +#else + YY_CURRENT_BUFFER = NULL; +#endif /* YY_CURRENT_BUFFER_LVALUE */ + } + return YY_NULL; +} +#endif + +int pmix_util_keyval_yywrap(void) +{ + pmix_util_keyval_parse_done = true; + return 1; +} + +/* + * Ensure that we have a valid yybuffer to use. Specifically, if this + * scanner is invoked a second time, finish_parsing() (above) will + * have been executed, and the current buffer will have been freed. + * Flex doesn't recognize this fact because as far as it's concerned, + * its internal state was already initialized, so it thinks it should + * have a valid buffer. Hence, here we ensure to give it a valid + * buffer. 
+ */ +int pmix_util_keyval_init_buffer(FILE *file) +{ + YY_BUFFER_STATE buf = yy_create_buffer(file, YY_BUF_SIZE); + yy_switch_to_buffer(buf); + + return 0; +} + diff --git a/opal/mca/pmix/pmix4x/openpmix/src/util/show_help_lex.c b/opal/mca/pmix/pmix4x/openpmix/src/util/show_help_lex.c new file mode 100644 index 00000000000..782e4a2fc16 --- /dev/null +++ b/opal/mca/pmix/pmix4x/openpmix/src/util/show_help_lex.c @@ -0,0 +1,2114 @@ + +#line 3 "util/show_help_lex.c" + +#define YY_INT_ALIGNED short int + +/* A lexical scanner generated by flex */ + +#define yy_create_buffer pmix_show_help_yy_create_buffer +#define yy_delete_buffer pmix_show_help_yy_delete_buffer +#define yy_scan_buffer pmix_show_help_yy_scan_buffer +#define yy_scan_string pmix_show_help_yy_scan_string +#define yy_scan_bytes pmix_show_help_yy_scan_bytes +#define yy_init_buffer pmix_show_help_yy_init_buffer +#define yy_flush_buffer pmix_show_help_yy_flush_buffer +#define yy_load_buffer_state pmix_show_help_yy_load_buffer_state +#define yy_switch_to_buffer pmix_show_help_yy_switch_to_buffer +#define yypush_buffer_state pmix_show_help_yypush_buffer_state +#define yypop_buffer_state pmix_show_help_yypop_buffer_state +#define yyensure_buffer_stack pmix_show_help_yyensure_buffer_stack +#define yy_flex_debug pmix_show_help_yy_flex_debug +#define yyin pmix_show_help_yyin +#define yyleng pmix_show_help_yyleng +#define yylex pmix_show_help_yylex +#define yylineno pmix_show_help_yylineno +#define yyout pmix_show_help_yyout +#define yyrestart pmix_show_help_yyrestart +#define yytext pmix_show_help_yytext +#define yywrap pmix_show_help_yywrap +#define yyalloc pmix_show_help_yyalloc +#define yyrealloc pmix_show_help_yyrealloc +#define yyfree pmix_show_help_yyfree + +#define FLEX_SCANNER +#define YY_FLEX_MAJOR_VERSION 2 +#define YY_FLEX_MINOR_VERSION 6 +#define YY_FLEX_SUBMINOR_VERSION 4 +#if YY_FLEX_SUBMINOR_VERSION > 0 +#define FLEX_BETA +#endif + +#ifdef yy_create_buffer +#define 
pmix_show_help_yy_create_buffer_ALREADY_DEFINED +#else +#define yy_create_buffer pmix_show_help_yy_create_buffer +#endif + +#ifdef yy_delete_buffer +#define pmix_show_help_yy_delete_buffer_ALREADY_DEFINED +#else +#define yy_delete_buffer pmix_show_help_yy_delete_buffer +#endif + +#ifdef yy_scan_buffer +#define pmix_show_help_yy_scan_buffer_ALREADY_DEFINED +#else +#define yy_scan_buffer pmix_show_help_yy_scan_buffer +#endif + +#ifdef yy_scan_string +#define pmix_show_help_yy_scan_string_ALREADY_DEFINED +#else +#define yy_scan_string pmix_show_help_yy_scan_string +#endif + +#ifdef yy_scan_bytes +#define pmix_show_help_yy_scan_bytes_ALREADY_DEFINED +#else +#define yy_scan_bytes pmix_show_help_yy_scan_bytes +#endif + +#ifdef yy_init_buffer +#define pmix_show_help_yy_init_buffer_ALREADY_DEFINED +#else +#define yy_init_buffer pmix_show_help_yy_init_buffer +#endif + +#ifdef yy_flush_buffer +#define pmix_show_help_yy_flush_buffer_ALREADY_DEFINED +#else +#define yy_flush_buffer pmix_show_help_yy_flush_buffer +#endif + +#ifdef yy_load_buffer_state +#define pmix_show_help_yy_load_buffer_state_ALREADY_DEFINED +#else +#define yy_load_buffer_state pmix_show_help_yy_load_buffer_state +#endif + +#ifdef yy_switch_to_buffer +#define pmix_show_help_yy_switch_to_buffer_ALREADY_DEFINED +#else +#define yy_switch_to_buffer pmix_show_help_yy_switch_to_buffer +#endif + +#ifdef yypush_buffer_state +#define pmix_show_help_yypush_buffer_state_ALREADY_DEFINED +#else +#define yypush_buffer_state pmix_show_help_yypush_buffer_state +#endif + +#ifdef yypop_buffer_state +#define pmix_show_help_yypop_buffer_state_ALREADY_DEFINED +#else +#define yypop_buffer_state pmix_show_help_yypop_buffer_state +#endif + +#ifdef yyensure_buffer_stack +#define pmix_show_help_yyensure_buffer_stack_ALREADY_DEFINED +#else +#define yyensure_buffer_stack pmix_show_help_yyensure_buffer_stack +#endif + +#ifdef yylex +#define pmix_show_help_yylex_ALREADY_DEFINED +#else +#define yylex pmix_show_help_yylex +#endif + +#ifdef 
yyrestart +#define pmix_show_help_yyrestart_ALREADY_DEFINED +#else +#define yyrestart pmix_show_help_yyrestart +#endif + +#ifdef yylex_init +#define pmix_show_help_yylex_init_ALREADY_DEFINED +#else +#define yylex_init pmix_show_help_yylex_init +#endif + +#ifdef yylex_init_extra +#define pmix_show_help_yylex_init_extra_ALREADY_DEFINED +#else +#define yylex_init_extra pmix_show_help_yylex_init_extra +#endif + +#ifdef yylex_destroy +#define pmix_show_help_yylex_destroy_ALREADY_DEFINED +#else +#define yylex_destroy pmix_show_help_yylex_destroy +#endif + +#ifdef yyget_debug +#define pmix_show_help_yyget_debug_ALREADY_DEFINED +#else +#define yyget_debug pmix_show_help_yyget_debug +#endif + +#ifdef yyset_debug +#define pmix_show_help_yyset_debug_ALREADY_DEFINED +#else +#define yyset_debug pmix_show_help_yyset_debug +#endif + +#ifdef yyget_extra +#define pmix_show_help_yyget_extra_ALREADY_DEFINED +#else +#define yyget_extra pmix_show_help_yyget_extra +#endif + +#ifdef yyset_extra +#define pmix_show_help_yyset_extra_ALREADY_DEFINED +#else +#define yyset_extra pmix_show_help_yyset_extra +#endif + +#ifdef yyget_in +#define pmix_show_help_yyget_in_ALREADY_DEFINED +#else +#define yyget_in pmix_show_help_yyget_in +#endif + +#ifdef yyset_in +#define pmix_show_help_yyset_in_ALREADY_DEFINED +#else +#define yyset_in pmix_show_help_yyset_in +#endif + +#ifdef yyget_out +#define pmix_show_help_yyget_out_ALREADY_DEFINED +#else +#define yyget_out pmix_show_help_yyget_out +#endif + +#ifdef yyset_out +#define pmix_show_help_yyset_out_ALREADY_DEFINED +#else +#define yyset_out pmix_show_help_yyset_out +#endif + +#ifdef yyget_leng +#define pmix_show_help_yyget_leng_ALREADY_DEFINED +#else +#define yyget_leng pmix_show_help_yyget_leng +#endif + +#ifdef yyget_text +#define pmix_show_help_yyget_text_ALREADY_DEFINED +#else +#define yyget_text pmix_show_help_yyget_text +#endif + +#ifdef yyget_lineno +#define pmix_show_help_yyget_lineno_ALREADY_DEFINED +#else +#define yyget_lineno 
pmix_show_help_yyget_lineno +#endif + +#ifdef yyset_lineno +#define pmix_show_help_yyset_lineno_ALREADY_DEFINED +#else +#define yyset_lineno pmix_show_help_yyset_lineno +#endif + +#ifdef yywrap +#define pmix_show_help_yywrap_ALREADY_DEFINED +#else +#define yywrap pmix_show_help_yywrap +#endif + +#ifdef yyalloc +#define pmix_show_help_yyalloc_ALREADY_DEFINED +#else +#define yyalloc pmix_show_help_yyalloc +#endif + +#ifdef yyrealloc +#define pmix_show_help_yyrealloc_ALREADY_DEFINED +#else +#define yyrealloc pmix_show_help_yyrealloc +#endif + +#ifdef yyfree +#define pmix_show_help_yyfree_ALREADY_DEFINED +#else +#define yyfree pmix_show_help_yyfree +#endif + +#ifdef yytext +#define pmix_show_help_yytext_ALREADY_DEFINED +#else +#define yytext pmix_show_help_yytext +#endif + +#ifdef yyleng +#define pmix_show_help_yyleng_ALREADY_DEFINED +#else +#define yyleng pmix_show_help_yyleng +#endif + +#ifdef yyin +#define pmix_show_help_yyin_ALREADY_DEFINED +#else +#define yyin pmix_show_help_yyin +#endif + +#ifdef yyout +#define pmix_show_help_yyout_ALREADY_DEFINED +#else +#define yyout pmix_show_help_yyout +#endif + +#ifdef yy_flex_debug +#define pmix_show_help_yy_flex_debug_ALREADY_DEFINED +#else +#define yy_flex_debug pmix_show_help_yy_flex_debug +#endif + +#ifdef yylineno +#define pmix_show_help_yylineno_ALREADY_DEFINED +#else +#define yylineno pmix_show_help_yylineno +#endif + +/* First, we deal with platform-specific or compiler-specific issues. */ + +/* begin standard C headers. */ +#include +#include +#include +#include + +/* end standard C headers. */ + +/* flex integer type definitions */ + +#ifndef FLEXINT_H +#define FLEXINT_H + +/* C99 systems have . Non-C99 systems may or may not. */ + +#if defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L + +/* C99 says to define __STDC_LIMIT_MACROS before including stdint.h, + * if you want the limit (max/min) macros for int types. 
+ */ +#ifndef __STDC_LIMIT_MACROS +#define __STDC_LIMIT_MACROS 1 +#endif + +#include +typedef int8_t flex_int8_t; +typedef uint8_t flex_uint8_t; +typedef int16_t flex_int16_t; +typedef uint16_t flex_uint16_t; +typedef int32_t flex_int32_t; +typedef uint32_t flex_uint32_t; +#else +typedef signed char flex_int8_t; +typedef short int flex_int16_t; +typedef int flex_int32_t; +typedef unsigned char flex_uint8_t; +typedef unsigned short int flex_uint16_t; +typedef unsigned int flex_uint32_t; + +/* Limits of integral types. */ +#ifndef INT8_MIN +#define INT8_MIN (-128) +#endif +#ifndef INT16_MIN +#define INT16_MIN (-32767-1) +#endif +#ifndef INT32_MIN +#define INT32_MIN (-2147483647-1) +#endif +#ifndef INT8_MAX +#define INT8_MAX (127) +#endif +#ifndef INT16_MAX +#define INT16_MAX (32767) +#endif +#ifndef INT32_MAX +#define INT32_MAX (2147483647) +#endif +#ifndef UINT8_MAX +#define UINT8_MAX (255U) +#endif +#ifndef UINT16_MAX +#define UINT16_MAX (65535U) +#endif +#ifndef UINT32_MAX +#define UINT32_MAX (4294967295U) +#endif + +#ifndef SIZE_MAX +#define SIZE_MAX (~(size_t)0) +#endif + +#endif /* ! C99 */ + +#endif /* ! FLEXINT_H */ + +/* begin standard C++ headers. */ + +/* TODO: this is always defined, so inline it */ +#define yyconst const + +#if defined(__GNUC__) && __GNUC__ >= 3 +#define yynoreturn __attribute__((__noreturn__)) +#else +#define yynoreturn +#endif + +/* Returned upon end-of-file. */ +#define YY_NULL 0 + +/* Promotes a possibly negative, possibly signed char to an + * integer in range [0..255] for use as an array index. + */ +#define YY_SC_TO_UI(c) ((YY_CHAR) (c)) + +/* Enter a start condition. This macro really ought to take a parameter, + * but we do it the disgusting crufty way forced on us by the ()-less + * definition of BEGIN. + */ +#define BEGIN (yy_start) = 1 + 2 * +/* Translate the current start state into a value that can be later handed + * to BEGIN to return to the state. The YYSTATE alias is for lex + * compatibility. 
+ */ +#define YY_START (((yy_start) - 1) / 2) +#define YYSTATE YY_START +/* Action number for EOF rule of a given start state. */ +#define YY_STATE_EOF(state) (YY_END_OF_BUFFER + state + 1) +/* Special action meaning "start processing a new file". */ +#define YY_NEW_FILE yyrestart( yyin ) +#define YY_END_OF_BUFFER_CHAR 0 + +/* Size of default input buffer. */ +#ifndef YY_BUF_SIZE +#ifdef __ia64__ +/* On IA-64, the buffer size is 16k, not 8k. + * Moreover, YY_BUF_SIZE is 2*YY_READ_BUF_SIZE in the general case. + * Ditto for the __ia64__ case accordingly. + */ +#define YY_BUF_SIZE 32768 +#else +#define YY_BUF_SIZE 16384 +#endif /* __ia64__ */ +#endif + +/* The state buf must be large enough to hold one state per character in the main buffer. + */ +#define YY_STATE_BUF_SIZE ((YY_BUF_SIZE + 2) * sizeof(yy_state_type)) + +#ifndef YY_TYPEDEF_YY_BUFFER_STATE +#define YY_TYPEDEF_YY_BUFFER_STATE +typedef struct yy_buffer_state *YY_BUFFER_STATE; +#endif + +#ifndef YY_TYPEDEF_YY_SIZE_T +#define YY_TYPEDEF_YY_SIZE_T +typedef size_t yy_size_t; +#endif + +extern int yyleng; + +extern FILE *yyin, *yyout; + +#define EOB_ACT_CONTINUE_SCAN 0 +#define EOB_ACT_END_OF_FILE 1 +#define EOB_ACT_LAST_MATCH 2 + + #define YY_LESS_LINENO(n) + #define YY_LINENO_REWIND_TO(ptr) + +/* Return all but the first "n" matched characters back to the input stream. */ +#define yyless(n) \ + do \ + { \ + /* Undo effects of setting up yytext. 
*/ \ + int yyless_macro_arg = (n); \ + YY_LESS_LINENO(yyless_macro_arg);\ + *yy_cp = (yy_hold_char); \ + YY_RESTORE_YY_MORE_OFFSET \ + (yy_c_buf_p) = yy_cp = yy_bp + yyless_macro_arg - YY_MORE_ADJ; \ + YY_DO_BEFORE_ACTION; /* set up yytext again */ \ + } \ + while ( 0 ) +#define unput(c) yyunput( c, (yytext_ptr) ) + +#ifndef YY_STRUCT_YY_BUFFER_STATE +#define YY_STRUCT_YY_BUFFER_STATE +struct yy_buffer_state + { + FILE *yy_input_file; + + char *yy_ch_buf; /* input buffer */ + char *yy_buf_pos; /* current position in input buffer */ + + /* Size of input buffer in bytes, not including room for EOB + * characters. + */ + int yy_buf_size; + + /* Number of characters read into yy_ch_buf, not including EOB + * characters. + */ + int yy_n_chars; + + /* Whether we "own" the buffer - i.e., we know we created it, + * and can realloc() it to grow it, and should free() it to + * delete it. + */ + int yy_is_our_buffer; + + /* Whether this is an "interactive" input source; if so, and + * if we're using stdio for input, then we want to use getc() + * instead of fread(), to make sure we stop fetching input after + * each newline. + */ + int yy_is_interactive; + + /* Whether we're considered to be at the beginning of a line. + * If so, '^' rules will be active on the next match, otherwise + * not. + */ + int yy_at_bol; + + int yy_bs_lineno; /**< The line count. */ + int yy_bs_column; /**< The column count. */ + + /* Whether to try to fill the input buffer when we reach the + * end of it. + */ + int yy_fill_buffer; + + int yy_buffer_status; + +#define YY_BUFFER_NEW 0 +#define YY_BUFFER_NORMAL 1 + /* When an EOF's been seen but there's still some text to process + * then we mark the buffer as YY_EOF_PENDING, to indicate that we + * shouldn't try reading from the input source any more. We might + * still have a bunch of tokens to match, though, because of + * possible backing-up. 
+ * + * When we actually see the EOF, we change the status to "new" + * (via yyrestart()), so that the user can continue scanning by + * just pointing yyin at a new input file. + */ +#define YY_BUFFER_EOF_PENDING 2 + + }; +#endif /* !YY_STRUCT_YY_BUFFER_STATE */ + +/* Stack of input buffers. */ +static size_t yy_buffer_stack_top = 0; /**< index of top of stack. */ +static size_t yy_buffer_stack_max = 0; /**< capacity of stack. */ +static YY_BUFFER_STATE * yy_buffer_stack = NULL; /**< Stack as an array. */ + +/* We provide macros for accessing buffer states in case in the + * future we want to put the buffer states in a more general + * "scanner state". + * + * Returns the top of the stack, or NULL. + */ +#define YY_CURRENT_BUFFER ( (yy_buffer_stack) \ + ? (yy_buffer_stack)[(yy_buffer_stack_top)] \ + : NULL) +/* Same as previous macro, but useful when we know that the buffer stack is not + * NULL or when we need an lvalue. For internal use only. + */ +#define YY_CURRENT_BUFFER_LVALUE (yy_buffer_stack)[(yy_buffer_stack_top)] + +/* yy_hold_char holds the character lost when yytext is formed. */ +static char yy_hold_char; +static int yy_n_chars; /* number of characters read into yy_ch_buf */ +int yyleng; + +/* Points to current character in buffer. */ +static char *yy_c_buf_p = NULL; +static int yy_init = 0; /* whether we need to initialize */ +static int yy_start = 0; /* start state number */ + +/* Flag which is used to allow yywrap()'s to do buffer switches + * instead of setting up a fresh yyin. A bit of a hack ... 
+ */ +static int yy_did_buffer_switch_on_eof; + +void yyrestart ( FILE *input_file ); +void yy_switch_to_buffer ( YY_BUFFER_STATE new_buffer ); +YY_BUFFER_STATE yy_create_buffer ( FILE *file, int size ); +void yy_delete_buffer ( YY_BUFFER_STATE b ); +void yy_flush_buffer ( YY_BUFFER_STATE b ); +void yypush_buffer_state ( YY_BUFFER_STATE new_buffer ); +void yypop_buffer_state ( void ); + +static void yyensure_buffer_stack ( void ); +static void yy_load_buffer_state ( void ); +static void yy_init_buffer ( YY_BUFFER_STATE b, FILE *file ); +#define YY_FLUSH_BUFFER yy_flush_buffer( YY_CURRENT_BUFFER ) + +YY_BUFFER_STATE yy_scan_buffer ( char *base, yy_size_t size ); +YY_BUFFER_STATE yy_scan_string ( const char *yy_str ); +YY_BUFFER_STATE yy_scan_bytes ( const char *bytes, int len ); + +void *yyalloc ( yy_size_t ); +void *yyrealloc ( void *, yy_size_t ); +void yyfree ( void * ); + +#define yy_new_buffer yy_create_buffer +#define yy_set_interactive(is_interactive) \ + { \ + if ( ! YY_CURRENT_BUFFER ){ \ + yyensure_buffer_stack (); \ + YY_CURRENT_BUFFER_LVALUE = \ + yy_create_buffer( yyin, YY_BUF_SIZE ); \ + } \ + YY_CURRENT_BUFFER_LVALUE->yy_is_interactive = is_interactive; \ + } +#define yy_set_bol(at_bol) \ + { \ + if ( ! 
YY_CURRENT_BUFFER ){\ + yyensure_buffer_stack (); \ + YY_CURRENT_BUFFER_LVALUE = \ + yy_create_buffer( yyin, YY_BUF_SIZE ); \ + } \ + YY_CURRENT_BUFFER_LVALUE->yy_at_bol = at_bol; \ + } +#define YY_AT_BOL() (YY_CURRENT_BUFFER_LVALUE->yy_at_bol) + +/* Begin user sect3 */ +typedef flex_uint8_t YY_CHAR; + +FILE *yyin = NULL, *yyout = NULL; + +typedef int yy_state_type; + +extern int yylineno; +int yylineno = 1; + +extern char *yytext; +#ifdef yytext_ptr +#undef yytext_ptr +#endif +#define yytext_ptr yytext + +static yy_state_type yy_get_previous_state ( void ); +static yy_state_type yy_try_NUL_trans ( yy_state_type current_state ); +static int yy_get_next_buffer ( void ); +static void yynoreturn yy_fatal_error ( const char* msg ); + +/* Done after the current pattern has been matched and before the + * corresponding action - sets up yytext. + */ +#define YY_DO_BEFORE_ACTION \ + (yytext_ptr) = yy_bp; \ + yyleng = (int) (yy_cp - yy_bp); \ + (yy_hold_char) = *yy_cp; \ + *yy_cp = '\0'; \ + (yy_c_buf_p) = yy_cp; +#define YY_NUM_RULES 5 +#define YY_END_OF_BUFFER 6 +/* This struct is not used in this scanner, + but its presence is necessary. 
*/ +struct yy_trans_info + { + flex_int32_t yy_verify; + flex_int32_t yy_nxt; + }; +static const flex_int16_t yy_acclist[17] = + { 0, + 6, 5, 4, 5, 5, 5, 5, 3, 5, 4, + 1, 4, 3,16386, 8194, 4 + } ; + +static const flex_int16_t yy_accept[24] = + { 0, + 1, 1, 1, 1, 1, 2, 3, 5, 6, 7, + 8, 10, 10, 11, 11, 13, 13, 13, 14, 15, + 15, 17, 17 + } ; + +static const YY_CHAR yy_ec[256] = + { 0, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 3, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 4, 1, 5, 1, 1, 1, 1, 1, 1, 1, + + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1 + } ; + +static const YY_CHAR yy_meta[6] = + { 0, + 1, 1, 1, 1, 1 + } ; + +static const flex_int16_t yy_base[30] = + { 0, + 0, 3, 28, 27, 28, 25, 31, 24, 23, 22, + 31, 21, 31, 20, 31, 7, 19, 31, 11, 15, + 31, 31, 18, 17, 14, 13, 10, 9, 0 + } ; + +static const flex_int16_t yy_def[30] = + { 0, + 23, 22, 24, 24, 22, 25, 22, 26, 27, 28, + 22, 25, 22, 26, 22, 27, 28, 22, 29, 29, + 22, 0, 22, 22, 22, 22, 22, 22, 22 + } ; + +static const flex_int16_t yy_nxt[37] = + { 0, + 20, 7, 8, 6, 7, 8, 9, 6, 13, 17, + 16, 19, 21, 14, 12, 19, 21, 10, 6, 19, + 18, 15, 13, 18, 13, 15, 13, 22, 11, 11, + 5, 22, 22, 22, 22, 22 + } ; + +static const flex_int16_t yy_chk[37] = + { 0, + 29, 1, 1, 2, 2, 2, 2, 2, 16, 28, + 27, 16, 19, 26, 25, 19, 20, 24, 23, 20, + 17, 14, 12, 10, 9, 8, 6, 5, 4, 3, + 22, 22, 22, 
22, 22, 22 + } ; + +extern int yy_flex_debug; +int yy_flex_debug = 0; + +static yy_state_type *yy_state_buf=0, *yy_state_ptr=0; +static char *yy_full_match; +static int yy_lp; +static int yy_looking_for_trail_begin = 0; +static int yy_full_lp; +static int *yy_full_state; +#define YY_TRAILING_MASK 0x2000 +#define YY_TRAILING_HEAD_MASK 0x4000 +#define REJECT \ +{ \ +*yy_cp = (yy_hold_char); /* undo effects of setting up yytext */ \ +yy_cp = (yy_full_match); /* restore poss. backed-over text */ \ +(yy_lp) = (yy_full_lp); /* restore orig. accepting pos. */ \ +(yy_state_ptr) = (yy_full_state); /* restore orig. state */ \ +yy_current_state = *(yy_state_ptr); /* restore curr. state */ \ +++(yy_lp); \ +goto find_rule; \ +} + +#define yymore() yymore_used_but_not_detected +#define YY_MORE_ADJ 0 +#define YY_RESTORE_YY_MORE_OFFSET +char *yytext; +#line 1 "util/show_help_lex.l" +#define YY_NO_INPUT 1 +#line 5 "util/show_help_lex.l" +/* + * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2006 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * Copyright (c) 2016 Intel, Inc. 
All rights reserved + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include + +#include +#ifdef HAVE_UNISTD_H +#include +#endif + +#include "src/util/show_help_lex.h" + +BEGIN_C_DECLS + +/* + * public functions + */ +extern int pmix_show_help_finish_parsing(void); + +/* + * local functions + */ +static int pmix_show_help_yywrap(void); + +END_C_DECLS + +/* + * global variables + */ +int pmix_show_help_yynewlines = 1; +bool pmix_show_help_parse_done = false; + +#line 780 "util/show_help_lex.c" + +#line 782 "util/show_help_lex.c" + +#define INITIAL 0 +#define CHOMP 1 + +#ifndef YY_NO_UNISTD_H +/* Special case for "unistd.h", since it is non-ANSI. We include it way + * down here because we want the user's section 1 to have been scanned first. + * The user has a chance to override it with an option. + */ +#include +#endif + +#ifndef YY_EXTRA_TYPE +#define YY_EXTRA_TYPE void * +#endif + +static int yy_init_globals ( void ); + +/* Accessor methods to globals. + These are made visible to non-reentrant scanners for convenience. */ + +int yylex_destroy ( void ); + +int yyget_debug ( void ); + +void yyset_debug ( int debug_flag ); + +YY_EXTRA_TYPE yyget_extra ( void ); + +void yyset_extra ( YY_EXTRA_TYPE user_defined ); + +FILE *yyget_in ( void ); + +void yyset_in ( FILE * _in_str ); + +FILE *yyget_out ( void ); + +void yyset_out ( FILE * _out_str ); + + int yyget_leng ( void ); + +char *yyget_text ( void ); + +int yyget_lineno ( void ); + +void yyset_lineno ( int _line_number ); + +/* Macros after this point can all be overridden by user definitions in + * section 1. 
+ */ + +#ifndef YY_SKIP_YYWRAP +#ifdef __cplusplus +extern "C" int yywrap ( void ); +#else +extern int yywrap ( void ); +#endif +#endif + +#ifndef YY_NO_UNPUT + +#endif + +#ifndef yytext_ptr +static void yy_flex_strncpy ( char *, const char *, int ); +#endif + +#ifdef YY_NEED_STRLEN +static int yy_flex_strlen ( const char * ); +#endif + +#ifndef YY_NO_INPUT +#ifdef __cplusplus +static int yyinput ( void ); +#else +static int input ( void ); +#endif + +#endif + +/* Amount of stuff to slurp up with each read. */ +#ifndef YY_READ_BUF_SIZE +#ifdef __ia64__ +/* On IA-64, the buffer size is 16k, not 8k */ +#define YY_READ_BUF_SIZE 16384 +#else +#define YY_READ_BUF_SIZE 8192 +#endif /* __ia64__ */ +#endif + +/* Copy whatever the last rule matched to the standard output. */ +#ifndef ECHO +/* This used to be an fputs(), but since the string might contain NUL's, + * we now use fwrite(). + */ +#define ECHO do { if (fwrite( yytext, (size_t) yyleng, 1, yyout )) {} } while (0) +#endif + +/* Gets input and stuffs it into "buf". number of characters read, or YY_NULL, + * is returned in "result". 
+ */ +#ifndef YY_INPUT +#define YY_INPUT(buf,result,max_size) \ + if ( YY_CURRENT_BUFFER_LVALUE->yy_is_interactive ) \ + { \ + int c = '*'; \ + int n; \ + for ( n = 0; n < max_size && \ + (c = getc( yyin )) != EOF && c != '\n'; ++n ) \ + buf[n] = (char) c; \ + if ( c == '\n' ) \ + buf[n++] = (char) c; \ + if ( c == EOF && ferror( yyin ) ) \ + YY_FATAL_ERROR( "input in flex scanner failed" ); \ + result = n; \ + } \ + else \ + { \ + errno=0; \ + while ( (result = (int) fread(buf, 1, (yy_size_t) max_size, yyin)) == 0 && ferror(yyin)) \ + { \ + if( errno != EINTR) \ + { \ + YY_FATAL_ERROR( "input in flex scanner failed" ); \ + break; \ + } \ + errno=0; \ + clearerr(yyin); \ + } \ + }\ +\ + +#endif + +/* No semi-colon after return; correct usage is to write "yyterminate();" - + * we don't want an extra ';' after the "return" because that will cause + * some compilers to complain about unreachable statements. + */ +#ifndef yyterminate +#define yyterminate() return YY_NULL +#endif + +/* Number of entries by which start-condition stack grows. */ +#ifndef YY_START_STACK_INCR +#define YY_START_STACK_INCR 25 +#endif + +/* Report a fatal error. */ +#ifndef YY_FATAL_ERROR +#define YY_FATAL_ERROR(msg) yy_fatal_error( msg ) +#endif + +/* end tables serialization structures and prototypes */ + +/* Default declaration of generated scanner - a define so the user can + * easily add parameters. + */ +#ifndef YY_DECL +#define YY_DECL_IS_OURS 1 + +extern int yylex (void); + +#define YY_DECL int yylex (void) +#endif /* !YY_DECL */ + +/* Code executed at the beginning of each rule, after yytext and yyleng + * have been set up. + */ +#ifndef YY_USER_ACTION +#define YY_USER_ACTION +#endif + +/* Code executed at the end of each rule. 
*/ +#ifndef YY_BREAK +#define YY_BREAK /*LINTED*/break; +#endif + +#define YY_RULE_SETUP \ + if ( yyleng > 0 ) \ + YY_CURRENT_BUFFER_LVALUE->yy_at_bol = \ + (yytext[yyleng - 1] == '\n'); \ + YY_USER_ACTION + +/** The main scanner function which does all the work. + */ +YY_DECL +{ + yy_state_type yy_current_state; + char *yy_cp, *yy_bp; + int yy_act; + + if ( !(yy_init) ) + { + (yy_init) = 1; + +#ifdef YY_USER_INIT + YY_USER_INIT; +#endif + + /* Create the reject buffer large enough to save one state per allowed character. */ + if ( ! (yy_state_buf) ) + (yy_state_buf) = (yy_state_type *)yyalloc(YY_STATE_BUF_SIZE ); + if ( ! (yy_state_buf) ) + YY_FATAL_ERROR( "out of dynamic memory in yylex()" ); + + if ( ! (yy_start) ) + (yy_start) = 1; /* first start state */ + + if ( ! yyin ) + yyin = stdin; + + if ( ! yyout ) + yyout = stdout; + + if ( ! YY_CURRENT_BUFFER ) { + yyensure_buffer_stack (); + YY_CURRENT_BUFFER_LVALUE = + yy_create_buffer( yyin, YY_BUF_SIZE ); + } + + yy_load_buffer_state( ); + } + + { +#line 60 "util/show_help_lex.l" + + +#line 1010 "util/show_help_lex.c" + + while ( /*CONSTCOND*/1 ) /* loops until end-of-file is reached */ + { + yy_cp = (yy_c_buf_p); + + /* Support of yytext. */ + *yy_cp = (yy_hold_char); + + /* yy_bp points to the position in yy_ch_buf of the start of + * the current run. 
+ */ + yy_bp = yy_cp; + + yy_current_state = (yy_start); + yy_current_state += YY_AT_BOL(); + + (yy_state_ptr) = (yy_state_buf); + *(yy_state_ptr)++ = yy_current_state; + +yy_match: + do + { + YY_CHAR yy_c = yy_ec[YY_SC_TO_UI(*yy_cp)] ; + while ( yy_chk[yy_base[yy_current_state] + yy_c] != yy_current_state ) + { + yy_current_state = (int) yy_def[yy_current_state]; + if ( yy_current_state >= 23 ) + yy_c = yy_meta[yy_c]; + } + yy_current_state = yy_nxt[yy_base[yy_current_state] + yy_c]; + *(yy_state_ptr)++ = yy_current_state; + ++yy_cp; + } + while ( yy_base[yy_current_state] != 31 ); + +yy_find_action: + yy_current_state = *--(yy_state_ptr); + (yy_lp) = yy_accept[yy_current_state]; +find_rule: /* we branch to this label when backing up */ + for ( ; ; ) /* until we find what rule we matched */ + { + if ( (yy_lp) && (yy_lp) < yy_accept[yy_current_state + 1] ) + { + yy_act = yy_acclist[(yy_lp)]; + if ( yy_act & YY_TRAILING_HEAD_MASK || + (yy_looking_for_trail_begin) ) + { + if ( yy_act == (yy_looking_for_trail_begin) ) + { + (yy_looking_for_trail_begin) = 0; + yy_act &= ~YY_TRAILING_HEAD_MASK; + break; + } + } + else if ( yy_act & YY_TRAILING_MASK ) + { + (yy_looking_for_trail_begin) = yy_act & ~YY_TRAILING_MASK; + (yy_looking_for_trail_begin) |= YY_TRAILING_HEAD_MASK; + } + else + { + (yy_full_match) = yy_cp; + (yy_full_state) = (yy_state_ptr); + (yy_full_lp) = (yy_lp); + break; + } + ++(yy_lp); + goto find_rule; + } + --yy_cp; + yy_current_state = *--(yy_state_ptr); + (yy_lp) = yy_accept[yy_current_state]; + } + + YY_DO_BEFORE_ACTION; + +do_action: /* This label is used only to access EOF actions. 
*/ + + switch ( yy_act ) + { /* beginning of action switch */ +case 1: +/* rule 1 can match eol */ +YY_RULE_SETUP +#line 62 "util/show_help_lex.l" +; /* comment line */ + YY_BREAK +case 2: +/* rule 2 can match eol */ +YY_RULE_SETUP +#line 64 "util/show_help_lex.l" +{ BEGIN(CHOMP); return PMIX_SHOW_HELP_PARSE_TOPIC; } + YY_BREAK +case 3: +/* rule 3 can match eol */ +YY_RULE_SETUP +#line 66 "util/show_help_lex.l" +{ BEGIN(INITIAL); } + YY_BREAK +case 4: +/* rule 4 can match eol */ +*yy_cp = (yy_hold_char); /* undo effects of setting up yytext */ +YY_LINENO_REWIND_TO(yy_cp - 1); +(yy_c_buf_p) = yy_cp -= 1; +YY_DO_BEFORE_ACTION; /* set up yytext again */ +YY_RULE_SETUP +#line 68 "util/show_help_lex.l" +{ BEGIN(CHOMP); return PMIX_SHOW_HELP_PARSE_MESSAGE; } + YY_BREAK +case 5: +YY_RULE_SETUP +#line 70 "util/show_help_lex.l" +ECHO; + YY_BREAK +#line 1124 "util/show_help_lex.c" + case YY_STATE_EOF(INITIAL): + case YY_STATE_EOF(CHOMP): + yyterminate(); + + case YY_END_OF_BUFFER: + { + /* Amount of text matched not including the EOB char. */ + int yy_amount_of_matched_text = (int) (yy_cp - (yytext_ptr)) - 1; + + /* Undo the effects of YY_DO_BEFORE_ACTION. */ + *yy_cp = (yy_hold_char); + YY_RESTORE_YY_MORE_OFFSET + + if ( YY_CURRENT_BUFFER_LVALUE->yy_buffer_status == YY_BUFFER_NEW ) + { + /* We're scanning a new file or input source. It's + * possible that this happened because the user + * just pointed yyin at a new source and called + * yylex(). If so, then we have to assure + * consistency between YY_CURRENT_BUFFER and our + * globals. Here is the right place to do so, because + * this is the first action (other than possibly a + * back-up) that will match for the new input source. 
+ */ + (yy_n_chars) = YY_CURRENT_BUFFER_LVALUE->yy_n_chars; + YY_CURRENT_BUFFER_LVALUE->yy_input_file = yyin; + YY_CURRENT_BUFFER_LVALUE->yy_buffer_status = YY_BUFFER_NORMAL; + } + + /* Note that here we test for yy_c_buf_p "<=" to the position + * of the first EOB in the buffer, since yy_c_buf_p will + * already have been incremented past the NUL character + * (since all states make transitions on EOB to the + * end-of-buffer state). Contrast this with the test + * in input(). + */ + if ( (yy_c_buf_p) <= &YY_CURRENT_BUFFER_LVALUE->yy_ch_buf[(yy_n_chars)] ) + { /* This was really a NUL. */ + yy_state_type yy_next_state; + + (yy_c_buf_p) = (yytext_ptr) + yy_amount_of_matched_text; + + yy_current_state = yy_get_previous_state( ); + + /* Okay, we're now positioned to make the NUL + * transition. We couldn't have + * yy_get_previous_state() go ahead and do it + * for us because it doesn't know how to deal + * with the possibility of jamming (and we don't + * want to build jamming into it because then it + * will run more slowly). + */ + + yy_next_state = yy_try_NUL_trans( yy_current_state ); + + yy_bp = (yytext_ptr) + YY_MORE_ADJ; + + if ( yy_next_state ) + { + /* Consume the NUL. */ + yy_cp = ++(yy_c_buf_p); + yy_current_state = yy_next_state; + goto yy_match; + } + + else + { + yy_cp = (yy_c_buf_p); + goto yy_find_action; + } + } + + else switch ( yy_get_next_buffer( ) ) + { + case EOB_ACT_END_OF_FILE: + { + (yy_did_buffer_switch_on_eof) = 0; + + if ( yywrap( ) ) + { + /* Note: because we've taken care in + * yy_get_next_buffer() to have set up + * yytext, we can now set up + * yy_c_buf_p so that if some total + * hoser (like flex itself) wants to + * call the scanner after we return the + * YY_NULL, it'll still work - another + * YY_NULL will get returned. + */ + (yy_c_buf_p) = (yytext_ptr) + YY_MORE_ADJ; + + yy_act = YY_STATE_EOF(YY_START); + goto do_action; + } + + else + { + if ( ! 
(yy_did_buffer_switch_on_eof) ) + YY_NEW_FILE; + } + break; + } + + case EOB_ACT_CONTINUE_SCAN: + (yy_c_buf_p) = + (yytext_ptr) + yy_amount_of_matched_text; + + yy_current_state = yy_get_previous_state( ); + + yy_cp = (yy_c_buf_p); + yy_bp = (yytext_ptr) + YY_MORE_ADJ; + goto yy_match; + + case EOB_ACT_LAST_MATCH: + (yy_c_buf_p) = + &YY_CURRENT_BUFFER_LVALUE->yy_ch_buf[(yy_n_chars)]; + + yy_current_state = yy_get_previous_state( ); + + yy_cp = (yy_c_buf_p); + yy_bp = (yytext_ptr) + YY_MORE_ADJ; + goto yy_find_action; + } + break; + } + + default: + YY_FATAL_ERROR( + "fatal flex scanner internal error--no action found" ); + } /* end of action switch */ + } /* end of scanning one token */ + } /* end of user's declarations */ +} /* end of yylex */ + +/* yy_get_next_buffer - try to read in a new buffer + * + * Returns a code representing an action: + * EOB_ACT_LAST_MATCH - + * EOB_ACT_CONTINUE_SCAN - continue scanning from current position + * EOB_ACT_END_OF_FILE - end of file + */ +static int yy_get_next_buffer (void) +{ + char *dest = YY_CURRENT_BUFFER_LVALUE->yy_ch_buf; + char *source = (yytext_ptr); + int number_to_move, i; + int ret_val; + + if ( (yy_c_buf_p) > &YY_CURRENT_BUFFER_LVALUE->yy_ch_buf[(yy_n_chars) + 1] ) + YY_FATAL_ERROR( + "fatal flex scanner internal error--end of buffer missed" ); + + if ( YY_CURRENT_BUFFER_LVALUE->yy_fill_buffer == 0 ) + { /* Don't try to fill the buffer, so this is an EOF. */ + if ( (yy_c_buf_p) - (yytext_ptr) - YY_MORE_ADJ == 1 ) + { + /* We matched a single character, the EOB, so + * treat this as a final EOF. + */ + return EOB_ACT_END_OF_FILE; + } + + else + { + /* We matched some text prior to the EOB, first + * process it. + */ + return EOB_ACT_LAST_MATCH; + } + } + + /* Try to read more data. */ + + /* First move last chars to start of buffer. 
*/ + number_to_move = (int) ((yy_c_buf_p) - (yytext_ptr) - 1); + + for ( i = 0; i < number_to_move; ++i ) + *(dest++) = *(source++); + + if ( YY_CURRENT_BUFFER_LVALUE->yy_buffer_status == YY_BUFFER_EOF_PENDING ) + /* don't do the read, it's not guaranteed to return an EOF, + * just force an EOF + */ + YY_CURRENT_BUFFER_LVALUE->yy_n_chars = (yy_n_chars) = 0; + + else + { + int num_to_read = + YY_CURRENT_BUFFER_LVALUE->yy_buf_size - number_to_move - 1; + + while ( num_to_read <= 0 ) + { /* Not enough room in the buffer - grow it. */ + + YY_FATAL_ERROR( +"input buffer overflow, can't enlarge buffer because scanner uses REJECT" ); + + } + + if ( num_to_read > YY_READ_BUF_SIZE ) + num_to_read = YY_READ_BUF_SIZE; + + /* Read in more data. */ + YY_INPUT( (&YY_CURRENT_BUFFER_LVALUE->yy_ch_buf[number_to_move]), + (yy_n_chars), num_to_read ); + + YY_CURRENT_BUFFER_LVALUE->yy_n_chars = (yy_n_chars); + } + + if ( (yy_n_chars) == 0 ) + { + if ( number_to_move == YY_MORE_ADJ ) + { + ret_val = EOB_ACT_END_OF_FILE; + yyrestart( yyin ); + } + + else + { + ret_val = EOB_ACT_LAST_MATCH; + YY_CURRENT_BUFFER_LVALUE->yy_buffer_status = + YY_BUFFER_EOF_PENDING; + } + } + + else + ret_val = EOB_ACT_CONTINUE_SCAN; + + if (((yy_n_chars) + number_to_move) > YY_CURRENT_BUFFER_LVALUE->yy_buf_size) { + /* Extend the array by 50%, plus the number we really need. */ + int new_size = (yy_n_chars) + number_to_move + ((yy_n_chars) >> 1); + YY_CURRENT_BUFFER_LVALUE->yy_ch_buf = (char *) yyrealloc( + (void *) YY_CURRENT_BUFFER_LVALUE->yy_ch_buf, (yy_size_t) new_size ); + if ( ! 
YY_CURRENT_BUFFER_LVALUE->yy_ch_buf ) + YY_FATAL_ERROR( "out of dynamic memory in yy_get_next_buffer()" ); + /* "- 2" to take care of EOB's */ + YY_CURRENT_BUFFER_LVALUE->yy_buf_size = (int) (new_size - 2); + } + + (yy_n_chars) += number_to_move; + YY_CURRENT_BUFFER_LVALUE->yy_ch_buf[(yy_n_chars)] = YY_END_OF_BUFFER_CHAR; + YY_CURRENT_BUFFER_LVALUE->yy_ch_buf[(yy_n_chars) + 1] = YY_END_OF_BUFFER_CHAR; + + (yytext_ptr) = &YY_CURRENT_BUFFER_LVALUE->yy_ch_buf[0]; + + return ret_val; +} + +/* yy_get_previous_state - get the state just before the EOB char was reached */ + + static yy_state_type yy_get_previous_state (void) +{ + yy_state_type yy_current_state; + char *yy_cp; + + yy_current_state = (yy_start); + yy_current_state += YY_AT_BOL(); + + (yy_state_ptr) = (yy_state_buf); + *(yy_state_ptr)++ = yy_current_state; + + for ( yy_cp = (yytext_ptr) + YY_MORE_ADJ; yy_cp < (yy_c_buf_p); ++yy_cp ) + { + YY_CHAR yy_c = (*yy_cp ? yy_ec[YY_SC_TO_UI(*yy_cp)] : 1); + while ( yy_chk[yy_base[yy_current_state] + yy_c] != yy_current_state ) + { + yy_current_state = (int) yy_def[yy_current_state]; + if ( yy_current_state >= 23 ) + yy_c = yy_meta[yy_c]; + } + yy_current_state = yy_nxt[yy_base[yy_current_state] + yy_c]; + *(yy_state_ptr)++ = yy_current_state; + } + + return yy_current_state; +} + +/* yy_try_NUL_trans - try to make a transition on the NUL character + * + * synopsis + * next_state = yy_try_NUL_trans( current_state ); + */ + static yy_state_type yy_try_NUL_trans (yy_state_type yy_current_state ) +{ + int yy_is_jam; + + YY_CHAR yy_c = 1; + while ( yy_chk[yy_base[yy_current_state] + yy_c] != yy_current_state ) + { + yy_current_state = (int) yy_def[yy_current_state]; + if ( yy_current_state >= 23 ) + yy_c = yy_meta[yy_c]; + } + yy_current_state = yy_nxt[yy_base[yy_current_state] + yy_c]; + yy_is_jam = (yy_current_state == 22); + if ( ! yy_is_jam ) + *(yy_state_ptr)++ = yy_current_state; + + return yy_is_jam ? 
0 : yy_current_state; +} + +#ifndef YY_NO_UNPUT + +#endif + +#ifndef YY_NO_INPUT +#ifdef __cplusplus + static int yyinput (void) +#else + static int input (void) +#endif + +{ + int c; + + *(yy_c_buf_p) = (yy_hold_char); + + if ( *(yy_c_buf_p) == YY_END_OF_BUFFER_CHAR ) + { + /* yy_c_buf_p now points to the character we want to return. + * If this occurs *before* the EOB characters, then it's a + * valid NUL; if not, then we've hit the end of the buffer. + */ + if ( (yy_c_buf_p) < &YY_CURRENT_BUFFER_LVALUE->yy_ch_buf[(yy_n_chars)] ) + /* This was really a NUL. */ + *(yy_c_buf_p) = '\0'; + + else + { /* need more input */ + int offset = (int) ((yy_c_buf_p) - (yytext_ptr)); + ++(yy_c_buf_p); + + switch ( yy_get_next_buffer( ) ) + { + case EOB_ACT_LAST_MATCH: + /* This happens because yy_g_n_b() + * sees that we've accumulated a + * token and flags that we need to + * try matching the token before + * proceeding. But for input(), + * there's no matching to consider. + * So convert the EOB_ACT_LAST_MATCH + * to EOB_ACT_END_OF_FILE. + */ + + /* Reset buffer status. */ + yyrestart( yyin ); + + /*FALLTHROUGH*/ + + case EOB_ACT_END_OF_FILE: + { + if ( yywrap( ) ) + return 0; + + if ( ! (yy_did_buffer_switch_on_eof) ) + YY_NEW_FILE; +#ifdef __cplusplus + return yyinput(); +#else + return input(); +#endif + } + + case EOB_ACT_CONTINUE_SCAN: + (yy_c_buf_p) = (yytext_ptr) + offset; + break; + } + } + } + + c = *(unsigned char *) (yy_c_buf_p); /* cast for 8-bit char's */ + *(yy_c_buf_p) = '\0'; /* preserve yytext */ + (yy_hold_char) = *++(yy_c_buf_p); + + YY_CURRENT_BUFFER_LVALUE->yy_at_bol = (c == '\n'); + + return c; +} +#endif /* ifndef YY_NO_INPUT */ + +/** Immediately switch to a different input stream. + * @param input_file A readable stream. + * + * @note This function does not reset the start condition to @c INITIAL . + */ + void yyrestart (FILE * input_file ) +{ + + if ( ! 
YY_CURRENT_BUFFER ){ + yyensure_buffer_stack (); + YY_CURRENT_BUFFER_LVALUE = + yy_create_buffer( yyin, YY_BUF_SIZE ); + } + + yy_init_buffer( YY_CURRENT_BUFFER, input_file ); + yy_load_buffer_state( ); +} + +/** Switch to a different input buffer. + * @param new_buffer The new input buffer. + * + */ + void yy_switch_to_buffer (YY_BUFFER_STATE new_buffer ) +{ + + /* TODO. We should be able to replace this entire function body + * with + * yypop_buffer_state(); + * yypush_buffer_state(new_buffer); + */ + yyensure_buffer_stack (); + if ( YY_CURRENT_BUFFER == new_buffer ) + return; + + if ( YY_CURRENT_BUFFER ) + { + /* Flush out information for old buffer. */ + *(yy_c_buf_p) = (yy_hold_char); + YY_CURRENT_BUFFER_LVALUE->yy_buf_pos = (yy_c_buf_p); + YY_CURRENT_BUFFER_LVALUE->yy_n_chars = (yy_n_chars); + } + + YY_CURRENT_BUFFER_LVALUE = new_buffer; + yy_load_buffer_state( ); + + /* We don't actually know whether we did this switch during + * EOF (yywrap()) processing, but the only time this flag + * is looked at is after yywrap() is called, so it's safe + * to go ahead and always set it. + */ + (yy_did_buffer_switch_on_eof) = 1; +} + +static void yy_load_buffer_state (void) +{ + (yy_n_chars) = YY_CURRENT_BUFFER_LVALUE->yy_n_chars; + (yytext_ptr) = (yy_c_buf_p) = YY_CURRENT_BUFFER_LVALUE->yy_buf_pos; + yyin = YY_CURRENT_BUFFER_LVALUE->yy_input_file; + (yy_hold_char) = *(yy_c_buf_p); +} + +/** Allocate and initialize an input buffer state. + * @param file A readable stream. + * @param size The character buffer size in bytes. When in doubt, use @c YY_BUF_SIZE. + * + * @return the allocated buffer state. + */ + YY_BUFFER_STATE yy_create_buffer (FILE * file, int size ) +{ + YY_BUFFER_STATE b; + + b = (YY_BUFFER_STATE) yyalloc( sizeof( struct yy_buffer_state ) ); + if ( ! 
b ) + YY_FATAL_ERROR( "out of dynamic memory in yy_create_buffer()" ); + + b->yy_buf_size = size; + + /* yy_ch_buf has to be 2 characters longer than the size given because + * we need to put in 2 end-of-buffer characters. + */ + b->yy_ch_buf = (char *) yyalloc( (yy_size_t) (b->yy_buf_size + 2) ); + if ( ! b->yy_ch_buf ) + YY_FATAL_ERROR( "out of dynamic memory in yy_create_buffer()" ); + + b->yy_is_our_buffer = 1; + + yy_init_buffer( b, file ); + + return b; +} + +/** Destroy the buffer. + * @param b a buffer created with yy_create_buffer() + * + */ + void yy_delete_buffer (YY_BUFFER_STATE b ) +{ + + if ( ! b ) + return; + + if ( b == YY_CURRENT_BUFFER ) /* Not sure if we should pop here. */ + YY_CURRENT_BUFFER_LVALUE = (YY_BUFFER_STATE) 0; + + if ( b->yy_is_our_buffer ) + yyfree( (void *) b->yy_ch_buf ); + + yyfree( (void *) b ); +} + +/* Initializes or reinitializes a buffer. + * This function is sometimes called more than once on the same buffer, + * such as during a yyrestart() or at EOF. + */ + static void yy_init_buffer (YY_BUFFER_STATE b, FILE * file ) + +{ + int oerrno = errno; + + yy_flush_buffer( b ); + + b->yy_input_file = file; + b->yy_fill_buffer = 1; + + /* If b is the current buffer, then yy_init_buffer was _probably_ + * called from yyrestart() or through yy_get_next_buffer. + * In that case, we don't want to reset the lineno or column. + */ + if (b != YY_CURRENT_BUFFER){ + b->yy_bs_lineno = 1; + b->yy_bs_column = 0; + } + + b->yy_is_interactive = file ? (isatty( fileno(file) ) > 0) : 0; + + errno = oerrno; +} + +/** Discard all buffered characters. On the next scan, YY_INPUT will be called. + * @param b the buffer state to be flushed, usually @c YY_CURRENT_BUFFER. + * + */ + void yy_flush_buffer (YY_BUFFER_STATE b ) +{ + if ( ! b ) + return; + + b->yy_n_chars = 0; + + /* We always need two end-of-buffer characters. The first causes + * a transition to the end-of-buffer state. The second causes + * a jam in that state. 
+ */ + b->yy_ch_buf[0] = YY_END_OF_BUFFER_CHAR; + b->yy_ch_buf[1] = YY_END_OF_BUFFER_CHAR; + + b->yy_buf_pos = &b->yy_ch_buf[0]; + + b->yy_at_bol = 1; + b->yy_buffer_status = YY_BUFFER_NEW; + + if ( b == YY_CURRENT_BUFFER ) + yy_load_buffer_state( ); +} + +/** Pushes the new state onto the stack. The new state becomes + * the current state. This function will allocate the stack + * if necessary. + * @param new_buffer The new state. + * + */ +void yypush_buffer_state (YY_BUFFER_STATE new_buffer ) +{ + if (new_buffer == NULL) + return; + + yyensure_buffer_stack(); + + /* This block is copied from yy_switch_to_buffer. */ + if ( YY_CURRENT_BUFFER ) + { + /* Flush out information for old buffer. */ + *(yy_c_buf_p) = (yy_hold_char); + YY_CURRENT_BUFFER_LVALUE->yy_buf_pos = (yy_c_buf_p); + YY_CURRENT_BUFFER_LVALUE->yy_n_chars = (yy_n_chars); + } + + /* Only push if top exists. Otherwise, replace top. */ + if (YY_CURRENT_BUFFER) + (yy_buffer_stack_top)++; + YY_CURRENT_BUFFER_LVALUE = new_buffer; + + /* copied from yy_switch_to_buffer. */ + yy_load_buffer_state( ); + (yy_did_buffer_switch_on_eof) = 1; +} + +/** Removes and deletes the top of the stack, if present. + * The next element becomes the new top. + * + */ +void yypop_buffer_state (void) +{ + if (!YY_CURRENT_BUFFER) + return; + + yy_delete_buffer(YY_CURRENT_BUFFER ); + YY_CURRENT_BUFFER_LVALUE = NULL; + if ((yy_buffer_stack_top) > 0) + --(yy_buffer_stack_top); + + if (YY_CURRENT_BUFFER) { + yy_load_buffer_state( ); + (yy_did_buffer_switch_on_eof) = 1; + } +} + +/* Allocates the stack if it does not exist. + * Guarantees space for at least one push. + */ +static void yyensure_buffer_stack (void) +{ + yy_size_t num_to_alloc; + + if (!(yy_buffer_stack)) { + + /* First allocation is just for 2 elements, since we don't know if this + * scanner will even need a stack. We use 2 instead of 1 to avoid an + * immediate realloc on the next call. + */ + num_to_alloc = 1; /* After all that talk, this was set to 1 anyways... 
*/ + (yy_buffer_stack) = (struct yy_buffer_state**)yyalloc + (num_to_alloc * sizeof(struct yy_buffer_state*) + ); + if ( ! (yy_buffer_stack) ) + YY_FATAL_ERROR( "out of dynamic memory in yyensure_buffer_stack()" ); + + memset((yy_buffer_stack), 0, num_to_alloc * sizeof(struct yy_buffer_state*)); + + (yy_buffer_stack_max) = num_to_alloc; + (yy_buffer_stack_top) = 0; + return; + } + + if ((yy_buffer_stack_top) >= ((yy_buffer_stack_max)) - 1){ + + /* Increase the buffer to prepare for a possible push. */ + yy_size_t grow_size = 8 /* arbitrary grow size */; + + num_to_alloc = (yy_buffer_stack_max) + grow_size; + (yy_buffer_stack) = (struct yy_buffer_state**)yyrealloc + ((yy_buffer_stack), + num_to_alloc * sizeof(struct yy_buffer_state*) + ); + if ( ! (yy_buffer_stack) ) + YY_FATAL_ERROR( "out of dynamic memory in yyensure_buffer_stack()" ); + + /* zero only the new slots.*/ + memset((yy_buffer_stack) + (yy_buffer_stack_max), 0, grow_size * sizeof(struct yy_buffer_state*)); + (yy_buffer_stack_max) = num_to_alloc; + } +} + +/** Setup the input buffer state to scan directly from a user-specified character buffer. + * @param base the character buffer + * @param size the size in bytes of the character buffer + * + * @return the newly allocated buffer state object. + */ +YY_BUFFER_STATE yy_scan_buffer (char * base, yy_size_t size ) +{ + YY_BUFFER_STATE b; + + if ( size < 2 || + base[size-2] != YY_END_OF_BUFFER_CHAR || + base[size-1] != YY_END_OF_BUFFER_CHAR ) + /* They forgot to leave room for the EOB's. */ + return NULL; + + b = (YY_BUFFER_STATE) yyalloc( sizeof( struct yy_buffer_state ) ); + if ( ! 
b ) + YY_FATAL_ERROR( "out of dynamic memory in yy_scan_buffer()" ); + + b->yy_buf_size = (int) (size - 2); /* "- 2" to take care of EOB's */ + b->yy_buf_pos = b->yy_ch_buf = base; + b->yy_is_our_buffer = 0; + b->yy_input_file = NULL; + b->yy_n_chars = b->yy_buf_size; + b->yy_is_interactive = 0; + b->yy_at_bol = 1; + b->yy_fill_buffer = 0; + b->yy_buffer_status = YY_BUFFER_NEW; + + yy_switch_to_buffer( b ); + + return b; +} + +/** Setup the input buffer state to scan a string. The next call to yylex() will + * scan from a @e copy of @a str. + * @param yystr a NUL-terminated string to scan + * + * @return the newly allocated buffer state object. + * @note If you want to scan bytes that may contain NUL values, then use + * yy_scan_bytes() instead. + */ +YY_BUFFER_STATE yy_scan_string (const char * yystr ) +{ + + return yy_scan_bytes( yystr, (int) strlen(yystr) ); +} + +/** Setup the input buffer state to scan the given bytes. The next call to yylex() will + * scan from a @e copy of @a bytes. + * @param yybytes the byte buffer to scan + * @param _yybytes_len the number of bytes in the buffer pointed to by @a bytes. + * + * @return the newly allocated buffer state object. + */ +YY_BUFFER_STATE yy_scan_bytes (const char * yybytes, int _yybytes_len ) +{ + YY_BUFFER_STATE b; + char *buf; + yy_size_t n; + int i; + + /* Get memory for full buffer, including space for trailing EOB's. */ + n = (yy_size_t) (_yybytes_len + 2); + buf = (char *) yyalloc( n ); + if ( ! buf ) + YY_FATAL_ERROR( "out of dynamic memory in yy_scan_bytes()" ); + + for ( i = 0; i < _yybytes_len; ++i ) + buf[i] = yybytes[i]; + + buf[_yybytes_len] = buf[_yybytes_len+1] = YY_END_OF_BUFFER_CHAR; + + b = yy_scan_buffer( buf, n ); + if ( ! b ) + YY_FATAL_ERROR( "bad buffer in yy_scan_bytes()" ); + + /* It's okay to grow etc. this buffer, and we should throw it + * away when we're done. 
+ */ + b->yy_is_our_buffer = 1; + + return b; +} + +#ifndef YY_EXIT_FAILURE +#define YY_EXIT_FAILURE 2 +#endif + +static void yynoreturn yy_fatal_error (const char* msg ) +{ + fprintf( stderr, "%s\n", msg ); + exit( YY_EXIT_FAILURE ); +} + +/* Redefine yyless() so it works in section 3 code. */ + +#undef yyless +#define yyless(n) \ + do \ + { \ + /* Undo effects of setting up yytext. */ \ + int yyless_macro_arg = (n); \ + YY_LESS_LINENO(yyless_macro_arg);\ + yytext[yyleng] = (yy_hold_char); \ + (yy_c_buf_p) = yytext + yyless_macro_arg; \ + (yy_hold_char) = *(yy_c_buf_p); \ + *(yy_c_buf_p) = '\0'; \ + yyleng = yyless_macro_arg; \ + } \ + while ( 0 ) + +/* Accessor methods (get/set functions) to struct members. */ + +/** Get the current line number. + * + */ +int yyget_lineno (void) +{ + + return yylineno; +} + +/** Get the input stream. + * + */ +FILE *yyget_in (void) +{ + return yyin; +} + +/** Get the output stream. + * + */ +FILE *yyget_out (void) +{ + return yyout; +} + +/** Get the length of the current token. + * + */ +int yyget_leng (void) +{ + return yyleng; +} + +/** Get the current token. + * + */ + +char *yyget_text (void) +{ + return yytext; +} + +/** Set the current line number. + * @param _line_number line number + * + */ +void yyset_lineno (int _line_number ) +{ + + yylineno = _line_number; +} + +/** Set the input stream. This does not discard the current + * input buffer. + * @param _in_str A readable stream. + * + * @see yy_switch_to_buffer + */ +void yyset_in (FILE * _in_str ) +{ + yyin = _in_str ; +} + +void yyset_out (FILE * _out_str ) +{ + yyout = _out_str ; +} + +int yyget_debug (void) +{ + return yy_flex_debug; +} + +void yyset_debug (int _bdebug ) +{ + yy_flex_debug = _bdebug ; +} + +static int yy_init_globals (void) +{ + /* Initialization is the same as for the non-reentrant scanner. + * This function is called from yylex_destroy(), so don't allocate here. 
+ */ + + (yy_buffer_stack) = NULL; + (yy_buffer_stack_top) = 0; + (yy_buffer_stack_max) = 0; + (yy_c_buf_p) = NULL; + (yy_init) = 0; + (yy_start) = 0; + + (yy_state_buf) = 0; + (yy_state_ptr) = 0; + (yy_full_match) = 0; + (yy_lp) = 0; + +/* Defined in main.c */ +#ifdef YY_STDINIT + yyin = stdin; + yyout = stdout; +#else + yyin = NULL; + yyout = NULL; +#endif + + /* For future reference: Set errno on error, since we are called by + * yylex_init() + */ + return 0; +} + +/* yylex_destroy is for both reentrant and non-reentrant scanners. */ +int yylex_destroy (void) +{ + + /* Pop the buffer stack, destroying each element. */ + while(YY_CURRENT_BUFFER){ + yy_delete_buffer( YY_CURRENT_BUFFER ); + YY_CURRENT_BUFFER_LVALUE = NULL; + yypop_buffer_state(); + } + + /* Destroy the stack itself. */ + yyfree((yy_buffer_stack) ); + (yy_buffer_stack) = NULL; + + yyfree ( (yy_state_buf) ); + (yy_state_buf) = NULL; + + /* Reset the globals. This is important in a non-reentrant scanner so the next time + * yylex() is called, initialization will occur. */ + yy_init_globals( ); + + return 0; +} + +/* + * Internal utility routines. + */ + +#ifndef yytext_ptr +static void yy_flex_strncpy (char* s1, const char * s2, int n ) +{ + + int i; + for ( i = 0; i < n; ++i ) + s1[i] = s2[i]; +} +#endif + +#ifdef YY_NEED_STRLEN +static int yy_flex_strlen (const char * s ) +{ + int n; + for ( n = 0; s[n]; ++n ) + ; + + return n; +} +#endif + +void *yyalloc (yy_size_t size ) +{ + return malloc(size); +} + +void *yyrealloc (void * ptr, yy_size_t size ) +{ + + /* The cast to (char *) in the following accommodates both + * implementations that use char* generic pointers, and those + * that use void* generic pointers. It works with the latter + * because both ANSI C and C++ allow castless assignment from + * any pointer type to void*, and deal with argument conversions + * as though doing an assignment. 
+ */ + return realloc(ptr, size); +} + +void yyfree (void * ptr ) +{ + free( (char *) ptr ); /* see yyrealloc() for (char *) cast */ +} + +#define YYTABLES_NAME "yytables" + +#line 70 "util/show_help_lex.l" + + +/* Old flex (2.5.4a? and older) does not define a destroy function */ +#if !defined(YY_FLEX_SUBMINOR_VERSION) +#define YY_FLEX_SUBMINOR_VERSION 0 +#endif + +#if (YY_FLEX_MAJOR_VERSION < 2) || (YY_FLEX_MAJOR_VERSION == 2 && (YY_FLEX_MINOR_VERSION < 5 || (YY_FLEX_MINOR_VERSION == 5 && YY_FLEX_SUBMINOR_VERSION < 5))) +int pmix_show_help_yylex_destroy(void) +{ + if (NULL != YY_CURRENT_BUFFER) { + yy_delete_buffer(YY_CURRENT_BUFFER); +#if defined(YY_CURRENT_BUFFER_LVALUE) + YY_CURRENT_BUFFER_LVALUE = NULL; +#else + YY_CURRENT_BUFFER = NULL; +#endif /* YY_CURRENT_BUFFER_LVALUE */ + } + return YY_NULL; +} +#endif + +static int pmix_show_help_yywrap(void) +{ + pmix_show_help_parse_done = true; + return 1; +} + + +/* + * Ensure that we have a valid yybuffer to use. Specifically, if this + * scanner is invoked a second time, finish_parsing() (above) will + * have been executed, and the current buffer will have been freed. + * Flex doesn't recognize this fact because as far as it's concerned, + * its internal state was already initialized, so it thinks it should + * have a valid buffer. Hence, here we ensure to give it a valid + * buffer. + */ +int pmix_show_help_init_buffer(FILE *file) +{ + YY_BUFFER_STATE buf = yy_create_buffer(file, YY_BUF_SIZE); + yy_switch_to_buffer(buf); + + return 0; +} + diff --git a/opal/mca/pmix/pmix4x/openpmix/test/run_tests00.pl b/opal/mca/pmix/pmix4x/openpmix/test/run_tests00.pl new file mode 100755 index 00000000000..fbe3ba0fab3 --- /dev/null +++ b/opal/mca/pmix/pmix4x/openpmix/test/run_tests00.pl @@ -0,0 +1,73 @@ +#!/usr/bin/env perl +# +# Copyright (c) 2019 Intel, Inc. +# +# Copyright (c) 2019 Cisco Systems, Inc. 
All rights reserved +# $COPYRIGHT$ +# +# Additional copyrights may follow + +use strict; + +my @tests = ("-n 4 --ns-dist 3:1 --fence \"[db | 0:0-2;1:0]\"", + "-n 4 --ns-dist 3:1 --fence \"[db | 0:;1:0]\"", + "-n 4 --ns-dist 3:1 --fence \"[db | 0:;1:]\"", + "-n 4 --ns-dist 3:1 --fence \"[0:]\"", + "-n 4 --ns-dist 3:1 --fence \"[b | 0:]\"", + "-n 4 --ns-dist 3:1 --fence \"[d | 0:]\" --noise \"[0:0,1]\"", + "-n 4 --job-fence -c", + "-n 4 --job-fence", + "-n 2 --test-publish", + "-n 2 --test-spawn", + "-n 2 --test-connect", + "-n 5 --test-resolve-peers --ns-dist \"1:2:2\"", + "-n 5 --test-replace 100:0,1,10,50,99", + "-n 5 --test-internal 10", + "-s 2 -n 2 --job-fence", + "-s 2 -n 2 --job-fence -c"); + +my $test; +my $cmd; +my $output; +my $status = 0; +my $testnum; + +# We are running tests against the build tree (vs. the installation +# tree). Autogen gives us a full list of all possible component +# directories in PMIX_COMPONENT_LIBRARY_PATHS. Iterate through each +# of those directories: 1) to see if there is actually a component +# built in there, and 2) to turn it into an absolute path name. Then +# put the new list in the "mca_bast_component_path" MCA parameter env +# variable so that the MCA base knows where to find all the +# components. 
+my @myfullpaths; +my $mybuilddir = "/home/wbailey2/ompi/opal/mca/pmix/pmix4x/openpmix"; +my $mypathstr = "src/mca/bfrops/v12:src/mca/bfrops/v20:src/mca/bfrops/v21:src/mca/bfrops/v3:src/mca/bfrops/v4:src/mca/common/dstore:src/mca/gds/ds12:src/mca/gds/ds21:src/mca/gds/hash:src/mca/pcompress/zlib:src/mca/pdl/pdlopen:src/mca/pdl/plibltdl:src/mca/pfexec/linux:src/mca/pif/bsdx_ipv4:src/mca/pif/bsdx_ipv6:src/mca/pif/linux_ipv6:src/mca/pif/posix_ipv4:src/mca/pif/solaris_ipv6:src/mca/pinstalldirs/config:src/mca/pinstalldirs/env:src/mca/plog/default:src/mca/plog/stdfd:src/mca/plog/syslog:src/mca/pmdl/ompi:src/mca/pnet/tcp:src/mca/pnet/test:src/mca/preg/compress:src/mca/preg/native:src/mca/psec/dummy_handshake:src/mca/psec/munge:src/mca/psec/native:src/mca/psec/none:src/mca/psensor/file:src/mca/psensor/heartbeat:src/mca/pshmem/mmap:src/mca/psquash/flex128:src/mca/psquash/native:src/mca/ptl/tcp:src/mca/ptl/usock"; +my @splitstr = split(':', $mypathstr); +foreach my $path (@splitstr) { + # Note that the component is actually built in the ".libs" + # subdirectory. If the component wasn't built, that subdirectory + # will not exist, so don't save it. + my $fullpath = $mybuilddir . "/" . $path . "/.libs"; + push(@myfullpaths, $fullpath) + if (-d $fullpath); +} +my $mymcapaths = join(":", @myfullpaths); +$ENV{'PMIX_MCA_mca_base_component_path'} = $mymcapaths; + +my $wdir = $mybuilddir . "/test"; +chdir $wdir; + +$testnum = $0; +$testnum =~ s/.pl//; +$testnum = substr($testnum, -2); +$test = @tests[$testnum]; + +$cmd = "./pmix_test " . $test . " 2>&1"; +print $cmd . "\n"; +$output = `$cmd`; +print $output . "\n"; +print "CODE $?\n"; +$status = "$?"; + +exit($status >> 8); diff --git a/opal/mca/pmix/pmix4x/openpmix/test/run_tests01.pl b/opal/mca/pmix/pmix4x/openpmix/test/run_tests01.pl new file mode 100755 index 00000000000..fbe3ba0fab3 --- /dev/null +++ b/opal/mca/pmix/pmix4x/openpmix/test/run_tests01.pl @@ -0,0 +1,73 @@ +#!/usr/bin/env perl +# +# Copyright (c) 2019 Intel, Inc. 
+# +# Copyright (c) 2019 Cisco Systems, Inc. All rights reserved +# $COPYRIGHT$ +# +# Additional copyrights may follow + +use strict; + +my @tests = ("-n 4 --ns-dist 3:1 --fence \"[db | 0:0-2;1:0]\"", + "-n 4 --ns-dist 3:1 --fence \"[db | 0:;1:0]\"", + "-n 4 --ns-dist 3:1 --fence \"[db | 0:;1:]\"", + "-n 4 --ns-dist 3:1 --fence \"[0:]\"", + "-n 4 --ns-dist 3:1 --fence \"[b | 0:]\"", + "-n 4 --ns-dist 3:1 --fence \"[d | 0:]\" --noise \"[0:0,1]\"", + "-n 4 --job-fence -c", + "-n 4 --job-fence", + "-n 2 --test-publish", + "-n 2 --test-spawn", + "-n 2 --test-connect", + "-n 5 --test-resolve-peers --ns-dist \"1:2:2\"", + "-n 5 --test-replace 100:0,1,10,50,99", + "-n 5 --test-internal 10", + "-s 2 -n 2 --job-fence", + "-s 2 -n 2 --job-fence -c"); + +my $test; +my $cmd; +my $output; +my $status = 0; +my $testnum; + +# We are running tests against the build tree (vs. the installation +# tree). Autogen gives us a full list of all possible component +# directories in PMIX_COMPONENT_LIBRARY_PATHS. Iterate through each +# of those directories: 1) to see if there is actually a component +# built in there, and 2) to turn it into an absolute path name. Then +# put the new list in the "mca_bast_component_path" MCA parameter env +# variable so that the MCA base knows where to find all the +# components. 
+my @myfullpaths; +my $mybuilddir = "/home/wbailey2/ompi/opal/mca/pmix/pmix4x/openpmix"; +my $mypathstr = "src/mca/bfrops/v12:src/mca/bfrops/v20:src/mca/bfrops/v21:src/mca/bfrops/v3:src/mca/bfrops/v4:src/mca/common/dstore:src/mca/gds/ds12:src/mca/gds/ds21:src/mca/gds/hash:src/mca/pcompress/zlib:src/mca/pdl/pdlopen:src/mca/pdl/plibltdl:src/mca/pfexec/linux:src/mca/pif/bsdx_ipv4:src/mca/pif/bsdx_ipv6:src/mca/pif/linux_ipv6:src/mca/pif/posix_ipv4:src/mca/pif/solaris_ipv6:src/mca/pinstalldirs/config:src/mca/pinstalldirs/env:src/mca/plog/default:src/mca/plog/stdfd:src/mca/plog/syslog:src/mca/pmdl/ompi:src/mca/pnet/tcp:src/mca/pnet/test:src/mca/preg/compress:src/mca/preg/native:src/mca/psec/dummy_handshake:src/mca/psec/munge:src/mca/psec/native:src/mca/psec/none:src/mca/psensor/file:src/mca/psensor/heartbeat:src/mca/pshmem/mmap:src/mca/psquash/flex128:src/mca/psquash/native:src/mca/ptl/tcp:src/mca/ptl/usock"; +my @splitstr = split(':', $mypathstr); +foreach my $path (@splitstr) { + # Note that the component is actually built in the ".libs" + # subdirectory. If the component wasn't built, that subdirectory + # will not exist, so don't save it. + my $fullpath = $mybuilddir . "/" . $path . "/.libs"; + push(@myfullpaths, $fullpath) + if (-d $fullpath); +} +my $mymcapaths = join(":", @myfullpaths); +$ENV{'PMIX_MCA_mca_base_component_path'} = $mymcapaths; + +my $wdir = $mybuilddir . "/test"; +chdir $wdir; + +$testnum = $0; +$testnum =~ s/.pl//; +$testnum = substr($testnum, -2); +$test = @tests[$testnum]; + +$cmd = "./pmix_test " . $test . " 2>&1"; +print $cmd . "\n"; +$output = `$cmd`; +print $output . "\n"; +print "CODE $?\n"; +$status = "$?"; + +exit($status >> 8); diff --git a/opal/mca/pmix/pmix4x/openpmix/test/run_tests02.pl b/opal/mca/pmix/pmix4x/openpmix/test/run_tests02.pl new file mode 100755 index 00000000000..fbe3ba0fab3 --- /dev/null +++ b/opal/mca/pmix/pmix4x/openpmix/test/run_tests02.pl @@ -0,0 +1,73 @@ +#!/usr/bin/env perl +# +# Copyright (c) 2019 Intel, Inc. 
+# +# Copyright (c) 2019 Cisco Systems, Inc. All rights reserved +# $COPYRIGHT$ +# +# Additional copyrights may follow + +use strict; + +my @tests = ("-n 4 --ns-dist 3:1 --fence \"[db | 0:0-2;1:0]\"", + "-n 4 --ns-dist 3:1 --fence \"[db | 0:;1:0]\"", + "-n 4 --ns-dist 3:1 --fence \"[db | 0:;1:]\"", + "-n 4 --ns-dist 3:1 --fence \"[0:]\"", + "-n 4 --ns-dist 3:1 --fence \"[b | 0:]\"", + "-n 4 --ns-dist 3:1 --fence \"[d | 0:]\" --noise \"[0:0,1]\"", + "-n 4 --job-fence -c", + "-n 4 --job-fence", + "-n 2 --test-publish", + "-n 2 --test-spawn", + "-n 2 --test-connect", + "-n 5 --test-resolve-peers --ns-dist \"1:2:2\"", + "-n 5 --test-replace 100:0,1,10,50,99", + "-n 5 --test-internal 10", + "-s 2 -n 2 --job-fence", + "-s 2 -n 2 --job-fence -c"); + +my $test; +my $cmd; +my $output; +my $status = 0; +my $testnum; + +# We are running tests against the build tree (vs. the installation +# tree). Autogen gives us a full list of all possible component +# directories in PMIX_COMPONENT_LIBRARY_PATHS. Iterate through each +# of those directories: 1) to see if there is actually a component +# built in there, and 2) to turn it into an absolute path name. Then +# put the new list in the "mca_bast_component_path" MCA parameter env +# variable so that the MCA base knows where to find all the +# components. 
+my @myfullpaths; +my $mybuilddir = "/home/wbailey2/ompi/opal/mca/pmix/pmix4x/openpmix"; +my $mypathstr = "src/mca/bfrops/v12:src/mca/bfrops/v20:src/mca/bfrops/v21:src/mca/bfrops/v3:src/mca/bfrops/v4:src/mca/common/dstore:src/mca/gds/ds12:src/mca/gds/ds21:src/mca/gds/hash:src/mca/pcompress/zlib:src/mca/pdl/pdlopen:src/mca/pdl/plibltdl:src/mca/pfexec/linux:src/mca/pif/bsdx_ipv4:src/mca/pif/bsdx_ipv6:src/mca/pif/linux_ipv6:src/mca/pif/posix_ipv4:src/mca/pif/solaris_ipv6:src/mca/pinstalldirs/config:src/mca/pinstalldirs/env:src/mca/plog/default:src/mca/plog/stdfd:src/mca/plog/syslog:src/mca/pmdl/ompi:src/mca/pnet/tcp:src/mca/pnet/test:src/mca/preg/compress:src/mca/preg/native:src/mca/psec/dummy_handshake:src/mca/psec/munge:src/mca/psec/native:src/mca/psec/none:src/mca/psensor/file:src/mca/psensor/heartbeat:src/mca/pshmem/mmap:src/mca/psquash/flex128:src/mca/psquash/native:src/mca/ptl/tcp:src/mca/ptl/usock"; +my @splitstr = split(':', $mypathstr); +foreach my $path (@splitstr) { + # Note that the component is actually built in the ".libs" + # subdirectory. If the component wasn't built, that subdirectory + # will not exist, so don't save it. + my $fullpath = $mybuilddir . "/" . $path . "/.libs"; + push(@myfullpaths, $fullpath) + if (-d $fullpath); +} +my $mymcapaths = join(":", @myfullpaths); +$ENV{'PMIX_MCA_mca_base_component_path'} = $mymcapaths; + +my $wdir = $mybuilddir . "/test"; +chdir $wdir; + +$testnum = $0; +$testnum =~ s/.pl//; +$testnum = substr($testnum, -2); +$test = @tests[$testnum]; + +$cmd = "./pmix_test " . $test . " 2>&1"; +print $cmd . "\n"; +$output = `$cmd`; +print $output . "\n"; +print "CODE $?\n"; +$status = "$?"; + +exit($status >> 8); diff --git a/opal/mca/pmix/pmix4x/openpmix/test/run_tests03.pl b/opal/mca/pmix/pmix4x/openpmix/test/run_tests03.pl new file mode 100755 index 00000000000..fbe3ba0fab3 --- /dev/null +++ b/opal/mca/pmix/pmix4x/openpmix/test/run_tests03.pl @@ -0,0 +1,73 @@ +#!/usr/bin/env perl +# +# Copyright (c) 2019 Intel, Inc. 
+# +# Copyright (c) 2019 Cisco Systems, Inc. All rights reserved +# $COPYRIGHT$ +# +# Additional copyrights may follow + +use strict; + +my @tests = ("-n 4 --ns-dist 3:1 --fence \"[db | 0:0-2;1:0]\"", + "-n 4 --ns-dist 3:1 --fence \"[db | 0:;1:0]\"", + "-n 4 --ns-dist 3:1 --fence \"[db | 0:;1:]\"", + "-n 4 --ns-dist 3:1 --fence \"[0:]\"", + "-n 4 --ns-dist 3:1 --fence \"[b | 0:]\"", + "-n 4 --ns-dist 3:1 --fence \"[d | 0:]\" --noise \"[0:0,1]\"", + "-n 4 --job-fence -c", + "-n 4 --job-fence", + "-n 2 --test-publish", + "-n 2 --test-spawn", + "-n 2 --test-connect", + "-n 5 --test-resolve-peers --ns-dist \"1:2:2\"", + "-n 5 --test-replace 100:0,1,10,50,99", + "-n 5 --test-internal 10", + "-s 2 -n 2 --job-fence", + "-s 2 -n 2 --job-fence -c"); + +my $test; +my $cmd; +my $output; +my $status = 0; +my $testnum; + +# We are running tests against the build tree (vs. the installation +# tree). Autogen gives us a full list of all possible component +# directories in PMIX_COMPONENT_LIBRARY_PATHS. Iterate through each +# of those directories: 1) to see if there is actually a component +# built in there, and 2) to turn it into an absolute path name. Then +# put the new list in the "mca_bast_component_path" MCA parameter env +# variable so that the MCA base knows where to find all the +# components. 
+my @myfullpaths; +my $mybuilddir = "/home/wbailey2/ompi/opal/mca/pmix/pmix4x/openpmix"; +my $mypathstr = "src/mca/bfrops/v12:src/mca/bfrops/v20:src/mca/bfrops/v21:src/mca/bfrops/v3:src/mca/bfrops/v4:src/mca/common/dstore:src/mca/gds/ds12:src/mca/gds/ds21:src/mca/gds/hash:src/mca/pcompress/zlib:src/mca/pdl/pdlopen:src/mca/pdl/plibltdl:src/mca/pfexec/linux:src/mca/pif/bsdx_ipv4:src/mca/pif/bsdx_ipv6:src/mca/pif/linux_ipv6:src/mca/pif/posix_ipv4:src/mca/pif/solaris_ipv6:src/mca/pinstalldirs/config:src/mca/pinstalldirs/env:src/mca/plog/default:src/mca/plog/stdfd:src/mca/plog/syslog:src/mca/pmdl/ompi:src/mca/pnet/tcp:src/mca/pnet/test:src/mca/preg/compress:src/mca/preg/native:src/mca/psec/dummy_handshake:src/mca/psec/munge:src/mca/psec/native:src/mca/psec/none:src/mca/psensor/file:src/mca/psensor/heartbeat:src/mca/pshmem/mmap:src/mca/psquash/flex128:src/mca/psquash/native:src/mca/ptl/tcp:src/mca/ptl/usock"; +my @splitstr = split(':', $mypathstr); +foreach my $path (@splitstr) { + # Note that the component is actually built in the ".libs" + # subdirectory. If the component wasn't built, that subdirectory + # will not exist, so don't save it. + my $fullpath = $mybuilddir . "/" . $path . "/.libs"; + push(@myfullpaths, $fullpath) + if (-d $fullpath); +} +my $mymcapaths = join(":", @myfullpaths); +$ENV{'PMIX_MCA_mca_base_component_path'} = $mymcapaths; + +my $wdir = $mybuilddir . "/test"; +chdir $wdir; + +$testnum = $0; +$testnum =~ s/.pl//; +$testnum = substr($testnum, -2); +$test = @tests[$testnum]; + +$cmd = "./pmix_test " . $test . " 2>&1"; +print $cmd . "\n"; +$output = `$cmd`; +print $output . "\n"; +print "CODE $?\n"; +$status = "$?"; + +exit($status >> 8); diff --git a/opal/mca/pmix/pmix4x/openpmix/test/run_tests04.pl b/opal/mca/pmix/pmix4x/openpmix/test/run_tests04.pl new file mode 100755 index 00000000000..fbe3ba0fab3 --- /dev/null +++ b/opal/mca/pmix/pmix4x/openpmix/test/run_tests04.pl @@ -0,0 +1,73 @@ +#!/usr/bin/env perl +# +# Copyright (c) 2019 Intel, Inc. 
+# +# Copyright (c) 2019 Cisco Systems, Inc. All rights reserved +# $COPYRIGHT$ +# +# Additional copyrights may follow + +use strict; + +my @tests = ("-n 4 --ns-dist 3:1 --fence \"[db | 0:0-2;1:0]\"", + "-n 4 --ns-dist 3:1 --fence \"[db | 0:;1:0]\"", + "-n 4 --ns-dist 3:1 --fence \"[db | 0:;1:]\"", + "-n 4 --ns-dist 3:1 --fence \"[0:]\"", + "-n 4 --ns-dist 3:1 --fence \"[b | 0:]\"", + "-n 4 --ns-dist 3:1 --fence \"[d | 0:]\" --noise \"[0:0,1]\"", + "-n 4 --job-fence -c", + "-n 4 --job-fence", + "-n 2 --test-publish", + "-n 2 --test-spawn", + "-n 2 --test-connect", + "-n 5 --test-resolve-peers --ns-dist \"1:2:2\"", + "-n 5 --test-replace 100:0,1,10,50,99", + "-n 5 --test-internal 10", + "-s 2 -n 2 --job-fence", + "-s 2 -n 2 --job-fence -c"); + +my $test; +my $cmd; +my $output; +my $status = 0; +my $testnum; + +# We are running tests against the build tree (vs. the installation +# tree). Autogen gives us a full list of all possible component +# directories in PMIX_COMPONENT_LIBRARY_PATHS. Iterate through each +# of those directories: 1) to see if there is actually a component +# built in there, and 2) to turn it into an absolute path name. Then +# put the new list in the "mca_bast_component_path" MCA parameter env +# variable so that the MCA base knows where to find all the +# components. 
+my @myfullpaths; +my $mybuilddir = "/home/wbailey2/ompi/opal/mca/pmix/pmix4x/openpmix"; +my $mypathstr = "src/mca/bfrops/v12:src/mca/bfrops/v20:src/mca/bfrops/v21:src/mca/bfrops/v3:src/mca/bfrops/v4:src/mca/common/dstore:src/mca/gds/ds12:src/mca/gds/ds21:src/mca/gds/hash:src/mca/pcompress/zlib:src/mca/pdl/pdlopen:src/mca/pdl/plibltdl:src/mca/pfexec/linux:src/mca/pif/bsdx_ipv4:src/mca/pif/bsdx_ipv6:src/mca/pif/linux_ipv6:src/mca/pif/posix_ipv4:src/mca/pif/solaris_ipv6:src/mca/pinstalldirs/config:src/mca/pinstalldirs/env:src/mca/plog/default:src/mca/plog/stdfd:src/mca/plog/syslog:src/mca/pmdl/ompi:src/mca/pnet/tcp:src/mca/pnet/test:src/mca/preg/compress:src/mca/preg/native:src/mca/psec/dummy_handshake:src/mca/psec/munge:src/mca/psec/native:src/mca/psec/none:src/mca/psensor/file:src/mca/psensor/heartbeat:src/mca/pshmem/mmap:src/mca/psquash/flex128:src/mca/psquash/native:src/mca/ptl/tcp:src/mca/ptl/usock"; +my @splitstr = split(':', $mypathstr); +foreach my $path (@splitstr) { + # Note that the component is actually built in the ".libs" + # subdirectory. If the component wasn't built, that subdirectory + # will not exist, so don't save it. + my $fullpath = $mybuilddir . "/" . $path . "/.libs"; + push(@myfullpaths, $fullpath) + if (-d $fullpath); +} +my $mymcapaths = join(":", @myfullpaths); +$ENV{'PMIX_MCA_mca_base_component_path'} = $mymcapaths; + +my $wdir = $mybuilddir . "/test"; +chdir $wdir; + +$testnum = $0; +$testnum =~ s/.pl//; +$testnum = substr($testnum, -2); +$test = @tests[$testnum]; + +$cmd = "./pmix_test " . $test . " 2>&1"; +print $cmd . "\n"; +$output = `$cmd`; +print $output . "\n"; +print "CODE $?\n"; +$status = "$?"; + +exit($status >> 8); diff --git a/opal/mca/pmix/pmix4x/openpmix/test/run_tests05.pl b/opal/mca/pmix/pmix4x/openpmix/test/run_tests05.pl new file mode 100755 index 00000000000..fbe3ba0fab3 --- /dev/null +++ b/opal/mca/pmix/pmix4x/openpmix/test/run_tests05.pl @@ -0,0 +1,73 @@ +#!/usr/bin/env perl +# +# Copyright (c) 2019 Intel, Inc. 
+# +# Copyright (c) 2019 Cisco Systems, Inc. All rights reserved +# $COPYRIGHT$ +# +# Additional copyrights may follow + +use strict; + +my @tests = ("-n 4 --ns-dist 3:1 --fence \"[db | 0:0-2;1:0]\"", + "-n 4 --ns-dist 3:1 --fence \"[db | 0:;1:0]\"", + "-n 4 --ns-dist 3:1 --fence \"[db | 0:;1:]\"", + "-n 4 --ns-dist 3:1 --fence \"[0:]\"", + "-n 4 --ns-dist 3:1 --fence \"[b | 0:]\"", + "-n 4 --ns-dist 3:1 --fence \"[d | 0:]\" --noise \"[0:0,1]\"", + "-n 4 --job-fence -c", + "-n 4 --job-fence", + "-n 2 --test-publish", + "-n 2 --test-spawn", + "-n 2 --test-connect", + "-n 5 --test-resolve-peers --ns-dist \"1:2:2\"", + "-n 5 --test-replace 100:0,1,10,50,99", + "-n 5 --test-internal 10", + "-s 2 -n 2 --job-fence", + "-s 2 -n 2 --job-fence -c"); + +my $test; +my $cmd; +my $output; +my $status = 0; +my $testnum; + +# We are running tests against the build tree (vs. the installation +# tree). Autogen gives us a full list of all possible component +# directories in PMIX_COMPONENT_LIBRARY_PATHS. Iterate through each +# of those directories: 1) to see if there is actually a component +# built in there, and 2) to turn it into an absolute path name. Then +# put the new list in the "mca_bast_component_path" MCA parameter env +# variable so that the MCA base knows where to find all the +# components. 
+my @myfullpaths; +my $mybuilddir = "/home/wbailey2/ompi/opal/mca/pmix/pmix4x/openpmix"; +my $mypathstr = "src/mca/bfrops/v12:src/mca/bfrops/v20:src/mca/bfrops/v21:src/mca/bfrops/v3:src/mca/bfrops/v4:src/mca/common/dstore:src/mca/gds/ds12:src/mca/gds/ds21:src/mca/gds/hash:src/mca/pcompress/zlib:src/mca/pdl/pdlopen:src/mca/pdl/plibltdl:src/mca/pfexec/linux:src/mca/pif/bsdx_ipv4:src/mca/pif/bsdx_ipv6:src/mca/pif/linux_ipv6:src/mca/pif/posix_ipv4:src/mca/pif/solaris_ipv6:src/mca/pinstalldirs/config:src/mca/pinstalldirs/env:src/mca/plog/default:src/mca/plog/stdfd:src/mca/plog/syslog:src/mca/pmdl/ompi:src/mca/pnet/tcp:src/mca/pnet/test:src/mca/preg/compress:src/mca/preg/native:src/mca/psec/dummy_handshake:src/mca/psec/munge:src/mca/psec/native:src/mca/psec/none:src/mca/psensor/file:src/mca/psensor/heartbeat:src/mca/pshmem/mmap:src/mca/psquash/flex128:src/mca/psquash/native:src/mca/ptl/tcp:src/mca/ptl/usock"; +my @splitstr = split(':', $mypathstr); +foreach my $path (@splitstr) { + # Note that the component is actually built in the ".libs" + # subdirectory. If the component wasn't built, that subdirectory + # will not exist, so don't save it. + my $fullpath = $mybuilddir . "/" . $path . "/.libs"; + push(@myfullpaths, $fullpath) + if (-d $fullpath); +} +my $mymcapaths = join(":", @myfullpaths); +$ENV{'PMIX_MCA_mca_base_component_path'} = $mymcapaths; + +my $wdir = $mybuilddir . "/test"; +chdir $wdir; + +$testnum = $0; +$testnum =~ s/.pl//; +$testnum = substr($testnum, -2); +$test = @tests[$testnum]; + +$cmd = "./pmix_test " . $test . " 2>&1"; +print $cmd . "\n"; +$output = `$cmd`; +print $output . "\n"; +print "CODE $?\n"; +$status = "$?"; + +exit($status >> 8); diff --git a/opal/mca/pmix/pmix4x/openpmix/test/run_tests06.pl b/opal/mca/pmix/pmix4x/openpmix/test/run_tests06.pl new file mode 100755 index 00000000000..fbe3ba0fab3 --- /dev/null +++ b/opal/mca/pmix/pmix4x/openpmix/test/run_tests06.pl @@ -0,0 +1,73 @@ +#!/usr/bin/env perl +# +# Copyright (c) 2019 Intel, Inc. 
+# +# Copyright (c) 2019 Cisco Systems, Inc. All rights reserved +# $COPYRIGHT$ +# +# Additional copyrights may follow + +use strict; + +my @tests = ("-n 4 --ns-dist 3:1 --fence \"[db | 0:0-2;1:0]\"", + "-n 4 --ns-dist 3:1 --fence \"[db | 0:;1:0]\"", + "-n 4 --ns-dist 3:1 --fence \"[db | 0:;1:]\"", + "-n 4 --ns-dist 3:1 --fence \"[0:]\"", + "-n 4 --ns-dist 3:1 --fence \"[b | 0:]\"", + "-n 4 --ns-dist 3:1 --fence \"[d | 0:]\" --noise \"[0:0,1]\"", + "-n 4 --job-fence -c", + "-n 4 --job-fence", + "-n 2 --test-publish", + "-n 2 --test-spawn", + "-n 2 --test-connect", + "-n 5 --test-resolve-peers --ns-dist \"1:2:2\"", + "-n 5 --test-replace 100:0,1,10,50,99", + "-n 5 --test-internal 10", + "-s 2 -n 2 --job-fence", + "-s 2 -n 2 --job-fence -c"); + +my $test; +my $cmd; +my $output; +my $status = 0; +my $testnum; + +# We are running tests against the build tree (vs. the installation +# tree). Autogen gives us a full list of all possible component +# directories in PMIX_COMPONENT_LIBRARY_PATHS. Iterate through each +# of those directories: 1) to see if there is actually a component +# built in there, and 2) to turn it into an absolute path name. Then +# put the new list in the "mca_base_component_path" MCA parameter env +# variable so that the MCA base knows where to find all the +# components. 
+my @myfullpaths; +my $mybuilddir = "/home/wbailey2/ompi/opal/mca/pmix/pmix4x/openpmix"; +my $mypathstr = "src/mca/bfrops/v12:src/mca/bfrops/v20:src/mca/bfrops/v21:src/mca/bfrops/v3:src/mca/bfrops/v4:src/mca/common/dstore:src/mca/gds/ds12:src/mca/gds/ds21:src/mca/gds/hash:src/mca/pcompress/zlib:src/mca/pdl/pdlopen:src/mca/pdl/plibltdl:src/mca/pfexec/linux:src/mca/pif/bsdx_ipv4:src/mca/pif/bsdx_ipv6:src/mca/pif/linux_ipv6:src/mca/pif/posix_ipv4:src/mca/pif/solaris_ipv6:src/mca/pinstalldirs/config:src/mca/pinstalldirs/env:src/mca/plog/default:src/mca/plog/stdfd:src/mca/plog/syslog:src/mca/pmdl/ompi:src/mca/pnet/tcp:src/mca/pnet/test:src/mca/preg/compress:src/mca/preg/native:src/mca/psec/dummy_handshake:src/mca/psec/munge:src/mca/psec/native:src/mca/psec/none:src/mca/psensor/file:src/mca/psensor/heartbeat:src/mca/pshmem/mmap:src/mca/psquash/flex128:src/mca/psquash/native:src/mca/ptl/tcp:src/mca/ptl/usock"; +my @splitstr = split(':', $mypathstr); +foreach my $path (@splitstr) { + # Note that the component is actually built in the ".libs" + # subdirectory. If the component wasn't built, that subdirectory + # will not exist, so don't save it. + my $fullpath = $mybuilddir . "/" . $path . "/.libs"; + push(@myfullpaths, $fullpath) + if (-d $fullpath); +} +my $mymcapaths = join(":", @myfullpaths); +$ENV{'PMIX_MCA_mca_base_component_path'} = $mymcapaths; + +my $wdir = $mybuilddir . "/test"; +chdir($wdir) or die "Cannot chdir to $wdir: $!\n"; # pmix_test is run relative to this directory + +$testnum = $0; +$testnum =~ s/\.pl$//; # strip only the literal trailing ".pl" suffix +$testnum = substr($testnum, -2); # two-digit index taken from the script name +$test = $tests[$testnum]; # scalar element, not a one-element slice + +$cmd = "./pmix_test " . $test . " 2>&1"; +print $cmd . "\n"; +$output = `$cmd`; +print $output . "\n"; +print "CODE $?\n"; +$status = "$?"; + +exit($status >> 8); diff --git a/opal/mca/pmix/pmix4x/openpmix/test/run_tests07.pl b/opal/mca/pmix/pmix4x/openpmix/test/run_tests07.pl new file mode 100755 index 00000000000..fbe3ba0fab3 --- /dev/null +++ b/opal/mca/pmix/pmix4x/openpmix/test/run_tests07.pl @@ -0,0 +1,73 @@ +#!/usr/bin/env perl +# +# Copyright (c) 2019 Intel, Inc. 
+# +# Copyright (c) 2019 Cisco Systems, Inc. All rights reserved +# $COPYRIGHT$ +# +# Additional copyrights may follow + +use strict; + +my @tests = ("-n 4 --ns-dist 3:1 --fence \"[db | 0:0-2;1:0]\"", + "-n 4 --ns-dist 3:1 --fence \"[db | 0:;1:0]\"", + "-n 4 --ns-dist 3:1 --fence \"[db | 0:;1:]\"", + "-n 4 --ns-dist 3:1 --fence \"[0:]\"", + "-n 4 --ns-dist 3:1 --fence \"[b | 0:]\"", + "-n 4 --ns-dist 3:1 --fence \"[d | 0:]\" --noise \"[0:0,1]\"", + "-n 4 --job-fence -c", + "-n 4 --job-fence", + "-n 2 --test-publish", + "-n 2 --test-spawn", + "-n 2 --test-connect", + "-n 5 --test-resolve-peers --ns-dist \"1:2:2\"", + "-n 5 --test-replace 100:0,1,10,50,99", + "-n 5 --test-internal 10", + "-s 2 -n 2 --job-fence", + "-s 2 -n 2 --job-fence -c"); + +my $test; +my $cmd; +my $output; +my $status = 0; +my $testnum; + +# We are running tests against the build tree (vs. the installation +# tree). Autogen gives us a full list of all possible component +# directories in PMIX_COMPONENT_LIBRARY_PATHS. Iterate through each +# of those directories: 1) to see if there is actually a component +# built in there, and 2) to turn it into an absolute path name. Then +# put the new list in the "mca_base_component_path" MCA parameter env +# variable so that the MCA base knows where to find all the +# components. 
+my @myfullpaths; +my $mybuilddir = "/home/wbailey2/ompi/opal/mca/pmix/pmix4x/openpmix"; +my $mypathstr = "src/mca/bfrops/v12:src/mca/bfrops/v20:src/mca/bfrops/v21:src/mca/bfrops/v3:src/mca/bfrops/v4:src/mca/common/dstore:src/mca/gds/ds12:src/mca/gds/ds21:src/mca/gds/hash:src/mca/pcompress/zlib:src/mca/pdl/pdlopen:src/mca/pdl/plibltdl:src/mca/pfexec/linux:src/mca/pif/bsdx_ipv4:src/mca/pif/bsdx_ipv6:src/mca/pif/linux_ipv6:src/mca/pif/posix_ipv4:src/mca/pif/solaris_ipv6:src/mca/pinstalldirs/config:src/mca/pinstalldirs/env:src/mca/plog/default:src/mca/plog/stdfd:src/mca/plog/syslog:src/mca/pmdl/ompi:src/mca/pnet/tcp:src/mca/pnet/test:src/mca/preg/compress:src/mca/preg/native:src/mca/psec/dummy_handshake:src/mca/psec/munge:src/mca/psec/native:src/mca/psec/none:src/mca/psensor/file:src/mca/psensor/heartbeat:src/mca/pshmem/mmap:src/mca/psquash/flex128:src/mca/psquash/native:src/mca/ptl/tcp:src/mca/ptl/usock"; +my @splitstr = split(':', $mypathstr); +foreach my $path (@splitstr) { + # Note that the component is actually built in the ".libs" + # subdirectory. If the component wasn't built, that subdirectory + # will not exist, so don't save it. + my $fullpath = $mybuilddir . "/" . $path . "/.libs"; + push(@myfullpaths, $fullpath) + if (-d $fullpath); +} +my $mymcapaths = join(":", @myfullpaths); +$ENV{'PMIX_MCA_mca_base_component_path'} = $mymcapaths; + +my $wdir = $mybuilddir . "/test"; +chdir($wdir) or die "Cannot chdir to $wdir: $!\n"; # pmix_test is run relative to this directory + +$testnum = $0; +$testnum =~ s/\.pl$//; # strip only the literal trailing ".pl" suffix +$testnum = substr($testnum, -2); # two-digit index taken from the script name +$test = $tests[$testnum]; # scalar element, not a one-element slice + +$cmd = "./pmix_test " . $test . " 2>&1"; +print $cmd . "\n"; +$output = `$cmd`; +print $output . "\n"; +print "CODE $?\n"; +$status = "$?"; + +exit($status >> 8); diff --git a/opal/mca/pmix/pmix4x/openpmix/test/run_tests08.pl b/opal/mca/pmix/pmix4x/openpmix/test/run_tests08.pl new file mode 100755 index 00000000000..fbe3ba0fab3 --- /dev/null +++ b/opal/mca/pmix/pmix4x/openpmix/test/run_tests08.pl @@ -0,0 +1,73 @@ +#!/usr/bin/env perl +# +# Copyright (c) 2019 Intel, Inc. 
+# +# Copyright (c) 2019 Cisco Systems, Inc. All rights reserved +# $COPYRIGHT$ +# +# Additional copyrights may follow + +use strict; + +my @tests = ("-n 4 --ns-dist 3:1 --fence \"[db | 0:0-2;1:0]\"", + "-n 4 --ns-dist 3:1 --fence \"[db | 0:;1:0]\"", + "-n 4 --ns-dist 3:1 --fence \"[db | 0:;1:]\"", + "-n 4 --ns-dist 3:1 --fence \"[0:]\"", + "-n 4 --ns-dist 3:1 --fence \"[b | 0:]\"", + "-n 4 --ns-dist 3:1 --fence \"[d | 0:]\" --noise \"[0:0,1]\"", + "-n 4 --job-fence -c", + "-n 4 --job-fence", + "-n 2 --test-publish", + "-n 2 --test-spawn", + "-n 2 --test-connect", + "-n 5 --test-resolve-peers --ns-dist \"1:2:2\"", + "-n 5 --test-replace 100:0,1,10,50,99", + "-n 5 --test-internal 10", + "-s 2 -n 2 --job-fence", + "-s 2 -n 2 --job-fence -c"); + +my $test; +my $cmd; +my $output; +my $status = 0; +my $testnum; + +# We are running tests against the build tree (vs. the installation +# tree). Autogen gives us a full list of all possible component +# directories in PMIX_COMPONENT_LIBRARY_PATHS. Iterate through each +# of those directories: 1) to see if there is actually a component +# built in there, and 2) to turn it into an absolute path name. Then +# put the new list in the "mca_base_component_path" MCA parameter env +# variable so that the MCA base knows where to find all the +# components. 
+my @myfullpaths; +my $mybuilddir = "/home/wbailey2/ompi/opal/mca/pmix/pmix4x/openpmix"; +my $mypathstr = "src/mca/bfrops/v12:src/mca/bfrops/v20:src/mca/bfrops/v21:src/mca/bfrops/v3:src/mca/bfrops/v4:src/mca/common/dstore:src/mca/gds/ds12:src/mca/gds/ds21:src/mca/gds/hash:src/mca/pcompress/zlib:src/mca/pdl/pdlopen:src/mca/pdl/plibltdl:src/mca/pfexec/linux:src/mca/pif/bsdx_ipv4:src/mca/pif/bsdx_ipv6:src/mca/pif/linux_ipv6:src/mca/pif/posix_ipv4:src/mca/pif/solaris_ipv6:src/mca/pinstalldirs/config:src/mca/pinstalldirs/env:src/mca/plog/default:src/mca/plog/stdfd:src/mca/plog/syslog:src/mca/pmdl/ompi:src/mca/pnet/tcp:src/mca/pnet/test:src/mca/preg/compress:src/mca/preg/native:src/mca/psec/dummy_handshake:src/mca/psec/munge:src/mca/psec/native:src/mca/psec/none:src/mca/psensor/file:src/mca/psensor/heartbeat:src/mca/pshmem/mmap:src/mca/psquash/flex128:src/mca/psquash/native:src/mca/ptl/tcp:src/mca/ptl/usock"; +my @splitstr = split(':', $mypathstr); +foreach my $path (@splitstr) { + # Note that the component is actually built in the ".libs" + # subdirectory. If the component wasn't built, that subdirectory + # will not exist, so don't save it. + my $fullpath = $mybuilddir . "/" . $path . "/.libs"; + push(@myfullpaths, $fullpath) + if (-d $fullpath); +} +my $mymcapaths = join(":", @myfullpaths); +$ENV{'PMIX_MCA_mca_base_component_path'} = $mymcapaths; + +my $wdir = $mybuilddir . "/test"; +chdir($wdir) or die "Cannot chdir to $wdir: $!\n"; # pmix_test is run relative to this directory + +$testnum = $0; +$testnum =~ s/\.pl$//; # strip only the literal trailing ".pl" suffix +$testnum = substr($testnum, -2); # two-digit index taken from the script name +$test = $tests[$testnum]; # scalar element, not a one-element slice + +$cmd = "./pmix_test " . $test . " 2>&1"; +print $cmd . "\n"; +$output = `$cmd`; +print $output . "\n"; +print "CODE $?\n"; +$status = "$?"; + +exit($status >> 8); diff --git a/opal/mca/pmix/pmix4x/openpmix/test/run_tests09.pl b/opal/mca/pmix/pmix4x/openpmix/test/run_tests09.pl new file mode 100755 index 00000000000..fbe3ba0fab3 --- /dev/null +++ b/opal/mca/pmix/pmix4x/openpmix/test/run_tests09.pl @@ -0,0 +1,73 @@ +#!/usr/bin/env perl +# +# Copyright (c) 2019 Intel, Inc. 
+# +# Copyright (c) 2019 Cisco Systems, Inc. All rights reserved +# $COPYRIGHT$ +# +# Additional copyrights may follow + +use strict; + +my @tests = ("-n 4 --ns-dist 3:1 --fence \"[db | 0:0-2;1:0]\"", + "-n 4 --ns-dist 3:1 --fence \"[db | 0:;1:0]\"", + "-n 4 --ns-dist 3:1 --fence \"[db | 0:;1:]\"", + "-n 4 --ns-dist 3:1 --fence \"[0:]\"", + "-n 4 --ns-dist 3:1 --fence \"[b | 0:]\"", + "-n 4 --ns-dist 3:1 --fence \"[d | 0:]\" --noise \"[0:0,1]\"", + "-n 4 --job-fence -c", + "-n 4 --job-fence", + "-n 2 --test-publish", + "-n 2 --test-spawn", + "-n 2 --test-connect", + "-n 5 --test-resolve-peers --ns-dist \"1:2:2\"", + "-n 5 --test-replace 100:0,1,10,50,99", + "-n 5 --test-internal 10", + "-s 2 -n 2 --job-fence", + "-s 2 -n 2 --job-fence -c"); + +my $test; +my $cmd; +my $output; +my $status = 0; +my $testnum; + +# We are running tests against the build tree (vs. the installation +# tree). Autogen gives us a full list of all possible component +# directories in PMIX_COMPONENT_LIBRARY_PATHS. Iterate through each +# of those directories: 1) to see if there is actually a component +# built in there, and 2) to turn it into an absolute path name. Then +# put the new list in the "mca_base_component_path" MCA parameter env +# variable so that the MCA base knows where to find all the +# components. 
+my @myfullpaths; +my $mybuilddir = "/home/wbailey2/ompi/opal/mca/pmix/pmix4x/openpmix"; +my $mypathstr = "src/mca/bfrops/v12:src/mca/bfrops/v20:src/mca/bfrops/v21:src/mca/bfrops/v3:src/mca/bfrops/v4:src/mca/common/dstore:src/mca/gds/ds12:src/mca/gds/ds21:src/mca/gds/hash:src/mca/pcompress/zlib:src/mca/pdl/pdlopen:src/mca/pdl/plibltdl:src/mca/pfexec/linux:src/mca/pif/bsdx_ipv4:src/mca/pif/bsdx_ipv6:src/mca/pif/linux_ipv6:src/mca/pif/posix_ipv4:src/mca/pif/solaris_ipv6:src/mca/pinstalldirs/config:src/mca/pinstalldirs/env:src/mca/plog/default:src/mca/plog/stdfd:src/mca/plog/syslog:src/mca/pmdl/ompi:src/mca/pnet/tcp:src/mca/pnet/test:src/mca/preg/compress:src/mca/preg/native:src/mca/psec/dummy_handshake:src/mca/psec/munge:src/mca/psec/native:src/mca/psec/none:src/mca/psensor/file:src/mca/psensor/heartbeat:src/mca/pshmem/mmap:src/mca/psquash/flex128:src/mca/psquash/native:src/mca/ptl/tcp:src/mca/ptl/usock"; +my @splitstr = split(':', $mypathstr); +foreach my $path (@splitstr) { + # Note that the component is actually built in the ".libs" + # subdirectory. If the component wasn't built, that subdirectory + # will not exist, so don't save it. + my $fullpath = $mybuilddir . "/" . $path . "/.libs"; + push(@myfullpaths, $fullpath) + if (-d $fullpath); +} +my $mymcapaths = join(":", @myfullpaths); +$ENV{'PMIX_MCA_mca_base_component_path'} = $mymcapaths; + +my $wdir = $mybuilddir . "/test"; +chdir($wdir) or die "Cannot chdir to $wdir: $!\n"; # pmix_test is run relative to this directory + +$testnum = $0; +$testnum =~ s/\.pl$//; # strip only the literal trailing ".pl" suffix +$testnum = substr($testnum, -2); # two-digit index taken from the script name +$test = $tests[$testnum]; # scalar element, not a one-element slice + +$cmd = "./pmix_test " . $test . " 2>&1"; +print $cmd . "\n"; +$output = `$cmd`; +print $output . "\n"; +print "CODE $?\n"; +$status = "$?"; + +exit($status >> 8); diff --git a/opal/mca/pmix/pmix4x/openpmix/test/run_tests10.pl b/opal/mca/pmix/pmix4x/openpmix/test/run_tests10.pl new file mode 100755 index 00000000000..fbe3ba0fab3 --- /dev/null +++ b/opal/mca/pmix/pmix4x/openpmix/test/run_tests10.pl @@ -0,0 +1,73 @@ +#!/usr/bin/env perl +# +# Copyright (c) 2019 Intel, Inc. 
+# +# Copyright (c) 2019 Cisco Systems, Inc. All rights reserved +# $COPYRIGHT$ +# +# Additional copyrights may follow + +use strict; + +my @tests = ("-n 4 --ns-dist 3:1 --fence \"[db | 0:0-2;1:0]\"", + "-n 4 --ns-dist 3:1 --fence \"[db | 0:;1:0]\"", + "-n 4 --ns-dist 3:1 --fence \"[db | 0:;1:]\"", + "-n 4 --ns-dist 3:1 --fence \"[0:]\"", + "-n 4 --ns-dist 3:1 --fence \"[b | 0:]\"", + "-n 4 --ns-dist 3:1 --fence \"[d | 0:]\" --noise \"[0:0,1]\"", + "-n 4 --job-fence -c", + "-n 4 --job-fence", + "-n 2 --test-publish", + "-n 2 --test-spawn", + "-n 2 --test-connect", + "-n 5 --test-resolve-peers --ns-dist \"1:2:2\"", + "-n 5 --test-replace 100:0,1,10,50,99", + "-n 5 --test-internal 10", + "-s 2 -n 2 --job-fence", + "-s 2 -n 2 --job-fence -c"); + +my $test; +my $cmd; +my $output; +my $status = 0; +my $testnum; + +# We are running tests against the build tree (vs. the installation +# tree). Autogen gives us a full list of all possible component +# directories in PMIX_COMPONENT_LIBRARY_PATHS. Iterate through each +# of those directories: 1) to see if there is actually a component +# built in there, and 2) to turn it into an absolute path name. Then +# put the new list in the "mca_base_component_path" MCA parameter env +# variable so that the MCA base knows where to find all the +# components. 
+my @myfullpaths; +my $mybuilddir = "/home/wbailey2/ompi/opal/mca/pmix/pmix4x/openpmix"; +my $mypathstr = "src/mca/bfrops/v12:src/mca/bfrops/v20:src/mca/bfrops/v21:src/mca/bfrops/v3:src/mca/bfrops/v4:src/mca/common/dstore:src/mca/gds/ds12:src/mca/gds/ds21:src/mca/gds/hash:src/mca/pcompress/zlib:src/mca/pdl/pdlopen:src/mca/pdl/plibltdl:src/mca/pfexec/linux:src/mca/pif/bsdx_ipv4:src/mca/pif/bsdx_ipv6:src/mca/pif/linux_ipv6:src/mca/pif/posix_ipv4:src/mca/pif/solaris_ipv6:src/mca/pinstalldirs/config:src/mca/pinstalldirs/env:src/mca/plog/default:src/mca/plog/stdfd:src/mca/plog/syslog:src/mca/pmdl/ompi:src/mca/pnet/tcp:src/mca/pnet/test:src/mca/preg/compress:src/mca/preg/native:src/mca/psec/dummy_handshake:src/mca/psec/munge:src/mca/psec/native:src/mca/psec/none:src/mca/psensor/file:src/mca/psensor/heartbeat:src/mca/pshmem/mmap:src/mca/psquash/flex128:src/mca/psquash/native:src/mca/ptl/tcp:src/mca/ptl/usock"; +my @splitstr = split(':', $mypathstr); +foreach my $path (@splitstr) { + # Note that the component is actually built in the ".libs" + # subdirectory. If the component wasn't built, that subdirectory + # will not exist, so don't save it. + my $fullpath = $mybuilddir . "/" . $path . "/.libs"; + push(@myfullpaths, $fullpath) + if (-d $fullpath); +} +my $mymcapaths = join(":", @myfullpaths); +$ENV{'PMIX_MCA_mca_base_component_path'} = $mymcapaths; + +my $wdir = $mybuilddir . "/test"; +chdir($wdir) or die "Cannot chdir to $wdir: $!\n"; # pmix_test is run relative to this directory + +$testnum = $0; +$testnum =~ s/\.pl$//; # strip only the literal trailing ".pl" suffix +$testnum = substr($testnum, -2); # two-digit index taken from the script name +$test = $tests[$testnum]; # scalar element, not a one-element slice + +$cmd = "./pmix_test " . $test . " 2>&1"; +print $cmd . "\n"; +$output = `$cmd`; +print $output . "\n"; +print "CODE $?\n"; +$status = "$?"; + +exit($status >> 8); diff --git a/opal/mca/pmix/pmix4x/openpmix/test/run_tests11.pl b/opal/mca/pmix/pmix4x/openpmix/test/run_tests11.pl new file mode 100755 index 00000000000..fbe3ba0fab3 --- /dev/null +++ b/opal/mca/pmix/pmix4x/openpmix/test/run_tests11.pl @@ -0,0 +1,73 @@ +#!/usr/bin/env perl +# +# Copyright (c) 2019 Intel, Inc. 
+# +# Copyright (c) 2019 Cisco Systems, Inc. All rights reserved +# $COPYRIGHT$ +# +# Additional copyrights may follow + +use strict; + +my @tests = ("-n 4 --ns-dist 3:1 --fence \"[db | 0:0-2;1:0]\"", + "-n 4 --ns-dist 3:1 --fence \"[db | 0:;1:0]\"", + "-n 4 --ns-dist 3:1 --fence \"[db | 0:;1:]\"", + "-n 4 --ns-dist 3:1 --fence \"[0:]\"", + "-n 4 --ns-dist 3:1 --fence \"[b | 0:]\"", + "-n 4 --ns-dist 3:1 --fence \"[d | 0:]\" --noise \"[0:0,1]\"", + "-n 4 --job-fence -c", + "-n 4 --job-fence", + "-n 2 --test-publish", + "-n 2 --test-spawn", + "-n 2 --test-connect", + "-n 5 --test-resolve-peers --ns-dist \"1:2:2\"", + "-n 5 --test-replace 100:0,1,10,50,99", + "-n 5 --test-internal 10", + "-s 2 -n 2 --job-fence", + "-s 2 -n 2 --job-fence -c"); + +my $test; +my $cmd; +my $output; +my $status = 0; +my $testnum; + +# We are running tests against the build tree (vs. the installation +# tree). Autogen gives us a full list of all possible component +# directories in PMIX_COMPONENT_LIBRARY_PATHS. Iterate through each +# of those directories: 1) to see if there is actually a component +# built in there, and 2) to turn it into an absolute path name. Then +# put the new list in the "mca_base_component_path" MCA parameter env +# variable so that the MCA base knows where to find all the +# components. 
+my @myfullpaths; +my $mybuilddir = "/home/wbailey2/ompi/opal/mca/pmix/pmix4x/openpmix"; +my $mypathstr = "src/mca/bfrops/v12:src/mca/bfrops/v20:src/mca/bfrops/v21:src/mca/bfrops/v3:src/mca/bfrops/v4:src/mca/common/dstore:src/mca/gds/ds12:src/mca/gds/ds21:src/mca/gds/hash:src/mca/pcompress/zlib:src/mca/pdl/pdlopen:src/mca/pdl/plibltdl:src/mca/pfexec/linux:src/mca/pif/bsdx_ipv4:src/mca/pif/bsdx_ipv6:src/mca/pif/linux_ipv6:src/mca/pif/posix_ipv4:src/mca/pif/solaris_ipv6:src/mca/pinstalldirs/config:src/mca/pinstalldirs/env:src/mca/plog/default:src/mca/plog/stdfd:src/mca/plog/syslog:src/mca/pmdl/ompi:src/mca/pnet/tcp:src/mca/pnet/test:src/mca/preg/compress:src/mca/preg/native:src/mca/psec/dummy_handshake:src/mca/psec/munge:src/mca/psec/native:src/mca/psec/none:src/mca/psensor/file:src/mca/psensor/heartbeat:src/mca/pshmem/mmap:src/mca/psquash/flex128:src/mca/psquash/native:src/mca/ptl/tcp:src/mca/ptl/usock"; +my @splitstr = split(':', $mypathstr); +foreach my $path (@splitstr) { + # Note that the component is actually built in the ".libs" + # subdirectory. If the component wasn't built, that subdirectory + # will not exist, so don't save it. + my $fullpath = $mybuilddir . "/" . $path . "/.libs"; + push(@myfullpaths, $fullpath) + if (-d $fullpath); +} +my $mymcapaths = join(":", @myfullpaths); +$ENV{'PMIX_MCA_mca_base_component_path'} = $mymcapaths; + +my $wdir = $mybuilddir . "/test"; +chdir($wdir) or die "Cannot chdir to $wdir: $!\n"; # pmix_test is run relative to this directory + +$testnum = $0; +$testnum =~ s/\.pl$//; # strip only the literal trailing ".pl" suffix +$testnum = substr($testnum, -2); # two-digit index taken from the script name +$test = $tests[$testnum]; # scalar element, not a one-element slice + +$cmd = "./pmix_test " . $test . " 2>&1"; +print $cmd . "\n"; +$output = `$cmd`; +print $output . "\n"; +print "CODE $?\n"; +$status = "$?"; + +exit($status >> 8); diff --git a/opal/mca/pmix/pmix4x/openpmix/test/run_tests12.pl b/opal/mca/pmix/pmix4x/openpmix/test/run_tests12.pl new file mode 100755 index 00000000000..fbe3ba0fab3 --- /dev/null +++ b/opal/mca/pmix/pmix4x/openpmix/test/run_tests12.pl @@ -0,0 +1,73 @@ +#!/usr/bin/env perl +# +# Copyright (c) 2019 Intel, Inc. 
+# +# Copyright (c) 2019 Cisco Systems, Inc. All rights reserved +# $COPYRIGHT$ +# +# Additional copyrights may follow + +use strict; + +my @tests = ("-n 4 --ns-dist 3:1 --fence \"[db | 0:0-2;1:0]\"", + "-n 4 --ns-dist 3:1 --fence \"[db | 0:;1:0]\"", + "-n 4 --ns-dist 3:1 --fence \"[db | 0:;1:]\"", + "-n 4 --ns-dist 3:1 --fence \"[0:]\"", + "-n 4 --ns-dist 3:1 --fence \"[b | 0:]\"", + "-n 4 --ns-dist 3:1 --fence \"[d | 0:]\" --noise \"[0:0,1]\"", + "-n 4 --job-fence -c", + "-n 4 --job-fence", + "-n 2 --test-publish", + "-n 2 --test-spawn", + "-n 2 --test-connect", + "-n 5 --test-resolve-peers --ns-dist \"1:2:2\"", + "-n 5 --test-replace 100:0,1,10,50,99", + "-n 5 --test-internal 10", + "-s 2 -n 2 --job-fence", + "-s 2 -n 2 --job-fence -c"); + +my $test; +my $cmd; +my $output; +my $status = 0; +my $testnum; + +# We are running tests against the build tree (vs. the installation +# tree). Autogen gives us a full list of all possible component +# directories in PMIX_COMPONENT_LIBRARY_PATHS. Iterate through each +# of those directories: 1) to see if there is actually a component +# built in there, and 2) to turn it into an absolute path name. Then +# put the new list in the "mca_base_component_path" MCA parameter env +# variable so that the MCA base knows where to find all the +# components. 
+my @myfullpaths; +my $mybuilddir = "/home/wbailey2/ompi/opal/mca/pmix/pmix4x/openpmix"; +my $mypathstr = "src/mca/bfrops/v12:src/mca/bfrops/v20:src/mca/bfrops/v21:src/mca/bfrops/v3:src/mca/bfrops/v4:src/mca/common/dstore:src/mca/gds/ds12:src/mca/gds/ds21:src/mca/gds/hash:src/mca/pcompress/zlib:src/mca/pdl/pdlopen:src/mca/pdl/plibltdl:src/mca/pfexec/linux:src/mca/pif/bsdx_ipv4:src/mca/pif/bsdx_ipv6:src/mca/pif/linux_ipv6:src/mca/pif/posix_ipv4:src/mca/pif/solaris_ipv6:src/mca/pinstalldirs/config:src/mca/pinstalldirs/env:src/mca/plog/default:src/mca/plog/stdfd:src/mca/plog/syslog:src/mca/pmdl/ompi:src/mca/pnet/tcp:src/mca/pnet/test:src/mca/preg/compress:src/mca/preg/native:src/mca/psec/dummy_handshake:src/mca/psec/munge:src/mca/psec/native:src/mca/psec/none:src/mca/psensor/file:src/mca/psensor/heartbeat:src/mca/pshmem/mmap:src/mca/psquash/flex128:src/mca/psquash/native:src/mca/ptl/tcp:src/mca/ptl/usock"; +my @splitstr = split(':', $mypathstr); +foreach my $path (@splitstr) { + # Note that the component is actually built in the ".libs" + # subdirectory. If the component wasn't built, that subdirectory + # will not exist, so don't save it. + my $fullpath = $mybuilddir . "/" . $path . "/.libs"; + push(@myfullpaths, $fullpath) + if (-d $fullpath); +} +my $mymcapaths = join(":", @myfullpaths); +$ENV{'PMIX_MCA_mca_base_component_path'} = $mymcapaths; + +my $wdir = $mybuilddir . "/test"; +chdir($wdir) or die "Cannot chdir to $wdir: $!\n"; # pmix_test is run relative to this directory + +$testnum = $0; +$testnum =~ s/\.pl$//; # strip only the literal trailing ".pl" suffix +$testnum = substr($testnum, -2); # two-digit index taken from the script name +$test = $tests[$testnum]; # scalar element, not a one-element slice + +$cmd = "./pmix_test " . $test . " 2>&1"; +print $cmd . "\n"; +$output = `$cmd`; +print $output . "\n"; +print "CODE $?\n"; +$status = "$?"; + +exit($status >> 8); diff --git a/opal/mca/pmix/pmix4x/openpmix/test/run_tests13.pl b/opal/mca/pmix/pmix4x/openpmix/test/run_tests13.pl new file mode 100755 index 00000000000..fbe3ba0fab3 --- /dev/null +++ b/opal/mca/pmix/pmix4x/openpmix/test/run_tests13.pl @@ -0,0 +1,73 @@ +#!/usr/bin/env perl +# +# Copyright (c) 2019 Intel, Inc. 
+# +# Copyright (c) 2019 Cisco Systems, Inc. All rights reserved +# $COPYRIGHT$ +# +# Additional copyrights may follow + +use strict; + +my @tests = ("-n 4 --ns-dist 3:1 --fence \"[db | 0:0-2;1:0]\"", + "-n 4 --ns-dist 3:1 --fence \"[db | 0:;1:0]\"", + "-n 4 --ns-dist 3:1 --fence \"[db | 0:;1:]\"", + "-n 4 --ns-dist 3:1 --fence \"[0:]\"", + "-n 4 --ns-dist 3:1 --fence \"[b | 0:]\"", + "-n 4 --ns-dist 3:1 --fence \"[d | 0:]\" --noise \"[0:0,1]\"", + "-n 4 --job-fence -c", + "-n 4 --job-fence", + "-n 2 --test-publish", + "-n 2 --test-spawn", + "-n 2 --test-connect", + "-n 5 --test-resolve-peers --ns-dist \"1:2:2\"", + "-n 5 --test-replace 100:0,1,10,50,99", + "-n 5 --test-internal 10", + "-s 2 -n 2 --job-fence", + "-s 2 -n 2 --job-fence -c"); + +my $test; +my $cmd; +my $output; +my $status = 0; +my $testnum; + +# We are running tests against the build tree (vs. the installation +# tree). Autogen gives us a full list of all possible component +# directories in PMIX_COMPONENT_LIBRARY_PATHS. Iterate through each +# of those directories: 1) to see if there is actually a component +# built in there, and 2) to turn it into an absolute path name. Then +# put the new list in the "mca_base_component_path" MCA parameter env +# variable so that the MCA base knows where to find all the +# components. 
+my @myfullpaths; +my $mybuilddir = "/home/wbailey2/ompi/opal/mca/pmix/pmix4x/openpmix"; +my $mypathstr = "src/mca/bfrops/v12:src/mca/bfrops/v20:src/mca/bfrops/v21:src/mca/bfrops/v3:src/mca/bfrops/v4:src/mca/common/dstore:src/mca/gds/ds12:src/mca/gds/ds21:src/mca/gds/hash:src/mca/pcompress/zlib:src/mca/pdl/pdlopen:src/mca/pdl/plibltdl:src/mca/pfexec/linux:src/mca/pif/bsdx_ipv4:src/mca/pif/bsdx_ipv6:src/mca/pif/linux_ipv6:src/mca/pif/posix_ipv4:src/mca/pif/solaris_ipv6:src/mca/pinstalldirs/config:src/mca/pinstalldirs/env:src/mca/plog/default:src/mca/plog/stdfd:src/mca/plog/syslog:src/mca/pmdl/ompi:src/mca/pnet/tcp:src/mca/pnet/test:src/mca/preg/compress:src/mca/preg/native:src/mca/psec/dummy_handshake:src/mca/psec/munge:src/mca/psec/native:src/mca/psec/none:src/mca/psensor/file:src/mca/psensor/heartbeat:src/mca/pshmem/mmap:src/mca/psquash/flex128:src/mca/psquash/native:src/mca/ptl/tcp:src/mca/ptl/usock"; +my @splitstr = split(':', $mypathstr); +foreach my $path (@splitstr) { + # Note that the component is actually built in the ".libs" + # subdirectory. If the component wasn't built, that subdirectory + # will not exist, so don't save it. + my $fullpath = $mybuilddir . "/" . $path . "/.libs"; + push(@myfullpaths, $fullpath) + if (-d $fullpath); +} +my $mymcapaths = join(":", @myfullpaths); +$ENV{'PMIX_MCA_mca_base_component_path'} = $mymcapaths; + +my $wdir = $mybuilddir . "/test"; +chdir($wdir) or die "Cannot chdir to $wdir: $!\n"; # pmix_test is run relative to this directory + +$testnum = $0; +$testnum =~ s/\.pl$//; # strip only the literal trailing ".pl" suffix +$testnum = substr($testnum, -2); # two-digit index taken from the script name +$test = $tests[$testnum]; # scalar element, not a one-element slice + +$cmd = "./pmix_test " . $test . " 2>&1"; +print $cmd . "\n"; +$output = `$cmd`; +print $output . "\n"; +print "CODE $?\n"; +$status = "$?"; + +exit($status >> 8); diff --git a/opal/mca/pmix/pmix4x/openpmix/test/run_tests14.pl b/opal/mca/pmix/pmix4x/openpmix/test/run_tests14.pl new file mode 100755 index 00000000000..fbe3ba0fab3 --- /dev/null +++ b/opal/mca/pmix/pmix4x/openpmix/test/run_tests14.pl @@ -0,0 +1,73 @@ +#!/usr/bin/env perl +# +# Copyright (c) 2019 Intel, Inc. 
+# +# Copyright (c) 2019 Cisco Systems, Inc. All rights reserved +# $COPYRIGHT$ +# +# Additional copyrights may follow + +use strict; + +my @tests = ("-n 4 --ns-dist 3:1 --fence \"[db | 0:0-2;1:0]\"", + "-n 4 --ns-dist 3:1 --fence \"[db | 0:;1:0]\"", + "-n 4 --ns-dist 3:1 --fence \"[db | 0:;1:]\"", + "-n 4 --ns-dist 3:1 --fence \"[0:]\"", + "-n 4 --ns-dist 3:1 --fence \"[b | 0:]\"", + "-n 4 --ns-dist 3:1 --fence \"[d | 0:]\" --noise \"[0:0,1]\"", + "-n 4 --job-fence -c", + "-n 4 --job-fence", + "-n 2 --test-publish", + "-n 2 --test-spawn", + "-n 2 --test-connect", + "-n 5 --test-resolve-peers --ns-dist \"1:2:2\"", + "-n 5 --test-replace 100:0,1,10,50,99", + "-n 5 --test-internal 10", + "-s 2 -n 2 --job-fence", + "-s 2 -n 2 --job-fence -c"); + +my $test; +my $cmd; +my $output; +my $status = 0; +my $testnum; + +# We are running tests against the build tree (vs. the installation +# tree). Autogen gives us a full list of all possible component +# directories in PMIX_COMPONENT_LIBRARY_PATHS. Iterate through each +# of those directories: 1) to see if there is actually a component +# built in there, and 2) to turn it into an absolute path name. Then +# put the new list in the "mca_base_component_path" MCA parameter env +# variable so that the MCA base knows where to find all the +# components. 
+my @myfullpaths; +my $mybuilddir = "/home/wbailey2/ompi/opal/mca/pmix/pmix4x/openpmix"; +my $mypathstr = "src/mca/bfrops/v12:src/mca/bfrops/v20:src/mca/bfrops/v21:src/mca/bfrops/v3:src/mca/bfrops/v4:src/mca/common/dstore:src/mca/gds/ds12:src/mca/gds/ds21:src/mca/gds/hash:src/mca/pcompress/zlib:src/mca/pdl/pdlopen:src/mca/pdl/plibltdl:src/mca/pfexec/linux:src/mca/pif/bsdx_ipv4:src/mca/pif/bsdx_ipv6:src/mca/pif/linux_ipv6:src/mca/pif/posix_ipv4:src/mca/pif/solaris_ipv6:src/mca/pinstalldirs/config:src/mca/pinstalldirs/env:src/mca/plog/default:src/mca/plog/stdfd:src/mca/plog/syslog:src/mca/pmdl/ompi:src/mca/pnet/tcp:src/mca/pnet/test:src/mca/preg/compress:src/mca/preg/native:src/mca/psec/dummy_handshake:src/mca/psec/munge:src/mca/psec/native:src/mca/psec/none:src/mca/psensor/file:src/mca/psensor/heartbeat:src/mca/pshmem/mmap:src/mca/psquash/flex128:src/mca/psquash/native:src/mca/ptl/tcp:src/mca/ptl/usock"; +my @splitstr = split(':', $mypathstr); +foreach my $path (@splitstr) { + # Note that the component is actually built in the ".libs" + # subdirectory. If the component wasn't built, that subdirectory + # will not exist, so don't save it. + my $fullpath = $mybuilddir . "/" . $path . "/.libs"; + push(@myfullpaths, $fullpath) + if (-d $fullpath); +} +my $mymcapaths = join(":", @myfullpaths); +$ENV{'PMIX_MCA_mca_base_component_path'} = $mymcapaths; + +my $wdir = $mybuilddir . "/test"; +chdir($wdir) or die "Cannot chdir to $wdir: $!\n"; # pmix_test is run relative to this directory + +$testnum = $0; +$testnum =~ s/\.pl$//; # strip only the literal trailing ".pl" suffix +$testnum = substr($testnum, -2); # two-digit index taken from the script name +$test = $tests[$testnum]; # scalar element, not a one-element slice + +$cmd = "./pmix_test " . $test . " 2>&1"; +print $cmd . "\n"; +$output = `$cmd`; +print $output . "\n"; +print "CODE $?\n"; +$status = "$?"; + +exit($status >> 8); diff --git a/opal/mca/pmix/pmix4x/openpmix/test/run_tests15.pl b/opal/mca/pmix/pmix4x/openpmix/test/run_tests15.pl new file mode 100755 index 00000000000..fbe3ba0fab3 --- /dev/null +++ b/opal/mca/pmix/pmix4x/openpmix/test/run_tests15.pl @@ -0,0 +1,73 @@ +#!/usr/bin/env perl +# +# Copyright (c) 2019 Intel, Inc. 
+# +# Copyright (c) 2019 Cisco Systems, Inc. All rights reserved +# $COPYRIGHT$ +# +# Additional copyrights may follow + +use strict; + +my @tests = ("-n 4 --ns-dist 3:1 --fence \"[db | 0:0-2;1:0]\"", + "-n 4 --ns-dist 3:1 --fence \"[db | 0:;1:0]\"", + "-n 4 --ns-dist 3:1 --fence \"[db | 0:;1:]\"", + "-n 4 --ns-dist 3:1 --fence \"[0:]\"", + "-n 4 --ns-dist 3:1 --fence \"[b | 0:]\"", + "-n 4 --ns-dist 3:1 --fence \"[d | 0:]\" --noise \"[0:0,1]\"", + "-n 4 --job-fence -c", + "-n 4 --job-fence", + "-n 2 --test-publish", + "-n 2 --test-spawn", + "-n 2 --test-connect", + "-n 5 --test-resolve-peers --ns-dist \"1:2:2\"", + "-n 5 --test-replace 100:0,1,10,50,99", + "-n 5 --test-internal 10", + "-s 2 -n 2 --job-fence", + "-s 2 -n 2 --job-fence -c"); + +my $test; +my $cmd; +my $output; +my $status = 0; +my $testnum; + +# We are running tests against the build tree (vs. the installation +# tree). Autogen gives us a full list of all possible component +# directories in PMIX_COMPONENT_LIBRARY_PATHS. Iterate through each +# of those directories: 1) to see if there is actually a component +# built in there, and 2) to turn it into an absolute path name. Then +# put the new list in the "mca_base_component_path" MCA parameter env +# variable so that the MCA base knows where to find all the +# components. 
+my @myfullpaths; +my $mybuilddir = "/home/wbailey2/ompi/opal/mca/pmix/pmix4x/openpmix"; +my $mypathstr = "src/mca/bfrops/v12:src/mca/bfrops/v20:src/mca/bfrops/v21:src/mca/bfrops/v3:src/mca/bfrops/v4:src/mca/common/dstore:src/mca/gds/ds12:src/mca/gds/ds21:src/mca/gds/hash:src/mca/pcompress/zlib:src/mca/pdl/pdlopen:src/mca/pdl/plibltdl:src/mca/pfexec/linux:src/mca/pif/bsdx_ipv4:src/mca/pif/bsdx_ipv6:src/mca/pif/linux_ipv6:src/mca/pif/posix_ipv4:src/mca/pif/solaris_ipv6:src/mca/pinstalldirs/config:src/mca/pinstalldirs/env:src/mca/plog/default:src/mca/plog/stdfd:src/mca/plog/syslog:src/mca/pmdl/ompi:src/mca/pnet/tcp:src/mca/pnet/test:src/mca/preg/compress:src/mca/preg/native:src/mca/psec/dummy_handshake:src/mca/psec/munge:src/mca/psec/native:src/mca/psec/none:src/mca/psensor/file:src/mca/psensor/heartbeat:src/mca/pshmem/mmap:src/mca/psquash/flex128:src/mca/psquash/native:src/mca/ptl/tcp:src/mca/ptl/usock"; +my @splitstr = split(':', $mypathstr); +foreach my $path (@splitstr) { + # Note that the component is actually built in the ".libs" + # subdirectory. If the component wasn't built, that subdirectory + # will not exist, so don't save it. + my $fullpath = $mybuilddir . "/" . $path . "/.libs"; + push(@myfullpaths, $fullpath) + if (-d $fullpath); +} +my $mymcapaths = join(":", @myfullpaths); +$ENV{'PMIX_MCA_mca_base_component_path'} = $mymcapaths; + +my $wdir = $mybuilddir . "/test"; +chdir($wdir) or die "Cannot chdir to $wdir: $!\n"; # pmix_test is run relative to this directory + +$testnum = $0; +$testnum =~ s/\.pl$//; # strip only the literal trailing ".pl" suffix +$testnum = substr($testnum, -2); # two-digit index taken from the script name +$test = $tests[$testnum]; # scalar element, not a one-element slice + +$cmd = "./pmix_test " . $test . " 2>&1"; +print $cmd . "\n"; +$output = `$cmd`; +print $output . "\n"; +print "CODE $?\n"; +$status = "$?"; + +exit($status >> 8); diff --git a/opal/mca/pmix/s1/configure.m4 b/opal/mca/pmix/s1/configure.m4 index 974107be5e5..78acca50058 100644 --- a/opal/mca/pmix/s1/configure.m4 +++ b/opal/mca/pmix/s1/configure.m4 @@ -1,6 +1,6 @@ # -*- shell-script -*- # -# Copyright (c) 2014 Intel, Inc. All rights reserved. 
+# Copyright (c) 2014-2019 Intel, Inc. All rights reserved. # $COPYRIGHT$ # # Additional copyrights may follow @@ -14,7 +14,6 @@ AC_DEFUN([MCA_opal_pmix_s1_CONFIG], [ AC_CONFIG_FILES([opal/mca/pmix/s1/Makefile]) AC_REQUIRE([OPAL_CHECK_UGNI]) - AC_REQUIRE([OPAL_CHECK_PMI]) # Evaluate succeed / fail AS_IF([test "$opal_enable_pmi1" = "yes" && test "$opal_check_ugni_happy" = "no"], diff --git a/opal/mca/pmix/s2/configure.m4 b/opal/mca/pmix/s2/configure.m4 index 5e3a7c4a31c..b3c8b06e78e 100644 --- a/opal/mca/pmix/s2/configure.m4 +++ b/opal/mca/pmix/s2/configure.m4 @@ -1,6 +1,6 @@ # -*- shell-script -*- # -# Copyright (c) 2014 Intel, Inc. All rights reserved. +# Copyright (c) 2014-2019 Intel, Inc. All rights reserved. # $COPYRIGHT$ # # Additional copyrights may follow @@ -14,7 +14,6 @@ AC_DEFUN([MCA_opal_pmix_s2_CONFIG], [ AC_CONFIG_FILES([opal/mca/pmix/s2/Makefile]) AC_REQUIRE([OPAL_CHECK_UGNI]) - AC_REQUIRE([OPAL_CHECK_PMI]) # Evaluate succeed / fail AS_IF([test "$opal_enable_pmi2" = "yes" && test "$opal_check_ugni_happy" = "no"], diff --git a/opal/runtime/opal_progress.c b/opal/runtime/opal_progress.c index 24607ec71f8..8c88a32c67d 100644 --- a/opal/runtime/opal_progress.c +++ b/opal/runtime/opal_progress.c @@ -10,7 +10,7 @@ * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. - * Copyright (c) 2006-2016 Los Alamos National Security, LLC. All rights + * Copyright (c) 2006-2018 Los Alamos National Security, LLC. All rights * reserved. * Copyright (c) 2015-2016 Research Organization for Information Science * and Technology (RIST). All rights reserved. 
@@ -171,9 +171,10 @@ opal_progress_finalize(void) static int opal_progress_events(void) { + static volatile int32_t lock = 0; int events = 0; - if( opal_progress_event_flag != 0 ) { + if( opal_progress_event_flag != 0 && !OPAL_THREAD_SWAP_32(&lock, 1) ) { #if OPAL_HAVE_WORKING_EVENTOPS #if OPAL_PROGRESS_USE_TIMERS #if OPAL_PROGRESS_ONLY_USEC_NATIVE @@ -201,6 +202,7 @@ static int opal_progress_events(void) #endif /* OPAL_PROGRESS_USE_TIMERS */ #endif /* OPAL_HAVE_WORKING_EVENTOPS */ + lock = 0; } return events; diff --git a/opal/threads/thread_usage.h b/opal/threads/thread_usage.h index 178c8ceaab6..434c69e88df 100644 --- a/opal/threads/thread_usage.h +++ b/opal/threads/thread_usage.h @@ -88,6 +88,10 @@ static inline bool opal_set_using_threads(bool have) } +// Back-ported from master (2019-05-04) as part of +// a16cf0e4dd6df4dea820fecedd5920df632935b8 +typedef volatile size_t opal_atomic_size_t; + /** * Use an atomic operation for increment/decrement if opal_using_threads() * indicates that threads are in use by the application or library. 
diff --git a/opal/util/info.c b/opal/util/info.c index e63c0416b89..a88efe41c83 100644 --- a/opal/util/info.c +++ b/opal/util/info.c @@ -176,7 +176,7 @@ int opal_info_dup_mode (opal_info_t *info, opal_info_t **newinfo, { int err, flag; opal_info_entry_t *iterator; - char savedkey[OPAL_MAX_INFO_KEY]; + char savedkey[OPAL_MAX_INFO_KEY + 1]; // iterator->ie_key has this as its size char savedval[OPAL_MAX_INFO_VAL]; char *valptr, *pkey; int is_IN_key; @@ -194,7 +194,7 @@ int opal_info_dup_mode (opal_info_t *info, opal_info_t **newinfo, if (0 == strncmp(iterator->ie_key, OPAL_INFO_SAVE_PREFIX, strlen(OPAL_INFO_SAVE_PREFIX))) { - pkey += 5; + pkey += strlen(OPAL_INFO_SAVE_PREFIX); is_IN_key = 1; exists_IN_key = 1; @@ -207,9 +207,9 @@ int opal_info_dup_mode (opal_info_t *info, opal_info_t **newinfo, exists_reg_key = 1; // see if there is an __IN_ for the current - if (strlen(iterator->ie_key) + 5 < OPAL_MAX_INFO_KEY) { - snprintf(savedkey, OPAL_MAX_INFO_KEY, - OPAL_INFO_SAVE_PREFIX "%s", iterator->ie_key); + if (strlen(OPAL_INFO_SAVE_PREFIX) + strlen(pkey) < OPAL_MAX_INFO_KEY) { + snprintf(savedkey, OPAL_MAX_INFO_KEY+1, + OPAL_INFO_SAVE_PREFIX "%s", pkey); // (the prefix macro is a string, so the unreadable part above is a string concatenation) opal_info_get_nolock (info, savedkey, OPAL_MAX_INFO_VAL, savedval, &flag); diff --git a/opal/util/info_subscriber.c b/opal/util/info_subscriber.c index 348194c89bf..5b0544e36d5 100644 --- a/opal/util/info_subscriber.c +++ b/opal/util/info_subscriber.c @@ -14,7 +14,7 @@ * Copyright (c) 2009 Sun Microsystems, Inc. All rights reserved. * Copyright (c) 2012-2017 Los Alamos National Security, LLC. All rights * reserved. - * Copyright (c) 2015 Research Organization for Information Science + * Copyright (c) 2015-2018 Research Organization for Information Science * and Technology (RIST). All rights reserved. * Copyright (c) 2016-2018 IBM Corporation. All rights reserved. * Copyright (c) 2017-2018 Intel, Inc. All rights reserved. 
@@ -97,6 +97,10 @@ static void infosubscriber_destruct(opal_infosubscriber_t *obj) { } OBJ_DESTRUCT(&obj->s_subscriber_table); + + if (NULL != obj->s_info) { + OBJ_RELEASE(obj->s_info); + } } static void opal_callback_list_item_destruct(opal_callback_list_item_t *obj) { diff --git a/opal/util/stacktrace.c b/opal/util/stacktrace.c index e9d8cdb1ee2..afb791e62c8 100644 --- a/opal/util/stacktrace.c +++ b/opal/util/stacktrace.c @@ -32,8 +32,12 @@ #ifdef HAVE_SYS_STAT_H #include #endif -#ifdef HAVE_SYS_FCNTL_H +#ifdef HAVE_FCNTL_H #include +#else +#ifdef HAVE_SYS_FCNTL_H +#include +#endif #endif #include diff --git a/orte/include/orte/constants.h b/orte/include/orte/constants.h index de6c3cbb212..d272c31a6ca 100644 --- a/orte/include/orte/constants.h +++ b/orte/include/orte/constants.h @@ -11,7 +11,7 @@ * All rights reserved. * Copyright (c) 2014 Research Organization for Information Science * and Technology (RIST). All rights reserved. - * Copyright (c) 2015-2017 Intel, Inc. All rights reserved. + * Copyright (c) 2015-2018 Intel, Inc. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -93,6 +93,7 @@ enum { ORTE_ERR_PROC_ABORTING = OPAL_ERR_PROC_ABORTING, ORTE_ERR_NODE_DOWN = OPAL_ERR_NODE_DOWN, ORTE_ERR_NODE_OFFLINE = OPAL_ERR_NODE_OFFLINE, + ORTE_OPERATION_SUCCEEDED = OPAL_OPERATION_SUCCEEDED, /* error codes specific to ORTE - don't forget to update orte/util/error_strings.c when adding new error codes!! diff --git a/orte/mca/dfs/Makefile.am b/orte/mca/dfs/Makefile.am deleted file mode 100644 index c374dfcff8a..00000000000 --- a/orte/mca/dfs/Makefile.am +++ /dev/null @@ -1,27 +0,0 @@ -# -# Copyright (c) 2012 Los Alamos National Security, LLC. All rights reserved. 
-# $COPYRIGHT$ -# -# Additional copyrights may follow -# -# $HEADER$ -# - -# main library setup -noinst_LTLIBRARIES = libmca_dfs.la -libmca_dfs_la_SOURCES = - -# local files -headers = dfs.h dfs_types.h -libmca_dfs_la_SOURCES += $(headers) - -# Conditionally install the header files -if WANT_INSTALL_HEADERS -ortedir = $(orteincludedir)/$(subdir) -nobase_orte_HEADERS = $(headers) -endif - -include base/Makefile.am - -distclean-local: - rm -f base/static-components.h diff --git a/orte/mca/dfs/app/Makefile.am b/orte/mca/dfs/app/Makefile.am deleted file mode 100644 index 7c86273e46b..00000000000 --- a/orte/mca/dfs/app/Makefile.am +++ /dev/null @@ -1,36 +0,0 @@ -# -# Copyright (c) 2012 Los Alamos National Security, LLC. All rights reserved. -# Copyright (c) 2017 IBM Corporation. All rights reserved. -# $COPYRIGHT$ -# -# Additional copyrights may follow -# -# $HEADER$ -# - -sources = \ - dfs_app.h \ - dfs_app_component.c \ - dfs_app.c - -# Make the output library in this directory, and name it either -# mca__.la (for DSO builds) or libmca__.la -# (for static builds). - -if MCA_BUILD_orte_dfs_app_DSO -component_noinst = -component_install = mca_dfs_app.la -else -component_noinst = libmca_dfs_app.la -component_install = -endif - -mcacomponentdir = $(ortelibdir) -mcacomponent_LTLIBRARIES = $(component_install) -mca_dfs_app_la_SOURCES = $(sources) -mca_dfs_app_la_LDFLAGS = -module -avoid-version -mca_dfs_app_la_LIBADD = $(top_builddir)/orte/lib@ORTE_LIB_PREFIX@open-rte.la - -noinst_LTLIBRARIES = $(component_noinst) -libmca_dfs_app_la_SOURCES =$(sources) -libmca_dfs_app_la_LDFLAGS = -module -avoid-version diff --git a/orte/mca/dfs/app/dfs_app.c b/orte/mca/dfs/app/dfs_app.c deleted file mode 100644 index 33676f5095a..00000000000 --- a/orte/mca/dfs/app/dfs_app.c +++ /dev/null @@ -1,1315 +0,0 @@ -/* - * Copyright (c) 2012-2013 Los Alamos National Security, LLC. - * All rights reserved. - * Copyright (c) 2014-2017 Intel, Inc. All rights reserved. 
- * Copyright (c) 2014 Research Organization for Information Science - * and Technology (RIST). All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "orte_config.h" - -#include -#ifdef HAVE_UNISTD_H -#include -#endif /* HAVE_UNISTD_H */ -#include -#ifdef HAVE_FCNTL_H -#include -#endif -#include - -#include "opal/util/if.h" -#include "opal/util/output.h" -#include "opal/util/uri.h" -#include "opal/dss/dss.h" -#include "opal/mca/pmix/pmix.h" - -#include "orte/util/error_strings.h" -#include "orte/util/name_fns.h" -#include "orte/util/proc_info.h" -#include "orte/util/show_help.h" -#include "orte/util/threads.h" -#include "orte/runtime/orte_globals.h" -#include "orte/mca/errmgr/errmgr.h" -#include "orte/mca/rml/rml.h" - -#include "orte/mca/dfs/base/base.h" -#include "dfs_app.h" - -/* - * Module functions: Global - */ -static int init(void); -static int finalize(void); - -static void dfs_open(char *uri, - orte_dfs_open_callback_fn_t cbfunc, - void *cbdata); -static void dfs_close(int fd, - orte_dfs_close_callback_fn_t cbfunc, - void *cbdata); -static void dfs_get_file_size(int fd, - orte_dfs_size_callback_fn_t cbfunc, - void *cbdata); -static void dfs_seek(int fd, long offset, int whence, - orte_dfs_seek_callback_fn_t cbfunc, - void *cbdata); -static void dfs_read(int fd, uint8_t *buffer, - long length, - orte_dfs_read_callback_fn_t cbfunc, - void *cbdata); -static void dfs_post_file_map(opal_buffer_t *bo, - orte_dfs_post_callback_fn_t cbfunc, - void *cbdata); -static void dfs_get_file_map(orte_process_name_t *target, - orte_dfs_fm_callback_fn_t cbfunc, - void *cbdata); -static void dfs_load_file_maps(orte_jobid_t jobid, - opal_buffer_t *bo, - orte_dfs_load_callback_fn_t cbfunc, - void *cbdata); -static void dfs_purge_file_maps(orte_jobid_t jobid, - orte_dfs_purge_callback_fn_t cbfunc, - void *cbdata); - -/****************** - * APP module - ******************/ -orte_dfs_base_module_t orte_dfs_app_module = { - 
init, - finalize, - dfs_open, - dfs_close, - dfs_get_file_size, - dfs_seek, - dfs_read, - dfs_post_file_map, - dfs_get_file_map, - dfs_load_file_maps, - dfs_purge_file_maps -}; - -static opal_list_t requests, active_files; -static int local_fd = 0; -static uint64_t req_id = 0; -static void recv_dfs(int status, orte_process_name_t* sender, - opal_buffer_t* buffer, orte_rml_tag_t tag, - void* cbdata); - -static int init(void) -{ - OBJ_CONSTRUCT(&requests, opal_list_t); - OBJ_CONSTRUCT(&active_files, opal_list_t); - orte_rml.recv_buffer_nb(ORTE_NAME_WILDCARD, - ORTE_RML_TAG_DFS_DATA, - ORTE_RML_PERSISTENT, - recv_dfs, - NULL); - return ORTE_SUCCESS; -} - -static int finalize(void) -{ - opal_list_item_t *item; - - orte_rml.recv_cancel(ORTE_NAME_WILDCARD, ORTE_RML_TAG_DFS_DATA); - while (NULL != (item = opal_list_remove_first(&requests))) { - OBJ_RELEASE(item); - } - OBJ_DESTRUCT(&requests); - while (NULL != (item = opal_list_remove_first(&active_files))) { - OBJ_RELEASE(item); - } - OBJ_DESTRUCT(&active_files); - return ORTE_SUCCESS; -} - -/* receives take place in an event, so we are free to process - * the request list without fear of getting things out-of-order - */ -static void recv_dfs(int status, orte_process_name_t* sender, - opal_buffer_t* buffer, orte_rml_tag_t tag, - void* cbdata) -{ - orte_dfs_cmd_t cmd; - int32_t cnt; - orte_dfs_request_t *dfs, *dptr; - opal_list_item_t *item; - int remote_fd, rc; - int64_t i64; - uint64_t rid; - orte_dfs_tracker_t *trk; - - /* unpack the command this message is responding to */ - cnt = 1; - if (OPAL_SUCCESS != (rc = opal_dss.unpack(buffer, &cmd, &cnt, ORTE_DFS_CMD_T))) { - ORTE_ERROR_LOG(rc); - return; - } - - opal_output_verbose(1, orte_dfs_base_framework.framework_output, - "%s recvd cmd %d from sender %s", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), (int)cmd, - ORTE_NAME_PRINT(sender)); - - switch (cmd) { - case ORTE_DFS_OPEN_CMD: - /* unpack the request id */ - cnt = 1; - if (OPAL_SUCCESS != (rc = opal_dss.unpack(buffer, 
&rid, &cnt, OPAL_UINT64))) { - ORTE_ERROR_LOG(rc); - return; - } - /* unpack the remote fd */ - cnt = 1; - if (OPAL_SUCCESS != (rc = opal_dss.unpack(buffer, &remote_fd, &cnt, OPAL_INT))) { - ORTE_ERROR_LOG(rc); - return; - } - /* search our list of requests to find the matching one */ - dfs = NULL; - for (item = opal_list_get_first(&requests); - item != opal_list_get_end(&requests); - item = opal_list_get_next(item)) { - dptr = (orte_dfs_request_t*)item; - if (dptr->id == rid) { - /* as the request has been fulfilled, remove it */ - opal_list_remove_item(&requests, item); - dfs = dptr; - break; - } - } - if (NULL == dfs) { - opal_output_verbose(1, orte_dfs_base_framework.framework_output, - "%s recvd open file - no corresponding request found for local fd %d", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), local_fd); - ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND); - return; - } - - /* if the remote_fd < 0, then we had an error, so return - * the error value to the caller - */ - if (remote_fd < 0) { - opal_output_verbose(1, orte_dfs_base_framework.framework_output, - "%s recvd open file response error file %s [error: %d]", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - dfs->uri, remote_fd); - if (NULL != dfs->open_cbfunc) { - dfs->open_cbfunc(remote_fd, dfs->cbdata); - } - /* release the request */ - OBJ_RELEASE(dfs); - return; - } - /* otherwise, create a tracker for this file */ - trk = OBJ_NEW(orte_dfs_tracker_t); - trk->requestor.jobid = ORTE_PROC_MY_NAME->jobid; - trk->requestor.vpid = ORTE_PROC_MY_NAME->vpid; - trk->host_daemon.jobid = sender->jobid; - trk->host_daemon.vpid = sender->vpid; - trk->uri = strdup(dfs->uri); - /* break the uri down into scheme and filename */ - trk->scheme = opal_uri_get_scheme(dfs->uri); - trk->filename = opal_filename_from_uri(dfs->uri, NULL); - /* define the local fd */ - trk->local_fd = local_fd++; - /* record the remote file descriptor */ - trk->remote_fd = remote_fd; - /* add it to our list of active files */ - opal_list_append(&active_files, 
&trk->super); - /* return the local_fd to the caller for - * subsequent operations - */ - opal_output_verbose(1, orte_dfs_base_framework.framework_output, - "%s recvd open file completed for file %s [local fd: %d remote fd: %d]", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - dfs->uri, trk->local_fd, remote_fd); - if (NULL != dfs->open_cbfunc) { - dfs->open_cbfunc(trk->local_fd, dfs->cbdata); - } - /* release the request */ - OBJ_RELEASE(dfs); - break; - - case ORTE_DFS_SIZE_CMD: - /* unpack the request id for this request */ - cnt = 1; - if (OPAL_SUCCESS != (rc = opal_dss.unpack(buffer, &rid, &cnt, OPAL_UINT64))) { - ORTE_ERROR_LOG(rc); - return; - } - /* search our list of requests to find the matching one */ - dfs = NULL; - for (item = opal_list_get_first(&requests); - item != opal_list_get_end(&requests); - item = opal_list_get_next(item)) { - dptr = (orte_dfs_request_t*)item; - if (dptr->id == rid) { - /* request was fulfilled, so remove it */ - opal_list_remove_item(&requests, item); - dfs = dptr; - break; - } - } - if (NULL == dfs) { - opal_output_verbose(1, orte_dfs_base_framework.framework_output, - "%s recvd size - no corresponding request found for local fd %d", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), local_fd); - ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND); - return; - } - /* get the size */ - cnt = 1; - if (OPAL_SUCCESS != (rc = opal_dss.unpack(buffer, &i64, &cnt, OPAL_INT64))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(dfs); - return; - } - /* pass it back to the original caller */ - if (NULL != dfs->size_cbfunc) { - dfs->size_cbfunc(i64, dfs->cbdata); - } - /* release the request */ - OBJ_RELEASE(dfs); - break; - - case ORTE_DFS_SEEK_CMD: - /* unpack the request id for this read */ - cnt = 1; - if (OPAL_SUCCESS != (rc = opal_dss.unpack(buffer, &rid, &cnt, OPAL_UINT64))) { - ORTE_ERROR_LOG(rc); - return; - } - /* search our list of requests to find the matching one */ - dfs = NULL; - for (item = opal_list_get_first(&requests); - item != opal_list_get_end(&requests); - item 
= opal_list_get_next(item)) { - dptr = (orte_dfs_request_t*)item; - if (dptr->id == rid) { - /* request was fulfilled, so remove it */ - opal_list_remove_item(&requests, item); - dfs = dptr; - break; - } - } - if (NULL == dfs) { - opal_output_verbose(1, orte_dfs_base_framework.framework_output, - "%s recvd seek - no corresponding request found for local fd %d", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), local_fd); - ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND); - return; - } - /* get the returned offset/status */ - cnt = 1; - if (OPAL_SUCCESS != (rc = opal_dss.unpack(buffer, &i64, &cnt, OPAL_INT64))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(dfs); - return; - } - /* pass it back to the original caller */ - if (NULL != dfs->seek_cbfunc) { - dfs->seek_cbfunc(i64, dfs->cbdata); - } - /* release the request */ - OBJ_RELEASE(dfs); - break; - - case ORTE_DFS_READ_CMD: - /* unpack the request id for this read */ - cnt = 1; - if (OPAL_SUCCESS != (rc = opal_dss.unpack(buffer, &rid, &cnt, OPAL_UINT64))) { - ORTE_ERROR_LOG(rc); - return; - } - /* search our list of requests to find the matching one */ - dfs = NULL; - for (item = opal_list_get_first(&requests); - item != opal_list_get_end(&requests); - item = opal_list_get_next(item)) { - dptr = (orte_dfs_request_t*)item; - if (dptr->id == rid) { - /* request was fulfilled, so remove it */ - opal_list_remove_item(&requests, item); - dfs = dptr; - break; - } - } - if (NULL == dfs) { - opal_output_verbose(1, orte_dfs_base_framework.framework_output, - "%s recvd read - no corresponding request found for local fd %d", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), local_fd); - ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND); - return; - } - /* get the bytes read */ - cnt = 1; - if (OPAL_SUCCESS != (rc = opal_dss.unpack(buffer, &i64, &cnt, OPAL_INT64))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(dfs); - return; - } - if (0 < i64) { - cnt = i64; - if (OPAL_SUCCESS != (rc = opal_dss.unpack(buffer, dfs->read_buffer, &cnt, OPAL_UINT8))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(dfs); 
- return; - } - } - /* pass them back to the original caller */ - if (NULL != dfs->read_cbfunc) { - dfs->read_cbfunc(i64, dfs->read_buffer, dfs->cbdata); - } - /* release the request */ - OBJ_RELEASE(dfs); - break; - - case ORTE_DFS_POST_CMD: - /* unpack the request id for this read */ - cnt = 1; - if (OPAL_SUCCESS != (rc = opal_dss.unpack(buffer, &rid, &cnt, OPAL_UINT64))) { - ORTE_ERROR_LOG(rc); - return; - } - /* search our list of requests to find the matching one */ - dfs = NULL; - for (item = opal_list_get_first(&requests); - item != opal_list_get_end(&requests); - item = opal_list_get_next(item)) { - dptr = (orte_dfs_request_t*)item; - if (dptr->id == rid) { - /* request was fulfilled, so remove it */ - opal_list_remove_item(&requests, item); - dfs = dptr; - break; - } - } - if (NULL == dfs) { - opal_output_verbose(1, orte_dfs_base_framework.framework_output, - "%s recvd post - no corresponding request found", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)); - ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND); - return; - } - if (NULL != dfs->post_cbfunc) { - dfs->post_cbfunc(dfs->cbdata); - } - OBJ_RELEASE(dfs); - break; - - case ORTE_DFS_GETFM_CMD: - /* unpack the request id for this read */ - cnt = 1; - if (OPAL_SUCCESS != (rc = opal_dss.unpack(buffer, &rid, &cnt, OPAL_UINT64))) { - ORTE_ERROR_LOG(rc); - return; - } - /* search our list of requests to find the matching one */ - dfs = NULL; - for (item = opal_list_get_first(&requests); - item != opal_list_get_end(&requests); - item = opal_list_get_next(item)) { - dptr = (orte_dfs_request_t*)item; - if (dptr->id == rid) { - /* request was fulfilled, so remove it */ - opal_list_remove_item(&requests, item); - dfs = dptr; - break; - } - } - if (NULL == dfs) { - opal_output_verbose(1, orte_dfs_base_framework.framework_output, - "%s recvd getfm - no corresponding request found", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)); - ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND); - return; - } - /* return it to caller */ - if (NULL != dfs->fm_cbfunc) { - 
dfs->fm_cbfunc(buffer, dfs->cbdata); - } - OBJ_RELEASE(dfs); - break; - - default: - opal_output(0, "APP:DFS:RECV WTF"); - break; - } -} - -static void open_local_file(orte_dfs_request_t *dfs) -{ - char *filename; - orte_dfs_tracker_t *trk; - - /* extract the filename from the uri */ - if (NULL == (filename = opal_filename_from_uri(dfs->uri, NULL))) { - /* something wrong - error was reported, so just get out */ - if (NULL != dfs->open_cbfunc) { - dfs->open_cbfunc(-1, dfs->cbdata); - } - OBJ_RELEASE(dfs); - return; - } - opal_output_verbose(1, orte_dfs_base_framework.framework_output, - "%s opening local file %s", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - filename); - /* attempt to open the file */ - if (0 > (dfs->remote_fd = open(filename, O_RDONLY))) { - ORTE_ERROR_LOG(ORTE_ERR_FILE_OPEN_FAILURE); - if (NULL != dfs->open_cbfunc) { - dfs->open_cbfunc(dfs->remote_fd, dfs->cbdata); - } - return; - } - /* otherwise, create a tracker for this file */ - trk = OBJ_NEW(orte_dfs_tracker_t); - trk->requestor.jobid = ORTE_PROC_MY_NAME->jobid; - trk->requestor.vpid = ORTE_PROC_MY_NAME->vpid; - trk->uri = strdup(dfs->uri); - /* break the uri down into scheme and filename */ - trk->scheme = opal_uri_get_scheme(dfs->uri); - trk->filename = strdup(filename); - /* define the local fd */ - trk->local_fd = local_fd++; - /* record the remote file descriptor */ - trk->remote_fd = dfs->remote_fd; - /* add it to our list of active files */ - opal_list_append(&active_files, &trk->super); - /* the file is locally hosted */ - trk->host_daemon.jobid = ORTE_PROC_MY_DAEMON->jobid; - trk->host_daemon.vpid = ORTE_PROC_MY_DAEMON->vpid; - opal_output_verbose(1, orte_dfs_base_framework.framework_output, - "%s local file %s mapped localfd %d to remotefd %d", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - filename, trk->local_fd, trk->remote_fd); - /* let the caller know */ - if (NULL != dfs->open_cbfunc) { - dfs->open_cbfunc(trk->local_fd, dfs->cbdata); - } - /* request will be released by the calling 
routing */ -} - -static void process_opens(int fd, short args, void *cbdata) -{ - orte_dfs_request_t *dfs = (orte_dfs_request_t*)cbdata; - int rc; - opal_buffer_t *buffer; - char *scheme, *host, *filename; - orte_process_name_t daemon; - opal_list_t lt; - opal_namelist_t *nm; - - ORTE_ACQUIRE_OBJECT(dfs); - - /* get the scheme to determine if we can process locally or not */ - if (NULL == (scheme = opal_uri_get_scheme(dfs->uri))) { - ORTE_ERROR_LOG(ORTE_ERR_BAD_PARAM); - goto complete; - } - - if (0 == strcmp(scheme, "nfs")) { - open_local_file(dfs); - /* the callback was done in the above function */ - OBJ_RELEASE(dfs); - return; - } - - if (0 != strcmp(scheme, "file")) { - /* not yet supported */ - orte_show_help("orte_dfs_help.txt", "unsupported-filesystem", - true, dfs->uri); - goto complete; - } - - /* dissect the uri to extract host and filename/path */ - if (NULL == (filename = opal_filename_from_uri(dfs->uri, &host))) { - goto complete; - } - if (NULL == host) { - host = strdup(orte_process_info.nodename); - } - - /* if the host is our own, then treat it as a local file */ - if (orte_ifislocal(host)) { - opal_output_verbose(1, orte_dfs_base_framework.framework_output, - "%s file %s on local host", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - filename); - open_local_file(dfs); - /* the callback was done in the above function */ - OBJ_RELEASE(dfs); - return; - } - - /* ident the daemon on that host */ - daemon.jobid = ORTE_PROC_MY_DAEMON->jobid; - /* fetch the daemon for this hostname */ - opal_output_verbose(1, orte_dfs_base_framework.framework_output, - "%s looking for daemon on host %s", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), host); - OBJ_CONSTRUCT(<, opal_list_t); - if (ORTE_SUCCESS != (rc = opal_pmix.resolve_peers(host, daemon.jobid, <))) { - ORTE_ERROR_LOG(rc); - OBJ_DESTRUCT(<); - goto complete; - } - nm = (opal_namelist_t*)opal_list_get_first(<); - daemon.vpid = nm->name.vpid; - OPAL_LIST_DESTRUCT(<); - - opal_output_verbose(1, 
orte_dfs_base_framework.framework_output, - "%s file %s on host %s daemon %s", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - filename, host, ORTE_NAME_PRINT(&daemon)); - - /* double-check: if it is our local daemon, then we - * treat this as local - */ - if (daemon.vpid == ORTE_PROC_MY_DAEMON->vpid) { - opal_output_verbose(1, orte_dfs_base_framework.framework_output, - "%s local file %s on same daemon", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - filename); - open_local_file(dfs); - /* the callback was done in the above function */ - OBJ_RELEASE(dfs); - return; - } - - /* add this request to our local list so we can - * match it with the returned response when it comes - */ - dfs->id = req_id++; - opal_list_append(&requests, &dfs->super); - - /* setup a message for the daemon telling - * them what file we want to access - */ - buffer = OBJ_NEW(opal_buffer_t); - if (OPAL_SUCCESS != (rc = opal_dss.pack(buffer, &dfs->cmd, 1, ORTE_DFS_CMD_T))) { - ORTE_ERROR_LOG(rc); - opal_list_remove_item(&requests, &dfs->super); - goto complete; - } - /* pass the request id */ - if (OPAL_SUCCESS != (rc = opal_dss.pack(buffer, &dfs->id, 1, OPAL_UINT64))) { - ORTE_ERROR_LOG(rc); - opal_list_remove_item(&requests, &dfs->super); - goto complete; - } - if (OPAL_SUCCESS != (rc = opal_dss.pack(buffer, &filename, 1, OPAL_STRING))) { - ORTE_ERROR_LOG(rc); - opal_list_remove_item(&requests, &dfs->super); - goto complete; - } - - opal_output_verbose(1, orte_dfs_base_framework.framework_output, - "%s sending open file request to %s file %s", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - ORTE_NAME_PRINT(&daemon), - filename); - /* send it */ - if (0 > (rc = orte_rml.send_buffer_nb(orte_mgmt_conduit, - &daemon, buffer, - ORTE_RML_TAG_DFS_CMD, - orte_rml_send_callback, NULL))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(buffer); - opal_list_remove_item(&requests, &dfs->super); - goto complete; - } - /* don't release it */ - return; - - complete: - /* we get here if an error occurred - execute any - * pending callback 
so the proc doesn't hang - */ - if (NULL != dfs->open_cbfunc) { - dfs->open_cbfunc(-1, dfs->cbdata); - } - OBJ_RELEASE(dfs); -} - - -/* in order to handle the possible opening/reading of files by - * multiple threads, we have to ensure that all operations are - * carried out in events - so the "open" cmd simply posts an - * event containing the required info, and then returns - */ -static void dfs_open(char *uri, - orte_dfs_open_callback_fn_t cbfunc, - void *cbdata) -{ - orte_dfs_request_t *dfs; - - opal_output_verbose(1, orte_dfs_base_framework.framework_output, - "%s opening file %s", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), uri); - - /* setup the request */ - dfs = OBJ_NEW(orte_dfs_request_t); - dfs->cmd = ORTE_DFS_OPEN_CMD; - dfs->uri = strdup(uri); - dfs->open_cbfunc = cbfunc; - dfs->cbdata = cbdata; - - /* post it for processing */ - ORTE_THREADSHIFT(dfs, orte_event_base, process_opens, ORTE_SYS_PRI); -} - -static void process_close(int fd, short args, void *cbdata) -{ - orte_dfs_request_t *close_dfs = (orte_dfs_request_t*)cbdata; - orte_dfs_tracker_t *tptr, *trk; - opal_list_item_t *item; - opal_buffer_t *buffer; - int rc; - - ORTE_ACQUIRE_OBJECT(close_dfs); - - opal_output_verbose(1, orte_dfs_base_framework.framework_output, - "%s closing fd %d", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - close_dfs->local_fd); - - /* look in our local records for this fd */ - trk = NULL; - for (item = opal_list_get_first(&active_files); - item != opal_list_get_end(&active_files); - item = opal_list_get_next(item)) { - tptr = (orte_dfs_tracker_t*)item; - if (tptr->local_fd == close_dfs->local_fd) { - trk = tptr; - break; - } - } - if (NULL == trk) { - ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND); - if (NULL != close_dfs->close_cbfunc) { - close_dfs->close_cbfunc(close_dfs->local_fd, close_dfs->cbdata); - } - OBJ_RELEASE(close_dfs); - return; - } - - /* if the file is local, close it */ - if (trk->host_daemon.vpid == ORTE_PROC_MY_DAEMON->vpid) { - close(trk->remote_fd); - goto complete; - } 
- - /* setup a message for the daemon telling - * them what file to close - */ - buffer = OBJ_NEW(opal_buffer_t); - if (OPAL_SUCCESS != (rc = opal_dss.pack(buffer, &close_dfs->cmd, 1, ORTE_DFS_CMD_T))) { - ORTE_ERROR_LOG(rc); - goto complete; - } - if (OPAL_SUCCESS != (rc = opal_dss.pack(buffer, &trk->remote_fd, 1, OPAL_INT))) { - ORTE_ERROR_LOG(rc); - goto complete; - } - - opal_output_verbose(1, orte_dfs_base_framework.framework_output, - "%s sending close file request to %s for fd %d", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - ORTE_NAME_PRINT(&trk->host_daemon), - trk->local_fd); - /* send it */ - if (0 > (rc = orte_rml.send_buffer_nb(orte_mgmt_conduit, - &trk->host_daemon, buffer, - ORTE_RML_TAG_DFS_CMD, - orte_rml_send_callback, NULL))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(buffer); - goto complete; - } - - complete: - opal_list_remove_item(&active_files, &trk->super); - OBJ_RELEASE(trk); - if (NULL != close_dfs->close_cbfunc) { - close_dfs->close_cbfunc(close_dfs->local_fd, close_dfs->cbdata); - } - OBJ_RELEASE(close_dfs); -} - -static void dfs_close(int fd, - orte_dfs_close_callback_fn_t cbfunc, - void *cbdata) -{ - orte_dfs_request_t *dfs; - - opal_output_verbose(1, orte_dfs_base_framework.framework_output, - "%s close called on fd %d", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), fd); - - dfs = OBJ_NEW(orte_dfs_request_t); - dfs->cmd = ORTE_DFS_CLOSE_CMD; - dfs->local_fd = fd; - dfs->close_cbfunc = cbfunc; - dfs->cbdata = cbdata; - - /* post it for processing */ - ORTE_THREADSHIFT(dfs, orte_event_base, process_close, ORTE_SYS_PRI); -} - -static void process_sizes(int fd, short args, void *cbdata) -{ - orte_dfs_request_t *size_dfs = (orte_dfs_request_t*)cbdata; - orte_dfs_tracker_t *tptr, *trk; - opal_list_item_t *item; - opal_buffer_t *buffer; - int rc; - struct stat buf; - - ORTE_ACQUIRE_OBJECT(size_dfs); - - opal_output_verbose(1, orte_dfs_base_framework.framework_output, - "%s processing get_size on fd %d", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - 
size_dfs->local_fd); - - /* look in our local records for this fd */ - trk = NULL; - for (item = opal_list_get_first(&active_files); - item != opal_list_get_end(&active_files); - item = opal_list_get_next(item)) { - tptr = (orte_dfs_tracker_t*)item; - if (tptr->local_fd == size_dfs->local_fd) { - trk = tptr; - break; - } - } - if (NULL == trk) { - ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND); - OBJ_RELEASE(size_dfs); - return; - } - - /* if the file is local, execute the seek on it - we - * stuck the "whence" value in the remote_fd - */ - if (trk->host_daemon.vpid == ORTE_PROC_MY_DAEMON->vpid) { - /* stat the file and get its size */ - if (0 > stat(trk->filename, &buf)) { - /* cannot stat file */ - opal_output_verbose(1, orte_dfs_base_framework.framework_output, - "%s could not stat %s", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - trk->filename); - if (NULL != size_dfs->size_cbfunc) { - size_dfs->size_cbfunc(-1, size_dfs->cbdata); - } - } else { - if (NULL != size_dfs->size_cbfunc) { - size_dfs->size_cbfunc(buf.st_size, size_dfs->cbdata); - } - } - goto complete; - } - /* add this request to our local list so we can - * match it with the returned response when it comes - */ - size_dfs->id = req_id++; - opal_list_append(&requests, &size_dfs->super); - - /* setup a message for the daemon telling - * them what file we want to access - */ - buffer = OBJ_NEW(opal_buffer_t); - if (OPAL_SUCCESS != (rc = opal_dss.pack(buffer, &size_dfs->cmd, 1, ORTE_DFS_CMD_T))) { - ORTE_ERROR_LOG(rc); - opal_list_remove_item(&requests, &size_dfs->super); - goto complete; - } - /* pass the request id */ - if (OPAL_SUCCESS != (rc = opal_dss.pack(buffer, &size_dfs->id, 1, OPAL_UINT64))) { - ORTE_ERROR_LOG(rc); - opal_list_remove_item(&requests, &size_dfs->super); - goto complete; - } - if (OPAL_SUCCESS != (rc = opal_dss.pack(buffer, &trk->remote_fd, 1, OPAL_INT))) { - ORTE_ERROR_LOG(rc); - opal_list_remove_item(&requests, &size_dfs->super); - goto complete; - } - - opal_output_verbose(1, 
orte_dfs_base_framework.framework_output, - "%s sending get_size request to %s for fd %d", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - ORTE_NAME_PRINT(&trk->host_daemon), - trk->local_fd); - /* send it */ - if (0 > (rc = orte_rml.send_buffer_nb(orte_mgmt_conduit, - &trk->host_daemon, buffer, - ORTE_RML_TAG_DFS_CMD, - orte_rml_send_callback, NULL))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(buffer); - opal_list_remove_item(&requests, &size_dfs->super); - if (NULL != size_dfs->size_cbfunc) { - size_dfs->size_cbfunc(-1, size_dfs->cbdata); - } - goto complete; - } - /* leave the request there */ - return; - - complete: - OBJ_RELEASE(size_dfs); -} - -static void dfs_get_file_size(int fd, - orte_dfs_size_callback_fn_t cbfunc, - void *cbdata) -{ - orte_dfs_request_t *dfs; - - opal_output_verbose(1, orte_dfs_base_framework.framework_output, - "%s get_size called on fd %d", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), fd); - - dfs = OBJ_NEW(orte_dfs_request_t); - dfs->cmd = ORTE_DFS_SIZE_CMD; - dfs->local_fd = fd; - dfs->size_cbfunc = cbfunc; - dfs->cbdata = cbdata; - - /* post it for processing */ - ORTE_THREADSHIFT(dfs, orte_event_base, process_sizes, ORTE_SYS_PRI); -} - - -static void process_seeks(int fd, short args, void *cbdata) -{ - orte_dfs_request_t *seek_dfs = (orte_dfs_request_t*)cbdata; - orte_dfs_tracker_t *tptr, *trk; - opal_list_item_t *item; - opal_buffer_t *buffer; - int64_t i64; - int rc; - struct stat buf; - - ORTE_ACQUIRE_OBJECT(seek_dfs); - - opal_output_verbose(1, orte_dfs_base_framework.framework_output, - "%s processing seek on fd %d", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - seek_dfs->local_fd); - - /* look in our local records for this fd */ - trk = NULL; - for (item = opal_list_get_first(&active_files); - item != opal_list_get_end(&active_files); - item = opal_list_get_next(item)) { - tptr = (orte_dfs_tracker_t*)item; - if (tptr->local_fd == seek_dfs->local_fd) { - trk = tptr; - break; - } - } - if (NULL == trk) { - ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND); - 
OBJ_RELEASE(seek_dfs); - return; - } - - /* if the file is local, execute the seek on it - we - * stuck the "whence" value in the remote_fd - */ - if (trk->host_daemon.vpid == ORTE_PROC_MY_DAEMON->vpid) { - opal_output_verbose(1, orte_dfs_base_framework.framework_output, - "%s local seek on fd %d", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - seek_dfs->local_fd); - /* stat the file and get its size */ - if (0 > stat(trk->filename, &buf)) { - /* cannot stat file */ - opal_output_verbose(1, orte_dfs_base_framework.framework_output, - "%s could not stat %s", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - trk->filename); - if (NULL != seek_dfs->seek_cbfunc) { - seek_dfs->seek_cbfunc(-1, seek_dfs->cbdata); - } - } else if (buf.st_size < seek_dfs->read_length && - SEEK_SET == seek_dfs->remote_fd) { - /* seek would take us past EOF */ - if (NULL != seek_dfs->seek_cbfunc) { - seek_dfs->seek_cbfunc(-1, seek_dfs->cbdata); - } - } else if (buf.st_size < (off_t)(trk->location + seek_dfs->read_length) && - SEEK_CUR == seek_dfs->remote_fd) { - /* seek would take us past EOF */ - if (NULL != seek_dfs->seek_cbfunc) { - seek_dfs->seek_cbfunc(-1, seek_dfs->cbdata); - } - } else { - lseek(trk->remote_fd, seek_dfs->read_length, seek_dfs->remote_fd); - if (SEEK_SET == seek_dfs->remote_fd) { - trk->location = seek_dfs->read_length; - } else { - trk->location += seek_dfs->read_length; - } - if (NULL != seek_dfs->seek_cbfunc) { - seek_dfs->seek_cbfunc(seek_dfs->read_length, seek_dfs->cbdata); - } - } - goto complete; - } - /* add this request to our local list so we can - * match it with the returned response when it comes - */ - seek_dfs->id = req_id++; - opal_list_append(&requests, &seek_dfs->super); - - /* setup a message for the daemon telling - * them what file to seek - */ - buffer = OBJ_NEW(opal_buffer_t); - if (OPAL_SUCCESS != (rc = opal_dss.pack(buffer, &seek_dfs->cmd, 1, ORTE_DFS_CMD_T))) { - ORTE_ERROR_LOG(rc); - goto complete; - } - /* pass the request id */ - if (OPAL_SUCCESS != (rc = 
opal_dss.pack(buffer, &seek_dfs->id, 1, OPAL_UINT64))) { - ORTE_ERROR_LOG(rc); - opal_list_remove_item(&requests, &seek_dfs->super); - goto complete; - } - if (OPAL_SUCCESS != (rc = opal_dss.pack(buffer, &trk->remote_fd, 1, OPAL_INT))) { - ORTE_ERROR_LOG(rc); - goto complete; - } - i64 = (int64_t)seek_dfs->read_length; - if (OPAL_SUCCESS != (rc = opal_dss.pack(buffer, &i64, 1, OPAL_INT64))) { - ORTE_ERROR_LOG(rc); - goto complete; - } - if (OPAL_SUCCESS != (rc = opal_dss.pack(buffer, &seek_dfs->remote_fd, 1, OPAL_INT))) { - ORTE_ERROR_LOG(rc); - goto complete; - } - - opal_output_verbose(1, orte_dfs_base_framework.framework_output, - "%s sending seek file request to %s for fd %d", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - ORTE_NAME_PRINT(&trk->host_daemon), - trk->local_fd); - /* send it */ - if (0 > (rc = orte_rml.send_buffer_nb(orte_mgmt_conduit, - &trk->host_daemon, buffer, - ORTE_RML_TAG_DFS_CMD, - orte_rml_send_callback, NULL))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(buffer); - goto complete; - } - /* leave the request */ - return; - - complete: - OBJ_RELEASE(seek_dfs); -} - - -static void dfs_seek(int fd, long offset, int whence, - orte_dfs_seek_callback_fn_t cbfunc, - void *cbdata) -{ - orte_dfs_request_t *dfs; - - opal_output_verbose(1, orte_dfs_base_framework.framework_output, - "%s seek called on fd %d", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), fd); - - dfs = OBJ_NEW(orte_dfs_request_t); - dfs->cmd = ORTE_DFS_SEEK_CMD; - dfs->local_fd = fd; - dfs->read_length = offset; - dfs->remote_fd = whence; - dfs->seek_cbfunc = cbfunc; - dfs->cbdata = cbdata; - - /* post it for processing */ - ORTE_THREADSHIFT(dfs, orte_event_base, process_seeks, ORTE_SYS_PRI); -} - -static void process_reads(int fd, short args, void *cbdata) -{ - orte_dfs_request_t *read_dfs = (orte_dfs_request_t*)cbdata; - orte_dfs_tracker_t *tptr, *trk; - long nbytes; - opal_list_item_t *item; - opal_buffer_t *buffer; - int64_t i64; - int rc; - - ORTE_ACQUIRE_OBJECT(read_dfs); - - /* look in our local 
records for this fd */ - trk = NULL; - for (item = opal_list_get_first(&active_files); - item != opal_list_get_end(&active_files); - item = opal_list_get_next(item)) { - tptr = (orte_dfs_tracker_t*)item; - if (tptr->local_fd == read_dfs->local_fd) { - trk = tptr; - break; - } - } - if (NULL == trk) { - ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND); - OBJ_RELEASE(read_dfs); - return; - } - - /* if the file is local, read the desired bytes */ - if (trk->host_daemon.vpid == ORTE_PROC_MY_DAEMON->vpid) { - nbytes = read(trk->remote_fd, read_dfs->read_buffer, read_dfs->read_length); - if (0 < nbytes) { - /* update our location */ - trk->location += nbytes; - } - /* pass them back to the caller */ - if (NULL != read_dfs->read_cbfunc) { - read_dfs->read_cbfunc(nbytes, read_dfs->read_buffer, read_dfs->cbdata); - } - /* request is complete */ - OBJ_RELEASE(read_dfs); - return; - } - /* add this request to our pending list */ - read_dfs->id = req_id++; - opal_list_append(&requests, &read_dfs->super); - - /* setup a message for the daemon telling - * them what file to read - */ - buffer = OBJ_NEW(opal_buffer_t); - if (OPAL_SUCCESS != (rc = opal_dss.pack(buffer, &read_dfs->cmd, 1, ORTE_DFS_CMD_T))) { - ORTE_ERROR_LOG(rc); - goto complete; - } - /* include the request id */ - if (OPAL_SUCCESS != (rc = opal_dss.pack(buffer, &read_dfs->id, 1, OPAL_UINT64))) { - ORTE_ERROR_LOG(rc); - goto complete; - } - if (OPAL_SUCCESS != (rc = opal_dss.pack(buffer, &trk->remote_fd, 1, OPAL_INT))) { - ORTE_ERROR_LOG(rc); - goto complete; - } - i64 = (int64_t)read_dfs->read_length; - if (OPAL_SUCCESS != (rc = opal_dss.pack(buffer, &i64, 1, OPAL_INT64))) { - ORTE_ERROR_LOG(rc); - goto complete; - } - - opal_output_verbose(1, orte_dfs_base_framework.framework_output, - "%s sending read file request to %s for fd %d", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - ORTE_NAME_PRINT(&trk->host_daemon), - trk->local_fd); - /* send it */ - if (0 > (rc = orte_rml.send_buffer_nb(orte_mgmt_conduit, - &trk->host_daemon, buffer, 
- ORTE_RML_TAG_DFS_CMD, - orte_rml_send_callback, NULL))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(buffer); - } - /* don't release the request */ - return; - - complete: - /* don't need to hang on to this request */ - opal_list_remove_item(&requests, &read_dfs->super); - OBJ_RELEASE(read_dfs); -} - -static void dfs_read(int fd, uint8_t *buffer, - long length, - orte_dfs_read_callback_fn_t cbfunc, - void *cbdata) -{ - orte_dfs_request_t *dfs; - - dfs = OBJ_NEW(orte_dfs_request_t); - dfs->cmd = ORTE_DFS_READ_CMD; - dfs->local_fd = fd; - dfs->read_buffer = buffer; - dfs->read_length = length; - dfs->read_cbfunc = cbfunc; - dfs->cbdata = cbdata; - - /* post it for processing */ - ORTE_THREADSHIFT(dfs, orte_event_base, process_reads, ORTE_SYS_PRI); -} - -static void process_posts(int fd, short args, void *cbdata) -{ - orte_dfs_request_t *dfs = (orte_dfs_request_t*)cbdata; - opal_buffer_t *buffer; - int rc; - - ORTE_ACQUIRE_OBJECT(dfs); - - /* we will get confirmation in our receive function, so - * add this request to our list */ - dfs->id = req_id++; - opal_list_append(&requests, &dfs->super); - - /* Send the buffer's contents to our local daemon for storage */ - buffer = OBJ_NEW(opal_buffer_t); - if (OPAL_SUCCESS != (rc = opal_dss.pack(buffer, &dfs->cmd, 1, ORTE_DFS_CMD_T))) { - ORTE_ERROR_LOG(rc); - goto error; - } - /* include the request id */ - if (OPAL_SUCCESS != (rc = opal_dss.pack(buffer, &dfs->id, 1, OPAL_UINT64))) { - ORTE_ERROR_LOG(rc); - goto error; - } - /* add my name */ - if (OPAL_SUCCESS != (rc = opal_dss.pack(buffer, ORTE_PROC_MY_NAME, 1, ORTE_NAME))) { - ORTE_ERROR_LOG(rc); - goto error; - } - /* pack the payload */ - if (OPAL_SUCCESS != (rc = opal_dss.pack(buffer, &dfs->bptr, 1, OPAL_BUFFER))) { - ORTE_ERROR_LOG(rc); - goto error; - } - /* send it */ - if (0 > (rc = orte_rml.send_buffer_nb(orte_mgmt_conduit, - ORTE_PROC_MY_DAEMON, buffer, - ORTE_RML_TAG_DFS_CMD, - orte_rml_send_callback, NULL))) { - ORTE_ERROR_LOG(rc); - goto error; - } - return; - - 
error: - OBJ_RELEASE(buffer); - opal_list_remove_item(&requests, &dfs->super); - if (NULL != dfs->post_cbfunc) { - dfs->post_cbfunc(dfs->cbdata); - } - OBJ_RELEASE(dfs); -} - -static void dfs_post_file_map(opal_buffer_t *bo, - orte_dfs_post_callback_fn_t cbfunc, - void *cbdata) -{ - orte_dfs_request_t *dfs; - - dfs = OBJ_NEW(orte_dfs_request_t); - dfs->cmd = ORTE_DFS_POST_CMD; - dfs->bptr = bo; - dfs->post_cbfunc = cbfunc; - dfs->cbdata = cbdata; - - /* post it for processing */ - ORTE_THREADSHIFT(dfs, orte_event_base, process_posts, ORTE_SYS_PRI); -} - -static void process_getfm(int fd, short args, void *cbdata) -{ - orte_dfs_request_t *dfs = (orte_dfs_request_t*)cbdata; - opal_buffer_t *buffer; - int rc; - - ORTE_ACQUIRE_OBJECT(dfs); - - /* we will get confirmation in our receive function, so - * add this request to our list */ - dfs->id = req_id++; - opal_list_append(&requests, &dfs->super); - - /* Send the request to our local daemon */ - buffer = OBJ_NEW(opal_buffer_t); - if (OPAL_SUCCESS != (rc = opal_dss.pack(buffer, &dfs->cmd, 1, ORTE_DFS_CMD_T))) { - ORTE_ERROR_LOG(rc); - goto error; - } - /* include the request id */ - if (OPAL_SUCCESS != (rc = opal_dss.pack(buffer, &dfs->id, 1, OPAL_UINT64))) { - ORTE_ERROR_LOG(rc); - goto error; - } - /* and the target */ - if (OPAL_SUCCESS != (rc = opal_dss.pack(buffer, &dfs->target, 1, ORTE_NAME))) { - ORTE_ERROR_LOG(rc); - goto error; - } - /* send it */ - if (0 > (rc = orte_rml.send_buffer_nb(orte_mgmt_conduit, - ORTE_PROC_MY_DAEMON, buffer, - ORTE_RML_TAG_DFS_CMD, - orte_rml_send_callback, NULL))) { - ORTE_ERROR_LOG(rc); - goto error; - } - return; - - error: - OBJ_RELEASE(buffer); - opal_list_remove_item(&requests, &dfs->super); - if (NULL != dfs->fm_cbfunc) { - dfs->fm_cbfunc(NULL, dfs->cbdata); - } - OBJ_RELEASE(dfs); -} - -static void dfs_get_file_map(orte_process_name_t *target, - orte_dfs_fm_callback_fn_t cbfunc, - void *cbdata) -{ - orte_dfs_request_t *dfs; - - dfs = OBJ_NEW(orte_dfs_request_t); - dfs->cmd = 
ORTE_DFS_GETFM_CMD; - dfs->target.jobid = target->jobid; - dfs->target.vpid = target->vpid; - dfs->fm_cbfunc = cbfunc; - dfs->cbdata = cbdata; - - /* post it for processing */ - ORTE_THREADSHIFT(dfs, orte_event_base, process_getfm, ORTE_SYS_PRI); -} - -static void dfs_load_file_maps(orte_jobid_t jobid, - opal_buffer_t *bo, - orte_dfs_load_callback_fn_t cbfunc, - void *cbdata) -{ - /* apps don't store file maps */ - if (NULL != cbfunc) { - cbfunc(cbdata); - } -} - -static void dfs_purge_file_maps(orte_jobid_t jobid, - orte_dfs_purge_callback_fn_t cbfunc, - void *cbdata) -{ - /* apps don't store file maps */ - if (NULL != cbfunc) { - cbfunc(cbdata); - } -} diff --git a/orte/mca/dfs/app/dfs_app.h b/orte/mca/dfs/app/dfs_app.h deleted file mode 100644 index fef69fdf582..00000000000 --- a/orte/mca/dfs/app/dfs_app.h +++ /dev/null @@ -1,35 +0,0 @@ -/* - * Copyright (c) 2012 Los Alamos National Security, LLC. All rights reserved. - * - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -/** - * @file - * - */ - -#ifndef MCA_dfs_app_EXPORT_H -#define MCA_dfs_app_EXPORT_H - -#include "orte_config.h" - -#include "orte/mca/dfs/dfs.h" - -BEGIN_C_DECLS - -/* - * Local Component structures - */ - -ORTE_MODULE_DECLSPEC extern orte_dfs_base_component_t mca_dfs_app_component; - -ORTE_DECLSPEC extern orte_dfs_base_module_t orte_dfs_app_module; - -END_C_DECLS - -#endif /* MCA_dfs_app_EXPORT_H */ diff --git a/orte/mca/dfs/app/dfs_app_component.c b/orte/mca/dfs/app/dfs_app_component.c deleted file mode 100644 index 1479007ac0e..00000000000 --- a/orte/mca/dfs/app/dfs_app_component.c +++ /dev/null @@ -1,85 +0,0 @@ -/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ -/* - * Copyright (c) 2012-2015 Los Alamos National Security, LLC. All rights - * reserved. - * - * Copyright (c) 2016 Intel, Inc. All rights reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "orte_config.h" -#include "opal/util/output.h" - -#include "orte/runtime/orte_globals.h" - -#include "orte/mca/dfs/dfs.h" -#include "orte/mca/dfs/base/base.h" -#include "dfs_app.h" - -/* - * Public string for version number - */ -const char *orte_dfs_app_component_version_string = - "ORTE DFS app MCA component version " ORTE_VERSION; - -/* - * Local functionality - */ -static int dfs_app_open(void); -static int dfs_app_close(void); -static int dfs_app_component_query(mca_base_module_t **module, int *priority); - -/* - * Instantiate the public struct with all of our public information - * and pointer to our public functions in it - */ -orte_dfs_base_component_t mca_dfs_app_component = -{ - /* Handle the general mca_component_t struct containing - * meta information about the component - */ - .base_version = { - ORTE_DFS_BASE_VERSION_1_0_0, - /* Component name and version */ - .mca_component_name = "app", - MCA_BASE_MAKE_VERSION(component, ORTE_MAJOR_VERSION, ORTE_MINOR_VERSION, - ORTE_RELEASE_VERSION), - - /* Component open and close functions */ - .mca_open_component = dfs_app_open, - .mca_close_component = dfs_app_close, - .mca_query_component = dfs_app_component_query, - }, - .base_data = { - /* The component is checkpoint ready */ - MCA_BASE_METADATA_PARAM_CHECKPOINT - }, -}; - -static int dfs_app_open(void) -{ - return ORTE_SUCCESS; -} - -static int dfs_app_close(void) -{ - return ORTE_SUCCESS; -} - -static int dfs_app_component_query(mca_base_module_t **module, int *priority) -{ - if (ORTE_PROC_IS_APP) { - /* set our priority high as we are the default for apps */ - *priority = 1000; - *module = (mca_base_module_t *)&orte_dfs_app_module; - return ORTE_SUCCESS; - } - - *priority = -1; - *module = NULL; - return ORTE_ERROR; -} diff --git a/orte/mca/dfs/app/owner.txt b/orte/mca/dfs/app/owner.txt deleted file mode 100644 index 4ad6f408ca3..00000000000 --- 
a/orte/mca/dfs/app/owner.txt +++ /dev/null @@ -1,7 +0,0 @@ -# -# owner/status file -# owner: institution that is responsible for this package -# status: e.g. active, maintenance, unmaintained -# -owner: INTEL -status: maintenance diff --git a/orte/mca/dfs/base/Makefile.am b/orte/mca/dfs/base/Makefile.am deleted file mode 100644 index eb036387189..00000000000 --- a/orte/mca/dfs/base/Makefile.am +++ /dev/null @@ -1,15 +0,0 @@ -# -# Copyright (c) 2012-2013 Los Alamos National Security, LLC. All rights reserved. -# $COPYRIGHT$ -# -# Additional copyrights may follow -# -# $HEADER$ -# - -headers += \ - base/base.h - -libmca_dfs_la_SOURCES += \ - base/dfs_base_select.c \ - base/dfs_base_frame.c diff --git a/orte/mca/dfs/base/base.h b/orte/mca/dfs/base/base.h deleted file mode 100644 index 8356b488cd7..00000000000 --- a/orte/mca/dfs/base/base.h +++ /dev/null @@ -1,82 +0,0 @@ -/* - * Copyright (c) 2012-2013 Los Alamos National Security, Inc. All rights reserved. - * Copyright (c) 2017 Intel, Inc. All rights reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ -/** @file: - */ - -#ifndef ORTE_MCA_DFS_BASE_H -#define ORTE_MCA_DFS_BASE_H - -/* - * includes - */ -#include "orte_config.h" -#include "orte/types.h" -#include "orte/constants.h" - -#include "opal/class/opal_list.h" -#include "opal/mca/event/event.h" - -#include "orte/mca/mca.h" -#include "orte/mca/dfs/dfs.h" - - -BEGIN_C_DECLS - -/* - * MCA Framework - */ -ORTE_DECLSPEC extern mca_base_framework_t orte_dfs_base_framework; -/* select a component */ -ORTE_DECLSPEC int orte_dfs_base_select(void); - -/* tracker for active files */ -typedef struct { - opal_list_item_t super; - orte_process_name_t requestor; - orte_process_name_t host_daemon; - char *uri; - char *scheme; - char *filename; - int local_fd; - int remote_fd; - size_t location; -} orte_dfs_tracker_t; -OBJ_CLASS_DECLARATION(orte_dfs_tracker_t); - -/* requests */ -typedef struct { - opal_list_item_t super; - opal_event_t ev; - uint64_t id; - orte_dfs_cmd_t cmd; - orte_process_name_t target; - char *uri; - int local_fd; - int remote_fd; - uint8_t *read_buffer; - long read_length; - opal_buffer_t *bptr; - opal_buffer_t bucket; - orte_dfs_open_callback_fn_t open_cbfunc; - orte_dfs_close_callback_fn_t close_cbfunc; - orte_dfs_size_callback_fn_t size_cbfunc; - orte_dfs_seek_callback_fn_t seek_cbfunc; - orte_dfs_read_callback_fn_t read_cbfunc; - orte_dfs_post_callback_fn_t post_cbfunc; - orte_dfs_fm_callback_fn_t fm_cbfunc; - orte_dfs_load_callback_fn_t load_cbfunc; - orte_dfs_purge_callback_fn_t purge_cbfunc; - void *cbdata; -} orte_dfs_request_t; -OBJ_CLASS_DECLARATION(orte_dfs_request_t); - -END_C_DECLS - -#endif diff --git a/orte/mca/dfs/base/dfs_base_frame.c b/orte/mca/dfs/base/dfs_base_frame.c deleted file mode 100644 index 77ce6171423..00000000000 --- a/orte/mca/dfs/base/dfs_base_frame.c +++ /dev/null @@ -1,163 +0,0 @@ -/* - * Copyright (c) 2012-2013 Los Alamos National Security, Inc. All rights reserved. 
- * Copyright (c) 2013 Intel, Inc. All rights reserved - * Copyright (c) 2014 Research Organization for Information Science - * and Technology (RIST). All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - - -#include "orte_config.h" -#include "orte/constants.h" - -#include -#ifdef HAVE_UNISTD_H -#include -#endif -#ifdef HAVE_SYS_TYPES_H -#include -#endif - -#include "orte/mca/mca.h" -#include "opal/mca/base/base.h" - -#include "opal/util/opal_environ.h" -#include "opal/util/output.h" - -#include "orte/util/show_help.h" -#include "orte/mca/dfs/base/base.h" - -#include "orte/mca/dfs/base/static-components.h" - -/* - * Globals - */ -orte_dfs_base_module_t orte_dfs = { - NULL, - NULL, - NULL, - NULL, - NULL, - NULL, - NULL, - NULL, - NULL, - NULL, - NULL -}; - -static int orte_dfs_base_close(void) -{ - /* Close selected component */ - if (NULL != orte_dfs.finalize) { - orte_dfs.finalize(); - } - - return mca_base_framework_components_close(&orte_dfs_base_framework, NULL); -} - -/** - * Function for finding and opening either all MCA components, or the one - * that was specifically requested via a MCA parameter. 
- */ -static int orte_dfs_base_open(mca_base_open_flag_t flags) -{ - /* Open up all available components */ - return mca_base_framework_components_open(&orte_dfs_base_framework, flags); -} - -MCA_BASE_FRAMEWORK_DECLARE(orte, dfs, "ORTE Distributed File System", - NULL, orte_dfs_base_open, orte_dfs_base_close, - mca_dfs_base_static_components, 0); - - -/* instantiate classes */ -static void trk_con(orte_dfs_tracker_t *trk) -{ - trk->host_daemon.jobid = ORTE_JOBID_INVALID; - trk->host_daemon.vpid = ORTE_VPID_INVALID; - trk->uri = NULL; - trk->scheme = NULL; - trk->filename = NULL; - trk->location = 0; -} -static void trk_des(orte_dfs_tracker_t *trk) -{ - if (NULL != trk->uri) { - free(trk->uri); - } - if (NULL != trk->scheme) { - free(trk->scheme); - } - if (NULL != trk->filename) { - free(trk->filename); - } -} -OBJ_CLASS_INSTANCE(orte_dfs_tracker_t, - opal_list_item_t, - trk_con, trk_des); -static void req_const(orte_dfs_request_t *dfs) -{ - dfs->id = 0; - dfs->uri = NULL; - dfs->local_fd = -1; - dfs->remote_fd = -1; - dfs->read_length = -1; - dfs->bptr = NULL; - OBJ_CONSTRUCT(&dfs->bucket, opal_buffer_t); - dfs->read_buffer = NULL; - dfs->open_cbfunc = NULL; - dfs->close_cbfunc = NULL; - dfs->size_cbfunc = NULL; - dfs->seek_cbfunc = NULL; - dfs->read_cbfunc = NULL; - dfs->post_cbfunc = NULL; - dfs->fm_cbfunc = NULL; - dfs->load_cbfunc = NULL; - dfs->purge_cbfunc = NULL; - dfs->cbdata = NULL; -} -static void req_dest(orte_dfs_request_t *dfs) -{ - if (NULL != dfs->uri) { - free(dfs->uri); - } - OBJ_DESTRUCT(&dfs->bucket); -} -OBJ_CLASS_INSTANCE(orte_dfs_request_t, - opal_list_item_t, - req_const, req_dest); - -static void jobfm_const(orte_dfs_jobfm_t *fm) -{ - OBJ_CONSTRUCT(&fm->maps, opal_list_t); -} -static void jobfm_dest(orte_dfs_jobfm_t *fm) -{ - opal_list_item_t *item; - - while (NULL != (item = opal_list_remove_first(&fm->maps))) { - OBJ_RELEASE(item); - } - OBJ_DESTRUCT(&fm->maps); -} -OBJ_CLASS_INSTANCE(orte_dfs_jobfm_t, - opal_list_item_t, - jobfm_const, 
jobfm_dest); - -static void vpidfm_const(orte_dfs_vpidfm_t *fm) -{ - OBJ_CONSTRUCT(&fm->data, opal_buffer_t); - fm->num_entries = 0; -} -static void vpidfm_dest(orte_dfs_vpidfm_t *fm) -{ - OBJ_DESTRUCT(&fm->data); -} -OBJ_CLASS_INSTANCE(orte_dfs_vpidfm_t, - opal_list_item_t, - vpidfm_const, vpidfm_dest); diff --git a/orte/mca/dfs/base/dfs_base_select.c b/orte/mca/dfs/base/dfs_base_select.c deleted file mode 100644 index bf0a7c2d678..00000000000 --- a/orte/mca/dfs/base/dfs_base_select.c +++ /dev/null @@ -1,56 +0,0 @@ -/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ -/* - * Copyright (c) 2012-2015 Los Alamos National Security, Inc. All rights - * reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - - -#include "orte_config.h" -#include "orte/constants.h" - -#include - -#include "orte/mca/mca.h" -#include "opal/mca/base/base.h" -#include "opal/util/output.h" - -#include "orte/mca/dfs/base/base.h" - -int orte_dfs_base_select(void) -{ - int exit_status = ORTE_SUCCESS; - orte_dfs_base_component_t *best_component = NULL; - orte_dfs_base_module_t *best_module = NULL; - - /* - * Select the best component - */ - if (OPAL_SUCCESS != mca_base_select("dfs", orte_dfs_base_framework.framework_output, - &orte_dfs_base_framework.framework_components, - (mca_base_module_t **) &best_module, - (mca_base_component_t **) &best_component, NULL)) { - /* This will only happen if no component was selected, which - * is okay - we don't have to select anything - */ - return ORTE_SUCCESS; - } - - /* Save the winner */ - orte_dfs = *best_module; - - /* Initialize the winner */ - if (NULL != best_module && NULL != orte_dfs.init) { - if (ORTE_SUCCESS != orte_dfs.init()) { - exit_status = ORTE_ERROR; - goto cleanup; - } - } - - cleanup: - return exit_status; -} diff --git a/orte/mca/dfs/base/owner.txt b/orte/mca/dfs/base/owner.txt deleted file mode 100644 index 4ad6f408ca3..00000000000 --- a/orte/mca/dfs/base/owner.txt +++ /dev/null @@ -1,7 +0,0 
@@ -# -# owner/status file -# owner: institution that is responsible for this package -# status: e.g. active, maintenance, unmaintained -# -owner: INTEL -status: maintenance diff --git a/orte/mca/dfs/dfs.h b/orte/mca/dfs/dfs.h deleted file mode 100644 index 136c0d76b67..00000000000 --- a/orte/mca/dfs/dfs.h +++ /dev/null @@ -1,184 +0,0 @@ -/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ -/* - * Copyright (c) 2012-2015 Los Alamos National Security, LLC. All rights - * reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#ifndef ORTE_MCA_DFS_H -#define ORTE_MCA_DFS_H - -#include "orte_config.h" -#include "orte/types.h" - -#ifdef HAVE_FCNTL_H -#include -#endif - -#include "orte/mca/mca.h" -#include "opal/mca/base/base.h" - -#include "orte/mca/dfs/dfs_types.h" - -BEGIN_C_DECLS - -/* - * Framework Interfaces - */ -/** - * Module initialization function. - * - * @retval ORTE_SUCCESS The operation completed successfully - * @retval ORTE_ERROR An unspecifed error occurred - */ -typedef int (*orte_dfs_base_module_init_fn_t)(void); - -/** - * Module finalization function. - * - * @retval ORTE_SUCCESS The operation completed successfully - * @retval ORTE_ERROR An unspecifed error occurred - */ -typedef int (*orte_dfs_base_module_finalize_fn_t)(void); - -/* Open a file - * - * Open a possibly remote file for reading. The uri can include file - * system descriptions (e.g., file:///, nfs:///, or hdfs:///). Note - * that this is a full uri - i.e., it may include a hostname to - * indicate where the file is located - * - * The file descriptor will be returned in the cbfunc. 
It - * represents the number by which the file can be referenced, - * and will be an ORTE error code upon failure - */ -typedef void (*orte_dfs_base_module_open_fn_t)(char *uri, - orte_dfs_open_callback_fn_t cbfunc, - void *cbdata); - -/* Close a file - * - * Closes and invalidates the file descriptor - */ -typedef void (*orte_dfs_base_module_close_fn_t)(int fd, - orte_dfs_close_callback_fn_t cbfunc, - void *cbdata); - -/* Get the size of a file - * - */ -typedef void (*orte_dfs_base_module_get_file_size_fn_t)(int fd, - orte_dfs_size_callback_fn_t cbfunc, - void *cbdata); - -/* Position a file - * - * Move the read position in the file to the specified byte number - * relative to the location specified by whence: - * SEEK_SET => from beginning of file - * SEEK_CUR => from current location - * - * The callback will return the offset, or a negative value if - * the requested seek would take the pointer past the end of the - * file. This is contrary to standard lseek behavior, but is consistent - * with the read-only nature of this framework - */ -typedef void (*orte_dfs_base_module_seek_fn_t)(int fd, long offset, int whence, - orte_dfs_seek_callback_fn_t cbfunc, - void *cbdata); - -/* Read bytes from a possibly remote file - * - * Read the specified number of bytes from the given file, using the - * specified offset (in bytes). The status returned in cbfunc is the actual number - * of bytes read, which should match the request unless the requested - * length/offset would read past the end of file. 
An ORTE error code - * will be returned upon error - * - * Note: the caller is responsible for ensuring the buffer is at least - * length bytes in size - */ -typedef void (*orte_dfs_base_module_read_fn_t)(int fd, uint8_t *buffer, - long length, - orte_dfs_read_callback_fn_t cbfunc, - void *cbdata); - - -/* Post a file map so others may access it */ -typedef void (*orte_dfs_base_module_post_file_map_fn_t)(opal_buffer_t *buf, - orte_dfs_post_callback_fn_t cbfunc, - void *cbdata); - -/* Get the file map for a process - * - * Returns the file map associated with the specified process name. If - * NULL is provided, then all known process maps will be returned in the - * byte object. It is the responsibility of the caller to unpack it, so - * applications are free to specify whatever constitutes a "file map" that - * suits their purposes - */ -typedef void (*orte_dfs_base_module_get_file_map_fn_t)(orte_process_name_t *target, - orte_dfs_fm_callback_fn_t cbfunc, - void *cbdata); - - -/* Load file maps for a job - */ -typedef void (*orte_dfs_base_module_load_file_maps_fn_t)(orte_jobid_t jobid, - opal_buffer_t *buf, - orte_dfs_load_callback_fn_t cbfunc, - void *cbdata); - -/* Purge file maps for a job */ -typedef void (*orte_dfs_base_module_purge_file_maps_fn_t)(orte_jobid_t jobid, - orte_dfs_purge_callback_fn_t cbfunc, - void *cbdata); - -/* - * Module Structure - */ -struct orte_dfs_base_module_1_0_0_t { - /** Initialization Function */ - orte_dfs_base_module_init_fn_t init; - /** Finalization Function */ - orte_dfs_base_module_finalize_fn_t finalize; - - orte_dfs_base_module_open_fn_t open; - orte_dfs_base_module_close_fn_t close; - orte_dfs_base_module_get_file_size_fn_t get_file_size; - orte_dfs_base_module_seek_fn_t seek; - orte_dfs_base_module_read_fn_t read; - orte_dfs_base_module_post_file_map_fn_t post_file_map; - orte_dfs_base_module_get_file_map_fn_t get_file_map; - orte_dfs_base_module_load_file_maps_fn_t load_file_maps; - 
orte_dfs_base_module_purge_file_maps_fn_t purge_file_maps; -}; -typedef struct orte_dfs_base_module_1_0_0_t orte_dfs_base_module_1_0_0_t; -typedef orte_dfs_base_module_1_0_0_t orte_dfs_base_module_t; -ORTE_DECLSPEC extern orte_dfs_base_module_t orte_dfs; - -/* - * DFS Component - */ -struct orte_dfs_base_component_1_0_0_t { - /** MCA base component */ - mca_base_component_t base_version; - /** MCA base data */ - mca_base_component_data_t base_data; -}; -typedef struct orte_dfs_base_component_1_0_0_t orte_dfs_base_component_1_0_0_t; -typedef orte_dfs_base_component_1_0_0_t orte_dfs_base_component_t; - -/* - * Macro for use in components that are of type errmgr - */ -#define ORTE_DFS_BASE_VERSION_1_0_0 \ - ORTE_MCA_BASE_VERSION_2_1_0("dfs", 1, 0, 0) - -END_C_DECLS - -#endif diff --git a/orte/mca/dfs/dfs_types.h b/orte/mca/dfs/dfs_types.h deleted file mode 100644 index 1f3e088e1d8..00000000000 --- a/orte/mca/dfs/dfs_types.h +++ /dev/null @@ -1,76 +0,0 @@ -/* - * Copyright (c) 2012 Los Alamos National Security, LLC. - * All rights reserved. - * Copyright (c) 2014 Research Organization for Information Science - * and Technology (RIST). All rights reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#ifndef ORTE_MCA_DFS_TYPES_H -#define ORTE_MCA_DFS_TYPES_H - -#include "orte_config.h" - -#include "opal/class/opal_list.h" -#include "opal/dss/dss_types.h" -#include "opal/util/proc.h" - -BEGIN_C_DECLS - -typedef uint8_t orte_dfs_cmd_t; -#define ORTE_DFS_CMD_T OPAL_UINT8 - -#define ORTE_DFS_OPEN_CMD 1 -#define ORTE_DFS_CLOSE_CMD 2 -#define ORTE_DFS_SIZE_CMD 3 -#define ORTE_DFS_SEEK_CMD 4 -#define ORTE_DFS_READ_CMD 5 -#define ORTE_DFS_POST_CMD 6 -#define ORTE_DFS_GETFM_CMD 7 -#define ORTE_DFS_LOAD_CMD 8 -#define ORTE_DFS_PURGE_CMD 9 -#define ORTE_DFS_RELAY_POSTS_CMD 10 - -/* file maps */ -typedef struct { - opal_list_item_t super; - orte_jobid_t jobid; - opal_list_t maps; -} orte_dfs_jobfm_t; -ORTE_DECLSPEC OBJ_CLASS_DECLARATION(orte_dfs_jobfm_t); - -typedef struct { - opal_list_item_t super; - orte_vpid_t vpid; - int num_entries; - opal_buffer_t data; -} orte_dfs_vpidfm_t; -ORTE_DECLSPEC OBJ_CLASS_DECLARATION(orte_dfs_vpidfm_t); - -typedef void (*orte_dfs_open_callback_fn_t)(int fd, void *cbdata); - -typedef void (*orte_dfs_close_callback_fn_t)(int fd, void *cbdata); - -typedef void (*orte_dfs_size_callback_fn_t)(long size, void *cbdata); - -typedef void (*orte_dfs_seek_callback_fn_t)(long offset, void *cbdata); - -typedef void (*orte_dfs_read_callback_fn_t)(long status, - uint8_t *buffer, - void *cbdata); - -typedef void (*orte_dfs_post_callback_fn_t)(void *cbdata); - -typedef void (*orte_dfs_fm_callback_fn_t)(opal_buffer_t *fmaps, void *cbdata); - -typedef void (*orte_dfs_load_callback_fn_t)(void *cbdata); - -typedef void (*orte_dfs_purge_callback_fn_t)(void *cbdata); - -END_C_DECLS - -#endif diff --git a/orte/mca/dfs/orted/Makefile.am b/orte/mca/dfs/orted/Makefile.am deleted file mode 100644 index 90946f6f4c6..00000000000 --- a/orte/mca/dfs/orted/Makefile.am +++ /dev/null @@ -1,36 +0,0 @@ -# -# Copyright (c) 2012 Los Alamos National Security, LLC. All rights reserved. 
-# Copyright (c) 2017 IBM Corporation. All rights reserved. -# $COPYRIGHT$ -# -# Additional copyrights may follow -# -# $HEADER$ -# - -sources = \ - dfs_orted.h \ - dfs_orted_component.c \ - dfs_orted.c - -# Make the output library in this directory, and name it either -# mca__.la (for DSO builds) or libmca__.la -# (for static builds). - -if MCA_BUILD_orte_dfs_orted_DSO -component_noinst = -component_install = mca_dfs_orted.la -else -component_noinst = libmca_dfs_orted.la -component_install = -endif - -mcacomponentdir = $(ortelibdir) -mcacomponent_LTLIBRARIES = $(component_install) -mca_dfs_orted_la_SOURCES = $(sources) -mca_dfs_orted_la_LDFLAGS = -module -avoid-version -mca_dfs_orted_la_LIBADD = $(top_builddir)/orte/lib@ORTE_LIB_PREFIX@open-rte.la - -noinst_LTLIBRARIES = $(component_noinst) -libmca_dfs_orted_la_SOURCES =$(sources) -libmca_dfs_orted_la_LDFLAGS = -module -avoid-version diff --git a/orte/mca/dfs/orted/dfs_orted.c b/orte/mca/dfs/orted/dfs_orted.c deleted file mode 100644 index ee3c9d5a338..00000000000 --- a/orte/mca/dfs/orted/dfs_orted.c +++ /dev/null @@ -1,2388 +0,0 @@ -/* - * Copyright (c) 2012-2013 Los Alamos National Security, LLC. - * All rights reserved. - * Copyright (c) 2013 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2015-2018 Intel, Inc. All rights reserved. - * Copyright (c) 2015 Research Organization for Information Science - * and Technology (RIST). All rights reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "orte_config.h" - -#include -#ifdef HAVE_UNISTD_H -#include -#endif /* HAVE_UNISTD_H */ -#include -#ifdef HAVE_FCNTL_H -#include -#endif -#include - -#include "opal/util/if.h" -#include "opal/util/output.h" -#include "opal/util/uri.h" -#include "opal/dss/dss.h" - -#include "orte/util/error_strings.h" -#include "orte/util/name_fns.h" -#include "orte/util/proc_info.h" -#include "orte/util/session_dir.h" -#include "orte/util/show_help.h" -#include "orte/util/threads.h" - -#include "orte/mca/errmgr/errmgr.h" -#include "orte/mca/rml/rml.h" -#include "orte/mca/state/state.h" - -#include "orte/runtime/orte_quit.h" -#include "orte/runtime/orte_globals.h" - -#include "orte/mca/dfs/dfs.h" -#include "orte/mca/dfs/base/base.h" -#include "dfs_orted.h" - -/* - * Module functions: Global - */ -static int init(void); -static int finalize(void); - -static void dfs_open(char *uri, - orte_dfs_open_callback_fn_t cbfunc, - void *cbdata); -static void dfs_close(int fd, - orte_dfs_close_callback_fn_t cbfunc, - void *cbdata); -static void dfs_get_file_size(int fd, - orte_dfs_size_callback_fn_t cbfunc, - void *cbdata); -static void dfs_seek(int fd, long offset, int whence, - orte_dfs_seek_callback_fn_t cbfunc, - void *cbdata); -static void dfs_read(int fd, uint8_t *buffer, - long length, - orte_dfs_read_callback_fn_t cbfunc, - void *cbdata); -static void dfs_post_file_map(opal_buffer_t *bo, - orte_dfs_post_callback_fn_t cbfunc, - void *cbdata); -static void dfs_get_file_map(orte_process_name_t *target, - orte_dfs_fm_callback_fn_t cbfunc, - void *cbdata); -static void dfs_load_file_maps(orte_jobid_t jobid, - opal_buffer_t *bo, - orte_dfs_load_callback_fn_t cbfunc, - void *cbdata); -static void dfs_purge_file_maps(orte_jobid_t jobid, - orte_dfs_purge_callback_fn_t cbfunc, - void *cbdata); -/****************** - * Daemon/HNP module - ******************/ -orte_dfs_base_module_t orte_dfs_orted_module = { 
- init, - finalize, - dfs_open, - dfs_close, - dfs_get_file_size, - dfs_seek, - dfs_read, - dfs_post_file_map, - dfs_get_file_map, - dfs_load_file_maps, - dfs_purge_file_maps -}; - -static void* worker_thread_engine(opal_object_t *obj); - -typedef struct { - opal_object_t super; - int idx; - opal_event_base_t *event_base; - bool active; - opal_thread_t thread; -} worker_thread_t; -static void wt_const(worker_thread_t *ptr) -{ - /* create an event base for this thread */ - ptr->event_base = opal_event_base_create(); - /* construct the thread object */ - OBJ_CONSTRUCT(&ptr->thread, opal_thread_t); - /* fork off a thread to progress it */ - ptr->active = true; - ptr->thread.t_run = worker_thread_engine; - ptr->thread.t_arg = ptr; - opal_thread_start(&ptr->thread); -} -static void wt_dest(worker_thread_t *ptr) -{ - /* stop the thread */ - ptr->active = false; - /* break the loop */ - opal_event_base_loopbreak(ptr->event_base); - /* wait for thread to exit */ - opal_thread_join(&ptr->thread, NULL); - OBJ_DESTRUCT(&ptr->thread); - /* release the event base */ - opal_event_base_free(ptr->event_base); -} -OBJ_CLASS_INSTANCE(worker_thread_t, - opal_object_t, - wt_const, wt_dest); - -typedef struct { - opal_object_t super; - opal_event_t ev; - uint64_t rid; - orte_dfs_tracker_t *trk; - int64_t nbytes; - int whence; -} worker_req_t; -OBJ_CLASS_INSTANCE(worker_req_t, - opal_object_t, - NULL, NULL); -#define ORTE_DFS_POST_WORKER(r, cb) \ - do { \ - worker_thread_t *wt; \ - wt = (worker_thread_t*)opal_pointer_array_get_item(&worker_threads, wt_cntr); \ - opal_output_verbose(1, orte_dfs_base_framework.framework_output, \ - "%s assigning req to worker thread %d", \ - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), \ - wt->idx); \ - opal_event_set(wt->event_base, &((r)->ev), \ - -1, OPAL_EV_WRITE, (cb), (r)); \ - opal_event_active(&((r)->ev), OPAL_EV_WRITE, 1); \ - /* move to the next thread */ \ - wt_cntr++; \ - if (wt_cntr == orte_dfs_orted_num_worker_threads) { \ - wt_cntr = 0; \ - } \ - } 
while(0); - -static opal_list_t requests, active_files, file_maps; -static opal_pointer_array_t worker_threads; -static int wt_cntr = 0; -static int local_fd = 0; -static uint64_t req_id = 0; -static void recv_dfs_cmd(int status, orte_process_name_t* sender, - opal_buffer_t* buffer, orte_rml_tag_t tag, - void* cbdata); -static void recv_dfs_data(int status, orte_process_name_t* sender, - opal_buffer_t* buffer, orte_rml_tag_t tag, - void* cbdata); -static void remote_read(int fd, short args, void *cbata); -static void remote_open(int fd, short args, void *cbdata); -static void remote_size(int fd, short args, void *cbdata); -static void remote_seek(int fd, short args, void *cbdata); - -static int init(void) -{ - int i; - worker_thread_t *wt; - - OBJ_CONSTRUCT(&requests, opal_list_t); - OBJ_CONSTRUCT(&active_files, opal_list_t); - OBJ_CONSTRUCT(&file_maps, opal_list_t); - orte_rml.recv_buffer_nb(ORTE_NAME_WILDCARD, - ORTE_RML_TAG_DFS_CMD, - ORTE_RML_PERSISTENT, - recv_dfs_cmd, - NULL); - orte_rml.recv_buffer_nb(ORTE_NAME_WILDCARD, - ORTE_RML_TAG_DFS_DATA, - ORTE_RML_PERSISTENT, - recv_dfs_data, - NULL); - OBJ_CONSTRUCT(&worker_threads, opal_pointer_array_t); - opal_pointer_array_init(&worker_threads, 1, INT_MAX, 1); - - opal_output_verbose(1, orte_dfs_base_framework.framework_output, - "%s starting %d worker threads", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - orte_dfs_orted_num_worker_threads); - for (i=0; i < orte_dfs_orted_num_worker_threads; i++) { - wt = OBJ_NEW(worker_thread_t); - wt->idx = i; - opal_pointer_array_add(&worker_threads, wt); - } - - return ORTE_SUCCESS; -} - -static int finalize(void) -{ - opal_list_item_t *item; - int i; - worker_thread_t *wt; - - orte_rml.recv_cancel(ORTE_NAME_WILDCARD, ORTE_RML_TAG_DFS_CMD); - orte_rml.recv_cancel(ORTE_NAME_WILDCARD, ORTE_RML_TAG_DFS_DATA); - while (NULL != (item = opal_list_remove_first(&requests))) { - OBJ_RELEASE(item); - } - OBJ_DESTRUCT(&requests); - while (NULL != (item = 
opal_list_remove_first(&active_files))) { - OBJ_RELEASE(item); - } - OBJ_DESTRUCT(&active_files); - while (NULL != (item = opal_list_remove_first(&file_maps))) { - OBJ_RELEASE(item); - } - OBJ_DESTRUCT(&file_maps); - for (i=0; i < worker_threads.size; i++) { - if (NULL != (wt = (worker_thread_t*)opal_pointer_array_get_item(&worker_threads, i))) { - OBJ_RELEASE(wt); - } - } - OBJ_DESTRUCT(&worker_threads); - - return ORTE_SUCCESS; -} - -static void open_local_file(orte_dfs_request_t *dfs) -{ - char *filename; - orte_dfs_tracker_t *trk; - - /* extract the filename from the uri */ - if (NULL == (filename = opal_filename_from_uri(dfs->uri, NULL))) { - /* something wrong - error was reported, so just get out */ - if (NULL != dfs->open_cbfunc) { - dfs->open_cbfunc(-1, dfs->cbdata); - } - return; - } - opal_output_verbose(1, orte_dfs_base_framework.framework_output, - "%s opening local file %s", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - filename); - /* attempt to open the file */ - if (0 > (dfs->remote_fd = open(filename, O_RDONLY))) { - ORTE_ERROR_LOG(ORTE_ERR_FILE_OPEN_FAILURE); - if (NULL != dfs->open_cbfunc) { - dfs->open_cbfunc(dfs->remote_fd, dfs->cbdata); - } - return; - } - /* otherwise, create a tracker for this file */ - trk = OBJ_NEW(orte_dfs_tracker_t); - trk->requestor.jobid = ORTE_PROC_MY_NAME->jobid; - trk->requestor.vpid = ORTE_PROC_MY_NAME->vpid; - trk->filename = strdup(dfs->uri); - /* define the local fd */ - trk->local_fd = local_fd++; - /* record the remote file descriptor */ - trk->remote_fd = dfs->remote_fd; - /* add it to our list of active files */ - opal_list_append(&active_files, &trk->super); - /* the file is locally hosted */ - trk->host_daemon.jobid = ORTE_PROC_MY_DAEMON->jobid; - trk->host_daemon.vpid = ORTE_PROC_MY_DAEMON->vpid; - opal_output_verbose(1, orte_dfs_base_framework.framework_output, - "%s local file %s mapped localfd %d to remotefd %d", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - filename, trk->local_fd, trk->remote_fd); - /* let the 
caller know */ - if (NULL != dfs->open_cbfunc) { - dfs->open_cbfunc(trk->local_fd, dfs->cbdata); - } - /* request will be released by the calling routing */ -} - -static void process_opens(int fd, short args, void *cbdata) -{ - orte_dfs_request_t *dfs = (orte_dfs_request_t*)cbdata; - int rc; - opal_buffer_t *buffer = NULL; - char *scheme = NULL, *host = NULL, *filename = NULL; - int v; - orte_node_t *node, *nptr; - - ORTE_ACQUIRE_OBJECT(dfs); - - /* get the scheme to determine if we can process locally or not */ - if (NULL == (scheme = opal_uri_get_scheme(dfs->uri))) { - OBJ_RELEASE(dfs); - return; - } - - if (0 == strcmp(scheme, "nfs")) { - open_local_file(dfs); - goto complete; - } - - if (0 != strcmp(scheme, "file")) { - /* not yet supported */ - orte_show_help("orte_dfs_help.txt", "unsupported-filesystem", - true, dfs->uri); - if (NULL != dfs->open_cbfunc) { - dfs->open_cbfunc(-1, dfs->cbdata); - } - goto complete; - } - - free(scheme); - scheme = NULL; - - /* dissect the uri to extract host and filename/path */ - if (NULL == (filename = opal_filename_from_uri(dfs->uri, &host))) { - goto complete; - } - /* if the host is our own, then treat it as a local file */ - if (NULL == host || orte_ifislocal(host)) { - opal_output_verbose(1, orte_dfs_base_framework.framework_output, - "%s file %s on local host", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - filename); - open_local_file(dfs); - goto complete; - } - - /* ident the daemon on that host */ - node = NULL; - for (v=0; v < orte_node_pool->size; v++) { - if (NULL == (nptr = (orte_node_t*)opal_pointer_array_get_item(orte_node_pool, v))) { - continue; - } - if (NULL == nptr->daemon) { - continue; - } - if (0 == strcmp(host, nptr->name)) { - node = nptr; - break; - } - } - if (NULL == node) { - ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND); - goto complete; - } - opal_output_verbose(1, orte_dfs_base_framework.framework_output, - "%s file %s on host %s daemon %s", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - filename, host, 
ORTE_NAME_PRINT(&node->daemon->name)); - - free(host); - host = NULL; - /* double-check: if it is our local daemon, then we - * treat this as local - */ - if (node->daemon->name.vpid == ORTE_PROC_MY_DAEMON->vpid) { - opal_output_verbose(1, orte_dfs_base_framework.framework_output, - "%s local file %s on same daemon", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - filename); - open_local_file(dfs); - goto complete; - } - /* add this request to our local list so we can - * match it with the returned response when it comes - */ - dfs->id = req_id++; - opal_list_append(&requests, &dfs->super); - - /* setup a message for the daemon telling - * them what file we want to access - */ - buffer = OBJ_NEW(opal_buffer_t); - if (OPAL_SUCCESS != (rc = opal_dss.pack(buffer, &dfs->cmd, 1, ORTE_DFS_CMD_T))) { - ORTE_ERROR_LOG(rc); - opal_list_remove_item(&requests, &dfs->super); - goto complete; - } - /* pass the request id */ - if (OPAL_SUCCESS != (rc = opal_dss.pack(buffer, &dfs->id, 1, OPAL_UINT64))) { - ORTE_ERROR_LOG(rc); - opal_list_remove_item(&requests, &dfs->super); - goto complete; - } - if (OPAL_SUCCESS != (rc = opal_dss.pack(buffer, &filename, 1, OPAL_STRING))) { - ORTE_ERROR_LOG(rc); - opal_list_remove_item(&requests, &dfs->super); - goto complete; - } - - opal_output_verbose(1, orte_dfs_base_framework.framework_output, - "%s sending open file request to %s file %s", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - ORTE_NAME_PRINT(&node->daemon->name), - filename); - - free(filename); - filename = NULL; - /* send it */ - if (0 > (rc = orte_rml.send_buffer_nb(orte_mgmt_conduit, - &node->daemon->name, buffer, - ORTE_RML_TAG_DFS_CMD, - orte_rml_send_callback, NULL))) { - ORTE_ERROR_LOG(rc); - opal_list_remove_item(&requests, &dfs->super); - goto complete; - } - /* don't release it */ - return; - - complete: - if (NULL != buffer) { - OBJ_RELEASE(buffer); - } - if (NULL != scheme) { - free(scheme); - } - if (NULL != host) { - free(host); - } - if (NULL != filename) { - free(filename); - } 
- OBJ_RELEASE(dfs); -} - - -/* in order to handle the possible opening/reading of files by - * multiple threads, we have to ensure that all operations are - * carried out in events - so the "open" cmd simply posts an - * event containing the required info, and then returns - */ -static void dfs_open(char *uri, - orte_dfs_open_callback_fn_t cbfunc, - void *cbdata) -{ - orte_dfs_request_t *dfs; - - opal_output_verbose(1, orte_dfs_base_framework.framework_output, - "%s opening file %s", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), uri); - - /* setup the request */ - dfs = OBJ_NEW(orte_dfs_request_t); - dfs->cmd = ORTE_DFS_OPEN_CMD; - dfs->uri = strdup(uri); - dfs->open_cbfunc = cbfunc; - dfs->cbdata = cbdata; - - /* post it for processing */ - ORTE_THREADSHIFT(dfs, orte_event_base, process_opens, ORTE_SYS_PRI); -} - -static void process_close(int fd, short args, void *cbdata) -{ - orte_dfs_request_t *close_dfs = (orte_dfs_request_t*)cbdata; - orte_dfs_tracker_t *tptr, *trk; - opal_list_item_t *item; - opal_buffer_t *buffer; - int rc; - - ORTE_ACQUIRE_OBJECT(close_dfs); - - opal_output_verbose(1, orte_dfs_base_framework.framework_output, - "%s closing fd %d", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - close_dfs->local_fd); - - /* look in our local records for this fd */ - trk = NULL; - for (item = opal_list_get_first(&active_files); - item != opal_list_get_end(&active_files); - item = opal_list_get_next(item)) { - tptr = (orte_dfs_tracker_t*)item; - if (tptr->local_fd == close_dfs->local_fd) { - trk = tptr; - break; - } - } - if (NULL == trk) { - ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND); - if (NULL != close_dfs->close_cbfunc) { - close_dfs->close_cbfunc(close_dfs->local_fd, close_dfs->cbdata); - } - OBJ_RELEASE(close_dfs); - return; - } - - /* if the file is local, close it */ - if (trk->host_daemon.vpid == ORTE_PROC_MY_DAEMON->vpid) { - close(trk->remote_fd); - goto complete; - } - - /* setup a message for the daemon telling - * them what file to close - */ - buffer = 
OBJ_NEW(opal_buffer_t); - if (OPAL_SUCCESS != (rc = opal_dss.pack(buffer, &close_dfs->cmd, 1, ORTE_DFS_CMD_T))) { - ORTE_ERROR_LOG(rc); - goto complete; - } - if (OPAL_SUCCESS != (rc = opal_dss.pack(buffer, &trk->remote_fd, 1, OPAL_INT))) { - ORTE_ERROR_LOG(rc); - goto complete; - } - - opal_output_verbose(1, orte_dfs_base_framework.framework_output, - "%s sending close file request to %s for fd %d", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - ORTE_NAME_PRINT(&trk->host_daemon), - trk->local_fd); - /* send it */ - if (0 > (rc = orte_rml.send_buffer_nb(orte_mgmt_conduit, - &trk->host_daemon, buffer, - ORTE_RML_TAG_DFS_CMD, - orte_rml_send_callback, NULL))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(buffer); - goto complete; - } - - complete: - opal_list_remove_item(&active_files, &trk->super); - OBJ_RELEASE(trk); - if (NULL != close_dfs->close_cbfunc) { - close_dfs->close_cbfunc(close_dfs->local_fd, close_dfs->cbdata); - } - OBJ_RELEASE(close_dfs); -} - -static void dfs_close(int fd, - orte_dfs_close_callback_fn_t cbfunc, - void *cbdata) -{ - orte_dfs_request_t *dfs; - - opal_output_verbose(1, orte_dfs_base_framework.framework_output, - "%s close called on fd %d", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), fd); - - dfs = OBJ_NEW(orte_dfs_request_t); - dfs->cmd = ORTE_DFS_CLOSE_CMD; - dfs->local_fd = fd; - dfs->close_cbfunc = cbfunc; - dfs->cbdata = cbdata; - - /* post it for processing */ - ORTE_THREADSHIFT(dfs, orte_event_base, process_close, ORTE_SYS_PRI); -} - -static void process_sizes(int fd, short args, void *cbdata) -{ - orte_dfs_request_t *size_dfs = (orte_dfs_request_t*)cbdata; - orte_dfs_tracker_t *tptr, *trk; - opal_list_item_t *item; - opal_buffer_t *buffer; - int rc; - struct stat buf; - - ORTE_ACQUIRE_OBJECT(size_dfs); - - opal_output_verbose(1, orte_dfs_base_framework.framework_output, - "%s processing get_size on fd %d", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - size_dfs->local_fd); - - /* look in our local records for this fd */ - trk = NULL; - for (item = 
opal_list_get_first(&active_files); - item != opal_list_get_end(&active_files); - item = opal_list_get_next(item)) { - tptr = (orte_dfs_tracker_t*)item; - if (tptr->local_fd == size_dfs->local_fd) { - trk = tptr; - break; - } - } - if (NULL == trk) { - ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND); - OBJ_RELEASE(size_dfs); - return; - } - - /* if the file is local, execute the seek on it - we - * stuck the "whence" value in the remote_fd - */ - if (trk->host_daemon.vpid == ORTE_PROC_MY_DAEMON->vpid) { - /* stat the file and get its size */ - if (0 > stat(trk->filename, &buf)) { - /* cannot stat file */ - opal_output_verbose(1, orte_dfs_base_framework.framework_output, - "%s could not stat %s", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - trk->filename); - if (NULL != size_dfs->size_cbfunc) { - size_dfs->size_cbfunc(-1, size_dfs->cbdata); - } - } - goto complete; - } - - /* setup a message for the daemon telling - * them what file to get the size of - */ - buffer = OBJ_NEW(opal_buffer_t); - if (OPAL_SUCCESS != (rc = opal_dss.pack(buffer, &size_dfs->cmd, 1, ORTE_DFS_CMD_T))) { - ORTE_ERROR_LOG(rc); - goto complete; - } - if (OPAL_SUCCESS != (rc = opal_dss.pack(buffer, &trk->remote_fd, 1, OPAL_INT))) { - ORTE_ERROR_LOG(rc); - goto complete; - } - - opal_output_verbose(1, orte_dfs_base_framework.framework_output, - "%s sending get_size request to %s for fd %d", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - ORTE_NAME_PRINT(&trk->host_daemon), - trk->local_fd); - /* send it */ - if (0 > (rc = orte_rml.send_buffer_nb(orte_mgmt_conduit, - &trk->host_daemon, buffer, - ORTE_RML_TAG_DFS_CMD, - orte_rml_send_callback, NULL))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(buffer); - if (NULL != size_dfs->size_cbfunc) { - size_dfs->size_cbfunc(-1, size_dfs->cbdata); - } - goto complete; - } - - complete: - OBJ_RELEASE(size_dfs); -} - -static void dfs_get_file_size(int fd, - orte_dfs_size_callback_fn_t cbfunc, - void *cbdata) -{ - orte_dfs_request_t *dfs; - - opal_output_verbose(1, 
orte_dfs_base_framework.framework_output, - "%s get_size called on fd %d", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), fd); - - dfs = OBJ_NEW(orte_dfs_request_t); - dfs->cmd = ORTE_DFS_SIZE_CMD; - dfs->local_fd = fd; - dfs->size_cbfunc = cbfunc; - dfs->cbdata = cbdata; - - /* post it for processing */ - ORTE_THREADSHIFT(dfs, orte_event_base, process_sizes, ORTE_SYS_PRI); -} - - -static void process_seeks(int fd, short args, void *cbdata) -{ - orte_dfs_request_t *seek_dfs = (orte_dfs_request_t*)cbdata; - orte_dfs_tracker_t *tptr, *trk; - opal_list_item_t *item; - opal_buffer_t *buffer; - int64_t i64; - int rc; - struct stat buf; - - ORTE_ACQUIRE_OBJECT(seek_dfs); - - opal_output_verbose(1, orte_dfs_base_framework.framework_output, - "%s processing seek on fd %d", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - seek_dfs->local_fd); - - /* look in our local records for this fd */ - trk = NULL; - for (item = opal_list_get_first(&active_files); - item != opal_list_get_end(&active_files); - item = opal_list_get_next(item)) { - tptr = (orte_dfs_tracker_t*)item; - if (tptr->local_fd == seek_dfs->local_fd) { - trk = tptr; - break; - } - } - if (NULL == trk) { - ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND); - OBJ_RELEASE(seek_dfs); - return; - } - - /* if the file is local, execute the seek on it - we - * stuck the "whence" value in the remote_fd - */ - if (trk->host_daemon.vpid == ORTE_PROC_MY_DAEMON->vpid) { - opal_output_verbose(1, orte_dfs_base_framework.framework_output, - "%s local seek on fd %d", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - seek_dfs->local_fd); - /* stat the file and get its size */ - if (0 > stat(trk->filename, &buf)) { - /* cannot stat file */ - opal_output_verbose(1, orte_dfs_base_framework.framework_output, - "%s could not stat %s", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - trk->filename); - if (NULL != seek_dfs->seek_cbfunc) { - seek_dfs->seek_cbfunc(-1, seek_dfs->cbdata); - } - } else if (buf.st_size < seek_dfs->read_length && - SEEK_SET == seek_dfs->remote_fd) { - /* seek 
would take us past EOF */ - if (NULL != seek_dfs->seek_cbfunc) { - seek_dfs->seek_cbfunc(-1, seek_dfs->cbdata); - } - } else if (buf.st_size < (off_t)(trk->location + seek_dfs->read_length) && - SEEK_CUR == seek_dfs->remote_fd) { - /* seek would take us past EOF */ - if (NULL != seek_dfs->seek_cbfunc) { - seek_dfs->seek_cbfunc(-1, seek_dfs->cbdata); - } - } else { - lseek(trk->remote_fd, seek_dfs->read_length, seek_dfs->remote_fd); - if (SEEK_SET == seek_dfs->remote_fd) { - trk->location = seek_dfs->read_length; - } else { - trk->location += seek_dfs->read_length; - } - } - goto complete; - } - /* add this request to our local list so we can - * match it with the returned response when it comes - */ - seek_dfs->id = req_id++; - opal_list_append(&requests, &seek_dfs->super); - - /* setup a message for the daemon telling - * them what file to seek - */ - buffer = OBJ_NEW(opal_buffer_t); - if (OPAL_SUCCESS != (rc = opal_dss.pack(buffer, &seek_dfs->cmd, 1, ORTE_DFS_CMD_T))) { - ORTE_ERROR_LOG(rc); - goto complete; - } - /* pass the request id */ - if (OPAL_SUCCESS != (rc = opal_dss.pack(buffer, &seek_dfs->id, 1, OPAL_UINT64))) { - ORTE_ERROR_LOG(rc); - opal_list_remove_item(&requests, &seek_dfs->super); - goto complete; - } - if (OPAL_SUCCESS != (rc = opal_dss.pack(buffer, &trk->remote_fd, 1, OPAL_INT))) { - ORTE_ERROR_LOG(rc); - goto complete; - } - i64 = (int64_t)seek_dfs->read_length; - if (OPAL_SUCCESS != (rc = opal_dss.pack(buffer, &i64, 1, OPAL_INT64))) { - ORTE_ERROR_LOG(rc); - goto complete; - } - if (OPAL_SUCCESS != (rc = opal_dss.pack(buffer, &seek_dfs->remote_fd, 1, OPAL_INT))) { - ORTE_ERROR_LOG(rc); - goto complete; - } - - opal_output_verbose(1, orte_dfs_base_framework.framework_output, - "%s sending seek file request to %s for fd %d", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - ORTE_NAME_PRINT(&trk->host_daemon), - trk->local_fd); - /* send it */ - if (0 > (rc = orte_rml.send_buffer_nb(orte_mgmt_conduit, - &trk->host_daemon, buffer, - ORTE_RML_TAG_DFS_CMD, - 
orte_rml_send_callback, NULL))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(buffer); - goto complete; - } - - complete: - OBJ_RELEASE(seek_dfs); -} - - -static void dfs_seek(int fd, long offset, int whence, - orte_dfs_seek_callback_fn_t cbfunc, - void *cbdata) -{ - orte_dfs_request_t *dfs; - - opal_output_verbose(1, orte_dfs_base_framework.framework_output, - "%s seek called on fd %d", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), fd); - - dfs = OBJ_NEW(orte_dfs_request_t); - dfs->cmd = ORTE_DFS_SEEK_CMD; - dfs->local_fd = fd; - dfs->read_length = offset; - dfs->remote_fd = whence; - dfs->seek_cbfunc = cbfunc; - dfs->cbdata = cbdata; - - /* post it for processing */ - ORTE_THREADSHIFT(dfs, orte_event_base, process_seeks, ORTE_SYS_PRI); -} - -static void process_reads(int fd, short args, void *cbdata) -{ - orte_dfs_request_t *read_dfs = (orte_dfs_request_t*)cbdata; - orte_dfs_tracker_t *tptr, *trk; - long nbytes; - opal_list_item_t *item; - opal_buffer_t *buffer; - int64_t i64; - int rc; - - ORTE_ACQUIRE_OBJECT(read_dfs); - - /* look in our local records for this fd */ - trk = NULL; - for (item = opal_list_get_first(&active_files); - item != opal_list_get_end(&active_files); - item = opal_list_get_next(item)) { - tptr = (orte_dfs_tracker_t*)item; - if (tptr->local_fd == read_dfs->local_fd) { - trk = tptr; - break; - } - } - if (NULL == trk) { - ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND); - OBJ_RELEASE(read_dfs); - return; - } - - /* if the file is local, read the desired bytes */ - if (trk->host_daemon.vpid == ORTE_PROC_MY_DAEMON->vpid) { - nbytes = read(trk->remote_fd, read_dfs->read_buffer, read_dfs->read_length); - if (0 < nbytes) { - /* update our location */ - trk->location += nbytes; - } - /* pass them back to the caller */ - if (NULL != read_dfs->read_cbfunc) { - read_dfs->read_cbfunc(nbytes, read_dfs->read_buffer, read_dfs->cbdata); - } - /* request is complete */ - OBJ_RELEASE(read_dfs); - return; - } - /* add this request to our pending list */ - read_dfs->id = req_id++; - 
opal_list_append(&requests, &read_dfs->super); - - /* setup a message for the daemon telling - * them what file to read - */ - buffer = OBJ_NEW(opal_buffer_t); - if (OPAL_SUCCESS != (rc = opal_dss.pack(buffer, &read_dfs->cmd, 1, ORTE_DFS_CMD_T))) { - ORTE_ERROR_LOG(rc); - goto complete; - } - /* include the request id */ - if (OPAL_SUCCESS != (rc = opal_dss.pack(buffer, &read_dfs->id, 1, OPAL_UINT64))) { - ORTE_ERROR_LOG(rc); - goto complete; - } - if (OPAL_SUCCESS != (rc = opal_dss.pack(buffer, &trk->remote_fd, 1, OPAL_INT))) { - ORTE_ERROR_LOG(rc); - goto complete; - } - i64 = (int64_t)read_dfs->read_length; - if (OPAL_SUCCESS != (rc = opal_dss.pack(buffer, &i64, 1, OPAL_INT64))) { - ORTE_ERROR_LOG(rc); - goto complete; - } - - opal_output_verbose(1, orte_dfs_base_framework.framework_output, - "%s sending read file request to %s for fd %d", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - ORTE_NAME_PRINT(&trk->host_daemon), - trk->local_fd); - /* send it */ - if (0 > (rc = orte_rml.send_buffer_nb(orte_mgmt_conduit, - &trk->host_daemon, buffer, - ORTE_RML_TAG_DFS_CMD, - orte_rml_send_callback, NULL))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(buffer); - } - /* don't release the request */ - return; - - complete: - /* don't need to hang on to this request */ - opal_list_remove_item(&requests, &read_dfs->super); - OBJ_RELEASE(read_dfs); -} - -static void dfs_read(int fd, uint8_t *buffer, - long length, - orte_dfs_read_callback_fn_t cbfunc, - void *cbdata) -{ - orte_dfs_request_t *dfs; - - dfs = OBJ_NEW(orte_dfs_request_t); - dfs->cmd = ORTE_DFS_READ_CMD; - dfs->local_fd = fd; - dfs->read_buffer = buffer; - dfs->read_length = length; - dfs->read_cbfunc = cbfunc; - dfs->cbdata = cbdata; - - /* post it for processing */ - ORTE_THREADSHIFT(dfs, orte_event_base, process_reads, ORTE_SYS_PRI); -} - -static void process_posts(int fd, short args, void *cbdata) -{ - orte_dfs_request_t *dfs = (orte_dfs_request_t*)cbdata; - orte_dfs_jobfm_t *jptr, *jfm; - orte_dfs_vpidfm_t *vptr, *vfm; - 
opal_list_item_t *item; - int rc; - - ORTE_ACQUIRE_OBJECT(dfs); - - opal_output_verbose(1, orte_dfs_base_framework.framework_output, - "%s posting file map containing %d bytes for target %s", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - (int)dfs->bptr->bytes_used, ORTE_NAME_PRINT(&dfs->target)); - - /* lookup the job map */ - jfm = NULL; - for (item = opal_list_get_first(&file_maps); - item != opal_list_get_end(&file_maps); - item = opal_list_get_next(item)) { - jptr = (orte_dfs_jobfm_t*)item; - if (jptr->jobid == dfs->target.jobid) { - jfm = jptr; - break; - } - } - if (NULL == jfm) { - /* add it */ - jfm = OBJ_NEW(orte_dfs_jobfm_t); - jfm->jobid = dfs->target.jobid; - opal_list_append(&file_maps, &jfm->super); - } - /* see if we already have an entry for this source */ - vfm = NULL; - for (item = opal_list_get_first(&jfm->maps); - item != opal_list_get_end(&jfm->maps); - item = opal_list_get_next(item)) { - vptr = (orte_dfs_vpidfm_t*)item; - if (vptr->vpid == dfs->target.vpid) { - vfm = vptr; - break; - } - } - if (NULL == vfm) { - /* add it */ - vfm = OBJ_NEW(orte_dfs_vpidfm_t); - vfm->vpid = dfs->target.vpid; - opal_list_append(&jfm->maps, &vfm->super); - } - - /* add this entry to our collection */ - if (OPAL_SUCCESS != (rc = opal_dss.pack(&vfm->data, &dfs->bptr, 1, OPAL_BUFFER))) { - ORTE_ERROR_LOG(rc); - goto cleanup; - } - vfm->num_entries++; - opal_output_verbose(1, orte_dfs_base_framework.framework_output, - "%s target %s now has %d entries", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - ORTE_NAME_PRINT(&dfs->target), - vfm->num_entries); - - cleanup: - if (NULL != dfs->post_cbfunc) { - dfs->post_cbfunc(dfs->cbdata); - } - OBJ_RELEASE(dfs); -} - -static void dfs_post_file_map(opal_buffer_t *buffer, - orte_dfs_post_callback_fn_t cbfunc, - void *cbdata) -{ - orte_dfs_request_t *dfs; - - dfs = OBJ_NEW(orte_dfs_request_t); - dfs->cmd = ORTE_DFS_POST_CMD; - dfs->target.jobid = ORTE_PROC_MY_NAME->jobid; - dfs->target.vpid = ORTE_PROC_MY_NAME->vpid; - dfs->bptr = buffer; - 
dfs->post_cbfunc = cbfunc; - dfs->cbdata = cbdata; - - /* post it for processing */ - ORTE_THREADSHIFT(dfs, orte_event_base, process_posts, ORTE_SYS_PRI); -} - -static int get_job_maps(orte_dfs_jobfm_t *jfm, - orte_vpid_t vpid, - opal_buffer_t *buf) -{ - orte_dfs_vpidfm_t *vfm; - opal_list_item_t *item; - int rc; - int entries=0; - - /* if the target vpid is WILDCARD, then process - * data for all vpids - else, find the one - */ - for (item = opal_list_get_first(&jfm->maps); - item != opal_list_get_end(&jfm->maps); - item = opal_list_get_next(item)) { - vfm = (orte_dfs_vpidfm_t*)item; - if (ORTE_VPID_WILDCARD == vpid || - vfm->vpid == vpid) { - entries++; - /* indicate data from this vpid */ - if (OPAL_SUCCESS != (rc = opal_dss.pack(buf, &vfm->vpid, 1, ORTE_VPID))) { - ORTE_ERROR_LOG(rc); - return -1; - } - /* pack the number of posts we received from it */ - if (OPAL_SUCCESS != (rc = opal_dss.pack(buf, &vfm->num_entries, 1, OPAL_INT32))) { - ORTE_ERROR_LOG(rc); - return -1; - } - /* copy the data across */ - opal_dss.copy_payload(buf, &vfm->data); - } - } - return entries; -} - -static void process_getfm(int fd, short args, void *cbdata) -{ - orte_dfs_request_t *dfs = (orte_dfs_request_t*)cbdata; - orte_dfs_jobfm_t *jfm; - opal_list_item_t *item; - opal_buffer_t xfer; - int32_t n, ntotal; - int rc; - - ORTE_ACQUIRE_OBJECT(dfs); - - /* if the target job is WILDCARD, then process - * data for all jobids - else, find the one - */ - ntotal = 0; - n = -1; - for (item = opal_list_get_first(&file_maps); - item != opal_list_get_end(&file_maps); - item = opal_list_get_next(item)) { - jfm = (orte_dfs_jobfm_t*)item; - if (ORTE_JOBID_WILDCARD == dfs->target.jobid || - jfm->jobid == dfs->target.jobid) { - n = get_job_maps(jfm, dfs->target.vpid, &dfs->bucket); - if (n < 0) { - break; - } - ntotal += n; - } - } - - if (n < 0) { - /* indicates an error */ - if (NULL != dfs->fm_cbfunc) { - dfs->fm_cbfunc(NULL, dfs->cbdata); - } - } else { - OBJ_CONSTRUCT(&xfer, opal_buffer_t); - 
if (OPAL_SUCCESS != (rc = opal_dss.pack(&xfer, &ntotal, 1, OPAL_INT32))) { - ORTE_ERROR_LOG(rc); - OBJ_DESTRUCT(&xfer); - if (NULL != dfs->fm_cbfunc) { - dfs->fm_cbfunc(NULL, dfs->cbdata); - } - return; - } - opal_dss.copy_payload(&xfer, &dfs->bucket); - /* pass it back to caller */ - if (NULL != dfs->fm_cbfunc) { - dfs->fm_cbfunc(&xfer, dfs->cbdata); - } - OBJ_DESTRUCT(&xfer); - } - OBJ_RELEASE(dfs); -} - -static void dfs_get_file_map(orte_process_name_t *target, - orte_dfs_fm_callback_fn_t cbfunc, - void *cbdata) -{ - orte_dfs_request_t *dfs; - - opal_output_verbose(1, orte_dfs_base_framework.framework_output, - "%s get file map for %s", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - ORTE_NAME_PRINT(target)); - - dfs = OBJ_NEW(orte_dfs_request_t); - dfs->cmd = ORTE_DFS_GETFM_CMD; - dfs->target.jobid = target->jobid; - dfs->target.vpid = target->vpid; - dfs->fm_cbfunc = cbfunc; - dfs->cbdata = cbdata; - - /* post it for processing */ - ORTE_THREADSHIFT(dfs, orte_event_base, process_getfm, ORTE_SYS_PRI); -} - -static void process_load(int fd, short args, void *cbdata) -{ - orte_dfs_request_t *dfs = (orte_dfs_request_t*)cbdata; - opal_list_item_t *item; - orte_dfs_jobfm_t *jfm, *jptr; - orte_dfs_vpidfm_t *vfm; - orte_vpid_t vpid; - int32_t entries, nvpids; - int cnt, i, j; - int rc; - opal_buffer_t *xfer; - - ORTE_ACQUIRE_OBJECT(dfs); - - /* see if we already have a tracker for this job */ - jfm = NULL; - for (item = opal_list_get_first(&file_maps); - item != opal_list_get_end(&file_maps); - item = opal_list_get_next(item)) { - jptr = (orte_dfs_jobfm_t*)item; - if (jptr->jobid == dfs->target.jobid) { - jfm = jptr; - break; - } - } - if (NULL != jfm) { - /* need to purge it first */ - while (NULL != (item = opal_list_remove_first(&jfm->maps))) { - OBJ_RELEASE(item); - } - } else { - jfm = OBJ_NEW(orte_dfs_jobfm_t); - jfm->jobid = dfs->target.jobid; - opal_list_append(&file_maps, &jfm->super); - } - - /* retrieve the number of vpids in the map */ - cnt = 1; - if 
(OPAL_SUCCESS != (rc = opal_dss.unpack(dfs->bptr, &nvpids, &cnt, OPAL_INT32))) { - ORTE_ERROR_LOG(rc); - goto complete; - } - - opal_output_verbose(1, orte_dfs_base_framework.framework_output, - "%s loading file maps from %d vpids", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), nvpids); - - /* unpack the buffer */ - for (i=0; i < nvpids; i++) { - /* unpack this vpid */ - cnt = 1; - if (OPAL_SUCCESS != (rc = opal_dss.unpack(dfs->bptr, &vpid, &cnt, ORTE_VPID))) { - ORTE_ERROR_LOG(rc); - goto complete; - } - /* unpack the number of file maps in this entry */ - cnt = 1; - if (OPAL_SUCCESS != (rc = opal_dss.unpack(dfs->bptr, &entries, &cnt, OPAL_INT32))) { - ORTE_ERROR_LOG(rc); - goto complete; - } - opal_output_verbose(1, orte_dfs_base_framework.framework_output, - "%s loading %d entries in file map for vpid %s", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - entries, ORTE_VPID_PRINT(vpid)); - /* create the entry */ - vfm = OBJ_NEW(orte_dfs_vpidfm_t); - vfm->vpid = vpid; - vfm->num_entries = entries; - /* copy the data */ - for (j=0; j < entries; j++) { - cnt = 1; - if (OPAL_SUCCESS != (rc = opal_dss.unpack(dfs->bptr, &xfer, &cnt, OPAL_BUFFER))) { - ORTE_ERROR_LOG(rc); - goto complete; - } - if (OPAL_SUCCESS != (rc = opal_dss.pack(&vfm->data, &xfer, 1, OPAL_BUFFER))) { - ORTE_ERROR_LOG(rc); - goto complete; - } - OBJ_RELEASE(xfer); - } - opal_list_append(&jfm->maps, &vfm->super); - } - - complete: - if (NULL != dfs->load_cbfunc) { - dfs->load_cbfunc(dfs->cbdata); - } - OBJ_RELEASE(dfs); -} - -static void dfs_load_file_maps(orte_jobid_t jobid, - opal_buffer_t *buf, - orte_dfs_load_callback_fn_t cbfunc, - void *cbdata) -{ - orte_dfs_request_t *dfs; - - opal_output_verbose(1, orte_dfs_base_framework.framework_output, - "%s loading file maps for %s", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - ORTE_JOBID_PRINT(jobid)); - - dfs = OBJ_NEW(orte_dfs_request_t); - dfs->cmd = ORTE_DFS_LOAD_CMD; - dfs->target.jobid = jobid; - dfs->bptr = buf; - dfs->load_cbfunc = cbfunc; - dfs->cbdata = cbdata; - - 
/* post it for processing */ - ORTE_THREADSHIFT(dfs, orte_event_base, process_load, ORTE_SYS_PRI); -} - -static void process_purge(int fd, short args, void *cbdata) -{ - orte_dfs_request_t *dfs = (orte_dfs_request_t*)cbdata; - opal_list_item_t *item; - orte_dfs_jobfm_t *jfm, *jptr; - - ORTE_ACQUIRE_OBJECT(dfs); - - /* find the job tracker */ - jfm = NULL; - for (item = opal_list_get_first(&file_maps); - item != opal_list_get_end(&file_maps); - item = opal_list_get_next(item)) { - jptr = (orte_dfs_jobfm_t*)item; - if (jptr->jobid == dfs->target.jobid) { - jfm = jptr; - break; - } - } - if (NULL == jfm) { - ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND); - } else { - /* remove it from the list */ - opal_list_remove_item(&file_maps, &jfm->super); - /* the destructor will release the list of maps - * in the jobfm object - */ - OBJ_RELEASE(jfm); - } - - if (NULL != dfs->purge_cbfunc) { - dfs->purge_cbfunc(dfs->cbdata); - } - OBJ_RELEASE(dfs); -} - -static void dfs_purge_file_maps(orte_jobid_t jobid, - orte_dfs_purge_callback_fn_t cbfunc, - void *cbdata) -{ - orte_dfs_request_t *dfs; - - opal_output_verbose(1, orte_dfs_base_framework.framework_output, - "%s purging file maps for job %s", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - ORTE_JOBID_PRINT(jobid)); - - dfs = OBJ_NEW(orte_dfs_request_t); - dfs->cmd = ORTE_DFS_PURGE_CMD; - dfs->target.jobid = jobid; - dfs->purge_cbfunc = cbfunc; - dfs->cbdata = cbdata; - - /* post it for processing */ - ORTE_THREADSHIFT(dfs, orte_event_base, process_purge, ORTE_SYS_PRI); -} - - -/* receives take place in an event, so we are free to process - * the request list without fear of getting things out-of-order - */ -static void recv_dfs_cmd(int status, orte_process_name_t* sender, - opal_buffer_t* buffer, orte_rml_tag_t tag, - void* cbdata) -{ - orte_dfs_cmd_t cmd; - int32_t cnt; - opal_list_item_t *item; - int my_fd; - int32_t rc, nmaps; - char *filename; - orte_dfs_tracker_t *trk; - int64_t i64, bytes_read; - uint8_t *read_buf; - uint64_t rid; - int 
whence; - struct stat buf; - orte_process_name_t source; - opal_buffer_t *bptr, *xfer; - orte_dfs_request_t *dfs; - orte_dfs_jobfm_t *jfm, *jptr; - orte_dfs_vpidfm_t *vfm, *vptr; - opal_buffer_t *answer, bucket; - int i, j; - orte_vpid_t vpid; - int32_t nentries, ncontributors; - worker_req_t *wrkr; - - /* unpack the command */ - cnt = 1; - if (OPAL_SUCCESS != (rc = opal_dss.unpack(buffer, &cmd, &cnt, ORTE_DFS_CMD_T))) { - ORTE_ERROR_LOG(rc); - return; - } - - opal_output_verbose(1, orte_dfs_base_framework.framework_output, - "%s received command %d from %s", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), (int)cmd, - ORTE_NAME_PRINT(sender)); - - switch (cmd) { - case ORTE_DFS_OPEN_CMD: - /* unpack their request id */ - cnt = 1; - if (OPAL_SUCCESS != (rc = opal_dss.unpack(buffer, &rid, &cnt, OPAL_UINT64))) { - ORTE_ERROR_LOG(rc); - return; - } - /* unpack the filename */ - cnt = 1; - if (OPAL_SUCCESS != (rc = opal_dss.unpack(buffer, &filename, &cnt, OPAL_STRING))) { - ORTE_ERROR_LOG(rc); - return; - } - /* create a tracker for this file */ - trk = OBJ_NEW(orte_dfs_tracker_t); - trk->requestor.jobid = sender->jobid; - trk->requestor.vpid = sender->vpid; - trk->host_daemon.jobid = ORTE_PROC_MY_NAME->jobid; - trk->host_daemon.vpid = ORTE_PROC_MY_NAME->vpid; - trk->filename = strdup(filename); - opal_list_append(&active_files, &trk->super); - /* process the request */ - if (0 < orte_dfs_orted_num_worker_threads) { - wrkr = OBJ_NEW(worker_req_t); - wrkr->trk = trk; - wrkr->rid = rid; - ORTE_DFS_POST_WORKER(wrkr, remote_open); - return; - } - /* no worker threads, so attempt to open the file */ - opal_output_verbose(1, orte_dfs_base_framework.framework_output, - "%s opening file %s", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - filename); - if (0 > (my_fd = open(filename, O_RDONLY))) { - ORTE_ERROR_LOG(ORTE_ERR_FILE_OPEN_FAILURE); - goto answer_open; - } - trk->local_fd = my_fd; - answer_open: - /* construct the return message */ - answer = OBJ_NEW(opal_buffer_t); - if (OPAL_SUCCESS 
!= (rc = opal_dss.pack(answer, &cmd, 1, ORTE_DFS_CMD_T))) { - ORTE_ERROR_LOG(rc); - return; - } - if (OPAL_SUCCESS != (rc = opal_dss.pack(answer, &rid, 1, OPAL_UINT64))) { - ORTE_ERROR_LOG(rc); - return; - } - if (OPAL_SUCCESS != (rc = opal_dss.pack(answer, &my_fd, 1, OPAL_INT))) { - ORTE_ERROR_LOG(rc); - return; - } - /* send it */ - if (0 > (rc = orte_rml.send_buffer_nb(orte_mgmt_conduit, - sender, answer, - ORTE_RML_TAG_DFS_DATA, - orte_rml_send_callback, NULL))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(answer); - return; - } - break; - - case ORTE_DFS_CLOSE_CMD: - /* unpack our fd */ - cnt = 1; - if (OPAL_SUCCESS != (rc = opal_dss.unpack(buffer, &my_fd, &cnt, OPAL_INT))) { - ORTE_ERROR_LOG(rc); - return; - } - /* find the corresponding tracker */ - for (item = opal_list_get_first(&active_files); - item != opal_list_get_end(&active_files); - item = opal_list_get_next(item)) { - trk = (orte_dfs_tracker_t*)item; - if (my_fd == trk->local_fd) { - /* remove it */ - opal_list_remove_item(&active_files, item); - OBJ_RELEASE(item); - /* close the file */ - close(my_fd); - break; - } - } - break; - - case ORTE_DFS_SIZE_CMD: - /* unpack their request id */ - cnt = 1; - if (OPAL_SUCCESS != (rc = opal_dss.unpack(buffer, &rid, &cnt, OPAL_UINT64))) { - ORTE_ERROR_LOG(rc); - return; - } - /* unpack our fd */ - cnt = 1; - if (OPAL_SUCCESS != (rc = opal_dss.unpack(buffer, &my_fd, &cnt, OPAL_INT))) { - ORTE_ERROR_LOG(rc); - return; - } - /* find the corresponding tracker */ - i64 = -1; - for (item = opal_list_get_first(&active_files); - item != opal_list_get_end(&active_files); - item = opal_list_get_next(item)) { - trk = (orte_dfs_tracker_t*)item; - if (my_fd == trk->local_fd) { - /* process the request */ - if (0 < orte_dfs_orted_num_worker_threads) { - wrkr = OBJ_NEW(worker_req_t); - wrkr->trk = trk; - wrkr->rid = rid; - ORTE_DFS_POST_WORKER(wrkr, remote_size); - return; - } - /* no worker threads, so stat the file and get its size */ - if (0 > stat(trk->filename, &buf)) { - /* 
cannot stat file */ - opal_output_verbose(1, orte_dfs_base_framework.framework_output, - "%s could not stat %s", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - trk->filename); - } else { - i64 = buf.st_size; - } - break; - } - } - /* construct the return message */ - answer = OBJ_NEW(opal_buffer_t); - if (OPAL_SUCCESS != (rc = opal_dss.pack(answer, &cmd, 1, ORTE_DFS_CMD_T))) { - ORTE_ERROR_LOG(rc); - return; - } - if (OPAL_SUCCESS != (rc = opal_dss.pack(answer, &rid, 1, OPAL_UINT64))) { - ORTE_ERROR_LOG(rc); - return; - } - if (OPAL_SUCCESS != (rc = opal_dss.pack(answer, &i64, 1, OPAL_INT64))) { - ORTE_ERROR_LOG(rc); - return; - } - /* send it */ - if (0 > (rc = orte_rml.send_buffer_nb(orte_mgmt_conduit, - sender, answer, - ORTE_RML_TAG_DFS_DATA, - orte_rml_send_callback, NULL))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(answer); - return; - } - break; - - case ORTE_DFS_SEEK_CMD: - /* unpack their request id */ - cnt = 1; - if (OPAL_SUCCESS != (rc = opal_dss.unpack(buffer, &rid, &cnt, OPAL_UINT64))) { - ORTE_ERROR_LOG(rc); - return; - } - /* unpack our fd */ - cnt = 1; - if (OPAL_SUCCESS != (rc = opal_dss.unpack(buffer, &my_fd, &cnt, OPAL_INT))) { - ORTE_ERROR_LOG(rc); - return; - } - /* unpack the offset */ - cnt = 1; - if (OPAL_SUCCESS != (rc = opal_dss.unpack(buffer, &i64, &cnt, OPAL_INT64))) { - ORTE_ERROR_LOG(rc); - return; - } - /* unpack the whence */ - cnt = 1; - if (OPAL_SUCCESS != (rc = opal_dss.unpack(buffer, &whence, &cnt, OPAL_INT))) { - ORTE_ERROR_LOG(rc); - return; - } - /* set default error */ - bytes_read = -1; - /* find the corresponding tracker - we do this to ensure - * that the local fd we were sent is actually open - */ - for (item = opal_list_get_first(&active_files); - item != opal_list_get_end(&active_files); - item = opal_list_get_next(item)) { - trk = (orte_dfs_tracker_t*)item; - if (my_fd == trk->local_fd) { - /* process the request */ - if (0 < orte_dfs_orted_num_worker_threads) { - wrkr = OBJ_NEW(worker_req_t); - wrkr->trk = trk; - wrkr->rid = 
rid; - wrkr->nbytes = i64; - wrkr->whence = whence; - ORTE_DFS_POST_WORKER(wrkr, remote_seek); - return; - } - /* no worker threads, so stat the file and get its size */ - if (0 > stat(trk->filename, &buf)) { - /* cannot stat file */ - opal_output_verbose(1, orte_dfs_base_framework.framework_output, - "%s seek could not stat %s", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - trk->filename); - } else if (buf.st_size < i64 && SEEK_SET == whence) { - /* seek would take us past EOF */ - opal_output_verbose(1, orte_dfs_base_framework.framework_output, - "%s seek SET past EOF on file %s", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - trk->filename); - bytes_read = -2; - } else if (buf.st_size < (off_t)(trk->location + i64) && - SEEK_CUR == whence) { - /* seek would take us past EOF */ - opal_output_verbose(1, orte_dfs_base_framework.framework_output, - "%s seek CUR past EOF on file %s", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - trk->filename); - bytes_read = -3; - } else { - lseek(my_fd, i64, whence); - if (SEEK_SET == whence) { - trk->location = i64; - } else { - trk->location += i64; - } - bytes_read = i64; - } - break; - } - } - /* construct the return message */ - answer = OBJ_NEW(opal_buffer_t); - if (OPAL_SUCCESS != (rc = opal_dss.pack(answer, &cmd, 1, ORTE_DFS_CMD_T))) { - ORTE_ERROR_LOG(rc); - return; - } - if (OPAL_SUCCESS != (rc = opal_dss.pack(answer, &rid, 1, OPAL_UINT64))) { - ORTE_ERROR_LOG(rc); - return; - } - /* return the offset/status */ - if (OPAL_SUCCESS != (rc = opal_dss.pack(answer, &bytes_read, 1, OPAL_INT64))) { - ORTE_ERROR_LOG(rc); - return; - } - /* send it */ - opal_output_verbose(1, orte_dfs_base_framework.framework_output, - "%s sending %ld offset back to %s", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - (long)bytes_read, - ORTE_NAME_PRINT(sender)); - if (0 > (rc = orte_rml.send_buffer_nb(orte_mgmt_conduit, - sender, answer, - ORTE_RML_TAG_DFS_DATA, - orte_rml_send_callback, NULL))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(answer); - return; - } - break; - - case 
ORTE_DFS_READ_CMD: - /* set default error */ - my_fd = -1; - bytes_read = -1; - read_buf = NULL; - /* unpack their request id */ - cnt = 1; - if (OPAL_SUCCESS != (rc = opal_dss.unpack(buffer, &rid, &cnt, OPAL_UINT64))) { - ORTE_ERROR_LOG(rc); - return; - } - /* unpack our fd */ - cnt = 1; - if (OPAL_SUCCESS != (rc = opal_dss.unpack(buffer, &my_fd, &cnt, OPAL_INT))) { - ORTE_ERROR_LOG(rc); - goto answer_read; - } - /* unpack the number of bytes to read */ - cnt = 1; - if (OPAL_SUCCESS != (rc = opal_dss.unpack(buffer, &i64, &cnt, OPAL_INT64))) { - ORTE_ERROR_LOG(rc); - goto answer_read; - } - /* find the corresponding tracker - we do this to ensure - * that the local fd we were sent is actually open - */ - for (item = opal_list_get_first(&active_files); - item != opal_list_get_end(&active_files); - item = opal_list_get_next(item)) { - trk = (orte_dfs_tracker_t*)item; - if (my_fd == trk->local_fd) { - if (0 < orte_dfs_orted_num_worker_threads) { - wrkr = OBJ_NEW(worker_req_t); - wrkr->rid = rid; - wrkr->trk = trk; - wrkr->nbytes = i64; - /* dispatch to the currently indexed thread */ - ORTE_DFS_POST_WORKER(wrkr, remote_read); - return; - } else { - opal_output_verbose(1, orte_dfs_base_framework.framework_output, - "%s reading %ld bytes from local fd %d", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - (long)i64, my_fd); - /* do the read */ - read_buf = (uint8_t*)malloc(i64); - if (NULL == read_buf) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - goto answer_read; - } - bytes_read = read(my_fd, read_buf, (long)i64); - if (0 < bytes_read) { - /* update our location */ - trk->location += bytes_read; - } - } - break; - } - } - answer_read: - /* construct the return message */ - answer = OBJ_NEW(opal_buffer_t); - if (OPAL_SUCCESS != (rc = opal_dss.pack(answer, &cmd, 1, ORTE_DFS_CMD_T))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(answer); - if (NULL != read_buf) { - free(read_buf); - } - return; - } - if (OPAL_SUCCESS != (rc = opal_dss.pack(answer, &rid, 1, OPAL_UINT64))) { - 
ORTE_ERROR_LOG(rc); - OBJ_RELEASE(answer); - if (NULL != read_buf) { - free(read_buf); - } - return; - } - /* include the number of bytes read */ - if (OPAL_SUCCESS != (rc = opal_dss.pack(answer, &bytes_read, 1, OPAL_INT64))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(answer); - if (NULL != read_buf) { - free(read_buf); - } - return; - } - /* include the bytes read */ - if (0 < bytes_read) { - if (OPAL_SUCCESS != (rc = opal_dss.pack(answer, read_buf, bytes_read, OPAL_UINT8))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(answer); - free(read_buf); - return; - } - } - if (NULL != read_buf) { - free(read_buf); - } - /* send it */ - opal_output_verbose(1, orte_dfs_base_framework.framework_output, - "%s sending %ld bytes back to %s", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - (long)bytes_read, - ORTE_NAME_PRINT(sender)); - if (0 > (rc = orte_rml.send_buffer_nb(orte_mgmt_conduit, - sender, answer, - ORTE_RML_TAG_DFS_DATA, - orte_rml_send_callback, NULL))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(answer); - return; - } - break; - - case ORTE_DFS_POST_CMD: - opal_output_verbose(1, orte_dfs_base_framework.framework_output, - "%s received post command from %s", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - ORTE_NAME_PRINT(sender)); - /* unpack their request id */ - cnt = 1; - if (OPAL_SUCCESS != (rc = opal_dss.unpack(buffer, &rid, &cnt, OPAL_UINT64))) { - ORTE_ERROR_LOG(rc); - goto answer_post; - } - /* unpack the name of the source of this data */ - cnt = 1; - if (OPAL_SUCCESS != (rc = opal_dss.unpack(buffer, &source, &cnt, ORTE_NAME))) { - ORTE_ERROR_LOG(rc); - goto answer_post; - } - /* unpack their buffer object */ - cnt = 1; - if (OPAL_SUCCESS != (rc = opal_dss.unpack(buffer, &bptr, &cnt, OPAL_BUFFER))) { - ORTE_ERROR_LOG(rc); - goto answer_post; - } - /* add the contents to the storage for this process */ - dfs = OBJ_NEW(orte_dfs_request_t); - dfs->target.jobid = source.jobid; - dfs->target.vpid = source.vpid; - dfs->bptr = bptr; - dfs->post_cbfunc = NULL; - process_posts(0, 0, (void*)dfs); - 
OBJ_RELEASE(bptr); - answer_post: - if (UINT64_MAX != rid) { - /* return an ack */ - answer = OBJ_NEW(opal_buffer_t); - if (OPAL_SUCCESS != (rc = opal_dss.pack(answer, &cmd, 1, ORTE_DFS_CMD_T))) { - ORTE_ERROR_LOG(rc); - return; - } - if (OPAL_SUCCESS != (rc = opal_dss.pack(answer, &rid, 1, OPAL_UINT64))) { - ORTE_ERROR_LOG(rc); - return; - } - if (0 > (rc = orte_rml.send_buffer_nb(orte_mgmt_conduit, - sender, answer, - ORTE_RML_TAG_DFS_DATA, - orte_rml_send_callback, NULL))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(answer); - } - } - break; - - case ORTE_DFS_RELAY_POSTS_CMD: - /* unpack the name of the source of this data */ - cnt = 1; - if (OPAL_SUCCESS != (rc = opal_dss.unpack(buffer, &source, &cnt, ORTE_NAME))) { - ORTE_ERROR_LOG(rc); - return; - } - opal_output_verbose(1, orte_dfs_base_framework.framework_output, - "%s received relayed posts from sender %s for source %s", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - ORTE_NAME_PRINT(sender), - ORTE_NAME_PRINT(&source)); - /* lookup the job map */ - jfm = NULL; - for (item = opal_list_get_first(&file_maps); - item != opal_list_get_end(&file_maps); - item = opal_list_get_next(item)) { - jptr = (orte_dfs_jobfm_t*)item; - if (jptr->jobid == source.jobid) { - jfm = jptr; - break; - } - } - if (NULL == jfm) { - /* add it */ - jfm = OBJ_NEW(orte_dfs_jobfm_t); - jfm->jobid = source.jobid; - opal_list_append(&file_maps, &jfm->super); - } - /* see if we already have an entry for this source */ - vfm = NULL; - for (item = opal_list_get_first(&jfm->maps); - item != opal_list_get_end(&jfm->maps); - item = opal_list_get_next(item)) { - vptr = (orte_dfs_vpidfm_t*)item; - if (vptr->vpid == source.vpid) { - vfm = vptr; - break; - } - } - if (NULL == vfm) { - /* add it */ - vfm = OBJ_NEW(orte_dfs_vpidfm_t); - vfm->vpid = source.vpid; - opal_list_append(&jfm->maps, &vfm->super); - } - /* unpack their buffer object */ - cnt = 1; - if (OPAL_SUCCESS != (rc = opal_dss.unpack(buffer, &bptr, &cnt, OPAL_BUFFER))) { - ORTE_ERROR_LOG(rc); - 
return; - } - /* the buffer object came from a call to get_file_maps, so it isn't quite - * the same as when someone posts directly to us. So process it here by - * starting with getting the number of vpids that contributed. This - * should always be one, but leave it open for flexibility - */ - cnt = 1; - if (OPAL_SUCCESS != (rc = opal_dss.unpack(bptr, &ncontributors, &cnt, OPAL_INT32))) { - ORTE_ERROR_LOG(rc); - return; - } - /* loop thru the number of contributors */ - for (i=0; i < ncontributors; i++) { - /* unpack the vpid of the contributor */ - cnt = 1; - if (OPAL_SUCCESS != (rc = opal_dss.unpack(bptr, &vpid, &cnt, ORTE_VPID))) { - ORTE_ERROR_LOG(rc); - return; - } - /* unpack the number of entries */ - cnt = 1; - if (OPAL_SUCCESS != (rc = opal_dss.unpack(bptr, &nentries, &cnt, OPAL_INT32))) { - ORTE_ERROR_LOG(rc); - return; - } - for (j=0; j < nentries; j++) { - /* get the entry */ - cnt = 1; - if (OPAL_SUCCESS != (rc = opal_dss.unpack(bptr, &xfer, &cnt, OPAL_BUFFER))) { - ORTE_ERROR_LOG(rc); - return; - } - /* store it */ - if (OPAL_SUCCESS != (rc = opal_dss.pack(&vfm->data, &xfer, 1, OPAL_BUFFER))) { - ORTE_ERROR_LOG(rc); - return; - } - OBJ_RELEASE(xfer); - vfm->num_entries++; - } - } - OBJ_RELEASE(bptr); - /* no reply required */ - break; - - case ORTE_DFS_GETFM_CMD: - /* unpack their request id */ - cnt = 1; - if (OPAL_SUCCESS != (rc = opal_dss.unpack(buffer, &rid, &cnt, OPAL_UINT64))) { - ORTE_ERROR_LOG(rc); - return; - } - /* unpack the target */ - cnt = 1; - if (OPAL_SUCCESS != (rc = opal_dss.unpack(buffer, &source, &cnt, ORTE_NAME))) { - ORTE_ERROR_LOG(rc); - return; - } - /* construct the response */ - answer = OBJ_NEW(opal_buffer_t); - if (OPAL_SUCCESS != (rc = opal_dss.pack(answer, &cmd, 1, ORTE_DFS_CMD_T))) { - ORTE_ERROR_LOG(rc); - return; - } - if (OPAL_SUCCESS != (rc = opal_dss.pack(answer, &rid, 1, OPAL_UINT64))) { - ORTE_ERROR_LOG(rc); - return; - } - /* search our data tree for matches, assembling them - * into a byte object - */ - /* if 
the target job is WILDCARD, then process - * data for all jobids - else, find the one - */ - OBJ_CONSTRUCT(&bucket, opal_buffer_t); - nmaps = 0; - for (item = opal_list_get_first(&file_maps); - item != opal_list_get_end(&file_maps); - item = opal_list_get_next(item)) { - jfm = (orte_dfs_jobfm_t*)item; - if (ORTE_JOBID_WILDCARD == source.jobid || - jfm->jobid == source.jobid) { - rc = get_job_maps(jfm, source.vpid, &bucket); - if (rc < 0) { - break; - } else { - nmaps += rc; - } - } - } - if (rc < 0) { - if (OPAL_SUCCESS != (rc = opal_dss.pack(answer, &rc, 1, OPAL_INT32))) { - ORTE_ERROR_LOG(rc); - return; - } - } else { - if (OPAL_SUCCESS != (rc = opal_dss.pack(answer, &nmaps, 1, OPAL_INT32))) { - ORTE_ERROR_LOG(rc); - return; - } - if (0 < nmaps) { - opal_dss.copy_payload(answer, &bucket); - } - } - OBJ_DESTRUCT(&bucket); - opal_output_verbose(1, orte_dfs_base_framework.framework_output, - "%s getf-cmd: returning %d maps with %d bytes to sender %s", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), nmaps, - (int)answer->bytes_used, ORTE_NAME_PRINT(sender)); - if (0 > (rc = orte_rml.send_buffer_nb(orte_mgmt_conduit, - sender, answer, - ORTE_RML_TAG_DFS_DATA, - orte_rml_send_callback, NULL))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(answer); - } - break; - - default: - opal_output(0, "ORTED:DFS:RECV_DFS WTF"); - break; - } -} - -static void recv_dfs_data(int status, orte_process_name_t* sender, - opal_buffer_t* buffer, orte_rml_tag_t tag, - void* cbdata) -{ - orte_dfs_cmd_t cmd; - int32_t cnt; - orte_dfs_request_t *dfs, *dptr; - opal_list_item_t *item; - int remote_fd, rc; - int64_t i64; - uint64_t rid; - orte_dfs_tracker_t *trk; - - /* unpack the command this message is responding to */ - cnt = 1; - if (OPAL_SUCCESS != (rc = opal_dss.unpack(buffer, &cmd, &cnt, ORTE_DFS_CMD_T))) { - ORTE_ERROR_LOG(rc); - return; - } - - opal_output_verbose(1, orte_dfs_base_framework.framework_output, - "%s recvd:data cmd %d from sender %s", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), (int)cmd, - 
ORTE_NAME_PRINT(sender)); - - switch (cmd) { - case ORTE_DFS_OPEN_CMD: - /* unpack the request id */ - cnt = 1; - if (OPAL_SUCCESS != (rc = opal_dss.unpack(buffer, &rid, &cnt, OPAL_UINT64))) { - ORTE_ERROR_LOG(rc); - return; - } - /* unpack the remote fd */ - cnt = 1; - if (OPAL_SUCCESS != (rc = opal_dss.unpack(buffer, &remote_fd, &cnt, OPAL_INT))) { - ORTE_ERROR_LOG(rc); - return; - } - /* search our list of requests to find the matching one */ - dfs = NULL; - for (item = opal_list_get_first(&requests); - item != opal_list_get_end(&requests); - item = opal_list_get_next(item)) { - dptr = (orte_dfs_request_t*)item; - if (dptr->id == rid) { - /* as the request has been fulfilled, remove it */ - opal_list_remove_item(&requests, item); - dfs = dptr; - break; - } - } - if (NULL == dfs) { - opal_output_verbose(1, orte_dfs_base_framework.framework_output, - "%s recvd:data open file - no corresponding request found for local fd %d", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), local_fd); - ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND); - return; - } - - /* if the remote_fd < 0, then we had an error, so return - * the error value to the caller - */ - if (remote_fd < 0) { - opal_output_verbose(1, orte_dfs_base_framework.framework_output, - "%s recvd:data open file response error file %s [error: %d]", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - dfs->uri, remote_fd); - if (NULL != dfs->open_cbfunc) { - dfs->open_cbfunc(remote_fd, dfs->cbdata); - } - /* release the request */ - OBJ_RELEASE(dfs); - return; - } - /* otherwise, create a tracker for this file */ - trk = OBJ_NEW(orte_dfs_tracker_t); - trk->requestor.jobid = ORTE_PROC_MY_NAME->jobid; - trk->requestor.vpid = ORTE_PROC_MY_NAME->vpid; - trk->host_daemon.jobid = sender->jobid; - trk->host_daemon.vpid = sender->vpid; - trk->filename = strdup(dfs->uri); - /* define the local fd */ - trk->local_fd = local_fd++; - /* record the remote file descriptor */ - trk->remote_fd = remote_fd; - /* add it to our list of active files */ - 
opal_list_append(&active_files, &trk->super); - /* return the local_fd to the caller for - * subsequent operations - */ - opal_output_verbose(1, orte_dfs_base_framework.framework_output, - "%s recvd:data open file completed for file %s [local fd: %d remote fd: %d]", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - dfs->uri, trk->local_fd, remote_fd); - if (NULL != dfs->open_cbfunc) { - dfs->open_cbfunc(trk->local_fd, dfs->cbdata); - } - /* release the request */ - OBJ_RELEASE(dfs); - break; - - case ORTE_DFS_SIZE_CMD: - /* unpack the request id for this request */ - cnt = 1; - if (OPAL_SUCCESS != (rc = opal_dss.unpack(buffer, &rid, &cnt, OPAL_UINT64))) { - ORTE_ERROR_LOG(rc); - return; - } - /* search our list of requests to find the matching one */ - dfs = NULL; - for (item = opal_list_get_first(&requests); - item != opal_list_get_end(&requests); - item = opal_list_get_next(item)) { - dptr = (orte_dfs_request_t*)item; - if (dptr->id == rid) { - /* request was fulfilled, so remove it */ - opal_list_remove_item(&requests, item); - dfs = dptr; - break; - } - } - if (NULL == dfs) { - opal_output_verbose(1, orte_dfs_base_framework.framework_output, - "%s recvd:data size - no corresponding request found for local fd %d", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), local_fd); - ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND); - return; - } - /* get the size */ - cnt = 1; - if (OPAL_SUCCESS != (rc = opal_dss.unpack(buffer, &i64, &cnt, OPAL_INT64))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(dfs); - return; - } - /* pass them back to the original caller */ - if (NULL != dfs->read_cbfunc) { - dfs->size_cbfunc(i64, dfs->cbdata); - } - /* release the request */ - OBJ_RELEASE(dfs); - break; - - case ORTE_DFS_READ_CMD: - /* unpack the request id for this read */ - cnt = 1; - if (OPAL_SUCCESS != (rc = opal_dss.unpack(buffer, &rid, &cnt, OPAL_UINT64))) { - ORTE_ERROR_LOG(rc); - return; - } - /* search our list of requests to find the matching one */ - dfs = NULL; - for (item = opal_list_get_first(&requests); - 
item != opal_list_get_end(&requests); - item = opal_list_get_next(item)) { - dptr = (orte_dfs_request_t*)item; - if (dptr->id == rid) { - /* request was fulfilled, so remove it */ - opal_list_remove_item(&requests, item); - dfs = dptr; - break; - } - } - if (NULL == dfs) { - opal_output_verbose(1, orte_dfs_base_framework.framework_output, - "%s recvd:data read - no corresponding request found for local fd %d", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), local_fd); - ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND); - return; - } - /* get the bytes read */ - cnt = 1; - if (OPAL_SUCCESS != (rc = opal_dss.unpack(buffer, &i64, &cnt, OPAL_INT64))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(dfs); - return; - } - if (0 < i64) { - cnt = i64; - if (OPAL_SUCCESS != (rc = opal_dss.unpack(buffer, dfs->read_buffer, &cnt, OPAL_UINT8))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(dfs); - return; - } - } - /* pass them back to the original caller */ - if (NULL != dfs->read_cbfunc) { - dfs->read_cbfunc(i64, dfs->read_buffer, dfs->cbdata); - } - /* release the request */ - OBJ_RELEASE(dfs); - break; - - default: - opal_output(0, "ORTED:DFS:RECV:DATA WTF"); - break; - } -} - -static void* worker_thread_engine(opal_object_t *obj) -{ - opal_thread_t *thread = (opal_thread_t*)obj; - worker_thread_t *ptr = (worker_thread_t*)thread->t_arg; - - while (ptr->active) { - opal_event_loop(ptr->event_base, OPAL_EVLOOP_ONCE); - } - return OPAL_THREAD_CANCELLED; -} - -static void remote_open(int fd, short args, void *cbdata) -{ - worker_req_t *req = (worker_req_t*)cbdata; - opal_buffer_t *answer; - orte_dfs_cmd_t cmd = ORTE_DFS_OPEN_CMD; - int rc; - - /* attempt to open the file */ - opal_output_verbose(1, orte_dfs_base_framework.framework_output, - "%s opening file %s", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - req->trk->filename); - if (0 > (req->trk->local_fd = open(req->trk->filename, O_RDONLY))) { - ORTE_ERROR_LOG(ORTE_ERR_FILE_OPEN_FAILURE); - } - /* construct the return message */ - answer = OBJ_NEW(opal_buffer_t); - if 
(OPAL_SUCCESS != (rc = opal_dss.pack(answer, &cmd, 1, ORTE_DFS_CMD_T))) { - ORTE_ERROR_LOG(rc); - return; - } - if (OPAL_SUCCESS != (rc = opal_dss.pack(answer, &req->rid, 1, OPAL_UINT64))) { - ORTE_ERROR_LOG(rc); - return; - } - if (OPAL_SUCCESS != (rc = opal_dss.pack(answer, &req->trk->local_fd, 1, OPAL_INT))) { - ORTE_ERROR_LOG(rc); - return; - } - /* send it */ - if (0 > (rc = orte_rml.send_buffer_nb(orte_mgmt_conduit, - &req->trk->requestor, answer, - ORTE_RML_TAG_DFS_DATA, - orte_rml_send_callback, NULL))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(answer); - } -} - -static void remote_size(int fd, short args, void *cbdata) -{ - worker_req_t *req = (worker_req_t*)cbdata; - int rc; - struct stat buf; - int64_t i64; - opal_buffer_t *answer; - orte_dfs_cmd_t cmd = ORTE_DFS_SIZE_CMD; - - if (0 > stat(req->trk->filename, &buf)) { - /* cannot stat file */ - opal_output_verbose(1, orte_dfs_base_framework.framework_output, - "%s could not stat %s", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - req->trk->filename); - } else { - i64 = buf.st_size; - } - /* construct the return message */ - answer = OBJ_NEW(opal_buffer_t); - if (OPAL_SUCCESS != (rc = opal_dss.pack(answer, &cmd, 1, ORTE_DFS_CMD_T))) { - ORTE_ERROR_LOG(rc); - return; - } - if (OPAL_SUCCESS != (rc = opal_dss.pack(answer, &req->rid, 1, OPAL_UINT64))) { - ORTE_ERROR_LOG(rc); - return; - } - if (OPAL_SUCCESS != (rc = opal_dss.pack(answer, &i64, 1, OPAL_INT64))) { - ORTE_ERROR_LOG(rc); - return; - } - /* send it */ - if (0 > (rc = orte_rml.send_buffer_nb(orte_mgmt_conduit, - &req->trk->requestor, answer, - ORTE_RML_TAG_DFS_DATA, - orte_rml_send_callback, NULL))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(answer); - } -} - -static void remote_seek(int fd, short args, void *cbdata) -{ - worker_req_t *req = (worker_req_t*)cbdata; - opal_buffer_t *answer; - orte_dfs_cmd_t cmd = ORTE_DFS_SEEK_CMD; - int rc; - struct stat buf; - int64_t i64; - - /* stat the file and get its size */ - if (0 > stat(req->trk->filename, &buf)) { - /* 
cannot stat file */ - opal_output_verbose(1, orte_dfs_base_framework.framework_output, - "%s seek could not stat %s", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - req->trk->filename); - } else if (buf.st_size < req->nbytes && SEEK_SET == req->whence) { - /* seek would take us past EOF */ - opal_output_verbose(1, orte_dfs_base_framework.framework_output, - "%s seek SET past EOF on file %s", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - req->trk->filename); - i64 = -2; - } else if (buf.st_size < (off_t)(req->trk->location + req->nbytes) && - SEEK_CUR == req->whence) { - /* seek would take us past EOF */ - opal_output_verbose(1, orte_dfs_base_framework.framework_output, - "%s seek CUR past EOF on file %s", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - req->trk->filename); - i64 = -3; - } else { - lseek(req->trk->local_fd, req->nbytes, req->whence); - if (SEEK_SET == req->whence) { - req->trk->location = req->nbytes; - } else { - req->trk->location += req->nbytes; - } - i64 = req->nbytes; - } - - /* construct the return message */ - answer = OBJ_NEW(opal_buffer_t); - if (OPAL_SUCCESS != (rc = opal_dss.pack(answer, &cmd, 1, ORTE_DFS_CMD_T))) { - ORTE_ERROR_LOG(rc); - return; - } - if (OPAL_SUCCESS != (rc = opal_dss.pack(answer, &req->rid, 1, OPAL_UINT64))) { - ORTE_ERROR_LOG(rc); - return; - } - if (OPAL_SUCCESS != (rc = opal_dss.pack(answer, &i64, 1, OPAL_INT64))) { - ORTE_ERROR_LOG(rc); - return; - } - /* send it */ - if (0 > (rc = orte_rml.send_buffer_nb(orte_mgmt_conduit, - &req->trk->requestor, answer, - ORTE_RML_TAG_DFS_DATA, - orte_rml_send_callback, NULL))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(answer); - } -} - -static void remote_read(int fd, short args, void *cbdata) -{ - worker_req_t *req = (worker_req_t*)cbdata; - uint8_t *read_buf; - opal_buffer_t *answer; - orte_dfs_cmd_t cmd = ORTE_DFS_READ_CMD; - int64_t bytes_read; - int rc; - - /* do the read */ - opal_output_verbose(1, orte_dfs_base_framework.framework_output, - "%s issuing read", - 
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)); - read_buf = (uint8_t*)malloc(req->nbytes); - if (NULL == read_buf) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - return; - } - bytes_read = read(req->trk->local_fd, read_buf, (long)req->nbytes); - if (0 < bytes_read) { - /* update our location */ - req->trk->location += bytes_read; - } - /* construct the return message */ - answer = OBJ_NEW(opal_buffer_t); - if (OPAL_SUCCESS != (rc = opal_dss.pack(answer, &cmd, 1, ORTE_DFS_CMD_T))) { - ORTE_ERROR_LOG(rc); - free(read_buf); - return; - } - if (OPAL_SUCCESS != (rc = opal_dss.pack(answer, &req->rid, 1, OPAL_UINT64))) { - ORTE_ERROR_LOG(rc); - free(read_buf); - OBJ_RELEASE(answer); - return; - } - /* include the number of bytes read */ - if (OPAL_SUCCESS != (rc = opal_dss.pack(answer, &bytes_read, 1, OPAL_INT64))) { - ORTE_ERROR_LOG(rc); - free(read_buf); - OBJ_RELEASE(answer); - return; - } - /* include the bytes read */ - if (0 < bytes_read) { - if (OPAL_SUCCESS != (rc = opal_dss.pack(answer, read_buf, bytes_read, OPAL_UINT8))) { - ORTE_ERROR_LOG(rc); - free(read_buf); - OBJ_RELEASE(answer); - return; - } - } - free(read_buf); - /* send it */ - opal_output_verbose(1, orte_dfs_base_framework.framework_output, - "%s sending %ld bytes back to %s", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - (long)bytes_read, - ORTE_NAME_PRINT(&req->trk->requestor)); - if (0 > (rc = orte_rml.send_buffer_nb(orte_mgmt_conduit, - &req->trk->requestor, answer, - ORTE_RML_TAG_DFS_DATA, - orte_rml_send_callback, NULL))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(answer); - return; - } - OBJ_RELEASE(req); -} diff --git a/orte/mca/dfs/orted/dfs_orted.h b/orte/mca/dfs/orted/dfs_orted.h deleted file mode 100644 index b2b2f440964..00000000000 --- a/orte/mca/dfs/orted/dfs_orted.h +++ /dev/null @@ -1,38 +0,0 @@ -/* - * Copyright (c) 2012 Los Alamos National Security, LLC. - * All rights reserved. 
- * - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -/** - * @file - * - */ - -#ifndef MCA_dfs_orted_EXPORT_H -#define MCA_dfs_orted_EXPORT_H - -#include "orte_config.h" - -#include "orte/mca/dfs/dfs.h" - -BEGIN_C_DECLS - -/* - * Local Component structures - */ - -ORTE_MODULE_DECLSPEC extern orte_dfs_base_component_t mca_dfs_orted_component; - -ORTE_DECLSPEC extern orte_dfs_base_module_t orte_dfs_orted_module; - -extern int orte_dfs_orted_num_worker_threads; - -END_C_DECLS - -#endif /* MCA_dfs_orted_EXPORT_H */ diff --git a/orte/mca/dfs/orted/dfs_orted_component.c b/orte/mca/dfs/orted/dfs_orted_component.c deleted file mode 100644 index f102b898b15..00000000000 --- a/orte/mca/dfs/orted/dfs_orted_component.c +++ /dev/null @@ -1,101 +0,0 @@ -/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ -/* - * Copyright (c) 2012-2015 Los Alamos National Security, LLC. All rights - * reserved. - * - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "orte_config.h" -#include "opal/util/output.h" - -#include "orte/runtime/orte_globals.h" - -#include "orte/mca/dfs/dfs.h" -#include "orte/mca/dfs/base/base.h" -#include "dfs_orted.h" - -/* - * Public string for version number - */ -const char *orte_dfs_orted_component_version_string = - "ORTE DFS orted MCA component version " ORTE_VERSION; - -int orte_dfs_orted_num_worker_threads = 0; - -/* - * Local functionality - */ -static int dfs_orted_register(void); -static int dfs_orted_open(void); -static int dfs_orted_close(void); -static int dfs_orted_component_query(mca_base_module_t **module, int *priority); - -/* - * Instantiate the public struct with all of our public information - * and pointer to our public functions in it - */ -orte_dfs_base_component_t mca_dfs_orted_component = -{ - /* Handle the general mca_component_t struct containing - * meta information about the component itdefault_orted - */ - .base_version = { - ORTE_DFS_BASE_VERSION_1_0_0, - 
/* Component name and version */ - .mca_component_name = "orted", - MCA_BASE_MAKE_VERSION(component, ORTE_MAJOR_VERSION, ORTE_MINOR_VERSION, - ORTE_RELEASE_VERSION), - - /* Component open and close functions */ - .mca_open_component = dfs_orted_open, - .mca_close_component = dfs_orted_close, - .mca_query_component = dfs_orted_component_query, - .mca_register_component_params = dfs_orted_register, - }, - .base_data = { - /* The component is checkpoint ready */ - MCA_BASE_METADATA_PARAM_CHECKPOINT - }, -}; - -static int dfs_orted_register(void) -{ - orte_dfs_orted_num_worker_threads = 0; - (void) mca_base_component_var_register(&mca_dfs_orted_component.base_version, "num_worker_threads", - "Number of worker threads to use for processing file requests", - MCA_BASE_VAR_TYPE_INT, NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE, - OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_LOCAL, - &orte_dfs_orted_num_worker_threads); - - return ORTE_SUCCESS; -} - -static int dfs_orted_open(void) -{ - return ORTE_SUCCESS; -} - -static int dfs_orted_close(void) -{ - return ORTE_SUCCESS; -} - -static int dfs_orted_component_query(mca_base_module_t **module, int *priority) -{ - if (ORTE_PROC_IS_DAEMON || ORTE_PROC_IS_HNP) { - /* we are the default component for daemons and HNP */ - *priority = 1000; - *module = (mca_base_module_t *)&orte_dfs_orted_module; - return ORTE_SUCCESS; - } - - *priority = -1; - *module = NULL; - return ORTE_ERROR; -} - diff --git a/orte/mca/dfs/orted/owner.txt b/orte/mca/dfs/orted/owner.txt deleted file mode 100644 index 4ad6f408ca3..00000000000 --- a/orte/mca/dfs/orted/owner.txt +++ /dev/null @@ -1,7 +0,0 @@ -# -# owner/status file -# owner: institution that is responsible for this package -# status: e.g. 
active, maintenance, unmaintained -# -owner: INTEL -status: maintenance diff --git a/orte/mca/dfs/test/Makefile.am b/orte/mca/dfs/test/Makefile.am deleted file mode 100644 index 1abd1f6dbc8..00000000000 --- a/orte/mca/dfs/test/Makefile.am +++ /dev/null @@ -1,36 +0,0 @@ -# -# Copyright (c) 2012 Los Alamos National Security, LLC. All rights reserved. -# Copyright (c) 2017 IBM Corporation. All rights reserved. -# $COPYRIGHT$ -# -# Additional copyrights may follow -# -# $HEADER$ -# - -sources = \ - dfs_test.h \ - dfs_test_component.c \ - dfs_test.c - -# Make the output library in this directory, and name it either -# mca__.la (for DSO builds) or libmca__.la -# (for static builds). - -if MCA_BUILD_orte_dfs_test_DSO -component_noinst = -component_install = mca_dfs_test.la -else -component_noinst = libmca_dfs_test.la -component_install = -endif - -mcacomponentdir = $(ortelibdir) -mcacomponent_LTLIBRARIES = $(component_install) -mca_dfs_test_la_SOURCES = $(sources) -mca_dfs_test_la_LDFLAGS = -module -avoid-version -mca_dfs_test_la_LIBADD = $(top_builddir)/orte/lib@ORTE_LIB_PREFIX@open-rte.la - -noinst_LTLIBRARIES = $(component_noinst) -libmca_dfs_test_la_SOURCES =$(sources) -libmca_dfs_test_la_LDFLAGS = -module -avoid-version diff --git a/orte/mca/dfs/test/dfs_test.c b/orte/mca/dfs/test/dfs_test.c deleted file mode 100644 index 24392e013dc..00000000000 --- a/orte/mca/dfs/test/dfs_test.c +++ /dev/null @@ -1,1149 +0,0 @@ -/* - * Copyright (c) 2012-2013 Los Alamos National Security, LLC. - * All rights reserved. - * Copyright (c) 2014-2017 Intel, Inc. All rights reserved. - * Copyright (c) 2014-2015 Research Organization for Information Science - * and Technology (RIST). All rights reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "orte_config.h" - -#include -#ifdef HAVE_UNISTD_H -#include -#endif /* HAVE_UNISTD_H */ -#include -#ifdef HAVE_FCNTL_H -#include -#endif -#include - -#include "opal/util/if.h" -#include "opal/util/output.h" -#include "opal/util/uri.h" -#include "opal/dss/dss.h" -#include "opal/mca/pmix/pmix.h" - -#include "orte/util/error_strings.h" -#include "orte/util/name_fns.h" -#include "orte/util/show_help.h" -#include "orte/util/threads.h" -#include "orte/runtime/orte_globals.h" -#include "orte/mca/errmgr/errmgr.h" -#include "orte/mca/rml/rml.h" - -#include "orte/mca/dfs/base/base.h" -#include "dfs_test.h" - -/* - * Module functions: Global - */ -static int init(void); -static int finalize(void); - -static void dfs_open(char *uri, - orte_dfs_open_callback_fn_t cbfunc, - void *cbdata); -static void dfs_close(int fd, - orte_dfs_close_callback_fn_t cbfunc, - void *cbdata); -static void dfs_get_file_size(int fd, - orte_dfs_size_callback_fn_t cbfunc, - void *cbdata); -static void dfs_seek(int fd, long offset, int whence, - orte_dfs_seek_callback_fn_t cbfunc, - void *cbdata); -static void dfs_read(int fd, uint8_t *buffer, - long length, - orte_dfs_read_callback_fn_t cbfunc, - void *cbdata); -static void dfs_post_file_map(opal_buffer_t *bo, - orte_dfs_post_callback_fn_t cbfunc, - void *cbdata); -static void dfs_get_file_map(orte_process_name_t *target, - orte_dfs_fm_callback_fn_t cbfunc, - void *cbdata); -static void dfs_load_file_maps(orte_jobid_t jobid, - opal_buffer_t *bo, - orte_dfs_load_callback_fn_t cbfunc, - void *cbdata); -static void dfs_purge_file_maps(orte_jobid_t jobid, - orte_dfs_purge_callback_fn_t cbfunc, - void *cbdata); - -/****************** - * TEST module - ******************/ -orte_dfs_base_module_t orte_dfs_test_module = { - init, - finalize, - dfs_open, - dfs_close, - dfs_get_file_size, - dfs_seek, - dfs_read, - dfs_post_file_map, - dfs_get_file_map, - 
dfs_load_file_maps, - dfs_purge_file_maps -}; - -static opal_list_t requests, active_files; -static int local_fd = 0; -static uint64_t req_id = 0; -static void recv_dfs(int status, orte_process_name_t* sender, - opal_buffer_t* buffer, orte_rml_tag_t tag, - void* cbdata); - -static int init(void) -{ - OBJ_CONSTRUCT(&requests, opal_list_t); - OBJ_CONSTRUCT(&active_files, opal_list_t); - orte_rml.recv_buffer_nb(ORTE_NAME_WILDCARD, - ORTE_RML_TAG_DFS_DATA, - ORTE_RML_PERSISTENT, - recv_dfs, - NULL); - return ORTE_SUCCESS; -} - -static int finalize(void) -{ - opal_list_item_t *item; - - orte_rml.recv_cancel(ORTE_NAME_WILDCARD, ORTE_RML_TAG_DFS_DATA); - while (NULL != (item = opal_list_remove_first(&requests))) { - OBJ_RELEASE(item); - } - OBJ_DESTRUCT(&requests); - while (NULL != (item = opal_list_remove_first(&active_files))) { - OBJ_RELEASE(item); - } - OBJ_DESTRUCT(&active_files); - return ORTE_SUCCESS; -} - -/* receives take place in an event, so we are free to process - * the request list without fear of getting things out-of-order - */ -static void recv_dfs(int status, orte_process_name_t* sender, - opal_buffer_t* buffer, orte_rml_tag_t tag, - void* cbdata) -{ - orte_dfs_cmd_t cmd; - int32_t cnt; - orte_dfs_request_t *dfs, *dptr; - opal_list_item_t *item; - int remote_fd, rc; - int64_t i64; - uint64_t rid; - orte_dfs_tracker_t *trk; - - /* unpack the command this message is responding to */ - cnt = 1; - if (OPAL_SUCCESS != (rc = opal_dss.unpack(buffer, &cmd, &cnt, ORTE_DFS_CMD_T))) { - ORTE_ERROR_LOG(rc); - return; - } - - opal_output_verbose(1, orte_dfs_base_framework.framework_output, - "%s recvd cmd %d from sender %s", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), (int)cmd, - ORTE_NAME_PRINT(sender)); - - switch (cmd) { - case ORTE_DFS_OPEN_CMD: - /* unpack the request id */ - cnt = 1; - if (OPAL_SUCCESS != (rc = opal_dss.unpack(buffer, &rid, &cnt, OPAL_UINT64))) { - ORTE_ERROR_LOG(rc); - return; - } - /* unpack the remote fd */ - cnt = 1; - if (OPAL_SUCCESS != (rc = 
opal_dss.unpack(buffer, &remote_fd, &cnt, OPAL_INT))) { - ORTE_ERROR_LOG(rc); - return; - } - /* search our list of requests to find the matching one */ - dfs = NULL; - for (item = opal_list_get_first(&requests); - item != opal_list_get_end(&requests); - item = opal_list_get_next(item)) { - dptr = (orte_dfs_request_t*)item; - if (dptr->id == rid) { - /* as the request has been fulfilled, remove it */ - opal_list_remove_item(&requests, item); - dfs = dptr; - break; - } - } - if (NULL == dfs) { - opal_output_verbose(1, orte_dfs_base_framework.framework_output, - "%s recvd open file - no corresponding request found for local fd %d", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), local_fd); - ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND); - return; - } - - /* if the remote_fd < 0, then we had an error, so return - * the error value to the caller - */ - if (remote_fd < 0) { - opal_output_verbose(1, orte_dfs_base_framework.framework_output, - "%s recvd open file response error file %s [error: %d]", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - dfs->uri, remote_fd); - if (NULL != dfs->open_cbfunc) { - dfs->open_cbfunc(remote_fd, dfs->cbdata); - } - /* release the request */ - OBJ_RELEASE(dfs); - return; - } - /* otherwise, create a tracker for this file */ - trk = OBJ_NEW(orte_dfs_tracker_t); - trk->requestor.jobid = ORTE_PROC_MY_NAME->jobid; - trk->requestor.vpid = ORTE_PROC_MY_NAME->vpid; - trk->host_daemon.jobid = sender->jobid; - trk->host_daemon.vpid = sender->vpid; - trk->filename = strdup(dfs->uri); - /* define the local fd */ - trk->local_fd = local_fd++; - /* record the remote file descriptor */ - trk->remote_fd = remote_fd; - /* add it to our list of active files */ - opal_list_append(&active_files, &trk->super); - /* return the local_fd to the caller for - * subsequent operations - */ - opal_output_verbose(1, orte_dfs_base_framework.framework_output, - "%s recvd open file completed for file %s [local fd: %d remote fd: %d]", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - dfs->uri, 
trk->local_fd, remote_fd); - if (NULL != dfs->open_cbfunc) { - dfs->open_cbfunc(trk->local_fd, dfs->cbdata); - } - /* release the request */ - OBJ_RELEASE(dfs); - break; - - case ORTE_DFS_SIZE_CMD: - /* unpack the request id for this request */ - cnt = 1; - if (OPAL_SUCCESS != (rc = opal_dss.unpack(buffer, &rid, &cnt, OPAL_UINT64))) { - ORTE_ERROR_LOG(rc); - return; - } - /* search our list of requests to find the matching one */ - dfs = NULL; - for (item = opal_list_get_first(&requests); - item != opal_list_get_end(&requests); - item = opal_list_get_next(item)) { - dptr = (orte_dfs_request_t*)item; - if (dptr->id == rid) { - /* request was fulfilled, so remove it */ - opal_list_remove_item(&requests, item); - dfs = dptr; - break; - } - } - if (NULL == dfs) { - opal_output_verbose(1, orte_dfs_base_framework.framework_output, - "%s recvd size - no corresponding request found for local fd %d", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), local_fd); - ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND); - return; - } - /* get the size */ - cnt = 1; - if (OPAL_SUCCESS != (rc = opal_dss.unpack(buffer, &i64, &cnt, OPAL_INT64))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(dfs); - return; - } - /* pass it back to the original caller */ - if (NULL != dfs->size_cbfunc) { - dfs->size_cbfunc(i64, dfs->cbdata); - } - /* release the request */ - OBJ_RELEASE(dfs); - break; - - case ORTE_DFS_SEEK_CMD: - /* unpack the request id for this read */ - cnt = 1; - if (OPAL_SUCCESS != (rc = opal_dss.unpack(buffer, &rid, &cnt, OPAL_UINT64))) { - ORTE_ERROR_LOG(rc); - return; - } - /* search our list of requests to find the matching one */ - dfs = NULL; - for (item = opal_list_get_first(&requests); - item != opal_list_get_end(&requests); - item = opal_list_get_next(item)) { - dptr = (orte_dfs_request_t*)item; - if (dptr->id == rid) { - /* request was fulfilled, so remove it */ - opal_list_remove_item(&requests, item); - dfs = dptr; - break; - } - } - if (NULL == dfs) { - opal_output_verbose(1, 
orte_dfs_base_framework.framework_output, - "%s recvd seek - no corresponding request found for local fd %d", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), local_fd); - ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND); - return; - } - /* get the returned offset/status */ - cnt = 1; - if (OPAL_SUCCESS != (rc = opal_dss.unpack(buffer, &i64, &cnt, OPAL_INT64))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(dfs); - return; - } - /* pass it back to the original caller */ - if (NULL != dfs->seek_cbfunc) { - dfs->seek_cbfunc(i64, dfs->cbdata); - } - /* release the request */ - OBJ_RELEASE(dfs); - break; - - case ORTE_DFS_READ_CMD: - /* unpack the request id for this read */ - cnt = 1; - if (OPAL_SUCCESS != (rc = opal_dss.unpack(buffer, &rid, &cnt, OPAL_UINT64))) { - ORTE_ERROR_LOG(rc); - return; - } - /* search our list of requests to find the matching one */ - dfs = NULL; - for (item = opal_list_get_first(&requests); - item != opal_list_get_end(&requests); - item = opal_list_get_next(item)) { - dptr = (orte_dfs_request_t*)item; - if (dptr->id == rid) { - /* request was fulfilled, so remove it */ - opal_list_remove_item(&requests, item); - dfs = dptr; - break; - } - } - if (NULL == dfs) { - opal_output_verbose(1, orte_dfs_base_framework.framework_output, - "%s recvd read - no corresponding request found for local fd %d", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), local_fd); - ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND); - return; - } - /* get the bytes read */ - cnt = 1; - if (OPAL_SUCCESS != (rc = opal_dss.unpack(buffer, &i64, &cnt, OPAL_INT64))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(dfs); - return; - } - if (0 < i64) { - cnt = i64; - if (OPAL_SUCCESS != (rc = opal_dss.unpack(buffer, dfs->read_buffer, &cnt, OPAL_UINT8))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(dfs); - return; - } - } - /* pass them back to the original caller */ - if (NULL != dfs->read_cbfunc) { - dfs->read_cbfunc(i64, dfs->read_buffer, dfs->cbdata); - } - /* release the request */ - OBJ_RELEASE(dfs); - break; - - case ORTE_DFS_POST_CMD: - /* unpack 
the request id for this read */ - cnt = 1; - if (OPAL_SUCCESS != (rc = opal_dss.unpack(buffer, &rid, &cnt, OPAL_UINT64))) { - ORTE_ERROR_LOG(rc); - return; - } - /* search our list of requests to find the matching one */ - dfs = NULL; - for (item = opal_list_get_first(&requests); - item != opal_list_get_end(&requests); - item = opal_list_get_next(item)) { - dptr = (orte_dfs_request_t*)item; - if (dptr->id == rid) { - /* request was fulfilled, so remove it */ - opal_list_remove_item(&requests, item); - dfs = dptr; - break; - } - } - if (NULL == dfs) { - opal_output_verbose(1, orte_dfs_base_framework.framework_output, - "%s recvd post - no corresponding request found", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)); - ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND); - return; - } - if (NULL != dfs->post_cbfunc) { - dfs->post_cbfunc(dfs->cbdata); - } - OBJ_RELEASE(dfs); - break; - - case ORTE_DFS_GETFM_CMD: - /* unpack the request id for this read */ - cnt = 1; - if (OPAL_SUCCESS != (rc = opal_dss.unpack(buffer, &rid, &cnt, OPAL_UINT64))) { - ORTE_ERROR_LOG(rc); - return; - } - /* search our list of requests to find the matching one */ - dfs = NULL; - for (item = opal_list_get_first(&requests); - item != opal_list_get_end(&requests); - item = opal_list_get_next(item)) { - dptr = (orte_dfs_request_t*)item; - if (dptr->id == rid) { - /* request was fulfilled, so remove it */ - opal_list_remove_item(&requests, item); - dfs = dptr; - break; - } - } - if (NULL == dfs) { - opal_output_verbose(1, orte_dfs_base_framework.framework_output, - "%s recvd getfm - no corresponding request found", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)); - ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND); - return; - } - /* return it to caller */ - if (NULL != dfs->fm_cbfunc) { - dfs->fm_cbfunc(buffer, dfs->cbdata); - } - OBJ_RELEASE(dfs); - break; - - default: - opal_output(0, "TEST:DFS:RECV WTF"); - break; - } -} - -static void process_opens(int fd, short args, void *cbdata) -{ - orte_dfs_request_t *dfs = (orte_dfs_request_t*)cbdata; 
- int rc; - opal_buffer_t *buffer; - char *scheme, *host=NULL, *filename=NULL; - orte_process_name_t daemon; - opal_list_t lt; - opal_namelist_t *nm; - - ORTE_ACQUIRE_OBJECT(dfs); - - opal_output_verbose(1, orte_dfs_base_framework.framework_output, - "%s PROCESSING OPEN", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)); - - /* get the scheme to determine if we can process locally or not */ - if (NULL == (scheme = opal_uri_get_scheme(dfs->uri))) { - ORTE_ERROR_LOG(ORTE_ERR_BAD_PARAM); - goto complete; - } - opal_output_verbose(1, orte_dfs_base_framework.framework_output, - "%s GOT SCHEME", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)); - - if (0 != strcmp(scheme, "file")) { - /* not yet supported */ - orte_show_help("orte_dfs_help.txt", "unsupported-filesystem", - true, dfs->uri); - free(scheme); - goto complete; - } - free(scheme); - - /* dissect the uri to extract host and filename/path */ - if (NULL == (filename = opal_filename_from_uri(dfs->uri, &host))) { - goto complete; - } - opal_output_verbose(1, orte_dfs_base_framework.framework_output, - "%s GOT FILENAME %s", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), filename); - if (NULL == host) { - host = strdup(orte_process_info.nodename); - } - - /* ident the daemon on that host */ - daemon.jobid = ORTE_PROC_MY_DAEMON->jobid; - OBJ_CONSTRUCT(<, opal_list_t); - if (ORTE_SUCCESS != (rc = opal_pmix.resolve_peers(host, daemon.jobid, <))) { - ORTE_ERROR_LOG(rc); - OBJ_DESTRUCT(<); - goto complete; - } - nm = (opal_namelist_t*)opal_list_get_first(<); - daemon.vpid = nm->name.vpid; - OPAL_LIST_DESTRUCT(<); - - opal_output_verbose(1, orte_dfs_base_framework.framework_output, - "%s file %s on host %s daemon %s", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - filename, host, ORTE_NAME_PRINT(&daemon)); - - /* add this request to our local list so we can - * match it with the returned response when it comes - */ - dfs->id = req_id++; - opal_list_append(&requests, &dfs->super); - - /* setup a message for the daemon telling - * them what file we want to access - */ 
- buffer = OBJ_NEW(opal_buffer_t); - if (OPAL_SUCCESS != (rc = opal_dss.pack(buffer, &dfs->cmd, 1, ORTE_DFS_CMD_T))) { - ORTE_ERROR_LOG(rc); - opal_list_remove_item(&requests, &dfs->super); - goto complete; - } - /* pass the request id */ - if (OPAL_SUCCESS != (rc = opal_dss.pack(buffer, &dfs->id, 1, OPAL_UINT64))) { - ORTE_ERROR_LOG(rc); - opal_list_remove_item(&requests, &dfs->super); - goto complete; - } - if (OPAL_SUCCESS != (rc = opal_dss.pack(buffer, &filename, 1, OPAL_STRING))) { - ORTE_ERROR_LOG(rc); - opal_list_remove_item(&requests, &dfs->super); - goto complete; - } - - opal_output_verbose(1, orte_dfs_base_framework.framework_output, - "%s sending open file request to %s file %s", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - ORTE_NAME_PRINT(&daemon), - filename); - /* send it */ - if (0 > (rc = orte_rml.send_buffer_nb(orte_mgmt_conduit, - &daemon, buffer, - ORTE_RML_TAG_DFS_CMD, - orte_rml_send_callback, NULL))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(buffer); - opal_list_remove_item(&requests, &dfs->super); - goto complete; - } - /* don't release it */ - free(host); - free(filename); - return; - - complete: - /* we get here if an error occurred - execute any - * pending callback so the proc doesn't hang - */ - if (NULL != host) { - free(host); - } - if (NULL != filename) { - free(filename); - } - if (NULL != dfs->open_cbfunc) { - dfs->open_cbfunc(-1, dfs->cbdata); - } - OBJ_RELEASE(dfs); -} - - -/* in order to handle the possible opening/reading of files by - * multiple threads, we have to ensure that all operations are - * carried out in events - so the "open" cmd simply posts an - * event containing the required info, and then returns - */ -static void dfs_open(char *uri, - orte_dfs_open_callback_fn_t cbfunc, - void *cbdata) -{ - orte_dfs_request_t *dfs; - - opal_output_verbose(1, orte_dfs_base_framework.framework_output, - "%s opening file %s", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), uri); - - /* setup the request */ - dfs = OBJ_NEW(orte_dfs_request_t); - 
dfs->cmd = ORTE_DFS_OPEN_CMD; - dfs->uri = strdup(uri); - dfs->open_cbfunc = cbfunc; - dfs->cbdata = cbdata; - - /* post it for processing */ - ORTE_THREADSHIFT(dfs, orte_event_base, process_opens, ORTE_SYS_PRI); -} - -static void process_close(int fd, short args, void *cbdata) -{ - orte_dfs_request_t *close_dfs = (orte_dfs_request_t*)cbdata; - orte_dfs_tracker_t *tptr, *trk; - opal_list_item_t *item; - opal_buffer_t *buffer; - int rc; - - ORTE_ACQUIRE_OBJECT(close_dfs); - - opal_output_verbose(1, orte_dfs_base_framework.framework_output, - "%s closing fd %d", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - close_dfs->local_fd); - - /* look in our local records for this fd */ - trk = NULL; - for (item = opal_list_get_first(&active_files); - item != opal_list_get_end(&active_files); - item = opal_list_get_next(item)) { - tptr = (orte_dfs_tracker_t*)item; - if (tptr->local_fd == close_dfs->local_fd) { - trk = tptr; - break; - } - } - if (NULL == trk) { - ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND); - if (NULL != close_dfs->close_cbfunc) { - close_dfs->close_cbfunc(close_dfs->local_fd, close_dfs->cbdata); - } - OBJ_RELEASE(close_dfs); - return; - } - - /* setup a message for the daemon telling - * them what file to close - */ - buffer = OBJ_NEW(opal_buffer_t); - if (OPAL_SUCCESS != (rc = opal_dss.pack(buffer, &close_dfs->cmd, 1, ORTE_DFS_CMD_T))) { - ORTE_ERROR_LOG(rc); - goto complete; - } - if (OPAL_SUCCESS != (rc = opal_dss.pack(buffer, &trk->remote_fd, 1, OPAL_INT))) { - ORTE_ERROR_LOG(rc); - goto complete; - } - - opal_output_verbose(1, orte_dfs_base_framework.framework_output, - "%s sending close file request to %s for fd %d", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - ORTE_NAME_PRINT(&trk->host_daemon), - trk->local_fd); - /* send it */ - if (0 > (rc = orte_rml.send_buffer_nb(orte_mgmt_conduit, - &trk->host_daemon, buffer, - ORTE_RML_TAG_DFS_CMD, - orte_rml_send_callback, NULL))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(buffer); - goto complete; - } - - complete: - 
opal_list_remove_item(&active_files, &trk->super); - OBJ_RELEASE(trk); - if (NULL != close_dfs->close_cbfunc) { - close_dfs->close_cbfunc(close_dfs->local_fd, close_dfs->cbdata); - } - OBJ_RELEASE(close_dfs); -} - -static void dfs_close(int fd, - orte_dfs_close_callback_fn_t cbfunc, - void *cbdata) -{ - orte_dfs_request_t *dfs; - - opal_output_verbose(1, orte_dfs_base_framework.framework_output, - "%s close called on fd %d", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), fd); - - dfs = OBJ_NEW(orte_dfs_request_t); - dfs->cmd = ORTE_DFS_CLOSE_CMD; - dfs->local_fd = fd; - dfs->close_cbfunc = cbfunc; - dfs->cbdata = cbdata; - - /* post it for processing */ - ORTE_THREADSHIFT(dfs, orte_event_base, process_close, ORTE_SYS_PRI); -} - -static void process_sizes(int fd, short args, void *cbdata) -{ - orte_dfs_request_t *size_dfs = (orte_dfs_request_t*)cbdata; - orte_dfs_tracker_t *tptr, *trk; - opal_list_item_t *item; - opal_buffer_t *buffer; - int rc; - - ORTE_ACQUIRE_OBJECT(size_dfs); - - opal_output_verbose(1, orte_dfs_base_framework.framework_output, - "%s processing get_size on fd %d", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - size_dfs->local_fd); - - /* look in our local records for this fd */ - trk = NULL; - for (item = opal_list_get_first(&active_files); - item != opal_list_get_end(&active_files); - item = opal_list_get_next(item)) { - tptr = (orte_dfs_tracker_t*)item; - if (tptr->local_fd == size_dfs->local_fd) { - trk = tptr; - break; - } - } - if (NULL == trk) { - ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND); - OBJ_RELEASE(size_dfs); - return; - } - - /* add this request to our local list so we can - * match it with the returned response when it comes - */ - size_dfs->id = req_id++; - opal_list_append(&requests, &size_dfs->super); - - /* setup a message for the daemon telling - * them what file we want to access - */ - buffer = OBJ_NEW(opal_buffer_t); - if (OPAL_SUCCESS != (rc = opal_dss.pack(buffer, &size_dfs->cmd, 1, ORTE_DFS_CMD_T))) { - ORTE_ERROR_LOG(rc); - 
opal_list_remove_item(&requests, &size_dfs->super); - goto complete; - } - /* pass the request id */ - if (OPAL_SUCCESS != (rc = opal_dss.pack(buffer, &size_dfs->id, 1, OPAL_UINT64))) { - ORTE_ERROR_LOG(rc); - opal_list_remove_item(&requests, &size_dfs->super); - goto complete; - } - if (OPAL_SUCCESS != (rc = opal_dss.pack(buffer, &trk->remote_fd, 1, OPAL_INT))) { - ORTE_ERROR_LOG(rc); - opal_list_remove_item(&requests, &size_dfs->super); - goto complete; - } - - opal_output_verbose(1, orte_dfs_base_framework.framework_output, - "%s sending get_size request to %s for fd %d", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - ORTE_NAME_PRINT(&trk->host_daemon), - trk->local_fd); - /* send it */ - if (0 > (rc = orte_rml.send_buffer_nb(orte_mgmt_conduit, - &trk->host_daemon, buffer, - ORTE_RML_TAG_DFS_CMD, - orte_rml_send_callback, NULL))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(buffer); - opal_list_remove_item(&requests, &size_dfs->super); - if (NULL != size_dfs->size_cbfunc) { - size_dfs->size_cbfunc(-1, size_dfs->cbdata); - } - goto complete; - } - /* leave the request there */ - return; - - complete: - OBJ_RELEASE(size_dfs); -} - -static void dfs_get_file_size(int fd, - orte_dfs_size_callback_fn_t cbfunc, - void *cbdata) -{ - orte_dfs_request_t *dfs; - - opal_output_verbose(1, orte_dfs_base_framework.framework_output, - "%s get_size called on fd %d", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), fd); - - dfs = OBJ_NEW(orte_dfs_request_t); - dfs->cmd = ORTE_DFS_SIZE_CMD; - dfs->local_fd = fd; - dfs->size_cbfunc = cbfunc; - dfs->cbdata = cbdata; - - /* post it for processing */ - ORTE_THREADSHIFT(dfs, orte_event_base, process_sizes, ORTE_SYS_PRI); -} - - -static void process_seeks(int fd, short args, void *cbdata) -{ - orte_dfs_request_t *seek_dfs = (orte_dfs_request_t*)cbdata; - orte_dfs_tracker_t *tptr, *trk; - opal_list_item_t *item; - opal_buffer_t *buffer; - int64_t i64; - int rc; - - ORTE_ACQUIRE_OBJECT(seek_dfs); - - opal_output_verbose(1, 
orte_dfs_base_framework.framework_output, - "%s processing seek on fd %d", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - seek_dfs->local_fd); - - /* look in our local records for this fd */ - trk = NULL; - for (item = opal_list_get_first(&active_files); - item != opal_list_get_end(&active_files); - item = opal_list_get_next(item)) { - tptr = (orte_dfs_tracker_t*)item; - if (tptr->local_fd == seek_dfs->local_fd) { - trk = tptr; - break; - } - } - if (NULL == trk) { - ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND); - OBJ_RELEASE(seek_dfs); - return; - } - - /* add this request to our local list so we can - * match it with the returned response when it comes - */ - seek_dfs->id = req_id++; - opal_list_append(&requests, &seek_dfs->super); - - /* setup a message for the daemon telling - * them what file to seek - */ - buffer = OBJ_NEW(opal_buffer_t); - if (OPAL_SUCCESS != (rc = opal_dss.pack(buffer, &seek_dfs->cmd, 1, ORTE_DFS_CMD_T))) { - ORTE_ERROR_LOG(rc); - goto complete; - } - /* pass the request id */ - if (OPAL_SUCCESS != (rc = opal_dss.pack(buffer, &seek_dfs->id, 1, OPAL_UINT64))) { - ORTE_ERROR_LOG(rc); - opal_list_remove_item(&requests, &seek_dfs->super); - goto complete; - } - if (OPAL_SUCCESS != (rc = opal_dss.pack(buffer, &trk->remote_fd, 1, OPAL_INT))) { - ORTE_ERROR_LOG(rc); - goto complete; - } - i64 = (int64_t)seek_dfs->read_length; - if (OPAL_SUCCESS != (rc = opal_dss.pack(buffer, &i64, 1, OPAL_INT64))) { - ORTE_ERROR_LOG(rc); - goto complete; - } - if (OPAL_SUCCESS != (rc = opal_dss.pack(buffer, &seek_dfs->remote_fd, 1, OPAL_INT))) { - ORTE_ERROR_LOG(rc); - goto complete; - } - - opal_output_verbose(1, orte_dfs_base_framework.framework_output, - "%s sending seek file request to %s for fd %d", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - ORTE_NAME_PRINT(&trk->host_daemon), - trk->local_fd); - /* send it */ - if (0 > (rc = orte_rml.send_buffer_nb(orte_mgmt_conduit, - &trk->host_daemon, buffer, - ORTE_RML_TAG_DFS_CMD, - orte_rml_send_callback, NULL))) { - ORTE_ERROR_LOG(rc); - 
OBJ_RELEASE(buffer); - goto complete; - } - /* leave the request */ - return; - - complete: - OBJ_RELEASE(seek_dfs); -} - - -static void dfs_seek(int fd, long offset, int whence, - orte_dfs_seek_callback_fn_t cbfunc, - void *cbdata) -{ - orte_dfs_request_t *dfs; - - opal_output_verbose(1, orte_dfs_base_framework.framework_output, - "%s seek called on fd %d", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), fd); - - dfs = OBJ_NEW(orte_dfs_request_t); - dfs->cmd = ORTE_DFS_SEEK_CMD; - dfs->local_fd = fd; - dfs->read_length = offset; - dfs->remote_fd = whence; - dfs->seek_cbfunc = cbfunc; - dfs->cbdata = cbdata; - - /* post it for processing */ - ORTE_THREADSHIFT(dfs, orte_event_base, process_seeks, ORTE_SYS_PRI); -} - -static void process_reads(int fd, short args, void *cbdata) -{ - orte_dfs_request_t *read_dfs = (orte_dfs_request_t*)cbdata; - orte_dfs_tracker_t *tptr, *trk; - opal_list_item_t *item; - opal_buffer_t *buffer; - int64_t i64; - int rc; - - ORTE_ACQUIRE_OBJECT(read_dfs); - - /* look in our local records for this fd */ - trk = NULL; - for (item = opal_list_get_first(&active_files); - item != opal_list_get_end(&active_files); - item = opal_list_get_next(item)) { - tptr = (orte_dfs_tracker_t*)item; - if (tptr->local_fd == read_dfs->local_fd) { - trk = tptr; - break; - } - } - if (NULL == trk) { - ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND); - OBJ_RELEASE(read_dfs); - return; - } - - /* add this request to our pending list */ - read_dfs->id = req_id++; - opal_list_append(&requests, &read_dfs->super); - - /* setup a message for the daemon telling - * them what file to read - */ - buffer = OBJ_NEW(opal_buffer_t); - if (OPAL_SUCCESS != (rc = opal_dss.pack(buffer, &read_dfs->cmd, 1, ORTE_DFS_CMD_T))) { - ORTE_ERROR_LOG(rc); - goto complete; - } - /* include the request id */ - if (OPAL_SUCCESS != (rc = opal_dss.pack(buffer, &read_dfs->id, 1, OPAL_UINT64))) { - ORTE_ERROR_LOG(rc); - goto complete; - } - if (OPAL_SUCCESS != (rc = opal_dss.pack(buffer, &trk->remote_fd, 1, OPAL_INT))) 
{ - ORTE_ERROR_LOG(rc); - goto complete; - } - i64 = (int64_t)read_dfs->read_length; - if (OPAL_SUCCESS != (rc = opal_dss.pack(buffer, &i64, 1, OPAL_INT64))) { - ORTE_ERROR_LOG(rc); - goto complete; - } - - opal_output_verbose(1, orte_dfs_base_framework.framework_output, - "%s sending read file request to %s for fd %d", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - ORTE_NAME_PRINT(&trk->host_daemon), - trk->local_fd); - /* send it */ - if (0 > (rc = orte_rml.send_buffer_nb(orte_mgmt_conduit, - &trk->host_daemon, buffer, - ORTE_RML_TAG_DFS_CMD, - orte_rml_send_callback, NULL))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(buffer); - } - /* don't release the request */ - return; - - complete: - /* don't need to hang on to this request */ - opal_list_remove_item(&requests, &read_dfs->super); - OBJ_RELEASE(read_dfs); -} - -static void dfs_read(int fd, uint8_t *buffer, - long length, - orte_dfs_read_callback_fn_t cbfunc, - void *cbdata) -{ - orte_dfs_request_t *dfs; - - dfs = OBJ_NEW(orte_dfs_request_t); - dfs->cmd = ORTE_DFS_READ_CMD; - dfs->local_fd = fd; - dfs->read_buffer = buffer; - dfs->read_length = length; - dfs->read_cbfunc = cbfunc; - dfs->cbdata = cbdata; - - /* post it for processing */ - ORTE_THREADSHIFT(dfs, orte_event_base, process_reads, ORTE_SYS_PRI); -} - -static void process_posts(int fd, short args, void *cbdata) -{ - orte_dfs_request_t *dfs = (orte_dfs_request_t*)cbdata; - opal_buffer_t *buffer; - int rc; - - ORTE_ACQUIRE_OBJECT(dfs); - - /* we will get confirmation in our receive function, so - * add this request to our list */ - dfs->id = req_id++; - opal_list_append(&requests, &dfs->super); - - /* Send the buffer's contents to our local daemon for storage */ - buffer = OBJ_NEW(opal_buffer_t); - if (OPAL_SUCCESS != (rc = opal_dss.pack(buffer, &dfs->cmd, 1, ORTE_DFS_CMD_T))) { - ORTE_ERROR_LOG(rc); - goto error; - } - /* include the request id */ - if (OPAL_SUCCESS != (rc = opal_dss.pack(buffer, &dfs->id, 1, OPAL_UINT64))) { - ORTE_ERROR_LOG(rc); - goto error; 
- } - /* add my name */ - if (OPAL_SUCCESS != (rc = opal_dss.pack(buffer, ORTE_PROC_MY_NAME, 1, ORTE_NAME))) { - ORTE_ERROR_LOG(rc); - goto error; - } - /* pack the payload */ - if (OPAL_SUCCESS != (rc = opal_dss.pack(buffer, &dfs->bptr, 1, OPAL_BUFFER))) { - ORTE_ERROR_LOG(rc); - goto error; - } - /* send it */ - if (0 > (rc = orte_rml.send_buffer_nb(orte_mgmt_conduit, - ORTE_PROC_MY_DAEMON, buffer, - ORTE_RML_TAG_DFS_CMD, - orte_rml_send_callback, NULL))) { - ORTE_ERROR_LOG(rc); - goto error; - } - return; - - error: - OBJ_RELEASE(buffer); - opal_list_remove_item(&requests, &dfs->super); - if (NULL != dfs->post_cbfunc) { - dfs->post_cbfunc(dfs->cbdata); - } - OBJ_RELEASE(dfs); -} - -static void dfs_post_file_map(opal_buffer_t *bo, - orte_dfs_post_callback_fn_t cbfunc, - void *cbdata) -{ - orte_dfs_request_t *dfs; - - dfs = OBJ_NEW(orte_dfs_request_t); - dfs->cmd = ORTE_DFS_POST_CMD; - dfs->bptr = bo; - dfs->post_cbfunc = cbfunc; - dfs->cbdata = cbdata; - - /* post it for processing */ - ORTE_THREADSHIFT(dfs, orte_event_base, process_posts, ORTE_SYS_PRI); -} - -static void process_getfm(int fd, short args, void *cbdata) -{ - orte_dfs_request_t *dfs = (orte_dfs_request_t*)cbdata; - opal_buffer_t *buffer; - int rc; - - ORTE_ACQUIRE_OBJECT(dfs); - - /* we will get confirmation in our receive function, so - * add this request to our list */ - dfs->id = req_id++; - opal_list_append(&requests, &dfs->super); - - /* Send the request to our local daemon */ - buffer = OBJ_NEW(opal_buffer_t); - if (OPAL_SUCCESS != (rc = opal_dss.pack(buffer, &dfs->cmd, 1, ORTE_DFS_CMD_T))) { - ORTE_ERROR_LOG(rc); - goto error; - } - /* include the request id */ - if (OPAL_SUCCESS != (rc = opal_dss.pack(buffer, &dfs->id, 1, OPAL_UINT64))) { - ORTE_ERROR_LOG(rc); - goto error; - } - /* and the target */ - if (OPAL_SUCCESS != (rc = opal_dss.pack(buffer, &dfs->target, 1, ORTE_NAME))) { - ORTE_ERROR_LOG(rc); - goto error; - } - /* send it */ - if (0 > (rc = 
orte_rml.send_buffer_nb(orte_mgmt_conduit, - ORTE_PROC_MY_DAEMON, buffer, - ORTE_RML_TAG_DFS_CMD, - orte_rml_send_callback, NULL))) { - ORTE_ERROR_LOG(rc); - goto error; - } - return; - - error: - OBJ_RELEASE(buffer); - opal_list_remove_item(&requests, &dfs->super); - if (NULL != dfs->fm_cbfunc) { - dfs->fm_cbfunc(NULL, dfs->cbdata); - } - OBJ_RELEASE(dfs); -} - -static void dfs_get_file_map(orte_process_name_t *target, - orte_dfs_fm_callback_fn_t cbfunc, - void *cbdata) -{ - orte_dfs_request_t *dfs; - - dfs = OBJ_NEW(orte_dfs_request_t); - dfs->cmd = ORTE_DFS_GETFM_CMD; - dfs->target.jobid = target->jobid; - dfs->target.vpid = target->vpid; - dfs->fm_cbfunc = cbfunc; - dfs->cbdata = cbdata; - - /* post it for processing */ - ORTE_THREADSHIFT(dfs, orte_event_base, process_getfm, ORTE_SYS_PRI); -} - -static void dfs_load_file_maps(orte_jobid_t jobid, - opal_buffer_t *bo, - orte_dfs_load_callback_fn_t cbfunc, - void *cbdata) -{ - /* apps don't store file maps */ - if (NULL != cbfunc) { - cbfunc(cbdata); - } -} - -static void dfs_purge_file_maps(orte_jobid_t jobid, - orte_dfs_purge_callback_fn_t cbfunc, - void *cbdata) -{ - /* apps don't store file maps */ - if (NULL != cbfunc) { - cbfunc(cbdata); - } -} diff --git a/orte/mca/dfs/test/dfs_test.h b/orte/mca/dfs/test/dfs_test.h deleted file mode 100644 index d9ef7b301bb..00000000000 --- a/orte/mca/dfs/test/dfs_test.h +++ /dev/null @@ -1,35 +0,0 @@ -/* - * Copyright (c) 2012 Los Alamos National Security, LLC. All rights reserved. 
- * - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -/** - * @file - * - */ - -#ifndef MCA_dfs_test_EXPORT_H -#define MCA_dfs_test_EXPORT_H - -#include "orte_config.h" - -#include "orte/mca/dfs/dfs.h" - -BEGIN_C_DECLS - -/* - * Local Component structures - */ - -ORTE_MODULE_DECLSPEC extern orte_dfs_base_component_t mca_dfs_test_component; - -ORTE_DECLSPEC extern orte_dfs_base_module_t orte_dfs_test_module; - -END_C_DECLS - -#endif /* MCA_dfs_test_EXPORT_H */ diff --git a/orte/mca/dfs/test/dfs_test_component.c b/orte/mca/dfs/test/dfs_test_component.c deleted file mode 100644 index 11ec09ced4e..00000000000 --- a/orte/mca/dfs/test/dfs_test_component.c +++ /dev/null @@ -1,100 +0,0 @@ -/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ -/* - * Copyright (c) 2012-2015 Los Alamos National Security, LLC. All rights - * reserved. - * - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "orte_config.h" -#include "opal/util/output.h" - -#include "orte/runtime/orte_globals.h" - -#include "orte/mca/dfs/dfs.h" -#include "orte/mca/dfs/base/base.h" -#include "dfs_test.h" - -/* - * Public string for version number - */ -const char *orte_dfs_test_component_version_string = - "ORTE DFS test MCA component version " ORTE_VERSION; - -/* - * Local functionality - */ -static int dfs_test_register(void); -static int dfs_test_open(void); -static int dfs_test_close(void); -static int dfs_test_component_query(mca_base_module_t **module, int *priority); - -/* - * Instantiate the public struct with all of our public information - * and pointer to our public functions in it - */ -orte_dfs_base_component_t mca_dfs_test_component = -{ - /* Handle the general mca_component_t struct containing - * meta information about the component - */ - .base_version = { - ORTE_DFS_BASE_VERSION_1_0_0, - /* Component name and version */ - .mca_component_name = "test", - MCA_BASE_MAKE_VERSION(component, ORTE_MAJOR_VERSION, 
ORTE_MINOR_VERSION, - ORTE_RELEASE_VERSION), - - /* Component open and close functions */ - .mca_open_component = dfs_test_open, - .mca_close_component = dfs_test_close, - .mca_query_component = dfs_test_component_query, - .mca_register_component_params = dfs_test_register, - }, - .base_data = { - /* The component is checkpoint ready */ - MCA_BASE_METADATA_PARAM_CHECKPOINT - }, -}; - -static bool select_me = false; - -static int dfs_test_register(void) -{ - select_me = false; - (void) mca_base_component_var_register(&mca_dfs_test_component.base_version, "select", - "Apps select the test plug-in for the DFS framework", - MCA_BASE_VAR_TYPE_BOOL, NULL, 0, 0, - OPAL_INFO_LVL_9, - MCA_BASE_VAR_SCOPE_ALL_EQ, &select_me); - - return ORTE_SUCCESS; -} - -static int dfs_test_open(void) -{ - return ORTE_SUCCESS; -} - -static int dfs_test_close(void) -{ - return ORTE_SUCCESS; -} - -static int dfs_test_component_query(mca_base_module_t **module, int *priority) -{ - if (ORTE_PROC_IS_APP && select_me) { - /* set our priority high so apps use us */ - *priority = 10000; - *module = (mca_base_module_t *)&orte_dfs_test_module; - return ORTE_SUCCESS; - } - - *priority = -1; - *module = NULL; - return ORTE_ERROR; -} diff --git a/orte/mca/errmgr/base/help-errmgr-base.txt b/orte/mca/errmgr/base/help-errmgr-base.txt index 07a9f71909f..30ff0f882f1 100644 --- a/orte/mca/errmgr/base/help-errmgr-base.txt +++ b/orte/mca/errmgr/base/help-errmgr-base.txt @@ -12,6 +12,8 @@ # All rights reserved. # Copyright (c) 2014-2017 Intel, Inc. All rights reserved. # Copyright (c) 2017 IBM Corporation. All rights reserved. +# Copyright (c) 2018 Research Organization for Information Science +# and Technology (RIST). All rights reserved. # $COPYRIGHT$ # # Additional copyrights may follow @@ -105,3 +107,6 @@ An internal error has occurred in ORTE: %s This is something that should be reported to the developers. +# +[normal-termination-but] +%s job %s terminated normally, but %d %s. 
Per user-direction, the job has been aborted. diff --git a/orte/mca/errmgr/default_hnp/errmgr_default_hnp.c b/orte/mca/errmgr/default_hnp/errmgr_default_hnp.c index 9c807dd54b4..05a2a83713e 100644 --- a/orte/mca/errmgr/default_hnp/errmgr_default_hnp.c +++ b/orte/mca/errmgr/default_hnp/errmgr_default_hnp.c @@ -11,6 +11,8 @@ * All rights reserved. * Copyright (c) 2014-2018 Intel, Inc. All rights reserved. * Copyright (c) 2017 IBM Corporation. All rights reserved. + * Copyright (c) 2018 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -839,14 +841,11 @@ static void default_hnp_abort(orte_job_t *jdata) i32ptr = &i32; if (orte_get_attribute(&jdata->attributes, ORTE_JOB_NUM_NONZERO_EXIT, (void**)&i32ptr, OPAL_INT32)) { /* warn user */ - opal_output(orte_clean_output, - "-------------------------------------------------------\n" - "%s job %s terminated normally, but %d %s. Per user-direction, the job has been aborted.\n" - "-------------------------------------------------------", - (1 == ORTE_LOCAL_JOBID(jdata->jobid)) ? "Primary" : "Child", - (1 == ORTE_LOCAL_JOBID(jdata->jobid)) ? "" : ORTE_LOCAL_JOBID_PRINT(jdata->jobid), - i32, (1 == i32) ? "process returned\na non-zero exit code" : - "processes returned\nnon-zero exit codes"); + orte_show_help("help-errmgr-base.txt", "normal-termination-but", true, + (1 == ORTE_LOCAL_JOBID(jdata->jobid)) ? "Primary" : "Child", + (1 == ORTE_LOCAL_JOBID(jdata->jobid)) ? "" : ORTE_LOCAL_JOBID_PRINT(jdata->jobid), + i32, (1 == i32) ? "process returned\na non-zero exit code" : + "processes returned\nnon-zero exit codes"); } OPAL_OUTPUT_VERBOSE((1, orte_errmgr_base_framework.framework_output, diff --git a/orte/mca/errmgr/dvm/Makefile.am b/orte/mca/errmgr/dvm/Makefile.am deleted file mode 100644 index 43fbe76550d..00000000000 --- a/orte/mca/errmgr/dvm/Makefile.am +++ /dev/null @@ -1,37 +0,0 @@ -# -# Copyright (c) 2010 Cisco Systems, Inc. 
All rights reserved. -# Copyright (c) 2016 Intel, Inc. All rights reserved. -# Copyright (c) 2017 IBM Corporation. All rights reserved. -# $COPYRIGHT$ -# -# Additional copyrights may follow -# -# $HEADER$ -# - -sources = \ - errmgr_dvm.h \ - errmgr_dvm_component.c \ - errmgr_dvm.c - -# Make the output library in this directory, and name it either -# mca__.la (for DSO builds) or libmca__.la -# (for static builds). - -if MCA_BUILD_orte_errmgr_dvm_DSO -component_noinst = -component_install = mca_errmgr_dvm.la -else -component_noinst = libmca_errmgr_dvm.la -component_install = -endif - -mcacomponentdir = $(ortelibdir) -mcacomponent_LTLIBRARIES = $(component_install) -mca_errmgr_dvm_la_SOURCES = $(sources) -mca_errmgr_dvm_la_LDFLAGS = -module -avoid-version -mca_errmgr_dvm_la_LIBADD = $(top_builddir)/orte/lib@ORTE_LIB_PREFIX@open-rte.la - -noinst_LTLIBRARIES = $(component_noinst) -libmca_errmgr_dvm_la_SOURCES =$(sources) -libmca_errmgr_dvm_la_LDFLAGS = -module -avoid-version diff --git a/orte/mca/errmgr/dvm/errmgr_dvm.c b/orte/mca/errmgr/dvm/errmgr_dvm.c deleted file mode 100644 index f259da2321c..00000000000 --- a/orte/mca/errmgr/dvm/errmgr_dvm.c +++ /dev/null @@ -1,632 +0,0 @@ -/* - * Copyright (c) 2009-2011 The Trustees of Indiana University. - * All rights reserved. - * Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2010-2017 Oak Ridge National Labs. All rights reserved. - * Copyright (c) 2004-2011 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2011 Oracle and/or all its affiliates. All rights reserved. - * Copyright (c) 2011-2013 Los Alamos National Security, LLC. - * All rights reserved. - * Copyright (c) 2014-2018 Intel, Inc. All rights reserved. - * Copyright (c) 2017 IBM Corporation. All rights reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "orte_config.h" - -#include -#ifdef HAVE_UNISTD_H -#include -#endif /* HAVE_UNISTD_H */ -#include -#ifdef HAVE_SYS_WAIT_H -#include -#endif - -#include "opal/util/output.h" -#include "opal/dss/dss.h" - -#include "orte/mca/iof/base/base.h" -#include "orte/mca/rml/rml.h" -#include "orte/mca/odls/odls.h" -#include "orte/mca/odls/base/base.h" -#include "orte/mca/odls/base/odls_private.h" -#include "orte/mca/plm/base/plm_private.h" -#include "orte/mca/plm/plm.h" -#include "orte/mca/rmaps/rmaps_types.h" -#include "orte/mca/routed/routed.h" -#include "orte/mca/grpcomm/grpcomm.h" -#include "orte/mca/ess/ess.h" -#include "orte/mca/state/state.h" - -#include "orte/util/error_strings.h" -#include "orte/util/name_fns.h" -#include "orte/util/proc_info.h" -#include "orte/util/show_help.h" -#include "orte/util/threads.h" - -#include "orte/runtime/orte_globals.h" -#include "orte/runtime/orte_locks.h" -#include "orte/runtime/orte_quit.h" -#include "orte/runtime/data_type_support/orte_dt_support.h" - -#include "orte/mca/errmgr/errmgr.h" -#include "orte/mca/errmgr/base/base.h" -#include "orte/mca/errmgr/base/errmgr_private.h" - -#include "errmgr_dvm.h" - -static int init(void); -static int finalize(void); - -/****************** - * dvm module - ******************/ -orte_errmgr_base_module_t orte_errmgr_dvm_module = { - .init = init, - .finalize = finalize, - .logfn = orte_errmgr_base_log, - .abort = orte_errmgr_base_abort, - .abort_peers = orte_errmgr_base_abort_peers -}; - - -/* - * Local functions - */ -static void job_errors(int fd, short args, void *cbdata); -static void proc_errors(int fd, short args, void *cbdata); - -static int init(void) -{ - /* setup state machine to trap job errors */ - orte_state.add_job_state(ORTE_JOB_STATE_ERROR, job_errors, ORTE_ERROR_PRI); - - /* set the lost connection state to run at MSG priority so - * we can process any last messages from the proc - */ - 
orte_state.add_proc_state(ORTE_PROC_STATE_COMM_FAILED, proc_errors, ORTE_MSG_PRI); - - /* setup state machine to trap proc errors */ - orte_state.add_proc_state(ORTE_PROC_STATE_ERROR, proc_errors, ORTE_ERROR_PRI); - - return ORTE_SUCCESS; -} - -static int finalize(void) -{ - return ORTE_SUCCESS; -} - -static void _terminate_job(orte_jobid_t jobid) -{ - opal_pointer_array_t procs; - orte_proc_t pobj; - - OBJ_CONSTRUCT(&procs, opal_pointer_array_t); - opal_pointer_array_init(&procs, 1, 1, 1); - OBJ_CONSTRUCT(&pobj, orte_proc_t); - pobj.name.jobid = jobid; - pobj.name.vpid = ORTE_VPID_WILDCARD; - opal_pointer_array_add(&procs, &pobj); - orte_plm.terminate_procs(&procs); - OBJ_DESTRUCT(&procs); - OBJ_DESTRUCT(&pobj); -} - -static void job_errors(int fd, short args, void *cbdata) -{ - orte_state_caddy_t *caddy = (orte_state_caddy_t*)cbdata; - orte_job_t *jdata; - orte_job_state_t jobstate; - opal_buffer_t *answer; - int32_t rc, ret; - int room, *rmptr; - - ORTE_ACQUIRE_OBJECT(caddy); - - /* - * if orte is trying to shutdown, just let it - */ - if (orte_finalizing) { - return; - } - - /* if the jdata is NULL, then we ignore it as this - * is reporting an unrecoverable error - */ - if (NULL == caddy->jdata) { - ORTE_ERROR_LOG(ORTE_ERR_BAD_PARAM); - OBJ_RELEASE(caddy); - return; - } - - /* update the state */ - jdata = caddy->jdata; - jobstate = caddy->job_state; - jdata->state = jobstate; - - OPAL_OUTPUT_VERBOSE((1, orte_errmgr_base_framework.framework_output, - "%s errmgr:dvm: job %s reported state %s", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - ORTE_JOBID_PRINT(jdata->jobid), - orte_job_state_to_str(jobstate))); - - if (jdata->jobid == ORTE_PROC_MY_NAME->jobid) { - /* if the daemon job aborted and we haven't heard from everyone yet, - * then this could well have been caused by a daemon not finding - * a way back to us. In this case, output a message indicating a daemon - * died without reporting. 
Otherwise, say nothing as we - * likely already output an error message */ - if (ORTE_JOB_STATE_ABORTED == jobstate && - jdata->num_procs != jdata->num_reported) { - orte_routing_is_enabled = false; - orte_show_help("help-errmgr-base.txt", "failed-daemon", true); - } - /* there really isn't much else we can do since the problem - * is in the DVM itself, so best just to terminate */ - jdata->num_terminated = jdata->num_procs; - /* activate the terminated state so we can exit */ - ORTE_ACTIVATE_JOB_STATE(jdata, ORTE_JOB_STATE_TERMINATED); - OBJ_RELEASE(caddy); - return; - } - - /* all other cases involve jobs submitted to the DVM - therefore, - * we only inform the submitter of the problem, but do NOT terminate - * the DVM itself */ - - rc = jobstate; - answer = OBJ_NEW(opal_buffer_t); - if (ORTE_SUCCESS != (ret = opal_dss.pack(answer, &rc, 1, OPAL_INT32))) { - ORTE_ERROR_LOG(ret); - OBJ_RELEASE(caddy); - return; - } - if (ORTE_SUCCESS != (ret = opal_dss.pack(answer, &jdata->jobid, 1, ORTE_JOBID))) { - ORTE_ERROR_LOG(ret); - OBJ_RELEASE(caddy); - return; - } - /* pack the room number */ - rmptr = &room; - if (orte_get_attribute(&jdata->attributes, ORTE_JOB_ROOM_NUM, (void**)&rmptr, OPAL_INT)) { - if (ORTE_SUCCESS != (ret = opal_dss.pack(answer, &room, 1, OPAL_INT))) { - ORTE_ERROR_LOG(ret); - OBJ_RELEASE(caddy); - return; - } - } - OPAL_OUTPUT_VERBOSE((5, orte_errmgr_base_framework.framework_output, - "%s errmgr:dvm sending notification of job %s failure to %s", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - ORTE_JOBID_PRINT(jdata->jobid), - ORTE_NAME_PRINT(&jdata->originator))); - if (0 > (ret = orte_rml.send_buffer_nb(orte_mgmt_conduit, - &jdata->originator, answer, - ORTE_RML_TAG_LAUNCH_RESP, - orte_rml_send_callback, NULL))) { - ORTE_ERROR_LOG(ret); - OBJ_RELEASE(answer); - } - /* ensure we terminate any processes left running in the DVM */ - _terminate_job(jdata->jobid); - - /* cleanup */ - OBJ_RELEASE(caddy); -} - -static void proc_errors(int fd, short args, void 
*cbdata) -{ - orte_state_caddy_t *caddy = (orte_state_caddy_t*)cbdata; - orte_job_t *jdata; - orte_proc_t *pptr, *proct; - orte_process_name_t *proc = &caddy->name; - orte_proc_state_t state = caddy->proc_state; - int i; - int32_t i32, *i32ptr; - char *rtmod; - - ORTE_ACQUIRE_OBJECT(caddy); - - OPAL_OUTPUT_VERBOSE((1, orte_errmgr_base_framework.framework_output, - "%s errmgr:dvm: for proc %s state %s", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - ORTE_NAME_PRINT(proc), - orte_proc_state_to_str(state))); - - /* - * if orte is trying to shutdown, just let it - */ - if (orte_finalizing) { - goto cleanup; - } - - /* get the job object */ - if (NULL == (jdata = orte_get_job_data_object(proc->jobid))) { - /* could be a race condition */ - goto cleanup; - } - pptr = (orte_proc_t*)opal_pointer_array_get_item(jdata->procs, proc->vpid); - - /* get the management conduit's routed module */ - rtmod = orte_rml.get_routed(orte_mgmt_conduit); - - /* we MUST handle a communication failure before doing anything else - * as it requires some special care to avoid normal termination issues - * for local application procs - */ - if (ORTE_PROC_STATE_COMM_FAILED == state) { - /* is this to a daemon? 
*/ - if (ORTE_PROC_MY_NAME->jobid != proc->jobid) { - /* nope - ignore it */ - OPAL_OUTPUT_VERBOSE((5, orte_errmgr_base_framework.framework_output, - "%s Comm failure to non-daemon proc - ignoring it", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME))); - goto cleanup; - } - /* if this is my own connection, ignore it */ - if (ORTE_PROC_MY_NAME->vpid == proc->vpid) { - OPAL_OUTPUT_VERBOSE((5, orte_errmgr_base_framework.framework_output, - "%s Comm failure on my own connection - ignoring it", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME))); - goto cleanup; - } - /* mark the daemon as gone */ - ORTE_FLAG_UNSET(pptr, ORTE_PROC_FLAG_ALIVE); - /* update the state */ - pptr->state = state; - /* adjust our num_procs */ - --orte_process_info.num_procs; - /* if we have ordered orteds to terminate or abort - * is in progress, record it */ - if (orte_orteds_term_ordered || orte_abnormal_term_ordered) { - OPAL_OUTPUT_VERBOSE((5, orte_errmgr_base_framework.framework_output, - "%s Comm failure: daemons terminating - recording daemon %s as gone", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_NAME_PRINT(proc))); - /* remove from dependent routes, if it is one */ - orte_routed.route_lost(rtmod, proc); - /* if all my routes and local children are gone, then terminate ourselves */ - if (0 == orte_routed.num_routes(rtmod)) { - for (i=0; i < orte_local_children->size; i++) { - if (NULL != (proct = (orte_proc_t*)opal_pointer_array_get_item(orte_local_children, i)) && - ORTE_FLAG_TEST(pptr, ORTE_PROC_FLAG_ALIVE) && proct->state < ORTE_PROC_STATE_UNTERMINATED) { - /* at least one is still alive */ - OPAL_OUTPUT_VERBOSE((5, orte_errmgr_base_framework.framework_output, - "%s Comm failure: at least one proc (%s) still alive", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - ORTE_NAME_PRINT(&proct->name))); - goto cleanup; - } - } - /* call our appropriate exit procedure */ - OPAL_OUTPUT_VERBOSE((5, orte_errmgr_base_framework.framework_output, - "%s errmgr_dvm: all routes and children gone - ordering exit", - 
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME))); - ORTE_ACTIVATE_JOB_STATE(NULL, ORTE_JOB_STATE_DAEMONS_TERMINATED); - } else { - OPAL_OUTPUT_VERBOSE((5, orte_errmgr_base_framework.framework_output, - "%s Comm failure: %d routes remain alive", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - (int)orte_routed.num_routes(rtmod))); - } - goto cleanup; - } - OPAL_OUTPUT_VERBOSE((5, orte_errmgr_base_framework.framework_output, - "%s Comm failure: daemon %s - aborting", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_NAME_PRINT(proc))); - /* record the first one to fail */ - if (!ORTE_FLAG_TEST(jdata, ORTE_JOB_FLAG_ABORTED)) { - /* output an error message so the user knows what happened */ - orte_show_help("help-errmgr-base.txt", "node-died", true, - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - orte_process_info.nodename, - ORTE_NAME_PRINT(proc), - pptr->node->name); - /* mark the daemon job as failed */ - jdata->state = ORTE_JOB_STATE_COMM_FAILED; - /* point to the lowest rank to cause the problem */ - orte_set_attribute(&jdata->attributes, ORTE_JOB_ABORTED_PROC, ORTE_ATTR_LOCAL, pptr, OPAL_PTR); - /* retain the object so it doesn't get free'd */ - OBJ_RETAIN(pptr); - ORTE_FLAG_SET(jdata, ORTE_JOB_FLAG_ABORTED); - /* update our exit code */ - ORTE_UPDATE_EXIT_STATUS(pptr->exit_code); - /* just in case the exit code hadn't been set, do it here - this - * won't override any reported exit code */ - ORTE_UPDATE_EXIT_STATUS(ORTE_ERR_COMM_FAILURE); - } - goto cleanup; - } - - /* update the proc state - can get multiple reports on a proc - * depending on circumstances, so ensure we only do this once - */ - if (pptr->state < ORTE_PROC_STATE_TERMINATED) { - pptr->state = state; - } - - /* if we were ordered to terminate, mark this proc as dead and see if - * any of our routes or local children remain alive - if not, then - * terminate ourselves. 
*/ - if (orte_orteds_term_ordered) { - for (i=0; i < orte_local_children->size; i++) { - if (NULL != (proct = (orte_proc_t*)opal_pointer_array_get_item(orte_local_children, i))) { - if (ORTE_FLAG_TEST(proct, ORTE_PROC_FLAG_ALIVE)) { - goto keep_going; - } - } - } - /* if all my routes and children are gone, then terminate - ourselves nicely (i.e., this is a normal termination) */ - if (0 == orte_routed.num_routes(rtmod)) { - OPAL_OUTPUT_VERBOSE((2, orte_errmgr_base_framework.framework_output, - "%s errmgr:default:dvm all routes gone - exiting", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME))); - ORTE_ACTIVATE_JOB_STATE(NULL, ORTE_JOB_STATE_DAEMONS_TERMINATED); - } - } - - keep_going: - /* ensure we record the failed proc properly so we can report - * the error once we terminate - */ - switch (state) { - case ORTE_PROC_STATE_KILLED_BY_CMD: - OPAL_OUTPUT_VERBOSE((5, orte_errmgr_base_framework.framework_output, - "%s errmgr:dvm: proc %s killed by cmd", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - ORTE_NAME_PRINT(proc))); - /* we ordered this proc to die, so it isn't an abnormal termination - * and we don't flag it as such - */ - if (jdata->num_terminated >= jdata->num_procs) { - /* this job has terminated */ - ORTE_ACTIVATE_JOB_STATE(jdata, ORTE_JOB_STATE_TERMINATED); - } - /* don't abort the job as this isn't an abnormal termination */ - break; - - case ORTE_PROC_STATE_ABORTED: - OPAL_OUTPUT_VERBOSE((5, orte_errmgr_base_framework.framework_output, - "%s errmgr:dvm: proc %s aborted", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - ORTE_NAME_PRINT(proc))); - if (!ORTE_FLAG_TEST(jdata, ORTE_JOB_FLAG_ABORTED)) { - jdata->state = ORTE_JOB_STATE_ABORTED; - /* point to the first rank to cause the problem */ - orte_set_attribute(&jdata->attributes, ORTE_JOB_ABORTED_PROC, ORTE_ATTR_LOCAL, pptr, OPAL_PTR); - /* retain the object so it doesn't get free'd */ - OBJ_RETAIN(pptr); - ORTE_FLAG_SET(jdata, ORTE_JOB_FLAG_ABORTED); - ORTE_UPDATE_EXIT_STATUS(pptr->exit_code); - /* kill the job */ - 
_terminate_job(jdata->jobid); - } - break; - - case ORTE_PROC_STATE_ABORTED_BY_SIG: - OPAL_OUTPUT_VERBOSE((5, orte_errmgr_base_framework.framework_output, - "%s errmgr:dvm: proc %s aborted by signal", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - ORTE_NAME_PRINT(proc))); - if (!ORTE_FLAG_TEST(jdata, ORTE_JOB_FLAG_ABORTED)) { - jdata->state = ORTE_JOB_STATE_ABORTED_BY_SIG; - /* point to the first rank to cause the problem */ - orte_set_attribute(&jdata->attributes, ORTE_JOB_ABORTED_PROC, ORTE_ATTR_LOCAL, pptr, OPAL_PTR); - /* retain the object so it doesn't get free'd */ - OBJ_RETAIN(pptr); - ORTE_FLAG_SET(jdata, ORTE_JOB_FLAG_ABORTED); - ORTE_UPDATE_EXIT_STATUS(pptr->exit_code); - /* kill the job */ - _terminate_job(jdata->jobid); - } - break; - - case ORTE_PROC_STATE_TERM_WO_SYNC: - OPAL_OUTPUT_VERBOSE((5, orte_errmgr_base_framework.framework_output, - "%s errmgr:dvm: proc %s terminated without sync", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - ORTE_NAME_PRINT(proc))); - if (!ORTE_FLAG_TEST(jdata, ORTE_JOB_FLAG_ABORTED)) { - jdata->state = ORTE_JOB_STATE_ABORTED_WO_SYNC; - /* point to the first rank to cause the problem */ - orte_set_attribute(&jdata->attributes, ORTE_JOB_ABORTED_PROC, ORTE_ATTR_LOCAL, pptr, OPAL_PTR); - /* retain the object so it doesn't get free'd */ - OBJ_RETAIN(pptr); - ORTE_FLAG_SET(jdata, ORTE_JOB_FLAG_ABORTED); - ORTE_UPDATE_EXIT_STATUS(pptr->exit_code); - /* now treat a special case - if the proc exit'd without a required - * sync, it may have done so with a zero exit code. 
We want to ensure - * that the user realizes there was an error, so in this -one- case, - * we overwrite the process' exit code with the default error code - */ - ORTE_UPDATE_EXIT_STATUS(ORTE_ERROR_DEFAULT_EXIT_CODE); - /* kill the job */ - _terminate_job(jdata->jobid); - } - break; - - case ORTE_PROC_STATE_FAILED_TO_START: - case ORTE_PROC_STATE_FAILED_TO_LAUNCH: - OPAL_OUTPUT_VERBOSE((5, orte_errmgr_base_framework.framework_output, - "%s errmgr:dvm: proc %s %s", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - ORTE_NAME_PRINT(proc), - orte_proc_state_to_str(state))); - if (!ORTE_FLAG_TEST(jdata, ORTE_JOB_FLAG_ABORTED)) { - opal_buffer_t *answer; - int id, *idptr, ret; - - if (ORTE_PROC_STATE_FAILED_TO_START) { - jdata->state = ORTE_JOB_STATE_FAILED_TO_START; - } else { - jdata->state = ORTE_JOB_STATE_FAILED_TO_LAUNCH; - } - /* point to the first rank to cause the problem */ - orte_set_attribute(&jdata->attributes, ORTE_JOB_ABORTED_PROC, ORTE_ATTR_LOCAL, pptr, OPAL_PTR); - /* retain the object so it doesn't get free'd */ - OBJ_RETAIN(pptr); - ORTE_FLAG_SET(jdata, ORTE_JOB_FLAG_ABORTED); - /* send a notification to the requestor - indicate that this is a spawn response */ - answer = OBJ_NEW(opal_buffer_t); - /* pack the return status */ - if (ORTE_SUCCESS != (ret = opal_dss.pack(answer, &pptr->exit_code, 1, OPAL_INT32))) { - ORTE_ERROR_LOG(ret); - OBJ_RELEASE(answer); - goto CLEANUP; - } - /* pack the jobid to be returned */ - if (ORTE_SUCCESS != (ret = opal_dss.pack(answer, &jdata->jobid, 1, ORTE_JOBID))) { - ORTE_ERROR_LOG(ret); - OBJ_RELEASE(answer); - goto CLEANUP; - } - idptr = &id; - if (orte_get_attribute(&jdata->attributes, ORTE_JOB_ROOM_NUM, (void**)&idptr, OPAL_INT)) { - /* pack the sender's index to the tracking object */ - if (ORTE_SUCCESS != (ret = opal_dss.pack(answer, idptr, 1, OPAL_INT))) { - ORTE_ERROR_LOG(ret); - OBJ_RELEASE(answer); - goto CLEANUP; - } - } - if (orte_get_attribute(&jdata->attributes, ORTE_JOB_FIXED_DVM, NULL, OPAL_BOOL)) { - /* we need to 
send the requestor more info about what happened */ - opal_dss.pack(answer, &jdata->state, 1, ORTE_JOB_STATE_T); - opal_dss.pack(answer, &pptr, 1, ORTE_PROC); - opal_dss.pack(answer, &pptr->node, 1, ORTE_NODE); - } - /* return response */ - if (0 > (ret = orte_rml.send_buffer_nb(orte_mgmt_conduit, - &jdata->originator, answer, - ORTE_RML_TAG_LAUNCH_RESP, - orte_rml_send_callback, NULL))) { - ORTE_ERROR_LOG(ret); - OBJ_RELEASE(answer); - } - /* record that we notified about this job */ - jdata->state = ORTE_JOB_STATE_NOTIFIED; - CLEANUP: - /* kill the job */ - _terminate_job(jdata->jobid); - } - /* if this was a daemon, report it */ - if (jdata->jobid == ORTE_PROC_MY_NAME->jobid) { - /* output a message indicating we failed to launch a daemon */ - orte_show_help("help-errmgr-base.txt", "failed-daemon-launch", true); - } - ORTE_ACTIVATE_JOB_STATE(jdata, ORTE_JOB_STATE_TERMINATED); - break; - - case ORTE_PROC_STATE_CALLED_ABORT: - OPAL_OUTPUT_VERBOSE((5, orte_errmgr_base_framework.framework_output, - "%s errmgr:dvm: proc %s called abort with exit code %d", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - ORTE_NAME_PRINT(proc), pptr->exit_code)); - if (!ORTE_FLAG_TEST(jdata, ORTE_JOB_FLAG_ABORTED)) { - jdata->state = ORTE_JOB_STATE_CALLED_ABORT; - /* point to the first proc to cause the problem */ - orte_set_attribute(&jdata->attributes, ORTE_JOB_ABORTED_PROC, ORTE_ATTR_LOCAL, pptr, OPAL_PTR); - /* retain the object so it doesn't get free'd */ - OBJ_RETAIN(pptr); - ORTE_FLAG_SET(jdata, ORTE_JOB_FLAG_ABORTED); - ORTE_UPDATE_EXIT_STATUS(pptr->exit_code); - /* kill the job */ - _terminate_job(jdata->jobid); - } - break; - - case ORTE_PROC_STATE_TERM_NON_ZERO: - OPAL_OUTPUT_VERBOSE((5, orte_errmgr_base_framework.framework_output, - "%s errmgr:dvm: proc %s exited with non-zero status %d", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - ORTE_NAME_PRINT(proc), - pptr->exit_code)); - ORTE_UPDATE_EXIT_STATUS(pptr->exit_code); - /* track the number of non-zero exits */ - i32 = 0; - i32ptr = 
&i32; - orte_get_attribute(&jdata->attributes, ORTE_JOB_NUM_NONZERO_EXIT, (void**)&i32ptr, OPAL_INT32); - ++i32; - orte_set_attribute(&jdata->attributes, ORTE_JOB_NUM_NONZERO_EXIT, ORTE_ATTR_LOCAL, i32ptr, OPAL_INT32); - if (orte_abort_non_zero_exit) { - if (!ORTE_FLAG_TEST(jdata, ORTE_JOB_FLAG_ABORTED)) { - jdata->state = ORTE_JOB_STATE_NON_ZERO_TERM; - /* point to the first rank to cause the problem */ - orte_set_attribute(&jdata->attributes, ORTE_JOB_ABORTED_PROC, ORTE_ATTR_LOCAL, pptr, OPAL_PTR); - /* retain the object so it doesn't get free'd */ - OBJ_RETAIN(pptr); - ORTE_FLAG_SET(jdata, ORTE_JOB_FLAG_ABORTED); - /* kill the job */ - _terminate_job(jdata->jobid); - } - } else { - /* user requested we consider this normal termination */ - if (jdata->num_terminated >= jdata->num_procs) { - /* this job has terminated */ - ORTE_ACTIVATE_JOB_STATE(jdata, ORTE_JOB_STATE_TERMINATED); - } - } - break; - - case ORTE_PROC_STATE_HEARTBEAT_FAILED: - OPAL_OUTPUT_VERBOSE((5, orte_errmgr_base_framework.framework_output, - "%s errmgr:dvm: proc %s heartbeat failed", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - ORTE_NAME_PRINT(proc))); - if (!ORTE_FLAG_TEST(jdata, ORTE_JOB_FLAG_ABORTED)) { - jdata->state = ORTE_JOB_STATE_HEARTBEAT_FAILED; - /* point to the first rank to cause the problem */ - orte_set_attribute(&jdata->attributes, ORTE_JOB_ABORTED_PROC, ORTE_ATTR_LOCAL, pptr, OPAL_PTR); - /* retain the object so it doesn't get free'd */ - OBJ_RETAIN(pptr); - ORTE_FLAG_SET(jdata, ORTE_JOB_FLAG_ABORTED); - ORTE_UPDATE_EXIT_STATUS(pptr->exit_code); - /* kill the job */ - _terminate_job(jdata->jobid); - } - /* remove from dependent routes, if it is one */ - orte_routed.route_lost(rtmod, proc); - break; - - case ORTE_PROC_STATE_UNABLE_TO_SEND_MSG: - OPAL_OUTPUT_VERBOSE((5, orte_errmgr_base_framework.framework_output, - "%s errmgr:dvm: unable to send message to proc %s", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - ORTE_NAME_PRINT(proc))); - /* if this proc is one of my daemons, then we are 
truly - * hosed - so just exit out - */ - if (ORTE_PROC_MY_NAME->jobid == proc->jobid) { - ORTE_ACTIVATE_JOB_STATE(NULL, ORTE_JOB_STATE_DAEMONS_TERMINATED); - break; - } - break; - - default: - /* shouldn't get this, but terminate job if required */ - OPAL_OUTPUT_VERBOSE((5, orte_errmgr_base_framework.framework_output, - "%s errmgr:dvm: proc %s default error %s", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - ORTE_NAME_PRINT(proc), - orte_proc_state_to_str(state))); - if (jdata->num_terminated == jdata->num_procs) { - ORTE_ACTIVATE_JOB_STATE(jdata, ORTE_JOB_STATE_TERMINATED); - } - break; - } - /* if the waitpid fired, be sure to let the state machine know */ - if (ORTE_FLAG_TEST(pptr, ORTE_PROC_FLAG_WAITPID)) { - ORTE_ACTIVATE_PROC_STATE(&pptr->name, ORTE_PROC_STATE_WAITPID_FIRED); - } - - cleanup: - OBJ_RELEASE(caddy); -} diff --git a/orte/mca/errmgr/dvm/errmgr_dvm.h b/orte/mca/errmgr/dvm/errmgr_dvm.h deleted file mode 100644 index 291394d9a56..00000000000 --- a/orte/mca/errmgr/dvm/errmgr_dvm.h +++ /dev/null @@ -1,39 +0,0 @@ -/* - * Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2004-2011 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2016 Intel, Inc. All rights reserved. 
- * - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -/** - * @file - * - */ - -#ifndef MCA_ERRMGR_dvm_EXPORT_H -#define MCA_ERRMGR_dvm_EXPORT_H - -#include "orte_config.h" - -#include "orte/mca/errmgr/errmgr.h" - -BEGIN_C_DECLS - -/* - * Local Component structures - */ - -ORTE_MODULE_DECLSPEC extern orte_errmgr_base_component_t mca_errmgr_dvm_component; - -ORTE_DECLSPEC extern orte_errmgr_base_module_t orte_errmgr_dvm_module; - -END_C_DECLS - -#endif /* MCA_ERRMGR_dvm_EXPORT_H */ diff --git a/orte/mca/errmgr/dvm/errmgr_dvm_component.c b/orte/mca/errmgr/dvm/errmgr_dvm_component.c deleted file mode 100644 index 879062893bf..00000000000 --- a/orte/mca/errmgr/dvm/errmgr_dvm_component.c +++ /dev/null @@ -1,102 +0,0 @@ -/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ -/* - * Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2015 Los Alamos National Security, LLC. All rights - * reserved. - * Copyright (c) 2016 Intel, Inc. All rights reserved. 
- * - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "orte_config.h" -#include "opal/util/output.h" - -#include "orte/mca/errmgr/errmgr.h" -#include "orte/mca/errmgr/base/base.h" -#include "orte/mca/errmgr/base/errmgr_private.h" -#include "errmgr_dvm.h" - -/* - * Public string for version number - */ -const char *orte_errmgr_dvm_component_version_string = - "ORTE ERRMGR dvm MCA component version " ORTE_VERSION; - -/* - * Local functionality - */ -static int dvm_register(void); -static int dvm_open(void); -static int dvm_close(void); -static int dvm_component_query(mca_base_module_t **module, int *priority); - -/* - * Instantiate the public struct with all of our public information - * and pointer to our public functions in it - */ -orte_errmgr_base_component_t mca_errmgr_dvm_component = { - /* Handle the general mca_component_t struct containing - * meta information about the component dvm - */ - .base_version = { - ORTE_ERRMGR_BASE_VERSION_3_0_0, - /* Component name and version */ - .mca_component_name = "dvm", - MCA_BASE_MAKE_VERSION(component, ORTE_MAJOR_VERSION, ORTE_MINOR_VERSION, - ORTE_RELEASE_VERSION), - - /* Component open and close functions */ - .mca_open_component = dvm_open, - .mca_close_component = dvm_close, - .mca_query_component = dvm_component_query, - .mca_register_component_params = dvm_register, - }, - .base_data = { - /* The component is checkpoint ready */ - MCA_BASE_METADATA_PARAM_CHECKPOINT - }, -}; - -static int my_priority; - -static int dvm_register(void) -{ - mca_base_component_t *c = &mca_errmgr_dvm_component.base_version; - - my_priority = 1000; - (void) mca_base_component_var_register(c, "priority", - "Priority of the dvm errmgr component", - MCA_BASE_VAR_TYPE_INT, NULL, 0, 0, - OPAL_INFO_LVL_9, - MCA_BASE_VAR_SCOPE_READONLY, &my_priority); - - return ORTE_SUCCESS; -} - -static int dvm_open(void) -{ - return ORTE_SUCCESS; -} - -static int dvm_close(void) -{ - return ORTE_SUCCESS; -} - -static 
int dvm_component_query(mca_base_module_t **module, int *priority) -{ - /* used by DVM masters */ - if (ORTE_PROC_IS_MASTER) { - *priority = my_priority; - *module = (mca_base_module_t *)&orte_errmgr_dvm_module; - return ORTE_SUCCESS; - } - - *module = NULL; - *priority = -1; - return ORTE_ERROR; -} diff --git a/orte/mca/ess/Makefile.am b/orte/mca/ess/Makefile.am index 21354432311..3edc397d321 100644 --- a/orte/mca/ess/Makefile.am +++ b/orte/mca/ess/Makefile.am @@ -19,6 +19,10 @@ AM_CPPFLAGS = $(LTDLINCL) +# Add unwind flags because files in this tree are +# involved in startup. +AM_CFLAGS = $(MPIR_UNWIND_CFLAGS) + # main library setup noinst_LTLIBRARIES = libmca_ess.la libmca_ess_la_SOURCES = diff --git a/orte/mca/ess/alps/ess_alps_module.c b/orte/mca/ess/alps/ess_alps_module.c index 533a054e8b8..bfbbc3d9f62 100644 --- a/orte/mca/ess/alps/ess_alps_module.c +++ b/orte/mca/ess/alps/ess_alps_module.c @@ -45,7 +45,7 @@ static int rte_finalize(void); orte_ess_base_module_t orte_ess_alps_module = { rte_init, rte_finalize, - orte_ess_base_app_abort, + NULL, NULL /* ft_event */ }; diff --git a/orte/mca/ess/base/Makefile.am b/orte/mca/ess/base/Makefile.am index db1903699cf..70528b9d8ab 100644 --- a/orte/mca/ess/base/Makefile.am +++ b/orte/mca/ess/base/Makefile.am @@ -10,7 +10,7 @@ # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. # Copyright (c) 2013 Los Alamos National Security, LLC. All rights reserved. -# Copyright (c) 2015-2017 Intel, Inc. All rights reserved. +# Copyright (c) 2015-2018 Intel, Inc. All rights reserved. 
# $COPYRIGHT$ # # Additional copyrights may follow @@ -28,7 +28,6 @@ libmca_ess_la_SOURCES += \ base/ess_base_select.c \ base/ess_base_get.c \ base/ess_base_std_tool.c \ - base/ess_base_std_app.c \ base/ess_base_std_orted.c \ base/ess_base_std_prolog.c \ base/ess_base_fns.c diff --git a/orte/mca/ess/base/base.h b/orte/mca/ess/base/base.h index 679bac4b199..139a6cff469 100644 --- a/orte/mca/ess/base/base.h +++ b/orte/mca/ess/base/base.h @@ -12,7 +12,7 @@ * Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2012 Oak Ridge National Labs. All rights reserved. * Copyright (c) 2013 Los Alamos National Security, LLC. All rights reserved. - * Copyright (c) 2013-2017 Intel, Inc. All rights reserved. + * Copyright (c) 2013-2018 Intel, Inc. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -61,10 +61,6 @@ ORTE_DECLSPEC int orte_ess_env_get(void); ORTE_DECLSPEC int orte_ess_base_std_prolog(void); -ORTE_DECLSPEC int orte_ess_base_app_setup(bool db_restrict_local); -ORTE_DECLSPEC int orte_ess_base_app_finalize(void); -ORTE_DECLSPEC void orte_ess_base_app_abort(int status, bool report); - ORTE_DECLSPEC int orte_ess_base_tool_setup(opal_list_t *flags); ORTE_DECLSPEC int orte_ess_base_tool_finalize(void); diff --git a/orte/mca/ess/base/ess_base_std_app.c b/orte/mca/ess/base/ess_base_std_app.c deleted file mode 100644 index a02711f5f43..00000000000 --- a/orte/mca/ess/base/ess_base_std_app.c +++ /dev/null @@ -1,407 +0,0 @@ -/* - * Copyright (c) 2004-2010 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2011 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. 
- * Copyright (c) 2010-2012 Oak Ridge National Labs. All rights reserved. - * Copyright (c) 2011-2013 Los Alamos National Security, LLC. All rights - * reserved. - * Copyright (c) 2013-2018 Intel, Inc. All rights reserved. - * Copyright (c) 2014-2016 Research Organization for Information Science - * and Technology (RIST). All rights reserved. - * Copyright (c) 2015 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2018 Mellanox Technologies, Inc. - * All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "orte_config.h" -#include "orte/constants.h" - -#include -#include -#ifdef HAVE_FCNTL_H -#include -#endif -#ifdef HAVE_UNISTD_H -#include -#endif -#ifdef HAVE_SYS_STAT_H -#include -#endif - -#include "opal/mca/event/event.h" -#include "opal/mca/pmix/base/base.h" -#include "opal/util/arch.h" -#include "opal/util/os_path.h" -#include "opal/util/output.h" -#include "opal/util/proc.h" -#include "opal/runtime/opal.h" - -#include "orte/mca/rml/base/base.h" -#include "orte/mca/routed/base/base.h" -#include "orte/mca/errmgr/errmgr.h" -#include "orte/mca/dfs/base/base.h" -#include "orte/mca/grpcomm/base/base.h" -#include "orte/mca/oob/base/base.h" -#include "orte/mca/rml/rml.h" -#include "orte/mca/rml/base/rml_contact.h" -#include "orte/mca/odls/odls_types.h" -#include "orte/mca/filem/base/base.h" -#include "orte/mca/errmgr/base/base.h" -#include "orte/mca/state/base/base.h" -#include "orte/util/proc_info.h" -#include "orte/util/session_dir.h" -#include "orte/util/name_fns.h" -#include "orte/util/show_help.h" -#include "opal/util/timings.h" - -#include "orte/runtime/orte_globals.h" -#include "orte/runtime/orte_wait.h" - -#include "orte/mca/ess/base/base.h" - -int orte_ess_base_app_setup(bool db_restrict_local) -{ - int ret; - char *error = NULL; - opal_list_t transports; - - OPAL_TIMING_ENV_INIT(ess_base_setup); - /* - * stdout/stderr buffering - * If the user requested to override the default setting then do 
- * as they wish. - */ - if( orte_ess_base_std_buffering > -1 ) { - if( 0 == orte_ess_base_std_buffering ) { - setvbuf(stdout, NULL, _IONBF, 0); - setvbuf(stderr, NULL, _IONBF, 0); - } - else if( 1 == orte_ess_base_std_buffering ) { - setvbuf(stdout, NULL, _IOLBF, 0); - setvbuf(stderr, NULL, _IOLBF, 0); - } - else if( 2 == orte_ess_base_std_buffering ) { - setvbuf(stdout, NULL, _IOFBF, 0); - setvbuf(stderr, NULL, _IOFBF, 0); - } - } - - /* if I am an MPI app, we will let the MPI layer define and - * control the opal_proc_t structure. Otherwise, we need to - * do so here */ - if (ORTE_PROC_NON_MPI) { - orte_process_info.super.proc_name = *(opal_process_name_t*)ORTE_PROC_MY_NAME; - orte_process_info.super.proc_hostname = orte_process_info.nodename; - orte_process_info.super.proc_flags = OPAL_PROC_ALL_LOCAL; - orte_process_info.super.proc_arch = opal_local_arch; - opal_proc_local_set(&orte_process_info.super); - } - - /* open and setup the state machine */ - if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_state_base_framework, 0))) { - ORTE_ERROR_LOG(ret); - error = "orte_state_base_open"; - goto error; - } - if (ORTE_SUCCESS != (ret = orte_state_base_select())) { - ORTE_ERROR_LOG(ret); - error = "orte_state_base_select"; - goto error; - } - OPAL_TIMING_ENV_NEXT(ess_base_setup, "state_framework_open"); - - /* open the errmgr */ - if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_errmgr_base_framework, 0))) { - ORTE_ERROR_LOG(ret); - error = "orte_errmgr_base_open"; - goto error; - } - OPAL_TIMING_ENV_NEXT(ess_base_setup, "errmgr_framework_open"); - - /* setup my session directory */ - if (orte_create_session_dirs) { - OPAL_OUTPUT_VERBOSE((2, orte_ess_base_framework.framework_output, - "%s setting up session dir with\n\ttmpdir: %s\n\thost %s", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - (NULL == orte_process_info.tmpdir_base) ? 
"UNDEF" : orte_process_info.tmpdir_base, - orte_process_info.nodename)); - if (ORTE_SUCCESS != (ret = orte_session_dir(true, ORTE_PROC_MY_NAME))) { - ORTE_ERROR_LOG(ret); - error = "orte_session_dir"; - goto error; - } - /* Once the session directory location has been established, set - the opal_output env file location to be in the - proc-specific session directory. */ - opal_output_set_output_file_info(orte_process_info.proc_session_dir, - "output-", NULL, NULL); - /* register the directory for cleanup */ - if (NULL != opal_pmix.register_cleanup) { - if (orte_standalone_operation) { - if (OPAL_SUCCESS != (ret = opal_pmix.register_cleanup(orte_process_info.top_session_dir, true, false, true))) { - ORTE_ERROR_LOG(ret); - error = "register cleanup"; - goto error; - } - } else { - if (OPAL_SUCCESS != (ret = opal_pmix.register_cleanup(orte_process_info.job_session_dir, true, false, false))) { - ORTE_ERROR_LOG(ret); - error = "register cleanup"; - goto error; - } - } - } - } - OPAL_TIMING_ENV_NEXT(ess_base_setup, "create_session_dirs"); - - /* Setup the communication infrastructure */ - /* Routed system */ - if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_routed_base_framework, 0))) { - ORTE_ERROR_LOG(ret); - error = "orte_routed_base_open"; - goto error; - } - if (ORTE_SUCCESS != (ret = orte_routed_base_select())) { - ORTE_ERROR_LOG(ret); - error = "orte_routed_base_select"; - goto error; - } - OPAL_TIMING_ENV_NEXT(ess_base_setup, "routed_framework_open"); - - /* - * OOB Layer - */ - if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_oob_base_framework, 0))) { - ORTE_ERROR_LOG(ret); - error = "orte_oob_base_open"; - goto error; - } - if (ORTE_SUCCESS != (ret = orte_oob_base_select())) { - ORTE_ERROR_LOG(ret); - error = "orte_oob_base_select"; - goto error; - } - OPAL_TIMING_ENV_NEXT(ess_base_setup, "oob_framework_open"); - - /* Runtime Messaging Layer */ - if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_rml_base_framework, 0))) { - 
ORTE_ERROR_LOG(ret); - error = "orte_rml_base_open"; - goto error; - } - if (ORTE_SUCCESS != (ret = orte_rml_base_select())) { - ORTE_ERROR_LOG(ret); - error = "orte_rml_base_select"; - goto error; - } - OPAL_TIMING_ENV_NEXT(ess_base_setup, "rml_framework_open"); - - /* if we have info on the HNP and local daemon, process it */ - if (NULL != orte_process_info.my_hnp_uri) { - /* we have to set the HNP's name, even though we won't route messages directly - * to it. This is required to ensure that we -do- send messages to the correct - * HNP name - */ - if (ORTE_SUCCESS != (ret = orte_rml_base_parse_uris(orte_process_info.my_hnp_uri, - ORTE_PROC_MY_HNP, NULL))) { - ORTE_ERROR_LOG(ret); - error = "orte_rml_parse_HNP"; - goto error; - } - } - if (NULL != orte_process_info.my_daemon_uri) { - opal_value_t val; - - /* extract the daemon's name so we can update the routing table */ - if (ORTE_SUCCESS != (ret = orte_rml_base_parse_uris(orte_process_info.my_daemon_uri, - ORTE_PROC_MY_DAEMON, NULL))) { - ORTE_ERROR_LOG(ret); - error = "orte_rml_parse_daemon"; - goto error; - } - /* Set the contact info in the database - this won't actually establish - * the connection, but just tells us how to reach the daemon - * if/when we attempt to send to it - */ - OBJ_CONSTRUCT(&val, opal_value_t); - val.key = OPAL_PMIX_PROC_URI; - val.type = OPAL_STRING; - val.data.string = orte_process_info.my_daemon_uri; - if (OPAL_SUCCESS != (ret = opal_pmix.store_local(ORTE_PROC_MY_DAEMON, &val))) { - ORTE_ERROR_LOG(ret); - val.key = NULL; - val.data.string = NULL; - OBJ_DESTRUCT(&val); - error = "store DAEMON URI"; - goto error; - } - val.key = NULL; - val.data.string = NULL; - OBJ_DESTRUCT(&val); - } - - /* setup the errmgr */ - if (ORTE_SUCCESS != (ret = orte_errmgr_base_select())) { - ORTE_ERROR_LOG(ret); - error = "orte_errmgr_base_select"; - goto error; - } - OPAL_TIMING_ENV_NEXT(ess_base_setup, "errmgr_select"); - - /* get a conduit for our use - we never route IO over fabric */ - 
OBJ_CONSTRUCT(&transports, opal_list_t); - orte_set_attribute(&transports, ORTE_RML_TRANSPORT_TYPE, - ORTE_ATTR_LOCAL, orte_mgmt_transport, OPAL_STRING); - if (ORTE_RML_CONDUIT_INVALID == (orte_mgmt_conduit = orte_rml.open_conduit(&transports))) { - ret = ORTE_ERR_OPEN_CONDUIT_FAIL; - error = "orte_rml_open_mgmt_conduit"; - goto error; - } - OPAL_LIST_DESTRUCT(&transports); - - OBJ_CONSTRUCT(&transports, opal_list_t); - orte_set_attribute(&transports, ORTE_RML_TRANSPORT_TYPE, - ORTE_ATTR_LOCAL, orte_coll_transport, OPAL_STRING); - if (ORTE_RML_CONDUIT_INVALID == (orte_coll_conduit = orte_rml.open_conduit(&transports))) { - ret = ORTE_ERR_OPEN_CONDUIT_FAIL; - error = "orte_rml_open_coll_conduit"; - goto error; - } - OPAL_LIST_DESTRUCT(&transports); - OPAL_TIMING_ENV_NEXT(ess_base_setup, "rml_open_conduit"); - - /* - * Group communications - */ - if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_grpcomm_base_framework, 0))) { - ORTE_ERROR_LOG(ret); - error = "orte_grpcomm_base_open"; - goto error; - } - if (ORTE_SUCCESS != (ret = orte_grpcomm_base_select())) { - ORTE_ERROR_LOG(ret); - error = "orte_grpcomm_base_select"; - goto error; - } - OPAL_TIMING_ENV_NEXT(ess_base_setup, "grpcomm_framework_open"); - - /* open the distributed file system */ - if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_dfs_base_framework, 0))) { - ORTE_ERROR_LOG(ret); - error = "orte_dfs_base_open"; - goto error; - } - if (ORTE_SUCCESS != (ret = orte_dfs_base_select())) { - ORTE_ERROR_LOG(ret); - error = "orte_dfs_base_select"; - goto error; - } - OPAL_TIMING_ENV_NEXT(ess_base_setup, "dfs_framework_open"); - - return ORTE_SUCCESS; - error: - orte_show_help("help-orte-runtime.txt", - "orte_init:startup:internal-failure", - true, error, ORTE_ERROR_NAME(ret), ret); - return ret; -} - -int orte_ess_base_app_finalize(void) -{ - /* release the conduits */ - orte_rml.close_conduit(orte_mgmt_conduit); - orte_rml.close_conduit(orte_coll_conduit); - - /* close frameworks */ - (void) 
mca_base_framework_close(&orte_filem_base_framework); - (void) mca_base_framework_close(&orte_errmgr_base_framework); - - /* now can close the rml and its friendly group comm */ - (void) mca_base_framework_close(&orte_grpcomm_base_framework); - (void) mca_base_framework_close(&orte_dfs_base_framework); - (void) mca_base_framework_close(&orte_routed_base_framework); - - (void) mca_base_framework_close(&orte_rml_base_framework); - if (NULL != opal_pmix.finalize) { - opal_pmix.finalize(); - (void) mca_base_framework_close(&opal_pmix_base_framework); - } - (void) mca_base_framework_close(&orte_oob_base_framework); - (void) mca_base_framework_close(&orte_state_base_framework); - - if (NULL == opal_pmix.register_cleanup) { - orte_session_dir_finalize(ORTE_PROC_MY_NAME); - } - /* cleanup the process info */ - orte_proc_info_finalize(); - - return ORTE_SUCCESS; -} - -/* - * We do NOT call the regular C-library "abort" function, even - * though that would have alerted us to the fact that this is - * an abnormal termination, because it would automatically cause - * a core file to be generated. On large systems, that can be - * overwhelming (imagine a few thousand Gbyte-sized files hitting - * a shared file system simultaneously...ouch!). - * - * However, this causes a problem for OpenRTE as the system truly - * needs to know that this actually IS an abnormal termination. - * To get around the problem, we drop a marker in the proc-level - * session dir. If session dir's were not allowed, then we just - * ignore this question. - * - * In some cases, however, we DON'T want to create that alert. For - * example, if an orted detects that the HNP has died, then there - * is truly nobody to alert! In these cases, we pass report=false - * to indicate that we don't want the marker dropped. 
- */ -void orte_ess_base_app_abort(int status, bool report) -{ - int fd; - char *myfile; - struct timespec tp = {0, 100000}; - - /* Exit - do NOT do a normal finalize as this will very likely - * hang the process. We are aborting due to an abnormal condition - * that precludes normal cleanup - * - * We do need to do the following bits to make sure we leave a - * clean environment. Taken from orte_finalize(): - * - Assume errmgr cleans up child processes before we exit. - */ - - /* If we were asked to report this termination, do so. - * Since singletons don't start an HNP unless necessary, and - * direct-launched procs don't have daemons at all, only send - * the message if routing is enabled as this indicates we - * have someone to send to - */ - if (report && orte_routing_is_enabled && orte_create_session_dirs) { - myfile = opal_os_path(false, orte_process_info.proc_session_dir, "aborted", NULL); - fd = open(myfile, O_CREAT, S_IRUSR); - close(fd); - /* now introduce a short delay to allow any pending - * messages (e.g., from a call to "show_help") to - * have a chance to be sent */ - nanosleep(&tp, NULL); - } - /* - Clean out the global structures - * (not really necessary, but good practice) */ - orte_proc_info_finalize(); - /* Now Exit */ - _exit(status); -} diff --git a/orte/mca/ess/base/ess_base_std_orted.c b/orte/mca/ess/base/ess_base_std_orted.c index 7f505338800..9711bd64fdd 100644 --- a/orte/mca/ess/base/ess_base_std_orted.c +++ b/orte/mca/ess/base/ess_base_std_orted.c @@ -52,7 +52,6 @@ #include "orte/mca/routed/base/base.h" #include "orte/mca/routed/routed.h" #include "orte/mca/oob/base/base.h" -#include "orte/mca/dfs/base/base.h" #include "orte/mca/grpcomm/grpcomm.h" #include "orte/mca/grpcomm/base/base.h" #include "orte/mca/iof/base/base.h" @@ -621,18 +620,6 @@ int orte_ess_base_orted_setup(void) goto error; } - /* setup the DFS framework */ - if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_dfs_base_framework, 0))) { - ORTE_ERROR_LOG(ret); - 
error = "orte_dfs_base_open"; - goto error; - } - if (ORTE_SUCCESS != (ret = orte_dfs_base_select())) { - ORTE_ERROR_LOG(ret); - error = "orte_dfs_select"; - goto error; - } - return ORTE_SUCCESS; error: @@ -684,8 +671,6 @@ int orte_ess_base_orted_finalize(void) (void) mca_base_framework_close(&orte_iof_base_framework); (void) mca_base_framework_close(&orte_errmgr_base_framework); (void) mca_base_framework_close(&orte_plm_base_framework); - /* close the dfs so its threads can exit */ - (void) mca_base_framework_close(&orte_dfs_base_framework); /* make sure our local procs are dead */ orte_odls.kill_local_procs(NULL); (void) mca_base_framework_close(&orte_rtc_base_framework); diff --git a/orte/mca/ess/env/ess_env_module.c b/orte/mca/ess/env/ess_env_module.c index df55650e4a8..ac107d08b1f 100644 --- a/orte/mca/ess/env/ess_env_module.c +++ b/orte/mca/ess/env/ess_env_module.c @@ -77,7 +77,7 @@ static int rte_finalize(void); orte_ess_base_module_t orte_ess_env_module = { rte_init, rte_finalize, - orte_ess_base_app_abort, + NULL, NULL }; diff --git a/orte/mca/ess/hnp/ess_hnp_module.c b/orte/mca/ess/hnp/ess_hnp_module.c index 56b4f56be64..e8e811645e8 100644 --- a/orte/mca/ess/hnp/ess_hnp_module.c +++ b/orte/mca/ess/hnp/ess_hnp_module.c @@ -62,7 +62,6 @@ #include "orte/mca/routed/base/base.h" #include "orte/mca/routed/routed.h" #include "orte/mca/rtc/base/base.h" -#include "orte/mca/dfs/base/base.h" #include "orte/mca/errmgr/base/base.h" #include "orte/mca/grpcomm/base/base.h" #include "orte/mca/iof/base/base.h" @@ -699,18 +698,6 @@ static int rte_init(void) goto error; } - /* setup the dfs framework */ - if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_dfs_base_framework, 0))) { - ORTE_ERROR_LOG(ret); - error = "orte_dfs_base_open"; - goto error; - } - if (ORTE_SUCCESS != (ret = orte_dfs_base_select())) { - ORTE_ERROR_LOG(ret); - error = "orte_dfs_select"; - goto error; - } - /* setup to support debugging */ 
orte_state.add_job_state(ORTE_JOB_STATE_READY_FOR_DEBUGGERS, orte_debugger_init_after_spawn, @@ -795,7 +782,6 @@ static int rte_finalize(void) /* shutdown the pmix server */ pmix_server_finalize(); (void) mca_base_framework_close(&opal_pmix_base_framework); - (void) mca_base_framework_close(&orte_dfs_base_framework); (void) mca_base_framework_close(&orte_filem_base_framework); /* output any lingering stdout/err data */ fflush(stdout); @@ -859,6 +845,52 @@ static int rte_finalize(void) if (orte_do_not_launch) { exit(0); } + +{ + opal_pointer_array_t * array = orte_node_topologies; + int i; + if( array->number_free != array->size ) { + OPAL_THREAD_LOCK(&array->lock); + array->lowest_free = 0; + array->number_free = array->size; + for(i=0; i<array->size; i++) { + if(NULL != array->addr[i]) { + orte_topology_t * topo = (orte_topology_t *)array->addr[i]; + topo->topo = NULL; + OBJ_RELEASE(topo); + } + array->addr[i] = NULL; + } + OPAL_THREAD_UNLOCK(&array->lock); + } +} + OBJ_RELEASE(orte_node_topologies); + +{ + opal_pointer_array_t * array = orte_node_pool; + int i; + orte_node_t* node = (orte_node_t *)opal_pointer_array_get_item(orte_node_pool, 0); + assert(NULL != node); + OBJ_RELEASE(node->daemon); + node->daemon = NULL; + if( array->number_free != array->size ) { + OPAL_THREAD_LOCK(&array->lock); + array->lowest_free = 0; + array->number_free = array->size; + for(i=0; i<array->size; i++) { + if(NULL != array->addr[i]) { + node= (orte_node_t*)array->addr[i]; + OBJ_RELEASE(node); + } + array->addr[i] = NULL; + } + OPAL_THREAD_UNLOCK(&array->lock); + } +} + OBJ_RELEASE(orte_node_pool); + + free(orte_topo_signature); + return ORTE_SUCCESS; } diff --git a/orte/mca/ess/lsf/ess_lsf_module.c b/orte/mca/ess/lsf/ess_lsf_module.c index 18d3ddc5546..32247aeeec8 100644 --- a/orte/mca/ess/lsf/ess_lsf_module.c +++ b/orte/mca/ess/lsf/ess_lsf_module.c @@ -53,7 +53,7 @@ static int rte_finalize(void); orte_ess_base_module_t orte_ess_lsf_module = { rte_init, rte_finalize, - orte_ess_base_app_abort, 
+ NULL, NULL /* ft_event */ }; @@ -100,15 +100,6 @@ static int rte_init(void) } - /* otherwise, I must be an application process - use - * the default procedure to finish my setup - */ - if (ORTE_SUCCESS != (ret = orte_ess_base_app_setup(false))) { - ORTE_ERROR_LOG(ret); - error = "orte_ess_base_app_setup"; - goto error; - } - return ORTE_SUCCESS; error: @@ -137,14 +128,6 @@ static int rte_finalize(void) ORTE_ERROR_LOG(ret); } return ret; - } else { - /* otherwise, I must be an application process - * use the default procedure to finish - */ - if (ORTE_SUCCESS != (ret = orte_ess_base_app_finalize())) { - ORTE_ERROR_LOG(ret); - return ret; - } } return ORTE_SUCCESS;; diff --git a/orte/mca/ess/pmi/Makefile.am b/orte/mca/ess/pmi/Makefile.am index 3d532702859..15d38a0817e 100644 --- a/orte/mca/ess/pmi/Makefile.am +++ b/orte/mca/ess/pmi/Makefile.am @@ -11,6 +11,12 @@ # $HEADER$ # +# Add MPIR unwind flags because files in this tree are +# involved in startup. This is not needed in the other +# subdirs in orte/mca/ess because the other components are +# solely used by daemons and thus are not accessible by the debugger. +AM_CFLAGS = $(MPIR_UNWIND_CFLAGS) + AM_CPPFLAGS = $(ess_pmi_CPPFLAGS) sources = \ diff --git a/orte/mca/ess/pmi/ess_pmi_module.c b/orte/mca/ess/pmi/ess_pmi_module.c index 563b8115dcf..45d75f56161 100644 --- a/orte/mca/ess/pmi/ess_pmi_module.c +++ b/orte/mca/ess/pmi/ess_pmi_module.c @@ -17,6 +17,7 @@ * and Technology (RIST). All rights reserved. * Copyright (c) 2018 Mellanox Technologies, Inc. * All rights reserved. + * Copyright (c) 2018 FUJITSU LIMITED. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -45,6 +46,7 @@ #include "opal/util/opal_environ.h" #include "opal/util/output.h" +#include "opal/util/arch.h" #include "opal/util/argv.h" #include "opal/runtime/opal_progress_threads.h" #include "opal/class/opal_pointer_array.h" @@ -55,11 +57,15 @@ #include "opal/mca/pmix/base/base.h" #include "opal/util/timings.h" -#include "orte/mca/errmgr/errmgr.h" +#include "orte/mca/errmgr/base/base.h" +#include "orte/mca/filem/base/base.h" #include "orte/mca/grpcomm/grpcomm.h" #include "orte/mca/rml/rml.h" +#include "orte/mca/rml/base/rml_contact.h" #include "orte/mca/schizo/schizo.h" +#include "orte/mca/state/base/base.h" #include "orte/util/proc_info.h" +#include "orte/util/session_dir.h" #include "orte/util/show_help.h" #include "orte/util/name_fns.h" #include "orte/util/pre_condition_transports.h" @@ -85,6 +91,7 @@ static bool added_transport_keys=false; static bool added_num_procs = false; static bool added_app_ctx = false; static bool progress_thread_running = false; +static bool direct_launched = false; /**** MODULE FUNCTIONS ****/ @@ -135,13 +142,17 @@ static int rte_init(void) opal_pmix_base_set_evbase(orte_event_base); OPAL_TIMING_ENV_NEXT(rte_init, "pmix_framework_open"); + /* see if we were direct launched */ + if (ORTE_SCHIZO_DIRECT_LAUNCHED == orte_schizo.check_launch_environment()) { + direct_launched = true; + } + /* initialize the selected module */ if (!opal_pmix.initialized() && (OPAL_SUCCESS != (ret = opal_pmix.init(NULL)))) { /* we cannot run - this could be due to being direct launched * without the required PMI support being built. 
Try to detect * that scenario and warn the user */ - if (ORTE_SCHIZO_DIRECT_LAUNCHED == orte_schizo.check_launch_environment() && - NULL != (envar = getenv("ORTE_SCHIZO_DETECTION"))) { + if (direct_launched && NULL != (envar = getenv("ORTE_SCHIZO_DETECTION"))) { if (0 == strcmp(envar, "SLURM")) { /* yes to both - so emit a hopefully helpful * error message and abort */ @@ -176,7 +187,7 @@ static int rte_init(void) pname.vpid = 0; OPAL_TIMING_ENV_NEXT(rte_init, "pmix_init"); - + /* get our local rank from PMI */ OPAL_MODEX_RECV_VALUE(ret, OPAL_PMIX_LOCAL_RANK, ORTE_PROC_MY_NAME, &u16ptr, OPAL_UINT16); @@ -412,12 +423,145 @@ static int rte_init(void) OPAL_TIMING_ENV_NEXT(rte_init, "pmix_set_locality"); /* now that we have all required info, complete the setup */ - if (ORTE_SUCCESS != (ret = orte_ess_base_app_setup(false))) { + /* + * stdout/stderr buffering + * If the user requested to override the default setting then do + * as they wish. + */ + if( orte_ess_base_std_buffering > -1 ) { + if( 0 == orte_ess_base_std_buffering ) { + setvbuf(stdout, NULL, _IONBF, 0); + setvbuf(stderr, NULL, _IONBF, 0); + } + else if( 1 == orte_ess_base_std_buffering ) { + setvbuf(stdout, NULL, _IOLBF, 0); + setvbuf(stderr, NULL, _IOLBF, 0); + } + else if( 2 == orte_ess_base_std_buffering ) { + setvbuf(stdout, NULL, _IOFBF, 0); + setvbuf(stderr, NULL, _IOFBF, 0); + } + } + + /* if I am an MPI app, we will let the MPI layer define and + * control the opal_proc_t structure. 
Otherwise, we need to + * do so here */ + if (ORTE_PROC_NON_MPI) { + orte_process_info.super.proc_name = *(opal_process_name_t*)ORTE_PROC_MY_NAME; + orte_process_info.super.proc_hostname = orte_process_info.nodename; + orte_process_info.super.proc_flags = OPAL_PROC_ALL_LOCAL; + orte_process_info.super.proc_arch = opal_local_arch; + opal_proc_local_set(&orte_process_info.super); + } + + /* open and setup the state machine */ + if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_state_base_framework, 0))) { + ORTE_ERROR_LOG(ret); + error = "orte_state_base_open"; + goto error; + } + if (ORTE_SUCCESS != (ret = orte_state_base_select())) { ORTE_ERROR_LOG(ret); - error = "orte_ess_base_app_setup"; + error = "orte_state_base_select"; goto error; } - OPAL_TIMING_ENV_NEXT(rte_init, "ess_base_app_setup"); + OPAL_TIMING_ENV_NEXT(rte_init, "state_framework_open"); + + /* open the errmgr */ + if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_errmgr_base_framework, 0))) { + ORTE_ERROR_LOG(ret); + error = "orte_errmgr_base_open"; + goto error; + } + OPAL_TIMING_ENV_NEXT(rte_init, "errmgr_framework_open"); + + /* setup my session directory */ + if (orte_create_session_dirs) { + OPAL_OUTPUT_VERBOSE((2, orte_ess_base_framework.framework_output, + "%s setting up session dir with\n\ttmpdir: %s\n\thost %s", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), + (NULL == orte_process_info.tmpdir_base) ? "UNDEF" : orte_process_info.tmpdir_base, + orte_process_info.nodename)); + if (ORTE_SUCCESS != (ret = orte_session_dir(true, ORTE_PROC_MY_NAME))) { + ORTE_ERROR_LOG(ret); + error = "orte_session_dir"; + goto error; + } + /* Once the session directory location has been established, set + the opal_output env file location to be in the + proc-specific session directory. 
*/ + opal_output_set_output_file_info(orte_process_info.proc_session_dir, + "output-", NULL, NULL); + /* register the directory for cleanup */ + if (NULL != opal_pmix.register_cleanup) { + if (orte_standalone_operation) { + if (OPAL_SUCCESS != (ret = opal_pmix.register_cleanup(orte_process_info.top_session_dir, true, false, true))) { + ORTE_ERROR_LOG(ret); + error = "register cleanup"; + goto error; + } + } else { + if (OPAL_SUCCESS != (ret = opal_pmix.register_cleanup(orte_process_info.job_session_dir, true, false, false))) { + ORTE_ERROR_LOG(ret); + error = "register cleanup"; + goto error; + } + } + } + } + OPAL_TIMING_ENV_NEXT(rte_init, "create_session_dirs"); + + /* if we have info on the HNP and local daemon, process it */ + if (NULL != orte_process_info.my_hnp_uri) { + /* we have to set the HNP's name, even though we won't route messages directly + * to it. This is required to ensure that we -do- send messages to the correct + * HNP name + */ + if (ORTE_SUCCESS != (ret = orte_rml_base_parse_uris(orte_process_info.my_hnp_uri, + ORTE_PROC_MY_HNP, NULL))) { + ORTE_ERROR_LOG(ret); + error = "orte_rml_parse_HNP"; + goto error; + } + } + if (NULL != orte_process_info.my_daemon_uri) { + opal_value_t val; + + /* extract the daemon's name so we can update the routing table */ + if (ORTE_SUCCESS != (ret = orte_rml_base_parse_uris(orte_process_info.my_daemon_uri, + ORTE_PROC_MY_DAEMON, NULL))) { + ORTE_ERROR_LOG(ret); + error = "orte_rml_parse_daemon"; + goto error; + } + /* Set the contact info in the database - this won't actually establish + * the connection, but just tells us how to reach the daemon + * if/when we attempt to send to it + */ + OBJ_CONSTRUCT(&val, opal_value_t); + val.key = OPAL_PMIX_PROC_URI; + val.type = OPAL_STRING; + val.data.string = orte_process_info.my_daemon_uri; + if (OPAL_SUCCESS != (ret = opal_pmix.store_local(ORTE_PROC_MY_DAEMON, &val))) { + ORTE_ERROR_LOG(ret); + val.key = NULL; + val.data.string = NULL; + OBJ_DESTRUCT(&val); + error = 
"store DAEMON URI"; + goto error; + } + val.key = NULL; + val.data.string = NULL; + OBJ_DESTRUCT(&val); + } + + /* setup the errmgr */ + if (ORTE_SUCCESS != (ret = orte_errmgr_base_select())) { + ORTE_ERROR_LOG(ret); + error = "orte_errmgr_base_select"; + goto error; + } + OPAL_TIMING_ENV_NEXT(rte_init, "errmgr_select"); /* setup process binding */ if (ORTE_SUCCESS != (ret = orte_ess_base_proc_binding())) { @@ -458,10 +602,13 @@ static int rte_init(void) if (ORTE_PROC_IS_NON_MPI && !orte_do_not_barrier) { /* need to commit the data before we fence */ opal_pmix.commit(); - opal_pmix.fence(NULL, 0); + if (ORTE_SUCCESS != (ret = opal_pmix.fence(NULL, 0))) { + error = "opal_pmix.fence() failed"; + goto error; + } } OPAL_TIMING_ENV_NEXT(rte_init, "rte_init_done"); - + return ORTE_SUCCESS; error: @@ -481,8 +628,6 @@ static int rte_init(void) static int rte_finalize(void) { - int ret; - /* remove the envars that we pushed into environ * so we leave that structure intact */ @@ -496,11 +641,21 @@ static int rte_finalize(void) unsetenv("OMPI_APP_CTX_NUM_PROCS"); } - /* use the default app procedure to finish */ - if (ORTE_SUCCESS != (ret = orte_ess_base_app_finalize())) { - ORTE_ERROR_LOG(ret); - return ret; + /* close frameworks */ + (void) mca_base_framework_close(&orte_filem_base_framework); + (void) mca_base_framework_close(&orte_errmgr_base_framework); + + if (NULL != opal_pmix.finalize) { + opal_pmix.finalize(); + (void) mca_base_framework_close(&opal_pmix_base_framework); + } + (void) mca_base_framework_close(&orte_state_base_framework); + + if (direct_launched) { + orte_session_dir_finalize(ORTE_PROC_MY_NAME); } + /* cleanup the process info */ + orte_proc_info_finalize(); /* release the event base */ if (progress_thread_running) { diff --git a/orte/mca/ess/singleton/ess_singleton_module.c b/orte/mca/ess/singleton/ess_singleton_module.c index 7729b9bc0fd..da9321b08ed 100644 --- a/orte/mca/ess/singleton/ess_singleton_module.c +++ 
b/orte/mca/ess/singleton/ess_singleton_module.c @@ -12,7 +12,7 @@ * All rights reserved. * Copyright (c) 2010 Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2011 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2013-2017 Intel, Inc. All rights reserved. + * Copyright (c) 2013-2018 Intel, Inc. All rights reserved. * Copyright (c) 2015 Los Alamos National Security, LLC. All rights * reserved. * Copyright (c) 2016-2017 Research Organization for Information Science @@ -39,9 +39,11 @@ #include #include "opal/hash_string.h" +#include "opal/util/arch.h" #include "opal/util/argv.h" #include "opal/util/opal_environ.h" #include "opal/util/path.h" +#include "opal/util/timings.h" #include "opal/runtime/opal_progress_threads.h" #include "opal/mca/installdirs/installdirs.h" #include "opal/mca/pmix/base/base.h" @@ -49,8 +51,11 @@ #include "orte/util/show_help.h" #include "orte/util/proc_info.h" -#include "orte/mca/errmgr/errmgr.h" +#include "orte/mca/errmgr/base/base.h" +#include "orte/mca/filem/base/base.h" #include "orte/mca/plm/base/base.h" +#include "orte/mca/rml/base/rml_contact.h" +#include "orte/mca/state/base/base.h" #include "orte/util/name_fns.h" #include "orte/runtime/orte_globals.h" #include "orte/util/session_dir.h" @@ -63,11 +68,12 @@ static int rte_init(void); static int rte_finalize(void); +static void rte_abort(int status, bool report); orte_ess_base_module_t orte_ess_singleton_module = { rte_init, rte_finalize, - orte_ess_base_app_abort, + rte_abort, NULL /* ft_event */ }; @@ -272,15 +278,190 @@ static int rte_init(void) } } - /* use the std app init to complete the procedure */ - if (ORTE_SUCCESS != (rc = orte_ess_base_app_setup(true))) { - ORTE_ERROR_LOG(rc); - return rc; + /* now that we have all required info, complete the setup */ + /* + * stdout/stderr buffering + * If the user requested to override the default setting then do + * as they wish. 
+ */ + if( orte_ess_base_std_buffering > -1 ) { + if( 0 == orte_ess_base_std_buffering ) { + setvbuf(stdout, NULL, _IONBF, 0); + setvbuf(stderr, NULL, _IONBF, 0); + } + else if( 1 == orte_ess_base_std_buffering ) { + setvbuf(stdout, NULL, _IOLBF, 0); + setvbuf(stderr, NULL, _IOLBF, 0); + } + else if( 2 == orte_ess_base_std_buffering ) { + setvbuf(stdout, NULL, _IOFBF, 0); + setvbuf(stderr, NULL, _IOFBF, 0); + } + } + + /* if I am an MPI app, we will let the MPI layer define and + * control the opal_proc_t structure. Otherwise, we need to + * do so here */ + if (ORTE_PROC_NON_MPI) { + orte_process_info.super.proc_name = *(opal_process_name_t*)ORTE_PROC_MY_NAME; + orte_process_info.super.proc_hostname = orte_process_info.nodename; + orte_process_info.super.proc_flags = OPAL_PROC_ALL_LOCAL; + orte_process_info.super.proc_arch = opal_local_arch; + opal_proc_local_set(&orte_process_info.super); + } + + /* open and setup the state machine */ + if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_state_base_framework, 0))) { + ORTE_ERROR_LOG(ret); + error = "orte_state_base_open"; + goto error; + } + if (ORTE_SUCCESS != (ret = orte_state_base_select())) { + ORTE_ERROR_LOG(ret); + error = "orte_state_base_select"; + goto error; + } + + /* open the errmgr */ + if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_errmgr_base_framework, 0))) { + ORTE_ERROR_LOG(ret); + error = "orte_errmgr_base_open"; + goto error; + } + + /* setup my session directory */ + if (orte_create_session_dirs) { + OPAL_OUTPUT_VERBOSE((2, orte_ess_base_framework.framework_output, + "%s setting up session dir with\n\ttmpdir: %s\n\thost %s", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), + (NULL == orte_process_info.tmpdir_base) ? 
"UNDEF" : orte_process_info.tmpdir_base, + orte_process_info.nodename)); + if (ORTE_SUCCESS != (ret = orte_session_dir(true, ORTE_PROC_MY_NAME))) { + ORTE_ERROR_LOG(ret); + error = "orte_session_dir"; + goto error; + } + /* Once the session directory location has been established, set + the opal_output env file location to be in the + proc-specific session directory. */ + opal_output_set_output_file_info(orte_process_info.proc_session_dir, + "output-", NULL, NULL); + /* register the directory for cleanup */ + if (NULL != opal_pmix.register_cleanup) { + if (orte_standalone_operation) { + if (OPAL_SUCCESS != (ret = opal_pmix.register_cleanup(orte_process_info.top_session_dir, true, false, true))) { + ORTE_ERROR_LOG(ret); + error = "register cleanup"; + goto error; + } + } else { + if (OPAL_SUCCESS != (ret = opal_pmix.register_cleanup(orte_process_info.job_session_dir, true, false, false))) { + ORTE_ERROR_LOG(ret); + error = "register cleanup"; + goto error; + } + } + } + } + + /* if we have info on the HNP and local daemon, process it */ + if (NULL != orte_process_info.my_hnp_uri) { + /* we have to set the HNP's name, even though we won't route messages directly + * to it. 
This is required to ensure that we -do- send messages to the correct + * HNP name + */ + if (ORTE_SUCCESS != (ret = orte_rml_base_parse_uris(orte_process_info.my_hnp_uri, + ORTE_PROC_MY_HNP, NULL))) { + ORTE_ERROR_LOG(ret); + error = "orte_rml_parse_HNP"; + goto error; + } + } + if (NULL != orte_process_info.my_daemon_uri) { + opal_value_t val; + + /* extract the daemon's name so we can update the routing table */ + if (ORTE_SUCCESS != (ret = orte_rml_base_parse_uris(orte_process_info.my_daemon_uri, + ORTE_PROC_MY_DAEMON, NULL))) { + ORTE_ERROR_LOG(ret); + error = "orte_rml_parse_daemon"; + goto error; + } + /* Set the contact info in the database - this won't actually establish + * the connection, but just tells us how to reach the daemon + * if/when we attempt to send to it + */ + OBJ_CONSTRUCT(&val, opal_value_t); + val.key = OPAL_PMIX_PROC_URI; + val.type = OPAL_STRING; + val.data.string = orte_process_info.my_daemon_uri; + if (OPAL_SUCCESS != (ret = opal_pmix.store_local(ORTE_PROC_MY_DAEMON, &val))) { + ORTE_ERROR_LOG(ret); + val.key = NULL; + val.data.string = NULL; + OBJ_DESTRUCT(&val); + error = "store DAEMON URI"; + goto error; + } + val.key = NULL; + val.data.string = NULL; + OBJ_DESTRUCT(&val); + } + + /* setup the errmgr */ + if (ORTE_SUCCESS != (ret = orte_errmgr_base_select())) { + ORTE_ERROR_LOG(ret); + error = "orte_errmgr_base_select"; + goto error; + } + + /* setup process binding */ + if (ORTE_SUCCESS != (ret = orte_ess_base_proc_binding())) { + error = "proc_binding"; + goto error; + } + + /* this needs to be set to enable debugger use when direct launched */ + if (NULL == orte_process_info.my_daemon_uri) { + orte_standalone_operation = true; + } + + /* set max procs */ + if (orte_process_info.max_procs < orte_process_info.num_procs) { + orte_process_info.max_procs = orte_process_info.num_procs; + } + + /* push our hostname so others can find us, if they need to - the + * native PMIx component will ignore this request as the hostname + * is 
provided by the system */ + OPAL_MODEX_SEND_VALUE(ret, OPAL_PMIX_GLOBAL, OPAL_PMIX_HOSTNAME, orte_process_info.nodename, OPAL_STRING); + if (ORTE_SUCCESS != ret) { + error = "db store hostname"; + goto error; + } + + /* if we are an ORTE app - and not an MPI app - then + * we need to exchange our connection info here. + * MPI_Init has its own modex, so we don't need to do + * two of them. However, if we don't do a modex at all, + * then processes have no way to communicate + * + * NOTE: only do this when the process originally launches. + * Cannot do this on a restart as the rest of the processes + * in the job won't be executing this step, so we would hang + */ + if (ORTE_PROC_IS_NON_MPI && !orte_do_not_barrier) { + /* need to commit the data before we fence */ + opal_pmix.commit(); + if (ORTE_SUCCESS != (ret = opal_pmix.fence(NULL, 0))) { + error = "opal_pmix.fence() failed"; + goto error; + } } return ORTE_SUCCESS; - error: + error: if (ORTE_ERR_SILENT != ret && !orte_report_silent_errors) { orte_show_help("help-orte-runtime.txt", "orte_init:startup:internal-failure", @@ -291,8 +472,6 @@ static int rte_init(void) static int rte_finalize(void) { - int ret; - /* remove the envars that we pushed into environ * so we leave that structure intact */ @@ -311,10 +490,9 @@ static int rte_finalize(void) unsetenv("PMIX_SERVER_URI"); unsetenv("PMIX_SECURITY_MODE"); } - /* use the default procedure to finish */ - if (ORTE_SUCCESS != (ret = orte_ess_base_app_finalize())) { - ORTE_ERROR_LOG(ret); - } + /* close frameworks */ + (void) mca_base_framework_close(&orte_filem_base_framework); + (void) mca_base_framework_close(&orte_errmgr_base_framework); /* mark us as finalized */ if (NULL != opal_pmix.finalize) { @@ -322,12 +500,18 @@ static int rte_finalize(void) (void) mca_base_framework_close(&opal_pmix_base_framework); } + (void) mca_base_framework_close(&orte_state_base_framework); + orte_session_dir_finalize(ORTE_PROC_MY_NAME); + + /* cleanup the process info */ + 
orte_proc_info_finalize(); + /* release the event base */ if (progress_thread_running) { opal_progress_thread_finalize(NULL); progress_thread_running = false; } - return ret; + return ORTE_SUCCESS; } #define ORTE_URI_MSG_LGTH 256 @@ -584,3 +768,25 @@ static int fork_hnp(void) return ORTE_SUCCESS; } } + +static void rte_abort(int status, bool report) +{ + struct timespec tp = {0, 100000}; + + OPAL_OUTPUT_VERBOSE((1, orte_ess_base_framework.framework_output, + "%s ess:singleton:abort: abort with status %d", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), + status)); + + /* PMI doesn't like NULL messages, but our interface + * doesn't provide one - so rig one up here + */ + opal_pmix.abort(status, "N/A", NULL); + + /* provide a little delay for the PMIx thread to + * get the info out */ + nanosleep(&tp, NULL); + + /* Now Exit */ + _exit(status); +} diff --git a/orte/mca/ess/slurm/ess_slurm_module.c b/orte/mca/ess/slurm/ess_slurm_module.c index 47dddbed492..d400de30853 100644 --- a/orte/mca/ess/slurm/ess_slurm_module.c +++ b/orte/mca/ess/slurm/ess_slurm_module.c @@ -54,7 +54,7 @@ static int rte_finalize(void); orte_ess_base_module_t orte_ess_slurm_module = { rte_init, rte_finalize, - orte_ess_base_app_abort, + NULL, NULL /* ft_event */ }; @@ -125,14 +125,6 @@ static int rte_finalize(void) ORTE_ERROR_LOG(ret); } return ret; - } else { - /* otherwise, I must be an application process - * use the default procedure to finish - */ - if (ORTE_SUCCESS != (ret = orte_ess_base_app_finalize())) { - ORTE_ERROR_LOG(ret); - return ret; - } } return ORTE_SUCCESS; diff --git a/orte/mca/ess/tm/ess_tm_module.c b/orte/mca/ess/tm/ess_tm_module.c index 20373798c42..646caced773 100644 --- a/orte/mca/ess/tm/ess_tm_module.c +++ b/orte/mca/ess/tm/ess_tm_module.c @@ -53,7 +53,7 @@ static int rte_finalize(void); orte_ess_base_module_t orte_ess_tm_module = { rte_init, rte_finalize, - orte_ess_base_app_abort, + NULL, NULL /* ft_event */ }; @@ -129,14 +129,6 @@ static int rte_finalize(void) 
ORTE_ERROR_LOG(ret); } return ret; - } else { - /* otherwise, I must be an application process - * use the default procedure to finish - */ - if (ORTE_SUCCESS != (ret = orte_ess_base_app_finalize())) { - ORTE_ERROR_LOG(ret); - return ret; - } } return ORTE_SUCCESS; diff --git a/orte/mca/notifier/Makefile.am b/orte/mca/notifier/Makefile.am deleted file mode 100644 index 52444ea1251..00000000000 --- a/orte/mca/notifier/Makefile.am +++ /dev/null @@ -1,39 +0,0 @@ -# -# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana -# University Research and Technology -# Corporation. All rights reserved. -# Copyright (c) 2004-2005 The University of Tennessee and The University -# of Tennessee Research Foundation. All rights -# reserved. -# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, -# University of Stuttgart. All rights reserved. -# Copyright (c) 2004-2005 The Regents of the University of California. -# All rights reserved. -# Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. -# Copyright (c) 2014 Intel, Inc. All rights reserved. -# $COPYRIGHT$ -# -# Additional copyrights may follow -# -# $HEADER$ -# - -# main library setup -noinst_LTLIBRARIES = libmca_notifier.la -libmca_notifier_la_SOURCES = - -# local files -headers = notifier.h - -libmca_notifier_la_SOURCES += $(headers) - -# Conditionally install the header files -if WANT_INSTALL_HEADERS -ortedir = $(includedir)/openmpi/$(subdir) -nobase_orte_HEADERS = $(headers) -endif - -include base/Makefile.am - -distclean-local: - rm -f base/static-components.h diff --git a/orte/mca/notifier/base/base.h b/orte/mca/notifier/base/base.h deleted file mode 100644 index 2f944dfd9d8..00000000000 --- a/orte/mca/notifier/base/base.h +++ /dev/null @@ -1,87 +0,0 @@ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. 
- * Copyright (c) 2004-2005 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * Copyright (c) 2009 Cisco Systems, Inc. All rights reserved. - * - * Copyright (c) 2014-2015 Intel, Inc. All rights reserved. - * - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ -/** @file: - */ - -#ifndef MCA_NOTIFIER_BASE_H -#define MCA_NOTIFIER_BASE_H - -/* - * includes - */ -#include "orte_config.h" - -#include "opal/class/opal_list.h" -#include "opal/mca/base/base.h" -#include "opal/mca/event/event.h" - -#include "orte/mca/notifier/notifier.h" - -BEGIN_C_DECLS - -/* - * MCA Framework - */ -ORTE_DECLSPEC extern mca_base_framework_t orte_notifier_base_framework; - -typedef struct { - opal_event_base_t *ev_base; - bool ev_base_active; - opal_list_t modules; - orte_notifier_severity_t severity_level; - char *default_actions; - char *emerg_actions; - char *alert_actions; - char *crit_actions; - char *warn_actions; - char *notice_actions; - char *info_actions; - char *debug_actions; - char *error_actions; -} orte_notifier_base_t; - -/* - * Type for holding selected module / component pairs - */ -typedef struct { - opal_list_item_t super; - /* Component */ - orte_notifier_base_component_t *component; - /* Module */ - orte_notifier_base_module_t *module; -} orte_notifier_active_module_t; -OBJ_CLASS_DECLARATION(orte_notifier_active_module_t); - -ORTE_DECLSPEC extern orte_notifier_base_t orte_notifier_base; - -/* select a component */ -ORTE_DECLSPEC int orte_notifier_base_select(void); - -/* base functions */ -ORTE_DECLSPEC void orte_notifier_base_log(int sd, short args, void *cbdata); -ORTE_DECLSPEC void orte_notifier_base_event(int sd, short args, void *cbdata); -ORTE_DECLSPEC void 
orte_notifier_base_report(int sd, short args, void *cbdata); - -/* severity to string */ -ORTE_DECLSPEC const char* orte_notifier_base_sev2str(orte_notifier_severity_t severity); -END_C_DECLS -#endif diff --git a/orte/mca/notifier/base/notifier_base_fns.c b/orte/mca/notifier/base/notifier_base_fns.c deleted file mode 100644 index 1a6751a2085..00000000000 --- a/orte/mca/notifier/base/notifier_base_fns.c +++ /dev/null @@ -1,200 +0,0 @@ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2005 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * Copyright (c) 2008-2015 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2014-2017 Intel, Inc. All rights reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - - -#include "orte_config.h" -#include "orte/constants.h" - -#include "opal/util/argv.h" - -#include "orte/util/attr.h" -#include "orte/util/threads.h" -#include "orte/mca/notifier/base/base.h" - - -static void orte_notifier_base_identify_modules(char ***modules, - orte_notifier_request_t *req); - -void orte_notifier_base_log(int sd, short args, void *cbdata) -{ - orte_notifier_request_t *req = (orte_notifier_request_t*)cbdata; - char **modules = NULL; - orte_notifier_active_module_t *imod; - int i; - - ORTE_ACQUIRE_OBJECT(req); - - /* if no modules are active, then there is nothing to do */ - if (0 == opal_list_get_size(&orte_notifier_base.modules)) { - return; - } - - /* check if the severity is >= severity level set for - * reporting - note that the severity enum value goes up - * as severity goes down */ - if (orte_notifier_base.severity_level < req->severity ) { - return; - } - - orte_notifier_base_identify_modules(&modules, req); - - /* no modules selected then nothing to do */ - if (NULL == modules) { - return; - } - - for (i=0; NULL != modules[i]; i++) { - OPAL_LIST_FOREACH(imod, &orte_notifier_base.modules, orte_notifier_active_module_t) { - if (NULL != imod->module->log && - 0 == strcmp(imod->component->base_version.mca_component_name, modules[i])) - imod->module->log(req); - } - } - opal_argv_free(modules); -} - -void orte_notifier_base_event(int sd, short args, void *cbdata) -{ - orte_notifier_request_t *req = (orte_notifier_request_t*)cbdata; - char **modules = NULL; - orte_notifier_active_module_t *imod; - int i; - - ORTE_ACQUIRE_OBJECT(req); - - /* if no modules are active, then there is nothing to do */ - if (0 == opal_list_get_size(&orte_notifier_base.modules)) { - return; - } - - /* check if the severity is >= severity level set for - * reporting - note that the severity enum value goes up - * as severity goes down */ - if (orte_notifier_base.severity_level < 
req->severity ) { - return; - } - - orte_notifier_base_identify_modules(&modules, req); - - /* no modules selected then nothing to do */ - if (NULL == modules) { - return; - } - - for (i=0; NULL != modules[i]; i++) { - OPAL_LIST_FOREACH(imod, &orte_notifier_base.modules, orte_notifier_active_module_t) { - if (NULL != imod->module->log && - 0 == strcmp(imod->component->base_version.mca_component_name, modules[i])) - imod->module->event(req); - } - } - opal_argv_free(modules); -} - -void orte_notifier_base_report(int sd, short args, void *cbdata) -{ - orte_notifier_request_t *req = (orte_notifier_request_t*)cbdata; - char **modules = NULL; - orte_notifier_active_module_t *imod; - int i; - - ORTE_ACQUIRE_OBJECT(req); - - /* if no modules are active, then there is nothing to do */ - if (0 == opal_list_get_size(&orte_notifier_base.modules)) { - return; - } - - /* see if the job requested any notifications */ - if (!orte_get_attribute(&req->jdata->attributes, ORTE_JOB_NOTIFICATIONS, (void**)modules, OPAL_STRING)) { - return; - } - - /* need to process the notification string to get the names of the modules */ - if (NULL == modules) { - orte_notifier_base_identify_modules(&modules, req); - - /* no modules selected then nothing to do */ - if (NULL == modules) { - return; - } - } - - for (i=0; NULL != modules[i]; i++) { - OPAL_LIST_FOREACH(imod, &orte_notifier_base.modules, orte_notifier_active_module_t) { - if (NULL != imod->module->log && - 0 == strcmp(imod->component->base_version.mca_component_name, modules[i])) - imod->module->report(req); - } - } - opal_argv_free(modules); -} - -const char* orte_notifier_base_sev2str(orte_notifier_severity_t severity) -{ - switch (severity) { - case ORTE_NOTIFIER_EMERG: return "EMERGENCY"; break; - case ORTE_NOTIFIER_ALERT: return "ALERT"; break; - case ORTE_NOTIFIER_CRIT: return "CRITICAL"; break; - case ORTE_NOTIFIER_ERROR: return "ERROR"; break; - case ORTE_NOTIFIER_WARN: return "WARNING"; break; - case ORTE_NOTIFIER_NOTICE: return 
"NOTICE"; break; - case ORTE_NOTIFIER_INFO: return "INFO"; break; - case ORTE_NOTIFIER_DEBUG: return "DEBUG"; break; - default: return "UNKNOWN"; break; - } -} - -static void orte_notifier_base_identify_modules(char ***modules, - orte_notifier_request_t *req) -{ - if (NULL != req->action) { - *modules = opal_argv_split(req->action, ','); - } else { - if (ORTE_NOTIFIER_EMERG == req->severity && - (NULL != orte_notifier_base.emerg_actions)) { - *modules = opal_argv_split(orte_notifier_base.emerg_actions, ','); - } else if (ORTE_NOTIFIER_ALERT == req->severity && - (NULL != orte_notifier_base.alert_actions)) { - *modules = opal_argv_split(orte_notifier_base.alert_actions, ','); - } else if (ORTE_NOTIFIER_CRIT == req->severity && - (NULL != orte_notifier_base.crit_actions)) { - *modules = opal_argv_split(orte_notifier_base.crit_actions, ','); - } else if (ORTE_NOTIFIER_WARN == req->severity && - (NULL != orte_notifier_base.warn_actions)) { - *modules = opal_argv_split(orte_notifier_base.warn_actions, ','); - } else if (ORTE_NOTIFIER_NOTICE == req->severity && - (NULL != orte_notifier_base.notice_actions)) { - *modules = opal_argv_split(orte_notifier_base.notice_actions, ','); - } else if (ORTE_NOTIFIER_INFO == req->severity && - (NULL != orte_notifier_base.info_actions)) { - *modules = opal_argv_split(orte_notifier_base.info_actions, ','); - } else if (ORTE_NOTIFIER_DEBUG == req->severity && - (NULL != orte_notifier_base.debug_actions)) { - *modules = opal_argv_split(orte_notifier_base.debug_actions, ','); - } else if (ORTE_NOTIFIER_ERROR == req->severity && - (NULL != orte_notifier_base.error_actions)) { - *modules = opal_argv_split(orte_notifier_base.error_actions, ','); - } else if (NULL != orte_notifier_base.default_actions) { - *modules = opal_argv_split(orte_notifier_base.default_actions, ','); - } - } - return; -} diff --git a/orte/mca/notifier/base/notifier_base_frame.c b/orte/mca/notifier/base/notifier_base_frame.c deleted file mode 100644 index 
207998c4738..00000000000 --- a/orte/mca/notifier/base/notifier_base_frame.c +++ /dev/null @@ -1,266 +0,0 @@ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2005 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * Copyright (c) 2008-2015 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2014 Intel, Inc. All rights reserved. - * Copyright (c) 2015 Research Organization for Information Science - * and Technology (RIST). All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - - -#include "orte_config.h" -#include "orte/constants.h" - -#include - -#include "orte/mca/mca.h" -#include "opal/util/argv.h" -#include "opal/util/fd.h" -#include "opal/util/output.h" -#include "opal/mca/base/base.h" -#include "opal/class/opal_pointer_array.h" -#include "opal/runtime/opal_progress_threads.h" -#include "orte/mca/notifier/base/base.h" - -/* default module to use for logging*/ -#define ORTE_NOTIFIER_DEFAULT_MODULE "syslog" - -/* - * The following file was created by configure. It contains extern - * statements and the definition of an array of pointers to each - * component's public mca_base_component_t struct. 
- */ - -#include "orte/mca/notifier/base/static-components.h" - -/* - * Global variables - */ -opal_list_t orte_notifier_base_components_available = {{0}}; -int orte_notifier_debug_output = -1; - -orte_notifier_base_t orte_notifier_base = {0}; - -static char *notifier_severity = NULL; -static bool use_progress_thread = false; - -/** - * Function for selecting a set of components from all those that are - * available. - * - * Examples: - * 1) - * -mca notifier syslog,smtp - * --> syslog and smtp are selected for the loging - */ -static int orte_notifier_base_register(mca_base_register_flag_t flags) -{ - (void) mca_base_var_register("orte", "notifier", "base", "use_progress_thread", - "Use a dedicated progress thread for notifications [default: false]", - MCA_BASE_VAR_TYPE_BOOL, NULL, 0, 0, - OPAL_INFO_LVL_9, - MCA_BASE_VAR_SCOPE_READONLY, - &use_progress_thread); - - /* let the user define a base level of severity to report */ - (void) mca_base_var_register("orte", "notifier", "base", "severity_level", - "Report all events at or above this severity [default: error]", - MCA_BASE_VAR_TYPE_STRING, NULL, 0, 0, - OPAL_INFO_LVL_9, - MCA_BASE_VAR_SCOPE_READONLY, - &notifier_severity); - if (NULL == notifier_severity) { - orte_notifier_base.severity_level = ORTE_NOTIFIER_ERROR; - } else if (0 == strncasecmp(notifier_severity, "emerg", strlen("emerg"))) { - orte_notifier_base.severity_level = ORTE_NOTIFIER_EMERG; - } else if (0 == strncasecmp(notifier_severity, "alert", strlen("alert"))) { - orte_notifier_base.severity_level = ORTE_NOTIFIER_ALERT; - } else if (0 == strncasecmp(notifier_severity, "crit", strlen("crit"))) { - orte_notifier_base.severity_level = ORTE_NOTIFIER_CRIT; - } else if (0 == strncasecmp(notifier_severity, "warn", strlen("warn"))) { - orte_notifier_base.severity_level = ORTE_NOTIFIER_WARN; - } else if (0 == strncasecmp(notifier_severity, "notice", strlen("notice"))) { - orte_notifier_base.severity_level = ORTE_NOTIFIER_NOTICE; - } else if (0 ==
strncasecmp(notifier_severity, "info", strlen("info"))) { - orte_notifier_base.severity_level = ORTE_NOTIFIER_INFO; - } else if (0 == strncasecmp(notifier_severity, "debug", strlen("debug"))) { - orte_notifier_base.severity_level = ORTE_NOTIFIER_DEBUG; - } else { - orte_notifier_base.severity_level = ORTE_NOTIFIER_ERROR; - } - - /* let the user define a base default actions */ - orte_notifier_base.default_actions = NULL; - (void) mca_base_var_register("orte", "notifier", "base", "default_actions", - "Report all events to the default actions:NONE,syslog,smtp", - MCA_BASE_VAR_TYPE_STRING, NULL, 0, 0, - OPAL_INFO_LVL_9, - MCA_BASE_VAR_SCOPE_READONLY, - &orte_notifier_base.default_actions); - - if (NULL == orte_notifier_base.default_actions) { - orte_notifier_base.default_actions = strdup(ORTE_NOTIFIER_DEFAULT_MODULE); - } - /* let the user define a action for emergency events */ - orte_notifier_base.emerg_actions = NULL; - (void) mca_base_var_register("orte", "notifier", "base", "emerg_event_actions", - "Report emergency events to the specified actions: example 'smtp'", - MCA_BASE_VAR_TYPE_STRING, NULL, 0, 0, - OPAL_INFO_LVL_9, - MCA_BASE_VAR_SCOPE_READONLY, - &orte_notifier_base.emerg_actions); - - /* let the user define a action for alert events */ - orte_notifier_base.alert_actions = NULL; - (void) mca_base_var_register("orte", "notifier", "base", "alert_event_actions", - "Report alert events to the specified actions: example 'smtp'", - MCA_BASE_VAR_TYPE_STRING, NULL, 0, 0, - OPAL_INFO_LVL_9, - MCA_BASE_VAR_SCOPE_READONLY, - &orte_notifier_base.alert_actions); - - /* let the user define a action for critical events */ - orte_notifier_base.crit_actions = NULL; - (void) mca_base_var_register("orte", "notifier", "base", "crit_event_actions", - "Report critical events to the specified actions: example 'syslog'", - MCA_BASE_VAR_TYPE_STRING, NULL, 0, 0, - OPAL_INFO_LVL_9, - MCA_BASE_VAR_SCOPE_READONLY, - &orte_notifier_base.crit_actions); - - /* let the user define a 
action for warning events */ - orte_notifier_base.warn_actions = NULL; - (void) mca_base_var_register("orte", "notifier", "base", "warn_event_actions", - "Report warning events to the specified actions: example 'syslog'", - MCA_BASE_VAR_TYPE_STRING, NULL, 0, 0, - OPAL_INFO_LVL_9, - MCA_BASE_VAR_SCOPE_READONLY, - &orte_notifier_base.warn_actions); - - /* let the user define a action for notice events */ - orte_notifier_base.notice_actions = NULL; - (void) mca_base_var_register("orte", "notifier", "base", "notice_event_actions", - "Report notice events to the specified actions: example 'syslog'", - MCA_BASE_VAR_TYPE_STRING, NULL, 0, 0, - OPAL_INFO_LVL_9, - MCA_BASE_VAR_SCOPE_READONLY, - &orte_notifier_base.notice_actions); - - /* let the user define a action for info events */ - orte_notifier_base.info_actions = NULL; - (void) mca_base_var_register("orte", "notifier", "base", "info_event_actions", - "Report info events to the specified actions: example 'syslog'", - MCA_BASE_VAR_TYPE_STRING, NULL, 0, 0, - OPAL_INFO_LVL_9, - MCA_BASE_VAR_SCOPE_READONLY, - &orte_notifier_base.info_actions); - - /* let the user define a action for debug events */ - orte_notifier_base.debug_actions = NULL; - (void) mca_base_var_register("orte", "notifier", "base", "debug_event_actions", - "Report debug events to the specified actions: example 'syslog'", - MCA_BASE_VAR_TYPE_STRING, NULL, 0, 0, - OPAL_INFO_LVL_9, - MCA_BASE_VAR_SCOPE_READONLY, - &orte_notifier_base.debug_actions); - - /* let the user define a action for error events */ - orte_notifier_base.error_actions = NULL; - (void) mca_base_var_register("orte", "notifier", "base", "error_event_actions", - "Report error events to the specified actions: example 'syslog'", - MCA_BASE_VAR_TYPE_STRING, NULL, 0, 0, - OPAL_INFO_LVL_9, - MCA_BASE_VAR_SCOPE_READONLY, - &orte_notifier_base.error_actions); - - return ORTE_SUCCESS; -} - -static int orte_notifier_base_close(void) -{ - orte_notifier_active_module_t *i_module; - - if 
(orte_notifier_base.ev_base_active) { - orte_notifier_base.ev_base_active = false; - opal_progress_thread_finalize("notifier"); - } - - OPAL_LIST_FOREACH(i_module, &orte_notifier_base.modules, orte_notifier_active_module_t) { - if (NULL != i_module->module->finalize) { - i_module->module->finalize(); - } - } - OPAL_LIST_DESTRUCT(&orte_notifier_base.modules); - - /* close all remaining available components */ - return mca_base_framework_components_close(&orte_notifier_base_framework, NULL); -} - -/** - * Function for finding and opening either all MCA components, or the one - * that was specifically requested via a MCA parameter. - */ -static int orte_notifier_base_open(mca_base_open_flag_t flags) -{ - int rc; - - /* construct the array of modules */ - OBJ_CONSTRUCT(&orte_notifier_base.modules, opal_list_t); - - /* if requested, create our own event base */ - if (use_progress_thread) { - orte_notifier_base.ev_base_active = true; - if (NULL == (orte_notifier_base.ev_base = - opal_progress_thread_init("notifier"))) { - orte_notifier_base.ev_base_active = false; - return ORTE_ERROR; - } - } else { - orte_notifier_base.ev_base = orte_event_base; - } - - /* Open up all available components */ - rc = mca_base_framework_components_open(&orte_notifier_base_framework, - flags); - orte_notifier_debug_output = orte_notifier_base_framework.framework_output; - return rc; -} - -MCA_BASE_FRAMEWORK_DECLARE(orte, notifier, "ORTE Notifier Framework", - orte_notifier_base_register, - orte_notifier_base_open, orte_notifier_base_close, - mca_notifier_base_static_components, 0); - - -OBJ_CLASS_INSTANCE (orte_notifier_active_module_t, - opal_list_item_t, - NULL, NULL); - -static void req_cons (orte_notifier_request_t *r) -{ - r->jdata = NULL; - r->msg = NULL; - r->t = 0; -} -static void req_des(orte_notifier_request_t *r) -{ - if (NULL != r->jdata) { - OBJ_RELEASE(r->jdata); - } -} -OBJ_CLASS_INSTANCE (orte_notifier_request_t, - opal_object_t, - req_cons, req_des); diff --git 
a/orte/mca/notifier/base/notifier_base_select.c b/orte/mca/notifier/base/notifier_base_select.c deleted file mode 100644 index cdd9142ff2e..00000000000 --- a/orte/mca/notifier/base/notifier_base_select.c +++ /dev/null @@ -1,127 +0,0 @@ -/* - * Copyright (c) 2004-2008 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2005 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * Copyright (c) 2009 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2014-2015 Intel, Inc. All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - - -#include "orte_config.h" - -#include - -#include "orte/mca/mca.h" -#include "opal/mca/base/base.h" -#include "opal/util/argv.h" -#include "opal/util/output.h" -#include "opal/class/opal_pointer_array.h" - -#include "orte/mca/notifier/base/base.h" - -/* Global variables */ -/* - * orte_notifier_base_selected is set to true if at least 1 module has - * been selected for the notifier log API interface. - */ -static bool orte_notifier_base_selected = false; - -/** - * Function for weeding out notifier components that don't want to run. - * - * Call the init function on all available compoenent to find out if - * they want to run. Select all components that don't fail. Failing - * Components will be closed and unloaded. The selected modules will - * be returned to the called in a opal_list_t. 
- */ - -int orte_notifier_base_select(void) -{ - mca_base_component_list_item_t *cli = NULL; - orte_notifier_base_component_t *component = NULL; - mca_base_module_t *module = NULL; - int priority; - orte_notifier_active_module_t *tmp_module; - orte_notifier_base_module_t *bmod; - - if (orte_notifier_base_selected) { - return ORTE_SUCCESS; - } - orte_notifier_base_selected = true; - - opal_output_verbose(10, orte_notifier_base_framework.framework_output, - "notifier:base:select: Auto-selecting components"); - - /* - * Traverse the list of available components. - * For each call their 'query' functions to see if they are available. - */ - OPAL_LIST_FOREACH(cli, &orte_notifier_base_framework.framework_components, mca_base_component_list_item_t) { - component = (orte_notifier_base_component_t *) cli->cli_component; - - /* - * If there is a query function then use it. - */ - if (NULL == component->base_version.mca_query_component) { - opal_output_verbose(5, orte_notifier_base_framework.framework_output, - "notifier:base:select Skipping component [%s]. It does not implement a query function", - component->base_version.mca_component_name ); - continue; - } - - /* - * Query this component for the module and priority - */ - opal_output_verbose(5, orte_notifier_base_framework.framework_output, - "notifier:base:select Querying component [%s]", - component->base_version.mca_component_name); - - component->base_version.mca_query_component(&module, &priority); - - /* - * If no module was returned or negative priority, then skip component - */ - if (NULL == module || priority < 0) { - opal_output_verbose(5, orte_notifier_base_framework.framework_output, - "notifier:base:select Skipping component [%s]. 
Query failed to return a module", - component->base_version.mca_component_name ); - continue; - } - bmod = (orte_notifier_base_module_t*)module; - - /* see if it can be init'd */ - if (NULL != bmod->init) { - opal_output_verbose(5, orte_notifier_base_framework.framework_output, - "notifier:base:init module called with priority [%s] %d", - component->base_version.mca_component_name, priority); - if (ORTE_SUCCESS != bmod->init()) { - continue; - } - } - /* - * Append them to the list - */ - opal_output_verbose(5, orte_notifier_base_framework.framework_output, - "notifier:base:select adding component [%s]", - component->base_version.mca_component_name); - tmp_module = OBJ_NEW(orte_notifier_active_module_t); - tmp_module->component = component; - tmp_module->module = (orte_notifier_base_module_t*)module; - - opal_list_append(&orte_notifier_base.modules, (void*)tmp_module); - } - - return ORTE_SUCCESS; -} diff --git a/orte/mca/notifier/notifier.h b/orte/mca/notifier/notifier.h deleted file mode 100644 index 8c7eb8529d4..00000000000 --- a/orte/mca/notifier/notifier.h +++ /dev/null @@ -1,234 +0,0 @@ -/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ -/* - * Copyright (c) 2004-2008 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2017 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * Copyright (c) 2009 Cisco Systems, Inc. All Rights Reserved. - * Copyright (c) 2012-2015 Los Alamos National Security, LLC. All rights - * reserved. - * Copyright (c) 2014-2017 Intel, Inc. All rights reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ -/** @file: - * - * The OpenRTE Notifier Framework - * - * The OpenRTE Notifier framework provides a mechanism for notifying - * system administrators or other fault monitoring systems that a - * problem with the underlying cluster has been detected - e.g., a - * failed connection in a network fabric - */ - -#ifndef MCA_NOTIFIER_H -#define MCA_NOTIFIER_H - -/* - * includes - */ - -#include "orte_config.h" - -#include -#include -#ifdef HAVE_SYSLOG_H -#include -#endif - -#include "orte/mca/mca.h" - -#include "orte/constants.h" -#include "orte/types.h" - -#include "orte/runtime/orte_globals.h" -#include "orte/util/threads.h" - -BEGIN_C_DECLS - -/* make the verbose channel visible here so everyone - * doesn't have to include notifier/base/base.h */ -ORTE_DECLSPEC extern int orte_notifier_debug_output; - -/* The maximum size of any on-stack buffers used in the notifier - * so we can try to avoid calling malloc in OUT_OF_RESOURCES conditions. - * The code has NOT been auditied for use of malloc, so this still - * may fail to get the "OUT_OF_RESOURCE" message out. Oh Well. 
- */ -#define ORTE_NOTIFIER_MAX_BUF 512 - -/* Severities */ -typedef enum { -#ifdef HAVE_SYSLOG_H - ORTE_NOTIFIER_EMERG = LOG_EMERG, - ORTE_NOTIFIER_ALERT = LOG_ALERT, - ORTE_NOTIFIER_CRIT = LOG_CRIT, - ORTE_NOTIFIER_ERROR = LOG_ERR, - ORTE_NOTIFIER_WARN = LOG_WARNING, - ORTE_NOTIFIER_NOTICE = LOG_NOTICE, - ORTE_NOTIFIER_INFO = LOG_INFO, - ORTE_NOTIFIER_DEBUG = LOG_DEBUG -#else - ORTE_NOTIFIER_EMERG, - ORTE_NOTIFIER_ALERT, - ORTE_NOTIFIER_CRIT, - ORTE_NOTIFIER_ERROR, - ORTE_NOTIFIER_WARN, - ORTE_NOTIFIER_NOTICE, - ORTE_NOTIFIER_INFO, - ORTE_NOTIFIER_DEBUG -#endif -} orte_notifier_severity_t; - -typedef struct { - opal_object_t super; - opal_event_t ev; - orte_job_t *jdata; - orte_job_state_t state; - orte_notifier_severity_t severity; - int errcode; - const char *msg; - const char *action; - time_t t; -} orte_notifier_request_t; -OBJ_CLASS_DECLARATION(orte_notifier_request_t); - -/* - * Component functions - all MUST be provided! - */ - -/* initialize the selected module */ -typedef int (*orte_notifier_base_module_init_fn_t)(void); - -/* finalize the selected module */ -typedef void (*orte_notifier_base_module_finalize_fn_t)(void); - -/* Log an internal error - this will include the job that caused the - * error to occur */ -typedef void (*orte_notifier_base_module_log_fn_t)(orte_notifier_request_t *req); - -/* Report a system event - e.g., a temperature out-of-bound */ -typedef void (*orte_notifier_base_module_event_fn_t)(orte_notifier_request_t *req); - -/* Report a job state */ -typedef void (*orte_notifier_base_module_report_fn_t)(orte_notifier_request_t *req); - - -#define ORTE_NOTIFIER_INTERNAL_ERROR(j, st, s, e, m) \ - do { \ - orte_notifier_request_t *_n; \ - opal_output_verbose(2, orte_notifier_debug_output, \ - "%s notifier:internal:error[%s:%d] " \ - "job %s error %s severity %s", \ - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), \ - __FILE__, __LINE__, \ - ORTE_JOBID_PRINT((NULL == (j)) ? 
\ - ORTE_JOBID_INVALID : \ - (j)->jobid), \ - ORTE_ERROR_NAME((e)), \ - orte_notifier_base_sev2str(s)); \ - _n = OBJ_NEW(orte_notifier_request_t); \ - _n->jdata = (j); \ - _n->state = (st); \ - _n->severity = (s); \ - _n->errcode = (e); \ - _n->msg = (m); \ - _n->t = time(NULL); \ - _n->action = (NULL); \ - /* add the event */ \ - opal_event_set(orte_notifier_base.ev_base, &(_n)->ev, -1, \ - OPAL_EV_WRITE, orte_notifier_base_log, (_n)); \ - opal_event_set_priority(&(_n)->ev, ORTE_ERROR_PRI); \ - ORTE_POST_OBJECT(_n); \ - opal_event_active(&(_n)->ev, OPAL_EV_WRITE, 1); \ - } while(0); - -#define ORTE_NOTIFIER_JOB_STATE(j, st, m) \ - do { \ - orte_notifier_request_t *_n; \ - opal_output_verbose(2, orte_notifier_debug_output, \ - "%s notifier[%s:%d] job %s state %s", \ - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), \ - __FILE__, __LINE__, \ - ORTE_JOBID_PRINT((NULL == (j)) ? \ - ORTE_JOBID_INVALID : \ - (j)->jobid), \ - orte_job_state_to_str(st)); \ - _n = OBJ_NEW(orte_notifier_request_t); \ - _n->jdata = (j); \ - _n->state = (st); \ - _n->msg = (m); \ - _n->t = time(NULL); \ - _n->action = (NULL); \ - /* add the event */ \ - opal_event_set(orte_notifier_base.ev_base, &(_n)->ev, -1, \ - OPAL_EV_WRITE, orte_notifier_base_report, (_n)); \ - opal_event_set_priority(&(_n)->ev, ORTE_ERROR_PRI); \ - ORTE_POST_OBJECT(_n); \ - opal_event_active(&(_n)->ev, OPAL_EV_WRITE, 1); \ - } while(0); - -#define ORTE_NOTIFIER_SYSTEM_EVENT(s, m, a) \ - do { \ - orte_notifier_request_t *_n; \ - opal_output_verbose(2, orte_notifier_debug_output, \ - "%s notifier:sys:event[%s:%d] event %s", \ - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), \ - __FILE__, __LINE__, \ - orte_notifier_base_sev2str(s)); \ - _n = OBJ_NEW(orte_notifier_request_t); \ - _n->jdata = (NULL); \ - _n->state = (NULL); \ - _n->jdata = NULL; \ - _n->msg = (m); \ - _n->t = time(NULL); \ - _n->severity = (s); \ - _n->action = (a); \ - /* add the event */ \ - opal_event_set(orte_notifier_base.ev_base, &(_n)->ev, -1, \ - OPAL_EV_WRITE, 
orte_notifier_base_event, (_n)); \ - opal_event_set_priority(&(_n)->ev, ORTE_ERROR_PRI); \ - ORTE_POST_OBJECT(_n); \ - opal_event_active(&(_n)->ev, OPAL_EV_WRITE, 1); \ - } while(0); - -/* - * Ver 1.0 - */ -typedef struct { - orte_notifier_base_module_init_fn_t init; - orte_notifier_base_module_finalize_fn_t finalize; - orte_notifier_base_module_log_fn_t log; - orte_notifier_base_module_event_fn_t event; - orte_notifier_base_module_report_fn_t report; -} orte_notifier_base_module_t; - - -/* - * the standard component data structure - */ -typedef struct { - mca_base_component_t base_version; - mca_base_component_data_t base_data; -} orte_notifier_base_component_t; - - -/* - * Macro for use in components that are of type notifier v1.0.0 - */ -#define ORTE_NOTIFIER_BASE_VERSION_1_0_0 \ - /* notifier v1.0 is chained to MCA v2.0 */ \ - ORTE_MCA_BASE_VERSION_2_1_0("notifier", 1, 0, 0) - -END_C_DECLS - -#endif /* MCA_NOTIFIER_H */ diff --git a/orte/mca/notifier/smtp/.opal_ignore b/orte/mca/notifier/smtp/.opal_ignore deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/orte/mca/notifier/smtp/Makefile.am b/orte/mca/notifier/smtp/Makefile.am deleted file mode 100644 index 87e978e534c..00000000000 --- a/orte/mca/notifier/smtp/Makefile.am +++ /dev/null @@ -1,54 +0,0 @@ -# -# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana -# University Research and Technology -# Corporation. All rights reserved. -# Copyright (c) 2004-2005 The University of Tennessee and The University -# of Tennessee Research Foundation. All rights -# reserved. -# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, -# University of Stuttgart. All rights reserved. -# Copyright (c) 2004-2005 The Regents of the University of California. -# All rights reserved. -# Copyright (c) 2009-2010 Cisco Systems, Inc. All rights reserved. -# Copyright (c) 2014 Intel, Inc. All rights reserved. -# Copyright (c) 2017 IBM Corporation. All rights reserved. 
-# $COPYRIGHT$ -# -# Additional copyrights may follow -# -# $HEADER$ -# - -AM_CPPFLAGS = $(notifier_smtp_CPPFLAGS) - -dist_ortedata_DATA = \ - help-orte-notifier-smtp.txt - -sources = \ - notifier_smtp.h \ - notifier_smtp_module.c \ - notifier_smtp_component.c - -# Make the output library in this directory, and name it either -# mca__.la (for DSO builds) or libmca__.la -# (for static builds). - -if MCA_BUILD_orte_notifier_smtp_DSO -component_noinst = -component_install = mca_notifier_smtp.la -else -component_noinst = libmca_notifier_smtp.la -component_install = -endif - -mcacomponentdir = $(ortelibdir) -mcacomponent_LTLIBRARIES = $(component_install) -mca_notifier_smtp_la_SOURCES = $(sources) -mca_notifier_smtp_la_LDFLAGS = -module -avoid-version $(notifier_smtp_LDFLAGS) -mca_notifier_smtp_la_LIBADD = $(top_builddir)/orte/lib@ORTE_LIB_PREFIX@open-rte.la \ - $(notifier_smtp_LIBS) - -noinst_LTLIBRARIES = $(component_noinst) -libmca_notifier_smtp_la_SOURCES =$(sources) -libmca_notifier_smtp_la_LDFLAGS = -module -avoid-version $(notifier_smtp_LDFLAGS) -libmca_notifier_smtp_la_LIBADD = $(notifier_smtp_LIBS) diff --git a/orte/mca/notifier/smtp/configure.m4 b/orte/mca/notifier/smtp/configure.m4 deleted file mode 100644 index a4a4771050a..00000000000 --- a/orte/mca/notifier/smtp/configure.m4 +++ /dev/null @@ -1,39 +0,0 @@ -# -*- shell-script -*- -# -# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana -# University Research and Technology -# Corporation. All rights reserved. -# Copyright (c) 2004-2005 The University of Tennessee and The University -# of Tennessee Research Foundation. All rights -# reserved. -# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, -# University of Stuttgart. All rights reserved. -# Copyright (c) 2004-2005 The Regents of the University of California. -# All rights reserved. -# Copyright (c) 2009-2010 Cisco Systems, Inc. All rights reserved. -# Copyright (c) 2014 Intel, Inc. All rights reserved. 
-# $COPYRIGHT$ -# -# Additional copyrights may follow -# -# $HEADER$ -# - -# MCA_notifier_smtp_CONFIG([action-if-found], [action-if-not-found]) -# ----------------------------------------------------------- -AC_DEFUN([MCA_orte_notifier_smtp_CONFIG], [ - AC_CONFIG_FILES([orte/mca/notifier/smtp/Makefile]) - - AC_CHECK_TYPES( [include/libesmtp.h], - [libesmtp*], - [libesmtp.h], - [esmtp], - [smtp_create_session], - [], - [orte_notifier_want_smtp=1], - [orte_notifier_want_smtp=0]) - - AS_IF([test "$orte_notifier_want_smtp" = 1], - [$1], - [$2]) -])dnl diff --git a/orte/mca/notifier/smtp/help-orte-notifier-smtp.txt b/orte/mca/notifier/smtp/help-orte-notifier-smtp.txt deleted file mode 100644 index 58b06bc81bb..00000000000 --- a/orte/mca/notifier/smtp/help-orte-notifier-smtp.txt +++ /dev/null @@ -1,33 +0,0 @@ -# -*- text -*- -# -# Copyright (c) 2009 Cisco Systems, Inc. All rights reserved. -# $COPYRIGHT$ -# -# Additional copyrights may follow -# -# $HEADER$ -# -# This is the US/English help file for Open MPI's SMTP notifier support -# -[to/from not specified] -Error: the Open MPI SMTP notifier component had no "to" and/or "from" -email addresses specified. -# -[server not specified] -Error: the Open MPI SMTP notifier component had no SMTP server name or -IP address specified. -# -[unable to resolve server] -Sorry, Open MPI's SMTP notifier component was unable to resolve the IP -address of the server provided. - - Server: %s -# -[send_email failed] -Oops! Open MPI's SMTP notifier failed to send an email. - - Reason: %s - libESMTP function: %s - libESMTP message: %s - Message: %s -# diff --git a/orte/mca/notifier/smtp/notifier_smtp.h b/orte/mca/notifier/smtp/notifier_smtp.h deleted file mode 100644 index 56732541864..00000000000 --- a/orte/mca/notifier/smtp/notifier_smtp.h +++ /dev/null @@ -1,68 +0,0 @@ -/* -*- C -*- - * - * Copyright (c) 2004-2008 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. 
- * Copyright (c) 2004-2006 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * Copyright (c) 2009 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2014 Intel, Inc. All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - * - */ -#ifndef NOTIFIER_SMTP_H -#define NOTIFIER_SMTP_H - -#include "orte_config.h" - -#include - -#include "libesmtp.h" - -#include "orte/mca/notifier/notifier.h" - -BEGIN_C_DECLS - -typedef struct { - orte_notifier_base_component_t super; - - /* libesmtp version */ - char *version; - - /* SMTP server name and port */ - char *server; - int port; - - /* To, From, Subject */ - char *to, **to_argv, *from_name, *from_addr, *subject; - - /* Mail body prefix and suffix */ - char *body_prefix, *body_suffix; - - /* struct hostent from resolved SMTP server name */ - struct hostent *server_hostent; - - /* Priority of this component */ - int priority; -} orte_notifier_smtp_component_t; - - -/* - * Notifier interfaces - */ -ORTE_MODULE_DECLSPEC extern orte_notifier_smtp_component_t - mca_notifier_smtp_component; -extern orte_notifier_base_module_t orte_notifier_smtp_module; - -END_C_DECLS - -#endif diff --git a/orte/mca/notifier/smtp/notifier_smtp_component.c b/orte/mca/notifier/smtp/notifier_smtp_component.c deleted file mode 100644 index ee37eda593a..00000000000 --- a/orte/mca/notifier/smtp/notifier_smtp_component.c +++ /dev/null @@ -1,197 +0,0 @@ -/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ -/* - * Copyright (c) 2004-2008 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. 
- * Copyright (c) 2004-2005 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * Copyright (c) 2009 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2014 Intel, Inc. All rights reserved. - * Copyright (c) 2015 Los Alamos National Security, LLC. All rights - * reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ -*/ - -/* - * Simple smtp notifier (using libesmtp) - */ - -#include "orte_config.h" - -#include "opal/mca/base/mca_base_var.h" - -#include "orte/constants.h" -#include "orte/util/show_help.h" - -#include "notifier_smtp.h" - -static int smtp_component_query(mca_base_module_t **module, int *priority); -static int smtp_close(void); -static int smtp_register(void); - -/* - * Struct of function pointers that need to be initialized - */ -orte_notifier_smtp_component_t mca_notifier_smtp_component = { - { - .base_version = { - ORTE_NOTIFIER_BASE_VERSION_1_0_0, - - .mca_component_name = "smtp", - - MCA_BASE_MAKE_VERSION(component, ORTE_MAJOR_VERSION, ORTE_MINOR_VERSION, - ORTE_RELEASE_VERSION), - .mca_close_component = smtp_close, - .mca_query_component = smtp_component_query, - .mca_register_component_params = smtp_register, - }, - .base_data = { - /* The component is checkpoint ready */ - MCA_BASE_METADATA_PARAM_CHECKPOINT - }, - }, -}; - -static int smtp_register(void) -{ - char version[256]; - - /* Server stuff */ - mca_notifier_smtp_component.server = strdup("localhost"); - (void) mca_base_component_var_register(&mca_notifier_smtp_component.super.base_version, "server", - "SMTP server name or IP address", - MCA_BASE_VAR_TYPE_STRING, NULL, 0, 0, - OPAL_INFO_LVL_9, - MCA_BASE_VAR_SCOPE_READONLY, - &mca_notifier_smtp_component.server); - - 
mca_notifier_smtp_component.port = 25; - (void) mca_base_component_var_register(&mca_notifier_smtp_component.super.base_version, "port", - "SMTP server port", - MCA_BASE_VAR_TYPE_INT, NULL, 0, 0, - OPAL_INFO_LVL_9, - MCA_BASE_VAR_SCOPE_READONLY, - &mca_notifier_smtp_component.port); - - /* Email stuff */ - mca_notifier_smtp_component.to = NULL; - (void) mca_base_component_var_register(&mca_notifier_smtp_component.super.base_version, "to", - "Comma-delimited list of email addresses to send to", - MCA_BASE_VAR_TYPE_STRING, NULL, 0, 0, - OPAL_INFO_LVL_9, - MCA_BASE_VAR_SCOPE_READONLY, - &mca_notifier_smtp_component.to); - mca_notifier_smtp_component.from_addr = NULL; - (void) mca_base_component_var_register(&mca_notifier_smtp_component.super.base_version, "from_addr", - "Email address that messages will be from", - MCA_BASE_VAR_TYPE_STRING, NULL, 0, 0, - OPAL_INFO_LVL_9, - MCA_BASE_VAR_SCOPE_READONLY, - &mca_notifier_smtp_component.from_addr); - mca_notifier_smtp_component.from_name = strdup("ORTE Notifier"); - (void) mca_base_component_var_register(&mca_notifier_smtp_component.super.base_version, "from_name", - "Email name that messages will be from", - MCA_BASE_VAR_TYPE_STRING, NULL, 0, 0, - OPAL_INFO_LVL_9, - MCA_BASE_VAR_SCOPE_READONLY, - &mca_notifier_smtp_component.from_name); - mca_notifier_smtp_component.subject = strdup("ORTE Notifier"); - (void) mca_base_component_var_register(&mca_notifier_smtp_component.super.base_version, "subject", - "Email subject", - MCA_BASE_VAR_TYPE_STRING, NULL, 0, 0, - OPAL_INFO_LVL_9, - MCA_BASE_VAR_SCOPE_READONLY, - &mca_notifier_smtp_component.subject); - - /* Mail body prefix and suffix */ - mca_notifier_smtp_component.body_prefix = strdup("The ORTE SMTP notifier wishes to inform you of the following message:\n\n"); - (void) mca_base_component_var_register(&mca_notifier_smtp_component.super.base_version, "body_prefix", - "Text to put at the beginning of the mail message", - MCA_BASE_VAR_TYPE_STRING, NULL, 0, 0, - 
OPAL_INFO_LVL_9, - MCA_BASE_VAR_SCOPE_READONLY, - &mca_notifier_smtp_component.body_prefix); - mca_notifier_smtp_component.body_suffix = strdup("\n\nSincerely,\nOscar the ORTE Owl"); - (void) mca_base_component_var_register(&mca_notifier_smtp_component.super.base_version, "body_prefix", - "Text to put at the end of the mail message", - MCA_BASE_VAR_TYPE_STRING, NULL, 0, 0, - OPAL_INFO_LVL_9, - MCA_BASE_VAR_SCOPE_READONLY, - &mca_notifier_smtp_component.body_suffix); - - /* Priority */ - mca_notifier_smtp_component.priority = 10; - (void) mca_base_component_var_register(&mca_notifier_smtp_component.super.base_version, "priority", - "Priority of this component", - MCA_BASE_VAR_TYPE_INT, NULL, 0, 0, - OPAL_INFO_LVL_9, - MCA_BASE_VAR_SCOPE_READONLY, - &mca_notifier_smtp_component.priority); - /* Libesmtp version */ - smtp_version(version, sizeof(version), 0); - version[sizeof(version) - 1] = '\0'; - mca_notifier_smtp_component.version = strdup(version); - (void) mca_base_component_var_register(&mca_notifier_smtp_component.super.base_version, "libesmtp_version", - "Version of libesmtp that this component is linked against", - MCA_BASE_VAR_TYPE_STRING, NULL, 0, 0, - OPAL_INFO_LVL_9, - MCA_BASE_VAR_SCOPE_READONLY, - &mca_notifier_smtp_component.version); - - return ORTE_SUCCESS; -} - -static int smtp_close(void) -{ - return ORTE_SUCCESS; -} - -static int smtp_component_query(mca_base_module_t **module, - int *priority) -{ - *priority = 0; - *module = NULL; - - /* If there's no to or from, there's no love */ - if (NULL == mca_notifier_smtp_component.to || - '\0' == mca_notifier_smtp_component.to[0] || - NULL == mca_notifier_smtp_component.from_addr || - '\0' == mca_notifier_smtp_component.from_addr[0]) { - orte_show_help("help-orte-notifier-smtp.txt", - "to/from not specified", true); - return ORTE_ERR_NOT_FOUND; - } - - /* Sanity checks */ - if (NULL == mca_notifier_smtp_component.server || - '\0' == mca_notifier_smtp_component.server[0]) { - 
orte_show_help("help-orte-notifier-smtp.txt", - "server not specified", true); - return ORTE_ERR_NOT_FOUND; - } - - /* Since we have to open a socket later, try to resolve the IP - address of the server now. Save the result, or abort if we - can't resolve it. */ - mca_notifier_smtp_component.server_hostent = - gethostbyname(mca_notifier_smtp_component.server); - if (NULL == mca_notifier_smtp_component.server_hostent) { - orte_show_help("help-orte-notifier-smtp.txt", - "unable to resolve server", - true, mca_notifier_smtp_component.server); - return ORTE_ERR_NOT_FOUND; - } - - *priority = 10; - *module = (mca_base_module_t *)&orte_notifier_smtp_module; - return ORTE_SUCCESS; -} diff --git a/orte/mca/notifier/smtp/notifier_smtp_module.c b/orte/mca/notifier/smtp/notifier_smtp_module.c deleted file mode 100644 index 666fd080281..00000000000 --- a/orte/mca/notifier/smtp/notifier_smtp_module.c +++ /dev/null @@ -1,316 +0,0 @@ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2005 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * Copyright (c) 2007 Sun Microsystems, Inc. All rights reserved. - * Copyright (c) 2009 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2014-2017 Intel, Inc. All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -/* - * Send an email upon notifier events. 
- */ - -#include "orte_config.h" - -#include -#include -#include -#ifdef HAVE_UNISTD_H -#include -#endif -#include - -#include "opal/util/show_help.h" -#include "opal/util/argv.h" - -#include "orte/constants.h" -#include "orte/mca/ess/ess.h" -#include "orte/util/error_strings.h" -#include "orte/util/name_fns.h" -#include "orte/util/show_help.h" -#include "orte/runtime/orte_globals.h" -#include "orte/mca/notifier/base/base.h" - -#include "notifier_smtp.h" - - -/* Static API's */ -static void mylog(orte_notifier_base_severity_t severity, int errcode, - const char *msg, va_list ap); - -/* Module */ -orte_notifier_base_module_t orte_notifier_smtp_module = { - .log = mylog -}; - -typedef enum { - SENT_NONE, - SENT_HEADER, - SENT_BODY_PREFIX, - SENT_BODY, - SENT_BODY_SUFFIX, - SENT_ALL -} sent_flag_t; - -typedef struct { - sent_flag_t sent_flag; - char *msg; - char *prev_string; -} message_status_t; - -/* - * Convert lone \n's to \r\n - */ -static char *crnl(char *orig) -{ - int i, j, max, count; - char *str; - return strdup(orig); - - /* Count how much space we need */ - count = max = strlen(orig); - for (i = 0; i < max; ++i) { - if (orig[i] == '\n' && i > 0 && orig[i - 1] != '\r') { - ++count; - } - } - - /* Copy, changing \n to \r\n */ - str = malloc(count + 1); - for (j = i = 0; i < max; ++i) { - if (orig[i] == '\n' && i > 0 && orig[i - 1] != '\r') { - str[j++] = '\n'; - } - str[j++] = orig[i]; - } - str[j] = '\0'; - return str; -} - -/* - * Callback function invoked via smtp_start_session() - */ -static const char *message_cb(void **buf, int *len, void *arg) -{ - message_status_t *ms = (message_status_t*) arg; - - if (NULL == *buf) { - *buf = malloc(8192); - } - if (NULL == len) { - ms->sent_flag = SENT_NONE; - return NULL; - } - - /* Free the previous string */ - if (NULL != ms->prev_string) { - free(ms->prev_string); - ms->prev_string = NULL; - } - - switch (ms->sent_flag) { - case SENT_NONE: - /* Send a blank line to signify the end of the header */ - 
ms->sent_flag = SENT_HEADER; - ms->prev_string = NULL; - *len = 2; - return "\r\n"; - - case SENT_HEADER: - if (NULL != mca_notifier_smtp_component.body_prefix) { - ms->sent_flag = SENT_BODY_PREFIX; - ms->prev_string = crnl(mca_notifier_smtp_component.body_prefix); - *len = strlen(ms->prev_string); - return ms->prev_string; - } - - case SENT_BODY_PREFIX: - ms->sent_flag = SENT_BODY; - ms->prev_string = crnl(ms->msg); - *len = strlen(ms->prev_string); - return ms->prev_string; - - case SENT_BODY: - if (NULL != mca_notifier_smtp_component.body_suffix) { - ms->sent_flag = SENT_BODY_SUFFIX; - ms->prev_string = crnl(mca_notifier_smtp_component.body_suffix); - *len = strlen(ms->prev_string); - return ms->prev_string; - } - - case SENT_BODY_SUFFIX: - case SENT_ALL: - default: - ms->sent_flag = SENT_ALL; - *len = 0; - return NULL; - } -} - -/* - * Back-end function to actually send the email - */ -static int send_email(char *msg) -{ - int i, err = ORTE_SUCCESS; - char *str = NULL; - char *errmsg = NULL; - struct sigaction sig, oldsig; - bool set_oldsig = false; - smtp_session_t session = NULL; - smtp_message_t message = NULL; - message_status_t ms; - orte_notifier_smtp_component_t *c = &mca_notifier_smtp_component; - - if (NULL == c->to_argv) { - c->to_argv = opal_argv_split(c->to, ','); - if (NULL == c->to_argv || - NULL == c->to_argv[0]) { - return ORTE_ERR_OUT_OF_RESOURCE; - } - } - - ms.sent_flag = SENT_NONE; - ms.prev_string = NULL; - ms.msg = msg; - - /* Temporarily disable SIGPIPE so that if remote servers timeout - or hang up on us, it doesn't kill this application. We'll - restore the original SIGPIPE handler when we're done. */ - sig.sa_handler = SIG_IGN; - sigemptyset(&sig.sa_mask); - sig.sa_flags = 0; - sigaction(SIGPIPE, &sig, &oldsig); - set_oldsig = true; - - /* Try to get a libesmtp session. 
If so, assume that libesmtp is - happy and proceeed */ - session = smtp_create_session(); - if (NULL == session) { - err = ORTE_ERR_NOT_SUPPORTED; - errmsg = "stmp_create_session"; - goto error; - } - - /* Create the message */ - message = smtp_add_message(session); - if (NULL == message) { - err = ORTE_ERROR; - errmsg = "stmp_add_message"; - goto error; - } - - /* Set the SMTP server (yes, it's a weird return status!) */ - asprintf(&str, "%s:%d", c->server, c->port); - if (0 == smtp_set_server(session, str)) { - err = ORTE_ERROR; - errmsg = "stmp_set_server"; - goto error; - } - free(str); - str = NULL; - - /* Add the sender */ - if (0 == smtp_set_reverse_path(message, c->from_addr)) { - err = ORTE_ERROR; - errmsg = "stmp_set_reverse_path"; - goto error; - } - - /* Set the subject and some headers */ - asprintf(&str, "Open MPI SMTP Notifier v%d.%d.%d", - c->super.base_version.mca_component_major_version, - c->super.base_version.mca_component_minor_version, - c->super.base_version.mca_component_release_version); - if (0 == smtp_set_header(message, "Subject", c->subject) || - 0 == smtp_set_header_option(message, "Subject", Hdr_OVERRIDE, 1) || - 0 == smtp_set_header(message, "To", NULL, NULL) || - 0 == smtp_set_header(message, "From", - (NULL != c->from_name ? - c->from_name : c->from_addr), - c->from_addr) || - 0 == smtp_set_header(message, "X-Mailer", str) || - 0 == smtp_set_header_option(message, "Subject", Hdr_OVERRIDE, 1)) { - err = ORTE_ERROR; - errmsg = "smtp_set_header"; - goto error; - } - free(str); - str = NULL; - - /* Add the recipients */ - for (i = 0; NULL != c->to_argv[i]; ++i) { - if (NULL == smtp_add_recipient(message, c->to_argv[i])) { - err = ORTE_ERR_OUT_OF_RESOURCE; - errmsg = "stmp_add_recipient"; - goto error; - } - } - - /* Set the callback to get the message */ - if (0 == smtp_set_messagecb(message, message_cb, &ms)) { - err = ORTE_ERROR; - errmsg = "smtp_set_messagecb"; - goto error; - } - - /* Send it! 
*/ - if (0 == smtp_start_session(session)) { - err = ORTE_ERROR; - errmsg = "smtp_start_session"; - goto error; - } - - /* Fall through */ - - error: - if (NULL != str) { - free(str); - } - if (NULL != session) { - smtp_destroy_session(session); - } - /* Restore the SIGPIPE handler */ - if (set_oldsig) { - sigaction(SIGPIPE, &oldsig, NULL); - } - if (ORTE_SUCCESS != err) { - int e; - char em[256]; - - e = smtp_errno(); - smtp_strerror(e, em, sizeof(em)); - orte_show_help("help-orte-notifier-smtp.txt", - "send_email failed", - true, "libesmtp library call failed", - errmsg, em, e, msg); - } - return err; -} - -static void mylog(orte_notifier_base_severity_t severity, int errcode, - const char *msg, va_list ap) -{ - char *output; - - /* If there was a message, output it */ - vasprintf(&output, msg, ap); - - if (NULL != output) { - send_email(output); - free(output); - } -} diff --git a/orte/mca/notifier/syslog/Makefile.am b/orte/mca/notifier/syslog/Makefile.am deleted file mode 100644 index b4f57089f71..00000000000 --- a/orte/mca/notifier/syslog/Makefile.am +++ /dev/null @@ -1,47 +0,0 @@ -# -# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana -# University Research and Technology -# Corporation. All rights reserved. -# Copyright (c) 2004-2005 The University of Tennessee and The University -# of Tennessee Research Foundation. All rights -# reserved. -# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, -# University of Stuttgart. All rights reserved. -# Copyright (c) 2004-2005 The Regents of the University of California. -# All rights reserved. -# Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. -# Copyright (c) 2014 Intel, Inc. All rights reserved. -# Copyright (c) 2017 IBM Corporation. All rights reserved. 
-# $COPYRIGHT$ -# -# Additional copyrights may follow -# -# $HEADER$ -# - -sources = \ - notifier_syslog.h \ - notifier_syslog_module.c \ - notifier_syslog_component.c - -# Make the output library in this directory, and name it either -# mca__.la (for DSO builds) or libmca__.la -# (for static builds). - -if MCA_BUILD_orte_notifier_syslog_DSO -component_noinst = -component_install = mca_notifier_syslog.la -else -component_noinst = libmca_notifier_syslog.la -component_install = -endif - -mcacomponentdir = $(ortelibdir) -mcacomponent_LTLIBRARIES = $(component_install) -mca_notifier_syslog_la_SOURCES = $(sources) -mca_notifier_syslog_la_LDFLAGS = -module -avoid-version -mca_notifier_syslog_la_LIBADD = $(top_builddir)/orte/lib@ORTE_LIB_PREFIX@open-rte.la - -noinst_LTLIBRARIES = $(component_noinst) -libmca_notifier_syslog_la_SOURCES =$(sources) -libmca_notifier_syslog_la_LDFLAGS = -module -avoid-version diff --git a/orte/mca/notifier/syslog/configure.m4 b/orte/mca/notifier/syslog/configure.m4 deleted file mode 100644 index d9e54bb9425..00000000000 --- a/orte/mca/notifier/syslog/configure.m4 +++ /dev/null @@ -1,31 +0,0 @@ -# -*- shell-script -*- -# -# Copyright (c) 2017 Cisco Systems, Inc. All rights reserved. -# $COPYRIGHT$ -# -# Additional copyrights may follow -# -# $HEADER$ -# - -# MCA_notifier_syslog_CONFIG([action-if-found], [action-if-not-found]) -# ----------------------------------------------------------- -AC_DEFUN([MCA_orte_notifier_syslog_CONFIG], [ - AC_CONFIG_FILES([orte/mca/notifier/syslog/Makefile]) - - OPAL_VAR_SCOPE_PUSH([orte_notifier_syslog_happy]) - - # Per https://github.com/open-mpi/ompi/issues/4373 and - # https://github.com/open-mpi/ompi/pull/4374, we need to check - # that syslog.h is compilable. If syslog.h is not compilable, - # disable this component. 
- AC_CHECK_HEADER([syslog.h], - [orte_notifier_syslog_happy=1], - [orte_notifier_syslog_happy=0]) - - AS_IF([test $orte_notifier_syslog_happy -eq 1], - [$1], - [$2]) - - OPAL_VAR_SCOPE_POP -])dnl diff --git a/orte/mca/notifier/syslog/notifier_syslog.h b/orte/mca/notifier/syslog/notifier_syslog.h deleted file mode 100644 index a78bb915b78..00000000000 --- a/orte/mca/notifier/syslog/notifier_syslog.h +++ /dev/null @@ -1,40 +0,0 @@ -/* -*- C -*- - * - * Copyright (c) 2004-2008 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2006 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * Copyright (c) 2009 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2014 Intel, Inc. All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - * - */ -#ifndef NOTIFIER_SYSLOG_H -#define NOTIFIER_SYSLOG_H - -#include "orte_config.h" - -#include "orte/mca/notifier/notifier.h" - -BEGIN_C_DECLS - -/* - * Notifier interfaces - */ - -ORTE_MODULE_DECLSPEC extern orte_notifier_base_component_t mca_notifier_syslog_component; -extern orte_notifier_base_module_t orte_notifier_syslog_module; - -END_C_DECLS - -#endif diff --git a/orte/mca/notifier/syslog/notifier_syslog_component.c b/orte/mca/notifier/syslog/notifier_syslog_component.c deleted file mode 100644 index 99085338166..00000000000 --- a/orte/mca/notifier/syslog/notifier_syslog_component.c +++ /dev/null @@ -1,60 +0,0 @@ -/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ -/* - * Copyright (c) 2004-2008 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. 
All rights reserved. - * Copyright (c) 2004-2005 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * Copyright (c) 2009 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2014 Intel, Inc. All rights reserved. - * Copyright (c) 2015 Los Alamos National Security, LLC. All rights - * reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ -*/ - -/* - * includes - */ -#include "orte_config.h" -#include "orte/constants.h" - -#include "notifier_syslog.h" - - -static int orte_notifier_syslog_component_query(mca_base_module_t **module, - int *priority); - -/* - * Struct of function pointers that need to be initialized - */ -orte_notifier_base_component_t mca_notifier_syslog_component = { - .base_version = { - ORTE_NOTIFIER_BASE_VERSION_1_0_0, - - .mca_component_name = "syslog", - MCA_BASE_MAKE_VERSION(component, ORTE_MAJOR_VERSION, ORTE_MINOR_VERSION, - ORTE_RELEASE_VERSION), - .mca_query_component = orte_notifier_syslog_component_query, - }, - .base_data = { - /* The component is checkpoint ready */ - MCA_BASE_METADATA_PARAM_CHECKPOINT - }, -}; - -static int orte_notifier_syslog_component_query(mca_base_module_t **module, - int *priority) -{ - *priority = 1; - *module = (mca_base_module_t *)&orte_notifier_syslog_module; - return ORTE_SUCCESS; -} diff --git a/orte/mca/notifier/syslog/notifier_syslog_module.c b/orte/mca/notifier/syslog/notifier_syslog_module.c deleted file mode 100644 index d488ca392f0..00000000000 --- a/orte/mca/notifier/syslog/notifier_syslog_module.c +++ /dev/null @@ -1,132 +0,0 @@ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. 
- * Copyright (c) 2004-2005 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * Copyright (c) 2007 Sun Microsystems, Inc. All rights reserved. - * Copyright (c) 2014-2017 Intel, Inc. All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "orte_config.h" -#include "orte/constants.h" - -#include -#ifdef HAVE_SYS_TIME_H -#include -#endif /* HAVE_SYS_TIME_H */ -#ifdef HAVE_SYSLOG_H -#include -#endif -#include - -#include "opal/util/show_help.h" - -#include "orte/util/error_strings.h" -#include "orte/util/name_fns.h" - -#include "orte/mca/notifier/base/base.h" -#include "notifier_syslog.h" - - -/* Static API's */ -static int init(void); -static void finalize(void); -static void mylog(orte_notifier_request_t *req); -static void myevent(orte_notifier_request_t *req); -static void myreport(orte_notifier_request_t *req); - -/* Module def */ -orte_notifier_base_module_t orte_notifier_syslog_module = { - .init = init, - .finalize = finalize, - .log = mylog, - .event = myevent, - .report = myreport -}; - - -static int init(void) -{ - int opts; - - opts = LOG_CONS | LOG_PID; - openlog("OpenRTE Error Report:", opts, LOG_USER); - - return ORTE_SUCCESS; -} - -static void finalize(void) -{ - closelog(); -} - -static void mylog(orte_notifier_request_t *req) -{ - char tod[48]; - - opal_output_verbose(5, orte_notifier_base_framework.framework_output, - "notifier:syslog:mylog function called with severity %d errcode %d and messg %s", - (int)req->severity, req->errcode, req->msg); - /* If there was a message, output it */ - (void)ctime_r(&req->t, tod); - /* trim the newline */ - tod[strlen(tod)] = '\0'; - - syslog(req->severity, "[%s]%s %s: JOBID %s 
REPORTS ERROR %s: %s", tod, - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - orte_notifier_base_sev2str(req->severity), - ORTE_JOBID_PRINT((NULL == req->jdata) ? - ORTE_JOBID_INVALID : req->jdata->jobid), - orte_job_state_to_str(req->state), - (NULL == req->msg) ? "" : req->msg); -} - -static void myevent(orte_notifier_request_t *req) -{ - char tod[48]; - - opal_output_verbose(5, orte_notifier_base_framework.framework_output, - "notifier:syslog:myevent function called with severity %d and messg %s", - (int)req->severity, req->msg); - /* If there was a message, output it */ - (void)ctime_r(&req->t, tod); - /* trim the newline */ - tod[strlen(tod)] = '\0'; - - syslog(req->severity, "[%s]%s %s SYSTEM EVENT : %s", tod, - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - orte_notifier_base_sev2str(req->severity), - (NULL == req->msg) ? "" : req->msg); -} - -static void myreport(orte_notifier_request_t *req) -{ - char tod[48]; - - opal_output_verbose(5, orte_notifier_base_framework.framework_output, - "notifier:syslog:myreport function called with severity %d state %s and messg %s", - (int)req->severity, orte_job_state_to_str(req->state), - req->msg); - /* If there was a message, output it */ - (void)ctime_r(&req->t, tod); - /* trim the newline */ - tod[strlen(tod)] = '\0'; - - syslog(req->severity, "[%s]%s JOBID %s REPORTS STATE %s: %s", tod, - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - ORTE_JOBID_PRINT((NULL == req->jdata) ? - ORTE_JOBID_INVALID : req->jdata->jobid), - orte_job_state_to_str(req->state), - (NULL == req->msg) ? 
"" : req->msg); -} diff --git a/orte/mca/odls/alps/odls_alps_module.c b/orte/mca/odls/alps/odls_alps_module.c index d99bd177f57..8d25e798518 100644 --- a/orte/mca/odls/alps/odls_alps_module.c +++ b/orte/mca/odls/alps/odls_alps_module.c @@ -1,3 +1,4 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* * Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana * University Research and Technology @@ -13,7 +14,7 @@ * Copyright (c) 2007 Evergrid, Inc. All rights reserved. * Copyright (c) 2008-2017 Cisco Systems, Inc. All rights reserved * Copyright (c) 2010 IBM Corporation. All rights reserved. - * Copyright (c) 2011-2014 Los Alamos National Security, LLC. All rights + * Copyright (c) 2011-2018 Los Alamos National Security, LLC. All rights * reserved. * Copyright (c) 2013-2017 Intel, Inc. All rights reserved. * Copyright (c) 2017 Rutgers, The State University of New Jersey. @@ -109,6 +110,7 @@ #include #endif +#include #include "opal/mca/hwloc/hwloc-internal.h" #include "opal/mca/hwloc/base/base.h" @@ -289,59 +291,40 @@ static void send_error_show_help(int fd, int exit_status, exit(exit_status); } -static int close_open_file_descriptors(int write_fd, orte_iof_base_io_conf_t opts) -{ - int rc, fd; - DIR *dir = NULL; - struct dirent *files; - int app_alps_filedes[2], alps_app_filedes[2]; - - dir = opendir("/proc/self/fd"); +static int close_open_file_descriptors(int write_fd, + orte_iof_base_io_conf_t opts) { + DIR *dir = opendir("/proc/self/fd"); if (NULL == dir) { return ORTE_ERR_FILE_OPEN_FAILURE; } + struct dirent *files; - /* close all file descriptors w/ exception of stdin/stdout/stderr, - the pipe used for the IOF INTERNAL messages, and the pipe up to - the parent. Be careful to retain all of the pipe fd's set up - by the apshephered. These are needed for obtaining RDMA credentials, - synchronizing with aprun, etc. 
*/ - - rc = alps_app_lli_pipes(app_alps_filedes,alps_app_filedes); - if (0 != rc) { - closedir(dir); + /* grab the fd of the opendir above so we don't close in the + * middle of the scan. */ + int dir_scan_fd = dirfd(dir); + if(dir_scan_fd < 0 ) { return ORTE_ERR_FILE_OPEN_FAILURE; } - while ((files = readdir(dir)) != NULL) { - if(!strncmp(files->d_name,".",1) || !strncmp(files->d_name,"..",2)) continue; - fd = strtoul(files->d_name, NULL, 10); - if (EINVAL == errno || ERANGE == errno) { + while (NULL != (files = readdir(dir))) { + if (!isdigit(files->d_name[0])) { + continue; + } + int fd = strtol(files->d_name, NULL, 10); + if (errno == EINVAL || errno == ERANGE) { closedir(dir); return ORTE_ERR_TYPE_MISMATCH; } - - /* - * skip over the pipes we have open to apshepherd or slurmd - */ - - if (fd == XTAPI_FD_IDENTITY) continue; - if (fd == XTAPI_FD_RESILIENCY) continue; - if ((fd == app_alps_filedes[0]) || - (fd == app_alps_filedes[1]) || - (fd == alps_app_filedes[0]) || - (fd == alps_app_filedes[1])) continue; - if (fd >=3 && #if OPAL_PMIX_V1 fd != opts.p_internal[1] && #endif - fd != write_fd) { - close(fd); + fd != write_fd && + fd != dir_scan_fd) { + close(fd); } } - closedir(dir); return ORTE_SUCCESS; } @@ -368,14 +351,18 @@ static int do_child(orte_odls_spawn_caddy_t *cd, int write_fd) always outputs a nice, single message indicating what happened */ - if (ORTE_SUCCESS != (i = orte_iof_base_setup_child(&cd->opts, &cd->env))) { - ORTE_ERROR_LOG(i); - send_error_show_help(write_fd, 1, - "help-orte-odls-alps.txt", - "iof setup failed", - orte_process_info.nodename, cd->app->app); - /* Does not return */ - } + + if (ORTE_FLAG_TEST(cd->jdata, ORTE_JOB_FLAG_FORWARD_OUTPUT)) { + if (ORTE_SUCCESS != (i = orte_iof_base_setup_child(&cd->opts, &cd->env))) { + ORTE_ERROR_LOG(i); + send_error_show_help(write_fd, 1, + "help-orte-odls-alps.txt", + "iof setup failed", + orte_process_info.nodename, cd->app->app); + /* Does not return */ + } + } + /* now set any child-level 
controls such as binding */ orte_rtc.set(cd->jdata, cd->child, &cd->env, write_fd); diff --git a/orte/mca/odls/base/odls_base_default_fns.c b/orte/mca/odls/base/odls_base_default_fns.c index f98dd7468b0..69974bb5790 100644 --- a/orte/mca/odls/base/odls_base_default_fns.c +++ b/orte/mca/odls/base/odls_base_default_fns.c @@ -13,7 +13,7 @@ * Copyright (c) 2011 Oak Ridge National Labs. All rights reserved. * Copyright (c) 2011-2013 Los Alamos National Security, LLC. * All rights reserved. - * Copyright (c) 2011-2017 Cisco Systems, Inc. All rights reserved + * Copyright (c) 2011-2018 Cisco Systems, Inc. All rights reserved * Copyright (c) 2013-2018 Intel, Inc. All rights reserved. * Copyright (c) 2014-2018 Research Organization for Information Science * and Technology (RIST). All rights reserved. @@ -75,7 +75,6 @@ #include "orte/mca/schizo/schizo.h" #include "orte/mca/state/state.h" #include "orte/mca/filem/filem.h" -#include "orte/mca/dfs/dfs.h" #include "orte/util/context_fns.h" #include "orte/util/name_fns.h" @@ -446,13 +445,6 @@ int orte_odls_base_default_get_add_procs_data(opal_buffer_t *buffer, return ORTE_SUCCESS; } -static void fm_release(void *cbdata) -{ - opal_buffer_t *bptr = (opal_buffer_t*)cbdata; - - OBJ_RELEASE(bptr); -} - static void ls_cbunc(int status, void *cbdata) { opal_pmix_lock_t *lock = (opal_pmix_lock_t*)cbdata; @@ -606,17 +598,17 @@ int orte_odls_base_default_construct_child_list(opal_buffer_t *buffer, } } - /* extract the ppn regex */ - cnt = 1; - if (OPAL_SUCCESS != (rc = opal_dss.unpack(buffer, &ppn, &cnt, OPAL_STRING))) { - ORTE_ERROR_LOG(rc); - goto REPORT_ERROR; - } - /* if the job is fully described, then mpirun will have computed * and sent us the complete array of procs in the orte_job_t, so we * don't need to do anything more here */ if (!orte_get_attribute(&jdata->attributes, ORTE_JOB_FULLY_DESCRIBED, NULL, OPAL_BOOL)) { + /* extract the ppn regex */ + cnt = 1; + if (OPAL_SUCCESS != (rc = opal_dss.unpack(buffer, &ppn, &cnt, 
OPAL_STRING))) { + ORTE_ERROR_LOG(rc); + goto REPORT_ERROR; + } + if (!ORTE_PROC_IS_HNP) { /* populate the node array of the job map and the proc array of * the job object so we know how many procs are on each node */ @@ -632,21 +624,20 @@ int orte_odls_base_default_construct_child_list(opal_buffer_t *buffer, goto REPORT_ERROR; } } + free(ppn); + /* compute the ranks and add the proc objects * to the jdata->procs array */ if (ORTE_SUCCESS != (rc = orte_rmaps_base_compute_vpids(jdata))) { ORTE_ERROR_LOG(rc); - free(ppn); goto REPORT_ERROR; } /* and finally, compute the local and node ranks */ if (ORTE_SUCCESS != (rc = orte_rmaps_base_compute_local_ranks(jdata))) { ORTE_ERROR_LOG(rc); - free(ppn); goto REPORT_ERROR; } } - free(ppn); /* unpack the buffer containing any application setup info - there * might not be any, so it isn't an error if we don't find things */ @@ -810,15 +801,6 @@ int orte_odls_base_default_construct_child_list(opal_buffer_t *buffer, lock.active = false; // we won't get a callback } - /* if we have a file map, then we need to load it */ - if (orte_get_attribute(&jdata->attributes, ORTE_JOB_FILE_MAPS, (void**)&bptr, OPAL_BUFFER)) { - if (NULL != orte_dfs.load_file_maps) { - orte_dfs.load_file_maps(jdata->jobid, bptr, fm_release, bptr); - } else { - OBJ_RELEASE(bptr); - } - } - /* load any controls into the job */ orte_rtc.assign(jdata); @@ -1785,7 +1767,7 @@ int orte_odls_base_default_kill_local_procs(opal_pointer_array_t *procs, orte_proc_t *child; opal_list_t procs_killed; orte_proc_t *proc, proctmp; - int i, j; + int i, j, ret; opal_pointer_array_t procarray, *procptr; bool do_cleanup; orte_odls_quick_caddy_t *cd; @@ -1931,7 +1913,17 @@ int orte_odls_base_default_kill_local_procs(opal_pointer_array_t *procs, /* if we are issuing signals, then we need to wait a little * and send the next in sequence */ if (0 < opal_list_get_size(&procs_killed)) { - sleep(orte_odls_globals.timeout_before_sigkill); + /* Wait a little. 
Do so in a loop since sleep() can be interrupted by a + * signal. Most likely SIGCHLD in this case */ + ret = orte_odls_globals.timeout_before_sigkill; + while( ret > 0 ) { + OPAL_OUTPUT_VERBOSE((5, orte_odls_base_framework.framework_output, + "%s Sleep %d sec (total = %d)", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), + ret, orte_odls_globals.timeout_before_sigkill)); + ret = sleep(ret); + } + /* issue a SIGTERM to all */ OPAL_LIST_FOREACH(cd, &procs_killed, orte_odls_quick_caddy_t) { OPAL_OUTPUT_VERBOSE((5, orte_odls_base_framework.framework_output, @@ -1940,8 +1932,18 @@ int orte_odls_base_default_kill_local_procs(opal_pointer_array_t *procs, ORTE_NAME_PRINT(&cd->child->name))); kill_local(cd->child->pid, SIGTERM); } - /* wait a little again */ - sleep(orte_odls_globals.timeout_before_sigkill); + + /* Wait a little. Do so in a loop since sleep() can be interrupted by a + * signal. Most likely SIGCHLD in this case */ + ret = orte_odls_globals.timeout_before_sigkill; + while( ret > 0 ) { + OPAL_OUTPUT_VERBOSE((5, orte_odls_base_framework.framework_output, + "%s Sleep %d sec (total = %d)", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), + ret, orte_odls_globals.timeout_before_sigkill)); + ret = sleep(ret); + } + /* issue a SIGKILL to all */ OPAL_LIST_FOREACH(cd, &procs_killed, orte_odls_quick_caddy_t) { OPAL_OUTPUT_VERBOSE((5, orte_odls_base_framework.framework_output, diff --git a/orte/mca/odls/base/odls_base_frame.c b/orte/mca/odls/base/odls_base_frame.c index f4beb709b18..48c7702e60f 100644 --- a/orte/mca/odls/base/odls_base_frame.c +++ b/orte/mca/odls/base/odls_base_frame.c @@ -28,6 +28,7 @@ #include "orte/constants.h" #include +#include #include "opal/class/opal_ring_buffer.h" #include "orte/mca/mca.h" @@ -225,6 +226,7 @@ static int orte_odls_base_open(mca_base_open_flag_t flags) int rc, i, rank; orte_namelist_t *nm; bool xterm_hold; + sigset_t unblock; ORTE_CONSTRUCT_LOCK(&orte_odls_globals.lock); orte_odls_globals.lock.active = false; // start with nobody having the thread 
@@ -243,6 +245,17 @@ static int orte_odls_base_open(mca_base_open_flag_t flags) OBJ_CONSTRUCT(&orte_odls_globals.xterm_ranks, opal_list_t); orte_odls_globals.xtermcmd = NULL; + /* ensure that SIGCHLD is unblocked as we need to capture it */ + if (0 != sigemptyset(&unblock)) { + return ORTE_ERROR; + } + if (0 != sigaddset(&unblock, SIGCHLD)) { + return ORTE_ERROR; + } + if (0 != sigprocmask(SIG_UNBLOCK, &unblock, NULL)) { + return ORTE_ERR_NOT_SUPPORTED; + } + /* check if the user requested that we display output in xterms */ if (NULL != orte_xterm) { /* construct a list of ranks to be displayed */ diff --git a/orte/mca/odls/default/odls_default_module.c b/orte/mca/odls/default/odls_default_module.c index b9c6f665ce3..ab9d6e442f6 100644 --- a/orte/mca/odls/default/odls_default_module.c +++ b/orte/mca/odls/default/odls_default_module.c @@ -302,6 +302,15 @@ static int close_open_file_descriptors(int write_fd, return ORTE_ERR_FILE_OPEN_FAILURE; } struct dirent *files; + + /* grab the fd of the opendir above so we don't close in the + * middle of the scan. */ + int dir_scan_fd = dirfd(dir); + if(dir_scan_fd < 0 ) { + return ORTE_ERR_FILE_OPEN_FAILURE; + } + + while (NULL != (files = readdir(dir))) { if (!isdigit(files->d_name[0])) { continue; @@ -315,7 +324,8 @@ static int close_open_file_descriptors(int write_fd, #if OPAL_PMIX_V1 fd != opts.p_internal[1] && #endif - fd != write_fd) { + fd != write_fd && + fd != dir_scan_fd) { close(fd); } } diff --git a/orte/mca/odls/pspawn/odls_pspawn.c b/orte/mca/odls/pspawn/odls_pspawn.c index 537f1a70ea0..7909d210c6c 100644 --- a/orte/mca/odls/pspawn/odls_pspawn.c +++ b/orte/mca/odls/pspawn/odls_pspawn.c @@ -231,6 +231,14 @@ static int close_open_file_descriptors(posix_spawn_file_actions_t *factions) return ORTE_ERR_FILE_OPEN_FAILURE; } struct dirent *files; + + /* grab the fd of the opendir above so we don't close in the + * middle of the scan. 
*/ + int dir_scan_fd = dirfd(dir); + if(dir_scan_fd < 0 ) { + return ORTE_ERR_FILE_OPEN_FAILURE; + } + while (NULL != (files = readdir(dir))) { if (!isdigit(files->d_name[0])) { continue; @@ -240,7 +248,7 @@ static int close_open_file_descriptors(posix_spawn_file_actions_t *factions) closedir(dir); return ORTE_ERR_TYPE_MISMATCH; } - if (fd >=3) { + if (fd >=3 && fd != dir_scan_fd) { posix_spawn_file_actions_addclose(factions, fd); } } diff --git a/orte/mca/oob/base/base.h b/orte/mca/oob/base/base.h index 7dfe4b0adfc..51c98f8de45 100644 --- a/orte/mca/oob/base/base.h +++ b/orte/mca/oob/base/base.h @@ -55,14 +55,12 @@ BEGIN_C_DECLS * Convenience Typedef */ typedef struct { - opal_event_base_t *ev_base; char *include; char *exclude; opal_list_t components; opal_list_t actives; int max_uri_length; opal_hash_table_t peers; - int num_threads; #if OPAL_ENABLE_TIMING bool timing; #endif @@ -121,7 +119,7 @@ ORTE_DECLSPEC void orte_oob_base_send_nb(int fd, short args, void *cbdata); __FILE__, __LINE__); \ cd = OBJ_NEW(orte_oob_send_t); \ cd->msg = (m); \ - ORTE_THREADSHIFT(cd, orte_oob_base.ev_base, \ + ORTE_THREADSHIFT(cd, orte_event_base, \ orte_oob_base_send_nb, ORTE_MSG_PRI); \ }while(0) diff --git a/orte/mca/oob/base/oob_base_frame.c b/orte/mca/oob/base/oob_base_frame.c index be5c745e507..deaf851a90c 100644 --- a/orte/mca/oob/base/oob_base_frame.c +++ b/orte/mca/oob/base/oob_base_frame.c @@ -15,7 +15,7 @@ * reserved. * Copyright (c) 2015-2016 Research Organization for Information Science * and Technology (RIST). All rights reserved. - * Copyright (c) 2017 Intel, Inc. All rights reserved. + * Copyright (c) 2017-2019 Intel, Inc. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -55,14 +55,6 @@ orte_oob_base_t orte_oob_base = {0}; static int orte_oob_base_register(mca_base_register_flag_t flags) { - orte_oob_base.num_threads = 0; - (void)mca_base_var_register("orte", "oob", "base", "num_progress_threads", - "Number of independent progress OOB messages for each interface", - MCA_BASE_VAR_TYPE_INT, NULL, 0, 0, - OPAL_INFO_LVL_9, - MCA_BASE_VAR_SCOPE_READONLY, - &orte_oob_base.num_threads); - #if OPAL_ENABLE_TIMING /* Detailed timing setup */ orte_oob_base.timing = false; @@ -91,10 +83,6 @@ static int orte_oob_base_close(void) OBJ_RELEASE(cli); } - if (!ORTE_PROC_IS_APP && !ORTE_PROC_IS_TOOL) { - opal_progress_thread_finalize("OOB-BASE"); - } - /* destruct our internal lists */ OBJ_DESTRUCT(&orte_oob_base.actives); @@ -122,13 +110,6 @@ static int orte_oob_base_open(mca_base_open_flag_t flags) opal_hash_table_init(&orte_oob_base.peers, 128); OBJ_CONSTRUCT(&orte_oob_base.actives, opal_list_t); - if (ORTE_PROC_IS_APP || ORTE_PROC_IS_TOOL) { - orte_oob_base.ev_base = orte_event_base; - } else { - orte_oob_base.ev_base = opal_progress_thread_init("OOB-BASE"); - } - - #if OPAL_ENABLE_FT_CR == 1 /* register the FT events callback */ orte_state.add_job_state(ORTE_JOB_STATE_FT_CHECKPOINT, orte_oob_base_ft_event, ORTE_ERROR_PRI); diff --git a/orte/mca/oob/tcp/help-oob-tcp.txt b/orte/mca/oob/tcp/help-oob-tcp.txt index e5562ac4708..8af0589032f 100644 --- a/orte/mca/oob/tcp/help-oob-tcp.txt +++ b/orte/mca/oob/tcp/help-oob-tcp.txt @@ -10,7 +10,7 @@ # University of Stuttgart. All rights reserved. # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. -# Copyright (c) 2014-2017 Intel, Inc. All rights reserved. +# Copyright (c) 2014-2019 Intel, Inc. All rights reserved. # Copyright (c) 2015-2018 Cisco Systems, Inc. All rights reserved # $COPYRIGHT$ # @@ -132,3 +132,7 @@ up aborting your job. 
Peer host: %s Peer process name: %s Peer Open MPI version: %s +# +[no-listeners] +No sockets were able to be opened on the available protocols +(IPv4 and/or IPv6). Please check your network and retry. diff --git a/orte/mca/oob/tcp/oob_tcp.c b/orte/mca/oob/tcp/oob_tcp.c index d5f5ce9c55d..df97be168a7 100644 --- a/orte/mca/oob/tcp/oob_tcp.c +++ b/orte/mca/oob/tcp/oob_tcp.c @@ -141,12 +141,6 @@ static void ping(const orte_process_name_t *proc) return; } - /* has this peer had a progress thread assigned yet? */ - if (NULL == peer->ev_base) { - /* nope - assign one */ - ORTE_OOB_TCP_NEXT_BASE(peer); - } - /* if we are already connected, there is nothing to do */ if (MCA_OOB_TCP_CONNECTED == peer->state) { opal_output_verbose(2, orte_oob_base_framework.framework_output, @@ -204,11 +198,7 @@ static void send_nb(orte_rml_send_t *msg) __FILE__, __LINE__, ORTE_NAME_PRINT(&msg->dst), msg->tag, msg->seq_num, ORTE_NAME_PRINT(&peer->name)); - /* has this peer had a progress thread assigned yet? */ - if (NULL == peer->ev_base) { - /* nope - assign one */ - ORTE_OOB_TCP_NEXT_BASE(peer); - } + /* add the msg to the hop's send queue */ if (MCA_OOB_TCP_CONNECTED == peer->state) { opal_output_verbose(2, orte_oob_base_framework.framework_output, diff --git a/orte/mca/oob/tcp/oob_tcp_component.c b/orte/mca/oob/tcp/oob_tcp_component.c index 2843ce9cd3c..4398b9bd483 100644 --- a/orte/mca/oob/tcp/oob_tcp_component.c +++ b/orte/mca/oob/tcp/oob_tcp_component.c @@ -147,12 +147,8 @@ mca_oob_tcp_component_t mca_oob_tcp_component = { */ static int tcp_component_open(void) { - mca_oob_tcp_component.next_base = 0; OBJ_CONSTRUCT(&mca_oob_tcp_component.peers, opal_hash_table_t); opal_hash_table_init(&mca_oob_tcp_component.peers, 32); - OBJ_CONSTRUCT(&mca_oob_tcp_component.ev_bases, opal_pointer_array_t); - opal_pointer_array_init(&mca_oob_tcp_component.ev_bases, - orte_oob_base.num_threads, 256, 8); OBJ_CONSTRUCT(&mca_oob_tcp_component.listeners, opal_list_t); if (ORTE_PROC_IS_HNP) { @@ -188,9 
+184,6 @@ static int tcp_component_open(void) */ static int tcp_component_close(void) { - /* cleanup listen event list */ - OPAL_LIST_DESTRUCT(&mca_oob_tcp_component.listeners); - OBJ_DESTRUCT(&mca_oob_tcp_component.peers); if (NULL != mca_oob_tcp_component.ipv4conns) { @@ -209,8 +202,6 @@ static int tcp_component_close(void) } #endif - OBJ_DESTRUCT(&mca_oob_tcp_component.ev_bases); - return ORTE_SUCCESS; } static char *static_port_string; @@ -667,27 +658,11 @@ static orte_rml_pathway_t* component_query_transports(void) static int component_startup(void) { int rc = ORTE_SUCCESS; - int i; - char *tmp; - opal_event_base_t *evb; opal_output_verbose(2, orte_oob_base_framework.framework_output, "%s TCP STARTUP", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)); - /* initialize state */ - if (0 == orte_oob_base.num_threads) { - opal_pointer_array_add(&mca_oob_tcp_component.ev_bases, orte_oob_base.ev_base); - } else { - for (i=0; i < orte_oob_base.num_threads; i++) { - asprintf(&tmp, "OOB-TCP-%d", i); - evb = opal_progress_thread_init(tmp); - opal_pointer_array_add(&mca_oob_tcp_component.ev_bases, evb); - opal_argv_append_nosize(&mca_oob_tcp_component.ev_threads, tmp); - free(tmp); - } - } - /* if we are a daemon/HNP, or we are a standalone app, * then it is possible that someone else may initiate a * connection to us. 
In these cases, we need to start the @@ -715,14 +690,6 @@ static void component_shutdown(void) "%s TCP SHUTDOWN", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)); - if (0 < orte_oob_base.num_threads) { - for (i=0; i < orte_oob_base.num_threads; i++) { - opal_progress_thread_finalize(mca_oob_tcp_component.ev_threads[i]); - opal_pointer_array_set_item(&mca_oob_tcp_component.ev_bases, i, NULL); - } - opal_argv_free(mca_oob_tcp_component.ev_threads); - } - if (ORTE_PROC_IS_HNP && mca_oob_tcp_component.listen_thread_active) { mca_oob_tcp_component.listen_thread_active = false; /* tell the thread to exit */ @@ -748,6 +715,9 @@ static void component_shutdown(void) (void **) &peer, node, &node); } + /* cleanup listen event list */ + OPAL_LIST_DESTRUCT(&mca_oob_tcp_component.listeners); + opal_output_verbose(2, orte_oob_base_framework.framework_output, "%s TCP SHUTDOWN done", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)); @@ -1366,7 +1336,6 @@ static char **split_and_resolve(char **orig_str, char *name) static void peer_cons(mca_oob_tcp_peer_t *peer) { - peer->ev_base = NULL; peer->auth_method = NULL; peer->sd = -1; OBJ_CONSTRUCT(&peer->addrs, opal_list_t); diff --git a/orte/mca/oob/tcp/oob_tcp_component.h b/orte/mca/oob/tcp/oob_tcp_component.h index 37f91466f0e..7a36ea3c30b 100644 --- a/orte/mca/oob/tcp/oob_tcp_component.h +++ b/orte/mca/oob/tcp/oob_tcp_component.h @@ -12,7 +12,7 @@ * Copyright (c) 2006-2013 Los Alamos National Security, LLC. * All rights reserved. * Copyright (c) 2010-2011 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2014-2017 Intel, Inc. All rights reserved. + * Copyright (c) 2014-2019 Intel, Inc. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -48,9 +48,6 @@ typedef struct { int max_retries; /**< max number of retries before declaring peer gone */ opal_list_t events; /**< events for monitoring connections */ int peer_limit; /**< max size of tcp peer cache */ - opal_pointer_array_t ev_bases; // event base array for progress threads - char** ev_threads; // event progress thread names - int next_base; // counter to load-level thread use opal_hash_table_t peers; // connection addresses for peers /* Port specifications */ @@ -96,13 +93,4 @@ ORTE_MODULE_DECLSPEC void mca_oob_tcp_component_failed_to_connect(int fd, short ORTE_MODULE_DECLSPEC void mca_oob_tcp_component_no_route(int fd, short args, void *cbdata); ORTE_MODULE_DECLSPEC void mca_oob_tcp_component_hop_unknown(int fd, short args, void *cbdata); -#define ORTE_OOB_TCP_NEXT_BASE(p) \ - do { \ - ++mca_oob_tcp_component.next_base; \ - if (orte_oob_base.num_threads <= mca_oob_tcp_component.next_base) { \ - mca_oob_tcp_component.next_base = 0; \ - } \ - (p)->ev_base = (opal_event_base_t*)opal_pointer_array_get_item(&mca_oob_tcp_component.ev_bases, mca_oob_tcp_component.next_base); \ - } while(0) - #endif /* _MCA_OOB_TCP_COMPONENT_H_ */ diff --git a/orte/mca/oob/tcp/oob_tcp_connection.c b/orte/mca/oob/tcp/oob_tcp_connection.c index ff06ec8a97d..0358ba487b3 100644 --- a/orte/mca/oob/tcp/oob_tcp_connection.c +++ b/orte/mca/oob/tcp/oob_tcp_connection.c @@ -14,8 +14,8 @@ * Copyright (c) 2009-2018 Cisco Systems, Inc. All rights reserved * Copyright (c) 2011 Oak Ridge National Labs. All rights reserved. * Copyright (c) 2013-2017 Intel, Inc. All rights reserved. - * Copyright (c) 2014-2015 Research Organization for Information Science - * and Technology (RIST). All rights reserved. + * Copyright (c) 2014-2018 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved. 
* $COPYRIGHT$ * @@ -83,6 +83,14 @@ #include "oob_tcp_common.h" #include "oob_tcp_connection.h" +#define OOB_TCP_STR_EXPAND(tok) #tok +#define OOB_TCP_STR(tok) OOB_TCP_STR_EXPAND(tok) + +/* + * See discussion at https://github.com/open-mpi/ompi/pull/6157 + */ +static char * oob_tcp_version_string = "4.0.0"; + static void tcp_peer_event_init(mca_oob_tcp_peer_t* peer); static int tcp_peer_send_connect_ack(mca_oob_tcp_peer_t* peer); static int tcp_peer_send_connect_nack(int sd, orte_process_name_t name); @@ -417,7 +425,7 @@ static int tcp_peer_send_connect_ack(mca_oob_tcp_peer_t* peer) memset(hdr.routed, 0, ORTE_MAX_RTD_SIZE+1); /* payload size */ - sdsize = sizeof(ack_flag) + strlen(orte_version_string) + 1; + sdsize = sizeof(ack_flag) + strlen(oob_tcp_version_string) + 1; hdr.nbytes = sdsize; MCA_OOB_TCP_HDR_HTON(&hdr); @@ -433,8 +441,8 @@ static int tcp_peer_send_connect_ack(mca_oob_tcp_peer_t* peer) offset += sizeof(hdr); memcpy(msg + offset, &ack_flag, sizeof(ack_flag)); offset += sizeof(ack_flag); - memcpy(msg + offset, orte_version_string, strlen(orte_version_string)); - offset += strlen(orte_version_string)+1; + memcpy(msg + offset, oob_tcp_version_string, strlen(oob_tcp_version_string)); + offset += strlen(oob_tcp_version_string)+1; /* send it */ if (ORTE_SUCCESS != tcp_peer_send_blocking(peer->sd, msg, sdsize)) { @@ -507,10 +515,7 @@ static void tcp_peer_event_init(mca_oob_tcp_peer_t* peer) { if (peer->sd >= 0) { assert(!peer->send_ev_active && !peer->recv_ev_active); - if (NULL == peer->ev_base) { - ORTE_OOB_TCP_NEXT_BASE(peer); - } - opal_event_set(peer->ev_base, + opal_event_set(orte_event_base, &peer->recv_event, peer->sd, OPAL_EV_READ|OPAL_EV_PERSIST, @@ -522,7 +527,7 @@ static void tcp_peer_event_init(mca_oob_tcp_peer_t* peer) peer->recv_ev_active = false; } - opal_event_set(peer->ev_base, + opal_event_set(orte_event_base, &peer->send_event, peer->sd, OPAL_EV_WRITE|OPAL_EV_PERSIST, @@ -803,7 +808,6 @@ int 
mca_oob_tcp_peer_recv_connect_ack(mca_oob_tcp_peer_t* pr, ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)); peer = OBJ_NEW(mca_oob_tcp_peer_t); peer->name = hdr.origin; - ORTE_OOB_TCP_NEXT_BASE(peer); // assign it an event base peer->state = MCA_OOB_TCP_ACCEPTING; ui64 = (uint64_t*)(&peer->name); if (OPAL_SUCCESS != opal_hash_table_set_value_uint64(&mca_oob_tcp_component.peers, (*ui64), peer)) { @@ -905,12 +909,12 @@ int mca_oob_tcp_peer_recv_connect_ack(mca_oob_tcp_peer_t* pr, /* check that this is from a matching version */ version = (char*)((char*)msg + offset); offset += strlen(version) + 1; - if (0 != strcmp(version, orte_version_string)) { + if (0 != strcmp(version, oob_tcp_version_string)) { opal_show_help("help-oob-tcp.txt", "version mismatch", true, opal_process_info.nodename, ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - orte_version_string, + oob_tcp_version_string, opal_fd_get_peer_name(peer->sd), ORTE_NAME_PRINT(&(peer->name)), version); diff --git a/orte/mca/oob/tcp/oob_tcp_connection.h b/orte/mca/oob/tcp/oob_tcp_connection.h index e1392fe781c..0cac37d8da9 100644 --- a/orte/mca/oob/tcp/oob_tcp_connection.h +++ b/orte/mca/oob/tcp/oob_tcp_connection.h @@ -12,7 +12,7 @@ * Copyright (c) 2006-2013 Los Alamos National Security, LLC. * All rights reserved. * Copyright (c) 2010-2011 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2014-2017 Intel, Inc. All rights reserved. + * Copyright (c) 2014-2019 Intel, Inc. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -60,14 +60,14 @@ OBJ_CLASS_DECLARATION(mca_oob_tcp_conn_op_t); ORTE_NAME_PRINT((&(p)->name))); \ cop = OBJ_NEW(mca_oob_tcp_conn_op_t); \ cop->peer = (p); \ - ORTE_THREADSHIFT(cop, (p)->ev_base, (cbfunc), ORTE_MSG_PRI); \ + ORTE_THREADSHIFT(cop, orte_event_base, (cbfunc), ORTE_MSG_PRI); \ } while(0); #define ORTE_ACTIVATE_TCP_ACCEPT_STATE(s, a, cbfunc) \ do { \ mca_oob_tcp_conn_op_t *cop; \ cop = OBJ_NEW(mca_oob_tcp_conn_op_t); \ - opal_event_set(orte_oob_base.ev_base, &cop->ev, s, \ + opal_event_set(orte_event_base, &cop->ev, s, \ OPAL_EV_READ, (cbfunc), cop); \ opal_event_set_priority(&cop->ev, ORTE_MSG_PRI); \ ORTE_POST_OBJECT(cop); \ @@ -84,7 +84,7 @@ OBJ_CLASS_DECLARATION(mca_oob_tcp_conn_op_t); ORTE_NAME_PRINT((&(p)->name))); \ cop = OBJ_NEW(mca_oob_tcp_conn_op_t); \ cop->peer = (p); \ - opal_event_evtimer_set((p)->ev_base, \ + opal_event_evtimer_set(orte_event_base, \ &cop->ev, \ (cbfunc), cop); \ ORTE_POST_OBJECT(cop); \ diff --git a/orte/mca/oob/tcp/oob_tcp_listener.c b/orte/mca/oob/tcp/oob_tcp_listener.c index e97103c07f1..41c4aeb51c3 100644 --- a/orte/mca/oob/tcp/oob_tcp_listener.c +++ b/orte/mca/oob/tcp/oob_tcp_listener.c @@ -13,7 +13,7 @@ * All rights reserved. * Copyright (c) 2009-2015 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2011 Oak Ridge National Labs. All rights reserved. - * Copyright (c) 2013-2017 Intel, Inc. All rights reserved. + * Copyright (c) 2013-2019 Intel, Inc. All rights reserved. * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ @@ -98,7 +98,7 @@ static void connection_event_handler(int sd, short flags, void* cbdata); */ int orte_oob_tcp_start_listening(void) { - int rc; + int rc = ORTE_SUCCESS, rc2 = ORTE_SUCCESS; mca_oob_tcp_listener_t *listener; /* if we don't have any TCP interfaces, we shouldn't be here */ @@ -112,19 +112,19 @@ int orte_oob_tcp_start_listening(void) } /* create listen socket(s) for incoming connection attempts */ - if (ORTE_SUCCESS != (rc = create_listen())) { - ORTE_ERROR_LOG(rc); - return rc; - } + rc = create_listen(); #if OPAL_ENABLE_IPV6 /* create listen socket(s) for incoming connection attempts */ - if (ORTE_SUCCESS != (rc = create_listen6())) { - ORTE_ERROR_LOG(rc); - return rc; - } + rc2 = create_listen6(); #endif + if (ORTE_SUCCESS != rc && ORTE_SUCCESS != rc2) { + /* we were unable to open any listening sockets */ + opal_show_help("help-oob-tcp.txt", "no-listeners", true); + return ORTE_ERR_FATAL; + } + /* if I am the HNP, start a listening thread so we can * harvest connection requests as rapidly as possible */ @@ -157,7 +157,7 @@ int orte_oob_tcp_start_listening(void) /* otherwise, setup to listen via the event lib */ OPAL_LIST_FOREACH(listener, &mca_oob_tcp_component.listeners, mca_oob_tcp_listener_t) { listener->ev_active = true; - opal_event_set(orte_oob_base.ev_base, &listener->event, + opal_event_set(orte_event_base, &listener->event, listener->sd, OPAL_EV_READ|OPAL_EV_PERSIST, connection_event_handler, @@ -744,7 +744,7 @@ static void* listen_thread(opal_object_t *obj) * OS might start rejecting connections due to timeout. 
*/ pending_connection = OBJ_NEW(mca_oob_tcp_pending_connection_t); - opal_event_set(orte_oob_base.ev_base, &pending_connection->ev, -1, + opal_event_set(orte_event_base, &pending_connection->ev, -1, OPAL_EV_WRITE, connection_handler, pending_connection); opal_event_set_priority(&pending_connection->ev, ORTE_MSG_PRI); pending_connection->fd = accept(sd, diff --git a/orte/mca/oob/tcp/oob_tcp_peer.h b/orte/mca/oob/tcp/oob_tcp_peer.h index 8d04fd44387..395e4f8b5fc 100644 --- a/orte/mca/oob/tcp/oob_tcp_peer.h +++ b/orte/mca/oob/tcp/oob_tcp_peer.h @@ -52,7 +52,6 @@ typedef struct { mca_oob_tcp_addr_t *active_addr; mca_oob_tcp_state_t state; int num_retries; - opal_event_base_t *ev_base; // progress thread this peer is assigned to opal_event_t send_event; /**< registration with event thread for send events */ bool send_ev_active; opal_event_t recv_event; /**< registration with event thread for recv events */ @@ -88,7 +87,7 @@ OBJ_CLASS_DECLARATION(mca_oob_tcp_peer_op_t); if (NULL != proxy) { \ pop->rtmod = strdup(proxy); \ } \ - ORTE_THREADSHIFT(pop, orte_oob_base.ev_base, \ + ORTE_THREADSHIFT(pop, orte_event_base, \ (cbfunc), ORTE_MSG_PRI); \ } while(0); diff --git a/orte/mca/oob/tcp/oob_tcp_sendrecv.h b/orte/mca/oob/tcp/oob_tcp_sendrecv.h index 9412a4e0fd6..3ab8f479343 100644 --- a/orte/mca/oob/tcp/oob_tcp_sendrecv.h +++ b/orte/mca/oob/tcp/oob_tcp_sendrecv.h @@ -82,7 +82,7 @@ OBJ_CLASS_DECLARATION(mca_oob_tcp_recv_t); do { \ (s)->peer = (struct mca_oob_tcp_peer_t*)(p); \ (s)->activate = (f); \ - ORTE_THREADSHIFT((s), (p)->ev_base, \ + ORTE_THREADSHIFT((s), orte_event_base, \ mca_oob_tcp_queue_msg, ORTE_MSG_PRI); \ } while(0) @@ -235,7 +235,7 @@ OBJ_CLASS_DECLARATION(mca_oob_tcp_msg_op_t); ORTE_NAME_PRINT(&((ms)->dst))); \ mop = OBJ_NEW(mca_oob_tcp_msg_op_t); \ mop->msg = (ms); \ - ORTE_THREADSHIFT(mop, (ms)->peer->ev_base, \ + ORTE_THREADSHIFT(mop, orte_event_base, \ (cbfunc), ORTE_MSG_PRI); \ } while(0); @@ -281,7 +281,7 @@ 
OBJ_CLASS_DECLARATION(mca_oob_tcp_msg_error_t); mop->hop.jobid = (h)->jobid; \ mop->hop.vpid = (h)->vpid; \ /* this goes to the OOB framework, so use that event base */ \ - ORTE_THREADSHIFT(mop, orte_oob_base.ev_base, \ + ORTE_THREADSHIFT(mop, orte_event_base, \ (cbfunc), ORTE_MSG_PRI); \ } while(0) @@ -299,7 +299,7 @@ OBJ_CLASS_DECLARATION(mca_oob_tcp_msg_error_t); mop->hop.vpid = (h)->vpid; \ /* this goes to the component, so use the framework \ * event base */ \ - ORTE_THREADSHIFT(mop, orte_oob_base.ev_base, \ + ORTE_THREADSHIFT(mop, orte_event_base, \ (c), ORTE_MSG_PRI); \ } while(0) diff --git a/orte/mca/plm/rsh/plm_rsh.h b/orte/mca/plm/rsh/plm_rsh.h index c523b99c55a..1b858e89210 100644 --- a/orte/mca/plm/rsh/plm_rsh.h +++ b/orte/mca/plm/rsh/plm_rsh.h @@ -12,7 +12,7 @@ * Copyright (c) 2011 Los Alamos National Security, LLC. All rights * reserved. * Copyright (c) 2008 Sun Microsystems, Inc. All rights reserved. - * Copyright (c) 2011 IBM Corporation. All rights reserved. + * Copyright (c) 2011-2019 IBM Corporation. All rights reserved. * Copyright (c) 2015 Intel, Inc. All rights reserved. * $COPYRIGHT$ * @@ -65,6 +65,7 @@ struct orte_plm_rsh_component_t { bool pass_environ_mca_params; char *ssh_args; char *pass_libpath; + char *chdir; }; typedef struct orte_plm_rsh_component_t orte_plm_rsh_component_t; diff --git a/orte/mca/plm/rsh/plm_rsh_component.c b/orte/mca/plm/rsh/plm_rsh_component.c index bb87725befa..c78f0c0251b 100644 --- a/orte/mca/plm/rsh/plm_rsh_component.c +++ b/orte/mca/plm/rsh/plm_rsh_component.c @@ -16,7 +16,7 @@ * Copyright (c) 2010 Oracle and/or its affiliates. All rights * reserved. * Copyright (c) 2009-2016 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2011 IBM Corporation. All rights reserved. + * Copyright (c) 2011-2019 IBM Corporation. All rights reserved. * Copyright (c) 2015-2017 Intel, Inc. All rights reserved. 
* $COPYRIGHT$ * @@ -222,6 +222,14 @@ static int rsh_component_register(void) MCA_BASE_VAR_SCOPE_READONLY, &mca_plm_rsh_component.pass_libpath); + mca_plm_rsh_component.chdir = NULL; + (void) mca_base_component_var_register (c, "chdir", + "Change working directory after rsh/ssh, but before exec of orted", + MCA_BASE_VAR_TYPE_STRING, NULL, 0, 0, + OPAL_INFO_LVL_2, + MCA_BASE_VAR_SCOPE_READONLY, + &mca_plm_rsh_component.chdir); + return ORTE_SUCCESS; } diff --git a/orte/mca/plm/rsh/plm_rsh_module.c b/orte/mca/plm/rsh/plm_rsh_module.c index 7e34de9ecfe..0a795f077d1 100644 --- a/orte/mca/plm/rsh/plm_rsh_module.c +++ b/orte/mca/plm/rsh/plm_rsh_module.c @@ -13,7 +13,7 @@ * Copyright (c) 2007-2012 Los Alamos National Security, LLC. All rights * reserved. * Copyright (c) 2008-2009 Sun Microsystems, Inc. All rights reserved. - * Copyright (c) 2011-2017 IBM Corporation. All rights reserved. + * Copyright (c) 2011-2019 IBM Corporation. All rights reserved. * Copyright (c) 2014-2018 Intel, Inc. All rights reserved. * Copyright (c) 2015-2018 Research Organization for Information Science * and Technology (RIST). All rights reserved. @@ -343,11 +343,12 @@ static int setup_launch(int *argcptr, char ***argvptr, char *orted_cmd, *orted_prefix, *final_cmd; int orted_index; int rc; - int i, j; + int i, j, cnt; bool found; char *lib_base=NULL, *bin_base=NULL; char *opal_prefix = getenv("OPAL_PREFIX"); char* full_orted_cmd = NULL; + char * rtmod; /* Figure out the basenames for the libdir and bindir. This requires some explanation: @@ -500,10 +501,13 @@ static int setup_launch(int *argcptr, char ***argvptr, * we have to insert the orted_prefix in the right place */ (void)asprintf (&final_cmd, - "%s%s%s PATH=%s%s$PATH ; export PATH ; " + "%s%s%s%s%s%s PATH=%s%s$PATH ; export PATH ; " "LD_LIBRARY_PATH=%s%s$LD_LIBRARY_PATH ; export LD_LIBRARY_PATH ; " "DYLD_LIBRARY_PATH=%s%s$DYLD_LIBRARY_PATH ; export DYLD_LIBRARY_PATH ; " "%s %s", + (NULL != mca_plm_rsh_component.chdir ? 
"cd " : " "), + (NULL != mca_plm_rsh_component.chdir ? mca_plm_rsh_component.chdir : " "), + (NULL != mca_plm_rsh_component.chdir ? " ; " : " "), (opal_prefix != NULL ? "OPAL_PREFIX=" : " "), (opal_prefix != NULL ? opal_prefix : " "), (opal_prefix != NULL ? " ; export OPAL_PREFIX;" : " "), @@ -530,7 +534,7 @@ static int setup_launch(int *argcptr, char ***argvptr, * we have to insert the orted_prefix in the right place */ (void)asprintf (&final_cmd, - "%s%s%s set path = ( %s $path ) ; " + "%s%s%s%s%s%s set path = ( %s $path ) ; " "if ( $?LD_LIBRARY_PATH == 1 ) " "set OMPI_have_llp ; " "if ( $?LD_LIBRARY_PATH == 0 ) " @@ -544,6 +548,9 @@ static int setup_launch(int *argcptr, char ***argvptr, "if ( $?OMPI_have_dllp == 1 ) " "setenv DYLD_LIBRARY_PATH %s%s$DYLD_LIBRARY_PATH ; " "%s %s", + (NULL != mca_plm_rsh_component.chdir ? "cd " : " "), + (NULL != mca_plm_rsh_component.chdir ? mca_plm_rsh_component.chdir : " "), + (NULL != mca_plm_rsh_component.chdir ? " ; " : " "), (opal_prefix != NULL ? "setenv OPAL_PREFIX " : " "), (opal_prefix != NULL ? opal_prefix : " "), (opal_prefix != NULL ? " ;" : " "), @@ -609,6 +616,18 @@ static int setup_launch(int *argcptr, char ***argvptr, (mca_plm_rsh_component.using_llspawn && mca_plm_rsh_component.daemonize_llspawn))) { } + if (!mca_plm_rsh_component.no_tree_spawn) { + // Remove problematic and/or conflicting command line arguments that + // should not be passed on to our children. 
+ cnt = opal_argv_count(orted_cmd_line); + for (i=0; i < cnt; i+=3) { + if (0 == strcmp(orted_cmd_line[i+1], "routed")) { + opal_argv_delete(&cnt, &orted_cmd_line, i, 3); + break; + } + } + } + /* * Add the basic arguments to the orted command line, including * all debug options @@ -627,6 +646,16 @@ static int setup_launch(int *argcptr, char ***argvptr, if (!mca_plm_rsh_component.no_tree_spawn) { opal_argv_append(&argc, &argv, "--tree-spawn"); orte_oob_base_get_addr(&param); + + // When tree-spawn'ing we need to force the remote daemons to use + // the routing component that was used to setup the launch tree. + // Otherwise the orte_parent_uri will not match the orted they + // expect to find in the routing tree. + rtmod = orte_rml.get_routed(orte_coll_conduit); + opal_argv_append(&argc, &argv, "-"OPAL_MCA_CMD_LINE_ID); + opal_argv_append(&argc, &argv, "routed"); + opal_argv_append(&argc, &argv, rtmod); + opal_argv_append(&argc, &argv, "-"OPAL_MCA_CMD_LINE_ID); opal_argv_append(&argc, &argv, "orte_parent_uri"); opal_argv_append(&argc, &argv, param); @@ -1187,6 +1216,10 @@ static void launch_daemons(int fd, short args, void *cbdata) OBJ_CONSTRUCT(&coll, opal_list_t); rtmod = orte_rml.get_routed(orte_coll_conduit); orte_routed.get_routing_list(rtmod, &coll); + + OPAL_OUTPUT_VERBOSE((1, orte_plm_base_framework.framework_output, + "%s plm:rsh:launch Tree Launch using routed/%s", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), rtmod)); } /* setup the launch */ diff --git a/orte/mca/plm/slurm/plm_slurm_module.c b/orte/mca/plm/slurm/plm_slurm_module.c index 568d2c40262..f8de6509e95 100644 --- a/orte/mca/plm/slurm/plm_slurm_module.c +++ b/orte/mca/plm/slurm/plm_slurm_module.c @@ -9,7 +9,7 @@ * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. - * Copyright (c) 2006-2018 Cisco Systems, Inc. All rights reserved + * Copyright (c) 2006-2019 Cisco Systems, Inc. 
All rights reserved * Copyright (c) 2007-2015 Los Alamos National Security, LLC. All rights * reserved. * Copyright (c) 2014-2017 Intel, Inc. All rights reserved. @@ -272,14 +272,6 @@ static void launch_daemons(int fd, short args, void *cbdata) opal_argv_append(&argc, &argv, "--kill-on-bad-exit"); } - /* ensure the orteds are not bound to a single processor, - * just in case the TaskAffinity option is set by default. - * This will *not* release the orteds from any cpu-set - * constraint, but will ensure it doesn't get - * bound to only one processor - */ - opal_argv_append(&argc, &argv, "--cpu_bind=none"); - #if SLURM_CRAY_ENV /* * If in a SLURM/Cray env. make sure that Cray PMI is not pulled in, @@ -420,6 +412,23 @@ static void launch_daemons(int fd, short args, void *cbdata) /* setup environment */ env = opal_argv_copy(orte_launch_environ); + /* ensure the orteds are not bound to a single processor, + * just in case the TaskAffinity option is set by default. + * This will *not* release the orteds from any cpu-set + * constraint, but will ensure it doesn't get + * bound to only one processor + * + * NOTE: We used to pass --cpu_bind=none on the command line. But + * SLURM 19 changed this to --cpu-bind. There is no easy way to + * test at run time which of these two parameters is used (see + * https://github.com/open-mpi/ompi/pull/6654). There was + * discussion of using --test-only to see which one works, but + * --test-only is only effective if you're not already inside a + * SLURM allocation. Instead, set the env var SLURM_CPU_BIND to + * "none", which should do the same thing as --cpu*bind=none. 
+ */ + opal_setenv("SLURM_CPU_BIND", "none", true, &env); + if (0 < opal_output_get_verbosity(orte_plm_base_framework.framework_output)) { param = opal_argv_join(argv, ' '); opal_output(orte_plm_base_framework.framework_output, diff --git a/orte/mca/ras/lsf/ras_lsf_module.c b/orte/mca/ras/lsf/ras_lsf_module.c index becec82f213..6dd3b68be5f 100644 --- a/orte/mca/ras/lsf/ras_lsf_module.c +++ b/orte/mca/ras/lsf/ras_lsf_module.c @@ -10,7 +10,7 @@ * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2007-2017 Cisco Systems, Inc. All rights reserved - * Copyright (c) 2014 Intel, Inc. All rights reserved + * Copyright (c) 2014-2019 Intel, Inc. All rights reserved. * Copyright (c) 2016 IBM Corporation. All rights reserved. * $COPYRIGHT$ * @@ -36,6 +36,7 @@ #include "orte/mca/rmaps/rmaps_types.h" #include "orte/mca/errmgr/errmgr.h" +#include "orte/mca/rmaps/base/base.h" #include "orte/runtime/orte_globals.h" #include "orte/util/show_help.h" @@ -70,6 +71,7 @@ static int allocate(orte_job_t *jdata, opal_list_t *nodes) char *affinity_file; struct stat buf; char *ptr; + bool directives_given = false; /* get the list of allocated nodes */ if ((num_nodes = lsb_getalloc(&nodelist)) < 0) { @@ -112,8 +114,19 @@ static int allocate(orte_job_t *jdata, opal_list_t *nodes) /* release the nodelist from lsf */ opal_argv_free(nodelist); + /* check to see if any mapping or binding directives were given */ + if (NULL != jdata && NULL != jdata->map) { + if ((ORTE_MAPPING_GIVEN & ORTE_GET_MAPPING_DIRECTIVE(jdata->map->mapping)) || + OPAL_BINDING_POLICY_IS_SET(jdata->map->binding)) { + directives_given = true; + } + } else if ((ORTE_MAPPING_GIVEN & ORTE_GET_MAPPING_DIRECTIVE(orte_rmaps_base.mapping)) || + OPAL_BINDING_POLICY_IS_SET(opal_hwloc_binding_policy)) { + directives_given = true; + } + /* check for an affinity file */ - if (NULL != (affinity_file = getenv("LSB_AFFINITY_HOSTFILE"))) { + if (!directives_given && NULL != 
(affinity_file = getenv("LSB_AFFINITY_HOSTFILE"))) { /* check to see if the file is empty - if it is, * then affinity wasn't actually set for this job */ if (0 != stat(affinity_file, &buf)) { diff --git a/orte/mca/regx/base/regx_base_default_fns.c b/orte/mca/regx/base/regx_base_default_fns.c index 6b70f78cad0..4e1645d16b5 100644 --- a/orte/mca/regx/base/regx_base_default_fns.c +++ b/orte/mca/regx/base/regx_base_default_fns.c @@ -1,6 +1,6 @@ /* * Copyright (c) 2016-2018 Intel, Inc. All rights reserved. - * Copyright (c) 2018 Research Organization for Information Science + * Copyright (c) 2018-2019 Research Organization for Information Science * and Technology (RIST). All rights reserved. * $COPYRIGHT$ * @@ -1056,7 +1056,7 @@ static int regex_parse_node_range(char *base, char *range, int num_digits, char for (found = false, i = 0; i < len; ++i) { if (isdigit((int) range[i])) { if (!found) { - start = atoi(range + i); + start = strtol(range + i, NULL, 10); found = true; break; } diff --git a/orte/mca/regx/fwd/regx_fwd.c b/orte/mca/regx/fwd/regx_fwd.c index 893b96e0ae8..bdb7e145bf2 100644 --- a/orte/mca/regx/fwd/regx_fwd.c +++ b/orte/mca/regx/fwd/regx_fwd.c @@ -154,38 +154,25 @@ static int nidmap_create(opal_pointer_array_t *pool, char **regex) } /* is this node name already on our list? 
*/ found = false; - for (item = opal_list_get_first(&nodenms); - !found && item != opal_list_get_end(&nodenms); - item = opal_list_get_next(item)) { - ndreg = (orte_regex_node_t*)item; - if (0 < strlen(prefix) && NULL == ndreg->prefix) { - continue; - } - if (0 == strlen(prefix) && NULL != ndreg->prefix) { - continue; - } - if (0 < strlen(prefix) && NULL != ndreg->prefix - && 0 != strcmp(prefix, ndreg->prefix)) { - continue; - } - if (NULL == suffix && NULL != ndreg->suffix) { - continue; - } - if (NULL != suffix && NULL == ndreg->suffix) { - continue; - } - if (NULL != suffix && NULL != ndreg->suffix && - 0 != strcmp(suffix, ndreg->suffix)) { - continue; - } - if (numdigits != ndreg->num_digits) { - continue; + if (0 != opal_list_get_size(&nodenms)) { + ndreg = (orte_regex_node_t*)opal_list_get_last(&nodenms); + + if ((0 < strlen(prefix) && NULL == ndreg->prefix) || + (0 == strlen(prefix) && NULL != ndreg->prefix) || + (0 < strlen(prefix) && NULL != ndreg->prefix && + 0 != strcmp(prefix, ndreg->prefix)) || + (NULL == suffix && NULL != ndreg->suffix) || + (NULL != suffix && NULL == ndreg->suffix) || + (NULL != suffix && NULL != ndreg->suffix && + 0 != strcmp(suffix, ndreg->suffix)) || + (numdigits != ndreg->num_digits)) { + found = false; + } else { + /* found a match - flag it */ + found = true; } - /* found a match - flag it */ - found = true; - /* get the last range on this nodeid - we do this - * to preserve order - */ + } + if (found) { range = (orte_regex_range_t*)opal_list_get_last(&ndreg->ranges); if (NULL == range) { /* first range for this nodeid */ @@ -193,22 +180,18 @@ static int nidmap_create(opal_pointer_array_t *pool, char **regex) range->vpid = nodenum; range->cnt = 1; opal_list_append(&ndreg->ranges, &range->super); - break; - } /* see if the node number is out of sequence */ - if (nodenum != (range->vpid + range->cnt)) { + } else if (nodenum != (range->vpid + range->cnt)) { /* start a new range */ range = OBJ_NEW(orte_regex_range_t); range->vpid = 
nodenum; range->cnt = 1; opal_list_append(&ndreg->ranges, &range->super); - break; + } else { + /* everything matches - just increment the cnt */ + range->cnt++; } - /* everything matches - just increment the cnt */ - range->cnt++; - break; - } - if (!found) { + } else { /* need to add it */ ndreg = OBJ_NEW(orte_regex_node_t); if (0 < strlen(prefix)) { @@ -313,5 +296,9 @@ static int nidmap_create(opal_pointer_array_t *pool, char **regex) free(nodenames); free(tmp); *regex = tmp2; + opal_output_verbose(5, orte_regx_base_framework.framework_output, + "%s Final regex: <%s>", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), + *regex); return ORTE_SUCCESS; } diff --git a/orte/mca/regx/naive/Makefile.am b/orte/mca/regx/naive/Makefile.am new file mode 100644 index 00000000000..0cb7fea20d6 --- /dev/null +++ b/orte/mca/regx/naive/Makefile.am @@ -0,0 +1,36 @@ +# +# Copyright (c) 2016-2018 Intel, Inc. All rights reserved. +# Copyright (c) 2019 IBM Corporation. All rights reserved. +# $COPYRIGHT$ +# +# Additional copyrights may follow +# +# $HEADER$ +# + +sources = \ + regx_naive_component.c \ + regx_naive.h \ + regx_naive.c + +# Make the output library in this directory, and name it either +# mca_<type>_<name>.la (for DSO builds) or libmca_<type>_<name>.la +# (for static builds). 
+ +if MCA_BUILD_orte_regx_naive_DSO +component_noinst = +component_install = mca_regx_naive.la +else +component_noinst = libmca_regx_naive.la +component_install = +endif + +mcacomponentdir = $(ortelibdir) +mcacomponent_LTLIBRARIES = $(component_install) +mca_regx_naive_la_SOURCES = $(sources) +mca_regx_naive_la_LDFLAGS = -module -avoid-version +mca_regx_naive_la_LIBADD = $(top_builddir)/orte/lib@ORTE_LIB_PREFIX@open-rte.la + +noinst_LTLIBRARIES = $(component_noinst) +libmca_regx_naive_la_SOURCES = $(sources) +libmca_regx_naive_la_LDFLAGS = -module -avoid-version diff --git a/orte/mca/errmgr/dvm/owner.txt b/orte/mca/regx/naive/owner.txt similarity index 91% rename from orte/mca/errmgr/dvm/owner.txt rename to orte/mca/regx/naive/owner.txt index 85b4416d206..2fd247dddb1 100644 --- a/orte/mca/errmgr/dvm/owner.txt +++ b/orte/mca/regx/naive/owner.txt @@ -3,5 +3,5 @@ # owner: institution that is responsible for this package # status: e.g. active, maintenance, unmaintained # -owner: INTEL +owner: IBM status: active diff --git a/orte/mca/regx/naive/regx_naive.c b/orte/mca/regx/naive/regx_naive.c new file mode 100644 index 00000000000..710b70fb08d --- /dev/null +++ b/orte/mca/regx/naive/regx_naive.c @@ -0,0 +1,181 @@ +/* + * Copyright (c) 2016-2018 Intel, Inc. All rights reserved. + * Copyright (c) 2019 IBM Corporation. All rights reserved. + * Copyright (c) 2018 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + * + */ + +#include "orte_config.h" +#include "orte/types.h" +#include "opal/types.h" + +#ifdef HAVE_UNISTD_H +#include +#endif +#include + +#include "opal/util/argv.h" +#include "opal/util/basename.h" +#include "opal/util/opal_environ.h" + +#include "orte/runtime/orte_globals.h" +#include "orte/util/name_fns.h" +#include "orte/util/show_help.h" +#include "orte/mca/errmgr/errmgr.h" +#include "orte/mca/rmaps/base/base.h" +#include "orte/mca/routed/routed.h" +#include "orte/mca/regx/base/base.h" + +#include "regx_naive.h" + +static int nidmap_create(opal_pointer_array_t *pool, char **regex); + +orte_regx_base_module_t orte_regx_naive_module = { + .nidmap_create = nidmap_create, + .nidmap_parse = orte_regx_base_nidmap_parse, + .extract_node_names = orte_regx_base_extract_node_names, + .encode_nodemap = orte_regx_base_encode_nodemap, + .decode_daemon_nodemap = orte_regx_base_decode_daemon_nodemap, + .generate_ppn = orte_regx_base_generate_ppn, + .parse_ppn = orte_regx_base_parse_ppn +}; + +static int nidmap_create(opal_pointer_array_t *pool, char **regex) +{ + char *node; + int n; + char *nodenames; + orte_regex_range_t *rng; + opal_list_t dvpids; + opal_list_item_t *item; + char **regexargs = NULL, **vpidargs = NULL, *tmp, *tmp2; + orte_node_t *nptr; + orte_vpid_t vpid; + + if (mca_regx_naive_component.compress_vpids) { + OBJ_CONSTRUCT(&dvpids, opal_list_t); + } + + rng = NULL; + for (n=0; n < pool->size; n++) { + if (NULL == (nptr = (orte_node_t*)opal_pointer_array_get_item(pool, n))) { + continue; + } + /* if no daemon has been assigned, then this node is not being used */ + if (NULL == nptr->daemon) { + vpid = -1; // indicates no daemon assigned + } else { + vpid = nptr->daemon->name.vpid; + } + + if (mca_regx_naive_component.compress_vpids) { + /* deal with the daemon vpid - see if it is next in the + * current range */ + if (NULL == rng) { + /* just starting */ + rng = 
OBJ_NEW(orte_regex_range_t); + rng->vpid = vpid; + rng->cnt = 1; + opal_list_append(&dvpids, &rng->super); + } else if (UINT32_MAX == vpid) { + if (-1 == rng->vpid) { + rng->cnt++; + } else { + /* need to start another range */ + rng = OBJ_NEW(orte_regex_range_t); + rng->vpid = vpid; + rng->cnt = 1; + opal_list_append(&dvpids, &rng->super); + } + } else if (-1 == rng->vpid) { + /* need to start another range */ + rng = OBJ_NEW(orte_regex_range_t); + rng->vpid = vpid; + rng->cnt = 1; + opal_list_append(&dvpids, &rng->super); + } else { + /* is this the next in line */ + if (vpid == (orte_vpid_t)(rng->vpid + rng->cnt)) { + rng->cnt++; + } else { + /* need to start another range */ + rng = OBJ_NEW(orte_regex_range_t); + rng->vpid = vpid; + rng->cnt = 1; + opal_list_append(&dvpids, &rng->super); + } + } + } + else { + asprintf(&tmp, "%u", vpid); + opal_argv_append_nosize(&vpidargs, tmp); + free(tmp); + } + + node = nptr->name; + opal_output_verbose(5, orte_regx_base_framework.framework_output, + "%s PROCESS NODE <%s>", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), + node); + + /* Don't compress the name - just add it to the list */ + if (NULL != node) { + /* solitary node */ + opal_argv_append_nosize(&regexargs, node); + } + } + + /* assemble final result */ + nodenames = opal_argv_join(regexargs, ','); + /* cleanup */ + opal_argv_free(regexargs); + + if (mca_regx_naive_component.compress_vpids) { + /* do the same for the vpids */ + tmp = NULL; + while (NULL != (item = opal_list_remove_first(&dvpids))) { + rng = (orte_regex_range_t*)item; + if (1 < rng->cnt) { + if (NULL == tmp) { + asprintf(&tmp, "%u(%u)", rng->vpid, rng->cnt); + } else { + asprintf(&tmp2, "%s,%u(%u)", tmp, rng->vpid, rng->cnt); + free(tmp); + tmp = tmp2; + } + } else { + if (NULL == tmp) { + asprintf(&tmp, "%u", rng->vpid); + } else { + asprintf(&tmp2, "%s,%u", tmp, rng->vpid); + free(tmp); + tmp = tmp2; + } + } + OBJ_RELEASE(rng); + } + OPAL_LIST_DESTRUCT(&dvpids); + } + else { + tmp = opal_argv_join(vpidargs, 
','); + /* cleanup */ + opal_argv_free(vpidargs); + } + + /* now concatenate the results into one string */ + asprintf(&tmp2, "%s@%s", nodenames, tmp); + free(nodenames); + free(tmp); + *regex = tmp2; + opal_output_verbose(5, orte_regx_base_framework.framework_output, + "%s Final regex: <%s>", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), + *regex); + return ORTE_SUCCESS; +} diff --git a/orte/mca/regx/naive/regx_naive.h b/orte/mca/regx/naive/regx_naive.h new file mode 100644 index 00000000000..012dfa25680 --- /dev/null +++ b/orte/mca/regx/naive/regx_naive.h @@ -0,0 +1,34 @@ +/* + * Copyright (c) 2016-2018 Intel, Inc. All rights reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#ifndef _MCA_REGX_NONE_H_ +#define _MCA_REGX_NONE_H_ + +#include "orte_config.h" + +#include "orte/types.h" + +#include "opal/mca/base/base.h" +#include "orte/mca/regx/regx.h" + + +BEGIN_C_DECLS + +struct orte_regx_naive_component_t { + orte_regx_base_component_t super; + bool compress_vpids; +}; +typedef struct orte_regx_naive_component_t orte_regx_naive_component_t; + +ORTE_MODULE_DECLSPEC extern orte_regx_naive_component_t mca_regx_naive_component; +extern orte_regx_base_module_t orte_regx_naive_module; + +END_C_DECLS + +#endif /* MCA_REGX_ORTE_H_ */ diff --git a/orte/mca/regx/naive/regx_naive_component.c b/orte/mca/regx/naive/regx_naive_component.c new file mode 100644 index 00000000000..b29641f194f --- /dev/null +++ b/orte/mca/regx/naive/regx_naive_component.c @@ -0,0 +1,62 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ +/* + * Copyright (c) 2016-2018 Intel, Inc. All rights reserved. 
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include "orte_config.h" +#include "orte/types.h" +#include "opal/types.h" + +#include "opal/util/show_help.h" + +#include "orte/mca/regx/regx.h" +#include "regx_naive.h" + +static int component_query(mca_base_module_t **module, int *priority); +static int component_register(void); + +/* + * Struct of function pointers and all that to let us be initialized + */ +orte_regx_naive_component_t mca_regx_naive_component = { + { + .base_version = { + MCA_REGX_BASE_VERSION_1_0_0, + .mca_component_name = "naive", + MCA_BASE_MAKE_VERSION(component, ORTE_MAJOR_VERSION, ORTE_MINOR_VERSION, + ORTE_RELEASE_VERSION), + .mca_query_component = component_query, + .mca_register_component_params = component_register, + }, + .base_data = { + /* The component is checkpoint ready */ + MCA_BASE_METADATA_PARAM_CHECKPOINT + }, + } +}; + +static int component_query(mca_base_module_t **module, int *priority) +{ + *module = (mca_base_module_t*)&orte_regx_naive_module; + *priority = 1; + return ORTE_SUCCESS; +} + +static int component_register(void) +{ + mca_base_component_t *c = &mca_regx_naive_component.super.base_version; + + mca_regx_naive_component.compress_vpids = false; + (void) mca_base_component_var_register (c, "compress_vpids", "Enable compression of vpids (default: false)", + MCA_BASE_VAR_TYPE_BOOL, NULL, 0, 0, + OPAL_INFO_LVL_9, + MCA_BASE_VAR_SCOPE_READONLY, + &mca_regx_naive_component.compress_vpids); + + return ORTE_SUCCESS; +} diff --git a/orte/mca/regx/reverse/regx_reverse.c b/orte/mca/regx/reverse/regx_reverse.c index b94a9be353f..1437f48ad75 100644 --- a/orte/mca/regx/reverse/regx_reverse.c +++ b/orte/mca/regx/reverse/regx_reverse.c @@ -142,7 +142,9 @@ static int nidmap_create(opal_pointer_array_t *pool, char **regex) for( j = 0; j <= i; ++j) { prefix[j] = node[j]; } - startnum = j; + if (numdigits) { + startnum = j; + } break; } } @@ -168,35 +170,25 @@ static int 
nidmap_create(opal_pointer_array_t *pool, char **regex) } /* is this node name already on our list? */ found = false; - for (item = opal_list_get_first(&nodenms); - !found && item != opal_list_get_end(&nodenms); - item = opal_list_get_next(item)) { - ndreg = (orte_regex_node_t*)item; - if (0 < strlen(prefix) && NULL == ndreg->prefix) { - continue; - } - if (0 == strlen(prefix) && NULL != ndreg->prefix) { - continue; - } - if (0 < strlen(prefix) && NULL != ndreg->prefix - && 0 != strcmp(prefix, ndreg->prefix)) { - continue; - } - if (NULL == suffix && NULL != ndreg->suffix) { - continue; - } - if (NULL != suffix && NULL == ndreg->suffix) { - continue; - } - if (NULL != suffix && NULL != ndreg->suffix && - 0 != strcmp(suffix, ndreg->suffix)) { - continue; - } - if (numdigits != ndreg->num_digits) { - continue; + if (0 != opal_list_get_size(&nodenms)) { + ndreg = (orte_regex_node_t*)opal_list_get_last(&nodenms); + + if ((0 < strlen(prefix) && NULL == ndreg->prefix) || + (0 == strlen(prefix) && NULL != ndreg->prefix) || + (0 < strlen(prefix) && NULL != ndreg->prefix && + 0 != strcmp(prefix, ndreg->prefix)) || + (NULL == suffix && NULL != ndreg->suffix) || + (NULL != suffix && NULL == ndreg->suffix) || + (NULL != suffix && NULL != ndreg->suffix && + 0 != strcmp(suffix, ndreg->suffix)) || + (numdigits != ndreg->num_digits)) { + found = false; + } else { + /* found a match - flag it */ + found = true; } - /* found a match - flag it */ - found = true; + } + if (found) { /* get the last range on this nodeid - we do this * to preserve order */ @@ -207,22 +199,18 @@ static int nidmap_create(opal_pointer_array_t *pool, char **regex) range->vpid = nodenum; range->cnt = 1; opal_list_append(&ndreg->ranges, &range->super); - break; - } /* see if the node number is out of sequence */ - if (nodenum != (range->vpid + range->cnt)) { + } else if (nodenum != (range->vpid + range->cnt)) { /* start a new range */ range = OBJ_NEW(orte_regex_range_t); range->vpid = nodenum; range->cnt = 1; 
opal_list_append(&ndreg->ranges, &range->super); - break; + } else { + /* everything matches - just increment the cnt */ + range->cnt++; } - /* everything matches - just increment the cnt */ - range->cnt++; - break; - } - if (!found) { + } else { /* need to add it */ ndreg = OBJ_NEW(orte_regex_node_t); if (0 < strlen(prefix)) { @@ -327,5 +315,9 @@ static int nidmap_create(opal_pointer_array_t *pool, char **regex) free(nodenames); free(tmp); *regex = tmp2; + opal_output_verbose(5, orte_regx_base_framework.framework_output, + "%s Final regex: <%s>", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), + *regex); return ORTE_SUCCESS; } diff --git a/orte/mca/rmaps/base/base.h b/orte/mca/rmaps/base/base.h index b893581b13d..fa0915993e5 100644 --- a/orte/mca/rmaps/base/base.h +++ b/orte/mca/rmaps/base/base.h @@ -123,7 +123,8 @@ ORTE_DECLSPEC int orte_rmaps_base_filter_nodes(orte_app_context_t *app, opal_list_t *nodes, bool remove); -ORTE_DECLSPEC int orte_rmaps_base_set_mapping_policy(orte_mapping_policy_t *policy, +ORTE_DECLSPEC int orte_rmaps_base_set_mapping_policy(orte_job_t *jdata, + orte_mapping_policy_t *policy, char **device, char *spec); ORTE_DECLSPEC int orte_rmaps_base_set_ranking_policy(orte_ranking_policy_t *policy, orte_mapping_policy_t mapping, diff --git a/orte/mca/rmaps/base/help-orte-rmaps-base.txt b/orte/mca/rmaps/base/help-orte-rmaps-base.txt index 88dcab07a96..0d4724aeec7 100644 --- a/orte/mca/rmaps/base/help-orte-rmaps-base.txt +++ b/orte/mca/rmaps/base/help-orte-rmaps-base.txt @@ -10,7 +10,7 @@ # University of Stuttgart. All rights reserved. # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. -# Copyright (c) 2011-2015 Cisco Systems, Inc. All rights reserved. +# Copyright (c) 2011-2018 Cisco Systems, Inc. All rights reserved. # Copyright (c) 2011 Los Alamos National Security, LLC. # All rights reserved. # Copyright (c) 2014-2018 Intel, Inc. All rights reserved. 
@@ -23,12 +23,34 @@ # This is the US/English general help file for Open RTE's orterun. # [orte-rmaps-base:alloc-error] -There are not enough slots available in the system to satisfy the %d slots -that were requested by the application: +There are not enough slots available in the system to satisfy the %d +slots that were requested by the application: + %s -Either request fewer slots for your application, or make more slots available -for use. +Either request fewer slots for your application, or make more slots +available for use. + +A "slot" is the Open MPI term for an allocatable unit where we can +launch a process. The number of slots available are defined by the +environment in which Open MPI processes are run: + + 1. Hostfile, via "slots=N" clauses (N defaults to number of + processor cores if not provided) + 2. The --host command line parameter, via a ":N" suffix on the + hostname (N defaults to 1 if not provided) + 3. Resource manager (e.g., SLURM, PBS/Torque, LSF, etc.) + 4. If none of a hostfile, the --host command line parameter, or an + RM is present, Open MPI defaults to the number of processor cores + +In all the above cases, if you want Open MPI to default to the number +of hardware threads instead of the number of processor cores, use the +--use-hwthread-cpus option. + +Alternatively, you can use the --oversubscribe option to ignore the +number of available slots when deciding the number of processes to +launch. +# [orte-rmaps-base:not-all-mapped-alloc] Some of the requested hosts are not included in the current allocation for the application: diff --git a/orte/mca/rmaps/base/rmaps_base_frame.c b/orte/mca/rmaps/base/rmaps_base_frame.c index 9c71cdd990a..bca9a6d8693 100644 --- a/orte/mca/rmaps/base/rmaps_base_frame.c +++ b/orte/mca/rmaps/base/rmaps_base_frame.c @@ -12,7 +12,7 @@ * Copyright (c) 2006-2015 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2011-2013 Los Alamos National Security, LLC. * All rights reserved. 
- * Copyright (c) 2014-2018 Intel, Inc. All rights reserved. + * Copyright (c) 2014-2019 Intel, Inc. All rights reserved. * Copyright (c) 2014-2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. * $COPYRIGHT$ @@ -296,7 +296,7 @@ static int orte_rmaps_base_open(mca_base_open_flag_t flags) "rmaps_base_cpus_per_proc", "rmaps_base_mapping_policy=:PE=N, default =NUMA"); } - if (ORTE_SUCCESS != (rc = orte_rmaps_base_set_mapping_policy(&orte_rmaps_base.mapping, + if (ORTE_SUCCESS != (rc = orte_rmaps_base_set_mapping_policy(NULL, &orte_rmaps_base.mapping, &orte_rmaps_base.device, rmaps_base_mapping_policy))) { return rc; @@ -556,6 +556,7 @@ static int check_modifiers(char *ck, orte_mapping_policy_t *tmp) for (i=0; NULL != ck2[i]; i++) { if (0 == strncasecmp(ck2[i], "span", strlen(ck2[i]))) { ORTE_SET_MAPPING_DIRECTIVE(*tmp, ORTE_MAPPING_SPAN); + ORTE_SET_MAPPING_DIRECTIVE(*tmp, ORTE_MAPPING_GIVEN); found = true; } else if (0 == strncasecmp(ck2[i], "pe", strlen("pe"))) { /* break this at the = sign to get the number */ @@ -593,11 +594,12 @@ static int check_modifiers(char *ck, orte_mapping_policy_t *tmp) return ORTE_ERR_TAKE_NEXT_OPTION; } -int orte_rmaps_base_set_mapping_policy(orte_mapping_policy_t *policy, +int orte_rmaps_base_set_mapping_policy(orte_job_t *jdata, + orte_mapping_policy_t *policy, char **device, char *inspec) { char *ck; - char *ptr; + char *ptr, *cptr; orte_mapping_policy_t tmp; int rc; size_t len; @@ -618,130 +620,144 @@ int orte_rmaps_base_set_mapping_policy(orte_mapping_policy_t *policy, if (NULL == inspec) { ORTE_SET_MAPPING_POLICY(tmp, ORTE_MAPPING_BYSOCKET); - } else { - spec = strdup(inspec); // protect the input string - /* see if a colon was included - if so, then we have a policy + modifier */ - ck = strchr(spec, ':'); - if (NULL != ck) { - /* if the colon is the first character of the string, then we - * just have modifiers on the default mapping policy */ - if (ck == spec) { - ck++; - 
opal_output_verbose(5, orte_rmaps_base_framework.framework_output, - "%s rmaps:base only modifiers %s provided - assuming bysocket mapping", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ck); - ORTE_SET_MAPPING_POLICY(tmp, ORTE_MAPPING_BYSOCKET); - if (ORTE_ERR_SILENT == (rc = check_modifiers(ck, &tmp)) && - ORTE_ERR_BAD_PARAM != rc) { - free(spec); - return ORTE_ERR_SILENT; - } + goto setpolicy; + } + + spec = strdup(inspec); // protect the input string + /* see if a colon was included - if so, then we have a policy + modifier */ + ck = strchr(spec, ':'); + if (NULL != ck) { + /* if the colon is the first character of the string, then we + * just have modifiers on the default mapping policy */ + if (ck == spec) { + ck++; // step over the colon + opal_output_verbose(5, orte_rmaps_base_framework.framework_output, + "%s rmaps:base only modifiers %s provided - assuming bysocket mapping", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ck); + ORTE_SET_MAPPING_POLICY(tmp, ORTE_MAPPING_BYSOCKET); + if (ORTE_ERR_SILENT == (rc = check_modifiers(ck, &tmp)) && + ORTE_ERR_BAD_PARAM != rc) { free(spec); - goto setpolicy; + return ORTE_ERR_SILENT; } - /* split the string */ - *ck = '\0'; - ck++; - opal_output_verbose(5, orte_rmaps_base_framework.framework_output, - "%s rmaps:base policy %s modifiers %s provided", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), spec, ck); - /* if the policy is "dist", then we set the policy to that value - * and save the second argument as the device + free(spec); + goto setpolicy; + } + *ck = '\0'; // terminate spec where the colon was + ck++; // step past the colon + opal_output_verbose(5, orte_rmaps_base_framework.framework_output, + "%s rmaps:base policy %s modifiers %s provided", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), spec, ck); + + if (0 == strncasecmp(spec, "ppr", strlen(spec))) { + /* at this point, ck points to a string that contains at least + * two fields (specifying the #procs/obj and the object we are + * to map by). 
we have to allow additional modifiers here - e.g., + * specifying #pe's/proc or oversubscribe - so check for modifiers. if + * they are present, ck will look like "N:obj:mod1,mod2,mod3" */ - if (0 == strncasecmp(spec, "ppr", strlen(spec))) { - /* we have to allow additional modifiers here - e.g., specifying - * #pe's/proc or oversubscribe - so check for modifiers - */ - if (NULL == (ptr = strrchr(ck, ':'))) { - /* this is an error - there had to be at least one - * colon to delimit the number from the object type - */ - orte_show_help("help-orte-rmaps-base.txt", "invalid-pattern", true, inspec); - free(spec); - return ORTE_ERR_SILENT; - } - ptr++; // move past the colon - /* check the remaining string for modifiers - may be none, so - * don't emit an error message if the modifier isn't recognized + if (NULL == (ptr = strchr(ck, ':'))) { + /* this is an error - there had to be at least one + * colon to delimit the number from the object type */ - if (ORTE_ERR_SILENT == (rc = check_modifiers(ptr, &tmp)) && + orte_show_help("help-orte-rmaps-base.txt", "invalid-pattern", true, inspec); + free(spec); + return ORTE_ERR_SILENT; + } + ptr++; // move past the colon + /* at this point, ptr is pointing to the beginning of the string that describes + * the object plus any modifiers (i.e., "obj:mod1,mod2". 
We first check to see if there + * is another colon indicating that there are modifiers to the request */ + if (NULL != (cptr = strchr(ptr, ':'))) { + /* there are modifiers, so we terminate the object string + * at the location of the colon */ + *cptr = '\0'; + /* step over that colon */ + cptr++; + /* now check for modifiers - may be none, so + * don't emit an error message if the modifier + * isn't recognized */ + if (ORTE_ERR_SILENT == (rc = check_modifiers(cptr, &tmp)) && ORTE_ERR_BAD_PARAM != rc) { free(spec); return ORTE_ERR_SILENT; } - /* if we found something, then we need to adjust the string */ - if (ORTE_SUCCESS == rc) { - ptr--; - *ptr = '\0'; - } - /* now get the pattern */ + } + /* now save the pattern */ + if (NULL == jdata || NULL == jdata->map) { orte_rmaps_base.ppr = strdup(ck); - ORTE_SET_MAPPING_POLICY(tmp, ORTE_MAPPING_PPR); - ORTE_SET_MAPPING_DIRECTIVE(tmp, ORTE_MAPPING_GIVEN); - free(spec); - goto setpolicy; + } else { + jdata->map->ppr = strdup(ck); } - if (ORTE_SUCCESS != (rc = check_modifiers(ck, &tmp)) && - ORTE_ERR_TAKE_NEXT_OPTION != rc) { - if (ORTE_ERR_BAD_PARAM == rc) { - orte_show_help("help-orte-rmaps-base.txt", "unrecognized-modifier", true, inspec); - } - free(spec); - return rc; + ORTE_SET_MAPPING_POLICY(tmp, ORTE_MAPPING_PPR); + ORTE_SET_MAPPING_DIRECTIVE(tmp, ORTE_MAPPING_GIVEN); + free(spec); + goto setpolicy; + } + if (ORTE_SUCCESS != (rc = check_modifiers(ck, &tmp)) && + ORTE_ERR_TAKE_NEXT_OPTION != rc) { + if (ORTE_ERR_BAD_PARAM == rc) { + orte_show_help("help-orte-rmaps-base.txt", "unrecognized-modifier", true, inspec); } + free(spec); + return rc; } - len = strlen(spec); - if (0 == strncasecmp(spec, "slot", len)) { - ORTE_SET_MAPPING_POLICY(tmp, ORTE_MAPPING_BYSLOT); - } else if (0 == strncasecmp(spec, "node", len)) { - ORTE_SET_MAPPING_POLICY(tmp, ORTE_MAPPING_BYNODE); - } else if (0 == strncasecmp(spec, "seq", len)) { - ORTE_SET_MAPPING_POLICY(tmp, ORTE_MAPPING_SEQ); - } else if (0 == strncasecmp(spec, "core", len)) { 
- ORTE_SET_MAPPING_POLICY(tmp, ORTE_MAPPING_BYCORE); - } else if (0 == strncasecmp(spec, "l1cache", len)) { - ORTE_SET_MAPPING_POLICY(tmp, ORTE_MAPPING_BYL1CACHE); - } else if (0 == strncasecmp(spec, "l2cache", len)) { - ORTE_SET_MAPPING_POLICY(tmp, ORTE_MAPPING_BYL2CACHE); - } else if (0 == strncasecmp(spec, "l3cache", len)) { - ORTE_SET_MAPPING_POLICY(tmp, ORTE_MAPPING_BYL3CACHE); - } else if (0 == strncasecmp(spec, "socket", len)) { - ORTE_SET_MAPPING_POLICY(tmp, ORTE_MAPPING_BYSOCKET); - } else if (0 == strncasecmp(spec, "numa", len)) { - ORTE_SET_MAPPING_POLICY(tmp, ORTE_MAPPING_BYNUMA); - } else if (0 == strncasecmp(spec, "board", len)) { - ORTE_SET_MAPPING_POLICY(tmp, ORTE_MAPPING_BYBOARD); - } else if (0 == strncasecmp(spec, "hwthread", len)) { - ORTE_SET_MAPPING_POLICY(tmp, ORTE_MAPPING_BYHWTHREAD); - /* if we are mapping processes to individual hwthreads, then - * we need to treat those hwthreads as separate cpus - */ - opal_hwloc_use_hwthreads_as_cpus = true; - } else if (0 == strncasecmp(spec, "dist", len)) { - if (NULL != rmaps_dist_device) { - if (NULL != (pch = strchr(rmaps_dist_device, ':'))) { - *pch = '\0'; - } - if (NULL != device) { - *device = strdup(rmaps_dist_device); - } - ORTE_SET_MAPPING_POLICY(tmp, ORTE_MAPPING_BYDIST); - } else { - orte_show_help("help-orte-rmaps-base.txt", "device-not-specified", true); - free(spec); - return ORTE_ERR_SILENT; + } + len = strlen(spec); + if (0 == strncasecmp(spec, "slot", len)) { + ORTE_SET_MAPPING_POLICY(tmp, ORTE_MAPPING_BYSLOT); + } else if (0 == strncasecmp(spec, "node", len)) { + ORTE_SET_MAPPING_POLICY(tmp, ORTE_MAPPING_BYNODE); + } else if (0 == strncasecmp(spec, "seq", len)) { + ORTE_SET_MAPPING_POLICY(tmp, ORTE_MAPPING_SEQ); + } else if (0 == strncasecmp(spec, "core", len)) { + ORTE_SET_MAPPING_POLICY(tmp, ORTE_MAPPING_BYCORE); + } else if (0 == strncasecmp(spec, "l1cache", len)) { + ORTE_SET_MAPPING_POLICY(tmp, ORTE_MAPPING_BYL1CACHE); + } else if (0 == strncasecmp(spec, "l2cache", len)) { + 
ORTE_SET_MAPPING_POLICY(tmp, ORTE_MAPPING_BYL2CACHE); + } else if (0 == strncasecmp(spec, "l3cache", len)) { + ORTE_SET_MAPPING_POLICY(tmp, ORTE_MAPPING_BYL3CACHE); + } else if (0 == strncasecmp(spec, "socket", len)) { + ORTE_SET_MAPPING_POLICY(tmp, ORTE_MAPPING_BYSOCKET); + } else if (0 == strncasecmp(spec, "numa", len)) { + ORTE_SET_MAPPING_POLICY(tmp, ORTE_MAPPING_BYNUMA); + } else if (0 == strncasecmp(spec, "board", len)) { + ORTE_SET_MAPPING_POLICY(tmp, ORTE_MAPPING_BYBOARD); + } else if (0 == strncasecmp(spec, "hwthread", len)) { + ORTE_SET_MAPPING_POLICY(tmp, ORTE_MAPPING_BYHWTHREAD); + /* if we are mapping processes to individual hwthreads, then + * we need to treat those hwthreads as separate cpus + */ + opal_hwloc_use_hwthreads_as_cpus = true; + } else if (0 == strncasecmp(spec, "dist", len)) { + if (NULL != rmaps_dist_device) { + if (NULL != (pch = strchr(rmaps_dist_device, ':'))) { + *pch = '\0'; + } + if (NULL != device) { + *device = strdup(rmaps_dist_device); } + ORTE_SET_MAPPING_POLICY(tmp, ORTE_MAPPING_BYDIST); } else { - orte_show_help("help-orte-rmaps-base.txt", "unrecognized-policy", true, "mapping", spec); + orte_show_help("help-orte-rmaps-base.txt", "device-not-specified", true); free(spec); return ORTE_ERR_SILENT; } + } else { + orte_show_help("help-orte-rmaps-base.txt", "unrecognized-policy", true, "mapping", spec); free(spec); - ORTE_SET_MAPPING_DIRECTIVE(tmp, ORTE_MAPPING_GIVEN); + return ORTE_ERR_SILENT; } + free(spec); + ORTE_SET_MAPPING_DIRECTIVE(tmp, ORTE_MAPPING_GIVEN); setpolicy: - *policy = tmp; + if (NULL == jdata || NULL == jdata->map) { + *policy = tmp; + } else { + jdata->map->mapping = tmp; + } return ORTE_SUCCESS; } diff --git a/orte/mca/rmaps/base/rmaps_base_map_job.c b/orte/mca/rmaps/base/rmaps_base_map_job.c index 028e387629b..66a46b3ba08 100644 --- a/orte/mca/rmaps/base/rmaps_base_map_job.c +++ b/orte/mca/rmaps/base/rmaps_base_map_job.c @@ -2,14 +2,14 @@ * Copyright (c) 2004-2005 The Trustees of Indiana University and 
Indiana * University Research and Technology * Corporation. All rights reserved. - * Copyright (c) 2004-2005 The University of Tennessee and The University + * Copyright (c) 2004-2018 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. - * Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2011-2018 Cisco Systems, Inc. All rights reserved * Copyright (c) 2011-2012 Los Alamos National Security, LLC. * All rights reserved. * Copyright (c) 2014-2018 Intel, Inc. All rights reserved. @@ -210,13 +210,11 @@ void orte_rmaps_base_map_job(int fd, short args, void *cbdata) if (!(ORTE_MAPPING_SUBSCRIBE_GIVEN & ORTE_GET_MAPPING_DIRECTIVE(jdata->map->mapping))) { if (!(ORTE_MAPPING_SUBSCRIBE_GIVEN & ORTE_GET_MAPPING_DIRECTIVE(orte_rmaps_base.mapping))) { ORTE_SET_MAPPING_DIRECTIVE(jdata->map->mapping, ORTE_MAPPING_NO_OVERSUBSCRIBE); + } else if (ORTE_MAPPING_NO_OVERSUBSCRIBE & ORTE_GET_MAPPING_DIRECTIVE(orte_rmaps_base.mapping)) { + ORTE_SET_MAPPING_DIRECTIVE(jdata->map->mapping, ORTE_MAPPING_NO_OVERSUBSCRIBE); } else { - /* pass along the directive */ - if (ORTE_MAPPING_NO_OVERSUBSCRIBE & ORTE_GET_MAPPING_DIRECTIVE(orte_rmaps_base.mapping)) { - ORTE_SET_MAPPING_DIRECTIVE(jdata->map->mapping, ORTE_MAPPING_NO_OVERSUBSCRIBE); - } else { - ORTE_UNSET_MAPPING_DIRECTIVE(jdata->map->mapping, ORTE_MAPPING_NO_OVERSUBSCRIBE); - } + ORTE_UNSET_MAPPING_DIRECTIVE(jdata->map->mapping, ORTE_MAPPING_NO_OVERSUBSCRIBE); + ORTE_SET_MAPPING_DIRECTIVE(jdata->map->mapping, ORTE_MAPPING_SUBSCRIBE_GIVEN); } } @@ -556,17 +554,17 @@ void orte_rmaps_base_display_map(orte_job_t *jdata) if (NULL == (proc = (orte_proc_t*)opal_pointer_array_get_item(node->procs, j))) { continue; } - memset(tmp1, 0, 1024); + 
memset(tmp1, 0, sizeof(tmp1)); if (orte_get_attribute(&proc->attributes, ORTE_PROC_HWLOC_BOUND, (void**)&bd, OPAL_PTR)) { if (NULL == bd) { - (void)strncpy(tmp1, "UNBOUND", strlen("UNBOUND")); + (void)strncpy(tmp1, "UNBOUND", sizeof(tmp1)); } else { if (OPAL_ERR_NOT_BOUND == opal_hwloc_base_cset2mapstr(tmp1, sizeof(tmp1), node->topology->topo, bd->cpuset)) { - (void)strncpy(tmp1, "UNBOUND", strlen("UNBOUND")); + (void)strncpy(tmp1, "UNBOUND", sizeof(tmp1)); } } } else { - (void)strncpy(tmp1, "UNBOUND", strlen("UNBOUND")); + (void)strncpy(tmp1, "UNBOUND", sizeof(tmp1)); } opal_output(orte_clean_output, "\t\t", ORTE_VPID_PRINT(proc->name.vpid), (long)proc->app_idx, diff --git a/orte/mca/rmaps/mindist/rmaps_mindist_module.c b/orte/mca/rmaps/mindist/rmaps_mindist_module.c index cadbde2a235..1e78678df30 100644 --- a/orte/mca/rmaps/mindist/rmaps_mindist_module.c +++ b/orte/mca/rmaps/mindist/rmaps_mindist_module.c @@ -15,7 +15,7 @@ * Copyright (c) 2014-2017 Intel, Inc. All rights reserved. * Copyright (c) 2014 Research Organization for Information Science * and Technology (RIST). All rights reserved. - * Copyright (c) 2017 Mellanox Technologies, Inc. + * Copyright (c) 2017-2018 Mellanox Technologies, Inc. * All rights reserved. 
* $COPYRIGHT$ * @@ -331,6 +331,9 @@ static int mindist_map(orte_job_t *jdata) } } } + /* first we need to fill summary object for root with information about nodes + * so we call opal_hwloc_base_get_nbobjs_by_type */ + opal_hwloc_base_get_nbobjs_by_type(node->topology->topo, HWLOC_OBJ_NODE, 0, OPAL_HWLOC_AVAILABLE); OBJ_CONSTRUCT(&numa_list, opal_list_t); ret = opal_hwloc_get_sorted_numa_list(node->topology->topo, orte_rmaps_base.device, &numa_list); if (ret > 1) { diff --git a/orte/mca/rmaps/rank_file/help-rmaps_rank_file.txt b/orte/mca/rmaps/rank_file/help-rmaps_rank_file.txt index ce1705acd88..f357bf20f37 100644 --- a/orte/mca/rmaps/rank_file/help-rmaps_rank_file.txt +++ b/orte/mca/rmaps/rank_file/help-rmaps_rank_file.txt @@ -1,6 +1,6 @@ # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. -# Copyright (c) 2011 Cisco Systems, Inc. All rights reserved. +# Copyright (c) 2011-2018 Cisco Systems, Inc. All rights reserved. # Copyright (c) 2013 Los Alamos National Security, LLC. # All rights reserved. # $COPYRIGHT$ @@ -90,14 +90,6 @@ some systems may require using full hostnames, such as [bad-index] Rankfile claimed host %s by index that is bigger than number of allocated hosts. # -[orte-rmaps-rf:alloc-error] -There are not enough slots available in the system to satisfy the %d slots -that were requested by the application: - %s - -Either request fewer slots for your application, or make more slots available -for use. -# [bad-rankfile] Error, invalid rank (%d) in the rankfile (%s) # diff --git a/orte/mca/rmaps/round_robin/help-orte-rmaps-rr.txt b/orte/mca/rmaps/round_robin/help-orte-rmaps-rr.txt index 2adb9781274..ca459dd7c56 100644 --- a/orte/mca/rmaps/round_robin/help-orte-rmaps-rr.txt +++ b/orte/mca/rmaps/round_robin/help-orte-rmaps-rr.txt @@ -11,6 +11,7 @@ # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. # Copyright (c) 2017 Intel, Inc. All rights reserved. 
+# Copyright (c) 2018 Cisco Systems, Inc. All rights reserved. # $COPYRIGHT$ # # Additional copyrights may follow @@ -19,15 +20,6 @@ # # This is the US/English general help file for Open RTE's orterun. # -[orte-rmaps-rr:alloc-error] -There are not enough slots available in the system to satisfy the %d slots -that were requested: - - application: %s - host: %s - -Either request fewer slots for your application, or make more slots available -for use. [orte-rmaps-rr:multi-apps-and-zero-np] RMAPS found multiple applications to be launched, with at least one that failed to specify the number of processes to execute. diff --git a/orte/mca/rmaps/round_robin/rmaps_rr_mappers.c b/orte/mca/rmaps/round_robin/rmaps_rr_mappers.c index 6426b1e4940..c515e9e41b8 100644 --- a/orte/mca/rmaps/round_robin/rmaps_rr_mappers.c +++ b/orte/mca/rmaps/round_robin/rmaps_rr_mappers.c @@ -2,7 +2,7 @@ * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. - * Copyright (c) 2004-2011 The University of Tennessee and The University + * Copyright (c) 2004-2018 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. 
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, @@ -172,7 +172,14 @@ int orte_rmaps_rr_byslot(orte_job_t *jdata, --nxtra_nodes; } } - num_procs_to_assign = node->slots - node->slots_inuse + extra_procs_to_assign; + if(node->slots <= node->slots_inuse) { + /* nodes are already oversubscribed */ + num_procs_to_assign = extra_procs_to_assign; + } + else { + /* nodes have some room */ + num_procs_to_assign = node->slots - node->slots_inuse + extra_procs_to_assign; + } opal_output_verbose(2, orte_rmaps_base_framework.framework_output, "mca:rmaps:rr:slot adding up to %d procs to node %s", num_procs_to_assign, node->name); diff --git a/orte/mca/rmaps/seq/help-orte-rmaps-seq.txt b/orte/mca/rmaps/seq/help-orte-rmaps-seq.txt index 5fbe1095932..fbab660928b 100644 --- a/orte/mca/rmaps/seq/help-orte-rmaps-seq.txt +++ b/orte/mca/rmaps/seq/help-orte-rmaps-seq.txt @@ -10,6 +10,7 @@ # University of Stuttgart. All rights reserved. # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. +# Copyright (c) 2018 Cisco Systems, Inc. All rights reserved. # $COPYRIGHT$ # # Additional copyrights may follow @@ -18,19 +19,8 @@ # # This is the US/English general help file for Open RTE's orterun. # -[orte-rmaps-seq:alloc-error] -There are not enough slots available in the system to satisfy the %d slots -that were requested by the application: - - %s - -Either request fewer slots for your application or make more slots -available for use. If oversubscription is intended, add ---oversubscribe to the command line. -# [orte-rmaps-seq:resource-not-found] The specified hostfile contained a node (%s) that is not in your allocation. We therefore cannot map a process rank to it. Please check your allocation and hostfile to ensure the hostfile only contains allocated nodes. 
- diff --git a/orte/mca/rml/ofi/Makefile.am b/orte/mca/rml/ofi/Makefile.am deleted file mode 100644 index a6a4f90f0ae..00000000000 --- a/orte/mca/rml/ofi/Makefile.am +++ /dev/null @@ -1,53 +0,0 @@ -# -# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana -# University Research and Technology -# Corporation. All rights reserved. -# Copyright (c) 2004-2005 The University of Tennessee and The University -# of Tennessee Research Foundation. All rights -# reserved. -# Copyright (c) 2004-2009 High Performance Computing Center Stuttgart, -# University of Stuttgart. All rights reserved. -# Copyright (c) 2004-2005 The Regents of the University of California. -# All rights reserved. -# Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. -# Copyright (c) 2015-2017 Intel, Inc. All rights reserved. -# Copyright (c) 2017 Los Alamos National Security, LLC. All rights -# reserved. -# Copyright (c) 2017 IBM Corporation. All rights reserved. -# $COPYRIGHT$ -# -# Additional copyrights may follow -# -# $HEADER$ -# - -AM_CPPFLAGS = $(opal_common_ofi_CPPFLAGS) - -sources = \ - rml_ofi.h \ - rml_ofi_request.h \ - rml_ofi_component.c \ - rml_ofi_send.c - -# Make the output library in this directory, and name it either -# mca__.la (for DSO builds) or libmca__.la -# (for static builds). 
- -if MCA_BUILD_orte_rml_ofi_DSO -component_noinst = -component_install = mca_rml_ofi.la -else -component_noinst = libmca_rml_ofi.la -component_install = -endif - -mcacomponentdir = $(ortelibdir) -mcacomponent_LTLIBRARIES = $(component_install) -mca_rml_ofi_la_SOURCES = $(sources) -mca_rml_ofi_la_LDFLAGS = -module -avoid-version -mca_rml_ofi_la_LIBADD = $(top_builddir)/orte/lib@ORTE_LIB_PREFIX@open-rte.la \ - $(OPAL_TOP_BUILDDIR)/opal/mca/common/ofi/lib@OPAL_LIB_PREFIX@mca_common_ofi.la - -noinst_LTLIBRARIES = $(component_noinst) -libmca_rml_ofi_la_SOURCES = $(sources) -libmca_rml_ofi_la_LDFLAGS = -module -avoid-version diff --git a/orte/mca/rml/ofi/configure.m4 b/orte/mca/rml/ofi/configure.m4 deleted file mode 100644 index 35327c29d47..00000000000 --- a/orte/mca/rml/ofi/configure.m4 +++ /dev/null @@ -1,31 +0,0 @@ -# -*- shell-script -*- -# -# Copyright (c) 2013-2014 Intel, Inc. All rights reserved -# -# Copyright (c) 2014-2015 Cisco Systems, Inc. All rights reserved. -# Copyright (c) 2017 Los Alamos National Security, LLC. All rights -# reserved. -# $COPYRIGHT$ -# -# Additional copyrights may follow -# -# $HEADER$ -# - -# MCA_orte_rml_ofi_POST_CONFIG(will_build) -# ---------------------------------------- -# Only require the tag if we're actually going to be built - -# MCA_mtl_ofi_CONFIG([action-if-can-compile], -# [action-if-cant-compile]) -# ------------------------------------------------ -AC_DEFUN([MCA_orte_rml_ofi_CONFIG],[ - AC_CONFIG_FILES([orte/mca/rml/ofi/Makefile]) - - # ensure we already ran the common OFI libfabric config - AC_REQUIRE([MCA_opal_common_ofi_CONFIG]) - - AS_IF([test "$opal_common_ofi_happy" = "yes"], - [$1], - [$2]) -])dnl diff --git a/orte/mca/rml/ofi/rml_ofi.h b/orte/mca/rml/ofi/rml_ofi.h deleted file mode 100644 index 465d28c4841..00000000000 --- a/orte/mca/rml/ofi/rml_ofi.h +++ /dev/null @@ -1,213 +0,0 @@ -/* - * Copyright (c) 2015 Intel, Inc. 
All rights reserved - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#ifndef MCA_RML_OFI_RML_OFI_H -#define MCA_RML_OFI_RML_OFI_H - -#include "orte_config.h" - -#include "opal/dss/dss_types.h" -#include "opal/mca/event/event.h" -#include "opal/mca/pmix/pmix.h" -#include "orte/mca/rml/base/base.h" - -#include -#include -#include -#include -#include -#include - -#include "rml_ofi_request.h" - -/** the maximum open OFI ofi_prov - assuming system will have no more than 20 transports*/ -#define MAX_OFI_PROVIDERS 40 -#define RML_OFI_PROV_ID_INVALID 0xFF - -/** RML/OFI key values **/ -/* (char*) ofi socket address (type IN) of the node process is running on */ -#define OPAL_RML_OFI_FI_SOCKADDR_IN "rml.ofi.fisockaddrin" -/* (char*) ofi socket address (type PSM) of the node process is running on */ -#define OPAL_RML_OFI_FI_ADDR_PSMX "rml.ofi.fiaddrpsmx" - -// MULTI_BUF_SIZE_FACTOR defines how large the multi recv buffer will be. -// In order to use FI_MULTI_RECV feature efficiently, we need to have a -// large recv buffer so that we don't need to repost the buffer often to -// get the remaining data when the buffer is full -#define MULTI_BUF_SIZE_FACTOR 128 -#define MIN_MULTI_BUF_SIZE (1024 * 1024) - -#define OFIADDR "ofiaddr" - -#define CLOSE_FID(fd) \ - do { \ - int _ret = 0; \ - if (0 != (fd)) { \ - _ret = fi_close(&(fd)->fid); \ - fd = NULL; \ - if (0 != _ret) { \ - opal_output_verbose(10,orte_rml_base_framework.framework_output, \ - " %s - fi_close failed with error- %d", \ - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),ret); \ - } \ - } \ - } while (0); - - -#define RML_OFI_RETRY_UNTIL_DONE(FUNC) \ - do { \ - do { \ - ret = FUNC; \ - if(OPAL_LIKELY(0 == ret)) {break;} \ - } while(-FI_EAGAIN == ret); \ - } while(0); - -BEGIN_C_DECLS - -struct orte_rml_ofi_module_t; - -/** This structure will hold the ep and all ofi objects for each transport -and also the corresponding fi_info -**/ -typedef struct { - - /** ofi provider ID **/ - uint8_t 
ofi_prov_id; - - /** fi_info for this transport */ - struct fi_info *fabric_info; - - /** Fabric Domain handle */ - struct fid_fabric *fabric; - - /** Access Domain handle */ - struct fid_domain *domain; - - /** Address vector handle */ - struct fid_av *av; - - /** Completion queue handle */ - struct fid_cq *cq; - - /** Endpoint to communicate on */ - struct fid_ep *ep; - - /** Endpoint name */ - char ep_name[FI_NAME_MAX]; - - /** Endpoint name length */ - size_t epnamelen; - - /** OFI memory region */ - struct fid_mr *mr_multi_recv; - - /** buffer for tx and rx */ - void *rxbuf; - - uint64_t rxbuf_size; - - /* event,fd associated with the cq */ - int fd; - - /*event associated with progress fn */ - opal_event_t progress_event; - bool progress_ev_active; - - struct fi_context rx_ctx1; - -} ofi_transport_ofi_prov_t; - - - struct orte_rml_ofi_module_t { - orte_rml_base_module_t api; - - /** current ofi transport id the component is using, this will be initialised - ** in the open_ofi_prov() call **/ - int cur_transport_id; - - /** Fabric info structure of all supported transports in system **/ - struct fi_info *fi_info_list; - - /** OFI ep and corr fi_info for all the transports (ofi_providers) **/ - ofi_transport_ofi_prov_t ofi_prov[MAX_OFI_PROVIDERS]; - - size_t min_ofi_recv_buf_sz; - - /** "Any source" address */ - fi_addr_t any_addr; - - /** number of ofi providers currently opened **/ - uint8_t ofi_prov_open_num; - - /** Unique message id for every message that is fragmented to be sent over OFI **/ - uint32_t cur_msgid; - - /* hashtable stores the peer addresses */ - opal_hash_table_t peers; - - opal_list_t recv_msg_queue_list; - opal_list_t queued_routing_messages; - opal_event_t *timer_event; - struct timeval timeout; -} ; -typedef struct orte_rml_ofi_module_t orte_rml_ofi_module_t; - -/* For every first send initiated to new peer - * select the peer provider, peer ep-addr, - * local provider and populate in orte_rml_ofi_peer_t instance. 
- * Insert this in hash table. - * */ -typedef struct { - opal_object_t super; - char* ofi_prov_name; /* peer (dest) provider chosen */ - void* ofi_ep; /* peer (dest) ep chosen */ - size_t ofi_ep_len; /* peer (dest) ep length */ - uint8_t src_prov_id; /* index of the local (src) provider used for this peer */ -} orte_rml_ofi_peer_t; -OBJ_CLASS_DECLARATION(orte_rml_ofi_peer_t); - -ORTE_MODULE_DECLSPEC extern orte_rml_component_t mca_rml_ofi_component; -extern orte_rml_ofi_module_t orte_rml_ofi; - -int orte_rml_ofi_send_buffer_nb(struct orte_rml_base_module_t *mod, - orte_process_name_t* peer, - struct opal_buffer_t* buffer, - orte_rml_tag_t tag, - orte_rml_buffer_callback_fn_t cbfunc, - void* cbdata); -int orte_rml_ofi_send_nb(struct orte_rml_base_module_t *mod, - orte_process_name_t* peer, - struct iovec* iov, - int count, - orte_rml_tag_t tag, - orte_rml_callback_fn_t cbfunc, - void* cbdata); - -/****************** INTERNAL OFI Functions*************/ -void free_ofi_prov_resources( int ofi_prov_id); -void print_provider_list_info (struct fi_info *fi ); -void print_provider_info (struct fi_info *cur_fi ); -int cq_progress_handler(int sd, short flags, void *cbdata); -int get_ofi_prov_id( opal_list_t *attributes); - -/** Send callback */ -int orte_rml_ofi_send_callback(struct fi_cq_data_entry *wc, - orte_rml_ofi_request_t*); - -/** Error callback */ -int orte_rml_ofi_error_callback(struct fi_cq_err_entry *error, - orte_rml_ofi_request_t*); - -/* OFI Recv handler */ -int orte_rml_ofi_recv_handler(struct fi_cq_data_entry *wc, uint8_t ofi_prov_id); - -bool user_override(void); -END_C_DECLS - -#endif diff --git a/orte/mca/rml/ofi/rml_ofi_component.c b/orte/mca/rml/ofi/rml_ofi_component.c deleted file mode 100644 index b0cc89b3e14..00000000000 --- a/orte/mca/rml/ofi/rml_ofi_component.c +++ /dev/null @@ -1,1191 +0,0 @@ -/* - * Copyright (c) 2015-2017 Intel, Inc. All rights reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "orte_config.h" -#include "orte/constants.h" - -#include "opal/mca/base/base.h" -#include "opal/util/argv.h" -#include "opal/util/net.h" -#include "opal/util/output.h" -#include "opal/mca/backtrace/backtrace.h" -#include "opal/mca/event/event.h" - -#if OPAL_ENABLE_FT_CR == 1 -#include "orte/mca/rml/rml.h" -#include "orte/mca/state/state.h" -#endif -#include "orte/mca/rml/base/base.h" -#include "orte/mca/rml/rml_types.h" -#include "orte/mca/routed/routed.h" -#include "orte/mca/errmgr/errmgr.h" -#include "orte/util/name_fns.h" -#include "orte/runtime/orte_globals.h" - -#include "rml_ofi.h" - - -static int rml_ofi_component_open(void); -static int rml_ofi_component_close(void); -static int rml_ofi_component_register(void); - -static int rml_ofi_component_init(void); -static orte_rml_base_module_t* open_conduit(opal_list_t *attributes); -static orte_rml_pathway_t* query_transports(void); - -/** - * component definition - */ -orte_rml_component_t mca_rml_ofi_component = { - /* First, the mca_base_component_t struct containing meta - information about the component itself */ - - .base = { - ORTE_RML_BASE_VERSION_3_0_0, - - .mca_component_name = "ofi", - MCA_BASE_MAKE_VERSION(component, ORTE_MAJOR_VERSION, ORTE_MINOR_VERSION, - ORTE_RELEASE_VERSION), - .mca_open_component = rml_ofi_component_open, - .mca_close_component = rml_ofi_component_close, - .mca_register_component_params = rml_ofi_component_register - }, - .data = { - /* The component is checkpoint ready */ - MCA_BASE_METADATA_PARAM_CHECKPOINT - }, - .priority = 10, - .open_conduit = open_conduit, - .query_transports = query_transports, - .close_conduit = NULL -}; - -/* Local variables */ -orte_rml_ofi_module_t orte_rml_ofi = { - .api = { - .component = (struct orte_rml_component_t*)&mca_rml_ofi_component, - .ping = NULL, - .send_nb = orte_rml_ofi_send_nb, - .send_buffer_nb = orte_rml_ofi_send_buffer_nb, - .purge = NULL - } 
-}; - -/* Local variables */ -static bool init_done = false; -static char *ofi_transports_supported = NULL; -static char *initial_ofi_transports_supported = NULL; -static bool ofi_desired = false; -static bool routing_desired = false; - -/* return true if user override for choice of ofi provider */ -bool user_override(void) -{ - if( 0 == strcmp(initial_ofi_transports_supported, ofi_transports_supported ) ) - return false; - else - return true; -} - -static int -rml_ofi_component_open(void) -{ - /* Initialise endpoint and all queues */ - - orte_rml_ofi.fi_info_list = NULL; - orte_rml_ofi.min_ofi_recv_buf_sz = MIN_MULTI_BUF_SIZE; - orte_rml_ofi.cur_msgid = 1; - orte_rml_ofi.cur_transport_id = RML_OFI_PROV_ID_INVALID; - orte_rml_ofi.ofi_prov_open_num = 0; - OBJ_CONSTRUCT(&orte_rml_ofi.peers, opal_hash_table_t); - opal_hash_table_init(&orte_rml_ofi.peers, 128); - OBJ_CONSTRUCT(&orte_rml_ofi.recv_msg_queue_list, opal_list_t); - - for( uint8_t ofi_prov_id=0; ofi_prov_id < MAX_OFI_PROVIDERS ; ofi_prov_id++) { - orte_rml_ofi.ofi_prov[ofi_prov_id].fabric = NULL; - orte_rml_ofi.ofi_prov[ofi_prov_id].domain = NULL; - orte_rml_ofi.ofi_prov[ofi_prov_id].av = NULL; - orte_rml_ofi.ofi_prov[ofi_prov_id].cq = NULL; - orte_rml_ofi.ofi_prov[ofi_prov_id].ep = NULL; - orte_rml_ofi.ofi_prov[ofi_prov_id].ep_name[0] = 0; - orte_rml_ofi.ofi_prov[ofi_prov_id].epnamelen = 0; - orte_rml_ofi.ofi_prov[ofi_prov_id].mr_multi_recv = NULL; - orte_rml_ofi.ofi_prov[ofi_prov_id].rxbuf = NULL; - orte_rml_ofi.ofi_prov[ofi_prov_id].rxbuf_size = 0; - orte_rml_ofi.ofi_prov[ofi_prov_id].progress_ev_active = false; - orte_rml_ofi.ofi_prov[ofi_prov_id].ofi_prov_id = RML_OFI_PROV_ID_INVALID; - } - - opal_output_verbose(10,orte_rml_base_framework.framework_output," from %s:%d rml_ofi_component_open()",__FILE__,__LINE__); - - if (!ORTE_PROC_IS_HNP && !ORTE_PROC_IS_DAEMON) { - return ORTE_ERROR; - } - if (!ofi_desired) { - return ORTE_ERROR; - } - return ORTE_SUCCESS; -} - - -void free_ofi_prov_resources( int 
ofi_prov_id) -{ - - int ret=0; - opal_output_verbose(10,orte_rml_base_framework.framework_output, - " %s - free_ofi_prov_resources() begin. OFI ofi_prov_id- %d", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),ofi_prov_id); - if (orte_rml_ofi.ofi_prov[ofi_prov_id].ep) { - opal_output_verbose(10,orte_rml_base_framework.framework_output, - " %s - close ep",ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)); - CLOSE_FID(orte_rml_ofi.ofi_prov[ofi_prov_id].ep); - } - if (orte_rml_ofi.ofi_prov[ofi_prov_id].mr_multi_recv) { - opal_output_verbose(10,orte_rml_base_framework.framework_output, - " %s - close mr_multi_recv",ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)); - CLOSE_FID(orte_rml_ofi.ofi_prov[ofi_prov_id].mr_multi_recv); - } - if (orte_rml_ofi.ofi_prov[ofi_prov_id].cq) { - opal_output_verbose(10,orte_rml_base_framework.framework_output, - " %s - close cq",ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)); - CLOSE_FID(orte_rml_ofi.ofi_prov[ofi_prov_id].cq); - } - if (orte_rml_ofi.ofi_prov[ofi_prov_id].av) { - CLOSE_FID(orte_rml_ofi.ofi_prov[ofi_prov_id].av); - } - if (orte_rml_ofi.ofi_prov[ofi_prov_id].domain) { - opal_output_verbose(10,orte_rml_base_framework.framework_output, - " %s - close domain",ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)); - CLOSE_FID(orte_rml_ofi.ofi_prov[ofi_prov_id].domain); - } - if (orte_rml_ofi.ofi_prov[ofi_prov_id].fabric) { - opal_output_verbose(10,orte_rml_base_framework.framework_output, - " %s - close fabric",ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)); - fi_close((fid_t)orte_rml_ofi.ofi_prov[ofi_prov_id].fabric); - } - if (orte_rml_ofi.ofi_prov[ofi_prov_id].rxbuf) { - free(orte_rml_ofi.ofi_prov[ofi_prov_id].rxbuf); - } - - orte_rml_ofi.ofi_prov[ofi_prov_id].fabric = NULL; - orte_rml_ofi.ofi_prov[ofi_prov_id].domain = NULL; - orte_rml_ofi.ofi_prov[ofi_prov_id].av = NULL; - orte_rml_ofi.ofi_prov[ofi_prov_id].cq = NULL; - orte_rml_ofi.ofi_prov[ofi_prov_id].ep = NULL; - orte_rml_ofi.ofi_prov[ofi_prov_id].ep_name[0] = 0; - orte_rml_ofi.ofi_prov[ofi_prov_id].epnamelen = 0; - 
orte_rml_ofi.ofi_prov[ofi_prov_id].rxbuf = NULL; - orte_rml_ofi.ofi_prov[ofi_prov_id].rxbuf_size = 0; - orte_rml_ofi.ofi_prov[ofi_prov_id].fabric_info = NULL; - orte_rml_ofi.ofi_prov[ofi_prov_id].mr_multi_recv = NULL; - orte_rml_ofi.ofi_prov[ofi_prov_id].ofi_prov_id = RML_OFI_PROV_ID_INVALID; - - - if( orte_rml_ofi.ofi_prov[ofi_prov_id].progress_ev_active) { - opal_output_verbose(10,orte_rml_base_framework.framework_output, - " %s - deleting progress event", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)); - opal_event_del( &orte_rml_ofi.ofi_prov[ofi_prov_id].progress_event); - } - - return; -} - - -static int -rml_ofi_component_close(void) -{ - - int rc; - opal_object_t *value; - uint64_t key; - void *node; - uint8_t ofi_prov_id; - - opal_output_verbose(10,orte_rml_base_framework.framework_output, - " %s - rml_ofi_component_close() -begin, total open OFI providers = %d", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),orte_rml_ofi.ofi_prov_open_num); - - if (orte_rml_ofi.fi_info_list) { - (void) fi_freeinfo(orte_rml_ofi.fi_info_list); - } - - /* Close endpoint and all queues */ - for (ofi_prov_id=0; ofi_prov_id < orte_rml_ofi.ofi_prov_open_num; ofi_prov_id++) { - free_ofi_prov_resources(ofi_prov_id); - } - - /* release all peers from the hash table */ - rc = opal_hash_table_get_first_key_uint64(&orte_rml_ofi.peers, &key, - (void **)&value, &node); - while (OPAL_SUCCESS == rc) { - if (NULL != value) { - OBJ_RELEASE(value); - } - rc = opal_hash_table_get_next_key_uint64 (&orte_rml_ofi.peers, &key, - (void **) &value, node, &node); - } - OBJ_DESTRUCT(&orte_rml_ofi.peers); - OPAL_LIST_DESTRUCT(&orte_rml_ofi.recv_msg_queue_list); - - opal_output_verbose(10,orte_rml_base_framework.framework_output, - " %s - rml_ofi_component_close() end",ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)); - return ORTE_SUCCESS; -} - -static int rml_ofi_component_register(void) -{ - mca_base_component_t *component = &mca_rml_ofi_component.base; - - initial_ofi_transports_supported = "fabric,ethernet"; - 
ofi_transports_supported = strdup(initial_ofi_transports_supported); - mca_base_component_var_register(component, "transports", - "Comma-delimited list of transports to support (default=\"fabric,ethernet\"", - MCA_BASE_VAR_TYPE_STRING, NULL, 0, 0, - OPAL_INFO_LVL_2, - MCA_BASE_VAR_SCOPE_LOCAL, - &ofi_transports_supported); - - - ofi_desired = false; - mca_base_component_var_register(component, "desired", - "Use OFI for coll conduit", - MCA_BASE_VAR_TYPE_BOOL, NULL, 0, 0, - OPAL_INFO_LVL_2, - MCA_BASE_VAR_SCOPE_LOCAL, - &ofi_desired); - - routing_desired = false; - mca_base_component_var_register(component, "routing", - "Route OFI messages", - MCA_BASE_VAR_TYPE_BOOL, NULL, 0, 0, - OPAL_INFO_LVL_2, - MCA_BASE_VAR_SCOPE_LOCAL, - &routing_desired); - - return ORTE_SUCCESS; -} - -void print_provider_info (struct fi_info *cur_fi ) -{ - //Display all the details in the fi_info structure - opal_output_verbose(1,orte_rml_base_framework.framework_output, - " %s - Print_provider_info() ", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)); - opal_output_verbose(10,orte_rml_base_framework.framework_output, - " Provider name : %s",cur_fi->fabric_attr->prov_name); - opal_output_verbose(10,orte_rml_base_framework.framework_output, - " Protocol : %s",fi_tostr(&cur_fi->ep_attr->protocol,FI_TYPE_PROTOCOL)); - opal_output_verbose(10,orte_rml_base_framework.framework_output, - " EP Type : %s",fi_tostr(&cur_fi->ep_attr->type,FI_TYPE_EP_TYPE)); - opal_output_verbose(10,orte_rml_base_framework.framework_output, - " address_format : %s",fi_tostr(&cur_fi->addr_format,FI_TYPE_ADDR_FORMAT)); -} - -void print_provider_list_info (struct fi_info *fi ) -{ - struct fi_info *cur_fi = fi; - int fi_count = 0; - //Display all the details in the fi_info structure - opal_output_verbose(10,orte_rml_base_framework.framework_output, - " %s - Print_provider_list_info() ", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)); - - while( NULL != cur_fi ) { - fi_count++; - opal_output_verbose(10,orte_rml_base_framework.framework_output, 
- " %d.\n",fi_count); - print_provider_info( cur_fi); - cur_fi = cur_fi->next; - } - opal_output_verbose(10,orte_rml_base_framework.framework_output, - "Total # of providers supported is %d\n",fi_count); -} - -/* - * This returns all the supported transports in the system that support endpoint type RDM (reliable datagram) - * The providers returned is a list of type opal_valut_t holding opal_list_t - */ -static orte_rml_pathway_t* query_transports(void) -{ - - opal_output_verbose(10,orte_rml_base_framework.framework_output, - "%s:%d OFI Query Interface not implemented",__FILE__,__LINE__); - return NULL; -} - - -/** - ofi_prov [in]: the ofi ofi_prov_id that triggered the progress fn - **/ -static int orte_rml_ofi_progress(ofi_transport_ofi_prov_t* prov) -{ - ssize_t ret; - int count=0; /* number of messages read and processed */ - struct fi_cq_data_entry wc = { 0 }; - struct fi_cq_err_entry error = { 0 }; - orte_rml_ofi_request_t *ofi_req; - - opal_output_verbose(10, orte_rml_base_framework.framework_output, - "%s orte_rml_ofi_progress called for OFI ofi_provid %d", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - prov->ofi_prov_id); - /** - * Read the work completions from the CQ. - * From the completion's op_context, we get the associated OFI request. - * Call the request's callback. - */ - while (true) { - /* Read the cq - that triggered the libevent to call this progress fn. 
*/ - ret = fi_cq_read(prov->cq, (void *)&wc, 1); - if (0 < ret) { - opal_output_verbose(15, orte_rml_base_framework.framework_output, - "%s cq read for OFI ofi_provid %d - wc.flags = %llx", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - prov->ofi_prov_id, (long long unsigned int)wc.flags); - count++; - // check the flags to see if this is a send-completion or receive - if ( wc.flags & FI_SEND ) - { - opal_output_verbose(15, orte_rml_base_framework.framework_output, - "%s Send completion received on OFI provider id %d", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - prov->ofi_prov_id); - if (NULL != wc.op_context) { - /* get the context from the wc and call the message handler */ - ofi_req = TO_OFI_REQ(wc.op_context); - assert(ofi_req); - ret = orte_rml_ofi_send_callback(&wc, ofi_req); - if (ORTE_SUCCESS != ret) { - opal_output(orte_rml_base_framework.framework_output, - "Error returned by OFI send callback handler when a send completion was received on OFI prov: %zd", - ret); - } - } - } else if ( (wc.flags & FI_RECV) && (wc.flags & FI_MULTI_RECV) ) { - opal_output_verbose(15, orte_rml_base_framework.framework_output, - "%s Received message on OFI ofi_prov_id %d - but buffer is consumed, need to repost", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - prov->ofi_prov_id); - // reposting buffer - ret = fi_recv(orte_rml_ofi.ofi_prov[prov->ofi_prov_id].ep, - orte_rml_ofi.ofi_prov[prov->ofi_prov_id].rxbuf, - orte_rml_ofi.ofi_prov[prov->ofi_prov_id].rxbuf_size, - fi_mr_desc(orte_rml_ofi.ofi_prov[prov->ofi_prov_id].mr_multi_recv), - 0,&(prov->rx_ctx1)); - // call the receive message handler that will call the rml_base - ret = orte_rml_ofi_recv_handler(&wc, prov->ofi_prov_id); - if (ORTE_SUCCESS != ret) { - opal_output(orte_rml_base_framework.framework_output, - "Error returned by OFI Recv handler when handling the received message on the prov: %zd", - ret); - } - } else if ( wc.flags & FI_RECV ) { - opal_output_verbose(15, orte_rml_base_framework.framework_output, - "%s Received message on OFI 
provider id %d", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - prov->ofi_prov_id); - // call the receive message handler that will call the rml_base - ret = orte_rml_ofi_recv_handler(&wc, prov->ofi_prov_id); - if (ORTE_SUCCESS != ret) { - opal_output(orte_rml_base_framework.framework_output, - "Error returned by OFI Recv handler when handling the received message on the OFI prov: %zd", - ret); - } - } else if ( wc.flags & FI_MULTI_RECV ) { - opal_output_verbose(15, orte_rml_base_framework.framework_output, - "%s Received buffer overrun message on OFI provider id %d - need to repost", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - prov->ofi_prov_id); - // reposting buffer - ret = fi_recv(orte_rml_ofi.ofi_prov[prov->ofi_prov_id].ep, - orte_rml_ofi.ofi_prov[prov->ofi_prov_id].rxbuf, - orte_rml_ofi.ofi_prov[prov->ofi_prov_id].rxbuf_size, - fi_mr_desc(orte_rml_ofi.ofi_prov[prov->ofi_prov_id].mr_multi_recv), - 0,&(prov->rx_ctx1)); - if (ORTE_SUCCESS != ret) { - opal_output(orte_rml_base_framework.framework_output, - "Error returned by OFI when reposting buffer on the OFI prov: %zd", - ret); - } - }else { - opal_output_verbose(1,orte_rml_base_framework.framework_output, - "CQ has unhandled completion event with FLAG wc.flags = 0x%llx", - (long long unsigned int)wc.flags); - } - } else if (ret == -FI_EAVAIL) { - /** - * An error occured and is being reported via the CQ. - * Read the error and forward it to the upper layer. 
- */ - opal_output_verbose(1, orte_rml_base_framework.framework_output, - "%s cq_read for OFI provider id %d returned error 0x%zx <%s>", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - prov->ofi_prov_id, ret, - fi_strerror((int) -ret) ); - ret = fi_cq_readerr(prov->cq,&error,0); - if (0 > ret) { - opal_output_verbose(1,orte_rml_base_framework.framework_output, - "Error returned from fi_cq_readerr: %zd", ret); - } - assert(error.op_context); - /* get the context from wc and call the error handler */ - ofi_req = TO_OFI_REQ(error.op_context); - assert(ofi_req); - ret = orte_rml_ofi_error_callback(&error, ofi_req); - if (ORTE_SUCCESS != ret) { - opal_output_verbose(1,orte_rml_base_framework.framework_output, - "Error returned by request error callback: %zd", - ret); - } - break; - } else if (ret == -FI_EAGAIN){ - /** - * The CQ is empty. Return. - */ - opal_output_verbose(1, orte_rml_base_framework.framework_output, - "%s Empty cq for OFI provider id %d,exiting from ofi_progress()", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - prov->ofi_prov_id ); - break; - } else { - opal_output_verbose(1, orte_rml_base_framework.framework_output, - "%s cq_read for OFI provider id %d returned error 0x%zx <%s>", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - prov->ofi_prov_id, ret, - fi_strerror((int) -ret) ); - break; - } - } - return count; -} - - -/* - * call the ofi_progress() fn to read the cq - * - */ -int cq_progress_handler(int sd, short flags, void *cbdata) -{ - ofi_transport_ofi_prov_t* prov = (ofi_transport_ofi_prov_t*)cbdata; - int count; - - opal_output_verbose(10, orte_rml_base_framework.framework_output, - "%s cq_progress_handler called for OFI Provider id %d", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - prov->ofi_prov_id); - - /* call the progress fn to read the cq and process the message - * for the ofi provider */ - count = orte_rml_ofi_progress(prov); - return count; -} - - -/* - * Returns the number of ofi-providers available - */ -static int rml_ofi_component_init(void) -{ - int ret, 
fi_version; - struct fi_info *hints, *fabric_info; - struct fi_cq_attr cq_attr = {0}; - struct fi_av_attr av_attr = {0}; - uint8_t cur_ofi_prov; - opal_buffer_t modex, entry, *eptr; - - opal_output_verbose(10,orte_rml_base_framework.framework_output, - "%s - Entering rml_ofi_component_init()",ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)); - - - if (init_done) { - return orte_rml_ofi.ofi_prov_open_num; - } - - - /** - * Hints to filter providers - * See man fi_getinfo for a list of all filters - * mode: Select capabilities MTL is prepared to support. - * In this case, MTL will pass in context into communication calls - * ep_type: reliable datagram operation - * caps: Capabilities required from the provider. - * Tag matching is specified to implement MPI semantics. - * msg_order: Guarantee that messages with same tag are ordered. - */ - - hints = fi_allocinfo(); - if (!hints) { - opal_output_verbose(1, orte_rml_base_framework.framework_output, - "%s:%d: Could not allocate fi_info\n", - __FILE__, __LINE__); - return orte_rml_ofi.ofi_prov_open_num; - } - - /** - * Refine filter for additional capabilities - * endpoint type : Reliable datagram - * threading: Disable locking - * control_progress: enable async progress - */ - hints->mode = FI_CONTEXT; - hints->ep_attr->type = FI_EP_RDM; /* Reliable datagram */ - - hints->domain_attr->threading = FI_THREAD_UNSPEC; - hints->domain_attr->control_progress = FI_PROGRESS_AUTO; - hints->domain_attr->data_progress = FI_PROGRESS_AUTO; - hints->domain_attr->av_type = FI_AV_MAP; - - /** - * FI_VERSION provides binary backward and forward compatibility support - * Specify the version of OFI is coded to, the provider will select struct - * layouts that are compatible with this version. - */ - fi_version = FI_VERSION(1, 3); - - /** - * fi_getinfo: returns information about fabric services for reaching a - * remote node or service. this does not necessarily allocate resources. 
- * Pass NULL for name/service because we want a list of providers supported. - */ - ret = fi_getinfo(fi_version, /* OFI version requested */ - NULL, /* Optional name or fabric to resolve */ - NULL, /* Optional service name or port to request */ - 0ULL, /* Optional flag */ - hints, /* In: Hints to filter providers */ - &orte_rml_ofi.fi_info_list); /* Out: List of matching providers */ - if (0 != ret) { - opal_output_verbose(1, orte_rml_base_framework.framework_output, - "%s:%d: fi_getinfo failed: %s\n", - __FILE__, __LINE__, fi_strerror(-ret)); - fi_freeinfo(hints); - return ORTE_ERROR; - } - - /* added for debug purpose - Print the provider info - print_transports_query(); - print_provider_list_info(orte_rml_ofi.fi_info_list); - */ - - /* create a buffer for constructing our modex blob */ - OBJ_CONSTRUCT(&modex, opal_buffer_t); - - /** create the OFI objects for each transport in the system - * (fi_info_list) and store it in the ofi_prov array **/ - orte_rml_ofi.ofi_prov_open_num = 0; // start the ofi_prov_id from 0 - for(fabric_info = orte_rml_ofi.fi_info_list; - NULL != fabric_info && orte_rml_ofi.ofi_prov_open_num < MAX_OFI_PROVIDERS; - fabric_info = fabric_info->next) - { - opal_output_verbose(10,orte_rml_base_framework.framework_output, - "%s:%d beginning to add endpoint for OFI_provider_id=%d ",__FILE__,__LINE__, - orte_rml_ofi.ofi_prov_open_num); - print_provider_info(fabric_info); - cur_ofi_prov = orte_rml_ofi.ofi_prov_open_num; - orte_rml_ofi.ofi_prov[cur_ofi_prov].ofi_prov_id = orte_rml_ofi.ofi_prov_open_num ; - orte_rml_ofi.ofi_prov[cur_ofi_prov].fabric_info = fabric_info; - - // set FI_MULTI_RECV flag for all recv operations - fabric_info->rx_attr->op_flags = FI_MULTI_RECV; - /** - * Open fabric - * The getinfo struct returns a fabric attribute struct that can be used to - * instantiate the virtual or physical network. This opens a "fabric - * provider". See man fi_fabric for details. 
- */ - - ret = fi_fabric(fabric_info->fabric_attr, /* In: Fabric attributes */ - &orte_rml_ofi.ofi_prov[cur_ofi_prov].fabric, /* Out: Fabric handle */ - NULL); /* Optional context for fabric events */ - if (0 != ret) { - opal_output_verbose(1, orte_rml_base_framework.framework_output, - "%s:%d: fi_fabric failed: %s\n", - __FILE__, __LINE__, fi_strerror(-ret)); - orte_rml_ofi.ofi_prov[cur_ofi_prov].fabric = NULL; - /* abort this current transport, but check if next transport can be opened */ - continue; - } - - - /** - * Create the access domain, which is the physical or virtual network or - * hardware port/collection of ports. Returns a domain object that can be - * used to create endpoints. See man fi_domain for details. - */ - ret = fi_domain(orte_rml_ofi.ofi_prov[cur_ofi_prov].fabric, /* In: Fabric object */ - fabric_info, /* In: Provider */ - &orte_rml_ofi.ofi_prov[cur_ofi_prov].domain, /* Out: Domain oject */ - NULL); /* Optional context for domain events */ - if (0 != ret) { - opal_output_verbose(1, orte_rml_base_framework.framework_output, - "%s:%d: fi_domain failed: %s\n", - __FILE__, __LINE__, fi_strerror(-ret)); - orte_rml_ofi.ofi_prov[cur_ofi_prov].domain = NULL; - /* abort this current transport, but check if next transport can be opened */ - continue; - } - - /** - * Create a transport level communication endpoint. To use the endpoint, - * it must be bound to completion counters or event queues and enabled, - * and the resources consumed by it, such as address vectors, counters, - * completion queues, etc. - * see man fi_endpoint for more details. 
- */ - ret = fi_endpoint(orte_rml_ofi.ofi_prov[cur_ofi_prov].domain, /* In: Domain object */ - fabric_info, /* In: Provider */ - &orte_rml_ofi.ofi_prov[cur_ofi_prov].ep, /* Out: Endpoint object */ - NULL); /* Optional context */ - if (0 != ret) { - opal_output_verbose(1, orte_rml_base_framework.framework_output, - "%s:%d: fi_endpoint failed: %s\n", - __FILE__, __LINE__, fi_strerror(-ret)); - free_ofi_prov_resources(cur_ofi_prov); - /* abort this current transport, but check if next transport can be opened */ - continue; - } - - /** - * Save the maximum inject size. - */ - //orte_rml_ofi.max_inject_size = prov->tx_attr->inject_size; - - /** - * Create the objects that will be bound to the endpoint. - * The objects include: - * - completion queue for events - * - address vector of other endpoint addresses - * - dynamic memory-spanning memory region - */ - cq_attr.format = FI_CQ_FORMAT_DATA; - cq_attr.wait_obj = FI_WAIT_FD; - cq_attr.wait_cond = FI_CQ_COND_NONE; - ret = fi_cq_open(orte_rml_ofi.ofi_prov[cur_ofi_prov].domain, - &cq_attr, &orte_rml_ofi.ofi_prov[cur_ofi_prov].cq, NULL); - if (ret) { - opal_output_verbose(1, orte_rml_base_framework.framework_output, - "%s:%d: fi_cq_open failed: %s\n", - __FILE__, __LINE__, fi_strerror(-ret)); - free_ofi_prov_resources(cur_ofi_prov); - /* abort this current transport, but check if next transport can be opened */ - continue; - } - - /** - * The remote fi_addr will be stored in the ofi_endpoint struct. - * So, we use the AV in "map" mode. 
- */ - av_attr.type = FI_AV_MAP; - ret = fi_av_open(orte_rml_ofi.ofi_prov[cur_ofi_prov].domain, - &av_attr, &orte_rml_ofi.ofi_prov[cur_ofi_prov].av, NULL); - if (ret) { - opal_output_verbose(1, orte_rml_base_framework.framework_output, - "%s:%d: fi_av_open failed: %s\n", - __FILE__, __LINE__, fi_strerror(-ret)); - free_ofi_prov_resources(cur_ofi_prov); - /* abort this current transport, but check if next transport can be opened */ - continue; - } - - /** - * Bind the CQ and AV to the endpoint object. - */ - ret = fi_ep_bind(orte_rml_ofi.ofi_prov[cur_ofi_prov].ep, - (fid_t)orte_rml_ofi.ofi_prov[cur_ofi_prov].cq, - FI_SEND | FI_RECV); - if (0 != ret) { - opal_output_verbose(1, orte_rml_base_framework.framework_output, - "%s:%d: fi_bind CQ-EP failed: %s\n", - __FILE__, __LINE__, fi_strerror(-ret)); - free_ofi_prov_resources(cur_ofi_prov); - /* abort this current transport, but check if next transport can be opened */ - continue; - } - - ret = fi_ep_bind(orte_rml_ofi.ofi_prov[cur_ofi_prov].ep, - (fid_t)orte_rml_ofi.ofi_prov[cur_ofi_prov].av, - 0); - if (0 != ret) { - opal_output_verbose(1, orte_rml_base_framework.framework_output, - "%s:%d: fi_bind AV-EP failed: %s\n", - __FILE__, __LINE__, fi_strerror(-ret)); - free_ofi_prov_resources(cur_ofi_prov); - /* abort this current transport, but check if next transport can be opened */ - continue; - } - - /** - * Enable the endpoint for communication - * This commits the bind operations. 
- */ - ret = fi_enable(orte_rml_ofi.ofi_prov[cur_ofi_prov].ep); - if (0 != ret) { - opal_output_verbose(1, orte_rml_base_framework.framework_output, - "%s:%d: fi_enable failed: %s\n", - __FILE__, __LINE__, fi_strerror(-ret)); - free_ofi_prov_resources(cur_ofi_prov); - /* abort this current transport, but check if next transport can be opened */ - continue; - } - opal_output_verbose(10,orte_rml_base_framework.framework_output, - "%s:%d ep enabled for ofi_prov_id - %d ",__FILE__,__LINE__, - orte_rml_ofi.ofi_prov[cur_ofi_prov].ofi_prov_id); - - - /** - * Get our address and publish it with modex. - **/ - orte_rml_ofi.ofi_prov[cur_ofi_prov].epnamelen = sizeof (orte_rml_ofi.ofi_prov[cur_ofi_prov].ep_name); - ret = fi_getname((fid_t)orte_rml_ofi.ofi_prov[cur_ofi_prov].ep, - &orte_rml_ofi.ofi_prov[cur_ofi_prov].ep_name[0], - &orte_rml_ofi.ofi_prov[cur_ofi_prov].epnamelen); - if (ret) { - opal_output_verbose(1, orte_rml_base_framework.framework_output, - "%s:%d: fi_getname failed: %s\n", - __FILE__, __LINE__, fi_strerror(-ret)); - free_ofi_prov_resources(cur_ofi_prov); - /* abort this current transport, but check if next transport can be opened */ - continue; - } - - /* create the modex entry for this provider */ - OBJ_CONSTRUCT(&entry, opal_buffer_t); - /* pack the provider's name */ - if (OPAL_SUCCESS != (ret = opal_dss.pack(&entry, &(orte_rml_ofi.ofi_prov[cur_ofi_prov].fabric_info->fabric_attr->prov_name), 1, OPAL_STRING))) { - OBJ_DESTRUCT(&entry); - free_ofi_prov_resources(cur_ofi_prov); - continue; - } - /* pack the provider's local index */ - if (OPAL_SUCCESS != (ret = opal_dss.pack(&entry, &cur_ofi_prov, 1, OPAL_UINT8))) { - OBJ_DESTRUCT(&entry); - free_ofi_prov_resources(cur_ofi_prov); - continue; - } - /* pack the size of the provider's connection blob */ - if (OPAL_SUCCESS != (ret = opal_dss.pack(&entry, &orte_rml_ofi.ofi_prov[cur_ofi_prov].epnamelen, 1, OPAL_SIZE))) { - OBJ_DESTRUCT(&entry); - free_ofi_prov_resources(cur_ofi_prov); - continue; - } - /* pack the 
blob itself */ - if (OPAL_SUCCESS != (ret = opal_dss.pack(&entry, orte_rml_ofi.ofi_prov[cur_ofi_prov].ep_name, - orte_rml_ofi.ofi_prov[cur_ofi_prov].epnamelen, OPAL_BYTE))) { - OBJ_DESTRUCT(&entry); - free_ofi_prov_resources(cur_ofi_prov); - continue; - } - /* add this entry to the overall modex object */ - eptr = &entry; - if (OPAL_SUCCESS != (ret = opal_dss.pack(&modex, &eptr, 1, OPAL_BUFFER))) { - OBJ_DESTRUCT(&entry); - free_ofi_prov_resources(cur_ofi_prov); - continue; - } - OBJ_DESTRUCT(&entry); - - /*print debug information on opal_modex_string */ - switch ( orte_rml_ofi.ofi_prov[cur_ofi_prov].fabric_info->addr_format) { - case FI_SOCKADDR_IN : - opal_output_verbose(1,orte_rml_base_framework.framework_output, - "%s:%d In FI_SOCKADDR_IN. ",__FILE__,__LINE__); - /* Address is of type sockaddr_in (IPv4) */ - opal_output_verbose(1,orte_rml_base_framework.framework_output, - "%s sending Opal modex string for ofi prov_id %d, epnamelen = %lu ", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - cur_ofi_prov, orte_rml_ofi.ofi_prov[cur_ofi_prov].epnamelen); - /*[debug] - print the sockaddr - port and s_addr */ - struct sockaddr_in* ep_sockaddr = (struct sockaddr_in*)orte_rml_ofi.ofi_prov[cur_ofi_prov].ep_name; - opal_output_verbose(1,orte_rml_base_framework.framework_output, - "%s port = 0x%x, InternetAddr = 0x%s ", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - ntohs(ep_sockaddr->sin_port), inet_ntoa(ep_sockaddr->sin_addr)); - break; - } - - /** - * Set the ANY_SRC address. 
- */ - orte_rml_ofi.any_addr = FI_ADDR_UNSPEC; - - /** - * Allocate tx,rx buffers and Post a multi-RECV buffer for each endpoint - **/ - //[TODO later] For now not considering ep_attr prefix_size (add this later) - orte_rml_ofi.ofi_prov[cur_ofi_prov].rxbuf_size = MIN_MULTI_BUF_SIZE * MULTI_BUF_SIZE_FACTOR; - orte_rml_ofi.ofi_prov[cur_ofi_prov].rxbuf = malloc(orte_rml_ofi.ofi_prov[cur_ofi_prov].rxbuf_size); - - ret = fi_mr_reg(orte_rml_ofi.ofi_prov[cur_ofi_prov].domain, - orte_rml_ofi.ofi_prov[cur_ofi_prov].rxbuf, - orte_rml_ofi.ofi_prov[cur_ofi_prov].rxbuf_size, - FI_RECV, 0, 0, 0, &orte_rml_ofi.ofi_prov[cur_ofi_prov].mr_multi_recv, - &orte_rml_ofi.ofi_prov[cur_ofi_prov].rx_ctx1); - if (ret) { - opal_output_verbose(1, orte_rml_base_framework.framework_output, - "%s:%d: fi_mr_reg failed: %s\n", - __FILE__, __LINE__, fi_strerror(-ret)); - free_ofi_prov_resources(cur_ofi_prov); - /* abort this current transport, but check if next transport can be opened */ - continue; - } - - ret = fi_setopt(&orte_rml_ofi.ofi_prov[cur_ofi_prov].ep->fid, FI_OPT_ENDPOINT, FI_OPT_MIN_MULTI_RECV, - &orte_rml_ofi.min_ofi_recv_buf_sz, sizeof(orte_rml_ofi.min_ofi_recv_buf_sz) ); - if (ret) { - opal_output_verbose(1, orte_rml_base_framework.framework_output, - "%s:%d: fi_setopt failed: %s\n", - __FILE__, __LINE__, fi_strerror(-ret)); - free_ofi_prov_resources(cur_ofi_prov); - /* abort this current transport, but check if next transport can be opened */ - continue; - } - - ret = fi_recv(orte_rml_ofi.ofi_prov[cur_ofi_prov].ep, - orte_rml_ofi.ofi_prov[cur_ofi_prov].rxbuf, - orte_rml_ofi.ofi_prov[cur_ofi_prov].rxbuf_size, - fi_mr_desc(orte_rml_ofi.ofi_prov[cur_ofi_prov].mr_multi_recv), - 0,&orte_rml_ofi.ofi_prov[cur_ofi_prov].rx_ctx1); - if (ret) { - opal_output_verbose(1, orte_rml_base_framework.framework_output, - "%s:%d: fi_recv failed: %s\n", - __FILE__, __LINE__, fi_strerror(-ret)); - free_ofi_prov_resources(cur_ofi_prov); - /* abort this current transport, but check if next transport can be 
opened */ - continue; - } - /** - * get the fd and register the progress fn - **/ - ret = fi_control(&orte_rml_ofi.ofi_prov[cur_ofi_prov].cq->fid, FI_GETWAIT, - (void *) &orte_rml_ofi.ofi_prov[cur_ofi_prov].fd); - if (0 != ret) { - opal_output_verbose(1, orte_rml_base_framework.framework_output, - "%s:%d: fi_control failed to get fd: %s\n", - __FILE__, __LINE__, fi_strerror(-ret)); - free_ofi_prov_resources(cur_ofi_prov); - /* abort this current transport, but check if next transport can be opened */ - continue; - } - - /* - create the event that will wait on the fd*/ - /* use the opal_event_set to do a libevent set on the fd - * so when something is available to read, the cq_porgress_handler - * will be called */ - opal_event_set(orte_event_base, - &orte_rml_ofi.ofi_prov[cur_ofi_prov].progress_event, - orte_rml_ofi.ofi_prov[cur_ofi_prov].fd, - OPAL_EV_READ|OPAL_EV_PERSIST, - cq_progress_handler, - &orte_rml_ofi.ofi_prov[cur_ofi_prov]); - opal_event_add(&orte_rml_ofi.ofi_prov[cur_ofi_prov].progress_event, 0); - orte_rml_ofi.ofi_prov[cur_ofi_prov].progress_ev_active = true; - - /** update the number of ofi_provs in the ofi_prov[] array **/ - opal_output_verbose(10,orte_rml_base_framework.framework_output, - "%s:%d ofi_prov id - %d created ",__FILE__,__LINE__,orte_rml_ofi.ofi_prov_open_num); - orte_rml_ofi.ofi_prov_open_num++; - } - if (fabric_info != NULL && orte_rml_ofi.ofi_prov_open_num >= MAX_OFI_PROVIDERS ) { - opal_output_verbose(1,orte_rml_base_framework.framework_output, - "%s:%d fi_getinfo list not fully parsed as MAX_OFI_PROVIDERS - %d reached ",__FILE__,__LINE__,orte_rml_ofi.ofi_prov_open_num); - } - - /** - * Free providers info since it's not needed anymore. 
- */ - fi_freeinfo(hints); - hints = NULL; - /* check if at least one ofi_prov was successfully opened */ - if (0 < orte_rml_ofi.ofi_prov_open_num) { - uint8_t *data; - int32_t sz; - - opal_output_verbose(10,orte_rml_base_framework.framework_output, - "%s:%d ofi providers openened=%d returning orte_rml_ofi.api", - __FILE__,__LINE__,orte_rml_ofi.ofi_prov_open_num); - - OBJ_CONSTRUCT(&orte_rml_ofi.recv_msg_queue_list,opal_list_t); - /* post the modex object */ - opal_output_verbose(1, orte_rml_base_framework.framework_output, - "%s calling OPAL_MODEX_SEND_STRING for RML/OFI ", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)); - ret = opal_dss.unload(&modex, (void**)(&data), &sz); - OBJ_DESTRUCT(&modex); - if (OPAL_SUCCESS != ret) { - ORTE_ERROR_LOG(ret); - return ret; - } - OPAL_MODEX_SEND_STRING(ret, OPAL_PMIX_GLOBAL, - "rml.ofi", data, sz); - free(data); - if (OPAL_SUCCESS != ret) { - ORTE_ERROR_LOG(ret); - return ret; - } - } else { - opal_output_verbose(1,orte_rml_base_framework.framework_output, - "%s:%d Failed to open any OFI Providers",__FILE__,__LINE__); - } - - return orte_rml_ofi.ofi_prov_open_num; -} - -/* return : the ofi_prov_id that corresponds to the transport requested by the attributes - if transport is not found RML_OFI_PROV_ID_INVALID is returned. - @[in]attributes : the attributes passed in to open_conduit reg the transport requested -*/ -int get_ofi_prov_id(opal_list_t *attributes) -{ - int ofi_prov_id = RML_OFI_PROV_ID_INVALID, prov_num=0; - char **providers = NULL, *provider; - struct fi_info *cur_fi; - char *comp_attrib = NULL; - char **comps; - int i; - bool choose_fabric= false; - - /* check the list of attributes in below order - * Attribute should have ORTE_RML_TRANSPORT_ATTRIB key - * with values "ethernet" or "fabric". "fabric" is higher priority. 
- * (or) ORTE_RML_OFI_PROV_NAME key with values "socket" or "OPA" - * if both above attributes are missing return failure - */ - //if (orte_get_attribute(attributes, ORTE_RML_TRANSPORT_ATTRIB, (void**)&transport, OPAL_STRING) ) { - - if (orte_get_attribute(attributes, ORTE_RML_TRANSPORT_TYPE, (void**)&comp_attrib, OPAL_STRING) && - NULL != comp_attrib) { - comps = opal_argv_split(comp_attrib, ','); - for (i=0; NULL != comps[i]; i++) { - if (NULL != strstr(ofi_transports_supported, comps[i])) { - if (0 == strcmp(comps[i], "ethernet")) { - opal_output_verbose(20,orte_rml_base_framework.framework_output, - "%s - user requested opening conduit using OFI ethernet/sockets provider", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)); - opal_argv_append_nosize(&providers, "sockets"); - } else if (0 == strcmp(comps[i], "fabric")) { - opal_output_verbose(20,orte_rml_base_framework.framework_output, - "%s - user requested opening conduit using OFI fabric provider", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)); - opal_argv_prepend_nosize(&providers, "fabric"); /* fabric is higher priority so prepend it */ - } - } - } - } - /* if from the transport we don't know which provider we want, then check for the ORTE_RML_OFI_PROV_NAME_ATTRIB */ - if (NULL == providers) { - if (orte_get_attribute(attributes, ORTE_RML_PROVIDER_ATTRIB, (void**)&provider, OPAL_STRING)) { - opal_argv_append_nosize(&providers, provider); - } else { - ofi_prov_id = RML_OFI_PROV_ID_INVALID; - } - } - if (NULL != providers) { - /* go down the list of preferences in order */ - for (i=0; NULL != providers[i] && RML_OFI_PROV_ID_INVALID == ofi_prov_id; i++) { - // if generic transport "fabric" is requested then choose first available non-socket provider - if (0 == strcmp(providers[i],"fabric")) - choose_fabric=true; - else - choose_fabric=false; - // loop the orte_rml_ofi.ofi_provs[] and see if someone matches - for (prov_num = 0; prov_num < orte_rml_ofi.ofi_prov_open_num; prov_num++ ) { - cur_fi = 
orte_rml_ofi.ofi_prov[prov_num].fabric_info; - if (choose_fabric) { - opal_output_verbose(20,orte_rml_base_framework.framework_output, - "%s - get_ofi_prov_id() -> comparing sockets != %s to choose first available fabric provider", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - cur_fi->fabric_attr->prov_name); - if (0 != strcmp("sockets", cur_fi->fabric_attr->prov_name)) { - ofi_prov_id = prov_num; - opal_output_verbose(20,orte_rml_base_framework.framework_output, - "%s - Choosing provider %s", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - cur_fi->fabric_attr->prov_name); - break; - } - } else { - opal_output_verbose(20,orte_rml_base_framework.framework_output, - "%s - get_ofi_prov_id() -> comparing %s = %s ", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - providers[i], cur_fi->fabric_attr->prov_name); - if (0 == strcmp(providers[i], cur_fi->fabric_attr->prov_name)) { - ofi_prov_id = prov_num; - opal_output_verbose(20,orte_rml_base_framework.framework_output, "%s - Choosing provider %s", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - cur_fi->fabric_attr->prov_name); - break; - } - } - } - } - } - opal_output_verbose(20,orte_rml_base_framework.framework_output, - "%s - get_ofi_prov_id(), returning ofi_prov_id=%d ", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),ofi_prov_id); - return ofi_prov_id; -} - -/* - * Allocate a new module and initialise ofi_prov information - * for the requested provider and return the module * - */ -static orte_rml_base_module_t* make_module( int ofi_prov_id) -{ - orte_rml_ofi_module_t *mod = NULL; - - opal_output_verbose(20,orte_rml_base_framework.framework_output, - "%s - rml_ofi make_module() begin ", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)); - - if (RML_OFI_PROV_ID_INVALID == ofi_prov_id) { - opal_output_verbose(20,orte_rml_base_framework.framework_output, - "%s - open_conduit did not select any ofi provider, returning NULL ", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)); - return NULL; - } - - - /* create a new module */ - mod = 
(orte_rml_ofi_module_t*)calloc(1,sizeof(orte_rml_ofi_module_t)); - if (NULL == mod) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - return NULL; - } - /* copy the APIs over to it and the OFI provider information */ - memcpy(mod, &orte_rml_ofi, sizeof(orte_rml_ofi_module_t)); - /* setup the remaining data locations in mod, associate conduit with ofi provider selected*/ - mod->cur_transport_id = ofi_prov_id; - /* set the routed module */ - if (routing_desired) { - mod->api.routed = orte_routed.assign_module(NULL); - } else { - mod->api.routed = orte_routed.assign_module("direct"); - } - if (NULL == mod->api.routed) { - /* we can't work */ - opal_output_verbose(1,orte_rml_base_framework.framework_output, - "%s - Failed to get%srouted support, disqualifying ourselves", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - routing_desired ? " " : " direct "); - free(mod); - return NULL; - } - return (orte_rml_base_module_t*)mod; -} - - -/* Order of attributes honoring * -* ORTE_RML_INCLUDE_COMP_ATTRIB * -* ORTE_RML_EXCLUDE_COMP_ATTRIB * -* ORTE_RML_TRANSPORT_ATTRIB * -* ORTE_RML_PROVIDER_ATTRIB */ -static orte_rml_base_module_t* open_conduit(opal_list_t *attributes) -{ - char *comp_attrib = NULL; - char **comps; - int i; - orte_attribute_t *attr; - - opal_output_verbose(20,orte_rml_base_framework.framework_output, - "%s - Entering rml_ofi_open_conduit()", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)); - - /* Open all ofi endpoints */ - if (!init_done) { - rml_ofi_component_init(); - init_done = true; - } - - /* check if atleast 1 ofi provider is initialised */ - if ( 0 >= orte_rml_ofi.ofi_prov_open_num) { - opal_output_verbose(20,orte_rml_base_framework.framework_output, - "%s - Init did not open any Ofi endpoints, returning NULL", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)); - return NULL; - } - - /* someone may require this specific component, so look for "ofi" */ - if (orte_get_attribute(attributes, ORTE_RML_INCLUDE_COMP_ATTRIB, (void**)&comp_attrib, OPAL_STRING) && - NULL != comp_attrib) { - 
/* they specified specific components - could be multiple */ - comps = opal_argv_split(comp_attrib, ','); - for (i=0; NULL != comps[i]; i++) { - if (0 == strcmp(comps[i], "ofi")) { - /* we are a candidate, */ - opal_argv_free(comps); - return make_module(get_ofi_prov_id(attributes)); - } - } - /* we are not a candidate */ - opal_argv_free(comps); - return NULL; - } else if (orte_get_attribute(attributes, ORTE_RML_EXCLUDE_COMP_ATTRIB, (void**)&comp_attrib, OPAL_STRING) && - NULL != comp_attrib) { - /* see if we are on the list */ - comps = opal_argv_split(comp_attrib, ','); - for (i=0; NULL != comps[i]; i++) { - if (0 == strcmp(comps[i], "ofi")) { - /* we cannot be a candidate */ - opal_argv_free(comps); - return NULL; - } - } - } - - if (orte_get_attribute(attributes, ORTE_RML_TRANSPORT_TYPE, (void**)&comp_attrib, OPAL_STRING) && - NULL != comp_attrib) { - opal_output_verbose(20,orte_rml_base_framework.framework_output, - "%s - ORTE_RML_TRANSPORT_TYPE = %s ", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), comp_attrib); - comps = opal_argv_split(comp_attrib, ','); - for (i=0; NULL != comps[i]; i++) { - if (NULL != strstr(ofi_transports_supported, comps[i])) { - /* we are a candidate, */ - opal_output_verbose(20,orte_rml_base_framework.framework_output, - "%s - Opening conduit using OFI.. 
", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)); - opal_argv_free(comps); - return make_module(get_ofi_prov_id(attributes)); - } - } - opal_argv_free(comps); - } - - /* Alternatively, check the attributes to see if we qualify - we only handle - * "pt2pt" */ - OPAL_LIST_FOREACH(attr, attributes, orte_attribute_t) { - /* [TODO] add any additional attributes check here */ - - } - opal_output_verbose(20,orte_rml_base_framework.framework_output, - "%s - ofi is not a candidate as per attributes, returning NULL", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)); - /* if we get here, we cannot handle it */ - return NULL; -} - -static void pr_cons(orte_rml_ofi_peer_t *ptr) -{ - ptr->ofi_prov_name = NULL; - ptr->ofi_ep = NULL; - ptr->ofi_ep_len = 0; - ptr->src_prov_id = RML_OFI_PROV_ID_INVALID; -} - -static void pr_des(orte_rml_ofi_peer_t *ptr) -{ - if ( NULL != ptr->ofi_prov_name) - free(ptr->ofi_prov_name); - if ( 0 < ptr->ofi_ep_len) - free( ptr->ofi_ep); -} - -OBJ_CLASS_INSTANCE(orte_rml_ofi_peer_t, - opal_object_t, - pr_cons, pr_des); diff --git a/orte/mca/rml/ofi/rml_ofi_request.h b/orte/mca/rml/ofi/rml_ofi_request.h deleted file mode 100644 index 54b8203ae84..00000000000 --- a/orte/mca/rml/ofi/rml_ofi_request.h +++ /dev/null @@ -1,137 +0,0 @@ -/* - * Copyright (c) 2015 Intel, Inc. All rights reserved - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#ifndef ORTE_RML_OFI_REQUEST_H -#define ORTE_RML_OFI_REQUEST_H - - -#define TO_OFI_REQ(_ptr_ctx) \ - container_of((_ptr_ctx), orte_rml_ofi_request_t, ctx) - -typedef enum { - ORTE_RML_OFI_SEND, - ORTE_RML_OFI_RECV, - ORTE_RML_OFI_ACK, - ORTE_RML_OFI_PROBE -} orte_rml_ofi_request_type_t; -/* orte_rml_ofi_msg_header_t contains the header information for the message being sent. -The header and data is passed on to the destination. 
The destination will re-construct the -orte_rml_sent_t struct once it receives this header and data.This header has the required information -to construct the orte_rml_sent_t struct and also if the message is split into packets, -then the packet information - total number of packets and the current packet number. -*/ -struct orte_rml_ofi_msg_header_t{ - opal_process_name_t origin; // originator process id from the send message - opal_process_name_t dst; // Destination process id from the send message - uint32_t seq_num; // seq_num from the send message - orte_rml_tag_t tag; // tag from the send message - uint32_t msgid; // unique msgid added by ofi plugin to keep track of fragmented msgs - uint32_t tot_pkts; // total packets this msg will be fragmented into by ofi plugin - uint32_t cur_pkt_num; // current packet number - }; -typedef struct orte_rml_ofi_msg_header_t orte_rml_ofi_msg_header_t; - -/* -orte_rml_ofi_pkts_t defines the packets in the message. Each packet contains header information -and the data. Create a list of packets to hold the entire message. -*/ -typedef struct { - //list_item_t - opal_list_item_t super; - /* header + data size */ - size_t pkt_size; - //header + data - void *data; -}orte_rml_ofi_send_pkt_t; -OBJ_CLASS_DECLARATION(orte_rml_ofi_send_pkt_t); - -/* -orte_rml_ofi_recv_pkt_t defines the packets in the receiving end of message. -Each packet contains the packet number and the data. -Create a list of packets to hold the entire message. 
-*/ -typedef struct { - //list_item_t - opal_list_item_t super; - /* current packet number */ - uint32_t cur_pkt_num; - /*data size */ - size_t pkt_size; - //data - void *data; -}orte_rml_ofi_recv_pkt_t; -OBJ_CLASS_DECLARATION(orte_rml_ofi_recv_pkt_t); - -/* -orte_rml_ofi_request_t holds the send request (orte_rml_send_t) -*/ -typedef struct { - opal_object_t super; - - /** OFI context */ - struct fi_context ctx; - - orte_rml_send_t *send; - - /** OFI provider_id the request will use - this is - * the reference to element into the orte_rml_ofi.ofi_prov[] **/ - uint8_t ofi_prov_id; - - /** OFI Request type */ - orte_rml_ofi_request_type_t type; - - /** Completion count used by blocking and/or synchronous operations */ - volatile int completion_count; - - /** Reference to the RML used to lookup */ - /* source of an ANY_SOURCE Recv */ - struct orte_rml_base_module_t* rml; - - /** header being sent **/ - orte_rml_ofi_msg_header_t hdr; - - /** Pack buffer */ - void *data_blob; - - /** Pack buffer size */ - size_t length; - - /** Header and data in a list of Packets orte_rml_ofi_send_pkt_t */ - opal_list_t pkt_list; - -} orte_rml_ofi_request_t; -OBJ_CLASS_DECLARATION(orte_rml_ofi_request_t); - - -/* This will hold all the pckts received at the destination. -Each entry will be indexed by [sender,msgid] and will have -all the packets for that msgid and sender. 
-*/ -typedef struct { - - opal_list_item_t super; //list_item_t - uint32_t msgid; // unique msgid added by ofi plugin to keep track of fragmented msgs - opal_process_name_t sender; // originator process id from the send message - uint32_t tot_pkts; // total packets this msg will be fragmented into by ofi plugin - uint32_t pkt_recd; // current packet number - opal_list_t pkt_list; // list holding Packets in this msg of type orte_rml_ofi_recv_pkt_t -} ofi_recv_msg_queue_t; -OBJ_CLASS_DECLARATION( ofi_recv_msg_queue_t); - -/* define an object for transferring send requests to the event lib */ -typedef struct { - opal_object_t super; - opal_event_t ev; - orte_rml_send_t send; - /* ofi provider id */ - int ofi_prov_id; -} ofi_send_request_t; -OBJ_CLASS_DECLARATION(ofi_send_request_t); - -#endif diff --git a/orte/mca/rml/ofi/rml_ofi_send.c b/orte/mca/rml/ofi/rml_ofi_send.c deleted file mode 100644 index 99a143c925d..00000000000 --- a/orte/mca/rml/ofi/rml_ofi_send.c +++ /dev/null @@ -1,1052 +0,0 @@ -/* - * Copyright (c) 2015-2017 Intel, Inc. All rights reserved. - * Copyright (c) 2017 Los Alamos National Security, LLC. All rights - * reserved. 
- * - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "orte_config.h" - -#include "opal/dss/dss_types.h" -#include "opal/util/net.h" -#include "opal/util/output.h" -#include "opal/mca/event/event.h" - -#include "orte/mca/errmgr/errmgr.h" -#include "orte/mca/rml/base/base.h" -#include "orte/mca/rml/rml_types.h" - -#include -#include -#include -#include -#include -#include - -#include "rml_ofi.h" - -static void ofi_req_cons(orte_rml_ofi_request_t *ptr) -{ - OBJ_CONSTRUCT(&ptr->pkt_list, opal_list_t); -} -static void ofi_req_des(orte_rml_ofi_request_t *ptr) -{ - OPAL_LIST_DESTRUCT(&ptr->pkt_list); -} -OBJ_CLASS_INSTANCE(orte_rml_ofi_request_t, - opal_object_t, - ofi_req_cons, ofi_req_des); - - -static void ofi_send_req_cons(ofi_send_request_t *ptr) -{ - OBJ_CONSTRUCT(&ptr->send, orte_rml_send_t); -} -OBJ_CLASS_INSTANCE(ofi_send_request_t, - opal_object_t, - ofi_send_req_cons, NULL); - -OBJ_CLASS_INSTANCE(orte_rml_ofi_send_pkt_t, - opal_list_item_t, - NULL, NULL); - -OBJ_CLASS_INSTANCE(orte_rml_ofi_recv_pkt_t, - opal_list_item_t, - NULL, NULL); - - -static void ofi_recv_msg_queue_cons(ofi_recv_msg_queue_t *ptr) -{ - ptr->msgid = 0; - ptr->tot_pkts = 1; - ptr->pkt_recd = 0; - OBJ_CONSTRUCT(&ptr->pkt_list, opal_list_t); -} -static void ofi_recv_msg_queue_des(ofi_recv_msg_queue_t *ptr) -{ - OPAL_LIST_DESTRUCT(&ptr->pkt_list); -} -OBJ_CLASS_INSTANCE(ofi_recv_msg_queue_t, - opal_list_item_t, - ofi_recv_msg_queue_cons, ofi_recv_msg_queue_des); - -static void send_self_exe(int fd, short args, void* data) -{ - orte_self_send_xfer_t *xfer = (orte_self_send_xfer_t*)data; - - opal_output_verbose(1, orte_rml_base_framework.framework_output, - "%s rml_send_to_self ofi callback executing for tag %d", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), xfer->tag); - - /* execute the send callback function - note that - * send-to-self always returns a SUCCESS status - */ - if (NULL != xfer->iov) { - if (NULL != xfer->cbfunc.iov) { - /* non-blocking iovec 
send */ - xfer->cbfunc.iov(ORTE_SUCCESS, ORTE_PROC_MY_NAME, xfer->iov, xfer->count, - xfer->tag, xfer->cbdata); - } - } else if (NULL != xfer->buffer) { - if (NULL != xfer->cbfunc.buffer) { - /* non-blocking buffer send */ - xfer->cbfunc.buffer(ORTE_SUCCESS, ORTE_PROC_MY_NAME, xfer->buffer, - xfer->tag, xfer->cbdata); - } - } else { - /* should never happen */ - abort(); - } - - /* cleanup the memory */ - OBJ_RELEASE(xfer); -} - -/** Send callback */ -/* [Desc] This is called from the progress fn when a send completion -** is received in the cq -** wc [in] : the completion queue data entry -** ofi_send_req [in]: ofi send request with the send msg and callback -*/ -int orte_rml_ofi_send_callback(struct fi_cq_data_entry *wc, - orte_rml_ofi_request_t* ofi_req) -{ - orte_rml_ofi_send_pkt_t *ofi_send_pkt, *next; - opal_output_verbose(10, orte_rml_base_framework.framework_output, - "%s orte_rml_ofi_send_callback called, completion count = %d, msgid = %d", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ofi_req->completion_count, ofi_req->hdr.msgid); - assert(ofi_req->completion_count > 0); - ofi_req->completion_count--; - if ( 0 == ofi_req->completion_count ) { - // call the callback fn of the sender - ofi_req->send->status = ORTE_SUCCESS; - opal_output_verbose(10, orte_rml_base_framework.framework_output, - "%s calling ORTE_RML_SEND_COMPLETE macro for msgid = %d", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ofi_req->hdr.msgid); - ORTE_RML_SEND_COMPLETE(ofi_req->send); - OPAL_LIST_FOREACH_SAFE(ofi_send_pkt, next, &ofi_req->pkt_list, orte_rml_ofi_send_pkt_t) { - free( ofi_send_pkt->data); - ofi_send_pkt->pkt_size=0; - opal_list_remove_item(&ofi_req->pkt_list, &ofi_send_pkt->super); - opal_output_verbose(10, orte_rml_base_framework.framework_output, - "%s Removed pkt from list ",ORTE_NAME_PRINT(ORTE_PROC_MY_NAME) ); - OBJ_RELEASE(ofi_send_pkt); - opal_output_verbose(10, orte_rml_base_framework.framework_output, - "%s Released packet ",ORTE_NAME_PRINT(ORTE_PROC_MY_NAME) ); - } - 
free(ofi_req->data_blob); - OBJ_RELEASE(ofi_req); - } - - // [TODO] need to check for error before returning success - return ORTE_SUCCESS; -} - -/** Error callback */ -/* [Desc] This is called from the progress fn when a send completion -** is received in the cq -** wc [in] : the completion queue data entry -** ofi_send_req [in]: ofi send request with the send msg and callback -*/ -int orte_rml_ofi_error_callback(struct fi_cq_err_entry *error, - orte_rml_ofi_request_t* ofi_req) -{ - opal_output_verbose(10, orte_rml_base_framework.framework_output, - "%s orte_rml_ofi_error_callback called ", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME) ); - switch(error->err) { - default: - /* call the send-callback fn with error and return, also return failure status */ - ofi_req->send->status = ORTE_ERR_CONDUIT_SEND_FAIL; - ORTE_RML_SEND_COMPLETE(ofi_req->send); - } - return ORTE_SUCCESS; -} - -/** Recv handler */ -/* [Desc] This is called from the progress fn when a recv completion -** is received in the cq -** wc [in] : the completion queue data entry */ -int orte_rml_ofi_recv_handler(struct fi_cq_data_entry *wc, uint8_t ofi_prov_id) -{ - orte_rml_ofi_msg_header_t msg_hdr; - uint32_t msglen, datalen = 0; - char *data, *totdata, *nextpkt; - ofi_recv_msg_queue_t *recv_msg_queue, *new_msg; - orte_rml_ofi_recv_pkt_t *ofi_recv_pkt, *new_pkt, *next; - bool msg_in_queue = false; - - opal_output_verbose(10, orte_rml_base_framework.framework_output, - "%s orte_rml_ofi_recv_handler called ", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME) ); - /*copy the header and data from buffer and pass it on - ** since this is the ofi_prov recv buffer don't want it to be released as - ** considering re-using it, so for now copying to newly allocated *data - ** the *data will be released by orte_rml_base functions */ - - memcpy(&msg_hdr,wc->buf,sizeof(orte_rml_ofi_msg_header_t)); - msglen = wc->len - sizeof(orte_rml_ofi_msg_header_t); - opal_output_verbose(10, orte_rml_base_framework.framework_output, - "%s Received 
packet -> msg id = %d wc->len = %lu, msglen = %d", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), msg_hdr.msgid, wc->len, msglen ); - data = (char *)malloc(msglen); - memcpy(data,((char *)wc->buf+sizeof(orte_rml_ofi_msg_header_t)),msglen); - opal_output_verbose(15, orte_rml_base_framework.framework_output, - "%s header info of received packet -> cur_pkt_num = %d, tot_pkts = %d ", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), msg_hdr.cur_pkt_num, msg_hdr.tot_pkts ); - /* To accomodate message bigger than recv buffer size, - check if current message is in multiple blocks and append them before sending it to RML */ - if ( msg_hdr.tot_pkts == 1) { - /* Since OFI is point-to-point, no need to check if the intended destination is me - send to RML */ - opal_output_verbose(10, orte_rml_base_framework.framework_output, - "%s Posting Recv for msgid %d, from peer - %s , Tag = %d", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), msg_hdr.msgid, ORTE_NAME_PRINT(&msg_hdr.origin),msg_hdr.tag ); - ORTE_RML_POST_MESSAGE(&msg_hdr.origin, msg_hdr.tag, msg_hdr.seq_num,data,msglen); - } else { - msg_in_queue = false; - new_pkt = OBJ_NEW(orte_rml_ofi_recv_pkt_t); - new_pkt->cur_pkt_num = msg_hdr.cur_pkt_num; - new_pkt->pkt_size = msglen; - new_pkt->data = data; - opal_output_verbose(10, orte_rml_base_framework.framework_output, - "%s Just beofe checking if this message-pkt is already in queue. 
msgid-%d", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), msg_hdr.msgid ); - /* check if the queue has the [msgid,sender] entry */ - OPAL_LIST_FOREACH(recv_msg_queue, &orte_rml_ofi.recv_msg_queue_list, ofi_recv_msg_queue_t) { - opal_output_verbose(10, orte_rml_base_framework.framework_output, - "%s Checking msgid-%d", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), recv_msg_queue->msgid ); - if( (recv_msg_queue->msgid == msg_hdr.msgid) && (recv_msg_queue->sender.jobid == msg_hdr.origin.jobid) - && (recv_msg_queue->sender.vpid == msg_hdr.origin.vpid) ) { - opal_output_verbose(10, orte_rml_base_framework.framework_output, - "%s Found Msg entry in queue for msgid %d, sender jobid=%d, sender vpid=%d", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), recv_msg_queue->msgid, recv_msg_queue->sender.jobid, recv_msg_queue->sender.vpid); - msg_in_queue = true; - - opal_output_verbose(10, orte_rml_base_framework.framework_output, - "%s msgid %d, tot_pkts=%d, opal_list_get_size()=%lu,total pkt_recd=%d", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), recv_msg_queue->msgid, recv_msg_queue->tot_pkts, - opal_list_get_size(&recv_msg_queue->pkt_list), recv_msg_queue->pkt_recd ); - if( recv_msg_queue->tot_pkts == (recv_msg_queue->pkt_recd +1) ) { - /* all packets received for this message - post message to rml and remove this from queue */ - opal_output_verbose(10, orte_rml_base_framework.framework_output, - "%s All packets recd for msgid %d, tot_pkts=%d, opal_list_get_size()=%lu,total pkt_recd=%d", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), recv_msg_queue->msgid, recv_msg_queue->tot_pkts, - opal_list_get_size(&recv_msg_queue->pkt_list), recv_msg_queue->pkt_recd ); - totdata = NULL; - datalen = 0; - OPAL_LIST_FOREACH(ofi_recv_pkt, &recv_msg_queue->pkt_list, orte_rml_ofi_recv_pkt_t) { - opal_output_verbose(10, orte_rml_base_framework.framework_output, - "%s Adding data for packet %d, pktlength = %lu, cumulative datalen so far = %d", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ofi_recv_pkt->cur_pkt_num, ofi_recv_pkt->pkt_size, datalen 
); - if (0 == datalen) { - if (NULL != totdata) { - free(totdata); - } - totdata = (char *)malloc(ofi_recv_pkt->pkt_size); - if( totdata == NULL) { - opal_output_verbose(1, orte_rml_base_framework.framework_output, - "%s Error: malloc failed for msgid %d", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),recv_msg_queue->msgid ); - return 1; //[TODO: error-handling needs to be implemented - } - memcpy(totdata,ofi_recv_pkt->data,ofi_recv_pkt->pkt_size); - - } else { - totdata = realloc(totdata,datalen+ofi_recv_pkt->pkt_size); - if (NULL != totdata ) { - memcpy((totdata+datalen),ofi_recv_pkt->data,ofi_recv_pkt->pkt_size); - } else { - opal_output_verbose(1, orte_rml_base_framework.framework_output, - "%s Error: realloc failed for msgid %d, from sender jobid=%d, sender vpid=%d", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), recv_msg_queue->msgid, recv_msg_queue->sender.jobid, - recv_msg_queue->sender.vpid); - return 1; //[TODO: error-handling needs to be implemented - } - } - datalen += ofi_recv_pkt->pkt_size; - opal_output_verbose(10, orte_rml_base_framework.framework_output, - "%s packet %d done, datalen = %d", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ofi_recv_pkt->cur_pkt_num,datalen); - } - opal_output_verbose(10, orte_rml_base_framework.framework_output, - "%s Adding leftover data recd, datalen = %d, new_pkt->pkt_size = %lu", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), datalen, new_pkt->pkt_size); - //add the last packet - totdata =realloc(totdata,datalen+new_pkt->pkt_size); - if( NULL != totdata ) { - opal_output_verbose(10, orte_rml_base_framework.framework_output, - "%s Realloc completed for leftover data recd, datalen = %d, new->pkt->pkt_size = %lu", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), datalen, new_pkt->pkt_size); - nextpkt = totdata+datalen; - opal_output_verbose(10, orte_rml_base_framework.framework_output, - "%s totdata = %p,nextpkt = %p ", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), (void *)totdata, (void *)nextpkt); - memcpy(nextpkt,new_pkt->data,new_pkt->pkt_size); - 
opal_output_verbose(10, orte_rml_base_framework.framework_output, - "%s memcpy completed for leftover data recd, datalen = %d, new->pkt->pkt_size = %lu", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), datalen, new_pkt->pkt_size); - datalen += new_pkt->pkt_size; - opal_output_verbose(10, orte_rml_base_framework.framework_output, - "%s Posting Recv for msgid %d", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), msg_hdr.msgid ); - ORTE_RML_POST_MESSAGE(&msg_hdr.origin, msg_hdr.tag, msg_hdr.seq_num,totdata,datalen);\ - - // free the pkts - opal_output_verbose(10, orte_rml_base_framework.framework_output, - "%s msgid %d - posting recv completed, freeing packets", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), msg_hdr.msgid ); - OPAL_LIST_FOREACH_SAFE(ofi_recv_pkt, next, &recv_msg_queue->pkt_list, orte_rml_ofi_recv_pkt_t) { - free( ofi_recv_pkt->data); - opal_output_verbose(10, orte_rml_base_framework.framework_output, - "%s freed data for packet %d",ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ofi_recv_pkt->cur_pkt_num ); - ofi_recv_pkt->pkt_size=0; - opal_list_remove_item(&recv_msg_queue->pkt_list, &ofi_recv_pkt->super); - opal_output_verbose(10, orte_rml_base_framework.framework_output, - "%s Removed pkt from list ",ORTE_NAME_PRINT(ORTE_PROC_MY_NAME) ); - OBJ_RELEASE(ofi_recv_pkt); - opal_output_verbose(10, orte_rml_base_framework.framework_output, - "%s Released packet ",ORTE_NAME_PRINT(ORTE_PROC_MY_NAME) ); - } - opal_output_verbose(10, orte_rml_base_framework.framework_output, - "%s freeing packets completed",ORTE_NAME_PRINT(ORTE_PROC_MY_NAME) ); - //free the msg from the queue-list - opal_list_remove_item(&orte_rml_ofi.recv_msg_queue_list,&recv_msg_queue->super); - opal_output_verbose(10, orte_rml_base_framework.framework_output, - "%s Successfully removed msg from queue", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME) ); - OBJ_RELEASE(recv_msg_queue); - } else { - opal_output_verbose(1, orte_rml_base_framework.framework_output, - "%s Error: realloc failed for msgid %d, from sender jobid=%d, sender vpid=%d", - 
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), recv_msg_queue->msgid, recv_msg_queue->sender.jobid, - recv_msg_queue->sender.vpid); - return 1; //[TODO: error-handling needs to be implemented - } - } else { - /* add this packet to the msg in the queue ordered by cur_pkt_num */ - opal_output_verbose(10, orte_rml_base_framework.framework_output, - "%s Adding packet to list, msgid %d, pkt - %d", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), recv_msg_queue->msgid, msg_hdr.cur_pkt_num ); - - bool pkt_added = false; - OPAL_LIST_FOREACH(ofi_recv_pkt, &recv_msg_queue->pkt_list, orte_rml_ofi_recv_pkt_t) { - if( msg_hdr.cur_pkt_num < ofi_recv_pkt->cur_pkt_num ) { - opal_list_insert_pos(&recv_msg_queue->pkt_list, (opal_list_item_t*)ofi_recv_pkt, &new_pkt->super); - recv_msg_queue->pkt_recd++; - pkt_added = true; - break; - } - } - if (!pkt_added) { - opal_list_append(&recv_msg_queue->pkt_list,&new_pkt->super); - recv_msg_queue->pkt_recd++; - } - } - } - break; //we found the msg or added it so exit out of the msg_queue loop - } - if( !msg_in_queue ) { - /*add to the queue as this is the first packet for [msgid,sender] */ - new_msg = OBJ_NEW(ofi_recv_msg_queue_t); - new_msg->msgid = msg_hdr.msgid; - new_msg->sender = msg_hdr.origin; - new_msg->tot_pkts = msg_hdr.tot_pkts; - new_msg->pkt_recd = 1; - opal_output_verbose(10, orte_rml_base_framework.framework_output, - "%s Adding first Msg entry in queue for msgid %d, sender jobid=%d, sender vpid=%d", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), new_msg->msgid, new_msg->sender.jobid, new_msg->sender.vpid); - opal_list_append(&new_msg->pkt_list, &new_pkt->super); - opal_list_append(&orte_rml_ofi.recv_msg_queue_list, &new_msg->super); - - } - } - return ORTE_SUCCESS; -} - -/* populate_peer_ofi_addr - * [Desc] This fn does a PMIx Modex recv on "rml.ofi" key - * to get the ofi address blob of all providers on the peer. 
- * Then it populates the array parameter peer_ofi_addr[] - * with providername, ofi_ep_name and ofi_ep_namelen - * [in] peer -> peer address - * [out] peer_ofi_addr[] -> array to hold the provider details on the peer - * [Return value] -> total providers on success. OPAL_ERROR if fails to load array. - */ -static int populate_peer_ofi_addr(orte_process_name_t *peer, orte_rml_ofi_peer_t *peer_ofi_addr ) -{ - - uint8_t *data; - int32_t sz, cnt; - opal_buffer_t modex, *entry; - char *prov_name; - uint8_t prov_num; - size_t entrysize; - uint8_t *bytes; - uint8_t tot_prov=0,cur_prov; - int ret = OPAL_ERROR; - - OPAL_MODEX_RECV_STRING(ret, "rml.ofi", peer, (void**)&data, &sz); - if (OPAL_SUCCESS != ret) { - opal_output_verbose(1, orte_rml_base_framework.framework_output, - "%s rml:ofi::populate_peer_ofi_addr() Modex_Recv Failed for peer %s. ", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_NAME_PRINT(peer)); - return OPAL_ERROR; - } - - opal_output_verbose(1, orte_rml_base_framework.framework_output, - "%s rml:ofi::populate_peer_ofi_addr() Modex_Recv Succeeded. ", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)); - /* load the data into a buffer for unpacking */ - OBJ_CONSTRUCT(&modex, opal_buffer_t); - opal_dss.load(&modex, data, sz); - cnt = 1; - /* cycle thru the returned providers and see which one we want to use */ - for(cur_prov=0;OPAL_SUCCESS == (ret = opal_dss.unpack(&modex, &entry, &cnt, OPAL_BUFFER));cur_prov++) { - /* unpack the provider name */ - cnt = 1; - if (OPAL_SUCCESS != (ret = opal_dss.unpack(entry, &prov_name, &cnt, OPAL_STRING))) { - ORTE_ERROR_LOG(ret); - OBJ_RELEASE(entry); - break; - } - /* unpack the provider's index on the remote peer - note that there - * is no guarantee that the same provider has the same local index! 
*/ - cnt = 1; - if (OPAL_SUCCESS != (ret = opal_dss.unpack(entry, &prov_num, &cnt, OPAL_UINT8))) { - ORTE_ERROR_LOG(ret); - OBJ_RELEASE(entry); - break; - } - /* unpack the size of their connection blob */ - cnt = 1; - if (OPAL_SUCCESS != (ret = opal_dss.unpack(entry, &entrysize, &cnt, OPAL_SIZE))) { - ORTE_ERROR_LOG(ret); - OBJ_RELEASE(entry); - break; - } - /* create the necessary space */ - bytes = (uint8_t*)malloc(entrysize); - /* unpack the connection blob */ - cnt = entrysize; - if (OPAL_SUCCESS != (ret = opal_dss.unpack(entry, bytes, &cnt, OPAL_BYTE))) { - ORTE_ERROR_LOG(ret); - OBJ_RELEASE(entry); - break; - } - /* done with the buffer */ - OBJ_RELEASE(entry); - peer_ofi_addr[cur_prov].ofi_prov_name = prov_name; - peer_ofi_addr[cur_prov].ofi_ep = bytes; - peer_ofi_addr[cur_prov].ofi_ep_len = entrysize; - opal_output_verbose(1, orte_rml_base_framework.framework_output, - "%s rml:ofi:populate_peer_ofi_addr() Unpacked peer provider %s ", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),peer_ofi_addr[cur_prov].ofi_prov_name); - } - OBJ_DESTRUCT(&modex); // releases the data returned by the modex_recv - tot_prov=cur_prov; - return tot_prov; -} - - -/* check_provider_in_peer(prov_name, peer_ofi_addr) - * [Desc] This fn checks for a match of prov_name in the peer_ofi_addr array - * and returns the index of the match or OPAL_ERROR if not found. - * The peer_ofi_addr array has all the ofi providers in peer. - * [in] prov_name -> The provider name we want to use to send this message to peer. - * [in] tot_prov -> total provider entries in array - * [in] peer_ofi_addr[] -> array of provider details on the peer - * [in] local_ofi_prov_idx -> the index of local provider we are comparing with - * (index into orte_rml_ofi.ofi_prov[] array. - * [Return value] -> index that matches provider on success. OPAL_ERROR if no match found. 
- */ -static int check_provider_in_peer( char *prov_name, int tot_prov, orte_rml_ofi_peer_t *peer_ofi_addr, int local_ofi_prov_idx ) -{ - int idx; - int ret = OPAL_ERROR; - - for( idx=0; idx < tot_prov; idx++) { - opal_output_verbose(1, orte_rml_base_framework.framework_output, - "%s rml:ofi:check_provider_in_peer() checking peer provider %s to match %s ", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),peer_ofi_addr[idx].ofi_prov_name,prov_name); - if ( 0 == strcmp(prov_name, peer_ofi_addr[idx].ofi_prov_name) ) { - /* we found a matching provider on peer */ - opal_output_verbose(1, orte_rml_base_framework.framework_output, - "%s rml:ofi:check_provider_in_peer() matched provider %s ", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),peer_ofi_addr[idx].ofi_prov_name); - if ( 0 == strcmp(prov_name, "sockets") ) { - /* check if the address is reachable */ - struct sockaddr_in *ep_sockaddr, *ep_sockaddr2; - opal_output_verbose(1, orte_rml_base_framework.framework_output, - "%s rml:ofi:check_provider_in_peer() checking if sockets provider is reachable ", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)); - ep_sockaddr = (struct sockaddr_in*)peer_ofi_addr[idx].ofi_ep; - ep_sockaddr2 = (struct sockaddr_in*)orte_rml_ofi.ofi_prov[local_ofi_prov_idx].ep_name; - if (opal_net_samenetwork((struct sockaddr*)ep_sockaddr, (struct sockaddr*)ep_sockaddr2, 24)) { - /* we found same ofi provider reachable via ethernet on peer so return this idx*/ - ret = idx; - opal_output_verbose(1, orte_rml_base_framework.framework_output, - "%s rml:ofi:check_provider_in_peer() sockets provider is reachable ", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)); - break; - } - } else { - ret = idx; - break; - } - } - } - return ret; -} - -static void send_msg(int fd, short args, void *cbdata) -{ - ofi_send_request_t *req = (ofi_send_request_t*)cbdata; - orte_process_name_t *peer = &(req->send.dst); - orte_rml_tag_t tag = req->send.tag; - char *dest_ep_name; - size_t dest_ep_namelen = 0; - int ret = OPAL_ERROR, rc; - uint32_t total_packets; - 
fi_addr_t dest_fi_addr; - orte_rml_send_t *snd; - orte_rml_ofi_request_t* ofi_send_req = OBJ_NEW( orte_rml_ofi_request_t ); - uint8_t ofi_prov_id = req->ofi_prov_id; - orte_rml_ofi_send_pkt_t* ofi_msg_pkt; - size_t datalen_per_pkt, hdrsize, data_in_pkt; // the length of data in per packet excluding the header size - orte_rml_ofi_peer_t* pr; - uint64_t ui64; - struct sockaddr_in* ep_sockaddr; - - snd = OBJ_NEW(orte_rml_send_t); - snd->dst = *peer; - snd->origin = *ORTE_PROC_MY_NAME; - snd->tag = tag; - if (NULL != req->send.iov) { - snd->iov = req->send.iov; - snd->count = req->send.count; - snd->cbfunc.iov = req->send.cbfunc.iov; - } else { - snd->buffer = req->send.buffer; - snd->cbfunc.buffer = req->send.cbfunc.buffer; - } - snd->cbdata = req->send.cbdata; - - opal_output_verbose(1, orte_rml_base_framework.framework_output, - "%s send_msg_transport to peer %s at tag %d", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - ORTE_NAME_PRINT(peer), tag); - - /* get the peer address from our internal hash table */ - memcpy(&ui64, (char*)peer, sizeof(uint64_t)); - opal_output_verbose(1, orte_rml_base_framework.framework_output, - "%s getting contact info for DAEMON peer %s from internal hash table", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_NAME_PRINT(peer)); - if (OPAL_SUCCESS != (ret = opal_hash_table_get_value_uint64(&orte_rml_ofi.peers, - ui64, (void**)&pr) || NULL == pr)) { - orte_rml_ofi_peer_t peer_ofi_addr[MAX_OFI_PROVIDERS]; - int tot_peer_prov=0, peer_prov_id=ofi_prov_id; - bool peer_match_found=false; - - opal_output_verbose(1, orte_rml_base_framework.framework_output, - "%s rml:ofi:Send peer OFI contact info not found in internal hash - checking modex", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)); - - /* Do Modex_recv and populate the peer's providers and ofi ep address in peer_ofi_addr[] array */ - if( OPAL_ERROR == ( tot_peer_prov = populate_peer_ofi_addr( peer, peer_ofi_addr ))) { - opal_output_verbose(1, orte_rml_base_framework.framework_output, - "%s 
rml:ofi::send_msg() Error when Populating peer ofi_addr array ", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)); - snd->status = ORTE_ERR_ADDRESSEE_UNKNOWN; - ORTE_RML_SEND_COMPLETE(snd); - //OBJ_RELEASE( ofi_send_req); - return ; - } - /* decide the provider we want to use from the list of providers in peer as per below order. - * 1. if the user specified the transport for this conduit (even giving us a prioritized list of candidates), - * then the one we selected is the _only_ one we will use. If the remote peer has a matching endpoint, - * then we use it - otherwise, we error out - * 2. if the user did not specify a transport, then we look for matches against _all_ of - * our available transports, starting with fabric and then going to Ethernet, taking the first one that matches. - * 3. if we cannot find any match, then we error out - */ - if ( true == user_override() ) { - /*case 1. User has specified the provider, find a match in peer for the current selected provider or error out*/ - opal_output_verbose(1, orte_rml_base_framework.framework_output, - "%s rml:ofi::send_msg() Case1. looking for a match for current provider", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)); - if( OPAL_ERROR == ( peer_prov_id = check_provider_in_peer( orte_rml_ofi.ofi_prov[ofi_prov_id].fabric_info->fabric_attr->prov_name, - tot_peer_prov, peer_ofi_addr, ofi_prov_id ) )) { - opal_output_verbose(1, orte_rml_base_framework.framework_output, - "%s rml:ofi::send_msg() Peer is Unreachable - no common ofi provider ", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)); - snd->status = ORTE_ERR_ADDRESSEE_UNKNOWN; - ORTE_RML_SEND_COMPLETE(snd); - //OBJ_RELEASE( ofi_send_req); - return ; - } - peer_match_found = true; - } else { - /* case 2. 
look for any matching fabric (other than ethernet) provider */ - opal_output_verbose(1, orte_rml_base_framework.framework_output, - "%s rml:ofi::send_msg() Case 2 - looking for any match for fabric provider", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)); - for(int cur_prov_id=0; cur_prov_id < orte_rml_ofi.ofi_prov_open_num && !peer_match_found ; cur_prov_id++) { - if( 0 != strcmp( orte_rml_ofi.ofi_prov[cur_prov_id].fabric_info->fabric_attr->prov_name, "sockets" ) ) { - peer_prov_id = check_provider_in_peer( orte_rml_ofi.ofi_prov[cur_prov_id].fabric_info->fabric_attr->prov_name, - tot_peer_prov, peer_ofi_addr, cur_prov_id ); - if (OPAL_ERROR != peer_prov_id) { - peer_match_found = true; - ofi_prov_id = cur_prov_id; - } - } - } - /* if we haven't found a common provider for local node and peer to send message yet, check for ethernet */ - if(!peer_match_found) { - opal_output_verbose(1, orte_rml_base_framework.framework_output, - "%s rml:ofi::send_msg() Case 2 - common fabric to peer not found,looking for ethernet provider", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)); - } - for(int cur_prov_id=0; cur_prov_id < orte_rml_ofi.ofi_prov_open_num && !peer_match_found ; cur_prov_id++) { - if( 0 == strcmp( orte_rml_ofi.ofi_prov[cur_prov_id].fabric_info->fabric_attr->prov_name, "sockets" ) ) { - peer_prov_id = check_provider_in_peer( orte_rml_ofi.ofi_prov[cur_prov_id].fabric_info->fabric_attr->prov_name, - tot_peer_prov, peer_ofi_addr, cur_prov_id ); - if (OPAL_ERROR != peer_prov_id) { - peer_match_found = true; - ofi_prov_id = cur_prov_id; - } - } - } - /* if we haven't found a common provider yet, then error out - case 3 */ - if ( !peer_match_found ) { - opal_output_verbose(1, orte_rml_base_framework.framework_output, - "%s rml:ofi::send_msg() Peer is Unreachable - no common ofi provider ", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)); - snd->status = ORTE_ERR_ADDRESSEE_UNKNOWN; - ORTE_RML_SEND_COMPLETE(snd); - //OBJ_RELEASE( ofi_send_req); - return ; - } - } - /* creating a copy of the chosen 
provider to put it in hashtable - * as the ofi_peer_addr array is local */ - pr = OBJ_NEW(orte_rml_ofi_peer_t); - pr->ofi_ep_len = peer_ofi_addr[peer_prov_id].ofi_ep_len; - pr->ofi_ep = malloc(pr->ofi_ep_len); - memcpy(pr->ofi_ep,peer_ofi_addr[peer_prov_id].ofi_ep,pr->ofi_ep_len); - pr->ofi_prov_name = strdup(peer_ofi_addr[peer_prov_id].ofi_prov_name); - pr->src_prov_id = ofi_prov_id; - if(OPAL_SUCCESS != - (rc = opal_hash_table_set_value_uint64(&orte_rml_ofi.peers, ui64, (void*)pr))) { - opal_output_verbose(15, orte_rml_base_framework.framework_output, - "%s: ofi address insertion into hash table failed for peer %s ", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - ORTE_NAME_PRINT(peer)); - ORTE_ERROR_LOG(rc); - } - dest_ep_name = pr->ofi_ep; - dest_ep_namelen = pr->ofi_ep_len; - opal_output_verbose(1, orte_rml_base_framework.framework_output, - "%s rml:ofi: Peer ofi provider details added to hash table. Sending to provider %s on peer %s ", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),pr->ofi_prov_name,ORTE_NAME_PRINT(peer)); - } else { - opal_output_verbose(1, orte_rml_base_framework.framework_output, - "%s rml:ofi: OFI peer contact info got from hash table", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)); - dest_ep_name = pr->ofi_ep; - dest_ep_namelen = pr->ofi_ep_len; - ofi_prov_id = pr->src_prov_id; - } - - //[Debug] printing additional info of IP - switch ( orte_rml_ofi.ofi_prov[ofi_prov_id].fabric_info->addr_format) - { - case FI_SOCKADDR_IN : - /* Address is of type sockaddr_in (IPv4) */ - /*[debug] - print the sockaddr - port and s_addr */ - ep_sockaddr = (struct sockaddr_in*)dest_ep_name; - opal_output_verbose(1,orte_rml_base_framework.framework_output, - "%s peer %s epnamelen is %lu, port = %d (or) 0x%x, InternetAddr = 0x%s ", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),ORTE_NAME_PRINT(peer), - (unsigned long)orte_rml_ofi.ofi_prov[ofi_prov_id].epnamelen,ntohs(ep_sockaddr->sin_port), - ntohs(ep_sockaddr->sin_port),inet_ntoa(ep_sockaddr->sin_addr)); - /*[end debug]*/ - break; - } - 
//[Debug] end debug - opal_output_verbose(10, orte_rml_base_framework.framework_output, - "%s peer ep name obtained for %s. length=%lu", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - ORTE_NAME_PRINT(peer), dest_ep_namelen); - ret = fi_av_insert(orte_rml_ofi.ofi_prov[ofi_prov_id].av, dest_ep_name,1,&dest_fi_addr,0,NULL); - if( ret != 1) { - opal_output_verbose(1, orte_rml_base_framework.framework_output, - "%s fi_av_insert failed in send_msg() returned %d", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),ret ); - /* call the send-callback fn with error and return, also return failure status */ - snd->status = ORTE_ERR_ADDRESSEE_UNKNOWN; - - ORTE_RML_SEND_COMPLETE(snd); - - return; - } - ofi_send_req->send = snd; - ofi_send_req->completion_count = 1; - - /* [DESC] we want to send the pid,seqnum,tag in addition to the data - * copy all of this to header of message from the ofi_send_t* send - */ - ofi_send_req->hdr.dst = ofi_send_req->send->dst; - ofi_send_req->hdr.origin = ofi_send_req->send->origin; - ofi_send_req->hdr.seq_num = ofi_send_req->send->seq_num; - ofi_send_req->hdr.tag = ofi_send_req->send->tag; - - /* - * also insert ofi plugin specific header details - - * the unique msgid, for now initalise total_packets to 1 - */ - ofi_send_req->hdr.msgid = orte_rml_ofi.cur_msgid; - orte_rml_ofi.cur_msgid += 1; - total_packets = 1; - - /* copy the buffer/iov/data to the ofi_send_req->datablob and update ofi_send_req->length*/ - ofi_send_req->length = 0; - if( NULL != ofi_send_req->send->buffer) { - ofi_send_req->length = ofi_send_req->send->buffer->bytes_used; - ofi_send_req->data_blob = (char *)malloc(ofi_send_req->length); - memcpy(ofi_send_req->data_blob , - ofi_send_req->send->buffer->base_ptr, - ofi_send_req->send->buffer->bytes_used); - } else if ( NULL != ofi_send_req->send->iov) { - for (int i=0; i < ofi_send_req->send->count; i++) { - ofi_send_req->length += ofi_send_req->send->iov[i].iov_len; - } - ofi_send_req->data_blob = (char *)malloc(ofi_send_req->length); - int 
iovlen=0; - for (int i=0; i < ofi_send_req->send->count; i++) { - memcpy(((char *)ofi_send_req->data_blob + iovlen ), - ofi_send_req->send->iov[i].iov_base, - ofi_send_req->send->iov[i].iov_len); - iovlen += ofi_send_req->send->iov[i].iov_len; - } - } else { - //just send the data - ofi_send_req->length = ofi_send_req->send->count; - ofi_send_req->data_blob = (char *)malloc(ofi_send_req->length); - memcpy(ofi_send_req->data_blob , - ofi_send_req->send->data, - ofi_send_req->send->count); - } - - - opal_output_verbose(15, orte_rml_base_framework.framework_output, - "%s Completed copying all data into ofi_send_req->data_blob, total data - %lu bytes", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ofi_send_req->length ); - - /* Each packet will have header information, so the data length in each packet is datalen_per_packet. - * check if the ofi_send_req->send->buffer->bytes_used is greater than the data per packet datalen_per_packet(recv buffer) - * if so fragment and add info to header and send it in a loop back-to-back */ - hdrsize = sizeof(orte_rml_ofi_msg_header_t); - datalen_per_pkt = MIN_MULTI_BUF_SIZE - hdrsize; - if (ofi_send_req->length > datalen_per_pkt ) - { - total_packets = ( ofi_send_req->length / datalen_per_pkt ) + 1 ; - } - ofi_send_req->hdr.tot_pkts = total_packets; - - opal_output_verbose(15, orte_rml_base_framework.framework_output, - "%s datalen_per_pkt = %lu, ofi_send_req->length= %lu, total packets = %d", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), datalen_per_pkt, ofi_send_req->length, total_packets ); - - /* in a loop send create and send the packets */ - for(size_t pkt_num=1,sent_data=0; sent_data < ofi_send_req->length; pkt_num++) { - ofi_send_req->hdr.cur_pkt_num = pkt_num; - /* create the packet */ - ofi_msg_pkt = OBJ_NEW(orte_rml_ofi_send_pkt_t); - data_in_pkt = ((ofi_send_req->length - sent_data) >= datalen_per_pkt) ? 
- datalen_per_pkt : (ofi_send_req->length - sent_data); - ofi_msg_pkt->pkt_size = hdrsize + data_in_pkt; - opal_output_verbose(15, orte_rml_base_framework.framework_output, - "%s Packet %lu -> data_in_pkt= %lu, header_size= %lu, pkt_size=%lu", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), pkt_num,data_in_pkt,hdrsize,ofi_msg_pkt->pkt_size ); - /* copy the header and data for this pkt */ - ofi_msg_pkt->data = malloc( ofi_msg_pkt->pkt_size); - memcpy(ofi_msg_pkt->data, &ofi_send_req->hdr, hdrsize ); - memcpy( ( (char *)ofi_msg_pkt->data + hdrsize ), - ((char*)ofi_send_req->data_blob + sent_data), - data_in_pkt); - opal_output_verbose(15, orte_rml_base_framework.framework_output, - "%s Copying header, data into packets completed", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME) ); - /* add it to list */ - opal_list_append(&(ofi_send_req->pkt_list), &ofi_msg_pkt->super); - opal_output_verbose(15, orte_rml_base_framework.framework_output, - "%s adding packet %lu to list done successful", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),pkt_num ); - sent_data += data_in_pkt; - } - - if( ofi_send_req->hdr.tot_pkts != ofi_send_req->hdr.cur_pkt_num ) { - opal_output_verbose(1, orte_rml_base_framework.framework_output, - "%s Error: Total packets calculated [%d] does not match total created-%d pkts to peer %s with tag %d", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ofi_send_req->hdr.tot_pkts, ofi_send_req->hdr.cur_pkt_num, - ORTE_NAME_PRINT(peer), tag); - } - /* do the fi_send() for all the pkts */ - ofi_send_req->completion_count= ofi_send_req->hdr.tot_pkts; - OPAL_LIST_FOREACH(ofi_msg_pkt, &ofi_send_req->pkt_list, orte_rml_ofi_send_pkt_t) { - /* debug purpose - copying the header from packet to verify if it is correct */ - struct orte_rml_ofi_msg_header_t *cur_hdr; - cur_hdr = (struct orte_rml_ofi_msg_header_t* ) ofi_msg_pkt->data; - opal_output_verbose(10, orte_rml_base_framework.framework_output, - "%s Sending Pkt[%d] of total %d pkts for msgid:%d to peer %s with tag %d", - 
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), cur_hdr->cur_pkt_num, ofi_send_req->completion_count, - cur_hdr->msgid, ORTE_NAME_PRINT(peer), tag); - /* end debug*/ - - RML_OFI_RETRY_UNTIL_DONE(fi_send(orte_rml_ofi.ofi_prov[ofi_prov_id].ep, - ofi_msg_pkt->data, - ofi_msg_pkt->pkt_size, - fi_mr_desc(orte_rml_ofi.ofi_prov[ofi_prov_id].mr_multi_recv), - dest_fi_addr, - (void *)&ofi_send_req->ctx)); - - } - opal_output_verbose(10, orte_rml_base_framework.framework_output, - "%s End of send_msg_transport. fi_send completed to peer %s with tag %d", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - ORTE_NAME_PRINT(peer), tag); - OBJ_RELEASE(req); -} - -int orte_rml_ofi_send_nb(struct orte_rml_base_module_t* mod, - orte_process_name_t* peer, - struct iovec* iov, - int count, - orte_rml_tag_t tag, - orte_rml_callback_fn_t cbfunc, - void* cbdata) -{ - orte_rml_recv_t *rcv; - int bytes; - orte_self_send_xfer_t *xfer; - int i; - char* ptr; - ofi_send_request_t *req; - orte_rml_ofi_module_t *ofi_mod = (orte_rml_ofi_module_t*)mod; - int ofi_prov_id = ofi_mod->cur_transport_id; - - - opal_output_verbose(10, orte_rml_base_framework.framework_output, - "%s rml_ofi_send_transport to peer %s at tag %d", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - ORTE_NAME_PRINT(peer), tag); - - - if( (0 > ofi_prov_id) || ( ofi_prov_id >= orte_rml_ofi.ofi_prov_open_num ) ) { - /* Invalid ofi_prov ID provided */ - ORTE_ERROR_LOG(ORTE_ERR_BAD_PARAM); - return ORTE_ERR_BAD_PARAM; - } - if (ORTE_RML_TAG_INVALID == tag) { - /* cannot send to an invalid tag */ - ORTE_ERROR_LOG(ORTE_ERR_BAD_PARAM); - return ORTE_ERR_BAD_PARAM; - } - if (NULL == peer || - OPAL_EQUAL == orte_util_compare_name_fields(ORTE_NS_CMP_ALL, ORTE_NAME_INVALID, peer)) { - /* cannot send to an invalid peer */ - ORTE_ERROR_LOG(ORTE_ERR_BAD_PARAM); - return ORTE_ERR_BAD_PARAM; - } - - /* if this is a message to myself, then just post the message - * for receipt - no need to dive into the ofi send_msg() - */ - if (OPAL_EQUAL == 
orte_util_compare_name_fields(ORTE_NS_CMP_ALL, peer, ORTE_PROC_MY_NAME)) { /* local delivery */ - OPAL_OUTPUT_VERBOSE((1, orte_rml_base_framework.framework_output, - "%s rml_send_iovec_to_self at tag %d", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), tag)); - /* send to self is a tad tricky - we really don't want - * to track the send callback function throughout the recv - * process and execute it upon receipt as this would provide - * very different timing from a non-self message. Specifically, - * if we just retain a pointer to the incoming data - * and then execute the send callback prior to the receive, - * then the caller will think we are done with the data and - * can release it. So we have to copy the data in order to - * execute the send callback prior to receiving the message. - * - * In truth, this really is a better mimic of the non-self - * message behavior. If we actually pushed the message out - * on the wire and had it loop back, then we would receive - * a new block of data anyway. 
- */ - - /* setup the send callback */ - xfer = OBJ_NEW(orte_self_send_xfer_t); - xfer->iov = iov; - xfer->count = count; - xfer->cbfunc.iov = cbfunc; - xfer->tag = tag; - xfer->cbdata = cbdata; - /* setup the event for the send callback */ - opal_event_set(orte_event_base, &xfer->ev, -1, OPAL_EV_WRITE, send_self_exe, xfer); - opal_event_set_priority(&xfer->ev, ORTE_MSG_PRI); - opal_event_active(&xfer->ev, OPAL_EV_WRITE, 1); - - /* copy the message for the recv */ - rcv = OBJ_NEW(orte_rml_recv_t); - rcv->sender = *peer; - rcv->tag = tag; - /* get the total number of bytes in the iovec array */ - bytes = 0; - for (i = 0 ; i < count ; ++i) { - bytes += iov[i].iov_len; - } - /* get the required memory allocation */ - if (0 < bytes) { - rcv->iov.iov_base = (IOVBASE_TYPE*)malloc(bytes); - rcv->iov.iov_len = bytes; - /* transfer the bytes */ - ptr = (char*)rcv->iov.iov_base; - for (i = 0 ; i < count ; ++i) { - memcpy(ptr, iov[i].iov_base, iov[i].iov_len); - ptr += iov[i].iov_len; - } - } - /* post the message for receipt - since the send callback was posted - * first and has the same priority, it will execute first - */ - ORTE_RML_ACTIVATE_MESSAGE(rcv); - return ORTE_SUCCESS; - } - - /* get ourselves into an event to protect against - * race conditions and threads - */ - req = OBJ_NEW(ofi_send_request_t); - req->ofi_prov_id = ofi_prov_id; - req->send.dst = *peer; - req->send.iov = iov; - req->send.count = count; - req->send.tag = tag; - req->send.cbfunc.iov = cbfunc; - req->send.cbdata = cbdata; - - /* setup the event for the send callback */ - opal_event_set(orte_event_base, &req->ev, -1, OPAL_EV_WRITE, send_msg, req); - opal_event_set_priority(&req->ev, ORTE_MSG_PRI); - opal_event_active(&req->ev, OPAL_EV_WRITE, 1); - - return ORTE_SUCCESS; -} - - -int orte_rml_ofi_send_buffer_nb(struct orte_rml_base_module_t *mod, - orte_process_name_t* peer, - struct opal_buffer_t* buffer, - orte_rml_tag_t tag, - orte_rml_buffer_callback_fn_t cbfunc, - void* cbdata) -{ - 
orte_rml_recv_t *rcv; - orte_self_send_xfer_t *xfer; - ofi_send_request_t *req; - orte_rml_ofi_module_t *ofi_mod = (orte_rml_ofi_module_t*)mod; - int ofi_prov_id = ofi_mod->cur_transport_id; - - opal_output_verbose(10, orte_rml_base_framework.framework_output, - "%s rml_ofi_send_buffer_transport to peer %s at tag %d", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - ORTE_NAME_PRINT(peer), tag); - - - if( (0 > ofi_prov_id) || ( ofi_prov_id >= orte_rml_ofi.ofi_prov_open_num ) ) { - /* Invalid ofi_prov ID provided */ - ORTE_ERROR_LOG(ORTE_ERR_BAD_PARAM); - return ORTE_ERR_BAD_PARAM; - } - if (ORTE_RML_TAG_INVALID == tag) { - /* cannot send to an invalid tag */ - ORTE_ERROR_LOG(ORTE_ERR_BAD_PARAM); - return ORTE_ERR_BAD_PARAM; - } - if (NULL == peer || - OPAL_EQUAL == orte_util_compare_name_fields(ORTE_NS_CMP_ALL, ORTE_NAME_INVALID, peer)) { - /* cannot send to an invalid peer */ - ORTE_ERROR_LOG(ORTE_ERR_BAD_PARAM); - return ORTE_ERR_BAD_PARAM; - } - /* if this is a message to myself, then just post the message - * for receipt - no need to dive into the oob - */ - if (OPAL_EQUAL == orte_util_compare_name_fields(ORTE_NS_CMP_ALL, peer, ORTE_PROC_MY_NAME)) { /* local delivery */ - OPAL_OUTPUT_VERBOSE((1, orte_rml_base_framework.framework_output, - "%s rml_send_iovec_to_self at tag %d", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), tag)); - /* send to self is a tad tricky - we really don't want - * to track the send callback function throughout the recv - * process and execute it upon receipt as this would provide - * very different timing from a non-self message. Specifically, - * if we just retain a pointer to the incoming data - * and then execute the send callback prior to the receive, - * then the caller will think we are done with the data and - * can release it. So we have to copy the data in order to - * execute the send callback prior to receiving the message. - * - * In truth, this really is a better mimic of the non-self - * message behavior. 
If we actually pushed the message out - * on the wire and had it loop back, then we would receive - * a new block of data anyway. - */ - - /* setup the send callback */ - xfer = OBJ_NEW(orte_self_send_xfer_t); - xfer->buffer = buffer; - xfer->cbfunc.buffer = cbfunc; - xfer->tag = tag; - xfer->cbdata = cbdata; - /* setup the event for the send callback */ - opal_event_set(orte_event_base, &xfer->ev, -1, OPAL_EV_WRITE, send_self_exe, xfer); - opal_event_set_priority(&xfer->ev, ORTE_MSG_PRI); - opal_event_active(&xfer->ev, OPAL_EV_WRITE, 1); - - /* copy the message for the recv */ - rcv = OBJ_NEW(orte_rml_recv_t); - rcv->sender = *peer; - rcv->tag = tag; - rcv->iov.iov_base = (IOVBASE_TYPE*)malloc(buffer->bytes_used); - memcpy(rcv->iov.iov_base, buffer->base_ptr, buffer->bytes_used); - rcv->iov.iov_len = buffer->bytes_used; - /* post the message for receipt - since the send callback was posted - * first and has the same priority, it will execute first - */ - ORTE_RML_ACTIVATE_MESSAGE(rcv); - return ORTE_SUCCESS; - } - - /* get ourselves into an event to protect against - * race conditions and threads - */ - req = OBJ_NEW(ofi_send_request_t); - req->ofi_prov_id = ofi_prov_id; - req->send.dst = *peer; - req->send.buffer = buffer; - req->send.tag = tag; - req->send.cbfunc.buffer = cbfunc; - req->send.cbdata = cbdata; - - /* setup the event for the send callback */ - opal_event_set(orte_event_base, &req->ev, -1, OPAL_EV_WRITE, send_msg, req); - opal_event_set_priority(&req->ev, ORTE_MSG_PRI); - opal_event_active(&req->ev, OPAL_EV_WRITE, 1); - - return ORTE_SUCCESS; -} diff --git a/orte/mca/routed/debruijn/Makefile.am b/orte/mca/routed/debruijn/Makefile.am deleted file mode 100644 index 2a90f6989b7..00000000000 --- a/orte/mca/routed/debruijn/Makefile.am +++ /dev/null @@ -1,41 +0,0 @@ -# -# Copyright (c) 2007-2012 Los Alamos National Security, LLC. -# All rights reserved. -# Copyright (c) 2009 High Performance Computing Center Stuttgart, -# University of Stuttgart. 
All rights reserved. -# Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. -# Copyright (c) 2017 IBM Corporation. All rights reserved. -# $COPYRIGHT$ -# -# Additional copyrights may follow -# -# $HEADER$ -# - -sources = \ - routed_debruijn.h \ - routed_debruijn.c \ - routed_debruijn_component.c - -# Make the output library in this directory, and name it either -# mca__.la (for DSO builds) or libmca__.la -# (for static builds). - -if MCA_BUILD_orte_routed_debruijn_DSO -component_noinst = -component_install = mca_routed_debruijn.la -else -component_noinst = libmca_routed_debruijn.la -component_install = -endif - -mcacomponentdir = $(ortelibdir) -mcacomponent_LTLIBRARIES = $(component_install) -mca_routed_debruijn_la_SOURCES = $(sources) -mca_routed_debruijn_la_LDFLAGS = -module -avoid-version -mca_routed_debruijn_la_LIBADD = $(top_builddir)/orte/lib@ORTE_LIB_PREFIX@open-rte.la - -noinst_LTLIBRARIES = $(component_noinst) -libmca_routed_debruijn_la_SOURCES = $(sources) -libmca_routed_debruijn_la_LDFLAGS = -module -avoid-version - diff --git a/orte/mca/routed/debruijn/owner.txt b/orte/mca/routed/debruijn/owner.txt deleted file mode 100644 index b4ba3c21f5e..00000000000 --- a/orte/mca/routed/debruijn/owner.txt +++ /dev/null @@ -1,7 +0,0 @@ -# -# owner/status file -# owner: institution that is responsible for this package -# status: e.g. active, maintenance, unmaintained -# -owner: LANL? -status: unmaintained diff --git a/orte/mca/routed/debruijn/routed_debruijn.c b/orte/mca/routed/debruijn/routed_debruijn.c deleted file mode 100644 index 4545fcae779..00000000000 --- a/orte/mca/routed/debruijn/routed_debruijn.c +++ /dev/null @@ -1,481 +0,0 @@ -/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ -/* - * Copyright (c) 2007-2012 Los Alamos National Security, LLC. - * All rights reserved. - * Copyright (c) 2004-2011 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. 
- * Copyright (c) 2013-2016 Intel, Inc. All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "orte_config.h" -#include "orte/constants.h" - -#include - -#include "opal/dss/dss.h" -#include "opal/class/opal_hash_table.h" -#include "opal/class/opal_bitmap.h" -#include "opal/util/output.h" - -#include "orte/mca/errmgr/errmgr.h" -#include "orte/mca/ess/ess.h" -#include "orte/mca/rml/rml.h" -#include "orte/mca/rml/rml_types.h" -#include "orte/util/name_fns.h" -#include "orte/runtime/orte_globals.h" -#include "orte/runtime/orte_wait.h" -#include "orte/runtime/runtime.h" -#include "orte/runtime/data_type_support/orte_dt_support.h" - -#include "orte/mca/rml/base/rml_contact.h" - -#include "orte/mca/routed/base/base.h" -#include "routed_debruijn.h" - - -static int init(void); -static int finalize(void); -static int delete_route(orte_process_name_t *proc); -static int update_route(orte_process_name_t *target, - orte_process_name_t *route); -static orte_process_name_t get_route(orte_process_name_t *target); -static int route_lost(const orte_process_name_t *route); -static bool route_is_defined(const orte_process_name_t *target); -static void update_routing_plan(void); -static void get_routing_list(opal_list_t *coll); -static int set_lifeline(orte_process_name_t *proc); -static size_t num_routes(void); - -#if OPAL_ENABLE_FT_CR == 1 -static int debruijn_ft_event(int state); -#endif - -orte_routed_module_t orte_routed_debruijn_module = { - .initialize = init, - .finalize = finalize, - .delete_route = delete_route, - .update_route = update_route, - .get_route = get_route, - .route_lost = route_lost, - .route_is_defined = route_is_defined, - .set_lifeline = set_lifeline, - .update_routing_plan = update_routing_plan, - .get_routing_list = get_routing_list, - .num_routes = num_routes, -#if OPAL_ENABLE_FT_CR == 1 - .ft_event = debruijn_ft_event -#else - NULL -#endif -}; - -/* local globals */ -static orte_process_name_t 
*lifeline=NULL; -static orte_process_name_t local_lifeline; -static opal_list_t my_children; -static bool hnp_direct=true; -static int log_nranks; -static int log_npeers; -static unsigned int rank_mask; - -static int init(void) -{ - lifeline = NULL; - - if (ORTE_PROC_IS_DAEMON) { - /* if we are using static ports, set my lifeline to point at my parent */ - if (orte_static_ports) { - lifeline = ORTE_PROC_MY_PARENT; - } else { - /* set our lifeline to the HNP - we will abort if that connection is lost */ - lifeline = ORTE_PROC_MY_HNP; - } - ORTE_PROC_MY_PARENT->jobid = ORTE_PROC_MY_NAME->jobid; - } else if (ORTE_PROC_IS_APP) { - /* if we don't have a designated daemon, just - * disqualify ourselves */ - if (NULL == orte_process_info.my_daemon_uri) { - return ORTE_ERR_TAKE_NEXT_OPTION; - } - /* set our lifeline to the local daemon - we will abort if this connection is lost */ - lifeline = ORTE_PROC_MY_DAEMON; - orte_routing_is_enabled = true; - } - - /* setup the list of children */ - OBJ_CONSTRUCT(&my_children, opal_list_t); - - return ORTE_SUCCESS; -} - -static int finalize(void) -{ - opal_list_item_t *item; - - lifeline = NULL; - - /* deconstruct the list of children */ - while (NULL != (item = opal_list_remove_first(&my_children))) { - OBJ_RELEASE(item); - } - OBJ_DESTRUCT(&my_children); - - return ORTE_SUCCESS; -} - -static int delete_route(orte_process_name_t *proc) -{ - if (proc->jobid == ORTE_JOBID_INVALID || - proc->vpid == ORTE_VPID_INVALID) { - return ORTE_ERR_BAD_PARAM; - } - - /* if I am an application process, I don't have any routes - * so there is nothing for me to do - */ - if (!ORTE_PROC_IS_HNP && !ORTE_PROC_IS_DAEMON && - !ORTE_PROC_IS_TOOL) { - return ORTE_SUCCESS; - } - - OPAL_OUTPUT_VERBOSE((1, orte_routed_base_framework.framework_output, - "%s routed_debruijn_delete_route for %s", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - ORTE_NAME_PRINT(proc))); - - /* THIS CAME FROM OUR OWN JOB FAMILY...there is nothing - * to do here. 
The routes will be redefined when we update - * the routing tree - */ - - return ORTE_SUCCESS; -} - -static int update_route(orte_process_name_t *target, - orte_process_name_t *route) -{ - if (target->jobid == ORTE_JOBID_INVALID || - target->vpid == ORTE_VPID_INVALID) { - return ORTE_ERR_BAD_PARAM; - } - - /* if I am an application process, we don't update the route since - * we automatically route everything through the local daemon - */ - if (ORTE_PROC_IS_APP) { - return ORTE_SUCCESS; - } - - OPAL_OUTPUT_VERBOSE((1, orte_routed_base_framework.framework_output, - "%s routed_debruijn_update: %s --> %s", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - ORTE_NAME_PRINT(target), - ORTE_NAME_PRINT(route))); - - - /* if I am a daemon and the target is my HNP, then check - * the route - if it isn't direct, then we just flag that - * we have a route to the HNP - */ - if (OPAL_EQUAL == orte_util_compare_name_fields(ORTE_NS_CMP_ALL, ORTE_PROC_MY_HNP, target) && - OPAL_EQUAL != orte_util_compare_name_fields(ORTE_NS_CMP_ALL, ORTE_PROC_MY_HNP, route)) { - hnp_direct = false; - return ORTE_SUCCESS; - } - - return ORTE_SUCCESS; -} - -static inline unsigned int debruijn_next_hop (int target) -{ - const int my_id = ORTE_PROC_MY_NAME->vpid; - uint64_t route, mask = rank_mask; - unsigned int i, next_hop; - - if (target == my_id) { - return my_id; - } - - i = -log_npeers; - do { - i += log_npeers; - mask = (mask >> i) << i; - route = (my_id << i) | target; - } while ((route & mask) != (((my_id << i) & target) & mask)); - - next_hop = (int)((route >> (i - log_npeers)) & rank_mask); - - /* if the next hop does not exist route to the lowest proc with the same lower routing bits */ - return (next_hop < orte_process_info.num_procs) ? 
next_hop : (next_hop & (rank_mask >> log_npeers)); -} - -static orte_process_name_t get_route(orte_process_name_t *target) -{ - orte_process_name_t ret; - - /* initialize */ - - do { - ret = *ORTE_NAME_INVALID; - - if (ORTE_JOBID_INVALID == target->jobid || - ORTE_VPID_INVALID == target->vpid) { - break; - } - - /* if it is me, then the route is just direct */ - if (OPAL_EQUAL == opal_dss.compare(ORTE_PROC_MY_NAME, target, ORTE_NAME)) { - ret = *target; - break; - } - - /* if I am an application process, always route via my local daemon */ - if (ORTE_PROC_IS_APP) { - ret = *ORTE_PROC_MY_DAEMON; - break; - } - - /* if I am a tool, the route is direct if target is in - * my own job family, and to the target's HNP if not - */ - if (ORTE_PROC_IS_TOOL) { - if (ORTE_JOB_FAMILY(target->jobid) == ORTE_JOB_FAMILY(ORTE_PROC_MY_NAME->jobid)) { - ret = *target; - } else { - ORTE_HNP_NAME_FROM_JOB(&ret, target->jobid); - } - - break; - } - - /****** HNP AND DAEMONS ONLY ******/ - - if (OPAL_EQUAL == orte_util_compare_name_fields(ORTE_NS_CMP_ALL, ORTE_PROC_MY_HNP, target)) { - if (!hnp_direct || orte_static_ports) { - OPAL_OUTPUT_VERBOSE((2, orte_routed_base_framework.framework_output, - "%s routing to the HNP through my parent %s", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - ORTE_NAME_PRINT(ORTE_PROC_MY_PARENT))); - ret = *ORTE_PROC_MY_PARENT; - } else { - OPAL_OUTPUT_VERBOSE((2, orte_routed_base_framework.framework_output, - "%s routing direct to the HNP", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME))); - ret = *ORTE_PROC_MY_HNP; - } - - break; - } - - ret.jobid = ORTE_PROC_MY_NAME->jobid; - /* find out what daemon hosts this proc */ - if (ORTE_VPID_INVALID == (ret.vpid = orte_get_proc_daemon_vpid(target))) { - /* we don't yet know about this daemon. just route this to the "parent" */ - ret = *ORTE_PROC_MY_PARENT; - break; - } - - /* if the daemon is me, then send direct to the target! 
*/ - if (ORTE_PROC_MY_NAME->vpid == ret.vpid) { - ret = *target; - break; - } - - /* find next hop */ - ret.vpid = debruijn_next_hop (ret.vpid); - } while (0); - - OPAL_OUTPUT_VERBOSE((1, orte_routed_base_framework.framework_output, - "%s routed_debruijn_get(%s) --> %s", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - ORTE_NAME_PRINT(target), - ORTE_NAME_PRINT(&ret))); - - return ret; -} - -static int route_lost(const orte_process_name_t *route) -{ - opal_list_item_t *item; - orte_routed_tree_t *child; - - OPAL_OUTPUT_VERBOSE((2, orte_routed_base_framework.framework_output, - "%s route to %s lost", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - ORTE_NAME_PRINT(route))); - - /* if we lose the connection to the lifeline and we are NOT already, - * in finalize, tell the OOB to abort. - * NOTE: we cannot call abort from here as the OOB needs to first - * release a thread-lock - otherwise, we will hang!! - */ - if (!orte_finalizing && - NULL != lifeline && - OPAL_EQUAL == orte_util_compare_name_fields(ORTE_NS_CMP_ALL, route, lifeline)) { - OPAL_OUTPUT_VERBOSE((2, orte_routed_base_framework.framework_output, - "%s routed:debruijn: Connection to lifeline %s lost", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - ORTE_NAME_PRINT(lifeline))); - return ORTE_ERR_FATAL; - } - - /* if we are the HNP or daemon, and the route is a daemon, - * see if it is one of our children - if so, remove it - */ - if ((ORTE_PROC_IS_DAEMON || ORTE_PROC_IS_HNP) && - route->jobid == ORTE_PROC_MY_NAME->jobid) { - for (item = opal_list_get_first(&my_children); - item != opal_list_get_end(&my_children); - item = opal_list_get_next(item)) { - child = (orte_routed_tree_t*)item; - if (child->vpid == route->vpid) { - opal_list_remove_item(&my_children, item); - OBJ_RELEASE(item); - return ORTE_SUCCESS; - } - } - } - - /* we don't care about this one, so return success */ - return ORTE_SUCCESS; -} - -static bool route_is_defined(const orte_process_name_t *target) -{ - /* find out what daemon hosts this proc */ - if 
(ORTE_VPID_INVALID == orte_get_proc_daemon_vpid((orte_process_name_t*)target)) { - return false; - } - - return true; -} - -static int set_lifeline(orte_process_name_t *proc) -{ - /* we have to copy the proc data because there is no - * guarantee that it will be preserved - */ - local_lifeline.jobid = proc->jobid; - local_lifeline.vpid = proc->vpid; - lifeline = &local_lifeline; - - return ORTE_SUCCESS; -} - -static unsigned int ilog2 (unsigned int v) -{ - const unsigned int b[] = {0x2, 0xC, 0xF0, 0xFF00, 0xFFFF0000}; - const unsigned int S[] = {1, 2, 4, 8, 16}; - int i; - - register unsigned int r = 0; - for (i = 4; i >= 0; i--) { - if (v & b[i]) { - v >>= S[i]; - r |= S[i]; - } - } - - return r; -} - -static void update_routing_plan(void) -{ - orte_routed_tree_t *child; - opal_list_item_t *item; - int my_vpid = ORTE_PROC_MY_NAME->vpid; - int i; - - /* if I am anything other than a daemon or the HNP, this - * is a meaningless command as I am not allowed to route - */ - if (!ORTE_PROC_IS_DAEMON && !ORTE_PROC_IS_HNP) { - return; - } - - /* clear the list of children if any are already present */ - while (NULL != (item = opal_list_remove_first(&my_children))) { - OBJ_RELEASE(item); - } - - log_nranks = (int) ilog2 ((unsigned int)orte_process_info.num_procs) ; - assert(log_nranks < 31); - - if (log_nranks < 3) { - log_npeers = 1; - } else if (log_nranks < 7) { - log_npeers = 2; - } else { - log_npeers = 4; - } - - /* round log_nranks to a multiple of log_npeers */ - log_nranks = ((log_nranks + log_npeers) & ~(log_npeers - 1)) - 1; - - rank_mask = (1 << (log_nranks + 1)) - 1; - - /* compute my parent */ - ORTE_PROC_MY_PARENT->vpid = my_vpid ? 
my_vpid >> log_npeers : -1; - - /* only add peers to the routing tree if this rank is the smallest rank that will send to - the any peer */ - if ((my_vpid >> (log_nranks + 1 - log_npeers)) == 0) { - for (i = (1 << log_npeers) - 1 ; i >= 0 ; --i) { - int next = ((my_vpid << log_npeers) | i) & rank_mask; - - /* add a peer to the routing tree only if its vpid is smaller than this rank */ - if (next > my_vpid && next < (int)orte_process_info.num_procs) { - child = OBJ_NEW(orte_routed_tree_t); - child->vpid = next; - opal_list_append (&my_children, &child->super); - } - } - } -} - -static void get_routing_list(opal_list_t *coll) -{ - /* if I am anything other than a daemon or the HNP, this - * is a meaningless command as I am not allowed to route - */ - if (!ORTE_PROC_IS_DAEMON && !ORTE_PROC_IS_HNP) { - return; - } - - orte_routed_base_xcast_routing(coll, &my_children); -} - -static size_t num_routes(void) -{ - return opal_list_get_size(&my_children); -} - -#if OPAL_ENABLE_FT_CR == 1 -static int debruijn_ft_event(int state) -{ - int ret, exit_status = ORTE_SUCCESS; - - /******** Checkpoint Prep ********/ - if(OPAL_CRS_CHECKPOINT == state) { - } - /******** Continue Recovery ********/ - else if (OPAL_CRS_CONTINUE == state ) { - } - else if (OPAL_CRS_TERM == state ) { - /* Nothing */ - } - else { - /* Error state = Nothing */ - } - - cleanup: - return exit_status; -} -#endif - diff --git a/orte/mca/routed/debruijn/routed_debruijn.h b/orte/mca/routed/debruijn/routed_debruijn.h deleted file mode 100644 index 303b1fa9b1f..00000000000 --- a/orte/mca/routed/debruijn/routed_debruijn.h +++ /dev/null @@ -1,27 +0,0 @@ -/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ -/* - * Copyright (c) 2007-2012 Los Alamos National Security, LLC. - * All rights reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#ifndef MCA_ROUTED_DEBRUIJN_H -#define MCA_ROUTED_DEBRUIJN_H - -#include "orte_config.h" - -#include "orte/mca/routed/routed.h" - -BEGIN_C_DECLS - -ORTE_MODULE_DECLSPEC extern orte_routed_component_t mca_routed_debruijn_component; - -extern orte_routed_module_t orte_routed_debruijn_module; - -END_C_DECLS - -#endif diff --git a/orte/mca/routed/debruijn/routed_debruijn_component.c b/orte/mca/routed/debruijn/routed_debruijn_component.c deleted file mode 100644 index c16d014c23c..00000000000 --- a/orte/mca/routed/debruijn/routed_debruijn_component.c +++ /dev/null @@ -1,55 +0,0 @@ -/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ -/* - * Copyright (c) 2007-2015 Los Alamos National Security, LLC. - * All rights reserved. - * Copyright (c) 2004-2008 The Trustees of Indiana University. - * All rights reserved. - * Copyright (c) 2016-2017 Intel, Inc. All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "orte_config.h" -#include "orte/constants.h" - -#include "opal/mca/base/base.h" - -#include "orte/mca/routed/base/base.h" -#include "routed_debruijn.h" - -static int orte_routed_debruijn_component_query(mca_base_module_t **module, int *priority); - -/** - * component definition - */ -orte_routed_component_t mca_routed_debruijn_component = { - /* First, the mca_base_component_t struct containing meta - information about the component itself */ - - .base_version = { - ORTE_ROUTED_BASE_VERSION_3_0_0, - - .mca_component_name = "debruijn", - MCA_BASE_MAKE_VERSION(component, ORTE_MAJOR_VERSION, ORTE_MINOR_VERSION, - ORTE_RELEASE_VERSION), - .mca_query_component = orte_routed_debruijn_component_query - }, - .base_data = { - /* This component can be checkpointed */ - MCA_BASE_METADATA_PARAM_CHECKPOINT - }, -}; - -static int orte_routed_debruijn_component_query(mca_base_module_t **module, int *priority) -{ - /* Debruijn shall be our 
default, especially for large systems. For smaller - * systems, we will allow other options that have even fewer hops to - * support wireup - */ - *priority = 10; - *module = (mca_base_module_t *) &orte_routed_debruijn_module; - return ORTE_SUCCESS; -} diff --git a/orte/mca/rtc/base/help-orte-rtc-base.txt b/orte/mca/rtc/base/help-orte-rtc-base.txt index ade22e57b2a..8414cc58850 100644 --- a/orte/mca/rtc/base/help-orte-rtc-base.txt +++ b/orte/mca/rtc/base/help-orte-rtc-base.txt @@ -10,7 +10,7 @@ # University of Stuttgart. All rights reserved. # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. -# Copyright (c) 2011-2014 Cisco Systems, Inc. All rights reserved. +# Copyright (c) 2011-2018 Cisco Systems, Inc. All rights reserved. # Copyright (c) 2011 Los Alamos National Security, LLC. # All rights reserved. # Copyright (c) 2014 Intel, Inc. All rights reserved. @@ -22,13 +22,6 @@ # # This is the US/English general help file for Open RTE's orterun. # -[orte-rtc-base:alloc-error] -There are not enough slots available in the system to satisfy the %d slots -that were requested by the application: - %s - -Either request fewer slots for your application, or make more slots available -for use. [orte-rtc-base:not-all-mapped-alloc] Some of the requested hosts are not included in the current allocation for the application: diff --git a/orte/mca/schizo/ompi/schizo_ompi.c b/orte/mca/schizo/ompi/schizo_ompi.c index 2b1763c29b5..f9ce1327ea1 100644 --- a/orte/mca/schizo/ompi/schizo_ompi.c +++ b/orte/mca/schizo/ompi/schizo_ompi.c @@ -15,8 +15,8 @@ * Copyright (c) 2011-2017 Oak Ridge National Labs. All rights reserved. * Copyright (c) 2017 UT-Battelle, LLC. All rights reserved. * Copyright (c) 2013-2018 Intel, Inc. All rights reserved. - * Copyright (c) 2015 Research Organization for Information Science - * and Technology (RIST). All rights reserved. 
+ * Copyright (c) 2015-2019 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * Copyright (c) 2018 IBM Corporation. All rights reserved. * $COPYRIGHT$ * @@ -914,9 +914,9 @@ static int setup_fork(orte_job_t *jdata, /* setup yield schedule - do not override any user-supplied directive! */ if (oversubscribed) { - opal_setenv("OMPI_MCA_mpi_yield_when_idle", "1", false, &app->env); + opal_setenv("OMPI_MCA_mpi_oversubscribe", "1", true, &app->env); } else { - opal_setenv("OMPI_MCA_mpi_yield_when_idle", "0", false, &app->env); + opal_setenv("OMPI_MCA_mpi_oversubscribe", "0", true, &app->env); } /* set the app_context number into the environment */ diff --git a/orte/mca/snapc/full/snapc_full_app.c b/orte/mca/snapc/full/snapc_full_app.c index 1dd5a8d5edd..3a436bc81d4 100644 --- a/orte/mca/snapc/full/snapc_full_app.c +++ b/orte/mca/snapc/full/snapc_full_app.c @@ -150,7 +150,11 @@ int app_coord_init() "app) Startup Barrier...")); } - opal_pmix.fence(NULL, 0); + if (ORTE_SUCCESS != (ret = opal_pmix.fence(NULL, 0))) { + ORTE_ERROR_LOG(ret); + exit_status = ret; + goto cleanup; + } if( 0 == ORTE_PROC_MY_NAME->vpid ) { OPAL_OUTPUT_VERBOSE((3, mca_snapc_full_component.super.output_handle, @@ -216,7 +220,11 @@ int app_coord_finalize() "app) Shutdown Barrier...")); } - opal_pmix.fence(NULL, 0); + if (ORTE_SUCCESS != (ret = opal_pmix.fence(NULL, 0))) { + ORTE_ERROR_LOG(ret); + exit_status = ret; + goto cleanup; + } if( 0 == ORTE_PROC_MY_NAME->vpid ) { OPAL_OUTPUT_VERBOSE((3, mca_snapc_full_component.super.output_handle, diff --git a/orte/mca/state/base/Makefile.am b/orte/mca/state/base/Makefile.am index e8db0481f9d..623a4904399 100644 --- a/orte/mca/state/base/Makefile.am +++ b/orte/mca/state/base/Makefile.am @@ -1,6 +1,8 @@ # # Copyright (c) 2011-2013 Los Alamos National Security, LLC. # All rights reserved. +# Copyright (c) 2018 Research Organization for Information Science +# and Technology (RIST). All rights reserved. 
# $COPYRIGHT$ # # Additional copyrights may follow @@ -8,6 +10,8 @@ # $HEADER$ # +dist_ortedata_DATA += base/help-state-base.txt + headers += \ base/state_private.h \ base/base.h diff --git a/orte/mca/state/base/help-state-base.txt b/orte/mca/state/base/help-state-base.txt new file mode 100644 index 00000000000..06c4c310048 --- /dev/null +++ b/orte/mca/state/base/help-state-base.txt @@ -0,0 +1,13 @@ +# -*- text -*- +# +# Copyright (c) 2018 Research Organization for Information Science +# and Technology (RIST). All rights reserved. +# $COPYRIGHT$ +# +# Additional copyrights may follow +# +# $HEADER$ +# +# +[normal-termination-but] +While %s job %s terminated normally, %d %s. Further examination may be required. diff --git a/orte/mca/state/base/state_base_fns.c b/orte/mca/state/base/state_base_fns.c index 3a2f6e0b834..16f35c88680 100644 --- a/orte/mca/state/base/state_base_fns.c +++ b/orte/mca/state/base/state_base_fns.c @@ -1,6 +1,8 @@ /* * Copyright (c) 2011-2012 Los Alamos National Security, LLC. * Copyright (c) 2014-2018 Intel, Inc. All rights reserved. + * Copyright (c) 2018 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -38,6 +40,7 @@ #include "orte/mca/routed/routed.h" #include "orte/util/session_dir.h" #include "orte/util/threads.h" +#include "orte/util/show_help.h" #include "orte/mca/state/base/base.h" #include "orte/mca/state/base/state_private.h" @@ -847,10 +850,7 @@ void orte_state_base_check_all_complete(int fd, short args, void *cbdata) } /* warn user */ - opal_output(orte_clean_output, - "-------------------------------------------------------\n" - "While %s job %s terminated normally, %d %s. Further examination may be required.\n" - "-------------------------------------------------------", + orte_show_help("help-state-base.txt", "normal-termination-but", true, (1 == ORTE_LOCAL_JOBID(jdata->jobid)) ? 
"the primary" : "child", (1 == ORTE_LOCAL_JOBID(jdata->jobid)) ? "" : ORTE_LOCAL_JOBID_PRINT(jdata->jobid), i32, (1 == i32) ? "process returned\na non-zero exit code." : @@ -953,8 +953,9 @@ void orte_state_base_check_all_complete(int fd, short args, void *cbdata) one_still_alive = false; j = opal_hash_table_get_first_key_uint32(orte_job_data, &u32, (void **)&job, &nptr); while (OPAL_SUCCESS == j) { - /* skip the daemon job */ - if (job->jobid == ORTE_PROC_MY_NAME->jobid) { + /* skip the daemon job and all jobs from other families */ + if (job->jobid == ORTE_PROC_MY_NAME->jobid || + ORTE_JOB_FAMILY(job->jobid) != ORTE_JOB_FAMILY(ORTE_PROC_MY_NAME->jobid)) { goto next; } /* if this is the job we are checking AND it normally terminated, diff --git a/orte/mca/state/dvm/Makefile.am b/orte/mca/state/dvm/Makefile.am deleted file mode 100644 index 6122ab0e7fa..00000000000 --- a/orte/mca/state/dvm/Makefile.am +++ /dev/null @@ -1,36 +0,0 @@ -# -# Copyright (c) 2015-2018 Intel, Inc. All rights reserved. -# Copyright (c) 2017 IBM Corporation. All rights reserved. -# $COPYRIGHT$ -# -# Additional copyrights may follow -# -# $HEADER$ -# - -sources = \ - state_dvm.h \ - state_dvm_component.c \ - state_dvm.c - -# Make the output library in this directory, and name it either -# mca__.la (for DSO builds) or libmca__.la -# (for static builds). 
- -if MCA_BUILD_orte_state_dvm_DSO -component_noinst = -component_install = mca_state_dvm.la -else -component_noinst = libmca_state_dvm.la -component_install = -endif - -mcacomponentdir = $(ortelibdir) -mcacomponent_LTLIBRARIES = $(component_install) -mca_state_dvm_la_SOURCES = $(sources) -mca_state_dvm_la_LDFLAGS = -module -avoid-version -mca_state_dvm_la_LIBADD = $(top_builddir)/orte/lib@ORTE_LIB_PREFIX@open-rte.la - -noinst_LTLIBRARIES = $(component_noinst) -libmca_state_dvm_la_SOURCES =$(sources) -libmca_state_dvm_la_LDFLAGS = -module -avoid-version diff --git a/orte/mca/state/dvm/owner.txt b/orte/mca/state/dvm/owner.txt deleted file mode 100644 index 85b4416d206..00000000000 --- a/orte/mca/state/dvm/owner.txt +++ /dev/null @@ -1,7 +0,0 @@ -# -# owner/status file -# owner: institution that is responsible for this package -# status: e.g. active, maintenance, unmaintained -# -owner: INTEL -status: active diff --git a/orte/mca/state/dvm/state_dvm.c b/orte/mca/state/dvm/state_dvm.c deleted file mode 100644 index 4d11b21d899..00000000000 --- a/orte/mca/state/dvm/state_dvm.c +++ /dev/null @@ -1,688 +0,0 @@ -/* - * Copyright (c) 2015-2018 Intel, Inc. All rights reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "orte_config.h" - -#include -#ifdef HAVE_UNISTD_H -#include -#endif /* HAVE_UNISTD_H */ -#include - -#include "opal/util/output.h" -#include "opal/mca/pmix/pmix.h" - -#include "orte/mca/errmgr/errmgr.h" -#include "orte/mca/filem/filem.h" -#include "orte/mca/grpcomm/grpcomm.h" -#include "orte/mca/iof/base/base.h" -#include "orte/mca/odls/odls_types.h" -#include "orte/mca/plm/base/base.h" -#include "orte/mca/ras/base/base.h" -#include "orte/mca/regx/regx.h" -#include "orte/mca/rmaps/base/base.h" -#include "orte/mca/rml/rml.h" -#include "orte/mca/rml/base/rml_contact.h" -#include "orte/mca/routed/routed.h" -#include "orte/util/session_dir.h" -#include "orte/util/threads.h" -#include "orte/runtime/orte_quit.h" -#include "orte/runtime/orte_wait.h" - -#include "orte/mca/state/state.h" -#include "orte/mca/state/base/base.h" -#include "orte/mca/state/base/state_private.h" -#include "state_dvm.h" - -/* - * Module functions: Global - */ -static int init(void); -static int finalize(void); - -/* local functions */ -static void init_complete(int fd, short args, void *cbdata); -static void vm_ready(int fd, short args, void *cbata); -static void check_complete(int fd, short args, void *cbdata); -static void cleanup_job(int fd, short args, void *cbdata); - -/****************** - * DVM module - used when mpirun is persistent - ******************/ -orte_state_base_module_t orte_state_dvm_module = { - init, - finalize, - orte_state_base_activate_job_state, - orte_state_base_add_job_state, - orte_state_base_set_job_state_callback, - orte_state_base_set_job_state_priority, - orte_state_base_remove_job_state, - orte_state_base_activate_proc_state, - orte_state_base_add_proc_state, - orte_state_base_set_proc_state_callback, - orte_state_base_set_proc_state_priority, - orte_state_base_remove_proc_state -}; - -static void dvm_notify(int sd, short args, void *cbdata); - -/* defined default state 
machine sequence - individual - * plm's must add a state for launching daemons - */ -static orte_job_state_t launch_states[] = { - ORTE_JOB_STATE_INIT, - ORTE_JOB_STATE_INIT_COMPLETE, - ORTE_JOB_STATE_ALLOCATE, - ORTE_JOB_STATE_ALLOCATION_COMPLETE, - ORTE_JOB_STATE_DAEMONS_LAUNCHED, - ORTE_JOB_STATE_DAEMONS_REPORTED, - ORTE_JOB_STATE_VM_READY, - ORTE_JOB_STATE_MAP, - ORTE_JOB_STATE_MAP_COMPLETE, - ORTE_JOB_STATE_SYSTEM_PREP, - ORTE_JOB_STATE_LAUNCH_APPS, - ORTE_JOB_STATE_SEND_LAUNCH_MSG, - ORTE_JOB_STATE_LOCAL_LAUNCH_COMPLETE, - ORTE_JOB_STATE_RUNNING, - ORTE_JOB_STATE_REGISTERED, - /* termination states */ - ORTE_JOB_STATE_TERMINATED, - ORTE_JOB_STATE_NOTIFY_COMPLETED, - ORTE_JOB_STATE_NOTIFIED, - ORTE_JOB_STATE_ALL_JOBS_COMPLETE -}; -static orte_state_cbfunc_t launch_callbacks[] = { - orte_plm_base_setup_job, - init_complete, - orte_ras_base_allocate, - orte_plm_base_allocation_complete, - orte_plm_base_daemons_launched, - orte_plm_base_daemons_reported, - vm_ready, - orte_rmaps_base_map_job, - orte_plm_base_mapping_complete, - orte_plm_base_complete_setup, - orte_plm_base_launch_apps, - orte_plm_base_send_launch_msg, - orte_state_base_local_launch_complete, - orte_plm_base_post_launch, - orte_plm_base_registered, - check_complete, - dvm_notify, - cleanup_job, - orte_quit -}; - -static orte_proc_state_t proc_states[] = { - ORTE_PROC_STATE_RUNNING, - ORTE_PROC_STATE_REGISTERED, - ORTE_PROC_STATE_IOF_COMPLETE, - ORTE_PROC_STATE_WAITPID_FIRED, - ORTE_PROC_STATE_TERMINATED -}; -static orte_state_cbfunc_t proc_callbacks[] = { - orte_state_base_track_procs, - orte_state_base_track_procs, - orte_state_base_track_procs, - orte_state_base_track_procs, - orte_state_base_track_procs -}; - -static void force_quit(int fd, short args, void *cbdata) -{ - orte_state_caddy_t *caddy = (orte_state_caddy_t*)cbdata; - - /* give us a chance to stop the orteds */ - orte_plm.terminate_orteds(); - OBJ_RELEASE(caddy); -} - -/************************ - * API Definitions - 
************************/ -static int init(void) -{ - int i, rc; - int num_states; - - /* setup the state machines */ - OBJ_CONSTRUCT(&orte_job_states, opal_list_t); - OBJ_CONSTRUCT(&orte_proc_states, opal_list_t); - - /* setup the job state machine */ - num_states = sizeof(launch_states) / sizeof(orte_job_state_t); - for (i=0; i < num_states; i++) { - if (ORTE_SUCCESS != (rc = orte_state.add_job_state(launch_states[i], - launch_callbacks[i], - ORTE_SYS_PRI))) { - ORTE_ERROR_LOG(rc); - } - } - /* add the termination response */ - if (ORTE_SUCCESS != (rc = orte_state.add_job_state(ORTE_JOB_STATE_DAEMONS_TERMINATED, - orte_quit, ORTE_SYS_PRI))) { - ORTE_ERROR_LOG(rc); - } - /* add a default error response */ - if (ORTE_SUCCESS != (rc = orte_state.add_job_state(ORTE_JOB_STATE_FORCED_EXIT, - force_quit, ORTE_ERROR_PRI))) { - ORTE_ERROR_LOG(rc); - } - /* add callback to report progress, if requested */ - if (ORTE_SUCCESS != (rc = orte_state.add_job_state(ORTE_JOB_STATE_REPORT_PROGRESS, - orte_state_base_report_progress, ORTE_ERROR_PRI))) { - ORTE_ERROR_LOG(rc); - } - if (5 < opal_output_get_verbosity(orte_state_base_framework.framework_output)) { - orte_state_base_print_job_state_machine(); - } - - /* populate the proc state machine to allow us to - * track proc lifecycle changes - */ - num_states = sizeof(proc_states) / sizeof(orte_proc_state_t); - for (i=0; i < num_states; i++) { - if (ORTE_SUCCESS != (rc = orte_state.add_proc_state(proc_states[i], - proc_callbacks[i], - ORTE_SYS_PRI))) { - ORTE_ERROR_LOG(rc); - } - } - if (5 < opal_output_get_verbosity(orte_state_base_framework.framework_output)) { - orte_state_base_print_proc_state_machine(); - } - - return ORTE_SUCCESS; -} - -static int finalize(void) -{ - opal_list_item_t *item; - - /* cleanup the proc state machine */ - while (NULL != (item = opal_list_remove_first(&orte_proc_states))) { - OBJ_RELEASE(item); - } - OBJ_DESTRUCT(&orte_proc_states); - - return ORTE_SUCCESS; -} - -static void files_ready(int status, 
void *cbdata) -{ - orte_job_t *jdata = (orte_job_t*)cbdata; - - if (ORTE_SUCCESS != status) { - ORTE_FORCED_TERMINATE(status); - return; - } else { - ORTE_ACTIVATE_JOB_STATE(jdata, ORTE_JOB_STATE_MAP); - } -} - -static void init_complete(int sd, short args, void *cbdata) -{ - orte_state_caddy_t *caddy = (orte_state_caddy_t*)cbdata; - - ORTE_ACQUIRE_OBJECT(caddy); - - /* nothing to do here but move along - if it is the - * daemon job, then next step is allocate */ - ORTE_ACTIVATE_JOB_STATE(caddy->jdata, ORTE_JOB_STATE_ALLOCATE); - OBJ_RELEASE(caddy); -} - -static void vm_ready(int fd, short args, void *cbdata) -{ - orte_state_caddy_t *caddy = (orte_state_caddy_t*)cbdata; - int rc; - opal_buffer_t *buf; - orte_daemon_cmd_flag_t command = ORTE_DAEMON_DVM_NIDMAP_CMD; - orte_grpcomm_signature_t *sig; - opal_buffer_t *wireup; - orte_job_t *jptr; - orte_proc_t *dmn; - opal_byte_object_t bo, *boptr; - int8_t flag; - int32_t numbytes, v; - char *nidmap; - opal_list_t *modex; - opal_value_t *val, *kv; - - ORTE_ACQUIRE_OBJECT(caddy); - - /* if this is my job, then we are done */ - if (ORTE_PROC_MY_NAME->jobid == caddy->jdata->jobid) { - /* if there is only one daemon in the job, then there - * is just a little bit to do */ - if (1 == orte_process_info.num_procs) { - if (!orte_nidmap_communicated) { - if (ORTE_SUCCESS != (rc = orte_regx.nidmap_create(orte_node_pool, &orte_node_regex))) { - ORTE_ERROR_LOG(rc); - return; - } - orte_nidmap_communicated = true; - } - } else { - /* send the daemon map to every daemon in this DVM - we - * do this here so we don't have to do it for every - * job we are going to launch */ - buf = OBJ_NEW(opal_buffer_t); - opal_dss.pack(buf, &command, 1, ORTE_DAEMON_CMD); - /* if we couldn't provide the allocation regex on the orted - * cmd line, then we need to provide all the info here */ - if (!orte_nidmap_communicated) { - if (ORTE_SUCCESS != (rc = orte_regx.nidmap_create(orte_node_pool, &nidmap))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(buf); - 
return; - } - orte_nidmap_communicated = true; - } else { - nidmap = NULL; - } - opal_dss.pack(buf, &nidmap, 1, OPAL_STRING); - if (NULL != nidmap) { - free(nidmap); - } - /* provide the info on the capabilities of each node */ - if (!orte_node_info_communicated) { - flag = 1; - opal_dss.pack(buf, &flag, 1, OPAL_INT8); - if (ORTE_SUCCESS != (rc = orte_regx.encode_nodemap(buf))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(buf); - return; - } - orte_node_info_communicated = true; - /* get wireup info for daemons */ - jptr = orte_get_job_data_object(ORTE_PROC_MY_NAME->jobid); - wireup = OBJ_NEW(opal_buffer_t); - for (v=0; v < jptr->procs->size; v++) { - if (NULL == (dmn = (orte_proc_t*)opal_pointer_array_get_item(jptr->procs, v))) { - continue; - } - val = NULL; - if (opal_pmix.legacy_get()) { - if (OPAL_SUCCESS != (rc = opal_pmix.get(&dmn->name, OPAL_PMIX_PROC_URI, NULL, &val)) || NULL == val) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(buf); - OBJ_RELEASE(wireup); - return; - } else { - /* pack the name of the daemon */ - if (ORTE_SUCCESS != (rc = opal_dss.pack(wireup, &dmn->name, 1, ORTE_NAME))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(buf); - OBJ_RELEASE(wireup); - return; - } - /* pack the URI */ - if (ORTE_SUCCESS != (rc = opal_dss.pack(wireup, &val->data.string, 1, OPAL_STRING))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(buf); - OBJ_RELEASE(wireup); - return; - } - OBJ_RELEASE(val); - } - } else { - if (OPAL_SUCCESS != (rc = opal_pmix.get(&dmn->name, NULL, NULL, &val)) || NULL == val) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(buf); - OBJ_RELEASE(wireup); - return; - } else { - /* pack the name of the daemon */ - if (ORTE_SUCCESS != (rc = opal_dss.pack(wireup, &dmn->name, 1, ORTE_NAME))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(buf); - OBJ_RELEASE(wireup); - return; - } - /* the data is returned as a list of key-value pairs in the opal_value_t */ - if (OPAL_PTR != val->type) { - ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND); - OBJ_RELEASE(buf); - OBJ_RELEASE(wireup); - return; - } - modex = 
(opal_list_t*)val->data.ptr; - numbytes = (int32_t)opal_list_get_size(modex); - if (ORTE_SUCCESS != (rc = opal_dss.pack(wireup, &numbytes, 1, OPAL_INT32))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(buf); - OBJ_RELEASE(wireup); - return; - } - OPAL_LIST_FOREACH(kv, modex, opal_value_t) { - if (ORTE_SUCCESS != (rc = opal_dss.pack(wireup, &kv, 1, OPAL_VALUE))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(buf); - OBJ_RELEASE(wireup); - return; - } - } - OPAL_LIST_RELEASE(modex); - OBJ_RELEASE(val); - } - } - } - /* put it in a byte object for xmission */ - opal_dss.unload(wireup, (void**)&bo.bytes, &numbytes); - /* pack the byte object - zero-byte objects are fine */ - bo.size = numbytes; - boptr = &bo; - if (ORTE_SUCCESS != (rc = opal_dss.pack(buf, &boptr, 1, OPAL_BYTE_OBJECT))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(wireup); - OBJ_RELEASE(buf); - return; - } - /* release the data since it has now been copied into our buffer */ - if (NULL != bo.bytes) { - free(bo.bytes); - } - OBJ_RELEASE(wireup); - } else { - flag = 0; - opal_dss.pack(buf, &flag, 1, OPAL_INT8); - } - - /* goes to all daemons */ - sig = OBJ_NEW(orte_grpcomm_signature_t); - sig->signature = (orte_process_name_t*)malloc(sizeof(orte_process_name_t)); - sig->signature[0].jobid = ORTE_PROC_MY_NAME->jobid; - sig->signature[0].vpid = ORTE_VPID_WILDCARD; - if (ORTE_SUCCESS != (rc = orte_grpcomm.xcast(sig, ORTE_RML_TAG_DAEMON, buf))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(buf); - OBJ_RELEASE(sig); - ORTE_FORCED_TERMINATE(ORTE_ERROR_DEFAULT_EXIT_CODE); - return; - } - OBJ_RELEASE(buf); - } - /* notify that the vm is ready */ - fprintf(stdout, "DVM ready\n"); fflush(stdout); - OBJ_RELEASE(caddy); - return; - } - - /* progress the job */ - caddy->jdata->state = ORTE_JOB_STATE_VM_READY; - - /* position any required files */ - if (ORTE_SUCCESS != orte_filem.preposition_files(caddy->jdata, files_ready, caddy->jdata)) { - ORTE_FORCED_TERMINATE(ORTE_ERROR_DEFAULT_EXIT_CODE); - } - - /* cleanup */ - OBJ_RELEASE(caddy); -} - -static 
void check_complete(int fd, short args, void *cbdata) -{ - orte_state_caddy_t *caddy = (orte_state_caddy_t*)cbdata; - orte_job_t *jdata; - orte_proc_t *proc; - int i; - orte_node_t *node; - orte_job_map_t *map; - orte_std_cntr_t index; - char *rtmod; - - ORTE_ACQUIRE_OBJECT(caddy); - jdata = caddy->jdata; - - opal_output_verbose(2, orte_state_base_framework.framework_output, - "%s state:dvm:check_job_complete on job %s", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - (NULL == jdata) ? "NULL" : ORTE_JOBID_PRINT(jdata->jobid)); - - if (NULL == jdata || jdata->jobid == ORTE_PROC_MY_NAME->jobid) { - /* just check to see if the daemons are complete */ - OPAL_OUTPUT_VERBOSE((2, orte_state_base_framework.framework_output, - "%s state:dvm:check_job_complete - received NULL job, checking daemons", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME))); - rtmod = orte_rml.get_routed(orte_mgmt_conduit); - if (0 == orte_routed.num_routes(rtmod)) { - /* orteds are done! */ - OPAL_OUTPUT_VERBOSE((2, orte_state_base_framework.framework_output, - "%s orteds complete - exiting", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME))); - if (NULL == jdata) { - jdata = orte_get_job_data_object(ORTE_PROC_MY_NAME->jobid); - } - ORTE_ACTIVATE_JOB_STATE(jdata, ORTE_JOB_STATE_DAEMONS_TERMINATED); - OBJ_RELEASE(caddy); - return; - } - OBJ_RELEASE(caddy); - return; - } - - /* mark the job as terminated, but don't override any - * abnormal termination flags - */ - if (jdata->state < ORTE_JOB_STATE_UNTERMINATED) { - jdata->state = ORTE_JOB_STATE_TERMINATED; - } - - /* tell the IOF that the job is complete */ - if (NULL != orte_iof.complete) { - orte_iof.complete(jdata); - } - - /* tell the PMIx subsystem the job is complete */ - if (NULL != opal_pmix.server_deregister_nspace) { - opal_pmix.server_deregister_nspace(jdata->jobid, NULL, NULL); - } - - /* Release the resources used by this job. 
Since some errmgrs may want - * to continue using resources allocated to the job as part of their - * fault recovery procedure, we only do this once the job is "complete". - * Note that an aborted/killed job -is- flagged as complete and will - * therefore have its resources released. We need to do this after - * we call the errmgr so that any attempt to restart the job will - * avoid doing so in the exact same place as the current job - */ - if (NULL != jdata->map) { - map = jdata->map; - for (index = 0; index < map->nodes->size; index++) { - if (NULL == (node = (orte_node_t*)opal_pointer_array_get_item(map->nodes, index))) { - continue; - } - OPAL_OUTPUT_VERBOSE((2, orte_state_base_framework.framework_output, - "%s state:dvm releasing procs from node %s", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - node->name)); - for (i = 0; i < node->procs->size; i++) { - if (NULL == (proc = (orte_proc_t*)opal_pointer_array_get_item(node->procs, i))) { - continue; - } - if (proc->name.jobid != jdata->jobid) { - /* skip procs from another job */ - continue; - } - if (!ORTE_FLAG_TEST(proc, ORTE_PROC_FLAG_TOOL)) { - node->slots_inuse--; - node->num_procs--; - } - - OPAL_OUTPUT_VERBOSE((2, orte_state_base_framework.framework_output, - "%s state:dvm releasing proc %s from node %s", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - ORTE_NAME_PRINT(&proc->name), node->name)); - /* set the entry in the node array to NULL */ - opal_pointer_array_set_item(node->procs, i, NULL); - /* release the proc once for the map entry */ - OBJ_RELEASE(proc); - } - /* set the node location to NULL */ - opal_pointer_array_set_item(map->nodes, index, NULL); - /* maintain accounting */ - OBJ_RELEASE(node); - /* flag that the node is no longer in a map */ - ORTE_FLAG_UNSET(node, ORTE_NODE_FLAG_MAPPED); - } - OBJ_RELEASE(map); - jdata->map = NULL; - } - - if (ORTE_FLAG_TEST(jdata, ORTE_JOB_FLAG_DEBUGGER_DAEMON)) { - /* this was a debugger daemon. 
notify that a debugger has detached */ - ORTE_ACTIVATE_JOB_STATE(jdata, ORTE_JOB_STATE_DEBUGGER_DETACH); - } else if (jdata->state != ORTE_JOB_STATE_NOTIFIED) { - OPAL_OUTPUT_VERBOSE((2, orte_state_base_framework.framework_output, - "%s state:dvm:check_job_completed state is terminated - activating notify", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME))); - ORTE_ACTIVATE_JOB_STATE(jdata, ORTE_JOB_STATE_NOTIFY_COMPLETED); - /* mark the job as notified */ - jdata->state = ORTE_JOB_STATE_NOTIFIED; - } - - OBJ_RELEASE(caddy); -} - -static void cleanup_job(int sd, short args, void *cbdata) -{ - orte_state_caddy_t *caddy = (orte_state_caddy_t*)cbdata; - orte_job_t *jdata; - - ORTE_ACQUIRE_OBJECT(caddy); - jdata = caddy->jdata; - - /* remove this object from the job array */ - opal_hash_table_set_value_uint32(orte_job_data, jdata->jobid, NULL); - - OBJ_RELEASE(caddy); -} - -typedef struct { - opal_list_t *info; - orte_job_t *jdata; -} mycaddy_t; - -static void notify_complete(int status, void *cbdata) -{ - mycaddy_t *mycaddy = (mycaddy_t*)cbdata; - - OPAL_LIST_RELEASE(mycaddy->info); - ORTE_ACTIVATE_JOB_STATE(mycaddy->jdata, ORTE_JOB_STATE_NOTIFIED); - OBJ_RELEASE(mycaddy->jdata); - free(mycaddy); -} - -static void dvm_notify(int sd, short args, void *cbdata) -{ - orte_state_caddy_t *caddy = (orte_state_caddy_t*)cbdata; - orte_job_t *jdata = caddy->jdata; - orte_proc_t *pptr=NULL; - int ret; - opal_buffer_t *reply; - orte_daemon_cmd_flag_t command; - orte_grpcomm_signature_t *sig; - bool notify = true; - opal_list_t *info; - opal_value_t *val; - opal_process_name_t pname, *proc, pnotify; - mycaddy_t *mycaddy; - - /* see if there was any problem */ - if (orte_get_attribute(&jdata->attributes, ORTE_JOB_ABORTED_PROC, (void**)&pptr, OPAL_PTR) && NULL != pptr) { - ret = pptr->exit_code; - /* or whether we got cancelled by the user */ - } else if (orte_get_attribute(&jdata->attributes, ORTE_JOB_CANCELLED, NULL, OPAL_BOOL)) { - ret = ORTE_ERR_JOB_CANCELLED; - } else { - ret = 
ORTE_SUCCESS; - } - - if (0 == ret && orte_get_attribute(&jdata->attributes, ORTE_JOB_SILENT_TERMINATION, NULL, OPAL_BOOL)) { - notify = false; - } - /* if the jobid matches that of the requestor, then don't notify */ - proc = &pnotify; - if (orte_get_attribute(&jdata->attributes, ORTE_JOB_LAUNCH_PROXY, (void**)&proc, OPAL_NAME)) { - if (pnotify.jobid == jdata->jobid) { - notify = false; - } - } - - if (notify) { - /* the source is the job that terminated */ - pname.jobid = jdata->jobid; - pname.vpid = OPAL_VPID_WILDCARD; - - info = OBJ_NEW(opal_list_t); - /* ensure this only goes to the job terminated event handler */ - val = OBJ_NEW(opal_value_t); - val->key = strdup(OPAL_PMIX_EVENT_NON_DEFAULT); - val->type = OPAL_BOOL; - val->data.flag = true; - opal_list_append(info, &val->super); - /* tell the server not to cache the event as subsequent jobs - * do not need to know about it */ - val = OBJ_NEW(opal_value_t); - val->key = strdup(OPAL_PMIX_EVENT_DO_NOT_CACHE); - val->type = OPAL_BOOL; - val->data.flag = true; - opal_list_append(info, &val->super); - /* provide the status */ - val = OBJ_NEW(opal_value_t); - val->key = strdup(OPAL_PMIX_JOB_TERM_STATUS); - val->type = OPAL_STATUS; - val->data.status = ret; - opal_list_append(info, &val->super); - /* tell the requestor which job or proc */ - val = OBJ_NEW(opal_value_t); - val->key = strdup(OPAL_PMIX_PROCID); - val->type = OPAL_NAME; - val->data.name.jobid = jdata->jobid; - if (NULL != pptr) { - val->data.name.vpid = pptr->name.vpid; - } else { - val->data.name.vpid = ORTE_VPID_WILDCARD; - } - opal_list_append(info, &val->super); - /* pass along the proc to be notified */ - val = OBJ_NEW(opal_value_t); - val->key = strdup(OPAL_PMIX_EVENT_CUSTOM_RANGE); - val->type = OPAL_NAME; - val->data.name.jobid = pnotify.jobid; - val->data.name.vpid = pnotify.vpid; - opal_list_append(info, &val->super); - /* setup the caddy */ - mycaddy = (mycaddy_t*)malloc(sizeof(mycaddy_t)); - mycaddy->info = info; - OBJ_RETAIN(jdata); - 
mycaddy->jdata = jdata; - opal_pmix.server_notify_event(OPAL_ERR_JOB_TERMINATED, &pname, - info, notify_complete, mycaddy); - } - - /* now ensure that _all_ daemons know that this job has terminated so even - * those that did not participate in it will know to cleanup the resources - * they assigned to the job. This is necessary now that the mapping function - * has been moved to the backend daemons - otherwise, non-participating daemons - * retain the slot assignments on the participating daemons, and then incorrectly - * map subsequent jobs thinking those nodes are still "busy" */ - reply = OBJ_NEW(opal_buffer_t); - command = ORTE_DAEMON_DVM_CLEANUP_JOB_CMD; - opal_dss.pack(reply, &command, 1, ORTE_DAEMON_CMD); - opal_dss.pack(reply, &jdata->jobid, 1, ORTE_JOBID); - sig = OBJ_NEW(orte_grpcomm_signature_t); - sig->signature = (orte_process_name_t*)malloc(sizeof(orte_process_name_t)); - sig->signature[0].jobid = ORTE_PROC_MY_NAME->jobid; - sig->signature[0].vpid = ORTE_VPID_WILDCARD; - orte_grpcomm.xcast(sig, ORTE_RML_TAG_DAEMON, reply); - OBJ_RELEASE(reply); - OBJ_RELEASE(sig); -} diff --git a/orte/mca/state/dvm/state_dvm.h b/orte/mca/state/dvm/state_dvm.h deleted file mode 100644 index 5137d8422de..00000000000 --- a/orte/mca/state/dvm/state_dvm.h +++ /dev/null @@ -1,35 +0,0 @@ -/* - * Copyright (c) 2015 Intel, Inc. All rights reserved. 
- * - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -/** - * @file - * - */ - -#ifndef MCA_STATE_DVM_EXPORT_H -#define MCA_STATE_DVM_EXPORT_H - -#include "orte_config.h" - -#include "orte/mca/state/state.h" - -BEGIN_C_DECLS - -/* - * Local Component structures - */ - -ORTE_MODULE_DECLSPEC extern orte_state_base_component_t mca_state_dvm_component; - -ORTE_DECLSPEC extern orte_state_base_module_t orte_state_dvm_module; - -END_C_DECLS - -#endif /* MCA_STATE_DVM_EXPORT_H */ diff --git a/orte/mca/state/dvm/state_dvm_component.c b/orte/mca/state/dvm/state_dvm_component.c deleted file mode 100644 index df17c61ed95..00000000000 --- a/orte/mca/state/dvm/state_dvm_component.c +++ /dev/null @@ -1,83 +0,0 @@ -/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ -/* - * Copyright (c) 2015 Intel, Inc. All rights reserved. - * Copyright (c) 2015 Los Alamos National Security, LLC. All rights - * reserved. - * - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "orte_config.h" -#include "opal/util/output.h" - -#include "orte/mca/state/state.h" -#include "orte/mca/state/base/base.h" -#include "state_dvm.h" - -/* - * Public string for version number - */ -const char *orte_state_dvm_component_version_string = - "ORTE STATE dvm MCA component version " ORTE_VERSION; - -/* - * Local functionality - */ -static int state_dvm_open(void); -static int state_dvm_close(void); -static int state_dvm_component_query(mca_base_module_t **module, int *priority); - -/* - * Instantiate the public struct with all of our public information - * and pointer to our public functions in it - */ -orte_state_base_component_t mca_state_dvm_component = -{ - /* Handle the general mca_component_t struct containing - * meta information about the component - */ - .base_version = { - ORTE_STATE_BASE_VERSION_1_0_0, - /* Component name and version */ - .mca_component_name = "dvm", - MCA_BASE_MAKE_VERSION(component, ORTE_MAJOR_VERSION, 
ORTE_MINOR_VERSION, - ORTE_RELEASE_VERSION), - - /* Component open and close functions */ - .mca_open_component = state_dvm_open, - .mca_close_component = state_dvm_close, - .mca_query_component = state_dvm_component_query, - }, - .base_data = { - /* The component is checkpoint ready */ - MCA_BASE_METADATA_PARAM_CHECKPOINT - }, -}; - -static int state_dvm_open(void) -{ - return ORTE_SUCCESS; -} - -static int state_dvm_close(void) -{ - return ORTE_SUCCESS; -} - -static int state_dvm_component_query(mca_base_module_t **module, int *priority) -{ - /* used by DVM masters */ - if (ORTE_PROC_IS_MASTER) { - *priority = 100; - *module = (mca_base_module_t *)&orte_state_dvm_module; - return ORTE_SUCCESS; - } - - *priority = 0; - *module = NULL; - return ORTE_ERR_NOT_AVAILABLE; -} diff --git a/orte/orted/help-orted.txt b/orte/orted/help-orted.txt index fa7e25b487b..2f390d067a2 100644 --- a/orte/orted/help-orted.txt +++ b/orte/orted/help-orted.txt @@ -10,7 +10,7 @@ # University of Stuttgart. All rights reserved. # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. -# Copyright (c) 2014-2017 Intel, Inc. All rights reserved. +# Copyright (c) 2014-2018 Intel, Inc. All rights reserved. # $COPYRIGHT$ # # Additional copyrights may follow @@ -89,3 +89,21 @@ to it - please check the connection info and ensure the server is alive: Connection: %s +# +[mpir-debugger-detected] +Open MPI has detected that you have attached a debugger to this MPI +job, and that debugger is using the legacy "MPIR" method of +attachment. + +Please note that Open MPI has deprecated the "MPIR" debugger +attachment method in favor of the new "PMIx" debugger attachment +mechanisms. + +*** This means that future versions of Open MPI may not support the +*** "MPIR" debugger attachment method at all. Specifically: the +*** debugger you just attached may not work with future versions of +*** Open MPI. 
+ +You may wish to contact your debugger vendor to inquire about support +for PMIx-based debugger attachment mechanisms. Meantime, you can +disable this warning by setting the OMPI_MPIR_DO_NOT_WARN envar to 1. diff --git a/orte/orted/orted_submit.c b/orte/orted/orted_submit.c index a1ad7d7b522..0db2703e46d 100644 --- a/orte/orted/orted_submit.c +++ b/orte/orted/orted_submit.c @@ -173,14 +173,44 @@ char MPIR_attach_fifo[MPIR_MAX_PATH_LENGTH] = {0}; int MPIR_force_to_main = 0; static void orte_debugger_init_before_spawn(orte_job_t *jdata); -ORTE_DECLSPEC void* __opal_attribute_optnone__ MPIR_Breakpoint(void); +ORTE_DECLSPEC void __opal_attribute_optnone__ MPIR_Breakpoint(void); + +/* + * Attempt to prevent the compiler from optimizing out + * MPIR_Breakpoint(). + * + * Some older versions of automake can add -O3 to every + * file via CFLAGS (which was demonstrated in automake v1.13.4), + * so there is a possibility that the compiler will see + * this function as a NOOP and optimize it out on older versions. + * While using the current/recommended version of automake + * does not do this, the following will help those + * stuck with an older version, as well as guard against + * future regressions. + * + * See the following git issue for more discussion: + * https://github.com/open-mpi/ompi/issues/5501 + */ +volatile void* volatile orte_noop_mpir_breakpoint_ptr = NULL; /* * Breakpoint function for parallel debuggers */ -void* MPIR_Breakpoint(void) +void MPIR_Breakpoint(void) { - return NULL; + /* + * Actually do something with this pointer to make + * sure the compiler does not optimize out this function. + * The compiler should be forced to keep this + * function around due to the volatile void* type. + * + * This pointer doesn't actually do anything other than + * prevent unwanted optimization, and + * *should not* be used anywhere else in the code. + * So pointing this to the weeds should be OK. 
+ */ + orte_noop_mpir_breakpoint_ptr = (volatile void *) 0x42; + return; } /* local objects */ @@ -324,6 +354,14 @@ int orte_submit_init(int argc, char *argv[], * exit with a giant warning flag */ if (0 == geteuid() && !orte_cmd_options.run_as_root) { + /* check for two envars that allow override of this protection */ + char *r1, *r2; + if (NULL != (r1 = getenv("OMPI_ALLOW_RUN_AS_ROOT")) && + NULL != (r2 = getenv("OMPI_ALLOW_RUN_AS_ROOT_CONFIRM"))) { + if (0 == strcmp(r1, "1") && 0 == strcmp(r2, "1")) { + goto moveon; + } + } /* show_help is not yet available, so print an error manually */ fprintf(stderr, "--------------------------------------------------------------------------\n"); if (orte_cmd_options.help) { @@ -338,13 +376,17 @@ int orte_submit_init(int argc, char *argv[], fprintf(stderr, "We strongly suggest that you run %s as a non-root user.\n\n", orte_basename); - fprintf(stderr, "You can override this protection by adding the --allow-run-as-root\n"); - fprintf(stderr, "option to your command line. 
However, we reiterate our strong advice\n"); - fprintf(stderr, "against doing so - please do so at your own risk.\n"); + fprintf(stderr, "You can override this protection by adding the --allow-run-as-root option\n"); + fprintf(stderr, "to the cmd line or by setting two environment variables in the following way:\n"); + fprintf(stderr, "the variable OMPI_ALLOW_RUN_AS_ROOT=1 to indicate the desire to override this\n"); + fprintf(stderr, "protection, and OMPI_ALLOW_RUN_AS_ROOT_CONFIRM=1 to confirm the choice and\n"); + fprintf(stderr, "add one more layer of certainty that you want to do so.\n"); + fprintf(stderr, "We reiterate our advice against doing so - please proceed at your own risk.\n"); fprintf(stderr, "--------------------------------------------------------------------------\n"); exit(1); } + moveon: /* process any mca params */ rc = mca_base_cmd_line_process_args(orte_cmd_line, &environ, &environ); if (ORTE_SUCCESS != rc) { @@ -864,7 +906,7 @@ int orte_submit_job(char *argv[], int *index, jdata->map = OBJ_NEW(orte_job_map_t); if (NULL != orte_cmd_options.mapping_policy) { - if (ORTE_SUCCESS != (rc = orte_rmaps_base_set_mapping_policy(&jdata->map->mapping, NULL, orte_cmd_options.mapping_policy))) { + if (ORTE_SUCCESS != (rc = orte_rmaps_base_set_mapping_policy(jdata, &jdata->map->mapping, NULL, orte_cmd_options.mapping_policy))) { ORTE_ERROR_LOG(rc); return rc; } @@ -2245,6 +2287,8 @@ struct MPIR_PROCDESC { * spawn we need to check if we are being run under a TotalView-like * debugger; if so then inform applications via an MCA parameter. 
*/ +static bool mpir_warning_printed = false; + static void orte_debugger_init_before_spawn(orte_job_t *jdata) { char *env_name; @@ -2293,6 +2337,15 @@ static void orte_debugger_init_before_spawn(orte_job_t *jdata) launchit: opal_output_verbose(1, orte_debug_output, "Info: Spawned by a debugger"); + /* if we haven't previously warned about it */ + if (!mpir_warning_printed) { + mpir_warning_printed = true; + /* check for silencing envar */ + if (NULL == getenv("OMPI_MPIR_DO_NOT_WARN")) { + orte_show_help("help-orted.txt", "mpir-debugger-detected", true); + } + } + /* tell the procs they are being debugged */ (void) mca_base_var_env_name ("orte_in_parallel_debugger", &env_name); @@ -2506,6 +2559,14 @@ void orte_debugger_init_after_spawn(int fd, short event, void *cbdata) if (MPIR_being_debugged || NULL != orte_debugger_test_daemon || NULL != getenv("ORTE_TEST_DEBUGGER_ATTACH")) { OBJ_RELEASE(caddy); + /* if we haven't previously warned about it */ + if (!mpir_warning_printed) { + mpir_warning_printed = true; + /* check for silencing envar */ + if (NULL == getenv("OMPI_MPIR_DO_NOT_WARN")) { + orte_show_help("help-orted.txt", "mpir-debugger-detected", true); + } + } if (!mpir_breakpoint_fired) { /* record that we have triggered the debugger */ mpir_breakpoint_fired = true; @@ -2601,6 +2662,15 @@ void orte_debugger_init_after_spawn(int fd, short event, void *cbdata) */ if (MPIR_being_debugged || NULL != orte_debugger_test_daemon || NULL != getenv("ORTE_TEST_DEBUGGER_ATTACH")) { + /* if we haven't previously warned about it */ + if (!mpir_warning_printed) { + mpir_warning_printed = true; + /* check for silencing envar */ + if (NULL == getenv("OMPI_MPIR_DO_NOT_WARN")) { + orte_show_help("help-orted.txt", "mpir-debugger-detected", true); + } + } + /* if we are not launching debugger daemons, then trigger * the debugger - otherwise, we need to wait for the debugger * daemons to be started @@ -2909,6 +2979,15 @@ static void attach_debugger(int fd, short event, void *arg) 
"%s Attaching debugger %s", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), (NULL == orte_debugger_test_daemon) ? MPIR_executable_path : orte_debugger_test_daemon); + /* if we haven't previously warned about it */ + if (!mpir_warning_printed) { + mpir_warning_printed = true; + /* check for silencing envar */ + if (NULL == getenv("OMPI_MPIR_DO_NOT_WARN")) { + orte_show_help("help-orted.txt", "mpir-debugger-detected", true); + } + } + /* a debugger has attached! All the MPIR_Proctable * data is already available, so we only need to * check to see if we should spawn any daemons @@ -3024,6 +3103,15 @@ static void run_debugger(char *basename, opal_cmd_line_t *cmd_line, free(env_name); } + /* if we haven't previously warned about it */ + if (!mpir_warning_printed) { + mpir_warning_printed = true; + /* check for silencing envar */ + if (NULL == getenv("OMPI_MPIR_DO_NOT_WARN")) { + orte_show_help("help-orted.txt", "mpir-debugger-detected", true); + } + } + /* Launch the debugger */ execvp(new_argv[0], new_argv); value = opal_argv_join(new_argv, ' '); diff --git a/orte/orted/pmix/pmix_server_dyn.c b/orte/orted/pmix/pmix_server_dyn.c index 89b4303ba54..c3ab28f7ae8 100644 --- a/orte/orted/pmix/pmix_server_dyn.c +++ b/orte/orted/pmix/pmix_server_dyn.c @@ -42,6 +42,7 @@ #include "orte/mca/errmgr/errmgr.h" #include "orte/mca/rmaps/base/base.h" +#include "orte/mca/rml/base/rml_contact.h" #include "orte/mca/state/state.h" #include "orte/util/name_fns.h" #include "orte/util/show_help.h" @@ -301,14 +302,7 @@ int pmix_server_spawn_fn(opal_process_name_t *requestor, /*** MAP-BY ***/ } else if (0 == strcmp(info->key, OPAL_PMIX_MAPBY)) { - if (ORTE_MAPPING_POLICY_IS_SET(jdata->map->mapping)) { - /* not allowed to provide multiple mapping policies */ - orte_show_help("help-orte-rmaps-base.txt", "redefining-policy", - true, "mapping", info->data.string, - orte_rmaps_base_print_mapping(orte_rmaps_base.mapping)); - return ORTE_ERR_BAD_PARAM; - } - rc = 
orte_rmaps_base_set_mapping_policy(&jdata->map->mapping, + rc = orte_rmaps_base_set_mapping_policy(jdata, &jdata->map->mapping, NULL, info->data.string); if (ORTE_SUCCESS != rc) { return rc; @@ -316,13 +310,6 @@ int pmix_server_spawn_fn(opal_process_name_t *requestor, /*** RANK-BY ***/ } else if (0 == strcmp(info->key, OPAL_PMIX_RANKBY)) { - if (ORTE_RANKING_POLICY_IS_SET(jdata->map->ranking)) { - /* not allowed to provide multiple ranking policies */ - orte_show_help("help-orte-rmaps-base.txt", "redefining-policy", - true, "ranking", info->data.string, - orte_rmaps_base_print_ranking(orte_rmaps_base.ranking)); - return ORTE_ERR_BAD_PARAM; - } rc = orte_rmaps_base_set_ranking_policy(&jdata->map->ranking, jdata->map->mapping, info->data.string); @@ -332,13 +319,6 @@ int pmix_server_spawn_fn(opal_process_name_t *requestor, /*** BIND-TO ***/ } else if (0 == strcmp(info->key, OPAL_PMIX_BINDTO)) { - if (OPAL_BINDING_POLICY_IS_SET(jdata->map->binding)) { - /* not allowed to provide multiple mapping policies */ - orte_show_help("help-opal-hwloc-base.txt", "redefining-policy", true, - info->data.string, - opal_hwloc_base_print_binding(opal_hwloc_binding_policy)); - return ORTE_ERR_BAD_PARAM; - } rc = opal_hwloc_base_set_binding_policy(&jdata->map->binding, info->data.string); if (ORTE_SUCCESS != rc) { @@ -537,7 +517,14 @@ static void _cnlk(int status, opal_list_t *data, void *cbdata) int rc, cnt; opal_pmix_pdata_t *pdat; orte_job_t *jdata; - opal_buffer_t buf; + orte_node_t *node; + orte_proc_t *proc; + opal_buffer_t buf, bucket; + opal_byte_object_t *bo; + orte_process_name_t dmn, pname; + char *uri; + opal_value_t val; + opal_list_t nodes; ORTE_ACQUIRE_OBJECT(cd); @@ -554,6 +541,7 @@ static void _cnlk(int status, opal_list_t *data, void *cbdata) pdat = (opal_pmix_pdata_t*)opal_list_get_first(data); if (OPAL_BYTE_OBJECT != pdat->value.type) { rc = ORTE_ERR_BAD_PARAM; + ORTE_ERROR_LOG(rc); goto release; } /* the data will consist of a packed buffer with the job data in it 
*/ @@ -563,15 +551,107 @@ static void _cnlk(int status, opal_list_t *data, void *cbdata) pdat->value.data.bo.size = 0; cnt = 1; if (OPAL_SUCCESS != (rc = opal_dss.unpack(&buf, &jdata, &cnt, ORTE_JOB))) { + ORTE_ERROR_LOG(rc); + OBJ_DESTRUCT(&buf); + goto release; + } + + /* unpack the byte object containing the daemon uri's */ + cnt=1; + if (ORTE_SUCCESS != (rc = opal_dss.unpack(&buf, &bo, &cnt, OPAL_BYTE_OBJECT))) { + ORTE_ERROR_LOG(rc); OBJ_DESTRUCT(&buf); goto release; } + /* load it into a buffer */ + OBJ_CONSTRUCT(&bucket, opal_buffer_t); + opal_dss.load(&bucket, bo->bytes, bo->size); + bo->bytes = NULL; + free(bo); + /* prep a list to save the nodes */ + OBJ_CONSTRUCT(&nodes, opal_list_t); + /* unpack and store the URI's */ + cnt = 1; + while (OPAL_SUCCESS == (rc = opal_dss.unpack(&bucket, &uri, &cnt, OPAL_STRING))) { + rc = orte_rml_base_parse_uris(uri, &dmn, NULL); + if (ORTE_SUCCESS != rc) { + OBJ_DESTRUCT(&buf); + OBJ_DESTRUCT(&bucket); + goto release; + } + /* save a node object for this daemon */ + node = OBJ_NEW(orte_node_t); + node->daemon = OBJ_NEW(orte_proc_t); + memcpy(&node->daemon->name, &dmn, sizeof(orte_process_name_t)); + opal_list_append(&nodes, &node->super); + /* register the URI */ + OBJ_CONSTRUCT(&val, opal_value_t); + val.key = OPAL_PMIX_PROC_URI; + val.type = OPAL_STRING; + val.data.string = uri; + if (OPAL_SUCCESS != (rc = opal_pmix.store_local(&dmn, &val))) { + ORTE_ERROR_LOG(rc); + val.key = NULL; + val.data.string = NULL; + OBJ_DESTRUCT(&val); + OBJ_DESTRUCT(&buf); + OBJ_DESTRUCT(&bucket); + goto release; + } + val.key = NULL; + val.data.string = NULL; + OBJ_DESTRUCT(&val); + cnt = 1; + } + OBJ_DESTRUCT(&bucket); + + /* unpack the proc-to-daemon map */ + cnt=1; + if (ORTE_SUCCESS != (rc = opal_dss.unpack(&buf, &bo, &cnt, OPAL_BYTE_OBJECT))) { + ORTE_ERROR_LOG(rc); + OBJ_DESTRUCT(&buf); + goto release; + } + /* load it into a buffer */ + OBJ_CONSTRUCT(&bucket, opal_buffer_t); + opal_dss.load(&bucket, bo->bytes, bo->size); + bo->bytes 
= NULL; + free(bo); + /* unpack and store the map */ + cnt = 1; + while (OPAL_SUCCESS == (rc = opal_dss.unpack(&bucket, &pname, &cnt, ORTE_NAME))) { + /* get the name of the daemon hosting it */ + if (OPAL_SUCCESS != (rc = opal_dss.unpack(&bucket, &dmn, &cnt, ORTE_NAME))) { + OBJ_DESTRUCT(&buf); + OBJ_DESTRUCT(&bucket); + goto release; + } + /* create the proc object */ + proc = OBJ_NEW(orte_proc_t); + memcpy(&proc->name, &pname, sizeof(orte_process_name_t)); + opal_pointer_array_set_item(jdata->procs, pname.vpid, proc); + /* find the daemon */ + OPAL_LIST_FOREACH(node, &nodes, orte_node_t) { + if (node->daemon->name.vpid == dmn.vpid) { + OBJ_RETAIN(node); + proc->node = node; + break; + } + } + } + OBJ_DESTRUCT(&bucket); + OPAL_LIST_DESTRUCT(&nodes); OBJ_DESTRUCT(&buf); + + /* register the nspace */ if (ORTE_SUCCESS != (rc = orte_pmix_server_register_nspace(jdata, true))) { + ORTE_ERROR_LOG(rc); OBJ_RELEASE(jdata); goto release; } - OBJ_RELEASE(jdata); // no reason to keep this around + + /* save the job object so we don't endlessly cycle */ + opal_hash_table_set_value_uint32(orte_job_data, jdata->jobid, jdata); /* restart the cnct processor */ ORTE_PMIX_OPERATION(cd->procs, cd->info, _cnct, cd->cbfunc, cd->cbdata); @@ -617,6 +697,7 @@ static void _cnct(int sd, short args, void *cbdata) * out about it, and all we can do is return an error */ if (orte_pmix_server_globals.server.jobid == ORTE_PROC_MY_HNP->jobid && orte_pmix_server_globals.server.vpid == ORTE_PROC_MY_HNP->vpid) { + ORTE_ERROR_LOG(ORTE_ERR_NOT_SUPPORTED); rc = ORTE_ERR_NOT_SUPPORTED; goto release; } @@ -632,6 +713,7 @@ static void _cnct(int sd, short args, void *cbdata) kv->data.uint32 = geteuid(); opal_list_append(cd->info, &kv->super); if (ORTE_SUCCESS != (rc = pmix_server_lookup_fn(&nm->name, keys, cd->info, _cnlk, cd))) { + ORTE_ERROR_LOG(rc); opal_argv_free(keys); goto release; } @@ -645,6 +727,7 @@ static void _cnct(int sd, short args, void *cbdata) if (!orte_get_attribute(&jdata->attributes, 
ORTE_JOB_NSPACE_REGISTERED, NULL, OPAL_BOOL)) { /* it hasn't been registered yet, so register it now */ if (ORTE_SUCCESS != (rc = orte_pmix_server_register_nspace(jdata, true))) { + ORTE_ERROR_LOG(rc); goto release; } } diff --git a/orte/orted/pmix/pmix_server_fence.c b/orte/orted/pmix/pmix_server_fence.c index fe0f942cd10..e5a1dab4bab 100644 --- a/orte/orted/pmix/pmix_server_fence.c +++ b/orte/orted/pmix/pmix_server_fence.c @@ -227,6 +227,7 @@ static void dmodex_req(int sd, short args, void *cbdata) rc = ORTE_ERR_NOT_FOUND; goto callback; } + /* point the request to the daemon that is hosting the * target process */ req->proxy.vpid = dmn->name.vpid; @@ -240,7 +241,8 @@ static void dmodex_req(int sd, short args, void *cbdata) /* if we are the host daemon, then this is a local request, so * just wait for the data to come in */ - if (ORTE_PROC_MY_NAME->vpid == dmn->name.vpid) { + if (ORTE_PROC_MY_NAME->jobid == dmn->name.jobid && + ORTE_PROC_MY_NAME->vpid == dmn->name.vpid) { return; } diff --git a/orte/orted/pmix/pmix_server_gen.c b/orte/orted/pmix/pmix_server_gen.c index 8993a6cdf0f..648d69557a7 100644 --- a/orte/orted/pmix/pmix_server_gen.c +++ b/orte/orted/pmix/pmix_server_gen.c @@ -356,6 +356,17 @@ void pmix_server_notify(int status, orte_process_name_t* sender, } } + /* protect against infinite loops by marking that this notification was + * passed down to the server by me */ + if (NULL == cd->info) { + cd->info = OBJ_NEW(opal_list_t); + } + val = OBJ_NEW(opal_value_t); + val->key = strdup("orte.notify.donotloop"); + val->type = OPAL_BOOL; + val->data.flag = true; + opal_list_append(cd->info, &val->super); + opal_output_verbose(2, orte_pmix_server_globals.output, "%s NOTIFYING PMIX SERVER OF STATUS %d", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ret); @@ -382,6 +393,14 @@ int pmix_server_notify_event(int code, opal_process_name_t *source, ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_NAME_PRINT(source), code); + /* check to see if this is one we sent down */ + 
OPAL_LIST_FOREACH(val, info, opal_value_t) { + if (0 == strcmp(val->key, "orte.notify.donotloop")) { + /* yep - do not process */ + goto done; + } + } + /* a local process has generated an event - we need to xcast it * to all the daemons so it can be passed down to their local * procs */ @@ -448,6 +467,7 @@ int pmix_server_notify_event(int code, opal_process_name_t *source, /* maintain accounting */ OBJ_RELEASE(sig); + done: /* execute the callback */ if (NULL != cbfunc) { cbfunc(ORTE_SUCCESS, cbdata); @@ -469,7 +489,7 @@ static void _query(int sd, short args, void *cbdata) orte_job_t *jdata; orte_proc_t *proct; orte_app_context_t *app; - int rc, i, k, num_replies; + int rc = ORTE_SUCCESS, i, k, num_replies; opal_list_t *results, targets, *array; size_t n; uint32_t key; @@ -696,7 +716,7 @@ static void _query(int sd, short args, void *cbdata) } } if (ORTE_JOBID_INVALID == jobid) { - rc = ORTE_ERR_BAD_PARAM; + rc = ORTE_ERR_NOT_FOUND; goto done; } /* construct a list of values with opal_proc_info_t @@ -790,12 +810,12 @@ static void _query(int sd, short args, void *cbdata) } done: - if (0 == opal_list_get_size(results)) { - rc = ORTE_ERR_NOT_FOUND; - } else if (opal_list_get_size(results) < opal_list_get_size(cd->info)) { - rc = ORTE_ERR_PARTIAL_SUCCESS; - } else { - rc = ORTE_SUCCESS; + if (ORTE_SUCCESS == rc) { + if (0 == opal_list_get_size(results)) { + rc = ORTE_ERR_NOT_FOUND; + } else if (opal_list_get_size(results) < opal_list_get_size(cd->info)) { + rc = ORTE_ERR_PARTIAL_SUCCESS; + } } cd->infocbfunc(rc, results, cd->cbdata, qrel, results); } @@ -1166,5 +1186,5 @@ int pmix_server_job_ctrl_fn(const opal_process_name_t *requestor, } } - return ORTE_SUCCESS; + return ORTE_OPERATION_SUCCEEDED; } diff --git a/orte/orted/pmix/pmix_server_register_fns.c b/orte/orted/pmix/pmix_server_register_fns.c index 395d89e07fa..0a0a54d764a 100644 --- a/orte/orted/pmix/pmix_server_register_fns.c +++ b/orte/orted/pmix/pmix_server_register_fns.c @@ -13,7 +13,7 @@ * All rights 
reserved. * Copyright (c) 2009-2018 Cisco Systems, Inc. All rights reserved * Copyright (c) 2011 Oak Ridge National Labs. All rights reserved. - * Copyright (c) 2013-2018 Intel, Inc. All rights reserved. + * Copyright (c) 2013-2019 Intel, Inc. All rights reserved. * Copyright (c) 2014 Mellanox Technologies, Inc. * All rights reserved. * Copyright (c) 2014-2016 Research Organization for Information Science @@ -71,6 +71,9 @@ int orte_pmix_server_register_nspace(orte_job_t *jdata, bool force) gid_t gid; opal_list_t *cache; hwloc_obj_t machine; + opal_buffer_t buf, bucket; + opal_byte_object_t bo, *boptr; + orte_proc_t *proc; opal_output_verbose(2, orte_pmix_server_globals.output, "%s register nspace for %s", @@ -472,21 +475,52 @@ int orte_pmix_server_register_nspace(orte_job_t *jdata, bool force) jdata->num_local_procs, info, NULL, NULL); OPAL_LIST_RELEASE(info); + if (OPAL_SUCCESS != rc) { + return rc; + } - /* if the user has connected us to an external server, then we must - * assume there is going to be some cross-mpirun exchange, and so + /* if I am the HNP and this job is a member of my family, then we must + * assume there could be some cross-mpirun exchange, and so * we protect against that situation by publishing the job info * for this job - this allows any subsequent "connect" to retrieve * the job info */ - if (NULL != orte_data_server_uri) { - opal_buffer_t buf; + if (ORTE_PROC_IS_HNP && ORTE_JOB_FAMILY(ORTE_PROC_MY_NAME->jobid) == ORTE_JOB_FAMILY(jdata->jobid)) { + /* pack the job - note that this doesn't include the procs + * or their locations */ OBJ_CONSTRUCT(&buf, opal_buffer_t); if (OPAL_SUCCESS != (rc = opal_dss.pack(&buf, &jdata, 1, ORTE_JOB))) { ORTE_ERROR_LOG(rc); OBJ_DESTRUCT(&buf); return rc; } + + /* pack the hostname, daemon vpid and contact URI for each involved node */ + map = jdata->map; + OBJ_CONSTRUCT(&bucket, opal_buffer_t); + for (i=0; i < map->nodes->size; i++) { + if (NULL == (node = 
(orte_node_t*)opal_pointer_array_get_item(map->nodes, i))) { + continue; + } + opal_dss.pack(&bucket, &node->daemon->rml_uri, 1, OPAL_STRING); + } + opal_dss.unload(&bucket, (void**)&bo.bytes, &bo.size); + boptr = &bo; + opal_dss.pack(&buf, &boptr, 1, OPAL_BYTE_OBJECT); + + /* pack the proc name and daemon vpid for each proc */ + OBJ_CONSTRUCT(&bucket, opal_buffer_t); + for (i=0; i < jdata->procs->size; i++) { + if (NULL == (proc = (orte_proc_t*)opal_pointer_array_get_item(jdata->procs, i))) { + continue; + } + opal_dss.pack(&bucket, &proc->name, 1, ORTE_NAME); + opal_dss.pack(&bucket, &proc->node->daemon->name, 1, ORTE_NAME); + } + opal_dss.unload(&bucket, (void**)&bo.bytes, &bo.size); + boptr = &bo; + opal_dss.pack(&buf, &boptr, 1, OPAL_BYTE_OBJECT); + info = OBJ_NEW(opal_list_t); /* create a key-value with the key being the string jobid * and the value being the byte object */ diff --git a/orte/runtime/Makefile.am b/orte/runtime/Makefile.am index 4081e269233..a8defbf8eb7 100644 --- a/orte/runtime/Makefile.am +++ b/orte/runtime/Makefile.am @@ -38,7 +38,6 @@ headers += \ lib@ORTE_LIB_PREFIX@open_rte_la_SOURCES += \ runtime/orte_finalize.c \ - runtime/orte_init.c \ runtime/orte_locks.c \ runtime/orte_globals.c \ runtime/orte_quit.c \ @@ -52,3 +51,12 @@ lib@ORTE_LIB_PREFIX@open_rte_la_SOURCES += \ runtime/orte_cr.c \ runtime/orte_data_server.c \ runtime/orte_info_support.c + +# The MPIR portion of the library must be built with flags to +# enable stepping out of MPI_INIT into main. +# Use an intermediate library to isolate the debug object. 
+noinst_LTLIBRARIES += libruntime_mpir.la +libruntime_mpir_la_SOURCES = \ + runtime/orte_init.c +libruntime_mpir_la_CFLAGS = $(MPIR_UNWIND_CFLAGS) +lib@ORTE_LIB_PREFIX@open_rte_la_LIBADD += libruntime_mpir.la diff --git a/orte/runtime/data_type_support/orte_dt_print_fns.c b/orte/runtime/data_type_support/orte_dt_print_fns.c index d017adc1cfa..fbcfa698506 100644 --- a/orte/runtime/data_type_support/orte_dt_print_fns.c +++ b/orte/runtime/data_type_support/orte_dt_print_fns.c @@ -13,7 +13,7 @@ * Copyright (c) 2011-2015 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2011-2013 Los Alamos National Security, LLC. * All rights reserved. - * Copyright (c) 2013-2017 Intel, Inc. All rights reserved. + * Copyright (c) 2013-2018 Intel, Inc. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -442,9 +442,12 @@ int orte_dt_print_node(char **output, char *prefix, orte_node_t *src, opal_data_ */ int orte_dt_print_proc(char **output, char *prefix, orte_proc_t *src, opal_data_type_t type) { - char *tmp, *tmp2, *pfx2; - hwloc_obj_t loc=NULL, bd=NULL; - char locale[1024], bind[1024]; + char *tmp, *tmp3, *pfx2; + hwloc_obj_t loc=NULL; + char locale[1024], tmp1[1024], tmp2[1024]; + hwloc_cpuset_t mycpus; + char *str=NULL, *cpu_bitmap=NULL; + /* set default result */ *output = NULL; @@ -470,10 +473,6 @@ int orte_dt_print_proc(char **output, char *prefix, orte_proc_t *src, opal_data_ } if (!orte_devel_level_output) { - hwloc_cpuset_t mycpus; - char tmp1[1024], tmp2[1024]; - char *str=NULL, *cpu_bitmap=NULL; - if (orte_get_attribute(&src->attributes, ORTE_PROC_CPU_BITMAP, (void**)&cpu_bitmap, OPAL_STRING) && NULL != src->node->topology && NULL != src->node->topology->topo) { mycpus = hwloc_bitmap_alloc(); @@ -509,10 +508,10 @@ int orte_dt_print_proc(char **output, char *prefix, orte_proc_t *src, opal_data_ asprintf(&tmp, "\n%sData for proc: %s", pfx2, ORTE_NAME_PRINT(&src->name)); - asprintf(&tmp2, "%s\n%s\tPid: %ld\tLocal rank: %lu\tNode rank: %lu\tApp 
rank: %d", tmp, pfx2, + asprintf(&tmp3, "%s\n%s\tPid: %ld\tLocal rank: %lu\tNode rank: %lu\tApp rank: %d", tmp, pfx2, (long)src->pid, (unsigned long)src->local_rank, (unsigned long)src->node_rank, src->app_rank); free(tmp); - tmp = tmp2; + tmp = tmp3; if (orte_get_attribute(&src->attributes, ORTE_PROC_HWLOC_LOCALE, (void**)&loc, OPAL_PTR)) { if (NULL != loc) { @@ -525,23 +524,26 @@ int orte_dt_print_proc(char **output, char *prefix, orte_proc_t *src, opal_data_ } else { strcpy(locale, "UNKNOWN"); } - if (orte_get_attribute(&src->attributes, ORTE_PROC_HWLOC_BOUND, (void**)&bd, OPAL_PTR)) { - if (NULL != bd) { - if (OPAL_ERR_NOT_BOUND == opal_hwloc_base_cset2mapstr(bind, sizeof(bind), src->node->topology->topo, bd->cpuset)) { - strcpy(bind, "UNBOUND"); - } - } else { - strcpy(bind, "UNBOUND"); - } + if (orte_get_attribute(&src->attributes, ORTE_PROC_CPU_BITMAP, (void**)&cpu_bitmap, OPAL_STRING) && + NULL != src->node->topology && NULL != src->node->topology->topo) { + mycpus = hwloc_bitmap_alloc(); + hwloc_bitmap_list_sscanf(mycpus, cpu_bitmap); + opal_hwloc_base_cset2mapstr(tmp2, sizeof(tmp2), src->node->topology->topo, mycpus); } else { - strcpy(bind, "UNBOUND"); + snprintf(tmp2, sizeof(tmp2), "UNBOUND"); } - asprintf(&tmp2, "%s\n%s\tState: %s\tApp_context: %ld\n%s\tLocale: %s\n%s\tBinding: %s", tmp, pfx2, - orte_proc_state_to_str(src->state), (long)src->app_idx, pfx2, locale, pfx2, bind); + asprintf(&tmp3, "%s\n%s\tState: %s\tApp_context: %ld\n%s\tLocale: %s\n%s\tBinding: %s", tmp, pfx2, + orte_proc_state_to_str(src->state), (long)src->app_idx, pfx2, locale, pfx2, tmp2); free(tmp); + if (NULL != str) { + free(str); + } + if (NULL != cpu_bitmap) { + free(cpu_bitmap); + } /* set the return */ - *output = tmp2; + *output = tmp3; free(pfx2); return ORTE_SUCCESS; diff --git a/orte/test/system/regex.c b/orte/test/system/regex.c index 1fb3496f3b7..45d1c140efd 100644 --- a/orte/test/system/regex.c +++ b/orte/test/system/regex.c @@ -13,16 +13,19 @@ #include 
"opal/util/argv.h" #include "orte/util/proc_info.h" -#include "orte/util/regex.h" #include "orte/mca/errmgr/errmgr.h" #include "orte/runtime/runtime.h" +#include "orte/mca/regx/regx.h" +#include "orte/mca/regx/base/base.h" int main(int argc, char **argv) { int rc; - char *regex, *save; + char *regex = NULL, **nodelist; char **nodes=NULL; int i; + opal_pointer_array_t *node_pool; + orte_node_t *nptr; if (argc < 1 || NULL == argv[1]) { fprintf(stderr, "usage: regex \n"); @@ -31,10 +34,19 @@ int main(int argc, char **argv) orte_init(&argc, &argv, ORTE_PROC_NON_MPI); + if (ORTE_SUCCESS != (rc = mca_base_framework_open(&orte_regx_base_framework, 0))) { + ORTE_ERROR_LOG(rc); + return rc; + } + if (ORTE_SUCCESS != (rc = orte_regx_base_select())) { + ORTE_ERROR_LOG(rc); + return rc; + } + if (NULL != strchr(argv[1], '[')) { /* given a regex to analyze */ fprintf(stderr, "ANALYZING REGEX: %s\n", argv[1]); - if (ORTE_SUCCESS != (rc = orte_regex_extract_node_names(argv[1], &nodes))) { + if (ORTE_SUCCESS != (rc = orte_regx.extract_node_names(argv[1], &nodes))) { ORTE_ERROR_LOG(rc); } for (i=0; NULL != nodes[i]; i++) { @@ -45,23 +57,61 @@ int main(int argc, char **argv) return 0; } - save = strdup(argv[1]); - if (ORTE_SUCCESS != (rc = orte_regex_create(save, &regex))) { + node_pool = OBJ_NEW(opal_pointer_array_t); + nodelist = opal_argv_split(argv[1], ','); + for (i=0; NULL != nodelist[i]; i++) { + orte_proc_t *daemon = NULL; + + nptr = OBJ_NEW(orte_node_t); + nptr->name = strdup(nodelist[i]); + daemon = OBJ_NEW(orte_proc_t); + daemon->name.jobid = 123; + daemon->name.vpid = i; + nptr->daemon = daemon; + + nptr->index = opal_pointer_array_add(node_pool, nptr); + } + + + + if (ORTE_SUCCESS != (rc = orte_regx.nidmap_create(node_pool, &regex))) { ORTE_ERROR_LOG(rc); } else { + char *vpids = strchr(regex, '@'); + vpids[0] = '\0'; fprintf(stderr, "REGEX: %s\n", regex); - if (ORTE_SUCCESS != (rc = orte_regex_extract_node_names(regex, &nodes))) { + if (ORTE_SUCCESS != (rc = 
orte_regx.extract_node_names(regex, &nodes))) { ORTE_ERROR_LOG(rc); } free(regex); regex = opal_argv_join(nodes, ','); - opal_argv_free(nodes); if (0 == strcmp(regex, argv[1])) { fprintf(stderr, "EXACT MATCH\n"); } else { fprintf(stderr, "ERROR: %s\n", regex); + if (opal_argv_count(nodes) != opal_argv_count(nodelist)) { + fprintf(stderr, "ERROR: number of nodes %d, expected %d\n", + opal_argv_count(nodes), opal_argv_count(nodelist)); + goto exit; + } + for (i=0; NULL != nodelist[i]; i++) { + if (0 == strcmp(nodelist[i], nodes[i])) { + fprintf(stderr, "%s OK\n", nodelist[i]); + } + fprintf(stderr, "%s ERROR, expect %s\n", nodes[i], nodelist[i]); + } } free(regex); + regex = NULL; + } +exit: + opal_argv_free(nodelist); + opal_argv_free(nodes); + + + for (i=0; (nptr = opal_pointer_array_get_item(node_pool, i)) != NULL; i++) { + free(nptr->name); + OBJ_RELEASE(nptr->daemon); } - free(save); + OBJ_RELEASE(node_pool); } diff --git a/orte/tools/Makefile.am b/orte/tools/Makefile.am index 228f5f43af6..be78bb56ea1 100644 --- a/orte/tools/Makefile.am +++ b/orte/tools/Makefile.am @@ -26,28 +26,16 @@ SUBDIRS += \ tools/orte-clean \ - tools/orte-ps \ tools/orted \ tools/orterun \ tools/wrappers \ - tools/orte-top \ tools/orte-info \ tools/orte-server DIST_SUBDIRS += \ tools/orte-clean \ - tools/orte-ps \ tools/orted \ tools/orterun \ tools/wrappers \ - tools/orte-top \ tools/orte-info \ - tools/orte-server \ - tools/orte-dvm \ - tools/ompi-prun - -if OPAL_WANT_PRUN -SUBDIRS += \ - tools/ompi-prun \ - tools/orte-dvm -endif + tools/orte-server diff --git a/orte/tools/ompi-prun/Makefile.am b/orte/tools/ompi-prun/Makefile.am deleted file mode 100644 index 17ace88ea3b..00000000000 --- a/orte/tools/ompi-prun/Makefile.am +++ /dev/null @@ -1,59 +0,0 @@ -# -# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana -# University Research and Technology -# Corporation. All rights reserved. 
-# Copyright (c) 2004-2005 The University of Tennessee and The University -# of Tennessee Research Foundation. All rights -# reserved. -# Copyright (c) 2004-2009 High Performance Computing Center Stuttgart, -# University of Stuttgart. All rights reserved. -# Copyright (c) 2004-2005 The Regents of the University of California. -# All rights reserved. -# Copyright (c) 2008-2014 Cisco Systems, Inc. All rights reserved. -# Copyright (c) 2008 Sun Microsystems, Inc. All rights reserved. -# Copyright (c) 2015-2018 Intel, Inc. All rights reserved. -# $COPYRIGHT$ -# -# Additional copyrights may follow -# -# $HEADER$ -# - -# This is not quite in the Automake spirit, but we have to do it. -# Since the totalview portion of the library must be built with -g, we -# must eliminate the CFLAGS that are passed in here by default (which -# may already have debugging and/or optimization flags). We use -# post-processed forms of the CFLAGS in the library targets down -# below. - -CFLAGS = $(CFLAGS_WITHOUT_OPTFLAGS) $(DEBUGGER_CFLAGS) - -include $(top_srcdir)/Makefile.ompi-rules - -man_pages = ompi-prun.1 -EXTRA_DIST = $(man_pages:.1=.1in) - -if OPAL_INSTALL_BINARIES - -bin_PROGRAMS = ompi-prun - -nodist_man_MANS = $(man_pages) - -# Ensure that the man pages are rebuilt if the opal_config.h file -# changes; a "good enough" way to know if configure was run again (and -# therefore the release date or version may have changed) -$(nodist_man_MANS): $(top_builddir)/opal/include/opal_config.h - -endif # OPAL_INSTALL_BINARIES - -ompi_prun_SOURCES = \ - main.c \ - prun.c \ - prun.h - -ompi_prun_LDADD = \ - $(top_builddir)/orte/lib@ORTE_LIB_PREFIX@open-rte.la \ - $(top_builddir)/opal/lib@OPAL_LIB_PREFIX@open-pal.la - -distclean-local: - rm -f $(man_pages) diff --git a/orte/tools/ompi-prun/main.c b/orte/tools/ompi-prun/main.c deleted file mode 100644 index 15b205b1378..00000000000 --- a/orte/tools/ompi-prun/main.c +++ /dev/null @@ -1,33 +0,0 @@ 
-/*************************************************************************** - * * - * Open MPI: Open Source High Performance Computing * - * * - * http://www.open-mpi.org/ * - * * - ***************************************************************************/ - -#include "prun.h" - -int main(int argc, char *argv[]) -{ - return prun(argc, argv); -} - -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2005 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * Copyright (c) 2017-2018 Intel, Inc. All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ diff --git a/orte/tools/ompi-prun/ompi-prun.1in b/orte/tools/ompi-prun/ompi-prun.1in deleted file mode 100644 index 32b88943462..00000000000 --- a/orte/tools/ompi-prun/ompi-prun.1in +++ /dev/null @@ -1,1597 +0,0 @@ -.\" -*- nroff -*- -.\" Copyright (c) 2009-2016 Cisco Systems, Inc. All rights reserved. -.\" Copyright (c) 2008-2009 Sun Microsystems, Inc. All rights reserved. -.\" Copyright (c) 2017-2018 Intel, Inc. All rights reserved. -.\" Copyright (c) 2017 Los Alamos National Security, LLC. All rights -.\" reserved. -.\" $COPYRIGHT$ -.\" -.\" Man page for PSRVR's prun command -.\" -.\" .TH name section center-footer left-footer center-header -.TH PRUN 1 "#OMPI_DATE#" "#PACKAGE_VERSION#" "#PACKAGE_NAME#" -.\" ************************** -.\" Name Section -.\" ************************** -.SH NAME -. -prun \- Execute serial and parallel jobs with the PMIx Reference Server. - -. -.\" ************************** -.\" Synopsis Section -.\" ************************** -.SH SYNOPSIS -. 
-.PP -Single Process Multiple Data (SPMD) Model: - -.B prun -[ options ] -.B -[ ] -.P - -Multiple Instruction Multiple Data (MIMD) Model: - -.B prun -[ global_options ] - [ local_options1 ] -.B -[ ] : - [ local_options2 ] -.B -[ ] : - ... : - [ local_optionsN ] -.B -[ ] -.P - -Note that in both models, invoking \fIprun\fP via an absolute path -name is equivalent to specifying the \fI--prefix\fP option with a -\fI\fR value equivalent to the directory where \fIprun\fR -resides, minus its last subdirectory. For example: - - \fB%\fP /usr/local/bin/prun ... - -is equivalent to - - \fB%\fP prun --prefix /usr/local - -. -.\" ************************** -.\" Quick Summary Section -.\" ************************** -.SH QUICK SUMMARY -. -If you are simply looking for how to run an application, you -probably want to use a command line of the following form: - - \fB%\fP prun [ -np X ] [ --hostfile ] - -This will run X copies of \fI\fR in your current run-time -environment (if running under a supported resource manager, PSRVR's -\fIprun\fR will usually automatically use the corresponding resource manager -process starter, as opposed to, for example, \fIrsh\fR or \fIssh\fR, -which require the use of a hostfile, or will default to running all X -copies on the localhost), scheduling (by default) in a round-robin fashion by -CPU slot. See the rest of this page for more details. -.P -Please note that prun automatically binds processes. Three binding patterns are used in the absence of any further directives: -.TP 18 -.B Bind to core: -when the number of processes is <= 2 -. -. -.TP -.B Bind to socket: -when the number of processes is > 2 -. -. -.TP -.B Bind to none: -when oversubscribed -. -. -.P -If your application uses threads, then you probably want to ensure that you are -either not bound at all (by specifying --bind-to none), or bound to multiple cores -using an appropriate binding level or specific number of processing elements per -application process. -. 
-.\" ************************** -.\" Options Section -.\" ************************** -.SH OPTIONS -. -.I prun -will send the name of the directory where it was invoked on the local -node to each of the remote nodes, and attempt to change to that -directory. See the "Current Working Directory" section below for further -details. -.\" -.\" Start options listing -.\" Indent 10 characters from start of first column to start of second column -.TP 10 -.B -The program executable. This is identified as the first non-recognized argument -to prun. -. -. -.TP -.B -Pass these run-time arguments to every new process. These must always -be the last arguments to \fIprun\fP. If an app context file is used, -\fI\fP will be ignored. -. -. -.TP -.B -h\fR,\fP --help -Display help for this command -. -. -.TP -.B -q\fR,\fP --quiet -Suppress informative messages from prun during application execution. -. -. -.TP -.B -v\fR,\fP --verbose -Be verbose -. -. -.TP -.B -V\fR,\fP --version -Print version number. If no other arguments are given, this will also -cause prun to exit. -. -. -.TP -.B -N \fR\fP -.br -Launch num processes per node on all allocated nodes (synonym for npernode). -. -. -. -.TP -.B -display-map\fR,\fP --display-map -Display a table showing the mapped location of each process prior to launch. -. -. -. -.TP -.B -display-allocation\fR,\fP --display-allocation -Display the detected resource allocation. -. -. -. -.TP -.B -output-proctable\fR,\fP --output-proctable -Output the debugger proctable after launch. -. -. -. -.TP -.B -max-vm-size\fR,\fP --max-vm-size \fR\fP -Number of processes to run. -. -. -. -.TP -.B -novm\fR,\fP --novm -Execute without creating an allocation-spanning virtual machine (only start -daemons on nodes hosting application procs). -. -. -. -.TP -.B -hnp\fR,\fP --hnp \fR\fP -Specify the URI of the \fRpsrvr\fP process, or the name of the file (specified as -file:filename) that contains that info. -. -. -. 
-.P -Use one of the following options to specify which hosts (nodes) within the \fRpsrvr\fP to run on. -. -. -.TP -.B -H\fR,\fP -host\fR,\fP --host \fR\fP -List of hosts on which to invoke processes. -. -. -.TP -.B -hostfile\fR,\fP --hostfile \fR\fP -Provide a hostfile to use. -.\" JJH - Should have man page for how to format a hostfile properly. -. -. -.TP -.B -default-hostfile\fR,\fP --default-hostfile \fR\fP -Provide a default hostfile. -. -. -.TP -.B -machinefile\fR,\fP --machinefile \fR\fP -Synonym for \fI-hostfile\fP. -. -. -. -. -.TP -.B -cpu-set\fR,\fP --cpu-set \fR\fP -Restrict launched processes to the specified logical cpus on each node (comma-separated -list). Note that the binding options will still apply within the specified envelope - e.g., -you can elect to bind each process to only one cpu within the specified cpu set. -. -. -. -.P -The following options specify the number of processes to launch. Note that none -of the options imply a particular binding policy - e.g., requesting N processes -for each socket does not imply that the processes will be bound to the socket. -. -. -.TP -.B -c\fR,\fP -n\fR,\fP --n\fR,\fP -np \fR<#>\fP -Run this many copies of the program on the given nodes. This option -indicates that the specified file is an executable program and not an -application context. If no value is provided for the number of copies to -execute (i.e., neither the "-np" nor its synonyms are provided on the command -line), prun will automatically execute a copy of the program on -each process slot (see below for description of a "process slot"). This -feature, however, can only be used in the SPMD model and will return an -error (without beginning execution of the application) otherwise. -. -. -.TP -.B —map-by ppr:N: -Launch N times the number of objects of the specified type on each node. -. -. -.TP -.B -npersocket\fR,\fP --npersocket \fR<#persocket>\fP -On each node, launch this many processes times the number of processor -sockets on the node. 
-The \fI-npersocket\fP option also turns on the \fI-bind-to-socket\fP option. -(deprecated in favor of --map-by ppr:n:socket) -. -. -.TP -.B -npernode\fR,\fP --npernode \fR<#pernode>\fP -On each node, launch this many processes. -(deprecated in favor of --map-by ppr:n:node) -. -. -.TP -.B -pernode\fR,\fP --pernode -On each node, launch one process -- equivalent to \fI-npernode\fP 1. -(deprecated in favor of --map-by ppr:1:node) -. -. -. -. -.P -To map processes: -. -. -.TP -.B --map-by \fR\fP -Map to the specified object, defaults to \fIsocket\fP. Supported options -include slot, hwthread, core, L1cache, L2cache, L3cache, socket, numa, -board, node, sequential, distance, and ppr. Any object can include -modifiers by adding a \fR:\fP and any combination of PE=n (bind n -processing elements to each proc), SPAN (load -balance the processes across the allocation), OVERSUBSCRIBE (allow -more processes on a node than processing elements), and NOOVERSUBSCRIBE. -This includes PPR, where the pattern would be terminated by another colon -to separate it from the modifiers. -. -.TP -.B -bycore\fR,\fP --bycore -Map processes by core (deprecated in favor of --map-by core) -. -.TP -.B -byslot\fR,\fP --byslot -Map and rank processes round-robin by slot. -. -.TP -.B -nolocal\fR,\fP --nolocal -Do not run any copies of the launched application on the same node as -prun is running. This option will override listing the localhost -with \fB--host\fR or any other host-specifying mechanism. -. -.TP -.B -nooversubscribe\fR,\fP --nooversubscribe -Do not oversubscribe any nodes; error (without starting any processes) -if the requested number of processes would cause oversubscription. -This option implicitly sets "max_slots" equal to the "slots" value for -each node. (Enabled by default). -. -.TP -.B -oversubscribe\fR,\fP --oversubscribe -Nodes are allowed to be oversubscribed, even on a managed system, and -overloading of processing elements. -. 
-.TP -.B -bynode\fR,\fP --bynode -Launch processes one per node, cycling by node in a round-robin -fashion. This spreads processes evenly among nodes and assigns -ranks in a round-robin, "by node" manner. -. -.TP -.B -cpu-list\fR,\fP --cpu-list \fR\fP -List of processor IDs to bind processes to [default=NULL]. -. -. -. -. -.P -To order processes' ranks: -. -. -.TP -.B --rank-by \fR\fP -Rank in round-robin fashion according to the specified object, -defaults to \fIslot\fP. Supported options -include slot, hwthread, core, L1cache, L2cache, L3cache, -socket, numa, board, and node. -. -. -. -. -.P -For process binding: -. -.TP -.B --bind-to \fR\fP -Bind processes to the specified object, defaults to \fIcore\fP. Supported options -include slot, hwthread, core, l1cache, l2cache, l3cache, socket, numa, board, and none. -. -.TP -.B -cpus-per-proc\fR,\fP --cpus-per-proc \fR<#perproc>\fP -Bind each process to the specified number of cpus. -(deprecated in favor of --map-by :PE=n) -. -.TP -.B -cpus-per-rank\fR,\fP --cpus-per-rank \fR<#perrank>\fP -Alias for \fI-cpus-per-proc\fP. -(deprecated in favor of --map-by :PE=n) -. -.TP -.B -bind-to-core\fR,\fP --bind-to-core -Bind processes to cores (deprecated in favor of --bind-to core) -. -.TP -.B -bind-to-socket\fR,\fP --bind-to-socket -Bind processes to processor sockets (deprecated in favor of --bind-to socket) -. -.TP -.B -report-bindings\fR,\fP --report-bindings -Report any bindings for launched processes. -. -. -. -. -.P -For rankfiles: -. -. -.TP -.B -rf\fR,\fP --rankfile \fR\fP -Provide a rankfile file. -. -. -. -. -.P -To manage standard I/O: -. -. -.TP -.B -output-filename\fR,\fP --output-filename \fR\fP -Redirect the stdout, stderr, and stddiag of all processes to a process-unique version of -the specified filename. Any directories in the filename will automatically be created. 
-Each output file will consist of filename.id, where the id will be the -processes' rank, left-filled with -zero's for correct ordering in listings. -. -. -.TP -.B -stdin\fR,\fP --stdin\fR \fP -The rank of the process that is to receive stdin. The -default is to forward stdin to rank 0, but this option -can be used to forward stdin to any process. It is also acceptable to -specify \fInone\fP, indicating that no processes are to receive stdin. -. -. -.TP -.B -merge-stderr-to-stdout\fR,\fP --merge-stderr-to-stdout -Merge stderr to stdout for each process. -. -. -.TP -.B -tag-output\fR,\fP --tag-output -Tag each line of output to stdout, stderr, and stddiag with \fB[jobid, MCW_rank]\fP -indicating the process jobid and rank of the process that generated the output, -and the channel which generated it. -. -. -.TP -.B -timestamp-output\fR,\fP --timestamp-output -Timestamp each line of output to stdout, stderr, and stddiag. -. -. -.TP -.B -xml\fR,\fP --xml -Provide all output to stdout, stderr, and stddiag in an xml format. -. -. -.TP -.B -xml-file\fR,\fP --xml-file \fR\fP -Provide all output in XML format to the specified file. -. -. -.TP -.B -xterm\fR,\fP --xterm \fR\fP -Display the output from the processes identified by their ranks in separate xterm windows. The ranks are specified -as a comma-separated list of ranges, with a -1 indicating all. A separate -window will be created for each specified process. -.B Note: -xterm will normally terminate the window upon termination of the process running -within it. However, by adding a "!" to the end of the list of specified ranks, -the proper options will be provided to ensure that xterm keeps the window open -\fIafter\fP the process terminates, thus allowing you to see the process' output. -Each xterm window will subsequently need to be manually closed. -.B Note: -In some environments, xterm may require that the executable be in the user's -path, or be specified in absolute or relative terms. 
Thus, it may be necessary -to specify a local executable as "./foo" instead of just "foo". If xterm fails to -find the executable, prun will hang, but still respond correctly to a ctrl-c. -If this happens, please check that the executable is being specified correctly -and try again. -. -. -. -. -.P -To manage files and runtime environment: -. -. -.TP -.B -path\fR,\fP --path \fR\fP - that will be used when attempting to locate the requested -executables. This is used prior to using the local PATH setting. -. -. -.TP -.B --prefix \fR\fP -Prefix directory that will be used to set the \fIPATH\fR and -\fILD_LIBRARY_PATH\fR on the remote node before invoking -the target process. See the "Remote Execution" section, below. -. -. -.TP -.B --noprefix -Disable the automatic --prefix behavior -. -. -.TP -.B -s\fR,\fP --preload-binary -Copy the specified executable(s) to remote machines prior to starting remote processes. The -executables will be copied to the session directory and will be deleted upon -completion of the job. -. -. -.TP -.B --preload-files \fR\fP -Preload the comma separated list of files to the current working directory of the remote -machines where processes will be launched prior to starting those processes. -. -. -.TP -.B -set-cwd-to-session-dir\fR,\fP --set-cwd-to-session-dir -Set the working directory of the started processes to their session directory. -. -. -.TP -.B -wd \fR\fP -Synonym for \fI-wdir\fP. -. -. -.TP -.B -wdir \fR\fP -Change to the directory before the user's program executes. -See the "Current Working Directory" section for notes on relative paths. -.B Note: -If the \fI-wdir\fP option appears both on the command line and in an -application context, the context will take precedence over the command -line. Thus, if the path to the desired wdir is different -on the backend nodes, then it must be specified as an absolute path that -is correct for the backend node. -. -. 
-.TP -.B -x \fR\fP -Export the specified environment variables to the remote nodes before -executing the program. Only one environment variable can be specified -per \fI-x\fP option. Existing environment variables can be specified -or new variable names specified with corresponding values. For -example: - \fB%\fP prun -x DISPLAY -x OFILE=/tmp/out ... - -The parser for the \fI-x\fP option is not very sophisticated; it does -not even understand quoted values. Users are advised to set variables -in the environment, and then use \fI-x\fP to export (not define) them. -. -. -. -. -.P -Setting MCA parameters: -. -. -.TP -.B -gpmca\fR,\fP --gpmca \fR \fP -Pass global MCA parameters that are applicable to all contexts. \fI\fP is -the parameter name; \fI\fP is the parameter value. -. -. -.TP -.B -pmca\fR,\fP --pmca \fR \fP -Send arguments to various MCA modules. See the "MCA" section, below. -. -. -.TP -.B -am \fR\fP -Aggregate MCA parameter set file list. -. -. -.TP -.B -tune\fR,\fP --tune \fR\fP -Specify a tune file to set arguments for various MCA modules and environment variables. -See the "Setting MCA parameters and environment variables from file" section, below. -. -. -. -. -.P -For debugging: -. -. -.TP -.B -debug\fR,\fP --debug -Invoke the user-level debugger indicated by the \fIorte_base_user_debugger\fP -MCA parameter. -. -. -.TP -.B --get-stack-traces -When paired with the -.B --timeout -option, -.I prun -will obtain and print out stack traces from all launched processes -that are still alive when the timeout expires. Note that obtaining -stack traces can take a little time and produce a lot of output, -especially for large process-count jobs. -. -. -.TP -.B -debugger\fR,\fP --debugger \fR\fP -Sequence of debuggers to search for when \fI--debug\fP is used (i.e. -a synonym for \fIorte_base_user_debugger\fP MCA parameter). -. -. -.TP -.B --timeout \fR -The maximum number of seconds that -.I prun -will run. 
After this many seconds, -.I prun -will abort the launched job and exit with a non-zero exit status. -Using -.B --timeout -can be also useful when combined with the -.B --get-stack-traces -option. -. -. -.TP -.B -tv\fR,\fP --tv -Launch processes under the TotalView debugger. -Deprecated backwards compatibility flag. Synonym for \fI--debug\fP. -. -. -. -. -.P -There are also other options: -. -. -.TP -.B --allow-run-as-root -Allow -.I prun -to run when executed by the root user -.RI ( prun -defaults to aborting when launched as the root user). -. -. -.TP -.B --app \fR\fP -Provide an appfile, ignoring all other command line options. -. -. -.TP -.B -cf\fR,\fP --cartofile \fR\fP -Provide a cartography file. -. -. -.TP -.B -continuous\fR,\fP --continuous -Job is to run until explicitly terminated. -. -. -.TP -.B -disable-recovery\fR,\fP --disable-recovery -Disable recovery (resets all recovery options to off). -. -. -.TP -.B -do-not-launch\fR,\fP --do-not-launch -Perform all necessary operations to prepare to launch the application, but do not actually launch it. -. -. -.TP -.B -do-not-resolve\fR,\fP --do-not-resolve -Do not attempt to resolve interfaces. -. -. -.TP -.B -enable-recovery\fR,\fP --enable-recovery -Enable recovery from process failure [Default = disabled]. -. -. -.TP -.B -index-argv-by-rank\fR,\fP --index-argv-by-rank -Uniquely index argv[0] for each process using its rank. -. -. -.TP -.B -max-restarts\fR,\fP --max-restarts \fR\fP -Max number of times to restart a failed process. -. -. -.TP -.B --ppr \fR\fP -Comma-separated list of number of processes on a given resource type [default: none]. -. -. -.TP -.B -report-child-jobs-separately\fR,\fP --report-child-jobs-separately -Return the exit status of the primary job only. -. -. -.TP -.B -report-events\fR,\fP --report-events \fR\fP -Report events to a tool listening at the specified URI. -. -. -.TP -.B -report-pid\fR,\fP --report-pid \fR\fP -Print out prun's PID during startup. 
The channel must be either a '-' to indicate -that the pid is to be output to stdout, a '+' to indicate that the pid is to be -output to stderr, or a filename to which the pid is to be written. -. -. -.TP -.B -report-uri\fR,\fP --report-uri \fR\fP -Print out prun's URI during startup. The channel must be either a '-' to indicate -that the URI is to be output to stdout, a '+' to indicate that the URI is to be -output to stderr, or a filename to which the URI is to be written. -. -. -.TP -.B -show-progress\fR,\fP --show-progress -Output a brief periodic report on launch progress. -. -. -.TP -.B -terminate\fR,\fP --terminate -Terminate the DVM. -. -. -.TP -.B -use-hwthread-cpus\fR,\fP --use-hwthread-cpus -Use hardware threads as independent cpus. -. -. -.TP -.B -use-regexp\fR,\fP --use-regexp -Use regular expressions for launch. -. -. -. -. -.P -The following options are useful for developers; they are not generally -useful to most users: -. -.TP -.B -d\fR,\fP --debug-devel -Enable debugging. This is not generally useful for most users. -. -. -.TP -.B -display-devel-allocation\fR,\fP --display-devel-allocation -Display a detailed list of the allocation being used by this job. -. -. -.TP -.B -display-devel-map\fR,\fP --display-devel-map -Display a more detailed table showing the mapped location of each process prior to launch. -. -. -.TP -.B -display-diffable-map\fR,\fP --display-diffable-map -Display a diffable process map just before launch. -. -. -.TP -.B -display-topo\fR,\fP --display-topo -Display the topology as part of the process map just before launch. -. -. -.TP -.B --report-state-on-timeout -When paired with the -.B --timeout -command line option, report the run-time subsystem state of each -process when the timeout expires. -. -. -.P -There may be other options listed with \fIprun --help\fP. -. -. -.\" ************************** -.\" Description Section -.\" ************************** -.SH DESCRIPTION -. 
-One invocation of \fIprun\fP starts an application running under PSRVR. If the application is single process multiple data (SPMD), the application -can be specified on the \fIprun\fP command line. - -If the application is multiple instruction multiple data (MIMD), comprising of -multiple programs, the set of programs and argument can be specified in one of -two ways: Extended Command Line Arguments, and Application Context. -.PP -An application context describes the MIMD program set including all arguments -in a separate file. -.\" See appcontext(5) for a description of the application context syntax. -This file essentially contains multiple \fIprun\fP command lines, less the -command name itself. The ability to specify different options for different -instantiations of a program is another reason to use an application context. -.PP -Extended command line arguments allow for the description of the application -layout on the command line using colons (\fI:\fP) to separate the specification -of programs and arguments. Some options are globally set across all specified -programs (e.g. --hostfile), while others are specific to a single program -(e.g. -np). -. -. -. -.SS Specifying Host Nodes -. -Host nodes can be identified on the \fIprun\fP command line with the \fI-host\fP -option or in a hostfile. -. -.PP -For example, -. -.TP 4 -prun -H aa,aa,bb ./a.out -launches two processes on node aa and one on bb. -. -.PP -Or, consider the hostfile -. - - \fB%\fP cat myhostfile - aa slots=2 - bb slots=2 - cc slots=2 - -. -.PP -Here, we list both the host names (aa, bb, and cc) but also how many "slots" -there are for each. Slots indicate how many processes can potentially execute -on a node. For best performance, the number of slots may be chosen to be the -number of cores on the node or the number of processor sockets. 
If the hostfile -does not provide slots information, PSRVR will attempt to discover the number -of cores (or hwthreads, if the use-hwthreads-as-cpus option is set) and set the -number of slots to that value. This default behavior also occurs when specifying -the \fI-host\fP option with a single hostname. Thus, the command -. -.TP 4 -prun -H aa ./a.out -launches a number of processes equal to the number of cores on node aa. -. -.PP -. -.TP 4 -prun -hostfile myhostfile ./a.out -will launch two processes on each of the three nodes. -. -.TP 4 -prun -hostfile myhostfile -host aa ./a.out -will launch two processes, both on node aa. -. -.TP 4 -prun -hostfile myhostfile -host dd ./a.out -will find no hosts to run on and abort with an error. -That is, the specified host dd is not in the specified hostfile. -. -.PP -When running under resource managers (e.g., SLURM, Torque, etc.), -PSRVR will obtain both the hostnames and the number of slots directly -from the resource manger. -. -.SS Specifying Number of Processes -. -As we have just seen, the number of processes to run can be set using the -hostfile. Other mechanisms exist. -. -.PP -The number of processes launched can be specified as a multiple of the -number of nodes or processor sockets available. For example, -. -.TP 4 -prun -H aa,bb -npersocket 2 ./a.out -launches processes 0-3 on node aa and process 4-7 on node bb, -where aa and bb are both dual-socket nodes. -The \fI-npersocket\fP option also turns on the \fI-bind-to-socket\fP option, -which is discussed in a later section. -. -.TP 4 -prun -H aa,bb -npernode 2 ./a.out -launches processes 0-1 on node aa and processes 2-3 on node bb. -. -.TP 4 -prun -H aa,bb -npernode 1 ./a.out -launches one process per host node. -. -.TP 4 -prun -H aa,bb -pernode ./a.out -is the same as \fI-npernode\fP 1. -. -. -.PP -Another alternative is to specify the number of processes with the -\fI-np\fP option. Consider now the hostfile -. 
- - \fB%\fP cat myhostfile - aa slots=4 - bb slots=4 - cc slots=4 - -. -.PP -Now, -. -.TP 4 -prun -hostfile myhostfile -np 6 ./a.out -will launch processes 0-3 on node aa and processes 4-5 on node bb. The remaining -slots in the hostfile will not be used since the \fI-np\fP option indicated -that only 6 processes should be launched. -. -.SS Mapping Processes to Nodes: Using Policies -. -The examples above illustrate the default mapping of process processes -to nodes. This mapping can also be controlled with various -\fIprun\fP options that describe mapping policies. -. -. -.PP -Consider the same hostfile as above, again with \fI-np\fP 6: -. - - node aa node bb node cc - - prun 0 1 2 3 4 5 - - prun --map-by node 0 3 1 4 2 5 - - prun -nolocal 0 1 2 3 4 5 -. -.PP -The \fI--map-by node\fP option will load balance the processes across -the available nodes, numbering each process in a round-robin fashion. -. -.PP -The \fI-nolocal\fP option prevents any processes from being mapped onto the -local host (in this case node aa). While \fIprun\fP typically consumes -few system resources, \fI-nolocal\fP can be helpful for launching very -large jobs where \fIprun\fP may actually need to use noticeable amounts -of memory and/or processing time. -. -.PP -Just as \fI-np\fP can specify fewer processes than there are slots, it can -also oversubscribe the slots. For example, with the same hostfile: -. -.TP 4 -prun -hostfile myhostfile -np 14 ./a.out -will launch processes 0-3 on node aa, 4-7 on bb, and 8-11 on cc. It will -then add the remaining two processes to whichever nodes it chooses. -. -.PP -One can also specify limits to oversubscription. For example, with the same -hostfile: -. -.TP 4 -prun -hostfile myhostfile -np 14 -nooversubscribe ./a.out -will produce an error since \fI-nooversubscribe\fP prevents oversubscription. -. -.PP -Limits to oversubscription can also be specified in the hostfile itself: -. 
- % cat myhostfile - aa slots=4 max_slots=4 - bb max_slots=4 - cc slots=4 -. -.PP -The \fImax_slots\fP field specifies such a limit. When it does, the -\fIslots\fP value defaults to the limit. Now: -. -.TP 4 -prun -hostfile myhostfile -np 14 ./a.out -causes the first 12 processes to be launched as before, but the remaining -two processes will be forced onto node cc. The other two nodes are -protected by the hostfile against oversubscription by this job. -. -.PP -Using the \fI--nooversubscribe\fR option can be helpful since PSRVR -currently does not get "max_slots" values from the resource manager. -. -.PP -Of course, \fI-np\fP can also be used with the \fI-H\fP or \fI-host\fP -option. For example, -. -.TP 4 -prun -H aa,bb -np 8 ./a.out -launches 8 processes. Since only two hosts are specified, after the first -two processes are mapped, one to aa and one to bb, the remaining processes -oversubscribe the specified hosts. -. -.PP -And here is a MIMD example: -. -.TP 4 -prun -H aa -np 1 hostname : -H bb,cc -np 2 uptime -will launch process 0 running \fIhostname\fP on node aa and processes 1 and 2 -each running \fIuptime\fP on nodes bb and cc, respectively. -. -.SS Mapping, Ranking, and Binding: Oh My! -. -PSRVR employs a three-phase procedure for assigning process locations and -ranks: -. -.TP 10 -\fBmapping\fP -Assigns a default location to each process -. -.TP 10 -\fBranking\fP -Assigns a rank value to each process -. -.TP 10 -\fBbinding\fP -Constrains each process to run on specific processors -. -.PP -The \fImapping\fP step is used to assign a default location to each process -based on the mapper being employed. Mapping by slot, node, and sequentially results -in the assignment of the processes to the node level. In contrast, mapping by object, allows -the mapper to assign the process to an actual object on each node. -. 
-.PP -\fBNote:\fP the location assigned to the process is independent of where it will be bound - the -assignment is used solely as input to the binding algorithm. -. -.PP -The mapping of process processes to nodes can be defined not just -with general policies but also, if necessary, using arbitrary mappings -that cannot be described by a simple policy. One can use the "sequential -mapper," which reads the hostfile line by line, assigning processes -to nodes in whatever order the hostfile specifies. Use the -\fI-pmca rmaps seq\fP option. For example, using the same hostfile -as before: -. -.PP -prun -hostfile myhostfile -pmca rmaps seq ./a.out -. -.PP -will launch three processes, one on each of nodes aa, bb, and cc, respectively. -The slot counts don't matter; one process is launched per line on -whatever node is listed on the line. -. -.PP -Another way to specify arbitrary mappings is with a rankfile, which -gives you detailed control over process binding as well. Rankfiles -are discussed below. -. -.PP -The second phase focuses on the \fIranking\fP of the process within -the job. PSRVR -separates this from the mapping procedure to allow more flexibility in the -relative placement of processes. This is best illustrated by considering the -following two cases where we used the —map-by ppr:2:socket option: -. -.PP - node aa node bb - - rank-by core 0 1 ! 2 3 4 5 ! 6 7 - - rank-by socket 0 2 ! 1 3 4 6 ! 5 7 - - rank-by socket:span 0 4 ! 1 5 2 6 ! 3 7 -. -.PP -Ranking by core and by slot provide the identical result - a simple -progression of ranks across each node. Ranking by -socket does a round-robin ranking within each node until all processes -have been assigned a rank, and then progresses to the next -node. Adding the \fIspan\fP modifier to the ranking directive causes -the ranking algorithm to treat the entire allocation as a single -entity - thus, the MCW ranks are assigned across all sockets before -circling back around to the beginning. -. 
-.PP -The \fIbinding\fP phase actually binds each process to a given set of processors. This can -improve performance if the operating system is placing processes -suboptimally. For example, it might oversubscribe some multi-core -processor sockets, leaving other sockets idle; this can lead -processes to contend unnecessarily for common resources. Or, it -might spread processes out too widely; this can be suboptimal if -application performance is sensitive to interprocess communication -costs. Binding can also keep the operating system from migrating -processes excessively, regardless of how optimally those processes -were placed to begin with. -. -.PP -The processors to be used for binding can be identified in terms of -topological groupings - e.g., binding to an l3cache will bind each -process to all processors within the scope of a single L3 cache within -their assigned location. Thus, if a process is assigned by the mapper -to a certain socket, then a \fI--bind-to l3cache\fP directive will -cause the process to be bound to the processors that share a single L3 -cache within that socket. -. -.PP -To help balance loads, the binding directive uses a round-robin method when binding to -levels lower than used in the mapper. For example, consider the case where a job is -mapped to the socket level, and then bound to core. Each socket will have multiple cores, -so if multiple processes are mapped to a given socket, the binding algorithm will assign -each process located to a socket to a unique core in a round-robin manner. -. -.PP -Alternatively, processes mapped by l2cache and then bound to socket will simply be bound -to all the processors in the socket where they are located. In this manner, users can -exert detailed control over relative MCW rank location and binding. -. -.PP -Finally, \fI--report-bindings\fP can be used to report bindings. -. -.PP -As an example, consider a node with two processor sockets, each comprising -four cores.
We run \fIprun\fP with \fI-np 4 --report-bindings\fP and -the following additional options: -. - - % prun ... --map-by core --bind-to core - [...] ... binding child [...,0] to cpus 0001 - [...] ... binding child [...,1] to cpus 0002 - [...] ... binding child [...,2] to cpus 0004 - [...] ... binding child [...,3] to cpus 0008 - - % prun ... --map-by socket --bind-to socket - [...] ... binding child [...,0] to socket 0 cpus 000f - [...] ... binding child [...,1] to socket 1 cpus 00f0 - [...] ... binding child [...,2] to socket 0 cpus 000f - [...] ... binding child [...,3] to socket 1 cpus 00f0 - - % prun ... --map-by core:PE=2 --bind-to core - [...] ... binding child [...,0] to cpus 0003 - [...] ... binding child [...,1] to cpus 000c - [...] ... binding child [...,2] to cpus 0030 - [...] ... binding child [...,3] to cpus 00c0 - - % prun ... --bind-to none -. -.PP -Here, \fI--report-bindings\fP shows the binding of each process as a mask. -In the first case, the processes bind to successive cores as indicated by -the masks 0001, 0002, 0004, and 0008. In the second case, processes bind -to all cores on successive sockets as indicated by the masks 000f and 00f0. -The processes cycle through the processor sockets in a round-robin fashion -as many times as are needed. In the third case, the masks show us that -2 cores have been bound per process. In the fourth case, binding is -turned off and no bindings are reported. -. -.PP -PSRVR's support for process binding depends on the underlying -operating system. Therefore, certain process binding options may not be available -on every system. -. -.PP -Process binding can also be set with MCA parameters. -Their usage is less convenient than that of \fIprun\fP options. -On the other hand, MCA parameters can be set not only on the \fIprun\fP -command line, but alternatively in a system or user mca-params.conf file -or as environment variables, as described in the MCA section below. -Some examples include: -. 
-.PP - prun option MCA parameter key value - - --map-by core rmaps_base_mapping_policy core - --map-by socket rmaps_base_mapping_policy socket - --rank-by core rmaps_base_ranking_policy core - --bind-to core hwloc_base_binding_policy core - --bind-to socket hwloc_base_binding_policy socket - --bind-to none hwloc_base_binding_policy none -. -. -.SS Rankfiles -. -Rankfiles are text files that specify detailed information about how -individual processes should be mapped to nodes, and to which -processor(s) they should be bound. Each line of a rankfile specifies -the location of one process. The general form of each line in the -rankfile is: -. - - rank = slot= -. -.PP -For example: -. - - $ cat myrankfile - rank 0=aa slot=1:0-2 - rank 1=bb slot=0:0,1 - rank 2=cc slot=1-2 - $ prun -H aa,bb,cc,dd -rf myrankfile ./a.out -. -.PP -Means that -. - - Rank 0 runs on node aa, bound to logical socket 1, cores 0-2. - Rank 1 runs on node bb, bound to logical socket 0, cores 0 and 1. - Rank 2 runs on node cc, bound to logical cores 1 and 2. -. -.PP -Rankfiles can alternatively be used to specify \fIphysical\fP processor -locations. In this case, the syntax is somewhat different. Sockets are -no longer recognized, and the slot number given must be the number of -the physical PU as most OS's do not assign a unique physical identifier -to each core in the node. Thus, a proper physical rankfile looks something -like the following: -. - - $ cat myphysicalrankfile - rank 0=aa slot=1 - rank 1=bb slot=8 - rank 2=cc slot=6 -. -.PP -This means that -. - - Rank 0 will run on node aa, bound to the core that contains physical PU 1 - Rank 1 will run on node bb, bound to the core that contains physical PU 8 - Rank 2 will run on node cc, bound to the core that contains physical PU 6 -. -.PP -Rankfiles are treated as \fIlogical\fP by default, and the MCA parameter -rmaps_rank_file_physical must be set to 1 to indicate that the rankfile -is to be considered as \fIphysical\fP. -. 
-.PP -The hostnames listed above are "absolute," meaning that actual -resolvable hostnames are specified. However, hostnames can also be -specified as "relative," meaning that they are specified in relation -to an externally-specified list of hostnames (e.g., by prun's --host -argument, a hostfile, or a job scheduler). -. -.PP -The "relative" specification is of the form "+nX", where X is an -integer specifying the Xth hostname in the set of all available -hostnames, indexed from 0. For example: -. - - $ cat myrankfile - rank 0=+n0 slot=1:0-2 - rank 1=+n1 slot=0:0,1 - rank 2=+n2 slot=1-2 - $ prun -H aa,bb,cc,dd -rf myrankfile ./a.out -. -.PP -All socket/core slot locations are -specified as -.I logical -indexes. You can use tools such as HWLOC's "lstopo" to find the -logical indexes of sockets and cores. -. -. -.SS Application Context or Executable Program? -. -To distinguish the two different forms, \fIprun\fP -looks on the command line for the \fI--app\fP option. If -it is specified, then the file named on the command line is -assumed to be an application context. If it is not -specified, then the file is assumed to be an executable program. -. -. -. -.SS Locating Files -. -If no relative or absolute path is specified for a file, prun will first look for files by searching the directories specified -by the \fI--path\fP option. If there is no \fI--path\fP option set or -if the file is not found at the \fI--path\fP location, then prun -will search the user's PATH environment variable as defined on the -source node(s). -.PP -If a relative directory is specified, it must be relative to the initial -working directory determined by the specific starter used. For example when -using the rsh or ssh starters, the initial directory is $HOME by default. Other -starters may set the initial directory to the current working directory from -the invocation of \fIprun\fP. -. -. -. -.SS Current Working Directory -.
-The \fI\-wdir\fP prun option (and its synonym, \fI\-wd\fP) allows -the user to change to an arbitrary directory before the program is -invoked. It can also be used in application context files to specify -working directories on specific nodes and/or for specific -applications. -.PP -If the \fI\-wdir\fP option appears both in a context file and on the -command line, the context file directory will override the command -line value. -.PP -If the \fI-wdir\fP option is specified, prun will attempt to -change to the specified directory on all of the remote nodes. If this -fails, \fIprun\fP will abort. -.PP -If the \fI-wdir\fP option is \fBnot\fP specified, prun will send -the directory name where \fIprun\fP was invoked to each of the -remote nodes. The remote nodes will try to change to that -directory. If they are unable (e.g., if the directory does not exist on -that node), then prun will use the default directory determined by -the starter. -.PP -All directory changing occurs before the user's program is invoked. -. -. -. -.SS Standard I/O -. -PSRVR directs UNIX standard input to /dev/null on all processes -except the rank 0 process. The rank 0 process -inherits standard input from \fIprun\fP. -.B Note: -The node that invoked \fIprun\fP need not be the same as the node where the -rank 0 process resides. PSRVR handles the redirection of -\fIprun\fP's standard input to the rank 0 process. -.PP -PSRVR directs UNIX standard output and error from remote nodes to the node -that invoked \fIprun\fP and prints it on the standard output/error of -\fIprun\fP. -Local processes inherit the standard output/error of \fIprun\fP and transfer -to it directly. -.PP -Thus it is possible to redirect standard I/O for applications by -using the typical shell redirection procedure on \fIprun\fP. - - \fB%\fP prun -np 2 my_app < my_input > my_output - -Note that in this example \fIonly\fP the rank 0 process will -receive the stream from \fImy_input\fP on stdin. 
The stdin on all the other -nodes will be tied to /dev/null. However, the stdout from all nodes will -be collected into the \fImy_output\fP file. -. -. -. -.SS Signal Propagation -. -When prun receives a SIGTERM or SIGINT, it will attempt to kill -the entire job by sending all processes in the job a SIGTERM, waiting -a small number of seconds, then sending all processes in the job a -SIGKILL. -. -.PP -SIGUSR1 and SIGUSR2 signals received by prun are propagated to -all processes in the job. -. -.PP -A SIGTSTP signal to prun will cause a SIGSTOP signal to be sent -to all of the programs started by prun and likewise a SIGCONT signal -to prun will cause a SIGCONT to be sent. -. -.PP -Other signals are not currently propagated -by prun. -. -. -.SS Process Termination / Signal Handling -. -During the run of an application, if any process dies abnormally -(either exiting before invoking \fIPMIx_Finalize\fP, or dying as the result of a -signal), \fIprun\fP will print out an error message and kill the rest of the -application. -.PP -. -. -.SS Process Environment -. -Processes in the application inherit their environment from the -PSRVR daemon upon the node on which they are running. The -environment is typically inherited from the user's shell. On remote -nodes, the exact environment is determined by the boot MCA module -used. The \fIrsh\fR launch module, for example, uses either -\fIrsh\fR/\fIssh\fR to launch the PSRVR daemon on remote nodes, and -typically executes one or more of the user's shell-setup files before -launching the daemon. When running dynamically linked -applications which require the \fILD_LIBRARY_PATH\fR environment -variable to be set, care must be taken to ensure that it is correctly -set when booting PSRVR. -.PP -See the "Remote Execution" section for more details. -. -. -.SS Remote Execution -.
-PSRVR requires that the \fIPATH\fR environment variable be set to -find executables on remote nodes (this is typically only necessary in -\fIrsh\fR- or \fIssh\fR-based environments -- batch/scheduled -environments typically copy the current environment to the execution -of remote jobs, so if the current environment has \fIPATH\fR and/or -\fILD_LIBRARY_PATH\fR set properly, the remote nodes will also have it -set properly). If PSRVR was compiled with shared library support, -it may also be necessary to have the \fILD_LIBRARY_PATH\fR environment -variable set on remote nodes as well (especially to find the shared -libraries required to run user applications). -.PP -However, it is not always desirable or possible to edit shell -startup files to set \fIPATH\fR and/or \fILD_LIBRARY_PATH\fR. The -\fI--prefix\fR option is provided for some simple configurations where -this is not possible. -.PP -The \fI--prefix\fR option takes a single argument: the base directory -on the remote node where PSRVR is installed. PSRVR will use -this directory to set the remote \fIPATH\fR and \fILD_LIBRARY_PATH\fR -before executing any user applications. This allows -running jobs without having pre-configured the \fIPATH\fR and -\fILD_LIBRARY_PATH\fR on the remote nodes. -.PP -PSRVR adds the basename of the current -node's "bindir" (the directory where PSRVR's executables are -installed) to the prefix and uses that to set the \fIPATH\fR on the -remote node. Similarly, PSRVR adds the basename of the current -node's "libdir" (the directory where PSRVR's libraries are -installed) to the prefix and uses that to set the -\fILD_LIBRARY_PATH\fR on the remote node. 
For example: -.TP 15 -Local bindir: -/local/node/directory/bin -.TP -Local libdir: -/local/node/directory/lib64 -.PP -If the following command line is used: - - \fB%\fP prun --prefix /remote/node/directory - -PSRVR will add "/remote/node/directory/bin" to the \fIPATH\fR -and "/remote/node/directory/lib64" to the \fILD_LIBRARY_PATH\fR on the -remote node before attempting to execute anything. -.PP -The \fI--prefix\fR option is not sufficient if the installation paths -on the remote node are different than the local node (e.g., if "/lib" -is used on the local node, but "/lib64" is used on the remote node), -or if the installation paths are something other than a subdirectory -under a common prefix. -.PP -Note that executing \fIprun\fR via an absolute pathname is -equivalent to specifying \fI--prefix\fR without the last subdirectory -in the absolute pathname to \fIprun\fR. For example: - - \fB%\fP /usr/local/bin/prun ... - -is equivalent to - - \fB%\fP prun --prefix /usr/local -. -. -. -.SS Exported Environment Variables -. -All environment variables that are named in the form PMIX_* will automatically -be exported to new processes on the local and remote nodes. Environmental -parameters can also be set/forwarded to the new processes using the MCA -parameter \fImca_base_env_list\fP. While the syntax of the \fI\-x\fP option and MCA param -allows the definition of new variables, note that the parser -for these options is currently not very sophisticated - it does not even -understand quoted values. Users are advised to set variables in the -environment and use the option to export them; not to define them. -. -. -. -.SS Setting MCA Parameters -. -The \fI-pmca\fP switch allows the passing of parameters to various MCA -(Modular Component Architecture) modules. -.\" PSRVR's MCA modules are described in detail in psrvrmca(7).
-MCA modules have direct impact on programs because they allow tunable -parameters to be set at run time (such as which BTL communication device driver -to use, what parameters to pass to that BTL, etc.). -.PP -The \fI-pmca\fP switch takes two arguments: \fI\fP and \fI\fP. -The \fI\fP argument generally specifies which MCA module will receive the value. -For example, the \fI\fP "btl" is used to select which BTL to be used for -transporting messages. The \fI\fP argument is the value that is -passed. -For example: -. -.TP 4 -prun -pmca btl tcp,self -np 1 foo -Tells PSRVR to use the "tcp" and "self" BTLs, and to run a single copy of -"foo" on an allocated node. -. -.TP -prun -pmca btl self -np 1 foo -Tells PSRVR to use the "self" BTL, and to run a single copy of "foo" on an -allocated node. -.\" And so on. PSRVR's BTL MCA modules are described in psrvrmca_btl(7). -.PP -The \fI-pmca\fP switch can be used multiple times to specify different -\fI\fP and/or \fI\fP arguments. If the same \fI\fP is -specified more than once, the \fI\fPs are concatenated with a comma -(",") separating them. -.PP -Note that the \fI-pmca\fP switch is simply a shortcut for setting environment variables. -The same effect may be accomplished by setting corresponding environment -variables before running \fIprun\fP. -The form of the environment variables that PSRVR sets is: - - PMIX_MCA_= -.PP -Thus, the \fI-pmca\fP switch overrides any previously set environment -variables. The \fI-pmca\fP settings similarly override MCA parameters set -in the -$OPAL_PREFIX/etc/psrvr-mca-params.conf or $HOME/.psrvr/mca-params.conf -file. -. -.PP -Unknown \fI\fP arguments are still set as -environment variable -- they are not checked (by \fIprun\fP) for correctness. -Illegal or incorrect \fI\fP arguments may or may not be reported -- it -depends on the specific MCA module. 
-.PP -To find the available component types under the MCA architecture, or to find the -available parameters for a specific component, use the \fIpinfo\fP command. -See the \fIpinfo(1)\fP man page for detailed information on the command. -. -. -. -.SS Setting MCA parameters and environment variables from file. -The \fI-tune\fP command line option and its synonym \fI-pmca mca_base_envar_file_prefix\fP allows a user -to set mca parameters and environment variables with the syntax described below. -This option requires a single file or list of files separated by "," to follow. -.PP -A valid line in the file may contain zero or many "-x", "-pmca", or “--pmca” arguments. -The following patterns are supported: -pmca var val -pmca var "val" -x var=val -x var. -If any argument is duplicated in the file, the last value read will be used. -.PP -MCA parameters and environment specified on the command line have higher precedence than variables specified in the file. -. -. -. -.SS Running as root -. -The PSRVR team strongly advises against executing -.I prun -as the root user. Applications should be run as regular -(non-root) users. -. -.PP -Reflecting this advice, prun will refuse to run as root by default. -To override this default, you can add the -.I --allow-run-as-root -option to the -.I prun -command line. -. -.SS Exit status -. -There is no standard definition for what \fIprun\fP should return as an exit -status. After considerable discussion, we settled on the following method for -assigning the \fIprun\fP exit status (note: in the following description, -the "primary" job is the initial application started by prun - all jobs that -are spawned by that job are designated "secondary" jobs): -. 
-.IP \[bu] 2 -if all processes in the primary job normally terminate with exit status 0, we return 0 -.IP \[bu] -if one or more processes in the primary job normally terminate with non-zero exit status, -we return the exit status of the process with the lowest rank to have a non-zero status -.IP \[bu] -if all processes in the primary job normally terminate with exit status 0, and one or more -processes in a secondary job normally terminate with non-zero exit status, we (a) return -the exit status of the process with the lowest rank in the lowest jobid to have a non-zero -status, and (b) output a message summarizing the exit status of the primary and all secondary jobs. -.IP \[bu] -if the cmd line option --report-child-jobs-separately is set, we will return -only- the -exit status of the primary job. Any non-zero exit status in secondary jobs will be -reported solely in a summary print statement. -. -.PP -By default, PSRVR records and notes that processes exited with non-zero termination status. -This is generally not considered an "abnormal termination" - i.e., PSRVR will not abort a -job if one or more processes return a non-zero status. Instead, the default behavior simply -reports the number of processes terminating with non-zero status upon completion of the job. -.PP -However, in some cases it can be desirable to have the job abort when any process terminates -with non-zero status. For example, a non-PMIx job might detect a bad result from a calculation -and want to abort, but doesn't want to generate a core file. Or a PMIx job might continue past -a call to PMIx_Finalize, but indicate that all processes should abort due to some post-PMIx result. -.PP -It is not anticipated that this situation will occur frequently. However, in the interest of -serving the broader community, PSRVR now has a means for allowing users to direct that jobs be -aborted upon any process exiting with non-zero status. 
Setting the MCA parameter -"orte_abort_on_non_zero_status" to 1 will cause PSRVR to abort all processes once any process - exits with non-zero status. -.PP -Terminations caused in this manner will be reported on the console as an "abnormal termination", -with the first process to so exit identified along with its exit status. -.PP -.\" ************************** -.\" Return Value Section -.\" ************************** -. -.SH RETURN VALUE -. -\fIprun\fP returns 0 if all processes started by \fIprun\fP exit after calling -PMIx_Finalize. A non-zero value is returned if an internal error occurred in -prun, or one or more processes exited before calling PMIx_Finalize. If an -internal error occurred in prun, the corresponding error code is returned. -In the event that one or more processes exit before calling PMIx_Finalize, the -return value of the rank of the process that \fIprun\fP first notices died -before calling PMIx_Finalize will be returned. Note that, in general, this will -be the first process that died but is not guaranteed to be so. -. -.PP -If the -.B --timeout -command line option is used and the timeout expires before the job -completes (thereby forcing -.I prun -to kill the job) -.I prun -will return an exit status equivalent to the value of -.B ETIMEDOUT -(which is typically 110 on Linux and OS X systems). - -. -.\" ************************** -.\" See Also Section -.\" ************************** -. diff --git a/orte/tools/ompi-prun/prun b/orte/tools/ompi-prun/prun deleted file mode 100755 index 87fad39fec8..00000000000 --- a/orte/tools/ompi-prun/prun +++ /dev/null @@ -1,228 +0,0 @@ -#! /bin/sh - -# prun - temporary wrapper script for .libs/prun -# Generated by libtool (GNU libtool) 2.4.6 -# -# The prun program cannot be directly executed until all the libtool -# libraries that it depends on are installed. -# -# This wrapper script should never be moved out of the build directory. -# If it is, it will not operate correctly. 
- -# Sed substitution that helps us do robust quoting. It backslashifies -# metacharacters that are still active within double-quoted strings. -sed_quote_subst='s|\([`"$\\]\)|\\\1|g' - -# Be Bourne compatible -if test -n "${ZSH_VERSION+set}" && (emulate sh) >/dev/null 2>&1; then - emulate sh - NULLCMD=: - # Zsh 3.x and 4.x performs word splitting on ${1+"$@"}, which - # is contrary to our usage. Disable this feature. - alias -g '${1+"$@"}'='"$@"' - setopt NO_GLOB_SUBST -else - case `(set -o) 2>/dev/null` in *posix*) set -o posix;; esac -fi -BIN_SH=xpg4; export BIN_SH # for Tru64 -DUALCASE=1; export DUALCASE # for MKS sh - -# The HP-UX ksh and POSIX shell print the target directory to stdout -# if CDPATH is set. -(unset CDPATH) >/dev/null 2>&1 && unset CDPATH - -relink_command="(cd /home/common/openmpi/foobar/orte/tools/prun; LIBRARY_PATH=/opt/local/lib; export LIBRARY_PATH; { test -z \"\${COMPILER_PATH+set}\" || unset COMPILER_PATH || { COMPILER_PATH=; export COMPILER_PATH; }; }; { test -z \"\${GCC_EXEC_PREFIX+set}\" || unset GCC_EXEC_PREFIX || { GCC_EXEC_PREFIX=; export GCC_EXEC_PREFIX; }; }; { test -z \"\${LD_RUN_PATH+set}\" || unset LD_RUN_PATH || { LD_RUN_PATH=; export LD_RUN_PATH; }; }; LD_LIBRARY_PATH=/home/common/openmpi/build/foobar/lib:/home/common/local/lib:/home/common/pmix/build/prrte/lib; export LD_LIBRARY_PATH; PATH=/home/common/openmpi/build/foobar/bin:/home/common/local/bin:/home/common/pmix/build/prrte/bin:/home/common/local/sbin:/usr/lib64/qt-3.3/bin:/home/rhc/perl5/bin:/usr/local/bin:/usr/bin:/usr/local/sbin:/usr/sbin:/home/rhc/.local/bin:/home/rhc/bin; export PATH; gcc -Wall -Wundef -Wno-long-long -Wsign-compare -Wmissing-prototypes -Wstrict-prototypes -Wcomment -pedantic -Werror-implicit-function-declaration -fno-strict-aliasing -mcx16 -pthread -g -o \$progdir/\$file main.o prun.o ../../../orte/.libs/libopen-rte.so /home/common/openmpi/foobar/opal/.libs/libopen-pal.so ../../../opal/.libs/libopen-pal.so -ldl -ludev -lrt -lm -lutil -lz -pthread 
-Wl,-rpath -Wl,/home/common/openmpi/foobar/orte/.libs -Wl,-rpath -Wl,/home/common/openmpi/foobar/opal/.libs -Wl,-rpath -Wl,/home/common/openmpi/build/foobar/lib)" - -# This environment variable determines our operation mode. -if test "$libtool_install_magic" = "%%%MAGIC variable%%%"; then - # install mode needs the following variables: - generated_by_libtool_version='2.4.6' - notinst_deplibs=' ../../../orte/libopen-rte.la /home/common/openmpi/foobar/opal/libopen-pal.la ../../../opal/libopen-pal.la' -else - # When we are sourced in execute mode, $file and $ECHO are already set. - if test "$libtool_execute_magic" != "%%%MAGIC variable%%%"; then - file="$0" - -# A function that is used when there is no print builtin or printf. -func_fallback_echo () -{ - eval 'cat <<_LTECHO_EOF -$1 -_LTECHO_EOF' -} - ECHO="printf %s\\n" - fi - -# Very basic option parsing. These options are (a) specific to -# the libtool wrapper, (b) are identical between the wrapper -# /script/ and the wrapper /executable/ that is used only on -# windows platforms, and (c) all begin with the string --lt- -# (application programs are unlikely to have options that match -# this pattern). -# -# There are only two supported options: --lt-debug and -# --lt-dump-script. There is, deliberately, no --lt-help. -# -# The first argument to this parsing function should be the -# script's ../../../libtool value, followed by no. -lt_option_debug= -func_parse_lt_options () -{ - lt_script_arg0=$0 - shift - for lt_opt - do - case "$lt_opt" in - --lt-debug) lt_option_debug=1 ;; - --lt-dump-script) - lt_dump_D=`$ECHO "X$lt_script_arg0" | /usr/bin/sed -e 's/^X//' -e 's%/[^/]*$%%'` - test "X$lt_dump_D" = "X$lt_script_arg0" && lt_dump_D=. 
- lt_dump_F=`$ECHO "X$lt_script_arg0" | /usr/bin/sed -e 's/^X//' -e 's%^.*/%%'` - cat "$lt_dump_D/$lt_dump_F" - exit 0 - ;; - --lt-*) - $ECHO "Unrecognized --lt- option: '$lt_opt'" 1>&2 - exit 1 - ;; - esac - done - - # Print the debug banner immediately: - if test -n "$lt_option_debug"; then - echo "prun:prun:$LINENO: libtool wrapper (GNU libtool) 2.4.6" 1>&2 - fi -} - -# Used when --lt-debug. Prints its arguments to stdout -# (redirection is the responsibility of the caller) -func_lt_dump_args () -{ - lt_dump_args_N=1; - for lt_arg - do - $ECHO "prun:prun:$LINENO: newargv[$lt_dump_args_N]: $lt_arg" - lt_dump_args_N=`expr $lt_dump_args_N + 1` - done -} - -# Core function for launching the target application -func_exec_program_core () -{ - - if test -n "$lt_option_debug"; then - $ECHO "prun:prun:$LINENO: newargv[0]: $progdir/$program" 1>&2 - func_lt_dump_args ${1+"$@"} 1>&2 - fi - exec "$progdir/$program" ${1+"$@"} - - $ECHO "$0: cannot exec $program $*" 1>&2 - exit 1 -} - -# A function to encapsulate launching the target application -# Strips options in the --lt-* namespace from $@ and -# launches target application with the remaining arguments. -func_exec_program () -{ - case " $* " in - *\ --lt-*) - for lt_wr_arg - do - case $lt_wr_arg in - --lt-*) ;; - *) set x "$@" "$lt_wr_arg"; shift;; - esac - shift - done ;; - esac - func_exec_program_core ${1+"$@"} -} - - # Parse options - func_parse_lt_options "$0" ${1+"$@"} - - # Find the directory that this script lives in. - thisdir=`$ECHO "$file" | /usr/bin/sed 's%/[^/]*$%%'` - test "x$thisdir" = "x$file" && thisdir=. - - # Follow symbolic links until we get to the real thisdir. - file=`ls -ld "$file" | /usr/bin/sed -n 's/.*-> //p'` - while test -n "$file"; do - destdir=`$ECHO "$file" | /usr/bin/sed 's%/[^/]*$%%'` - - # If there was a directory component, then change thisdir. 
- if test "x$destdir" != "x$file"; then - case "$destdir" in - [\\/]* | [A-Za-z]:[\\/]*) thisdir="$destdir" ;; - *) thisdir="$thisdir/$destdir" ;; - esac - fi - - file=`$ECHO "$file" | /usr/bin/sed 's%^.*/%%'` - file=`ls -ld "$thisdir/$file" | /usr/bin/sed -n 's/.*-> //p'` - done - - # Usually 'no', except on cygwin/mingw when embedded into - # the cwrapper. - WRAPPER_SCRIPT_BELONGS_IN_OBJDIR=no - if test "$WRAPPER_SCRIPT_BELONGS_IN_OBJDIR" = "yes"; then - # special case for '.' - if test "$thisdir" = "."; then - thisdir=`pwd` - fi - # remove .libs from thisdir - case "$thisdir" in - *[\\/].libs ) thisdir=`$ECHO "$thisdir" | /usr/bin/sed 's%[\\/][^\\/]*$%%'` ;; - .libs ) thisdir=. ;; - esac - fi - - # Try to get the absolute directory name. - absdir=`cd "$thisdir" && pwd` - test -n "$absdir" && thisdir="$absdir" - - program=lt-'prun' - progdir="$thisdir/.libs" - - if test ! -f "$progdir/$program" || - { file=`ls -1dt "$progdir/$program" "$progdir/../$program" 2>/dev/null | /usr/bin/sed 1q`; \ - test "X$file" != "X$progdir/$program"; }; then - - file="$$-$program" - - if test ! -d "$progdir"; then - mkdir "$progdir" - else - rm -f "$progdir/$file" - fi - - # relink executable if necessary - if test -n "$relink_command"; then - if relink_command_output=`eval $relink_command 2>&1`; then : - else - $ECHO "$relink_command_output" >&2 - rm -f "$progdir/$file" - exit 1 - fi - fi - - mv -f "$progdir/$file" "$progdir/$program" 2>/dev/null || - { rm -f "$progdir/$program"; - mv -f "$progdir/$file" "$progdir/$program"; } - rm -f "$progdir/$file" - fi - - if test -f "$progdir/$program"; then - if test "$libtool_execute_magic" != "%%%MAGIC variable%%%"; then - # Run the actual program with our arguments. - func_exec_program ${1+"$@"} - fi - else - # The program doesn't exist. - $ECHO "$0: error: '$progdir/$program' does not exist" 1>&2 - $ECHO "This script is just a wrapper for $program." 1>&2 - $ECHO "See the libtool documentation for more information." 
1>&2 - exit 1 - fi -fi diff --git a/orte/tools/ompi-prun/prun.1 b/orte/tools/ompi-prun/prun.1 deleted file mode 100644 index 74ce2294db0..00000000000 --- a/orte/tools/ompi-prun/prun.1 +++ /dev/null @@ -1,1597 +0,0 @@ -.\" -*- nroff -*- -.\" Copyright (c) 2009-2016 Cisco Systems, Inc. All rights reserved. -.\" Copyright (c) 2008-2009 Sun Microsystems, Inc. All rights reserved. -.\" Copyright (c) 2017-2018 Intel, Inc. All rights reserved. -.\" Copyright (c) 2017 Los Alamos National Security, LLC. All rights -.\" reserved. -.\" $COPYRIGHT$ -.\" -.\" Man page for PSRVR's prun command -.\" -.\" .TH name section center-footer left-footer center-header -.TH PRUN 1 "Unreleased developer copy" "gitclone" "Open MPI" -.\" ************************** -.\" Name Section -.\" ************************** -.SH NAME -. -prun \- Execute serial and parallel jobs with the PMIx Reference Server. - -. -.\" ************************** -.\" Synopsis Section -.\" ************************** -.SH SYNOPSIS -. -.PP -Single Process Multiple Data (SPMD) Model: - -.B prun -[ options ] -.B -[ ] -.P - -Multiple Instruction Multiple Data (MIMD) Model: - -.B prun -[ global_options ] - [ local_options1 ] -.B -[ ] : - [ local_options2 ] -.B -[ ] : - ... : - [ local_optionsN ] -.B -[ ] -.P - -Note that in both models, invoking \fIprun\fP via an absolute path -name is equivalent to specifying the \fI--prefix\fP option with a -\fI\fR value equivalent to the directory where \fIprun\fR -resides, minus its last subdirectory. For example: - - \fB%\fP /usr/local/bin/prun ... - -is equivalent to - - \fB%\fP prun --prefix /usr/local - -. -.\" ************************** -.\" Quick Summary Section -.\" ************************** -.SH QUICK SUMMARY -. 
-If you are simply looking for how to run an application, you -probably want to use a command line of the following form: - - \fB%\fP prun [ -np X ] [ --hostfile ] - -This will run X copies of \fI\fR in your current run-time -environment (if running under a supported resource manager, PSRVR's -\fIprun\fR will usually automatically use the corresponding resource manager -process starter, as opposed to, for example, \fIrsh\fR or \fIssh\fR, -which require the use of a hostfile, or will default to running all X -copies on the localhost), scheduling (by default) in a round-robin fashion by -CPU slot. See the rest of this page for more details. -.P -Please note that prun automatically binds processes. Three binding patterns are used in the absence of any further directives: -.TP 18 -.B Bind to core: -when the number of processes is <= 2 -. -. -.TP -.B Bind to socket: -when the number of processes is > 2 -. -. -.TP -.B Bind to none: -when oversubscribed -. -. -.P -If your application uses threads, then you probably want to ensure that you are -either not bound at all (by specifying --bind-to none), or bound to multiple cores -using an appropriate binding level or specific number of processing elements per -application process. -. -.\" ************************** -.\" Options Section -.\" ************************** -.SH OPTIONS -. -.I prun -will send the name of the directory where it was invoked on the local -node to each of the remote nodes, and attempt to change to that -directory. See the "Current Working Directory" section below for further -details. -.\" -.\" Start options listing -.\" Indent 10 characters from start of first column to start of second column -.TP 10 -.B -The program executable. This is identified as the first non-recognized argument -to prun. -. -. -.TP -.B -Pass these run-time arguments to every new process. These must always -be the last arguments to \fIprun\fP. If an app context file is used, -\fI\fP will be ignored. -. -. 
-.TP -.B -h\fR,\fP --help -Display help for this command -. -. -.TP -.B -q\fR,\fP --quiet -Suppress informative messages from prun during application execution. -. -. -.TP -.B -v\fR,\fP --verbose -Be verbose -. -. -.TP -.B -V\fR,\fP --version -Print version number. If no other arguments are given, this will also -cause prun to exit. -. -. -.TP -.B -N \fR\fP -.br -Launch num processes per node on all allocated nodes (synonym for npernode). -. -. -. -.TP -.B -display-map\fR,\fP --display-map -Display a table showing the mapped location of each process prior to launch. -. -. -. -.TP -.B -display-allocation\fR,\fP --display-allocation -Display the detected resource allocation. -. -. -. -.TP -.B -output-proctable\fR,\fP --output-proctable -Output the debugger proctable after launch. -. -. -. -.TP -.B -max-vm-size\fR,\fP --max-vm-size \fR\fP -Number of processes to run. -. -. -. -.TP -.B -novm\fR,\fP --novm -Execute without creating an allocation-spanning virtual machine (only start -daemons on nodes hosting application procs). -. -. -. -.TP -.B -hnp\fR,\fP --hnp \fR\fP -Specify the URI of the \fRpsrvr\fP process, or the name of the file (specified as -file:filename) that contains that info. -. -. -. -.P -Use one of the following options to specify which hosts (nodes) within the \fRpsrvr\fP to run on. -. -. -.TP -.B -H\fR,\fP -host\fR,\fP --host \fR\fP -List of hosts on which to invoke processes. -. -. -.TP -.B -hostfile\fR,\fP --hostfile \fR\fP -Provide a hostfile to use. -.\" JJH - Should have man page for how to format a hostfile properly. -. -. -.TP -.B -default-hostfile\fR,\fP --default-hostfile \fR\fP -Provide a default hostfile. -. -. -.TP -.B -machinefile\fR,\fP --machinefile \fR\fP -Synonym for \fI-hostfile\fP. -. -. -. -. -.TP -.B -cpu-set\fR,\fP --cpu-set \fR\fP -Restrict launched processes to the specified logical cpus on each node (comma-separated -list). 
Note that the binding options will still apply within the specified envelope - e.g., -you can elect to bind each process to only one cpu within the specified cpu set. -. -. -. -.P -The following options specify the number of processes to launch. Note that none -of the options imply a particular binding policy - e.g., requesting N processes -for each socket does not imply that the processes will be bound to the socket. -. -. -.TP -.B -c\fR,\fP -n\fR,\fP --n\fR,\fP -np \fR<#>\fP -Run this many copies of the program on the given nodes. This option -indicates that the specified file is an executable program and not an -application context. If no value is provided for the number of copies to -execute (i.e., neither the "-np" nor its synonyms are provided on the command -line), prun will automatically execute a copy of the program on -each process slot (see below for description of a "process slot"). This -feature, however, can only be used in the SPMD model and will return an -error (without beginning execution of the application) otherwise. -. -. -.TP -.B —map-by ppr:N: -Launch N times the number of objects of the specified type on each node. -. -. -.TP -.B -npersocket\fR,\fP --npersocket \fR<#persocket>\fP -On each node, launch this many processes times the number of processor -sockets on the node. -The \fI-npersocket\fP option also turns on the \fI-bind-to-socket\fP option. -(deprecated in favor of --map-by ppr:n:socket) -. -. -.TP -.B -npernode\fR,\fP --npernode \fR<#pernode>\fP -On each node, launch this many processes. -(deprecated in favor of --map-by ppr:n:node) -. -. -.TP -.B -pernode\fR,\fP --pernode -On each node, launch one process -- equivalent to \fI-npernode\fP 1. -(deprecated in favor of --map-by ppr:1:node) -. -. -. -. -.P -To map processes: -. -. -.TP -.B --map-by \fR\fP -Map to the specified object, defaults to \fIsocket\fP. 
Supported options -include slot, hwthread, core, L1cache, L2cache, L3cache, socket, numa, -board, node, sequential, distance, and ppr. Any object can include -modifiers by adding a \fR:\fP and any combination of PE=n (bind n -processing elements to each proc), SPAN (load -balance the processes across the allocation), OVERSUBSCRIBE (allow -more processes on a node than processing elements), and NOOVERSUBSCRIBE. -This includes PPR, where the pattern would be terminated by another colon -to separate it from the modifiers. -. -.TP -.B -bycore\fR,\fP --bycore -Map processes by core (deprecated in favor of --map-by core) -. -.TP -.B -byslot\fR,\fP --byslot -Map and rank processes round-robin by slot. -. -.TP -.B -nolocal\fR,\fP --nolocal -Do not run any copies of the launched application on the same node as -prun is running. This option will override listing the localhost -with \fB--host\fR or any other host-specifying mechanism. -. -.TP -.B -nooversubscribe\fR,\fP --nooversubscribe -Do not oversubscribe any nodes; error (without starting any processes) -if the requested number of processes would cause oversubscription. -This option implicitly sets "max_slots" equal to the "slots" value for -each node. (Enabled by default). -. -.TP -.B -oversubscribe\fR,\fP --oversubscribe -Nodes are allowed to be oversubscribed, even on a managed system, and -overloading of processing elements. -. -.TP -.B -bynode\fR,\fP --bynode -Launch processes one per node, cycling by node in a round-robin -fashion. This spreads processes evenly among nodes and assigns -ranks in a round-robin, "by node" manner. -. -.TP -.B -cpu-list\fR,\fP --cpu-list \fR\fP -List of processor IDs to bind processes to [default=NULL]. -. -. -. -. -.P -To order processes' ranks: -. -. -.TP -.B --rank-by \fR\fP -Rank in round-robin fashion according to the specified object, -defaults to \fIslot\fP. Supported options -include slot, hwthread, core, L1cache, L2cache, L3cache, -socket, numa, board, and node. -. -. -. -. 
-.P -For process binding: -. -.TP -.B --bind-to \fR\fP -Bind processes to the specified object, defaults to \fIcore\fP. Supported options -include slot, hwthread, core, l1cache, l2cache, l3cache, socket, numa, board, and none. -. -.TP -.B -cpus-per-proc\fR,\fP --cpus-per-proc \fR<#perproc>\fP -Bind each process to the specified number of cpus. -(deprecated in favor of --map-by :PE=n) -. -.TP -.B -cpus-per-rank\fR,\fP --cpus-per-rank \fR<#perrank>\fP -Alias for \fI-cpus-per-proc\fP. -(deprecated in favor of --map-by :PE=n) -. -.TP -.B -bind-to-core\fR,\fP --bind-to-core -Bind processes to cores (deprecated in favor of --bind-to core) -. -.TP -.B -bind-to-socket\fR,\fP --bind-to-socket -Bind processes to processor sockets (deprecated in favor of --bind-to socket) -. -.TP -.B -report-bindings\fR,\fP --report-bindings -Report any bindings for launched processes. -. -. -. -. -.P -For rankfiles: -. -. -.TP -.B -rf\fR,\fP --rankfile \fR\fP -Provide a rankfile file. -. -. -. -. -.P -To manage standard I/O: -. -. -.TP -.B -output-filename\fR,\fP --output-filename \fR\fP -Redirect the stdout, stderr, and stddiag of all processes to a process-unique version of -the specified filename. Any directories in the filename will automatically be created. -Each output file will consist of filename.id, where the id will be the -processes' rank, left-filled with -zero's for correct ordering in listings. -. -. -.TP -.B -stdin\fR,\fP --stdin\fR \fP -The rank of the process that is to receive stdin. The -default is to forward stdin to rank 0, but this option -can be used to forward stdin to any process. It is also acceptable to -specify \fInone\fP, indicating that no processes are to receive stdin. -. -. -.TP -.B -merge-stderr-to-stdout\fR,\fP --merge-stderr-to-stdout -Merge stderr to stdout for each process. -. -. 
-.TP -.B -tag-output\fR,\fP --tag-output -Tag each line of output to stdout, stderr, and stddiag with \fB[jobid, MCW_rank]\fP -indicating the process jobid and rank of the process that generated the output, -and the channel which generated it. -. -. -.TP -.B -timestamp-output\fR,\fP --timestamp-output -Timestamp each line of output to stdout, stderr, and stddiag. -. -. -.TP -.B -xml\fR,\fP --xml -Provide all output to stdout, stderr, and stddiag in an xml format. -. -. -.TP -.B -xml-file\fR,\fP --xml-file \fR\fP -Provide all output in XML format to the specified file. -. -. -.TP -.B -xterm\fR,\fP --xterm \fR\fP -Display the output from the processes identified by their ranks in separate xterm windows. The ranks are specified -as a comma-separated list of ranges, with a -1 indicating all. A separate -window will be created for each specified process. -.B Note: -xterm will normally terminate the window upon termination of the process running -within it. However, by adding a "!" to the end of the list of specified ranks, -the proper options will be provided to ensure that xterm keeps the window open -\fIafter\fP the process terminates, thus allowing you to see the process' output. -Each xterm window will subsequently need to be manually closed. -.B Note: -In some environments, xterm may require that the executable be in the user's -path, or be specified in absolute or relative terms. Thus, it may be necessary -to specify a local executable as "./foo" instead of just "foo". If xterm fails to -find the executable, prun will hang, but still respond correctly to a ctrl-c. -If this happens, please check that the executable is being specified correctly -and try again. -. -. -. -. -.P -To manage files and runtime environment: -. -. -.TP -.B -path\fR,\fP --path \fR\fP - that will be used when attempting to locate the requested -executables. This is used prior to using the local PATH setting. -. -. 
-.TP -.B --prefix \fR\fP -Prefix directory that will be used to set the \fIPATH\fR and -\fILD_LIBRARY_PATH\fR on the remote node before invoking -the target process. See the "Remote Execution" section, below. -. -. -.TP -.B --noprefix -Disable the automatic --prefix behavior -. -. -.TP -.B -s\fR,\fP --preload-binary -Copy the specified executable(s) to remote machines prior to starting remote processes. The -executables will be copied to the session directory and will be deleted upon -completion of the job. -. -. -.TP -.B --preload-files \fR\fP -Preload the comma separated list of files to the current working directory of the remote -machines where processes will be launched prior to starting those processes. -. -. -.TP -.B -set-cwd-to-session-dir\fR,\fP --set-cwd-to-session-dir -Set the working directory of the started processes to their session directory. -. -. -.TP -.B -wd \fR\fP -Synonym for \fI-wdir\fP. -. -. -.TP -.B -wdir \fR\fP -Change to the directory before the user's program executes. -See the "Current Working Directory" section for notes on relative paths. -.B Note: -If the \fI-wdir\fP option appears both on the command line and in an -application context, the context will take precedence over the command -line. Thus, if the path to the desired wdir is different -on the backend nodes, then it must be specified as an absolute path that -is correct for the backend node. -. -. -.TP -.B -x \fR\fP -Export the specified environment variables to the remote nodes before -executing the program. Only one environment variable can be specified -per \fI-x\fP option. Existing environment variables can be specified -or new variable names specified with corresponding values. For -example: - \fB%\fP prun -x DISPLAY -x OFILE=/tmp/out ... - -The parser for the \fI-x\fP option is not very sophisticated; it does -not even understand quoted values. Users are advised to set variables -in the environment, and then use \fI-x\fP to export (not define) them. -. -. -. -. 
-.P -Setting MCA parameters: -. -. -.TP -.B -gpmca\fR,\fP --gpmca \fR \fP -Pass global MCA parameters that are applicable to all contexts. \fI\fP is -the parameter name; \fI\fP is the parameter value. -. -. -.TP -.B -pmca\fR,\fP --pmca \fR \fP -Send arguments to various MCA modules. See the "MCA" section, below. -. -. -.TP -.B -am \fR\fP -Aggregate MCA parameter set file list. -. -. -.TP -.B -tune\fR,\fP --tune \fR\fP -Specify a tune file to set arguments for various MCA modules and environment variables. -See the "Setting MCA parameters and environment variables from file" section, below. -. -. -. -. -.P -For debugging: -. -. -.TP -.B -debug\fR,\fP --debug -Invoke the user-level debugger indicated by the \fIorte_base_user_debugger\fP -MCA parameter. -. -. -.TP -.B --get-stack-traces -When paired with the -.B --timeout -option, -.I prun -will obtain and print out stack traces from all launched processes -that are still alive when the timeout expires. Note that obtaining -stack traces can take a little time and produce a lot of output, -especially for large process-count jobs. -. -. -.TP -.B -debugger\fR,\fP --debugger \fR\fP -Sequence of debuggers to search for when \fI--debug\fP is used (i.e. -a synonym for \fIorte_base_user_debugger\fP MCA parameter). -. -. -.TP -.B --timeout \fR -The maximum number of seconds that -.I prun -will run. After this many seconds, -.I prun -will abort the launched job and exit with a non-zero exit status. -Using -.B --timeout -can be also useful when combined with the -.B --get-stack-traces -option. -. -. -.TP -.B -tv\fR,\fP --tv -Launch processes under the TotalView debugger. -Deprecated backwards compatibility flag. Synonym for \fI--debug\fP. -. -. -. -. -.P -There are also other options: -. -. -.TP -.B --allow-run-as-root -Allow -.I prun -to run when executed by the root user -.RI ( prun -defaults to aborting when launched as the root user). -. -. -.TP -.B --app \fR\fP -Provide an appfile, ignoring all other command line options. 
-. -. -.TP -.B -cf\fR,\fP --cartofile \fR\fP -Provide a cartography file. -. -. -.TP -.B -continuous\fR,\fP --continuous -Job is to run until explicitly terminated. -. -. -.TP -.B -disable-recovery\fR,\fP --disable-recovery -Disable recovery (resets all recovery options to off). -. -. -.TP -.B -do-not-launch\fR,\fP --do-not-launch -Perform all necessary operations to prepare to launch the application, but do not actually launch it. -. -. -.TP -.B -do-not-resolve\fR,\fP --do-not-resolve -Do not attempt to resolve interfaces. -. -. -.TP -.B -enable-recovery\fR,\fP --enable-recovery -Enable recovery from process failure [Default = disabled]. -. -. -.TP -.B -index-argv-by-rank\fR,\fP --index-argv-by-rank -Uniquely index argv[0] for each process using its rank. -. -. -.TP -.B -max-restarts\fR,\fP --max-restarts \fR\fP -Max number of times to restart a failed process. -. -. -.TP -.B --ppr \fR\fP -Comma-separated list of number of processes on a given resource type [default: none]. -. -. -.TP -.B -report-child-jobs-separately\fR,\fP --report-child-jobs-separately -Return the exit status of the primary job only. -. -. -.TP -.B -report-events\fR,\fP --report-events \fR\fP -Report events to a tool listening at the specified URI. -. -. -.TP -.B -report-pid\fR,\fP --report-pid \fR\fP -Print out prun's PID during startup. The channel must be either a '-' to indicate -that the pid is to be output to stdout, a '+' to indicate that the pid is to be -output to stderr, or a filename to which the pid is to be written. -. -. -.TP -.B -report-uri\fR,\fP --report-uri \fR\fP -Print out prun's URI during startup. The channel must be either a '-' to indicate -that the URI is to be output to stdout, a '+' to indicate that the URI is to be -output to stderr, or a filename to which the URI is to be written. -. -. -.TP -.B -show-progress\fR,\fP --show-progress -Output a brief periodic report on launch progress. -. -. -.TP -.B -terminate\fR,\fP --terminate -Terminate the DVM. -. -. 
-.TP -.B -use-hwthread-cpus\fR,\fP --use-hwthread-cpus -Use hardware threads as independent cpus. -. -. -.TP -.B -use-regexp\fR,\fP --use-regexp -Use regular expressions for launch. -. -. -. -. -.P -The following options are useful for developers; they are not generally -useful to most users: -. -.TP -.B -d\fR,\fP --debug-devel -Enable debugging. This is not generally useful for most users. -. -. -.TP -.B -display-devel-allocation\fR,\fP --display-devel-allocation -Display a detailed list of the allocation being used by this job. -. -. -.TP -.B -display-devel-map\fR,\fP --display-devel-map -Display a more detailed table showing the mapped location of each process prior to launch. -. -. -.TP -.B -display-diffable-map\fR,\fP --display-diffable-map -Display a diffable process map just before launch. -. -. -.TP -.B -display-topo\fR,\fP --display-topo -Display the topology as part of the process map just before launch. -. -. -.TP -.B --report-state-on-timeout -When paired with the -.B --timeout -command line option, report the run-time subsystem state of each -process when the timeout expires. -. -. -.P -There may be other options listed with \fIprun --help\fP. -. -. -.\" ************************** -.\" Description Section -.\" ************************** -.SH DESCRIPTION -. -One invocation of \fIprun\fP starts an application running under PSRVR. If the application is single process multiple data (SPMD), the application -can be specified on the \fIprun\fP command line. - -If the application is multiple instruction multiple data (MIMD), comprising -multiple programs, the set of programs and arguments can be specified in one of -two ways: Extended Command Line Arguments, and Application Context. -.PP -An application context describes the MIMD program set including all arguments -in a separate file. -.\" See appcontext(5) for a description of the application context syntax. -This file essentially contains multiple \fIprun\fP command lines, less the -command name itself.
The ability to specify different options for different -instantiations of a program is another reason to use an application context. -.PP -Extended command line arguments allow for the description of the application -layout on the command line using colons (\fI:\fP) to separate the specification -of programs and arguments. Some options are globally set across all specified -programs (e.g. --hostfile), while others are specific to a single program -(e.g. -np). -. -. -. -.SS Specifying Host Nodes -. -Host nodes can be identified on the \fIprun\fP command line with the \fI-host\fP -option or in a hostfile. -. -.PP -For example, -. -.TP 4 -prun -H aa,aa,bb ./a.out -launches two processes on node aa and one on bb. -. -.PP -Or, consider the hostfile -. - - \fB%\fP cat myhostfile - aa slots=2 - bb slots=2 - cc slots=2 - -. -.PP -Here, we list both the host names (aa, bb, and cc) but also how many "slots" -there are for each. Slots indicate how many processes can potentially execute -on a node. For best performance, the number of slots may be chosen to be the -number of cores on the node or the number of processor sockets. If the hostfile -does not provide slots information, PSRVR will attempt to discover the number -of cores (or hwthreads, if the use-hwthreads-as-cpus option is set) and set the -number of slots to that value. This default behavior also occurs when specifying -the \fI-host\fP option with a single hostname. Thus, the command -. -.TP 4 -prun -H aa ./a.out -launches a number of processes equal to the number of cores on node aa. -. -.PP -. -.TP 4 -prun -hostfile myhostfile ./a.out -will launch two processes on each of the three nodes. -. -.TP 4 -prun -hostfile myhostfile -host aa ./a.out -will launch two processes, both on node aa. -. -.TP 4 -prun -hostfile myhostfile -host dd ./a.out -will find no hosts to run on and abort with an error. -That is, the specified host dd is not in the specified hostfile. -. 
-.PP -When running under resource managers (e.g., SLURM, Torque, etc.), -PSRVR will obtain both the hostnames and the number of slots directly -from the resource manager. -. -.SS Specifying Number of Processes -. -As we have just seen, the number of processes to run can be set using the -hostfile. Other mechanisms exist. -. -.PP -The number of processes launched can be specified as a multiple of the -number of nodes or processor sockets available. For example, -. -.TP 4 -prun -H aa,bb -npersocket 2 ./a.out -launches processes 0-3 on node aa and processes 4-7 on node bb, -where aa and bb are both dual-socket nodes. -The \fI-npersocket\fP option also turns on the \fI-bind-to-socket\fP option, -which is discussed in a later section. -. -.TP 4 -prun -H aa,bb -npernode 2 ./a.out -launches processes 0-1 on node aa and processes 2-3 on node bb. -. -.TP 4 -prun -H aa,bb -npernode 1 ./a.out -launches one process per host node. -. -.TP 4 -prun -H aa,bb -pernode ./a.out -is the same as \fI-npernode\fP 1. -. -. -.PP -Another alternative is to specify the number of processes with the -\fI-np\fP option. Consider now the hostfile -. - - \fB%\fP cat myhostfile - aa slots=4 - bb slots=4 - cc slots=4 - -. -.PP -Now, -. -.TP 4 -prun -hostfile myhostfile -np 6 ./a.out -will launch processes 0-3 on node aa and processes 4-5 on node bb. The remaining -slots in the hostfile will not be used since the \fI-np\fP option indicated -that only 6 processes should be launched. -. -.SS Mapping Processes to Nodes: Using Policies -. -The examples above illustrate the default mapping of processes -to nodes. This mapping can also be controlled with various -\fIprun\fP options that describe mapping policies. -. -. -.PP -Consider the same hostfile as above, again with \fI-np\fP 6: -. - - node aa node bb node cc - - prun 0 1 2 3 4 5 - - prun --map-by node 0 3 1 4 2 5 - - prun -nolocal 0 1 2 3 4 5 -.
-.PP -The \fI--map-by node\fP option will load balance the processes across -the available nodes, numbering each process in a round-robin fashion. -. -.PP -The \fI-nolocal\fP option prevents any processes from being mapped onto the -local host (in this case node aa). While \fIprun\fP typically consumes -few system resources, \fI-nolocal\fP can be helpful for launching very -large jobs where \fIprun\fP may actually need to use noticeable amounts -of memory and/or processing time. -. -.PP -Just as \fI-np\fP can specify fewer processes than there are slots, it can -also oversubscribe the slots. For example, with the same hostfile: -. -.TP 4 -prun -hostfile myhostfile -np 14 ./a.out -will launch processes 0-3 on node aa, 4-7 on bb, and 8-11 on cc. It will -then add the remaining two processes to whichever nodes it chooses. -. -.PP -One can also specify limits to oversubscription. For example, with the same -hostfile: -. -.TP 4 -prun -hostfile myhostfile -np 14 -nooversubscribe ./a.out -will produce an error since \fI-nooversubscribe\fP prevents oversubscription. -. -.PP -Limits to oversubscription can also be specified in the hostfile itself: -. - % cat myhostfile - aa slots=4 max_slots=4 - bb max_slots=4 - cc slots=4 -. -.PP -The \fImax_slots\fP field specifies such a limit. When it does, the -\fIslots\fP value defaults to the limit. Now: -. -.TP 4 -prun -hostfile myhostfile -np 14 ./a.out -causes the first 12 processes to be launched as before, but the remaining -two processes will be forced onto node cc. The other two nodes are -protected by the hostfile against oversubscription by this job. -. -.PP -Using the \fI--nooversubscribe\fR option can be helpful since PSRVR -currently does not get "max_slots" values from the resource manager. -. -.PP -Of course, \fI-np\fP can also be used with the \fI-H\fP or \fI-host\fP -option. For example, -. -.TP 4 -prun -H aa,bb -np 8 ./a.out -launches 8 processes. 
Since only two hosts are specified, after the first -two processes are mapped, one to aa and one to bb, the remaining processes -oversubscribe the specified hosts. -. -.PP -And here is a MIMD example: -. -.TP 4 -prun -H aa -np 1 hostname : -H bb,cc -np 2 uptime -will launch process 0 running \fIhostname\fP on node aa and processes 1 and 2 -each running \fIuptime\fP on nodes bb and cc, respectively. -. -.SS Mapping, Ranking, and Binding: Oh My! -. -PSRVR employs a three-phase procedure for assigning process locations and -ranks: -. -.TP 10 -\fBmapping\fP -Assigns a default location to each process -. -.TP 10 -\fBranking\fP -Assigns a rank value to each process -. -.TP 10 -\fBbinding\fP -Constrains each process to run on specific processors -. -.PP -The \fImapping\fP step is used to assign a default location to each process -based on the mapper being employed. Mapping by slot, node, and sequentially results -in the assignment of the processes to the node level. In contrast, mapping by object allows -the mapper to assign the process to an actual object on each node. -. -.PP -\fBNote:\fP the location assigned to the process is independent of where it will be bound - the -assignment is used solely as input to the binding algorithm. -. -.PP -The mapping of processes to nodes can be defined not just -with general policies but also, if necessary, using arbitrary mappings -that cannot be described by a simple policy. One can use the "sequential -mapper," which reads the hostfile line by line, assigning processes -to nodes in whatever order the hostfile specifies. Use the -\fI-pmca rmaps seq\fP option. For example, using the same hostfile -as before: -. -.PP -prun -hostfile myhostfile -pmca rmaps seq ./a.out -. -.PP -will launch three processes, one on each of nodes aa, bb, and cc, respectively. -The slot counts don't matter; one process is launched per line on -whatever node is listed on the line. -.
-.PP -Another way to specify arbitrary mappings is with a rankfile, which -gives you detailed control over process binding as well. Rankfiles -are discussed below. -. -.PP -The second phase focuses on the \fIranking\fP of the process within -the job. PSRVR -separates this from the mapping procedure to allow more flexibility in the -relative placement of processes. This is best illustrated by considering the -following three cases where we used the --map-by ppr:2:socket option: -. -.PP - node aa node bb - - rank-by core 0 1 ! 2 3 4 5 ! 6 7 - - rank-by socket 0 2 ! 1 3 4 6 ! 5 7 - - rank-by socket:span 0 4 ! 1 5 2 6 ! 3 7 -. -.PP -Ranking by core and by slot provide the identical result - a simple -progression of ranks across each node. Ranking by -socket does a round-robin ranking within each node until all processes -have been assigned a rank, and then progresses to the next -node. Adding the \fIspan\fP modifier to the ranking directive causes -the ranking algorithm to treat the entire allocation as a single -entity - thus, the MCW ranks are assigned across all sockets before -circling back around to the beginning. -. -.PP -The \fIbinding\fP phase actually binds each process to a given set of processors. This can -improve performance if the operating system is placing processes -suboptimally. For example, it might oversubscribe some multi-core -processor sockets, leaving other sockets idle; this can lead -processes to contend unnecessarily for common resources. Or, it -might spread processes out too widely; this can be suboptimal if -application performance is sensitive to interprocess communication -costs. Binding can also keep the operating system from migrating -processes excessively, regardless of how optimally those processes -were placed to begin with. -.
-.PP -The processors to be used for binding can be identified in terms of -topological groupings - e.g., binding to an l3cache will bind each -process to all processors within the scope of a single L3 cache within -their assigned location. Thus, if a process is assigned by the mapper -to a certain socket, then a \fI—bind-to l3cache\fP directive will -cause the process to be bound to the processors that share a single L3 -cache within that socket. -. -.PP -To help balance loads, the binding directive uses a round-robin method when binding to -levels lower than used in the mapper. For example, consider the case where a job is -mapped to the socket level, and then bound to core. Each socket will have multiple cores, -so if multiple processes are mapped to a given socket, the binding algorithm will assign -each process located to a socket to a unique core in a round-robin manner. -. -.PP -Alternatively, processes mapped by l2cache and then bound to socket will simply be bound -to all the processors in the socket where they are located. In this manner, users can -exert detailed control over relative MCW rank location and binding. -. -.PP -Finally, \fI--report-bindings\fP can be used to report bindings. -. -.PP -As an example, consider a node with two processor sockets, each comprising -four cores. We run \fIprun\fP with \fI-np 4 --report-bindings\fP and -the following additional options: -. - - % prun ... --map-by core --bind-to core - [...] ... binding child [...,0] to cpus 0001 - [...] ... binding child [...,1] to cpus 0002 - [...] ... binding child [...,2] to cpus 0004 - [...] ... binding child [...,3] to cpus 0008 - - % prun ... --map-by socket --bind-to socket - [...] ... binding child [...,0] to socket 0 cpus 000f - [...] ... binding child [...,1] to socket 1 cpus 00f0 - [...] ... binding child [...,2] to socket 0 cpus 000f - [...] ... binding child [...,3] to socket 1 cpus 00f0 - - % prun ... --map-by core:PE=2 --bind-to core - [...] ... 
binding child [...,0] to cpus 0003 - [...] ... binding child [...,1] to cpus 000c - [...] ... binding child [...,2] to cpus 0030 - [...] ... binding child [...,3] to cpus 00c0 - - % prun ... --bind-to none -. -.PP -Here, \fI--report-bindings\fP shows the binding of each process as a mask. -In the first case, the processes bind to successive cores as indicated by -the masks 0001, 0002, 0004, and 0008. In the second case, processes bind -to all cores on successive sockets as indicated by the masks 000f and 00f0. -The processes cycle through the processor sockets in a round-robin fashion -as many times as are needed. In the third case, the masks show us that -2 cores have been bound per process. In the fourth case, binding is -turned off and no bindings are reported. -. -.PP -PSRVR's support for process binding depends on the underlying -operating system. Therefore, certain process binding options may not be available -on every system. -. -.PP -Process binding can also be set with MCA parameters. -Their usage is less convenient than that of \fIprun\fP options. -On the other hand, MCA parameters can be set not only on the \fIprun\fP -command line, but alternatively in a system or user mca-params.conf file -or as environment variables, as described in the MCA section below. -Some examples include: -. -.PP - prun option MCA parameter key value - - --map-by core rmaps_base_mapping_policy core - --map-by socket rmaps_base_mapping_policy socket - --rank-by core rmaps_base_ranking_policy core - --bind-to core hwloc_base_binding_policy core - --bind-to socket hwloc_base_binding_policy socket - --bind-to none hwloc_base_binding_policy none -. -. -.SS Rankfiles -. -Rankfiles are text files that specify detailed information about how -individual processes should be mapped to nodes, and to which -processor(s) they should be bound. Each line of a rankfile specifies -the location of one process. The general form of each line in the -rankfile is: -. - - rank = slot= -. 
-.PP -For example: -. - - $ cat myrankfile - rank 0=aa slot=1:0-2 - rank 1=bb slot=0:0,1 - rank 2=cc slot=1-2 - $ prun -H aa,bb,cc,dd -rf myrankfile ./a.out -. -.PP -This means that -. - - Rank 0 runs on node aa, bound to logical socket 1, cores 0-2. - Rank 1 runs on node bb, bound to logical socket 0, cores 0 and 1. - Rank 2 runs on node cc, bound to logical cores 1 and 2. -. -.PP -Rankfiles can alternatively be used to specify \fIphysical\fP processor -locations. In this case, the syntax is somewhat different. Sockets are -no longer recognized, and the slot number given must be the number of -the physical PU as most OS's do not assign a unique physical identifier -to each core in the node. Thus, a proper physical rankfile looks something -like the following: -. - - $ cat myphysicalrankfile - rank 0=aa slot=1 - rank 1=bb slot=8 - rank 2=cc slot=6 -. -.PP -This means that -. - - Rank 0 will run on node aa, bound to the core that contains physical PU 1 - Rank 1 will run on node bb, bound to the core that contains physical PU 8 - Rank 2 will run on node cc, bound to the core that contains physical PU 6 -. -.PP -Rankfiles are treated as \fIlogical\fP by default, and the MCA parameter -rmaps_rank_file_physical must be set to 1 to indicate that the rankfile -is to be considered as \fIphysical\fP. -. -.PP -The hostnames listed above are "absolute," meaning that actual -resolvable hostnames are specified. However, hostnames can also be -specified as "relative," meaning that they are specified in relation -to an externally-specified list of hostnames (e.g., by prun's --host -argument, a hostfile, or a job scheduler). -. -.PP -The "relative" specification is of the form "+n<X>", where X is an -integer specifying the Xth hostname in the set of all available -hostnames, indexed from 0. For example: -. - - $ cat myrankfile - rank 0=+n0 slot=1:0-2 - rank 1=+n1 slot=0:0,1 - rank 2=+n2 slot=1-2 - $ prun -H aa,bb,cc,dd -rf myrankfile ./a.out -. 
-.PP -All socket/core slot locations are -specified as -.I logical -indexes. You can use tools such as HWLOC's "lstopo" to find the -logical indexes of sockets and cores. -. -. -.SS Application Context or Executable Program? -. -To distinguish the two different forms, \fIprun\fP -looks on the command line for the \fI--app\fP option. If -it is specified, then the file named on the command line is -assumed to be an application context. If it is not -specified, then the file is assumed to be an executable program. -. -. -. -.SS Locating Files -. -If no relative or absolute path is specified for a file, prun will first look for files by searching the directories specified -by the \fI--path\fP option. If there is no \fI--path\fP option set or -if the file is not found at the \fI--path\fP location, then prun -will search the user's PATH environment variable as defined on the -source node(s). -.PP -If a relative directory is specified, it must be relative to the initial -working directory determined by the specific starter used. For example when -using the rsh or ssh starters, the initial directory is $HOME by default. Other -starters may set the initial directory to the current working directory from -the invocation of \fIprun\fP. -. -. -. -.SS Current Working Directory -. -The \fI\-wdir\fP prun option (and its synonym, \fI\-wd\fP) allows -the user to change to an arbitrary directory before the program is -invoked. It can also be used in application context files to specify -working directories on specific nodes and/or for specific -applications. -.PP -If the \fI\-wdir\fP option appears both in a context file and on the -command line, the context file directory will override the command -line value. -.PP -If the \fI-wdir\fP option is specified, prun will attempt to -change to the specified directory on all of the remote nodes. If this -fails, \fIprun\fP will abort. 
-.PP -If the \fI-wdir\fP option is \fBnot\fP specified, prun will send -the directory name where \fIprun\fP was invoked to each of the -remote nodes. The remote nodes will try to change to that -directory. If they are unable (e.g., if the directory does not exist on -that node), then prun will use the default directory determined by -the starter. -.PP -All directory changing occurs before the user's program is invoked. -. -. -. -.SS Standard I/O -. -PSRVR directs UNIX standard input to /dev/null on all processes -except the rank 0 process. The rank 0 process -inherits standard input from \fIprun\fP. -.B Note: -The node that invoked \fIprun\fP need not be the same as the node where the -rank 0 process resides. PSRVR handles the redirection of -\fIprun\fP's standard input to the rank 0 process. -.PP -PSRVR directs UNIX standard output and error from remote nodes to the node -that invoked \fIprun\fP and prints it on the standard output/error of -\fIprun\fP. -Local processes inherit the standard output/error of \fIprun\fP and transfer -to it directly. -.PP -Thus it is possible to redirect standard I/O for applications by -using the typical shell redirection procedure on \fIprun\fP. - - \fB%\fP prun -np 2 my_app < my_input > my_output - -Note that in this example \fIonly\fP the rank 0 process will -receive the stream from \fImy_input\fP on stdin. The stdin on all the other -nodes will be tied to /dev/null. However, the stdout from all nodes will -be collected into the \fImy_output\fP file. -. -. -. -.SS Signal Propagation -. -When prun receives a SIGTERM or SIGINT, it will attempt to kill -the entire job by sending all processes in the job a SIGTERM, waiting -a small number of seconds, then sending all processes in the job a -SIGKILL. -. -.PP -SIGUSR1 and SIGUSR2 signals received by prun are propagated to -all processes in the job. -. 
-.PP -A SIGTSTP signal to prun will cause a SIGSTOP signal to be sent -to all of the programs started by prun and likewise a SIGCONT signal -to prun will cause a SIGCONT to be sent. -. -.PP -Other signals are not currently propagated -by prun. -. -. -.SS Process Termination / Signal Handling -. -During the run of an application, if any process dies abnormally -(either exiting before invoking \fIPMIx_Finalize\fP, or dying as the result of a -signal), \fIprun\fP will print out an error message and kill the rest of the -application. -.PP -. -. -.SS Process Environment -. -Processes in the application inherit their environment from the -PSRVR daemon upon the node on which they are running. The -environment is typically inherited from the user's shell. On remote -nodes, the exact environment is determined by the boot MCA module -used. The \fIrsh\fR launch module, for example, uses either -\fIrsh\fR/\fIssh\fR to launch the PSRVR daemon on remote nodes, and -typically executes one or more of the user's shell-setup files before -launching the daemon. When running dynamically linked -applications which require the \fILD_LIBRARY_PATH\fR environment -variable to be set, care must be taken to ensure that it is correctly -set when booting PSRVR. -.PP -See the "Remote Execution" section for more details. -. -. -.SS Remote Execution -. -PSRVR requires that the \fIPATH\fR environment variable be set to -find executables on remote nodes (this is typically only necessary in -\fIrsh\fR- or \fIssh\fR-based environments -- batch/scheduled -environments typically copy the current environment to the execution -of remote jobs, so if the current environment has \fIPATH\fR and/or -\fILD_LIBRARY_PATH\fR set properly, the remote nodes will also have it -set properly). 
 If PSRVR was compiled with shared library support, -it may also be necessary to have the \fILD_LIBRARY_PATH\fR environment -variable set on remote nodes as well (especially to find the shared -libraries required to run user applications). -.PP -However, it is not always desirable or possible to edit shell -startup files to set \fIPATH\fR and/or \fILD_LIBRARY_PATH\fR. The -\fI--prefix\fR option is provided for some simple configurations where -this is not possible. -.PP -The \fI--prefix\fR option takes a single argument: the base directory -on the remote node where PSRVR is installed. PSRVR will use -this directory to set the remote \fIPATH\fR and \fILD_LIBRARY_PATH\fR -before executing any user applications. This allows -running jobs without having pre-configured the \fIPATH\fR and -\fILD_LIBRARY_PATH\fR on the remote nodes. -.PP -PSRVR adds the basename of the current -node's "bindir" (the directory where PSRVR's executables are -installed) to the prefix and uses that to set the \fIPATH\fR on the -remote node. Similarly, PSRVR adds the basename of the current -node's "libdir" (the directory where PSRVR's libraries are -installed) to the prefix and uses that to set the -\fILD_LIBRARY_PATH\fR on the remote node. For example: -.TP 15 -Local bindir: -/local/node/directory/bin -.TP -Local libdir: -/local/node/directory/lib64 -.PP -If the following command line is used: - - \fB%\fP prun --prefix /remote/node/directory - -PSRVR will add "/remote/node/directory/bin" to the \fIPATH\fR -and "/remote/node/directory/lib64" to the \fILD_LIBRARY_PATH\fR on the -remote node before attempting to execute anything. -.PP -The \fI--prefix\fR option is not sufficient if the installation paths -on the remote node are different than the local node (e.g., if "/lib" -is used on the local node, but "/lib64" is used on the remote node), -or if the installation paths are something other than a subdirectory -under a common prefix. 
-.PP -Note that executing \fIprun\fR via an absolute pathname is -equivalent to specifying \fI--prefix\fR without the last subdirectory -in the absolute pathname to \fIprun\fR. For example: - - \fB%\fP /usr/local/bin/prun ... - -is equivalent to - - \fB%\fP prun --prefix /usr/local -. -. -. -.SS Exported Environment Variables -. -All environment variables that are named in the form PMIX_* will automatically -be exported to new processes on the local and remote nodes. Environmental -parameters can also be set/forwarded to the new processes using the MCA -parameter \fImca_base_env_list\fP. While the syntax of the \fI\-x\fP option and MCA param -allows the definition of new variables, note that the parser -for these options is currently not very sophisticated - it does not even -understand quoted values. Users are advised to set variables in the -environment and use the option to export them; not to define them. -. -. -. -.SS Setting MCA Parameters -. -The \fI-pmca\fP switch allows the passing of parameters to various MCA -(Modular Component Architecture) modules. -.\" PSRVR's MCA modules are described in detail in psrvrmca(7). -MCA modules have direct impact on programs because they allow tunable -parameters to be set at run time (such as which BTL communication device driver -to use, what parameters to pass to that BTL, etc.). -.PP -The \fI-pmca\fP switch takes two arguments: \fI<key>\fP and \fI<value>\fP. -The \fI<key>\fP argument generally specifies which MCA module will receive the value. -For example, the \fI<key>\fP "btl" is used to select which BTL to be used for -transporting messages. The \fI<value>\fP argument is the value that is -passed. -For example: -. -.TP 4 -prun -pmca btl tcp,self -np 1 foo -Tells PSRVR to use the "tcp" and "self" BTLs, and to run a single copy of -"foo" on an allocated node. -. -.TP -prun -pmca btl self -np 1 foo -Tells PSRVR to use the "self" BTL, and to run a single copy of "foo" on an -allocated node. -.\" And so on. 
 PSRVR's BTL MCA modules are described in psrvrmca_btl(7). -.PP -The \fI-pmca\fP switch can be used multiple times to specify different -\fI<key>\fP and/or \fI<value>\fP arguments. If the same \fI<key>\fP is -specified more than once, the \fI<value>\fPs are concatenated with a comma -(",") separating them. -.PP -Note that the \fI-pmca\fP switch is simply a shortcut for setting environment variables. -The same effect may be accomplished by setting corresponding environment -variables before running \fIprun\fP. -The form of the environment variables that PSRVR sets is: - - PMIX_MCA_<key>=<value> -.PP -Thus, the \fI-pmca\fP switch overrides any previously set environment -variables. The \fI-pmca\fP settings similarly override MCA parameters set -in the -$OPAL_PREFIX/etc/psrvr-mca-params.conf or $HOME/.psrvr/mca-params.conf -file. -. -.PP -Unknown \fI<key>\fP arguments are still set as -environment variables -- they are not checked (by \fIprun\fP) for correctness. -Illegal or incorrect \fI<value>\fP arguments may or may not be reported -- it -depends on the specific MCA module. -.PP -To find the available component types under the MCA architecture, or to find the -available parameters for a specific component, use the \fIpinfo\fP command. -See the \fIpinfo(1)\fP man page for detailed information on the command. -. -. -. -.SS Setting MCA parameters and environment variables from file. -The \fI-tune\fP command line option and its synonym \fI-pmca mca_base_envar_file_prefix\fP allows a user -to set mca parameters and environment variables with the syntax described below. -This option requires a single file or list of files separated by "," to follow. -.PP -A valid line in the file may contain zero or many "-x", "-pmca", or "--pmca" arguments. -The following patterns are supported: -pmca var val -pmca var "val" -x var=val -x var. -If any argument is duplicated in the file, the last value read will be used. 
-.PP -MCA parameters and environment specified on the command line have higher precedence than variables specified in the file. -. -. -. -.SS Running as root -. -The PSRVR team strongly advises against executing -.I prun -as the root user. Applications should be run as regular -(non-root) users. -. -.PP -Reflecting this advice, prun will refuse to run as root by default. -To override this default, you can add the -.I --allow-run-as-root -option to the -.I prun -command line. -. -.SS Exit status -. -There is no standard definition for what \fIprun\fP should return as an exit -status. After considerable discussion, we settled on the following method for -assigning the \fIprun\fP exit status (note: in the following description, -the "primary" job is the initial application started by prun - all jobs that -are spawned by that job are designated "secondary" jobs): -. -.IP \[bu] 2 -if all processes in the primary job normally terminate with exit status 0, we return 0 -.IP \[bu] -if one or more processes in the primary job normally terminate with non-zero exit status, -we return the exit status of the process with the lowest rank to have a non-zero status -.IP \[bu] -if all processes in the primary job normally terminate with exit status 0, and one or more -processes in a secondary job normally terminate with non-zero exit status, we (a) return -the exit status of the process with the lowest rank in the lowest jobid to have a non-zero -status, and (b) output a message summarizing the exit status of the primary and all secondary jobs. -.IP \[bu] -if the cmd line option --report-child-jobs-separately is set, we will return -only- the -exit status of the primary job. Any non-zero exit status in secondary jobs will be -reported solely in a summary print statement. -. -.PP -By default, PSRVR records and notes that processes exited with non-zero termination status. 
-This is generally not considered an "abnormal termination" - i.e., PSRVR will not abort a -job if one or more processes return a non-zero status. Instead, the default behavior simply -reports the number of processes terminating with non-zero status upon completion of the job. -.PP -However, in some cases it can be desirable to have the job abort when any process terminates -with non-zero status. For example, a non-PMIx job might detect a bad result from a calculation -and want to abort, but doesn't want to generate a core file. Or a PMIx job might continue past -a call to PMIx_Finalize, but indicate that all processes should abort due to some post-PMIx result. -.PP -It is not anticipated that this situation will occur frequently. However, in the interest of -serving the broader community, PSRVR now has a means for allowing users to direct that jobs be -aborted upon any process exiting with non-zero status. Setting the MCA parameter -"orte_abort_on_non_zero_status" to 1 will cause PSRVR to abort all processes once any process - exits with non-zero status. -.PP -Terminations caused in this manner will be reported on the console as an "abnormal termination", -with the first process to so exit identified along with its exit status. -.PP -.\" ************************** -.\" Return Value Section -.\" ************************** -. -.SH RETURN VALUE -. -\fIprun\fP returns 0 if all processes started by \fIprun\fP exit after calling -PMIx_Finalize. A non-zero value is returned if an internal error occurred in -prun, or one or more processes exited before calling PMIx_Finalize. If an -internal error occurred in prun, the corresponding error code is returned. -In the event that one or more processes exit before calling PMIx_Finalize, the -return value of the rank of the process that \fIprun\fP first notices died -before calling PMIx_Finalize will be returned. Note that, in general, this will -be the first process that died but is not guaranteed to be so. -. 
-.PP -If the -.B --timeout -command line option is used and the timeout expires before the job -completes (thereby forcing -.I prun -to kill the job) -.I prun -will return an exit status equivalent to the value of -.B ETIMEDOUT -(which is typically 110 on Linux and OS X systems). - -. -.\" ************************** -.\" See Also Section -.\" ************************** -. diff --git a/orte/tools/ompi-prun/prun.c b/orte/tools/ompi-prun/prun.c deleted file mode 100644 index 17683b803f5..00000000000 --- a/orte/tools/ompi-prun/prun.c +++ /dev/null @@ -1,1373 +0,0 @@ -/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ -/* - * Copyright (c) 2004-2010 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2008 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * Copyright (c) 2006-2017 Cisco Systems, Inc. All rights reserved - * Copyright (c) 2007-2009 Sun Microsystems, Inc. All rights reserved. - * Copyright (c) 2007-2017 Los Alamos National Security, LLC. All rights - * reserved. - * Copyright (c) 2013-2018 Intel, Inc. All rights reserved. - * Copyright (c) 2015 Research Organization for Information Science - * and Technology (RIST). All rights reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "orte_config.h" -#include "orte/constants.h" - -#include -#include -#include -#ifdef HAVE_STRINGS_H -#include -#endif /* HAVE_STRINGS_H */ -#ifdef HAVE_UNISTD_H -#include -#endif -#ifdef HAVE_SYS_PARAM_H -#include -#endif -#include -#include -#include -#ifdef HAVE_SYS_TYPES_H -#include -#endif /* HAVE_SYS_TYPES_H */ -#ifdef HAVE_SYS_WAIT_H -#include -#endif /* HAVE_SYS_WAIT_H */ -#ifdef HAVE_SYS_TIME_H -#include -#endif /* HAVE_SYS_TIME_H */ -#include -#ifdef HAVE_SYS_STAT_H -#include -#endif - -#include "opal/mca/event/event.h" -#include "opal/mca/installdirs/installdirs.h" -#include "opal/mca/pmix/base/base.h" -#include "opal/mca/base/base.h" -#include "opal/util/argv.h" -#include "opal/util/output.h" -#include "opal/util/basename.h" -#include "opal/util/cmd_line.h" -#include "opal/util/opal_environ.h" -#include "opal/util/opal_getcwd.h" -#include "opal/util/show_help.h" -#include "opal/util/fd.h" -#include "opal/sys/atomic.h" - -#include "opal/version.h" -#include "opal/runtime/opal.h" -#include "opal/runtime/opal_info_support.h" -#include "opal/runtime/opal_progress_threads.h" -#include "opal/util/os_path.h" -#include "opal/util/path.h" -#include "opal/class/opal_pointer_array.h" -#include "opal/dss/dss.h" - -#include "orte/runtime/runtime.h" -#include "orte/runtime/orte_globals.h" -#include "orte/mca/errmgr/errmgr.h" -#include "orte/mca/schizo/base/base.h" -#include "orte/mca/state/state.h" -#include "orte/orted/orted_submit.h" - -/* ensure I can behave like a daemon */ -#include "prun.h" - -typedef struct { - opal_object_t super; - opal_pmix_lock_t lock; - opal_list_t info; -} myinfo_t; -static void mcon(myinfo_t *p) -{ - OPAL_PMIX_CONSTRUCT_LOCK(&p->lock); - OBJ_CONSTRUCT(&p->info, opal_list_t); -} -static void mdes(myinfo_t *p) -{ - OPAL_PMIX_DESTRUCT_LOCK(&p->lock); - OPAL_LIST_DESTRUCT(&p->info); -} -static OBJ_CLASS_INSTANCE(myinfo_t, opal_object_t, - mcon, mdes); 
- -static struct { - bool terminate_dvm; - bool system_server_first; - bool system_server_only; - int pid; -} myoptions; - -static opal_list_t job_info; -static volatile bool active = false; -static orte_jobid_t myjobid = ORTE_JOBID_INVALID; -static myinfo_t myinfo; - -static int create_app(int argc, char* argv[], - opal_list_t *jdata, - opal_pmix_app_t **app, - bool *made_app, char ***app_env); -static int parse_locals(opal_list_t *jdata, int argc, char* argv[]); -static void set_classpath_jar_file(opal_pmix_app_t *app, int index, char *jarfile); -static size_t evid = INT_MAX; - - -static opal_cmd_line_init_t cmd_line_init[] = { - /* tell the dvm to terminate */ - { NULL, '\0', "terminate", "terminate", 0, - &myoptions.terminate_dvm, OPAL_CMD_LINE_TYPE_BOOL, - "Terminate the DVM", OPAL_CMD_LINE_OTYPE_DVM }, - - /* look first for a system server */ - { NULL, '\0', "system-server-first", "system-server-first", 0, - &myoptions.system_server_first, OPAL_CMD_LINE_TYPE_BOOL, - "First look for a system server and connect to it if found", OPAL_CMD_LINE_OTYPE_DVM }, - - /* connect only to a system server */ - { NULL, '\0', "system-server-only", "system-server-only", 0, - &myoptions.system_server_only, OPAL_CMD_LINE_TYPE_BOOL, - "Connect only to a system-level server", OPAL_CMD_LINE_OTYPE_DVM }, - - /* provide a connection PID */ - { NULL, '\0', "pid", "pid", 1, - &myoptions.pid, OPAL_CMD_LINE_TYPE_INT, - "PID of the session-level daemon to which we should connect", - OPAL_CMD_LINE_OTYPE_DVM }, - - /* End of list */ - { NULL, '\0', NULL, NULL, 0, - NULL, OPAL_CMD_LINE_TYPE_NULL, NULL } -}; - - -static void infocb(int status, - opal_list_t *info, - void *cbdata, - opal_pmix_release_cbfunc_t release_fn, - void *release_cbdata) -{ - opal_pmix_lock_t *lock = (opal_pmix_lock_t*)cbdata; - OPAL_ACQUIRE_OBJECT(lock); - - if (NULL != release_fn) { - release_fn(release_cbdata); - } - OPAL_PMIX_WAKEUP_THREAD(lock); -} - -static void regcbfunc(int status, size_t ref, void *cbdata) -{ - 
opal_pmix_lock_t *lock = (opal_pmix_lock_t*)cbdata; - OPAL_ACQUIRE_OBJECT(lock); - evid = ref; - OPAL_PMIX_WAKEUP_THREAD(lock); -} - -static void opcbfunc(int status, void *cbdata) -{ - opal_pmix_lock_t *lock = (opal_pmix_lock_t*)cbdata; - OPAL_ACQUIRE_OBJECT(lock); - OPAL_PMIX_WAKEUP_THREAD(lock); -} - -static bool fired = false; -static void evhandler(int status, - const opal_process_name_t *source, - opal_list_t *info, opal_list_t *results, - opal_pmix_notification_complete_fn_t cbfunc, - void *cbdata) -{ - opal_value_t *val; - int jobstatus=0; - orte_jobid_t jobid = ORTE_JOBID_INVALID; - - /* we should always have info returned to us - if not, there is - * nothing we can do */ - if (NULL != info) { - OPAL_LIST_FOREACH(val, info, opal_value_t) { - if (0 == strcmp(val->key, OPAL_PMIX_JOB_TERM_STATUS)) { - jobstatus = val->data.integer; - } else if (0 == strcmp(val->key, OPAL_PMIX_PROCID)) { - jobid = val->data.name.jobid; - } - } - if (orte_cmd_options.verbose && (myjobid != ORTE_JOBID_INVALID && jobid == myjobid)) { - opal_output(0, "JOB %s COMPLETED WITH STATUS %d", - ORTE_JOBID_PRINT(jobid), jobstatus); - } - } - - /* only terminate if this was our job - keep in mind that we - * can get notifications of job termination prior to our spawn - * having completed! 
*/ - if (!fired && (myjobid != ORTE_JOBID_INVALID && jobid == myjobid)) { - fired = true; - active = false; - } - - /* we _always_ have to execute the evhandler callback or - * else the event progress engine will hang */ - if (NULL != cbfunc) { - cbfunc(OPAL_SUCCESS, NULL, NULL, NULL, cbdata); - } -} - -typedef struct { - opal_pmix_lock_t lock; - opal_list_t list; -} mylock_t; - - -static void setupcbfunc(int status, - opal_list_t *info, - void *provided_cbdata, - opal_pmix_op_cbfunc_t cbfunc, void *cbdata) -{ - mylock_t *mylock = (mylock_t*)provided_cbdata; - opal_value_t *kv; - - if (NULL != info) { - /* cycle across the provided info */ - while (NULL != (kv = (opal_value_t*)opal_list_remove_first(info))) { - opal_list_append(&mylock->list, &kv->super); - } - } - - /* release the caller */ - if (NULL != cbfunc) { - cbfunc(OPAL_SUCCESS, cbdata); - } - - OPAL_PMIX_WAKEUP_THREAD(&mylock->lock); -} - -static void launchhandler(int status, - const opal_process_name_t *source, - opal_list_t *info, opal_list_t *results, - opal_pmix_notification_complete_fn_t cbfunc, - void *cbdata) -{ - opal_value_t *p; - - /* the info list will include the launch directives, so - * transfer those to the myinfo_t for return to the main thread */ - while (NULL != (p = (opal_value_t*)opal_list_remove_first(info))) { - opal_list_append(&myinfo.info, &p->super); - } - - /* we _always_ have to execute the evhandler callback or - * else the event progress engine will hang */ - if (NULL != cbfunc) { - cbfunc(OPAL_SUCCESS, NULL, NULL, NULL, cbdata); - } - - /* now release the thread */ - OPAL_PMIX_WAKEUP_THREAD(&myinfo.lock); -} - -int prun(int argc, char *argv[]) -{ - int rc, i; - char *param; - opal_pmix_lock_t lock; - opal_list_t apps, *lt; - opal_pmix_app_t *app; - opal_value_t *val, *kv, *kv2; - opal_list_t info, codes; - struct timespec tp = {0, 100000}; - mylock_t mylock; - - /* init the globals */ - memset(&orte_cmd_options, 0, sizeof(orte_cmd_options)); - memset(&myoptions, 0, 
sizeof(myoptions)); - OBJ_CONSTRUCT(&job_info, opal_list_t); - OBJ_CONSTRUCT(&apps, opal_list_t); - - /* search the argv for MCA params */ - for (i=0; NULL != argv[i]; i++) { - if (':' == argv[i][0] || - NULL == argv[i+1] || NULL == argv[i+2]) { - break; - } - if (0 == strncmp(argv[i], "-"OPAL_MCA_CMD_LINE_ID, strlen("-"OPAL_MCA_CMD_LINE_ID)) || - 0 == strncmp(argv[i], "--"OPAL_MCA_CMD_LINE_ID, strlen("--"OPAL_MCA_CMD_LINE_ID)) || - 0 == strncmp(argv[i], "-g"OPAL_MCA_CMD_LINE_ID, strlen("-g"OPAL_MCA_CMD_LINE_ID)) || - 0 == strncmp(argv[i], "--g"OPAL_MCA_CMD_LINE_ID, strlen("--g"OPAL_MCA_CMD_LINE_ID))) { - (void) mca_base_var_env_name (argv[i+1], ¶m); - opal_setenv(param, argv[i+2], true, &environ); - free(param); - } else if (0 == strcmp(argv[i], "-am") || - 0 == strcmp(argv[i], "--am")) { - (void)mca_base_var_env_name("mca_base_param_file_prefix", ¶m); - opal_setenv(param, argv[i+1], true, &environ); - free(param); - } else if (0 == strcmp(argv[i], "-tune") || - 0 == strcmp(argv[i], "--tune")) { - (void)mca_base_var_env_name("mca_base_envar_file_prefix", ¶m); - opal_setenv(param, argv[i+1], true, &environ); - free(param); - } - } - - /* init only the util portion of OPAL */ - if (OPAL_SUCCESS != (rc = opal_init_util(&argc, &argv))) { - return rc; - } - - /* set our proc type for schizo selection */ - orte_process_info.proc_type = ORTE_PROC_TOOL; - - /* open the SCHIZO framework so we can setup the command line */ - if (ORTE_SUCCESS != (rc = mca_base_framework_open(&orte_schizo_base_framework, 0))) { - ORTE_ERROR_LOG(rc); - return rc; - } - if (ORTE_SUCCESS != (rc = orte_schizo_base_select())) { - ORTE_ERROR_LOG(rc); - return rc; - } - - /* setup our cmd line */ - orte_cmd_line = OBJ_NEW(opal_cmd_line_t); - if (OPAL_SUCCESS != (rc = opal_cmd_line_add(orte_cmd_line, cmd_line_init))) { - return rc; - } - - /* setup the rest of the cmd line only once */ - if (OPAL_SUCCESS != (rc = orte_schizo.define_cli(orte_cmd_line))) { - return rc; - } - - /* now that options have 
been defined, finish setup */ - mca_base_cmd_line_setup(orte_cmd_line); - - /* parse the result to get values */ - if (OPAL_SUCCESS != (rc = opal_cmd_line_parse(orte_cmd_line, - true, false, argc, argv)) ) { - if (OPAL_ERR_SILENT != rc) { - fprintf(stderr, "%s: command line error (%s)\n", argv[0], - opal_strerror(rc)); - } - return rc; - } - - /* see if print version is requested. Do this before - * check for help so that --version --help works as - * one might expect. */ - if (orte_cmd_options.version) { - char *str; - str = opal_info_make_version_str("all", - OPAL_MAJOR_VERSION, OPAL_MINOR_VERSION, - OPAL_RELEASE_VERSION, - OPAL_GREEK_VERSION, - OPAL_REPO_REV); - if (NULL != str) { - fprintf(stdout, "%s (%s) %s\n\nReport bugs to %s\n", - "prun", "PMIx Reference Server", str, PACKAGE_BUGREPORT); - free(str); - } - exit(0); - } - - /* check if we are running as root - if we are, then only allow - * us to proceed if the allow-run-as-root flag was given. Otherwise, - * exit with a giant warning flag - */ - if (0 == geteuid() && !orte_cmd_options.run_as_root) { - /* show_help is not yet available, so print an error manually */ - fprintf(stderr, "--------------------------------------------------------------------------\n"); - if (orte_cmd_options.help) { - fprintf(stderr, "prun cannot provide the help message when run as root.\n\n"); - } else { - fprintf(stderr, "prun has detected an attempt to run as root.\n\n"); - } - - fprintf(stderr, "Running as root is *strongly* discouraged as any mistake (e.g., in\n"); - fprintf(stderr, "defining TMPDIR) or bug can result in catastrophic damage to the OS\n"); - fprintf(stderr, "file system, leaving your system in an unusable state.\n\n"); - - fprintf(stderr, "We strongly suggest that you run prun as a non-root user.\n\n"); - - fprintf(stderr, "You can override this protection by adding the --allow-run-as-root\n"); - fprintf(stderr, "option to your command line. 
However, we reiterate our strong advice\n"); - fprintf(stderr, "against doing so - please do so at your own risk.\n"); - fprintf(stderr, "--------------------------------------------------------------------------\n"); - exit(1); - } - - /* process any mca params */ - rc = mca_base_cmd_line_process_args(orte_cmd_line, &environ, &environ); - if (ORTE_SUCCESS != rc) { - return rc; - } - - /* Check for help request */ - if (orte_cmd_options.help) { - char *str, *args = NULL; - args = opal_cmd_line_get_usage_msg(orte_cmd_line); - str = opal_show_help_string("help-orterun.txt", "orterun:usage", false, - "prun", "PSVR", OPAL_VERSION, - "prun", args, - PACKAGE_BUGREPORT); - if (NULL != str) { - printf("%s", str); - free(str); - } - free(args); - - /* If someone asks for help, that should be all we do */ - exit(0); - } - - /* ensure we ONLY take the ess/tool component */ - opal_setenv(OPAL_MCA_PREFIX"ess", "tool", true, &environ); - /* tell the ess/tool component how we want to connect */ - if (myoptions.system_server_only) { - opal_setenv(OPAL_MCA_PREFIX"ess_tool_system_server_only", "1", true, &environ); - } - if (myoptions.system_server_first) { - opal_setenv(OPAL_MCA_PREFIX"ess_tool_system_server_first", "1", true, &environ); - } - /* if they specified the DVM's pid, then pass it along */ - if (0 != myoptions.pid) { - asprintf(¶m, "%d", myoptions.pid); - opal_setenv(OPAL_MCA_PREFIX"ess_tool_server_pid", param, true, &environ); - free(param); - } - /* if they specified the URI, then pass it along */ - if (NULL != orte_cmd_options.hnp) { - opal_setenv("PMIX_MCA_ptl_tcp_server_uri", orte_cmd_options.hnp, true, &environ); - } - - /* now initialize ORTE */ - if (OPAL_SUCCESS != (rc = orte_init(&argc, &argv, ORTE_PROC_TOOL))) { - OPAL_ERROR_LOG(rc); - return rc; - } - - /* if the user just wants us to terminate a DVM, then do so */ - if (myoptions.terminate_dvm) { - OBJ_CONSTRUCT(&info, opal_list_t); - val = OBJ_NEW(opal_value_t); - val->key = 
strdup(OPAL_PMIX_JOB_CTRL_TERMINATE); - val->type = OPAL_BOOL; - val->data.flag = true; - opal_list_append(&info, &val->super); - fprintf(stderr, "TERMINATING DVM..."); - OPAL_PMIX_CONSTRUCT_LOCK(&lock); - rc = opal_pmix.job_control(NULL, &info, infocb, (void*)&lock); - OPAL_PMIX_WAIT_THREAD(&lock); - OPAL_PMIX_DESTRUCT_LOCK(&lock); - OPAL_LIST_DESTRUCT(&info); - fprintf(stderr, "DONE\n"); - goto DONE; - } - - /* get here if they want to run an application, so let's parse - * the cmd line to get it */ - - if (OPAL_SUCCESS != (rc = parse_locals(&apps, argc, argv))) { - OPAL_ERROR_LOG(rc); - OPAL_LIST_DESTRUCT(&apps); - goto DONE; - } - - /* bozo check */ - if (0 == opal_list_get_size(&apps)) { - opal_output(0, "No application specified!"); - goto DONE; - } - - /* init flag */ - active = true; - - /* register for job terminations so we get notified when - * our job completes */ - OPAL_PMIX_CONSTRUCT_LOCK(&lock); - OBJ_CONSTRUCT(&info, opal_list_t); - val = OBJ_NEW(opal_value_t); - val->key = strdup("foo"); - val->type = OPAL_INT; - val->data.integer = OPAL_ERR_JOB_TERMINATED; - opal_list_append(&info, &val->super); - opal_pmix.register_evhandler(&info, NULL, evhandler, regcbfunc, &lock); - OPAL_PMIX_WAIT_THREAD(&lock); - OPAL_PMIX_DESTRUCT_LOCK(&lock); - OPAL_LIST_DESTRUCT(&info); - - /* we want to be notified upon job completion */ - val = OBJ_NEW(opal_value_t); - val->key = strdup(OPAL_PMIX_NOTIFY_COMPLETION); - val->type = OPAL_BOOL; - val->data.flag = true; - opal_list_append(&job_info, &val->super); - - /* see if they specified the personality */ - if (NULL != orte_cmd_options.personality) { - val = OBJ_NEW(opal_value_t); - val->key = strdup(OPAL_PMIX_PERSONALITY); - val->type = OPAL_STRING; - val->data.string = strdup(orte_cmd_options.personality); - opal_list_append(&job_info, &val->super); - } - - /* check for stdout/err directives */ - /* if we were asked to tag output, mark it so */ - if (orte_cmd_options.tag_output) { - val = OBJ_NEW(opal_value_t); - 
val->key = strdup(OPAL_PMIX_TAG_OUTPUT); - val->type = OPAL_BOOL; - val->data.flag = true; - opal_list_append(&job_info, &val->super); - } - /* if we were asked to timestamp output, mark it so */ - if (orte_cmd_options.timestamp_output) { - val = OBJ_NEW(opal_value_t); - val->key = strdup(OPAL_PMIX_TIMESTAMP_OUTPUT); - val->type = OPAL_BOOL; - val->data.flag = true; - opal_list_append(&job_info, &val->super); - } - /* if we were asked to output to files, pass it along */ - if (NULL != orte_cmd_options.output_filename) { - val = OBJ_NEW(opal_value_t); - val->key = strdup(OPAL_PMIX_OUTPUT_TO_FILE); - val->type = OPAL_STRING; - /* if the given filename isn't an absolute path, then - * convert it to one so the name will be relative to - * the directory where prun was given as that is what - * the user will have seen */ - if (!opal_path_is_absolute(orte_cmd_options.output_filename)) { - char cwd[OPAL_PATH_MAX]; - getcwd(cwd, sizeof(cwd)); - val->data.string = opal_os_path(false, cwd, orte_cmd_options.output_filename, NULL); - } else { - val->data.string = strdup(orte_cmd_options.output_filename); - } - opal_list_append(&job_info, &val->super); - } - /* if we were asked to merge stderr to stdout, mark it so */ - if (orte_cmd_options.merge) { - val = OBJ_NEW(opal_value_t); - val->key = strdup(OPAL_PMIX_MERGE_STDERR_STDOUT); - val->type = OPAL_BOOL; - val->data.flag = true; - opal_list_append(&job_info, &val->super); - } - - /* check what user wants us to do with stdin */ - if (NULL != orte_cmd_options.stdin_target) { - val = OBJ_NEW(opal_value_t); - val->key = strdup(OPAL_PMIX_STDIN_TGT); - val->type = OPAL_UINT32; - opal_list_append(&job_info, &val->super); - if (0 == strcmp(orte_cmd_options.stdin_target, "all")) { - val->data.uint32 = ORTE_VPID_WILDCARD; - } else if (0 == strcmp(orte_cmd_options.stdin_target, "none")) { - val->data.uint32 = ORTE_VPID_INVALID; - } else { - val->data.uint32 = strtoul(orte_cmd_options.stdin_target, NULL, 10); - } - } - - /* if we want the 
argv's indexed, indicate that */ - if (orte_cmd_options.index_argv) { - val = OBJ_NEW(opal_value_t); - val->key = strdup(OPAL_PMIX_INDEX_ARGV); - val->type = OPAL_BOOL; - val->data.flag = true; - opal_list_append(&job_info, &val->super); - } - - if (NULL != orte_cmd_options.mapping_policy) { - val = OBJ_NEW(opal_value_t); - val->key = strdup(OPAL_PMIX_MAPBY); - val->type = OPAL_STRING; - val->data.string = strdup(orte_cmd_options.mapping_policy); - opal_list_append(&job_info, &val->super); - } else if (orte_cmd_options.pernode) { - val = OBJ_NEW(opal_value_t); - val->key = strdup(OPAL_PMIX_PPR); - val->type = OPAL_STRING; - val->data.string = strdup("1:node"); - opal_list_append(&job_info, &val->super); - } else if (0 < orte_cmd_options.npernode) { - /* define the ppr */ - val = OBJ_NEW(opal_value_t); - val->key = strdup(OPAL_PMIX_PPR); - val->type = OPAL_STRING; - (void)asprintf(&val->data.string, "%d:node", orte_cmd_options.npernode); - opal_list_append(&job_info, &val->super); - } else if (0 < orte_cmd_options.npersocket) { - /* define the ppr */ - val = OBJ_NEW(opal_value_t); - val->key = strdup(OPAL_PMIX_PPR); - val->type = OPAL_STRING; - (void)asprintf(&val->data.string, "%d:socket", orte_cmd_options.npernode); - opal_list_append(&job_info, &val->super); - } - - /* if the user specified cpus/rank, set it */ - if (0 < orte_cmd_options.cpus_per_proc) { - val = OBJ_NEW(opal_value_t); - val->key = strdup(OPAL_PMIX_CPUS_PER_PROC); - val->type = OPAL_UINT32; - val->data.uint32 = orte_cmd_options.cpus_per_proc; - opal_list_append(&job_info, &val->super); - } - - /* if the user specified a ranking policy, then set it */ - if (NULL != orte_cmd_options.ranking_policy) { - val = OBJ_NEW(opal_value_t); - val->key = strdup(OPAL_PMIX_RANKBY); - val->type = OPAL_STRING; - val->data.string = strdup(orte_cmd_options.ranking_policy); - opal_list_append(&job_info, &val->super); - } - - /* if the user specified a binding policy, then set it */ - if (NULL != 
orte_cmd_options.binding_policy) { - val = OBJ_NEW(opal_value_t); - val->key = strdup(OPAL_PMIX_BINDTO); - val->type = OPAL_STRING; - val->data.string = strdup(orte_cmd_options.binding_policy); - opal_list_append(&job_info, &val->super); - } - - /* if they asked for nolocal, mark it so */ - if (orte_cmd_options.nolocal) { - val = OBJ_NEW(opal_value_t); - val->key = strdup(OPAL_PMIX_NO_PROCS_ON_HEAD); - val->type = OPAL_BOOL; - val->data.flag = true; - opal_list_append(&job_info, &val->super); - } - if (orte_cmd_options.no_oversubscribe) { - val = OBJ_NEW(opal_value_t); - val->key = strdup(OPAL_PMIX_NO_OVERSUBSCRIBE); - val->type = OPAL_BOOL; - val->data.flag = true; - opal_list_append(&job_info, &val->super); - } - if (orte_cmd_options.oversubscribe) { - val = OBJ_NEW(opal_value_t); - val->key = strdup(OPAL_PMIX_NO_OVERSUBSCRIBE); - val->type = OPAL_BOOL; - val->data.flag = false; - opal_list_append(&job_info, &val->super); - } - if (orte_cmd_options.report_bindings) { - val = OBJ_NEW(opal_value_t); - val->key = strdup(OPAL_PMIX_REPORT_BINDINGS); - val->type = OPAL_BOOL; - val->data.flag = true; - opal_list_append(&job_info, &val->super); - } - if (NULL != orte_cmd_options.cpu_list) { - val = OBJ_NEW(opal_value_t); - val->key = strdup(OPAL_PMIX_CPU_LIST); - val->type = OPAL_STRING; - val->data.string = strdup(orte_cmd_options.cpu_list); - opal_list_append(&job_info, &val->super); - } - - /* mark if recovery was enabled on the cmd line */ - if (orte_enable_recovery) { - val = OBJ_NEW(opal_value_t); - val->key = strdup(OPAL_PMIX_JOB_RECOVERABLE); - val->type = OPAL_BOOL; - val->data.flag = true; - opal_list_append(&job_info, &val->super); - } - /* record the max restarts */ - if (0 < orte_max_restarts) { - OPAL_LIST_FOREACH(app, &apps, opal_pmix_app_t) { - val = OBJ_NEW(opal_value_t); - val->key = strdup(OPAL_PMIX_MAX_RESTARTS); - val->type = OPAL_UINT32; - val->data.uint32 = orte_max_restarts; - opal_list_append(&app->info, &val->super); - } - } - /* if continuous 
operation was specified */ - if (orte_cmd_options.continuous) { - /* mark this job as continuously operating */ - val = OBJ_NEW(opal_value_t); - val->key = strdup(OPAL_PMIX_JOB_CONTINUOUS); - val->type = OPAL_BOOL; - val->data.flag = true; - opal_list_append(&job_info, &val->super); - } - - /* pickup any relevant envars */ - if (NULL != opal_pmix.server_setup_application) { - OBJ_CONSTRUCT(&info, opal_list_t); - val = OBJ_NEW(opal_value_t); - val->key = strdup(OPAL_PMIX_SETUP_APP_ENVARS); - val->type = OPAL_BOOL; - val->data.flag = true; - opal_list_append(&info, &val->super); - - OPAL_PMIX_CONSTRUCT_LOCK(&mylock.lock); - OBJ_CONSTRUCT(&mylock.list, opal_list_t); - rc = opal_pmix.server_setup_application(ORTE_PROC_MY_NAME->jobid, - &info, setupcbfunc, &mylock); - if (OPAL_SUCCESS != rc) { - OPAL_LIST_DESTRUCT(&info); - OPAL_PMIX_DESTRUCT_LOCK(&mylock.lock); - OBJ_DESTRUCT(&mylock.list); - goto DONE; - } - OPAL_PMIX_WAIT_THREAD(&mylock.lock); - OPAL_PMIX_DESTRUCT_LOCK(&mylock.lock); - /* transfer any returned ENVARS to the job_info */ - while (NULL != (val = (opal_value_t*)opal_list_remove_first(&mylock.list))) { - if (0 == strcmp(val->key, OPAL_PMIX_SET_ENVAR) || - 0 == strcmp(val->key, OPAL_PMIX_ADD_ENVAR) || - 0 == strcmp(val->key, OPAL_PMIX_UNSET_ENVAR) || - 0 == strcmp(val->key, OPAL_PMIX_PREPEND_ENVAR) || - 0 == strcmp(val->key, OPAL_PMIX_APPEND_ENVAR)) { - opal_list_append(&job_info, &val->super); - } else { - OBJ_RELEASE(val); - } - } - OPAL_LIST_DESTRUCT(&mylock.list); - } - - /* if we were launched by a tool wanting to direct our - * operation, then we need to pause here and give it - * a chance to tell us what we need to do */ - if (NULL != (param = getenv("PMIX_LAUNCHER_PAUSE_FOR_TOOL")) && - 0 == strcmp(param, "1")) { - /* register for the PMIX_LAUNCH_DIRECTIVE event */ - OPAL_PMIX_CONSTRUCT_LOCK(&lock); - OBJ_CONSTRUCT(&codes, opal_list_t); - val = OBJ_NEW(opal_value_t); - val->key = strdup("foo"); - val->type = OPAL_INT; - val->data.integer = 
OPAL_PMIX_LAUNCH_DIRECTIVE; - opal_list_append(&codes, &val->super); - /* setup the myinfo object to capture the returned - * values - must do so prior to registering in case - * the event has already arrived */ - OBJ_CONSTRUCT(&myinfo, myinfo_t); - /* go ahead and register */ - opal_pmix.register_evhandler(&codes, NULL, launchhandler, regcbfunc, &lock); - OPAL_PMIX_WAIT_THREAD(&lock); - OPAL_PMIX_DESTRUCT_LOCK(&lock); - OPAL_LIST_DESTRUCT(&codes); - /* now wait for the launch directives to arrive */ - OPAL_PMIX_WAIT_THREAD(&myinfo.lock); - /* process the returned directives */ - OPAL_LIST_FOREACH(val, &myinfo.info, opal_value_t) { - if (0 == strcmp(val->key, OPAL_PMIX_DEBUG_JOB_DIRECTIVES)) { - /* there will be a pointer to a list containing the directives */ - lt = (opal_list_t*)val->data.ptr; - while (NULL != (kv = (opal_value_t*)opal_list_remove_first(lt))) { - opal_output(0, "JOB DIRECTIVE: %s", kv->key); - opal_list_append(&job_info, &kv->super); - } - } else if (0 == strcmp(val->key, OPAL_PMIX_DEBUG_APP_DIRECTIVES)) { - /* there will be a pointer to a list containing the directives */ - lt = (opal_list_t*)val->data.ptr; - OPAL_LIST_FOREACH(kv, lt, opal_value_t) { - opal_output(0, "APP DIRECTIVE: %s", kv->key); - OPAL_LIST_FOREACH(app, &apps, opal_pmix_app_t) { - /* the value can only be on one list at a time, so replicate it */ - kv2 = OBJ_NEW(opal_value_t); - opal_value_xfer(kv2, kv); - opal_list_append(&app->info, &kv2->super); - } - } - } - } - } - - if (OPAL_SUCCESS != (rc = opal_pmix.spawn(&job_info, &apps, &myjobid))) { - opal_output(0, "Job failed to spawn: %s", opal_strerror(rc)); - goto DONE; - } - OPAL_LIST_DESTRUCT(&job_info); - OPAL_LIST_DESTRUCT(&apps); - - if (orte_cmd_options.verbose) { - opal_output(0, "JOB %s EXECUTING", OPAL_JOBID_PRINT(myjobid)); - } - - while (active) { - nanosleep(&tp, NULL); - } - OPAL_PMIX_CONSTRUCT_LOCK(&lock); - opal_pmix.deregister_evhandler(evid, opcbfunc, &lock); - OPAL_PMIX_WAIT_THREAD(&lock); - 
OPAL_PMIX_DESTRUCT_LOCK(&lock); - - DONE: - /* cleanup and leave */ - orte_finalize(); - return 0; -} - -static int parse_locals(opal_list_t *jdata, int argc, char* argv[]) -{ - int i, rc; - int temp_argc; - char **temp_argv, **env; - opal_pmix_app_t *app; - bool made_app; - - /* Make the apps */ - temp_argc = 0; - temp_argv = NULL; - opal_argv_append(&temp_argc, &temp_argv, argv[0]); - - /* NOTE: This bogus env variable is necessary in the calls to - create_app(), below. See comment immediately before the - create_app() function for an explanation. */ - - env = NULL; - for (i = 1; i < argc; ++i) { - if (0 == strcmp(argv[i], ":")) { - /* Make an app with this argv */ - if (opal_argv_count(temp_argv) > 1) { - if (NULL != env) { - opal_argv_free(env); - env = NULL; - } - app = NULL; - rc = create_app(temp_argc, temp_argv, jdata, &app, &made_app, &env); - if (OPAL_SUCCESS != rc) { - /* Assume that the error message has already been - printed; no need to cleanup -- we can just - exit */ - exit(1); - } - if (made_app) { - opal_list_append(jdata, &app->super); - } - - /* Reset the temps */ - - temp_argc = 0; - temp_argv = NULL; - opal_argv_append(&temp_argc, &temp_argv, argv[0]); - } - } else { - opal_argv_append(&temp_argc, &temp_argv, argv[i]); - } - } - - if (opal_argv_count(temp_argv) > 1) { - app = NULL; - rc = create_app(temp_argc, temp_argv, jdata, &app, &made_app, &env); - if (ORTE_SUCCESS != rc) { - /* Assume that the error message has already been printed; - no need to cleanup -- we can just exit */ - exit(1); - } - if (made_app) { - opal_list_append(jdata, &app->super); - } - } - if (NULL != env) { - opal_argv_free(env); - } - opal_argv_free(temp_argv); - - /* All done */ - - return ORTE_SUCCESS; -} - - -/* - * This function takes a "char ***app_env" parameter to handle the - * specific case: - * - * orterun --mca foo bar -app appfile - * - * That is, we'll need to keep foo=bar, but the presence of the app - * file will cause an invocation of parse_appfile(), 
which will cause - * one or more recursive calls back to create_app(). Since the - * foo=bar value applies globally to all apps in the appfile, we need - * to pass in the "base" environment (that contains the foo=bar value) - * when we parse each line in the appfile. - * - * This is really just a special case -- when we have a simple case like: - * - * orterun --mca foo bar -np 4 hostname - * - * Then the upper-level function (parse_locals()) calls create_app() - * with a NULL value for app_env, meaning that there is no "base" - * environment that the app needs to be created from. - */ -static int create_app(int argc, char* argv[], - opal_list_t *jdata, - opal_pmix_app_t **app_ptr, - bool *made_app, char ***app_env) -{ - char cwd[OPAL_PATH_MAX]; - int i, j, count, rc; - char *param, *value; - opal_pmix_app_t *app = NULL; - bool found = false; - char *appname = NULL; - opal_value_t *val; - - *made_app = false; - - /* parse the cmd line - do this every time thru so we can - * repopulate the globals */ - if (OPAL_SUCCESS != (rc = opal_cmd_line_parse(orte_cmd_line, true, false, - argc, argv)) ) { - if (OPAL_ERR_SILENT != rc) { - fprintf(stderr, "%s: command line error (%s)\n", argv[0], - opal_strerror(rc)); - } - return rc; - } - - /* Setup application context */ - app = OBJ_NEW(opal_pmix_app_t); - opal_cmd_line_get_tail(orte_cmd_line, &count, &app->argv); - - /* See if we have anything left */ - if (0 == count) { - opal_show_help("help-orterun.txt", "orterun:executable-not-specified", - true, "prun", "prun"); - rc = OPAL_ERR_NOT_FOUND; - goto cleanup; - } - - /* Grab all MCA environment variables */ - app->env = opal_argv_copy(*app_env); - for (i=0; NULL != environ[i]; i++) { - if (0 == strncmp("PMIX_", environ[i], 5) || - 0 == strncmp("OMPI_", environ[i], 5)) { - /* check for duplicate in app->env - this - * would have been placed there by the - * cmd line processor. 
By convention, we - * always let the cmd line override the - * environment - */ - param = strdup(environ[i]); - value = strchr(param, '='); - *value = '\0'; - value++; - opal_setenv(param, value, false, &app->env); - free(param); - } - } - - /* set necessary env variables for external usage from tune conf file*/ - int set_from_file = 0; - char **vars = NULL; - if (OPAL_SUCCESS == mca_base_var_process_env_list_from_file(&vars) && - NULL != vars) { - for (i=0; NULL != vars[i]; i++) { - value = strchr(vars[i], '='); - /* terminate the name of the param */ - *value = '\0'; - /* step over the equals */ - value++; - /* overwrite any prior entry */ - opal_setenv(vars[i], value, true, &app->env); - /* save it for any comm_spawn'd apps */ - opal_setenv(vars[i], value, true, &orte_forwarded_envars); - } - set_from_file = 1; - opal_argv_free(vars); - } - /* Did the user request to export any environment variables on the cmd line? */ - char *env_set_flag; - env_set_flag = getenv("OMPI_MCA_mca_base_env_list"); - if (opal_cmd_line_is_taken(orte_cmd_line, "x")) { - if (NULL != env_set_flag) { - opal_show_help("help-orterun.txt", "orterun:conflict-env-set", false); - return ORTE_ERR_FATAL; - } - j = opal_cmd_line_get_ninsts(orte_cmd_line, "x"); - for (i = 0; i < j; ++i) { - param = opal_cmd_line_get_param(orte_cmd_line, "x", i, 0); - - if (NULL != (value = strchr(param, '='))) { - /* terminate the name of the param */ - *value = '\0'; - /* step over the equals */ - value++; - /* overwrite any prior entry */ - opal_setenv(param, value, true, &app->env); - /* save it for any comm_spawn'd apps */ - opal_setenv(param, value, true, &orte_forwarded_envars); - } else { - value = getenv(param); - if (NULL != value) { - /* overwrite any prior entry */ - opal_setenv(param, value, true, &app->env); - /* save it for any comm_spawn'd apps */ - opal_setenv(param, value, true, &orte_forwarded_envars); - } else { - opal_output(0, "Warning: could not find environment variable \"%s\"\n", param); - 
} - } - } - } else if (NULL != env_set_flag) { - /* if mca_base_env_list was set, check if some of env vars were set via -x from a conf file. - * If this is the case, error out. - */ - if (!set_from_file) { - /* set necessary env variables for external usage */ - vars = NULL; - if (OPAL_SUCCESS == mca_base_var_process_env_list(env_set_flag, &vars) && - NULL != vars) { - for (i=0; NULL != vars[i]; i++) { - value = strchr(vars[i], '='); - /* terminate the name of the param */ - *value = '\0'; - /* step over the equals */ - value++; - /* overwrite any prior entry */ - opal_setenv(vars[i], value, true, &app->env); - /* save it for any comm_spawn'd apps */ - opal_setenv(vars[i], value, true, &orte_forwarded_envars); - } - opal_argv_free(vars); - } - } else { - opal_show_help("help-orterun.txt", "orterun:conflict-env-set", false); - return ORTE_ERR_FATAL; - } - } - - /* Did the user request a specific wdir? */ - - if (NULL != orte_cmd_options.wdir) { - /* if this is a relative path, convert it to an absolute path */ - if (opal_path_is_absolute(orte_cmd_options.wdir)) { - app->cwd = strdup(orte_cmd_options.wdir); - } else { - /* get the cwd */ - if (OPAL_SUCCESS != (rc = opal_getcwd(cwd, sizeof(cwd)))) { - opal_show_help("help-orterun.txt", "orterun:init-failure", - true, "get the cwd", rc); - goto cleanup; - } - /* construct the absolute path */ - app->cwd = opal_os_path(false, cwd, orte_cmd_options.wdir, NULL); - } - } else if (orte_cmd_options.set_cwd_to_session_dir) { - val = OBJ_NEW(opal_value_t); - val->key = strdup(OPAL_PMIX_SET_SESSION_CWD); - val->type = OPAL_BOOL; - val->data.flag = true; - opal_list_append(&app->info, &val->super); - } else { - if (OPAL_SUCCESS != (rc = opal_getcwd(cwd, sizeof(cwd)))) { - opal_show_help("help-orterun.txt", "orterun:init-failure", - true, "get the cwd", rc); - goto cleanup; - } - app->cwd = strdup(cwd); - } - - /* Did the user specify a hostfile. Need to check for both - * hostfile and machine file. 
- * We can only deal with one hostfile per app context, otherwise give an error. - */ - found = false; - if (0 < (j = opal_cmd_line_get_ninsts(orte_cmd_line, "hostfile"))) { - if (1 < j) { - opal_show_help("help-orterun.txt", "orterun:multiple-hostfiles", - true, "prun", NULL); - return ORTE_ERR_FATAL; - } else { - value = opal_cmd_line_get_param(orte_cmd_line, "hostfile", 0, 0); - val = OBJ_NEW(opal_value_t); - val->key = strdup(OPAL_PMIX_HOSTFILE); - val->type = OPAL_STRING; - val->data.string = value; - opal_list_append(&app->info, &val->super); - found = true; - } - } - if (0 < (j = opal_cmd_line_get_ninsts(orte_cmd_line, "machinefile"))) { - if (1 < j || found) { - opal_show_help("help-orterun.txt", "orterun:multiple-hostfiles", - true, "prun", NULL); - return ORTE_ERR_FATAL; - } else { - value = opal_cmd_line_get_param(orte_cmd_line, "machinefile", 0, 0); - val = OBJ_NEW(opal_value_t); - val->key = strdup(OPAL_PMIX_HOSTFILE); - val->type = OPAL_STRING; - val->data.string = value; - opal_list_append(&app->info, &val->super); - } - } - - /* Did the user specify any hosts? */ - if (0 < (j = opal_cmd_line_get_ninsts(orte_cmd_line, "host"))) { - char **targ=NULL, *tval; - for (i = 0; i < j; ++i) { - value = opal_cmd_line_get_param(orte_cmd_line, "host", i, 0); - opal_argv_append_nosize(&targ, value); - } - tval = opal_argv_join(targ, ','); - val = OBJ_NEW(opal_value_t); - val->key = strdup(OPAL_PMIX_HOST); - val->type = OPAL_STRING; - val->data.string = tval; - opal_list_append(&app->info, &val->super); - } - - /* check for bozo error */ - if (0 > orte_cmd_options.num_procs) { - opal_show_help("help-orterun.txt", "orterun:negative-nprocs", - true, "prun", app->argv[0], - orte_cmd_options.num_procs, NULL); - return ORTE_ERR_FATAL; - } - - app->maxprocs = orte_cmd_options.num_procs; - - /* see if we need to preload the binary to - * find the app - don't do this for java apps, however, as we - * can't easily find the class on the cmd line. 
Java apps have to - * preload their binary via the preload_files option - */ - if (NULL == strstr(app->argv[0], "java")) { - if (orte_cmd_options.preload_binaries) { - val = OBJ_NEW(opal_value_t); - val->key = strdup(OPAL_PMIX_SET_SESSION_CWD); - val->type = OPAL_BOOL; - val->data.flag = true; - opal_list_append(&app->info, &val->super); - val = OBJ_NEW(opal_value_t); - val->key = strdup(OPAL_PMIX_PRELOAD_BIN); - val->type = OPAL_BOOL; - val->data.flag = true; - opal_list_append(&app->info, &val->super); - } - } - if (NULL != orte_cmd_options.preload_files) { - val = OBJ_NEW(opal_value_t); - val->key = strdup(OPAL_PMIX_PRELOAD_FILES); - val->type = OPAL_BOOL; - val->data.flag = true; - opal_list_append(&app->info, &val->super); - } - - /* Do not try to find argv[0] here -- the starter is responsible - for that because it may not be relevant to try to find it on - the node where orterun is executing. So just strdup() argv[0] - into app. */ - - app->cmd = strdup(app->argv[0]); - if (NULL == app->cmd) { - opal_show_help("help-orterun.txt", "orterun:call-failed", - true, "prun", "library", "strdup returned NULL", errno); - rc = ORTE_ERR_NOT_FOUND; - goto cleanup; - } - - /* if this is a Java application, we have a bit more work to do. Such - * applications actually need to be run under the Java virtual machine - * and the "java" command will start the "executable". So we need to ensure - * that all the proper java-specific paths are provided - */ - appname = opal_basename(app->cmd); - if (0 == strcmp(appname, "java")) { - /* see if we were given a library path */ - found = false; - for (i=1; NULL != app->argv[i]; i++) { - if (NULL != strstr(app->argv[i], "java.library.path")) { - char *dptr; - /* find the '=' that delineates the option from the path */ - if (NULL == (dptr = strchr(app->argv[i], '='))) { - /* that's just wrong */ - rc = ORTE_ERR_BAD_PARAM; - goto cleanup; - } - /* step over the '=' */ - ++dptr; - /* yep - but does it include the path to the mpi libs? 
*/ - found = true; - if (NULL == strstr(app->argv[i], opal_install_dirs.libdir)) { - /* doesn't appear to - add it to be safe */ - if (':' == app->argv[i][strlen(app->argv[i]-1)]) { - asprintf(&value, "-Djava.library.path=%s%s", dptr, opal_install_dirs.libdir); - } else { - asprintf(&value, "-Djava.library.path=%s:%s", dptr, opal_install_dirs.libdir); - } - free(app->argv[i]); - app->argv[i] = value; - } - break; - } - } - if (!found) { - /* need to add it right after the java command */ - asprintf(&value, "-Djava.library.path=%s", opal_install_dirs.libdir); - opal_argv_insert_element(&app->argv, 1, value); - free(value); - } - - /* see if we were given a class path */ - found = false; - for (i=1; NULL != app->argv[i]; i++) { - if (NULL != strstr(app->argv[i], "cp") || - NULL != strstr(app->argv[i], "classpath")) { - /* yep - but does it include the path to the mpi libs? */ - found = true; - /* check if mpi.jar exists - if so, add it */ - value = opal_os_path(false, opal_install_dirs.libdir, "mpi.jar", NULL); - if (access(value, F_OK ) != -1) { - set_classpath_jar_file(app, i+1, "mpi.jar"); - } - free(value); - /* check for oshmem support */ - value = opal_os_path(false, opal_install_dirs.libdir, "shmem.jar", NULL); - if (access(value, F_OK ) != -1) { - set_classpath_jar_file(app, i+1, "shmem.jar"); - } - free(value); - /* always add the local directory */ - asprintf(&value, "%s:%s", app->cwd, app->argv[i+1]); - free(app->argv[i+1]); - app->argv[i+1] = value; - break; - } - } - if (!found) { - /* check to see if CLASSPATH is in the environment */ - found = false; // just to be pedantic - for (i=0; NULL != environ[i]; i++) { - if (0 == strncmp(environ[i], "CLASSPATH", strlen("CLASSPATH"))) { - value = strchr(environ[i], '='); - ++value; /* step over the = */ - opal_argv_insert_element(&app->argv, 1, value); - /* check for mpi.jar */ - value = opal_os_path(false, opal_install_dirs.libdir, "mpi.jar", NULL); - if (access(value, F_OK ) != -1) { - 
set_classpath_jar_file(app, 1, "mpi.jar"); - } - free(value); - /* check for shmem.jar */ - value = opal_os_path(false, opal_install_dirs.libdir, "shmem.jar", NULL); - if (access(value, F_OK ) != -1) { - set_classpath_jar_file(app, 1, "shmem.jar"); - } - free(value); - /* always add the local directory */ - (void)asprintf(&value, "%s:%s", app->cwd, app->argv[1]); - free(app->argv[1]); - app->argv[1] = value; - opal_argv_insert_element(&app->argv, 1, "-cp"); - found = true; - break; - } - } - if (!found) { - /* need to add it right after the java command - have - * to include the working directory and trust that - * the user set cwd if necessary - */ - char *str, *str2; - /* always start with the working directory */ - str = strdup(app->cwd); - /* check for mpi.jar */ - value = opal_os_path(false, opal_install_dirs.libdir, "mpi.jar", NULL); - if (access(value, F_OK ) != -1) { - (void)asprintf(&str2, "%s:%s", str, value); - free(str); - str = str2; - } - free(value); - /* check for shmem.jar */ - value = opal_os_path(false, opal_install_dirs.libdir, "shmem.jar", NULL); - if (access(value, F_OK ) != -1) { - asprintf(&str2, "%s:%s", str, value); - free(str); - str = str2; - } - free(value); - opal_argv_insert_element(&app->argv, 1, str); - free(str); - opal_argv_insert_element(&app->argv, 1, "-cp"); - } - } - /* try to find the actual command - may not be perfect */ - for (i=1; i < opal_argv_count(app->argv); i++) { - if (NULL != strstr(app->argv[i], "java.library.path")) { - continue; - } else if (NULL != strstr(app->argv[i], "cp") || - NULL != strstr(app->argv[i], "classpath")) { - /* skip the next field */ - i++; - continue; - } - /* declare this the winner */ - opal_setenv("OMPI_COMMAND", app->argv[i], true, &app->env); - /* collect everything else as the cmd line */ - if ((i+1) < opal_argv_count(app->argv)) { - value = opal_argv_join(&app->argv[i+1], ' '); - opal_setenv("OMPI_ARGV", value, true, &app->env); - free(value); - } - break; - } - } else { - /* add the 
cmd to the environment for MPI_Info to pickup */ - opal_setenv("OMPI_COMMAND", appname, true, &app->env); - if (1 < opal_argv_count(app->argv)) { - value = opal_argv_join(&app->argv[1], ' '); - opal_setenv("OMPI_ARGV", value, true, &app->env); - free(value); - } - } - - *app_ptr = app; - app = NULL; - *made_app = true; - - /* All done */ - - cleanup: - if (NULL != app) { - OBJ_RELEASE(app); - } - if (NULL != appname) { - free(appname); - } - return rc; -} - -static void set_classpath_jar_file(opal_pmix_app_t *app, int index, char *jarfile) -{ - if (NULL == strstr(app->argv[index], jarfile)) { - /* nope - need to add it */ - char *fmt = ':' == app->argv[index][strlen(app->argv[index]-1)] - ? "%s%s/%s" : "%s:%s/%s"; - char *str; - asprintf(&str, fmt, app->argv[index], opal_install_dirs.libdir, jarfile); - free(app->argv[index]); - app->argv[index] = str; - } -} diff --git a/orte/tools/ompi-prun/prun.h b/orte/tools/ompi-prun/prun.h deleted file mode 100644 index eb86cc6d003..00000000000 --- a/orte/tools/ompi-prun/prun.h +++ /dev/null @@ -1,37 +0,0 @@ -/* - * Copyright (c) 2004-2010 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2005 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * Copyright (c) 2007-2011 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2012-2013 Los Alamos National Security, LLC. - * All rights reserved - * Copyright (c) 2014-2018 Intel, Inc. All rights reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#ifndef PRUN_H -#define PRUN_H - -#include "orte_config.h" - -BEGIN_C_DECLS - -/** - * Main body of prun functionality - */ -int prun(int argc, char *argv[]); - -END_C_DECLS - -#endif /* ORTERUN_ORTERUN_H */ diff --git a/orte/tools/orte-dvm/Makefile.am b/orte/tools/orte-dvm/Makefile.am deleted file mode 100644 index 3723b846cd0..00000000000 --- a/orte/tools/orte-dvm/Makefile.am +++ /dev/null @@ -1,57 +0,0 @@ -# -# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana -# University Research and Technology -# Corporation. All rights reserved. -# Copyright (c) 2004-2005 The University of Tennessee and The University -# of Tennessee Research Foundation. All rights -# reserved. -# Copyright (c) 2004-2009 High Performance Computing Center Stuttgart, -# University of Stuttgart. All rights reserved. -# Copyright (c) 2004-2005 The Regents of the University of California. -# All rights reserved. -# Copyright (c) 2008-2014 Cisco Systems, Inc. All rights reserved. -# Copyright (c) 2008 Sun Microsystems, Inc. All rights reserved. -# Copyright (c) 2015 Intel, Inc. All rights reserved. -# $COPYRIGHT$ -# -# Additional copyrights may follow -# -# $HEADER$ -# - -# This is not quite in the Automake spirit, but we have to do it. -# Since the totalview portion of the library must be built with -g, we -# must eliminate the CFLAGS that are passed in here by default (which -# may already have debugging and/or optimization flags). We use -# post-processed forms of the CFLAGS in the library targets down -# below. 
- -CFLAGS = $(CFLAGS_WITHOUT_OPTFLAGS) $(DEBUGGER_CFLAGS) - -include $(top_srcdir)/Makefile.ompi-rules - -man_pages = orte-dvm.1 -EXTRA_DIST = $(man_pages:.1=.1in) - -if OPAL_INSTALL_BINARIES - -bin_PROGRAMS = orte-dvm - -nodist_man_MANS = $(man_pages) - -# Ensure that the man pages are rebuilt if the opal_config.h file -# changes; a "good enough" way to know if configure was run again (and -# therefore the release date or version may have changed) -$(nodist_man_MANS): $(top_builddir)/opal/include/opal_config.h - -endif # OPAL_INSTALL_BINARIES - -orte_dvm_SOURCES = \ - orte-dvm.c - -orte_dvm_LDADD = \ - $(top_builddir)/orte/lib@ORTE_LIB_PREFIX@open-rte.la \ - $(top_builddir)/opal/lib@OPAL_LIB_PREFIX@open-pal.la - -distclean-local: - rm -f $(man_pages) diff --git a/orte/tools/orte-dvm/orte-dvm.1in b/orte/tools/orte-dvm/orte-dvm.1in deleted file mode 100644 index d4d74df9136..00000000000 --- a/orte/tools/orte-dvm/orte-dvm.1in +++ /dev/null @@ -1,193 +0,0 @@ -.\” -*- nroff -*- -.\" Copyright (c) 2009-2014 Cisco Systems, Inc. All rights reserved. -.\" Copyright (c) 2008-2009 Sun Microsystems, Inc. All rights reserved. -.\” Copyright (c) 2015 Intel, Inc. All rights reserved -.\" $COPYRIGHT$ -.\" -.\" Man page for ORTE's orte-dvm command -.\" -.\" .TH name section center-footer left-footer center-header -.TH ORTE-DVM 1 "#OMPI_DATE#" "#PACKAGE_VERSION#" "#PACKAGE_NAME#" -.\" ************************** -.\" Name Section -.\" ************************** -.SH NAME -. -orte-dvm, ompi_dvm \- Establish a Distributed Virtual Machine (DVM). - -.B Note: -\fIorte-dvm\fP and \fIompi-dvm\fP are synonyms for each -other. Using either of the names will produce the same behavior. -. -.\" ************************** -.\" Synopsis Section -.\" ************************** -.SH SYNOPSIS -. 
-.PP -.B orte-dvm -[ options ] -.P - -Invoking \fIorte-dvm\fP via an absolute path -name is equivalent to specifying the \fI--prefix\fP option with a -\fI\fR value equivalent to the directory where \fIorte-dvm\fR -resides, minus its last subdirectory. For example: - - \fB%\fP /usr/local/bin/orte-dvm ... - -is equivalent to - - \fB%\fP orte-dvm --prefix /usr/local - -. -.\" ************************** -.\" Quick Summary Section -.\" ************************** -.SH QUICK SUMMARY -. -\fIorte-dvm\fP will establish a DVM that can be used to execute subsequent -applications. Use of \fIorte-dvm\fP can be advantageous, for example, when you want to -execute a number of short-lived tasks. In such cases, the time required to start -the ORTE DVM can be a significant fraction of the time to execute the -overall application. Thus, creating a persistent DVM can speed the overall -execution. In addition, a persistent DVM will support executing multiple parallel -applications while maintaining separation between their respective cores. -.\" ************************** -.\" Options Section -.\" ************************** -.SH OPTIONS -. -.\" -.\" Start options listing -.\" Indent 10 characters from start of first column to start of second column -. -.TP -.B -h\fR,\fP --help -Display help for this command -. -. -.TP -.B -V\fR,\fP --version -Print version number. If no other arguments are given, this will also -cause orte-dvm to exit. -. -. -.P -Use one of the following options to specify which hosts (nodes) of the cluster to use -for the DVM. -. -. -.TP -.B -H\fR,\fP -host\fR,\fP --host \fR\fP -List of hosts for the DVM. -. -. -.TP -.B --hostfile\fR,\fP --hostfile \fR\fP -Provide a hostfile to use. -. -. -.TP -.B -machinefile\fR,\fP --machinefile \fR\fP -Synonym for \fI-hostfile\fP. -. -. -.TP -.B --prefix \fR\fP -Prefix directory that will be used to set the \fIPATH\fR and -\fILD_LIBRARY_PATH\fR on the remote node before invoking the ORTE daemon. -. -. 
-..P -Setting MCA parameters: -. -. -.TP -.B -gmca\fR,\fP --gmca \fR \fP -Pass global MCA parameters that are applicable to all contexts. \fI\fP is -the parameter name; \fI\fP is the parameter value. -. -. -.TP -.B -mca\fR,\fP --mca -Send arguments to various MCA modules. See the "MCA" section, below. -. -. -. -. -.TP -.B -report-uri\fR,\fP --report-uri -Print out orte-dvm's URI during startup. The channel must be either a '-' to indicate that -the URI is to be output to stdout, a '+' to indicate that the URI is to be output to stderr, -or a filename to which the URI is to be written. -. -. -.P -The following options are useful for developers; they are not generally -useful to most ORTE and/or MPI users: -. -.TP -.B -d\fR,\fP --debug-devel -Enable debugging of the ORTE layer. -. -. -.TP -.B --debug-daemons-file -Enable debugging of the ORTE daemons in the DVM, storing -output in files. -. -. -.P -There may be other options listed with \fIorte-dvm --help\fP. -. -. -.\" ************************** -.\" Description Section -.\" ************************** -.SH DESCRIPTION -. -\fIorte-dvm\fP starts a Distributed Virtual Machine (DVM) by launching -a daemon on each node of the allocation, as modified or specified by -the \fI-host\fP and \fI-hostfile\fP options. Applications can subsequently -be executed using the \fIorte-submit\fP command. -. -The DVM remains in operation until receiving the \fIorte-submit -terminate\fP -command. -. -. -. -.SS Specifying Host Nodes -. -Host nodes can be identified on the \fIorte-dvm\fP command line with the \fI-host\fP -option or in a hostfile. -. -.PP -For example, -. -.TP 4 -orte-dvm -H aa,aa,bb ./a.out -launches two processes on node aa and one on bb. -. -.PP -Or, consider the hostfile -. - - \fB%\fP cat myhostfile - aa slots=2 - bb slots=2 - cc slots=2 - -. -.PP -Here, we list both the host names (aa, bb, and cc) but also how many "slots" -there are for each. Slots indicate how many processes can potentially execute -on a node. 
For best performance, the number of slots may be chosen to be the -number of cores on the node or the number of processor sockets. If the hostfile -does not provide slots information, a default of 1 is assumed. -When running under resource managers (e.g., SLURM, Torque, etc.), -Open MPI will obtain both the hostnames and the number of slots directly -from the resource manger. -. -. diff --git a/orte/tools/orte-dvm/orte-dvm.c b/orte/tools/orte-dvm/orte-dvm.c deleted file mode 100644 index 522c539af33..00000000000 --- a/orte/tools/orte-dvm/orte-dvm.c +++ /dev/null @@ -1,482 +0,0 @@ -/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ -/* - * Copyright (c) 2004-2010 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2008 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * Copyright (c) 2006-2017 Cisco Systems, Inc. All rights reserved - * Copyright (c) 2007-2009 Sun Microsystems, Inc. All rights reserved. - * Copyright (c) 2007-2016 Los Alamos National Security, LLC. All rights - * reserved. - * Copyright (c) 2013-2017 Intel, Inc. All rights reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "orte_config.h" -#include "orte/constants.h" - -#include -#include -#include -#ifdef HAVE_STRINGS_H -#include -#endif /* HAVE_STRINGS_H */ -#ifdef HAVE_UNISTD_H -#include -#endif -#ifdef HAVE_SYS_PARAM_H -#include -#endif -#include -#include -#include -#ifdef HAVE_SYS_TYPES_H -#include -#endif /* HAVE_SYS_TYPES_H */ -#ifdef HAVE_SYS_WAIT_H -#include -#endif /* HAVE_SYS_WAIT_H */ -#ifdef HAVE_SYS_TIME_H -#include -#endif /* HAVE_SYS_TIME_H */ -#include -#ifdef HAVE_SYS_STAT_H -#include -#endif - -#include "opal/mca/event/event.h" -#include "opal/mca/installdirs/installdirs.h" -#include "opal/mca/base/base.h" -#include "opal/mca/pmix/pmix.h" -#include "opal/util/argv.h" -#include "opal/util/output.h" -#include "opal/util/basename.h" -#include "opal/util/cmd_line.h" -#include "opal/util/opal_environ.h" -#include "opal/util/opal_getcwd.h" -#include "opal/util/show_help.h" -#include "opal/util/fd.h" -#include "opal/util/daemon_init.h" - -#include "opal/version.h" -#include "opal/runtime/opal.h" -#include "opal/runtime/opal_info_support.h" -#include "opal/util/os_path.h" -#include "opal/util/path.h" -#include "opal/class/opal_pointer_array.h" - -#include "orte/mca/errmgr/errmgr.h" -#include "orte/mca/grpcomm/grpcomm.h" -#include "orte/mca/odls/odls.h" -#include "orte/mca/oob/base/base.h" -#include "orte/mca/rml/rml.h" -#include "orte/mca/rml/base/rml_contact.h" -#include "orte/mca/state/state.h" - -#include "orte/runtime/runtime.h" -#include "orte/runtime/orte_globals.h" -#include "orte/util/show_help.h" -#include "orte/util/threads.h" - -#include "orte/orted/orted.h" - -/* - * Globals - */ -static bool want_prefix_by_default = (bool) ORTE_WANT_ORTERUN_PREFIX_BY_DEFAULT; - -/* - * Globals - */ -static struct { - bool help; - bool version; - char *prefix; - bool run_as_root; - bool set_sid; - bool daemonize; - bool system_server; - char *report_uri; - bool remote_connections; -} 
myglobals; - -static opal_cmd_line_init_t cmd_line_init[] = { - /* Various "obvious" options */ - { NULL, 'h', NULL, "help", 0, - &myglobals.help, OPAL_CMD_LINE_TYPE_BOOL, - "This help message" }, - { NULL, 'V', NULL, "version", 0, - &myglobals.version, OPAL_CMD_LINE_TYPE_BOOL, - "Print version and exit" }, - - { NULL, '\0', "prefix", "prefix", 1, - &myglobals.prefix, OPAL_CMD_LINE_TYPE_STRING, - "Prefix to be used to look for ORTE executables" }, - - { "orte_daemonize", '\0', NULL, "daemonize", 0, - &myglobals.daemonize, OPAL_CMD_LINE_TYPE_BOOL, - "Daemonize the orte-dvm into the background" }, - - { NULL, '\0', NULL, "set-sid", 0, - &myglobals.set_sid, OPAL_CMD_LINE_TYPE_BOOL, - "Direct the orte-dvm to separate from the current session"}, - - { "orte_debug_daemons", '\0', "debug-daemons", "debug-daemons", 0, - NULL, OPAL_CMD_LINE_TYPE_BOOL, - "Debug daemons" }, - - { "orte_debug", 'd', "debug-devel", "debug-devel", 0, - NULL, OPAL_CMD_LINE_TYPE_BOOL, - "Enable debugging of OpenRTE" }, - - { NULL, '\0', "allow-run-as-root", "allow-run-as-root", 0, - &myglobals.run_as_root, OPAL_CMD_LINE_TYPE_BOOL, - "Allow execution as root (STRONGLY DISCOURAGED)" }, - - /* Specify the launch agent to be used */ - { "orte_launch_agent", '\0', "launch-agent", "launch-agent", 1, - NULL, OPAL_CMD_LINE_TYPE_STRING, - "Command used to start processes on remote nodes (default: orted)" }, - - /* maximum size of VM - typically used to subdivide an allocation */ - { "orte_max_vm_size", '\0', "max-vm-size", "max-vm-size", 1, - NULL, OPAL_CMD_LINE_TYPE_INT, - "Maximum size of VM" }, - - /* Set a hostfile */ - { NULL, '\0', "hostfile", "hostfile", 1, - NULL, OPAL_CMD_LINE_TYPE_STRING, - "Provide a hostfile" }, - { NULL, '\0', "machinefile", "machinefile", 1, - NULL, OPAL_CMD_LINE_TYPE_STRING, - "Provide a hostfile" }, - { "orte_default_hostfile", '\0', "default-hostfile", "default-hostfile", 1, - NULL, OPAL_CMD_LINE_TYPE_STRING, - "Provide a default hostfile" }, - - { NULL, 'H', "host", 
"host", 1, - NULL, OPAL_CMD_LINE_TYPE_STRING, - "List of hosts to invoke processes on" }, - - { NULL, '\0', "system-server", "system-server", 0, - &myglobals.system_server, OPAL_CMD_LINE_TYPE_BOOL, - "Provide a system-level server connection point - only one allowed per node" }, - - { NULL, '\0', "report-uri", "report-uri", 1, - &myglobals.report_uri, OPAL_CMD_LINE_TYPE_STRING, - "Printout URI on stdout [-], stderr [+], or a file [anything else]", - OPAL_CMD_LINE_OTYPE_DEBUG }, - - { NULL, '\0', "remote-tools", "remote-tools", 0, - &myglobals.remote_connections, OPAL_CMD_LINE_TYPE_BOOL, - "Enable connections from remote tools" }, - - /* End of list */ - { NULL, '\0', NULL, NULL, 0, - NULL, OPAL_CMD_LINE_TYPE_NULL, NULL } -}; - -int main(int argc, char *argv[]) -{ - int rc, i, j; - opal_cmd_line_t cmd_line; - char *param, *value; - orte_job_t *jdata=NULL; - orte_app_context_t *app; - - /* Setup and parse the command line */ - memset(&myglobals, 0, sizeof(myglobals)); - /* find our basename (the name of the executable) so that we can - use it in pretty-print error messages */ - orte_basename = opal_basename(argv[0]); - - opal_cmd_line_create(&cmd_line, cmd_line_init); - mca_base_cmd_line_setup(&cmd_line); - if (OPAL_SUCCESS != (rc = opal_cmd_line_parse(&cmd_line, true, false, - argc, argv)) ) { - if (OPAL_ERR_SILENT != rc) { - fprintf(stderr, "%s: command line error (%s)\n", argv[0], - opal_strerror(rc)); - } - return rc; - } - - /* print version if requested. Do this before check for help so - that --version --help works as one might expect. 
*/ - if (myglobals.version) { - char *str; - str = opal_info_make_version_str("all", - OPAL_MAJOR_VERSION, OPAL_MINOR_VERSION, - OPAL_RELEASE_VERSION, - OPAL_GREEK_VERSION, - OPAL_REPO_REV); - if (NULL != str) { - fprintf(stdout, "%s %s\n\nReport bugs to %s\n", - orte_basename, str, PACKAGE_BUGREPORT); - free(str); - } - exit(0); - } - - /* check if we are running as root - if we are, then only allow - * us to proceed if the allow-run-as-root flag was given. Otherwise, - * exit with a giant warning flag - */ - if (0 == geteuid() && !myglobals.run_as_root) { - /* show_help is not yet available, so print an error manually */ - fprintf(stderr, "--------------------------------------------------------------------------\n"); - if (myglobals.help) { - fprintf(stderr, "%s cannot provide the help message when run as root.\n\n", orte_basename); - } else { - fprintf(stderr, "%s has detected an attempt to run as root.\n\n", orte_basename); - } - - fprintf(stderr, "Running at root is *strongly* discouraged as any mistake (e.g., in\n"); - fprintf(stderr, "defining TMPDIR) or bug can result in catastrophic damage to the OS\n"); - fprintf(stderr, "file system, leaving your system in an unusable state.\n\n"); - - fprintf(stderr, "We strongly suggest that you run %s as a non-root user.\n\n", orte_basename); - - fprintf(stderr, "You can override this protection by adding the --allow-run-as-root\n"); - fprintf(stderr, "option to your command line. However, we reiterate our strong advice\n"); - fprintf(stderr, "against doing so - please do so at your own risk.\n"); - fprintf(stderr, "--------------------------------------------------------------------------\n"); - exit(1); - } - - /* - * Since this process can now handle MCA/GMCA parameters, make sure to - * process them. - * NOTE: It is "safe" to call mca_base_cmd_line_process_args() before - * opal_init_util() since mca_base_cmd_line_process_args() does *not* - * depend upon opal_init_util() functionality. 
- */ - if (OPAL_SUCCESS != mca_base_cmd_line_process_args(&cmd_line, &environ, &environ)) { - exit(1); - } - - /* Need to initialize OPAL so that install_dirs are filled in */ - if (OPAL_SUCCESS != opal_init(&argc, &argv)) { - exit(1); - } - - /* Check for help request */ - if (myglobals.help) { - char *str, *args = NULL; - char *project_name = NULL; - if (0 == strcmp(orte_basename, "mpirun")) { - project_name = "Open MPI"; - } else { - project_name = "OpenRTE"; - } - args = opal_cmd_line_get_usage_msg(&cmd_line); - str = opal_show_help_string("help-orterun.txt", "orterun:usage", false, - orte_basename, project_name, OPAL_VERSION, - orte_basename, args, - PACKAGE_BUGREPORT); - if (NULL != str) { - printf("%s", str); - free(str); - } - free(args); - - /* If someone asks for help, that should be all we do */ - exit(0); - } - - if (myglobals.system_server) { - /* we should act as system-level PMIx server */ - opal_setenv(OPAL_MCA_PREFIX"pmix_system_server", "1", true, &environ); - } - /* always act as session-level PMIx server */ - opal_setenv(OPAL_MCA_PREFIX"pmix_session_server", "1", true, &environ); - /* if we were asked to report a uri, set the MCA param to do so */ - if (NULL != myglobals.report_uri) { - opal_setenv("PMIX_MCA_ptl_tcp_report_uri", myglobals.report_uri, true, &environ); - } - if (myglobals.remote_connections) { - opal_setenv("PMIX_MCA_ptl_tcp_remote_connections", "1", true, &environ); - } - - /* Setup MCA params */ - orte_register_params(); - - /* save the environment for launch purposes. 
This MUST be - * done so that we can pass it to any local procs we - * spawn - otherwise, those local procs won't see any - * non-MCA envars were set in the enviro prior to calling - * orterun - */ - orte_launch_environ = opal_argv_copy(environ); - -#if defined(HAVE_SETSID) - /* see if we were directed to separate from current session */ - if (myglobals.set_sid) { - setsid(); - } -#endif - - /* detach from controlling terminal - * otherwise, remain attached so output can get to us - */ - if(!orte_debug_flag && - !orte_debug_daemons_flag && - myglobals.daemonize) { - opal_daemon_init(NULL); - } - - /* Intialize our Open RTE environment */ - if (ORTE_SUCCESS != (rc = orte_init(&argc, &argv, ORTE_PROC_MASTER))) { - /* cannot call ORTE_ERROR_LOG as it could be the errmgr - * never got loaded! - */ - return rc; - } - /* finalize OPAL. As it was opened again from orte_init->opal_init - * we continue to have a reference count on it. So we have to finalize it twice... - */ - opal_finalize(); - - /* get the daemon job object - was created by ess/hnp component */ - if (NULL == (jdata = orte_get_job_data_object(ORTE_PROC_MY_NAME->jobid))) { - orte_show_help("help-orterun.txt", "bad-job-object", true, - orte_basename); - exit(0); - } - /* also should have created a daemon "app" */ - if (NULL == (app = (orte_app_context_t*)opal_pointer_array_get_item(jdata->apps, 0))) { - orte_show_help("help-orterun.txt", "bad-app-object", true, - orte_basename); - exit(0); - } - - /* Did the user specify a prefix, or want prefix by default? 
*/ - if (opal_cmd_line_is_taken(&cmd_line, "prefix") || want_prefix_by_default) { - size_t param_len; - /* if both the prefix was given and we have a prefix - * given above, check to see if they match - */ - if (opal_cmd_line_is_taken(&cmd_line, "prefix") && - NULL != myglobals.prefix) { - /* if they don't match, then that merits a warning */ - param = strdup(opal_cmd_line_get_param(&cmd_line, "prefix", 0, 0)); - /* ensure we strip any trailing '/' */ - if (0 == strcmp(OPAL_PATH_SEP, &(param[strlen(param)-1]))) { - param[strlen(param)-1] = '\0'; - } - value = strdup(myglobals.prefix); - if (0 == strcmp(OPAL_PATH_SEP, &(value[strlen(value)-1]))) { - value[strlen(value)-1] = '\0'; - } - if (0 != strcmp(param, value)) { - orte_show_help("help-orterun.txt", "orterun:app-prefix-conflict", - true, orte_basename, value, param); - /* let the global-level prefix take precedence since we - * know that one is being used - */ - free(param); - param = strdup(myglobals.prefix); - } - free(value); - } else if (NULL != myglobals.prefix) { - param = myglobals.prefix; - } else if (opal_cmd_line_is_taken(&cmd_line, "prefix")){ - /* must be --prefix alone */ - param = strdup(opal_cmd_line_get_param(&cmd_line, "prefix", 0, 0)); - } else { - /* --enable-orterun-prefix-default was given to orterun */ - param = strdup(opal_install_dirs.prefix); - } - - if (NULL != param) { - /* "Parse" the param, aka remove superfluous path_sep. */ - param_len = strlen(param); - while (0 == strcmp (OPAL_PATH_SEP, &(param[param_len-1]))) { - param[param_len-1] = '\0'; - param_len--; - if (0 == param_len) { - orte_show_help("help-orterun.txt", "orterun:empty-prefix", - true, orte_basename, orte_basename); - return ORTE_ERR_FATAL; - } - } - orte_set_attribute(&app->attributes, ORTE_APP_PREFIX_DIR, ORTE_ATTR_GLOBAL, param, OPAL_STRING); - free(param); - } - } - - /* Did the user specify a hostfile. Need to check for both - * hostfile and machine file. 
- * We can only deal with one hostfile per app context, otherwise give an error. - */ - if (0 < (j = opal_cmd_line_get_ninsts(&cmd_line, "hostfile"))) { - if(1 < j) { - orte_show_help("help-orterun.txt", "orterun:multiple-hostfiles", - true, orte_basename, NULL); - return ORTE_ERR_FATAL; - } else { - value = opal_cmd_line_get_param(&cmd_line, "hostfile", 0, 0); - orte_set_attribute(&app->attributes, ORTE_APP_HOSTFILE, ORTE_ATTR_LOCAL, value, OPAL_STRING); - } - } - if (0 < (j = opal_cmd_line_get_ninsts(&cmd_line, "machinefile"))) { - if(1 < j || orte_get_attribute(&app->attributes, ORTE_APP_HOSTFILE, NULL, OPAL_STRING)) { - orte_show_help("help-orterun.txt", "orterun:multiple-hostfiles", - true, orte_basename, NULL); - return ORTE_ERR_FATAL; - } else { - value = opal_cmd_line_get_param(&cmd_line, "machinefile", 0, 0); - orte_set_attribute(&app->attributes, ORTE_APP_HOSTFILE, ORTE_ATTR_LOCAL, value, OPAL_STRING); - } - } - - /* Did the user specify any hosts? */ - if (0 < (j = opal_cmd_line_get_ninsts(&cmd_line, "host"))) { - char **targ=NULL, *tval; - for (i = 0; i < j; ++i) { - value = opal_cmd_line_get_param(&cmd_line, "host", i, 0); - opal_argv_append_nosize(&targ, value); - } - tval = opal_argv_join(targ, ','); - orte_set_attribute(&app->attributes, ORTE_APP_DASH_HOST, ORTE_ATTR_LOCAL, tval, OPAL_STRING); - opal_argv_free(targ); - free(tval); - } - OBJ_DESTRUCT(&cmd_line); - - /* setup to listen for commands sent specifically to me, even though I would probably - * be the one sending them! 
Unfortunately, since I am a participating daemon, - * there are times I need to send a command to "all daemons", and that means *I* have - * to receive it too - */ - orte_rml.recv_buffer_nb(ORTE_NAME_WILDCARD, ORTE_RML_TAG_DAEMON, - ORTE_RML_PERSISTENT, orte_daemon_recv, NULL); - - /* spawn the DVM - we skip the initial steps as this - * isn't a user-level application */ - ORTE_ACTIVATE_JOB_STATE(jdata, ORTE_JOB_STATE_ALLOCATE); - - /* loop the event lib until an exit event is detected */ - while (orte_event_base_active) { - opal_event_loop(orte_event_base, OPAL_EVLOOP_ONCE); - } - ORTE_ACQUIRE_OBJECT(orte_event_base_active); - - /* cleanup and leave */ - orte_finalize(); - - if (orte_debug_flag) { - fprintf(stderr, "exiting with status %d\n", orte_exit_status); - } - exit(orte_exit_status); -} diff --git a/orte/tools/orte-ps/Makefile.am b/orte/tools/orte-ps/Makefile.am deleted file mode 100644 index 758ea925097..00000000000 --- a/orte/tools/orte-ps/Makefile.am +++ /dev/null @@ -1,47 +0,0 @@ -# -# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana -# University Research and Technology -# Corporation. All rights reserved. -# Copyright (c) 2004-2005 The University of Tennessee and The University -# of Tennessee Research Foundation. All rights -# reserved. -# Copyright (c) 2004-2009 High Performance Computing Center Stuttgart, -# University of Stuttgart. All rights reserved. -# Copyright (c) 2004-2005 The Regents of the University of California. -# All rights reserved. -# Copyright (c) 2008-2014 Cisco Systems, Inc. All rights reserved. -# Copyright (c) 2008 Sun Microsystems, Inc. All rights reserved. 
-# $COPYRIGHT$ -# -# Additional copyrights may follow -# -# $HEADER$ -# - -include $(top_srcdir)/Makefile.ompi-rules - -man_pages = orte-ps.1 -EXTRA_DIST = $(man_pages:.1=.1in) - -if OPAL_INSTALL_BINARIES - -bin_PROGRAMS = orte-ps - -nodist_man_MANS = $(man_pages) - -# Ensure that the man pages are rebuilt if the opal_config.h file -# changes; a "good enough" way to know if configure was run again (and -# therefore the release date or version may have changed) -$(nodist_man_MANS): $(top_builddir)/opal/include/opal_config.h - -dist_ortedata_DATA = help-orte-ps.txt - -endif # OPAL_INSTALL_BINARIES - -orte_ps_SOURCES = orte-ps.c -orte_ps_LDADD = \ - $(top_builddir)/orte/lib@ORTE_LIB_PREFIX@open-rte.la \ - $(top_builddir)/opal/lib@OPAL_LIB_PREFIX@open-pal.la - -distclean-local: - rm -f $(man_pages) diff --git a/orte/tools/orte-ps/help-orte-ps.txt b/orte/tools/orte-ps/help-orte-ps.txt deleted file mode 100644 index 875f7cd1b3f..00000000000 --- a/orte/tools/orte-ps/help-orte-ps.txt +++ /dev/null @@ -1,46 +0,0 @@ -# -*- text -*- -# -# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana -# University Research and Technology -# Corporation. All rights reserved. -# Copyright (c) 2004-2005 The University of Tennessee and The University -# of Tennessee Research Foundation. All rights -# reserved. -# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, -# University of Stuttgart. All rights reserved. -# Copyright (c) 2004-2005 The Regents of the University of California. -# All rights reserved. -# Copyright (c) 2012 Cisco Systems, Inc. All rights reserved. -# $COPYRIGHT$ -# -# Additional copyrights may follow -# -# $HEADER$ -# -# This is the US/English help file for Open MPI PS tool -# -[usage] -ompi-ps [OPTIONS] - Open MPI Job and Process Status Tool - -%s -# -[vpid-usage] -Error: You specified a vpid (%d) without also specifying a jobid. - Use the '-j' option to specify a jobid. 
-# -[need-vpid] -Error: You specified a jobid (%d) without also specifying a vpid. - Use the '-p' option to specify a vpid. -# -[invalid-vpid] -Error: The specified vpid (%d) is not valid for job %d. -# -[stale-hnp] -An attempt was made to obtain ps information from at least -one non-responsive HNP: - -HNP name: %s - -You may want to cleanup stale session directories in your temporary -directory (e.g., $TMPDIR). diff --git a/orte/tools/orte-ps/orte-ps.1in b/orte/tools/orte-ps/orte-ps.1in deleted file mode 100644 index aa6d3cb7cd3..00000000000 --- a/orte/tools/orte-ps/orte-ps.1in +++ /dev/null @@ -1,101 +0,0 @@ -.\" -.\" Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana -.\" University Research and Technology -.\" Corporation. All rights reserved. -.\" Copyright (c) 2008-2009 Sun Microsystems, Inc. All rights reserved. -.\" -.\" Man page for OMPI's ompi-ps command -.\" -.\" .TH name section center-footer left-footer center-header -.TH OMPI-PS 1 "#OMPI_DATE#" "#PACKAGE_VERSION#" "#PACKAGE_NAME#" -.\" ************************** -.\" Name Section -.\" ************************** -.SH NAME -. -ompi-ps, orte-ps \- Displays information about the active jobs and processes -in Open MPI. -. -.PP -. -\fBNOTE:\fP \fIompi-ps\fP, and \fIorte-ps\fP are exact -synonyms for each other. Using any of the names will result in exactly -identical behavior. -. -.\" ************************** -.\" Synopsis Section -.\" ************************** -.SH SYNOPSIS -. -.B ompi-ps -.B [ options ] -. -.\" ************************** -.\" Options Section -.\" ************************** -.SH Options -. -\fIompi-ps\fR will display information about running job(s) in the current -universe. -. -.TP 10 -.B -h | --help -Display help for this command -. -. -.TP -.B -v | --verbose -Enable verbose output for debugging -. -. -.TP -.B --daemons -Display daemon job information. -. -. -.TP -.B -j | --jobid -Display the state of a specific job in the universe. 
By default all jobs will -be displayed. -. -. -.TP -.B -p | --vpid -Display the state of a specific vpid (process) in the universe. By default all -vpids cooresponding to processes will be displayed. Must be used in conjunction -with the \fB--jobid\fP option. -. -. -.TP -.B -n | --nodes -Display all of the allocated nodes, and their cooresponding states. By default -this is disabled. -. -. -.TP -.B -gmca | --gmca \fR \fP -Pass global MCA parameters that are applicable to all contexts. \fI\fP is -the parameter name; \fI\fP is the parameter value. -. -. -.TP -.B -mca | --mca -Send arguments to various MCA modules. -. -. -.\" ************************** -.\" Description Section -.\" ************************** -.SH DESCRIPTION -. -.PP -\fIompi-ps\fR displays the state of jobs running inside an Open RTE universe. -. -. -.\" ************************** -.\" See Also Section -.\" ************************** -. -.SH SEE ALSO -orterun(1), orte-clean(1) -. diff --git a/orte/tools/orte-ps/orte-ps.c b/orte/tools/orte-ps/orte-ps.c deleted file mode 100644 index 4f444ad0125..00000000000 --- a/orte/tools/orte-ps/orte-ps.c +++ /dev/null @@ -1,985 +0,0 @@ -/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ -/* - * Copyright (c) 2004-2010 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2011 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * Copyright (c) 2006-2013 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2007 Sun Microsystems, Inc. All rights reserved. - * Copyright (c) 2007-2016 Los Alamos National Security, LLC. All rights - * reserved. 
- * Copyright (c) 2010 Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2014-2016 Intel, Inc. All rights reserved. - * Copyright (c) 2015 Research Organization for Information Science - * and Technology (RIST). All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -/** - * @fie - * ORTE PS command - * - */ - -#include "orte_config.h" -#include "orte/constants.h" - -#include -#include -#ifdef HAVE_UNISTD_H -#include -#endif /* HAVE_UNISTD_H */ -#include -#ifdef HAVE_SYS_STAT_H -#include -#endif /* HAVE_SYS_STAT_H */ -#ifdef HAVE_SYS_TYPES_H -#include -#endif /* HAVE_SYS_TYPES_H */ -#ifdef HAVE_SYS_WAIT_H -#include -#endif /* HAVE_SYS_WAIT_H */ -#include -#ifdef HAVE_DIRENT_H -#include -#endif /* HAVE_DIRENT_H */ - -#include "opal/util/basename.h" -#include "opal/util/cmd_line.h" -#include "opal/util/output.h" -#include "opal/util/opal_environ.h" -#include "opal/util/show_help.h" -#include "opal/mca/base/base.h" -#include "opal/runtime/opal.h" -#if OPAL_ENABLE_FT_CR == 1 -#include "opal/runtime/opal_cr.h" -#endif - -#include "orte/runtime/runtime.h" -#include "orte/util/error_strings.h" -#include "orte/util/hnp_contact.h" -#include "orte/util/name_fns.h" -#include "orte/util/show_help.h" -#include "orte/util/proc_info.h" -#include "orte/mca/errmgr/errmgr.h" -#include "orte/util/comm/comm.h" -#if OPAL_ENABLE_FT_CR == 1 -#include "orte/mca/snapc/base/base.h" -#endif -#include "orte/runtime/orte_globals.h" - -struct orte_ps_mpirun_info_t { - /** This is an object, so it must have a super */ - opal_list_item_t super; - - /* HNP info */ - orte_hnp_contact_t *hnp; - - /* array of jobs */ - orte_std_cntr_t num_jobs; - orte_job_t **jobs; - - /* array of nodes */ - orte_std_cntr_t num_nodes; - orte_node_t **nodes; -}; -typedef struct orte_ps_mpirun_info_t orte_ps_mpirun_info_t; - -static void orte_ps_mpirun_info_construct(orte_ps_mpirun_info_t *ptr) -{ - ptr->hnp = NULL; - ptr->num_jobs = 0; - ptr->jobs = 
NULL; - ptr->num_nodes = 0; - ptr->nodes = NULL; -} -static void orte_ps_mpirun_info_destruct(orte_ps_mpirun_info_t *ptr) -{ - orte_std_cntr_t i; - - if (NULL != ptr->hnp) OBJ_RELEASE(ptr->hnp); - if (NULL != ptr->jobs) { - for (i=0; i < ptr->num_jobs; i++) { - OBJ_RELEASE(ptr->jobs[i]); - } - free(ptr->jobs); - } - if (NULL != ptr->nodes) { - for (i=0; i < ptr->num_nodes; i++) { - OBJ_RELEASE(ptr->nodes[i]); - } - free(ptr->nodes); - } -} - -OBJ_CLASS_INSTANCE(orte_ps_mpirun_info_t, - opal_list_item_t, - orte_ps_mpirun_info_construct, - orte_ps_mpirun_info_destruct); - -/****************** - * Local Functions - ******************/ -static int orte_ps_init(int argc, char *argv[]); -static int parse_args(int argc, char *argv[]); - -static int gather_information(orte_ps_mpirun_info_t *hnpinfo); -static int gather_active_jobs(orte_ps_mpirun_info_t *hnpinfo); -static int gather_nodes(orte_ps_mpirun_info_t *hnpinfo); -static int gather_vpid_info(orte_ps_mpirun_info_t *hnpinfo); - -static int pretty_print(orte_ps_mpirun_info_t *hnpinfo); -static int pretty_print_nodes(orte_node_t **nodes, orte_std_cntr_t num_nodes); -static int pretty_print_jobs(orte_job_t **jobs, orte_std_cntr_t num_jobs); -static int pretty_print_vpids(orte_job_t *job); -static void pretty_print_dashed_line(int len); - -static char *pretty_node_state(orte_node_state_t state); - -static int parseable_print(orte_ps_mpirun_info_t *hnpinfo); - -/***************************************** - * Global Vars for Command line Arguments - *****************************************/ -typedef struct { - bool help; - bool verbose; - bool parseable; - orte_jobid_t jobid; - bool nodes; - bool daemons; - int output; - pid_t pid; -} orte_ps_globals_t; - -orte_ps_globals_t orte_ps_globals = {0}; - -opal_cmd_line_init_t cmd_line_opts[] = { - { NULL, - 'h', NULL, "help", - 0, - &orte_ps_globals.help, OPAL_CMD_LINE_TYPE_BOOL, - "This help message" }, - - { NULL, - 'v', NULL, "verbose", - 0, - &orte_ps_globals.verbose, 
OPAL_CMD_LINE_TYPE_BOOL, - "Be Verbose" }, - - { NULL, - '\0', NULL, "parseable", - 0, - &orte_ps_globals.parseable, OPAL_CMD_LINE_TYPE_BOOL, - "Provide parseable output" }, - - { NULL, - '\0', NULL, "daemons", - 0, - &orte_ps_globals.daemons, OPAL_CMD_LINE_TYPE_INT, - "Display daemon job information" }, - - { NULL, - 'j', NULL, "jobid", - 1, - &orte_ps_globals.jobid, OPAL_CMD_LINE_TYPE_INT, - "Specify a local jobid for the given mpirun - a value from 0 to N" }, - - { NULL, - 'p', NULL, "pid", - 1, - &orte_ps_globals.pid, OPAL_CMD_LINE_TYPE_INT, - "Specify mpirun pid" }, - - { NULL, - 'n', NULL, "nodes", - 0, - &orte_ps_globals.nodes, OPAL_CMD_LINE_TYPE_INT, - "Display Node Information" }, - - /* End of list */ - { NULL, - '\0', NULL, NULL, - 0, - NULL, OPAL_CMD_LINE_TYPE_NULL, - NULL } -}; - -int -main(int argc, char *argv[]) -{ - int ret, exit_status = ORTE_SUCCESS; - opal_list_t hnp_list; - opal_list_item_t* item = NULL; - orte_ps_mpirun_info_t hnpinfo; - bool reported = false; - - /*************** - * Initialize - ***************/ - OBJ_CONSTRUCT(&hnp_list, opal_list_t); - - if (ORTE_SUCCESS != (ret = orte_ps_init(argc, argv))) { - exit_status = ret; - goto cleanup; - } - - /* - * Get the directory listing - */ - opal_output_verbose(10, orte_ps_globals.output, - "orte_ps: Acquiring list of HNPs and setting contact info into RML...\n"); - - if (ORTE_SUCCESS != (ret = orte_list_local_hnps(&hnp_list, true) ) ) { - exit_status = ret; - goto cleanup; - } - - opal_output_verbose(10, orte_ps_globals.output, - "orte_ps: Found %d HNPs\n", - (int)opal_list_get_size(&hnp_list)); - - /* - * For each hnp in the listing - */ - while (NULL != (item = opal_list_remove_first(&hnp_list))) { - orte_hnp_contact_t *hnp = (orte_hnp_contact_t*)item; - hnpinfo.hnp = hnp; - - opal_output_verbose(10, orte_ps_globals.output, - "orte_ps: Processing HNP %lu\n", - (unsigned long)hnpinfo.hnp->pid); - - if (0 < orte_ps_globals.pid && - hnpinfo.hnp->pid != orte_ps_globals.pid) { - continue; - 
} - - /* - * Gather the information - */ - opal_output_verbose(10, orte_ps_globals.output, - "orte_ps: Gathering Information for HNP: %s:%d\n", - ORTE_NAME_PRINT(&(hnpinfo.hnp->name)), - hnpinfo.hnp->pid); - - if( ORTE_SUCCESS != (ret = gather_information(&hnpinfo)) ) { - /* this could be due to a stale session directory - if so, - * just skip this entry, but don't abort - */ - if (!reported && ORTE_ERR_SILENT == ret) { - orte_show_help("help-orte-ps.txt", "stale-hnp", true, - ORTE_NAME_PRINT(&(hnpinfo.hnp->name))); - reported = true; - continue; - } - goto cleanup; - } - - /* Print the information */ - if (orte_ps_globals.parseable) { - if (ORTE_SUCCESS != (ret = parseable_print(&hnpinfo))) { - exit_status = ret; - goto cleanup; - } - } else { - if(ORTE_SUCCESS != (ret = pretty_print(&hnpinfo)) ) { - exit_status = ret; - goto cleanup; - } - } - } - - /*************** - * Cleanup - ***************/ - cleanup: - orte_finalize(); - - return exit_status; -} - -static int parse_args(int argc, char *argv[]) { - int ret; - opal_cmd_line_t cmd_line; - orte_ps_globals_t tmp = { false, /* help */ - false, /* verbose */ - false, /* parseable */ - ORTE_JOBID_WILDCARD, /* jobid */ - false, /* nodes */ - false, /* daemons */ - -1, /* output */ - 0}; /* pid */ - - orte_ps_globals = tmp; - - /* Parse the command line options */ - opal_cmd_line_create(&cmd_line, cmd_line_opts); - - mca_base_open(); - mca_base_cmd_line_setup(&cmd_line); - ret = opal_cmd_line_parse(&cmd_line, false, false, argc, argv); - - if (OPAL_SUCCESS != ret) { - if (OPAL_ERR_SILENT != ret) { - fprintf(stderr, "%s: command line error (%s)\n", argv[0], - opal_strerror(ret)); - } - return ret; - } - - /** - * Now start parsing our specific arguments - */ - if (orte_ps_globals.help) { - char *str, *args = NULL; - args = opal_cmd_line_get_usage_msg(&cmd_line); - str = opal_show_help_string("help-orte-ps.txt", "usage", true, - args); - if (NULL != str) { - printf("%s", str); - free(str); - } - free(args); - /* If we 
show the help message, that should be all we do */ - exit(0); - } - - /* if the jobid is given, then we need a pid */ - if (ORTE_JOBID_WILDCARD != orte_ps_globals.jobid && - 0 == orte_ps_globals.pid) { - orte_show_help("help-orte-ps.txt", "need-vpid", true, - orte_ps_globals.jobid); - return ORTE_ERROR; - } - - return ORTE_SUCCESS; -} - -static int orte_ps_init(int argc, char *argv[]) { - int ret; -#if OPAL_ENABLE_FT_CR == 1 - char * tmp_env_var = NULL; -#endif - - /* - * Make sure to init util before parse_args - * to ensure installdirs is setup properly - * before calling mca_base_open(); - */ - if( ORTE_SUCCESS != (ret = opal_init_util(&argc, &argv)) ) { - return ret; - } - - /* - * Parse Command Line Arguments - */ - if (ORTE_SUCCESS != (ret = parse_args(argc, argv))) { - return ret; - } - - /* - * Setup OPAL Output handle from the verbose argument - */ - if( orte_ps_globals.verbose ) { - orte_ps_globals.output = opal_output_open(NULL); - opal_output_set_verbosity(orte_ps_globals.output, 10); - } else { - orte_ps_globals.output = 0; /* Default=STDERR */ - } - -#if OPAL_ENABLE_FT_CR == 1 - /* Disable the checkpoint notification routine for this - * tool. As we will never need to checkpoint this tool. - * Note: This must happen before opal_init(). 
- */ - opal_cr_set_enabled(false); - - /* Select the none component, since we don't actually use a checkpointer */ - (void) mca_base_var_env_name("crs", &tmp_env_var); - opal_setenv(tmp_env_var, - "none", - true, &environ); - free(tmp_env_var); - tmp_env_var = NULL; - - (void) mca_base_var_env_name("opal_cr_is_tool", &tmp_env_var); - opal_setenv(tmp_env_var, - "1", - true, &environ); - free(tmp_env_var); -#endif - - /* we are never allowed to operate as a distributed tool, - * so insist on the ess/tool component */ - opal_setenv("OMPI_MCA_ess", "tool", true, &environ); - - /*************************** - * We need all of OPAL and the TOOL portion of ORTE - ***************************/ - ret = orte_init(&argc, &argv, ORTE_PROC_TOOL); - - return ret; -} - -static int pretty_print(orte_ps_mpirun_info_t *hnpinfo) { - char *header; - int len_hdr; - - /* - * Print header and remember header length - */ - len_hdr = asprintf(&header, "Information from mpirun %s", ORTE_JOBID_PRINT(hnpinfo->hnp->name.jobid)); - - printf("\n\n%s\n", header); - free(header); - pretty_print_dashed_line(len_hdr); - - /* - * Print Node Information - */ - if( orte_ps_globals.nodes ) - pretty_print_nodes(hnpinfo->nodes, hnpinfo->num_nodes); - - /* - * Print Job Information - */ - pretty_print_jobs(hnpinfo->jobs, hnpinfo->num_jobs); - - return ORTE_SUCCESS; -} - -static int pretty_print_nodes(orte_node_t **nodes, orte_std_cntr_t num_nodes) { - int line_len; - int len_name = 0, - len_state = 0, - len_slots = 0, - len_slots_i = 0, - len_slots_m = 0; - orte_node_t *node; - orte_std_cntr_t i; - - /* - * Caculate segment lengths - */ - len_name = (int) strlen("Node Name"); - len_state = (int) strlen("State"); - len_slots = (int) strlen("Slots"); - len_slots_i = (int) strlen("Slots In Use"); - len_slots_m = (int) strlen("Slots Max"); - - for(i=0; i < num_nodes; i++) { - node = nodes[i]; - - if( NULL != node->name && - (int)strlen(node->name) > len_name) - len_name = (int) strlen(node->name); - - if( 
(int)strlen(pretty_node_state(node->state)) > len_state ) - len_state = (int)strlen(pretty_node_state(node->state)); - } - - line_len = (len_name + 3 + - len_state + 3 + - len_slots + 3 + - len_slots_i + 3 + - len_slots_m) + 2; - - /* - * Print the header - */ - printf("%*s | ", len_name, "Node Name"); - printf("%*s | ", len_state, "State"); - printf("%*s | ", len_slots, "Slots"); - printf("%*s | ", len_slots_m, "Slots Max"); - printf("%*s | ", len_slots_i, "Slots In Use"); - printf("\n"); - - pretty_print_dashed_line(line_len); - - /* - * Print Info - */ - for(i=0; i < num_nodes; i++) { - node = nodes[i]; - - printf("%*s | ", len_name, node->name); - printf("%*s | ", len_state, pretty_node_state(node->state)); - printf("%*d | ", len_slots, (uint)node->slots); - printf("%*d | ", len_slots_m, (uint)node->slots_max); - printf("%*d | ", len_slots_i, (uint)node->slots_inuse); - printf("\n"); - - } - - return ORTE_SUCCESS; -} - -static int pretty_print_jobs(orte_job_t **jobs, orte_std_cntr_t num_jobs) { - int len_jobid = 0, - len_state = 0, - len_slots = 0, - len_vpid_r = 0, - len_ckpt_s = 0, - len_ckpt_r = 0, - len_ckpt_l = 0; - int line_len; - orte_job_t *job; - orte_std_cntr_t i; - char *jobstr; - orte_jobid_t mask=0x0000ffff; -#if OPAL_ENABLE_FT_CR == 1 - char * state_str = NULL; - size_t ckpt_state; - char *snap_ref = NULL; - char *snap_loc = NULL; -#endif - - for(i=0; i < num_jobs; i++) { - job = jobs[i]; - - /* check the jobid to see if this is the daemons' job */ - if ((0 == (mask & job->jobid)) && !orte_ps_globals.daemons) { - continue; - } - - /* setup the printed name - do -not- free this! */ - jobstr = ORTE_JOBID_PRINT(job->jobid); - - /* - * Caculate segment lengths - */ - len_jobid = strlen(jobstr);; - len_state = (int) (strlen(orte_job_state_to_str(job->state)) < strlen("State") ? 
- strlen("State") : - strlen(orte_job_state_to_str(job->state))); - len_slots = 6; - len_vpid_r = (int) strlen("Num Procs"); -#if OPAL_ENABLE_FT_CR == 1 - orte_get_attribute(&job->attributes, ORTE_JOB_CKPT_STATE, (void**)&ckpt_state, OPAL_INT32); - orte_get_attribute(&job->attributes, ORTE_JOB_SNAPSHOT_REF, (void**)&snap_ref, OPAL_STRING); - orte_get_attribute(&job->attributes, ORTE_JOB_SNAPSHOT_LOC, (void**)&snap_loc, OPAL_STRING); - orte_snapc_ckpt_state_str(&state_str, ckpt_state); - len_ckpt_s = (int) (strlen(state_str) < strlen("Ckpt State") ? - strlen("Ckpt State") : - strlen(state_str) ); - len_ckpt_r = (int) (NULL == snap_ref ? strlen("Ckpt Ref") : (strlen(snap_ref) < strlen("Ckpt Ref") ? - strlen("Ckpt Ref") : strlen(snap_ref))); - len_ckpt_l = (int) (NULL == snap_loc ? strlen("Ckpt Loc") : (strlen(snap_loc) < strlen("Ckpt Loc") ? - strlen("Ckpt Loc") : strlen(snap_loc))); -#else - len_ckpt_s = -3; - len_ckpt_r = -3; - len_ckpt_l = -3; -#endif - - line_len = (len_jobid + 3 + - len_state + 3 + - len_slots + 3 + - len_vpid_r + 3 + - len_ckpt_s + 3 + - len_ckpt_r + 3 + - len_ckpt_l) - + 2; - - /* - * Print Header - */ - printf("\n"); - printf("%*s | ", len_jobid , "JobID"); - printf("%*s | ", len_state , "State"); - printf("%*s | ", len_slots , "Slots"); - printf("%*s | ", len_vpid_r , "Num Procs"); -#if OPAL_ENABLE_FT_CR == 1 - printf("%*s | ", len_ckpt_s , "Ckpt State"); - printf("%*s | ", len_ckpt_r , "Ckpt Ref"); - printf("%*s |", len_ckpt_l , "Ckpt Loc"); -#endif - printf("\n"); - - pretty_print_dashed_line(line_len); - - /* - * Print Info - */ - printf("%*s | ", len_jobid , ORTE_JOBID_PRINT(job->jobid)); - printf("%*s | ", len_state , orte_job_state_to_str(job->state)); - printf("%*d | ", len_slots , (uint)job->total_slots_alloc); - printf("%*d | ", len_vpid_r, job->num_procs); -#if OPAL_ENABLE_FT_CR == 1 - printf("%*s | ", len_ckpt_s, state_str); - printf("%*s | ", len_ckpt_r, (NULL == snap_ref ? 
"" : snap_ref)); - printf("%*s |", len_ckpt_l, (NULL == snap_loc ? "" : snap_loc)); -#endif - printf("\n"); - - - pretty_print_vpids(job); - printf("\n\n"); /* give a little room between job outputs */ - } - - return ORTE_SUCCESS; -} - -static int pretty_print_vpids(orte_job_t *job) { - int len_o_proc_name = 0, - len_proc_name = 0, - len_rank = 0, - len_pid = 0, - len_state = 0, - len_node = 0, - len_ckpt_s = 0, - len_ckpt_r = 0, - len_ckpt_l = 0; - int i, line_len; - orte_vpid_t v; - orte_proc_t *vpid; - orte_app_context_t *app; - char *o_proc_name; -#if OPAL_ENABLE_FT_CR == 1 - char *state_str = NULL; - size_t ckpt_state; - char *snap_ref = NULL; - char *snap_loc = NULL; -#endif - char **nodename = NULL; - - if (0 == job->num_procs) { - return ORTE_SUCCESS; - } - - /* - * Caculate segment lengths - */ - len_o_proc_name = (int)strlen("ORTE Name"); - len_proc_name = (int)strlen("Process Name"); - len_rank = (int)strlen("Local Rank"); - len_pid = 6; - len_state = 0; - len_node = 0; -#if OPAL_ENABLE_FT_CR == 1 - len_ckpt_s = strlen("Ckpt State"); - len_ckpt_r = strlen("Ckpt Ref"); - len_ckpt_l = strlen("Ckpt Loc"); -#else - len_ckpt_s = -3; - len_ckpt_r = -3; - len_ckpt_l = -3; -#endif - - nodename = (char **) malloc(job->num_procs * sizeof(char *)); - for(v=0; v < job->num_procs; v++) { - char *rankstr; - vpid = (orte_proc_t*)job->procs->addr[v]; - - /* - * Find my app context - */ - if( 0 >= (int)job->num_apps ) { - if( 0 == vpid->name.vpid ) { - if( (int)strlen("orterun") > len_proc_name) - len_proc_name = strlen("orterun"); - } - else { - if( (int)strlen("orted") > len_proc_name) - len_proc_name = strlen("orted"); - } - } - for( i = 0; i < (int)job->num_apps; ++i) { - app = (orte_app_context_t*)job->apps->addr[i]; - if( app->idx == vpid->app_idx ) { - if( (int)strlen(app->app) > len_proc_name) - len_proc_name = strlen(app->app); - break; - } - } - - o_proc_name = orte_util_print_name_args(&vpid->name); - if ((int)strlen(o_proc_name) > len_o_proc_name) - 
len_o_proc_name = strlen(o_proc_name); - - asprintf(&rankstr, "%u", (uint)vpid->local_rank); - if ((int)strlen(rankstr) > len_rank) - len_rank = strlen(rankstr); - free(rankstr); - - nodename[v] = NULL; - if( orte_get_attribute(&vpid->attributes, ORTE_PROC_NODENAME, (void**)&nodename[v], OPAL_STRING) && - (int)strlen(nodename[v]) > len_node) { - len_node = strlen(nodename[v]); - } else if ((int)strlen("Unknown") > len_node) { - len_node = strlen("Unknown"); - } - - if( (int)strlen(orte_proc_state_to_str(vpid->state)) > len_state) - len_state = strlen(orte_proc_state_to_str(vpid->state)); - -#if OPAL_ENABLE_FT_CR == 1 - orte_get_attribute(&vpid->attributes, ORTE_PROC_CKPT_STATE, (void**)&ckpt_state, OPAL_INT32); - orte_get_attribute(&vpid->attributes, ORTE_PROC_SNAPSHOT_REF, (void**)&snap_ref, OPAL_STRING); - orte_get_attribute(&vpid->attributes, ORTE_PROC_SNAPSHOT_LOC, (void**)&snap_loc, OPAL_STRING); - orte_snapc_ckpt_state_str(&state_str, ckpt_state); - if( (int)strlen(state_str) > len_ckpt_s) - len_ckpt_s = strlen(state_str); - - if(NULL != snap_ref && (int)strlen(snap_ref) > len_ckpt_r) - len_ckpt_r = strlen(snap_ref); - - if(NULL != snap_loc && (int)strlen(snap_loc) > len_ckpt_l) - len_ckpt_l = strlen(snap_loc); -#endif - } - - line_len = (len_o_proc_name + 3 + - len_proc_name + 3 + - len_rank + 3 + - len_pid + 3 + - len_state + 3 + - len_node + 3 + - len_ckpt_s + 3 + - len_ckpt_r + 3 + - len_ckpt_l) - + 2; - - /* - * Print Header - */ - printf("\t"); - printf("%*s | ", len_proc_name , "Process Name"); - printf("%*s | ", len_o_proc_name , "ORTE Name"); - printf("%*s | ", len_rank , "Local Rank"); - printf("%*s | ", len_pid , "PID"); - printf("%*s | ", len_node , "Node"); - printf("%*s | ", len_state , "State"); -#if OPAL_ENABLE_FT_CR == 1 - printf("%*s | ", len_ckpt_s , "Ckpt State"); - printf("%*s | ", len_ckpt_r , "Ckpt Ref"); - printf("%*s |", len_ckpt_l , "Ckpt Loc"); -#endif - printf("\n"); - - printf("\t"); - pretty_print_dashed_line(line_len); - - /* - 
* Print Info - */ - for(v=0; v < job->num_procs; v++) { - vpid = (orte_proc_t*)job->procs->addr[v]; - - printf("\t"); - - if( 0 >= (int)job->num_apps ) { - if( 0 == vpid->name.vpid ) { - printf("%*s | ", len_proc_name, "orterun"); - } else { - printf("%*s | ", len_proc_name, "orted"); - } - } - for( i = 0; i < (int)job->num_apps; ++i) { - app = (orte_app_context_t*)job->apps->addr[i]; - if( app->idx == vpid->app_idx ) { - printf("%*s | ", len_proc_name, app->app); - break; - } - } - - o_proc_name = orte_util_print_name_args(&vpid->name); - - printf("%*s | ", len_o_proc_name, o_proc_name); - printf("%*u | ", len_rank , (uint)vpid->local_rank); - printf("%*d | ", len_pid , vpid->pid); - printf("%*s | ", len_node , (NULL == nodename[v]) ? "Unknown" : nodename[v]); - printf("%*s | ", len_state , orte_proc_state_to_str(vpid->state)); - - if (NULL != nodename[v]) { - free(nodename[v]); - } -#if OPAL_ENABLE_FT_CR == 1 - printf("%*s | ", len_ckpt_s, state_str); - printf("%*s | ", len_ckpt_r, (NULL == snap_ref ? "" : snap_ref)); - printf("%*s |", len_ckpt_l, (NULL == snap_loc ? 
"" : snap_loc)); -#endif - printf("\n"); - - } - if (NULL != nodename) { - free(nodename); - } - return ORTE_SUCCESS; -} - -static void pretty_print_dashed_line(int len) { - static const char dashes[9] = "--------"; - - while (len >= 8) { - printf("%8.8s", dashes); - len -= 8; - } - printf("%*.*s\n", len, len, dashes); -} - -static int gather_information(orte_ps_mpirun_info_t *hnpinfo) { - int ret; - - if( ORTE_SUCCESS != (ret = gather_active_jobs(hnpinfo) )) { - goto cleanup; - } - - if( ORTE_SUCCESS != (ret = gather_nodes(hnpinfo) )) { - goto cleanup; - } - - if( ORTE_SUCCESS != (ret = gather_vpid_info(hnpinfo) )) { - goto cleanup; - } - - cleanup: - return ret; -} - -static int gather_active_jobs(orte_ps_mpirun_info_t *hnpinfo) { - int ret; - - if (ORTE_SUCCESS != (ret = orte_util_comm_query_job_info(&(hnpinfo->hnp->name), orte_ps_globals.jobid, - &hnpinfo->num_jobs, &hnpinfo->jobs))) { - ORTE_ERROR_LOG(ret); - } - - return ret; -} - -static int gather_nodes(orte_ps_mpirun_info_t *hnpinfo) { - int ret; - - if (ORTE_SUCCESS != (ret = orte_util_comm_query_node_info(&(hnpinfo->hnp->name), NULL, - &hnpinfo->num_nodes, &hnpinfo->nodes))) { - ORTE_ERROR_LOG(ret); - } - opal_output(0, "RECEIVED %d NODES", hnpinfo->num_nodes); - return ret; -} - -static int gather_vpid_info(orte_ps_mpirun_info_t *hnpinfo) { - int ret; - orte_std_cntr_t i; - int cnt; - orte_job_t *job; - orte_proc_t **procs; - - /* - * For each Job in the HNP - */ - for(i=0; i < hnpinfo->num_jobs; i++) { - job = hnpinfo->jobs[i]; - - /* - * Skip getting the vpid's for the HNP, unless asked to do so - * The HNP is always the first in the array - */ - if( 0 == i && !orte_ps_globals.daemons) { - continue; - } - - /* query the HNP for info on the procs in this job */ - if (ORTE_SUCCESS != (ret = orte_util_comm_query_proc_info(&(hnpinfo->hnp->name), - job->jobid, - ORTE_VPID_WILDCARD, - &cnt, - &procs))) { - ORTE_ERROR_LOG(ret); - } - job->procs->addr = (void**)procs; - job->procs->size = cnt; - 
job->num_procs = cnt; - } - - return ORTE_SUCCESS; -} - -static char *pretty_node_state(orte_node_state_t state) { - switch(state) { - case ORTE_NODE_STATE_DOWN: - return strdup("Down"); - break; - case ORTE_NODE_STATE_UP: - return strdup("Up"); - break; - case ORTE_NODE_STATE_REBOOT: - return strdup("Reboot"); - break; - case ORTE_NODE_STATE_UNKNOWN: - default: - return strdup("Unknown"); - break; - } -} - -static int parseable_print(orte_ps_mpirun_info_t *hnpinfo) -{ - orte_job_t **jobs; - orte_node_t **nodes; - orte_proc_t *proc; - orte_app_context_t *app; - char *appname; - int i, j; - char *nodename; - - /* don't include the daemon job in the number of jobs reported */ - printf("mpirun:%lu:num nodes:%d:num jobs:%d\n", - (unsigned long)hnpinfo->hnp->pid, hnpinfo->num_nodes, hnpinfo->num_jobs-1); - - if (orte_ps_globals.nodes) { - nodes = hnpinfo->nodes; - for (i=0; i < hnpinfo->num_nodes; i++) { - printf("node:%s:state:%s:slots:%d:in use:%d\n", - nodes[i]->name, pretty_node_state(nodes[i]->state), - nodes[i]->slots, nodes[i]->slots_inuse); - } - } - - jobs = hnpinfo->jobs; - /* skip job=0 as that's the daemon job */ - for (i=1; i < hnpinfo->num_jobs; i++) { - printf("jobid:%d:state:%s:slots:%d:num procs:%d\n", - ORTE_LOCAL_JOBID(jobs[i]->jobid), - orte_job_state_to_str(jobs[i]->state), - jobs[i]->total_slots_alloc, - jobs[i]->num_procs); - /* print the proc info */ - for (j=0; j < jobs[i]->procs->size; j++) { - if (NULL == (proc = (orte_proc_t*)opal_pointer_array_get_item(jobs[i]->procs, j))) { - continue; - } - app = (orte_app_context_t*)opal_pointer_array_get_item(jobs[i]->apps, proc->app_idx); - if (NULL == app) { - appname = strdup("NULL"); - } else { - appname = opal_basename(app->app); - } - nodename = NULL; - orte_get_attribute(&proc->attributes, ORTE_PROC_NODENAME, (void**)&nodename, OPAL_STRING); - printf("process:%s:rank:%s:pid:%lu:node:%s:state:%s\n", - appname, ORTE_VPID_PRINT(proc->name.vpid), - (unsigned long)proc->pid, - (NULL == nodename) ? 
"unknown" : nodename, - orte_proc_state_to_str(proc->state)); - free(appname); - if (NULL != nodename) { - free(nodename); - } - } - } - - return ORTE_SUCCESS; -} diff --git a/orte/tools/orte-top/Makefile.am b/orte/tools/orte-top/Makefile.am deleted file mode 100644 index ab3b86508ad..00000000000 --- a/orte/tools/orte-top/Makefile.am +++ /dev/null @@ -1,47 +0,0 @@ -# -# Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana -# University Research and Technology -# Corporation. All rights reserved. -# Copyright (c) 2004-2005 The University of Tennessee and The University -# of Tennessee Research Foundation. All rights -# reserved. -# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, -# University of Stuttgart. All rights reserved. -# Copyright (c) 2004-2005 The Regents of the University of California. -# All rights reserved. -# Copyright (c) 2008-2014 Cisco Systems, Inc. All rights reserved. -# Copyright (c) 2008 Sun Microsystems, Inc. All rights reserved. 
-# $COPYRIGHT$ -# -# Additional copyrights may follow -# -# $HEADER$ -# - -include $(top_srcdir)/Makefile.ompi-rules - -man_pages = orte-top.1 -EXTRA_DIST = orte-top.1in - -if OPAL_INSTALL_BINARIES - -bin_PROGRAMS = orte-top - -nodist_man_MANS = $(man_pages) - -# Ensure that the man pages are rebuilt if the opal_config.h file -# changes; a "good enough" way to know if configure was run again (and -# therefore the release date or version may have changed) -$(nodist_man_MANS): $(top_builddir)/opal/include/opal_config.h - -dist_ortedata_DATA = help-orte-top.txt - -endif # OPAL_INSTALL_BINARIES - -orte_top_SOURCES = orte-top.c -orte_top_LDADD = \ - $(top_builddir)/orte/lib@ORTE_LIB_PREFIX@open-rte.la \ - $(top_builddir)/opal/lib@OPAL_LIB_PREFIX@open-pal.la - -distclean-local: - rm -f $(man_pages) diff --git a/orte/tools/orte-top/help-orte-top.txt b/orte/tools/orte-top/help-orte-top.txt deleted file mode 100644 index 5eae695eedf..00000000000 --- a/orte/tools/orte-top/help-orte-top.txt +++ /dev/null @@ -1,82 +0,0 @@ -# -*- text -*- -# -# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana -# University Research and Technology -# Corporation. All rights reserved. -# Copyright (c) 2004-2009 The University of Tennessee and The University -# of Tennessee Research Foundation. All rights -# reserved. -# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, -# University of Stuttgart. All rights reserved. -# Copyright (c) 2004-2005 The Regents of the University of California. -# All rights reserved. -# Copyright (c) 2012 Cisco Systems, Inc. All rights reserved. -# $COPYRIGHT$ -# -# Additional copyrights may follow -# -# $HEADER$ -# -# This is the US/English general help file for Open MPI's orte-top tool. -# -[orte-top:usage] -Return statistics on specified process ranks - -Usage: %s [OPTIONS] - -%s -# -[orte-top:pid-not-found] -We could not find an mpirun matching the provided pid on this machine. 
- -Pid provided: %d -# -[orte-top:no-contact-given] -This tool requires that you specify contact info for the mpirun executing -the specified rank(s). Please use the --help option for more information. -# -[orte-top:hnp-filename-bad] -We are unable to parse the filename where contact info for the -mpirun to be contacted was to be found. The option we were given was: - ---%s %s - -This appears to be missing the required ':' following the -keyword "file". Please use the --help option for more information on -the correct format for this command line option. -# -[orte-top:hnp-filename-access] -We are unable to access the filename where contact info for the -mpirun to be contacted was to be found. The filename we were given was: - -File: %s - -Please use the --help option for more information on -the correct format for this command line option. -# -[orte-top:hnp-file-bad] -We are unable to read the mpirun's contact info from the -given filename. The filename we were given was: - -FILE: %s - -Please use the --help option for more information on -the correct format for this command line option. -# -[orte-top:hnp-uri-bad] -We are unable to correctly parse the mpirun's contact info. The uri we were given was: - -URI: %s - -Please remember that this is *not* a standard uri, but -a special format used internally by Open MPI for communications. It can -best be generated by simply directing mpirun to put its -uri in a file, and then giving us that filename. -# -[orte-top:cant-open-logfile] -We are unable to open the specified output log file. - -File: %s - -Please use the --help option for more information on -the correct format for this command line option. diff --git a/orte/tools/orte-top/orte-top.1in b/orte/tools/orte-top/orte-top.1in deleted file mode 100644 index c33654a5b43..00000000000 --- a/orte/tools/orte-top/orte-top.1in +++ /dev/null @@ -1,106 +0,0 @@ -.\" -.\" Copyright (c) 2007 Los Alamos National Security, LLC -.\" All rights reserved. 
-.\" Copyright (c) 2008-2009 Sun Microsystems, Inc. All rights reserved. -.\" -.\" Man page for OMPI's ompi-server command -.\" -.\" .TH name section center-footer left-footer center-header -.TH OMPI-TOP 1 "#OMPI_DATE#" "#PACKAGE_VERSION#" "#PACKAGE_NAME#" -.\" ************************** -.\" Name Section -.\" ************************** -.SH NAME -. -ompi-top, orte-top \- Diagnostic to provide process info similar to the popular "top" program. -. -.PP -. -\fBNOTE:\fP \fIompi-top\fP, and \fIorte-top\fP are exact -synonyms for each other. Using any of the names will result in exactly -identical behavior. -. - -.\" ************************** -.\" Synopsis Section -.\" ************************** -.SH SYNOPSIS -. -.BR ompi-top " [ options ]" -. -.\" ************************** -.\" Options Section -.\" ************************** -.SH Options -. -\fIompi-top\fR collects and displays process information in a manner similar -to that of the popular "top" program. -. -.TP 10 -.B -h | --help -Display help for this command -. -. -.TP -.B -pid | --pid \fR\fP -The pid of the mpirun whose processes you want information about, or the name -of the file (specified as file:filename) that contains that info. Note that -the ompi-top command must be executed on the same node as mpirun to use this option. -. -. -.TP -.B -uri | --uri \fR\fP -Specify the URI of the mpirun whose processes you want information about, or the name -of the file (specified as file:filename) that contains that info. Note that -the ompi-top command does not have to be executed on the same node as mpirun to use this option. -. -. -.TP -.B -rank | --rank \fR\fP -The rank of the processes to be monitored. This can consist of a single rank, or -a comma-separated list of ranks. These can include rank ranges separated by a '-'. -If this option is not provided, or a value of -1 is given, ompi-top will default -to displaying information on all ranks. -. -. 
-.TP -.B -bynode | --bynode -Display the results grouped by node, with each node's processes reported in rank -order. If this option is not provided, ompi-top will default to displaying all -results in rank order. -. -. -.TP -.B -update-rate | --update-rate \fR\fP -The time (in seconds) between updates of the displayed information. If this option -is not provided, ompi-top will default to executing only once. -. -. -.TP -.B -timestamp | --timestamp -Provide an approximate time when each sample was taken. This time is approximate as it -only shows the time when the sample command was issued. -. -. -.TP -.B -log-file | --log-file \fR\fP -Log the results to the specified file instead of displaying them to stdout. -. -. -.\" ************************** -.\" Description Section -.\" ************************** -.SH DESCRIPTION -. -.PP -\fIompi-top\fR collects and displays process information in a manner similar -to that of the popular "top" program. It doesn't do the fancy screen display, but -does allow you to monitor available process information (to the limits of the underlying -operating system) of processes irrespective of their location. -. -.\" ************************** -.\" See Also Section -.\" ************************** -. -.SH SEE ALSO -. diff --git a/orte/tools/orte-top/orte-top.c b/orte/tools/orte-top/orte-top.c deleted file mode 100644 index 2111146b1ef..00000000000 --- a/orte/tools/orte-top/orte-top.c +++ /dev/null @@ -1,1041 +0,0 @@ -/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ -/* - * Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2011 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. 
- * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * Copyright (c) 2007-2012 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2007-2016 Los Alamos National Security, LLC. All rights - * reserved. - * Copyright (c) 2015-2017 Intel, Inc. All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "orte_config.h" - -#include -#include -#ifdef HAVE_UNISTD_H -#include -#endif -#ifdef HAVE_NETDB_H -#include -#endif -#ifdef HAVE_SYS_PARAM_H -#include -#endif -#include -#include -#include -#include - -#include "opal/util/cmd_line.h" -#include "opal/util/argv.h" -#include "opal/util/show_help.h" -#include "opal/util/opal_environ.h" -#include "opal/dss/dss.h" -#include "opal/mca/base/base.h" -#include "opal/mca/pmix/pmix.h" -#include "opal/runtime/opal.h" -#include "opal/mca/event/event.h" - -#include "orte/mca/errmgr/errmgr.h" -#include "orte/mca/iof/base/base.h" -#include "orte/mca/rml/rml.h" -#include "orte/mca/odls/odls_types.h" -#include "orte/mca/routed/routed.h" -#include "orte/runtime/orte_globals.h" -#include "orte/util/hnp_contact.h" -#include "orte/util/name_fns.h" -#include "orte/util/show_help.h" -#include "orte/util/proc_info.h" -#include "orte/util/threads.h" -#include "orte/runtime/orte_wait.h" -#include "orte/mca/rml/base/rml_contact.h" -#include "orte/runtime/orte_quit.h" - -/* - * Local variables & functions - */ -static void abort_exit_callback(int fd, short flags, void *arg); -static opal_event_t term_handler; -static opal_event_t int_handler; -static opal_list_t hnp_list; -static bool all_recvd; -static int32_t num_replies; -static int32_t num_recvd; -static opal_buffer_t cmdbuf; -static FILE *fp = NULL; -static bool help; -static char *hnppidstr; -static char *hnpuristr; -static char *ranks; -static orte_hnp_contact_t *target_hnp; -static int update_rate; -static bool timestamp; -static char *logfile; -static bool bynode; -static opal_list_t 
recvd_stats; -static char *sample_time; -static bool need_header = true; -static int num_lines=0; -static bool fields_set = false; -static int nodefield = 0; -static int rankfield = 0; -static int pidfield = 0; -static int cmdfield = 0; -static int timefield = 6; -static int prifield = 0; -static int thrfield = 0; -static int vsizefield = 0; -static int rssfield = 0; -static int pkvfield = 0; -static int pfield = 0; - -/* flag what fields were actually found */ -static bool pri_found = false; -static bool thr_found = false; -static bool vsize_found = false; -static bool rss_found = false; -static bool pkv_found = false; -static bool p_found = false; - -#define MAX_LINES 20 - -opal_cmd_line_init_t cmd_line_opts[] = { - { NULL, - 'h', NULL, "help", - 0, - &help, OPAL_CMD_LINE_TYPE_BOOL, - "This help message" }, - - { NULL, - '\0', "pid", "pid", - 1, - &hnppidstr, OPAL_CMD_LINE_TYPE_STRING, - "The pid of the mpirun that you wish to query/monitor" }, - - { NULL, - '\0', "uri", "uri", - 1, - &hnpuristr, OPAL_CMD_LINE_TYPE_STRING, - "The uri of the mpirun that you wish to query/monitor" }, - - { NULL, - '\0', "rank", "rank", - 1, - &ranks, OPAL_CMD_LINE_TYPE_STRING, - "Rank whose resource usage is to be displayed/monitored" }, - - { NULL, - '\0', "update-rate", "update-rate", - 1, - &update_rate, OPAL_CMD_LINE_TYPE_INT, - "Number of seconds between updates" }, - - { NULL, - '\0', "timestamp", "timestamp", - 0, - ×tamp, OPAL_CMD_LINE_TYPE_BOOL, - "Time stamp each sample" }, - - { NULL, - '\0', "log-file", "log-file", - 1, - &logfile, OPAL_CMD_LINE_TYPE_STRING, - "Output file for returned statistics" }, - - { NULL, - '\0', "bynode", "bynode", - 0, - &bynode, OPAL_CMD_LINE_TYPE_BOOL, - "Group statistics by node, sorted by rank within each node" }, - - /* End of list */ - { NULL, - '\0', NULL, NULL, - 0, - NULL, OPAL_CMD_LINE_TYPE_NULL, - NULL } -}; - - -static void recv_stats(int status, orte_process_name_t* sender, - opal_buffer_t *buffer, orte_rml_tag_t tag, - void* 
cbdata); - -static void pretty_print(void); -static void print_headers(void); - -static void send_cmd(int fd, short dummy, void *arg) -{ - int ret; - opal_buffer_t *buf; - - all_recvd = false; - num_replies = INT_MAX; - num_recvd = 0; - buf = OBJ_NEW(opal_buffer_t); - opal_dss.copy_payload(buf, &cmdbuf); - if (0 > (ret = orte_rml.send_buffer_nb(orte_mgmt_conduit, - &(target_hnp->name), buf, - ORTE_RML_TAG_DAEMON, - orte_rml_send_callback, NULL))) { - ORTE_ERROR_LOG(ret); - OBJ_RELEASE(buf); - orte_quit(0,0,NULL); - return; - } -} - -int -main(int argc, char *argv[]) -{ - int ret; - opal_cmd_line_t cmd_line; - opal_list_item_t* item = NULL; - orte_daemon_cmd_flag_t command; - pid_t hnppid; - orte_process_name_t proc; - char **r1=NULL, **r2; - int i; - orte_vpid_t vstart, vend; - int vint; - char *rtmod; - opal_value_t val; - - /*************** - * Initialize - ***************/ - - /* - * Make sure to init util before parse_args - * to ensure installdirs is setup properly - * before calling mca_base_open(); - */ - if( ORTE_SUCCESS != (ret = opal_init_util(&argc, &argv)) ) { - return ret; - } - - /* initialize the globals */ - help = false; - hnppidstr = NULL; - ranks = NULL; - target_hnp = NULL; - update_rate = -1; - timestamp = false; - logfile = NULL; - - /* Parse the command line options */ - opal_cmd_line_create(&cmd_line, cmd_line_opts); - - mca_base_open(); - mca_base_cmd_line_setup(&cmd_line); - ret = opal_cmd_line_parse(&cmd_line, false, false, argc, argv); - if (OPAL_SUCCESS != ret) { - if (OPAL_ERR_SILENT != ret) { - fprintf(stderr, "%s: command line error (%s)\n", argv[0], - opal_strerror(ret)); - } - return 1; - } - - /** - * Now start parsing our specific arguments - */ - if (help) { - char *str, *args = NULL; - args = opal_cmd_line_get_usage_msg(&cmd_line); - str = opal_show_help_string("help-orte-top.txt", "orte-top:usage", - true, "orte-top", args); - if (NULL != str) { - printf("%s", str); - free(str); - } - free(args); - /* If we show the help 
message, that should be all we do */ - return 0; - } - - /* we are never allowed to operate as a distributed tool, - * so insist on the ess/tool component */ - opal_setenv("OMPI_MCA_ess", "tool", true, &environ); - - /*************************** - * We need all of OPAL and the TOOL portion of ORTE - ***************************/ - if (ORTE_SUCCESS != orte_init(&argc, &argv, ORTE_PROC_TOOL)) { - orte_finalize(); - return 1; - } - - /* get our routed module */ - rtmod = orte_rml.get_routed(orte_mgmt_conduit); - - /* setup the list for recvd stats */ - OBJ_CONSTRUCT(&recvd_stats, opal_list_t); - - /** setup callbacks for abort signals - from this point - * forward, we need to abort in a manner that allows us - * to cleanup - */ - opal_event_signal_set(orte_event_base, &term_handler, SIGTERM, - abort_exit_callback, &term_handler); - opal_event_signal_add(&term_handler, NULL); - opal_event_signal_set(orte_event_base, &int_handler, SIGINT, - abort_exit_callback, &int_handler); - opal_event_signal_add(&int_handler, NULL); - - /* - * Must specify the mpirun pid - */ - if (NULL != hnppidstr) { - if (0 == strncmp(hnppidstr, "file", strlen("file")) || - 0 == strncmp(hnppidstr, "FILE", strlen("FILE"))) { - char input[1024], *filename; - FILE *fp; - - /* it is a file - get the filename */ - filename = strchr(hnppidstr, ':'); - if (NULL == filename) { - /* filename is not correctly formatted */ - orte_show_help("help-orte-top.txt", "orte-top:hnp-filename-bad", true, "pid", hnppidstr); - orte_finalize(); - exit(1); - } - ++filename; /* space past the : */ - - if (0 >= strlen(filename)) { - /* they forgot to give us the name! */ - orte_show_help("help-orte-top.txt", "orte-top:hnp-filename-bad", true, "pid", hnppidstr); - orte_finalize(); - exit(1); - } - - /* open the file and extract the pid */ - fp = fopen(filename, "r"); - if (NULL == fp) { /* can't find or read file! 
*/ - orte_show_help("help-orte-top.txt", "orte-top:hnp-filename-access", true, filename); - orte_finalize(); - exit(1); - } - if (NULL == fgets(input, 1024, fp)) { - /* something malformed about file */ - fclose(fp); - orte_show_help("help-orte-top.txt", "orte-top:hnp-file-bad", true, filename); - orte_finalize(); - exit(1); - } - fclose(fp); - input[strlen(input)-1] = '\0'; /* remove newline */ - /* convert the pid */ - hnppid = strtoul(input, NULL, 10); - } else { - /* should just be the pid itself */ - hnppid = strtoul(hnppidstr, NULL, 10); - } - /* - * Get the list of available hnp's and setup contact info - * to them in the RML - */ - OBJ_CONSTRUCT(&hnp_list, opal_list_t); - if (ORTE_SUCCESS != (ret = orte_list_local_hnps(&hnp_list, true) ) ) { - orte_show_help("help-orte-top.txt", "orte-top:pid-not-found", true, hnppid); - orte_finalize(); - exit(1); - } - - /* - * For each hnp in the listing - */ - while (NULL != (item = opal_list_remove_first(&hnp_list))) { - orte_hnp_contact_t *hnp = (orte_hnp_contact_t*)item; - if (hnppid == hnp->pid) { - /* this is the one we want */ - target_hnp = hnp; - /* let it continue to run so we deconstruct the list */ - continue; - } - OBJ_RELEASE(hnp); - } - OBJ_DESTRUCT(&hnp_list); - - /* if we get here without finding the one we wanted, then abort */ - if (NULL == target_hnp) { - orte_show_help("help-orte-top.txt", "orte-top:pid-not-found", true, hnppid); - orte_finalize(); - exit(1); - } - } else if (NULL != hnpuristr) { - if (0 == strncmp(hnpuristr, "file", strlen("file")) || - 0 == strncmp(hnpuristr, "FILE", strlen("FILE"))) { - char input[1024], *filename; - FILE *fp; - - /* it is a file - get the filename */ - filename = strchr(hnpuristr, ':'); - if (NULL == filename) { - /* filename is not correctly formatted */ - orte_show_help("help-orte-top.txt", "orte-top:hnp-filename-bad", true, "uri", hnpuristr); - orte_finalize(); - exit(1); - } - ++filename; /* space past the : */ - - if (0 >= strlen(filename)) { - /* they 
forgot to give us the name! */ - orte_show_help("help-orte-top.txt", "orte-top:hnp-filename-bad", true, "uri", hnpuristr); - orte_finalize(); - exit(1); - } - - /* open the file and extract the uri */ - fp = fopen(filename, "r"); - if (NULL == fp) { /* can't find or read file! */ - orte_show_help("help-orte-top.txt", "orte-top:hnp-filename-access", true, filename); - orte_finalize(); - exit(1); - } - if (NULL == fgets(input, 1024, fp)) { - /* something malformed about file */ - fclose(fp); - orte_show_help("help-orte-top.txt", "orte-top:hnp-file-bad", true, filename); - orte_finalize(); - exit(1); - } - fclose(fp); - input[strlen(input)-1] = '\0'; /* remove newline */ - /* construct the target hnp info */ - target_hnp = OBJ_NEW(orte_hnp_contact_t); - target_hnp->rml_uri = strdup(input); - } else { - /* should just be the uri itself - construct the target hnp info */ - target_hnp = OBJ_NEW(orte_hnp_contact_t); - target_hnp->rml_uri = strdup(hnpuristr); - } - /* extract the name */ - if (ORTE_SUCCESS != orte_rml_base_parse_uris(target_hnp->rml_uri, &target_hnp->name, NULL)) { - orte_show_help("help-orte-top.txt", "orte-top:hnp-uri-bad", true, target_hnp->rml_uri); - orte_finalize(); - exit(1); - } - /* set the info in our contact table */ - OBJ_CONSTRUCT(&val, opal_value_t); - val.key = OPAL_PMIX_PROC_URI; - val.type = OPAL_STRING; - val.data.string = target_hnp->rml_uri; - if (OPAL_SUCCESS != (ret = opal_pmix.store_local(&target_hnp->name, &val))) { - ORTE_ERROR_LOG(ret); - val.key = NULL; - val.data.string = NULL; - OBJ_DESTRUCT(&val); - orte_show_help("help-orte-top.txt", "orte-top:hnp-uri-bad", true, target_hnp->rml_uri); - orte_finalize(); - exit(1); - } - val.key = NULL; - val.data.string = NULL; - OBJ_DESTRUCT(&val); - - /* set the route to be direct */ - if (ORTE_SUCCESS != orte_routed.update_route(rtmod, &target_hnp->name, &target_hnp->name)) { - orte_show_help("help-orte-top.txt", "orte-top:hnp-uri-bad", true, target_hnp->rml_uri); - orte_finalize(); - 
exit(1); - } - } else { - orte_show_help("help-orte-top.txt", "orte-top:no-contact-given", true); - orte_finalize(); - exit(1); - } - - /* set the target hnp as our lifeline so we will terminate if it exits */ - orte_routed.set_lifeline(rtmod, &target_hnp->name); - - /* if an output file was specified, open it */ - if (NULL != logfile) { - fp = fopen(logfile, "w"); - if (NULL == fp) { - orte_show_help("help-orte-top.txt", "orte-top:cant-open-logfile", true, logfile); - orte_finalize(); - exit(1); - } - } else { - fp = stdout; - } - - /* setup a non-blocking recv to get answers - we don't know how - * many daemons are going to send replies, so we just have to - * accept whatever comes back - */ - orte_rml.recv_buffer_nb(ORTE_NAME_WILDCARD, ORTE_RML_TAG_TOOL, - ORTE_RML_NON_PERSISTENT, recv_stats, NULL); - - - /* setup the command to get the resource usage */ - OBJ_CONSTRUCT(&cmdbuf, opal_buffer_t); - command = ORTE_DAEMON_TOP_CMD; - if (ORTE_SUCCESS != (ret = opal_dss.pack(&cmdbuf, &command, 1, ORTE_DAEMON_CMD))) { - ORTE_ERROR_LOG(ret); - goto cleanup; - } - - proc.jobid = ORTE_PROC_MY_NAME->jobid+1; /* only support initial launch at this time */ - - /* parse the rank list - this can be a comma-separated list of ranks, - * each element being either a single rank or a range. We also allow - * for a -1 to indicate all ranks. 
If not rank is given, we assume -1 - */ - if (NULL == ranks) { - /* take all ranks */ - proc.vpid = ORTE_VPID_WILDCARD; - if (ORTE_SUCCESS != (ret = opal_dss.pack(&cmdbuf, &proc, 1, ORTE_NAME))) { - ORTE_ERROR_LOG(ret); - goto cleanup; - } - goto SEND; - } - - /* split on commas */ - r1 = opal_argv_split(ranks, ','); - /* for each resulting element, check for range */ - for (i=0; i < opal_argv_count(r1); i++) { - r2 = opal_argv_split(r1[i], '-'); - if (1 < opal_argv_count(r2)) { - /* given range - get start and end */ - vstart = strtol(r2[0], NULL, 10); - vend = strtol(r2[1], NULL, 10); - } else { - /* check for wildcard - have to do this here because - * the -1 would have been caught in the split - */ - vint = strtol(r1[i], NULL, 10); - if (-1 == vint) { - proc.vpid = ORTE_VPID_WILDCARD; - if (ORTE_SUCCESS != (ret = opal_dss.pack(&cmdbuf, &proc, 1, ORTE_NAME))) { - ORTE_ERROR_LOG(ret); - goto cleanup; - } - opal_argv_free(r2); - goto SEND; - } - vstart = strtol(r2[0], NULL, 10); - vend = vstart + 1; - } - for (proc.vpid = vstart; proc.vpid < vend; proc.vpid++) { - if (ORTE_SUCCESS != (ret = opal_dss.pack(&cmdbuf, &proc, 1, ORTE_NAME))) { - ORTE_ERROR_LOG(ret); - goto cleanup; - } - } - opal_argv_free(r2); - } - -SEND: - if (NULL != r1) { - opal_argv_free(r1); - } - send_cmd(0, 0, NULL); - - /* now wait until the termination event fires */ - while (orte_event_base_active) { - opal_event_loop(orte_event_base, OPAL_EVLOOP_ONCE); - } - ORTE_ACQUIRE_OBJECT(orte_event_base_active); - - /*************** - * Cleanup - ***************/ -cleanup: - /* Remove the TERM and INT signal handlers */ - opal_event_signal_del(&term_handler); - opal_event_signal_del(&int_handler); - - while (NULL != (item = opal_list_remove_first(&recvd_stats))) { - OBJ_RELEASE(item); - } - OBJ_DESTRUCT(&recvd_stats); - OBJ_DESTRUCT(&cmdbuf); - if (NULL != fp && fp != stdout) { - fclose(fp); - } - orte_finalize(); - - return ret; -} - -static void abort_exit_callback(int fd, short ign, void *arg) -{ 
- opal_list_item_t *item; - - /* Remove the TERM and INT signal handlers */ - opal_event_signal_del(&term_handler); - OBJ_DESTRUCT(&term_handler); - opal_event_signal_del(&int_handler); - OBJ_DESTRUCT(&int_handler); - - while (NULL != (item = opal_list_remove_first(&recvd_stats))) { - OBJ_RELEASE(item); - } - OBJ_DESTRUCT(&recvd_stats); - OBJ_DESTRUCT(&cmdbuf); - if (NULL != fp && fp != stdout) { - fclose(fp); - } - ORTE_UPDATE_EXIT_STATUS(1); - orte_quit(0,0,NULL); -} - -static void recv_stats(int status, orte_process_name_t* sender, - opal_buffer_t *buffer, orte_rml_tag_t tag, - void* cbdata) -{ - int32_t n; - opal_pstats_t *stats; - orte_process_name_t proc; - int ret; - - /* if the sender is the HNP we contacted, this message - * contains info on the number of responses we should get - */ - if (sender->vpid == 0) { - n = 1; - if (ORTE_SUCCESS != (ret = opal_dss.unpack(buffer, &num_replies, &n, OPAL_INT32))) { - ORTE_ERROR_LOG(ret); - goto cleanup; - } - n = 1; - if (ORTE_SUCCESS != (ret = opal_dss.unpack(buffer, &sample_time, &n, OPAL_STRING))) { - ORTE_ERROR_LOG(ret); - goto cleanup; - } - } - - n = 1; - while (ORTE_SUCCESS == opal_dss.unpack(buffer, &proc, &n, ORTE_NAME)) { - n = 1; - if (ORTE_SUCCESS != (ret = opal_dss.unpack(buffer, &stats, &n, OPAL_PSTAT))) { - ORTE_ERROR_LOG(ret); - goto cleanup; - } - /* if field sizes are not yet set, do so now */ - if (!fields_set) { - int tmp; - char *ctmp; - - tmp = strlen(stats->node); - if (nodefield < tmp) { - nodefield = tmp; - } - - asprintf(&ctmp, "%d", stats->rank); - tmp = strlen(ctmp); - free(ctmp); - if (rankfield < tmp) { - rankfield = tmp; - } - - asprintf(&ctmp, "%lu", (unsigned long)stats->pid); - tmp = strlen(ctmp); - free(ctmp); - if (pidfield < tmp) { - pidfield = tmp; - } - - tmp = strlen(stats->cmd); - if (cmdfield < tmp) { - cmdfield = tmp; - } - - if (0 <= stats->priority) { - pri_found = true; - asprintf(&ctmp, "%d", stats->priority); - tmp = strlen(ctmp); - free(ctmp); - if (prifield < tmp) { - 
prifield = tmp; - } - } - - if (0 <= stats->num_threads) { - thr_found = true; - asprintf(&ctmp, "%d", stats->num_threads); - tmp = strlen(ctmp); - free(ctmp); - if (thrfield < tmp) { - thrfield = tmp; - } - } - - if (0 < stats->vsize) { - vsize_found = true; - asprintf(&ctmp, "%8.2f", stats->vsize); - tmp = strlen(ctmp); - free(ctmp); - if (vsizefield < tmp) { - vsizefield = tmp; - } - } - - if (0 < stats->rss) { - rss_found = true; - asprintf(&ctmp, "%8.2f", stats->rss); - tmp = strlen(ctmp); - free(ctmp); - if (rssfield < tmp) { - rssfield = tmp; - } - } - - if (0 < stats->peak_vsize) { - pkv_found = true; - asprintf(&ctmp, "%8.2f", stats->peak_vsize); - tmp = strlen(ctmp); - free(ctmp); - if (pkvfield < tmp) { - pkvfield = tmp; - } - } - - if (0 <= stats->processor) { - p_found = true; - asprintf(&ctmp, "%d", stats->processor); - tmp = strlen(ctmp); - free(ctmp); - if (pfield < tmp) { - pfield = tmp; - } - } - } - /* add it to the list */ - opal_list_append(&recvd_stats, &stats->super); - } - - cleanup: - /* check for completion */ - num_recvd++; - if (num_replies <= num_recvd) { - /* flag that field sizes are set */ - fields_set = true; - - /* pretty-print what we got */ - pretty_print(); - - /* see if we want to do it again */ - if (0 < update_rate) { - ORTE_TIMER_EVENT(update_rate, 0, send_cmd, ORTE_SYS_PRI); - } else { - orte_finalize(); - exit(0); - } - } - - /* repost the receive */ - orte_rml.recv_buffer_nb(ORTE_NAME_WILDCARD, ORTE_RML_TAG_TOOL, - ORTE_RML_NON_PERSISTENT, recv_stats, NULL); -} - -/* static values needed for printing */ -static int lennode = 0; -static int lenrank = 0; -static int lenpid = 0; -static int lencmd = 0; -static int lenstate = 0; -static int lentime = 0; -static int lenpri = 0; -static int lenthr = 0; -static int lenvsize = 0; -static int lenrss = 0; -static int lenpkv = 0; -static int lensh = 0; -static int lenp = 0; - -static void print_ranks(opal_list_t *statlist) -{ - opal_list_item_t *item; - opal_pstats_t *stats, 
*pstats; - int32_t minrank; - char pretty_time[10]; - int i; - - /* sort the results by rank */ - while (0 < opal_list_get_size(statlist)) { - minrank = INT32_MAX; - pstats = NULL; - for (item = opal_list_get_first(statlist); - item != opal_list_get_end(statlist); - item = opal_list_get_next(item)) { - stats = (opal_pstats_t*)item; - if (stats->rank < minrank) { - pstats = stats; - minrank = stats->rank; - } - } - memset(pretty_time, 0, sizeof(pretty_time)); - if (pstats->time.tv_sec >= 3600) { - snprintf(pretty_time, sizeof(pretty_time), "%5.1fH", - (double)pstats->time.tv_sec / (double)(3600)); - } else { - snprintf(pretty_time, sizeof(pretty_time), "%3ld:%02ld", - (unsigned long)pstats->time.tv_sec/60, - (unsigned long)pstats->time.tv_sec % 60); - } - - if (bynode) { - /* print blanks in the nodename field */ - for (i=0; i < lennode; i++) { - fprintf(fp, " "); - } - fprintf(fp, " | "); - /* print fields */ - fprintf(fp, "%*d | ", lenrank, pstats->rank); - } else { - fprintf(fp, "%*d | ", lenrank, pstats->rank); - fprintf(fp, "%*s | ", lennode, pstats->node); - } - fprintf(fp, "%*s | ", lencmd, pstats->cmd); - fprintf(fp, "%*lu | ", lenpid, (unsigned long)pstats->pid); - fprintf(fp, "%*c | ", lenstate, pstats->state[0]); - fprintf(fp, "%*s | ", lentime, pretty_time); - if (pri_found) { - fprintf(fp, "%*d | ", lenpri, pstats->priority); - } - if (thr_found) { - fprintf(fp, "%*d | ", lenthr, pstats->num_threads); - } - if (vsize_found) { - fprintf(fp, "%*lu | ", lenvsize, (unsigned long)pstats->vsize); - } - if (rss_found) { - fprintf(fp, "%*lu | ", lenvsize, (unsigned long)pstats->rss); - } - if (pkv_found) { - fprintf(fp, "%*lu | ", lenpkv, (unsigned long)pstats->peak_vsize); - } - if (p_found) { - fprintf(fp, "%*d | ", lenp, pstats->processor); - } - fprintf(fp, "\n"); - num_lines++; - opal_list_remove_item(statlist, &pstats->super); - OBJ_RELEASE(pstats); - } -} - -static void pretty_print(void) -{ - opal_list_item_t *item, *next; - opal_pstats_t *stats; - 
opal_list_t tmplist; - char *node; - - if (bynode) { - if (need_header) { - print_headers(); - need_header = false; - } - if (timestamp) { - fprintf(fp, "TIMESTAMP: %s\n", sample_time); - } - if (NULL != sample_time) { - free(sample_time); - sample_time = NULL; - } - /* sort the results by node and then rank */ - while (NULL != (item = opal_list_remove_first(&recvd_stats))) { - OBJ_CONSTRUCT(&tmplist, opal_list_t); - stats = (opal_pstats_t*)item; - node = strdup(stats->node); - opal_list_append(&tmplist, &stats->super); - /* cycle through the rest of the list looking - * for matching nodes - */ - item = opal_list_get_first(&recvd_stats); - while (item != opal_list_get_end(&recvd_stats)) { - stats = (opal_pstats_t*)item; - next = opal_list_get_next(item); - if (0 == strcmp(stats->node, node)) { - opal_list_remove_item(&recvd_stats, item); - opal_list_append(&tmplist, &stats->super); - } - item = next; - } - fprintf(fp, "%*s\n", lennode, node); - free(node); - print_ranks(&tmplist); - OBJ_DESTRUCT(&tmplist); - } - } else { - if (need_header) { - print_headers(); - need_header = false; - } - if (timestamp) { - fprintf(fp, "\n\nTIMESTAMP: %s\n", sample_time); - } - if (NULL != sample_time) { - free(sample_time); - sample_time = NULL; - } - print_ranks(&recvd_stats); - } - - /* provide some separation between iterations */ - fprintf(fp, "\n"); - - /* if we have printed more than MAX_LINES since the last header, - * flag that we need to print the header next time - */ - if (MAX_LINES < num_lines) { - need_header = true; - num_lines = 0; - fprintf(fp, "\n\n"); - } -} - -static void print_headers(void) -{ - int num_fields = 0; - int i; - int linelen; - - lennode = strlen("Nodename"); - if (nodefield > lennode) { - lennode = nodefield; - } - num_fields++; - - lenrank = strlen("Rank"); - if (rankfield > lenrank) { - lenrank = rankfield; - } - num_fields++; - - lenpid = strlen("Pid"); - if (pidfield > lenpid) { - lenpid = pidfield; - } - num_fields++; - - lencmd = 
strlen("Command"); - if (cmdfield > lencmd) { - lencmd = cmdfield; - } - num_fields++; - - lenstate = strlen("State"); - num_fields++; - - lentime = strlen("Time"); - if (timefield > lentime) { - lentime = timefield; - } - num_fields++; - - if (pri_found) { - lenpri = strlen("Pri"); - if (prifield > lenpri) { - lenpri = prifield; - } - num_fields++; - } - - if (thr_found) { - lenthr = strlen("#threads"); - if (thrfield > lenthr) { - lenthr = thrfield; - } - num_fields++; - } - - if (vsize_found) { - lenvsize = strlen("Vsize"); - if (vsizefield > lenvsize) { - lenvsize = vsizefield; - } - num_fields++; - } - - if (rss_found) { - lenrss = strlen("RSS"); - if (rssfield > lenrss) { - lenrss = rssfield; - } - num_fields++; - } - - if (pkv_found) { - lenpkv = strlen("Peak Vsize"); - if (pkvfield > lenpkv) { - lenpkv = pkvfield; - } - num_fields++; - } - - if (p_found) { - lenp = strlen("Processor"); - if (pfield > lenp) { - lenp = pfield; - } - num_fields++; - } - - linelen = lennode + lenrank + lenpid + lencmd + lenstate + lentime + lenpri + lenthr + lenvsize + lenrss + lenpkv + lensh + lenp; - /* add spacing */ - linelen += num_fields * 3; - - /* print the rip line */ - for(i = 0; i < linelen; ++i) { - fprintf(fp, "="); - } - fprintf(fp, "\n"); - - /* print the header */ - if (bynode) { - fprintf(fp, "%*s | ", lennode , "Nodename"); - fprintf(fp, "%*s | ", lenrank , "Rank"); - } else { - fprintf(fp, "%*s | ", lenrank , "Rank"); - fprintf(fp, "%*s | ", lennode , "Nodename"); - } - fprintf(fp, "%*s | ", lencmd , "Command"); - fprintf(fp, "%*s | ", lenpid , "Pid"); - fprintf(fp, "%*s | ", lenstate , "State"); - fprintf(fp, "%*s | ", lentime , "Time"); - if (pri_found) { - fprintf(fp, "%*s | ", lenpri , "Pri"); - } - if (thr_found) { - fprintf(fp, "%*s | ", lenthr , "#threads"); - } - if (vsize_found) { - fprintf(fp, "%*s | ", lenvsize , "Vsize"); - } - if (rss_found) { - fprintf(fp, "%*s | ", lenrss , "RSS"); - } - if (pkv_found) { - fprintf(fp, "%*s | ", lenpkv , "Peak 
Vsize"); - } - if (p_found) { - fprintf(fp, "%*s | ", lenp , "Processor"); - } - fprintf(fp, "\n"); - - /* print the separator */ - for(i = 0; i < linelen; ++i) { - fprintf(fp, "-"); - } - fprintf(fp, "\n"); - -} diff --git a/orte/tools/orterun/orterun.1in b/orte/tools/orterun/orterun.1in index aef58239ef8..4d9d5665d42 100644 --- a/orte/tools/orterun/orterun.1in +++ b/orte/tools/orterun/orterun.1in @@ -645,7 +645,10 @@ Allow .I mpirun to run when executed by the root user .RI ( mpirun -defaults to aborting when launched as the root user). +defaults to aborting when launched as the root user). Be sure to see +the +.I Running as root +section, below, for more detail. . . .TP @@ -1628,7 +1631,26 @@ To override this default, you can add the .I --allow-run-as-root option to the .I mpirun -command line. +command line, or you can set the environmental parameters +.I OMPI_ALLOW_RUN_AS_ROOT=1 +and +.IR OMPI_ALLOW_RUN_AS_ROOT_CONFIRM=1 . +Note that it takes setting +.I two +environment variables to effect the same behavior as +.I --allow-run-as-root +in order to stress the Open MPI team's strong advice against running +as the root user. After extended discussions with communities who use +containers (where running as the root user is the default), there was +a persistent desire to be able to enable root execution of +.I mpirun +via an environmental control (vs. the existing +.I --allow-run-as-root +command line parameter). The compromise of using +.I two +environment variables was reached: it allows root execution via an +environmental control, but it conveys the Open MPI team's strong +recommendation against this behavior. . .SS Exit status . diff --git a/orte/tools/orterun/orterun.c b/orte/tools/orterun/orterun.c index 85aba0a0f33..3a7abc6a361 100644 --- a/orte/tools/orterun/orterun.c +++ b/orte/tools/orterun/orterun.c @@ -14,7 +14,7 @@ * Copyright (c) 2007-2009 Sun Microsystems, Inc. All rights reserved. * Copyright (c) 2007-2017 Los Alamos National Security, LLC.
All rights * reserved. - * Copyright (c) 2013-2017 Intel, Inc. All rights reserved. + * Copyright (c) 2013-2018 Intel, Inc. All rights reserved. * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. * $COPYRIGHT$ @@ -81,7 +81,6 @@ #include "opal/class/opal_pointer_array.h" #include "opal/dss/dss.h" -#include "orte/mca/dfs/dfs.h" #include "orte/mca/odls/odls.h" #include "orte/mca/rml/rml.h" #include "orte/mca/state/state.h" @@ -138,28 +137,6 @@ int orterun(int argc, char *argv[]) exit(1); } - /* check if we are running as root - if we are, then only allow - * us to proceed if the allow-run-as-root flag was given. Otherwise, - * exit with a giant warning flag - */ - if (0 == geteuid() && !orte_cmd_options.run_as_root) { - fprintf(stderr, "--------------------------------------------------------------------------\n"); - if (NULL != orte_cmd_options.help) { - fprintf(stderr, "%s cannot provide the help message when run as root.\n", orte_basename); - } else { - /* show_help is not yet available, so print an error manually */ - fprintf(stderr, "%s has detected an attempt to run as root.\n", orte_basename); - } - fprintf(stderr, "Running at root is *strongly* discouraged as any mistake (e.g., in\n"); - fprintf(stderr, "defining TMPDIR) or bug can result in catastrophic damage to the OS\n"); - fprintf(stderr, "file system, leaving your system in an unusable state.\n\n"); - fprintf(stderr, "You can override this protection by adding the --allow-run-as-root\n"); - fprintf(stderr, "option to your cmd line. However, we reiterate our strong advice\n"); - fprintf(stderr, "against doing so - please do so at your own risk.\n"); - fprintf(stderr, "--------------------------------------------------------------------------\n"); - exit(1); - } - /* setup to listen for commands sent specifically to me, even though I would probably * be the one sending them! 
Unfortunately, since I am a participating daemon, * there are times I need to send a command to "all daemons", and that means *I* have diff --git a/orte/util/attr.c b/orte/util/attr.c index 9e8716f0928..87047db7f5a 100644 --- a/orte/util/attr.c +++ b/orte/util/attr.c @@ -269,8 +269,6 @@ const char *orte_attr_key_to_str(orte_attribute_key_t key) return "JOB-LAUNCH-MSG-RECVD"; case ORTE_JOB_MAX_LAUNCH_MSG_RECVD: return "JOB-MAX-LAUNCH-MSG-RECVD"; - case ORTE_JOB_FILE_MAPS: - return "JOB-FILE-MAPS"; case ORTE_JOB_CKPT_STATE: return "JOB-CKPT-STATE"; case ORTE_JOB_SNAPSHOT_REF: diff --git a/orte/util/attr.h b/orte/util/attr.h index 73bb21192aa..621b577f04c 100644 --- a/orte/util/attr.h +++ b/orte/util/attr.h @@ -101,7 +101,6 @@ typedef uint16_t orte_job_flags_t; #define ORTE_JOB_LAUNCH_MSG_SENT (ORTE_JOB_START_KEY + 1) // timeval - time launch message was sent #define ORTE_JOB_LAUNCH_MSG_RECVD (ORTE_JOB_START_KEY + 2) // timeval - time launch message was recvd #define ORTE_JOB_MAX_LAUNCH_MSG_RECVD (ORTE_JOB_START_KEY + 3) // timeval - max time for launch msg to be received -#define ORTE_JOB_FILE_MAPS (ORTE_JOB_START_KEY + 4) // opal_buffer_t - file maps associates with this job #define ORTE_JOB_CKPT_STATE (ORTE_JOB_START_KEY + 5) // size_t - ckpt state #define ORTE_JOB_SNAPSHOT_REF (ORTE_JOB_START_KEY + 6) // string - snapshot reference #define ORTE_JOB_SNAPSHOT_LOC (ORTE_JOB_START_KEY + 7) // string - snapshot location diff --git a/orte/util/hostfile/hostfile.c b/orte/util/hostfile/hostfile.c index f502d3bfa06..2a236a030f5 100644 --- a/orte/util/hostfile/hostfile.c +++ b/orte/util/hostfile/hostfile.c @@ -13,7 +13,7 @@ * reserved. * Copyright (c) 2011 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2013-2017 Intel, Inc. All rights reserved. - * Copyright (c) 2015-2017 Research Organization for Information Science + * Copyright (c) 2015-2018 Research Organization for Information Science * and Technology (RIST). All rights reserved. 
* Copyright (c) 2016 IBM Corporation. All rights reserved. * $COPYRIGHT$ @@ -159,7 +159,7 @@ static int hostfile_parse_line(int token, opal_list_t* updates, if (1 == cnt) { node_name = strdup(argv[0]); } else if (2 == cnt) { - username = argv[0]; + username = strdup(argv[0]); node_name = strdup(argv[1]); } else { opal_output(0, "WARNING: Unhandled user@host-combination\n"); /* XXX */ @@ -274,7 +274,7 @@ static int hostfile_parse_line(int token, opal_list_t* updates, if (1 == cnt) { node_name = strdup(argv[0]); } else if (2 == cnt) { - username = argv[0]; + username = strdup(argv[0]); node_name = strdup(argv[1]); } else { opal_output(0, "WARNING: Unhandled user@host-combination\n"); /* XXX */ diff --git a/orte/util/session_dir.c b/orte/util/session_dir.c index 90f464fefbb..657cec6586c 100644 --- a/orte/util/session_dir.c +++ b/orte/util/session_dir.c @@ -12,7 +12,7 @@ * Copyright (c) 2014 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. - * Copyright (c) 2015-2017 Intel, Inc. All rights reserved. + * Copyright (c) 2015-2018 Intel, Inc. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -61,6 +61,7 @@ #include "orte/util/show_help.h" #include "orte/mca/errmgr/errmgr.h" +#include "orte/mca/ras/base/base.h" #include "orte/runtime/runtime.h" #include "orte/runtime/orte_globals.h" @@ -370,6 +371,16 @@ int orte_session_dir(bool create, orte_process_name_t *proc) int orte_session_dir_cleanup(orte_jobid_t jobid) { + /* special case - if a daemon is colocated with mpirun, + * then we let mpirun do the rest to avoid a race + * condition. 
this scenario always results in the rank=1 + * daemon colocated with mpirun */ + if (orte_ras_base.launch_orted_on_hn && + ORTE_PROC_IS_DAEMON && + 1 == ORTE_PROC_MY_NAME->vpid) { + return ORTE_SUCCESS; + } + if (!orte_create_session_dirs || orte_process_info.rm_session_dirs ) { /* we haven't created them or RM will clean them up for us*/ return ORTE_SUCCESS; @@ -386,6 +397,7 @@ orte_session_dir_cleanup(orte_jobid_t jobid) return ORTE_ERR_NOT_INITIALIZED; } + /* recursively blow the whole session away for our job family, * saving only output files */ @@ -461,20 +473,6 @@ orte_session_dir_finalize(orte_process_name_t *proc) opal_os_dirpath_destroy(orte_process_info.proc_session_dir, false, orte_dir_check_file); - opal_os_dirpath_destroy(orte_process_info.job_session_dir, - false, orte_dir_check_file); - /* only remove the jobfam session dir if we are the - * local daemon and we are finalizing our own session dir */ - if ((ORTE_PROC_IS_HNP || ORTE_PROC_IS_DAEMON) && - (ORTE_PROC_MY_NAME == proc)) { - opal_os_dirpath_destroy(orte_process_info.jobfam_session_dir, - false, orte_dir_check_file); - } - - if( NULL != orte_process_info.top_session_dir ){ - opal_os_dirpath_destroy(orte_process_info.top_session_dir, - false, orte_dir_check_file); - } if (opal_os_dirpath_is_empty(orte_process_info.proc_session_dir)) { if (orte_debug_flag) { @@ -492,6 +490,32 @@ orte_session_dir_finalize(orte_process_name_t *proc) } } + /* special case - if a daemon is colocated with mpirun, + * then we let mpirun do the rest to avoid a race + * condition. 
this scenario always results in the rank=1 + * daemon colocated with mpirun */ + if (orte_ras_base.launch_orted_on_hn && + ORTE_PROC_IS_DAEMON && + 1 == ORTE_PROC_MY_NAME->vpid) { + return ORTE_SUCCESS; + } + + opal_os_dirpath_destroy(orte_process_info.job_session_dir, + false, orte_dir_check_file); + + /* only remove the jobfam session dir if we are the + * local daemon and we are finalizing our own session dir */ + if ((ORTE_PROC_IS_HNP || ORTE_PROC_IS_DAEMON) && + (ORTE_PROC_MY_NAME == proc)) { + opal_os_dirpath_destroy(orte_process_info.jobfam_session_dir, + false, orte_dir_check_file); + } + + if( NULL != orte_process_info.top_session_dir ){ + opal_os_dirpath_destroy(orte_process_info.top_session_dir, + false, orte_dir_check_file); + } + if (opal_os_dirpath_is_empty(orte_process_info.job_session_dir)) { if (orte_debug_flag) { opal_output(0, "sess_dir_finalize: found job session dir empty - deleting"); diff --git a/oshmem/include/oshmem/constants.h b/oshmem/include/oshmem/constants.h index 17a560692db..be8d4b20327 100644 --- a/oshmem/include/oshmem/constants.h +++ b/oshmem/include/oshmem/constants.h @@ -105,11 +105,6 @@ enum { #define SHMEM_UNDEFINED -32766 /* undefined stuff */ -#define SHMEM_CTX_PRIVATE (1<<0) -#define SHMEM_CTX_SERIALIZED (1<<1) -#define SHMEM_CTX_NOSTORE (1<<2) - - #ifndef UNREFERENCED_PARAMETER #define UNREFERENCED_PARAMETER(P) ((void)P) #endif diff --git a/oshmem/include/pshmem.h b/oshmem/include/pshmem.h index 08adb40f221..0c72bcf5330 100644 --- a/oshmem/include/pshmem.h +++ b/oshmem/include/pshmem.h @@ -2,8 +2,8 @@ * Copyright (c) 2014-2017 Mellanox Technologies, Inc. * All rights reserved. * Copyright (c) 2014 Intel, Inc. All rights reserved - * Copyright (c) 2016 Research Organization for Information Science - * and Technology (RIST). All rights reserved. + * Copyright (c) 2016-2018 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -14,32 +14,7 @@ #ifndef PSHMEM_SHMEM_H #define PSHMEM_SHMEM_H - -#include /* include for ptrdiff_t */ -#include /* include for fixed width types */ -#if defined(c_plusplus) || defined(__cplusplus) -# include -# define OSHMEM_COMPLEX_TYPE(type) std::complex -#else -# include -# define OSHMEM_COMPLEX_TYPE(type) type complex -#endif - - -#ifndef OSHMEM_DECLSPEC -# if defined(OPAL_C_HAVE_VISIBILITY) && (OPAL_C_HAVE_VISIBILITY == 1) -# define OSHMEM_DECLSPEC __attribute__((visibility("default"))) -# else -# define OSHMEM_DECLSPEC -# endif -#endif - -#if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L) -#define OSHMEMP_HAVE_C11 1 -#else -#define OSHMEMP_HAVE_C11 0 -#endif - +#include #include #if defined(c_plusplus) || defined(__cplusplus) @@ -105,19 +80,23 @@ OSHMEM_DECLSPEC void pshmem_ctx_long_p(shmem_ctx_t ctx, long* addr, long value, OSHMEM_DECLSPEC void pshmem_ctx_float_p(shmem_ctx_t ctx, float* addr, float value, int pe); OSHMEM_DECLSPEC void pshmem_ctx_double_p(shmem_ctx_t ctx, double* addr, double value, int pe); OSHMEM_DECLSPEC void pshmem_ctx_longlong_p(shmem_ctx_t ctx, long long* addr, long long value, int pe); +OSHMEM_DECLSPEC void pshmem_ctx_schar_p(shmem_ctx_t ctx, signed char* addr, signed char value, int pe); +OSHMEM_DECLSPEC void pshmem_ctx_uchar_p(shmem_ctx_t ctx, unsigned char* addr, unsigned char value, int pe); +OSHMEM_DECLSPEC void pshmem_ctx_ushort_p(shmem_ctx_t ctx, unsigned short* addr, unsigned short value, int pe); +OSHMEM_DECLSPEC void pshmem_ctx_uint_p(shmem_ctx_t ctx, unsigned int* addr, unsigned int value, int pe); +OSHMEM_DECLSPEC void pshmem_ctx_ulong_p(shmem_ctx_t ctx, unsigned long* addr, unsigned long value, int pe); +OSHMEM_DECLSPEC void pshmem_ctx_ulonglong_p(shmem_ctx_t ctx, unsigned long long* addr, unsigned long long value, int pe); OSHMEM_DECLSPEC void pshmem_ctx_longdouble_p(shmem_ctx_t ctx, long double* addr, long double value, int pe); -#if OSHMEM_HAVE_C11 
-#define pshmem_p(ctx, dst, val, pe) \ - _Generic(&*(dst), \ - char*: pshmem_ctx_char_p, \ - short*: pshmem_ctx_short_p, \ - int*: pshmem_ctx_int_p, \ - long*: pshmem_ctx_long_p, \ - long long*: pshmem_ctx_longlong_p, \ - float*: pshmem_ctx_float_p, \ - double*: pshmem_ctx_double_p, \ - long double*: pshmem_ctx_longdouble_p)(ctx, dst, val, pe) -#endif +OSHMEM_DECLSPEC void pshmem_ctx_int8_p(shmem_ctx_t ctx, int8_t* addr, int8_t value, int pe); +OSHMEM_DECLSPEC void pshmem_ctx_int16_p(shmem_ctx_t ctx, int16_t* addr, int16_t value, int pe); +OSHMEM_DECLSPEC void pshmem_ctx_int32_p(shmem_ctx_t ctx, int32_t* addr, int32_t value, int pe); +OSHMEM_DECLSPEC void pshmem_ctx_int64_p(shmem_ctx_t ctx, int64_t* addr, int64_t value, int pe); +OSHMEM_DECLSPEC void pshmem_ctx_uint8_p(shmem_ctx_t ctx, uint8_t* addr, uint8_t value, int pe); +OSHMEM_DECLSPEC void pshmem_ctx_uint16_p(shmem_ctx_t ctx, uint16_t* addr, uint16_t value, int pe); +OSHMEM_DECLSPEC void pshmem_ctx_uint32_p(shmem_ctx_t ctx, uint32_t* addr, uint32_t value, int pe); +OSHMEM_DECLSPEC void pshmem_ctx_uint64_p(shmem_ctx_t ctx, uint64_t* addr, uint64_t value, int pe); +OSHMEM_DECLSPEC void pshmem_ctx_size_p(shmem_ctx_t ctx, size_t* addr, size_t value, int pe); +OSHMEM_DECLSPEC void pshmem_ctx_ptrdiff_p(shmem_ctx_t ctx, ptrdiff_t* addr, ptrdiff_t value, int pe); OSHMEM_DECLSPEC void pshmem_char_p(char* addr, char value, int pe); OSHMEM_DECLSPEC void pshmem_short_p(short* addr, short value, int pe); @@ -126,18 +105,56 @@ OSHMEM_DECLSPEC void pshmem_long_p(long* addr, long value, int pe); OSHMEM_DECLSPEC void pshmem_float_p(float* addr, float value, int pe); OSHMEM_DECLSPEC void pshmem_double_p(double* addr, double value, int pe); OSHMEM_DECLSPEC void pshmem_longlong_p(long long* addr, long long value, int pe); +OSHMEM_DECLSPEC void pshmem_schar_p(signed char* addr, signed char value, int pe); +OSHMEM_DECLSPEC void pshmem_uchar_p(unsigned char* addr, unsigned char value, int pe); +OSHMEM_DECLSPEC void 
pshmem_ushort_p(unsigned short* addr, unsigned short value, int pe); +OSHMEM_DECLSPEC void pshmem_uint_p(unsigned int* addr, unsigned int value, int pe); +OSHMEM_DECLSPEC void pshmem_ulong_p(unsigned long* addr, unsigned long value, int pe); +OSHMEM_DECLSPEC void pshmem_ulonglong_p(unsigned long long* addr, unsigned long long value, int pe); OSHMEM_DECLSPEC void pshmem_longdouble_p(long double* addr, long double value, int pe); -#if OSHMEMP_HAVE_C11 -#define pshmem_p(dst, val, pe) \ - _Generic(&*(dst), \ - char*: pshmem_char_p, \ - short*: pshmem_short_p, \ - int*: pshmem_int_p, \ - long*: pshmem_long_p, \ - long long*: pshmem_longlong_p, \ - float*: pshmem_float_p, \ - double*: pshmem_double_p, \ - long double*: pshmem_longdouble_p)(dst, val, pe) +OSHMEM_DECLSPEC void pshmem_int8_p(int8_t* addr, int8_t value, int pe); +OSHMEM_DECLSPEC void pshmem_int16_p(int16_t* addr, int16_t value, int pe); +OSHMEM_DECLSPEC void pshmem_int32_p(int32_t* addr, int32_t value, int pe); +OSHMEM_DECLSPEC void pshmem_int64_p(int64_t* addr, int64_t value, int pe); +OSHMEM_DECLSPEC void pshmem_uint8_p(uint8_t* addr, uint8_t value, int pe); +OSHMEM_DECLSPEC void pshmem_uint16_p(uint16_t* addr, uint16_t value, int pe); +OSHMEM_DECLSPEC void pshmem_uint32_p(uint32_t* addr, uint32_t value, int pe); +OSHMEM_DECLSPEC void pshmem_uint64_p(uint64_t* addr, uint64_t value, int pe); +OSHMEM_DECLSPEC void pshmem_size_p(size_t* addr, size_t value, int pe); +OSHMEM_DECLSPEC void pshmem_ptrdiff_p(ptrdiff_t* addr, ptrdiff_t value, int pe); +#if OSHMEM_HAVE_C11 +#define pshmem_p(...) 
\ + _Generic(&*(__OSHMEM_VAR_ARG1(__VA_ARGS__)), \ + shmem_ctx_t: _Generic((__OSHMEM_VAR_ARG2(__VA_ARGS__)), \ + char*: pshmem_ctx_char_p, \ + short*: pshmem_ctx_short_p, \ + int*: pshmem_ctx_int_p, \ + long*: pshmem_ctx_long_p, \ + long long*: pshmem_ctx_longlong_p, \ + signed char*: pshmem_ctx_schar_p, \ + unsigned char*: pshmem_ctx_uchar_p, \ + unsigned short*: pshmem_ctx_ushort_p, \ + unsigned int*: pshmem_ctx_uint_p, \ + unsigned long*: pshmem_ctx_ulong_p, \ + unsigned long long*: pshmem_ctx_ulonglong_p, \ + float*: pshmem_ctx_float_p, \ + double*: pshmem_ctx_double_p, \ + long double*: pshmem_ctx_longdouble_p, \ + default: __opshmem_datatype_ignore), \ + char*: pshmem_char_p, \ + short*: pshmem_short_p, \ + int*: pshmem_int_p, \ + long*: pshmem_long_p, \ + long long*: pshmem_longlong_p, \ + signed char*: pshmem_schar_p, \ + unsigned char*: pshmem_uchar_p, \ + unsigned short*: pshmem_ushort_p, \ + unsigned int*: pshmem_uint_p, \ + unsigned long*: pshmem_ulong_p, \ + unsigned long long*: pshmem_ulonglong_p, \ + float*: pshmem_float_p, \ + double*: pshmem_double_p, \ + long double*: pshmem_longdouble_p)(__VA_ARGS__) #endif /* @@ -145,44 +162,86 @@ OSHMEM_DECLSPEC void pshmem_longdouble_p(long double* addr, long double value, */ OSHMEM_DECLSPEC void pshmem_ctx_char_put(shmem_ctx_t ctx, char *target, const char *source, size_t len, int pe); OSHMEM_DECLSPEC void pshmem_ctx_short_put(shmem_ctx_t ctx, short *target, const short *source, size_t len, int pe); -OSHMEM_DECLSPEC void pshmem_ctx_int_put(shmem_ctx_t ctx, int* target, const int* source, size_t len, int pe); +OSHMEM_DECLSPEC void pshmem_ctx_int_put(shmem_ctx_t ctx, int *target, const int *source, size_t len, int pe); OSHMEM_DECLSPEC void pshmem_ctx_long_put(shmem_ctx_t ctx, long *target, const long *source, size_t len, int pe); OSHMEM_DECLSPEC void pshmem_ctx_float_put(shmem_ctx_t ctx, float *target, const float *source, size_t len, int pe); OSHMEM_DECLSPEC void pshmem_ctx_double_put(shmem_ctx_t ctx, double 
*target, const double *source, size_t len, int pe); OSHMEM_DECLSPEC void pshmem_ctx_longlong_put(shmem_ctx_t ctx, long long *target, const long long *source, size_t len, int pe); +OSHMEM_DECLSPEC void pshmem_ctx_schar_put(shmem_ctx_t ctx, signed char *target, const signed char *source, size_t len, int pe); +OSHMEM_DECLSPEC void pshmem_ctx_uchar_put(shmem_ctx_t ctx, unsigned char *target, const unsigned char *source, size_t len, int pe); +OSHMEM_DECLSPEC void pshmem_ctx_ushort_put(shmem_ctx_t ctx, unsigned short *target, const unsigned short *source, size_t len, int pe); +OSHMEM_DECLSPEC void pshmem_ctx_uint_put(shmem_ctx_t ctx, unsigned int *target, const unsigned int *source, size_t len, int pe); +OSHMEM_DECLSPEC void pshmem_ctx_ulong_put(shmem_ctx_t ctx, unsigned long *target, const unsigned long *source, size_t len, int pe); +OSHMEM_DECLSPEC void pshmem_ctx_ulonglong_put(shmem_ctx_t ctx, unsigned long long *target, const unsigned long long *source, size_t len, int pe); OSHMEM_DECLSPEC void pshmem_ctx_longdouble_put(shmem_ctx_t ctx, long double *target, const long double *source, size_t len, int pe); -#if OSHMEM_HAVE_C11 -#define pshmem_put(ctx, dst, src, len, pe) \ - _Generic(&*(dst), \ - char*: pshmem_ctx_char_put, \ - short*: pshmem_ctx_short_put, \ - int*: pshmem_ctx_int_put, \ - long*: pshmem_ctx_long_put, \ - long long*: pshmem_ctx_longlong_put, \ - float*: pshmem_ctx_float_put, \ - double*: pshmem_ctx_double_put, \ - long double*: pshmem_ctx_longdouble_put)(ctx, dst, src, len, pe) -#endif +OSHMEM_DECLSPEC void pshmem_ctx_int8_put(shmem_ctx_t ctx, int8_t *target, const int8_t *source, size_t len, int pe); +OSHMEM_DECLSPEC void pshmem_ctx_int16_put(shmem_ctx_t ctx, int16_t *target, const int16_t *source, size_t len, int pe); +OSHMEM_DECLSPEC void pshmem_ctx_int32_put(shmem_ctx_t ctx, int32_t *target, const int32_t *source, size_t len, int pe); +OSHMEM_DECLSPEC void pshmem_ctx_int64_put(shmem_ctx_t ctx, int64_t *target, const int64_t *source, size_t len, int 
pe); +OSHMEM_DECLSPEC void pshmem_ctx_uint8_put(shmem_ctx_t ctx, uint8_t *target, const uint8_t *source, size_t len, int pe); +OSHMEM_DECLSPEC void pshmem_ctx_uint16_put(shmem_ctx_t ctx, uint16_t *target, const uint16_t *source, size_t len, int pe); +OSHMEM_DECLSPEC void pshmem_ctx_uint32_put(shmem_ctx_t ctx, uint32_t *target, const uint32_t *source, size_t len, int pe); +OSHMEM_DECLSPEC void pshmem_ctx_uint64_put(shmem_ctx_t ctx, uint64_t *target, const uint64_t *source, size_t len, int pe); +OSHMEM_DECLSPEC void pshmem_ctx_size_put(shmem_ctx_t ctx, size_t *target, const size_t *source, size_t len, int pe); +OSHMEM_DECLSPEC void pshmem_ctx_ptrdiff_put(shmem_ctx_t ctx, ptrdiff_t *target, const ptrdiff_t *source, size_t len, int pe); OSHMEM_DECLSPEC void pshmem_char_put(char *target, const char *source, size_t len, int pe); OSHMEM_DECLSPEC void pshmem_short_put(short *target, const short *source, size_t len, int pe); -OSHMEM_DECLSPEC void pshmem_int_put(int* target, const int* source, size_t len, int pe); +OSHMEM_DECLSPEC void pshmem_int_put(int *target, const int *source, size_t len, int pe); OSHMEM_DECLSPEC void pshmem_long_put(long *target, const long *source, size_t len, int pe); OSHMEM_DECLSPEC void pshmem_float_put(float *target, const float *source, size_t len, int pe); OSHMEM_DECLSPEC void pshmem_double_put(double *target, const double *source, size_t len, int pe); OSHMEM_DECLSPEC void pshmem_longlong_put(long long *target, const long long *source, size_t len, int pe); +OSHMEM_DECLSPEC void pshmem_schar_put(signed char *target, const signed char *source, size_t len, int pe); +OSHMEM_DECLSPEC void pshmem_uchar_put(unsigned char *target, const unsigned char *source, size_t len, int pe); +OSHMEM_DECLSPEC void pshmem_ushort_put(unsigned short *target, const unsigned short *source, size_t len, int pe); +OSHMEM_DECLSPEC void pshmem_uint_put(unsigned int *target, const unsigned int *source, size_t len, int pe); +OSHMEM_DECLSPEC void pshmem_ulong_put(unsigned long 
*target, const unsigned long *source, size_t len, int pe); +OSHMEM_DECLSPEC void pshmem_ulonglong_put(unsigned long long *target, const unsigned long long *source, size_t len, int pe); OSHMEM_DECLSPEC void pshmem_longdouble_put(long double *target, const long double *source, size_t len, int pe); -#if OSHMEMP_HAVE_C11 -#define pshmem_put(dst, src, len, pe) \ - _Generic(&*(dst), \ - char*: pshmem_char_put, \ - short*: pshmem_short_put, \ - int*: pshmem_int_put, \ - long*: pshmem_long_put, \ - long long*: pshmem_longlong_put, \ - float*: pshmem_float_put, \ - double*: pshmem_double_put, \ - long double*: pshmem_longdouble_put)(dst, src, len, pe) +OSHMEM_DECLSPEC void pshmem_int8_put(int8_t *target, const int8_t *source, size_t len, int pe); +OSHMEM_DECLSPEC void pshmem_int16_put(int16_t *target, const int16_t *source, size_t len, int pe); +OSHMEM_DECLSPEC void pshmem_int32_put(int32_t *target, const int32_t *source, size_t len, int pe); +OSHMEM_DECLSPEC void pshmem_int64_put(int64_t *target, const int64_t *source, size_t len, int pe); +OSHMEM_DECLSPEC void pshmem_uint8_put(uint8_t *target, const uint8_t *source, size_t len, int pe); +OSHMEM_DECLSPEC void pshmem_uint16_put(uint16_t *target, const uint16_t *source, size_t len, int pe); +OSHMEM_DECLSPEC void pshmem_uint32_put(uint32_t *target, const uint32_t *source, size_t len, int pe); +OSHMEM_DECLSPEC void pshmem_uint64_put(uint64_t *target, const uint64_t *source, size_t len, int pe); +OSHMEM_DECLSPEC void pshmem_size_put(size_t *target, const size_t *source, size_t len, int pe); +OSHMEM_DECLSPEC void pshmem_ptrdiff_put(ptrdiff_t *target, const ptrdiff_t *source, size_t len, int pe); +#if OSHMEM_HAVE_C11 +#define pshmem_put(...) 
\ + _Generic(&*(__OSHMEM_VAR_ARG1(__VA_ARGS__)), \ + shmem_ctx_t: _Generic(&*(__OSHMEM_VAR_ARG2(__VA_ARGS__)),\ + char*: pshmem_ctx_char_put, \ + short*: pshmem_ctx_short_put, \ + int*: pshmem_ctx_int_put, \ + long*: pshmem_ctx_long_put, \ + long long*: pshmem_ctx_longlong_put, \ + signed char*: pshmem_ctx_schar_put, \ + unsigned char*: pshmem_ctx_uchar_put, \ + unsigned short*: pshmem_ctx_ushort_put, \ + unsigned int*: pshmem_ctx_uint_put, \ + unsigned long*: pshmem_ctx_ulong_put, \ + unsigned long long*: pshmem_ctx_ulonglong_put, \ + float*: pshmem_ctx_float_put, \ + double*: pshmem_ctx_double_put, \ + long double*: pshmem_ctx_longdouble_put, \ + default: __opshmem_datatype_ignore), \ + char*: pshmem_char_put, \ + short*: pshmem_short_put, \ + int*: pshmem_int_put, \ + long*: pshmem_long_put, \ + long long*: pshmem_longlong_put, \ + signed char*: pshmem_schar_put, \ + unsigned char*: pshmem_uchar_put, \ + unsigned short*: pshmem_ushort_put, \ + unsigned int*: pshmem_uint_put, \ + unsigned long*: pshmem_ulong_put, \ + unsigned long long*: pshmem_ulonglong_put, \ + float*: pshmem_float_put, \ + double*: pshmem_double_put, \ + long double*: pshmem_longdouble_put)(__VA_ARGS__) #endif OSHMEM_DECLSPEC void pshmem_ctx_put8(shmem_ctx_t ctx, void *target, const void *source, size_t len, int pe); @@ -209,19 +268,23 @@ OSHMEM_DECLSPEC void pshmem_ctx_long_iput(shmem_ctx_t ctx, long* target, const l OSHMEM_DECLSPEC void pshmem_ctx_float_iput(shmem_ctx_t ctx, float* target, const float* source, ptrdiff_t tst, ptrdiff_t sst,size_t len, int pe); OSHMEM_DECLSPEC void pshmem_ctx_double_iput(shmem_ctx_t ctx, double* target, const double* source, ptrdiff_t tst, ptrdiff_t sst,size_t len, int pe); OSHMEM_DECLSPEC void pshmem_ctx_longlong_iput(shmem_ctx_t ctx, long long* target, const long long* source, ptrdiff_t tst, ptrdiff_t sst,size_t len, int pe); +OSHMEM_DECLSPEC void pshmem_ctx_schar_iput(shmem_ctx_t ctx, signed char* target, const signed char* source, ptrdiff_t tst, ptrdiff_t 
sst,size_t len, int pe); +OSHMEM_DECLSPEC void pshmem_ctx_uchar_iput(shmem_ctx_t ctx, unsigned char* target, const unsigned char* source, ptrdiff_t tst, ptrdiff_t sst,size_t len, int pe); +OSHMEM_DECLSPEC void pshmem_ctx_ushort_iput(shmem_ctx_t ctx, unsigned short* target, const unsigned short* source, ptrdiff_t tst, ptrdiff_t sst,size_t len, int pe); +OSHMEM_DECLSPEC void pshmem_ctx_uint_iput(shmem_ctx_t ctx, unsigned int* target, const unsigned int* source, ptrdiff_t tst, ptrdiff_t sst,size_t len, int pe); +OSHMEM_DECLSPEC void pshmem_ctx_ulong_iput(shmem_ctx_t ctx, unsigned long* target, const unsigned long* source, ptrdiff_t tst, ptrdiff_t sst,size_t len, int pe); +OSHMEM_DECLSPEC void pshmem_ctx_ulonglong_iput(shmem_ctx_t ctx, unsigned long long* target, const unsigned long long* source, ptrdiff_t tst, ptrdiff_t sst,size_t len, int pe); OSHMEM_DECLSPEC void pshmem_ctx_longdouble_iput(shmem_ctx_t ctx, long double* target, const long double* source, ptrdiff_t tst, ptrdiff_t sst,size_t len, int pe); -#if OSHMEM_HAVE_C11 -#define pshmem_iput(ctx, dst, src, tst, sst, len, pe) \ - _Generic(&*(dst), \ - char*: pshmem_ctx_char_iput, \ - short*: pshmem_ctx_short_iput, \ - int*: pshmem_ctx_int_iput, \ - long*: pshmem_ctx_long_iput, \ - long long*: pshmem_ctx_longlong_iput, \ - float*: pshmem_ctx_float_iput, \ - double*: pshmem_ctx_double_iput, \ - long double*: pshmem_ctx_longdouble_iput)(ctx, dst, src, tst, sst, len, pe) -#endif +OSHMEM_DECLSPEC void pshmem_ctx_int8_iput(shmem_ctx_t ctx, int8_t* target, const int8_t* source, ptrdiff_t tst, ptrdiff_t sst,size_t len, int pe); +OSHMEM_DECLSPEC void pshmem_ctx_int16_iput(shmem_ctx_t ctx, int16_t* target, const int16_t* source, ptrdiff_t tst, ptrdiff_t sst,size_t len, int pe); +OSHMEM_DECLSPEC void pshmem_ctx_int32_iput(shmem_ctx_t ctx, int32_t* target, const int32_t* source, ptrdiff_t tst, ptrdiff_t sst,size_t len, int pe); +OSHMEM_DECLSPEC void pshmem_ctx_int64_iput(shmem_ctx_t ctx, int64_t* target, const int64_t* source, 
ptrdiff_t tst, ptrdiff_t sst,size_t len, int pe); +OSHMEM_DECLSPEC void pshmem_ctx_uint8_iput(shmem_ctx_t ctx, uint8_t* target, const uint8_t* source, ptrdiff_t tst, ptrdiff_t sst,size_t len, int pe); +OSHMEM_DECLSPEC void pshmem_ctx_uint16_iput(shmem_ctx_t ctx, uint16_t* target, const uint16_t* source, ptrdiff_t tst, ptrdiff_t sst,size_t len, int pe); +OSHMEM_DECLSPEC void pshmem_ctx_uint32_iput(shmem_ctx_t ctx, uint32_t* target, const uint32_t* source, ptrdiff_t tst, ptrdiff_t sst,size_t len, int pe); +OSHMEM_DECLSPEC void pshmem_ctx_uint64_iput(shmem_ctx_t ctx, uint64_t* target, const uint64_t* source, ptrdiff_t tst, ptrdiff_t sst,size_t len, int pe); +OSHMEM_DECLSPEC void pshmem_ctx_size_iput(shmem_ctx_t ctx, size_t* target, const size_t* source, ptrdiff_t tst, ptrdiff_t sst,size_t len, int pe); +OSHMEM_DECLSPEC void pshmem_ctx_ptrdiff_iput(shmem_ctx_t ctx, ptrdiff_t* target, const ptrdiff_t* source, ptrdiff_t tst, ptrdiff_t sst,size_t len, int pe); OSHMEM_DECLSPEC void pshmem_char_iput(char* target, const char* source, ptrdiff_t tst, ptrdiff_t sst,size_t len, int pe); OSHMEM_DECLSPEC void pshmem_short_iput(short* target, const short* source, ptrdiff_t tst, ptrdiff_t sst,size_t len, int pe); @@ -230,18 +293,56 @@ OSHMEM_DECLSPEC void pshmem_long_iput(long* target, const long* source, ptrdiff_ OSHMEM_DECLSPEC void pshmem_float_iput(float* target, const float* source, ptrdiff_t tst, ptrdiff_t sst,size_t len, int pe); OSHMEM_DECLSPEC void pshmem_double_iput(double* target, const double* source, ptrdiff_t tst, ptrdiff_t sst,size_t len, int pe); OSHMEM_DECLSPEC void pshmem_longlong_iput(long long* target, const long long* source, ptrdiff_t tst, ptrdiff_t sst,size_t len, int pe); +OSHMEM_DECLSPEC void pshmem_schar_iput(signed char* target, const signed char* source, ptrdiff_t tst, ptrdiff_t sst,size_t len, int pe); +OSHMEM_DECLSPEC void pshmem_uchar_iput(unsigned char* target, const unsigned char* source, ptrdiff_t tst, ptrdiff_t sst,size_t len, int pe); 
+OSHMEM_DECLSPEC void pshmem_ushort_iput(unsigned short* target, const unsigned short* source, ptrdiff_t tst, ptrdiff_t sst,size_t len, int pe); +OSHMEM_DECLSPEC void pshmem_uint_iput(unsigned int* target, const unsigned int* source, ptrdiff_t tst, ptrdiff_t sst,size_t len, int pe); +OSHMEM_DECLSPEC void pshmem_ulong_iput(unsigned long* target, const unsigned long* source, ptrdiff_t tst, ptrdiff_t sst,size_t len, int pe); +OSHMEM_DECLSPEC void pshmem_ulonglong_iput(unsigned long long* target, const unsigned long long* source, ptrdiff_t tst, ptrdiff_t sst,size_t len, int pe); OSHMEM_DECLSPEC void pshmem_longdouble_iput(long double* target, const long double* source, ptrdiff_t tst, ptrdiff_t sst,size_t len, int pe); -#if OSHMEMP_HAVE_C11 -#define pshmem_iput(dst, src, tst, sst, len, pe) \ - _Generic(&*(dst), \ - char*: pshmem_char_iput, \ - short*: pshmem_short_iput, \ - int*: pshmem_int_iput, \ - long*: pshmem_long_iput, \ - long long*: pshmem_longlong_iput, \ - float*: pshmem_float_iput, \ - double*: pshmem_double_iput, \ - long double*: pshmem_longdouble_iput)(dst, src, tst, sst, len, pe) +OSHMEM_DECLSPEC void pshmem_int8_iput(int8_t* target, const int8_t* source, ptrdiff_t tst, ptrdiff_t sst,size_t len, int pe); +OSHMEM_DECLSPEC void pshmem_int16_iput(int16_t* target, const int16_t* source, ptrdiff_t tst, ptrdiff_t sst,size_t len, int pe); +OSHMEM_DECLSPEC void pshmem_int32_iput(int32_t* target, const int32_t* source, ptrdiff_t tst, ptrdiff_t sst,size_t len, int pe); +OSHMEM_DECLSPEC void pshmem_int64_iput(int64_t* target, const int64_t* source, ptrdiff_t tst, ptrdiff_t sst,size_t len, int pe); +OSHMEM_DECLSPEC void pshmem_uint8_iput(uint8_t* target, const uint8_t* source, ptrdiff_t tst, ptrdiff_t sst,size_t len, int pe); +OSHMEM_DECLSPEC void pshmem_uint16_iput(uint16_t* target, const uint16_t* source, ptrdiff_t tst, ptrdiff_t sst,size_t len, int pe); +OSHMEM_DECLSPEC void pshmem_uint32_iput(uint32_t* target, const uint32_t* source, ptrdiff_t tst, ptrdiff_t 
sst,size_t len, int pe); +OSHMEM_DECLSPEC void pshmem_uint64_iput(uint64_t* target, const uint64_t* source, ptrdiff_t tst, ptrdiff_t sst,size_t len, int pe); +OSHMEM_DECLSPEC void pshmem_size_iput(size_t* target, const size_t* source, ptrdiff_t tst, ptrdiff_t sst,size_t len, int pe); +OSHMEM_DECLSPEC void pshmem_ptrdiff_iput(ptrdiff_t* target, const ptrdiff_t* source, ptrdiff_t tst, ptrdiff_t sst,size_t len, int pe); +#if OSHMEM_HAVE_C11 +#define pshmem_iput(...) \ + _Generic(&*(__OSHMEM_VAR_ARG1(__VA_ARGS__)), \ + shmem_ctx_t: _Generic((__OSHMEM_VAR_ARG2(__VA_ARGS__)), \ + char*: pshmem_ctx_char_iput, \ + short*: pshmem_ctx_short_iput, \ + int*: pshmem_ctx_int_iput, \ + long*: pshmem_ctx_long_iput, \ + long long*: pshmem_ctx_longlong_iput, \ + signed char*: pshmem_ctx_schar_iput, \ + unsigned char*: pshmem_ctx_uchar_iput, \ + unsigned short*: pshmem_ctx_ushort_iput, \ + unsigned int*: pshmem_ctx_uint_iput, \ + unsigned long*: pshmem_ctx_ulong_iput, \ + unsigned long long*: pshmem_ctx_ulonglong_iput, \ + float*: pshmem_ctx_float_iput, \ + double*: pshmem_ctx_double_iput, \ + long double*: pshmem_ctx_longdouble_iput, \ + default: __opshmem_datatype_ignore), \ + char*: pshmem_char_iput, \ + short*: pshmem_short_iput, \ + int*: pshmem_int_iput, \ + long*: pshmem_long_iput, \ + long long*: pshmem_longlong_iput, \ + signed char*: pshmem_schar_iput, \ + unsigned char*: pshmem_uchar_iput, \ + unsigned short*: pshmem_ushort_iput, \ + unsigned int*: pshmem_uint_iput, \ + unsigned long*: pshmem_ulong_iput, \ + unsigned long long*: pshmem_ulonglong_iput, \ + float*: pshmem_float_iput, \ + double*: pshmem_double_iput, \ + long double*: pshmem_longdouble_iput)(__VA_ARGS__) #endif OSHMEM_DECLSPEC void pshmem_ctx_iput8(shmem_ctx_t ctx, void* target, const void* source, ptrdiff_t tst, ptrdiff_t sst,size_t len, int pe); @@ -261,45 +362,86 @@ OSHMEM_DECLSPEC void pshmem_iput128(void* target, const void* source, ptrdiff_t */ OSHMEM_DECLSPEC void pshmem_ctx_char_put_nbi(shmem_ctx_t 
ctx, char *target, const char *source, size_t len, int pe); OSHMEM_DECLSPEC void pshmem_ctx_short_put_nbi(shmem_ctx_t ctx, short *target, const short *source, size_t len, int pe); -OSHMEM_DECLSPEC void pshmem_ctx_int_put_nbi(shmem_ctx_t ctx, int* target, const int* source, size_t len, int pe); +OSHMEM_DECLSPEC void pshmem_ctx_int_put_nbi(shmem_ctx_t ctx, int *target, const int *source, size_t len, int pe); OSHMEM_DECLSPEC void pshmem_ctx_long_put_nbi(shmem_ctx_t ctx, long *target, const long *source, size_t len, int pe); -OSHMEM_DECLSPEC void pshmem_ctx_longlong_put_nbi(shmem_ctx_t ctx, long long *target, const long long *source, size_t len, int pe); OSHMEM_DECLSPEC void pshmem_ctx_float_put_nbi(shmem_ctx_t ctx, float *target, const float *source, size_t len, int pe); OSHMEM_DECLSPEC void pshmem_ctx_double_put_nbi(shmem_ctx_t ctx, double *target, const double *source, size_t len, int pe); +OSHMEM_DECLSPEC void pshmem_ctx_longlong_put_nbi(shmem_ctx_t ctx, long long *target, const long long *source, size_t len, int pe); +OSHMEM_DECLSPEC void pshmem_ctx_schar_put_nbi(shmem_ctx_t ctx, signed char *target, const signed char *source, size_t len, int pe); +OSHMEM_DECLSPEC void pshmem_ctx_uchar_put_nbi(shmem_ctx_t ctx, unsigned char *target, const unsigned char *source, size_t len, int pe); +OSHMEM_DECLSPEC void pshmem_ctx_ushort_put_nbi(shmem_ctx_t ctx, unsigned short *target, const unsigned short *source, size_t len, int pe); +OSHMEM_DECLSPEC void pshmem_ctx_uint_put_nbi(shmem_ctx_t ctx, unsigned int *target, const unsigned int *source, size_t len, int pe); +OSHMEM_DECLSPEC void pshmem_ctx_ulong_put_nbi(shmem_ctx_t ctx, unsigned long *target, const unsigned long *source, size_t len, int pe); +OSHMEM_DECLSPEC void pshmem_ctx_ulonglong_put_nbi(shmem_ctx_t ctx, unsigned long long *target, const unsigned long long *source, size_t len, int pe); OSHMEM_DECLSPEC void pshmem_ctx_longdouble_put_nbi(shmem_ctx_t ctx, long double *target, const long double *source, size_t len, int 
pe); -#if OSHMEM_HAVE_C11 -#define pshmem_put_nbi(ctx, dst, src, len, pe) \ - _Generic(&*(dst), \ - char*: pshmem_ctx_char_put_nbi, \ - short*: pshmem_ctx_short_put_nbi, \ - int*: pshmem_ctx_int_put_nbi, \ - long*: pshmem_ctx_long_put_nbi, \ - long long*: pshmem_ctx_longlong_put_nbi, \ - float*: pshmem_ctx_float_put_nbi, \ - double*: pshmem_ctx_double_put_nbi, \ - long double*: pshmem_ctx_longdouble_put_nbi)(ctx, dst, src, len, pe) -#endif +OSHMEM_DECLSPEC void pshmem_ctx_int8_put_nbi(shmem_ctx_t ctx, int8_t *target, const int8_t *source, size_t len, int pe); +OSHMEM_DECLSPEC void pshmem_ctx_int16_put_nbi(shmem_ctx_t ctx, int16_t *target, const int16_t *source, size_t len, int pe); +OSHMEM_DECLSPEC void pshmem_ctx_int32_put_nbi(shmem_ctx_t ctx, int32_t *target, const int32_t *source, size_t len, int pe); +OSHMEM_DECLSPEC void pshmem_ctx_int64_put_nbi(shmem_ctx_t ctx, int64_t *target, const int64_t *source, size_t len, int pe); +OSHMEM_DECLSPEC void pshmem_ctx_uint8_put_nbi(shmem_ctx_t ctx, uint8_t *target, const uint8_t *source, size_t len, int pe); +OSHMEM_DECLSPEC void pshmem_ctx_uint16_put_nbi(shmem_ctx_t ctx, uint16_t *target, const uint16_t *source, size_t len, int pe); +OSHMEM_DECLSPEC void pshmem_ctx_uint32_put_nbi(shmem_ctx_t ctx, uint32_t *target, const uint32_t *source, size_t len, int pe); +OSHMEM_DECLSPEC void pshmem_ctx_uint64_put_nbi(shmem_ctx_t ctx, uint64_t *target, const uint64_t *source, size_t len, int pe); +OSHMEM_DECLSPEC void pshmem_ctx_size_put_nbi(shmem_ctx_t ctx, size_t *target, const size_t *source, size_t len, int pe); +OSHMEM_DECLSPEC void pshmem_ctx_ptrdiff_put_nbi(shmem_ctx_t ctx, ptrdiff_t *target, const ptrdiff_t *source, size_t len, int pe); -OSHMEM_DECLSPEC void pshmem_putmem_nbi(void *target, const void *source, size_t len, int pe); OSHMEM_DECLSPEC void pshmem_char_put_nbi(char *target, const char *source, size_t len, int pe); OSHMEM_DECLSPEC void pshmem_short_put_nbi(short *target, const short *source, size_t len, int pe); 
-OSHMEM_DECLSPEC void pshmem_int_put_nbi(int* target, const int* source, size_t len, int pe); +OSHMEM_DECLSPEC void pshmem_int_put_nbi(int *target, const int *source, size_t len, int pe); OSHMEM_DECLSPEC void pshmem_long_put_nbi(long *target, const long *source, size_t len, int pe); -OSHMEM_DECLSPEC void pshmem_longlong_put_nbi(long long *target, const long long *source, size_t len, int pe); OSHMEM_DECLSPEC void pshmem_float_put_nbi(float *target, const float *source, size_t len, int pe); OSHMEM_DECLSPEC void pshmem_double_put_nbi(double *target, const double *source, size_t len, int pe); +OSHMEM_DECLSPEC void pshmem_longlong_put_nbi(long long *target, const long long *source, size_t len, int pe); +OSHMEM_DECLSPEC void pshmem_schar_put_nbi(signed char *target, const signed char *source, size_t len, int pe); +OSHMEM_DECLSPEC void pshmem_uchar_put_nbi(unsigned char *target, const unsigned char *source, size_t len, int pe); +OSHMEM_DECLSPEC void pshmem_ushort_put_nbi(unsigned short *target, const unsigned short *source, size_t len, int pe); +OSHMEM_DECLSPEC void pshmem_uint_put_nbi(unsigned int *target, const unsigned int *source, size_t len, int pe); +OSHMEM_DECLSPEC void pshmem_ulong_put_nbi(unsigned long *target, const unsigned long *source, size_t len, int pe); +OSHMEM_DECLSPEC void pshmem_ulonglong_put_nbi(unsigned long long *target, const unsigned long long *source, size_t len, int pe); OSHMEM_DECLSPEC void pshmem_longdouble_put_nbi(long double *target, const long double *source, size_t len, int pe); -#if OSHMEMP_HAVE_C11 -#define pshmem_put_nbi(dst, src, len, pe) \ - _Generic(&*(dst), \ - char*: pshmem_char_put_nbi, \ - short*: pshmem_short_put_nbi, \ - int*: pshmem_int_put_nbi, \ - long*: pshmem_long_put_nbi, \ - long long*: pshmem_longlong_put_nbi, \ - float*: pshmem_float_put_nbi, \ - double*: pshmem_double_put_nbi, \ - long double*: pshmem_longdouble_put_nbi)(dst, src, len, pe) +OSHMEM_DECLSPEC void pshmem_int8_put_nbi(int8_t *target, const int8_t *source, 
size_t len, int pe); +OSHMEM_DECLSPEC void pshmem_int16_put_nbi(int16_t *target, const int16_t *source, size_t len, int pe); +OSHMEM_DECLSPEC void pshmem_int32_put_nbi(int32_t *target, const int32_t *source, size_t len, int pe); +OSHMEM_DECLSPEC void pshmem_int64_put_nbi(int64_t *target, const int64_t *source, size_t len, int pe); +OSHMEM_DECLSPEC void pshmem_uint8_put_nbi(uint8_t *target, const uint8_t *source, size_t len, int pe); +OSHMEM_DECLSPEC void pshmem_uint16_put_nbi(uint16_t *target, const uint16_t *source, size_t len, int pe); +OSHMEM_DECLSPEC void pshmem_uint32_put_nbi(uint32_t *target, const uint32_t *source, size_t len, int pe); +OSHMEM_DECLSPEC void pshmem_uint64_put_nbi(uint64_t *target, const uint64_t *source, size_t len, int pe); +OSHMEM_DECLSPEC void pshmem_size_put_nbi(size_t *target, const size_t *source, size_t len, int pe); +OSHMEM_DECLSPEC void pshmem_ptrdiff_put_nbi(ptrdiff_t *target, const ptrdiff_t *source, size_t len, int pe); +#if OSHMEM_HAVE_C11 +#define pshmem_put_nbi(...) 
\ + _Generic(&*(__OSHMEM_VAR_ARG1(__VA_ARGS__)), \ + shmem_ctx_t: _Generic(&*(__OSHMEM_VAR_ARG2(__VA_ARGS__)),\ + char*: pshmem_ctx_char_put_nbi, \ + short*: pshmem_ctx_short_put_nbi, \ + int*: pshmem_ctx_int_put_nbi, \ + long*: pshmem_ctx_long_put_nbi, \ + long long*: pshmem_ctx_longlong_put_nbi, \ + signed char*: pshmem_ctx_schar_put_nbi, \ + unsigned char*: pshmem_ctx_uchar_put_nbi, \ + unsigned short*: pshmem_ctx_ushort_put_nbi, \ + unsigned int*: pshmem_ctx_uint_put_nbi, \ + unsigned long*: pshmem_ctx_ulong_put_nbi, \ + unsigned long long*: pshmem_ctx_ulonglong_put_nbi, \ + float*: pshmem_ctx_float_put_nbi, \ + double*: pshmem_ctx_double_put_nbi, \ + long double*: pshmem_ctx_longdouble_put_nbi, \ + default: __opshmem_datatype_ignore), \ + char*: pshmem_char_put_nbi, \ + short*: pshmem_short_put_nbi, \ + int*: pshmem_int_put_nbi, \ + long*: pshmem_long_put_nbi, \ + long long*: pshmem_longlong_put_nbi, \ + signed char*: pshmem_schar_put_nbi, \ + unsigned char*: pshmem_uchar_put_nbi, \ + unsigned short*: pshmem_ushort_put_nbi, \ + unsigned int*: pshmem_uint_put_nbi, \ + unsigned long*: pshmem_ulong_put_nbi, \ + unsigned long long*: pshmem_ulonglong_put_nbi, \ + float*: pshmem_float_put_nbi, \ + double*: pshmem_double_put_nbi, \ + long double*: pshmem_longdouble_put_nbi)(__VA_ARGS__) #endif OSHMEM_DECLSPEC void pshmem_ctx_put8_nbi(shmem_ctx_t ctx, void *target, const void *source, size_t len, int pe); @@ -314,6 +456,7 @@ OSHMEM_DECLSPEC void pshmem_put16_nbi(void *target, const void *source, size_t OSHMEM_DECLSPEC void pshmem_put32_nbi(void *target, const void *source, size_t len, int pe); OSHMEM_DECLSPEC void pshmem_put64_nbi(void *target, const void *source, size_t len, int pe); OSHMEM_DECLSPEC void pshmem_put128_nbi(void *target, const void *source, size_t len, int pe); +OSHMEM_DECLSPEC void pshmem_putmem_nbi(void *target, const void *source, size_t len, int pe); /* * Elemental get routines @@ -326,18 +469,22 @@ OSHMEM_DECLSPEC float 
pshmem_ctx_float_g(shmem_ctx_t ctx, const float* addr, in OSHMEM_DECLSPEC double pshmem_ctx_double_g(shmem_ctx_t ctx, const double* addr, int pe); OSHMEM_DECLSPEC long long pshmem_ctx_longlong_g(shmem_ctx_t ctx, const long long* addr, int pe); OSHMEM_DECLSPEC long double pshmem_ctx_longdouble_g(shmem_ctx_t ctx, const long double* addr, int pe); -#if OSHMEM_HAVE_C11 -#define pshmem_g(ctx, addr, pe) \ - _Generic(&*(addr), \ - char*: pshmem_ctx_char_g, \ - short*: pshmem_ctx_short_g, \ - int*: pshmem_ctx_int_g, \ - long*: pshmem_ctx_long_g, \ - long long*: pshmem_ctx_longlong_g, \ - float*: pshmem_ctx_float_g, \ - double*: pshmem_ctx_double_g, \ - long double*: pshmem_ctx_longdouble_g)(ctx, addr, pe) -#endif +OSHMEM_DECLSPEC signed char pshmem_ctx_schar_g(shmem_ctx_t ctx, const signed char* addr, int pe); +OSHMEM_DECLSPEC unsigned char pshmem_ctx_uchar_g(shmem_ctx_t ctx, const unsigned char* addr, int pe); +OSHMEM_DECLSPEC unsigned short pshmem_ctx_ushort_g(shmem_ctx_t ctx, const unsigned short* addr, int pe); +OSHMEM_DECLSPEC unsigned int pshmem_ctx_uint_g(shmem_ctx_t ctx, const unsigned int* addr, int pe); +OSHMEM_DECLSPEC unsigned long pshmem_ctx_ulong_g(shmem_ctx_t ctx, const unsigned long* addr, int pe); +OSHMEM_DECLSPEC unsigned long long pshmem_ctx_ulonglong_g(shmem_ctx_t ctx, const unsigned long long* addr, int pe); +OSHMEM_DECLSPEC int8_t pshmem_ctx_int8_g(shmem_ctx_t ctx, const int8_t* addr, int pe); +OSHMEM_DECLSPEC int16_t pshmem_ctx_int16_g(shmem_ctx_t ctx, const int16_t* addr, int pe); +OSHMEM_DECLSPEC int32_t pshmem_ctx_int32_g(shmem_ctx_t ctx, const int32_t* addr, int pe); +OSHMEM_DECLSPEC int64_t pshmem_ctx_int64_g(shmem_ctx_t ctx, const int64_t* addr, int pe); +OSHMEM_DECLSPEC uint8_t pshmem_ctx_uint8_g(shmem_ctx_t ctx, const uint8_t* addr, int pe); +OSHMEM_DECLSPEC uint16_t pshmem_ctx_uint16_g(shmem_ctx_t ctx, const uint16_t* addr, int pe); +OSHMEM_DECLSPEC uint32_t pshmem_ctx_uint32_g(shmem_ctx_t ctx, const uint32_t* addr, int pe); +OSHMEM_DECLSPEC 
uint64_t pshmem_ctx_uint64_g(shmem_ctx_t ctx, const uint64_t* addr, int pe); +OSHMEM_DECLSPEC size_t pshmem_ctx_size_g(shmem_ctx_t ctx, const size_t* addr, int pe); +OSHMEM_DECLSPEC ptrdiff_t pshmem_ctx_ptrdiff_g(shmem_ctx_t ctx, const ptrdiff_t* addr, int pe); OSHMEM_DECLSPEC char pshmem_char_g(const char* addr, int pe); OSHMEM_DECLSPEC short pshmem_short_g(const short* addr, int pe); @@ -347,17 +494,55 @@ OSHMEM_DECLSPEC float pshmem_float_g(const float* addr, int pe); OSHMEM_DECLSPEC double pshmem_double_g(const double* addr, int pe); OSHMEM_DECLSPEC long long pshmem_longlong_g(const long long* addr, int pe); OSHMEM_DECLSPEC long double pshmem_longdouble_g(const long double* addr, int pe); -#if OSHMEMP_HAVE_C11 -#define pshmem_g(addr, pe) \ - _Generic(&*(dst), \ - char*: pshmem_char_g, \ - short*: pshmem_short_g, \ - int*: pshmem_int_g, \ - long*: pshmem_long_g, \ - long long*: pshmem_longlong_g, \ - float*: pshmem_float_g, \ - double*: pshmem_double_g, \ - long double*: pshmem_longdouble_g)(addr, pe) +OSHMEM_DECLSPEC signed char pshmem_schar_g(const signed char* addr, int pe); +OSHMEM_DECLSPEC unsigned char pshmem_uchar_g(const unsigned char* addr, int pe); +OSHMEM_DECLSPEC unsigned short pshmem_ushort_g(const unsigned short* addr, int pe); +OSHMEM_DECLSPEC unsigned int pshmem_uint_g(const unsigned int* addr, int pe); +OSHMEM_DECLSPEC unsigned long pshmem_ulong_g(const unsigned long* addr, int pe); +OSHMEM_DECLSPEC unsigned long long pshmem_ulonglong_g(const unsigned long long* addr, int pe); +OSHMEM_DECLSPEC int8_t pshmem_int8_g(const int8_t* addr, int pe); +OSHMEM_DECLSPEC int16_t pshmem_int16_g(const int16_t* addr, int pe); +OSHMEM_DECLSPEC int32_t pshmem_int32_g(const int32_t* addr, int pe); +OSHMEM_DECLSPEC int64_t pshmem_int64_g(const int64_t* addr, int pe); +OSHMEM_DECLSPEC uint8_t pshmem_uint8_g(const uint8_t* addr, int pe); +OSHMEM_DECLSPEC uint16_t pshmem_uint16_g(const uint16_t* addr, int pe); +OSHMEM_DECLSPEC uint32_t pshmem_uint32_g(const uint32_t* 
addr, int pe); +OSHMEM_DECLSPEC uint64_t pshmem_uint64_g(const uint64_t* addr, int pe); +OSHMEM_DECLSPEC size_t pshmem_size_g(const size_t* addr, int pe); +OSHMEM_DECLSPEC ptrdiff_t pshmem_ptrdiff_g(const ptrdiff_t* addr, int pe); +#if OSHMEM_HAVE_C11 +#define pshmem_g(...) \ + _Generic(&*(__OSHMEM_VAR_ARG1(__VA_ARGS__)), \ + shmem_ctx_t: _Generic((__OSHMEM_VAR_ARG2(__VA_ARGS__)), \ + char*: pshmem_ctx_char_g, \ + short*: pshmem_ctx_short_g, \ + int*: pshmem_ctx_int_g, \ + long*: pshmem_ctx_long_g, \ + long long*: pshmem_ctx_longlong_g, \ + signed char*: pshmem_ctx_schar_g, \ + unsigned char*: pshmem_ctx_uchar_g, \ + unsigned short*: pshmem_ctx_ushort_g, \ + unsigned int*: pshmem_ctx_uint_g, \ + unsigned long*: pshmem_ctx_ulong_g, \ + unsigned long long*: pshmem_ctx_ulonglong_g, \ + float*: pshmem_ctx_float_g, \ + double*: pshmem_ctx_double_g, \ + long double*: pshmem_ctx_longdouble_g, \ + default: __opshmem_datatype_ignore), \ + char*: pshmem_char_g, \ + short*: pshmem_short_g, \ + int*: pshmem_int_g, \ + long*: pshmem_long_g, \ + long long*: pshmem_longlong_g, \ + signed char*: pshmem_schar_g, \ + unsigned char*: pshmem_uchar_g, \ + unsigned short*: pshmem_ushort_g, \ + unsigned int*: pshmem_uint_g, \ + unsigned long*: pshmem_ulong_g, \ + unsigned long long*: pshmem_ulonglong_g, \ + float*: pshmem_float_g, \ + double*: pshmem_double_g, \ + long double*: pshmem_longdouble_g)(__VA_ARGS__) #endif /* @@ -370,19 +555,23 @@ OSHMEM_DECLSPEC void pshmem_ctx_long_get(shmem_ctx_t ctx, long *target, const l OSHMEM_DECLSPEC void pshmem_ctx_float_get(shmem_ctx_t ctx, float *target, const float *source, size_t len, int pe); OSHMEM_DECLSPEC void pshmem_ctx_double_get(shmem_ctx_t ctx, double *target, const double *source, size_t len, int pe); OSHMEM_DECLSPEC void pshmem_ctx_longlong_get(shmem_ctx_t ctx, long long *target, const long long *source, size_t len, int pe); +OSHMEM_DECLSPEC void pshmem_ctx_schar_get(shmem_ctx_t ctx, signed char *target, const signed char *source, size_t
len, int pe); +OSHMEM_DECLSPEC void pshmem_ctx_uchar_get(shmem_ctx_t ctx, unsigned char *target, const unsigned char *source, size_t len, int pe); +OSHMEM_DECLSPEC void pshmem_ctx_ushort_get(shmem_ctx_t ctx, unsigned short *target, const unsigned short *source, size_t len, int pe); +OSHMEM_DECLSPEC void pshmem_ctx_uint_get(shmem_ctx_t ctx, unsigned int *target, const unsigned int *source, size_t len, int pe); +OSHMEM_DECLSPEC void pshmem_ctx_ulong_get(shmem_ctx_t ctx, unsigned long *target, const unsigned long *source, size_t len, int pe); +OSHMEM_DECLSPEC void pshmem_ctx_ulonglong_get(shmem_ctx_t ctx, unsigned long long *target, const unsigned long long *source, size_t len, int pe); OSHMEM_DECLSPEC void pshmem_ctx_longdouble_get(shmem_ctx_t ctx, long double *target, const long double *source, size_t len, int pe); -#if OSHMEM_HAVE_C11 -#define pshmem_get(ctx, dst, src, len, pe) \ - _Generic(&*(dst), \ - char*: pshmem_ctx_char_get, \ - short*: pshmem_ctx_short_get, \ - int*: pshmem_ctx_int_get, \ - long*: pshmem_ctx_long_get, \ - long long*: pshmem_ctx_longlong_get, \ - float*: pshmem_ctx_float_get, \ - double*: pshmem_ctx_double_get, \ - long double*: pshmem_ctx_longdouble_get)(ctx, dst, src, len, pe) -#endif +OSHMEM_DECLSPEC void pshmem_ctx_int8_get(shmem_ctx_t ctx, int8_t *target, const int8_t *source, size_t len, int pe); +OSHMEM_DECLSPEC void pshmem_ctx_int16_get(shmem_ctx_t ctx, int16_t *target, const int16_t *source, size_t len, int pe); +OSHMEM_DECLSPEC void pshmem_ctx_int32_get(shmem_ctx_t ctx, int32_t *target, const int32_t *source, size_t len, int pe); +OSHMEM_DECLSPEC void pshmem_ctx_int64_get(shmem_ctx_t ctx, int64_t *target, const int64_t *source, size_t len, int pe); +OSHMEM_DECLSPEC void pshmem_ctx_uint8_get(shmem_ctx_t ctx, uint8_t *target, const uint8_t *source, size_t len, int pe); +OSHMEM_DECLSPEC void pshmem_ctx_uint16_get(shmem_ctx_t ctx, uint16_t *target, const uint16_t *source, size_t len, int pe); +OSHMEM_DECLSPEC void 
pshmem_ctx_uint32_get(shmem_ctx_t ctx, uint32_t *target, const uint32_t *source, size_t len, int pe); +OSHMEM_DECLSPEC void pshmem_ctx_uint64_get(shmem_ctx_t ctx, uint64_t *target, const uint64_t *source, size_t len, int pe); +OSHMEM_DECLSPEC void pshmem_ctx_size_get(shmem_ctx_t ctx, size_t *target, const size_t *source, size_t len, int pe); +OSHMEM_DECLSPEC void pshmem_ctx_ptrdiff_get(shmem_ctx_t ctx, ptrdiff_t *target, const ptrdiff_t *source, size_t len, int pe); OSHMEM_DECLSPEC void pshmem_char_get(char *target, const char *source, size_t len, int pe); OSHMEM_DECLSPEC void pshmem_short_get(short *target, const short *source, size_t len, int pe); @@ -391,18 +580,56 @@ OSHMEM_DECLSPEC void pshmem_long_get(long *target, const long *source, size_t l OSHMEM_DECLSPEC void pshmem_float_get(float *target, const float *source, size_t len, int pe); OSHMEM_DECLSPEC void pshmem_double_get(double *target, const double *source, size_t len, int pe); OSHMEM_DECLSPEC void pshmem_longlong_get(long long *target, const long long *source, size_t len, int pe); +OSHMEM_DECLSPEC void pshmem_schar_get(signed char *target, const signed char *source, size_t len, int pe); +OSHMEM_DECLSPEC void pshmem_uchar_get(unsigned char *target, const unsigned char *source, size_t len, int pe); +OSHMEM_DECLSPEC void pshmem_ushort_get(unsigned short *target, const unsigned short *source, size_t len, int pe); +OSHMEM_DECLSPEC void pshmem_uint_get(unsigned int *target, const unsigned int *source, size_t len, int pe); +OSHMEM_DECLSPEC void pshmem_ulong_get(unsigned long *target, const unsigned long *source, size_t len, int pe); +OSHMEM_DECLSPEC void pshmem_ulonglong_get(unsigned long long *target, const unsigned long long *source, size_t len, int pe); OSHMEM_DECLSPEC void pshmem_longdouble_get(long double *target, const long double *source, size_t len, int pe); -#if OSHMEMP_HAVE_C11 -#define pshmem_get(dst, src, len, pe) \ - _Generic(&*(dst), \ - char*: pshmem_char_get, \ - short*: pshmem_short_get, \ - 
int*: pshmem_int_get, \ - long*: pshmem_long_get, \ - long long*: pshmem_longlong_get, \ - float*: pshmem_float_get, \ - double*: pshmem_double_get, \ - long double*: pshmem_longdouble_get)(dst, src, len, pe) +OSHMEM_DECLSPEC void pshmem_int8_get(int8_t *target, const int8_t *source, size_t len, int pe); +OSHMEM_DECLSPEC void pshmem_int16_get(int16_t *target, const int16_t *source, size_t len, int pe); +OSHMEM_DECLSPEC void pshmem_int32_get(int32_t *target, const int32_t *source, size_t len, int pe); +OSHMEM_DECLSPEC void pshmem_int64_get(int64_t *target, const int64_t *source, size_t len, int pe); +OSHMEM_DECLSPEC void pshmem_uint8_get(uint8_t *target, const uint8_t *source, size_t len, int pe); +OSHMEM_DECLSPEC void pshmem_uint16_get(uint16_t *target, const uint16_t *source, size_t len, int pe); +OSHMEM_DECLSPEC void pshmem_uint32_get(uint32_t *target, const uint32_t *source, size_t len, int pe); +OSHMEM_DECLSPEC void pshmem_uint64_get(uint64_t *target, const uint64_t *source, size_t len, int pe); +OSHMEM_DECLSPEC void pshmem_size_get(size_t *target, const size_t *source, size_t len, int pe); +OSHMEM_DECLSPEC void pshmem_ptrdiff_get(ptrdiff_t *target, const ptrdiff_t *source, size_t len, int pe); +#if OSHMEM_HAVE_C11 +#define pshmem_get(...) 
\ + _Generic(&*(__OSHMEM_VAR_ARG1(__VA_ARGS__)), \ + shmem_ctx_t: _Generic((__OSHMEM_VAR_ARG2(__VA_ARGS__)), \ + char*: pshmem_ctx_char_get, \ + short*: pshmem_ctx_short_get, \ + int*: pshmem_ctx_int_get, \ + long*: pshmem_ctx_long_get, \ + long long*: pshmem_ctx_longlong_get, \ + signed char*: pshmem_ctx_schar_get, \ + unsigned char*: pshmem_ctx_uchar_get, \ + unsigned short*: pshmem_ctx_ushort_get, \ + unsigned int*: pshmem_ctx_uint_get, \ + unsigned long*: pshmem_ctx_ulong_get, \ + unsigned long long*: pshmem_ctx_ulonglong_get, \ + float*: pshmem_ctx_float_get, \ + double*: pshmem_ctx_double_get, \ + long double*: pshmem_ctx_longdouble_get, \ + default: __opshmem_datatype_ignore), \ + char*: pshmem_char_get, \ + short*: pshmem_short_get, \ + int*: pshmem_int_get, \ + long*: pshmem_long_get, \ + long long*: pshmem_longlong_get, \ + signed char*: pshmem_schar_get, \ + unsigned char*: pshmem_uchar_get, \ + unsigned short*: pshmem_ushort_get, \ + unsigned int*: pshmem_uint_get, \ + unsigned long*: pshmem_ulong_get, \ + unsigned long long*: pshmem_ulonglong_get, \ + float*: pshmem_float_get, \ + double*: pshmem_double_get, \ + long double*: pshmem_longdouble_get)(__VA_ARGS__) #endif OSHMEM_DECLSPEC void pshmem_ctx_get8(shmem_ctx_t ctx, void *target, const void *source, size_t len, int pe); @@ -422,46 +649,88 @@ OSHMEM_DECLSPEC void pshmem_getmem(void *target, const void *source, size_t len /* * Strided get routines */ -OSHMEM_DECLSPEC void pshmem_ctx_char_iget(shmem_ctx_t ctx, char* target, const char* source, ptrdiff_t tst, ptrdiff_t sst,size_t len, int pe); -OSHMEM_DECLSPEC void pshmem_ctx_short_iget(shmem_ctx_t ctx, short* target, const short* source, ptrdiff_t tst, ptrdiff_t sst,size_t len, int pe); -OSHMEM_DECLSPEC void pshmem_ctx_int_iget(shmem_ctx_t ctx, int* target, const int* source, ptrdiff_t tst, ptrdiff_t sst,size_t len, int pe); -OSHMEM_DECLSPEC void pshmem_ctx_float_iget(shmem_ctx_t ctx, float* target, const float* source, ptrdiff_t tst, ptrdiff_t 
sst,size_t len, int pe); -OSHMEM_DECLSPEC void pshmem_ctx_double_iget(shmem_ctx_t ctx, double* target, const double* source, ptrdiff_t tst, ptrdiff_t sst,size_t len, int pe); -OSHMEM_DECLSPEC void pshmem_ctx_longlong_iget(shmem_ctx_t ctx, long long* target, const long long* source, ptrdiff_t tst, ptrdiff_t sst,size_t len, int pe); -OSHMEM_DECLSPEC void pshmem_ctx_longdouble_iget(shmem_ctx_t ctx, long double* target, const long double* source, ptrdiff_t tst, ptrdiff_t sst,size_t len, int pe); -OSHMEM_DECLSPEC void pshmem_ctx_long_iget(shmem_ctx_t ctx, long* target, const long* source, ptrdiff_t tst, ptrdiff_t sst,size_t len, int pe); +OSHMEM_DECLSPEC void pshmem_ctx_char_iget(shmem_ctx_t ctx, char* target, const char* source, ptrdiff_t tst, ptrdiff_t sst, size_t len, int pe); +OSHMEM_DECLSPEC void pshmem_ctx_short_iget(shmem_ctx_t ctx, short* target, const short* source, ptrdiff_t tst, ptrdiff_t sst, size_t len, int pe); +OSHMEM_DECLSPEC void pshmem_ctx_int_iget(shmem_ctx_t ctx, int* target, const int* source, ptrdiff_t tst, ptrdiff_t sst, size_t len, int pe); +OSHMEM_DECLSPEC void pshmem_ctx_long_iget(shmem_ctx_t ctx, long* target, const long* source, ptrdiff_t tst, ptrdiff_t sst, size_t len, int pe); +OSHMEM_DECLSPEC void pshmem_ctx_longlong_iget(shmem_ctx_t ctx, long long* target, const long long* source, ptrdiff_t tst, ptrdiff_t sst, size_t len, int pe); +OSHMEM_DECLSPEC void pshmem_ctx_schar_iget(shmem_ctx_t ctx, signed char* target, const signed char* source, ptrdiff_t tst, ptrdiff_t sst, size_t len, int pe); +OSHMEM_DECLSPEC void pshmem_ctx_uchar_iget(shmem_ctx_t ctx, unsigned char* target, const unsigned char* source, ptrdiff_t tst, ptrdiff_t sst, size_t len, int pe); +OSHMEM_DECLSPEC void pshmem_ctx_ushort_iget(shmem_ctx_t ctx, unsigned short* target, const unsigned short* source, ptrdiff_t tst, ptrdiff_t sst, size_t len, int pe); +OSHMEM_DECLSPEC void pshmem_ctx_uint_iget(shmem_ctx_t ctx, unsigned int* target, const unsigned int* source, ptrdiff_t tst, 
ptrdiff_t sst, size_t len, int pe); +OSHMEM_DECLSPEC void pshmem_ctx_ulong_iget(shmem_ctx_t ctx, unsigned long* target, const unsigned long* source, ptrdiff_t tst, ptrdiff_t sst, size_t len, int pe); +OSHMEM_DECLSPEC void pshmem_ctx_ulonglong_iget(shmem_ctx_t ctx, unsigned long long* target, const unsigned long long* source, ptrdiff_t tst, ptrdiff_t sst, size_t len, int pe); +OSHMEM_DECLSPEC void pshmem_ctx_float_iget(shmem_ctx_t ctx, float* target, const float* source, ptrdiff_t tst, ptrdiff_t sst, size_t len, int pe); +OSHMEM_DECLSPEC void pshmem_ctx_double_iget(shmem_ctx_t ctx, double* target, const double* source, ptrdiff_t tst, ptrdiff_t sst, size_t len, int pe); +OSHMEM_DECLSPEC void pshmem_ctx_longdouble_iget(shmem_ctx_t ctx, long double* target, const long double* source, ptrdiff_t tst, ptrdiff_t sst, size_t len, int pe); +OSHMEM_DECLSPEC void pshmem_ctx_int8_iget(shmem_ctx_t ctx, int8_t* target, const int8_t* source, ptrdiff_t tst, ptrdiff_t sst, size_t len, int pe); +OSHMEM_DECLSPEC void pshmem_ctx_int16_iget(shmem_ctx_t ctx, int16_t* target, const int16_t* source, ptrdiff_t tst, ptrdiff_t sst, size_t len, int pe); +OSHMEM_DECLSPEC void pshmem_ctx_int32_iget(shmem_ctx_t ctx, int32_t* target, const int32_t* source, ptrdiff_t tst, ptrdiff_t sst, size_t len, int pe); +OSHMEM_DECLSPEC void pshmem_ctx_int64_iget(shmem_ctx_t ctx, int64_t* target, const int64_t* source, ptrdiff_t tst, ptrdiff_t sst, size_t len, int pe); +OSHMEM_DECLSPEC void pshmem_ctx_uint8_iget(shmem_ctx_t ctx, uint8_t* target, const uint8_t* source, ptrdiff_t tst, ptrdiff_t sst, size_t len, int pe); +OSHMEM_DECLSPEC void pshmem_ctx_uint16_iget(shmem_ctx_t ctx, uint16_t* target, const uint16_t* source, ptrdiff_t tst, ptrdiff_t sst, size_t len, int pe); +OSHMEM_DECLSPEC void pshmem_ctx_uint32_iget(shmem_ctx_t ctx, uint32_t* target, const uint32_t* source, ptrdiff_t tst, ptrdiff_t sst, size_t len, int pe); +OSHMEM_DECLSPEC void pshmem_ctx_uint64_iget(shmem_ctx_t ctx, uint64_t* target, const 
uint64_t* source, ptrdiff_t tst, ptrdiff_t sst, size_t len, int pe); +OSHMEM_DECLSPEC void pshmem_ctx_size_iget(shmem_ctx_t ctx, size_t* target, const size_t* source, ptrdiff_t tst, ptrdiff_t sst, size_t len, int pe); +OSHMEM_DECLSPEC void pshmem_ctx_ptrdiff_iget(shmem_ctx_t ctx, ptrdiff_t* target, const ptrdiff_t* source, ptrdiff_t tst, ptrdiff_t sst, size_t len, int pe); + +OSHMEM_DECLSPEC void pshmem_char_iget(char* target, const char* source, ptrdiff_t tst, ptrdiff_t sst, size_t len, int pe); +OSHMEM_DECLSPEC void pshmem_short_iget(short* target, const short* source, ptrdiff_t tst, ptrdiff_t sst, size_t len, int pe); +OSHMEM_DECLSPEC void pshmem_int_iget(int* target, const int* source, ptrdiff_t tst, ptrdiff_t sst, size_t len, int pe); +OSHMEM_DECLSPEC void pshmem_float_iget(float* target, const float* source, ptrdiff_t tst, ptrdiff_t sst, size_t len, int pe); +OSHMEM_DECLSPEC void pshmem_double_iget(double* target, const double* source, ptrdiff_t tst, ptrdiff_t sst, size_t len, int pe); +OSHMEM_DECLSPEC void pshmem_longlong_iget(long long* target, const long long* source, ptrdiff_t tst, ptrdiff_t sst, size_t len, int pe); +OSHMEM_DECLSPEC void pshmem_longdouble_iget(long double* target, const long double* source, ptrdiff_t tst, ptrdiff_t sst, size_t len, int pe); +OSHMEM_DECLSPEC void pshmem_long_iget(long* target, const long* source, ptrdiff_t tst, ptrdiff_t sst, size_t len, int pe); +OSHMEM_DECLSPEC void pshmem_schar_iget(signed char* target, const signed char* source, ptrdiff_t tst, ptrdiff_t sst, size_t len, int pe); +OSHMEM_DECLSPEC void pshmem_uchar_iget(unsigned char* target, const unsigned char* source, ptrdiff_t tst, ptrdiff_t sst, size_t len, int pe); +OSHMEM_DECLSPEC void pshmem_ushort_iget(unsigned short* target, const unsigned short* source, ptrdiff_t tst, ptrdiff_t sst, size_t len, int pe); +OSHMEM_DECLSPEC void pshmem_uint_iget(unsigned int* target, const unsigned int* source, ptrdiff_t tst, ptrdiff_t sst, size_t len, int pe); +OSHMEM_DECLSPEC 
void pshmem_ulong_iget(unsigned long* target, const unsigned long* source, ptrdiff_t tst, ptrdiff_t sst, size_t len, int pe); +OSHMEM_DECLSPEC void pshmem_ulonglong_iget(unsigned long long* target, const unsigned long long* source, ptrdiff_t tst, ptrdiff_t sst, size_t len, int pe); +OSHMEM_DECLSPEC void pshmem_int8_iget(int8_t* target, const int8_t* source, ptrdiff_t tst, ptrdiff_t sst, size_t len, int pe); +OSHMEM_DECLSPEC void pshmem_int16_iget(int16_t* target, const int16_t* source, ptrdiff_t tst, ptrdiff_t sst, size_t len, int pe); +OSHMEM_DECLSPEC void pshmem_int32_iget(int32_t* target, const int32_t* source, ptrdiff_t tst, ptrdiff_t sst, size_t len, int pe); +OSHMEM_DECLSPEC void pshmem_int64_iget(int64_t* target, const int64_t* source, ptrdiff_t tst, ptrdiff_t sst, size_t len, int pe); +OSHMEM_DECLSPEC void pshmem_uint8_iget(uint8_t* target, const uint8_t* source, ptrdiff_t tst, ptrdiff_t sst, size_t len, int pe); +OSHMEM_DECLSPEC void pshmem_uint16_iget(uint16_t* target, const uint16_t* source, ptrdiff_t tst, ptrdiff_t sst, size_t len, int pe); +OSHMEM_DECLSPEC void pshmem_uint32_iget(uint32_t* target, const uint32_t* source, ptrdiff_t tst, ptrdiff_t sst, size_t len, int pe); +OSHMEM_DECLSPEC void pshmem_uint64_iget(uint64_t* target, const uint64_t* source, ptrdiff_t tst, ptrdiff_t sst, size_t len, int pe); +OSHMEM_DECLSPEC void pshmem_size_iget(size_t* target, const size_t* source, ptrdiff_t tst, ptrdiff_t sst, size_t len, int pe); +OSHMEM_DECLSPEC void pshmem_ptrdiff_iget(ptrdiff_t* target, const ptrdiff_t* source, ptrdiff_t tst, ptrdiff_t sst, size_t len, int pe); #if OSHMEM_HAVE_C11 -#define pshmem_iget(ctx, dst, src, tst, sst, len, pe) \ - _Generic(&*(dst), \ - char*: pshmem_ctx_char_iget, \ - short*: pshmem_ctx_short_iget, \ - int*: pshmem_ctx_int_iget, \ - long*: pshmem_ctx_long_iget, \ - long long*: pshmem_ctx_longlong_iget, \ - float*: pshmem_ctx_float_iget, \ - double*: pshmem_ctx_double_iget, \ - long double*: pshmem_ctx_longdouble_iget)(ctx, 
dst, src, tst, sst, len, pe) -#endif - -OSHMEM_DECLSPEC void pshmem_char_iget(char* target, const char* source, ptrdiff_t tst, ptrdiff_t sst,size_t len, int pe); -OSHMEM_DECLSPEC void pshmem_short_iget(short* target, const short* source, ptrdiff_t tst, ptrdiff_t sst,size_t len, int pe); -OSHMEM_DECLSPEC void pshmem_int_iget(int* target, const int* source, ptrdiff_t tst, ptrdiff_t sst,size_t len, int pe); -OSHMEM_DECLSPEC void pshmem_float_iget(float* target, const float* source, ptrdiff_t tst, ptrdiff_t sst,size_t len, int pe); -OSHMEM_DECLSPEC void pshmem_double_iget(double* target, const double* source, ptrdiff_t tst, ptrdiff_t sst,size_t len, int pe); -OSHMEM_DECLSPEC void pshmem_longlong_iget(long long* target, const long long* source, ptrdiff_t tst, ptrdiff_t sst,size_t len, int pe); -OSHMEM_DECLSPEC void pshmem_longdouble_iget(long double* target, const long double* source, ptrdiff_t tst, ptrdiff_t sst,size_t len, int pe); -OSHMEM_DECLSPEC void pshmem_long_iget(long* target, const long* source, ptrdiff_t tst, ptrdiff_t sst,size_t len, int pe); -#if OSHMEMP_HAVE_C11 -#define pshmem_iget(dst, src, tst, sst, len, pe) \ - _Generic(&*(dst), \ - char*: pshmem_char_iget, \ - short*: pshmem_short_iget, \ - int*: pshmem_int_iget, \ - long*: pshmem_long_iget, \ - long long*: pshmem_longlong_iget, \ - float*: pshmem_float_iget, \ - double*: pshmem_double_iget, \ - long double*: pshmem_longdouble_iget)(dst, src, tst, sst, len, pe) +#define pshmem_iget(...) 
\ + _Generic(&*(__OSHMEM_VAR_ARG1(__VA_ARGS__)), \ + shmem_ctx_t: _Generic((__OSHMEM_VAR_ARG2(__VA_ARGS__)), \ + char*: pshmem_ctx_char_iget, \ + short*: pshmem_ctx_short_iget, \ + int*: pshmem_ctx_int_iget, \ + long*: pshmem_ctx_long_iget, \ + long long*: pshmem_ctx_longlong_iget, \ + signed char*: pshmem_ctx_schar_iget, \ + unsigned char*: pshmem_ctx_uchar_iget, \ + unsigned short*: pshmem_ctx_ushort_iget, \ + unsigned int*: pshmem_ctx_uint_iget, \ + unsigned long*: pshmem_ctx_ulong_iget, \ + unsigned long long*: pshmem_ctx_ulonglong_iget, \ + float*: pshmem_ctx_float_iget, \ + double*: pshmem_ctx_double_iget, \ + long double*: pshmem_ctx_longdouble_iget, \ + default: __opshmem_datatype_ignore), \ + char*: pshmem_char_iget, \ + short*: pshmem_short_iget, \ + int*: pshmem_int_iget, \ + long*: pshmem_long_iget, \ + long long*: pshmem_longlong_iget, \ + signed char*: pshmem_schar_iget, \ + unsigned char*: pshmem_uchar_iget, \ + unsigned short*: pshmem_ushort_iget, \ + unsigned int*: pshmem_uint_iget, \ + unsigned long*: pshmem_ulong_iget, \ + unsigned long long*: pshmem_ulonglong_iget, \ + float*: pshmem_float_iget, \ + double*: pshmem_double_iget, \ + long double*: pshmem_longdouble_iget)(__VA_ARGS__) #endif OSHMEM_DECLSPEC void pshmem_ctx_iget8(shmem_ctx_t ctx, void* target, const void* source, ptrdiff_t tst, ptrdiff_t sst,size_t len, int pe); @@ -484,21 +753,25 @@ OSHMEM_DECLSPEC void pshmem_ctx_short_get_nbi(shmem_ctx_t ctx, short *target, c OSHMEM_DECLSPEC void pshmem_ctx_int_get_nbi(shmem_ctx_t ctx, int *target, const int *source, size_t len, int pe); OSHMEM_DECLSPEC void pshmem_ctx_long_get_nbi(shmem_ctx_t ctx, long *target, const long *source, size_t len, int pe); OSHMEM_DECLSPEC void pshmem_ctx_longlong_get_nbi(shmem_ctx_t ctx, long long *target, const long long *source, size_t len, int pe); +OSHMEM_DECLSPEC void pshmem_ctx_schar_get_nbi(shmem_ctx_t ctx, signed char *target, const signed char *source, size_t len, int pe); +OSHMEM_DECLSPEC void 
pshmem_ctx_uchar_get_nbi(shmem_ctx_t ctx, unsigned char *target, const unsigned char *source, size_t len, int pe); +OSHMEM_DECLSPEC void pshmem_ctx_ushort_get_nbi(shmem_ctx_t ctx, unsigned short *target, const unsigned short *source, size_t len, int pe); +OSHMEM_DECLSPEC void pshmem_ctx_uint_get_nbi(shmem_ctx_t ctx, unsigned int *target, const unsigned int *source, size_t len, int pe); +OSHMEM_DECLSPEC void pshmem_ctx_ulong_get_nbi(shmem_ctx_t ctx, unsigned long *target, const unsigned long *source, size_t len, int pe); +OSHMEM_DECLSPEC void pshmem_ctx_ulonglong_get_nbi(shmem_ctx_t ctx, unsigned long long *target, const unsigned long long *source, size_t len, int pe); OSHMEM_DECLSPEC void pshmem_ctx_float_get_nbi(shmem_ctx_t ctx, float *target, const float *source, size_t len, int pe); OSHMEM_DECLSPEC void pshmem_ctx_double_get_nbi(shmem_ctx_t ctx, double *target, const double *source, size_t len, int pe); OSHMEM_DECLSPEC void pshmem_ctx_longdouble_get_nbi(shmem_ctx_t ctx, long double *target, const long double *source, size_t len, int pe); -#if OSHMEM_HAVE_C11 -#define pshmem_get_nbi(ctx, dst, src, len, pe) \ - _Generic(&*(dst), \ - char*: pshmem_ctx_char_get_nbi, \ - short*: pshmem_ctx_short_get_nbi, \ - int*: pshmem_ctx_int_get_nbi, \ - long*: pshmem_ctx_long_get_nbi, \ - long long*: pshmem_ctx_longlong_get_nbi, \ - float*: pshmem_ctx_float_get_nbi, \ - double*: pshmem_ctx_double_get_nbi, \ - long double*: pshmem_ctx_longdouble_get_nbi)(ctx, dst, src, len, pe) -#endif +OSHMEM_DECLSPEC void pshmem_ctx_int8_get_nbi(shmem_ctx_t ctx, int8_t *target, const int8_t *source, size_t len, int pe); +OSHMEM_DECLSPEC void pshmem_ctx_int16_get_nbi(shmem_ctx_t ctx, int16_t *target, const int16_t *source, size_t len, int pe); +OSHMEM_DECLSPEC void pshmem_ctx_int32_get_nbi(shmem_ctx_t ctx, int32_t *target, const int32_t *source, size_t len, int pe); +OSHMEM_DECLSPEC void pshmem_ctx_int64_get_nbi(shmem_ctx_t ctx, int64_t *target, const int64_t *source, size_t len, int pe); 
+OSHMEM_DECLSPEC void pshmem_ctx_uint8_get_nbi(shmem_ctx_t ctx, uint8_t *target, const uint8_t *source, size_t len, int pe); +OSHMEM_DECLSPEC void pshmem_ctx_uint16_get_nbi(shmem_ctx_t ctx, uint16_t *target, const uint16_t *source, size_t len, int pe); +OSHMEM_DECLSPEC void pshmem_ctx_uint32_get_nbi(shmem_ctx_t ctx, uint32_t *target, const uint32_t *source, size_t len, int pe); +OSHMEM_DECLSPEC void pshmem_ctx_uint64_get_nbi(shmem_ctx_t ctx, uint64_t *target, const uint64_t *source, size_t len, int pe); +OSHMEM_DECLSPEC void pshmem_ctx_size_get_nbi(shmem_ctx_t ctx, size_t *target, const size_t *source, size_t len, int pe); +OSHMEM_DECLSPEC void pshmem_ctx_ptrdiff_get_nbi(shmem_ctx_t ctx, ptrdiff_t *target, const ptrdiff_t *source, size_t len, int pe); OSHMEM_DECLSPEC void pshmem_getmem_nbi(void *target, const void *source, size_t len, int pe); OSHMEM_DECLSPEC void pshmem_char_get_nbi(char *target, const char *source, size_t len, int pe); @@ -506,20 +779,58 @@ OSHMEM_DECLSPEC void pshmem_short_get_nbi(short *target, const short *source, s OSHMEM_DECLSPEC void pshmem_int_get_nbi(int *target, const int *source, size_t len, int pe); OSHMEM_DECLSPEC void pshmem_long_get_nbi(long *target, const long *source, size_t len, int pe); OSHMEM_DECLSPEC void pshmem_longlong_get_nbi(long long *target, const long long *source, size_t len, int pe); +OSHMEM_DECLSPEC void pshmem_schar_get_nbi(signed char *target, const signed char *source, size_t len, int pe); +OSHMEM_DECLSPEC void pshmem_uchar_get_nbi(unsigned char *target, const unsigned char *source, size_t len, int pe); +OSHMEM_DECLSPEC void pshmem_ushort_get_nbi(unsigned short *target, const unsigned short *source, size_t len, int pe); +OSHMEM_DECLSPEC void pshmem_uint_get_nbi(unsigned int *target, const unsigned int *source, size_t len, int pe); +OSHMEM_DECLSPEC void pshmem_ulong_get_nbi(unsigned long *target, const unsigned long *source, size_t len, int pe); +OSHMEM_DECLSPEC void pshmem_ulonglong_get_nbi(unsigned long long 
*target, const unsigned long long *source, size_t len, int pe); OSHMEM_DECLSPEC void pshmem_float_get_nbi(float *target, const float *source, size_t len, int pe); OSHMEM_DECLSPEC void pshmem_double_get_nbi(double *target, const double *source, size_t len, int pe); OSHMEM_DECLSPEC void pshmem_longdouble_get_nbi(long double *target, const long double *source, size_t len, int pe); -#if OSHMEMP_HAVE_C11 -#define pshmem_get_nbi(dst, src, len, pe) \ - _Generic(&*(dst), \ - char*: pshmem_char_get_nbi, \ - short*: pshmem_short_get_nbi, \ - int*: pshmem_int_get_nbi, \ - long*: pshmem_long_get_nbi, \ - long long*: pshmem_longlong_get_nbi, \ - float*: pshmem_float_get_nbi, \ - double*: pshmem_double_get_nbi, \ - long double*: pshmem_longdouble_get_nbi)(dst, src, len, pe) +OSHMEM_DECLSPEC void pshmem_int8_get_nbi(int8_t *target, const int8_t *source, size_t len, int pe); +OSHMEM_DECLSPEC void pshmem_int16_get_nbi(int16_t *target, const int16_t *source, size_t len, int pe); +OSHMEM_DECLSPEC void pshmem_int32_get_nbi(int32_t *target, const int32_t *source, size_t len, int pe); +OSHMEM_DECLSPEC void pshmem_int64_get_nbi(int64_t *target, const int64_t *source, size_t len, int pe); +OSHMEM_DECLSPEC void pshmem_uint8_get_nbi(uint8_t *target, const uint8_t *source, size_t len, int pe); +OSHMEM_DECLSPEC void pshmem_uint16_get_nbi(uint16_t *target, const uint16_t *source, size_t len, int pe); +OSHMEM_DECLSPEC void pshmem_uint32_get_nbi(uint32_t *target, const uint32_t *source, size_t len, int pe); +OSHMEM_DECLSPEC void pshmem_uint64_get_nbi(uint64_t *target, const uint64_t *source, size_t len, int pe); +OSHMEM_DECLSPEC void pshmem_size_get_nbi(size_t *target, const size_t *source, size_t len, int pe); +OSHMEM_DECLSPEC void pshmem_ptrdiff_get_nbi(ptrdiff_t *target, const ptrdiff_t *source, size_t len, int pe); +#if OSHMEM_HAVE_C11 +#define pshmem_get_nbi(...) 
\ + _Generic(&*(__OSHMEM_VAR_ARG1(__VA_ARGS__)), \ + shmem_ctx_t: _Generic((__OSHMEM_VAR_ARG2(__VA_ARGS__)), \ + char*: pshmem_ctx_char_get_nbi, \ + short*: pshmem_ctx_short_get_nbi, \ + int*: pshmem_ctx_int_get_nbi, \ + long*: pshmem_ctx_long_get_nbi, \ + long long*: pshmem_ctx_longlong_get_nbi, \ + signed char*: pshmem_ctx_schar_get_nbi, \ + unsigned char*: pshmem_ctx_uchar_get_nbi, \ + unsigned short*: pshmem_ctx_ushort_get_nbi, \ + unsigned int*: pshmem_ctx_uint_get_nbi, \ + unsigned long*: pshmem_ctx_ulong_get_nbi, \ + unsigned long long*: pshmem_ctx_ulonglong_get_nbi, \ + float*: pshmem_ctx_float_get_nbi, \ + double*: pshmem_ctx_double_get_nbi, \ + long double*: pshmem_ctx_longdouble_get_nbi, \ + default: __opshmem_datatype_ignore), \ + char*: pshmem_char_get_nbi, \ + short*: pshmem_short_get_nbi, \ + int*: pshmem_int_get_nbi, \ + long*: pshmem_long_get_nbi, \ + long long*: pshmem_longlong_get_nbi, \ + signed char*: pshmem_schar_get_nbi, \ + unsigned char*: pshmem_uchar_get_nbi, \ + unsigned short*: pshmem_ushort_get_nbi, \ + unsigned int*: pshmem_uint_get_nbi, \ + unsigned long*: pshmem_ulong_get_nbi, \ + unsigned long long*: pshmem_ulonglong_get_nbi, \ + float*: pshmem_float_get_nbi, \ + double*: pshmem_double_get_nbi, \ + long double*: pshmem_longdouble_get_nbi)(__VA_ARGS__) #endif OSHMEM_DECLSPEC void pshmem_ctx_get8_nbi(shmem_ctx_t ctx, void *target, const void *source, size_t len, int pe); @@ -541,32 +852,42 @@ OSHMEM_DECLSPEC void pshmem_get128_nbi(void *target, const void *source, size_t /* Atomic swap */ OSHMEM_DECLSPEC int pshmem_ctx_int_atomic_swap(shmem_ctx_t ctx, int *target, int value, int pe); OSHMEM_DECLSPEC long pshmem_ctx_long_atomic_swap(shmem_ctx_t ctx, long *target, long value, int pe); -OSHMEM_DECLSPEC long long pshmem_ctx_longlong_atomic_swap(shmem_ctx_t ctx, long long*target, long long value, int pe); +OSHMEM_DECLSPEC long long pshmem_ctx_longlong_atomic_swap(shmem_ctx_t ctx, long long *target, long long value, int pe); 
+OSHMEM_DECLSPEC unsigned int pshmem_ctx_uint_atomic_swap(shmem_ctx_t ctx, unsigned int *target, unsigned int value, int pe); +OSHMEM_DECLSPEC unsigned long pshmem_ctx_ulong_atomic_swap(shmem_ctx_t ctx, unsigned long *target, unsigned long value, int pe); +OSHMEM_DECLSPEC unsigned long long pshmem_ctx_ulonglong_atomic_swap(shmem_ctx_t ctx, unsigned long long *target, unsigned long long value, int pe); OSHMEM_DECLSPEC float pshmem_ctx_float_atomic_swap(shmem_ctx_t ctx, float *target, float value, int pe); OSHMEM_DECLSPEC double pshmem_ctx_double_atomic_swap(shmem_ctx_t ctx, double *target, double value, int pe); -#if OSHMEM_HAVE_C11 -#define pshmem_atomic_swap(ctx, dst, val, pe) \ - _Generic(&*(dst), \ - int*: pshmem_ctx_int_atomic_swap, \ - long*: pshmem_ctx_long_atomic_swap, \ - long long*: pshmem_ctx_longlong_atomic_swap, \ - float*: pshmem_ctx_float_atomic_swap, \ - double*: pshmem_ctx_double_atomic_swap)(ctx, dst, val, pe) -#endif OSHMEM_DECLSPEC int pshmem_int_atomic_swap(int *target, int value, int pe); OSHMEM_DECLSPEC long pshmem_long_atomic_swap(long *target, long value, int pe); OSHMEM_DECLSPEC long long pshmem_longlong_atomic_swap(long long*target, long long value, int pe); +OSHMEM_DECLSPEC unsigned int pshmem_uint_atomic_swap(unsigned int *target, unsigned int value, int pe); +OSHMEM_DECLSPEC unsigned long pshmem_ulong_atomic_swap(unsigned long *target, unsigned long value, int pe); +OSHMEM_DECLSPEC unsigned long long pshmem_ulonglong_atomic_swap(unsigned long long *target, unsigned long long value, int pe); OSHMEM_DECLSPEC float pshmem_float_atomic_swap(float *target, float value, int pe); OSHMEM_DECLSPEC double pshmem_double_atomic_swap(double *target, double value, int pe); #if OSHMEM_HAVE_C11 -#define pshmem_atomic_swap(dst, val, pe) \ - _Generic(&*(dst), \ - int*: pshmem_int_atomic_swap, \ - long*: pshmem_long_atomic_swap, \ - long long*: pshmem_longlong_atomic_swap, \ - float*: pshmem_float_atomic_swap, \ - double*: pshmem_double_atomic_swap)(dst, 
val, pe) +#define pshmem_atomic_swap(...) \ + _Generic(&*(__OSHMEM_VAR_ARG1(__VA_ARGS__)), \ + shmem_ctx_t: _Generic((__OSHMEM_VAR_ARG2(__VA_ARGS__)), \ + int*: pshmem_ctx_int_atomic_swap, \ + long*: pshmem_ctx_long_atomic_swap, \ + long long*: pshmem_ctx_longlong_atomic_swap, \ + unsigned int*: pshmem_ctx_uint_atomic_swap, \ + unsigned long*: pshmem_ctx_ulong_atomic_swap, \ + unsigned long long*: pshmem_ctx_ulonglong_atomic_swap,\ + float*: pshmem_ctx_float_atomic_swap, \ + double*: pshmem_ctx_double_atomic_swap, \ + default: __opshmem_datatype_ignore), \ + int*: pshmem_int_atomic_swap, \ + long*: pshmem_long_atomic_swap, \ + long long*: pshmem_longlong_atomic_swap, \ + unsigned int*: pshmem_uint_atomic_swap, \ + unsigned long*: pshmem_ulong_atomic_swap, \ + unsigned long long*: pshmem_ulonglong_atomic_swap, \ + float*: pshmem_float_atomic_swap, \ + double*: pshmem_double_atomic_swap)(__VA_ARGS__) #endif OSHMEM_DECLSPEC int pshmem_int_swap(int *target, int value, int pe); @@ -574,53 +895,63 @@ OSHMEM_DECLSPEC long pshmem_long_swap(long *target, long value, int pe); OSHMEM_DECLSPEC long long pshmem_longlong_swap(long long*target, long long value, int pe); OSHMEM_DECLSPEC float pshmem_float_swap(float *target, float value, int pe); OSHMEM_DECLSPEC double pshmem_double_swap(double *target, double value, int pe); -#if OSHMEMP_HAVE_C11 -#define pshmem_swap(dst, val, pe) \ - _Generic(&*(dst), \ - int*: pshmem_int_swap, \ - long*: pshmem_long_swap, \ - long long*: pshmem_longlong_swap, \ - float*: pshmem_float_swap, \ +#if OSHMEM_HAVE_C11 +#define pshmem_swap(dst, val, pe) \ + _Generic(&*(dst), \ + int*: pshmem_int_swap, \ + long*: pshmem_long_swap, \ + long long*: pshmem_longlong_swap, \ + float*: pshmem_float_swap, \ double*: pshmem_double_swap)(dst, val, pe) #endif /* Atomic set */ OSHMEM_DECLSPEC void pshmem_ctx_int_atomic_set(shmem_ctx_t ctx, int *target, int value, int pe); OSHMEM_DECLSPEC void pshmem_ctx_long_atomic_set(shmem_ctx_t ctx, long *target, long value, 
int pe); -OSHMEM_DECLSPEC void pshmem_ctx_longlong_atomic_set(shmem_ctx_t ctx, long long*target, long long value, int pe); +OSHMEM_DECLSPEC void pshmem_ctx_longlong_atomic_set(shmem_ctx_t ctx, long long *target, long long value, int pe); +OSHMEM_DECLSPEC void pshmem_ctx_uint_atomic_set(shmem_ctx_t ctx, unsigned int *target, unsigned int value, int pe); +OSHMEM_DECLSPEC void pshmem_ctx_ulong_atomic_set(shmem_ctx_t ctx, unsigned long *target, unsigned long value, int pe); +OSHMEM_DECLSPEC void pshmem_ctx_ulonglong_atomic_set(shmem_ctx_t ctx, unsigned long long *target, unsigned long long value, int pe); OSHMEM_DECLSPEC void pshmem_ctx_float_atomic_set(shmem_ctx_t ctx, float *target, float value, int pe); OSHMEM_DECLSPEC void pshmem_ctx_double_atomic_set(shmem_ctx_t ctx, double *target, double value, int pe); -#if OSHMEM_HAVE_C11 -#define pshmem_atomic_set(ctx, dst, val, pe) \ - _Generic(&*(dst), \ - int*: pshmem_ctx_int_atomic_set, \ - long*: pshmem_ctx_long_atomic_set, \ - long long*: pshmem_ctx_longlong_atomic_set, \ - float*: pshmem_ctx_float_atomic_set, \ - double*: pshmem_ctx_double_atomic_set)(ctx, dst, val, pe) -#endif OSHMEM_DECLSPEC void pshmem_int_atomic_set(int *target, int value, int pe); OSHMEM_DECLSPEC void pshmem_long_atomic_set(long *target, long value, int pe); -OSHMEM_DECLSPEC void pshmem_longlong_atomic_set(long long*target, long long value, int pe); +OSHMEM_DECLSPEC void pshmem_longlong_atomic_set(long long *target, long long value, int pe); +OSHMEM_DECLSPEC void pshmem_uint_atomic_set(unsigned int *target, unsigned int value, int pe); +OSHMEM_DECLSPEC void pshmem_ulong_atomic_set(unsigned long *target, unsigned long value, int pe); +OSHMEM_DECLSPEC void pshmem_ulonglong_atomic_set(unsigned long long *target, unsigned long long value, int pe); OSHMEM_DECLSPEC void pshmem_float_atomic_set(float *target, float value, int pe); OSHMEM_DECLSPEC void pshmem_double_atomic_set(double *target, double value, int pe); #if OSHMEM_HAVE_C11 -#define 
pshmem_atomic_set(dst, val, pe) \ - _Generic(&*(dst), \ - int*: pshmem_int_atomic_set, \ - long*: pshmem_long_atomic_set, \ - long long*: pshmem_longlong_atomic_set, \ - float*: pshmem_float_atomic_set, \ - double*: pshmem_double_atomic_set)(dst, val, pe) +#define pshmem_atomic_set(...) \ + _Generic(&*(__OSHMEM_VAR_ARG1(__VA_ARGS__)), \ + shmem_ctx_t: _Generic((__OSHMEM_VAR_ARG2(__VA_ARGS__)), \ + int*: pshmem_ctx_int_atomic_set, \ + long*: pshmem_ctx_long_atomic_set, \ + long long*: pshmem_ctx_longlong_atomic_set, \ + unsigned int*: pshmem_ctx_uint_atomic_set, \ + unsigned long*: pshmem_ctx_ulong_atomic_set, \ + unsigned long long*: pshmem_ctx_ulonglong_atomic_set,\ + float*: pshmem_ctx_float_atomic_set, \ + double*: pshmem_ctx_double_atomic_set, \ + default: __opshmem_datatype_ignore), \ + int*: pshmem_int_atomic_set, \ + long*: pshmem_long_atomic_set, \ + long long*: pshmem_longlong_atomic_set, \ + unsigned int*: pshmem_uint_atomic_set, \ + unsigned long*: pshmem_ulong_atomic_set, \ + unsigned long long*: pshmem_ulonglong_atomic_set, \ + float*: pshmem_float_atomic_set, \ + double*: pshmem_double_atomic_set)(__VA_ARGS__) #endif OSHMEM_DECLSPEC void pshmem_int_set(int *target, int value, int pe); OSHMEM_DECLSPEC void pshmem_long_set(long *target, long value, int pe); -OSHMEM_DECLSPEC void pshmem_longlong_set(long long*target, long long value, int pe); +OSHMEM_DECLSPEC void pshmem_longlong_set(long long *target, long long value, int pe); OSHMEM_DECLSPEC void pshmem_float_set(float *target, float value, int pe); OSHMEM_DECLSPEC void pshmem_double_set(double *target, double value, int pe); -#if OSHMEMP_HAVE_C11 +#if OSHMEM_HAVE_C11 #define pshmem_set(dst, val, pe) \ _Generic(&*(dst), \ int*: pshmem_int_set, \ @@ -634,31 +965,40 @@ OSHMEM_DECLSPEC void pshmem_double_set(double *target, double value, int pe); OSHMEM_DECLSPEC int pshmem_ctx_int_atomic_compare_swap(shmem_ctx_t ctx, int *target, int cond, int value, int pe); OSHMEM_DECLSPEC long 
pshmem_ctx_long_atomic_compare_swap(shmem_ctx_t ctx, long *target, long cond, long value, int pe); OSHMEM_DECLSPEC long long pshmem_ctx_longlong_atomic_compare_swap(shmem_ctx_t ctx, long long *target, long long cond, long long value, int pe); - -#if OSHMEM_HAVE_C11 -#define pshmem_atomic_compare_swap(ctx, dst, cond, val, pe) \ - _Generic(&*(dst), \ - int*: pshmem_ctx_int_atomic_compare_swap, \ - long*: pshmem_ctx_long_atomic_compare_swap, \ - long long*: pshmem_ctx_longlong_atomic_compare_swap)(ctx, dst, cond, val, pe) -#endif +OSHMEM_DECLSPEC unsigned int pshmem_ctx_uint_atomic_compare_swap(shmem_ctx_t ctx, unsigned int *target, unsigned int cond, unsigned int value, int pe); +OSHMEM_DECLSPEC unsigned long pshmem_ctx_ulong_atomic_compare_swap(shmem_ctx_t ctx, unsigned long *target, unsigned long cond, unsigned long value, int pe); +OSHMEM_DECLSPEC unsigned long long pshmem_ctx_ulonglong_atomic_compare_swap(shmem_ctx_t ctx, unsigned long long *target, unsigned long long cond, unsigned long long value, int pe); OSHMEM_DECLSPEC int pshmem_int_atomic_compare_swap(int *target, int cond, int value, int pe); OSHMEM_DECLSPEC long pshmem_long_atomic_compare_swap(long *target, long cond, long value, int pe); OSHMEM_DECLSPEC long long pshmem_longlong_atomic_compare_swap(long long *target, long long cond, long long value, int pe); +OSHMEM_DECLSPEC unsigned int pshmem_uint_atomic_compare_swap(unsigned int *target, unsigned int cond, unsigned int value, int pe); +OSHMEM_DECLSPEC unsigned long pshmem_ulong_atomic_compare_swap(unsigned long *target, unsigned long cond, unsigned long value, int pe); +OSHMEM_DECLSPEC unsigned long long pshmem_ulonglong_atomic_compare_swap(unsigned long long *target, unsigned long long cond, unsigned long long value, int pe); #if OSHMEM_HAVE_C11 -#define pshmem_atomic_compare_swap(dst, cond, val, pe) \ - _Generic(&*(dst), \ - int*: pshmem_int_atomic_compare_swap, \ - long*: pshmem_long_atomic_compare_swap, \ - long long*: 
pshmem_longlong_atomic_compare_swap)(dst, cond, val, pe) +#define pshmem_atomic_compare_swap(...) \ + _Generic(&*(__OSHMEM_VAR_ARG1(__VA_ARGS__)), \ + shmem_ctx_t: _Generic((__OSHMEM_VAR_ARG2(__VA_ARGS__)), \ + int*: pshmem_ctx_int_atomic_compare_swap, \ + long*: pshmem_ctx_long_atomic_compare_swap, \ + long long*: pshmem_ctx_longlong_atomic_compare_swap, \ + unsigned int*: pshmem_ctx_uint_atomic_compare_swap, \ + unsigned long*: pshmem_ctx_ulong_atomic_compare_swap, \ + unsigned long long*: pshmem_ctx_ulonglong_atomic_compare_swap, \ + default: __opshmem_datatype_ignore), \ + int*: pshmem_int_atomic_compare_swap, \ + long*: pshmem_long_atomic_compare_swap, \ + long long*: pshmem_longlong_atomic_compare_swap, \ + unsigned int*: pshmem_uint_atomic_compare_swap, \ + unsigned long*: pshmem_ulong_atomic_compare_swap, \ + unsigned long long*: pshmem_ulonglong_atomic_compare_swap)(__VA_ARGS__) #endif OSHMEM_DECLSPEC int pshmem_int_cswap(int *target, int cond, int value, int pe); OSHMEM_DECLSPEC long pshmem_long_cswap(long *target, long cond, long value, int pe); OSHMEM_DECLSPEC long long pshmem_longlong_cswap(long long *target, long long cond, long long value, int pe); -#if OSHMEMP_HAVE_C11 +#if OSHMEM_HAVE_C11 #define pshmem_cswap(dst, cond, val, pe) \ _Generic(&*(dst), \ int*: pshmem_int_cswap, \ @@ -670,29 +1010,39 @@ OSHMEM_DECLSPEC long long pshmem_longlong_cswap(long long *target, long long con OSHMEM_DECLSPEC int pshmem_ctx_int_atomic_fetch_add(shmem_ctx_t ctx, int *target, int value, int pe); OSHMEM_DECLSPEC long pshmem_ctx_long_atomic_fetch_add(shmem_ctx_t ctx, long *target, long value, int pe); OSHMEM_DECLSPEC long long pshmem_ctx_longlong_atomic_fetch_add(shmem_ctx_t ctx, long long *target, long long value, int pe); -#if OSHMEM_HAVE_C11 -#define pshmem_atomic_fetch_add(ctx, dst, val, pe) \ - _Generic(&*(dst), \ - int*: pshmem_ctx_int_atomic_fetch_add, \ - long*: pshmem_ctx_long_atomic_fetch_add, \ - long long*: pshmem_ctx_longlong_atomic_fetch_add)(ctx, dst, 
val, pe) -#endif +OSHMEM_DECLSPEC unsigned int pshmem_ctx_uint_atomic_fetch_add(shmem_ctx_t ctx, unsigned int *target, unsigned int value, int pe); +OSHMEM_DECLSPEC unsigned long pshmem_ctx_ulong_atomic_fetch_add(shmem_ctx_t ctx, unsigned long *target, unsigned long value, int pe); +OSHMEM_DECLSPEC unsigned long long pshmem_ctx_ulonglong_atomic_fetch_add(shmem_ctx_t ctx, unsigned long long *target, unsigned long long value, int pe); OSHMEM_DECLSPEC int pshmem_int_atomic_fetch_add(int *target, int value, int pe); OSHMEM_DECLSPEC long pshmem_long_atomic_fetch_add(long *target, long value, int pe); OSHMEM_DECLSPEC long long pshmem_longlong_atomic_fetch_add(long long *target, long long value, int pe); +OSHMEM_DECLSPEC unsigned int pshmem_uint_atomic_fetch_add(unsigned int *target, unsigned int value, int pe); +OSHMEM_DECLSPEC unsigned long pshmem_ulong_atomic_fetch_add(unsigned long *target, unsigned long value, int pe); +OSHMEM_DECLSPEC unsigned long long pshmem_ulonglong_atomic_fetch_add(unsigned long long *target, unsigned long long value, int pe); #if OSHMEM_HAVE_C11 -#define pshmem_atomic_fetch_add(dst, val, pe) \ - _Generic(&*(dst), \ - int*: pshmem_int_atomic_fetch_add, \ - long*: pshmem_long_atomic_fetch_add, \ - long long*: pshmem_longlong_atomic_fetch_add)(dst, val, pe) +#define pshmem_atomic_fetch_add(...) 
\ + _Generic(&*(__OSHMEM_VAR_ARG1(__VA_ARGS__)), \ + shmem_ctx_t: _Generic((__OSHMEM_VAR_ARG2(__VA_ARGS__)), \ + int*: pshmem_ctx_int_atomic_fetch_add, \ + long*: pshmem_ctx_long_atomic_fetch_add, \ + long long*: pshmem_ctx_longlong_atomic_fetch_add, \ + unsigned int*: pshmem_ctx_uint_atomic_fetch_add, \ + unsigned long*: pshmem_ctx_ulong_atomic_fetch_add, \ + unsigned long long*: pshmem_ctx_ulonglong_atomic_fetch_add, \ + default: __opshmem_datatype_ignore), \ + int*: pshmem_int_atomic_fetch_add, \ + long*: pshmem_long_atomic_fetch_add, \ + long long*: pshmem_longlong_atomic_fetch_add, \ + unsigned int*: pshmem_uint_atomic_fetch_add, \ + unsigned long*: pshmem_ulong_atomic_fetch_add, \ + unsigned long long*: pshmem_ulonglong_atomic_fetch_add)(__VA_ARGS__) #endif OSHMEM_DECLSPEC int pshmem_int_fadd(int *target, int value, int pe); OSHMEM_DECLSPEC long pshmem_long_fadd(long *target, long value, int pe); OSHMEM_DECLSPEC long long pshmem_longlong_fadd(long long *target, long long value, int pe); -#if OSHMEMP_HAVE_C11 +#if OSHMEM_HAVE_C11 #define pshmem_fadd(dst, val, pe) \ _Generic(&*(dst), \ int*: pshmem_int_fadd, \ @@ -701,103 +1051,191 @@ OSHMEM_DECLSPEC long long pshmem_longlong_fadd(long long *target, long long valu #endif /* Atomic Fetch&And */ +OSHMEM_DECLSPEC int pshmem_ctx_int_atomic_fetch_and(shmem_ctx_t ctx, int *target, int value, int pe); +OSHMEM_DECLSPEC long pshmem_ctx_long_atomic_fetch_and(shmem_ctx_t ctx, long *target, long value, int pe); +OSHMEM_DECLSPEC long long pshmem_ctx_longlong_atomic_fetch_and(shmem_ctx_t ctx, long long *target, long long value, int pe); OSHMEM_DECLSPEC unsigned int pshmem_ctx_uint_atomic_fetch_and(shmem_ctx_t ctx, unsigned int *target, unsigned int value, int pe); OSHMEM_DECLSPEC unsigned long pshmem_ctx_ulong_atomic_fetch_and(shmem_ctx_t ctx, unsigned long *target, unsigned long value, int pe); OSHMEM_DECLSPEC unsigned long long pshmem_ctx_ulonglong_atomic_fetch_and(shmem_ctx_t ctx, unsigned long long *target, unsigned long 
long value, int pe); -#if OSHMEMP_HAVE_C11 -#define pshmem_atomic_fetch_and(ctx, dst, val, pe) \ - _Generic(&*(dst), \ - unsigned int*: pshmem_ctx_uint_atomic_fetch_and, \ - unsigned long*: pshmem_ctx_ulong_atomic_fetch_and, \ - unsigned long long*: pshmem_ctx_ulonglong_atomic_fetch_and)(ctx, dst, val, pe) -#endif - +OSHMEM_DECLSPEC int32_t pshmem_ctx_int32_atomic_fetch_and(shmem_ctx_t ctx, int32_t *target, int32_t value, int pe); +OSHMEM_DECLSPEC int64_t pshmem_ctx_int64_atomic_fetch_and(shmem_ctx_t ctx, int64_t *target, int64_t value, int pe); +OSHMEM_DECLSPEC uint32_t pshmem_ctx_uint32_atomic_fetch_and(shmem_ctx_t ctx, uint32_t *target, uint32_t value, int pe); +OSHMEM_DECLSPEC uint64_t pshmem_ctx_uint64_atomic_fetch_and(shmem_ctx_t ctx, uint64_t *target, uint64_t value, int pe); + +OSHMEM_DECLSPEC int pshmem_int_atomic_fetch_and(int *target, int value, int pe); +OSHMEM_DECLSPEC long pshmem_long_atomic_fetch_and(long *target, long value, int pe); +OSHMEM_DECLSPEC long long pshmem_longlong_atomic_fetch_and(long long *target, long long value, int pe); OSHMEM_DECLSPEC unsigned int pshmem_uint_atomic_fetch_and(unsigned int *target, unsigned int value, int pe); OSHMEM_DECLSPEC unsigned long pshmem_ulong_atomic_fetch_and(unsigned long *target, unsigned long value, int pe); OSHMEM_DECLSPEC unsigned long long pshmem_ulonglong_atomic_fetch_and(unsigned long long *target, unsigned long long value, int pe); +OSHMEM_DECLSPEC int32_t pshmem_int32_atomic_fetch_and(int32_t *target, int32_t value, int pe); +OSHMEM_DECLSPEC int64_t pshmem_int64_atomic_fetch_and(int64_t *target, int64_t value, int pe); +OSHMEM_DECLSPEC uint32_t pshmem_uint32_atomic_fetch_and(uint32_t *target, uint32_t value, int pe); +OSHMEM_DECLSPEC uint64_t pshmem_uint64_atomic_fetch_and(uint64_t *target, uint64_t value, int pe); #if OSHMEM_HAVE_C11 -#define pshmem_atomic_fetch_and(dst, val, pe) \ - _Generic(&*(dst), \ - unsigned int*: pshmem_uint_atomic_fetch_and, \ - unsigned long*: 
pshmem_ulong_atomic_fetch_and, \ - unsigned long long*: pshmem_ulonglong_atomic_fetch_and)(dst, val, pe) +#define pshmem_atomic_fetch_and(...) \ + _Generic(&*(__OSHMEM_VAR_ARG1(__VA_ARGS__)), \ + shmem_ctx_t: _Generic((__OSHMEM_VAR_ARG2(__VA_ARGS__)), \ + int*: pshmem_ctx_int_atomic_fetch_and, \ + long*: pshmem_ctx_long_atomic_fetch_and, \ + long long*: pshmem_ctx_longlong_atomic_fetch_and, \ + unsigned int*: pshmem_ctx_uint_atomic_fetch_and, \ + unsigned long*: pshmem_ctx_ulong_atomic_fetch_and, \ + unsigned long long*: pshmem_ctx_ulonglong_atomic_fetch_and, \ + int32_t*: pshmem_ctx_int32_atomic_fetch_and, \ + int64_t*: pshmem_ctx_int64_atomic_fetch_and, \ + uint32_t*: pshmem_ctx_uint32_atomic_fetch_and, \ + uint64_t*: pshmem_ctx_uint64_atomic_fetch_and, \ + default: __opshmem_datatype_ignore), \ + int*: pshmem_int_atomic_fetch_and, \ + long*: pshmem_long_atomic_fetch_and, \ + long long*: pshmem_longlong_atomic_fetch_and, \ + unsigned int*: pshmem_uint_atomic_fetch_and, \ + unsigned long*: pshmem_ulong_atomic_fetch_and, \ + unsigned long long*: pshmem_ulonglong_atomic_fetch_and, \ + int32_t*: pshmem_int32_atomic_fetch_and, /* non-ctx call: (target, value, pe) */ \ + int64_t*: pshmem_int64_atomic_fetch_and, \ + uint32_t*: pshmem_uint32_atomic_fetch_and, \ + uint64_t*: pshmem_uint64_atomic_fetch_and)(__VA_ARGS__) #endif /* Atomic Fetch&Or */ +OSHMEM_DECLSPEC int pshmem_ctx_int_atomic_fetch_or(shmem_ctx_t ctx, int *target, int value, int pe); +OSHMEM_DECLSPEC long pshmem_ctx_long_atomic_fetch_or(shmem_ctx_t ctx, long *target, long value, int pe); +OSHMEM_DECLSPEC long long pshmem_ctx_longlong_atomic_fetch_or(shmem_ctx_t ctx, long long *target, long long value, int pe); OSHMEM_DECLSPEC unsigned int pshmem_ctx_uint_atomic_fetch_or(shmem_ctx_t ctx, unsigned int *target, unsigned int value, int pe); OSHMEM_DECLSPEC unsigned long pshmem_ctx_ulong_atomic_fetch_or(shmem_ctx_t ctx, unsigned long *target, unsigned long value, int pe); OSHMEM_DECLSPEC unsigned long long
pshmem_ctx_ulonglong_atomic_fetch_or(shmem_ctx_t ctx, unsigned long long *target, unsigned long long value, int pe); -#if OSHMEM_HAVE_C11 -#define pshmem_atomic_fetch_or(ctx, dst, val, pe) \ - _Generic(&*(dst), \ - unsigned int*: pshmem_ctx_uint_atomic_fetch_or, \ - unsigned long*: pshmem_ctx_ulong_atomic_fetch_or, \ - unsigned long long*: pshmem_ctx_ulonglong_atomic_fetch_or)(ctx, dst, val, pe) -#endif - +OSHMEM_DECLSPEC int32_t pshmem_ctx_int32_atomic_fetch_or(shmem_ctx_t ctx, int32_t *target, int32_t value, int pe); +OSHMEM_DECLSPEC int64_t pshmem_ctx_int64_atomic_fetch_or(shmem_ctx_t ctx, int64_t *target, int64_t value, int pe); +OSHMEM_DECLSPEC uint32_t pshmem_ctx_uint32_atomic_fetch_or(shmem_ctx_t ctx, uint32_t *target, uint32_t value, int pe); +OSHMEM_DECLSPEC uint64_t pshmem_ctx_uint64_atomic_fetch_or(shmem_ctx_t ctx, uint64_t *target, uint64_t value, int pe); + +OSHMEM_DECLSPEC int pshmem_int_atomic_fetch_or(int *target, int value, int pe); +OSHMEM_DECLSPEC long pshmem_long_atomic_fetch_or(long *target, long value, int pe); +OSHMEM_DECLSPEC long long pshmem_longlong_atomic_fetch_or(long long *target, long long value, int pe); OSHMEM_DECLSPEC unsigned int pshmem_uint_atomic_fetch_or(unsigned int *target, unsigned int value, int pe); OSHMEM_DECLSPEC unsigned long pshmem_ulong_atomic_fetch_or(unsigned long *target, unsigned long value, int pe); OSHMEM_DECLSPEC unsigned long long pshmem_ulonglong_atomic_fetch_or(unsigned long long *target, unsigned long long value, int pe); +OSHMEM_DECLSPEC int32_t pshmem_int32_atomic_fetch_or(int32_t *target, int32_t value, int pe); +OSHMEM_DECLSPEC int64_t pshmem_int64_atomic_fetch_or(int64_t *target, int64_t value, int pe); +OSHMEM_DECLSPEC uint32_t pshmem_uint32_atomic_fetch_or(uint32_t *target, uint32_t value, int pe); +OSHMEM_DECLSPEC uint64_t pshmem_uint64_atomic_fetch_or(uint64_t *target, uint64_t value, int pe); #if OSHMEM_HAVE_C11 -#define pshmem_atomic_fetch_or(dst, val, pe) \ - _Generic(&*(dst), \ - unsigned int*: 
pshmem_uint_atomic_fetch_or, \ - unsigned long*: pshmem_ulong_atomic_fetch_or, \ - unsigned long long*: pshmem_ulonglong_atomic_fetch_or)(dst, val, pe) +#define pshmem_atomic_fetch_or(...) \ + _Generic(&*(__OSHMEM_VAR_ARG1(__VA_ARGS__)), \ + shmem_ctx_t: _Generic((__OSHMEM_VAR_ARG2(__VA_ARGS__)), \ + int*: pshmem_ctx_int_atomic_fetch_or, \ + long*: pshmem_ctx_long_atomic_fetch_or, \ + long long*: pshmem_ctx_longlong_atomic_fetch_or, \ + unsigned int*: pshmem_ctx_uint_atomic_fetch_or, \ + unsigned long*: pshmem_ctx_ulong_atomic_fetch_or, \ + unsigned long long*: pshmem_ctx_ulonglong_atomic_fetch_or, \ + int32_t*: pshmem_ctx_int32_atomic_fetch_or, \ + int64_t*: pshmem_ctx_int64_atomic_fetch_or, \ + uint32_t*: pshmem_ctx_uint32_atomic_fetch_or, \ + uint64_t*: pshmem_ctx_uint64_atomic_fetch_or, \ + default: __opshmem_datatype_ignore), \ + int*: pshmem_int_atomic_fetch_or, \ + long*: pshmem_long_atomic_fetch_or, \ + long long*: pshmem_longlong_atomic_fetch_or, \ + unsigned int*: pshmem_uint_atomic_fetch_or, \ + unsigned long*: pshmem_ulong_atomic_fetch_or, \ + unsigned long long*: pshmem_ulonglong_atomic_fetch_or, \ + int32_t*: pshmem_int32_atomic_fetch_or, /* non-ctx call: (target, value, pe) */ \ + int64_t*: pshmem_int64_atomic_fetch_or, \ + uint32_t*: pshmem_uint32_atomic_fetch_or, \ + uint64_t*: pshmem_uint64_atomic_fetch_or)(__VA_ARGS__) #endif /* Atomic Fetch&Xor */ +OSHMEM_DECLSPEC int pshmem_ctx_int_atomic_fetch_xor(shmem_ctx_t ctx, int *target, int value, int pe); +OSHMEM_DECLSPEC long pshmem_ctx_long_atomic_fetch_xor(shmem_ctx_t ctx, long *target, long value, int pe); +OSHMEM_DECLSPEC long long pshmem_ctx_longlong_atomic_fetch_xor(shmem_ctx_t ctx, long long *target, long long value, int pe); OSHMEM_DECLSPEC unsigned int pshmem_ctx_uint_atomic_fetch_xor(shmem_ctx_t ctx, unsigned int *target, unsigned int value, int pe); OSHMEM_DECLSPEC unsigned long pshmem_ctx_ulong_atomic_fetch_xor(shmem_ctx_t ctx, unsigned long *target, unsigned long value, int pe); OSHMEM_DECLSPEC unsigned long
long pshmem_ctx_ulonglong_atomic_fetch_xor(shmem_ctx_t ctx, unsigned long long *target, unsigned long long value, int pe); -#if OSHMEM_HAVE_C11 -#define pshmem_atomic_fetch_xor(ctx, dst, val, pe) \ - _Generic(&*(dst), \ - unsigned int*: pshmem_ctx_uint_atomic_fetch_xor, \ - unsigned long*: pshmem_ctx_ulong_atomic_fetch_xor, \ - unsigned long long*: pshmem_ctx_ulonglong_atomic_fetch_xor)(ctx, dst, val, pe) -#endif - +OSHMEM_DECLSPEC int32_t pshmem_ctx_int32_atomic_fetch_xor(shmem_ctx_t ctx, int32_t *target, int32_t value, int pe); +OSHMEM_DECLSPEC int64_t pshmem_ctx_int64_atomic_fetch_xor(shmem_ctx_t ctx, int64_t *target, int64_t value, int pe); +OSHMEM_DECLSPEC uint32_t pshmem_ctx_uint32_atomic_fetch_xor(shmem_ctx_t ctx, uint32_t *target, uint32_t value, int pe); +OSHMEM_DECLSPEC uint64_t pshmem_ctx_uint64_atomic_fetch_xor(shmem_ctx_t ctx, uint64_t *target, uint64_t value, int pe); + +OSHMEM_DECLSPEC int pshmem_int_atomic_fetch_xor(int *target, int value, int pe); +OSHMEM_DECLSPEC long pshmem_long_atomic_fetch_xor(long *target, long value, int pe); +OSHMEM_DECLSPEC long long pshmem_longlong_atomic_fetch_xor(long long *target, long long value, int pe); OSHMEM_DECLSPEC unsigned int pshmem_uint_atomic_fetch_xor(unsigned int *target, unsigned int value, int pe); OSHMEM_DECLSPEC unsigned long pshmem_ulong_atomic_fetch_xor(unsigned long *target, unsigned long value, int pe); OSHMEM_DECLSPEC unsigned long long pshmem_ulonglong_atomic_fetch_xor(unsigned long long *target, unsigned long long value, int pe); +OSHMEM_DECLSPEC int32_t pshmem_int32_atomic_fetch_xor(int32_t *target, int32_t value, int pe); +OSHMEM_DECLSPEC int64_t pshmem_int64_atomic_fetch_xor(int64_t *target, int64_t value, int pe); +OSHMEM_DECLSPEC uint32_t pshmem_uint32_atomic_fetch_xor(uint32_t *target, uint32_t value, int pe); +OSHMEM_DECLSPEC uint64_t pshmem_uint64_atomic_fetch_xor(uint64_t *target, uint64_t value, int pe); #if OSHMEM_HAVE_C11 -#define pshmem_atomic_fetch_xor(dst, val, pe) \ - 
_Generic(&*(dst), \ - unsigned int*: pshmem_uint_atomic_fetch_xor, \ - unsigned long*: pshmem_ulong_atomic_fetch_xor, \ - unsigned long long*: pshmem_ulonglong_atomic_fetch_xor)(dst, val, pe) +#define pshmem_atomic_fetch_xor(...) \ + _Generic(&*(__OSHMEM_VAR_ARG1(__VA_ARGS__)), \ + shmem_ctx_t: _Generic((__OSHMEM_VAR_ARG2(__VA_ARGS__)), \ + int*: pshmem_ctx_int_atomic_fetch_xor, \ + long*: pshmem_ctx_long_atomic_fetch_xor, \ + long long*: pshmem_ctx_longlong_atomic_fetch_xor, \ + unsigned int*: pshmem_ctx_uint_atomic_fetch_xor, \ + unsigned long*: pshmem_ctx_ulong_atomic_fetch_xor, \ + unsigned long long*: pshmem_ctx_ulonglong_atomic_fetch_xor, \ + int32_t*: pshmem_ctx_int32_atomic_fetch_xor, \ + int64_t*: pshmem_ctx_int64_atomic_fetch_xor, \ + uint32_t*: pshmem_ctx_uint32_atomic_fetch_xor, \ + uint64_t*: pshmem_ctx_uint64_atomic_fetch_xor, \ + default: __opshmem_datatype_ignore), \ + int*: pshmem_int_atomic_fetch_xor, \ + long*: pshmem_long_atomic_fetch_xor, \ + long long*: pshmem_longlong_atomic_fetch_xor, \ + unsigned int*: pshmem_uint_atomic_fetch_xor, \ + unsigned long*: pshmem_ulong_atomic_fetch_xor, \ + unsigned long long*: pshmem_ulonglong_atomic_fetch_xor, \ + int32_t*: pshmem_int32_atomic_fetch_xor, /* non-ctx call: (target, value, pe) */ \ + int64_t*: pshmem_int64_atomic_fetch_xor, \ + uint32_t*: pshmem_uint32_atomic_fetch_xor, \ + uint64_t*: pshmem_uint64_atomic_fetch_xor)(__VA_ARGS__) #endif /* Atomic Fetch */ OSHMEM_DECLSPEC int pshmem_ctx_int_atomic_fetch(shmem_ctx_t ctx, const int *target, int pe); OSHMEM_DECLSPEC long pshmem_ctx_long_atomic_fetch(shmem_ctx_t ctx, const long *target, int pe); OSHMEM_DECLSPEC long long pshmem_ctx_longlong_atomic_fetch(shmem_ctx_t ctx, const long long *target, int pe); +OSHMEM_DECLSPEC unsigned int pshmem_ctx_uint_atomic_fetch(shmem_ctx_t ctx, const unsigned int *target, int pe); +OSHMEM_DECLSPEC unsigned long pshmem_ctx_ulong_atomic_fetch(shmem_ctx_t ctx, const unsigned long *target, int pe); +OSHMEM_DECLSPEC unsigned long long
pshmem_ctx_ulonglong_atomic_fetch(shmem_ctx_t ctx, const unsigned long long *target, int pe); OSHMEM_DECLSPEC float pshmem_ctx_float_atomic_fetch(shmem_ctx_t ctx, const float *target, int pe); OSHMEM_DECLSPEC double pshmem_ctx_double_atomic_fetch(shmem_ctx_t ctx, const double *target, int pe); -#if OSHMEM_HAVE_C11 -#define pshmem_atomic_fetch(ctx, dst, pe) \ - _Generic(&*(dst), \ - int*: pshmem_ctx_int_atomic_fetch, \ - long*: pshmem_ctx_long_atomic_fetch, \ - long long*: pshmem_ctx_longlong_atomic_fetch,\ - float*: pshmem_ctx_float_atomic_fetch, \ - double*: pshmem_ctx_double_atomic_fetch)(ctx, dst, pe) -#endif OSHMEM_DECLSPEC int pshmem_int_atomic_fetch(const int *target, int pe); OSHMEM_DECLSPEC long pshmem_long_atomic_fetch(const long *target, int pe); OSHMEM_DECLSPEC long long pshmem_longlong_atomic_fetch(const long long *target, int pe); +OSHMEM_DECLSPEC unsigned int pshmem_uint_atomic_fetch(const unsigned int *target, int pe); +OSHMEM_DECLSPEC unsigned long pshmem_ulong_atomic_fetch(const unsigned long *target, int pe); +OSHMEM_DECLSPEC unsigned long long pshmem_ulonglong_atomic_fetch(const unsigned long long *target, int pe); OSHMEM_DECLSPEC float pshmem_float_atomic_fetch(const float *target, int pe); OSHMEM_DECLSPEC double pshmem_double_atomic_fetch(const double *target, int pe); #if OSHMEM_HAVE_C11 -#define pshmem_atomic_fetch(dst, pe) \ - _Generic(&*(dst), \ - int*: pshmem_int_atomic_fetch, \ - long*: pshmem_long_atomic_fetch, \ - long long*: pshmem_longlong_atomic_fetch, \ - float*: pshmem_float_atomic_fetch, \ - double*: pshmem_double_atomic_fetch)(dst, pe) +#define pshmem_atomic_fetch(...) 
\ + _Generic(&*(__OSHMEM_VAR_ARG1(__VA_ARGS__)), \ + shmem_ctx_t: _Generic((__OSHMEM_VAR_ARG2(__VA_ARGS__)), \ + int*: pshmem_ctx_int_atomic_fetch, \ + long*: pshmem_ctx_long_atomic_fetch, \ + long long*: pshmem_ctx_longlong_atomic_fetch, \ + unsigned int*: pshmem_ctx_uint_atomic_fetch, \ + unsigned long*: pshmem_ctx_ulong_atomic_fetch, \ + unsigned long long*: pshmem_ctx_ulonglong_atomic_fetch, \ + float*: pshmem_ctx_float_atomic_fetch, \ + double*: pshmem_ctx_double_atomic_fetch, \ + default: __opshmem_datatype_ignore), \ + int*: pshmem_int_atomic_fetch, \ + long*: pshmem_long_atomic_fetch, \ + long long*: pshmem_longlong_atomic_fetch, \ + unsigned int*: pshmem_uint_atomic_fetch, \ + unsigned long*: pshmem_ulong_atomic_fetch, \ + unsigned long long*: pshmem_ulonglong_atomic_fetch, \ + float*: pshmem_float_atomic_fetch, \ + double*: pshmem_double_atomic_fetch)(__VA_ARGS__) #endif OSHMEM_DECLSPEC int pshmem_int_fetch(const int *target, int pe); @@ -806,42 +1244,52 @@ OSHMEM_DECLSPEC long long pshmem_longlong_fetch(const long long *target, int pe) OSHMEM_DECLSPEC float pshmem_float_fetch(const float *target, int pe); OSHMEM_DECLSPEC double pshmem_double_fetch(const double *target, int pe); #if OSHMEM_HAVE_C11 -#define pshmem_fetch(dst, pe) \ - _Generic(&*(dst), \ - int*: pshmem_int_fetch, \ - long*: pshmem_long_fetch, \ - long long*: pshmem_longlong_fetch, \ - float*: pshmem_float_fetch, \ - double*: pshmem_double_fetch)(dst, pe) +#define pshmem_fetch(dst, pe) \ + _Generic(&*(dst), \ + int*: pshmem_int_fetch, \ + long*: pshmem_long_fetch, \ + long long*: pshmem_longlong_fetch, \ + float*: pshmem_float_fetch, \ + double*: pshmem_double_fetch)(dst, pe) #endif /* Atomic Fetch&Inc */ OSHMEM_DECLSPEC int pshmem_ctx_int_atomic_fetch_inc(shmem_ctx_t ctx, int *target, int pe); OSHMEM_DECLSPEC long pshmem_ctx_long_atomic_fetch_inc(shmem_ctx_t ctx, long *target, int pe); OSHMEM_DECLSPEC long long pshmem_ctx_longlong_atomic_fetch_inc(shmem_ctx_t ctx, long long *target, int 
pe); -#if OSHMEM_HAVE_C11 -#define pshmem_atomic_fetch_inc(ctx, dst, pe) \ - _Generic(&*(dst), \ - int*: pshmem_ctx_int_atomic_fetch_inc, \ - long*: pshmem_ctx_long_atomic_fetch_inc, \ - long long*: pshmem_ctx_longlong_atomic_fetch_inc)(ctx, dst, pe) -#endif +OSHMEM_DECLSPEC unsigned int pshmem_ctx_uint_atomic_fetch_inc(shmem_ctx_t ctx, unsigned int *target, int pe); +OSHMEM_DECLSPEC unsigned long pshmem_ctx_ulong_atomic_fetch_inc(shmem_ctx_t ctx, unsigned long *target, int pe); +OSHMEM_DECLSPEC unsigned long long pshmem_ctx_ulonglong_atomic_fetch_inc(shmem_ctx_t ctx, unsigned long long *target, int pe); OSHMEM_DECLSPEC int pshmem_int_atomic_fetch_inc(int *target, int pe); OSHMEM_DECLSPEC long pshmem_long_atomic_fetch_inc(long *target, int pe); OSHMEM_DECLSPEC long long pshmem_longlong_atomic_fetch_inc(long long *target, int pe); +OSHMEM_DECLSPEC unsigned int pshmem_uint_atomic_fetch_inc(unsigned int *target, int pe); +OSHMEM_DECLSPEC unsigned long pshmem_ulong_atomic_fetch_inc(unsigned long *target, int pe); +OSHMEM_DECLSPEC unsigned long long pshmem_ulonglong_atomic_fetch_inc(unsigned long long *target, int pe); #if OSHMEM_HAVE_C11 -#define pshmem_atomic_fetch_inc(dst, pe) \ - _Generic(&*(dst), \ - int*: pshmem_int_atomic_fetch_inc, \ - long*: pshmem_long_atomic_fetch_inc, \ - long long*: pshmem_longlong_atomic_fetch_inc)(dst, pe) +#define pshmem_atomic_fetch_inc(...) 
\ + _Generic(&*(__OSHMEM_VAR_ARG1(__VA_ARGS__)), \ + shmem_ctx_t: _Generic((__OSHMEM_VAR_ARG2(__VA_ARGS__)), \ + int*: pshmem_ctx_int_atomic_fetch_inc, \ + long*: pshmem_ctx_long_atomic_fetch_inc, \ + long long*: pshmem_ctx_longlong_atomic_fetch_inc, \ + unsigned int*: pshmem_ctx_uint_atomic_fetch_inc, \ + unsigned long*: pshmem_ctx_ulong_atomic_fetch_inc, \ + unsigned long long*: pshmem_ctx_ulonglong_atomic_fetch_inc, \ + default: __opshmem_datatype_ignore), \ + int*: pshmem_int_atomic_fetch_inc, \ + long*: pshmem_long_atomic_fetch_inc, \ + long long*: pshmem_longlong_atomic_fetch_inc, \ + unsigned int*: pshmem_uint_atomic_fetch_inc, \ + unsigned long*: pshmem_ulong_atomic_fetch_inc, \ + unsigned long long*: pshmem_ulonglong_atomic_fetch_inc)(__VA_ARGS__) #endif OSHMEM_DECLSPEC int pshmem_int_finc(int *target, int pe); OSHMEM_DECLSPEC long pshmem_long_finc(long *target, int pe); OSHMEM_DECLSPEC long long pshmem_longlong_finc(long long *target, int pe); -#if OSHMEMP_HAVE_C11 +#if OSHMEM_HAVE_C11 #define pshmem_finc(dst, pe) \ _Generic(&*(dst), \ int*: pshmem_int_finc, \ @@ -853,29 +1301,39 @@ OSHMEM_DECLSPEC long long pshmem_longlong_finc(long long *target, int pe); OSHMEM_DECLSPEC void pshmem_ctx_int_atomic_add(shmem_ctx_t ctx, int *target, int value, int pe); OSHMEM_DECLSPEC void pshmem_ctx_long_atomic_add(shmem_ctx_t ctx, long *target, long value, int pe); OSHMEM_DECLSPEC void pshmem_ctx_longlong_atomic_add(shmem_ctx_t ctx, long long *target, long long value, int pe); -#if OSHMEM_HAVE_C11 -#define pshmem_atomic_add(ctx, dst, val, pe) \ - _Generic(&*(dst), \ - int*: pshmem_ctx_int_atomic_add, \ - long*: pshmem_ctx_long_atomic_add, \ - long long*: pshmem_ctx_longlong_atomic_add)(ctx, dst, val, pe) -#endif +OSHMEM_DECLSPEC void pshmem_ctx_uint_atomic_add(shmem_ctx_t ctx, unsigned int *target, unsigned int value, int pe); +OSHMEM_DECLSPEC void pshmem_ctx_ulong_atomic_add(shmem_ctx_t ctx, unsigned long *target, unsigned long value, int pe); +OSHMEM_DECLSPEC void 
pshmem_ctx_ulonglong_atomic_add(shmem_ctx_t ctx, unsigned long long *target, unsigned long long value, int pe); OSHMEM_DECLSPEC void pshmem_int_atomic_add(int *target, int value, int pe); OSHMEM_DECLSPEC void pshmem_long_atomic_add(long *target, long value, int pe); OSHMEM_DECLSPEC void pshmem_longlong_atomic_add(long long *target, long long value, int pe); +OSHMEM_DECLSPEC void pshmem_uint_atomic_add(unsigned int *target, unsigned int value, int pe); +OSHMEM_DECLSPEC void pshmem_ulong_atomic_add(unsigned long *target, unsigned long value, int pe); +OSHMEM_DECLSPEC void pshmem_ulonglong_atomic_add(unsigned long long *target, unsigned long long value, int pe); #if OSHMEM_HAVE_C11 -#define pshmem_atomic_add(dst, val, pe) \ - _Generic(&*(dst), \ - int*: pshmem_int_atomic_add, \ - long*: pshmem_long_atomic_add, \ - long long*: pshmem_longlong_atomic_add)(dst, val, pe) +#define pshmem_atomic_add(...) \ + _Generic(&*(__OSHMEM_VAR_ARG1(__VA_ARGS__)), \ + shmem_ctx_t: _Generic((__OSHMEM_VAR_ARG2(__VA_ARGS__)), \ + int*: pshmem_ctx_int_atomic_add, \ + long*: pshmem_ctx_long_atomic_add, \ + long long*: pshmem_ctx_longlong_atomic_add, \ + unsigned int*: pshmem_ctx_uint_atomic_add, \ + unsigned long*: pshmem_ctx_ulong_atomic_add, \ + unsigned long long*: pshmem_ctx_ulonglong_atomic_add, \ + default: __opshmem_datatype_ignore), \ + int*: pshmem_int_atomic_add, \ + long*: pshmem_long_atomic_add, \ + long long*: pshmem_longlong_atomic_add, \ + unsigned int*: pshmem_uint_atomic_add, \ + unsigned long*: pshmem_ulong_atomic_add, \ + unsigned long long*: pshmem_ulonglong_atomic_add)(__VA_ARGS__) #endif OSHMEM_DECLSPEC void pshmem_int_add(int *target, int value, int pe); OSHMEM_DECLSPEC void pshmem_long_add(long *target, long value, int pe); OSHMEM_DECLSPEC void pshmem_longlong_add(long long *target, long long value, int pe); -#if OSHMEMP_HAVE_C11 +#if OSHMEM_HAVE_C11 #define pshmem_add(dst, val, pe) \ _Generic(&*(dst), \ int*: pshmem_int_add, \ @@ -884,101 +1342,165 @@ 
OSHMEM_DECLSPEC void pshmem_longlong_add(long long *target, long long value, int #endif /* Atomic And */ +OSHMEM_DECLSPEC void pshmem_ctx_int_atomic_and(shmem_ctx_t ctx, int *target, int value, int pe); +OSHMEM_DECLSPEC void pshmem_ctx_long_atomic_and(shmem_ctx_t ctx, long *target, long value, int pe); +OSHMEM_DECLSPEC void pshmem_ctx_longlong_atomic_and(shmem_ctx_t ctx, long long *target, long long value, int pe); OSHMEM_DECLSPEC void pshmem_ctx_uint_atomic_and(shmem_ctx_t ctx, unsigned int *target, unsigned int value, int pe); OSHMEM_DECLSPEC void pshmem_ctx_ulong_atomic_and(shmem_ctx_t ctx, unsigned long *target, unsigned long value, int pe); OSHMEM_DECLSPEC void pshmem_ctx_ulonglong_atomic_and(shmem_ctx_t ctx, unsigned long long *target, unsigned long long value, int pe); -#if OSHMEM_HAVE_C11 -#define pshmem_atomic_and(ctx, dst, val, pe) \ - _Generic(&*(dst), \ - unsigned int*: pshmem_ctx_uint_atomic_and, \ - unsigned long*: pshmem_ctx_ulong_atomic_and, \ - unsigned long long*: pshmem_ctx_ulonglong_atomic_and)(ctx, dst, val, pe) -#endif - +OSHMEM_DECLSPEC void pshmem_ctx_int32_atomic_and(shmem_ctx_t ctx, int32_t *target, int32_t value, int pe); +OSHMEM_DECLSPEC void pshmem_ctx_int64_atomic_and(shmem_ctx_t ctx, int64_t *target, int64_t value, int pe); +OSHMEM_DECLSPEC void pshmem_ctx_uint32_atomic_and(shmem_ctx_t ctx, uint32_t *target, uint32_t value, int pe); +OSHMEM_DECLSPEC void pshmem_ctx_uint64_atomic_and(shmem_ctx_t ctx, uint64_t *target, uint64_t value, int pe); + +OSHMEM_DECLSPEC void pshmem_int_atomic_and(int *target, int value, int pe); +OSHMEM_DECLSPEC void pshmem_long_atomic_and(long *target, long value, int pe); +OSHMEM_DECLSPEC void pshmem_longlong_atomic_and(long long *target, long long value, int pe); OSHMEM_DECLSPEC void pshmem_uint_atomic_and(unsigned int *target, unsigned int value, int pe); OSHMEM_DECLSPEC void pshmem_ulong_atomic_and(unsigned long *target, unsigned long value, int pe); OSHMEM_DECLSPEC void 
pshmem_ulonglong_atomic_and(unsigned long long *target, unsigned long long value, int pe); +OSHMEM_DECLSPEC void pshmem_int32_atomic_and(int32_t *target, int32_t value, int pe); +OSHMEM_DECLSPEC void pshmem_int64_atomic_and(int64_t *target, int64_t value, int pe); +OSHMEM_DECLSPEC void pshmem_uint32_atomic_and(uint32_t *target, uint32_t value, int pe); +OSHMEM_DECLSPEC void pshmem_uint64_atomic_and(uint64_t *target, uint64_t value, int pe); #if OSHMEM_HAVE_C11 -#define pshmem_atomic_and(dst, val, pe) \ - _Generic(&*(dst), \ - unsigned int*: pshmem_uint_atomic_and, \ - unsigned long*: pshmem_ulong_atomic_and, \ - unsigned long long*: pshmem_ulonglong_atomic_and)(dst, val, pe) +#define pshmem_atomic_and(...) \ + _Generic(&*(__OSHMEM_VAR_ARG1(__VA_ARGS__)), \ + shmem_ctx_t: _Generic((__OSHMEM_VAR_ARG2(__VA_ARGS__)), \ + int*: pshmem_ctx_int_atomic_and, \ + long*: pshmem_ctx_long_atomic_and, \ + long long*: pshmem_ctx_longlong_atomic_and, \ + unsigned int*: pshmem_ctx_uint_atomic_and, \ + unsigned long*: pshmem_ctx_ulong_atomic_and, \ + unsigned long long*: pshmem_ctx_ulonglong_atomic_and, \ + default: __opshmem_datatype_ignore), \ + int*: pshmem_int_atomic_and, \ + long*: pshmem_long_atomic_and, \ + long long*: pshmem_longlong_atomic_and, \ + unsigned int*: pshmem_uint_atomic_and, \ + unsigned long*: pshmem_ulong_atomic_and, \ + unsigned long long*: pshmem_ulonglong_atomic_and)(__VA_ARGS__) #endif /* Atomic Or */ +OSHMEM_DECLSPEC void pshmem_ctx_int_atomic_or(shmem_ctx_t ctx, int *target, int value, int pe); +OSHMEM_DECLSPEC void pshmem_ctx_long_atomic_or(shmem_ctx_t ctx, long *target, long value, int pe); +OSHMEM_DECLSPEC void pshmem_ctx_longlong_atomic_or(shmem_ctx_t ctx, long long *target, long long value, int pe); OSHMEM_DECLSPEC void pshmem_ctx_uint_atomic_or(shmem_ctx_t ctx, unsigned int *target, unsigned int value, int pe); OSHMEM_DECLSPEC void pshmem_ctx_ulong_atomic_or(shmem_ctx_t ctx, unsigned long *target, unsigned long value, int pe); OSHMEM_DECLSPEC void 
pshmem_ctx_ulonglong_atomic_or(shmem_ctx_t ctx, unsigned long long *target, unsigned long long value, int pe); -#if OSHMEM_HAVE_C11 -#define pshmem_atomic_or(ctx, dst, val, pe) \ - _Generic(&*(dst), \ - unsigned int*: pshmem_ctx_uint_atomic_or, \ - unsigned long*: pshmem_ctx_ulong_atomic_or, \ - unsigned long long*: pshmem_ctx_ulonglong_atomic_or)(ctx, dst, val, pe) -#endif - +OSHMEM_DECLSPEC void pshmem_ctx_int32_atomic_or(shmem_ctx_t ctx, int32_t *target, int32_t value, int pe); +OSHMEM_DECLSPEC void pshmem_ctx_int64_atomic_or(shmem_ctx_t ctx, int64_t *target, int64_t value, int pe); +OSHMEM_DECLSPEC void pshmem_ctx_uint32_atomic_or(shmem_ctx_t ctx, uint32_t *target, uint32_t value, int pe); +OSHMEM_DECLSPEC void pshmem_ctx_uint64_atomic_or(shmem_ctx_t ctx, uint64_t *target, uint64_t value, int pe); + +OSHMEM_DECLSPEC void pshmem_int_atomic_or(int *target, int value, int pe); +OSHMEM_DECLSPEC void pshmem_long_atomic_or(long *target, long value, int pe); +OSHMEM_DECLSPEC void pshmem_longlong_atomic_or(long long *target, long long value, int pe); OSHMEM_DECLSPEC void pshmem_uint_atomic_or(unsigned int *target, unsigned int value, int pe); OSHMEM_DECLSPEC void pshmem_ulong_atomic_or(unsigned long *target, unsigned long value, int pe); OSHMEM_DECLSPEC void pshmem_ulonglong_atomic_or(unsigned long long *target, unsigned long long value, int pe); +OSHMEM_DECLSPEC void pshmem_int32_atomic_or(int32_t *target, int32_t value, int pe); +OSHMEM_DECLSPEC void pshmem_int64_atomic_or(int64_t *target, int64_t value, int pe); +OSHMEM_DECLSPEC void pshmem_uint32_atomic_or(uint32_t *target, uint32_t value, int pe); +OSHMEM_DECLSPEC void pshmem_uint64_atomic_or(uint64_t *target, uint64_t value, int pe); #if OSHMEM_HAVE_C11 -#define pshmem_atomic_or(dst, val, pe) \ - _Generic(&*(dst), \ - unsigned int*: pshmem_uint_atomic_or, \ - unsigned long*: pshmem_ulong_atomic_or, \ - unsigned long long*: pshmem_ulonglong_atomic_or)(dst, val, pe) +#define pshmem_atomic_or(...) 
\ + _Generic(&*(__OSHMEM_VAR_ARG1(__VA_ARGS__)), \ + shmem_ctx_t: _Generic((__OSHMEM_VAR_ARG2(__VA_ARGS__)), \ + int*: pshmem_ctx_int_atomic_or, \ + long*: pshmem_ctx_long_atomic_or, \ + long long*: pshmem_ctx_longlong_atomic_or, \ + unsigned int*: pshmem_ctx_uint_atomic_or, \ + unsigned long*: pshmem_ctx_ulong_atomic_or, \ + unsigned long long*: pshmem_ctx_ulonglong_atomic_or, \ + default: __opshmem_datatype_ignore), \ + int*: pshmem_int_atomic_or, \ + long*: pshmem_long_atomic_or, \ + long long*: pshmem_longlong_atomic_or, \ + unsigned int*: pshmem_uint_atomic_or, \ + unsigned long*: pshmem_ulong_atomic_or, \ + unsigned long long*: pshmem_ulonglong_atomic_or)(__VA_ARGS__) #endif /* Atomic Xor */ +OSHMEM_DECLSPEC void pshmem_ctx_int_atomic_xor(shmem_ctx_t ctx, int *target, int value, int pe); +OSHMEM_DECLSPEC void pshmem_ctx_long_atomic_xor(shmem_ctx_t ctx, long *target, long value, int pe); +OSHMEM_DECLSPEC void pshmem_ctx_longlong_atomic_xor(shmem_ctx_t ctx, long long *target, long long value, int pe); OSHMEM_DECLSPEC void pshmem_ctx_uint_atomic_xor(shmem_ctx_t ctx, unsigned int *target, unsigned int value, int pe); OSHMEM_DECLSPEC void pshmem_ctx_ulong_atomic_xor(shmem_ctx_t ctx, unsigned long *target, unsigned long value, int pe); OSHMEM_DECLSPEC void pshmem_ctx_ulonglong_atomic_xor(shmem_ctx_t ctx, unsigned long long *target, unsigned long long value, int pe); -#if OSHMEM_HAVE_C11 -#define pshmem_atomic_xor(ctx, dst, val, pe) \ - _Generic(&*(dst), \ - unsigned int*: pshmem_ctx_uint_atomic_xor, \ - unsigned long*: pshmem_ctx_ulong_atomic_xor, \ - unsigned long long*: pshmem_ctx_ulonglong_atomic_xor)(ctx, dst, val, pe) -#endif - +OSHMEM_DECLSPEC void pshmem_ctx_int32_atomic_xor(shmem_ctx_t ctx, int32_t *target, int32_t value, int pe); +OSHMEM_DECLSPEC void pshmem_ctx_int64_atomic_xor(shmem_ctx_t ctx, int64_t *target, int64_t value, int pe); +OSHMEM_DECLSPEC void pshmem_ctx_uint32_atomic_xor(shmem_ctx_t ctx, uint32_t *target, uint32_t value, int pe); 
+OSHMEM_DECLSPEC void pshmem_ctx_uint64_atomic_xor(shmem_ctx_t ctx, uint64_t *target, uint64_t value, int pe); + +OSHMEM_DECLSPEC void pshmem_int_atomic_xor(int *target, int value, int pe); +OSHMEM_DECLSPEC void pshmem_long_atomic_xor(long *target, long value, int pe); +OSHMEM_DECLSPEC void pshmem_longlong_atomic_xor(long long *target, long long value, int pe); OSHMEM_DECLSPEC void pshmem_uint_atomic_xor(unsigned int *target, unsigned int value, int pe); OSHMEM_DECLSPEC void pshmem_ulong_atomic_xor(unsigned long *target, unsigned long value, int pe); OSHMEM_DECLSPEC void pshmem_ulonglong_atomic_xor(unsigned long long *target, unsigned long long value, int pe); +OSHMEM_DECLSPEC void pshmem_int32_atomic_xor(int32_t *target, int32_t value, int pe); +OSHMEM_DECLSPEC void pshmem_int64_atomic_xor(int64_t *target, int64_t value, int pe); +OSHMEM_DECLSPEC void pshmem_uint32_atomic_xor(uint32_t *target, uint32_t value, int pe); +OSHMEM_DECLSPEC void pshmem_uint64_atomic_xor(uint64_t *target, uint64_t value, int pe); #if OSHMEM_HAVE_C11 -#define pshmem_atomic_xor(dst, val, pe) \ - _Generic(&*(dst), \ - unsigned int*: pshmem_uint_atomic_xor, \ - unsigned long*: pshmem_ulong_atomic_xor, \ - unsigned long long*: pshmem_ulonglong_atomic_xor)(dst, val, pe) +#define pshmem_atomic_xor(...) 
\ + _Generic(&*(__OSHMEM_VAR_ARG1(__VA_ARGS__)), \ + shmem_ctx_t: _Generic((__OSHMEM_VAR_ARG2(__VA_ARGS__)), \ + int*: pshmem_ctx_int_atomic_xor, \ + long*: pshmem_ctx_long_atomic_xor, \ + long long*: pshmem_ctx_longlong_atomic_xor, \ + unsigned int*: pshmem_ctx_uint_atomic_xor, \ + unsigned long*: pshmem_ctx_ulong_atomic_xor, \ + unsigned long long*: pshmem_ctx_ulonglong_atomic_xor, \ + default: __opshmem_datatype_ignore), \ + int*: pshmem_int_atomic_xor, \ + long*: pshmem_long_atomic_xor, \ + long long*: pshmem_longlong_atomic_xor, \ + unsigned int*: pshmem_uint_atomic_xor, \ + unsigned long*: pshmem_ulong_atomic_xor, \ + unsigned long long*: pshmem_ulonglong_atomic_xor)(__VA_ARGS__) #endif /* Atomic Inc */ OSHMEM_DECLSPEC void pshmem_ctx_int_atomic_inc(shmem_ctx_t ctx, int *target, int pe); OSHMEM_DECLSPEC void pshmem_ctx_long_atomic_inc(shmem_ctx_t ctx, long *target, int pe); OSHMEM_DECLSPEC void pshmem_ctx_longlong_atomic_inc(shmem_ctx_t ctx, long long *target, int pe); -#if OSHMEM_HAVE_C11 -#define pshmem_atomic_inc(ctx, dst, pe) \ - _Generic(&*(dst), \ - int*: pshmem_ctx_int_atomic_inc, \ - long*: pshmem_ctx_long_atomic_inc, \ - long long*: pshmem_ctx_longlong_atomic_inc)(ctx, dst, pe) -#endif +OSHMEM_DECLSPEC void pshmem_ctx_uint_atomic_inc(shmem_ctx_t ctx, unsigned int *target, int pe); +OSHMEM_DECLSPEC void pshmem_ctx_ulong_atomic_inc(shmem_ctx_t ctx, unsigned long *target, int pe); +OSHMEM_DECLSPEC void pshmem_ctx_ulonglong_atomic_inc(shmem_ctx_t ctx, unsigned long long *target, int pe); OSHMEM_DECLSPEC void pshmem_int_atomic_inc(int *target, int pe); OSHMEM_DECLSPEC void pshmem_long_atomic_inc(long *target, int pe); OSHMEM_DECLSPEC void pshmem_longlong_atomic_inc(long long *target, int pe); +OSHMEM_DECLSPEC void pshmem_uint_atomic_inc(unsigned int *target, int pe); +OSHMEM_DECLSPEC void pshmem_ulong_atomic_inc(unsigned long *target, int pe); +OSHMEM_DECLSPEC void pshmem_ulonglong_atomic_inc(unsigned long long *target, int pe); #if OSHMEM_HAVE_C11 
-#define pshmem_atomic_inc(dst, pe) \ - _Generic(&*(dst), \ - int*: pshmem_int_atomic_inc, \ - long*: pshmem_long_atomic_inc, \ - long long*: pshmem_longlong_atomic_inc)(dst, pe) +#define pshmem_atomic_inc(...) \ + _Generic(&*(__OSHMEM_VAR_ARG1(__VA_ARGS__)), \ + shmem_ctx_t: _Generic((__OSHMEM_VAR_ARG2(__VA_ARGS__)), \ + int*: pshmem_ctx_int_atomic_inc, \ + long*: pshmem_ctx_long_atomic_inc, \ + long long*: pshmem_ctx_longlong_atomic_inc, \ + unsigned int*: pshmem_ctx_uint_atomic_inc, \ + unsigned long*: pshmem_ctx_ulong_atomic_inc, \ + unsigned long long*: pshmem_ctx_ulonglong_atomic_inc,\ + default: __opshmem_datatype_ignore), \ + int*: pshmem_int_atomic_inc, \ + long*: pshmem_long_atomic_inc, \ + long long*: pshmem_longlong_atomic_inc, \ + unsigned int*: pshmem_uint_atomic_inc, \ + unsigned long*: pshmem_ulong_atomic_inc, \ + unsigned long long*: pshmem_ulonglong_atomic_inc)(__VA_ARGS__) #endif OSHMEM_DECLSPEC void pshmem_int_inc(int *target, int pe); OSHMEM_DECLSPEC void pshmem_long_inc(long *target, int pe); OSHMEM_DECLSPEC void pshmem_longlong_inc(long long *target, int pe); -#if OSHMEMP_HAVE_C11 +#if OSHMEM_HAVE_C11 #define pshmem_inc(dst, pe) \ _Generic(&*(dst), \ int*: pshmem_int_inc, \ @@ -1006,26 +1528,54 @@ OSHMEM_DECLSPEC void pshmem_short_wait_until(volatile short *addr, int cmp, sho OSHMEM_DECLSPEC void pshmem_int_wait_until(volatile int *addr, int cmp, int value); OSHMEM_DECLSPEC void pshmem_long_wait_until(volatile long *addr, int cmp, long value); OSHMEM_DECLSPEC void pshmem_longlong_wait_until(volatile long long *addr, int cmp, long long value); +OSHMEM_DECLSPEC void pshmem_ushort_wait_until(volatile unsigned short *addr, int cmp, unsigned short value); +OSHMEM_DECLSPEC void pshmem_uint_wait_until(volatile unsigned int *addr, int cmp, unsigned int value); +OSHMEM_DECLSPEC void pshmem_ulong_wait_until(volatile unsigned long *addr, int cmp, unsigned long value); +OSHMEM_DECLSPEC void pshmem_ulonglong_wait_until(volatile unsigned long long *addr, 
int cmp, unsigned long long value); +OSHMEM_DECLSPEC void pshmem_int32_wait_until(volatile int32_t *addr, int cmp, int32_t value); +OSHMEM_DECLSPEC void pshmem_int64_wait_until(volatile int64_t *addr, int cmp, int64_t value); +OSHMEM_DECLSPEC void pshmem_uint32_wait_until(volatile uint32_t *addr, int cmp, uint32_t value); +OSHMEM_DECLSPEC void pshmem_uint64_wait_until(volatile uint64_t *addr, int cmp, uint64_t value); +OSHMEM_DECLSPEC void pshmem_size_wait_until(volatile size_t *addr, int cmp, size_t value); +OSHMEM_DECLSPEC void pshmem_ptrdiff_wait_until(volatile ptrdiff_t *addr, int cmp, ptrdiff_t value); #if OSHMEM_HAVE_C11 #define pshmem_wait_until(addr, cmp, value) \ _Generic(&*(addr), \ short*: pshmem_short_wait_until, \ int*: pshmem_int_wait_until, \ long*: pshmem_long_wait_until, \ - long long*: pshmem_longlong_wait_until(addr, cmp, value) + long long*: pshmem_longlong_wait_until, \ + unsigned short*: pshmem_ushort_wait_until, \ + unsigned int*: pshmem_uint_wait_until, \ + unsigned long*: pshmem_ulong_wait_until, \ + unsigned long long*: pshmem_ulonglong_wait_until)(addr, cmp, value) #endif OSHMEM_DECLSPEC int pshmem_short_test(volatile short *addr, int cmp, short value); OSHMEM_DECLSPEC int pshmem_int_test(volatile int *addr, int cmp, int value); OSHMEM_DECLSPEC int pshmem_long_test(volatile long *addr, int cmp, long value); OSHMEM_DECLSPEC int pshmem_longlong_test(volatile long long *addr, int cmp, long long value); +OSHMEM_DECLSPEC int pshmem_ushort_test(volatile unsigned short *addr, int cmp, unsigned short value); +OSHMEM_DECLSPEC int pshmem_uint_test(volatile unsigned int *addr, int cmp, unsigned int value); +OSHMEM_DECLSPEC int pshmem_ulong_test(volatile unsigned long *addr, int cmp, unsigned long value); +OSHMEM_DECLSPEC int pshmem_ulonglong_test(volatile unsigned long long *addr, int cmp, unsigned long long value); +OSHMEM_DECLSPEC int pshmem_int32_test(volatile int32_t *addr, int cmp, int32_t value); +OSHMEM_DECLSPEC int pshmem_int64_test(volatile
int64_t *addr, int cmp, int64_t value); +OSHMEM_DECLSPEC int pshmem_uint32_test(volatile uint32_t *addr, int cmp, uint32_t value); +OSHMEM_DECLSPEC int pshmem_uint64_test(volatile uint64_t *addr, int cmp, uint64_t value); +OSHMEM_DECLSPEC int pshmem_size_test(volatile size_t *addr, int cmp, size_t value); +OSHMEM_DECLSPEC int pshmem_ptrdiff_test(volatile ptrdiff_t *addr, int cmp, ptrdiff_t value); #if OSHMEM_HAVE_C11 -#define pshmem_test(addr, cmp, value) \ - _Generic(&*(addr), \ - short*: pshmem_short_test, \ - int*: pshmem_int_test, \ - long*: pshmem_long_test, \ - long long*: pshmem_longlong_test(addr, cmp, value) +#define pshmem_test(addr, cmp, value) \ + _Generic(&*(addr), \ + short*: pshmem_short_test, \ + int*: pshmem_int_test, \ + long*: pshmem_long_test, \ + long long*: pshmem_longlong_test, \ + unsigned short*: pshmem_ushort_test, \ + unsigned int*: pshmem_uint_test, \ + unsigned long*: pshmem_ulong_test, \ + unsigned long long*: pshmem_ulonglong_test)(addr, cmp, value) #endif /* diff --git a/oshmem/include/pshmemx.h b/oshmem/include/pshmemx.h index 50a141a97dd..0b4ffcbd202 100644 --- a/oshmem/include/pshmemx.h +++ b/oshmem/include/pshmemx.h @@ -16,6 +16,11 @@ extern "C" { #endif +/* + * Symmetric heap routines + */ +OSHMEM_DECLSPEC void* pshmemx_malloc_with_hint(size_t size, long hint); + /* * Legacy API @@ -184,14 +189,6 @@ OSHMEM_DECLSPEC void pshmemx_int64_prod_to_all(int64_t *target, const int64_t *s /* * Backward compatibility section */ -#define pshmem_int16_p pshmemx_int16_p -#define pshmem_int32_p pshmemx_int32_p -#define pshmem_int64_p pshmemx_int64_p - -#define pshmem_int16_g pshmemx_int16_g -#define pshmem_int32_g pshmemx_int32_g -#define pshmem_int64_g pshmemx_int64_g - #define pshmem_int32_swap pshmemx_int32_swap #define pshmem_int64_swap pshmemx_int64_swap @@ -218,8 +215,6 @@ OSHMEM_DECLSPEC void pshmemx_int64_prod_to_all(int64_t *target, const int64_t *s #define pshmem_int32_wait pshmemx_int32_wait #define pshmem_int64_wait pshmemx_int64_wait
-#define pshmem_int32_wait_until pshmemx_int32_wait_until -#define pshmem_int64_wait_until pshmemx_int64_wait_until #define pshmem_int16_and_to_all pshmemx_int16_and_to_all #define pshmem_int32_and_to_all pshmemx_int32_and_to_all diff --git a/oshmem/include/shmem.h.in b/oshmem/include/shmem.h.in index 43a2a582545..853659b613b 100644 --- a/oshmem/include/shmem.h.in +++ b/oshmem/include/shmem.h.in @@ -54,6 +54,22 @@ extern "C" { #endif +#if OSHMEM_HAVE_C11 +#define __OSHMEM_VAR_ARG1_EXPAND(_arg1, ...) _arg1 +#define __OSHMEM_VAR_ARG1(...) __OSHMEM_VAR_ARG1_EXPAND(__VA_ARGS__, _extra) +#define __OSHMEM_VAR_ARG2(_arg1, ...) __OSHMEM_VAR_ARG1_EXPAND(__VA_ARGS__, _extra) +static inline void __oshmem_datatype_ignore(void) {} +#endif + +/* + * SHMEM_Init_thread constants + */ +enum { + SHMEM_THREAD_SINGLE, + SHMEM_THREAD_FUNNELED, + SHMEM_THREAD_SERIALIZED, + SHMEM_THREAD_MULTIPLE +}; /* * OpenSHMEM API (www.openshmem.org) @@ -83,6 +99,10 @@ extern "C" { #define SHMEM_VENDOR_STRING "http://www.open-mpi.org/" #define SHMEM_MAX_NAME_LEN 256 +#define SHMEM_CTX_PRIVATE (1<<0) +#define SHMEM_CTX_SERIALIZED (1<<1) +#define SHMEM_CTX_NOSTORE (1<<2) + /* * Deprecated (but still valid) names */ @@ -130,6 +150,7 @@ enum shmem_wait_ops { #define SHMEM_ALLTOALLS_SYNC_SIZE _SHMEM_ALLTOALLS_SYNC_SIZE #define SHMEM_REDUCE_MIN_WRKDATA_SIZE _SHMEM_REDUCE_MIN_WRKDATA_SIZE #define SHMEM_SYNC_VALUE _SHMEM_SYNC_VALUE +#define SHMEM_SYNC_SIZE _SHMEM_COLLECT_SYNC_SIZE /* @@ -179,7 +200,9 @@ OSHMEM_DECLSPEC void *shmem_ptr(const void *ptr, int pe); typedef struct { int dummy; } * shmem_ctx_t; -#define SHMEM_CTX_DEFAULT oshmem_ctx_default; +#define SHMEM_CTX_DEFAULT oshmem_ctx_default + +extern shmem_ctx_t oshmem_ctx_default; OSHMEM_DECLSPEC int shmem_ctx_create(long options, shmem_ctx_t *ctx); OSHMEM_DECLSPEC void shmem_ctx_destroy(shmem_ctx_t ctx); @@ -194,19 +217,23 @@ OSHMEM_DECLSPEC void shmem_ctx_long_p(shmem_ctx_t ctx, long* addr, long value, OSHMEM_DECLSPEC void 
shmem_ctx_float_p(shmem_ctx_t ctx, float* addr, float value, int pe); OSHMEM_DECLSPEC void shmem_ctx_double_p(shmem_ctx_t ctx, double* addr, double value, int pe); OSHMEM_DECLSPEC void shmem_ctx_longlong_p(shmem_ctx_t ctx, long long* addr, long long value, int pe); +OSHMEM_DECLSPEC void shmem_ctx_schar_p(shmem_ctx_t ctx, signed char* addr, signed char value, int pe); +OSHMEM_DECLSPEC void shmem_ctx_uchar_p(shmem_ctx_t ctx, unsigned char* addr, unsigned char value, int pe); +OSHMEM_DECLSPEC void shmem_ctx_ushort_p(shmem_ctx_t ctx, unsigned short* addr, unsigned short value, int pe); +OSHMEM_DECLSPEC void shmem_ctx_uint_p(shmem_ctx_t ctx, unsigned int* addr, unsigned int value, int pe); +OSHMEM_DECLSPEC void shmem_ctx_ulong_p(shmem_ctx_t ctx, unsigned long* addr, unsigned long value, int pe); +OSHMEM_DECLSPEC void shmem_ctx_ulonglong_p(shmem_ctx_t ctx, unsigned long long* addr, unsigned long long value, int pe); OSHMEM_DECLSPEC void shmem_ctx_longdouble_p(shmem_ctx_t ctx, long double* addr, long double value, int pe); -#if OSHMEM_HAVE_C11 -#define shmem_p(ctx, dst, val, pe) \ - _Generic(&*(dst), \ - char*: shmem_ctx_char_p, \ - short*: shmem_ctx_short_p, \ - int*: shmem_ctx_int_p, \ - long*: shmem_ctx_long_p, \ - long long*: shmem_ctx_longlong_p, \ - float*: shmem_ctx_float_p, \ - double*: shmem_ctx_double_p, \ - long double*: shmem_ctx_longdouble_p)(ctx, dst, val, pe) -#endif +OSHMEM_DECLSPEC void shmem_ctx_int8_p(shmem_ctx_t ctx, int8_t* addr, int8_t value, int pe); +OSHMEM_DECLSPEC void shmem_ctx_int16_p(shmem_ctx_t ctx, int16_t* addr, int16_t value, int pe); +OSHMEM_DECLSPEC void shmem_ctx_int32_p(shmem_ctx_t ctx, int32_t* addr, int32_t value, int pe); +OSHMEM_DECLSPEC void shmem_ctx_int64_p(shmem_ctx_t ctx, int64_t* addr, int64_t value, int pe); +OSHMEM_DECLSPEC void shmem_ctx_uint8_p(shmem_ctx_t ctx, uint8_t* addr, uint8_t value, int pe); +OSHMEM_DECLSPEC void shmem_ctx_uint16_p(shmem_ctx_t ctx, uint16_t* addr, uint16_t value, int pe); +OSHMEM_DECLSPEC void 
shmem_ctx_uint32_p(shmem_ctx_t ctx, uint32_t* addr, uint32_t value, int pe); +OSHMEM_DECLSPEC void shmem_ctx_uint64_p(shmem_ctx_t ctx, uint64_t* addr, uint64_t value, int pe); +OSHMEM_DECLSPEC void shmem_ctx_size_p(shmem_ctx_t ctx, size_t* addr, size_t value, int pe); +OSHMEM_DECLSPEC void shmem_ctx_ptrdiff_p(shmem_ctx_t ctx, ptrdiff_t* addr, ptrdiff_t value, int pe); OSHMEM_DECLSPEC void shmem_char_p(char* addr, char value, int pe); OSHMEM_DECLSPEC void shmem_short_p(short* addr, short value, int pe); @@ -215,18 +242,56 @@ OSHMEM_DECLSPEC void shmem_long_p(long* addr, long value, int pe); OSHMEM_DECLSPEC void shmem_float_p(float* addr, float value, int pe); OSHMEM_DECLSPEC void shmem_double_p(double* addr, double value, int pe); OSHMEM_DECLSPEC void shmem_longlong_p(long long* addr, long long value, int pe); +OSHMEM_DECLSPEC void shmem_schar_p(signed char* addr, signed char value, int pe); +OSHMEM_DECLSPEC void shmem_uchar_p(unsigned char* addr, unsigned char value, int pe); +OSHMEM_DECLSPEC void shmem_ushort_p(unsigned short* addr, unsigned short value, int pe); +OSHMEM_DECLSPEC void shmem_uint_p(unsigned int* addr, unsigned int value, int pe); +OSHMEM_DECLSPEC void shmem_ulong_p(unsigned long* addr, unsigned long value, int pe); +OSHMEM_DECLSPEC void shmem_ulonglong_p(unsigned long long* addr, unsigned long long value, int pe); OSHMEM_DECLSPEC void shmem_longdouble_p(long double* addr, long double value, int pe); +OSHMEM_DECLSPEC void shmem_int8_p(int8_t* addr, int8_t value, int pe); +OSHMEM_DECLSPEC void shmem_int16_p(int16_t* addr, int16_t value, int pe); +OSHMEM_DECLSPEC void shmem_int32_p(int32_t* addr, int32_t value, int pe); +OSHMEM_DECLSPEC void shmem_int64_p(int64_t* addr, int64_t value, int pe); +OSHMEM_DECLSPEC void shmem_uint8_p(uint8_t* addr, uint8_t value, int pe); +OSHMEM_DECLSPEC void shmem_uint16_p(uint16_t* addr, uint16_t value, int pe); +OSHMEM_DECLSPEC void shmem_uint32_p(uint32_t* addr, uint32_t value, int pe); +OSHMEM_DECLSPEC void 
shmem_uint64_p(uint64_t* addr, uint64_t value, int pe); +OSHMEM_DECLSPEC void shmem_size_p(size_t* addr, size_t value, int pe); +OSHMEM_DECLSPEC void shmem_ptrdiff_p(ptrdiff_t* addr, ptrdiff_t value, int pe); #if OSHMEM_HAVE_C11 -#define shmem_p(dst, val, pe) \ - _Generic(&*(dst), \ - char*: shmem_char_p, \ - short*: shmem_short_p, \ - int*: shmem_int_p, \ - long*: shmem_long_p, \ - long long*: shmem_longlong_p, \ - float*: shmem_float_p, \ - double*: shmem_double_p, \ - long double*: shmem_longdouble_p)(dst, val, pe) +#define shmem_p(...) \ + _Generic(&*(__OSHMEM_VAR_ARG1(__VA_ARGS__)), \ + shmem_ctx_t: _Generic((__OSHMEM_VAR_ARG2(__VA_ARGS__)), \ + char*: shmem_ctx_char_p, \ + short*: shmem_ctx_short_p, \ + int*: shmem_ctx_int_p, \ + long*: shmem_ctx_long_p, \ + long long*: shmem_ctx_longlong_p, \ + signed char*: shmem_ctx_schar_p, \ + unsigned char*: shmem_ctx_uchar_p, \ + unsigned short*: shmem_ctx_ushort_p, \ + unsigned int*: shmem_ctx_uint_p, \ + unsigned long*: shmem_ctx_ulong_p, \ + unsigned long long*: shmem_ctx_ulonglong_p, \ + float*: shmem_ctx_float_p, \ + double*: shmem_ctx_double_p, \ + long double*: shmem_ctx_longdouble_p, \ + default: __oshmem_datatype_ignore), \ + char*: shmem_char_p, \ + short*: shmem_short_p, \ + int*: shmem_int_p, \ + long*: shmem_long_p, \ + long long*: shmem_longlong_p, \ + signed char*: shmem_schar_p, \ + unsigned char*: shmem_uchar_p, \ + unsigned short*: shmem_ushort_p, \ + unsigned int*: shmem_uint_p, \ + unsigned long*: shmem_ulong_p, \ + unsigned long long*: shmem_ulonglong_p, \ + float*: shmem_float_p, \ + double*: shmem_double_p, \ + long double*: shmem_longdouble_p)(__VA_ARGS__) #endif /* @@ -234,44 +299,86 @@ OSHMEM_DECLSPEC void shmem_longdouble_p(long double* addr, long double value, i */ OSHMEM_DECLSPEC void shmem_ctx_char_put(shmem_ctx_t ctx, char *target, const char *source, size_t len, int pe); OSHMEM_DECLSPEC void shmem_ctx_short_put(shmem_ctx_t ctx, short *target, const short *source, size_t len, int pe); 
-OSHMEM_DECLSPEC void shmem_ctx_int_put(shmem_ctx_t ctx, int* target, const int* source, size_t len, int pe); +OSHMEM_DECLSPEC void shmem_ctx_int_put(shmem_ctx_t ctx, int *target, const int *source, size_t len, int pe); OSHMEM_DECLSPEC void shmem_ctx_long_put(shmem_ctx_t ctx, long *target, const long *source, size_t len, int pe); OSHMEM_DECLSPEC void shmem_ctx_float_put(shmem_ctx_t ctx, float *target, const float *source, size_t len, int pe); OSHMEM_DECLSPEC void shmem_ctx_double_put(shmem_ctx_t ctx, double *target, const double *source, size_t len, int pe); OSHMEM_DECLSPEC void shmem_ctx_longlong_put(shmem_ctx_t ctx, long long *target, const long long *source, size_t len, int pe); +OSHMEM_DECLSPEC void shmem_ctx_schar_put(shmem_ctx_t ctx, signed char *target, const signed char *source, size_t len, int pe); +OSHMEM_DECLSPEC void shmem_ctx_uchar_put(shmem_ctx_t ctx, unsigned char *target, const unsigned char *source, size_t len, int pe); +OSHMEM_DECLSPEC void shmem_ctx_ushort_put(shmem_ctx_t ctx, unsigned short *target, const unsigned short *source, size_t len, int pe); +OSHMEM_DECLSPEC void shmem_ctx_uint_put(shmem_ctx_t ctx, unsigned int *target, const unsigned int *source, size_t len, int pe); +OSHMEM_DECLSPEC void shmem_ctx_ulong_put(shmem_ctx_t ctx, unsigned long *target, const unsigned long *source, size_t len, int pe); +OSHMEM_DECLSPEC void shmem_ctx_ulonglong_put(shmem_ctx_t ctx, unsigned long long *target, const unsigned long long *source, size_t len, int pe); OSHMEM_DECLSPEC void shmem_ctx_longdouble_put(shmem_ctx_t ctx, long double *target, const long double *source, size_t len, int pe); -#if OSHMEM_HAVE_C11 -#define shmem_put(ctx, dst, src, len, pe) \ - _Generic(&*(dst), \ - char*: shmem_ctx_char_put, \ - short*: shmem_ctx_short_put, \ - int*: shmem_ctx_int_put, \ - long*: shmem_ctx_long_put, \ - long long*: shmem_ctx_longlong_put, \ - float*: shmem_ctx_float_put, \ - double*: shmem_ctx_double_put, \ - long double*: shmem_ctx_longdouble_put)(ctx, dst, 
src, len, pe) -#endif +OSHMEM_DECLSPEC void shmem_ctx_int8_put(shmem_ctx_t ctx, int8_t *target, const int8_t *source, size_t len, int pe); +OSHMEM_DECLSPEC void shmem_ctx_int16_put(shmem_ctx_t ctx, int16_t *target, const int16_t *source, size_t len, int pe); +OSHMEM_DECLSPEC void shmem_ctx_int32_put(shmem_ctx_t ctx, int32_t *target, const int32_t *source, size_t len, int pe); +OSHMEM_DECLSPEC void shmem_ctx_int64_put(shmem_ctx_t ctx, int64_t *target, const int64_t *source, size_t len, int pe); +OSHMEM_DECLSPEC void shmem_ctx_uint8_put(shmem_ctx_t ctx, uint8_t *target, const uint8_t *source, size_t len, int pe); +OSHMEM_DECLSPEC void shmem_ctx_uint16_put(shmem_ctx_t ctx, uint16_t *target, const uint16_t *source, size_t len, int pe); +OSHMEM_DECLSPEC void shmem_ctx_uint32_put(shmem_ctx_t ctx, uint32_t *target, const uint32_t *source, size_t len, int pe); +OSHMEM_DECLSPEC void shmem_ctx_uint64_put(shmem_ctx_t ctx, uint64_t *target, const uint64_t *source, size_t len, int pe); +OSHMEM_DECLSPEC void shmem_ctx_size_put(shmem_ctx_t ctx, size_t *target, const size_t *source, size_t len, int pe); +OSHMEM_DECLSPEC void shmem_ctx_ptrdiff_put(shmem_ctx_t ctx, ptrdiff_t *target, const ptrdiff_t *source, size_t len, int pe); OSHMEM_DECLSPEC void shmem_char_put(char *target, const char *source, size_t len, int pe); OSHMEM_DECLSPEC void shmem_short_put(short *target, const short *source, size_t len, int pe); -OSHMEM_DECLSPEC void shmem_int_put(int* target, const int* source, size_t len, int pe); +OSHMEM_DECLSPEC void shmem_int_put(int *target, const int *source, size_t len, int pe); OSHMEM_DECLSPEC void shmem_long_put(long *target, const long *source, size_t len, int pe); OSHMEM_DECLSPEC void shmem_float_put(float *target, const float *source, size_t len, int pe); OSHMEM_DECLSPEC void shmem_double_put(double *target, const double *source, size_t len, int pe); OSHMEM_DECLSPEC void shmem_longlong_put(long long *target, const long long *source, size_t len, int pe); +OSHMEM_DECLSPEC 
void shmem_schar_put(signed char *target, const signed char *source, size_t len, int pe); +OSHMEM_DECLSPEC void shmem_uchar_put(unsigned char *target, const unsigned char *source, size_t len, int pe); +OSHMEM_DECLSPEC void shmem_ushort_put(unsigned short *target, const unsigned short *source, size_t len, int pe); +OSHMEM_DECLSPEC void shmem_uint_put(unsigned int *target, const unsigned int *source, size_t len, int pe); +OSHMEM_DECLSPEC void shmem_ulong_put(unsigned long *target, const unsigned long *source, size_t len, int pe); +OSHMEM_DECLSPEC void shmem_ulonglong_put(unsigned long long *target, const unsigned long long *source, size_t len, int pe); OSHMEM_DECLSPEC void shmem_longdouble_put(long double *target, const long double *source, size_t len, int pe); +OSHMEM_DECLSPEC void shmem_int8_put(int8_t *target, const int8_t *source, size_t len, int pe); +OSHMEM_DECLSPEC void shmem_int16_put(int16_t *target, const int16_t *source, size_t len, int pe); +OSHMEM_DECLSPEC void shmem_int32_put(int32_t *target, const int32_t *source, size_t len, int pe); +OSHMEM_DECLSPEC void shmem_int64_put(int64_t *target, const int64_t *source, size_t len, int pe); +OSHMEM_DECLSPEC void shmem_uint8_put(uint8_t *target, const uint8_t *source, size_t len, int pe); +OSHMEM_DECLSPEC void shmem_uint16_put(uint16_t *target, const uint16_t *source, size_t len, int pe); +OSHMEM_DECLSPEC void shmem_uint32_put(uint32_t *target, const uint32_t *source, size_t len, int pe); +OSHMEM_DECLSPEC void shmem_uint64_put(uint64_t *target, const uint64_t *source, size_t len, int pe); +OSHMEM_DECLSPEC void shmem_size_put(size_t *target, const size_t *source, size_t len, int pe); +OSHMEM_DECLSPEC void shmem_ptrdiff_put(ptrdiff_t *target, const ptrdiff_t *source, size_t len, int pe); #if OSHMEM_HAVE_C11 -#define shmem_put(dst, src, len, pe) \ - _Generic(&*(dst), \ - char*: shmem_char_put, \ - short*: shmem_short_put, \ - int*: shmem_int_put, \ - long*: shmem_long_put, \ - long long*: shmem_longlong_put, \ - 
float*: shmem_float_put, \ - double*: shmem_double_put, \ - long double*: shmem_longdouble_put)(dst, src, len, pe) +#define shmem_put(...) \ + _Generic(&*(__OSHMEM_VAR_ARG1(__VA_ARGS__)), \ + shmem_ctx_t: _Generic(&*(__OSHMEM_VAR_ARG2(__VA_ARGS__)),\ + char*: shmem_ctx_char_put, \ + short*: shmem_ctx_short_put, \ + int*: shmem_ctx_int_put, \ + long*: shmem_ctx_long_put, \ + long long*: shmem_ctx_longlong_put, \ + signed char*: shmem_ctx_schar_put, \ + unsigned char*: shmem_ctx_uchar_put, \ + unsigned short*: shmem_ctx_ushort_put, \ + unsigned int*: shmem_ctx_uint_put, \ + unsigned long*: shmem_ctx_ulong_put, \ + unsigned long long*: shmem_ctx_ulonglong_put, \ + float*: shmem_ctx_float_put, \ + double*: shmem_ctx_double_put, \ + long double*: shmem_ctx_longdouble_put, \ + default: __oshmem_datatype_ignore), \ + char*: shmem_char_put, \ + short*: shmem_short_put, \ + int*: shmem_int_put, \ + long*: shmem_long_put, \ + long long*: shmem_longlong_put, \ + signed char*: shmem_schar_put, \ + unsigned char*: shmem_uchar_put, \ + unsigned short*: shmem_ushort_put, \ + unsigned int*: shmem_uint_put, \ + unsigned long*: shmem_ulong_put, \ + unsigned long long*: shmem_ulonglong_put, \ + float*: shmem_float_put, \ + double*: shmem_double_put, \ + long double*: shmem_longdouble_put)(__VA_ARGS__) #endif OSHMEM_DECLSPEC void shmem_ctx_put8(shmem_ctx_t ctx, void *target, const void *source, size_t len, int pe); @@ -299,19 +406,23 @@ OSHMEM_DECLSPEC void shmem_ctx_long_iput(shmem_ctx_t ctx, long* target, const lo OSHMEM_DECLSPEC void shmem_ctx_float_iput(shmem_ctx_t ctx, float* target, const float* source, ptrdiff_t tst, ptrdiff_t sst,size_t len, int pe); OSHMEM_DECLSPEC void shmem_ctx_double_iput(shmem_ctx_t ctx, double* target, const double* source, ptrdiff_t tst, ptrdiff_t sst,size_t len, int pe); OSHMEM_DECLSPEC void shmem_ctx_longlong_iput(shmem_ctx_t ctx, long long* target, const long long* source, ptrdiff_t tst, ptrdiff_t sst,size_t len, int pe); +OSHMEM_DECLSPEC void 
shmem_ctx_schar_iput(shmem_ctx_t ctx, signed char* target, const signed char* source, ptrdiff_t tst, ptrdiff_t sst,size_t len, int pe); +OSHMEM_DECLSPEC void shmem_ctx_uchar_iput(shmem_ctx_t ctx, unsigned char* target, const unsigned char* source, ptrdiff_t tst, ptrdiff_t sst,size_t len, int pe); +OSHMEM_DECLSPEC void shmem_ctx_ushort_iput(shmem_ctx_t ctx, unsigned short* target, const unsigned short* source, ptrdiff_t tst, ptrdiff_t sst,size_t len, int pe); +OSHMEM_DECLSPEC void shmem_ctx_uint_iput(shmem_ctx_t ctx, unsigned int* target, const unsigned int* source, ptrdiff_t tst, ptrdiff_t sst,size_t len, int pe); +OSHMEM_DECLSPEC void shmem_ctx_ulong_iput(shmem_ctx_t ctx, unsigned long* target, const unsigned long* source, ptrdiff_t tst, ptrdiff_t sst,size_t len, int pe); +OSHMEM_DECLSPEC void shmem_ctx_ulonglong_iput(shmem_ctx_t ctx, unsigned long long* target, const unsigned long long* source, ptrdiff_t tst, ptrdiff_t sst,size_t len, int pe); OSHMEM_DECLSPEC void shmem_ctx_longdouble_iput(shmem_ctx_t ctx, long double* target, const long double* source, ptrdiff_t tst, ptrdiff_t sst,size_t len, int pe); -#if OSHMEM_HAVE_C11 -#define shmem_iput(ctx, dst, src, tst, sst, len, pe) \ - _Generic(&*(dst), \ - char*: shmem_ctx_char_iput, \ - short*: shmem_ctx_short_iput, \ - int*: shmem_ctx_int_iput, \ - long*: shmem_ctx_long_iput, \ - long long*: shmem_ctx_longlong_iput, \ - float*: shmem_ctx_float_iput, \ - double*: shmem_ctx_double_iput, \ - long double*: shmem_ctx_longdouble_iput)(ctx, dst, src, tst, sst, len, pe) -#endif +OSHMEM_DECLSPEC void shmem_ctx_int8_iput(shmem_ctx_t ctx, int8_t* target, const int8_t* source, ptrdiff_t tst, ptrdiff_t sst,size_t len, int pe); +OSHMEM_DECLSPEC void shmem_ctx_int16_iput(shmem_ctx_t ctx, int16_t* target, const int16_t* source, ptrdiff_t tst, ptrdiff_t sst,size_t len, int pe); +OSHMEM_DECLSPEC void shmem_ctx_int32_iput(shmem_ctx_t ctx, int32_t* target, const int32_t* source, ptrdiff_t tst, ptrdiff_t sst,size_t len, int pe); 
+OSHMEM_DECLSPEC void shmem_ctx_int64_iput(shmem_ctx_t ctx, int64_t* target, const int64_t* source, ptrdiff_t tst, ptrdiff_t sst,size_t len, int pe); +OSHMEM_DECLSPEC void shmem_ctx_uint8_iput(shmem_ctx_t ctx, uint8_t* target, const uint8_t* source, ptrdiff_t tst, ptrdiff_t sst,size_t len, int pe); +OSHMEM_DECLSPEC void shmem_ctx_uint16_iput(shmem_ctx_t ctx, uint16_t* target, const uint16_t* source, ptrdiff_t tst, ptrdiff_t sst,size_t len, int pe); +OSHMEM_DECLSPEC void shmem_ctx_uint32_iput(shmem_ctx_t ctx, uint32_t* target, const uint32_t* source, ptrdiff_t tst, ptrdiff_t sst,size_t len, int pe); +OSHMEM_DECLSPEC void shmem_ctx_uint64_iput(shmem_ctx_t ctx, uint64_t* target, const uint64_t* source, ptrdiff_t tst, ptrdiff_t sst,size_t len, int pe); +OSHMEM_DECLSPEC void shmem_ctx_size_iput(shmem_ctx_t ctx, size_t* target, const size_t* source, ptrdiff_t tst, ptrdiff_t sst,size_t len, int pe); +OSHMEM_DECLSPEC void shmem_ctx_ptrdiff_iput(shmem_ctx_t ctx, ptrdiff_t* target, const ptrdiff_t* source, ptrdiff_t tst, ptrdiff_t sst,size_t len, int pe); OSHMEM_DECLSPEC void shmem_char_iput(char* target, const char* source, ptrdiff_t tst, ptrdiff_t sst,size_t len, int pe); OSHMEM_DECLSPEC void shmem_short_iput(short* target, const short* source, ptrdiff_t tst, ptrdiff_t sst,size_t len, int pe); @@ -320,18 +431,56 @@ OSHMEM_DECLSPEC void shmem_long_iput(long* target, const long* source, ptrdiff_t OSHMEM_DECLSPEC void shmem_float_iput(float* target, const float* source, ptrdiff_t tst, ptrdiff_t sst,size_t len, int pe); OSHMEM_DECLSPEC void shmem_double_iput(double* target, const double* source, ptrdiff_t tst, ptrdiff_t sst,size_t len, int pe); OSHMEM_DECLSPEC void shmem_longlong_iput(long long* target, const long long* source, ptrdiff_t tst, ptrdiff_t sst,size_t len, int pe); +OSHMEM_DECLSPEC void shmem_schar_iput(signed char* target, const signed char* source, ptrdiff_t tst, ptrdiff_t sst,size_t len, int pe); +OSHMEM_DECLSPEC void shmem_uchar_iput(unsigned char* target, 
const unsigned char* source, ptrdiff_t tst, ptrdiff_t sst,size_t len, int pe); +OSHMEM_DECLSPEC void shmem_ushort_iput(unsigned short* target, const unsigned short* source, ptrdiff_t tst, ptrdiff_t sst,size_t len, int pe); +OSHMEM_DECLSPEC void shmem_uint_iput(unsigned int* target, const unsigned int* source, ptrdiff_t tst, ptrdiff_t sst,size_t len, int pe); +OSHMEM_DECLSPEC void shmem_ulong_iput(unsigned long* target, const unsigned long* source, ptrdiff_t tst, ptrdiff_t sst,size_t len, int pe); +OSHMEM_DECLSPEC void shmem_ulonglong_iput(unsigned long long* target, const unsigned long long* source, ptrdiff_t tst, ptrdiff_t sst,size_t len, int pe); OSHMEM_DECLSPEC void shmem_longdouble_iput(long double* target, const long double* source, ptrdiff_t tst, ptrdiff_t sst,size_t len, int pe); +OSHMEM_DECLSPEC void shmem_int8_iput(int8_t* target, const int8_t* source, ptrdiff_t tst, ptrdiff_t sst,size_t len, int pe); +OSHMEM_DECLSPEC void shmem_int16_iput(int16_t* target, const int16_t* source, ptrdiff_t tst, ptrdiff_t sst,size_t len, int pe); +OSHMEM_DECLSPEC void shmem_int32_iput(int32_t* target, const int32_t* source, ptrdiff_t tst, ptrdiff_t sst,size_t len, int pe); +OSHMEM_DECLSPEC void shmem_int64_iput(int64_t* target, const int64_t* source, ptrdiff_t tst, ptrdiff_t sst,size_t len, int pe); +OSHMEM_DECLSPEC void shmem_uint8_iput(uint8_t* target, const uint8_t* source, ptrdiff_t tst, ptrdiff_t sst,size_t len, int pe); +OSHMEM_DECLSPEC void shmem_uint16_iput(uint16_t* target, const uint16_t* source, ptrdiff_t tst, ptrdiff_t sst,size_t len, int pe); +OSHMEM_DECLSPEC void shmem_uint32_iput(uint32_t* target, const uint32_t* source, ptrdiff_t tst, ptrdiff_t sst,size_t len, int pe); +OSHMEM_DECLSPEC void shmem_uint64_iput(uint64_t* target, const uint64_t* source, ptrdiff_t tst, ptrdiff_t sst,size_t len, int pe); +OSHMEM_DECLSPEC void shmem_size_iput(size_t* target, const size_t* source, ptrdiff_t tst, ptrdiff_t sst,size_t len, int pe); +OSHMEM_DECLSPEC void 
shmem_ptrdiff_iput(ptrdiff_t* target, const ptrdiff_t* source, ptrdiff_t tst, ptrdiff_t sst,size_t len, int pe); #if OSHMEM_HAVE_C11 -#define shmem_iput(dst, src, tst, sst, len, pe) \ - _Generic(&*(dst), \ - char*: shmem_char_iput, \ - short*: shmem_short_iput, \ - int*: shmem_int_iput, \ - long*: shmem_long_iput, \ - long long*: shmem_longlong_iput, \ - float*: shmem_float_iput, \ - double*: shmem_double_iput, \ - long double*: shmem_longdouble_iput)(dst, src, tst, sst, len, pe) +#define shmem_iput(...) \ + _Generic(&*(__OSHMEM_VAR_ARG1(__VA_ARGS__)), \ + shmem_ctx_t: _Generic(&*(__OSHMEM_VAR_ARG2(__VA_ARGS__)),\ + char*: shmem_ctx_char_iput, \ + short*: shmem_ctx_short_iput, \ + int*: shmem_ctx_int_iput, \ + long*: shmem_ctx_long_iput, \ + long long*: shmem_ctx_longlong_iput, \ + signed char*: shmem_ctx_schar_iput, \ + unsigned char*: shmem_ctx_uchar_iput, \ + unsigned short*: shmem_ctx_ushort_iput, \ + unsigned int*: shmem_ctx_uint_iput, \ + unsigned long*: shmem_ctx_ulong_iput, \ + unsigned long long*: shmem_ctx_ulonglong_iput, \ + float*: shmem_ctx_float_iput, \ + double*: shmem_ctx_double_iput, \ + long double*: shmem_ctx_longdouble_iput, \ + default: __oshmem_datatype_ignore), \ + char*: shmem_char_iput, \ + short*: shmem_short_iput, \ + int*: shmem_int_iput, \ + long*: shmem_long_iput, \ + long long*: shmem_longlong_iput, \ + signed char*: shmem_schar_iput, \ + unsigned char*: shmem_uchar_iput, \ + unsigned short*: shmem_ushort_iput, \ + unsigned int*: shmem_uint_iput, \ + unsigned long*: shmem_ulong_iput, \ + unsigned long long*: shmem_ulonglong_iput, \ + float*: shmem_float_iput, \ + double*: shmem_double_iput, \ + long double*: shmem_longdouble_iput)(__VA_ARGS__) #endif OSHMEM_DECLSPEC void shmem_ctx_iput8(shmem_ctx_t ctx, void* target, const void* source, ptrdiff_t tst, ptrdiff_t sst,size_t len, int pe); @@ -351,44 +500,86 @@ OSHMEM_DECLSPEC void shmem_iput128(void* target, const void* source, ptrdiff_t t */ OSHMEM_DECLSPEC void 
shmem_ctx_char_put_nbi(shmem_ctx_t ctx, char *target, const char *source, size_t len, int pe); OSHMEM_DECLSPEC void shmem_ctx_short_put_nbi(shmem_ctx_t ctx, short *target, const short *source, size_t len, int pe); -OSHMEM_DECLSPEC void shmem_ctx_int_put_nbi(shmem_ctx_t ctx, int* target, const int* source, size_t len, int pe); +OSHMEM_DECLSPEC void shmem_ctx_int_put_nbi(shmem_ctx_t ctx, int *target, const int *source, size_t len, int pe); OSHMEM_DECLSPEC void shmem_ctx_long_put_nbi(shmem_ctx_t ctx, long *target, const long *source, size_t len, int pe); -OSHMEM_DECLSPEC void shmem_ctx_longlong_put_nbi(shmem_ctx_t ctx, long long *target, const long long *source, size_t len, int pe); OSHMEM_DECLSPEC void shmem_ctx_float_put_nbi(shmem_ctx_t ctx, float *target, const float *source, size_t len, int pe); OSHMEM_DECLSPEC void shmem_ctx_double_put_nbi(shmem_ctx_t ctx, double *target, const double *source, size_t len, int pe); +OSHMEM_DECLSPEC void shmem_ctx_longlong_put_nbi(shmem_ctx_t ctx, long long *target, const long long *source, size_t len, int pe); +OSHMEM_DECLSPEC void shmem_ctx_schar_put_nbi(shmem_ctx_t ctx, signed char *target, const signed char *source, size_t len, int pe); +OSHMEM_DECLSPEC void shmem_ctx_uchar_put_nbi(shmem_ctx_t ctx, unsigned char *target, const unsigned char *source, size_t len, int pe); +OSHMEM_DECLSPEC void shmem_ctx_ushort_put_nbi(shmem_ctx_t ctx, unsigned short *target, const unsigned short *source, size_t len, int pe); +OSHMEM_DECLSPEC void shmem_ctx_uint_put_nbi(shmem_ctx_t ctx, unsigned int *target, const unsigned int *source, size_t len, int pe); +OSHMEM_DECLSPEC void shmem_ctx_ulong_put_nbi(shmem_ctx_t ctx, unsigned long *target, const unsigned long *source, size_t len, int pe); +OSHMEM_DECLSPEC void shmem_ctx_ulonglong_put_nbi(shmem_ctx_t ctx, unsigned long long *target, const unsigned long long *source, size_t len, int pe); OSHMEM_DECLSPEC void shmem_ctx_longdouble_put_nbi(shmem_ctx_t ctx, long double *target, const long double 
*source, size_t len, int pe); -#if OSHMEM_HAVE_C11 -#define shmem_put_nbi(ctx, dst, src, len, pe) \ - _Generic(&*(dst), \ - char*: shmem_ctx_char_put_nbi, \ - short*: shmem_ctx_short_put_nbi, \ - int*: shmem_ctx_int_put_nbi, \ - long*: shmem_ctx_long_put_nbi, \ - long long*: shmem_ctx_longlong_put_nbi, \ - float*: shmem_ctx_float_put_nbi, \ - double*: shmem_ctx_double_put_nbi, \ - long double*: shmem_ctx_longdouble_put_nbi)(ctx, dst, src, len, pe) -#endif +OSHMEM_DECLSPEC void shmem_ctx_int8_put_nbi(shmem_ctx_t ctx, int8_t *target, const int8_t *source, size_t len, int pe); +OSHMEM_DECLSPEC void shmem_ctx_int16_put_nbi(shmem_ctx_t ctx, int16_t *target, const int16_t *source, size_t len, int pe); +OSHMEM_DECLSPEC void shmem_ctx_int32_put_nbi(shmem_ctx_t ctx, int32_t *target, const int32_t *source, size_t len, int pe); +OSHMEM_DECLSPEC void shmem_ctx_int64_put_nbi(shmem_ctx_t ctx, int64_t *target, const int64_t *source, size_t len, int pe); +OSHMEM_DECLSPEC void shmem_ctx_uint8_put_nbi(shmem_ctx_t ctx, uint8_t *target, const uint8_t *source, size_t len, int pe); +OSHMEM_DECLSPEC void shmem_ctx_uint16_put_nbi(shmem_ctx_t ctx, uint16_t *target, const uint16_t *source, size_t len, int pe); +OSHMEM_DECLSPEC void shmem_ctx_uint32_put_nbi(shmem_ctx_t ctx, uint32_t *target, const uint32_t *source, size_t len, int pe); +OSHMEM_DECLSPEC void shmem_ctx_uint64_put_nbi(shmem_ctx_t ctx, uint64_t *target, const uint64_t *source, size_t len, int pe); +OSHMEM_DECLSPEC void shmem_ctx_size_put_nbi(shmem_ctx_t ctx, size_t *target, const size_t *source, size_t len, int pe); +OSHMEM_DECLSPEC void shmem_ctx_ptrdiff_put_nbi(shmem_ctx_t ctx, ptrdiff_t *target, const ptrdiff_t *source, size_t len, int pe); OSHMEM_DECLSPEC void shmem_char_put_nbi(char *target, const char *source, size_t len, int pe); OSHMEM_DECLSPEC void shmem_short_put_nbi(short *target, const short *source, size_t len, int pe); -OSHMEM_DECLSPEC void shmem_int_put_nbi(int* target, const int* source, size_t len, int pe); 
+OSHMEM_DECLSPEC void shmem_int_put_nbi(int *target, const int *source, size_t len, int pe); OSHMEM_DECLSPEC void shmem_long_put_nbi(long *target, const long *source, size_t len, int pe); -OSHMEM_DECLSPEC void shmem_longlong_put_nbi(long long *target, const long long *source, size_t len, int pe); OSHMEM_DECLSPEC void shmem_float_put_nbi(float *target, const float *source, size_t len, int pe); OSHMEM_DECLSPEC void shmem_double_put_nbi(double *target, const double *source, size_t len, int pe); +OSHMEM_DECLSPEC void shmem_longlong_put_nbi(long long *target, const long long *source, size_t len, int pe); +OSHMEM_DECLSPEC void shmem_schar_put_nbi(signed char *target, const signed char *source, size_t len, int pe); +OSHMEM_DECLSPEC void shmem_uchar_put_nbi(unsigned char *target, const unsigned char *source, size_t len, int pe); +OSHMEM_DECLSPEC void shmem_ushort_put_nbi(unsigned short *target, const unsigned short *source, size_t len, int pe); +OSHMEM_DECLSPEC void shmem_uint_put_nbi(unsigned int *target, const unsigned int *source, size_t len, int pe); +OSHMEM_DECLSPEC void shmem_ulong_put_nbi(unsigned long *target, const unsigned long *source, size_t len, int pe); +OSHMEM_DECLSPEC void shmem_ulonglong_put_nbi(unsigned long long *target, const unsigned long long *source, size_t len, int pe); OSHMEM_DECLSPEC void shmem_longdouble_put_nbi(long double *target, const long double *source, size_t len, int pe); +OSHMEM_DECLSPEC void shmem_int8_put_nbi(int8_t *target, const int8_t *source, size_t len, int pe); +OSHMEM_DECLSPEC void shmem_int16_put_nbi(int16_t *target, const int16_t *source, size_t len, int pe); +OSHMEM_DECLSPEC void shmem_int32_put_nbi(int32_t *target, const int32_t *source, size_t len, int pe); +OSHMEM_DECLSPEC void shmem_int64_put_nbi(int64_t *target, const int64_t *source, size_t len, int pe); +OSHMEM_DECLSPEC void shmem_uint8_put_nbi(uint8_t *target, const uint8_t *source, size_t len, int pe); +OSHMEM_DECLSPEC void shmem_uint16_put_nbi(uint16_t *target, 
const uint16_t *source, size_t len, int pe); +OSHMEM_DECLSPEC void shmem_uint32_put_nbi(uint32_t *target, const uint32_t *source, size_t len, int pe); +OSHMEM_DECLSPEC void shmem_uint64_put_nbi(uint64_t *target, const uint64_t *source, size_t len, int pe); +OSHMEM_DECLSPEC void shmem_size_put_nbi(size_t *target, const size_t *source, size_t len, int pe); +OSHMEM_DECLSPEC void shmem_ptrdiff_put_nbi(ptrdiff_t *target, const ptrdiff_t *source, size_t len, int pe); #if OSHMEM_HAVE_C11 -#define shmem_put_nbi(dst, src, len, pe) \ - _Generic(&*(dst), \ - char*: shmem_char_put_nbi, \ - short*: shmem_short_put_nbi, \ - int*: shmem_int_put_nbi, \ - long*: shmem_long_put_nbi, \ - long long*: shmem_longlong_put_nbi, \ - float*: shmem_float_put_nbi, \ - double*: shmem_double_put_nbi, \ - long double*: shmem_longdouble_put_nbi)(dst, src, len, pe) +#define shmem_put_nbi(...) \ + _Generic(&*(__OSHMEM_VAR_ARG1(__VA_ARGS__)), \ + shmem_ctx_t: _Generic(&*(__OSHMEM_VAR_ARG2(__VA_ARGS__)), \ + char*: shmem_ctx_char_put_nbi, \ + short*: shmem_ctx_short_put_nbi, \ + int*: shmem_ctx_int_put_nbi, \ + long*: shmem_ctx_long_put_nbi, \ + long long*: shmem_ctx_longlong_put_nbi, \ + signed char*: shmem_ctx_schar_put_nbi, \ + unsigned char*: shmem_ctx_uchar_put_nbi, \ + unsigned short*: shmem_ctx_ushort_put_nbi, \ + unsigned int*: shmem_ctx_uint_put_nbi, \ + unsigned long*: shmem_ctx_ulong_put_nbi, \ + unsigned long long*: shmem_ctx_ulonglong_put_nbi, \ + float*: shmem_ctx_float_put_nbi, \ + double*: shmem_ctx_double_put_nbi, \ + long double*: shmem_ctx_longdouble_put_nbi, \ + default: __oshmem_datatype_ignore), \ + char*: shmem_char_put_nbi, \ + short*: shmem_short_put_nbi, \ + int*: shmem_int_put_nbi, \ + long*: shmem_long_put_nbi, \ + long long*: shmem_longlong_put_nbi, \ + signed char*: shmem_schar_put_nbi, \ + unsigned char*: shmem_uchar_put_nbi, \ + unsigned short*: shmem_ushort_put_nbi, \ + unsigned int*: shmem_uint_put_nbi, \ + unsigned long*: shmem_ulong_put_nbi, \ + unsigned long 
long*: shmem_ulonglong_put_nbi, \ + float*: shmem_float_put_nbi, \ + double*: shmem_double_put_nbi, \ + long double*: shmem_longdouble_put_nbi)(__VA_ARGS__) #endif OSHMEM_DECLSPEC void shmem_ctx_put8_nbi(shmem_ctx_t ctx, void *target, const void *source, size_t len, int pe); @@ -416,18 +607,22 @@ OSHMEM_DECLSPEC float shmem_ctx_float_g(shmem_ctx_t ctx, const float* addr, int OSHMEM_DECLSPEC double shmem_ctx_double_g(shmem_ctx_t ctx, const double* addr, int pe); OSHMEM_DECLSPEC long long shmem_ctx_longlong_g(shmem_ctx_t ctx, const long long* addr, int pe); OSHMEM_DECLSPEC long double shmem_ctx_longdouble_g(shmem_ctx_t ctx, const long double* addr, int pe); -#if OSHMEM_HAVE_C11 -#define shmem_g(ctx, addr, pe) \ - _Generic(&*(addr), \ - char*: shmem_ctx_char_g, \ - short*: shmem_ctx_short_g, \ - int*: shmem_ctx_int_g, \ - long*: shmem_ctx_long_g, \ - long long*: shmem_ctx_longlong_g, \ - float*: shmem_ctx_float_g, \ - double*: shmem_ctx_double_g, \ - long double*: shmem_ctx_longdouble_g)(ctx, addr, pe) -#endif +OSHMEM_DECLSPEC signed char shmem_ctx_schar_g(shmem_ctx_t ctx, const signed char* addr, int pe); +OSHMEM_DECLSPEC unsigned char shmem_ctx_uchar_g(shmem_ctx_t ctx, const unsigned char* addr, int pe); +OSHMEM_DECLSPEC unsigned short shmem_ctx_ushort_g(shmem_ctx_t ctx, const unsigned short* addr, int pe); +OSHMEM_DECLSPEC unsigned int shmem_ctx_uint_g(shmem_ctx_t ctx, const unsigned int* addr, int pe); +OSHMEM_DECLSPEC unsigned long shmem_ctx_ulong_g(shmem_ctx_t ctx, const unsigned long* addr, int pe); +OSHMEM_DECLSPEC unsigned long long shmem_ctx_ulonglong_g(shmem_ctx_t ctx, const unsigned long long* addr, int pe); +OSHMEM_DECLSPEC int8_t shmem_ctx_int8_g(shmem_ctx_t ctx, const int8_t* addr, int pe); +OSHMEM_DECLSPEC int16_t shmem_ctx_int16_g(shmem_ctx_t ctx, const int16_t* addr, int pe); +OSHMEM_DECLSPEC int32_t shmem_ctx_int32_g(shmem_ctx_t ctx, const int32_t* addr, int pe); +OSHMEM_DECLSPEC int64_t shmem_ctx_int64_g(shmem_ctx_t ctx, const int64_t* addr, int 
pe); +OSHMEM_DECLSPEC uint8_t shmem_ctx_uint8_g(shmem_ctx_t ctx, const uint8_t* addr, int pe); +OSHMEM_DECLSPEC uint16_t shmem_ctx_uint16_g(shmem_ctx_t ctx, const uint16_t* addr, int pe); +OSHMEM_DECLSPEC uint32_t shmem_ctx_uint32_g(shmem_ctx_t ctx, const uint32_t* addr, int pe); +OSHMEM_DECLSPEC uint64_t shmem_ctx_uint64_g(shmem_ctx_t ctx, const uint64_t* addr, int pe); +OSHMEM_DECLSPEC size_t shmem_ctx_size_g(shmem_ctx_t ctx, const size_t* addr, int pe); +OSHMEM_DECLSPEC ptrdiff_t shmem_ctx_ptrdiff_g(shmem_ctx_t ctx, const ptrdiff_t* addr, int pe); OSHMEM_DECLSPEC char shmem_char_g(const char* addr, int pe); OSHMEM_DECLSPEC short shmem_short_g(const short* addr, int pe); @@ -437,17 +632,55 @@ OSHMEM_DECLSPEC float shmem_float_g(const float* addr, int pe); OSHMEM_DECLSPEC double shmem_double_g(const double* addr, int pe); OSHMEM_DECLSPEC long long shmem_longlong_g(const long long* addr, int pe); OSHMEM_DECLSPEC long double shmem_longdouble_g(const long double* addr, int pe); +OSHMEM_DECLSPEC signed char shmem_schar_g(const signed char* addr, int pe); +OSHMEM_DECLSPEC unsigned char shmem_uchar_g(const unsigned char* addr, int pe); +OSHMEM_DECLSPEC unsigned short shmem_ushort_g(const unsigned short* addr, int pe); +OSHMEM_DECLSPEC unsigned int shmem_uint_g(const unsigned int* addr, int pe); +OSHMEM_DECLSPEC unsigned long shmem_ulong_g(const unsigned long* addr, int pe); +OSHMEM_DECLSPEC unsigned long long shmem_ulonglong_g(const unsigned long long* addr, int pe); +OSHMEM_DECLSPEC int8_t shmem_int8_g(const int8_t* addr, int pe); +OSHMEM_DECLSPEC int16_t shmem_int16_g(const int16_t* addr, int pe); +OSHMEM_DECLSPEC int32_t shmem_int32_g(const int32_t* addr, int pe); +OSHMEM_DECLSPEC int64_t shmem_int64_g(const int64_t* addr, int pe); +OSHMEM_DECLSPEC uint8_t shmem_uint8_g(const uint8_t* addr, int pe); +OSHMEM_DECLSPEC uint16_t shmem_uint16_g(const uint16_t* addr, int pe); +OSHMEM_DECLSPEC uint32_t shmem_uint32_g(const uint32_t* addr, int pe); +OSHMEM_DECLSPEC uint64_t 
shmem_uint64_g(const uint64_t* addr, int pe); +OSHMEM_DECLSPEC size_t shmem_size_g(const size_t* addr, int pe); +OSHMEM_DECLSPEC ptrdiff_t shmem_ptrdiff_g(const ptrdiff_t* addr, int pe); #if OSHMEM_HAVE_C11 -#define shmem_g(addr, pe) \ - _Generic(&*(addr), \ - char*: shmem_char_g, \ - short*: shmem_short_g, \ - int*: shmem_int_g, \ - long*: shmem_long_g, \ - long long*: shmem_longlong_g, \ - float*: shmem_float_g, \ - double*: shmem_double_g, \ - long double*: shmem_longdouble_g)(addr, pe) +#define shmem_g(...) \ + _Generic(&*(__OSHMEM_VAR_ARG1(__VA_ARGS__)), \ + shmem_ctx_t: _Generic((__OSHMEM_VAR_ARG2(__VA_ARGS__)),\ + char*: shmem_ctx_char_g, \ + short*: shmem_ctx_short_g, \ + int*: shmem_ctx_int_g, \ + long*: shmem_ctx_long_g, \ + long long*: shmem_ctx_longlong_g, \ + signed char*: shmem_ctx_schar_g, \ + unsigned char*: shmem_ctx_uchar_g, \ + unsigned short*: shmem_ctx_ushort_g, \ + unsigned int*: shmem_ctx_uint_g, \ + unsigned long*: shmem_ctx_ulong_g, \ + unsigned long long*: shmem_ctx_ulonglong_g, \ + float*: shmem_ctx_float_g, \ + double*: shmem_ctx_double_g, \ + long double*: shmem_ctx_longdouble_g, \ + default: __oshmem_datatype_ignore), \ + char*: shmem_char_g, \ + short*: shmem_short_g, \ + int*: shmem_int_g, \ + long*: shmem_long_g, \ + long long*: shmem_longlong_g, \ + signed char*: shmem_schar_g, \ + unsigned char*: shmem_uchar_g, \ + unsigned short*: shmem_ushort_g, \ + unsigned int*: shmem_uint_g, \ + unsigned long*: shmem_ulong_g, \ + unsigned long long*: shmem_ulonglong_g, \ + float*: shmem_float_g, \ + double*: shmem_double_g, \ + long double*: shmem_longdouble_g)(__VA_ARGS__) #endif /* @@ -460,19 +693,23 @@ OSHMEM_DECLSPEC void shmem_ctx_long_get(shmem_ctx_t ctx, long *target, const lo OSHMEM_DECLSPEC void shmem_ctx_float_get(shmem_ctx_t ctx, float *target, const float *source, size_t len, int pe); OSHMEM_DECLSPEC void shmem_ctx_double_get(shmem_ctx_t ctx, double *target, const double *source, size_t len, int pe); OSHMEM_DECLSPEC void 
shmem_ctx_longlong_get(shmem_ctx_t ctx, long long *target, const long long *source, size_t len, int pe); +OSHMEM_DECLSPEC void shmem_ctx_schar_get(shmem_ctx_t ctx, signed char *target, const signed char *source, size_t len, int pe); +OSHMEM_DECLSPEC void shmem_ctx_uchar_get(shmem_ctx_t ctx, unsigned char *target, const unsigned char *source, size_t len, int pe); +OSHMEM_DECLSPEC void shmem_ctx_ushort_get(shmem_ctx_t ctx, unsigned short *target, const unsigned short *source, size_t len, int pe); +OSHMEM_DECLSPEC void shmem_ctx_uint_get(shmem_ctx_t ctx, unsigned int *target, const unsigned int *source, size_t len, int pe); +OSHMEM_DECLSPEC void shmem_ctx_ulong_get(shmem_ctx_t ctx, unsigned long *target, const unsigned long *source, size_t len, int pe); +OSHMEM_DECLSPEC void shmem_ctx_ulonglong_get(shmem_ctx_t ctx, unsigned long long *target, const unsigned long long *source, size_t len, int pe); OSHMEM_DECLSPEC void shmem_ctx_longdouble_get(shmem_ctx_t ctx, long double *target, const long double *source, size_t len, int pe); -#if OSHMEM_HAVE_C11 -#define shmem_get(ctx, dst, src, len, pe) \ - _Generic(&*(dst), \ - char*: shmem_ctx_char_get, \ - short*: shmem_ctx_short_get, \ - int*: shmem_ctx_int_get, \ - long*: shmem_ctx_long_get, \ - long long*: shmem_ctx_longlong_get, \ - float*: shmem_ctx_float_get, \ - double*: shmem_ctx_double_get, \ - long double*: shmem_ctx_longdouble_get)(ctx, dst, src, len, pe) -#endif +OSHMEM_DECLSPEC void shmem_ctx_int8_get(shmem_ctx_t ctx, int8_t *target, const int8_t *source, size_t len, int pe); +OSHMEM_DECLSPEC void shmem_ctx_int16_get(shmem_ctx_t ctx, int16_t *target, const int16_t *source, size_t len, int pe); +OSHMEM_DECLSPEC void shmem_ctx_int32_get(shmem_ctx_t ctx, int32_t *target, const int32_t *source, size_t len, int pe); +OSHMEM_DECLSPEC void shmem_ctx_int64_get(shmem_ctx_t ctx, int64_t *target, const int64_t *source, size_t len, int pe); +OSHMEM_DECLSPEC void shmem_ctx_uint8_get(shmem_ctx_t ctx, uint8_t *target, const uint8_t 
*source, size_t len, int pe); +OSHMEM_DECLSPEC void shmem_ctx_uint16_get(shmem_ctx_t ctx, uint16_t *target, const uint16_t *source, size_t len, int pe); +OSHMEM_DECLSPEC void shmem_ctx_uint32_get(shmem_ctx_t ctx, uint32_t *target, const uint32_t *source, size_t len, int pe); +OSHMEM_DECLSPEC void shmem_ctx_uint64_get(shmem_ctx_t ctx, uint64_t *target, const uint64_t *source, size_t len, int pe); +OSHMEM_DECLSPEC void shmem_ctx_size_get(shmem_ctx_t ctx, size_t *target, const size_t *source, size_t len, int pe); +OSHMEM_DECLSPEC void shmem_ctx_ptrdiff_get(shmem_ctx_t ctx, ptrdiff_t *target, const ptrdiff_t *source, size_t len, int pe); OSHMEM_DECLSPEC void shmem_char_get(char *target, const char *source, size_t len, int pe); OSHMEM_DECLSPEC void shmem_short_get(short *target, const short *source, size_t len, int pe); @@ -481,18 +718,56 @@ OSHMEM_DECLSPEC void shmem_long_get(long *target, const long *source, size_t le OSHMEM_DECLSPEC void shmem_float_get(float *target, const float *source, size_t len, int pe); OSHMEM_DECLSPEC void shmem_double_get(double *target, const double *source, size_t len, int pe); OSHMEM_DECLSPEC void shmem_longlong_get(long long *target, const long long *source, size_t len, int pe); +OSHMEM_DECLSPEC void shmem_schar_get(signed char *target, const signed char *source, size_t len, int pe); +OSHMEM_DECLSPEC void shmem_uchar_get(unsigned char *target, const unsigned char *source, size_t len, int pe); +OSHMEM_DECLSPEC void shmem_ushort_get(unsigned short *target, const unsigned short *source, size_t len, int pe); +OSHMEM_DECLSPEC void shmem_uint_get(unsigned int *target, const unsigned int *source, size_t len, int pe); +OSHMEM_DECLSPEC void shmem_ulong_get(unsigned long *target, const unsigned long *source, size_t len, int pe); +OSHMEM_DECLSPEC void shmem_ulonglong_get(unsigned long long *target, const unsigned long long *source, size_t len, int pe); OSHMEM_DECLSPEC void shmem_longdouble_get(long double *target, const long double *source, size_t 
len, int pe); +OSHMEM_DECLSPEC void shmem_int8_get(int8_t *target, const int8_t *source, size_t len, int pe); +OSHMEM_DECLSPEC void shmem_int16_get(int16_t *target, const int16_t *source, size_t len, int pe); +OSHMEM_DECLSPEC void shmem_int32_get(int32_t *target, const int32_t *source, size_t len, int pe); +OSHMEM_DECLSPEC void shmem_int64_get(int64_t *target, const int64_t *source, size_t len, int pe); +OSHMEM_DECLSPEC void shmem_uint8_get(uint8_t *target, const uint8_t *source, size_t len, int pe); +OSHMEM_DECLSPEC void shmem_uint16_get(uint16_t *target, const uint16_t *source, size_t len, int pe); +OSHMEM_DECLSPEC void shmem_uint32_get(uint32_t *target, const uint32_t *source, size_t len, int pe); +OSHMEM_DECLSPEC void shmem_uint64_get(uint64_t *target, const uint64_t *source, size_t len, int pe); +OSHMEM_DECLSPEC void shmem_size_get(size_t *target, const size_t *source, size_t len, int pe); +OSHMEM_DECLSPEC void shmem_ptrdiff_get(ptrdiff_t *target, const ptrdiff_t *source, size_t len, int pe); #if OSHMEM_HAVE_C11 -#define shmem_get(dst, src, len, pe) \ - _Generic(&*(dst), \ - char*: shmem_char_get, \ - short*: shmem_short_get, \ - int*: shmem_int_get, \ - long*: shmem_long_get, \ - long long*: shmem_longlong_get, \ - float*: shmem_float_get, \ - double*: shmem_double_get, \ - long double*: shmem_longdouble_get)(dst, src, len, pe) +#define shmem_get(...) 
\ + _Generic(&*(__OSHMEM_VAR_ARG1(__VA_ARGS__)), \ + shmem_ctx_t: _Generic(&*(__OSHMEM_VAR_ARG2(__VA_ARGS__)),\ + char*: shmem_ctx_char_get, \ + short*: shmem_ctx_short_get, \ + int*: shmem_ctx_int_get, \ + long*: shmem_ctx_long_get, \ + long long*: shmem_ctx_longlong_get, \ + signed char*: shmem_ctx_schar_get, \ + unsigned char*: shmem_ctx_uchar_get, \ + unsigned short*: shmem_ctx_ushort_get, \ + unsigned int*: shmem_ctx_uint_get, \ + unsigned long*: shmem_ctx_ulong_get, \ + unsigned long long*: shmem_ctx_ulonglong_get, \ + float*: shmem_ctx_float_get, \ + double*: shmem_ctx_double_get, \ + long double*: shmem_ctx_longdouble_get, \ + default: __oshmem_datatype_ignore), \ + char*: shmem_char_get, \ + short*: shmem_short_get, \ + int*: shmem_int_get, \ + long*: shmem_long_get, \ + long long*: shmem_longlong_get, \ + signed char*: shmem_schar_get, \ + unsigned char*: shmem_uchar_get, \ + unsigned short*: shmem_ushort_get, \ + unsigned int*: shmem_uint_get, \ + unsigned long*: shmem_ulong_get, \ + unsigned long long*: shmem_ulonglong_get, \ + float*: shmem_float_get, \ + double*: shmem_double_get, \ + long double*: shmem_longdouble_get)(__VA_ARGS__) #endif OSHMEM_DECLSPEC void shmem_ctx_get8(shmem_ctx_t ctx, void *target, const void *source, size_t len, int pe); @@ -512,46 +787,88 @@ OSHMEM_DECLSPEC void shmem_getmem(void *target, const void *source, size_t len, /* * Strided get routines */ -OSHMEM_DECLSPEC void shmem_ctx_char_iget(shmem_ctx_t ctx, char* target, const char* source, ptrdiff_t tst, ptrdiff_t sst,size_t len, int pe); -OSHMEM_DECLSPEC void shmem_ctx_short_iget(shmem_ctx_t ctx, short* target, const short* source, ptrdiff_t tst, ptrdiff_t sst,size_t len, int pe); -OSHMEM_DECLSPEC void shmem_ctx_int_iget(shmem_ctx_t ctx, int* target, const int* source, ptrdiff_t tst, ptrdiff_t sst,size_t len, int pe); -OSHMEM_DECLSPEC void shmem_ctx_float_iget(shmem_ctx_t ctx, float* target, const float* source, ptrdiff_t tst, ptrdiff_t sst,size_t len, int pe); 
-OSHMEM_DECLSPEC void shmem_ctx_double_iget(shmem_ctx_t ctx, double* target, const double* source, ptrdiff_t tst, ptrdiff_t sst,size_t len, int pe); -OSHMEM_DECLSPEC void shmem_ctx_longlong_iget(shmem_ctx_t ctx, long long* target, const long long* source, ptrdiff_t tst, ptrdiff_t sst,size_t len, int pe); -OSHMEM_DECLSPEC void shmem_ctx_longdouble_iget(shmem_ctx_t ctx, long double* target, const long double* source, ptrdiff_t tst, ptrdiff_t sst,size_t len, int pe); -OSHMEM_DECLSPEC void shmem_ctx_long_iget(shmem_ctx_t ctx, long* target, const long* source, ptrdiff_t tst, ptrdiff_t sst,size_t len, int pe); -#if OSHMEM_HAVE_C11 -#define shmem_iget(ctx, dst, src, tst, sst, len, pe)\ - _Generic(&*(dst), \ - char*: shmem_ctx_char_iget, \ - short*: shmem_ctx_short_iget, \ - int*: shmem_ctx_int_iget, \ - long*: shmem_ctx_long_iget, \ - long long*: shmem_ctx_longlong_iget, \ - float*: shmem_ctx_float_iget, \ - double*: shmem_ctx_double_iget, \ - long double*: shmem_ctx_longdouble_iget)(ctx, dst, src, tst, sst, len, pe) -#endif - -OSHMEM_DECLSPEC void shmem_char_iget(char* target, const char* source, ptrdiff_t tst, ptrdiff_t sst,size_t len, int pe); -OSHMEM_DECLSPEC void shmem_short_iget(short* target, const short* source, ptrdiff_t tst, ptrdiff_t sst,size_t len, int pe); -OSHMEM_DECLSPEC void shmem_int_iget(int* target, const int* source, ptrdiff_t tst, ptrdiff_t sst,size_t len, int pe); -OSHMEM_DECLSPEC void shmem_float_iget(float* target, const float* source, ptrdiff_t tst, ptrdiff_t sst,size_t len, int pe); -OSHMEM_DECLSPEC void shmem_double_iget(double* target, const double* source, ptrdiff_t tst, ptrdiff_t sst,size_t len, int pe); -OSHMEM_DECLSPEC void shmem_longlong_iget(long long* target, const long long* source, ptrdiff_t tst, ptrdiff_t sst,size_t len, int pe); -OSHMEM_DECLSPEC void shmem_longdouble_iget(long double* target, const long double* source, ptrdiff_t tst, ptrdiff_t sst,size_t len, int pe); -OSHMEM_DECLSPEC void shmem_long_iget(long* target, const long* 
source, ptrdiff_t tst, ptrdiff_t sst,size_t len, int pe); +OSHMEM_DECLSPEC void shmem_ctx_char_iget(shmem_ctx_t ctx, char* target, const char* source, ptrdiff_t tst, ptrdiff_t sst, size_t len, int pe); +OSHMEM_DECLSPEC void shmem_ctx_short_iget(shmem_ctx_t ctx, short* target, const short* source, ptrdiff_t tst, ptrdiff_t sst, size_t len, int pe); +OSHMEM_DECLSPEC void shmem_ctx_int_iget(shmem_ctx_t ctx, int* target, const int* source, ptrdiff_t tst, ptrdiff_t sst, size_t len, int pe); +OSHMEM_DECLSPEC void shmem_ctx_long_iget(shmem_ctx_t ctx, long* target, const long* source, ptrdiff_t tst, ptrdiff_t sst, size_t len, int pe); +OSHMEM_DECLSPEC void shmem_ctx_longlong_iget(shmem_ctx_t ctx, long long* target, const long long* source, ptrdiff_t tst, ptrdiff_t sst, size_t len, int pe); +OSHMEM_DECLSPEC void shmem_ctx_schar_iget(shmem_ctx_t ctx, signed char* target, const signed char* source, ptrdiff_t tst, ptrdiff_t sst, size_t len, int pe); +OSHMEM_DECLSPEC void shmem_ctx_uchar_iget(shmem_ctx_t ctx, unsigned char* target, const unsigned char* source, ptrdiff_t tst, ptrdiff_t sst, size_t len, int pe); +OSHMEM_DECLSPEC void shmem_ctx_ushort_iget(shmem_ctx_t ctx, unsigned short* target, const unsigned short* source, ptrdiff_t tst, ptrdiff_t sst, size_t len, int pe); +OSHMEM_DECLSPEC void shmem_ctx_uint_iget(shmem_ctx_t ctx, unsigned int* target, const unsigned int* source, ptrdiff_t tst, ptrdiff_t sst, size_t len, int pe); +OSHMEM_DECLSPEC void shmem_ctx_ulong_iget(shmem_ctx_t ctx, unsigned long* target, const unsigned long* source, ptrdiff_t tst, ptrdiff_t sst, size_t len, int pe); +OSHMEM_DECLSPEC void shmem_ctx_ulonglong_iget(shmem_ctx_t ctx, unsigned long long* target, const unsigned long long* source, ptrdiff_t tst, ptrdiff_t sst, size_t len, int pe); +OSHMEM_DECLSPEC void shmem_ctx_float_iget(shmem_ctx_t ctx, float* target, const float* source, ptrdiff_t tst, ptrdiff_t sst, size_t len, int pe); +OSHMEM_DECLSPEC void shmem_ctx_double_iget(shmem_ctx_t ctx, double* 
target, const double* source, ptrdiff_t tst, ptrdiff_t sst, size_t len, int pe); +OSHMEM_DECLSPEC void shmem_ctx_longdouble_iget(shmem_ctx_t ctx, long double* target, const long double* source, ptrdiff_t tst, ptrdiff_t sst, size_t len, int pe); +OSHMEM_DECLSPEC void shmem_ctx_int8_iget(shmem_ctx_t ctx, int8_t* target, const int8_t* source, ptrdiff_t tst, ptrdiff_t sst, size_t len, int pe); +OSHMEM_DECLSPEC void shmem_ctx_int16_iget(shmem_ctx_t ctx, int16_t* target, const int16_t* source, ptrdiff_t tst, ptrdiff_t sst, size_t len, int pe); +OSHMEM_DECLSPEC void shmem_ctx_int32_iget(shmem_ctx_t ctx, int32_t* target, const int32_t* source, ptrdiff_t tst, ptrdiff_t sst, size_t len, int pe); +OSHMEM_DECLSPEC void shmem_ctx_int64_iget(shmem_ctx_t ctx, int64_t* target, const int64_t* source, ptrdiff_t tst, ptrdiff_t sst, size_t len, int pe); +OSHMEM_DECLSPEC void shmem_ctx_uint8_iget(shmem_ctx_t ctx, uint8_t* target, const uint8_t* source, ptrdiff_t tst, ptrdiff_t sst, size_t len, int pe); +OSHMEM_DECLSPEC void shmem_ctx_uint16_iget(shmem_ctx_t ctx, uint16_t* target, const uint16_t* source, ptrdiff_t tst, ptrdiff_t sst, size_t len, int pe); +OSHMEM_DECLSPEC void shmem_ctx_uint32_iget(shmem_ctx_t ctx, uint32_t* target, const uint32_t* source, ptrdiff_t tst, ptrdiff_t sst, size_t len, int pe); +OSHMEM_DECLSPEC void shmem_ctx_uint64_iget(shmem_ctx_t ctx, uint64_t* target, const uint64_t* source, ptrdiff_t tst, ptrdiff_t sst, size_t len, int pe); +OSHMEM_DECLSPEC void shmem_ctx_size_iget(shmem_ctx_t ctx, size_t* target, const size_t* source, ptrdiff_t tst, ptrdiff_t sst, size_t len, int pe); +OSHMEM_DECLSPEC void shmem_ctx_ptrdiff_iget(shmem_ctx_t ctx, ptrdiff_t* target, const ptrdiff_t* source, ptrdiff_t tst, ptrdiff_t sst, size_t len, int pe); + +OSHMEM_DECLSPEC void shmem_char_iget(char* target, const char* source, ptrdiff_t tst, ptrdiff_t sst, size_t len, int pe); +OSHMEM_DECLSPEC void shmem_short_iget(short* target, const short* source, ptrdiff_t tst, ptrdiff_t sst, 
size_t len, int pe); +OSHMEM_DECLSPEC void shmem_int_iget(int* target, const int* source, ptrdiff_t tst, ptrdiff_t sst, size_t len, int pe); +OSHMEM_DECLSPEC void shmem_float_iget(float* target, const float* source, ptrdiff_t tst, ptrdiff_t sst, size_t len, int pe); +OSHMEM_DECLSPEC void shmem_double_iget(double* target, const double* source, ptrdiff_t tst, ptrdiff_t sst, size_t len, int pe); +OSHMEM_DECLSPEC void shmem_longlong_iget(long long* target, const long long* source, ptrdiff_t tst, ptrdiff_t sst, size_t len, int pe); +OSHMEM_DECLSPEC void shmem_longdouble_iget(long double* target, const long double* source, ptrdiff_t tst, ptrdiff_t sst, size_t len, int pe); +OSHMEM_DECLSPEC void shmem_long_iget(long* target, const long* source, ptrdiff_t tst, ptrdiff_t sst, size_t len, int pe); +OSHMEM_DECLSPEC void shmem_schar_iget(signed char* target, const signed char* source, ptrdiff_t tst, ptrdiff_t sst, size_t len, int pe); +OSHMEM_DECLSPEC void shmem_uchar_iget(unsigned char* target, const unsigned char* source, ptrdiff_t tst, ptrdiff_t sst, size_t len, int pe); +OSHMEM_DECLSPEC void shmem_ushort_iget(unsigned short* target, const unsigned short* source, ptrdiff_t tst, ptrdiff_t sst, size_t len, int pe); +OSHMEM_DECLSPEC void shmem_uint_iget(unsigned int* target, const unsigned int* source, ptrdiff_t tst, ptrdiff_t sst, size_t len, int pe); +OSHMEM_DECLSPEC void shmem_ulong_iget(unsigned long* target, const unsigned long* source, ptrdiff_t tst, ptrdiff_t sst, size_t len, int pe); +OSHMEM_DECLSPEC void shmem_ulonglong_iget(unsigned long long* target, const unsigned long long* source, ptrdiff_t tst, ptrdiff_t sst, size_t len, int pe); +OSHMEM_DECLSPEC void shmem_int8_iget(int8_t* target, const int8_t* source, ptrdiff_t tst, ptrdiff_t sst, size_t len, int pe); +OSHMEM_DECLSPEC void shmem_int16_iget(int16_t* target, const int16_t* source, ptrdiff_t tst, ptrdiff_t sst, size_t len, int pe); +OSHMEM_DECLSPEC void shmem_int32_iget(int32_t* target, const int32_t* source, 
ptrdiff_t tst, ptrdiff_t sst, size_t len, int pe); +OSHMEM_DECLSPEC void shmem_int64_iget(int64_t* target, const int64_t* source, ptrdiff_t tst, ptrdiff_t sst, size_t len, int pe); +OSHMEM_DECLSPEC void shmem_uint8_iget(uint8_t* target, const uint8_t* source, ptrdiff_t tst, ptrdiff_t sst, size_t len, int pe); +OSHMEM_DECLSPEC void shmem_uint16_iget(uint16_t* target, const uint16_t* source, ptrdiff_t tst, ptrdiff_t sst, size_t len, int pe); +OSHMEM_DECLSPEC void shmem_uint32_iget(uint32_t* target, const uint32_t* source, ptrdiff_t tst, ptrdiff_t sst, size_t len, int pe); +OSHMEM_DECLSPEC void shmem_uint64_iget(uint64_t* target, const uint64_t* source, ptrdiff_t tst, ptrdiff_t sst, size_t len, int pe); +OSHMEM_DECLSPEC void shmem_size_iget(size_t* target, const size_t* source, ptrdiff_t tst, ptrdiff_t sst, size_t len, int pe); +OSHMEM_DECLSPEC void shmem_ptrdiff_iget(ptrdiff_t* target, const ptrdiff_t* source, ptrdiff_t tst, ptrdiff_t sst, size_t len, int pe); #if OSHMEM_HAVE_C11 -#define shmem_iget(dst, src, tst, sst, len, pe) \ - _Generic(&*(dst), \ - char*: shmem_char_iget, \ - short*: shmem_short_iget, \ - int*: shmem_int_iget, \ - long*: shmem_long_iget, \ - long long*: shmem_longlong_iget, \ - float*: shmem_float_iget, \ - double*: shmem_double_iget, \ - long double*: shmem_longdouble_iget)(dst, src, tst, sst, len, pe) +#define shmem_iget(...) 
\ + _Generic(&*(__OSHMEM_VAR_ARG1(__VA_ARGS__)), \ + shmem_ctx_t: _Generic(&*(__OSHMEM_VAR_ARG2(__VA_ARGS__)), \ + char*: shmem_ctx_char_iget, \ + short*: shmem_ctx_short_iget, \ + int*: shmem_ctx_int_iget, \ + long*: shmem_ctx_long_iget, \ + long long*: shmem_ctx_longlong_iget, \ + signed char*: shmem_ctx_schar_iget, \ + unsigned char*: shmem_ctx_uchar_iget, \ + unsigned short*: shmem_ctx_ushort_iget, \ + unsigned int*: shmem_ctx_uint_iget, \ + unsigned long*: shmem_ctx_ulong_iget, \ + unsigned long long*: shmem_ctx_ulonglong_iget, \ + float*: shmem_ctx_float_iget, \ + double*: shmem_ctx_double_iget, \ + long double*: shmem_ctx_longdouble_iget, \ + default: __oshmem_datatype_ignore), \ + char*: shmem_char_iget, \ + short*: shmem_short_iget, \ + int*: shmem_int_iget, \ + long*: shmem_long_iget, \ + long long*: shmem_longlong_iget, \ + signed char*: shmem_schar_iget, \ + unsigned char*: shmem_uchar_iget, \ + unsigned short*: shmem_ushort_iget, \ + unsigned int*: shmem_uint_iget, \ + unsigned long*: shmem_ulong_iget, \ + unsigned long long*: shmem_ulonglong_iget, \ + float*: shmem_float_iget, \ + double*: shmem_double_iget, \ + long double*: shmem_longdouble_iget)(__VA_ARGS__) #endif OSHMEM_DECLSPEC void shmem_ctx_iget8(shmem_ctx_t ctx, void* target, const void* source, ptrdiff_t tst, ptrdiff_t sst,size_t len, int pe); @@ -574,42 +891,83 @@ OSHMEM_DECLSPEC void shmem_ctx_short_get_nbi(shmem_ctx_t ctx, short *target, co OSHMEM_DECLSPEC void shmem_ctx_int_get_nbi(shmem_ctx_t ctx, int *target, const int *source, size_t len, int pe); OSHMEM_DECLSPEC void shmem_ctx_long_get_nbi(shmem_ctx_t ctx, long *target, const long *source, size_t len, int pe); OSHMEM_DECLSPEC void shmem_ctx_longlong_get_nbi(shmem_ctx_t ctx, long long *target, const long long *source, size_t len, int pe); +OSHMEM_DECLSPEC void shmem_ctx_schar_get_nbi(shmem_ctx_t ctx, signed char *target, const signed char *source, size_t len, int pe); +OSHMEM_DECLSPEC void shmem_ctx_uchar_get_nbi(shmem_ctx_t ctx, 
unsigned char *target, const unsigned char *source, size_t len, int pe); +OSHMEM_DECLSPEC void shmem_ctx_ushort_get_nbi(shmem_ctx_t ctx, unsigned short *target, const unsigned short *source, size_t len, int pe); +OSHMEM_DECLSPEC void shmem_ctx_uint_get_nbi(shmem_ctx_t ctx, unsigned int *target, const unsigned int *source, size_t len, int pe); +OSHMEM_DECLSPEC void shmem_ctx_ulong_get_nbi(shmem_ctx_t ctx, unsigned long *target, const unsigned long *source, size_t len, int pe); +OSHMEM_DECLSPEC void shmem_ctx_ulonglong_get_nbi(shmem_ctx_t ctx, unsigned long long *target, const unsigned long long *source, size_t len, int pe); OSHMEM_DECLSPEC void shmem_ctx_float_get_nbi(shmem_ctx_t ctx, float *target, const float *source, size_t len, int pe); OSHMEM_DECLSPEC void shmem_ctx_double_get_nbi(shmem_ctx_t ctx, double *target, const double *source, size_t len, int pe); OSHMEM_DECLSPEC void shmem_ctx_longdouble_get_nbi(shmem_ctx_t ctx, long double *target, const long double *source, size_t len, int pe); -#if OSHMEM_HAVE_C11 -#define shmem_get_nbi(ctx, dst, src, len, pe) \ - _Generic(&*(dst), \ - char*: shmem_ctx_char_get_nbi, \ - short*: shmem_ctx_short_get_nbi, \ - int*: shmem_ctx_int_get_nbi, \ - long*: shmem_ctx_long_get_nbi, \ - long long*: shmem_ctx_longlong_get_nbi, \ - float*: shmem_ctx_float_get_nbi, \ - double*: shmem_ctx_double_get_nbi, \ - long double*: shmem_ctx_longdouble_get_nbi)(ctx, dst, src, len, pe) -#endif +OSHMEM_DECLSPEC void shmem_ctx_int8_get_nbi(shmem_ctx_t ctx, int8_t *target, const int8_t *source, size_t len, int pe); +OSHMEM_DECLSPEC void shmem_ctx_int16_get_nbi(shmem_ctx_t ctx, int16_t *target, const int16_t *source, size_t len, int pe); +OSHMEM_DECLSPEC void shmem_ctx_int32_get_nbi(shmem_ctx_t ctx, int32_t *target, const int32_t *source, size_t len, int pe); +OSHMEM_DECLSPEC void shmem_ctx_int64_get_nbi(shmem_ctx_t ctx, int64_t *target, const int64_t *source, size_t len, int pe); +OSHMEM_DECLSPEC void shmem_ctx_uint8_get_nbi(shmem_ctx_t ctx, 
uint8_t *target, const uint8_t *source, size_t len, int pe); +OSHMEM_DECLSPEC void shmem_ctx_uint16_get_nbi(shmem_ctx_t ctx, uint16_t *target, const uint16_t *source, size_t len, int pe); +OSHMEM_DECLSPEC void shmem_ctx_uint32_get_nbi(shmem_ctx_t ctx, uint32_t *target, const uint32_t *source, size_t len, int pe); +OSHMEM_DECLSPEC void shmem_ctx_uint64_get_nbi(shmem_ctx_t ctx, uint64_t *target, const uint64_t *source, size_t len, int pe); +OSHMEM_DECLSPEC void shmem_ctx_size_get_nbi(shmem_ctx_t ctx, size_t *target, const size_t *source, size_t len, int pe); +OSHMEM_DECLSPEC void shmem_ctx_ptrdiff_get_nbi(shmem_ctx_t ctx, ptrdiff_t *target, const ptrdiff_t *source, size_t len, int pe); -OSHMEM_DECLSPEC void shmem_getmem_nbi(void *target, const void *source, size_t len, int pe); OSHMEM_DECLSPEC void shmem_char_get_nbi(char *target, const char *source, size_t len, int pe); OSHMEM_DECLSPEC void shmem_short_get_nbi(short *target, const short *source, size_t len, int pe); OSHMEM_DECLSPEC void shmem_int_get_nbi(int *target, const int *source, size_t len, int pe); OSHMEM_DECLSPEC void shmem_long_get_nbi(long *target, const long *source, size_t len, int pe); OSHMEM_DECLSPEC void shmem_longlong_get_nbi(long long *target, const long long *source, size_t len, int pe); +OSHMEM_DECLSPEC void shmem_schar_get_nbi(signed char *target, const signed char *source, size_t len, int pe); +OSHMEM_DECLSPEC void shmem_uchar_get_nbi(unsigned char *target, const unsigned char *source, size_t len, int pe); +OSHMEM_DECLSPEC void shmem_ushort_get_nbi(unsigned short *target, const unsigned short *source, size_t len, int pe); +OSHMEM_DECLSPEC void shmem_uint_get_nbi(unsigned int *target, const unsigned int *source, size_t len, int pe); +OSHMEM_DECLSPEC void shmem_ulong_get_nbi(unsigned long *target, const unsigned long *source, size_t len, int pe); +OSHMEM_DECLSPEC void shmem_ulonglong_get_nbi(unsigned long long *target, const unsigned long long *source, size_t len, int pe); OSHMEM_DECLSPEC void 
shmem_float_get_nbi(float *target, const float *source, size_t len, int pe); OSHMEM_DECLSPEC void shmem_double_get_nbi(double *target, const double *source, size_t len, int pe); OSHMEM_DECLSPEC void shmem_longdouble_get_nbi(long double *target, const long double *source, size_t len, int pe); +OSHMEM_DECLSPEC void shmem_int8_get_nbi(int8_t *target, const int8_t *source, size_t len, int pe); +OSHMEM_DECLSPEC void shmem_int16_get_nbi(int16_t *target, const int16_t *source, size_t len, int pe); +OSHMEM_DECLSPEC void shmem_int32_get_nbi(int32_t *target, const int32_t *source, size_t len, int pe); +OSHMEM_DECLSPEC void shmem_int64_get_nbi(int64_t *target, const int64_t *source, size_t len, int pe); +OSHMEM_DECLSPEC void shmem_uint8_get_nbi(uint8_t *target, const uint8_t *source, size_t len, int pe); +OSHMEM_DECLSPEC void shmem_uint16_get_nbi(uint16_t *target, const uint16_t *source, size_t len, int pe); +OSHMEM_DECLSPEC void shmem_uint32_get_nbi(uint32_t *target, const uint32_t *source, size_t len, int pe); +OSHMEM_DECLSPEC void shmem_uint64_get_nbi(uint64_t *target, const uint64_t *source, size_t len, int pe); +OSHMEM_DECLSPEC void shmem_size_get_nbi(size_t *target, const size_t *source, size_t len, int pe); +OSHMEM_DECLSPEC void shmem_ptrdiff_get_nbi(ptrdiff_t *target, const ptrdiff_t *source, size_t len, int pe); #if OSHMEM_HAVE_C11 -#define shmem_get_nbi(dst, src, len, pe) \ - _Generic(&*(dst), \ - char*: shmem_char_get_nbi, \ - short*: shmem_short_get_nbi, \ - int*: shmem_int_get_nbi, \ - long*: shmem_long_get_nbi, \ - long long*: shmem_longlong_get_nbi, \ - float*: shmem_float_get_nbi, \ - double*: shmem_double_get_nbi, \ - long double*: shmem_longdouble_get_nbi)(dst, src, len, pe) +#define shmem_get_nbi(...) 
\ + _Generic(&*(__OSHMEM_VAR_ARG1(__VA_ARGS__)), \ + shmem_ctx_t: _Generic(&*(__OSHMEM_VAR_ARG2(__VA_ARGS__)), \ + char*: shmem_ctx_char_get_nbi, \ + short*: shmem_ctx_short_get_nbi, \ + int*: shmem_ctx_int_get_nbi, \ + long*: shmem_ctx_long_get_nbi, \ + long long*: shmem_ctx_longlong_get_nbi, \ + signed char*: shmem_ctx_schar_get_nbi, \ + unsigned char*: shmem_ctx_uchar_get_nbi, \ + unsigned short*: shmem_ctx_ushort_get_nbi, \ + unsigned int*: shmem_ctx_uint_get_nbi, \ + unsigned long*: shmem_ctx_ulong_get_nbi, \ + unsigned long long*: shmem_ctx_ulonglong_get_nbi, \ + float*: shmem_ctx_float_get_nbi, \ + double*: shmem_ctx_double_get_nbi, \ + long double*: shmem_ctx_longdouble_get_nbi, \ + default: __oshmem_datatype_ignore), \ + char*: shmem_char_get_nbi, \ + short*: shmem_short_get_nbi, \ + int*: shmem_int_get_nbi, \ + long*: shmem_long_get_nbi, \ + long long*: shmem_longlong_get_nbi, \ + signed char*: shmem_schar_get_nbi, \ + unsigned char*: shmem_uchar_get_nbi, \ + unsigned short*: shmem_ushort_get_nbi, \ + unsigned int*: shmem_uint_get_nbi, \ + unsigned long*: shmem_ulong_get_nbi, \ + unsigned long long*: shmem_ulonglong_get_nbi, \ + float*: shmem_float_get_nbi, \ + double*: shmem_double_get_nbi, \ + long double*: shmem_longdouble_get_nbi)(__VA_ARGS__) #endif OSHMEM_DECLSPEC void shmem_ctx_get8_nbi(shmem_ctx_t ctx, void *target, const void *source, size_t len, int pe); @@ -624,6 +982,7 @@ OSHMEM_DECLSPEC void shmem_get16_nbi(void *target, const void *source, size_t l OSHMEM_DECLSPEC void shmem_get32_nbi(void *target, const void *source, size_t len, int pe); OSHMEM_DECLSPEC void shmem_get64_nbi(void *target, const void *source, size_t len, int pe); OSHMEM_DECLSPEC void shmem_get128_nbi(void *target, const void *source, size_t len, int pe); +OSHMEM_DECLSPEC void shmem_getmem_nbi(void *target, const void *source, size_t len, int pe); /* * Atomic operations @@ -631,32 +990,42 @@ OSHMEM_DECLSPEC void shmem_get128_nbi(void *target, const void *source, size_t /* 
Atomic swap */ OSHMEM_DECLSPEC int shmem_ctx_int_atomic_swap(shmem_ctx_t ctx, int *target, int value, int pe); OSHMEM_DECLSPEC long shmem_ctx_long_atomic_swap(shmem_ctx_t ctx, long *target, long value, int pe); -OSHMEM_DECLSPEC long long shmem_ctx_longlong_atomic_swap(shmem_ctx_t ctx, long long*target, long long value, int pe); +OSHMEM_DECLSPEC long long shmem_ctx_longlong_atomic_swap(shmem_ctx_t ctx, long long *target, long long value, int pe); +OSHMEM_DECLSPEC unsigned int shmem_ctx_uint_atomic_swap(shmem_ctx_t ctx, unsigned int *target, unsigned int value, int pe); +OSHMEM_DECLSPEC unsigned long shmem_ctx_ulong_atomic_swap(shmem_ctx_t ctx, unsigned long *target, unsigned long value, int pe); +OSHMEM_DECLSPEC unsigned long long shmem_ctx_ulonglong_atomic_swap(shmem_ctx_t ctx, unsigned long long *target, unsigned long long value, int pe); OSHMEM_DECLSPEC float shmem_ctx_float_atomic_swap(shmem_ctx_t ctx, float *target, float value, int pe); OSHMEM_DECLSPEC double shmem_ctx_double_atomic_swap(shmem_ctx_t ctx, double *target, double value, int pe); -#if OSHMEM_HAVE_C11 -#define shmem_atomic_swap(ctx, dst, val, pe) \ - _Generic(&*(dst), \ - int*: shmem_ctx_int_atomic_swap, \ - long*: shmem_ctx_long_atomic_swap, \ - long long*: shmem_ctx_longlong_atomic_swap, \ - float*: shmem_ctx_float_atomic_swap, \ - double*: shmem_ctx_double_atomic_swap)(ctx, dst, val, pe) -#endif OSHMEM_DECLSPEC int shmem_int_atomic_swap(int *target, int value, int pe); OSHMEM_DECLSPEC long shmem_long_atomic_swap(long *target, long value, int pe); OSHMEM_DECLSPEC long long shmem_longlong_atomic_swap(long long*target, long long value, int pe); +OSHMEM_DECLSPEC unsigned int shmem_uint_atomic_swap(unsigned int *target, unsigned int value, int pe); +OSHMEM_DECLSPEC unsigned long shmem_ulong_atomic_swap(unsigned long *target, unsigned long value, int pe); +OSHMEM_DECLSPEC unsigned long long shmem_ulonglong_atomic_swap(unsigned long long *target, unsigned long long value, int pe); OSHMEM_DECLSPEC float 
shmem_float_atomic_swap(float *target, float value, int pe); OSHMEM_DECLSPEC double shmem_double_atomic_swap(double *target, double value, int pe); #if OSHMEM_HAVE_C11 -#define shmem_atomic_swap(dst, val, pe) \ - _Generic(&*(dst), \ - int*: shmem_int_atomic_swap, \ - long*: shmem_long_atomic_swap, \ - long long*: shmem_longlong_atomic_swap, \ - float*: shmem_float_atomic_swap, \ - double*: shmem_double_atomic_swap)(dst, val, pe) +#define shmem_atomic_swap(...) \ + _Generic(&*(__OSHMEM_VAR_ARG1(__VA_ARGS__)), \ + shmem_ctx_t: _Generic((__OSHMEM_VAR_ARG2(__VA_ARGS__)), \ + int*: shmem_ctx_int_atomic_swap, \ + long*: shmem_ctx_long_atomic_swap, \ + long long*: shmem_ctx_longlong_atomic_swap, \ + unsigned int*: shmem_ctx_uint_atomic_swap, \ + unsigned long*: shmem_ctx_ulong_atomic_swap, \ + unsigned long long*: shmem_ctx_ulonglong_atomic_swap,\ + float*: shmem_ctx_float_atomic_swap, \ + double*: shmem_ctx_double_atomic_swap, \ + default: __oshmem_datatype_ignore), \ + int*: shmem_int_atomic_swap, \ + long*: shmem_long_atomic_swap, \ + long long*: shmem_longlong_atomic_swap, \ + unsigned int*: shmem_uint_atomic_swap, \ + unsigned long*: shmem_ulong_atomic_swap, \ + unsigned long long*: shmem_ulonglong_atomic_swap, \ + float*: shmem_float_atomic_swap, \ + double*: shmem_double_atomic_swap)(__VA_ARGS__) #endif OSHMEM_DECLSPEC int shmem_int_swap(int *target, int value, int pe); @@ -677,37 +1046,47 @@ OSHMEM_DECLSPEC double shmem_double_swap(double *target, double value, int pe); /* Atomic set */ OSHMEM_DECLSPEC void shmem_ctx_int_atomic_set(shmem_ctx_t ctx, int *target, int value, int pe); OSHMEM_DECLSPEC void shmem_ctx_long_atomic_set(shmem_ctx_t ctx, long *target, long value, int pe); -OSHMEM_DECLSPEC void shmem_ctx_longlong_atomic_set(shmem_ctx_t ctx, long long*target, long long value, int pe); +OSHMEM_DECLSPEC void shmem_ctx_longlong_atomic_set(shmem_ctx_t ctx, long long *target, long long value, int pe); +OSHMEM_DECLSPEC void shmem_ctx_uint_atomic_set(shmem_ctx_t ctx, 
unsigned int *target, unsigned int value, int pe); +OSHMEM_DECLSPEC void shmem_ctx_ulong_atomic_set(shmem_ctx_t ctx, unsigned long *target, unsigned long value, int pe); +OSHMEM_DECLSPEC void shmem_ctx_ulonglong_atomic_set(shmem_ctx_t ctx, unsigned long long *target, unsigned long long value, int pe); OSHMEM_DECLSPEC void shmem_ctx_float_atomic_set(shmem_ctx_t ctx, float *target, float value, int pe); OSHMEM_DECLSPEC void shmem_ctx_double_atomic_set(shmem_ctx_t ctx, double *target, double value, int pe); -#if OSHMEM_HAVE_C11 -#define shmem_atomic_set(ctx, dst, val, pe) \ - _Generic(&*(dst), \ - int*: shmem_ctx_int_atomic_set, \ - long*: shmem_ctx_long_atomic_set, \ - long long*: shmem_ctx_longlong_atomic_set, \ - float*: shmem_ctx_float_atomic_set, \ - double*: shmem_ctx_double_atomic_set)(ctx, dst, val, pe) -#endif OSHMEM_DECLSPEC void shmem_int_atomic_set(int *target, int value, int pe); OSHMEM_DECLSPEC void shmem_long_atomic_set(long *target, long value, int pe); -OSHMEM_DECLSPEC void shmem_longlong_atomic_set(long long*target, long long value, int pe); +OSHMEM_DECLSPEC void shmem_longlong_atomic_set(long long *target, long long value, int pe); +OSHMEM_DECLSPEC void shmem_uint_atomic_set(unsigned int *target, unsigned int value, int pe); +OSHMEM_DECLSPEC void shmem_ulong_atomic_set(unsigned long *target, unsigned long value, int pe); +OSHMEM_DECLSPEC void shmem_ulonglong_atomic_set(unsigned long long *target, unsigned long long value, int pe); OSHMEM_DECLSPEC void shmem_float_atomic_set(float *target, float value, int pe); OSHMEM_DECLSPEC void shmem_double_atomic_set(double *target, double value, int pe); #if OSHMEM_HAVE_C11 -#define shmem_atomic_set(dst, val, pe) \ - _Generic(&*(dst), \ - int*: shmem_int_atomic_set, \ - long*: shmem_long_atomic_set, \ - long long*: shmem_longlong_atomic_set, \ - float*: shmem_float_atomic_set, \ - double*: shmem_double_atomic_set)(dst, val, pe) +#define shmem_atomic_set(...) 
\ + _Generic(&*(__OSHMEM_VAR_ARG1(__VA_ARGS__)), \ + shmem_ctx_t: _Generic((__OSHMEM_VAR_ARG2(__VA_ARGS__)),\ + int*: shmem_ctx_int_atomic_set, \ + long*: shmem_ctx_long_atomic_set, \ + long long*: shmem_ctx_longlong_atomic_set, \ + unsigned int*: shmem_ctx_uint_atomic_set, \ + unsigned long*: shmem_ctx_ulong_atomic_set, \ + unsigned long long*: shmem_ctx_ulonglong_atomic_set,\ + float*: shmem_ctx_float_atomic_set, \ + double*: shmem_ctx_double_atomic_set, \ + default: __oshmem_datatype_ignore), \ + int*: shmem_int_atomic_set, \ + long*: shmem_long_atomic_set, \ + long long*: shmem_longlong_atomic_set, \ + unsigned int*: shmem_uint_atomic_set, \ + unsigned long*: shmem_ulong_atomic_set, \ + unsigned long long*: shmem_ulonglong_atomic_set, \ + float*: shmem_float_atomic_set, \ + double*: shmem_double_atomic_set)(__VA_ARGS__) #endif OSHMEM_DECLSPEC void shmem_int_set(int *target, int value, int pe); OSHMEM_DECLSPEC void shmem_long_set(long *target, long value, int pe); -OSHMEM_DECLSPEC void shmem_longlong_set(long long*target, long long value, int pe); +OSHMEM_DECLSPEC void shmem_longlong_set(long long *target, long long value, int pe); OSHMEM_DECLSPEC void shmem_float_set(float *target, float value, int pe); OSHMEM_DECLSPEC void shmem_double_set(double *target, double value, int pe); #if OSHMEM_HAVE_C11 @@ -724,25 +1103,34 @@ OSHMEM_DECLSPEC void shmem_double_set(double *target, double value, int pe); OSHMEM_DECLSPEC int shmem_ctx_int_atomic_compare_swap(shmem_ctx_t ctx, int *target, int cond, int value, int pe); OSHMEM_DECLSPEC long shmem_ctx_long_atomic_compare_swap(shmem_ctx_t ctx, long *target, long cond, long value, int pe); OSHMEM_DECLSPEC long long shmem_ctx_longlong_atomic_compare_swap(shmem_ctx_t ctx, long long *target, long long cond, long long value, int pe); - -#if OSHMEM_HAVE_C11 -#define shmem_atomic_compare_swap(ctx, dst, cond, val, pe) \ - _Generic(&*(dst), \ - int*: shmem_ctx_int_atomic_compare_swap, \ - long*: shmem_ctx_long_atomic_compare_swap, \ 
- long long*: shmem_ctx_longlong_atomic_compare_swap)(ctx, dst, cond, val, pe) -#endif +OSHMEM_DECLSPEC unsigned int shmem_ctx_uint_atomic_compare_swap(shmem_ctx_t ctx, unsigned int *target, unsigned int cond, unsigned int value, int pe); +OSHMEM_DECLSPEC unsigned long shmem_ctx_ulong_atomic_compare_swap(shmem_ctx_t ctx, unsigned long *target, unsigned long cond, unsigned long value, int pe); +OSHMEM_DECLSPEC unsigned long long shmem_ctx_ulonglong_atomic_compare_swap(shmem_ctx_t ctx, unsigned long long *target, unsigned long long cond, unsigned long long value, int pe); OSHMEM_DECLSPEC int shmem_int_atomic_compare_swap(int *target, int cond, int value, int pe); OSHMEM_DECLSPEC long shmem_long_atomic_compare_swap(long *target, long cond, long value, int pe); OSHMEM_DECLSPEC long long shmem_longlong_atomic_compare_swap(long long *target, long long cond, long long value, int pe); +OSHMEM_DECLSPEC unsigned int shmem_uint_atomic_compare_swap(unsigned int *target, unsigned int cond, unsigned int value, int pe); +OSHMEM_DECLSPEC unsigned long shmem_ulong_atomic_compare_swap(unsigned long *target, unsigned long cond, unsigned long value, int pe); +OSHMEM_DECLSPEC unsigned long long shmem_ulonglong_atomic_compare_swap(unsigned long long *target, unsigned long long cond, unsigned long long value, int pe); #if OSHMEM_HAVE_C11 -#define shmem_atomic_compare_swap(dst, cond, val, pe) \ - _Generic(&*(dst), \ - int*: shmem_int_atomic_compare_swap, \ - long*: shmem_long_atomic_compare_swap, \ - long long*: shmem_longlong_atomic_compare_swap)(dst, cond, val, pe) +#define shmem_atomic_compare_swap(...) 
\ + _Generic(&*(__OSHMEM_VAR_ARG1(__VA_ARGS__)), \ + shmem_ctx_t: _Generic((__OSHMEM_VAR_ARG2(__VA_ARGS__)), \ + int*: shmem_ctx_int_atomic_compare_swap, \ + long*: shmem_ctx_long_atomic_compare_swap, \ + long long*: shmem_ctx_longlong_atomic_compare_swap, \ + unsigned int*: shmem_ctx_uint_atomic_compare_swap, \ + unsigned long*: shmem_ctx_ulong_atomic_compare_swap, \ + unsigned long long*: shmem_ctx_ulonglong_atomic_compare_swap, \ + default: __oshmem_datatype_ignore), \ + int*: shmem_int_atomic_compare_swap, \ + long*: shmem_long_atomic_compare_swap, \ + long long*: shmem_longlong_atomic_compare_swap, \ + unsigned int*: shmem_uint_atomic_compare_swap, \ + unsigned long*: shmem_ulong_atomic_compare_swap, \ + unsigned long long*: shmem_ulonglong_atomic_compare_swap)(__VA_ARGS__) #endif OSHMEM_DECLSPEC int shmem_int_cswap(int *target, int cond, int value, int pe); @@ -761,23 +1149,33 @@ OSHMEM_DECLSPEC long long shmem_longlong_cswap(long long *target, long long cond OSHMEM_DECLSPEC int shmem_ctx_int_atomic_fetch_add(shmem_ctx_t ctx, int *target, int value, int pe); OSHMEM_DECLSPEC long shmem_ctx_long_atomic_fetch_add(shmem_ctx_t ctx, long *target, long value, int pe); OSHMEM_DECLSPEC long long shmem_ctx_longlong_atomic_fetch_add(shmem_ctx_t ctx, long long *target, long long value, int pe); -#if OSHMEM_HAVE_C11 -#define shmem_atomic_fetch_add(ctx, dst, val, pe) \ - _Generic(&*(dst), \ - int*: shmem_ctx_int_atomic_fetch_add, \ - long*: shmem_ctx_long_atomic_fetch_add, \ - long long*: shmem_ctx_longlong_atomic_fetch_add)(ctx, dst, val, pe) -#endif +OSHMEM_DECLSPEC unsigned int shmem_ctx_uint_atomic_fetch_add(shmem_ctx_t ctx, unsigned int *target, unsigned int value, int pe); +OSHMEM_DECLSPEC unsigned long shmem_ctx_ulong_atomic_fetch_add(shmem_ctx_t ctx, unsigned long *target, unsigned long value, int pe); +OSHMEM_DECLSPEC unsigned long long shmem_ctx_ulonglong_atomic_fetch_add(shmem_ctx_t ctx, unsigned long long *target, unsigned long long value, int pe); 
OSHMEM_DECLSPEC int shmem_int_atomic_fetch_add(int *target, int value, int pe); OSHMEM_DECLSPEC long shmem_long_atomic_fetch_add(long *target, long value, int pe); OSHMEM_DECLSPEC long long shmem_longlong_atomic_fetch_add(long long *target, long long value, int pe); +OSHMEM_DECLSPEC unsigned int shmem_uint_atomic_fetch_add(unsigned int *target, unsigned int value, int pe); +OSHMEM_DECLSPEC unsigned long shmem_ulong_atomic_fetch_add(unsigned long *target, unsigned long value, int pe); +OSHMEM_DECLSPEC unsigned long long shmem_ulonglong_atomic_fetch_add(unsigned long long *target, unsigned long long value, int pe); #if OSHMEM_HAVE_C11 -#define shmem_atomic_fetch_add(dst, val, pe) \ - _Generic(&*(dst), \ - int*: shmem_int_atomic_fetch_add, \ - long*: shmem_long_atomic_fetch_add, \ - long long*: shmem_longlong_atomic_fetch_add)(dst, val, pe) +#define shmem_atomic_fetch_add(...) \ + _Generic(&*(__OSHMEM_VAR_ARG1(__VA_ARGS__)), \ + shmem_ctx_t: _Generic((__OSHMEM_VAR_ARG2(__VA_ARGS__)), \ + int*: shmem_ctx_int_atomic_fetch_add, \ + long*: shmem_ctx_long_atomic_fetch_add, \ + long long*: shmem_ctx_longlong_atomic_fetch_add, \ + unsigned int*: shmem_ctx_uint_atomic_fetch_add, \ + unsigned long*: shmem_ctx_ulong_atomic_fetch_add, \ + unsigned long long*: shmem_ctx_ulonglong_atomic_fetch_add, \ + default: __oshmem_datatype_ignore), \ + int*: shmem_int_atomic_fetch_add, \ + long*: shmem_long_atomic_fetch_add, \ + long long*: shmem_longlong_atomic_fetch_add, \ + unsigned int*: shmem_uint_atomic_fetch_add, \ + unsigned long*: shmem_ulong_atomic_fetch_add, \ + unsigned long long*: shmem_ulonglong_atomic_fetch_add)(__VA_ARGS__) #endif OSHMEM_DECLSPEC int shmem_int_fadd(int *target, int value, int pe); @@ -792,103 +1190,167 @@ OSHMEM_DECLSPEC long long shmem_longlong_fadd(long long *target, long long value #endif /* Atomic Fetch&And */ +OSHMEM_DECLSPEC int shmem_ctx_int_atomic_fetch_and(shmem_ctx_t ctx, int *target, int value, int pe); +OSHMEM_DECLSPEC long 
shmem_ctx_long_atomic_fetch_and(shmem_ctx_t ctx, long *target, long value, int pe); +OSHMEM_DECLSPEC long long shmem_ctx_longlong_atomic_fetch_and(shmem_ctx_t ctx, long long *target, long long value, int pe); OSHMEM_DECLSPEC unsigned int shmem_ctx_uint_atomic_fetch_and(shmem_ctx_t ctx, unsigned int *target, unsigned int value, int pe); OSHMEM_DECLSPEC unsigned long shmem_ctx_ulong_atomic_fetch_and(shmem_ctx_t ctx, unsigned long *target, unsigned long value, int pe); OSHMEM_DECLSPEC unsigned long long shmem_ctx_ulonglong_atomic_fetch_and(shmem_ctx_t ctx, unsigned long long *target, unsigned long long value, int pe); -#if OSHMEM_HAVE_C11 -#define shmem_atomic_fetch_and(ctx, dst, val, pe) \ - _Generic(&*(dst), \ - unsigned int*: shmem_ctx_uint_atomic_fetch_and, \ - unsigned long*: shmem_ctx_ulong_atomic_fetch_and, \ - unsigned long long*: shmem_ctx_ulonglong_atomic_fetch_and)(ctx, dst, val, pe) -#endif - +OSHMEM_DECLSPEC int32_t shmem_ctx_int32_atomic_fetch_and(shmem_ctx_t ctx, int32_t *target, int32_t value, int pe); +OSHMEM_DECLSPEC int64_t shmem_ctx_int64_atomic_fetch_and(shmem_ctx_t ctx, int64_t *target, int64_t value, int pe); +OSHMEM_DECLSPEC uint32_t shmem_ctx_uint32_atomic_fetch_and(shmem_ctx_t ctx, uint32_t *target, uint32_t value, int pe); +OSHMEM_DECLSPEC uint64_t shmem_ctx_uint64_atomic_fetch_and(shmem_ctx_t ctx, uint64_t *target, uint64_t value, int pe); + +OSHMEM_DECLSPEC int shmem_int_atomic_fetch_and(int *target, int value, int pe); +OSHMEM_DECLSPEC long shmem_long_atomic_fetch_and(long *target, long value, int pe); +OSHMEM_DECLSPEC long long shmem_longlong_atomic_fetch_and(long long *target, long long value, int pe); OSHMEM_DECLSPEC unsigned int shmem_uint_atomic_fetch_and(unsigned int *target, unsigned int value, int pe); OSHMEM_DECLSPEC unsigned long shmem_ulong_atomic_fetch_and(unsigned long *target, unsigned long value, int pe); OSHMEM_DECLSPEC unsigned long long shmem_ulonglong_atomic_fetch_and(unsigned long long *target, unsigned long long 
value, int pe); +OSHMEM_DECLSPEC int32_t shmem_int32_atomic_fetch_and(int32_t *target, int32_t value, int pe); +OSHMEM_DECLSPEC int64_t shmem_int64_atomic_fetch_and(int64_t *target, int64_t value, int pe); +OSHMEM_DECLSPEC uint32_t shmem_uint32_atomic_fetch_and(uint32_t *target, uint32_t value, int pe); +OSHMEM_DECLSPEC uint64_t shmem_uint64_atomic_fetch_and(uint64_t *target, uint64_t value, int pe); #if OSHMEM_HAVE_C11 -#define shmem_atomic_fetch_and(dst, val, pe) \ - _Generic(&*(dst), \ - unsigned int*: shmem_uint_atomic_fetch_and, \ - unsigned long*: shmem_ulong_atomic_fetch_and, \ - unsigned long long*: shmem_ulonglong_atomic_fetch_and)(dst, val, pe) +#define shmem_atomic_fetch_and(...) \ + _Generic(&*(__OSHMEM_VAR_ARG1(__VA_ARGS__)), \ + shmem_ctx_t: _Generic((__OSHMEM_VAR_ARG2(__VA_ARGS__)), \ + int*: shmem_ctx_int_atomic_fetch_and, \ + long*: shmem_ctx_long_atomic_fetch_and, \ + long long*: shmem_ctx_longlong_atomic_fetch_and, \ + unsigned int*: shmem_ctx_uint_atomic_fetch_and, \ + unsigned long*: shmem_ctx_ulong_atomic_fetch_and, \ + unsigned long long*: shmem_ctx_ulonglong_atomic_fetch_and, \ + default: __oshmem_datatype_ignore), \ + int*: shmem_int_atomic_fetch_and, \ + long*: shmem_long_atomic_fetch_and, \ + long long*: shmem_longlong_atomic_fetch_and, \ + unsigned int*: shmem_uint_atomic_fetch_and, \ + unsigned long*: shmem_ulong_atomic_fetch_and, \ + unsigned long long*: shmem_ulonglong_atomic_fetch_and)(__VA_ARGS__) #endif /* Atomic Fetch&Or */ +OSHMEM_DECLSPEC int shmem_ctx_int_atomic_fetch_or(shmem_ctx_t ctx, int *target, int value, int pe); +OSHMEM_DECLSPEC long shmem_ctx_long_atomic_fetch_or(shmem_ctx_t ctx, long *target, long value, int pe); +OSHMEM_DECLSPEC long long shmem_ctx_longlong_atomic_fetch_or(shmem_ctx_t ctx, long long *target, long long value, int pe); OSHMEM_DECLSPEC unsigned int shmem_ctx_uint_atomic_fetch_or(shmem_ctx_t ctx, unsigned int *target, unsigned int value, int pe); OSHMEM_DECLSPEC unsigned long 
shmem_ctx_ulong_atomic_fetch_or(shmem_ctx_t ctx, unsigned long *target, unsigned long value, int pe); OSHMEM_DECLSPEC unsigned long long shmem_ctx_ulonglong_atomic_fetch_or(shmem_ctx_t ctx, unsigned long long *target, unsigned long long value, int pe); -#if OSHMEM_HAVE_C11 -#define shmem_atomic_fetch_or(ctx, dst, val, pe) \ - _Generic(&*(dst), \ - unsigned int*: shmem_ctx_uint_atomic_fetch_or, \ - unsigned long*: shmem_ctx_ulong_atomic_fetch_or, \ - unsigned long long*: shmem_ctx_ulonglong_atomic_fetch_or)(ctx, dst, val, pe) -#endif - +OSHMEM_DECLSPEC int32_t shmem_ctx_int32_atomic_fetch_or(shmem_ctx_t ctx, int32_t *target, int32_t value, int pe); +OSHMEM_DECLSPEC int64_t shmem_ctx_int64_atomic_fetch_or(shmem_ctx_t ctx, int64_t *target, int64_t value, int pe); +OSHMEM_DECLSPEC uint32_t shmem_ctx_uint32_atomic_fetch_or(shmem_ctx_t ctx, uint32_t *target, uint32_t value, int pe); +OSHMEM_DECLSPEC uint64_t shmem_ctx_uint64_atomic_fetch_or(shmem_ctx_t ctx, uint64_t *target, uint64_t value, int pe); + +OSHMEM_DECLSPEC int shmem_int_atomic_fetch_or(int *target, int value, int pe); +OSHMEM_DECLSPEC long shmem_long_atomic_fetch_or(long *target, long value, int pe); +OSHMEM_DECLSPEC long long shmem_longlong_atomic_fetch_or(long long *target, long long value, int pe); OSHMEM_DECLSPEC unsigned int shmem_uint_atomic_fetch_or(unsigned int *target, unsigned int value, int pe); OSHMEM_DECLSPEC unsigned long shmem_ulong_atomic_fetch_or(unsigned long *target, unsigned long value, int pe); OSHMEM_DECLSPEC unsigned long long shmem_ulonglong_atomic_fetch_or(unsigned long long *target, unsigned long long value, int pe); +OSHMEM_DECLSPEC int32_t shmem_int32_atomic_fetch_or(int32_t *target, int32_t value, int pe); +OSHMEM_DECLSPEC int64_t shmem_int64_atomic_fetch_or(int64_t *target, int64_t value, int pe); +OSHMEM_DECLSPEC uint32_t shmem_uint32_atomic_fetch_or(uint32_t *target, uint32_t value, int pe); +OSHMEM_DECLSPEC uint64_t shmem_uint64_atomic_fetch_or(uint64_t *target, uint64_t 
value, int pe); #if OSHMEM_HAVE_C11 -#define shmem_atomic_fetch_or(dst, val, pe) \ - _Generic(&*(dst), \ - unsigned int*: shmem_uint_atomic_fetch_or, \ - unsigned long*: shmem_ulong_atomic_fetch_or, \ - unsigned long long*: shmem_ulonglong_atomic_fetch_or)(dst, val, pe) +#define shmem_atomic_fetch_or(...) \ + _Generic(&*(__OSHMEM_VAR_ARG1(__VA_ARGS__)), \ + shmem_ctx_t: _Generic((__OSHMEM_VAR_ARG2(__VA_ARGS__)), \ + int*: shmem_ctx_int_atomic_fetch_or, \ + long*: shmem_ctx_long_atomic_fetch_or, \ + long long*: shmem_ctx_longlong_atomic_fetch_or, \ + unsigned int*: shmem_ctx_uint_atomic_fetch_or, \ + unsigned long*: shmem_ctx_ulong_atomic_fetch_or, \ + unsigned long long*: shmem_ctx_ulonglong_atomic_fetch_or, \ + default: __oshmem_datatype_ignore), \ + int*: shmem_int_atomic_fetch_or, \ + long*: shmem_long_atomic_fetch_or, \ + long long*: shmem_longlong_atomic_fetch_or, \ + unsigned int*: shmem_uint_atomic_fetch_or, \ + unsigned long*: shmem_ulong_atomic_fetch_or, \ + unsigned long long*: shmem_ulonglong_atomic_fetch_or)(__VA_ARGS__) #endif /* Atomic Fetch&Xor */ +OSHMEM_DECLSPEC int shmem_ctx_int_atomic_fetch_xor(shmem_ctx_t ctx, int *target, int value, int pe); +OSHMEM_DECLSPEC long shmem_ctx_long_atomic_fetch_xor(shmem_ctx_t ctx, long *target, long value, int pe); +OSHMEM_DECLSPEC long long shmem_ctx_longlong_atomic_fetch_xor(shmem_ctx_t ctx, long long *target, long long value, int pe); OSHMEM_DECLSPEC unsigned int shmem_ctx_uint_atomic_fetch_xor(shmem_ctx_t ctx, unsigned int *target, unsigned int value, int pe); OSHMEM_DECLSPEC unsigned long shmem_ctx_ulong_atomic_fetch_xor(shmem_ctx_t ctx, unsigned long *target, unsigned long value, int pe); OSHMEM_DECLSPEC unsigned long long shmem_ctx_ulonglong_atomic_fetch_xor(shmem_ctx_t ctx, unsigned long long *target, unsigned long long value, int pe); -#if OSHMEM_HAVE_C11 -#define shmem_atomic_fetch_xor(ctx, dst, val, pe) \ - _Generic(&*(dst), \ - unsigned int*: shmem_ctx_uint_atomic_fetch_xor, \ - unsigned long*: 
shmem_ctx_ulong_atomic_fetch_xor, \ - unsigned long long*: shmem_ctx_ulonglong_atomic_fetch_xor)(ctx, dst, val, pe) -#endif - +OSHMEM_DECLSPEC int32_t shmem_ctx_int32_atomic_fetch_xor(shmem_ctx_t ctx, int32_t *target, int32_t value, int pe); +OSHMEM_DECLSPEC int64_t shmem_ctx_int64_atomic_fetch_xor(shmem_ctx_t ctx, int64_t *target, int64_t value, int pe); +OSHMEM_DECLSPEC uint32_t shmem_ctx_uint32_atomic_fetch_xor(shmem_ctx_t ctx, uint32_t *target, uint32_t value, int pe); +OSHMEM_DECLSPEC uint64_t shmem_ctx_uint64_atomic_fetch_xor(shmem_ctx_t ctx, uint64_t *target, uint64_t value, int pe); + +OSHMEM_DECLSPEC int shmem_int_atomic_fetch_xor(int *target, int value, int pe); +OSHMEM_DECLSPEC long shmem_long_atomic_fetch_xor(long *target, long value, int pe); +OSHMEM_DECLSPEC long long shmem_longlong_atomic_fetch_xor(long long *target, long long value, int pe); OSHMEM_DECLSPEC unsigned int shmem_uint_atomic_fetch_xor(unsigned int *target, unsigned int value, int pe); OSHMEM_DECLSPEC unsigned long shmem_ulong_atomic_fetch_xor(unsigned long *target, unsigned long value, int pe); OSHMEM_DECLSPEC unsigned long long shmem_ulonglong_atomic_fetch_xor(unsigned long long *target, unsigned long long value, int pe); +OSHMEM_DECLSPEC int32_t shmem_int32_atomic_fetch_xor(int32_t *target, int32_t value, int pe); +OSHMEM_DECLSPEC int64_t shmem_int64_atomic_fetch_xor(int64_t *target, int64_t value, int pe); +OSHMEM_DECLSPEC uint32_t shmem_uint32_atomic_fetch_xor(uint32_t *target, uint32_t value, int pe); +OSHMEM_DECLSPEC uint64_t shmem_uint64_atomic_fetch_xor(uint64_t *target, uint64_t value, int pe); #if OSHMEM_HAVE_C11 -#define shmem_atomic_fetch_xor(dst, val, pe) \ - _Generic(&*(dst), \ - unsigned int*: shmem_uint_atomic_fetch_xor, \ - unsigned long*: shmem_ulong_atomic_fetch_xor, \ - unsigned long long*: shmem_ulonglong_atomic_fetch_xor)(dst, val, pe) +#define shmem_atomic_fetch_xor(...) 
\ + _Generic(&*(__OSHMEM_VAR_ARG1(__VA_ARGS__)), \ + shmem_ctx_t: _Generic((__OSHMEM_VAR_ARG2(__VA_ARGS__)), \ + int*: shmem_ctx_int_atomic_fetch_xor, \ + long*: shmem_ctx_long_atomic_fetch_xor, \ + long long*: shmem_ctx_longlong_atomic_fetch_xor, \ + unsigned int*: shmem_ctx_uint_atomic_fetch_xor, \ + unsigned long*: shmem_ctx_ulong_atomic_fetch_xor, \ + unsigned long long*: shmem_ctx_ulonglong_atomic_fetch_xor, \ + default: __oshmem_datatype_ignore), \ + int*: shmem_int_atomic_fetch_xor, \ + long*: shmem_long_atomic_fetch_xor, \ + long long*: shmem_longlong_atomic_fetch_xor, \ + unsigned int*: shmem_uint_atomic_fetch_xor, \ + unsigned long*: shmem_ulong_atomic_fetch_xor, \ + unsigned long long*: shmem_ulonglong_atomic_fetch_xor)(__VA_ARGS__) #endif /* Atomic Fetch */ OSHMEM_DECLSPEC int shmem_ctx_int_atomic_fetch(shmem_ctx_t ctx, const int *target, int pe); OSHMEM_DECLSPEC long shmem_ctx_long_atomic_fetch(shmem_ctx_t ctx, const long *target, int pe); OSHMEM_DECLSPEC long long shmem_ctx_longlong_atomic_fetch(shmem_ctx_t ctx, const long long *target, int pe); +OSHMEM_DECLSPEC unsigned int shmem_ctx_uint_atomic_fetch(shmem_ctx_t ctx, const unsigned int *target, int pe); +OSHMEM_DECLSPEC unsigned long shmem_ctx_ulong_atomic_fetch(shmem_ctx_t ctx, const unsigned long *target, int pe); +OSHMEM_DECLSPEC unsigned long long shmem_ctx_ulonglong_atomic_fetch(shmem_ctx_t ctx, const unsigned long long *target, int pe); OSHMEM_DECLSPEC float shmem_ctx_float_atomic_fetch(shmem_ctx_t ctx, const float *target, int pe); OSHMEM_DECLSPEC double shmem_ctx_double_atomic_fetch(shmem_ctx_t ctx, const double *target, int pe); -#if OSHMEM_HAVE_C11 -#define shmem_atomic_fetch(ctx, dst, pe) \ - _Generic(&*(dst), \ - int*: shmem_ctx_int_atomic_fetch, \ - long*: shmem_ctx_long_atomic_fetch, \ - long long*: shmem_ctx_longlong_atomic_fetch,\ - float*: shmem_ctx_float_atomic_fetch, \ - double*: shmem_ctx_double_atomic_fetch)(ctx, dst, pe) -#endif OSHMEM_DECLSPEC int shmem_int_atomic_fetch(const 
int *target, int pe); OSHMEM_DECLSPEC long shmem_long_atomic_fetch(const long *target, int pe); OSHMEM_DECLSPEC long long shmem_longlong_atomic_fetch(const long long *target, int pe); +OSHMEM_DECLSPEC unsigned int shmem_uint_atomic_fetch(const unsigned int *target, int pe); +OSHMEM_DECLSPEC unsigned long shmem_ulong_atomic_fetch(const unsigned long *target, int pe); +OSHMEM_DECLSPEC unsigned long long shmem_ulonglong_atomic_fetch(const unsigned long long *target, int pe); OSHMEM_DECLSPEC float shmem_float_atomic_fetch(const float *target, int pe); OSHMEM_DECLSPEC double shmem_double_atomic_fetch(const double *target, int pe); #if OSHMEM_HAVE_C11 -#define shmem_atomic_fetch(dst, pe) \ - _Generic(&*(dst), \ - int*: shmem_int_atomic_fetch, \ - long*: shmem_long_atomic_fetch, \ - long long*: shmem_longlong_atomic_fetch, \ - float*: shmem_float_atomic_fetch, \ - double*: shmem_double_atomic_fetch)(dst, pe) +#define shmem_atomic_fetch(...) \ + _Generic(&*(__OSHMEM_VAR_ARG1(__VA_ARGS__)), \ + shmem_ctx_t: _Generic((__OSHMEM_VAR_ARG2(__VA_ARGS__)), \ + int*: shmem_ctx_int_atomic_fetch, \ + long*: shmem_ctx_long_atomic_fetch, \ + long long*: shmem_ctx_longlong_atomic_fetch, \ + unsigned int*: shmem_ctx_uint_atomic_fetch, \ + unsigned long*: shmem_ctx_ulong_atomic_fetch, \ + unsigned long long*: shmem_ctx_ulonglong_atomic_fetch, \ + float*: shmem_ctx_float_atomic_fetch, \ + double*: shmem_ctx_double_atomic_fetch, \ + default: __oshmem_datatype_ignore), \ + int*: shmem_int_atomic_fetch, \ + long*: shmem_long_atomic_fetch, \ + long long*: shmem_longlong_atomic_fetch, \ + unsigned int*: shmem_uint_atomic_fetch, \ + unsigned long*: shmem_ulong_atomic_fetch, \ + unsigned long long*: shmem_ulonglong_atomic_fetch, \ + float*: shmem_float_atomic_fetch, \ + double*: shmem_double_atomic_fetch)(__VA_ARGS__) #endif OSHMEM_DECLSPEC int shmem_int_fetch(const int *target, int pe); @@ -910,23 +1372,33 @@ OSHMEM_DECLSPEC double shmem_double_fetch(const double *target, int pe); 
OSHMEM_DECLSPEC int shmem_ctx_int_atomic_fetch_inc(shmem_ctx_t ctx, int *target, int pe); OSHMEM_DECLSPEC long shmem_ctx_long_atomic_fetch_inc(shmem_ctx_t ctx, long *target, int pe); OSHMEM_DECLSPEC long long shmem_ctx_longlong_atomic_fetch_inc(shmem_ctx_t ctx, long long *target, int pe); -#if OSHMEM_HAVE_C11 -#define shmem_atomic_fetch_inc(ctx, dst, pe) \ - _Generic(&*(dst), \ - int*: shmem_ctx_int_atomic_fetch_inc,\ - long*: shmem_ctx_long_atomic_fetch_inc,\ - long long*: shmem_ctx_longlong_atomic_fetch_inc)(ctx, dst, pe) -#endif +OSHMEM_DECLSPEC unsigned int shmem_ctx_uint_atomic_fetch_inc(shmem_ctx_t ctx, unsigned int *target, int pe); +OSHMEM_DECLSPEC unsigned long shmem_ctx_ulong_atomic_fetch_inc(shmem_ctx_t ctx, unsigned long *target, int pe); +OSHMEM_DECLSPEC unsigned long long shmem_ctx_ulonglong_atomic_fetch_inc(shmem_ctx_t ctx, unsigned long long *target, int pe); OSHMEM_DECLSPEC int shmem_int_atomic_fetch_inc(int *target, int pe); OSHMEM_DECLSPEC long shmem_long_atomic_fetch_inc(long *target, int pe); OSHMEM_DECLSPEC long long shmem_longlong_atomic_fetch_inc(long long *target, int pe); +OSHMEM_DECLSPEC unsigned int shmem_uint_atomic_fetch_inc(unsigned int *target, int pe); +OSHMEM_DECLSPEC unsigned long shmem_ulong_atomic_fetch_inc(unsigned long *target, int pe); +OSHMEM_DECLSPEC unsigned long long shmem_ulonglong_atomic_fetch_inc(unsigned long long *target, int pe); #if OSHMEM_HAVE_C11 -#define shmem_atomic_fetch_inc(dst, pe) \ - _Generic(&*(dst), \ - int*: shmem_int_atomic_fetch_inc, \ - long*: shmem_long_atomic_fetch_inc, \ - long long*: shmem_longlong_atomic_fetch_inc)(dst, pe) +#define shmem_atomic_fetch_inc(...) 
\ + _Generic(&*(__OSHMEM_VAR_ARG1(__VA_ARGS__)), \ + shmem_ctx_t: _Generic((__OSHMEM_VAR_ARG2(__VA_ARGS__)), \ + int*: shmem_ctx_int_atomic_fetch_inc, \ + long*: shmem_ctx_long_atomic_fetch_inc, \ + long long*: shmem_ctx_longlong_atomic_fetch_inc, \ + unsigned int*: shmem_ctx_uint_atomic_fetch_inc, \ + unsigned long*: shmem_ctx_ulong_atomic_fetch_inc, \ + unsigned long long*: shmem_ctx_ulonglong_atomic_fetch_inc, \ + default: __oshmem_datatype_ignore), \ + int*: shmem_int_atomic_fetch_inc, \ + long*: shmem_long_atomic_fetch_inc, \ + long long*: shmem_longlong_atomic_fetch_inc, \ + unsigned int*: shmem_uint_atomic_fetch_inc, \ + unsigned long*: shmem_ulong_atomic_fetch_inc, \ + unsigned long long*: shmem_ulonglong_atomic_fetch_inc)(__VA_ARGS__) #endif OSHMEM_DECLSPEC int shmem_int_finc(int *target, int pe); @@ -944,23 +1416,33 @@ OSHMEM_DECLSPEC long long shmem_longlong_finc(long long *target, int pe); OSHMEM_DECLSPEC void shmem_ctx_int_atomic_add(shmem_ctx_t ctx, int *target, int value, int pe); OSHMEM_DECLSPEC void shmem_ctx_long_atomic_add(shmem_ctx_t ctx, long *target, long value, int pe); OSHMEM_DECLSPEC void shmem_ctx_longlong_atomic_add(shmem_ctx_t ctx, long long *target, long long value, int pe); -#if OSHMEM_HAVE_C11 -#define shmem_atomic_add(ctx, dst, val, pe) \ - _Generic(&*(dst), \ - int*: shmem_ctx_int_atomic_add, \ - long*: shmem_ctx_long_atomic_add, \ - long long*: shmem_ctx_longlong_atomic_add)(ctx, dst, val, pe) -#endif +OSHMEM_DECLSPEC void shmem_ctx_uint_atomic_add(shmem_ctx_t ctx, unsigned int *target, unsigned int value, int pe); +OSHMEM_DECLSPEC void shmem_ctx_ulong_atomic_add(shmem_ctx_t ctx, unsigned long *target, unsigned long value, int pe); +OSHMEM_DECLSPEC void shmem_ctx_ulonglong_atomic_add(shmem_ctx_t ctx, unsigned long long *target, unsigned long long value, int pe); OSHMEM_DECLSPEC void shmem_int_atomic_add(int *target, int value, int pe); OSHMEM_DECLSPEC void shmem_long_atomic_add(long *target, long value, int pe); OSHMEM_DECLSPEC 
void shmem_longlong_atomic_add(long long *target, long long value, int pe); +OSHMEM_DECLSPEC void shmem_uint_atomic_add(unsigned int *target, unsigned int value, int pe); +OSHMEM_DECLSPEC void shmem_ulong_atomic_add(unsigned long *target, unsigned long value, int pe); +OSHMEM_DECLSPEC void shmem_ulonglong_atomic_add(unsigned long long *target, unsigned long long value, int pe); #if OSHMEM_HAVE_C11 -#define shmem_atomic_add(dst, val, pe) \ - _Generic(&*(dst), \ - int*: shmem_int_atomic_add, \ - long*: shmem_long_atomic_add, \ - long long*: shmem_longlong_atomic_add)(dst, val, pe) +#define shmem_atomic_add(...) \ + _Generic(&*(__OSHMEM_VAR_ARG1(__VA_ARGS__)), \ + shmem_ctx_t: _Generic((__OSHMEM_VAR_ARG2(__VA_ARGS__)), \ + int*: shmem_ctx_int_atomic_add, \ + long*: shmem_ctx_long_atomic_add, \ + long long*: shmem_ctx_longlong_atomic_add, \ + unsigned int*: shmem_ctx_uint_atomic_add, \ + unsigned long*: shmem_ctx_ulong_atomic_add, \ + unsigned long long*: shmem_ctx_ulonglong_atomic_add, \ + default: __oshmem_datatype_ignore), \ + int*: shmem_int_atomic_add, \ + long*: shmem_long_atomic_add, \ + long long*: shmem_longlong_atomic_add, \ + unsigned int*: shmem_uint_atomic_add, \ + unsigned long*: shmem_ulong_atomic_add, \ + unsigned long long*: shmem_ulonglong_atomic_add)(__VA_ARGS__) #endif OSHMEM_DECLSPEC void shmem_int_add(int *target, int value, int pe); @@ -975,95 +1457,162 @@ OSHMEM_DECLSPEC void shmem_longlong_add(long long *target, long long value, int #endif /* Atomic And */ +OSHMEM_DECLSPEC void shmem_ctx_int_atomic_and(shmem_ctx_t ctx, int *target, int value, int pe); +OSHMEM_DECLSPEC void shmem_ctx_long_atomic_and(shmem_ctx_t ctx, long *target, long value, int pe); +OSHMEM_DECLSPEC void shmem_ctx_longlong_atomic_and(shmem_ctx_t ctx, long long *target, long long value, int pe); OSHMEM_DECLSPEC void shmem_ctx_uint_atomic_and(shmem_ctx_t ctx, unsigned int *target, unsigned int value, int pe); OSHMEM_DECLSPEC void shmem_ctx_ulong_atomic_and(shmem_ctx_t ctx, 
unsigned long *target, unsigned long value, int pe); OSHMEM_DECLSPEC void shmem_ctx_ulonglong_atomic_and(shmem_ctx_t ctx, unsigned long long *target, unsigned long long value, int pe); -#if OSHMEM_HAVE_C11 -#define shmem_atomic_and(ctx, dst, val, pe) \ - _Generic(&*(dst), \ - unsigned int*: shmem_ctx_uint_atomic_and, \ - unsigned long*: shmem_ctx_ulong_atomic_and, \ - unsigned long long*: shmem_ctx_ulonglong_atomic_and)(ctx, dst, val, pe) -#endif - +OSHMEM_DECLSPEC void shmem_ctx_int32_atomic_and(shmem_ctx_t ctx, int32_t *target, int32_t value, int pe); +OSHMEM_DECLSPEC void shmem_ctx_int64_atomic_and(shmem_ctx_t ctx, int64_t *target, int64_t value, int pe); +OSHMEM_DECLSPEC void shmem_ctx_uint32_atomic_and(shmem_ctx_t ctx, uint32_t *target, uint32_t value, int pe); +OSHMEM_DECLSPEC void shmem_ctx_uint64_atomic_and(shmem_ctx_t ctx, uint64_t *target, uint64_t value, int pe); + +OSHMEM_DECLSPEC void shmem_int_atomic_and(int *target, int value, int pe); +OSHMEM_DECLSPEC void shmem_long_atomic_and(long *target, long value, int pe); +OSHMEM_DECLSPEC void shmem_longlong_atomic_and(long long *target, long long value, int pe); OSHMEM_DECLSPEC void shmem_uint_atomic_and(unsigned int *target, unsigned int value, int pe); OSHMEM_DECLSPEC void shmem_ulong_atomic_and(unsigned long *target, unsigned long value, int pe); OSHMEM_DECLSPEC void shmem_ulonglong_atomic_and(unsigned long long *target, unsigned long long value, int pe); +OSHMEM_DECLSPEC void shmem_int32_atomic_and(int32_t *target, int32_t value, int pe); +OSHMEM_DECLSPEC void shmem_int64_atomic_and(int64_t *target, int64_t value, int pe); +OSHMEM_DECLSPEC void shmem_uint32_atomic_and(uint32_t *target, uint32_t value, int pe); +OSHMEM_DECLSPEC void shmem_uint64_atomic_and(uint64_t *target, uint64_t value, int pe); + #if OSHMEM_HAVE_C11 -#define shmem_atomic_and(dst, val, pe) \ - _Generic(&*(dst), \ - unsigned int*: shmem_uint_atomic_and, \ - unsigned long*: shmem_ulong_atomic_and, \ - unsigned long long*: 
shmem_ulonglong_atomic_and)(dst, val, pe) +#define shmem_atomic_and(...) \ + _Generic(&*(__OSHMEM_VAR_ARG1(__VA_ARGS__)), \ + shmem_ctx_t: _Generic((__OSHMEM_VAR_ARG2(__VA_ARGS__)), \ + int*: shmem_ctx_int_atomic_and, \ + long*: shmem_ctx_long_atomic_and, \ + long long*: shmem_ctx_longlong_atomic_and, \ + unsigned int*: shmem_ctx_uint_atomic_and, \ + unsigned long*: shmem_ctx_ulong_atomic_and, \ + unsigned long long*: shmem_ctx_ulonglong_atomic_and, \ + default: __oshmem_datatype_ignore), \ + int*: shmem_int_atomic_and, \ + long*: shmem_long_atomic_and, \ + long long*: shmem_longlong_atomic_and, \ + unsigned int*: shmem_uint_atomic_and, \ + unsigned long*: shmem_ulong_atomic_and, \ + unsigned long long*: shmem_ulonglong_atomic_and)(__VA_ARGS__) #endif /* Atomic Or */ +OSHMEM_DECLSPEC void shmem_ctx_int_atomic_or(shmem_ctx_t ctx, int *target, int value, int pe); +OSHMEM_DECLSPEC void shmem_ctx_long_atomic_or(shmem_ctx_t ctx, long *target, long value, int pe); +OSHMEM_DECLSPEC void shmem_ctx_longlong_atomic_or(shmem_ctx_t ctx, long long *target, long long value, int pe); OSHMEM_DECLSPEC void shmem_ctx_uint_atomic_or(shmem_ctx_t ctx, unsigned int *target, unsigned int value, int pe); OSHMEM_DECLSPEC void shmem_ctx_ulong_atomic_or(shmem_ctx_t ctx, unsigned long *target, unsigned long value, int pe); OSHMEM_DECLSPEC void shmem_ctx_ulonglong_atomic_or(shmem_ctx_t ctx, unsigned long long *target, unsigned long long value, int pe); -#if OSHMEM_HAVE_C11 -#define shmem_atomic_or(ctx, dst, val, pe) \ - _Generic(&*(dst), \ - unsigned int*: shmem_ctx_uint_atomic_or, \ - unsigned long*: shmem_ctx_ulong_atomic_or, \ - unsigned long long*: shmem_ctx_ulonglong_atomic_or)(ctx, dst, val, pe) -#endif - +OSHMEM_DECLSPEC void shmem_ctx_int32_atomic_or(shmem_ctx_t ctx, int32_t *target, int32_t value, int pe); +OSHMEM_DECLSPEC void shmem_ctx_int64_atomic_or(shmem_ctx_t ctx, int64_t *target, int64_t value, int pe); +OSHMEM_DECLSPEC void shmem_ctx_uint32_atomic_or(shmem_ctx_t ctx, uint32_t 
*target, uint32_t value, int pe); +OSHMEM_DECLSPEC void shmem_ctx_uint64_atomic_or(shmem_ctx_t ctx, uint64_t *target, uint64_t value, int pe); + +OSHMEM_DECLSPEC void shmem_int_atomic_or(int *target, int value, int pe); +OSHMEM_DECLSPEC void shmem_long_atomic_or(long *target, long value, int pe); +OSHMEM_DECLSPEC void shmem_longlong_atomic_or(long long *target, long long value, int pe); OSHMEM_DECLSPEC void shmem_uint_atomic_or(unsigned int *target, unsigned int value, int pe); OSHMEM_DECLSPEC void shmem_ulong_atomic_or(unsigned long *target, unsigned long value, int pe); OSHMEM_DECLSPEC void shmem_ulonglong_atomic_or(unsigned long long *target, unsigned long long value, int pe); +OSHMEM_DECLSPEC void shmem_int32_atomic_or(int32_t *target, int32_t value, int pe); +OSHMEM_DECLSPEC void shmem_int64_atomic_or(int64_t *target, int64_t value, int pe); +OSHMEM_DECLSPEC void shmem_uint32_atomic_or(uint32_t *target, uint32_t value, int pe); +OSHMEM_DECLSPEC void shmem_uint64_atomic_or(uint64_t *target, uint64_t value, int pe); + #if OSHMEM_HAVE_C11 -#define shmem_atomic_or(dst, val, pe) \ - _Generic(&*(dst), \ - unsigned int*: shmem_uint_atomic_or, \ - unsigned long*: shmem_ulong_atomic_or, \ - unsigned long long*: shmem_ulonglong_atomic_or)(dst, val, pe) +#define shmem_atomic_or(...) 
\ + _Generic(&*(__OSHMEM_VAR_ARG1(__VA_ARGS__)), \ + shmem_ctx_t: _Generic((__OSHMEM_VAR_ARG2(__VA_ARGS__)), \ + int*: shmem_ctx_int_atomic_or, \ + long*: shmem_ctx_long_atomic_or, \ + long long*: shmem_ctx_longlong_atomic_or, \ + unsigned int*: shmem_ctx_uint_atomic_or, \ + unsigned long*: shmem_ctx_ulong_atomic_or, \ + unsigned long long*: shmem_ctx_ulonglong_atomic_or, \ + default: __oshmem_datatype_ignore), \ + int*: shmem_int_atomic_or, \ + long*: shmem_long_atomic_or, \ + long long*: shmem_longlong_atomic_or, \ + unsigned int*: shmem_uint_atomic_or, \ + unsigned long*: shmem_ulong_atomic_or, \ + unsigned long long*: shmem_ulonglong_atomic_or)(__VA_ARGS__) #endif /* Atomic Xor */ +OSHMEM_DECLSPEC void shmem_ctx_int_atomic_xor(shmem_ctx_t ctx, int *target, int value, int pe); +OSHMEM_DECLSPEC void shmem_ctx_long_atomic_xor(shmem_ctx_t ctx, long *target, long value, int pe); +OSHMEM_DECLSPEC void shmem_ctx_longlong_atomic_xor(shmem_ctx_t ctx, long long *target, long long value, int pe); OSHMEM_DECLSPEC void shmem_ctx_uint_atomic_xor(shmem_ctx_t ctx, unsigned int *target, unsigned int value, int pe); OSHMEM_DECLSPEC void shmem_ctx_ulong_atomic_xor(shmem_ctx_t ctx, unsigned long *target, unsigned long value, int pe); OSHMEM_DECLSPEC void shmem_ctx_ulonglong_atomic_xor(shmem_ctx_t ctx, unsigned long long *target, unsigned long long value, int pe); -#if OSHMEM_HAVE_C11 -#define shmem_atomic_xor(ctx, dst, val, pe) \ - _Generic(&*(dst), \ - unsigned int*: shmem_ctx_uint_atomic_xor, \ - unsigned long*: shmem_ctx_ulong_atomic_xor, \ - unsigned long long*: shmem_ctx_ulonglong_atomic_xor)(ctx, dst, val, pe) -#endif - +OSHMEM_DECLSPEC void shmem_ctx_int32_atomic_xor(shmem_ctx_t ctx, int32_t *target, int32_t value, int pe); +OSHMEM_DECLSPEC void shmem_ctx_int64_atomic_xor(shmem_ctx_t ctx, int64_t *target, int64_t value, int pe); +OSHMEM_DECLSPEC void shmem_ctx_uint32_atomic_xor(shmem_ctx_t ctx, uint32_t *target, uint32_t value, int pe); +OSHMEM_DECLSPEC void 
shmem_ctx_uint64_atomic_xor(shmem_ctx_t ctx, uint64_t *target, uint64_t value, int pe); + +OSHMEM_DECLSPEC void shmem_int_atomic_xor(int *target, int value, int pe); +OSHMEM_DECLSPEC void shmem_long_atomic_xor(long *target, long value, int pe); +OSHMEM_DECLSPEC void shmem_longlong_atomic_xor(long long *target, long long value, int pe); OSHMEM_DECLSPEC void shmem_uint_atomic_xor(unsigned int *target, unsigned int value, int pe); OSHMEM_DECLSPEC void shmem_ulong_atomic_xor(unsigned long *target, unsigned long value, int pe); OSHMEM_DECLSPEC void shmem_ulonglong_atomic_xor(unsigned long long *target, unsigned long long value, int pe); +OSHMEM_DECLSPEC void shmem_int32_atomic_xor(int32_t *target, int32_t value, int pe); +OSHMEM_DECLSPEC void shmem_int64_atomic_xor(int64_t *target, int64_t value, int pe); +OSHMEM_DECLSPEC void shmem_uint32_atomic_xor(uint32_t *target, uint32_t value, int pe); +OSHMEM_DECLSPEC void shmem_uint64_atomic_xor(uint64_t *target, uint64_t value, int pe); + #if OSHMEM_HAVE_C11 -#define shmem_atomic_xor(dst, val, pe) \ - _Generic(&*(dst), \ - unsigned int*: shmem_uint_atomic_xor, \ - unsigned long*: shmem_ulong_atomic_xor, \ - unsigned long long*: shmem_ulonglong_atomic_xor)(dst, val, pe) +#define shmem_atomic_xor(...) 
\ + _Generic(&*(__OSHMEM_VAR_ARG1(__VA_ARGS__)), \ + shmem_ctx_t: _Generic((__OSHMEM_VAR_ARG2(__VA_ARGS__)), \ + int*: shmem_ctx_int_atomic_xor, \ + long*: shmem_ctx_long_atomic_xor, \ + long long*: shmem_ctx_longlong_atomic_xor, \ + unsigned int*: shmem_ctx_uint_atomic_xor, \ + unsigned long*: shmem_ctx_ulong_atomic_xor, \ + unsigned long long*: shmem_ctx_ulonglong_atomic_xor, \ + default: __oshmem_datatype_ignore), \ + int*: shmem_int_atomic_xor, \ + long*: shmem_long_atomic_xor, \ + long long*: shmem_longlong_atomic_xor, \ + unsigned int*: shmem_uint_atomic_xor, \ + unsigned long*: shmem_ulong_atomic_xor, \ + unsigned long long*: shmem_ulonglong_atomic_xor)(__VA_ARGS__) #endif /* Atomic Inc */ OSHMEM_DECLSPEC void shmem_ctx_int_atomic_inc(shmem_ctx_t ctx, int *target, int pe); OSHMEM_DECLSPEC void shmem_ctx_long_atomic_inc(shmem_ctx_t ctx, long *target, int pe); OSHMEM_DECLSPEC void shmem_ctx_longlong_atomic_inc(shmem_ctx_t ctx, long long *target, int pe); -#if OSHMEM_HAVE_C11 -#define shmem_atomic_inc(ctx, dst, pe) \ - _Generic(&*(dst), \ - int*: shmem_ctx_int_atomic_inc, \ - long*: shmem_ctx_long_atomic_inc, \ - long long*: shmem_ctx_longlong_atomic_inc)(ctx, dst, pe) -#endif +OSHMEM_DECLSPEC void shmem_ctx_uint_atomic_inc(shmem_ctx_t ctx, unsigned int *target, int pe); +OSHMEM_DECLSPEC void shmem_ctx_ulong_atomic_inc(shmem_ctx_t ctx, unsigned long *target, int pe); +OSHMEM_DECLSPEC void shmem_ctx_ulonglong_atomic_inc(shmem_ctx_t ctx, unsigned long long *target, int pe); OSHMEM_DECLSPEC void shmem_int_atomic_inc(int *target, int pe); OSHMEM_DECLSPEC void shmem_long_atomic_inc(long *target, int pe); OSHMEM_DECLSPEC void shmem_longlong_atomic_inc(long long *target, int pe); +OSHMEM_DECLSPEC void shmem_uint_atomic_inc(unsigned int *target, int pe); +OSHMEM_DECLSPEC void shmem_ulong_atomic_inc(unsigned long *target, int pe); +OSHMEM_DECLSPEC void shmem_ulonglong_atomic_inc(unsigned long long *target, int pe); #if OSHMEM_HAVE_C11 -#define shmem_atomic_inc(dst, pe) 
\ - _Generic(&*(dst), \ - int*: shmem_int_atomic_inc, \ - long*: shmem_long_atomic_inc, \ - long long*: shmem_longlong_atomic_inc)(dst, pe) +#define shmem_atomic_inc(...) \ + _Generic(&*(__OSHMEM_VAR_ARG1(__VA_ARGS__)), \ + shmem_ctx_t: _Generic((__OSHMEM_VAR_ARG2(__VA_ARGS__)),\ + int*: shmem_ctx_int_atomic_inc, \ + long*: shmem_ctx_long_atomic_inc, \ + long long*: shmem_ctx_longlong_atomic_inc, \ + unsigned int*: shmem_ctx_uint_atomic_inc, \ + unsigned long*: shmem_ctx_ulong_atomic_inc, \ + unsigned long long*: shmem_ctx_ulonglong_atomic_inc,\ + default: __oshmem_datatype_ignore), \ + int*: shmem_int_atomic_inc, \ + long*: shmem_long_atomic_inc, \ + long long*: shmem_longlong_atomic_inc, \ + unsigned int*: shmem_uint_atomic_inc, \ + unsigned long*: shmem_ulong_atomic_inc, \ + unsigned long long*: shmem_ulonglong_atomic_inc)(__VA_ARGS__) #endif OSHMEM_DECLSPEC void shmem_int_inc(int *target, int pe); @@ -1097,26 +1646,54 @@ OSHMEM_DECLSPEC void shmem_short_wait_until(volatile short *addr, int cmp, shor OSHMEM_DECLSPEC void shmem_int_wait_until(volatile int *addr, int cmp, int value); OSHMEM_DECLSPEC void shmem_long_wait_until(volatile long *addr, int cmp, long value); OSHMEM_DECLSPEC void shmem_longlong_wait_until(volatile long long *addr, int cmp, long long value); +OSHMEM_DECLSPEC void shmem_ushort_wait_until(volatile unsigned short *addr, int cmp, unsigned short value); +OSHMEM_DECLSPEC void shmem_uint_wait_until(volatile unsigned int *addr, int cmp, unsigned int value); +OSHMEM_DECLSPEC void shmem_ulong_wait_until(volatile unsigned long *addr, int cmp, unsigned long value); +OSHMEM_DECLSPEC void shmem_ulonglong_wait_until(volatile unsigned long long *addr, int cmp, unsigned long long value); +OSHMEM_DECLSPEC void shmem_int32_wait_until(volatile int32_t *addr, int cmp, int32_t value); +OSHMEM_DECLSPEC void shmem_int64_wait_until(volatile int64_t *addr, int cmp, int64_t value); +OSHMEM_DECLSPEC void shmem_uint32_wait_until(volatile uint32_t *addr, int cmp, 
uint32_t value); +OSHMEM_DECLSPEC void shmem_uint64_wait_until(volatile uint64_t *addr, int cmp, uint64_t value); +OSHMEM_DECLSPEC void shmem_size_wait_until(volatile size_t *addr, int cmp, size_t value); +OSHMEM_DECLSPEC void shmem_ptrdiff_wait_until(volatile ptrdiff_t *addr, int cmp, ptrdiff_t value); #if OSHMEM_HAVE_C11 -#define shmem_wait_until(addr, cmp, value) \ - _Generic(&*(addr), \ - short*: shmem_short_wait_until, \ - int*: shmem_int_wait_until, \ - long*: shmem_long_wait_until, \ - long long*: shmem_longlong_wait_until(addr, cmp, value) +#define shmem_wait_until(addr, cmp, value) \ + _Generic(&*(addr), \ + short*: shmem_short_wait_until, \ + int*: shmem_int_wait_until, \ + long*: shmem_long_wait_until, \ + long long*: shmem_longlong_wait_until, \ + unsigned short*: shmem_ushort_wait_until, \ + unsigned int*: shmem_uint_wait_until, \ + unsigned long*: shmem_ulong_wait_until, \ + unsigned long long*: shmem_ulonglong_wait_until)(addr, cmp, value) #endif OSHMEM_DECLSPEC int shmem_short_test(volatile short *addr, int cmp, short value); OSHMEM_DECLSPEC int shmem_int_test(volatile int *addr, int cmp, int value); OSHMEM_DECLSPEC int shmem_long_test(volatile long *addr, int cmp, long value); OSHMEM_DECLSPEC int shmem_longlong_test(volatile long long *addr, int cmp, long long value); +OSHMEM_DECLSPEC int shmem_ushort_test(volatile unsigned short *addr, int cmp, unsigned short value); +OSHMEM_DECLSPEC int shmem_uint_test(volatile unsigned int *addr, int cmp, unsigned int value); +OSHMEM_DECLSPEC int shmem_ulong_test(volatile unsigned long *addr, int cmp, unsigned long value); +OSHMEM_DECLSPEC int shmem_ulonglong_test(volatile unsigned long long *addr, int cmp, unsigned long long value); +OSHMEM_DECLSPEC int shmem_int32_test(volatile int32_t *addr, int cmp, int32_t value); +OSHMEM_DECLSPEC int shmem_int64_test(volatile int64_t *addr, int cmp, int64_t value); +OSHMEM_DECLSPEC int shmem_uint32_test(volatile uint32_t *addr, int cmp, uint32_t value); +OSHMEM_DECLSPEC 
int shmem_uint64_test(volatile uint64_t *addr, int cmp, uint64_t value); +OSHMEM_DECLSPEC int shmem_size_test(volatile size_t *addr, int cmp, size_t value); +OSHMEM_DECLSPEC int shmem_ptrdiff_test(volatile ptrdiff_t *addr, int cmp, ptrdiff_t value); #if OSHMEM_HAVE_C11 -#define shmem_test(addr, cmp, value) \ - _Generic(&*(addr), \ - short*: shmem_short_test, \ - int*: shmem_int_test, \ - long*: shmem_long_test, \ - long long*: shmem_longlong_test(addr, cmp, value) +#define shmem_test(addr, cmp, value) \ + _Generic(&*(addr), \ + short*: shmem_short_test, \ + int*: shmem_int_test, \ + long*: shmem_long_test, \ + long long*: shmem_longlong_test, \ + unsigned short*: shmem_ushort_test, \ + unsigned int*: shmem_uint_test, \ + unsigned long*: shmem_ulong_test, \ + unsigned long long*: shmem_ulonglong_test)(addr, cmp, value) #endif /* diff --git a/oshmem/include/shmemx.h b/oshmem/include/shmemx.h index da67a0cb51f..f7e7de68295 100644 --- a/oshmem/include/shmemx.h +++ b/oshmem/include/shmemx.h @@ -18,11 +18,29 @@ extern "C" { #endif +enum { + SHMEM_HINT_NONE = 0, + SHMEM_HINT_LOW_LAT_MEM = 1 << 0, + SHMEM_HINT_HIGH_BW_MEM = 1 << 1, + SHMEM_HINT_NEAR_NIC_MEM = 1 << 2, + SHMEM_HINT_DEVICE_GPU_MEM = 1 << 3, + SHMEM_HINT_DEVICE_NIC_MEM = 1 << 4, + + SHMEM_HINT_PSYNC = 1 << 16, + SHMEM_HINT_PWORK = 1 << 17, + SHMEM_HINT_ATOMICS = 1 << 18 +}; + /* * All OpenSHMEM extension APIs that are not part of this specification must be defined in the shmemx.h include * file. These extensions shall use the shmemx_ prefix for all routine, variable, and constant names. 
*/ +/* + * Symmetric heap routines + */ +OSHMEM_DECLSPEC void* shmemx_malloc_with_hint(size_t size, long hint); + /* * Elemental put routines */ @@ -168,17 +186,24 @@ OSHMEM_DECLSPEC void shmemx_int16_prod_to_all(int16_t *target, const int16_t *so OSHMEM_DECLSPEC void shmemx_int32_prod_to_all(int32_t *target, const int32_t *source, int nreduce, int PE_start, int logPE_stride, int PE_size, int32_t *pWrk, long *pSync); OSHMEM_DECLSPEC void shmemx_int64_prod_to_all(int64_t *target, const int64_t *source, int nreduce, int PE_start, int logPE_stride, int PE_size, int64_t *pWrk, long *pSync); +/* shmemx_alltoall_global_nb is a nonblocking collective routine, where each PE + * exchanges “size” bytes of data with all other PEs in the OpenSHMEM job. + * + * @param dest A symmetric data object that is large enough to receive + * “size” bytes of data from each PE in the OpenSHMEM job. + * @param source A symmetric data object that contains “size” bytes of data + * for each PE in the OpenSHMEM job. + * @param size The number of bytes to be sent to each PE in the job. + * @param counter A symmetric data object to be atomically incremented after + * the target buffer is updated. + * + * @return None (the routine is declared void). 
+ */ +OSHMEM_DECLSPEC void shmemx_alltoall_global_nb(void *dest, const void *source, size_t size, long *counter); + /* * Backward compatibility section */ -#define shmem_int16_p shmemx_int16_p -#define shmem_int32_p shmemx_int32_p -#define shmem_int64_p shmemx_int64_p - -#define shmem_int16_g shmemx_int16_g -#define shmem_int32_g shmemx_int32_g -#define shmem_int64_g shmemx_int64_g - #define shmem_int32_swap shmemx_int32_swap #define shmem_int64_swap shmemx_int64_swap @@ -204,8 +229,6 @@ OSHMEM_DECLSPEC void shmemx_int64_prod_to_all(int64_t *target, const int64_t *so #define shmem_int32_wait shmemx_int32_wait #define shmem_int64_wait shmemx_int64_wait -#define shmem_int32_wait_until shmemx_int32_wait_until -#define shmem_int64_wait_until shmemx_int64_wait_until #define shmem_int16_and_to_all shmemx_int16_and_to_all #define shmem_int32_and_to_all shmemx_int32_and_to_all diff --git a/oshmem/mca/atomic/atomic.h b/oshmem/mca/atomic/atomic.h index 68e44b58409..912f2a71955 100644 --- a/oshmem/mca/atomic/atomic.h +++ b/oshmem/mca/atomic/atomic.h @@ -135,7 +135,7 @@ struct mca_atomic_base_component_1_0_0_t { mca_base_component_data_t atomic_data; /** Component initialization function */ - mca_atomic_base_component_init_fn_t atomic_init; + mca_atomic_base_component_init_fn_t atomic_startup; mca_atomic_base_component_finalize_fn_t atomic_finalize; mca_atomic_base_component_query_fn_t atomic_query; diff --git a/oshmem/mca/atomic/base/atomic_base_available.c b/oshmem/mca/atomic/base/atomic_base_available.c index a3fe99a2aa2..927e1fe7798 100644 --- a/oshmem/mca/atomic/base/atomic_base_available.c +++ b/oshmem/mca/atomic/base/atomic_base_available.c @@ -104,7 +104,7 @@ static int init_query(const mca_base_component_t * component, mca_atomic_base_component_t *atomic = (mca_atomic_base_component_t *) component; - ret = atomic->atomic_init(enable_progress_threads, enable_threads); + ret = atomic->atomic_startup(enable_progress_threads, enable_threads); } else { /* Unrecognized coll 
API version */ diff --git a/oshmem/mca/atomic/basic/atomic_basic.h b/oshmem/mca/atomic/basic/atomic_basic.h index ee1bd0df01d..b581f855052 100644 --- a/oshmem/mca/atomic/basic/atomic_basic.h +++ b/oshmem/mca/atomic/basic/atomic_basic.h @@ -31,7 +31,7 @@ OSHMEM_DECLSPEC void atomic_basic_unlock(shmem_ctx_t ctx, int pe); /* API functions */ -int mca_atomic_basic_init(bool enable_progress_threads, bool enable_threads); +int mca_atomic_basic_startup(bool enable_progress_threads, bool enable_threads); int mca_atomic_basic_finalize(void); mca_atomic_base_module_t* mca_atomic_basic_query(int *priority); diff --git a/oshmem/mca/atomic/basic/atomic_basic_component.c b/oshmem/mca/atomic/basic/atomic_basic_component.c index 0d760cc7d3b..fc688164c2d 100644 --- a/oshmem/mca/atomic/basic/atomic_basic_component.c +++ b/oshmem/mca/atomic/basic/atomic_basic_component.c @@ -62,7 +62,7 @@ mca_atomic_base_component_t mca_atomic_basic_component = { /* Initialization / querying functions */ - .atomic_init = mca_atomic_basic_init, + .atomic_startup = mca_atomic_basic_startup, .atomic_finalize = mca_atomic_basic_finalize, .atomic_query = mca_atomic_basic_query, }; diff --git a/oshmem/mca/atomic/basic/atomic_basic_module.c b/oshmem/mca/atomic/basic/atomic_basic_module.c index a6d35d46983..1b8a1b102b9 100644 --- a/oshmem/mca/atomic/basic/atomic_basic_module.c +++ b/oshmem/mca/atomic/basic/atomic_basic_module.c @@ -34,7 +34,7 @@ enum { * Initial query function that is invoked during initialization, allowing * this module to indicate what level of thread support it provides. 
*/ -int mca_atomic_basic_init(bool enable_progress_threads, bool enable_threads) +int mca_atomic_basic_startup(bool enable_progress_threads, bool enable_threads) { int rc = OSHMEM_SUCCESS; void* ptr = NULL; diff --git a/oshmem/mca/atomic/mxm/atomic_mxm.h b/oshmem/mca/atomic/mxm/atomic_mxm.h index 39efdfc4668..ba491d2b29a 100644 --- a/oshmem/mca/atomic/mxm/atomic_mxm.h +++ b/oshmem/mca/atomic/mxm/atomic_mxm.h @@ -37,7 +37,7 @@ OSHMEM_DECLSPEC void atomic_mxm_unlock(int pe); /* API functions */ -int mca_atomic_mxm_init(bool enable_progress_threads, bool enable_threads); +int mca_atomic_mxm_startup(bool enable_progress_threads, bool enable_threads); int mca_atomic_mxm_finalize(void); mca_atomic_base_module_t* mca_atomic_mxm_query(int *priority); diff --git a/oshmem/mca/atomic/mxm/atomic_mxm_component.c b/oshmem/mca/atomic/mxm/atomic_mxm_component.c index 005f2701f24..201087d7673 100644 --- a/oshmem/mca/atomic/mxm/atomic_mxm_component.c +++ b/oshmem/mca/atomic/mxm/atomic_mxm_component.c @@ -66,7 +66,7 @@ mca_atomic_base_component_t mca_atomic_mxm_component = { /* Initialization / querying functions */ - .atomic_init = mca_atomic_mxm_init, + .atomic_startup = mca_atomic_mxm_startup, .atomic_finalize = mca_atomic_mxm_finalize, .atomic_query = mca_atomic_mxm_query, }; diff --git a/oshmem/mca/atomic/mxm/atomic_mxm_module.c b/oshmem/mca/atomic/mxm/atomic_mxm_module.c index c081651707b..b4bee48fa1e 100644 --- a/oshmem/mca/atomic/mxm/atomic_mxm_module.c +++ b/oshmem/mca/atomic/mxm/atomic_mxm_module.c @@ -22,7 +22,7 @@ * Initial query function that is invoked during initialization, allowing * this module to indicate what level of thread support it provides. 
*/ -int mca_atomic_mxm_init(bool enable_progress_threads, bool enable_threads) +int mca_atomic_mxm_startup(bool enable_progress_threads, bool enable_threads) { return OSHMEM_SUCCESS; } diff --git a/oshmem/mca/atomic/ucx/atomic_ucx.h b/oshmem/mca/atomic/ucx/atomic_ucx.h index a6797130194..dd588bdc0b5 100644 --- a/oshmem/mca/atomic/ucx/atomic_ucx.h +++ b/oshmem/mca/atomic/ucx/atomic_ucx.h @@ -37,7 +37,7 @@ OSHMEM_DECLSPEC void atomic_ucx_unlock(int pe); /* API functions */ -int mca_atomic_ucx_init(bool enable_progress_threads, bool enable_threads); +int mca_atomic_ucx_startup(bool enable_progress_threads, bool enable_threads); int mca_atomic_ucx_finalize(void); mca_atomic_base_module_t* mca_atomic_ucx_query(int *priority); diff --git a/oshmem/mca/atomic/ucx/atomic_ucx_component.c b/oshmem/mca/atomic/ucx/atomic_ucx_component.c index 437941ef9c9..27088440723 100644 --- a/oshmem/mca/atomic/ucx/atomic_ucx_component.c +++ b/oshmem/mca/atomic/ucx/atomic_ucx_component.c @@ -70,7 +70,7 @@ mca_atomic_base_component_t mca_atomic_ucx_component = { /* Initialization / querying functions */ - mca_atomic_ucx_init, + mca_atomic_ucx_startup, mca_atomic_ucx_finalize, mca_atomic_ucx_query }; @@ -86,6 +86,8 @@ static int ucx_register(void) MCA_BASE_VAR_SCOPE_ALL_EQ, &mca_atomic_ucx_component.priority); + opal_common_ucx_mca_var_register(&mca_atomic_ucx_component.atomic_version); + return OSHMEM_SUCCESS; } diff --git a/oshmem/mca/atomic/ucx/atomic_ucx_cswap.c b/oshmem/mca/atomic/ucx/atomic_ucx_cswap.c index 25fe9926882..8c5fa1d1a64 100644 --- a/oshmem/mca/atomic/ucx/atomic_ucx_cswap.c +++ b/oshmem/mca/atomic/ucx/atomic_ucx_cswap.c @@ -40,11 +40,16 @@ int mca_atomic_ucx_cswap(shmem_ctx_t ctx, assert(NULL != prev); *prev = value; - ucx_mkey = mca_spml_ucx_get_mkey(ucx_ctx, pe, target, (void *)&rva, mca_spml_self); + ucx_mkey = mca_spml_ucx_get_mkey(ctx, pe, target, (void *)&rva, mca_spml_self); status_ptr = ucp_atomic_fetch_nb(ucx_ctx->ucp_peers[pe].ucp_conn, UCP_ATOMIC_FETCH_OP_CSWAP, 
cond, prev, size, rva, ucx_mkey->rkey, opal_common_ucx_empty_complete_cb); + + if (OPAL_LIKELY(!UCS_PTR_IS_ERR(status_ptr))) { + mca_spml_ucx_remote_op_posted(ucx_ctx, pe); + } + return opal_common_ucx_wait_request(status_ptr, ucx_ctx->ucp_worker, "ucp_atomic_fetch_nb"); } diff --git a/oshmem/mca/atomic/ucx/atomic_ucx_module.c b/oshmem/mca/atomic/ucx/atomic_ucx_module.c index 53a00773cb6..882b83f6520 100644 --- a/oshmem/mca/atomic/ucx/atomic_ucx_module.c +++ b/oshmem/mca/atomic/ucx/atomic_ucx_module.c @@ -22,7 +22,7 @@ * Initial query function that is invoked during initialization, allowing * this module to indicate what level of thread support it provides. */ -int mca_atomic_ucx_init(bool enable_progress_threads, bool enable_threads) +int mca_atomic_ucx_startup(bool enable_progress_threads, bool enable_threads) { return OSHMEM_SUCCESS; } @@ -47,10 +47,15 @@ int mca_atomic_ucx_op(shmem_ctx_t ctx, assert((8 == size) || (4 == size)); - ucx_mkey = mca_spml_ucx_get_mkey(ucx_ctx, pe, target, (void *)&rva, mca_spml_self); + ucx_mkey = mca_spml_ucx_get_mkey(ctx, pe, target, (void *)&rva, mca_spml_self); status = ucp_atomic_post(ucx_ctx->ucp_peers[pe].ucp_conn, op, value, size, rva, ucx_mkey->rkey); + + if (OPAL_LIKELY(UCS_OK == status)) { + mca_spml_ucx_remote_op_posted(ucx_ctx, pe); + } + return ucx_status_to_oshmem(status); } @@ -70,7 +75,7 @@ int mca_atomic_ucx_fop(shmem_ctx_t ctx, assert((8 == size) || (4 == size)); - ucx_mkey = mca_spml_ucx_get_mkey(ucx_ctx, pe, target, (void *)&rva, mca_spml_self); + ucx_mkey = mca_spml_ucx_get_mkey(ctx, pe, target, (void *)&rva, mca_spml_self); status_ptr = ucp_atomic_fetch_nb(ucx_ctx->ucp_peers[pe].ucp_conn, op, value, prev, size, rva, ucx_mkey->rkey, diff --git a/oshmem/mca/memheap/base/base.h b/oshmem/mca/memheap/base/base.h index 6b4a79fb9ed..ec84d4c734f 100644 --- a/oshmem/mca/memheap/base/base.h +++ b/oshmem/mca/memheap/base/base.h @@ -36,19 +36,23 @@ OSHMEM_DECLSPEC int mca_memheap_base_select(void); #define 
MEMHEAP_BASE_MIN_ORDER 3 /* forces 64 bit alignment */ #define MEMHEAP_BASE_PAGE_ORDER 21 #define MEMHEAP_BASE_PRIVATE_SIZE (1ULL << MEMHEAP_BASE_PAGE_ORDER) /* should be at least the same as a huge page size */ -#define MEMHEAP_BASE_MIN_SIZE (1ULL << MEMHEAP_BASE_PAGE_ORDER) /* must fit into at least one huge page */ +#define MEMHEAP_BASE_MIN_SIZE (1ULL << MEMHEAP_BASE_PAGE_ORDER) /* must fit into at least one huge page */ extern int mca_memheap_base_already_opened; extern int mca_memheap_base_key_exchange; -#define MCA_MEMHEAP_MAX_SEGMENTS 4 -#define HEAP_SEG_INDEX 0 -#define SYMB_SEG_INDEX 1 -#define MCA_MEMHEAP_SEG_COUNT (SYMB_SEG_INDEX+1) +#define MCA_MEMHEAP_MAX_SEGMENTS 8 +#define HEAP_SEG_INDEX 0 +#define MCA_MEMHEAP_SEG_COUNT 2 #define MEMHEAP_SEG_INVALID 0xFFFF +typedef struct mca_memheap_base_config { + long device_nic_mem_seg_size; /* Used for SHMEM_HINT_DEVICE_NIC_MEM */ +} mca_memheap_base_config_t; + + typedef struct mca_memheap_map { map_segment_t mem_segs[MCA_MEMHEAP_MAX_SEGMENTS]; /* TODO: change into pointer array */ int n_segments; @@ -56,8 +60,9 @@ typedef struct mca_memheap_map { } mca_memheap_map_t; extern mca_memheap_map_t mca_memheap_base_map; +extern mca_memheap_base_config_t mca_memheap_base_config; -int mca_memheap_base_alloc_init(mca_memheap_map_t *, size_t); +int mca_memheap_base_alloc_init(mca_memheap_map_t *, size_t, long); void mca_memheap_base_alloc_exit(mca_memheap_map_t *); int mca_memheap_base_static_init(mca_memheap_map_t *); void mca_memheap_base_static_exit(mca_memheap_map_t *); @@ -69,7 +74,8 @@ void memheap_oob_destruct(void); OSHMEM_DECLSPEC int mca_memheap_base_is_symmetric_addr(const void* va); OSHMEM_DECLSPEC sshmem_mkey_t *mca_memheap_base_get_mkey(void* va, int tr_id); -OSHMEM_DECLSPEC sshmem_mkey_t * mca_memheap_base_get_cached_mkey_slow(map_segment_t *s, +OSHMEM_DECLSPEC sshmem_mkey_t * mca_memheap_base_get_cached_mkey_slow(shmem_ctx_t ctx, + map_segment_t *s, int pe, void* va, int btl_id, @@ -172,10 +178,12 @@ 
static inline int memheap_is_va_in_segment(void *va, int segno) static inline int memheap_find_segnum(void *va) { - if (OPAL_LIKELY(memheap_is_va_in_segment(va, SYMB_SEG_INDEX))) { - return SYMB_SEG_INDEX; - } else if (memheap_is_va_in_segment(va, HEAP_SEG_INDEX)) { - return HEAP_SEG_INDEX; + int i; + + for (i = 0; i < mca_memheap_base_map.n_segments; i++) { + if (memheap_is_va_in_segment(va, i)) { + return i; + } } return MEMHEAP_SEG_INVALID; } @@ -192,18 +200,17 @@ static inline void *map_segment_va2rva(mkey_segment_t *seg, void *va) return memheap_va2rva(va, seg->super.va_base, seg->rva_base); } -static inline map_base_segment_t *map_segment_find_va(map_base_segment_t *segs, size_t elem_size, void *va) +static inline map_base_segment_t *map_segment_find_va(map_base_segment_t *segs, + size_t elem_size, void *va) { map_base_segment_t *rseg; + int i; - rseg = (map_base_segment_t *)((char *)segs + elem_size * HEAP_SEG_INDEX); - if (OPAL_LIKELY(map_segment_is_va_in(rseg, va))) { - return rseg; - } - - rseg = (map_base_segment_t *)((char *)segs + elem_size * SYMB_SEG_INDEX); - if (OPAL_LIKELY(map_segment_is_va_in(rseg, va))) { - return rseg; + for (i = 0; i < MCA_MEMHEAP_MAX_SEGMENTS; i++) { + rseg = (map_base_segment_t *)((char *)segs + elem_size * i); + if (OPAL_LIKELY(map_segment_is_va_in(rseg, va))) { + return rseg; + } } return NULL; @@ -213,21 +220,14 @@ void mkey_segment_init(mkey_segment_t *seg, sshmem_mkey_t *mkey, uint32_t segno) static inline map_segment_t *memheap_find_va(void* va) { - map_segment_t *s; - - /* most probably there will be only two segments: heap and global data */ - if (OPAL_LIKELY(memheap_is_va_in_segment(va, SYMB_SEG_INDEX))) { - s = &memheap_map->mem_segs[SYMB_SEG_INDEX]; - } else if (memheap_is_va_in_segment(va, HEAP_SEG_INDEX)) { - s = &memheap_map->mem_segs[HEAP_SEG_INDEX]; - } else if (memheap_map->n_segments - 2 > 0) { - s = bsearch(va, - &memheap_map->mem_segs[SYMB_SEG_INDEX+1], - memheap_map->n_segments - 2, - sizeof(*s), - 
mca_memheap_seg_cmp); - } else { - s = NULL; + map_segment_t *s = NULL; + int i; + + for (i = 0; i < memheap_map->n_segments; i++) { + if (memheap_is_va_in_segment(va, i)) { + s = &memheap_map->mem_segs[i]; + break; + } } #if MEMHEAP_BASE_DEBUG == 1 @@ -243,7 +243,8 @@ static inline map_segment_t *memheap_find_va(void* va) return s; } -static inline sshmem_mkey_t *mca_memheap_base_get_cached_mkey(int pe, +static inline sshmem_mkey_t *mca_memheap_base_get_cached_mkey(shmem_ctx_t ctx, + int pe, void* va, int btl_id, void** rva) @@ -273,7 +274,7 @@ static inline sshmem_mkey_t *mca_memheap_base_get_cached_mkey(int pe, return mkey; } - return mca_memheap_base_get_cached_mkey_slow(s, pe, va, btl_id, rva); + return mca_memheap_base_get_cached_mkey_slow(ctx, s, pe, va, btl_id, rva); } static inline int mca_memheap_base_num_transports(void) diff --git a/oshmem/mca/memheap/base/memheap_base_alloc.c b/oshmem/mca/memheap/base/memheap_base_alloc.c index 341eec97a96..b83499f250c 100644 --- a/oshmem/mca/memheap/base/memheap_base_alloc.c +++ b/oshmem/mca/memheap/base/memheap_base_alloc.c @@ -19,17 +19,21 @@ #include "oshmem/mca/memheap/base/base.h" -int mca_memheap_base_alloc_init(mca_memheap_map_t *map, size_t size) +int mca_memheap_base_alloc_init(mca_memheap_map_t *map, size_t size, long hint) { int ret = OSHMEM_SUCCESS; char * seg_filename = NULL; assert(map); - assert(HEAP_SEG_INDEX == map->n_segments); + if (hint == 0) { + assert(HEAP_SEG_INDEX == map->n_segments); + } else { + assert(HEAP_SEG_INDEX < map->n_segments); + } map_segment_t *s = &map->mem_segs[map->n_segments]; seg_filename = oshmem_get_unique_file_name(oshmem_my_proc_id()); - ret = mca_sshmem_segment_create(s, seg_filename, size); + ret = mca_sshmem_segment_create(s, seg_filename, size, hint); if (OSHMEM_SUCCESS == ret) { map->n_segments++; @@ -45,12 +49,34 @@ int mca_memheap_base_alloc_init(mca_memheap_map_t *map, size_t size) void mca_memheap_base_alloc_exit(mca_memheap_map_t *map) { - if (map) { - 
map_segment_t *s = &map->mem_segs[HEAP_SEG_INDEX]; + int i; + + if (!map) { + return; + } + + for (i = 0; i < map->n_segments; ++i) { + map_segment_t *s = &map->mem_segs[i]; + if (s->type != MAP_SEGMENT_STATIC) { + mca_sshmem_segment_detach(s, NULL); + mca_sshmem_unlink(s); + } + } +} - assert(s); +int mca_memheap_alloc_with_hint(size_t size, long hint, void** ptr) +{ + int i; - mca_sshmem_segment_detach(s, NULL); - mca_sshmem_unlink(s); + for (i = 0; i < mca_memheap_base_map.n_segments; i++) { + map_segment_t *s = &mca_memheap_base_map.mem_segs[i]; + if (s->allocator && (hint & s->alloc_hints)) { + /* This segment advertises a requested hint bit. Do not fall back to + * the default allocator since it will break the symmetry between PEs + */ + return s->allocator->realloc(s, size, NULL, ptr); + } } + + return MCA_MEMHEAP_CALL(alloc(size, ptr)); } diff --git a/oshmem/mca/memheap/base/memheap_base_frame.c b/oshmem/mca/memheap/base/memheap_base_frame.c index 6f4d3c75b28..23ebf0860db 100644 --- a/oshmem/mca/memheap/base/memheap_base_frame.c +++ b/oshmem/mca/memheap/base/memheap_base_frame.c @@ -52,6 +52,12 @@ static int mca_memheap_base_register(mca_base_register_flag_t flags) MCA_BASE_VAR_SCOPE_READONLY, &mca_memheap_base_key_exchange); + mca_base_var_register("oshmem", "memheap", "base", "device_nic_mem_seg_size", + "Size of memory block used for allocations with hint SHMEM_HINT_DEVICE_NIC_MEM", + MCA_BASE_VAR_TYPE_LONG, NULL, 0, + MCA_BASE_VAR_FLAG_SETTABLE, OPAL_INFO_LVL_3, + MCA_BASE_VAR_SCOPE_LOCAL, + &mca_memheap_base_config.device_nic_mem_seg_size); return OSHMEM_SUCCESS; } diff --git a/oshmem/mca/memheap/base/memheap_base_mkey.c b/oshmem/mca/memheap/base/memheap_base_mkey.c index a2e21f1a5be..a4c24744aab 100644 --- a/oshmem/mca/memheap/base/memheap_base_mkey.c +++ b/oshmem/mca/memheap/base/memheap_base_mkey.c @@ -55,6 +55,7 @@ struct oob_comm { oob_comm_request_t req_pool[MEMHEAP_RECV_REQS_MAX]; opal_list_t req_list; int is_inited; + shmem_ctx_t ctx; }; mca_memheap_map_t* memheap_map = NULL; @@ -66,7 +67,7 
@@ static int send_buffer(int pe, opal_buffer_t *msg); static int oshmem_mkey_recv_cb(void); /* pickup list of rkeys and remote va */ -static int memheap_oob_get_mkeys(int pe, +static int memheap_oob_get_mkeys(shmem_ctx_t ctx, int pe, uint32_t va_seg_num, sshmem_mkey_t *mkey); @@ -142,7 +143,7 @@ static void memheap_attach_segment(sshmem_mkey_t *mkey, int tr_id) } -static void unpack_remote_mkeys(opal_buffer_t *msg, int remote_pe) +static void unpack_remote_mkeys(shmem_ctx_t ctx, opal_buffer_t *msg, int remote_pe) { int32_t cnt; int32_t n; @@ -182,7 +183,7 @@ static void unpack_remote_mkeys(opal_buffer_t *msg, int remote_pe) } else { memheap_oob.mkeys[tr_id].u.key = MAP_SEGMENT_SHM_INVALID; } - MCA_SPML_CALL(rmkey_unpack(&memheap_oob.mkeys[tr_id], memheap_oob.segno, remote_pe, tr_id)); + MCA_SPML_CALL(rmkey_unpack(ctx, &memheap_oob.mkeys[tr_id], memheap_oob.segno, remote_pe, tr_id)); } MEMHEAP_VERBOSE(5, @@ -242,7 +243,7 @@ static void do_recv(int source_pe, opal_buffer_t* buffer) case MEMHEAP_RKEY_RESP: MEMHEAP_VERBOSE(5, "*** RKEY RESP"); OPAL_THREAD_LOCK(&memheap_oob.lck); - unpack_remote_mkeys(buffer, source_pe); + unpack_remote_mkeys(memheap_oob.ctx, buffer, source_pe); memheap_oob.mkeys_rcvd = MEMHEAP_RKEY_RESP; opal_condition_broadcast(&memheap_oob.cond); OPAL_THREAD_UNLOCK(&memheap_oob.lck); @@ -455,14 +456,14 @@ static int send_buffer(int pe, opal_buffer_t *msg) return rc; } -static int memheap_oob_get_mkeys(int pe, uint32_t seg, sshmem_mkey_t *mkeys) +static int memheap_oob_get_mkeys(shmem_ctx_t ctx, int pe, uint32_t seg, sshmem_mkey_t *mkeys) { opal_buffer_t *msg; uint8_t cmd; int i; int rc; - if (OSHMEM_SUCCESS == MCA_SPML_CALL(oob_get_mkeys(pe, seg, mkeys))) { + if (OSHMEM_SUCCESS == MCA_SPML_CALL(oob_get_mkeys(ctx, pe, seg, mkeys))) { for (i = 0; i < memheap_map->num_transports; i++) { MEMHEAP_VERBOSE(5, "MKEY CALCULATED BY LOCAL SPML: pe: %d tr_id: %d %s", @@ -478,6 +479,7 @@ static int memheap_oob_get_mkeys(int pe, uint32_t seg, sshmem_mkey_t 
*mkeys) memheap_oob.mkeys = mkeys; memheap_oob.segno = seg; memheap_oob.mkeys_rcvd = 0; + memheap_oob.ctx = ctx; msg = OBJ_NEW(opal_buffer_t); if (!msg) { @@ -645,7 +647,7 @@ void mca_memheap_modex_recv_all(void) } memheap_oob.mkeys = s->mkeys_cache[i]; memheap_oob.segno = j; - unpack_remote_mkeys(msg, i); + unpack_remote_mkeys(oshmem_ctx_default, msg, i); } } @@ -674,7 +676,8 @@ void mca_memheap_modex_recv_all(void) } } -sshmem_mkey_t * mca_memheap_base_get_cached_mkey_slow(map_segment_t *s, +sshmem_mkey_t * mca_memheap_base_get_cached_mkey_slow(shmem_ctx_t ctx, + map_segment_t *s, int pe, void* va, int btl_id, @@ -692,7 +695,7 @@ sshmem_mkey_t * mca_memheap_base_get_cached_mkey_slow(map_segment_t *s, if (!s->mkeys_cache[pe]) return NULL ; - rc = memheap_oob_get_mkeys(pe, + rc = memheap_oob_get_mkeys(ctx, pe, s - memheap_map->mem_segs, s->mkeys_cache[pe]); if (OSHMEM_SUCCESS != rc) @@ -746,7 +749,7 @@ void mkey_segment_init(mkey_segment_t *seg, sshmem_mkey_t *mkey, uint32_t segno) { map_segment_t *s; - if (segno >= MCA_MEMHEAP_SEG_COUNT) { + if (segno >= MCA_MEMHEAP_MAX_SEGMENTS) { return; } diff --git a/oshmem/mca/memheap/base/memheap_base_select.c b/oshmem/mca/memheap/base/memheap_base_select.c index 54676a7e326..9b856c76972 100644 --- a/oshmem/mca/memheap/base/memheap_base_select.c +++ b/oshmem/mca/memheap/base/memheap_base_select.c @@ -22,6 +22,13 @@ #include "oshmem/mca/memheap/memheap.h" #include "oshmem/mca/memheap/base/base.h" #include "orte/mca/errmgr/errmgr.h" +#include "oshmem/include/shmemx.h" +#include "oshmem/mca/sshmem/base/base.h" + + +mca_memheap_base_config_t mca_memheap_base_config = { + .device_nic_mem_seg_size = 0 +}; mca_memheap_base_module_t mca_memheap = {0}; @@ -95,7 +102,7 @@ static memheap_context_t* _memheap_create(void) { int rc = OSHMEM_SUCCESS; static memheap_context_t context; - size_t user_size; + size_t user_size, size; user_size = _memheap_size(); if (user_size < MEMHEAP_BASE_MIN_SIZE) { @@ -106,7 +113,18 @@ static 
memheap_context_t* _memheap_create(void) /* Inititialize symmetric area */ if (OSHMEM_SUCCESS == rc) { rc = mca_memheap_base_alloc_init(&mca_memheap_base_map, - user_size + MEMHEAP_BASE_PRIVATE_SIZE); + user_size + MEMHEAP_BASE_PRIVATE_SIZE, 0); + } + + /* Initialize atomic symmetric area */ + size = mca_memheap_base_config.device_nic_mem_seg_size; + if ((OSHMEM_SUCCESS == rc) && (size > 0)) { + rc = mca_memheap_base_alloc_init(&mca_memheap_base_map, size, + SHMEM_HINT_DEVICE_NIC_MEM); + if (rc == OSHMEM_ERR_NOT_IMPLEMENTED) { + /* do not treat NOT_IMPLEMENTED as error */ + rc = OSHMEM_SUCCESS; + } } /* Inititialize static/global variables area */ diff --git a/oshmem/mca/memheap/base/memheap_base_static.c b/oshmem/mca/memheap/base/memheap_base_static.c index edbb11aa310..4e97253a9ee 100644 --- a/oshmem/mca/memheap/base/memheap_base_static.c +++ b/oshmem/mca/memheap/base/memheap_base_static.c @@ -49,7 +49,7 @@ int mca_memheap_base_static_init(mca_memheap_map_t *map) int ret = OSHMEM_SUCCESS; assert(map); - assert(SYMB_SEG_INDEX <= map->n_segments); + assert(HEAP_SEG_INDEX < map->n_segments); ret = _load_segments(); diff --git a/oshmem/mca/memheap/memheap.h b/oshmem/mca/memheap/memheap.h index 7cad1e9e3f3..07c4e2f2f05 100644 --- a/oshmem/mca/memheap/memheap.h +++ b/oshmem/mca/memheap/memheap.h @@ -138,6 +138,8 @@ typedef struct mca_memheap_base_module_t mca_memheap_base_module_t; OSHMEM_DECLSPEC extern mca_memheap_base_module_t mca_memheap; +int mca_memheap_alloc_with_hint(size_t size, long hint, void**); + static inline int mca_memheap_base_mkey_is_shm(sshmem_mkey_t *mkey) { return (0 == mkey->len) && (MAP_SEGMENT_SHM_INVALID != (int)mkey->u.key); diff --git a/oshmem/mca/scoll/base/scoll_base_select.c b/oshmem/mca/scoll/base/scoll_base_select.c index fdaddfe1699..15d5a8d714b 100644 --- a/oshmem/mca/scoll/base/scoll_base_select.c +++ b/oshmem/mca/scoll/base/scoll_base_select.c @@ -77,6 +77,7 @@ static int scoll_null_broadcast(struct oshmem_group_t *group, const void 
*source, size_t nlong, long *pSync, + bool nlong_type, int alg) { if (oshmem_proc_group_is_member(group)) { diff --git a/oshmem/mca/scoll/basic/scoll_basic.h b/oshmem/mca/scoll/basic/scoll_basic.h index b45b8380f8d..066ff6cacdd 100644 --- a/oshmem/mca/scoll/basic/scoll_basic.h +++ b/oshmem/mca/scoll/basic/scoll_basic.h @@ -61,6 +61,7 @@ int mca_scoll_basic_broadcast(struct oshmem_group_t *group, const void *source, size_t nlong, long *pSync, + bool nlong_type, int alg); int mca_scoll_basic_collect(struct oshmem_group_t *group, void *target, diff --git a/oshmem/mca/scoll/basic/scoll_basic_alltoall.c b/oshmem/mca/scoll/basic/scoll_basic_alltoall.c index 9843d985e78..6a87e85578f 100644 --- a/oshmem/mca/scoll/basic/scoll_basic_alltoall.c +++ b/oshmem/mca/scoll/basic/scoll_basic_alltoall.c @@ -61,6 +61,11 @@ int mca_scoll_basic_alltoall(struct oshmem_group_t *group, return OSHMEM_ERR_BAD_PARAM; } + /* Do nothing on zero-length request */ + if (OPAL_UNLIKELY(!nelems)) { + return OPAL_SUCCESS; + } + if ((sst == 1) && (dst == 1)) { rc = a2a_alg_simple(group, target, source, nelems, element_size); } else { @@ -79,7 +84,7 @@ int mca_scoll_basic_alltoall(struct oshmem_group_t *group, /* Wait for operation completion */ SCOLL_VERBOSE(14, "[#%d] Wait for operation completion", group->my_pe); - rc = BARRIER_FUNC(group, pSync + 1, SCOLL_DEFAULT_ALG); + rc = BARRIER_FUNC(group, pSync, SCOLL_DEFAULT_ALG); /* Restore initial values */ SCOLL_VERBOSE(12, "PE#%d Restore special synchronization array", diff --git a/oshmem/mca/scoll/basic/scoll_basic_broadcast.c b/oshmem/mca/scoll/basic/scoll_basic_broadcast.c index ef9bf1869b5..44c8436a0ea 100644 --- a/oshmem/mca/scoll/basic/scoll_basic_broadcast.c +++ b/oshmem/mca/scoll/basic/scoll_basic_broadcast.c @@ -41,6 +41,7 @@ int mca_scoll_basic_broadcast(struct oshmem_group_t *group, const void *source, size_t nlong, long *pSync, + bool nlong_type, int alg) { int rc = OSHMEM_SUCCESS; @@ -55,6 +56,11 @@ int mca_scoll_basic_broadcast(struct 
oshmem_group_t *group, if ((rc == OSHMEM_SUCCESS) && oshmem_proc_group_is_member(group)) { int i = 0; + /* Do nothing on zero-length request */ + if (OPAL_UNLIKELY(nlong_type && !nlong)) { + return OSHMEM_SUCCESS; + } + if (pSync) { alg = (alg == SCOLL_DEFAULT_ALG ? mca_scoll_basic_param_broadcast_algorithm : alg); diff --git a/oshmem/mca/scoll/basic/scoll_basic_collect.c b/oshmem/mca/scoll/basic/scoll_basic_collect.c index eda5f93406c..e5fb03f5350 100644 --- a/oshmem/mca/scoll/basic/scoll_basic_collect.c +++ b/oshmem/mca/scoll/basic/scoll_basic_collect.c @@ -67,6 +67,12 @@ int mca_scoll_basic_collect(struct oshmem_group_t *group, int i = 0; if (nlong_type) { + + /* Do nothing on zero-length request */ + if (OPAL_UNLIKELY(!nlong)) { + return OPAL_SUCCESS; + } + alg = (alg == SCOLL_DEFAULT_ALG ? mca_scoll_basic_param_collect_algorithm : alg); switch (alg) { @@ -193,6 +199,7 @@ static int _algorithm_f_central_counter(struct oshmem_group_t *group, target, group->proc_count * nlong, (pSync + 1), + true, SCOLL_DEFAULT_ALG); } @@ -303,6 +310,7 @@ static int _algorithm_f_tournament(struct oshmem_group_t *group, target, group->proc_count * nlong, (pSync + 1), + true, SCOLL_DEFAULT_ALG); } @@ -624,6 +632,7 @@ static int _algorithm_central_collector(struct oshmem_group_t *group, target, offset, (pSync + 1), + false, SCOLL_DEFAULT_ALG); } diff --git a/oshmem/mca/scoll/basic/scoll_basic_reduce.c b/oshmem/mca/scoll/basic/scoll_basic_reduce.c index 9d6db9eed47..b8ecb9e7daf 100644 --- a/oshmem/mca/scoll/basic/scoll_basic_reduce.c +++ b/oshmem/mca/scoll/basic/scoll_basic_reduce.c @@ -78,6 +78,11 @@ int mca_scoll_basic_reduce(struct oshmem_group_t *group, if ((rc == OSHMEM_SUCCESS) && oshmem_proc_group_is_member(group)) { int i = 0; + /* Do nothing on zero-length request */ + if (OPAL_UNLIKELY(!nlong)) { + return OSHMEM_SUCCESS; + } + if (pSync) { alg = (alg == SCOLL_DEFAULT_ALG ? 
mca_scoll_basic_param_reduce_algorithm : alg); @@ -237,6 +242,7 @@ static int _algorithm_central_counter(struct oshmem_group_t *group, target, nlong, (pSync + 1), + true, SCOLL_DEFAULT_ALG); } @@ -355,6 +361,7 @@ static int _algorithm_tournament(struct oshmem_group_t *group, target, nlong, (pSync + 1), + true, SCOLL_DEFAULT_ALG); } @@ -634,6 +641,7 @@ static int _algorithm_linear(struct oshmem_group_t *group, target, nlong, (pSync + 1), + true, SCOLL_DEFAULT_ALG); } @@ -802,6 +810,7 @@ static int _algorithm_log(struct oshmem_group_t *group, target, nlong, (pSync + 1), + true, SCOLL_DEFAULT_ALG); } diff --git a/oshmem/mca/scoll/fca/scoll_fca.h b/oshmem/mca/scoll/fca/scoll_fca.h index 38215ec8684..e220abe3152 100644 --- a/oshmem/mca/scoll/fca/scoll_fca.h +++ b/oshmem/mca/scoll/fca/scoll_fca.h @@ -115,6 +115,7 @@ int mca_scoll_fca_broadcast(struct oshmem_group_t *group, const void *source, size_t nlong, long *pSync, + bool nlong_type, int algorithm_type); int mca_scoll_fca_collect(struct oshmem_group_t *group, void *target, diff --git a/oshmem/mca/scoll/fca/scoll_fca_ops.c b/oshmem/mca/scoll/fca/scoll_fca_ops.c index 0aa05c29755..887f98796b2 100644 --- a/oshmem/mca/scoll/fca/scoll_fca_ops.c +++ b/oshmem/mca/scoll/fca/scoll_fca_ops.c @@ -50,6 +50,7 @@ int mca_scoll_fca_broadcast(struct oshmem_group_t *group, const void *source, size_t nlong, long *pSync, + bool nlong_type, int alg) { mca_scoll_fca_module_t *fca_module = @@ -87,6 +88,7 @@ int mca_scoll_fca_broadcast(struct oshmem_group_t *group, source, nlong, pSync, + nlong_type, SCOLL_DEFAULT_ALG); return rc; } diff --git a/oshmem/mca/scoll/mpi/scoll_mpi.h b/oshmem/mca/scoll/mpi/scoll_mpi.h index 4c30f8193b4..40d163e74cb 100644 --- a/oshmem/mca/scoll/mpi/scoll_mpi.h +++ b/oshmem/mca/scoll/mpi/scoll_mpi.h @@ -90,6 +90,7 @@ int mca_scoll_mpi_broadcast(struct oshmem_group_t *group, const void *source, size_t nlong, long *pSync, + bool nlong_type, int alg); int mca_scoll_mpi_collect(struct oshmem_group_t *group, diff 
--git a/oshmem/mca/scoll/mpi/scoll_mpi_module.c b/oshmem/mca/scoll/mpi/scoll_mpi_module.c index 1228cf8a3a2..ca487caa696 100644 --- a/oshmem/mca/scoll/mpi/scoll_mpi_module.c +++ b/oshmem/mca/scoll/mpi/scoll_mpi_module.c @@ -110,7 +110,7 @@ mca_scoll_mpi_comm_query(oshmem_group_t *osh_group, int *priority) mca_scoll_mpi_module_t *mpi_module; int err, i; int tag; - ompi_group_t* parent_group, *new_group; + ompi_group_t* world_group, *new_group; ompi_communicator_t* newcomm = NULL; *priority = 0; mca_scoll_mpi_component_t *cm; @@ -129,7 +129,7 @@ mca_scoll_mpi_comm_query(oshmem_group_t *osh_group, int *priority) osh_group->ompi_comm = &(ompi_mpi_comm_world.comm); OPAL_TIMING_ENV_NEXT(comm_query, "ompi_mpi_comm_world"); } else { - err = ompi_comm_group(&(ompi_mpi_comm_world.comm), &parent_group); + err = ompi_comm_group(&(ompi_mpi_comm_world.comm), &world_group); if (OPAL_UNLIKELY(OMPI_SUCCESS != err)) { return NULL; } @@ -143,20 +143,14 @@ mca_scoll_mpi_comm_query(oshmem_group_t *osh_group, int *priority) OPAL_TIMING_ENV_NEXT(comm_query, "malloc"); + /* Fill the map "group_rank-to-world_rank" in order to create a new proc group */ for (i = 0; i < osh_group->proc_count; i++) { - ompi_proc_t* ompi_proc; - for( int j = 0; j < ompi_group_size(parent_group); j++ ) { - ompi_proc = ompi_group_peer_lookup(parent_group, j); - if( 0 == opal_compare_proc(ompi_proc->super.proc_name, osh_group->proc_array[i]->super.proc_name)) { - ranks[i] = j; - break; - } - } + ranks[i] = osh_group->proc_array[i]->super.proc_name.vpid; } OPAL_TIMING_ENV_NEXT(comm_query, "build_ranks"); - err = ompi_group_incl(parent_group, osh_group->proc_count, ranks, &new_group); + err = ompi_group_incl(world_group, osh_group->proc_count, ranks, &new_group); if (OPAL_UNLIKELY(OMPI_SUCCESS != err)) { free(ranks); return NULL; diff --git a/oshmem/mca/scoll/mpi/scoll_mpi_ops.c b/oshmem/mca/scoll/mpi/scoll_mpi_ops.c index 8506dd524d8..2aa87a0222d 100644 --- a/oshmem/mca/scoll/mpi/scoll_mpi_ops.c +++ 
b/oshmem/mca/scoll/mpi/scoll_mpi_ops.c @@ -38,6 +38,7 @@ int mca_scoll_mpi_broadcast(struct oshmem_group_t *group, const void *source, size_t nlong, long *pSync, + bool nlong_type, int alg) { mca_scoll_mpi_module_t *mpi_module; @@ -60,8 +61,8 @@ int mca_scoll_mpi_broadcast(struct oshmem_group_t *group, * Since ompi coll components doesn't support size_t at the moment, * and considering this contradiction, we cast size_t to int here * in case if the value is less than INT_MAX and fallback to previous module otherwise. */ + if (OPAL_UNLIKELY(!nlong_type || (INT_MAX < nlong))) { #ifdef INCOMPATIBLE_SHMEM_OMPI_COLL_APIS - if (INT_MAX < nlong) { MPI_COLL_VERBOSE(20,"RUNNING FALLBACK BCAST"); PREVIOUS_SCOLL_FN(mpi_module, broadcast, group, PE_root, @@ -69,13 +70,21 @@ int mca_scoll_mpi_broadcast(struct oshmem_group_t *group, source, nlong, pSync, + nlong_type, SCOLL_DEFAULT_ALG); return rc; - } - rc = mpi_module->comm->c_coll->coll_bcast(buf, (int)nlong, dtype, root, mpi_module->comm, mpi_module->comm->c_coll->coll_bcast_module); #else - rc = mpi_module->comm->c_coll->coll_bcast(buf, nlong, dtype, root, mpi_module->comm, mpi_module->comm->c_coll->coll_bcast_module); + MPI_COLL_ERROR(20, "variable broadcast length, or exceeds INT_MAX: %zu", nlong); + return OSHMEM_ERR_NOT_SUPPORTED; #endif + } + + /* Do nothing on zero-length request */ + if (OPAL_UNLIKELY(!nlong)) { + return OSHMEM_SUCCESS; + } + + rc = mpi_module->comm->c_coll->coll_bcast(buf, nlong, dtype, root, mpi_module->comm, mpi_module->comm->c_coll->coll_bcast_module); if (OMPI_SUCCESS != rc){ MPI_COLL_VERBOSE(20,"RUNNING FALLBACK BCAST"); PREVIOUS_SCOLL_FN(mpi_module, broadcast, group, @@ -84,6 +93,7 @@ int mca_scoll_mpi_broadcast(struct oshmem_group_t *group, source, nlong, pSync, + nlong_type, SCOLL_DEFAULT_ALG); } return rc; @@ -97,18 +107,25 @@ int mca_scoll_mpi_collect(struct oshmem_group_t *group, bool nlong_type, int alg) { + ompi_datatype_t* stype = &ompi_mpi_char.dt; + ompi_datatype_t* rtype = 
&ompi_mpi_char.dt; mca_scoll_mpi_module_t *mpi_module; - ompi_datatype_t* stype; - ompi_datatype_t* rtype; int rc; + int len; + int i; void *sbuf, *rbuf; + int *disps, *recvcounts; MPI_COLL_VERBOSE(20,"RUNNING MPI ALLGATHER"); mpi_module = (mca_scoll_mpi_module_t *) group->g_scoll.scoll_collect_module; + if (nlong_type == true) { + /* Do nothing on zero-length request */ + if (OPAL_UNLIKELY(!nlong)) { + return OSHMEM_SUCCESS; + } + sbuf = (void *) source; rbuf = target; - stype = &ompi_mpi_char.dt; - rtype = &ompi_mpi_char.dt; /* Open SHMEM specification has the following constrains (page 85): * "If using C/C++, nelems must be of type integer. If you are using Fortran, it must be a * default integer value". And also fortran signature says "INTEGER". @@ -142,15 +159,52 @@ int mca_scoll_mpi_collect(struct oshmem_group_t *group, SCOLL_DEFAULT_ALG); } } else { - MPI_COLL_VERBOSE(20,"RUNNING FALLBACK COLLECT"); - PREVIOUS_SCOLL_FN(mpi_module, collect, group, - target, - source, - nlong, - pSync, - nlong_type, - SCOLL_DEFAULT_ALG); + if (INT_MAX < nlong) { + MPI_COLL_VERBOSE(20,"RUNNING FALLBACK COLLECT"); + PREVIOUS_SCOLL_FN(mpi_module, collect, group, + target, + source, + nlong, + pSync, + nlong_type, + SCOLL_DEFAULT_ALG); + return rc; + } + + len = nlong; + disps = malloc(group->proc_count * sizeof(*disps)); + if (disps == NULL) { + rc = OSHMEM_ERR_OUT_OF_RESOURCE; + goto complete; + } + + recvcounts = malloc(group->proc_count * sizeof(*recvcounts)); + if (recvcounts == NULL) { + rc = OSHMEM_ERR_OUT_OF_RESOURCE; + goto failed_mem; + } + + rc = mpi_module->comm->c_coll->coll_allgather(&len, sizeof(len), stype, recvcounts, + sizeof(len), rtype, mpi_module->comm, + mpi_module->comm->c_coll->coll_allgather_module); + if (rc != OSHMEM_SUCCESS) { + goto failed_allgather; + } + + disps[0] = 0; + for (i = 1; i < group->proc_count; i++) { + disps[i] = disps[i - 1] + recvcounts[i - 1]; + } + + rc = mpi_module->comm->c_coll->coll_allgatherv(source, nlong, stype, target, 
recvcounts, + disps, rtype, mpi_module->comm, + mpi_module->comm->c_coll->coll_allgatherv_module); +failed_allgather: + free(recvcounts); +failed_mem: + free(disps); } +complete: return rc; } @@ -177,6 +231,12 @@ int mca_scoll_mpi_reduce(struct oshmem_group_t *group, dtype = shmem_dtype_to_ompi_dtype(op); h_op = shmem_op_to_ompi_op(op->op); count = nlong/op->dt_size; + + /* Do nothing on zero-length request */ + if (OPAL_UNLIKELY(!nlong)) { + return OSHMEM_SUCCESS; + } + /* Open SHMEM specification has the following constrains (page 85): * "If using C/C++, nelems must be of type integer. If you are using Fortran, it must be a * default integer value". And also fortran signature says "INTEGER". diff --git a/oshmem/mca/scoll/scoll.h b/oshmem/mca/scoll/scoll.h index cc6cfe6094f..4839e0d9c52 100644 --- a/oshmem/mca/scoll/scoll.h +++ b/oshmem/mca/scoll/scoll.h @@ -122,6 +122,7 @@ typedef int (*mca_scoll_base_module_broadcast_fn_t)(struct oshmem_group_t *group const void *source, size_t nlong, long *pSync, + bool nlong_type, int alg); typedef int (*mca_scoll_base_module_collect_fn_t)(struct oshmem_group_t *group, void *target, diff --git a/oshmem/mca/spml/base/base.h b/oshmem/mca/spml/base/base.h index 4aeff7d760a..75a4eaec18d 100644 --- a/oshmem/mca/spml/base/base.h +++ b/oshmem/mca/spml/base/base.h @@ -72,11 +72,12 @@ OSHMEM_DECLSPEC int mca_spml_base_test(void* addr, void* value, int datatype, int *out_value); -OSHMEM_DECLSPEC int mca_spml_base_oob_get_mkeys(int pe, +OSHMEM_DECLSPEC int mca_spml_base_oob_get_mkeys(shmem_ctx_t ctx, + int pe, uint32_t seg, sshmem_mkey_t *mkeys); -OSHMEM_DECLSPEC void mca_spml_base_rmkey_unpack(sshmem_mkey_t *mkey, uint32_t seg, int pe, int tr_id); +OSHMEM_DECLSPEC void mca_spml_base_rmkey_unpack(shmem_ctx_t ctx, sshmem_mkey_t *mkey, uint32_t seg, int pe, int tr_id); OSHMEM_DECLSPEC void mca_spml_base_rmkey_free(sshmem_mkey_t *mkey); OSHMEM_DECLSPEC void *mca_spml_base_rmkey_ptr(const void *dst_addr, sshmem_mkey_t *mkey, int pe); @@ 
-92,6 +93,10 @@ OSHMEM_DECLSPEC int mca_spml_base_get_nb(void *dst_addr, void **handle); OSHMEM_DECLSPEC void mca_spml_base_memuse_hook(void *addr, size_t length); + +OSHMEM_DECLSPEC int mca_spml_base_put_all_nb(void *target, const void *source, + size_t size, long *counter); + /* * MCA framework */ diff --git a/oshmem/mca/spml/base/spml_base.c b/oshmem/mca/spml/base/spml_base.c index 86d544e88e2..52ca7b4d618 100644 --- a/oshmem/mca/spml/base/spml_base.c +++ b/oshmem/mca/spml/base/spml_base.c @@ -247,12 +247,12 @@ int mca_spml_base_wait_nb(void* handle) return OSHMEM_SUCCESS; } -int mca_spml_base_oob_get_mkeys(int pe, uint32_t segno, sshmem_mkey_t *mkeys) +int mca_spml_base_oob_get_mkeys(shmem_ctx_t ctx, int pe, uint32_t segno, sshmem_mkey_t *mkeys) { return OSHMEM_ERROR; } -void mca_spml_base_rmkey_unpack(sshmem_mkey_t *mkey, uint32_t segno, int pe, int tr_id) +void mca_spml_base_rmkey_unpack(shmem_ctx_t ctx, sshmem_mkey_t *mkey, uint32_t segno, int pe, int tr_id) { } @@ -280,3 +280,9 @@ int mca_spml_base_get_nb(void *dst_addr, size_t size, void mca_spml_base_memuse_hook(void *addr, size_t length) { } + +int mca_spml_base_put_all_nb(void *target, const void *source, + size_t size, long *counter) +{ + return OSHMEM_ERR_NOT_IMPLEMENTED; +} diff --git a/oshmem/mca/spml/ikrit/spml_ikrit.c b/oshmem/mca/spml/ikrit/spml_ikrit.c index 523baf77633..f5b47cd3c08 100644 --- a/oshmem/mca/spml/ikrit/spml_ikrit.c +++ b/oshmem/mca/spml/ikrit/spml_ikrit.c @@ -151,43 +151,46 @@ int mca_spml_ikrit_put_simple(void* dst_addr, void* src_addr, int dst); -static void mca_spml_ikrit_cache_mkeys(sshmem_mkey_t *, uint32_t seg, int remote_pe, int tr_id); +static void mca_spml_ikrit_cache_mkeys(shmem_ctx_t ctx, sshmem_mkey_t *, + uint32_t seg, int remote_pe, int tr_id); static mxm_mem_key_t *mca_spml_ikrit_get_mkey_slow(int pe, void *va, int ptl_id, void **rva); mca_spml_ikrit_t mca_spml_ikrit = { - { + .super = { /* Init mca_spml_base_module_t */ - mca_spml_ikrit_add_procs, - 
mca_spml_ikrit_del_procs, - mca_spml_ikrit_enable, - mca_spml_ikrit_register, - mca_spml_ikrit_deregister, - mca_spml_ikrit_oob_get_mkeys, - mca_spml_ikrit_ctx_create, - mca_spml_ikrit_ctx_destroy, - mca_spml_ikrit_put, - mca_spml_ikrit_put_nb, - mca_spml_ikrit_get, - mca_spml_ikrit_get_nb, - mca_spml_ikrit_recv, - mca_spml_ikrit_send, - mca_spml_base_wait, - mca_spml_base_wait_nb, - mca_spml_base_test, - mca_spml_ikrit_fence, /* fence is implemented as quiet */ - mca_spml_ikrit_fence, - mca_spml_ikrit_cache_mkeys, - mca_spml_base_rmkey_free, - mca_spml_base_rmkey_ptr, - mca_spml_base_memuse_hook, - - (void*)&mca_spml_ikrit + .spml_add_procs = mca_spml_ikrit_add_procs, + .spml_del_procs = mca_spml_ikrit_del_procs, + .spml_enable = mca_spml_ikrit_enable, + .spml_register = mca_spml_ikrit_register, + .spml_deregister = mca_spml_ikrit_deregister, + .spml_oob_get_mkeys = mca_spml_ikrit_oob_get_mkeys, + .spml_ctx_create = mca_spml_ikrit_ctx_create, + .spml_ctx_destroy = mca_spml_ikrit_ctx_destroy, + .spml_put = mca_spml_ikrit_put, + .spml_put_nb = mca_spml_ikrit_put_nb, + .spml_get = mca_spml_ikrit_get, + .spml_get_nb = mca_spml_ikrit_get_nb, + .spml_recv = mca_spml_ikrit_recv, + .spml_send = mca_spml_ikrit_send, + .spml_wait = mca_spml_base_wait, + .spml_wait_nb = mca_spml_base_wait_nb, + .spml_test = mca_spml_base_test, + .spml_fence = mca_spml_ikrit_fence, /* fence is implemented as quiet */ + .spml_quiet = mca_spml_ikrit_fence, + .spml_rmkey_unpack = mca_spml_ikrit_cache_mkeys, + .spml_rmkey_free = mca_spml_base_rmkey_free, + .spml_rmkey_ptr = mca_spml_base_rmkey_ptr, + .spml_memuse_hook = mca_spml_base_memuse_hook, + .spml_put_all_nb = mca_spml_base_put_all_nb, + + .self = (void*)&mca_spml_ikrit }, - mca_spml_ikrit_get_mkey_slow + .get_mkey_slow = mca_spml_ikrit_get_mkey_slow }; -static void mca_spml_ikrit_cache_mkeys(sshmem_mkey_t *mkey, uint32_t seg, int dst_pe, int tr_id) +static void mca_spml_ikrit_cache_mkeys(shmem_ctx_t ctx, sshmem_mkey_t *mkey, + uint32_t 
seg, int dst_pe, int tr_id) { mxm_peer_t *peer; @@ -211,7 +214,7 @@ mxm_mem_key_t *mca_spml_ikrit_get_mkey_slow(int pe, void *va, int ptl_id, void * sshmem_mkey_t *mkey; retry: - mkey = mca_memheap_base_get_cached_mkey(pe, va, ptl_id, rva); + mkey = mca_memheap_base_get_cached_mkey(oshmem_ctx_default, pe, va, ptl_id, rva); if (NULL == mkey) { SPML_ERROR("pe=%d: %p is not address of shared variable", pe, va); oshmem_shmem_abort(-1); @@ -437,9 +440,9 @@ int mca_spml_ikrit_add_procs(ompi_proc_t** procs, size_t nprocs) } sshmem_mkey_t *mca_spml_ikrit_register(void* addr, - size_t size, - uint64_t shmid, - int *count) + size_t size, + uint64_t shmid, + int *count) { int i; sshmem_mkey_t *mkeys; @@ -506,7 +509,8 @@ sshmem_mkey_t *mca_spml_ikrit_register(void* addr, my_rank, i, addr, (unsigned long long)size, mca_spml_base_mkey2str(&mkeys[i])); - mca_spml_ikrit_cache_mkeys(&mkeys[i], memheap_find_segnum(addr), my_rank, i); + mca_spml_ikrit_cache_mkeys(oshmem_ctx_default, &mkeys[i], + memheap_find_segnum(addr), my_rank, i); } *count = MXM_PTL_LAST; @@ -550,7 +554,8 @@ int mca_spml_ikrit_deregister(sshmem_mkey_t *mkeys) } -int mca_spml_ikrit_oob_get_mkeys(int pe, uint32_t seg, sshmem_mkey_t *mkeys) +int mca_spml_ikrit_oob_get_mkeys(shmem_ctx_t ctx, int pe, uint32_t seg, + sshmem_mkey_t *mkeys) { int ptl; @@ -569,7 +574,7 @@ int mca_spml_ikrit_oob_get_mkeys(int pe, uint32_t seg, sshmem_mkey_t *mkeys) mkeys[ptl].len = 0; mkeys[ptl].va_base = mca_memheap_seg2base_va(seg); mkeys[ptl].u.key = MAP_SEGMENT_SHM_INVALID; - mca_spml_ikrit_cache_mkeys(&mkeys[ptl], seg, pe, ptl); + mca_spml_ikrit_cache_mkeys(ctx, &mkeys[ptl], seg, pe, ptl); return OSHMEM_SUCCESS; } diff --git a/oshmem/mca/spml/ikrit/spml_ikrit.h b/oshmem/mca/spml/ikrit/spml_ikrit.h index e275c3bf592..b819957efcf 100644 --- a/oshmem/mca/spml/ikrit/spml_ikrit.h +++ b/oshmem/mca/spml/ikrit/spml_ikrit.h @@ -183,7 +183,7 @@ extern sshmem_mkey_t *mca_spml_ikrit_register(void* addr, uint64_t shmid, int *count); extern int 
mca_spml_ikrit_deregister(sshmem_mkey_t *mkeys); -extern int mca_spml_ikrit_oob_get_mkeys(int pe, +extern int mca_spml_ikrit_oob_get_mkeys(shmem_ctx_t ctx, int pe, uint32_t segno, sshmem_mkey_t *mkeys); diff --git a/oshmem/mca/spml/spml.h b/oshmem/mca/spml/spml.h index c78ed6cbddb..ca62b5f0bd4 100644 --- a/oshmem/mca/spml/spml.h +++ b/oshmem/mca/spml/spml.h @@ -132,7 +132,7 @@ typedef int (*mca_spml_base_module_test_fn_t)(void* addr, * * @param mkey remote mkey */ -typedef void (*mca_spml_base_module_mkey_unpack_fn_t)(sshmem_mkey_t *, uint32_t segno, int remote_pe, int tr_id); +typedef void (*mca_spml_base_module_mkey_unpack_fn_t)(shmem_ctx_t ctx, sshmem_mkey_t *, uint32_t segno, int remote_pe, int tr_id); /** * If possible, get a pointer to the remote memory described by the mkey @@ -180,7 +180,7 @@ typedef int (*mca_spml_base_module_deregister_fn_t)(sshmem_mkey_t *mkeys); * * @return OSHMEM_SUCCSESS if keys are found */ -typedef int (*mca_spml_base_module_oob_get_mkeys_fn_t)(int pe, +typedef int (*mca_spml_base_module_oob_get_mkeys_fn_t)(shmem_ctx_t ctx, int pe, uint32_t seg, sshmem_mkey_t *mkeys); @@ -314,6 +314,35 @@ typedef int (*mca_spml_base_module_send_fn_t)(void *buf, int dst, mca_spml_base_put_mode_t mode); +/** + * The routine transfers the data asynchronously from the source PE to all + * PEs in the OpenSHMEM job. The routine returns immediately. The source and + * target buffers are reusable only after the completion of the routine. + * After the data is transferred to the target buffers, the counter object + * is updated atomically. The counter object can be read either using atomic + * operations such as shmem_atomic_fetch or can use point-to-point synchronization + * routines such as shmem_wait_until and shmem_test. + * + * Shmem_quiet may be used for completing the operation, but not required for + * progress or completion. 
In a multithreaded OpenSHMEM program, the user + * (the OpenSHMEM program) should ensure the correct ordering of + * shmemx_alltoall_global calls. + * + * @param dest A symmetric data object that is large enough to receive + * “size” bytes of data from each PE in the OpenSHMEM job. + * @param source A symmetric data object that contains “size” bytes of data + * for each PE in the OpenSHMEM job. + * @param size The number of bytes to be sent to each PE in the job. + * @param counter A symmetric data object to be atomically incremented after + * the target buffer is updated. + * + * @return OSHMEM_SUCCESS or failure status. + */ +typedef int (*mca_spml_base_module_put_all_nb_fn_t)(void *dest, + const void *source, + size_t size, + long *counter); + /** * Assures ordering of delivery of put() requests * @@ -381,6 +410,7 @@ struct mca_spml_base_module_1_0_0_t { mca_spml_base_module_mkey_ptr_fn_t spml_rmkey_ptr; mca_spml_base_module_memuse_hook_fn_t spml_memuse_hook; + mca_spml_base_module_put_all_nb_fn_t spml_put_all_nb; void *self; }; diff --git a/oshmem/mca/spml/ucx/spml_ucx.c b/oshmem/mca/spml/ucx/spml_ucx.c index 277910b3ca7..36d3467bf5c 100644 --- a/oshmem/mca/spml/ucx/spml_ucx.c +++ b/oshmem/mca/spml/ucx/spml_ucx.c @@ -34,61 +34,60 @@ #include "oshmem/proc/proc.h" #include "oshmem/mca/spml/base/base.h" #include "oshmem/mca/spml/base/spml_base_putreq.h" +#include "oshmem/mca/atomic/atomic.h" #include "oshmem/runtime/runtime.h" #include "orte/util/show_help.h" #include "oshmem/mca/spml/ucx/spml_ucx_component.h" +#include "oshmem/mca/sshmem/ucx/sshmem_ucx.h" /* Turn ON/OFF debug output from build (default 0) */ #ifndef SPML_UCX_PUT_DEBUG #define SPML_UCX_PUT_DEBUG 0 #endif -static -spml_ucx_mkey_t * mca_spml_ucx_get_mkey_slow(int pe, void *va, void **rva); - mca_spml_ucx_t mca_spml_ucx = { - { + .super = { /* Init mca_spml_base_module_t */ - mca_spml_ucx_add_procs, - mca_spml_ucx_del_procs, - mca_spml_ucx_enable, - mca_spml_ucx_register, - mca_spml_ucx_deregister, - 
mca_spml_base_oob_get_mkeys, - mca_spml_ucx_ctx_create, - mca_spml_ucx_ctx_destroy, - mca_spml_ucx_put, - mca_spml_ucx_put_nb, - mca_spml_ucx_get, - mca_spml_ucx_get_nb, - mca_spml_ucx_recv, - mca_spml_ucx_send, - mca_spml_base_wait, - mca_spml_base_wait_nb, - mca_spml_base_test, - mca_spml_ucx_fence, - mca_spml_ucx_quiet, - mca_spml_ucx_rmkey_unpack, - mca_spml_ucx_rmkey_free, - mca_spml_ucx_rmkey_ptr, - mca_spml_ucx_memuse_hook, - (void*)&mca_spml_ucx + .spml_add_procs = mca_spml_ucx_add_procs, + .spml_del_procs = mca_spml_ucx_del_procs, + .spml_enable = mca_spml_ucx_enable, + .spml_register = mca_spml_ucx_register, + .spml_deregister = mca_spml_ucx_deregister, + .spml_oob_get_mkeys = mca_spml_base_oob_get_mkeys, + .spml_ctx_create = mca_spml_ucx_ctx_create, + .spml_ctx_destroy = mca_spml_ucx_ctx_destroy, + .spml_put = mca_spml_ucx_put, + .spml_put_nb = mca_spml_ucx_put_nb, + .spml_get = mca_spml_ucx_get, + .spml_get_nb = mca_spml_ucx_get_nb, + .spml_recv = mca_spml_ucx_recv, + .spml_send = mca_spml_ucx_send, + .spml_wait = mca_spml_base_wait, + .spml_wait_nb = mca_spml_base_wait_nb, + .spml_test = mca_spml_base_test, + .spml_fence = mca_spml_ucx_fence, + .spml_quiet = mca_spml_ucx_quiet, + .spml_rmkey_unpack = mca_spml_ucx_rmkey_unpack, + .spml_rmkey_free = mca_spml_ucx_rmkey_free, + .spml_rmkey_ptr = mca_spml_ucx_rmkey_ptr, + .spml_memuse_hook = mca_spml_ucx_memuse_hook, + .spml_put_all_nb = mca_spml_ucx_put_all_nb, + .self = (void*)&mca_spml_ucx }, - NULL, /* ucp_context */ - 1, /* num_disconnect */ - 0, /* heap_reg_nb */ - 0, /* enabled */ - mca_spml_ucx_get_mkey_slow + .ucp_context = NULL, + .num_disconnect = 1, + .heap_reg_nb = 0, + .enabled = 0, + .get_mkey_slow = NULL, + .synchronized_quiet = false }; -OBJ_CLASS_INSTANCE(mca_spml_ucx_ctx_list_item_t, opal_list_item_t, NULL, NULL); - mca_spml_ucx_ctx_t mca_spml_ucx_ctx_default = { - NULL, /* ucp_worker */ - NULL, /* ucp_peers */ - 0 /* options */ + .ucp_worker = NULL, + .ucp_peers = NULL, + .options = 0 }; 
int mca_spml_ucx_enable(bool enable) @@ -103,28 +102,11 @@ int mca_spml_ucx_enable(bool enable) return OSHMEM_SUCCESS; } - -static void mca_spml_ucx_waitall(void **reqs, int *count_p) -{ - int i; - - SPML_UCX_VERBOSE(10, "waiting for %d disconnect requests", *count_p); - for (i = 0; i < *count_p; ++i) { - opal_common_ucx_wait_request(reqs[i], mca_spml_ucx_ctx_default.ucp_worker, "ucp_disconnect_nb"); - reqs[i] = NULL; - } - - *count_p = 0; -} - int mca_spml_ucx_del_procs(ompi_proc_t** procs, size_t nprocs) { - int my_rank = oshmem_my_proc_id(); - int num_reqs; - size_t max_reqs; - void *dreq, **dreqs; - ucp_ep_h ep; - size_t i, n; + opal_common_ucx_del_proc_t *del_procs; + size_t i; + int ret; oshmem_shmem_barrier(); @@ -132,53 +114,30 @@ int mca_spml_ucx_del_procs(ompi_proc_t** procs, size_t nprocs) return OSHMEM_SUCCESS; } - max_reqs = mca_spml_ucx.num_disconnect; - if (max_reqs > nprocs) { - max_reqs = nprocs; - } - - dreqs = malloc(sizeof(*dreqs) * max_reqs); - if (dreqs == NULL) { + del_procs = malloc(sizeof(*del_procs) * nprocs); + if (del_procs == NULL) { return OMPI_ERR_OUT_OF_RESOURCE; } - num_reqs = 0; - for (i = 0; i < nprocs; ++i) { - n = (i + my_rank) % nprocs; - ep = mca_spml_ucx_ctx_default.ucp_peers[n].ucp_conn; - if (ep == NULL) { - continue; - } + del_procs[i].ep = mca_spml_ucx_ctx_default.ucp_peers[i].ucp_conn; + del_procs[i].vpid = i; - mca_spml_ucx_ctx_default.ucp_peers[n].ucp_conn = NULL; - - SPML_UCX_VERBOSE(10, "disconnecting from peer %zu", n); - dreq = ucp_disconnect_nb(ep); - if (dreq != NULL) { - if (UCS_PTR_IS_ERR(dreq)) { - SPML_UCX_ERROR("ucp_disconnect_nb(%zu) failed: %s", n, - ucs_status_string(UCS_PTR_STATUS(dreq))); - continue; - } else { - dreqs[num_reqs++] = dreq; - if (num_reqs >= mca_spml_ucx.num_disconnect) { - mca_spml_ucx_waitall(dreqs, &num_reqs); - } - } - } + /* mark peer as disconnected */ + mca_spml_ucx_ctx_default.ucp_peers[i].ucp_conn = NULL; } - /* num_reqs == 0 is processed by mca_pml_ucx_waitall routine, - * so 
suppress coverity warning */ - /* coverity[uninit_use_in_call] */ - mca_spml_ucx_waitall(dreqs, &num_reqs); - free(dreqs); - free(mca_spml_ucx.remote_addrs_tbl); - opal_common_ucx_mca_pmix_fence(mca_spml_ucx_ctx_default.ucp_worker); + ret = opal_common_ucx_del_procs_nofence(del_procs, nprocs, oshmem_my_proc_id(), + mca_spml_ucx.num_disconnect, + mca_spml_ucx_ctx_default.ucp_worker); + /* No need to barrier here - barrier is called in _shmem_finalize */ + free(del_procs); + free(mca_spml_ucx.remote_addrs_tbl); free(mca_spml_ucx_ctx_default.ucp_peers); + mca_spml_ucx_ctx_default.ucp_peers = NULL; - return OSHMEM_SUCCESS; + + return ret; } /* TODO: move func into common place, use it with rkey exchng too */ @@ -256,9 +215,43 @@ static void dump_address(int pe, char *addr, size_t len) static char spml_ucx_transport_ids[1] = { 0 }; +int mca_spml_ucx_init_put_op_mask(mca_spml_ucx_ctx_t *ctx, size_t nprocs) +{ + int res; + + if (mca_spml_ucx.synchronized_quiet) { + ctx->put_proc_indexes = malloc(nprocs * sizeof(*ctx->put_proc_indexes)); + if (NULL == ctx->put_proc_indexes) { + return OSHMEM_ERR_OUT_OF_RESOURCE; + } + + OBJ_CONSTRUCT(&ctx->put_op_bitmap, opal_bitmap_t); + res = opal_bitmap_init(&ctx->put_op_bitmap, nprocs); + if (OPAL_SUCCESS != res) { + free(ctx->put_proc_indexes); + ctx->put_proc_indexes = NULL; + return res; + } + + ctx->put_proc_count = 0; + } + + return OSHMEM_SUCCESS; +} + +int mca_spml_ucx_clear_put_op_mask(mca_spml_ucx_ctx_t *ctx) +{ + if (mca_spml_ucx.synchronized_quiet && ctx->put_proc_indexes) { + OBJ_DESTRUCT(&ctx->put_op_bitmap); + free(ctx->put_proc_indexes); + } + + return OSHMEM_SUCCESS; +} + int mca_spml_ucx_add_procs(ompi_proc_t** procs, size_t nprocs) { - size_t i, n; + size_t i, j, n; int rc = OSHMEM_ERROR; int my_rank = oshmem_my_proc_id(); ucs_status_t err; @@ -275,6 +268,11 @@ int mca_spml_ucx_add_procs(ompi_proc_t** procs, size_t nprocs) goto error; } + rc = mca_spml_ucx_init_put_op_mask(&mca_spml_ucx_ctx_default, nprocs); + if 
(OSHMEM_SUCCESS != rc) { + goto error; + } + err = ucp_worker_get_address(mca_spml_ucx_ctx_default.ucp_worker, &wk_local_addr, &wk_addr_len); if (err != UCS_OK) { goto error; @@ -287,7 +285,7 @@ int mca_spml_ucx_add_procs(ompi_proc_t** procs, size_t nprocs) goto error; } - opal_progress_register(spml_ucx_progress); + opal_progress_register(spml_ucx_default_progress); mca_spml_ucx.remote_addrs_tbl = (char **)calloc(nprocs, sizeof(char *)); memset(mca_spml_ucx.remote_addrs_tbl, 0, nprocs * sizeof(char *)); @@ -311,6 +309,10 @@ int mca_spml_ucx_add_procs(ompi_proc_t** procs, size_t nprocs) OSHMEM_PROC_DATA(procs[i])->num_transports = 1; OSHMEM_PROC_DATA(procs[i])->transport_ids = spml_ucx_transport_ids; + for (j = 0; j < MCA_MEMHEAP_MAX_SEGMENTS; j++) { + mca_spml_ucx_ctx_default.ucp_peers[i].mkeys[j].key.rkey = NULL; + } + mca_spml_ucx.remote_addrs_tbl[i] = (char *)malloc(wk_rsizes[i]); memcpy(mca_spml_ucx.remote_addrs_tbl[i], (char *)(wk_raddrs + wk_roffs[i]), wk_rsizes[i]); @@ -322,6 +324,8 @@ int mca_spml_ucx_add_procs(ompi_proc_t** procs, size_t nprocs) free(wk_roffs); SPML_UCX_VERBOSE(50, "*** ADDED PROCS ***"); + + opal_common_ucx_mca_proc_added(); return OSHMEM_SUCCESS; error2: @@ -333,6 +337,8 @@ int mca_spml_ucx_add_procs(ompi_proc_t** procs, size_t nprocs) free(mca_spml_ucx.remote_addrs_tbl[i]); } } + + mca_spml_ucx_clear_put_op_mask(&mca_spml_ucx_ctx_default); if (mca_spml_ucx_ctx_default.ucp_peers) free(mca_spml_ucx_ctx_default.ucp_peers); if (mca_spml_ucx.remote_addrs_tbl) @@ -347,22 +353,6 @@ int mca_spml_ucx_add_procs(ompi_proc_t** procs, size_t nprocs) } - -static -spml_ucx_mkey_t * mca_spml_ucx_get_mkey_slow(int pe, void *va, void **rva) -{ - sshmem_mkey_t *r_mkey; - - r_mkey = mca_memheap_base_get_cached_mkey(pe, va, 0, rva); - if (OPAL_UNLIKELY(!r_mkey)) { - SPML_UCX_ERROR("pe=%d: %p is not address of symmetric variable", - pe, va); - oshmem_shmem_abort(-1); - return NULL; - } - return (spml_ucx_mkey_t *)(r_mkey->spml_context); -} - void 
mca_spml_ucx_rmkey_free(sshmem_mkey_t *mkey) { spml_ucx_mkey_t *ucx_mkey; @@ -391,31 +381,26 @@ void *mca_spml_ucx_rmkey_ptr(const void *dst_addr, sshmem_mkey_t *mkey, int pe) #endif } -static void mca_spml_ucx_cache_mkey(mca_spml_ucx_ctx_t *ucx_ctx, sshmem_mkey_t *mkey, uint32_t segno, int dst_pe) -{ - ucp_peer_t *peer; - - peer = &(ucx_ctx->ucp_peers[dst_pe]); - mkey_segment_init(&peer->mkeys[segno].super, mkey, segno); -} - -void mca_spml_ucx_rmkey_unpack(sshmem_mkey_t *mkey, uint32_t segno, int pe, int tr_id) +void mca_spml_ucx_rmkey_unpack(shmem_ctx_t ctx, sshmem_mkey_t *mkey, uint32_t segno, int pe, int tr_id) { spml_ucx_mkey_t *ucx_mkey; + mca_spml_ucx_ctx_t *ucx_ctx = (mca_spml_ucx_ctx_t *)ctx; ucs_status_t err; - ucx_mkey = &mca_spml_ucx_ctx_default.ucp_peers[pe].mkeys[segno].key; + ucx_mkey = &ucx_ctx->ucp_peers[pe].mkeys[segno].key; - err = ucp_ep_rkey_unpack(mca_spml_ucx_ctx_default.ucp_peers[pe].ucp_conn, - mkey->u.data, + err = ucp_ep_rkey_unpack(ucx_ctx->ucp_peers[pe].ucp_conn, + mkey->u.data, &ucx_mkey->rkey); if (UCS_OK != err) { SPML_UCX_ERROR("failed to unpack rkey: %s", ucs_status_string(err)); goto error_fatal; } - mkey->spml_context = ucx_mkey; - mca_spml_ucx_cache_mkey(&mca_spml_ucx_ctx_default, mkey, segno, pe); + if (ucx_ctx == &mca_spml_ucx_ctx_default) { + mkey->spml_context = ucx_mkey; + } + mca_spml_ucx_cache_mkey(ucx_ctx, mkey, segno, pe); return; error_fatal: @@ -499,11 +484,12 @@ sshmem_mkey_t *mca_spml_ucx_register(void* addr, } } else { - ucx_mkey->mem_h = (ucp_mem_h)mem_seg->context; + mca_sshmem_ucx_segment_context_t *ctx = mem_seg->context; + ucx_mkey->mem_h = ctx->ucp_memh; } - status = ucp_rkey_pack(mca_spml_ucx.ucp_context, ucx_mkey->mem_h, - &mkeys[0].u.data, &len); + status = ucp_rkey_pack(mca_spml_ucx.ucp_context, ucx_mkey->mem_h, + &mkeys[0].u.data, &len); if (UCS_OK != status) { goto error_unmap; } @@ -545,7 +531,7 @@ int mca_spml_ucx_deregister(sshmem_mkey_t *mkeys) if (!mkeys) return OSHMEM_SUCCESS; - if 
(!mkeys[0].spml_context) + if (!mkeys[0].spml_context) return OSHMEM_SUCCESS; mem_seg = memheap_find_va(mkeys[0].va_base); @@ -554,30 +540,72 @@ int mca_spml_ucx_deregister(sshmem_mkey_t *mkeys) if (OPAL_UNLIKELY(NULL == mem_seg)) { return OSHMEM_ERROR; } - + if (MAP_SEGMENT_ALLOC_UCX != mem_seg->type) { ucp_mem_unmap(mca_spml_ucx.ucp_context, ucx_mkey->mem_h); } ucp_rkey_destroy(ucx_mkey->rkey); + ucx_mkey->rkey = NULL; if (0 < mkeys[0].len) { ucp_rkey_buffer_release(mkeys[0].u.data); } + free(mkeys); + return OSHMEM_SUCCESS; } -int mca_spml_ucx_ctx_create(long options, shmem_ctx_t *ctx) +static inline void _ctx_add(mca_spml_ucx_ctx_array_t *array, mca_spml_ucx_ctx_t *ctx) +{ + int i; + + if (array->ctxs_count < array->ctxs_num) { + array->ctxs[array->ctxs_count] = ctx; + } else { + array->ctxs = realloc(array->ctxs, (array->ctxs_num + MCA_SPML_UCX_CTXS_ARRAY_INC) * sizeof(mca_spml_ucx_ctx_t *)); + opal_atomic_wmb (); + for (i = array->ctxs_num; i < array->ctxs_num + MCA_SPML_UCX_CTXS_ARRAY_INC; i++) { + array->ctxs[i] = NULL; + } + array->ctxs[array->ctxs_num] = ctx; + array->ctxs_num += MCA_SPML_UCX_CTXS_ARRAY_INC; + } + + opal_atomic_wmb (); + array->ctxs_count++; +} + +static inline void _ctx_remove(mca_spml_ucx_ctx_array_t *array, mca_spml_ucx_ctx_t *ctx) +{ + int i; + + for (i = 0; i < array->ctxs_count; i++) { + if (array->ctxs[i] == ctx) { + array->ctxs[i] = array->ctxs[array->ctxs_count-1]; + array->ctxs[array->ctxs_count-1] = NULL; + break; + } + } + + array->ctxs_count--; + opal_atomic_wmb (); +} + +static int mca_spml_ucx_ctx_create_common(long options, mca_spml_ucx_ctx_t **ucx_ctx_p) { - mca_spml_ucx_ctx_list_item_t *ctx_item; ucp_worker_params_t params; ucp_ep_params_t ep_params; size_t i, nprocs = oshmem_num_procs(); + int j; ucs_status_t err; + spml_ucx_mkey_t *ucx_mkey; + sshmem_mkey_t *mkey; + mca_spml_ucx_ctx_t *ucx_ctx; int rc = OSHMEM_ERROR; - ctx_item = OBJ_NEW(mca_spml_ucx_ctx_list_item_t); - ctx_item->ctx.options = options; + ucx_ctx = 
malloc(sizeof(mca_spml_ucx_ctx_t)); + ucx_ctx->options = options; params.field_mask = UCP_WORKER_PARAM_FIELD_THREAD_MODE; if (oshmem_mpi_thread_provided == SHMEM_THREAD_SINGLE || options & SHMEM_CTX_PRIVATE || options & SHMEM_CTX_SERIALIZED) { @@ -587,82 +615,112 @@ int mca_spml_ucx_ctx_create(long options, shmem_ctx_t *ctx) } err = ucp_worker_create(mca_spml_ucx.ucp_context, &params, - &ctx_item->ctx.ucp_worker); + &ucx_ctx->ucp_worker); if (UCS_OK != err) { - OBJ_RELEASE(ctx_item); + free(ucx_ctx); return OSHMEM_ERROR; } - ctx_item->ctx.ucp_peers = (ucp_peer_t *) calloc(nprocs, sizeof(*(ctx_item->ctx.ucp_peers))); - if (NULL == ctx_item->ctx.ucp_peers) { + ucx_ctx->ucp_peers = (ucp_peer_t *) calloc(nprocs, sizeof(*(ucx_ctx->ucp_peers))); + if (NULL == ucx_ctx->ucp_peers) { goto error; } + rc = mca_spml_ucx_init_put_op_mask(ucx_ctx, nprocs); + if (OSHMEM_SUCCESS != rc) { + goto error2; + } + for (i = 0; i < nprocs; i++) { ep_params.field_mask = UCP_EP_PARAM_FIELD_REMOTE_ADDRESS; ep_params.address = (ucp_address_t *)(mca_spml_ucx.remote_addrs_tbl[i]); - err = ucp_ep_create(ctx_item->ctx.ucp_worker, &ep_params, - &ctx_item->ctx.ucp_peers[i].ucp_conn); + err = ucp_ep_create(ucx_ctx->ucp_worker, &ep_params, + &ucx_ctx->ucp_peers[i].ucp_conn); if (UCS_OK != err) { SPML_ERROR("ucp_ep_create(proc=%d/%d) failed: %s", i, nprocs, ucs_status_string(err)); goto error2; } - } - - SHMEM_MUTEX_LOCK(mca_spml_ucx.internal_mutex); - opal_list_append(&(mca_spml_ucx.ctx_list), &ctx_item->super); - - SHMEM_MUTEX_UNLOCK(mca_spml_ucx.internal_mutex); + for (j = 0; j < memheap_map->n_segments; j++) { + mkey = &memheap_map->mem_segs[j].mkeys_cache[i][0]; + ucx_mkey = &ucx_ctx->ucp_peers[i].mkeys[j].key; + if (mkey->u.data) { + err = ucp_ep_rkey_unpack(ucx_ctx->ucp_peers[i].ucp_conn, + mkey->u.data, + &ucx_mkey->rkey); + if (UCS_OK != err) { + SPML_UCX_ERROR("failed to unpack rkey"); + goto error2; + } + mca_spml_ucx_cache_mkey(ucx_ctx, mkey, j, i); + } + } + } - (*ctx) = 
(shmem_ctx_t)(&ctx_item->ctx); + *ucx_ctx_p = ucx_ctx; return OSHMEM_SUCCESS; error2: for (i = 0; i < nprocs; i++) { - if (ctx_item->ctx.ucp_peers[i].ucp_conn) { - ucp_ep_destroy(ctx_item->ctx.ucp_peers[i].ucp_conn); + if (ucx_ctx->ucp_peers[i].ucp_conn) { + ucp_ep_destroy(ucx_ctx->ucp_peers[i].ucp_conn); } } - if (ctx_item->ctx.ucp_peers) - free(ctx_item->ctx.ucp_peers); + mca_spml_ucx_clear_put_op_mask(ucx_ctx); + + if (ucx_ctx->ucp_peers) + free(ucx_ctx->ucp_peers); error: - ucp_worker_destroy(ctx_item->ctx.ucp_worker); - OBJ_RELEASE(ctx_item); + ucp_worker_destroy(ucx_ctx->ucp_worker); + free(ucx_ctx); rc = OSHMEM_ERR_OUT_OF_RESOURCE; SPML_ERROR("ctx create FAILED rc=%d", rc); return rc; } -void mca_spml_ucx_ctx_destroy(shmem_ctx_t ctx) +int mca_spml_ucx_ctx_create(long options, shmem_ctx_t *ctx) { - mca_spml_ucx_ctx_list_item_t *ctx_item, *next; - size_t i, nprocs = oshmem_num_procs(); + mca_spml_ucx_ctx_t *ucx_ctx; + int rc; - MCA_SPML_CALL(quiet(ctx)); + /* Take a lock controlling context creation. AUX context may set specific + * UCX parameters affecting worker creation, which are not needed for + * regular contexts. 
*/ + pthread_mutex_lock(&mca_spml_ucx.ctx_create_mutex); + rc = mca_spml_ucx_ctx_create_common(options, &ucx_ctx); + pthread_mutex_unlock(&mca_spml_ucx.ctx_create_mutex); + if (rc != OSHMEM_SUCCESS) { + return rc; + } + + if (mca_spml_ucx.active_array.ctxs_count == 0) { + opal_progress_register(spml_ucx_ctx_progress); + } SHMEM_MUTEX_LOCK(mca_spml_ucx.internal_mutex); + _ctx_add(&mca_spml_ucx.active_array, ucx_ctx); + SHMEM_MUTEX_UNLOCK(mca_spml_ucx.internal_mutex); - /* delete context object from list */ - OPAL_LIST_FOREACH_SAFE(ctx_item, next, &(mca_spml_ucx.ctx_list), - mca_spml_ucx_ctx_list_item_t) { - if ((shmem_ctx_t)(&ctx_item->ctx) == ctx) { - opal_list_remove_item(&(mca_spml_ucx.ctx_list), &ctx_item->super); - for (i = 0; i < nprocs; i++) { - ucp_ep_destroy(ctx_item->ctx.ucp_peers[i].ucp_conn); - } - free(ctx_item->ctx.ucp_peers); - ucp_worker_destroy(ctx_item->ctx.ucp_worker); - OBJ_RELEASE(ctx_item); - break; - } - } + (*ctx) = (shmem_ctx_t)ucx_ctx; + return OSHMEM_SUCCESS; +} +void mca_spml_ucx_ctx_destroy(shmem_ctx_t ctx) +{ + MCA_SPML_CALL(quiet(ctx)); + + SHMEM_MUTEX_LOCK(mca_spml_ucx.internal_mutex); + _ctx_remove(&mca_spml_ucx.active_array, (mca_spml_ucx_ctx_t *)ctx); + _ctx_add(&mca_spml_ucx.idle_array, (mca_spml_ucx_ctx_t *)ctx); SHMEM_MUTEX_UNLOCK(mca_spml_ucx.internal_mutex); + + if (!mca_spml_ucx.active_array.ctxs_count) { + opal_progress_unregister(spml_ucx_ctx_progress); + } } int mca_spml_ucx_get(shmem_ctx_t ctx, void *src_addr, size_t size, void *dst_addr, int src) @@ -676,7 +734,7 @@ int mca_spml_ucx_get(shmem_ctx_t ctx, void *src_addr, size_t size, void *dst_add ucs_status_t status; #endif - ucx_mkey = mca_spml_ucx_get_mkey(ucx_ctx, src, src_addr, &rva, &mca_spml_ucx); + ucx_mkey = mca_spml_ucx_get_mkey(ctx, src, src_addr, &rva, &mca_spml_ucx); #if HAVE_DECL_UCP_GET_NB request = ucp_get_nb(ucx_ctx->ucp_peers[src].ucp_conn, dst_addr, size, (uint64_t)rva, ucx_mkey->rkey, opal_common_ucx_empty_complete_cb); @@ -695,7 +753,7 @@ int 
mca_spml_ucx_get_nb(shmem_ctx_t ctx, void *src_addr, size_t size, void *dst_ spml_ucx_mkey_t *ucx_mkey; mca_spml_ucx_ctx_t *ucx_ctx = (mca_spml_ucx_ctx_t *)ctx; - ucx_mkey = mca_spml_ucx_get_mkey(ucx_ctx, src, src_addr, &rva, &mca_spml_ucx); + ucx_mkey = mca_spml_ucx_get_mkey(ctx, src, src_addr, &rva, &mca_spml_ucx); status = ucp_get_nbi(ucx_ctx->ucp_peers[src].ucp_conn, dst_addr, size, (uint64_t)rva, ucx_mkey->rkey); @@ -707,22 +765,29 @@ int mca_spml_ucx_put(shmem_ctx_t ctx, void* dst_addr, size_t size, void* src_add void *rva; spml_ucx_mkey_t *ucx_mkey; mca_spml_ucx_ctx_t *ucx_ctx = (mca_spml_ucx_ctx_t *)ctx; + int res; #if HAVE_DECL_UCP_PUT_NB ucs_status_ptr_t request; #else ucs_status_t status; #endif - ucx_mkey = mca_spml_ucx_get_mkey(ucx_ctx, dst, dst_addr, &rva, &mca_spml_ucx); + ucx_mkey = mca_spml_ucx_get_mkey(ctx, dst, dst_addr, &rva, &mca_spml_ucx); #if HAVE_DECL_UCP_PUT_NB request = ucp_put_nb(ucx_ctx->ucp_peers[dst].ucp_conn, src_addr, size, (uint64_t)rva, ucx_mkey->rkey, opal_common_ucx_empty_complete_cb); - return opal_common_ucx_wait_request(request, ucx_ctx->ucp_worker, "ucp_put_nb"); + res = opal_common_ucx_wait_request(request, ucx_ctx->ucp_worker, "ucp_put_nb"); #else status = ucp_put(ucx_ctx->ucp_peers[dst].ucp_conn, src_addr, size, (uint64_t)rva, ucx_mkey->rkey); - return ucx_status_to_oshmem(status); + res = ucx_status_to_oshmem(status); #endif + + if (OPAL_LIKELY(OSHMEM_SUCCESS == res)) { + mca_spml_ucx_remote_op_posted(ucx_ctx, dst); + } + + return res; } int mca_spml_ucx_put_nb(shmem_ctx_t ctx, void* dst_addr, size_t size, void* src_addr, int dst, void **handle) @@ -732,10 +797,14 @@ int mca_spml_ucx_put_nb(shmem_ctx_t ctx, void* dst_addr, size_t size, void* src_ spml_ucx_mkey_t *ucx_mkey; mca_spml_ucx_ctx_t *ucx_ctx = (mca_spml_ucx_ctx_t *)ctx; - ucx_mkey = mca_spml_ucx_get_mkey(ucx_ctx, dst, dst_addr, &rva, &mca_spml_ucx); + ucx_mkey = mca_spml_ucx_get_mkey(ctx, dst, dst_addr, &rva, &mca_spml_ucx); status = 
ucp_put_nbi(ucx_ctx->ucp_peers[dst].ucp_conn, src_addr, size, (uint64_t)rva, ucx_mkey->rkey); + if (OPAL_LIKELY(status >= 0)) { + mca_spml_ucx_remote_op_posted(ucx_ctx, dst); + } + return ucx_status_to_oshmem_nb(status); } @@ -746,6 +815,8 @@ int mca_spml_ucx_fence(shmem_ctx_t ctx) ucs_status_t err; mca_spml_ucx_ctx_t *ucx_ctx = (mca_spml_ucx_ctx_t *)ctx; + opal_atomic_wmb(); + err = ucp_worker_fence(ucx_ctx->ucp_worker); if (UCS_OK != err) { SPML_UCX_ERROR("fence failed: %s", ucs_status_string(err)); @@ -757,14 +828,44 @@ int mca_spml_ucx_fence(shmem_ctx_t ctx) int mca_spml_ucx_quiet(shmem_ctx_t ctx) { + int flush_get_data; int ret; + unsigned i; + int idx; mca_spml_ucx_ctx_t *ucx_ctx = (mca_spml_ucx_ctx_t *)ctx; + if (mca_spml_ucx.synchronized_quiet) { + for (i = 0; i < ucx_ctx->put_proc_count; i++) { + idx = ucx_ctx->put_proc_indexes[i]; + ret = mca_spml_ucx_get_nb(ctx, + ucx_ctx->ucp_peers[idx].mkeys->super.super.va_base, + sizeof(flush_get_data), &flush_get_data, idx, NULL); + if (OMPI_SUCCESS != ret) { + oshmem_shmem_abort(-1); + return ret; + } + + opal_bitmap_clear_bit(&ucx_ctx->put_op_bitmap, idx); + } + ucx_ctx->put_proc_count = 0; + } + + opal_atomic_wmb(); + ret = opal_common_ucx_worker_flush(ucx_ctx->ucp_worker); if (OMPI_SUCCESS != ret) { oshmem_shmem_abort(-1); return ret; } + + /* If put_all_nb op/s is/are being executed asynchronously, need to wait its + * completion as well. 
*/ + if (ctx == oshmem_ctx_default) { + while (mca_spml_ucx.aux_refcnt) { + opal_progress(); + } + } + return OSHMEM_SUCCESS; } @@ -802,3 +903,101 @@ int mca_spml_ucx_send(void* buf, return rc; } + +/* this can be called with request==NULL in case of immediate completion */ +static void mca_spml_ucx_put_all_complete_cb(void *request, ucs_status_t status) +{ + if (mca_spml_ucx.async_progress && (--mca_spml_ucx.aux_refcnt == 0)) { + opal_event_evtimer_del(mca_spml_ucx.tick_event); + opal_progress_unregister(spml_ucx_progress_aux_ctx); + } + + if (request != NULL) { + ucp_request_free(request); + } +} + +/* Should be called with AUX lock taken */ +static int mca_spml_ucx_create_aux_ctx(void) +{ + unsigned major = 0; + unsigned minor = 0; + unsigned rel_number = 0; + int rc; + bool rand_dci_supp; + + ucp_get_version(&major, &minor, &rel_number); + rand_dci_supp = UCX_VERSION(major, minor, rel_number) >= UCX_VERSION(1, 6, 0); + + if (rand_dci_supp) { + pthread_mutex_lock(&mca_spml_ucx.ctx_create_mutex); + opal_setenv("UCX_DC_MLX5_TX_POLICY", "rand", 0, &environ); + } + + rc = mca_spml_ucx_ctx_create_common(SHMEM_CTX_PRIVATE, &mca_spml_ucx.aux_ctx); + + if (rand_dci_supp) { + opal_unsetenv("UCX_DC_MLX5_TX_POLICY", &environ); + pthread_mutex_unlock(&mca_spml_ucx.ctx_create_mutex); + } + + return rc; +} + +int mca_spml_ucx_put_all_nb(void *dest, const void *source, size_t size, long *counter) +{ + int my_pe = oshmem_my_proc_id(); + long val = 1; + int peer, dst_pe, rc; + shmem_ctx_t ctx; + struct timeval tv; + void *request; + + mca_spml_ucx_aux_lock(); + if (mca_spml_ucx.async_progress) { + if (mca_spml_ucx.aux_ctx == NULL) { + rc = mca_spml_ucx_create_aux_ctx(); + if (rc != OMPI_SUCCESS) { + mca_spml_ucx_aux_unlock(); + oshmem_shmem_abort(-1); + } + } + + if (mca_spml_ucx.aux_refcnt++ == 0) { + tv.tv_sec = 0; + tv.tv_usec = mca_spml_ucx.async_tick; + opal_event_evtimer_add(mca_spml_ucx.tick_event, &tv); + opal_progress_register(spml_ucx_progress_aux_ctx); + } + ctx = 
(shmem_ctx_t)mca_spml_ucx.aux_ctx; + } else { + ctx = oshmem_ctx_default; + } + + assert(ctx != NULL); /* make coverity happy */ + + for (peer = 0; peer < oshmem_num_procs(); peer++) { + dst_pe = (peer + my_pe) % oshmem_num_procs(); + rc = mca_spml_ucx_put_nb(ctx, + (void*)((uintptr_t)dest + my_pe * size), + size, + (void*)((uintptr_t)source + dst_pe * size), + dst_pe, NULL); + RUNTIME_CHECK_RC(rc); + + mca_spml_ucx_fence(ctx); + + rc = MCA_ATOMIC_CALL(add(ctx, (void*)counter, val, sizeof(val), dst_pe)); + RUNTIME_CHECK_RC(rc); + } + + request = ucp_worker_flush_nb(((mca_spml_ucx_ctx_t*)ctx)->ucp_worker, 0, + mca_spml_ucx_put_all_complete_cb); + if (!UCS_PTR_IS_PTR(request)) { + mca_spml_ucx_put_all_complete_cb(NULL, UCS_PTR_STATUS(request)); + } + + mca_spml_ucx_aux_unlock(); + + return OSHMEM_SUCCESS; +} diff --git a/oshmem/mca/spml/ucx/spml_ucx.h b/oshmem/mca/spml/ucx/spml_ucx.h index 1b2f0b58d81..95c56622351 100644 --- a/oshmem/mca/spml/ucx/spml_ucx.h +++ b/oshmem/mca/spml/ucx/spml_ucx.h @@ -33,6 +33,7 @@ #include "opal/class/opal_free_list.h" #include "opal/class/opal_list.h" +#include "opal/class/opal_bitmap.h" #include "orte/runtime/orte_globals.h" #include "opal/mca/common/ucx/common_ucx.h" @@ -62,7 +63,7 @@ typedef struct spml_ucx_cached_mkey spml_ucx_cached_mkey_t; struct ucp_peer { ucp_ep_h ucp_conn; - spml_ucx_cached_mkey_t mkeys[MCA_MEMHEAP_SEG_COUNT]; + spml_ucx_cached_mkey_t mkeys[MCA_MEMHEAP_MAX_SEGMENTS]; }; typedef struct ucp_peer ucp_peer_t; @@ -70,18 +71,21 @@ struct mca_spml_ucx_ctx { ucp_worker_h ucp_worker; ucp_peer_t *ucp_peers; long options; + opal_bitmap_t put_op_bitmap; + int *put_proc_indexes; + unsigned put_proc_count; }; typedef struct mca_spml_ucx_ctx mca_spml_ucx_ctx_t; extern mca_spml_ucx_ctx_t mca_spml_ucx_ctx_default; -struct mca_spml_ucx_ctx_list_item { - opal_list_item_t super; - mca_spml_ucx_ctx_t ctx; -}; -typedef struct mca_spml_ucx_ctx_list_item mca_spml_ucx_ctx_list_item_t; +typedef spml_ucx_mkey_t * 
(*mca_spml_ucx_get_mkey_slow_fn_t)(shmem_ctx_t ctx, int pe, void *va, void **rva); -typedef spml_ucx_mkey_t * (*mca_spml_ucx_get_mkey_slow_fn_t)(int pe, void *va, void **rva); +typedef struct mca_spml_ucx_ctx_array { + int ctxs_count; + int ctxs_num; + mca_spml_ucx_ctx_t **ctxs; +} mca_spml_ucx_ctx_array_t; struct mca_spml_ucx { mca_spml_base_module_t super; @@ -91,13 +95,23 @@ struct mca_spml_ucx { bool enabled; mca_spml_ucx_get_mkey_slow_fn_t get_mkey_slow; char **remote_addrs_tbl; - opal_list_t ctx_list; + mca_spml_ucx_ctx_array_t active_array; + mca_spml_ucx_ctx_array_t idle_array; int priority; /* component priority */ shmem_internal_mutex_t internal_mutex; + pthread_mutex_t ctx_create_mutex; + /* Fields controlling aux context for put_all_nb SPML routine */ + bool async_progress; + int async_tick; + opal_event_base_t *async_event_base; + opal_event_t *tick_event; + mca_spml_ucx_ctx_t *aux_ctx; + pthread_spinlock_t async_lock; + int aux_refcnt; + bool synchronized_quiet; }; typedef struct mca_spml_ucx mca_spml_ucx_t; - extern mca_spml_ucx_t mca_spml_ucx; extern int mca_spml_ucx_enable(bool enable); @@ -117,23 +131,28 @@ extern int mca_spml_ucx_get_nb(shmem_ctx_t ctx, void **handle); extern int mca_spml_ucx_put(shmem_ctx_t ctx, - void* dst_addr, - size_t size, - void* src_addr, - int dst); + void* dst_addr, + size_t size, + void* src_addr, + int dst); extern int mca_spml_ucx_put_nb(shmem_ctx_t ctx, - void* dst_addr, - size_t size, - void* src_addr, - int dst, - void **handle); + void* dst_addr, + size_t size, + void* src_addr, + int dst, + void **handle); extern int mca_spml_ucx_recv(void* buf, size_t size, int src); extern int mca_spml_ucx_send(void* buf, - size_t size, - int dst, - mca_spml_base_put_mode_t mode); + size_t size, + int dst, + mca_spml_base_put_mode_t mode); + +extern int mca_spml_ucx_put_all_nb(void *target, + const void *source, + size_t size, + long *counter); extern sshmem_mkey_t *mca_spml_ucx_register(void* addr, size_t size, @@ -143,7 
+162,7 @@ extern int mca_spml_ucx_deregister(sshmem_mkey_t *mkeys); extern void mca_spml_ucx_memuse_hook(void *addr, size_t length); -extern void mca_spml_ucx_rmkey_unpack(sshmem_mkey_t *mkey, uint32_t segno, int pe, int tr_id); +extern void mca_spml_ucx_rmkey_unpack(shmem_ctx_t ctx, sshmem_mkey_t *mkey, uint32_t segno, int pe, int tr_id); extern void mca_spml_ucx_rmkey_free(sshmem_mkey_t *mkey); extern void *mca_spml_ucx_rmkey_ptr(const void *dst_addr, sshmem_mkey_t *, int pe); @@ -151,20 +170,45 @@ extern int mca_spml_ucx_add_procs(ompi_proc_t** procs, size_t nprocs); extern int mca_spml_ucx_del_procs(ompi_proc_t** procs, size_t nprocs); extern int mca_spml_ucx_fence(shmem_ctx_t ctx); extern int mca_spml_ucx_quiet(shmem_ctx_t ctx); -extern int spml_ucx_progress(void); +extern int spml_ucx_default_progress(void); +extern int spml_ucx_ctx_progress(void); +extern int spml_ucx_progress_aux_ctx(void); +void mca_spml_ucx_async_cb(int fd, short event, void *cbdata); + +int mca_spml_ucx_init_put_op_mask(mca_spml_ucx_ctx_t *ctx, size_t nprocs); +int mca_spml_ucx_clear_put_op_mask(mca_spml_ucx_ctx_t *ctx); + +static inline void mca_spml_ucx_aux_lock(void) +{ + if (mca_spml_ucx.async_progress) { + pthread_spin_lock(&mca_spml_ucx.async_lock); + } +} + +static inline void mca_spml_ucx_aux_unlock(void) +{ + if (mca_spml_ucx.async_progress) { + pthread_spin_unlock(&mca_spml_ucx.async_lock); + } +} + +static void mca_spml_ucx_cache_mkey(mca_spml_ucx_ctx_t *ucx_ctx, sshmem_mkey_t *mkey, uint32_t segno, int dst_pe) +{ + ucp_peer_t *peer; + peer = &(ucx_ctx->ucp_peers[dst_pe]); + mkey_segment_init(&peer->mkeys[segno].super, mkey, segno); +} static inline spml_ucx_mkey_t * -mca_spml_ucx_get_mkey(mca_spml_ucx_ctx_t *ucx_ctx, int pe, void *va, void **rva, mca_spml_ucx_t* module) +mca_spml_ucx_get_mkey(shmem_ctx_t ctx, int pe, void *va, void **rva, mca_spml_ucx_t* module) { spml_ucx_cached_mkey_t *mkey; + mca_spml_ucx_ctx_t *ucx_ctx = (mca_spml_ucx_ctx_t *)ctx; mkey = 
ucx_ctx->ucp_peers[pe].mkeys; mkey = (spml_ucx_cached_mkey_t *)map_segment_find_va(&mkey->super.super, sizeof(*mkey), va); - if (OPAL_UNLIKELY(NULL == mkey)) { - assert(module->get_mkey_slow); - return module->get_mkey_slow(pe, va, rva); - } + assert(mkey != NULL); *rva = map_segment_va2rva(&mkey->super, va); return &mkey->key; } @@ -187,6 +231,19 @@ static inline int ucx_status_to_oshmem_nb(ucs_status_t status) #endif } +static inline void mca_spml_ucx_remote_op_posted(mca_spml_ucx_ctx_t *ctx, int dst) +{ + if (OPAL_UNLIKELY(mca_spml_ucx.synchronized_quiet)) { + if (!opal_bitmap_is_set_bit(&ctx->put_op_bitmap, dst)) { + ctx->put_proc_indexes[ctx->put_proc_count++] = dst; + opal_bitmap_set_bit(&ctx->put_op_bitmap, dst); + } + } +} + +#define MCA_SPML_UCX_CTXS_ARRAY_SIZE 64 +#define MCA_SPML_UCX_CTXS_ARRAY_INC 64 + END_C_DECLS #endif diff --git a/oshmem/mca/spml/ucx/spml_ucx_component.c b/oshmem/mca/spml/ucx/spml_ucx_component.c index a4a4a26385a..3d29bd4e5d8 100644 --- a/oshmem/mca/spml/ucx/spml_ucx_component.c +++ b/oshmem/mca/spml/ucx/spml_ucx_component.c @@ -24,6 +24,7 @@ #include "orte/util/show_help.h" #include "opal/util/opal_environ.h" +#include "opal/runtime/opal_progress_threads.h" static int mca_spml_ucx_component_register(void); static int mca_spml_ucx_component_open(void); @@ -38,26 +39,25 @@ mca_spml_base_component_2_0_0_t mca_spml_ucx_component = { /* First, the mca_base_component_t struct containing meta information about the component itself */ - { - MCA_SPML_BASE_VERSION_2_0_0, - - "ucx", /* MCA component name */ - OSHMEM_MAJOR_VERSION, /* MCA component major version */ - OSHMEM_MINOR_VERSION, /* MCA component minor version */ - OSHMEM_RELEASE_VERSION, /* MCA component release version */ - mca_spml_ucx_component_open, /* component open */ - mca_spml_ucx_component_close, /* component close */ - NULL, - mca_spml_ucx_component_register + .spmlm_version = { + MCA_SPML_BASE_VERSION_2_0_0, + + .mca_component_name = "ucx", + .mca_component_major_version = 
OSHMEM_MAJOR_VERSION, + .mca_component_minor_version = OSHMEM_MINOR_VERSION, + .mca_component_release_version = OSHMEM_RELEASE_VERSION, + .mca_open_component = mca_spml_ucx_component_open, + .mca_close_component = mca_spml_ucx_component_close, + .mca_query_component = NULL, + .mca_register_component_params = mca_spml_ucx_component_register }, - { + .spmlm_data = { /* The component is checkpoint ready */ - MCA_BASE_METADATA_PARAM_CHECKPOINT + .param_field = MCA_BASE_METADATA_PARAM_CHECKPOINT }, - mca_spml_ucx_component_init, /* component init */ - mca_spml_ucx_component_fini /* component finalize */ - + .spmlm_init = mca_spml_ucx_component_init, + .spmlm_finalize = mca_spml_ucx_component_fini }; @@ -91,11 +91,26 @@ static inline void mca_spml_ucx_param_register_string(const char* param_name, storage); } +static inline void mca_spml_ucx_param_register_bool(const char* param_name, + bool default_value, + const char *help_msg, + bool *storage) +{ + *storage = default_value; + (void) mca_base_component_var_register(&mca_spml_ucx_component.spmlm_version, + param_name, + help_msg, + MCA_BASE_VAR_TYPE_BOOL, NULL, 0, 0, + OPAL_INFO_LVL_9, + MCA_BASE_VAR_SCOPE_READONLY, + storage); +} + static int mca_spml_ucx_component_register(void) { mca_spml_ucx_param_register_int("priority", 21, - "[integer] ucx priority", - &mca_spml_ucx.priority); + "[integer] ucx priority", + &mca_spml_ucx.priority); mca_spml_ucx_param_register_int("num_disconnect", 1, "How may disconnects go in parallel", @@ -105,15 +120,71 @@ static int mca_spml_ucx_component_register(void) "Use non-blocking memory registration for shared heap", &mca_spml_ucx.heap_reg_nb); + mca_spml_ucx_param_register_bool("async_progress", 0, + "Enable asynchronous progress thread", + &mca_spml_ucx.async_progress); + + mca_spml_ucx_param_register_int("async_tick_usec", 3000, + "Asynchronous progress tick granularity (in usec)", + &mca_spml_ucx.async_tick); + + mca_spml_ucx_param_register_bool("synchronized_quiet", 0, + "Use 
synchronized quiet on shmem_quiet or shmem_barrier_all operations", + &mca_spml_ucx.synchronized_quiet); + + opal_common_ucx_mca_var_register(&mca_spml_ucx_component.spmlm_version); + return OSHMEM_SUCCESS; } -int spml_ucx_progress(void) +int spml_ucx_ctx_progress(void) +{ + int i; + for (i = 0; i < mca_spml_ucx.active_array.ctxs_count; i++) { + ucp_worker_progress(mca_spml_ucx.active_array.ctxs[i]->ucp_worker); + } + return 1; +} + +int spml_ucx_default_progress(void) { ucp_worker_progress(mca_spml_ucx_ctx_default.ucp_worker); return 1; } +int spml_ucx_progress_aux_ctx(void) +{ + unsigned count; + + if (OPAL_UNLIKELY(!mca_spml_ucx.aux_ctx)) { + return 0; + } + + if (pthread_spin_trylock(&mca_spml_ucx.async_lock)) { + return 0; + } + + count = ucp_worker_progress(mca_spml_ucx.aux_ctx->ucp_worker); + pthread_spin_unlock(&mca_spml_ucx.async_lock); + + return count; +} + +void mca_spml_ucx_async_cb(int fd, short event, void *cbdata) +{ + int count = 0; + + if (pthread_spin_trylock(&mca_spml_ucx.async_lock)) { + return; + } + + do { + count = ucp_worker_progress(mca_spml_ucx.aux_ctx->ucp_worker); + } while (count); + + pthread_spin_unlock(&mca_spml_ucx.async_lock); +} + static int mca_spml_ucx_component_open(void) { return OSHMEM_SUCCESS; @@ -141,8 +212,12 @@ static int spml_ucx_init(void) opal_common_ucx_mca_register(); memset(&params, 0, sizeof(params)); - params.field_mask = UCP_PARAM_FIELD_FEATURES|UCP_PARAM_FIELD_ESTIMATED_NUM_EPS|UCP_PARAM_FIELD_MT_WORKERS_SHARED; - params.features = UCP_FEATURE_RMA|UCP_FEATURE_AMO32|UCP_FEATURE_AMO64; + params.field_mask = UCP_PARAM_FIELD_FEATURES | + UCP_PARAM_FIELD_ESTIMATED_NUM_EPS | + UCP_PARAM_FIELD_MT_WORKERS_SHARED; + params.features = UCP_FEATURE_RMA | + UCP_FEATURE_AMO32 | + UCP_FEATURE_AMO64; params.estimated_num_eps = ompi_proc_world_size(); if (oshmem_mpi_thread_requested == SHMEM_THREAD_MULTIPLE) { params.mt_workers_shared = 1; @@ -150,6 +225,11 @@ static int spml_ucx_init(void) params.mt_workers_shared = 0; } +#if 
HAVE_DECL_UCP_PARAM_FIELD_ESTIMATED_NUM_PPN + params.estimated_num_ppn = opal_process_info.num_local_peers + 1; + params.field_mask |= UCP_PARAM_FIELD_ESTIMATED_NUM_PPN; +#endif + err = ucp_init(&params, ucp_config, &mca_spml_ucx.ucp_context); ucp_config_release(ucp_config); if (UCS_OK != err) { @@ -167,8 +247,15 @@ static int spml_ucx_init(void) oshmem_mpi_thread_provided = SHMEM_THREAD_SINGLE; } - OBJ_CONSTRUCT(&(mca_spml_ucx.ctx_list), opal_list_t); + mca_spml_ucx.active_array.ctxs_count = mca_spml_ucx.idle_array.ctxs_count = 0; + mca_spml_ucx.active_array.ctxs_num = mca_spml_ucx.idle_array.ctxs_num = MCA_SPML_UCX_CTXS_ARRAY_SIZE; + mca_spml_ucx.active_array.ctxs = calloc(mca_spml_ucx.active_array.ctxs_num, + sizeof(mca_spml_ucx_ctx_t *)); + mca_spml_ucx.idle_array.ctxs = calloc(mca_spml_ucx.idle_array.ctxs_num, + sizeof(mca_spml_ucx_ctx_t *)); + SHMEM_MUTEX_INIT(mca_spml_ucx.internal_mutex); + pthread_mutex_init(&mca_spml_ucx.ctx_create_mutex, NULL); wkr_params.field_mask = UCP_WORKER_PARAM_FIELD_THREAD_MODE; if (oshmem_mpi_thread_requested == SHMEM_THREAD_MULTIPLE) { @@ -191,6 +278,22 @@ static int spml_ucx_init(void) oshmem_mpi_thread_provided = SHMEM_THREAD_SINGLE; } + if (mca_spml_ucx.async_progress) { + pthread_spin_init(&mca_spml_ucx.async_lock, 0); + mca_spml_ucx.async_event_base = opal_progress_thread_init(NULL); + if (NULL == mca_spml_ucx.async_event_base) { + SPML_UCX_ERROR("failed to init async progress thread"); + return OSHMEM_ERROR; + } + + mca_spml_ucx.tick_event = opal_event_alloc(); + opal_event_set(mca_spml_ucx.async_event_base, mca_spml_ucx.tick_event, + -1, EV_PERSIST, mca_spml_ucx_async_cb, NULL); + } + + mca_spml_ucx.aux_ctx = NULL; + mca_spml_ucx.aux_refcnt = 0; + oshmem_ctx_default = (shmem_ctx_t) &mca_spml_ucx_ctx_default; return OSHMEM_SUCCESS; @@ -216,20 +319,114 @@ mca_spml_ucx_component_init(int* priority, return &mca_spml_ucx.super; } +static void _ctx_cleanup(mca_spml_ucx_ctx_t *ctx) +{ + int i, j, nprocs = oshmem_num_procs(); + 
opal_common_ucx_del_proc_t *del_procs; + + del_procs = malloc(sizeof(*del_procs) * nprocs); + + for (i = 0; i < nprocs; ++i) { + for (j = 0; j < memheap_map->n_segments; j++) { + if (ctx->ucp_peers[i].mkeys[j].key.rkey != NULL) { + ucp_rkey_destroy(ctx->ucp_peers[i].mkeys[j].key.rkey); + } + } + + del_procs[i].ep = ctx->ucp_peers[i].ucp_conn; + del_procs[i].vpid = i; + ctx->ucp_peers[i].ucp_conn = NULL; + } + + opal_common_ucx_del_procs_nofence(del_procs, nprocs, oshmem_my_proc_id(), + mca_spml_ucx.num_disconnect, + ctx->ucp_worker); + free(del_procs); + mca_spml_ucx_clear_put_op_mask(ctx); + free(ctx->ucp_peers); +} + static int mca_spml_ucx_component_fini(void) { - opal_progress_unregister(spml_ucx_progress); - - if (mca_spml_ucx_ctx_default.ucp_worker) { - ucp_worker_destroy(mca_spml_ucx_ctx_default.ucp_worker); + int fenced = 0, i; + int ret = OSHMEM_SUCCESS; + + opal_progress_unregister(spml_ucx_default_progress); + if (mca_spml_ucx.active_array.ctxs_count) { + opal_progress_unregister(spml_ucx_ctx_progress); } + if(!mca_spml_ucx.enabled) return OSHMEM_SUCCESS; /* never selected.. return success.. 
*/ + if (mca_spml_ucx.async_progress) { + opal_progress_thread_finalize(NULL); + opal_event_evtimer_del(mca_spml_ucx.tick_event); + if (mca_spml_ucx.aux_ctx != NULL) { + _ctx_cleanup(mca_spml_ucx.aux_ctx); + } + opal_progress_unregister(spml_ucx_progress_aux_ctx); + pthread_spin_destroy(&mca_spml_ucx.async_lock); + } + + /* delete context objects from list */ + for (i = 0; i < mca_spml_ucx.active_array.ctxs_count; i++) { + _ctx_cleanup(mca_spml_ucx.active_array.ctxs[i]); + } + + for (i = 0; i < mca_spml_ucx.idle_array.ctxs_count; i++) { + _ctx_cleanup(mca_spml_ucx.idle_array.ctxs[i]); + } + + + ret = opal_common_ucx_mca_pmix_fence_nb(&fenced); + if (OPAL_SUCCESS != ret) { + return ret; + } + + while (!fenced) { + for (i = 0; i < mca_spml_ucx.active_array.ctxs_count; i++) { + ucp_worker_progress(mca_spml_ucx.active_array.ctxs[i]->ucp_worker); + } + + for (i = 0; i < mca_spml_ucx.idle_array.ctxs_count; i++) { + ucp_worker_progress(mca_spml_ucx.idle_array.ctxs[i]->ucp_worker); + } + + ucp_worker_progress(mca_spml_ucx_ctx_default.ucp_worker); + + if (mca_spml_ucx.aux_ctx != NULL) { + ucp_worker_progress(mca_spml_ucx.aux_ctx->ucp_worker); + } + } + + /* delete all workers */ + for (i = 0; i < mca_spml_ucx.active_array.ctxs_count; i++) { + ucp_worker_destroy(mca_spml_ucx.active_array.ctxs[i]->ucp_worker); + free(mca_spml_ucx.active_array.ctxs[i]); + } + + for (i = 0; i < mca_spml_ucx.idle_array.ctxs_count; i++) { + ucp_worker_destroy(mca_spml_ucx.idle_array.ctxs[i]->ucp_worker); + free(mca_spml_ucx.idle_array.ctxs[i]); + } + + if (mca_spml_ucx_ctx_default.ucp_worker) { + ucp_worker_destroy(mca_spml_ucx_ctx_default.ucp_worker); + } + + if (mca_spml_ucx.aux_ctx != NULL) { + ucp_worker_destroy(mca_spml_ucx.aux_ctx->ucp_worker); + } + mca_spml_ucx.enabled = false; /* not anymore */ - OBJ_DESTRUCT(&(mca_spml_ucx.ctx_list)); + free(mca_spml_ucx.active_array.ctxs); + free(mca_spml_ucx.idle_array.ctxs); + free(mca_spml_ucx.aux_ctx); + 
SHMEM_MUTEX_DESTROY(mca_spml_ucx.internal_mutex); + pthread_mutex_destroy(&mca_spml_ucx.ctx_create_mutex); if (mca_spml_ucx.ucp_context) { ucp_cleanup(mca_spml_ucx.ucp_context); diff --git a/oshmem/mca/sshmem/base/base.h b/oshmem/mca/sshmem/base/base.h index ea44ff50964..9db5b06916a 100644 --- a/oshmem/mca/sshmem/base/base.h +++ b/oshmem/mca/sshmem/base/base.h @@ -31,7 +31,7 @@ extern char* mca_sshmem_base_backing_file_dir; OSHMEM_DECLSPEC int mca_sshmem_segment_create(map_segment_t *ds_buf, const char *file_name, - size_t size); + size_t size, long hint); OSHMEM_DECLSPEC void * mca_sshmem_segment_attach(map_segment_t *ds_buf, sshmem_mkey_t *mkey); diff --git a/oshmem/mca/sshmem/base/sshmem_base_wrappers.c b/oshmem/mca/sshmem/base/sshmem_base_wrappers.c index d70490cb4e1..23802cad457 100644 --- a/oshmem/mca/sshmem/base/sshmem_base_wrappers.c +++ b/oshmem/mca/sshmem/base/sshmem_base_wrappers.c @@ -18,13 +18,13 @@ int mca_sshmem_segment_create(map_segment_t *ds_buf, const char *file_name, - size_t size) + size_t size, long hint) { if (!mca_sshmem_base_selected) { return OSHMEM_ERROR; } - return mca_sshmem_base_module->segment_create(ds_buf, file_name, size); + return mca_sshmem_base_module->segment_create(ds_buf, file_name, size, hint); } void * diff --git a/oshmem/mca/sshmem/mmap/sshmem_mmap_module.c b/oshmem/mca/sshmem/mmap/sshmem_mmap_module.c index 9a303221075..dab1e2d1bc8 100644 --- a/oshmem/mca/sshmem/mmap/sshmem_mmap_module.c +++ b/oshmem/mca/sshmem/mmap/sshmem_mmap_module.c @@ -62,7 +62,7 @@ module_init(void); static int segment_create(map_segment_t *ds_buf, const char *file_name, - size_t size); + size_t size, long hint); static void * segment_attach(map_segment_t *ds_buf, sshmem_mkey_t *mkey); @@ -111,13 +111,17 @@ module_finalize(void) static int segment_create(map_segment_t *ds_buf, const char *file_name, - size_t size) + size_t size, long hint) { int rc = OSHMEM_SUCCESS; void *addr = NULL; assert(ds_buf); + if (hint) { + return 
OSHMEM_ERR_NOT_IMPLEMENTED; + } + /* init the contents of map_segment_t */ shmem_ds_reset(ds_buf); diff --git a/oshmem/mca/sshmem/sshmem.h b/oshmem/mca/sshmem/sshmem.h index a2b570aab8f..8ba10574928 100644 --- a/oshmem/mca/sshmem/sshmem.h +++ b/oshmem/mca/sshmem/sshmem.h @@ -83,14 +83,19 @@ typedef int * @param file_name file_name unique string identifier that must be a valid, * writable path (IN). * + * @param address address to attach the segment at, or 0 allocate + * any available address in the process. + * * @param size size of the shared memory segment. * + * @param hint hint of the shared memory segment. + * * @return OSHMEM_SUCCESS on success. */ typedef int (*mca_sshmem_base_module_segment_create_fn_t)(map_segment_t *ds_buf, const char *file_name, - size_t size); + size_t size, long hint); /** * attach to an existing shared memory segment initialized by segment_create. diff --git a/oshmem/mca/sshmem/sshmem_types.h b/oshmem/mca/sshmem/sshmem_types.h index ccdf8995b5f..4e1d937901a 100644 --- a/oshmem/mca/sshmem/sshmem_types.h +++ b/oshmem/mca/sshmem/sshmem_types.h @@ -107,6 +107,8 @@ typedef struct mkey_segment { void *rva_base; /* base va on remote pe */ } mkey_segment_t; +typedef struct segment_allocator segment_allocator_t; + typedef struct map_segment { map_base_segment_t super; sshmem_mkey_t **mkeys_cache; /* includes remote segment bases in va_base */ @@ -115,10 +117,17 @@ typedef struct map_segment { int seg_id; size_t seg_size; /* length of the segment */ segment_type_t type; /* type of the segment */ + long alloc_hints; /* allocation hints this segment supports */ void *context; /* allocator can use this field to store its own private data */ + segment_allocator_t *allocator; /* segment-specific allocator */ } map_segment_t; +struct segment_allocator { + int (*realloc)(map_segment_t*, size_t newsize, void *, void **); + int (*free)(map_segment_t*, void*); +}; + END_C_DECLS #endif /* MCA_SSHMEM_TYPES_H */ diff --git 
a/oshmem/mca/sshmem/sysv/sshmem_sysv_module.c b/oshmem/mca/sshmem/sysv/sshmem_sysv_module.c index a1d112da7d9..fe939df35d1 100644 --- a/oshmem/mca/sshmem/sysv/sshmem_sysv_module.c +++ b/oshmem/mca/sshmem/sysv/sshmem_sysv_module.c @@ -60,7 +60,7 @@ module_init(void); static int segment_create(map_segment_t *ds_buf, const char *file_name, - size_t size); + size_t size, long hint); static void * segment_attach(map_segment_t *ds_buf, sshmem_mkey_t *mkey); @@ -109,7 +109,7 @@ module_finalize(void) static int segment_create(map_segment_t *ds_buf, const char *file_name, - size_t size) + size_t size, long hint) { int rc = OSHMEM_SUCCESS; void *addr = NULL; @@ -119,6 +119,10 @@ segment_create(map_segment_t *ds_buf, assert(ds_buf); + if (hint) { + return OSHMEM_ERR_NOT_IMPLEMENTED; + } + /* init the contents of map_segment_t */ shmem_ds_reset(ds_buf); diff --git a/oshmem/mca/sshmem/ucx/Makefile.am b/oshmem/mca/sshmem/ucx/Makefile.am index bf3a08b547a..ce37cd0e906 100644 --- a/oshmem/mca/sshmem/ucx/Makefile.am +++ b/oshmem/mca/sshmem/ucx/Makefile.am @@ -15,7 +15,8 @@ AM_CPPFLAGS = $(sshmem_ucx_CPPFLAGS) sources = \ sshmem_ucx.h \ sshmem_ucx_component.c \ - sshmem_ucx_module.c + sshmem_ucx_module.c \ + sshmem_ucx_shadow.c # Make the output library in this directory, and name it either # mca_<type>_<name>.la (for DSO builds) or libmca_<type>_<name>.la diff --git a/oshmem/mca/sshmem/ucx/configure.m4 b/oshmem/mca/sshmem/ucx/configure.m4 index aafa4f4e029..7448b2dadf4 100644 --- a/oshmem/mca/sshmem/ucx/configure.m4 +++ b/oshmem/mca/sshmem/ucx/configure.m4 @@ -22,6 +22,40 @@ AC_DEFUN([MCA_oshmem_sshmem_ucx_CONFIG],[ [$1], [$2]) + # Check for UCX device memory allocation support + save_LDFLAGS="$LDFLAGS" + save_LIBS="$LIBS" + save_CPPFLAGS="$CPPFLAGS" + + alloc_dm_LDFLAGS=" -L$ompi_check_ucx_libdir/ucx" + alloc_dm_LIBS=" -luct_ib" + CPPFLAGS+=" $sshmem_ucx_CPPFLAGS" + LDFLAGS+=" $sshmem_ucx_LDFLAGS $alloc_dm_LDFLAGS" + LIBS+=" $sshmem_ucx_LIBS $alloc_dm_LIBS" + + AC_LANG_PUSH([C]) + 
AC_LINK_IFELSE([AC_LANG_PROGRAM( + [[ + #include + #include + ]], + [[ + uct_md_h md = ucp_context_find_tl_md((ucp_context_h)NULL, ""); + (void)uct_ib_md_alloc_device_mem(md, NULL, NULL, 0, "", NULL); + uct_ib_md_release_device_mem(NULL); + ]])], + [ + AC_MSG_NOTICE([UCX device memory allocation is supported]) + AC_DEFINE([HAVE_UCX_DEVICE_MEM], [1], [Support for device memory allocation]) + sshmem_ucx_LIBS+=" $alloc_dm_LIBS" + sshmem_ucx_LDFLAGS+=" $alloc_dm_LDFLAGS" + ], + [AC_MSG_NOTICE([UCX device memory allocation is not supported])]) + AC_LANG_POP([C]) + + CPPFLAGS="$save_CPPFLAGS" + LDFLAGS="$save_LDFLAGS" + LIBS="$save_LIBS" # substitute in the things needed to build ucx AC_SUBST([sshmem_ucx_CFLAGS]) diff --git a/oshmem/mca/sshmem/ucx/sshmem_ucx.h b/oshmem/mca/sshmem/ucx/sshmem_ucx.h index 0b625fcc469..fa264b40f42 100644 --- a/oshmem/mca/sshmem/ucx/sshmem_ucx.h +++ b/oshmem/mca/sshmem/ucx/sshmem_ucx.h @@ -15,8 +15,12 @@ #include "oshmem/mca/sshmem/sshmem.h" +#include + BEGIN_C_DECLS +typedef struct sshmem_ucx_shadow_allocator sshmem_ucx_shadow_allocator_t; + /** * globally exported variable to hold the ucx component. */ @@ -30,11 +34,35 @@ typedef struct mca_sshmem_ucx_component_t { OSHMEM_MODULE_DECLSPEC extern mca_sshmem_ucx_component_t mca_sshmem_ucx_component; +typedef struct mca_sshmem_ucx_segment_context { + void *dev_mem; + sshmem_ucx_shadow_allocator_t *shadow_allocator; + ucp_mem_h ucp_memh; +} mca_sshmem_ucx_segment_context_t; + typedef struct mca_sshmem_ucx_module_t { mca_sshmem_base_module_t super; } mca_sshmem_ucx_module_t; extern mca_sshmem_ucx_module_t mca_sshmem_ucx_module; +sshmem_ucx_shadow_allocator_t *sshmem_ucx_shadow_create(unsigned count); +void sshmem_ucx_shadow_destroy(sshmem_ucx_shadow_allocator_t *allocator); +int sshmem_ucx_shadow_alloc(sshmem_ucx_shadow_allocator_t *allocator, + unsigned count, unsigned *index); + +/* Reallocate existing allocated buffer. If possible - used inplace + * reallocation. 
+ * Parameter 'inplace' - out, in case if zero - new buffer was allocated + * (inplace is not possible), user should remove original buffer after data + * is copied, else (if inplace != 0) - no additional action required */ +int sshmem_ucx_shadow_realloc(sshmem_ucx_shadow_allocator_t *allocator, + unsigned count, unsigned old_index, unsigned *index, + int *inplace); +int sshmem_ucx_shadow_free(sshmem_ucx_shadow_allocator_t *allocator, + unsigned index); +unsigned sshmem_ucx_shadow_size(sshmem_ucx_shadow_allocator_t *allocator, + unsigned index); + END_C_DECLS #endif /* MCA_SHMEM_UCX_EXPORT_H */ diff --git a/oshmem/mca/sshmem/ucx/sshmem_ucx_module.c b/oshmem/mca/sshmem/ucx/sshmem_ucx_module.c index 44f73743316..52b4d560626 100644 --- a/oshmem/mca/sshmem/ucx/sshmem_ucx_module.c +++ b/oshmem/mca/sshmem/ucx/sshmem_ucx_module.c @@ -18,12 +18,24 @@ #include "oshmem/proc/proc.h" #include "oshmem/mca/sshmem/sshmem.h" +#include "oshmem/include/shmemx.h" #include "oshmem/mca/sshmem/base/base.h" #include "oshmem/util/oshmem_util.h" #include "oshmem/mca/spml/ucx/spml_ucx.h" #include "sshmem_ucx.h" +//#include + +#if HAVE_UCX_DEVICE_MEM +#include +#include +#endif + +#define ALLOC_ELEM_SIZE sizeof(uint64_t) +#define min(a,b) ((a) < (b) ? (a) : (b)) +#define max(a,b) ((a) > (b) ? 
(a) : (b)) + /* ////////////////////////////////////////////////////////////////////////// */ /*local functions */ /* local functions */ @@ -33,7 +45,7 @@ module_init(void); static int segment_create(map_segment_t *ds_buf, const char *file_name, - size_t size); + size_t size, long hint); static void * segment_attach(map_segment_t *ds_buf, sshmem_mkey_t *mkey); @@ -47,6 +59,11 @@ segment_unlink(map_segment_t *ds_buf); static int module_finalize(void); +static int sshmem_ucx_memheap_realloc(map_segment_t *s, size_t size, + void* old_ptr, void** new_ptr); + +static int sshmem_ucx_memheap_free(map_segment_t *s, void* ptr); + /* * ucx shmem module */ @@ -79,13 +96,18 @@ module_finalize(void) /* ////////////////////////////////////////////////////////////////////////// */ +static segment_allocator_t sshmem_ucx_allocator = { + .realloc = sshmem_ucx_memheap_realloc, + .free = sshmem_ucx_memheap_free +}; + static int -segment_create(map_segment_t *ds_buf, - const char *file_name, - size_t size) +segment_create_internal(map_segment_t *ds_buf, void *address, size_t size, + unsigned flags, long hint, void *dev_mem) { + mca_sshmem_ucx_segment_context_t *ctx; int rc = OSHMEM_SUCCESS; - mca_spml_ucx_t *spml = (mca_spml_ucx_t *)mca_spml.self; + mca_spml_ucx_t *spml = (mca_spml_ucx_t*)mca_spml.self; ucp_mem_map_params_t mem_map_params; ucp_mem_h mem_h; ucs_status_t status; @@ -99,25 +121,51 @@ segment_create(map_segment_t *ds_buf, UCP_MEM_MAP_PARAM_FIELD_LENGTH | UCP_MEM_MAP_PARAM_FIELD_FLAGS; - mem_map_params.address = (void *)mca_sshmem_base_start_address; + mem_map_params.address = address; mem_map_params.length = size; - mem_map_params.flags = UCP_MEM_MAP_ALLOCATE|UCP_MEM_MAP_FIXED; - - if (spml->heap_reg_nb) { - mem_map_params.flags |= UCP_MEM_MAP_NONBLOCK; - } + mem_map_params.flags = flags; status = ucp_mem_map(spml->ucp_context, &mem_map_params, &mem_h); if (UCS_OK != status) { + SSHMEM_ERROR("ucp_mem_map() failed: %s\n", ucs_status_string(status)); rc = OSHMEM_ERROR; goto 
out; } - ds_buf->super.va_base = mem_map_params.address; + if (!(flags & UCP_MEM_MAP_FIXED)) { + /* Memory was allocated at an arbitrary address; obtain it */ + ucp_mem_attr_t mem_attr; + mem_attr.field_mask = UCP_MEM_ATTR_FIELD_ADDRESS; + status = ucp_mem_query(mem_h, &mem_attr); + if (status != UCS_OK) { + SSHMEM_ERROR("ucp_mem_query() failed: %s\n", ucs_status_string(status)); + ucp_mem_unmap(spml->ucp_context, mem_h); + rc = OSHMEM_ERROR; + goto out; + } + + ds_buf->super.va_base = mem_attr.address; + } else { + ds_buf->super.va_base = mem_map_params.address; + } + + ctx = calloc(1, sizeof(*ctx)); + if (!ctx) { + ucp_mem_unmap(spml->ucp_context, mem_h); + rc = OSHMEM_ERR_OUT_OF_RESOURCE; + goto out; + } + ds_buf->seg_size = size; ds_buf->super.va_end = (void*)((uintptr_t)ds_buf->super.va_base + ds_buf->seg_size); - ds_buf->context = mem_h; + ds_buf->context = ctx; ds_buf->type = MAP_SEGMENT_ALLOC_UCX; + ds_buf->alloc_hints = hint; + ctx->ucp_memh = mem_h; + ctx->dev_mem = dev_mem; + if (hint) { + ds_buf->allocator = &sshmem_ucx_allocator; + } out: OPAL_OUTPUT_VERBOSE( @@ -132,6 +180,83 @@ segment_create(map_segment_t *ds_buf, return rc; } +#if HAVE_UCX_DEVICE_MEM +static uct_ib_device_mem_h alloc_device_mem(mca_spml_ucx_t *spml, size_t size, + void **address_p) +{ + uct_ib_device_mem_h dev_mem = NULL; + ucs_status_t status; + uct_md_h uct_md; + void *address; + size_t length; + + uct_md = ucp_context_find_tl_md(spml->ucp_context, "mlx5"); + if (uct_md == NULL) { + SSHMEM_VERBOSE(1, "ucp_context_find_tl_md() returned NULL\n"); + return NULL; + } + + /* If found a matching memory domain, allocate device memory on it */ + length = size; + address = NULL; + status = uct_ib_md_alloc_device_mem(uct_md, &length, &address, + UCT_MD_MEM_ACCESS_ALL, "sshmem_seg", + &dev_mem); + if (status != UCS_OK) { + /* If device memory could not be allocated, fall back to mmap (since some + * PEs in the job may succeed while others fail) */ + SSHMEM_VERBOSE(1, "uct_ib_md_alloc_dm() 
failed: %s\n", + ucs_status_string(status)); + return NULL; + } + + SSHMEM_VERBOSE(3, "uct_ib_md_alloc_dm() returned address %p\n", address); + *address_p = address; + return dev_mem; +} +#endif + +static int +segment_create(map_segment_t *ds_buf, + const char *file_name, + size_t size, long hint) +{ + mca_spml_ucx_t *spml = (mca_spml_ucx_t*)mca_spml.self; + unsigned flags; + int ret; + +#if HAVE_UCX_DEVICE_MEM + if (hint & SHMEM_HINT_DEVICE_NIC_MEM) { + if (size > UINT_MAX) { + return OSHMEM_ERR_BAD_PARAM; + } + + void *dev_mem_address; + uct_ib_device_mem_h dev_mem = alloc_device_mem(spml, size, + &dev_mem_address); + if (dev_mem != NULL) { + ret = segment_create_internal(ds_buf, dev_mem_address, size, 0, + hint, dev_mem); + if (ret == OSHMEM_SUCCESS) { + return OSHMEM_SUCCESS; + } else if (dev_mem != NULL) { + uct_ib_md_release_device_mem(dev_mem); + /* fallback to regular allocation */ + } + } + } +#endif + + flags = UCP_MEM_MAP_ALLOCATE | (spml->heap_reg_nb ? UCP_MEM_MAP_NONBLOCK : 0); + if (hint) { + return segment_create_internal(ds_buf, NULL, size, flags, hint, NULL); + } else { + return segment_create_internal(ds_buf, mca_sshmem_base_start_address, + size, flags | UCP_MEM_MAP_FIXED, hint, + NULL); + } +} + static void * segment_attach(map_segment_t *ds_buf, sshmem_mkey_t *mkey) { @@ -168,10 +293,22 @@ static int segment_unlink(map_segment_t *ds_buf) { mca_spml_ucx_t *spml = (mca_spml_ucx_t *)mca_spml.self; + mca_sshmem_ucx_segment_context_t *ctx = ds_buf->context; - assert(ds_buf); + if (ctx->shadow_allocator) { + sshmem_ucx_shadow_destroy(ctx->shadow_allocator); + } - ucp_mem_unmap(spml->ucp_context, (ucp_mem_h)ds_buf->context); + ucp_mem_unmap(spml->ucp_context, ctx->ucp_memh); + +#if HAVE_UCX_DEVICE_MEM + if (ctx->dev_mem) { + uct_ib_md_release_device_mem(ctx->dev_mem); + } +#endif + + ds_buf->context = NULL; + free(ctx); OPAL_OUTPUT_VERBOSE( (70, oshmem_sshmem_base_framework.framework_output, @@ -188,3 +325,86 @@ segment_unlink(map_segment_t *ds_buf) 
return OSHMEM_SUCCESS; } +static void *sshmem_ucx_memheap_index2ptr(map_segment_t *s, unsigned index) +{ + return (char*)s->super.va_base + (index * ALLOC_ELEM_SIZE); +} + +static unsigned sshmem_ucx_memheap_ptr2index(map_segment_t *s, void *ptr) +{ + return ((char*)ptr - (char*)s->super.va_base) / ALLOC_ELEM_SIZE; +} + +static void sshmem_ucx_memheap_wordcopy(void *dst, void *src, size_t size) +{ + const size_t count = (size + sizeof(uint64_t) - 1) / sizeof(uint64_t); + uint64_t *dst64 = (uint64_t*)dst; + uint64_t *src64 = (uint64_t*)src; + size_t i; + + for (i = 0; i < count; ++i) { + *(dst64++) = *(src64++); + } + opal_atomic_wmb(); +} + +static int sshmem_ucx_memheap_realloc(map_segment_t *s, size_t size, + void* old_ptr, void** new_ptr) +{ + mca_sshmem_ucx_segment_context_t *ctx = s->context; + unsigned alloc_count, index, old_index, old_alloc_count; + int res; + int inplace; + + if (size > s->seg_size) { + return OSHMEM_ERR_OUT_OF_RESOURCE; + } + + /* create allocator on demand */ + if (!ctx->shadow_allocator) { + ctx->shadow_allocator = sshmem_ucx_shadow_create(s->seg_size); + if (!ctx->shadow_allocator) { + return OSHMEM_ERR_OUT_OF_RESOURCE; + } + } + + /* Allocate new element. 
Zero-size allocation should still return a unique + * pointer, so allocate at least one element */ + alloc_count = max((size + ALLOC_ELEM_SIZE - 1) / ALLOC_ELEM_SIZE, 1); + + if (!old_ptr) { + res = sshmem_ucx_shadow_alloc(ctx->shadow_allocator, alloc_count, &index); + } else { + old_index = sshmem_ucx_memheap_ptr2index(s, old_ptr); + res = sshmem_ucx_shadow_realloc(ctx->shadow_allocator, alloc_count, + old_index, &index, &inplace); + } + + if (res != OSHMEM_SUCCESS) { + return res; + } + + *new_ptr = sshmem_ucx_memheap_index2ptr(s, index); + + /* Copy to the new block and release the old one */ + if (old_ptr && !inplace) { + old_alloc_count = sshmem_ucx_shadow_size(ctx->shadow_allocator, old_index); + sshmem_ucx_memheap_wordcopy(*new_ptr, old_ptr, + min(size, old_alloc_count * ALLOC_ELEM_SIZE)); + sshmem_ucx_shadow_free(ctx->shadow_allocator, old_index); + } + + return OSHMEM_SUCCESS; +} + +static int sshmem_ucx_memheap_free(map_segment_t *s, void* ptr) +{ + mca_sshmem_ucx_segment_context_t *ctx = s->context; + + if (!ptr) { + return OSHMEM_SUCCESS; + } + + return sshmem_ucx_shadow_free(ctx->shadow_allocator, + sshmem_ucx_memheap_ptr2index(s, ptr)); +} diff --git a/oshmem/mca/sshmem/ucx/sshmem_ucx_shadow.c b/oshmem/mca/sshmem/ucx/sshmem_ucx_shadow.c new file mode 100644 index 00000000000..06922c3e1b7 --- /dev/null +++ b/oshmem/mca/sshmem/ucx/sshmem_ucx_shadow.c @@ -0,0 +1,189 @@ +/* + * Copyright (c) 2019 Mellanox Technologies, Inc. + * All rights reserved. 
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include "oshmem_config.h" + +#include "oshmem/mca/sshmem/sshmem.h" +#include "oshmem/include/shmemx.h" +#include "oshmem/mca/sshmem/base/base.h" + +#include "sshmem_ucx.h" + +#define SSHMEM_UCX_SHADOW_ELEM_FLAG_FREE 0x1 + +typedef struct sshmem_ucx_shadow_alloc_elem { + unsigned flags; + unsigned block_size; +} sshmem_ucx_shadow_alloc_elem_t; + +struct sshmem_ucx_shadow_allocator { + size_t num_elems; + sshmem_ucx_shadow_alloc_elem_t elems[]; +}; + +static int sshmem_ucx_shadow_is_free(sshmem_ucx_shadow_alloc_elem_t *elem) +{ + return elem->flags & SSHMEM_UCX_SHADOW_ELEM_FLAG_FREE; +} + +static void sshmem_ucx_shadow_set_elem(sshmem_ucx_shadow_alloc_elem_t *elem, + unsigned flags, unsigned block_size) +{ + elem->flags = flags; + elem->block_size = block_size; +} + +sshmem_ucx_shadow_allocator_t *sshmem_ucx_shadow_create(unsigned count) +{ + sshmem_ucx_shadow_allocator_t *allocator; + + allocator = calloc(1, sizeof(*allocator) + + count * sizeof(*allocator->elems)); + if (allocator) { + /* initialization: set initial element to the whole buffer */ + sshmem_ucx_shadow_set_elem(&allocator->elems[0], + SSHMEM_UCX_SHADOW_ELEM_FLAG_FREE, count); + allocator->num_elems = count; + } + + return allocator; +} + +void sshmem_ucx_shadow_destroy(sshmem_ucx_shadow_allocator_t *allocator) +{ + free(allocator); /* no leak check. 
TODO add leak warnings/debug */ +} + +int sshmem_ucx_shadow_alloc(sshmem_ucx_shadow_allocator_t *allocator, + unsigned count, unsigned *index) +{ + sshmem_ucx_shadow_alloc_elem_t *end = &allocator->elems[allocator->num_elems]; + sshmem_ucx_shadow_alloc_elem_t *elem; + + assert(count > 0); + + for (elem = &allocator->elems[0]; elem < end; elem += elem->block_size) { + if (sshmem_ucx_shadow_is_free(elem) && (elem->block_size >= count)) { + /* found suitable free element */ + if (elem->block_size > count) { + /* create new 'free' element for tail of current buffer */ + sshmem_ucx_shadow_set_elem(elem + count, + SSHMEM_UCX_SHADOW_ELEM_FLAG_FREE, + elem->block_size - count); + } + + /* set the size and flags of the allocated element */ + sshmem_ucx_shadow_set_elem(elem, 0, count); + *index = elem - &allocator->elems[0]; + return OSHMEM_SUCCESS; + } + } + + return OSHMEM_ERR_OUT_OF_RESOURCE; +} + +static void sshmem_ucx_shadow_merge_blocks(sshmem_ucx_shadow_allocator_t *allocator) +{ + sshmem_ucx_shadow_alloc_elem_t *elem = &allocator->elems[0]; + sshmem_ucx_shadow_alloc_elem_t *end = &allocator->elems[allocator->num_elems]; + sshmem_ucx_shadow_alloc_elem_t *next_elem; + + while ( (next_elem = (elem + elem->block_size)) < end) { + if (sshmem_ucx_shadow_is_free(elem) && sshmem_ucx_shadow_is_free(next_elem)) { + /* current & next elements are free, should be merged */ + elem->block_size += next_elem->block_size; + /* clean element which is merged */ + sshmem_ucx_shadow_set_elem(next_elem, 0, 0); + } else { + elem = next_elem; + } + } +} + + + +int sshmem_ucx_shadow_realloc(sshmem_ucx_shadow_allocator_t *allocator, + unsigned count, unsigned old_index, unsigned *index, + int *inplace) +{ + sshmem_ucx_shadow_alloc_elem_t *elem = &allocator->elems[old_index]; + unsigned old_count = elem->block_size; + sshmem_ucx_shadow_alloc_elem_t *end; + sshmem_ucx_shadow_alloc_elem_t *next; + + assert(count > 0); + assert(!sshmem_ucx_shadow_is_free(elem)); + + *inplace = 1; + + if (count 
== old_count) { + *index = old_index; + return OSHMEM_SUCCESS; + } + + if (count < old_count) { + /* requested block is shorter than allocated block + * then just cut current buffer */ + sshmem_ucx_shadow_set_elem(elem + count, + SSHMEM_UCX_SHADOW_ELEM_FLAG_FREE, + elem->block_size - count); + elem->block_size = count; + *index = old_index; + sshmem_ucx_shadow_merge_blocks(allocator); + return OSHMEM_SUCCESS; + } + + assert(count > old_count); + + end = &allocator->elems[allocator->num_elems]; + next = &elem[old_count]; + /* try to check if next element is free & has enough length */ + if ((next < end) && /* non-last element? */ + sshmem_ucx_shadow_is_free(next) && /* next is free */ + (old_count + next->block_size >= count)) + { + assert(elem < next); + assert(elem + count > next); + assert(elem + count <= end); + assert(next + next->block_size <= end); + + if (old_count + next->block_size > count) { + sshmem_ucx_shadow_set_elem(elem + count, SSHMEM_UCX_SHADOW_ELEM_FLAG_FREE, + old_count + next->block_size - count); + } + + sshmem_ucx_shadow_set_elem(next, 0, 0); + elem->block_size = count; + *index = old_index; + return OSHMEM_SUCCESS; + } + + *inplace = 0; + return sshmem_ucx_shadow_alloc(allocator, count, index); +} + +int sshmem_ucx_shadow_free(sshmem_ucx_shadow_allocator_t *allocator, + unsigned index) +{ + sshmem_ucx_shadow_alloc_elem_t *elem = &allocator->elems[index]; + + elem->flags |= SSHMEM_UCX_SHADOW_ELEM_FLAG_FREE; + sshmem_ucx_shadow_merge_blocks(allocator); + return OSHMEM_SUCCESS; +} + +unsigned sshmem_ucx_shadow_size(sshmem_ucx_shadow_allocator_t *allocator, + unsigned index) +{ + sshmem_ucx_shadow_alloc_elem_t *elem = &allocator->elems[index]; + + assert(!sshmem_ucx_shadow_is_free(elem)); + return elem->block_size; +} diff --git a/oshmem/runtime/oshmem_shmem_init.c b/oshmem/runtime/oshmem_shmem_init.c index d1a187bf4d3..f1115810045 100644 --- a/oshmem/runtime/oshmem_shmem_init.c +++ b/oshmem/runtime/oshmem_shmem_init.c @@ -107,7 +107,7 @@ 
MPI_Comm oshmem_comm_world = {0}; opal_thread_t *oshmem_mpi_main_thread = NULL; -shmem_internal_mutex_t shmem_internal_mutex_alloc = {0}; +shmem_internal_mutex_t shmem_internal_mutex_alloc = {{0}}; shmem_ctx_t oshmem_ctx_default = NULL; diff --git a/oshmem/runtime/runtime.h b/oshmem/runtime/runtime.h index 737def7e255..28f22f3eab7 100644 --- a/oshmem/runtime/runtime.h +++ b/oshmem/runtime/runtime.h @@ -74,16 +74,6 @@ OSHMEM_DECLSPEC extern shmem_ctx_t oshmem_ctx_default; } while (0) -/* - * SHMEM_Init_thread constants - */ -enum { - SHMEM_THREAD_SINGLE, - SHMEM_THREAD_FUNNELED, - SHMEM_THREAD_SERIALIZED, - SHMEM_THREAD_MULTIPLE -}; - /** Bitflags to be used for the modex exchange for the various thread * levels. Required to support heterogeneous environments */ #define OSHMEM_THREADLEVEL_SINGLE_BF 0x00000001 @@ -210,6 +200,13 @@ OSHMEM_DECLSPEC int oshmem_shmem_register_params(void); RUNTIME_CHECK_ERROR("Required address %p is not in symmetric space\n", ((void*)x)); \ oshmem_shmem_abort(-1); \ } +/* Check if address is in symmetric space or size is zero */ +#define RUNTIME_CHECK_ADDR_SIZE(x,s) \ + if (OPAL_UNLIKELY((s) && !MCA_MEMHEAP_CALL(is_symmetric_addr((x))))) \ + { \ + RUNTIME_CHECK_ERROR("Required address %p is not in symmetric space\n", ((void*)x)); \ + oshmem_shmem_abort(-1); \ + } #define RUNTIME_CHECK_WITH_MEMHEAP_SIZE(x) \ if (OPAL_UNLIKELY((long)(x) > MCA_MEMHEAP_CALL(size))) \ { \ @@ -222,6 +219,7 @@ OSHMEM_DECLSPEC int oshmem_shmem_register_params(void); #define RUNTIME_CHECK_INIT() #define RUNTIME_CHECK_PE(x) #define RUNTIME_CHECK_ADDR(x) +#define RUNTIME_CHECK_ADDR_SIZE(x,s) #define RUNTIME_CHECK_WITH_MEMHEAP_SIZE(x) #endif /* OSHMEM_PARAM_CHECK */ diff --git a/oshmem/shmem/c/profile/Makefile.am b/oshmem/shmem/c/profile/Makefile.am index 4764c3810d3..717d9fbf2ef 100644 --- a/oshmem/shmem/c/profile/Makefile.am +++ b/oshmem/shmem/c/profile/Makefile.am @@ -38,6 +38,7 @@ OSHMEM_API_SOURCES = \ pshmem_align.c \ pshmem_query.c \ pshmem_p.c \ + 
pshmem_context.c \ pshmem_put.c \ pshmem_g.c \ pshmem_get.c \ diff --git a/oshmem/shmem/c/profile/defines.h b/oshmem/shmem/c/profile/defines.h index ce87e2ee2f5..fa30d783778 100644 --- a/oshmem/shmem/c/profile/defines.h +++ b/oshmem/shmem/c/profile/defines.h @@ -58,6 +58,8 @@ #define shrealloc pshrealloc /* shmem-compat.h */ #define shfree pshfree /* shmem-compat.h */ +#define shmemx_malloc_with_hint pshmemx_malloc_with_hint + /* * Remote pointer operations */ @@ -72,7 +74,6 @@ /* * Elemental put routines */ - #define shmem_ctx_char_p pshmem_ctx_char_p #define shmem_ctx_short_p pshmem_ctx_short_p #define shmem_ctx_int_p pshmem_ctx_int_p @@ -80,7 +81,24 @@ #define shmem_ctx_float_p pshmem_ctx_float_p #define shmem_ctx_double_p pshmem_ctx_double_p #define shmem_ctx_longlong_p pshmem_ctx_longlong_p +#define shmem_ctx_schar_p pshmem_ctx_schar_p +#define shmem_ctx_uchar_p pshmem_ctx_uchar_p +#define shmem_ctx_ushort_p pshmem_ctx_ushort_p +#define shmem_ctx_uint_p pshmem_ctx_uint_p +#define shmem_ctx_ulong_p pshmem_ctx_ulong_p +#define shmem_ctx_ulonglong_p pshmem_ctx_ulonglong_p #define shmem_ctx_longdouble_p pshmem_ctx_longdouble_p +#define shmem_ctx_int8_p pshmem_ctx_int8_p +#define shmem_ctx_int16_p pshmem_ctx_int16_p +#define shmem_ctx_int32_p pshmem_ctx_int32_p +#define shmem_ctx_int64_p pshmem_ctx_int64_p +#define shmem_ctx_uint8_p pshmem_ctx_uint8_p +#define shmem_ctx_uint16_p pshmem_ctx_uint16_p +#define shmem_ctx_uint32_p pshmem_ctx_uint32_p +#define shmem_ctx_uint64_p pshmem_ctx_uint64_p +#define shmem_ctx_size_p pshmem_ctx_size_p +#define shmem_ctx_ptrdiff_p pshmem_ctx_ptrdiff_p + #define shmem_char_p pshmem_char_p #define shmem_short_p pshmem_short_p #define shmem_int_p pshmem_int_p @@ -88,7 +106,24 @@ #define shmem_float_p pshmem_float_p #define shmem_double_p pshmem_double_p #define shmem_longlong_p pshmem_longlong_p +#define shmem_schar_p pshmem_schar_p +#define shmem_uchar_p pshmem_uchar_p +#define shmem_ushort_p pshmem_ushort_p +#define shmem_uint_p 
pshmem_uint_p +#define shmem_ulong_p pshmem_ulong_p +#define shmem_ulonglong_p pshmem_ulonglong_p #define shmem_longdouble_p pshmem_longdouble_p +#define shmem_int8_p pshmem_int8_p +#define shmem_int16_p pshmem_int16_p +#define shmem_int32_p pshmem_int32_p +#define shmem_int64_p pshmem_int64_p +#define shmem_uint8_p pshmem_uint8_p +#define shmem_uint16_p pshmem_uint16_p +#define shmem_uint32_p pshmem_uint32_p +#define shmem_uint64_p pshmem_uint64_p +#define shmem_size_p pshmem_size_p +#define shmem_ptrdiff_p pshmem_ptrdiff_p + #define shmemx_int16_p pshmemx_int16_p #define shmemx_int32_p pshmemx_int32_p #define shmemx_int64_p pshmemx_int64_p @@ -103,7 +138,24 @@ #define shmem_ctx_float_put pshmem_ctx_float_put #define shmem_ctx_double_put pshmem_ctx_double_put #define shmem_ctx_longlong_put pshmem_ctx_longlong_put +#define shmem_ctx_schar_put pshmem_ctx_schar_put +#define shmem_ctx_uchar_put pshmem_ctx_uchar_put +#define shmem_ctx_ushort_put pshmem_ctx_ushort_put +#define shmem_ctx_uint_put pshmem_ctx_uint_put +#define shmem_ctx_ulong_put pshmem_ctx_ulong_put +#define shmem_ctx_ulonglong_put pshmem_ctx_ulonglong_put #define shmem_ctx_longdouble_put pshmem_ctx_longdouble_put +#define shmem_ctx_int8_put pshmem_ctx_int8_put +#define shmem_ctx_int16_put pshmem_ctx_int16_put +#define shmem_ctx_int32_put pshmem_ctx_int32_put +#define shmem_ctx_int64_put pshmem_ctx_int64_put +#define shmem_ctx_uint8_put pshmem_ctx_uint8_put +#define shmem_ctx_uint16_put pshmem_ctx_uint16_put +#define shmem_ctx_uint32_put pshmem_ctx_uint32_put +#define shmem_ctx_uint64_put pshmem_ctx_uint64_put +#define shmem_ctx_size_put pshmem_ctx_size_put +#define shmem_ctx_ptrdiff_put pshmem_ctx_ptrdiff_put + #define shmem_char_put pshmem_char_put /* shmem-compat.h */ #define shmem_short_put pshmem_short_put #define shmem_int_put pshmem_int_put @@ -111,7 +163,23 @@ #define shmem_float_put pshmem_float_put #define shmem_double_put pshmem_double_put #define shmem_longlong_put pshmem_longlong_put +#define 
shmem_schar_put pshmem_schar_put +#define shmem_uchar_put pshmem_uchar_put +#define shmem_ushort_put pshmem_ushort_put +#define shmem_uint_put pshmem_uint_put +#define shmem_ulong_put pshmem_ulong_put +#define shmem_ulonglong_put pshmem_ulonglong_put #define shmem_longdouble_put pshmem_longdouble_put +#define shmem_int8_put pshmem_int8_put +#define shmem_int16_put pshmem_int16_put +#define shmem_int32_put pshmem_int32_put +#define shmem_int64_put pshmem_int64_put +#define shmem_uint8_put pshmem_uint8_put +#define shmem_uint16_put pshmem_uint16_put +#define shmem_uint32_put pshmem_uint32_put +#define shmem_uint64_put pshmem_uint64_put +#define shmem_size_put pshmem_size_put +#define shmem_ptrdiff_put pshmem_ptrdiff_put #define shmem_ctx_put8 pshmem_ctx_put8 #define shmem_ctx_put16 pshmem_ctx_put16 @@ -119,6 +187,7 @@ #define shmem_ctx_put64 pshmem_ctx_put64 #define shmem_ctx_put128 pshmem_ctx_put128 #define shmem_ctx_putmem pshmem_ctx_putmem + #define shmem_put8 pshmem_put8 #define shmem_put16 pshmem_put16 #define shmem_put32 pshmem_put32 @@ -129,28 +198,62 @@ /* * Strided put routines */ -#define shmem_ctx_char_iput pshmem_ctx_char_iput -#define shmem_ctx_short_iput pshmem_ctx_short_iput -#define shmem_ctx_int_iput pshmem_ctx_int_iput -#define shmem_ctx_float_iput pshmem_ctx_float_iput -#define shmem_ctx_double_iput pshmem_ctx_double_iput -#define shmem_ctx_longlong_iput pshmem_ctx_longlong_iput -#define shmem_ctx_longdouble_iput pshmem_ctx_longdouble_iput -#define shmem_ctx_long_iput pshmem_ctx_long_iput -#define shmem_char_iput pshmem_char_iput -#define shmem_short_iput pshmem_short_iput -#define shmem_int_iput pshmem_int_iput -#define shmem_float_iput pshmem_float_iput -#define shmem_double_iput pshmem_double_iput -#define shmem_longlong_iput pshmem_longlong_iput -#define shmem_longdouble_iput pshmem_longdouble_iput -#define shmem_long_iput pshmem_long_iput +#define shmem_ctx_char_iput pshmem_ctx_char_iput +#define shmem_ctx_short_iput pshmem_ctx_short_iput 
+#define shmem_ctx_int_iput pshmem_ctx_int_iput +#define shmem_ctx_long_iput pshmem_ctx_long_iput +#define shmem_ctx_float_iput pshmem_ctx_float_iput +#define shmem_ctx_double_iput pshmem_ctx_double_iput +#define shmem_ctx_longlong_iput pshmem_ctx_longlong_iput +#define shmem_ctx_schar_iput pshmem_ctx_schar_iput +#define shmem_ctx_uchar_iput pshmem_ctx_uchar_iput +#define shmem_ctx_ushort_iput pshmem_ctx_ushort_iput +#define shmem_ctx_uint_iput pshmem_ctx_uint_iput +#define shmem_ctx_ulong_iput pshmem_ctx_ulong_iput +#define shmem_ctx_ulonglong_iput pshmem_ctx_ulonglong_iput +#define shmem_ctx_longdouble_iput pshmem_ctx_longdouble_iput +#define shmem_ctx_int8_iput pshmem_ctx_int8_iput +#define shmem_ctx_int16_iput pshmem_ctx_int16_iput +#define shmem_ctx_int32_iput pshmem_ctx_int32_iput +#define shmem_ctx_int64_iput pshmem_ctx_int64_iput +#define shmem_ctx_uint8_iput pshmem_ctx_uint8_iput +#define shmem_ctx_uint16_iput pshmem_ctx_uint16_iput +#define shmem_ctx_uint32_iput pshmem_ctx_uint32_iput +#define shmem_ctx_uint64_iput pshmem_ctx_uint64_iput +#define shmem_ctx_size_iput pshmem_ctx_size_iput +#define shmem_ctx_ptrdiff_iput pshmem_ctx_ptrdiff_iput + +#define shmem_char_iput pshmem_char_iput +#define shmem_short_iput pshmem_short_iput +#define shmem_int_iput pshmem_int_iput +#define shmem_long_iput pshmem_long_iput +#define shmem_float_iput pshmem_float_iput +#define shmem_double_iput pshmem_double_iput +#define shmem_longlong_iput pshmem_longlong_iput +#define shmem_schar_iput pshmem_schar_iput +#define shmem_uchar_iput pshmem_uchar_iput +#define shmem_ushort_iput pshmem_ushort_iput +#define shmem_uint_iput pshmem_uint_iput +#define shmem_ulong_iput pshmem_ulong_iput +#define shmem_ulonglong_iput pshmem_ulonglong_iput +#define shmem_longdouble_iput pshmem_longdouble_iput +#define shmem_int8_iput pshmem_int8_iput +#define shmem_int16_iput pshmem_int16_iput +#define shmem_int32_iput pshmem_int32_iput +#define shmem_int64_iput pshmem_int64_iput +#define 
shmem_uint8_iput pshmem_uint8_iput +#define shmem_uint16_iput pshmem_uint16_iput +#define shmem_uint32_iput pshmem_uint32_iput +#define shmem_uint64_iput pshmem_uint64_iput +#define shmem_size_iput pshmem_size_iput +#define shmem_ptrdiff_iput pshmem_ptrdiff_iput #define shmem_ctx_iput8 pshmem_ctx_iput8 #define shmem_ctx_iput16 pshmem_ctx_iput16 #define shmem_ctx_iput32 pshmem_ctx_iput32 #define shmem_ctx_iput64 pshmem_ctx_iput64 #define shmem_ctx_iput128 pshmem_ctx_iput128 + #define shmem_iput8 pshmem_iput8 #define shmem_iput16 pshmem_iput16 #define shmem_iput32 pshmem_iput32 @@ -160,22 +263,55 @@ /* * Non-block data put routines */ -#define shmem_ctx_char_put_nbi pshmem_ctx_char_put_nbi -#define shmem_ctx_short_put_nbi pshmem_ctx_short_put_nbi -#define shmem_ctx_int_put_nbi pshmem_ctx_int_put_nbi -#define shmem_ctx_long_put_nbi pshmem_ctx_long_put_nbi -#define shmem_ctx_float_put_nbi pshmem_ctx_float_put_nbi -#define shmem_ctx_double_put_nbi pshmem_ctx_double_put_nbi -#define shmem_ctx_longlong_put_nbi pshmem_ctx_longlong_put_nbi -#define shmem_ctx_longdouble_put_nbi pshmem_ctx_longdouble_put_nbi -#define shmem_char_put_nbi pshmem_char_put_nbi -#define shmem_short_put_nbi pshmem_short_put_nbi -#define shmem_int_put_nbi pshmem_int_put_nbi -#define shmem_long_put_nbi pshmem_long_put_nbi -#define shmem_float_put_nbi pshmem_float_put_nbi -#define shmem_double_put_nbi pshmem_double_put_nbi -#define shmem_longlong_put_nbi pshmem_longlong_put_nbi -#define shmem_longdouble_put_nbi pshmem_longdouble_put_nbi +#define shmem_ctx_char_put_nbi pshmem_ctx_char_put_nbi +#define shmem_ctx_short_put_nbi pshmem_ctx_short_put_nbi +#define shmem_ctx_int_put_nbi pshmem_ctx_int_put_nbi +#define shmem_ctx_long_put_nbi pshmem_ctx_long_put_nbi +#define shmem_ctx_float_put_nbi pshmem_ctx_float_put_nbi +#define shmem_ctx_double_put_nbi pshmem_ctx_double_put_nbi +#define shmem_ctx_longlong_put_nbi pshmem_ctx_longlong_put_nbi +#define shmem_ctx_schar_put_nbi pshmem_ctx_schar_put_nbi +#define 
shmem_ctx_uchar_put_nbi pshmem_ctx_uchar_put_nbi +#define shmem_ctx_ushort_put_nbi pshmem_ctx_ushort_put_nbi +#define shmem_ctx_uint_put_nbi pshmem_ctx_uint_put_nbi +#define shmem_ctx_ulong_put_nbi pshmem_ctx_ulong_put_nbi +#define shmem_ctx_ulonglong_put_nbi pshmem_ctx_ulonglong_put_nbi +#define shmem_ctx_longdouble_put_nbi pshmem_ctx_longdouble_put_nbi +#define shmem_ctx_int8_put_nbi pshmem_ctx_int8_put_nbi +#define shmem_ctx_int16_put_nbi pshmem_ctx_int16_put_nbi +#define shmem_ctx_int32_put_nbi pshmem_ctx_int32_put_nbi +#define shmem_ctx_int64_put_nbi pshmem_ctx_int64_put_nbi +#define shmem_ctx_uint8_put_nbi pshmem_ctx_uint8_put_nbi +#define shmem_ctx_uint16_put_nbi pshmem_ctx_uint16_put_nbi +#define shmem_ctx_uint32_put_nbi pshmem_ctx_uint32_put_nbi +#define shmem_ctx_uint64_put_nbi pshmem_ctx_uint64_put_nbi +#define shmem_ctx_size_put_nbi pshmem_ctx_size_put_nbi +#define shmem_ctx_ptrdiff_put_nbi pshmem_ctx_ptrdiff_put_nbi + +#define shmem_char_put_nbi pshmem_char_put_nbi +#define shmem_short_put_nbi pshmem_short_put_nbi +#define shmem_int_put_nbi pshmem_int_put_nbi +#define shmem_long_put_nbi pshmem_long_put_nbi +#define shmem_float_put_nbi pshmem_float_put_nbi +#define shmem_double_put_nbi pshmem_double_put_nbi +#define shmem_longlong_put_nbi pshmem_longlong_put_nbi +#define shmem_schar_put_nbi pshmem_schar_put_nbi +#define shmem_uchar_put_nbi pshmem_uchar_put_nbi +#define shmem_ushort_put_nbi pshmem_ushort_put_nbi +#define shmem_uint_put_nbi pshmem_uint_put_nbi +#define shmem_ulong_put_nbi pshmem_ulong_put_nbi +#define shmem_ulonglong_put_nbi pshmem_ulonglong_put_nbi +#define shmem_longdouble_put_nbi pshmem_longdouble_put_nbi +#define shmem_int8_put_nbi pshmem_int8_put_nbi +#define shmem_int16_put_nbi pshmem_int16_put_nbi +#define shmem_int32_put_nbi pshmem_int32_put_nbi +#define shmem_int64_put_nbi pshmem_int64_put_nbi +#define shmem_uint8_put_nbi pshmem_uint8_put_nbi +#define shmem_uint16_put_nbi pshmem_uint16_put_nbi +#define shmem_uint32_put_nbi 
pshmem_uint32_put_nbi +#define shmem_uint64_put_nbi pshmem_uint64_put_nbi +#define shmem_size_put_nbi pshmem_size_put_nbi +#define shmem_ptrdiff_put_nbi pshmem_ptrdiff_put_nbi #define shmem_ctx_put8_nbi pshmem_ctx_put8_nbi #define shmem_ctx_put16_nbi pshmem_ctx_put16_nbi @@ -183,6 +319,7 @@ #define shmem_ctx_put64_nbi pshmem_ctx_put64_nbi #define shmem_ctx_put128_nbi pshmem_ctx_put128_nbi #define shmem_ctx_putmem_nbi pshmem_ctx_putmem_nbi + #define shmem_put8_nbi pshmem_put8_nbi #define shmem_put16_nbi pshmem_put16_nbi #define shmem_put32_nbi pshmem_put32_nbi @@ -200,7 +337,24 @@ #define shmem_ctx_float_g pshmem_ctx_float_g #define shmem_ctx_double_g pshmem_ctx_double_g #define shmem_ctx_longlong_g pshmem_ctx_longlong_g +#define shmem_ctx_schar_g pshmem_ctx_schar_g +#define shmem_ctx_uchar_g pshmem_ctx_uchar_g +#define shmem_ctx_ushort_g pshmem_ctx_ushort_g +#define shmem_ctx_uint_g pshmem_ctx_uint_g +#define shmem_ctx_ulong_g pshmem_ctx_ulong_g +#define shmem_ctx_ulonglong_g pshmem_ctx_ulonglong_g #define shmem_ctx_longdouble_g pshmem_ctx_longdouble_g +#define shmem_ctx_int8_g pshmem_ctx_int8_g +#define shmem_ctx_int16_g pshmem_ctx_int16_g +#define shmem_ctx_int32_g pshmem_ctx_int32_g +#define shmem_ctx_int64_g pshmem_ctx_int64_g +#define shmem_ctx_uint8_g pshmem_ctx_uint8_g +#define shmem_ctx_uint16_g pshmem_ctx_uint16_g +#define shmem_ctx_uint32_g pshmem_ctx_uint32_g +#define shmem_ctx_uint64_g pshmem_ctx_uint64_g +#define shmem_ctx_size_g pshmem_ctx_size_g +#define shmem_ctx_ptrdiff_g pshmem_ctx_ptrdiff_g + #define shmem_char_g pshmem_char_g #define shmem_short_g pshmem_short_g #define shmem_int_g pshmem_int_g @@ -208,7 +362,24 @@ #define shmem_float_g pshmem_float_g #define shmem_double_g pshmem_double_g #define shmem_longlong_g pshmem_longlong_g +#define shmem_schar_g pshmem_schar_g +#define shmem_uchar_g pshmem_uchar_g +#define shmem_ushort_g pshmem_ushort_g +#define shmem_uint_g pshmem_uint_g +#define shmem_ulong_g pshmem_ulong_g +#define shmem_ulonglong_g 
pshmem_ulonglong_g #define shmem_longdouble_g pshmem_longdouble_g +#define shmem_int8_g pshmem_int8_g +#define shmem_int16_g pshmem_int16_g +#define shmem_int32_g pshmem_int32_g +#define shmem_int64_g pshmem_int64_g +#define shmem_uint8_g pshmem_uint8_g +#define shmem_uint16_g pshmem_uint16_g +#define shmem_uint32_g pshmem_uint32_g +#define shmem_uint64_g pshmem_uint64_g +#define shmem_size_g pshmem_size_g +#define shmem_ptrdiff_g pshmem_ptrdiff_g + #define shmemx_int16_g pshmemx_int16_g #define shmemx_int32_g pshmemx_int32_g #define shmemx_int64_g pshmemx_int64_g @@ -223,7 +394,24 @@ #define shmem_ctx_float_get pshmem_ctx_float_get #define shmem_ctx_double_get pshmem_ctx_double_get #define shmem_ctx_longlong_get pshmem_ctx_longlong_get +#define shmem_ctx_schar_get pshmem_ctx_schar_get +#define shmem_ctx_uchar_get pshmem_ctx_uchar_get +#define shmem_ctx_ushort_get pshmem_ctx_ushort_get +#define shmem_ctx_uint_get pshmem_ctx_uint_get +#define shmem_ctx_ulong_get pshmem_ctx_ulong_get +#define shmem_ctx_ulonglong_get pshmem_ctx_ulonglong_get #define shmem_ctx_longdouble_get pshmem_ctx_longdouble_get +#define shmem_ctx_int8_get pshmem_ctx_int8_get +#define shmem_ctx_int16_get pshmem_ctx_int16_get +#define shmem_ctx_int32_get pshmem_ctx_int32_get +#define shmem_ctx_int64_get pshmem_ctx_int64_get +#define shmem_ctx_uint8_get pshmem_ctx_uint8_get +#define shmem_ctx_uint16_get pshmem_ctx_uint16_get +#define shmem_ctx_uint32_get pshmem_ctx_uint32_get +#define shmem_ctx_uint64_get pshmem_ctx_uint64_get +#define shmem_ctx_size_get pshmem_ctx_size_get +#define shmem_ctx_ptrdiff_get pshmem_ctx_ptrdiff_get + #define shmem_char_get pshmem_char_get /* shmem-compat.h */ #define shmem_short_get pshmem_short_get #define shmem_int_get pshmem_int_get @@ -231,7 +419,23 @@ #define shmem_float_get pshmem_float_get #define shmem_double_get pshmem_double_get #define shmem_longlong_get pshmem_longlong_get +#define shmem_schar_get pshmem_schar_get +#define shmem_uchar_get pshmem_uchar_get 
+#define shmem_ushort_get pshmem_ushort_get +#define shmem_uint_get pshmem_uint_get +#define shmem_ulong_get pshmem_ulong_get +#define shmem_ulonglong_get pshmem_ulonglong_get #define shmem_longdouble_get pshmem_longdouble_get +#define shmem_int8_get pshmem_int8_get +#define shmem_int16_get pshmem_int16_get +#define shmem_int32_get pshmem_int32_get +#define shmem_int64_get pshmem_int64_get +#define shmem_uint8_get pshmem_uint8_get +#define shmem_uint16_get pshmem_uint16_get +#define shmem_uint32_get pshmem_uint32_get +#define shmem_uint64_get pshmem_uint64_get +#define shmem_size_get pshmem_size_get +#define shmem_ptrdiff_get pshmem_ptrdiff_get #define shmem_ctx_get8 pshmem_ctx_get8 #define shmem_ctx_get16 pshmem_ctx_get16 @@ -239,6 +443,7 @@ #define shmem_ctx_get64 pshmem_ctx_get64 #define shmem_ctx_get128 pshmem_ctx_get128 #define shmem_ctx_getmem pshmem_ctx_getmem + #define shmem_get8 pshmem_get8 #define shmem_get16 pshmem_get16 #define shmem_get32 pshmem_get32 @@ -249,28 +454,62 @@ /* * Strided get routines */ -#define shmem_ctx_char_iget pshmem_ctx_char_iget -#define shmem_ctx_short_iget pshmem_ctx_short_iget -#define shmem_ctx_int_iget pshmem_ctx_int_iget -#define shmem_ctx_float_iget pshmem_ctx_float_iget -#define shmem_ctx_double_iget pshmem_ctx_double_iget -#define shmem_ctx_longlong_iget pshmem_ctx_longlong_iget -#define shmem_ctx_longdouble_iget pshmem_ctx_longdouble_iget -#define shmem_ctx_long_iget pshmem_ctx_long_iget -#define shmem_char_iget pshmem_char_iget -#define shmem_short_iget pshmem_short_iget -#define shmem_int_iget pshmem_int_iget -#define shmem_float_iget pshmem_float_iget -#define shmem_double_iget pshmem_double_iget -#define shmem_longlong_iget pshmem_longlong_iget -#define shmem_longdouble_iget pshmem_longdouble_iget -#define shmem_long_iget pshmem_long_iget +#define shmem_ctx_char_iget pshmem_ctx_char_iget +#define shmem_ctx_short_iget pshmem_ctx_short_iget +#define shmem_ctx_int_iget pshmem_ctx_int_iget +#define shmem_ctx_long_iget 
pshmem_ctx_long_iget +#define shmem_ctx_float_iget pshmem_ctx_float_iget +#define shmem_ctx_double_iget pshmem_ctx_double_iget +#define shmem_ctx_longlong_iget pshmem_ctx_longlong_iget +#define shmem_ctx_schar_iget pshmem_ctx_schar_iget +#define shmem_ctx_uchar_iget pshmem_ctx_uchar_iget +#define shmem_ctx_ushort_iget pshmem_ctx_ushort_iget +#define shmem_ctx_uint_iget pshmem_ctx_uint_iget +#define shmem_ctx_ulong_iget pshmem_ctx_ulong_iget +#define shmem_ctx_ulonglong_iget pshmem_ctx_ulonglong_iget +#define shmem_ctx_longdouble_iget pshmem_ctx_longdouble_iget +#define shmem_ctx_int8_iget pshmem_ctx_int8_iget +#define shmem_ctx_int16_iget pshmem_ctx_int16_iget +#define shmem_ctx_int32_iget pshmem_ctx_int32_iget +#define shmem_ctx_int64_iget pshmem_ctx_int64_iget +#define shmem_ctx_uint8_iget pshmem_ctx_uint8_iget +#define shmem_ctx_uint16_iget pshmem_ctx_uint16_iget +#define shmem_ctx_uint32_iget pshmem_ctx_uint32_iget +#define shmem_ctx_uint64_iget pshmem_ctx_uint64_iget +#define shmem_ctx_size_iget pshmem_ctx_size_iget +#define shmem_ctx_ptrdiff_iget pshmem_ctx_ptrdiff_iget + +#define shmem_char_iget pshmem_char_iget +#define shmem_short_iget pshmem_short_iget +#define shmem_int_iget pshmem_int_iget +#define shmem_long_iget pshmem_long_iget +#define shmem_float_iget pshmem_float_iget +#define shmem_double_iget pshmem_double_iget +#define shmem_longlong_iget pshmem_longlong_iget +#define shmem_schar_iget pshmem_schar_iget +#define shmem_uchar_iget pshmem_uchar_iget +#define shmem_ushort_iget pshmem_ushort_iget +#define shmem_uint_iget pshmem_uint_iget +#define shmem_ulong_iget pshmem_ulong_iget +#define shmem_ulonglong_iget pshmem_ulonglong_iget +#define shmem_longdouble_iget pshmem_longdouble_iget +#define shmem_int8_iget pshmem_int8_iget +#define shmem_int16_iget pshmem_int16_iget +#define shmem_int32_iget pshmem_int32_iget +#define shmem_int64_iget pshmem_int64_iget +#define shmem_uint8_iget pshmem_uint8_iget +#define shmem_uint16_iget pshmem_uint16_iget 
+#define shmem_uint32_iget pshmem_uint32_iget +#define shmem_uint64_iget pshmem_uint64_iget +#define shmem_size_iget pshmem_size_iget +#define shmem_ptrdiff_iget pshmem_ptrdiff_iget #define shmem_ctx_iget8 pshmem_ctx_iget8 #define shmem_ctx_iget16 pshmem_ctx_iget16 #define shmem_ctx_iget32 pshmem_ctx_iget32 #define shmem_ctx_iget64 pshmem_ctx_iget64 #define shmem_ctx_iget128 pshmem_ctx_iget128 + #define shmem_iget8 pshmem_iget8 #define shmem_iget16 pshmem_iget16 #define shmem_iget32 pshmem_iget32 @@ -280,22 +519,55 @@ /* * Non-block data get routines */ -#define shmem_ctx_char_get_nbi pshmem_ctx_char_get_nbi -#define shmem_ctx_short_get_nbi pshmem_ctx_short_get_nbi -#define shmem_ctx_int_get_nbi pshmem_ctx_int_get_nbi -#define shmem_ctx_long_get_nbi pshmem_ctx_long_get_nbi -#define shmem_ctx_float_get_nbi pshmem_ctx_float_get_nbi -#define shmem_ctx_double_get_nbi pshmem_ctx_double_get_nbi -#define shmem_ctx_longlong_get_nbi pshmem_ctx_longlong_get_nbi -#define shmem_ctx_longdouble_get_nbi pshmem_ctx_longdouble_get_nbi -#define shmem_char_get_nbi pshmem_char_get_nbi -#define shmem_short_get_nbi pshmem_short_get_nbi -#define shmem_int_get_nbi pshmem_int_get_nbi -#define shmem_long_get_nbi pshmem_long_get_nbi -#define shmem_float_get_nbi pshmem_float_get_nbi -#define shmem_double_get_nbi pshmem_double_get_nbi -#define shmem_longlong_get_nbi pshmem_longlong_get_nbi -#define shmem_longdouble_get_nbi pshmem_longdouble_get_nbi +#define shmem_ctx_char_get_nbi pshmem_ctx_char_get_nbi +#define shmem_ctx_short_get_nbi pshmem_ctx_short_get_nbi +#define shmem_ctx_int_get_nbi pshmem_ctx_int_get_nbi +#define shmem_ctx_long_get_nbi pshmem_ctx_long_get_nbi +#define shmem_ctx_float_get_nbi pshmem_ctx_float_get_nbi +#define shmem_ctx_double_get_nbi pshmem_ctx_double_get_nbi +#define shmem_ctx_longlong_get_nbi pshmem_ctx_longlong_get_nbi +#define shmem_ctx_schar_get_nbi pshmem_ctx_schar_get_nbi +#define shmem_ctx_uchar_get_nbi pshmem_ctx_uchar_get_nbi +#define shmem_ctx_ushort_get_nbi 
pshmem_ctx_ushort_get_nbi +#define shmem_ctx_uint_get_nbi pshmem_ctx_uint_get_nbi +#define shmem_ctx_ulong_get_nbi pshmem_ctx_ulong_get_nbi +#define shmem_ctx_ulonglong_get_nbi pshmem_ctx_ulonglong_get_nbi +#define shmem_ctx_longdouble_get_nbi pshmem_ctx_longdouble_get_nbi +#define shmem_ctx_int8_get_nbi pshmem_ctx_int8_get_nbi +#define shmem_ctx_int16_get_nbi pshmem_ctx_int16_get_nbi +#define shmem_ctx_int32_get_nbi pshmem_ctx_int32_get_nbi +#define shmem_ctx_int64_get_nbi pshmem_ctx_int64_get_nbi +#define shmem_ctx_uint8_get_nbi pshmem_ctx_uint8_get_nbi +#define shmem_ctx_uint16_get_nbi pshmem_ctx_uint16_get_nbi +#define shmem_ctx_uint32_get_nbi pshmem_ctx_uint32_get_nbi +#define shmem_ctx_uint64_get_nbi pshmem_ctx_uint64_get_nbi +#define shmem_ctx_size_get_nbi pshmem_ctx_size_get_nbi +#define shmem_ctx_ptrdiff_get_nbi pshmem_ctx_ptrdiff_get_nbi + +#define shmem_char_get_nbi pshmem_char_get_nbi +#define shmem_short_get_nbi pshmem_short_get_nbi +#define shmem_int_get_nbi pshmem_int_get_nbi +#define shmem_long_get_nbi pshmem_long_get_nbi +#define shmem_float_get_nbi pshmem_float_get_nbi +#define shmem_double_get_nbi pshmem_double_get_nbi +#define shmem_longlong_get_nbi pshmem_longlong_get_nbi +#define shmem_schar_get_nbi pshmem_schar_get_nbi +#define shmem_uchar_get_nbi pshmem_uchar_get_nbi +#define shmem_ushort_get_nbi pshmem_ushort_get_nbi +#define shmem_uint_get_nbi pshmem_uint_get_nbi +#define shmem_ulong_get_nbi pshmem_ulong_get_nbi +#define shmem_ulonglong_get_nbi pshmem_ulonglong_get_nbi +#define shmem_longdouble_get_nbi pshmem_longdouble_get_nbi +#define shmem_int8_get_nbi pshmem_int8_get_nbi +#define shmem_int16_get_nbi pshmem_int16_get_nbi +#define shmem_int32_get_nbi pshmem_int32_get_nbi +#define shmem_int64_get_nbi pshmem_int64_get_nbi +#define shmem_uint8_get_nbi pshmem_uint8_get_nbi +#define shmem_uint16_get_nbi pshmem_uint16_get_nbi +#define shmem_uint32_get_nbi pshmem_uint32_get_nbi +#define shmem_uint64_get_nbi pshmem_uint64_get_nbi +#define 
shmem_size_get_nbi pshmem_size_get_nbi +#define shmem_ptrdiff_get_nbi pshmem_ptrdiff_get_nbi #define shmem_ctx_get8_nbi pshmem_ctx_get8_nbi #define shmem_ctx_get16_nbi pshmem_ctx_get16_nbi @@ -303,6 +575,7 @@ #define shmem_ctx_get64_nbi pshmem_ctx_get64_nbi #define shmem_ctx_get128_nbi pshmem_ctx_get128_nbi #define shmem_ctx_getmem_nbi pshmem_ctx_getmem_nbi + #define shmem_get8_nbi pshmem_get8_nbi #define shmem_get16_nbi pshmem_get16_nbi #define shmem_get32_nbi pshmem_get32_nbi @@ -319,16 +592,25 @@ #define shmem_ctx_int_atomic_swap pshmem_ctx_int_atomic_swap #define shmem_ctx_long_atomic_swap pshmem_ctx_long_atomic_swap #define shmem_ctx_longlong_atomic_swap pshmem_ctx_longlong_atomic_swap +#define shmem_ctx_uint_atomic_swap pshmem_ctx_uint_atomic_swap +#define shmem_ctx_ulong_atomic_swap pshmem_ctx_ulong_atomic_swap +#define shmem_ctx_ulonglong_atomic_swap pshmem_ctx_ulonglong_atomic_swap + #define shmem_double_atomic_swap pshmem_double_atomic_swap #define shmem_float_atomic_swap pshmem_float_atomic_swap #define shmem_int_atomic_swap pshmem_int_atomic_swap #define shmem_long_atomic_swap pshmem_long_atomic_swap #define shmem_longlong_atomic_swap pshmem_longlong_atomic_swap +#define shmem_uint_atomic_swap pshmem_uint_atomic_swap +#define shmem_ulong_atomic_swap pshmem_ulong_atomic_swap +#define shmem_ulonglong_atomic_swap pshmem_ulonglong_atomic_swap + #define shmem_double_swap pshmem_double_swap #define shmem_float_swap pshmem_float_swap #define shmem_int_swap pshmem_int_swap #define shmem_long_swap pshmem_long_swap #define shmem_longlong_swap pshmem_longlong_swap + #define shmemx_int32_swap pshmemx_int32_swap #define shmemx_int64_swap pshmemx_int64_swap @@ -338,16 +620,25 @@ #define shmem_ctx_int_atomic_set pshmem_ctx_int_atomic_set #define shmem_ctx_long_atomic_set pshmem_ctx_long_atomic_set #define shmem_ctx_longlong_atomic_set pshmem_ctx_longlong_atomic_set +#define shmem_ctx_uint_atomic_set pshmem_ctx_uint_atomic_set +#define shmem_ctx_ulong_atomic_set 
pshmem_ctx_ulong_atomic_set +#define shmem_ctx_ulonglong_atomic_set pshmem_ctx_ulonglong_atomic_set + #define shmem_double_atomic_set pshmem_double_atomic_set #define shmem_float_atomic_set pshmem_float_atomic_set #define shmem_int_atomic_set pshmem_int_atomic_set #define shmem_long_atomic_set pshmem_long_atomic_set #define shmem_longlong_atomic_set pshmem_longlong_atomic_set +#define shmem_uint_atomic_set pshmem_uint_atomic_set +#define shmem_ulong_atomic_set pshmem_ulong_atomic_set +#define shmem_ulonglong_atomic_set pshmem_ulonglong_atomic_set + #define shmem_double_set pshmem_double_set #define shmem_float_set pshmem_float_set #define shmem_int_set pshmem_int_set #define shmem_long_set pshmem_long_set #define shmem_longlong_set pshmem_longlong_set + #define shmemx_int32_set pshmemx_int32_set #define shmemx_int64_set pshmemx_int64_set @@ -355,59 +646,125 @@ #define shmem_ctx_int_atomic_compare_swap pshmem_ctx_int_atomic_compare_swap #define shmem_ctx_long_atomic_compare_swap pshmem_ctx_long_atomic_compare_swap #define shmem_ctx_longlong_atomic_compare_swap pshmem_ctx_longlong_atomic_compare_swap +#define shmem_ctx_uint_atomic_compare_swap pshmem_ctx_uint_atomic_compare_swap +#define shmem_ctx_ulong_atomic_compare_swap pshmem_ctx_ulong_atomic_compare_swap +#define shmem_ctx_ulonglong_atomic_compare_swap pshmem_ctx_ulonglong_atomic_compare_swap + #define shmem_int_atomic_compare_swap pshmem_int_atomic_compare_swap #define shmem_long_atomic_compare_swap pshmem_long_atomic_compare_swap #define shmem_longlong_atomic_compare_swap pshmem_longlong_atomic_compare_swap +#define shmem_uint_atomic_compare_swap pshmem_uint_atomic_compare_swap +#define shmem_ulong_atomic_compare_swap pshmem_ulong_atomic_compare_swap +#define shmem_ulonglong_atomic_compare_swap pshmem_ulonglong_atomic_compare_swap + #define shmem_int_cswap pshmem_int_cswap #define shmem_long_cswap pshmem_long_cswap #define shmem_longlong_cswap pshmem_longlong_cswap + #define shmemx_int32_cswap 
pshmemx_int32_cswap #define shmemx_int64_cswap pshmemx_int64_cswap /* Atomic Fetch&Add */ -#define shmem_ctx_int_atomic_fetch_add pshmem_ctx_int_atomic_fetch_add -#define shmem_ctx_long_atomic_fetch_add pshmem_ctx_long_atomic_fetch_add -#define shmem_ctx_longlong_atomic_fetch_add pshmem_ctx_longlong_atomic_fetch_add -#define shmem_int_atomic_fetch_add pshmem_int_atomic_fetch_add -#define shmem_long_atomic_fetch_add pshmem_long_atomic_fetch_add -#define shmem_longlong_atomic_fetch_add pshmem_longlong_atomic_fetch_add -#define shmem_int_fadd pshmem_int_fadd -#define shmem_long_fadd pshmem_long_fadd -#define shmem_longlong_fadd pshmem_longlong_fadd -#define shmemx_int32_fadd pshmemx_int32_fadd -#define shmemx_int64_fadd pshmemx_int64_fadd +#define shmem_ctx_int_atomic_fetch_add pshmem_ctx_int_atomic_fetch_add +#define shmem_ctx_long_atomic_fetch_add pshmem_ctx_long_atomic_fetch_add +#define shmem_ctx_longlong_atomic_fetch_add pshmem_ctx_longlong_atomic_fetch_add +#define shmem_ctx_uint_atomic_fetch_add pshmem_ctx_uint_atomic_fetch_add +#define shmem_ctx_ulong_atomic_fetch_add pshmem_ctx_ulong_atomic_fetch_add +#define shmem_ctx_ulonglong_atomic_fetch_add pshmem_ctx_ulonglong_atomic_fetch_add + +#define shmem_int_atomic_fetch_add pshmem_int_atomic_fetch_add +#define shmem_long_atomic_fetch_add pshmem_long_atomic_fetch_add +#define shmem_longlong_atomic_fetch_add pshmem_longlong_atomic_fetch_add +#define shmem_uint_atomic_fetch_add pshmem_uint_atomic_fetch_add +#define shmem_ulong_atomic_fetch_add pshmem_ulong_atomic_fetch_add +#define shmem_ulonglong_atomic_fetch_add pshmem_ulonglong_atomic_fetch_add + +#define shmem_int_fadd pshmem_int_fadd +#define shmem_long_fadd pshmem_long_fadd +#define shmem_longlong_fadd pshmem_longlong_fadd + +#define shmemx_int32_fadd pshmemx_int32_fadd +#define shmemx_int64_fadd pshmemx_int64_fadd /* Atomic Fetch&And */ +#define shmem_int_atomic_fetch_and pshmem_int_atomic_fetch_and +#define shmem_long_atomic_fetch_and 
pshmem_long_atomic_fetch_and +#define shmem_longlong_atomic_fetch_and pshmem_longlong_atomic_fetch_and #define shmem_uint_atomic_fetch_and pshmem_uint_atomic_fetch_and #define shmem_ulong_atomic_fetch_and pshmem_ulong_atomic_fetch_and #define shmem_ulonglong_atomic_fetch_and pshmem_ulonglong_atomic_fetch_and +#define shmem_int32_atomic_fetch_and pshmem_int32_atomic_fetch_and +#define shmem_int64_atomic_fetch_and pshmem_int64_atomic_fetch_and +#define shmem_uint32_atomic_fetch_and pshmem_uint32_atomic_fetch_and +#define shmem_uint64_atomic_fetch_and pshmem_uint64_atomic_fetch_and + +#define shmem_ctx_int_atomic_fetch_and pshmem_ctx_int_atomic_fetch_and +#define shmem_ctx_long_atomic_fetch_and pshmem_ctx_long_atomic_fetch_and +#define shmem_ctx_longlong_atomic_fetch_and pshmem_ctx_longlong_atomic_fetch_and #define shmem_ctx_uint_atomic_fetch_and pshmem_ctx_uint_atomic_fetch_and #define shmem_ctx_ulong_atomic_fetch_and pshmem_ctx_ulong_atomic_fetch_and #define shmem_ctx_ulonglong_atomic_fetch_and pshmem_ctx_ulonglong_atomic_fetch_and +#define shmem_ctx_int32_atomic_fetch_and pshmem_ctx_int32_atomic_fetch_and +#define shmem_ctx_int64_atomic_fetch_and pshmem_ctx_int64_atomic_fetch_and +#define shmem_ctx_uint32_atomic_fetch_and pshmem_ctx_uint32_atomic_fetch_and +#define shmem_ctx_uint64_atomic_fetch_and pshmem_ctx_uint64_atomic_fetch_and + #define shmemx_int32_atomic_fetch_and pshmemx_int32_atomic_fetch_and #define shmemx_int64_atomic_fetch_and pshmemx_int64_atomic_fetch_and #define shmemx_uint32_atomic_fetch_and pshmemx_uint32_atomic_fetch_and #define shmemx_uint64_atomic_fetch_and pshmemx_uint64_atomic_fetch_and /* Atomic Fetch&Or */ +#define shmem_int_atomic_fetch_or pshmem_int_atomic_fetch_or +#define shmem_long_atomic_fetch_or pshmem_long_atomic_fetch_or +#define shmem_longlong_atomic_fetch_or pshmem_longlong_atomic_fetch_or #define shmem_uint_atomic_fetch_or pshmem_uint_atomic_fetch_or #define shmem_ulong_atomic_fetch_or pshmem_ulong_atomic_fetch_or #define 
shmem_ulonglong_atomic_fetch_or pshmem_ulonglong_atomic_fetch_or +#define shmem_int32_atomic_fetch_or pshmem_int32_atomic_fetch_or +#define shmem_int64_atomic_fetch_or pshmem_int64_atomic_fetch_or +#define shmem_uint32_atomic_fetch_or pshmem_uint32_atomic_fetch_or +#define shmem_uint64_atomic_fetch_or pshmem_uint64_atomic_fetch_or + +#define shmem_ctx_int_atomic_fetch_or pshmem_ctx_int_atomic_fetch_or +#define shmem_ctx_long_atomic_fetch_or pshmem_ctx_long_atomic_fetch_or +#define shmem_ctx_longlong_atomic_fetch_or pshmem_ctx_longlong_atomic_fetch_or #define shmem_ctx_uint_atomic_fetch_or pshmem_ctx_uint_atomic_fetch_or #define shmem_ctx_ulong_atomic_fetch_or pshmem_ctx_ulong_atomic_fetch_or #define shmem_ctx_ulonglong_atomic_fetch_or pshmem_ctx_ulonglong_atomic_fetch_or +#define shmem_ctx_int32_atomic_fetch_or pshmem_ctx_int32_atomic_fetch_or +#define shmem_ctx_int64_atomic_fetch_or pshmem_ctx_int64_atomic_fetch_or +#define shmem_ctx_uint32_atomic_fetch_or pshmem_ctx_uint32_atomic_fetch_or +#define shmem_ctx_uint64_atomic_fetch_or pshmem_ctx_uint64_atomic_fetch_or + #define shmemx_int32_atomic_fetch_or pshmemx_int32_atomic_fetch_or #define shmemx_int64_atomic_fetch_or pshmemx_int64_atomic_fetch_or #define shmemx_uint32_atomic_fetch_or pshmemx_uint32_atomic_fetch_or #define shmemx_uint64_atomic_fetch_or pshmemx_uint64_atomic_fetch_or /* Atomic Fetch&Xor */ +#define shmem_int_atomic_fetch_xor pshmem_int_atomic_fetch_xor +#define shmem_long_atomic_fetch_xor pshmem_long_atomic_fetch_xor +#define shmem_longlong_atomic_fetch_xor pshmem_longlong_atomic_fetch_xor #define shmem_uint_atomic_fetch_xor pshmem_uint_atomic_fetch_xor #define shmem_ulong_atomic_fetch_xor pshmem_ulong_atomic_fetch_xor #define shmem_ulonglong_atomic_fetch_xor pshmem_ulonglong_atomic_fetch_xor +#define shmem_int32_atomic_fetch_xor pshmem_int32_atomic_fetch_xor +#define shmem_int64_atomic_fetch_xor pshmem_int64_atomic_fetch_xor +#define shmem_uint32_atomic_fetch_xor pshmem_uint32_atomic_fetch_xor 
+#define shmem_uint64_atomic_fetch_xor pshmem_uint64_atomic_fetch_xor + +#define shmem_ctx_int_atomic_fetch_xor pshmem_ctx_int_atomic_fetch_xor +#define shmem_ctx_long_atomic_fetch_xor pshmem_ctx_long_atomic_fetch_xor +#define shmem_ctx_longlong_atomic_fetch_xor pshmem_ctx_longlong_atomic_fetch_xor #define shmem_ctx_uint_atomic_fetch_xor pshmem_ctx_uint_atomic_fetch_xor #define shmem_ctx_ulong_atomic_fetch_xor pshmem_ctx_ulong_atomic_fetch_xor #define shmem_ctx_ulonglong_atomic_fetch_xor pshmem_ctx_ulonglong_atomic_fetch_xor +#define shmem_ctx_int32_atomic_fetch_xor pshmem_ctx_int32_atomic_fetch_xor +#define shmem_ctx_int64_atomic_fetch_xor pshmem_ctx_int64_atomic_fetch_xor +#define shmem_ctx_uint32_atomic_fetch_xor pshmem_ctx_uint32_atomic_fetch_xor +#define shmem_ctx_uint64_atomic_fetch_xor pshmem_ctx_uint64_atomic_fetch_xor + #define shmemx_int32_atomic_fetch_xor pshmemx_int32_atomic_fetch_xor #define shmemx_int64_atomic_fetch_xor pshmemx_int64_atomic_fetch_xor #define shmemx_uint32_atomic_fetch_xor pshmemx_uint32_atomic_fetch_xor @@ -419,16 +776,25 @@ #define shmem_ctx_int_atomic_fetch pshmem_ctx_int_atomic_fetch #define shmem_ctx_long_atomic_fetch pshmem_ctx_long_atomic_fetch #define shmem_ctx_longlong_atomic_fetch pshmem_ctx_longlong_atomic_fetch +#define shmem_ctx_uint_atomic_fetch pshmem_ctx_uint_atomic_fetch +#define shmem_ctx_ulong_atomic_fetch pshmem_ctx_ulong_atomic_fetch +#define shmem_ctx_ulonglong_atomic_fetch pshmem_ctx_ulonglong_atomic_fetch + #define shmem_double_atomic_fetch pshmem_double_atomic_fetch #define shmem_float_atomic_fetch pshmem_float_atomic_fetch #define shmem_int_atomic_fetch pshmem_int_atomic_fetch #define shmem_long_atomic_fetch pshmem_long_atomic_fetch #define shmem_longlong_atomic_fetch pshmem_longlong_atomic_fetch +#define shmem_uint_atomic_fetch pshmem_uint_atomic_fetch +#define shmem_ulong_atomic_fetch pshmem_ulong_atomic_fetch +#define shmem_ulonglong_atomic_fetch pshmem_ulonglong_atomic_fetch + #define shmem_double_fetch 
pshmem_double_fetch #define shmem_float_fetch pshmem_float_fetch #define shmem_int_fetch pshmem_int_fetch #define shmem_long_fetch pshmem_long_fetch #define shmem_longlong_fetch pshmem_longlong_fetch + #define shmemx_int32_fetch pshmemx_int32_fetch #define shmemx_int64_fetch pshmemx_int64_fetch @@ -436,12 +802,21 @@ #define shmem_ctx_int_atomic_fetch_inc pshmem_ctx_int_atomic_fetch_inc #define shmem_ctx_long_atomic_fetch_inc pshmem_ctx_long_atomic_fetch_inc #define shmem_ctx_longlong_atomic_fetch_inc pshmem_ctx_longlong_atomic_fetch_inc +#define shmem_ctx_uint_atomic_fetch_inc pshmem_ctx_uint_atomic_fetch_inc +#define shmem_ctx_ulong_atomic_fetch_inc pshmem_ctx_ulong_atomic_fetch_inc +#define shmem_ctx_ulonglong_atomic_fetch_inc pshmem_ctx_ulonglong_atomic_fetch_inc + +#define shmem_uint_atomic_fetch_inc pshmem_uint_atomic_fetch_inc +#define shmem_ulong_atomic_fetch_inc pshmem_ulong_atomic_fetch_inc +#define shmem_ulonglong_atomic_fetch_inc pshmem_ulonglong_atomic_fetch_inc #define shmem_int_atomic_fetch_inc pshmem_int_atomic_fetch_inc #define shmem_long_atomic_fetch_inc pshmem_long_atomic_fetch_inc #define shmem_longlong_atomic_fetch_inc pshmem_longlong_atomic_fetch_inc + #define shmem_int_finc pshmem_int_finc #define shmem_long_finc pshmem_long_finc #define shmem_longlong_finc pshmem_longlong_finc + #define shmemx_int32_finc pshmemx_int32_finc #define shmemx_int64_finc pshmemx_int64_finc @@ -449,48 +824,108 @@ #define shmem_ctx_int_atomic_add pshmem_ctx_int_atomic_add #define shmem_ctx_long_atomic_add pshmem_ctx_long_atomic_add #define shmem_ctx_longlong_atomic_add pshmem_ctx_longlong_atomic_add +#define shmem_ctx_uint_atomic_add pshmem_ctx_uint_atomic_add +#define shmem_ctx_ulong_atomic_add pshmem_ctx_ulong_atomic_add +#define shmem_ctx_ulonglong_atomic_add pshmem_ctx_ulonglong_atomic_add + #define shmem_int_atomic_add pshmem_int_atomic_add #define shmem_long_atomic_add pshmem_long_atomic_add #define shmem_longlong_atomic_add pshmem_longlong_atomic_add +#define 
shmem_uint_atomic_add pshmem_uint_atomic_add +#define shmem_ulong_atomic_add pshmem_ulong_atomic_add +#define shmem_ulonglong_atomic_add pshmem_ulonglong_atomic_add + #define shmem_int_add pshmem_int_add #define shmem_long_add pshmem_long_add #define shmem_longlong_add pshmem_longlong_add + #define shmemx_int32_add pshmemx_int32_add #define shmemx_int64_add pshmemx_int64_add /* Atomic And */ +#define shmem_int_atomic_and pshmem_int_atomic_and +#define shmem_long_atomic_and pshmem_long_atomic_and +#define shmem_longlong_atomic_and pshmem_longlong_atomic_and #define shmem_uint_atomic_and pshmem_uint_atomic_and #define shmem_ulong_atomic_and pshmem_ulong_atomic_and #define shmem_ulonglong_atomic_and pshmem_ulonglong_atomic_and +#define shmem_int32_atomic_and pshmem_int32_atomic_and +#define shmem_int64_atomic_and pshmem_int64_atomic_and +#define shmem_uint32_atomic_and pshmem_uint32_atomic_and +#define shmem_uint64_atomic_and pshmem_uint64_atomic_and + +#define shmem_ctx_int_atomic_and pshmem_ctx_int_atomic_and +#define shmem_ctx_long_atomic_and pshmem_ctx_long_atomic_and +#define shmem_ctx_longlong_atomic_and pshmem_ctx_longlong_atomic_and #define shmem_ctx_uint_atomic_and pshmem_ctx_uint_atomic_and #define shmem_ctx_ulong_atomic_and pshmem_ctx_ulong_atomic_and #define shmem_ctx_ulonglong_atomic_and pshmem_ctx_ulonglong_atomic_and +#define shmem_ctx_int32_atomic_and pshmem_ctx_int32_atomic_and +#define shmem_ctx_int64_atomic_and pshmem_ctx_int64_atomic_and +#define shmem_ctx_uint32_atomic_and pshmem_ctx_uint32_atomic_and +#define shmem_ctx_uint64_atomic_and pshmem_ctx_uint64_atomic_and + #define shmemx_int32_atomic_and pshmemx_int32_atomic_and #define shmemx_int64_atomic_and pshmemx_int64_atomic_and + #define shmemx_uint32_atomic_and pshmemx_uint32_atomic_and #define shmemx_uint64_atomic_and pshmemx_uint64_atomic_and /* Atomic Or */ +#define shmem_int_atomic_or pshmem_int_atomic_or +#define shmem_long_atomic_or pshmem_long_atomic_or +#define shmem_longlong_atomic_or 
pshmem_longlong_atomic_or #define shmem_uint_atomic_or pshmem_uint_atomic_or #define shmem_ulong_atomic_or pshmem_ulong_atomic_or #define shmem_ulonglong_atomic_or pshmem_ulonglong_atomic_or +#define shmem_int32_atomic_or pshmem_int32_atomic_or +#define shmem_int64_atomic_or pshmem_int64_atomic_or +#define shmem_uint32_atomic_or pshmem_uint32_atomic_or +#define shmem_uint64_atomic_or pshmem_uint64_atomic_or + +#define shmem_ctx_int_atomic_or pshmem_ctx_int_atomic_or +#define shmem_ctx_long_atomic_or pshmem_ctx_long_atomic_or +#define shmem_ctx_longlong_atomic_or pshmem_ctx_longlong_atomic_or #define shmem_ctx_uint_atomic_or pshmem_ctx_uint_atomic_or #define shmem_ctx_ulong_atomic_or pshmem_ctx_ulong_atomic_or #define shmem_ctx_ulonglong_atomic_or pshmem_ctx_ulonglong_atomic_or +#define shmem_ctx_int32_atomic_or pshmem_ctx_int32_atomic_or +#define shmem_ctx_int64_atomic_or pshmem_ctx_int64_atomic_or +#define shmem_ctx_uint32_atomic_or pshmem_ctx_uint32_atomic_or +#define shmem_ctx_uint64_atomic_or pshmem_ctx_uint64_atomic_or + #define shmemx_int32_atomic_or pshmemx_int32_atomic_or #define shmemx_int64_atomic_or pshmemx_int64_atomic_or + #define shmemx_uint32_atomic_or pshmemx_uint32_atomic_or #define shmemx_uint64_atomic_or pshmemx_uint64_atomic_or /* Atomic Xor */ +#define shmem_int_atomic_xor pshmem_int_atomic_xor +#define shmem_long_atomic_xor pshmem_long_atomic_xor +#define shmem_longlong_atomic_xor pshmem_longlong_atomic_xor #define shmem_uint_atomic_xor pshmem_uint_atomic_xor #define shmem_ulong_atomic_xor pshmem_ulong_atomic_xor #define shmem_ulonglong_atomic_xor pshmem_ulonglong_atomic_xor +#define shmem_int32_atomic_xor pshmem_int32_atomic_xor +#define shmem_int64_atomic_xor pshmem_int64_atomic_xor +#define shmem_uint32_atomic_xor pshmem_uint32_atomic_xor +#define shmem_uint64_atomic_xor pshmem_uint64_atomic_xor + +#define shmem_ctx_int_atomic_xor pshmem_ctx_int_atomic_xor +#define shmem_ctx_long_atomic_xor pshmem_ctx_long_atomic_xor +#define 
shmem_ctx_longlong_atomic_xor pshmem_ctx_longlong_atomic_xor #define shmem_ctx_uint_atomic_xor pshmem_ctx_uint_atomic_xor #define shmem_ctx_ulong_atomic_xor pshmem_ctx_ulong_atomic_xor #define shmem_ctx_ulonglong_atomic_xor pshmem_ctx_ulonglong_atomic_xor +#define shmem_ctx_int32_atomic_xor pshmem_ctx_int32_atomic_xor +#define shmem_ctx_int64_atomic_xor pshmem_ctx_int64_atomic_xor +#define shmem_ctx_uint32_atomic_xor pshmem_ctx_uint32_atomic_xor +#define shmem_ctx_uint64_atomic_xor pshmem_ctx_uint64_atomic_xor + #define shmemx_int32_atomic_xor pshmemx_int32_atomic_xor #define shmemx_int64_atomic_xor pshmemx_int64_atomic_xor + #define shmemx_uint32_atomic_xor pshmemx_uint32_atomic_xor #define shmemx_uint64_atomic_xor pshmemx_uint64_atomic_xor @@ -498,12 +933,21 @@ #define shmem_ctx_int_atomic_inc pshmem_ctx_int_atomic_inc #define shmem_ctx_long_atomic_inc pshmem_ctx_long_atomic_inc #define shmem_ctx_longlong_atomic_inc pshmem_ctx_longlong_atomic_inc +#define shmem_ctx_uint_atomic_inc pshmem_ctx_uint_atomic_inc +#define shmem_ctx_ulong_atomic_inc pshmem_ctx_ulong_atomic_inc +#define shmem_ctx_ulonglong_atomic_inc pshmem_ctx_ulonglong_atomic_inc + #define shmem_int_atomic_inc pshmem_int_atomic_inc #define shmem_long_atomic_inc pshmem_long_atomic_inc #define shmem_longlong_atomic_inc pshmem_longlong_atomic_inc +#define shmem_uint_atomic_inc pshmem_uint_atomic_inc +#define shmem_ulong_atomic_inc pshmem_ulong_atomic_inc +#define shmem_ulonglong_atomic_inc pshmem_ulonglong_atomic_inc + #define shmem_int_inc pshmem_int_inc #define shmem_long_inc pshmem_long_inc #define shmem_longlong_inc pshmem_longlong_inc + #define shmemx_int32_inc pshmemx_int32_inc #define shmemx_int64_inc pshmemx_int64_inc @@ -529,7 +973,17 @@ #define shmem_int_wait_until pshmem_int_wait_until #define shmem_long_wait_until pshmem_long_wait_until #define shmem_longlong_wait_until pshmem_longlong_wait_until -#define shmem_wait_until pshmem_wait_until +#define shmem_ushort_wait_until 
pshmem_ushort_wait_until +#define shmem_uint_wait_until pshmem_uint_wait_until +#define shmem_ulong_wait_until pshmem_ulong_wait_until +#define shmem_ulonglong_wait_until pshmem_ulonglong_wait_until +#define shmem_int32_wait_until pshmem_int32_wait_until +#define shmem_int64_wait_until pshmem_int64_wait_until +#define shmem_uint32_wait_until pshmem_uint32_wait_until +#define shmem_uint64_wait_until pshmem_uint64_wait_until +#define shmem_size_wait_until pshmem_size_wait_until +#define shmem_ptrdiff_wait_until pshmem_ptrdiff_wait_until + #define shmemx_int32_wait_until pshmemx_int32_wait_until #define shmemx_int64_wait_until pshmemx_int64_wait_until @@ -537,6 +991,16 @@ #define shmem_int_test pshmem_int_test #define shmem_long_test pshmem_long_test #define shmem_longlong_test pshmem_longlong_test +#define shmem_ushort_test pshmem_ushort_test +#define shmem_uint_test pshmem_uint_test +#define shmem_ulong_test pshmem_ulong_test +#define shmem_ulonglong_test pshmem_ulonglong_test +#define shmem_int32_test pshmem_int32_test +#define shmem_int64_test pshmem_int64_test +#define shmem_uint32_test pshmem_uint32_test +#define shmem_uint64_test pshmem_uint64_test +#define shmem_size_test pshmem_size_test +#define shmem_ptrdiff_test pshmem_ptrdiff_test /* * Barrier sync routines diff --git a/oshmem/shmem/c/shmem_add.c b/oshmem/shmem/c/shmem_add.c index 9ca5c62c770..6435496892a 100644 --- a/oshmem/shmem/c/shmem_add.c +++ b/oshmem/shmem/c/shmem_add.c @@ -63,23 +63,38 @@ #pragma weak shmem_ctx_int_atomic_add = pshmem_ctx_int_atomic_add #pragma weak shmem_ctx_long_atomic_add = pshmem_ctx_long_atomic_add #pragma weak shmem_ctx_longlong_atomic_add = pshmem_ctx_longlong_atomic_add +#pragma weak shmem_ctx_uint_atomic_add = pshmem_ctx_uint_atomic_add +#pragma weak shmem_ctx_ulong_atomic_add = pshmem_ctx_ulong_atomic_add +#pragma weak shmem_ctx_ulonglong_atomic_add = pshmem_ctx_ulonglong_atomic_add + #pragma weak shmem_int_atomic_add = pshmem_int_atomic_add #pragma weak 
shmem_long_atomic_add = pshmem_long_atomic_add #pragma weak shmem_longlong_atomic_add = pshmem_longlong_atomic_add +#pragma weak shmem_uint_atomic_add = pshmem_uint_atomic_add +#pragma weak shmem_ulong_atomic_add = pshmem_ulong_atomic_add +#pragma weak shmem_ulonglong_atomic_add = pshmem_ulonglong_atomic_add + #pragma weak shmem_int_add = pshmem_int_add #pragma weak shmem_long_add = pshmem_long_add #pragma weak shmem_longlong_add = pshmem_longlong_add + #pragma weak shmemx_int32_add = pshmemx_int32_add #pragma weak shmemx_int64_add = pshmemx_int64_add #include "oshmem/shmem/c/profile/defines.h" #endif +SHMEM_CTX_TYPE_ATOMIC_ADD(_uint, unsigned int, shmem) +SHMEM_CTX_TYPE_ATOMIC_ADD(_ulong, unsigned long, shmem) +SHMEM_CTX_TYPE_ATOMIC_ADD(_ulonglong, unsigned long long, shmem) SHMEM_CTX_TYPE_ATOMIC_ADD(_int, int, shmem) SHMEM_CTX_TYPE_ATOMIC_ADD(_long, long, shmem) SHMEM_CTX_TYPE_ATOMIC_ADD(_longlong, long long, shmem) SHMEM_TYPE_ATOMIC_ADD(_int, int, shmem) SHMEM_TYPE_ATOMIC_ADD(_long, long, shmem) SHMEM_TYPE_ATOMIC_ADD(_longlong, long long, shmem) +SHMEM_TYPE_ATOMIC_ADD(_uint, unsigned int, shmem) +SHMEM_TYPE_ATOMIC_ADD(_ulong, unsigned long, shmem) +SHMEM_TYPE_ATOMIC_ADD(_ulonglong, unsigned long long, shmem) /* deprecated APIs */ #define SHMEM_TYPE_ADD(type_name, type, prefix) \ diff --git a/oshmem/shmem/c/shmem_addr_accessible.c b/oshmem/shmem/c/shmem_addr_accessible.c index 8d44ff41818..724318a894f 100644 --- a/oshmem/shmem/c/shmem_addr_accessible.c +++ b/oshmem/shmem/c/shmem_addr_accessible.c @@ -31,7 +31,8 @@ int shmem_addr_accessible(const void *addr, int pe) RUNTIME_CHECK_INIT(); for (i = 0; i < mca_memheap_base_num_transports(); i++) { - mkey = mca_memheap_base_get_cached_mkey(pe, (void *)addr, i, &rva); + /* TODO: iterate on all ctxs, try to get cached mkey */ + mkey = mca_memheap_base_get_cached_mkey(oshmem_ctx_default, pe, (void *)addr, i, &rva); if (mkey) { return 1; } diff --git a/oshmem/shmem/c/shmem_alloc.c b/oshmem/shmem/c/shmem_alloc.c index 
3f7a579a20c..92592ce8ca3 100644 --- a/oshmem/shmem/c/shmem_alloc.c +++ b/oshmem/shmem/c/shmem_alloc.c @@ -11,6 +11,7 @@ #include "oshmem/constants.h" #include "oshmem/include/shmem.h" +#include "oshmem/include/shmemx.h" #include "oshmem/shmem/shmem_api_logger.h" @@ -19,9 +20,11 @@ #if OSHMEM_PROFILING #include "oshmem/include/pshmem.h" -#pragma weak shmem_malloc = pshmem_malloc -#pragma weak shmem_calloc = pshmem_calloc -#pragma weak shmalloc = pshmalloc +#include "oshmem/include/pshmemx.h" +#pragma weak shmem_malloc = pshmem_malloc +#pragma weak shmem_calloc = pshmem_calloc +#pragma weak shmalloc = pshmalloc +#pragma weak shmemx_malloc_with_hint = pshmemx_malloc_with_hint #include "oshmem/shmem/c/profile/defines.h" #endif @@ -72,3 +75,33 @@ static inline void* _shmalloc(size_t size) #endif return pBuff; } + +void* shmemx_malloc_with_hint(size_t size, long hint) +{ + int rc; + void* pBuff = NULL; + + if (!hint) { + return _shmalloc(size); + } + + RUNTIME_CHECK_INIT(); + RUNTIME_CHECK_WITH_MEMHEAP_SIZE(size); + + SHMEM_MUTEX_LOCK(shmem_internal_mutex_alloc); + + rc = mca_memheap_alloc_with_hint(size, hint, &pBuff); + + SHMEM_MUTEX_UNLOCK(shmem_internal_mutex_alloc); + + if (OSHMEM_SUCCESS != rc) { + SHMEM_API_VERBOSE(10, + "Allocation with shmalloc(size=%lu) failed.", + (unsigned long)size); + return NULL ; + } +#if OSHMEM_SPEC_COMPAT == 1 + shmem_barrier_all(); +#endif + return pBuff; +} diff --git a/oshmem/shmem/c/shmem_alltoall.c b/oshmem/shmem/c/shmem_alltoall.c index 57f40f67bd8..0bc115f713d 100644 --- a/oshmem/shmem/c/shmem_alltoall.c +++ b/oshmem/shmem/c/shmem_alltoall.c @@ -30,7 +30,7 @@ static void _shmem_alltoall(void *target, int PE_size, long *pSync); -#define SHMEM_TYPE_ALLTOALL(name, element_size) \ +#define SHMEM_TYPE_ALLTOALL(name, element_size) \ void shmem##name(void *target, \ const void *source, \ size_t nelems, \ @@ -40,15 +40,15 @@ static void _shmem_alltoall(void *target, long *pSync) \ { \ RUNTIME_CHECK_INIT(); \ - RUNTIME_CHECK_ADDR(target); 
\ - RUNTIME_CHECK_ADDR(source); \ + RUNTIME_CHECK_ADDR_SIZE(target, nelems); \ + RUNTIME_CHECK_ADDR_SIZE(source, nelems); \ \ _shmem_alltoall(target, source, 1, 1, nelems, element_size, \ PE_start, logPE_stride, PE_size, \ pSync); \ } -#define SHMEM_TYPE_ALLTOALLS(name, element_size) \ +#define SHMEM_TYPE_ALLTOALLS(name, element_size) \ void shmem##name(void *target, \ const void *source, \ ptrdiff_t dst, ptrdiff_t sst, \ @@ -59,8 +59,8 @@ static void _shmem_alltoall(void *target, long *pSync) \ { \ RUNTIME_CHECK_INIT(); \ - RUNTIME_CHECK_ADDR(target); \ - RUNTIME_CHECK_ADDR(source); \ + RUNTIME_CHECK_ADDR_SIZE(target, nelems); \ + RUNTIME_CHECK_ADDR_SIZE(source, nelems); \ \ _shmem_alltoall(target, source, dst, sst, nelems, element_size, \ PE_start, logPE_stride, PE_size, \ diff --git a/oshmem/shmem/c/shmem_and.c b/oshmem/shmem/c/shmem_and.c index 2402a6c0f51..0f4c5be9d39 100644 --- a/oshmem/shmem/c/shmem_and.c +++ b/oshmem/shmem/c/shmem_and.c @@ -25,12 +25,28 @@ */ #if OSHMEM_PROFILING #include "oshmem/include/pshmem.h" +#pragma weak shmem_int_atomic_and = pshmem_int_atomic_and +#pragma weak shmem_long_atomic_and = pshmem_long_atomic_and +#pragma weak shmem_longlong_atomic_and = pshmem_longlong_atomic_and #pragma weak shmem_uint_atomic_and = pshmem_uint_atomic_and #pragma weak shmem_ulong_atomic_and = pshmem_ulong_atomic_and #pragma weak shmem_ulonglong_atomic_and = pshmem_ulonglong_atomic_and +#pragma weak shmem_int32_atomic_and = pshmem_int32_atomic_and +#pragma weak shmem_int64_atomic_and = pshmem_int64_atomic_and +#pragma weak shmem_uint32_atomic_and = pshmem_uint32_atomic_and +#pragma weak shmem_uint64_atomic_and = pshmem_uint64_atomic_and + +#pragma weak shmem_ctx_int_atomic_and = pshmem_ctx_int_atomic_and +#pragma weak shmem_ctx_long_atomic_and = pshmem_ctx_long_atomic_and +#pragma weak shmem_ctx_longlong_atomic_and = pshmem_ctx_longlong_atomic_and #pragma weak shmem_ctx_uint_atomic_and = pshmem_ctx_uint_atomic_and #pragma weak shmem_ctx_ulong_atomic_and = 
pshmem_ctx_ulong_atomic_and #pragma weak shmem_ctx_ulonglong_atomic_and = pshmem_ctx_ulonglong_atomic_and +#pragma weak shmem_ctx_int32_atomic_and = pshmem_ctx_int32_atomic_and +#pragma weak shmem_ctx_int64_atomic_and = pshmem_ctx_int64_atomic_and +#pragma weak shmem_ctx_uint32_atomic_and = pshmem_ctx_uint32_atomic_and +#pragma weak shmem_ctx_uint64_atomic_and = pshmem_ctx_uint64_atomic_and + #pragma weak shmemx_int32_atomic_and = pshmemx_int32_atomic_and #pragma weak shmemx_int64_atomic_and = pshmemx_int64_atomic_and #pragma weak shmemx_uint32_atomic_and = pshmemx_uint32_atomic_and @@ -38,12 +54,28 @@ #include "oshmem/shmem/c/profile/defines.h" #endif +OSHMEM_TYPE_OP(int, int, shmem, and) +OSHMEM_TYPE_OP(long, long, shmem, and) +OSHMEM_TYPE_OP(longlong, long long, shmem, and) OSHMEM_TYPE_OP(uint, unsigned int, shmem, and) OSHMEM_TYPE_OP(ulong, unsigned long, shmem, and) OSHMEM_TYPE_OP(ulonglong, unsigned long long, shmem, and) +OSHMEM_TYPE_OP(int32, int32_t, shmem, and) +OSHMEM_TYPE_OP(int64, int64_t, shmem, and) +OSHMEM_TYPE_OP(uint32, uint32_t, shmem, and) +OSHMEM_TYPE_OP(uint64, uint64_t, shmem, and) + +OSHMEM_CTX_TYPE_OP(int, int, shmem, and) +OSHMEM_CTX_TYPE_OP(long, long, shmem, and) +OSHMEM_CTX_TYPE_OP(longlong, long long, shmem, and) OSHMEM_CTX_TYPE_OP(uint, unsigned int, shmem, and) OSHMEM_CTX_TYPE_OP(ulong, unsigned long, shmem, and) OSHMEM_CTX_TYPE_OP(ulonglong, unsigned long long, shmem, and) +OSHMEM_CTX_TYPE_OP(int32, int32_t, shmem, and) +OSHMEM_CTX_TYPE_OP(int64, int64_t, shmem, and) +OSHMEM_CTX_TYPE_OP(uint32, uint32_t, shmem, and) +OSHMEM_CTX_TYPE_OP(uint64, uint64_t, shmem, and) + OSHMEM_TYPE_OP(int32, int32_t, shmemx, and) OSHMEM_TYPE_OP(int64, int64_t, shmemx, and) OSHMEM_TYPE_OP(uint32, uint32_t, shmemx, and) diff --git a/oshmem/shmem/c/shmem_broadcast.c b/oshmem/shmem/c/shmem_broadcast.c index a618df733ca..ec11f50d585 100644 --- a/oshmem/shmem/c/shmem_broadcast.c +++ b/oshmem/shmem/c/shmem_broadcast.c @@ -29,7 +29,7 @@ static void 
_shmem_broadcast(void *target, int PE_size, long *pSync); -#define SHMEM_TYPE_BROADCAST(name, element_size) \ +#define SHMEM_TYPE_BROADCAST(name, element_size) \ void shmem##name( void *target, \ const void *source, \ size_t nelems, \ @@ -40,10 +40,10 @@ static void _shmem_broadcast(void *target, long *pSync) \ { \ RUNTIME_CHECK_INIT(); \ - RUNTIME_CHECK_ADDR(target); \ - RUNTIME_CHECK_ADDR(source); \ + RUNTIME_CHECK_ADDR_SIZE(target, nelems); \ + RUNTIME_CHECK_ADDR_SIZE(source, nelems); \ \ - _shmem_broadcast( target, source, nelems * element_size, \ + _shmem_broadcast( target, source, nelems * element_size, \ PE_root, PE_start, logPE_stride, PE_size, \ pSync); \ } @@ -78,6 +78,7 @@ static void _shmem_broadcast(void *target, source, nbytes, pSync, + true, SCOLL_DEFAULT_ALG); out: oshmem_proc_group_destroy(group); diff --git a/oshmem/shmem/c/shmem_collect.c b/oshmem/shmem/c/shmem_collect.c index 91502035fcc..423093d6c18 100644 --- a/oshmem/shmem/c/shmem_collect.c +++ b/oshmem/shmem/c/shmem_collect.c @@ -39,10 +39,10 @@ static void _shmem_collect(void *target, long *pSync) \ { \ RUNTIME_CHECK_INIT(); \ - RUNTIME_CHECK_ADDR(target); \ - RUNTIME_CHECK_ADDR(source); \ + RUNTIME_CHECK_ADDR_SIZE(target, nelems); \ + RUNTIME_CHECK_ADDR_SIZE(source, nelems); \ \ - _shmem_collect( target, source, nelems * element_size, \ + _shmem_collect( target, source, nelems * element_size, \ PE_start, logPE_stride, PE_size, \ pSync, \ nelems_type); \ diff --git a/oshmem/shmem/c/shmem_context.c b/oshmem/shmem/c/shmem_context.c index 44367bb6b04..a5a094e6056 100644 --- a/oshmem/shmem/c/shmem_context.c +++ b/oshmem/shmem/c/shmem_context.c @@ -19,6 +19,7 @@ #include "oshmem/constants.h" #include "oshmem/include/shmem.h" +#include "oshmem/mca/spml/spml.h" #include "oshmem/runtime/params.h" #include "oshmem/runtime/runtime.h" #include "oshmem/shmem/shmem_api_logger.h" diff --git a/oshmem/shmem/c/shmem_cswap.c b/oshmem/shmem/c/shmem_cswap.c index 12ef8e4a042..99e3b1f9420 100644 --- 
a/oshmem/shmem/c/shmem_cswap.c +++ b/oshmem/shmem/c/shmem_cswap.c @@ -65,15 +65,24 @@ #if OSHMEM_PROFILING #include "oshmem/include/pshmem.h" +#pragma weak shmem_ctx_uint_atomic_compare_swap = pshmem_ctx_uint_atomic_compare_swap +#pragma weak shmem_ctx_ulong_atomic_compare_swap = pshmem_ctx_ulong_atomic_compare_swap +#pragma weak shmem_ctx_ulonglong_atomic_compare_swap = pshmem_ctx_ulonglong_atomic_compare_swap #pragma weak shmem_ctx_int_atomic_compare_swap = pshmem_ctx_int_atomic_compare_swap #pragma weak shmem_ctx_long_atomic_compare_swap = pshmem_ctx_long_atomic_compare_swap #pragma weak shmem_ctx_longlong_atomic_compare_swap = pshmem_ctx_longlong_atomic_compare_swap + #pragma weak shmem_int_atomic_compare_swap = pshmem_int_atomic_compare_swap #pragma weak shmem_long_atomic_compare_swap = pshmem_long_atomic_compare_swap #pragma weak shmem_longlong_atomic_compare_swap = pshmem_longlong_atomic_compare_swap +#pragma weak shmem_uint_atomic_compare_swap = pshmem_uint_atomic_compare_swap +#pragma weak shmem_ulong_atomic_compare_swap = pshmem_ulong_atomic_compare_swap +#pragma weak shmem_ulonglong_atomic_compare_swap = pshmem_ulonglong_atomic_compare_swap + #pragma weak shmem_int_cswap = pshmem_int_cswap #pragma weak shmem_long_cswap = pshmem_long_cswap #pragma weak shmem_longlong_cswap = pshmem_longlong_cswap + #pragma weak shmemx_int32_cswap = pshmemx_int32_cswap #pragma weak shmemx_int64_cswap = pshmemx_int64_cswap #include "oshmem/shmem/c/profile/defines.h" @@ -82,9 +91,15 @@ SHMEM_CTX_TYPE_ATOMIC_COMPARE_SWAP(_int, int, shmem) SHMEM_CTX_TYPE_ATOMIC_COMPARE_SWAP(_long, long, shmem) SHMEM_CTX_TYPE_ATOMIC_COMPARE_SWAP(_longlong, long long, shmem) +SHMEM_CTX_TYPE_ATOMIC_COMPARE_SWAP(_uint, unsigned int, shmem) +SHMEM_CTX_TYPE_ATOMIC_COMPARE_SWAP(_ulong, unsigned long, shmem) +SHMEM_CTX_TYPE_ATOMIC_COMPARE_SWAP(_ulonglong, unsigned long long, shmem) SHMEM_TYPE_ATOMIC_COMPARE_SWAP(_int, int, shmem) SHMEM_TYPE_ATOMIC_COMPARE_SWAP(_long, long, shmem) 
SHMEM_TYPE_ATOMIC_COMPARE_SWAP(_longlong, long long, shmem) +SHMEM_TYPE_ATOMIC_COMPARE_SWAP(_uint, unsigned int, shmem) +SHMEM_TYPE_ATOMIC_COMPARE_SWAP(_ulong, unsigned long, shmem) +SHMEM_TYPE_ATOMIC_COMPARE_SWAP(_ulonglong, unsigned long long, shmem) /* deprecated APIs */ #define SHMEM_TYPE_CSWAP(type_name, type, prefix) \ diff --git a/oshmem/shmem/c/shmem_fadd.c b/oshmem/shmem/c/shmem_fadd.c index 16c59a4ada0..33f18973aca 100644 --- a/oshmem/shmem/c/shmem_fadd.c +++ b/oshmem/shmem/c/shmem_fadd.c @@ -67,12 +67,21 @@ #pragma weak shmem_ctx_int_atomic_fetch_add = pshmem_ctx_int_atomic_fetch_add #pragma weak shmem_ctx_long_atomic_fetch_add = pshmem_ctx_long_atomic_fetch_add #pragma weak shmem_ctx_longlong_atomic_fetch_add = pshmem_ctx_longlong_atomic_fetch_add +#pragma weak shmem_ctx_uint_atomic_fetch_add = pshmem_ctx_uint_atomic_fetch_add +#pragma weak shmem_ctx_ulong_atomic_fetch_add = pshmem_ctx_ulong_atomic_fetch_add +#pragma weak shmem_ctx_ulonglong_atomic_fetch_add = pshmem_ctx_ulonglong_atomic_fetch_add + #pragma weak shmem_int_atomic_fetch_add = pshmem_int_atomic_fetch_add #pragma weak shmem_long_atomic_fetch_add = pshmem_long_atomic_fetch_add #pragma weak shmem_longlong_atomic_fetch_add = pshmem_longlong_atomic_fetch_add +#pragma weak shmem_uint_atomic_fetch_add = pshmem_uint_atomic_fetch_add +#pragma weak shmem_ulong_atomic_fetch_add = pshmem_ulong_atomic_fetch_add +#pragma weak shmem_ulonglong_atomic_fetch_add = pshmem_ulonglong_atomic_fetch_add + #pragma weak shmem_int_fadd = pshmem_int_fadd #pragma weak shmem_long_fadd = pshmem_long_fadd #pragma weak shmem_longlong_fadd = pshmem_longlong_fadd + #pragma weak shmemx_int32_fadd = pshmemx_int32_fadd #pragma weak shmemx_int64_fadd = pshmemx_int64_fadd #include "oshmem/shmem/c/profile/defines.h" @@ -81,9 +90,15 @@ SHMEM_CTX_TYPE_ATOMIC_FETCH_ADD(_int, int, shmem) SHMEM_CTX_TYPE_ATOMIC_FETCH_ADD(_long, long, shmem) SHMEM_CTX_TYPE_ATOMIC_FETCH_ADD(_longlong, long long, shmem) 
+SHMEM_CTX_TYPE_ATOMIC_FETCH_ADD(_uint, unsigned int, shmem) +SHMEM_CTX_TYPE_ATOMIC_FETCH_ADD(_ulong, unsigned long, shmem) +SHMEM_CTX_TYPE_ATOMIC_FETCH_ADD(_ulonglong, unsigned long long, shmem) SHMEM_TYPE_ATOMIC_FETCH_ADD(_int, int, shmem) SHMEM_TYPE_ATOMIC_FETCH_ADD(_long, long, shmem) SHMEM_TYPE_ATOMIC_FETCH_ADD(_longlong, long long, shmem) +SHMEM_TYPE_ATOMIC_FETCH_ADD(_uint, unsigned int, shmem) +SHMEM_TYPE_ATOMIC_FETCH_ADD(_ulong, unsigned long, shmem) +SHMEM_TYPE_ATOMIC_FETCH_ADD(_ulonglong, unsigned long long, shmem) /* deprecated APIs */ #define SHMEM_TYPE_FADD(type_name, type, prefix) \ diff --git a/oshmem/shmem/c/shmem_fand.c b/oshmem/shmem/c/shmem_fand.c index 2b452a40520..6761844f291 100644 --- a/oshmem/shmem/c/shmem_fand.c +++ b/oshmem/shmem/c/shmem_fand.c @@ -27,12 +27,28 @@ */ #if OSHMEM_PROFILING #include "oshmem/include/pshmem.h" +#pragma weak shmem_int_atomic_fetch_and = pshmem_int_atomic_fetch_and +#pragma weak shmem_long_atomic_fetch_and = pshmem_long_atomic_fetch_and +#pragma weak shmem_longlong_atomic_fetch_and = pshmem_longlong_atomic_fetch_and #pragma weak shmem_uint_atomic_fetch_and = pshmem_uint_atomic_fetch_and #pragma weak shmem_ulong_atomic_fetch_and = pshmem_ulong_atomic_fetch_and #pragma weak shmem_ulonglong_atomic_fetch_and = pshmem_ulonglong_atomic_fetch_and +#pragma weak shmem_int32_atomic_fetch_and = pshmem_int32_atomic_fetch_and +#pragma weak shmem_int64_atomic_fetch_and = pshmem_int64_atomic_fetch_and +#pragma weak shmem_uint32_atomic_fetch_and = pshmem_uint32_atomic_fetch_and +#pragma weak shmem_uint64_atomic_fetch_and = pshmem_uint64_atomic_fetch_and + +#pragma weak shmem_ctx_int_atomic_fetch_and = pshmem_ctx_int_atomic_fetch_and +#pragma weak shmem_ctx_long_atomic_fetch_and = pshmem_ctx_long_atomic_fetch_and +#pragma weak shmem_ctx_longlong_atomic_fetch_and = pshmem_ctx_longlong_atomic_fetch_and #pragma weak shmem_ctx_uint_atomic_fetch_and = pshmem_ctx_uint_atomic_fetch_and #pragma weak shmem_ctx_ulong_atomic_fetch_and = 
pshmem_ctx_ulong_atomic_fetch_and #pragma weak shmem_ctx_ulonglong_atomic_fetch_and = pshmem_ctx_ulonglong_atomic_fetch_and +#pragma weak shmem_ctx_int32_atomic_fetch_and = pshmem_ctx_int32_atomic_fetch_and +#pragma weak shmem_ctx_int64_atomic_fetch_and = pshmem_ctx_int64_atomic_fetch_and +#pragma weak shmem_ctx_uint32_atomic_fetch_and = pshmem_ctx_uint32_atomic_fetch_and +#pragma weak shmem_ctx_uint64_atomic_fetch_and = pshmem_ctx_uint64_atomic_fetch_and + #pragma weak shmemx_int32_atomic_fetch_and = pshmemx_int32_atomic_fetch_and #pragma weak shmemx_int64_atomic_fetch_and = pshmemx_int64_atomic_fetch_and #pragma weak shmemx_uint32_atomic_fetch_and = pshmemx_uint32_atomic_fetch_and @@ -40,14 +56,28 @@ #include "oshmem/shmem/c/profile/defines.h" #endif - unsigned int shmem_uint_atomic_fand(unsigned int *target, unsigned int value, int pe); - +OSHMEM_TYPE_FOP(int, int, shmem, and) +OSHMEM_TYPE_FOP(long, long, shmem, and) +OSHMEM_TYPE_FOP(longlong, long long, shmem, and) OSHMEM_TYPE_FOP(uint, unsigned int, shmem, and) OSHMEM_TYPE_FOP(ulong, unsigned long, shmem, and) OSHMEM_TYPE_FOP(ulonglong, unsigned long long, shmem, and) +OSHMEM_TYPE_FOP(int32, int32_t, shmem, and) +OSHMEM_TYPE_FOP(int64, int64_t, shmem, and) +OSHMEM_TYPE_FOP(uint32, uint32_t, shmem, and) +OSHMEM_TYPE_FOP(uint64, uint64_t, shmem, and) + +OSHMEM_CTX_TYPE_FOP(int, int, shmem, and) +OSHMEM_CTX_TYPE_FOP(long, long, shmem, and) +OSHMEM_CTX_TYPE_FOP(longlong, long long, shmem, and) OSHMEM_CTX_TYPE_FOP(uint, unsigned int, shmem, and) OSHMEM_CTX_TYPE_FOP(ulong, unsigned long, shmem, and) OSHMEM_CTX_TYPE_FOP(ulonglong, unsigned long long, shmem, and) +OSHMEM_CTX_TYPE_FOP(int32, int32_t, shmem, and) +OSHMEM_CTX_TYPE_FOP(int64, int64_t, shmem, and) +OSHMEM_CTX_TYPE_FOP(uint32, uint32_t, shmem, and) +OSHMEM_CTX_TYPE_FOP(uint64, uint64_t, shmem, and) + OSHMEM_TYPE_FOP(int32, int32_t, shmemx, and) OSHMEM_TYPE_FOP(int64, int64_t, shmemx, and) OSHMEM_TYPE_FOP(uint32, uint32_t, shmemx, and) diff --git 
a/oshmem/shmem/c/shmem_fetch.c b/oshmem/shmem/c/shmem_fetch.c index 95c688ea02e..fe02d5a16e5 100644 --- a/oshmem/shmem/c/shmem_fetch.c +++ b/oshmem/shmem/c/shmem_fetch.c @@ -68,18 +68,27 @@ #pragma weak shmem_ctx_int_atomic_fetch = pshmem_ctx_int_atomic_fetch #pragma weak shmem_ctx_long_atomic_fetch = pshmem_ctx_long_atomic_fetch #pragma weak shmem_ctx_longlong_atomic_fetch = pshmem_ctx_longlong_atomic_fetch +#pragma weak shmem_ctx_uint_atomic_fetch = pshmem_ctx_uint_atomic_fetch +#pragma weak shmem_ctx_ulong_atomic_fetch = pshmem_ctx_ulong_atomic_fetch +#pragma weak shmem_ctx_ulonglong_atomic_fetch = pshmem_ctx_ulonglong_atomic_fetch #pragma weak shmem_ctx_double_atomic_fetch = pshmem_ctx_double_atomic_fetch #pragma weak shmem_ctx_float_atomic_fetch = pshmem_ctx_float_atomic_fetch + #pragma weak shmem_int_atomic_fetch = pshmem_int_atomic_fetch #pragma weak shmem_long_atomic_fetch = pshmem_long_atomic_fetch #pragma weak shmem_longlong_atomic_fetch = pshmem_longlong_atomic_fetch +#pragma weak shmem_uint_atomic_fetch = pshmem_uint_atomic_fetch +#pragma weak shmem_ulong_atomic_fetch = pshmem_ulong_atomic_fetch +#pragma weak shmem_ulonglong_atomic_fetch = pshmem_ulonglong_atomic_fetch #pragma weak shmem_double_atomic_fetch = pshmem_double_atomic_fetch #pragma weak shmem_float_atomic_fetch = pshmem_float_atomic_fetch + #pragma weak shmem_int_fetch = pshmem_int_fetch #pragma weak shmem_long_fetch = pshmem_long_fetch #pragma weak shmem_longlong_fetch = pshmem_longlong_fetch #pragma weak shmem_double_fetch = pshmem_double_fetch #pragma weak shmem_float_fetch = pshmem_float_fetch + #pragma weak shmemx_int32_fetch = pshmemx_int32_fetch #pragma weak shmemx_int64_fetch = pshmemx_int64_fetch #include "oshmem/shmem/c/profile/defines.h" @@ -88,11 +97,17 @@ SHMEM_CTX_TYPE_ATOMIC_FETCH(_int, int, shmem) SHMEM_CTX_TYPE_ATOMIC_FETCH(_long, long, shmem) SHMEM_CTX_TYPE_ATOMIC_FETCH(_longlong, long long, shmem) +SHMEM_CTX_TYPE_ATOMIC_FETCH(_uint, unsigned int, shmem) 
+SHMEM_CTX_TYPE_ATOMIC_FETCH(_ulong, unsigned long, shmem) +SHMEM_CTX_TYPE_ATOMIC_FETCH(_ulonglong, unsigned long long, shmem) SHMEM_CTX_TYPE_ATOMIC_FETCH(_double, double, shmem) SHMEM_CTX_TYPE_ATOMIC_FETCH(_float, float, shmem) SHMEM_TYPE_ATOMIC_FETCH(_int, int, shmem) SHMEM_TYPE_ATOMIC_FETCH(_long, long, shmem) SHMEM_TYPE_ATOMIC_FETCH(_longlong, long long, shmem) +SHMEM_TYPE_ATOMIC_FETCH(_uint, unsigned int, shmem) +SHMEM_TYPE_ATOMIC_FETCH(_ulong, unsigned long, shmem) +SHMEM_TYPE_ATOMIC_FETCH(_ulonglong, unsigned long long, shmem) SHMEM_TYPE_ATOMIC_FETCH(_double, double, shmem) SHMEM_TYPE_ATOMIC_FETCH(_float, float, shmem) diff --git a/oshmem/shmem/c/shmem_finc.c b/oshmem/shmem/c/shmem_finc.c index dc507797e23..de3ae9b2dd1 100644 --- a/oshmem/shmem/c/shmem_finc.c +++ b/oshmem/shmem/c/shmem_finc.c @@ -68,12 +68,21 @@ #pragma weak shmem_ctx_int_atomic_fetch_inc = pshmem_ctx_int_atomic_fetch_inc #pragma weak shmem_ctx_long_atomic_fetch_inc = pshmem_ctx_long_atomic_fetch_inc #pragma weak shmem_ctx_longlong_atomic_fetch_inc = pshmem_ctx_longlong_atomic_fetch_inc +#pragma weak shmem_ctx_uint_atomic_fetch_inc = pshmem_ctx_uint_atomic_fetch_inc +#pragma weak shmem_ctx_ulong_atomic_fetch_inc = pshmem_ctx_ulong_atomic_fetch_inc +#pragma weak shmem_ctx_ulonglong_atomic_fetch_inc = pshmem_ctx_ulonglong_atomic_fetch_inc + #pragma weak shmem_int_atomic_fetch_inc = pshmem_int_atomic_fetch_inc #pragma weak shmem_long_atomic_fetch_inc = pshmem_long_atomic_fetch_inc #pragma weak shmem_longlong_atomic_fetch_inc = pshmem_longlong_atomic_fetch_inc +#pragma weak shmem_uint_atomic_fetch_inc = pshmem_uint_atomic_fetch_inc +#pragma weak shmem_ulong_atomic_fetch_inc = pshmem_ulong_atomic_fetch_inc +#pragma weak shmem_ulonglong_atomic_fetch_inc = pshmem_ulonglong_atomic_fetch_inc + #pragma weak shmem_int_finc = pshmem_int_finc #pragma weak shmem_long_finc = pshmem_long_finc #pragma weak shmem_longlong_finc = pshmem_longlong_finc + #pragma weak shmemx_int32_finc = pshmemx_int32_finc 
#pragma weak shmemx_int64_finc = pshmemx_int64_finc #include "oshmem/shmem/c/profile/defines.h" @@ -82,9 +91,15 @@ SHMEM_CTX_TYPE_ATOMIC_FETCH_INC(_int, int, shmem) SHMEM_CTX_TYPE_ATOMIC_FETCH_INC(_long, long, shmem) SHMEM_CTX_TYPE_ATOMIC_FETCH_INC(_longlong, long long, shmem) +SHMEM_CTX_TYPE_ATOMIC_FETCH_INC(_uint, unsigned int, shmem) +SHMEM_CTX_TYPE_ATOMIC_FETCH_INC(_ulong, unsigned long, shmem) +SHMEM_CTX_TYPE_ATOMIC_FETCH_INC(_ulonglong, unsigned long long, shmem) SHMEM_TYPE_ATOMIC_FETCH_INC(_int, int, shmem) SHMEM_TYPE_ATOMIC_FETCH_INC(_long, long, shmem) SHMEM_TYPE_ATOMIC_FETCH_INC(_longlong, long long, shmem) +SHMEM_TYPE_ATOMIC_FETCH_INC(_uint, unsigned int, shmem) +SHMEM_TYPE_ATOMIC_FETCH_INC(_ulong, unsigned long, shmem) +SHMEM_TYPE_ATOMIC_FETCH_INC(_ulonglong, unsigned long long, shmem) /* deprecated APIs */ #define SHMEM_TYPE_FINC(type_name, type, prefix) \ diff --git a/oshmem/shmem/c/shmem_for.c b/oshmem/shmem/c/shmem_for.c index c30bef03b1c..4d0e732004d 100644 --- a/oshmem/shmem/c/shmem_for.c +++ b/oshmem/shmem/c/shmem_for.c @@ -27,12 +27,28 @@ */ #if OSHMEM_PROFILING #include "oshmem/include/pshmem.h" +#pragma weak shmem_int_atomic_fetch_or = pshmem_int_atomic_fetch_or +#pragma weak shmem_long_atomic_fetch_or = pshmem_long_atomic_fetch_or +#pragma weak shmem_longlong_atomic_fetch_or = pshmem_longlong_atomic_fetch_or #pragma weak shmem_uint_atomic_fetch_or = pshmem_uint_atomic_fetch_or #pragma weak shmem_ulong_atomic_fetch_or = pshmem_ulong_atomic_fetch_or #pragma weak shmem_ulonglong_atomic_fetch_or = pshmem_ulonglong_atomic_fetch_or +#pragma weak shmem_int32_atomic_fetch_or = pshmem_int32_atomic_fetch_or +#pragma weak shmem_int64_atomic_fetch_or = pshmem_int64_atomic_fetch_or +#pragma weak shmem_uint32_atomic_fetch_or = pshmem_uint32_atomic_fetch_or +#pragma weak shmem_uint64_atomic_fetch_or = pshmem_uint64_atomic_fetch_or + +#pragma weak shmem_ctx_int_atomic_fetch_or = pshmem_ctx_int_atomic_fetch_or +#pragma weak shmem_ctx_long_atomic_fetch_or = 
pshmem_ctx_long_atomic_fetch_or +#pragma weak shmem_ctx_longlong_atomic_fetch_or = pshmem_ctx_longlong_atomic_fetch_or #pragma weak shmem_ctx_uint_atomic_fetch_or = pshmem_ctx_uint_atomic_fetch_or #pragma weak shmem_ctx_ulong_atomic_fetch_or = pshmem_ctx_ulong_atomic_fetch_or #pragma weak shmem_ctx_ulonglong_atomic_fetch_or = pshmem_ctx_ulonglong_atomic_fetch_or +#pragma weak shmem_ctx_int32_atomic_fetch_or = pshmem_ctx_int32_atomic_fetch_or +#pragma weak shmem_ctx_int64_atomic_fetch_or = pshmem_ctx_int64_atomic_fetch_or +#pragma weak shmem_ctx_uint32_atomic_fetch_or = pshmem_ctx_uint32_atomic_fetch_or +#pragma weak shmem_ctx_uint64_atomic_fetch_or = pshmem_ctx_uint64_atomic_fetch_or + #pragma weak shmemx_int32_atomic_fetch_or = pshmemx_int32_atomic_fetch_or #pragma weak shmemx_int64_atomic_fetch_or = pshmemx_int64_atomic_fetch_or #pragma weak shmemx_uint32_atomic_fetch_or = pshmemx_uint32_atomic_fetch_or @@ -40,12 +56,28 @@ #include "oshmem/shmem/c/profile/defines.h" #endif +OSHMEM_TYPE_FOP(int, int, shmem, or) +OSHMEM_TYPE_FOP(long, long, shmem, or) +OSHMEM_TYPE_FOP(longlong, long long, shmem, or) OSHMEM_TYPE_FOP(uint, unsigned int, shmem, or) OSHMEM_TYPE_FOP(ulong, unsigned long, shmem, or) OSHMEM_TYPE_FOP(ulonglong, unsigned long long, shmem, or) +OSHMEM_TYPE_FOP(int32, int32_t, shmem, or) +OSHMEM_TYPE_FOP(int64, int64_t, shmem, or) +OSHMEM_TYPE_FOP(uint32, uint32_t, shmem, or) +OSHMEM_TYPE_FOP(uint64, uint64_t, shmem, or) + +OSHMEM_CTX_TYPE_FOP(int, int, shmem, or) +OSHMEM_CTX_TYPE_FOP(long, long, shmem, or) +OSHMEM_CTX_TYPE_FOP(longlong, long long, shmem, or) OSHMEM_CTX_TYPE_FOP(uint, unsigned int, shmem, or) OSHMEM_CTX_TYPE_FOP(ulong, unsigned long, shmem, or) OSHMEM_CTX_TYPE_FOP(ulonglong, unsigned long long, shmem, or) +OSHMEM_CTX_TYPE_FOP(int32, int32_t, shmem, or) +OSHMEM_CTX_TYPE_FOP(int64, int64_t, shmem, or) +OSHMEM_CTX_TYPE_FOP(uint32, uint32_t, shmem, or) +OSHMEM_CTX_TYPE_FOP(uint64, uint64_t, shmem, or) + OSHMEM_TYPE_FOP(int32, int32_t, shmemx, or) 
OSHMEM_TYPE_FOP(int64, int64_t, shmemx, or) OSHMEM_TYPE_FOP(uint32, uint32_t, shmemx, or) diff --git a/oshmem/shmem/c/shmem_free.c b/oshmem/shmem/c/shmem_free.c index f5c5ce0caec..eebdd537ab1 100644 --- a/oshmem/shmem/c/shmem_free.c +++ b/oshmem/shmem/c/shmem_free.c @@ -18,6 +18,7 @@ #include "oshmem/runtime/runtime.h" #include "oshmem/mca/memheap/memheap.h" +#include "oshmem/mca/memheap/base/base.h" #if OSHMEM_PROFILING #include "oshmem/include/pshmem.h" @@ -41,6 +42,7 @@ void shfree(void* ptr) static inline void _shfree(void* ptr) { int rc; + map_segment_t *s; RUNTIME_CHECK_INIT(); if (NULL == ptr) { @@ -55,7 +57,15 @@ static inline void _shfree(void* ptr) SHMEM_MUTEX_LOCK(shmem_internal_mutex_alloc); - rc = MCA_MEMHEAP_CALL(free(ptr)); + if (ptr) { + s = memheap_find_va(ptr); + } + + if (s && s->allocator) { + rc = s->allocator->free(s, ptr); + } else { + rc = MCA_MEMHEAP_CALL(free(ptr)); + } SHMEM_MUTEX_UNLOCK(shmem_internal_mutex_alloc); diff --git a/oshmem/shmem/c/shmem_fxor.c b/oshmem/shmem/c/shmem_fxor.c index 413daca45cf..41fe2249c6b 100644 --- a/oshmem/shmem/c/shmem_fxor.c +++ b/oshmem/shmem/c/shmem_fxor.c @@ -27,12 +27,28 @@ */ #if OSHMEM_PROFILING #include "oshmem/include/pshmem.h" +#pragma weak shmem_int_atomic_fetch_xor = pshmem_int_atomic_fetch_xor +#pragma weak shmem_long_atomic_fetch_xor = pshmem_long_atomic_fetch_xor +#pragma weak shmem_longlong_atomic_fetch_xor = pshmem_longlong_atomic_fetch_xor #pragma weak shmem_uint_atomic_fetch_xor = pshmem_uint_atomic_fetch_xor #pragma weak shmem_ulong_atomic_fetch_xor = pshmem_ulong_atomic_fetch_xor #pragma weak shmem_ulonglong_atomic_fetch_xor = pshmem_ulonglong_atomic_fetch_xor +#pragma weak shmem_int32_atomic_fetch_xor = pshmem_int32_atomic_fetch_xor +#pragma weak shmem_int64_atomic_fetch_xor = pshmem_int64_atomic_fetch_xor +#pragma weak shmem_uint32_atomic_fetch_xor = pshmem_uint32_atomic_fetch_xor +#pragma weak shmem_uint64_atomic_fetch_xor = pshmem_uint64_atomic_fetch_xor + +#pragma weak 
shmem_ctx_int_atomic_fetch_xor = pshmem_ctx_int_atomic_fetch_xor +#pragma weak shmem_ctx_long_atomic_fetch_xor = pshmem_ctx_long_atomic_fetch_xor +#pragma weak shmem_ctx_longlong_atomic_fetch_xor = pshmem_ctx_longlong_atomic_fetch_xor #pragma weak shmem_ctx_uint_atomic_fetch_xor = pshmem_ctx_uint_atomic_fetch_xor #pragma weak shmem_ctx_ulong_atomic_fetch_xor = pshmem_ctx_ulong_atomic_fetch_xor #pragma weak shmem_ctx_ulonglong_atomic_fetch_xor = pshmem_ctx_ulonglong_atomic_fetch_xor +#pragma weak shmem_ctx_int32_atomic_fetch_xor = pshmem_ctx_int32_atomic_fetch_xor +#pragma weak shmem_ctx_int64_atomic_fetch_xor = pshmem_ctx_int64_atomic_fetch_xor +#pragma weak shmem_ctx_uint32_atomic_fetch_xor = pshmem_ctx_uint32_atomic_fetch_xor +#pragma weak shmem_ctx_uint64_atomic_fetch_xor = pshmem_ctx_uint64_atomic_fetch_xor + #pragma weak shmemx_int32_atomic_fetch_xor = pshmemx_int32_atomic_fetch_xor #pragma weak shmemx_int64_atomic_fetch_xor = pshmemx_int64_atomic_fetch_xor #pragma weak shmemx_uint32_atomic_fetch_xor = pshmemx_uint32_atomic_fetch_xor @@ -40,12 +56,28 @@ #include "oshmem/shmem/c/profile/defines.h" #endif +OSHMEM_TYPE_FOP(int, int, shmem, xor) +OSHMEM_TYPE_FOP(long, long, shmem, xor) +OSHMEM_TYPE_FOP(longlong, long long, shmem, xor) OSHMEM_TYPE_FOP(uint, unsigned int, shmem, xor) OSHMEM_TYPE_FOP(ulong, unsigned long, shmem, xor) OSHMEM_TYPE_FOP(ulonglong, unsigned long long, shmem, xor) +OSHMEM_TYPE_FOP(int32, int32_t, shmem, xor) +OSHMEM_TYPE_FOP(int64, int64_t, shmem, xor) +OSHMEM_TYPE_FOP(uint32, uint32_t, shmem, xor) +OSHMEM_TYPE_FOP(uint64, uint64_t, shmem, xor) + +OSHMEM_CTX_TYPE_FOP(int, int, shmem, xor) +OSHMEM_CTX_TYPE_FOP(long, long, shmem, xor) +OSHMEM_CTX_TYPE_FOP(longlong, long long, shmem, xor) OSHMEM_CTX_TYPE_FOP(uint, unsigned int, shmem, xor) OSHMEM_CTX_TYPE_FOP(ulong, unsigned long, shmem, xor) OSHMEM_CTX_TYPE_FOP(ulonglong, unsigned long long, shmem, xor) +OSHMEM_CTX_TYPE_FOP(int32, int32_t, shmem, xor) +OSHMEM_CTX_TYPE_FOP(int64, int64_t, 
shmem, xor) +OSHMEM_CTX_TYPE_FOP(uint32, uint32_t, shmem, xor) +OSHMEM_CTX_TYPE_FOP(uint64, uint64_t, shmem, xor) + OSHMEM_TYPE_FOP(int32, int32_t, shmemx, xor) OSHMEM_TYPE_FOP(int64, int64_t, shmemx, xor) OSHMEM_TYPE_FOP(uint32, uint32_t, shmemx, xor) diff --git a/oshmem/shmem/c/shmem_g.c b/oshmem/shmem/c/shmem_g.c index 7ab13913635..aa96afaba68 100644 --- a/oshmem/shmem/c/shmem_g.c +++ b/oshmem/shmem/c/shmem_g.c @@ -58,22 +58,56 @@ #if OSHMEM_PROFILING #include "oshmem/include/pshmem.h" -#pragma weak shmem_ctx_char_g = pshmem_ctx_char_g -#pragma weak shmem_ctx_short_g = pshmem_ctx_short_g -#pragma weak shmem_ctx_int_g = pshmem_ctx_int_g -#pragma weak shmem_ctx_long_g = pshmem_ctx_long_g -#pragma weak shmem_ctx_longlong_g = pshmem_ctx_longlong_g -#pragma weak shmem_ctx_float_g = pshmem_ctx_float_g -#pragma weak shmem_ctx_double_g = pshmem_ctx_double_g +#pragma weak shmem_ctx_char_g = pshmem_ctx_char_g +#pragma weak shmem_ctx_short_g = pshmem_ctx_short_g +#pragma weak shmem_ctx_int_g = pshmem_ctx_int_g +#pragma weak shmem_ctx_long_g = pshmem_ctx_long_g +#pragma weak shmem_ctx_float_g = pshmem_ctx_float_g +#pragma weak shmem_ctx_double_g = pshmem_ctx_double_g +#pragma weak shmem_ctx_longlong_g = pshmem_ctx_longlong_g +#pragma weak shmem_ctx_schar_g = pshmem_ctx_schar_g +#pragma weak shmem_ctx_uchar_g = pshmem_ctx_uchar_g +#pragma weak shmem_ctx_ushort_g = pshmem_ctx_ushort_g +#pragma weak shmem_ctx_uint_g = pshmem_ctx_uint_g +#pragma weak shmem_ctx_ulong_g = pshmem_ctx_ulong_g +#pragma weak shmem_ctx_ulonglong_g = pshmem_ctx_ulonglong_g #pragma weak shmem_ctx_longdouble_g = pshmem_ctx_longdouble_g -#pragma weak shmem_char_g = pshmem_char_g -#pragma weak shmem_short_g = pshmem_short_g -#pragma weak shmem_int_g = pshmem_int_g -#pragma weak shmem_long_g = pshmem_long_g -#pragma weak shmem_longlong_g = pshmem_longlong_g -#pragma weak shmem_float_g = pshmem_float_g -#pragma weak shmem_double_g = pshmem_double_g -#pragma weak shmem_longdouble_g = pshmem_longdouble_g 
+#pragma weak shmem_ctx_int8_g = pshmem_ctx_int8_g +#pragma weak shmem_ctx_int16_g = pshmem_ctx_int16_g +#pragma weak shmem_ctx_int32_g = pshmem_ctx_int32_g +#pragma weak shmem_ctx_int64_g = pshmem_ctx_int64_g +#pragma weak shmem_ctx_uint8_g = pshmem_ctx_uint8_g +#pragma weak shmem_ctx_uint16_g = pshmem_ctx_uint16_g +#pragma weak shmem_ctx_uint32_g = pshmem_ctx_uint32_g +#pragma weak shmem_ctx_uint64_g = pshmem_ctx_uint64_g +#pragma weak shmem_ctx_size_g = pshmem_ctx_size_g +#pragma weak shmem_ctx_ptrdiff_g = pshmem_ctx_ptrdiff_g + +#pragma weak shmem_char_g = pshmem_char_g +#pragma weak shmem_short_g = pshmem_short_g +#pragma weak shmem_int_g = pshmem_int_g +#pragma weak shmem_long_g = pshmem_long_g +#pragma weak shmem_float_g = pshmem_float_g +#pragma weak shmem_double_g = pshmem_double_g +#pragma weak shmem_longlong_g = pshmem_longlong_g +#pragma weak shmem_schar_g = pshmem_schar_g +#pragma weak shmem_uchar_g = pshmem_uchar_g +#pragma weak shmem_ushort_g = pshmem_ushort_g +#pragma weak shmem_uint_g = pshmem_uint_g +#pragma weak shmem_ulong_g = pshmem_ulong_g +#pragma weak shmem_ulonglong_g = pshmem_ulonglong_g +#pragma weak shmem_longdouble_g = pshmem_longdouble_g +#pragma weak shmem_int8_g = pshmem_int8_g +#pragma weak shmem_int16_g = pshmem_int16_g +#pragma weak shmem_int32_g = pshmem_int32_g +#pragma weak shmem_int64_g = pshmem_int64_g +#pragma weak shmem_uint8_g = pshmem_uint8_g +#pragma weak shmem_uint16_g = pshmem_uint16_g +#pragma weak shmem_uint32_g = pshmem_uint32_g +#pragma weak shmem_uint64_g = pshmem_uint64_g +#pragma weak shmem_size_g = pshmem_size_g +#pragma weak shmem_ptrdiff_g = pshmem_ptrdiff_g + #pragma weak shmemx_int16_g = pshmemx_int16_g #pragma weak shmemx_int32_g = pshmemx_int32_g #pragma weak shmemx_int64_g = pshmemx_int64_g @@ -85,17 +119,51 @@ SHMEM_CTX_TYPE_G(_short, short, shmem) SHMEM_CTX_TYPE_G(_int, int, shmem) SHMEM_CTX_TYPE_G(_long, long, shmem) SHMEM_CTX_TYPE_G(_longlong, long long, shmem) +SHMEM_CTX_TYPE_G(_schar, signed char, 
shmem) +SHMEM_CTX_TYPE_G(_uchar, unsigned char, shmem) +SHMEM_CTX_TYPE_G(_ushort, unsigned short, shmem) +SHMEM_CTX_TYPE_G(_uint, unsigned int, shmem) +SHMEM_CTX_TYPE_G(_ulong, unsigned long, shmem) +SHMEM_CTX_TYPE_G(_ulonglong, unsigned long long, shmem) SHMEM_CTX_TYPE_G(_float, float, shmem) SHMEM_CTX_TYPE_G(_double, double, shmem) SHMEM_CTX_TYPE_G(_longdouble, long double, shmem) +SHMEM_CTX_TYPE_G(_int8, int8_t, shmem) +SHMEM_CTX_TYPE_G(_int16, int16_t, shmem) +SHMEM_CTX_TYPE_G(_int32, int32_t, shmem) +SHMEM_CTX_TYPE_G(_int64, int64_t, shmem) +SHMEM_CTX_TYPE_G(_uint8, uint8_t, shmem) +SHMEM_CTX_TYPE_G(_uint16, uint16_t, shmem) +SHMEM_CTX_TYPE_G(_uint32, uint32_t, shmem) +SHMEM_CTX_TYPE_G(_uint64, uint64_t, shmem) +SHMEM_CTX_TYPE_G(_size, size_t, shmem) +SHMEM_CTX_TYPE_G(_ptrdiff, ptrdiff_t, shmem) + SHMEM_TYPE_G(_char, char, shmem) SHMEM_TYPE_G(_short, short, shmem) SHMEM_TYPE_G(_int, int, shmem) SHMEM_TYPE_G(_long, long, shmem) SHMEM_TYPE_G(_longlong, long long, shmem) +SHMEM_TYPE_G(_schar, signed char, shmem) +SHMEM_TYPE_G(_uchar, unsigned char, shmem) +SHMEM_TYPE_G(_ushort, unsigned short, shmem) +SHMEM_TYPE_G(_uint, unsigned int, shmem) +SHMEM_TYPE_G(_ulong, unsigned long, shmem) +SHMEM_TYPE_G(_ulonglong, unsigned long long, shmem) SHMEM_TYPE_G(_float, float, shmem) SHMEM_TYPE_G(_double, double, shmem) SHMEM_TYPE_G(_longdouble, long double, shmem) +SHMEM_TYPE_G(_int8, int8_t, shmem) +SHMEM_TYPE_G(_int16, int16_t, shmem) +SHMEM_TYPE_G(_int32, int32_t, shmem) +SHMEM_TYPE_G(_int64, int64_t, shmem) +SHMEM_TYPE_G(_uint8, uint8_t, shmem) +SHMEM_TYPE_G(_uint16, uint16_t, shmem) +SHMEM_TYPE_G(_uint32, uint32_t, shmem) +SHMEM_TYPE_G(_uint64, uint64_t, shmem) +SHMEM_TYPE_G(_size, size_t, shmem) +SHMEM_TYPE_G(_ptrdiff, ptrdiff_t, shmem) + SHMEM_TYPE_G(_int16, int16_t, shmemx) SHMEM_TYPE_G(_int32, int32_t, shmemx) SHMEM_TYPE_G(_int64, int64_t, shmemx) diff --git a/oshmem/shmem/c/shmem_get.c b/oshmem/shmem/c/shmem_get.c index 9537030138a..452557fdd6a 100644 --- 
a/oshmem/shmem/c/shmem_get.c +++ b/oshmem/shmem/c/shmem_get.c @@ -57,28 +57,63 @@ #if OSHMEM_PROFILING #include "oshmem/include/pshmem.h" -#pragma weak shmem_ctx_char_get = pshmem_ctx_char_get -#pragma weak shmem_ctx_short_get = pshmem_ctx_short_get -#pragma weak shmem_ctx_int_get = pshmem_ctx_int_get -#pragma weak shmem_ctx_long_get = pshmem_ctx_long_get -#pragma weak shmem_ctx_longlong_get = pshmem_ctx_longlong_get -#pragma weak shmem_ctx_float_get = pshmem_ctx_float_get -#pragma weak shmem_ctx_double_get = pshmem_ctx_double_get +#pragma weak shmem_ctx_char_get = pshmem_ctx_char_get +#pragma weak shmem_ctx_short_get = pshmem_ctx_short_get +#pragma weak shmem_ctx_int_get = pshmem_ctx_int_get +#pragma weak shmem_ctx_long_get = pshmem_ctx_long_get +#pragma weak shmem_ctx_float_get = pshmem_ctx_float_get +#pragma weak shmem_ctx_double_get = pshmem_ctx_double_get +#pragma weak shmem_ctx_longlong_get = pshmem_ctx_longlong_get +#pragma weak shmem_ctx_schar_get = pshmem_ctx_schar_get +#pragma weak shmem_ctx_uchar_get = pshmem_ctx_uchar_get +#pragma weak shmem_ctx_ushort_get = pshmem_ctx_ushort_get +#pragma weak shmem_ctx_uint_get = pshmem_ctx_uint_get +#pragma weak shmem_ctx_ulong_get = pshmem_ctx_ulong_get +#pragma weak shmem_ctx_ulonglong_get = pshmem_ctx_ulonglong_get #pragma weak shmem_ctx_longdouble_get = pshmem_ctx_longdouble_get -#pragma weak shmem_char_get = pshmem_char_get -#pragma weak shmem_short_get = pshmem_short_get -#pragma weak shmem_int_get = pshmem_int_get -#pragma weak shmem_long_get = pshmem_long_get -#pragma weak shmem_longlong_get = pshmem_longlong_get -#pragma weak shmem_float_get = pshmem_float_get -#pragma weak shmem_double_get = pshmem_double_get -#pragma weak shmem_longdouble_get = pshmem_longdouble_get +#pragma weak shmem_ctx_int8_get = pshmem_ctx_int8_get +#pragma weak shmem_ctx_int16_get = pshmem_ctx_int16_get +#pragma weak shmem_ctx_int32_get = pshmem_ctx_int32_get +#pragma weak shmem_ctx_int64_get = pshmem_ctx_int64_get +#pragma weak 
shmem_ctx_uint8_get = pshmem_ctx_uint8_get +#pragma weak shmem_ctx_uint16_get = pshmem_ctx_uint16_get +#pragma weak shmem_ctx_uint32_get = pshmem_ctx_uint32_get +#pragma weak shmem_ctx_uint64_get = pshmem_ctx_uint64_get +#pragma weak shmem_ctx_size_get = pshmem_ctx_size_get +#pragma weak shmem_ctx_ptrdiff_get = pshmem_ctx_ptrdiff_get + +#pragma weak shmem_char_get = pshmem_char_get +#pragma weak shmem_short_get = pshmem_short_get +#pragma weak shmem_int_get = pshmem_int_get +#pragma weak shmem_long_get = pshmem_long_get +#pragma weak shmem_float_get = pshmem_float_get +#pragma weak shmem_double_get = pshmem_double_get +#pragma weak shmem_longlong_get = pshmem_longlong_get +#pragma weak shmem_schar_get = pshmem_schar_get +#pragma weak shmem_uchar_get = pshmem_uchar_get +#pragma weak shmem_ushort_get = pshmem_ushort_get +#pragma weak shmem_uint_get = pshmem_uint_get +#pragma weak shmem_ulong_get = pshmem_ulong_get +#pragma weak shmem_ulonglong_get = pshmem_ulonglong_get +#pragma weak shmem_longdouble_get = pshmem_longdouble_get +#pragma weak shmem_int8_get = pshmem_int8_get +#pragma weak shmem_int16_get = pshmem_int16_get +#pragma weak shmem_int32_get = pshmem_int32_get +#pragma weak shmem_int64_get = pshmem_int64_get +#pragma weak shmem_uint8_get = pshmem_uint8_get +#pragma weak shmem_uint16_get = pshmem_uint16_get +#pragma weak shmem_uint32_get = pshmem_uint32_get +#pragma weak shmem_uint64_get = pshmem_uint64_get +#pragma weak shmem_size_get = pshmem_size_get +#pragma weak shmem_ptrdiff_get = pshmem_ptrdiff_get + #pragma weak shmem_ctx_getmem = pshmem_ctx_getmem #pragma weak shmem_ctx_get8 = pshmem_ctx_get8 #pragma weak shmem_ctx_get16 = pshmem_ctx_get16 #pragma weak shmem_ctx_get32 = pshmem_ctx_get32 #pragma weak shmem_ctx_get64 = pshmem_ctx_get64 #pragma weak shmem_ctx_get128 = pshmem_ctx_get128 + #pragma weak shmem_getmem = pshmem_getmem #pragma weak shmem_get8 = pshmem_get8 #pragma weak shmem_get16 = pshmem_get16 @@ -93,17 +128,50 @@ SHMEM_CTX_TYPE_GET(_short, 
short) SHMEM_CTX_TYPE_GET(_int, int) SHMEM_CTX_TYPE_GET(_long, long) SHMEM_CTX_TYPE_GET(_longlong, long long) +SHMEM_CTX_TYPE_GET(_schar, signed char) +SHMEM_CTX_TYPE_GET(_uchar, unsigned char) +SHMEM_CTX_TYPE_GET(_ushort, unsigned short) +SHMEM_CTX_TYPE_GET(_uint, unsigned int) +SHMEM_CTX_TYPE_GET(_ulong, unsigned long) +SHMEM_CTX_TYPE_GET(_ulonglong, unsigned long long) SHMEM_CTX_TYPE_GET(_float, float) SHMEM_CTX_TYPE_GET(_double, double) SHMEM_CTX_TYPE_GET(_longdouble, long double) +SHMEM_CTX_TYPE_GET(_int8, int8_t) +SHMEM_CTX_TYPE_GET(_int16, int16_t) +SHMEM_CTX_TYPE_GET(_int32, int32_t) +SHMEM_CTX_TYPE_GET(_int64, int64_t) +SHMEM_CTX_TYPE_GET(_uint8, uint8_t) +SHMEM_CTX_TYPE_GET(_uint16, uint16_t) +SHMEM_CTX_TYPE_GET(_uint32, uint32_t) +SHMEM_CTX_TYPE_GET(_uint64, uint64_t) +SHMEM_CTX_TYPE_GET(_size, size_t) +SHMEM_CTX_TYPE_GET(_ptrdiff, ptrdiff_t) + SHMEM_TYPE_GET(_char, char) SHMEM_TYPE_GET(_short, short) SHMEM_TYPE_GET(_int, int) SHMEM_TYPE_GET(_long, long) SHMEM_TYPE_GET(_longlong, long long) +SHMEM_TYPE_GET(_schar, signed char) +SHMEM_TYPE_GET(_uchar, unsigned char) +SHMEM_TYPE_GET(_ushort, unsigned short) +SHMEM_TYPE_GET(_uint, unsigned int) +SHMEM_TYPE_GET(_ulong, unsigned long) +SHMEM_TYPE_GET(_ulonglong, unsigned long long) SHMEM_TYPE_GET(_float, float) SHMEM_TYPE_GET(_double, double) SHMEM_TYPE_GET(_longdouble, long double) +SHMEM_TYPE_GET(_int8, int8_t) +SHMEM_TYPE_GET(_int16, int16_t) +SHMEM_TYPE_GET(_int32, int32_t) +SHMEM_TYPE_GET(_int64, int64_t) +SHMEM_TYPE_GET(_uint8, uint8_t) +SHMEM_TYPE_GET(_uint16, uint16_t) +SHMEM_TYPE_GET(_uint32, uint32_t) +SHMEM_TYPE_GET(_uint64, uint64_t) +SHMEM_TYPE_GET(_size, size_t) +SHMEM_TYPE_GET(_ptrdiff, ptrdiff_t) #define DO_SHMEM_GETMEM(ctx, target, source, element_size, nelems, pe) do { \ int rc = OSHMEM_SUCCESS; \ diff --git a/oshmem/shmem/c/shmem_get_nb.c b/oshmem/shmem/c/shmem_get_nb.c index 971223f91fe..877b7ac0201 100644 --- a/oshmem/shmem/c/shmem_get_nb.c +++ b/oshmem/shmem/c/shmem_get_nb.c @@ -57,28 
+57,63 @@ #if OSHMEM_PROFILING #include "oshmem/include/pshmem.h" -#pragma weak shmem_ctx_char_get_nbi = pshmem_ctx_char_get_nbi -#pragma weak shmem_ctx_short_get_nbi = pshmem_ctx_short_get_nbi -#pragma weak shmem_ctx_int_get_nbi = pshmem_ctx_int_get_nbi -#pragma weak shmem_ctx_long_get_nbi = pshmem_ctx_long_get_nbi -#pragma weak shmem_ctx_longlong_get_nbi = pshmem_ctx_longlong_get_nbi -#pragma weak shmem_ctx_float_get_nbi = pshmem_ctx_float_get_nbi -#pragma weak shmem_ctx_double_get_nbi = pshmem_ctx_double_get_nbi +#pragma weak shmem_ctx_char_get_nbi = pshmem_ctx_char_get_nbi +#pragma weak shmem_ctx_short_get_nbi = pshmem_ctx_short_get_nbi +#pragma weak shmem_ctx_int_get_nbi = pshmem_ctx_int_get_nbi +#pragma weak shmem_ctx_long_get_nbi = pshmem_ctx_long_get_nbi +#pragma weak shmem_ctx_float_get_nbi = pshmem_ctx_float_get_nbi +#pragma weak shmem_ctx_double_get_nbi = pshmem_ctx_double_get_nbi +#pragma weak shmem_ctx_longlong_get_nbi = pshmem_ctx_longlong_get_nbi +#pragma weak shmem_ctx_schar_get_nbi = pshmem_ctx_schar_get_nbi +#pragma weak shmem_ctx_uchar_get_nbi = pshmem_ctx_uchar_get_nbi +#pragma weak shmem_ctx_ushort_get_nbi = pshmem_ctx_ushort_get_nbi +#pragma weak shmem_ctx_uint_get_nbi = pshmem_ctx_uint_get_nbi +#pragma weak shmem_ctx_ulong_get_nbi = pshmem_ctx_ulong_get_nbi +#pragma weak shmem_ctx_ulonglong_get_nbi = pshmem_ctx_ulonglong_get_nbi #pragma weak shmem_ctx_longdouble_get_nbi = pshmem_ctx_longdouble_get_nbi -#pragma weak shmem_char_get_nbi = pshmem_char_get_nbi -#pragma weak shmem_short_get_nbi = pshmem_short_get_nbi -#pragma weak shmem_int_get_nbi = pshmem_int_get_nbi -#pragma weak shmem_long_get_nbi = pshmem_long_get_nbi -#pragma weak shmem_longlong_get_nbi = pshmem_longlong_get_nbi -#pragma weak shmem_float_get_nbi = pshmem_float_get_nbi -#pragma weak shmem_double_get_nbi = pshmem_double_get_nbi -#pragma weak shmem_longdouble_get_nbi = pshmem_longdouble_get_nbi +#pragma weak shmem_ctx_int8_get_nbi = pshmem_ctx_int8_get_nbi +#pragma weak 
shmem_ctx_int16_get_nbi = pshmem_ctx_int16_get_nbi +#pragma weak shmem_ctx_int32_get_nbi = pshmem_ctx_int32_get_nbi +#pragma weak shmem_ctx_int64_get_nbi = pshmem_ctx_int64_get_nbi +#pragma weak shmem_ctx_uint8_get_nbi = pshmem_ctx_uint8_get_nbi +#pragma weak shmem_ctx_uint16_get_nbi = pshmem_ctx_uint16_get_nbi +#pragma weak shmem_ctx_uint32_get_nbi = pshmem_ctx_uint32_get_nbi +#pragma weak shmem_ctx_uint64_get_nbi = pshmem_ctx_uint64_get_nbi +#pragma weak shmem_ctx_size_get_nbi = pshmem_ctx_size_get_nbi +#pragma weak shmem_ctx_ptrdiff_get_nbi = pshmem_ctx_ptrdiff_get_nbi + +#pragma weak shmem_char_get_nbi = pshmem_char_get_nbi +#pragma weak shmem_short_get_nbi = pshmem_short_get_nbi +#pragma weak shmem_int_get_nbi = pshmem_int_get_nbi +#pragma weak shmem_long_get_nbi = pshmem_long_get_nbi +#pragma weak shmem_float_get_nbi = pshmem_float_get_nbi +#pragma weak shmem_double_get_nbi = pshmem_double_get_nbi +#pragma weak shmem_longlong_get_nbi = pshmem_longlong_get_nbi +#pragma weak shmem_schar_get_nbi = pshmem_schar_get_nbi +#pragma weak shmem_uchar_get_nbi = pshmem_uchar_get_nbi +#pragma weak shmem_ushort_get_nbi = pshmem_ushort_get_nbi +#pragma weak shmem_uint_get_nbi = pshmem_uint_get_nbi +#pragma weak shmem_ulong_get_nbi = pshmem_ulong_get_nbi +#pragma weak shmem_ulonglong_get_nbi = pshmem_ulonglong_get_nbi +#pragma weak shmem_longdouble_get_nbi = pshmem_longdouble_get_nbi +#pragma weak shmem_int8_get_nbi = pshmem_int8_get_nbi +#pragma weak shmem_int16_get_nbi = pshmem_int16_get_nbi +#pragma weak shmem_int32_get_nbi = pshmem_int32_get_nbi +#pragma weak shmem_int64_get_nbi = pshmem_int64_get_nbi +#pragma weak shmem_uint8_get_nbi = pshmem_uint8_get_nbi +#pragma weak shmem_uint16_get_nbi = pshmem_uint16_get_nbi +#pragma weak shmem_uint32_get_nbi = pshmem_uint32_get_nbi +#pragma weak shmem_uint64_get_nbi = pshmem_uint64_get_nbi +#pragma weak shmem_size_get_nbi = pshmem_size_get_nbi +#pragma weak shmem_ptrdiff_get_nbi = pshmem_ptrdiff_get_nbi + #pragma weak 
shmem_ctx_get8_nbi = pshmem_ctx_get8_nbi #pragma weak shmem_ctx_get16_nbi = pshmem_ctx_get16_nbi #pragma weak shmem_ctx_get32_nbi = pshmem_ctx_get32_nbi #pragma weak shmem_ctx_get64_nbi = pshmem_ctx_get64_nbi #pragma weak shmem_ctx_get128_nbi = pshmem_ctx_get128_nbi #pragma weak shmem_ctx_getmem_nbi = pshmem_ctx_getmem_nbi + #pragma weak shmem_get8_nbi = pshmem_get8_nbi #pragma weak shmem_get16_nbi = pshmem_get16_nbi #pragma weak shmem_get32_nbi = pshmem_get32_nbi @@ -93,17 +128,50 @@ SHMEM_CTX_TYPE_GET_NB(_short, short) SHMEM_CTX_TYPE_GET_NB(_int, int) SHMEM_CTX_TYPE_GET_NB(_long, long) SHMEM_CTX_TYPE_GET_NB(_longlong, long long) +SHMEM_CTX_TYPE_GET_NB(_schar, signed char) +SHMEM_CTX_TYPE_GET_NB(_uchar, unsigned char) +SHMEM_CTX_TYPE_GET_NB(_ushort, unsigned short) +SHMEM_CTX_TYPE_GET_NB(_uint, unsigned int) +SHMEM_CTX_TYPE_GET_NB(_ulong, unsigned long) +SHMEM_CTX_TYPE_GET_NB(_ulonglong, unsigned long long) SHMEM_CTX_TYPE_GET_NB(_float, float) SHMEM_CTX_TYPE_GET_NB(_double, double) SHMEM_CTX_TYPE_GET_NB(_longdouble, long double) +SHMEM_CTX_TYPE_GET_NB(_int8, int8_t) +SHMEM_CTX_TYPE_GET_NB(_int16, int16_t) +SHMEM_CTX_TYPE_GET_NB(_int32, int32_t) +SHMEM_CTX_TYPE_GET_NB(_int64, int64_t) +SHMEM_CTX_TYPE_GET_NB(_uint8, uint8_t) +SHMEM_CTX_TYPE_GET_NB(_uint16, uint16_t) +SHMEM_CTX_TYPE_GET_NB(_uint32, uint32_t) +SHMEM_CTX_TYPE_GET_NB(_uint64, uint64_t) +SHMEM_CTX_TYPE_GET_NB(_size, size_t) +SHMEM_CTX_TYPE_GET_NB(_ptrdiff, ptrdiff_t) + SHMEM_TYPE_GET_NB(_char, char) SHMEM_TYPE_GET_NB(_short, short) SHMEM_TYPE_GET_NB(_int, int) SHMEM_TYPE_GET_NB(_long, long) SHMEM_TYPE_GET_NB(_longlong, long long) +SHMEM_TYPE_GET_NB(_schar, signed char) +SHMEM_TYPE_GET_NB(_uchar, unsigned char) +SHMEM_TYPE_GET_NB(_ushort, unsigned short) +SHMEM_TYPE_GET_NB(_uint, unsigned int) +SHMEM_TYPE_GET_NB(_ulong, unsigned long) +SHMEM_TYPE_GET_NB(_ulonglong, unsigned long long) SHMEM_TYPE_GET_NB(_float, float) SHMEM_TYPE_GET_NB(_double, double) SHMEM_TYPE_GET_NB(_longdouble, long double) 
+SHMEM_TYPE_GET_NB(_int8, int8_t) +SHMEM_TYPE_GET_NB(_int16, int16_t) +SHMEM_TYPE_GET_NB(_int32, int32_t) +SHMEM_TYPE_GET_NB(_int64, int64_t) +SHMEM_TYPE_GET_NB(_uint8, uint8_t) +SHMEM_TYPE_GET_NB(_uint16, uint16_t) +SHMEM_TYPE_GET_NB(_uint32, uint32_t) +SHMEM_TYPE_GET_NB(_uint64, uint64_t) +SHMEM_TYPE_GET_NB(_size, size_t) +SHMEM_TYPE_GET_NB(_ptrdiff, ptrdiff_t) #define DO_SHMEM_GETMEM_NB(ctx, target, source, element_size, nelems, pe) do { \ int rc = OSHMEM_SUCCESS; \ diff --git a/oshmem/shmem/c/shmem_iget.c b/oshmem/shmem/c/shmem_iget.c index 300d3c310e6..eb4d31cf802 100644 --- a/oshmem/shmem/c/shmem_iget.c +++ b/oshmem/shmem/c/shmem_iget.c @@ -62,27 +62,62 @@ #if OSHMEM_PROFILING #include "oshmem/include/pshmem.h" -#pragma weak shmem_ctx_char_iget = pshmem_ctx_char_iget -#pragma weak shmem_ctx_short_iget = pshmem_ctx_short_iget -#pragma weak shmem_ctx_int_iget = pshmem_ctx_int_iget -#pragma weak shmem_ctx_long_iget = pshmem_ctx_long_iget -#pragma weak shmem_ctx_longlong_iget = pshmem_ctx_longlong_iget -#pragma weak shmem_ctx_float_iget = pshmem_ctx_float_iget -#pragma weak shmem_ctx_double_iget = pshmem_ctx_double_iget +#pragma weak shmem_ctx_char_iget = pshmem_ctx_char_iget +#pragma weak shmem_ctx_short_iget = pshmem_ctx_short_iget +#pragma weak shmem_ctx_int_iget = pshmem_ctx_int_iget +#pragma weak shmem_ctx_long_iget = pshmem_ctx_long_iget +#pragma weak shmem_ctx_float_iget = pshmem_ctx_float_iget +#pragma weak shmem_ctx_double_iget = pshmem_ctx_double_iget +#pragma weak shmem_ctx_longlong_iget = pshmem_ctx_longlong_iget +#pragma weak shmem_ctx_schar_iget = pshmem_ctx_schar_iget +#pragma weak shmem_ctx_uchar_iget = pshmem_ctx_uchar_iget +#pragma weak shmem_ctx_ushort_iget = pshmem_ctx_ushort_iget +#pragma weak shmem_ctx_uint_iget = pshmem_ctx_uint_iget +#pragma weak shmem_ctx_ulong_iget = pshmem_ctx_ulong_iget +#pragma weak shmem_ctx_ulonglong_iget = pshmem_ctx_ulonglong_iget #pragma weak shmem_ctx_longdouble_iget = pshmem_ctx_longdouble_iget -#pragma weak 
shmem_char_iget = pshmem_char_iget -#pragma weak shmem_short_iget = pshmem_short_iget -#pragma weak shmem_int_iget = pshmem_int_iget -#pragma weak shmem_long_iget = pshmem_long_iget -#pragma weak shmem_longlong_iget = pshmem_longlong_iget -#pragma weak shmem_float_iget = pshmem_float_iget -#pragma weak shmem_double_iget = pshmem_double_iget -#pragma weak shmem_longdouble_iget = pshmem_longdouble_iget +#pragma weak shmem_ctx_int8_iget = pshmem_ctx_int8_iget +#pragma weak shmem_ctx_int16_iget = pshmem_ctx_int16_iget +#pragma weak shmem_ctx_int32_iget = pshmem_ctx_int32_iget +#pragma weak shmem_ctx_int64_iget = pshmem_ctx_int64_iget +#pragma weak shmem_ctx_uint8_iget = pshmem_ctx_uint8_iget +#pragma weak shmem_ctx_uint16_iget = pshmem_ctx_uint16_iget +#pragma weak shmem_ctx_uint32_iget = pshmem_ctx_uint32_iget +#pragma weak shmem_ctx_uint64_iget = pshmem_ctx_uint64_iget +#pragma weak shmem_ctx_size_iget = pshmem_ctx_size_iget +#pragma weak shmem_ctx_ptrdiff_iget = pshmem_ctx_ptrdiff_iget + +#pragma weak shmem_char_iget = pshmem_char_iget +#pragma weak shmem_short_iget = pshmem_short_iget +#pragma weak shmem_int_iget = pshmem_int_iget +#pragma weak shmem_long_iget = pshmem_long_iget +#pragma weak shmem_float_iget = pshmem_float_iget +#pragma weak shmem_double_iget = pshmem_double_iget +#pragma weak shmem_longlong_iget = pshmem_longlong_iget +#pragma weak shmem_schar_iget = pshmem_schar_iget +#pragma weak shmem_uchar_iget = pshmem_uchar_iget +#pragma weak shmem_ushort_iget = pshmem_ushort_iget +#pragma weak shmem_uint_iget = pshmem_uint_iget +#pragma weak shmem_ulong_iget = pshmem_ulong_iget +#pragma weak shmem_ulonglong_iget = pshmem_ulonglong_iget +#pragma weak shmem_longdouble_iget = pshmem_longdouble_iget +#pragma weak shmem_int8_iget = pshmem_int8_iget +#pragma weak shmem_int16_iget = pshmem_int16_iget +#pragma weak shmem_int32_iget = pshmem_int32_iget +#pragma weak shmem_int64_iget = pshmem_int64_iget +#pragma weak shmem_uint8_iget = pshmem_uint8_iget +#pragma 
weak shmem_uint16_iget = pshmem_uint16_iget +#pragma weak shmem_uint32_iget = pshmem_uint32_iget +#pragma weak shmem_uint64_iget = pshmem_uint64_iget +#pragma weak shmem_size_iget = pshmem_size_iget +#pragma weak shmem_ptrdiff_iget = pshmem_ptrdiff_iget + #pragma weak shmem_ctx_iget8 = pshmem_ctx_iget8 #pragma weak shmem_ctx_iget16 = pshmem_ctx_iget16 #pragma weak shmem_ctx_iget32 = pshmem_ctx_iget32 #pragma weak shmem_ctx_iget64 = pshmem_ctx_iget64 #pragma weak shmem_ctx_iget128 = pshmem_ctx_iget128 + #pragma weak shmem_iget8 = pshmem_iget8 #pragma weak shmem_iget16 = pshmem_iget16 #pragma weak shmem_iget32 = pshmem_iget32 @@ -96,17 +131,50 @@ SHMEM_CTX_TYPE_IGET(_short, short) SHMEM_CTX_TYPE_IGET(_int, int) SHMEM_CTX_TYPE_IGET(_long, long) SHMEM_CTX_TYPE_IGET(_longlong, long long) +SHMEM_CTX_TYPE_IGET(_schar, signed char) +SHMEM_CTX_TYPE_IGET(_uchar, unsigned char) +SHMEM_CTX_TYPE_IGET(_ushort, unsigned short) +SHMEM_CTX_TYPE_IGET(_uint, unsigned int) +SHMEM_CTX_TYPE_IGET(_ulong, unsigned long) +SHMEM_CTX_TYPE_IGET(_ulonglong, unsigned long long) SHMEM_CTX_TYPE_IGET(_float, float) SHMEM_CTX_TYPE_IGET(_double, double) SHMEM_CTX_TYPE_IGET(_longdouble, long double) +SHMEM_CTX_TYPE_IGET(_int8, int8_t) +SHMEM_CTX_TYPE_IGET(_int16, int16_t) +SHMEM_CTX_TYPE_IGET(_int32, int32_t) +SHMEM_CTX_TYPE_IGET(_int64, int64_t) +SHMEM_CTX_TYPE_IGET(_uint8, uint8_t) +SHMEM_CTX_TYPE_IGET(_uint16, uint16_t) +SHMEM_CTX_TYPE_IGET(_uint32, uint32_t) +SHMEM_CTX_TYPE_IGET(_uint64, uint64_t) +SHMEM_CTX_TYPE_IGET(_size, size_t) +SHMEM_CTX_TYPE_IGET(_ptrdiff, ptrdiff_t) + SHMEM_TYPE_IGET(_char, char) SHMEM_TYPE_IGET(_short, short) SHMEM_TYPE_IGET(_int, int) SHMEM_TYPE_IGET(_long, long) SHMEM_TYPE_IGET(_longlong, long long) +SHMEM_TYPE_IGET(_schar, signed char) +SHMEM_TYPE_IGET(_uchar, unsigned char) +SHMEM_TYPE_IGET(_ushort, unsigned short) +SHMEM_TYPE_IGET(_uint, unsigned int) +SHMEM_TYPE_IGET(_ulong, unsigned long) +SHMEM_TYPE_IGET(_ulonglong, unsigned long long) SHMEM_TYPE_IGET(_float, 
float) SHMEM_TYPE_IGET(_double, double) SHMEM_TYPE_IGET(_longdouble, long double) +SHMEM_TYPE_IGET(_int8, int8_t) +SHMEM_TYPE_IGET(_int16, int16_t) +SHMEM_TYPE_IGET(_int32, int32_t) +SHMEM_TYPE_IGET(_int64, int64_t) +SHMEM_TYPE_IGET(_uint8, uint8_t) +SHMEM_TYPE_IGET(_uint16, uint16_t) +SHMEM_TYPE_IGET(_uint32, uint32_t) +SHMEM_TYPE_IGET(_uint64, uint64_t) +SHMEM_TYPE_IGET(_size, size_t) +SHMEM_TYPE_IGET(_ptrdiff, ptrdiff_t) #define DO_SHMEM_IGETMEM(ctx, target, source, tst, sst, element_size, nelems, pe) do { \ int rc = OSHMEM_SUCCESS; \ diff --git a/oshmem/shmem/c/shmem_inc.c b/oshmem/shmem/c/shmem_inc.c index f3e022d3b60..8c44c0258d1 100644 --- a/oshmem/shmem/c/shmem_inc.c +++ b/oshmem/shmem/c/shmem_inc.c @@ -63,12 +63,21 @@ #pragma weak shmem_ctx_int_atomic_inc = pshmem_ctx_int_atomic_inc #pragma weak shmem_ctx_long_atomic_inc = pshmem_ctx_long_atomic_inc #pragma weak shmem_ctx_longlong_atomic_inc = pshmem_ctx_longlong_atomic_inc +#pragma weak shmem_ctx_uint_atomic_inc = pshmem_ctx_uint_atomic_inc +#pragma weak shmem_ctx_ulong_atomic_inc = pshmem_ctx_ulong_atomic_inc +#pragma weak shmem_ctx_ulonglong_atomic_inc = pshmem_ctx_ulonglong_atomic_inc + #pragma weak shmem_int_atomic_inc = pshmem_int_atomic_inc #pragma weak shmem_long_atomic_inc = pshmem_long_atomic_inc #pragma weak shmem_longlong_atomic_inc = pshmem_longlong_atomic_inc +#pragma weak shmem_uint_atomic_inc = pshmem_uint_atomic_inc +#pragma weak shmem_ulong_atomic_inc = pshmem_ulong_atomic_inc +#pragma weak shmem_ulonglong_atomic_inc = pshmem_ulonglong_atomic_inc + #pragma weak shmem_int_inc = pshmem_int_inc #pragma weak shmem_long_inc = pshmem_long_inc #pragma weak shmem_longlong_inc = pshmem_longlong_inc + #pragma weak shmemx_int32_inc = pshmemx_int32_inc #pragma weak shmemx_int64_inc = pshmemx_int64_inc #include "oshmem/shmem/c/profile/defines.h" @@ -77,14 +86,20 @@ SHMEM_CTX_TYPE_ATOMIC_INC(_int, int, shmem) SHMEM_CTX_TYPE_ATOMIC_INC(_long, long, shmem) SHMEM_CTX_TYPE_ATOMIC_INC(_longlong, long long, 
shmem) +SHMEM_CTX_TYPE_ATOMIC_INC(_uint, unsigned int, shmem) +SHMEM_CTX_TYPE_ATOMIC_INC(_ulong, unsigned long, shmem) +SHMEM_CTX_TYPE_ATOMIC_INC(_ulonglong, unsigned long long, shmem) SHMEM_TYPE_ATOMIC_INC(_int, int, shmem) SHMEM_TYPE_ATOMIC_INC(_long, long, shmem) SHMEM_TYPE_ATOMIC_INC(_longlong, long long, shmem) +SHMEM_TYPE_ATOMIC_INC(_uint, unsigned int, shmem) +SHMEM_TYPE_ATOMIC_INC(_ulong, unsigned long, shmem) +SHMEM_TYPE_ATOMIC_INC(_ulonglong, unsigned long long, shmem) #define SHMEM_TYPE_INC(type_name, type, prefix) \ void prefix##type_name##_inc(type *target, int pe) \ { \ - DO_SHMEM_TYPE_ATOMIC_INC(oshmem_ctx_default, type_name, \ + DO_SHMEM_TYPE_ATOMIC_INC(oshmem_ctx_default, type_name, \ type, target, pe); \ return ; \ } diff --git a/oshmem/shmem/c/shmem_iput.c b/oshmem/shmem/c/shmem_iput.c index 9b0137b1987..0e83b6c8495 100644 --- a/oshmem/shmem/c/shmem_iput.c +++ b/oshmem/shmem/c/shmem_iput.c @@ -63,27 +63,62 @@ #if OSHMEM_PROFILING #include "oshmem/include/pshmem.h" -#pragma weak shmem_ctx_char_iput = pshmem_ctx_char_iput -#pragma weak shmem_ctx_short_iput = pshmem_ctx_short_iput -#pragma weak shmem_ctx_int_iput = pshmem_ctx_int_iput -#pragma weak shmem_ctx_long_iput = pshmem_ctx_long_iput -#pragma weak shmem_ctx_longlong_iput = pshmem_ctx_longlong_iput -#pragma weak shmem_ctx_float_iput = pshmem_ctx_float_iput -#pragma weak shmem_ctx_double_iput = pshmem_ctx_double_iput +#pragma weak shmem_ctx_char_iput = pshmem_ctx_char_iput +#pragma weak shmem_ctx_short_iput = pshmem_ctx_short_iput +#pragma weak shmem_ctx_int_iput = pshmem_ctx_int_iput +#pragma weak shmem_ctx_long_iput = pshmem_ctx_long_iput +#pragma weak shmem_ctx_float_iput = pshmem_ctx_float_iput +#pragma weak shmem_ctx_double_iput = pshmem_ctx_double_iput +#pragma weak shmem_ctx_longlong_iput = pshmem_ctx_longlong_iput +#pragma weak shmem_ctx_schar_iput = pshmem_ctx_schar_iput +#pragma weak shmem_ctx_uchar_iput = pshmem_ctx_uchar_iput +#pragma weak shmem_ctx_ushort_iput = 
pshmem_ctx_ushort_iput +#pragma weak shmem_ctx_uint_iput = pshmem_ctx_uint_iput +#pragma weak shmem_ctx_ulong_iput = pshmem_ctx_ulong_iput +#pragma weak shmem_ctx_ulonglong_iput = pshmem_ctx_ulonglong_iput #pragma weak shmem_ctx_longdouble_iput = pshmem_ctx_longdouble_iput -#pragma weak shmem_char_iput = pshmem_char_iput -#pragma weak shmem_short_iput = pshmem_short_iput -#pragma weak shmem_int_iput = pshmem_int_iput -#pragma weak shmem_long_iput = pshmem_long_iput -#pragma weak shmem_longlong_iput = pshmem_longlong_iput -#pragma weak shmem_float_iput = pshmem_float_iput -#pragma weak shmem_double_iput = pshmem_double_iput -#pragma weak shmem_longdouble_iput = pshmem_longdouble_iput +#pragma weak shmem_ctx_int8_iput = pshmem_ctx_int8_iput +#pragma weak shmem_ctx_int16_iput = pshmem_ctx_int16_iput +#pragma weak shmem_ctx_int32_iput = pshmem_ctx_int32_iput +#pragma weak shmem_ctx_int64_iput = pshmem_ctx_int64_iput +#pragma weak shmem_ctx_uint8_iput = pshmem_ctx_uint8_iput +#pragma weak shmem_ctx_uint16_iput = pshmem_ctx_uint16_iput +#pragma weak shmem_ctx_uint32_iput = pshmem_ctx_uint32_iput +#pragma weak shmem_ctx_uint64_iput = pshmem_ctx_uint64_iput +#pragma weak shmem_ctx_size_iput = pshmem_ctx_size_iput +#pragma weak shmem_ctx_ptrdiff_iput = pshmem_ctx_ptrdiff_iput + +#pragma weak shmem_char_iput = pshmem_char_iput +#pragma weak shmem_short_iput = pshmem_short_iput +#pragma weak shmem_int_iput = pshmem_int_iput +#pragma weak shmem_long_iput = pshmem_long_iput +#pragma weak shmem_float_iput = pshmem_float_iput +#pragma weak shmem_double_iput = pshmem_double_iput +#pragma weak shmem_longlong_iput = pshmem_longlong_iput +#pragma weak shmem_schar_iput = pshmem_schar_iput +#pragma weak shmem_uchar_iput = pshmem_uchar_iput +#pragma weak shmem_ushort_iput = pshmem_ushort_iput +#pragma weak shmem_uint_iput = pshmem_uint_iput +#pragma weak shmem_ulong_iput = pshmem_ulong_iput +#pragma weak shmem_ulonglong_iput = pshmem_ulonglong_iput +#pragma weak shmem_longdouble_iput = 
pshmem_longdouble_iput +#pragma weak shmem_int8_iput = pshmem_int8_iput +#pragma weak shmem_int16_iput = pshmem_int16_iput +#pragma weak shmem_int32_iput = pshmem_int32_iput +#pragma weak shmem_int64_iput = pshmem_int64_iput +#pragma weak shmem_uint8_iput = pshmem_uint8_iput +#pragma weak shmem_uint16_iput = pshmem_uint16_iput +#pragma weak shmem_uint32_iput = pshmem_uint32_iput +#pragma weak shmem_uint64_iput = pshmem_uint64_iput +#pragma weak shmem_size_iput = pshmem_size_iput +#pragma weak shmem_ptrdiff_iput = pshmem_ptrdiff_iput + #pragma weak shmem_ctx_iput8 = pshmem_ctx_iput8 #pragma weak shmem_ctx_iput16 = pshmem_ctx_iput16 #pragma weak shmem_ctx_iput32 = pshmem_ctx_iput32 #pragma weak shmem_ctx_iput64 = pshmem_ctx_iput64 #pragma weak shmem_ctx_iput128 = pshmem_ctx_iput128 + #pragma weak shmem_iput8 = pshmem_iput8 #pragma weak shmem_iput16 = pshmem_iput16 #pragma weak shmem_iput32 = pshmem_iput32 @@ -97,17 +132,50 @@ SHMEM_CTX_TYPE_IPUT(_short, short) SHMEM_CTX_TYPE_IPUT(_int, int) SHMEM_CTX_TYPE_IPUT(_long, long) SHMEM_CTX_TYPE_IPUT(_longlong, long long) +SHMEM_CTX_TYPE_IPUT(_schar, signed char) +SHMEM_CTX_TYPE_IPUT(_uchar, unsigned char) +SHMEM_CTX_TYPE_IPUT(_ushort, unsigned short) +SHMEM_CTX_TYPE_IPUT(_uint, unsigned int) +SHMEM_CTX_TYPE_IPUT(_ulong, unsigned long) +SHMEM_CTX_TYPE_IPUT(_ulonglong, unsigned long long) SHMEM_CTX_TYPE_IPUT(_float, float) SHMEM_CTX_TYPE_IPUT(_double, double) SHMEM_CTX_TYPE_IPUT(_longdouble, long double) +SHMEM_CTX_TYPE_IPUT(_int8, int8_t) +SHMEM_CTX_TYPE_IPUT(_int16, int16_t) +SHMEM_CTX_TYPE_IPUT(_int32, int32_t) +SHMEM_CTX_TYPE_IPUT(_int64, int64_t) +SHMEM_CTX_TYPE_IPUT(_uint8, uint8_t) +SHMEM_CTX_TYPE_IPUT(_uint16, uint16_t) +SHMEM_CTX_TYPE_IPUT(_uint32, uint32_t) +SHMEM_CTX_TYPE_IPUT(_uint64, uint64_t) +SHMEM_CTX_TYPE_IPUT(_size, size_t) +SHMEM_CTX_TYPE_IPUT(_ptrdiff, ptrdiff_t) + SHMEM_TYPE_IPUT(_char, char) SHMEM_TYPE_IPUT(_short, short) SHMEM_TYPE_IPUT(_int, int) SHMEM_TYPE_IPUT(_long, long) SHMEM_TYPE_IPUT(_longlong, 
long long) +SHMEM_TYPE_IPUT(_schar, signed char) +SHMEM_TYPE_IPUT(_uchar, unsigned char) +SHMEM_TYPE_IPUT(_ushort, unsigned short) +SHMEM_TYPE_IPUT(_uint, unsigned int) +SHMEM_TYPE_IPUT(_ulong, unsigned long) +SHMEM_TYPE_IPUT(_ulonglong, unsigned long long) SHMEM_TYPE_IPUT(_float, float) SHMEM_TYPE_IPUT(_double, double) SHMEM_TYPE_IPUT(_longdouble, long double) +SHMEM_TYPE_IPUT(_int8, int8_t) +SHMEM_TYPE_IPUT(_int16, int16_t) +SHMEM_TYPE_IPUT(_int32, int32_t) +SHMEM_TYPE_IPUT(_int64, int64_t) +SHMEM_TYPE_IPUT(_uint8, uint8_t) +SHMEM_TYPE_IPUT(_uint16, uint16_t) +SHMEM_TYPE_IPUT(_uint32, uint32_t) +SHMEM_TYPE_IPUT(_uint64, uint64_t) +SHMEM_TYPE_IPUT(_size, size_t) +SHMEM_TYPE_IPUT(_ptrdiff, ptrdiff_t) #define DO_SHMEM_IPUTMEM(ctx, target, source, tst, sst, element_size, nelems, pe) do { \ int rc = OSHMEM_SUCCESS; \ diff --git a/oshmem/shmem/c/shmem_lock.c b/oshmem/shmem/c/shmem_lock.c index 4bd524f1885..626fed5843a 100644 --- a/oshmem/shmem/c/shmem_lock.c +++ b/oshmem/shmem/c/shmem_lock.c @@ -708,6 +708,7 @@ static int shmem_lock_wait_for_ticket(void *lock, do { shmem_int_get(&remote_turn, lock_turn, 1, server_pe); + opal_progress(); } while (remote_turn != ticket); shmem_get_wrapper(&temp, lock, lock_size, 1, server_pe); diff --git a/oshmem/shmem/c/shmem_or.c b/oshmem/shmem/c/shmem_or.c index 1ae67efbc60..7bdbb59ad34 100644 --- a/oshmem/shmem/c/shmem_or.c +++ b/oshmem/shmem/c/shmem_or.c @@ -25,12 +25,28 @@ */ #if OSHMEM_PROFILING #include "oshmem/include/pshmem.h" +#pragma weak shmem_int_atomic_or = pshmem_int_atomic_or +#pragma weak shmem_long_atomic_or = pshmem_long_atomic_or +#pragma weak shmem_longlong_atomic_or = pshmem_longlong_atomic_or #pragma weak shmem_uint_atomic_or = pshmem_uint_atomic_or #pragma weak shmem_ulong_atomic_or = pshmem_ulong_atomic_or #pragma weak shmem_ulonglong_atomic_or = pshmem_ulonglong_atomic_or +#pragma weak shmem_int32_atomic_or = pshmem_int32_atomic_or +#pragma weak shmem_int64_atomic_or = pshmem_int64_atomic_or +#pragma weak 
shmem_uint32_atomic_or = pshmem_uint32_atomic_or +#pragma weak shmem_uint64_atomic_or = pshmem_uint64_atomic_or + +#pragma weak shmem_ctx_int_atomic_or = pshmem_ctx_int_atomic_or +#pragma weak shmem_ctx_long_atomic_or = pshmem_ctx_long_atomic_or +#pragma weak shmem_ctx_longlong_atomic_or = pshmem_ctx_longlong_atomic_or #pragma weak shmem_ctx_uint_atomic_or = pshmem_ctx_uint_atomic_or #pragma weak shmem_ctx_ulong_atomic_or = pshmem_ctx_ulong_atomic_or #pragma weak shmem_ctx_ulonglong_atomic_or = pshmem_ctx_ulonglong_atomic_or +#pragma weak shmem_ctx_int32_atomic_or = pshmem_ctx_int32_atomic_or +#pragma weak shmem_ctx_int64_atomic_or = pshmem_ctx_int64_atomic_or +#pragma weak shmem_ctx_uint32_atomic_or = pshmem_ctx_uint32_atomic_or +#pragma weak shmem_ctx_uint64_atomic_or = pshmem_ctx_uint64_atomic_or + #pragma weak shmemx_int32_atomic_or = pshmemx_int32_atomic_or #pragma weak shmemx_int64_atomic_or = pshmemx_int64_atomic_or #pragma weak shmemx_uint32_atomic_or = pshmemx_uint32_atomic_or @@ -38,12 +54,28 @@ #include "oshmem/shmem/c/profile/defines.h" #endif +OSHMEM_TYPE_OP(int, int, shmem, or) +OSHMEM_TYPE_OP(long, long, shmem, or) +OSHMEM_TYPE_OP(longlong, long long, shmem, or) OSHMEM_TYPE_OP(uint, unsigned int, shmem, or) OSHMEM_TYPE_OP(ulong, unsigned long, shmem, or) OSHMEM_TYPE_OP(ulonglong, unsigned long long, shmem, or) +OSHMEM_TYPE_OP(int32, int32_t, shmem, or) +OSHMEM_TYPE_OP(int64, int64_t, shmem, or) +OSHMEM_TYPE_OP(uint32, uint32_t, shmem, or) +OSHMEM_TYPE_OP(uint64, uint64_t, shmem, or) + +OSHMEM_CTX_TYPE_OP(int, int, shmem, or) +OSHMEM_CTX_TYPE_OP(long, long, shmem, or) +OSHMEM_CTX_TYPE_OP(longlong, long long, shmem, or) OSHMEM_CTX_TYPE_OP(uint, unsigned int, shmem, or) OSHMEM_CTX_TYPE_OP(ulong, unsigned long, shmem, or) OSHMEM_CTX_TYPE_OP(ulonglong, unsigned long long, shmem, or) +OSHMEM_CTX_TYPE_OP(int32, int32_t, shmem, or) +OSHMEM_CTX_TYPE_OP(int64, int64_t, shmem, or) +OSHMEM_CTX_TYPE_OP(uint32, uint32_t, shmem, or) +OSHMEM_CTX_TYPE_OP(uint64, 
uint64_t, shmem, or) + OSHMEM_TYPE_OP(int32, int32_t, shmemx, or) OSHMEM_TYPE_OP(int64, int64_t, shmemx, or) OSHMEM_TYPE_OP(uint32, uint32_t, shmemx, or) diff --git a/oshmem/shmem/c/shmem_p.c b/oshmem/shmem/c/shmem_p.c index 2f38b32cb34..974333a4361 100644 --- a/oshmem/shmem/c/shmem_p.c +++ b/oshmem/shmem/c/shmem_p.c @@ -59,22 +59,56 @@ #if OSHMEM_PROFILING #include "oshmem/include/pshmem.h" -#pragma weak shmem_ctx_char_p = pshmem_ctx_char_p -#pragma weak shmem_ctx_short_p = pshmem_ctx_short_p -#pragma weak shmem_ctx_int_p = pshmem_ctx_int_p -#pragma weak shmem_ctx_long_p = pshmem_ctx_long_p -#pragma weak shmem_ctx_longlong_p = pshmem_ctx_longlong_p -#pragma weak shmem_ctx_float_p = pshmem_ctx_float_p -#pragma weak shmem_ctx_double_p = pshmem_ctx_double_p +#pragma weak shmem_ctx_char_p = pshmem_ctx_char_p +#pragma weak shmem_ctx_short_p = pshmem_ctx_short_p +#pragma weak shmem_ctx_int_p = pshmem_ctx_int_p +#pragma weak shmem_ctx_long_p = pshmem_ctx_long_p +#pragma weak shmem_ctx_float_p = pshmem_ctx_float_p +#pragma weak shmem_ctx_double_p = pshmem_ctx_double_p +#pragma weak shmem_ctx_longlong_p = pshmem_ctx_longlong_p +#pragma weak shmem_ctx_schar_p = pshmem_ctx_schar_p +#pragma weak shmem_ctx_uchar_p = pshmem_ctx_uchar_p +#pragma weak shmem_ctx_ushort_p = pshmem_ctx_ushort_p +#pragma weak shmem_ctx_uint_p = pshmem_ctx_uint_p +#pragma weak shmem_ctx_ulong_p = pshmem_ctx_ulong_p +#pragma weak shmem_ctx_ulonglong_p = pshmem_ctx_ulonglong_p #pragma weak shmem_ctx_longdouble_p = pshmem_ctx_longdouble_p -#pragma weak shmem_char_p = pshmem_char_p -#pragma weak shmem_short_p = pshmem_short_p -#pragma weak shmem_int_p = pshmem_int_p -#pragma weak shmem_long_p = pshmem_long_p -#pragma weak shmem_longlong_p = pshmem_longlong_p -#pragma weak shmem_float_p = pshmem_float_p -#pragma weak shmem_double_p = pshmem_double_p -#pragma weak shmem_longdouble_p = pshmem_longdouble_p +#pragma weak shmem_ctx_int8_p = pshmem_ctx_int8_p +#pragma weak shmem_ctx_int16_p = pshmem_ctx_int16_p 
+#pragma weak shmem_ctx_int32_p = pshmem_ctx_int32_p +#pragma weak shmem_ctx_int64_p = pshmem_ctx_int64_p +#pragma weak shmem_ctx_uint8_p = pshmem_ctx_uint8_p +#pragma weak shmem_ctx_uint16_p = pshmem_ctx_uint16_p +#pragma weak shmem_ctx_uint32_p = pshmem_ctx_uint32_p +#pragma weak shmem_ctx_uint64_p = pshmem_ctx_uint64_p +#pragma weak shmem_ctx_size_p = pshmem_ctx_size_p +#pragma weak shmem_ctx_ptrdiff_p = pshmem_ctx_ptrdiff_p + +#pragma weak shmem_char_p = pshmem_char_p +#pragma weak shmem_short_p = pshmem_short_p +#pragma weak shmem_int_p = pshmem_int_p +#pragma weak shmem_long_p = pshmem_long_p +#pragma weak shmem_float_p = pshmem_float_p +#pragma weak shmem_double_p = pshmem_double_p +#pragma weak shmem_longlong_p = pshmem_longlong_p +#pragma weak shmem_schar_p = pshmem_schar_p +#pragma weak shmem_uchar_p = pshmem_uchar_p +#pragma weak shmem_ushort_p = pshmem_ushort_p +#pragma weak shmem_uint_p = pshmem_uint_p +#pragma weak shmem_ulong_p = pshmem_ulong_p +#pragma weak shmem_ulonglong_p = pshmem_ulonglong_p +#pragma weak shmem_longdouble_p = pshmem_longdouble_p +#pragma weak shmem_int8_p = pshmem_int8_p +#pragma weak shmem_int16_p = pshmem_int16_p +#pragma weak shmem_int32_p = pshmem_int32_p +#pragma weak shmem_int64_p = pshmem_int64_p +#pragma weak shmem_uint8_p = pshmem_uint8_p +#pragma weak shmem_uint16_p = pshmem_uint16_p +#pragma weak shmem_uint32_p = pshmem_uint32_p +#pragma weak shmem_uint64_p = pshmem_uint64_p +#pragma weak shmem_size_p = pshmem_size_p +#pragma weak shmem_ptrdiff_p = pshmem_ptrdiff_p + #pragma weak shmemx_int16_p = pshmemx_int16_p #pragma weak shmemx_int32_p = pshmemx_int32_p #pragma weak shmemx_int64_p = pshmemx_int64_p @@ -86,17 +120,51 @@ SHMEM_CTX_TYPE_P(_short, short, shmem) SHMEM_CTX_TYPE_P(_int, int, shmem) SHMEM_CTX_TYPE_P(_long, long, shmem) SHMEM_CTX_TYPE_P(_longlong, long long, shmem) +SHMEM_CTX_TYPE_P(_schar, signed char, shmem) +SHMEM_CTX_TYPE_P(_uchar, unsigned char, shmem) +SHMEM_CTX_TYPE_P(_ushort, unsigned short, shmem) 
+SHMEM_CTX_TYPE_P(_uint, unsigned int, shmem) +SHMEM_CTX_TYPE_P(_ulong, unsigned long, shmem) +SHMEM_CTX_TYPE_P(_ulonglong, unsigned long long, shmem) SHMEM_CTX_TYPE_P(_float, float, shmem) SHMEM_CTX_TYPE_P(_double, double, shmem) SHMEM_CTX_TYPE_P(_longdouble, long double, shmem) +SHMEM_CTX_TYPE_P(_int8, int8_t, shmem) +SHMEM_CTX_TYPE_P(_int16, int16_t, shmem) +SHMEM_CTX_TYPE_P(_int32, int32_t, shmem) +SHMEM_CTX_TYPE_P(_int64, int64_t, shmem) +SHMEM_CTX_TYPE_P(_uint8, uint8_t, shmem) +SHMEM_CTX_TYPE_P(_uint16, uint16_t, shmem) +SHMEM_CTX_TYPE_P(_uint32, uint32_t, shmem) +SHMEM_CTX_TYPE_P(_uint64, uint64_t, shmem) +SHMEM_CTX_TYPE_P(_size, size_t, shmem) +SHMEM_CTX_TYPE_P(_ptrdiff, ptrdiff_t, shmem) + SHMEM_TYPE_P(_char, char, shmem) SHMEM_TYPE_P(_short, short, shmem) SHMEM_TYPE_P(_int, int, shmem) SHMEM_TYPE_P(_long, long, shmem) SHMEM_TYPE_P(_longlong, long long, shmem) +SHMEM_TYPE_P(_schar, signed char, shmem) +SHMEM_TYPE_P(_uchar, unsigned char, shmem) +SHMEM_TYPE_P(_ushort, unsigned short, shmem) +SHMEM_TYPE_P(_uint, unsigned int, shmem) +SHMEM_TYPE_P(_ulong, unsigned long, shmem) +SHMEM_TYPE_P(_ulonglong, unsigned long long, shmem) SHMEM_TYPE_P(_float, float, shmem) SHMEM_TYPE_P(_double, double, shmem) SHMEM_TYPE_P(_longdouble, long double, shmem) +SHMEM_TYPE_P(_int8, int8_t, shmem) +SHMEM_TYPE_P(_int16, int16_t, shmem) +SHMEM_TYPE_P(_int32, int32_t, shmem) +SHMEM_TYPE_P(_int64, int64_t, shmem) +SHMEM_TYPE_P(_uint8, uint8_t, shmem) +SHMEM_TYPE_P(_uint16, uint16_t, shmem) +SHMEM_TYPE_P(_uint32, uint32_t, shmem) +SHMEM_TYPE_P(_uint64, uint64_t, shmem) +SHMEM_TYPE_P(_size, size_t, shmem) +SHMEM_TYPE_P(_ptrdiff, ptrdiff_t, shmem) + SHMEM_TYPE_P(_int16, int16_t, shmemx) SHMEM_TYPE_P(_int32, int32_t, shmemx) SHMEM_TYPE_P(_int64, int64_t, shmemx) diff --git a/oshmem/shmem/c/shmem_ptr.c b/oshmem/shmem/c/shmem_ptr.c index 35a324c2212..afceb9f6222 100644 --- a/oshmem/shmem/c/shmem_ptr.c +++ b/oshmem/shmem/c/shmem_ptr.c @@ -52,7 +52,8 @@ void *shmem_ptr(const void 
*dst_addr, int pe) } for (i = 0; i < mca_memheap_base_num_transports(); i++) { - mkey = mca_memheap_base_get_cached_mkey(pe, (void *)dst_addr, i, &rva); + /* TODO: iterate on all ctxs, try to get cached mkeys */ + mkey = mca_memheap_base_get_cached_mkey(oshmem_ctx_default, pe, (void *)dst_addr, i, &rva); if (!mkey) { continue; } diff --git a/oshmem/shmem/c/shmem_put.c b/oshmem/shmem/c/shmem_put.c index c734409ea70..98b6f24c7b8 100644 --- a/oshmem/shmem/c/shmem_put.c +++ b/oshmem/shmem/c/shmem_put.c @@ -60,28 +60,63 @@ #if OSHMEM_PROFILING #include "oshmem/include/pshmem.h" -#pragma weak shmem_ctx_char_put = pshmem_ctx_char_put -#pragma weak shmem_ctx_short_put = pshmem_ctx_short_put -#pragma weak shmem_ctx_int_put = pshmem_ctx_int_put -#pragma weak shmem_ctx_long_put = pshmem_ctx_long_put -#pragma weak shmem_ctx_longlong_put = pshmem_ctx_longlong_put -#pragma weak shmem_ctx_float_put = pshmem_ctx_float_put -#pragma weak shmem_ctx_double_put = pshmem_ctx_double_put +#pragma weak shmem_ctx_char_put = pshmem_ctx_char_put +#pragma weak shmem_ctx_short_put = pshmem_ctx_short_put +#pragma weak shmem_ctx_int_put = pshmem_ctx_int_put +#pragma weak shmem_ctx_long_put = pshmem_ctx_long_put +#pragma weak shmem_ctx_float_put = pshmem_ctx_float_put +#pragma weak shmem_ctx_double_put = pshmem_ctx_double_put +#pragma weak shmem_ctx_longlong_put = pshmem_ctx_longlong_put +#pragma weak shmem_ctx_schar_put = pshmem_ctx_schar_put +#pragma weak shmem_ctx_uchar_put = pshmem_ctx_uchar_put +#pragma weak shmem_ctx_ushort_put = pshmem_ctx_ushort_put +#pragma weak shmem_ctx_uint_put = pshmem_ctx_uint_put +#pragma weak shmem_ctx_ulong_put = pshmem_ctx_ulong_put +#pragma weak shmem_ctx_ulonglong_put = pshmem_ctx_ulonglong_put #pragma weak shmem_ctx_longdouble_put = pshmem_ctx_longdouble_put -#pragma weak shmem_char_put = pshmem_char_put -#pragma weak shmem_short_put = pshmem_short_put -#pragma weak shmem_int_put = pshmem_int_put -#pragma weak shmem_long_put = pshmem_long_put -#pragma weak 
shmem_longlong_put = pshmem_longlong_put -#pragma weak shmem_float_put = pshmem_float_put -#pragma weak shmem_double_put = pshmem_double_put -#pragma weak shmem_longdouble_put = pshmem_longdouble_put +#pragma weak shmem_ctx_int8_put = pshmem_ctx_int8_put +#pragma weak shmem_ctx_int16_put = pshmem_ctx_int16_put +#pragma weak shmem_ctx_int32_put = pshmem_ctx_int32_put +#pragma weak shmem_ctx_int64_put = pshmem_ctx_int64_put +#pragma weak shmem_ctx_uint8_put = pshmem_ctx_uint8_put +#pragma weak shmem_ctx_uint16_put = pshmem_ctx_uint16_put +#pragma weak shmem_ctx_uint32_put = pshmem_ctx_uint32_put +#pragma weak shmem_ctx_uint64_put = pshmem_ctx_uint64_put +#pragma weak shmem_ctx_size_put = pshmem_ctx_size_put +#pragma weak shmem_ctx_ptrdiff_put = pshmem_ctx_ptrdiff_put + +#pragma weak shmem_char_put = pshmem_char_put +#pragma weak shmem_short_put = pshmem_short_put +#pragma weak shmem_int_put = pshmem_int_put +#pragma weak shmem_long_put = pshmem_long_put +#pragma weak shmem_float_put = pshmem_float_put +#pragma weak shmem_double_put = pshmem_double_put +#pragma weak shmem_longlong_put = pshmem_longlong_put +#pragma weak shmem_schar_put = pshmem_schar_put +#pragma weak shmem_uchar_put = pshmem_uchar_put +#pragma weak shmem_ushort_put = pshmem_ushort_put +#pragma weak shmem_uint_put = pshmem_uint_put +#pragma weak shmem_ulong_put = pshmem_ulong_put +#pragma weak shmem_ulonglong_put = pshmem_ulonglong_put +#pragma weak shmem_longdouble_put = pshmem_longdouble_put +#pragma weak shmem_int8_put = pshmem_int8_put +#pragma weak shmem_int16_put = pshmem_int16_put +#pragma weak shmem_int32_put = pshmem_int32_put +#pragma weak shmem_int64_put = pshmem_int64_put +#pragma weak shmem_uint8_put = pshmem_uint8_put +#pragma weak shmem_uint16_put = pshmem_uint16_put +#pragma weak shmem_uint32_put = pshmem_uint32_put +#pragma weak shmem_uint64_put = pshmem_uint64_put +#pragma weak shmem_size_put = pshmem_size_put +#pragma weak shmem_ptrdiff_put = pshmem_ptrdiff_put + #pragma weak 
shmem_ctx_putmem = pshmem_ctx_putmem #pragma weak shmem_ctx_put8 = pshmem_ctx_put8 #pragma weak shmem_ctx_put16 = pshmem_ctx_put16 #pragma weak shmem_ctx_put32 = pshmem_ctx_put32 #pragma weak shmem_ctx_put64 = pshmem_ctx_put64 #pragma weak shmem_ctx_put128 = pshmem_ctx_put128 + #pragma weak shmem_putmem = pshmem_putmem #pragma weak shmem_put8 = pshmem_put8 #pragma weak shmem_put16 = pshmem_put16 @@ -96,17 +131,50 @@ SHMEM_CTX_TYPE_PUT(_short, short) SHMEM_CTX_TYPE_PUT(_int, int) SHMEM_CTX_TYPE_PUT(_long, long) SHMEM_CTX_TYPE_PUT(_longlong, long long) +SHMEM_CTX_TYPE_PUT(_schar, signed char) +SHMEM_CTX_TYPE_PUT(_uchar, unsigned char) +SHMEM_CTX_TYPE_PUT(_ushort, unsigned short) +SHMEM_CTX_TYPE_PUT(_uint, unsigned int) +SHMEM_CTX_TYPE_PUT(_ulong, unsigned long) +SHMEM_CTX_TYPE_PUT(_ulonglong, unsigned long long) SHMEM_CTX_TYPE_PUT(_float, float) SHMEM_CTX_TYPE_PUT(_double, double) SHMEM_CTX_TYPE_PUT(_longdouble, long double) +SHMEM_CTX_TYPE_PUT(_int8, int8_t) +SHMEM_CTX_TYPE_PUT(_int16, int16_t) +SHMEM_CTX_TYPE_PUT(_int32, int32_t) +SHMEM_CTX_TYPE_PUT(_int64, int64_t) +SHMEM_CTX_TYPE_PUT(_uint8, uint8_t) +SHMEM_CTX_TYPE_PUT(_uint16, uint16_t) +SHMEM_CTX_TYPE_PUT(_uint32, uint32_t) +SHMEM_CTX_TYPE_PUT(_uint64, uint64_t) +SHMEM_CTX_TYPE_PUT(_size, size_t) +SHMEM_CTX_TYPE_PUT(_ptrdiff, ptrdiff_t) + SHMEM_TYPE_PUT(_char, char) SHMEM_TYPE_PUT(_short, short) SHMEM_TYPE_PUT(_int, int) SHMEM_TYPE_PUT(_long, long) SHMEM_TYPE_PUT(_longlong, long long) +SHMEM_TYPE_PUT(_schar, signed char) +SHMEM_TYPE_PUT(_uchar, unsigned char) +SHMEM_TYPE_PUT(_ushort, unsigned short) +SHMEM_TYPE_PUT(_uint, unsigned int) +SHMEM_TYPE_PUT(_ulong, unsigned long) +SHMEM_TYPE_PUT(_ulonglong, unsigned long long) SHMEM_TYPE_PUT(_float, float) SHMEM_TYPE_PUT(_double, double) SHMEM_TYPE_PUT(_longdouble, long double) +SHMEM_TYPE_PUT(_int8, int8_t) +SHMEM_TYPE_PUT(_int16, int16_t) +SHMEM_TYPE_PUT(_int32, int32_t) +SHMEM_TYPE_PUT(_int64, int64_t) +SHMEM_TYPE_PUT(_uint8, uint8_t) +SHMEM_TYPE_PUT(_uint16, 
uint16_t) +SHMEM_TYPE_PUT(_uint32, uint32_t) +SHMEM_TYPE_PUT(_uint64, uint64_t) +SHMEM_TYPE_PUT(_size, size_t) +SHMEM_TYPE_PUT(_ptrdiff, ptrdiff_t) #define DO_SHMEM_PUTMEM(ctx, target, source, element_size, nelems, pe) do { \ int rc = OSHMEM_SUCCESS; \ diff --git a/oshmem/shmem/c/shmem_put_nb.c b/oshmem/shmem/c/shmem_put_nb.c index eb422ad6762..bf63130e236 100644 --- a/oshmem/shmem/c/shmem_put_nb.c +++ b/oshmem/shmem/c/shmem_put_nb.c @@ -64,28 +64,63 @@ #if OSHMEM_PROFILING #include "oshmem/include/pshmem.h" -#pragma weak shmem_char_put_nbi = pshmem_char_put_nbi -#pragma weak shmem_short_put_nbi = pshmem_short_put_nbi -#pragma weak shmem_int_put_nbi = pshmem_int_put_nbi -#pragma weak shmem_long_put_nbi = pshmem_long_put_nbi -#pragma weak shmem_longlong_put_nbi = pshmem_longlong_put_nbi -#pragma weak shmem_float_put_nbi = pshmem_float_put_nbi -#pragma weak shmem_double_put_nbi = pshmem_double_put_nbi -#pragma weak shmem_longdouble_put_nbi = pshmem_longdouble_put_nbi +#pragma weak shmem_ctx_char_put_nbi = pshmem_ctx_char_put_nbi +#pragma weak shmem_ctx_short_put_nbi = pshmem_ctx_short_put_nbi +#pragma weak shmem_ctx_int_put_nbi = pshmem_ctx_int_put_nbi +#pragma weak shmem_ctx_long_put_nbi = pshmem_ctx_long_put_nbi +#pragma weak shmem_ctx_float_put_nbi = pshmem_ctx_float_put_nbi +#pragma weak shmem_ctx_double_put_nbi = pshmem_ctx_double_put_nbi +#pragma weak shmem_ctx_longlong_put_nbi = pshmem_ctx_longlong_put_nbi +#pragma weak shmem_ctx_schar_put_nbi = pshmem_ctx_schar_put_nbi +#pragma weak shmem_ctx_uchar_put_nbi = pshmem_ctx_uchar_put_nbi +#pragma weak shmem_ctx_ushort_put_nbi = pshmem_ctx_ushort_put_nbi +#pragma weak shmem_ctx_uint_put_nbi = pshmem_ctx_uint_put_nbi +#pragma weak shmem_ctx_ulong_put_nbi = pshmem_ctx_ulong_put_nbi +#pragma weak shmem_ctx_ulonglong_put_nbi = pshmem_ctx_ulonglong_put_nbi +#pragma weak shmem_ctx_longdouble_put_nbi = pshmem_ctx_longdouble_put_nbi +#pragma weak shmem_ctx_int8_put_nbi = pshmem_ctx_int8_put_nbi +#pragma weak 
shmem_ctx_int16_put_nbi = pshmem_ctx_int16_put_nbi +#pragma weak shmem_ctx_int32_put_nbi = pshmem_ctx_int32_put_nbi +#pragma weak shmem_ctx_int64_put_nbi = pshmem_ctx_int64_put_nbi +#pragma weak shmem_ctx_uint8_put_nbi = pshmem_ctx_uint8_put_nbi +#pragma weak shmem_ctx_uint16_put_nbi = pshmem_ctx_uint16_put_nbi +#pragma weak shmem_ctx_uint32_put_nbi = pshmem_ctx_uint32_put_nbi +#pragma weak shmem_ctx_uint64_put_nbi = pshmem_ctx_uint64_put_nbi +#pragma weak shmem_ctx_size_put_nbi = pshmem_ctx_size_put_nbi +#pragma weak shmem_ctx_ptrdiff_put_nbi = pshmem_ctx_ptrdiff_put_nbi + +#pragma weak shmem_char_put_nbi = pshmem_char_put_nbi +#pragma weak shmem_short_put_nbi = pshmem_short_put_nbi +#pragma weak shmem_int_put_nbi = pshmem_int_put_nbi +#pragma weak shmem_long_put_nbi = pshmem_long_put_nbi +#pragma weak shmem_float_put_nbi = pshmem_float_put_nbi +#pragma weak shmem_double_put_nbi = pshmem_double_put_nbi +#pragma weak shmem_longlong_put_nbi = pshmem_longlong_put_nbi +#pragma weak shmem_schar_put_nbi = pshmem_schar_put_nbi +#pragma weak shmem_uchar_put_nbi = pshmem_uchar_put_nbi +#pragma weak shmem_ushort_put_nbi = pshmem_ushort_put_nbi +#pragma weak shmem_uint_put_nbi = pshmem_uint_put_nbi +#pragma weak shmem_ulong_put_nbi = pshmem_ulong_put_nbi +#pragma weak shmem_ulonglong_put_nbi = pshmem_ulonglong_put_nbi +#pragma weak shmem_longdouble_put_nbi = pshmem_longdouble_put_nbi +#pragma weak shmem_int8_put_nbi = pshmem_int8_put_nbi +#pragma weak shmem_int16_put_nbi = pshmem_int16_put_nbi +#pragma weak shmem_int32_put_nbi = pshmem_int32_put_nbi +#pragma weak shmem_int64_put_nbi = pshmem_int64_put_nbi +#pragma weak shmem_uint8_put_nbi = pshmem_uint8_put_nbi +#pragma weak shmem_uint16_put_nbi = pshmem_uint16_put_nbi +#pragma weak shmem_uint32_put_nbi = pshmem_uint32_put_nbi +#pragma weak shmem_uint64_put_nbi = pshmem_uint64_put_nbi +#pragma weak shmem_size_put_nbi = pshmem_size_put_nbi +#pragma weak shmem_ptrdiff_put_nbi = pshmem_ptrdiff_put_nbi + #pragma weak 
shmem_put8_nbi = pshmem_put8_nbi #pragma weak shmem_put16_nbi = pshmem_put16_nbi #pragma weak shmem_put32_nbi = pshmem_put32_nbi #pragma weak shmem_put64_nbi = pshmem_put64_nbi #pragma weak shmem_put128_nbi = pshmem_put128_nbi #pragma weak shmem_putmem_nbi = pshmem_putmem_nbi -#pragma weak shmem_ctx_char_put_nbi = pshmem_ctx_char_put_nbi -#pragma weak shmem_ctx_short_put_nbi = pshmem_ctx_short_put_nbi -#pragma weak shmem_ctx_int_put_nbi = pshmem_ctx_int_put_nbi -#pragma weak shmem_ctx_long_put_nbi = pshmem_ctx_long_put_nbi -#pragma weak shmem_ctx_longlong_put_nbi = pshmem_ctx_longlong_put_nbi -#pragma weak shmem_ctx_float_put_nbi = pshmem_ctx_float_put_nbi -#pragma weak shmem_ctx_double_put_nbi = pshmem_ctx_double_put_nbi -#pragma weak shmem_ctx_longdouble_put_nbi = pshmem_ctx_longdouble_put_nbi + #pragma weak shmem_ctx_put8_nbi = pshmem_ctx_put8_nbi #pragma weak shmem_ctx_put16_nbi = pshmem_ctx_put16_nbi #pragma weak shmem_ctx_put32_nbi = pshmem_ctx_put32_nbi @@ -100,17 +135,50 @@ SHMEM_CTX_TYPE_PUT_NB(_short, short) SHMEM_CTX_TYPE_PUT_NB(_int, int) SHMEM_CTX_TYPE_PUT_NB(_long, long) SHMEM_CTX_TYPE_PUT_NB(_longlong, long long) +SHMEM_CTX_TYPE_PUT_NB(_schar, signed char) +SHMEM_CTX_TYPE_PUT_NB(_uchar, unsigned char) +SHMEM_CTX_TYPE_PUT_NB(_ushort, unsigned short) +SHMEM_CTX_TYPE_PUT_NB(_uint, unsigned int) +SHMEM_CTX_TYPE_PUT_NB(_ulong, unsigned long) +SHMEM_CTX_TYPE_PUT_NB(_ulonglong, unsigned long long) SHMEM_CTX_TYPE_PUT_NB(_float, float) SHMEM_CTX_TYPE_PUT_NB(_double, double) SHMEM_CTX_TYPE_PUT_NB(_longdouble, long double) +SHMEM_CTX_TYPE_PUT_NB(_int8, int8_t) +SHMEM_CTX_TYPE_PUT_NB(_int16, int16_t) +SHMEM_CTX_TYPE_PUT_NB(_int32, int32_t) +SHMEM_CTX_TYPE_PUT_NB(_int64, int64_t) +SHMEM_CTX_TYPE_PUT_NB(_uint8, uint8_t) +SHMEM_CTX_TYPE_PUT_NB(_uint16, uint16_t) +SHMEM_CTX_TYPE_PUT_NB(_uint32, uint32_t) +SHMEM_CTX_TYPE_PUT_NB(_uint64, uint64_t) +SHMEM_CTX_TYPE_PUT_NB(_size, size_t) +SHMEM_CTX_TYPE_PUT_NB(_ptrdiff, ptrdiff_t) + SHMEM_TYPE_PUT_NB(_char, char) 
SHMEM_TYPE_PUT_NB(_short, short) SHMEM_TYPE_PUT_NB(_int, int) SHMEM_TYPE_PUT_NB(_long, long) SHMEM_TYPE_PUT_NB(_longlong, long long) +SHMEM_TYPE_PUT_NB(_schar, signed char) +SHMEM_TYPE_PUT_NB(_uchar, unsigned char) +SHMEM_TYPE_PUT_NB(_ushort, unsigned short) +SHMEM_TYPE_PUT_NB(_uint, unsigned int) +SHMEM_TYPE_PUT_NB(_ulong, unsigned long) +SHMEM_TYPE_PUT_NB(_ulonglong, unsigned long long) SHMEM_TYPE_PUT_NB(_float, float) SHMEM_TYPE_PUT_NB(_double, double) SHMEM_TYPE_PUT_NB(_longdouble, long double) +SHMEM_TYPE_PUT_NB(_int8, int8_t) +SHMEM_TYPE_PUT_NB(_int16, int16_t) +SHMEM_TYPE_PUT_NB(_int32, int32_t) +SHMEM_TYPE_PUT_NB(_int64, int64_t) +SHMEM_TYPE_PUT_NB(_uint8, uint8_t) +SHMEM_TYPE_PUT_NB(_uint16, uint16_t) +SHMEM_TYPE_PUT_NB(_uint32, uint32_t) +SHMEM_TYPE_PUT_NB(_uint64, uint64_t) +SHMEM_TYPE_PUT_NB(_size, size_t) +SHMEM_TYPE_PUT_NB(_ptrdiff, ptrdiff_t) #define DO_SHMEM_PUTMEM_NB(ctx, target, source, element_size, nelems, pe) do { \ int rc = OSHMEM_SUCCESS; \ @@ -158,3 +226,12 @@ SHMEM_TYPE_PUTMEM_NB(_put32, 4, shmem) SHMEM_TYPE_PUTMEM_NB(_put64, 8, shmem) SHMEM_TYPE_PUTMEM_NB(_put128, 16, shmem) SHMEM_TYPE_PUTMEM_NB(_putmem, 1, shmem) + +void shmemx_alltoall_global_nb(void *dest, + const void *source, + size_t size, + long *counter) +{ + int rc = MCA_SPML_CALL(put_all_nb(dest, source, size, counter)); + RUNTIME_CHECK_RC(rc); +} diff --git a/oshmem/shmem/c/shmem_realloc.c b/oshmem/shmem/c/shmem_realloc.c index 0a45cf9fe3f..7aab27735f5 100644 --- a/oshmem/shmem/c/shmem_realloc.c +++ b/oshmem/shmem/c/shmem_realloc.c @@ -18,6 +18,7 @@ #include "oshmem/shmem/shmem_api_logger.h" #include "oshmem/mca/memheap/memheap.h" +#include "oshmem/mca/memheap/base/base.h" #if OSHMEM_PROFILING #include "oshmem/include/pshmem.h" @@ -42,12 +43,23 @@ static inline void* _shrealloc(void *ptr, size_t size) { int rc; void* pBuff = NULL; + map_segment_t *s; RUNTIME_CHECK_INIT(); SHMEM_MUTEX_LOCK(shmem_internal_mutex_alloc); - rc = MCA_MEMHEAP_CALL(realloc(size, ptr, &pBuff)); + if 
(ptr) { + s = memheap_find_va(ptr); + } else { + s = NULL; + } + + if (s && s->allocator) { + rc = s->allocator->realloc(s, size, ptr, &pBuff); + } else { + rc = MCA_MEMHEAP_CALL(realloc(size, ptr, &pBuff)); + } SHMEM_MUTEX_UNLOCK(shmem_internal_mutex_alloc); diff --git a/oshmem/shmem/c/shmem_reduce.c b/oshmem/shmem/c/shmem_reduce.c index 11654196abf..02c47023ce7 100644 --- a/oshmem/shmem/c/shmem_reduce.c +++ b/oshmem/shmem/c/shmem_reduce.c @@ -26,8 +26,8 @@ * object of every PE in the active set. The active set of PEs is defined by the triple PE_start, * logPE_stride and PE_size. */ -#define SHMEM_TYPE_REDUCE_OP(name, type_name, type, prefix) \ - void prefix##type_name##_##name##_to_all( type *target, \ +#define SHMEM_TYPE_REDUCE_OP(name, type_name, type, prefix) \ + void prefix##type_name##_##name##_to_all( type *target, \ const type *source, \ int nreduce, \ int PE_start, \ @@ -40,8 +40,8 @@ oshmem_group_t* group = NULL; \ \ RUNTIME_CHECK_INIT(); \ - RUNTIME_CHECK_ADDR(target); \ - RUNTIME_CHECK_ADDR(source); \ + RUNTIME_CHECK_ADDR_SIZE(target, nreduce); \ + RUNTIME_CHECK_ADDR_SIZE(source, nreduce); \ \ { \ group = oshmem_proc_group_create_nofail(PE_start, 1<dt_size, \ - FPTR_2_VOID_PTR(pSync), SCOLL_DEFAULT_ALG );\ + FPTR_2_VOID_PTR(pSync), \ + true, \ + SCOLL_DEFAULT_ALG );\ out: \ oshmem_proc_group_destroy(group);\ RUNTIME_CHECK_RC(rc); \ diff --git a/oshmem/shmem/fortran/shmem_int4_cswap_f.c b/oshmem/shmem/fortran/shmem_int4_cswap_f.c index 460cc7c4dca..a1e5fbfe924 100644 --- a/oshmem/shmem/fortran/shmem_int4_cswap_f.c +++ b/oshmem/shmem/fortran/shmem_int4_cswap_f.c @@ -4,6 +4,7 @@ * Copyright (c) 2013 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2014 Research Organization for Information Science * and Technology (RIST). All rights reserved. + * Copyright (c) 2019 IBM Corporation. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -42,7 +43,7 @@ ompi_fortran_integer4_t shmem_int4_cswap_f(FORTRAN_POINTER_T target, MPI_Fint *c (void *)&out_value, FPTR_2_INT(cond, sizeof(ompi_fortran_integer4_t)), FPTR_2_INT(value, sizeof(ompi_fortran_integer4_t)), - sizeof(out_value), + sizeof(ompi_fortran_integer4_t), OMPI_FINT_2_INT(*pe))); return out_value; diff --git a/test/datatype/Makefile.am b/test/datatype/Makefile.am index cd867134a4f..4366724a523 100644 --- a/test/datatype/Makefile.am +++ b/test/datatype/Makefile.am @@ -4,8 +4,8 @@ # reserved. # Copyright (c) 2006-2015 Cisco Systems, Inc. All rights reserved. # Copyright (c) 2009 Oak Ridge National Labs. All rights reserved. -# Copyright (c) 2014-2015 Research Organization for Information Science -# and Technology (RIST). All rights reserved. +# Copyright (c) 2014-2019 Research Organization for Information Science +# and Technology (RIST). All rights reserved. # Copyright (c) 2016 IBM Corporation. All rights reserved. # $COPYRIGHT$ # @@ -15,7 +15,7 @@ # if PROJECT_OMPI - MPI_TESTS = checksum position position_noncontig ddt_test ddt_raw unpack_ooo ddt_pack external32 + MPI_TESTS = checksum position position_noncontig ddt_test ddt_raw ddt_raw2 unpack_ooo ddt_pack external32 large_data MPI_CHECKS = to_self endif TESTS = opal_datatype_test unpack_hetero $(MPI_TESTS) @@ -40,6 +40,12 @@ ddt_raw_LDADD = \ $(top_builddir)/ompi/lib@OMPI_LIBMPI_NAME@.la \ $(top_builddir)/opal/lib@OPAL_LIB_PREFIX@open-pal.la +ddt_raw2_SOURCES = ddt_raw2.c ddt_lib.c ddt_lib.h +ddt_raw2_LDFLAGS = $(OMPI_PKG_CONFIG_LDFLAGS) +ddt_raw2_LDADD = \ + $(top_builddir)/ompi/lib@OMPI_LIBMPI_NAME@.la \ + $(top_builddir)/opal/lib@OPAL_LIB_PREFIX@open-pal.la + ddt_pack_SOURCES = ddt_pack.c ddt_pack_LDFLAGS = $(OMPI_PKG_CONFIG_LDFLAGS) ddt_pack_LDADD = \ @@ -68,6 +74,12 @@ to_self_SOURCES = to_self.c to_self_LDFLAGS = $(OMPI_PKG_CONFIG_LDFLAGS) to_self_LDADD = $(top_builddir)/ompi/lib@OMPI_LIBMPI_NAME@.la +large_data_SOURCES = large_data.c 
+large_data_LDFLAGS = $(OMPI_PKG_CONFIG_LDFLAGS) +large_data_LDADD = \ + $(top_builddir)/ompi/lib@OMPI_LIBMPI_NAME@.la \ + $(top_builddir)/opal/lib@OPAL_LIB_PREFIX@open-pal.la + opal_datatype_test_SOURCES = opal_datatype_test.c opal_ddt_lib.c opal_ddt_lib.h opal_datatype_test_LDFLAGS = $(OMPI_PKG_CONFIG_LDFLAGS) opal_datatype_test_LDADD = \ diff --git a/test/datatype/ddt_pack.c b/test/datatype/ddt_pack.c index 1164e6feca8..423574efebd 100644 --- a/test/datatype/ddt_pack.c +++ b/test/datatype/ddt_pack.c @@ -51,7 +51,7 @@ main(int argc, char* argv[]) int ret = 0; int blen[4]; ptrdiff_t disp[4]; - ompi_datatype_t *newType, *types[4], *struct_type, *vec_type; + ompi_datatype_t *newType, *types[4], *struct_type, *vec_type, *dup_type; ptrdiff_t old_lb, old_extent, old_true_lb, old_true_extent; ptrdiff_t lb, extent, true_lb, true_extent; @@ -394,6 +394,53 @@ main(int argc, char* argv[]) ret = ompi_datatype_destroy(&unpacked_dt); if (ret != 0) goto cleanup; + /** + * + * TEST 7 + * + */ + printf("---> Basic test with dup'ed MPI_INT\n"); + + ret = get_extents(&ompi_mpi_int.dt, &old_lb, &old_extent, &old_true_lb, &old_true_extent); + if (ret != 0) goto cleanup; + ret = ompi_datatype_duplicate(&ompi_mpi_int.dt, &dup_type); + if (ret != 0) goto cleanup; + ompi_datatype_t * type = &ompi_mpi_int.dt; + ret = ompi_datatype_set_args(dup_type, 0, NULL, 0, NULL, 1, &type, MPI_COMBINER_DUP); + if (ret != 0) goto cleanup; + packed_ddt_len = ompi_datatype_pack_description_length(dup_type); + ptr = payload = malloc(packed_ddt_len); + ret = ompi_datatype_get_pack_description(dup_type, &packed_ddt); + if (ret != 0) goto cleanup; + + memcpy(payload, packed_ddt, packed_ddt_len); + unpacked_dt = ompi_datatype_create_from_packed_description(&payload, + ompi_proc_local()); + free(ptr); + if (unpacked_dt == NULL) { + printf("\tFAILED: could not unpack datatype\n"); + ret = 1; + goto cleanup; + } else { + ret = get_extents(unpacked_dt, &lb, &extent, &true_lb, &true_extent); + if (ret != 0) goto 
cleanup; + + if (old_lb != lb || old_extent != extent || + old_true_lb != true_lb || old_true_extent != true_extent) { /* true extent must be checked against true extent, not extent */ + printf("\tFAILED: datatypes don't match\n"); + ret = 1; + goto cleanup; + } + printf("\tPASSED\n"); + } + if (unpacked_dt == &ompi_mpi_int32_t.dt) { + printf("\tPASSED\n"); + } else { + printf("\tFAILED: datatypes don't match\n"); + ret = 1; + goto cleanup; + } + ompi_datatype_destroy(&dup_type); cleanup: ompi_datatype_finalize(); diff --git a/test/datatype/ddt_raw.c b/test/datatype/ddt_raw.c index de35d6b83f4..bba285ceea0 100644 --- a/test/datatype/ddt_raw.c +++ b/test/datatype/ddt_raw.c @@ -3,7 +3,7 @@ * Copyright (c) 2004-2006 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. - * Copyright (c) 2004-2013 The University of Tennessee and The University + * Copyright (c) 2004-2019 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. * Copyright (c) 2004-2006 High Performance Computing Center Stuttgart, @@ -71,7 +71,7 @@ static int test_upper( unsigned int length ) iov_count = 5; max_data = 0; opal_convertor_raw( pConv, iov, &iov_count, &max_data ); - i -= max_data; + i -= max_data; } GET_TIME( end ); total_time = ELAPSED_TIME( start, end ); @@ -85,12 +85,12 @@ static int test_upper( unsigned int length ) } /** - * Conversion function. They deal with data-types in 3 ways, always making local copies. + * Conversion function. They deal with datatypes in 3 ways, always making local copies. * In order to allow performance testings, there are 3 functions: * - one copying directly from one memory location to another one using the - * data-type copy function. - * - one which use a 2 convertors created with the same data-type - * - and one using 2 convertors created from different data-types. + * datatype copy function. 
+ * - one which use a 2 convertors created with the same datatype + * - and one using 2 convertors created from different datatypes. * */ static int local_copy_ddt_raw( ompi_datatype_t* pdt, int count, int iov_num ) @@ -114,13 +114,13 @@ static int local_copy_ddt_raw( ompi_datatype_t* pdt, int count, int iov_num ) GET_TIME( start ); while( 0 == opal_convertor_raw(convertor, iov, &iov_count, &max_data) ) { #if 0 - printf( "New raw extraction (iov_count = %d, max_data = %zu)\n", - iov_count, max_data ); - for( i = 0; i < iov_count; i++ ) { - printf( "\t{%p, %d}\n", iov[i].iov_base, iov[i].iov_len ); - } + printf( "New raw extraction (iov_count = %d, max_data = %zu)\n", + iov_count, max_data ); + for( i = 0; i < iov_count; i++ ) { + printf( "\t{%p, %d}\n", iov[i].iov_base, iov[i].iov_len ); + } #endif - remaining_length -= max_data; + remaining_length -= max_data; iov_count = iov_num; } remaining_length -= max_data; @@ -129,19 +129,23 @@ static int local_copy_ddt_raw( ompi_datatype_t* pdt, int count, int iov_num ) printf( "raw extraction in %ld microsec\n", total_time ); OBJ_RELEASE( convertor ); if( remaining_length != 0 ) { - printf( "Not all raw description was been extracted (%lu bytes missing)\n", - (unsigned long) remaining_length ); + printf( "Not all raw description was been extracted (%lu bytes missing)\n", + (unsigned long) remaining_length ); } free(iov); return OMPI_SUCCESS; } /** - * Main function. Call several tests and print-out the results. It try to stress the convertor - * using difficult data-type constructions as well as strange segment sizes for the conversion. - * Usually, it is able to detect most of the data-type and convertor problems. Any modifications - * on the data-type engine should first pass all the tests from this file, before going into other - * tests. + * Go over a set of datatypes and copy them using the raw functionality provided by the + * convertor. 
The goal of this test is to stress the convertor using several more or less + * difficult datatype, with a large set of segment sizes for the conversion. It can be used + * to highlight the raw capability of the convertor as well as detecting datatype convertor + * problems. + * + * This test is part of the testing infrastructure for the core datatype engine. As such any + * modifications on the datatype engine should first pass all the tests from this file, + * before going into other tests. */ int main( int argc, char* argv[] ) { @@ -226,7 +230,7 @@ int main( int argc, char* argv[] ) OBJ_RELEASE( pdt3 ); assert( pdt3 == NULL ); printf( ">>--------------------------------------------<<\n" ); - printf( " Contiguous data-type (MPI_DOUBLE)\n" ); + printf( " Contiguous datatype (MPI_DOUBLE)\n" ); pdt = MPI_DOUBLE; if( outputFlags & CHECK_PACK_UNPACK ) { local_copy_ddt_raw(pdt, 4500, iov_num); @@ -235,37 +239,37 @@ int main( int argc, char* argv[] ) printf( ">>--------------------------------------------<<\n" ); if( outputFlags & CHECK_PACK_UNPACK ) { - printf( "Contiguous multiple data-type (4500*1)\n" ); + printf( "Contiguous multiple datatype (4500*1)\n" ); pdt = create_contiguous_type( MPI_DOUBLE, 4500 ); local_copy_ddt_raw(pdt, 1, iov_num); OBJ_RELEASE( pdt ); assert( pdt == NULL ); - printf( "Contiguous multiple data-type (450*10)\n" ); + printf( "Contiguous multiple datatype (450*10)\n" ); pdt = create_contiguous_type( MPI_DOUBLE, 450 ); local_copy_ddt_raw(pdt, 10, iov_num); OBJ_RELEASE( pdt ); assert( pdt == NULL ); - printf( "Contiguous multiple data-type (45*100)\n" ); + printf( "Contiguous multiple datatype (45*100)\n" ); pdt = create_contiguous_type( MPI_DOUBLE, 45 ); local_copy_ddt_raw(pdt, 100, iov_num); OBJ_RELEASE( pdt ); assert( pdt == NULL ); - printf( "Contiguous multiple data-type (100*45)\n" ); + printf( "Contiguous multiple datatype (100*45)\n" ); pdt = create_contiguous_type( MPI_DOUBLE, 100 ); local_copy_ddt_raw(pdt, 45, iov_num); OBJ_RELEASE( 
pdt ); assert( pdt == NULL ); - printf( "Contiguous multiple data-type (10*450)\n" ); + printf( "Contiguous multiple datatype (10*450)\n" ); pdt = create_contiguous_type( MPI_DOUBLE, 10 ); local_copy_ddt_raw(pdt, 450, iov_num); OBJ_RELEASE( pdt ); assert( pdt == NULL ); - printf( "Contiguous multiple data-type (1*4500)\n" ); + printf( "Contiguous multiple datatype (1*4500)\n" ); pdt = create_contiguous_type( MPI_DOUBLE, 1 ); local_copy_ddt_raw(pdt, 4500, iov_num); OBJ_RELEASE( pdt ); assert( pdt == NULL ); } printf( ">>--------------------------------------------<<\n" ); printf( ">>--------------------------------------------<<\n" ); - printf( "Vector data-type (450 times 10 double stride 11)\n" ); + printf( "Vector datatype (450 times 10 double stride 11)\n" ); pdt = create_vector_type( MPI_DOUBLE, 450, 10, 11 ); if( outputFlags & DUMP_DATA_AFTER_COMMIT ) { - ompi_datatype_dump( pdt ); + ompi_datatype_dump( pdt ); } if( outputFlags & CHECK_PACK_UNPACK ) { local_copy_ddt_raw(pdt, 1, iov_num); @@ -292,9 +296,9 @@ int main( int argc, char* argv[] ) printf( ">>--------------------------------------------<<\n" ); pdt = test_create_blacs_type(); if( outputFlags & CHECK_PACK_UNPACK ) { - if( outputFlags & DUMP_DATA_AFTER_COMMIT ) { - ompi_datatype_dump( pdt ); - } + if( outputFlags & DUMP_DATA_AFTER_COMMIT ) { + ompi_datatype_dump( pdt ); + } local_copy_ddt_raw(pdt, 4500, iov_num); } printf( ">>--------------------------------------------<<\n" ); diff --git a/test/datatype/ddt_raw2.c b/test/datatype/ddt_raw2.c new file mode 100644 index 00000000000..7e91a323f7a --- /dev/null +++ b/test/datatype/ddt_raw2.c @@ -0,0 +1,352 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ +/* + * Copyright (c) 2019 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2019 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include "ompi_config.h" +#include "ddt_lib.h" +#include "opal/datatype/opal_convertor.h" +#include "opal/datatype/opal_datatype_internal.h" +#include "opal/runtime/opal.h" + +#include +#include +#ifdef HAVE_SYS_TIME_H +#include +#endif +#include + + +static int +mca_common_ompio_decode_datatype ( ompi_datatype_t *datatype, + int count, + struct iovec **iov, + uint32_t *iovec_count, + int increment) +{ + opal_convertor_t *convertor; + size_t remaining_length = 0; + uint32_t i; + uint32_t temp_count; + struct iovec *temp_iov=NULL; + size_t temp_data; + + convertor = opal_convertor_create( opal_local_arch, 0 ); + + if (OMPI_SUCCESS != opal_convertor_prepare_for_send (convertor, + &(datatype->super), + count, + NULL)) { + opal_output (1, "Cannot attach the datatype to a convertor\n"); + return OMPI_ERROR; + } + + if ( 0 == datatype->super.size ) { + *iovec_count = 0; + *iov = NULL; + return OMPI_SUCCESS; + } + + remaining_length = count * datatype->super.size; + + temp_count = increment; + temp_iov = (struct iovec*)malloc(temp_count * sizeof(struct iovec)); + if (NULL == temp_iov) { + opal_output (1, "OUT OF MEMORY\n"); + return OMPI_ERR_OUT_OF_RESOURCE; + } + + while (0 == opal_convertor_raw(convertor, temp_iov, + &temp_count, &temp_data)) { + *iovec_count = *iovec_count + temp_count; + *iov = (struct iovec *) realloc (*iov, *iovec_count * sizeof(struct iovec)); + if (NULL == *iov) { + opal_output(1, "OUT OF MEMORY\n"); + free(temp_iov); + return OMPI_ERR_OUT_OF_RESOURCE; + } + for (i = 0 ; i < temp_count ; i++) { + (*iov)[i+(*iovec_count-temp_count)].iov_base = temp_iov[i].iov_base; + (*iov)[i+(*iovec_count-temp_count)].iov_len = temp_iov[i].iov_len; + } + + remaining_length -= temp_data; + temp_count = increment; + } + *iovec_count = *iovec_count + temp_count; + if ( temp_count > 0 ) { + *iov = (struct iovec *) realloc (*iov, *iovec_count * sizeof(struct iovec)); + if (NULL == *iov) { 
+ opal_output(1, "OUT OF MEMORY\n"); + free(temp_iov); + return OMPI_ERR_OUT_OF_RESOURCE; + } + } + for (i=0 ; isuper.flags = 3332; + datatype->super.id = 0; + datatype->super.bdt_used = 512; + datatype->super.size = 31684; + datatype->super.true_lb = 4; + datatype->super.true_ub = 218288; + datatype->super.lb = 0; + datatype->super.ub = 218344; + datatype->super.nbElems = 31684; + datatype->super.align = 1; + datatype->super.loops = 1146; + datatype->super.desc.length = 3351; + datatype->super.desc.used = 184; + datatype->super.desc.desc = descs; + datatype->super.opt_desc.length = 3351; + datatype->super.opt_desc.used = 184; + datatype->super.opt_desc.desc = descs; + + /* Get the entire raw description of the datatype in a single call */ + uint32_t iovec_count_300 = 0; + struct iovec * iov_300 = NULL; + mca_common_ompio_decode_datatype ( datatype, 1, &iov_300, &iovec_count_300, 300); + /* Get the raw description of the datatype 10 elements at the time. This stresses some + * of the execution paths in the convertor raw. + */ + uint32_t iovec_count_10 = 0; + struct iovec * iov_10 = NULL; + mca_common_ompio_decode_datatype ( datatype, 1, &iov_10, &iovec_count_10, 10); + /* Get the raw description of the datatype one element at the time. This stresses all + * execution paths in the convertor raw. 
+ */ + uint32_t iovec_count_1 = 0; + struct iovec * iov_1 = NULL; + mca_common_ompio_decode_datatype ( datatype, 1, &iov_1, &iovec_count_1, 1); + + assert(iovec_count_300 == iovec_count_10); + assert(iovec_count_300 == iovec_count_1); + // assert(iov[100].iov_base == iov2[100].iov_base); + // assert(iov[100].iov_len == iov2[100].iov_len); + for (uint32_t i = 0; i < iovec_count_300; i++) { + assert(iov_300[i].iov_base == iov_10[i].iov_base); + assert(iov_300[i].iov_len == iov_10[i].iov_len); + assert(iov_300[i].iov_base == iov_1[i].iov_base); + assert(iov_300[i].iov_len == iov_1[i].iov_len); + } + + return 0; +} + diff --git a/test/datatype/large_data.c b/test/datatype/large_data.c new file mode 100644 index 00000000000..1c031b36629 --- /dev/null +++ b/test/datatype/large_data.c @@ -0,0 +1,174 @@ +/* -*- Mode: C; c-basic-offset:4 ; -*- */ +/* + * Copyright (c) 2018 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +/** + * This test checks that the OMPI datatype description is correct for + * extremely large types (over 4GB). 
+ */ + +#include +#include +#include +#include +#include + +#include "ompi_config.h" +#include "ompi/datatype/ompi_datatype.h" +#include "opal/runtime/opal.h" +#include "opal/datatype/opal_convertor.h" +#include "opal/datatype/opal_datatype_internal.h" + +#define MAX_IOVEC 10 +#define MAX_CHUNK (1024*1024*1024) /* 1GB */ + +static int verbose = 0; + +static size_t +count_length_via_convertor_raw(char* msg, + MPI_Datatype dtype, int count) +{ + opal_convertor_t* pconv; + struct iovec iov[MAX_IOVEC]; + uint32_t iov_count = MAX_IOVEC, i; + size_t length = MAX_CHUNK, packed_iovec = 0, packed = 0; + + pconv = opal_convertor_create( opal_local_arch, 0 ); + opal_convertor_prepare_for_send(pconv, (const struct opal_datatype_t *)dtype, count, NULL); /* honor the caller-supplied element count instead of a hard-coded 1 */ + while( 0 == opal_convertor_raw(pconv, iov, &iov_count, &length) ) { + if( verbose ) { + printf("iov_count = %d packed_iovec = %"PRIsize_t" length = %"PRIsize_t"\n", + iov_count, packed_iovec, length); + } + packed += length; + for( i = 0; i < iov_count; i++ ) { + packed_iovec += iov[i].iov_len; + if( verbose ) { + printf("[%s] add %"PRIsize_t" bytes -> so far %"PRIsize_t" bytes\n", + msg, iov[i].iov_len, packed_iovec); + } + } + if( packed != packed_iovec ) { + printf( "[%s] Raw data amount diverges %"PRIsize_t" != %"PRIsize_t"\n", + msg, packed, packed_iovec); + exit(-1); + } + iov_count = MAX_IOVEC; /* number of available iov */ + length = MAX_CHUNK; + } + if( verbose ) { + printf("iov_count = %d packed_iovec = %"PRIsize_t" length = %"PRIsize_t"\n", + iov_count, packed_iovec, length); + } + packed += length; + for( i = 0; i < iov_count; i++ ) { + packed_iovec += iov[i].iov_len; + if( verbose ) { + printf("[%s] add %"PRIsize_t" bytes -> so far %"PRIsize_t" bytes\n", + msg, iov[i].iov_len, packed_iovec); + } + } + if( packed != packed_iovec ) { + printf( "[%s] Raw data amount diverges %"PRIsize_t" != %"PRIsize_t"\n", + msg, packed, packed_iovec); + exit(-1); + } + return packed_iovec; +} + +int main(int argc, char * argv[]) +{ + + 
int const per_process = 192; + int const per_type = 20000000; + int blocklen, stride, count; + + int scounts[2] = {per_process, per_process}; + int sdispls[2] = {3*per_process, 0*per_process}; + int rcounts[2] = {per_process, per_process}; + int rdispls[2] = {1*per_process, 2*per_process}; + + MPI_Datatype ddt, stype, rtype; + size_t length, packed; + + opal_init_util(&argc, &argv); + ompi_datatype_init(); + + ompi_datatype_create_contiguous( per_type, MPI_FLOAT, &ddt); + + /* + * Large sparse datatype: indexed contiguous + */ + ompi_datatype_create_indexed(2, scounts, sdispls, ddt, &stype); + ompi_datatype_commit(&stype); + + packed = count_length_via_convertor_raw("1. INDEX", stype, 1); + opal_datatype_type_size(&stype->super, &length); + if( length != packed ) { + printf("Mismatched length of packed data to datatype size (%"PRIsize_t" != %"PRIsize_t")\n", + packed, length); + exit(-2); + } + ompi_datatype_destroy(&stype); + + /* + * Large contiguous datatype: indexed contiguous + */ + ompi_datatype_create_indexed(2, rcounts, rdispls, ddt, &rtype); + ompi_datatype_commit(&rtype); + + packed = count_length_via_convertor_raw("2. INDEX", rtype, 1); + opal_datatype_type_size(&rtype->super, &length); + if( length != packed ) { + printf("Mismatched length of packed data to datatype size (%"PRIsize_t" != %"PRIsize_t")\n", + packed, length); + exit(-2); + } + ompi_datatype_destroy(&rtype); + ompi_datatype_destroy(&ddt); + + /* + * Large sparse datatype: vector + */ + count = INT_MAX / 2; + blocklen = stride = 4; + ompi_datatype_create_vector(count, blocklen, stride, MPI_FLOAT, &ddt); + ompi_datatype_commit(&ddt); + + packed = count_length_via_convertor_raw("3. 
VECTOR", ddt, 1); + opal_datatype_type_size(&ddt->super, &length); + if( length != packed ) { + printf("Mismatched length of packed data to datatype size (%"PRIsize_t" != %"PRIsize_t")\n", + packed, length); + exit(-2); + } + ompi_datatype_destroy(&ddt); + + /* + * Large sparse datatype: contiguous + */ + MPI_Datatype tmp; + ompi_datatype_create_contiguous(stride, MPI_FLOAT, &tmp); + ompi_datatype_create_contiguous(count, tmp, &ddt); + ompi_datatype_commit(&ddt); + + packed = count_length_via_convertor_raw("4. CONTIG", ddt, 1); + opal_datatype_type_size(&ddt->super, &length); + if( length != packed ) { + printf("Mismatched length of packed data to datatype size (%"PRIsize_t" != %"PRIsize_t")\n", + packed, length); + exit(-2); + } + ompi_datatype_destroy(&ddt); + ompi_datatype_destroy(&tmp); + + return 0; +} diff --git a/test/datatype/opal_datatype_test.c b/test/datatype/opal_datatype_test.c index cf564eb838e..b5cdedbb7ad 100644 --- a/test/datatype/opal_datatype_test.c +++ b/test/datatype/opal_datatype_test.c @@ -159,8 +159,7 @@ static int local_copy_ddt_count( opal_datatype_t const * const pdt, int count ) osrc = (char*)malloc( malloced_size ); { - for( size_t i = 0; i < malloced_size; i++ ) - osrc[i] = i % 128 + 32; + for( size_t i = 0; i < malloced_size; i++ ) osrc[i] = i % 128 + 32; memcpy(odst, osrc, malloced_size); } pdst = odst - lb; diff --git a/test/datatype/opal_ddt_lib.c b/test/datatype/opal_ddt_lib.c index e09a5232afb..605d7deedb6 100644 --- a/test/datatype/opal_ddt_lib.c +++ b/test/datatype/opal_ddt_lib.c @@ -445,7 +445,7 @@ static int32_t opal_datatype_create_vector( int count, int bLength, int stride, } pData = opal_datatype_create( oldType->desc.used + 2 ); - if( (bLength == stride) || (1 >= count) ) { /* the elements are contiguous */ + if( (bLength == stride) || (1 == count) ) { /* the elements are contiguous */ opal_datatype_add( pData, oldType, count * bLength, 0, extent ); } else { if( 1 == bLength ) { @@ -476,7 +476,7 @@ static int32_t 
opal_datatype_create_hvector( int count, int bLength, ptrdiff_t s } pTempData = opal_datatype_create( oldType->desc.used + 2 ); - if( ((extent * bLength) == stride) || (1 >= count) ) { /* contiguous */ + if( ((extent * bLength) == stride) || (1 == count) ) { /* contiguous */ pData = pTempData; opal_datatype_add( pData, oldType, count * bLength, 0, extent ); } else { diff --git a/test/datatype/position.c b/test/datatype/position.c index 07725816e43..0be4f77fd85 100644 --- a/test/datatype/position.c +++ b/test/datatype/position.c @@ -197,9 +197,9 @@ static void dump_ldi( ddt_ldi_t* buffer, int start_pos, int end_pos ) #endif #if (OPAL_ENABLE_DEBUG == 1) && (OPAL_C_HAVE_VISIBILITY == 0) -extern bool opal_unpack_debug; -extern bool opal_pack_debug; -extern bool opal_position_debug ; +extern bool opal_ddt_unpack_debug; +extern bool opal_ddt_pack_debug; +extern bool opal_ddt_position_debug ; #endif /* OPAL_ENABLE_DEBUG */ static char* bytes_dump( void* src, size_t cnt ) @@ -235,9 +235,9 @@ int main( int argc, char* argv[] ) ompi_datatype_init(); #if (OPAL_ENABLE_DEBUG == 1) && (OPAL_C_HAVE_VISIBILITY == 0) - opal_unpack_debug = false; - opal_pack_debug = false; - opal_position_debug = false; + opal_ddt_unpack_debug = false; + opal_ddt_pack_debug = false; + opal_ddt_position_debug = false; #endif /* OPAL_ENABLE_DEBUG */ create_segments( datatype, data_count, fragment_size, diff --git a/test/datatype/position_noncontig.c b/test/datatype/position_noncontig.c index 0fb94c224ab..7058c6d17ba 100644 --- a/test/datatype/position_noncontig.c +++ b/test/datatype/position_noncontig.c @@ -178,9 +178,9 @@ unpack_segments( ompi_datatype_t* datatype, int count, } #if (OPAL_ENABLE_DEBUG == 1) && (OPAL_C_HAVE_VISIBILITY == 0) -extern bool opal_unpack_debug; -extern bool opal_pack_debug; -extern bool opal_position_debug ; +extern bool opal_ddt_unpack_debug; +extern bool opal_ddt_pack_debug; +extern bool opal_ddt_position_debug ; #endif /* OPAL_ENABLE_DEBUG */ int main( int argc, char* 
argv[] ) @@ -206,9 +206,9 @@ int main( int argc, char* argv[] ) ompi_datatype_commit(&datatype); #if (OPAL_ENABLE_DEBUG == 1) && (OPAL_C_HAVE_VISIBILITY == 0) - opal_unpack_debug = false; - opal_pack_debug = false; - opal_position_debug = false; + opal_ddt_unpack_debug = false; + opal_ddt_pack_debug = false; + opal_ddt_position_debug = false; #endif /* OPAL_ENABLE_DEBUG */ create_segments( datatype, 1, fragment_size, diff --git a/test/datatype/to_self.c b/test/datatype/to_self.c index 58849f5e90c..2ba3eb92163 100644 --- a/test/datatype/to_self.c +++ b/test/datatype/to_self.c @@ -1,6 +1,6 @@ /* -*- Mode: C; c-basic-offset:4 ; -*- */ /* - * Copyright (c) 2004-2014 The University of Tennessee and The University + * Copyright (c) 2004-2019 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. * $COPYRIGHT$ @@ -15,14 +15,35 @@ #include #include #include +#include -#if OPEN_MPI && 0 +#if 0 && OPEN_MPI extern void ompi_datatype_dump( MPI_Datatype ddt ); #define MPI_DDT_DUMP(ddt) ompi_datatype_dump( (ddt) ) #else #define MPI_DDT_DUMP(ddt) #endif /* OPEN_MPI */ +static MPI_Datatype +create_merged_contig_with_gaps(int count) /* count of the basic datatype */ +{ + int array_of_blocklengths[] = {1, 1, 1}; + MPI_Aint array_of_displacements[] = {0, 8, 16}; + MPI_Datatype array_of_types[] = {MPI_DOUBLE, MPI_LONG, MPI_CHAR}; + MPI_Datatype type; + + MPI_Type_create_struct(3, array_of_blocklengths, + array_of_displacements, array_of_types, + &type); + if( 1 < count ) { + MPI_Datatype temp = type; + MPI_Type_contiguous(count, temp, &type); MPI_Type_free(&temp); /* the contiguous type keeps what it needs; free the intermediate struct type so it is not leaked */ + } + MPI_Type_commit(&type); + MPI_DDT_DUMP( type ); + return type; +} + /* Create a non-contiguous resized datatype */ struct structure { double not_transfered; @@ -178,23 +199,146 @@ create_indexed_gap_optimized_ddt( void ) return dt3; } -static void print_result( int length, int cycles, double time ) -{ - double bandwidth, clock_prec;
+/******************************************************************** + *******************************************************************/ + +#define DO_CONTIG 0x00000001 +#define DO_CONSTANT_GAP 0x00000002 +#define DO_INDEXED_GAP 0x00000004 +#define DO_OPTIMIZED_INDEXED_GAP 0x00000008 +#define DO_STRUCT_CONSTANT_GAP_RESIZED 0x00000010 +#define DO_STRUCT_MERGED_WITH_GAP_RESIZED 0x00000020 + +#define DO_PACK 0x01000000 +#define DO_UNPACK 0x02000000 +#define DO_ISEND_RECV 0x04000000 +#define DO_ISEND_IRECV 0x08000000 +#define DO_IRECV_SEND 0x10000000 +#define DO_IRECV_ISEND 0x20000000 + +#define MIN_LENGTH 1024 +#define MAX_LENGTH (1024*1024) + +static int cycles = 100; +static int trials = 20; +static int warmups = 2; + +static void print_result( int length, int trials, double* timers ) +{ + double bandwidth, clock_prec, temp; + double min_time, max_time, average, std_dev = 0.0; + double ordered[trials]; + int t, pos, quartile_start, quartile_end; + + for( t = 0; t < trials; ordered[t] = timers[t], t++ ); + for( t = 0; t < trials-1; t++ ) { + temp = ordered[t]; + pos = t; + for( int i = t+1; i < trials; i++ ) { + if( temp > ordered[i] ) { + temp = ordered[i]; + pos = i; + } + } + if( pos != t ) { + temp = ordered[t]; + ordered[t] = ordered[pos]; + ordered[pos] = temp; + } + } + quartile_start = trials - (3 * trials) / 4; + quartile_end = trials - (1 * trials) / 4; clock_prec = MPI_Wtick(); - bandwidth = (length * clock_prec * cycles) / (1024.0 * 1024.0) / (time * clock_prec); - printf( "%8d\t%.6f\t%.4f MB/s\n", length, time / cycles, bandwidth ); + min_time = ordered[quartile_start]; + max_time = ordered[quartile_start]; + average = ordered[quartile_start]; + for( t = quartile_start + 1; t < quartile_end; t++ ) { + if( min_time > ordered[t] ) min_time = ordered[t]; + if( max_time < ordered[t] ) max_time = ordered[t]; + average += ordered[t]; + } + average /= (quartile_end - quartile_start); + for( t = quartile_start; t < quartile_end; t++ ) { + std_dev += 
(ordered[t] - average) * (ordered[t] - average); + } + std_dev = sqrt( std_dev/(quartile_end - quartile_start) ); + + bandwidth = (length * clock_prec) / (1024.0 * 1024.0) / (average * clock_prec); + printf( "%8d\t%15g\t%10.4f MB/s [min %10g max %10g std %2.2f%%]\n", length, average, bandwidth, + min_time, max_time, (100.0 * std_dev) / average ); +} + +static int pack( int cycles, + MPI_Datatype sdt, int scount, void* sbuf, + void* packed_buf ) +{ + int position, myself, c, t, outsize; + double timers[trials]; + + MPI_Type_size( sdt, &outsize ); + outsize *= scount; + + MPI_Comm_rank( MPI_COMM_WORLD, &myself ); + + for( t = 0; t < warmups; t++ ) { + for( c = 0; c < cycles; c++ ) { + position = 0; + MPI_Pack(sbuf, scount, sdt, packed_buf, outsize, &position, MPI_COMM_WORLD); + } + } + + for( t = 0; t < trials; t++ ) { + timers[t] = MPI_Wtime(); + for( c = 0; c < cycles; c++ ) { + position = 0; + MPI_Pack(sbuf, scount, sdt, packed_buf, outsize, &position, MPI_COMM_WORLD); + } + timers[t] = (MPI_Wtime() - timers[t]) / cycles; + } + print_result( outsize, trials, timers ); + return 0; +} + +static int unpack( int cycles, + void* packed_buf, + MPI_Datatype rdt, int rcount, void* rbuf ) +{ + int position, myself, c, t, insize; + double timers[trials]; + + MPI_Type_size( rdt, &insize ); + insize *= rcount; + + MPI_Comm_rank( MPI_COMM_WORLD, &myself ); + + for( t = 0; t < warmups; t++ ) { + for( c = 0; c < cycles; c++ ) { + position = 0; + MPI_Unpack(packed_buf, insize, &position, rbuf, rcount, rdt, MPI_COMM_WORLD); + } + } + + for( t = 0; t < trials; t++ ) { + timers[t] = MPI_Wtime(); + for( c = 0; c < cycles; c++ ) { + position = 0; + MPI_Unpack(packed_buf, insize, &position, rbuf, rcount, rdt, MPI_COMM_WORLD); + } + timers[t] = (MPI_Wtime() - timers[t]) / cycles; + } + print_result( insize, trials, timers ); + return 0; } static int isend_recv( int cycles, MPI_Datatype sdt, int scount, void* sbuf, MPI_Datatype rdt, int rcount, void* rbuf ) { - int myself, tag = 0, i, 
slength, rlength; + int myself, tag = 0, c, t, slength, rlength; MPI_Status status; MPI_Request req; - double tstart, tend; + double timers[trials]; MPI_Type_size( sdt, &slength ); slength *= scount; @@ -203,21 +347,16 @@ static int isend_recv( int cycles, MPI_Comm_rank( MPI_COMM_WORLD, &myself ); - tstart = MPI_Wtime(); - for( i = 0; i < cycles; i++ ) { -#ifndef FAST - MPI_Isend( sbuf, scount, sdt, myself, tag, MPI_COMM_WORLD, &req ); - MPI_Recv( rbuf, rcount, rdt, myself, tag, MPI_COMM_WORLD, &status ); - MPI_Wait( &req, &status ); - /*MPI_Request_free( &req );*/ -#else - ftmpi_mpi_isend( sbuf, scount, sdt, myself, tag, MPI_COMM_WORLD, &req ); - ftmpi_mpi_recv( rbuf, rcount, rdt, myself, tag, MPI_COMM_WORLD, &status ); - ftmpi_request_free( &req ); -#endif + for( t = 0; t < trials; t++ ) { + timers[t] = MPI_Wtime(); + for( c = 0; c < cycles; c++ ) { + MPI_Isend( sbuf, scount, sdt, myself, tag, MPI_COMM_WORLD, &req ); + MPI_Recv( rbuf, rcount, rdt, myself, tag, MPI_COMM_WORLD, &status ); + MPI_Wait( &req, &status ); + } + timers[t] = (MPI_Wtime() - timers[t]) / cycles; } - tend = MPI_Wtime(); - print_result( rlength, cycles, tend - tstart ); + print_result( rlength, trials, timers ); return 0; } @@ -225,10 +364,10 @@ static int irecv_send( int cycles, MPI_Datatype sdt, int scount, void* sbuf, MPI_Datatype rdt, int rcount, void* rbuf ) { - int myself, tag = 0, i, slength, rlength; + int myself, tag = 0, c, t, slength, rlength; MPI_Request req; MPI_Status status; - double tstart, tend; + double timers[trials]; MPI_Type_size( sdt, &slength ); slength *= scount; @@ -237,21 +376,16 @@ static int irecv_send( int cycles, MPI_Comm_rank( MPI_COMM_WORLD, &myself ); - tstart = MPI_Wtime(); - for( i = 0; i < cycles; i++ ) { -#ifndef FAST - MPI_Irecv( rbuf, rcount, rdt, myself, tag, MPI_COMM_WORLD, &req ); - MPI_Send( sbuf, scount, sdt, myself, tag, MPI_COMM_WORLD ); - MPI_Wait( &req, &status ); - /*MPI_Request_free( &req );*/ -#else - ftmpi_mpi_irecv( rbuf, rcount, rdt, 
myself, tag, MPI_COMM_WORLD, &req ); - ftmpi_mpi_send( sbuf, scount, sdt, myself, tag, MPI_COMM_WORLD ); - ftmpi_request_free( &req ); -#endif + for( t = 0; t < trials; t++ ) { + timers[t] = MPI_Wtime(); + for( c = 0; c < cycles; c++ ) { + MPI_Irecv( rbuf, rcount, rdt, myself, tag, MPI_COMM_WORLD, &req ); + MPI_Send( sbuf, scount, sdt, myself, tag, MPI_COMM_WORLD ); + MPI_Wait( &req, &status ); + } + timers[t] = (MPI_Wtime() - timers[t]) / cycles; } - tend = MPI_Wtime(); - print_result( rlength, cycles, tend - tstart ); + print_result( rlength, trials, timers ); return 0; } @@ -259,10 +393,10 @@ static int isend_irecv_wait( int cycles, MPI_Datatype sdt, int scount, void* sbuf, MPI_Datatype rdt, int rcount, void* rbuf ) { - int myself, tag = 0, i, slength, rlength; - MPI_Request sreq, rreq; - MPI_Status status; - double tstart, tend; + int myself, tag = 0, c, t, slength, rlength; + MPI_Request requests[2]; + MPI_Status statuses[2]; + double timers[trials]; MPI_Type_size( sdt, &slength ); slength *= scount; @@ -271,25 +405,16 @@ static int isend_irecv_wait( int cycles, MPI_Comm_rank( MPI_COMM_WORLD, &myself ); - tstart = MPI_Wtime(); - for( i = 0; i < cycles; i++ ) { -#ifndef FAST - MPI_Isend( sbuf, scount, sdt, myself, tag, MPI_COMM_WORLD, &sreq ); - MPI_Irecv( rbuf, rcount, rdt, myself, tag, MPI_COMM_WORLD, &rreq ); - MPI_Wait( &sreq, &status ); - MPI_Wait( &rreq, &status ); - /*MPI_Request_free( &sreq );*/ - /*MPI_Request_free( &rreq );*/ -#else - ftmpi_mpi_isend( sbuf, scount, sdt, myself, tag, MPI_COMM_WORLD, &sreq ); - ftmpi_mpi_irecv( rbuf, rcount, rdt, myself, tag, MPI_COMM_WORLD, &rreq ); - ftmpi_wait( &sreq, &status ); - ftmpi_request_free( &sreq ); - ftmpi_request_free( &rreq ); -#endif + for( t = 0; t < trials; t++ ) { + timers[t] = MPI_Wtime(); + for( c = 0; c < cycles; c++ ) { + MPI_Isend( sbuf, scount, sdt, myself, tag, MPI_COMM_WORLD, &requests[0] ); + MPI_Irecv( rbuf, rcount, rdt, myself, tag, MPI_COMM_WORLD, &requests[1] ); + MPI_Waitall( 2, 
requests, statuses ); + } + timers[t] = (MPI_Wtime() - timers[t]) / cycles; } - tend = MPI_Wtime(); - print_result( rlength, cycles, tend - tstart ); + print_result( rlength, trials, timers ); return 0; } @@ -297,10 +422,10 @@ static int irecv_isend_wait( int cycles, MPI_Datatype sdt, int scount, void* sbuf, MPI_Datatype rdt, int rcount, void* rbuf ) { - int myself, tag = 0, i, slength, rlength; - MPI_Request sreq, rreq; - MPI_Status status; - double tstart, tend; + int myself, tag = 0, c, t, slength, rlength; + MPI_Request requests[2]; + MPI_Status statuses[2]; + double timers[trials]; MPI_Type_size( sdt, &slength ); slength *= scount; @@ -309,74 +434,82 @@ static int irecv_isend_wait( int cycles, MPI_Comm_rank( MPI_COMM_WORLD, &myself ); - tstart = MPI_Wtime(); - for( i = 0; i < cycles; i++ ) { -#ifndef FAST - MPI_Irecv( rbuf, rcount, rdt, myself, tag, MPI_COMM_WORLD, &rreq ); - MPI_Isend( sbuf, scount, sdt, myself, tag, MPI_COMM_WORLD, &sreq ); - MPI_Wait( &sreq, &status ); - MPI_Wait( &rreq, &status ); - /*MPI_Request_free( &sreq );*/ - /*MPI_Request_free( &rreq );*/ -#else - ftmpi_mpi_irecv( rbuf, rcount, rdt, myself, tag, MPI_COMM_WORLD, &rreq ); - ftmpi_mpi_isend( sbuf, scount, sdt, myself, tag, MPI_COMM_WORLD, &sreq ); - ftmpi_wait( &sreq, &status ); - ftmpi_request_free( &sreq ); - ftmpi_request_free( &rreq ); -#endif + for( t = 0; t < trials; t++ ) { + timers[t] = MPI_Wtime(); + for( c = 0; c < cycles; c++ ) { + MPI_Irecv( rbuf, rcount, rdt, myself, tag, MPI_COMM_WORLD, &requests[0] ); + MPI_Isend( sbuf, scount, sdt, myself, tag, MPI_COMM_WORLD, &requests[1] ); + MPI_Waitall( 2, requests, statuses ); + } + timers[t] = (MPI_Wtime() - timers[t]) / cycles; } - tend = MPI_Wtime(); - print_result( rlength, cycles, tend - tstart ); + print_result( rlength, trials, timers); return 0; } -static int do_test_for_ddt( MPI_Datatype sddt, MPI_Datatype rddt, int length ) +static int do_test_for_ddt( int doop, MPI_Datatype sddt, MPI_Datatype rddt, int length ) { - int 
i; MPI_Aint lb, extent; char *sbuf, *rbuf; + int i; MPI_Type_get_extent( sddt, &lb, &extent ); sbuf = (char*)malloc( length ); rbuf = (char*)malloc( length ); - printf( "# Isend recv (length %d)\n", length ); - for( i = 1; i <= (length/extent); i *= 2 ) { - isend_recv( 10, sddt, i, sbuf, rddt, i, rbuf ); + if( doop & DO_PACK ) { + printf("# Pack (max length %d)\n", length); + for( i = 1; i <= (length/extent); i *= 2 ) { + pack( cycles, sddt, i, sbuf, rbuf ); + } } - printf( "# Isend Irecv Wait (length %d)\n", length ); - for( i = 1; i <= (length/extent); i *= 2 ) { - isend_irecv_wait( 10, sddt, i, sbuf, rddt, i, rbuf ); + + if( doop & DO_UNPACK ) { + printf("# Unpack (length %d)\n", length); + for( i = 1; i <= (length/extent); i *= 2 ) { + unpack( cycles, sbuf, rddt, i, rbuf ); + } } - printf( "# Irecv send (length %d)\n", length ); - for( i = 1; i <= (length/extent); i *= 2 ) { - irecv_send( 10, sddt, i, sbuf, rddt, i, rbuf ); + + if( doop & DO_ISEND_RECV ) { + printf( "# Isend recv (length %d)\n", length ); + for( i = 1; i <= (length/extent); i *= 2 ) { + isend_recv( cycles, sddt, i, sbuf, rddt, i, rbuf ); + } + } + + if( doop & DO_ISEND_IRECV ) { + printf( "# Isend Irecv Wait (length %d)\n", length ); + for( i = 1; i <= (length/extent); i *= 2 ) { + isend_irecv_wait( cycles, sddt, i, sbuf, rddt, i, rbuf ); + } + } + + if( doop & DO_IRECV_SEND ) { + printf( "# Irecv send (length %d)\n", length ); + for( i = 1; i <= (length/extent); i *= 2 ) { + irecv_send( cycles, sddt, i, sbuf, rddt, i, rbuf ); + } } - printf( "# Irecv Isend Wait (length %d)\n", length ); - for( i = 1; i <= (length/extent); i *= 2 ) { - irecv_isend_wait( 10, sddt, i, sbuf, rddt, i, rbuf ); + + if( doop & DO_IRECV_ISEND ) { /* gated by its own flag, not DO_IRECV_SEND */ + printf( "# Irecv Isend Wait (length %d)\n", length ); + for( i = 1; i <= (length/extent); i *= 2 ) { + irecv_isend_wait( cycles, sddt, i, sbuf, rddt, i, rbuf ); + } } free( sbuf ); free( rbuf ); return 0; } -#define DO_CONTIG 0x01 -#define DO_CONSTANT_GAP 0x02 -#define
DO_INDEXED_GAP 0x04 -#define DO_OPTIMIZED_INDEXED_GAP 0x08 -#define DO_STRUCT_CONSTANT_GAP_RESIZED 0x10 - -#define MIN_LENGTH 1024 -#define MAX_LENGTH (1024*1024) - int main( int argc, char* argv[] ) { - int run_tests = 0xffffffff; /* do all tests by default */ - int length, rank, size; + int run_tests = DO_CONTIG | DO_CONSTANT_GAP | DO_INDEXED_GAP | DO_OPTIMIZED_INDEXED_GAP | DO_STRUCT_CONSTANT_GAP_RESIZED | DO_STRUCT_MERGED_WITH_GAP_RESIZED; /* do all datatype tests by default */ + int rank, size; MPI_Datatype ddt; - /*int run_tests = DO_CONSTANT_GAP;*/ + run_tests |= DO_PACK | DO_UNPACK; + MPI_Init (&argc, &argv); MPI_Comm_rank (MPI_COMM_WORLD, &rank); @@ -389,16 +522,14 @@ int main( int argc, char* argv[] ) if( run_tests & DO_CONTIG ) { printf( "\ncontiguous datatype\n\n" ); - for( length = MIN_LENGTH; length < MAX_LENGTH; length <<=1 ) - do_test_for_ddt( MPI_INT, MPI_INT, length ); + do_test_for_ddt( run_tests, MPI_INT, MPI_INT, MAX_LENGTH ); } if( run_tests & DO_INDEXED_GAP ) { printf( "\nindexed gap\n\n" ); ddt = create_indexed_gap_ddt(); MPI_DDT_DUMP( ddt ); - for( length = MIN_LENGTH; length < MAX_LENGTH; length <<=1 ) - do_test_for_ddt( ddt, ddt, length ); + do_test_for_ddt( run_tests, ddt, ddt, MAX_LENGTH ); MPI_Type_free( &ddt ); } @@ -406,8 +537,7 @@ int main( int argc, char* argv[] ) printf( "\noptimized indexed gap\n\n" ); ddt = create_indexed_gap_optimized_ddt(); MPI_DDT_DUMP( ddt ); - for( length = MIN_LENGTH; length < MAX_LENGTH; length <<=1 ) - do_test_for_ddt( ddt, ddt, length ); + do_test_for_ddt( run_tests, ddt, ddt, MAX_LENGTH ); MPI_Type_free( &ddt ); } @@ -415,8 +545,7 @@ int main( int argc, char* argv[] ) printf( "\nconstant indexed gap\n\n" ); ddt = create_indexed_constant_gap_ddt( 80, 100, 1 ); MPI_DDT_DUMP( ddt ); - for( length = MIN_LENGTH; length < MAX_LENGTH; length <<=1 ) - do_test_for_ddt( ddt, ddt, length ); + do_test_for_ddt( run_tests, ddt, ddt, MAX_LENGTH ); MPI_Type_free( &ddt ); } @@ -424,8 +553,7 @@ int main( int argc, char* argv[] ) printf( "\noptimized constant indexed gap\n\n" ); ddt =
create_optimized_indexed_constant_gap_ddt( 80, 100, 1 ); MPI_DDT_DUMP( ddt ); - for( length = MIN_LENGTH; length < MAX_LENGTH; length <<=1 ) - do_test_for_ddt( ddt, ddt, length ); + do_test_for_ddt( run_tests, ddt, ddt, MAX_LENGTH ); MPI_Type_free( &ddt ); } @@ -433,8 +561,15 @@ int main( int argc, char* argv[] ) printf( "\nstruct constant gap resized\n\n" ); ddt = create_struct_constant_gap_resized_ddt( 0 /* unused */, 0 /* unused */, 0 /* unused */ ); MPI_DDT_DUMP( ddt ); - for( length = MIN_LENGTH; length < MAX_LENGTH; length <<=1 ) - do_test_for_ddt( ddt, ddt, length ); + do_test_for_ddt( run_tests, ddt, ddt, MAX_LENGTH ); + MPI_Type_free( &ddt ); + } + + if( run_tests & DO_STRUCT_MERGED_WITH_GAP_RESIZED ) { + printf( "\nstruct merged with gap resized\n\n" ); /* distinct banner so this run is not confused with the previous test */ + ddt = create_merged_contig_with_gaps( 1 ); + MPI_DDT_DUMP( ddt ); + do_test_for_ddt( run_tests, ddt, ddt, MAX_LENGTH ); MPI_Type_free( &ddt ); } diff --git a/test/datatype/unpack_ooo.c b/test/datatype/unpack_ooo.c index 458ef550930..58ef8a95774 100644 --- a/test/datatype/unpack_ooo.c +++ b/test/datatype/unpack_ooo.c @@ -1,6 +1,6 @@ /* -*- Mode: C; c-basic-offset:4 ; -*- */ /* - * Copyright (c) 2014 The University of Tennessee and The University + * Copyright (c) 2014-2019 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved.
* Copyright (c) 2014 Research Organization for Information Science @@ -18,7 +18,6 @@ #include "opal/runtime/opal.h" #include "opal/datatype/opal_convertor.h" #include "opal/datatype/opal_datatype_internal.h" -// #include #include #include #include @@ -61,6 +60,18 @@ static void print_bar_pbar(struct foo_t* bar, struct pfoo_t* pbar) fprintf(stderr, "\n"); } +static void print_stack(opal_convertor_t* conv) +{ + printf("Stack pos %d [converted %" PRIsize_t "/%" PRIsize_t "]\n", + conv->stack_pos, conv->bConverted, conv->local_size); + for( uint32_t i = 0; i <= conv->stack_pos; i++ ) { + printf( "[%u] index %d, type %s count %" PRIsize_t " disp %p\n", + i, conv->pStack[i].index, opal_datatype_basicDatatypes[conv->pStack[i].type]->name, + conv->pStack[i].count, (void*)conv->pStack[i].disp); + } + printf("\n"); +} + static int testcase(ompi_datatype_t * newtype, size_t arr[10][2]) { int i, j, errors = 0; struct iovec a; @@ -104,6 +115,7 @@ static int testcase(ompi_datatype_t * newtype, size_t arr[10][2]) { max_data = a.iov_len; pos = arr[i][1]; opal_convertor_set_position(pConv, &pos); + print_stack(pConv); assert(arr[i][1] == pos); opal_convertor_unpack( pConv, &a, &iov_count, &max_data ); a.iov_base = (char*)a.iov_base - 1024; @@ -118,9 +130,10 @@ static int testcase(ompi_datatype_t * newtype, size_t arr[10][2]) { bar[j].d[1] != 0.0 || bar[j].d[2] != pbar[j].d[1]) { if(0 == errors) { - fprintf(stderr, "ERROR ! count=%d, position=%d, ptr = %p" + (void)opal_datatype_dump(&newtype->super); + fprintf(stderr, "ERROR ! 
position=%d/%d, ptr = %p" " got (%d,%d,%d,%g,%g,%g) expected (%d,%d,%d,%g,%g,%g)\n", - N, j, (void*)&bar[j], + j, N, (void*)&bar[j], bar[j].i[0], bar[j].i[1], bar[j].i[2], diff --git a/test/spc/spc_test.c b/test/spc/spc_test.c index a15d87dc95a..65dc744435f 100644 --- a/test/spc/spc_test.c +++ b/test/spc/spc_test.c @@ -44,8 +44,8 @@ int main(int argc, char **argv) char name[256], description[256]; /* Counter names to be read by ranks 0 and 1 */ - char *counter_names[] = { "runtime_spc_OMPI_BYTES_SENT_USER", - "runtime_spc_OMPI_BYTES_RECEIVED_USER" }; + char *counter_names[] = { "runtime_spc_OMPI_SPC_BYTES_SENT_USER", + "runtime_spc_OMPI_SPC_BYTES_RECEIVED_USER" }; MPI_Init(NULL, NULL); MPI_result = MPI_T_init_thread(MPI_THREAD_SINGLE, &provided); diff --git a/test/util/opal_path_nfs.c b/test/util/opal_path_nfs.c index b5fad7ae3dd..fe5768c00ee 100644 --- a/test/util/opal_path_nfs.c +++ b/test/util/opal_path_nfs.c @@ -16,6 +16,8 @@ * Copyright (c) 2010 IBM Corporation. All rights reserved. * Copyright (c) 2014 Los Alamos National Security, LLC. All rights * reserved. + * Copyright (c) 2018 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -31,7 +33,9 @@ #include #include +#ifdef HAVE_SYS_MOUNT_H #include +#endif #ifdef HAVE_SYS_STATFS_H #include #endif