From fa24868a2bf33066bd7ffb23236677764e216b2e Mon Sep 17 00:00:00 2001 From: Mark Allen Date: Wed, 25 Jan 2017 10:20:11 -0600 Subject: [PATCH 1/2] hook/prot: Connectivity map component * `-mca hook_prot_verbose VALUE` * General component verbosity * `-mca hook_prot_enable_mpi_init BOOL` * Enable map display at the bottom of `MPI_Init` * `-mca hook_prot_enable_mpi_finalize BOOL` * Enable map display at the top of `MPI_Finalize` * `-mca hook_prot_platform_prot VALUE` * Alias environment variable: `MPI_PROT` * `1 : Same as -mca hook_prot_enable_mpi_init t` * `2 : Same as -mca hook_prot_enable_mpi_finalize t` Signed-off-by: Joshua Hursey --- ompi/mca/hook/prot/Makefile.am | 20 + ompi/mca/hook/prot/configure.m4 | 25 + ompi/mca/hook/prot/hook_prot.h | 34 ++ ompi/mca/hook/prot/hook_prot_component.c | 155 +++++ ompi/mca/hook/prot/hook_prot_fns.c | 711 +++++++++++++++++++++++ ompi/mca/hook/prot/owner.txt | 7 + ompi/mca/pml/cm/pml_cm_component.c | 8 + ompi/mca/pml/ob1/pml_ob1.c | 21 + 8 files changed, 981 insertions(+) create mode 100644 ompi/mca/hook/prot/Makefile.am create mode 100644 ompi/mca/hook/prot/configure.m4 create mode 100644 ompi/mca/hook/prot/hook_prot.h create mode 100644 ompi/mca/hook/prot/hook_prot_component.c create mode 100644 ompi/mca/hook/prot/hook_prot_fns.c create mode 100644 ompi/mca/hook/prot/owner.txt diff --git a/ompi/mca/hook/prot/Makefile.am b/ompi/mca/hook/prot/Makefile.am new file mode 100644 index 00000000000..80888430741 --- /dev/null +++ b/ompi/mca/hook/prot/Makefile.am @@ -0,0 +1,20 @@ +# +# Copyright (c) 2016 IBM Corporation. All rights reserved. +# $COPYRIGHT$ +# +# Additional copyrights may follow +# +# $HEADER$ +# + +sources = \ + hook_prot.h \ + hook_prot_component.c \ + hook_prot_fns.c + +# This component will only ever be built statically -- never as a DSO. 
+ +noinst_LTLIBRARIES = libmca_hook_prot.la + +libmca_hook_prot_la_SOURCES = $(sources) +libmca_hook_prot_la_LDFLAGS = -module -avoid-version diff --git a/ompi/mca/hook/prot/configure.m4 b/ompi/mca/hook/prot/configure.m4 new file mode 100644 index 00000000000..830385de81f --- /dev/null +++ b/ompi/mca/hook/prot/configure.m4 @@ -0,0 +1,25 @@ +# +# Copyright (c) 2016 IBM Corporation. All rights reserved. +# +# $COPYRIGHT$ +# +# Additional copyrights may follow +# +# $HEADER$ +# + +# Make this a static component +AC_DEFUN([MCA_ompi_hook_prot_COMPILE_MODE], [ + AC_MSG_CHECKING([for MCA component $2:$3 compile mode]) + $4="static" + AC_MSG_RESULT([$$4]) +]) + +# MCA_hook_prot_CONFIG([action-if-can-compile], +# [action-if-cant-compile]) +# ------------------------------------------------ +AC_DEFUN([MCA_ompi_hook_prot_CONFIG],[ + AC_CONFIG_FILES([ompi/mca/hook/prot/Makefile]) + + $1 +]) diff --git a/ompi/mca/hook/prot/hook_prot.h b/ompi/mca/hook/prot/hook_prot.h new file mode 100644 index 00000000000..290229f5268 --- /dev/null +++ b/ompi/mca/hook/prot/hook_prot.h @@ -0,0 +1,34 @@ +/* + * Copyright (c) 2016 IBM Corporation. All rights reserved. 
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ +#ifndef MCA_HOOK_PROT_H +#define MCA_HOOK_PROT_H + +#include "ompi_config.h" + +#include "ompi/constants.h" + +#include "ompi/mca/hook/hook.h" +#include "ompi/mca/hook/base/base.h" + +BEGIN_C_DECLS + +OMPI_MODULE_DECLSPEC extern const ompi_hook_base_component_2_0_0_t mca_hook_prot_component; + +extern int mca_hook_prot_verbose; +extern int mca_hook_prot_output; +extern bool hook_prot_enable_mpi_init; +extern bool hook_prot_enable_mpi_finalize; + +void ompi_hook_prot_mpi_init_bottom(int argc, char **argv, int requested, int *provided); + +void ompi_hook_prot_mpi_finalize_top(void); + +END_C_DECLS + +#endif /* MCA_HOOK_PROT_H */ diff --git a/ompi/mca/hook/prot/hook_prot_component.c b/ompi/mca/hook/prot/hook_prot_component.c new file mode 100644 index 00000000000..98113b7e4ed --- /dev/null +++ b/ompi/mca/hook/prot/hook_prot_component.c @@ -0,0 +1,155 @@ +/* + * Copyright (c) 2016 IBM Corporation. All rights reserved. 
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include "ompi_config.h" + +#include "hook_prot.h" + +static int ompi_hook_prot_component_open(void); +static int ompi_hook_prot_component_close(void); +static int ompi_hook_prot_component_register(void); + +/* + * Public string showing the component version number + */ +const char *mca_hook_prot_component_version_string = + "Open MPI 'prot' hook MCA component version " OMPI_VERSION; + +/* + * Instantiate the public struct with all of our public information + * and pointers to our public functions in it + */ +const ompi_hook_base_component_2_0_0_t mca_hook_prot_component = { + + /* First, the mca_component_t struct containing meta information + * about the component itself */ + .hookm_version = { + OMPI_HOOK_BASE_VERSION_2_0_0, + + /* Component name and version */ + .mca_component_name = "prot", + MCA_BASE_MAKE_VERSION(component, OMPI_MAJOR_VERSION, OMPI_MINOR_VERSION, + OMPI_RELEASE_VERSION), + + /* Component open and close functions */ + .mca_open_component = ompi_hook_prot_component_open, + .mca_close_component = ompi_hook_prot_component_close, + .mca_register_component_params = ompi_hook_prot_component_register, + }, + .hookm_data = { + /* The component is checkpoint ready */ + MCA_BASE_METADATA_PARAM_CHECKPOINT + }, + + /* Component functions */ + .hookm_mpi_initialized_top = NULL, + .hookm_mpi_initialized_bottom = NULL, + + .hookm_mpi_finalized_top = NULL, + .hookm_mpi_finalized_bottom = NULL, + + .hookm_mpi_init_top = NULL, + .hookm_mpi_init_top_post_opal = NULL, + .hookm_mpi_init_bottom = ompi_hook_prot_mpi_init_bottom, + .hookm_mpi_init_error = NULL, + + .hookm_mpi_finalize_top = ompi_hook_prot_mpi_finalize_top, + .hookm_mpi_finalize_bottom = NULL, +}; + +int mca_hook_prot_verbose = 0; +int mca_hook_prot_output = -1; +bool hook_prot_enable_mpi_init = false; +bool hook_prot_enable_mpi_finalize = false; + +static int ompi_hook_prot_component_open(void) +{ + // Nothing to do + 
return OMPI_SUCCESS; +} + +static int ompi_hook_prot_component_close(void) +{ + // Nothing to do + return OMPI_SUCCESS; +} + +static int ompi_hook_prot_component_register(void) +{ + + /* + * Component verbosity level + */ + // Inherit the verbosity of the base framework, but also allow this to be overridden + if( ompi_hook_base_framework.framework_verbose > MCA_BASE_VERBOSE_NONE ) { + mca_hook_prot_verbose = ompi_hook_base_framework.framework_verbose; + } + else { + mca_hook_prot_verbose = MCA_BASE_VERBOSE_NONE; + } + (void) mca_base_component_var_register(&mca_hook_prot_component.hookm_version, "verbose", + NULL, + MCA_BASE_VAR_TYPE_INT, NULL, + 0, 0, + OPAL_INFO_LVL_9, + MCA_BASE_VAR_SCOPE_READONLY, + &mca_hook_prot_verbose); + + mca_hook_prot_output = opal_output_open(NULL); + opal_output_set_verbosity(mca_hook_prot_output, mca_hook_prot_verbose); + + /* + * If the component is active for mpi_init / mpi_finalize + */ + hook_prot_enable_mpi_init = false; + (void) mca_base_component_var_register(&mca_hook_prot_component.hookm_version, "enable_mpi_init", + "Enable prot behavior on mpi_init", + MCA_BASE_VAR_TYPE_BOOL, NULL, + 0, 0, + OPAL_INFO_LVL_3, + MCA_BASE_VAR_SCOPE_READONLY, + &hook_prot_enable_mpi_init); + + hook_prot_enable_mpi_finalize = false; + (void) mca_base_component_var_register(&mca_hook_prot_component.hookm_version, "enable_mpi_finalize", + "Enable prot behavior on mpi_finalize", + MCA_BASE_VAR_TYPE_BOOL, NULL, + 0, 0, + OPAL_INFO_LVL_3, + MCA_BASE_VAR_SCOPE_READONLY, + &hook_prot_enable_mpi_finalize); + + // User can set the ompi_platform_prot variable too + int hook_prot_platform_prot = -1; + (void) mca_base_var_register("ompi", NULL, NULL, "platform_prot", + "Enable prot behavior (1) mpi_init or (2) mpi_finalize", + MCA_BASE_VAR_TYPE_INT, NULL, + 0, 0, + OPAL_INFO_LVL_3, + MCA_BASE_VAR_SCOPE_READONLY, + &hook_prot_platform_prot); + + // User can set the MPI_PROT variable + // MPI_PROT which can also be 1 or 2, saying where we want to + // 
activate from. + char *p = getenv("MPI_PROT"); + int mode = 0; + if( NULL != p ) { + mode = atoi(p); + } + if( 1 == mode || 1 == hook_prot_platform_prot ) { + hook_prot_enable_mpi_init = true; + } + else if( 2 == mode || 2 == hook_prot_platform_prot ) { + hook_prot_enable_mpi_finalize = true; + } + + return OMPI_SUCCESS; +} + diff --git a/ompi/mca/hook/prot/hook_prot_fns.c b/ompi/mca/hook/prot/hook_prot_fns.c new file mode 100644 index 00000000000..4bdd33e3f88 --- /dev/null +++ b/ompi/mca/hook/prot/hook_prot_fns.c @@ -0,0 +1,711 @@ +/* + * Copyright (c) 2016 IBM Corporation. All rights reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include "ompi_config.h" + +#include "hook_prot.h" + +#ifdef HAVE_DLFCN_H +#include +#endif + +#include "ompi/communicator/communicator.h" +#include "ompi/mca/pml/pml.h" +#include "ompi/mca/pml/base/base.h" + +#define COMM_METHOD_MXM_YALLA 1 /* mxm (via yalla pml) */ +#define COMM_METHOD_MXM_CM 2 /* mxm (via cm pml) */ +#define COMM_METHOD_OFI 3 /* ofi (via cm pml) */ +#define COMM_METHOD_PSM 4 /* psm (via cm pml) */ +#define COMM_METHOD_VADER 5 /* vader (via ob1 pml) */ +#define COMM_METHOD_SM 6 /* sm (via ob1 pml) */ +#define COMM_METHOD_TCP 7 /* tcp (via ob1 pml) */ +#define COMM_METHOD_USNIC 8 /* usnic (via ob1 pml) */ +#define COMM_METHOD_OPENIB 9 /* openib (via ob1 pml) */ +#define COMM_METHOD_SELF 10 /* self (via ob1 pml) */ +#define COMM_METHOD_PAMI 11 /* pami (via pami pml) */ +#define COMM_METHOD_PSM2 12 /* psm2 (via cm pml) */ +#define COMM_METHOD_MAX 12 + +static int comm_method(MPI_Comm comm, int rank); +static char * comm_method_string(int method); +static int icompar(const void *a, const void *b); +static void abbreviate_list_into_string(char *str, int max, int *list, int nlist); +static void ompi_report_prots(int mode); + + +void ompi_hook_prot_mpi_init_bottom(int argc, char **argv, int requested, int *provided) +{ + if( hook_prot_enable_mpi_init ) { + ompi_report_prots( 1 ); + 
} +} + +void ompi_hook_prot_mpi_finalize_top(void) +{ + if( hook_prot_enable_mpi_finalize ) { + ompi_report_prots( 2 ); + } +} + +// ---------------------------------------------------------------------------- + +static int +comm_method(MPI_Comm comm, int rank) { + void *pml_fp; + void *dlsym_pami_fp; + void *dlsym_yalla_fp; + void *dlsym_ob1_fp; + void *dlsym_cm_fp; + + void (*mca_pml_ob1__lookup_btl_fns_fp)(ompi_communicator_t* comm, int rank, + void **sendfn, void **putfn); + void (*mca_pml_cm__lookup_mtl_fns_fp)(void **finfn); + + +// First figure out which PML (pami,yalla,ob1,cm,v) is used based on +// mca_pml.pml_enable being one of mca_pml_{pami,yalla,ob1,cm}_enable. +// In general there's a certain amount of fragility here because +// if these symbols are static ('t' in nm) dlsym won't see them. + pml_fp = (void*) mca_pml.pml_enable; + dlsym_pami_fp = dlsym(RTLD_DEFAULT, "mca_pml_pami_enable"); + dlsym_yalla_fp = dlsym(RTLD_DEFAULT, "mca_pml_yalla_enable"); + dlsym_ob1_fp = dlsym(RTLD_DEFAULT, "mca_pml_ob1_enable"); + dlsym_cm_fp = dlsym(RTLD_DEFAULT, "mca_pml_cm_enable"); + + if (pml_fp == dlsym_pami_fp) { + return COMM_METHOD_PAMI; + } + else if (pml_fp == dlsym_yalla_fp) { + return COMM_METHOD_MXM_YALLA; + } + else if (pml_fp == dlsym_ob1_fp) { + void *btl_fp, *btl_fp_put; + void *dlsym_vader_send; + void *dlsym_sm_send; + void *dlsym_tcp_send; + void *dlsym_openib_send; + void *dlsym_self_send; + void *dlsym_usnic_put; + + mca_pml_ob1__lookup_btl_fns_fp = + dlsym(RTLD_DEFAULT, "mca_pml_ob1__lookup_btl_fns"); + if (!mca_pml_ob1__lookup_btl_fns_fp) { + return 0; + } + mca_pml_ob1__lookup_btl_fns_fp(comm, rank, + &btl_fp, &btl_fp_put); + if (!btl_fp) { return 0; } + + dlsym_vader_send = dlsym(RTLD_DEFAULT, "mca_btl_vader_send"); + dlsym_sm_send = dlsym(RTLD_DEFAULT, "mca_btl_sm_send"); + dlsym_tcp_send = dlsym(RTLD_DEFAULT, "mca_btl_tcp_send"); + dlsym_openib_send = dlsym(RTLD_DEFAULT, "mca_btl_openib_send"); + dlsym_self_send = dlsym(RTLD_DEFAULT, 
"mca_btl_self_send"); +// This asymmetry for usnic is due to the fragility mentioned above. +// The btl_send for usnic is usnic_send which is a static so we can't +// see it. But usnic happens to use a global for its put operation. + dlsym_usnic_put = dlsym(RTLD_DEFAULT, "opal_btl_usnic_put"); + + if (btl_fp == dlsym_vader_send) { return COMM_METHOD_VADER; } + else if (btl_fp == dlsym_sm_send) { return COMM_METHOD_SM; } + else if (btl_fp == dlsym_tcp_send) { return COMM_METHOD_TCP; } + else if (btl_fp == dlsym_openib_send) { return COMM_METHOD_OPENIB; } + else if (btl_fp == dlsym_self_send) { return COMM_METHOD_SELF; } + else if (btl_fp_put == dlsym_usnic_put) { return COMM_METHOD_USNIC; } + else { return 0; } + } + else if (pml_fp == dlsym_cm_fp) { + void *mtl_fp; + void *dlsym_mxm_finalize; + void *dlsym_ofi_finalize; + void *dlsym_psm_finalize; + void *dlsym_psm2_finalize; + + mca_pml_cm__lookup_mtl_fns_fp = + dlsym(RTLD_DEFAULT, "mca_pml_cm__lookup_mtl_fns"); + if (!mca_pml_cm__lookup_mtl_fns_fp) { + return 0; + } + mca_pml_cm__lookup_mtl_fns_fp(&mtl_fp); + if (!mtl_fp) { return 0; } + + dlsym_mxm_finalize = dlsym(RTLD_DEFAULT, "ompi_mtl_mxm_finalize"); + dlsym_ofi_finalize = dlsym(RTLD_DEFAULT, "ompi_mtl_ofi_finalize"); + dlsym_psm_finalize = dlsym(RTLD_DEFAULT, "ompi_mtl_psm_finalize"); + dlsym_psm2_finalize = dlsym(RTLD_DEFAULT, "ompi_mtl_psm2_finalize"); + + if (mtl_fp == dlsym_mxm_finalize) { return COMM_METHOD_MXM_CM; } + else if (mtl_fp == dlsym_ofi_finalize) { return COMM_METHOD_OFI; } + else if (mtl_fp == dlsym_psm_finalize) { return COMM_METHOD_PSM; } + else if (mtl_fp == dlsym_psm2_finalize) { return COMM_METHOD_PSM2; } + else { return 0; } + } + else { + // unrecognized PML + return 0; + } + + return 0; +} + +// return abbreviation string +static char * +comm_method_string(int method) +{ + char *use = "--"; + switch (method) { + case 0 : { use = "n/a"; break; } // unconn or unknown + case COMM_METHOD_PAMI : { use = "pami"; break; } + case 
COMM_METHOD_MXM_YALLA : { use = "mxm"; break; } + case COMM_METHOD_MXM_CM : { use = "mxmc"; break; } + case COMM_METHOD_OFI : { use = "ofi"; break; } + case COMM_METHOD_PSM : { use = "psm"; break; } + case COMM_METHOD_PSM2 : { use = "psm2"; break; } + case COMM_METHOD_VADER : { use = "shm"; break; } + case COMM_METHOD_SM : { use = "sm"; break; } + case COMM_METHOD_TCP : { use = "tcp"; break; } + case COMM_METHOD_USNIC : { use = "usnic"; break; } + case COMM_METHOD_OPENIB : { use = "ib"; break; } + case COMM_METHOD_SELF : { use = "self"; break; } + default : { use = "--"; break; } + } + return use; +} + +static int +icompar(const void *a, const void *b) { + if (*(int*)a < *(int*)b) { return -1; } + if (*(int*)a > *(int*)b) { return 1; } + return 0; +} + +// Input list[] is expected to be sorted +static void +abbreviate_list_into_string(char *str, int max, int *list, int nlist) +{ + int lo, hi; + int i; + int per, tmp; + +/* + * How much space do we need in strings to store rank numbers. + * A 10000 rank run needs more digits to write the rank numbers in than + * a 4 rank job. + */ + per = 1; + tmp = list[nlist-1]; + while (tmp >= 10) { ++per; tmp /= 10; } + + str[0] = 0; + lo = hi = -1; + for (i=0; i hi) { + if (strlen(str)==0 || str[strlen(str)-1] != '.') { + if (strlen(str) != 0) { + strcpy(&str[strlen(str)], ", "); + } + if (lo != hi) { + sprintf(&str[strlen(str)], "%d - %d", lo, hi); + } else { + sprintf(&str[strlen(str)], "%d", lo); + } + } +/* + * If we've almost written to the end of the string, and we haven't + * already written ".." to indicate we're not writing any more, then + * add the "..". Also set hi=lo=list[i] since the data we just wrote is + * for the previous contiguous chunk, and the current i is the start + * of the next chunk. 
+ */ + if (((int)strlen(str)) >= max - 5 - 2*per + && + (strlen(str) == 0 || str[strlen(str)-1] != '.')) + { + strcpy(&str[strlen(str)], ", .."); + break; + } + hi = lo = list[i]; + } + } + if (strlen(str)==0 || str[strlen(str)-1] != '.') { + if (strlen(str)!=0) { + strcpy(&str[strlen(str)], ", "); + } + if (lo != hi) { + sprintf(&str[strlen(str)], "%d - %d", lo, hi); + } else { + sprintf(&str[strlen(str)], "%d", lo); + } + } +} + +// Input argument tells where we're being called from: +// 1 for init, 2 for finalize. +// The other implicit input is an environment variable we look at. +// When activated from init: we establish connections before printing. +// When activated from finalize: we just print whatever info is available. +static void +ompi_report_prots(int mode) // 1 = from init, 2 = from finalize +{ + int numhosts, i, j, k; + char *p; + int max2Dprottable = 16; + int hostidprotbrief = 0; + char * max2Dprotptr = NULL; + char * hostidprotptr = NULL; + int printTable; + int hpmp_myrank, hpmp_nprocs; + int mylocalrank, nlocalranks, myleaderrank, nleaderranks; + int ret; + ompi_communicator_t *local_comm, *leader_comm; + int *method; + char *hoststring; + char **allhoststrings; + +// early return in the case of spawn + // PMPI_Comm_get_parent(&parent); + if (ompi_mpi_comm_parent != MPI_COMM_NULL) { return; } + + hpmp_myrank = ompi_comm_rank(MPI_COMM_WORLD); + hpmp_nprocs = ompi_comm_size(MPI_COMM_WORLD); + + if (hpmp_myrank == 0) { + max2Dprotptr = getenv("MPI_PROT_MAX"); + if (max2Dprotptr) { max2Dprottable = atoi(max2Dprotptr); } + + hostidprotptr = getenv("MPI_PROT_BRIEF"); + if (hostidprotptr) { hostidprotbrief = atoi(hostidprotptr); } + } + +// Gathering layout data the same way osc_rdma_component.c does + ret = ompi_comm_split_type(MPI_COMM_WORLD, MPI_COMM_TYPE_SHARED, 0, NULL, + &local_comm); + if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) { + return; + } + mylocalrank = ompi_comm_rank(local_comm); + nlocalranks = ompi_comm_size(local_comm); + + ret = 
ompi_comm_split(MPI_COMM_WORLD, + (0 == mylocalrank) ? 0 : MPI_UNDEFINED, + hpmp_myrank, &leader_comm, false); + if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) { + ompi_comm_free(&local_comm); + return; + } + +// Non-host-leaders return early. + if (mylocalrank != 0) { + ompi_comm_free(&local_comm); + return; + } +// ------------------------------------------------- +// Only host-leaders exist from this point on. +// ------------------------------------------------- + myleaderrank = ompi_comm_rank(leader_comm); + nleaderranks = numhosts = ompi_comm_size(leader_comm); + +/* + * Allocate space for each rank to store its communication method + * on a per-host basis. But rank 0 gets enough space to store the + * data for all pairs of hosts. + */ + method = malloc(numhosts * sizeof(int) * (hpmp_myrank?1:numhosts)); + if (!method) { + ompi_comm_free(&local_comm); + ompi_comm_free(&leader_comm); + return; + } + +// Each host leader figures out a string of basic info for its host +// in hoststring. (allocated at all host leaders, can be different sizes) + + { + int len; + int *ranklist; // comm-world ranks contained in local_comm + // sorted into comm-world order (although + // local_comm should already be constructed + // in that way) + int *ranklist_in; + + ompi_group_t *local_group, *world_group; + ompi_comm_group(local_comm, &local_group); + ompi_comm_group(MPI_COMM_WORLD, &world_group); + ranklist = malloc(nlocalranks * sizeof(int) * 2); + ranklist_in = ranklist + nlocalranks; + for (i=0; i 1) { + method[i] = comm_method(local_comm, 1); + } + } + } + +// Gather the strings and the methods at rank 0. +// The gatherv of the strings takes a few steps since we have to get +// the sizes first and allocate the receiving string. 
+ { + int len, *lens, *disps; + + len = strlen(hoststring) + 1; + if (myleaderrank == 0) { + lens = malloc(nleaderranks * sizeof(int)); + disps = malloc(nleaderranks * sizeof(int)); + } + leader_comm->c_coll.coll_gather( + &len, 1, MPI_INT, + lens, 1, MPI_INT, + 0, leader_comm, leader_comm->c_coll.coll_gather_module); + if (myleaderrank == 0) { + int tlen = 0; + char *p; + for (i=0; ic_coll.coll_gatherv( + hoststring, strlen(hoststring) + 1, MPI_CHAR, + &allhoststrings[0][0], lens, disps, MPI_CHAR, + 0, leader_comm, leader_comm->c_coll.coll_gatherv_module); + } else { + // matching above call from rank 0, just &allhoststrings[0][0] + // isn't legal here, and those args aren't used at non-root anyway + leader_comm->c_coll.coll_gatherv( + hoststring, strlen(hoststring) + 1, MPI_CHAR, + NULL, NULL, NULL, MPI_CHAR, + 0, leader_comm, leader_comm->c_coll.coll_gatherv_module); + } + if (myleaderrank == 0) { + free(lens); + free(disps); + } +// and a simpler gather for the methods + leader_comm->c_coll.coll_gather( + method, nleaderranks, MPI_INT, + method, nleaderranks, MPI_INT, + 0, leader_comm, leader_comm->c_coll.coll_gather_module); + } + ompi_comm_free(&local_comm); + ompi_comm_free(&leader_comm); + +// Interception for testing purposes. Let rank-0 meddle with all its method[] +// settings, this is only for testing, eg to make sure the printing comes out +// right. 
+ if (myleaderrank == 0) { + p = getenv("MPI_PROT_FAKEFILE"); + if (p && *p) { + FILE *fp; + int setting; + fp = fopen(p, "r"); + for (i=0; i= 10) { ++per; tmp /= 10; } + for (i=0; i per) { per = tmp+1; } + } + } + + str = malloc(nleaderranks * per + 1); + p = str; + for (i=0; i=str && ((*p)==' ')) { *(p--)=0; } + printf(" host | %s\n", str); + memset(str, (int)'=', tmp); + printf("======|=%s\n", str); + + for (i=0; istr && *p==' ') { *(p--)=0; } + printf("%5d : %s\n", i, str); + } + printf("\n"); + free(str); + } +// 3: abbreviated summary of interconnect and outliers +// - check diagonal for uniformity + self, save majority method +// - check non-diagonal for uniformity, save majority method +// - print ranks with non-majority settings + { + int method_count[COMM_METHOD_MAX + 1]; + int majority_method_onhost; + int majority_method_offhost; + int uniformity_onhost; + int uniformity_offhost; + int any_self = 0; + + printf("Connection summary:\n"); + + majority_method_onhost = -1; + uniformity_onhost = 1; + for (i=0; i<=COMM_METHOD_MAX; ++i) { method_count[i] = 0; } + for (i=0; i 0 && + majority_method_onhost == COMM_METHOD_SELF) + { + majority_method_onhost = i; + } + if (method_count[i] > method_count[majority_method_onhost]) { + if (i != COMM_METHOD_SELF) { + majority_method_onhost = i; + } + } + } + if (method_count[COMM_METHOD_SELF] > 0) { any_self = 1; } + + majority_method_offhost = -1; + uniformity_offhost = 1; + for (i=0; i<=COMM_METHOD_MAX; ++i) { method_count[i] = 0; } + for (i=0; i 0 && majority_method_offhost == 0) { + majority_method_offhost = i; + } + if (method_count[i] > method_count[majority_method_offhost]) { + majority_method_offhost = i; + } + } + + char *all_or_most = "all"; + char *or_self = ""; + if (!uniformity_onhost) { + all_or_most = "most"; + } + if ((majority_method_onhost != COMM_METHOD_SELF) && any_self) { + or_self = " or self"; + } + printf(" on-host: %s connections are %s%s\n", all_or_most, + 
comm_method_string(majority_method_onhost), or_self); + + all_or_most = "all"; + if (!uniformity_offhost) { + all_or_most = "most"; + } + printf(" off-host: %s connections are %s\n", all_or_most, + comm_method_string(majority_method_offhost)); + + if (!uniformity_onhost || !uniformity_offhost) { + printf("Exceptions:\n"); + for (i=0; i 0) { +// if (!first) { +// strcat(str, " /"); +// } + sprintf(&str[strlen(str)], + " [%dx %s]", + method_count[k], + comm_method_string(k)); +// first = 0; + } + } + printf("%s\n", str); + free(str); + } + } + } + printf("\n"); + } + } + + if (myleaderrank == 0) { + free(allhoststrings); + } + free(method); +} diff --git a/ompi/mca/hook/prot/owner.txt b/ompi/mca/hook/prot/owner.txt new file mode 100644 index 00000000000..2fd247dddb1 --- /dev/null +++ b/ompi/mca/hook/prot/owner.txt @@ -0,0 +1,7 @@ +# +# owner/status file +# owner: institution that is responsible for this package +# status: e.g. active, maintenance, unmaintained +# +owner: IBM +status: active diff --git a/ompi/mca/pml/cm/pml_cm_component.c b/ompi/mca/pml/cm/pml_cm_component.c index 72f79312cf9..0dd4f53016d 100644 --- a/ompi/mca/pml/cm/pml_cm_component.c +++ b/ompi/mca/pml/cm/pml_cm_component.c @@ -12,6 +12,7 @@ * Copyright (c) 2013 Sandia National Laboratories. All rights reserved. * Copyright (c) 2015 Los Alamos National Security, LLC. All rights * reserved. + * Copyright (c) 2017 IBM Corporation. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -168,3 +169,10 @@ mca_pml_cm_component_fini(void) return OMPI_SUCCESS; } +// return the mtl's mtl_finalize function pointer in fp +void +mca_pml_cm__lookup_mtl_fns(void **fp) +{ + *fp = (void*) ompi_mtl->mtl_finalize; + return; +} diff --git a/ompi/mca/pml/ob1/pml_ob1.c b/ompi/mca/pml/ob1/pml_ob1.c index fc941df0716..aaa95b4bf71 100644 --- a/ompi/mca/pml/ob1/pml_ob1.c +++ b/ompi/mca/pml/ob1/pml_ob1.c @@ -18,6 +18,7 @@ * reserved. * Copyright (c) 2012 Cisco Systems, Inc. All rights reserved. 
* Copyright (c) 2015 FUJITSU LIMITED. All rights reserved. + * Copyright (c) 2017 IBM Corporation. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -984,3 +985,23 @@ int mca_pml_ob1_com_btl_comp(const void *v1, const void *v2) return 0; } +// Find the first btl_send function for the incoming comm,rank +void +mca_pml_ob1__lookup_btl_fns(ompi_communicator_t* comm, int rank, + void **sendfn, void **putfn) +{ + *sendfn = NULL; + *putfn = NULL; + + mca_pml_ob1_comm_proc_t *ob1_proc = mca_pml_ob1_peer_lookup(comm, rank); + ompi_proc_t *dst_proc = ob1_proc->ompi_proc; + mca_bml_base_endpoint_t* endpoint = mca_bml_base_get_endpoint(dst_proc); + if (endpoint && + endpoint->btl_send.bml_btls && + endpoint->btl_send.bml_btls[0].btl) + { + *sendfn = (void*) (endpoint->btl_send.bml_btls[0].btl->btl_send); + *putfn = (void*) (endpoint->btl_send.bml_btls[0].btl->btl_put); + } + return; +} From a760be176d827f23d7f20a00248330435e4b5e4f Mon Sep 17 00:00:00 2001 From: Mark Allen Date: Wed, 25 Jan 2017 17:51:57 -0500 Subject: [PATCH 2/2] hook/prot: adding shortened output Normally we print a -prot table up to 16 hosts that looks like this, where 16 can be changed via MPI_PROT_MAX: ``` host | 0 1 2 3 4 5 6 7 8 ======|============================================== 0 : shm ib ib ib ib ib ib ib ib 1 : ib shm ib ib ib ib ib ib ib 2 : ib ib self ib ib ib ib ib ib 3 : ib ib ib self ib ib ib ib ib 4 : ib ib ib ib self ib ib ib ib 5 : ib ib ib ib ib self ib ib ib 6 : ib ib ib ib ib ib self ib ib 7 : ib ib ib ib ib ib ib self ib 8 : ib ib ib ib ib ib ib ib self ``` This checkin reduces MPI_PROT_MAX to 12 but adds a shorter table output that looks like this: ``` host | 0 1 2 3 4 8 ======|==================== 0 : A C C C C C C C C 1 : C A C C C C C C C 2 : C C B C C C C C C 3 : C C C B C C C C C 4 : C C C C B C C C C 5 : C C C C C B C C C 6 : C C C C C C B C C 7 : C C C C C C C B C 8 : C C C C C C C C B key: A == shm key: B == self key: C == ib ``` That is used from 13 
up to 36 ranks (or 3*MPI_PROT_MAX). Signed-off-by: Joshua Hursey --- ompi/mca/hook/prot/hook_prot_fns.c | 96 +++++++++++++++++++++++++++++- 1 file changed, 94 insertions(+), 2 deletions(-) diff --git a/ompi/mca/hook/prot/hook_prot_fns.c b/ompi/mca/hook/prot/hook_prot_fns.c index 4bdd33e3f88..d5cc5a12ccf 100644 --- a/ompi/mca/hook/prot/hook_prot_fns.c +++ b/ompi/mca/hook/prot/hook_prot_fns.c @@ -268,7 +268,8 @@ ompi_report_prots(int mode) // 1 = from init, 2 = from finalize { int numhosts, i, j, k; char *p; - int max2Dprottable = 16; + int max2Dprottable = 12; + int max2D1Cprottable = 36; int hostidprotbrief = 0; char * max2Dprotptr = NULL; char * hostidprotptr = NULL; @@ -290,7 +291,10 @@ ompi_report_prots(int mode) // 1 = from init, 2 = from finalize if (hpmp_myrank == 0) { max2Dprotptr = getenv("MPI_PROT_MAX"); - if (max2Dprotptr) { max2Dprottable = atoi(max2Dprotptr); } + if (max2Dprotptr) { + max2Dprottable = atoi(max2Dprotptr); + max2D1Cprottable = 3 * max2Dprottable; + } hostidprotptr = getenv("MPI_PROT_BRIEF"); if (hostidprotptr) { hostidprotbrief = atoi(hostidprotptr); } @@ -552,6 +556,94 @@ ompi_report_prots(int mode) // 1 = from init, 2 = from finalize printf("\n"); free(str); } + else if (nleaderranks <= max2D1Cprottable) { + char *str, *p; + int tmp, per, done; + char char_code[COMM_METHOD_MAX + 1], next_char; + int method_count[COMM_METHOD_MAX + 1]; + + // characters for the number column in the 2d table, + // must be large enough for the digits needed for host numbers + per = 2; + tmp = nleaderranks; + while (tmp >= 10) { ++per; tmp /= 10; } + + // pick a character code for each comm method based on + // how many times it's in the table, use 'A' for the least common + for (i=0; i<=COMM_METHOD_MAX; ++i) { + char_code[i] = 0; + method_count[i] = 0; + } + for (i=0; i=str && ((*p)==' ')) { *(p--)=0; } + tmp = (int)strlen(str) + 2; + printf(" host | %s\n", str); + memset(str, (int)'=', tmp); + printf("======|=%s\n", str); + + for (i=0; istr && *p==' ') { 
*(p--)=0; } + printf("%5d : %s\n", i, str); + } + free(str); + for (i=0; i<=COMM_METHOD_MAX; ++i) { + for (k=0; k<=COMM_METHOD_MAX; ++k) { + if (char_code[k] == 'A' + i) { + printf("key: %c == %s\n", char_code[k], + comm_method_string(k)); + } + } + } + printf("\n"); + } // 3: abbreviated summary of interconnect and outliers // - check diagonal for uniformity + self, save majority method // - check non-diagonal for uniformity, save majority method