Skip to content

Commit 3e50202

Browse files
committed
MPI 4: Add MPI_COMM_TYPE_HW_UNGUIDED and MPI_COMM_TYPE_HW_GUIDED
* `MPI_COMM_TYPE_HW_GUIDED` supports all of the existing `OMPI_COMM_TYPE_` options. * `MPI_COMM_TYPE_HW_UNGUIDED` is recognized, but not supported, so it returns `MPI_COMM_NULL`, indicating that the MPI library cannot split the communicator any further. Signed-off-by: Joshua Hursey <[email protected]>
1 parent d32ce3f commit 3e50202

File tree

4 files changed

+133
-4
lines changed

4 files changed

+133
-4
lines changed

ompi/communicator/comm.c

Lines changed: 99 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@
2222
* and Technology (RIST). All rights reserved.
2323
* Copyright (c) 2014-2020 Intel, Inc. All rights reserved.
2424
* Copyright (c) 2015 Mellanox Technologies. All rights reserved.
25-
* Copyright (c) 2017 IBM Corporation. All rights reserved.
25+
* Copyright (c) 2017-2022 IBM Corporation. All rights reserved.
2626
* Copyright (c) 2021 Nanook Consulting. All rights reserved.
2727
* Copyright (c) 2018-2022 Triad National Security, LLC. All rights
2828
* reserved.
@@ -57,6 +57,28 @@
5757

5858
#include "ompi/runtime/params.h"
5959

60+
/*
 * One entry of the MPI_COMM_TYPE_HW_GUIDED lookup table.
 *
 *   info_value  string compared against the value of the
 *               "mpi_hw_resource_type" info key; a NULL pointer marks
 *               the end of the table.
 *   split_type  split type that replaces MPI_COMM_TYPE_HW_GUIDED when
 *               info_value matches.
 */
struct ompi_comm_split_type_hw_guided_t {
61+
const char *info_value;
62+
int split_type;
63+
};
64+
typedef struct ompi_comm_split_type_hw_guided_t ompi_comm_split_type_hw_guided_t;
65+
66+
/*
 * Values of the "mpi_hw_resource_type" info key accepted for
 * MPI_COMM_TYPE_HW_GUIDED, each paired with the split type it selects.
 * The table is scanned in order and the first matching entry wins; the
 * final entry, whose info_value is NULL, terminates the scan.
 *
 * NOTE(review): the lookup in ompi_comm_split_type() uses strncasecmp()
 * limited to strlen(info_value), i.e. a case-insensitive prefix match, so
 * a user string that merely begins with an entry (e.g. "hostname" vs
 * "host") is also accepted -- confirm whether that is intended.
 */
static const ompi_comm_split_type_hw_guided_t ompi_comm_split_type_hw_guided_support[] = {
67+
{.info_value = "mpi_shared_memory", .split_type = MPI_COMM_TYPE_SHARED},
68+
{.info_value = "hwthread", .split_type = OMPI_COMM_TYPE_HWTHREAD},
69+
{.info_value = "core", .split_type = OMPI_COMM_TYPE_CORE},
70+
{.info_value = "l1cache", .split_type = OMPI_COMM_TYPE_L1CACHE},
71+
{.info_value = "l2cache", .split_type = OMPI_COMM_TYPE_L2CACHE},
72+
{.info_value = "l3cache", .split_type = OMPI_COMM_TYPE_L3CACHE},
73+
{.info_value = "socket", .split_type = OMPI_COMM_TYPE_SOCKET},
74+
{.info_value = "numanode", .split_type = OMPI_COMM_TYPE_NUMA},
75+
{.info_value = "board", .split_type = OMPI_COMM_TYPE_BOARD},
76+
{.info_value = "host", .split_type = OMPI_COMM_TYPE_HOST},
77+
{.info_value = "cu", .split_type = OMPI_COMM_TYPE_CU},
78+
{.info_value = "cluster", .split_type = OMPI_COMM_TYPE_CLUSTER},
79+
/* Sentinel: NULL info_value ends the table. */
{.info_value = NULL},
80+
};
81+
6082
/*
6183
** sort-function for MPI_Comm_split
6284
*/
@@ -764,6 +786,15 @@ static int ompi_comm_split_type_get_part (ompi_group_t *group, const int split_t
764786
case OMPI_COMM_TYPE_CLUSTER:
765787
include = OPAL_PROC_ON_LOCAL_CLUSTER(locality);
766788
break;
789+
case MPI_COMM_TYPE_HW_GUIDED:
790+
case MPI_COMM_TYPE_HW_UNGUIDED:
791+
/*
792+
* MPI_COMM_TYPE_HW_(UN)GUIDED handled in calling function.
793+
* We should not get here as the split type will be changed
794+
* at a higher level.
795+
*/
796+
opal_output(0, "Error: in ompi_comm_split_type_get_part() unexpected split_type=%d", split_type);
797+
return OMPI_ERR_BAD_PARAM;
767798
}
768799

769800
if (include) {
@@ -899,6 +930,73 @@ int ompi_comm_split_type (ompi_communicator_t *comm, int split_type, int key,
899930
return OMPI_SUCCESS;
900931
}
901932

933+
if (MPI_COMM_TYPE_HW_GUIDED == global_split_type) {
934+
int flag;
935+
opal_cstring_t *value = NULL;
936+
937+
opal_info_get(info, "mpi_hw_resource_type", &value, &flag);
938+
/* If key is not in the 'info', then return MPI_COMM_NULL.
939+
* This is caught at the MPI interface level, but it doesn't hurt to
940+
* check it again.
941+
*/
942+
if (!flag) {
943+
*newcomm = MPI_COMM_NULL;
944+
return OMPI_SUCCESS;
945+
}
946+
947+
/* Verify the value associated with the "mpi_hw_resource_type" key
948+
* - is supported, and
949+
* - is the same value at all ranks
950+
*
951+
* If not supported, then return MPI_COMM_NULL.
952+
* If not the same at all ranks, throw an error.
953+
*/
954+
flag = 0;
955+
for (int i = 0; ompi_comm_split_type_hw_guided_support[i].info_value; ++i) {
956+
if (0 == strncasecmp(value->string, ompi_comm_split_type_hw_guided_support[i].info_value, strlen(ompi_comm_split_type_hw_guided_support[i].info_value))) {
957+
global_split_type = ompi_comm_split_type_hw_guided_support[i].split_type;
958+
flag = 1;
959+
break;
960+
}
961+
}
962+
/* If not supported, then return MPI_COMM_NULL. */
963+
if (0 == flag) {
964+
*newcomm = MPI_COMM_NULL;
965+
return OMPI_SUCCESS;
966+
}
967+
968+
/* Verify all ranks have supplied the same info 'value' represented at
969+
* this point by global_split_type since we swapped out 'MPI_COMM_TYPE_HW_GUIDED'
970+
* for the specific OMPI_COMM_TYPE_ represented by the info 'value'.
971+
*/
972+
tmp[0] = global_split_type;
973+
tmp[1] = -global_split_type;
974+
rc = comm->c_coll->coll_allreduce (MPI_IN_PLACE, &tmp, 2, MPI_INT, MPI_MAX, comm,
975+
comm->c_coll->coll_allreduce_module);
976+
if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) {
977+
return rc;
978+
}
979+
/* If not the same at all ranks, throw an error. */
980+
if (tmp[0] != -tmp[1]) {
981+
if (0 == ompi_comm_rank(comm)) {
982+
opal_output(0, "Error: Mismatched info values for MPI_COMM_TYPE_HW_GUIDED");
983+
}
984+
return OMPI_ERR_BAD_PARAM;
985+
}
986+
}
987+
988+
/* TODO: Make this better...
989+
*
990+
* See Example 7.4 in the MPI 4.0 standard for example usage.
991+
*
992+
* Stage 0: Recognized, but not implemented.
993+
* Stage 1: Do better than that
994+
*/
995+
if (MPI_COMM_TYPE_HW_UNGUIDED == global_split_type) {
996+
*newcomm = MPI_COMM_NULL;
997+
return OMPI_SUCCESS;
998+
}
999+
9021000
/* Step 2: Build potential communicator groups. If any ranks will not be part of
9031001
* the ultimate communicator we will drop them later. This saves doing an extra
9041002
* allgather on the whole communicator. By using ompi_comm_split() later only

ompi/include/mpi.h.in

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@
1919
* Copyright (c) 2015 University of Houston. All rights reserved.
2020
* Copyright (c) 2015-2021 Research Organization for Information Science
2121
* and Technology (RIST). All rights reserved.
22-
* Copyright (c) 2017-2019 IBM Corporation. All rights reserved.
22+
* Copyright (c) 2017-2022 IBM Corporation. All rights reserved.
2323
* Copyright (c) 2018 FUJITSU LIMITED. All rights reserved.
2424
* Copyright (c) 2021-2022 Google, LLC. All rights reserved.
2525
* Copyright (c) 2021-2022 Amazon.com, Inc. or its affiliates. All Rights
@@ -852,7 +852,9 @@ enum {
852852
OMPI_COMM_TYPE_BOARD,
853853
OMPI_COMM_TYPE_HOST,
854854
OMPI_COMM_TYPE_CU,
855-
OMPI_COMM_TYPE_CLUSTER
855+
OMPI_COMM_TYPE_CLUSTER,
856+
MPI_COMM_TYPE_HW_UNGUIDED,
857+
MPI_COMM_TYPE_HW_GUIDED
856858
};
857859
#define OMPI_COMM_TYPE_NODE MPI_COMM_TYPE_SHARED
858860

ompi/include/mpif-values.pl

100755100644
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
# Copyright (c) 2020 The University of Tennessee and The University
88
# of Tennessee Research Foundation. All rights
99
# reserved.
10+
# Copyright (c) 2022 IBM Corporation. All rights reserved.
1011
# $COPYRIGHT$
1112
#
1213
# Additional copyrights may follow
@@ -395,6 +396,8 @@ sub write_file {
395396
$constants->{OMPI_COMM_TYPE_HOST} = 9;
396397
$constants->{OMPI_COMM_TYPE_CU} = 10;
397398
$constants->{OMPI_COMM_TYPE_CLUSTER} = 11;
399+
$constants->{MPI_COMM_TYPE_HW_UNGUIDED} = 12;
400+
$constants->{MPI_COMM_TYPE_HW_GUIDED} = 13;
398401

399402
#----------------------------------------------------------------------------
400403

ompi/mpi/c/comm_split_type.c

Lines changed: 27 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@
1414
* Copyright (c) 2010-2012 Oak Ridge National Labs. All rights reserved.
1515
* Copyright (c) 2015 Research Organization for Information Science
1616
* and Technology (RIST). All rights reserved.
17-
* Copyright (c) 2017 IBM Corporation. All rights reserved.
17+
* Copyright (c) 2017-2022 IBM Corporation. All rights reserved.
1818
* $COPYRIGHT$
1919
*
2020
* Additional copyrights may follow
@@ -65,6 +65,8 @@ int MPI_Comm_split_type(MPI_Comm comm, int split_type, int key,
6565
}
6666

6767
if ( MPI_COMM_TYPE_SHARED != split_type && // Same as OMPI_COMM_TYPE_NODE
68+
MPI_COMM_TYPE_HW_UNGUIDED != split_type &&
69+
MPI_COMM_TYPE_HW_GUIDED != split_type &&
6870
OMPI_COMM_TYPE_CLUSTER != split_type &&
6971
OMPI_COMM_TYPE_CU != split_type &&
7072
OMPI_COMM_TYPE_HOST != split_type &&
@@ -99,6 +101,30 @@ int MPI_Comm_split_type(MPI_Comm comm, int split_type, int key,
99101
}
100102
#endif
101103

104+
if ( MPI_COMM_TYPE_HW_GUIDED == split_type ) {
105+
int flag;
106+
opal_cstring_t *value = NULL;
107+
108+
/* MPI_Info is required for this split_type.
109+
* Not an error condition, per MPI 4.0.
110+
*/
111+
if ( MPI_INFO_NULL == info ) {
112+
*newcomm = MPI_COMM_NULL;
113+
rc = MPI_SUCCESS;
114+
OMPI_ERRHANDLER_RETURN ( rc, comm, rc, FUNC_NAME);
115+
}
116+
117+
/* MPI_Info with key "mpi_hw_resource_type" is required for this split_type.
118+
* Not an error condition, per MPI 4.0.
119+
*/
120+
ompi_info_get(info, "mpi_hw_resource_type", &value, &flag);
121+
if ( !flag ) {
122+
*newcomm = MPI_COMM_NULL;
123+
rc = MPI_SUCCESS;
124+
OMPI_ERRHANDLER_RETURN ( rc, comm, rc, FUNC_NAME);
125+
}
126+
}
127+
102128
if( (MPI_COMM_SELF == comm) && (MPI_UNDEFINED == split_type) ) {
103129
*newcomm = MPI_COMM_NULL;
104130
rc = MPI_SUCCESS;

0 commit comments

Comments
 (0)