Skip to content

Commit c93f38d

Browse files
committed
MPI 4: Add MPI_COMM_TYPE_HW_UNGUIDED and MPI_COMM_TYPE_HW_GUIDED
* `MPI_COMM_TYPE_HW_GUIDED` supports all of the existing `OMPI_COMM_TYPE_` options. * `MPI_COMM_TYPE_HW_UNGUIDED` is recognized, but not supported, so it returns `MPI_COMM_NULL`, indicating that the MPI library cannot split the communicator any further. Signed-off-by: Joshua Hursey <[email protected]>
1 parent d32ce3f commit c93f38d

File tree

4 files changed

+142
-1
lines changed

4 files changed

+142
-1
lines changed

ompi/communicator/comm.c

Lines changed: 103 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,29 @@
5757

5858
#include "ompi/runtime/params.h"
5959

60+
#if MPI_VERSION >= 4
61+
struct ompi_comm_split_type_hw_guided_t {
62+
const char *info_value;
63+
int split_type;
64+
};
65+
typedef struct ompi_comm_split_type_hw_guided_t ompi_comm_split_type_hw_guided_t;
66+
static const ompi_comm_split_type_hw_guided_t ompi_comm_split_type_hw_guided_support[] = {
67+
{.info_value = "mpi_shared_memory", .split_type = MPI_COMM_TYPE_SHARED},
68+
{.info_value = "hwthread", .split_type = OMPI_COMM_TYPE_HWTHREAD},
69+
{.info_value = "core", .split_type = OMPI_COMM_TYPE_CORE},
70+
{.info_value = "l1cache", .split_type = OMPI_COMM_TYPE_L1CACHE},
71+
{.info_value = "l2cache", .split_type = OMPI_COMM_TYPE_L2CACHE},
72+
{.info_value = "l3cache", .split_type = OMPI_COMM_TYPE_L3CACHE},
73+
{.info_value = "socket", .split_type = OMPI_COMM_TYPE_SOCKET},
74+
{.info_value = "numanode", .split_type = OMPI_COMM_TYPE_NUMA},
75+
{.info_value = "board", .split_type = OMPI_COMM_TYPE_BOARD},
76+
{.info_value = "host", .split_type = OMPI_COMM_TYPE_HOST},
77+
{.info_value = "cu", .split_type = OMPI_COMM_TYPE_CU},
78+
{.info_value = "cluster", .split_type = OMPI_COMM_TYPE_CLUSTER},
79+
{.info_value = NULL},
80+
};
81+
#endif
82+
6083
/*
6184
** sort-function for MPI_Comm_split
6285
*/
@@ -764,6 +787,17 @@ static int ompi_comm_split_type_get_part (ompi_group_t *group, const int split_t
764787
case OMPI_COMM_TYPE_CLUSTER:
765788
include = OPAL_PROC_ON_LOCAL_CLUSTER(locality);
766789
break;
790+
#if MPI_VERSION >= 4
791+
case MPI_COMM_TYPE_HW_GUIDED:
792+
case MPI_COMM_TYPE_HW_UNGUIDED:
793+
/*
794+
* MPI_COMM_TYPE_HW_(UN)GUIDED handled in calling function.
795+
* We should not get here as the split type will be changed
796+
* at a higher level.
797+
*/
798+
opal_output(0, "Error: in ompi_comm_split_type_get_part() unexpected split_type=%d", split_type);
799+
return OMPI_ERR_BAD_PARAM;
800+
#endif
767801
}
768802

769803
if (include) {
@@ -899,6 +933,75 @@ int ompi_comm_split_type (ompi_communicator_t *comm, int split_type, int key,
899933
return OMPI_SUCCESS;
900934
}
901935

936+
#if MPI_VERSION >= 4
937+
if (MPI_COMM_TYPE_HW_GUIDED == global_split_type) {
938+
int flag;
939+
opal_cstring_t *value = NULL;
940+
941+
opal_info_get(info, "mpi_hw_resource_type", &value, &flag);
942+
/* If key is not in the 'info', then return MPI_COMM_NULL.
943+
* This is caught at the MPI interface level, but it doesn't hurt to
944+
* check it again.
945+
*/
946+
if (!flag) {
947+
*newcomm = MPI_COMM_NULL;
948+
return OMPI_SUCCESS;
949+
}
950+
951+
/* Verify the value associated with the "mpi_hw_resource_type" key
952+
* - is supported, and
953+
* - is the same value at all ranks
954+
*
955+
* If not supported, then return MPI_COMM_NULL.
956+
* If not the same at all ranks, throw an error.
957+
*/
958+
flag = 0;
959+
for (int i = 0; ompi_comm_split_type_hw_guided_support[i].info_value; ++i) {
960+
if (0 == strncasecmp(value->string, ompi_comm_split_type_hw_guided_support[i].info_value, strlen(ompi_comm_split_type_hw_guided_support[i].info_value))) {
961+
global_split_type = ompi_comm_split_type_hw_guided_support[i].split_type;
962+
flag = 1;
963+
break;
964+
}
965+
}
966+
/* If not supported, then return MPI_COMM_NULL. */
967+
if (0 == flag) {
968+
*newcomm = MPI_COMM_NULL;
969+
return OMPI_SUCCESS;
970+
}
971+
972+
/* Verify all ranks have supplied the same info 'value' represented at
973+
* this point by global_split_type since we swapped out 'MPI_COMM_TYPE_HW_GUIDED'
974+
* for the specific OMPI_COMM_TYPE_ represented by the info 'value'.
975+
*/
976+
tmp[0] = global_split_type;
977+
tmp[1] = -global_split_type;
978+
rc = comm->c_coll->coll_allreduce (MPI_IN_PLACE, &tmp, 2, MPI_INT, MPI_MAX, comm,
979+
comm->c_coll->coll_allreduce_module);
980+
if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) {
981+
return rc;
982+
}
983+
/* If not the same at all ranks, throw an error. */
984+
if (tmp[0] != -tmp[1]) {
985+
if (0 == ompi_comm_rank(comm)) {
986+
opal_output(0, "Error: Mismatched info values for MPI_COMM_TYPE_HW_GUIDED");
987+
}
988+
return OMPI_ERR_BAD_PARAM;
989+
}
990+
}
991+
992+
/* TODO: Make this better...
993+
*
994+
* See Example 7.4 in the MPI 4.0 standard for example usage.
995+
*
996+
* Stage 0: Recognized, but not implemented.
997+
* Stage 1: Do better than that
998+
*/
999+
if (MPI_COMM_TYPE_HW_UNGUIDED == global_split_type) {
1000+
*newcomm = MPI_COMM_NULL;
1001+
return OMPI_SUCCESS;
1002+
}
1003+
#endif
1004+
9021005
/* Step 2: Build potential communicator groups. If any ranks will not be part of
9031006
* the ultimate communicator we will drop them later. This saves doing an extra
9041007
* allgather on the whole communicator. By using ompi_comm_split() later only

ompi/include/mpi.h.in

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -852,7 +852,11 @@ enum {
852852
OMPI_COMM_TYPE_BOARD,
853853
OMPI_COMM_TYPE_HOST,
854854
OMPI_COMM_TYPE_CU,
855-
OMPI_COMM_TYPE_CLUSTER
855+
OMPI_COMM_TYPE_CLUSTER,
856+
#if MPI_VERSION >= 4
857+
MPI_COMM_TYPE_HW_UNGUIDED,
858+
MPI_COMM_TYPE_HW_GUIDED
859+
#endif
856860
};
857861
#define OMPI_COMM_TYPE_NODE MPI_COMM_TYPE_SHARED
858862

ompi/include/mpif-values.pl

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -395,6 +395,10 @@ sub write_file {
395395
$constants->{OMPI_COMM_TYPE_HOST} = 9;
396396
$constants->{OMPI_COMM_TYPE_CU} = 10;
397397
$constants->{OMPI_COMM_TYPE_CLUSTER} = 11;
398+
if ($constants->{MPI_VERSION} >= 4) {
399+
$constants->{MPI_COMM_TYPE_HW_UNGUIDED} = 12;
400+
$constants->{MPI_COMM_TYPE_HW_GUIDED} = 13;
401+
}
398402

399403
#----------------------------------------------------------------------------
400404

ompi/mpi/c/comm_split_type.c

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -77,6 +77,10 @@ int MPI_Comm_split_type(MPI_Comm comm, int split_type, int key,
7777
OMPI_COMM_TYPE_L1CACHE != split_type &&
7878
OMPI_COMM_TYPE_CORE != split_type &&
7979
OMPI_COMM_TYPE_HWTHREAD != split_type &&
80+
#if MPI_VERSION >= 4
81+
MPI_COMM_TYPE_HW_UNGUIDED != split_type &&
82+
MPI_COMM_TYPE_HW_GUIDED != split_type &&
83+
#endif
8084
MPI_UNDEFINED != split_type ) {
8185
return OMPI_ERRHANDLER_INVOKE(comm, MPI_ERR_ARG,
8286
FUNC_NAME);
@@ -99,6 +103,32 @@ int MPI_Comm_split_type(MPI_Comm comm, int split_type, int key,
99103
}
100104
#endif
101105

106+
#if MPI_VERSION >= 4
107+
if ( MPI_COMM_TYPE_HW_GUIDED == split_type ) {
108+
int flag;
109+
opal_cstring_t *value = NULL;
110+
111+
/* MPI_Info is required for this split_type.
112+
* Not an error condition, per MPI 4.0.
113+
*/
114+
if ( MPI_INFO_NULL == info ) {
115+
*newcomm = MPI_COMM_NULL;
116+
rc = MPI_SUCCESS;
117+
OMPI_ERRHANDLER_RETURN ( rc, comm, rc, FUNC_NAME);
118+
}
119+
120+
/* MPI_Info with key "mpi_hw_resource_type" is required for this split_type.
121+
* Not an error condition, per MPI 4.0.
122+
*/
123+
ompi_info_get(info, "mpi_hw_resource_type", &value, &flag);
124+
if ( !flag ) {
125+
*newcomm = MPI_COMM_NULL;
126+
rc = MPI_SUCCESS;
127+
OMPI_ERRHANDLER_RETURN ( rc, comm, rc, FUNC_NAME);
128+
}
129+
}
130+
#endif
131+
102132
if( (MPI_COMM_SELF == comm) && (MPI_UNDEFINED == split_type) ) {
103133
*newcomm = MPI_COMM_NULL;
104134
rc = MPI_SUCCESS;

0 commit comments

Comments
 (0)