Skip to content

Commit d44e532

Browse files
committed
Add acoll collective component
acoll is a collective component optimized for AMD "Zen"-based processors. It supports Bcast, Allreduce, Reduce, Barrier, Gather and Allgather APIs. Signed-off-by: Nithya V S <[email protected]>
1 parent 8ecda52 commit d44e532

14 files changed

+4175
-0
lines changed

ompi/mca/coll/acoll/LICENSE.md

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
Copyright (C) 2024, Advanced Micro Devices, Inc. All rights reserved.
2+
3+
Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
4+
5+
1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
6+
7+
2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.
8+
9+
3. Neither the name of the copyright holder nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission.
10+
11+
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS “AS IS” AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

ompi/mca/coll/acoll/Makefile.am

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,45 @@
1+
#
2+
# Copyright (C) 2024, Advanced Micro Devices, Inc. All rights reserved.
3+
# $COPYRIGHT$
4+
#
5+
# Additional copyrights may follow
6+
#
7+
# $HEADER$
8+
#
9+
10+
AM_CPPFLAGS = $(coll_acoll_CPPFLAGS)
11+
12+
# Headers and implementation files of the component; the same list feeds
# both the DSO target and the static (convenience library) target below.
sources = \
13+
coll_acoll.h \
14+
coll_acoll_utils.h \
15+
coll_acoll_allgather.c \
16+
coll_acoll_bcast.c \
17+
coll_acoll_gather.c \
18+
coll_acoll_reduce.c \
19+
coll_acoll_allreduce.c \
20+
coll_acoll_barrier.c \
21+
coll_acoll_component.c \
22+
coll_acoll_module.c
23+
24+
# Make the output library in this directory, and name it either
25+
# mca_<type>_<name>.la (for DSO builds) or libmca_<type>_<name>.la
26+
# (for static builds).
27+
28+
# Exactly one of component_install / component_noinst is non-empty,
# depending on whether the component is built as a DSO.
if MCA_BUILD_ompi_coll_acoll_DSO
29+
component_noinst =
30+
component_install = mca_coll_acoll.la
31+
else
32+
component_noinst = libmca_coll_acoll.la
33+
component_install =
34+
endif
35+
36+
# DSO build: installed into $(ompilibdir) and linked against the main
# Open MPI library (its name is substituted by configure).
mcacomponentdir = $(ompilibdir)
37+
mcacomponent_LTLIBRARIES = $(component_install)
38+
mca_coll_acoll_la_SOURCES = $(sources)
39+
mca_coll_acoll_la_LDFLAGS = -module -avoid-version $(coll_acoll_LDFLAGS)
40+
mca_coll_acoll_la_LIBADD = $(top_builddir)/ompi/lib@OMPI_LIBMPI_NAME@.la $(coll_acoll_LIBS)
41+
42+
# Static build: not installed on its own (noinst).
noinst_LTLIBRARIES = $(component_noinst)
43+
libmca_coll_acoll_la_SOURCES = $(sources)
44+
libmca_coll_acoll_la_LIBADD = $(coll_acoll_LIBS)
45+
libmca_coll_acoll_la_LDFLAGS = -module -avoid-version $(coll_acoll_LDFLAGS)

ompi/mca/coll/acoll/README

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
Copyright (c) 2023-2024 Advanced Micro Devices, Inc. All rights
2+
reserved.
3+
4+
$COPYRIGHT$
5+
6+
Additional copyrights may follow
7+
8+
$HEADER$
9+
10+
===========================================================================
11+
12+
The collective component, AMD Coll (“acoll”), is a high-performance MPI collective component for the Open MPI library that is optimized for AMD "Zen"-based processors. A collective component comprises a collection of algorithms (collectives) that represent a widely used set of communication patterns in MPI-based applications, where a group of processes participates in computation and data movement related operations. “acoll” is optimized for communications within a single node of AMD “Zen”-based processors and provides the following commonly used collective algorithms: broadcast (MPI_Bcast), allreduce (MPI_Allreduce), reduce (MPI_Reduce), gather (MPI_Gather), allgather (MPI_Allgather), and barrier (MPI_Barrier).
13+
14+
At present, “acoll” has been tested with Open MPI v5.0.2 and can be built as part of Open MPI.
15+
16+
To run an application with acoll, use the following command line parameters:
17+
- mpirun <common mpi runtime parameters> --mca coll acoll,tuned,libnbc,basic <executable>

ompi/mca/coll/acoll/coll_acoll.h

Lines changed: 227 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,227 @@
1+
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
2+
/*
3+
* Copyright (C) 2024, Advanced Micro Devices, Inc. All rights reserved.
4+
* $COPYRIGHT$
5+
*
6+
* Additional copyrights may follow
7+
*
8+
* $HEADER$
9+
*/
10+
11+
#ifndef MCA_COLL_ACOLL_EXPORT_H
12+
#define MCA_COLL_ACOLL_EXPORT_H
13+
14+
#include "ompi_config.h"
15+
16+
#include "mpi.h"
17+
#include "ompi/communicator/communicator.h"
18+
#include "ompi/mca/coll/base/coll_base_functions.h"
19+
#include "ompi/mca/coll/coll.h"
20+
#include "ompi/mca/mca.h"
21+
#include "ompi/request/request.h"
22+
23+
#ifdef HAVE_XPMEM_H
24+
# include "opal/mca/rcache/base/base.h"
25+
# include <xpmem.h>
26+
#endif
27+
28+
#include "opal/mca/shmem/base/base.h"
29+
#include "opal/mca/shmem/shmem.h"
30+
31+
BEGIN_C_DECLS
32+
33+
/*
 * Globally exported variables.
 *
 * Only declarations appear here; the definitions live in another
 * translation unit of the component.
 * NOTE(review): apart from the component descriptor, the names suggest
 * these are the component's tunable parameters (priority, subgroup
 * size/scale, per-collective algorithm switches) -- confirm against
 * coll_acoll_component.c.
 */
34+
OMPI_DECLSPEC extern const mca_coll_base_component_2_4_0_t mca_coll_acoll_component;
35+
extern int mca_coll_acoll_priority;
36+
extern int mca_coll_acoll_sg_size;
37+
extern int mca_coll_acoll_sg_scale;
38+
extern int mca_coll_acoll_node_size;
39+
extern int mca_coll_acoll_use_dynamic_rules;
40+
extern int mca_coll_acoll_mnode_enable;
41+
extern int mca_coll_acoll_bcast_lin0;
42+
extern int mca_coll_acoll_bcast_lin1;
43+
extern int mca_coll_acoll_bcast_lin2;
44+
extern int mca_coll_acoll_bcast_nonsg;
45+
extern int mca_coll_acoll_allgather_lin;
46+
extern int mca_coll_acoll_allgather_ring_1;
47+
48+
/*
 * API functions.
 *
 * Prototypes only; the implementations are in the component's .c files.
 * Every collective entry point takes the communicator and the coll module
 * as its last two arguments and returns an int.
 * NOTE(review): the return value is presumably an OMPI/MPI status code --
 * confirm against the implementations.
 */
49+
/* NOTE(review): presumably the coll framework's component-level query
 * hook; the two flags describe the requested threading support -- confirm. */
int mca_coll_acoll_init_query(bool enable_progress_threads, bool enable_mpi_threads);
50+
/* Returns a module pointer for `comm` and writes a value through `priority`.
 * NOTE(review): selection semantics are defined by the coll framework. */
mca_coll_base_module_t *mca_coll_acoll_comm_query(struct ompi_communicator_t *comm, int *priority);
51+
52+
int mca_coll_acoll_module_enable(mca_coll_base_module_t *module, struct ompi_communicator_t *comm);
53+
54+
/* Allgather entry point (no "_intra" suffix, unlike gather/reduce/
 * allreduce/barrier below). */
int mca_coll_acoll_allgather(const void *sbuf, int scount, struct ompi_datatype_t *sdtype,
55+
void *rbuf, int rcount, struct ompi_datatype_t *rdtype,
56+
struct ompi_communicator_t *comm, mca_coll_base_module_t *module);
57+
58+
/* Broadcast entry point; `buff` is non-const because it is both the
 * source (on root) and the destination (elsewhere) in MPI_Bcast. */
int mca_coll_acoll_bcast(void *buff, int count, struct ompi_datatype_t *datatype, int root,
59+
struct ompi_communicator_t *comm, mca_coll_base_module_t *module);
60+
61+
/* Gather entry point. */
int mca_coll_acoll_gather_intra(const void *sbuf, int scount, struct ompi_datatype_t *sdtype,
62+
void *rbuf, int rcount, struct ompi_datatype_t *rdtype, int root,
63+
struct ompi_communicator_t *comm, mca_coll_base_module_t *module);
64+
65+
/* Reduce entry point. */
int mca_coll_acoll_reduce_intra(const void *sbuf, void *rbuf, int count,
66+
struct ompi_datatype_t *dtype, struct ompi_op_t *op, int root,
67+
struct ompi_communicator_t *comm, mca_coll_base_module_t *module);
68+
69+
/* Allreduce entry point. */
int mca_coll_acoll_allreduce_intra(const void *sbuf, void *rbuf, int count,
70+
struct ompi_datatype_t *dtype, struct ompi_op_t *op,
71+
struct ompi_communicator_t *comm,
72+
mca_coll_base_module_t *module);
73+
74+
/* Barrier entry point. */
int mca_coll_acoll_barrier_intra(struct ompi_communicator_t *comm, mca_coll_base_module_t *module);
75+
76+
/* NOTE(review): presumably a fault-tolerance event hook; its use is not
 * visible in this header -- confirm it is still required. */
int mca_coll_acoll_ft_event(int status);
77+
78+
END_C_DECLS
79+
80+
/* Number of coll_acoll_subcomms_t slots embedded in every module (it sizes
 * the `subc` array of struct mca_coll_acoll_module_t). */
#define MCA_COLL_ACOLL_MAX_CID 100
81+
/* NOTE(review): presumably the number of root changes (see `num_root_change`
 * in coll_acoll_subcomms_t) tolerated before some re-initialization -- the
 * use site is not in this header; confirm. */
#define MCA_COLL_ACOLL_ROOT_CHANGE_THRESH 10
82+
83+
/*
 * Allowed values for the `sg_size` field of struct mca_coll_acoll_module_t.
 * NOTE(review): "sg" presumably means subgroup, i.e. 8 or 16 ranks per
 * subgroup -- confirm.
 */
typedef enum MCA_COLL_ACOLL_SG_SIZES {
84+
MCA_COLL_ACOLL_SG_SIZE_1 = 8,
85+
MCA_COLL_ACOLL_SG_SIZE_2 = 16
86+
} MCA_COLL_ACOLL_SG_SIZES;
87+
88+
/*
 * Allowed values for the `sg_scale` field of struct mca_coll_acoll_module_t:
 * the powers of two from 1 to 16.
 * NOTE(review): presumably a scaling factor applied to the subgroup size --
 * confirm at the use sites.
 */
typedef enum MCA_COLL_ACOLL_SG_SCALES {
89+
MCA_COLL_ACOLL_SG_SCALE_1 = 1,
90+
MCA_COLL_ACOLL_SG_SCALE_2 = 2,
91+
MCA_COLL_ACOLL_SG_SCALE_3 = 4,
92+
MCA_COLL_ACOLL_SG_SCALE_4 = 8,
93+
MCA_COLL_ACOLL_SG_SCALE_5 = 16
94+
} MCA_COLL_ACOLL_SG_SCALES;
95+
96+
/*
 * Identifiers for sub-communicator kinds, numbered consecutively from 0.
 * MCA_COLL_ACOLL_NUM_SC is a count sentinel (6), not a real kind.
 * NOTE(review): the names suggest node / socket / NUMA / L3 / leaf levels;
 * the use sites are not in this header -- confirm.
 */
typedef enum MCA_COLL_ACOLL_SUBCOMMS {
97+
MCA_COLL_ACOLL_NODE_L = 0,
98+
MCA_COLL_ACOLL_INTRA,
99+
MCA_COLL_ACOLL_SOCK_L,
100+
MCA_COLL_ACOLL_NUMA_L,
101+
MCA_COLL_ACOLL_L3_L,
102+
MCA_COLL_ACOLL_LEAF,
103+
MCA_COLL_ACOLL_NUM_SC
104+
} MCA_COLL_ACOLL_SUBCOMMS;
105+
106+
/*
 * Layer index (node, socket). MCA_COLL_ACOLL_NUM_LAYERS (2) is a count
 * sentinel; it dimensions `local_root`, and the second axis of `base_comm`
 * and `base_root`, in coll_acoll_subcomms_t.
 */
typedef enum MCA_COLL_ACOLL_LAYERS {
107+
MCA_COLL_ACOLL_LYR_NODE = 0,
108+
MCA_COLL_ACOLL_LYR_SOCKET,
109+
MCA_COLL_ACOLL_NUM_LAYERS
110+
} MCA_COLL_ACOLL_LAYERS;
111+
112+
/*
 * Base-layer index (L3 cache, NUMA). MCA_COLL_ACOLL_NUM_BASE_LYRS (2) is a
 * count sentinel; it dimensions `base_rank`, and the first axis of
 * `base_comm` and `base_root`, in coll_acoll_subcomms_t.
 */
typedef enum MCA_COLL_ACOLL_BASE_LYRS {
113+
MCA_COLL_ACOLL_L3CACHE = 0,
114+
MCA_COLL_ACOLL_NUMA,
115+
MCA_COLL_ACOLL_NUM_BASE_LYRS
116+
} MCA_COLL_ACOLL_BASE_LYRS;
117+
118+
/*
 * Per-sub-communicator data block, referenced through the `data` pointer of
 * coll_acoll_subcomms_t.
 *
 * The first group of fields exists only when HAVE_XPMEM_H is defined, so
 * the struct layout differs between XPMEM and non-XPMEM builds.
 * NOTE(review): field meanings below are inferred from names -- the
 * "all*" pointers presumably hold one entry per rank, and l1/l2 presumably
 * denote two grouping levels; confirm against the .c files.
 */
typedef struct coll_acoll_data {
119+
#ifdef HAVE_XPMEM_H
120+
xpmem_segid_t *allseg_id;
121+
xpmem_apid_t *all_apid;
122+
void **allshm_sbuf;
123+
void **allshm_rbuf;
124+
void **xpmem_saddr;
125+
void **xpmem_raddr;
126+
mca_rcache_base_module_t **rcache;
127+
void *scratch;
128+
#endif
129+
opal_shmem_ds_t *allshmseg_id;
130+
void **allshmmmap_sbuf;
131+
132+
int comm_size;
133+
int l1_local_rank;
134+
int l2_local_rank;
135+
int l1_gp_size;
136+
int *l1_gp;
137+
int *l2_gp;
138+
int l2_gp_size;
139+
int offset[4];
140+
int sync[2];
141+
} coll_acoll_data_t;
142+
143+
/*
 * Sub-communicator bookkeeping. struct mca_coll_acoll_module_t embeds
 * MCA_COLL_ACOLL_MAX_CID of these by value (its `subc` array).
 *
 * Contents, as visible from the declarations:
 *   - pointers to a set of communicators (local, leader, subgroup, NUMA,
 *     socket, node, inter, ...), plus the 2-D `base_comm` table indexed by
 *     [MCA_COLL_ACOLL_BASE_LYRS][MCA_COLL_ACOLL_LAYERS];
 *   - int flags / roots / ranks / sizes associated with those communicators;
 *   - initialization state (`initialized`, `initialized_data`,
 *     `initialized_shm_data`) and a pointer to the coll_acoll_data_t block;
 *   - XPMEM-only settings, present only when HAVE_XPMEM_H is defined (so the
 *     layout differs between builds).
 * NOTE(review): who creates and frees the communicators, and the exact
 * meaning of each root/rank field, is not visible in this header -- confirm
 * against coll_acoll_utils.h and the module code.
 */
typedef struct coll_acoll_subcomms {
144+
ompi_communicator_t *local_comm;
145+
ompi_communicator_t *local_r_comm;
146+
ompi_communicator_t *leader_comm;
147+
ompi_communicator_t *subgrp_comm;
148+
ompi_communicator_t *numa_comm;
149+
ompi_communicator_t *base_comm[MCA_COLL_ACOLL_NUM_BASE_LYRS][MCA_COLL_ACOLL_NUM_LAYERS];
150+
ompi_communicator_t *orig_comm;
151+
ompi_communicator_t *socket_comm;
152+
ompi_communicator_t *socket_ldr_comm;
153+
int num_nodes;
154+
int derived_node_size;
155+
int is_root_node;
156+
int is_root_sg;
157+
int is_root_numa;
158+
int is_root_socket;
159+
int local_root[MCA_COLL_ACOLL_NUM_LAYERS];
160+
int outer_grp_root;
161+
int subgrp_root;
162+
int numa_root;
163+
int socket_ldr_root;
164+
int base_root[MCA_COLL_ACOLL_NUM_BASE_LYRS][MCA_COLL_ACOLL_NUM_LAYERS];
165+
int base_rank[MCA_COLL_ACOLL_NUM_BASE_LYRS];
166+
int socket_rank;
167+
int subgrp_size;
168+
int initialized;
169+
int prev_init_root;
170+
int num_root_change;
171+
172+
ompi_communicator_t *numa_comm_ldrs;
173+
ompi_communicator_t *node_comm;
174+
ompi_communicator_t *inter_comm;
175+
int cid;
176+
coll_acoll_data_t *data;
177+
bool initialized_data;
178+
bool initialized_shm_data;
179+
#ifdef HAVE_XPMEM_H
180+
uint64_t xpmem_buf_size;
181+
int without_xpmem;
182+
int xpmem_use_sr_buf;
183+
#endif
184+
185+
} coll_acoll_subcomms_t;
186+
187+
/*
 * Descriptor for a reserved memory region: a pointer, its size, and two
 * boolean state flags. Embedded in struct mca_coll_acoll_module_t as
 * `reserve_mem_s`.
 * NOTE(review): presumably a pre-allocated scratch buffer, with
 * `reserve_mem_allocate` enabling it and `reserve_mem_in_use` marking it
 * busy -- confirm against the allocation helpers.
 */
typedef struct coll_acoll_reserve_mem {
188+
void *reserve_mem;
189+
int64_t reserve_mem_size;
190+
bool reserve_mem_allocate;
191+
bool reserve_mem_in_use;
192+
} coll_acoll_reserve_mem_t;
193+
194+
/*
 * acoll module object.
 *
 * `super` is the first member, so a pointer to this struct can be used
 * where a mca_coll_base_module_t pointer is expected (the API functions
 * in this header take the module as mca_coll_base_module_t *).
 *
 * Note on size: `subc` holds MCA_COLL_ACOLL_MAX_CID (100)
 * coll_acoll_subcomms_t entries by value, so every module instance carries
 * all of them.
 * NOTE(review): the use_* / allg_* / sg_* fields presumably mirror the
 * mca_coll_acoll_* globals declared earlier in this header -- confirm.
 */
struct mca_coll_acoll_module_t {
195+
mca_coll_base_module_t super;
196+
MCA_COLL_ACOLL_SG_SIZES sg_size;
197+
MCA_COLL_ACOLL_SG_SCALES sg_scale;
198+
int sg_cnt;
199+
// Todo: Remove log2 variables
200+
int log2_sg_cnt;
201+
int node_cnt;
202+
int log2_node_cnt;
203+
int use_dyn_rules;
204+
// Todo: Use substructure for every API related ones
205+
int use_mnode;
206+
int use_lin0;
207+
int use_lin1;
208+
int use_lin2;
209+
int mnode_sg_size;
210+
int mnode_log2_sg_size;
211+
int allg_lin;
212+
int allg_ring;
213+
coll_acoll_subcomms_t subc[MCA_COLL_ACOLL_MAX_CID];
214+
coll_acoll_reserve_mem_t reserve_mem_s;
215+
};
216+
217+
#ifdef HAVE_XPMEM_H
218+
/*
 * Registration record that extends mca_rcache_base_registration_t (kept as
 * the first member) with one extra pointer. Only compiled when
 * HAVE_XPMEM_H is defined.
 * NOTE(review): `xpmem_vaddr` is presumably the local address at which the
 * remote XPMEM segment is attached -- confirm.
 */
struct acoll_xpmem_rcache_reg_t {
219+
mca_rcache_base_registration_t base;
220+
void *xpmem_vaddr;
221+
};
222+
#endif
223+
224+
/* Short alias for the module struct defined earlier in this header. */
typedef struct mca_coll_acoll_module_t mca_coll_acoll_module_t;
225+
/* NOTE(review): OBJ_CLASS_DECLARATION is not defined in this header;
 * presumably OPAL's class-declaration macro, with the matching class
 * instance defined in one of the component's .c files -- confirm. */
OMPI_DECLSPEC OBJ_CLASS_DECLARATION(mca_coll_acoll_module_t);
226+
227+
#endif /* MCA_COLL_ACOLL_EXPORT_H */

0 commit comments

Comments
 (0)