-
Notifications
You must be signed in to change notification settings - Fork 900
Add the acoll component #12484
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Merged
Merged
Add the acoll component #12484
Changes from all commits
Commits
Show all changes
2 commits
Select commit
Hold shift + click to select a range
File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,45 @@ | ||
# | ||
# Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved. | ||
# $COPYRIGHT$ | ||
# | ||
# Additional copyrights may follow | ||
# | ||
# $HEADER$ | ||
# | ||
|
||
# Extra preprocessor flags for this component. coll_acoll_CPPFLAGS is not set in | ||
# this file; it is expected to come from the component's configure logic. | ||
AM_CPPFLAGS = $(coll_acoll_CPPFLAGS) | ||
|
||
# Headers and C sources that make up the component. | ||
sources = \ | ||
coll_acoll.h \ | ||
coll_acoll_utils.h \ | ||
coll_acoll_allgather.c \ | ||
coll_acoll_bcast.c \ | ||
coll_acoll_gather.c \ | ||
coll_acoll_reduce.c \ | ||
coll_acoll_allreduce.c \ | ||
coll_acoll_barrier.c \ | ||
coll_acoll_component.c \ | ||
coll_acoll_module.c | ||
|
||
# Make the output library in this directory, and name it either | ||
# mca_<type>_<name>.la (for DSO builds) or libmca_<type>_<name>.la | ||
# (for static builds). | ||
|
||
if MCA_BUILD_ompi_coll_acoll_DSO | ||
component_noinst = | ||
component_install = mca_coll_acoll.la | ||
else | ||
component_noinst = libmca_coll_acoll.la | ||
component_install = | ||
endif | ||
|
||
# DSO build: installed into $(ompilibdir) and linked against the main MPI library. | ||
# @OMPI_LIBMPI_NAME@ is an Automake substitution and must appear literally here. | ||
mcacomponentdir = $(ompilibdir) | ||
mcacomponent_LTLIBRARIES = $(component_install) | ||
mca_coll_acoll_la_SOURCES = $(sources) | ||
mca_coll_acoll_la_LDFLAGS = -module -avoid-version $(coll_acoll_LDFLAGS) | ||
mca_coll_acoll_la_LIBADD = $(top_builddir)/ompi/lib@OMPI_LIBMPI_NAME@.la $(coll_acoll_LIBS) | ||
|
||
# Static build: a non-installed convenience library built from the same sources. | ||
noinst_LTLIBRARIES = $(component_noinst) | ||
libmca_coll_acoll_la_SOURCES = $(sources) | ||
libmca_coll_acoll_la_LIBADD = $(coll_acoll_LIBS) | ||
libmca_coll_acoll_la_LDFLAGS = -module -avoid-version $(coll_acoll_LDFLAGS) |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,16 @@ | ||
Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved. | ||
|
||
$COPYRIGHT$ | ||
|
||
Additional copyrights may follow | ||
|
||
$HEADER$ | ||
|
||
=========================================================================== | ||
|
||
The collective component, AMD Coll (“acoll”), is a high-performance MPI collective component for the Open MPI library that is optimized for AMD "Zen"-based processors. “acoll” is optimized for communication within a single node of AMD “Zen”-based processors and provides the following commonly used collective algorithms: broadcast (MPI_Bcast), allreduce (MPI_Allreduce), reduce (MPI_Reduce), gather (MPI_Gather), allgather (MPI_Allgather), and barrier (MPI_Barrier). | ||
|
||
At present, “acoll” has been tested with OpenMPI v5.0.2 and can be built as part of OpenMPI. | ||
|
||
To run an application with acoll, use the following command-line parameters: | ||
- mpirun <common mpi runtime parameters> --mca coll acoll,tuned,libnbc,basic --mca coll_acoll_priority 40 <executable> |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,225 @@ | ||
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ | ||
/* | ||
* Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved. | ||
* $COPYRIGHT$ | ||
* | ||
* Additional copyrights may follow | ||
* | ||
* $HEADER$ | ||
*/ | ||
|
||
#ifndef MCA_COLL_ACOLL_EXPORT_H | ||
#define MCA_COLL_ACOLL_EXPORT_H | ||
|
||
#include "ompi_config.h" | ||
|
||
#include "mpi.h" | ||
#include "ompi/communicator/communicator.h" | ||
#include "ompi/mca/coll/base/coll_base_functions.h" | ||
#include "ompi/mca/coll/coll.h" | ||
#include "ompi/mca/mca.h" | ||
#include "ompi/request/request.h" | ||
|
||
#ifdef HAVE_XPMEM_H | ||
#include "opal/mca/rcache/base/base.h" | ||
#include <xpmem.h> | ||
#endif | ||
|
||
#include "opal/mca/shmem/base/base.h" | ||
#include "opal/mca/shmem/shmem.h" | ||
|
||
BEGIN_C_DECLS | ||
|
||
/* Globally exported variables */ | ||
/*
 * Declarations only: the component descriptor and the integer tunables are
 * defined in another translation unit. Presumably each int backs an MCA
 * parameter of the same name (e.g. coll_acoll_priority) -- TODO confirm.
 */
OMPI_DECLSPEC extern const mca_coll_base_component_3_0_0_t mca_coll_acoll_component; | ||
extern int mca_coll_acoll_priority; | ||
extern int mca_coll_acoll_sg_size; | ||
extern int mca_coll_acoll_sg_scale; | ||
extern int mca_coll_acoll_node_size; | ||
extern int mca_coll_acoll_use_dynamic_rules; | ||
extern int mca_coll_acoll_mnode_enable; | ||
extern int mca_coll_acoll_bcast_lin0; | ||
extern int mca_coll_acoll_bcast_lin1; | ||
extern int mca_coll_acoll_bcast_lin2; | ||
extern int mca_coll_acoll_bcast_nonsg; | ||
extern int mca_coll_acoll_allgather_lin; | ||
extern int mca_coll_acoll_allgather_ring_1; | ||
|
||
/* API functions */ | ||
/*
 * Prototypes only; the implementations are not part of this header.
 * NOTE(review): allgather and bcast carry no "_intra" suffix while the other
 * collectives do -- naming is inconsistent but is part of the interface.
 */
/* Component-level query taking the two threading flags. */
int mca_coll_acoll_init_query(bool enable_progress_threads, bool enable_mpi_threads); | ||
/* Per-communicator query: returns a module pointer and reports a priority through *priority. */
mca_coll_base_module_t *mca_coll_acoll_comm_query(struct ompi_communicator_t *comm, int *priority); | ||
|
||
/* Enables the given module on the given communicator. */
int mca_coll_acoll_module_enable(mca_coll_base_module_t *module, struct ompi_communicator_t *comm); | ||
|
||
/* Allgather entry point; counts are size_t. */
int mca_coll_acoll_allgather(const void *sbuf, size_t scount, struct ompi_datatype_t *sdtype, | ||
void *rbuf, size_t rcount, struct ompi_datatype_t *rdtype, | ||
struct ompi_communicator_t *comm, mca_coll_base_module_t *module); | ||
|
||
/* Broadcast entry point; `buff` is non-const. */
int mca_coll_acoll_bcast(void *buff, size_t count, struct ompi_datatype_t *datatype, int root, | ||
struct ompi_communicator_t *comm, mca_coll_base_module_t *module); | ||
|
||
/* Gather entry point with an explicit root rank. */
int mca_coll_acoll_gather_intra(const void *sbuf, size_t scount, struct ompi_datatype_t *sdtype, | ||
void *rbuf, size_t rcount, struct ompi_datatype_t *rdtype, int root, | ||
struct ompi_communicator_t *comm, mca_coll_base_module_t *module); | ||
|
||
/* Reduce entry point with an explicit root rank and reduction op. */
int mca_coll_acoll_reduce_intra(const void *sbuf, void *rbuf, size_t count, | ||
struct ompi_datatype_t *dtype, struct ompi_op_t *op, int root, | ||
struct ompi_communicator_t *comm, mca_coll_base_module_t *module); | ||
|
||
/* Allreduce entry point; same arguments as reduce minus the root. */
int mca_coll_acoll_allreduce_intra(const void *sbuf, void *rbuf, size_t count, | ||
struct ompi_datatype_t *dtype, struct ompi_op_t *op, | ||
struct ompi_communicator_t *comm, | ||
mca_coll_base_module_t *module); | ||
|
||
/* Barrier entry point; takes only the communicator and module. */
int mca_coll_acoll_barrier_intra(struct ompi_communicator_t *comm, mca_coll_base_module_t *module); | ||
|
||
END_C_DECLS | ||
|
||
/* Length of the `subc` array in struct mca_coll_acoll_module_t. */
#define MCA_COLL_ACOLL_MAX_CID 100 | ||
/* Not used within this header; presumably compared against
 * coll_acoll_subcomms_t.num_root_change -- TODO confirm. */
#define MCA_COLL_ACOLL_ROOT_CHANGE_THRESH 10 | ||
|
||
/* Enumerated values (8, 16) for the `sg_size` member of mca_coll_acoll_module_t.
 * "sg" presumably stands for subgroup -- TODO confirm. */
typedef enum MCA_COLL_ACOLL_SG_SIZES { | ||
MCA_COLL_ACOLL_SG_SIZE_1 = 8, | ||
MCA_COLL_ACOLL_SG_SIZE_2 = 16 | ||
} MCA_COLL_ACOLL_SG_SIZES; | ||
|
||
/* Enumerated power-of-two values (1..16) for the `sg_scale` member of
 * mca_coll_acoll_module_t. */
typedef enum MCA_COLL_ACOLL_SG_SCALES { | ||
MCA_COLL_ACOLL_SG_SCALE_1 = 1, | ||
MCA_COLL_ACOLL_SG_SCALE_2 = 2, | ||
MCA_COLL_ACOLL_SG_SCALE_3 = 4, | ||
MCA_COLL_ACOLL_SG_SCALE_4 = 8, | ||
MCA_COLL_ACOLL_SG_SCALE_5 = 16 | ||
} MCA_COLL_ACOLL_SG_SCALES; | ||
|
||
/* Zero-based identifiers for sub-communicator kinds. The final enumerator,
 * MCA_COLL_ACOLL_NUM_SC, equals the number of kinds listed before it. */
typedef enum MCA_COLL_ACOLL_SUBCOMMS { | ||
MCA_COLL_ACOLL_NODE_L = 0, | ||
MCA_COLL_ACOLL_INTRA, | ||
MCA_COLL_ACOLL_SOCK_L, | ||
MCA_COLL_ACOLL_NUMA_L, | ||
MCA_COLL_ACOLL_L3_L, | ||
MCA_COLL_ACOLL_LEAF, | ||
MCA_COLL_ACOLL_NUM_SC | ||
} MCA_COLL_ACOLL_SUBCOMMS; | ||
|
||
/* Layer indices. MCA_COLL_ACOLL_NUM_LAYERS is the count and sizes the second
 * dimension of `base_comm`/`base_root` and the `local_root` array in
 * coll_acoll_subcomms_t. */
typedef enum MCA_COLL_ACOLL_LAYERS { | ||
MCA_COLL_ACOLL_LYR_NODE = 0, | ||
MCA_COLL_ACOLL_LYR_SOCKET, | ||
MCA_COLL_ACOLL_NUM_LAYERS | ||
} MCA_COLL_ACOLL_LAYERS; | ||
|
||
/* Base-layer indices. MCA_COLL_ACOLL_NUM_BASE_LYRS is the count and sizes the
 * first dimension of `base_comm`/`base_root` and the `base_rank` array in
 * coll_acoll_subcomms_t. */
typedef enum MCA_COLL_ACOLL_BASE_LYRS { | ||
MCA_COLL_ACOLL_L3CACHE = 0, | ||
MCA_COLL_ACOLL_NUMA, | ||
MCA_COLL_ACOLL_NUM_BASE_LYRS | ||
} MCA_COLL_ACOLL_BASE_LYRS; | ||
|
||
/*
 * Buffer handles and rank/group bookkeeping. Instances are reached through
 * the `data` pointer of coll_acoll_subcomms_t. Allocation and lifetime are
 * managed outside this header.
 */
typedef struct coll_acoll_data { | ||
/* XPMEM-related members exist only when HAVE_XPMEM_H is defined. */
#ifdef HAVE_XPMEM_H | ||
xpmem_segid_t *allseg_id; | ||
xpmem_apid_t *all_apid; | ||
void **allshm_sbuf; | ||
void **allshm_rbuf; | ||
void **xpmem_saddr; | ||
void **xpmem_raddr; | ||
mca_rcache_base_module_t **rcache; | ||
void *scratch; | ||
#endif | ||
/* Shared-memory segment descriptors (opal shmem) and buffer pointers;
 * presumably one entry per rank -- TODO confirm. */
opal_shmem_ds_t *allshmseg_id; | ||
void **allshmmmap_sbuf; | ||
|
||
/* l1/l2 presumably refer to two levels of rank grouping -- TODO confirm. */
int comm_size; | ||
int l1_local_rank; | ||
int l2_local_rank; | ||
int l1_gp_size; | ||
int *l1_gp; | ||
int *l2_gp; | ||
int l2_gp_size; | ||
int offset[4]; | ||
int sync[2]; | ||
} coll_acoll_data_t; | ||
|
||
/*
 * Sub-communicator handles plus root/rank bookkeeping. struct
 * mca_coll_acoll_module_t embeds an array of MCA_COLL_ACOLL_MAX_CID of these
 * (`subc`); how that array is indexed is not visible in this header.
 */
typedef struct coll_acoll_subcomms { | ||
ompi_communicator_t *local_comm; | ||
ompi_communicator_t *local_r_comm; | ||
ompi_communicator_t *leader_comm; | ||
ompi_communicator_t *subgrp_comm; | ||
ompi_communicator_t *numa_comm; | ||
/* Indexed [MCA_COLL_ACOLL_BASE_LYRS][MCA_COLL_ACOLL_LAYERS]. */
ompi_communicator_t *base_comm[MCA_COLL_ACOLL_NUM_BASE_LYRS][MCA_COLL_ACOLL_NUM_LAYERS]; | ||
ompi_communicator_t *orig_comm; | ||
ompi_communicator_t *socket_comm; | ||
ompi_communicator_t *socket_ldr_comm; | ||
int num_nodes; | ||
int derived_node_size; | ||
int is_root_node; | ||
int is_root_sg; | ||
int is_root_numa; | ||
int is_root_socket; | ||
int local_root[MCA_COLL_ACOLL_NUM_LAYERS]; | ||
int outer_grp_root; | ||
int subgrp_root; | ||
int numa_root; | ||
int socket_ldr_root; | ||
/* Same index order as base_comm. */
int base_root[MCA_COLL_ACOLL_NUM_BASE_LYRS][MCA_COLL_ACOLL_NUM_LAYERS]; | ||
int base_rank[MCA_COLL_ACOLL_NUM_BASE_LYRS]; | ||
int socket_rank; | ||
int subgrp_size; | ||
int initialized; | ||
/* Presumably used with MCA_COLL_ACOLL_ROOT_CHANGE_THRESH -- TODO confirm. */
int prev_init_root; | ||
int num_root_change; | ||
|
||
ompi_communicator_t *numa_comm_ldrs; | ||
ompi_communicator_t *node_comm; | ||
ompi_communicator_t *inter_comm; | ||
/* NOTE(review): a PR reviewer questioned whether the communicator CID is
 * intended to be used by collectives -- confirm before relying on it. */
int cid; | ||
coll_acoll_data_t *data; | ||
bool initialized_data; | ||
bool initialized_shm_data; | ||
/* XPMEM settings exist only when HAVE_XPMEM_H is defined. */
#ifdef HAVE_XPMEM_H | ||
uint64_t xpmem_buf_size; | ||
int without_xpmem; | ||
int xpmem_use_sr_buf; | ||
#endif | ||
|
||
} coll_acoll_subcomms_t; | ||
|
||
/* Descriptor for a reserved memory region: pointer, size in bytes (uint64_t)
 * and two state flags. Embedded in mca_coll_acoll_module_t as `reserve_mem_s`.
 * The exact meaning of the flags is defined by code outside this header. */
typedef struct coll_acoll_reserve_mem { | ||
void *reserve_mem; | ||
uint64_t reserve_mem_size; | ||
bool reserve_mem_allocate; | ||
bool reserve_mem_in_use; | ||
} coll_acoll_reserve_mem_t; | ||
|
||
/*
 * The acoll module object. `super` is the first member, so a pointer to this
 * struct can be used where a mca_coll_base_module_t pointer is expected.
 * The int members presumably mirror the mca_coll_acoll_* tunables declared in
 * this header -- TODO confirm.
 */
struct mca_coll_acoll_module_t { | ||
mca_coll_base_module_t super; | ||
MCA_COLL_ACOLL_SG_SIZES sg_size; | ||
MCA_COLL_ACOLL_SG_SCALES sg_scale; | ||
int sg_cnt; | ||
// Todo: Remove log2 variables | ||
int log2_sg_cnt; | ||
int node_cnt; | ||
int log2_node_cnt; | ||
int use_dyn_rules; | ||
// Todo: Use substructure for every API related ones | ||
int use_mnode; | ||
int use_lin0; | ||
int use_lin1; | ||
int use_lin2; | ||
int mnode_sg_size; | ||
int mnode_log2_sg_size; | ||
int allg_lin; | ||
int allg_ring; | ||
/* Fixed-size table of per-communicator state, MCA_COLL_ACOLL_MAX_CID entries. */
coll_acoll_subcomms_t subc[MCA_COLL_ACOLL_MAX_CID]; | ||
coll_acoll_reserve_mem_t reserve_mem_s; | ||
}; | ||
|
||
/* Registration record extending mca_rcache_base_registration_t (first member)
 * with one extra pointer; only defined when HAVE_XPMEM_H is defined. */
#ifdef HAVE_XPMEM_H | ||
struct acoll_xpmem_rcache_reg_t { | ||
mca_rcache_base_registration_t base; | ||
void *xpmem_vaddr; | ||
}; | ||
#endif | ||
|
||
/* Typedef for the module struct and its exported class declaration
 * (OBJ_CLASS_DECLARATION is a macro defined outside this header). */
typedef struct mca_coll_acoll_module_t mca_coll_acoll_module_t; | ||
OMPI_DECLSPEC OBJ_CLASS_DECLARATION(mca_coll_acoll_module_t); | ||
|
||
#endif /* MCA_COLL_ACOLL_EXPORT_H */ |
Oops, something went wrong.
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
See my other comment about CID. I'm not sure if it is intended to be used in collectives.