Skip to content

Commit bad049f

Browse files
Coll/han Improvements:
Allow topological level to be named in configuration file. Improve algorithm management and choice. Allow algorithm selection (optional) in configuration file. Simplify algorithm choice through MCA parameters. Signed-off-by: Florent GERMAIN <[email protected]>
1 parent 14bc2ed commit bad049f

9 files changed

+722
-189
lines changed

ompi/mca/coll/han/Makefile.am

+3
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
# of Tennessee Research Foundation. All rights
44
# reserved.
55
# Copyright (c) 2022 Amazon.com, Inc. or its affiliates. All Rights reserved.
6+
# Copyright (c) 2022 BULL S.A.S. All rights reserved.
67
# $COPYRIGHT$
78
#
89
# Additional copyrights may follow
@@ -13,6 +14,7 @@
1314
sources = \
1415
coll_han.h \
1516
coll_han_trigger.h \
17+
coll_han_algorithms.h \
1618
coll_han_dynamic.h \
1719
coll_han_dynamic_file.h \
1820
coll_han_barrier.c \
@@ -25,6 +27,7 @@ coll_han_allgather.c \
2527
coll_han_component.c \
2628
coll_han_module.c \
2729
coll_han_trigger.c \
30+
coll_han_algorithms.c \
2831
coll_han_dynamic.c \
2932
coll_han_dynamic_file.c \
3033
coll_han_topo.c \

ompi/mca/coll/han/coll_han.h

+10-121
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
* Copyright (c) 2018-2020 The University of Tennessee and The University
33
* of Tennessee Research Foundation. All rights
44
* reserved.
5-
* Copyright (c) 2020 Bull S.A.S. All rights reserved.
5+
* Copyright (c) 2020-2022 Bull S.A.S. All rights reserved.
66
* $COPYRIGHT$
77
*
88
* Additional copyrights may follow
@@ -39,6 +39,7 @@
3939
#include "ompi/mca/coll/base/coll_base_functions.h"
4040
#include "coll_han_trigger.h"
4141
#include "ompi/mca/coll/han/coll_han_dynamic.h"
42+
#include "coll_han_algorithms.h"
4243

4344
/*
4445
* Today;
@@ -204,6 +205,7 @@ typedef struct mca_coll_han_component_t {
204205
int han_priority;
205206
/* whether to output the log message */
206207
int han_output;
208+
int han_output_verbose; /* activation level of coll han verbosity */
207209
/* segment size for bcast */
208210
uint32_t han_bcast_segsize;
209211
/* up level module for bcast */
@@ -241,6 +243,8 @@ typedef struct mca_coll_han_component_t {
241243
*/
242244
bool han_reproducible;
243245
bool use_simple_algorithm[COLLCOUNT];
246+
int use_algorithm[COLLCOUNT];
247+
int use_algorithm_param[COLLCOUNT]; // MCA parameter id for algo, to know if the user provided it
244248

245249
/* Dynamic configuration rules */
246250
bool use_dynamic_file_rules;
@@ -249,7 +253,11 @@ typedef struct mca_coll_han_component_t {
249253
/* Dynamic rules from file */
250254
mca_coll_han_dynamic_rules_t dynamic_rules;
251255
/* Dynamic rules from mca parameter */
252-
COMPONENT_T mca_rules[COLLCOUNT][NB_TOPO_LVL];
256+
COMPONENT_T mca_sub_components[COLLCOUNT][NB_TOPO_LVL];
257+
258+
int num_available_algos[COLLCOUNT]; // not counting "default" behaviour
259+
/* to show algorithms in ompi_info */
260+
mca_base_var_enum_value_t* algorithm_enumerator[COLLCOUNT];
253261

254262
/* Define maximum dynamic errors printed by rank 0 with a 0 verbosity level */
255263
int max_dynamic_errors;
@@ -468,109 +476,7 @@ mca_coll_han_scatter_intra_dynamic(SCATTER_BASE_ARGS,
468476

469477
int mca_coll_han_barrier_intra_simple(struct ompi_communicator_t *comm,
470478
mca_coll_base_module_t *module);
471-
/* Bcast */
472-
int mca_coll_han_bcast_intra_simple(void *buff,
473-
int count,
474-
struct ompi_datatype_t *dtype,
475-
int root,
476-
struct ompi_communicator_t *comm,
477-
mca_coll_base_module_t *module);
478-
int mca_coll_han_bcast_intra(void *buff, int count, struct ompi_datatype_t *dtype, int root,
479-
struct ompi_communicator_t *comm, mca_coll_base_module_t * module);
480-
481-
/* Reduce */
482-
int
483-
mca_coll_han_reduce_intra_simple(const void *sbuf,
484-
void* rbuf,
485-
int count,
486-
struct ompi_datatype_t *dtype,
487-
ompi_op_t *op,
488-
int root,
489-
struct ompi_communicator_t *comm,
490-
mca_coll_base_module_t *module);
491-
int
492-
mca_coll_han_reduce_reproducible_decision(struct ompi_communicator_t *comm,
493-
mca_coll_base_module_t *module);
494-
int
495-
mca_coll_han_reduce_reproducible(const void *sbuf,
496-
void *rbuf,
497-
int count,
498-
struct ompi_datatype_t *dtype,
499-
struct ompi_op_t *op,
500-
int root,
501-
struct ompi_communicator_t *comm,
502-
mca_coll_base_module_t *module);
503479

504-
int mca_coll_han_reduce_intra(const void *sbuf,
505-
void *rbuf,
506-
int count,
507-
struct ompi_datatype_t *dtype,
508-
ompi_op_t* op,
509-
int root,
510-
struct ompi_communicator_t *comm,
511-
mca_coll_base_module_t * module);
512-
513-
/* Allreduce */
514-
int
515-
mca_coll_han_allreduce_intra_simple(const void *sbuf,
516-
void *rbuf,
517-
int count,
518-
struct ompi_datatype_t *dtype,
519-
struct ompi_op_t *op,
520-
struct ompi_communicator_t *comm,
521-
mca_coll_base_module_t *module);
522-
int
523-
mca_coll_han_allreduce_reproducible_decision(struct ompi_communicator_t *comm,
524-
mca_coll_base_module_t *module);
525-
int
526-
mca_coll_han_allreduce_reproducible(const void *sbuf,
527-
void *rbuf,
528-
int count,
529-
struct ompi_datatype_t *dtype,
530-
struct ompi_op_t *op,
531-
struct ompi_communicator_t *comm,
532-
mca_coll_base_module_t *module);
533-
534-
int mca_coll_han_allreduce_intra(const void *sbuf,
535-
void *rbuf,
536-
int count,
537-
struct ompi_datatype_t *dtype,
538-
struct ompi_op_t *op,
539-
struct ompi_communicator_t *comm, mca_coll_base_module_t * module);
540-
541-
/* Scatter */
542-
int
543-
mca_coll_han_scatter_intra(const void *sbuf, int scount,
544-
struct ompi_datatype_t *sdtype,
545-
void *rbuf, int rcount,
546-
struct ompi_datatype_t *rdtype,
547-
int root,
548-
struct ompi_communicator_t *comm, mca_coll_base_module_t * module);
549-
int
550-
mca_coll_han_scatter_intra_simple(const void *sbuf, int scount,
551-
struct ompi_datatype_t *sdtype,
552-
void *rbuf, int rcount,
553-
struct ompi_datatype_t *rdtype,
554-
int root,
555-
struct ompi_communicator_t *comm,
556-
mca_coll_base_module_t * module);
557-
558-
/* Gather */
559-
int
560-
mca_coll_han_gather_intra(const void *sbuf, int scount,
561-
struct ompi_datatype_t *sdtype,
562-
void *rbuf, int rcount,
563-
struct ompi_datatype_t *rdtype,
564-
int root,
565-
struct ompi_communicator_t *comm, mca_coll_base_module_t * module);
566-
int
567-
mca_coll_han_gather_intra_simple(const void *sbuf, int scount,
568-
struct ompi_datatype_t *sdtype,
569-
void *rbuf, int rcount,
570-
struct ompi_datatype_t *rdtype,
571-
int root,
572-
struct ompi_communicator_t *comm,
573-
mca_coll_base_module_t *module);
574480
/* reordering after gather, for unordered ranks */
575481
void
576482
ompi_coll_han_reorder_gather(const void *sbuf,
@@ -579,21 +485,4 @@ ompi_coll_han_reorder_gather(const void *sbuf,
579485
struct ompi_communicator_t *comm,
580486
int * topo);
581487

582-
583-
584-
/* Allgather */
585-
int
586-
mca_coll_han_allgather_intra(const void *sbuf, int scount,
587-
struct ompi_datatype_t *sdtype,
588-
void *rbuf, int rcount,
589-
struct ompi_datatype_t *rdtype,
590-
struct ompi_communicator_t *comm, mca_coll_base_module_t * module);
591-
int
592-
mca_coll_han_allgather_intra_simple(const void *sbuf, int scount,
593-
struct ompi_datatype_t *sdtype,
594-
void* rbuf, int rcount,
595-
struct ompi_datatype_t *rdtype,
596-
struct ompi_communicator_t *comm,
597-
mca_coll_base_module_t *module);
598-
599488
#endif /* MCA_COLL_HAN_EXPORT_H */

0 commit comments

Comments
 (0)