Skip to content

Commit 5ae5907

Browse files
Coll/han Improvements:
Allow topological level to be named in configuration file. Improve algorithm management and choice. Allow algorithm selection (optional) in configuration file. Simplify algorithm choice through MCA parameters. Signed-off-by: Florent GERMAIN <[email protected]>
1 parent 1a9a3b3 commit 5ae5907

8 files changed

+719
-186
lines changed

ompi/mca/coll/han/Makefile.am

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
# of Tennessee Research Foundation. All rights
44
# reserved.
55
# Copyright (c) 2022 Amazon.com, Inc. or its affiliates. All Rights reserved.
6+
# Copyright (c) 2022 BULL S.A.S. All rights reserved.
67
# $COPYRIGHT$
78
#
89
# Additional copyrights may follow
@@ -13,6 +14,7 @@
1314
sources = \
1415
coll_han.h \
1516
coll_han_trigger.h \
17+
coll_han_algorithms.h \
1618
coll_han_dynamic.h \
1719
coll_han_dynamic_file.h \
1820
coll_han_barrier.c \
@@ -25,6 +27,7 @@ coll_han_allgather.c \
2527
coll_han_component.c \
2628
coll_han_module.c \
2729
coll_han_trigger.c \
30+
coll_han_algorithms.c \
2831
coll_han_dynamic.c \
2932
coll_han_dynamic_file.c \
3033
coll_han_topo.c \

ompi/mca/coll/han/coll_han.h

Lines changed: 10 additions & 121 deletions
Original file line numberDiff line numberDiff line change
@@ -2,8 +2,8 @@
22
* Copyright (c) 2018-2020 The University of Tennessee and The University
33
* of Tennessee Research Foundation. All rights
44
* reserved.
5-
* Copyright (c) 2020 Bull S.A.S. All rights reserved.
65
* Copyright (c) 2022 IBM Corporation. All rights reserved
6+
* Copyright (c) 2020-2022 Bull S.A.S. All rights reserved.
77
* $COPYRIGHT$
88
*
99
* Additional copyrights may follow
@@ -40,6 +40,7 @@
4040
#include "ompi/mca/coll/base/coll_base_functions.h"
4141
#include "coll_han_trigger.h"
4242
#include "ompi/mca/coll/han/coll_han_dynamic.h"
43+
#include "coll_han_algorithms.h"
4344

4445
/*
4546
* Today;
@@ -205,6 +206,7 @@ typedef struct mca_coll_han_component_t {
205206
int han_priority;
206207
/* whether to output the log message */
207208
int han_output;
209+
int han_output_verbose; /* activation level of coll han verbosity */
208210
/* segment size for bcast */
209211
uint32_t han_bcast_segsize;
210212
/* up level module for bcast */
@@ -242,6 +244,8 @@ typedef struct mca_coll_han_component_t {
242244
*/
243245
bool han_reproducible;
244246
bool use_simple_algorithm[COLLCOUNT];
247+
int use_algorithm[COLLCOUNT];
248+
int use_algorithm_param[COLLCOUNT]; // MCA parameter id for algo, to know if the user provided it
245249

246250
/* Dynamic configuration rules */
247251
bool use_dynamic_file_rules;
@@ -250,7 +254,11 @@ typedef struct mca_coll_han_component_t {
250254
/* Dynamic rules from file */
251255
mca_coll_han_dynamic_rules_t dynamic_rules;
252256
/* Dynamic rules from mca parameter */
253-
COMPONENT_T mca_rules[COLLCOUNT][NB_TOPO_LVL];
257+
COMPONENT_T mca_sub_components[COLLCOUNT][NB_TOPO_LVL];
258+
259+
int num_available_algorithms[COLLCOUNT]; // not counting "default" behaviour
260+
/* to show algorithms in ompi_info */
261+
mca_base_var_enum_value_t* algorithm_enumerator[COLLCOUNT];
254262

255263
/* Define maximum dynamic errors printed by rank 0 with a 0 verbosity level */
256264
int max_dynamic_errors;
@@ -469,109 +477,7 @@ mca_coll_han_scatter_intra_dynamic(SCATTER_BASE_ARGS,
469477

470478
int mca_coll_han_barrier_intra_simple(struct ompi_communicator_t *comm,
471479
mca_coll_base_module_t *module);
472-
/* Bcast */
473-
int mca_coll_han_bcast_intra_simple(void *buff,
474-
int count,
475-
struct ompi_datatype_t *dtype,
476-
int root,
477-
struct ompi_communicator_t *comm,
478-
mca_coll_base_module_t *module);
479-
int mca_coll_han_bcast_intra(void *buff, int count, struct ompi_datatype_t *dtype, int root,
480-
struct ompi_communicator_t *comm, mca_coll_base_module_t * module);
481-
482-
/* Reduce */
483-
int
484-
mca_coll_han_reduce_intra_simple(const void *sbuf,
485-
void* rbuf,
486-
int count,
487-
struct ompi_datatype_t *dtype,
488-
ompi_op_t *op,
489-
int root,
490-
struct ompi_communicator_t *comm,
491-
mca_coll_base_module_t *module);
492-
int
493-
mca_coll_han_reduce_reproducible_decision(struct ompi_communicator_t *comm,
494-
mca_coll_base_module_t *module);
495-
int
496-
mca_coll_han_reduce_reproducible(const void *sbuf,
497-
void *rbuf,
498-
int count,
499-
struct ompi_datatype_t *dtype,
500-
struct ompi_op_t *op,
501-
int root,
502-
struct ompi_communicator_t *comm,
503-
mca_coll_base_module_t *module);
504480

505-
int mca_coll_han_reduce_intra(const void *sbuf,
506-
void *rbuf,
507-
int count,
508-
struct ompi_datatype_t *dtype,
509-
ompi_op_t* op,
510-
int root,
511-
struct ompi_communicator_t *comm,
512-
mca_coll_base_module_t * module);
513-
514-
/* Allreduce */
515-
int
516-
mca_coll_han_allreduce_intra_simple(const void *sbuf,
517-
void *rbuf,
518-
int count,
519-
struct ompi_datatype_t *dtype,
520-
struct ompi_op_t *op,
521-
struct ompi_communicator_t *comm,
522-
mca_coll_base_module_t *module);
523-
int
524-
mca_coll_han_allreduce_reproducible_decision(struct ompi_communicator_t *comm,
525-
mca_coll_base_module_t *module);
526-
int
527-
mca_coll_han_allreduce_reproducible(const void *sbuf,
528-
void *rbuf,
529-
int count,
530-
struct ompi_datatype_t *dtype,
531-
struct ompi_op_t *op,
532-
struct ompi_communicator_t *comm,
533-
mca_coll_base_module_t *module);
534-
535-
int mca_coll_han_allreduce_intra(const void *sbuf,
536-
void *rbuf,
537-
int count,
538-
struct ompi_datatype_t *dtype,
539-
struct ompi_op_t *op,
540-
struct ompi_communicator_t *comm, mca_coll_base_module_t * module);
541-
542-
/* Scatter */
543-
int
544-
mca_coll_han_scatter_intra(const void *sbuf, int scount,
545-
struct ompi_datatype_t *sdtype,
546-
void *rbuf, int rcount,
547-
struct ompi_datatype_t *rdtype,
548-
int root,
549-
struct ompi_communicator_t *comm, mca_coll_base_module_t * module);
550-
int
551-
mca_coll_han_scatter_intra_simple(const void *sbuf, int scount,
552-
struct ompi_datatype_t *sdtype,
553-
void *rbuf, int rcount,
554-
struct ompi_datatype_t *rdtype,
555-
int root,
556-
struct ompi_communicator_t *comm,
557-
mca_coll_base_module_t * module);
558-
559-
/* Gather */
560-
int
561-
mca_coll_han_gather_intra(const void *sbuf, int scount,
562-
struct ompi_datatype_t *sdtype,
563-
void *rbuf, int rcount,
564-
struct ompi_datatype_t *rdtype,
565-
int root,
566-
struct ompi_communicator_t *comm, mca_coll_base_module_t * module);
567-
int
568-
mca_coll_han_gather_intra_simple(const void *sbuf, int scount,
569-
struct ompi_datatype_t *sdtype,
570-
void *rbuf, int rcount,
571-
struct ompi_datatype_t *rdtype,
572-
int root,
573-
struct ompi_communicator_t *comm,
574-
mca_coll_base_module_t *module);
575481
/* reordering after gather, for unordered ranks */
576482
void
577483
ompi_coll_han_reorder_gather(const void *sbuf,
@@ -580,21 +486,4 @@ ompi_coll_han_reorder_gather(const void *sbuf,
580486
struct ompi_communicator_t *comm,
581487
int * topo);
582488

583-
584-
585-
/* Allgather */
586-
int
587-
mca_coll_han_allgather_intra(const void *sbuf, int scount,
588-
struct ompi_datatype_t *sdtype,
589-
void *rbuf, int rcount,
590-
struct ompi_datatype_t *rdtype,
591-
struct ompi_communicator_t *comm, mca_coll_base_module_t * module);
592-
int
593-
mca_coll_han_allgather_intra_simple(const void *sbuf, int scount,
594-
struct ompi_datatype_t *sdtype,
595-
void* rbuf, int rcount,
596-
struct ompi_datatype_t *rdtype,
597-
struct ompi_communicator_t *comm,
598-
mca_coll_base_module_t *module);
599-
600489
#endif /* MCA_COLL_HAN_EXPORT_H */

0 commit comments

Comments
 (0)