Skip to content

Use module names in HAN. #10514

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Jul 19, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 23 additions & 0 deletions ompi/mca/coll/han/coll_han.h
Original file line number Diff line number Diff line change
Expand Up @@ -169,6 +169,27 @@ struct mca_coll_han_allgather_s {
};
typedef struct mca_coll_han_allgather_s mca_coll_han_allgather_t;

/**
 * Pair of module-name strings attached to a single collective operation:
 * one string for the up-level module and one for the low-level module.
 */
typedef struct mca_coll_han_op_up_low_module_name_t {
    char *han_op_up_module_name;  /* string naming the up-level module */
    char *han_op_low_module_name; /* string naming the low-level module */
} mca_coll_han_op_up_low_module_name_t;

/**
 * Up/low module-name strings for each collective that has a pair of
 * "<collective>_up_module" / "<collective>_low_module" MCA parameters
 * (bcast, reduce, allreduce, allgather, gather and scatter).
 *
 * The only reason we need to keep these around is that our MCA system does
 * not support MCA variables that do not point to existing variables (i.e.
 * whose mbv_storage does not exist until the completion of the application).
 * Thus, we need to keep track of the storage for all variables, even the
 * ones we only use to translate into a string.
 */
typedef struct mca_coll_han_op_module_name_t {
mca_coll_han_op_up_low_module_name_t bcast;
mca_coll_han_op_up_low_module_name_t reduce;
mca_coll_han_op_up_low_module_name_t allreduce;
mca_coll_han_op_up_low_module_name_t allgather;
mca_coll_han_op_up_low_module_name_t gather;
mca_coll_han_op_up_low_module_name_t scatter;
} mca_coll_han_op_module_name_t;

/**
* Structure to hold the han coll component. First it holds the
* base coll component, and then holds a bunch of
Expand Down Expand Up @@ -213,6 +234,8 @@ typedef struct mca_coll_han_component_t {
uint32_t han_scatter_up_module;
/* low level module for scatter */
uint32_t han_scatter_low_module;
/* name of the modules */
mca_coll_han_op_module_name_t han_op_module_name;
/* whether we need reproducible results
* (but disables topological optimisations)
*/
Expand Down
173 changes: 112 additions & 61 deletions ompi/mca/coll/han/coll_han_component.c
Original file line number Diff line number Diff line change
Expand Up @@ -101,13 +101,43 @@ static int han_open(void)
return mca_coll_han_init_dynamic_rules();
}


/*
 * Shut down the component.
 *
 * Releases the dynamic rules, then frees every up/low module-name string
 * stored in mca_coll_han_component.han_op_module_name and resets each
 * pointer to NULL. Always returns OMPI_SUCCESS.
 */
static int han_close(void)
{
    /* Every module-name slot owned by the component, in release order. */
    char **name_slots[] = {
        &mca_coll_han_component.han_op_module_name.bcast.han_op_up_module_name,
        &mca_coll_han_component.han_op_module_name.bcast.han_op_low_module_name,
        &mca_coll_han_component.han_op_module_name.reduce.han_op_up_module_name,
        &mca_coll_han_component.han_op_module_name.reduce.han_op_low_module_name,
        &mca_coll_han_component.han_op_module_name.allreduce.han_op_up_module_name,
        &mca_coll_han_component.han_op_module_name.allreduce.han_op_low_module_name,
        &mca_coll_han_component.han_op_module_name.allgather.han_op_up_module_name,
        &mca_coll_han_component.han_op_module_name.allgather.han_op_low_module_name,
        &mca_coll_han_component.han_op_module_name.gather.han_op_up_module_name,
        &mca_coll_han_component.han_op_module_name.gather.han_op_low_module_name,
        &mca_coll_han_component.han_op_module_name.scatter.han_op_up_module_name,
        &mca_coll_han_component.han_op_module_name.scatter.han_op_low_module_name,
    };
    const size_t slot_count = sizeof(name_slots) / sizeof(name_slots[0]);

    mca_coll_han_free_dynamic_rules();

    for (size_t idx = 0; idx < slot_count; ++idx) {
        free(*name_slots[idx]);
        /* Clear the slot so a stale pointer is never seen after close. */
        *name_slots[idx] = NULL;
    }

    return OMPI_SUCCESS;
}

Expand Down Expand Up @@ -147,6 +177,37 @@ const char* mca_coll_han_topo_lvl_to_str(TOPO_LVL_T topo_lvl)
}
}

/**
 * Register a string MCA parameter that selects a HAN sub-module and
 * translate its value into a module id.
 *
 * On entry *module_id selects the default: it is clamped to an index of
 * available_components and that entry's component_name is placed in
 * *storage before the variable is registered. After registration the
 * string in *storage is first parsed as a base-10 integer; if no digits
 * are consumed it is passed to mca_coll_han_component_name_to_id().
 *
 * @param c          component the variable is registered against
 * @param param_name name of the MCA variable
 * @param param_doc  help text of the MCA variable
 * @param info_level info level forwarded to the registration call
 * @param module_id  [in,out] default module id on entry; resulting module
 *                   id on return (a negative result is replaced by 0)
 * @param storage    [out] location backing the string variable
 *
 * @return OMPI_SUCCESS in all cases; the result of the registration call
 *         is discarded.
 */
static int
mca_coll_han_query_module_from_mca(mca_base_component_t* c,
                                   const char* param_name,
                                   const char* param_doc,
                                   int info_level,
                                   uint32_t* module_id,
                                   char** storage)
{
    char *module_name, *endptr = NULL;

    /* Clamp the default to the last usable index.
     * NOTE(review): assumes available_components holds COMPONENTS_COUNT
     * entries, making COMPONENTS_COUNT itself one past the end - confirm
     * against the array definition. */
    int mod_id = COMPONENTS_COUNT - 1;
    mod_id = (*module_id > (uint32_t)mod_id) ? mod_id : (int)*module_id; /* stay in range */
    mod_id = (mod_id < 0) ? 0 : mod_id; /* in range */

    *storage = available_components[mod_id].component_name;

    (void) mca_base_component_var_register(c, param_name, param_doc,
                                           MCA_BASE_VAR_TYPE_STRING, NULL, 0, 0,
                                           info_level,
                                           MCA_BASE_VAR_SCOPE_READONLY, storage);

    module_name = *storage;
    if (NULL == module_name) {
        /* Nothing to parse (strtol() on NULL is undefined): keep the
         * clamped default. */
        *module_id = (uint32_t)mod_id;
        return OMPI_SUCCESS;
    }

    mod_id = (int)strtol(module_name, &endptr, 10);
    if( module_name == endptr ) {  /* no conversion, maybe we got a module name instead */
        /* Convert module name to id */
        mod_id = mca_coll_han_component_name_to_id(module_name);
    }
    /* Keep the module in the range */
    *module_id = (mod_id < 0) ? 0 : mod_id;

    return OMPI_SUCCESS;
}

/*
* Register MCA params
Expand Down Expand Up @@ -177,18 +238,17 @@ static int han_register(void)
MCA_BASE_VAR_SCOPE_READONLY, &cs->han_bcast_segsize);

cs->han_bcast_up_module = 0;
(void) mca_base_component_var_register(c, "bcast_up_module",
"up level module for bcast, 0 libnbc, 1 adapt",
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
OPAL_INFO_LVL_9,
MCA_BASE_VAR_SCOPE_READONLY, &cs->han_bcast_up_module);
(void) mca_coll_han_query_module_from_mca(c, "bcast_up_module",
"up level module for bcast, 0 libnbc, 1 adapt",
OPAL_INFO_LVL_9, &cs->han_bcast_up_module,
&cs->han_op_module_name.bcast.han_op_up_module_name);

cs->han_bcast_low_module = 0;
(void) mca_base_component_var_register(c, "bcast_low_module",
"low level module for bcast, 0 tuned, 1 sm",
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
OPAL_INFO_LVL_9,
MCA_BASE_VAR_SCOPE_READONLY, &cs->han_bcast_low_module);
(void) mca_coll_han_query_module_from_mca(c, "bcast_low_module",
"low level module for bcast, 0 tuned, 1 sm",
OPAL_INFO_LVL_9,
&cs->han_bcast_low_module,
&cs->han_op_module_name.bcast.han_op_low_module_name);

cs->han_reduce_segsize = 65536;
(void) mca_base_component_var_register(c, "reduce_segsize",
Expand All @@ -198,18 +258,17 @@ static int han_register(void)
MCA_BASE_VAR_SCOPE_READONLY, &cs->han_reduce_segsize);

cs->han_reduce_up_module = 0;
(void) mca_base_component_var_register(c, "reduce_up_module",
"up level module for allreduce, 0 libnbc, 1 adapt",
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
OPAL_INFO_LVL_9,
MCA_BASE_VAR_SCOPE_READONLY, &cs->han_reduce_up_module);
(void) mca_coll_han_query_module_from_mca(c, "reduce_up_module",
"up level module for allreduce, 0 libnbc, 1 adapt",
OPAL_INFO_LVL_9, &cs->han_reduce_up_module,
&cs->han_op_module_name.reduce.han_op_up_module_name);

cs->han_reduce_low_module = 0;
(void) mca_base_component_var_register(c, "reduce_low_module",
"low level module for allreduce, 0 tuned, 1 sm",
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
OPAL_INFO_LVL_9,
MCA_BASE_VAR_SCOPE_READONLY, &cs->han_reduce_low_module);
(void) mca_coll_han_query_module_from_mca(c, "reduce_low_module",
"low level module for allreduce, 0 tuned, 1 sm",
OPAL_INFO_LVL_9, &cs->han_reduce_low_module,
&cs->han_op_module_name.reduce.han_op_low_module_name);

cs->han_allreduce_segsize = 65536;
(void) mca_base_component_var_register(c, "allreduce_segsize",
"segment size for allreduce",
Expand All @@ -218,60 +277,52 @@ static int han_register(void)
MCA_BASE_VAR_SCOPE_READONLY, &cs->han_allreduce_segsize);

cs->han_allreduce_up_module = 0;
(void) mca_base_component_var_register(c, "allreduce_up_module",
"up level module for allreduce, 0 libnbc, 1 adapt",
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
OPAL_INFO_LVL_9,
MCA_BASE_VAR_SCOPE_READONLY, &cs->han_allreduce_up_module);
(void) mca_coll_han_query_module_from_mca(c, "allreduce_up_module",
"up level module for allreduce, 0 libnbc, 1 adapt",
OPAL_INFO_LVL_9, &cs->han_allreduce_up_module,
&cs->han_op_module_name.allreduce.han_op_up_module_name);

cs->han_allreduce_low_module = 0;
(void) mca_base_component_var_register(c, "allreduce_low_module",
"low level module for allreduce, 0 tuned, 1 sm",
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
OPAL_INFO_LVL_9,
MCA_BASE_VAR_SCOPE_READONLY, &cs->han_allreduce_low_module);
(void) mca_coll_han_query_module_from_mca(c, "allreduce_low_module",
"low level module for allreduce, 0 tuned, 1 sm",
OPAL_INFO_LVL_9, &cs->han_allreduce_low_module,
&cs->han_op_module_name.allreduce.han_op_low_module_name);

cs->han_allgather_up_module = 0;
(void) mca_base_component_var_register(c, "allgather_up_module",
"up level module for allgather, 0 libnbc, 1 adapt",
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
OPAL_INFO_LVL_9,
MCA_BASE_VAR_SCOPE_READONLY, &cs->han_allgather_up_module);
(void) mca_coll_han_query_module_from_mca(c, "allgather_up_module",
"up level module for allgather, 0 libnbc, 1 adapt",
OPAL_INFO_LVL_9, &cs->han_allgather_up_module,
&cs->han_op_module_name.allgather.han_op_up_module_name);

cs->han_allgather_low_module = 0;
(void) mca_base_component_var_register(c, "allgather_low_module",
"low level module for allgather, 0 tuned, 1 sm",
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
OPAL_INFO_LVL_9,
MCA_BASE_VAR_SCOPE_READONLY, &cs->han_allgather_low_module);
(void) mca_coll_han_query_module_from_mca(c, "allgather_low_module",
"low level module for allgather, 0 tuned, 1 sm",
OPAL_INFO_LVL_9, &cs->han_allgather_low_module,
&cs->han_op_module_name.allgather.han_op_low_module_name);

cs->han_gather_up_module = 0;
(void) mca_base_component_var_register(c, "gather_up_module",
"up level module for gather, 0 libnbc, 1 adapt",
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
OPAL_INFO_LVL_9,
MCA_BASE_VAR_SCOPE_READONLY, &cs->han_gather_up_module);
(void) mca_coll_han_query_module_from_mca(c, "gather_up_module",
"up level module for gather, 0 libnbc, 1 adapt",
OPAL_INFO_LVL_9, &cs->han_gather_up_module,
&cs->han_op_module_name.gather.han_op_up_module_name);

cs->han_gather_low_module = 0;
(void) mca_base_component_var_register(c, "gather_low_module",
"low level module for gather, 0 tuned, 1 sm",
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
OPAL_INFO_LVL_9,
MCA_BASE_VAR_SCOPE_READONLY, &cs->han_gather_low_module);
(void) mca_coll_han_query_module_from_mca(c, "gather_low_module",
"low level module for gather, 0 tuned, 1 sm",
OPAL_INFO_LVL_9, &cs->han_gather_low_module,
&cs->han_op_module_name.gather.han_op_low_module_name);

cs->han_scatter_up_module = 0;
(void) mca_base_component_var_register(c, "scatter_up_module",
"up level module for scatter, 0 libnbc, 1 adapt",
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
OPAL_INFO_LVL_9,
MCA_BASE_VAR_SCOPE_READONLY, &cs->han_scatter_up_module);
(void) mca_coll_han_query_module_from_mca(c, "scatter_up_module",
"up level module for scatter, 0 libnbc, 1 adapt",
OPAL_INFO_LVL_9, &cs->han_scatter_up_module,
&cs->han_op_module_name.scatter.han_op_up_module_name);

cs->han_scatter_low_module = 0;
(void) mca_base_component_var_register(c, "scatter_low_module",
"low level module for scatter, 0 tuned, 1 sm",
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
OPAL_INFO_LVL_9,
MCA_BASE_VAR_SCOPE_READONLY, &cs->han_scatter_low_module);
(void) mca_coll_han_query_module_from_mca(c, "scatter_low_module",
"low level module for scatter, 0 tuned, 1 sm",
OPAL_INFO_LVL_9, &cs->han_scatter_low_module,
&cs->han_op_module_name.scatter.han_op_low_module_name);

cs->han_reproducible = 0;
(void) mca_base_component_var_register(c, "reproducible",
Expand Down