Skip to content

Commit ab27ad4

Browse files
authored
Merge pull request #10514 from bosilca/fix/10438
Use module names in HAN.
2 parents 267b119 + e572aee commit ab27ad4

File tree

2 files changed

+135
-61
lines changed

2 files changed

+135
-61
lines changed

ompi/mca/coll/han/coll_han.h

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -169,6 +169,27 @@ struct mca_coll_han_allgather_s {
169169
};
170170
typedef struct mca_coll_han_allgather_s mca_coll_han_allgather_t;
171171

172+
/**
 * Pair of module-name strings attached to a single collective operation:
 * one for the up level module and one for the low level module.
 */
struct mca_coll_han_op_up_low_module_name_t {
    char *han_op_up_module_name;    /* name of the up level module */
    char *han_op_low_module_name;   /* name of the low level module */
};
typedef struct mca_coll_han_op_up_low_module_name_t mca_coll_han_op_up_low_module_name_t;
176+
177+
/**
178+
* The only reason we need to keep these around is because our MCA system does
179+
* not support MCA variables that do not point to existing variables (aka. where
180+
* mbv_storage does not exists until the completion of the application). Thus,
181+
* we need to keep track of the storage for all variables, even the ones we
182+
* only use to translated into a string.
183+
*/
184+
typedef struct mca_coll_han_op_module_name_t {
185+
mca_coll_han_op_up_low_module_name_t bcast;
186+
mca_coll_han_op_up_low_module_name_t reduce;
187+
mca_coll_han_op_up_low_module_name_t allreduce;
188+
mca_coll_han_op_up_low_module_name_t allgather;
189+
mca_coll_han_op_up_low_module_name_t gather;
190+
mca_coll_han_op_up_low_module_name_t scatter;
191+
} mca_coll_han_op_module_name_t;
192+
172193
/**
173194
* Structure to hold the han coll component. First it holds the
174195
* base coll component, and then holds a bunch of
@@ -213,6 +234,8 @@ typedef struct mca_coll_han_component_t {
213234
uint32_t han_scatter_up_module;
214235
/* low level module for scatter */
215236
uint32_t han_scatter_low_module;
237+
/* name of the modules */
238+
mca_coll_han_op_module_name_t han_op_module_name;
216239
/* whether we need reproducible results
217240
* (but disables topological optimisations)
218241
*/

ompi/mca/coll/han/coll_han_component.c

Lines changed: 112 additions & 61 deletions
Original file line numberDiff line numberDiff line change
@@ -101,13 +101,43 @@ static int han_open(void)
101101
return mca_coll_han_init_dynamic_rules();
102102
}
103103

104-
105104
/*
106105
* Shut down the component
107106
*/
108107
static int han_close(void)
109108
{
110109
mca_coll_han_free_dynamic_rules();
110+
111+
free(mca_coll_han_component.han_op_module_name.bcast.han_op_up_module_name);
112+
mca_coll_han_component.han_op_module_name.bcast.han_op_up_module_name = NULL;
113+
free(mca_coll_han_component.han_op_module_name.bcast.han_op_low_module_name);
114+
mca_coll_han_component.han_op_module_name.bcast.han_op_low_module_name = NULL;
115+
116+
free(mca_coll_han_component.han_op_module_name.reduce.han_op_up_module_name);
117+
mca_coll_han_component.han_op_module_name.reduce.han_op_up_module_name = NULL;
118+
free(mca_coll_han_component.han_op_module_name.reduce.han_op_low_module_name);
119+
mca_coll_han_component.han_op_module_name.reduce.han_op_low_module_name = NULL;
120+
121+
free(mca_coll_han_component.han_op_module_name.allreduce.han_op_up_module_name);
122+
mca_coll_han_component.han_op_module_name.allreduce.han_op_up_module_name = NULL;
123+
free(mca_coll_han_component.han_op_module_name.allreduce.han_op_low_module_name);
124+
mca_coll_han_component.han_op_module_name.allreduce.han_op_low_module_name = NULL;
125+
126+
free(mca_coll_han_component.han_op_module_name.allgather.han_op_up_module_name);
127+
mca_coll_han_component.han_op_module_name.allgather.han_op_up_module_name = NULL;
128+
free(mca_coll_han_component.han_op_module_name.allgather.han_op_low_module_name);
129+
mca_coll_han_component.han_op_module_name.allgather.han_op_low_module_name = NULL;
130+
131+
free(mca_coll_han_component.han_op_module_name.gather.han_op_up_module_name);
132+
mca_coll_han_component.han_op_module_name.gather.han_op_up_module_name = NULL;
133+
free(mca_coll_han_component.han_op_module_name.gather.han_op_low_module_name);
134+
mca_coll_han_component.han_op_module_name.gather.han_op_low_module_name = NULL;
135+
136+
free(mca_coll_han_component.han_op_module_name.scatter.han_op_up_module_name);
137+
mca_coll_han_component.han_op_module_name.scatter.han_op_up_module_name = NULL;
138+
free(mca_coll_han_component.han_op_module_name.scatter.han_op_low_module_name);
139+
mca_coll_han_component.han_op_module_name.scatter.han_op_low_module_name = NULL;
140+
111141
return OMPI_SUCCESS;
112142
}
113143

@@ -147,6 +177,37 @@ const char* mca_coll_han_topo_lvl_to_str(TOPO_LVL_T topo_lvl)
147177
}
148178
}
149179

180+
static int
181+
mca_coll_han_query_module_from_mca(mca_base_component_t* c,
182+
const char* param_name,
183+
const char* param_doc,
184+
int info_level,
185+
uint32_t* module_id,
186+
char** storage)
187+
{
188+
char *module_name, *endptr = NULL;
189+
190+
int mod_id = COMPONENTS_COUNT;
191+
mod_id = (*module_id > (uint32_t)mod_id) ? mod_id : (int)*module_id; /* stay in range */
192+
mod_id = (mod_id < 0) ? 0 : mod_id; /* in range */
193+
194+
*storage = available_components[mod_id].component_name;
195+
196+
(void) mca_base_component_var_register(c, param_name, param_doc,
197+
MCA_BASE_VAR_TYPE_STRING, NULL, 0, 0,
198+
info_level,
199+
MCA_BASE_VAR_SCOPE_READONLY, storage);
200+
module_name = *storage;
201+
mod_id = strtol(module_name, &endptr, 10);
202+
if( module_name == endptr ) { /* no conversion, maybe we got a module name instead */
203+
/* Convert module name to id */
204+
mod_id = mca_coll_han_component_name_to_id(module_name);
205+
}
206+
/* Keep the module in the range */
207+
*module_id = (mod_id < 0) ? 0 : mod_id;
208+
209+
return OMPI_SUCCESS;
210+
}
150211

151212
/*
152213
* Register MCA params
@@ -177,18 +238,17 @@ static int han_register(void)
177238
MCA_BASE_VAR_SCOPE_READONLY, &cs->han_bcast_segsize);
178239

179240
cs->han_bcast_up_module = 0;
180-
(void) mca_base_component_var_register(c, "bcast_up_module",
181-
"up level module for bcast, 0 libnbc, 1 adapt",
182-
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
183-
OPAL_INFO_LVL_9,
184-
MCA_BASE_VAR_SCOPE_READONLY, &cs->han_bcast_up_module);
241+
(void) mca_coll_han_query_module_from_mca(c, "bcast_up_module",
242+
"up level module for bcast, 0 libnbc, 1 adapt",
243+
OPAL_INFO_LVL_9, &cs->han_bcast_up_module,
244+
&cs->han_op_module_name.bcast.han_op_up_module_name);
185245

186246
cs->han_bcast_low_module = 0;
187-
(void) mca_base_component_var_register(c, "bcast_low_module",
188-
"low level module for bcast, 0 tuned, 1 sm",
189-
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
190-
OPAL_INFO_LVL_9,
191-
MCA_BASE_VAR_SCOPE_READONLY, &cs->han_bcast_low_module);
247+
(void) mca_coll_han_query_module_from_mca(c, "bcast_low_module",
248+
"low level module for bcast, 0 tuned, 1 sm",
249+
OPAL_INFO_LVL_9,
250+
&cs->han_bcast_low_module,
251+
&cs->han_op_module_name.bcast.han_op_low_module_name);
192252

193253
cs->han_reduce_segsize = 65536;
194254
(void) mca_base_component_var_register(c, "reduce_segsize",
@@ -198,18 +258,17 @@ static int han_register(void)
198258
MCA_BASE_VAR_SCOPE_READONLY, &cs->han_reduce_segsize);
199259

200260
cs->han_reduce_up_module = 0;
201-
(void) mca_base_component_var_register(c, "reduce_up_module",
202-
"up level module for allreduce, 0 libnbc, 1 adapt",
203-
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
204-
OPAL_INFO_LVL_9,
205-
MCA_BASE_VAR_SCOPE_READONLY, &cs->han_reduce_up_module);
261+
(void) mca_coll_han_query_module_from_mca(c, "reduce_up_module",
262+
"up level module for allreduce, 0 libnbc, 1 adapt",
263+
OPAL_INFO_LVL_9, &cs->han_reduce_up_module,
264+
&cs->han_op_module_name.reduce.han_op_up_module_name);
206265

207266
cs->han_reduce_low_module = 0;
208-
(void) mca_base_component_var_register(c, "reduce_low_module",
209-
"low level module for allreduce, 0 tuned, 1 sm",
210-
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
211-
OPAL_INFO_LVL_9,
212-
MCA_BASE_VAR_SCOPE_READONLY, &cs->han_reduce_low_module);
267+
(void) mca_coll_han_query_module_from_mca(c, "reduce_low_module",
268+
"low level module for allreduce, 0 tuned, 1 sm",
269+
OPAL_INFO_LVL_9, &cs->han_reduce_low_module,
270+
&cs->han_op_module_name.reduce.han_op_low_module_name);
271+
213272
cs->han_allreduce_segsize = 65536;
214273
(void) mca_base_component_var_register(c, "allreduce_segsize",
215274
"segment size for allreduce",
@@ -218,60 +277,52 @@ static int han_register(void)
218277
MCA_BASE_VAR_SCOPE_READONLY, &cs->han_allreduce_segsize);
219278

220279
cs->han_allreduce_up_module = 0;
221-
(void) mca_base_component_var_register(c, "allreduce_up_module",
222-
"up level module for allreduce, 0 libnbc, 1 adapt",
223-
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
224-
OPAL_INFO_LVL_9,
225-
MCA_BASE_VAR_SCOPE_READONLY, &cs->han_allreduce_up_module);
280+
(void) mca_coll_han_query_module_from_mca(c, "allreduce_up_module",
281+
"up level module for allreduce, 0 libnbc, 1 adapt",
282+
OPAL_INFO_LVL_9, &cs->han_allreduce_up_module,
283+
&cs->han_op_module_name.allreduce.han_op_up_module_name);
226284

227285
cs->han_allreduce_low_module = 0;
228-
(void) mca_base_component_var_register(c, "allreduce_low_module",
229-
"low level module for allreduce, 0 tuned, 1 sm",
230-
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
231-
OPAL_INFO_LVL_9,
232-
MCA_BASE_VAR_SCOPE_READONLY, &cs->han_allreduce_low_module);
286+
(void) mca_coll_han_query_module_from_mca(c, "allreduce_low_module",
287+
"low level module for allreduce, 0 tuned, 1 sm",
288+
OPAL_INFO_LVL_9, &cs->han_allreduce_low_module,
289+
&cs->han_op_module_name.allreduce.han_op_low_module_name);
233290

234291
cs->han_allgather_up_module = 0;
235-
(void) mca_base_component_var_register(c, "allgather_up_module",
236-
"up level module for allgather, 0 libnbc, 1 adapt",
237-
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
238-
OPAL_INFO_LVL_9,
239-
MCA_BASE_VAR_SCOPE_READONLY, &cs->han_allgather_up_module);
292+
(void) mca_coll_han_query_module_from_mca(c, "allgather_up_module",
293+
"up level module for allgather, 0 libnbc, 1 adapt",
294+
OPAL_INFO_LVL_9, &cs->han_allgather_up_module,
295+
&cs->han_op_module_name.allgather.han_op_up_module_name);
240296

241297
cs->han_allgather_low_module = 0;
242-
(void) mca_base_component_var_register(c, "allgather_low_module",
243-
"low level module for allgather, 0 tuned, 1 sm",
244-
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
245-
OPAL_INFO_LVL_9,
246-
MCA_BASE_VAR_SCOPE_READONLY, &cs->han_allgather_low_module);
298+
(void) mca_coll_han_query_module_from_mca(c, "allgather_low_module",
299+
"low level module for allgather, 0 tuned, 1 sm",
300+
OPAL_INFO_LVL_9, &cs->han_allgather_low_module,
301+
&cs->han_op_module_name.allgather.han_op_low_module_name);
247302

248303
cs->han_gather_up_module = 0;
249-
(void) mca_base_component_var_register(c, "gather_up_module",
250-
"up level module for gather, 0 libnbc, 1 adapt",
251-
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
252-
OPAL_INFO_LVL_9,
253-
MCA_BASE_VAR_SCOPE_READONLY, &cs->han_gather_up_module);
304+
(void) mca_coll_han_query_module_from_mca(c, "gather_up_module",
305+
"up level module for gather, 0 libnbc, 1 adapt",
306+
OPAL_INFO_LVL_9, &cs->han_gather_up_module,
307+
&cs->han_op_module_name.gather.han_op_up_module_name);
254308

255309
cs->han_gather_low_module = 0;
256-
(void) mca_base_component_var_register(c, "gather_low_module",
257-
"low level module for gather, 0 tuned, 1 sm",
258-
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
259-
OPAL_INFO_LVL_9,
260-
MCA_BASE_VAR_SCOPE_READONLY, &cs->han_gather_low_module);
310+
(void) mca_coll_han_query_module_from_mca(c, "gather_low_module",
311+
"low level module for gather, 0 tuned, 1 sm",
312+
OPAL_INFO_LVL_9, &cs->han_gather_low_module,
313+
&cs->han_op_module_name.gather.han_op_low_module_name);
261314

262315
cs->han_scatter_up_module = 0;
263-
(void) mca_base_component_var_register(c, "scatter_up_module",
264-
"up level module for scatter, 0 libnbc, 1 adapt",
265-
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
266-
OPAL_INFO_LVL_9,
267-
MCA_BASE_VAR_SCOPE_READONLY, &cs->han_scatter_up_module);
316+
(void) mca_coll_han_query_module_from_mca(c, "scatter_up_module",
317+
"up level module for scatter, 0 libnbc, 1 adapt",
318+
OPAL_INFO_LVL_9, &cs->han_scatter_up_module,
319+
&cs->han_op_module_name.scatter.han_op_up_module_name);
268320

269321
cs->han_scatter_low_module = 0;
270-
(void) mca_base_component_var_register(c, "scatter_low_module",
271-
"low level module for scatter, 0 tuned, 1 sm",
272-
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
273-
OPAL_INFO_LVL_9,
274-
MCA_BASE_VAR_SCOPE_READONLY, &cs->han_scatter_low_module);
322+
(void) mca_coll_han_query_module_from_mca(c, "scatter_low_module",
323+
"low level module for scatter, 0 tuned, 1 sm",
324+
OPAL_INFO_LVL_9, &cs->han_scatter_low_module,
325+
&cs->han_op_module_name.scatter.han_op_low_module_name);
275326

276327
cs->han_reproducible = 0;
277328
(void) mca_base_component_var_register(c, "reproducible",

0 commit comments

Comments
 (0)