Skip to content

Commit 5432c32

Browse files
committed
ofi: Refactor MCA vars / fix output level
The ofi common refactoring patches broke handling of the verbose flag because I forgot that the input variable has to live longer than the function (sigh). Move the output level variable to be a global variable. Add error checking to the entire mca registration path, since it was notably lacking. Even before the refactor, we didn't handle re-setting verbosity after registering synonyms properly. This patch probably isn't perfect in that regards, but at least is *consistent* in what we do. Signed-off-by: Brian Barrett <[email protected]>
1 parent c1a4c08 commit 5432c32

File tree

1 file changed

+60
-29
lines changed

1 file changed

+60
-29
lines changed

opal/mca/common/ofi/common_ofi.c

Lines changed: 60 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,7 @@ opal_common_ofi_module_t opal_common_ofi = {.prov_include = NULL,
4040
.output = -1};
4141
static const char default_prov_exclude_list[] = "shm,sockets,tcp,udp,rstream,usnic";
4242
static opal_mutex_t opal_common_ofi_mutex = OPAL_MUTEX_STATIC_INIT;
43+
static int opal_common_ofi_verbose_level = 0;
4344
static int opal_common_ofi_init_ref_cnt = 0;
4445
#ifdef HAVE_STRUCT_FI_OPS_MEM_MONITOR
4546
static bool opal_common_ofi_installed_memory_monitor = false;
@@ -264,20 +265,18 @@ int opal_common_ofi_is_in_list(char **list, char *item)
264265

265266
int opal_common_ofi_mca_register(const mca_base_component_t *component)
266267
{
267-
static int include_index;
268-
static int exclude_index;
269-
static int verbose_index;
270-
int verbose;
271-
int param;
268+
static int include_index = -1;
269+
static int exclude_index = -1;
270+
static int verbose_index = -1;
271+
int ret;
272272

273273
if (fi_version() < FI_VERSION(1, 0)) {
274274
return OPAL_ERROR;
275275
}
276276

277277
OPAL_THREAD_LOCK(&opal_common_ofi_mutex);
278278

279-
param = mca_base_var_find("opal", "opal_common", "ofi", "provider_include");
280-
if (0 > param) {
279+
if (0 > include_index) {
281280
/*
282281
* this monkey business is needed because of the way the MCA VARs stuff tries to handle
283282
* pointers to strings when when destructing the MCA var database. If you don't do
@@ -296,12 +295,13 @@ int opal_common_ofi_mca_register(const mca_base_component_t *component)
296295
"exclusive with mtl_ofi_provider_exclude.",
297296
MCA_BASE_VAR_TYPE_STRING, NULL, 0, 0, OPAL_INFO_LVL_1, MCA_BASE_VAR_SCOPE_READONLY,
298297
opal_common_ofi.prov_include);
299-
} else {
300-
include_index = param;
298+
if (0 > include_index) {
299+
ret = include_index;
300+
goto err;
301+
}
301302
}
302303

303-
param = mca_base_var_find("opal", "opal_common", "ofi", "provider_exclude");
304-
if (0 > param) {
304+
if (0 > exclude_index) {
305305
if (NULL == opal_common_ofi.prov_exclude) {
306306
opal_common_ofi.prov_exclude = (char **) malloc(sizeof(char *));
307307
assert(NULL != opal_common_ofi.prov_exclude);
@@ -314,42 +314,73 @@ int opal_common_ofi_mca_register(const mca_base_component_t *component)
314314
"exclusive with mtl_ofi_provider_include.",
315315
MCA_BASE_VAR_TYPE_STRING, NULL, 0, 0, OPAL_INFO_LVL_1, MCA_BASE_VAR_SCOPE_READONLY,
316316
opal_common_ofi.prov_exclude);
317-
} else {
318-
exclude_index = param;
317+
if (0 > exclude_index) {
318+
ret = exclude_index;
319+
goto err;
320+
}
319321
}
320322

321-
param = mca_base_var_find("opal", "opal_common", "ofi", "verbose");
322-
if (0 > param) {
323+
if (0 > verbose_index) {
323324
verbose_index = mca_base_var_register("opal", "opal_common", "ofi", "verbose",
324325
"Verbose level of the OFI components",
325326
MCA_BASE_VAR_TYPE_INT, NULL, 0,
326327
MCA_BASE_VAR_FLAG_SETTABLE, OPAL_INFO_LVL_3,
327328
MCA_BASE_VAR_SCOPE_LOCAL,
328-
&verbose);
329-
} else {
330-
verbose_index = param;
329+
&opal_common_ofi_verbose_level);
330+
if (0 > verbose_index) {
331+
ret = verbose_index;
332+
goto err;
333+
}
331334
}
332335

333336
if (component) {
334-
mca_base_var_register_synonym(include_index, component->mca_project_name,
335-
component->mca_type_name, component->mca_component_name,
336-
"provider_include", 0);
337-
mca_base_var_register_synonym(exclude_index, component->mca_project_name,
338-
component->mca_type_name, component->mca_component_name,
339-
"provider_exclude", 0);
340-
mca_base_var_register_synonym(verbose_index, component->mca_project_name,
341-
component->mca_type_name, component->mca_component_name,
342-
"verbose", 0);
337+
ret = mca_base_var_register_synonym(include_index,
338+
component->mca_project_name,
339+
component->mca_type_name,
340+
component->mca_component_name,
341+
"provider_include", 0);
342+
if (0 > ret) {
343+
goto err;
344+
}
345+
ret = mca_base_var_register_synonym(exclude_index,
346+
component->mca_project_name,
347+
component->mca_type_name,
348+
component->mca_component_name,
349+
"provider_exclude", 0);
350+
if (0 > ret) {
351+
goto err;
352+
}
353+
ret = mca_base_var_register_synonym(verbose_index,
354+
component->mca_project_name,
355+
component->mca_type_name,
356+
component->mca_component_name,
357+
"verbose", 0);
358+
if (0 > ret) {
359+
goto err;
360+
}
343361
}
344362

363+
/* The frameworks initialize their output streams during
364+
* register(), so we similarly try to initialize the output stream
365+
* as early as possible. Because we may register synonyms for
366+
* each dependent component, we don't necessarily have all the
367+
* data to set verbosity during the first call to
368+
* common_ofi_register(). The MCA infrastructure has rules on
369+
* synonym value evaluation, so our rubric is to re-set verbosity
370+
* after every call to register() (which has registered a new
371+
* synonym). This is not perfect, but it's not horrible, either.
372+
*/
345373
if (opal_common_ofi.output == -1) {
346374
opal_common_ofi.output = opal_output_open(NULL);
347-
opal_output_set_verbosity(opal_common_ofi.output, verbose);
348375
}
376+
opal_output_set_verbosity(opal_common_ofi.output, opal_common_ofi_verbose_level);
377+
378+
ret = OPAL_SUCCESS;
349379

380+
err:
350381
OPAL_THREAD_UNLOCK(&opal_common_ofi_mutex);
351382

352-
return OPAL_SUCCESS;
383+
return ret;
353384
}
354385

355386
/* check that the tx attributes match */

0 commit comments

Comments
 (0)