Skip to content

Commit 6fb81f2

Browse files
committed
mtl/psm2: create mca variables to shadow PSM2 environment variables
This commit enables MCA support for the following PSM2 environment variables: PSM2_DEVICES, PSM2_MEMORY, PSM2_MQ_SENDREQS_MAX, PSM2_MQ_RECVREQS_MAX, PSM2_MQ_RNDV_HFI_THRESH, PSM2_MQ_RNDV_SHM_THRESH, PSM2_RCVTHREAD, PSM2_SHAREDCONTEXTS, PSM2_SHAREDCONTEXTS_MAX, and PSM2_TRACEMASK. These variables can be set through MCA if they are not already set in the environment. Signed-off-by: Nathan Hjelm <[email protected]>
1 parent ccf1780 commit 6fb81f2

File tree

2 files changed

+155
-29
lines changed

2 files changed

+155
-29
lines changed

ompi/mca/mtl/psm2/mtl_psm2_component.c

Lines changed: 142 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -11,8 +11,8 @@
1111
* Copyright (c) 2004-2005 The Regents of the University of California.
1212
* All rights reserved.
1313
* Copyright (c) 2006-2010 QLogic Corporation. All rights reserved.
14-
* Copyright (c) 2012-2015 Los Alamos National Security, LLC.
15-
* All rights reserved.
14+
* Copyright (c) 2012-2017 Los Alamos National Security, LLC. All rights
15+
* reserved.
1616
* Copyright (c) 2013-2015 Intel, Inc. All rights reserved
1717
* Copyright (c) 2017 Research Organization for Information Science
1818
* and Technology (RIST). All rights reserved.
@@ -77,9 +77,129 @@ mca_mtl_psm2_component_t mca_mtl_psm2_component = {
7777
}
7878
};
7979

80+
struct ompi_mtl_psm2_shadow_variable {
81+
int variable_type;
82+
void *storage;
83+
mca_base_var_storage_t default_value;
84+
const char *env_name;
85+
mca_base_var_info_lvl_t info_level;
86+
const char *mca_name;
87+
const char *description;
88+
};
89+
90+
struct ompi_mtl_psm2_shadow_variable ompi_mtl_psm2_shadow_variables[] = {
91+
{MCA_BASE_VAR_TYPE_STRING, &ompi_mtl_psm2.psm2_devices, {.stringval = "self,shm,hfi"}, "PSM2_DEVICES", OPAL_INFO_LVL_3,
92+
"devices", "Comma-delimited list of PSM2 devices. Valid values: self, shm, hfi (default: self,shm,hfi)"},
93+
{MCA_BASE_VAR_TYPE_STRING, &ompi_mtl_psm2.psm2_memory, {.stringval = "normal"}, "PSM2_MEMORY", OPAL_INFO_LVL_9,
94+
"memory_model", "PSM2 memory usage mode (default: normal)"},
95+
{MCA_BASE_VAR_TYPE_UNSIGNED_LONG, &ompi_mtl_psm2.psm2_mq_sendreqs_max, {.ulval = 1048576}, "PSM2_MQ_SENDREQS_MAX", OPAL_INFO_LVL_3,
96+
"mq_sendreqs_max", "PSM2 maximum number of isend requests in flight (default: 1M)"},
97+
{MCA_BASE_VAR_TYPE_UNSIGNED_LONG, &ompi_mtl_psm2.psm2_mq_recvreqs_max, {.ulval = 1048576}, "PSM2_MQ_RECVREQS_MAX", OPAL_INFO_LVL_3,
98+
"mq_recvreqs_max", "PSM2 maximum number of irecv requests in flight (default: 1M)"},
99+
{MCA_BASE_VAR_TYPE_UNSIGNED_LONG, &ompi_mtl_psm2.psm2_mq_rndv_hfi_threshold, {.ulval = 64000}, "PSM2_MQ_RNDV_HFI_THRESH", OPAL_INFO_LVL_3,
100+
"hfi_eager_limit", "PSM2 eager to rendezvous threshold (default: 64000)"},
101+
{MCA_BASE_VAR_TYPE_UNSIGNED_LONG, &ompi_mtl_psm2.psm2_mq_rndv_shm_threshold, {.ulval = 16000}, "PSM2_MQ_RNDV_SHM_THRESH", OPAL_INFO_LVL_3,
102+
"shm_eager_limit", "PSM2 shared memory eager to rendezvous threshold (default: 16000)"},
103+
{MCA_BASE_VAR_TYPE_BOOL, &ompi_mtl_psm2.psm2_recvthread, {.boolval = true}, "PSM2_RCVTHREAD", OPAL_INFO_LVL_3,
104+
"use_receive_thread", "Use PSM2 progress thread (default: true)"},
105+
{MCA_BASE_VAR_TYPE_BOOL, &ompi_mtl_psm2.psm2_shared_contexts, {.boolval = true}, "PSM2_SHAREDCONTEXTS", OPAL_INFO_LVL_6,
106+
"use_shared_contexts", "Share PSM contexts between MPI processes (default: true)"},
107+
{MCA_BASE_VAR_TYPE_UNSIGNED_LONG, &ompi_mtl_psm2.psm2_shared_contexts_max, {.ulval = 8}, "PSM2_SHAREDCONTEXTS_MAX", OPAL_INFO_LVL_9,
108+
"max_shared_contexts", "Maximum number of contexts available on a node (default: 8, max: 8)"},
109+
{MCA_BASE_VAR_TYPE_UNSIGNED_LONG, &ompi_mtl_psm2.psm2_tracemask, {.ulval = 1}, "PSM2_TRACEMASK", OPAL_INFO_LVL_9,
110+
"trace_mask", "PSM2 tracemask value. See PSM2 documentation for accepted values (default: 1)"},
111+
{-1},
112+
};
113+
114+
static void ompi_mtl_psm2_set_shadow_env (struct ompi_mtl_psm2_shadow_variable *variable)
115+
{
116+
mca_base_var_storage_t *storage = variable->storage;
117+
char *env_value;
118+
int ret;
119+
120+
switch (variable->variable_type) {
121+
case MCA_BASE_VAR_TYPE_BOOL:
122+
ret = asprintf (&env_value, "%s=%s", variable->env_name, storage->boolval ? "YES" : "NO");
123+
break;
124+
case MCA_BASE_VAR_TYPE_UNSIGNED_LONG:
125+
if (0 == strcmp (variable->env_name, "PSM2_TRACEMASK")) {
126+
/* PSM2 documentation shows the tracemask as a hexidecimal number. to be consitent
127+
* use hexidecimal here. */
128+
ret = asprintf (&env_value, "%s=0x%lx", variable->env_name, storage->ulval);
129+
} else {
130+
ret = asprintf (&env_value, "%s=%lu", variable->env_name, storage->ulval);
131+
}
132+
break;
133+
case MCA_BASE_VAR_TYPE_STRING:
134+
ret = asprintf (&env_value, "%s=%s", variable->env_name, storage->stringval);
135+
break;
136+
}
137+
138+
if (0 > ret) {
139+
fprintf (stderr, "ERROR setting PSM2 environment variable: %s\n", variable->env_name);
140+
} else {
141+
putenv (env_value);
142+
}
143+
}
144+
145+
static void ompi_mtl_psm2_register_shadow_env (struct ompi_mtl_psm2_shadow_variable *variable)
146+
{
147+
mca_base_var_storage_t *storage = variable->storage;
148+
char *env_value;
149+
150+
env_value = getenv (variable->env_name);
151+
switch (variable->variable_type) {
152+
case MCA_BASE_VAR_TYPE_BOOL:
153+
if (env_value) {
154+
int tmp;
155+
(void) mca_base_var_enum_bool.value_from_string (&mca_base_var_enum_bool, env_value, &tmp);
156+
storage->boolval = !!tmp;
157+
} else {
158+
storage->boolval = variable->default_value.boolval;
159+
}
160+
break;
161+
case MCA_BASE_VAR_TYPE_UNSIGNED_LONG:
162+
if (env_value) {
163+
storage->ulval = strtol (env_value, NULL, 0);
164+
} else {
165+
storage->ulval = variable->default_value.ulval;
166+
}
167+
break;
168+
case MCA_BASE_VAR_TYPE_STRING:
169+
if (env_value) {
170+
storage->stringval = env_value;
171+
} else {
172+
storage->stringval = variable->default_value.stringval;
173+
}
174+
break;
175+
}
176+
177+
(void) mca_base_component_var_register (&mca_mtl_psm2_component.super.mtl_version, variable->mca_name, variable->description,
178+
variable->variable_type, NULL, 0, 0, variable->info_level, MCA_BASE_VAR_SCOPE_READONLY,
179+
variable->storage);
180+
}
181+
182+
static int
183+
get_num_total_procs(int *out_ntp)
184+
{
185+
*out_ntp = (int)ompi_process_info.num_procs;
186+
return OMPI_SUCCESS;
187+
}
188+
189+
static int
190+
get_num_local_procs(int *out_nlp)
191+
{
192+
/* num_local_peers does not include us in
193+
* its calculation, so adjust for that */
194+
*out_nlp = (int)(1 + ompi_process_info.num_local_peers);
195+
return OMPI_SUCCESS;
196+
}
197+
80198
static int
81199
ompi_mtl_psm2_component_register(void)
82200
{
201+
int num_local_procs, num_total_procs;
202+
83203
ompi_mtl_psm2.connect_timeout = 180;
84204
(void) mca_base_component_var_register(&mca_mtl_psm2_component.super.mtl_version,
85205
"connect_timeout",
@@ -89,15 +209,32 @@ ompi_mtl_psm2_component_register(void)
89209
MCA_BASE_VAR_SCOPE_READONLY,
90210
&ompi_mtl_psm2.connect_timeout);
91211

212+
213+
(void) get_num_local_procs(&num_local_procs);
214+
(void) get_num_total_procs(&num_total_procs);
215+
92216
/* set priority high enough to beat ob1's default (also set higher than psm) */
93-
param_priority = 40;
217+
if (num_local_procs == num_total_procs) {
218+
/* disable hfi if all processes are local */
219+
setenv("PSM2_DEVICES", "self,shm", 0);
220+
/* ob1 is much faster than psm2 with shared memory */
221+
param_priority = 10;
222+
} else {
223+
param_priority = 40;
224+
}
225+
94226
(void) mca_base_component_var_register (&mca_mtl_psm2_component.super.mtl_version,
95227
"priority", "Priority of the PSM2 MTL component",
96228
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
97229
OPAL_INFO_LVL_9,
98230
MCA_BASE_VAR_SCOPE_READONLY,
99231
&param_priority);
100232

233+
234+
for (int i = 0 ; ompi_mtl_psm2_shadow_variables[i].variable_type >= 0 ; ++i) {
235+
ompi_mtl_psm2_register_shadow_env (ompi_mtl_psm2_shadow_variables + i);
236+
}
237+
101238
return OMPI_SUCCESS;
102239
}
103240

@@ -172,22 +309,6 @@ ompi_mtl_psm2_component_close(void)
172309
return OMPI_SUCCESS;
173310
}
174311

175-
static int
176-
get_num_total_procs(int *out_ntp)
177-
{
178-
*out_ntp = (int)ompi_process_info.num_procs;
179-
return OMPI_SUCCESS;
180-
}
181-
182-
static int
183-
get_num_local_procs(int *out_nlp)
184-
{
185-
/* num_local_peers does not include us in
186-
* its calculation, so adjust for that */
187-
*out_nlp = (int)(1 + ompi_process_info.num_local_peers);
188-
return OMPI_SUCCESS;
189-
}
190-
191312
static int
192313
get_local_rank(int *out_rank)
193314
{
@@ -211,7 +332,6 @@ ompi_mtl_psm2_component_init(bool enable_progress_threads,
211332
int verno_major = PSM2_VERNO_MAJOR;
212333
int verno_minor = PSM2_VERNO_MINOR;
213334
int local_rank = -1, num_local_procs = 0;
214-
int num_total_procs = 0;
215335

216336
/* Compute the total number of processes on this host and our local rank
217337
* on that node. We need to provide PSM2 with these values so it can
@@ -226,11 +346,6 @@ ompi_mtl_psm2_component_init(bool enable_progress_threads,
226346
opal_output(0, "Cannot determine local rank. Cannot continue.\n");
227347
return NULL;
228348
}
229-
if (OMPI_SUCCESS != get_num_total_procs(&num_total_procs)) {
230-
opal_output(0, "Cannot determine total number of processes. "
231-
"Cannot continue.\n");
232-
return NULL;
233-
}
234349

235350
err = psm2_error_register_handler(NULL /* no ep */,
236351
PSM2_ERRHANDLER_NOP);
@@ -240,8 +355,8 @@ ompi_mtl_psm2_component_init(bool enable_progress_threads,
240355
return NULL;
241356
}
242357

243-
if (num_local_procs == num_total_procs) {
244-
setenv("PSM2_DEVICES", "self,shm", 0);
358+
for (int i = 0 ; ompi_mtl_psm2_shadow_variables[i].variable_type >= 0 ; ++i) {
359+
ompi_mtl_psm2_set_shadow_env (ompi_mtl_psm2_shadow_variables + i);
245360
}
246361

247362
err = psm2_init(&verno_major, &verno_minor);

ompi/mca/mtl/psm2/mtl_psm2_types.h

Lines changed: 13 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
12
/*
23
* Copyright (c) 2004-2006 The Trustees of Indiana University and Indiana
34
* University Research and Technology
@@ -10,8 +11,8 @@
1011
* Copyright (c) 2004-2006 The Regents of the University of California.
1112
* All rights reserved.
1213
* Copyright (c) 2006 QLogic Corporation. All rights reserved.
13-
* Copyright (c) 2011 Los Alamos National Security, LLC.
14-
* All rights reserved.
14+
* Copyright (c) 2011-2017 Los Alamos National Security, LLC. All rights
15+
* reserved.
1516
* Copyright (c) 2013-2015 Intel, Inc. All rights reserved
1617
* $COPYRIGHT$
1718
*
@@ -49,6 +50,16 @@ struct mca_mtl_psm2_module_t {
4950
psm2_mq_t mq;
5051
psm2_epid_t epid;
5152
psm2_epaddr_t epaddr;
53+
char *psm2_devices;
54+
char *psm2_memory;
55+
unsigned long psm2_mq_sendreqs_max;
56+
unsigned long psm2_mq_recvreqs_max;
57+
unsigned long psm2_mq_rndv_hfi_threshold;
58+
unsigned long psm2_mq_rndv_shm_threshold;
59+
unsigned long psm2_shared_contexts_max;
60+
unsigned long psm2_tracemask;
61+
bool psm2_recvthread;
62+
bool psm2_shared_contexts;
5263
};
5364

5465
typedef struct mca_mtl_psm2_module_t mca_mtl_psm2_module_t;

0 commit comments

Comments
 (0)