17
17
#include "opal/mca/base/mca_base_framework.h"
18
18
#include "opal/mca/pmix/pmix-internal.h"
19
19
#include "opal/memoryhooks/memory.h"
20
+ #include "opal/util/argv.h"
20
21
21
22
#include <ucm/api/ucm.h>
23
+ #include <fnmatch.h>
24
+ #include <stdio.h>
22
25
23
26
/***********************************************************************/
24
27
@@ -28,7 +31,8 @@ opal_common_ucx_module_t opal_common_ucx = {
28
31
.verbose = 0 ,
29
32
.progress_iterations = 100 ,
30
33
.registered = 0 ,
31
- .opal_mem_hooks = 0
34
+ .opal_mem_hooks = 0 ,
35
+ .tls = NULL
32
36
};
33
37
34
38
static void opal_common_ucx_mem_release_cb (void * buf , size_t length ,
@@ -39,10 +43,15 @@ static void opal_common_ucx_mem_release_cb(void *buf, size_t length,
39
43
40
44
OPAL_DECLSPEC void opal_common_ucx_mca_var_register (const mca_base_component_t * component )
41
45
{
46
+ static const char * default_tls = "rc_verbs,ud_verbs,rc_mlx5,dc_mlx5,cuda_ipc,rocm_ipc" ;
47
+ static const char * default_devices = "mlx*" ;
42
48
static int registered = 0 ;
43
49
static int hook_index ;
44
50
static int verbose_index ;
45
51
static int progress_index ;
52
+ static int tls_index ;
53
+ static int devices_index ;
54
+
46
55
if (!registered ) {
47
56
verbose_index = mca_base_var_register ("opal" , "opal_common" , "ucx" , "verbose" ,
48
57
"Verbose level of the UCX components" ,
@@ -63,6 +72,29 @@ OPAL_DECLSPEC void opal_common_ucx_mca_var_register(const mca_base_component_t *
63
72
OPAL_INFO_LVL_3 ,
64
73
MCA_BASE_VAR_SCOPE_LOCAL ,
65
74
& opal_common_ucx .opal_mem_hooks );
75
+
76
+ opal_common_ucx .tls = malloc (sizeof (* opal_common_ucx .tls ));
77
+ * opal_common_ucx .tls = strdup (default_tls );
78
+ tls_index = mca_base_var_register ("opal" , "opal_common" , "ucx" , "tls" ,
79
+ "List of UCX transports which should be supported on the system, to enable "
80
+ "selecting the UCX component. Special values: any (any available). "
81
+ "A '^' prefix negates the list. "
82
+ "For example, in order to exclude on shared memory and TCP transports, "
83
+ "please set to '^posix,sysv,self,tcp,cma,knem,xpmem'." ,
84
+ MCA_BASE_VAR_TYPE_STRING , NULL , 0 , 0 ,
85
+ OPAL_INFO_LVL_3 ,
86
+ MCA_BASE_VAR_SCOPE_LOCAL ,
87
+ opal_common_ucx .tls );
88
+
89
+ opal_common_ucx .devices = malloc (sizeof (* opal_common_ucx .devices ));
90
+ * opal_common_ucx .devices = strdup (default_devices );
91
+ devices_index = mca_base_var_register ("opal" , "opal_common" , "ucx" , "devices" ,
92
+ "List of device driver pattern names, which, if supported by UCX, will "
93
+ "bump its priority above ob1. Special values: any (any available)" ,
94
+ MCA_BASE_VAR_TYPE_STRING , NULL , 0 , 0 ,
95
+ OPAL_INFO_LVL_3 ,
96
+ MCA_BASE_VAR_SCOPE_LOCAL ,
97
+ opal_common_ucx .devices );
66
98
registered = 1 ;
67
99
}
68
100
if (component ) {
@@ -78,6 +110,14 @@ OPAL_DECLSPEC void opal_common_ucx_mca_var_register(const mca_base_component_t *
78
110
component -> mca_type_name ,
79
111
component -> mca_component_name ,
80
112
"opal_mem_hooks" , 0 );
113
+ mca_base_var_register_synonym (tls_index , component -> mca_project_name ,
114
+ component -> mca_type_name ,
115
+ component -> mca_component_name ,
116
+ "tls" , 0 );
117
+ mca_base_var_register_synonym (devices_index , component -> mca_project_name ,
118
+ component -> mca_type_name ,
119
+ component -> mca_component_name ,
120
+ "devices" , 0 );
81
121
}
82
122
}
83
123
@@ -126,6 +166,166 @@ OPAL_DECLSPEC void opal_common_ucx_mca_deregister(void)
126
166
opal_output_close (opal_common_ucx .output );
127
167
}
128
168
169
+ #if HAVE_DECL_OPEN_MEMSTREAM
170
+ static bool opal_common_ucx_check_device (const char * device_name , char * * device_list )
171
+ {
172
+ char sysfs_driver_link [PATH_MAX ];
173
+ char driver_path [PATH_MAX ];
174
+ char * ib_device_name ;
175
+ char * driver_name ;
176
+ char * * list_item ;
177
+ ssize_t ret ;
178
+
179
+ /* mlx5_0:1 */
180
+ ret = sscanf (device_name , "%m[^:]%*d" , & ib_device_name );
181
+ if (ret != 1 ) {
182
+ return false;
183
+ }
184
+
185
+ sysfs_driver_link [sizeof (sysfs_driver_link ) - 1 ] = '\0' ;
186
+ snprintf (sysfs_driver_link , sizeof (sysfs_driver_link ) - 1 ,
187
+ "/sys/class/infiniband/%s/device/driver" , ib_device_name );
188
+ free (ib_device_name );
189
+
190
+ driver_path [sizeof (driver_path ) - 1 ] = '\0' ;
191
+ ret = readlink (sysfs_driver_link , driver_path , sizeof (driver_path ) - 1 );
192
+ if (ret < 0 ) {
193
+ MCA_COMMON_UCX_VERBOSE (2 , "readlink(%s) failed: %s" , sysfs_driver_link ,
194
+ strerror (errno ));
195
+ return false;
196
+ }
197
+
198
+ driver_name = basename (driver_path );
199
+ for (list_item = device_list ; * list_item != NULL ; ++ list_item ) {
200
+ if (!fnmatch (* list_item , driver_name , 0 )) {
201
+ MCA_COMMON_UCX_VERBOSE (2 , "driver '%s' matched by '%s'" ,
202
+ driver_path , * list_item );
203
+ return true;
204
+ }
205
+ }
206
+
207
+ return false;
208
+ }
209
+ #endif
210
+
211
+ OPAL_DECLSPEC opal_common_ucx_support_level_t
212
+ opal_common_ucx_support_level (ucp_context_h context )
213
+ {
214
+ opal_common_ucx_support_level_t support_level = OPAL_COMMON_UCX_SUPPORT_NONE ;
215
+ static const char * support_level_names [] = {
216
+ [OPAL_COMMON_UCX_SUPPORT_NONE ] = "none" ,
217
+ [OPAL_COMMON_UCX_SUPPORT_TRANSPORT ] = "transports only" ,
218
+ [OPAL_COMMON_UCX_SUPPORT_DEVICE ] = "transports and devices"
219
+ };
220
+ #if HAVE_DECL_OPEN_MEMSTREAM
221
+ char * rsc_tl_name , * rsc_device_name ;
222
+ char * * tl_list , * * device_list , * * list_item ;
223
+ bool is_any_tl , is_any_device ;
224
+ bool found_tl , negate ;
225
+ char line [128 ];
226
+ FILE * stream ;
227
+ char * buffer ;
228
+ size_t size ;
229
+ int ret ;
230
+ #endif
231
+
232
+ is_any_tl = !strcmp (* opal_common_ucx .tls , "any" );
233
+ is_any_device = !strcmp (* opal_common_ucx .devices , "any" );
234
+
235
+ /* Check for special value "any" */
236
+ if (is_any_tl && is_any_device ) {
237
+ MCA_COMMON_UCX_VERBOSE (1 , "ucx is enabled on any transport or device" ,
238
+ * opal_common_ucx .tls );
239
+ support_level = OPAL_COMMON_UCX_SUPPORT_DEVICE ;
240
+ goto out ;
241
+ }
242
+
243
+ #if HAVE_DECL_OPEN_MEMSTREAM
244
+ /* Split transports list */
245
+ negate = ('^' == (* opal_common_ucx .tls )[0 ]);
246
+ tl_list = opal_argv_split (* opal_common_ucx .tls + (negate ? 1 : 0 ), ',' );
247
+ if (tl_list == NULL ) {
248
+ MCA_COMMON_UCX_VERBOSE (1 , "failed to split tl list '%s', ucx is disabled" ,
249
+ * opal_common_ucx .tls );
250
+ goto out ;
251
+ }
252
+
253
+ /* Split devices list */
254
+ device_list = opal_argv_split (* opal_common_ucx .devices , ',' );
255
+ if (device_list == NULL ) {
256
+ MCA_COMMON_UCX_VERBOSE (1 , "failed to split devices list '%s', ucx is disabled" ,
257
+ * opal_common_ucx .devices );
258
+ goto out_free_tl_list ;
259
+ }
260
+
261
+ /* Open memory stream to dump UCX information to */
262
+ stream = open_memstream (& buffer , & size );
263
+ if (stream == NULL ) {
264
+ MCA_COMMON_UCX_VERBOSE (1 , "failed to open memory stream for ucx info (%s), "
265
+ "ucx is disabled" , strerror (errno ));
266
+ goto out_free_device_list ;
267
+ }
268
+
269
+ /* Print ucx transports information to the memory stream */
270
+ ucp_context_print_info (context , stream );
271
+
272
+ /* Rewind and read transports/devices list from the stream */
273
+ fseek (stream , 0 , SEEK_SET );
274
+ while ((support_level != OPAL_COMMON_UCX_SUPPORT_DEVICE ) &&
275
+ (fgets (line , sizeof (line ), stream ) != NULL )) {
276
+ rsc_tl_name = NULL ;
277
+ ret = sscanf (line ,
278
+ /* "# resource 6 : md 5 dev 4 flags -- rc_verbs/mlx5_0:1" */
279
+ "# resource %*d : md %*d dev %*d flags -- %m[^/ \n\r]/%m[^/ \n\r]" ,
280
+ & rsc_tl_name , & rsc_device_name );
281
+ if (ret != 2 ) {
282
+ free (rsc_tl_name );
283
+ continue ;
284
+ }
285
+
286
+ /* Check if 'rsc_tl_name' is found provided list */
287
+ found_tl = is_any_tl ;
288
+ for (list_item = tl_list ; !found_tl && (* list_item != NULL ); ++ list_item ) {
289
+ found_tl = !strcmp (* list_item , rsc_tl_name );
290
+ }
291
+
292
+ /* Check if the transport has a match (either positive or negative) */
293
+ assert (!(is_any_tl && negate ));
294
+ if (found_tl != negate ) {
295
+ if (is_any_device ||
296
+ opal_common_ucx_check_device (rsc_device_name , device_list )) {
297
+ MCA_COMMON_UCX_VERBOSE (2 , "%s/%s: matched both transport and device list" ,
298
+ rsc_tl_name , rsc_device_name );
299
+ support_level = OPAL_COMMON_UCX_SUPPORT_DEVICE ;
300
+ } else {
301
+ MCA_COMMON_UCX_VERBOSE (2 , "%s/%s: matched transport list but not device list" ,
302
+ rsc_tl_name , rsc_device_name );
303
+ support_level = OPAL_COMMON_UCX_SUPPORT_TRANSPORT ;
304
+ }
305
+ } else {
306
+ MCA_COMMON_UCX_VERBOSE (2 , "%s/%s: did not match transport list" ,
307
+ rsc_tl_name , rsc_device_name );
308
+ }
309
+
310
+ free (rsc_device_name );
311
+ free (rsc_tl_name );
312
+ }
313
+
314
+ MCA_COMMON_UCX_VERBOSE (2 , "support level is %s" , support_level_names [support_level ]);
315
+ fclose (stream );
316
+ free (buffer );
317
+
318
+ out_free_device_list :
319
+ opal_argv_free (device_list );
320
+ out_free_tl_list :
321
+ opal_argv_free (tl_list );
322
+ out :
323
+ #else
324
+ MCA_COMMON_UCX_VERBOSE (2 , "open_memstream() was not found, ucx is disabled" );
325
+ #endif
326
+ return support_level ;
327
+ }
328
+
129
329
/*
 * Completion callback that does nothing: both arguments are ignored.
 */
void opal_common_ucx_empty_complete_cb(void *request, ucs_status_t status)
{
    (void) request;
    (void) status;
}
0 commit comments