Skip to content

Commit 47fd231

Browse files
hjelmn authored and Ralph Castain committed
btl/vader: move backing files into /dev/shm on Linux
This commit moves the backing files to /dev/shm to avoid limitations that may be set on /tmp. The files are registered with PMIx to ensure they are cleaned up after an abnormal exit.

Signed-off-by: Nathan Hjelm <[email protected]>
(cherry picked from commit 4810127)
1 parent 07427c6 commit 47fd231

File tree

2 files changed: 22 additions and 3 deletions

opal/mca/btl/vader/btl_vader.h

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@
1212
* All rights reserved.
1313
* Copyright (c) 2006-2007 Voltaire. All rights reserved.
1414
* Copyright (c) 2009-2010 Cisco Systems, Inc. All rights reserved.
15-
* Copyright (c) 2010-2015 Los Alamos National Security, LLC. All rights
15+
* Copyright (c) 2010-2017 Los Alamos National Security, LLC. All rights
1616
* reserved.
1717
* Copyright (c) 2015 Mellanox Technologies. All rights reserved.
1818
*
@@ -136,6 +136,8 @@ struct mca_btl_vader_component_t {
136136
opal_list_t pending_endpoints; /**< list of endpoints with pending fragments */
137137
opal_list_t pending_fragments; /**< fragments pending remote completion */
138138

139+
char *backing_directory; /**< directory to place shared memory backing files */
140+
139141
/* knem stuff */
140142
#if OPAL_BTL_VADER_HAVE_KNEM
141143
unsigned int knem_dma_min; /**< minimum size to enable DMA for knem transfers (0 disables) */

opal/mca/btl/vader/btl_vader_component.c

Lines changed: 19 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@
1212
* All rights reserved.
1313
* Copyright (c) 2006-2007 Voltaire. All rights reserved.
1414
* Copyright (c) 2009-2010 Cisco Systems, Inc. All rights reserved.
15-
* Copyright (c) 2010-2015 Los Alamos National Security, LLC.
15+
* Copyright (c) 2010-2017 Los Alamos National Security, LLC.
1616
* All rights reserved.
1717
* Copyright (c) 2011 NVIDIA Corporation. All rights reserved.
1818
* Copyright (c) 2014-2017 Intel, Inc. All rights reserved.
@@ -211,6 +211,19 @@ static int mca_btl_vader_component_register (void)
211211
OPAL_INFO_LVL_3, MCA_BASE_VAR_SCOPE_GROUP, &mca_btl_vader_component.single_copy_mechanism);
212212
OBJ_RELEASE(new_enum);
213213

214+
if (0 == access ("/dev/shm", W_OK)) {
215+
mca_btl_vader_component.backing_directory = "/dev/shm";
216+
} else {
217+
mca_btl_vader_component.backing_directory = opal_process_info.proc_session_dir;
218+
}
219+
(void) mca_base_component_var_register (&mca_btl_vader_component.super.btl_version, "backing_directory",
220+
"Directory to place backing files for shared memory communication. "
221+
"This directory should be on a local filesystem such as /tmp or "
222+
"/dev/shm (default: (linux) /dev/shm, (others) session directory)",
223+
MCA_BASE_VAR_TYPE_STRING, NULL, 0, 0, OPAL_INFO_LVL_3,
224+
MCA_BASE_VAR_SCOPE_READONLY, &mca_btl_vader_component.backing_directory);
225+
226+
214227
#if OPAL_BTL_VADER_HAVE_KNEM
215228
/* Currently disabling DMA mode by default; it's not clear that this is useful in all applications and architectures. */
216229
mca_btl_vader_component.knem_dma_min = 0;
@@ -491,13 +504,17 @@ static mca_btl_base_module_t **mca_btl_vader_component_init (int *num_btls,
491504
if (MCA_BTL_VADER_XPMEM != mca_btl_vader_component.single_copy_mechanism) {
492505
char *sm_file;
493506

494-
rc = asprintf(&sm_file, "%s" OPAL_PATH_SEP "vader_segment.%s.%d", opal_process_info.proc_session_dir,
507+
rc = asprintf(&sm_file, "%s" OPAL_PATH_SEP "vader_segment.%s.%d", mca_btl_vader_component.backing_directory,
495508
opal_process_info.nodename, MCA_BTL_VADER_LOCAL_RANK);
496509
if (0 > rc) {
497510
free (btls);
498511
return NULL;
499512
}
500513

514+
if (NULL != opal_pmix.register_cleanup) {
515+
opal_pmix.register_cleanup (sm_file, false, false);
516+
}
517+
501518
rc = opal_shmem_segment_create (&component->seg_ds, sm_file, component->segment_size);
502519
free (sm_file);
503520
if (OPAL_SUCCESS != rc) {

0 commit comments

Comments
 (0)