Skip to content

Commit cb6a04e

Browse files
committed
smsc/xpmem: add single-copy support using XPMEM
This commit adds a new shared-memory single-copy component supporting Cray/SGI XPMEM. This component supports copy_to, copy_from, and memory mapping. Signed-off-by: Nathan Hjelm <[email protected]>
1 parent 099423e commit cb6a04e

File tree

7 files changed

+690
-0
lines changed

7 files changed

+690
-0
lines changed

opal/mca/smsc/xpmem/Makefile.am

Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,56 @@
1+
#
2+
# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
3+
# University Research and Technology
4+
# Corporation. All rights reserved.
5+
# Copyright (c) 2004-2009 The University of Tennessee and The University
6+
# of Tennessee Research Foundation. All rights
7+
# reserved.
8+
# Copyright (c) 2004-2009 High Performance Computing Center Stuttgart,
9+
# University of Stuttgart. All rights reserved.
10+
# Copyright (c) 2004-2005 The Regents of the University of California.
11+
# All rights reserved.
12+
# Copyright (c) 2009-2014 Cisco Systems, Inc. All rights reserved.
13+
# Copyright (c) 2011-2014 Los Alamos National Security, LLC. All rights
14+
# reserved.
15+
# Copyright (c) 2017 IBM Corporation. All rights reserved.
16+
# Copyright (c) 2020-2021 Google, LLC. All rights reserved.
17+
# $COPYRIGHT$
18+
#
19+
# Additional copyrights may follow
20+
#
21+
# $HEADER$
22+
#
23+
24+
EXTRA_DIST = post_configure.sh
25+
26+
AM_CPPFLAGS = $(smsc_xpmem_CPPFLAGS)
27+
28+
libmca_smsc_xpmem_la_sources = \
29+
smsc_xpmem_component.c \
30+
smsc_xpmem_module.c \
31+
smsc_xpmem_internal.h \
32+
smsc_xpmem.h
33+
34+
# Make the output library in this directory, and name it either
35+
# mca_<type>_<name>.la (for DSO builds) or libmca_<type>_<name>.la
36+
# (for static builds).
37+
38+
if MCA_BUILD_opal_smsc_xpmem_DSO
39+
component_noinst =
40+
component_install = mca_smsc_xpmem.la
41+
else
42+
component_noinst = libmca_smsc_xpmem.la
43+
component_install =
44+
endif
45+
46+
mcacomponentdir = $(opallibdir)
47+
mcacomponent_LTLIBRARIES = $(component_install)
48+
mca_smsc_xpmem_la_SOURCES = $(libmca_smsc_xpmem_la_sources)
49+
mca_smsc_xpmem_la_LDFLAGS = -module -avoid-version $(smsc_xpmem_LDFLAGS)
50+
mca_smsc_xpmem_la_LIBADD = $(top_builddir)/opal/lib@OPAL_LIB_PREFIX@open-pal.la \
51+
$(smsc_xpmem_LIBS)
52+
53+
noinst_LTLIBRARIES = $(component_noinst)
54+
libmca_smsc_xpmem_la_SOURCES = $(libmca_smsc_xpmem_la_sources)
55+
libmca_smsc_xpmem_la_LIBADD = $(smsc_xpmem_LIBS)
56+
libmca_smsc_xpmem_la_LDFLAGS = -module -avoid-version $(smsc_xpmem_LDFLAGS)

opal/mca/smsc/xpmem/configure.m4

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
# -*- shell-script -*-
2+
#
3+
# Copyright (c) 2009 The University of Tennessee and The University
4+
# of Tennessee Research Foundation. All rights
5+
# reserved.
6+
# Copyright (c) 2009-2016 Cisco Systems, Inc. All rights reserved.
7+
# Copyright (c) 2011-2014 Los Alamos National Security, LLC. All rights
8+
# reserved.
9+
# Copyright (c) 2015 Research Organization for Information Science
10+
# and Technology (RIST). All rights reserved.
11+
# Copyright (c) 2021 Google, LLC. All rights reserved.
12+
# $COPYRIGHT$
13+
#
14+
# Additional copyrights may follow
15+
#
16+
# $HEADER$
17+
#
18+
19+
# MCA_opal_smsc_xpmem_CONFIG([action-if-can-compile],
20+
# [action-if-cant-compile])
21+
# ------------------------------------------------
22+
AC_DEFUN([MCA_opal_smsc_xpmem_CONFIG],[
23+
AC_CONFIG_FILES([opal/mca/smsc/xpmem/Makefile])
24+
25+
OPAL_CHECK_XPMEM([smsc_xpmem], [$1], [$2])
26+
27+
AC_SUBST([smsc_xpmem_CFLAGS])
28+
AC_SUBST([smsc_xpmem_CPPFLAGS])
29+
AC_SUBST([smsc_xpmem_LDFLAGS])
30+
AC_SUBST([smsc_xpmem_LIBS])
31+
])dnl

opal/mca/smsc/xpmem/post_configure.sh

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
DIRECT_CALL_HEADER="opal/mca/smsc/xpmem/smsc_xpmem.h"

opal/mca/smsc/xpmem/smsc_xpmem.h

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,43 @@
1+
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
2+
/*
3+
* Copyright (c) 2021 Google, Inc. All rights reserved.
4+
* $COPYRIGHT$
5+
*
6+
* Additional copyrights may follow
7+
*
8+
* $HEADER$
9+
*/
10+
11+
#ifndef OPAL_MCA_SMSC_XPMEM_SMSC_XPMEM_H
12+
#define OPAL_MCA_SMSC_XPMEM_SMSC_XPMEM_H
13+
14+
#include "opal_config.h"
15+
16+
#include "opal/mca/smsc/smsc.h"
17+
18+
mca_smsc_endpoint_t *mca_smsc_xpmem_get_endpoint(opal_proc_t *peer_proc);
19+
void mca_smsc_xpmem_return_endpoint(mca_smsc_endpoint_t *endpoint);
20+
21+
int mca_smsc_xpmem_copy_to(mca_smsc_endpoint_t *endpoint, void *local_address, void *remote_address,
22+
size_t size, void *reg_handle);
23+
24+
int mca_smsc_xpmem_copy_from(mca_smsc_endpoint_t *endpoint, void *local_address,
25+
void *remote_address, size_t size, void *reg_handle);
26+
27+
/**
28+
* @brief Map a peer memory region into this process's address space.
29+
*
30+
* See the description in smsc.h.
31+
*
32+
* Caveats: XPMEM does not support futex operations within the region. Attempts to wake the
33+
* process owning the mutex will result in an EFAULT error code.
34+
*/
35+
void *mca_smsc_xpmem_map_peer_region(mca_smsc_endpoint_t *endpoint, uint64_t flags,
36+
void *remote_ptr, size_t size, void **local_ptr);
37+
void mca_smsc_xpmem_unmap_peer_region(void *ctx);
38+
39+
/* unsupported interfaces defined to support MCA direct */
40+
void *mca_smsc_xpmem_register_region(void *local_address, size_t size);
41+
void mca_smsc_xpmem_deregister_region(void *reg_data);
42+
43+
#endif /* OPAL_MCA_SMSC_XPMEM_SMSC_XPMEM_H */
Lines changed: 167 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,167 @@
1+
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
2+
/*
3+
* Copyright (c) 2021 Google, Inc. All rights reserved.
4+
* $COPYRIGHT$
5+
*
6+
* Additional copyrights may follow
7+
*
8+
* $HEADER$
9+
*/
10+
#include "opal_config.h"
11+
12+
#include "opal/mca/smsc/base/base.h"
13+
#include "opal/mca/smsc/xpmem/smsc_xpmem_internal.h"
14+
#include "opal/util/minmax.h"
15+
16+
#include <fcntl.h>
17+
#include <stdio.h>
18+
#include <sys/prctl.h>
19+
#include <sys/stat.h>
20+
#include <sys/types.h>
21+
#include <unistd.h>
22+
23+
static int mca_smsc_xpmem_component_register(void);
24+
static int mca_smsc_xpmem_component_open(void);
25+
static int mca_smsc_xpmem_component_close(void);
26+
static int mca_smsc_xpmem_component_query(void);
27+
static mca_smsc_module_t *mca_smsc_xpmem_component_enable(void);
28+
29+
#define MCA_SMSC_XPMEM_DEFAULT_PRIORITY 42
30+
static const int mca_smsc_xpmem_default_priority = MCA_SMSC_XPMEM_DEFAULT_PRIORITY;
31+
32+
mca_smsc_xpmem_component_t mca_smsc_xpmem_component = {
33+
.super = {
34+
.smsc_version = {
35+
MCA_SMSC_DEFAULT_VERSION("xpmem"),
36+
.mca_open_component = mca_smsc_xpmem_component_open,
37+
.mca_close_component = mca_smsc_xpmem_component_close,
38+
.mca_register_component_params = mca_smsc_xpmem_component_register,
39+
},
40+
.priority = MCA_SMSC_XPMEM_DEFAULT_PRIORITY,
41+
.query = mca_smsc_xpmem_component_query,
42+
.enable = mca_smsc_xpmem_component_enable,
43+
},
44+
};
45+
46+
static int mca_smsc_xpmem_component_register(void)
47+
{
48+
mca_smsc_xpmem_component.log_attach_align = 23;
49+
(void) mca_base_component_var_register(&mca_smsc_xpmem_component.super.smsc_version,
50+
"log_align",
51+
"Log base 2 of the alignment to use for xpmem "
52+
"segments (default: 23, minimum: 12, maximum: 25)",
53+
MCA_BASE_VAR_TYPE_INT, /*enumerator=*/NULL, /*bind=*/0,
54+
MCA_BASE_VAR_FLAG_SETTABLE, OPAL_INFO_LVL_5,
55+
MCA_BASE_VAR_SCOPE_LOCAL,
56+
&mca_smsc_xpmem_component.log_attach_align);
57+
58+
mca_smsc_xpmem_component.memcpy_chunk_size = 262144;
59+
(void) mca_base_component_var_register(
60+
&mca_smsc_xpmem_component.super.smsc_version, "memcpy_chunk_size",
61+
"Maximum size to copy with a single call to memcpy. On some systems a smaller or larger "
62+
"number may provide better performance (default: 256k)",
63+
MCA_BASE_VAR_TYPE_UINT64_T, /*enumerator=*/NULL, /*bind=*/0, MCA_BASE_VAR_FLAG_SETTABLE,
64+
OPAL_INFO_LVL_5, MCA_BASE_VAR_SCOPE_LOCAL, &mca_smsc_xpmem_component.memcpy_chunk_size);
65+
66+
mca_smsc_base_register_default_params(&mca_smsc_xpmem_component.super,
67+
mca_smsc_xpmem_default_priority);
68+
return OPAL_SUCCESS;
69+
}
70+
71+
static int mca_smsc_xpmem_component_open(void)
72+
{
73+
/* nothing to do */
74+
return OPAL_SUCCESS;
75+
}
76+
77+
static int mca_smsc_xpmem_component_close(void)
78+
{
79+
if (mca_smsc_xpmem_module.vma_module) {
80+
OBJ_RELEASE(mca_smsc_xpmem_module.vma_module);
81+
}
82+
83+
return OPAL_SUCCESS;
84+
}
85+
86+
static int mca_smsc_xpmem_send_modex(void)
87+
{
88+
mca_smsc_xpmem_modex_t modex;
89+
90+
modex.seg_id = mca_smsc_xpmem_component.my_seg_id;
91+
modex.address_max = mca_smsc_xpmem_component.my_address_max;
92+
93+
int rc;
94+
OPAL_MODEX_SEND(rc, PMIX_LOCAL, &mca_smsc_xpmem_component.super.smsc_version, &modex,
95+
sizeof(modex));
96+
return rc;
97+
}
98+
99+
static int mca_smsc_xpmem_component_query(void)
100+
{
101+
/* Any attachment that goes past the Linux TASK_SIZE will always fail. To prevent this we need
102+
* to determine the value of TASK_SIZE. On x86_64 the value was hard-coded in sm to be
103+
* 0x7ffffffffffful but this approach does not work with AARCH64 (and possibly other
104+
* architectures). Since there is really no way to directly determine the value we can (in all
105+
* cases?) look through the mapping for this process to determine what the largest address is.
106+
* This should be the top of the stack. No heap allocations should be larger than this value.
107+
* Since the largest address may differ between processes the value must be shared as part of
108+
* the modex and stored in the endpoint. */
109+
FILE *fh = fopen("/proc/self/maps", "r");
110+
if (NULL == fh) {
111+
opal_output_verbose(MCA_BASE_VERBOSE_COMPONENT, opal_smsc_base_framework.framework_output,
112+
"mca_smsc_xpmem_component_query: could not open /proc/self/maps for "
113+
"reading. disabling XPMEM");
114+
return OPAL_ERR_NOT_AVAILABLE;
115+
}
116+
117+
char buffer[1024];
118+
uintptr_t address_max = 0;
119+
while (fgets(buffer, sizeof(buffer), fh)) {
120+
uintptr_t low, high;
121+
char *tmp;
122+
/* each line of /proc/self/maps starts with low-high in hexidecimal (without a 0x) */
123+
low = strtoul(buffer, &tmp, 16);
124+
high = strtoul(tmp + 1, NULL, 16);
125+
if (address_max < high) {
126+
address_max = high;
127+
}
128+
}
129+
130+
fclose(fh);
131+
132+
if (0 == address_max) {
133+
opal_output_verbose(MCA_BASE_VERBOSE_COMPONENT, opal_smsc_base_framework.framework_output,
134+
"mca_smsc_xpmem_component_query: could not determine the address max");
135+
return OPAL_ERR_NOT_AVAILABLE;
136+
}
137+
138+
/* save the calcuated maximum */
139+
mca_smsc_xpmem_component.my_address_max = address_max - 1;
140+
141+
/* it is safe to use XPMEM_MAXADDR_SIZE here (which is always (size_t)-1 even though
142+
* it is not safe for attach */
143+
mca_smsc_xpmem_component.my_seg_id = xpmem_make(0, XPMEM_MAXADDR_SIZE, XPMEM_PERMIT_MODE,
144+
(void *) 0666);
145+
if (-1 == mca_smsc_xpmem_component.my_seg_id) {
146+
return OPAL_ERR_NOT_AVAILABLE;
147+
}
148+
149+
mca_smsc_xpmem_send_modex();
150+
151+
return OPAL_SUCCESS;
152+
}
153+
154+
static mca_smsc_module_t *mca_smsc_xpmem_component_enable(void)
155+
{
156+
if (0 > mca_smsc_xpmem_component.super.priority) {
157+
return NULL;
158+
}
159+
160+
/* limit segment alignment to be between 4k and 16M */
161+
mca_smsc_xpmem_component.log_attach_align
162+
= opal_min(opal_max(mca_smsc_xpmem_component.log_attach_align, 12), 25);
163+
164+
mca_smsc_xpmem_module.vma_module = mca_rcache_base_vma_module_alloc();
165+
166+
return &mca_smsc_xpmem_module.super;
167+
}
Lines changed: 81 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,81 @@
1+
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
2+
/*
3+
* Copyright (c) 2021 Google, Inc. All rights reserved.
4+
* $COPYRIGHT$
5+
*
6+
* Additional copyrights may follow
7+
*
8+
* $HEADER$
9+
*/
10+
11+
#ifndef OPAL_MCA_SMSC_XPMEM_SMSC_XPMEM_INTERNAL_H
12+
#define OPAL_MCA_SMSC_XPMEM_SMSC_XPMEM_INTERNAL_H
13+
14+
#include "opal/mca/smsc/xpmem/smsc_xpmem.h"
15+
16+
#include "opal/mca/rcache/base/rcache_base_vma.h"
17+
/* Pull in whichever XPMEM header the build detected. */
#if defined(HAVE_XPMEM_H)
#    include <xpmem.h>
#elif defined(HAVE_SN_XPMEM_H)
#    include <sn/xpmem.h>

/* id types are declared here for the sn/xpmem.h flavor (presumably because that header does
 * not provide them -- confirm against the header in use) */
typedef int64_t xpmem_segid_t;
typedef int64_t xpmem_apid_t;
#endif

/* Declared exactly once for both header flavors; repeating the typedef inside the
 * HAVE_XPMEM_H branch is a redefinition for pre-C11 compilers. */
typedef struct xpmem_addr xpmem_addr_t;
29+
30+
struct mca_smsc_xpmem_modex_t {
31+
/** XPMEM segment id for this peer */
32+
xpmem_segid_t seg_id;
33+
/** maximum address we can attach to on this peer */
34+
uintptr_t address_max;
35+
};
36+
37+
typedef struct mca_smsc_xpmem_modex_t mca_smsc_xpmem_modex_t;
38+
39+
struct mca_smsc_xpmem_endpoint_t {
40+
mca_smsc_endpoint_t super;
41+
/** XPMEM apid for this peer */
42+
xpmem_apid_t apid;
43+
/** maximum address we can attach to on this peer */
44+
uintptr_t address_max;
45+
};
46+
47+
typedef struct mca_smsc_xpmem_endpoint_t mca_smsc_xpmem_endpoint_t;
48+
49+
OBJ_CLASS_DECLARATION(mca_smsc_xpmem_endpoint_t);
50+
51+
struct mca_smsc_xpmem_component_t {
52+
mca_smsc_component_t super;
53+
54+
/** maximum attachment address for this process. attempts to attach past this value may fail. */
55+
uintptr_t my_address_max;
56+
/** XPMEM segment id for this process */
57+
xpmem_segid_t my_seg_id;
58+
/** log base 2 of the attachment alignment. this controls how big the smallest attachment is. a
59+
* larger value will produce fewer entries in the cache but will increase attachment time. */
60+
unsigned int log_attach_align;
61+
/** maximum size that will be used with a single memcpy call. on some systems we see better
62+
* peformance if we chunk the copy into multiple memcpy calls. */
63+
uint64_t memcpy_chunk_size;
64+
};
65+
66+
typedef struct mca_smsc_xpmem_component_t mca_smsc_xpmem_component_t;
67+
68+
struct mca_smsc_xpmem_module_t {
69+
mca_smsc_module_t super;
70+
71+
/** cache of xpmem attachments. this cache holds attachments for all peers. the registrations
72+
* are differentiated by the alloc_base which is set to the endpoint. */
73+
mca_rcache_base_vma_module_t *vma_module;
74+
};
75+
76+
typedef struct mca_smsc_xpmem_module_t mca_smsc_xpmem_module_t;
77+
78+
extern mca_smsc_xpmem_module_t mca_smsc_xpmem_module;
79+
extern mca_smsc_xpmem_component_t mca_smsc_xpmem_component;
80+
81+
#endif /* OPAL_MCA_SMSC_XPMEM_SMSC_XPMEM_INTERNAL_H */

0 commit comments

Comments
 (0)