Commit a754501

Merge pull request #2382 from hjelmn/vader
v2.x reduce vader XPMEM memory footprint
2 parents 6b85ef5 + 8a23df2 commit a754501

5 files changed: 118 additions, 84 deletions


opal/mca/btl/vader/btl_vader.h

Lines changed: 2 additions & 0 deletions
@@ -48,6 +48,7 @@
 #include "opal/sys/atomic.h"
 #include "opal/mca/btl/btl.h"
 #include "opal/mca/rcache/rcache.h"
+#include "opal/mca/rcache/base/rcache_base_vma.h"
 #include "opal/mca/btl/base/base.h"
 #include "opal/mca/rcache/rcache.h"
 #include "opal/mca/rcache/base/base.h"
@@ -103,6 +104,7 @@ struct mca_btl_vader_component_t {
     int vader_free_list_inc;       /**< number of elements to alloc when growing free lists */
 #if OPAL_BTL_VADER_HAVE_XPMEM
     xpmem_segid_t my_seg_id;       /**< this rank's xpmem segment id */
+    mca_rcache_base_vma_module_t *vma_module; /**< registration cache for xpmem segments */
 #endif
     opal_shmem_ds_t seg_ds;        /**< this rank's shared memory segment (when not using xpmem) */

opal/mca/btl/vader/btl_vader_endpoint.h

Lines changed: 0 additions & 2 deletions
@@ -28,7 +28,6 @@
 
 #include "opal_config.h"
 #include "btl_vader_xpmem.h"
-#include "opal/mca/rcache/base/rcache_base_vma.h"
 
 #define MCA_BTL_VADER_FBOX_ALIGNMENT      32
 #define MCA_BTL_VADER_FBOX_ALIGNMENT_MASK (MCA_BTL_VADER_FBOX_ALIGNMENT - 1)
@@ -75,7 +74,6 @@ typedef struct mca_btl_base_endpoint_t {
     union {
 #if OPAL_BTL_VADER_HAVE_XPMEM
         struct {
-            mca_rcache_base_vma_module_t *vma_module;
            xpmem_apid_t apid; /**< xpmem apid for remote peer */
         } xpmem;
 #endif

opal/mca/btl/vader/btl_vader_module.c

Lines changed: 13 additions & 22 deletions
@@ -145,6 +145,12 @@ static int vader_btl_first_time_init(mca_btl_vader_t *vader_btl, int n)
     /* set flag indicating btl has been inited */
     vader_btl->btl_inited = true;
 
+#if OPAL_BTL_VADER_HAVE_XPMEM
+    if (MCA_BTL_VADER_XPMEM == mca_btl_vader_component.single_copy_mechanism) {
+        mca_btl_vader_component.vma_module = mca_rcache_base_vma_module_alloc ();
+    }
+#endif
+
     return OPAL_SUCCESS;
 }
 
@@ -171,7 +177,6 @@ static int init_vader_endpoint (struct mca_btl_base_endpoint_t *ep, struct opal_
     if (MCA_BTL_VADER_XPMEM == mca_btl_vader_component.single_copy_mechanism) {
         /* always use xpmem if it is available */
         ep->segment_data.xpmem.apid = xpmem_get (modex->xpmem.seg_id, XPMEM_RDWR, XPMEM_PERMIT_MODE, (void *) 0666);
-        ep->segment_data.xpmem.vma_module = mca_rcache_base_vma_module_alloc ();
         (void) vader_get_registation (ep, modex->xpmem.segment_base, mca_btl_vader_component.segment_size,
                                       MCA_RCACHE_FLAGS_PERSIST, (void **) &ep->segment_base);
     } else {
@@ -354,6 +359,12 @@ static int vader_finalize(struct mca_btl_base_module_t *btl)
         opal_shmem_segment_detach (&mca_btl_vader_component.seg_ds);
     }
 
+#if OPAL_BTL_VADER_HAVE_XPMEM
+    if (NULL != mca_btl_vader_component.vma_module) {
+        OBJ_RELEASE(mca_btl_vader_component.vma_module);
+    }
+#endif
+
     return OPAL_SUCCESS;
 }
 
@@ -530,14 +541,6 @@ static void mca_btl_vader_endpoint_constructor (mca_btl_vader_endpoint_t *ep)
 }
 
 #if OPAL_BTL_VADER_HAVE_XPMEM
-static int mca_btl_vader_endpoint_rcache_cleanup (mca_rcache_base_registration_t *reg, void *ctx)
-{
-    mca_rcache_base_vma_module_t *vma_module = (mca_rcache_base_vma_module_t *) ctx;
-    /* otherwise dereg will fail on assert */
-    reg->ref_count = 0;
-    (void) mca_rcache_base_vma_delete (vma_module, reg);
-    return OPAL_SUCCESS;
-}
 #endif
 
 static void mca_btl_vader_endpoint_destructor (mca_btl_vader_endpoint_t *ep)
@@ -547,19 +550,7 @@ static void mca_btl_vader_endpoint_destructor (mca_btl_vader_endpoint_t *ep)
 
 #if OPAL_BTL_VADER_HAVE_XPMEM
     if (MCA_BTL_VADER_XPMEM == mca_btl_vader_component.single_copy_mechanism) {
-        if (ep->segment_data.xpmem.vma_module) {
-            /* clean out the registration cache */
-            (void) mca_rcache_base_vma_iterate (ep->segment_data.xpmem.vma_module,
-                                                NULL, (size_t) -1,
-                                                mca_btl_vader_endpoint_rcache_cleanup,
-                                                (void *) ep->segment_data.xpmem.vma_module);
-            OBJ_RELEASE(ep->segment_data.xpmem.vma_module);
-        }
-
-        if (ep->segment_base) {
-            xpmem_release (ep->segment_data.xpmem.apid);
-            ep->segment_data.xpmem.apid = 0;
-        }
+        mca_btl_vader_xpmem_cleanup_endpoint (ep);
     } else
 #endif
     if (ep->segment_data.other.seg_ds) {
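Taken together, the hunks above move the XPMEM registration cache from each endpoint into the component. A condensed lifecycle sketch follows (hypothetical helper name, error handling elided; the real code is exactly the added lines above):

#if OPAL_BTL_VADER_HAVE_XPMEM
/* Sketch only: shows the order of the calls added in this file, not a
 * drop-in function.  One shared VMA cache now serves every local peer. */
static void vader_vma_cache_lifecycle_sketch (mca_btl_vader_endpoint_t *ep)
{
    /* vader_btl_first_time_init(): allocate the shared cache once per process */
    mca_btl_vader_component.vma_module = mca_rcache_base_vma_module_alloc ();

    /* endpoint destructor: evict only this peer's registrations, then
     * release its xpmem apid (see btl_vader_xpmem.c below) */
    mca_btl_vader_xpmem_cleanup_endpoint (ep);

    /* vader_finalize(): release the shared cache itself */
    if (NULL != mca_btl_vader_component.vma_module) {
        OBJ_RELEASE(mca_btl_vader_component.vma_module);
    }
}
#endif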

opal/mca/btl/vader/btl_vader_xpmem.c

Lines changed: 95 additions & 60 deletions
@@ -32,118 +32,153 @@ int mca_btl_vader_xpmem_init (void)
     return OPAL_SUCCESS;
 }
 
+struct vader_check_reg_ctx_t {
+    mca_rcache_base_vma_module_t *vma_module;
+    mca_btl_base_endpoint_t *ep;
+    mca_rcache_base_registration_t **reg;
+    uintptr_t base;
+    uintptr_t bound;
+};
+typedef struct vader_check_reg_ctx_t vader_check_reg_ctx_t;
+
+static int vader_check_reg (mca_rcache_base_registration_t *reg, void *ctx)
+{
+    vader_check_reg_ctx_t *vader_ctx = (vader_check_reg_ctx_t *) ctx;
+
+    if ((intptr_t) reg->alloc_base != vader_ctx->ep->peer_smp_rank ||
+        (reg->flags & MCA_RCACHE_FLAGS_PERSIST)) {
+        /* ignore this registration */
+        return OPAL_SUCCESS;
+    }
+
+    vader_ctx->reg[0] = reg;
+
+    if (vader_ctx->bound <= (uintptr_t) reg->bound && vader_ctx->base >= (uintptr_t) reg->base) {
+        (void)opal_atomic_add (&reg->ref_count, 1);
+        return 1;
+    }
+
+    /* remove this pointer from the rcache and decrement its reference count
+       (so it is detached later) */
+    mca_rcache_base_vma_delete (vader_ctx->vma_module, reg);
+
+    return 2;
+}
+
 /* look up the remote pointer in the peer rcache and attach if
  * necessary */
 mca_rcache_base_registration_t *vader_get_registation (struct mca_btl_base_endpoint_t *ep, void *rem_ptr,
                                                        size_t size, int flags, void **local_ptr)
 {
-    mca_rcache_base_vma_module_t *vma_module = ep->segment_data.xpmem.vma_module;
-    mca_rcache_base_registration_t *regs[10], *reg = NULL;
+    mca_rcache_base_vma_module_t *vma_module = mca_btl_vader_component.vma_module;
+    uint64_t attach_align = 1 << mca_btl_vader_component.log_attach_align;
+    mca_rcache_base_registration_t *reg = NULL;
+    vader_check_reg_ctx_t check_ctx = {.ep = ep, .reg = &reg, .vma_module = vma_module};
     xpmem_addr_t xpmem_addr;
     uintptr_t base, bound;
-    uint64_t attach_align = 1 << mca_btl_vader_component.log_attach_align;
     int rc, i;
 
-    /* protect rcache access */
-    OPAL_THREAD_LOCK(&ep->lock);
-
-    /* use btl/self for self communication */
-    assert (ep->peer_smp_rank != MCA_BTL_VADER_LOCAL_RANK);
-
     base = OPAL_DOWN_ALIGN((uintptr_t) rem_ptr, attach_align, uintptr_t);
     bound = OPAL_ALIGN((uintptr_t) rem_ptr + size - 1, attach_align, uintptr_t) + 1;
     if (OPAL_UNLIKELY(bound > VADER_MAX_ADDRESS)) {
         bound = VADER_MAX_ADDRESS;
     }
 
-    /* several segments may match the base pointer */
-    rc = mca_rcache_base_vma_find_all (vma_module, (void *) base, bound - base, regs, 10);
-    for (i = 0 ; i < rc ; ++i) {
-        if (bound <= (uintptr_t)regs[i]->bound && base >= (uintptr_t)regs[i]->base) {
-            (void)opal_atomic_add (&regs[i]->ref_count, 1);
-            reg = regs[i];
-            goto reg_found;
-        }
-
-        if (regs[i]->flags & MCA_RCACHE_FLAGS_PERSIST) {
-            continue;
-        }
-
-        /* remove this pointer from the rcache and decrement its reference count
-           (so it is detached later) */
-        rc = mca_rcache_base_vma_delete (vma_module, regs[i]);
-        if (OPAL_UNLIKELY(0 != rc)) {
-            /* someone beat us to it? */
-            break;
-        }
+    check_ctx.base = base;
+    check_ctx.bound = bound;
 
+    /* several segments may match the base pointer */
+    rc = mca_rcache_base_vma_iterate (vma_module, (void *) base, bound - base, vader_check_reg, &check_ctx);
+    if (2 == rc) {
         /* start the new segment from the lower of the two bases */
-        base = (uintptr_t) regs[i]->base < base ? (uintptr_t) regs[i]->base : base;
-
-        (void)opal_atomic_add (&regs[i]->ref_count, -1);
+        base = (uintptr_t) reg->base < base ? (uintptr_t) reg->base : base;
 
-        if (OPAL_LIKELY(0 == regs[i]->ref_count)) {
+        if (OPAL_LIKELY(0 == opal_atomic_add_32 (&reg->ref_count, -1))) {
             /* this pointer is not in use */
-            (void) xpmem_detach (regs[i]->rcache_context);
-            OBJ_RELEASE(regs[i]);
+            (void) xpmem_detach (reg->rcache_context);
+            OBJ_RELEASE(reg);
         }
 
-        break;
+        reg = NULL;
     }
 
-    reg = OBJ_NEW(mca_rcache_base_registration_t);
-    if (OPAL_LIKELY(NULL != reg)) {
-        /* stick around for awhile */
-        reg->ref_count = 2;
-        reg->base = (unsigned char *) base;
-        reg->bound = (unsigned char *) bound;
-        reg->flags = flags;
+    if (NULL == reg) {
+        reg = OBJ_NEW(mca_rcache_base_registration_t);
+        if (OPAL_LIKELY(NULL != reg)) {
+            /* stick around for awhile */
+            reg->ref_count = 2;
+            reg->base = (unsigned char *) base;
+            reg->bound = (unsigned char *) bound;
+            reg->flags = flags;
+            reg->alloc_base = (void *) (intptr_t) ep->peer_smp_rank;
 
 #if defined(HAVE_SN_XPMEM_H)
-        xpmem_addr.id = ep->segment_data.xpmem.apid;
+            xpmem_addr.id = ep->segment_data.xpmem.apid;
 #else
-        xpmem_addr.apid = ep->segment_data.xpmem.apid;
+            xpmem_addr.apid = ep->segment_data.xpmem.apid;
 #endif
-        xpmem_addr.offset = base;
+            xpmem_addr.offset = base;
 
-        reg->rcache_context = xpmem_attach (xpmem_addr, bound - base, NULL);
-        if (OPAL_UNLIKELY((void *)-1 == reg->rcache_context)) {
-            OPAL_THREAD_UNLOCK(&ep->lock);
-            OBJ_RELEASE(reg);
-            return NULL;
-        }
+            reg->rcache_context = xpmem_attach (xpmem_addr, bound - base, NULL);
+            if (OPAL_UNLIKELY((void *)-1 == reg->rcache_context)) {
+                OBJ_RELEASE(reg);
+                return NULL;
+            }
 
-        opal_memchecker_base_mem_defined (reg->rcache_context, bound - base);
+            opal_memchecker_base_mem_defined (reg->rcache_context, bound - base);
 
-        mca_rcache_base_vma_insert (vma_module, reg, 0);
+            mca_rcache_base_vma_insert (vma_module, reg, 0);
+        }
     }
 
-reg_found:
     opal_atomic_wmb ();
     *local_ptr = (void *) ((uintptr_t) reg->rcache_context +
                            (ptrdiff_t)((uintptr_t) rem_ptr - (uintptr_t) reg->base));
 
-    OPAL_THREAD_UNLOCK(&ep->lock);
-
     return reg;
 }
 
 void vader_return_registration (mca_rcache_base_registration_t *reg, struct mca_btl_base_endpoint_t *ep)
 {
-    mca_rcache_base_vma_module_t *vma_module = ep->segment_data.xpmem.vma_module;
+    mca_rcache_base_vma_module_t *vma_module = mca_btl_vader_component.vma_module;
    int32_t ref_count;
 
    ref_count = opal_atomic_add_32 (&reg->ref_count, -1);
    if (OPAL_UNLIKELY(0 == ref_count && !(reg->flags & MCA_RCACHE_FLAGS_PERSIST))) {
        /* protect rcache access */
-        OPAL_THREAD_LOCK(&ep->lock);
        mca_rcache_base_vma_delete (vma_module, reg);
-        OPAL_THREAD_UNLOCK(&ep->lock);
 
        opal_memchecker_base_mem_noaccess (reg->rcache_context, (uintptr_t)(reg->bound - reg->base));
        (void)xpmem_detach (reg->rcache_context);
        OBJ_RELEASE (reg);
    }
 }
 
+static int mca_btl_vader_endpoint_xpmem_rcache_cleanup (mca_rcache_base_registration_t *reg, void *ctx)
+{
+    mca_rcache_base_vma_module_t *vma_module = mca_btl_vader_component.vma_module;
+    mca_btl_vader_endpoint_t *ep = (mca_btl_vader_endpoint_t *) ctx;
+    if ((intptr_t) reg->alloc_base == ep->peer_smp_rank) {
+        /* otherwise dereg will fail on assert */
+        reg->ref_count = 0;
+        (void) mca_rcache_base_vma_delete (vma_module, reg);
+        OBJ_RELEASE(reg);
+    }
+
+    return OPAL_SUCCESS;
+}
+
+void mca_btl_vader_xpmem_cleanup_endpoint (struct mca_btl_base_endpoint_t *ep)
+{
+    /* clean out the registration cache */
+    (void) mca_rcache_base_vma_iterate (mca_btl_vader_component.vma_module,
+                                        NULL, (size_t) -1,
+                                        mca_btl_vader_endpoint_xpmem_rcache_cleanup,
+                                        (void *) ep);
+    if (ep->segment_base) {
+        xpmem_release (ep->segment_data.xpmem.apid);
+        ep->segment_data.xpmem.apid = 0;
+    }
+}
+
 #endif /* OPAL_BTL_VADER_HAVE_XPMEM */
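For reference, a hypothetical caller of the two public entry points above (sketch only, not part of the patch; helper name invented here, usual vader headers assumed). With the component-wide cache, the endpoint argument is purely a peer identity: it selects whose xpmem apid to attach through and which cached registrations may be reused.

static int vader_copy_from_peer_sketch (struct mca_btl_base_endpoint_t *ep, void *rem_ptr,
                                        void *local_buf, size_t size)
{
    mca_rcache_base_registration_t *reg;
    void *local_ptr;

    /* attach (or reuse a cached attachment) covering [rem_ptr, rem_ptr + size) */
    reg = vader_get_registation (ep, rem_ptr, size, 0, &local_ptr);
    if (OPAL_UNLIKELY(NULL == reg)) {
        return OPAL_ERROR;
    }

    memcpy (local_buf, local_ptr, size);

    /* drop the reference; the registration is detached and freed once the
     * count reaches zero and MCA_RCACHE_FLAGS_PERSIST is not set */
    vader_return_registration (reg, ep);

    return OPAL_SUCCESS;
}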

opal/mca/btl/vader/btl_vader_xpmem.h

Lines changed: 8 additions & 0 deletions
@@ -2,6 +2,7 @@
 /*
  * Copyright (c) 2013-2014 Los Alamos National Security, LLC. All rights
  *                         reserved.
+ * Copyright (c) 2016      ARM, Inc. All rights reserved.
  * $COPYRIGHT$
  *
  * Additional copyrights may follow
@@ -32,14 +33,21 @@
  * necessary */
 
 /* largest address we can attach to using xpmem */
+#if defined(__x86_64__)
 #define VADER_MAX_ADDRESS ((uintptr_t)0x7ffffffff000ul)
+#else
+#define VADER_MAX_ADDRESS XPMEM_MAXADDR_SIZE
+#endif
+
+struct mca_btl_base_endpoint_t;
 
 int mca_btl_vader_xpmem_init (void);
 
 mca_rcache_base_registration_t *vader_get_registation (struct mca_btl_base_endpoint_t *endpoint, void *rem_ptr,
                                                        size_t size, int flags, void **local_ptr);
 
 void vader_return_registration (mca_rcache_base_registration_t *reg, struct mca_btl_base_endpoint_t *endpoint);
+void mca_btl_vader_xpmem_cleanup_endpoint (struct mca_btl_base_endpoint_t *ep);
 
 #else
