
Commit b59ca84

Merge pull request #2101 from hjelmn/v2.x_mpool: v2.1 mpool/rcache rewrite

2 parents: 4afc1d1 + 333805c

141 files changed: +5571 / -5238 lines

Note: large commits have some content hidden by default, so only a subset of the 141 changed files appears below.


ompi/mca/pml/base/pml_base_bsend.c

Lines changed: 7 additions & 8 deletions
@@ -1,3 +1,4 @@
+/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
 /*
  * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
  *                         University Research and Technology
@@ -12,6 +13,8 @@
  * Copyright (c) 2007      Sun Microsystems, Inc. All rights reserved.
  * Copyright (c) 2015      Research Organization for Information Science
  *                         and Technology (RIST). All rights reserved.
+ * Copyright (c) 2015      Los Alamos National Security, LLC. All rights
+ *                         reserved.
  * $COPYRIGHT$
  *
  * Additional copyrights may follow
@@ -55,10 +58,7 @@ extern char *ompi_pml_base_bsend_allocator_name;
 /*
  * Routine to return pages to sub-allocator as needed
  */
-static void* mca_pml_bsend_alloc_segment(
-    struct mca_mpool_base_module_t* module,
-    size_t* size_inout,
-    mca_mpool_base_registration_t** registration)
+static void* mca_pml_bsend_alloc_segment(void *ctx, size_t *size_inout)
 {
     void *addr;
     size_t size = *size_inout;
@@ -70,7 +70,6 @@ static void* mca_pml_bsend_alloc_segment(
     addr = mca_pml_bsend_addr;
     mca_pml_bsend_addr += size;
     *size_inout = size;
-    if (NULL != registration) *registration = NULL;
     return addr;
 }
 
@@ -232,7 +231,7 @@ int mca_pml_base_bsend_request_start(ompi_request_t* request)
 
     /* allocate a buffer to hold packed message */
     sendreq->req_addr = mca_pml_bsend_allocator->alc_alloc(
-        mca_pml_bsend_allocator, sendreq->req_bytes_packed, 0, NULL);
+        mca_pml_bsend_allocator, sendreq->req_bytes_packed, 0);
     if(NULL == sendreq->req_addr) {
         /* release resources when request is freed */
         sendreq->req_base.req_pml_complete = true;
@@ -287,7 +286,7 @@ int mca_pml_base_bsend_request_alloc(ompi_request_t* request)
 
     /* allocate a buffer to hold packed message */
     sendreq->req_addr = mca_pml_bsend_allocator->alc_alloc(
-        mca_pml_bsend_allocator, sendreq->req_bytes_packed, 0, NULL);
+        mca_pml_bsend_allocator, sendreq->req_bytes_packed, 0);
     if(NULL == sendreq->req_addr) {
         /* release resources when request is freed */
         sendreq->req_base.req_pml_complete = true;
@@ -321,7 +320,7 @@ void* mca_pml_base_bsend_request_alloc_buf( size_t length )
 
     /* allocate a buffer to hold packed message */
     buf = mca_pml_bsend_allocator->alc_alloc(
-        mca_pml_bsend_allocator, length, 0, NULL);
+        mca_pml_bsend_allocator, length, 0);
     if(NULL == buf) {
         /* release resources when request is freed */
         OPAL_THREAD_UNLOCK(&mca_pml_bsend_mutex);
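The change above removes the registration out-parameter from both the segment callback and alc_alloc. A minimal sketch of the new three-argument alc_alloc calling convention, assuming an already-initialized allocator module; the helper name and header path are illustrative, not part of the commit:

#include <stddef.h>
#include "opal/mca/allocator/allocator.h"   /* assumed header for mca_allocator_base_module_t */

/* Sketch only: allocate a packed-message buffer with the v2.1 alc_alloc
 * signature shown in the diff above (allocator, size, alignment). The old
 * fourth argument, a registration out-parameter, is gone. */
static void *alloc_packed_buffer (mca_allocator_base_module_t *allocator, size_t length)
{
    return allocator->alc_alloc (allocator, length, 0 /* no special alignment */);
}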

ompi/mca/pml/ob1/pml_ob1_component.c

Lines changed: 6 additions & 10 deletions
@@ -80,12 +80,9 @@ mca_pml_base_component_2_0_0_t mca_pml_ob1_component = {
     .pmlm_finalize = mca_pml_ob1_component_fini,
 };
 
-void *mca_pml_ob1_seg_alloc( struct mca_mpool_base_module_t* mpool,
-                             size_t* size,
-                             mca_mpool_base_registration_t** registration);
+void *mca_pml_ob1_seg_alloc (void *ctx, size_t* size);
 
-void mca_pml_ob1_seg_free( struct mca_mpool_base_module_t* mpool,
-                           void* segment );
+void mca_pml_ob1_seg_free (void *ctx, void *segment);
 
 static inline int mca_pml_ob1_param_register_int(
     const char* param_name,
@@ -366,13 +363,12 @@ int mca_pml_ob1_component_fini(void)
     return OMPI_SUCCESS;
 }
 
-void *mca_pml_ob1_seg_alloc( struct mca_mpool_base_module_t* mpool,
-                             size_t* size,
-                             mca_mpool_base_registration_t** registration) {
+void *mca_pml_ob1_seg_alloc (void *ctx, size_t *size)
+{
     return malloc(*size);
 }
 
-void mca_pml_ob1_seg_free( struct mca_mpool_base_module_t* mpool,
-                           void* segment ) {
+void mca_pml_ob1_seg_free (void *ctx, void *segment)
+{
     free(segment);
 }
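Both the prototypes and the definitions above now share one shape: the segment allocator takes an opaque context plus an in/out size, and the matching free takes the context plus the segment pointer. A self-contained sketch of that callback pattern; the names are illustrative, not from this commit:

#include <stdlib.h>

/* Sketch of the v2.1 segment-callback shape used by ob1 above. */
static void *example_seg_alloc (void *ctx, size_t *size)
{
    (void) ctx;              /* ob1 ignores its context as well */
    return malloc (*size);   /* *size left unchanged: caller gets what it asked for */
}

static void example_seg_free (void *ctx, void *segment)
{
    (void) ctx;
    free (segment);
}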

ompi/mca/pml/ob1/pml_ob1_recvfrag.h

Lines changed: 1 addition & 1 deletion
@@ -81,7 +81,7 @@ do { \
             buffers[0].addr = (char*) \
                 mca_pml_ob1.allocator->alc_alloc( mca_pml_ob1.allocator, \
                                                   buffers[0].len, \
-                                                  0, NULL); \
+                                                  0); \
             _ptr = (unsigned char*)(buffers[0].addr); \
             macro_segments[0].seg_addr.pval = buffers[0].addr; \
         } \

ompi/mca/vprotocol/base/vprotocol_base_request.c

Lines changed: 4 additions & 4 deletions
@@ -42,8 +42,8 @@ int mca_vprotocol_base_request_parasite(void)
                               pml_fl_save.fl_max_to_alloc,
                               pml_fl_save.fl_num_per_alloc,
                               pml_fl_save.fl_mpool,
-                              pml_fl_save.fl_mpool_reg_flags,
-                              0,
+                              pml_fl_save.fl_rcache_reg_flags,
+                              pml_fl_save.fl_rcache,
                               pml_fl_save.item_init,
                               pml_fl_save.ctx);
     if(OMPI_SUCCESS != ret) return ret;
@@ -71,8 +71,8 @@ int mca_vprotocol_base_request_parasite(void)
                               pml_fl_save.fl_max_to_alloc,
                               pml_fl_save.fl_num_per_alloc,
                               pml_fl_save.fl_mpool,
-                              pml_fl_save.fl_mpool_reg_flags,
-                              0,
+                              pml_fl_save.fl_rcache_reg_flags,
+                              pml_fl_save.fl_rcache,
                               pml_fl_save.item_init,
                               pml_fl_save.ctx);
     if(OMPI_SUCCESS != ret) return ret;

ompi/mpi/c/alloc_mem.c

Lines changed: 15 additions & 1 deletion
@@ -1,3 +1,4 @@
+/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
 /*
  * Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana
  *                         University Research and Technology
@@ -12,6 +13,8 @@
  * Copyright (c) 2007      Cisco Systems, Inc. All rights reserved.
  * Copyright (c) 2015      Research Organization for Information Science
  *                         and Technology (RIST). All rights reserved.
+ * Copyright (c) 2015      Los Alamos National Security, LLC. All rights
+ *                         reserved.
  * $COPYRIGHT$
  *
  * Additional copyrights may follow
@@ -43,6 +46,8 @@ static const char FUNC_NAME[] = "MPI_Alloc_mem";
 
 int MPI_Alloc_mem(MPI_Aint size, MPI_Info info, void *baseptr)
 {
+    char info_value[MPI_MAX_INFO_VAL + 1];
+    char *mpool_hints = NULL;
 
     if (MPI_PARAM_CHECK) {
         OMPI_ERR_INIT_FINALIZE(FUNC_NAME);
@@ -67,7 +72,16 @@ int MPI_Alloc_mem(MPI_Aint size, MPI_Info info, void *baseptr)
         return MPI_SUCCESS;
     }
 
-    *((void **) baseptr) = mca_mpool_base_alloc((size_t) size, (struct opal_info_t*)info);
+    if (MPI_INFO_NULL != info) {
+        int flag;
+        (void) ompi_info_get (info, "mpool_hints", MPI_MAX_INFO_VAL, info_value, &flag);
+        if (flag) {
+            mpool_hints = info_value;
+        }
+    }
+
+    *((void **) baseptr) = mca_mpool_base_alloc ((size_t) size, (struct opal_info_t*)info,
+                                                 mpool_hints);
     if (NULL == *((void **) baseptr)) {
         return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, MPI_ERR_NO_MEM,
                                       FUNC_NAME);
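On the user side, the new path lets an MPI_Info object steer MPI_Alloc_mem through an "mpool_hints" key, which is forwarded to mca_mpool_base_alloc as shown above. A minimal sketch; the hint value string is illustrative and depends on which mpool components are installed:

#include <mpi.h>

int main (int argc, char **argv)
{
    void *buf = NULL;
    MPI_Info info;

    MPI_Init (&argc, &argv);

    MPI_Info_create (&info);
    /* "mpool_hints" is the key read by the new code above; the value here is
     * only an example and may not be recognized by every mpool component */
    MPI_Info_set (info, "mpool_hints", "page_size=2M");

    MPI_Alloc_mem ((MPI_Aint) (1 << 20), info, &buf);
    if (NULL != buf) {
        MPI_Free_mem (buf);
    }

    MPI_Info_free (&info);
    MPI_Finalize ();
    return 0;
}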

ompi/runtime/ompi_mpi_init.c

Lines changed: 0 additions & 7 deletions
@@ -619,13 +619,6 @@ int ompi_mpi_init(int argc, char **argv, int requested, int *provided)
 
     /* Select which MPI components to use */
 
-    if (OMPI_SUCCESS !=
-        (ret = mca_mpool_base_init(OPAL_ENABLE_PROGRESS_THREADS,
-                                   ompi_mpi_thread_multiple))) {
-        error = "mca_mpool_base_init() failed";
-        goto error;
-    }
-
     if (OMPI_SUCCESS !=
         (ret = mca_pml_base_select(OPAL_ENABLE_PROGRESS_THREADS,
                                    ompi_mpi_thread_multiple))) {

opal/class/opal_free_list.c

Lines changed: 58 additions & 39 deletions
@@ -13,7 +13,7 @@
  * Copyright (c) 2006-2007 Mellanox Technologies. All rights reserved.
  * Copyright (c) 2010-2013 Cisco Systems, Inc. All rights reserved.
  * Copyright (c) 2011      NVIDIA Corporation. All rights reserved.
- * Copyright (c) 2012-2015 Los Alamos National Security, LLC. All rights
+ * Copyright (c) 2012-2016 Los Alamos National Security, LLC. All rights
  *                         reserved.
  * $COPYRIGHT$
  *
@@ -28,6 +28,9 @@
 #include "opal/align.h"
 #include "opal/util/output.h"
 #include "opal/mca/mpool/mpool.h"
+#include "opal/mca/mpool/base/base.h"
+#include "opal/mca/rcache/rcache.h"
+#include "opal/util/sys_limits.h"
 
 typedef struct opal_free_list_item_t opal_free_list_memory_t;
 
@@ -49,17 +52,22 @@ static void opal_free_list_construct(opal_free_list_t* fl)
     fl->fl_payload_buffer_alignment = 0;
     fl->fl_frag_class = OBJ_CLASS(opal_free_list_item_t);
     fl->fl_mpool = NULL;
+    fl->fl_rcache = NULL;
     /* default flags */
-    fl->fl_mpool_reg_flags = MCA_MPOOL_FLAGS_CACHE_BYPASS |
-        MCA_MPOOL_FLAGS_CUDA_REGISTER_MEM;
+    fl->fl_rcache_reg_flags = MCA_RCACHE_FLAGS_CACHE_BYPASS |
+        MCA_RCACHE_FLAGS_CUDA_REGISTER_MEM;
     fl->ctx = NULL;
     OBJ_CONSTRUCT(&(fl->fl_allocations), opal_list_t);
 }
 
 static void opal_free_list_allocation_release (opal_free_list_t *fl, opal_free_list_memory_t *fl_mem)
 {
+    if (NULL != fl->fl_rcache) {
+        fl->fl_rcache->rcache_deregister (fl->fl_rcache, fl_mem->registration);
+    }
+
     if (NULL != fl->fl_mpool) {
-        fl->fl_mpool->mpool_free (fl->fl_mpool, fl_mem->ptr, fl_mem->registration);
+        fl->fl_mpool->mpool_free (fl->fl_mpool, fl_mem->ptr);
     } else if (fl_mem->ptr) {
         free (fl_mem->ptr);
     }
@@ -108,8 +116,9 @@ int opal_free_list_init (opal_free_list_t *flist, size_t frag_size, size_t frag_
                          opal_class_t *frag_class, size_t payload_buffer_size,
                          size_t payload_buffer_alignment, int num_elements_to_alloc,
                          int max_elements_to_alloc, int num_elements_per_alloc,
-                         mca_mpool_base_module_t* mpool, int mpool_reg_flags,
-                         void *unused0, opal_free_list_item_init_fn_t item_init, void *ctx)
+                         mca_mpool_base_module_t *mpool, int rcache_reg_flags,
+                         mca_rcache_base_module_t *rcache, opal_free_list_item_init_fn_t item_init,
+                         void *ctx)
 {
     /* alignment must be more than zero and power of two */
     if (frag_alignment <= 1 || (frag_alignment & (frag_alignment - 1))) {
@@ -137,11 +146,12 @@ int opal_free_list_init (opal_free_list_t *flist, size_t frag_size, size_t frag_
     flist->fl_max_to_alloc = max_elements_to_alloc;
     flist->fl_num_allocated = 0;
     flist->fl_num_per_alloc = num_elements_per_alloc;
-    flist->fl_mpool = mpool;
+    flist->fl_mpool = mpool ? mpool : mca_mpool_base_default_module;
+    flist->fl_rcache = rcache;
     flist->fl_frag_alignment = frag_alignment;
     flist->fl_payload_buffer_alignment = payload_buffer_alignment;
     flist->item_init = item_init;
-    flist->fl_mpool_reg_flags |= mpool_reg_flags;
+    flist->fl_rcache_reg_flags |= rcache_reg_flags;
     flist->ctx = ctx;
 
     if (num_elements_to_alloc) {
@@ -153,10 +163,10 @@ int opal_free_list_init (opal_free_list_t *flist, size_t frag_size, size_t frag_
 
 int opal_free_list_grow_st (opal_free_list_t* flist, size_t num_elements)
 {
-    unsigned char *ptr, *mpool_alloc_ptr = NULL, *payload_ptr = NULL;
+    unsigned char *ptr, *payload_ptr = NULL;
     opal_free_list_memory_t *alloc_ptr;
-    size_t alloc_size, head_size, elem_size = 0;
-    mca_mpool_base_registration_t *reg = NULL;
+    size_t alloc_size, head_size, elem_size = 0, buffer_size, align;
+    mca_rcache_base_registration_t *reg = NULL;
     int rc = OPAL_SUCCESS;
 
     if (flist->fl_max_to_alloc && (flist->fl_num_allocated + num_elements) >
@@ -170,6 +180,29 @@ int opal_free_list_grow_st (opal_free_list_t* flist, size_t num_elements)
 
     head_size = OPAL_ALIGN(flist->fl_frag_size, flist->fl_frag_alignment, size_t);
 
+    /* NTH: calculate allocation alignment first as it might change the number of elements */
+    if (0 != flist->fl_payload_buffer_size) {
+        elem_size = OPAL_ALIGN(flist->fl_payload_buffer_size,
+                               flist->fl_payload_buffer_alignment, size_t);
+
+        /* elem_size should not be 0 here */
+        assert (elem_size > 0);
+
+        buffer_size = num_elements * elem_size;
+        align = flist->fl_payload_buffer_alignment;
+
+        if (MCA_RCACHE_FLAGS_CUDA_REGISTER_MEM & flist->fl_rcache_reg_flags) {
+            size_t pagesize = opal_getpagesize ();
+            /* CUDA cannot handle registering overlapping regions, so make
+             * sure each region is page sized and page aligned. */
+            align = OPAL_ALIGN(align, pagesize, size_t);
+            buffer_size = OPAL_ALIGN(buffer_size, pagesize, size_t);
+
+            /* avoid wasting space in the buffer */
+            num_elements = buffer_size / elem_size;
+        }
+    }
+
     /* calculate head allocation size */
     alloc_size = num_elements * head_size + sizeof(opal_free_list_memory_t) +
         flist->fl_frag_alignment;
@@ -180,37 +213,23 @@ int opal_free_list_grow_st (opal_free_list_t* flist, size_t num_elements)
     }
 
     if (0 != flist->fl_payload_buffer_size) {
-        elem_size = OPAL_ALIGN(flist->fl_payload_buffer_size,
-                               flist->fl_payload_buffer_alignment, size_t);
-
-        /* elem_size should not be 0 here */
-        assert (elem_size > 0);
-
         /* allocate the rest from the mpool (or use memalign/malloc) */
-        if(flist->fl_mpool != NULL) {
-            payload_ptr = mpool_alloc_ptr =
-                (unsigned char *) flist->fl_mpool->mpool_alloc(flist->fl_mpool,
-                                                               num_elements * elem_size,
-                                                               flist->fl_payload_buffer_alignment,
-                                                               flist->fl_mpool_reg_flags, &reg);
-        } else {
-#ifdef HAVE_POSIX_MEMALIGN
-            posix_memalign ((void **) &mpool_alloc_ptr, flist->fl_payload_buffer_alignment,
-                            num_elements * elem_size);
-            payload_ptr = mpool_alloc_ptr;
-#else
-            mpool_alloc_ptr = (unsigned char *) malloc (num_elements * elem_size +
-                                                        flist->fl_payload_buffer_alignment);
-            payload_ptr = (unsigned char *) OPAL_ALIGN((uintptr_t)mpool_alloc_ptr,
-                                                        flist->fl_payload_buffer_alignment,
-                                                        uintptr_t);
-#endif
-        }
-
-        if(NULL == mpool_alloc_ptr) {
+        payload_ptr = (unsigned char *) flist->fl_mpool->mpool_alloc(flist->fl_mpool, buffer_size, align, 0);
+        if (NULL == payload_ptr) {
             free(alloc_ptr);
             return OPAL_ERR_TEMP_OUT_OF_RESOURCE;
         }
+
+        if (flist->fl_rcache) {
+            rc = flist->fl_rcache->rcache_register (flist->fl_rcache, payload_ptr, num_elements * elem_size,
+                                                    flist->fl_rcache_reg_flags, MCA_RCACHE_ACCESS_ANY, &reg);
+            if (OPAL_UNLIKELY(OPAL_SUCCESS != rc)) {
+                free (alloc_ptr);
+                flist->fl_mpool->mpool_free (flist->fl_mpool, payload_ptr);
+
+                return rc;
+            }
+        }
     }
 
     /* make the alloc_ptr a list item, save the chunk in the allocations list,
@@ -219,7 +238,7 @@ int opal_free_list_grow_st (opal_free_list_t* flist, size_t num_elements)
     opal_list_append(&(flist->fl_allocations), (opal_list_item_t*)alloc_ptr);
 
     alloc_ptr->registration = reg;
-    alloc_ptr->ptr = mpool_alloc_ptr;
+    alloc_ptr->ptr = payload_ptr;
 
     ptr = (unsigned char*)alloc_ptr + sizeof(opal_free_list_memory_t);
     ptr = OPAL_ALIGN_PTR(ptr, flist->fl_frag_alignment, unsigned char*);
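For callers, the reworked opal_free_list_init takes the registration flags followed by an rcache module in the slot that was previously unused. A minimal caller sketch against the signature shown above, assuming no registration is needed; sizes and counts are illustrative:

#include "opal/class/opal_free_list.h"

/* Sketch: initialize a free list with the v2.1 signature shown above.
 * Passing NULL for the mpool selects mca_mpool_base_default_module (per the
 * diff), and a NULL rcache means payload buffers are never registered. */
static int init_example_free_list (opal_free_list_t *flist)
{
    OBJ_CONSTRUCT(flist, opal_free_list_t);
    return opal_free_list_init (flist,
                                sizeof (opal_free_list_item_t),    /* frag size */
                                8,                                 /* frag alignment */
                                OBJ_CLASS(opal_free_list_item_t),  /* frag class */
                                4096, 8,                           /* payload size / alignment */
                                4, 64, 4,                          /* initial, max, per-grow */
                                NULL,                              /* mpool: use default module */
                                0,                                 /* rcache registration flags */
                                NULL,                              /* rcache: no registration */
                                NULL, NULL);                       /* item_init, ctx */
}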
