Skip to content

Commit 52b907c

Browse files
committed
oshmem/shmem: Allocate and exchange base segment address beforehand
1 parent 285f6b1 commit 52b907c

File tree

6 files changed

+190
-28
lines changed

6 files changed

+190
-28
lines changed

oshmem/mca/memheap/base/memheap_base_select.c

Lines changed: 142 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,8 @@
2525
#include "oshmem/mca/sshmem/base/base.h"
2626
#include "ompi/util/timings.h"
2727

28+
#include <sys/mman.h>
29+
2830
mca_memheap_base_config_t mca_memheap_base_config = {
2931
.device_nic_mem_seg_size = 0
3032
};
@@ -106,6 +108,136 @@ static size_t _memheap_size(void)
106108
return (size_t) memheap_align(oshmem_shmem_info_env.symmetric_heap_size);
107109
}
108110

111+
static void *memheap_mmap_get(void *hint, size_t size)
112+
{
113+
void *addr;
114+
115+
addr = mmap(hint, size,
116+
PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
117+
if (addr == MAP_FAILED) {
118+
return NULL;
119+
}
120+
121+
return addr;
122+
}
123+
124+
static int memheap_exchange_base_address(size_t size, void **address)
125+
{
126+
int nprocs = oshmem_num_procs();
127+
int need_sync = (*address == NULL);
128+
void *base = NULL;
129+
void *ptr = NULL;
130+
int rc, i;
131+
void **bases;
132+
133+
bases = calloc(nprocs, sizeof(*bases));
134+
if (NULL == bases) {
135+
return OSHMEM_ERROR;
136+
}
137+
138+
if (oshmem_my_proc_id() == 0) {
139+
ptr = memheap_mmap_get(NULL, size);
140+
base = ptr;
141+
}
142+
143+
rc = oshmem_shmem_bcast(&base, sizeof(base), 0);
144+
if (OSHMEM_SUCCESS != rc) {
145+
MEMHEAP_ERROR("Failed to exchange allocated vma for base segment "
146+
"(error %d)", rc);
147+
goto out;
148+
}
149+
150+
if (oshmem_my_proc_id() != 0) {
151+
ptr = memheap_mmap_get(base, size);
152+
}
153+
154+
MEMHEAP_VERBOSE(100, "#%d: exchange base address: base %p: %s",
155+
oshmem_my_proc_id(), base,
156+
(base == ptr)? "ok" : "unavailable");
157+
158+
*address = base;
159+
if (need_sync) {
160+
/* They all succeed or fail to allow fallback */
161+
rc = oshmem_shmem_allgather(&ptr, bases, sizeof(ptr));
162+
if (OSHMEM_SUCCESS != rc) {
163+
MEMHEAP_ERROR("Failed to exchange selected vma for base segment "
164+
"(error %d)", rc);
165+
goto out;
166+
}
167+
168+
for (i = 0; i < nprocs; i++) {
169+
if ((NULL == bases[i]) || (bases[i] != base)) {
170+
*address = NULL;
171+
break;
172+
}
173+
}
174+
} else if (ptr != base) {
175+
/* Any failure terminates the rank and others start teardown */
176+
rc = OSHMEM_ERROR;
177+
}
178+
179+
out:
180+
if (((OSHMEM_SUCCESS != rc) || (*address == NULL)) && (ptr != NULL)) {
181+
(void)munmap(ptr, size);
182+
}
183+
184+
free(bases);
185+
return rc;
186+
}
187+
188+
189+
/*
190+
* The returned mca_sshmem_base_start_address value is reserved by using
191+
* mmap() for the expected size.
192+
*/
193+
static int memheap_base_segment_setup(size_t size)
194+
{
195+
int rc;
196+
197+
if ((mca_sshmem_base_start_address == (void *)UINTPTR_MAX) ||
198+
(mca_sshmem_base_start_address == NULL)) {
199+
if (UINTPTR_MAX == 0xFFFFFFFF) {
200+
/**
201+
* if 32 bit we set sshmem_base_start_adress to 0
202+
* to let OS allocate segment automatically
203+
*/
204+
mca_sshmem_base_start_address = NULL;
205+
return OSHMEM_SUCCESS;
206+
}
207+
208+
rc = memheap_exchange_base_address(size, &mca_sshmem_base_start_address);
209+
if (OSHMEM_SUCCESS != rc) {
210+
MEMHEAP_ERROR("Failed to setup base segment address (error %d)", rc);
211+
return rc;
212+
}
213+
214+
if (NULL != mca_sshmem_base_start_address) {
215+
goto done; /* Region is reserved */
216+
}
217+
218+
#if defined(__aarch64__)
219+
mca_sshmem_base_start_address = (void*)0xAB0000000000;
220+
#else
221+
mca_sshmem_base_start_address = (void*)0xFF000000;
222+
#endif
223+
}
224+
225+
if (mca_sshmem_base_start_address != memheap_mmap_get(
226+
mca_sshmem_base_start_address, size)) {
227+
MEMHEAP_ERROR("Failed to create segment address %p/%zu",
228+
mca_sshmem_base_start_address, size);
229+
return OSHMEM_ERROR;
230+
}
231+
232+
done:
233+
if (oshmem_my_proc_id() == 0) {
234+
MEMHEAP_VERBOSE(10, "Using symmetric segment address %p/%zu",
235+
mca_sshmem_base_start_address, size);
236+
}
237+
238+
return OSHMEM_SUCCESS;
239+
}
240+
109241
static memheap_context_t* _memheap_create(void)
110242
{
111243
int rc = OSHMEM_SUCCESS;
@@ -123,13 +255,18 @@ static memheap_context_t* _memheap_create(void)
123255

124256
OPAL_TIMING_ENV_NEXT(timing, "_memheap_size()");
125257

126-
/* Inititialize symmetric area */
127-
if (OSHMEM_SUCCESS == rc) {
128-
rc = mca_memheap_base_alloc_init(&mca_memheap_base_map,
129-
user_size + MEMHEAP_BASE_PRIVATE_SIZE, 0,
130-
"regular_mem");
258+
/* Locate and reserve symmetric area */
259+
rc = memheap_base_segment_setup(user_size + MEMHEAP_BASE_PRIVATE_SIZE);
260+
if (OSHMEM_SUCCESS != rc) {
261+
MEMHEAP_ERROR("Failed to negotiate base segment addres");
262+
return NULL;
131263
}
132264

265+
/* Initialize symmetric area */
266+
rc = mca_memheap_base_alloc_init(&mca_memheap_base_map,
267+
user_size + MEMHEAP_BASE_PRIVATE_SIZE, 0,
268+
"regular_mem");
269+
133270
OPAL_TIMING_ENV_NEXT(timing, "mca_memheap_base_alloc_init()");
134271

135272
/* Initialize atomic symmetric area */

oshmem/mca/memheap/base/memheap_base_static.c

Lines changed: 32 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -25,13 +25,17 @@
2525
#include <pthread.h>
2626

2727
static int _check_perms(const char *perm);
28+
static int _check_non_static_segment(const map_segment_t *mem_segs,
29+
int n_segment,
30+
const void *start, const void *end);
2831
static int _check_address(void *start, void **end);
2932
static int _check_pathname(uint64_t inode, const char *pathname);
3033

3134
int mca_memheap_base_static_init(mca_memheap_map_t *map)
3235
{
3336
/* read and parse segments from /proc/self/maps */
3437
int ret = OSHMEM_SUCCESS;
38+
int n_segments = map->n_segments;
3539
uint64_t total_mem = 0;
3640
void* start;
3741
void* end;
@@ -54,14 +58,6 @@ int mca_memheap_base_static_init(mca_memheap_map_t *map)
5458
return OSHMEM_ERROR;
5559
}
5660

57-
#ifdef __linux__
58-
extern unsigned _end;
59-
if (mca_sshmem_base_start_address < (uintptr_t)&_end) {
60-
MEMHEAP_VERBOSE(1, "sshmem base start address is inside data region"
61-
" (%p < %p)", mca_sshmem_base_start_address, &_end);
62-
}
63-
#endif
64-
6561
while (NULL != fgets(line, sizeof(line), fp)) {
6662
if (3 > sscanf(line,
6763
"%llx-%llx %s %llx %s %llx %s",
@@ -77,6 +73,12 @@ int mca_memheap_base_static_init(mca_memheap_map_t *map)
7773
goto out;
7874
}
7975

76+
if (OSHMEM_ERROR == _check_non_static_segment(
77+
map->mem_segs, n_segments,
78+
start, end)) {
79+
continue;
80+
}
81+
8082
if (OSHMEM_ERROR == _check_address(start, &end))
8183
continue;
8284

@@ -138,6 +140,26 @@ static int _check_perms(const char *perms)
138140
return OSHMEM_ERROR;
139141
}
140142

143+
static int _check_non_static_segment(const map_segment_t *mem_segs,
144+
int n_segment,
145+
const void *start, const void *end)
146+
{
147+
int i;
148+
149+
for (i = 0; i < n_segment; i++) {
150+
if ((start <= mem_segs[i].super.va_base) &&
151+
(mem_segs[i].super.va_base < end)) {
152+
MEMHEAP_VERBOSE(100,
153+
"non static segment: %p-%p already exists as %p-%p",
154+
start, end, mem_segs[i].super.va_base,
155+
mem_segs[i].super.va_end);
156+
return OSHMEM_ERROR;
157+
}
158+
}
159+
160+
return OSHMEM_SUCCESS;
161+
}
162+
141163
static int _check_address(void *start, void **end)
142164
{
143165
/* FIXME Linux specific code */
@@ -148,11 +170,9 @@ static int _check_address(void *start, void **end)
148170
/**
149171
* SGI shmem only supports globals&static in main program.
150172
* It does not support them in shared objects or in dlopen()
151-
* (Clarified on PGAS 2011 tutorial)
173+
* (Clarified on PGAS 2011 tutorial).
152174
*
153-
* So ignored any maps that start higher then process _end
154-
* FIXME: make sure we do not register symmetric heap twice
155-
* if we decide to allow shared objects
175+
* So ignored any maps that start higher then process _end.
156176
*/
157177
if ((uintptr_t)start > data_end) {
158178
MEMHEAP_VERBOSE(100,

oshmem/mca/sshmem/base/sshmem_base_open.c

Lines changed: 1 addition & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -31,17 +31,7 @@
3131
* globals
3232
*/
3333

34-
/**
35-
* if 32 bit we set sshmem_base_start_address to 0
36-
* to let OS allocate segment automatically
37-
*/
38-
#if UINTPTR_MAX == 0xFFFFFFFF
39-
void *mca_sshmem_base_start_address = (void*)0;
40-
#elif defined(__aarch64__)
41-
void* mca_sshmem_base_start_address = (void*)0xAB0000000000;
42-
#else
43-
void* mca_sshmem_base_start_address = (void*)0xFF000000;
44-
#endif
34+
void *mca_sshmem_base_start_address = UINTPTR_MAX;
4535

4636
char * mca_sshmem_base_backing_file_dir = NULL;
4737

oshmem/mca/sshmem/sysv/sshmem_sysv_module.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -170,6 +170,7 @@ segment_create(map_segment_t *ds_buf,
170170
}
171171

172172
/* Attach to the segment */
173+
(void)munmap(mca_sshmem_base_start_address, size);
173174
addr = shmat(shmid, (void *) mca_sshmem_base_start_address, 0);
174175
if (addr == (void *) -1L) {
175176
opal_show_help("help-oshmem-sshmem.txt",

oshmem/runtime/oshmem_shmem_exchange.c

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,15 @@
1616
#include "oshmem/runtime/runtime.h"
1717
#include "oshmem/runtime/params.h"
1818

19+
int oshmem_shmem_bcast(void *buf, int elem_size, int root)
20+
{
21+
int rc;
22+
23+
rc = PMPI_Bcast(buf, elem_size, MPI_BYTE, root, oshmem_comm_world);
24+
25+
return rc;
26+
}
27+
1928
int oshmem_shmem_allgather(void *send_buf, void *rcv_buf, int elem_size)
2029
{
2130
int rc;

oshmem/runtime/runtime.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -128,6 +128,11 @@ int oshmem_shmem_finalize(void);
128128
*/
129129
OSHMEM_DECLSPEC int oshmem_shmem_abort(int errcode);
130130

131+
/**
132+
* Broadcast between all PEs
133+
*/
134+
OSHMEM_DECLSPEC int oshmem_shmem_bcast(void *buf, int elem_size, int root);
135+
131136
/**
132137
* Allgather between all PEs
133138
*/

0 commit comments

Comments
 (0)