Skip to content

Commit 34566ea

Browse files
committed
WIP + debug prints
Signed-off-by: George Katevenis <[email protected]>
1 parent 88bb483 commit 34566ea

File tree

3 files changed

+94
-52
lines changed

3 files changed

+94
-52
lines changed

opal/class/opal_interval_tree.c

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -424,6 +424,8 @@ static opal_interval_tree_node_t *opal_interval_tree_find_interval(opal_interval
424424
static opal_interval_tree_node_t *
425425
opal_interval_tree_find_node(opal_interval_tree_t *tree, uint64_t low, uint64_t high, void *data)
426426
{
427+
printf("tree find low 0x%lx high 0x%lx\n", low, high);
428+
427429
return opal_interval_tree_find_interval(tree, tree->root.left, low, high, data);
428430
}
429431

@@ -664,6 +666,8 @@ static void opal_interval_tree_insert_node(opal_interval_tree_t *tree,
664666
opal_interval_tree_node_t *n = parent->left; /* the real root of the tree */
665667
opal_interval_tree_node_t *nill = &tree->nill;
666668

669+
printf("tree insert low 0x%lx high 0x%lx\n", node->low, node->high);
670+
667671
/* set up initial values for the node */
668672
node->color = OPAL_INTERVAL_TREE_COLOR_RED;
669673
node->parent = NULL;

opal/mca/smsc/xpmem/smsc_xpmem_internal.h

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,6 @@
1414
#define OPAL_MCA_SMSC_XPMEM_SMSC_XPMEM_INTERNAL_H
1515

1616
#include "opal/mca/smsc/xpmem/smsc_xpmem.h"
17-
#include "opal/mca/rcache/rcache.h"
1817

1918
#include "opal/mca/rcache/base/rcache_base_vma.h"
2019
#if defined(HAVE_XPMEM_H)

opal/mca/smsc/xpmem/smsc_xpmem_module.c

Lines changed: 90 additions & 51 deletions
Original file line numberDiff line numberDiff line change
@@ -66,8 +66,7 @@ mca_smsc_endpoint_t *mca_smsc_xpmem_get_endpoint(opal_proc_t *peer_proc)
6666
return &endpoint->super;
6767
}
6868

69-
/* look up the remote pointer in the peer rcache and attach if
70-
* necessary */
69+
/* look up the remote pointer in the peer rcache and attach if necessary */
7170
void *mca_smsc_xpmem_map_peer_region(mca_smsc_endpoint_t *endpoint, uint64_t flags,
7271
void *remote_ptr, size_t size, void **local_ptr)
7372
{
@@ -77,71 +76,81 @@ void *mca_smsc_xpmem_map_peer_region(mca_smsc_endpoint_t *endpoint, uint64_t fla
7776
mca_rcache_base_registration_t *reg = NULL;
7877
xpmem_addr_t xpmem_addr;
7978
uintptr_t base, bound;
79+
size_t region_size;
8080
int rc;
8181

82+
/* base is the first byte of the region, bound is the last (inclusive) */
8283
base = OPAL_DOWN_ALIGN((uintptr_t) remote_ptr, attach_align, uintptr_t);
83-
bound = OPAL_ALIGN((uintptr_t) remote_ptr + size, attach_align, uintptr_t);
84+
bound = OPAL_ALIGN((uintptr_t) remote_ptr + size, attach_align, uintptr_t) - 1;
8485
if (OPAL_UNLIKELY(bound > xpmem_endpoint->address_max)) {
8586
bound = xpmem_endpoint->address_max;
8687
}
88+
region_size = bound - base + 1;
89+
90+
printf("user ptr %p size %lu base %p bound %p\n", remote_ptr, size, base, bound);
91+
printf("search base %p len %p\n", base, region_size);
92+
93+
rc = mca_rcache_base_vma_find(vma_module, (void *) base, region_size, &reg);
94+
assert(OPAL_SUCCESS == rc);
8795

88-
rc = mca_rcache_base_vma_find(vma_module, (void *) base, bound - base, &reg);
89-
assert(rc != OPAL_SUCCESS);
90-
9196
// TODO Add rcache stats?
92-
93-
/* For a number of operations here, while support for multiple threads is
94-
* existent, might not produce the fully ideal result. Looks like this can't
95-
* be fully resolved without respective support from the regcache tree.
96-
* TODO finish comment. is it accurate?*/
97-
97+
9898
// TODO what if reg is deleted between finding it and atomically fetching the
99-
// ref count? Or will the tree block? And this could also happen inside the
100-
// tree's code.
101-
99+
// ref count? Or will the tree block? (this could also happen inside the tree's code)
100+
102101
if (reg) {
102+
printf("region match %p-%p\n", reg->base, reg->bound);
103+
103104
int32_t old_ref_count = opal_atomic_fetch_add_32(&reg->ref_count, 1);
104105
if(0 == old_ref_count) {
105106
/* Registration is being deleted by another thread
106107
* in mca_smsc_xpmem_unmap_peer_region, ignore it. */
107108
reg = NULL;
108109
}
109-
110+
110111
// TODO what if two threads increment the ref counter while a third one is
111112
// deleting it? One of the increment-threads will see 1 as the old value
112113
// and go ahead with using the registration, while the writer will delete it!
113-
114+
115+
// Do we ultimately have to do something like this?
116+
114117
// int32_t ref_count = opal_atomic_load_32(&reg->ref_count);
115-
116-
// while(1) {
118+
119+
// while(true) {
117120
// if(0 == ref_count) {
118121
// reg = NULL;
119122
// break;
120123
// }
121-
124+
122125
// if(opal_atomic_compare_exchange_strong_32(
123126
// &reg->ref_count, &ref_count, ref_count + 1)) {
124127
// break;
125128
// }
126129
// }
127-
128130
} else {
131+
printf("no region match\n");
132+
129133
/* If there is a registration that overlaps with the requested range, but
130-
* does not fully cover it, we destroy it and make a new one in its place
131-
* to covers both the previous range and the new requested one. */
132-
133-
rc = mca_rcache_base_vma_find(vma_module, (void *) base, 1, &reg);
134-
assert(rc != OPAL_SUCCESS);
135-
136-
// TODO is this correct?
137-
// TODO check for hang. Only with non-debug?
138-
139-
if(NULL == reg) {
140-
rc = mca_rcache_base_vma_find(vma_module, (void *) (bound + 1), 1, &reg);
141-
assert(rc != OPAL_SUCCESS);
142-
}
143-
134+
* does not fully cover it, we destroy it and make in its place a new one
135+
* that covers both the existing and the new range. */
136+
137+
// uintptr_t search_begin[4] = {base, bound, base - 1, bound + 1};
138+
uintptr_t search_begin[2] = {base, bound};
139+
for (size_t i = 0; i < 2; i++) {
140+
printf("search overlapping %p-%p\n",
141+
search_begin[i], search_begin[i]+1);
142+
143+
rc = mca_rcache_base_vma_find(vma_module, (void *) search_begin[i], 1, &reg);
144+
assert(OPAL_SUCCESS == rc);
145+
146+
if (reg) {
147+
break;
148+
}
149+
}
150+
144151
if (reg) {
152+
printf("found overlapping\n");
153+
145154
/* Set the invalid flag, to mark the deletion of this registration
146155
* (will take place in unmap_peer_region). If another thread has
147156
* already marked deletion, ignore. */
@@ -150,31 +159,38 @@ void *mca_smsc_xpmem_map_peer_region(mca_smsc_endpoint_t *endpoint, uint64_t fla
150159
(volatile opal_atomic_int32_t *) &reg->flags, MCA_RCACHE_FLAGS_INVALID);
151160

152161
if (!(old_flags & MCA_RCACHE_FLAGS_INVALID)) {
162+
printf("handling merge\n");
163+
153164
base = opal_min(base, (uintptr_t) reg->base);
154165
bound = opal_max(bound, (uintptr_t) reg->bound);
166+
region_size = bound - base + 1;
155167

156-
/* We did not increment the ref count when we found the registration.
157-
* When PERSIST is set, a superfluous ref is present, so no need to do
158-
* anything. If not, we must increment the ref counter before calling
159-
* unmap_peer_region (which will decrement it), to avoid it going negative. */
168+
/* unmap_peer_region will decrement the ref count, but we did not
169+
* increment it when we found the reg. If persist is set,
170+
* a superfluous ref is present, so all is fine. If not, we need
171+
* to manually adjust before calling unmap_peer_region, to avoid
172+
* deallocation while someone is still using the reg. */
160173
if(!(MCA_RCACHE_FLAGS_PERSIST & reg->flags))
161174
opal_atomic_add(&reg->ref_count, 1);
162175

176+
printf("set invalid, ref count before unmap call %d\n", reg->ref_count);
177+
163178
mca_smsc_xpmem_unmap_peer_region(reg);
164179
}
165-
180+
166181
reg = NULL;
167-
}
182+
} else
183+
printf("no overlapping\n");
168184
}
169-
185+
170186
if (NULL == reg) {
171187
reg = OBJ_NEW(mca_rcache_base_registration_t);
172188
if (OPAL_LIKELY(NULL == reg)) {
173189
return NULL;
174190
}
175191

176192
reg->ref_count = ((flags & MCA_RCACHE_FLAGS_PERSIST)
177-
&& !(flags & MCA_RCACHE_FLAGS_CACHE_BYPASS) ? 2 : 1);
193+
&& !(flags & MCA_RCACHE_FLAGS_CACHE_BYPASS) ? 2 : 1);
178194
reg->flags = flags;
179195
reg->base = (unsigned char *) base;
180196
reg->bound = (unsigned char *) bound;
@@ -192,24 +208,39 @@ void *mca_smsc_xpmem_map_peer_region(mca_smsc_endpoint_t *endpoint, uint64_t fla
192208
"for endpoint %p address range %p-%p",
193209
(void *) endpoint, reg->base, reg->bound);
194210

195-
reg->rcache_context = xpmem_attach(xpmem_addr, bound - base, NULL);
211+
reg->rcache_context = xpmem_attach(xpmem_addr, region_size, NULL);
212+
printf("xpmem attach(%p, 0x%lx) -> %p\n", base, region_size, reg->rcache_context);
213+
196214
if (OPAL_UNLIKELY((void *) -1 == reg->rcache_context)) {
215+
uintptr_t old_bound = bound;
216+
197217
/* retry with the page as upper bound */
198-
bound = OPAL_ALIGN((uintptr_t) remote_ptr + size, opal_getpagesize(), uintptr_t);
218+
bound = OPAL_ALIGN((uintptr_t) remote_ptr + size, opal_getpagesize(), uintptr_t) - 1;
199219
reg->bound = (unsigned char *) bound;
200-
reg->rcache_context = xpmem_attach(xpmem_addr, bound - base, NULL);
220+
region_size = bound - base + 1;
221+
222+
opal_output_verbose(MCA_BASE_VERBOSE_INFO, opal_smsc_base_framework.framework_output,
223+
"mca_smsc_xpmem_map_peer_region: region mapping "
224+
"for endpoint %p address range %p-%p failed. "
225+
"retrying with range %p-%p",
226+
(void *) endpoint, reg->base, (void *) old_bound,
227+
reg->base, reg->bound);
228+
229+
reg->rcache_context = xpmem_attach(xpmem_addr, region_size, NULL);
201230
if (OPAL_UNLIKELY((void *) -1 == reg->rcache_context)) {
202231
OBJ_RELEASE(reg);
203232
return NULL;
204233
}
205234
}
206235

207-
opal_memchecker_base_mem_defined(reg->rcache_context, bound - base);
208-
236+
printf("new reg %p-%p ref count %d\n", reg->base, reg->bound, reg->ref_count);
237+
238+
opal_memchecker_base_mem_defined(reg->rcache_context, region_size);
239+
209240
if(!(reg->flags & MCA_RCACHE_FLAGS_CACHE_BYPASS)) {
210241
rc = mca_rcache_base_vma_insert(vma_module, reg, 0);
211242
assert(OPAL_SUCCESS == rc);
212-
243+
213244
if(OPAL_SUCCESS != rc) {
214245
reg->flags |= MCA_RCACHE_FLAGS_CACHE_BYPASS;
215246
}
@@ -231,6 +262,8 @@ void mca_smsc_xpmem_unmap_peer_region(void *ctx)
231262

232263
ref_count = opal_atomic_add_fetch_32(&reg->ref_count, -1);
233264
if (OPAL_UNLIKELY(0 == ref_count)) {
265+
printf("UNMAP reg %p-%p\n", reg->base, reg->bound);
266+
234267
opal_output_verbose(MCA_BASE_VERBOSE_INFO, opal_smsc_base_framework.framework_output,
235268
"mca_smsc_xpmem_unmap_peer_region: deleting region mapping for "
236269
"endpoint %p address range %p-%p",
@@ -244,7 +277,7 @@ void mca_smsc_xpmem_unmap_peer_region(void *ctx)
244277
#endif
245278
}
246279

247-
opal_memchecker_base_mem_noaccess(reg->rcache_context, (uintptr_t)(reg->bound - reg->base));
280+
opal_memchecker_base_mem_noaccess(reg->rcache_context, (uintptr_t)(reg->bound - reg->base + 1));
248281
(void) xpmem_detach(reg->rcache_context);
249282

250283
OBJ_RELEASE(reg);
@@ -253,6 +286,12 @@ void mca_smsc_xpmem_unmap_peer_region(void *ctx)
253286

254287
static int mca_smsc_xpmem_endpoint_rcache_cleanup(mca_rcache_base_registration_t *reg, void *ctx)
255288
{
289+
/* See respective comment in mca_smsc_xpmem_map_peer_region */
290+
if(!(MCA_RCACHE_FLAGS_PERSIST & reg->flags))
291+
opal_atomic_add(&reg->ref_count, 1);
292+
293+
printf("cleanup reg %p-%p count %d\n", reg->base, reg->bound, reg->ref_count);
294+
256295
mca_smsc_xpmem_unmap_peer_region(reg);
257296
return OPAL_SUCCESS;
258297
}
@@ -284,7 +323,7 @@ void mca_smsc_xpmem_return_endpoint(mca_smsc_endpoint_t *endpoint)
284323
}
285324

286325
/* memcpy is faster at larger sizes but is undefined if the
287-
pointers are aliased (TODO -- readd alias check) */
326+
pointers are aliased (TODO -- re-add alias check) */
288327
static inline void mca_smsc_xpmem_memmove(void *dst, void *src, size_t size)
289328
{
290329
while (size > 0) {

0 commit comments

Comments
 (0)