@@ -66,8 +66,7 @@ mca_smsc_endpoint_t *mca_smsc_xpmem_get_endpoint(opal_proc_t *peer_proc)
     return &endpoint->super;
 }
 
-/* look up the remote pointer in the peer rcache and attach if
- * necessary */
+/* look up the remote pointer in the peer rcache and attach if necessary */
 void *mca_smsc_xpmem_map_peer_region(mca_smsc_endpoint_t *endpoint, uint64_t flags,
                                      void *remote_ptr, size_t size, void **local_ptr)
 {
@@ -77,71 +76,81 @@ void *mca_smsc_xpmem_map_peer_region(mca_smsc_endpoint_t *endpoint, uint64_t flags,
     mca_rcache_base_registration_t *reg = NULL;
     xpmem_addr_t xpmem_addr;
     uintptr_t base, bound;
+    size_t region_size;
     int rc;
 
+    /* base is the first byte of the region, bound is the last (inclusive) */
     base = OPAL_DOWN_ALIGN((uintptr_t) remote_ptr, attach_align, uintptr_t);
-    bound = OPAL_ALIGN((uintptr_t) remote_ptr + size, attach_align, uintptr_t);
+    bound = OPAL_ALIGN((uintptr_t) remote_ptr + size, attach_align, uintptr_t) - 1;
     if (OPAL_UNLIKELY(bound > xpmem_endpoint->address_max)) {
         bound = xpmem_endpoint->address_max;
     }
+    region_size = bound - base + 1;
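+    /* e.g. with attach_align = 0x1000 (illustrative value; the real one is
+     * set elsewhere in this component): remote_ptr 0x12345 and size 0x100
+     * give base 0x12000, bound 0x12fff, region_size 0x1000 */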
+
+    printf("user ptr %p size %zu base %p bound %p\n", remote_ptr, size, (void *) base, (void *) bound);
+    printf("search base %p len %zu\n", (void *) base, region_size);
+
+    rc = mca_rcache_base_vma_find(vma_module, (void *) base, region_size, &reg);
+    assert(OPAL_SUCCESS == rc);
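+    /* note: vma_find reports OPAL_SUCCESS even when nothing is found; a miss
+     * simply leaves reg NULL, hence the if (reg) check below */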
 
-    rc = mca_rcache_base_vma_find(vma_module, (void *) base, bound - base, &reg);
-    assert(rc != OPAL_SUCCESS);
-
     // TODO Add rcache stats?
-
-    /* For a number of operations here, while support for multiple threads is
-     * existent, might not produce the fully ideal result. Looks like this can't
-     * be fully resolved without respective support from the regcache tree.
-     * TODO finish comment. is it accurate?*/
-
+
     // TODO what if reg is deleted between finding it and atomically fetching the
-    // ref count? Or will the tree block? And this could also happen inside the
-    // tree's code.
-
+    // ref count? Or will the tree block? (this could also happen inside the tree's code)
+
     if (reg) {
+        printf("region match %p-%p\n", reg->base, reg->bound);
+
         int32_t old_ref_count = opal_atomic_fetch_add_32(&reg->ref_count, 1);
         if (0 == old_ref_count) {
             /* Registration is being deleted by another thread
              * in mca_smsc_xpmem_unmap_peer_region, ignore it. */
             reg = NULL;
         }
-
+
         // TODO what if two threads increment the ref counter while a third one is
         // deleting it? One of the increment-threads will see 1 as the old value
         // and go ahead with using the registration, while the writer will delete it!
-
+
+        // Do we ultimately have to do something like this?
+
         // int32_t ref_count = opal_atomic_load_32(&reg->ref_count);
-
-        // while(1) {
+
+        // while (true) {
         //     if(0 == ref_count) {
         //         reg = NULL;
         //         break;
         //     }
-
+
         //     if(opal_atomic_compare_exchange_strong_32(
         //         &reg->ref_count, &ref_count, ref_count + 1)) {
         //         break;
         //     }
         // }
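         // (Such a CAS loop would only increment a ref count it has observed to
         // be nonzero, so unlike the plain fetch-add above it cannot revive a
         // registration that has already dropped to 0 and is being deleted.)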
-
     } else {
+        printf("no region match\n");
+
         /* If there is a registration that overlaps with the requested range, but
-         * does not fully cover it, we destroy it and make a new one in its place
-         * to covers both the previous range and the new requested one. */
-
-        rc = mca_rcache_base_vma_find(vma_module, (void *) base, 1, &reg);
-        assert(rc != OPAL_SUCCESS);
-
-        // TODO is this correct?
-        // TODO check for hang. Only with non-debug?
-
-        if (NULL == reg) {
-            rc = mca_rcache_base_vma_find(vma_module, (void *) (bound + 1), 1, &reg);
-            assert(rc != OPAL_SUCCESS);
-        }
-
+         * does not fully cover it, we destroy it and create in its place a new
+         * one that covers both the existing and the new range. */
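+        /* e.g. an existing reg covering 0x10000-0x11fff and a request for
+         * 0x11000-0x12fff end up as a single reg covering 0x10000-0x12fff */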
+
+        // uintptr_t search_begin[4] = {base, bound, base - 1, bound + 1};
+        uintptr_t search_begin[2] = {base, bound};
+        for (size_t i = 0; i < 2; i++) {
+            printf("search overlapping %p-%p\n",
+                   (void *) search_begin[i], (void *) (search_begin[i] + 1));
+
+            rc = mca_rcache_base_vma_find(vma_module, (void *) search_begin[i], 1, &reg);
+            assert(OPAL_SUCCESS == rc);
+
+            if (reg) {
+                break;
+            }
+        }
+
         if (reg) {
+            printf("found overlapping\n");
+
             /* Set the invalid flag, to mark the deletion of this registration
              * (will take place in unmap_peer_region). If another thread has
              * already marked deletion, ignore. */
@@ -150,31 +159,38 @@ void *mca_smsc_xpmem_map_peer_region(mca_smsc_endpoint_t *endpoint, uint64_t flags,
                 (volatile opal_atomic_int32_t *) &reg->flags, MCA_RCACHE_FLAGS_INVALID);
 
             if (!(old_flags & MCA_RCACHE_FLAGS_INVALID)) {
+                printf("handling merge\n");
+
                 base = opal_min(base, (uintptr_t) reg->base);
                 bound = opal_max(bound, (uintptr_t) reg->bound);
+                region_size = bound - base + 1;
 
-                /* We did not increment the ref count when we found the registration.
-                 * When PERSIST is set, a superfluous ref is present, so no need to do
-                 * anything. If not, we must increment the ref counter before calling
-                 * unmap_peer_region (which will decrement it), to avoid it going negative. */
+                /* unmap_peer_region will decrement the ref count, but we did not
+                 * increment it when we found the reg. If PERSIST was set,
+                 * a superfluous ref is present, so all is fine. If not, we need
+                 * to manually add one before calling unmap_peer_region, to avoid
+                 * deallocation while someone is still using the reg. */
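+                /* (a cached PERSIST reg starts at ref_count 2 -- one ref held
+                 * by the cache, one for the caller -- while all other regs
+                 * start at 1; see the ref_count initialization below) */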
                 if (!(MCA_RCACHE_FLAGS_PERSIST & reg->flags))
                     opal_atomic_add(&reg->ref_count, 1);
 
+                printf("set invalid, ref count before unmap call %d\n", reg->ref_count);
+
                 mca_smsc_xpmem_unmap_peer_region(reg);
             }
-
+
             reg = NULL;
-        }
+        } else
+            printf("no overlapping\n");
     }
-
+
     if (NULL == reg) {
         reg = OBJ_NEW(mca_rcache_base_registration_t);
         if (OPAL_UNLIKELY(NULL == reg)) {
             return NULL;
         }
 
         reg->ref_count = ((flags & MCA_RCACHE_FLAGS_PERSIST)
-                           && !(flags & MCA_RCACHE_FLAGS_CACHE_BYPASS) ? 2 : 1);
+                          && !(flags & MCA_RCACHE_FLAGS_CACHE_BYPASS) ? 2 : 1);
         reg->flags = flags;
         reg->base = (unsigned char *) base;
         reg->bound = (unsigned char *) bound;
@@ -192,24 +208,39 @@ void *mca_smsc_xpmem_map_peer_region(mca_smsc_endpoint_t *endpoint, uint64_t flags,
                             "for endpoint %p address range %p-%p",
                             (void *) endpoint, reg->base, reg->bound);
 
-        reg->rcache_context = xpmem_attach(xpmem_addr, bound - base, NULL);
+        reg->rcache_context = xpmem_attach(xpmem_addr, region_size, NULL);
+        printf("xpmem attach(%p, 0x%zx) -> %p\n", (void *) base, region_size, reg->rcache_context);
+
         if (OPAL_UNLIKELY((void *) -1 == reg->rcache_context)) {
+            uintptr_t old_bound = bound;
+
             /* retry with the page as upper bound */
-            bound = OPAL_ALIGN((uintptr_t) remote_ptr + size, opal_getpagesize(), uintptr_t);
+            bound = OPAL_ALIGN((uintptr_t) remote_ptr + size, opal_getpagesize(), uintptr_t) - 1;
             reg->bound = (unsigned char *) bound;
-            reg->rcache_context = xpmem_attach(xpmem_addr, bound - base, NULL);
+            region_size = bound - base + 1;
+
+            opal_output_verbose(MCA_BASE_VERBOSE_INFO, opal_smsc_base_framework.framework_output,
+                                "mca_smsc_xpmem_map_peer_region: region mapping "
+                                "for endpoint %p address range %p-%p failed. "
+                                "retrying with range %p-%p",
+                                (void *) endpoint, reg->base, (void *) old_bound,
+                                reg->base, reg->bound);
+
+            reg->rcache_context = xpmem_attach(xpmem_addr, region_size, NULL);
             if (OPAL_UNLIKELY((void *) -1 == reg->rcache_context)) {
                 OBJ_RELEASE(reg);
                 return NULL;
             }
         }
 
-        opal_memchecker_base_mem_defined(reg->rcache_context, bound - base);
-
+        printf("new reg %p-%p ref count %d\n", reg->base, reg->bound, reg->ref_count);
+
+        opal_memchecker_base_mem_defined(reg->rcache_context, region_size);
+
         if (!(reg->flags & MCA_RCACHE_FLAGS_CACHE_BYPASS)) {
             rc = mca_rcache_base_vma_insert(vma_module, reg, 0);
             assert(OPAL_SUCCESS == rc);
-
+
             if (OPAL_SUCCESS != rc) {
                 reg->flags |= MCA_RCACHE_FLAGS_CACHE_BYPASS;
             }
@@ -231,6 +262,8 @@ void mca_smsc_xpmem_unmap_peer_region(void *ctx)
 
     ref_count = opal_atomic_add_fetch_32(&reg->ref_count, -1);
     if (OPAL_UNLIKELY(0 == ref_count)) {
+        printf("UNMAP reg %p-%p\n", reg->base, reg->bound);
+
         opal_output_verbose(MCA_BASE_VERBOSE_INFO, opal_smsc_base_framework.framework_output,
                             "mca_smsc_xpmem_unmap_peer_region: deleting region mapping for "
                             "endpoint %p address range %p-%p",
@@ -244,7 +277,7 @@ void mca_smsc_xpmem_unmap_peer_region(void *ctx)
 #endif
     }
 
-    opal_memchecker_base_mem_noaccess(reg->rcache_context, (uintptr_t)(reg->bound - reg->base));
+    opal_memchecker_base_mem_noaccess(reg->rcache_context, (uintptr_t)(reg->bound - reg->base + 1));
     (void) xpmem_detach(reg->rcache_context);
 
     OBJ_RELEASE(reg);
@@ -253,6 +286,12 @@ void mca_smsc_xpmem_unmap_peer_region(void *ctx)
 
 static int mca_smsc_xpmem_endpoint_rcache_cleanup(mca_rcache_base_registration_t *reg, void *ctx)
 {
+    /* See respective comment in mca_smsc_xpmem_map_peer_region */
+    if (!(MCA_RCACHE_FLAGS_PERSIST & reg->flags))
+        opal_atomic_add(&reg->ref_count, 1);
+
+    printf("cleanup reg %p-%p count %d\n", reg->base, reg->bound, reg->ref_count);
+
     mca_smsc_xpmem_unmap_peer_region(reg);
     return OPAL_SUCCESS;
 }
@@ -284,7 +323,7 @@ void mca_smsc_xpmem_return_endpoint(mca_smsc_endpoint_t *endpoint)
 }
 
 /* memcpy is faster at larger sizes but is undefined if the
-   pointers are aliased (TODO -- readd alias check) */
+   pointers are aliased (TODO -- re-add alias check) */
 static inline void mca_smsc_xpmem_memmove(void *dst, void *src, size_t size)
 {
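     /* one possible shape for the alias check mentioned in the TODO above
      * (illustrative sketch only, not part of this change): copy in one shot
      * when the ranges cannot overlap.
      *
      *   if ((uintptr_t) dst - (uintptr_t) src >= size
      *       && (uintptr_t) src - (uintptr_t) dst >= size) {
      *       memcpy(dst, src, size);
      *       return;
      *   }
      */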
     while (size > 0) {