@@ -66,8 +66,7 @@ mca_smsc_endpoint_t *mca_smsc_xpmem_get_endpoint(opal_proc_t *peer_proc)
66
66
return & endpoint -> super ;
67
67
}
68
68
69
- /* look up the remote pointer in the peer rcache and attach if
70
- * necessary */
69
+ /* look up the remote pointer in the peer rcache and attach if necessary */
71
70
void * mca_smsc_xpmem_map_peer_region (mca_smsc_endpoint_t * endpoint , uint64_t flags ,
72
71
void * remote_ptr , size_t size , void * * local_ptr )
73
72
{
@@ -79,69 +78,77 @@ void *mca_smsc_xpmem_map_peer_region(mca_smsc_endpoint_t *endpoint, uint64_t fla
79
78
uintptr_t base , bound ;
80
79
int rc ;
81
80
81
+ /* base is the first byte of the region, bound is the last (inclusive) */
82
82
base = OPAL_DOWN_ALIGN ((uintptr_t ) remote_ptr , attach_align , uintptr_t );
83
- bound = OPAL_ALIGN ((uintptr_t ) remote_ptr + size , attach_align , uintptr_t );
83
+ bound = OPAL_ALIGN ((uintptr_t ) remote_ptr + size , attach_align , uintptr_t ) - 1 ;
84
84
if (OPAL_UNLIKELY (bound > xpmem_endpoint -> address_max )) {
85
85
bound = xpmem_endpoint -> address_max ;
86
86
}
87
87
88
- rc = mca_rcache_base_vma_find (vma_module , (void * ) base , bound - base , & reg );
89
- assert (rc != OPAL_SUCCESS );
90
-
88
+ printf ("user ptr %p size %lu base %p bound %p\n" , remote_ptr , size , base , bound );
89
+ printf ("search base %p len %p\n" , base , bound - base + 1 );
90
+
91
+ rc = mca_rcache_base_vma_find (vma_module , (void * ) base , bound - base + 1 , & reg );
92
+ assert (OPAL_SUCCESS == rc );
93
+
91
94
// TODO Add rcache stats?
92
-
93
- /* For a number of operations here, while support for multiple threads is
94
- * existent, might not produce the fully ideal result. Looks like this can't
95
- * be fully resolved without respective support from the regcache tree.
96
- * TODO finish comment. is it accurate?*/
97
-
95
+
98
96
// TODO what if reg is deleted between finding it and atomically fetching the
99
- // ref count? Or will the tree block? And this could also happen inside the
100
- // tree's code.
101
-
97
+ // ref count? Or will the tree block? (this could also happen inside the tree's code)
98
+
102
99
if (reg ) {
100
+ printf ("region match %p-%p\n" , reg -> base , reg -> bound );
101
+
103
102
int32_t old_ref_count = opal_atomic_fetch_add_32 (& reg -> ref_count , 1 );
104
103
if (0 == old_ref_count ) {
105
104
/* Registration is being deleted by another thread
106
105
* in mca_smsc_xpmem_unmap_peer_region, ignore it. */
107
106
reg = NULL ;
108
107
}
109
-
108
+
110
109
// TODO what if two threads increment the ref counter while a third one is
111
110
// deleting it? One of the increment-threads will see 1 as the old value
112
111
// and go ahead with using the registration, while the writer will delete it!
113
-
112
+
113
+ // Do we ultimately have to do something like this?
114
+
114
115
// int32_t ref_count = opal_atomic_load_32(®->ref_count);
115
-
116
- // while(1 ) {
116
+
117
+ // while(true ) {
117
118
// if(0 == ref_count) {
118
119
// reg = NULL;
119
120
// break;
120
121
// }
121
-
122
+
122
123
// if(opal_atomic_compare_exchange_strong_32(
123
124
// ®->ref_count, &ref_count, ref_count + 1)) {
124
125
// break;
125
126
// }
126
127
// }
127
-
128
128
} else {
129
+ printf ("no region match\n" );
130
+
129
131
/* If there is a registration that overlaps with the requested range, but
130
- * does not fully cover it, we destroy it and make a new one in its place
131
- * to covers both the previous range and the new requested one. */
132
-
133
- rc = mca_rcache_base_vma_find (vma_module , (void * ) base , 1 , & reg );
134
- assert (rc != OPAL_SUCCESS );
135
-
136
- // TODO is this correct?
137
- // TODO check for hang. Only with non-debug?
138
-
139
- if (NULL == reg ) {
140
- rc = mca_rcache_base_vma_find (vma_module , (void * ) (bound + 1 ), 1 , & reg );
141
- assert (rc != OPAL_SUCCESS );
142
- }
143
-
132
+ * does not fully cover it, we destroy it and make in its place a new one
133
+ * that covers both the existing and the new range. */
134
+
135
+ // uintptr_t search_begin[4] = {base, bound, base - 1, bound + 1};
136
+ uintptr_t search_begin [2 ] = {base , bound };
137
+ for (size_t i = 0 ; i < 2 ; i ++ ) {
138
+ printf ("search overlapping %p-%p\n" ,
139
+ search_begin [i ], search_begin [i ]+ 1 );
140
+
141
+ rc = mca_rcache_base_vma_find (vma_module , (void * ) search_begin [i ], 1 , & reg );
142
+ assert (OPAL_SUCCESS == rc );
143
+
144
+ if (reg ) {
145
+ break ;
146
+ }
147
+ }
148
+
144
149
if (reg ) {
150
+ printf ("found overlapping\n" );
151
+
145
152
/* Set the invalid flag, to mark the deletion of this registration
146
153
* (will take place in unmap_peer_region). If another thread has
147
154
* already marked deletion, ignore. */
@@ -150,31 +157,37 @@ void *mca_smsc_xpmem_map_peer_region(mca_smsc_endpoint_t *endpoint, uint64_t fla
150
157
(volatile opal_atomic_int32_t * ) & reg -> flags , MCA_RCACHE_FLAGS_INVALID );
151
158
152
159
if (!(old_flags & MCA_RCACHE_FLAGS_INVALID )) {
160
+ printf ("handling merge\n" );
161
+
153
162
base = opal_min (base , (uintptr_t ) reg -> base );
154
163
bound = opal_max (bound , (uintptr_t ) reg -> bound );
155
164
156
- /* We did not increment the ref count when we found the registration.
157
- * When PERSIST is set, a superfluous ref is present, so no need to do
158
- * anything. If not, we must increment the ref counter before calling
159
- * unmap_peer_region (which will decrement it), to avoid it going negative. */
165
+ /* unmap_peer_region will decrement the ref count, but we did not
166
+ * increment it when we found the reg. If persist was set,
167
+ * a superfluous ref is present, so all is fine. If not, we need
168
+ * to manually adjust before calling unmap_peer_region, to avoid
169
+ * deallocation while someone is still using the reg. */
160
170
if (!(MCA_RCACHE_FLAGS_PERSIST & reg -> flags ))
161
171
opal_atomic_add (& reg -> ref_count , 1 );
162
172
173
+ printf ("set invalid, ref count before unmap call %d\n" , reg -> ref_count );
174
+
163
175
mca_smsc_xpmem_unmap_peer_region (reg );
164
176
}
165
-
177
+
166
178
reg = NULL ;
167
- }
179
+ } else
180
+ printf ("no overlapping\n" );
168
181
}
169
-
182
+
170
183
if (NULL == reg ) {
171
184
reg = OBJ_NEW (mca_rcache_base_registration_t );
172
185
if (OPAL_LIKELY (NULL == reg )) {
173
186
return NULL ;
174
187
}
175
188
176
189
reg -> ref_count = ((flags & MCA_RCACHE_FLAGS_PERSIST )
177
- && !(flags & MCA_RCACHE_FLAGS_CACHE_BYPASS ) ? 2 : 1 );
190
+ && !(flags & MCA_RCACHE_FLAGS_CACHE_BYPASS ) ? 2 : 1 );
178
191
reg -> flags = flags ;
179
192
reg -> base = (unsigned char * ) base ;
180
193
reg -> bound = (unsigned char * ) bound ;
@@ -192,24 +205,38 @@ void *mca_smsc_xpmem_map_peer_region(mca_smsc_endpoint_t *endpoint, uint64_t fla
192
205
"for endpoint %p address range %p-%p" ,
193
206
(void * ) endpoint , reg -> base , reg -> bound );
194
207
195
- reg -> rcache_context = xpmem_attach (xpmem_addr , bound - base , NULL );
208
+ reg -> rcache_context = xpmem_attach (xpmem_addr , bound - base + 1 , NULL );
209
+ printf ("xpmem attach(%p, 0x%lx) -> %p\n" , base , bound - base + 1 , reg -> rcache_context );
210
+
196
211
if (OPAL_UNLIKELY ((void * ) -1 == reg -> rcache_context )) {
212
+ uintptr_t old_bound = bound ;
213
+
197
214
/* retry with the page as upper bound */
198
- bound = OPAL_ALIGN ((uintptr_t ) remote_ptr + size , opal_getpagesize (), uintptr_t );
215
+ bound = OPAL_ALIGN ((uintptr_t ) remote_ptr + size , opal_getpagesize (), uintptr_t ) - 1 ;
199
216
reg -> bound = (unsigned char * ) bound ;
200
- reg -> rcache_context = xpmem_attach (xpmem_addr , bound - base , NULL );
217
+
218
+ opal_output_verbose (MCA_BASE_VERBOSE_INFO , opal_smsc_base_framework .framework_output ,
219
+ "mca_smsc_xpmem_map_peer_region: region mapping "
220
+ "for endpoint %p address range %p-%p failed. "
221
+ "retrying with range %p-%p" ,
222
+ (void * ) endpoint , reg -> base , (void * ) old_bound ,
223
+ reg -> base , reg -> bound );
224
+
225
+ reg -> rcache_context = xpmem_attach (xpmem_addr , bound - base + 1 , NULL );
201
226
if (OPAL_UNLIKELY ((void * ) -1 == reg -> rcache_context )) {
202
227
OBJ_RELEASE (reg );
203
228
return NULL ;
204
229
}
205
230
}
206
231
207
- opal_memchecker_base_mem_defined (reg -> rcache_context , bound - base );
208
-
232
+ printf ("new reg %p-%p ref count %d\n" , reg -> base , reg -> bound , reg -> ref_count );
233
+
234
+ opal_memchecker_base_mem_defined (reg -> rcache_context , bound - base + 1 );
235
+
209
236
if (!(reg -> flags & MCA_RCACHE_FLAGS_CACHE_BYPASS )) {
210
237
rc = mca_rcache_base_vma_insert (vma_module , reg , 0 );
211
238
assert (OPAL_SUCCESS == rc );
212
-
239
+
213
240
if (OPAL_SUCCESS != rc ) {
214
241
reg -> flags |= MCA_RCACHE_FLAGS_CACHE_BYPASS ;
215
242
}
@@ -231,6 +258,8 @@ void mca_smsc_xpmem_unmap_peer_region(void *ctx)
231
258
232
259
ref_count = opal_atomic_add_fetch_32 (& reg -> ref_count , -1 );
233
260
if (OPAL_UNLIKELY (0 == ref_count )) {
261
+ printf ("UNMAP reg %p-%p\n" , reg -> base , reg -> bound );
262
+
234
263
opal_output_verbose (MCA_BASE_VERBOSE_INFO , opal_smsc_base_framework .framework_output ,
235
264
"mca_smsc_xpmem_unmap_peer_region: deleting region mapping for "
236
265
"endpoint %p address range %p-%p" ,
@@ -244,7 +273,7 @@ void mca_smsc_xpmem_unmap_peer_region(void *ctx)
244
273
#endif
245
274
}
246
275
247
- opal_memchecker_base_mem_noaccess (reg -> rcache_context , (uintptr_t )(reg -> bound - reg -> base ));
276
+ opal_memchecker_base_mem_noaccess (reg -> rcache_context , (uintptr_t )(reg -> bound - reg -> base + 1 ));
248
277
(void ) xpmem_detach (reg -> rcache_context );
249
278
250
279
OBJ_RELEASE (reg );
@@ -253,6 +282,12 @@ void mca_smsc_xpmem_unmap_peer_region(void *ctx)
253
282
254
283
static int mca_smsc_xpmem_endpoint_rcache_cleanup (mca_rcache_base_registration_t * reg , void * ctx )
255
284
{
285
+ /* See respective comment in mca_smsc_xpmem_map_peer_region */
286
+ if (!(MCA_RCACHE_FLAGS_PERSIST & reg -> flags ))
287
+ opal_atomic_add (& reg -> ref_count , 1 );
288
+
289
+ printf ("cleanup reg %p-%p count %d\n" , reg -> base , reg -> bound , reg -> ref_count );
290
+
256
291
mca_smsc_xpmem_unmap_peer_region (reg );
257
292
return OPAL_SUCCESS ;
258
293
}
@@ -284,7 +319,7 @@ void mca_smsc_xpmem_return_endpoint(mca_smsc_endpoint_t *endpoint)
284
319
}
285
320
286
321
/* memcpy is faster at larger sizes but is undefined if the
287
- pointers are aliased (TODO -- readd alias check) */
322
+ pointers are aliased (TODO -- re-add alias check) */
288
323
static inline void mca_smsc_xpmem_memmove (void * dst , void * src , size_t size )
289
324
{
290
325
while (size > 0 ) {
0 commit comments