Skip to content

Commit 12192f1

Browse files
AboorvaDevarajanawlauria
authored andcommitted
ompi/request: Add a read memory barrier to sync the receive buffer soon after wait completes.
We found an issue where with using multiple threads, it is possible for the data to not be in the buffer before MPI_Wait() returns. Testing the buffer later after MPI_Wait() returned would show the data arrives eventually without the rmb(). We have seen this issue on Power9 intermittently using PAMI, but in theory could happen with any transport. Signed-off-by: Austen Lauria <[email protected]>
1 parent 0fd550d commit 12192f1

File tree

1 file changed

+30
-25
lines changed

1 file changed

+30
-25
lines changed

ompi/request/request.h

Lines changed: 30 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -448,39 +448,44 @@ static inline bool ompi_request_tag_is_collective(int tag) {
448448

449449
static inline void ompi_request_wait_completion(ompi_request_t *req)
450450
{
451-
if (opal_using_threads () && !REQUEST_COMPLETE(req)) {
452-
void *_tmp_ptr;
453-
ompi_wait_sync_t sync;
451+
if (opal_using_threads ()) {
452+
if(!REQUEST_COMPLETE(req)) {
453+
void *_tmp_ptr;
454+
ompi_wait_sync_t sync;
455+
456+
454457
#if OPAL_ENABLE_FT_MPI
455-
redo:
456-
if(OPAL_UNLIKELY( ompi_request_is_failed(req) )) {
457-
return;
458-
}
458+
redo:
459+
if(OPAL_UNLIKELY( ompi_request_is_failed(req) )) {
460+
return;
461+
}
459462
#endif /* OPAL_ENABLE_FT_MPI */
460-
_tmp_ptr = REQUEST_PENDING;
463+
_tmp_ptr = REQUEST_PENDING;
461464

462-
WAIT_SYNC_INIT(&sync, 1);
465+
WAIT_SYNC_INIT(&sync, 1);
463466

464-
if (OPAL_ATOMIC_COMPARE_EXCHANGE_STRONG_PTR(&req->req_complete, &_tmp_ptr, &sync)) {
465-
SYNC_WAIT(&sync);
466-
} else {
467-
/* completed before we had a chance to swap in the sync object */
468-
WAIT_SYNC_SIGNALLED(&sync);
469-
}
467+
if (OPAL_ATOMIC_COMPARE_EXCHANGE_STRONG_PTR(&req->req_complete, &_tmp_ptr, &sync)) {
468+
SYNC_WAIT(&sync);
469+
} else {
470+
/* completed before we had a chance to swap in the sync object */
471+
WAIT_SYNC_SIGNALLED(&sync);
472+
}
470473

471474
#if OPAL_ENABLE_FT_MPI
472-
if (OPAL_UNLIKELY(OMPI_SUCCESS != sync.status)) {
473-
OPAL_OUTPUT_VERBOSE((50, ompi_ftmpi_output_handle, "Status %d reported for sync %p rearming req %p", sync.status, (void*)&sync, (void*)req));
474-
_tmp_ptr = &sync;
475-
if (OPAL_ATOMIC_COMPARE_EXCHANGE_STRONG_PTR(&req->req_complete, &_tmp_ptr, REQUEST_PENDING)) {
476-
opal_output_verbose(10, ompi_ftmpi_output_handle, "Status %d reported for sync %p rearmed req %p", sync.status, (void*)&sync, (void*)req);
477-
WAIT_SYNC_RELEASE(&sync);
478-
goto redo;
475+
if (OPAL_UNLIKELY(OMPI_SUCCESS != sync.status)) {
476+
OPAL_OUTPUT_VERBOSE((50, ompi_ftmpi_output_handle, "Status %d reported for sync %p rearming req %p", sync.status, (void*)&sync, (void*)req));
477+
_tmp_ptr = &sync;
478+
if (OPAL_ATOMIC_COMPARE_EXCHANGE_STRONG_PTR(&req->req_complete, &_tmp_ptr, REQUEST_PENDING)) {
479+
opal_output_verbose(10, ompi_ftmpi_output_handle, "Status %d reported for sync %p rearmed req %p", sync.status, (void*)&sync, (void*)req);
480+
WAIT_SYNC_RELEASE(&sync);
481+
goto redo;
482+
}
479483
}
480-
}
481484
#endif /* OPAL_ENABLE_FT_MPI */
482-
assert(REQUEST_COMPLETE(req));
483-
WAIT_SYNC_RELEASE(&sync);
485+
assert(REQUEST_COMPLETE(req));
486+
WAIT_SYNC_RELEASE(&sync);
487+
}
488+
opal_atomic_rmb();
484489
} else {
485490
while(!REQUEST_COMPLETE(req)) {
486491
opal_progress();

0 commit comments

Comments
 (0)