Skip to content

Commit b6467d0

Browse files
committed
Fix a race condition between registering a callback and completing a request
Using atomic swap operations we make sure that a thread completing a request will atomically mark it for the thread registering a callback. Similarly, the thread registering a callback will register it atomically and check for whether the request has completed. Signed-off-by: Joseph Schuchart <[email protected]>
1 parent c9c315c commit b6467d0

File tree

1 file changed

+20
-12
lines changed

1 file changed

+20
-12
lines changed

ompi/request/request.h

Lines changed: 20 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -158,6 +158,9 @@ typedef struct ompi_request_t ompi_request_t;
158158
#define REQUEST_PENDING (void *)0L
159159
#define REQUEST_COMPLETED (void *)1L
160160

161+
#define REQUEST_CB_PENDING (void *)0L
162+
#define REQUEST_CB_COMPLETED (void *)1L
163+
161164
struct ompi_predefined_request_t {
162165
struct ompi_request_t request;
163166
char padding[PREDEFINED_REQUEST_PAD - sizeof(ompi_request_t)];
@@ -517,12 +520,12 @@ static inline void ompi_request_wait_completion(ompi_request_t *req)
517520
static inline int ompi_request_complete(ompi_request_t* request, bool with_signal)
518521
{
519522
int rc = 0;
520-
521-
if(NULL != request->req_complete_cb) {
522-
/* Set the request cb to NULL to allow resetting in the callback */
523-
ompi_request_complete_fn_t fct = request->req_complete_cb;
524-
request->req_complete_cb = NULL;
525-
rc = fct( request );
523+
ompi_request_complete_fn_t cb;
524+
cb = (ompi_request_complete_fn_t)OPAL_ATOMIC_SWAP_PTR((opal_atomic_intptr_t*)&request->req_complete_cb,
525+
(intptr_t)REQUEST_CB_COMPLETED);
526+
if (REQUEST_CB_PENDING != cb) {
527+
request->req_complete_cb = REQUEST_CB_PENDING;
528+
rc = cb(request);
526529
}
527530

528531
if (0 == rc) {
@@ -546,12 +549,17 @@ static inline int ompi_request_set_callback(ompi_request_t* request,
546549
void* cb_data)
547550
{
548551
request->req_complete_cb_data = cb_data;
549-
request->req_complete_cb = cb;
550-
/* If request is completed and the callback is not called, need to call callback */
551-
if ((NULL != request->req_complete_cb) && (request->req_complete == REQUEST_COMPLETED)) {
552-
ompi_request_complete_fn_t fct = request->req_complete_cb;
553-
request->req_complete_cb = NULL;
554-
return fct( request );
552+
opal_atomic_wmb();
553+
if ((REQUEST_CB_COMPLETED == request->req_complete_cb) ||
554+
(REQUEST_CB_COMPLETED == (void*)OPAL_ATOMIC_SWAP_PTR((opal_atomic_intptr_t*)&request->req_complete_cb,
555+
(intptr_t)cb))) {
556+
if (NULL != cb) {
557+
/* the request was marked at least partially completed, make sure it's fully complete */
558+
while (!REQUEST_COMPLETE(request)) {}
559+
/* Set the request cb to NULL to allow resetting in the callback */
560+
request->req_complete_cb = REQUEST_CB_PENDING;
561+
cb(request);
562+
}
555563
}
556564
return OMPI_SUCCESS;
557565
}

0 commit comments

Comments
 (0)