Skip to content

Commit a85bd84

Browse files
committed
Improve MPI_Waitsome performance
by avoiding extra atomic exchanges. The fix is based on MPI spec: 12.4.2 Multiple threads completing the same request. A program in which two threads block, waiting on the same request, is erroneous. Similarly, the same request cannot appear in the array of requests of two concurrent MPI_{WAIT|TEST}{ANY|SOME|ALL} calls. In MPI, a request can only be completed once. Any combination of wait or test that violates this rule is erroneous." We add marked flag to the request structure. Only MPI_Waitsome thread will use/access it by any means. PML threads will not see/touch it. So given that any particular request can be used no more than in one MPI_Waitsome we are safe to do this change.
1 parent 42e6251 commit a85bd84

File tree

2 files changed

+27
-16
lines changed

2 files changed

+27
-16
lines changed

ompi/request/req_wait.c

Lines changed: 19 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -367,8 +367,8 @@ int ompi_request_default_wait_some(size_t count,
367367
ompi_request_t **rptr = NULL;
368368
ompi_request_t *request = NULL;
369369
ompi_wait_sync_t sync;
370-
size_t sync_sets = 0, sync_unsets = 0;
371-
370+
bool will_be_signalled = false;
371+
372372
WAIT_SYNC_INIT(&sync, 1);
373373

374374
*outcount = 0;
@@ -390,9 +390,12 @@ int ompi_request_default_wait_some(size_t count,
390390
if( !OPAL_ATOMIC_CMPSET_PTR(&request->req_complete, REQUEST_PENDING, &sync) ) {
391391
/* If the request is completed go ahead and mark it as such */
392392
assert( REQUEST_COMPLETE(request) );
393+
/* TODO: make sure MPI spec is not strict about index order */
394+
indices[num_requests_done] = i;
393395
num_requests_done++;
396+
REQUEST_UNMARK(request);
394397
} else {
395-
sync_sets++;
398+
REQUEST_MARK(request);
396399
}
397400
}
398401

@@ -412,29 +415,29 @@ int ompi_request_default_wait_some(size_t count,
412415
/* Clean up the synchronization primitives */
413416

414417
rptr = requests;
415-
num_requests_done = 0;
416418
for (size_t i = 0; i < count; i++, rptr++) {
417419
request = *rptr;
418420

419-
if( request->req_state == OMPI_REQUEST_INACTIVE ) {
421+
/* Skip inactive and already accounted requests */
422+
if( request->req_state == OMPI_REQUEST_INACTIVE || !REQUEST_MARKED(request) ) {
420423
continue;
421424
}
422-
/* Atomically mark the request as pending. If this succeed
423-
* then the request was not completed, and it is now marked as
424-
* pending. Otherwise, the request is complete )either it was
425-
* before or it has been meanwhile). The major drawback here
426-
* is that we will do all the atomics operations in all cases.
425+
/* Atomically mark the request as pending.
426+
* If this succeed - then the request was not completed,
427+
* and it is now marked as pending.
428+
* Otherwise, the request is complete meanwhile.
427429
*/
428-
if( !OPAL_ATOMIC_CMPSET_PTR(&request->req_complete, &sync, REQUEST_PENDING) ) {
430+
if( !OPAL_ATOMIC_CMPSET_PTR(&request->req_complete, &sync, REQUEST_PENDING) ) {
429431
indices[num_requests_done] = i;
430432
num_requests_done++;
431-
} else {
432-
/* request wasn't finished during this call */
433-
sync_unsets++;
434-
}
433+
/* at least one of requests was completed during this call
434+
* corresponding thread will signal us
435+
*/
436+
will_be_signalled = true;
437+
}
435438
}
436439

437-
if( sync_sets == sync_unsets ){
440+
if( !will_be_signalled ){
438441
/* nobody knows about us,
439442
* set signa-in-progress flag to false
440443
*/

ompi/request/request.h

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
* Copyright (c) 2012 Oak Ridge National Labs. All rights reserved.
1616
* Copyright (c) 2015-2016 Los Alamos National Security, LLC. All rights
1717
* reserved.
18+
* Copyright (c) 2016 Mellanox Technologies. All rights reserved.
1819
* $COPYRIGHT$
1920
*
2021
* Additional copyrights may follow
@@ -112,6 +113,7 @@ struct ompi_request_t {
112113
ompi_request_complete_fn_t req_complete_cb; /**< Called when the request is MPI completed */
113114
void *req_complete_cb_data;
114115
ompi_mpi_object_t req_mpi_object; /**< Pointer to MPI object that created this request */
116+
bool marked;
115117
};
116118

117119
/**
@@ -151,6 +153,12 @@ typedef struct ompi_predefined_request_t ompi_predefined_request_t;
151153

152154

153155
#define REQUEST_COMPLETE(req) (REQUEST_COMPLETED == (req)->req_complete)
156+
157+
#define REQUEST_MARK(req) ( (req)->marked = true )
158+
#define REQUEST_UNMARK(req) ( (req)->marked = false )
159+
#define REQUEST_MARKED(req) ( (req)->marked )
160+
161+
154162
/**
155163
* Finalize a request. This is a macro to avoid function call
156164
* overhead, since this is typically invoked in the critical

0 commit comments

Comments
 (0)