Skip to content

Commit fb44c3b

Browse files
committed
Fix MPI_Waitany and MPI_Waitsome
(request handling related)
1 parent 8d011ea commit fb44c3b

File tree

2 files changed

+44
-5
lines changed

2 files changed

+44
-5
lines changed

ompi/request/req_wait.c

Lines changed: 31 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
* Copyright (c) 2012 Oak Ridge National Labs. All rights reserved.
1616
* Copyright (c) 2016 Los Alamos National Security, LLC. All rights
1717
* reserved.
18+
* Copyright (c) 2016 Mellanox Technologies. All rights reserved.
1819
* $COPYRIGHT$
1920
*
2021
* Additional copyrights may follow
@@ -87,6 +88,7 @@ int ompi_request_default_wait_any(size_t count,
8788
int rc = OMPI_SUCCESS;
8889
ompi_request_t *request=NULL;
8990
ompi_wait_sync_t sync;
91+
int sync_sets = 0, sync_unsets = 0;
9092

9193
WAIT_SYNC_INIT(&sync, 1);
9294

@@ -108,6 +110,8 @@ int ompi_request_default_wait_any(size_t count,
108110
completed = i;
109111
*index = i;
110112
goto after_sync_wait;
113+
} else {
114+
sync_sets++;
111115
}
112116
}
113117

@@ -116,7 +120,8 @@ int ompi_request_default_wait_any(size_t count,
116120
if (MPI_STATUS_IGNORE != status) {
117121
*status = ompi_status_empty;
118122
}
119-
WAIT_SYNC_RELEASE(&sync);
123+
/* No signal-in-flight can be in this case */
124+
WAIT_SYNC_RELEASE_NOWAIT(&sync);
120125
return rc;
121126
}
122127

@@ -138,8 +143,17 @@ int ompi_request_default_wait_any(size_t count,
138143
*/
139144
if( !OPAL_ATOMIC_CMPSET_PTR(&request->req_complete, &sync, REQUEST_PENDING) ) {
140145
*index = i;
146+
} else {
147+
sync_unsets++;
141148
}
142149
}
150+
151+
if( sync_sets == sync_unsets ){
152+
/* set signalled flag so we won't
153+
* block in WAIT_SYNC_RELEASE
154+
*/
155+
WAIT_SYNC_SIGNALLED(&sync);
156+
}
143157

144158
request = requests[*index];
145159
assert( REQUEST_COMPLETE(request) );
@@ -361,7 +375,8 @@ int ompi_request_default_wait_some(size_t count,
361375
ompi_request_t **rptr = NULL;
362376
ompi_request_t *request = NULL;
363377
ompi_wait_sync_t sync;
364-
378+
size_t sync_sets = 0, sync_unsets = 0;
379+
365380
WAIT_SYNC_INIT(&sync, 1);
366381

367382
*outcount = 0;
@@ -384,12 +399,15 @@ int ompi_request_default_wait_some(size_t count,
384399
/* If the request is completed go ahead and mark it as such */
385400
assert( REQUEST_COMPLETE(request) );
386401
num_requests_done++;
402+
} else {
403+
sync_sets++;
387404
}
388405
}
389406

390407
if(num_requests_null_inactive == count) {
391408
*outcount = MPI_UNDEFINED;
392-
WAIT_SYNC_RELEASE(&sync);
409+
/* nobody will signall us */
410+
WAIT_SYNC_RELEASE_NOWAIT(&sync);
393411
return rc;
394412
}
395413

@@ -418,9 +436,19 @@ int ompi_request_default_wait_some(size_t count,
418436
if( !OPAL_ATOMIC_CMPSET_PTR(&request->req_complete, &sync, REQUEST_PENDING) ) {
419437
indices[num_requests_done] = i;
420438
num_requests_done++;
439+
} else {
440+
/* request wasn't finished during this call */
441+
sync_unsets++;
421442
}
422443
}
423444

445+
if( sync_sets == sync_unsets ){
446+
/* nobody knows about us,
447+
* set signa-in-progress flag to false
448+
*/
449+
WAIT_SYNC_SIGNALLED(&sync);
450+
}
451+
424452
WAIT_SYNC_RELEASE(&sync);
425453

426454
*outcount = num_requests_done;

opal/threads/wait_sync.h

Lines changed: 13 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -43,21 +43,32 @@ typedef struct ompi_wait_sync_t {
4343
* the critical path. */
4444
#define WAIT_SYNC_RELEASE(sync) \
4545
if (opal_using_threads()) { \
46-
while ((sync)->signaling) { \
46+
while ((sync)->signaling) { \
4747
continue; \
4848
} \
4949
pthread_cond_destroy(&(sync)->condition); \
5050
pthread_mutex_destroy(&(sync)->lock); \
5151
}
5252

53+
#define WAIT_SYNC_RELEASE_NOWAIT(sync) \
54+
if (opal_using_threads()) { \
55+
pthread_cond_destroy(&(sync)->condition); \
56+
pthread_mutex_destroy(&(sync)->lock); \
57+
}
58+
59+
5360
#define WAIT_SYNC_SIGNAL(sync) \
5461
if (opal_using_threads()) { \
5562
pthread_mutex_lock(&(sync->lock)); \
5663
pthread_cond_signal(&sync->condition); \
5764
pthread_mutex_unlock(&(sync->lock)); \
58-
sync->signaling = false; \
65+
sync->signaling = false; \
5966
}
6067

68+
#define WAIT_SYNC_SIGNALLED(sync){ \
69+
(sync)->signaling = false; \
70+
}
71+
6172
OPAL_DECLSPEC int sync_wait_mt(ompi_wait_sync_t *sync);
6273
static inline int sync_wait_st (ompi_wait_sync_t *sync)
6374
{

0 commit comments

Comments
 (0)