Skip to content

Commit e333d4e

Browse files
authored
Merge pull request #8934 from zhngaj/v5.0.x-err-handling-fix
v5.0.x: coll/base: Fix the error handling in a couple of collectives.
2 parents f14685f + 10b29c5 commit e333d4e

7 files changed

+44 -22 lines changed (44 additions, 22 deletions)

ompi/mca/coll/base/coll_base_alltoall.c

Lines changed: 8 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -474,8 +474,10 @@ int ompi_coll_base_alltoall_intra_linear_sync(const void *sbuf, int scount,
474474
for( ri = 0; ri < nreqs; ri++ ) {
475475
if (MPI_REQUEST_NULL == reqs[ri]) continue;
476476
if (MPI_ERR_PENDING == reqs[ri]->req_status.MPI_ERROR) continue;
477-
error = reqs[ri]->req_status.MPI_ERROR;
478-
break;
477+
if (reqs[ri]->req_status.MPI_ERROR != MPI_SUCCESS) {
478+
error = reqs[ri]->req_status.MPI_ERROR;
479+
break;
480+
}
479481
}
480482
}
481483
OPAL_OUTPUT((ompi_coll_base_framework.framework_output,
@@ -677,8 +679,10 @@ int ompi_coll_base_alltoall_intra_basic_linear(const void *sbuf, int scount,
677679
for( i = 0; i < nreqs; i++ ) {
678680
if (MPI_REQUEST_NULL == req[i]) continue;
679681
if (MPI_ERR_PENDING == req[i]->req_status.MPI_ERROR) continue;
680-
err = req[i]->req_status.MPI_ERROR;
681-
break;
682+
if (req[i]->req_status.MPI_ERROR != MPI_SUCCESS) {
683+
err = req[i]->req_status.MPI_ERROR;
684+
break;
685+
}
682686
}
683687
}
684688
OPAL_OUTPUT( (ompi_coll_base_framework.framework_output,"%s:%4d\tError occurred %d, rank %2d",

ompi/mca/coll/base/coll_base_alltoallv.c

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -281,8 +281,10 @@ ompi_coll_base_alltoallv_intra_basic_linear(const void *sbuf, const int *scounts
281281
for( i = 0; i < nreqs; i++ ) {
282282
if (MPI_REQUEST_NULL == reqs[i]) continue;
283283
if (MPI_ERR_PENDING == reqs[i]->req_status.MPI_ERROR) continue;
284-
err = reqs[i]->req_status.MPI_ERROR;
285-
break;
284+
if (reqs[i]->req_status.MPI_ERROR != MPI_SUCCESS) {
285+
err = reqs[i]->req_status.MPI_ERROR;
286+
break;
287+
}
286288
}
287289
}
288290
/* Free the requests in all cases as they are persistent */

ompi/mca/coll/base/coll_base_barrier.c

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -400,8 +400,10 @@ int ompi_coll_base_barrier_intra_basic_linear(struct ompi_communicator_t *comm,
400400
for( i = 0; i < size; i++ ) {
401401
if (MPI_REQUEST_NULL == requests[i]) continue;
402402
if (MPI_ERR_PENDING == requests[i]->req_status.MPI_ERROR) continue;
403-
err = requests[i]->req_status.MPI_ERROR;
404-
break;
403+
if (requests[i]->req_status.MPI_ERROR != MPI_SUCCESS) {
404+
err = requests[i]->req_status.MPI_ERROR;
405+
break;
406+
}
405407
}
406408
}
407409
ompi_coll_base_free_reqs(requests, size);

ompi/mca/coll/base/coll_base_bcast.c

Lines changed: 12 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -218,8 +218,10 @@ ompi_coll_base_bcast_intra_generic( void* buffer,
218218
for( req_index = 0; req_index < 2; req_index++ ) {
219219
if (MPI_REQUEST_NULL == recv_reqs[req_index]) continue;
220220
if (MPI_ERR_PENDING == recv_reqs[req_index]->req_status.MPI_ERROR) continue;
221-
err = recv_reqs[req_index]->req_status.MPI_ERROR;
222-
break;
221+
if (recv_reqs[req_index]->req_status.MPI_ERROR != MPI_SUCCESS) {
222+
err = recv_reqs[req_index]->req_status.MPI_ERROR;
223+
break;
224+
}
223225
}
224226
}
225227
ompi_coll_base_free_reqs( recv_reqs, 2);
@@ -228,8 +230,10 @@ ompi_coll_base_bcast_intra_generic( void* buffer,
228230
for( req_index = 0; req_index < tree->tree_nextsize; req_index++ ) {
229231
if (MPI_REQUEST_NULL == send_reqs[req_index]) continue;
230232
if (MPI_ERR_PENDING == send_reqs[req_index]->req_status.MPI_ERROR) continue;
231-
err = send_reqs[req_index]->req_status.MPI_ERROR;
232-
break;
233+
if (send_reqs[req_index]->req_status.MPI_ERROR != MPI_SUCCESS) {
234+
err = send_reqs[req_index]->req_status.MPI_ERROR;
235+
break;
236+
}
233237
}
234238
}
235239
ompi_coll_base_free_reqs(send_reqs, tree->tree_nextsize);
@@ -679,8 +683,10 @@ ompi_coll_base_bcast_intra_basic_linear(void *buff, int count,
679683
for( preq = reqs; preq < reqs+i; preq++ ) {
680684
if (MPI_REQUEST_NULL == *preq) continue;
681685
if (MPI_ERR_PENDING == (*preq)->req_status.MPI_ERROR) continue;
682-
err = (*preq)->req_status.MPI_ERROR;
683-
break;
686+
if ((*preq)->req_status.MPI_ERROR != MPI_SUCCESS) {
687+
err = (*preq)->req_status.MPI_ERROR;
688+
break;
689+
}
684690
}
685691
ompi_coll_base_free_reqs(reqs, i);
686692
}

ompi/mca/coll/base/coll_base_gather.c

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -331,8 +331,10 @@ ompi_coll_base_gather_intra_linear_sync(const void *sbuf, int scount,
331331
for( i = 0; i < size; i++ ) {
332332
if (MPI_REQUEST_NULL == reqs[i]) continue;
333333
if (MPI_ERR_PENDING == reqs[i]->req_status.MPI_ERROR) continue;
334-
ret = reqs[i]->req_status.MPI_ERROR;
335-
break;
334+
if (reqs[i]->req_status.MPI_ERROR != MPI_SUCCESS) {
335+
ret = reqs[i]->req_status.MPI_ERROR;
336+
break;
337+
}
336338
}
337339
}
338340
ompi_coll_base_free_reqs(reqs, size);

ompi/mca/coll/base/coll_base_reduce.c

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -343,8 +343,10 @@ int ompi_coll_base_reduce_generic( const void* sendbuf, void* recvbuf, int origi
343343
for( i = 0; i < 2; i++ ) {
344344
if (MPI_REQUEST_NULL == reqs[i]) continue;
345345
if (MPI_ERR_PENDING == reqs[i]->req_status.MPI_ERROR) continue;
346-
ret = reqs[i]->req_status.MPI_ERROR;
347-
break;
346+
if (reqs[i]->req_status.MPI_ERROR != MPI_SUCCESS) {
347+
ret = reqs[i]->req_status.MPI_ERROR;
348+
break;
349+
}
348350
}
349351
}
350352
ompi_coll_base_free_reqs(reqs, 2);
@@ -353,8 +355,10 @@ int ompi_coll_base_reduce_generic( const void* sendbuf, void* recvbuf, int origi
353355
for( i = 0; i < max_outstanding_reqs; i++ ) {
354356
if (MPI_REQUEST_NULL == sreq[i]) continue;
355357
if (MPI_ERR_PENDING == sreq[i]->req_status.MPI_ERROR) continue;
356-
ret = sreq[i]->req_status.MPI_ERROR;
357-
break;
358+
if (sreq[i]->req_status.MPI_ERROR != MPI_SUCCESS) {
359+
ret = sreq[i]->req_status.MPI_ERROR;
360+
break;
361+
}
358362
}
359363
}
360364
ompi_coll_base_free_reqs(sreq, max_outstanding_reqs);

ompi/mca/coll/base/coll_base_scatter.c

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -377,8 +377,10 @@ ompi_coll_base_scatter_intra_linear_nb(const void *sbuf, int scount,
377377
for (i = 0; i < nreqs; i++) {
378378
if (MPI_REQUEST_NULL == reqs[i]) continue;
379379
if (MPI_ERR_PENDING == reqs[i]->req_status.MPI_ERROR) continue;
380-
err = reqs[i]->req_status.MPI_ERROR;
381-
break;
380+
if (reqs[i]->req_status.MPI_ERROR != MPI_SUCCESS) {
381+
err = reqs[i]->req_status.MPI_ERROR;
382+
break;
383+
}
382384
}
383385
}
384386
ompi_coll_base_free_reqs(reqs, nreqs);

0 commit comments

Comments
 (0)