Skip to content

Commit cc4be03

Browse files
authored
Merge pull request #10520 from awlauria/more_alltoall_fixes
v4.0.x: Residual alltoall fixes
2 parents e771506 + 7ce8359 commit cc4be03

File tree

2 files changed

+12
-4
lines changed

2 files changed

+12
-4
lines changed

ompi/mca/coll/base/coll_base_alltoall.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@
 #include "mpi.h"
 #include "ompi/constants.h"
 #include "ompi/datatype/ompi_datatype.h"
+#include "opal/datatype/opal_convertor_internal.h"
 #include "ompi/communicator/communicator.h"
 #include "ompi/mca/coll/coll.h"
 #include "ompi/mca/coll/base/coll_tags.h"

ompi/mca/coll/base/coll_base_alltoallv.c

Lines changed: 11 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@
 #include "mpi.h"
 #include "ompi/constants.h"
 #include "ompi/datatype/ompi_datatype.h"
+#include "opal/datatype/opal_convertor_internal.h"
 #include "ompi/communicator/communicator.h"
 #include "ompi/mca/coll/coll.h"
 #include "ompi/mca/coll/base/coll_tags.h"
@@ -42,7 +43,7 @@
 /*
  * We want to minimize the amount of temporary memory needed while allowing as many ranks
  * to exchange data simultaneously. We use a variation of the ring algorithm, where in a
- * single step a process echange the data with both neighbors at distance k (on the left
+ * single step a process exchange the data with both neighbors at distance k (on the left
  * and the right on a logical ring topology). With this approach we need to pack the data
  * for a single of the two neighbors, as we can then use the original buffer (and datatype
  * and count) to send the data to the other.
@@ -57,16 +58,22 @@ mca_coll_base_alltoallv_intra_basic_inplace(const void *rbuf, const int *rcounts
     ptrdiff_t extent;
     ompi_request_t *req = MPI_REQUEST_NULL;
     char *tmp_buffer;
-    size_t packed_size = 0, max_size;
+    size_t packed_size = 0, max_size, type_size;
     opal_convertor_t convertor;

     /* Initialize. */

     size = ompi_comm_size(comm);
     rank = ompi_comm_rank(comm);
+    ompi_datatype_type_size(rdtype, &type_size);

-    ompi_datatype_type_size(rdtype, &max_size);
-    max_size *= rcounts[rank];
+    for (i = 0, max_size = 0 ; i < size ; ++i) {
+        if (i == rank) {
+            continue;
+        }
+        packed_size = rcounts[i] * type_size;
+        max_size = packed_size > max_size ? packed_size : max_size;
+    }

     /* Easy way out */
     if ((1 == size) || (0 == max_size) ) {

0 commit comments

Comments
 (0)