43
43
/*
44
44
* We want to minimize the amount of temporary memory needed while allowing as many ranks
45
45
* to exchange data simultaneously. We use a variation of the ring algorithm, where in a
46
- * single step a process echange the data with both neighbors at distance k (on the left
46
+ * single step a process exchanges data with both neighbors at distance k (on the left
47
47
* and the right on a logical ring topology). With this approach we need to pack the data
48
48
* for only one of the two neighbors, as we can then use the original buffer (and datatype
49
49
* and count) to send the data to the other.
@@ -58,16 +58,22 @@ mca_coll_base_alltoallv_intra_basic_inplace(const void *rbuf, const int *rcounts
58
58
ptrdiff_t extent ;
59
59
ompi_request_t * req = MPI_REQUEST_NULL ;
60
60
char * tmp_buffer ;
61
- size_t packed_size = 0 , max_size ;
61
+ size_t packed_size = 0 , max_size , type_size ;
62
62
opal_convertor_t convertor ;
63
63
64
64
/* Initialize. */
65
65
66
66
size = ompi_comm_size (comm );
67
67
rank = ompi_comm_rank (comm );
68
+ ompi_datatype_type_size (rdtype , & type_size );
68
69
69
- ompi_datatype_type_size (rdtype , & max_size );
70
- max_size *= rcounts [rank ];
70
+ for (i = 0 , max_size = 0 ; i < size ; ++ i ) {
71
+ if (i == rank ) {
72
+ continue ;
73
+ }
74
+ packed_size = rcounts [i ] * type_size ;
75
+ max_size = packed_size > max_size ? packed_size : max_size ;
76
+ }
71
77
72
78
/* Easy way out */
73
79
if ((1 == size ) || (0 == max_size ) ) {
0 commit comments