36
36
37
37
int
38
38
ompi_coll_base_bcast_intra_generic ( void * buffer ,
39
- int count ,
40
- struct ompi_datatype_t * datatype ,
41
- int root ,
42
- struct ompi_communicator_t * comm ,
43
- mca_coll_base_module_t * module ,
44
- size_t segment_size ,
45
- ompi_coll_tree_t * tree )
46
- {
47
- int err = 0 , line , i , rank , segindex , req_index ;
48
- int num_segments ; /* Number of segments */
49
- int sendcount ; /* number of elements sent in this segment */
50
- size_t realsegsize , type_size ;
51
- char * tmpbuf ;
52
- ptrdiff_t extent , lb ;
53
- ompi_request_t * recv_reqs [2 ] = {MPI_REQUEST_NULL , MPI_REQUEST_NULL };
54
- ompi_request_t * * send_reqs = NULL ;
55
-
56
- #if OPAL_ENABLE_DEBUG
57
- int size ;
58
- size = ompi_comm_size (comm );
59
- assert ( size > 1 );
60
- #endif
61
- rank = ompi_comm_rank (comm );
62
-
63
- ompi_datatype_get_extent (datatype , & lb , & extent );
64
- ompi_datatype_type_size ( datatype , & type_size );
65
- num_segments = (original_count + count_by_segment - 1 ) / count_by_segment ;
66
- realsegsize = (ptrdiff_t )count_by_segment * extent ;
67
-
68
- /* Set the buffer pointers */
69
- tmpbuf = (char * ) buffer ;
70
-
71
- if ( tree -> tree_nextsize != 0 ) {
72
- send_reqs = ompi_coll_base_comm_get_reqs (module -> base_data , tree -> tree_nextsize );
73
- if ( NULL == send_reqs ) { err = OMPI_ERR_OUT_OF_RESOURCE ; line = __LINE__ ; goto error_hndl ; }
74
- }
75
-
76
- /* Root code */
77
- if ( rank == root ) {
78
- /*
79
- For each segment:
80
- - send segment to all children.
81
- The last segment may have less elements than other segments.
82
- */
83
- sendcount = count_by_segment ;
84
- for ( segindex = 0 ; segindex < num_segments ; segindex ++ ) {
85
- if ( segindex == (num_segments - 1 ) ) {
86
- sendcount = original_count - segindex * count_by_segment ;
87
- }
88
- for ( i = 0 ; i < tree -> tree_nextsize ; i ++ ) {
89
- err = MCA_PML_CALL (isend (tmpbuf , sendcount , datatype ,
90
- tree -> tree_next [i ],
91
- MCA_COLL_BASE_TAG_BCAST ,
92
- MCA_PML_BASE_SEND_STANDARD , comm ,
93
- & send_reqs [i ]));
94
- if (err != MPI_SUCCESS ) { line = __LINE__ ; goto error_hndl ; }
95
- }
96
-
97
- /* complete the sends before starting the next sends */
98
- err = ompi_request_wait_all ( tree -> tree_nextsize , send_reqs ,
99
- MPI_STATUSES_IGNORE );
100
- if (err != MPI_SUCCESS ) { line = __LINE__ ; goto error_hndl ; }
101
-
102
- /* update tmp buffer */
103
- tmpbuf += realsegsize ;
104
-
105
- }
106
- }
107
-
108
- /* Intermediate nodes code */
109
- else if ( tree -> tree_nextsize > 0 ) {
110
- /*
111
- Create the pipeline.
112
- 1) Post the first receive
113
- 2) For segments 1 .. num_segments
114
- - post new receive
115
- - wait on the previous receive to complete
116
- - send this data to children
117
- 3) Wait on the last segment
118
- 4) Compute number of elements in last segment.
119
- 5) Send the last segment to children
120
- */
121
- req_index = 0 ;
122
- err = MCA_PML_CALL (irecv (tmpbuf , count_by_segment , datatype ,
123
- tree -> tree_prev , MCA_COLL_BASE_TAG_BCAST ,
124
- comm , & recv_reqs [req_index ]));
125
- if (err != MPI_SUCCESS ) { line = __LINE__ ; goto error_hndl ; }
126
-
127
- for ( segindex = 1 ; segindex < num_segments ; segindex ++ ) {
128
-
129
- req_index = req_index ^ 0x1 ;
130
-
131
- /* post new irecv */
132
- err = MCA_PML_CALL (irecv ( tmpbuf + realsegsize , count_by_segment ,
133
- datatype , tree -> tree_prev ,
134
- MCA_COLL_BASE_TAG_BCAST ,
135
- comm , & recv_reqs [req_index ]));
136
- if (err != MPI_SUCCESS ) { line = __LINE__ ; goto error_hndl ; }
137
-
138
- /* wait for and forward the previous segment to children */
139
- err = ompi_request_wait ( & recv_reqs [req_index ^ 0x1 ],
140
- MPI_STATUS_IGNORE );
141
- if (err != MPI_SUCCESS ) { line = __LINE__ ; goto error_hndl ; }
142
-
143
- for ( i = 0 ; i < tree -> tree_nextsize ; i ++ ) {
144
- err = MCA_PML_CALL (isend (tmpbuf , count_by_segment , datatype ,
145
- tree -> tree_next [i ],
146
- MCA_COLL_BASE_TAG_BCAST ,
147
- MCA_PML_BASE_SEND_STANDARD , comm ,
148
- & send_reqs [i ]));
149
- if (err != MPI_SUCCESS ) { line = __LINE__ ; goto error_hndl ; }
150
- }
151
-
152
- /* complete the sends before starting the next iteration */
153
- err = ompi_request_wait_all ( tree -> tree_nextsize , send_reqs ,
154
- MPI_STATUSES_IGNORE );
155
- if (err != MPI_SUCCESS ) { line = __LINE__ ; goto error_hndl ; }
156
-
157
- /* Update the receive buffer */
158
- tmpbuf += realsegsize ;
159
-
160
- }
161
-
162
- /* Process the last segment */
163
- err = ompi_request_wait ( & recv_reqs [req_index ], MPI_STATUS_IGNORE );
164
- if (err != MPI_SUCCESS ) { line = __LINE__ ; goto error_hndl ; }
165
- sendcount = original_count - (ptrdiff_t )(num_segments - 1 ) * count_by_segment ;
166
- for ( i = 0 ; i < tree -> tree_nextsize ; i ++ ) {
167
- err = MCA_PML_CALL (isend (tmpbuf , sendcount , datatype ,
168
- tree -> tree_next [i ],
169
- MCA_COLL_BASE_TAG_BCAST ,
170
- MCA_PML_BASE_SEND_STANDARD , comm ,
171
- & send_reqs [i ]));
172
- if (err != MPI_SUCCESS ) { line = __LINE__ ; goto error_hndl ; }
173
- }
174
-
175
- err = ompi_request_wait_all ( tree -> tree_nextsize , send_reqs ,
176
- MPI_STATUSES_IGNORE );
177
- if (err != MPI_SUCCESS ) { line = __LINE__ ; goto error_hndl ; }
178
- }
179
-
180
- /* Leaf nodes */
181
- else {
182
- /*
183
- Receive all segments from parent in a loop:
184
- 1) post irecv for the first segment
185
- 2) for segments 1 .. num_segments
186
- - post irecv for the next segment
187
- - wait on the previous segment to arrive
188
- 3) wait for the last segment
189
- */
190
- req_index = 0 ;
191
- err = MCA_PML_CALL (irecv (tmpbuf , count_by_segment , datatype ,
192
- tree -> tree_prev , MCA_COLL_BASE_TAG_BCAST ,
193
- comm , & recv_reqs [req_index ]));
194
- if (err != MPI_SUCCESS ) { line = __LINE__ ; goto error_hndl ; }
195
-
196
- for ( segindex = 1 ; segindex < num_segments ; segindex ++ ) {
197
- req_index = req_index ^ 0x1 ;
198
- tmpbuf += realsegsize ;
199
- /* post receive for the next segment */
200
- err = MCA_PML_CALL (irecv (tmpbuf , count_by_segment , datatype ,
201
- tree -> tree_prev , MCA_COLL_BASE_TAG_BCAST ,
202
- comm , & recv_reqs [req_index ]));
203
- if (err != MPI_SUCCESS ) { line = __LINE__ ; goto error_hndl ; }
204
- /* wait on the previous segment */
205
- err = ompi_request_wait ( & recv_reqs [req_index ^ 0x1 ],
206
- MPI_STATUS_IGNORE );
207
- if (err != MPI_SUCCESS ) { line = __LINE__ ; goto error_hndl ; }
208
- }
209
-
210
- err = ompi_request_wait ( & recv_reqs [req_index ], MPI_STATUS_IGNORE );
211
- if (err != MPI_SUCCESS ) { line = __LINE__ ; goto error_hndl ; }
212
- }
213
-
214
- return (MPI_SUCCESS );
215
-
216
- error_hndl :
217
- OPAL_OUTPUT ( (ompi_coll_base_framework .framework_output ,"%s:%4d\tError occurred %d, rank %2d" ,
218
- __FILE__ , line , err , rank ) );
219
- (void )line ; // silence compiler warnings
220
- ompi_coll_base_free_reqs ( recv_reqs , 2 );
221
- if ( NULL != send_reqs ) {
222
- ompi_coll_base_free_reqs (send_reqs , tree -> tree_nextsize );
223
- }
224
-
225
- return err ;
226
- }
227
-
228
- int
229
- ompi_coll_base_bcast_intra_generic2 ( void * buffer ,
230
39
int count ,
231
40
struct ompi_datatype_t * datatype ,
232
41
int root ,
@@ -252,7 +61,7 @@ ompi_coll_base_bcast_intra_generic2( void* buffer,
252
61
rank = ompi_comm_rank (comm );
253
62
254
63
if ( tree -> tree_nextsize != 0 ) {
255
- send_reqs = coll_base_comm_get_reqs (module -> base_data , tree -> tree_nextsize );
64
+ send_reqs = ompi_coll_base_comm_get_reqs (module -> base_data , tree -> tree_nextsize );
256
65
if ( NULL == send_reqs ) { err = OMPI_ERR_OUT_OF_RESOURCE ; line = __LINE__ ; goto error_hndl ; }
257
66
}
258
67
@@ -793,10 +602,11 @@ ompi_coll_base_bcast_intra_split_bintree ( void* buffer,
793
602
offsets [0 ] += sizes [0 ];
794
603
}
795
604
opal_convertor_set_position (& recv_convertors [0 ], & offsets [0 ]);
605
+
796
606
offsets [0 ] += sizes [lr ] - remainings [lr ];
797
607
offsets [1 ] = offsets [0 ] + segsize ;
798
- if (offsets [1 ] > sizes [lr ]) {
799
- offsets [1 ] = sizes [lr ];
608
+ if (offsets [1 ] > sizes [lr ] + ( lr ? sizes [ 0 ]: 0 ) ) {
609
+ offsets [1 ] = sizes [lr ] + ( lr ? sizes [ 0 ]: 0 ) ;
800
610
}
801
611
opal_convertor_set_position (& recv_convertors [1 ], & offsets [1 ]);
802
612
if (offsets [1 ] == offsets [0 ]) {
0 commit comments