35
35
36
36
int
37
37
ompi_coll_base_bcast_intra_generic ( void * buffer ,
38
- int original_count ,
39
- struct ompi_datatype_t * datatype ,
40
- int root ,
41
- struct ompi_communicator_t * comm ,
42
- mca_coll_base_module_t * module ,
43
- uint32_t count_by_segment ,
44
- ompi_coll_tree_t * tree )
38
+ int count ,
39
+ struct ompi_datatype_t * datatype ,
40
+ int root ,
41
+ struct ompi_communicator_t * comm ,
42
+ mca_coll_base_module_t * module ,
43
+ size_t segment_size ,
44
+ ompi_coll_tree_t * tree )
45
45
{
46
- int err = 0 , line , i , rank , segindex , req_index ;
47
- int num_segments ; /* Number of segments */
48
- int sendcount ; /* number of elements sent in this segment */
49
- size_t realsegsize , type_size ;
50
- char * tmpbuf ;
51
- ptrdiff_t extent , lb ;
52
- ompi_request_t * recv_reqs [2 ] = {MPI_REQUEST_NULL , MPI_REQUEST_NULL };
53
- ompi_request_t * * send_reqs = NULL ;
54
-
55
- #if OPAL_ENABLE_DEBUG
56
- int size ;
57
- size = ompi_comm_size (comm );
58
- assert ( size > 1 );
59
- #endif
60
- rank = ompi_comm_rank (comm );
61
-
62
- ompi_datatype_get_extent (datatype , & lb , & extent );
63
- ompi_datatype_type_size ( datatype , & type_size );
64
- num_segments = (original_count + count_by_segment - 1 ) / count_by_segment ;
65
- realsegsize = (ptrdiff_t )count_by_segment * extent ;
66
-
67
- /* Set the buffer pointers */
68
- tmpbuf = (char * ) buffer ;
69
-
70
- if ( tree -> tree_nextsize != 0 ) {
71
- send_reqs = coll_base_comm_get_reqs (module -> base_data , tree -> tree_nextsize );
72
- if ( NULL == send_reqs ) { err = OMPI_ERR_OUT_OF_RESOURCE ; line = __LINE__ ; goto error_hndl ; }
73
- }
74
-
75
- /* Root code */
76
- if ( rank == root ) {
77
- /*
78
- For each segment:
79
- - send segment to all children.
80
- The last segment may have less elements than other segments.
81
- */
82
- sendcount = count_by_segment ;
83
- for ( segindex = 0 ; segindex < num_segments ; segindex ++ ) {
84
- if ( segindex == (num_segments - 1 ) ) {
85
- sendcount = original_count - segindex * count_by_segment ;
86
- }
87
- for ( i = 0 ; i < tree -> tree_nextsize ; i ++ ) {
88
- err = MCA_PML_CALL (isend (tmpbuf , sendcount , datatype ,
89
- tree -> tree_next [i ],
90
- MCA_COLL_BASE_TAG_BCAST ,
91
- MCA_PML_BASE_SEND_STANDARD , comm ,
92
- & send_reqs [i ]));
93
- if (err != MPI_SUCCESS ) { line = __LINE__ ; goto error_hndl ; }
94
- }
95
-
96
- /* complete the sends before starting the next sends */
97
- err = ompi_request_wait_all ( tree -> tree_nextsize , send_reqs ,
98
- MPI_STATUSES_IGNORE );
99
- if (err != MPI_SUCCESS ) { line = __LINE__ ; goto error_hndl ; }
100
-
101
- /* update tmp buffer */
102
- tmpbuf += realsegsize ;
103
-
104
- }
105
- }
106
-
107
- /* Intermediate nodes code */
108
- else if ( tree -> tree_nextsize > 0 ) {
109
- /*
110
- Create the pipeline.
111
- 1) Post the first receive
112
- 2) For segments 1 .. num_segments
113
- - post new receive
114
- - wait on the previous receive to complete
115
- - send this data to children
116
- 3) Wait on the last segment
117
- 4) Compute number of elements in last segment.
118
- 5) Send the last segment to children
119
- */
120
- req_index = 0 ;
121
- err = MCA_PML_CALL (irecv (tmpbuf , count_by_segment , datatype ,
122
- tree -> tree_prev , MCA_COLL_BASE_TAG_BCAST ,
123
- comm , & recv_reqs [req_index ]));
124
- if (err != MPI_SUCCESS ) { line = __LINE__ ; goto error_hndl ; }
125
-
126
- for ( segindex = 1 ; segindex < num_segments ; segindex ++ ) {
127
-
128
- req_index = req_index ^ 0x1 ;
129
-
130
- /* post new irecv */
131
- err = MCA_PML_CALL (irecv ( tmpbuf + realsegsize , count_by_segment ,
132
- datatype , tree -> tree_prev ,
133
- MCA_COLL_BASE_TAG_BCAST ,
134
- comm , & recv_reqs [req_index ]));
135
- if (err != MPI_SUCCESS ) { line = __LINE__ ; goto error_hndl ; }
136
-
137
- /* wait for and forward the previous segment to children */
138
- err = ompi_request_wait ( & recv_reqs [req_index ^ 0x1 ],
139
- MPI_STATUS_IGNORE );
140
- if (err != MPI_SUCCESS ) { line = __LINE__ ; goto error_hndl ; }
141
-
142
- for ( i = 0 ; i < tree -> tree_nextsize ; i ++ ) {
143
- err = MCA_PML_CALL (isend (tmpbuf , count_by_segment , datatype ,
144
- tree -> tree_next [i ],
145
- MCA_COLL_BASE_TAG_BCAST ,
146
- MCA_PML_BASE_SEND_STANDARD , comm ,
147
- & send_reqs [i ]));
148
- if (err != MPI_SUCCESS ) { line = __LINE__ ; goto error_hndl ; }
149
- }
150
-
151
- /* complete the sends before starting the next iteration */
152
- err = ompi_request_wait_all ( tree -> tree_nextsize , send_reqs ,
153
- MPI_STATUSES_IGNORE );
154
- if (err != MPI_SUCCESS ) { line = __LINE__ ; goto error_hndl ; }
155
-
156
- /* Update the receive buffer */
157
- tmpbuf += realsegsize ;
158
-
159
- }
160
-
161
- /* Process the last segment */
162
- err = ompi_request_wait ( & recv_reqs [req_index ], MPI_STATUS_IGNORE );
163
- if (err != MPI_SUCCESS ) { line = __LINE__ ; goto error_hndl ; }
164
- sendcount = original_count - (ptrdiff_t )(num_segments - 1 ) * count_by_segment ;
165
- for ( i = 0 ; i < tree -> tree_nextsize ; i ++ ) {
166
- err = MCA_PML_CALL (isend (tmpbuf , sendcount , datatype ,
167
- tree -> tree_next [i ],
168
- MCA_COLL_BASE_TAG_BCAST ,
169
- MCA_PML_BASE_SEND_STANDARD , comm ,
170
- & send_reqs [i ]));
171
- if (err != MPI_SUCCESS ) { line = __LINE__ ; goto error_hndl ; }
172
- }
173
-
174
- err = ompi_request_wait_all ( tree -> tree_nextsize , send_reqs ,
175
- MPI_STATUSES_IGNORE );
176
- if (err != MPI_SUCCESS ) { line = __LINE__ ; goto error_hndl ; }
177
- }
178
-
179
- /* Leaf nodes */
180
- else {
181
- /*
182
- Receive all segments from parent in a loop:
183
- 1) post irecv for the first segment
184
- 2) for segments 1 .. num_segments
185
- - post irecv for the next segment
186
- - wait on the previous segment to arrive
187
- 3) wait for the last segment
188
- */
189
- req_index = 0 ;
190
- err = MCA_PML_CALL (irecv (tmpbuf , count_by_segment , datatype ,
191
- tree -> tree_prev , MCA_COLL_BASE_TAG_BCAST ,
192
- comm , & recv_reqs [req_index ]));
193
- if (err != MPI_SUCCESS ) { line = __LINE__ ; goto error_hndl ; }
194
-
195
- for ( segindex = 1 ; segindex < num_segments ; segindex ++ ) {
196
- req_index = req_index ^ 0x1 ;
197
- tmpbuf += realsegsize ;
198
- /* post receive for the next segment */
199
- err = MCA_PML_CALL (irecv (tmpbuf , count_by_segment , datatype ,
200
- tree -> tree_prev , MCA_COLL_BASE_TAG_BCAST ,
201
- comm , & recv_reqs [req_index ]));
202
- if (err != MPI_SUCCESS ) { line = __LINE__ ; goto error_hndl ; }
203
- /* wait on the previous segment */
204
- err = ompi_request_wait ( & recv_reqs [req_index ^ 0x1 ],
205
- MPI_STATUS_IGNORE );
206
- if (err != MPI_SUCCESS ) { line = __LINE__ ; goto error_hndl ; }
207
- }
208
-
209
- err = ompi_request_wait ( & recv_reqs [req_index ], MPI_STATUS_IGNORE );
210
- if (err != MPI_SUCCESS ) { line = __LINE__ ; goto error_hndl ; }
211
- }
212
-
213
- return (MPI_SUCCESS );
214
-
215
- error_hndl :
216
- OPAL_OUTPUT ( (ompi_coll_base_framework .framework_output ,"%s:%4d\tError occurred %d, rank %2d" ,
217
- __FILE__ , line , err , rank ) );
218
- (void )line ; // silence compiler warnings
219
- ompi_coll_base_free_reqs ( recv_reqs , 2 );
220
- if ( NULL != send_reqs ) {
221
- ompi_coll_base_free_reqs (send_reqs , tree -> tree_nextsize );
222
- }
223
-
224
- return err ;
225
- }
226
-
227
- int
228
- ompi_coll_base_bcast_intra_generic2 ( void * buffer ,
229
- int count ,
230
- struct ompi_datatype_t * datatype ,
231
- int root ,
232
- struct ompi_communicator_t * comm ,
233
- mca_coll_base_module_t * module ,
234
- size_t segment_size ,
235
- ompi_coll_tree_t * tree )
236
- {
237
- int err = 0 , line , i = 0 , rank , req_index ;
46
+ int err = 0 , line , i = 0 , rank ;
238
47
opal_convertor_t send_convertors [2 ], recv_convertors [2 ];
239
48
size_t offset = 0 ;
240
49
size_t next_offset ;
@@ -501,23 +310,15 @@ ompi_coll_base_bcast_intra_bintree ( void* buffer,
501
310
mca_coll_base_module_t * module ,
502
311
uint32_t segsize )
503
312
{
504
- int segcount = count ;
505
- size_t typelng ;
506
313
mca_coll_base_comm_t * data = module -> base_data ;
507
314
508
315
COLL_BASE_UPDATE_BINTREE ( comm , module , root );
509
316
510
- /**
511
- * Determine number of elements sent per operation.
512
- */
513
- ompi_datatype_type_size ( datatype , & typelng );
514
- COLL_BASE_COMPUTED_SEGCOUNT ( segsize , typelng , segcount );
515
-
516
- OPAL_OUTPUT ((ompi_coll_base_framework .framework_output ,"coll:base:bcast_intra_binary rank %d ss %5d typelng %lu segcount %d" ,
517
- ompi_comm_rank (comm ), segsize , (unsigned long )typelng , segcount ));
317
+ OPAL_OUTPUT ((ompi_coll_base_framework .framework_output ,"coll:base:bcast_intra_binary rank %d ss %5d" ,
318
+ ompi_comm_rank (comm ), segsize ));
518
319
519
320
return ompi_coll_base_bcast_intra_generic ( buffer , count , datatype , root , comm , module ,
520
- segcount , data -> cached_bintree );
321
+ segsize , data -> cached_bintree );
521
322
}
522
323
523
324
int
@@ -536,8 +337,8 @@ ompi_coll_base_bcast_intra_pipeline( void* buffer,
536
337
OPAL_OUTPUT ((ompi_coll_base_framework .framework_output ,"coll:base:bcast_intra_pipeline rank %d ss %5d" ,
537
338
ompi_comm_rank (comm ), segsize ));
538
339
539
- return ompi_coll_base_bcast_intra_generic2 ( buffer , count , datatype , root , comm , module ,
540
- segsize , data -> cached_pipeline );
340
+ return ompi_coll_base_bcast_intra_generic ( buffer , count , datatype , root , comm , module ,
341
+ segsize , data -> cached_pipeline );
541
342
}
542
343
543
344
int
@@ -549,23 +350,15 @@ ompi_coll_base_bcast_intra_chain( void* buffer,
549
350
mca_coll_base_module_t * module ,
550
351
uint32_t segsize , int32_t chains )
551
352
{
552
- int segcount = count ;
553
- size_t typelng ;
554
353
mca_coll_base_comm_t * data = module -> base_data ;
555
354
556
355
COLL_BASE_UPDATE_CHAIN ( comm , module , root , chains );
557
356
558
- /**
559
- * Determine number of elements sent per operation.
560
- */
561
- ompi_datatype_type_size ( datatype , & typelng );
562
- COLL_BASE_COMPUTED_SEGCOUNT ( segsize , typelng , segcount );
563
-
564
- OPAL_OUTPUT ((ompi_coll_base_framework .framework_output ,"coll:base:bcast_intra_chain rank %d fo %d ss %5d typelng %lu segcount %d" ,
565
- ompi_comm_rank (comm ), chains , segsize , (unsigned long )typelng , segcount ));
357
+ OPAL_OUTPUT ((ompi_coll_base_framework .framework_output ,"coll:base:bcast_intra_chain rank %d fo %d ss %5d" ,
358
+ ompi_comm_rank (comm ), chains , segsize ));
566
359
567
360
return ompi_coll_base_bcast_intra_generic ( buffer , count , datatype , root , comm , module ,
568
- segcount , data -> cached_chain );
361
+ segsize , data -> cached_chain );
569
362
}
570
363
571
364
int
@@ -577,23 +370,15 @@ ompi_coll_base_bcast_intra_binomial( void* buffer,
577
370
mca_coll_base_module_t * module ,
578
371
uint32_t segsize )
579
372
{
580
- int segcount = count ;
581
- size_t typelng ;
582
373
mca_coll_base_comm_t * data = module -> base_data ;
583
374
584
375
COLL_BASE_UPDATE_BMTREE ( comm , module , root );
585
376
586
- /**
587
- * Determine number of elements sent per operation.
588
- */
589
- ompi_datatype_type_size ( datatype , & typelng );
590
- COLL_BASE_COMPUTED_SEGCOUNT ( segsize , typelng , segcount );
591
-
592
- OPAL_OUTPUT ((ompi_coll_base_framework .framework_output ,"coll:base:bcast_intra_binomial rank %d ss %5d typelng %lu segcount %d" ,
593
- ompi_comm_rank (comm ), segsize , (unsigned long )typelng , segcount ));
377
+ OPAL_OUTPUT ((ompi_coll_base_framework .framework_output ,"coll:base:bcast_intra_binomial rank %d ss %5d" ,
378
+ ompi_comm_rank (comm ), segsize ));
594
379
595
380
return ompi_coll_base_bcast_intra_generic ( buffer , count , datatype , root , comm , module ,
596
- segcount , data -> cached_bmtree );
381
+ segsize , data -> cached_bmtree );
597
382
}
598
383
599
384
int
0 commit comments