Skip to content

Commit 4ed5c62

Browse files
committed
checkpoint
1 parent 6c026a5 commit 4ed5c62

File tree

9 files changed

+134
-248
lines changed

9 files changed

+134
-248
lines changed

ompi/mca/coll/base/coll_base_bcast.c

Lines changed: 19 additions & 234 deletions
Original file line numberDiff line numberDiff line change
@@ -35,206 +35,15 @@
3535

3636
int
3737
ompi_coll_base_bcast_intra_generic( void* buffer,
38-
int original_count,
39-
struct ompi_datatype_t* datatype,
40-
int root,
41-
struct ompi_communicator_t* comm,
42-
mca_coll_base_module_t *module,
43-
uint32_t count_by_segment,
44-
ompi_coll_tree_t* tree )
38+
int count,
39+
struct ompi_datatype_t* datatype,
40+
int root,
41+
struct ompi_communicator_t* comm,
42+
mca_coll_base_module_t *module,
43+
size_t segment_size,
44+
ompi_coll_tree_t* tree )
4545
{
46-
int err = 0, line, i, rank, segindex, req_index;
47-
int num_segments; /* Number of segments */
48-
int sendcount; /* number of elements sent in this segment */
49-
size_t realsegsize, type_size;
50-
char *tmpbuf;
51-
ptrdiff_t extent, lb;
52-
ompi_request_t *recv_reqs[2] = {MPI_REQUEST_NULL, MPI_REQUEST_NULL};
53-
ompi_request_t **send_reqs = NULL;
54-
55-
#if OPAL_ENABLE_DEBUG
56-
int size;
57-
size = ompi_comm_size(comm);
58-
assert( size > 1 );
59-
#endif
60-
rank = ompi_comm_rank(comm);
61-
62-
ompi_datatype_get_extent (datatype, &lb, &extent);
63-
ompi_datatype_type_size( datatype, &type_size );
64-
num_segments = (original_count + count_by_segment - 1) / count_by_segment;
65-
realsegsize = (ptrdiff_t)count_by_segment * extent;
66-
67-
/* Set the buffer pointers */
68-
tmpbuf = (char *) buffer;
69-
70-
if( tree->tree_nextsize != 0 ) {
71-
send_reqs = coll_base_comm_get_reqs(module->base_data, tree->tree_nextsize);
72-
if( NULL == send_reqs ) { err = OMPI_ERR_OUT_OF_RESOURCE; line = __LINE__; goto error_hndl; }
73-
}
74-
75-
/* Root code */
76-
if( rank == root ) {
77-
/*
78-
For each segment:
79-
- send segment to all children.
80-
The last segment may have less elements than other segments.
81-
*/
82-
sendcount = count_by_segment;
83-
for( segindex = 0; segindex < num_segments; segindex++ ) {
84-
if( segindex == (num_segments - 1) ) {
85-
sendcount = original_count - segindex * count_by_segment;
86-
}
87-
for( i = 0; i < tree->tree_nextsize; i++ ) {
88-
err = MCA_PML_CALL(isend(tmpbuf, sendcount, datatype,
89-
tree->tree_next[i],
90-
MCA_COLL_BASE_TAG_BCAST,
91-
MCA_PML_BASE_SEND_STANDARD, comm,
92-
&send_reqs[i]));
93-
if (err != MPI_SUCCESS) { line = __LINE__; goto error_hndl; }
94-
}
95-
96-
/* complete the sends before starting the next sends */
97-
err = ompi_request_wait_all( tree->tree_nextsize, send_reqs,
98-
MPI_STATUSES_IGNORE );
99-
if (err != MPI_SUCCESS) { line = __LINE__; goto error_hndl; }
100-
101-
/* update tmp buffer */
102-
tmpbuf += realsegsize;
103-
104-
}
105-
}
106-
107-
/* Intermediate nodes code */
108-
else if( tree->tree_nextsize > 0 ) {
109-
/*
110-
Create the pipeline.
111-
1) Post the first receive
112-
2) For segments 1 .. num_segments
113-
- post new receive
114-
- wait on the previous receive to complete
115-
- send this data to children
116-
3) Wait on the last segment
117-
4) Compute number of elements in last segment.
118-
5) Send the last segment to children
119-
*/
120-
req_index = 0;
121-
err = MCA_PML_CALL(irecv(tmpbuf, count_by_segment, datatype,
122-
tree->tree_prev, MCA_COLL_BASE_TAG_BCAST,
123-
comm, &recv_reqs[req_index]));
124-
if (err != MPI_SUCCESS) { line = __LINE__; goto error_hndl; }
125-
126-
for( segindex = 1; segindex < num_segments; segindex++ ) {
127-
128-
req_index = req_index ^ 0x1;
129-
130-
/* post new irecv */
131-
err = MCA_PML_CALL(irecv( tmpbuf + realsegsize, count_by_segment,
132-
datatype, tree->tree_prev,
133-
MCA_COLL_BASE_TAG_BCAST,
134-
comm, &recv_reqs[req_index]));
135-
if (err != MPI_SUCCESS) { line = __LINE__; goto error_hndl; }
136-
137-
/* wait for and forward the previous segment to children */
138-
err = ompi_request_wait( &recv_reqs[req_index ^ 0x1],
139-
MPI_STATUS_IGNORE );
140-
if (err != MPI_SUCCESS) { line = __LINE__; goto error_hndl; }
141-
142-
for( i = 0; i < tree->tree_nextsize; i++ ) {
143-
err = MCA_PML_CALL(isend(tmpbuf, count_by_segment, datatype,
144-
tree->tree_next[i],
145-
MCA_COLL_BASE_TAG_BCAST,
146-
MCA_PML_BASE_SEND_STANDARD, comm,
147-
&send_reqs[i]));
148-
if (err != MPI_SUCCESS) { line = __LINE__; goto error_hndl; }
149-
}
150-
151-
/* complete the sends before starting the next iteration */
152-
err = ompi_request_wait_all( tree->tree_nextsize, send_reqs,
153-
MPI_STATUSES_IGNORE );
154-
if (err != MPI_SUCCESS) { line = __LINE__; goto error_hndl; }
155-
156-
/* Update the receive buffer */
157-
tmpbuf += realsegsize;
158-
159-
}
160-
161-
/* Process the last segment */
162-
err = ompi_request_wait( &recv_reqs[req_index], MPI_STATUS_IGNORE );
163-
if (err != MPI_SUCCESS) { line = __LINE__; goto error_hndl; }
164-
sendcount = original_count - (ptrdiff_t)(num_segments - 1) * count_by_segment;
165-
for( i = 0; i < tree->tree_nextsize; i++ ) {
166-
err = MCA_PML_CALL(isend(tmpbuf, sendcount, datatype,
167-
tree->tree_next[i],
168-
MCA_COLL_BASE_TAG_BCAST,
169-
MCA_PML_BASE_SEND_STANDARD, comm,
170-
&send_reqs[i]));
171-
if (err != MPI_SUCCESS) { line = __LINE__; goto error_hndl; }
172-
}
173-
174-
err = ompi_request_wait_all( tree->tree_nextsize, send_reqs,
175-
MPI_STATUSES_IGNORE );
176-
if (err != MPI_SUCCESS) { line = __LINE__; goto error_hndl; }
177-
}
178-
179-
/* Leaf nodes */
180-
else {
181-
/*
182-
Receive all segments from parent in a loop:
183-
1) post irecv for the first segment
184-
2) for segments 1 .. num_segments
185-
- post irecv for the next segment
186-
- wait on the previous segment to arrive
187-
3) wait for the last segment
188-
*/
189-
req_index = 0;
190-
err = MCA_PML_CALL(irecv(tmpbuf, count_by_segment, datatype,
191-
tree->tree_prev, MCA_COLL_BASE_TAG_BCAST,
192-
comm, &recv_reqs[req_index]));
193-
if (err != MPI_SUCCESS) { line = __LINE__; goto error_hndl; }
194-
195-
for( segindex = 1; segindex < num_segments; segindex++ ) {
196-
req_index = req_index ^ 0x1;
197-
tmpbuf += realsegsize;
198-
/* post receive for the next segment */
199-
err = MCA_PML_CALL(irecv(tmpbuf, count_by_segment, datatype,
200-
tree->tree_prev, MCA_COLL_BASE_TAG_BCAST,
201-
comm, &recv_reqs[req_index]));
202-
if (err != MPI_SUCCESS) { line = __LINE__; goto error_hndl; }
203-
/* wait on the previous segment */
204-
err = ompi_request_wait( &recv_reqs[req_index ^ 0x1],
205-
MPI_STATUS_IGNORE );
206-
if (err != MPI_SUCCESS) { line = __LINE__; goto error_hndl; }
207-
}
208-
209-
err = ompi_request_wait( &recv_reqs[req_index], MPI_STATUS_IGNORE );
210-
if (err != MPI_SUCCESS) { line = __LINE__; goto error_hndl; }
211-
}
212-
213-
return (MPI_SUCCESS);
214-
215-
error_hndl:
216-
OPAL_OUTPUT( (ompi_coll_base_framework.framework_output,"%s:%4d\tError occurred %d, rank %2d",
217-
__FILE__, line, err, rank) );
218-
(void)line; // silence compiler warnings
219-
ompi_coll_base_free_reqs( recv_reqs, 2);
220-
if( NULL != send_reqs ) {
221-
ompi_coll_base_free_reqs(send_reqs, tree->tree_nextsize);
222-
}
223-
224-
return err;
225-
}
226-
227-
int
228-
ompi_coll_base_bcast_intra_generic2( void* buffer,
229-
int count,
230-
struct ompi_datatype_t* datatype,
231-
int root,
232-
struct ompi_communicator_t* comm,
233-
mca_coll_base_module_t *module,
234-
size_t segment_size,
235-
ompi_coll_tree_t* tree )
236-
{
237-
int err = 0, line, i = 0, rank, req_index;
46+
int err = 0, line, i = 0, rank;
23847
opal_convertor_t send_convertors[2], recv_convertors[2];
23948
size_t offset = 0;
24049
size_t next_offset;
@@ -501,23 +310,15 @@ ompi_coll_base_bcast_intra_bintree ( void* buffer,
501310
mca_coll_base_module_t *module,
502311
uint32_t segsize )
503312
{
504-
int segcount = count;
505-
size_t typelng;
506313
mca_coll_base_comm_t *data = module->base_data;
507314

508315
COLL_BASE_UPDATE_BINTREE( comm, module, root );
509316

510-
/**
511-
* Determine number of elements sent per operation.
512-
*/
513-
ompi_datatype_type_size( datatype, &typelng );
514-
COLL_BASE_COMPUTED_SEGCOUNT( segsize, typelng, segcount );
515-
516-
OPAL_OUTPUT((ompi_coll_base_framework.framework_output,"coll:base:bcast_intra_binary rank %d ss %5d typelng %lu segcount %d",
517-
ompi_comm_rank(comm), segsize, (unsigned long)typelng, segcount));
317+
OPAL_OUTPUT((ompi_coll_base_framework.framework_output,"coll:base:bcast_intra_binary rank %d ss %5d",
318+
ompi_comm_rank(comm), segsize));
518319

519320
return ompi_coll_base_bcast_intra_generic( buffer, count, datatype, root, comm, module,
520-
segcount, data->cached_bintree );
321+
segsize, data->cached_bintree );
521322
}
522323

523324
int
@@ -536,8 +337,8 @@ ompi_coll_base_bcast_intra_pipeline( void* buffer,
536337
OPAL_OUTPUT((ompi_coll_base_framework.framework_output,"coll:base:bcast_intra_pipeline rank %d ss %5d",
537338
ompi_comm_rank(comm), segsize));
538339

539-
return ompi_coll_base_bcast_intra_generic2( buffer, count, datatype, root, comm, module,
540-
segsize, data->cached_pipeline );
340+
return ompi_coll_base_bcast_intra_generic( buffer, count, datatype, root, comm, module,
341+
segsize, data->cached_pipeline );
541342
}
542343

543344
int
@@ -549,23 +350,15 @@ ompi_coll_base_bcast_intra_chain( void* buffer,
549350
mca_coll_base_module_t *module,
550351
uint32_t segsize, int32_t chains )
551352
{
552-
int segcount = count;
553-
size_t typelng;
554353
mca_coll_base_comm_t *data = module->base_data;
555354

556355
COLL_BASE_UPDATE_CHAIN( comm, module, root, chains );
557356

558-
/**
559-
* Determine number of elements sent per operation.
560-
*/
561-
ompi_datatype_type_size( datatype, &typelng );
562-
COLL_BASE_COMPUTED_SEGCOUNT( segsize, typelng, segcount );
563-
564-
OPAL_OUTPUT((ompi_coll_base_framework.framework_output,"coll:base:bcast_intra_chain rank %d fo %d ss %5d typelng %lu segcount %d",
565-
ompi_comm_rank(comm), chains, segsize, (unsigned long)typelng, segcount));
357+
OPAL_OUTPUT((ompi_coll_base_framework.framework_output,"coll:base:bcast_intra_chain rank %d fo %d ss %5d",
358+
ompi_comm_rank(comm), chains, segsize));
566359

567360
return ompi_coll_base_bcast_intra_generic( buffer, count, datatype, root, comm, module,
568-
segcount, data->cached_chain );
361+
segsize, data->cached_chain );
569362
}
570363

571364
int
@@ -577,23 +370,15 @@ ompi_coll_base_bcast_intra_binomial( void* buffer,
577370
mca_coll_base_module_t *module,
578371
uint32_t segsize )
579372
{
580-
int segcount = count;
581-
size_t typelng;
582373
mca_coll_base_comm_t *data = module->base_data;
583374

584375
COLL_BASE_UPDATE_BMTREE( comm, module, root );
585376

586-
/**
587-
* Determine number of elements sent per operation.
588-
*/
589-
ompi_datatype_type_size( datatype, &typelng );
590-
COLL_BASE_COMPUTED_SEGCOUNT( segsize, typelng, segcount );
591-
592-
OPAL_OUTPUT((ompi_coll_base_framework.framework_output,"coll:base:bcast_intra_binomial rank %d ss %5d typelng %lu segcount %d",
593-
ompi_comm_rank(comm), segsize, (unsigned long)typelng, segcount));
377+
OPAL_OUTPUT((ompi_coll_base_framework.framework_output,"coll:base:bcast_intra_binomial rank %d ss %5d",
378+
ompi_comm_rank(comm), segsize));
594379

595380
return ompi_coll_base_bcast_intra_generic( buffer, count, datatype, root, comm, module,
596-
segcount, data->cached_bmtree );
381+
segsize, data->cached_bmtree );
597382
}
598383

599384
int

ompi/mca/coll/base/coll_base_functions.h

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -130,8 +130,7 @@ int ompi_coll_base_barrier_intra_tree(BARRIER_ARGS);
130130
int ompi_coll_base_barrier_intra_basic_linear(BARRIER_ARGS);
131131

132132
/* Bcast */
133-
int ompi_coll_base_bcast_intra_generic(BCAST_ARGS, uint32_t count_by_segment, ompi_coll_tree_t* tree);
134-
int ompi_coll_base_bcast_intra_generic2(BCAST_ARGS, size_t segment_size, ompi_coll_tree_t* tree);
133+
int ompi_coll_base_bcast_intra_generic(BCAST_ARGS, size_t segment_size, ompi_coll_tree_t* tree);
135134
int ompi_coll_base_bcast_intra_basic_linear(BCAST_ARGS);
136135
int ompi_coll_base_bcast_intra_chain(BCAST_ARGS, uint32_t segsize, int32_t chains);
137136
int ompi_coll_base_bcast_intra_pipeline(BCAST_ARGS, uint32_t segsize);

ompi/mca/pml/ob1/pml_ob1_recvreq.c

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -694,7 +694,7 @@ void mca_pml_ob1_recv_request_progress_rget( mca_pml_ob1_recv_request_t* recvreq
694694
#endif /* OPAL_CUDA_GDR_SUPPORT */
695695

696696

697-
offset = 0;
697+
offset = recvreq->req_recv.req_base.req_offset;
698698

699699
OPAL_THREAD_LOCK(&recvreq->lock);
700700
opal_convertor_set_position( &recvreq->req_recv.req_base.req_convertor, &offset);
@@ -731,14 +731,12 @@ void mca_pml_ob1_recv_request_progress_rget( mca_pml_ob1_recv_request_t* recvreq
731731
memcpy (frag->remote_handle, hdr + 1, btl->btl_registration_handle_size);
732732

733733
/* update the read location */
734-
frag->remote_address = hdr->hdr_src_ptr + offset;
734+
frag->remote_address = hdr->hdr_src_ptr + offset - recvreq->req_recv.req_base.req_offset;
735735

736736
/* updating the write location */
737737
OPAL_THREAD_LOCK(&recvreq->lock);
738-
offset += recvreq->req_recv.req_base.req_offset;
739738
opal_convertor_set_position( &recvreq->req_recv.req_base.req_convertor, &offset);
740739
opal_convertor_get_current_pointer (&recvreq->req_recv.req_base.req_convertor, &frag->local_address);
741-
offset -= recvreq->req_recv.req_base.req_offset;
742740
OPAL_THREAD_UNLOCK(&recvreq->lock);
743741

744742
frag->rdma_bml = rdma_bml;
@@ -846,7 +844,8 @@ void mca_pml_ob1_recv_request_progress_match( mca_pml_ob1_recv_request_t* recvre
846844
mca_btl_base_segment_t* segments,
847845
size_t num_segments )
848846
{
849-
size_t bytes_received, data_offset = 0;
847+
// size_t bytes_received, data_offset = 0;
848+
size_t bytes_received, data_offset = recvreq->req_recv.req_base.req_offset;
850849
size_t bytes_delivered __opal_attribute_unused__; /* is being set to zero in MCA_PML_OB1_RECV_REQUEST_UNPACK */
851850
mca_pml_ob1_hdr_t* hdr = (mca_pml_ob1_hdr_t*)segments->seg_addr.pval;
852851

ompi/mca/pml/ob1/pml_ob1_sendreq.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -159,10 +159,10 @@ get_request_from_send_pending(mca_pml_ob1_send_pending_t *type)
159159

160160
#define MCA_PML_OB1_SEND_REQUEST_RESET(sendreq) \
161161
if ((sendreq)->req_send.req_bytes_packed > 0) { \
162-
size_t _position = 0; \
162+
size_t _position = sendreq->req_send.req_base.req_offset; \
163163
opal_convertor_set_position(&(sendreq)->req_send.req_base.req_convertor, \
164164
&_position); \
165-
assert( 0 == _position ); \
165+
assert( sendreq->req_send.req_base.req_offset == _position ); \
166166
}
167167

168168
static inline void mca_pml_ob1_free_rdma_resources (mca_pml_ob1_send_request_t* sendreq)

ompi/mpiext/split/c/Makefile.am

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,7 @@ libmpiext_split_c_la_SOURCES += \
4848
$(ompi_HEADERS) \
4949
mpiext_isplit_recv.c \
5050
mpiext_isplit_send.c \
51+
mpiext_split_recv.c \
5152
mpiext_split_send.c
5253
endif
5354

ompi/mpiext/split/c/mpiext_isplit_recv.c

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -88,12 +88,14 @@ int OMPI_Isplit_recv(void *buf, int count, MPI_Datatype type, int source,
8888
}
8989
size = size / 2;
9090
offset = 0;
91+
#if 0
9192
opal_convertor_set_position(&convertor, &offset);
9293
OPAL_CR_ENTER_LIBRARY();
9394
rc = MCA_PML_CALL(icrecv(&convertor, &size, source, tag, comm, request));
9495
if (OMPI_SUCCESS != rc) {
9596
OMPI_ERRHANDLER_RETURN(rc, comm, rc, FUNC_NAME);
9697
}
98+
#endif
9799
offset += size;
98100
opal_convertor_set_position(&convertor, &offset);
99101
rc = MCA_PML_CALL(icrecv(&convertor, &size, source, tag, comm, request+1));

0 commit comments

Comments
 (0)