Skip to content

Commit 0e1abb6

Browse files
authored
Merge pull request #12823 from bosilca/topic/fix_ob1_segmentation
Topic/fix ob1 segmentation with UCT BTL
2 parents b7a56e9 + 58400ad commit 0e1abb6

File tree

5 files changed

+26
-29
lines changed

5 files changed

+26
-29
lines changed

ompi/mca/pml/ob1/pml_ob1_isend.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -143,7 +143,7 @@ static inline int mca_pml_ob1_send_inline (const void *buf, size_t count,
143143
}
144144

145145
if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) {
146-
return rc;
146+
return rc;
147147
}
148148

149149
return (int) size;

opal/datatype/opal_datatype_internal.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -539,7 +539,7 @@ struct opal_datatype_t;
539539
# define OPAL_DATATYPE_SAFEGUARD_POINTER(ACTPTR, LENGTH, INITPTR, PDATA, COUNT) \
540540
{ \
541541
unsigned char *__lower_bound = (INITPTR), *__upper_bound; \
542-
assert(((LENGTH) != 0) && ((COUNT) != 0)); \
542+
assert( (COUNT) != 0 ); \
543543
__lower_bound += (PDATA)->true_lb; \
544544
__upper_bound = (INITPTR) + (PDATA)->true_ub + \
545545
((PDATA)->ub - (PDATA)->lb) * ((COUNT) -1); \

opal/datatype/opal_datatype_position.c

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -66,8 +66,8 @@ static inline void position_single_block(opal_convertor_t *CONVERTOR, unsigned c
6666
}
6767

6868
/**
69-
* Advance the convertors' position according. Update the pointer and the remaining space
70-
* accordingly.
69+
* Advance the convertor's position to account for *COUNT elements. Update
70+
* the pointer and the remaining space accordingly.
7171
*/
7272
static inline void position_predefined_data(opal_convertor_t *CONVERTOR, dt_elem_desc_t *ELEM,
7373
size_t *COUNT, unsigned char **POINTER, size_t *SPACE)
@@ -82,7 +82,8 @@ static inline void position_predefined_data(opal_convertor_t *CONVERTOR, dt_elem
8282

8383
if (cando_count > *(COUNT)) {
8484
cando_count = *(COUNT);
85-
}
85+
} else if( 0 == cando_count )
86+
return;
8687

8788
if (1 == _elem->blocklen) {
8889
DO_DEBUG(opal_output(0,

opal/mca/btl/sm/btl_sm_send.c

Lines changed: 0 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -73,18 +73,4 @@ int mca_btl_sm_send(struct mca_btl_base_module_t *btl, struct mca_btl_base_endpo
7373
}
7474

7575
return OPAL_SUCCESS;
76-
77-
#if 0
78-
if (((frag->hdr->flags & MCA_BTL_SM_FLAG_SINGLE_COPY) ||
79-
!(frag->base.des_flags & MCA_BTL_DES_FLAGS_BTL_OWNERSHIP)) &&
80-
frag->base.des_cbfunc) {
81-
frag->base.des_flags |= MCA_BTL_DES_SEND_ALWAYS_CALLBACK;
82-
83-
return OPAL_SUCCESS;
84-
}
85-
86-
/* data is gone (from the pml's perspective). frag callback/release will
87-
happen later */
88-
return 1;
89-
#endif
9076
}

opal/mca/btl/uct/btl_uct_am.c

Lines changed: 20 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -51,7 +51,7 @@ mca_btl_base_descriptor_t *mca_btl_uct_alloc(mca_btl_base_module_t *btl,
5151
}
5252

5353
static inline void _mca_btl_uct_send_pack(void *data, void *header, size_t header_size,
54-
opal_convertor_t *convertor, size_t payload_size)
54+
opal_convertor_t *convertor, size_t* payload_size)
5555
{
5656
uint32_t iov_count = 1;
5757
struct iovec iov;
@@ -64,11 +64,9 @@ static inline void _mca_btl_uct_send_pack(void *data, void *header, size_t heade
6464

6565
/* pack the data into the supplied buffer */
6666
iov.iov_base = (IOVBASE_TYPE *) ((intptr_t) data + header_size);
67-
iov.iov_len = length = payload_size;
67+
iov.iov_len = *payload_size;
6868

69-
(void) opal_convertor_pack(convertor, &iov, &iov_count, &length);
70-
71-
assert(length == payload_size);
69+
(void) opal_convertor_pack(convertor, &iov, &iov_count, payload_size);
7270
}
7371

7472
struct mca_btl_base_descriptor_t *mca_btl_uct_prepare_src(mca_btl_base_module_t *btl,
@@ -92,7 +90,10 @@ struct mca_btl_base_descriptor_t *mca_btl_uct_prepare_src(mca_btl_base_module_t
9290
}
9391

9492
_mca_btl_uct_send_pack((void *) ((intptr_t) frag->uct_iov.buffer + reserve), NULL, 0,
95-
convertor, *size);
93+
convertor, size);
94+
/* update the length of the fragment according to the convertor packed data */
95+
frag->segments[0].seg_len = reserve + *size;
96+
frag->uct_iov.length = frag->segments[0].seg_len;
9697
} else {
9798
opal_convertor_get_current_pointer(convertor, &data_ptr);
9899
assert(NULL != data_ptr);
@@ -286,7 +287,7 @@ static size_t mca_btl_uct_sendi_pack(void *data, void *arg)
286287

287288
am_header->value = args->am_header;
288289
_mca_btl_uct_send_pack((void *) ((intptr_t) data + 8), args->header, args->header_size,
289-
args->convertor, args->payload_size);
290+
args->convertor, &args->payload_size);
290291
return args->header_size + args->payload_size + 8;
291292
}
292293

@@ -329,9 +330,18 @@ int mca_btl_uct_sendi(mca_btl_base_module_t *btl, mca_btl_base_endpoint_t *endpo
329330
} else if (msg_size < (size_t) MCA_BTL_UCT_TL_ATTR(uct_btl->am_tl, context->context_id)
330331
.cap.am.max_short) {
331332
int8_t *data = alloca(total_size);
332-
_mca_btl_uct_send_pack(data, header, header_size, convertor, payload_size);
333-
ucs_status = uct_ep_am_short(ep_handle, MCA_BTL_UCT_FRAG, am_header.value, data,
334-
total_size);
333+
size_t packed_payload_size = payload_size;
334+
_mca_btl_uct_send_pack(data, header, header_size, convertor, &packed_payload_size);
335+
if (packed_payload_size != payload_size) {
336+
/* This should never happen as the packed data should go in a single pack. But
337+
in case it does, fall back to a descriptor allocation and let the caller
338+
send the data.
339+
*/
340+
ucs_status = UCS_ERR_NO_RESOURCE;
341+
} else {
342+
ucs_status = uct_ep_am_short(ep_handle, MCA_BTL_UCT_FRAG, am_header.value, data,
343+
total_size);
344+
}
335345
} else {
336346
ssize_t size;
337347

0 commit comments

Comments
 (0)