Skip to content

Backport/3.1.x/6952 #6978

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 18 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion ompi/datatype/ompi_datatype.h
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
* Copyright (c) 2009-2013 The University of Tennessee and The University
* Copyright (c) 2009-2019 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2009 Oak Ridge National Labs. All rights reserved.
Expand Down
6 changes: 3 additions & 3 deletions ompi/datatype/ompi_datatype_create_indexed.c
Original file line number Diff line number Diff line change
Expand Up @@ -167,17 +167,17 @@ int32_t ompi_datatype_create_hindexed_block( int count, int bLength, const ptrdi
pdt = ompi_datatype_create( count * (2 + oldType->super.desc.used) );
disp = pDisp[0];
dLength = bLength;
endat = disp + dLength;
endat = disp + dLength * extent;
for( i = 1; i < count; i++ ) {
if( endat == pDisp[i] ) {
/* contiguous with the previous one */
dLength += bLength;
endat += bLength;
endat += bLength * extent;
} else {
ompi_datatype_add( pdt, oldType, dLength, disp, extent );
disp = pDisp[i];
dLength = bLength;
endat = disp + bLength;
endat = disp + bLength * extent;
}
}
ompi_datatype_add( pdt, oldType, dLength, disp, extent );
Expand Down
3 changes: 1 addition & 2 deletions ompi/datatype/ompi_datatype_external.c
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
* Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2016 The University of Tennessee and The University
* Copyright (c) 2004-2019 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2008 High Performance Computing Center Stuttgart,
Expand All @@ -26,7 +26,6 @@
#include <stdio.h>

#include "ompi/runtime/params.h"
#include "ompi/communicator/communicator.h"
#include "ompi/datatype/ompi_datatype.h"
#include "opal/datatype/opal_convertor.h"

Expand Down
18 changes: 9 additions & 9 deletions ompi/datatype/ompi_datatype_module.c
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
* Copyright (c) 2004-2006 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2017 The University of Tennessee and The University
* Copyright (c) 2004-2019 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2006 High Performance Computing Center Stuttgart,
Expand Down Expand Up @@ -730,14 +730,14 @@ void ompi_datatype_dump( const ompi_datatype_t* pData )
length = length * 100 + 500;
buffer = (char*)malloc( length );
index += snprintf( buffer, length - index,
"Datatype %p[%s] id %d size %ld align %d opal_id %d length %d used %d\n"
"true_lb %ld true_ub %ld (true_extent %ld) lb %ld ub %ld (extent %ld)\n"
"nbElems %d loops %d flags %X (",
(void*)pData, pData->name, pData->id,
(long)pData->super.size, (int)pData->super.align, pData->super.id, (int)pData->super.desc.length, (int)pData->super.desc.used,
(long)pData->super.true_lb, (long)pData->super.true_ub, (long)(pData->super.true_ub - pData->super.true_lb),
(long)pData->super.lb, (long)pData->super.ub, (long)(pData->super.ub - pData->super.lb),
(int)pData->super.nbElems, (int)pData->super.loops, (int)pData->super.flags );
"Datatype %p[%s] id %d size %" PRIsize_t " align %u opal_id %u length %" PRIsize_t " used %" PRIsize_t "\n"
"true_lb %td true_ub %td (true_extent %td) lb %td ub %td (extent %td)\n"
"nbElems %" PRIsize_t " loops %u flags %X (",
(void*)pData, pData->name, pData->id,
pData->super.size, pData->super.align, (uint32_t)pData->super.id, pData->super.desc.length, pData->super.desc.used,
pData->super.true_lb, pData->super.true_ub, pData->super.true_ub - pData->super.true_lb,
pData->super.lb, pData->super.ub, pData->super.ub - pData->super.lb,
pData->super.nbElems, pData->super.loops, (int)pData->super.flags );
/* dump the flags */
if( ompi_datatype_is_predefined(pData) ) {
index += snprintf( buffer + index, length - index, "predefined " );
Expand Down
59 changes: 31 additions & 28 deletions opal/datatype/opal_convertor.c
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
* Copyright (c) 2004-2006 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2017 The University of Tennessee and The University
* Copyright (c) 2004-2019 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2006 High Performance Computing Center Stuttgart,
Expand All @@ -12,8 +12,8 @@
* All rights reserved.
* Copyright (c) 2009 Oak Ridge National Labs. All rights reserved.
* Copyright (c) 2011 NVIDIA Corporation. All rights reserved.
* Copyright (c) 2013-2017 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
* Copyright (c) 2013-2018 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
* Copyright (c) 2017 Intel, Inc. All rights reserved
* $COPYRIGHT$
*
Expand Down Expand Up @@ -324,13 +324,14 @@ int32_t opal_convertor_unpack( opal_convertor_t* pConv,
return pConv->fAdvance( pConv, iov, out_size, max_data );
}

static inline int opal_convertor_create_stack_with_pos_contig( opal_convertor_t* pConvertor,
size_t starting_point, const size_t* sizes )
static inline int
opal_convertor_create_stack_with_pos_contig( opal_convertor_t* pConvertor,
size_t starting_point, const size_t* sizes )
{
dt_stack_t* pStack; /* pointer to the position on the stack */
const opal_datatype_t* pData = pConvertor->pDesc;
dt_elem_desc_t* pElems;
uint32_t count;
size_t count;
ptrdiff_t extent;

pStack = pConvertor->pStack;
Expand All @@ -340,7 +341,7 @@ static inline int opal_convertor_create_stack_with_pos_contig( opal_convertor_t*
*/
pElems = pConvertor->use_desc->desc;

count = (uint32_t)(starting_point / pData->size);
count = starting_point / pData->size;
extent = pData->ub - pData->lb;

pStack[0].type = OPAL_DATATYPE_LOOP; /* the first one is always the loop */
Expand All @@ -349,14 +350,14 @@ static inline int opal_convertor_create_stack_with_pos_contig( opal_convertor_t*
pStack[0].disp = count * extent;

/* now compute the number of pending bytes */
count = (uint32_t)(starting_point - count * pData->size);
count = starting_point % pData->size;
/**
* We save the current displacement starting from the beginning
* of this data.
*/
if( OPAL_LIKELY(0 == count) ) {
pStack[1].type = pElems->elem.common.type;
pStack[1].count = pElems->elem.count;
pStack[1].count = pElems->elem.blocklen;
} else {
pStack[1].type = OPAL_DATATYPE_UINT1;
pStack[1].count = pData->size - count;
Expand All @@ -370,9 +371,9 @@ static inline int opal_convertor_create_stack_with_pos_contig( opal_convertor_t*
return OPAL_SUCCESS;
}

static inline
int opal_convertor_create_stack_at_begining( opal_convertor_t* convertor,
const size_t* sizes )
static inline int
opal_convertor_create_stack_at_begining( opal_convertor_t* convertor,
const size_t* sizes )
{
dt_stack_t* pStack = convertor->pStack;
dt_elem_desc_t* pElems;
Expand Down Expand Up @@ -402,7 +403,7 @@ int opal_convertor_create_stack_at_begining( opal_convertor_t* convertor,
pStack[1].count = pElems[0].loop.loops;
pStack[1].type = OPAL_DATATYPE_LOOP;
} else {
pStack[1].count = pElems[0].elem.count;
pStack[1].count = pElems[0].elem.count * pElems[0].elem.blocklen;
pStack[1].type = pElems[0].elem.common.type;
}
return OPAL_SUCCESS;
Expand Down Expand Up @@ -563,7 +564,7 @@ size_t opal_convertor_compute_remote_size( opal_convertor_t* pConvertor )

int32_t opal_convertor_prepare_for_recv( opal_convertor_t* convertor,
const struct opal_datatype_t* datatype,
int32_t count,
size_t count,
const void* pUserBuf )
{
/* Here I should check that the data is not overlapping */
Expand All @@ -578,8 +579,9 @@ int32_t opal_convertor_prepare_for_recv( opal_convertor_t* convertor,
assert(! (convertor->flags & CONVERTOR_SEND));
OPAL_CONVERTOR_PREPARE( convertor, datatype, count, pUserBuf );

if( convertor->flags & CONVERTOR_WITH_CHECKSUM ) {
if( !(convertor->flags & CONVERTOR_HOMOGENEOUS) ) {
#if defined(CHECKSUM)
if( OPAL_UNLIKELY(convertor->flags & CONVERTOR_WITH_CHECKSUM) ) {
if( OPAL_UNLIKELY(!(convertor->flags & CONVERTOR_HOMOGENEOUS)) ) {
convertor->fAdvance = opal_unpack_general_checksum;
} else {
if( convertor->pDesc->flags & OPAL_DATATYPE_FLAG_CONTIGUOUS ) {
Expand All @@ -588,8 +590,9 @@ int32_t opal_convertor_prepare_for_recv( opal_convertor_t* convertor,
convertor->fAdvance = opal_generic_simple_unpack_checksum;
}
}
} else {
if( !(convertor->flags & CONVERTOR_HOMOGENEOUS) ) {
} else
#endif /* defined(CHECKSUM) */
if( OPAL_UNLIKELY(!(convertor->flags & CONVERTOR_HOMOGENEOUS)) ) {
convertor->fAdvance = opal_unpack_general;
} else {
if( convertor->pDesc->flags & OPAL_DATATYPE_FLAG_CONTIGUOUS ) {
Expand All @@ -598,14 +601,13 @@ int32_t opal_convertor_prepare_for_recv( opal_convertor_t* convertor,
convertor->fAdvance = opal_generic_simple_unpack;
}
}
}
return OPAL_SUCCESS;
}


int32_t opal_convertor_prepare_for_send( opal_convertor_t* convertor,
const struct opal_datatype_t* datatype,
int32_t count,
size_t count,
const void* pUserBuf )
{
convertor->flags |= CONVERTOR_SEND;
Expand All @@ -617,6 +619,7 @@ int32_t opal_convertor_prepare_for_send( opal_convertor_t* convertor,

OPAL_CONVERTOR_PREPARE( convertor, datatype, count, pUserBuf );

#if defined(CHECKSUM)
if( convertor->flags & CONVERTOR_WITH_CHECKSUM ) {
if( CONVERTOR_SEND_CONVERSION == (convertor->flags & (CONVERTOR_SEND_CONVERSION|CONVERTOR_HOMOGENEOUS)) ) {
convertor->fAdvance = opal_pack_general_checksum;
Expand All @@ -631,7 +634,8 @@ int32_t opal_convertor_prepare_for_send( opal_convertor_t* convertor,
convertor->fAdvance = opal_generic_simple_pack_checksum;
}
}
} else {
} else
#endif /* defined(CHECKSUM) */
if( CONVERTOR_SEND_CONVERSION == (convertor->flags & (CONVERTOR_SEND_CONVERSION|CONVERTOR_HOMOGENEOUS)) ) {
convertor->fAdvance = opal_pack_general;
} else {
Expand All @@ -645,7 +649,6 @@ int32_t opal_convertor_prepare_for_send( opal_convertor_t* convertor,
convertor->fAdvance = opal_generic_simple_pack;
}
}
}
return OPAL_SUCCESS;
}

Expand Down Expand Up @@ -699,12 +702,12 @@ int opal_convertor_clone( const opal_convertor_t* source,

void opal_convertor_dump( opal_convertor_t* convertor )
{
opal_output( 0, "Convertor %p count %d stack position %d bConverted %ld\n"
"\tlocal_size %ld remote_size %ld flags %X stack_size %d pending_length %d\n"
opal_output( 0, "Convertor %p count %" PRIsize_t " stack position %u bConverted %" PRIsize_t "\n"
"\tlocal_size %" PRIsize_t " remote_size %" PRIsize_t " flags %X stack_size %u pending_length %" PRIsize_t "\n"
"\tremote_arch %u local_arch %u\n",
(void*)convertor,
convertor->count, convertor->stack_pos, (unsigned long)convertor->bConverted,
(unsigned long)convertor->local_size, (unsigned long)convertor->remote_size,
convertor->count, convertor->stack_pos, convertor->bConverted,
convertor->local_size, convertor->remote_size,
convertor->flags, convertor->stack_size, convertor->partial_length,
convertor->remoteArch, opal_local_arch );
if( convertor->flags & CONVERTOR_RECV ) opal_output( 0, "unpack ");
Expand Down Expand Up @@ -734,8 +737,8 @@ void opal_datatype_dump_stack( const dt_stack_t* pStack, int stack_pos,
{
opal_output( 0, "\nStack %p stack_pos %d name %s\n", (void*)pStack, stack_pos, name );
for( ; stack_pos >= 0; stack_pos-- ) {
opal_output( 0, "%d: pos %d count %d disp %ld ", stack_pos, pStack[stack_pos].index,
(int)pStack[stack_pos].count, (long)pStack[stack_pos].disp );
opal_output( 0, "%d: pos %d count %" PRIsize_t " disp %ld ", stack_pos, pStack[stack_pos].index,
pStack[stack_pos].count, pStack[stack_pos].disp );
if( pStack->index != -1 )
opal_output( 0, "\t[desc count %lu disp %ld extent %ld]\n",
(unsigned long)pDesc[pStack[stack_pos].index].elem.count,
Expand Down
34 changes: 20 additions & 14 deletions opal/datatype/opal_convertor.h
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,8 @@
* All rights reserved.
* Copyright (c) 2009 Oak Ridge National Labs. All rights reserved.
* Copyright (c) 2014 NVIDIA Corporation. All rights reserved.
* Copyright (c) 2017 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
* Copyright (c) 2017-2018 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
* Copyright (c) 2017 Intel, Inc. All rights reserved
* $COPYRIGHT$
*
Expand Down Expand Up @@ -74,6 +74,7 @@ struct opal_convertor_master_t;
/**
 * One entry of the stack a convertor keeps while packing/unpacking
 * (opal_convertor_t holds these through pStack and static_stack).
 * Each entry records which element of the datatype description is being
 * processed, how many repetitions remain, and the matching displacement.
 */
struct dt_stack_t {
int32_t index; /**< index in the element description */
int16_t type; /**< the type used for the last pack/unpack (original or OPAL_DATATYPE_UINT1) */
int16_t padding; /**< NOTE(review): not referenced in the code visible here; presumably just names the 2 bytes left after type so the layout is explicit -- confirm */
size_t count; /**< number of times we still have to do it */
ptrdiff_t disp; /**< actual displacement depending on the count field */
};
Expand All @@ -93,30 +94,33 @@ struct opal_convertor_t {
const opal_datatype_t* pDesc; /**< the datatype description associated with the convertor */
const dt_type_desc_t* use_desc; /**< the version used by the convertor (normal or optimized) */
opal_datatype_count_t count; /**< the total number of full datatype elements */

/* --- cacheline boundary (64 bytes - if 64bits arch and !OPAL_ENABLE_DEBUG) --- */
uint32_t stack_size; /**< size of the allocated stack */
/* --- cacheline 1 boundary (64 bytes) --- */
unsigned char* pBaseBuf; /**< initial buffer as supplied by the user */
dt_stack_t* pStack; /**< the local stack for the actual conversion */
convertor_advance_fct_t fAdvance; /**< pointer to the pack/unpack functions */

/* --- cacheline boundary (96 bytes - if 64bits arch and !OPAL_ENABLE_DEBUG) --- */
struct opal_convertor_master_t* master; /**< the master convertor */

/* All others fields get modified for every call to pack/unpack functions */
uint32_t stack_pos; /**< the actual position on the stack */
uint32_t partial_length; /**< amount of data left over from the last unpack */
size_t partial_length; /**< amount of data left over from the last unpack */
size_t bConverted; /**< # of bytes already converted */

/* --- cacheline boundary (128 bytes - if 64bits arch and !OPAL_ENABLE_DEBUG) --- */
uint32_t checksum; /**< checksum computed by pack/unpack operation */
uint32_t csum_ui1; /**< partial checksum computed by pack/unpack operation */
size_t csum_ui2; /**< partial checksum computed by pack/unpack operation */
/* --- cacheline 2 boundary (128 bytes) --- */

/* --- fields below are no longer aligned on cacheline boundaries --- */
dt_stack_t static_stack[DT_STATIC_STACK_SIZE]; /**< local stack for small datatypes */
/* --- cacheline 3 boundary (192 bytes) was 56 bytes ago --- */

#if OPAL_CUDA_SUPPORT
memcpy_fct_t cbmemcpy; /**< memcpy or cuMemcpy */
void * stream; /**< CUstream for async copy */
#endif
/* size: 248, cachelines: 4, members: 20 */
/* last cacheline: 56 bytes */
};
OPAL_DECLSPEC OBJ_CLASS_DECLARATION( opal_convertor_t );

Expand Down Expand Up @@ -251,12 +255,12 @@ static inline void opal_convertor_get_offset_pointer( const opal_convertor_t* pC
*/
OPAL_DECLSPEC int32_t opal_convertor_prepare_for_send( opal_convertor_t* convertor,
const struct opal_datatype_t* datatype,
int32_t count,
size_t count,
const void* pUserBuf);

static inline int32_t opal_convertor_copy_and_prepare_for_send( const opal_convertor_t* pSrcConv,
const struct opal_datatype_t* datatype,
int32_t count,
size_t count,
const void* pUserBuf,
int32_t flags,
opal_convertor_t* convertor )
Expand All @@ -273,11 +277,11 @@ static inline int32_t opal_convertor_copy_and_prepare_for_send( const opal_conve
*/
OPAL_DECLSPEC int32_t opal_convertor_prepare_for_recv( opal_convertor_t* convertor,
const struct opal_datatype_t* datatype,
int32_t count,
size_t count,
const void* pUserBuf );
static inline int32_t opal_convertor_copy_and_prepare_for_recv( const opal_convertor_t* pSrcConv,
const struct opal_datatype_t* datatype,
int32_t count,
size_t count,
const void* pUserBuf,
int32_t flags,
opal_convertor_t* convertor )
Expand Down Expand Up @@ -328,8 +332,10 @@ opal_convertor_set_position( opal_convertor_t* convertor,
/* Remove the completed flag if it's already set */
convertor->flags &= ~CONVERTOR_COMPLETED;

if( !(convertor->flags & CONVERTOR_WITH_CHECKSUM) &&
(convertor->flags & OPAL_DATATYPE_FLAG_NO_GAPS) &&
if( (convertor->flags & OPAL_DATATYPE_FLAG_NO_GAPS) &&
#if defined(CHECKSUM)
!(convertor->flags & CONVERTOR_WITH_CHECKSUM) &&
#endif /* defined(CHECKSUM) */
(convertor->flags & (CONVERTOR_SEND | CONVERTOR_HOMOGENEOUS)) ) {
/* Contiguous (no gaps), no checksum, and either a send or a homogeneous unpack */
convertor->bConverted = *position;
Expand Down
Loading