Skip to content

v3.0.x: misc fixes for heterogeneous clusters #3871

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 6 commits into from
Jul 17, 2017
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 16 additions & 1 deletion config/opal_configure_options.m4
Original file line number Diff line number Diff line change
Expand Up @@ -286,7 +286,22 @@ fi
AC_DEFINE_UNQUOTED(OPAL_ENABLE_DLOPEN_SUPPORT, $OPAL_ENABLE_DLOPEN_SUPPORT,
[Whether we want to enable dlopen support])

opal_want_heterogeneous=0
#
# Heterogeneous support
#

# Decide whether heterogeneous platform support was requested.
# Only the value "yes" for --enable-heterogeneous turns it on; any
# other value, or no option at all, leaves it disabled.
AC_MSG_CHECKING([if want heterogeneous support])
AC_ARG_ENABLE([heterogeneous],
    [AS_HELP_STRING([--enable-heterogeneous],
        [Enable features required for heterogeneous
         platform support (default: disabled)])])
if test "$enable_heterogeneous" = "yes" ; then
    AC_MSG_RESULT([yes])
    opal_want_heterogeneous=1
else
    AC_MSG_RESULT([no])
    opal_want_heterogeneous=0
fi
# Publish the decision to the C sources as a 0/1 preprocessor value.
AC_DEFINE_UNQUOTED([OPAL_ENABLE_HETEROGENEOUS_SUPPORT],
    [$opal_want_heterogeneous],
    [Enable features required for heterogeneous support])
Expand Down
2 changes: 1 addition & 1 deletion configure.ac
Original file line number Diff line number Diff line change
Expand Up @@ -599,7 +599,7 @@ AC_CHECK_HEADERS([alloca.h aio.h arpa/inet.h dirent.h \
sys/types.h sys/uio.h sys/un.h net/uio.h sys/utsname.h sys/vfs.h sys/wait.h syslog.h \
termios.h ulimit.h unistd.h util.h utmp.h malloc.h \
ifaddrs.h crt_externs.h regex.h mntent.h paths.h \
ioLib.h sockLib.h hostLib.h shlwapi.h sys/synch.h db.h ndbm.h zlib.h])
ioLib.h sockLib.h hostLib.h shlwapi.h sys/synch.h db.h ndbm.h zlib.h ieee754.h])

AC_CHECK_HEADERS([sys/mount.h], [], [],
[AC_INCLUDES_DEFAULT
Expand Down
5 changes: 4 additions & 1 deletion opal/datatype/opal_convertor.c
Original file line number Diff line number Diff line change
Expand Up @@ -481,7 +481,9 @@ size_t opal_convertor_compute_remote_size( opal_convertor_t* pConvertor )
pConvertor->remote_size = pConvertor->local_size;
if( OPAL_UNLIKELY(datatype->bdt_used & pConvertor->master->hetero_mask) ) {
pConvertor->flags &= (~CONVERTOR_HOMOGENEOUS);
pConvertor->use_desc = &(datatype->desc);
if (!(pConvertor->flags & CONVERTOR_SEND && pConvertor->flags & OPAL_DATATYPE_FLAG_CONTIGUOUS)) {
pConvertor->use_desc = &(datatype->desc);
}
if( 0 == (pConvertor->flags & CONVERTOR_HAS_REMOTE_SIZE) ) {
/* This is for a single datatype, we must update it with the count */
pConvertor->remote_size = opal_datatype_compute_remote_size(datatype,
Expand Down Expand Up @@ -570,6 +572,7 @@ int32_t opal_convertor_prepare_for_recv( opal_convertor_t* convertor,
mca_cuda_convertor_init(convertor, pUserBuf);
#endif

assert(! (convertor->flags & CONVERTOR_SEND));
OPAL_CONVERTOR_PREPARE( convertor, datatype, count, pUserBuf );

if( convertor->flags & CONVERTOR_WITH_CHECKSUM ) {
Expand Down
1 change: 1 addition & 0 deletions opal/datatype/opal_convertor.h
Original file line number Diff line number Diff line change
Expand Up @@ -218,6 +218,7 @@ static inline void opal_convertor_get_unpacked_size( const opal_convertor_t* pCo
return;
}
if( 0 == (CONVERTOR_HAS_REMOTE_SIZE & pConv->flags) ) {
assert(! (pConv->flags & CONVERTOR_SEND));
opal_convertor_compute_remote_size( (opal_convertor_t*)pConv);
}
*pSize = pConv->remote_size;
Expand Down
91 changes: 88 additions & 3 deletions opal/datatype/opal_copy_functions_heterogeneous.c
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
* reserved.
* Copyright (c) 2009 Oak Ridge National Labs. All rights reserved.
* Copyright (c) 2015-2017 Research Organization for Information Science
* Copyright (c) 2015-2017 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
* $COPYRIGHT$
*
Expand All @@ -15,6 +16,10 @@

#include "opal_config.h"

#ifdef HAVE_IEEE754_H
#include <ieee754.h>
#endif

#include <stddef.h>
#include <stdint.h>

Expand Down Expand Up @@ -62,13 +67,78 @@ opal_dt_swap_bytes(void *to_p, const void *from_p, const size_t size, size_t cou
}
}

#ifdef HAVE_IEEE754_H
/*
 * Bit-field overlay for a 128-bit long double: 112 mantissa bits split
 * into four fields, a 15-bit exponent and a 1-bit sign (128 bits total).
 * opal_dt_swap_long_double() casts the destination buffer to this type
 * in its __x86_64 branch.
 * NOTE(review): the declaration order presumably relies on the compiler
 * allocating bit-fields from the least significant bit upwards, so that
 * mantissa3 is the lowest mantissa word - confirm for the target ABI.
 */
struct bit128 {
unsigned int mantissa3:32;
unsigned int mantissa2:32;
unsigned int mantissa1:32;
unsigned int mantissa0:16;   /* only 16 bits wide, unlike the other mantissa fields */
unsigned int exponent:15;
unsigned int negative:1;     /* sign flag */
};

/*
 * Bit-field overlay for an 80-bit extended value stored in a 128-bit
 * slot: 48 bits of padding (pad + empty), a 1-bit sign, a 15-bit
 * exponent and a 64-bit mantissa split into two 32-bit fields.
 * opal_dt_swap_long_double() casts the destination buffer to this type
 * in its __sparcv9 branch.
 * NOTE(review): the declaration order presumably relies on the compiler
 * allocating bit-fields from the most significant bit downwards, so
 * that the padding comes first in memory - confirm for the target ABI.
 */
struct bit80 {
unsigned int pad:32;         /* padding, not read by the conversion */
unsigned int empty:16;       /* padding, not read by the conversion */
unsigned int negative:1;     /* sign flag */
unsigned int exponent:15;
unsigned int mantissa0:32;
unsigned int mantissa1:32;
};

/**
 * Convert long double values in place between the 128-bit layout and
 * the 80-bit Intel extended layout.
 *
 * The copy macros in this file call this right after
 * opal_dt_swap_bytes(), so the destination buffer already holds the
 * remote representation in local byte order. Nothing is done when both
 * sides agree on OPAL_ARCH_LDISINTEL.
 *
 * @param to_p        destination buffer, converted in place
 * @param from_p      unused; kept so the signature mirrors opal_dt_swap_bytes()
 * @param size        unused
 * @param count       number of long double elements in @p to_p
 * @param remoteArch  architecture flags of the peer
 */
static inline void
opal_dt_swap_long_double(void *to_p, const void *from_p, const size_t size, size_t count, uint32_t remoteArch)
{
#ifdef HAVE_IEEE754_H
    size_t i;
    long double *to = (long double *) to_p;

    (void) from_p;
    (void) size;

    if ((opal_local_arch & OPAL_ARCH_LDISINTEL) && !(remoteArch & OPAL_ARCH_LDISINTEL)) {
#ifdef __x86_64
        /* Remote sent a 128-bit value, local wants the 80-bit Intel form. */
        for (i = 0; i < count; i++, to++) {
            union ieee854_long_double ld;
            struct bit128 *b = (struct bit128 *) to;
            /* The 80-bit form stores the leading mantissa bit explicitly.
             * It is 1 for every non-zero exponent and must stay 0 when the
             * exponent is 0, otherwise zero would turn into a tiny
             * non-zero pseudo-denormal. */
            unsigned int integer_bit = (0 != b->exponent) ? 0x80000000u : 0u;

            ld.ieee.empty = 0;
            /* High word: integer bit, then the top 31 fraction bits. */
            ld.ieee.mantissa0 = integer_bit
                              | (((unsigned int) b->mantissa0 << 15) & 0x7FFF8000)
                              | ((b->mantissa1 >> 17) & 0x00007FFF);
            /* Low word: the next 32 fraction bits. The top 15 bits of
             * mantissa2 must be shifted DOWN into the low 15 bits; a left
             * shift here would always be masked to zero. */
            ld.ieee.mantissa1 = ((b->mantissa1 << 15) & 0xFFFF8000)
                              | ((b->mantissa2 >> 17) & 0x00007FFF);
            ld.ieee.exponent = b->exponent;
            ld.ieee.negative = b->negative;
            MEMCPY( to, &ld, sizeof(long double));
        }
#endif
    } else if (!(opal_local_arch & OPAL_ARCH_LDISINTEL) && (remoteArch & OPAL_ARCH_LDISINTEL)) {
#ifdef __sparcv9
        /* Remote sent the 80-bit Intel form, local wants a 128-bit value.
         * NOTE(review): this branch is unchanged; its shifts and masks have
         * not been checked against the ieee854_long_double layout of this
         * platform - confirm before relying on the low mantissa bits. */
        for (i = 0; i < count; i++, to++) {
            union ieee854_long_double ld;
            struct bit80 *b = (struct bit80 *) to;
            ld.ieee.mantissa3 = 0;
            ld.ieee.mantissa2 = 0;
            ld.ieee.mantissa0 = (b->mantissa0 << 1) | (b->mantissa1 & 0x80000000);
            ld.ieee.mantissa1 = (b->mantissa1 << 1) & 0xFFFFFFFE;
            ld.ieee.exponent = b->exponent;
            ld.ieee.negative = b->negative;
            MEMCPY( to, &ld, sizeof(long double));
        }
#endif
    }
#else
    /* Without <ieee754.h> there is no way to perform the conversion. */
    assert(0);
#endif
}
#else
#define opal_dt_swap_long_double(to_p, from_p, size, count, remoteArch)
#endif

/**
 * BEWARE: Do not use the following macro with composed types such as
 * complex. As the swap is done using the entire type sizeof, the
 * wrong endianness translation will be done. Instead, use the
 * COPY_2SAMETYPE_HETEROGENEOUS.
 *
 * This form forwards to COPY_TYPE_HETEROGENEOUS_INTERNAL with the
 * LONG_DOUBLE argument set to 0.
 */
#define COPY_TYPE_HETEROGENEOUS( TYPENAME, TYPE ) \
COPY_TYPE_HETEROGENEOUS_INTERNAL( TYPENAME, TYPE, 0 )

#define COPY_TYPE_HETEROGENEOUS_INTERNAL( TYPENAME, TYPE, LONG_DOUBLE ) \
static int32_t \
copy_##TYPENAME##_heterogeneous(opal_convertor_t *pConvertor, uint32_t count, \
const char* from, size_t from_len, ptrdiff_t from_extent, \
Expand All @@ -85,9 +155,15 @@ copy_##TYPENAME##_heterogeneous(opal_convertor_t *pConvertor, uint32_t count,
(opal_local_arch & OPAL_ARCH_ISBIGENDIAN)) { \
if( (to_extent == from_extent) && (to_extent == sizeof(TYPE)) ) { \
opal_dt_swap_bytes(to, from, sizeof(TYPE), count); \
if (LONG_DOUBLE) { \
opal_dt_swap_long_double(to, from, sizeof(TYPE), count, pConvertor->remoteArch);\
} \
} else { \
for( i = 0; i < count; i++ ) { \
opal_dt_swap_bytes(to, from, sizeof(TYPE), 1); \
if (LONG_DOUBLE) { \
opal_dt_swap_long_double(to, from, sizeof(TYPE), 1, pConvertor->remoteArch);\
} \
to += to_extent; \
from += from_extent; \
} \
Expand All @@ -108,6 +184,9 @@ copy_##TYPENAME##_heterogeneous(opal_convertor_t *pConvertor, uint32_t count,
}

/**
 * Forwards to COPY_2SAMETYPE_HETEROGENEOUS_INTERNAL with the
 * LONG_DOUBLE argument set to 0.
 */
#define COPY_2SAMETYPE_HETEROGENEOUS( TYPENAME, TYPE ) \
COPY_2SAMETYPE_HETEROGENEOUS_INTERNAL( TYPENAME, TYPE, 0)

#define COPY_2SAMETYPE_HETEROGENEOUS_INTERNAL( TYPENAME, TYPE, LONG_DOUBLE) \
static int32_t \
copy_##TYPENAME##_heterogeneous(opal_convertor_t *pConvertor, uint32_t count, \
const char* from, size_t from_len, ptrdiff_t from_extent, \
Expand All @@ -122,11 +201,17 @@ copy_##TYPENAME##_heterogeneous(opal_convertor_t *pConvertor, uint32_t count,
\
if ((pConvertor->remoteArch & OPAL_ARCH_ISBIGENDIAN) != \
(opal_local_arch & OPAL_ARCH_ISBIGENDIAN)) { \
if( (to_extent == from_extent) && (to_extent == sizeof(TYPE)) ) { \
if( (to_extent == from_extent) && (to_extent == (2 * sizeof(TYPE))) ) { \
opal_dt_swap_bytes(to, from, sizeof(TYPE), 2 * count); \
if (LONG_DOUBLE) { \
opal_dt_swap_long_double(to, from, sizeof(TYPE), 2*count, pConvertor->remoteArch);\
} \
} else { \
for( i = 0; i < count; i++ ) { \
opal_dt_swap_bytes(to, from, sizeof(TYPE), 2); \
if (LONG_DOUBLE) { \
opal_dt_swap_long_double(to, from, sizeof(TYPE), 2, pConvertor->remoteArch);\
} \
to += to_extent; \
from += from_extent; \
} \
Expand Down Expand Up @@ -333,7 +418,7 @@ COPY_TYPE_HETEROGENEOUS( float16, float )
#elif SIZEOF_DOUBLE == 16
COPY_TYPE_HETEROGENEOUS( float16, double )
#elif HAVE_LONG_DOUBLE && SIZEOF_LONG_DOUBLE == 16
COPY_TYPE_HETEROGENEOUS( float16, long double )
COPY_TYPE_HETEROGENEOUS_INTERNAL( float16, long double, 1)
#else
/* #error No basic type for copy function for opal_datatype_float16 found */
#define copy_float16_heterogeneous NULL
Expand All @@ -354,7 +439,7 @@ COPY_2SAMETYPE_HETEROGENEOUS( double_complex, double )
#endif

#if HAVE_LONG_DOUBLE__COMPLEX
COPY_2SAMETYPE_HETEROGENEOUS( long_double_complex, long double )
COPY_2SAMETYPE_HETEROGENEOUS_INTERNAL( long_double_complex, long double, 1)
#else
/* #error No basic type for copy function for opal_datatype_long_double_complex found */
#define copy_long_double_complex_heterogeneous NULL
Expand Down
1 change: 1 addition & 0 deletions opal/datatype/opal_datatype_fake_stack.c
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,7 @@ int opal_convertor_create_stack_with_pos_general( opal_convertor_t* pConvertor,
}

/* remove from the main loop all the complete datatypes */
assert (! (pConvertor->flags & CONVERTOR_SEND));
remote_size = opal_convertor_compute_remote_size( pConvertor );
count = (int32_t)(starting_point / remote_size);
resting_place -= (remote_size * count);
Expand Down
3 changes: 2 additions & 1 deletion opal/mca/btl/tcp/btl_tcp.c
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
* All rights reserved.
* Copyright (c) 2006-2015 Los Alamos National Security, LLC. All rights
* reserved.
* Copyright (c) 2016 Research Organization for Information Science
* Copyright (c) 2016-2017 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
* Copyright (c) 2016 Intel, Inc. All rights reserved.
*
Expand Down Expand Up @@ -381,6 +381,7 @@ int mca_btl_tcp_put (mca_btl_base_module_t *btl, struct mca_btl_base_endpoint_t

frag->segments[1].seg_addr.lval = remote_address;
frag->segments[1].seg_len = size;
if (endpoint->endpoint_nbo) MCA_BTL_BASE_SEGMENT_HTON(frag->segments[1]);

frag->base.des_flags = MCA_BTL_DES_FLAGS_BTL_OWNERSHIP | MCA_BTL_DES_SEND_ALWAYS_CALLBACK;
frag->base.des_cbfunc = fake_rdma_complete;
Expand Down
3 changes: 2 additions & 1 deletion opal/mca/btl/tcp/btl_tcp_frag.c
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
* All rights reserved.
* Copyright (c) 2014 Los Alamos National Security, LLC. All rights
* reserved.
* Copyright (c) 2015-2016 Research Organization for Information Science
* Copyright (c) 2015-2017 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
* Copyright (c) 2015-2016 Cisco Systems, Inc. All rights reserved.
* $COPYRIGHT$
Expand Down Expand Up @@ -291,6 +291,7 @@ bool mca_btl_tcp_frag_recv(mca_btl_tcp_frag_t* frag, int sd)
goto repeat;
} else if (frag->iov_idx == 2) {
for( i = 0; i < frag->hdr.count; i++ ) {
if (btl_endpoint->endpoint_nbo) MCA_BTL_BASE_SEGMENT_NTOH(frag->segments[i]);
frag->iov[i+2].iov_base = (IOVBASE_TYPE*)frag->segments[i].seg_addr.pval;
frag->iov[i+2].iov_len = frag->segments[i].seg_len;
}
Expand Down
20 changes: 10 additions & 10 deletions orte/mca/oob/tcp/oob_tcp_hdr.h
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,8 @@
* All rights reserved.
* Copyright (c) 2010-2011 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2014-2017 Intel, Inc. All rights reserved.
* Copyright (c) 2017 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
*
* $COPYRIGHT$
*
Expand All @@ -32,12 +34,12 @@
* the message came from an external (to
* this component) source
*/
/*
 * Message type stored in the OOB/TCP header. It is a single byte, so
 * the field has the same size on every peer and is not byte swapped.
 * The values are plain constants rather than an enum because the size
 * of an enum type is chosen by the compiler.
 */
typedef uint8_t mca_oob_tcp_msg_type_t;

#define MCA_OOB_TCP_IDENT 1
#define MCA_OOB_TCP_PROBE 2
#define MCA_OOB_TCP_PING 3
#define MCA_OOB_TCP_USER 4

#define ORTE_MAX_RTD_SIZE 31

Expand All @@ -54,14 +56,14 @@ typedef struct {
* and let some other module try to send it
*/
orte_process_name_t dst;
/* type of message */
mca_oob_tcp_msg_type_t type;
/* the rml tag where this message is headed */
orte_rml_tag_t tag;
/* the seq number of this message */
uint32_t seq_num;
/* number of bytes in message */
uint32_t nbytes;
/* type of message */
mca_oob_tcp_msg_type_t type;
/* routed module to be used */
char routed[ORTE_MAX_RTD_SIZE+1];
} mca_oob_tcp_hdr_t;
Expand All @@ -71,7 +73,6 @@ typedef struct {
/*
 * Convert the multi-byte fields of a header from network to host byte
 * order, in place. (h) is a pointer to the header and is evaluated
 * several times, so it must not have side effects.
 *
 * The one-byte type field is deliberately left alone: passing it
 * through ntohl() would move its value into the high byte of a 32-bit
 * word and the store back into the field would truncate it to 0 on
 * little-endian hosts.
 * NOTE(review): seq_num is not converted here - confirm that is intended.
 */
#define MCA_OOB_TCP_HDR_NTOH(h)                     \
    do {                                            \
        ORTE_PROCESS_NAME_NTOH((h)->origin);        \
        ORTE_PROCESS_NAME_NTOH((h)->dst);           \
        (h)->tag = ORTE_RML_TAG_NTOH((h)->tag);     \
        (h)->nbytes = ntohl((h)->nbytes);           \
    } while (0)

Expand All @@ -81,7 +82,6 @@ typedef struct {
/*
 * Convert the multi-byte fields of a header from host to network byte
 * order, in place. (h) is a pointer to the header and is evaluated
 * several times, so it must not have side effects.
 *
 * The one-byte type field is deliberately left alone: passing it
 * through htonl() would move its value into the high byte of a 32-bit
 * word and the store back into the field would truncate it to 0 on
 * little-endian hosts.
 * NOTE(review): seq_num is not converted here - confirm that is intended.
 */
#define MCA_OOB_TCP_HDR_HTON(h)                     \
    do {                                            \
        ORTE_PROCESS_NAME_HTON((h)->origin);        \
        ORTE_PROCESS_NAME_HTON((h)->dst);           \
        (h)->tag = ORTE_RML_TAG_HTON((h)->tag);     \
        (h)->nbytes = htonl((h)->nbytes);           \
    } while (0)

Expand Down