Skip to content

Commit cbaf3b6

Browse files
committed
opal/datatype: add minimal support to convert long double
between ieee 754 quadruple precision and extended precision formats. Signed-off-by: Gilles Gouaillardet <[email protected]> (cherry picked from commit 8fd08b9)
1 parent c4184ae commit cbaf3b6

File tree

2 files changed

+85
-4
lines changed

2 files changed

+85
-4
lines changed

configure.ac

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -588,7 +588,7 @@ AC_CACHE_SAVE
588588
opal_show_title "Header file tests"
589589

590590
AC_CHECK_HEADERS([alloca.h aio.h arpa/inet.h dirent.h \
591-
dlfcn.h endian.h execinfo.h err.h fcntl.h grp.h libgen.h \
591+
dlfcn.h endian.h execinfo.h err.h fcntl.h grp.h ieee754.h libgen.h \
592592
libutil.h memory.h netdb.h netinet/in.h netinet/tcp.h \
593593
poll.h pthread.h pty.h pwd.h sched.h \
594594
strings.h stropts.h linux/ethtool.h linux/sockios.h \

opal/datatype/opal_copy_functions_heterogeneous.c

Lines changed: 84 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
* reserved.
66
* Copyright (c) 2009 Oak Ridge National Labs. All rights reserved.
77
* Copyright (c) 2015-2017 Research Organization for Information Science
89
* and Technology (RIST). All rights reserved.
910
* $COPYRIGHT$
1011
*
@@ -15,6 +16,10 @@
1516

1617
#include "opal_config.h"
1718

19+
#ifdef HAVE_IEEE754_H
20+
#include <ieee754.h>
21+
#endif
22+
1823
#include <stddef.h>
1924
#include <stdint.h>
2025

@@ -62,13 +67,74 @@ opal_dt_swap_bytes(void *to_p, const void *from_p, const size_t size, size_t cou
6267
}
6368
}
6469

70+
#ifdef HAVE_IEEE754_H
71+
/**
 * Bit-field view of a 128-bit long double: 112 mantissa bits split over
 * four fields, a 15-bit exponent and a 1-bit sign.
 *
 * opal_dt_swap_long_double() overlays this on a received value in its
 * __x86_64 branch.  The field order is significant and must not change.
 * NOTE(review): the actual bit placement depends on how the compiler
 * allocates bit-fields -- presumably least significant field first on
 * x86_64; confirm before reusing on another target.
 */
struct bit128 {
    unsigned mantissa3 : 32;
    unsigned mantissa2 : 32;
    unsigned mantissa1 : 32;
    unsigned mantissa0 : 16;
    unsigned exponent  : 15;
    unsigned negative  : 1;
};
79+
80+
struct bit80 {
81+
unsigned int pad:32;
82+
unsigned int empty:16;
83+
unsigned int negative:1;
84+
unsigned int exponent:15;
85+
unsigned int mantissa0:32;
86+
unsigned int mantissa1:32;
87+
};
88+
89+
static inline void
90+
opal_dt_swap_long_double(void *to_p, const void *from_p, const size_t size, size_t count, uint32_t remoteArch)
91+
{
92+
size_t i;
93+
long double*to = (long double *) to_p;
94+
95+
if ((opal_local_arch&OPAL_ARCH_LDISINTEL) && !(remoteArch&OPAL_ARCH_LDISINTEL)) {
96+
#ifdef __x86_64
97+
for (i=0; i<count; i++, to++) {
98+
union ieee854_long_double ld;
99+
struct bit128 * b = (struct bit128 *)to;
100+
ld.ieee.empty = 0;
101+
ld.ieee.mantissa0 = 0x80000000 | (((unsigned int)b->mantissa0 << 15) & 0x7FFF8000) | ((b->mantissa1 >> 17) & 0x00007FFF);
102+
ld.ieee.mantissa1 = ((b->mantissa1 << 15) & 0xFFFF8000) | ((b->mantissa2 << 17) & 0x000007FFF);
103+
ld.ieee.exponent = b->exponent;
104+
ld.ieee.negative = b->negative;
105+
MEMCPY( to, &ld, sizeof(long double));
106+
}
107+
#endif
108+
} else if (!(opal_local_arch&OPAL_ARCH_LDISINTEL) && (remoteArch&OPAL_ARCH_LDISINTEL)) {
109+
#ifdef __sparcv9
110+
for (i=0; i<count; i++, to++) {
111+
union ieee854_long_double ld;
112+
struct bit80 * b = (struct bit80 *)to;
113+
ld.ieee.mantissa3 = 0;
114+
ld.ieee.mantissa2 = 0;
115+
ld.ieee.mantissa0 = (b->mantissa0 << 1) | (b->mantissa1 & 0x80000000);
116+
ld.ieee.mantissa1 = (b->mantissa1 << 1) & 0xFFFFFFFE;
117+
ld.ieee.exponent = b->exponent;
118+
ld.ieee.negative = b->negative;
119+
MEMCPY( to, &ld, sizeof(long double));
120+
}
121+
#endif
122+
}
123+
}
124+
#else
125+
/* Without <ieee754.h> there is no long double format conversion: the call
 * is an explicit no-op and none of its arguments are evaluated. */
#define opal_dt_swap_long_double(to_p, from_p, size, count, remoteArch) ((void) 0)
126+
#endif
127+
65128
/**
66129
* BEWARE: Do not use the following macro with composed types such as
67130
* complex. As the swap is done using the entire type sizeof, the
68131
* wrong endianess translation will be done. Instead, use the
69132
* COPY_2SAMETYPE_HETEROGENEOUS.
70133
*/
71134
#define COPY_TYPE_HETEROGENEOUS( TYPENAME, TYPE ) \
135+
COPY_TYPE_HETEROGENEOUS_INTERNAL( TYPENAME, TYPE, 0 )
136+
137+
#define COPY_TYPE_HETEROGENEOUS_INTERNAL( TYPENAME, TYPE, LONG_DOUBLE ) \
72138
static int32_t \
73139
copy_##TYPENAME##_heterogeneous(opal_convertor_t *pConvertor, uint32_t count, \
74140
const char* from, size_t from_len, ptrdiff_t from_extent, \
@@ -85,9 +151,15 @@ copy_##TYPENAME##_heterogeneous(opal_convertor_t *pConvertor, uint32_t count,
85151
(opal_local_arch & OPAL_ARCH_ISBIGENDIAN)) { \
86152
if( (to_extent == from_extent) && (to_extent == sizeof(TYPE)) ) { \
87153
opal_dt_swap_bytes(to, from, sizeof(TYPE), count); \
154+
if (LONG_DOUBLE) { \
155+
opal_dt_swap_long_double(to, from, sizeof(TYPE), count, pConvertor->remoteArch);\
156+
} \
88157
} else { \
89158
for( i = 0; i < count; i++ ) { \
90159
opal_dt_swap_bytes(to, from, sizeof(TYPE), 1); \
160+
if (LONG_DOUBLE) { \
161+
opal_dt_swap_long_double(to, from, sizeof(TYPE), 1, pConvertor->remoteArch);\
162+
} \
91163
to += to_extent; \
92164
from += from_extent; \
93165
} \
@@ -108,6 +180,9 @@ copy_##TYPENAME##_heterogeneous(opal_convertor_t *pConvertor, uint32_t count,
108180
}
109181

110182
#define COPY_2SAMETYPE_HETEROGENEOUS( TYPENAME, TYPE ) \
183+
COPY_2SAMETYPE_HETEROGENEOUS_INTERNAL( TYPENAME, TYPE, 0)
184+
185+
#define COPY_2SAMETYPE_HETEROGENEOUS_INTERNAL( TYPENAME, TYPE, LONG_DOUBLE) \
111186
static int32_t \
112187
copy_##TYPENAME##_heterogeneous(opal_convertor_t *pConvertor, uint32_t count, \
113188
const char* from, size_t from_len, ptrdiff_t from_extent, \
@@ -122,11 +197,17 @@ copy_##TYPENAME##_heterogeneous(opal_convertor_t *pConvertor, uint32_t count,
122197
\
123198
if ((pConvertor->remoteArch & OPAL_ARCH_ISBIGENDIAN) != \
124199
(opal_local_arch & OPAL_ARCH_ISBIGENDIAN)) { \
125-
if( (to_extent == from_extent) && (to_extent == sizeof(TYPE)) ) { \
200+
if( (to_extent == from_extent) && (to_extent == (2 * sizeof(TYPE))) ) { \
126201
opal_dt_swap_bytes(to, from, sizeof(TYPE), 2 * count); \
202+
if (LONG_DOUBLE) { \
203+
opal_dt_swap_long_double(to, from, sizeof(TYPE), 2*count, pConvertor->remoteArch);\
204+
} \
127205
} else { \
128206
for( i = 0; i < count; i++ ) { \
129207
opal_dt_swap_bytes(to, from, sizeof(TYPE), 2); \
208+
if (LONG_DOUBLE) { \
209+
opal_dt_swap_long_double(to, from, sizeof(TYPE), 2, pConvertor->remoteArch);\
210+
} \
130211
to += to_extent; \
131212
from += from_extent; \
132213
} \
@@ -333,7 +414,7 @@ COPY_TYPE_HETEROGENEOUS( float16, float )
333414
#elif SIZEOF_DOUBLE == 16
334415
COPY_TYPE_HETEROGENEOUS( float16, double )
335416
#elif HAVE_LONG_DOUBLE && SIZEOF_LONG_DOUBLE == 16
336-
COPY_TYPE_HETEROGENEOUS( float16, long double )
417+
COPY_TYPE_HETEROGENEOUS_INTERNAL( float16, long double, 1)
337418
#else
338419
/* #error No basic type for copy function for opal_datatype_float16 found */
339420
#define copy_float16_heterogeneous NULL
@@ -354,7 +435,7 @@ COPY_2SAMETYPE_HETEROGENEOUS( double_complex, double )
354435
#endif
355436

356437
#if HAVE_LONG_DOUBLE__COMPLEX
357-
COPY_2SAMETYPE_HETEROGENEOUS( long_double_complex, long double )
438+
COPY_2SAMETYPE_HETEROGENEOUS_INTERNAL( long_double_complex, long double, 1)
358439
#else
359440
/* #error No basic type for copy function for opal_datatype_long_double_complex found */
360441
#define copy_long_double_complex_heterogeneous NULL

0 commit comments

Comments
 (0)