Skip to content

Commit 7e14d75

Browse files
committed
Force inlining memcpy for short data
This work is based on @derbeyn patch provided on #6678. I reworked it to be more inclusive (works now with both gcc and icc) and to cover more standard size lengths (4, 8, 16). Signed-off-by: George Bosilca <[email protected]> Signed-off-by: Nadia Derbey <[email protected]>
1 parent a2239d4 commit 7e14d75

File tree

1 file changed

+25
-2
lines changed

1 file changed

+25
-2
lines changed

opal/datatype/opal_datatype_memcpy.h

+25-2
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,30 @@
1414
#ifndef OPAL_DATATYPE_MEMCPY_H_HAS_BEEN_INCLUDED
1515
#define OPAL_DATATYPE_MEMCPY_H_HAS_BEEN_INCLUDED
1616

17-
/* Definition on the removed side of this diff: forwards its three arguments unchanged to memcpy(). */
#define MEMCPY( DST, SRC, BLENGTH ) \
18-
memcpy( (DST), (SRC), (BLENGTH) )
17+
/*
18+
* This macro is called whenever we are packing/unpacking a DDT that
19+
* is built with basic datatypes.
20+
* Specifying a fixed size for the memcpy() makes the intel compiler
21+
* inline it as an assignment operation.
22+
* This code is a bit hacky, but doing this we can divide the latency
23+
* by up to 2 during DDT exchanges.
24+
*/
25+
/*
 * MEMCPY( DST, SRC, BLENGTH ): copy BLENGTH bytes from SRC to DST.
 *
 * Same contract as memcpy(): the two regions must not overlap. Each macro
 * argument is evaluated exactly once, so expressions with side effects
 * (e.g. ptr++) are safe to pass.
 *
 * Lengths of 4, 8 and 16 bytes are routed to memcpy() calls whose size is a
 * compile-time constant, which lets the compiler expand the copy inline
 * instead of emitting a library call. The copy is deliberately NOT done by
 * assigning through int / double / long double lvalues:
 *   - such accesses require DST and SRC to be suitably aligned,
 *   - they violate the effective-type (aliasing) rules when the buffers hold
 *     data of another type,
 *   - a long double assignment is not guaranteed to copy padding bytes, so
 *     a 16-byte payload could be truncated.
 * A constant-size memcpy() has none of these restrictions.
 */
#define MEMCPY( DST, SRC, BLENGTH )                                          \
    do {                                                                     \
        /* Long, prefixed names so caller identifiers are never shadowed */  \
        void *opal_memcpy_dst_ = (DST);                                      \
        const void *opal_memcpy_src_ = (SRC);                                \
        const size_t opal_memcpy_len_ = (size_t)(BLENGTH);                   \
        switch( opal_memcpy_len_ ) {                                         \
        case 4:   /* typically an int or a float */                          \
            memcpy( opal_memcpy_dst_, opal_memcpy_src_, 4 );                 \
            break;                                                           \
        case 8:   /* typically a double or a 64-bit integer */               \
            memcpy( opal_memcpy_dst_, opal_memcpy_src_, 8 );                 \
            break;                                                           \
        case 16:  /* typically a long double or a double complex */          \
            memcpy( opal_memcpy_dst_, opal_memcpy_src_, 16 );                \
            break;                                                           \
        default:  /* any other length: regular memcpy() */                   \
            memcpy( opal_memcpy_dst_, opal_memcpy_src_, opal_memcpy_len_ );  \
            break;                                                           \
        }                                                                    \
    } while (0)
1942

2043
#endif /* OPAL_DATATYPE_MEMCPY_H_HAS_BEEN_INCLUDED */

0 commit comments

Comments
 (0)