Force inlining memcpy for short data

bosilca · bosilca · commit 7e14d75d14c4 · 2019-05-22T10:58:48.000-04:00
This work is based on @derbeyn patch provided on #6678. I reworked it to be more inclusive (works now with both gcc and icc) and to cover more standard size lengths (4, 8, 16). Signed-off-by: George Bosilca <bosilca@icl.utk.edu> Signed-off-by: Nadia Derbey <Nadia.Derbey@atos.net>
diff --git a/opal/datatype/opal_datatype_memcpy.h b/opal/datatype/opal_datatype_memcpy.h
@@ -14,7 +14,30 @@
 #ifndef OPAL_DATATYPE_MEMCPY_H_HAS_BEEN_INCLUDED
 #define OPAL_DATATYPE_MEMCPY_H_HAS_BEEN_INCLUDED
 
-#define MEMCPY( DST, SRC, BLENGTH ) \
-    memcpy( (DST), (SRC), (BLENGTH) )
+/*
+ * MEMCPY( DST, SRC, BLENGTH ): copy BLENGTH bytes from SRC to DST.
+ *
+ * Called whenever we pack/unpack a DDT built from basic datatypes.
+ * Calling memcpy() with a compile-time constant size typically lets
+ * the compiler expand it inline as plain load/store instructions,
+ * which can divide the latency of DDT exchanges by up to 2. Unlike a
+ * typed assignment this carries no alignment or aliasing requirement
+ * and copies every byte. Each argument is evaluated exactly once; as
+ * with memcpy(), the two regions must not overlap.
+ */
+#define MEMCPY( DST, SRC, BLENGTH )                                 \
+    do {                                                            \
+        void *opal_memcpy_dst_ = (DST);                             \
+        const void *opal_memcpy_src_ = (SRC);                       \
+        const size_t opal_memcpy_len_ = (BLENGTH);                  \
+        switch( opal_memcpy_len_ ) {                                \
+        case 4:  memcpy(opal_memcpy_dst_, opal_memcpy_src_, 4);  break;   \
+        case 8:  memcpy(opal_memcpy_dst_, opal_memcpy_src_, 8);  break;   \
+        case 16: memcpy(opal_memcpy_dst_, opal_memcpy_src_, 16); break;   \
+        default:  /* any other length: regular run-time sized copy */     \
+            memcpy(opal_memcpy_dst_, opal_memcpy_src_, opal_memcpy_len_); \
+            break;                                                  \
+        }                                                           \
+    } while (0)
 
 #endif  /* OPAL_DATATYPE_MEMCPY_H_HAS_BEEN_INCLUDED */