From 4a171e3ec709fcb80d44184d4762d27f48c4ef31 Mon Sep 17 00:00:00 2001
From: Daniel Smith <daniel.smith@datadoghq.com>
Date: Wed, 27 May 2020 21:49:53 +0000
Subject: [PATCH 01/11] Add one AVX512f comparison and the intrinsics needed to
 test it

---
 crates/core_arch/src/simd.rs        |  4 ++++
 crates/core_arch/src/x86/avx512f.rs | 37 +++++++++++++++++++++++++++++
 crates/core_arch/src/x86/mod.rs     |  9 +++++++
 3 files changed, 50 insertions(+)

diff --git a/crates/core_arch/src/simd.rs b/crates/core_arch/src/simd.rs
index 746f084641..7e4f7e8cce 100644
--- a/crates/core_arch/src/simd.rs
+++ b/crates/core_arch/src/simd.rs
@@ -201,3 +201,7 @@ simd_ty!(i32x16[i32]:
 simd_ty!(i64x8[i64]:
          i64, i64, i64, i64, i64, i64, i64, i64
          | x0, x1, x2, x3, x4, x5, x6, x7);
+
+simd_ty!(u64x8[u64]:
+         u64, u64, u64, u64, u64, u64, u64, u64
+         | x0, x1, x2, x3, x4, x5, x6, x7);
diff --git a/crates/core_arch/src/x86/avx512f.rs b/crates/core_arch/src/x86/avx512f.rs
index 9515b7f728..64f13f9bfa 100644
--- a/crates/core_arch/src/x86/avx512f.rs
+++ b/crates/core_arch/src/x86/avx512f.rs
@@ -94,6 +94,35 @@ pub unsafe fn _mm512_set1_epi64(a: i64) -> __m512i {
     transmute(i64x8::splat(a))
 }
 
+/// Sets packed 64-bit integers in `dst` with the supplied values.
+///
+/// [Intel's documentation]( https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,4909&text=_mm512_set_epi64)
+#[inline]
+#[target_feature(enable = "avx512f")]
+pub unsafe fn _mm512_set_epi64(
+    e7: i64,
+    e6: i64,
+    e5: i64,
+    e4: i64,
+    e3: i64,
+    e2: i64,
+    e1: i64,
+    e0: i64,
+) -> __m512i {
+    let r = i64x8(e0, e1, e2, e3, e4, e5, e6, e7);
+    transmute(r)
+}
+
+/// Compare packed unsigned 64-bit integers in a and b for less-than, and store the results in a mask vector.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmplt_epu64)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vpcmpuq))]
+pub unsafe fn _mm512_cmplt_epu64_mask(a: __m512i, b: __m512i) -> __mmask8 {
+    simd_bitmask::<__m512i, _>(simd_lt(a.as_u64x8(), b.as_u64x8()))
+}
+
 #[cfg(test)]
 mod tests {
     use std;
@@ -197,4 +226,12 @@ mod tests {
         );
         assert_eq_m512i(r, e);
     }
+
+    #[simd_test(enable = "avx512f")]
+    unsafe fn test_mm512_cmplt_epu64_mask() {
+        let a = _mm512_set_epi64(0, 1, -1, u64::MAX as i64, i64::MAX, i64::MIN, 100, -100);
+        let b = _mm512_set1_epi64(-1);
+        let m = _mm512_cmplt_epu64_mask(a, b);
+        assert_eq!(m, 0b11001111);
+    }
 }
diff --git a/crates/core_arch/src/x86/mod.rs b/crates/core_arch/src/x86/mod.rs
index 78a3e23179..2a47597a66 100644
--- a/crates/core_arch/src/x86/mod.rs
+++ b/crates/core_arch/src/x86/mod.rs
@@ -346,6 +346,10 @@ types! {
 #[allow(non_camel_case_types)]
 pub type __mmask16 = i16;
 
+/// The `__mmask8` type used in AVX-512 intrinsics, a 8-bit integer
+#[allow(non_camel_case_types)]
+pub type __mmask8 = u8;
+
 #[cfg(test)]
 mod test;
 #[cfg(test)]
@@ -509,6 +513,11 @@ pub(crate) trait m512iExt: Sized {
     fn as_i32x16(self) -> crate::core_arch::simd::i32x16 {
         unsafe { transmute(self.as_m512i()) }
     }
+
+    #[inline]
+    fn as_u64x8(self) -> crate::core_arch::simd::u64x8 {
+        unsafe { transmute(self.as_m512i()) }
+    }
 }
 
 impl m512iExt for __m512i {

From 17b48f9fea2e9e2c6bf2742626c5ca3a7964f3b8 Mon Sep 17 00:00:00 2001
From: Daniel Smith <daniel.smith@datadoghq.com>
Date: Wed, 27 May 2020 22:35:36 +0000
Subject: [PATCH 02/11] Add __mmask8 type

---
 crates/stdarch-verify/src/lib.rs         | 1 +
 crates/stdarch-verify/tests/x86-intel.rs | 3 +++
 2 files changed, 4 insertions(+)

diff --git a/crates/stdarch-verify/src/lib.rs b/crates/stdarch-verify/src/lib.rs
index c56fb0de7e..62ad41c48f 100644
--- a/crates/stdarch-verify/src/lib.rs
+++ b/crates/stdarch-verify/src/lib.rs
@@ -145,6 +145,7 @@ fn to_type(t: &syn::Type) -> proc_macro2::TokenStream {
             "__m512" => quote! { &M512 },
             "__m512d" => quote! { &M512D },
             "__m512i" => quote! { &M512I },
+            "__mmask8" => quote! { &MMASK8 },
             "__mmask16" => quote! { &MMASK16 },
             "__m64" => quote! { &M64 },
             "bool" => quote! { &BOOL },
diff --git a/crates/stdarch-verify/tests/x86-intel.rs b/crates/stdarch-verify/tests/x86-intel.rs
index 572de603d3..bf8ede6071 100644
--- a/crates/stdarch-verify/tests/x86-intel.rs
+++ b/crates/stdarch-verify/tests/x86-intel.rs
@@ -53,6 +53,7 @@ static M256D: Type = Type::M256D;
 static M512: Type = Type::M512;
 static M512I: Type = Type::M512I;
 static M512D: Type = Type::M512D;
+static MMASK8: Type = Type::MMASK8;
 static MMASK16: Type = Type::MMASK16;
 
 static TUPLE: Type = Type::Tuple;
@@ -76,6 +77,7 @@ enum Type {
     M512,
     M512D,
     M512I,
+    MMASK8,
     MMASK16,
     Tuple,
     CpuidResult,
@@ -653,6 +655,7 @@ fn equate(t: &Type, intel: &str, intrinsic: &str, is_const: bool) -> Result<(),
         (&Type::ConstPtr(&Type::M512I), "__m512i const*") => {}
         (&Type::ConstPtr(&Type::M512D), "__m512d const*") => {}
 
+        (&Type::MMASK8, "__mmask8") => {}
         (&Type::MMASK16, "__mmask16") => {}
 
         // This is a macro (?) in C which seems to mutate its arguments, but

From fda1c0d216b6f4b32bc314d178090d2f6088ca60 Mon Sep 17 00:00:00 2001
From: Daniel Smith <daniel.smith@datadoghq.com>
Date: Wed, 27 May 2020 22:36:42 +0000
Subject: [PATCH 03/11] Move 64 bit function based on stdarch-verify

---
 crates/core_arch/src/x86/avx512f.rs    | 19 ----------
 crates/core_arch/src/x86_64/avx512f.rs | 50 ++++++++++++++++++++++++++
 2 files changed, 50 insertions(+), 19 deletions(-)
 create mode 100644 crates/core_arch/src/x86_64/avx512f.rs

diff --git a/crates/core_arch/src/x86/avx512f.rs b/crates/core_arch/src/x86/avx512f.rs
index 64f13f9bfa..5b64367d1f 100644
--- a/crates/core_arch/src/x86/avx512f.rs
+++ b/crates/core_arch/src/x86/avx512f.rs
@@ -94,25 +94,6 @@ pub unsafe fn _mm512_set1_epi64(a: i64) -> __m512i {
     transmute(i64x8::splat(a))
 }
 
-/// Sets packed 64-bit integers in `dst` with the supplied values.
-///
-/// [Intel's documentation]( https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,4909&text=_mm512_set_epi64)
-#[inline]
-#[target_feature(enable = "avx512f")]
-pub unsafe fn _mm512_set_epi64(
-    e7: i64,
-    e6: i64,
-    e5: i64,
-    e4: i64,
-    e3: i64,
-    e2: i64,
-    e1: i64,
-    e0: i64,
-) -> __m512i {
-    let r = i64x8(e0, e1, e2, e3, e4, e5, e6, e7);
-    transmute(r)
-}
-
 /// Compare packed unsigned 64-bit integers in a and b for less-than, and store the results in a mask vector.
 ///
 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmplt_epu64)
diff --git a/crates/core_arch/src/x86_64/avx512f.rs b/crates/core_arch/src/x86_64/avx512f.rs
new file mode 100644
index 0000000000..8c40235958
--- /dev/null
+++ b/crates/core_arch/src/x86_64/avx512f.rs
@@ -0,0 +1,50 @@
+use crate::{
+    core_arch::{simd::*, simd_llvm::*, x86_64::*},
+    mem::{self, transmute},
+};
+
+/// Sets packed 64-bit integers in `dst` with the supplied values.
+///
+/// [Intel's documentation]( https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,4909&text=_mm512_set_epi64)
+#[inline]
+#[target_feature(enable = "avx512f")]
+pub unsafe fn _mm512_set_epi64(
+    e7: i64,
+    e6: i64,
+    e5: i64,
+    e4: i64,
+    e3: i64,
+    e2: i64,
+    e1: i64,
+    e0: i64,
+) -> __m512i {
+    let r = i64x8(e0, e1, e2, e3, e4, e5, e6, e7);
+    transmute(r)
+}
+
+/// Compare packed unsigned 64-bit integers in a and b for less-than, and store the results in a mask vector.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmplt_epu64)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vpcmpuq))]
+pub unsafe fn _mm512_cmplt_epu64_mask(a: __m512i, b: __m512i) -> __mmask8 {
+    simd_bitmask::<__m512i, _>(simd_lt(a.as_u64x8(), b.as_u64x8()))
+}
+
+#[cfg(test)]
+mod tests {
+    use std;
+    use stdarch_test::simd_test;
+
+    use crate::core_arch::x86::*;
+    use crate::core_arch::x86_64::*;
+
+    #[simd_test(enable = "avx512f")]
+    unsafe fn test_mm512_cmplt_epu64_mask() {
+        let a = _mm512_set_epi64(0, 1, -1, u64::MAX as i64, i64::MAX, i64::MIN, 100, -100);
+        let b = _mm512_set1_epi64(-1);
+        let m = _mm512_cmplt_epu64_mask(a, b);
+        assert_eq!(m, 0b11001111);
+    }
+}

From 1c7ade8346b6bf884caf563ab57e3c15d70b968a Mon Sep 17 00:00:00 2001
From: Daniel Smith <daniel.smith@datadoghq.com>
Date: Wed, 27 May 2020 22:57:27 +0000
Subject: [PATCH 04/11] Fix stdarch-verify test

---
 crates/core_arch/src/x86/avx512f.rs    |  8 ------
 crates/core_arch/src/x86_64/avx512f.rs | 35 ++++++++++++++++++++------
 2 files changed, 28 insertions(+), 15 deletions(-)

diff --git a/crates/core_arch/src/x86/avx512f.rs b/crates/core_arch/src/x86/avx512f.rs
index 5b64367d1f..804ffc30d3 100644
--- a/crates/core_arch/src/x86/avx512f.rs
+++ b/crates/core_arch/src/x86/avx512f.rs
@@ -207,12 +207,4 @@ mod tests {
         );
         assert_eq_m512i(r, e);
     }
-
-    #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_cmplt_epu64_mask() {
-        let a = _mm512_set_epi64(0, 1, -1, u64::MAX as i64, i64::MAX, i64::MIN, 100, -100);
-        let b = _mm512_set1_epi64(-1);
-        let m = _mm512_cmplt_epu64_mask(a, b);
-        assert_eq!(m, 0b11001111);
-    }
 }
diff --git a/crates/core_arch/src/x86_64/avx512f.rs b/crates/core_arch/src/x86_64/avx512f.rs
index 8c40235958..c4544a5acf 100644
--- a/crates/core_arch/src/x86_64/avx512f.rs
+++ b/crates/core_arch/src/x86_64/avx512f.rs
@@ -18,18 +18,27 @@ pub unsafe fn _mm512_set_epi64(
     e1: i64,
     e0: i64,
 ) -> __m512i {
-    let r = i64x8(e0, e1, e2, e3, e4, e5, e6, e7);
-    transmute(r)
+  _mm512_setr_epi64(e7, e6, e5, e4, e3, e2, e1, e0);
 }
 
-/// Compare packed unsigned 64-bit integers in a and b for less-than, and store the results in a mask vector.
+/// Sets packed 64-bit integers in `dst` with the supplied values in
+/// reverse order.
 ///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmplt_epu64)
+/// [Intel's documentation]( https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,4909&text=_mm512_set_epi64)
 #[inline]
 #[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vpcmpuq))]
-pub unsafe fn _mm512_cmplt_epu64_mask(a: __m512i, b: __m512i) -> __mmask8 {
-    simd_bitmask::<__m512i, _>(simd_lt(a.as_u64x8(), b.as_u64x8()))
+pub unsafe fn _mm512_setr_epi64(
+    e7: i64,
+    e6: i64,
+    e5: i64,
+    e4: i64,
+    e3: i64,
+    e2: i64,
+    e1: i64,
+    e0: i64,
+) -> __m512i {
+    let r = i64x8(e0, e1, e2, e3, e4, e5, e6, e7);
+    transmute(r)
 }
 
 #[cfg(test)]
@@ -47,4 +56,16 @@ mod tests {
         let m = _mm512_cmplt_epu64_mask(a, b);
         assert_eq!(m, 0b11001111);
     }
+
+    #[simd_test(enable = "avx512f")]
+    unsafe fn test_mm512_set_epi64() {
+        let r = _mm512_setr_epi64(0, 1, 2, 3, 4, 5, 6, 7);
+	assert_eq_m512i(r, _mm512_set_epi64(7, 6, 5, 4, 3, 2, 1, 0))
+    }
+
+    #[simd_test(enable = "avx512f")]
+    unsafe fn test_mm512_setr_epi64() {
+        let r = _mm512_set_epi64(0, 1, 2, 3, 4, 5, 6, 7);
+	assert_eq_m512i(r, _mm512_setr_epi64(7, 6, 5, 4, 3, 2, 1, 0))
+    }
 }

From 61b67d527d4c096704748855fa510e57846751a1 Mon Sep 17 00:00:00 2001
From: Daniel Smith <daniel.smith@datadoghq.com>
Date: Wed, 27 May 2020 23:08:23 +0000
Subject: [PATCH 05/11] Use correct instruction

---
 crates/core_arch/src/x86/avx512f.rs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/crates/core_arch/src/x86/avx512f.rs b/crates/core_arch/src/x86/avx512f.rs
index 804ffc30d3..a8632111d5 100644
--- a/crates/core_arch/src/x86/avx512f.rs
+++ b/crates/core_arch/src/x86/avx512f.rs
@@ -99,7 +99,7 @@ pub unsafe fn _mm512_set1_epi64(a: i64) -> __m512i {
 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmplt_epu64)
 #[inline]
 #[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vpcmpuq))]
+#[cfg_attr(test, assert_instr(vpcmpltuq))]
 pub unsafe fn _mm512_cmplt_epu64_mask(a: __m512i, b: __m512i) -> __mmask8 {
     simd_bitmask::<__m512i, _>(simd_lt(a.as_u64x8(), b.as_u64x8()))
 }

From 9ddd036ab8a9bff2a25e71baacb930dda5472a0d Mon Sep 17 00:00:00 2001
From: Daniel Smith <daniel.smith@datadoghq.com>
Date: Thu, 28 May 2020 20:45:13 +0000
Subject: [PATCH 06/11] Only check for the instruction prefix since MSVC and
 Clang use different instructions

---
 crates/core_arch/src/x86/avx512f.rs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/crates/core_arch/src/x86/avx512f.rs b/crates/core_arch/src/x86/avx512f.rs
index a8632111d5..2e71cf296a 100644
--- a/crates/core_arch/src/x86/avx512f.rs
+++ b/crates/core_arch/src/x86/avx512f.rs
@@ -99,7 +99,7 @@ pub unsafe fn _mm512_set1_epi64(a: i64) -> __m512i {
 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmplt_epu64)
 #[inline]
 #[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vpcmpltuq))]
+#[cfg_attr(test, assert_instr(vpcmp))]
 pub unsafe fn _mm512_cmplt_epu64_mask(a: __m512i, b: __m512i) -> __mmask8 {
     simd_bitmask::<__m512i, _>(simd_lt(a.as_u64x8(), b.as_u64x8()))
 }

From 65bdc88af3e2f8628e89900615a71aee91d8db63 Mon Sep 17 00:00:00 2001
From: Daniel Smith <daniel.smith@datadoghq.com>
Date: Thu, 28 May 2020 21:49:54 +0000
Subject: [PATCH 07/11] rustfmt

---
 crates/core_arch/src/x86_64/avx512f.rs | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/crates/core_arch/src/x86_64/avx512f.rs b/crates/core_arch/src/x86_64/avx512f.rs
index c4544a5acf..ab6eb69e66 100644
--- a/crates/core_arch/src/x86_64/avx512f.rs
+++ b/crates/core_arch/src/x86_64/avx512f.rs
@@ -18,7 +18,7 @@ pub unsafe fn _mm512_set_epi64(
     e1: i64,
     e0: i64,
 ) -> __m512i {
-  _mm512_setr_epi64(e7, e6, e5, e4, e3, e2, e1, e0);
+    _mm512_setr_epi64(e7, e6, e5, e4, e3, e2, e1, e0);
 }
 
 /// Sets packed 64-bit integers in `dst` with the supplied values in
@@ -60,12 +60,12 @@ mod tests {
     #[simd_test(enable = "avx512f")]
     unsafe fn test_mm512_set_epi64() {
         let r = _mm512_setr_epi64(0, 1, 2, 3, 4, 5, 6, 7);
-	assert_eq_m512i(r, _mm512_set_epi64(7, 6, 5, 4, 3, 2, 1, 0))
+        assert_eq_m512i(r, _mm512_set_epi64(7, 6, 5, 4, 3, 2, 1, 0))
     }
 
     #[simd_test(enable = "avx512f")]
     unsafe fn test_mm512_setr_epi64() {
         let r = _mm512_set_epi64(0, 1, 2, 3, 4, 5, 6, 7);
-	assert_eq_m512i(r, _mm512_setr_epi64(7, 6, 5, 4, 3, 2, 1, 0))
+        assert_eq_m512i(r, _mm512_setr_epi64(7, 6, 5, 4, 3, 2, 1, 0))
     }
 }

From 3c6eca455eaf2705aa4584ef76e3e26b9a1fdd7e Mon Sep 17 00:00:00 2001
From: Daniel Smith <daniel.smith@datadoghq.com>
Date: Thu, 28 May 2020 22:15:12 +0000
Subject: [PATCH 08/11] finish/fix adding avx512f to x86_64

---
 crates/core_arch/src/x86_64/avx512f.rs | 36 +++++++++++++-------------
 crates/core_arch/src/x86_64/mod.rs     |  3 +++
 2 files changed, 21 insertions(+), 18 deletions(-)

diff --git a/crates/core_arch/src/x86_64/avx512f.rs b/crates/core_arch/src/x86_64/avx512f.rs
index ab6eb69e66..c5ab3af525 100644
--- a/crates/core_arch/src/x86_64/avx512f.rs
+++ b/crates/core_arch/src/x86_64/avx512f.rs
@@ -1,6 +1,6 @@
 use crate::{
-    core_arch::{simd::*, simd_llvm::*, x86_64::*},
-    mem::{self, transmute},
+    core_arch::{simd::*, x86::*},
+    mem::transmute,
 };
 
 /// Sets packed 64-bit integers in `dst` with the supplied values.
@@ -9,16 +9,16 @@ use crate::{
 #[inline]
 #[target_feature(enable = "avx512f")]
 pub unsafe fn _mm512_set_epi64(
-    e7: i64,
-    e6: i64,
-    e5: i64,
-    e4: i64,
-    e3: i64,
-    e2: i64,
-    e1: i64,
     e0: i64,
+    e1: i64,
+    e2: i64,
+    e3: i64,
+    e4: i64,
+    e5: i64,
+    e6: i64,
+    e7: i64,
 ) -> __m512i {
-    _mm512_setr_epi64(e7, e6, e5, e4, e3, e2, e1, e0);
+    _mm512_setr_epi64(e7, e6, e5, e4, e3, e2, e1, e0)
 }
 
 /// Sets packed 64-bit integers in `dst` with the supplied values in
@@ -28,16 +28,16 @@ pub unsafe fn _mm512_set_epi64(
 #[inline]
 #[target_feature(enable = "avx512f")]
 pub unsafe fn _mm512_setr_epi64(
-    e7: i64,
-    e6: i64,
-    e5: i64,
-    e4: i64,
-    e3: i64,
-    e2: i64,
-    e1: i64,
     e0: i64,
+    e1: i64,
+    e2: i64,
+    e3: i64,
+    e4: i64,
+    e5: i64,
+    e6: i64,
+    e7: i64,
 ) -> __m512i {
-    let r = i64x8(e0, e1, e2, e3, e4, e5, e6, e7);
+    let r = i64x8::new(e0, e1, e2, e3, e4, e5, e6, e7);
     transmute(r)
 }
 
diff --git a/crates/core_arch/src/x86_64/mod.rs b/crates/core_arch/src/x86_64/mod.rs
index 038f6478cc..c9f3bd637c 100644
--- a/crates/core_arch/src/x86_64/mod.rs
+++ b/crates/core_arch/src/x86_64/mod.rs
@@ -33,6 +33,9 @@ pub use self::bmi2::*;
 mod avx2;
 pub use self::avx2::*;
 
+mod avx512f;
+pub use self::avx512f::*;
+
 mod bswap;
 pub use self::bswap::*;
 

From 3c13fabb0258efa0b81be95e4aac9189eac3463d Mon Sep 17 00:00:00 2001
From: Daniel Smith <daniel.smith@datadoghq.com>
Date: Thu, 28 May 2020 22:19:28 +0000
Subject: [PATCH 09/11] Add mask variant to cmplt

---
 crates/core_arch/src/x86/avx512f.rs    | 11 +++++++++++
 crates/core_arch/src/x86_64/avx512f.rs |  9 +++++++++
 2 files changed, 20 insertions(+)

diff --git a/crates/core_arch/src/x86/avx512f.rs b/crates/core_arch/src/x86/avx512f.rs
index 2e71cf296a..46357c82ff 100644
--- a/crates/core_arch/src/x86/avx512f.rs
+++ b/crates/core_arch/src/x86/avx512f.rs
@@ -104,6 +104,17 @@ pub unsafe fn _mm512_cmplt_epu64_mask(a: __m512i, b: __m512i) -> __mmask8 {
     simd_bitmask::<__m512i, _>(simd_lt(a.as_u64x8(), b.as_u64x8()))
 }
 
+///Compare packed unsigned 64-bit integers in a and b for less-than, and store the results in a mask vector k
+/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmplt_epu64)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vpcmp))]
+pub unsafe fn _mm512_mask_cmplt_epu64_mask(m: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
+    _mm512_cmplt_epu64_mask(a, b) & m
+}
+
 #[cfg(test)]
 mod tests {
     use std;
diff --git a/crates/core_arch/src/x86_64/avx512f.rs b/crates/core_arch/src/x86_64/avx512f.rs
index c5ab3af525..22cd159bd9 100644
--- a/crates/core_arch/src/x86_64/avx512f.rs
+++ b/crates/core_arch/src/x86_64/avx512f.rs
@@ -57,6 +57,15 @@ mod tests {
         assert_eq!(m, 0b11001111);
     }
 
+    #[simd_test(enable = "avx512f")]
+    unsafe fn test_mm512_mask_cmplt_epu64_mask() {
+        let a = _mm512_set_epi64(0, 1, -1, u64::MAX as i64, i64::MAX, i64::MIN, 100, -100);
+        let b = _mm512_set1_epi64(-1);
+        let mask = 0b01111010;
+        let r = _mm512_mask_cmplt_epu64_mask(mask, a, b);
+        assert_eq!(r, 0b01001010);
+    }
+
     #[simd_test(enable = "avx512f")]
     unsafe fn test_mm512_set_epi64() {
         let r = _mm512_setr_epi64(0, 1, 2, 3, 4, 5, 6, 7);

From 6b83a787df6fdeae67f877ccf65fad85e46c54d2 Mon Sep 17 00:00:00 2001
From: Daniel Smith <daniel.smith@datadoghq.com>
Date: Thu, 28 May 2020 22:29:46 +0000
Subject: [PATCH 10/11] Add gt and eq unsigned variants

---
 crates/core_arch/src/x86/avx512f.rs    | 42 ++++++++++++++++++++++++++
 crates/core_arch/src/x86_64/avx512f.rs | 34 +++++++++++++++++++++
 2 files changed, 76 insertions(+)

diff --git a/crates/core_arch/src/x86/avx512f.rs b/crates/core_arch/src/x86/avx512f.rs
index 46357c82ff..646a4aa2f4 100644
--- a/crates/core_arch/src/x86/avx512f.rs
+++ b/crates/core_arch/src/x86/avx512f.rs
@@ -115,6 +115,48 @@ pub unsafe fn _mm512_mask_cmplt_epu64_mask(m: __mmask8, a: __m512i, b: __m512i)
     _mm512_cmplt_epu64_mask(a, b) & m
 }
 
+/// Compare packed unsigned 64-bit integers in a and b for less-than, and store the results in a mask vector.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmpgt_epu64)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vpcmp))]
+pub unsafe fn _mm512_cmpgt_epu64_mask(a: __m512i, b: __m512i) -> __mmask8 {
+    simd_bitmask::<__m512i, _>(simd_gt(a.as_u64x8(), b.as_u64x8()))
+}
+
+///Compare packed unsigned 64-bit integers in a and b for less-than, and store the results in a mask vector k
+/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmpgt_epu64)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vpcmp))]
+pub unsafe fn _mm512_mask_cmpgt_epu64_mask(m: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
+    _mm512_cmpgt_epu64_mask(a, b) & m
+}
+
+/// Compare packed unsigned 64-bit integers in a and b for less-than, and store the results in a mask vector.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmpeq_epu64)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vpcmp))]
+pub unsafe fn _mm512_cmpeq_epu64_mask(a: __m512i, b: __m512i) -> __mmask8 {
+    simd_bitmask::<__m512i, _>(simd_eq(a.as_u64x8(), b.as_u64x8()))
+}
+
+///Compare packed unsigned 64-bit integers in a and b for less-than, and store the results in a mask vector k
+/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmpeq_epu64)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vpcmp))]
+pub unsafe fn _mm512_mask_cmpeq_epu64_mask(m: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
+    _mm512_cmpeq_epu64_mask(a, b) & m
+}
+
 #[cfg(test)]
 mod tests {
     use std;
diff --git a/crates/core_arch/src/x86_64/avx512f.rs b/crates/core_arch/src/x86_64/avx512f.rs
index 22cd159bd9..411c871e4e 100644
--- a/crates/core_arch/src/x86_64/avx512f.rs
+++ b/crates/core_arch/src/x86_64/avx512f.rs
@@ -66,6 +66,40 @@ mod tests {
         assert_eq!(r, 0b01001010);
     }
 
+    #[simd_test(enable = "avx512f")]
+    unsafe fn test_mm512_cmpgt_epu64_mask() {
+        let a = _mm512_set_epi64(0, 1, -1, u64::MAX as i64, i64::MAX, i64::MIN, 100, -100);
+        let b = _mm512_set1_epi64(-1);
+        let m = _mm512_cmpgt_epu64_mask(b, a);
+        assert_eq!(m, 0b11001111);
+    }
+
+    #[simd_test(enable = "avx512f")]
+    unsafe fn test_mm512_mask_cmpgt_epu64_mask() {
+        let a = _mm512_set_epi64(0, 1, -1, u64::MAX as i64, i64::MAX, i64::MIN, 100, -100);
+        let b = _mm512_set1_epi64(-1);
+        let mask = 0b01111010;
+        let r = _mm512_mask_cmpgt_epu64_mask(mask, b, a);
+        assert_eq!(r, 0b01001010);
+    }
+
+    #[simd_test(enable = "avx512f")]
+    unsafe fn test_mm512_cmpeq_epu64_mask() {
+        let a = _mm512_set_epi64(0, 1, -1, u64::MAX as i64, i64::MAX, i64::MIN, 100, -100);
+        let b = _mm512_set_epi64(0, 1, 13, 42, i64::MAX, i64::MIN, 100, -100);
+        let m = _mm512_cmpeq_epu64_mask(b, a);
+        assert_eq!(m, 0b11001111);
+    }
+
+    #[simd_test(enable = "avx512f")]
+    unsafe fn test_mm512_mask_cmpeq_epu64_mask() {
+        let a = _mm512_set_epi64(0, 1, -1, u64::MAX as i64, i64::MAX, i64::MIN, 100, -100);
+        let b = _mm512_set_epi64(0, 1, 13, 42, i64::MAX, i64::MIN, 100, -100);
+        let mask = 0b01111010;
+        let r = _mm512_mask_cmpeq_epu64_mask(mask, b, a);
+        assert_eq!(r, 0b01001010);
+    }
+
     #[simd_test(enable = "avx512f")]
     unsafe fn test_mm512_set_epi64() {
         let r = _mm512_setr_epi64(0, 1, 2, 3, 4, 5, 6, 7);

From 764bfdb687d79957057d190aafb2f45464327814 Mon Sep 17 00:00:00 2001
From: Daniel Smith <daniel.smith@datadoghq.com>
Date: Thu, 28 May 2020 22:40:37 +0000
Subject: [PATCH 11/11] Add signed variants

---
 crates/core_arch/src/x86/avx512f.rs    | 63 ++++++++++++++++++++++++++
 crates/core_arch/src/x86/mod.rs        |  5 ++
 crates/core_arch/src/x86_64/avx512f.rs | 51 +++++++++++++++++++++
 3 files changed, 119 insertions(+)

diff --git a/crates/core_arch/src/x86/avx512f.rs b/crates/core_arch/src/x86/avx512f.rs
index 646a4aa2f4..421146d53d 100644
--- a/crates/core_arch/src/x86/avx512f.rs
+++ b/crates/core_arch/src/x86/avx512f.rs
@@ -157,6 +157,69 @@ pub unsafe fn _mm512_mask_cmpeq_epu64_mask(m: __mmask8, a: __m512i, b: __m512i)
     _mm512_cmpeq_epu64_mask(a, b) & m
 }
 
+/// Compare packed unsigned 64-bit integers in a and b for less-than, and store the results in a mask vector.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmplt_epi64)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vpcmp))]
+pub unsafe fn _mm512_cmplt_epi64_mask(a: __m512i, b: __m512i) -> __mmask8 {
+    simd_bitmask::<__m512i, _>(simd_lt(a.as_i64x8(), b.as_i64x8()))
+}
+
+///Compare packed unsigned 64-bit integers in a and b for less-than, and store the results in a mask vector k
+/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmplt_epi64)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vpcmp))]
+pub unsafe fn _mm512_mask_cmplt_epi64_mask(m: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
+    _mm512_cmplt_epi64_mask(a, b) & m
+}
+
+/// Compare packed unsigned 64-bit integers in a and b for less-than, and store the results in a mask vector.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmpgt_epi64)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vpcmp))]
+pub unsafe fn _mm512_cmpgt_epi64_mask(a: __m512i, b: __m512i) -> __mmask8 {
+    simd_bitmask::<__m512i, _>(simd_gt(a.as_i64x8(), b.as_i64x8()))
+}
+
+///Compare packed unsigned 64-bit integers in a and b for less-than, and store the results in a mask vector k
+/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmpgt_epi64)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vpcmp))]
+pub unsafe fn _mm512_mask_cmpgt_epi64_mask(m: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
+    _mm512_cmpgt_epi64_mask(a, b) & m
+}
+
+/// Compare packed unsigned 64-bit integers in a and b for less-than, and store the results in a mask vector.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmpeq_epi64)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vpcmp))]
+pub unsafe fn _mm512_cmpeq_epi64_mask(a: __m512i, b: __m512i) -> __mmask8 {
+    simd_bitmask::<__m512i, _>(simd_eq(a.as_i64x8(), b.as_i64x8()))
+}
+
+///Compare packed unsigned 64-bit integers in a and b for less-than, and store the results in a mask vector k
+/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmpeq_epi64)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vpcmp))]
+pub unsafe fn _mm512_mask_cmpeq_epi64_mask(m: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
+    _mm512_cmpeq_epi64_mask(a, b) & m
+}
+
 #[cfg(test)]
 mod tests {
     use std;
diff --git a/crates/core_arch/src/x86/mod.rs b/crates/core_arch/src/x86/mod.rs
index 2a47597a66..3e96478b65 100644
--- a/crates/core_arch/src/x86/mod.rs
+++ b/crates/core_arch/src/x86/mod.rs
@@ -518,6 +518,11 @@ pub(crate) trait m512iExt: Sized {
     fn as_u64x8(self) -> crate::core_arch::simd::u64x8 {
         unsafe { transmute(self.as_m512i()) }
     }
+
+    #[inline]
+    fn as_i64x8(self) -> crate::core_arch::simd::i64x8 {
+        unsafe { transmute(self.as_m512i()) }
+    }
 }
 
 impl m512iExt for __m512i {
diff --git a/crates/core_arch/src/x86_64/avx512f.rs b/crates/core_arch/src/x86_64/avx512f.rs
index 411c871e4e..ad2e29e5cc 100644
--- a/crates/core_arch/src/x86_64/avx512f.rs
+++ b/crates/core_arch/src/x86_64/avx512f.rs
@@ -100,6 +100,57 @@ mod tests {
         assert_eq!(r, 0b01001010);
     }
 
+    #[simd_test(enable = "avx512f")]
+    unsafe fn test_mm512_cmplt_epi64_mask() {
+        let a = _mm512_set_epi64(0, 1, -1, 13, i64::MAX, i64::MIN, 100, -100);
+        let b = _mm512_set1_epi64(-1);
+        let m = _mm512_cmplt_epi64_mask(a, b);
+        assert_eq!(m, 0b00000101);
+    }
+
+    #[simd_test(enable = "avx512f")]
+    unsafe fn test_mm512_mask_cmplt_epi64_mask() {
+        let a = _mm512_set_epi64(0, 1, -1, 13, i64::MAX, i64::MIN, 100, -100);
+        let b = _mm512_set1_epi64(-1);
+        let mask = 0b01100110;
+        let r = _mm512_mask_cmplt_epi64_mask(mask, a, b);
+        assert_eq!(r, 0b00000100);
+    }
+
+    #[simd_test(enable = "avx512f")]
+    unsafe fn test_mm512_cmpgt_epi64_mask() {
+        let a = _mm512_set_epi64(0, 1, -1, 13, i64::MAX, i64::MIN, 100, -100);
+        let b = _mm512_set1_epi64(-1);
+        let m = _mm512_cmpgt_epi64_mask(b, a);
+        assert_eq!(m, 0b00000101);
+    }
+
+    #[simd_test(enable = "avx512f")]
+    unsafe fn test_mm512_mask_cmpgt_epi64_mask() {
+        let a = _mm512_set_epi64(0, 1, -1, 13, i64::MAX, i64::MIN, 100, -100);
+        let b = _mm512_set1_epi64(-1);
+        let mask = 0b01100110;
+        let r = _mm512_mask_cmpgt_epi64_mask(mask, b, a);
+        assert_eq!(r, 0b00000100);
+    }
+
+    #[simd_test(enable = "avx512f")]
+    unsafe fn test_mm512_cmpeq_epi64_mask() {
+        let a = _mm512_set_epi64(0, 1, -1, 13, i64::MAX, i64::MIN, 100, -100);
+        let b = _mm512_set_epi64(0, 1, 13, 42, i64::MAX, i64::MIN, 100, -100);
+        let m = _mm512_cmpeq_epi64_mask(b, a);
+        assert_eq!(m, 0b11001111);
+    }
+
+    #[simd_test(enable = "avx512f")]
+    unsafe fn test_mm512_mask_cmpeq_epi64_mask() {
+        let a = _mm512_set_epi64(0, 1, -1, 13, i64::MAX, i64::MIN, 100, -100);
+        let b = _mm512_set_epi64(0, 1, 13, 42, i64::MAX, i64::MIN, 100, -100);
+        let mask = 0b01111010;
+        let r = _mm512_mask_cmpeq_epi64_mask(mask, b, a);
+        assert_eq!(r, 0b01001010);
+    }
+
     #[simd_test(enable = "avx512f")]
     unsafe fn test_mm512_set_epi64() {
         let r = _mm512_setr_epi64(0, 1, 2, 3, 4, 5, 6, 7);