From 4a171e3ec709fcb80d44184d4762d27f48c4ef31 Mon Sep 17 00:00:00 2001 From: Daniel Smith <daniel.smith@datadoghq.com> Date: Wed, 27 May 2020 21:49:53 +0000 Subject: [PATCH 01/11] Add one AVX512f comparison and the intrinsics needed to test it --- crates/core_arch/src/simd.rs | 4 ++++ crates/core_arch/src/x86/avx512f.rs | 37 +++++++++++++++++++++++++++++ crates/core_arch/src/x86/mod.rs | 9 +++++++ 3 files changed, 50 insertions(+) diff --git a/crates/core_arch/src/simd.rs b/crates/core_arch/src/simd.rs index 746f084641..7e4f7e8cce 100644 --- a/crates/core_arch/src/simd.rs +++ b/crates/core_arch/src/simd.rs @@ -201,3 +201,7 @@ simd_ty!(i32x16[i32]: simd_ty!(i64x8[i64]: i64, i64, i64, i64, i64, i64, i64, i64 | x0, x1, x2, x3, x4, x5, x6, x7); + +simd_ty!(u64x8[u64]: + u64, u64, u64, u64, u64, u64, u64, u64 + | x0, x1, x2, x3, x4, x5, x6, x7); diff --git a/crates/core_arch/src/x86/avx512f.rs b/crates/core_arch/src/x86/avx512f.rs index 9515b7f728..64f13f9bfa 100644 --- a/crates/core_arch/src/x86/avx512f.rs +++ b/crates/core_arch/src/x86/avx512f.rs @@ -94,6 +94,35 @@ pub unsafe fn _mm512_set1_epi64(a: i64) -> __m512i { transmute(i64x8::splat(a)) } +/// Sets packed 64-bit integers in `dst` with the supplied values. +/// +/// [Intel's documentation]( https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,4909&text=_mm512_set_epi64) +#[inline] +#[target_feature(enable = "avx512f")] +pub unsafe fn _mm512_set_epi64( + e7: i64, + e6: i64, + e5: i64, + e4: i64, + e3: i64, + e2: i64, + e1: i64, + e0: i64, +) -> __m512i { + let r = i64x8(e0, e1, e2, e3, e4, e5, e6, e7); + transmute(r) +} + +/// Compare packed unsigned 64-bit integers in a and b for less-than, and store the results in a mask vector. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmplt_epu64) +#[inline] +#[target_feature(enable = "avx512f")] +#[cfg_attr(test, assert_instr(vpcmpuq))] +pub unsafe fn _mm512_cmplt_epu64_mask(a: __m512i, b: __m512i) -> __mmask8 { + simd_bitmask::<__m512i, _>(simd_lt(a.as_u64x8(), b.as_u64x8())) +} + #[cfg(test)] mod tests { use std; @@ -197,4 +226,12 @@ mod tests { ); assert_eq_m512i(r, e); } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_cmplt_epu64_mask() { + let a = _mm512_set_epi64(0, 1, -1, u64::MAX as i64, i64::MAX, i64::MIN, 100, -100); + let b = _mm512_set1_epi64(-1); + let m = _mm512_cmplt_epu64_mask(a, b); + assert_eq!(m, 0b11001111); + } } diff --git a/crates/core_arch/src/x86/mod.rs b/crates/core_arch/src/x86/mod.rs index 78a3e23179..2a47597a66 100644 --- a/crates/core_arch/src/x86/mod.rs +++ b/crates/core_arch/src/x86/mod.rs @@ -346,6 +346,10 @@ types! { #[allow(non_camel_case_types)] pub type __mmask16 = i16; +/// The `__mmask8` type used in AVX-512 intrinsics, a 8-bit integer +#[allow(non_camel_case_types)] +pub type __mmask8 = u8; + #[cfg(test)] mod test; #[cfg(test)] @@ -509,6 +513,11 @@ pub(crate) trait m512iExt: Sized { fn as_i32x16(self) -> crate::core_arch::simd::i32x16 { unsafe { transmute(self.as_m512i()) } } + + #[inline] + fn as_u64x8(self) -> crate::core_arch::simd::u64x8 { + unsafe { transmute(self.as_m512i()) } + } } impl m512iExt for __m512i { From 17b48f9fea2e9e2c6bf2742626c5ca3a7964f3b8 Mon Sep 17 00:00:00 2001 From: Daniel Smith <daniel.smith@datadoghq.com> Date: Wed, 27 May 2020 22:35:36 +0000 Subject: [PATCH 02/11] Add __mmask8 type --- crates/stdarch-verify/src/lib.rs | 1 + crates/stdarch-verify/tests/x86-intel.rs | 3 +++ 2 files changed, 4 insertions(+) diff --git a/crates/stdarch-verify/src/lib.rs b/crates/stdarch-verify/src/lib.rs index c56fb0de7e..62ad41c48f 100644 --- a/crates/stdarch-verify/src/lib.rs +++ b/crates/stdarch-verify/src/lib.rs @@ -145,6 +145,7 @@ fn to_type(t: &syn::Type) -> proc_macro2::TokenStream { "__m512" => quote! { &M512 }, "__m512d" => quote! { &M512D }, "__m512i" => quote! { &M512I }, + "__mmask8" => quote! { &MMASK8 }, "__mmask16" => quote! { &MMASK16 }, "__m64" => quote! { &M64 }, "bool" => quote! { &BOOL }, diff --git a/crates/stdarch-verify/tests/x86-intel.rs b/crates/stdarch-verify/tests/x86-intel.rs index 572de603d3..bf8ede6071 100644 --- a/crates/stdarch-verify/tests/x86-intel.rs +++ b/crates/stdarch-verify/tests/x86-intel.rs @@ -53,6 +53,7 @@ static M256D: Type = Type::M256D; static M512: Type = Type::M512; static M512I: Type = Type::M512I; static M512D: Type = Type::M512D; +static MMASK8: Type = Type::MMASK8; static MMASK16: Type = Type::MMASK16; static TUPLE: Type = Type::Tuple; @@ -76,6 +77,7 @@ enum Type { M512, M512D, M512I, + MMASK8, MMASK16, Tuple, CpuidResult, @@ -653,6 +655,7 @@ fn equate(t: &Type, intel: &str, intrinsic: &str, is_const: bool) -> Result<(), (&Type::ConstPtr(&Type::M512I), "__m512i const*") => {} (&Type::ConstPtr(&Type::M512D), "__m512d const*") => {} + (&Type::MMASK8, "__mmask8") => {} (&Type::MMASK16, "__mmask16") => {} // This is a macro (?) in C which seems to mutate its arguments, but From fda1c0d216b6f4b32bc314d178090d2f6088ca60 Mon Sep 17 00:00:00 2001 From: Daniel Smith <daniel.smith@datadoghq.com> Date: Wed, 27 May 2020 22:36:42 +0000 Subject: [PATCH 03/11] Move 64 bit function based on stdarch-verify --- crates/core_arch/src/x86/avx512f.rs | 19 ---------- crates/core_arch/src/x86_64/avx512f.rs | 50 ++++++++++++++++++++++++++ 2 files changed, 50 insertions(+), 19 deletions(-) create mode 100644 crates/core_arch/src/x86_64/avx512f.rs diff --git a/crates/core_arch/src/x86/avx512f.rs b/crates/core_arch/src/x86/avx512f.rs index 64f13f9bfa..5b64367d1f 100644 --- a/crates/core_arch/src/x86/avx512f.rs +++ b/crates/core_arch/src/x86/avx512f.rs @@ -94,25 +94,6 @@ pub unsafe fn _mm512_set1_epi64(a: i64) -> __m512i { transmute(i64x8::splat(a)) } -/// Sets packed 64-bit integers in `dst` with the supplied values. -/// -/// [Intel's documentation]( https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,4909&text=_mm512_set_epi64) -#[inline] -#[target_feature(enable = "avx512f")] -pub unsafe fn _mm512_set_epi64( - e7: i64, - e6: i64, - e5: i64, - e4: i64, - e3: i64, - e2: i64, - e1: i64, - e0: i64, -) -> __m512i { - let r = i64x8(e0, e1, e2, e3, e4, e5, e6, e7); - transmute(r) -} - /// Compare packed unsigned 64-bit integers in a and b for less-than, and store the results in a mask vector. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmplt_epu64) diff --git a/crates/core_arch/src/x86_64/avx512f.rs b/crates/core_arch/src/x86_64/avx512f.rs new file mode 100644 index 0000000000..8c40235958 --- /dev/null +++ b/crates/core_arch/src/x86_64/avx512f.rs @@ -0,0 +1,50 @@ +use crate::{ + core_arch::{simd::*, simd_llvm::*, x86_64::*}, + mem::{self, transmute}, +}; + +/// Sets packed 64-bit integers in `dst` with the supplied values. +/// +/// [Intel's documentation]( https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,4909&text=_mm512_set_epi64) +#[inline] +#[target_feature(enable = "avx512f")] +pub unsafe fn _mm512_set_epi64( + e7: i64, + e6: i64, + e5: i64, + e4: i64, + e3: i64, + e2: i64, + e1: i64, + e0: i64, +) -> __m512i { + let r = i64x8(e0, e1, e2, e3, e4, e5, e6, e7); + transmute(r) +} + +/// Compare packed unsigned 64-bit integers in a and b for less-than, and store the results in a mask vector. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmplt_epu64) +#[inline] +#[target_feature(enable = "avx512f")] +#[cfg_attr(test, assert_instr(vpcmpuq))] +pub unsafe fn _mm512_cmplt_epu64_mask(a: __m512i, b: __m512i) -> __mmask8 { + simd_bitmask::<__m512i, _>(simd_lt(a.as_u64x8(), b.as_u64x8())) +} + +#[cfg(test)] +mod tests { + use std; + use stdarch_test::simd_test; + + use crate::core_arch::x86::*; + use crate::core_arch::x86_64::*; + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_cmplt_epu64_mask() { + let a = _mm512_set_epi64(0, 1, -1, u64::MAX as i64, i64::MAX, i64::MIN, 100, -100); + let b = _mm512_set1_epi64(-1); + let m = _mm512_cmplt_epu64_mask(a, b); + assert_eq!(m, 0b11001111); + } +} From 1c7ade8346b6bf884caf563ab57e3c15d70b968a Mon Sep 17 00:00:00 2001 From: Daniel Smith <daniel.smith@datadoghq.com> Date: Wed, 27 May 2020 22:57:27 +0000 Subject: [PATCH 04/11] Fix stdarch-verify test --- crates/core_arch/src/x86/avx512f.rs | 8 ------ crates/core_arch/src/x86_64/avx512f.rs | 35 ++++++++++++++++++++------ 2 files changed, 28 insertions(+), 15 deletions(-) diff --git a/crates/core_arch/src/x86/avx512f.rs b/crates/core_arch/src/x86/avx512f.rs index 5b64367d1f..804ffc30d3 100644 --- a/crates/core_arch/src/x86/avx512f.rs +++ b/crates/core_arch/src/x86/avx512f.rs @@ -207,12 +207,4 @@ mod tests { ); assert_eq_m512i(r, e); } - - #[simd_test(enable = "avx512f")] - unsafe fn test_mm512_cmplt_epu64_mask() { - let a = _mm512_set_epi64(0, 1, -1, u64::MAX as i64, i64::MAX, i64::MIN, 100, -100); - let b = _mm512_set1_epi64(-1); - let m = _mm512_cmplt_epu64_mask(a, b); - assert_eq!(m, 0b11001111); - } } diff --git a/crates/core_arch/src/x86_64/avx512f.rs b/crates/core_arch/src/x86_64/avx512f.rs index 8c40235958..c4544a5acf 100644 --- a/crates/core_arch/src/x86_64/avx512f.rs +++ b/crates/core_arch/src/x86_64/avx512f.rs @@ -18,18 +18,27 @@ pub unsafe fn _mm512_set_epi64( e1: i64, e0: i64, ) -> __m512i { - let r = i64x8(e0, e1, e2, e3, e4, e5, e6, e7); - transmute(r) + _mm512_setr_epi64(e7, e6, e5, e4, e3, e2, e1, e0); } -/// Compare packed unsigned 64-bit integers in a and b for less-than, and store the results in a mask vector. +/// Sets packed 64-bit integers in `dst` with the supplied values in +/// reverse order. /// -/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmplt_epu64) +/// [Intel's documentation]( https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,4909&text=_mm512_set_epi64) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vpcmpuq))] -pub unsafe fn _mm512_cmplt_epu64_mask(a: __m512i, b: __m512i) -> __mmask8 { - simd_bitmask::<__m512i, _>(simd_lt(a.as_u64x8(), b.as_u64x8())) +pub unsafe fn _mm512_setr_epi64( + e7: i64, + e6: i64, + e5: i64, + e4: i64, + e3: i64, + e2: i64, + e1: i64, + e0: i64, +) -> __m512i { + let r = i64x8(e0, e1, e2, e3, e4, e5, e6, e7); + transmute(r) } #[cfg(test)] @@ -47,4 +56,16 @@ mod tests { let m = _mm512_cmplt_epu64_mask(a, b); assert_eq!(m, 0b11001111); } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_set_epi64() { + let r = _mm512_setr_epi64(0, 1, 2, 3, 4, 5, 6, 7); + assert_eq_m512i(r, _mm512_set_epi64(7, 6, 5, 4, 3, 2, 1, 0)) + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_setr_epi64() { + let r = _mm512_set_epi64(0, 1, 2, 3, 4, 5, 6, 7); + assert_eq_m512i(r, _mm512_setr_epi64(7, 6, 5, 4, 3, 2, 1, 0)) + } } From 61b67d527d4c096704748855fa510e57846751a1 Mon Sep 17 00:00:00 2001 From: Daniel Smith <daniel.smith@datadoghq.com> Date: Wed, 27 May 2020 23:08:23 +0000 Subject: [PATCH 05/11] Use correct instruction --- crates/core_arch/src/x86/avx512f.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/core_arch/src/x86/avx512f.rs b/crates/core_arch/src/x86/avx512f.rs index 804ffc30d3..a8632111d5 100644 --- a/crates/core_arch/src/x86/avx512f.rs +++ b/crates/core_arch/src/x86/avx512f.rs @@ -99,7 +99,7 @@ pub unsafe fn _mm512_set1_epi64(a: i64) -> __m512i { /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmplt_epu64) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vpcmpuq))] +#[cfg_attr(test, assert_instr(vpcmpltuq))] pub unsafe fn _mm512_cmplt_epu64_mask(a: __m512i, b: __m512i) -> __mmask8 { simd_bitmask::<__m512i, _>(simd_lt(a.as_u64x8(), b.as_u64x8())) } From 9ddd036ab8a9bff2a25e71baacb930dda5472a0d Mon Sep 17 00:00:00 2001 From: Daniel Smith <daniel.smith@datadoghq.com> Date: Thu, 28 May 2020 20:45:13 +0000 Subject: [PATCH 06/11] Only check for the instruction prefix since MSVC and Clang use different instructions --- crates/core_arch/src/x86/avx512f.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/core_arch/src/x86/avx512f.rs b/crates/core_arch/src/x86/avx512f.rs index a8632111d5..2e71cf296a 100644 --- a/crates/core_arch/src/x86/avx512f.rs +++ b/crates/core_arch/src/x86/avx512f.rs @@ -99,7 +99,7 @@ pub unsafe fn _mm512_set1_epi64(a: i64) -> __m512i { /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmplt_epu64) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vpcmpltuq))] +#[cfg_attr(test, assert_instr(vpcmp))] pub unsafe fn _mm512_cmplt_epu64_mask(a: __m512i, b: __m512i) -> __mmask8 { simd_bitmask::<__m512i, _>(simd_lt(a.as_u64x8(), b.as_u64x8())) } From 65bdc88af3e2f8628e89900615a71aee91d8db63 Mon Sep 17 00:00:00 2001 From: Daniel Smith <daniel.smith@datadoghq.com> Date: Thu, 28 May 2020 21:49:54 +0000 Subject: [PATCH 07/11] rustfmt --- crates/core_arch/src/x86_64/avx512f.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/crates/core_arch/src/x86_64/avx512f.rs b/crates/core_arch/src/x86_64/avx512f.rs index c4544a5acf..ab6eb69e66 100644 --- a/crates/core_arch/src/x86_64/avx512f.rs +++ b/crates/core_arch/src/x86_64/avx512f.rs @@ -18,7 +18,7 @@ pub unsafe fn _mm512_set_epi64( e1: i64, e0: i64, ) -> __m512i { - _mm512_setr_epi64(e7, e6, e5, e4, e3, e2, e1, e0); + _mm512_setr_epi64(e7, e6, e5, e4, e3, e2, e1, e0); } /// Sets packed 64-bit integers in `dst` with the supplied values in @@ -60,12 +60,12 @@ mod tests { #[simd_test(enable = "avx512f")] unsafe fn test_mm512_set_epi64() { let r = _mm512_setr_epi64(0, 1, 2, 3, 4, 5, 6, 7); - assert_eq_m512i(r, _mm512_set_epi64(7, 6, 5, 4, 3, 2, 1, 0)) + assert_eq_m512i(r, _mm512_set_epi64(7, 6, 5, 4, 3, 2, 1, 0)) } #[simd_test(enable = "avx512f")] unsafe fn test_mm512_setr_epi64() { let r = _mm512_set_epi64(0, 1, 2, 3, 4, 5, 6, 7); - assert_eq_m512i(r, _mm512_setr_epi64(7, 6, 5, 4, 3, 2, 1, 0)) + assert_eq_m512i(r, _mm512_setr_epi64(7, 6, 5, 4, 3, 2, 1, 0)) } } From 3c6eca455eaf2705aa4584ef76e3e26b9a1fdd7e Mon Sep 17 00:00:00 2001 From: Daniel Smith <daniel.smith@datadoghq.com> Date: Thu, 28 May 2020 22:15:12 +0000 Subject: [PATCH 08/11] finish/fix adding avx512f to x86_64 --- crates/core_arch/src/x86_64/avx512f.rs | 36 +++++++++++++------------- crates/core_arch/src/x86_64/mod.rs | 3 +++ 2 files changed, 21 insertions(+), 18 deletions(-) diff --git a/crates/core_arch/src/x86_64/avx512f.rs b/crates/core_arch/src/x86_64/avx512f.rs index ab6eb69e66..c5ab3af525 100644 --- a/crates/core_arch/src/x86_64/avx512f.rs +++ b/crates/core_arch/src/x86_64/avx512f.rs @@ -1,6 +1,6 @@ use crate::{ - core_arch::{simd::*, simd_llvm::*, x86_64::*}, - mem::{self, transmute}, + core_arch::{simd::*, x86::*}, + mem::transmute, }; /// Sets packed 64-bit integers in `dst` with the supplied values. @@ -9,16 +9,16 @@ use crate::{ #[inline] #[target_feature(enable = "avx512f")] pub unsafe fn _mm512_set_epi64( - e7: i64, - e6: i64, - e5: i64, - e4: i64, - e3: i64, - e2: i64, - e1: i64, e0: i64, + e1: i64, + e2: i64, + e3: i64, + e4: i64, + e5: i64, + e6: i64, + e7: i64, ) -> __m512i { - _mm512_setr_epi64(e7, e6, e5, e4, e3, e2, e1, e0); + _mm512_setr_epi64(e7, e6, e5, e4, e3, e2, e1, e0) } /// Sets packed 64-bit integers in `dst` with the supplied values in @@ -28,16 +28,16 @@ pub unsafe fn _mm512_set_epi64( #[inline] #[target_feature(enable = "avx512f")] pub unsafe fn _mm512_setr_epi64( - e7: i64, - e6: i64, - e5: i64, - e4: i64, - e3: i64, - e2: i64, - e1: i64, e0: i64, + e1: i64, + e2: i64, + e3: i64, + e4: i64, + e5: i64, + e6: i64, + e7: i64, ) -> __m512i { - let r = i64x8(e0, e1, e2, e3, e4, e5, e6, e7); + let r = i64x8::new(e0, e1, e2, e3, e4, e5, e6, e7); transmute(r) } diff --git a/crates/core_arch/src/x86_64/mod.rs b/crates/core_arch/src/x86_64/mod.rs index 038f6478cc..c9f3bd637c 100644 --- a/crates/core_arch/src/x86_64/mod.rs +++ b/crates/core_arch/src/x86_64/mod.rs @@ -33,6 +33,9 @@ pub use self::bmi2::*; mod avx2; pub use self::avx2::*; +mod avx512f; +pub use self::avx512f::*; + mod bswap; pub use self::bswap::*; From 3c13fabb0258efa0b81be95e4aac9189eac3463d Mon Sep 17 00:00:00 2001 From: Daniel Smith <daniel.smith@datadoghq.com> Date: Thu, 28 May 2020 22:19:28 +0000 Subject: [PATCH 09/11] Add mask variant to cmplt --- crates/core_arch/src/x86/avx512f.rs | 11 +++++++++++ crates/core_arch/src/x86_64/avx512f.rs | 9 +++++++++ 2 files changed, 20 insertions(+) diff --git a/crates/core_arch/src/x86/avx512f.rs b/crates/core_arch/src/x86/avx512f.rs index 2e71cf296a..46357c82ff 100644 --- a/crates/core_arch/src/x86/avx512f.rs +++ b/crates/core_arch/src/x86/avx512f.rs @@ -104,6 +104,17 @@ pub unsafe fn _mm512_cmplt_epu64_mask(a: __m512i, b: __m512i) -> __mmask8 { simd_bitmask::<__m512i, _>(simd_lt(a.as_u64x8(), b.as_u64x8())) } +///Compare packed unsigned 64-bit integers in a and b for less-than, and store the results in a mask vector k +/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmplt_epu64) +#[inline] +#[target_feature(enable = "avx512f")] +#[cfg_attr(test, assert_instr(vpcmp))] +pub unsafe fn _mm512_mask_cmplt_epu64_mask(m: __mmask8, a: __m512i, b: __m512i) -> __mmask8 { + _mm512_cmplt_epu64_mask(a, b) & m +} + #[cfg(test)] mod tests { use std; diff --git a/crates/core_arch/src/x86_64/avx512f.rs b/crates/core_arch/src/x86_64/avx512f.rs index c5ab3af525..22cd159bd9 100644 --- a/crates/core_arch/src/x86_64/avx512f.rs +++ b/crates/core_arch/src/x86_64/avx512f.rs @@ -57,6 +57,15 @@ mod tests { assert_eq!(m, 0b11001111); } + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_cmplt_epu64_mask() { + let a = _mm512_set_epi64(0, 1, -1, u64::MAX as i64, i64::MAX, i64::MIN, 100, -100); + let b = _mm512_set1_epi64(-1); + let mask = 0b01111010; + let r = _mm512_mask_cmplt_epu64_mask(mask, a, b); + assert_eq!(r, 0b01001010); + } + #[simd_test(enable = "avx512f")] unsafe fn test_mm512_set_epi64() { let r = _mm512_setr_epi64(0, 1, 2, 3, 4, 5, 6, 7); From 6b83a787df6fdeae67f877ccf65fad85e46c54d2 Mon Sep 17 00:00:00 2001 From: Daniel Smith <daniel.smith@datadoghq.com> Date: Thu, 28 May 2020 22:29:46 +0000 Subject: [PATCH 10/11] Add gt and eq unsigned variants --- crates/core_arch/src/x86/avx512f.rs | 42 ++++++++++++++++++++++++++ crates/core_arch/src/x86_64/avx512f.rs | 34 +++++++++++++++++++++ 2 files changed, 76 insertions(+) diff --git a/crates/core_arch/src/x86/avx512f.rs b/crates/core_arch/src/x86/avx512f.rs index 46357c82ff..646a4aa2f4 100644 --- a/crates/core_arch/src/x86/avx512f.rs +++ b/crates/core_arch/src/x86/avx512f.rs @@ -115,6 +115,48 @@ pub unsafe fn _mm512_mask_cmplt_epu64_mask(m: __mmask8, a: __m512i, b: __m512i) _mm512_cmplt_epu64_mask(a, b) & m } +/// Compare packed unsigned 64-bit integers in a and b for less-than, and store the results in a mask vector. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmpgt_epu64) +#[inline] +#[target_feature(enable = "avx512f")] +#[cfg_attr(test, assert_instr(vpcmp))] +pub unsafe fn _mm512_cmpgt_epu64_mask(a: __m512i, b: __m512i) -> __mmask8 { + simd_bitmask::<__m512i, _>(simd_gt(a.as_u64x8(), b.as_u64x8())) +} + +///Compare packed unsigned 64-bit integers in a and b for less-than, and store the results in a mask vector k +/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmpgt_epu64) +#[inline] +#[target_feature(enable = "avx512f")] +#[cfg_attr(test, assert_instr(vpcmp))] +pub unsafe fn _mm512_mask_cmpgt_epu64_mask(m: __mmask8, a: __m512i, b: __m512i) -> __mmask8 { + _mm512_cmpgt_epu64_mask(a, b) & m +} + +/// Compare packed unsigned 64-bit integers in a and b for less-than, and store the results in a mask vector. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmpeq_epu64) +#[inline] +#[target_feature(enable = "avx512f")] +#[cfg_attr(test, assert_instr(vpcmp))] +pub unsafe fn _mm512_cmpeq_epu64_mask(a: __m512i, b: __m512i) -> __mmask8 { + simd_bitmask::<__m512i, _>(simd_eq(a.as_u64x8(), b.as_u64x8())) +} + +///Compare packed unsigned 64-bit integers in a and b for less-than, and store the results in a mask vector k +/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmpeq_epu64) +#[inline] +#[target_feature(enable = "avx512f")] +#[cfg_attr(test, assert_instr(vpcmp))] +pub unsafe fn _mm512_mask_cmpeq_epu64_mask(m: __mmask8, a: __m512i, b: __m512i) -> __mmask8 { + _mm512_cmpeq_epu64_mask(a, b) & m +} + #[cfg(test)] mod tests { use std; diff --git a/crates/core_arch/src/x86_64/avx512f.rs b/crates/core_arch/src/x86_64/avx512f.rs index 22cd159bd9..411c871e4e 100644 --- a/crates/core_arch/src/x86_64/avx512f.rs +++ b/crates/core_arch/src/x86_64/avx512f.rs @@ -66,6 +66,40 @@ mod tests { assert_eq!(r, 0b01001010); } + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_cmpgt_epu64_mask() { + let a = _mm512_set_epi64(0, 1, -1, u64::MAX as i64, i64::MAX, i64::MIN, 100, -100); + let b = _mm512_set1_epi64(-1); + let m = _mm512_cmpgt_epu64_mask(b, a); + assert_eq!(m, 0b11001111); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_cmpgt_epu64_mask() { + let a = _mm512_set_epi64(0, 1, -1, u64::MAX as i64, i64::MAX, i64::MIN, 100, -100); + let b = _mm512_set1_epi64(-1); + let mask = 0b01111010; + let r = _mm512_mask_cmpgt_epu64_mask(mask, b, a); + assert_eq!(r, 0b01001010); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_cmpeq_epu64_mask() { + let a = _mm512_set_epi64(0, 1, -1, u64::MAX as i64, i64::MAX, i64::MIN, 100, -100); + let b = _mm512_set_epi64(0, 1, 13, 42, i64::MAX, i64::MIN, 100, -100); + let m = _mm512_cmpeq_epu64_mask(b, a); + assert_eq!(m, 0b11001111); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_cmpeq_epu64_mask() { + let a = _mm512_set_epi64(0, 1, -1, u64::MAX as i64, i64::MAX, i64::MIN, 100, -100); + let b = _mm512_set_epi64(0, 1, 13, 42, i64::MAX, i64::MIN, 100, -100); + let mask = 0b01111010; + let r = _mm512_mask_cmpeq_epu64_mask(mask, b, a); + assert_eq!(r, 0b01001010); + } + #[simd_test(enable = "avx512f")] unsafe fn test_mm512_set_epi64() { let r = _mm512_setr_epi64(0, 1, 2, 3, 4, 5, 6, 7); From 764bfdb687d79957057d190aafb2f45464327814 Mon Sep 17 00:00:00 2001 From: Daniel Smith <daniel.smith@datadoghq.com> Date: Thu, 28 May 2020 22:40:37 +0000 Subject: [PATCH 11/11] Add signed variants --- crates/core_arch/src/x86/avx512f.rs | 63 ++++++++++++++++++++++++++ crates/core_arch/src/x86/mod.rs | 5 ++ crates/core_arch/src/x86_64/avx512f.rs | 51 +++++++++++++++++++++ 3 files changed, 119 insertions(+) diff --git a/crates/core_arch/src/x86/avx512f.rs b/crates/core_arch/src/x86/avx512f.rs index 646a4aa2f4..421146d53d 100644 --- a/crates/core_arch/src/x86/avx512f.rs +++ b/crates/core_arch/src/x86/avx512f.rs @@ -157,6 +157,69 @@ pub unsafe fn _mm512_mask_cmpeq_epu64_mask(m: __mmask8, a: __m512i, b: __m512i) _mm512_cmpeq_epu64_mask(a, b) & m } +/// Compare packed unsigned 64-bit integers in a and b for less-than, and store the results in a mask vector. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmplt_epi64) +#[inline] +#[target_feature(enable = "avx512f")] +#[cfg_attr(test, assert_instr(vpcmp))] +pub unsafe fn _mm512_cmplt_epi64_mask(a: __m512i, b: __m512i) -> __mmask8 { + simd_bitmask::<__m512i, _>(simd_lt(a.as_i64x8(), b.as_i64x8())) +} + +///Compare packed unsigned 64-bit integers in a and b for less-than, and store the results in a mask vector k +/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmplt_epi64) +#[inline] +#[target_feature(enable = "avx512f")] +#[cfg_attr(test, assert_instr(vpcmp))] +pub unsafe fn _mm512_mask_cmplt_epi64_mask(m: __mmask8, a: __m512i, b: __m512i) -> __mmask8 { + _mm512_cmplt_epi64_mask(a, b) & m +} + +/// Compare packed unsigned 64-bit integers in a and b for less-than, and store the results in a mask vector. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmpgt_epi64) +#[inline] +#[target_feature(enable = "avx512f")] +#[cfg_attr(test, assert_instr(vpcmp))] +pub unsafe fn _mm512_cmpgt_epi64_mask(a: __m512i, b: __m512i) -> __mmask8 { + simd_bitmask::<__m512i, _>(simd_gt(a.as_i64x8(), b.as_i64x8())) +} + +///Compare packed unsigned 64-bit integers in a and b for less-than, and store the results in a mask vector k +/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmpgt_epi64) +#[inline] +#[target_feature(enable = "avx512f")] +#[cfg_attr(test, assert_instr(vpcmp))] +pub unsafe fn _mm512_mask_cmpgt_epi64_mask(m: __mmask8, a: __m512i, b: __m512i) -> __mmask8 { + _mm512_cmpgt_epi64_mask(a, b) & m +} + +/// Compare packed unsigned 64-bit integers in a and b for less-than, and store the results in a mask vector. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmpeq_epi64) +#[inline] +#[target_feature(enable = "avx512f")] +#[cfg_attr(test, assert_instr(vpcmp))] +pub unsafe fn _mm512_cmpeq_epi64_mask(a: __m512i, b: __m512i) -> __mmask8 { + simd_bitmask::<__m512i, _>(simd_eq(a.as_i64x8(), b.as_i64x8())) +} + +///Compare packed unsigned 64-bit integers in a and b for less-than, and store the results in a mask vector k +/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmpeq_epi64) +#[inline] +#[target_feature(enable = "avx512f")] +#[cfg_attr(test, assert_instr(vpcmp))] +pub unsafe fn _mm512_mask_cmpeq_epi64_mask(m: __mmask8, a: __m512i, b: __m512i) -> __mmask8 { + _mm512_cmpeq_epi64_mask(a, b) & m +} + #[cfg(test)] mod tests { use std; diff --git a/crates/core_arch/src/x86/mod.rs b/crates/core_arch/src/x86/mod.rs index 2a47597a66..3e96478b65 100644 --- a/crates/core_arch/src/x86/mod.rs +++ b/crates/core_arch/src/x86/mod.rs @@ -518,6 +518,11 @@ pub(crate) trait m512iExt: Sized { fn as_u64x8(self) -> crate::core_arch::simd::u64x8 { unsafe { transmute(self.as_m512i()) } } + + #[inline] + fn as_i64x8(self) -> crate::core_arch::simd::i64x8 { + unsafe { transmute(self.as_m512i()) } + } } impl m512iExt for __m512i { diff --git a/crates/core_arch/src/x86_64/avx512f.rs b/crates/core_arch/src/x86_64/avx512f.rs index 411c871e4e..ad2e29e5cc 100644 --- a/crates/core_arch/src/x86_64/avx512f.rs +++ b/crates/core_arch/src/x86_64/avx512f.rs @@ -100,6 +100,57 @@ mod tests { assert_eq!(r, 0b01001010); } + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_cmplt_epi64_mask() { + let a = _mm512_set_epi64(0, 1, -1, 13, i64::MAX, i64::MIN, 100, -100); + let b = _mm512_set1_epi64(-1); + let m = _mm512_cmplt_epi64_mask(a, b); + assert_eq!(m, 0b00000101); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_cmplt_epi64_mask() { + let a = _mm512_set_epi64(0, 1, -1, 13, i64::MAX, i64::MIN, 100, -100); + let b = _mm512_set1_epi64(-1); + let mask = 0b01100110; + let r = _mm512_mask_cmplt_epi64_mask(mask, a, b); + assert_eq!(r, 0b00000100); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_cmpgt_epi64_mask() { + let a = _mm512_set_epi64(0, 1, -1, 13, i64::MAX, i64::MIN, 100, -100); + let b = _mm512_set1_epi64(-1); + let m = _mm512_cmpgt_epi64_mask(b, a); + assert_eq!(m, 0b00000101); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_cmpgt_epi64_mask() { + let a = _mm512_set_epi64(0, 1, -1, 13, i64::MAX, i64::MIN, 100, -100); + let b = _mm512_set1_epi64(-1); + let mask = 0b01100110; + let r = _mm512_mask_cmpgt_epi64_mask(mask, b, a); + assert_eq!(r, 0b00000100); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_cmpeq_epi64_mask() { + let a = _mm512_set_epi64(0, 1, -1, 13, i64::MAX, i64::MIN, 100, -100); + let b = _mm512_set_epi64(0, 1, 13, 42, i64::MAX, i64::MIN, 100, -100); + let m = _mm512_cmpeq_epi64_mask(b, a); + assert_eq!(m, 0b11001111); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_cmpeq_epi64_mask() { + let a = _mm512_set_epi64(0, 1, -1, 13, i64::MAX, i64::MIN, 100, -100); + let b = _mm512_set_epi64(0, 1, 13, 42, i64::MAX, i64::MIN, 100, -100); + let mask = 0b01111010; + let r = _mm512_mask_cmpeq_epi64_mask(mask, b, a); + assert_eq!(r, 0b01001010); + } + #[simd_test(enable = "avx512f")] unsafe fn test_mm512_set_epi64() { let r = _mm512_setr_epi64(0, 1, 2, 3, 4, 5, 6, 7);