From 6d2e9b2520ed7fbcc85d3216bb24e44def69d353 Mon Sep 17 00:00:00 2001
From: jironglin
Date: Tue, 18 Aug 2020 14:02:07 +0000
Subject: [PATCH 1/9] add some avx512f intrinsics

---
 crates/core_arch/avx512f.md            |  107 +
 crates/core_arch/src/x86/avx512f.rs    | 5277 ++++++++++++++++--------
 crates/core_arch/src/x86_64/avx512f.rs |  552 +++
 3 files changed, 4216 insertions(+), 1720 deletions(-)
 create mode 100644 crates/core_arch/avx512f.md

diff --git a/crates/core_arch/avx512f.md b/crates/core_arch/avx512f.md
new file mode 100644
index 0000000000..567fd0e7ce
--- /dev/null
+++ b/crates/core_arch/avx512f.md
@@ -0,0 +1,107 @@
+["AVX512F"]
+
+ * [x] [`_mm512_and_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_and_epi32&expand=5236)
+ * [x] [`_mm512_and_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_and_epi64&expand=5236)
+ * [x] [`_mm512_and_si512`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_and_si512&expand=5236)
+ * [x] [`_mm512_kand`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_kand&expand=5236)
+ * [x] [`_mm512_kor`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_kor&expand=5236)
+ * [x] [`_mm512_kxor`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_kxor&expand=5236)
+ * [x] [`_kand_mask16`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=kand_mask16&expand=3212)
+ * [x] [`_kor_mask16`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=kor_mask16&expand=3239)
+ * [x] [`_kxor_mask16`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=kxor_mask16&expand=3291)
+ * [x] [`_mm512_mask_and_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_and_epi32&expand=5236)
+ * [x] [`_mm512_mask_and_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_and_epi64&expand=5236)
+ * [x] [`_mm512_mask_or_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_or_epi32&expand=5236)
+ * [x] [`_mm512_mask_or_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_or_epi64&expand=5236)
+ * [x] [`_mm512_mask_rol_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_rol_epi32&expand=5236)
+ * [x] [`_mm512_mask_rol_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_rol_epi64&expand=5236)
+ * [x] [`_mm512_mask_rolv_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_rolv_epi32&expand=5236)
+ * [x] [`_mm512_mask_rolv_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_rolv_epi64&expand=5236)
+ * [x] [`_mm512_mask_ror_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_ror_epi32&expand=5236)
+ * [x] [`_mm512_mask_ror_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_ror_epi64&expand=5236)
+ * [x] [`_mm512_mask_rorv_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_rorv_epi32&expand=5236)
+ * [x] [`_mm512_mask_rorv_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_rorv_epi64&expand=5236)
+ * [x] [`_mm512_mask_sll_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_sll_epi32&expand=5236)
+ * [x] [`_mm512_mask_sll_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_sll_epi64&expand=5236)
+ * [x] [`_mm512_mask_slli_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_slli_epi32&expand=5236)
+ * [x] [`_mm512_mask_slli_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_slli_epi64&expand=5236)
+ * [x] [`_mm512_mask_sllv_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_sllv_epi32&expand=5236)
+ * [x] [`_mm512_mask_sllv_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_sllv_epi64&expand=5236)
+ * [x] [`_mm512_mask_sra_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_sra_epi32&expand=5236)
+ * [x] [`_mm512_mask_sra_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_sra_epi64&expand=5236)
+ * [x] [`_mm512_mask_srai_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_srai_epi32&expand=5236)
+ * [x] [`_mm512_mask_srai_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_srai_epi64&expand=5236)
+ * [x] [`_mm512_mask_srav_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_srav_epi32&expand=5236)
+ * [x] [`_mm512_mask_srav_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_srav_epi64&expand=5236)
+ * [x] [`_mm512_mask_srl_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_srl_epi32&expand=5236)
+ * [x] [`_mm512_mask_srl_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_srl_epi64&expand=5236)
+ * [x] [`_mm512_mask_srli_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_srli_epi32&expand=5236)
+ * [x] [`_mm512_mask_srli_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_srli_epi64&expand=5236)
+ * [x] [`_mm512_mask_srlv_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_srlv_epi32&expand=5236)
+ * [x] [`_mm512_mask_srlv_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_srlv_epi64&expand=5236)
+ * [x] [`_mm512_mask_xor_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_xor_epi32&expand=5236)
+ * [x] [`_mm512_mask_xor_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_xor_epi64&expand=5236)
+ * [x] [`_mm512_maskz_and_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_and_epi32&expand=5236)
+ * [x] [`_mm512_maskz_and_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_and_epi64&expand=5236)
+ * [x] [`_mm512_maskz_or_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_or_epi32&expand=5236)
+ * [x] [`_mm512_maskz_or_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_or_epi64&expand=5236)
+ * [x] [`_mm512_maskz_rol_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_rol_epi32&expand=5236)
+ * [x] [`_mm512_maskz_rol_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_rol_epi64&expand=5236)
+ * [x] [`_mm512_maskz_rolv_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_rolv_epi32&expand=5236)
+ * [x] [`_mm512_maskz_rolv_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_rolv_epi64&expand=5236)
+ * [x] [`_mm512_maskz_ror_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_ror_epi32&expand=5236)
+ * [x] [`_mm512_maskz_ror_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_ror_epi64&expand=5236)
+ * [x] [`_mm512_maskz_rorv_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_rorv_epi32&expand=5236)
+ * [x] [`_mm512_maskz_rorv_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_rorv_epi64&expand=5236)
+ * [x] [`_mm512_maskz_sll_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_sll_epi32&expand=5236)
+ * [x] [`_mm512_maskz_sll_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_sll_epi64&expand=5236)
+ * [x] [`_mm512_maskz_slli_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_slli_epi32&expand=5236)
+ * [x] [`_mm512_maskz_slli_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_slli_epi64&expand=5236)
+ * [x] [`_mm512_maskz_sllv_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_sllv_epi32&expand=5236)
+ * [x] [`_mm512_maskz_sllv_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_sllv_epi64&expand=5236)
+ * [x] [`_mm512_maskz_sra_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_sra_epi32&expand=5236)
+ * [x] [`_mm512_maskz_sra_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_sra_epi64&expand=5236)
+ * [x] [`_mm512_maskz_srai_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_srai_epi32&expand=5236)
+ * [x] [`_mm512_maskz_srai_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_srai_epi64&expand=5236)
+ * [x] [`_mm512_maskz_srav_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_srav_epi32&expand=5236)
+ * [x] [`_mm512_maskz_srav_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_srav_epi64&expand=5236)
+ * [x] [`_mm512_maskz_srl_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_srl_epi32&expand=5236)
+ * [x] [`_mm512_maskz_srl_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_srl_epi64&expand=5236)
+ * [x] [`_mm512_maskz_srli_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_srli_epi32&expand=5236)
+ * [x] [`_mm512_maskz_srli_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_srli_epi64&expand=5236)
+ * [x] [`_mm512_maskz_srlv_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_srlv_epi32&expand=5236)
+ * [x] [`_mm512_maskz_srlv_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_srlv_epi64&expand=5236)
+ * [x] [`_mm512_maskz_xor_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_xor_epi32&expand=5236)
+ * [x] [`_mm512_maskz_xor_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_xor_epi64&expand=5236)
+ * [x] [`_mm512_or_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_or_epi32&expand=5236)
+ * [x] [`_mm512_or_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_or_epi64&expand=5236)
+ * [x] [`_mm512_or_si512`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_or_si512&expand=5236)
+ * [x] [`_mm512_rol_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_rol_epi32&expand=5236)
+ * [x] [`_mm512_rol_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_rol_epi64&expand=5236)
+ * [x] [`_mm512_rolv_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_rolv_epi32&expand=5236)
+ * [x] [`_mm512_rolv_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_rolv_epi64&expand=5236)
+ * [x] [`_mm512_ror_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_ror_epi32&expand=5236)
+ * [x] [`_mm512_ror_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_ror_epi64&expand=5236)
+ * [x] [`_mm512_rorv_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_rorv_epi32&expand=5236)
+ * [x] [`_mm512_rorv_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_rorv_epi64&expand=5236)
+ * [x] [`_mm512_sll_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_sll_epi32&expand=5236)
+ * [x] [`_mm512_sll_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_sll_epi64&expand=5236)
+ * [x] [`_mm512_slli_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_slli_epi32&expand=5236)
+ * [x] [`_mm512_slli_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_slli_epi64&expand=5236)
+ * [x] [`_mm512_sllv_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_sllv_epi32&expand=5236)
+ * [x] [`_mm512_sllv_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_sllv_epi64&expand=5236)
+ * [x] [`_mm512_sra_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_sra_epi32&expand=5236)
+ * [x] [`_mm512_sra_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_sra_epi64&expand=5236)
+ * [x] [`_mm512_srai_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_srai_epi32&expand=5236)
+ * [x] [`_mm512_srai_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_srai_epi64&expand=5236)
+ * [x] [`_mm512_srav_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_srav_epi32&expand=5236)
+ * [x] [`_mm512_srav_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_srav_epi64&expand=5236)
+ * [x] [`_mm512_srl_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_srl_epi32&expand=5236)
+ * [x] [`_mm512_srl_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_srl_epi64&expand=5236)
+ * [x] [`_mm512_srli_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_srli_epi32&expand=5236)
+ * [x] [`_mm512_srli_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_srli_epi64&expand=5236)
+ * [x] [`_mm512_srlv_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_srlv_epi32&expand=5236)
+ * [x] [`_mm512_srlv_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_srlv_epi64&expand=5236)
+ * [x] [`_mm512_xor_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_xor_epi32&expand=5236)
+ * [x] [`_mm512_xor_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_xor_epi64&expand=5236)
+ * [x] [`_mm512_xor_si512`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_xor_si512&expand=5236)
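Nearly every intrinsic in this checklist comes as a plain/`_mask_`/`_maskz_` triple, and the difference between the writemask and zeromask forms is the main convention to keep straight when reading the diff below. A minimal sketch, not part of the patch (the helper name is made up; it assumes a nightly toolchain with `#![feature(stdsimd)]` running on an AVX512F-capable CPU):

```rust
#[cfg(target_arch = "x86_64")]
#[target_feature(enable = "avx512f")]
unsafe fn writemask_vs_zeromask() {
    use std::arch::x86_64::*;
    use std::mem::transmute;

    let src = _mm512_set1_epi32(-1);
    let a = _mm512_set1_epi32(0b0001);
    // Bit i of the mask selects lane i of the result.
    let k: __mmask16 = 0b0000_0000_0000_1111;

    // Writemask (`_mask_`): unselected lanes are copied from `src`.
    let merged: [i32; 16] = transmute(_mm512_mask_rol_epi32(src, k, a, 1));
    assert_eq!(merged[0], 0b0010); // selected lane: rotated
    assert_eq!(merged[15], -1); // unselected lane: taken from src

    // Zeromask (`_maskz_`): unselected lanes are zeroed instead.
    let zeroed: [i32; 16] = transmute(_mm512_maskz_rol_epi32(k, a, 1));
    assert_eq!(zeroed[0], 0b0010);
    assert_eq!(zeroed[15], 0);
}
```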

diff --git a/crates/core_arch/src/x86/avx512f.rs b/crates/core_arch/src/x86/avx512f.rs
index 4744b435dd..dfcc7e7bc1 100644
--- a/crates/core_arch/src/x86/avx512f.rs
+++ b/crates/core_arch/src/x86/avx512f.rs
@@ -854,1646 +854,2897 @@ pub unsafe fn _mm512_mask_i64scatter_epi32(
     constify_imm8_gather!(scale, call);
 }

-/// Sets packed 32-bit integers in `dst` with the supplied values.
+/// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst.
 ///
-/// [Intel's documentation]( https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,4909&text=_mm512_set_ps)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_rol_epi32&expand=4685)
 #[inline]
 #[target_feature(enable = "avx512f")]
-pub unsafe fn _mm512_set_ps(
-    e0: f32,
-    e1: f32,
-    e2: f32,
-    e3: f32,
-    e4: f32,
-    e5: f32,
-    e6: f32,
-    e7: f32,
-    e8: f32,
-    e9: f32,
-    e10: f32,
-    e11: f32,
-    e12: f32,
-    e13: f32,
-    e14: f32,
-    e15: f32,
-) -> __m512 {
-    _mm512_setr_ps(
-        e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0,
-    )
+#[cfg_attr(test, assert_instr(vprold))]
+pub unsafe fn _mm512_rol_epi32(a: __m512i, imm8: u8) -> __m512i {
+    transmute(vprold(a.as_i32x16(), imm8 as i8))
 }

-/// Sets packed 32-bit integers in `dst` with the supplied values in
-/// reverse order.
+/// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
 ///
-/// [Intel's documentation]( https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,4909&text=_mm512_set_ps)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_rol_epi32&expand=4683)
 #[inline]
 #[target_feature(enable = "avx512f")]
-pub unsafe fn _mm512_setr_ps(
-    e0: f32,
-    e1: f32,
-    e2: f32,
-    e3: f32,
-    e4: f32,
-    e5: f32,
-    e6: f32,
-    e7: f32,
-    e8: f32,
-    e9: f32,
-    e10: f32,
-    e11: f32,
-    e12: f32,
-    e13: f32,
-    e14: f32,
-    e15: f32,
-) -> __m512 {
-    let r = f32x16::new(
-        e0, e1, e2, e3, e4, e5, e6, e7, e8, e9, e10, e11, e12, e13, e14, e15,
-    );
-    transmute(r)
+#[cfg_attr(test, assert_instr(vprold))]
+pub unsafe fn _mm512_mask_rol_epi32(src: __m512i, k: __mmask16, a: __m512i, imm8: u8) -> __m512i {
+    let rol = _mm512_rol_epi32(a, imm8).as_i32x16();
+    transmute(simd_select_bitmask(k, rol, src.as_i32x16()))
 }

-/// Broadcast 64-bit float `a` to all elements of `dst`.
+/// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_rol_epi32&expand=4684)
 #[inline]
 #[target_feature(enable = "avx512f")]
-pub unsafe fn _mm512_set1_pd(a: f64) -> __m512d {
-    transmute(f64x8::splat(a))
+#[cfg_attr(test, assert_instr(vprold))]
+pub unsafe fn _mm512_maskz_rol_epi32(k: __mmask16, a: __m512i, imm8: u8) -> __m512i {
+    let rol = _mm512_rol_epi32(a, imm8).as_i32x16();
+    let zero = _mm512_setzero_si512().as_i32x16();
+    transmute(simd_select_bitmask(k, rol, zero))
 }

-/// Broadcast 32-bit float `a` to all elements of `dst`.
+/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_ror_epi32&expand=4721)
 #[inline]
 #[target_feature(enable = "avx512f")]
-pub unsafe fn _mm512_set1_ps(a: f32) -> __m512 {
-    transmute(f32x16::splat(a))
+#[cfg_attr(test, assert_instr(vprord))]
+pub unsafe fn _mm512_ror_epi32(a: __m512i, imm8: u8) -> __m512i {
+    transmute(vprord(a.as_i32x16(), imm8 as i8))
 }

-/// Sets packed 32-bit integers in `dst` with the supplied values.
+/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_ror_epi32&expand=4719)
 #[inline]
 #[target_feature(enable = "avx512f")]
-pub unsafe fn _mm512_set_epi32(
-    e15: i32,
-    e14: i32,
-    e13: i32,
-    e12: i32,
-    e11: i32,
-    e10: i32,
-    e9: i32,
-    e8: i32,
-    e7: i32,
-    e6: i32,
-    e5: i32,
-    e4: i32,
-    e3: i32,
-    e2: i32,
-    e1: i32,
-    e0: i32,
-) -> __m512i {
-    _mm512_setr_epi32(
-        e0, e1, e2, e3, e4, e5, e6, e7, e8, e9, e10, e11, e12, e13, e14, e15,
-    )
+#[cfg_attr(test, assert_instr(vprord))]
+pub unsafe fn _mm512_mask_ror_epi32(src: __m512i, k: __mmask16, a: __m512i, imm8: u8) -> __m512i {
+    let ror = _mm512_ror_epi32(a, imm8).as_i32x16();
+    transmute(simd_select_bitmask(k, ror, src.as_i32x16()))
 }

-/// Broadcast 32-bit integer `a` to all elements of `dst`.
+/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_ror_epi32&expand=4720)
 #[inline]
 #[target_feature(enable = "avx512f")]
-pub unsafe fn _mm512_set1_epi32(a: i32) -> __m512i {
-    transmute(i32x16::splat(a))
+#[cfg_attr(test, assert_instr(vprord))]
+pub unsafe fn _mm512_maskz_ror_epi32(k: __mmask16, a: __m512i, imm8: u8) -> __m512i {
+    let ror = _mm512_ror_epi32(a, imm8).as_i32x16();
+    let zero = _mm512_setzero_si512().as_i32x16();
+    transmute(simd_select_bitmask(k, ror, zero))
 }

-/// Broadcast 64-bit integer `a` to all elements of `dst`.
+/// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_rol_epi64&expand=4694)
 #[inline]
 #[target_feature(enable = "avx512f")]
-pub unsafe fn _mm512_set1_epi64(a: i64) -> __m512i {
-    transmute(i64x8::splat(a))
+#[cfg_attr(test, assert_instr(vprolq))]
+pub unsafe fn _mm512_rol_epi64(a: __m512i, imm8: u8) -> __m512i {
+    transmute(vprolq(a.as_i64x8(), imm8 as i8))
 }
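Unlike the shifts further down, a rotate never discards bits: whatever falls off one end re-enters at the other. A small sketch of the 32-bit forms added above (hypothetical test function, same nightly/AVX512F assumptions as before):

```rust
#[cfg(target_arch = "x86_64")]
#[target_feature(enable = "avx512f")]
unsafe fn rotate_wraps_bits() {
    use std::arch::x86_64::*;
    use std::mem::transmute;

    // Only the top bit is set; rotating left by one wraps it
    // around to bit 0 instead of shifting it out.
    let a = _mm512_set1_epi32(1 << 31);
    let rol: [i32; 16] = transmute(_mm512_rol_epi32(a, 1));
    assert_eq!(rol[0], 1);

    // Rotating right by one moves bit 0 up to bit 31.
    let b = _mm512_set1_epi32(1);
    let ror: [i32; 16] = transmute(_mm512_ror_epi32(b, 1));
    assert_eq!(ror[0], 1 << 31);
}
```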
-/// Compare packed single-precision (32-bit) floating-point elements in a and b for less-than, and store the results in a mask vector.
+/// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
 ///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmplt_ps)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_rol_epi64&expand=4692)
 #[inline]
 #[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vcmp))]
-pub unsafe fn _mm512_cmplt_ps_mask(a: __m512, b: __m512) -> __mmask16 {
-    _mm512_cmp_ps_mask(a, b, _CMP_LT_OS)
+#[cfg_attr(test, assert_instr(vprolq))]
+pub unsafe fn _mm512_mask_rol_epi64(src: __m512i, k: __mmask8, a: __m512i, imm8: u8) -> __m512i {
+    let rol = _mm512_rol_epi64(a, imm8).as_i64x8();
+    transmute(simd_select_bitmask(k, rol, src.as_i64x8()))
 }

-/// Compare packed single-precision (32-bit) floating-point elements in a and b for less-than, and store the results in a mask vector k
-/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set).
+/// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
 ///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmplt_ps)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_rol_epi64&expand=4693)
 #[inline]
 #[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vcmp))]
-pub unsafe fn _mm512_mask_cmplt_ps_mask(m: __mmask16, a: __m512, b: __m512) -> __mmask16 {
-    _mm512_mask_cmp_ps_mask(m, a, b, _CMP_LT_OS)
+#[cfg_attr(test, assert_instr(vprolq))]
+pub unsafe fn _mm512_maskz_rol_epi64(k: __mmask8, a: __m512i, imm8: u8) -> __m512i {
+    let rol = _mm512_rol_epi64(a, imm8).as_i64x8();
+    let zero = _mm512_setzero_si512().as_i64x8();
+    transmute(simd_select_bitmask(k, rol, zero))
 }

-/// Compare packed single-precision (32-bit) floating-point elements in a and b for greater-than, and store the results in a mask vector.
+/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst.
 ///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmpnlt_ps)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_ror_epi64&expand=4730)
 #[inline]
 #[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vcmp))]
-pub unsafe fn _mm512_cmpnlt_ps_mask(a: __m512, b: __m512) -> __mmask16 {
-    _mm512_cmp_ps_mask(a, b, _CMP_NLT_US)
+#[cfg_attr(test, assert_instr(vprorq))]
+pub unsafe fn _mm512_ror_epi64(a: __m512i, imm8: u8) -> __m512i {
+    transmute(vprorq(a.as_i64x8(), imm8 as i8))
 }

-/// Compare packed single-precision (32-bit) floating-point elements in a and b for greater-than, and store the results in a mask vector k
-/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set).
+/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
 ///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmpnlt_ps)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_ror_epi64&expand=4728)
 #[inline]
 #[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vcmp))]
-pub unsafe fn _mm512_mask_cmpnlt_ps_mask(m: __mmask16, a: __m512, b: __m512) -> __mmask16 {
-    _mm512_mask_cmp_ps_mask(m, a, b, _CMP_NLT_US)
+#[cfg_attr(test, assert_instr(vprorq))]
+pub unsafe fn _mm512_mask_ror_epi64(src: __m512i, k: __mmask8, a: __m512i, imm8: u8) -> __m512i {
+    let ror = _mm512_ror_epi64(a, imm8).as_i64x8();
+    transmute(simd_select_bitmask(k, ror, src.as_i64x8()))
 }

-/// Compare packed single-precision (32-bit) floating-point elements in a and b for less-than-or-equal, and store the results in a mask vector.
+/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
 ///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmple_ps)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_ror_epi64&expand=4729)
 #[inline]
 #[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vcmp))]
-pub unsafe fn _mm512_cmple_ps_mask(a: __m512, b: __m512) -> __mmask16 {
-    _mm512_cmp_ps_mask(a, b, _CMP_LE_OS)
+#[cfg_attr(test, assert_instr(vprorq))]
+pub unsafe fn _mm512_maskz_ror_epi64(k: __mmask8, a: __m512i, imm8: u8) -> __m512i {
+    let ror = _mm512_ror_epi64(a, imm8).as_i64x8();
+    let zero = _mm512_setzero_si512().as_i64x8();
+    transmute(simd_select_bitmask(k, ror, zero))
 }
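The 64-bit variants follow the same pattern but operate on 8 lanes, so the mask narrows to `__mmask8`. A sketch (hypothetical helper, same assumptions):

```rust
#[cfg(target_arch = "x86_64")]
#[target_feature(enable = "avx512f")]
unsafe fn rotate_epi64_masked() {
    use std::arch::x86_64::*;
    use std::mem::transmute;

    let src = _mm512_set1_epi64(0);
    let a = _mm512_set1_epi64(1);
    // Only 8 lanes now, so the mask type is __mmask8.
    let k: __mmask8 = 0b0000_0001;

    let r: [i64; 8] = transmute(_mm512_mask_ror_epi64(src, k, a, 1));
    assert_eq!(r[0], 1i64 << 63); // bit 0 wrapped around to bit 63
    assert_eq!(r[1], 0); // copied from src
}
```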
-/// Compare packed single-precision (32-bit) floating-point elements in a and b for less-than-or-equal, and store the results in a mask vector k
-/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set).
+/// Shift packed 32-bit integers in a left by imm8 while shifting in zeros, and store the results in dst.
 ///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmple_ps)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_slli_epi32&expand=5310)
 #[inline]
 #[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vcmp))]
-pub unsafe fn _mm512_mask_cmple_ps_mask(m: __mmask16, a: __m512, b: __m512) -> __mmask16 {
-    _mm512_mask_cmp_ps_mask(m, a, b, _CMP_LE_OS)
+#[cfg_attr(test, assert_instr(vpsllid))]
+pub unsafe fn _mm512_slli_epi32(a: __m512i, imm8: u32) -> __m512i {
+    transmute(vpsllid(a.as_i32x16(), imm8))
 }

-/// Compare packed single-precision (32-bit) floating-point elements in a and b for greater-than, and store the results in a mask vector.
+/// Shift packed 32-bit integers in a left by imm8 while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
 ///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmpnle_ps)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_slli_epi32&expand=5308)
 #[inline]
 #[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vcmp))]
-pub unsafe fn _mm512_cmpnle_ps_mask(a: __m512, b: __m512) -> __mmask16 {
-    _mm512_cmp_ps_mask(a, b, _CMP_NLE_US)
+#[cfg_attr(test, assert_instr(vpsllid))]
+pub unsafe fn _mm512_mask_slli_epi32(src: __m512i, k: __mmask16, a: __m512i, imm8: u32) -> __m512i {
+    let shf = _mm512_slli_epi32(a, imm8).as_i32x16();
+    transmute(simd_select_bitmask(k, shf, src.as_i32x16()))
 }

-/// Compare packed single-precision (32-bit) floating-point elements in a and b for greater-than, and store the results in a mask vector k
-/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set).
+/// Shift packed 32-bit integers in a left by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
 ///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmpnle_ps)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_slli_epi32&expand=5309)
 #[inline]
 #[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vcmp))]
-pub unsafe fn _mm512_mask_cmpnle_ps_mask(m: __mmask16, a: __m512, b: __m512) -> __mmask16 {
-    _mm512_mask_cmp_ps_mask(m, a, b, _CMP_NLE_US)
+#[cfg_attr(test, assert_instr(vpsllid))]
+pub unsafe fn _mm512_maskz_slli_epi32(k: __mmask16, a: __m512i, imm8: u32) -> __m512i {
+    let shf = _mm512_slli_epi32(a, imm8).as_i32x16();
+    let zero = _mm512_setzero_si512().as_i32x16();
+    transmute(simd_select_bitmask(k, shf, zero))
 }

-/// Compare packed single-precision (32-bit) floating-point elements in a and b for equality, and store the results in a mask vector.
+/// Shift packed 32-bit integers in a right by imm8 while shifting in zeros, and store the results in dst.
 ///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmpeq_ps)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_srli_epi32&expand=5522)
 #[inline]
 #[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vcmp))]
-pub unsafe fn _mm512_cmpeq_ps_mask(a: __m512, b: __m512) -> __mmask16 {
-    _mm512_cmp_ps_mask(a, b, _CMP_EQ_OQ)
+#[cfg_attr(test, assert_instr(vpsrlid))]
+pub unsafe fn _mm512_srli_epi32(a: __m512i, imm8: u32) -> __m512i {
+    transmute(vpsrlid(a.as_i32x16(), imm8))
 }

-/// Compare packed single-precision (32-bit) floating-point elements in a and b for equality, and store the results in a mask vector k
-/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set).
+/// Shift packed 32-bit integers in a right by imm8 while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
 ///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmpeq_ps)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_srli_epi32&expand=5520)
 #[inline]
 #[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vcmp))]
-pub unsafe fn _mm512_mask_cmpeq_ps_mask(m: __mmask16, a: __m512, b: __m512) -> __mmask16 {
-    _mm512_mask_cmp_ps_mask(m, a, b, _CMP_EQ_OQ)
+#[cfg_attr(test, assert_instr(vpsrlid))]
+pub unsafe fn _mm512_mask_srli_epi32(src: __m512i, k: __mmask16, a: __m512i, imm8: u32) -> __m512i {
+    let shf = _mm512_srli_epi32(a, imm8).as_i32x16();
+    transmute(simd_select_bitmask(k, shf, src.as_i32x16()))
 }

-/// Compare packed single-precision (32-bit) floating-point elements in a and b for inequality, and store the results in a mask vector.
+/// Shift packed 32-bit integers in a right by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
 ///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmpneq_ps)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_srli_epi32&expand=5521)
 #[inline]
 #[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vcmp))]
-pub unsafe fn _mm512_cmpneq_ps_mask(a: __m512, b: __m512) -> __mmask16 {
-    _mm512_cmp_ps_mask(a, b, _CMP_NEQ_UQ)
+#[cfg_attr(test, assert_instr(vpsrlid))]
+pub unsafe fn _mm512_maskz_srli_epi32(k: __mmask16, a: __m512i, imm8: u32) -> __m512i {
+    let shf = _mm512_srli_epi32(a, imm8).as_i32x16();
+    let zero = _mm512_setzero_si512().as_i32x16();
+    transmute(simd_select_bitmask(k, shf, zero))
 }
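Where a rotate wraps bits around, these immediate shifts discard them and fill with zeros; note that `imm8` is taken as `u32` here rather than `u8`. A sketch of both 32-bit directions (hypothetical helper, same assumptions):

```rust
#[cfg(target_arch = "x86_64")]
#[target_feature(enable = "avx512f")]
unsafe fn immediate_shifts() {
    use std::arch::x86_64::*;
    use std::mem::transmute;

    let a = _mm512_set1_epi32(0b1001);

    // Left shift: zeros come in at the bottom, high bits fall off.
    let sl: [i32; 16] = transmute(_mm512_slli_epi32(a, 2));
    assert_eq!(sl[0], 0b10_0100);

    // Logical right shift: zeros come in at the top, so the
    // low bit of 0b1001 is simply lost.
    let sr: [i32; 16] = transmute(_mm512_srli_epi32(a, 1));
    assert_eq!(sr[0], 0b100);
}
```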
-/// Compare packed single-precision (32-bit) floating-point elements in a and b for inequality, and store the results in a mask vector k
-/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set).
+/// Shift packed 64-bit integers in a left by imm8 while shifting in zeros, and store the results in dst.
 ///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmpneq_ps_mask)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_slli_epi64&expand=5319)
 #[inline]
 #[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vcmp))]
-pub unsafe fn _mm512_mask_cmpneq_ps_mask(m: __mmask16, a: __m512, b: __m512) -> __mmask16 {
-    _mm512_mask_cmp_ps_mask(m, a, b, _CMP_NEQ_UQ)
+#[cfg_attr(test, assert_instr(vpslliq))]
+pub unsafe fn _mm512_slli_epi64(a: __m512i, imm8: u32) -> __m512i {
+    transmute(vpslliq(a.as_i64x8(), imm8))
 }

-/// Compare packed single-precision (32-bit) floating-point elements in a and b based on the comparison operand specified by op.
+/// Shift packed 64-bit integers in a left by imm8 while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
 ///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmp_ps_mask)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_slli_epi64&expand=5317)
 #[inline]
 #[target_feature(enable = "avx512f")]
-#[rustc_args_required_const(2)]
-#[cfg_attr(test, assert_instr(vcmp, op = 0))]
-pub unsafe fn _mm512_cmp_ps_mask(a: __m512, b: __m512, op: i32) -> __mmask16 {
-    let neg_one = -1;
-    macro_rules! call {
-        ($imm5:expr) => {
-            vcmpps(
-                a.as_f32x16(),
-                b.as_f32x16(),
-                $imm5,
-                neg_one,
-                _MM_FROUND_CUR_DIRECTION,
-            )
-        };
-    }
-    let r = constify_imm5!(op, call);
-    transmute(r)
+#[cfg_attr(test, assert_instr(vpslliq))]
+pub unsafe fn _mm512_mask_slli_epi64(src: __m512i, k: __mmask8, a: __m512i, imm8: u32) -> __m512i {
+    let shf = _mm512_slli_epi64(a, imm8).as_i64x8();
+    transmute(simd_select_bitmask(k, shf, src.as_i64x8()))
 }

-/// Compare packed single-precision (32-bit) floating-point elements in a and b based on the comparison operand specified by op,
-/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set).
+/// Shift packed 64-bit integers in a left by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
 ///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmp_ps_mask)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_slli_epi64&expand=5318)
 #[inline]
 #[target_feature(enable = "avx512f")]
-#[rustc_args_required_const(3)]
-#[cfg_attr(test, assert_instr(vcmp, op = 0))]
-pub unsafe fn _mm512_mask_cmp_ps_mask(m: __mmask16, a: __m512, b: __m512, op: i32) -> __mmask16 {
-    macro_rules! call {
-        ($imm5:expr) => {
-            vcmpps(
-                a.as_f32x16(),
-                b.as_f32x16(),
-                $imm5,
-                m as i16,
-                _MM_FROUND_CUR_DIRECTION,
-            )
-        };
-    }
-    let r = constify_imm5!(op, call);
-    transmute(r)
+#[cfg_attr(test, assert_instr(vpslliq))]
+pub unsafe fn _mm512_maskz_slli_epi64(k: __mmask8, a: __m512i, imm8: u32) -> __m512i {
+    let shf = _mm512_slli_epi64(a, imm8).as_i64x8();
+    let zero = _mm512_setzero_si512().as_i64x8();
+    transmute(simd_select_bitmask(k, shf, zero))
 }

-/// Compare packed single-precision (32-bit) floating-point elements in a and b based on the comparison operand specified by op.
+/// Shift packed 64-bit integers in a right by imm8 while shifting in zeros, and store the results in dst.
 ///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmp_round_ps_mask)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_srli_epi64&expand=5531)
 #[inline]
 #[target_feature(enable = "avx512f")]
-#[rustc_args_required_const(2, 3)]
-#[cfg_attr(test, assert_instr(vcmp, op = 0, sae = 4))]
-pub unsafe fn _mm512_cmp_round_ps_mask(a: __m512, b: __m512, op: i32, sae: i32) -> __mmask16 {
-    let neg_one = -1;
-    macro_rules! call {
-        ($imm5:expr, $imm4:expr) => {
-            vcmpps(a.as_f32x16(), b.as_f32x16(), $imm5, neg_one, $imm4)
-        };
-    }
-    let r = constify_imm5_sae!(op, sae, call);
-    transmute(r)
+#[cfg_attr(test, assert_instr(vpsrliq))]
+pub unsafe fn _mm512_srli_epi64(a: __m512i, imm8: u32) -> __m512i {
+    transmute(vpsrliq(a.as_i64x8(), imm8))
 }

-/// Compare packed single-precision (32-bit) floating-point elements in a and b based on the comparison operand specified by op,
-/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set).
+/// Shift packed 64-bit integers in a right by imm8 while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
 ///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmp_round_ps_mask)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_srli_epi64&expand=5529)
 #[inline]
 #[target_feature(enable = "avx512f")]
-#[rustc_args_required_const(3, 4)]
-#[cfg_attr(test, assert_instr(vcmp, op = 0, sae = 4))]
-pub unsafe fn _mm512_mask_cmp_round_ps_mask(
-    m: __mmask16,
-    a: __m512,
-    b: __m512,
-    op: i32,
-    sae: i32,
-) -> __mmask16 {
-    macro_rules! call {
-        ($imm5:expr, $imm4:expr) => {
-            vcmpps(a.as_f32x16(), b.as_f32x16(), $imm5, m as i16, $imm4)
-        };
-    }
-    let r = constify_imm5_sae!(op, sae, call);
-    transmute(r)
+#[cfg_attr(test, assert_instr(vpsrliq))]
+pub unsafe fn _mm512_mask_srli_epi64(src: __m512i, k: __mmask8, a: __m512i, imm8: u32) -> __m512i {
+    let shf = _mm512_srli_epi64(a, imm8).as_i64x8();
+    transmute(simd_select_bitmask(k, shf, src.as_i64x8()))
 }

-/// Compare packed single-precision (32-bit) floating-point elements in a and b to see if neither is NaN, and store the results in a mask vector.
+/// Shift packed 64-bit integers in a right by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
 ///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmpord_ps_mask)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_srli_epi64&expand=5530)
 #[inline]
 #[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vcmp, op = 0))]
-pub unsafe fn _mm512_cmpord_ps_mask(a: __m512, b: __m512) -> __mmask16 {
-    _mm512_cmp_ps_mask(a, b, _CMP_ORD_Q)
+#[cfg_attr(test, assert_instr(vpsrliq))]
+pub unsafe fn _mm512_maskz_srli_epi64(k: __mmask8, a: __m512i, imm8: u32) -> __m512i {
+    let shf = _mm512_srli_epi64(a, imm8).as_i64x8();
+    let zero = _mm512_setzero_si512().as_i64x8();
+    transmute(simd_select_bitmask(k, shf, zero))
 }
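The 64-bit immediate shifts compose with a zeromask exactly like their 32-bit counterparts. A brief sketch (hypothetical helper, same assumptions):

```rust
#[cfg(target_arch = "x86_64")]
#[target_feature(enable = "avx512f")]
unsafe fn maskz_srli_epi64_sketch() {
    use std::arch::x86_64::*;
    use std::mem::transmute;

    let a = _mm512_set1_epi64(0xF0);
    // Keep the even lanes, zero the odd ones.
    let k: __mmask8 = 0b0101_0101;

    let r: [i64; 8] = transmute(_mm512_maskz_srli_epi64(k, a, 4));
    assert_eq!(r[0], 0x0F); // selected lane: shifted
    assert_eq!(r[1], 0); // unselected lane: zeroed
}
```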
-/// Compare packed single-precision (32-bit) floating-point elements in a and b to see if neither is NaN, and store the results in a mask vector.
+/// Shift packed 32-bit integers in a left by count while shifting in zeros, and store the results in dst.
 ///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmpord_ps_mask)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_sll_epi32&expand=5280)
 #[inline]
 #[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vcmp, op = 0))]
-pub unsafe fn _mm512_mask_cmpord_ps_mask(m: __mmask16, a: __m512, b: __m512) -> __mmask16 {
-    _mm512_mask_cmp_ps_mask(m, a, b, _CMP_ORD_Q)
+#[cfg_attr(test, assert_instr(vpslld))]
+pub unsafe fn _mm512_sll_epi32(a: __m512i, count: __m128i) -> __m512i {
+    transmute(vpslld(a.as_i32x16(), count.as_i32x4()))
 }

-/// Compare packed single-precision (32-bit) floating-point elements in a and b to see if either is NaN, and store the results in a mask vector.
+/// Shift packed 32-bit integers in a left by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
 ///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmpunord_ps_mask)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_sll_epi32&expand=5278)
 #[inline]
 #[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vcmp, op = 0))]
-pub unsafe fn _mm512_cmpunord_ps_mask(a: __m512, b: __m512) -> __mmask16 {
-    _mm512_cmp_ps_mask(a, b, _CMP_UNORD_Q)
+#[cfg_attr(test, assert_instr(vpslld))]
+pub unsafe fn _mm512_mask_sll_epi32(src: __m512i, k: __mmask16, a: __m512i, count: __m128i) -> __m512i {
+    let shf = _mm512_sll_epi32(a, count).as_i32x16();
+    transmute(simd_select_bitmask(k, shf, src.as_i32x16()))
 }

-/// Compare packed single-precision (32-bit) floating-point elements in a and b to see if either is NaN, and store the results in a mask vector.
+/// Shift packed 32-bit integers in a left by count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
 ///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmpunord_ps_mask)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_sll_epi32&expand=5279)
 #[inline]
 #[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vcmp, op = 0))]
-pub unsafe fn _mm512_mask_cmpunord_ps_mask(m: __mmask16, a: __m512, b: __m512) -> __mmask16 {
-    _mm512_mask_cmp_ps_mask(m, a, b, _CMP_UNORD_Q)
+#[cfg_attr(test, assert_instr(vpslld))]
+pub unsafe fn _mm512_maskz_sll_epi32(k: __mmask16, a: __m512i, count: __m128i) -> __m512i {
+    let shf = _mm512_sll_epi32(a, count).as_i32x16();
+    let zero = _mm512_setzero_si512().as_i32x16();
+    transmute(simd_select_bitmask(k, shf, zero))
 }

-/// Compare packed double-precision (64-bit) floating-point elements in a and b for less-than, and store the results in a mask vector.
+/// Shift packed 32-bit integers in a right by count while shifting in zeros, and store the results in dst.
 ///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmplt_pd)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_srl_epi32&expand=5492)
 #[inline]
 #[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vcmp))]
-pub unsafe fn _mm512_cmplt_pd_mask(a: __m512d, b: __m512d) -> __mmask8 {
-    _mm512_cmp_pd_mask(a, b, _CMP_LT_OS)
+#[cfg_attr(test, assert_instr(vpsrld))]
+pub unsafe fn _mm512_srl_epi32(a: __m512i, count: __m128i) -> __m512i {
+    transmute(vpsrld(a.as_i32x16(), count.as_i32x4()))
 }

-/// Compare packed double-precision (64-bit) floating-point elements in a and b for less-than, and store the results in a mask vector k
-/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set).
+/// Shift packed 32-bit integers in a right by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
 ///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmplt_pd)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_srl_epi32&expand=5490)
 #[inline]
 #[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vcmp))]
-pub unsafe fn _mm512_mask_cmplt_pd_mask(m: __mmask8, a: __m512d, b: __m512d) -> __mmask8 {
-    _mm512_mask_cmp_pd_mask(m, a, b, _CMP_LT_OS)
+#[cfg_attr(test, assert_instr(vpsrld))]
+pub unsafe fn _mm512_mask_srl_epi32(src: __m512i, k: __mmask16, a: __m512i, count: __m128i) -> __m512i {
+    let shf = _mm512_srl_epi32(a, count).as_i32x16();
+    transmute(simd_select_bitmask(k, shf, src.as_i32x16()))
 }

-/// Compare packed single-precision (32-bit) floating-point elements in a and b for greater-than, and store the results in a mask vector.
+/// Shift packed 32-bit integers in a right by count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
 ///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmpnlt_pd)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_srl_epi32&expand=5491)
 #[inline]
 #[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vcmp))]
-pub unsafe fn _mm512_cmpnlt_pd_mask(a: __m512d, b: __m512d) -> __mmask8 {
-    _mm512_cmp_pd_mask(a, b, _CMP_NLT_US)
+#[cfg_attr(test, assert_instr(vpsrld))]
+pub unsafe fn _mm512_maskz_srl_epi32(k: __mmask16, a: __m512i, count: __m128i) -> __m512i {
+    let shf = _mm512_srl_epi32(a, count).as_i32x16();
+    let zero = _mm512_setzero_si512().as_i32x16();
+    transmute(simd_select_bitmask(k, shf, zero))
 }
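The `sll`/`srl` forms take the shift amount from the low 64 bits of an `__m128i` and apply it uniformly to every lane; per Intel's pseudocode, a count at or above the element width zeroes the result rather than wrapping. A sketch (hypothetical helper, same assumptions):

```rust
#[cfg(target_arch = "x86_64")]
#[target_feature(enable = "avx512f")]
unsafe fn count_shifts_epi32() {
    use std::arch::x86_64::*;
    use std::mem::transmute;

    let a = _mm512_set1_epi32(0b1000);
    // The count lives in the low 64 bits of an __m128i.
    let count = _mm_set_epi64x(0, 3);

    let r: [i32; 16] = transmute(_mm512_srl_epi32(a, count));
    assert_eq!(r[0], 0b1);

    // A count of 32 or more shifts everything out.
    let big = _mm_set_epi64x(0, 32);
    let z: [i32; 16] = transmute(_mm512_srl_epi32(a, big));
    assert_eq!(z[0], 0);
}
```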
-/// Compare packed single-precision (32-bit) floating-point elements in a and b for greater-than, and store the results in a mask vector k
-/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set).
+/// Shift packed 64-bit integers in a left by count while shifting in zeros, and store the results in dst.
 ///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmpnlt_pd)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_sll_epi64&expand=5289)
 #[inline]
 #[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vcmp))]
-pub unsafe fn _mm512_mask_cmpnlt_pd_mask(m: __mmask8, a: __m512d, b: __m512d) -> __mmask8 {
-    _mm512_mask_cmp_pd_mask(m, a, b, _CMP_NLT_US)
+#[cfg_attr(test, assert_instr(vpsllq))]
+pub unsafe fn _mm512_sll_epi64(a: __m512i, count: __m128i) -> __m512i {
+    transmute(vpsllq(a.as_i64x8(), count.as_i64x2()))
 }

-/// Compare packed double-precision (64-bit) floating-point elements in a and b for less-than-or-equal, and store the results in a mask vector.
+/// Shift packed 64-bit integers in a left by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
 ///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmple_pd)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_sll_epi64&expand=5287)
 #[inline]
 #[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vcmp))]
-pub unsafe fn _mm512_cmple_pd_mask(a: __m512d, b: __m512d) -> __mmask8 {
-    _mm512_cmp_pd_mask(a, b, _CMP_LE_OS)
+#[cfg_attr(test, assert_instr(vpsllq))]
+pub unsafe fn _mm512_mask_sll_epi64(src: __m512i, k: __mmask8, a: __m512i, count: __m128i) -> __m512i {
+    let shf = _mm512_sll_epi64(a, count).as_i64x8();
+    transmute(simd_select_bitmask(k, shf, src.as_i64x8()))
 }

-/// Compare packed double-precision (64-bit) floating-point elements in a and b for less-than-or-equal, and store the results in a mask vector k
-/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set).
+/// Shift packed 64-bit integers in a left by count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
 ///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmple_pd)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_sll_epi64&expand=5288)
 #[inline]
 #[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vcmp))]
-pub unsafe fn _mm512_mask_cmple_pd_mask(m: __mmask8, a: __m512d, b: __m512d) -> __mmask8 {
-    _mm512_mask_cmp_pd_mask(m, a, b, _CMP_LE_OS)
+#[cfg_attr(test, assert_instr(vpsllq))]
+pub unsafe fn _mm512_maskz_sll_epi64(k: __mmask8, a: __m512i, count: __m128i) -> __m512i {
+    let shf = _mm512_sll_epi64(a, count).as_i64x8();
+    let zero = _mm512_setzero_si512().as_i64x8();
+    transmute(simd_select_bitmask(k, shf, zero))
 }

-/// Compare packed single-precision (32-bit) floating-point elements in a and b for greater-than, and store the results in a mask vector.
+/// Shift packed 64-bit integers in a right by count while shifting in zeros, and store the results in dst.
 ///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmpnle_pd)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_srl_epi64&expand=5501)
 #[inline]
 #[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vcmp))]
-pub unsafe fn _mm512_cmpnle_pd_mask(a: __m512d, b: __m512d) -> __mmask8 {
-    _mm512_cmp_pd_mask(a, b, _CMP_NLE_US)
+#[cfg_attr(test, assert_instr(vpsrlq))]
+pub unsafe fn _mm512_srl_epi64(a: __m512i, count: __m128i) -> __m512i {
+    transmute(vpsrlq(a.as_i64x8(), count.as_i64x2()))
 }

-/// Compare packed single-precision (32-bit) floating-point elements in a and b for greater-than, and store the results in a mask vector k
-/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set).
+/// Shift packed 64-bit integers in a right by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
 ///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmpnle_pd)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_srl_epi64&expand=5499)
 #[inline]
 #[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vcmp))]
-pub unsafe fn _mm512_mask_cmpnle_pd_mask(m: __mmask8, a: __m512d, b: __m512d) -> __mmask8 {
-    _mm512_mask_cmp_pd_mask(m, a, b, _CMP_NLE_US)
+#[cfg_attr(test, assert_instr(vpsrlq))]
+pub unsafe fn _mm512_mask_srl_epi64(src: __m512i, k: __mmask8, a: __m512i, count: __m128i) -> __m512i {
+    let shf = _mm512_srl_epi64(a, count).as_i64x8();
+    transmute(simd_select_bitmask(k, shf, src.as_i64x8()))
 }

-/// Compare packed double-precision (64-bit) floating-point elements in a and b for equality, and store the results in a mask vector.
+/// Shift packed 64-bit integers in a right by count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
 ///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmpeq_pd)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_srl_epi64&expand=5500)
 #[inline]
 #[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vcmp))]
-pub unsafe fn _mm512_cmpeq_pd_mask(a: __m512d, b: __m512d) -> __mmask8 {
-    _mm512_cmp_pd_mask(a, b, _CMP_EQ_OQ)
+#[cfg_attr(test, assert_instr(vpsrlq))]
+pub unsafe fn _mm512_maskz_srl_epi64(k: __mmask8, a: __m512i, count: __m128i) -> __m512i {
+    let shf = _mm512_srl_epi64(a, count).as_i64x8();
+    let zero = _mm512_setzero_si512().as_i64x8();
+    transmute(simd_select_bitmask(k, shf, zero))
 }
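The same count convention applies to the 64-bit lanes, and the writemask merge composes with it in the obvious way. A sketch (hypothetical helper, same assumptions):

```rust
#[cfg(target_arch = "x86_64")]
#[target_feature(enable = "avx512f")]
unsafe fn masked_count_shift_epi64() {
    use std::arch::x86_64::*;
    use std::mem::transmute;

    let src = _mm512_set1_epi64(-1);
    let a = _mm512_set1_epi64(3);
    let count = _mm_set_epi64x(0, 2);
    let k: __mmask8 = 0b0000_1111;

    let r: [i64; 8] = transmute(_mm512_mask_sll_epi64(src, k, a, count));
    assert_eq!(r[0], 12); // 3 << 2 in the selected lanes
    assert_eq!(r[7], -1); // src elsewhere
}
```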
-/// Compare packed double-precision (64-bit) floating-point elements in a and b for equality, and store the results in a mask vector k
-/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set).
+/// Shift packed 32-bit integers in a right by count while shifting in sign bits, and store the results in dst.
 ///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmpeq_pd)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_sra_epi32&expand=5407)
 #[inline]
 #[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vcmp))]
-pub unsafe fn _mm512_mask_cmpeq_pd_mask(m: __mmask8, a: __m512d, b: __m512d) -> __mmask8 {
-    _mm512_mask_cmp_pd_mask(m, a, b, _CMP_EQ_OQ)
+#[cfg_attr(test, assert_instr(vpsrad))]
+pub unsafe fn _mm512_sra_epi32(a: __m512i, count: __m128i) -> __m512i {
+    transmute(vpsrad(a.as_i32x16(), count.as_i32x4()))
 }

-/// Compare packed double-precision (64-bit) floating-point elements in a and b for inequality, and store the results in a mask vector.
+/// Shift packed 32-bit integers in a right by count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
 ///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmpneq_pd)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_sra_epi32&expand=5405)
 #[inline]
 #[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vcmp))]
-pub unsafe fn _mm512_cmpneq_pd_mask(a: __m512d, b: __m512d) -> __mmask8 {
-    _mm512_cmp_pd_mask(a, b, _CMP_NEQ_UQ)
+#[cfg_attr(test, assert_instr(vpsrad))]
+pub unsafe fn _mm512_mask_sra_epi32(src: __m512i, k: __mmask16, a: __m512i, count: __m128i) -> __m512i {
+    let shf = _mm512_sra_epi32(a, count).as_i32x16();
+    transmute(simd_select_bitmask(k, shf, src.as_i32x16()))
 }

-/// Compare packed double-precision (64-bit) floating-point elements in a and b for inequality, and store the results in a mask vector k
-/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set).
+/// Shift packed 32-bit integers in a right by count while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
 ///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmpneq_pd_mask)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_sra_epi32&expand=5406)
 #[inline]
 #[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vcmp))]
-pub unsafe fn _mm512_mask_cmpneq_pd_mask(m: __mmask8, a: __m512d, b: __m512d) -> __mmask8 {
-    _mm512_mask_cmp_pd_mask(m, a, b, _CMP_NEQ_UQ)
+#[cfg_attr(test, assert_instr(vpsrad))]
+pub unsafe fn _mm512_maskz_sra_epi32(k: __mmask16, a: __m512i, count: __m128i) -> __m512i {
+    let shf = _mm512_sra_epi32(a, count).as_i32x16();
+    let zero = _mm512_setzero_si512().as_i32x16();
+    transmute(simd_select_bitmask(k, shf, zero))
 }

-/// Compare packed double-precision (64-bit) floating-point elements in a and b based on the comparison operand specified by op.
+/// Shift packed 64-bit integers in a right by count while shifting in sign bits, and store the results in dst.
 ///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmp_pd_mask)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_sra_epi64&expand=5416)
 #[inline]
 #[target_feature(enable = "avx512f")]
-#[rustc_args_required_const(2)]
-#[cfg_attr(test, assert_instr(vcmp, op = 0))]
-pub unsafe fn _mm512_cmp_pd_mask(a: __m512d, b: __m512d, op: i32) -> __mmask8 {
-    let neg_one = -1;
-    macro_rules! call {
-        ($imm5:expr) => {
-            vcmppd(
-                a.as_f64x8(),
-                b.as_f64x8(),
-                $imm5,
-                neg_one,
-                _MM_FROUND_CUR_DIRECTION,
-            )
-        };
-    }
-    let r = constify_imm5!(op, call);
-    transmute(r)
+#[cfg_attr(test, assert_instr(vpsraq))]
+pub unsafe fn _mm512_sra_epi64(a: __m512i, count: __m128i) -> __m512i {
+    transmute(vpsraq(a.as_i64x8(), count.as_i64x2()))
 }

-/// Compare packed double-precision (64-bit) floating-point elements in a and b based on the comparison operand specified by op,
-/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set).
+/// Shift packed 64-bit integers in a right by count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
 ///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmp_pd_mask)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_sra_epi64&expand=5414)
 #[inline]
 #[target_feature(enable = "avx512f")]
-#[rustc_args_required_const(3)]
-#[cfg_attr(test, assert_instr(vcmp, op = 0))]
-pub unsafe fn _mm512_mask_cmp_pd_mask(m: __mmask8, a: __m512d, b: __m512d, op: i32) -> __mmask8 {
-    macro_rules! call {
-        ($imm5:expr) => {
-            vcmppd(
-                a.as_f64x8(),
-                b.as_f64x8(),
-                $imm5,
-                m as i8,
-                _MM_FROUND_CUR_DIRECTION,
-            )
-        };
-    }
-    let r = constify_imm5!(op, call);
-    transmute(r)
+#[cfg_attr(test, assert_instr(vpsraq))]
+pub unsafe fn _mm512_mask_sra_epi64(src: __m512i, k: __mmask8, a: __m512i, count: __m128i) -> __m512i {
+    let shf = _mm512_sra_epi64(a, count).as_i64x8();
+    transmute(simd_select_bitmask(k, shf, src.as_i64x8()))
 }
call { - ($imm5:expr, $imm4:expr) => { - vcmppd(a.as_f64x8(), b.as_f64x8(), $imm5, neg_one, $imm4) - }; - } - let r = constify_imm5_sae!(op, sae, call); - transmute(r) +#[cfg_attr(test, assert_instr(vpsraq))] +pub unsafe fn _mm512_maskz_sra_epi64(k: __mmask8, a: __m512i, count: __m128i) -> __m512i { + let shf = _mm512_sra_epi64(a, count).as_i64x8(); + let zero = _mm512_setzero_si512().as_i64x8(); + transmute(simd_select_bitmask(k, shf, zero)) } -/// Compare packed double-precision (64-bit) floating-point elements in a and b based on the comparison operand specified by op, -/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set). +/// Shift packed 32-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst. /// -/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmp_round_pd_mask) +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_srai_epi32&expand=5436) #[inline] #[target_feature(enable = "avx512f")] -#[rustc_args_required_const(3, 4)] -#[cfg_attr(test, assert_instr(vcmp, op = 0, sae = 4))] -pub unsafe fn _mm512_mask_cmp_round_pd_mask( - m: __mmask8, - a: __m512d, - b: __m512d, - op: i32, - sae: i32, -) -> __mmask8 { - macro_rules! call { - ($imm5:expr, $imm4:expr) => { - vcmppd(a.as_f64x8(), b.as_f64x8(), $imm5, m as i8, $imm4) - }; - } - let r = constify_imm5_sae!(op, sae, call); - transmute(r) +#[cfg_attr(test, assert_instr(vpsraid))] +pub unsafe fn _mm512_srai_epi32(a: __m512i, imm8: u32) -> __m512i { + transmute(vpsraid(a.as_i32x16(), imm8)) } -/// Compare packed double-precision (64-bit) floating-point elements in a and b to see if neither is NaN, and store the results in a mask vector. +/// Shift packed 32-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). /// -/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmpord_pd_mask) +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_srai_epi32&expand=5434) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vcmp, op = 0))] -pub unsafe fn _mm512_cmpord_pd_mask(a: __m512d, b: __m512d) -> __mmask8 { - _mm512_cmp_pd_mask(a, b, _CMP_ORD_Q) +#[cfg_attr(test, assert_instr(vpsraid))] +pub unsafe fn _mm512_mask_srai_epi32(src: __m512i, k: __mmask16, a: __m512i, imm8: u32) -> __m512i { + let shf = _mm512_srai_epi32(a, imm8).as_i32x16(); + transmute(simd_select_bitmask(k, shf, src.as_i32x16())) } -/// Compare packed double-precision (64-bit) floating-point elements in a and b to see if neither is NaN, and store the results in a mask vector. +/// Shift packed 32-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
/// -/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmpord_pd_mask) +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_srai_epi32&expand=5435) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vcmp, op = 0))] -pub unsafe fn _mm512_mask_cmpord_pd_mask(m: __mmask8, a: __m512d, b: __m512d) -> __mmask8 { - _mm512_mask_cmp_pd_mask(m, a, b, _CMP_ORD_Q) +#[cfg_attr(test, assert_instr(vpsraid))] +pub unsafe fn _mm512_maskz_srai_epi32(k: __mmask16, a: __m512i, imm8: u32) -> __m512i { + let shf = _mm512_srai_epi32(a, imm8).as_i32x16(); + let zero = _mm512_setzero_si512().as_i32x16(); + transmute(simd_select_bitmask(k, shf, zero)) } -/// Compare packed double-precision (64-bit) floating-point elements in a and b to see if either is NaN, and store the results in a mask vector. +/// Shift packed 64-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst. /// -/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmpunord_pd_mask) +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_srai_epi64&expand=5445) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vcmp, op = 0))] -pub unsafe fn _mm512_cmpunord_pd_mask(a: __m512d, b: __m512d) -> __mmask8 { - _mm512_cmp_pd_mask(a, b, _CMP_UNORD_Q) +#[cfg_attr(test, assert_instr(vpsraiq))] +pub unsafe fn _mm512_srai_epi64(a: __m512i, imm8: u32) -> __m512i { + transmute(vpsraiq(a.as_i64x8(), imm8)) } -/// Compare packed double-precision (64-bit) floating-point elements in a and b to see if either is NaN, and store the results in a mask vector. +/// Shift packed 64-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). /// -/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmpunord_pd_mask) +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_srai_epi64&expand=5443) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vcmp, op = 0))] -pub unsafe fn _mm512_mask_cmpunord_pd_mask(m: __mmask8, a: __m512d, b: __m512d) -> __mmask8 { - _mm512_mask_cmp_pd_mask(m, a, b, _CMP_UNORD_Q) +#[cfg_attr(test, assert_instr(vpsraiq))] +pub unsafe fn _mm512_mask_srai_epi64(src: __m512i, k: __mmask8, a: __m512i, imm8: u32) -> __m512i { + let shf = _mm512_srai_epi64(a, imm8).as_i64x8(); + transmute(simd_select_bitmask(k, shf, src.as_i64x8())) } -/// Compare the lower single-precision (32-bit) floating-point element in a and b based on the comparison operand specified by imm8, and store the result in a mask vector. +/// Shift packed 64-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
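Taken together, the shift intrinsics in this patch come in three flavors that differ only in where the count lives: sra/srl/sll take one count for all lanes from the low 64 bits of a __m128i, srai/srli/slli take it as an immediate, and the srav/srlv/sllv functions below take a per-lane count. A scalar sketch of the distinction, with made-up values and counts kept below the lane width:

fn main() {
    let a: [i32; 4] = [-64, 64, -1, 8];
    // sra/srai style: one count applied to every lane.
    let uniform: Vec<i32> = a.iter().map(|&x| x >> 2).collect();
    assert_eq!(uniform, [-16, 16, -1, 2]);
    // srav style: each lane is shifted by its own count.
    let counts = [1u32, 2, 3, 4];
    let per_lane: Vec<i32> = a.iter().zip(&counts).map(|(&x, &c)| x >> c).collect();
    assert_eq!(per_lane, [-32, 16, -1, 0]);
}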
/// -/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmp_ss_mask&expand=5236,755,757) +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_srai_epi64&expand=5444) #[inline] #[target_feature(enable = "avx512f")] -#[rustc_args_required_const(2)] -#[cfg_attr(test, assert_instr(vcmp, op = 0, sae = 4))] -pub unsafe fn _mm_cmp_ss_mask(a: __m128, b: __m128, op: i32) -> __mmask8 { - let neg_one = -1; - macro_rules! call { - ($imm5:expr) => { - vcmpss(a, b, $imm5, neg_one, _MM_FROUND_CUR_DIRECTION) - }; - } - let r = constify_imm5!(op, call); - transmute(r) +#[cfg_attr(test, assert_instr(vpsraiq))] +pub unsafe fn _mm512_maskz_srai_epi64(k: __mmask8, a: __m512i, imm8: u32) -> __m512i { + let shf = _mm512_srai_epi64(a, imm8).as_i64x8(); + let zero = _mm512_setzero_si512().as_i64x8(); + transmute(simd_select_bitmask(k, shf, zero)) } -/// Compare the lower single-precision (32-bit) floating-point element in a and b based on the comparison operand specified by imm8, and store the result in a mask vector using zeromask m (the element is zeroed out when mask bit 0 is not set). +/// Shift packed 32-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst. /// -/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cmp_ss_mask&expand=5236,755,757) +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_srav_epi32&expand=5465) #[inline] #[target_feature(enable = "avx512f")] -#[rustc_args_required_const(3)] -#[cfg_attr(test, assert_instr(vcmp, op = 0, sae = 4))] -pub unsafe fn _mm_mask_cmp_ss_mask(m: __mmask8, a: __m128, b: __m128, op: i32) -> __mmask8 { - macro_rules! call { - ($imm5:expr) => { - vcmpss(a, b, $imm5, m as i8, _MM_FROUND_CUR_DIRECTION) - }; - } - let r = constify_imm5!(op, call); - transmute(r) +#[cfg_attr(test, assert_instr(vpsravd))] +pub unsafe fn _mm512_srav_epi32(a: __m512i, count: __m512i) -> __m512i { + transmute(vpsravd(a.as_i32x16(), count.as_i32x16())) } -/// Compare the lower single-precision (32-bit) floating-point element in a and b based on the comparison operand specified by imm8, and store the result in a mask vector. +/// Shift packed 32-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). /// -/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmp_round_ss_mask&expand=5236,755,757) +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_srav_epi32&expand=5463) #[inline] #[target_feature(enable = "avx512f")] -#[rustc_args_required_const(2, 3)] -#[cfg_attr(test, assert_instr(vcmp, op = 0, sae = 4))] -pub unsafe fn _mm_cmp_round_ss_mask(a: __m128, b: __m128, op: i32, sae: i32) -> __mmask8 { - let neg_one = -1; - macro_rules! 
call { - ($imm5:expr, $imm4:expr) => { - vcmpss(a, b, $imm5, neg_one, $imm4) - }; - } - let r = constify_imm5_sae!(op, sae, call); - transmute(r) +#[cfg_attr(test, assert_instr(vpsravd))] +pub unsafe fn _mm512_mask_srav_epi32(src: __m512i, k: __mmask16, a: __m512i, count: __m512i) -> __m512i { + let shf = _mm512_srav_epi32(a, count).as_i32x16(); + transmute(simd_select_bitmask(k, shf, src.as_i32x16())) } -/// Compare the lower single-precision (32-bit) floating-point element in a and b based on the comparison operand specified by imm8, and store the result in a mask vector using zeromask m (the element is zeroed out when mask bit 0 is not set). +/// Shift packed 32-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). /// -/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cmp_round_ss_mask&expand=5236,755,757) +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_srav_epi32&expand=5464) #[inline] #[target_feature(enable = "avx512f")] -#[rustc_args_required_const(3, 4)] -#[cfg_attr(test, assert_instr(vcmp, op = 0, sae = 4))] -pub unsafe fn _mm_mask_cmp_round_ss_mask( - m: __mmask8, - a: __m128, - b: __m128, - op: i32, - sae: i32, -) -> __mmask8 { - macro_rules! call { - ($imm5:expr, $imm4:expr) => { - vcmpss(a, b, $imm5, m as i8, $imm4) - }; - } - let r = constify_imm5_sae!(op, sae, call); - transmute(r) +#[cfg_attr(test, assert_instr(vpsravd))] +pub unsafe fn _mm512_maskz_srav_epi32(k: __mmask16, a: __m512i, count: __m512i) -> __m512i { + let shf = _mm512_srav_epi32(a, count).as_i32x16(); + let zero = _mm512_setzero_si512().as_i32x16(); + transmute(simd_select_bitmask(k, shf, zero)) } -/// Compare the lower single-precision (32-bit) floating-point element in a and b based on the comparison operand specified by imm8, and store the result in a mask vector. +/// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst. /// -/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmp_sd_mask&expand=5236,755,757) +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_srav_epi64&expand=5474) #[inline] #[target_feature(enable = "avx512f")] -#[rustc_args_required_const(2)] -#[cfg_attr(test, assert_instr(vcmp, op = 0, sae = 4))] -pub unsafe fn _mm_cmp_sd_mask(a: __m128d, b: __m128d, op: i32) -> __mmask8 { - let neg_one = -1; - macro_rules! call { - ($imm5:expr) => { - vcmpsd(a, b, $imm5, neg_one, _MM_FROUND_CUR_DIRECTION) - }; - } - let r = constify_imm5!(op, call); - transmute(r) +#[cfg_attr(test, assert_instr(vpsravq))] +pub unsafe fn _mm512_srav_epi64(a: __m512i, count: __m512i) -> __m512i { + transmute(vpsravq(a.as_i64x8(), count.as_i64x8())) } -/// Compare the lower single-precision (32-bit) floating-point element in a and b based on the comparison operand specified by imm8, and store the result in a mask vector using zeromask m (the element is zeroed out when mask bit 0 is not set). 
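One subtlety the scalar shift models above hide: for the variable arithmetic shifts (vpsravd/vpsravq), Intel documents that a count at or above the lane width fills the lane with sign bits rather than being taken modulo the width. Plain Rust >> panics on such a count, so a faithful scalar model spells the saturation out; a sketch:

fn main() {
    let a: [i32; 2] = [-7, 7];
    let counts: [u32; 2] = [40, 40]; // deliberately wider than 32 bits
    // Count >= 32 saturates the lane to all sign bits (x >> 31),
    // matching Intel's description of the variable arithmetic shifts.
    let r: Vec<i32> = a.iter().zip(&counts)
        .map(|(&x, &c)| if c >= 32 { x >> 31 } else { x >> c })
        .collect();
    assert_eq!(r, [-1, 0]);
}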
+/// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). /// -/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cmp_sd_mask&expand=5236,755,757) +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_srav_epi64&expand=5472) #[inline] #[target_feature(enable = "avx512f")] -#[rustc_args_required_const(3)] -#[cfg_attr(test, assert_instr(vcmp, op = 0, sae = 4))] -pub unsafe fn _mm_mask_cmp_sd_mask(m: __mmask8, a: __m128d, b: __m128d, op: i32) -> __mmask8 { - macro_rules! call { - ($imm5:expr) => { - vcmpsd(a, b, $imm5, m as i8, _MM_FROUND_CUR_DIRECTION) - }; - } - let r = constify_imm5!(op, call); - transmute(r) +#[cfg_attr(test, assert_instr(vpsravq))] +pub unsafe fn _mm512_mask_srav_epi64(src: __m512i, k: __mmask8, a: __m512i, count: __m512i) -> __m512i { + let shf = _mm512_srav_epi64(a, count).as_i64x8(); + transmute(simd_select_bitmask(k, shf, src.as_i64x8())) } -/// Compare the lower single-precision (32-bit) floating-point element in a and b based on the comparison operand specified by imm8, and store the result in a mask vector. +/// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). /// -/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmp_round_sd_mask&expand=5236,755,757) +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_srav_epi64&expand=5473) #[inline] #[target_feature(enable = "avx512f")] -#[rustc_args_required_const(2, 3)] -#[cfg_attr(test, assert_instr(vcmp, op = 0, sae = 4))] -pub unsafe fn _mm_cmp_round_sd_mask(a: __m128d, b: __m128d, op: i32, sae: i32) -> __mmask8 { - let neg_one = -1; - macro_rules! call { - ($imm5:expr, $imm4:expr) => { - vcmpsd(a, b, $imm5, neg_one, $imm4) - }; - } - let r = constify_imm5_sae!(op, sae, call); - transmute(r) +#[cfg_attr(test, assert_instr(vpsravq))] +pub unsafe fn _mm512_maskz_srav_epi64(k: __mmask8, a: __m512i, count: __m512i) -> __m512i { + let shf = _mm512_srav_epi64(a, count).as_i64x8(); + let zero = _mm512_setzero_si512().as_i64x8(); + transmute(simd_select_bitmask(k, shf, zero)) } -/// Compare the lower single-precision (32-bit) floating-point element in a and b based on the comparison operand specified by imm8, and store the result in a mask vector using zeromask m (the element is zeroed out when mask bit 0 is not set). +/// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst. /// -/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cmp_round_sd_mask&expand=5236,755,757) +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_rolv_epi32&expand=4703) #[inline] #[target_feature(enable = "avx512f")] -#[rustc_args_required_const(3, 4)] -#[cfg_attr(test, assert_instr(vcmp, op = 0, sae = 4))] -pub unsafe fn _mm_mask_cmp_round_sd_mask( - m: __mmask8, - a: __m128d, - b: __m128d, - op: i32, - sae: i32, -) -> __mmask8 { - macro_rules! 
call { - ($imm5:expr, $imm4:expr) => { - vcmpsd(a, b, $imm5, m as i8, $imm4) - }; - } - let r = constify_imm5_sae!(op, sae, call); - transmute(r) +#[cfg_attr(test, assert_instr(vprolvd))] +pub unsafe fn _mm512_rolv_epi32(a: __m512i, b: __m512i) -> __m512i { + transmute(vprolvd(a.as_i32x16(), b.as_i32x16())) } -/// Compare packed unsigned 32-bit integers in a and b for less-than, and store the results in a mask vector. +/// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). /// -/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmplt_epu32) +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_rolv_epi32&expand=4701) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vpcmp))] -pub unsafe fn _mm512_cmplt_epu32_mask(a: __m512i, b: __m512i) -> __mmask16 { - simd_bitmask::<u32x16, _>(simd_lt(a.as_u32x16(), b.as_u32x16())) +#[cfg_attr(test, assert_instr(vprolvd))] +pub unsafe fn _mm512_mask_rolv_epi32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i { + let rol = _mm512_rolv_epi32(a, b).as_i32x16(); + transmute(simd_select_bitmask(k, rol, src.as_i32x16())) } -/// Compare packed unsigned 32-bit integers in a and b for less-than, and store the results in a mask vector k -/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set). +/// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). /// -/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmplt_epu32) +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_rolv_epi32&expand=4702) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vpcmp))] -pub unsafe fn _mm512_mask_cmplt_epu32_mask(m: __mmask16, a: __m512i, b: __m512i) -> __mmask16 { - _mm512_cmplt_epu32_mask(a, b) & m +#[cfg_attr(test, assert_instr(vprolvd))] +pub unsafe fn _mm512_maskz_rolv_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i { + let rol = _mm512_rolv_epi32(a, b).as_i32x16(); + let zero = _mm512_setzero_si512().as_i32x16(); + transmute(simd_select_bitmask(k, rol, zero)) } -/// Compare packed unsigned 32-bit integers in a and b for greater-than, and store the results in a mask vector. +/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst.
/// -/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmpgt_epu32) +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_rorv_epi32&expand=4739) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vpcmp))] -pub unsafe fn _mm512_cmpgt_epu32_mask(a: __m512i, b: __m512i) -> __mmask16 { - simd_bitmask::<u32x16, _>(simd_gt(a.as_u32x16(), b.as_u32x16())) +#[cfg_attr(test, assert_instr(vprorvd))] +pub unsafe fn _mm512_rorv_epi32(a: __m512i, b: __m512i) -> __m512i { + transmute(vprorvd(a.as_i32x16(), b.as_i32x16())) } -/// Compare packed unsigned 32-bit integers in a and b for greater-than, and store the results in a mask vector k -/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set). +/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). /// -/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmpgt_epu32) +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_rorv_epi32&expand=4737) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vpcmp))] -pub unsafe fn _mm512_mask_cmpgt_epu32_mask(m: __mmask16, a: __m512i, b: __m512i) -> __mmask16 { - _mm512_cmpgt_epu32_mask(a, b) & m +#[cfg_attr(test, assert_instr(vprorvd))] +pub unsafe fn _mm512_mask_rorv_epi32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i { + let ror = _mm512_rorv_epi32(a, b).as_i32x16(); + transmute(simd_select_bitmask(k, ror, src.as_i32x16())) } -/// Compare packed unsigned 32-bit integers in a and b for less-than-or-equal, and store the results in a mask vector. +/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). /// -/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmple_epu32) +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_rorv_epi32&expand=4738) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vpcmp))] -pub unsafe fn _mm512_cmple_epu32_mask(a: __m512i, b: __m512i) -> __mmask16 { - simd_bitmask::<u32x16, _>(simd_le(a.as_u32x16(), b.as_u32x16())) +#[cfg_attr(test, assert_instr(vprorvd))] +pub unsafe fn _mm512_maskz_rorv_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i { + let ror = _mm512_rorv_epi32(a, b).as_i32x16(); + let zero = _mm512_setzero_si512().as_i32x16(); + transmute(simd_select_bitmask(k, ror, zero)) } -/// Compare packed unsigned 32-bit integers in a and b for less-than-or-equal, and store the results in a mask vector k -/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set). +/// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst.
/// -/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmple_epu32) +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_rolv_epi64&expand=4712) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vpcmp))] -pub unsafe fn _mm512_mask_cmple_epu32_mask(m: __mmask16, a: __m512i, b: __m512i) -> __mmask16 { - _mm512_cmple_epu32_mask(a, b) & m +#[cfg_attr(test, assert_instr(vprolvq))] +pub unsafe fn _mm512_rolv_epi64(a: __m512i, b: __m512i) -> __m512i { + transmute(vprolvq(a.as_i64x8(), b.as_i64x8())) } -/// Compare packed unsigned 32-bit integers in a and b for greater-than-or-equal, and store the results in a mask vector. +/// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). /// -/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmpge_epu32) +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_rolv_epi64&expand=4710) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vpcmp))] -pub unsafe fn _mm512_cmpge_epu32_mask(a: __m512i, b: __m512i) -> __mmask16 { - simd_bitmask::<u32x16, _>(simd_ge(a.as_u32x16(), b.as_u32x16())) +#[cfg_attr(test, assert_instr(vprolvq))] +pub unsafe fn _mm512_mask_rolv_epi64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i { + let rol = _mm512_rolv_epi64(a, b).as_i64x8(); + transmute(simd_select_bitmask(k, rol, src.as_i64x8())) } -/// Compare packed unsigned 32-bit integers in a and b for greater-than-or-equal, and store the results in a mask vector k -/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set). +/// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). /// -/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmpge_epu32) +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_rolv_epi64&expand=4711) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vpcmp))] -pub unsafe fn _mm512_mask_cmpge_epu32_mask(m: __mmask16, a: __m512i, b: __m512i) -> __mmask16 { - _mm512_cmpge_epu32_mask(a, b) & m +#[cfg_attr(test, assert_instr(vprolvq))] +pub unsafe fn _mm512_maskz_rolv_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i { + let rol = _mm512_rolv_epi64(a, b).as_i64x8(); + let zero = _mm512_setzero_si512().as_i64x8(); + transmute(simd_select_bitmask(k, rol, zero)) } -/// Compare packed unsigned 32-bit integers in a and b for equality, and store the results in a mask vector. +/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst.
/// -/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmpeq_epu32) +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_rorv_epi64&expand=4748) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vpcmp))] -pub unsafe fn _mm512_cmpeq_epu32_mask(a: __m512i, b: __m512i) -> __mmask16 { - simd_bitmask::<u32x16, _>(simd_eq(a.as_u32x16(), b.as_u32x16())) +#[cfg_attr(test, assert_instr(vprorvq))] +pub unsafe fn _mm512_rorv_epi64(a: __m512i, b: __m512i) -> __m512i { + transmute(vprorvq(a.as_i64x8(), b.as_i64x8())) } -/// Compare packed unsigned 32-bit integers in a and b for equality, and store the results in a mask vector k -/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set). +/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). /// -/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmpeq_epu32) +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_rorv_epi64&expand=4746) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vpcmp))] -pub unsafe fn _mm512_mask_cmpeq_epu32_mask(m: __mmask16, a: __m512i, b: __m512i) -> __mmask16 { - _mm512_cmpeq_epu32_mask(a, b) & m +#[cfg_attr(test, assert_instr(vprorvq))] +pub unsafe fn _mm512_mask_rorv_epi64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i { + let ror = _mm512_rorv_epi64(a, b).as_i64x8(); + transmute(simd_select_bitmask(k, ror, src.as_i64x8())) } -/// Compare packed unsigned 32-bit integers in a and b for inequality, and store the results in a mask vector. +/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). /// -/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmpneq_epu32) +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_rorv_epi64&expand=4747) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vpcmp))] -pub unsafe fn _mm512_cmpneq_epu32_mask(a: __m512i, b: __m512i) -> __mmask16 { - simd_bitmask::<u32x16, _>(simd_ne(a.as_u32x16(), b.as_u32x16())) +#[cfg_attr(test, assert_instr(vprorvq))] +pub unsafe fn _mm512_maskz_rorv_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i { + let ror = _mm512_rorv_epi64(a, b).as_i64x8(); + let zero = _mm512_setzero_si512().as_i64x8(); + transmute(simd_select_bitmask(k, ror, zero)) } -/// Compare packed unsigned 32-bit integers in a and b for inequality, and store the results in a mask vector k -/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set). +/// Shift packed 32-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst.
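The rolv/rorv intrinsics map directly onto Rust's rotate_left/rotate_right applied per lane; rotation counts are effectively taken modulo the lane width, and the two directions are inverses of each other. A scalar check:

fn main() {
    let a: [u32; 4] = [0x8000_0001, 1, 0xF000_0000, 7];
    let b: [u32; 4] = [1, 31, 4, 0];
    // Per-lane model of vprolvd:
    let rol: Vec<u32> = a.iter().zip(&b).map(|(&x, &n)| x.rotate_left(n)).collect();
    assert_eq!(rol, [3, 0x8000_0000, 0x0000_000F, 7]);
    // vprorvd undoes it:
    let ror: Vec<u32> = rol.iter().zip(&b).map(|(&x, &n)| x.rotate_right(n)).collect();
    assert_eq!(ror, a);
}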
/// -/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmpneq_epu32_mask) +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_sllv_epi32&expand=5342) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vpcmp))] -pub unsafe fn _mm512_mask_cmpneq_epu32_mask(m: __mmask16, a: __m512i, b: __m512i) -> __mmask16 { - _mm512_cmpneq_epu32_mask(a, b) & m +#[cfg_attr(test, assert_instr(vpsllvd))] +pub unsafe fn _mm512_sllv_epi32(a: __m512i, count: __m512i) -> __m512i { + transmute(vpsllvd(a.as_i32x16(), count.as_i32x16())) } -/// Compare packed unsigned 32-bit integers in a and b based on the comparison operand specified by op. +/// Shift packed 32-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). /// -/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmp_epu32_mask) +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_sllv_epi32&expand=5340) #[inline] #[target_feature(enable = "avx512f")] -#[rustc_args_required_const(2)] -#[cfg_attr(test, assert_instr(vpcmp, op = 0))] -pub unsafe fn _mm512_cmp_epu32_mask(a: __m512i, b: __m512i, op: _MM_CMPINT_ENUM) -> __mmask16 { - let neg_one = -1; - macro_rules! call { - ($imm3:expr) => { - vpcmpud(a.as_i32x16(), b.as_i32x16(), $imm3, neg_one) - }; - } - let r = constify_imm3!(op, call); - transmute(r) +#[cfg_attr(test, assert_instr(vpsllvd))] +pub unsafe fn _mm512_mask_sllv_epi32(src: __m512i, k: __mmask16, a: __m512i, count: __m512i) -> __m512i { + let shf = _mm512_sllv_epi32(a, count).as_i32x16(); + transmute(simd_select_bitmask(k, shf, src.as_i32x16())) } -/// Compare packed unsigned 32-bit integers in a and b based on the comparison operand specified by op, -/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set). +/// Shift packed 32-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). /// -/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmp_epu32_mask) +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_sllv_epi32&expand=5341) #[inline] #[target_feature(enable = "avx512f")] -#[rustc_args_required_const(3)] -#[cfg_attr(test, assert_instr(vpcmp, op = 0))] -pub unsafe fn _mm512_mask_cmp_epu32_mask( - m: __mmask16, - a: __m512i, - b: __m512i, - op: _MM_CMPINT_ENUM, -) -> __mmask16 { - macro_rules! 
call { - ($imm3:expr) => { - vpcmpud(a.as_i32x16(), b.as_i32x16(), $imm3, m as i16) - }; - } - let r = constify_imm3!(op, call); - transmute(r) +#[cfg_attr(test, assert_instr(vpsllvd))] +pub unsafe fn _mm512_maskz_sllv_epi32(k: __mmask16, a: __m512i, count: __m512i) -> __m512i { + let shf = _mm512_sllv_epi32(a, count).as_i32x16(); + let zero = _mm512_setzero_si512().as_i32x16(); + transmute(simd_select_bitmask(k, shf, zero)) } -/// Compare packed unsigned 32-bit integers in a and b for less-than, and store the results in a mask vector. +/// Shift packed 32-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst. /// -/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmplt_epi32) +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_srlv_epi32&expand=5554) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vpcmp))] -pub unsafe fn _mm512_cmplt_epi32_mask(a: __m512i, b: __m512i) -> __mmask16 { - simd_bitmask::<i32x16, _>(simd_lt(a.as_i32x16(), b.as_i32x16())) +#[cfg_attr(test, assert_instr(vpsrlvd))] +pub unsafe fn _mm512_srlv_epi32(a: __m512i, count: __m512i) -> __m512i { + transmute(vpsrlvd(a.as_i32x16(), count.as_i32x16())) } -/// Compare packed unsigned 32-bit integers in a and b for less-than, and store the results in a mask vector k -/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set). +/// Shift packed 32-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). /// -/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmplt_epi32) +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_srlv_epi32&expand=5552) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vpcmp))] -pub unsafe fn _mm512_mask_cmplt_epi32_mask(m: __mmask16, a: __m512i, b: __m512i) -> __mmask16 { - _mm512_cmplt_epi32_mask(a, b) & m +#[cfg_attr(test, assert_instr(vpsrlvd))] +pub unsafe fn _mm512_mask_srlv_epi32(src: __m512i, k: __mmask16, a: __m512i, count: __m512i) -> __m512i { + let shf = _mm512_srlv_epi32(a, count).as_i32x16(); + transmute(simd_select_bitmask(k, shf, src.as_i32x16())) } -/// Compare packed signed 32-bit integers in a and b for greater-than, and store the results in a mask vector. +/// Shift packed 32-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
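A typical use of the per-lane logical shifts is bit-field unpacking: broadcast a packed word, shift each lane right by its field's offset, then mask to the field width. A scalar sketch of the pattern; the field layout here is made up for illustration:

fn main() {
    // Three fields packed into one word: bits [0..4), [4..10), [10..16).
    let packed = 0b101010_010110_1001u32;
    let lanes = [packed; 3]; // broadcast, as _mm512_set1_epi32 would
    let offsets = [0u32, 4, 10];
    let widths = [4u32, 6, 6];
    // srlv-style per-lane shift, then AND with each field's mask.
    let fields: Vec<u32> = lanes.iter().zip(offsets.iter().zip(&widths))
        .map(|(&x, (&o, &w))| (x >> o) & ((1u32 << w) - 1))
        .collect();
    assert_eq!(fields, [0b1001, 0b010110, 0b101010]);
}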
/// -/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmpgt_epi32) +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_srlv_epi32&expand=5553) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vpcmp))] -pub unsafe fn _mm512_cmpgt_epi32_mask(a: __m512i, b: __m512i) -> __mmask16 { - simd_bitmask::<i32x16, _>(simd_gt(a.as_i32x16(), b.as_i32x16())) +#[cfg_attr(test, assert_instr(vpsrlvd))] +pub unsafe fn _mm512_maskz_srlv_epi32(k: __mmask16, a: __m512i, count: __m512i) -> __m512i { + let shf = _mm512_srlv_epi32(a, count).as_i32x16(); + let zero = _mm512_setzero_si512().as_i32x16(); + transmute(simd_select_bitmask(k, shf, zero)) } -/// Compare packed signed 32-bit integers in a and b for greater-than, and store the results in a mask vector k -/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set). +/// Shift packed 64-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst. /// -/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmpgt_epi32) +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_sllv_epi64&expand=5351) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vpcmp))] -pub unsafe fn _mm512_mask_cmpgt_epi32_mask(m: __mmask16, a: __m512i, b: __m512i) -> __mmask16 { - _mm512_cmpgt_epi32_mask(a, b) & m +#[cfg_attr(test, assert_instr(vpsllvq))] +pub unsafe fn _mm512_sllv_epi64(a: __m512i, count: __m512i) -> __m512i { + transmute(vpsllvq(a.as_i64x8(), count.as_i64x8())) } -/// Compare packed signed 32-bit integers in a and b for less-than-or-equal, and store the results in a mask vector. +/// Shift packed 64-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). /// -/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmple_epi32) +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_sllv_epi64&expand=5349) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vpcmp))] -pub unsafe fn _mm512_cmple_epi32_mask(a: __m512i, b: __m512i) -> __mmask16 { - simd_bitmask::<i32x16, _>(simd_le(a.as_i32x16(), b.as_i32x16())) +#[cfg_attr(test, assert_instr(vpsllvq))] +pub unsafe fn _mm512_mask_sllv_epi64(src: __m512i, k: __mmask8, a: __m512i, count: __m512i) -> __m512i { + let shf = _mm512_sllv_epi64(a, count).as_i64x8(); + transmute(simd_select_bitmask(k, shf, src.as_i64x8())) } -/// Compare packed signed 32-bit integers in a and b for less-than-or-equal, and store the results in a mask vector k -/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set). +/// Shift packed 64-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
/// -/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmple_epi32) +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_sllv_epi64&expand=5350) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vpcmp))] -pub unsafe fn _mm512_mask_cmple_epi32_mask(m: __mmask16, a: __m512i, b: __m512i) -> __mmask16 { - _mm512_cmple_epi32_mask(a, b) & m +#[cfg_attr(test, assert_instr(vpsllvq))] +pub unsafe fn _mm512_maskz_sllv_epi64(k: __mmask8, a: __m512i, count: __m512i) -> __m512i { + let shf = _mm512_sllv_epi64(a, count).as_i64x8(); + let zero = _mm512_setzero_si512().as_i64x8(); + transmute(simd_select_bitmask(k, shf, zero)) } -/// Compare packed signed 32-bit integers in a and b for greater-than-or-equal, and store the results in a mask vector. +/// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst. /// -/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmpge_epi32) +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_srlv_epi64&expand=5563) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vpcmp))] -pub unsafe fn _mm512_cmpge_epi32_mask(a: __m512i, b: __m512i) -> __mmask16 { - simd_bitmask::<i32x16, _>(simd_ge(a.as_i32x16(), b.as_i32x16())) +#[cfg_attr(test, assert_instr(vpsrlvq))] +pub unsafe fn _mm512_srlv_epi64(a: __m512i, count: __m512i) -> __m512i { + transmute(vpsrlvq(a.as_i64x8(), count.as_i64x8())) } -/// Compare packed signed 32-bit integers in a and b for greater-than-or-equal, and store the results in a mask vector k -/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set). +/// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). /// -/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmpge_epi32) +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_srlv_epi64&expand=5561) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vpcmp))] -pub unsafe fn _mm512_mask_cmpge_epi32_mask(m: __mmask16, a: __m512i, b: __m512i) -> __mmask16 { - _mm512_cmpge_epi32_mask(a, b) & m +#[cfg_attr(test, assert_instr(vpsrlvq))] +pub unsafe fn _mm512_mask_srlv_epi64(src: __m512i, k: __mmask8, a: __m512i, count: __m512i) -> __m512i { + let shf = _mm512_srlv_epi64(a, count).as_i64x8(); + transmute(simd_select_bitmask(k, shf, src.as_i64x8())) } -/// Compare packed signed 32-bit integers in a and b for equality, and store the results in a mask vector. +/// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
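Unlike their arithmetic cousins, the variable logical shifts (vpsllvd/vpsllvq and vpsrlvd/vpsrlvq) zero any lane whose count is at least the lane width, per Intel's description of the instructions; Rust's << would panic on such a count, so the scalar model makes the flush explicit:

fn main() {
    let a: [u64; 4] = [1, 1, 0x8000_0000_0000_0000, 3];
    let counts: [u64; 4] = [8, 64, 63, 1];
    // Per-lane model of vpsllvq; a count >= 64 flushes the lane to zero.
    let r: Vec<u64> = a.iter().zip(&counts)
        .map(|(&x, &c)| if c >= 64 { 0 } else { x << c })
        .collect();
    assert_eq!(r, [256, 0, 0, 6]);
}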
/// -/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmpeq_epi32) +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_srlv_epi64&expand=5562) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vpcmp))] -pub unsafe fn _mm512_cmpeq_epi32_mask(a: __m512i, b: __m512i) -> __mmask16 { - simd_bitmask::<i32x16, _>(simd_eq(a.as_i32x16(), b.as_i32x16())) +#[cfg_attr(test, assert_instr(vpsrlvq))] +pub unsafe fn _mm512_maskz_srlv_epi64(k: __mmask8, a: __m512i, count: __m512i) -> __m512i { + let shf = _mm512_srlv_epi64(a, count).as_i64x8(); + let zero = _mm512_setzero_si512().as_i64x8(); + transmute(simd_select_bitmask(k, shf, zero)) } -/// Compare packed signed 32-bit integers in a and b for equality, and store the results in a mask vector k -/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set). +/// Compute the bitwise AND of packed 32-bit integers in a and b, and store the results in dst. /// -/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmpeq_epi32) +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_and_epi32&expand=272) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vpcmp))] -pub unsafe fn _mm512_mask_cmpeq_epi32_mask(m: __mmask16, a: __m512i, b: __m512i) -> __mmask16 { - _mm512_cmpeq_epi32_mask(a, b) & m +#[cfg_attr(test, assert_instr(vpandd))] +pub unsafe fn _mm512_and_epi32(a: __m512i, b: __m512i) -> __m512i { + transmute(simd_and(a.as_i32x16(), b.as_i32x16())) } -/// Compare packed signed 32-bit integers in a and b for inequality, and store the results in a mask vector. +/// Compute the bitwise AND of packed 32-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). /// -/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmpneq_epi32) +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_and_epi32&expand=273) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vpcmp))] -pub unsafe fn _mm512_cmpneq_epi32_mask(a: __m512i, b: __m512i) -> __mmask16 { - simd_bitmask::<i32x16, _>(simd_ne(a.as_i32x16(), b.as_i32x16())) +#[cfg_attr(test, assert_instr(vpandd))] +pub unsafe fn _mm512_mask_and_epi32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i { + let and = _mm512_and_epi32(a, b).as_i32x16(); + transmute(simd_select_bitmask(k, and, src.as_i32x16())) } -/// Compare packed signed 32-bit integers in a and b for inequality, and store the results in a mask vector k -/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set). +/// Compute the bitwise AND of packed 32-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
/// -/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmpneq_epi32) +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_and_epi32&expand=274) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vpcmp))] -pub unsafe fn _mm512_mask_cmpneq_epi32_mask(m: __mmask16, a: __m512i, b: __m512i) -> __mmask16 { - _mm512_cmpneq_epi32_mask(a, b) & m +#[cfg_attr(test, assert_instr(vpandd))] +pub unsafe fn _mm512_maskz_and_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i { + let and = _mm512_and_epi32(a, b).as_i32x16(); + let zero = _mm512_setzero_si512().as_i32x16(); + transmute(simd_select_bitmask(k, and, zero)) } -/// Compare packed signed 32-bit integers in a and b based on the comparison operand specified by op. +/// Compute the bitwise AND of 512 bits (composed of packed 64-bit integers) in a and b, and store the results in dst. /// -/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmp_epi32_mask) +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_and_epi64&expand=279) #[inline] #[target_feature(enable = "avx512f")] -#[rustc_args_required_const(2)] -#[cfg_attr(test, assert_instr(vpcmp, op = 0))] -pub unsafe fn _mm512_cmp_epi32_mask(a: __m512i, b: __m512i, op: _MM_CMPINT_ENUM) -> __mmask16 { - let neg_one = -1; - macro_rules! call { - ($imm3:expr) => { - vpcmpd(a.as_i32x16(), b.as_i32x16(), $imm3, neg_one) - }; - } - let r = constify_imm3!(op, call); - transmute(r) +#[cfg_attr(test, assert_instr(vpandq))] +pub unsafe fn _mm512_and_epi64(a: __m512i, b: __m512i) -> __m512i { + transmute(simd_and(a.as_i64x8(), b.as_i64x8())) } -/// Compare packed signed 32-bit integers in a and b based on the comparison operand specified by op, -/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set). +/// Compute the bitwise AND of packed 64-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). /// -/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmp_epi32_mask) +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_and_epi64&expand=280) #[inline] #[target_feature(enable = "avx512f")] -#[rustc_args_required_const(3)] -#[cfg_attr(test, assert_instr(vpcmp, op = 0))] -pub unsafe fn _mm512_mask_cmp_epi32_mask( - m: __mmask16, - a: __m512i, - b: __m512i, - op: _MM_CMPINT_ENUM, -) -> __mmask16 { - macro_rules! call { - ($imm3:expr) => { - vpcmpd(a.as_i32x16(), b.as_i32x16(), $imm3, m as i16) - }; - } - let r = constify_imm3!(op, call); - transmute(r) +#[cfg_attr(test, assert_instr(vpandq))] +pub unsafe fn _mm512_mask_and_epi64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i { + let and = _mm512_and_epi64(a, b).as_i64x8(); + transmute(simd_select_bitmask(k, and, src.as_i64x8())) } -/// Compare packed unsigned 64-bit integers in a and b for less-than, and store the results in a mask vector. +/// Compute the bitwise AND of packed 64-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
/// -/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmplt_epu64) +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_and_epi64&expand=281) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vpcmp))] -pub unsafe fn _mm512_cmplt_epu64_mask(a: __m512i, b: __m512i) -> __mmask8 { - simd_bitmask::<__m512i, _>(simd_lt(a.as_u64x8(), b.as_u64x8())) +#[cfg_attr(test, assert_instr(vpandq))] +pub unsafe fn _mm512_maskz_and_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i { + let and = _mm512_and_epi64(a, b).as_i64x8(); + let zero = _mm512_setzero_si512().as_i64x8(); + transmute(simd_select_bitmask(k, and, zero)) } -/// Compare packed unsigned 64-bit integers in a and b for less-than, and store the results in a mask vector k -/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set). +/// Compute the bitwise AND of 512 bits (representing integer data) in a and b, and store the result in dst. /// -/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmplt_epu64) +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_and_si512&expand=302) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vpcmp))] -pub unsafe fn _mm512_mask_cmplt_epu64_mask(m: __mmask8, a: __m512i, b: __m512i) -> __mmask8 { - _mm512_cmplt_epu64_mask(a, b) & m +#[cfg_attr(test, assert_instr(vpandd))] +pub unsafe fn _mm512_and_si512(a: __m512i, b: __m512i) -> __m512i { + transmute(simd_and(a.as_i32x16(), b.as_i32x16())) } -/// Compare packed unsigned 64-bit integers in a and b for greater-than, and store the results in a mask vector. +/// Compute the bitwise OR of packed 32-bit integers in a and b, and store the results in dst. /// -/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmpgt_epu64) +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_or_epi32&expand=4042) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vpcmp))] -pub unsafe fn _mm512_cmpgt_epu64_mask(a: __m512i, b: __m512i) -> __mmask8 { - simd_bitmask::<__m512i, _>(simd_gt(a.as_u64x8(), b.as_u64x8())) +#[cfg_attr(test, assert_instr(vpord))] +pub unsafe fn _mm512_or_epi32(a: __m512i, b: __m512i) -> __m512i { + transmute(simd_or(a.as_i32x16(), b.as_i32x16())) } -/// Compare packed unsigned 64-bit integers in a and b for greater-than, and store the results in a mask vector k -/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set). +/// Compute the bitwise OR of packed 32-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
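For the plain bitwise operations the epi32/epi64 distinction only matters once a mask is involved: the bit pattern of AND, OR, and XOR is lane-agnostic, but an __mmask16 selects sixteen 32-bit lanes while an __mmask8 selects eight 64-bit lanes. A scalar sketch of a zeromasked AND at 32-bit granularity, with made-up values:

fn main() {
    let a = [0xFFu32; 4];
    let b = [0x0Fu32; 4];
    let k = 0b0101u8; // keep lanes 0 and 2, zero lanes 1 and 3
    let r: Vec<u32> = a.iter().zip(&b).enumerate()
        .map(|(i, (&x, &y))| if (k >> i) & 1 == 1 { x & y } else { 0 })
        .collect();
    assert_eq!(r, [0x0F, 0, 0x0F, 0]);
}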
/// -/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmpgt_epu64) +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_or_epi32&expand=4040) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vpcmp))] -pub unsafe fn _mm512_mask_cmpgt_epu64_mask(m: __mmask8, a: __m512i, b: __m512i) -> __mmask8 { - _mm512_cmpgt_epu64_mask(a, b) & m +#[cfg_attr(test, assert_instr(vpord))] +pub unsafe fn _mm512_mask_or_epi32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i { + let or = _mm512_or_epi32(a, b).as_i32x16(); + transmute(simd_select_bitmask(k, or, src.as_i32x16())) } -/// Compare packed unsigned 64-bit integers in a and b for less-than-or-equal, and store the results in a mask vector. +/// Compute the bitwise OR of packed 32-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). /// -/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmple_epu64) +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_or_epi32&expand=4041) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vpcmp))] -pub unsafe fn _mm512_cmple_epu64_mask(a: __m512i, b: __m512i) -> __mmask8 { - simd_bitmask::<__m512i, _>(simd_le(a.as_u64x8(), b.as_u64x8())) +#[cfg_attr(test, assert_instr(vpord))] +pub unsafe fn _mm512_maskz_or_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i { + let or = _mm512_or_epi32(a, b).as_i32x16(); + let zero = _mm512_setzero_si512().as_i32x16(); + transmute(simd_select_bitmask(k, or, zero)) } -/// Compare packed unsigned 64-bit integers in a and b for less-than-or-equal, and store the results in a mask vector k -/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set). +/// Compute the bitwise OR of packed 64-bit integers in a and b, and store the result in dst. /// -/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmple_epu64) +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_or_epi64&expand=4051) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vpcmp))] -pub unsafe fn _mm512_mask_cmple_epu64_mask(m: __mmask8, a: __m512i, b: __m512i) -> __mmask8 { - _mm512_cmple_epu64_mask(a, b) & m +#[cfg_attr(test, assert_instr(vporq))] +pub unsafe fn _mm512_or_epi64(a: __m512i, b: __m512i) -> __m512i { + transmute(simd_or(a.as_i64x8(), b.as_i64x8())) } -/// Compare packed unsigned 64-bit integers in a and b for greater-than-or-equal, and store the results in a mask vector. +/// Compute the bitwise OR of packed 64-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
+/// Compute the bitwise OR of packed 64-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
 ///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmpge_epu64)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_or_epi64&expand=4049)
 #[inline]
 #[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vpcmp))]
-pub unsafe fn _mm512_cmpge_epu64_mask(a: __m512i, b: __m512i) -> __mmask8 {
-    simd_bitmask::<__m512i, _>(simd_ge(a.as_u64x8(), b.as_u64x8()))
+#[cfg_attr(test, assert_instr(vporq))]
+pub unsafe fn _mm512_mask_or_epi64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
+    let or = _mm512_or_epi64(a, b).as_i64x8();
+    transmute(simd_select_bitmask(k, or, src.as_i64x8()))
 }

-/// Compare packed unsigned 64-bit integers in a and b for greater-than-or-equal, and store the results in a mask vector k
-/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set).
+/// Compute the bitwise OR of packed 64-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
 ///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmpge_epu64)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_or_epi64&expand=4050)
 #[inline]
 #[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vpcmp))]
-pub unsafe fn _mm512_mask_cmpge_epu64_mask(m: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
-    _mm512_cmpge_epu64_mask(b, a) & m
+#[cfg_attr(test, assert_instr(vporq))]
+pub unsafe fn _mm512_maskz_or_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
+    let or = _mm512_or_epi64(a, b).as_i64x8();
+    let zero = _mm512_setzero_si512().as_i64x8();
+    transmute(simd_select_bitmask(k, or, zero))
 }

-/// Compare packed unsigned 64-bit integers in a and b for equality, and store the results in a mask vector.
+/// Compute the bitwise OR of 512 bits (representing integer data) in a and b, and store the result in dst.
 ///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmpeq_epu64)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_or_si512&expand=4072)
 #[inline]
 #[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vpcmp))]
-pub unsafe fn _mm512_cmpeq_epu64_mask(a: __m512i, b: __m512i) -> __mmask8 {
-    simd_bitmask::<__m512i, _>(simd_eq(a.as_u64x8(), b.as_u64x8()))
+#[cfg_attr(test, assert_instr(vpord))]
+pub unsafe fn _mm512_or_si512(a: __m512i, b: __m512i) -> __m512i {
+    transmute(simd_or(a.as_i32x16(), b.as_i32x16()))
 }

-/// Compare packed unsigned 64-bit integers in a and b for equality, and store the results in a mask vector k
-/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set).
+/// Compute the bitwise XOR of packed 32-bit integers in a and b, and store the results in dst.
 ///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmpeq_epu64)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_xor_epi32&expand=6142)
 #[inline]
 #[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vpcmp))]
-pub unsafe fn _mm512_mask_cmpeq_epu64_mask(m: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
-    _mm512_cmpeq_epu64_mask(a, b) & m
+#[cfg_attr(test, assert_instr(vpxord))]
+pub unsafe fn _mm512_xor_epi32(a: __m512i, b: __m512i) -> __m512i {
+    transmute(simd_xor(a.as_i32x16(), b.as_i32x16()))
 }

-/// Compare packed unsigned 64-bit integers in a and b for inequality, and store the results in a mask vector.
+/// Compute the bitwise XOR of packed 32-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
 ///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmpneq_epu64)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_xor_epi32&expand=6140)
 #[inline]
 #[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vpcmp))]
-pub unsafe fn _mm512_cmpneq_epu64_mask(a: __m512i, b: __m512i) -> __mmask8 {
-    simd_bitmask::<__m512i, _>(simd_ne(a.as_u64x8(), b.as_u64x8()))
+#[cfg_attr(test, assert_instr(vpxord))]
+pub unsafe fn _mm512_mask_xor_epi32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
+    let xor = _mm512_xor_epi32(a, b).as_i32x16();
+    transmute(simd_select_bitmask(k, xor, src.as_i32x16()))
 }

-/// Compare packed unsigned 64-bit integers in a and b for inequality, and store the results in a mask vector k
-/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set).
+/// Compute the bitwise XOR of packed 32-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
 ///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmpneq_epu64_mask)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_xor_epi32&expand=6141)
 #[inline]
 #[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vpcmp))]
-pub unsafe fn _mm512_mask_cmpneq_epu64_mask(m: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
-    _mm512_cmpneq_epu64_mask(a, b) & m
+#[cfg_attr(test, assert_instr(vpxord))]
+pub unsafe fn _mm512_maskz_xor_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
+    let xor = _mm512_xor_epi32(a, b).as_i32x16();
+    let zero = _mm512_setzero_si512().as_i32x16();
+    transmute(simd_select_bitmask(k, xor, zero))
 }

-/// Compare packed unsigned 64-bit integers in a and b based on the comparison operand specified by op.
+/// Compute the bitwise XOR of packed 64-bit integers in a and b, and store the results in dst.
 ///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmp_epu64_mask)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_xor_epi64&expand=6151)
 #[inline]
 #[target_feature(enable = "avx512f")]
-#[rustc_args_required_const(2)]
-#[cfg_attr(test, assert_instr(vpcmp, op = 0))]
-pub unsafe fn _mm512_cmp_epu64_mask(a: __m512i, b: __m512i, op: _MM_CMPINT_ENUM) -> __mmask8 {
-    let neg_one = -1;
-    macro_rules! call {
-        ($imm3:expr) => {
-            vpcmpuq(a.as_i64x8(), b.as_i64x8(), $imm3, neg_one)
-        };
-    }
-    let r = constify_imm3!(op, call);
-    transmute(r)
+#[cfg_attr(test, assert_instr(vpxorq))]
+pub unsafe fn _mm512_xor_epi64(a: __m512i, b: __m512i) -> __m512i {
+    transmute(simd_xor(a.as_i64x8(), b.as_i64x8()))
 }

-/// Compare packed unsigned 64-bit integers in a and b based on the comparison operand specified by op,
-/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set).
+/// Compute the bitwise XOR of packed 64-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
 ///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmp_epu64_mask)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_xor_epi64&expand=6149)
 #[inline]
 #[target_feature(enable = "avx512f")]
-#[rustc_args_required_const(3)]
-#[cfg_attr(test, assert_instr(vpcmp, op = 0))]
-pub unsafe fn _mm512_mask_cmp_epu64_mask(
-    m: __mmask8,
-    a: __m512i,
-    b: __m512i,
-    op: _MM_CMPINT_ENUM,
-) -> __mmask8 {
-    macro_rules! call {
-        ($imm3:expr) => {
-            vpcmpuq(a.as_i64x8(), b.as_i64x8(), $imm3, m as i8)
-        };
-    }
-    let r = constify_imm3!(op, call);
-    transmute(r)
+#[cfg_attr(test, assert_instr(vpxorq))]
+pub unsafe fn _mm512_mask_xor_epi64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
+    let xor = _mm512_xor_epi64(a, b).as_i64x8();
+    transmute(simd_select_bitmask(k, xor, src.as_i64x8()))
 }

-/// Compare packed signed 64-bit integers in a and b for less-than, and store the results in a mask vector.
+/// Compute the bitwise XOR of packed 64-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
 ///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmplt_epi64)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_xor_epi64&expand=6150)
 #[inline]
 #[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vpcmp))]
-pub unsafe fn _mm512_cmplt_epi64_mask(a: __m512i, b: __m512i) -> __mmask8 {
-    simd_bitmask::<__m512i, _>(simd_lt(a.as_i64x8(), b.as_i64x8()))
+#[cfg_attr(test, assert_instr(vpxorq))]
+pub unsafe fn _mm512_maskz_xor_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
+    let xor = _mm512_xor_epi64(a, b).as_i64x8();
+    let zero = _mm512_setzero_si512().as_i64x8();
+    transmute(simd_select_bitmask(k, xor, zero))
 }

-/// Compare packed signed 64-bit integers in a and b for less-than, and store the results in a mask vector k
-/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set).
+/// Compute the bitwise XOR of 512 bits (representing integer data) in a and b, and store the result in dst.
 ///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmplt_epi64)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_xor_si512&expand=6172)
 #[inline]
 #[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vpcmp))]
-pub unsafe fn _mm512_mask_cmplt_epi64_mask(m: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
-    _mm512_cmplt_epi64_mask(a, b) & m
+#[cfg_attr(test, assert_instr(vpxord))]
+pub unsafe fn _mm512_xor_si512(a: __m512i, b: __m512i) -> __m512i {
+    transmute(simd_xor(a.as_i32x16(), b.as_i32x16()))
 }

-/// Compare packed signed 64-bit integers in a and b for greater-than, and store the results in a mask vector.
+/// Compute the bitwise AND of 16-bit masks a and b, and store the result in k.
 ///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmpgt_epi64)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=kand_mask16&expand=3212)
 #[inline]
 #[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vpcmp))]
-pub unsafe fn _mm512_cmpgt_epi64_mask(a: __m512i, b: __m512i) -> __mmask8 {
-    simd_bitmask::<__m512i, _>(simd_gt(a.as_i64x8(), b.as_i64x8()))
+#[cfg_attr(test, assert_instr(kandw))]
+pub unsafe fn _kand_mask16(a: __mmask16, b: __mmask16) -> __mmask16 {
+    kandw(a, b)
 }

-/// Compare packed signed 64-bit integers in a and b for greater-than, and store the results in a mask vector k
-/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set).
+/// Compute the bitwise AND of 16-bit masks a and b, and store the result in k.
 ///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmpgt_epi64)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_kand&expand=3210)
 #[inline]
 #[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vpcmp))]
-pub unsafe fn _mm512_mask_cmpgt_epi64_mask(m: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
-    _mm512_cmpgt_epi64_mask(a, b) & m
+#[cfg_attr(test, assert_instr(kandw))]
+pub unsafe fn _mm512_kand(a: __mmask16, b: __mmask16) -> __mmask16 {
+    kandw(a, b)
 }

-/// Compare packed signed 64-bit integers in a and b for less-than-or-equal, and store the results in a mask vector.
+/// Compute the bitwise OR of 16-bit masks a and b, and store the result in k.
 ///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmple_epi64)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=kor_mask16&expand=3239)
 #[inline]
 #[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vpcmp))]
-pub unsafe fn _mm512_cmple_epi64_mask(a: __m512i, b: __m512i) -> __mmask8 {
-    simd_bitmask::<__m512i, _>(simd_le(a.as_i64x8(), b.as_i64x8()))
+#[cfg_attr(test, assert_instr(korw))]
+pub unsafe fn _kor_mask16(a: __mmask16, b: __mmask16) -> __mmask16 {
+    korw(a, b)
 }

-/// Compare packed signed 64-bit integers in a and b for less-than-or-equal, and store the results in a mask vector k
-/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set).
+/// Compute the bitwise OR of 16-bit masks a and b, and store the result in k.
 ///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmple_epi64)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_kor&expand=3237)
 #[inline]
 #[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vpcmp))]
-pub unsafe fn _mm512_mask_cmple_epi64_mask(m: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
-    _mm512_cmple_epi64_mask(a, b) & m
+#[cfg_attr(test, assert_instr(korw))]
+pub unsafe fn _mm512_kor(a: __mmask16, b: __mmask16) -> __mmask16 {
+    korw(a, b)
 }

-/// Compare packed signed 64-bit integers in a and b for greater-than-or-equal, and store the results in a mask vector.
+/// Compute the bitwise XOR of 16-bit masks a and b, and store the result in k.
 ///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmpge_epi64)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=kxor_mask16&expand=3291)
 #[inline]
 #[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vpcmp))]
-pub unsafe fn _mm512_cmpge_epi64_mask(a: __m512i, b: __m512i) -> __mmask8 {
-    simd_bitmask::<__m512i, _>(simd_ge(a.as_i64x8(), b.as_i64x8()))
+#[cfg_attr(test, assert_instr(kxorw))]
+pub unsafe fn _kxor_mask16(a: __mmask16, b: __mmask16) -> __mmask16 {
+    kxorw(a, b)
 }

-/// Compare packed signed 64-bit integers in a and b for greater-than-or-equal, and store the results in a mask vector k
-/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set).
+/// Compute the bitwise XOR of 16-bit masks a and b, and store the result in k.
 ///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmpge_epi64)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_kxor&expand=3289)
 #[inline]
 #[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vpcmp))]
-pub unsafe fn _mm512_mask_cmpge_epi64_mask(m: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
-    _mm512_cmpge_epi64_mask(b, a) & m
+#[cfg_attr(test, assert_instr(kxorw))]
+pub unsafe fn _mm512_kxor(a: __mmask16, b: __mmask16) -> __mmask16 {
+    kxorw(a, b)
 }

-/// Compare packed signed 64-bit integers in a and b for equality, and store the results in a mask vector.
+
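The `k*` operations above work on the 16-bit mask registers themselves, which makes it cheap to combine the results of several comparisons before applying them. A small sketch, assuming a nightly toolchain with these AVX512F intrinsics (`lanes_lt_or_eq` is a made-up name):

```rust
use std::arch::x86_64::*;

#[target_feature(enable = "avx512f")]
unsafe fn lanes_lt_or_eq(a: __m512, b: __m512, c: __m512) -> __mmask16 {
    let lt = _mm512_cmplt_ps_mask(a, b); // lanes where a < b
    let eq = _mm512_cmpeq_ps_mask(a, c); // lanes where a == c
    _mm512_kor(lt, eq) // union of the two conditions
}
```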
+/// Sets packed single-precision (32-bit) floating-point elements in `dst` with the supplied values.
 ///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmpeq_epi64)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,4909&text=_mm512_set_ps)
 #[inline]
 #[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vpcmp))]
-pub unsafe fn _mm512_cmpeq_epi64_mask(a: __m512i, b: __m512i) -> __mmask8 {
-    simd_bitmask::<__m512i, _>(simd_eq(a.as_i64x8(), b.as_i64x8()))
+pub unsafe fn _mm512_set_ps(
+    e0: f32,
+    e1: f32,
+    e2: f32,
+    e3: f32,
+    e4: f32,
+    e5: f32,
+    e6: f32,
+    e7: f32,
+    e8: f32,
+    e9: f32,
+    e10: f32,
+    e11: f32,
+    e12: f32,
+    e13: f32,
+    e14: f32,
+    e15: f32,
+) -> __m512 {
+    _mm512_setr_ps(
+        e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0,
+    )
 }

-/// Compare packed signed 64-bit integers in a and b for equality, and store the results in a mask vector k
-/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set).
+/// Sets packed single-precision (32-bit) floating-point elements in `dst` with the supplied values in
+/// reverse order.
 ///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmpeq_epi64)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,4909&text=_mm512_setr_ps)
 #[inline]
 #[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vpcmp))]
-pub unsafe fn _mm512_mask_cmpeq_epi64_mask(m: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
-    _mm512_cmpeq_epi64_mask(a, b) & m
+pub unsafe fn _mm512_setr_ps(
+    e0: f32,
+    e1: f32,
+    e2: f32,
+    e3: f32,
+    e4: f32,
+    e5: f32,
+    e6: f32,
+    e7: f32,
+    e8: f32,
+    e9: f32,
+    e10: f32,
+    e11: f32,
+    e12: f32,
+    e13: f32,
+    e14: f32,
+    e15: f32,
+) -> __m512 {
+    let r = f32x16::new(
+        e0, e1, e2, e3, e4, e5, e6, e7, e8, e9, e10, e11, e12, e13, e14, e15,
+    );
+    transmute(r)
 }

-/// Compare packed signed 64-bit integers in a and b for inequality, and store the results in a mask vector.
-///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmpneq_epi64)
+/// Broadcast 64-bit float `a` to all elements of `dst`.
 #[inline]
 #[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vpcmp))]
-pub unsafe fn _mm512_cmpneq_epi64_mask(a: __m512i, b: __m512i) -> __mmask8 {
-    simd_bitmask::<__m512i, _>(simd_ne(a.as_i64x8(), b.as_i64x8()))
+pub unsafe fn _mm512_set1_pd(a: f64) -> __m512d {
+    transmute(f64x8::splat(a))
 }

-/// Compare packed signed 64-bit integers in a and b for inequality, and store the results in a mask vector k
-/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set).
-///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmpneq_epi64)
+/// Broadcast 32-bit float `a` to all elements of `dst`.
 #[inline]
 #[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vpcmp))]
-pub unsafe fn _mm512_mask_cmpneq_epi64_mask(m: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
-    _mm512_cmpneq_epi64_mask(a, b) & m
+pub unsafe fn _mm512_set1_ps(a: f32) -> __m512 {
+    transmute(f32x16::splat(a))
 }
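Note the argument order: `_mm512_set_ps` takes its arguments from the highest element down to element 0, while `_mm512_setr_ps` takes them in element (reversed) order, so the two calls below build the same vector. A sketch assuming AVX512F is available (`set_order_demo` is illustrative):

```rust
use std::arch::x86_64::*;

#[target_feature(enable = "avx512f")]
unsafe fn set_order_demo() -> bool {
    let hi_to_lo = _mm512_set_ps(
        15., 14., 13., 12., 11., 10., 9., 8., 7., 6., 5., 4., 3., 2., 1., 0.,
    );
    let lo_to_hi = _mm512_setr_ps(
        0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
    );
    // All 16 lanes compare equal.
    _mm512_cmpeq_ps_mask(hi_to_lo, lo_to_hi) == 0xffff
}
```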
-/// Compare packed signed 64-bit integers in a and b based on the comparison operand specified by op.
-///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmp_epi64_mask)
+/// Sets packed 32-bit integers in `dst` with the supplied values.
 #[inline]
 #[target_feature(enable = "avx512f")]
-#[rustc_args_required_const(2)]
-#[cfg_attr(test, assert_instr(vpcmp, op = 0))]
-pub unsafe fn _mm512_cmp_epi64_mask(a: __m512i, b: __m512i, op: _MM_CMPINT_ENUM) -> __mmask8 {
-    let neg_one = -1;
-    macro_rules! call {
-        ($imm3:expr) => {
-            vpcmpq(a.as_i64x8(), b.as_i64x8(), $imm3, neg_one)
-        };
-    }
-    let r = constify_imm3!(op, call);
-    transmute(r)
+pub unsafe fn _mm512_set_epi32(
+    e15: i32,
+    e14: i32,
+    e13: i32,
+    e12: i32,
+    e11: i32,
+    e10: i32,
+    e9: i32,
+    e8: i32,
+    e7: i32,
+    e6: i32,
+    e5: i32,
+    e4: i32,
+    e3: i32,
+    e2: i32,
+    e1: i32,
+    e0: i32,
+) -> __m512i {
+    _mm512_setr_epi32(
+        e0, e1, e2, e3, e4, e5, e6, e7, e8, e9, e10, e11, e12, e13, e14, e15,
+    )
 }

-/// Compare packed signed 64-bit integers in a and b based on the comparison operand specified by op,
-/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set).
-///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmp_epi64_mask)
+/// Broadcast 32-bit integer `a` to all elements of `dst`.
 #[inline]
 #[target_feature(enable = "avx512f")]
-#[rustc_args_required_const(3)]
-#[cfg_attr(test, assert_instr(vpcmp, op = 0))]
-pub unsafe fn _mm512_mask_cmp_epi64_mask(
-    m: __mmask8,
-    a: __m512i,
-    b: __m512i,
-    op: _MM_CMPINT_ENUM,
-) -> __mmask8 {
-    macro_rules! call {
-        ($imm3:expr) => {
-            vpcmpq(a.as_i64x8(), b.as_i64x8(), $imm3, m as i8)
-        };
-    }
-    let r = constify_imm3!(op, call);
-    transmute(r)
+pub unsafe fn _mm512_set1_epi32(a: i32) -> __m512i {
+    transmute(i32x16::splat(a))
 }

-/// Returns vector of type `__m512d` with undefined elements.
-///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_undefined_pd)
+/// Broadcast 64-bit integer `a` to all elements of `dst`.
 #[inline]
 #[target_feature(enable = "avx512f")]
-// This intrinsic has no corresponding instruction.
-pub unsafe fn _mm512_undefined_pd() -> __m512d {
-    _mm512_set1_pd(0.0)
+pub unsafe fn _mm512_set1_epi64(a: i64) -> __m512i {
+    transmute(i64x8::splat(a))
 }

-/// Returns vector of type `__m512` with undefined elements.
+/// Compare packed single-precision (32-bit) floating-point elements in a and b for less-than, and store the results in a mask vector.
 ///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_undefined_ps)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmplt_ps)
 #[inline]
 #[target_feature(enable = "avx512f")]
-// This intrinsic has no corresponding instruction.
-pub unsafe fn _mm512_undefined_ps() -> __m512 {
-    _mm512_set1_ps(0.0)
+#[cfg_attr(test, assert_instr(vcmp))]
+pub unsafe fn _mm512_cmplt_ps_mask(a: __m512, b: __m512) -> __mmask16 {
+    _mm512_cmp_ps_mask(a, b, _CMP_LT_OS)
 }

-/// Loads 512-bits (composed of 8 packed double-precision (64-bit)
-/// floating-point elements) from memory into result.
-/// `mem_addr` does not need to be aligned on any particular boundary.
+/// Compare packed single-precision (32-bit) floating-point elements in a and b for less-than, and store the results in a mask vector k
+/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set).
 ///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_loadu_pd)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmplt_ps)
 #[inline]
 #[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vmovups))]
-pub unsafe fn _mm512_loadu_pd(mem_addr: *const f64) -> __m512d {
-    ptr::read_unaligned(mem_addr as *const __m512d)
+#[cfg_attr(test, assert_instr(vcmp))]
+pub unsafe fn _mm512_mask_cmplt_ps_mask(m: __mmask16, a: __m512, b: __m512) -> __mmask16 {
+    _mm512_mask_cmp_ps_mask(m, a, b, _CMP_LT_OS)
 }

-/// Stores 512-bits (composed of 8 packed double-precision (64-bit)
-/// floating-point elements) from `a` into memory.
-/// `mem_addr` does not need to be aligned on any particular boundary.
+/// Compare packed single-precision (32-bit) floating-point elements in a and b for not-less-than, and store the results in a mask vector.
 ///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_storeu_pd)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmpnlt_ps)
 #[inline]
 #[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vmovups))]
-pub unsafe fn _mm512_storeu_pd(mem_addr: *mut f64, a: __m512d) {
-    ptr::write_unaligned(mem_addr as *mut __m512d, a);
+#[cfg_attr(test, assert_instr(vcmp))]
+pub unsafe fn _mm512_cmpnlt_ps_mask(a: __m512, b: __m512) -> __mmask16 {
+    _mm512_cmp_ps_mask(a, b, _CMP_NLT_US)
 }

-/// Loads 512-bits (composed of 16 packed single-precision (32-bit)
-/// floating-point elements) from memory into result.
-/// `mem_addr` does not need to be aligned on any particular boundary.
+/// Compare packed single-precision (32-bit) floating-point elements in a and b for not-less-than, and store the results in a mask vector k
+/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set).
 ///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_loadu_ps)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmpnlt_ps)
 #[inline]
 #[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vmovups))]
-pub unsafe fn _mm512_loadu_ps(mem_addr: *const f32) -> __m512 {
-    ptr::read_unaligned(mem_addr as *const __m512)
+#[cfg_attr(test, assert_instr(vcmp))]
+pub unsafe fn _mm512_mask_cmpnlt_ps_mask(m: __mmask16, a: __m512, b: __m512) -> __mmask16 {
+    _mm512_mask_cmp_ps_mask(m, a, b, _CMP_NLT_US)
 }

-/// Stores 512-bits (composed of 16 packed single-precision (32-bit)
-/// floating-point elements) from `a` into memory.
-/// `mem_addr` does not need to be aligned on any particular boundary.
+/// Compare packed single-precision (32-bit) floating-point elements in a and b for less-than-or-equal, and store the results in a mask vector.
 ///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_storeu_ps)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmple_ps)
 #[inline]
 #[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vmovups))]
-#[stable(feature = "simd_x86", since = "1.27.0")]
-pub unsafe fn _mm512_storeu_ps(mem_addr: *mut f32, a: __m512) {
-    ptr::write_unaligned(mem_addr as *mut __m512, a);
+#[cfg_attr(test, assert_instr(vcmp))]
+pub unsafe fn _mm512_cmple_ps_mask(a: __m512, b: __m512) -> __mmask16 {
+    _mm512_cmp_ps_mask(a, b, _CMP_LE_OS)
 }

-/// Sets packed 64-bit integers in `dst` with the supplied values in
-/// reverse order.
+/// Compare packed single-precision (32-bit) floating-point elements in a and b for less-than-or-equal, and store the results in a mask vector k
+/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set).
 ///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,4909&text=_mm512_set_pd)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmple_ps)
 #[inline]
 #[target_feature(enable = "avx512f")]
-pub unsafe fn _mm512_setr_pd(
-    e0: f64,
-    e1: f64,
-    e2: f64,
-    e3: f64,
-    e4: f64,
-    e5: f64,
-    e6: f64,
-    e7: f64,
-) -> __m512d {
-    let r = f64x8::new(e0, e1, e2, e3, e4, e5, e6, e7);
-    transmute(r)
+#[cfg_attr(test, assert_instr(vcmp))]
+pub unsafe fn _mm512_mask_cmple_ps_mask(m: __mmask16, a: __m512, b: __m512) -> __mmask16 {
+    _mm512_mask_cmp_ps_mask(m, a, b, _CMP_LE_OS)
 }

-/// Sets packed 64-bit integers in `dst` with the supplied values.
+/// Compare packed single-precision (32-bit) floating-point elements in a and b for not-less-than-or-equal, and store the results in a mask vector.
 ///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,4909&text=_mm512_set_pd)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmpnle_ps)
 #[inline]
 #[target_feature(enable = "avx512f")]
-pub unsafe fn _mm512_set_pd(
-    e0: f64,
-    e1: f64,
-    e2: f64,
-    e3: f64,
-    e4: f64,
-    e5: f64,
-    e6: f64,
-    e7: f64,
-) -> __m512d {
-    _mm512_setr_pd(e7, e6, e5, e4, e3, e2, e1, e0)
+#[cfg_attr(test, assert_instr(vcmp))]
+pub unsafe fn _mm512_cmpnle_ps_mask(a: __m512, b: __m512) -> __mmask16 {
+    _mm512_cmp_ps_mask(a, b, _CMP_NLE_US)
 }

-/// Equal
-pub const _MM_CMPINT_EQ: _MM_CMPINT_ENUM = 0x00;
-/// Less-than
-pub const _MM_CMPINT_LT: _MM_CMPINT_ENUM = 0x01;
-/// Less-than-or-equal
-pub const _MM_CMPINT_LE: _MM_CMPINT_ENUM = 0x02;
-/// False
-pub const _MM_CMPINT_FALSE: _MM_CMPINT_ENUM = 0x03;
-/// Not-equal
-pub const _MM_CMPINT_NE: _MM_CMPINT_ENUM = 0x04;
-/// Not less-than
-pub const _MM_CMPINT_NLT: _MM_CMPINT_ENUM = 0x05;
-/// Not less-than-or-equal
-pub const _MM_CMPINT_NLE: _MM_CMPINT_ENUM = 0x06;
-/// True
-pub const _MM_CMPINT_TRUE: _MM_CMPINT_ENUM = 0x07;
+/// Compare packed single-precision (32-bit) floating-point elements in a and b for not-less-than-or-equal, and store the results in a mask vector k
+/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmpnle_ps)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vcmp))]
+pub unsafe fn _mm512_mask_cmpnle_ps_mask(m: __mmask16, a: __m512, b: __m512) -> __mmask16 {
+    _mm512_mask_cmp_ps_mask(m, a, b, _CMP_NLE_US)
+}

-#[allow(improper_ctypes)]
-extern "C" {
-    #[link_name = "llvm.x86.avx512.gather.dpd.512"]
-    fn vgatherdpd(src: f64x8, slice: *const i8, offsets: i32x8, mask: i8, scale: i32) -> f64x8;
-    #[link_name = "llvm.x86.avx512.gather.dps.512"]
-    fn vgatherdps(src: f32x16, slice: *const i8, offsets: i32x16, mask: i16, scale: i32) -> f32x16;
-    #[link_name = "llvm.x86.avx512.gather.qpd.512"]
-    fn vgatherqpd(src: f64x8, slice: *const i8, offsets: i64x8, mask: i8, scale: i32) -> f64x8;
-    #[link_name = "llvm.x86.avx512.gather.qps.512"]
-    fn vgatherqps(src: f32x8, slice: *const i8, offsets: i64x8, mask: i8, scale: i32) -> f32x8;
-    #[link_name = "llvm.x86.avx512.gather.dpq.512"]
-    fn vpgatherdq(src: i64x8, slice: *const i8, offsets: i32x8, mask: i8, scale: i32) -> i64x8;
-    #[link_name = "llvm.x86.avx512.gather.dpi.512"]
-    fn vpgatherdd(src: i32x16, slice: *const i8, offsets: i32x16, mask: i16, scale: i32) -> i32x16;
-    #[link_name = "llvm.x86.avx512.gather.qpq.512"]
-    fn vpgatherqq(src: i64x8, slice: *const i8, offsets: i64x8, mask: i8, scale: i32) -> i64x8;
-    #[link_name = "llvm.x86.avx512.gather.qpi.512"]
-    fn vpgatherqd(src: i32x8, slice: *const i8, offsets: i64x8, mask: i8, scale: i32) -> i32x8;
+/// Compare packed single-precision (32-bit) floating-point elements in a and b for equality, and store the results in a mask vector.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmpeq_ps)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vcmp))]
+pub unsafe fn _mm512_cmpeq_ps_mask(a: __m512, b: __m512) -> __mmask16 {
+    _mm512_cmp_ps_mask(a, b, _CMP_EQ_OQ)
+}

-    #[link_name = "llvm.x86.avx512.scatter.dpd.512"]
-    fn vscatterdpd(slice: *mut i8, mask: i8, offsets: i32x8, src: f64x8, scale: i32);
-    #[link_name = "llvm.x86.avx512.scatter.dps.512"]
-    fn vscatterdps(slice: *mut i8, mask: i16, offsets: i32x16, src: f32x16, scale: i32);
-    #[link_name = "llvm.x86.avx512.scatter.qpd.512"]
-    fn vscatterqpd(slice: *mut i8, mask: i8, offsets: i64x8, src: f64x8, scale: i32);
-    #[link_name = "llvm.x86.avx512.scatter.qps.512"]
-    fn vscatterqps(slice: *mut i8, mask: i8, offsets: i64x8, src: f32x8, scale: i32);
-    #[link_name = "llvm.x86.avx512.scatter.dpq.512"]
-    fn vpscatterdq(slice: *mut i8, mask: i8, offsets: i32x8, src: i64x8, scale: i32);
-    #[link_name = "llvm.x86.avx512.scatter.dpi.512"]
-    fn vpscatterdd(slice: *mut i8, mask: i16, offsets: i32x16, src: i32x16, scale: i32);
-    #[link_name = "llvm.x86.avx512.scatter.qpq.512"]
-    fn vpscatterqq(slice: *mut i8, mask: i8, offsets: i64x8, src: i64x8, scale: i32);
-    #[link_name = "llvm.x86.avx512.scatter.qpi.512"]
-    fn vpscatterqd(slice: *mut i8, mask: i8, offsets: i64x8, src: i32x8, scale: i32);
+/// Compare packed single-precision (32-bit) floating-point elements in a and b for equality, and store the results in a mask vector k
+/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmpeq_ps)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vcmp))]
+pub unsafe fn _mm512_mask_cmpeq_ps_mask(m: __mmask16, a: __m512, b: __m512) -> __mmask16 {
+    _mm512_mask_cmp_ps_mask(m, a, b, _CMP_EQ_OQ)
+}

-    #[link_name = "llvm.x86.avx512.mask.cmp.ss"]
-    fn vcmpss(a: __m128, b: __m128, op: i32, m: i8, sae: i32) -> i8;
-    #[link_name = "llvm.x86.avx512.mask.cmp.sd"]
-    fn vcmpsd(a: __m128d, b: __m128d, op: i32, m: i8, sae: i32) -> i8;
-    #[link_name = "llvm.x86.avx512.mask.cmp.ps.512"]
-    fn vcmpps(a: f32x16, b: f32x16, op: i32, m: i16, sae: i32) -> i16;
-    #[link_name = "llvm.x86.avx512.mask.cmp.pd.512"]
-    fn vcmppd(a: f64x8, b: f64x8, op: i32, m: i8, sae: i32) -> i8;
-    #[link_name = "llvm.x86.avx512.mask.ucmp.q.512"]
-    fn vpcmpuq(a: i64x8, b: i64x8, op: i32, m: i8) -> i8;
-    #[link_name = "llvm.x86.avx512.mask.cmp.q.512"]
-    fn vpcmpq(a: i64x8, b: i64x8, op: i32, m: i8) -> i8;
-    #[link_name = "llvm.x86.avx512.mask.ucmp.d.512"]
-    fn vpcmpud(a: i32x16, b: i32x16, op: i32, m: i16) -> i16;
-    #[link_name = "llvm.x86.avx512.mask.cmp.d.512"]
-    fn vpcmpd(a: i32x16, b: i32x16, op: i32, m: i16) -> i16;
+/// Compare packed single-precision (32-bit) floating-point elements in a and b for inequality, and store the results in a mask vector.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmpneq_ps)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vcmp))]
+pub unsafe fn _mm512_cmpneq_ps_mask(a: __m512, b: __m512) -> __mmask16 {
+    _mm512_cmp_ps_mask(a, b, _CMP_NEQ_UQ)
 }

-#[cfg(test)]
-mod tests {
-    use std;
-    use stdarch_test::simd_test;
+/// Compare packed single-precision (32-bit) floating-point elements in a and b for inequality, and store the results in a mask vector k
+/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmpneq_ps_mask)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vcmp))]
+pub unsafe fn _mm512_mask_cmpneq_ps_mask(m: __mmask16, a: __m512, b: __m512) -> __mmask16 {
+    _mm512_mask_cmp_ps_mask(m, a, b, _CMP_NEQ_UQ)
+}

-    use crate::core_arch::x86::*;
-    use crate::hint::black_box;
+/// Compare packed single-precision (32-bit) floating-point elements in a and b based on the comparison operand specified by op.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmp_ps_mask)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[rustc_args_required_const(2)]
+#[cfg_attr(test, assert_instr(vcmp, op = 0))]
+pub unsafe fn _mm512_cmp_ps_mask(a: __m512, b: __m512, op: i32) -> __mmask16 {
+    let neg_one = -1;
+    macro_rules! call {
+        ($imm5:expr) => {
+            vcmpps(
+                a.as_f32x16(),
+                b.as_f32x16(),
+                $imm5,
+                neg_one,
+                _MM_FROUND_CUR_DIRECTION,
+            )
+        };
+    }
+    let r = constify_imm5!(op, call);
+    transmute(r)
+}

-    #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_abs_epi32() {
-        #[rustfmt::skip]
-        let a = _mm512_setr_epi32(
-            0, 1, -1, i32::MAX,
-            i32::MIN, 100, -100, -32,
-            0, 1, -1, i32::MAX,
-            i32::MIN, 100, -100, -32,
-        );
-        let r = _mm512_abs_epi32(a);
+/// Compare packed single-precision (32-bit) floating-point elements in a and b based on the comparison operand specified by op,
+/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmp_ps_mask)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[rustc_args_required_const(3)]
+#[cfg_attr(test, assert_instr(vcmp, op = 0))]
+pub unsafe fn _mm512_mask_cmp_ps_mask(m: __mmask16, a: __m512, b: __m512, op: i32) -> __mmask16 {
+    macro_rules! call {
+        ($imm5:expr) => {
+            vcmpps(
+                a.as_f32x16(),
+                b.as_f32x16(),
+                $imm5,
+                m as i16,
+                _MM_FROUND_CUR_DIRECTION,
+            )
+        };
+    }
+    let r = constify_imm5!(op, call);
+    transmute(r)
+}
+
+/// Compare packed single-precision (32-bit) floating-point elements in a and b based on the comparison operand specified by op.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmp_round_ps_mask)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[rustc_args_required_const(2, 3)]
+#[cfg_attr(test, assert_instr(vcmp, op = 0, sae = 4))]
+pub unsafe fn _mm512_cmp_round_ps_mask(a: __m512, b: __m512, op: i32, sae: i32) -> __mmask16 {
+    let neg_one = -1;
+    macro_rules! call {
+        ($imm5:expr, $imm4:expr) => {
+            vcmpps(a.as_f32x16(), b.as_f32x16(), $imm5, neg_one, $imm4)
+        };
+    }
+    let r = constify_imm5_sae!(op, sae, call);
+    transmute(r)
+}
+
+/// Compare packed single-precision (32-bit) floating-point elements in a and b based on the comparison operand specified by op,
+/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmp_round_ps_mask)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[rustc_args_required_const(3, 4)]
+#[cfg_attr(test, assert_instr(vcmp, op = 0, sae = 4))]
+pub unsafe fn _mm512_mask_cmp_round_ps_mask(
+    m: __mmask16,
+    a: __m512,
+    b: __m512,
+    op: i32,
+    sae: i32,
+) -> __mmask16 {
+    macro_rules! call {
+        ($imm5:expr, $imm4:expr) => {
+            vcmpps(a.as_f32x16(), b.as_f32x16(), $imm5, m as i16, $imm4)
+        };
+    }
+    let r = constify_imm5_sae!(op, sae, call);
+    transmute(r)
+}
+
+/// Compare packed single-precision (32-bit) floating-point elements in a and b to see if neither is NaN, and store the results in a mask vector.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmpord_ps_mask)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vcmp, op = 0))]
+pub unsafe fn _mm512_cmpord_ps_mask(a: __m512, b: __m512) -> __mmask16 {
+    _mm512_cmp_ps_mask(a, b, _CMP_ORD_Q)
+}
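The fixed comparisons above are thin wrappers over `_mm512_cmp_ps_mask` with a `_CMP_*` predicate; any of the predicate constants can be passed directly, as long as the value is known at compile time (that is what `rustc_args_required_const` enforces). For example, this reproduces `_mm512_cmplt_ps_mask` (a sketch assuming AVX512F):

```rust
use std::arch::x86_64::*;

#[target_feature(enable = "avx512f")]
unsafe fn lt_via_predicate(a: __m512, b: __m512) -> __mmask16 {
    // _CMP_LT_OS: ordered, signaling less-than -- the predicate behind cmplt.
    _mm512_cmp_ps_mask(a, b, _CMP_LT_OS)
}
```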
+/// Compare packed single-precision (32-bit) floating-point elements in a and b to see if neither is NaN, and store the results in a mask vector.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmpord_ps_mask)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vcmp, op = 0))]
+pub unsafe fn _mm512_mask_cmpord_ps_mask(m: __mmask16, a: __m512, b: __m512) -> __mmask16 {
+    _mm512_mask_cmp_ps_mask(m, a, b, _CMP_ORD_Q)
+}
+
+/// Compare packed single-precision (32-bit) floating-point elements in a and b to see if either is NaN, and store the results in a mask vector.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmpunord_ps_mask)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vcmp, op = 0))]
+pub unsafe fn _mm512_cmpunord_ps_mask(a: __m512, b: __m512) -> __mmask16 {
+    _mm512_cmp_ps_mask(a, b, _CMP_UNORD_Q)
+}
+
+/// Compare packed single-precision (32-bit) floating-point elements in a and b to see if either is NaN, and store the results in a mask vector.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmpunord_ps_mask)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vcmp, op = 0))]
+pub unsafe fn _mm512_mask_cmpunord_ps_mask(m: __mmask16, a: __m512, b: __m512) -> __mmask16 {
+    _mm512_mask_cmp_ps_mask(m, a, b, _CMP_UNORD_Q)
+}
+
+/// Compare packed double-precision (64-bit) floating-point elements in a and b for less-than, and store the results in a mask vector.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmplt_pd)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vcmp))]
+pub unsafe fn _mm512_cmplt_pd_mask(a: __m512d, b: __m512d) -> __mmask8 {
+    _mm512_cmp_pd_mask(a, b, _CMP_LT_OS)
+}
+
+/// Compare packed double-precision (64-bit) floating-point elements in a and b for less-than, and store the results in a mask vector k
+/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmplt_pd)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vcmp))]
+pub unsafe fn _mm512_mask_cmplt_pd_mask(m: __mmask8, a: __m512d, b: __m512d) -> __mmask8 {
+    _mm512_mask_cmp_pd_mask(m, a, b, _CMP_LT_OS)
+}
+
+/// Compare packed double-precision (64-bit) floating-point elements in a and b for not-less-than, and store the results in a mask vector.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmpnlt_pd)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vcmp))]
+pub unsafe fn _mm512_cmpnlt_pd_mask(a: __m512d, b: __m512d) -> __mmask8 {
+    _mm512_cmp_pd_mask(a, b, _CMP_NLT_US)
+}
+
+/// Compare packed double-precision (64-bit) floating-point elements in a and b for not-less-than, and store the results in a mask vector k
+/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmpnlt_pd)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vcmp))]
+pub unsafe fn _mm512_mask_cmpnlt_pd_mask(m: __mmask8, a: __m512d, b: __m512d) -> __mmask8 {
+    _mm512_mask_cmp_pd_mask(m, a, b, _CMP_NLT_US)
+}
+
+/// Compare packed double-precision (64-bit) floating-point elements in a and b for less-than-or-equal, and store the results in a mask vector.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmple_pd)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vcmp))]
+pub unsafe fn _mm512_cmple_pd_mask(a: __m512d, b: __m512d) -> __mmask8 {
+    _mm512_cmp_pd_mask(a, b, _CMP_LE_OS)
+}
+
+/// Compare packed double-precision (64-bit) floating-point elements in a and b for less-than-or-equal, and store the results in a mask vector k
+/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmple_pd)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vcmp))]
+pub unsafe fn _mm512_mask_cmple_pd_mask(m: __mmask8, a: __m512d, b: __m512d) -> __mmask8 {
+    _mm512_mask_cmp_pd_mask(m, a, b, _CMP_LE_OS)
+}
+
+/// Compare packed double-precision (64-bit) floating-point elements in a and b for not-less-than-or-equal, and store the results in a mask vector.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmpnle_pd)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vcmp))]
+pub unsafe fn _mm512_cmpnle_pd_mask(a: __m512d, b: __m512d) -> __mmask8 {
+    _mm512_cmp_pd_mask(a, b, _CMP_NLE_US)
+}
+
+/// Compare packed double-precision (64-bit) floating-point elements in a and b for not-less-than-or-equal, and store the results in a mask vector k
+/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmpnle_pd)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vcmp))]
+pub unsafe fn _mm512_mask_cmpnle_pd_mask(m: __mmask8, a: __m512d, b: __m512d) -> __mmask8 {
+    _mm512_mask_cmp_pd_mask(m, a, b, _CMP_NLE_US)
+}
+
+/// Compare packed double-precision (64-bit) floating-point elements in a and b for equality, and store the results in a mask vector.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmpeq_pd)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vcmp))]
+pub unsafe fn _mm512_cmpeq_pd_mask(a: __m512d, b: __m512d) -> __mmask8 {
+    _mm512_cmp_pd_mask(a, b, _CMP_EQ_OQ)
+}
+
+/// Compare packed double-precision (64-bit) floating-point elements in a and b for equality, and store the results in a mask vector k
+/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmpeq_pd)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vcmp))]
+pub unsafe fn _mm512_mask_cmpeq_pd_mask(m: __mmask8, a: __m512d, b: __m512d) -> __mmask8 {
+    _mm512_mask_cmp_pd_mask(m, a, b, _CMP_EQ_OQ)
+}
+
+/// Compare packed double-precision (64-bit) floating-point elements in a and b for inequality, and store the results in a mask vector.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmpneq_pd)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vcmp))]
+pub unsafe fn _mm512_cmpneq_pd_mask(a: __m512d, b: __m512d) -> __mmask8 {
+    _mm512_cmp_pd_mask(a, b, _CMP_NEQ_UQ)
+}
+
+/// Compare packed double-precision (64-bit) floating-point elements in a and b for inequality, and store the results in a mask vector k
+/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmpneq_pd_mask)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vcmp))]
+pub unsafe fn _mm512_mask_cmpneq_pd_mask(m: __mmask8, a: __m512d, b: __m512d) -> __mmask8 {
+    _mm512_mask_cmp_pd_mask(m, a, b, _CMP_NEQ_UQ)
+}
+
+/// Compare packed double-precision (64-bit) floating-point elements in a and b based on the comparison operand specified by op.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmp_pd_mask)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[rustc_args_required_const(2)]
+#[cfg_attr(test, assert_instr(vcmp, op = 0))]
+pub unsafe fn _mm512_cmp_pd_mask(a: __m512d, b: __m512d, op: i32) -> __mmask8 {
+    let neg_one = -1;
+    macro_rules! call {
+        ($imm5:expr) => {
+            vcmppd(
+                a.as_f64x8(),
+                b.as_f64x8(),
+                $imm5,
+                neg_one,
+                _MM_FROUND_CUR_DIRECTION,
+            )
+        };
+    }
+    let r = constify_imm5!(op, call);
+    transmute(r)
+}
+
+/// Compare packed double-precision (64-bit) floating-point elements in a and b based on the comparison operand specified by op,
+/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmp_pd_mask)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[rustc_args_required_const(3)]
+#[cfg_attr(test, assert_instr(vcmp, op = 0))]
+pub unsafe fn _mm512_mask_cmp_pd_mask(m: __mmask8, a: __m512d, b: __m512d, op: i32) -> __mmask8 {
+    macro_rules! call {
+        ($imm5:expr) => {
+            vcmppd(
+                a.as_f64x8(),
+                b.as_f64x8(),
+                $imm5,
+                m as i8,
+                _MM_FROUND_CUR_DIRECTION,
+            )
+        };
+    }
+    let r = constify_imm5!(op, call);
+    transmute(r)
+}
+
+/// Compare packed double-precision (64-bit) floating-point elements in a and b based on the comparison operand specified by op.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmp_round_pd_mask)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[rustc_args_required_const(2, 3)]
+#[cfg_attr(test, assert_instr(vcmp, op = 0, sae = 4))]
+pub unsafe fn _mm512_cmp_round_pd_mask(a: __m512d, b: __m512d, op: i32, sae: i32) -> __mmask8 {
+    let neg_one = -1;
+    macro_rules! call {
+        ($imm5:expr, $imm4:expr) => {
+            vcmppd(a.as_f64x8(), b.as_f64x8(), $imm5, neg_one, $imm4)
+        };
+    }
+    let r = constify_imm5_sae!(op, sae, call);
+    transmute(r)
+}
+
+/// Compare packed double-precision (64-bit) floating-point elements in a and b based on the comparison operand specified by op,
+/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmp_round_pd_mask)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[rustc_args_required_const(3, 4)]
+#[cfg_attr(test, assert_instr(vcmp, op = 0, sae = 4))]
+pub unsafe fn _mm512_mask_cmp_round_pd_mask(
+    m: __mmask8,
+    a: __m512d,
+    b: __m512d,
+    op: i32,
+    sae: i32,
+) -> __mmask8 {
+    macro_rules! call {
+        ($imm5:expr, $imm4:expr) => {
+            vcmppd(a.as_f64x8(), b.as_f64x8(), $imm5, m as i8, $imm4)
+        };
+    }
+    let r = constify_imm5_sae!(op, sae, call);
+    transmute(r)
+}
+
+/// Compare packed double-precision (64-bit) floating-point elements in a and b to see if neither is NaN, and store the results in a mask vector.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmpord_pd_mask)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vcmp, op = 0))]
+pub unsafe fn _mm512_cmpord_pd_mask(a: __m512d, b: __m512d) -> __mmask8 {
+    _mm512_cmp_pd_mask(a, b, _CMP_ORD_Q)
+}
+
+/// Compare packed double-precision (64-bit) floating-point elements in a and b to see if neither is NaN, and store the results in a mask vector.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmpord_pd_mask)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vcmp, op = 0))]
+pub unsafe fn _mm512_mask_cmpord_pd_mask(m: __mmask8, a: __m512d, b: __m512d) -> __mmask8 {
+    _mm512_mask_cmp_pd_mask(m, a, b, _CMP_ORD_Q)
+}
+
+/// Compare packed double-precision (64-bit) floating-point elements in a and b to see if either is NaN, and store the results in a mask vector.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmpunord_pd_mask)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vcmp, op = 0))]
+pub unsafe fn _mm512_cmpunord_pd_mask(a: __m512d, b: __m512d) -> __mmask8 {
+    _mm512_cmp_pd_mask(a, b, _CMP_UNORD_Q)
+}
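The `_round_` variants add an explicit `sae` parameter: `_MM_FROUND_CUR_DIRECTION` gives the same behavior as the non-round forms, while `_MM_FROUND_NO_EXC` suppresses floating-point exceptions during the compare. A sketch assuming AVX512F (`eq_no_exceptions` is illustrative):

```rust
use std::arch::x86_64::*;

#[target_feature(enable = "avx512f")]
unsafe fn eq_no_exceptions(a: __m512d, b: __m512d) -> __mmask8 {
    // Same predicate as _mm512_cmpeq_pd_mask, but with exceptions suppressed.
    _mm512_cmp_round_pd_mask(a, b, _CMP_EQ_OQ, _MM_FROUND_NO_EXC)
}
```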
+/// Compare packed double-precision (64-bit) floating-point elements in a and b to see if either is NaN, and store the results in a mask vector.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmpunord_pd_mask)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vcmp, op = 0))]
+pub unsafe fn _mm512_mask_cmpunord_pd_mask(m: __mmask8, a: __m512d, b: __m512d) -> __mmask8 {
+    _mm512_mask_cmp_pd_mask(m, a, b, _CMP_UNORD_Q)
+}
+
+/// Compare the lower single-precision (32-bit) floating-point element in a and b based on the comparison operand specified by imm8, and store the result in a mask vector.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmp_ss_mask&expand=5236,755,757)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[rustc_args_required_const(2)]
+#[cfg_attr(test, assert_instr(vcmp, op = 0, sae = 4))]
+pub unsafe fn _mm_cmp_ss_mask(a: __m128, b: __m128, op: i32) -> __mmask8 {
+    let neg_one = -1;
+    macro_rules! call {
+        ($imm5:expr) => {
+            vcmpss(a, b, $imm5, neg_one, _MM_FROUND_CUR_DIRECTION)
+        };
+    }
+    let r = constify_imm5!(op, call);
+    transmute(r)
+}
+
+/// Compare the lower single-precision (32-bit) floating-point element in a and b based on the comparison operand specified by imm8, and store the result in a mask vector using zeromask m (the element is zeroed out when mask bit 0 is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cmp_ss_mask&expand=5236,755,757)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[rustc_args_required_const(3)]
+#[cfg_attr(test, assert_instr(vcmp, op = 0, sae = 4))]
+pub unsafe fn _mm_mask_cmp_ss_mask(m: __mmask8, a: __m128, b: __m128, op: i32) -> __mmask8 {
+    macro_rules! call {
+        ($imm5:expr) => {
+            vcmpss(a, b, $imm5, m as i8, _MM_FROUND_CUR_DIRECTION)
+        };
+    }
+    let r = constify_imm5!(op, call);
+    transmute(r)
+}
+
+/// Compare the lower single-precision (32-bit) floating-point element in a and b based on the comparison operand specified by imm8, and store the result in a mask vector.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmp_round_ss_mask&expand=5236,755,757)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[rustc_args_required_const(2, 3)]
+#[cfg_attr(test, assert_instr(vcmp, op = 0, sae = 4))]
+pub unsafe fn _mm_cmp_round_ss_mask(a: __m128, b: __m128, op: i32, sae: i32) -> __mmask8 {
+    let neg_one = -1;
+    macro_rules! call {
+        ($imm5:expr, $imm4:expr) => {
+            vcmpss(a, b, $imm5, neg_one, $imm4)
+        };
+    }
+    let r = constify_imm5_sae!(op, sae, call);
+    transmute(r)
+}
+
+/// Compare the lower single-precision (32-bit) floating-point element in a and b based on the comparison operand specified by imm8, and store the result in a mask vector using zeromask m (the element is zeroed out when mask bit 0 is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cmp_round_ss_mask&expand=5236,755,757)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[rustc_args_required_const(3, 4)]
+#[cfg_attr(test, assert_instr(vcmp, op = 0, sae = 4))]
+pub unsafe fn _mm_mask_cmp_round_ss_mask(
+    m: __mmask8,
+    a: __m128,
+    b: __m128,
+    op: i32,
+    sae: i32,
+) -> __mmask8 {
+    macro_rules! call {
+        ($imm5:expr, $imm4:expr) => {
+            vcmpss(a, b, $imm5, m as i8, $imm4)
+        };
+    }
+    let r = constify_imm5_sae!(op, sae, call);
+    transmute(r)
+}
+
+/// Compare the lower double-precision (64-bit) floating-point element in a and b based on the comparison operand specified by imm8, and store the result in a mask vector.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmp_sd_mask&expand=5236,755,757)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[rustc_args_required_const(2)]
+#[cfg_attr(test, assert_instr(vcmp, op = 0, sae = 4))]
+pub unsafe fn _mm_cmp_sd_mask(a: __m128d, b: __m128d, op: i32) -> __mmask8 {
+    let neg_one = -1;
+    macro_rules! call {
+        ($imm5:expr) => {
+            vcmpsd(a, b, $imm5, neg_one, _MM_FROUND_CUR_DIRECTION)
+        };
+    }
+    let r = constify_imm5!(op, call);
+    transmute(r)
+}
+
+/// Compare the lower double-precision (64-bit) floating-point element in a and b based on the comparison operand specified by imm8, and store the result in a mask vector using zeromask m (the element is zeroed out when mask bit 0 is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cmp_sd_mask&expand=5236,755,757)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[rustc_args_required_const(3)]
+#[cfg_attr(test, assert_instr(vcmp, op = 0, sae = 4))]
+pub unsafe fn _mm_mask_cmp_sd_mask(m: __mmask8, a: __m128d, b: __m128d, op: i32) -> __mmask8 {
+    macro_rules! call {
+        ($imm5:expr) => {
+            vcmpsd(a, b, $imm5, m as i8, _MM_FROUND_CUR_DIRECTION)
+        };
+    }
+    let r = constify_imm5!(op, call);
+    transmute(r)
+}
+
+/// Compare the lower double-precision (64-bit) floating-point element in a and b based on the comparison operand specified by imm8, and store the result in a mask vector.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmp_round_sd_mask&expand=5236,755,757)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[rustc_args_required_const(2, 3)]
+#[cfg_attr(test, assert_instr(vcmp, op = 0, sae = 4))]
+pub unsafe fn _mm_cmp_round_sd_mask(a: __m128d, b: __m128d, op: i32, sae: i32) -> __mmask8 {
+    let neg_one = -1;
+    macro_rules! call {
+        ($imm5:expr, $imm4:expr) => {
+            vcmpsd(a, b, $imm5, neg_one, $imm4)
+        };
+    }
+    let r = constify_imm5_sae!(op, sae, call);
+    transmute(r)
+}
+
+/// Compare the lower double-precision (64-bit) floating-point element in a and b based on the comparison operand specified by imm8, and store the result in a mask vector using zeromask m (the element is zeroed out when mask bit 0 is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cmp_round_sd_mask&expand=5236,755,757)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[rustc_args_required_const(3, 4)]
+#[cfg_attr(test, assert_instr(vcmp, op = 0, sae = 4))]
+pub unsafe fn _mm_mask_cmp_round_sd_mask(
+    m: __mmask8,
+    a: __m128d,
+    b: __m128d,
+    op: i32,
+    sae: i32,
+) -> __mmask8 {
+    macro_rules! call {
+        ($imm5:expr, $imm4:expr) => {
+            vcmpsd(a, b, $imm5, m as i8, $imm4)
+        };
+    }
+    let r = constify_imm5_sae!(op, sae, call);
+    transmute(r)
+}
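Unlike the packed forms, the `ss`/`sd` comparisons look only at element 0 and return the verdict in bit 0 of the mask. A sketch assuming AVX512F (`lower_lt` is illustrative):

```rust
use std::arch::x86_64::*;

#[target_feature(enable = "avx512f")]
unsafe fn lower_lt(a: __m128d, b: __m128d) -> bool {
    // Bit 0 of the mask holds the result for the lower element.
    _mm_cmp_sd_mask(a, b, _CMP_LT_OS) & 1 != 0
}
```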
+/// Compare packed unsigned 32-bit integers in a and b for less-than, and store the results in a mask vector.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmplt_epu32)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vpcmp))]
+pub unsafe fn _mm512_cmplt_epu32_mask(a: __m512i, b: __m512i) -> __mmask16 {
+    simd_bitmask::<u32x16, _>(simd_lt(a.as_u32x16(), b.as_u32x16()))
+}
+
+/// Compare packed unsigned 32-bit integers in a and b for less-than, and store the results in a mask vector k
+/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmplt_epu32)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vpcmp))]
+pub unsafe fn _mm512_mask_cmplt_epu32_mask(m: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
+    _mm512_cmplt_epu32_mask(a, b) & m
+}
+
+/// Compare packed unsigned 32-bit integers in a and b for greater-than, and store the results in a mask vector.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmpgt_epu32)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vpcmp))]
+pub unsafe fn _mm512_cmpgt_epu32_mask(a: __m512i, b: __m512i) -> __mmask16 {
+    simd_bitmask::<u32x16, _>(simd_gt(a.as_u32x16(), b.as_u32x16()))
+}
+
+/// Compare packed unsigned 32-bit integers in a and b for greater-than, and store the results in a mask vector k
+/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmpgt_epu32)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vpcmp))]
+pub unsafe fn _mm512_mask_cmpgt_epu32_mask(m: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
+    _mm512_cmpgt_epu32_mask(a, b) & m
+}
+
+/// Compare packed unsigned 32-bit integers in a and b for less-than-or-equal, and store the results in a mask vector.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmple_epu32)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vpcmp))]
+pub unsafe fn _mm512_cmple_epu32_mask(a: __m512i, b: __m512i) -> __mmask16 {
+    simd_bitmask::<u32x16, _>(simd_le(a.as_u32x16(), b.as_u32x16()))
+}
+
+/// Compare packed unsigned 32-bit integers in a and b for less-than-or-equal, and store the results in a mask vector k
+/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmple_epu32)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vpcmp))]
+pub unsafe fn _mm512_mask_cmple_epu32_mask(m: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
+    _mm512_cmple_epu32_mask(a, b) & m
+}
+
+/// Compare packed unsigned 32-bit integers in a and b for greater-than-or-equal, and store the results in a mask vector.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmpge_epu32)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vpcmp))]
+pub unsafe fn _mm512_cmpge_epu32_mask(a: __m512i, b: __m512i) -> __mmask16 {
+    simd_bitmask::<u32x16, _>(simd_ge(a.as_u32x16(), b.as_u32x16()))
+}
+
+/// Compare packed unsigned 32-bit integers in a and b for greater-than-or-equal, and store the results in a mask vector k
+/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmpge_epu32)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vpcmp))]
+pub unsafe fn _mm512_mask_cmpge_epu32_mask(m: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
+    _mm512_cmpge_epu32_mask(a, b) & m
+}
+
+/// Compare packed unsigned 32-bit integers in a and b for equality, and store the results in a mask vector.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmpeq_epu32)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vpcmp))]
+pub unsafe fn _mm512_cmpeq_epu32_mask(a: __m512i, b: __m512i) -> __mmask16 {
+    simd_bitmask::<u32x16, _>(simd_eq(a.as_u32x16(), b.as_u32x16()))
+}
+
+/// Compare packed unsigned 32-bit integers in a and b for equality, and store the results in a mask vector k
+/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmpeq_epu32)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vpcmp))]
+pub unsafe fn _mm512_mask_cmpeq_epu32_mask(m: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
+    _mm512_cmpeq_epu32_mask(a, b) & m
+}
+
+/// Compare packed unsigned 32-bit integers in a and b for inequality, and store the results in a mask vector.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmpneq_epu32)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vpcmp))]
+pub unsafe fn _mm512_cmpneq_epu32_mask(a: __m512i, b: __m512i) -> __mmask16 {
+    simd_bitmask::<u32x16, _>(simd_ne(a.as_u32x16(), b.as_u32x16()))
+}
+
+/// Compare packed unsigned 32-bit integers in a and b for inequality, and store the results in a mask vector k
+/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmpneq_epu32_mask)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vpcmp))]
+pub unsafe fn _mm512_mask_cmpneq_epu32_mask(m: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
+    _mm512_cmpneq_epu32_mask(a, b) & m
+}
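+
+// Usage sketch for the unsigned 32-bit compares above (illustrative only;
+// `example_cmp_epu32_usage` is a hypothetical helper, not part of the API).
+// The masked variants simply AND the compare result with the zeromask.
+#[cfg(test)]
+#[target_feature(enable = "avx512f")]
+unsafe fn example_cmp_epu32_usage() {
+    let a = _mm512_set1_epi32(-1); // every lane is u32::MAX when read as unsigned
+    let b = _mm512_setzero_si512(); // every lane is 0
+    assert_eq!(_mm512_cmplt_epu32_mask(b, a), 0xFFFF); // 0 < u32::MAX in all 16 lanes
+    // Only lanes whose mask bit is set can survive into the result.
+    assert_eq!(_mm512_mask_cmplt_epu32_mask(0b1111, b, a), 0b1111);
+}
+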
+/// Compare packed unsigned 32-bit integers in a and b based on the comparison operand specified by op.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_cmp_epu32_mask)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[rustc_args_required_const(2)]
+#[cfg_attr(test, assert_instr(vpcmp, op = 0))]
+pub unsafe fn _mm512_cmp_epu32_mask(a: __m512i, b: __m512i, op: _MM_CMPINT_ENUM) -> __mmask16 {
+    let neg_one = -1;
+    macro_rules! call {
+        ($imm3:expr) => {
+            vpcmpud(a.as_i32x16(), b.as_i32x16(), $imm3, neg_one)
+        };
+    }
+    let r = constify_imm3!(op, call);
+    transmute(r)
+}
+
+/// Compare packed unsigned 32-bit integers in a and b based on the comparison operand specified by op,
+/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmp_epu32_mask)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[rustc_args_required_const(3)]
+#[cfg_attr(test, assert_instr(vpcmp, op = 0))]
+pub unsafe fn _mm512_mask_cmp_epu32_mask(
+    m: __mmask16,
+    a: __m512i,
+    b: __m512i,
+    op: _MM_CMPINT_ENUM,
+) -> __mmask16 {
+    macro_rules! call {
+        ($imm3:expr) => {
+            vpcmpud(a.as_i32x16(), b.as_i32x16(), $imm3, m as i16)
+        };
+    }
+    let r = constify_imm3!(op, call);
+    transmute(r)
+}
+
+/// Compare packed signed 32-bit integers in a and b for less-than, and store the results in a mask vector.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmplt_epi32)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vpcmp))]
+pub unsafe fn _mm512_cmplt_epi32_mask(a: __m512i, b: __m512i) -> __mmask16 {
+    simd_bitmask::<i32x16, _>(simd_lt(a.as_i32x16(), b.as_i32x16()))
+}
+
+/// Compare packed signed 32-bit integers in a and b for less-than, and store the results in a mask vector k
+/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmplt_epi32)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vpcmp))]
+pub unsafe fn _mm512_mask_cmplt_epi32_mask(m: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
+    _mm512_cmplt_epi32_mask(a, b) & m
+}
+
+/// Compare packed signed 32-bit integers in a and b for greater-than, and store the results in a mask vector.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmpgt_epi32)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vpcmp))]
+pub unsafe fn _mm512_cmpgt_epi32_mask(a: __m512i, b: __m512i) -> __mmask16 {
+    simd_bitmask::<i32x16, _>(simd_gt(a.as_i32x16(), b.as_i32x16()))
+}
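+
+// Sketch of the operator-driven form (illustrative only; hypothetical
+// helper). The `_MM_CMPINT_*` constants, defined later in this file, select
+// the predicate encoded into the `vpcmpud` immediate.
+#[cfg(test)]
+#[target_feature(enable = "avx512f")]
+unsafe fn example_cmp_epu32_op() {
+    let a = _mm512_set1_epi32(1);
+    let b = _mm512_set1_epi32(2);
+    // 1 <= 2 holds in every lane, so the full 16-bit mask comes back.
+    assert_eq!(_mm512_cmp_epu32_mask(a, b, _MM_CMPINT_LE), 0xFFFF);
+    // The zeromask clears the high half of the result.
+    assert_eq!(_mm512_mask_cmp_epu32_mask(0x00FF, a, b, _MM_CMPINT_LE), 0x00FF);
+}
+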
+/// Compare packed signed 32-bit integers in a and b for greater-than, and store the results in a mask vector k
+/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmpgt_epi32)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vpcmp))]
+pub unsafe fn _mm512_mask_cmpgt_epi32_mask(m: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
+    _mm512_cmpgt_epi32_mask(a, b) & m
+}
+
+/// Compare packed signed 32-bit integers in a and b for less-than-or-equal, and store the results in a mask vector.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmple_epi32)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vpcmp))]
+pub unsafe fn _mm512_cmple_epi32_mask(a: __m512i, b: __m512i) -> __mmask16 {
+    simd_bitmask::<i32x16, _>(simd_le(a.as_i32x16(), b.as_i32x16()))
+}
+
+/// Compare packed signed 32-bit integers in a and b for less-than-or-equal, and store the results in a mask vector k
+/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmple_epi32)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vpcmp))]
+pub unsafe fn _mm512_mask_cmple_epi32_mask(m: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
+    _mm512_cmple_epi32_mask(a, b) & m
+}
+
+/// Compare packed signed 32-bit integers in a and b for greater-than-or-equal, and store the results in a mask vector.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmpge_epi32)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vpcmp))]
+pub unsafe fn _mm512_cmpge_epi32_mask(a: __m512i, b: __m512i) -> __mmask16 {
+    simd_bitmask::<i32x16, _>(simd_ge(a.as_i32x16(), b.as_i32x16()))
+}
+
+/// Compare packed signed 32-bit integers in a and b for greater-than-or-equal, and store the results in a mask vector k
+/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmpge_epi32)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vpcmp))]
+pub unsafe fn _mm512_mask_cmpge_epi32_mask(m: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
+    _mm512_cmpge_epi32_mask(a, b) & m
+}
+
+/// Compare packed signed 32-bit integers in a and b for equality, and store the results in a mask vector.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmpeq_epi32)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vpcmp))]
+pub unsafe fn _mm512_cmpeq_epi32_mask(a: __m512i, b: __m512i) -> __mmask16 {
+    simd_bitmask::<i32x16, _>(simd_eq(a.as_i32x16(), b.as_i32x16()))
+}
+
+/// Compare packed signed 32-bit integers in a and b for equality, and store the results in a mask vector k
+/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmpeq_epi32)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vpcmp))]
+pub unsafe fn _mm512_mask_cmpeq_epi32_mask(m: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
+    _mm512_cmpeq_epi32_mask(a, b) & m
+}
+
+/// Compare packed signed 32-bit integers in a and b for inequality, and store the results in a mask vector.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmpneq_epi32)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vpcmp))]
+pub unsafe fn _mm512_cmpneq_epi32_mask(a: __m512i, b: __m512i) -> __mmask16 {
+    simd_bitmask::<i32x16, _>(simd_ne(a.as_i32x16(), b.as_i32x16()))
+}
+
+/// Compare packed signed 32-bit integers in a and b for inequality, and store the results in a mask vector k
+/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmpneq_epi32)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vpcmp))]
+pub unsafe fn _mm512_mask_cmpneq_epi32_mask(m: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
+    _mm512_cmpneq_epi32_mask(a, b) & m
+}
+
+/// Compare packed signed 32-bit integers in a and b based on the comparison operand specified by op.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_cmp_epi32_mask)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[rustc_args_required_const(2)]
+#[cfg_attr(test, assert_instr(vpcmp, op = 0))]
+pub unsafe fn _mm512_cmp_epi32_mask(a: __m512i, b: __m512i, op: _MM_CMPINT_ENUM) -> __mmask16 {
+    let neg_one = -1;
+    macro_rules! call {
+        ($imm3:expr) => {
+            vpcmpd(a.as_i32x16(), b.as_i32x16(), $imm3, neg_one)
+        };
+    }
+    let r = constify_imm3!(op, call);
+    transmute(r)
+}
+
+/// Compare packed signed 32-bit integers in a and b based on the comparison operand specified by op,
+/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmp_epi32_mask)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[rustc_args_required_const(3)]
+#[cfg_attr(test, assert_instr(vpcmp, op = 0))]
+pub unsafe fn _mm512_mask_cmp_epi32_mask(
+    m: __mmask16,
+    a: __m512i,
+    b: __m512i,
+    op: _MM_CMPINT_ENUM,
+) -> __mmask16 {
+    macro_rules! call {
+        ($imm3:expr) => {
+            vpcmpd(a.as_i32x16(), b.as_i32x16(), $imm3, m as i16)
+        };
+    }
+    let r = constify_imm3!(op, call);
+    transmute(r)
+}
+
+/// Compare packed unsigned 64-bit integers in a and b for less-than, and store the results in a mask vector.
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmplt_epu64) +#[inline] +#[target_feature(enable = "avx512f")] +#[cfg_attr(test, assert_instr(vpcmp))] +pub unsafe fn _mm512_cmplt_epu64_mask(a: __m512i, b: __m512i) -> __mmask8 { + simd_bitmask::<__m512i, _>(simd_lt(a.as_u64x8(), b.as_u64x8())) +} + +/// Compare packed unsigned 64-bit integers in a and b for less-than, and store the results in a mask vector k +/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmplt_epu64) +#[inline] +#[target_feature(enable = "avx512f")] +#[cfg_attr(test, assert_instr(vpcmp))] +pub unsafe fn _mm512_mask_cmplt_epu64_mask(m: __mmask8, a: __m512i, b: __m512i) -> __mmask8 { + _mm512_cmplt_epu64_mask(a, b) & m +} + +/// Compare packed unsigned 64-bit integers in a and b for greater-than, and store the results in a mask vector. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmpgt_epu64) +#[inline] +#[target_feature(enable = "avx512f")] +#[cfg_attr(test, assert_instr(vpcmp))] +pub unsafe fn _mm512_cmpgt_epu64_mask(a: __m512i, b: __m512i) -> __mmask8 { + simd_bitmask::<__m512i, _>(simd_gt(a.as_u64x8(), b.as_u64x8())) +} + +/// Compare packed unsigned 64-bit integers in a and b for greater-than, and store the results in a mask vector k +/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmpgt_epu64) +#[inline] +#[target_feature(enable = "avx512f")] +#[cfg_attr(test, assert_instr(vpcmp))] +pub unsafe fn _mm512_mask_cmpgt_epu64_mask(m: __mmask8, a: __m512i, b: __m512i) -> __mmask8 { + _mm512_cmpgt_epu64_mask(a, b) & m +} + +/// Compare packed unsigned 64-bit integers in a and b for less-than-or-equal, and store the results in a mask vector. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmple_epu64) +#[inline] +#[target_feature(enable = "avx512f")] +#[cfg_attr(test, assert_instr(vpcmp))] +pub unsafe fn _mm512_cmple_epu64_mask(a: __m512i, b: __m512i) -> __mmask8 { + simd_bitmask::<__m512i, _>(simd_le(a.as_u64x8(), b.as_u64x8())) +} + +/// Compare packed unsigned 64-bit integers in a and b for less-than-or-equal, and store the results in a mask vector k +/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmple_epu64) +#[inline] +#[target_feature(enable = "avx512f")] +#[cfg_attr(test, assert_instr(vpcmp))] +pub unsafe fn _mm512_mask_cmple_epu64_mask(m: __mmask8, a: __m512i, b: __m512i) -> __mmask8 { + _mm512_cmple_epu64_mask(a, b) & m +} + +/// Compare packed unsigned 64-bit integers in a and b for greater-than-or-equal, and store the results in a mask vector. 
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmpge_epu64)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vpcmp))]
+pub unsafe fn _mm512_cmpge_epu64_mask(a: __m512i, b: __m512i) -> __mmask8 {
+    simd_bitmask::<__m512i, _>(simd_ge(a.as_u64x8(), b.as_u64x8()))
+}
+
+/// Compare packed unsigned 64-bit integers in a and b for greater-than-or-equal, and store the results in a mask vector k
+/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmpge_epu64)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vpcmp))]
+pub unsafe fn _mm512_mask_cmpge_epu64_mask(m: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
+    _mm512_cmpge_epu64_mask(a, b) & m
+}
+
+/// Compare packed unsigned 64-bit integers in a and b for equality, and store the results in a mask vector.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmpeq_epu64)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vpcmp))]
+pub unsafe fn _mm512_cmpeq_epu64_mask(a: __m512i, b: __m512i) -> __mmask8 {
+    simd_bitmask::<__m512i, _>(simd_eq(a.as_u64x8(), b.as_u64x8()))
+}
+
+/// Compare packed unsigned 64-bit integers in a and b for equality, and store the results in a mask vector k
+/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmpeq_epu64)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vpcmp))]
+pub unsafe fn _mm512_mask_cmpeq_epu64_mask(m: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
+    _mm512_cmpeq_epu64_mask(a, b) & m
+}
+
+/// Compare packed unsigned 64-bit integers in a and b for inequality, and store the results in a mask vector.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmpneq_epu64)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vpcmp))]
+pub unsafe fn _mm512_cmpneq_epu64_mask(a: __m512i, b: __m512i) -> __mmask8 {
+    simd_bitmask::<__m512i, _>(simd_ne(a.as_u64x8(), b.as_u64x8()))
+}
+
+/// Compare packed unsigned 64-bit integers in a and b for inequality, and store the results in a mask vector k
+/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmpneq_epu64_mask)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vpcmp))]
+pub unsafe fn _mm512_mask_cmpneq_epu64_mask(m: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
+    _mm512_cmpneq_epu64_mask(a, b) & m
+}
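+
+// Sketch (illustrative only; hypothetical helper): the masked form keeps the
+// operand order of the unmasked compare, i.e. it still reports `a >= b`.
+#[cfg(test)]
+#[target_feature(enable = "avx512f")]
+unsafe fn example_cmpge_epu64_usage() {
+    let a = _mm512_set1_epi32(2); // each unsigned 64-bit lane of `a` exceeds `b`'s
+    let b = _mm512_set1_epi32(1);
+    assert_eq!(_mm512_cmpge_epu64_mask(a, b), 0xFF); // true in all 8 lanes
+    assert_eq!(_mm512_mask_cmpge_epu64_mask(0b0000_1010, a, b), 0b0000_1010);
+}
+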
+/// Compare packed unsigned 64-bit integers in a and b based on the comparison operand specified by op.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_cmp_epu64_mask)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[rustc_args_required_const(2)]
+#[cfg_attr(test, assert_instr(vpcmp, op = 0))]
+pub unsafe fn _mm512_cmp_epu64_mask(a: __m512i, b: __m512i, op: _MM_CMPINT_ENUM) -> __mmask8 {
+    let neg_one = -1;
+    macro_rules! call {
+        ($imm3:expr) => {
+            vpcmpuq(a.as_i64x8(), b.as_i64x8(), $imm3, neg_one)
+        };
+    }
+    let r = constify_imm3!(op, call);
+    transmute(r)
+}
+
+/// Compare packed unsigned 64-bit integers in a and b based on the comparison operand specified by op,
+/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmp_epu64_mask)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[rustc_args_required_const(3)]
+#[cfg_attr(test, assert_instr(vpcmp, op = 0))]
+pub unsafe fn _mm512_mask_cmp_epu64_mask(
+    m: __mmask8,
+    a: __m512i,
+    b: __m512i,
+    op: _MM_CMPINT_ENUM,
+) -> __mmask8 {
+    macro_rules! call {
+        ($imm3:expr) => {
+            vpcmpuq(a.as_i64x8(), b.as_i64x8(), $imm3, m as i8)
+        };
+    }
+    let r = constify_imm3!(op, call);
+    transmute(r)
+}
+
+/// Compare packed signed 64-bit integers in a and b for less-than, and store the results in a mask vector.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmplt_epi64)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vpcmp))]
+pub unsafe fn _mm512_cmplt_epi64_mask(a: __m512i, b: __m512i) -> __mmask8 {
+    simd_bitmask::<__m512i, _>(simd_lt(a.as_i64x8(), b.as_i64x8()))
+}
+
+/// Compare packed signed 64-bit integers in a and b for less-than, and store the results in a mask vector k
+/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmplt_epi64)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vpcmp))]
+pub unsafe fn _mm512_mask_cmplt_epi64_mask(m: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
+    _mm512_cmplt_epi64_mask(a, b) & m
+}
+
+/// Compare packed signed 64-bit integers in a and b for greater-than, and store the results in a mask vector.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmpgt_epi64)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vpcmp))]
+pub unsafe fn _mm512_cmpgt_epi64_mask(a: __m512i, b: __m512i) -> __mmask8 {
+    simd_bitmask::<__m512i, _>(simd_gt(a.as_i64x8(), b.as_i64x8()))
+}
+
+/// Compare packed signed 64-bit integers in a and b for greater-than, and store the results in a mask vector k
+/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmpgt_epi64)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vpcmp))]
+pub unsafe fn _mm512_mask_cmpgt_epi64_mask(m: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
+    _mm512_cmpgt_epi64_mask(a, b) & m
+}
+
+/// Compare packed signed 64-bit integers in a and b for less-than-or-equal, and store the results in a mask vector.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmple_epi64)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vpcmp))]
+pub unsafe fn _mm512_cmple_epi64_mask(a: __m512i, b: __m512i) -> __mmask8 {
+    simd_bitmask::<__m512i, _>(simd_le(a.as_i64x8(), b.as_i64x8()))
+}
+
+/// Compare packed signed 64-bit integers in a and b for less-than-or-equal, and store the results in a mask vector k
+/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmple_epi64)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vpcmp))]
+pub unsafe fn _mm512_mask_cmple_epi64_mask(m: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
+    _mm512_cmple_epi64_mask(a, b) & m
+}
+
+/// Compare packed signed 64-bit integers in a and b for greater-than-or-equal, and store the results in a mask vector.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmpge_epi64)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vpcmp))]
+pub unsafe fn _mm512_cmpge_epi64_mask(a: __m512i, b: __m512i) -> __mmask8 {
+    simd_bitmask::<__m512i, _>(simd_ge(a.as_i64x8(), b.as_i64x8()))
+}
+
+/// Compare packed signed 64-bit integers in a and b for greater-than-or-equal, and store the results in a mask vector k
+/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmpge_epi64)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vpcmp))]
+pub unsafe fn _mm512_mask_cmpge_epi64_mask(m: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
+    _mm512_cmpge_epi64_mask(a, b) & m
+}
+
+/// Compare packed signed 64-bit integers in a and b for equality, and store the results in a mask vector.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmpeq_epi64)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vpcmp))]
+pub unsafe fn _mm512_cmpeq_epi64_mask(a: __m512i, b: __m512i) -> __mmask8 {
+    simd_bitmask::<__m512i, _>(simd_eq(a.as_i64x8(), b.as_i64x8()))
+}
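+
+// Sketch (illustrative only; hypothetical helper) of the `_MM_CMPINT_ENUM`
+// form on signed 64-bit lanes; `_MM_CMPINT_NE` is the complement of
+// `_MM_CMPINT_EQ`.
+#[cfg(test)]
+#[target_feature(enable = "avx512f")]
+unsafe fn example_cmp_epi64_op() {
+    let a = _mm512_set1_epi32(7); // both operands hold identical lane patterns
+    let b = _mm512_set1_epi32(7);
+    assert_eq!(_mm512_cmp_epi64_mask(a, b, _MM_CMPINT_EQ), 0xFF);
+    assert_eq!(_mm512_cmp_epi64_mask(a, b, _MM_CMPINT_NE), 0x00);
+}
+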
+/// Compare packed signed 64-bit integers in a and b for equality, and store the results in a mask vector k
+/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmpeq_epi64)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vpcmp))]
+pub unsafe fn _mm512_mask_cmpeq_epi64_mask(m: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
+    _mm512_cmpeq_epi64_mask(a, b) & m
+}
+
+/// Compare packed signed 64-bit integers in a and b for inequality, and store the results in a mask vector.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmpneq_epi64)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vpcmp))]
+pub unsafe fn _mm512_cmpneq_epi64_mask(a: __m512i, b: __m512i) -> __mmask8 {
+    simd_bitmask::<__m512i, _>(simd_ne(a.as_i64x8(), b.as_i64x8()))
+}
+
+/// Compare packed signed 64-bit integers in a and b for inequality, and store the results in a mask vector k
+/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmpneq_epi64)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vpcmp))]
+pub unsafe fn _mm512_mask_cmpneq_epi64_mask(m: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
+    _mm512_cmpneq_epi64_mask(a, b) & m
+}
+
+/// Compare packed signed 64-bit integers in a and b based on the comparison operand specified by op.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_cmp_epi64_mask)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[rustc_args_required_const(2)]
+#[cfg_attr(test, assert_instr(vpcmp, op = 0))]
+pub unsafe fn _mm512_cmp_epi64_mask(a: __m512i, b: __m512i, op: _MM_CMPINT_ENUM) -> __mmask8 {
+    let neg_one = -1;
+    macro_rules! call {
+        ($imm3:expr) => {
+            vpcmpq(a.as_i64x8(), b.as_i64x8(), $imm3, neg_one)
+        };
+    }
+    let r = constify_imm3!(op, call);
+    transmute(r)
+}
+
+/// Compare packed signed 64-bit integers in a and b based on the comparison operand specified by op,
+/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmp_epi64_mask)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[rustc_args_required_const(3)]
+#[cfg_attr(test, assert_instr(vpcmp, op = 0))]
+pub unsafe fn _mm512_mask_cmp_epi64_mask(
+    m: __mmask8,
+    a: __m512i,
+    b: __m512i,
+    op: _MM_CMPINT_ENUM,
+) -> __mmask8 {
+    macro_rules! call {
+        ($imm3:expr) => {
+            vpcmpq(a.as_i64x8(), b.as_i64x8(), $imm3, m as i8)
+        };
+    }
+    let r = constify_imm3!(op, call);
+    transmute(r)
+}
+
+/// Returns vector of type `__m512d` with undefined elements.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_undefined_pd)
+#[inline]
+#[target_feature(enable = "avx512f")]
+// This intrinsic has no corresponding instruction.
+pub unsafe fn _mm512_undefined_pd() -> __m512d {
+    _mm512_set1_pd(0.0)
+}
+
+/// Returns vector of type `__m512` with undefined elements.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_undefined_ps)
+#[inline]
+#[target_feature(enable = "avx512f")]
+// This intrinsic has no corresponding instruction.
+pub unsafe fn _mm512_undefined_ps() -> __m512 {
+    _mm512_set1_ps(0.0)
+}
+
+/// Loads 512-bits (composed of 8 packed double-precision (64-bit)
+/// floating-point elements) from memory into result.
+/// `mem_addr` does not need to be aligned on any particular boundary.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_loadu_pd)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vmovups))]
+pub unsafe fn _mm512_loadu_pd(mem_addr: *const f64) -> __m512d {
+    ptr::read_unaligned(mem_addr as *const __m512d)
+}
+
+/// Stores 512-bits (composed of 8 packed double-precision (64-bit)
+/// floating-point elements) from `a` into memory.
+/// `mem_addr` does not need to be aligned on any particular boundary.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_storeu_pd)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vmovups))]
+pub unsafe fn _mm512_storeu_pd(mem_addr: *mut f64, a: __m512d) {
+    ptr::write_unaligned(mem_addr as *mut __m512d, a);
+}
+
+/// Loads 512-bits (composed of 16 packed single-precision (32-bit)
+/// floating-point elements) from memory into result.
+/// `mem_addr` does not need to be aligned on any particular boundary.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_loadu_ps)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vmovups))]
+pub unsafe fn _mm512_loadu_ps(mem_addr: *const f32) -> __m512 {
+    ptr::read_unaligned(mem_addr as *const __m512)
+}
+
+/// Stores 512-bits (composed of 16 packed single-precision (32-bit)
+/// floating-point elements) from `a` into memory.
+/// `mem_addr` does not need to be aligned on any particular boundary.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_storeu_ps)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vmovups))]
+pub unsafe fn _mm512_storeu_ps(mem_addr: *mut f32, a: __m512) {
+    ptr::write_unaligned(mem_addr as *mut __m512, a);
+}
+
+/// Sets packed double-precision (64-bit) floating-point elements in `dst` with the supplied values in
+/// reverse order.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,4909&text=_mm512_setr_pd)
+#[inline]
+#[target_feature(enable = "avx512f")]
+pub unsafe fn _mm512_setr_pd(
+    e0: f64,
+    e1: f64,
+    e2: f64,
+    e3: f64,
+    e4: f64,
+    e5: f64,
+    e6: f64,
+    e7: f64,
+) -> __m512d {
+    let r = f64x8::new(e0, e1, e2, e3, e4, e5, e6, e7);
+    transmute(r)
+}
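+
+// Round-trip sketch for the unaligned load/store pairs above (illustrative
+// only; hypothetical helper). No particular alignment is required of either
+// buffer.
+#[cfg(test)]
+#[target_feature(enable = "avx512f")]
+unsafe fn example_loadu_storeu_pd() {
+    let src = [1.0f64, 2., 3., 4., 5., 6., 7., 8.];
+    let mut dst = [0.0f64; 8];
+    let v = _mm512_loadu_pd(src.as_ptr());
+    _mm512_storeu_pd(dst.as_mut_ptr(), v);
+    assert_eq!(src, dst);
+}
+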
+/// Sets packed double-precision (64-bit) floating-point elements in `dst` with the supplied values.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,4909&text=_mm512_set_pd)
+#[inline]
+#[target_feature(enable = "avx512f")]
+pub unsafe fn _mm512_set_pd(
+    e0: f64,
+    e1: f64,
+    e2: f64,
+    e3: f64,
+    e4: f64,
+    e5: f64,
+    e6: f64,
+    e7: f64,
+) -> __m512d {
+    _mm512_setr_pd(e7, e6, e5, e4, e3, e2, e1, e0)
+}
+
+/// Equal
+pub const _MM_CMPINT_EQ: _MM_CMPINT_ENUM = 0x00;
+/// Less-than
+pub const _MM_CMPINT_LT: _MM_CMPINT_ENUM = 0x01;
+/// Less-than-or-equal
+pub const _MM_CMPINT_LE: _MM_CMPINT_ENUM = 0x02;
+/// False
+pub const _MM_CMPINT_FALSE: _MM_CMPINT_ENUM = 0x03;
+/// Not-equal
+pub const _MM_CMPINT_NE: _MM_CMPINT_ENUM = 0x04;
+/// Not less-than
+pub const _MM_CMPINT_NLT: _MM_CMPINT_ENUM = 0x05;
+/// Not less-than-or-equal
+pub const _MM_CMPINT_NLE: _MM_CMPINT_ENUM = 0x06;
+/// True
+pub const _MM_CMPINT_TRUE: _MM_CMPINT_ENUM = 0x07;
+
+#[allow(improper_ctypes)]
+extern "C" {
+    #[link_name = "llvm.x86.avx512.gather.dpd.512"]
+    fn vgatherdpd(src: f64x8, slice: *const i8, offsets: i32x8, mask: i8, scale: i32) -> f64x8;
+    #[link_name = "llvm.x86.avx512.gather.dps.512"]
+    fn vgatherdps(src: f32x16, slice: *const i8, offsets: i32x16, mask: i16, scale: i32) -> f32x16;
+    #[link_name = "llvm.x86.avx512.gather.qpd.512"]
+    fn vgatherqpd(src: f64x8, slice: *const i8, offsets: i64x8, mask: i8, scale: i32) -> f64x8;
+    #[link_name = "llvm.x86.avx512.gather.qps.512"]
+    fn vgatherqps(src: f32x8, slice: *const i8, offsets: i64x8, mask: i8, scale: i32) -> f32x8;
+    #[link_name = "llvm.x86.avx512.gather.dpq.512"]
+    fn vpgatherdq(src: i64x8, slice: *const i8, offsets: i32x8, mask: i8, scale: i32) -> i64x8;
+    #[link_name = "llvm.x86.avx512.gather.dpi.512"]
+    fn vpgatherdd(src: i32x16, slice: *const i8, offsets: i32x16, mask: i16, scale: i32) -> i32x16;
+    #[link_name = "llvm.x86.avx512.gather.qpq.512"]
+    fn vpgatherqq(src: i64x8, slice: *const i8, offsets: i64x8, mask: i8, scale: i32) -> i64x8;
+    #[link_name = "llvm.x86.avx512.gather.qpi.512"]
+    fn vpgatherqd(src: i32x8, slice: *const i8, offsets: i64x8, mask: i8, scale: i32) -> i32x8;
+
+    #[link_name = "llvm.x86.avx512.scatter.dpd.512"]
+    fn vscatterdpd(slice: *mut i8, mask: i8, offsets: i32x8, src: f64x8, scale: i32);
+    #[link_name = "llvm.x86.avx512.scatter.dps.512"]
+    fn vscatterdps(slice: *mut i8, mask: i16, offsets: i32x16, src: f32x16, scale: i32);
+    #[link_name = "llvm.x86.avx512.scatter.qpd.512"]
+    fn vscatterqpd(slice: *mut i8, mask: i8, offsets: i64x8, src: f64x8, scale: i32);
+    #[link_name = "llvm.x86.avx512.scatter.qps.512"]
+    fn vscatterqps(slice: *mut i8, mask: i8, offsets: i64x8, src: f32x8, scale: i32);
+    #[link_name = "llvm.x86.avx512.scatter.dpq.512"]
+    fn vpscatterdq(slice: *mut i8, mask: i8, offsets: i32x8, src: i64x8, scale: i32);
+    #[link_name = "llvm.x86.avx512.scatter.dpi.512"]
+    fn vpscatterdd(slice: *mut i8, mask: i16, offsets: i32x16, src: i32x16, scale: i32);
+    #[link_name = "llvm.x86.avx512.scatter.qpq.512"]
+    fn vpscatterqq(slice: *mut i8, mask: i8, offsets: i64x8, src: i64x8, scale: i32);
+    #[link_name = "llvm.x86.avx512.scatter.qpi.512"]
+    fn vpscatterqd(slice: *mut i8, mask: i8, offsets: i64x8, src: i32x8, scale: i32);
+
+    #[link_name = "llvm.x86.avx512.mask.cmp.ss"]
+    fn vcmpss(a: __m128, b: __m128, op: i32, m: i8, sae: i32) -> i8;
+    #[link_name = "llvm.x86.avx512.mask.cmp.sd"]
+    fn vcmpsd(a: __m128d, b: __m128d, op: i32, m: i8, sae: i32) -> i8;
+    #[link_name = "llvm.x86.avx512.mask.cmp.ps.512"]
+    fn vcmpps(a: f32x16, b: f32x16, op: i32, m:
i16, sae: i32) -> i16; + #[link_name = "llvm.x86.avx512.mask.cmp.pd.512"] + fn vcmppd(a: f64x8, b: f64x8, op: i32, m: i8, sae: i32) -> i8; + #[link_name = "llvm.x86.avx512.mask.ucmp.q.512"] + fn vpcmpuq(a: i64x8, b: i64x8, op: i32, m: i8) -> i8; + #[link_name = "llvm.x86.avx512.mask.cmp.q.512"] + fn vpcmpq(a: i64x8, b: i64x8, op: i32, m: i8) -> i8; + #[link_name = "llvm.x86.avx512.mask.ucmp.d.512"] + fn vpcmpud(a: i32x16, b: i32x16, op: i32, m: i16) -> i16; + #[link_name = "llvm.x86.avx512.mask.cmp.d.512"] + fn vpcmpd(a: i32x16, b: i32x16, op: i32, m: i16) -> i16; + + #[link_name = "llvm.x86.avx512.mask.prol.d.512"] + fn vprold(a: i32x16, shift: i8) -> i32x16; + #[link_name = "llvm.x86.avx512.mask.pror.d.512"] + fn vprord(a: i32x16, shift: i8) -> i32x16; + #[link_name = "llvm.x86.avx512.mask.prol.q.512"] + fn vprolq(a: i64x8, shift: i8) -> i64x8; + #[link_name = "llvm.x86.avx512.mask.pror.q.512"] + fn vprorq(a: i64x8, shift: i8) -> i64x8; + + #[link_name = "llvm.x86.avx512.mask.prolv.d.512"] + fn vprolvd(a: i32x16, b: i32x16) -> i32x16; + #[link_name = "llvm.x86.avx512.mask.prorv.d.512"] + fn vprorvd(a: i32x16, b: i32x16) -> i32x16; + #[link_name = "llvm.x86.avx512.mask.prolv.q.512"] + fn vprolvq(a: i64x8, b: i64x8) -> i64x8; + #[link_name = "llvm.x86.avx512.mask.prorv.q.512"] + fn vprorvq(a: i64x8, b: i64x8) -> i64x8; + + #[link_name = "llvm.x86.avx512.psllv.d.512"] + fn vpsllvd(a: i32x16, b: i32x16) -> i32x16; + #[link_name = "llvm.x86.avx512.psrlv.d.512"] + fn vpsrlvd(a: i32x16, b: i32x16) -> i32x16; + #[link_name = "llvm.x86.avx512.psllv.q.512"] + fn vpsllvq(a: i64x8, b: i64x8) -> i64x8; + #[link_name = "llvm.x86.avx512.psrlv.q.512"] + fn vpsrlvq(a: i64x8, b: i64x8) -> i64x8; + + #[link_name = "llvm.x86.avx512.pslli.d.512"] + fn vpsllid(a: i32x16, imm8: u32) -> i32x16; + #[link_name = "llvm.x86.avx512.psrli.d.512"] + fn vpsrlid(a: i32x16, imm8: u32) -> i32x16; + #[link_name = "llvm.x86.avx512.pslli.q.512"] + fn vpslliq(a: i64x8, imm8: u32) -> i64x8; + #[link_name = "llvm.x86.avx512.psrli.q.512"] + fn vpsrliq(a: i64x8, imm8: u32) -> i64x8; + + #[link_name = "llvm.x86.avx512.psll.d.512"] + fn vpslld(a: i32x16, count: i32x4) -> i32x16; + #[link_name = "llvm.x86.avx512.psrl.d.512"] + fn vpsrld(a: i32x16, count: i32x4) -> i32x16; + #[link_name = "llvm.x86.avx512.psll.q.512"] + fn vpsllq(a: i64x8, count: i64x2) -> i64x8; + #[link_name = "llvm.x86.avx512.psrl.q.512"] + fn vpsrlq(a: i64x8, count: i64x2) -> i64x8; + + #[link_name = "llvm.x86.avx512.psra.d.512"] + fn vpsrad(a: i32x16, count: i32x4) -> i32x16; + #[link_name = "llvm.x86.avx512.psra.q.512"] + fn vpsraq(a: i64x8, count: i64x2) -> i64x8; + + #[link_name = "llvm.x86.avx512.psrai.d.512"] + fn vpsraid(a: i32x16, imm8: u32) -> i32x16; + #[link_name = "llvm.x86.avx512.psrai.q.512"] + fn vpsraiq(a: i64x8, imm8: u32) -> i64x8; + + #[link_name = "llvm.x86.avx512.psrav.d.512"] + fn vpsravd(a: i32x16, count: i32x16) -> i32x16; + #[link_name = "llvm.x86.avx512.psrav.q.512"] + fn vpsravq(a: i64x8, count: i64x8) -> i64x8; + + #[link_name = "llvm.x86.avx512.kand.w"] + fn kandw(ma: u16, mb: u16) -> u16; + #[link_name = "llvm.x86.avx512.kor.w"] + fn korw(ma: u16, mb: u16) -> u16; + #[link_name = "llvm.x86.avx512.kxor.w"] + fn kxorw(ma: u16, mb: u16) -> u16; +} + +#[cfg(test)] +mod tests { + use std; + use stdarch_test::simd_test; + + use crate::core_arch::x86::*; + use crate::hint::black_box; + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_abs_epi32() { + #[rustfmt::skip] + let a = _mm512_setr_epi32( + 0, 1, -1, i32::MAX, + i32::MIN, 
100, -100, -32, + 0, 1, -1, i32::MAX, + i32::MIN, 100, -100, -32, + ); + let r = _mm512_abs_epi32(a); + let e = _mm512_setr_epi32( + 0, + 1, + 1, + i32::MAX, + i32::MAX.wrapping_add(1), + 100, + 100, + 32, + 0, + 1, + 1, + i32::MAX, + i32::MAX.wrapping_add(1), + 100, + 100, + 32, + ); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_abs_epi32() { + #[rustfmt::skip] + let a = _mm512_setr_epi32( + 0, 1, -1, i32::MAX, + i32::MIN, 100, -100, -32, + 0, 1, -1, i32::MAX, + i32::MIN, 100, -100, -32, + ); + let r = _mm512_mask_abs_epi32(a, 0, a); + assert_eq_m512i(r, a); + let r = _mm512_mask_abs_epi32(a, 0b11111111, a); let e = _mm512_setr_epi32( 0, 1, @@ -2503,6 +3754,31 @@ mod tests { 100, 100, 32, + 0, + 1, + -1, + i32::MAX, + i32::MIN, + 100, + -100, + -32, + ); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_abs_epi32() { + #[rustfmt::skip] + let a = _mm512_setr_epi32( + 0, 1, -1, i32::MAX, + i32::MIN, 100, -100, -32, + 0, 1, -1, i32::MAX, + i32::MIN, 100, -100, -32, + ); + let r = _mm512_maskz_abs_epi32(0, a); + assert_eq_m512i(r, _mm512_setzero_si512()); + let r = _mm512_maskz_abs_epi32(0b11111111, a); + let e = _mm512_setr_epi32( 0, 1, 1, @@ -2511,945 +3787,1506 @@ mod tests { 100, 100, 32, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + ); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_i32gather_ps() { + let mut arr = [0f32; 256]; + for i in 0..256 { + arr[i] = i as f32; + } + // A multiplier of 4 is word-addressing + #[rustfmt::skip] + let index = _mm512_setr_epi32(0, 16, 32, 48, 64, 80, 96, 112, + 120, 128, 136, 144, 152, 160, 168, 176); + let r = _mm512_i32gather_ps(index, arr.as_ptr() as *const u8, 4); + #[rustfmt::skip] + assert_eq_m512(r, _mm512_setr_ps(0., 16., 32., 48., 64., 80., 96., 112., + 120., 128., 136., 144., 152., 160., 168., 176.)); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_i32gather_ps() { + let mut arr = [0f32; 256]; + for i in 0..256 { + arr[i] = i as f32; + } + let src = _mm512_set1_ps(2.); + let mask = 0b10101010_10101010; + #[rustfmt::skip] + let index = _mm512_setr_epi32(0, 16, 32, 48, 64, 80, 96, 112, + 120, 128, 136, 144, 152, 160, 168, 176); + // A multiplier of 4 is word-addressing + let r = _mm512_mask_i32gather_ps(src, mask, index, arr.as_ptr() as *const u8, 4); + #[rustfmt::skip] + assert_eq_m512(r, _mm512_setr_ps(2., 16., 2., 48., 2., 80., 2., 112., + 2., 128., 2., 144., 2., 160., 2., 176.)); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_i32gather_epi32() { + let mut arr = [0i32; 256]; + for i in 0..256 { + arr[i] = i as i32; + } + // A multiplier of 4 is word-addressing + #[rustfmt::skip] + let index = _mm512_setr_epi32(0, 16, 32, 48, 64, 80, 96, 112, + 120, 128, 136, 144, 152, 160, 168, 176); + let r = _mm512_i32gather_epi32(index, arr.as_ptr() as *const u8, 4); + #[rustfmt::skip] + assert_eq_m512i(r, _mm512_setr_epi32(0, 16, 32, 48, 64, 80, 96, 112, + 120, 128, 136, 144, 152, 160, 168, 176)); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_i32gather_epi32() { + let mut arr = [0i32; 256]; + for i in 0..256 { + arr[i] = i as i32; + } + let src = _mm512_set1_epi32(2); + let mask = 0b10101010_10101010; + #[rustfmt::skip] + let index = _mm512_setr_epi32(0, 16, 32, 48, 64, 80, 96, 112, + 128, 144, 160, 176, 192, 208, 224, 240); + // A multiplier of 4 is word-addressing + let r = _mm512_mask_i32gather_epi32(src, mask, index, arr.as_ptr() as *const u8, 4); + 
#[rustfmt::skip] + assert_eq_m512i(r, _mm512_setr_epi32(2, 16, 2, 48, 2, 80, 2, 112, + 2, 144, 2, 176, 2, 208, 2, 240)); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_i32scatter_ps() { + let mut arr = [0f32; 256]; + #[rustfmt::skip] + let index = _mm512_setr_epi32(0, 16, 32, 48, 64, 80, 96, 112, + 128, 144, 160, 176, 192, 208, 224, 240); + let src = _mm512_setr_ps( + 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16., ); - assert_eq_m512i(r, e); + // A multiplier of 4 is word-addressing + _mm512_i32scatter_ps(arr.as_mut_ptr() as *mut u8, index, src, 4); + let mut expected = [0f32; 256]; + for i in 0..16 { + expected[i * 16] = (i + 1) as f32; + } + assert_eq!(&arr[..], &expected[..],); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_i32scatter_ps() { + let mut arr = [0f32; 256]; + let mask = 0b10101010_10101010; + #[rustfmt::skip] + let index = _mm512_setr_epi32(0, 16, 32, 48, 64, 80, 96, 112, + 128, 144, 160, 176, 192, 208, 224, 240); + let src = _mm512_setr_ps( + 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16., + ); + // A multiplier of 4 is word-addressing + _mm512_mask_i32scatter_ps(arr.as_mut_ptr() as *mut u8, mask, index, src, 4); + let mut expected = [0f32; 256]; + for i in 0..8 { + expected[i * 32 + 16] = 2. * (i + 1) as f32; + } + assert_eq!(&arr[..], &expected[..],); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_i32scatter_epi32() { + let mut arr = [0i32; 256]; + #[rustfmt::skip] + + let index = _mm512_setr_epi32(0, 16, 32, 48, 64, 80, 96, 112, + 128, 144, 160, 176, 192, 208, 224, 240); + let src = _mm512_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + // A multiplier of 4 is word-addressing + _mm512_i32scatter_epi32(arr.as_mut_ptr() as *mut u8, index, src, 4); + let mut expected = [0i32; 256]; + for i in 0..16 { + expected[i * 16] = (i + 1) as i32; + } + assert_eq!(&arr[..], &expected[..],); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_i32scatter_epi32() { + let mut arr = [0i32; 256]; + let mask = 0b10101010_10101010; + #[rustfmt::skip] + let index = _mm512_setr_epi32(0, 16, 32, 48, 64, 80, 96, 112, + 128, 144, 160, 176, 192, 208, 224, 240); + let src = _mm512_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + // A multiplier of 4 is word-addressing + _mm512_mask_i32scatter_epi32(arr.as_mut_ptr() as *mut u8, mask, index, src, 4); + let mut expected = [0i32; 256]; + for i in 0..8 { + expected[i * 32 + 16] = 2 * (i + 1) as i32; + } + assert_eq!(&arr[..], &expected[..],); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_cmplt_ps_mask() { + #[rustfmt::skip] + let a = _mm512_set_ps(0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100., + 0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.); + let b = _mm512_set1_ps(-1.); + let m = _mm512_cmplt_ps_mask(a, b); + assert_eq!(m, 0b00000101_00000101); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_cmplt_ps_mask() { + #[rustfmt::skip] + let a = _mm512_set_ps(0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100., + 0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.); + let b = _mm512_set1_ps(-1.); + let mask = 0b01100110_01100110; + let r = _mm512_mask_cmplt_ps_mask(mask, a, b); + assert_eq!(r, 0b00000100_00000100); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_cmpnlt_ps_mask() { + #[rustfmt::skip] + let a = _mm512_set_ps(0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100., + 0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 
100., -100.); + let b = _mm512_set1_ps(-1.); + assert_eq!(_mm512_cmpnlt_ps_mask(a, b), !_mm512_cmplt_ps_mask(a, b)); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_cmpnlt_ps_mask() { + #[rustfmt::skip] + let a = _mm512_set_ps(0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100., + 0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.); + let b = _mm512_set1_ps(-1.); + let mask = 0b01111010_01111010; + assert_eq!(_mm512_mask_cmpnlt_ps_mask(mask, a, b), 0b01111010_01111010); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_cmpnle_ps_mask() { + #[rustfmt::skip] + let a = _mm512_set_ps(0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100., + 0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.); + let b = _mm512_set1_ps(-1.); + let m = _mm512_cmpnle_ps_mask(b, a); + assert_eq!(m, 0b00001101_00001101); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_cmpnle_ps_mask() { + #[rustfmt::skip] + let a = _mm512_set_ps(0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100., + 0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.); + let b = _mm512_set1_ps(-1.); + let mask = 0b01100110_01100110; + let r = _mm512_mask_cmpnle_ps_mask(mask, b, a); + assert_eq!(r, 0b00000100_00000100); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_cmple_ps_mask() { + #[rustfmt::skip] + let a = _mm512_set_ps(0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100., + 0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.); + let b = _mm512_set1_ps(-1.); + assert_eq!(_mm512_cmple_ps_mask(a, b), 0b00100101_00100101); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_cmple_ps_mask() { + #[rustfmt::skip] + let a = _mm512_set_ps(0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100., + 0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.); + let b = _mm512_set1_ps(-1.); + let mask = 0b01111010_01111010; + assert_eq!(_mm512_mask_cmple_ps_mask(mask, a, b), 0b00100000_00100000); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_cmpeq_ps_mask() { + #[rustfmt::skip] + let a = _mm512_set_ps(0., 1., -1., 13., f32::MAX, f32::MIN, f32::NAN, -100., + 0., 1., -1., 13., f32::MAX, f32::MIN, f32::NAN, -100.); + #[rustfmt::skip] + let b = _mm512_set_ps(0., 1., 13., 42., f32::MAX, f32::MIN, f32::NAN, -100., + 0., 1., 13., 42., f32::MAX, f32::MIN, f32::NAN, -100.); + let m = _mm512_cmpeq_ps_mask(b, a); + assert_eq!(m, 0b11001101_11001101); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_cmpeq_ps_mask() { + #[rustfmt::skip] + let a = _mm512_set_ps(0., 1., -1., 13., f32::MAX, f32::MIN, f32::NAN, -100., + 0., 1., -1., 13., f32::MAX, f32::MIN, f32::NAN, -100.); + #[rustfmt::skip] + let b = _mm512_set_ps(0., 1., 13., 42., f32::MAX, f32::MIN, f32::NAN, -100., + 0., 1., 13., 42., f32::MAX, f32::MIN, f32::NAN, -100.); + let mask = 0b01111010_01111010; + let r = _mm512_mask_cmpeq_ps_mask(mask, b, a); + assert_eq!(r, 0b01001000_01001000); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_cmpneq_ps_mask() { + #[rustfmt::skip] + let a = _mm512_set_ps(0., 1., -1., 13., f32::MAX, f32::MIN, f32::NAN, -100., + 0., 1., -1., 13., f32::MAX, f32::MIN, f32::NAN, -100.); + #[rustfmt::skip] + let b = _mm512_set_ps(0., 1., 13., 42., f32::MAX, f32::MIN, f32::NAN, -100., + 0., 1., 13., 42., f32::MAX, f32::MIN, f32::NAN, -100.); + let m = _mm512_cmpneq_ps_mask(b, a); + assert_eq!(m, 0b00110010_00110010); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_cmpneq_ps_mask() { + #[rustfmt::skip] + let a = 
_mm512_set_ps(0., 1., -1., 13., f32::MAX, f32::MIN, f32::NAN, -100., + 0., 1., -1., 13., f32::MAX, f32::MIN, f32::NAN, -100.); + #[rustfmt::skip] + let b = _mm512_set_ps(0., 1., 13., 42., f32::MAX, f32::MIN, f32::NAN, -100., + 0., 1., 13., 42., f32::MAX, f32::MIN, f32::NAN, -100.); + let mask = 0b01111010_01111010; + let r = _mm512_mask_cmpneq_ps_mask(mask, b, a); + assert_eq!(r, 0b00110010_00110010) + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_cmp_ps_mask() { + #[rustfmt::skip] + let a = _mm512_set_ps(0., 1., -1., 13., f32::MAX, f32::MIN, 100., -100., + 0., 1., -1., 13., f32::MAX, f32::MIN, 100., -100.); + let b = _mm512_set1_ps(-1.); + let m = _mm512_cmp_ps_mask(a, b, _CMP_LT_OQ); + assert_eq!(m, 0b00000101_00000101); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_cmp_ps_mask() { + #[rustfmt::skip] + let a = _mm512_set_ps(0., 1., -1., 13., f32::MAX, f32::MIN, 100., -100., + 0., 1., -1., 13., f32::MAX, f32::MIN, 100., -100.); + let b = _mm512_set1_ps(-1.); + let mask = 0b01100110_01100110; + let r = _mm512_mask_cmp_ps_mask(mask, a, b, _CMP_LT_OQ); + assert_eq!(r, 0b00000100_00000100); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_cmp_round_ps_mask() { + #[rustfmt::skip] + let a = _mm512_set_ps(0., 1., -1., 13., f32::MAX, f32::MIN, 100., -100., + 0., 1., -1., 13., f32::MAX, f32::MIN, 100., -100.); + let b = _mm512_set1_ps(-1.); + let m = _mm512_cmp_round_ps_mask(a, b, _CMP_LT_OQ, _MM_FROUND_CUR_DIRECTION); + assert_eq!(m, 0b00000101_00000101); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_cmp_round_ps_mask() { + #[rustfmt::skip] + let a = _mm512_set_ps(0., 1., -1., 13., f32::MAX, f32::MIN, 100., -100., + 0., 1., -1., 13., f32::MAX, f32::MIN, 100., -100.); + let b = _mm512_set1_ps(-1.); + let mask = 0b01100110_01100110; + let r = _mm512_mask_cmp_round_ps_mask(mask, a, b, _CMP_LT_OQ, _MM_FROUND_CUR_DIRECTION); + assert_eq!(r, 0b00000100_00000100); } #[simd_test(enable = "avx512f")] - unsafe fn test_mm512_mask_abs_epi32() { + unsafe fn test_mm512_cmpord_ps_mask() { #[rustfmt::skip] - let a = _mm512_setr_epi32( - 0, 1, -1, i32::MAX, - i32::MIN, 100, -100, -32, - 0, 1, -1, i32::MAX, - i32::MIN, 100, -100, -32, - ); - let r = _mm512_mask_abs_epi32(a, 0, a); - assert_eq_m512i(r, a); - let r = _mm512_mask_abs_epi32(a, 0b11111111, a); - let e = _mm512_setr_epi32( - 0, - 1, - 1, - i32::MAX, - i32::MAX.wrapping_add(1), - 100, - 100, - 32, - 0, - 1, - -1, - i32::MAX, - i32::MIN, - 100, - -100, - -32, + let a = _mm512_set_ps(f32::NAN, f32::MAX, f32::NAN, f32::MIN, f32::NAN, -1., f32::NAN, 0., + f32::NAN, f32::MAX, f32::NAN, f32::MIN, f32::NAN, 1., f32::NAN, 2.); + #[rustfmt::skip] + let b = _mm512_set_ps(f32::NAN, f32::NAN, f32::NAN, f32::NAN, f32::MIN, f32::MAX, -1., 0., + f32::NAN, f32::NAN, f32::NAN, f32::NAN, f32::MIN, f32::MAX, -1., 2.); + let m = _mm512_cmpord_ps_mask(a, b); + assert_eq!(m, 0b00000101_00000101); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_cmpord_ps_mask() { + #[rustfmt::skip] + let a = _mm512_set_ps(f32::NAN, f32::MAX, f32::NAN, f32::MIN, f32::NAN, -1., f32::NAN, 0., + f32::NAN, f32::MAX, f32::NAN, f32::MIN, f32::NAN, 1., f32::NAN, 2.); + #[rustfmt::skip] + let b = _mm512_set_ps(f32::NAN, f32::NAN, f32::NAN, f32::NAN, f32::MIN, f32::MAX, -1., 0., + f32::NAN, f32::NAN, f32::NAN, f32::NAN, f32::MIN, f32::MAX, -1., 2.); + let mask = 0b11000011_11000011; + let m = _mm512_mask_cmpord_ps_mask(mask, a, b); + assert_eq!(m, 0b00000001_00000001); + } + + #[simd_test(enable = 
"avx512f")] + unsafe fn test_mm512_cmpunord_ps_mask() { + #[rustfmt::skip] + let a = _mm512_set_ps(f32::NAN, f32::MAX, f32::NAN, f32::MIN, f32::NAN, -1., f32::NAN, 0., + f32::NAN, f32::MAX, f32::NAN, f32::MIN, f32::NAN, 1., f32::NAN, 2.); + #[rustfmt::skip] + let b = _mm512_set_ps(f32::NAN, f32::NAN, f32::NAN, f32::NAN, f32::MIN, f32::MAX, -1., 0., + f32::NAN, f32::NAN, f32::NAN, f32::NAN, f32::MIN, f32::MAX, -1., 2.); + let m = _mm512_cmpunord_ps_mask(a, b); + + assert_eq!(m, 0b11111010_11111010); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_cmpunord_ps_mask() { + #[rustfmt::skip] + let a = _mm512_set_ps(f32::NAN, f32::MAX, f32::NAN, f32::MIN, f32::NAN, -1., f32::NAN, 0., + f32::NAN, f32::MAX, f32::NAN, f32::MIN, f32::NAN, 1., f32::NAN, 2.); + #[rustfmt::skip] + let b = _mm512_set_ps(f32::NAN, f32::NAN, f32::NAN, f32::NAN, f32::MIN, f32::MAX, -1., 0., + f32::NAN, f32::NAN, f32::NAN, f32::NAN, f32::MIN, f32::MAX, -1., 2.); + let mask = 0b00001111_00001111; + let m = _mm512_mask_cmpunord_ps_mask(mask, a, b); + assert_eq!(m, 0b000001010_00001010); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm_cmp_ss_mask() { + let a = _mm_setr_ps(2., 1., 1., 1.); + let b = _mm_setr_ps(1., 2., 2., 2.); + let m = _mm_cmp_ss_mask(a, b, _CMP_GE_OS); + assert_eq!(m, 1); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm_mask_cmp_ss_mask() { + let a = _mm_setr_ps(2., 1., 1., 1.); + let b = _mm_setr_ps(1., 2., 2., 2.); + let m = _mm_mask_cmp_ss_mask(0b10, a, b, _CMP_GE_OS); + assert_eq!(m, 0); + let m = _mm_mask_cmp_ss_mask(0b1, a, b, _CMP_GE_OS); + assert_eq!(m, 1); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm_cmp_round_ss_mask() { + let a = _mm_setr_ps(2., 1., 1., 1.); + let b = _mm_setr_ps(1., 2., 2., 2.); + let m = _mm_cmp_round_ss_mask(a, b, _CMP_GE_OS, _MM_FROUND_CUR_DIRECTION); + assert_eq!(m, 1); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm_mask_cmp_round_ss_mask() { + let a = _mm_setr_ps(2., 1., 1., 1.); + let b = _mm_setr_ps(1., 2., 2., 2.); + let m = _mm_mask_cmp_round_ss_mask(0b10, a, b, _CMP_GE_OS, _MM_FROUND_CUR_DIRECTION); + assert_eq!(m, 0); + let m = _mm_mask_cmp_round_ss_mask(0b1, a, b, _CMP_GE_OS, _MM_FROUND_CUR_DIRECTION); + assert_eq!(m, 1); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm_cmp_sd_mask() { + let a = _mm_setr_pd(2., 1.); + let b = _mm_setr_pd(1., 2.); + let m = _mm_cmp_sd_mask(a, b, _CMP_GE_OS); + assert_eq!(m, 1); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm_mask_cmp_sd_mask() { + let a = _mm_setr_pd(2., 1.); + let b = _mm_setr_pd(1., 2.); + let m = _mm_mask_cmp_sd_mask(0b10, a, b, _CMP_GE_OS); + assert_eq!(m, 0); + let m = _mm_mask_cmp_sd_mask(0b1, a, b, _CMP_GE_OS); + assert_eq!(m, 1); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm_cmp_round_sd_mask() { + let a = _mm_setr_pd(2., 1.); + let b = _mm_setr_pd(1., 2.); + let m = _mm_cmp_round_sd_mask(a, b, _CMP_GE_OS, _MM_FROUND_CUR_DIRECTION); + assert_eq!(m, 1); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm_mask_cmp_round_sd_mask() { + let a = _mm_setr_pd(2., 1.); + let b = _mm_setr_pd(1., 2.); + let m = _mm_mask_cmp_round_sd_mask(0b10, a, b, _CMP_GE_OS, _MM_FROUND_CUR_DIRECTION); + assert_eq!(m, 0); + let m = _mm_mask_cmp_round_sd_mask(0b1, a, b, _CMP_GE_OS, _MM_FROUND_CUR_DIRECTION); + assert_eq!(m, 1); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_cmplt_epu32_mask() { + #[rustfmt::skip] + let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100, + 
0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100); + let b = _mm512_set1_epi32(-1); + let m = _mm512_cmplt_epu32_mask(a, b); + assert_eq!(m, 0b11001111_11001111); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_cmplt_epu32_mask() { + #[rustfmt::skip] + let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100, + 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100); + let b = _mm512_set1_epi32(-1); + let mask = 0b01111010_01111010; + let r = _mm512_mask_cmplt_epu32_mask(mask, a, b); + assert_eq!(r, 0b01001010_01001010); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_cmpgt_epu32_mask() { + #[rustfmt::skip] + let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100, + 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100); + let b = _mm512_set1_epi32(-1); + let m = _mm512_cmpgt_epu32_mask(b, a); + assert_eq!(m, 0b11001111_11001111); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_cmpgt_epu32_mask() { + #[rustfmt::skip] + let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100, + 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100); + let b = _mm512_set1_epi32(-1); + let mask = 0b01111010_01111010; + let r = _mm512_mask_cmpgt_epu32_mask(mask, b, a); + assert_eq!(r, 0b01001010_01001010); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_cmple_epu32_mask() { + #[rustfmt::skip] + let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100, + 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100); + let b = _mm512_set1_epi32(-1); + assert_eq!( + _mm512_cmple_epu32_mask(a, b), + !_mm512_cmpgt_epu32_mask(a, b) + ) + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_cmple_epu32_mask() { + #[rustfmt::skip] + let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100, + 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100); + let b = _mm512_set1_epi32(-1); + let mask = 0b01111010_01111010; + assert_eq!( + _mm512_mask_cmple_epu32_mask(mask, a, b), + 0b01111010_01111010 ); - assert_eq_m512i(r, e); } #[simd_test(enable = "avx512f")] - unsafe fn test_mm512_maskz_abs_epi32() { + unsafe fn test_mm512_cmpge_epu32_mask() { + #[rustfmt::skip] + let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100, + 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100); + let b = _mm512_set1_epi32(-1); + assert_eq!( + _mm512_cmpge_epu32_mask(a, b), + !_mm512_cmplt_epu32_mask(a, b) + ) + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_cmpge_epu32_mask() { + #[rustfmt::skip] + let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100, + 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100); + let b = _mm512_set1_epi32(-1); + let mask = 0b01111010_01111010; + assert_eq!(_mm512_mask_cmpge_epu32_mask(mask, a, b), 0b00110000_00110000); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_cmpeq_epu32_mask() { + #[rustfmt::skip] + let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100, + 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100); + #[rustfmt::skip] + let b = _mm512_set_epi32(0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100, + 0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100); + let m = _mm512_cmpeq_epu32_mask(b, a); + assert_eq!(m, 0b11001111_11001111); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_cmpeq_epu32_mask() { #[rustfmt::skip] - let a =
_mm512_setr_epi32( - 0, 1, -1, i32::MAX, - i32::MIN, 100, -100, -32, - 0, 1, -1, i32::MAX, - i32::MIN, 100, -100, -32, - ); - let r = _mm512_maskz_abs_epi32(0, a); - assert_eq_m512i(r, _mm512_setzero_si512()); - let r = _mm512_maskz_abs_epi32(0b11111111, a); - let e = _mm512_setr_epi32( - 0, - 1, - 1, - i32::MAX, - i32::MAX.wrapping_add(1), - 100, - 100, - 32, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - ); - assert_eq_m512i(r, e); + let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100, + 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100); + #[rustfmt::skip] + let b = _mm512_set_epi32(0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100, + 0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100); + let mask = 0b01111010_01111010; + let r = _mm512_mask_cmpeq_epu32_mask(mask, b, a); + assert_eq!(r, 0b01001010_01001010); } #[simd_test(enable = "avx512f")] - unsafe fn test_mm512_i32gather_ps() { - let mut arr = [0f32; 256]; - for i in 0..256 { - arr[i] = i as f32; - } - // A multiplier of 4 is word-addressing + unsafe fn test_mm512_cmpneq_epu32_mask() { #[rustfmt::skip] - let index = _mm512_setr_epi32(0, 16, 32, 48, 64, 80, 96, 112, - 120, 128, 136, 144, 152, 160, 168, 176); - let r = _mm512_i32gather_ps(index, arr.as_ptr() as *const u8, 4); + let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100, + 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100); #[rustfmt::skip] - assert_eq_m512(r, _mm512_setr_ps(0., 16., 32., 48., 64., 80., 96., 112., - 120., 128., 136., 144., 152., 160., 168., 176.)); + let b = _mm512_set_epi32(0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100, + 0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100); + let m = _mm512_cmpneq_epu32_mask(b, a); + assert_eq!(m, !_mm512_cmpeq_epu32_mask(b, a)); } #[simd_test(enable = "avx512f")] - unsafe fn test_mm512_mask_i32gather_ps() { - let mut arr = [0f32; 256]; - for i in 0..256 { - arr[i] = i as f32; - } - let src = _mm512_set1_ps(2.); - let mask = 0b10101010_10101010; + unsafe fn test_mm512_mask_cmpneq_epu32_mask() { #[rustfmt::skip] - let index = _mm512_setr_epi32(0, 16, 32, 48, 64, 80, 96, 112, - 120, 128, 136, 144, 152, 160, 168, 176); - // A multiplier of 4 is word-addressing - let r = _mm512_mask_i32gather_ps(src, mask, index, arr.as_ptr() as *const u8, 4); + let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, -100, 100, + 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, -100, 100); #[rustfmt::skip] - assert_eq_m512(r, _mm512_setr_ps(2., 16., 2., 48., 2., 80., 2., 112., - 2., 128., 2., 144., 2., 160., 2., 176.)); + let b = _mm512_set_epi32(0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100, + 0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100); + let mask = 0b01111010_01111010; + let r = _mm512_mask_cmpneq_epu32_mask(mask, b, a); + assert_eq!(r, 0b00110010_00110010); } #[simd_test(enable = "avx512f")] - unsafe fn test_mm512_i32gather_epi32() { - let mut arr = [0i32; 256]; - for i in 0..256 { - arr[i] = i as i32; - } - // A multiplier of 4 is word-addressing + unsafe fn test_mm512_cmp_epu32_mask() { #[rustfmt::skip] - let index = _mm512_setr_epi32(0, 16, 32, 48, 64, 80, 96, 112, - 120, 128, 136, 144, 152, 160, 168, 176); - let r = _mm512_i32gather_epi32(index, arr.as_ptr() as *const u8, 4); + let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100, + 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100); + let b = _mm512_set1_epi32(-1); + let m = _mm512_cmp_epu32_mask(a, b, _MM_CMPINT_LT); + assert_eq!(m, 0b11001111_11001111); + } + + #[simd_test(enable = 
"avx512f")] + unsafe fn test_mm512_mask_cmp_epu32_mask() { #[rustfmt::skip] - assert_eq_m512i(r, _mm512_setr_epi32(0, 16, 32, 48, 64, 80, 96, 112, - 120, 128, 136, 144, 152, 160, 168, 176)); + let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100, + 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100); + let b = _mm512_set1_epi32(-1); + let mask = 0b01111010_01111010; + let r = _mm512_mask_cmp_epu32_mask(mask, a, b, _MM_CMPINT_LT); + assert_eq!(r, 0b01001010_01001010); } #[simd_test(enable = "avx512f")] - unsafe fn test_mm512_mask_i32gather_epi32() { - let mut arr = [0i32; 256]; - for i in 0..256 { - arr[i] = i as i32; - } - let src = _mm512_set1_epi32(2); - let mask = 0b10101010_10101010; + unsafe fn test_mm512_cmplt_epi32_mask() { #[rustfmt::skip] - let index = _mm512_setr_epi32(0, 16, 32, 48, 64, 80, 96, 112, - 128, 144, 160, 176, 192, 208, 224, 240); - // A multiplier of 4 is word-addressing - let r = _mm512_mask_i32gather_epi32(src, mask, index, arr.as_ptr() as *const u8, 4); + let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100, + 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100); + let b = _mm512_set1_epi32(-1); + let m = _mm512_cmplt_epi32_mask(a, b); + assert_eq!(m, 0b00000101_00000101); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_cmplt_epi32_mask() { #[rustfmt::skip] - assert_eq_m512i(r, _mm512_setr_epi32(2, 16, 2, 48, 2, 80, 2, 112, - 2, 144, 2, 176, 2, 208, 2, 240)); + let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100, + 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100); + let b = _mm512_set1_epi32(-1); + let mask = 0b01100110_01100110; + let r = _mm512_mask_cmplt_epi32_mask(mask, a, b); + assert_eq!(r, 0b00000100_00000100); } #[simd_test(enable = "avx512f")] - unsafe fn test_mm512_i32scatter_ps() { - let mut arr = [0f32; 256]; + unsafe fn test_mm512_cmpgt_epi32_mask() { #[rustfmt::skip] - let index = _mm512_setr_epi32(0, 16, 32, 48, 64, 80, 96, 112, - 128, 144, 160, 176, 192, 208, 224, 240); - let src = _mm512_setr_ps( - 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16., - ); - // A multiplier of 4 is word-addressing - _mm512_i32scatter_ps(arr.as_mut_ptr() as *mut u8, index, src, 4); - let mut expected = [0f32; 256]; - for i in 0..16 { - expected[i * 16] = (i + 1) as f32; - } - assert_eq!(&arr[..], &expected[..],); + let a = _mm512_set_epi32(0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100, + 0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100); + let b = _mm512_set1_epi32(-1); + let m = _mm512_cmpgt_epi32_mask(b, a); + assert_eq!(m, 0b00000101_00000101); } #[simd_test(enable = "avx512f")] - unsafe fn test_mm512_mask_i32scatter_ps() { - let mut arr = [0f32; 256]; - let mask = 0b10101010_10101010; + unsafe fn test_mm512_mask_cmpgt_epi32_mask() { #[rustfmt::skip] - let index = _mm512_setr_epi32(0, 16, 32, 48, 64, 80, 96, 112, - 128, 144, 160, 176, 192, 208, 224, 240); - let src = _mm512_setr_ps( - 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16., - ); - // A multiplier of 4 is word-addressing - _mm512_mask_i32scatter_ps(arr.as_mut_ptr() as *mut u8, mask, index, src, 4); - let mut expected = [0f32; 256]; - for i in 0..8 { - expected[i * 32 + 16] = 2. 
* (i + 1) as f32; - } - assert_eq!(&arr[..], &expected[..],); + let a = _mm512_set_epi32(0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100, + 0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100); + let b = _mm512_set1_epi32(-1); + let mask = 0b01100110_01100110; + let r = _mm512_mask_cmpgt_epi32_mask(mask, b, a); + assert_eq!(r, 0b00000100_00000100); } #[simd_test(enable = "avx512f")] - unsafe fn test_mm512_i32scatter_epi32() { - let mut arr = [0i32; 256]; + unsafe fn test_mm512_cmple_epi32_mask() { #[rustfmt::skip] + let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100, + 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100); + let b = _mm512_set1_epi32(-1); + assert_eq!( + _mm512_cmple_epi32_mask(a, b), + !_mm512_cmpgt_epi32_mask(a, b) + ) + } - - let index = _mm512_setr_epi32(0, 16, 32, 48, 64, 80, 96, 112, - 128, 144, 160, 176, 192, 208, 224, 240); - let src = _mm512_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); - // A multiplier of 4 is word-addressing - _mm512_i32scatter_epi32(arr.as_mut_ptr() as *mut u8, index, src, 4); - let mut expected = [0i32; 256]; - for i in 0..16 { - expected[i * 16] = (i + 1) as i32; - } - assert_eq!(&arr[..], &expected[..],); + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_cmple_epi32_mask() { + #[rustfmt::skip] + let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100, + 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100); + let b = _mm512_set1_epi32(-1); + let mask = 0b01111010_01111010; + assert_eq!(_mm512_mask_cmple_epi32_mask(mask, a, b), 0b00110000_00110000); } #[simd_test(enable = "avx512f")] - unsafe fn test_mm512_mask_i32scatter_epi32() { - let mut arr = [0i32; 256]; - let mask = 0b10101010_10101010; + unsafe fn test_mm512_cmpge_epi32_mask() { #[rustfmt::skip] - let index = _mm512_setr_epi32(0, 16, 32, 48, 64, 80, 96, 112, - 128, 144, 160, 176, 192, 208, 224, 240); - let src = _mm512_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); - // A multiplier of 4 is word-addressing - _mm512_mask_i32scatter_epi32(arr.as_mut_ptr() as *mut u8, mask, index, src, 4); - let mut expected = [0i32; 256]; - for i in 0..8 { - expected[i * 32 + 16] = 2 * (i + 1) as i32; - } - assert_eq!(&arr[..], &expected[..],); + let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100, + 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100); + let b = _mm512_set1_epi32(-1); + assert_eq!( + _mm512_cmpge_epi32_mask(a, b), + !_mm512_cmplt_epi32_mask(a, b) + ) } #[simd_test(enable = "avx512f")] - unsafe fn test_mm512_cmplt_ps_mask() { + unsafe fn test_mm512_mask_cmpge_epi32_mask() { #[rustfmt::skip] - let a = _mm512_set_ps(0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100., - 0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.); - let b = _mm512_set1_ps(-1.); - let m = _mm512_cmplt_ps_mask(a, b); - assert_eq!(m, 0b00000101_00000101); + let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100, + 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100); + let b = _mm512_set1_epi32(-1); + let mask = 0b01111010_01111010; + assert_eq!( + _mm512_mask_cmpge_epi32_mask(mask, a, b), + 0b01111010_01111010 + ); } #[simd_test(enable = "avx512f")] - unsafe fn test_mm512_mask_cmplt_ps_mask() { + unsafe fn test_mm512_cmpeq_epi32_mask() { + #[rustfmt::skip] + let a = _mm512_set_epi32(0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100, + 0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100); + #[rustfmt::skip] + let b = _mm512_set_epi32(0, 1, 13, 42,
i32::MAX, i32::MIN, 100, -100, + 0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100); + let m = _mm512_cmpeq_epi32_mask(b, a); + assert_eq!(m, 0b11001111_11001111); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_cmpeq_epi32_mask() { + #[rustfmt::skip] + let a = _mm512_set_epi32(0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100, + 0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100); #[rustfmt::skip] - let a = _mm512_set_ps(0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100., - 0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.); - let b = _mm512_set1_ps(-1.); - let mask = 0b01100110_01100110; - let r = _mm512_mask_cmplt_ps_mask(mask, a, b); - assert_eq!(r, 0b00000100_00000100); + let b = _mm512_set_epi32(0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100, + 0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100); + let mask = 0b01111010_01111010; + let r = _mm512_mask_cmpeq_epi32_mask(mask, b, a); + assert_eq!(r, 0b01001010_01001010); } #[simd_test(enable = "avx512f")] - unsafe fn test_mm512_cmpnlt_ps_mask() { + unsafe fn test_mm512_cmpneq_epi32_mask() { #[rustfmt::skip] - let a = _mm512_set_ps(0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100., - 0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.); - let b = _mm512_set1_ps(-1.); - assert_eq!(_mm512_cmpnlt_ps_mask(a, b), !_mm512_cmplt_ps_mask(a, b)); + let a = _mm512_set_epi32(0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100, + 0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100); + #[rustfmt::skip] + let b = _mm512_set_epi32(0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100, + 0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100); + let m = _mm512_cmpneq_epi32_mask(b, a); + assert_eq!(m, !_mm512_cmpeq_epi32_mask(b, a)); } #[simd_test(enable = "avx512f")] - unsafe fn test_mm512_mask_cmpnlt_ps_mask() { + unsafe fn test_mm512_mask_cmpneq_epi32_mask() { #[rustfmt::skip] - let a = _mm512_set_ps(0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100., - 0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.); - let b = _mm512_set1_ps(-1.); + let a = _mm512_set_epi32(0, 1, -1, 13, i32::MAX, i32::MIN, -100, 100, + 0, 1, -1, 13, i32::MAX, i32::MIN, -100, 100); + #[rustfmt::skip] + let b = _mm512_set_epi32(0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100, + 0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100); let mask = 0b01111010_01111010; - assert_eq!(_mm512_mask_cmpnlt_ps_mask(mask, a, b), 0b01111010_01111010); + let r = _mm512_mask_cmpneq_epi32_mask(mask, b, a); + assert_eq!(r, 0b00110010_00110010) } #[simd_test(enable = "avx512f")] - unsafe fn test_mm512_cmpnle_ps_mask() { + unsafe fn test_mm512_cmp_epi32_mask() { #[rustfmt::skip] - let a = _mm512_set_ps(0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100., - 0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.); - let b = _mm512_set1_ps(-1.); - let m = _mm512_cmpnle_ps_mask(b, a); - assert_eq!(m, 0b00001101_00001101); + let a = _mm512_set_epi32(0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100, + 0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100); + let b = _mm512_set1_epi32(-1); + let m = _mm512_cmp_epi32_mask(a, b, _MM_CMPINT_LT); + assert_eq!(m, 0b00000101_00000101); } #[simd_test(enable = "avx512f")] - unsafe fn test_mm512_mask_cmpnle_ps_mask() { + unsafe fn test_mm512_mask_cmp_epi32_mask() { #[rustfmt::skip] - let a = _mm512_set_ps(0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100., - 0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.); - let b = _mm512_set1_ps(-1.); + let a = _mm512_set_epi32(0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100, + 0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100); + let b = _mm512_set1_epi32(-1); let mask = 
0b01100110_01100110; - let r = _mm512_mask_cmpnle_ps_mask(mask, b, a); + let r = _mm512_mask_cmp_epi32_mask(mask, a, b, _MM_CMPINT_LT); assert_eq!(r, 0b00000100_00000100); } #[simd_test(enable = "avx512f")] - unsafe fn test_mm512_cmple_ps_mask() { - #[rustfmt::skip] - let a = _mm512_set_ps(0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100., - 0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.); - let b = _mm512_set1_ps(-1.); - assert_eq!(_mm512_cmple_ps_mask(a, b), 0b00100101_00100101); + unsafe fn test_mm512_set_epi32() { + let r = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + assert_eq_m512i( + r, + _mm512_set_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0), + ) } #[simd_test(enable = "avx512f")] - unsafe fn test_mm512_mask_cmple_ps_mask() { - #[rustfmt::skip] - let a = _mm512_set_ps(0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100., - 0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.); - let b = _mm512_set1_ps(-1.); - let mask = 0b01111010_01111010; - assert_eq!(_mm512_mask_cmple_ps_mask(mask, a, b), 0b00100000_00100000); + unsafe fn test_mm512_setr_epi32() { + let r = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + assert_eq_m512i( + r, + _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0), + ) } #[simd_test(enable = "avx512f")] - unsafe fn test_mm512_cmpeq_ps_mask() { - #[rustfmt::skip] - let a = _mm512_set_ps(0., 1., -1., 13., f32::MAX, f32::MIN, f32::NAN, -100., - 0., 1., -1., 13., f32::MAX, f32::MIN, f32::NAN, -100.); - #[rustfmt::skip] - let b = _mm512_set_ps(0., 1., 13., 42., f32::MAX, f32::MIN, f32::NAN, -100., - 0., 1., 13., 42., f32::MAX, f32::MIN, f32::NAN, -100.); - let m = _mm512_cmpeq_ps_mask(b, a); - assert_eq!(m, 0b11001101_11001101); + unsafe fn test_mm512_set1_epi32() { + let r = _mm512_set_epi32(2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2); + assert_eq_m512i(r, _mm512_set1_epi32(2)); } #[simd_test(enable = "avx512f")] - unsafe fn test_mm512_mask_cmpeq_ps_mask() { - #[rustfmt::skip] - let a = _mm512_set_ps(0., 1., -1., 13., f32::MAX, f32::MIN, f32::NAN, -100., - 0., 1., -1., 13., f32::MAX, f32::MIN, f32::NAN, -100.); - #[rustfmt::skip] - let b = _mm512_set_ps(0., 1., 13., 42., f32::MAX, f32::MIN, f32::NAN, -100., - 0., 1., 13., 42., f32::MAX, f32::MIN, f32::NAN, -100.); - let mask = 0b01111010_01111010; - let r = _mm512_mask_cmpeq_ps_mask(mask, b, a); - assert_eq!(r, 0b01001000_01001000); + unsafe fn test_mm512_setzero_si512() { + assert_eq_m512i(_mm512_set1_epi32(0), _mm512_setzero_si512()); } #[simd_test(enable = "avx512f")] - unsafe fn test_mm512_cmpneq_ps_mask() { - #[rustfmt::skip] - let a = _mm512_set_ps(0., 1., -1., 13., f32::MAX, f32::MIN, f32::NAN, -100., - 0., 1., -1., 13., f32::MAX, f32::MIN, f32::NAN, -100.); - #[rustfmt::skip] - let b = _mm512_set_ps(0., 1., 13., 42., f32::MAX, f32::MIN, f32::NAN, -100., - 0., 1., 13., 42., f32::MAX, f32::MIN, f32::NAN, -100.); - let m = _mm512_cmpneq_ps_mask(b, a); - assert_eq!(m, 0b00110010_00110010); + unsafe fn test_mm512_set_ps() { + let r = _mm512_setr_ps( + 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., + ); + assert_eq_m512( + r, + _mm512_set_ps( + 15., 14., 13., 12., 11., 10., 9., 8., 7., 6., 5., 4., 3., 2., 1., 0., + ), + ) } #[simd_test(enable = "avx512f")] - unsafe fn test_mm512_mask_cmpneq_ps_mask() { - #[rustfmt::skip] - let a = _mm512_set_ps(0., 1., -1., 13., f32::MAX, f32::MIN, f32::NAN, -100., - 0., 1., -1., 13., f32::MAX, f32::MIN, f32::NAN, -100.); - #[rustfmt::skip] - let b 
= _mm512_set_ps(0., 1., 13., 42., f32::MAX, f32::MIN, f32::NAN, -100., - 0., 1., 13., 42., f32::MAX, f32::MIN, f32::NAN, -100.); - let mask = 0b01111010_01111010; - let r = _mm512_mask_cmpneq_ps_mask(mask, b, a); - assert_eq!(r, 0b00110010_00110010) + unsafe fn test_mm512_setr_ps() { + let r = _mm512_set_ps( + 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., + ); + assert_eq_m512( + r, + _mm512_setr_ps( + 15., 14., 13., 12., 11., 10., 9., 8., 7., 6., 5., 4., 3., 2., 1., 0., + ), + ) } #[simd_test(enable = "avx512f")] - unsafe fn test_mm512_cmp_ps_mask() { + unsafe fn test_mm512_set1_ps() { #[rustfmt::skip] - let a = _mm512_set_ps(0., 1., -1., 13., f32::MAX, f32::MIN, 100., -100., - 0., 1., -1., 13., f32::MAX, f32::MIN, 100., -100.); - let b = _mm512_set1_ps(-1.); - let m = _mm512_cmp_ps_mask(a, b, _CMP_LT_OQ); - assert_eq!(m, 0b00000101_00000101); + let expected = _mm512_set_ps(2., 2., 2., 2., 2., 2., 2., 2., + 2., 2., 2., 2., 2., 2., 2., 2.); + assert_eq_m512(expected, _mm512_set1_ps(2.)); } #[simd_test(enable = "avx512f")] - unsafe fn test_mm512_mask_cmp_ps_mask() { - #[rustfmt::skip] - let a = _mm512_set_ps(0., 1., -1., 13., f32::MAX, f32::MIN, 100., -100., - 0., 1., -1., 13., f32::MAX, f32::MIN, 100., -100.); - let b = _mm512_set1_ps(-1.); - let mask = 0b01100110_01100110; - let r = _mm512_mask_cmp_ps_mask(mask, a, b, _CMP_LT_OQ); - assert_eq!(r, 0b00000100_00000100); + unsafe fn test_mm512_setzero_ps() { + assert_eq_m512(_mm512_setzero_ps(), _mm512_set1_ps(0.)); } #[simd_test(enable = "avx512f")] - unsafe fn test_mm512_cmp_round_ps_mask() { - #[rustfmt::skip] - let a = _mm512_set_ps(0., 1., -1., 13., f32::MAX, f32::MIN, 100., -100., - 0., 1., -1., 13., f32::MAX, f32::MIN, 100., -100.); - let b = _mm512_set1_ps(-1.); - let m = _mm512_cmp_round_ps_mask(a, b, _CMP_LT_OQ, _MM_FROUND_CUR_DIRECTION); - assert_eq!(m, 0b00000101_00000101); + unsafe fn test_mm512_loadu_pd() { + let a = &[4., 3., 2., 5., 8., 9., 64., 50.]; + let p = a.as_ptr(); + let r = _mm512_loadu_pd(black_box(p)); + let e = _mm512_setr_pd(4., 3., 2., 5., 8., 9., 64., 50.); + assert_eq_m512d(r, e); } #[simd_test(enable = "avx512f")] - unsafe fn test_mm512_mask_cmp_round_ps_mask() { - #[rustfmt::skip] - let a = _mm512_set_ps(0., 1., -1., 13., f32::MAX, f32::MIN, 100., -100., - 0., 1., -1., 13., f32::MAX, f32::MIN, 100., -100.); - let b = _mm512_set1_ps(-1.); - let mask = 0b01100110_01100110; - let r = _mm512_mask_cmp_round_ps_mask(mask, a, b, _CMP_LT_OQ, _MM_FROUND_CUR_DIRECTION); - assert_eq!(r, 0b00000100_00000100); + unsafe fn test_mm512_storeu_pd() { + let a = _mm512_set1_pd(9.); + let mut r = _mm512_undefined_pd(); + _mm512_storeu_pd(&mut r as *mut _ as *mut f64, a); + assert_eq_m512d(r, a); } #[simd_test(enable = "avx512f")] - unsafe fn test_mm512_cmpord_ps_mask() { - #[rustfmt::skip] - let a = _mm512_set_ps(f32::NAN, f32::MAX, f32::NAN, f32::MIN, f32::NAN, -1., f32::NAN, 0., - f32::NAN, f32::MAX, f32::NAN, f32::MIN, f32::NAN, 1., f32::NAN, 2.); - #[rustfmt::skip] - let b = _mm512_set_ps(f32::NAN, f32::NAN, f32::NAN, f32::NAN, f32::MIN, f32::MAX, -1., 0., - f32::NAN, f32::NAN, f32::NAN, f32::NAN, f32::MIN, f32::MAX, -1., 2.); - let m = _mm512_cmpord_ps_mask(a, b); - assert_eq!(m, 0b00000101_00000101); + unsafe fn test_mm512_loadu_ps() { + let a = &[ + 4., 3., 2., 5., 8., 9., 64., 50., -4., -3., -2., -5., -8., -9., -64., -50., + ]; + let p = a.as_ptr(); + let r = _mm512_loadu_ps(black_box(p)); + let e = _mm512_setr_ps( + 4., 3., 2., 5., 8., 9., 64., 50., -4., -3., -2., -5., -8., -9., -64., -50., + ); 
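+ // note: _mm512_loadu_ps is the unaligned-load form, so reading straight from a slice pointer is fine; lanes come back in memory order, matching _mm512_setr_ps.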
+ assert_eq_m512(r, e); } #[simd_test(enable = "avx512f")] - unsafe fn test_mm512_mask_cmpord_ps_mask() { - #[rustfmt::skip] - let a = _mm512_set_ps(f32::NAN, f32::MAX, f32::NAN, f32::MIN, f32::NAN, -1., f32::NAN, 0., - f32::NAN, f32::MAX, f32::NAN, f32::MIN, f32::NAN, 1., f32::NAN, 2.); - #[rustfmt::skip] - let b = _mm512_set_ps(f32::NAN, f32::NAN, f32::NAN, f32::NAN, f32::MIN, f32::MAX, -1., 0., - f32::NAN, f32::NAN, f32::NAN, f32::NAN, f32::MIN, f32::MAX, -1., 2.); - let mask = 0b11000011_11000011; - let m = _mm512_mask_cmpord_ps_mask(mask, a, b); - assert_eq!(m, 0b00000001_00000001); + unsafe fn test_mm512_storeu_ps() { + let a = _mm512_set1_ps(9.); + let mut r = _mm512_undefined_ps(); + _mm512_storeu_ps(&mut r as *mut _ as *mut f32, a); + assert_eq_m512(r, a); } #[simd_test(enable = "avx512f")] - unsafe fn test_mm512_cmpunord_ps_mask() { - #[rustfmt::skip] - let a = _mm512_set_ps(f32::NAN, f32::MAX, f32::NAN, f32::MIN, f32::NAN, -1., f32::NAN, 0., - f32::NAN, f32::MAX, f32::NAN, f32::MIN, f32::NAN, 1., f32::NAN, 2.); - #[rustfmt::skip] - let b = _mm512_set_ps(f32::NAN, f32::NAN, f32::NAN, f32::NAN, f32::MIN, f32::MAX, -1., 0., - f32::NAN, f32::NAN, f32::NAN, f32::NAN, f32::MIN, f32::MAX, -1., 2.); - let m = _mm512_cmpunord_ps_mask(a, b); + unsafe fn test_mm512_setr_pd() { + let r = _mm512_set_pd(0., 1., 2., 3., 4., 5., 6., 7.); + assert_eq_m512d(r, _mm512_setr_pd(7., 6., 5., 4., 3., 2., 1., 0.)); + } - assert_eq!(m, 0b11111010_11111010); + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_set_pd() { + let r = _mm512_setr_pd(0., 1., 2., 3., 4., 5., 6., 7.); + assert_eq_m512d(r, _mm512_set_pd(7., 6., 5., 4., 3., 2., 1., 0.)); } #[simd_test(enable = "avx512f")] - unsafe fn test_mm512_mask_cmpunord_ps_mask() { - #[rustfmt::skip] - let a = _mm512_set_ps(f32::NAN, f32::MAX, f32::NAN, f32::MIN, f32::NAN, -1., f32::NAN, 0., - f32::NAN, f32::MAX, f32::NAN, f32::MIN, f32::NAN, 1., f32::NAN, 2.); - #[rustfmt::skip] - let b = _mm512_set_ps(f32::NAN, f32::NAN, f32::NAN, f32::NAN, f32::MIN, f32::MAX, -1., 0., - f32::NAN, f32::NAN, f32::NAN, f32::NAN, f32::MIN, f32::MAX, -1., 2.); - let mask = 0b00001111_00001111; - let m = _mm512_mask_cmpunord_ps_mask(mask, a, b); - assert_eq!(m, 0b000001010_00001010); + unsafe fn test_mm512_rol_epi32() { + let a = _mm512_set_epi32(1<<31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1); + let r = _mm512_rol_epi32(a, 1); + let e = _mm512_set_epi32(1<<0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2); + assert_eq_m512i(r, e); } #[simd_test(enable = "avx512f")] - unsafe fn test_mm_cmp_ss_mask() { - let a = _mm_setr_ps(2., 1., 1., 1.); - let b = _mm_setr_ps(1., 2., 2., 2.); - let m = _mm_cmp_ss_mask(a, b, _CMP_GE_OS); - assert_eq!(m, 1); + unsafe fn test_mm512_mask_rol_epi32() { + let a = _mm512_set_epi32(1<<31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1); + let r = _mm512_mask_rol_epi32(a, 0, a, 1); + assert_eq_m512i(r, a); + + let r = _mm512_mask_rol_epi32(a, 0b11111111_11111111, a, 1); + let e = _mm512_set_epi32(1<<0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2); + assert_eq_m512i(r, e); } #[simd_test(enable = "avx512f")] - unsafe fn test_mm_mask_cmp_ss_mask() { - let a = _mm_setr_ps(2., 1., 1., 1.); - let b = _mm_setr_ps(1., 2., 2., 2.); - let m = _mm_mask_cmp_ss_mask(0b10, a, b, _CMP_GE_OS); - assert_eq!(m, 0); - let m = _mm_mask_cmp_ss_mask(0b1, a, b, _CMP_GE_OS); - assert_eq!(m, 1); + unsafe fn test_mm512_maskz_rol_epi32() { + let a = _mm512_set_epi32(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1<<31); + let r = _mm512_maskz_rol_epi32(0, a, 1); + 
assert_eq_m512i(r, _mm512_setzero_si512()); + + let r = _mm512_maskz_rol_epi32(0b00000000_11111111, a, 1); + let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, 2, 1<<0); + assert_eq_m512i(r, e); } #[simd_test(enable = "avx512f")] - unsafe fn test_mm_cmp_round_ss_mask() { - let a = _mm_setr_ps(2., 1., 1., 1.); - let b = _mm_setr_ps(1., 2., 2., 2.); - let m = _mm_cmp_round_ss_mask(a, b, _CMP_GE_OS, _MM_FROUND_CUR_DIRECTION); - assert_eq!(m, 1); + unsafe fn test_mm512_ror_epi32() { + let a = _mm512_set_epi32(1<<0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2); + let r = _mm512_ror_epi32(a, 1); + let e = _mm512_set_epi32(1<<31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1); + assert_eq_m512i(r, e); } #[simd_test(enable = "avx512f")] - unsafe fn test_mm_mask_cmp_round_ss_mask() { - let a = _mm_setr_ps(2., 1., 1., 1.); - let b = _mm_setr_ps(1., 2., 2., 2.); - let m = _mm_mask_cmp_round_ss_mask(0b10, a, b, _CMP_GE_OS, _MM_FROUND_CUR_DIRECTION); - assert_eq!(m, 0); - let m = _mm_mask_cmp_round_ss_mask(0b1, a, b, _CMP_GE_OS, _MM_FROUND_CUR_DIRECTION); - assert_eq!(m, 1); + unsafe fn test_mm512_mask_ror_epi32() { + let a = _mm512_set_epi32(1<<0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2); + let r = _mm512_mask_ror_epi32(a, 0, a, 1); + assert_eq_m512i(r, a); + + let r = _mm512_mask_ror_epi32(a, 0b11111111_11111111, a, 1); + let e = _mm512_set_epi32(1<<31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1); + assert_eq_m512i(r, e); } #[simd_test(enable = "avx512f")] - unsafe fn test_mm_cmp_sd_mask() { - let a = _mm_setr_pd(2., 1.); - let b = _mm_setr_pd(1., 2.); - let m = _mm_cmp_sd_mask(a, b, _CMP_GE_OS); - assert_eq!(m, 1); + unsafe fn test_mm512_maskz_ror_epi32() { + let a = _mm512_set_epi32(2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1<<0); + let r = _mm512_maskz_ror_epi32(0, a, 1); + assert_eq_m512i(r, _mm512_setzero_si512()); + + let r = _mm512_maskz_ror_epi32(0b00000000_11111111, a, 1); + let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1<<31); + assert_eq_m512i(r, e); } #[simd_test(enable = "avx512f")] - unsafe fn test_mm_mask_cmp_sd_mask() { - let a = _mm_setr_pd(2., 1.); - let b = _mm_setr_pd(1., 2.); - let m = _mm_mask_cmp_sd_mask(0b10, a, b, _CMP_GE_OS); - assert_eq!(m, 0); - let m = _mm_mask_cmp_sd_mask(0b1, a, b, _CMP_GE_OS); - assert_eq!(m, 1); + unsafe fn test_mm512_slli_epi32() { + let a = _mm512_set_epi32(1<<31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1); + let r = _mm512_slli_epi32(a, 1); + let e = _mm512_set_epi32(0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2); + assert_eq_m512i(r, e); } #[simd_test(enable = "avx512f")] - unsafe fn test_mm_cmp_round_sd_mask() { - let a = _mm_setr_pd(2., 1.); - let b = _mm_setr_pd(1., 2.); - let m = _mm_cmp_round_sd_mask(a, b, _CMP_GE_OS, _MM_FROUND_CUR_DIRECTION); - assert_eq!(m, 1); + unsafe fn test_mm512_mask_slli_epi32() { + let a = _mm512_set_epi32(1<<31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1); + let r = _mm512_mask_slli_epi32(a, 0, a, 1); + assert_eq_m512i(r, a); + + let r = _mm512_mask_slli_epi32(a, 0b11111111_11111111, a, 1); + let e = _mm512_set_epi32(0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2); + assert_eq_m512i(r, e); } #[simd_test(enable = "avx512f")] - unsafe fn test_mm_mask_cmp_round_sd_mask() { - let a = _mm_setr_pd(2., 1.); - let b = _mm_setr_pd(1., 2.); - let m = _mm_mask_cmp_round_sd_mask(0b10, a, b, _CMP_GE_OS, _MM_FROUND_CUR_DIRECTION); - assert_eq!(m, 0); - let m = _mm_mask_cmp_round_sd_mask(0b1, a, b, _CMP_GE_OS, _MM_FROUND_CUR_DIRECTION); - assert_eq!(m, 1); + unsafe fn 
test_mm512_maskz_slli_epi32() { + let a = _mm512_set_epi32(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1<<31); + let r = _mm512_maskz_slli_epi32(0, a, 1); + assert_eq_m512i(r, _mm512_setzero_si512()); + + let r = _mm512_maskz_slli_epi32(0b00000000_11111111, a, 1); + let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, 2, 0); + assert_eq_m512i(r, e); } #[simd_test(enable = "avx512f")] - unsafe fn test_mm512_cmplt_epu32_mask() { - #[rustfmt::skip] - let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100, - 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100); - let b = _mm512_set1_epi32(-1); - let m = _mm512_cmplt_epu32_mask(a, b); - assert_eq!(m, 0b11001111_11001111); + unsafe fn test_mm512_srli_epi32() { + let a = _mm512_set_epi32(0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2); + let r = _mm512_srli_epi32(a, 1); + let e = _mm512_set_epi32(0<<31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1); + assert_eq_m512i(r, e); } #[simd_test(enable = "avx512f")] - unsafe fn test_mm512_mask_cmplt_epu32_mask() { - #[rustfmt::skip] - let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100, - 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100); - let b = _mm512_set1_epi32(-1); - let mask = 0b01111010_01111010; - let r = _mm512_mask_cmplt_epu32_mask(mask, a, b); - assert_eq!(r, 0b01001010_01001010); + unsafe fn test_mm512_mask_srli_epi32() { + let a = _mm512_set_epi32(0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2); + let r = _mm512_mask_srli_epi32(a, 0, a, 1); + assert_eq_m512i(r, a); + + let r = _mm512_mask_srli_epi32(a, 0b11111111_11111111, a, 1); + let e = _mm512_set_epi32(0<<31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1); + assert_eq_m512i(r, e); } #[simd_test(enable = "avx512f")] - unsafe fn test_mm512_cmpgt_epu32_mask() { - #[rustfmt::skip] - let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100, - 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100); - let b = _mm512_set1_epi32(-1); - let m = _mm512_cmpgt_epu32_mask(b, a); - assert_eq!(m, 0b11001111_11001111); + unsafe fn test_mm512_maskz_srli_epi32() { + let a = _mm512_set_epi32(2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0); + let r = _mm512_maskz_srli_epi32(0, a, 1); + assert_eq_m512i(r, _mm512_setzero_si512()); + + let r = _mm512_maskz_srli_epi32(0b00000000_11111111, a, 1); + let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0<<31); + assert_eq_m512i(r, e); } #[simd_test(enable = "avx512f")] - unsafe fn test_mm512_mask_cmpgt_epu32_mask() { - #[rustfmt::skip] - let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100, - 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100); - let b = _mm512_set1_epi32(-1); - let mask = 0b01111010_01111010; - let r = _mm512_mask_cmpgt_epu32_mask(mask, b, a); - assert_eq!(r, 0b01001010_01001010); + unsafe fn test_mm512_rolv_epi32() { + let a = _mm512_set_epi32(1<<31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1); + let b = _mm512_set_epi32(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1); + + let r = _mm512_rolv_epi32(a, b); + + let e = _mm512_set_epi32(1<<0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2); + assert_eq_m512i(r, e); } #[simd_test(enable = "avx512f")] - unsafe fn test_mm512_cmple_epu32_mask() { - #[rustfmt::skip] - let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100, - 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100); - let b = _mm512_set1_epi32(-1); - assert_eq!( - _mm512_cmple_epu32_mask(a, b), - 
!_mm512_cmpgt_epu32_mask(a, b) - ) + unsafe fn test_mm512_mask_rolv_epi32() { + let a = _mm512_set_epi32(1<<31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1); + let b = _mm512_set_epi32(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,1,1,1,1); + + let r = _mm512_mask_rolv_epi32(a, 0, a, b); + assert_eq_m512i(r, a); + + let r = _mm512_mask_rolv_epi32(a, 0b11111111_11111111, a, b); + + let e = _mm512_set_epi32(1<<0,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2); + assert_eq_m512i(r, e); } #[simd_test(enable = "avx512f")] - unsafe fn test_mm512_mask_cmple_epu32_mask() { - #[rustfmt::skip] - let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100, - 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100); - let b = _mm512_set1_epi32(-1); - let mask = 0b01111010_01111010; - assert_eq!( - _mm512_mask_cmple_epu32_mask(mask, a, b), - 0b01111010_01111010 - ); + unsafe fn test_mm512_maskz_rolv_epi32() { + let a = _mm512_set_epi32(1<<31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1<<31); + let b = _mm512_set_epi32(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1); + + let r = _mm512_maskz_rolv_epi32(0, a, b); + assert_eq_m512i(r, _mm512_setzero_si512()); + + let r = _mm512_maskz_rolv_epi32(0b00000000_11111111, a, b); + + let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, 2, 1<<0); + assert_eq_m512i(r, e); } #[simd_test(enable = "avx512f")] - unsafe fn test_mm512_cmpge_epu32_mask() { - #[rustfmt::skip] - let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100, - 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100); - let b = _mm512_set1_epi32(-1); - assert_eq!( - _mm512_cmpge_epu32_mask(a, b), - !_mm512_cmplt_epu32_mask(a, b) - ) + unsafe fn test_mm512_rorv_epi32() { + let a = _mm512_set_epi32(1<<0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2); + let b = _mm512_set_epi32(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1); + + let r = _mm512_rorv_epi32(a, b); + + let e = _mm512_set_epi32(1<<31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1); + assert_eq_m512i(r, e); } #[simd_test(enable = "avx512f")] - unsafe fn test_mm512_mask_cmpge_epu32_mask() { - #[rustfmt::skip] - let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100, - 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100); - let b = _mm512_set1_epi32(-1); - let mask = 0b01111010_01111010; - assert_eq!(_mm512_mask_cmpge_epu32_mask(mask, a, b), 0b01100000_0110000); + unsafe fn test_mm512_mask_rorv_epi32() { + let a = _mm512_set_epi32(1<<0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2); + let b = _mm512_set_epi32(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1); + + let r = _mm512_mask_rorv_epi32(a, 0, a, b); + assert_eq_m512i(r, a); + + let r = _mm512_mask_rorv_epi32(a, 0b11111111_11111111, a, b); + + let e = _mm512_set_epi32(1<<31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1); + assert_eq_m512i(r, e); } #[simd_test(enable = "avx512f")] - unsafe fn test_mm512_cmpeq_epu32_mask() { - #[rustfmt::skip] - let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100, - 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100); - #[rustfmt::skip] - let b = _mm512_set_epi32(0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100, - 0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100); - let m = _mm512_cmpeq_epu32_mask(b, a); - assert_eq!(m, 0b11001111_11001111); + unsafe fn test_mm512_maskz_rorv_epi32() { + let a = _mm512_set_epi32(3, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1<<0); + let b = _mm512_set_epi32(2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1); + + let r = 
_mm512_maskz_rorv_epi32(0, a, b); + assert_eq_m512i(r, _mm512_setzero_si512()); + + let r = _mm512_maskz_rorv_epi32(0b00000000_11111111, a, b); + + let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1<<31); + assert_eq_m512i(r, e); } #[simd_test(enable = "avx512f")] - unsafe fn test_mm512_mask_cmpeq_epu32_mask() { - #[rustfmt::skip] - let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100, - 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100); - #[rustfmt::skip] - let b = _mm512_set_epi32(0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100, - 0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100); - let mask = 0b01111010_01111010; - let r = _mm512_mask_cmpeq_epu32_mask(mask, b, a); - assert_eq!(r, 0b01001010_01001010); + unsafe fn test_mm512_sllv_epi32() { + let a = _mm512_set_epi32(1<<31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1); + let count = _mm512_set_epi32(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1); + + let r = _mm512_sllv_epi32(a, count); + + let e = _mm512_set_epi32(0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2); + assert_eq_m512i(r, e); } #[simd_test(enable = "avx512f")] - unsafe fn test_mm512_cmpneq_epu32_mask() { - #[rustfmt::skip] - let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100, - 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100); - #[rustfmt::skip] - let b = _mm512_set_epi32(0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100, - 0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100); - let m = _mm512_cmpneq_epu32_mask(b, a); - assert_eq!(m, !_mm512_cmpeq_epu32_mask(b, a)); + unsafe fn test_mm512_mask_sllv_epi32() { + let a = _mm512_set_epi32(1<<31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1); + let count = _mm512_set_epi32(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1); + + let r = _mm512_mask_sllv_epi32(a, 0, a, count); + assert_eq_m512i(r, a); + + let r = _mm512_mask_sllv_epi32(a, 0b11111111_11111111, a, count); + + let e = _mm512_set_epi32(0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2); + assert_eq_m512i(r, e); } #[simd_test(enable = "avx512f")] - unsafe fn test_mm512_mask_cmpneq_epu32_mask() { - #[rustfmt::skip] - let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, -100, 100, - 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, -100, 100); - #[rustfmt::skip] - let b = _mm512_set_epi32(0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100, - 0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100); - let mask = 0b01111010_01111010; - let r = _mm512_mask_cmpneq_epu32_mask(mask, b, a); - assert_eq!(r, 0b00110010_00110010); + unsafe fn test_mm512_maskz_sllv_epi32() { + let a = _mm512_set_epi32(1<<31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1<<31); + let count = _mm512_set_epi32(0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1); + + let r = _mm512_maskz_sllv_epi32(0, a, count); + assert_eq_m512i(r, _mm512_setzero_si512()); + + let r = _mm512_maskz_sllv_epi32(0b00000000_11111111, a, count); + + let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, 2, 0); + assert_eq_m512i(r, e); } #[simd_test(enable = "avx512f")] - unsafe fn test_mm512_cmp_epu32_mask() { - #[rustfmt::skip] - let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100, - 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100); - let b = _mm512_set1_epi32(-1); - let m = _mm512_cmp_epu32_mask(a, b, _MM_CMPINT_LT); - assert_eq!(m, 0b11001111_11001111); + unsafe fn test_mm512_srlv_epi32() { + let a = _mm512_set_epi32(0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2); + let count = _mm512_set_epi32(1, 1, 1, 1, 1, 1, 1, 
1, 1, 1, 1, 1, 1, 1, 1, 1); + + let r = _mm512_srlv_epi32(a, count); + + let e = _mm512_set_epi32(0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1); + assert_eq_m512i(r, e); } #[simd_test(enable = "avx512f")] - unsafe fn test_mm512_mask_cmp_epu32_mask() { - #[rustfmt::skip] - let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100, - 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100); - let b = _mm512_set1_epi32(-1); - let mask = 0b01111010_01111010; - let r = _mm512_mask_cmp_epu32_mask(mask, a, b, _MM_CMPINT_LT); - assert_eq!(r, 0b01001010_01001010); + unsafe fn test_mm512_mask_srlv_epi32() { + let a = _mm512_set_epi32(0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2); + let count = _mm512_set_epi32(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1); + + let r = _mm512_mask_srlv_epi32(a, 0, a, count); + assert_eq_m512i(r, a); + + let r = _mm512_mask_srlv_epi32(a, 0b11111111_11111111, a, count); + + let e = _mm512_set_epi32(0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1); + assert_eq_m512i(r, e); } #[simd_test(enable = "avx512f")] - unsafe fn test_mm512_cmplt_epi32_mask() { - #[rustfmt::skip] - let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100, - 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100); - let b = _mm512_set1_epi32(-1); - let m = _mm512_cmplt_epi32_mask(a, b); - assert_eq!(m, 0b00000101_00000101); + unsafe fn test_mm512_maskz_srlv_epi32() { + let a = _mm512_set_epi32(2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0); + let count = _mm512_set_epi32(0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1); + + let r = _mm512_maskz_srlv_epi32(0, a, count); + assert_eq_m512i(r, _mm512_setzero_si512()); + + let r = _mm512_maskz_srlv_epi32(0b00000000_11111111, a, count); + + let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_sll_epi32() { + let a = _mm512_set_epi32(1<<31, 1<<0, 1<<1, 1<<2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); + let count = _mm_set_epi32(0, 0, 0, 2); + let r = _mm512_sll_epi32(a, count); + let e = _mm512_set_epi32(0, 1<<2, 1<<3, 1<<4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); + assert_eq_m512i(r, e); } #[simd_test(enable = "avx512f")] - unsafe fn test_mm512_mask_cmplt_epi32_mask() { - #[rustfmt::skip] - let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100, - 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100); - let b = _mm512_set1_epi32(-1); - let mask = 0b01100110_01100110; - let r = _mm512_mask_cmplt_epi32_mask(mask, a, b); - assert_eq!(r, 0b00000100_00000100); + unsafe fn test_mm512_mask_sll_epi32() { + let a = _mm512_set_epi32(1<<31, 1<<0, 1<<1, 1<<2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); + let count = _mm_set_epi32(0, 0, 0, 2); + let r = _mm512_mask_sll_epi32(a, 0, a, count); + assert_eq_m512i(r, a); + + let r = _mm512_mask_sll_epi32(a, 0b11111111_11111111, a, count); + let e = _mm512_set_epi32(0, 1<<2, 1<<3, 1<<4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); + assert_eq_m512i(r, e); } #[simd_test(enable = "avx512f")] - unsafe fn test_mm512_cmpgt_epi32_mask() { - #[rustfmt::skip] - let a = _mm512_set_epi32(0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100, - 0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100); - let b = _mm512_set1_epi32(-1); - let m = _mm512_cmpgt_epi32_mask(b, a); - assert_eq!(m, 0b00000101_00000101); + unsafe fn test_mm512_maskz_sll_epi32() { + let a = _mm512_set_epi32(1<<31, 1<<0, 1<<1, 1<<2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1<<31); + let count = _mm_set_epi32(2, 0, 0, 2); 
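+ // the shift amount is taken from the low 64 bits of `count` (2 here): 1<<31 shifted left by 2 overflows to 0, and maskz zeroes the deselected upper lanes, so the whole result is zero.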
+ let r = _mm512_maskz_sll_epi32(0, a, count); + assert_eq_m512i(r, _mm512_setzero_si512()); + + let r = _mm512_maskz_sll_epi32(0b00000000_11111111, a, count); + let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); + assert_eq_m512i(r, e); } #[simd_test(enable = "avx512f")] - unsafe fn test_mm512_mask_cmpgt_epi32_mask() { - #[rustfmt::skip] - let a = _mm512_set_epi32(0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100, - 0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100); - let b = _mm512_set1_epi32(-1); - let mask = 0b01100110_01100110; - let r = _mm512_mask_cmpgt_epi32_mask(mask, b, a); - assert_eq!(r, 0b00000100_00000100); + unsafe fn test_mm512_srl_epi32() { + let a = _mm512_set_epi32(1<<31, 1<<0, 1<<1, 1<<2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); + let count = _mm_set_epi32(0, 0, 0, 2); + let r = _mm512_srl_epi32(a, count); + let e = _mm512_set_epi32(1<<29, 0, 0, 1<<0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); + assert_eq_m512i(r, e); } #[simd_test(enable = "avx512f")] - unsafe fn test_mm512_cmple_epi32_mask() { - #[rustfmt::skip] - let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100, - 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100); - let b = _mm512_set1_epi32(-1); - assert_eq!( - _mm512_cmple_epi32_mask(a, b), - !_mm512_cmpgt_epi32_mask(a, b) - ) + unsafe fn test_mm512_mask_srl_epi32() { + let a = _mm512_set_epi32(1<<31, 1<<0, 1<<1, 1<<2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); + let count = _mm_set_epi32(0, 0, 0, 2); + let r = _mm512_mask_srl_epi32(a, 0, a, count); + assert_eq_m512i(r, a); + + let r = _mm512_mask_srl_epi32(a, 0b11111111_11111111, a, count); + let e = _mm512_set_epi32(1<<29, 0, 0, 1<<0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); + assert_eq_m512i(r, e); } #[simd_test(enable = "avx512f")] - unsafe fn test_mm512_mask_cmple_epi32_mask() { - #[rustfmt::skip] - let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100, - 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100); - let b = _mm512_set1_epi32(-1); - let mask = 0b01111010_01111010; - assert_eq!(_mm512_mask_cmple_epi32_mask(mask, a, b), 0b01100000_0110000); + unsafe fn test_mm512_maskz_srl_epi32() { + let a = _mm512_set_epi32(1<<31, 1<<0, 1<<1, 1<<2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1<<31); + let count = _mm_set_epi32(2, 0, 0, 2); + let r = _mm512_maskz_srl_epi32(0, a, count); + assert_eq_m512i(r, _mm512_setzero_si512()); + + let r = _mm512_maskz_srl_epi32(0b00000000_11111111, a, count); + let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1<<29); + assert_eq_m512i(r, e); } #[simd_test(enable = "avx512f")] - unsafe fn test_mm512_cmpge_epi32_mask() { - #[rustfmt::skip] - let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100, - 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100); - let b = _mm512_set1_epi32(-1); - assert_eq!( - _mm512_cmpge_epi32_mask(a, b), - !_mm512_cmplt_epi32_mask(a, b) - ) + unsafe fn test_mm512_sra_epi32() { + let a = _mm512_set_epi32(8, -8, 16, -15, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1); + let count = _mm_set_epi32(1, 0, 0, 2); + let r = _mm512_sra_epi32(a, count); + let e = _mm512_set_epi32(2, -2, 4, -4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); + assert_eq_m512i(r, e); } #[simd_test(enable = "avx512f")] - unsafe fn test_mm512_mask_cmpge_epi32_mask() { - #[rustfmt::skip] - let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100, - 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100); - let b = _mm512_set1_epi32(-1); - let mask = 0b01111010_01111010; - 
assert_eq!( - _mm512_mask_cmpge_epi32_mask(mask, a, b), - 0b01111010_01111010 - ); + unsafe fn test_mm512_mask_sra_epi32() { + let a = _mm512_set_epi32(8, -8, 16, -15, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 16); + let count = _mm_set_epi32(0, 0, 0, 2); + let r = _mm512_mask_sra_epi32(a, 0, a, count); + assert_eq_m512i(r, a); + + let r = _mm512_mask_sra_epi32(a, 0b11111111_11111111, a, count); + let e = _mm512_set_epi32(2, -2, 4, -4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4); + assert_eq_m512i(r, e); } #[simd_test(enable = "avx512f")] - unsafe fn test_mm512_cmpeq_epi32_mask() { - #[rustfmt::skip] - let a = _mm512_set_epi32(0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100, - 0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100); - #[rustfmt::skip] - let b = _mm512_set_epi32(0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100, - 0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100); - let m = _mm512_cmpeq_epi32_mask(b, a); - assert_eq!(m, 0b11001111_11001111); + unsafe fn test_mm512_maskz_sra_epi32() { + let a = _mm512_set_epi32(8, -8, 16, -15, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -15, -14); + let count = _mm_set_epi32(2, 0, 0, 2); + let r = _mm512_maskz_sra_epi32(0, a, count); + assert_eq_m512i(r, _mm512_setzero_si512()); + + let r = _mm512_maskz_sra_epi32(0b00000000_11111111, a, count); + let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -4, -4 ); + assert_eq_m512i(r, e); } #[simd_test(enable = "avx512f")] - unsafe fn test_mm512_mask_cmpeq_epi32_mask() { - #[rustfmt::skip] - let a = _mm512_set_epi32(0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100, - 0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100); - #[rustfmt::skip] - let b = _mm512_set_epi32(0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100, - 0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100); - let mask = 0b01111010_01111010; - let r = _mm512_mask_cmpeq_epi32_mask(mask, b, a); - assert_eq!(r, 0b01001010_01001010); + unsafe fn test_mm512_srav_epi32() { + let a = _mm512_set_epi32(8, -8, 16, -15, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1); + let count = _mm512_set_epi32(2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); + let r = _mm512_srav_epi32(a, count); + let e = _mm512_set_epi32(2, -2, 4, -4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1); + assert_eq_m512i(r, e); } #[simd_test(enable = "avx512f")] - unsafe fn test_mm512_cmpneq_epi32_mask() { - #[rustfmt::skip] - let a = _mm512_set_epi32(0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100, - 0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100); - #[rustfmt::skip] - let b = _mm512_set_epi32(0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100, - 0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100); - let m = _mm512_cmpneq_epi32_mask(b, a); - assert_eq!(m, !_mm512_cmpeq_epi32_mask(b, a)); + unsafe fn test_mm512_mask_srav_epi32() { + let a = _mm512_set_epi32(8, -8, 16, -15, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 16); + let count = _mm512_set_epi32(2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1); + let r = _mm512_mask_srav_epi32(a, 0, a, count); + assert_eq_m512i(r, a); + + let r = _mm512_mask_srav_epi32(a, 0b11111111_11111111, a, count); + let e = _mm512_set_epi32(2, -2, 4, -4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8); + assert_eq_m512i(r, e); } #[simd_test(enable = "avx512f")] - unsafe fn test_mm512_mask_cmpneq_epi32_mask() { - #[rustfmt::skip] - let a = _mm512_set_epi32(0, 1, -1, 13, i32::MAX, i32::MIN, -100, 100, - 0, 1, -1, 13, i32::MAX, i32::MIN, -100, 100); - #[rustfmt::skip] - let b = _mm512_set_epi32(0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100, - 0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100); - let mask = 0b01111010_01111010; - let r = _mm512_mask_cmpneq_epi32_mask(mask, b, a); - assert_eq!(r, 
0b00110010_00110010) + unsafe fn test_mm512_maskz_srav_epi32() { + let a = _mm512_set_epi32(8, -8, 16, -15, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -15, -14); + let count = _mm512_set_epi32(2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 2); + let r = _mm512_maskz_srav_epi32(0, a, count); + assert_eq_m512i(r, _mm512_setzero_si512()); + + let r = _mm512_maskz_srav_epi32(0b00000000_11111111, a, count); + let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -4, -4 ); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_srai_epi32() { + let a = _mm512_set_epi32(8, -8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 16, -15); + let r = _mm512_srai_epi32(a, 2); + let e = _mm512_set_epi32(2, -2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, -4); + assert_eq_m512i(r, e); } #[simd_test(enable = "avx512f")] - unsafe fn test_mm512_cmp_epi32_mask() { - #[rustfmt::skip] - let a = _mm512_set_epi32(0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100, - 0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100); - let b = _mm512_set1_epi32(-1); - let m = _mm512_cmp_epi32_mask(a, b, _MM_CMPINT_LT); - assert_eq!(m, 0b00000101_00000101); + unsafe fn test_mm512_mask_srai_epi32() { + let a = _mm512_set_epi32(8, -8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 15, -15); + let r = _mm512_mask_srai_epi32(a, 0, a, 2); + assert_eq_m512i(r, a); + + let r = _mm512_mask_srai_epi32(a, 0b11111111_11111111, a, 2); + let e = _mm512_set_epi32(2, -2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, -4); + assert_eq_m512i(r, e); } #[simd_test(enable = "avx512f")] - unsafe fn test_mm512_mask_cmp_epi32_mask() { - #[rustfmt::skip] - let a = _mm512_set_epi32(0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100, - 0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100); - let b = _mm512_set1_epi32(-1); - let mask = 0b01100110_01100110; - let r = _mm512_mask_cmp_epi32_mask(mask, a, b, _MM_CMPINT_LT); - assert_eq!(r, 0b00000100_00000100); + unsafe fn test_mm512_maskz_srai_epi32() { + let a = _mm512_set_epi32(8, -8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 15, -15); + let r = _mm512_maskz_srai_epi32(0, a, 2); + assert_eq_m512i(r, _mm512_setzero_si512()); + + let r = _mm512_maskz_srai_epi32(0b00000000_11111111, a, 2); + let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, -4); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_and_epi32() { + let a = _mm512_set_epi32(1<<1|1<<2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1<<1|1<<3); + let b = _mm512_set_epi32(1<<1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1<<3|1<<4); + let r = _mm512_and_epi32(a, b); + let e = _mm512_set_epi32(1<<1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1<<3); + assert_eq_m512i(r, e); } #[simd_test(enable = "avx512f")] - unsafe fn test_mm512_set_epi32() { - let r = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); - assert_eq_m512i( - r, - _mm512_set_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0), - ) + unsafe fn test_mm512_mask_and_epi32() { + let a = _mm512_set_epi32(1<<1|1<<2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1<<1|1<<3); + let b = _mm512_set_epi32(1<<1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1<<3|1<<4); + let r = _mm512_mask_and_epi32(a, 0, a, b); + assert_eq_m512i(r, a); + + let r = _mm512_mask_and_epi32(a, 0b01111111_11111111, a, b); + let e = _mm512_set_epi32(1<<1|1<<2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1<<3); + assert_eq_m512i(r, e); } #[simd_test(enable = "avx512f")] - unsafe fn test_mm512_setr_epi32() { - let r = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); - 
assert_eq_m512i( - r, - _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0), - ) + unsafe fn test_mm512_maskz_and_epi32() { + let a = _mm512_set_epi32(1<<1|1<<2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1<<1|1<<3); + let b = _mm512_set_epi32(1<<1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1<<3|1<<4); + let r = _mm512_maskz_and_epi32(0, a, b); + assert_eq_m512i(r, _mm512_setzero_si512()); + + let r = _mm512_maskz_and_epi32(0b00000000_11111111, a, b); + let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1<<3); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_and_si512() { + let a = _mm512_set_epi32(1<<1|1<<2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1<<1|1<<3); + let b = _mm512_set_epi32(1<<1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1<<3|1<<4); + let r = _mm512_and_epi32(a, b); + let e = _mm512_set_epi32(1<<1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1<<3); + assert_eq_m512i(r, e); } #[simd_test(enable = "avx512f")] - unsafe fn test_mm512_set1_epi32() { - let r = _mm512_set_epi32(2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2); - assert_eq_m512i(r, _mm512_set1_epi32(2)); + unsafe fn test_mm512_or_epi32() { + let a = _mm512_set_epi32(1<<1|1<<2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1<<1|1<<3); + let b = _mm512_set_epi32(1<<1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1<<3|1<<4); + let r = _mm512_or_epi32(a, b); + let e = _mm512_set_epi32(1<<1|1<<2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1<<1|1<<3|1<<4); + assert_eq_m512i(r, e); } #[simd_test(enable = "avx512f")] - unsafe fn test_mm512_setzero_si512() { - assert_eq_m512i(_mm512_set1_epi32(0), _mm512_setzero_si512()); + unsafe fn test_mm512_mask_or_epi32() { + let a = _mm512_set_epi32(1<<1|1<<2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1<<1|1<<3); + let b = _mm512_set_epi32(1<<1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1<<3|1<<4); + let r = _mm512_mask_or_epi32(a, 0, a, b); + assert_eq_m512i(r, a); + + let r = _mm512_mask_or_epi32(a, 0b11111111_11111111, a, b); + let e = _mm512_set_epi32(1<<1|1<<2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1<<1|1<<3|1<<4); + assert_eq_m512i(r, e); } #[simd_test(enable = "avx512f")] - unsafe fn test_mm512_set_ps() { - let r = _mm512_setr_ps( - 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., - ); - assert_eq_m512( - r, - _mm512_set_ps( - 15., 14., 13., 12., 11., 10., 9., 8., 7., 6., 5., 4., 3., 2., 1., 0., - ), - ) + unsafe fn test_mm512_maskz_or_epi32() { + let a = _mm512_set_epi32(1<<1|1<<2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1<<1|1<<3); + let b = _mm512_set_epi32(1<<1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1<<3|1<<4); + let r = _mm512_maskz_or_epi32(0, a, b); + assert_eq_m512i(r, _mm512_setzero_si512()); + + let r = _mm512_maskz_or_epi32(0b00000000_11111111, a, b); + let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1<<1|1<<3|1<<4); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_or_si512() { + let a = _mm512_set_epi32(1<<1|1<<2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1<<1|1<<3); + let b = _mm512_set_epi32(1<<1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1<<3|1<<4); + let r = _mm512_or_epi32(a, b); + let e = _mm512_set_epi32(1<<1|1<<2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1<<1|1<<3|1<<4); + assert_eq_m512i(r, e); } #[simd_test(enable = "avx512f")] - unsafe fn test_mm512_setr_ps() { - let r = _mm512_set_ps( - 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., - ); - assert_eq_m512( - r, - 
 #[simd_test(enable = "avx512f")]
- unsafe fn test_mm512_setr_ps() {
- let r = _mm512_set_ps(
- 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
- );
- assert_eq_m512(
- r,
- _mm512_setr_ps(
- 15., 14., 13., 12., 11., 10., 9., 8., 7., 6., 5., 4., 3., 2., 1., 0.,
- ),
- )
+ unsafe fn test_mm512_xor_epi32() {
+ let a = _mm512_set_epi32(1<<1|1<<2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1<<1|1<<3);
+ let b = _mm512_set_epi32(1<<1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1<<3|1<<4);
+ let r = _mm512_xor_epi32(a, b);
+ let e = _mm512_set_epi32(1<<2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1<<1|1<<4);
+ assert_eq_m512i(r, e);
 }
 #[simd_test(enable = "avx512f")]
- unsafe fn test_mm512_set1_ps() {
- #[rustfmt::skip]
- let expected = _mm512_set_ps(2., 2., 2., 2., 2., 2., 2., 2.,
- 2., 2., 2., 2., 2., 2., 2., 2.);
- assert_eq_m512(expected, _mm512_set1_ps(2.));
+ unsafe fn test_mm512_mask_xor_epi32() {
+ let a = _mm512_set_epi32(1<<1|1<<2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1<<1|1<<3);
+ let b = _mm512_set_epi32(1<<1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1<<3|1<<4);
+ let r = _mm512_mask_xor_epi32(a, 0, a, b);
+ assert_eq_m512i(r, a);
+
+ let r = _mm512_mask_xor_epi32(a, 0b01111111_11111111, a, b);
+ let e = _mm512_set_epi32(1<<1|1<<2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1<<1|1<<4);
+ assert_eq_m512i(r, e);
 }
 #[simd_test(enable = "avx512f")]
- unsafe fn test_mm512_setzero_ps() {
- assert_eq_m512(_mm512_setzero_ps(), _mm512_set1_ps(0.));
+ unsafe fn test_mm512_maskz_xor_epi32() {
+ let a = _mm512_set_epi32(1<<1|1<<2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1<<1|1<<3);
+ let b = _mm512_set_epi32(1<<1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1<<3|1<<4);
+ let r = _mm512_maskz_xor_epi32(0, a, b);
+ assert_eq_m512i(r, _mm512_setzero_si512());
+
+ let r = _mm512_maskz_xor_epi32(0b00000000_11111111, a, b);
+ let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1<<1|1<<4);
+ assert_eq_m512i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_xor_si512() {
+ let a = _mm512_set_epi32(1<<1|1<<2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1<<1|1<<3);
+ let b = _mm512_set_epi32(1<<1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1<<3|1<<4);
+ let r = _mm512_xor_si512(a, b);
+ let e = _mm512_set_epi32(1<<2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1<<1|1<<4);
+ assert_eq_m512i(r, e);
 }
 #[simd_test(enable = "avx512f")]
- unsafe fn test_mm512_loadu_pd() {
- let a = &[4., 3., 2., 5., 8., 9., 64., 50.];
- let p = a.as_ptr();
- let r = _mm512_loadu_pd(black_box(p));
- let e = _mm512_setr_pd(4., 3., 2., 5., 8., 9., 64., 50.);
- assert_eq_m512d(r, e);
+ unsafe fn test_mm512_kand() {
+ let a: u16 = 0b11001100_00110011;
+ let b: u16 = 0b11001100_00110011;
+ let r = _mm512_kand(a, b);
+ let e: u16 = 0b11001100_00110011;
+ assert_eq!(r, e);
 }
 #[simd_test(enable = "avx512f")]
- unsafe fn test_mm512_storeu_pd() {
- let a = _mm512_set1_pd(9.);
- let mut r = _mm512_undefined_pd();
- _mm512_storeu_pd(&mut r as *mut _ as *mut f64, a);
- assert_eq_m512d(r, a);
+ unsafe fn test_kand_mask16() {
+ let a: u16 = 0b11001100_00110011;
+ let b: u16 = 0b11001100_00110011;
+ let r = _kand_mask16(a, b);
+ let e: u16 = 0b11001100_00110011;
+ assert_eq!(r, e);
 }
 #[simd_test(enable = "avx512f")]
- unsafe fn test_mm512_loadu_ps() {
- let a = &[
- 4., 3., 2., 5., 8., 9., 64., 50., -4., -3., -2., -5., -8., -9., -64., -50.,
- ];
- let p = a.as_ptr();
- let r = _mm512_loadu_ps(black_box(p));
- let e = _mm512_setr_ps(
- 4., 3., 2., 5., 8., 9., 64., 50., -4., -3., -2., -5., -8., -9., -64., -50.,
- );
- assert_eq_m512(r, e);
+ unsafe fn test_mm512_kor() {
+ let a: u16 = 0b11001100_00110011;
+ let b: u16 = 0b00101110_00001011;
+ let r = _mm512_kor(a, b);
+ let e: u16
= 0b11101110_00111011; + assert_eq!(r, e); } #[simd_test(enable = "avx512f")] - unsafe fn test_mm512_storeu_ps() { - let a = _mm512_set1_ps(9.); - let mut r = _mm512_undefined_ps(); - _mm512_storeu_ps(&mut r as *mut _ as *mut f32, a); - assert_eq_m512(r, a); + unsafe fn test_kor_mask16() { + let a: u16 = 0b11001100_00110011; + let b: u16 = 0b00101110_00001011; + let r = _kor_mask16(a, b); + let e: u16 = 0b11101110_00111011; + assert_eq!(r, e); } #[simd_test(enable = "avx512f")] - unsafe fn test_mm512_setr_pd() { - let r = _mm512_set_pd(0., 1., 2., 3., 4., 5., 6., 7.); - assert_eq_m512d(r, _mm512_setr_pd(7., 6., 5., 4., 3., 2., 1., 0.)); + unsafe fn test_mm512_kxor() { + let a: u16 = 0b11001100_00110011; + let b: u16 = 0b00101110_00001011; + let r = _mm512_kxor(a, b); + let e: u16 = 0b11100010_00111000; + assert_eq!(r, e); } #[simd_test(enable = "avx512f")] - unsafe fn test_mm512_set_pd() { - let r = _mm512_setr_pd(0., 1., 2., 3., 4., 5., 6., 7.); - assert_eq_m512d(r, _mm512_set_pd(7., 6., 5., 4., 3., 2., 1., 0.)); + unsafe fn test_kxor_mask16() { + let a: u16 = 0b11001100_00110011; + let b: u16 = 0b00101110_00001011; + let r = _kxor_mask16(a, b); + let e: u16 = 0b11100010_00111000; + assert_eq!(r, e); } + } diff --git a/crates/core_arch/src/x86_64/avx512f.rs b/crates/core_arch/src/x86_64/avx512f.rs index ce2970ee51..da728413e7 100644 --- a/crates/core_arch/src/x86_64/avx512f.rs +++ b/crates/core_arch/src/x86_64/avx512f.rs @@ -41,6 +41,8 @@ pub unsafe fn _mm512_setr_epi64( transmute(r) } + + #[cfg(test)] mod tests { use std; @@ -854,4 +856,554 @@ mod tests { } assert_eq!(&arr[..], &expected[..],); } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_rol_epi64() { + let a = _mm512_set_epi64( 1<<63, 1<<32, 1<<32, 1<<32, 1<<32, 1<<32, 1<<32, 1<<32 ); + let r = _mm512_rol_epi64(a, 1); + let e = _mm512_set_epi64( 1<<0, 1<<33, 1<<33, 1<<33, 1<<33, 1<<33, 1<<33, 1<<33 ); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_rol_epi64() { + let a = _mm512_set_epi64( 1<<63, 1<<32, 1<<32, 1<<32, 1<<32, 1<<32, 1<<32, 1<<32 ); + let r = _mm512_mask_rol_epi64(a, 0, a, 1); + assert_eq_m512i(r, a); + + let r = _mm512_mask_rol_epi64(a, 0b11111111, a, 1); + let e = _mm512_set_epi64( 1<<0, 1<<33, 1<<33, 1<<33, 1<<33, 1<<33, 1<<33, 1<<33 ); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_rol_epi64() { + let a = _mm512_set_epi64( 1<<32, 1<<32, 1<<32, 1<<32, 1<<32, 1<<32, 1<<32, 1<<63 ); + let r = _mm512_maskz_rol_epi64(0, a, 1); + assert_eq_m512i(r, _mm512_setzero_si512()); + + let r = _mm512_maskz_rol_epi64(0b00001111, a, 1); + let e = _mm512_set_epi64( 0, 0, 0, 0, 1<<33, 1<<33, 1<<33, 1<<0 ); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_ror_epi64() { + let a = _mm512_set_epi64( 1<<0, 1<<32, 1<<32, 1<<32, 1<<32, 1<<32, 1<<32, 1<<32 ); + let r = _mm512_ror_epi64(a, 1); + let e = _mm512_set_epi64( 1<<63, 1<<31, 1<<31, 1<<31, 1<<31, 1<<31, 1<<31, 1<<31 ); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_ror_epi64() { + let a = _mm512_set_epi64( 1<<0, 1<<32, 1<<32, 1<<32, 1<<32, 1<<32, 1<<32, 1<<32 ); + let r = _mm512_mask_ror_epi64(a, 0, a, 1); + assert_eq_m512i(r, a); + + let r = _mm512_mask_ror_epi64(a, 0b11111111, a, 1); + let e = _mm512_set_epi64( 1<<63, 1<<31, 1<<31, 1<<31, 1<<31, 1<<31, 1<<31, 1<<31 ); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_ror_epi64() { + let a = 
_mm512_set_epi64( 1<<32, 1<<32, 1<<32, 1<<32, 1<<32, 1<<32, 1<<32, 1<<0 ); + let r = _mm512_maskz_ror_epi64(0, a, 1); + assert_eq_m512i(r, _mm512_setzero_si512()); + + let r = _mm512_maskz_ror_epi64(0b00001111, a, 1); + let e = _mm512_set_epi64( 0, 0, 0, 0, 1<<31, 1<<31, 1<<31, 1<<63 ); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_slli_epi64() { + let a = _mm512_set_epi64( 1<<63, 1<<32, 1<<32, 1<<32, 1<<32, 1<<32, 1<<32, 1<<32 ); + let r = _mm512_slli_epi64(a, 1); + let e = _mm512_set_epi64( 0, 1<<33, 1<<33, 1<<33, 1<<33, 1<<33, 1<<33, 1<<33 ); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_slli_epi64() { + let a = _mm512_set_epi64( 1<<63, 1<<32, 1<<32, 1<<32, 1<<32, 1<<32, 1<<32, 1<<32 ); + let r = _mm512_mask_slli_epi64(a, 0, a, 1); + assert_eq_m512i(r, a); + + let r = _mm512_mask_slli_epi64(a, 0b11111111, a, 1); + let e = _mm512_set_epi64( 0, 1<<33, 1<<33, 1<<33, 1<<33, 1<<33, 1<<33, 1<<33 ); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_slli_epi64() { + let a = _mm512_set_epi64( 1<<32, 1<<32, 1<<32, 1<<32, 1<<32, 1<<32, 1<<32, 1<<63 ); + let r = _mm512_maskz_slli_epi64(0, a, 1); + assert_eq_m512i(r, _mm512_setzero_si512()); + + let r = _mm512_maskz_slli_epi64(0b00001111, a, 1); + let e = _mm512_set_epi64( 0, 0, 0, 0, 1<<33, 1<<33, 1<<33, 0 ); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_srli_epi64() { + let a = _mm512_set_epi64( 1<<0, 1<<32, 1<<32, 1<<32, 1<<32, 1<<32, 1<<32, 1<<32 ); + let r = _mm512_srli_epi64(a, 1); + let e = _mm512_set_epi64( 0, 1<<31, 1<<31, 1<<31, 1<<31, 1<<31, 1<<31, 1<<31 ); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_srli_epi64() { + let a = _mm512_set_epi64( 1<<0, 1<<32, 1<<32, 1<<32, 1<<32, 1<<32, 1<<32, 1<<32 ); + let r = _mm512_mask_srli_epi64(a, 0, a, 1); + assert_eq_m512i(r, a); + + let r = _mm512_mask_srli_epi64(a, 0b11111111, a, 1); + let e = _mm512_set_epi64( 0, 1<<31, 1<<31, 1<<31, 1<<31, 1<<31, 1<<31, 1<<31 ); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_srli_epi64() { + let a = _mm512_set_epi64( 1<<32, 1<<32, 1<<32, 1<<32, 1<<32, 1<<32, 1<<32, 1<<0 ); + let r = _mm512_maskz_srli_epi64(0, a, 1); + assert_eq_m512i(r, _mm512_setzero_si512()); + + let r = _mm512_maskz_srli_epi64(0b00001111, a, 1); + let e = _mm512_set_epi64( 0, 0, 0, 0, 1<<31, 1<<31, 1<<31, 0 ); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_rolv_epi64() { + let a = _mm512_set_epi64( 1<<32, 1<<63, 1<<32, 1<<32, 1<<32, 1<<32, 1<<32, 1<<32 ); + let b = _mm512_set_epi64( 0, 1, 2, 3, 4, 5, 6, 7 ); + let r = _mm512_rolv_epi64(a, b); + let e = _mm512_set_epi64( 1<<32, 1<<0, 1<<34, 1<<35, 1<<36, 1<<37, 1<<38, 1<<39 ); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_rolv_epi64() { + let a = _mm512_set_epi64( 1<<32, 1<<63, 1<<32, 1<<32, 1<<32, 1<<32, 1<<32, 1<<32 ); + let b = _mm512_set_epi64( 0, 1, 2, 3, 4, 5, 6, 7 ); + let r = _mm512_mask_rolv_epi64(a, 0, a, b); + assert_eq_m512i(r, a); + + let r = _mm512_mask_rolv_epi64(a, 0b11111111, a, b); + let e = _mm512_set_epi64( 1<<32, 1<<0, 1<<34, 1<<35, 1<<36, 1<<37, 1<<38, 1<<39 ); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_rolv_epi64() { + let a = _mm512_set_epi64( 1<<32, 1<<32, 1<<32, 1<<32, 1<<32, 1<<32, 1<<32, 1<<62 ); + let b = 
_mm512_set_epi64( 0, 1, 2, 3, 4, 5, 6, 2 ); + let r = _mm512_maskz_rolv_epi64(0, a, b); + assert_eq_m512i(r, _mm512_setzero_si512()); + + let r = _mm512_maskz_rolv_epi64(0b00001111, a, b); + let e = _mm512_set_epi64( 0, 0, 0, 0, 1<<36, 1<<37, 1<<38, 1<<0 ); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_rorv_epi64() { + let a = _mm512_set_epi64( 1<<32, 1<<0, 1<<32, 1<<32, 1<<32, 1<<32, 1<<32, 1<<32 ); + let b = _mm512_set_epi64( 0, 1, 2, 3, 4, 5, 6, 7 ); + let r = _mm512_rorv_epi64(a, b); + let e = _mm512_set_epi64( 1<<32, 1<<63, 1<<30, 1<<29, 1<<28, 1<<27, 1<<26, 1<<25 ); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_rorv_epi64() { + let a = _mm512_set_epi64( 1<<32, 1<<0, 1<<32, 1<<32, 1<<32, 1<<32, 1<<32, 1<<32 ); + let b = _mm512_set_epi64( 0, 1, 2, 3, 4, 5, 6, 7 ); + let r = _mm512_mask_rorv_epi64(a, 0, a, b); + assert_eq_m512i(r, a); + + let r = _mm512_mask_rorv_epi64(a, 0b11111111, a, b); + let e = _mm512_set_epi64( 1<<32, 1<<63, 1<<30, 1<<29, 1<<28, 1<<27, 1<<26, 1<<25 ); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_rorv_epi64() { + let a = _mm512_set_epi64( 1<<32, 1<<32, 1<<32, 1<<32, 1<<32, 1<<32, 1<<32, 1<<0 ); + let b = _mm512_set_epi64( 0, 1, 2, 3, 4, 5, 6, 2 ); + let r = _mm512_maskz_rorv_epi64(0, a, b); + assert_eq_m512i(r, _mm512_setzero_si512()); + + let r = _mm512_maskz_rorv_epi64(0b00001111, a, b); + let e = _mm512_set_epi64( 0, 0, 0, 0, 1<<28, 1<<27, 1<<26, 1<<62 ); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_sllv_epi64() { + let a = _mm512_set_epi64( 1<<32, 1<<63, 1<<32, 1<<32, 1<<32, 1<<32, 1<<32, 1<<32 ); + let count = _mm512_set_epi64( 0, 2, 2, 3, 4, 5, 6, 7 ); + let r = _mm512_sllv_epi64(a, count); + let e = _mm512_set_epi64( 1<<32, 0, 1<<34, 1<<35, 1<<36, 1<<37, 1<<38, 1<<39 ); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_sllv_epi64() { + let a = _mm512_set_epi64( 1<<32, 1<<32, 1<<63, 1<<32, 1<<32, 1<<32, 1<<32, 1<<32 ); + let count = _mm512_set_epi64( 0, 1, 2, 3, 4, 5, 6, 7 ); + let r = _mm512_mask_sllv_epi64(a, 0, a, count); + assert_eq_m512i(r, a); + + let r = _mm512_mask_sllv_epi64(a, 0b11111111, a, count); + let e = _mm512_set_epi64( 1<<32, 1<<33, 0, 1<<35, 1<<36, 1<<37, 1<<38, 1<<39 ); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_sllv_epi64() { + let a = _mm512_set_epi64( 1<<32, 1<<32, 1<<32, 1<<32, 1<<32, 1<<32, 1<<32, 1<<63 ); + let count = _mm512_set_epi64( 0, 1, 2, 3, 4, 5, 6, 1 ); + let r = _mm512_maskz_sllv_epi64(0, a, count); + assert_eq_m512i(r, _mm512_setzero_si512()); + + let r = _mm512_maskz_sllv_epi64(0b00001111, a, count); + let e = _mm512_set_epi64( 0, 0, 0, 0, 1<<36, 1<<37, 1<<38, 0 ); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_srlv_epi64() { + let a = _mm512_set_epi64( 1<<32, 1<<0, 1<<32, 1<<32, 1<<32, 1<<32, 1<<32, 1<<32 ); + let count = _mm512_set_epi64( 0, 1, 2, 3, 4, 5, 6, 7 ); + let r = _mm512_srlv_epi64(a, count); + let e = _mm512_set_epi64( 1<<32, 0, 1<<30, 1<<29, 1<<28, 1<<27, 1<<26, 1<<25 ); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_srlv_epi64() { + let a = _mm512_set_epi64( 1<<32, 1<<0, 1<<32, 1<<32, 1<<32, 1<<32, 1<<32, 1<<32 ); + let count = _mm512_set_epi64( 0, 1, 2, 3, 4, 5, 6, 7 ); + let r = _mm512_mask_srlv_epi64(a, 0, a, count); + assert_eq_m512i(r, 
a); + + let r = _mm512_mask_srlv_epi64(a, 0b11111111, a, count); + let e = _mm512_set_epi64( 1<<32, 0, 1<<30, 1<<29, 1<<28, 1<<27, 1<<26, 1<<25 ); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_srlv_epi64() { + let a = _mm512_set_epi64( 1<<32, 1<<32, 1<<32, 1<<32, 1<<32, 1<<32, 1<<32, 1<<0 ); + let count = _mm512_set_epi64( 0, 1, 2, 3, 4, 5, 6, 7 ); + let r = _mm512_maskz_srlv_epi64(0, a, count); + assert_eq_m512i(r, _mm512_setzero_si512()); + + let r = _mm512_maskz_srlv_epi64(0b00001111, a, count); + let e = _mm512_set_epi64( 0, 0, 0, 0, 1<<28, 1<<27, 1<<26, 0 ); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_sll_epi64() { + let a = _mm512_set_epi64( 1<<63, 1<<32, 1<<32, 1<<32, 1<<32, 1<<32, 1<<32, 1<<32 ); + let count = _mm_set_epi64x(0, 1); + let r = _mm512_sll_epi64(a, count); + let e = _mm512_set_epi64( 0, 1<<33, 1<<33, 1<<33, 1<<33, 1<<33, 1<<33, 1<<33 ); + assert_eq_m512i(r, e); + + let count = _mm_set_epi64x(1, 0); + let r = _mm512_sll_epi64(a, count); + assert_eq_m512i(r, a); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_sll_epi64() { + let a = _mm512_set_epi64( 1<<63, 1<<32, 1<<32, 1<<32, 1<<32, 1<<32, 1<<32, 1<<32 ); + let count = _mm_set_epi64x(0, 1); + let r = _mm512_mask_sll_epi64(a, 0, a, count); + assert_eq_m512i(r, a); + + let r = _mm512_mask_sll_epi64(a, 0b11111111, a, count); + let e = _mm512_set_epi64( 0, 1<<33, 1<<33, 1<<33, 1<<33, 1<<33, 1<<33, 1<<33 ); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_sll_epi64() { + let a = _mm512_set_epi64( 1<<32, 1<<32, 1<<32, 1<<32, 1<<32, 1<<32, 1<<32, 1<<63 ); + let count = _mm_set_epi64x(0, 1); + let r = _mm512_maskz_sll_epi64(0, a, count); + assert_eq_m512i(r, _mm512_setzero_si512()); + + let r = _mm512_maskz_sll_epi64(0b00001111, a, count); + let e = _mm512_set_epi64( 0, 0, 0, 0, 1<<33, 1<<33, 1<<33, 0 ); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_srl_epi64() { + let a = _mm512_set_epi64( 1<<0, 1<<32, 1<<32, 1<<32, 1<<32, 1<<32, 1<<32, 1<<32 ); + let count = _mm_set_epi64x(0, 1); + let r = _mm512_srl_epi64(a, count); + let e = _mm512_set_epi64( 0, 1<<31, 1<<31, 1<<31, 1<<31, 1<<31, 1<<31, 1<<31 ); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_srl_epi64() { + let a = _mm512_set_epi64( 1<<0, 1<<32, 1<<32, 1<<32, 1<<32, 1<<32, 1<<32, 1<<32 ); + let count = _mm_set_epi64x(0, 1); + let r = _mm512_mask_srl_epi64(a, 0, a, count); + assert_eq_m512i(r, a); + + let r = _mm512_mask_srl_epi64(a, 0b11111111, a, count); + let e = _mm512_set_epi64( 0, 1<<31, 1<<31, 1<<31, 1<<31, 1<<31, 1<<31, 1<<31 ); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_srl_epi64() { + let a = _mm512_set_epi64( 1<<32, 1<<32, 1<<32, 1<<32, 1<<32, 1<<32, 1<<32, 1<<0 ); + let count = _mm_set_epi64x(0, 1); + let r = _mm512_maskz_srl_epi64(0, a, count); + assert_eq_m512i(r, _mm512_setzero_si512()); + + let r = _mm512_maskz_srl_epi64(0b00001111, a, count); + let e = _mm512_set_epi64( 0, 0, 0, 0, 1<<31, 1<<31, 1<<31, 0 ); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_sra_epi64() { + let a = _mm512_set_epi64(1, -8, 0, 0, 0, 0, 15, -16); + let count = _mm_set_epi64x(0, 2); + let r = _mm512_sra_epi64(a, count); + let e = _mm512_set_epi64(0, -2, 0, 0, 0, 0, 3, -4); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + 
unsafe fn test_mm512_mask_sra_epi64() { + let a = _mm512_set_epi64(1, -8, 0, 0, 0, 0, 15, -16); + let count = _mm_set_epi64x(0, 2); + let r = _mm512_mask_sra_epi64(a, 0, a, count); + assert_eq_m512i(r, a); + + let r = _mm512_mask_sra_epi64(a, 0b11111111, a, count); + let e = _mm512_set_epi64(0, -2, 0, 0, 0, 0, 3, -4); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_sra_epi64() { + let a = _mm512_set_epi64(1, -8, 0, 0, 0, 0, 15, -16); + let count = _mm_set_epi64x(0, 2); + let r = _mm512_maskz_sra_epi64(0, a, count); + assert_eq_m512i(r, _mm512_setzero_si512()); + + let r = _mm512_maskz_sra_epi64(0b00001111, a, count); + let e = _mm512_set_epi64( 0, 0, 0, 0, 0, 0, 3, -4); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_srav_epi64() { + let a = _mm512_set_epi64(1, -8, 0, 0, 0, 0, 15, -16); + let count = _mm512_set_epi64(2, 2, 0, 0, 0, 0, 2, 1); + let r = _mm512_srav_epi64(a, count); + let e = _mm512_set_epi64(0, -2, 0, 0, 0, 0, 3, -8); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_srav_epi64() { + let a = _mm512_set_epi64(1, -8, 0, 0, 0, 0, 15, -16); + let count = _mm512_set_epi64(2, 2, 0, 0, 0, 0, 2, 1); + let r = _mm512_mask_srav_epi64(a, 0, a, count); + assert_eq_m512i(r, a); + + let r = _mm512_mask_srav_epi64(a, 0b11111111, a, count); + let e = _mm512_set_epi64(0, -2, 0, 0, 0, 0, 3, -8); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_srav_epi64() { + let a = _mm512_set_epi64(1, -8, 0, 0, 0, 0, 15, -16); + let count = _mm512_set_epi64(2, 2, 0, 0, 0, 0, 2, 1); + let r = _mm512_maskz_srav_epi64(0, a, count); + assert_eq_m512i(r, _mm512_setzero_si512()); + + let r = _mm512_maskz_srav_epi64(0b00001111, a, count); + let e = _mm512_set_epi64( 0, 0, 0, 0, 0, 0, 3, -8); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_srai_epi64() { + let a = _mm512_set_epi64(1, -4, 15, 0, 0, 0, 0, -16); + let r = _mm512_srai_epi64(a, 2); + let e = _mm512_set_epi64(0, -1, 3, 0, 0, 0, 0, -4); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_srai_epi64() { + let a = _mm512_set_epi64(1, -4, 15, 0, 0, 0, 0, -16); + let r = _mm512_mask_srai_epi64(a, 0, a, 2); + assert_eq_m512i(r, a); + + let r = _mm512_mask_srai_epi64(a, 0b11111111, a, 2); + let e = _mm512_set_epi64( 0, -1, 3, 0, 0, 0, 0, -4); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_srai_epi64() { + let a = _mm512_set_epi64(1, -4, 15, 0, 0, 0, 0, -16); + let r = _mm512_maskz_srai_epi64(0, a, 2); + assert_eq_m512i(r, _mm512_setzero_si512()); + + let r = _mm512_maskz_srai_epi64(0b00001111, a, 2); + let e = _mm512_set_epi64(0, 0, 0, 0, 0, 0, 0, -4); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_and_epi64() { + let a = _mm512_set_epi64(1<<0|1<<15, 0, 0, 0, 0, 0, 0, 1<<1|1<<2|1<<3); + let b = _mm512_set_epi64(1<<13, 0, 0, 0, 0, 0, 0, 1<<1|1<<2|1<<3); + let r = _mm512_and_epi64(a, b); + let e = _mm512_set_epi64(0, 0, 0, 0, 0, 0, 0, 1<<1|1<<2|1<<3); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_and_epi64() { + let a = _mm512_set_epi64(1<<0|1<<15, 0, 0, 0, 0, 0, 0, 1<<1|1<<2|1<<3); + let b = _mm512_set_epi64(1<<13, 0, 0, 0, 0, 0, 0, 1<<1|1<<2|1<<3); + let r = _mm512_mask_and_epi64(a, 0, a, b); + let e = _mm512_set_epi64(1<<0|1<<15, 0, 0, 0, 0, 0, 0, 1<<1|1<<2|1<<3); + 
assert_eq_m512i(r, e);
+
+ let r = _mm512_mask_and_epi64(a, 0b01111111, a, b);
+ let e = _mm512_set_epi64(1<<0|1<<15, 0, 0, 0, 0, 0, 0, 1<<1|1<<2|1<<3);
+ assert_eq_m512i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_maskz_and_epi64() {
+ let a = _mm512_set_epi64(1<<0|1<<15, 0, 0, 0, 0, 0, 0, 1<<1|1<<2|1<<3);
+ let b = _mm512_set_epi64(1<<13, 0, 0, 0, 0, 0, 0, 1<<1|1<<2|1<<3);
+ let r = _mm512_maskz_and_epi64(0, a, b);
+ let e = _mm512_set_epi64(0, 0, 0, 0, 0, 0, 0, 0);
+ assert_eq_m512i(r, e);
+
+ let r = _mm512_maskz_and_epi64(0b00001111, a, b);
+ let e = _mm512_set_epi64(0, 0, 0, 0, 0, 0, 0, 1<<1|1<<2|1<<3);
+ assert_eq_m512i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_and_si512() {
+ let a = _mm512_set_epi64(1<<0|1<<15, 0, 0, 0, 0, 0, 0, 1<<1|1<<2|1<<3);
+ let b = _mm512_set_epi64(1<<13, 0, 0, 0, 0, 0, 0, 1<<1|1<<2|1<<3);
+ let r = _mm512_and_si512(a, b);
+ let e = _mm512_set_epi64(0, 0, 0, 0, 0, 0, 0, 1<<1|1<<2|1<<3);
+ assert_eq_m512i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_or_epi64() {
+ let a = _mm512_set_epi64(1<<0|1<<15, 0, 0, 0, 0, 0, 0, 1<<1|1<<2|1<<3);
+ let b = _mm512_set_epi64(1<<13, 0, 0, 0, 0, 0, 0, 1<<1|1<<2|1<<3);
+ let r = _mm512_or_epi64(a, b);
+ let e = _mm512_set_epi64(1<<0|1<<13|1<<15, 0, 0, 0, 0, 0, 0, 1<<1|1<<2|1<<3);
+ assert_eq_m512i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_mask_or_epi64() {
+ let a = _mm512_set_epi64(1<<0|1<<15, 0, 0, 0, 0, 0, 0, 1<<1|1<<2|1<<3);
+ let b = _mm512_set_epi64(1<<13, 0, 0, 0, 0, 0, 0, 1<<1|1<<2|1<<3);
+ let r = _mm512_mask_or_epi64(a, 0, a, b);
+ let e = _mm512_set_epi64(1<<0|1<<15, 0, 0, 0, 0, 0, 0, 1<<1|1<<2|1<<3);
+ assert_eq_m512i(r, e);
+
+ let r = _mm512_mask_or_epi64(a, 0b11111111, a, b);
+ let e = _mm512_set_epi64(1<<0|1<<13|1<<15, 0, 0, 0, 0, 0, 0, 1<<1|1<<2|1<<3);
+ assert_eq_m512i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_maskz_or_epi64() {
+ let a = _mm512_set_epi64(1<<0|1<<15, 0, 0, 0, 0, 0, 0, 1<<1|1<<2|1<<3);
+ let b = _mm512_set_epi64(1<<13, 0, 0, 0, 0, 0, 0, 1<<1|1<<2|1<<3);
+ let r = _mm512_maskz_or_epi64(0, a, b);
+ let e = _mm512_set_epi64(0, 0, 0, 0, 0, 0, 0, 0);
+ assert_eq_m512i(r, e);
+
+ let r = _mm512_maskz_or_epi64(0b00001111, a, b);
+ let e = _mm512_set_epi64(0, 0, 0, 0, 0, 0, 0, 1<<1|1<<2|1<<3);
+ assert_eq_m512i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_or_si512() {
+ let a = _mm512_set_epi64(1<<0|1<<15, 0, 0, 0, 0, 0, 0, 1<<1|1<<2|1<<3);
+ let b = _mm512_set_epi64(1<<13, 0, 0, 0, 0, 0, 0, 1<<1|1<<2|1<<3);
+ let r = _mm512_or_si512(a, b);
+ let e = _mm512_set_epi64(1<<0|1<<13|1<<15, 0, 0, 0, 0, 0, 0, 1<<1|1<<2|1<<3);
+ assert_eq_m512i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_xor_epi64() {
+ let a = _mm512_set_epi64(1<<0|1<<15, 0, 0, 0, 0, 0, 0, 1<<1|1<<2|1<<3);
+ let b = _mm512_set_epi64(1<<13, 0, 0, 0, 0, 0, 0, 1<<1|1<<2|1<<3);
+ let r = _mm512_xor_epi64(a, b);
+ let e = _mm512_set_epi64(1<<0|1<<13|1<<15, 0, 0, 0, 0, 0, 0, 0);
+ assert_eq_m512i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_mask_xor_epi64() {
+ let a = _mm512_set_epi64(1<<0|1<<15, 0, 0, 0, 0, 0, 0, 1<<1|1<<2|1<<3);
+ let b = _mm512_set_epi64(1<<13, 0, 0, 0, 0, 0, 0, 1<<1|1<<2|1<<3);
+ let r = _mm512_mask_xor_epi64(a, 0, a, b);
+ let e = _mm512_set_epi64(1<<0|1<<15, 0, 0, 0, 0, 0, 0, 1<<1|1<<2|1<<3);
+ assert_eq_m512i(r, e);
+
+ let r = _mm512_mask_xor_epi64(a, 0b11111111, a, b);
+ let e = _mm512_set_epi64(1<<0|1<<13|1<<15, 0, 0, 0, 0, 0, 0, 0);
+ assert_eq_m512i(r, e);
+ }
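// The _si512 tests below may look redundant next to the _epi32/_epi64 ones:
// for AND/OR/XOR the lane split is irrelevant, only the 512 raw bits matter.
// A scalar sketch of that equivalence (plain Rust, not the stdarch API):
fn xor_bits_model(a: [u64; 8], b: [u64; 8]) -> [u64; 8] {
    let mut dst = [0u64; 8];
    for i in 0..8 {
        // Identical bits come out whether the register is viewed as 8 x i64
        // (_mm512_xor_epi64) or as an opaque __m512i (_mm512_xor_si512).
        dst[i] = a[i] ^ b[i];
    }
    dst
}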
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_maskz_xor_epi64() {
+ let a = _mm512_set_epi64(1<<0|1<<15, 0, 0, 0, 0, 0, 0, 1<<1|1<<2|1<<3);
+ let b = _mm512_set_epi64(1<<13, 0, 0, 0, 0, 0, 0, 1<<1|1<<2|1<<3);
+ let r = _mm512_maskz_xor_epi64(0, a, b);
+ let e = _mm512_set_epi64(0, 0, 0, 0, 0, 0, 0, 0);
+ assert_eq_m512i(r, e);
+
+ let r = _mm512_maskz_xor_epi64(0b00001111, a, b);
+ let e = _mm512_set_epi64(0, 0, 0, 0, 0, 0, 0, 0);
+ assert_eq_m512i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_xor_si512() {
+ let a = _mm512_set_epi64(1<<0|1<<15, 0, 0, 0, 0, 0, 0, 1<<1|1<<2|1<<3);
+ let b = _mm512_set_epi64(1<<13, 0, 0, 0, 0, 0, 0, 1<<1|1<<2|1<<3);
+ let r = _mm512_xor_si512(a, b);
+ let e = _mm512_set_epi64(1<<0|1<<13|1<<15, 0, 0, 0, 0, 0, 0, 0);
+ assert_eq_m512i(r, e);
+ }
 }

From a77dfebdecbb7e3194bddfe9fed1531cbb516e03 Mon Sep 17 00:00:00 2001
From: jironglin
Date: Wed, 19 Aug 2020 23:45:29 +0000
Subject: [PATCH 2/9] add rustc_args_required_const

---
 crates/core_arch/src/x86/avx512f.rs | 145 ++++++++++++++++++++++++-
 crates/core_arch/src/x86_64/avx512f.rs | 66 +++++++++++
 2 files changed, 206 insertions(+), 5 deletions(-)

diff --git a/crates/core_arch/src/x86/avx512f.rs b/crates/core_arch/src/x86/avx512f.rs
index dfcc7e7bc1..72e4fd4017 100644
--- a/crates/core_arch/src/x86/avx512f.rs
+++ b/crates/core_arch/src/x86/avx512f.rs
@@ -49,6 +49,44 @@ pub unsafe fn _mm512_maskz_abs_epi32(k: __mmask16, a: __m512i) -> __m512i {
 transmute(simd_select_bitmask(k, abs, zero))
 }
+/// Compute the absolute value of packed signed 64-bit integers in a, and store the unsigned results in dst.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_abs_epi64&expand=48)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vpabsq))]
+pub unsafe fn _mm512_abs_epi64(a: __m512i) -> __m512i {
+ let a = a.as_i64x8();
+ // all-0 is a properly initialized i64x8
+ let zero: i64x8 = mem::zeroed();
+ let sub = simd_sub(zero, a);
+ let cmp: i64x8 = simd_gt(a, zero);
+ transmute(simd_select(cmp, a, sub))
+}
+
+/// Compute the absolute value of packed signed 64-bit integers in a, and store the unsigned results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_abs_epi64&expand=49)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vpabsq))]
+pub unsafe fn _mm512_mask_abs_epi64(src: __m512i, k: __mmask8, a: __m512i) -> __m512i {
+ let abs = _mm512_abs_epi64(a).as_i64x8();
+ transmute(simd_select_bitmask(k, abs, src.as_i64x8()))
+}
+
+/// Compute the absolute value of packed signed 64-bit integers in a, and store the unsigned results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_abs_epi64&expand=50)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vpabsq))]
+pub unsafe fn _mm512_maskz_abs_epi64(k: __mmask8, a: __m512i) -> __m512i {
+ let abs = _mm512_abs_epi64(a).as_i64x8();
+ let zero = _mm512_setzero_si512().as_i64x8();
+ transmute(simd_select_bitmask(k, abs, zero))
+}
+
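// A scalar model of the select-based absolute value above (a sketch, not
// the stdarch API): each lane becomes x > 0 ? x : 0 - x, so 0 stays 0 and
// i64::MIN wraps to itself, matching the vpabsq instruction.
fn abs_epi64_model(a: [i64; 8]) -> [i64; 8] {
    a.map(|x| if x > 0 { x } else { x.wrapping_neg() })
}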
/// Returns vector of type `__m512d` with all elements set to zero.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#avx512techs=AVX512F&expand=33,34,4990&text=_mm512_setzero_pd)
@@ -893,6 +931,7 @@ pub unsafe fn _mm512_maskz_rol_epi32(k: __mmask16, a: __m512i, imm8: u8) -> __m512i {
 #[inline]
 #[target_feature(enable = "avx512f")]
 #[cfg_attr(test, assert_instr(vprord))]
+#[rustc_args_required_const(1)]
 pub unsafe fn _mm512_ror_epi32(a: __m512i, imm8: u8) -> __m512i {
 transmute(vprord(a.as_i32x16(), imm8 as i8))
 }
@@ -903,6 +942,7 @@ pub unsafe fn _mm512_ror_epi32(a: __m512i, imm8: u8) -> __m512i {
 #[inline]
 #[target_feature(enable = "avx512f")]
 #[cfg_attr(test, assert_instr(vprord))]
+#[rustc_args_required_const(3)]
 pub unsafe fn _mm512_mask_ror_epi32(src: __m512i, k: __mmask16, a: __m512i, imm8: u8) -> __m512i {
 let ror = _mm512_ror_epi32(a, imm8).as_i32x16();
 transmute(simd_select_bitmask(k, ror, src.as_i32x16()))
@@ -914,6 +954,7 @@ pub unsafe fn _mm512_mask_ror_epi32(src: __m512i, k: __mmask16, a: __m512i, imm8: u8) -> __m512i {
 #[inline]
 #[target_feature(enable = "avx512f")]
 #[cfg_attr(test, assert_instr(vprord))]
+#[rustc_args_required_const(2)]
 pub unsafe fn _mm512_maskz_ror_epi32(k: __mmask16, a: __m512i, imm8: u8) -> __m512i {
 let ror = _mm512_ror_epi32(a, imm8).as_i32x16();
 let zero = _mm512_setzero_si512().as_i32x16();
@@ -926,6 +967,7 @@ pub unsafe fn _mm512_maskz_ror_epi32(k: __mmask16, a: __m512i, imm8: u8) -> __m512i {
 #[inline]
 #[target_feature(enable = "avx512f")]
 #[cfg_attr(test, assert_instr(vprolq))]
+#[rustc_args_required_const(1)]
 pub unsafe fn _mm512_rol_epi64(a: __m512i, imm8: u8) -> __m512i {
 transmute(vprolq(a.as_i64x8(), imm8 as i8))
 }
@@ -936,6 +978,7 @@ pub unsafe fn _mm512_rol_epi64(a: __m512i, imm8: u8) -> __m512i {
 #[inline]
 #[target_feature(enable = "avx512f")]
 #[cfg_attr(test, assert_instr(vprolq))]
+#[rustc_args_required_const(3)]
 pub unsafe fn _mm512_mask_rol_epi64(src: __m512i, k: __mmask8, a: __m512i, imm8: u8) -> __m512i {
 let rol = _mm512_rol_epi64(a, imm8).as_i64x8();
 transmute(simd_select_bitmask(k, rol, src.as_i64x8()))
@@ -947,6 +990,7 @@ pub unsafe fn _mm512_mask_rol_epi64(src: __m512i, k: __mmask8, a: __m512i, imm8: u8) -> __m512i {
 #[inline]
 #[target_feature(enable = "avx512f")]
 #[cfg_attr(test, assert_instr(vprolq))]
+#[rustc_args_required_const(2)]
 pub unsafe fn _mm512_maskz_rol_epi64(k: __mmask8, a: __m512i, imm8: u8) -> __m512i {
 let rol = _mm512_rol_epi64(a, imm8).as_i64x8();
 let zero = _mm512_setzero_si512().as_i64x8();
@@ -959,6 +1003,7 @@ pub unsafe fn _mm512_maskz_rol_epi64(k: __mmask8, a: __m512i, imm8: u8) -> __m512i {
 #[inline]
 #[target_feature(enable = "avx512f")]
 #[cfg_attr(test, assert_instr(vprorq))]
+#[rustc_args_required_const(1)]
 pub unsafe fn _mm512_ror_epi64(a: __m512i, imm8: u8) -> __m512i {
 transmute(vprorq(a.as_i64x8(), imm8 as i8))
 }
@@ -969,6 +1014,7 @@ pub unsafe fn _mm512_ror_epi64(a: __m512i, imm8: u8) -> __m512i {
 #[inline]
 #[target_feature(enable = "avx512f")]
 #[cfg_attr(test, assert_instr(vprorq))]
+#[rustc_args_required_const(3)]
 pub unsafe fn _mm512_mask_ror_epi64(src: __m512i, k: __mmask8, a: __m512i, imm8: u8) -> __m512i {
 let ror = _mm512_ror_epi64(a, imm8).as_i64x8();
 transmute(simd_select_bitmask(k, ror, src.as_i64x8()))
@@ -980,6 +1026,7 @@ pub unsafe fn _mm512_mask_ror_epi64(src: __m512i, k: __mmask8, a: __m512i, imm8: u8) -> __m512i {
 #[inline]
 #[target_feature(enable = "avx512f")]
 #[cfg_attr(test, assert_instr(vprorq))]
+#[rustc_args_required_const(2)]
 pub unsafe fn _mm512_maskz_ror_epi64(k: __mmask8, a: __m512i, imm8: u8) -> __m512i {
 let ror = _mm512_ror_epi64(a, imm8).as_i64x8();
 let zero = _mm512_setzero_si512().as_i64x8();
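// Per-lane model of the rotate-by-immediate family annotated above (plain
// Rust, not the stdarch API): vprold/vprorq rotate within each lane, so the
// immediate is effectively taken modulo the lane width.
fn ror_epi32_model(a: [u32; 16], imm8: u8) -> [u32; 16] {
    a.map(|x| x.rotate_right(u32::from(imm8) % 32))
}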
@@ -992,6 +1039,7 @@ pub unsafe fn _mm512_maskz_ror_epi64(k: __mmask8, a: __m512i, imm8: u8) -> __m512i {
 #[inline]
 #[target_feature(enable = "avx512f")]
 #[cfg_attr(test, assert_instr(vpsllid))]
+#[rustc_args_required_const(1)]
 pub unsafe fn _mm512_slli_epi32(a: __m512i, imm8: u32) -> __m512i {
 transmute(vpsllid(a.as_i32x16(), imm8))
 }
@@ -1002,6 +1050,7 @@ pub unsafe fn _mm512_slli_epi32(a: __m512i, imm8: u32) -> __m512i {
 #[inline]
 #[target_feature(enable = "avx512f")]
 #[cfg_attr(test, assert_instr(vpsllid))]
+#[rustc_args_required_const(3)]
 pub unsafe fn _mm512_mask_slli_epi32(src: __m512i, k: __mmask16, a: __m512i, imm8: u32) -> __m512i {
 let shf = _mm512_slli_epi32(a, imm8).as_i32x16();
 transmute(simd_select_bitmask(k, shf, src.as_i32x16()))
@@ -1013,6 +1062,7 @@ pub unsafe fn _mm512_mask_slli_epi32(src: __m512i, k: __mmask16, a: __m512i, imm8: u32) -> __m512i {
 #[inline]
 #[target_feature(enable = "avx512f")]
 #[cfg_attr(test, assert_instr(vpsllid))]
+#[rustc_args_required_const(2)]
 pub unsafe fn _mm512_maskz_slli_epi32(k: __mmask16, a: __m512i, imm8: u32) -> __m512i {
 let shf = _mm512_slli_epi32(a, imm8).as_i32x16();
 let zero = _mm512_setzero_si512().as_i32x16();
@@ -1025,6 +1075,7 @@ pub unsafe fn _mm512_maskz_slli_epi32(k: __mmask16, a: __m512i, imm8: u32) -> __m512i {
 #[inline]
 #[target_feature(enable = "avx512f")]
 #[cfg_attr(test, assert_instr(vpsrlid))]
+#[rustc_args_required_const(1)]
 pub unsafe fn _mm512_srli_epi32(a: __m512i, imm8: u32) -> __m512i {
 transmute(vpsrlid(a.as_i32x16(), imm8))
 }
@@ -1035,6 +1086,7 @@ pub unsafe fn _mm512_srli_epi32(a: __m512i, imm8: u32) -> __m512i {
 #[inline]
 #[target_feature(enable = "avx512f")]
 #[cfg_attr(test, assert_instr(vpsrlid))]
+#[rustc_args_required_const(3)]
 pub unsafe fn _mm512_mask_srli_epi32(src: __m512i, k: __mmask16, a: __m512i, imm8: u32) -> __m512i {
 let shf = _mm512_srli_epi32(a, imm8).as_i32x16();
 transmute(simd_select_bitmask(k, shf, src.as_i32x16()))
@@ -1046,6 +1098,7 @@ pub unsafe fn _mm512_mask_srli_epi32(src: __m512i, k: __mmask16, a: __m512i, imm8: u32) -> __m512i {
 #[inline]
 #[target_feature(enable = "avx512f")]
 #[cfg_attr(test, assert_instr(vpsrlid))]
+#[rustc_args_required_const(2)]
 pub unsafe fn _mm512_maskz_srli_epi32(k: __mmask16, a: __m512i, imm8: u32) -> __m512i {
 let shf = _mm512_srli_epi32(a, imm8).as_i32x16();
 let zero = _mm512_setzero_si512().as_i32x16();
@@ -1058,6 +1111,7 @@ pub unsafe fn _mm512_maskz_srli_epi32(k: __mmask16, a: __m512i, imm8: u32) -> __m512i {
 #[inline]
 #[target_feature(enable = "avx512f")]
 #[cfg_attr(test, assert_instr(vpslliq))]
+#[rustc_args_required_const(1)]
 pub unsafe fn _mm512_slli_epi64(a: __m512i, imm8: u32) -> __m512i {
 transmute(vpslliq(a.as_i64x8(), imm8))
 }
@@ -1068,6 +1122,7 @@ pub unsafe fn _mm512_slli_epi64(a: __m512i, imm8: u32) -> __m512i {
 #[inline]
 #[target_feature(enable = "avx512f")]
 #[cfg_attr(test, assert_instr(vpslliq))]
+#[rustc_args_required_const(3)]
 pub unsafe fn _mm512_mask_slli_epi64(src: __m512i, k: __mmask8, a: __m512i, imm8: u32) -> __m512i {
 let shf = _mm512_slli_epi64(a, imm8).as_i64x8();
 transmute(simd_select_bitmask(k, shf, src.as_i64x8()))
@@ -1079,6 +1134,7 @@ pub unsafe fn _mm512_mask_slli_epi64(src: __m512i, k: __mmask8, a: __m512i, imm8: u32) -> __m512i {
 #[inline]
 #[target_feature(enable = "avx512f")]
 #[cfg_attr(test, assert_instr(vpslliq))]
+#[rustc_args_required_const(2)]
 pub unsafe fn _mm512_maskz_slli_epi64(k: __mmask8, a: __m512i, imm8: u32) -> __m512i {
 let shf = _mm512_slli_epi64(a, imm8).as_i64x8();
 let zero = _mm512_setzero_si512().as_i64x8();
@@ -1091,6 +1147,7 @@ pub unsafe fn _mm512_maskz_slli_epi64(k: __mmask8, a: __m512i, imm8: u32) -> __m512i {
 #[inline]
 #[target_feature(enable = "avx512f")]
 #[cfg_attr(test, assert_instr(vpsrliq))]
+#[rustc_args_required_const(1)]
 pub unsafe fn _mm512_srli_epi64(a: __m512i, imm8: u32) -> __m512i {
 transmute(vpsrliq(a.as_i64x8(), imm8))
 }
@@ -1101,6 +1158,7 @@ pub unsafe fn _mm512_srli_epi64(a: __m512i, imm8: u32) -> __m512i {
 #[inline]
 #[target_feature(enable = "avx512f")]
 #[cfg_attr(test, assert_instr(vpsrliq))]
+#[rustc_args_required_const(3)]
 pub unsafe fn _mm512_mask_srli_epi64(src: __m512i, k: __mmask8, a: __m512i, imm8: u32) -> __m512i {
 let shf = _mm512_srli_epi64(a, imm8).as_i64x8();
 transmute(simd_select_bitmask(k, shf, src.as_i64x8()))
@@ -1112,6 +1170,7 @@ pub unsafe fn _mm512_mask_srli_epi64(src: __m512i, k: __mmask8, a: __m512i, imm8: u32) -> __m512i {
 #[inline]
 #[target_feature(enable = "avx512f")]
 #[cfg_attr(test, assert_instr(vpsrliq))]
+#[rustc_args_required_const(2)]
 pub unsafe fn _mm512_maskz_srli_epi64(k: __mmask8, a: __m512i, imm8: u32) -> __m512i {
 let shf = _mm512_srli_epi64(a, imm8).as_i64x8();
 let zero = _mm512_setzero_si512().as_i64x8();
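// Per-lane model of the shift-by-immediate family above (a sketch, not the
// stdarch API): unlike Rust's scalar << and >>, the vector shifts are well
// defined for large counts; a count of lane width or more yields 0.
fn slli_epi32_model(a: [u32; 16], imm8: u32) -> [u32; 16] {
    a.map(|x| if imm8 < 32 { x << imm8 } else { 0 })
}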
@@ -1322,6 +1399,7 @@ pub unsafe fn _mm512_maskz_sra_epi64(k: __mmask8, a: __m512i, count: __m128i) -> __m512i {
 #[inline]
 #[target_feature(enable = "avx512f")]
 #[cfg_attr(test, assert_instr(vpsraid))]
+#[rustc_args_required_const(1)]
 pub unsafe fn _mm512_srai_epi32(a: __m512i, imm8: u32) -> __m512i {
 transmute(vpsraid(a.as_i32x16(), imm8))
 }
@@ -1332,6 +1410,7 @@ pub unsafe fn _mm512_srai_epi32(a: __m512i, imm8: u32) -> __m512i {
 #[inline]
 #[target_feature(enable = "avx512f")]
 #[cfg_attr(test, assert_instr(vpsraid))]
+#[rustc_args_required_const(3)]
 pub unsafe fn _mm512_mask_srai_epi32(src: __m512i, k: __mmask16, a: __m512i, imm8: u32) -> __m512i {
 let shf = _mm512_srai_epi32(a, imm8).as_i32x16();
 transmute(simd_select_bitmask(k, shf, src.as_i32x16()))
@@ -1343,6 +1422,7 @@ pub unsafe fn _mm512_mask_srai_epi32(src: __m512i, k: __mmask16, a: __m512i, imm8: u32) -> __m512i {
 #[inline]
 #[target_feature(enable = "avx512f")]
 #[cfg_attr(test, assert_instr(vpsraid))]
+#[rustc_args_required_const(2)]
 pub unsafe fn _mm512_maskz_srai_epi32(k: __mmask16, a: __m512i, imm8: u32) -> __m512i {
 let shf = _mm512_srai_epi32(a, imm8).as_i32x16();
 let zero = _mm512_setzero_si512().as_i32x16();
@@ -1355,6 +1435,7 @@ pub unsafe fn _mm512_maskz_srai_epi32(k: __mmask16, a: __m512i, imm8: u32) -> __m512i {
 #[inline]
 #[target_feature(enable = "avx512f")]
 #[cfg_attr(test, assert_instr(vpsraiq))]
+#[rustc_args_required_const(1)]
 pub unsafe fn _mm512_srai_epi64(a: __m512i, imm8: u32) -> __m512i {
 transmute(vpsraiq(a.as_i64x8(), imm8))
 }
@@ -1365,6 +1446,7 @@ pub unsafe fn _mm512_srai_epi64(a: __m512i, imm8: u32) -> __m512i {
 #[inline]
 #[target_feature(enable = "avx512f")]
 #[cfg_attr(test, assert_instr(vpsraiq))]
+#[rustc_args_required_const(3)]
 pub unsafe fn _mm512_mask_srai_epi64(src: __m512i, k: __mmask8, a: __m512i, imm8: u32) -> __m512i {
 let shf = _mm512_srai_epi64(a, imm8).as_i64x8();
 transmute(simd_select_bitmask(k, shf, src.as_i64x8()))
@@ -1376,6 +1458,7 @@ pub unsafe fn _mm512_mask_srai_epi64(src: __m512i, k: __mmask8, a: __m512i, imm8: u32) -> __m512i {
 #[inline]
 #[target_feature(enable = "avx512f")]
 #[cfg_attr(test, assert_instr(vpsraiq))]
+#[rustc_args_required_const(2)]
 pub unsafe fn _mm512_maskz_srai_epi64(k: __mmask8, a: __m512i, imm8: u32) -> __m512i {
 let shf = _mm512_srai_epi64(a, imm8).as_i64x8();
 let zero = _mm512_setzero_si512().as_i64x8();
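// Per-lane model of the arithmetic right shifts above (a sketch, not the
// stdarch API): vpsraid/vpsraiq replicate the sign bit, and a count of lane
// width or more behaves like a shift by width - 1 (all sign bits).
fn srai_epi32_model(a: [i32; 16], imm8: u32) -> [i32; 16] {
    a.map(|x| x >> imm8.min(31))
}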
@@ -1996,15 +2135,11 @@ pub unsafe fn _kxor_mask16(a: __mmask16, b: __mmask16) -> __mmask16 {
 #[inline]
 #[target_feature(enable
= "avx512f")] #[cfg_attr(test, assert_instr(kxorw))] +#[rustc_args_required_const(2)] pub unsafe fn _mm512_kxor(a: __mmask16, b: __mmask16) -> __mmask16 { kxorw(a, b) } - - - - - /// Sets packed 32-bit integers in `dst` with the supplied values. /// /// [Intel's documentation]( https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,4909&text=_mm512_set_ps) diff --git a/crates/core_arch/src/x86_64/avx512f.rs b/crates/core_arch/src/x86_64/avx512f.rs index da728413e7..74077c8023 100644 --- a/crates/core_arch/src/x86_64/avx512f.rs +++ b/crates/core_arch/src/x86_64/avx512f.rs @@ -51,6 +51,72 @@ mod tests { use crate::core_arch::x86::*; use crate::core_arch::x86_64::*; + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_abs_epi64() { + let a = _mm512_setr_epi64( + 0, 1, -1, i64::MAX, + i64::MIN, 100, -100, -32 + ); + let r = _mm512_abs_epi64(a); + let e = _mm512_setr_epi64( + 0, + 1, + 1, + i64::MAX, + i64::MAX.wrapping_add(1), + 100, + 100, + 32 + ); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_abs_epi64() { + #[rustfmt::skip] + let a = _mm512_setr_epi64( + 0, 1, -1, i64::MAX, + i64::MIN, 100, -100, -32 + ); + let r = _mm512_mask_abs_epi64(a, 0, a); + assert_eq_m512i(r, a); + let r = _mm512_mask_abs_epi64(a, 0b11111111, a); + let e = _mm512_setr_epi64( + 0, + 1, + 1, + i64::MAX, + i64::MAX.wrapping_add(1), + 100, + 100, + 32 + ); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_abs_epi64() { + #[rustfmt::skip] + let a = _mm512_setr_epi64( + 0, 1, -1, i64::MAX, + i64::MIN, 100, -100, -32 + ); + let r = _mm512_maskz_abs_epi64(0, a); + assert_eq_m512i(r, _mm512_setzero_si512()); + let r = _mm512_maskz_abs_epi64(0b01111111, a); + let e = _mm512_setr_epi64( + 0, + 1, + 1, + i64::MAX, + i64::MAX.wrapping_add(1), + 100, + 100, + 0 + ); + assert_eq_m512i(r, e); + } + #[simd_test(enable = "avx512f")] unsafe fn test_mm512_setzero_pd() { assert_eq_m512d(_mm512_setzero_pd(), _mm512_set1_pd(0.)); From 8c74b076120e1acc87a101645cc4a74ad1b3b6fc Mon Sep 17 00:00:00 2001 From: jironglin Date: Thu, 20 Aug 2020 13:10:34 +0000 Subject: [PATCH 3/9] run cargo fmt to fix the CI errors --- crates/core_arch/src/aarch64/neon/mod.rs | 12 +- crates/core_arch/src/arm/neon/mod.rs | 10 +- crates/core_arch/src/simd.rs | 678 ++++++++++++-- crates/core_arch/src/x86/avx512f.rs | 1081 ++++++++++++++++++---- crates/core_arch/src/x86_64/avx512f.rs | 753 ++++++++++++--- 5 files changed, 2110 insertions(+), 424 deletions(-) diff --git a/crates/core_arch/src/aarch64/neon/mod.rs b/crates/core_arch/src/aarch64/neon/mod.rs index 0c73e5935d..adc653e31c 100644 --- a/crates/core_arch/src/aarch64/neon/mod.rs +++ b/crates/core_arch/src/aarch64/neon/mod.rs @@ -88,7 +88,7 @@ extern "C" { fn vpaddq_s32_(a: int32x4_t, b: int32x4_t) -> int32x4_t; #[link_name = "llvm.aarch64.neon.addp.v16i8"] fn vpaddq_s8_(a: int8x16_t, b: int8x16_t) -> int8x16_t; - + #[link_name = "llvm.aarch64.neon.saddv.i32.v4i16"] fn vaddv_s16_(a: int16x4_t) -> i16; #[link_name = "llvm.aarch64.neon.saddv.i32.v2i32"] @@ -1826,9 +1826,13 @@ mod tests { #[simd_test(enable = "neon")] unsafe fn test_vpaddq_s8() { let a = i8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); - let b = i8x16::new(0, -1, -2, -3, -4, -5, -6, -7, -8, -8, -10, -11, -12, -13, -14, -15); + let b = i8x16::new( + 0, -1, -2, -3, -4, -5, -6, -7, -8, -8, -10, -11, -12, -13, -14, -15, + ); let r: i8x16 = transmute(vpaddq_s8(transmute(a), transmute(b))); - let e = 
i8x16::new(3, 7, 11, 15, 19, 23, 27, 31, -1, -5, -9, -13, -16, -21, -25, -29); + let e = i8x16::new( + 3, 7, 11, 15, 19, 23, 27, 31, -1, -5, -9, -13, -16, -21, -25, -29, + ); assert_eq!(r, e); } #[simd_test(enable = "neon")] @@ -2829,7 +2833,7 @@ mod tests { let e = i64x2::new(i64::MIN, i64::MAX); assert_eq!(r, e); } - + #[simd_test(enable = "neon")] unsafe fn test_vaddv_s16() { let a = i16x4::new(1, 2, 3, -4); diff --git a/crates/core_arch/src/arm/neon/mod.rs b/crates/core_arch/src/arm/neon/mod.rs index c006ea70d4..43fa753cc3 100644 --- a/crates/core_arch/src/arm/neon/mod.rs +++ b/crates/core_arch/src/arm/neon/mod.rs @@ -175,7 +175,7 @@ extern "C" { #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpmaxs.v2f32")] #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fmaxp.v2f32")] fn vpmaxf_v2f32(a: float32x2_t, b: float32x2_t) -> float32x2_t; - + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpadd.v4i16")] #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.addp.v4i16")] fn vpadd_s16_(a: int16x4_t, b: int16x4_t) -> int16x4_t; @@ -299,7 +299,7 @@ pub unsafe fn vpadd_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpadd))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(addp))] pub unsafe fn vpadd_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t { - vpadd_s8_(a,b) + vpadd_s8_(a, b) } /// Add pairwise. #[inline] @@ -308,7 +308,7 @@ pub unsafe fn vpadd_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t { #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpadd))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(addp))] pub unsafe fn vpadd_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t { - transmute(vpadd_s16_(transmute(a),transmute(b))) + transmute(vpadd_s16_(transmute(a), transmute(b))) } /// Add pairwise. #[inline] @@ -317,7 +317,7 @@ pub unsafe fn vpadd_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t { #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpadd))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(addp))] pub unsafe fn vpadd_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t { - transmute(vpadd_s32_(transmute(a),transmute(b))) + transmute(vpadd_s32_(transmute(a), transmute(b))) } /// Add pairwise. #[inline] @@ -326,7 +326,7 @@ pub unsafe fn vpadd_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t { #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpadd))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(addp))] pub unsafe fn vpadd_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t { - transmute(vpadd_s8_(transmute(a),transmute(b))) + transmute(vpadd_s8_(transmute(a), transmute(b))) } /// Unsigned saturating extract narrow. 
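// Not part of the patch: a scalar model of the pairwise add exercised by
// test_vpaddq_s8 above, assuming vpaddq_s8 sums adjacent pairs of a into the
// low half of the result and adjacent pairs of b into the high half.
fn pairwise_add_i8x16(a: [i8; 16], b: [i8; 16]) -> [i8; 16] {
    let mut r = [0i8; 16];
    for i in 0..8 {
        r[i] = a[2 * i].wrapping_add(a[2 * i + 1]); // pairs of a -> low half
        r[i + 8] = b[2 * i].wrapping_add(b[2 * i + 1]); // pairs of b -> high half
    }
    r
}
// Feeding the test vectors above through this model reproduces the expected
// lanes (3, 7, 11, ..., -29).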
diff --git a/crates/core_arch/src/simd.rs b/crates/core_arch/src/simd.rs index 202df0143c..4b71d6c2bf 100644 --- a/crates/core_arch/src/simd.rs +++ b/crates/core_arch/src/simd.rs @@ -90,16 +90,44 @@ simd_ty!(i16x2[i16]: i16, i16 | x0, x1); // 64-bit wide types: -simd_ty!(u8x8[u8]: - u8, u8, u8, u8, u8, u8, u8, u8 - | x0, x1, x2, x3, x4, x5, x6, x7); +simd_ty!( + u8x8[u8]: u8, + u8, + u8, + u8, + u8, + u8, + u8, + u8 | x0, + x1, + x2, + x3, + x4, + x5, + x6, + x7 +); simd_ty!(u16x4[u16]: u16, u16, u16, u16 | x0, x1, x2, x3); simd_ty!(u32x2[u32]: u32, u32 | x0, x1); simd_ty!(u64x1[u64]: u64 | x1); -simd_ty!(i8x8[i8]: - i8, i8, i8, i8, i8, i8, i8, i8 - | x0, x1, x2, x3, x4, x5, x6, x7); +simd_ty!( + i8x8[i8]: i8, + i8, + i8, + i8, + i8, + i8, + i8, + i8 | x0, + x1, + x2, + x3, + x4, + x5, + x6, + x7 +); simd_ty!(i16x4[i16]: i16, i16, i16, i16 | x0, x1, x2, x3); simd_ty!(i32x2[i32]: i32, i32 | x0, x1); simd_ty!(i64x1[i64]: i64 | x1); @@ -108,116 +136,576 @@ simd_ty!(f32x2[f32]: f32, f32 | x0, x1); // 128-bit wide types: -simd_ty!(u8x16[u8]: - u8, u8, u8, u8, u8, u8, u8, u8, - u8, u8, u8, u8, u8, u8, u8, u8 - | x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15 +simd_ty!( + u8x16[u8]: u8, + u8, + u8, + u8, + u8, + u8, + u8, + u8, + u8, + u8, + u8, + u8, + u8, + u8, + u8, + u8 | x0, + x1, + x2, + x3, + x4, + x5, + x6, + x7, + x8, + x9, + x10, + x11, + x12, + x13, + x14, + x15 +); +simd_ty!( + u16x8[u16]: u16, + u16, + u16, + u16, + u16, + u16, + u16, + u16 | x0, + x1, + x2, + x3, + x4, + x5, + x6, + x7 ); -simd_ty!(u16x8[u16]: - u16, u16, u16, u16, u16, u16, u16, u16 - | x0, x1, x2, x3, x4, x5, x6, x7); simd_ty!(u32x4[u32]: u32, u32, u32, u32 | x0, x1, x2, x3); simd_ty!(u64x2[u64]: u64, u64 | x0, x1); -simd_ty!(i8x16[i8]: - i8, i8, i8, i8, i8, i8, i8, i8, - i8, i8, i8, i8, i8, i8, i8, i8 - | x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15 +simd_ty!( + i8x16[i8]: i8, + i8, + i8, + i8, + i8, + i8, + i8, + i8, + i8, + i8, + i8, + i8, + i8, + i8, + i8, + i8 | x0, + x1, + x2, + x3, + x4, + x5, + x6, + x7, + x8, + x9, + x10, + x11, + x12, + x13, + x14, + x15 +); +simd_ty!( + i16x8[i16]: i16, + i16, + i16, + i16, + i16, + i16, + i16, + i16 | x0, + x1, + x2, + x3, + x4, + x5, + x6, + x7 ); -simd_ty!(i16x8[i16]: - i16, i16, i16, i16, i16, i16, i16, i16 - | x0, x1, x2, x3, x4, x5, x6, x7); simd_ty!(i32x4[i32]: i32, i32, i32, i32 | x0, x1, x2, x3); simd_ty!(i64x2[i64]: i64, i64 | x0, x1); simd_ty!(f32x4[f32]: f32, f32, f32, f32 | x0, x1, x2, x3); simd_ty!(f64x2[f64]: f64, f64 | x0, x1); -simd_m_ty!(m8x16[i8]: - i8, i8, i8, i8, i8, i8, i8, i8, - i8, i8, i8, i8, i8, i8, i8, i8 - | x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15 +simd_m_ty!( + m8x16[i8]: i8, + i8, + i8, + i8, + i8, + i8, + i8, + i8, + i8, + i8, + i8, + i8, + i8, + i8, + i8, + i8 | x0, + x1, + x2, + x3, + x4, + x5, + x6, + x7, + x8, + x9, + x10, + x11, + x12, + x13, + x14, + x15 +); +simd_m_ty!( + m16x8[i16]: i16, + i16, + i16, + i16, + i16, + i16, + i16, + i16 | x0, + x1, + x2, + x3, + x4, + x5, + x6, + x7 ); -simd_m_ty!(m16x8[i16]: - i16, i16, i16, i16, i16, i16, i16, i16 - | x0, x1, x2, x3, x4, x5, x6, x7); simd_m_ty!(m32x4[i32]: i32, i32, i32, i32 | x0, x1, x2, x3); simd_m_ty!(m64x2[i64]: i64, i64 | x0, x1); // 256-bit wide types: -simd_ty!(u8x32[u8]: - u8, u8, u8, u8, u8, u8, u8, u8, - u8, u8, u8, u8, u8, u8, u8, u8, - u8, u8, u8, u8, u8, u8, u8, u8, - u8, u8, u8, u8, u8, u8, u8, u8 - | x0, x1, x2, x3, x4, x5, x6, x7, - x8, x9, x10, x11, x12, x13, x14, x15, - x16, x17, x18, x19, 
x20, x21, x22, x23, - x24, x25, x26, x27, x28, x29, x30, x31 -); -simd_ty!(u16x16[u16]: - u16, u16, u16, u16, u16, u16, u16, u16, - u16, u16, u16, u16, u16, u16, u16, u16 - | x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15 -); -simd_ty!(u32x8[u32]: - u32, u32, u32, u32, u32, u32, u32, u32 - | x0, x1, x2, x3, x4, x5, x6, x7); +simd_ty!( + u8x32[u8]: u8, + u8, + u8, + u8, + u8, + u8, + u8, + u8, + u8, + u8, + u8, + u8, + u8, + u8, + u8, + u8, + u8, + u8, + u8, + u8, + u8, + u8, + u8, + u8, + u8, + u8, + u8, + u8, + u8, + u8, + u8, + u8 | x0, + x1, + x2, + x3, + x4, + x5, + x6, + x7, + x8, + x9, + x10, + x11, + x12, + x13, + x14, + x15, + x16, + x17, + x18, + x19, + x20, + x21, + x22, + x23, + x24, + x25, + x26, + x27, + x28, + x29, + x30, + x31 +); +simd_ty!( + u16x16[u16]: u16, + u16, + u16, + u16, + u16, + u16, + u16, + u16, + u16, + u16, + u16, + u16, + u16, + u16, + u16, + u16 | x0, + x1, + x2, + x3, + x4, + x5, + x6, + x7, + x8, + x9, + x10, + x11, + x12, + x13, + x14, + x15 +); +simd_ty!( + u32x8[u32]: u32, + u32, + u32, + u32, + u32, + u32, + u32, + u32 | x0, + x1, + x2, + x3, + x4, + x5, + x6, + x7 +); simd_ty!(u64x4[u64]: u64, u64, u64, u64 | x0, x1, x2, x3); -simd_ty!(i8x32[i8]: - i8, i8, i8, i8, i8, i8, i8, i8, - i8, i8, i8, i8, i8, i8, i8, i8, - i8, i8, i8, i8, i8, i8, i8, i8, - i8, i8, i8, i8, i8, i8, i8, i8 - | x0, x1, x2, x3, x4, x5, x6, x7, - x8, x9, x10, x11, x12, x13, x14, x15, - x16, x17, x18, x19, x20, x21, x22, x23, - x24, x25, x26, x27, x28, x29, x30, x31 -); -simd_ty!(i16x16[i16]: - i16, i16, i16, i16, i16, i16, i16, i16, - i16, i16, i16, i16, i16, i16, i16, i16 - | x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15 -); -simd_ty!(i32x8[i32]: - i32, i32, i32, i32, i32, i32, i32, i32 - | x0, x1, x2, x3, x4, x5, x6, x7); +simd_ty!( + i8x32[i8]: i8, + i8, + i8, + i8, + i8, + i8, + i8, + i8, + i8, + i8, + i8, + i8, + i8, + i8, + i8, + i8, + i8, + i8, + i8, + i8, + i8, + i8, + i8, + i8, + i8, + i8, + i8, + i8, + i8, + i8, + i8, + i8 | x0, + x1, + x2, + x3, + x4, + x5, + x6, + x7, + x8, + x9, + x10, + x11, + x12, + x13, + x14, + x15, + x16, + x17, + x18, + x19, + x20, + x21, + x22, + x23, + x24, + x25, + x26, + x27, + x28, + x29, + x30, + x31 +); +simd_ty!( + i16x16[i16]: i16, + i16, + i16, + i16, + i16, + i16, + i16, + i16, + i16, + i16, + i16, + i16, + i16, + i16, + i16, + i16 | x0, + x1, + x2, + x3, + x4, + x5, + x6, + x7, + x8, + x9, + x10, + x11, + x12, + x13, + x14, + x15 +); +simd_ty!( + i32x8[i32]: i32, + i32, + i32, + i32, + i32, + i32, + i32, + i32 | x0, + x1, + x2, + x3, + x4, + x5, + x6, + x7 +); simd_ty!(i64x4[i64]: i64, i64, i64, i64 | x0, x1, x2, x3); -simd_ty!(f32x8[f32]: - f32, f32, f32, f32, f32, f32, f32, f32 | - x0, x1, x2, x3, x4, x5, x6, x7); +simd_ty!( + f32x8[f32]: f32, + f32, + f32, + f32, + f32, + f32, + f32, + f32 | x0, + x1, + x2, + x3, + x4, + x5, + x6, + x7 +); // 512-bit wide types: -simd_ty!(i32x16[i32]: - i32, i32, i32, i32, i32, i32, i32, i32, - i32, i32, i32, i32, i32, i32, i32, i32 - | x0, x1, x2, x3, x4, x5, x6, x7, - x8, x9, x10, x11, x12, x13, x14, x15); - -simd_ty!(u32x16[u32]: - u32, u32, u32, u32, u32, u32, u32, u32, - u32, u32, u32, u32, u32, u32, u32, u32 - | x0, x1, x2, x3, x4, x5, x6, x7, - x8, x9, x10, x11, x12, x13, x14, x15); - -simd_ty!(f32x16[f32]: - f32, f32, f32, f32, f32, f32, f32, f32, - f32, f32, f32, f32, f32, f32, f32, f32 - | x0, x1, x2, x3, x4, x5, x6, x7, - x8, x9, x10, x11, x12, x13, x14, x15); - -simd_ty!(i64x8[i64]: - i64, i64, i64, i64, i64, i64, i64, i64 - | x0, x1, x2, 
x3, x4, x5, x6, x7); - -simd_ty!(u64x8[u64]: - u64, u64, u64, u64, u64, u64, u64, u64 - | x0, x1, x2, x3, x4, x5, x6, x7); - -simd_ty!(f64x8[f64]: - f64, f64, f64, f64, f64, f64, f64, f64 - | x0, x1, x2, x3, x4, x5, x6, x7); +simd_ty!( + i32x16[i32]: i32, + i32, + i32, + i32, + i32, + i32, + i32, + i32, + i32, + i32, + i32, + i32, + i32, + i32, + i32, + i32 | x0, + x1, + x2, + x3, + x4, + x5, + x6, + x7, + x8, + x9, + x10, + x11, + x12, + x13, + x14, + x15 +); + +simd_ty!( + u32x16[u32]: u32, + u32, + u32, + u32, + u32, + u32, + u32, + u32, + u32, + u32, + u32, + u32, + u32, + u32, + u32, + u32 | x0, + x1, + x2, + x3, + x4, + x5, + x6, + x7, + x8, + x9, + x10, + x11, + x12, + x13, + x14, + x15 +); + +simd_ty!( + f32x16[f32]: f32, + f32, + f32, + f32, + f32, + f32, + f32, + f32, + f32, + f32, + f32, + f32, + f32, + f32, + f32, + f32 | x0, + x1, + x2, + x3, + x4, + x5, + x6, + x7, + x8, + x9, + x10, + x11, + x12, + x13, + x14, + x15 +); + +simd_ty!( + i64x8[i64]: i64, + i64, + i64, + i64, + i64, + i64, + i64, + i64 | x0, + x1, + x2, + x3, + x4, + x5, + x6, + x7 +); + +simd_ty!( + u64x8[u64]: u64, + u64, + u64, + u64, + u64, + u64, + u64, + u64 | x0, + x1, + x2, + x3, + x4, + x5, + x6, + x7 +); + +simd_ty!( + f64x8[f64]: f64, + f64, + f64, + f64, + f64, + f64, + f64, + f64 | x0, + x1, + x2, + x3, + x4, + x5, + x6, + x7 +); diff --git a/crates/core_arch/src/x86/avx512f.rs b/crates/core_arch/src/x86/avx512f.rs index 72e4fd4017..3ec8ffffa0 100644 --- a/crates/core_arch/src/x86/avx512f.rs +++ b/crates/core_arch/src/x86/avx512f.rs @@ -902,7 +902,7 @@ pub unsafe fn _mm512_rol_epi32(a: __m512i, imm8: u8) -> __m512i { transmute(vprold(a.as_i32x16(), imm8 as i8)) } -/// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_rol_epi32&expand=4683) #[inline] @@ -913,7 +913,7 @@ pub unsafe fn _mm512_mask_rol_epi32(src: __m512i, k: __mmask16, a: __m512i, imm8 transmute(simd_select_bitmask(k, rol, src.as_i32x16())) } -/// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_rol_epi32&expand=4684) #[inline] @@ -936,7 +936,7 @@ pub unsafe fn _mm512_ror_epi32(a: __m512i, imm8: u8) -> __m512i { transmute(vprord(a.as_i32x16(), imm8 as i8)) } -/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
+/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_ror_epi32&expand=4719) #[inline] @@ -948,7 +948,7 @@ pub unsafe fn _mm512_mask_ror_epi32(src: __m512i, k: __mmask16, a: __m512i, imm8 transmute(simd_select_bitmask(k, ror, src.as_i32x16())) } -/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_ror_epi32&expand=4720) #[inline] @@ -972,7 +972,7 @@ pub unsafe fn _mm512_rol_epi64(a: __m512i, imm8: u8) -> __m512i { transmute(vprolq(a.as_i64x8(), imm8 as i8)) } -/// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_rol_epi64&expand=4692) #[inline] @@ -984,7 +984,7 @@ pub unsafe fn _mm512_mask_rol_epi64(src: __m512i, k: __mmask8, a: __m512i, imm8: transmute(simd_select_bitmask(k, rol, src.as_i64x8())) } -/// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_rol_epi64&expand=4693) #[inline] @@ -1008,7 +1008,7 @@ pub unsafe fn _mm512_ror_epi64(a: __m512i, imm8: u8) -> __m512i { transmute(vprorq(a.as_i64x8(), imm8 as i8)) } -/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
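// Not part of the patch: a per-lane sketch of the rol_/ror_ family, assuming
// (as in Intel's pseudocode) that the rotate count is taken modulo the lane
// width, which u32::rotate_left/rotate_right already do.
fn rol_epi32_lane(x: u32, imm8: u8) -> u32 {
    x.rotate_left(imm8 as u32) // rotate_right(...) models the ror_ family
}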
/// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_ror_epi64&expand=4728) #[inline] @@ -1020,7 +1020,7 @@ pub unsafe fn _mm512_mask_ror_epi64(src: __m512i, k: __mmask8, a: __m512i, imm8: transmute(simd_select_bitmask(k, ror, src.as_i64x8())) } -/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_ror_epi64&expand=4729) #[inline] @@ -1033,7 +1033,7 @@ pub unsafe fn _mm512_maskz_ror_epi64(k: __mmask8, a: __m512i, imm8: u8) -> __m51 transmute(simd_select_bitmask(k, ror, zero)) } -/// Shift packed 32-bit integers in a left by imm8 while shifting in zeros, and store the results in dst. +/// Shift packed 32-bit integers in a left by imm8 while shifting in zeros, and store the results in dst. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_slli_epi32&expand=5310) #[inline] @@ -1044,7 +1044,7 @@ pub unsafe fn _mm512_slli_epi32(a: __m512i, imm8: u32) -> __m512i { transmute(vpsllid(a.as_i32x16(), imm8)) } -/// Shift packed 32-bit integers in a left by imm8 while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// Shift packed 32-bit integers in a left by imm8 while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_slli_epi32&expand=5308) #[inline] @@ -1056,7 +1056,7 @@ pub unsafe fn _mm512_mask_slli_epi32(src: __m512i, k: __mmask16, a: __m512i, imm transmute(simd_select_bitmask(k, shf, src.as_i32x16())) } -/// Shift packed 32-bit integers in a left by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// Shift packed 32-bit integers in a left by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_slli_epi32&expand=5309) #[inline] @@ -1069,7 +1069,7 @@ pub unsafe fn _mm512_maskz_slli_epi32(k: __mmask16, a: __m512i, imm8: u32) -> __ transmute(simd_select_bitmask(k, shf, zero)) } -/// Shift packed 32-bit integers in a right by imm8 while shifting in zeros, and store the results in dst. +/// Shift packed 32-bit integers in a right by imm8 while shifting in zeros, and store the results in dst. 
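// Not part of the patch: the slli/srli immediates behave like ordinary
// logical shifts except that, per Intel's pseudocode, a count at or past the
// lane width clears the lane, which checked_shr/checked_shl expresses directly.
fn srli_epi32_lane(x: u32, imm8: u32) -> u32 {
    x.checked_shr(imm8).unwrap_or(0) // checked_shl(...) models slli_epi32
}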
/// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_srli_epi32&expand=5522) #[inline] @@ -1080,7 +1080,7 @@ pub unsafe fn _mm512_srli_epi32(a: __m512i, imm8: u32) -> __m512i { transmute(vpsrlid(a.as_i32x16(), imm8)) } -/// Shift packed 32-bit integers in a right by imm8 while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// Shift packed 32-bit integers in a right by imm8 while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_srli_epi32&expand=5520) #[inline] @@ -1092,7 +1092,7 @@ pub unsafe fn _mm512_mask_srli_epi32(src: __m512i, k: __mmask16, a: __m512i, imm transmute(simd_select_bitmask(k, shf, src.as_i32x16())) } -/// Shift packed 32-bit integers in a right by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// Shift packed 32-bit integers in a right by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_srli_epi32&expand=5521) #[inline] @@ -1116,7 +1116,7 @@ pub unsafe fn _mm512_slli_epi64(a: __m512i, imm8: u32) -> __m512i { transmute(vpslliq(a.as_i64x8(), imm8)) } -/// Shift packed 64-bit integers in a left by imm8 while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// Shift packed 64-bit integers in a left by imm8 while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_slli_epi64&expand=5317) #[inline] @@ -1128,7 +1128,7 @@ pub unsafe fn _mm512_mask_slli_epi64(src: __m512i, k: __mmask8, a: __m512i, imm8 transmute(simd_select_bitmask(k, shf, src.as_i64x8())) } -/// Shift packed 64-bit integers in a left by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// Shift packed 64-bit integers in a left by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_slli_epi64&expand=5318) #[inline] @@ -1141,7 +1141,7 @@ pub unsafe fn _mm512_maskz_slli_epi64(k: __mmask8, a: __m512i, imm8: u32) -> __m transmute(simd_select_bitmask(k, shf, zero)) } -/// Shift packed 64-bit integers in a right by imm8 while shifting in zeros, and store the results in dst. +/// Shift packed 64-bit integers in a right by imm8 while shifting in zeros, and store the results in dst. 
/// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_srli_epi64&expand=5531) #[inline] @@ -1152,7 +1152,7 @@ pub unsafe fn _mm512_srli_epi64(a: __m512i, imm8: u32) -> __m512i { transmute(vpsrliq(a.as_i64x8(), imm8)) } -/// Shift packed 64-bit integers in a right by imm8 while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// Shift packed 64-bit integers in a right by imm8 while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_srli_epi64&expand=5529) #[inline] @@ -1177,7 +1177,7 @@ pub unsafe fn _mm512_maskz_srli_epi64(k: __mmask8, a: __m512i, imm8: u32) -> __m transmute(simd_select_bitmask(k, shf, zero)) } -/// Shift packed 32-bit integers in a left by count while shifting in zeros, and store the results in dst. +/// Shift packed 32-bit integers in a left by count while shifting in zeros, and store the results in dst. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_sll_epi32&expand=5280) #[inline] @@ -1188,14 +1188,19 @@ pub unsafe fn _mm512_sll_epi32(a: __m512i, count: __m128i) -> __m512i { transmute(vpslld(a.as_i32x16(), count.as_i32x4())) } -/// Shift packed 32-bit integers in a left by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// Shift packed 32-bit integers in a left by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_sll_epi32&expand=5278) #[inline] #[target_feature(enable = "avx512f")] #[cfg_attr(test, assert_instr(vpslld))] #[rustc_args_required_const(4)] -pub unsafe fn _mm512_mask_sll_epi32(src: __m512i, k: __mmask16, a: __m512i, count: __m128i) -> __m512i { +pub unsafe fn _mm512_mask_sll_epi32( + src: __m512i, + k: __mmask16, + a: __m512i, + count: __m128i, +) -> __m512i { let shf = _mm512_sll_epi32(a, count).as_i32x16(); transmute(simd_select_bitmask(k, shf, src.as_i32x16())) } @@ -1213,7 +1218,7 @@ pub unsafe fn _mm512_maskz_sll_epi32(k: __mmask16, a: __m512i, count: __m128i) - transmute(simd_select_bitmask(k, shf, zero)) } -/// Shift packed 32-bit integers in a right by count while shifting in zeros, and store the results in dst. +/// Shift packed 32-bit integers in a right by count while shifting in zeros, and store the results in dst. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_srl_epi32&expand=5492) #[inline] @@ -1224,14 +1229,19 @@ pub unsafe fn _mm512_srl_epi32(a: __m512i, count: __m128i) -> __m512i { transmute(vpsrld(a.as_i32x16(), count.as_i32x4())) } -/// Shift packed 32-bit integers in a right by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// Shift packed 32-bit integers in a right by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
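// Not part of the patch: unlike the slli/srli immediates, sll/srl read one
// shift count from the low 64 bits of a __m128i and apply it to every lane,
// and a count of 32 or more clears the whole vector. A hypothetical scalar
// model of that behaviour:
fn srl_epi32(a: [u32; 16], count: u64) -> [u32; 16] {
    let mut dst = [0u32; 16];
    for i in 0..16 {
        dst[i] = if count < 32 { a[i] >> count } else { 0 };
    }
    dst
}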
/// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_srl_epi32&expand=5490) #[inline] #[target_feature(enable = "avx512f")] #[cfg_attr(test, assert_instr(vpsrld))] #[rustc_args_required_const(4)] -pub unsafe fn _mm512_mask_srl_epi32(src: __m512i, k: __mmask16, a: __m512i, count: __m128i) -> __m512i { +pub unsafe fn _mm512_mask_srl_epi32( + src: __m512i, + k: __mmask16, + a: __m512i, + count: __m128i, +) -> __m512i { let shf = _mm512_srl_epi32(a, count).as_i32x16(); transmute(simd_select_bitmask(k, shf, src.as_i32x16())) } @@ -1249,7 +1259,7 @@ pub unsafe fn _mm512_maskz_srl_epi32(k: __mmask16, a: __m512i, count: __m128i) - transmute(simd_select_bitmask(k, shf, zero)) } -/// Shift packed 64-bit integers in a left by count while shifting in zeros, and store the results in dst. +/// Shift packed 64-bit integers in a left by count while shifting in zeros, and store the results in dst. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_sll_epi64&expand=5289) #[inline] @@ -1267,12 +1277,17 @@ pub unsafe fn _mm512_sll_epi64(a: __m512i, count: __m128i) -> __m512i { #[target_feature(enable = "avx512f")] #[cfg_attr(test, assert_instr(vpsllq))] #[rustc_args_required_const(4)] -pub unsafe fn _mm512_mask_sll_epi64(src: __m512i, k: __mmask8, a: __m512i, count: __m128i) -> __m512i { +pub unsafe fn _mm512_mask_sll_epi64( + src: __m512i, + k: __mmask8, + a: __m512i, + count: __m128i, +) -> __m512i { let shf = _mm512_sll_epi64(a, count).as_i64x8(); transmute(simd_select_bitmask(k, shf, src.as_i64x8())) } -/// Shift packed 64-bit integers in a left by count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// Shift packed 64-bit integers in a left by count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_sll_epi64&expand=5288) #[inline] @@ -1303,7 +1318,12 @@ pub unsafe fn _mm512_srl_epi64(a: __m512i, count: __m128i) -> __m512i { #[target_feature(enable = "avx512f")] #[cfg_attr(test, assert_instr(vpsrlq))] #[rustc_args_required_const(4)] -pub unsafe fn _mm512_mask_srl_epi64(src: __m512i, k: __mmask8, a: __m512i, count: __m128i) -> __m512i { +pub unsafe fn _mm512_mask_srl_epi64( + src: __m512i, + k: __mmask8, + a: __m512i, + count: __m128i, +) -> __m512i { let shf = _mm512_srl_epi64(a, count).as_i64x8(); transmute(simd_select_bitmask(k, shf, src.as_i64x8())) } @@ -1321,7 +1341,7 @@ pub unsafe fn _mm512_maskz_srl_epi64(k: __mmask8, a: __m512i, count: __m128i) -> transmute(simd_select_bitmask(k, shf, zero)) } -/// Shift packed 32-bit integers in a right by count while shifting in sign bits, and store the results in dst. +/// Shift packed 32-bit integers in a right by count while shifting in sign bits, and store the results in dst. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_sra_epi32&expand=5407) #[inline] @@ -1332,14 +1352,19 @@ pub unsafe fn _mm512_sra_epi32(a: __m512i, count: __m128i) -> __m512i { transmute(vpsrad(a.as_i32x16(), count.as_i32x4())) } -/// Shift packed 32-bit integers in a right by count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
+/// Shift packed 32-bit integers in a right by count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_sra_epi32&expand=5405) #[inline] #[target_feature(enable = "avx512f")] #[cfg_attr(test, assert_instr(vpsrad))] #[rustc_args_required_const(4)] -pub unsafe fn _mm512_mask_sra_epi32(src: __m512i, k: __mmask16, a: __m512i, count: __m128i) -> __m512i { +pub unsafe fn _mm512_mask_sra_epi32( + src: __m512i, + k: __mmask16, + a: __m512i, + count: __m128i, +) -> __m512i { let shf = _mm512_sra_epi32(a, count).as_i32x16(); transmute(simd_select_bitmask(k, shf, src.as_i32x16())) } @@ -1357,7 +1382,7 @@ pub unsafe fn _mm512_maskz_sra_epi32(k: __mmask16, a: __m512i, count: __m128i) - transmute(simd_select_bitmask(k, shf, zero)) } -/// Shift packed 64-bit integers in a right by count while shifting in sign bits, and store the results in dst. +/// Shift packed 64-bit integers in a right by count while shifting in sign bits, and store the results in dst. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_sra_epi64&expand=5416) #[inline] @@ -1368,19 +1393,24 @@ pub unsafe fn _mm512_sra_epi64(a: __m512i, count: __m128i) -> __m512i { transmute(vpsraq(a.as_i64x8(), count.as_i64x2())) } -/// Shift packed 64-bit integers in a right by count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// Shift packed 64-bit integers in a right by count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_sra_epi64&expand=5414) #[inline] #[target_feature(enable = "avx512f")] #[cfg_attr(test, assert_instr(vpsraq))] #[rustc_args_required_const(4)] -pub unsafe fn _mm512_mask_sra_epi64(src: __m512i, k: __mmask8, a: __m512i, count: __m128i) -> __m512i { +pub unsafe fn _mm512_mask_sra_epi64( + src: __m512i, + k: __mmask8, + a: __m512i, + count: __m128i, +) -> __m512i { let shf = _mm512_sra_epi64(a, count).as_i64x8(); transmute(simd_select_bitmask(k, shf, src.as_i64x8())) } -/// Shift packed 64-bit integers in a right by count while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// Shift packed 64-bit integers in a right by count while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_sra_epi64&expand=5415) #[inline] @@ -1404,7 +1434,7 @@ pub unsafe fn _mm512_srai_epi32(a: __m512i, imm8: u32) -> __m512i { transmute(vpsraid(a.as_i32x16(), imm8)) } -/// Shift packed 32-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// Shift packed 32-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
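// Not part of the patch: the sra/srai family shifts in copies of the sign
// bit rather than zeros, so once the count reaches the lane width the lane
// saturates to 0 or -1. A minimal sketch for a single i32 lane:
fn srai_epi32_lane(x: i32, imm8: u32) -> i32 {
    if imm8 < 32 { x >> imm8 } else { x >> 31 } // x >> 31 spreads the sign bit
}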
/// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_srai_epi32&expand=5434) #[inline] @@ -1416,7 +1446,7 @@ pub unsafe fn _mm512_mask_srai_epi32(src: __m512i, k: __mmask16, a: __m512i, imm transmute(simd_select_bitmask(k, shf, src.as_i32x16())) } -/// Shift packed 32-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// Shift packed 32-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_srai_epi32&expand=5435) #[inline] @@ -1429,7 +1459,7 @@ pub unsafe fn _mm512_maskz_srai_epi32(k: __mmask16, a: __m512i, imm8: u32) -> __ transmute(simd_select_bitmask(k, shf, zero)) } -/// Shift packed 64-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst. +/// Shift packed 64-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_srai_epi64&expand=5445) #[inline] @@ -1440,7 +1470,7 @@ pub unsafe fn _mm512_srai_epi64(a: __m512i, imm8: u32) -> __m512i { transmute(vpsraiq(a.as_i64x8(), imm8)) } -/// Shift packed 64-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// Shift packed 64-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_srai_epi64&expand=5443) #[inline] @@ -1452,7 +1482,7 @@ pub unsafe fn _mm512_mask_srai_epi64(src: __m512i, k: __mmask8, a: __m512i, imm8 transmute(simd_select_bitmask(k, shf, src.as_i64x8())) } -/// Shift packed 64-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// Shift packed 64-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_srai_epi64&expand=5444) #[inline] @@ -1465,7 +1495,7 @@ pub unsafe fn _mm512_maskz_srai_epi64(k: __mmask8, a: __m512i, imm8: u32) -> __m transmute(simd_select_bitmask(k, shf, zero)) } -/// Shift packed 32-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst. +/// Shift packed 32-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst. 
/// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_srav_epi32&expand=5465) #[inline] @@ -1476,14 +1506,19 @@ pub unsafe fn _mm512_srav_epi32(a: __m512i, count: __m512i) -> __m512i { transmute(vpsravd(a.as_i32x16(), count.as_i32x16())) } -/// Shift packed 32-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// Shift packed 32-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_srav_epi32&expand=5463) #[inline] #[target_feature(enable = "avx512f")] #[cfg_attr(test, assert_instr(vpsravd))] #[rustc_args_required_const(4)] -pub unsafe fn _mm512_mask_srav_epi32(src: __m512i, k: __mmask16, a: __m512i, count: __m512i) -> __m512i { +pub unsafe fn _mm512_mask_srav_epi32( + src: __m512i, + k: __mmask16, + a: __m512i, + count: __m512i, +) -> __m512i { let shf = _mm512_srav_epi32(a, count).as_i32x16(); transmute(simd_select_bitmask(k, shf, src.as_i32x16())) } @@ -1501,7 +1536,7 @@ pub unsafe fn _mm512_maskz_srav_epi32(k: __mmask16, a: __m512i, count: __m512i) transmute(simd_select_bitmask(k, shf, zero)) } -/// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst. +/// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_srav_epi64&expand=5474) #[inline] @@ -1512,14 +1547,19 @@ pub unsafe fn _mm512_srav_epi64(a: __m512i, count: __m512i) -> __m512i { transmute(vpsravq(a.as_i64x8(), count.as_i64x8())) } -/// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
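// Not part of the patch: srav extends the arithmetic shift to a per-lane
// count, so lane i of a is shifted by lane i of count. A scalar sketch of
// the 64-bit variant under the same sign-saturation rule:
fn srav_epi64(a: [i64; 8], count: [u64; 8]) -> [i64; 8] {
    let mut dst = [0i64; 8];
    for i in 0..8 {
        dst[i] = if count[i] < 64 { a[i] >> count[i] } else { a[i] >> 63 };
    }
    dst
}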
/// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_srav_epi64&expand=5472) #[inline] #[target_feature(enable = "avx512f")] #[cfg_attr(test, assert_instr(vpsravq))] #[rustc_args_required_const(4)] -pub unsafe fn _mm512_mask_srav_epi64(src: __m512i, k: __mmask8, a: __m512i, count: __m512i) -> __m512i { +pub unsafe fn _mm512_mask_srav_epi64( + src: __m512i, + k: __mmask8, + a: __m512i, + count: __m512i, +) -> __m512i { let shf = _mm512_srav_epi64(a, count).as_i64x8(); transmute(simd_select_bitmask(k, shf, src.as_i64x8())) } @@ -1548,14 +1588,19 @@ pub unsafe fn _mm512_rolv_epi32(a: __m512i, b: __m512i) -> __m512i { transmute(vprolvd(a.as_i32x16(), b.as_i32x16())) } -/// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_rolv_epi32&expand=4701) #[inline] #[target_feature(enable = "avx512f")] #[cfg_attr(test, assert_instr(vprolvd))] #[rustc_args_required_const(4)] -pub unsafe fn _mm512_mask_rolv_epi32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i { +pub unsafe fn _mm512_mask_rolv_epi32( + src: __m512i, + k: __mmask16, + a: __m512i, + b: __m512i, +) -> __m512i { let rol = _mm512_rolv_epi32(a, b).as_i32x16(); transmute(simd_select_bitmask(k, rol, src.as_i32x16())) } @@ -1573,7 +1618,7 @@ pub unsafe fn _mm512_maskz_rolv_epi32(k: __mmask16, a: __m512i, b: __m512i) -> _ transmute(simd_select_bitmask(k, rol, zero)) } -/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst. +/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_rorv_epi32&expand=4739) #[inline] @@ -1584,19 +1629,24 @@ pub unsafe fn _mm512_rorv_epi32(a: __m512i, b: __m512i) -> __m512i { transmute(vprorvd(a.as_i32x16(), b.as_i32x16())) } -/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
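// Not part of the patch: rolv/rorv are the variable-count counterparts of
// rol/ror, rotating lane i of a by lane i of b, with the count again assumed
// to be reduced modulo the lane width.
fn rorv_epi32(a: [u32; 16], b: [u32; 16]) -> [u32; 16] {
    let mut dst = [0u32; 16];
    for i in 0..16 {
        dst[i] = a[i].rotate_right(b[i]); // rotate_left(...) models rolv_
    }
    dst
}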
/// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_rorv_epi32&expand=4737) #[inline] #[target_feature(enable = "avx512f")] #[cfg_attr(test, assert_instr(vprorvd))] #[rustc_args_required_const(4)] -pub unsafe fn _mm512_mask_rorv_epi32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i { +pub unsafe fn _mm512_mask_rorv_epi32( + src: __m512i, + k: __mmask16, + a: __m512i, + b: __m512i, +) -> __m512i { let ror = _mm512_rorv_epi32(a, b).as_i32x16(); transmute(simd_select_bitmask(k, ror, src.as_i32x16())) } -/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_rorv_epi32&expand=4738) #[inline] @@ -1609,7 +1659,7 @@ pub unsafe fn _mm512_maskz_rorv_epi32(k: __mmask16, a: __m512i, b: __m512i) -> _ transmute(simd_select_bitmask(k, ror, zero)) } -/// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst. +/// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_rolv_epi64&expand=4712) #[inline] @@ -1632,7 +1682,7 @@ pub unsafe fn _mm512_mask_rolv_epi64(src: __m512i, k: __mmask8, a: __m512i, b: _ transmute(simd_select_bitmask(k, rol, src.as_i64x8())) } -/// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_rolv_epi64&expand=4711) #[inline] @@ -1645,7 +1695,7 @@ pub unsafe fn _mm512_maskz_rolv_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __ transmute(simd_select_bitmask(k, rol, zero)) } -/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst. +/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst. 
/// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_rorv_epi64&expand=4748) #[inline] @@ -1656,7 +1706,7 @@ pub unsafe fn _mm512_rorv_epi64(a: __m512i, b: __m512i) -> __m512i { transmute(vprorvq(a.as_i64x8(), b.as_i64x8())) } -/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_rorv_epi64&expand=4746) #[inline] @@ -1668,7 +1718,7 @@ pub unsafe fn _mm512_mask_rorv_epi64(src: __m512i, k: __mmask8, a: __m512i, b: _ transmute(simd_select_bitmask(k, ror, src.as_i64x8())) } -/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_rorv_epi64&expand=4747) #[inline] @@ -1692,19 +1742,24 @@ pub unsafe fn _mm512_sllv_epi32(a: __m512i, count: __m512i) -> __m512i { transmute(vpsllvd(a.as_i32x16(), count.as_i32x16())) } -/// Shift packed 32-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// Shift packed 32-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_sllv_epi32&expand=5340) #[inline] #[target_feature(enable = "avx512f")] #[cfg_attr(test, assert_instr(vpsllvd))] #[rustc_args_required_const(4)] -pub unsafe fn _mm512_mask_sllv_epi32(src: __m512i, k: __mmask16, a: __m512i, count: __m512i) -> __m512i { +pub unsafe fn _mm512_mask_sllv_epi32( + src: __m512i, + k: __mmask16, + a: __m512i, + count: __m512i, +) -> __m512i { let shf = _mm512_sllv_epi32(a, count).as_i32x16(); transmute(simd_select_bitmask(k, shf, src.as_i32x16())) } -/// Shift packed 32-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// Shift packed 32-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
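// Not part of the patch: sllv/srlv follow the same per-lane pattern but
// shift in zeros, so any lane whose count is 32 (or 64 for the epi64 forms)
// and above is cleared:
fn sllv_epi32(a: [u32; 16], count: [u32; 16]) -> [u32; 16] {
    let mut dst = [0u32; 16];
    for i in 0..16 {
        dst[i] = a[i].checked_shl(count[i]).unwrap_or(0);
    }
    dst
}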
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_sllv_epi32&expand=5341)
#[inline]
@@ -1717,7 +1772,7 @@ pub unsafe fn _mm512_maskz_sllv_epi32(k: __mmask16, a: __m512i, count: __m512i)
    transmute(simd_select_bitmask(k, shf, zero))
}
-/// Shift packed 32-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst.
+/// Shift packed 32-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_srlv_epi32&expand=5554)
#[inline]
@@ -1728,19 +1783,24 @@ pub unsafe fn _mm512_srlv_epi32(a: __m512i, count: __m512i) -> __m512i {
    transmute(vpsrlvd(a.as_i32x16(), count.as_i32x16()))
}
-/// Shift packed 32-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
+/// Shift packed 32-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_srlv_epi32&expand=5552)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpsrlvd))]
-pub unsafe fn _mm512_mask_srlv_epi32(src: __m512i, k: __mmask16, a: __m512i, count: __m512i) -> __m512i {
+pub unsafe fn _mm512_mask_srlv_epi32(
+    src: __m512i,
+    k: __mmask16,
+    a: __m512i,
+    count: __m512i,
+) -> __m512i {
    let shf = _mm512_srlv_epi32(a, count).as_i32x16();
    transmute(simd_select_bitmask(k, shf, src.as_i32x16()))
}
-/// Shift packed 32-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
+/// Shift packed 32-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_srlv_epi32&expand=5553)
#[inline]
@@ -1753,7 +1813,7 @@ pub unsafe fn _mm512_maskz_srlv_epi32(k: __mmask16, a: __m512i, count: __m512i)
    transmute(simd_select_bitmask(k, shf, zero))
}
-/// Shift packed 64-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst.
+/// Shift packed 64-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst.
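// The writemask/zeromask plumbing in all of these intrinsics is the same:
// compute the full-width result, then select per lane on the mask bits. A
// four-lane model of the `simd_select_bitmask` pattern (a sketch; the real
// function is a compiler intrinsic, not this code):
fn select_bitmask(k: u8, computed: [u32; 4], fallback: [u32; 4]) -> [u32; 4] {
    let mut out = [0u32; 4];
    for i in 0..4 {
        // Bit i of `k` picks `computed[i]`; otherwise the fallback lane wins.
        out[i] = if (k >> i) & 1 == 1 { computed[i] } else { fallback[i] };
    }
    out
}

fn main() {
    // A zeromask is just a writemask whose fallback vector is all zeros.
    assert_eq!(select_bitmask(0b0101, [9, 9, 9, 9], [0, 0, 0, 0]), [9, 0, 9, 0]);
}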
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_sllv_epi64&expand=5351)
#[inline]
@@ -1764,19 +1824,24 @@ pub unsafe fn _mm512_sllv_epi64(a: __m512i, count: __m512i) -> __m512i {
    transmute(vpsllvq(a.as_i64x8(), count.as_i64x8()))
}
-/// Shift packed 64-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
+/// Shift packed 64-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_sllv_epi64&expand=5349)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpsllvq))]
-pub unsafe fn _mm512_mask_sllv_epi64(src: __m512i, k: __mmask8, a: __m512i, count: __m512i) -> __m512i {
+pub unsafe fn _mm512_mask_sllv_epi64(
+    src: __m512i,
+    k: __mmask8,
+    a: __m512i,
+    count: __m512i,
+) -> __m512i {
    let shf = _mm512_sllv_epi64(a, count).as_i64x8();
    transmute(simd_select_bitmask(k, shf, src.as_i64x8()))
}
-/// Shift packed 64-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
+/// Shift packed 64-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_sllv_epi64&expand=5350)
#[inline]
@@ -1800,19 +1865,24 @@ pub unsafe fn _mm512_srlv_epi64(a: __m512i, count: __m512i) -> __m512i {
    transmute(vpsrlvq(a.as_i64x8(), count.as_i64x8()))
}
-/// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
+/// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mask_srlv_epi64&expand=5561)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpsrlvq))]
-pub unsafe fn _mm512_mask_srlv_epi64(src: __m512i, k: __mmask8, a: __m512i, count: __m512i) -> __m512i {
+pub unsafe fn _mm512_mask_srlv_epi64(
+    src: __m512i,
+    k: __mmask8,
+    a: __m512i,
+    count: __m512i,
+) -> __m512i {
    let shf = _mm512_srlv_epi64(a, count).as_i64x8();
    transmute(simd_select_bitmask(k, shf, src.as_i64x8()))
}
-/// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
+/// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_srlv_epi64&expand=5562) #[inline] @@ -1825,7 +1895,7 @@ pub unsafe fn _mm512_maskz_srlv_epi64(k: __mmask8, a: __m512i, count: __m512i) - transmute(simd_select_bitmask(k, shf, zero)) } -/// Compute the bitwise AND of packed 32-bit integers in a and b, and store the results in dst. +/// Compute the bitwise AND of packed 32-bit integers in a and b, and store the results in dst. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_and_epi32&expand=272) #[inline] @@ -1836,7 +1906,7 @@ pub unsafe fn _mm512_and_epi32(a: __m512i, b: __m512i) -> __m512i { transmute(simd_and(a.as_i32x16(), b.as_i32x16())) } -/// Performs element-by-element bitwise AND between packed 32-bit integer elements of v2 and v3, storing the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// Performs element-by-element bitwise AND between packed 32-bit integer elements of v2 and v3, storing the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_and_epi32&expand=273) #[inline] @@ -1861,7 +1931,7 @@ pub unsafe fn _mm512_maskz_and_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __ transmute(simd_select_bitmask(k, and, zero)) } -/// Compute the bitwise AND of 512 bits (composed of packed 64-bit integers) in a and b, and store the results in dst. +/// Compute the bitwise AND of 512 bits (composed of packed 64-bit integers) in a and b, and store the results in dst. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_and_epi64&expand=279) #[inline] @@ -1897,7 +1967,7 @@ pub unsafe fn _mm512_maskz_and_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m transmute(simd_select_bitmask(k, and, zero)) } -/// Compute the bitwise AND of 512 bits (representing integer data) in a and b, and store the result in dst. +/// Compute the bitwise AND of 512 bits (representing integer data) in a and b, and store the result in dst. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_and_si512&expand=302) #[inline] @@ -1908,7 +1978,7 @@ pub unsafe fn _mm512_and_si512(a: __m512i, b: __m512i) -> __m512i { transmute(simd_and(a.as_i32x16(), b.as_i32x16())) } -/// Compute the bitwise OR of packed 32-bit integers in a and b, and store the results in dst. +/// Compute the bitwise OR of packed 32-bit integers in a and b, and store the results in dst. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_or_epi32&expand=4042) #[inline] @@ -1991,7 +2061,7 @@ pub unsafe fn _mm512_or_si512(a: __m512i, b: __m512i) -> __m512i { transmute(simd_or(a.as_i32x16(), b.as_i32x16())) } -/// Compute the bitwise XOR of packed 32-bit integers in a and b, and store the results in dst. +/// Compute the bitwise XOR of packed 32-bit integers in a and b, and store the results in dst. 
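// A note for reading the tests that follow: `_mm512_set_epi32(e15, ..., e0)`
// lists lanes from highest to lowest, while mask bit i selects lane i, so a
// mask like 0b00000000_11111111 keeps the *last* eight `set_epi32` arguments.
// A four-lane model with hypothetical helpers, for illustration only:
fn set4(e3: u32, e2: u32, e1: u32, e0: u32) -> [u32; 4] {
    [e0, e1, e2, e3] // lane 0 is stored first, like `_mm512_set_epi32`
}

fn main() {
    let a = set4(8, 4, 2, 1);
    let k: u8 = 0b0011; // selects lanes 0 and 1 ...
    let kept: Vec<u32> = (0..4usize)
        .filter(|&i| (k >> i) & 1 == 1)
        .map(|i| a[i])
        .collect();
    assert_eq!(kept, vec![1, 2]); // ... which are the last two `set4` arguments
}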
/// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_xor_epi32&expand=6142) #[inline] @@ -4808,67 +4878,67 @@ mod tests { #[simd_test(enable = "avx512f")] unsafe fn test_mm512_rol_epi32() { - let a = _mm512_set_epi32(1<<31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1); + let a = _mm512_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1); let r = _mm512_rol_epi32(a, 1); - let e = _mm512_set_epi32(1<<0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2); + let e = _mm512_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2); assert_eq_m512i(r, e); } #[simd_test(enable = "avx512f")] unsafe fn test_mm512_mask_rol_epi32() { - let a = _mm512_set_epi32(1<<31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1); + let a = _mm512_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1); let r = _mm512_mask_rol_epi32(a, 0, a, 1); assert_eq_m512i(r, a); let r = _mm512_mask_rol_epi32(a, 0b11111111_11111111, a, 1); - let e = _mm512_set_epi32(1<<0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2); + let e = _mm512_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2); assert_eq_m512i(r, e); } #[simd_test(enable = "avx512f")] unsafe fn test_mm512_maskz_rol_epi32() { - let a = _mm512_set_epi32(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1<<31); + let a = _mm512_set_epi32(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 << 31); let r = _mm512_maskz_rol_epi32(0, a, 1); assert_eq_m512i(r, _mm512_setzero_si512()); let r = _mm512_maskz_rol_epi32(0b00000000_11111111, a, 1); - let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, 2, 1<<0); + let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, 2, 1 << 0); assert_eq_m512i(r, e); } #[simd_test(enable = "avx512f")] unsafe fn test_mm512_ror_epi32() { - let a = _mm512_set_epi32(1<<0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2); + let a = _mm512_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2); let r = _mm512_ror_epi32(a, 1); - let e = _mm512_set_epi32(1<<31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1); + let e = _mm512_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1); assert_eq_m512i(r, e); } #[simd_test(enable = "avx512f")] unsafe fn test_mm512_mask_ror_epi32() { - let a = _mm512_set_epi32(1<<0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2); + let a = _mm512_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2); let r = _mm512_mask_ror_epi32(a, 0, a, 1); assert_eq_m512i(r, a); let r = _mm512_mask_ror_epi32(a, 0b11111111_11111111, a, 1); - let e = _mm512_set_epi32(1<<31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1); + let e = _mm512_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1); assert_eq_m512i(r, e); } #[simd_test(enable = "avx512f")] unsafe fn test_mm512_maskz_ror_epi32() { - let a = _mm512_set_epi32(2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1<<0); + let a = _mm512_set_epi32(2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1 << 0); let r = _mm512_maskz_ror_epi32(0, a, 1); assert_eq_m512i(r, _mm512_setzero_si512()); let r = _mm512_maskz_ror_epi32(0b00000000_11111111, a, 1); - let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1<<31); + let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1 << 31); assert_eq_m512i(r, e); } #[simd_test(enable = "avx512f")] unsafe fn test_mm512_slli_epi32() { - let a = _mm512_set_epi32(1<<31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1); + let a = _mm512_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1); let r = _mm512_slli_epi32(a, 1); 
let e = _mm512_set_epi32(0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2); assert_eq_m512i(r, e); @@ -4876,7 +4946,7 @@ mod tests { #[simd_test(enable = "avx512f")] unsafe fn test_mm512_mask_slli_epi32() { - let a = _mm512_set_epi32(1<<31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1); + let a = _mm512_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1); let r = _mm512_mask_slli_epi32(a, 0, a, 1); assert_eq_m512i(r, a); @@ -4887,7 +4957,7 @@ mod tests { #[simd_test(enable = "avx512f")] unsafe fn test_mm512_maskz_slli_epi32() { - let a = _mm512_set_epi32(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1<<31); + let a = _mm512_set_epi32(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 << 31); let r = _mm512_maskz_slli_epi32(0, a, 1); assert_eq_m512i(r, _mm512_setzero_si512()); @@ -4900,7 +4970,7 @@ mod tests { unsafe fn test_mm512_srli_epi32() { let a = _mm512_set_epi32(0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2); let r = _mm512_srli_epi32(a, 1); - let e = _mm512_set_epi32(0<<31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1); + let e = _mm512_set_epi32(0 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1); assert_eq_m512i(r, e); } @@ -4911,7 +4981,7 @@ mod tests { assert_eq_m512i(r, a); let r = _mm512_mask_srli_epi32(a, 0b11111111_11111111, a, 1); - let e = _mm512_set_epi32(0<<31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1); + let e = _mm512_set_epi32(0 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1); assert_eq_m512i(r, e); } @@ -4922,38 +4992,38 @@ mod tests { assert_eq_m512i(r, _mm512_setzero_si512()); let r = _mm512_maskz_srli_epi32(0b00000000_11111111, a, 1); - let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0<<31); + let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0 << 31); assert_eq_m512i(r, e); } #[simd_test(enable = "avx512f")] unsafe fn test_mm512_rolv_epi32() { - let a = _mm512_set_epi32(1<<31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1); + let a = _mm512_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1); let b = _mm512_set_epi32(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1); let r = _mm512_rolv_epi32(a, b); - let e = _mm512_set_epi32(1<<0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2); + let e = _mm512_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2); assert_eq_m512i(r, e); } #[simd_test(enable = "avx512f")] unsafe fn test_mm512_mask_rolv_epi32() { - let a = _mm512_set_epi32(1<<31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1); - let b = _mm512_set_epi32(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,1,1,1,1); + let a = _mm512_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1); + let b = _mm512_set_epi32(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1); let r = _mm512_mask_rolv_epi32(a, 0, a, b); assert_eq_m512i(r, a); let r = _mm512_mask_rolv_epi32(a, 0b11111111_11111111, a, b); - let e = _mm512_set_epi32(1<<0,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2); + let e = _mm512_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2); assert_eq_m512i(r, e); } #[simd_test(enable = "avx512f")] unsafe fn test_mm512_maskz_rolv_epi32() { - let a = _mm512_set_epi32(1<<31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1<<31); + let a = _mm512_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 << 31); let b = _mm512_set_epi32(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1); let r = _mm512_maskz_rolv_epi32(0, a, b); @@ -4961,24 +5031,24 @@ mod tests { let r = _mm512_maskz_rolv_epi32(0b00000000_11111111, a, b); - let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, 2, 1<<0); + let e = 
_mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, 2, 1 << 0); assert_eq_m512i(r, e); } #[simd_test(enable = "avx512f")] unsafe fn test_mm512_rorv_epi32() { - let a = _mm512_set_epi32(1<<0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2); + let a = _mm512_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2); let b = _mm512_set_epi32(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1); let r = _mm512_rorv_epi32(a, b); - let e = _mm512_set_epi32(1<<31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1); + let e = _mm512_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1); assert_eq_m512i(r, e); } #[simd_test(enable = "avx512f")] unsafe fn test_mm512_mask_rorv_epi32() { - let a = _mm512_set_epi32(1<<0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2); + let a = _mm512_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2); let b = _mm512_set_epi32(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1); let r = _mm512_mask_rorv_epi32(a, 0, a, b); @@ -4986,13 +5056,13 @@ mod tests { let r = _mm512_mask_rorv_epi32(a, 0b11111111_11111111, a, b); - let e = _mm512_set_epi32(1<<31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1); + let e = _mm512_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1); assert_eq_m512i(r, e); } #[simd_test(enable = "avx512f")] unsafe fn test_mm512_maskz_rorv_epi32() { - let a = _mm512_set_epi32(3, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1<<0); + let a = _mm512_set_epi32(3, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1 << 0); let b = _mm512_set_epi32(2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1); let r = _mm512_maskz_rorv_epi32(0, a, b); @@ -5000,13 +5070,13 @@ mod tests { let r = _mm512_maskz_rorv_epi32(0b00000000_11111111, a, b); - let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1<<31); + let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1 << 31); assert_eq_m512i(r, e); } #[simd_test(enable = "avx512f")] unsafe fn test_mm512_sllv_epi32() { - let a = _mm512_set_epi32(1<<31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1); + let a = _mm512_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1); let count = _mm512_set_epi32(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1); let r = _mm512_sllv_epi32(a, count); @@ -5017,7 +5087,7 @@ mod tests { #[simd_test(enable = "avx512f")] unsafe fn test_mm512_mask_sllv_epi32() { - let a = _mm512_set_epi32(1<<31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1); + let a = _mm512_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1); let count = _mm512_set_epi32(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1); let r = _mm512_mask_sllv_epi32(a, 0, a, count); @@ -5031,7 +5101,7 @@ mod tests { #[simd_test(enable = "avx512f")] unsafe fn test_mm512_maskz_sllv_epi32() { - let a = _mm512_set_epi32(1<<31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1<<31); + let a = _mm512_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 << 31); let count = _mm512_set_epi32(0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1); let r = _mm512_maskz_sllv_epi32(0, a, count); @@ -5084,28 +5154,113 @@ mod tests { #[simd_test(enable = "avx512f")] unsafe fn test_mm512_sll_epi32() { - let a = _mm512_set_epi32(1<<31, 1<<0, 1<<1, 1<<2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); + let a = _mm512_set_epi32( + 1 << 31, + 1 << 0, + 1 << 1, + 1 << 2, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + ); let count = _mm_set_epi32(0, 0, 0, 2); let r = _mm512_sll_epi32(a, count); - let e = _mm512_set_epi32(0, 1<<2, 1<<3, 1<<4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); + let e = _mm512_set_epi32( + 
0, + 1 << 2, + 1 << 3, + 1 << 4, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + ); assert_eq_m512i(r, e); } #[simd_test(enable = "avx512f")] unsafe fn test_mm512_mask_sll_epi32() { - let a = _mm512_set_epi32(1<<31, 1<<0, 1<<1, 1<<2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); + let a = _mm512_set_epi32( + 1 << 31, + 1 << 0, + 1 << 1, + 1 << 2, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + ); let count = _mm_set_epi32(0, 0, 0, 2); let r = _mm512_mask_sll_epi32(a, 0, a, count); assert_eq_m512i(r, a); let r = _mm512_mask_sll_epi32(a, 0b11111111_11111111, a, count); - let e = _mm512_set_epi32(0, 1<<2, 1<<3, 1<<4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); + let e = _mm512_set_epi32( + 0, + 1 << 2, + 1 << 3, + 1 << 4, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + ); assert_eq_m512i(r, e); } #[simd_test(enable = "avx512f")] unsafe fn test_mm512_maskz_sll_epi32() { - let a = _mm512_set_epi32(1<<31, 1<<0, 1<<1, 1<<2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1<<31); + let a = _mm512_set_epi32( + 1 << 31, + 1 << 0, + 1 << 1, + 1 << 2, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 1 << 31, + ); let count = _mm_set_epi32(2, 0, 0, 2); let r = _mm512_maskz_sll_epi32(0, a, count); assert_eq_m512i(r, _mm512_setzero_si512()); @@ -5117,34 +5272,85 @@ mod tests { #[simd_test(enable = "avx512f")] unsafe fn test_mm512_srl_epi32() { - let a = _mm512_set_epi32(1<<31, 1<<0, 1<<1, 1<<2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); + let a = _mm512_set_epi32( + 1 << 31, + 1 << 0, + 1 << 1, + 1 << 2, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + ); let count = _mm_set_epi32(0, 0, 0, 2); let r = _mm512_srl_epi32(a, count); - let e = _mm512_set_epi32(1<<29, 0, 0, 1<<0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); + let e = _mm512_set_epi32(1 << 29, 0, 0, 1 << 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); assert_eq_m512i(r, e); } #[simd_test(enable = "avx512f")] unsafe fn test_mm512_mask_srl_epi32() { - let a = _mm512_set_epi32(1<<31, 1<<0, 1<<1, 1<<2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); + let a = _mm512_set_epi32( + 1 << 31, + 1 << 0, + 1 << 1, + 1 << 2, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + ); let count = _mm_set_epi32(0, 0, 0, 2); let r = _mm512_mask_srl_epi32(a, 0, a, count); assert_eq_m512i(r, a); let r = _mm512_mask_srl_epi32(a, 0b11111111_11111111, a, count); - let e = _mm512_set_epi32(1<<29, 0, 0, 1<<0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); + let e = _mm512_set_epi32(1 << 29, 0, 0, 1 << 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); assert_eq_m512i(r, e); } #[simd_test(enable = "avx512f")] unsafe fn test_mm512_maskz_srl_epi32() { - let a = _mm512_set_epi32(1<<31, 1<<0, 1<<1, 1<<2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1<<31); + let a = _mm512_set_epi32( + 1 << 31, + 1 << 0, + 1 << 1, + 1 << 2, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 1 << 31, + ); let count = _mm_set_epi32(2, 0, 0, 2); let r = _mm512_maskz_srl_epi32(0, a, count); assert_eq_m512i(r, _mm512_setzero_si512()); let r = _mm512_maskz_srl_epi32(0b00000000_11111111, a, count); - let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1<<29); + let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1 << 29); assert_eq_m512i(r, e); } @@ -5177,7 +5383,7 @@ mod tests { assert_eq_m512i(r, _mm512_setzero_si512()); let r = _mm512_maskz_sra_epi32(0b00000000_11111111, a, count); - let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -4, -4 ); + let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -4, -4); assert_eq_m512i(r, e); } @@ -5210,10 
+5416,10 @@ mod tests { assert_eq_m512i(r, _mm512_setzero_si512()); let r = _mm512_maskz_srav_epi32(0b00000000_11111111, a, count); - let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -4, -4 ); + let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -4, -4); assert_eq_m512i(r, e); } - + #[simd_test(enable = "avx512f")] unsafe fn test_mm512_srai_epi32() { let a = _mm512_set_epi32(8, -8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 16, -15); @@ -5243,130 +5449,674 @@ mod tests { let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, -4); assert_eq_m512i(r, e); } - + #[simd_test(enable = "avx512f")] unsafe fn test_mm512_and_epi32() { - let a = _mm512_set_epi32(1<<1|1<<2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1<<1|1<<3); - let b = _mm512_set_epi32(1<<1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1<<3|1<<4); + let a = _mm512_set_epi32( + 1 << 1 | 1 << 2, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 1 << 1 | 1 << 3, + ); + let b = _mm512_set_epi32( + 1 << 1, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 1 << 3 | 1 << 4, + ); let r = _mm512_and_epi32(a, b); - let e = _mm512_set_epi32(1<<1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1<<3); + let e = _mm512_set_epi32(1 << 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1 << 3); assert_eq_m512i(r, e); } #[simd_test(enable = "avx512f")] unsafe fn test_mm512_mask_and_epi32() { - let a = _mm512_set_epi32(1<<1|1<<2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1<<1|1<<3); - let b = _mm512_set_epi32(1<<1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1<<3|1<<4); + let a = _mm512_set_epi32( + 1 << 1 | 1 << 2, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 1 << 1 | 1 << 3, + ); + let b = _mm512_set_epi32( + 1 << 1, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 1 << 3 | 1 << 4, + ); let r = _mm512_mask_and_epi32(a, 0, a, b); assert_eq_m512i(r, a); let r = _mm512_mask_and_epi32(a, 0b01111111_11111111, a, b); - let e = _mm512_set_epi32(1<<1|1<<2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1<<3); + let e = _mm512_set_epi32( + 1 << 1 | 1 << 2, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 1 << 3, + ); assert_eq_m512i(r, e); } #[simd_test(enable = "avx512f")] unsafe fn test_mm512_maskz_and_epi32() { - let a = _mm512_set_epi32(1<<1|1<<2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1<<1|1<<3); - let b = _mm512_set_epi32(1<<1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1<<3|1<<4); + let a = _mm512_set_epi32( + 1 << 1 | 1 << 2, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 1 << 1 | 1 << 3, + ); + let b = _mm512_set_epi32( + 1 << 1, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 1 << 3 | 1 << 4, + ); let r = _mm512_maskz_and_epi32(0, a, b); assert_eq_m512i(r, _mm512_setzero_si512()); let r = _mm512_maskz_and_epi32(0b00000000_11111111, a, b); - let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1<<3); + let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1 << 3); assert_eq_m512i(r, e); } - + #[simd_test(enable = "avx512f")] unsafe fn test_mm512_and_si512() { - let a = _mm512_set_epi32(1<<1|1<<2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1<<1|1<<3); - let b = _mm512_set_epi32(1<<1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1<<3|1<<4); + let a = _mm512_set_epi32( + 1 << 1 | 1 << 2, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 1 << 1 | 1 << 3, + ); + let b = _mm512_set_epi32( + 1 << 1, + 0, + 0, + 0, + 0, + 
0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 1 << 3 | 1 << 4, + ); let r = _mm512_and_epi32(a, b); - let e = _mm512_set_epi32(1<<1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1<<3); + let e = _mm512_set_epi32(1 << 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1 << 3); assert_eq_m512i(r, e); } #[simd_test(enable = "avx512f")] unsafe fn test_mm512_or_epi32() { - let a = _mm512_set_epi32(1<<1|1<<2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1<<1|1<<3); - let b = _mm512_set_epi32(1<<1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1<<3|1<<4); + let a = _mm512_set_epi32( + 1 << 1 | 1 << 2, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 1 << 1 | 1 << 3, + ); + let b = _mm512_set_epi32( + 1 << 1, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 1 << 3 | 1 << 4, + ); let r = _mm512_or_epi32(a, b); - let e = _mm512_set_epi32(1<<1|1<<2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1<<1|1<<3|1<<4); + let e = _mm512_set_epi32( + 1 << 1 | 1 << 2, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 1 << 1 | 1 << 3 | 1 << 4, + ); assert_eq_m512i(r, e); } #[simd_test(enable = "avx512f")] unsafe fn test_mm512_mask_or_epi32() { - let a = _mm512_set_epi32(1<<1|1<<2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1<<1|1<<3); - let b = _mm512_set_epi32(1<<1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1<<3|1<<4); + let a = _mm512_set_epi32( + 1 << 1 | 1 << 2, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 1 << 1 | 1 << 3, + ); + let b = _mm512_set_epi32( + 1 << 1, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 1 << 3 | 1 << 4, + ); let r = _mm512_mask_or_epi32(a, 0, a, b); assert_eq_m512i(r, a); let r = _mm512_mask_or_epi32(a, 0b11111111_11111111, a, b); - let e = _mm512_set_epi32(1<<1|1<<2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1<<1|1<<3|1<<4); + let e = _mm512_set_epi32( + 1 << 1 | 1 << 2, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 1 << 1 | 1 << 3 | 1 << 4, + ); assert_eq_m512i(r, e); } #[simd_test(enable = "avx512f")] unsafe fn test_mm512_maskz_or_epi32() { - let a = _mm512_set_epi32(1<<1|1<<2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1<<1|1<<3); - let b = _mm512_set_epi32(1<<1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1<<3|1<<4); + let a = _mm512_set_epi32( + 1 << 1 | 1 << 2, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 1 << 1 | 1 << 3, + ); + let b = _mm512_set_epi32( + 1 << 1, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 1 << 3 | 1 << 4, + ); let r = _mm512_maskz_or_epi32(0, a, b); assert_eq_m512i(r, _mm512_setzero_si512()); let r = _mm512_maskz_or_epi32(0b00000000_11111111, a, b); - let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1<<1|1<<3|1<<4); + let e = _mm512_set_epi32( + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 1 << 1 | 1 << 3 | 1 << 4, + ); assert_eq_m512i(r, e); } - + #[simd_test(enable = "avx512f")] unsafe fn test_mm512_or_si512() { - let a = _mm512_set_epi32(1<<1|1<<2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1<<1|1<<3); - let b = _mm512_set_epi32(1<<1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1<<3|1<<4); + let a = _mm512_set_epi32( + 1 << 1 | 1 << 2, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 1 << 1 | 1 << 3, + ); + let b = _mm512_set_epi32( + 1 << 1, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 1 << 3 | 1 << 4, + ); let r = _mm512_or_epi32(a, b); - let e = _mm512_set_epi32(1<<1|1<<2, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1<<1|1<<3|1<<4); + let e = _mm512_set_epi32( + 1 << 1 | 1 << 2, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 1 << 1 | 1 << 3 | 1 << 4, + ); assert_eq_m512i(r, e); } #[simd_test(enable = "avx512f")] unsafe fn test_mm512_xor_epi32() { - let a = _mm512_set_epi32(1<<1|1<<2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1<<1|1<<3); - let b = _mm512_set_epi32(1<<1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1<<3|1<<4); + let a = _mm512_set_epi32( + 1 << 1 | 1 << 2, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 1 << 1 | 1 << 3, + ); + let b = _mm512_set_epi32( + 1 << 1, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 1 << 3 | 1 << 4, + ); let r = _mm512_xor_epi32(a, b); - let e = _mm512_set_epi32(1<<2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1<<1|1<<4); + let e = _mm512_set_epi32( + 1 << 2, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 1 << 1 | 1 << 4, + ); assert_eq_m512i(r, e); } #[simd_test(enable = "avx512f")] unsafe fn test_mm512_mask_xor_epi32() { - let a = _mm512_set_epi32(1<<1|1<<2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1<<1|1<<3); - let b = _mm512_set_epi32(1<<1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1<<3|1<<4); + let a = _mm512_set_epi32( + 1 << 1 | 1 << 2, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 1 << 1 | 1 << 3, + ); + let b = _mm512_set_epi32( + 1 << 1, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 1 << 3 | 1 << 4, + ); let r = _mm512_mask_xor_epi32(a, 0, a, b); assert_eq_m512i(r, a); let r = _mm512_mask_xor_epi32(a, 0b01111111_11111111, a, b); - let e = _mm512_set_epi32(1<<1|1<<2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1<<1|1<<4); + let e = _mm512_set_epi32( + 1 << 1 | 1 << 2, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 1 << 1 | 1 << 4, + ); assert_eq_m512i(r, e); } #[simd_test(enable = "avx512f")] unsafe fn test_mm512_maskz_xor_epi32() { - let a = _mm512_set_epi32(1<<1|1<<2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1<<1|1<<3); - let b = _mm512_set_epi32(1<<1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1<<3|1<<4); + let a = _mm512_set_epi32( + 1 << 1 | 1 << 2, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 1 << 1 | 1 << 3, + ); + let b = _mm512_set_epi32( + 1 << 1, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 1 << 3 | 1 << 4, + ); let r = _mm512_maskz_xor_epi32(0, a, b); assert_eq_m512i(r, _mm512_setzero_si512()); let r = _mm512_maskz_xor_epi32(0b00000000_11111111, a, b); - let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1<<1|1<<4); + let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1 << 1 | 1 << 4); assert_eq_m512i(r, e); } - + #[simd_test(enable = "avx512f")] unsafe fn test_mm512_xor_si512() { - let a = _mm512_set_epi32(1<<1|1<<2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1<<1|1<<3); - let b = _mm512_set_epi32(1<<1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1<<3|1<<4); + let a = _mm512_set_epi32( + 1 << 1 | 1 << 2, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 1 << 1 | 1 << 3, + ); + let b = _mm512_set_epi32( + 1 << 1, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 1 << 3 | 1 << 4, + ); let r = _mm512_xor_epi32(a, b); - let e = _mm512_set_epi32(1<<2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1<<1|1<<4); + let e = _mm512_set_epi32( + 1 << 2, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, 
+ 1 << 1 | 1 << 4, + ); assert_eq_m512i(r, e); } @@ -5423,5 +6173,4 @@ mod tests { let e: u16 = 0b11100010_00111000; assert_eq!(r, e); } - } diff --git a/crates/core_arch/src/x86_64/avx512f.rs b/crates/core_arch/src/x86_64/avx512f.rs index 74077c8023..aa6857ecce 100644 --- a/crates/core_arch/src/x86_64/avx512f.rs +++ b/crates/core_arch/src/x86_64/avx512f.rs @@ -41,8 +41,6 @@ pub unsafe fn _mm512_setr_epi64( transmute(r) } - - #[cfg(test)] mod tests { use std; @@ -53,23 +51,11 @@ mod tests { #[simd_test(enable = "avx512f")] unsafe fn test_mm512_abs_epi64() { - let a = _mm512_setr_epi64( - 0, 1, -1, i64::MAX, - i64::MIN, 100, -100, -32 - ); + let a = _mm512_setr_epi64(0, 1, -1, i64::MAX, i64::MIN, 100, -100, -32); let r = _mm512_abs_epi64(a); - let e = _mm512_setr_epi64( - 0, - 1, - 1, - i64::MAX, - i64::MAX.wrapping_add(1), - 100, - 100, - 32 - ); + let e = _mm512_setr_epi64(0, 1, 1, i64::MAX, i64::MAX.wrapping_add(1), 100, 100, 32); assert_eq_m512i(r, e); - } + } #[simd_test(enable = "avx512f")] unsafe fn test_mm512_mask_abs_epi64() { @@ -81,20 +67,11 @@ mod tests { let r = _mm512_mask_abs_epi64(a, 0, a); assert_eq_m512i(r, a); let r = _mm512_mask_abs_epi64(a, 0b11111111, a); - let e = _mm512_setr_epi64( - 0, - 1, - 1, - i64::MAX, - i64::MAX.wrapping_add(1), - 100, - 100, - 32 - ); + let e = _mm512_setr_epi64(0, 1, 1, i64::MAX, i64::MAX.wrapping_add(1), 100, 100, 32); assert_eq_m512i(r, e); } - #[simd_test(enable = "avx512f")] + #[simd_test(enable = "avx512f")] unsafe fn test_mm512_maskz_abs_epi64() { #[rustfmt::skip] let a = _mm512_setr_epi64( @@ -104,19 +81,10 @@ mod tests { let r = _mm512_maskz_abs_epi64(0, a); assert_eq_m512i(r, _mm512_setzero_si512()); let r = _mm512_maskz_abs_epi64(0b01111111, a); - let e = _mm512_setr_epi64( - 0, - 1, - 1, - i64::MAX, - i64::MAX.wrapping_add(1), - 100, - 100, - 0 - ); + let e = _mm512_setr_epi64(0, 1, 1, i64::MAX, i64::MAX.wrapping_add(1), 100, 100, 0); assert_eq_m512i(r, e); } - + #[simd_test(enable = "avx512f")] unsafe fn test_mm512_setzero_pd() { assert_eq_m512d(_mm512_setzero_pd(), _mm512_set1_pd(0.)); @@ -925,262 +893,640 @@ mod tests { #[simd_test(enable = "avx512f")] unsafe fn test_mm512_rol_epi64() { - let a = _mm512_set_epi64( 1<<63, 1<<32, 1<<32, 1<<32, 1<<32, 1<<32, 1<<32, 1<<32 ); + let a = _mm512_set_epi64( + 1 << 63, + 1 << 32, + 1 << 32, + 1 << 32, + 1 << 32, + 1 << 32, + 1 << 32, + 1 << 32, + ); let r = _mm512_rol_epi64(a, 1); - let e = _mm512_set_epi64( 1<<0, 1<<33, 1<<33, 1<<33, 1<<33, 1<<33, 1<<33, 1<<33 ); + let e = _mm512_set_epi64( + 1 << 0, + 1 << 33, + 1 << 33, + 1 << 33, + 1 << 33, + 1 << 33, + 1 << 33, + 1 << 33, + ); assert_eq_m512i(r, e); } #[simd_test(enable = "avx512f")] unsafe fn test_mm512_mask_rol_epi64() { - let a = _mm512_set_epi64( 1<<63, 1<<32, 1<<32, 1<<32, 1<<32, 1<<32, 1<<32, 1<<32 ); + let a = _mm512_set_epi64( + 1 << 63, + 1 << 32, + 1 << 32, + 1 << 32, + 1 << 32, + 1 << 32, + 1 << 32, + 1 << 32, + ); let r = _mm512_mask_rol_epi64(a, 0, a, 1); assert_eq_m512i(r, a); let r = _mm512_mask_rol_epi64(a, 0b11111111, a, 1); - let e = _mm512_set_epi64( 1<<0, 1<<33, 1<<33, 1<<33, 1<<33, 1<<33, 1<<33, 1<<33 ); + let e = _mm512_set_epi64( + 1 << 0, + 1 << 33, + 1 << 33, + 1 << 33, + 1 << 33, + 1 << 33, + 1 << 33, + 1 << 33, + ); assert_eq_m512i(r, e); } #[simd_test(enable = "avx512f")] unsafe fn test_mm512_maskz_rol_epi64() { - let a = _mm512_set_epi64( 1<<32, 1<<32, 1<<32, 1<<32, 1<<32, 1<<32, 1<<32, 1<<63 ); + let a = _mm512_set_epi64( + 1 << 32, + 1 << 32, + 1 << 32, + 1 << 32, + 1 << 32, + 1 << 32, + 1 << 32, + 1 
<< 63, + ); let r = _mm512_maskz_rol_epi64(0, a, 1); assert_eq_m512i(r, _mm512_setzero_si512()); let r = _mm512_maskz_rol_epi64(0b00001111, a, 1); - let e = _mm512_set_epi64( 0, 0, 0, 0, 1<<33, 1<<33, 1<<33, 1<<0 ); + let e = _mm512_set_epi64(0, 0, 0, 0, 1 << 33, 1 << 33, 1 << 33, 1 << 0); assert_eq_m512i(r, e); } #[simd_test(enable = "avx512f")] unsafe fn test_mm512_ror_epi64() { - let a = _mm512_set_epi64( 1<<0, 1<<32, 1<<32, 1<<32, 1<<32, 1<<32, 1<<32, 1<<32 ); + let a = _mm512_set_epi64( + 1 << 0, + 1 << 32, + 1 << 32, + 1 << 32, + 1 << 32, + 1 << 32, + 1 << 32, + 1 << 32, + ); let r = _mm512_ror_epi64(a, 1); - let e = _mm512_set_epi64( 1<<63, 1<<31, 1<<31, 1<<31, 1<<31, 1<<31, 1<<31, 1<<31 ); + let e = _mm512_set_epi64( + 1 << 63, + 1 << 31, + 1 << 31, + 1 << 31, + 1 << 31, + 1 << 31, + 1 << 31, + 1 << 31, + ); assert_eq_m512i(r, e); } #[simd_test(enable = "avx512f")] unsafe fn test_mm512_mask_ror_epi64() { - let a = _mm512_set_epi64( 1<<0, 1<<32, 1<<32, 1<<32, 1<<32, 1<<32, 1<<32, 1<<32 ); + let a = _mm512_set_epi64( + 1 << 0, + 1 << 32, + 1 << 32, + 1 << 32, + 1 << 32, + 1 << 32, + 1 << 32, + 1 << 32, + ); let r = _mm512_mask_ror_epi64(a, 0, a, 1); assert_eq_m512i(r, a); let r = _mm512_mask_ror_epi64(a, 0b11111111, a, 1); - let e = _mm512_set_epi64( 1<<63, 1<<31, 1<<31, 1<<31, 1<<31, 1<<31, 1<<31, 1<<31 ); + let e = _mm512_set_epi64( + 1 << 63, + 1 << 31, + 1 << 31, + 1 << 31, + 1 << 31, + 1 << 31, + 1 << 31, + 1 << 31, + ); assert_eq_m512i(r, e); } #[simd_test(enable = "avx512f")] unsafe fn test_mm512_maskz_ror_epi64() { - let a = _mm512_set_epi64( 1<<32, 1<<32, 1<<32, 1<<32, 1<<32, 1<<32, 1<<32, 1<<0 ); + let a = _mm512_set_epi64( + 1 << 32, + 1 << 32, + 1 << 32, + 1 << 32, + 1 << 32, + 1 << 32, + 1 << 32, + 1 << 0, + ); let r = _mm512_maskz_ror_epi64(0, a, 1); assert_eq_m512i(r, _mm512_setzero_si512()); let r = _mm512_maskz_ror_epi64(0b00001111, a, 1); - let e = _mm512_set_epi64( 0, 0, 0, 0, 1<<31, 1<<31, 1<<31, 1<<63 ); + let e = _mm512_set_epi64(0, 0, 0, 0, 1 << 31, 1 << 31, 1 << 31, 1 << 63); assert_eq_m512i(r, e); } #[simd_test(enable = "avx512f")] unsafe fn test_mm512_slli_epi64() { - let a = _mm512_set_epi64( 1<<63, 1<<32, 1<<32, 1<<32, 1<<32, 1<<32, 1<<32, 1<<32 ); + let a = _mm512_set_epi64( + 1 << 63, + 1 << 32, + 1 << 32, + 1 << 32, + 1 << 32, + 1 << 32, + 1 << 32, + 1 << 32, + ); let r = _mm512_slli_epi64(a, 1); - let e = _mm512_set_epi64( 0, 1<<33, 1<<33, 1<<33, 1<<33, 1<<33, 1<<33, 1<<33 ); + let e = _mm512_set_epi64( + 0, + 1 << 33, + 1 << 33, + 1 << 33, + 1 << 33, + 1 << 33, + 1 << 33, + 1 << 33, + ); assert_eq_m512i(r, e); } #[simd_test(enable = "avx512f")] unsafe fn test_mm512_mask_slli_epi64() { - let a = _mm512_set_epi64( 1<<63, 1<<32, 1<<32, 1<<32, 1<<32, 1<<32, 1<<32, 1<<32 ); + let a = _mm512_set_epi64( + 1 << 63, + 1 << 32, + 1 << 32, + 1 << 32, + 1 << 32, + 1 << 32, + 1 << 32, + 1 << 32, + ); let r = _mm512_mask_slli_epi64(a, 0, a, 1); assert_eq_m512i(r, a); let r = _mm512_mask_slli_epi64(a, 0b11111111, a, 1); - let e = _mm512_set_epi64( 0, 1<<33, 1<<33, 1<<33, 1<<33, 1<<33, 1<<33, 1<<33 ); + let e = _mm512_set_epi64( + 0, + 1 << 33, + 1 << 33, + 1 << 33, + 1 << 33, + 1 << 33, + 1 << 33, + 1 << 33, + ); assert_eq_m512i(r, e); } #[simd_test(enable = "avx512f")] unsafe fn test_mm512_maskz_slli_epi64() { - let a = _mm512_set_epi64( 1<<32, 1<<32, 1<<32, 1<<32, 1<<32, 1<<32, 1<<32, 1<<63 ); + let a = _mm512_set_epi64( + 1 << 32, + 1 << 32, + 1 << 32, + 1 << 32, + 1 << 32, + 1 << 32, + 1 << 32, + 1 << 63, + ); let r = _mm512_maskz_slli_epi64(0, a, 1); 
assert_eq_m512i(r, _mm512_setzero_si512()); let r = _mm512_maskz_slli_epi64(0b00001111, a, 1); - let e = _mm512_set_epi64( 0, 0, 0, 0, 1<<33, 1<<33, 1<<33, 0 ); + let e = _mm512_set_epi64(0, 0, 0, 0, 1 << 33, 1 << 33, 1 << 33, 0); assert_eq_m512i(r, e); } #[simd_test(enable = "avx512f")] unsafe fn test_mm512_srli_epi64() { - let a = _mm512_set_epi64( 1<<0, 1<<32, 1<<32, 1<<32, 1<<32, 1<<32, 1<<32, 1<<32 ); + let a = _mm512_set_epi64( + 1 << 0, + 1 << 32, + 1 << 32, + 1 << 32, + 1 << 32, + 1 << 32, + 1 << 32, + 1 << 32, + ); let r = _mm512_srli_epi64(a, 1); - let e = _mm512_set_epi64( 0, 1<<31, 1<<31, 1<<31, 1<<31, 1<<31, 1<<31, 1<<31 ); + let e = _mm512_set_epi64( + 0, + 1 << 31, + 1 << 31, + 1 << 31, + 1 << 31, + 1 << 31, + 1 << 31, + 1 << 31, + ); assert_eq_m512i(r, e); } #[simd_test(enable = "avx512f")] unsafe fn test_mm512_mask_srli_epi64() { - let a = _mm512_set_epi64( 1<<0, 1<<32, 1<<32, 1<<32, 1<<32, 1<<32, 1<<32, 1<<32 ); + let a = _mm512_set_epi64( + 1 << 0, + 1 << 32, + 1 << 32, + 1 << 32, + 1 << 32, + 1 << 32, + 1 << 32, + 1 << 32, + ); let r = _mm512_mask_srli_epi64(a, 0, a, 1); assert_eq_m512i(r, a); let r = _mm512_mask_srli_epi64(a, 0b11111111, a, 1); - let e = _mm512_set_epi64( 0, 1<<31, 1<<31, 1<<31, 1<<31, 1<<31, 1<<31, 1<<31 ); + let e = _mm512_set_epi64( + 0, + 1 << 31, + 1 << 31, + 1 << 31, + 1 << 31, + 1 << 31, + 1 << 31, + 1 << 31, + ); assert_eq_m512i(r, e); } #[simd_test(enable = "avx512f")] unsafe fn test_mm512_maskz_srli_epi64() { - let a = _mm512_set_epi64( 1<<32, 1<<32, 1<<32, 1<<32, 1<<32, 1<<32, 1<<32, 1<<0 ); + let a = _mm512_set_epi64( + 1 << 32, + 1 << 32, + 1 << 32, + 1 << 32, + 1 << 32, + 1 << 32, + 1 << 32, + 1 << 0, + ); let r = _mm512_maskz_srli_epi64(0, a, 1); assert_eq_m512i(r, _mm512_setzero_si512()); let r = _mm512_maskz_srli_epi64(0b00001111, a, 1); - let e = _mm512_set_epi64( 0, 0, 0, 0, 1<<31, 1<<31, 1<<31, 0 ); + let e = _mm512_set_epi64(0, 0, 0, 0, 1 << 31, 1 << 31, 1 << 31, 0); assert_eq_m512i(r, e); } #[simd_test(enable = "avx512f")] unsafe fn test_mm512_rolv_epi64() { - let a = _mm512_set_epi64( 1<<32, 1<<63, 1<<32, 1<<32, 1<<32, 1<<32, 1<<32, 1<<32 ); - let b = _mm512_set_epi64( 0, 1, 2, 3, 4, 5, 6, 7 ); + let a = _mm512_set_epi64( + 1 << 32, + 1 << 63, + 1 << 32, + 1 << 32, + 1 << 32, + 1 << 32, + 1 << 32, + 1 << 32, + ); + let b = _mm512_set_epi64(0, 1, 2, 3, 4, 5, 6, 7); let r = _mm512_rolv_epi64(a, b); - let e = _mm512_set_epi64( 1<<32, 1<<0, 1<<34, 1<<35, 1<<36, 1<<37, 1<<38, 1<<39 ); + let e = _mm512_set_epi64( + 1 << 32, + 1 << 0, + 1 << 34, + 1 << 35, + 1 << 36, + 1 << 37, + 1 << 38, + 1 << 39, + ); assert_eq_m512i(r, e); } #[simd_test(enable = "avx512f")] unsafe fn test_mm512_mask_rolv_epi64() { - let a = _mm512_set_epi64( 1<<32, 1<<63, 1<<32, 1<<32, 1<<32, 1<<32, 1<<32, 1<<32 ); - let b = _mm512_set_epi64( 0, 1, 2, 3, 4, 5, 6, 7 ); + let a = _mm512_set_epi64( + 1 << 32, + 1 << 63, + 1 << 32, + 1 << 32, + 1 << 32, + 1 << 32, + 1 << 32, + 1 << 32, + ); + let b = _mm512_set_epi64(0, 1, 2, 3, 4, 5, 6, 7); let r = _mm512_mask_rolv_epi64(a, 0, a, b); assert_eq_m512i(r, a); let r = _mm512_mask_rolv_epi64(a, 0b11111111, a, b); - let e = _mm512_set_epi64( 1<<32, 1<<0, 1<<34, 1<<35, 1<<36, 1<<37, 1<<38, 1<<39 ); + let e = _mm512_set_epi64( + 1 << 32, + 1 << 0, + 1 << 34, + 1 << 35, + 1 << 36, + 1 << 37, + 1 << 38, + 1 << 39, + ); assert_eq_m512i(r, e); } #[simd_test(enable = "avx512f")] unsafe fn test_mm512_maskz_rolv_epi64() { - let a = _mm512_set_epi64( 1<<32, 1<<32, 1<<32, 1<<32, 1<<32, 1<<32, 1<<32, 1<<62 ); - let b = _mm512_set_epi64( 
0, 1, 2, 3, 4, 5, 6, 2 ); + let a = _mm512_set_epi64( + 1 << 32, + 1 << 32, + 1 << 32, + 1 << 32, + 1 << 32, + 1 << 32, + 1 << 32, + 1 << 62, + ); + let b = _mm512_set_epi64(0, 1, 2, 3, 4, 5, 6, 2); let r = _mm512_maskz_rolv_epi64(0, a, b); assert_eq_m512i(r, _mm512_setzero_si512()); let r = _mm512_maskz_rolv_epi64(0b00001111, a, b); - let e = _mm512_set_epi64( 0, 0, 0, 0, 1<<36, 1<<37, 1<<38, 1<<0 ); + let e = _mm512_set_epi64(0, 0, 0, 0, 1 << 36, 1 << 37, 1 << 38, 1 << 0); assert_eq_m512i(r, e); } #[simd_test(enable = "avx512f")] unsafe fn test_mm512_rorv_epi64() { - let a = _mm512_set_epi64( 1<<32, 1<<0, 1<<32, 1<<32, 1<<32, 1<<32, 1<<32, 1<<32 ); - let b = _mm512_set_epi64( 0, 1, 2, 3, 4, 5, 6, 7 ); + let a = _mm512_set_epi64( + 1 << 32, + 1 << 0, + 1 << 32, + 1 << 32, + 1 << 32, + 1 << 32, + 1 << 32, + 1 << 32, + ); + let b = _mm512_set_epi64(0, 1, 2, 3, 4, 5, 6, 7); let r = _mm512_rorv_epi64(a, b); - let e = _mm512_set_epi64( 1<<32, 1<<63, 1<<30, 1<<29, 1<<28, 1<<27, 1<<26, 1<<25 ); + let e = _mm512_set_epi64( + 1 << 32, + 1 << 63, + 1 << 30, + 1 << 29, + 1 << 28, + 1 << 27, + 1 << 26, + 1 << 25, + ); assert_eq_m512i(r, e); } #[simd_test(enable = "avx512f")] unsafe fn test_mm512_mask_rorv_epi64() { - let a = _mm512_set_epi64( 1<<32, 1<<0, 1<<32, 1<<32, 1<<32, 1<<32, 1<<32, 1<<32 ); - let b = _mm512_set_epi64( 0, 1, 2, 3, 4, 5, 6, 7 ); + let a = _mm512_set_epi64( + 1 << 32, + 1 << 0, + 1 << 32, + 1 << 32, + 1 << 32, + 1 << 32, + 1 << 32, + 1 << 32, + ); + let b = _mm512_set_epi64(0, 1, 2, 3, 4, 5, 6, 7); let r = _mm512_mask_rorv_epi64(a, 0, a, b); assert_eq_m512i(r, a); let r = _mm512_mask_rorv_epi64(a, 0b11111111, a, b); - let e = _mm512_set_epi64( 1<<32, 1<<63, 1<<30, 1<<29, 1<<28, 1<<27, 1<<26, 1<<25 ); + let e = _mm512_set_epi64( + 1 << 32, + 1 << 63, + 1 << 30, + 1 << 29, + 1 << 28, + 1 << 27, + 1 << 26, + 1 << 25, + ); assert_eq_m512i(r, e); } #[simd_test(enable = "avx512f")] unsafe fn test_mm512_maskz_rorv_epi64() { - let a = _mm512_set_epi64( 1<<32, 1<<32, 1<<32, 1<<32, 1<<32, 1<<32, 1<<32, 1<<0 ); - let b = _mm512_set_epi64( 0, 1, 2, 3, 4, 5, 6, 2 ); + let a = _mm512_set_epi64( + 1 << 32, + 1 << 32, + 1 << 32, + 1 << 32, + 1 << 32, + 1 << 32, + 1 << 32, + 1 << 0, + ); + let b = _mm512_set_epi64(0, 1, 2, 3, 4, 5, 6, 2); let r = _mm512_maskz_rorv_epi64(0, a, b); assert_eq_m512i(r, _mm512_setzero_si512()); let r = _mm512_maskz_rorv_epi64(0b00001111, a, b); - let e = _mm512_set_epi64( 0, 0, 0, 0, 1<<28, 1<<27, 1<<26, 1<<62 ); + let e = _mm512_set_epi64(0, 0, 0, 0, 1 << 28, 1 << 27, 1 << 26, 1 << 62); assert_eq_m512i(r, e); } #[simd_test(enable = "avx512f")] unsafe fn test_mm512_sllv_epi64() { - let a = _mm512_set_epi64( 1<<32, 1<<63, 1<<32, 1<<32, 1<<32, 1<<32, 1<<32, 1<<32 ); - let count = _mm512_set_epi64( 0, 2, 2, 3, 4, 5, 6, 7 ); + let a = _mm512_set_epi64( + 1 << 32, + 1 << 63, + 1 << 32, + 1 << 32, + 1 << 32, + 1 << 32, + 1 << 32, + 1 << 32, + ); + let count = _mm512_set_epi64(0, 2, 2, 3, 4, 5, 6, 7); let r = _mm512_sllv_epi64(a, count); - let e = _mm512_set_epi64( 1<<32, 0, 1<<34, 1<<35, 1<<36, 1<<37, 1<<38, 1<<39 ); + let e = _mm512_set_epi64( + 1 << 32, + 0, + 1 << 34, + 1 << 35, + 1 << 36, + 1 << 37, + 1 << 38, + 1 << 39, + ); assert_eq_m512i(r, e); } #[simd_test(enable = "avx512f")] unsafe fn test_mm512_mask_sllv_epi64() { - let a = _mm512_set_epi64( 1<<32, 1<<32, 1<<63, 1<<32, 1<<32, 1<<32, 1<<32, 1<<32 ); - let count = _mm512_set_epi64( 0, 1, 2, 3, 4, 5, 6, 7 ); + let a = _mm512_set_epi64( + 1 << 32, + 1 << 32, + 1 << 63, + 1 << 32, + 1 << 32, + 1 << 32, + 1 << 32, + 
1 << 32, + ); + let count = _mm512_set_epi64(0, 1, 2, 3, 4, 5, 6, 7); let r = _mm512_mask_sllv_epi64(a, 0, a, count); assert_eq_m512i(r, a); let r = _mm512_mask_sllv_epi64(a, 0b11111111, a, count); - let e = _mm512_set_epi64( 1<<32, 1<<33, 0, 1<<35, 1<<36, 1<<37, 1<<38, 1<<39 ); + let e = _mm512_set_epi64( + 1 << 32, + 1 << 33, + 0, + 1 << 35, + 1 << 36, + 1 << 37, + 1 << 38, + 1 << 39, + ); assert_eq_m512i(r, e); } #[simd_test(enable = "avx512f")] unsafe fn test_mm512_maskz_sllv_epi64() { - let a = _mm512_set_epi64( 1<<32, 1<<32, 1<<32, 1<<32, 1<<32, 1<<32, 1<<32, 1<<63 ); - let count = _mm512_set_epi64( 0, 1, 2, 3, 4, 5, 6, 1 ); + let a = _mm512_set_epi64( + 1 << 32, + 1 << 32, + 1 << 32, + 1 << 32, + 1 << 32, + 1 << 32, + 1 << 32, + 1 << 63, + ); + let count = _mm512_set_epi64(0, 1, 2, 3, 4, 5, 6, 1); let r = _mm512_maskz_sllv_epi64(0, a, count); assert_eq_m512i(r, _mm512_setzero_si512()); let r = _mm512_maskz_sllv_epi64(0b00001111, a, count); - let e = _mm512_set_epi64( 0, 0, 0, 0, 1<<36, 1<<37, 1<<38, 0 ); + let e = _mm512_set_epi64(0, 0, 0, 0, 1 << 36, 1 << 37, 1 << 38, 0); assert_eq_m512i(r, e); } #[simd_test(enable = "avx512f")] unsafe fn test_mm512_srlv_epi64() { - let a = _mm512_set_epi64( 1<<32, 1<<0, 1<<32, 1<<32, 1<<32, 1<<32, 1<<32, 1<<32 ); - let count = _mm512_set_epi64( 0, 1, 2, 3, 4, 5, 6, 7 ); + let a = _mm512_set_epi64( + 1 << 32, + 1 << 0, + 1 << 32, + 1 << 32, + 1 << 32, + 1 << 32, + 1 << 32, + 1 << 32, + ); + let count = _mm512_set_epi64(0, 1, 2, 3, 4, 5, 6, 7); let r = _mm512_srlv_epi64(a, count); - let e = _mm512_set_epi64( 1<<32, 0, 1<<30, 1<<29, 1<<28, 1<<27, 1<<26, 1<<25 ); + let e = _mm512_set_epi64( + 1 << 32, + 0, + 1 << 30, + 1 << 29, + 1 << 28, + 1 << 27, + 1 << 26, + 1 << 25, + ); assert_eq_m512i(r, e); } #[simd_test(enable = "avx512f")] unsafe fn test_mm512_mask_srlv_epi64() { - let a = _mm512_set_epi64( 1<<32, 1<<0, 1<<32, 1<<32, 1<<32, 1<<32, 1<<32, 1<<32 ); - let count = _mm512_set_epi64( 0, 1, 2, 3, 4, 5, 6, 7 ); + let a = _mm512_set_epi64( + 1 << 32, + 1 << 0, + 1 << 32, + 1 << 32, + 1 << 32, + 1 << 32, + 1 << 32, + 1 << 32, + ); + let count = _mm512_set_epi64(0, 1, 2, 3, 4, 5, 6, 7); let r = _mm512_mask_srlv_epi64(a, 0, a, count); assert_eq_m512i(r, a); let r = _mm512_mask_srlv_epi64(a, 0b11111111, a, count); - let e = _mm512_set_epi64( 1<<32, 0, 1<<30, 1<<29, 1<<28, 1<<27, 1<<26, 1<<25 ); + let e = _mm512_set_epi64( + 1 << 32, + 0, + 1 << 30, + 1 << 29, + 1 << 28, + 1 << 27, + 1 << 26, + 1 << 25, + ); assert_eq_m512i(r, e); } #[simd_test(enable = "avx512f")] unsafe fn test_mm512_maskz_srlv_epi64() { - let a = _mm512_set_epi64( 1<<32, 1<<32, 1<<32, 1<<32, 1<<32, 1<<32, 1<<32, 1<<0 ); - let count = _mm512_set_epi64( 0, 1, 2, 3, 4, 5, 6, 7 ); + let a = _mm512_set_epi64( + 1 << 32, + 1 << 32, + 1 << 32, + 1 << 32, + 1 << 32, + 1 << 32, + 1 << 32, + 1 << 0, + ); + let count = _mm512_set_epi64(0, 1, 2, 3, 4, 5, 6, 7); let r = _mm512_maskz_srlv_epi64(0, a, count); assert_eq_m512i(r, _mm512_setzero_si512()); let r = _mm512_maskz_srlv_epi64(0b00001111, a, count); - let e = _mm512_set_epi64( 0, 0, 0, 0, 1<<28, 1<<27, 1<<26, 0 ); + let e = _mm512_set_epi64(0, 0, 0, 0, 1 << 28, 1 << 27, 1 << 26, 0); assert_eq_m512i(r, e); } #[simd_test(enable = "avx512f")] unsafe fn test_mm512_sll_epi64() { - let a = _mm512_set_epi64( 1<<63, 1<<32, 1<<32, 1<<32, 1<<32, 1<<32, 1<<32, 1<<32 ); + let a = _mm512_set_epi64( + 1 << 63, + 1 << 32, + 1 << 32, + 1 << 32, + 1 << 32, + 1 << 32, + 1 << 32, + 1 << 32, + ); let count = _mm_set_epi64x(0, 1); let r = _mm512_sll_epi64(a, 
count); - let e = _mm512_set_epi64( 0, 1<<33, 1<<33, 1<<33, 1<<33, 1<<33, 1<<33, 1<<33 ); + let e = _mm512_set_epi64( + 0, + 1 << 33, + 1 << 33, + 1 << 33, + 1 << 33, + 1 << 33, + 1 << 33, + 1 << 33, + ); assert_eq_m512i(r, e); let count = _mm_set_epi64x(1, 0); @@ -1190,58 +1536,130 @@ mod tests { #[simd_test(enable = "avx512f")] unsafe fn test_mm512_mask_sll_epi64() { - let a = _mm512_set_epi64( 1<<63, 1<<32, 1<<32, 1<<32, 1<<32, 1<<32, 1<<32, 1<<32 ); + let a = _mm512_set_epi64( + 1 << 63, + 1 << 32, + 1 << 32, + 1 << 32, + 1 << 32, + 1 << 32, + 1 << 32, + 1 << 32, + ); let count = _mm_set_epi64x(0, 1); let r = _mm512_mask_sll_epi64(a, 0, a, count); assert_eq_m512i(r, a); let r = _mm512_mask_sll_epi64(a, 0b11111111, a, count); - let e = _mm512_set_epi64( 0, 1<<33, 1<<33, 1<<33, 1<<33, 1<<33, 1<<33, 1<<33 ); + let e = _mm512_set_epi64( + 0, + 1 << 33, + 1 << 33, + 1 << 33, + 1 << 33, + 1 << 33, + 1 << 33, + 1 << 33, + ); assert_eq_m512i(r, e); } #[simd_test(enable = "avx512f")] unsafe fn test_mm512_maskz_sll_epi64() { - let a = _mm512_set_epi64( 1<<32, 1<<32, 1<<32, 1<<32, 1<<32, 1<<32, 1<<32, 1<<63 ); + let a = _mm512_set_epi64( + 1 << 32, + 1 << 32, + 1 << 32, + 1 << 32, + 1 << 32, + 1 << 32, + 1 << 32, + 1 << 63, + ); let count = _mm_set_epi64x(0, 1); let r = _mm512_maskz_sll_epi64(0, a, count); assert_eq_m512i(r, _mm512_setzero_si512()); let r = _mm512_maskz_sll_epi64(0b00001111, a, count); - let e = _mm512_set_epi64( 0, 0, 0, 0, 1<<33, 1<<33, 1<<33, 0 ); + let e = _mm512_set_epi64(0, 0, 0, 0, 1 << 33, 1 << 33, 1 << 33, 0); assert_eq_m512i(r, e); } #[simd_test(enable = "avx512f")] unsafe fn test_mm512_srl_epi64() { - let a = _mm512_set_epi64( 1<<0, 1<<32, 1<<32, 1<<32, 1<<32, 1<<32, 1<<32, 1<<32 ); + let a = _mm512_set_epi64( + 1 << 0, + 1 << 32, + 1 << 32, + 1 << 32, + 1 << 32, + 1 << 32, + 1 << 32, + 1 << 32, + ); let count = _mm_set_epi64x(0, 1); let r = _mm512_srl_epi64(a, count); - let e = _mm512_set_epi64( 0, 1<<31, 1<<31, 1<<31, 1<<31, 1<<31, 1<<31, 1<<31 ); + let e = _mm512_set_epi64( + 0, + 1 << 31, + 1 << 31, + 1 << 31, + 1 << 31, + 1 << 31, + 1 << 31, + 1 << 31, + ); assert_eq_m512i(r, e); } #[simd_test(enable = "avx512f")] unsafe fn test_mm512_mask_srl_epi64() { - let a = _mm512_set_epi64( 1<<0, 1<<32, 1<<32, 1<<32, 1<<32, 1<<32, 1<<32, 1<<32 ); + let a = _mm512_set_epi64( + 1 << 0, + 1 << 32, + 1 << 32, + 1 << 32, + 1 << 32, + 1 << 32, + 1 << 32, + 1 << 32, + ); let count = _mm_set_epi64x(0, 1); let r = _mm512_mask_srl_epi64(a, 0, a, count); assert_eq_m512i(r, a); let r = _mm512_mask_srl_epi64(a, 0b11111111, a, count); - let e = _mm512_set_epi64( 0, 1<<31, 1<<31, 1<<31, 1<<31, 1<<31, 1<<31, 1<<31 ); + let e = _mm512_set_epi64( + 0, + 1 << 31, + 1 << 31, + 1 << 31, + 1 << 31, + 1 << 31, + 1 << 31, + 1 << 31, + ); assert_eq_m512i(r, e); } #[simd_test(enable = "avx512f")] unsafe fn test_mm512_maskz_srl_epi64() { - let a = _mm512_set_epi64( 1<<32, 1<<32, 1<<32, 1<<32, 1<<32, 1<<32, 1<<32, 1<<0 ); + let a = _mm512_set_epi64( + 1 << 32, + 1 << 32, + 1 << 32, + 1 << 32, + 1 << 32, + 1 << 32, + 1 << 32, + 1 << 0, + ); let count = _mm_set_epi64x(0, 1); let r = _mm512_maskz_srl_epi64(0, a, count); assert_eq_m512i(r, _mm512_setzero_si512()); let r = _mm512_maskz_srl_epi64(0b00001111, a, count); - let e = _mm512_set_epi64( 0, 0, 0, 0, 1<<31, 1<<31, 1<<31, 0 ); + let e = _mm512_set_epi64(0, 0, 0, 0, 1 << 31, 1 << 31, 1 << 31, 0); assert_eq_m512i(r, e); } @@ -1253,7 +1671,7 @@ mod tests { let e = _mm512_set_epi64(0, -2, 0, 0, 0, 0, 3, -4); assert_eq_m512i(r, e); } - + 
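// The expected values in the arithmetic-shift tests around here follow from
// sign extension: VPSRAQ shifts in copies of the sign bit. Rust's `>>` on a
// signed integer is also arithmetic, so the scalar analogue is direct:
fn main() {
    assert_eq!(-8i64 >> 2, -2); // sign bit is replicated
    assert_eq!(15i64 >> 2, 3);
    assert_eq!(-16i64 >> 2, -4);
}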
#[simd_test(enable = "avx512f")] unsafe fn test_mm512_mask_sra_epi64() { let a = _mm512_set_epi64(1, -8, 0, 0, 0, 0, 15, -16); @@ -1274,7 +1692,7 @@ mod tests { assert_eq_m512i(r, _mm512_setzero_si512()); let r = _mm512_maskz_sra_epi64(0b00001111, a, count); - let e = _mm512_set_epi64( 0, 0, 0, 0, 0, 0, 3, -4); + let e = _mm512_set_epi64(0, 0, 0, 0, 0, 0, 3, -4); assert_eq_m512i(r, e); } @@ -1286,7 +1704,7 @@ mod tests { let e = _mm512_set_epi64(0, -2, 0, 0, 0, 0, 3, -8); assert_eq_m512i(r, e); } - + #[simd_test(enable = "avx512f")] unsafe fn test_mm512_mask_srav_epi64() { let a = _mm512_set_epi64(1, -8, 0, 0, 0, 0, 15, -16); @@ -1307,7 +1725,7 @@ mod tests { assert_eq_m512i(r, _mm512_setzero_si512()); let r = _mm512_maskz_srav_epi64(0b00001111, a, count); - let e = _mm512_set_epi64( 0, 0, 0, 0, 0, 0, 3, -8); + let e = _mm512_set_epi64(0, 0, 0, 0, 0, 0, 3, -8); assert_eq_m512i(r, e); } @@ -1326,7 +1744,7 @@ mod tests { assert_eq_m512i(r, a); let r = _mm512_mask_srai_epi64(a, 0b11111111, a, 2); - let e = _mm512_set_epi64( 0, -1, 3, 0, 0, 0, 0, -4); + let e = _mm512_set_epi64(0, -1, 3, 0, 0, 0, 0, -4); assert_eq_m512i(r, e); } @@ -1343,118 +1761,145 @@ mod tests { #[simd_test(enable = "avx512f")] unsafe fn test_mm512_and_epi64() { - let a = _mm512_set_epi64(1<<0|1<<15, 0, 0, 0, 0, 0, 0, 1<<1|1<<2|1<<3); - let b = _mm512_set_epi64(1<<13, 0, 0, 0, 0, 0, 0, 1<<1|1<<2|1<<3); + let a = _mm512_set_epi64(1 << 0 | 1 << 15, 0, 0, 0, 0, 0, 0, 1 << 1 | 1 << 2 | 1 << 3); + let b = _mm512_set_epi64(1 << 13, 0, 0, 0, 0, 0, 0, 1 << 1 | 1 << 2 | 1 << 3); let r = _mm512_and_epi64(a, b); - let e = _mm512_set_epi64(0, 0, 0, 0, 0, 0, 0, 1<<1|1<<2|1<<3); + let e = _mm512_set_epi64(0, 0, 0, 0, 0, 0, 0, 1 << 1 | 1 << 2 | 1 << 3); assert_eq_m512i(r, e); } #[simd_test(enable = "avx512f")] unsafe fn test_mm512_mask_and_epi64() { - let a = _mm512_set_epi64(1<<0|1<<15, 0, 0, 0, 0, 0, 0, 1<<1|1<<2|1<<3); - let b = _mm512_set_epi64(1<<13, 0, 0, 0, 0, 0, 0, 1<<1|1<<2|1<<3); + let a = _mm512_set_epi64(1 << 0 | 1 << 15, 0, 0, 0, 0, 0, 0, 1 << 1 | 1 << 2 | 1 << 3); + let b = _mm512_set_epi64(1 << 13, 0, 0, 0, 0, 0, 0, 1 << 1 | 1 << 2 | 1 << 3); let r = _mm512_mask_and_epi64(a, 0, a, b); - let e = _mm512_set_epi64(1<<0|1<<15, 0, 0, 0, 0, 0, 0, 1<<1|1<<2|1<<3); + let e = _mm512_set_epi64(1 << 0 | 1 << 15, 0, 0, 0, 0, 0, 0, 1 << 1 | 1 << 2 | 1 << 3); assert_eq_m512i(r, e); let r = _mm512_mask_and_epi64(a, 0b01111111, a, b); - let e = _mm512_set_epi64(1<<0|1<<15, 0, 0, 0, 0, 0, 0, 1<<1|1<<2|1<<3); + let e = _mm512_set_epi64(1 << 0 | 1 << 15, 0, 0, 0, 0, 0, 0, 1 << 1 | 1 << 2 | 1 << 3); assert_eq_m512i(r, e); } #[simd_test(enable = "avx512f")] unsafe fn test_mm512_maskz_and_epi64() { - let a = _mm512_set_epi64(1<<0|1<<15, 0, 0, 0, 0, 0, 0, 1<<1|1<<2|1<<3); - let b = _mm512_set_epi64(1<<13, 0, 0, 0, 0, 0, 0, 1<<1|1<<2|1<<3); + let a = _mm512_set_epi64(1 << 0 | 1 << 15, 0, 0, 0, 0, 0, 0, 1 << 1 | 1 << 2 | 1 << 3); + let b = _mm512_set_epi64(1 << 13, 0, 0, 0, 0, 0, 0, 1 << 1 | 1 << 2 | 1 << 3); let r = _mm512_maskz_and_epi64(0, a, b); let e = _mm512_set_epi64(0, 0, 0, 0, 0, 0, 0, 0); assert_eq_m512i(r, e); let r = _mm512_maskz_and_epi64(0b00001111, a, b); - let e = _mm512_set_epi64(0, 0, 0, 0, 0, 0, 0, 1<<1|1<<2|1<<3); + let e = _mm512_set_epi64(0, 0, 0, 0, 0, 0, 0, 1 << 1 | 1 << 2 | 1 << 3); assert_eq_m512i(r, e); } #[simd_test(enable = "avx512f")] unsafe fn test_mm512_and_si512() { - let a = _mm512_set_epi64(1<<0|1<<15, 0, 0, 0, 0, 0, 0, 1<<1|1<<2|1<<3); - let b = _mm512_set_epi64(1<<13, 0, 0, 0, 0, 0, 0, 1<<1|1<<2|1<<3); + let a 
= _mm512_set_epi64(1 << 0 | 1 << 15, 0, 0, 0, 0, 0, 0, 1 << 1 | 1 << 2 | 1 << 3); + let b = _mm512_set_epi64(1 << 13, 0, 0, 0, 0, 0, 0, 1 << 1 | 1 << 2 | 1 << 3); let r = _mm512_and_epi64(a, b); - let e = _mm512_set_epi64(0, 0, 0, 0, 0, 0, 0, 1<<1|1<<2|1<<3); + let e = _mm512_set_epi64(0, 0, 0, 0, 0, 0, 0, 1 << 1 | 1 << 2 | 1 << 3); assert_eq_m512i(r, e); } #[simd_test(enable = "avx512f")] unsafe fn test_mm512_or_epi64() { - let a = _mm512_set_epi64(1<<0|1<<15, 0, 0, 0, 0, 0, 0, 1<<1|1<<2|1<<3); - let b = _mm512_set_epi64(1<<13, 0, 0, 0, 0, 0, 0, 1<<1|1<<2|1<<3); + let a = _mm512_set_epi64(1 << 0 | 1 << 15, 0, 0, 0, 0, 0, 0, 1 << 1 | 1 << 2 | 1 << 3); + let b = _mm512_set_epi64(1 << 13, 0, 0, 0, 0, 0, 0, 1 << 1 | 1 << 2 | 1 << 3); let r = _mm512_or_epi64(a, b); - let e = _mm512_set_epi64(1<<0|1<<13|1<<15, 0, 0, 0, 0, 0, 0, 1<<1|1<<2|1<<3); + let e = _mm512_set_epi64( + 1 << 0 | 1 << 13 | 1 << 15, + 0, + 0, + 0, + 0, + 0, + 0, + 1 << 1 | 1 << 2 | 1 << 3, + ); assert_eq_m512i(r, e); } #[simd_test(enable = "avx512f")] unsafe fn test_mm512_mask_or_epi64() { - let a = _mm512_set_epi64(1<<0|1<<15, 0, 0, 0, 0, 0, 0, 1<<1|1<<2|1<<3); - let b = _mm512_set_epi64(1<<13, 0, 0, 0, 0, 0, 0, 1<<1|1<<2|1<<3); + let a = _mm512_set_epi64(1 << 0 | 1 << 15, 0, 0, 0, 0, 0, 0, 1 << 1 | 1 << 2 | 1 << 3); + let b = _mm512_set_epi64(1 << 13, 0, 0, 0, 0, 0, 0, 1 << 1 | 1 << 2 | 1 << 3); let r = _mm512_mask_or_epi64(a, 0, a, b); - let e = _mm512_set_epi64(1<<0|1<<15, 0, 0, 0, 0, 0, 0, 1<<1|1<<2|1<<3); + let e = _mm512_set_epi64(1 << 0 | 1 << 15, 0, 0, 0, 0, 0, 0, 1 << 1 | 1 << 2 | 1 << 3); assert_eq_m512i(r, e); let r = _mm512_mask_or_epi64(a, 0b11111111, a, b); - let e = _mm512_set_epi64(1<<0|1<<13|1<<15, 0, 0, 0, 0, 0, 0, 1<<1|1<<2|1<<3); + let e = _mm512_set_epi64( + 1 << 0 | 1 << 13 | 1 << 15, + 0, + 0, + 0, + 0, + 0, + 0, + 1 << 1 | 1 << 2 | 1 << 3, + ); assert_eq_m512i(r, e); } #[simd_test(enable = "avx512f")] unsafe fn test_mm512_maskz_or_epi64() { - let a = _mm512_set_epi64(1<<0|1<<15, 0, 0, 0, 0, 0, 0, 1<<1|1<<2|1<<3); - let b = _mm512_set_epi64(1<<13, 0, 0, 0, 0, 0, 0, 1<<1|1<<2|1<<3); + let a = _mm512_set_epi64(1 << 0 | 1 << 15, 0, 0, 0, 0, 0, 0, 1 << 1 | 1 << 2 | 1 << 3); + let b = _mm512_set_epi64(1 << 13, 0, 0, 0, 0, 0, 0, 1 << 1 | 1 << 2 | 1 << 3); let r = _mm512_maskz_or_epi64(0, a, b); let e = _mm512_set_epi64(0, 0, 0, 0, 0, 0, 0, 0); assert_eq_m512i(r, e); let r = _mm512_maskz_or_epi64(0b00001111, a, b); - let e = _mm512_set_epi64(0, 0, 0, 0, 0, 0, 0, 1<<1|1<<2|1<<3); + let e = _mm512_set_epi64(0, 0, 0, 0, 0, 0, 0, 1 << 1 | 1 << 2 | 1 << 3); assert_eq_m512i(r, e); } #[simd_test(enable = "avx512f")] unsafe fn test_mm512_or_si512() { - let a = _mm512_set_epi64(1<<0|1<<15, 0, 0, 0, 0, 0, 0, 1<<1|1<<2|1<<3); - let b = _mm512_set_epi64(1<<13, 0, 0, 0, 0, 0, 0, 1<<1|1<<2|1<<3); + let a = _mm512_set_epi64(1 << 0 | 1 << 15, 0, 0, 0, 0, 0, 0, 1 << 1 | 1 << 2 | 1 << 3); + let b = _mm512_set_epi64(1 << 13, 0, 0, 0, 0, 0, 0, 1 << 1 | 1 << 2 | 1 << 3); let r = _mm512_or_epi64(a, b); - let e = _mm512_set_epi64(1<<0|1<<13|1<<15, 0, 0, 0, 0, 0, 0, 1<<1|1<<2|1<<3); + let e = _mm512_set_epi64( + 1 << 0 | 1 << 13 | 1 << 15, + 0, + 0, + 0, + 0, + 0, + 0, + 1 << 1 | 1 << 2 | 1 << 3, + ); assert_eq_m512i(r, e); } #[simd_test(enable = "avx512f")] unsafe fn test_mm512_xor_epi64() { - let a = _mm512_set_epi64(1<<0|1<<15, 0, 0, 0, 0, 0, 0, 1<<1|1<<2|1<<3); - let b = _mm512_set_epi64(1<<13, 0, 0, 0, 0, 0, 0, 1<<1|1<<2|1<<3); + let a = _mm512_set_epi64(1 << 0 | 1 << 15, 0, 0, 0, 0, 0, 0, 1 << 1 | 1 << 2 | 1 << 3); + let b = 
_mm512_set_epi64(1 << 13, 0, 0, 0, 0, 0, 0, 1 << 1 | 1 << 2 | 1 << 3); let r = _mm512_xor_epi64(a, b); - let e = _mm512_set_epi64(1<<0|1<<13|1<<15, 0, 0, 0, 0, 0, 0, 0); + let e = _mm512_set_epi64(1 << 0 | 1 << 13 | 1 << 15, 0, 0, 0, 0, 0, 0, 0); assert_eq_m512i(r, e); } #[simd_test(enable = "avx512f")] unsafe fn test_mm512_mask_xor_epi64() { - let a = _mm512_set_epi64(1<<0|1<<15, 0, 0, 0, 0, 0, 0, 1<<1|1<<2|1<<3); - let b = _mm512_set_epi64(1<<13, 0, 0, 0, 0, 0, 0, 1<<1|1<<2|1<<3); + let a = _mm512_set_epi64(1 << 0 | 1 << 15, 0, 0, 0, 0, 0, 0, 1 << 1 | 1 << 2 | 1 << 3); + let b = _mm512_set_epi64(1 << 13, 0, 0, 0, 0, 0, 0, 1 << 1 | 1 << 2 | 1 << 3); let r = _mm512_mask_xor_epi64(a, 0, a, b); - let e = _mm512_set_epi64(1<<0|1<<15, 0, 0, 0, 0, 0, 0, 1<<1|1<<2|1<<3); + let e = _mm512_set_epi64(1 << 0 | 1 << 15, 0, 0, 0, 0, 0, 0, 1 << 1 | 1 << 2 | 1 << 3); assert_eq_m512i(r, e); let r = _mm512_mask_xor_epi64(a, 0b11111111, a, b); - let e = _mm512_set_epi64(1<<0|1<<13|1<<15, 0, 0, 0, 0, 0, 0, 0); + let e = _mm512_set_epi64(1 << 0 | 1 << 13 | 1 << 15, 0, 0, 0, 0, 0, 0, 0); assert_eq_m512i(r, e); } #[simd_test(enable = "avx512f")] unsafe fn test_mm512_maskz_xor_epi64() { - let a = _mm512_set_epi64(1<<0|1<<15, 0, 0, 0, 0, 0, 0, 1<<1|1<<2|1<<3); - let b = _mm512_set_epi64(1<<13, 0, 0, 0, 0, 0, 0, 1<<1|1<<2|1<<3); + let a = _mm512_set_epi64(1 << 0 | 1 << 15, 0, 0, 0, 0, 0, 0, 1 << 1 | 1 << 2 | 1 << 3); + let b = _mm512_set_epi64(1 << 13, 0, 0, 0, 0, 0, 0, 1 << 1 | 1 << 2 | 1 << 3); let r = _mm512_maskz_xor_epi64(0, a, b); let e = _mm512_set_epi64(0, 0, 0, 0, 0, 0, 0, 0); assert_eq_m512i(r, e); @@ -1466,10 +1911,10 @@ mod tests { #[simd_test(enable = "avx512f")] unsafe fn test_mm512_xor_si512() { - let a = _mm512_set_epi64(1<<0|1<<15, 0, 0, 0, 0, 0, 0, 1<<1|1<<2|1<<3); - let b = _mm512_set_epi64(1<<13, 0, 0, 0, 0, 0, 0, 1<<1|1<<2|1<<3); + let a = _mm512_set_epi64(1 << 0 | 1 << 15, 0, 0, 0, 0, 0, 0, 1 << 1 | 1 << 2 | 1 << 3); + let b = _mm512_set_epi64(1 << 13, 0, 0, 0, 0, 0, 0, 1 << 1 | 1 << 2 | 1 << 3); let r = _mm512_xor_epi64(a, b); - let e = _mm512_set_epi64(1<<0|1<<13|1<<15, 0, 0, 0, 0, 0, 0, 0); + let e = _mm512_set_epi64(1 << 0 | 1 << 13 | 1 << 15, 0, 0, 0, 0, 0, 0, 0); assert_eq_m512i(r, e); } } From 601b615319ac3d1daad281a929e1bd75ba289da4 Mon Sep 17 00:00:00 2001 From: jironglin Date: Thu, 20 Aug 2020 16:57:49 +0000 Subject: [PATCH 4/9] fix rustc_args_required_const and imm8 --- crates/core_arch/src/x86/avx512f.rs | 260 ++++++++++++---------------- 1 file changed, 111 insertions(+), 149 deletions(-) diff --git a/crates/core_arch/src/x86/avx512f.rs b/crates/core_arch/src/x86/avx512f.rs index 3ec8ffffa0..19813ea0cb 100644 --- a/crates/core_arch/src/x86/avx512f.rs +++ b/crates/core_arch/src/x86/avx512f.rs @@ -898,8 +898,10 @@ pub unsafe fn _mm512_mask_i64scatter_epi32( #[inline] #[target_feature(enable = "avx512f")] #[cfg_attr(test, assert_instr(vprold))] -pub unsafe fn _mm512_rol_epi32(a: __m512i, imm8: u8) -> __m512i { - transmute(vprold(a.as_i32x16(), imm8 as i8)) +#[rustc_args_required_const(1)] +pub unsafe fn _mm512_rol_epi32(a: __m512i, imm8: i32) -> __m512i { + assert!(imm8 >= 0 && imm8 <= 255); + transmute(vprold(a.as_i32x16(), imm8)) } /// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
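The hunk above switches `_mm512_rol_epi32` from a plain `imm8: u8` parameter to `imm8: i32` behind `#[rustc_args_required_const(1)]`, so the rotate count must now be a compile-time constant at every call site, while the `assert!` still range-checks it. A caller-side sketch of the effect (hypothetical usage; assumes a nightly toolchain where these unstable intrinsics are exported from `core::arch::x86_64`):

    #[cfg(target_arch = "x86_64")]
    #[target_feature(enable = "avx512f")]
    unsafe fn rol_example() {
        use core::arch::x86_64::*;
        let a = _mm512_set1_epi32(1);   // every 32-bit lane holds 0b01
        let r = _mm512_rol_epi32(a, 1); // literal count: each lane becomes 0b10
        // let n = 1;
        // let _ = _mm512_rol_epi32(a, n); // rejected: argument 1 is required to be a constant
    }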
@@ -908,8 +910,10 @@ pub unsafe fn _mm512_rol_epi32(a: __m512i, imm8: u8) -> __m512i { #[inline] #[target_feature(enable = "avx512f")] #[cfg_attr(test, assert_instr(vprold))] -pub unsafe fn _mm512_mask_rol_epi32(src: __m512i, k: __mmask16, a: __m512i, imm8: u8) -> __m512i { - let rol = _mm512_rol_epi32(a, imm8).as_i32x16(); +#[rustc_args_required_const(3)] +pub unsafe fn _mm512_mask_rol_epi32(src: __m512i, k: __mmask16, a: __m512i, imm8: i32) -> __m512i { + assert!(imm8 >= 0 && imm8 <= 255); + let rol = vprold(a.as_i32x16(), imm8); transmute(simd_select_bitmask(k, rol, src.as_i32x16())) } @@ -919,8 +923,10 @@ pub unsafe fn _mm512_mask_rol_epi32(src: __m512i, k: __mmask16, a: __m512i, imm8 #[inline] #[target_feature(enable = "avx512f")] #[cfg_attr(test, assert_instr(vprold))] -pub unsafe fn _mm512_maskz_rol_epi32(k: __mmask16, a: __m512i, imm8: u8) -> __m512i { - let rol = _mm512_rol_epi32(a, imm8).as_i32x16(); +#[rustc_args_required_const(2)] +pub unsafe fn _mm512_maskz_rol_epi32(k: __mmask16, a: __m512i, imm8: i32) -> __m512i { + assert!(imm8 >= 0 && imm8 <= 255); + let rol = vprold(a.as_i32x16(), imm8); let zero = _mm512_setzero_si512().as_i32x16(); transmute(simd_select_bitmask(k, rol, zero)) } @@ -931,9 +937,10 @@ pub unsafe fn _mm512_maskz_rol_epi32(k: __mmask16, a: __m512i, imm8: u8) -> __m5 #[inline] #[target_feature(enable = "avx512f")] #[cfg_attr(test, assert_instr(vprord))] -#[rustc_args_required_const(2)] -pub unsafe fn _mm512_ror_epi32(a: __m512i, imm8: u8) -> __m512i { - transmute(vprord(a.as_i32x16(), imm8 as i8)) +#[rustc_args_required_const(1)] +pub unsafe fn _mm512_ror_epi32(a: __m512i, imm8: i32) -> __m512i { + assert!(imm8 >= 0 && imm8 <= 255); + transmute(vprord(a.as_i32x16(), imm8)) } /// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
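Since the writemask and zeromask variants above differ only in their fallback lanes, a minimal sketch may help: `mask_` variants fall back to `src` in lanes whose mask bit is clear, while `maskz_` variants zero those lanes. This uses the `rol` pair from these hunks, under the same hypothetical nightly setup as the previous example:

    unsafe fn mask_vs_maskz() {
        use core::arch::x86_64::*;
        let a = _mm512_set1_epi32(1);
        let src = _mm512_set1_epi32(-1);
        // writemask: lanes 8..15 have clear mask bits and keep src's -1
        let m = _mm512_mask_rol_epi32(src, 0b00000000_11111111, a, 4);
        // zeromask: the same lanes come back as 0 instead
        let z = _mm512_maskz_rol_epi32(0b00000000_11111111, a, 4);
        // lanes 0..7 of both results hold 1 << 4 == 16
    }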
@@ -942,9 +949,10 @@ pub unsafe fn _mm512_ror_epi32(a: __m512i, imm8: u8) -> __m512i { #[inline] #[target_feature(enable = "avx512f")] #[cfg_attr(test, assert_instr(vprord))] -#[rustc_args_required_const(4)] -pub unsafe fn _mm512_mask_ror_epi32(src: __m512i, k: __mmask16, a: __m512i, imm8: u8) -> __m512i { - let ror = _mm512_ror_epi32(a, imm8).as_i32x16(); +#[rustc_args_required_const(3)] +pub unsafe fn _mm512_mask_ror_epi32(src: __m512i, k: __mmask16, a: __m512i, imm8: i32) -> __m512i { + assert!(imm8 >= 0 && imm8 <= 255); + let ror = vprord(a.as_i32x16(), imm8); transmute(simd_select_bitmask(k, ror, src.as_i32x16())) } @@ -954,9 +962,10 @@ pub unsafe fn _mm512_mask_ror_epi32(src: __m512i, k: __mmask16, a: __m512i, imm8 #[inline] #[target_feature(enable = "avx512f")] #[cfg_attr(test, assert_instr(vprord))] -#[rustc_args_required_const(3)] -pub unsafe fn _mm512_maskz_ror_epi32(k: __mmask16, a: __m512i, imm8: u8) -> __m512i { - let ror = _mm512_ror_epi32(a, imm8).as_i32x16(); +#[rustc_args_required_const(2)] +pub unsafe fn _mm512_maskz_ror_epi32(k: __mmask16, a: __m512i, imm8: i32) -> __m512i { + assert!(imm8 >= 0 && imm8 <= 255); + let ror = vprord(a.as_i32x16(), imm8); let zero = _mm512_setzero_si512().as_i32x16(); transmute(simd_select_bitmask(k, ror, zero)) } @@ -967,9 +976,10 @@ pub unsafe fn _mm512_maskz_ror_epi32(k: __mmask16, a: __m512i, imm8: u8) -> __m5 #[inline] #[target_feature(enable = "avx512f")] #[cfg_attr(test, assert_instr(vprolq))] -#[rustc_args_required_const(2)] -pub unsafe fn _mm512_rol_epi64(a: __m512i, imm8: u8) -> __m512i { - transmute(vprolq(a.as_i64x8(), imm8 as i8)) +#[rustc_args_required_const(1)] +pub unsafe fn _mm512_rol_epi64(a: __m512i, imm8: i32) -> __m512i { + assert!(imm8 >= 0 && imm8 <= 255); + transmute(vprolq(a.as_i64x8(), imm8)) } /// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
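Every masked wrapper in this file reduces to the same two steps: compute all lanes, then blend per mask bit via `simd_select_bitmask`. A plain-Rust scalar model of that blend for the eight 64-bit lanes (an illustrative helper, not part of the patch):

    fn select_bitmask8(k: u8, computed: [i64; 8], fallback: [i64; 8]) -> [i64; 8] {
        let mut out = [0i64; 8];
        for i in 0..8 {
            // mask bit i selects lane i from the computed result, else the fallback
            out[i] = if (k >> i) & 1 == 1 { computed[i] } else { fallback[i] };
        }
        out
    }

With `fallback = src` this models the `mask_` variants; with `fallback = [0; 8]` it models the `maskz_` variants.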
@@ -978,9 +988,10 @@ pub unsafe fn _mm512_rol_epi64(a: __m512i, imm8: u8) -> __m512i { #[inline] #[target_feature(enable = "avx512f")] #[cfg_attr(test, assert_instr(vprolq))] -#[rustc_args_required_const(4)] -pub unsafe fn _mm512_mask_rol_epi64(src: __m512i, k: __mmask8, a: __m512i, imm8: u8) -> __m512i { - let rol = _mm512_rol_epi64(a, imm8).as_i64x8(); +#[rustc_args_required_const(3)] +pub unsafe fn _mm512_mask_rol_epi64(src: __m512i, k: __mmask8, a: __m512i, imm8: i32) -> __m512i { + assert!(imm8 >= 0 && imm8 <= 255); + let rol = vprolq(a.as_i64x8(), imm8); transmute(simd_select_bitmask(k, rol, src.as_i64x8())) } @@ -990,9 +1001,10 @@ pub unsafe fn _mm512_mask_rol_epi64(src: __m512i, k: __mmask8, a: __m512i, imm8: #[inline] #[target_feature(enable = "avx512f")] #[cfg_attr(test, assert_instr(vprolq))] -#[rustc_args_required_const(3)] -pub unsafe fn _mm512_maskz_rol_epi64(k: __mmask8, a: __m512i, imm8: u8) -> __m512i { - let rol = _mm512_rol_epi64(a, imm8).as_i64x8(); +#[rustc_args_required_const(2)] +pub unsafe fn _mm512_maskz_rol_epi64(k: __mmask8, a: __m512i, imm8: i32) -> __m512i { + assert!(imm8 >= 0 && imm8 <= 255); + let rol = vprolq(a.as_i64x8(), imm8); let zero = _mm512_setzero_si512().as_i64x8(); transmute(simd_select_bitmask(k, rol, zero)) } @@ -1003,9 +1015,10 @@ pub unsafe fn _mm512_maskz_rol_epi64(k: __mmask8, a: __m512i, imm8: u8) -> __m51 #[inline] #[target_feature(enable = "avx512f")] #[cfg_attr(test, assert_instr(vprorq))] -#[rustc_args_required_const(2)] -pub unsafe fn _mm512_ror_epi64(a: __m512i, imm8: u8) -> __m512i { - transmute(vprorq(a.as_i64x8(), imm8 as i8)) +#[rustc_args_required_const(1)] +pub unsafe fn _mm512_ror_epi64(a: __m512i, imm8: i32) -> __m512i { + assert!(imm8 >= 0 && imm8 <= 255); + transmute(vprorq(a.as_i64x8(), imm8)) } /// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
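One point worth keeping in mind for the `slli`/`srli` hunks that follow: both rotates and immediate shifts accept any `imm8` up to 255 (hence the asserts), but per Intel's pseudocode a rotate count is reduced modulo the lane width, while a shift count at or beyond the lane width clears the lane. A scalar model of one 32-bit lane (illustrative only):

    fn rol32_lane(x: u32, imm8: u32) -> u32 {
        x.rotate_left(imm8 % 32) // rotation is modular in the lane width
    }

    fn slli32_lane(x: u32, imm8: u32) -> u32 {
        // counts of 32..=255 are valid encodings but shift out every bit
        if imm8 > 31 { 0 } else { x << imm8 }
    }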
@@ -1014,9 +1027,10 @@ pub unsafe fn _mm512_ror_epi64(a: __m512i, imm8: u8) -> __m512i { #[inline] #[target_feature(enable = "avx512f")] #[cfg_attr(test, assert_instr(vprorq))] -#[rustc_args_required_const(4)] -pub unsafe fn _mm512_mask_ror_epi64(src: __m512i, k: __mmask8, a: __m512i, imm8: u8) -> __m512i { - let ror = _mm512_ror_epi64(a, imm8).as_i64x8(); +#[rustc_args_required_const(3)] +pub unsafe fn _mm512_mask_ror_epi64(src: __m512i, k: __mmask8, a: __m512i, imm8: i32) -> __m512i { + assert!(imm8 >= 0 && imm8 <= 255); + let ror = vprorq(a.as_i64x8(), imm8); transmute(simd_select_bitmask(k, ror, src.as_i64x8())) } @@ -1026,9 +1040,10 @@ pub unsafe fn _mm512_mask_ror_epi64(src: __m512i, k: __mmask8, a: __m512i, imm8: #[inline] #[target_feature(enable = "avx512f")] #[cfg_attr(test, assert_instr(vprorq))] -#[rustc_args_required_const(3)] -pub unsafe fn _mm512_maskz_ror_epi64(k: __mmask8, a: __m512i, imm8: u8) -> __m512i { - let ror = _mm512_ror_epi64(a, imm8).as_i64x8(); +#[rustc_args_required_const(2)] +pub unsafe fn _mm512_maskz_ror_epi64(k: __mmask8, a: __m512i, imm8: i32) -> __m512i { + assert!(imm8 >= 0 && imm8 <= 255); + let ror = vprorq(a.as_i64x8(), imm8); let zero = _mm512_setzero_si512().as_i64x8(); transmute(simd_select_bitmask(k, ror, zero)) } @@ -1039,8 +1054,9 @@ pub unsafe fn _mm512_maskz_ror_epi64(k: __mmask8, a: __m512i, imm8: u8) -> __m51 #[inline] #[target_feature(enable = "avx512f")] #[cfg_attr(test, assert_instr(vpsllid))] -#[rustc_args_required_const(2)] +#[rustc_args_required_const(1)] pub unsafe fn _mm512_slli_epi32(a: __m512i, imm8: u32) -> __m512i { + assert!(imm8 <= 255); transmute(vpsllid(a.as_i32x16(), imm8)) } @@ -1050,9 +1066,10 @@ pub unsafe fn _mm512_slli_epi32(a: __m512i, imm8: u32) -> __m512i { #[inline] #[target_feature(enable = "avx512f")] #[cfg_attr(test, assert_instr(vpsllid))] -#[rustc_args_required_const(4)] +#[rustc_args_required_const(3)] pub unsafe fn _mm512_mask_slli_epi32(src: __m512i, k: __mmask16, a: __m512i, imm8: u32) -> __m512i { - let shf = _mm512_slli_epi32(a, imm8).as_i32x16(); + assert!(imm8 <= 255); + let shf = vpsllid(a.as_i32x16(), imm8); transmute(simd_select_bitmask(k, shf, src.as_i32x16())) } @@ -1062,9 +1079,10 @@ pub unsafe fn _mm512_mask_slli_epi32(src: __m512i, k: __mmask16, a: __m512i, imm #[inline] #[target_feature(enable = "avx512f")] #[cfg_attr(test, assert_instr(vpsllid))] -#[rustc_args_required_const(3)] +#[rustc_args_required_const(2)] pub unsafe fn _mm512_maskz_slli_epi32(k: __mmask16, a: __m512i, imm8: u32) -> __m512i { - let shf = _mm512_slli_epi32(a, imm8).as_i32x16(); + assert!(imm8 <= 255); + let shf = vpsllid(a.as_i32x16(), imm8); let zero = _mm512_setzero_si512().as_i32x16(); transmute(simd_select_bitmask(k, shf, zero)) } @@ -1075,8 +1093,9 @@ pub unsafe fn _mm512_maskz_slli_epi32(k: __mmask16, a: __m512i, imm8: u32) -> __ #[inline] #[target_feature(enable = "avx512f")] #[cfg_attr(test, assert_instr(vpsrlid))] -#[rustc_args_required_const(2)] +#[rustc_args_required_const(1)] pub unsafe fn _mm512_srli_epi32(a: __m512i, imm8: u32) -> __m512i { + assert!(imm8 <= 255); transmute(vpsrlid(a.as_i32x16(), imm8)) } @@ -1086,9 +1105,10 @@ pub unsafe fn _mm512_srli_epi32(a: __m512i, imm8: u32) -> __m512i { #[inline] #[target_feature(enable = "avx512f")] #[cfg_attr(test, assert_instr(vpsrlid))] -#[rustc_args_required_const(4)] +#[rustc_args_required_const(3)] pub unsafe fn _mm512_mask_srli_epi32(src: __m512i, k: __mmask16, a: __m512i, imm8: u32) -> __m512i { - let shf = _mm512_srli_epi32(a, imm8).as_i32x16(); + 
assert!(imm8 <= 255); + let shf = vpsrlid(a.as_i32x16(), imm8); transmute(simd_select_bitmask(k, shf, src.as_i32x16())) } @@ -1098,21 +1118,23 @@ pub unsafe fn _mm512_mask_srli_epi32(src: __m512i, k: __mmask16, a: __m512i, imm #[inline] #[target_feature(enable = "avx512f")] #[cfg_attr(test, assert_instr(vpsrlid))] -#[rustc_args_required_const(3)] +#[rustc_args_required_const(2)] pub unsafe fn _mm512_maskz_srli_epi32(k: __mmask16, a: __m512i, imm8: u32) -> __m512i { - let shf = _mm512_srli_epi32(a, imm8).as_i32x16(); + assert!(imm8 <= 255); + let shf = vpsrlid(a.as_i32x16(), imm8); let zero = _mm512_setzero_si512().as_i32x16(); transmute(simd_select_bitmask(k, shf, zero)) } -/// Shift packed 64-bit integers in a left by imm8 while shifting in zeros, and store the results in dst. +/// Shift packed 64-bit integers in a left by imm8 while shifting in zeros, and store the results in dst. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_slli_epi64&expand=5319) #[inline] #[target_feature(enable = "avx512f")] #[cfg_attr(test, assert_instr(vpslliq))] -#[rustc_args_required_const(2)] +#[rustc_args_required_const(1)] pub unsafe fn _mm512_slli_epi64(a: __m512i, imm8: u32) -> __m512i { + assert!(imm8 <= 255); transmute(vpslliq(a.as_i64x8(), imm8)) } @@ -1122,9 +1144,10 @@ pub unsafe fn _mm512_slli_epi64(a: __m512i, imm8: u32) -> __m512i { #[inline] #[target_feature(enable = "avx512f")] #[cfg_attr(test, assert_instr(vpslliq))] -#[rustc_args_required_const(4)] +#[rustc_args_required_const(3)] pub unsafe fn _mm512_mask_slli_epi64(src: __m512i, k: __mmask8, a: __m512i, imm8: u32) -> __m512i { - let shf = _mm512_slli_epi64(a, imm8).as_i64x8(); + assert!(imm8 <= 255); + let shf = vpslliq(a.as_i64x8(), imm8); transmute(simd_select_bitmask(k, shf, src.as_i64x8())) } @@ -1134,9 +1157,10 @@ pub unsafe fn _mm512_mask_slli_epi64(src: __m512i, k: __mmask8, a: __m512i, imm8 #[inline] #[target_feature(enable = "avx512f")] #[cfg_attr(test, assert_instr(vpslliq))] -#[rustc_args_required_const(3)] +#[rustc_args_required_const(2)] pub unsafe fn _mm512_maskz_slli_epi64(k: __mmask8, a: __m512i, imm8: u32) -> __m512i { - let shf = _mm512_slli_epi64(a, imm8).as_i64x8(); + assert!(imm8 <= 255); + let shf = vpslliq(a.as_i64x8(), imm8); let zero = _mm512_setzero_si512().as_i64x8(); transmute(simd_select_bitmask(k, shf, zero)) } @@ -1147,8 +1171,9 @@ pub unsafe fn _mm512_maskz_slli_epi64(k: __mmask8, a: __m512i, imm8: u32) -> __m #[inline] #[target_feature(enable = "avx512f")] #[cfg_attr(test, assert_instr(vpsrliq))] -#[rustc_args_required_const(2)] +#[rustc_args_required_const(1)] pub unsafe fn _mm512_srli_epi64(a: __m512i, imm8: u32) -> __m512i { + assert!(imm8 <= 255); transmute(vpsrliq(a.as_i64x8(), imm8)) } @@ -1158,21 +1183,23 @@ pub unsafe fn _mm512_srli_epi64(a: __m512i, imm8: u32) -> __m512i { #[inline] #[target_feature(enable = "avx512f")] #[cfg_attr(test, assert_instr(vpsrliq))] -#[rustc_args_required_const(4)] +#[rustc_args_required_const(3)] pub unsafe fn _mm512_mask_srli_epi64(src: __m512i, k: __mmask8, a: __m512i, imm8: u32) -> __m512i { - let shf = _mm512_srli_epi64(a, imm8).as_i64x8(); + assert!(imm8 <= 255); + let shf = vpsrliq(a.as_i64x8(), imm8); transmute(simd_select_bitmask(k, shf, src.as_i64x8())) } -/// Shift packed 64-bit integers in a right by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
+/// Shift packed 64-bit integers in a right by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_srli_epi64&expand=5530) #[inline] #[target_feature(enable = "avx512f")] #[cfg_attr(test, assert_instr(vpsrliq))] -#[rustc_args_required_const(3)] +#[rustc_args_required_const(2)] pub unsafe fn _mm512_maskz_srli_epi64(k: __mmask8, a: __m512i, imm8: u32) -> __m512i { - let shf = _mm512_srli_epi64(a, imm8).as_i64x8(); + assert!(imm8 <= 255); + let shf = vpsrliq(a.as_i64x8(), imm8); let zero = _mm512_setzero_si512().as_i64x8(); transmute(simd_select_bitmask(k, shf, zero)) } @@ -1183,7 +1210,6 @@ pub unsafe fn _mm512_maskz_srli_epi64(k: __mmask8, a: __m512i, imm8: u32) -> __m #[inline] #[target_feature(enable = "avx512f")] #[cfg_attr(test, assert_instr(vpslld))] -#[rustc_args_required_const(2)] pub unsafe fn _mm512_sll_epi32(a: __m512i, count: __m128i) -> __m512i { transmute(vpslld(a.as_i32x16(), count.as_i32x4())) } @@ -1194,7 +1220,6 @@ pub unsafe fn _mm512_sll_epi32(a: __m512i, count: __m128i) -> __m512i { #[inline] #[target_feature(enable = "avx512f")] #[cfg_attr(test, assert_instr(vpslld))] -#[rustc_args_required_const(4)] pub unsafe fn _mm512_mask_sll_epi32( src: __m512i, k: __mmask16, @@ -1205,13 +1230,12 @@ pub unsafe fn _mm512_mask_sll_epi32( transmute(simd_select_bitmask(k, shf, src.as_i32x16())) } -/// Shift packed 32-bit integers in a left by count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// Shift packed 32-bit integers in a left by count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_sll_epi32&expand=5279) #[inline] #[target_feature(enable = "avx512f")] #[cfg_attr(test, assert_instr(vpslld))] -#[rustc_args_required_const(3)] pub unsafe fn _mm512_maskz_sll_epi32(k: __mmask16, a: __m512i, count: __m128i) -> __m512i { let shf = _mm512_sll_epi32(a, count).as_i32x16(); let zero = _mm512_setzero_si512().as_i32x16(); @@ -1224,7 +1248,6 @@ pub unsafe fn _mm512_maskz_sll_epi32(k: __mmask16, a: __m512i, count: __m128i) - #[inline] #[target_feature(enable = "avx512f")] #[cfg_attr(test, assert_instr(vpsrld))] -#[rustc_args_required_const(2)] pub unsafe fn _mm512_srl_epi32(a: __m512i, count: __m128i) -> __m512i { transmute(vpsrld(a.as_i32x16(), count.as_i32x4())) } @@ -1235,7 +1258,6 @@ pub unsafe fn _mm512_srl_epi32(a: __m512i, count: __m128i) -> __m512i { #[inline] #[target_feature(enable = "avx512f")] #[cfg_attr(test, assert_instr(vpsrld))] -#[rustc_args_required_const(4)] pub unsafe fn _mm512_mask_srl_epi32( src: __m512i, k: __mmask16, @@ -1246,13 +1268,12 @@ pub unsafe fn _mm512_mask_srl_epi32( transmute(simd_select_bitmask(k, shf, src.as_i32x16())) } -/// Shift packed 32-bit integers in a right by count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// Shift packed 32-bit integers in a right by count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
/// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_srl_epi32&expand=5491) #[inline] #[target_feature(enable = "avx512f")] #[cfg_attr(test, assert_instr(vpsrld))] -#[rustc_args_required_const(3)] pub unsafe fn _mm512_maskz_srl_epi32(k: __mmask16, a: __m512i, count: __m128i) -> __m512i { let shf = _mm512_srl_epi32(a, count).as_i32x16(); let zero = _mm512_setzero_si512().as_i32x16(); @@ -1265,7 +1286,6 @@ pub unsafe fn _mm512_maskz_srl_epi32(k: __mmask16, a: __m512i, count: __m128i) - #[inline] #[target_feature(enable = "avx512f")] #[cfg_attr(test, assert_instr(vpsllq))] -#[rustc_args_required_const(2)] pub unsafe fn _mm512_sll_epi64(a: __m512i, count: __m128i) -> __m512i { transmute(vpsllq(a.as_i64x8(), count.as_i64x2())) } @@ -1276,7 +1296,6 @@ pub unsafe fn _mm512_sll_epi64(a: __m512i, count: __m128i) -> __m512i { #[inline] #[target_feature(enable = "avx512f")] #[cfg_attr(test, assert_instr(vpsllq))] -#[rustc_args_required_const(4)] pub unsafe fn _mm512_mask_sll_epi64( src: __m512i, k: __mmask8, @@ -1293,7 +1312,6 @@ pub unsafe fn _mm512_mask_sll_epi64( #[inline] #[target_feature(enable = "avx512f")] #[cfg_attr(test, assert_instr(vpsllq))] -#[rustc_args_required_const(3)] pub unsafe fn _mm512_maskz_sll_epi64(k: __mmask8, a: __m512i, count: __m128i) -> __m512i { let shf = _mm512_sll_epi64(a, count).as_i64x8(); let zero = _mm512_setzero_si512().as_i64x8(); @@ -1306,7 +1324,6 @@ pub unsafe fn _mm512_maskz_sll_epi64(k: __mmask8, a: __m512i, count: __m128i) -> #[inline] #[target_feature(enable = "avx512f")] #[cfg_attr(test, assert_instr(vpsrlq))] -#[rustc_args_required_const(2)] pub unsafe fn _mm512_srl_epi64(a: __m512i, count: __m128i) -> __m512i { transmute(vpsrlq(a.as_i64x8(), count.as_i64x2())) } @@ -1317,7 +1334,6 @@ pub unsafe fn _mm512_srl_epi64(a: __m512i, count: __m128i) -> __m512i { #[inline] #[target_feature(enable = "avx512f")] #[cfg_attr(test, assert_instr(vpsrlq))] -#[rustc_args_required_const(4)] pub unsafe fn _mm512_mask_srl_epi64( src: __m512i, k: __mmask8, @@ -1328,13 +1344,12 @@ pub unsafe fn _mm512_mask_srl_epi64( transmute(simd_select_bitmask(k, shf, src.as_i64x8())) } -/// Shift packed 64-bit integers in a left by count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// Shift packed 64-bit integers in a right by count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
/// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_srl_epi64&expand=5288) #[inline] #[target_feature(enable = "avx512f")] #[cfg_attr(test, assert_instr(vpsrlq))] -#[rustc_args_required_const(3)] pub unsafe fn _mm512_maskz_srl_epi64(k: __mmask8, a: __m512i, count: __m128i) -> __m512i { let shf = _mm512_srl_epi64(a, count).as_i64x8(); let zero = _mm512_setzero_si512().as_i64x8(); @@ -1347,7 +1362,6 @@ pub unsafe fn _mm512_maskz_srl_epi64(k: __mmask8, a: __m512i, count: __m128i) -> #[inline] #[target_feature(enable = "avx512f")] #[cfg_attr(test, assert_instr(vpsrad))] -#[rustc_args_required_const(2)] pub unsafe fn _mm512_sra_epi32(a: __m512i, count: __m128i) -> __m512i { transmute(vpsrad(a.as_i32x16(), count.as_i32x4())) } @@ -1358,7 +1372,6 @@ pub unsafe fn _mm512_sra_epi32(a: __m512i, count: __m128i) -> __m512i { #[inline] #[target_feature(enable = "avx512f")] #[cfg_attr(test, assert_instr(vpsrad))] -#[rustc_args_required_const(4)] pub unsafe fn _mm512_mask_sra_epi32( src: __m512i, k: __mmask16, @@ -1369,13 +1382,12 @@ pub unsafe fn _mm512_mask_sra_epi32( transmute(simd_select_bitmask(k, shf, src.as_i32x16())) } -/// Shift packed 32-bit integers in a right by count while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// Shift packed 32-bit integers in a right by count while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_sra_epi32&expand=5406) #[inline] #[target_feature(enable = "avx512f")] #[cfg_attr(test, assert_instr(vpsrad))] -#[rustc_args_required_const(3)] pub unsafe fn _mm512_maskz_sra_epi32(k: __mmask16, a: __m512i, count: __m128i) -> __m512i { let shf = _mm512_sra_epi32(a, count).as_i32x16(); let zero = _mm512_setzero_si512().as_i32x16(); @@ -1388,7 +1400,6 @@ pub unsafe fn _mm512_maskz_sra_epi32(k: __mmask16, a: __m512i, count: __m128i) - #[inline] #[target_feature(enable = "avx512f")] #[cfg_attr(test, assert_instr(vpsraq))] -#[rustc_args_required_const(2)] pub unsafe fn _mm512_sra_epi64(a: __m512i, count: __m128i) -> __m512i { transmute(vpsraq(a.as_i64x8(), count.as_i64x2())) } @@ -1399,7 +1410,6 @@ pub unsafe fn _mm512_sra_epi64(a: __m512i, count: __m128i) -> __m512i { #[inline] #[target_feature(enable = "avx512f")] #[cfg_attr(test, assert_instr(vpsraq))] -#[rustc_args_required_const(4)] pub unsafe fn _mm512_mask_sra_epi64( src: __m512i, k: __mmask8, @@ -1416,7 +1426,6 @@ pub unsafe fn _mm512_mask_sra_epi64( #[inline] #[target_feature(enable = "avx512f")] #[cfg_attr(test, assert_instr(vpsraq))] -#[rustc_args_required_const(3)] pub unsafe fn _mm512_maskz_sra_epi64(k: __mmask8, a: __m512i, count: __m128i) -> __m512i { let shf = _mm512_sra_epi64(a, count).as_i64x8(); let zero = _mm512_setzero_si512().as_i64x8(); @@ -1429,8 +1438,9 @@ pub unsafe fn _mm512_maskz_sra_epi64(k: __mmask8, a: __m512i, count: __m128i) -> #[inline] #[target_feature(enable = "avx512f")] #[cfg_attr(test, assert_instr(vpsraid))] -#[rustc_args_required_const(2)] +#[rustc_args_required_const(1)] pub unsafe fn _mm512_srai_epi32(a: __m512i, imm8: u32) -> __m512i { + assert!(imm8 <= 255); transmute(vpsraid(a.as_i32x16(), imm8)) } @@ -1440,9 +1450,10 @@ pub unsafe fn _mm512_srai_epi32(a: __m512i, imm8: u32) -> __m512i { #[inline] #[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpsraid))] -#[rustc_args_required_const(4)] +#[rustc_args_required_const(3)] pub unsafe fn _mm512_mask_srai_epi32(src: __m512i, k: __mmask16, a: __m512i, imm8: u32) -> __m512i { - let shf = _mm512_srai_epi32(a, imm8).as_i32x16(); + assert!(imm8 <= 255); + let shf = vpsraid(a.as_i32x16(), imm8); transmute(simd_select_bitmask(k, shf, src.as_i32x16())) } @@ -1452,9 +1464,10 @@ pub unsafe fn _mm512_mask_srai_epi32(src: __m512i, k: __mmask16, a: __m512i, imm /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_srai_epi32&expand=5435) #[inline] #[target_feature(enable = "avx512f")] #[cfg_attr(test, assert_instr(vpsraid))] -#[rustc_args_required_const(3)] +#[rustc_args_required_const(2)] pub unsafe fn _mm512_maskz_srai_epi32(k: __mmask16, a: __m512i, imm8: u32) -> __m512i { - let shf = _mm512_srai_epi32(a, imm8).as_i32x16(); + assert!(imm8 <= 255); + let shf = vpsraid(a.as_i32x16(), imm8); let zero = _mm512_setzero_si512().as_i32x16(); transmute(simd_select_bitmask(k, shf, zero)) } @@ -1465,8 +1479,9 @@ pub unsafe fn _mm512_maskz_srai_epi32(k: __mmask16, a: __m512i, imm8: u32) -> __ #[inline] #[target_feature(enable = "avx512f")] #[cfg_attr(test, assert_instr(vpsraiq))] -#[rustc_args_required_const(2)] +#[rustc_args_required_const(1)] pub unsafe fn _mm512_srai_epi64(a: __m512i, imm8: u32) -> __m512i { + assert!(imm8 <= 255); transmute(vpsraiq(a.as_i64x8(), imm8)) } @@ -1476,9 +1491,10 @@ pub unsafe fn _mm512_srai_epi64(a: __m512i, imm8: u32) -> __m512i { #[inline] #[target_feature(enable = "avx512f")] #[cfg_attr(test, assert_instr(vpsraiq))] -#[rustc_args_required_const(4)] +#[rustc_args_required_const(3)] pub unsafe fn _mm512_mask_srai_epi64(src: __m512i, k: __mmask8, a: __m512i, imm8: u32) -> __m512i { - let shf = _mm512_srai_epi64(a, imm8).as_i64x8(); + assert!(imm8 <= 255); + let shf = vpsraiq(a.as_i64x8(), imm8); transmute(simd_select_bitmask(k, shf, src.as_i64x8())) } @@ -1488,9 +1505,10 @@ pub unsafe fn _mm512_mask_srai_epi64(src: __m512i, k: __mmask8, a: __m512i, imm8 #[inline] #[target_feature(enable = "avx512f")] #[cfg_attr(test, assert_instr(vpsraiq))] -#[rustc_args_required_const(3)] +#[rustc_args_required_const(2)] pub unsafe fn _mm512_maskz_srai_epi64(k: __mmask8, a: __m512i, imm8: u32) -> __m512i { - let shf = _mm512_srai_epi64(a, imm8).as_i64x8(); + assert!(imm8 <= 255); + let shf = vpsraiq(a.as_i64x8(), imm8); let zero = _mm512_setzero_si512().as_i64x8(); transmute(simd_select_bitmask(k, shf, zero)) } @@ -1501,7 +1520,6 @@ pub unsafe fn _mm512_maskz_srai_epi64(k: __mmask8, a: __m512i, imm8: u32) -> __m #[inline] #[target_feature(enable = "avx512f")] #[cfg_attr(test, assert_instr(vpsravd))] -#[rustc_args_required_const(2)] pub unsafe fn _mm512_srav_epi32(a: __m512i, count: __m512i) -> __m512i { transmute(vpsravd(a.as_i32x16(), count.as_i32x16())) } @@ -1512,7 +1530,6 @@ pub unsafe fn _mm512_srav_epi32(a: __m512i, count: __m512i) -> __m512i { #[inline] #[target_feature(enable = "avx512f")] #[cfg_attr(test, assert_instr(vpsravd))] -#[rustc_args_required_const(4)] pub unsafe fn _mm512_mask_srav_epi32( src: __m512i, k: __mmask16, @@ -1523,13 +1540,12 @@ pub unsafe fn _mm512_mask_srav_epi32( transmute(simd_select_bitmask(k, shf, src.as_i32x16())) } -/// Shift packed 32-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using 
zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// Shift packed 32-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_srav_epi32&expand=5464) #[inline] #[target_feature(enable = "avx512f")] #[cfg_attr(test, assert_instr(vpsravd))] -#[rustc_args_required_const(3)] pub unsafe fn _mm512_maskz_srav_epi32(k: __mmask16, a: __m512i, count: __m512i) -> __m512i { let shf = _mm512_srav_epi32(a, count).as_i32x16(); let zero = _mm512_setzero_si512().as_i32x16(); @@ -1542,7 +1558,6 @@ pub unsafe fn _mm512_maskz_srav_epi32(k: __mmask16, a: __m512i, count: __m512i) #[inline] #[target_feature(enable = "avx512f")] #[cfg_attr(test, assert_instr(vpsravq))] -#[rustc_args_required_const(2)] pub unsafe fn _mm512_srav_epi64(a: __m512i, count: __m512i) -> __m512i { transmute(vpsravq(a.as_i64x8(), count.as_i64x8())) } @@ -1553,7 +1568,6 @@ pub unsafe fn _mm512_srav_epi64(a: __m512i, count: __m512i) -> __m512i { #[inline] #[target_feature(enable = "avx512f")] #[cfg_attr(test, assert_instr(vpsravq))] -#[rustc_args_required_const(4)] pub unsafe fn _mm512_mask_srav_epi64( src: __m512i, k: __mmask8, @@ -1570,7 +1584,6 @@ pub unsafe fn _mm512_mask_srav_epi64( #[inline] #[target_feature(enable = "avx512f")] #[cfg_attr(test, assert_instr(vpsravq))] -#[rustc_args_required_const(3)] pub unsafe fn _mm512_maskz_srav_epi64(k: __mmask8, a: __m512i, count: __m512i) -> __m512i { let shf = _mm512_srav_epi64(a, count).as_i64x8(); let zero = _mm512_setzero_si512().as_i64x8(); @@ -1583,7 +1596,6 @@ pub unsafe fn _mm512_maskz_srav_epi64(k: __mmask8, a: __m512i, count: __m512i) - #[inline] #[target_feature(enable = "avx512f")] #[cfg_attr(test, assert_instr(vprolvd))] -#[rustc_args_required_const(2)] pub unsafe fn _mm512_rolv_epi32(a: __m512i, b: __m512i) -> __m512i { transmute(vprolvd(a.as_i32x16(), b.as_i32x16())) } @@ -1594,7 +1606,6 @@ pub unsafe fn _mm512_rolv_epi32(a: __m512i, b: __m512i) -> __m512i { #[inline] #[target_feature(enable = "avx512f")] #[cfg_attr(test, assert_instr(vprolvd))] -#[rustc_args_required_const(4)] pub unsafe fn _mm512_mask_rolv_epi32( src: __m512i, k: __mmask16, @@ -1611,7 +1622,6 @@ pub unsafe fn _mm512_mask_rolv_epi32( #[inline] #[target_feature(enable = "avx512f")] #[cfg_attr(test, assert_instr(vprolvd))] -#[rustc_args_required_const(3)] pub unsafe fn _mm512_maskz_rolv_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i { let rol = _mm512_rolv_epi32(a, b).as_i32x16(); let zero = _mm512_setzero_si512().as_i32x16(); @@ -1624,7 +1634,6 @@ pub unsafe fn _mm512_maskz_rolv_epi32(k: __mmask16, a: __m512i, b: __m512i) -> _ #[inline] #[target_feature(enable = "avx512f")] #[cfg_attr(test, assert_instr(vprorvd))] -#[rustc_args_required_const(2)] pub unsafe fn _mm512_rorv_epi32(a: __m512i, b: __m512i) -> __m512i { transmute(vprorvd(a.as_i32x16(), b.as_i32x16())) } @@ -1635,7 +1644,6 @@ pub unsafe fn _mm512_rorv_epi32(a: __m512i, b: __m512i) -> __m512i { #[inline] #[target_feature(enable = "avx512f")] #[cfg_attr(test, assert_instr(vprorvd))] -#[rustc_args_required_const(4)] pub unsafe fn _mm512_mask_rorv_epi32( src: __m512i, k: __mmask16, @@ -1652,7 +1660,6 @@ pub unsafe fn _mm512_mask_rorv_epi32( #[inline] #[target_feature(enable = "avx512f")] #[cfg_attr(test, assert_instr(vprorvd))] 
-#[rustc_args_required_const(3)] pub unsafe fn _mm512_maskz_rorv_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i { let ror = _mm512_rorv_epi32(a, b).as_i32x16(); let zero = _mm512_setzero_si512().as_i32x16(); @@ -1665,7 +1672,6 @@ pub unsafe fn _mm512_maskz_rorv_epi32(k: __mmask16, a: __m512i, b: __m512i) -> _ #[inline] #[target_feature(enable = "avx512f")] #[cfg_attr(test, assert_instr(vprolvq))] -#[rustc_args_required_const(2)] pub unsafe fn _mm512_rolv_epi64(a: __m512i, b: __m512i) -> __m512i { transmute(vprolvq(a.as_i64x8(), b.as_i64x8())) } @@ -1676,7 +1682,6 @@ pub unsafe fn _mm512_rolv_epi64(a: __m512i, b: __m512i) -> __m512i { #[inline] #[target_feature(enable = "avx512f")] #[cfg_attr(test, assert_instr(vprolvq))] -#[rustc_args_required_const(4)] pub unsafe fn _mm512_mask_rolv_epi64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i { let rol = _mm512_rolv_epi64(a, b).as_i64x8(); transmute(simd_select_bitmask(k, rol, src.as_i64x8())) @@ -1688,7 +1693,6 @@ pub unsafe fn _mm512_mask_rolv_epi64(src: __m512i, k: __mmask8, a: __m512i, b: _ #[inline] #[target_feature(enable = "avx512f")] #[cfg_attr(test, assert_instr(vprolvq))] -#[rustc_args_required_const(3)] pub unsafe fn _mm512_maskz_rolv_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i { let rol = _mm512_rolv_epi64(a, b).as_i64x8(); let zero = _mm512_setzero_si512().as_i64x8(); @@ -1701,7 +1705,6 @@ pub unsafe fn _mm512_maskz_rolv_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __ #[inline] #[target_feature(enable = "avx512f")] #[cfg_attr(test, assert_instr(vprorvq))] -#[rustc_args_required_const(2)] pub unsafe fn _mm512_rorv_epi64(a: __m512i, b: __m512i) -> __m512i { transmute(vprorvq(a.as_i64x8(), b.as_i64x8())) } @@ -1712,7 +1715,6 @@ pub unsafe fn _mm512_rorv_epi64(a: __m512i, b: __m512i) -> __m512i { #[inline] #[target_feature(enable = "avx512f")] #[cfg_attr(test, assert_instr(vprorvq))] -#[rustc_args_required_const(4)] pub unsafe fn _mm512_mask_rorv_epi64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i { let ror = _mm512_rorv_epi64(a, b).as_i64x8(); transmute(simd_select_bitmask(k, ror, src.as_i64x8())) @@ -1724,7 +1726,6 @@ pub unsafe fn _mm512_mask_rorv_epi64(src: __m512i, k: __mmask8, a: __m512i, b: _ #[inline] #[target_feature(enable = "avx512f")] #[cfg_attr(test, assert_instr(vprorvq))] -#[rustc_args_required_const(3)] pub unsafe fn _mm512_maskz_rorv_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i { let ror = _mm512_rorv_epi64(a, b).as_i64x8(); let zero = _mm512_setzero_si512().as_i64x8(); @@ -1737,7 +1738,6 @@ pub unsafe fn _mm512_maskz_rorv_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __ #[inline] #[target_feature(enable = "avx512f")] #[cfg_attr(test, assert_instr(vpsllvd))] -#[rustc_args_required_const(2)] pub unsafe fn _mm512_sllv_epi32(a: __m512i, count: __m512i) -> __m512i { transmute(vpsllvd(a.as_i32x16(), count.as_i32x16())) } @@ -1748,7 +1748,6 @@ pub unsafe fn _mm512_sllv_epi32(a: __m512i, count: __m512i) -> __m512i { #[inline] #[target_feature(enable = "avx512f")] #[cfg_attr(test, assert_instr(vpsllvd))] -#[rustc_args_required_const(4)] pub unsafe fn _mm512_mask_sllv_epi32( src: __m512i, k: __mmask16, @@ -1765,7 +1764,6 @@ pub unsafe fn _mm512_mask_sllv_epi32( #[inline] #[target_feature(enable = "avx512f")] #[cfg_attr(test, assert_instr(vpsllvd))] -#[rustc_args_required_const(3)] pub unsafe fn _mm512_maskz_sllv_epi32(k: __mmask16, a: __m512i, count: __m512i) -> __m512i { let shf = _mm512_sllv_epi32(a, count).as_i32x16(); let zero = _mm512_setzero_si512().as_i32x16(); @@ 
-1778,7 +1776,6 @@ pub unsafe fn _mm512_maskz_sllv_epi32(k: __mmask16, a: __m512i, count: __m512i) #[inline] #[target_feature(enable = "avx512f")] #[cfg_attr(test, assert_instr(vpsrlvd))] -#[rustc_args_required_const(2)] pub unsafe fn _mm512_srlv_epi32(a: __m512i, count: __m512i) -> __m512i { transmute(vpsrlvd(a.as_i32x16(), count.as_i32x16())) } @@ -1789,7 +1786,6 @@ pub unsafe fn _mm512_srlv_epi32(a: __m512i, count: __m512i) -> __m512i { #[inline] #[target_feature(enable = "avx512f")] #[cfg_attr(test, assert_instr(vpsrlvd))] -#[rustc_args_required_const(4)] pub unsafe fn _mm512_mask_srlv_epi32( src: __m512i, k: __mmask16, @@ -1806,7 +1802,6 @@ pub unsafe fn _mm512_mask_srlv_epi32( #[inline] #[target_feature(enable = "avx512f")] #[cfg_attr(test, assert_instr(vpsrlvd))] -#[rustc_args_required_const(3)] pub unsafe fn _mm512_maskz_srlv_epi32(k: __mmask16, a: __m512i, count: __m512i) -> __m512i { let shf = _mm512_srlv_epi32(a, count).as_i32x16(); let zero = _mm512_setzero_si512().as_i32x16(); @@ -1819,7 +1814,6 @@ pub unsafe fn _mm512_maskz_srlv_epi32(k: __mmask16, a: __m512i, count: __m512i) #[inline] #[target_feature(enable = "avx512f")] #[cfg_attr(test, assert_instr(vpsllvq))] -#[rustc_args_required_const(2)] pub unsafe fn _mm512_sllv_epi64(a: __m512i, count: __m512i) -> __m512i { transmute(vpsllvq(a.as_i64x8(), count.as_i64x8())) } @@ -1830,7 +1824,6 @@ pub unsafe fn _mm512_sllv_epi64(a: __m512i, count: __m512i) -> __m512i { #[inline] #[target_feature(enable = "avx512f")] #[cfg_attr(test, assert_instr(vpsllvq))] -#[rustc_args_required_const(4)] pub unsafe fn _mm512_mask_sllv_epi64( src: __m512i, k: __mmask8, @@ -1847,7 +1840,6 @@ pub unsafe fn _mm512_mask_sllv_epi64( #[inline] #[target_feature(enable = "avx512f")] #[cfg_attr(test, assert_instr(vpsllvq))] -#[rustc_args_required_const(3)] pub unsafe fn _mm512_maskz_sllv_epi64(k: __mmask8, a: __m512i, count: __m512i) -> __m512i { let shf = _mm512_sllv_epi64(a, count).as_i64x8(); let zero = _mm512_setzero_si512().as_i64x8(); @@ -1860,7 +1852,6 @@ pub unsafe fn _mm512_maskz_sllv_epi64(k: __mmask8, a: __m512i, count: __m512i) - #[inline] #[target_feature(enable = "avx512f")] #[cfg_attr(test, assert_instr(vpsrlvq))] -#[rustc_args_required_const(2)] pub unsafe fn _mm512_srlv_epi64(a: __m512i, count: __m512i) -> __m512i { transmute(vpsrlvq(a.as_i64x8(), count.as_i64x8())) } @@ -1871,7 +1862,6 @@ pub unsafe fn _mm512_srlv_epi64(a: __m512i, count: __m512i) -> __m512i { #[inline] #[target_feature(enable = "avx512f")] #[cfg_attr(test, assert_instr(vpsrlvq))] -#[rustc_args_required_const(4)] pub unsafe fn _mm512_mask_srlv_epi64( src: __m512i, k: __mmask8, @@ -1888,7 +1878,6 @@ pub unsafe fn _mm512_mask_srlv_epi64( #[inline] #[target_feature(enable = "avx512f")] #[cfg_attr(test, assert_instr(vpsrlvq))] -#[rustc_args_required_const(3)] pub unsafe fn _mm512_maskz_srlv_epi64(k: __mmask8, a: __m512i, count: __m512i) -> __m512i { let shf = _mm512_srlv_epi64(a, count).as_i64x8(); let zero = _mm512_setzero_si512().as_i64x8(); @@ -1901,7 +1890,6 @@ pub unsafe fn _mm512_maskz_srlv_epi64(k: __mmask8, a: __m512i, count: __m512i) - #[inline] #[target_feature(enable = "avx512f")] #[cfg_attr(test, assert_instr(vpandd))] -#[rustc_args_required_const(2)] pub unsafe fn _mm512_and_epi32(a: __m512i, b: __m512i) -> __m512i { transmute(simd_and(a.as_i32x16(), b.as_i32x16())) } @@ -1912,7 +1900,6 @@ pub unsafe fn _mm512_and_epi32(a: __m512i, b: __m512i) -> __m512i { #[inline] #[target_feature(enable = "avx512f")] #[cfg_attr(test, assert_instr(vpandd))] 
-#[rustc_args_required_const(4)] pub unsafe fn _mm512_mask_and_epi32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i { let and = _mm512_and_epi32(a, b).as_i32x16(); transmute(simd_select_bitmask(k, and, src.as_i32x16())) @@ -1924,7 +1911,6 @@ pub unsafe fn _mm512_mask_and_epi32(src: __m512i, k: __mmask16, a: __m512i, b: _ #[inline] #[target_feature(enable = "avx512f")] #[cfg_attr(test, assert_instr(vpandd))] -#[rustc_args_required_const(3)] pub unsafe fn _mm512_maskz_and_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i { let and = _mm512_and_epi32(a, b).as_i32x16(); let zero = _mm512_setzero_si512().as_i32x16(); @@ -1937,7 +1923,6 @@ pub unsafe fn _mm512_maskz_and_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __ #[inline] #[target_feature(enable = "avx512f")] #[cfg_attr(test, assert_instr(vpandq))] -#[rustc_args_required_const(2)] pub unsafe fn _mm512_and_epi64(a: __m512i, b: __m512i) -> __m512i { transmute(simd_and(a.as_i64x8(), b.as_i64x8())) } @@ -1948,7 +1933,6 @@ pub unsafe fn _mm512_and_epi64(a: __m512i, b: __m512i) -> __m512i { #[inline] #[target_feature(enable = "avx512f")] #[cfg_attr(test, assert_instr(vpandq))] -#[rustc_args_required_const(4)] pub unsafe fn _mm512_mask_and_epi64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i { let and = _mm512_and_epi64(a, b).as_i64x8(); transmute(simd_select_bitmask(k, and, src.as_i64x8())) @@ -1960,7 +1944,6 @@ pub unsafe fn _mm512_mask_and_epi64(src: __m512i, k: __mmask8, a: __m512i, b: __ #[inline] #[target_feature(enable = "avx512f")] #[cfg_attr(test, assert_instr(vpandq))] -#[rustc_args_required_const(3)] pub unsafe fn _mm512_maskz_and_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i { let and = _mm512_and_epi64(a, b).as_i64x8(); let zero = _mm512_setzero_si512().as_i64x8(); @@ -1973,7 +1956,6 @@ pub unsafe fn _mm512_maskz_and_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m #[inline] #[target_feature(enable = "avx512f")] #[cfg_attr(test, assert_instr(vpandd))] -#[rustc_args_required_const(2)] pub unsafe fn _mm512_and_si512(a: __m512i, b: __m512i) -> __m512i { transmute(simd_and(a.as_i32x16(), b.as_i32x16())) } @@ -1984,7 +1966,6 @@ pub unsafe fn _mm512_and_si512(a: __m512i, b: __m512i) -> __m512i { #[inline] #[target_feature(enable = "avx512f")] #[cfg_attr(test, assert_instr(vpord))] -#[rustc_args_required_const(2)] pub unsafe fn _mm512_or_epi32(a: __m512i, b: __m512i) -> __m512i { transmute(simd_or(a.as_i32x16(), b.as_i32x16())) } @@ -1995,7 +1976,6 @@ pub unsafe fn _mm512_or_epi32(a: __m512i, b: __m512i) -> __m512i { #[inline] #[target_feature(enable = "avx512f")] #[cfg_attr(test, assert_instr(vpord))] -#[rustc_args_required_const(4)] pub unsafe fn _mm512_mask_or_epi32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i { let or = _mm512_or_epi32(a, b).as_i32x16(); transmute(simd_select_bitmask(k, or, src.as_i32x16())) @@ -2007,7 +1987,6 @@ pub unsafe fn _mm512_mask_or_epi32(src: __m512i, k: __mmask16, a: __m512i, b: __ #[inline] #[target_feature(enable = "avx512f")] #[cfg_attr(test, assert_instr(vpord))] -#[rustc_args_required_const(3)] pub unsafe fn _mm512_maskz_or_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i { let or = _mm512_or_epi32(a, b).as_i32x16(); let zero = _mm512_setzero_si512().as_i32x16(); @@ -2020,7 +1999,6 @@ pub unsafe fn _mm512_maskz_or_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m #[inline] #[target_feature(enable = "avx512f")] #[cfg_attr(test, assert_instr(vporq))] -#[rustc_args_required_const(2)] pub unsafe fn _mm512_or_epi64(a: __m512i, b: __m512i) 
-> __m512i { transmute(simd_or(a.as_i64x8(), b.as_i64x8())) } @@ -2031,7 +2009,6 @@ pub unsafe fn _mm512_or_epi64(a: __m512i, b: __m512i) -> __m512i { #[inline] #[target_feature(enable = "avx512f")] #[cfg_attr(test, assert_instr(vporq))] -#[rustc_args_required_const(4)] pub unsafe fn _mm512_mask_or_epi64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i { let or = _mm512_or_epi64(a, b).as_i64x8(); transmute(simd_select_bitmask(k, or, src.as_i64x8())) @@ -2043,7 +2020,6 @@ pub unsafe fn _mm512_mask_or_epi64(src: __m512i, k: __mmask8, a: __m512i, b: __m #[inline] #[target_feature(enable = "avx512f")] #[cfg_attr(test, assert_instr(vporq))] -#[rustc_args_required_const(3)] pub unsafe fn _mm512_maskz_or_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i { let or = _mm512_or_epi64(a, b).as_i64x8(); let zero = _mm512_setzero_si512().as_i64x8(); @@ -2056,7 +2032,6 @@ pub unsafe fn _mm512_maskz_or_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m5 #[inline] #[target_feature(enable = "avx512f")] #[cfg_attr(test, assert_instr(vpord))] -#[rustc_args_required_const(2)] pub unsafe fn _mm512_or_si512(a: __m512i, b: __m512i) -> __m512i { transmute(simd_or(a.as_i32x16(), b.as_i32x16())) } @@ -2067,7 +2042,6 @@ pub unsafe fn _mm512_or_si512(a: __m512i, b: __m512i) -> __m512i { #[inline] #[target_feature(enable = "avx512f")] #[cfg_attr(test, assert_instr(vpxord))] -#[rustc_args_required_const(2)] pub unsafe fn _mm512_xor_epi32(a: __m512i, b: __m512i) -> __m512i { transmute(simd_xor(a.as_i32x16(), b.as_i32x16())) } @@ -2078,7 +2052,6 @@ pub unsafe fn _mm512_xor_epi32(a: __m512i, b: __m512i) -> __m512i { #[inline] #[target_feature(enable = "avx512f")] #[cfg_attr(test, assert_instr(vpxord))] -#[rustc_args_required_const(4)] pub unsafe fn _mm512_mask_xor_epi32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i { let xor = _mm512_xor_epi32(a, b).as_i32x16(); transmute(simd_select_bitmask(k, xor, src.as_i32x16())) @@ -2090,7 +2063,6 @@ pub unsafe fn _mm512_mask_xor_epi32(src: __m512i, k: __mmask16, a: __m512i, b: _ #[inline] #[target_feature(enable = "avx512f")] #[cfg_attr(test, assert_instr(vpxord))] -#[rustc_args_required_const(3)] pub unsafe fn _mm512_maskz_xor_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i { let xor = _mm512_xor_epi32(a, b).as_i32x16(); let zero = _mm512_setzero_si512().as_i32x16(); @@ -2103,7 +2075,6 @@ pub unsafe fn _mm512_maskz_xor_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __ #[inline] #[target_feature(enable = "avx512f")] #[cfg_attr(test, assert_instr(vpxorq))] -#[rustc_args_required_const(2)] pub unsafe fn _mm512_xor_epi64(a: __m512i, b: __m512i) -> __m512i { transmute(simd_xor(a.as_i64x8(), b.as_i64x8())) } @@ -2114,7 +2085,6 @@ pub unsafe fn _mm512_xor_epi64(a: __m512i, b: __m512i) -> __m512i { #[inline] #[target_feature(enable = "avx512f")] #[cfg_attr(test, assert_instr(vpxorq))] -#[rustc_args_required_const(4)] pub unsafe fn _mm512_mask_xor_epi64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i { let xor = _mm512_xor_epi64(a, b).as_i64x8(); transmute(simd_select_bitmask(k, xor, src.as_i64x8())) @@ -2126,7 +2096,6 @@ pub unsafe fn _mm512_mask_xor_epi64(src: __m512i, k: __mmask8, a: __m512i, b: __ #[inline] #[target_feature(enable = "avx512f")] #[cfg_attr(test, assert_instr(vpxorq))] -#[rustc_args_required_const(3)] pub unsafe fn _mm512_maskz_xor_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i { let xor = _mm512_xor_epi64(a, b).as_i64x8(); let zero = _mm512_setzero_si512().as_i64x8(); @@ -2139,7 +2108,6 @@ pub unsafe fn 
_mm512_maskz_xor_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m #[inline] #[target_feature(enable = "avx512f")] #[cfg_attr(test, assert_instr(vpxord))] -#[rustc_args_required_const(2)] pub unsafe fn _mm512_xor_si512(a: __m512i, b: __m512i) -> __m512i { transmute(simd_xor(a.as_i32x16(), b.as_i32x16())) } @@ -2150,7 +2118,6 @@ pub unsafe fn _mm512_xor_si512(a: __m512i, b: __m512i) -> __m512i { #[inline] #[target_feature(enable = "avx512f")] #[cfg_attr(test, assert_instr(kandw))] -#[rustc_args_required_const(2)] pub unsafe fn _kand_mask16(a: __mmask16, b: __mmask16) -> __mmask16 { kandw(a, b) } @@ -2161,7 +2128,6 @@ pub unsafe fn _kand_mask16(a: __mmask16, b: __mmask16) -> __mmask16 { #[inline] #[target_feature(enable = "avx512f")] #[cfg_attr(test, assert_instr(kandw))] -#[rustc_args_required_const(2)] pub unsafe fn _mm512_kand(a: __mmask16, b: __mmask16) -> __mmask16 { kandw(a, b) } @@ -2172,7 +2138,6 @@ pub unsafe fn _mm512_kand(a: __mmask16, b: __mmask16) -> __mmask16 { #[inline] #[target_feature(enable = "avx512f")] #[cfg_attr(test, assert_instr(korw))] -#[rustc_args_required_const(2)] pub unsafe fn _kor_mask16(a: __mmask16, b: __mmask16) -> __mmask16 { korw(a, b) } @@ -2183,7 +2148,6 @@ pub unsafe fn _kor_mask16(a: __mmask16, b: __mmask16) -> __mmask16 { #[inline] #[target_feature(enable = "avx512f")] #[cfg_attr(test, assert_instr(korw))] -#[rustc_args_required_const(2)] pub unsafe fn _mm512_kor(a: __mmask16, b: __mmask16) -> __mmask16 { korw(a, b) } @@ -2194,7 +2158,6 @@ pub unsafe fn _mm512_kor(a: __mmask16, b: __mmask16) -> __mmask16 { #[inline] #[target_feature(enable = "avx512f")] #[cfg_attr(test, assert_instr(kxorw))] -#[rustc_args_required_const(2)] pub unsafe fn _kxor_mask16(a: __mmask16, b: __mmask16) -> __mmask16 { kxorw(a, b) } @@ -2205,7 +2168,6 @@ pub unsafe fn _kxor_mask16(a: __mmask16, b: __mmask16) -> __mmask16 { #[inline] #[target_feature(enable = "avx512f")] #[cfg_attr(test, assert_instr(kxorw))] -#[rustc_args_required_const(2)] pub unsafe fn _mm512_kxor(a: __mmask16, b: __mmask16) -> __mmask16 { kxorw(a, b) } @@ -3832,13 +3794,13 @@ extern "C" { fn vpcmpd(a: i32x16, b: i32x16, op: i32, m: i16) -> i16; #[link_name = "llvm.x86.avx512.mask.prol.d.512"] - fn vprold(a: i32x16, shift: i8) -> i32x16; + fn vprold(a: i32x16, imm8: i32) -> i32x16; #[link_name = "llvm.x86.avx512.mask.pror.d.512"] - fn vprord(a: i32x16, shift: i8) -> i32x16; + fn vprord(a: i32x16, imm8: i32) -> i32x16; #[link_name = "llvm.x86.avx512.mask.prol.q.512"] - fn vprolq(a: i64x8, shift: i8) -> i64x8; + fn vprolq(a: i64x8, imm8: i32) -> i64x8; #[link_name = "llvm.x86.avx512.mask.pror.q.512"] - fn vprorq(a: i64x8, shift: i8) -> i64x8; + fn vprorq(a: i64x8, imm8: i32) -> i64x8; #[link_name = "llvm.x86.avx512.mask.prolv.d.512"] fn vprolvd(a: i32x16, b: i32x16) -> i32x16; From 16b931ca86df019a87a706a779a06adb20cfda1a Mon Sep 17 00:00:00 2001 From: jironglin Date: Thu, 20 Aug 2020 17:29:08 +0000 Subject: [PATCH 5/9] fix assert_instr --- crates/core_arch/src/x86/avx512f.rs | 58 ++++++++++++++--------------- 1 file changed, 29 insertions(+), 29 deletions(-) diff --git a/crates/core_arch/src/x86/avx512f.rs b/crates/core_arch/src/x86/avx512f.rs index 19813ea0cb..fd973d8e0d 100644 --- a/crates/core_arch/src/x86/avx512f.rs +++ b/crates/core_arch/src/x86/avx512f.rs @@ -897,7 +897,7 @@ pub unsafe fn _mm512_mask_i64scatter_epi32( /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_rol_epi32&expand=4685) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, 
assert_instr(vprold))] +#[cfg_attr(test, assert_instr(vprold, imm8 = 1))] #[rustc_args_required_const(1)] pub unsafe fn _mm512_rol_epi32(a: __m512i, imm8: i32) -> __m512i { assert!(imm8 >= 0 && imm8 <= 255); @@ -909,7 +909,7 @@ pub unsafe fn _mm512_rol_epi32(a: __m512i, imm8: i32) -> __m512i { /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_rol_epi32&expand=4683) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vprold))] +#[cfg_attr(test, assert_instr(vprold, imm8 = 1))] #[rustc_args_required_const(3)] pub unsafe fn _mm512_mask_rol_epi32(src: __m512i, k: __mmask16, a: __m512i, imm8: i32) -> __m512i { assert!(imm8 >= 0 && imm8 <= 255); @@ -922,7 +922,7 @@ pub unsafe fn _mm512_mask_rol_epi32(src: __m512i, k: __mmask16, a: __m512i, imm8 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_rol_epi32&expand=4684) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vprold))] +#[cfg_attr(test, assert_instr(vprold, imm8 = 1))] #[rustc_args_required_const(2)] pub unsafe fn _mm512_maskz_rol_epi32(k: __mmask16, a: __m512i, imm8: i32) -> __m512i { assert!(imm8 >= 0 && imm8 <= 255); @@ -936,7 +936,7 @@ pub unsafe fn _mm512_maskz_rol_epi32(k: __mmask16, a: __m512i, imm8: i32) -> __m /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_ror_epi32&expand=4721) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vprord))] +#[cfg_attr(test, assert_instr(vprord, imm8 = 1))] #[rustc_args_required_const(1)] pub unsafe fn _mm512_ror_epi32(a: __m512i, imm8: i32) -> __m512i { assert!(imm8 >= 0 && imm8 <= 255); @@ -948,7 +948,7 @@ pub unsafe fn _mm512_ror_epi32(a: __m512i, imm8: i32) -> __m512i { /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_ror_epi32&expand=4719) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vprord))] +#[cfg_attr(test, assert_instr(vprord, imm8 = 1))] #[rustc_args_required_const(3)] pub unsafe fn _mm512_mask_ror_epi32(src: __m512i, k: __mmask16, a: __m512i, imm8: i32) -> __m512i { assert!(imm8 >= 0 && imm8 <= 255); @@ -961,7 +961,7 @@ pub unsafe fn _mm512_mask_ror_epi32(src: __m512i, k: __mmask16, a: __m512i, imm8 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_ror_epi32&expand=4720) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vprord))] +#[cfg_attr(test, assert_instr(vprord, imm8 = 1))] #[rustc_args_required_const(2)] pub unsafe fn _mm512_maskz_ror_epi32(k: __mmask16, a: __m512i, imm8: i32) -> __m512i { assert!(imm8 >= 0 && imm8 <= 255); @@ -975,7 +975,7 @@ pub unsafe fn _mm512_maskz_ror_epi32(k: __mmask16, a: __m512i, imm8: i32) -> __m /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_rol_epi64&expand=4694) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vprolq))] +#[cfg_attr(test, assert_instr(vprolq, imm8 = 1))] #[rustc_args_required_const(1)] pub unsafe fn _mm512_rol_epi64(a: __m512i, imm8: i32) -> __m512i { assert!(imm8 >= 0 && imm8 <= 255); @@ -987,7 +987,7 @@ pub unsafe fn _mm512_rol_epi64(a: __m512i, imm8: i32) -> __m512i { /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_rol_epi64&expand=4692) #[inline] #[target_feature(enable = "avx512f")] 
-#[cfg_attr(test, assert_instr(vprolq))]
+#[cfg_attr(test, assert_instr(vprolq, imm8 = 1))]
 #[rustc_args_required_const(3)]
 pub unsafe fn _mm512_mask_rol_epi64(src: __m512i, k: __mmask8, a: __m512i, imm8: i32) -> __m512i {
     assert!(imm8 >= 0 && imm8 <= 255);
@@ -1000,7 +1000,7 @@ pub unsafe fn _mm512_mask_rol_epi64(src: __m512i, k: __mmask8, a: __m512i, imm8:
 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_rol_epi64&expand=4693)
 #[inline]
 #[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vprolq))]
+#[cfg_attr(test, assert_instr(vprolq, imm8 = 1))]
 #[rustc_args_required_const(2)]
 pub unsafe fn _mm512_maskz_rol_epi64(k: __mmask8, a: __m512i, imm8: i32) -> __m512i {
     assert!(imm8 >= 0 && imm8 <= 255);
@@ -1014,7 +1014,7 @@ pub unsafe fn _mm512_maskz_rol_epi64(k: __mmask8, a: __m512i, imm8: i32) -> __m5
 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_ror_epi64&expand=4730)
 #[inline]
 #[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vprorq))]
+#[cfg_attr(test, assert_instr(vprorq, imm8 = 1))]
 #[rustc_args_required_const(1)]
 pub unsafe fn _mm512_ror_epi64(a: __m512i, imm8: i32) -> __m512i {
     assert!(imm8 >= 0 && imm8 <= 255);
@@ -1026,7 +1026,7 @@ pub unsafe fn _mm512_ror_epi64(a: __m512i, imm8: i32) -> __m512i {
 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_ror_epi64&expand=4728)
 #[inline]
 #[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vprorq))]
+#[cfg_attr(test, assert_instr(vprorq, imm8 = 1))]
 #[rustc_args_required_const(3)]
 pub unsafe fn _mm512_mask_ror_epi64(src: __m512i, k: __mmask8, a: __m512i, imm8: i32) -> __m512i {
     assert!(imm8 >= 0 && imm8 <= 255);
@@ -1039,7 +1039,7 @@ pub unsafe fn _mm512_mask_ror_epi64(src: __m512i, k: __mmask8, a: __m512i, imm8:
 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_ror_epi64&expand=4729)
 #[inline]
 #[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vprorq))]
+#[cfg_attr(test, assert_instr(vprorq, imm8 = 1))]
 #[rustc_args_required_const(2)]
 pub unsafe fn _mm512_maskz_ror_epi64(k: __mmask8, a: __m512i, imm8: i32) -> __m512i {
     assert!(imm8 >= 0 && imm8 <= 255);
@@ -1053,7 +1053,7 @@ pub unsafe fn _mm512_maskz_ror_epi64(k: __mmask8, a: __m512i, imm8: i32) -> __m5
 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_slli_epi32&expand=5310)
 #[inline]
 #[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vpsllid))]
+#[cfg_attr(test, assert_instr(vpsllid, imm8 = 1))]
 #[rustc_args_required_const(1)]
 pub unsafe fn _mm512_slli_epi32(a: __m512i, imm8: u32) -> __m512i {
     assert!(imm8 <= 255);
@@ -1065,7 +1065,7 @@ pub unsafe fn _mm512_slli_epi32(a: __m512i, imm8: u32) -> __m512i {
 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_slli_epi32&expand=5308)
 #[inline]
 #[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vpsllid))]
+#[cfg_attr(test, assert_instr(vpsllid, imm8 = 1))]
 #[rustc_args_required_const(3)]
 pub unsafe fn _mm512_mask_slli_epi32(src: __m512i, k: __mmask16, a: __m512i, imm8: u32) -> __m512i {
     assert!(imm8 <= 255);
@@ -1078,7 +1078,7 @@ pub unsafe fn _mm512_mask_slli_epi32(src: __m512i, k: __mmask16, a: __m512i, imm
 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_slli_epi32&expand=5309)
 #[inline]
 #[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vpsllid))]
+#[cfg_attr(test, assert_instr(vpsllid, imm8 = 1))]
 #[rustc_args_required_const(2)]
 pub unsafe fn _mm512_maskz_slli_epi32(k: __mmask16, a: __m512i, imm8: u32) -> __m512i {
     assert!(imm8 <= 255);
@@ -1092,7 +1092,7 @@ pub unsafe fn _mm512_maskz_slli_epi32(k: __mmask16, a: __m512i, imm8: u32) -> __
 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_srli_epi32&expand=5522)
 #[inline]
 #[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vpsrlid))]
+#[cfg_attr(test, assert_instr(vpsrlid, imm8 = 1))]
 #[rustc_args_required_const(1)]
 pub unsafe fn _mm512_srli_epi32(a: __m512i, imm8: u32) -> __m512i {
     assert!(imm8 <= 255);
@@ -1104,7 +1104,7 @@ pub unsafe fn _mm512_srli_epi32(a: __m512i, imm8: u32) -> __m512i {
 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_srli_epi32&expand=5520)
 #[inline]
 #[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vpsrlid))]
+#[cfg_attr(test, assert_instr(vpsrlid, imm8 = 1))]
 #[rustc_args_required_const(3)]
 pub unsafe fn _mm512_mask_srli_epi32(src: __m512i, k: __mmask16, a: __m512i, imm8: u32) -> __m512i {
     assert!(imm8 <= 255);
@@ -1117,7 +1117,7 @@ pub unsafe fn _mm512_mask_srli_epi32(src: __m512i, k: __mmask16, a: __m512i, imm
 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_srli_epi32&expand=5521)
 #[inline]
 #[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vpsrlid))]
+#[cfg_attr(test, assert_instr(vpsrlid, imm8 = 1))]
 #[rustc_args_required_const(2)]
 pub unsafe fn _mm512_maskz_srli_epi32(k: __mmask16, a: __m512i, imm8: u32) -> __m512i {
     assert!(imm8 <= 255);
@@ -1131,7 +1131,7 @@ pub unsafe fn _mm512_maskz_srli_epi32(k: __mmask16, a: __m512i, imm8: u32) -> __
 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_slli_epi64&expand=5319)
 #[inline]
 #[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vpslliq))]
+#[cfg_attr(test, assert_instr(vpslliq, imm8 = 1))]
 #[rustc_args_required_const(1)]
 pub unsafe fn _mm512_slli_epi64(a: __m512i, imm8: u32) -> __m512i {
     assert!(imm8 <= 255);
@@ -1143,7 +1143,7 @@ pub unsafe fn _mm512_slli_epi64(a: __m512i, imm8: u32) -> __m512i {
 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_slli_epi64&expand=5317)
 #[inline]
 #[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vpslliq))]
+#[cfg_attr(test, assert_instr(vpslliq, imm8 = 1))]
 #[rustc_args_required_const(3)]
 pub unsafe fn _mm512_mask_slli_epi64(src: __m512i, k: __mmask8, a: __m512i, imm8: u32) -> __m512i {
     assert!(imm8 <= 255);
@@ -1156,7 +1156,7 @@ pub unsafe fn _mm512_mask_slli_epi64(src: __m512i, k: __mmask8, a: __m512i, imm8
 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_slli_epi64&expand=5318)
 #[inline]
 #[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vpslliq))]
+#[cfg_attr(test, assert_instr(vpslliq, imm8 = 1))]
 #[rustc_args_required_const(2)]
 pub unsafe fn _mm512_maskz_slli_epi64(k: __mmask8, a: __m512i, imm8: u32) -> __m512i {
     assert!(imm8 <= 255);
@@ -1170,7 +1170,7 @@ pub unsafe fn _mm512_maskz_slli_epi64(k: __mmask8, a: __m512i, imm8: u32) -> __m
 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_srli_epi64&expand=5531)
 #[inline]
 #[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vpsrliq))]
+#[cfg_attr(test, assert_instr(vpsrliq, imm8 = 1))]
 #[rustc_args_required_const(1)]
 pub unsafe fn _mm512_srli_epi64(a: __m512i, imm8: u32) -> __m512i {
     assert!(imm8 <= 255);
@@ -1182,7 +1182,7 @@ pub unsafe fn _mm512_srli_epi64(a: __m512i, imm8: u32) -> __m512i {
 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_srli_epi64&expand=5529)
 #[inline]
 #[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vpsrliq))]
+#[cfg_attr(test, assert_instr(vpsrliq, imm8 = 1))]
 #[rustc_args_required_const(3)]
 pub unsafe fn _mm512_mask_srli_epi64(src: __m512i, k: __mmask8, a: __m512i, imm8: u32) -> __m512i {
     assert!(imm8 <= 255);
@@ -1195,7 +1195,7 @@ pub unsafe fn _mm512_mask_srli_epi64(src: __m512i, k: __mmask8, a: __m512i, imm8
 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_srli_epi64&expand=5530)
 #[inline]
 #[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vpsrliq))]
+#[cfg_attr(test, assert_instr(vpsrliq, imm8 = 1))]
 #[rustc_args_required_const(2)]
 pub unsafe fn _mm512_maskz_srli_epi64(k: __mmask8, a: __m512i, imm8: u32) -> __m512i {
     assert!(imm8 <= 255);
@@ -1449,7 +1449,7 @@ pub unsafe fn _mm512_srai_epi32(a: __m512i, imm8: u32) -> __m512i {
 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_srai_epi32&expand=5434)
 #[inline]
 #[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vpsraid))]
+#[cfg_attr(test, assert_instr(vpsraid, imm8 = 1))]
 #[rustc_args_required_const(3)]
 pub unsafe fn _mm512_mask_srai_epi32(src: __m512i, k: __mmask16, a: __m512i, imm8: u32) -> __m512i {
     assert!(imm8 <= 255);
@@ -1463,7 +1463,7 @@ pub unsafe fn _mm512_mask_srai_epi32(src: __m512i, k: __mmask16, a: __m512i, imm
 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_srai_epi32&expand=5435)
 #[inline]
 #[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vpsraid))]
+#[cfg_attr(test, assert_instr(vpsraid, imm8 = 1))]
 #[rustc_args_required_const(2)]
 pub unsafe fn _mm512_maskz_srai_epi32(k: __mmask16, a: __m512i, imm8: u32) -> __m512i {
     assert!(imm8 <= 255);
@@ -1478,7 +1478,7 @@ pub unsafe fn _mm512_maskz_srai_epi32(k: __mmask16, a: __m512i, imm8: u32) -> __
 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_srai_epi64&expand=5445)
 #[inline]
 #[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vpsraiq))]
+#[cfg_attr(test, assert_instr(vpsraiq, imm8 = 1))]
 #[rustc_args_required_const(1)]
 pub unsafe fn _mm512_srai_epi64(a: __m512i, imm8: u32) -> __m512i {
     assert!(imm8 <= 255);
@@ -1490,7 +1490,7 @@ pub unsafe fn _mm512_srai_epi64(a: __m512i, imm8: u32) -> __m512i {
 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_srai_epi64&expand=5443)
 #[inline]
 #[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vpsraiq))]
+#[cfg_attr(test, assert_instr(vpsraiq, imm8 = 1))]
 #[rustc_args_required_const(3)]
 pub unsafe fn _mm512_mask_srai_epi64(src: __m512i, k: __mmask8, a: __m512i, imm8: u32) -> __m512i {
     assert!(imm8 <= 255);
@@ -1504,7 +1504,7 @@ pub unsafe fn _mm512_mask_srai_epi64(src: __m512i, k: __mmask8, a: __m512i, imm8
 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_srai_epi64&expand=5444)
 #[inline]
 #[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vpsraiq))]
+#[cfg_attr(test, assert_instr(vpsraiq, imm8 = 1))]
 #[rustc_args_required_const(2)]
 pub unsafe fn _mm512_maskz_srai_epi64(k: __mmask8, a: __m512i, imm8: u32) -> __m512i {
     assert!(imm8 <= 255);

From ae9e8bf70bbd972442029e1cec5d4b03fee1b2bc Mon Sep 17 00:00:00 2001
From: jironglin
Date: Thu, 20 Aug 2020 17:37:01 +0000
Subject: [PATCH 6/9] fix assert_instr 2

---
 crates/core_arch/src/x86/avx512f.rs | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

diff --git a/crates/core_arch/src/x86/avx512f.rs b/crates/core_arch/src/x86/avx512f.rs
index fd973d8e0d..89f5db10a6 100644
--- a/crates/core_arch/src/x86/avx512f.rs
+++ b/crates/core_arch/src/x86/avx512f.rs
@@ -1437,7 +1437,7 @@ pub unsafe fn _mm512_maskz_sra_epi64(k: __mmask8, a: __m512i, count: __m128i) ->
 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_srai_epi32&expand=5436)
 #[inline]
 #[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vpsraid))]
+#[cfg_attr(test, assert_instr(vpsraid, imm8 = 1))]
 #[rustc_args_required_const(1)]
 pub unsafe fn _mm512_srai_epi32(a: __m512i, imm8: u32) -> __m512i {
     assert!(imm8 <= 255);
@@ -1454,7 +1454,6 @@ pub unsafe fn _mm512_srai_epi32(a: __m512i, imm8: u32) -> __m512i {
 pub unsafe fn _mm512_mask_srai_epi32(src: __m512i, k: __mmask16, a: __m512i, imm8: u32) -> __m512i {
     assert!(imm8 <= 255);
     let shf = vpsraid(a.as_i32x16(), imm8);
-    //let shf = _mm512_srai_epi32(a, imm8).as_i32x16();
     transmute(simd_select_bitmask(k, shf, src.as_i32x16()))
 }
@@ -1468,7 +1467,6 @@ pub unsafe fn _mm512_mask_srai_epi32(src: __m512i, k: __mmask16, a: __m512i, imm
 pub unsafe fn _mm512_maskz_srai_epi32(k: __mmask16, a: __m512i, imm8: u32) -> __m512i {
     assert!(imm8 <= 255);
     let shf = vpsraid(a.as_i32x16(), imm8);
-    //let shf = _mm512_srai_epi32(a, imm8).as_i32x16();
     let zero = _mm512_setzero_si512().as_i32x16();
     transmute(simd_select_bitmask(k, shf, zero))
 }
@@ -1495,7 +1493,6 @@ pub unsafe fn _mm512_srai_epi64(a: __m512i, imm8: u32) -> __m512i {
 pub unsafe fn _mm512_mask_srai_epi64(src: __m512i, k: __mmask8, a: __m512i, imm8: u32) -> __m512i {
     assert!(imm8 <= 255);
     let shf = vpsraiq(a.as_i64x8(), imm8);
-    //let shf = _mm512_srai_epi64(a, imm8).as_i64x8();
     transmute(simd_select_bitmask(k, shf, src.as_i64x8()))
 }
@@ -1509,7 +1506,6 @@ pub unsafe fn _mm512_mask_srai_epi64(src: __m512i, k: __mmask8, a: __m512i, imm8
 pub unsafe fn _mm512_maskz_srai_epi64(k: __mmask8, a: __m512i, imm8: u32) -> __m512i {
     assert!(imm8 <= 255);
     let shf = vpsraiq(a.as_i64x8(), imm8);
-    //let shf = _mm512_srai_epi64(a, imm8).as_i64x8();
     let zero = _mm512_setzero_si512().as_i64x8();
     transmute(simd_select_bitmask(k, shf, zero))
 }

From a9193d66ccae50fe6835ebe47062d892126a748e Mon Sep 17 00:00:00 2001
From: jironglin
Date: Thu, 20 Aug 2020 18:04:20 +0000
Subject: [PATCH 7/9] fix assert_instr 3

---
 crates/core_arch/src/x86/avx512f.rs | 36 ++++++++++++++---------------
 1 file changed, 18 insertions(+), 18 deletions(-)

diff --git a/crates/core_arch/src/x86/avx512f.rs b/crates/core_arch/src/x86/avx512f.rs
index 89f5db10a6..0641dbf615 100644
--- a/crates/core_arch/src/x86/avx512f.rs
+++ b/crates/core_arch/src/x86/avx512f.rs
@@ -1053,7 +1053,7 @@ pub unsafe fn _mm512_maskz_ror_epi64(k: __mmask8, a: __m512i, imm8: i32) -> __m5
 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_slli_epi32&expand=5310)
 #[inline]
 #[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vpsllid, imm8 = 1))]
+#[cfg_attr(test, assert_instr(vpslld, imm8 = 1))]
 #[rustc_args_required_const(1)]
 pub unsafe fn _mm512_slli_epi32(a: __m512i, imm8: u32) -> __m512i {
     assert!(imm8 <= 255);
@@ -1065,7 +1065,7 @@ pub unsafe fn _mm512_slli_epi32(a: __m512i, imm8: u32) -> __m512i {
 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_slli_epi32&expand=5308)
 #[inline]
 #[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vpsllid, imm8 = 1))]
+#[cfg_attr(test, assert_instr(vpslld, imm8 = 1))]
 #[rustc_args_required_const(3)]
 pub unsafe fn _mm512_mask_slli_epi32(src: __m512i, k: __mmask16, a: __m512i, imm8: u32) -> __m512i {
     assert!(imm8 <= 255);
@@ -1078,7 +1078,7 @@ pub unsafe fn _mm512_mask_slli_epi32(src: __m512i, k: __mmask16, a: __m512i, imm
 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_slli_epi32&expand=5309)
 #[inline]
 #[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vpsllid, imm8 = 1))]
+#[cfg_attr(test, assert_instr(vpslld, imm8 = 1))]
 #[rustc_args_required_const(2)]
 pub unsafe fn _mm512_maskz_slli_epi32(k: __mmask16, a: __m512i, imm8: u32) -> __m512i {
     assert!(imm8 <= 255);
@@ -1092,7 +1092,7 @@ pub unsafe fn _mm512_maskz_slli_epi32(k: __mmask16, a: __m512i, imm8: u32) -> __
 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_srli_epi32&expand=5522)
 #[inline]
 #[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vpsrlid, imm8 = 1))]
+#[cfg_attr(test, assert_instr(vpsrld, imm8 = 1))]
 #[rustc_args_required_const(1)]
 pub unsafe fn _mm512_srli_epi32(a: __m512i, imm8: u32) -> __m512i {
     assert!(imm8 <= 255);
@@ -1104,7 +1104,7 @@ pub unsafe fn _mm512_srli_epi32(a: __m512i, imm8: u32) -> __m512i {
 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_srli_epi32&expand=5520)
 #[inline]
 #[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vpsrlid, imm8 = 1))]
+#[cfg_attr(test, assert_instr(vpsrld, imm8 = 1))]
 #[rustc_args_required_const(3)]
 pub unsafe fn _mm512_mask_srli_epi32(src: __m512i, k: __mmask16, a: __m512i, imm8: u32) -> __m512i {
     assert!(imm8 <= 255);
@@ -1117,7 +1117,7 @@ pub unsafe fn _mm512_mask_srli_epi32(src: __m512i, k: __mmask16, a: __m512i, imm
 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_srli_epi32&expand=5521)
 #[inline]
 #[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vpsrlid, imm8 = 1))]
+#[cfg_attr(test, assert_instr(vpsrld, imm8 = 1))]
 #[rustc_args_required_const(2)]
 pub unsafe fn _mm512_maskz_srli_epi32(k: __mmask16, a: __m512i, imm8: u32) -> __m512i {
     assert!(imm8 <= 255);
@@ -1131,7 +1131,7 @@ pub unsafe fn _mm512_maskz_srli_epi32(k: __mmask16, a: __m512i, imm8: u32) -> __
 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_slli_epi64&expand=5319)
 #[inline]
 #[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vpslliq, imm8 = 1))]
+#[cfg_attr(test, assert_instr(vpsllq, imm8 = 1))]
 #[rustc_args_required_const(1)]
 pub unsafe fn _mm512_slli_epi64(a: __m512i, imm8: u32) -> __m512i {
     assert!(imm8 <= 255);
@@ -1143,7 +1143,7 @@ pub unsafe fn _mm512_slli_epi64(a: __m512i, imm8: u32) -> __m512i {
 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_slli_epi64&expand=5317)
 #[inline]
 #[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vpslliq, imm8 = 1))]
+#[cfg_attr(test, assert_instr(vpsllq, imm8 = 1))]
 #[rustc_args_required_const(3)]
 pub unsafe fn _mm512_mask_slli_epi64(src: __m512i, k: __mmask8, a: __m512i, imm8: u32) -> __m512i {
     assert!(imm8 <= 255);
@@ -1156,7 +1156,7 @@ pub unsafe fn _mm512_mask_slli_epi64(src: __m512i, k: __mmask8, a: __m512i, imm8
 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_slli_epi64&expand=5318)
 #[inline]
 #[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vpslliq, imm8 = 1))]
+#[cfg_attr(test, assert_instr(vpsllq, imm8 = 1))]
 #[rustc_args_required_const(2)]
 pub unsafe fn _mm512_maskz_slli_epi64(k: __mmask8, a: __m512i, imm8: u32) -> __m512i {
     assert!(imm8 <= 255);
@@ -1170,7 +1170,7 @@ pub unsafe fn _mm512_maskz_slli_epi64(k: __mmask8, a: __m512i, imm8: u32) -> __m
 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_srli_epi64&expand=5531)
 #[inline]
 #[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vpsrliq, imm8 = 1))]
+#[cfg_attr(test, assert_instr(vpsrlq, imm8 = 1))]
 #[rustc_args_required_const(1)]
 pub unsafe fn _mm512_srli_epi64(a: __m512i, imm8: u32) -> __m512i {
     assert!(imm8 <= 255);
@@ -1182,7 +1182,7 @@ pub unsafe fn _mm512_srli_epi64(a: __m512i, imm8: u32) -> __m512i {
 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_srli_epi64&expand=5529)
 #[inline]
 #[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vpsrliq, imm8 = 1))]
+#[cfg_attr(test, assert_instr(vpsrlq, imm8 = 1))]
 #[rustc_args_required_const(3)]
 pub unsafe fn _mm512_mask_srli_epi64(src: __m512i, k: __mmask8, a: __m512i, imm8: u32) -> __m512i {
     assert!(imm8 <= 255);
@@ -1195,7 +1195,7 @@ pub unsafe fn _mm512_mask_srli_epi64(src: __m512i, k: __mmask8, a: __m512i, imm8
 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_srli_epi64&expand=5530)
 #[inline]
 #[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vpsrliq, imm8 = 1))]
+#[cfg_attr(test, assert_instr(vpsrlq, imm8 = 1))]
 #[rustc_args_required_const(2)]
 pub unsafe fn _mm512_maskz_srli_epi64(k: __mmask8, a: __m512i, imm8: u32) -> __m512i {
     assert!(imm8 <= 255);
@@ -1437,7 +1437,7 @@ pub unsafe fn _mm512_maskz_sra_epi64(k: __mmask8, a: __m512i, count: __m128i) ->
 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_srai_epi32&expand=5436)
 #[inline]
 #[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vpsraid, imm8 = 1))]
+#[cfg_attr(test, assert_instr(vpsrad, imm8 = 1))]
 #[rustc_args_required_const(1)]
 pub unsafe fn _mm512_srai_epi32(a: __m512i, imm8: u32) -> __m512i {
     assert!(imm8 <= 255);
@@ -1449,7 +1449,7 @@ pub unsafe fn _mm512_srai_epi32(a: __m512i, imm8: u32) -> __m512i {
 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_srai_epi32&expand=5434)
 #[inline]
 #[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vpsraid, imm8 = 1))]
+#[cfg_attr(test, assert_instr(vpsrad, imm8 = 1))]
 #[rustc_args_required_const(3)]
 pub unsafe fn _mm512_mask_srai_epi32(src: __m512i, k: __mmask16, a: __m512i, imm8: u32) -> __m512i {
     assert!(imm8 <= 255);
@@ -1462,7 +1462,7 @@ pub unsafe fn _mm512_mask_srai_epi32(src: __m512i, k: __mmask16, a: __m512i, imm
 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_srai_epi32&expand=5435)
 #[inline]
 #[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vpsraid, imm8 = 1))]
+#[cfg_attr(test, assert_instr(vpsrad, imm8 = 1))]
 #[rustc_args_required_const(2)]
 pub unsafe fn _mm512_maskz_srai_epi32(k: __mmask16, a: __m512i, imm8: u32) -> __m512i {
     assert!(imm8 <= 255);
@@ -1476,7 +1476,7 @@ pub unsafe fn _mm512_maskz_srai_epi32(k: __mmask16, a: __m512i, imm8: u32) -> __
 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_srai_epi64&expand=5445)
 #[inline]
 #[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vpsraiq, imm8 = 1))]
+#[cfg_attr(test, assert_instr(vpsraq, imm8 = 1))]
 #[rustc_args_required_const(1)]
 pub unsafe fn _mm512_srai_epi64(a: __m512i, imm8: u32) -> __m512i {
     assert!(imm8 <= 255);
@@ -1488,7 +1488,7 @@ pub unsafe fn _mm512_srai_epi64(a: __m512i, imm8: u32) -> __m512i {
 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_srai_epi64&expand=5443)
 #[inline]
 #[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vpsraiq, imm8 = 1))]
+#[cfg_attr(test, assert_instr(vpsraq, imm8 = 1))]
 #[rustc_args_required_const(3)]
 pub unsafe fn _mm512_mask_srai_epi64(src: __m512i, k: __mmask8, a: __m512i, imm8: u32) -> __m512i {
     assert!(imm8 <= 255);
@@ -1501,7 +1501,7 @@ pub unsafe fn _mm512_mask_srai_epi64(src: __m512i, k: __mmask8, a: __m512i, imm8
 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_srai_epi64&expand=5444)
 #[inline]
 #[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vpsraiq, imm8 = 1))]
+#[cfg_attr(test, assert_instr(vpsraq, imm8 = 1))]
 #[rustc_args_required_const(2)]
 pub unsafe fn _mm512_maskz_srai_epi64(k: __mmask8, a: __m512i, imm8: u32) -> __m512i {
     assert!(imm8 <= 255);

From 0cc213995e111fb35f253b49e07298f0361502de Mon Sep 17 00:00:00 2001
From: jironglin
Date: Thu, 20 Aug 2020 20:24:36 +0000
Subject: [PATCH 8/9] change llvm.x86.avx512.mask.pror.d to llvm.x86.avx512.pror.d

---
 crates/core_arch/src/x86/avx512f.rs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/crates/core_arch/src/x86/avx512f.rs b/crates/core_arch/src/x86/avx512f.rs
index 0641dbf615..977a177595 100644
--- a/crates/core_arch/src/x86/avx512f.rs
+++ b/crates/core_arch/src/x86/avx512f.rs
@@ -3791,7 +3791,7 @@ extern "C" {
     #[link_name = "llvm.x86.avx512.mask.prol.d.512"]
     fn vprold(a: i32x16, i8: i32) -> i32x16;
-    #[link_name = "llvm.x86.avx512.mask.pror.d.512"]
+    #[link_name = "llvm.x86.avx512.pror.d.512"]
     fn vprord(a: i32x16, i8: i32) -> i32x16;
     #[link_name = "llvm.x86.avx512.mask.prol.q.512"]
     fn vprolq(a: i64x8, i8: i32) -> i64x8;

From e700382dcafc82d5b6488ae37de46f6a9a61479a Mon Sep 17 00:00:00 2001
From: jironglin
Date: Mon, 24 Aug 2020 18:38:29 +0000
Subject: [PATCH 9/9] fix to pass CI

---
 crates/core_arch/src/x86/avx512f.rs | 62 ++++++++++++++---------------
 1 file changed, 31 insertions(+), 31 deletions(-)

diff --git a/crates/core_arch/src/x86/avx512f.rs b/crates/core_arch/src/x86/avx512f.rs
index 977a177595..2160744a5c 100644
--- a/crates/core_arch/src/x86/avx512f.rs
+++ b/crates/core_arch/src/x86/avx512f.rs
@@ -936,7 +936,7 @@ pub unsafe fn _mm512_maskz_rol_epi32(k: __mmask16, a: __m512i, imm8: i32) -> __m
 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_ror_epi32&expand=4721)
 #[inline]
 #[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vprord, imm8 = 1))]
+#[cfg_attr(test, assert_instr(vprold, imm8 = 233))]
 #[rustc_args_required_const(1)]
 pub unsafe fn _mm512_ror_epi32(a: __m512i, imm8: i32) -> __m512i {
     assert!(imm8 >= 0 && imm8 <= 255);
@@ -948,7 +948,7 @@ pub unsafe fn _mm512_ror_epi32(a: __m512i, imm8: i32) -> __m512i {
 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_ror_epi32&expand=4719)
 #[inline]
 #[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vprord, imm8 = 1))]
+#[cfg_attr(test, assert_instr(vprold, imm8 = 123))]
 #[rustc_args_required_const(3)]
 pub unsafe fn _mm512_mask_ror_epi32(src: __m512i, k: __mmask16, a: __m512i, imm8: i32) -> __m512i {
     assert!(imm8 >= 0 && imm8 <= 255);
@@ -961,7 +961,7 @@ pub unsafe fn _mm512_mask_ror_epi32(src: __m512i, k: __mmask16, a: __m512i, imm8
 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_ror_epi32&expand=4720)
 #[inline]
 #[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vprord, imm8 = 1))]
+#[cfg_attr(test, assert_instr(vprold, imm8 = 123))]
 #[rustc_args_required_const(2)]
 pub unsafe fn _mm512_maskz_ror_epi32(k: __mmask16, a: __m512i, imm8: i32) -> __m512i {
     assert!(imm8 >= 0 && imm8 <= 255);
@@ -1014,7 +1014,7 @@ pub unsafe fn _mm512_maskz_rol_epi64(k: __mmask8, a: __m512i, imm8: i32) -> __m5
 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_ror_epi64&expand=4730)
 #[inline]
 #[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vprorq, imm8 = 1))]
+#[cfg_attr(test, assert_instr(vprolq, imm8 = 15))]
 #[rustc_args_required_const(1)]
 pub unsafe fn _mm512_ror_epi64(a: __m512i, imm8: i32) -> __m512i {
     assert!(imm8 >= 0 && imm8 <= 255);
@@ -1026,7 +1026,7 @@ pub unsafe fn _mm512_ror_epi64(a: __m512i, imm8: i32) -> __m512i {
 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_ror_epi64&expand=4728)
 #[inline]
 #[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vprorq, imm8 = 1))]
+#[cfg_attr(test, assert_instr(vprolq, imm8 = 15))]
 #[rustc_args_required_const(3)]
 pub unsafe fn _mm512_mask_ror_epi64(src: __m512i, k: __mmask8, a: __m512i, imm8: i32) -> __m512i {
     assert!(imm8 >= 0 && imm8 <= 255);
@@ -1039,7 +1039,7 @@ pub unsafe fn _mm512_mask_ror_epi64(src: __m512i, k: __mmask8, a: __m512i, imm8:
 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_ror_epi64&expand=4729)
 #[inline]
 #[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vprorq, imm8 = 1))]
+#[cfg_attr(test, assert_instr(vprolq, imm8 = 15))]
 #[rustc_args_required_const(2)]
 pub unsafe fn _mm512_maskz_ror_epi64(k: __mmask8, a: __m512i, imm8: i32) -> __m512i {
     assert!(imm8 >= 0 && imm8 <= 255);
@@ -1053,7 +1053,7 @@ pub unsafe fn _mm512_maskz_ror_epi64(k: __mmask8, a: __m512i, imm8: i32) -> __m5
 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_slli_epi32&expand=5310)
 #[inline]
 #[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vpslld, imm8 = 1))]
+#[cfg_attr(test, assert_instr(vpslld, imm8 = 5))]
 #[rustc_args_required_const(1)]
 pub unsafe fn _mm512_slli_epi32(a: __m512i, imm8: u32) -> __m512i {
     assert!(imm8 <= 255);
@@ -1065,7 +1065,7 @@ pub unsafe fn _mm512_slli_epi32(a: __m512i, imm8: u32) -> __m512i {
 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_slli_epi32&expand=5308)
 #[inline]
 #[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vpslld, imm8 = 1))]
+#[cfg_attr(test, assert_instr(vpslld, imm8 = 5))]
 #[rustc_args_required_const(3)]
 pub unsafe fn _mm512_mask_slli_epi32(src: __m512i, k: __mmask16, a: __m512i, imm8: u32) -> __m512i {
     assert!(imm8 <= 255);
@@ -1078,7 +1078,7 @@ pub unsafe fn _mm512_mask_slli_epi32(src: __m512i, k: __mmask16, a: __m512i, imm
 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_slli_epi32&expand=5309)
 #[inline]
 #[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vpslld, imm8 = 1))]
+#[cfg_attr(test, assert_instr(vpslld, imm8 = 5))]
 #[rustc_args_required_const(2)]
 pub unsafe fn _mm512_maskz_slli_epi32(k: __mmask16, a: __m512i, imm8: u32) -> __m512i {
     assert!(imm8 <= 255);
@@ -1131,7 +1131,7 @@ pub unsafe fn _mm512_maskz_srli_epi32(k: __mmask16, a: __m512i, imm8: u32) -> __
 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_slli_epi64&expand=5319)
 #[inline]
 #[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vpsllq, imm8 = 1))]
+#[cfg_attr(test, assert_instr(vpsllq, imm8 = 5))]
 #[rustc_args_required_const(1)]
 pub unsafe fn _mm512_slli_epi64(a: __m512i, imm8: u32) -> __m512i {
     assert!(imm8 <= 255);
@@ -1143,7 +1143,7 @@ pub unsafe fn _mm512_slli_epi64(a: __m512i, imm8: u32) -> __m512i {
 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_slli_epi64&expand=5317)
 #[inline]
 #[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vpsllq, imm8 = 1))]
+#[cfg_attr(test, assert_instr(vpsllq, imm8 = 5))]
 #[rustc_args_required_const(3)]
 pub unsafe fn _mm512_mask_slli_epi64(src: __m512i, k: __mmask8, a: __m512i, imm8: u32) -> __m512i {
     assert!(imm8 <= 255);
@@ -1156,7 +1156,7 @@ pub unsafe fn _mm512_mask_slli_epi64(src: __m512i, k: __mmask8, a: __m512i, imm8
 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_slli_epi64&expand=5318)
 #[inline]
 #[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vpsllq, imm8 = 1))]
+#[cfg_attr(test, assert_instr(vpsllq, imm8 = 5))]
 #[rustc_args_required_const(2)]
 pub unsafe fn _mm512_maskz_slli_epi64(k: __mmask8, a: __m512i, imm8: u32) -> __m512i {
     assert!(imm8 <= 255);
@@ -1885,7 +1885,7 @@ pub unsafe fn _mm512_maskz_srlv_epi64(k: __mmask8, a: __m512i, count: __m512i) -
 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_and_epi32&expand=272)
 #[inline]
 #[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vpandd))]
+#[cfg_attr(test, assert_instr(vpandq))]
 pub unsafe fn _mm512_and_epi32(a: __m512i, b: __m512i) -> __m512i {
     transmute(simd_and(a.as_i32x16(), b.as_i32x16()))
 }
@@ -1951,7 +1951,7 @@ pub unsafe fn _mm512_maskz_and_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m
 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_and_si512&expand=302)
 #[inline]
 #[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vpandd))]
+#[cfg_attr(test, assert_instr(vpandq))]
 pub unsafe fn _mm512_and_si512(a: __m512i, b: __m512i) -> __m512i {
     transmute(simd_and(a.as_i32x16(), b.as_i32x16()))
 }
@@ -1961,7 +1961,7 @@ pub unsafe fn _mm512_and_si512(a: __m512i, b: __m512i) -> __m512i {
 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_or_epi32&expand=4042)
 #[inline]
 #[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vpord))]
+#[cfg_attr(test, assert_instr(vporq))]
 pub unsafe fn _mm512_or_epi32(a: __m512i, b: __m512i) -> __m512i {
     transmute(simd_or(a.as_i32x16(), b.as_i32x16()))
 }
@@ -2027,7 +2027,7 @@ pub unsafe fn _mm512_maskz_or_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m5
 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_or_si512&expand=4072)
 #[inline]
 #[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vpord))]
+#[cfg_attr(test, assert_instr(vporq))]
 pub unsafe fn _mm512_or_si512(a: __m512i, b: __m512i) -> __m512i {
     transmute(simd_or(a.as_i32x16(), b.as_i32x16()))
 }
@@ -2037,7 +2037,7 @@ pub unsafe fn _mm512_or_si512(a: __m512i, b: __m512i) -> __m512i {
 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_xor_epi32&expand=6142)
 #[inline]
 #[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vpxord))]
+#[cfg_attr(test, assert_instr(vpxorq))]
 pub unsafe fn _mm512_xor_epi32(a: __m512i, b: __m512i) -> __m512i {
     transmute(simd_xor(a.as_i32x16(), b.as_i32x16()))
 }
@@ -2103,7 +2103,7 @@ pub unsafe fn _mm512_maskz_xor_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m
 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_xor_si512&expand=6172)
 #[inline]
 #[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vpxord))]
+#[cfg_attr(test, assert_instr(vpxorq))]
 pub unsafe fn _mm512_xor_si512(a: __m512i, b: __m512i) -> __m512i {
     transmute(simd_xor(a.as_i32x16(), b.as_i32x16()))
 }
@@ -2113,9 +2113,9 @@ pub unsafe fn _mm512_xor_si512(a: __m512i, b: __m512i) -> __m512i {
 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=kand_mask16&expand=3212)
 #[inline]
 #[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(kandw))]
+#[cfg_attr(test, assert_instr(and))] // generate normal and code instead of kandw
 pub unsafe fn _kand_mask16(a: __mmask16, b: __mmask16) -> __mmask16 {
-    kandw(a, b)
+    transmute(kandw(a, b))
 }
 /// Compute the bitwise AND of 16-bit masks a and b, and store the result in k.
@@ -2123,9 +2123,9 @@ pub unsafe fn _kand_mask16(a: __mmask16, b: __mmask16) -> __mmask16 {
 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_kand&expand=3210)
 #[inline]
 #[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(kandw))]
+#[cfg_attr(test, assert_instr(and))] // generate normal and code instead of kandw
 pub unsafe fn _mm512_kand(a: __mmask16, b: __mmask16) -> __mmask16 {
-    kandw(a, b)
+    transmute(kandw(a, b))
 }
 /// Compute the bitwise OR of 16-bit masks a and b, and store the result in k.
@@ -2133,9 +2133,9 @@ pub unsafe fn _mm512_kand(a: __mmask16, b: __mmask16) -> __mmask16 {
 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=kor_mask16&expand=3239)
 #[inline]
 #[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(korw))]
+#[cfg_attr(test, assert_instr(or))] // generate normal or code instead of korw
 pub unsafe fn _kor_mask16(a: __mmask16, b: __mmask16) -> __mmask16 {
-    korw(a, b)
+    transmute(korw(a, b))
 }
 /// Compute the bitwise OR of 16-bit masks a and b, and store the result in k.
@@ -2143,9 +2143,9 @@ pub unsafe fn _kor_mask16(a: __mmask16, b: __mmask16) -> __mmask16 {
 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_kor&expand=3237)
 #[inline]
 #[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(korw))]
+#[cfg_attr(test, assert_instr(or))] // generate normal or code instead of korw
 pub unsafe fn _mm512_kor(a: __mmask16, b: __mmask16) -> __mmask16 {
-    korw(a, b)
+    transmute(korw(a, b))
 }
 /// Compute the bitwise XOR of 16-bit masks a and b, and store the result in k.
@@ -2153,9 +2153,9 @@ pub unsafe fn _mm512_kor(a: __mmask16, b: __mmask16) -> __mmask16 {
 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=kxor_mask16&expand=3291)
 #[inline]
 #[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(kxorw))]
+#[cfg_attr(test, assert_instr(xor))] // generate normal xor code instead of kxorw
 pub unsafe fn _kxor_mask16(a: __mmask16, b: __mmask16) -> __mmask16 {
-    kxorw(a, b)
+    transmute(kxorw(a, b))
 }
 /// Compute the bitwise XOR of 16-bit masks a and b, and store the result in k.
@@ -2163,9 +2163,9 @@ pub unsafe fn _kxor_mask16(a: __mmask16, b: __mmask16) -> __mmask16 {
 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_kxor&expand=3289)
 #[inline]
 #[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(kxorw))]
+#[cfg_attr(test, assert_instr(xor))] // generate normal xor code instead of kxorw
 pub unsafe fn _mm512_kxor(a: __mmask16, b: __mmask16) -> __mmask16 {
-    kxorw(a, b)
+    transmute(kxorw(a, b))
 }
 /// Sets packed 32-bit integers in `dst` with the supplied values.
@@ -3791,7 +3791,7 @@ extern "C" {
     #[link_name = "llvm.x86.avx512.mask.prol.d.512"]
     fn vprold(a: i32x16, i8: i32) -> i32x16;
-    #[link_name = "llvm.x86.avx512.pror.d.512"]
+    #[link_name = "llvm.x86.avx512.mask.pror.d.512"]
     fn vprord(a: i32x16, i8: i32) -> i32x16;
     #[link_name = "llvm.x86.avx512.mask.prol.q.512"]
     fn vprolq(a: i64x8, i8: i32) -> i64x8;
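
Why the asserts in the patches above end up shaped this way: LLVM canonicalizes a rotate-right by n into a rotate-left by lane_width - n, so the `ror` intrinsics legitimately disassemble as vprold/vprolq; and because a `__mmask16` is just a `u16`, the kand/kor/kxor helpers lower to plain scalar `and`/`or`/`xor` rather than kandw/korw/kxorw. A minimal scalar sketch of both identities (plain stable Rust, no AVX-512 required; the constants 233 and 15 mirror the immediates in the asserts above, and the sample values are made up for illustration):

fn main() {
    // Rotate amounts are taken modulo the lane width, exactly as vprold does:
    // ror by 233 == ror by 233 % 32 == ror by 9 == rol by 32 - 9 == rol by 23,
    // so a test built with `imm8 = 233` can legitimately observe a vprold.
    let x: u32 = 0x1234_5678;
    assert_eq!(x.rotate_right(233 % 32), x.rotate_left(32 - 233 % 32));

    // Same identity on a 64-bit lane: ror by 15 == rol by 64 - 15 == rol by 49,
    // which is why the ror_epi64 tests assert vprolq.
    let y: u64 = 0x0123_4567_89ab_cdef;
    assert_eq!(y.rotate_right(15), y.rotate_left(64 - 15));

    // __mmask16 is a plain u16, so the mask ops can compile to scalar ALU
    // instructions (and/or/xor) instead of kandw/korw/kxorw.
    let (a, b): (u16, u16) = (0b1010_1010_1010_1010, 0b1100_1100_1100_1100);
    assert_eq!(a & b, 0b1000_1000_1000_1000); // what _kand_mask16 computes
    assert_eq!(a | b, 0b1110_1110_1110_1110); // what _kor_mask16 computes
    assert_eq!(a ^ b, 0b0110_0110_0110_0110); // what _kxor_mask16 computes
}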